Home

To make sure the Vega charts render correctly, view the notebook not from the Github repo but the published website here: https://walterra.github.io/jupyter2kibana/viz-4c-anomaly-detection-split.html

viz-4c-anomaly-detection-split.ipynb

Note: This example is not fully working after the latest updates; in particular, the final cell that saves the visualization to Kibana is currently disabled.

With this notebook we're back at using small multiples, but this time visualizing data about distinct entities with highlighted anomalies.

In [1]:
# All imports for the notebook, collected in a single top cell.
# altair/eland/elasticsearch drive the ES-backed Vega visualizations;
# logging/urllib3/elastic_transport/warnings are only used to quiet
# dev-environment noise below.
# NOTE(review): `datetime` and `matplotlib.pyplot` do not appear to be
# used anywhere in this notebook — candidates for removal (kept here in
# case a hidden/derived cell depends on them).
import datetime
import altair as alt
import eland as ed
from elasticsearch import Elasticsearch
import elastic_transport
import logging
import json
import numpy as np
import matplotlib.pyplot as plt
import urllib3
import warnings

# Altair refuses datasets > 5000 rows by default; lift that limit since
# the charts below are fed up to 10000 ES documents.
alt.data_transformers.disable_max_rows()
# Silence transport-level chatter (e.g. repeated TLS warnings per request).
logging.getLogger("elastic_transport").setLevel(logging.ERROR)

# Suppress insecure SSL connection warnings
# In dev environments with the `verify_certs=False`
# you might want to reduce those warnings.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
urllib3.disable_warnings(elastic_transport.SecurityWarning)

# For rendering the notebook to HTML hide all warnings
warnings.filterwarnings('ignore')
In [2]:
# Source index: results written by the ML anomaly detection job.
index_name = '.ml-anomalies-fq-mean-split-1216'
# Name under which the visualization would be saved in Kibana
# (used by the saveVegaLiteVis call in the final cell).
vis_name = 'ml-anomalies-cw-split'
In [3]:
# Load Elasticsearch connection settings from a local config file.
# Expected keys in config.json: 'es_client' (host URL), 'user', 'password'.
with open('config.json') as config_file:
  es_config = json.load(config_file)

# First instantiate an 'Elasticsearch' instance with the supplied config
es = Elasticsearch(
    hosts=[es_config['es_client']],
    basic_auth=[
        es_config['user'],
        es_config['password']
    ],
    # Only in development environments with self signed certificates fall back to use `verify_certs=False`
    verify_certs=False
)

# Wrap the anomaly index in an eland DataFrame (pandas-like API backed by ES).
ed_df = ed.DataFrame(es, index_name)
# Restrict to numeric columns only.
ed_df_number = ed_df.select_dtypes(include=np.number)
# Last expression is the cell output: (rows, columns) of the numeric frame.
ed_df_number.shape
Out[3]:
(9585, 73)
In [4]:
# Note: To create the Vega spec using Altair we reference ES via URL first. This will only work
# for non-secured ES instances. If your ES instance runs using SSL and/or authentication the chart
# in this cell will render empty. You can still save the visualization in Kibana correctly in the
# next cell because there the URL gets replaced with an Elasticsearch query
# to be used via the Kibana Vega plugin.

# WARNING:
# Do the following approach using a proxy only for demo purposes in a development environment.
# It will expose a secured ES instance unsecured!
# To make this work for demo purposes run the nodejs based proxy in a separate terminal like this:
# NODE_TLS_REJECT_UNAUTHORIZED='0' node proxy

# URL as ES endpoint (requires the proxy above)
# url = 'http://localhost:9220/'+index_name+'/_search?size=10000'

# URL static fallback: a pre-exported copy of the ES search response
url = 'https://walterra.github.io/jupyter2kibana/data/fq_mean_split.json'

# The ES search response nests the documents under 'hits.hits'.
url_data = alt.Data(url=url, format=alt.DataFormat(property='hits.hits',type='json'))

# Document attributes to visualize; each ES hit stores them under '_source'.
fields = [
    'anomaly_score',
    'actual',
    'typical',
    'event_count',
    'model_lower',
    'model_median',
    'model_upper',
    'timestamp',
    'result_type',
    'partition_field_value',
    'record_score',
]

# Map each field name to a Vega expression that extracts it from the hit,
# e.g. 'actual' -> 'datum._source.actual', for use with transform_calculate.
rename_dict = {field: 'datum._source.' + field for field in fields}

base = alt.Chart(url_data).transform_calculate(**rename_dict)

# One binned histogram template; transform_fold turns the wide documents
# into (attribute, value) pairs so a single spec serves every field.
url_chart = base.transform_fold(
    fields,
    as_=['attribute', 'value']
).mark_bar().encode(
    alt.X('value:Q', bin=True, title=''),
    alt.Y('count()', title=''),
    tooltip=[
        alt.Tooltip('value:Q', bin=True, title='x'),
        alt.Tooltip('count()', title='y')
    ]
).properties(
    width=100,
    height=100
)

# Small multiples: one filtered copy of the histogram per attribute,
# laid out in a 4-column grid with independent axes and scales.
url_charts = alt.ConcatChart(
    concat=[
      url_chart.transform_filter(alt.datum.attribute == attribute).properties(title=attribute)
      for attribute in sorted(fields)
    ],
    columns=4
).resolve_axis(
    x='independent',
    y='independent'
).resolve_scale(
    x='independent', 
    y='independent'
)

url_charts
Out[4]:
In [5]:
# Color palettes considered for the layers below:
# custom: ['#1f77b4', 'lightgray', 'gray']
# ml:     ['#32a7c2', '#c8e6ef', '#bbd7df']

colors = {'model_bounds':'lightgray','model_median':'gray','actual':'#1f77b4'}

# Shared base for all layers: extract fields from '_source' (rename_dict is
# defined in the previous cell), keep only 'record' and 'model_plot' result
# documents, and aggregate per timestamp bucket.
# NOTE(review): despite its 'mean_' prefix, mean_anomaly_score aggregates
# max(record_score) — each bucket is flagged with its worst anomaly score.
base = alt.Chart(url_data, width=110, height=60).transform_calculate(**rename_dict).transform_filter(
    (alt.datum.result_type == 'record') | (alt.datum.result_type == 'model_plot')
).transform_aggregate(
    mean_actual='mean(actual)',
    mean_anomaly_score='max(record_score)',
    mean_model_upper='mean(model_upper)',
    mean_model_median='mean(model_median)',
    mean_model_lower='mean(model_lower)',
    groupby=["timestamp"]
)

# Layer 1: shaded band between the model's upper and lower bounds.
model_bounds = base.mark_area(color=colors['model_bounds'], opacity=0.5).encode(
    alt.X('timestamp:T', scale=alt.Scale(zero=False,nice=False), title='', axis=alt.Axis(grid=False)),
    alt.Y('mean_model_upper:Q'),
    alt.Y2('mean_model_lower:Q'),
)

# Layer 2: model median line.
model_median = base.mark_line(color=colors['model_median'], opacity=0.5, strokeJoin='round').encode(
    alt.X('timestamp:T', scale=alt.Scale(zero=False,nice=False), title='', axis=alt.Axis(grid=False)),
    alt.Y('mean_model_median:Q', title='')
)

# Layer 3: observed (actual) values line.
actual = base.mark_line(color=colors['actual'], strokeJoin='round').encode(
    alt.X('timestamp:T', scale=alt.Scale(zero=False,nice=False), title='', axis=alt.Axis(grid=False)),
    alt.Y('mean_actual:Q')
)

# Layer 4: anomaly markers — circles on the actual line wherever the bucket's
# score is > 0, colored by severity via a piecewise (stepped) scale:
# 0-25 lightblue, 25-50 yellow, 50-75 orange, 75-100 red. The duplicated
# domain values create hard color steps instead of a smooth gradient.
anomalies = base.mark_circle(size=40, opacity=1, stroke=colors['actual'], strokeWidth=1).transform_filter(
    alt.datum.mean_anomaly_score > 0
).encode(
    alt.X('timestamp:T', scale=alt.Scale(zero=False,nice=False), title='', axis=alt.Axis(grid=False)),
    alt.Y('mean_actual:Q'),
    color=alt.Color(
        'mean_anomaly_score:Q',
        scale=alt.Scale(
            domain=[0,25, 25, 50, 50, 75, 75, 100],
            range=['lightblue', 'lightblue', 'yellow', 'yellow', 'orange', 'orange', 'red', 'red']
    )),
)

# Layer 5: invisible circles (opacity=0) with a larger hit area to make
# tooltips easy to trigger anywhere along the actual line.
tooltip = base.mark_circle(opacity=0, size=100).encode(
    alt.X('timestamp:T', scale=alt.Scale(zero=False,nice=False), title='', axis=alt.Axis(grid=False)),
    alt.Y('mean_actual:Q'),
    tooltip=['mean_actual:Q', 'mean_anomaly_score:Q', 'timestamp:T']
)

# Combine the layers and facet into small multiples, one panel per
# partition_field_value (i.e. per entity the ML job was split on).
chart = (model_bounds + model_median + actual + anomalies + tooltip).facet(
    facet='partition_field_value:N',
    columns=5
).configure_axis(
    grid=True,
    gridColor="#eee",
    domainColor="#ddd",
    tickColor="#ddd",
    labelColor="gray",
    labelBound=True,
).configure_view(
    strokeWidth=0
).configure_title(
    fontSize=14,
    fontWeight='bold',
    anchor='start',
    color='gray'
)

# Last expression displays the faceted chart as the cell output.
chart
Out[5]:
In [6]:
# Final step: save the chart as a Vega visualization in Kibana.
# NOTE(review): deliberately commented out — the note at the top of the
# notebook states this example is not fully working after the latest
# updates. The client construction below also differs from the one in
# cell [3] (host/port dict vs. URL + basic_auth) — reconcile before
# re-enabling.
# from kibana_vega_util import saveVegaLiteVis
# from elasticsearch import Elasticsearch 
# es=Elasticsearch([{'host':'localhost','port':9200}])

# saveVegaLiteVis(es, index_name, vis_name, chart, resultSize=10000)