Home

To make sure the Vega charts render correctly, view this notebook on the published website rather than in the GitHub repo: https://walterra.github.io/jupyter2kibana/viz-4a-anomaly-detection.html

viz-4a-anomaly-detection.ipynb

The first notebook in this set of examples (viz-4x) demonstrates how to replicate one of the custom charts of Kibana's Machine Learning plugin as an embeddable chart for dashboards.

In [1]:
import datetime
import altair as alt
import eland as ed
from elasticsearch import Elasticsearch
import elastic_transport
import logging
import json
import numpy as np
import matplotlib.pyplot as plt
import urllib3
import warnings

# Switch off Altair's max-rows check for chart data.
alt.data_transformers.disable_max_rows()

# Only let errors from the `elastic_transport` logger through.
transport_logger = logging.getLogger("elastic_transport")
transport_logger.setLevel(logging.ERROR)

# Suppress insecure SSL connection warnings.
# With `verify_certs=False` in a dev environment these get noisy,
# so both warning categories are silenced.
for insecure_category in (
    urllib3.exceptions.InsecureRequestWarning,
    elastic_transport.SecurityWarning,
):
    urllib3.disable_warnings(insecure_category)

# Hide every remaining warning so the HTML rendering of the notebook stays clean.
warnings.filterwarnings('ignore')
In [2]:
# Elasticsearch index that the eland DataFrame reads from and that is handed to
# `saveVegaLiteVis`. The name suggests it holds the ML anomaly results of the
# `fq_single_count_15m` job — TODO confirm.
index_name = '.ml-anomalies-fq_single_count_15m'
# Name handed to `saveVegaLiteVis` for the visualization that gets saved.
vis_name = 'ml-anomalies-fq'
In [3]:
# Load the connection settings. JSON is UTF-8, so the encoding is stated
# explicitly instead of relying on the platform default.
with open('config.json', encoding='utf-8') as config_file:
    es_config = json.load(config_file)

# First instantiate an 'Elasticsearch' instance with the supplied config
es = Elasticsearch(
    hosts=[es_config['es_client']],
    # The client documents `basic_auth` as a (username, password) tuple.
    basic_auth=(es_config['user'], es_config['password']),
    # Certificate verification can be switched on by adding
    # `"verify_certs": true` to config.json. Without that key it stays off.
    # Only in development environments with self signed certificates fall back
    # to use `verify_certs=False`.
    verify_certs=es_config.get('verify_certs', False),
)

# eland DataFrame over the index, reduced to its numeric columns.
ed_df = ed.DataFrame(es, index_name)
ed_df_number = ed_df.select_dtypes(include=np.number)
# Last expression of the cell: displayed as the cell output.
ed_df_number.shape
Out[3]:
(975, 73)
In [4]:
# Note: The Vega spec is first built with Altair against a plain URL as data source. This only
# works for non-secured ES instances. If your ES instance runs using SSL and/or authentication
# the chart in this cell will render empty. Saving the visualization in Kibana in a later cell
# still works because there the URL gets replaced with an Elasticsearch query
# to be used via the Kibana Vega plugin.

# WARNING:
# Use the following proxy based approach only for demo purposes in a development environment.
# It will expose a secured ES instance unsecured!
# To make this work for demo purposes run the nodejs based proxy in a separate terminal like this:
# NODE_TLS_REJECT_UNAUTHORIZED='0' node proxy

# URL as ES endpoint
# url = 'http://localhost:9220/'+index_name+'/_search?size=1000'

# URL static fallback
url = 'https://walterra.github.io/jupyter2kibana/data/fq_single_count_15m.json'

# The documents are read from the `hits.hits` property of the JSON response.
url_data = alt.Data(
    url=url,
    format=alt.DataFormat(property='hits.hits', type='json'),
)

fields = [
    'anomaly_score',
    'actual',
    'typical',
    'event_count',
    'model_lower',
    'model_median',
    'model_upper',
    'timestamp',
    'result_type',
]

# Map each field name to the expression that reads it from the `_source` of a hit.
rename_dict = {field: 'datum._source.' + field for field in fields}

base = alt.Chart(url_data).transform_calculate(**rename_dict)

# Fold all fields into attribute/value pairs and draw a binned count histogram of the values.
url_chart = (
    base
    .transform_fold(fields, as_=['attribute', 'value'])
    .mark_bar()
    .encode(
        alt.X('value:Q', bin=True, title=''),
        alt.Y('count()', title=''),
        tooltip=[
            alt.Tooltip('value:Q', bin=True, title='x'),
            alt.Tooltip('count()', title='y'),
        ],
    )
    .properties(width=100, height=100)
)

# One titled histogram per field, in alphabetical order.
histogram_panels = [
    url_chart.transform_filter(alt.datum.attribute == attribute).properties(title=attribute)
    for attribute in sorted(fields)
]

# Arrange the panels four per row; every panel gets its own axes and scales.
url_charts = (
    alt.ConcatChart(concat=histogram_panels, columns=4)
    .resolve_axis(x='independent', y='independent')
    .resolve_scale(x='independent', y='independent')
)

url_charts
Out[4]:
In [5]:
# custom: ['#1f77b4', 'lightgray', 'gray']
# ml:     ['#32a7c2', '#c8e6ef', '#bbd7df']

colors = {
    'model_bounds': 'lightgray',
    'model_median': 'gray',
    'actual': '#1f77b4',
}


def time_axis():
    """Return a fresh x channel on `timestamp`; every layer uses the same definition."""
    return alt.X(
        'timestamp:T',
        scale=alt.Scale(zero=False, nice=False),
        title='',
        axis=alt.Axis(grid=False),
    )


base = alt.Chart(url_data, width=800, height=300).transform_calculate(**rename_dict)

# Rows whose `result_type` is 'model_plot' feed the bounds, median and actual layers.
model_plot_rows = base.transform_filter(alt.datum.result_type == 'model_plot')

# Shaded band between the mean upper and mean lower model value.
model_bounds = model_plot_rows.mark_area(
    color=colors['model_bounds'],
    opacity=0.5,
).encode(
    time_axis(),
    alt.Y('mean(model_upper):Q'),
    alt.Y2('mean(model_lower):Q'),
)

# Line through the mean model median.
model_median = model_plot_rows.mark_line(
    color=colors['model_median'],
    opacity=0.5,
    strokeJoin='round',
).encode(
    time_axis(),
    alt.Y('mean(model_median):Q', title=''),
)

# Line through the mean actual value.
actual = model_plot_rows.mark_line(
    color=colors['actual'],
    strokeJoin='round',
).encode(
    time_axis(),
    alt.Y('mean(actual):Q'),
)

# Per-timestamp means of `actual` and `anomaly_score`, used by the two point layers.
per_timestamp = base.transform_aggregate(
    mean_actual='mean(actual)',
    mean_anomaly_score='mean(anomaly_score)',
    groupby=["timestamp"],
)

# Red markers wherever the mean anomaly score of a timestamp is above zero.
anomalies = per_timestamp.mark_point(color='red', size=60).transform_filter(
    alt.datum.mean_anomaly_score > 0
).encode(
    time_axis(),
    alt.Y('mean_actual:Q'),
)

# Fully transparent circles on every timestamp that only carry the tooltip.
tooltip = per_timestamp.mark_circle(opacity=0, size=100).encode(
    time_axis(),
    alt.Y('mean_actual:Q'),
    tooltip=['mean_actual:Q', 'mean_anomaly_score:Q', 'timestamp:T'],
)

# Stack the layers in drawing order and apply the shared styling.
layered = model_bounds + model_median + actual + anomalies + tooltip
chart = (
    layered
    .configure_axis(
        grid=True,
        gridColor="#eee",
        domainColor="#ddd",
        tickColor="#ddd",
        labelColor="gray",
        labelBound=True,
    )
    .configure_view(strokeWidth=0)
    .configure_title(
        fontSize=14,
        fontWeight='bold',
        anchor='start',
        color='gray',
    )
)

chart
Out[5]:
In [6]:
from kibana_vega_util import saveVegaLiteVis

# Hand the layered chart to the helper together with the index and visualization name.
# Only in development environments with self signed certificates fall back to use `verify=False`.
save_response = saveVegaLiteVis(index_name, vis_name, chart, resultSize=1000, verify=False)

# Show the returned response as the cell output.
# NOTE(review): the recorded output is a 409, presumably because the visualization
# already existed — confirm against the helper.
save_response
Out[6]:
<Response [409]>