
To make sure the Vega charts render correctly, view this notebook not in the GitHub repo but on the published website: https://walterra.github.io/jupyter2kibana/viz-4b-anomaly-detection-annotation.html

viz-4b-anomaly-detection-annotation.ipynb

This notebook adds user-created annotations as an additional layer to the visualization. It's a great example of how Vega can fetch data from different sources/indices, something that is not easily possible with Kibana's built-in visualizations.
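
As a minimal sketch of that idea (the URLs and field names here are hypothetical, not part of this notebook), each Altair layer can bind its own data source, and the layered chart keeps both definitions in one Vega-Lite spec:

import altair as alt

# Each layer binds its own data source (hypothetical URLs).
metrics = alt.Data(url='https://example.com/metrics.json', name='metrics')
notes = alt.Data(url='https://example.com/annotations.json', name='annotations')

line = alt.Chart(metrics).mark_line().encode(x='timestamp:T', y='value:Q')
band = alt.Chart(notes).mark_rect(opacity=0.2).encode(x='timestamp:T', x2='end_timestamp:T')

# Layering combines both; each layer still fetches from its own URL.
combined = band + line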

In [1]:
import datetime
import altair as alt
import eland as ed
from elasticsearch import Elasticsearch
import elastic_transport
import logging
import json
import numpy as np
import matplotlib.pyplot as plt
import urllib3
import warnings

alt.data_transformers.disable_max_rows()
logging.getLogger("elastic_transport").setLevel(logging.ERROR)

# Suppress insecure SSL connection warnings.
# In development environments that use `verify_certs=False`,
# you may want to silence these warnings.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
urllib3.disable_warnings(elastic_transport.SecurityWarning)

# Hide all warnings when rendering the notebook to HTML
warnings.filterwarnings('ignore')
In [2]:
annotation_index_name = '.ml-annotations-read'
index_name = '.ml-anomalies-fq_single_count_15m'
vis_name = 'ml-anomalies-fq'
In [3]:
with open('config.json') as config_file:
    es_config = json.load(config_file)

# First instantiate an `Elasticsearch` client with the supplied config
es = Elasticsearch(
    hosts=[es_config['es_client']],
    basic_auth=(
        es_config['user'],
        es_config['password']
    ),
    # Only fall back to `verify_certs=False` in development environments
    # with self-signed certificates
    verify_certs=False
)

ed_df = ed.DataFrame(es, index_name)
ed_df_number = ed_df.select_dtypes(include=np.number)
ed_df_number.shape
Out[3]:
(975, 73)
In [4]:
# Note: To create the Vega spec using Altair, we first reference ES via a URL. This only works
# for unsecured ES instances. If your ES instance uses SSL and/or authentication, the chart
# in this cell will render empty. You can still save the visualization to Kibana correctly in the
# next cell, because there the URL gets replaced with an Elasticsearch query
# to be used via the Kibana Vega plugin.

# WARNING:
# Use the following proxy-based approach only for demo purposes in a development environment.
# It exposes a secured ES instance without any protection!
# To make this work, run the Node.js-based proxy in a separate terminal like this:
# NODE_TLS_REJECT_UNAUTHORIZED='0' node proxy

# URL as ES endpoint
# url_annotation = 'http://localhost:9220/'+annotation_index_name+'/_search?size=10000'

# URL static fallback
url_annotation = 'https://walterra.github.io/jupyter2kibana/data/fq_single_count_15m_annotations.json'

url_data_annotation = alt.Data(
    url=url_annotation,
    format=alt.DataFormat(property='hits.hits',type='json'),
    name='ml-annotations'
)

fields_annotation = [
    'annotation',
    'timestamp',
    'end_timestamp',
    'job_id',
]

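# Elasticsearch returns each hit's fields nested under `_source`, so build
# calculate transforms that lift them to top-level datum fields.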
rename_dict_annotation = dict((a, 'datum._source.'+a) for a in fields_annotation)

base_annotation = alt.Chart(url_data_annotation).transform_calculate(**rename_dict_annotation)

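# Draw each annotation as a translucent rectangle spanning the chart's
# full value range (y/y2 are hard-coded to cover 0-350).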
annotations = base_annotation.transform_filter(
    alt.datum.job_id == "fq_single_count_15m"
).transform_calculate(
    y="350",
    y2="0"
).mark_rect(
    color="#369",
    opacity=0.15,
).encode(
    alt.X('timestamp:T'),
    alt.X2('end_timestamp:T'),
    alt.Y('y:Q'),
    alt.Y2('y2:Q'),
    tooltip=['annotation:N'],
)

annotations
Out[4]:
In [5]:
# URL as ES endpoint
# url = 'http://localhost:9220/'+index_name+'/_search?size=1000'

# URL static fallback
url = 'https://walterra.github.io/jupyter2kibana/data/fq_single_count_15m.json'

url_data = alt.Data(
    url=url,
    format=alt.DataFormat(property='hits.hits',type='json'),
    name='ml-anomalies'
)

fields = [
    'anomaly_score',
    'actual',
    'typical',
    'event_count',
    'model_lower',
    'model_median',
    'model_upper',
    'timestamp',
    'result_type'
]

rename_dict = dict((a, 'datum._source.'+a) for a in fields)

base = alt.Chart(url_data).transform_calculate(**rename_dict)

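# Fold the wide records into (attribute, value) pairs so a single
# histogram spec can be reused for every field.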
url_chart = base.transform_fold(
    fields,
    as_=['attribute', 'value']
).mark_bar().encode(
    alt.X('value:Q', bin=True, title=''),
    alt.Y('count()', title=''),
    tooltip=[
        alt.Tooltip('value:Q', bin=True, title='x'),
        alt.Tooltip('count()', title='y')
    ]
).properties(
    width=100,
    height=100
)

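# One filtered copy of the histogram per attribute, laid out in a 4-column grid.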
url_charts = alt.ConcatChart(
    concat=[
      url_chart.transform_filter(alt.datum.attribute == attribute).properties(title=attribute)
      for attribute in sorted(fields)
    ],
    columns=4
).resolve_axis(
    x='independent',
    y='independent'
).resolve_scale(
    x='independent', 
    y='independent'
)

url_charts
Out[5]:
In [6]:
# custom: ['#1f77b4', 'lightgray', 'gray']
# ml:     ['#32a7c2', '#c8e6ef', '#bbd7df']

colors = {'model_bounds':'lightgray','model_median':'gray','actual':'#1f77b4'}

base = alt.Chart(url_data, width=800, height=300).transform_calculate(**rename_dict)

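# Shaded band between the model's upper and lower bounds.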
model_bounds = base.mark_area(color=colors['model_bounds'], opacity=0.5).transform_filter(
    alt.datum.result_type == 'model_plot'
).encode(
    alt.X('timestamp:T', scale=alt.Scale(zero=False,nice=False), title='', axis=alt.Axis(grid=False)),
    alt.Y('mean(model_upper):Q'),
    alt.Y2('mean(model_lower):Q'),
)

model_median = base.mark_line(color=colors['model_median'], opacity=0.5, strokeJoin='round').transform_filter(
    alt.datum.result_type == 'model_plot'
).encode(
    alt.X('timestamp:T', scale=alt.Scale(zero=False,nice=False), title='', axis=alt.Axis(grid=False)),
    alt.Y('mean(model_median):Q', title='')
)

actual = base.mark_line(color=colors['actual'], strokeJoin='round').transform_filter(
    alt.datum.result_type == 'model_plot'
).encode(
    alt.X('timestamp:T', scale=alt.Scale(zero=False,nice=False), title='', axis=alt.Axis(grid=False)),
    alt.Y('mean(actual):Q')
)

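# Highlight buckets with a positive anomaly score as red points on the actual line.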
anomalies = base.mark_point(color='red', size=60).transform_aggregate(
    mean_actual='mean(actual)',
    mean_anomaly_score='mean(anomaly_score)',
    groupby=["timestamp"]
).transform_filter(
    alt.datum.mean_anomaly_score > 0
).encode(
    alt.X('timestamp:T', scale=alt.Scale(zero=False,nice=False), title='', axis=alt.Axis(grid=False)),
    alt.Y('mean_actual:Q')
)

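# Invisible circles that act as hover targets for the tooltip.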
tooltip = base.mark_circle(opacity=0, size=100).transform_aggregate(
    mean_actual='mean(actual)',
    mean_anomaly_score='mean(anomaly_score)',
    groupby=["timestamp"]
).encode(
    alt.X('timestamp:T', scale=alt.Scale(zero=False,nice=False), title='', axis=alt.Axis(grid=False)),
    alt.Y('mean_actual:Q'),
    tooltip=['mean_actual:Q', 'mean_anomaly_score:Q', 'timestamp:T']
)

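# Layer order matters: the annotations layer comes first so it renders behind the data layers.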
chart = (annotations + (model_bounds + model_median + actual + anomalies + tooltip)).configure_axis(
    grid=True,
    gridColor="#eee",
    domainColor="#ddd",
    tickColor="#ddd",
    labelColor="gray",
    labelBound=True,
).configure_view(
    strokeWidth=0
).configure_title(
    fontSize=14,
    fontWeight='bold',
    anchor='start',
    color='gray'
)

chart
Out[6]:
In [7]:
json.loads(chart.to_json())
Out[7]:
{'$schema': 'https://vega.github.io/schema/vega-lite/v4.17.0.json',
 'config': {'axis': {'domainColor': '#ddd',
   'grid': True,
   'gridColor': '#eee',
   'labelBound': True,
   'labelColor': 'gray',
   'tickColor': '#ddd'},
  'title': {'anchor': 'start',
   'color': 'gray',
   'fontSize': 14,
   'fontWeight': 'bold'},
  'view': {'continuousHeight': 300, 'continuousWidth': 400, 'strokeWidth': 0}},
 'layer': [{'data': {'format': {'property': 'hits.hits', 'type': 'json'},
    'name': 'ml-annotations',
    'url': 'https://walterra.github.io/jupyter2kibana/data/fq_single_count_15m_annotations.json'},
   'encoding': {'tooltip': [{'field': 'annotation', 'type': 'nominal'}],
    'x': {'field': 'timestamp', 'type': 'temporal'},
    'x2': {'field': 'end_timestamp'},
    'y': {'field': 'y', 'type': 'quantitative'},
    'y2': {'field': 'y2'}},
   'mark': {'color': '#369', 'opacity': 0.15, 'type': 'rect'},
   'transform': [{'as': 'annotation', 'calculate': 'datum._source.annotation'},
    {'as': 'timestamp', 'calculate': 'datum._source.timestamp'},
    {'as': 'end_timestamp', 'calculate': 'datum._source.end_timestamp'},
    {'as': 'job_id', 'calculate': 'datum._source.job_id'},
    {'filter': "(datum.job_id === 'fq_single_count_15m')"},
    {'as': 'y', 'calculate': '350'},
    {'as': 'y2', 'calculate': '0'}]},
  {'data': {'format': {'property': 'hits.hits', 'type': 'json'},
    'name': 'ml-anomalies',
    'url': 'https://walterra.github.io/jupyter2kibana/data/fq_single_count_15m.json'},
   'layer': [{'encoding': {'x': {'axis': {'grid': False},
       'field': 'timestamp',
       'scale': {'nice': False, 'zero': False},
       'title': '',
       'type': 'temporal'},
      'y': {'aggregate': 'mean',
       'field': 'model_upper',
       'type': 'quantitative'},
      'y2': {'aggregate': 'mean', 'field': 'model_lower'}},
     'height': 300,
     'mark': {'color': 'lightgray', 'opacity': 0.5, 'type': 'area'},
     'transform': [{'as': 'anomaly_score',
       'calculate': 'datum._source.anomaly_score'},
      {'as': 'actual', 'calculate': 'datum._source.actual'},
      {'as': 'typical', 'calculate': 'datum._source.typical'},
      {'as': 'event_count', 'calculate': 'datum._source.event_count'},
      {'as': 'model_lower', 'calculate': 'datum._source.model_lower'},
      {'as': 'model_median', 'calculate': 'datum._source.model_median'},
      {'as': 'model_upper', 'calculate': 'datum._source.model_upper'},
      {'as': 'timestamp', 'calculate': 'datum._source.timestamp'},
      {'as': 'result_type', 'calculate': 'datum._source.result_type'},
      {'filter': "(datum.result_type === 'model_plot')"}],
     'width': 800},
    {'encoding': {'x': {'axis': {'grid': False},
       'field': 'timestamp',
       'scale': {'nice': False, 'zero': False},
       'title': '',
       'type': 'temporal'},
      'y': {'aggregate': 'mean',
       'field': 'model_median',
       'title': '',
       'type': 'quantitative'}},
     'height': 300,
     'mark': {'color': 'gray',
      'opacity': 0.5,
      'strokeJoin': 'round',
      'type': 'line'},
     'transform': [{'as': 'anomaly_score',
       'calculate': 'datum._source.anomaly_score'},
      {'as': 'actual', 'calculate': 'datum._source.actual'},
      {'as': 'typical', 'calculate': 'datum._source.typical'},
      {'as': 'event_count', 'calculate': 'datum._source.event_count'},
      {'as': 'model_lower', 'calculate': 'datum._source.model_lower'},
      {'as': 'model_median', 'calculate': 'datum._source.model_median'},
      {'as': 'model_upper', 'calculate': 'datum._source.model_upper'},
      {'as': 'timestamp', 'calculate': 'datum._source.timestamp'},
      {'as': 'result_type', 'calculate': 'datum._source.result_type'},
      {'filter': "(datum.result_type === 'model_plot')"}],
     'width': 800},
    {'encoding': {'x': {'axis': {'grid': False},
       'field': 'timestamp',
       'scale': {'nice': False, 'zero': False},
       'title': '',
       'type': 'temporal'},
      'y': {'aggregate': 'mean', 'field': 'actual', 'type': 'quantitative'}},
     'height': 300,
     'mark': {'color': '#1f77b4', 'strokeJoin': 'round', 'type': 'line'},
     'transform': [{'as': 'anomaly_score',
       'calculate': 'datum._source.anomaly_score'},
      {'as': 'actual', 'calculate': 'datum._source.actual'},
      {'as': 'typical', 'calculate': 'datum._source.typical'},
      {'as': 'event_count', 'calculate': 'datum._source.event_count'},
      {'as': 'model_lower', 'calculate': 'datum._source.model_lower'},
      {'as': 'model_median', 'calculate': 'datum._source.model_median'},
      {'as': 'model_upper', 'calculate': 'datum._source.model_upper'},
      {'as': 'timestamp', 'calculate': 'datum._source.timestamp'},
      {'as': 'result_type', 'calculate': 'datum._source.result_type'},
      {'filter': "(datum.result_type === 'model_plot')"}],
     'width': 800},
    {'encoding': {'x': {'axis': {'grid': False},
       'field': 'timestamp',
       'scale': {'nice': False, 'zero': False},
       'title': '',
       'type': 'temporal'},
      'y': {'field': 'mean_actual', 'type': 'quantitative'}},
     'height': 300,
     'mark': {'color': 'red', 'size': 60, 'type': 'point'},
     'transform': [{'as': 'anomaly_score',
       'calculate': 'datum._source.anomaly_score'},
      {'as': 'actual', 'calculate': 'datum._source.actual'},
      {'as': 'typical', 'calculate': 'datum._source.typical'},
      {'as': 'event_count', 'calculate': 'datum._source.event_count'},
      {'as': 'model_lower', 'calculate': 'datum._source.model_lower'},
      {'as': 'model_median', 'calculate': 'datum._source.model_median'},
      {'as': 'model_upper', 'calculate': 'datum._source.model_upper'},
      {'as': 'timestamp', 'calculate': 'datum._source.timestamp'},
      {'as': 'result_type', 'calculate': 'datum._source.result_type'},
      {'aggregate': [{'as': 'mean_actual', 'field': 'actual', 'op': 'mean'},
        {'as': 'mean_anomaly_score', 'field': 'anomaly_score', 'op': 'mean'}],
       'groupby': ['timestamp']},
      {'filter': '(datum.mean_anomaly_score > 0)'}],
     'width': 800},
    {'encoding': {'tooltip': [{'field': 'mean_actual', 'type': 'quantitative'},
       {'field': 'mean_anomaly_score', 'type': 'quantitative'},
       {'field': 'timestamp', 'type': 'temporal'}],
      'x': {'axis': {'grid': False},
       'field': 'timestamp',
       'scale': {'nice': False, 'zero': False},
       'title': '',
       'type': 'temporal'},
      'y': {'field': 'mean_actual', 'type': 'quantitative'}},
     'height': 300,
     'mark': {'opacity': 0, 'size': 100, 'type': 'circle'},
     'transform': [{'as': 'anomaly_score',
       'calculate': 'datum._source.anomaly_score'},
      {'as': 'actual', 'calculate': 'datum._source.actual'},
      {'as': 'typical', 'calculate': 'datum._source.typical'},
      {'as': 'event_count', 'calculate': 'datum._source.event_count'},
      {'as': 'model_lower', 'calculate': 'datum._source.model_lower'},
      {'as': 'model_median', 'calculate': 'datum._source.model_median'},
      {'as': 'model_upper', 'calculate': 'datum._source.model_upper'},
      {'as': 'timestamp', 'calculate': 'datum._source.timestamp'},
      {'as': 'result_type', 'calculate': 'datum._source.result_type'},
      {'aggregate': [{'as': 'mean_actual', 'field': 'actual', 'op': 'mean'},
        {'as': 'mean_anomaly_score', 'field': 'anomaly_score', 'op': 'mean'}],
       'groupby': ['timestamp']}],
     'width': 800}]}]}
In [8]:
import json
import requests

resultSize = 10000
visName = 'ml-anomalies-fq-annotations'

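# Replace each layer's static URL with an Elasticsearch query object that
# Kibana's Vega plugin resolves at render time (one per data source/index).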
chart_json = json.loads(chart.to_json())
chart_json['layer'][0]['data']['url'] = {
    "%context%": True,
    "%timefield%": "timestamp",
    "index": annotation_index_name,
    "body": {
        "size": resultSize
    }
}
chart_json['layer'][1]['data']['url'] = {
    "%context%": True,
    "%timefield%": "timestamp",
    "index": index_name,
    "body": {
        "size": resultSize
    }
}

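# Wrap the Vega-Lite spec in a Kibana `visualization` saved object.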
visState = {
  "type": "vega",
  "aggs": [],
  "params": {
    "spec": json.dumps(chart_json, sort_keys=True, indent=4, separators=(',', ': ')),
  },
  "title": visName
}

visSavedObject={
    "attributes" : {
      "title" : visName,
      "visState" : json.dumps(visState, sort_keys=True, indent=4, separators=(',', ': ')),
      "uiStateJSON" : "{}",
      "description" : "",
      "version" : 1,
      "kibanaSavedObjectMeta" : {
        "searchSourceJSON" : json.dumps({
          "query": {
            "language": "kuery",
            "query": ""
          },
          "filter": []
        }),
      }
    },
}


requests.post(
    es_config['kibana_client'] + '/api/saved_objects/visualization/' + visName,
    json=visSavedObject,
    auth=(es_config['user'], es_config['password']),
    headers={"kbn-xsrf":"jupyter2kibana"},
    # Only fall back to `verify=False` in development environments
    # with self-signed certificates
    verify=False
)
Out[8]:
<Response [409]>
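
The 409 Conflict response means a saved object with this ID already exists, e.g. from a previous run of the notebook. As a minimal sketch (not part of the original notebook), Kibana's saved objects API accepts an overwrite=true query parameter to replace the existing object:

requests.post(
    es_config['kibana_client'] + '/api/saved_objects/visualization/' + visName + '?overwrite=true',
    json=visSavedObject,
    auth=(es_config['user'], es_config['password']),
    headers={"kbn-xsrf": "jupyter2kibana"},
    # Only fall back to `verify=False` in development environments
    # with self-signed certificates
    verify=False
)
# A <Response [200]> indicates the saved object was (over)written successfully.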