To make sure the Vega charts render correctly, view this notebook via the published website rather than the GitHub repo: https://walterra.github.io/jupyter2kibana/viz-4b-anomaly-detection-annotation.html
This notebook adds user-created annotations as an additional layer to the visualization. It's a great example of how Vega can fetch data from different sources/indices, something not easily done with Kibana's own visualizations.
import datetime
import altair as alt
import eland as ed
from elasticsearch import Elasticsearch
import elastic_transport
import logging
import json
import numpy as np
import matplotlib.pyplot as plt
import urllib3
import warnings
alt.data_transformers.disable_max_rows()
logging.getLogger("elastic_transport").setLevel(logging.ERROR)
# Suppress insecure SSL connection warnings.
# In dev environments that use `verify_certs=False`
# you might want to silence these warnings.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
urllib3.disable_warnings(elastic_transport.SecurityWarning)
# Hide all warnings when rendering the notebook to HTML
warnings.filterwarnings('ignore')
annotation_index_name = '.ml-annotations-read'
index_name = '.ml-anomalies-fq_single_count_15m'
vis_name = 'ml-anomalies-fq'
with open('config.json') as config_file:
    es_config = json.load(config_file)
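# config.json is expected to provide the endpoints and credentials used below,
# for example (placeholder values, adjust to your setup):
# {
#     "es_client": "https://localhost:9200",
#     "kibana_client": "https://localhost:5601",
#     "user": "elastic",
#     "password": "changeme"
# }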
# Instantiate an Elasticsearch client with the supplied config
es = Elasticsearch(
    hosts=[es_config['es_client']],
    basic_auth=(
        es_config['user'],
        es_config['password']
    ),
    # Fall back to `verify_certs=False` only in development environments with self-signed certificates
    verify_certs=False
)
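# Load the anomaly results index into an eland DataFrame (the data stays in Elasticsearch)
# and check how many rows and numeric columns it exposes.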
ed_df = ed.DataFrame(es, index_name)
ed_df_number = ed_df.select_dtypes(include=np.number)
ed_df_number.shape
# Note: To create the Vega spec with Altair we first reference ES via a URL. This only works
# for unsecured ES instances. If your ES instance uses SSL and/or authentication, the chart
# in this cell will render empty. You can still save the visualization to Kibana correctly in a
# later cell, because there the URL gets replaced with an Elasticsearch query
# to be used via the Kibana Vega plugin.
# WARNING:
# Use the following proxy-based approach only for demo purposes in a development environment.
# It exposes a secured ES instance without any security!
# To make this work for demo purposes, run the Node.js based proxy in a separate terminal like this:
# NODE_TLS_REJECT_UNAUTHORIZED='0' node proxy
# URL as ES endpoint
# url_annotation = 'http://localhost:9220/'+annotation_index_name+'/_search?size=10000'
# URL static fallback
url_annotation = 'https://walterra.github.io/jupyter2kibana/data/fq_single_count_15m_annotations.json'
url_data_annotation = alt.Data(
    url=url_annotation,
    format=alt.DataFormat(property='hits.hits', type='json'),
    name='ml-annotations'
)
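# Each returned hit is expected to look roughly like (illustrative values only):
# {"_source": {"annotation": "...", "timestamp": 1549756800000,
#              "end_timestamp": 1549760400000, "job_id": "fq_single_count_15m", ...}}
# The fields listed below are read from `_source`.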
fields_annotation = [
    'annotation',
    'timestamp',
    'end_timestamp',
    'job_id',
]
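# Build a transform_calculate mapping like {'timestamp': 'datum._source.timestamp', ...}
# so the nested `_source` fields become top-level fields in the Vega-Lite spec.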
rename_dict_annotation = dict((a, 'datum._source.'+a) for a in fields_annotation)
base_annotation = alt.Chart(url_data_annotation).transform_calculate(**rename_dict_annotation)
annotations = base_annotation.transform_filter(
    alt.datum.job_id == "fq_single_count_15m"
).transform_calculate(
    y="350",
    y2="0"
).mark_rect(
    color="#369",
    opacity=0.15,
).encode(
    alt.X('timestamp:T'),
    alt.X2('end_timestamp:T'),
    alt.Y('y:Q'),
    alt.Y2('y2:Q'),
    tooltip=['annotation:N'],
)
annotations
# URL as ES endpoint
# url = 'http://localhost:9220/'+index_name+'/_search?size=1000'
# URL static fallback
url = 'https://walterra.github.io/jupyter2kibana/data/fq_single_count_15m.json'
url_data = alt.Data(
    url=url,
    format=alt.DataFormat(property='hits.hits', type='json'),
    name='ml-anomalies'
)
fields = [
    'anomaly_score',
    'actual',
    'typical',
    'event_count',
    'model_lower',
    'model_median',
    'model_upper',
    'timestamp',
    'result_type'
]
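# Same flattening as for the annotations: expose the anomaly documents' `_source` fields as top-level fields.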
rename_dict = dict((a, 'datum._source.'+a) for a in fields)
base = alt.Chart(url_data).transform_calculate(**rename_dict)
url_chart = base.transform_fold(
    fields,
    as_=['attribute', 'value']
).mark_bar().encode(
    alt.X('value:Q', bin=True, title=''),
    alt.Y('count()', title=''),
    tooltip=[
        alt.Tooltip('value:Q', bin=True, title='x'),
        alt.Tooltip('count()', title='y')
    ]
).properties(
    width=100,
    height=100
)
url_charts = alt.ConcatChart(
    concat=[
        url_chart.transform_filter(alt.datum.attribute == attribute).properties(title=attribute)
        for attribute in sorted(fields)
    ],
    columns=4
).resolve_axis(
    x='independent',
    y='independent'
).resolve_scale(
    x='independent',
    y='independent'
)
url_charts
# custom: ['#1f77b4', 'lightgray', 'gray']
# ml: ['#32a7c2', '#c8e6ef', '#bbd7df']
colors = {'model_bounds':'lightgray','model_median':'gray','actual':'#1f77b4'}
base = alt.Chart(url_data, width=800, height=300).transform_calculate(**rename_dict)
model_bounds = base.mark_area(color=colors['model_bounds'], opacity=0.5).transform_filter(
    alt.datum.result_type == 'model_plot'
).encode(
    alt.X('timestamp:T', scale=alt.Scale(zero=False, nice=False), title='', axis=alt.Axis(grid=False)),
    alt.Y('mean(model_upper):Q'),
    alt.Y2('mean(model_lower):Q'),
)
model_median = base.mark_line(color=colors['model_median'], opacity=0.5, strokeJoin='round').transform_filter(
    alt.datum.result_type == 'model_plot'
).encode(
    alt.X('timestamp:T', scale=alt.Scale(zero=False, nice=False), title='', axis=alt.Axis(grid=False)),
    alt.Y('mean(model_median):Q', title='')
)
actual = base.mark_line(color=colors['actual'], strokeJoin='round').transform_filter(
    alt.datum.result_type == 'model_plot'
).encode(
    alt.X('timestamp:T', scale=alt.Scale(zero=False, nice=False), title='', axis=alt.Axis(grid=False)),
    alt.Y('mean(actual):Q')
)
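# Red markers on buckets whose aggregated anomaly score is above zero.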
anomalies = base.mark_point(color='red', size=60).transform_aggregate(
    mean_actual='mean(actual)',
    mean_anomaly_score='mean(anomaly_score)',
    groupby=["timestamp"]
).transform_filter(
    alt.datum.mean_anomaly_score > 0
).encode(
    alt.X('timestamp:T', scale=alt.Scale(zero=False, nice=False), title='', axis=alt.Axis(grid=False)),
    alt.Y('mean_actual:Q')
)
tooltip = base.mark_circle(opacity=0, size=100).transform_aggregate(
    mean_actual='mean(actual)',
    mean_anomaly_score='mean(anomaly_score)',
    groupby=["timestamp"]
).encode(
    alt.X('timestamp:T', scale=alt.Scale(zero=False, nice=False), title='', axis=alt.Axis(grid=False)),
    alt.Y('mean_actual:Q'),
    tooltip=['mean_actual:Q', 'mean_anomaly_score:Q', 'timestamp:T']
)
chart = (annotations + (model_bounds + model_median + actual + anomalies + tooltip)).configure_axis(
    grid=True,
    gridColor="#eee",
    domainColor="#ddd",
    tickColor="#ddd",
    labelColor="gray",
    labelBound=True,
).configure_view(
    strokeWidth=0
).configure_title(
    fontSize=14,
    fontWeight='bold',
    anchor='start',
    color='gray'
)
chart
json.loads(chart.to_json())
import json
import requests
resultSize = 10000
visName = 'ml-anomalies-fq-annotations'
chart_json = json.loads(chart.to_json())
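# Replace the static data URLs with Elasticsearch queries understood by Kibana's Vega plugin:
# "%context%": true applies the dashboard's filter context, and "%timefield%" binds the
# query to the time picker via the given timestamp field.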
chart_json['layer'][0]['data']['url'] = {
    "%context%": True,
    "%timefield%": "timestamp",
    "index": annotation_index_name,
    "body": {
        "size": resultSize
    }
}
chart_json['layer'][1]['data']['url'] = {
    "%context%": True,
    "%timefield%": "timestamp",
    "index": index_name,
    "body": {
        "size": resultSize
    }
}
visState = {
    "type": "vega",
    "aggs": [],
    "params": {
        "spec": json.dumps(chart_json, sort_keys=True, indent=4, separators=(',', ': ')),
    },
    "title": visName
}
visSavedObject = {
    "attributes": {
        "title": visName,
        "visState": json.dumps(visState, sort_keys=True, indent=4, separators=(',', ': ')),
        "uiStateJSON": "{}",
        "description": "",
        "version": 1,
        "kibanaSavedObjectMeta": {
            "searchSourceJSON": json.dumps({
                "query": {
                    "language": "kuery",
                    "query": ""
                },
                "filter": []
            }),
        }
    },
}
requests.post(
    es_config['kibana_client'] + '/api/saved_objects/visualization/' + visName,
    json=visSavedObject,
    auth=(es_config['user'], es_config['password']),
    headers={"kbn-xsrf": "jupyter2kibana"},
    # Fall back to `verify=False` only in development environments with self-signed certificates
    verify=False
)
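# The returned Response object is displayed by the notebook; a 200 status means the saved object
# was written, while a 409 would indicate a visualization with this id already exists in Kibana.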