To make sure the Vega charts render correctly, view this notebook on the published website rather than in the GitHub repo: https://walterra.github.io/jupyter2kibana/viz-2b-cars-outlier.html
This notebook uses an index that was created by running Elasticsearch's Machine Learning outlier detection on the cars dataset. The index, including the ML metadata, is used to create a scatterplot matrix that highlights outliers. Additionally, the chart includes a Vega-based slider to adjust the threshold for highlighting.
import datetime
import altair as alt
import eland as ed
from elasticsearch import Elasticsearch
import elastic_transport
import logging
import json
import numpy as np
import matplotlib.pyplot as plt
import urllib3
import warnings
# Notebook-wide setup: lift Altair's dataset row limit and quieten noisy
# loggers/warnings so the rendered HTML stays readable.
alt.data_transformers.disable_max_rows()

# Only let messages of level ERROR and above through from the ES transport.
transport_logger = logging.getLogger("elastic_transport")
transport_logger.setLevel(logging.ERROR)

# Suppress insecure SSL connection warnings.
# In dev environments that connect with `verify_certs=False`
# these would otherwise be emitted on every request.
for warning_category in (
    urllib3.exceptions.InsecureRequestWarning,
    elastic_transport.SecurityWarning,
):
    urllib3.disable_warnings(warning_category)

# For rendering the notebook to HTML hide all remaining warnings.
warnings.filterwarnings(action='ignore')
# Name of the Elasticsearch index queried by this notebook.
index_name = 'cars_outlier'

# Connection settings are read from a local JSON file. The keys used below
# are `es_client` (host URL), `user` and `password`.
# The encoding is given explicitly so parsing does not depend on the
# platform's default locale encoding.
with open('config.json', encoding='utf-8') as config_file:
    es_config = json.load(config_file)

# First instantiate an 'Elasticsearch' instance with the supplied config.
es = Elasticsearch(
    hosts=[es_config['es_client']],
    # The client documents basic_auth as a (username, password) tuple.
    basic_auth=(es_config['user'], es_config['password']),
    # Only in development environments with self signed certificates
    # fall back to use `verify_certs=False`.
    verify_certs=False,
)

# Wrap the index in an eland DataFrame and preview its first rows.
ed_df = ed.DataFrame(es, index_name)
ed_df.head()
# Note: To create the Vega spec using Altair we reference ES via URL first.
# This only works for non-secured ES instances. If your ES instance uses SSL
# and/or authentication, the chart in this cell will render empty. The
# visualization can still be saved to Kibana correctly in the next cell,
# because there the URL gets replaced with an Elasticsearch query to be used
# via the Kibana Vega plugin.

# WARNING:
# Use the following proxy approach only for demo purposes in a development
# environment. It will expose a secured ES instance unsecured!
# To make this work for demo purposes run the nodejs based proxy in a
# separate terminal like this:
# NODE_TLS_REJECT_UNAUTHORIZED='0' node proxy

# URL as ES endpoint
# url = 'http://localhost:9220/'+index_name+'/_search?size=1000'

# URL static fallback
url = 'https://walterra.github.io/jupyter2kibana/data/cars_outlier.json'

# The documents live under `hits.hits` of the search response.
url_data = alt.Data(
    url=url,
    format=alt.DataFormat(property='hits.hits', type='json'),
)

fields = [
    'Acceleration',
    'Cylinders',
    'Displacement',
    'Horsepower',
    'Miles_per_Gallon',
    'Name',
    'Origin',
    'Weight_in_lbs',
    'Year',
    'ml.outlier_score',
]

# One calculate expression per field, copying `_source.<field>` of each hit
# to a top-level field of the same name.
rename_dict = {field: f'datum._source.{field}' for field in fields}

# Slider-bound selection that holds the outlier score cutoff (initially .8).
slider = alt.binding_range(
    min=0,
    max=1,
    step=.01,
    name='Outlier score Threshold:',
)
selector = alt.selection_single(
    name="SelectorName",
    fields=['cutoff'],
    bind=slider,
    init={'cutoff': .8},
)

# Points at or above the cutoff are drawn red, more opaque and larger.
is_outlier = alt.datum["ml.outlier_score"] >= selector.cutoff

# NOTE(review): 'ml.outlier_score' contains a dot; Vega-Lite may treat the
# tooltip shorthand as a nested field path - confirm the tooltip shows it.
tooltip_fields = [
    'Name:N',
    'ml.outlier_score:Q',
    'Horsepower:Q',
    'Acceleration:Q',
    'Miles_per_Gallon:Q',
]

renamed_points = (
    alt.Chart(url_data)
    .transform_calculate(**rename_dict)
    .mark_point()
)
encoded_points = renamed_points.encode(
    alt.X(alt.repeat("column"), type='quantitative'),
    alt.Y(alt.repeat("row"), type='quantitative'),
    color=alt.condition(is_outlier, alt.value('red'), alt.value('gray')),
    opacity=alt.condition(is_outlier, alt.value(.75), alt.value(.25)),
    size=alt.condition(is_outlier, alt.value(28), alt.value(2)),
    tooltip=tooltip_fields,
)

# Repeat the 150x150 panel over a 3x3 grid to form the scatterplot matrix,
# then make it zoomable/pannable and attach the slider selection.
matrix = encoded_points.properties(width=150, height=150).repeat(
    row=['Horsepower', 'Acceleration', 'Miles_per_Gallon'],
    column=['Miles_per_Gallon', 'Acceleration', 'Horsepower'],
)
chart = matrix.interactive().add_selection(selector)
chart
from kibana_vega_util import saveVegaVis

# Keyword options forwarded to the project helper `saveVegaVis`.
save_options = {
    'resultSize': 1000,
    # Only in development environments with self signed certificates
    # fall back to use `verify=False`.
    'verify': False,
    'timeField': "Year",
}

# Save the chart built above as the Kibana visualization with the given id.
saveVegaVis(index_name, 'def-vega-cars-outlier-1', chart, **save_options)