Home

To make sure the Vega charts render correctly, view this notebook on the published website rather than in the GitHub repo: https://walterra.github.io/jupyter2kibana/viz-2b-cars-outlier.html

viz-2b-cars-outlier.ipynb

This notebook uses an index that was created using Elasticsearch's Machine Learning to detect outliers in the cars dataset. This index, including ML's metadata, is used to create a scatterplot matrix that highlights outliers. Additionally, it includes a Vega-based slider to adjust the threshold for highlighting.

In [1]:
import datetime
import altair as alt
import eland as ed
from elasticsearch import Elasticsearch
import elastic_transport
import logging
import json
import numpy as np
import matplotlib.pyplot as plt
import urllib3
import warnings

# Turn off Altair's row limit for embedded datasets.
alt.data_transformers.disable_max_rows()

# Keep the Elasticsearch transport logger quiet unless something is an error.
logging.getLogger("elastic_transport").setLevel(logging.ERROR)

# Suppress insecure SSL connection warnings.
# Development environments that connect with `verify_certs=False`
# would otherwise emit these on every request.
for warning_category in (
    urllib3.exceptions.InsecureRequestWarning,
    elastic_transport.SecurityWarning,
):
    urllib3.disable_warnings(warning_category)

# Hide every remaining warning so the HTML rendering of the notebook stays clean.
warnings.filterwarnings('ignore')
In [2]:
# Elasticsearch index that holds the cars dataset together with the
# outlier-detection results; reused by the cells below.
index_name = "cars_outlier"
In [3]:
# Read the connection settings (endpoint, user, password) from a local JSON
# file so they are not hard-coded in the notebook. The encoding is given
# explicitly because the default for `open()` depends on the platform locale.
with open('config.json', encoding='utf-8') as config_file:
    es_config = json.load(config_file)

# First instantiate an 'Elasticsearch' instance with the supplied config
es = Elasticsearch(
    hosts=[es_config['es_client']],
    # `basic_auth` is documented as a (username, password) tuple.
    basic_auth=(
        es_config['user'],
        es_config['password'],
    ),
    # Only in development environments with self signed certificates fall back to use `verify_certs=False`
    verify_certs=False
)

# Wrap the index in an eland DataFrame and preview the first rows.
ed_df = ed.DataFrame(es, index_name)
ed_df.head()
Out[3]:
Acceleration Cylinders Displacement Horsepower Miles_per_Gallon Name Origin Weight_in_lbs Year ml.feature_influence ml.feature_influence.feature_name ml.feature_influence.influence ml.outlier_score ml__incremental_id
0 12.0 8 307.0 130.0 18.0 chevrolet chevelle malibu USA 3504 1970-01-01 NaN NaN NaN 0.059161 0
1 11.5 8 350.0 165.0 15.0 buick skylark 320 USA 3693 1970-01-01 NaN NaN NaN 0.044278 1
2 11.0 8 318.0 150.0 18.0 plymouth satellite USA 3436 1970-01-01 NaN NaN NaN 0.031178 2
3 12.0 8 304.0 150.0 16.0 amc rebel sst USA 3433 1970-01-01 NaN NaN NaN 0.033840 3
4 10.5 8 302.0 140.0 17.0 ford torino USA 3449 1970-01-01 NaN NaN NaN 0.042260 4

5 rows × 14 columns

In [4]:
# Note: Altair builds the Vega spec against a plain URL first. That only works
# for non-secured ES instances; with SSL and/or authentication enabled the chart
# in this cell renders empty. Saving the visualization to Kibana in the next
# cell still works, because there the URL is replaced with an Elasticsearch
# query that is executed via the Kibana Vega plugin.

# WARNING:
# Use the proxy approach below for demo purposes in a development environment only.
# It exposes a secured ES instance without any protection!
# For a demo, run the nodejs based proxy in a separate terminal like this:
# NODE_TLS_REJECT_UNAUTHORIZED='0' node proxy

# URL as ES endpoint
# url = 'http://localhost:9220/'+index_name+'/_search?size=1000'

# URL static fallback
url = 'https://walterra.github.io/jupyter2kibana/data/cars_outlier.json'

# Rows are read from the `hits.hits` property of the JSON payload.
url_data = alt.Data(
    url=url,
    format=alt.DataFormat(property='hits.hits', type='json'),
)

fields = [
    'Acceleration', 'Cylinders', 'Displacement', 'Horsepower',
    'Miles_per_Gallon', 'Name', 'Origin', 'Weight_in_lbs', 'Year',
    'ml.outlier_score',
]

# One calculate expression per field: expose `_source.<field>` as `<field>`.
rename_dict = {field: f'datum._source.{field}' for field in fields}

# Slider-bound selection holding the outlier-score cutoff (starts at 0.8).
slider = alt.binding_range(min=0, max=1, step=.01, name='Outlier score Threshold:')
selector = alt.selection_single(
    name="SelectorName",
    fields=['cutoff'],
    bind=slider,
    init={'cutoff': .8},
)

# Shared predicate: a point is highlighted once its score reaches the cutoff.
is_outlier = alt.datum["ml.outlier_score"] >= selector.cutoff

# Single scatter panel; the axes are filled in by the repeat below.
scatter = alt.Chart(url_data).transform_calculate(**rename_dict).mark_point().encode(
    alt.X(alt.repeat("column"), type='quantitative'),
    alt.Y(alt.repeat("row"), type='quantitative'),
    color=alt.condition(is_outlier, alt.value('red'), alt.value('gray')),
    opacity=alt.condition(is_outlier, alt.value(.75), alt.value(.25)),
    size=alt.condition(is_outlier, alt.value(28), alt.value(2)),
    tooltip=['Name:N', 'ml.outlier_score:Q', 'Horsepower:Q', 'Acceleration:Q', 'Miles_per_Gallon:Q'],
)

# 3x3 scatterplot matrix of 150x150 panels.
scatter_matrix = scatter.properties(width=150, height=150).repeat(
    row=['Horsepower', 'Acceleration', 'Miles_per_Gallon'],
    column=['Miles_per_Gallon', 'Acceleration', 'Horsepower'],
)

chart = scatter_matrix.interactive().add_selection(selector)

chart
Out[4]:
In [5]:
from kibana_vega_util import saveVegaVis

# Keyword options for the save call, kept together for easy tweaking.
save_options = dict(
    resultSize=1000,
    # Only in development environments with self signed certificates fall back to use `verify=False`
    verify=False,
    timeField="Year",
)

# Save the chart to Kibana under a fixed visualization id. The call is the
# last expression of the cell so that the returned response is displayed.
# NOTE(review): the recorded output is HTTP 409 - presumably a visualization
# with this id already exists; confirm against `saveVegaVis`.
saveVegaVis(index_name, 'def-vega-cars-outlier-1', chart, **save_options)
Out[5]:
<Response [409]>