python visualization

Posted by neverset on September 25, 2020

Plotly Express

By default, all of the Plotly visualizations are interactive

basic functions

import plotly.express as px
px.scatter()
px.line()
px.bar()
px.histogram()
px.box()
px.violin()
px.strip()

many chart types

fig = px.treemap()
fig = px.sunburst(
#to save image
fig.write_image()
#save to html
fig.write_html()

integrate with pandas

pd.options.plotting.backend = "plotly"
fig = df[['sodium', 'potass']].plot(kind='hist',
                                nbins=50,
                                histnorm='probability density',
                                opacity=0.75,
                                marginal='box',
                                title='Potassium and Sodium Distributions')
fig.write_image('potassium_sodium_plots.png')

Interactive options

graph_obj

create interactive map

# Manipulating the original dataframe 
df_countrydate = df_countries.groupby(['Date','Country']). sum().reset_index() 
#Creating the visualization 
fig = px.choropleth(df_countrydate, locations="Country", locationmode = "country names", color="AvTemp", hover_name="Country", animation_frame="Date" ) 
fig.update_layout( title_text = 'Average Temperature Change', title_x = 0.5, geo=dict( showframe = False, showcoastlines = False, )) 
fig.show()

Dash

built based on Flask (as backend), React(as frontend), and Plotly, ideal for building visualization apps. Features:

  • easy to build complicated UIs with multiple inputs, multiple outputs, and inputs that depend on other inputs
  • support multi-user with independent sessions drawbacks:
  • no intermediate values in the reactive graph
  • concurrency in callback is not possible
  • Cannot have callbacks with no Inputs or with no Output
structure
-Assets
-Components
-Layout
-Pages
-Utils
-Environments
-Cache-Directory

1) Assets Dash will automatically serve files (mages, Stylesheets, or Javascript files) in this folder 2) Components reusable components

import dash_html_components as html
from utils.functions import formatter_2_decimals

def make_dash_table(df):
    body = []
    header = []
    for column in df.columns:
        header.append(html.Th(column))

    for index, row in df.iterrows():
        html_row = []
        for i in range(len(row)):
            html_row.append(html.Td(formatter_2_decimals(row[i])))
        body.append(html.Tr(html_row))
    
    tHead = html.Thead(html.Tr(header))
    tBody = html.Tbody(body)
    table = html.Table([tHead, tBody])

    return table

3) Layout common layout including UI view and callback functions

#layout.py
import dash_html_components as html
import dash_core_components as dcc
from layout.sidebar.sidebar import sidebar
content = html.Div(id="page-content")
layout = html.Div([dcc.Location(id="url"), sidebar, content])

#view.py
import dash_bootstrap_components as dbc
import dash_html_components as html
from utils.constants import home_page_location, gdp_page_location, iris_page_location
 
# sidebar header consists of a title, and a toggle, the latter is hidden on large screens  
sidebar_header = dbc.Row(
    [
        dbc.Col(html.H2("Sidebar", className="display-4")),
        dbc.Col(
            [
                html.Button(
                    # use the Bootstrap navbar-toggler classes to style
                    html.Span(className="navbar-toggler-icon"),
                    className="navbar-toggler",
                    # the navbar-toggler classes don't set color
                    style={
                        "color": "rgba(0,0,0,.5)",
                        "border-color": "rgba(0,0,0,.1)",
                    },
                    id="navbar-toggle",
                ),
                html.Button(
                    # use the Bootstrap navbar-toggler classes to style
                    html.Span(className="navbar-toggler-icon"),
                    className="navbar-toggler",
                    # the navbar-toggler classes don't set color
                    style={
                        "color": "rgba(0,0,0,.5)",
                        "border-color": "rgba(0,0,0,.1)",
                    },
                    id="sidebar-toggle",
                ),
            ],
            # the column containing the toggle will be only as wide as the
            # toggle, resulting in the toggle being right aligned
            width="auto",
            # vertically align the toggle in the center
            align="center",
        ),
    ]
)

sidebar = html.Div(
    [
        sidebar_header,
        # we wrap the horizontal rule and short blurb in a div that can be
        # hidden on a small screen
        html.Div(
            [
                html.Hr(),
                html.P(
                    "A responsive sidebar layout with collapsible navigation "
                    "links.",
                    className="lead",
                ),
            ],
            id="blurb",
        ),
        # use the Collapse component to animate hiding / revealing links
        dbc.Collapse(
            dbc.Nav(
                [
                    dbc.NavLink("Home", href=home_page_location, active="exact"),
                    dbc.NavLink("GDP", href=gdp_page_location, active="exact"),
                    dbc.NavLink("Iris", href=iris_page_location, active="exact"),
                ],
                vertical=True,
                pills=True,
            ),
            id="collapse",
        ),
    ],
    id="sidebar",
)

#callback.py
from dash.dependencies import Input, Output, State
from app import app

@app.callback(
    Output("sidebar", "className"),
    [Input("sidebar-toggle", "n_clicks")],
    [State("sidebar", "className")],
)
def toggle_classname(n, classname):
    if n and classname == "":
        return "collapsed"
    return ""

@app.callback(
    Output("collapse", "is_open"),
    [Input("navbar-toggle", "n_clicks")],
    [State("collapse", "is_open")],
)
def toggle_collapse(n, is_open):
    if n:
        return not is_open
    return is_open    

4) Pages MVC architectural pattern for each page

#model.py
import pandas as pd
from app import cache
from utils.constants import TIMEOUT

@cache.memoize(timeout=TIMEOUT)
def query_data():
    # This could be an expensive data querying step
    gdp_data = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/gapminderDataFiveYear.csv')
    return gdp_data.to_json(date_format='iso', orient='split')

def dataframe():
    return pd.read_json(query_data(), orient='split')

#view.py
import dash_core_components as dcc
import dash_html_components as html
from pages.gdp.gdp_data import dataframe

layout = html.Div([
    html.H1("GDP viewer"),
    html.Hr(),
    dcc.Graph(id='graph-with-slider'),
    dcc.Slider(
        id='year-slider',
        min=dataframe()['year'].min(),
        max=dataframe()['year'].max(),
        value=dataframe()['year'].min(),
        marks={str(year): str(year) for year in dataframe()['year'].unique()},
        step=None
    )
])

#controller.py
from dash.dependencies import Input, Output
import plotly.express as px
from app import app
from pages.gdp.gdp_data import dataframe

@app.callback(
    Output('graph-with-slider', 'figure'),
    Input('year-slider', 'value'))
def update_figure(selected_year):
    gdp_data = dataframe()
    filtered_df = gdp_data[gdp_data.year == selected_year]

    fig = px.scatter(filtered_df, x="gdpPercap", y="lifeExp",
                    size="pop", color="continent", hover_name="country",
                    log_x=True, size_max=55)

    fig.update_layout(transition_duration=500)

    return fig

5) Utils functions can be used for general purposes

6) Environments

#setting dev and prod environments
import os
from os.path import join, dirname
from dotenv import load_dotenv

dotenv_path = join(dirname(__file__), os.getenv("ENVIRONMENT_FILE"))
load_dotenv(dotenv_path=dotenv_path, override=True)

APP_HOST = os.environ.get("HOST")
APP_PORT = int(os.environ.get("PORT"))
APP_DEBUG = bool(os.environ.get("DEBUG"))
DEV_TOOLS_PROPS_CHECK = bool(os.environ.get("DEV_TOOLS_PROPS_CHECK"))

7) Cache-Directory use caches to store intermiediate results

from flask_caching import Cache
cache = Cache(app.server, config={
    'CACHE_TYPE': 'filesystem',
    'CACHE_DIR': 'cache-directory'
})

8) Routing

#route.py
import dash_bootstrap_components as dbc
import dash_html_components as html
from dash.dependencies import Input, Output

from app import app

from utils.constants import home_page_location, gdp_page_location, iris_page_location

from pages.home import home
from pages.gdp import gdp
from pages.iris import iris

@app.callback(
Output("page-content", "children"), 
Input("url", "pathname")
)
def render_page_content(pathname):
    if pathname == home_page_location:
        return home.layout
    elif pathname == gdp_page_location:
        return gdp.layout
    elif pathname == iris_page_location:
        return iris.layout
    # If the user tries to reach a different page, return a 404 message
    return dbc.Jumbotron(
        [
            html.H1("404: Not found", className="text-danger"),
            html.Hr(),
            html.P(f"The pathname {pathname} was not recognised..."),
        ]
    )

9) index.py and app.py

#app
import dash
import dash_bootstrap_components as dbc
from flask_caching import Cache
from utils.external_assets import FONT_AWSOME, CUSTOM_STYLE
from layout.layout import layout
import flask

server = flask.Flask(__name__) # define flask app.server
app = dash.Dash(
    __name__,
    server=server,
    suppress_callback_exceptions=True, 
    external_stylesheets=[
        dbc.themes.BOOTSTRAP,
        FONT_AWSOME,
        CUSTOM_STYLE
    ],
    meta_tags=[
        {"name": "viewport", "content": "width=device-width, initial-scale=1"}
    ]
)
cache = Cache(app.server, config={
    'CACHE_TYPE': 'filesystem',
    'CACHE_DIR': 'cache-directory'
})

app.layout = layout

#index.py
from app import app, server
from routes import render_page_content
from layout.sidebar.sidebar_callbacks import toggle_collapse, toggle_classname
from pages.gdp.gdp_callbacks import update_figure
from pages.iris.iris_callbacks import make_graph
from environment.settings import APP_HOST, APP_PORT, APP_DEBUG, DEV_TOOLS_PROPS_CHECK

if __name__ == "__main__":
    app.run_server(
        host=APP_HOST,
        port=APP_PORT,
        debug=APP_DEBUG,
        dev_tools_props_check=DEV_TOOLS_PROPS_CHECK
    )

Streamlit

streamlit run streamlit_example.py

#streamlit_example.py
import streamlit as st
import pandas as pd
import plotly.express as px


@st.cache()
def load_data():
    df = pd.read_csv(
        'https://github.com/chris1610/pbpython/blob/master/data/cereal_data.csv?raw=True'
    )
    return df


# Read in the cereal data
df = load_data()

st.title('Rating exploration')

# Only a subset of options make sense
x_options = [
    'calories', 'protein', 'fat', 'sodium', 'fiber', 'carbo', 'sugars',
    'potass'
]

# Allow use to choose
x_axis = st.sidebar.selectbox('Which value do you want to explore?', x_options)

# plot the value
fig = px.scatter(df,
                x=x_axis,
                y='rating',
                hover_name='name',
                title=f'Cereal ratings vs. {x_axis}')

st.plotly_chart(fig)

Datapane

Datapane is an API allow people to view visualization result and share result easily in the cloud.Deploying Python scripts and notebooks is also possible.
It supports:

  • Pandas
  • Plots from Python visualization libraries such as Plotly, Bokeh, Altair, and Folium
  • Markdown To install

    pip3 install datapane #generate report you need token datapane login –server=https://datapane.com/ –token=yourtoken

generate report

import pandas as pd
import altair as alt
import datapane as dp
df = pd.read_csv('https://query1.finance.yahoo.com/v7/finance/download/GOOG?period2=1585222905&interval=1mo&events=history')
chart = alt.Chart(df).encode(
    x='Date:T',
    y='Open'
).mark_line().interactive()
#generate report
r = dp.Report(
dp.Markdown('My simple report'), #add description to the report
dp.Table(df), #create a table
dp.Plot(chart) #create a chart
)

# Publish your report. Make sure to have visibility='PUBLIC' if you want to share your report
r.publish(name='stock_report', visibility='PUBLIC')

add interactive parameter in report

datapane script deploy --script=<your_python_script.py> --name=name_of_your_report

voila

voila can convert jupyter script into a standalone web application, it support ipywidgets library in jupyter

$ pip install voila
$ conda install voila -c conda-forge
#convert command
voila <jupyter_script> <command-line options>
#for example
voila test.ipynb --theme=dark --template=material

Create interactive widgets

slider

import ipywidgets as widgets
import pandas as pd
style = {'description_width': 'initial'}
limit_case = widgets.IntSlider(
    value=1000,
    min=100,
    max=5000,
    step=1,
    description='Max Number of Case:',
    disabled=False,
    style=style
)

def update_df_length(limit):   
    df = pd.read_csv('SF_crimes.csv')
    df = df.iloc[0:limit, :]
    print("Number of rows in the dataset that have been successfully loaded:"+str(len(df)))

#connect widgets with callback function
widgets.interactive(update_df_length, limit=limit_case)

multiple selection

import pandas as pd
import ipywidgets as widgets
from ipywidgets import Layout
df = pd.read_csv('SF_crimes.csv')
unique_district = df.PdDistrict.unique()
district = widgets.SelectMultiple(
    options = unique_district.tolist(),
    value = ['BAYVIEW', 'NORTHERN'],
    description='District',
    disabled=False,
    layout = Layout(width='50%', height='80px', display='flex')
)
district

intergrate widgets with map

import matplotlib.pyplot as plt
from folium import plugins

def update_map(district, category, limit):
    
    df = pd.read_csv('SF_crimes.csv')
    df = df.iloc[0:limit, :]
    
    latitude = 37.77
    longitude = -122.42
    
    df_dist = df.loc[df['PdDistrict'].isin(np.array(district))]
    df_category = df_dist.loc[df_dist['Category'].isin(np.array(category))]
    
    cat_unique = df_category['Category'].value_counts()
    cat_unique = cat_unique.reset_index()
    
    dist_unique = df_category['PdDistrict'].value_counts()
    dist_unique = dist_unique.reset_index()
    
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20,10))

    # create map and display it
    sanfran_map = folium.Map(location=[latitude, longitude], zoom_start=12)

    
    incidents = plugins.MarkerCluster().add_to(sanfran_map)

# loop through the dataframe and add each data point to the mark cluster
    for lat, lng, label, in zip(df_category.Y, df_category.X, df_category.Category):
        folium.Marker(
        location=[lat, lng],
        icon=None,
        popup=label,
        ).add_to(incidents)
# show map
    display(sanfran_map)
    
    ax1.bar(cat_unique['index'], cat_unique['Category'])
    ax1.set_title('Amount of Criminal Case Based on Category')
    ax2.bar(dist_unique['index'], dist_unique['PdDistrict'])
    ax2.set_title('Amount of Criminal Case in Selected District')
    
    plt.show()

matplotlib

FuncAnimation

create dynamic plot with animator = ani.FuncAnimation(fig, chartfunc, interval = 100)

import matplotlib.animation as ani
import numpy as np
import matplotlib.pyplot as pltcolor = ['red', 'green', 'blue', 'orange']
fig = plt.figure()
plt.xticks(rotation=45, ha="right", rotation_mode="anchor") #rotate the x-axis values
plt.subplots_adjust(bottom = 0.2, top = 0.9) #ensuring the dates (on the x-axis) fit in the screen
plt.ylabel('No of Deaths')
plt.xlabel('Dates')

#define chartfunc
def buildmebarchart(i=int):
    plt.legend(df1.columns)
    p = plt.plot(df1[:i].index, df1[:i].values) #note it only returns the dataset, up to the point i
    for i in range(,4):
        p[i].set_color(color[i]) #set the colour of each curveimport matplotlib.animation as ani

#call funcanimation to make plot dynamic
animator = ani.FuncAnimation(fig, buildmebarchart, interval = 100)
plt.show()

Using LaTeX font

# import libraries
import numpy as np
import matplotlib.pyplot as plt
# adjust matplotlib parameters
plt.rcParams.update(plt.rcParamsDefault)
plt.rcParams['text.usetex'] = True
plt.rcParams['font.size'] = 18
plt.rcParams['font.family'] = "serif"
# create mock data
r = 15
theta = 5
rw = 12
gamma = 0.1
err = np.arange(0., r, .1)
z = np.where(err < rw, 0, gamma * (err-rw)**2 * np.sin(np.deg2rad(theta)))  
# visualize the data
plt.scatter(err, z, s = 10)
plt.title(r'$\Sigma(x) = \gamma x^2 \sin(\theta)$', pad = 20)
plt.xlabel('x')
plt.ylabel('$\phi$')
# save the plots
plt.savefig('latex.png', dpi = 300, pad_inches = .1, bbox_inches = 'tight')

zoom-in effect

# create one zoom in
fig = plt.figure()
# Set the random seed 
np.random.seed(100)
# Create mock data
x = np.random.normal(400, 50, 10_000)
y = np.random.normal(300, 50, 10_000)
c = np.random.rand(10_000)
# Create zoom-in plot
ax = plt.scatter(x, y, s = 5, c = c)
plt.xlim(400, 500)
plt.ylim(350, 400)
plt.xlabel('x', labelpad = 15)
plt.ylabel('y', labelpad = 15)
# Create zoom-out plot
ax_new = fig.add_axes([0.6, 0.6, 0.2, 0.2]) # the position of zoom-out plot compare to the ratio of zoom-in plot 
plt.scatter(x, y, s = 1, c = c)
# Save figure with nice margin
plt.savefig('zoom.png', dpi = 300, bbox_inches = 'tight', pad_inches = .1)

#create two zoom in 
# Define a function using lambda
stock = lambda A, amp, angle, phase: A * angle + amp * np.sin(angle + phase)
# Define parameters 
theta = np.linspace(0., 2 * np.pi, 250) # x-axis
np.random.seed(100)
noise = 0.2 * np.random.random(250)
y = stock(.1, .2, theta, 1.2) + noise # y-axis
# Create main container with size of 6x5
fig = plt.figure(figsize=(6, 5))
plt.subplots_adjust(bottom = 0., left = 0, top = 1., right = 1)
# Create first axes, the top-left plot with green plot
sub1 = fig.add_subplot(2,2,1) # two rows, two columns, fist cell
sub1.plot(theta, y, color = 'green')
sub1.set_xlim(1, 2)
sub1.set_ylim(0.2, .5)
sub1.set_ylabel('y', labelpad = 15)
# Create second axes, the top-left plot with orange plot
sub2 = fig.add_subplot(2,2,2) # two rows, two columns, second cell
sub2.plot(theta, y, color = 'orange')
sub2.set_xlim(5, 6)
sub2.set_ylim(.4, 1)
# Create third axes, a combination of third and fourth cell
sub3 = fig.add_subplot(2,2,(3,4)) # two rows, two colums, combined third and fourth cell
sub3.plot(theta, y, color = 'darkorchid', alpha = .7)
sub3.set_xlim(0, 6.5)
sub3.set_ylim(0, 1)
sub3.set_xlabel(r'$\theta$ (rad)', labelpad = 15)
sub3.set_ylabel('y', labelpad = 15)
# Create blocked area in third axes
sub3.fill_between((1,2), 0, 1, facecolor='green', alpha=0.2) # blocked area for first axes
sub3.fill_between((5,6), 0, 1, facecolor='orange', alpha=0.2) # blocked area for second axes
# Create left side of Connection patch for first axes
con1 = ConnectionPatch(xyA=(1, .2), coordsA=sub1.transData, 
                    xyB=(1, .3), coordsB=sub3.transData, color = 'green')
# Add left side to the figure
fig.add_artist(con1)
# Create right side of Connection patch for first axes
con2 = ConnectionPatch(xyA=(2, .2), coordsA=sub1.transData, 
                    xyB=(2, .3), coordsB=sub3.transData, color = 'green')
# Add right side to the figure
fig.add_artist(con2)
# Create left side of Connection patch for second axes
con3 = ConnectionPatch(xyA=(5, .4), coordsA=sub2.transData, 
                    xyB=(5, .5), coordsB=sub3.transData, color = 'orange')
# Add left side to the figure
fig.add_artist(con3)
# Create right side of Connection patch for second axes
con4 = ConnectionPatch(xyA=(6, .4), coordsA=sub2.transData, 
                    xyB=(6, .9), coordsB=sub3.transData, color = 'orange')
# Add right side to the figure
fig.add_artist(con4)
# Save figure with nice margin
plt.savefig('zoom_effect_2.png', dpi = 300, bbox_inches = 'tight', pad_inches = .1)

create outbox legend

plt.legend(bbox_to_anchor=(1.05, 1.04)) # position of the legend

Creating continuous error plots

plt.fill_between(x, upper_limit, lower_limit)

Adjusting box pad margin

using plt.savefig() followed by a complex argument: bbox_inches and pad_inches

Bokeh

Bokeh is a python library for interactive data visualization and web app

Glyphs

from bokeh.io import output_file, show
from bokeh.plotting import figure

plot = figure()
plot.line(x=[1,2,3,4], y=[4,7,2,16])
output_file('line_glyph.html')
plot.circle([1,2,3,4], [4,7,2,16], fill_color="white", size=10)
show(plot) ### Data Sources format compatible with data formats of lists, NumPy arrays and pandas DataFrames

ColumnDataSource

inbuilt data format in bokeh is ColumnDataSource. access data is by specifying its column name, and passing our CDS as the source parameter of a plotting method

from bokeh.models import ColumnDataSource
source_from_dict = ColumnDataSource(data={
                                'x' : [1,2,3,4,5],
                                'y' : [1,4,9,16,25]})
#or
source_from_df = ColumnDataSource(df)
plot = figure()
plot.line(x='Date', y='Open', source=source)
output_file('cds_glyph.html')
show(plot)

customization

customize plot tool

from bokeh.plotting import figure, show
p = figure(toolbar_location="left",
    tools="pan,wheel_zoom,lasso_select,tap,undo,reset")
show(p)

HoverTools

Field names are prepended with @, plot metadata are prepened with $

from bokeh.models import HoverTool
hover = HoverTool(tooltips=[
        ("index", "$index"),
        ("label", "@column_name"),
        ("label2", "@{multi-word column name}"),
        ("colour", "$color[hex, swatch]:fill_color")])

color mapping

mapper = linear_cmap(field_name='y', palette=Spectral6, low=min(y), high=max(y))
plot = figure(plot_height=300, plot_width=600)
plot.circle(x,y, color=mapper, line_color='black', size=10)

annotations

p = figure(
    plot_width=500,
    plot_height=500,
    x_axis_label="X Label",
    y_axis_label="Y Label",
    title="My custom plot",
    toolbar_location="left",
    tools="pan,wheel_zoom,reset")
show(p)

arrange plots

layout = row(p1, p2, p3)
#or 
layout = column(p1, p2, p3)
#or
layout = gridplot([None, p1], [p2, p3], toolbar_location=None)
# add tab
first = Panel(child=row(p1, p2), title='first')
second = Panel(child=row(p3), title='second')
tabs = Tabs(tabs=[first, second])
# link plot with same data source
p3.x_range = p2.x_range = p1.x_range
p3.y_range = p2.y_range = p1.y_range
show(layout)

Pygal

#pip install pygal
import pygal
bar_chart = pygal.Bar() 
bar_chart.title = 'NBA data'

#add data
bar_chart.add('qw', 38387) 
bar_chart.add('wq', 36928) 
bar_chart.add('eq', 34384) 
bar_chart.add('eqw', 33643) 
bar_chart.add('fsdf', 32292) 
bar_chart.render_in_browser()
bar_chart.render_to_file('NBA.svg')