Ch. 9 Beyond matplotlib
Last update: Thu Nov 19 17:20:43 2020 -0600 (49b93b1)
R
# Attach reticulate, the R package used here to run the Python chunks below.
library(reticulate)
# Select the conda environment named "r-python" for the Python session.
reticulate::use_condaenv("r-python")
9.1 brokenaxes
9.1.1 Usage
Python
# https://github.com/bendichter/brokenaxes/blob/master/examples/plot_usage.py
import matplotlib.pyplot as plt
from brokenaxes import brokenaxes
import numpy as np
# Basic brokenaxes usage: plot sin and cos on axes that are broken in both x
# and y.
fig = plt.figure(figsize=(5, 2))
# Each (low, high) pair in xlims/ylims is one visible segment of that axis;
# hspace is the gap left between the vertically stacked segments.
bax = brokenaxes(xlims=((0, .1), (.4, .7)), ylims=((-1, .7), (.79, 1)), hspace=.05)
x = np.linspace(0, 1, 100)
bax.plot(x, np.sin(10 * x), label='sin')
bax.plot(x, np.cos(10 * x), label='cos')
bax.legend(loc=3)  # 3 is matplotlib's numeric code for 'lower left'
bax.set_xlabel('time')
bax.set_ylabel('value')
plt.show()
9.1.2 Subplots
Python
# https://github.com/bendichter/brokenaxes/blob/master/examples/plot_subplots.py
from brokenaxes import brokenaxes
from matplotlib.gridspec import GridSpec
import numpy as np
# Two stacked subplot slots; each one is handed to its own brokenaxes object
# through the subplot_spec argument.
sps1, sps2 = GridSpec(2, 1)

# Top slot: a dotted magenta sine curve on a broken x axis.
bax = brokenaxes(xlims=((.1, .3), (.7, .8)), subplot_spec=sps1)
x = np.linspace(0, 1, 100)
bax.plot(x, np.sin(x * 30), ls=':', color='m')

# Bottom slot: histogram of 1000 Poisson(3) draws on a broken x axis.
# NOTE: x and bax are deliberately rebound here, as in the upstream example.
x = np.random.poisson(3, 1000)
bax = brokenaxes(xlims=((0, 2.5), (3, 6)), subplot_spec=sps2)
bax.hist(x, histtype='bar')
# NOTE(review): plt is not imported in this chunk; it relies on the import
# made by an earlier chunk of the same session.
plt.show()
9.1.3 Log scales
Python
# https://github.com/bendichter/brokenaxes/blob/master/examples/plot_logscales.py
# Log scales
# ==========
# brokenaxes automatically computes the correct layout for a 1:1 scale. However,
# for logarithmic scales the 1:1 scale has to be adapted. This is done via the
# `yscale` or `xscale` arguments.
import matplotlib.pyplot as plt
from brokenaxes import brokenaxes
import numpy as np
# Log-log plot of y = x on axes broken between 500 and 600 in both directions.
fig = plt.figure(figsize=(5, 5))
# xscale/yscale tell brokenaxes that both axes are logarithmic so the segment
# sizes are laid out accordingly.
bax = brokenaxes(xlims=((1, 500), (600, 10000)),
                 ylims=((1, 500), (600, 10000)),
                 hspace=.15, xscale='log', yscale='log')

x = np.logspace(0.0, 4, 100)
bax.loglog(x, x, label='$y=x=10^{0}$ to $10^{4}$')

bax.legend(loc='best')
# Solid grid lines on major ticks, faint dashed lines on minor ticks.
bax.grid(axis='both', which='major', ls='-')
bax.grid(axis='both', which='minor', ls='--', alpha=0.4)
bax.set_xlabel('x')
bax.set_ylabel('y')
plt.show()
9.1.4 Different scales
Python
# https://github.com/bendichter/brokenaxes/blob/master/examples/plot_different_scales.py
# Different scales with brokenaxes
# ================================
# This example shows how to customize the scales and the ticks of each broken
# axes.
#############################################################################
# brokenaxes lets you choose the aspect ratio of each sub-axes through the
# `height_ratios` and `width_ratios` arguments, overriding the default 1:1 scale
# for all axes. However, by default the tick spacing is still identical for all
# axes. In this example, we show how to customize the ticks of your brokenaxes.
import numpy as np
import matplotlib.pyplot as plt
from brokenaxes import brokenaxes
import matplotlib.ticker as ticker
def make_plot():
    """Draw four sample curves on a brokenaxes object and return it.

    The axes are broken in both directions and use unequal height and width
    ratios for the segments. The plot is drawn with a legend and a title; the
    caller is expected to have created the figure beforehand.

    Returns:
        The brokenaxes object, so callers can customize the ticks of the
        individual sub-axes.
    """
    x = np.linspace(0, 5*2*np.pi, 300)
    y1 = np.sin(x)*100
    y2 = np.sin(x+np.pi)*5 + 90
    y3 = 30*np.exp(-x) - 50
    # NOTE: x[0] is 0, so 6/x divides by zero for the first sample (NumPy
    # emits a RuntimeWarning and y4[0] evaluates to -inf). Kept as in the
    # upstream example.
    y4 = 90 + (1-np.exp(6/x))

    bax = brokenaxes(
        ylims=[(-100, 0), (80, 100)],
        xlims=[(0, 5), (10, 30)],
        height_ratios=[1, 3],
        width_ratios=[3, 5]
    )

    bax.plot(x, y1, label="Big sin")
    bax.plot(x, y2, label="Small sin")
    bax.plot(x, y3, label="Exponential 1")
    bax.plot(x, y4, '--', label="Exponential 2")

    bax.legend(loc="lower right")
    bax.set_title("Example for different scales for the x and y axis")

    return bax
#############################################################################
# Use the AutoLocator() ticker
# ----------------------------
plt.figure()
bax = make_plot()

# Then, we get the different axes created and set the ticks according to the
# x and y limits of each axes.

# One x locator and label per sub-axes in the last row.
for i, ax in enumerate(bax.last_row):
    ax.xaxis.set_major_locator(ticker.AutoLocator())
    ax.set_xlabel('xscale {i}'.format(i=i))
# One y locator and label per sub-axes in the first column.
for i, ax in enumerate(bax.first_col):
    ax.yaxis.set_major_locator(ticker.AutoLocator())
    ax.set_ylabel('yscale {i}'.format(i=i))
##############################################################################
# .. note:: It is not necessary to loop through all the axes since they all
# share the same x and y limits in a given column or row.
##############################################################################
# Manually set the ticks
# ----------------------
# Since brokenaxes returns normal matplotlib axes, you could also set them
# manually.
fig2 = plt.figure()
bax = make_plot()
# Explicit y ticks for the two sub-axes in the first column.
bax.first_col[0].set_yticks([80, 85, 90, 95, 100])
bax.first_col[1].set_yticks([-100, -50, 0])

# Explicit x ticks for the two sub-axes in the last row.
bax.last_row[0].set_xticks([0, 1, 2, 3, 4, 5])
bax.last_row[1].set_xticks([10, 20, 30])
plt.show()
9.2 yellowbrick
9.2.1 Pearson correlation
Python
# https://www.scikit-yb.org/en/latest/quickstart.html
import pandas as pd
from yellowbrick.datasets import load_bikeshare
# Load the bikeshare dataset as features X and target y, then preview the
# first rows of the features.
X, y = load_bikeshare()
print(X.head())
#:> season year month hour holiday weekday workingday weather temp feelslike humidity windspeed
#:> 0 1 0 1 0 0 6 0 1 0.24 0.2879 0.81 0.0
#:> 1 1 0 1 1 0 6 0 1 0.22 0.2727 0.80 0.0
#:> 2 1 0 1 2 0 6 0 1 0.22 0.2727 0.80 0.0
#:> 3 1 0 1 3 0 6 0 1 0.24 0.2879 0.75 0.0
#:> 4 1 0 1 4 0 6 0 1 0.24 0.2879 0.75 0.0
Python
from yellowbrick.features import Rank2D
# Rank pairs of features by Pearson correlation and display the result.
visualizer = Rank2D(algorithm="pearson")
visualizer.fit_transform(X)
visualizer.show()
9.2.2 Scatter diagram
Python
# https://www.scikit-yb.org/en/latest//quickstart-2.py
from yellowbrick.features import JointPlotVisualizer
# Joint plot of the 'temp' and 'feelslike' feature columns.
visualizer = JointPlotVisualizer(columns=['temp', 'feelslike'])
visualizer.fit_transform(X, y)
visualizer.show()
9.2.3 Residuals plot
Python
# https://www.scikit-yb.org/en/latest//quickstart-3.py
from yellowbrick.regressor import ResidualsPlot
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
# Create training and test sets
# Hold out 10% of the rows as the test set.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1
)
# Fit a linear regression on the training split, score it on the test split,
# then display the residuals plot.
visualizer = ResidualsPlot(LinearRegression())
visualizer.fit(X_train, y_train)
visualizer.score(X_test, y_test)
visualizer.show()
9.2.4 Discrimination threshold
Python
from yellowbrick.classifier import discrimination_threshold
from sklearn.linear_model import LogisticRegression
from yellowbrick.datasets import load_spam
# Load the spam dataset and run the discrimination-threshold one-liner on a
# logistic regression classifier.
X, y = load_spam()
visualizer = discrimination_threshold(
    LogisticRegression(multi_class="auto", solver="liblinear"), X, y
)
9.2.5 Intercluster distance
Python
# https://www.scikit-yb.org/en/latest//oneliners-17.py
from yellowbrick.datasets import load_nfl
from sklearn.cluster import MiniBatchKMeans
from yellowbrick.cluster import intercluster_distance
# Load the NFL dataset and run the intercluster-distance one-liner on a
# 5-cluster MiniBatchKMeans model with a fixed random seed.
X, y = load_nfl()
visualizer = intercluster_distance(MiniBatchKMeans(5, random_state=777), X)