Ch. 9 Beyond `matplotlib`

Last update: Thu Nov 19 17:20:43 2020 -0600 (49b93b1)

library(reticulate)
reticulate::use_condaenv("r-python")

9.1 `brokenaxis`

9.1.1 Usage

Python

# https://github.com/bendichter/brokenaxes/blob/master/examples/plot_usage.py
import matplotlib.pyplot as plt
from brokenaxes import brokenaxes
import numpy as np

fig = plt.figure(figsize=(5,2))
bax = brokenaxes(xlims=((0, .1), (.4, .7)), ylims=((-1, .7), (.79, 1)), hspace=.05)
x = np.linspace(0, 1, 100)
bax.plot(x, np.sin(10 * x), label='sin')
bax.plot(x, np.cos(10 * x), label='cos')
bax.legend(loc=3)
bax.set_xlabel('time')
bax.set_ylabel('value')
plt.show()

9.1.2 Subplots

Python

# https://github.com/bendichter/brokenaxes/blob/master/examples/plot_subplots.py
from brokenaxes import brokenaxes
from matplotlib.gridspec import GridSpec
import numpy as np

sps1, sps2 = GridSpec(2,1)

bax = brokenaxes(xlims=((.1, .3),(.7, .8)), subplot_spec=sps1)
x = np.linspace(0, 1, 100)
bax.plot(x, np.sin(x*30), ls=':', color='m')
x = np.random.poisson(3, 1000)
bax = brokenaxes(xlims=((0, 2.5), (3, 6)), subplot_spec=sps2)
bax.hist(x, histtype='bar')
plt.show()

9.1.3 Log scales

Python

# https://github.com/bendichter/brokenaxes/blob/master/examples/plot_logscales.py
# Log scales
# ==========
# Brokenaxe compute automatically the correct layout for a 1:1 scale. However, for
# logarithmic scales, the 1:1 scale has to be adapted. This is done via the
# `yscale` or `xscale` arguments.
import matplotlib.pyplot as plt
from brokenaxes import brokenaxes
import numpy as np

fig = plt.figure(figsize=(5,5))
bax = brokenaxes(xlims=((1, 500), (600, 10000)),
         ylims=((1, 500), (600, 10000)),
         hspace=.15, xscale='log', yscale='log')

x = np.logspace(0.0, 4, 100)
bax.loglog(x, x, label='$y=x=10^{0}$ to $10^{4}$')
bax.legend(loc='best')
bax.grid(axis='both', which='major', ls='-')
bax.grid(axis='both', which='minor', ls='--', alpha=0.4)
bax.set_xlabel('x')
bax.set_ylabel('y')
plt.show()

9.1.4 Different scales

Python

# https://github.com/bendichter/brokenaxes/blob/master/examples/plot_different_scales.py
# Different scales with brokenaxes
# ================================
# This example shows how to customize the scales and the ticks of each broken
# axes.
#############################################################################
# brokenaxes lets you choose the aspect ratio of each sub-axes thanks to the
# `height_ratios` and `width_ratios` to over-pass the default 1:1 scale for all
# axes. However, by default the ticks spacing are still identical for all axes.
# In this example, we present how to customize the ticks of your brokenaxes.
import numpy as np
import matplotlib.pyplot as plt
from brokenaxes import brokenaxes
import matplotlib.ticker as ticker

def make_plot():
    x = np.linspace(0, 5*2*np.pi, 300)
    y1 = np.sin(x)*100
    y2 = np.sin(x+np.pi)*5 + 90
    y3 = 30*np.exp(-x) - 50
    y4 = 90 + (1-np.exp(6/x))

    bax = brokenaxes(
        ylims=[(-100, 0), (80, 100)],
        xlims=[(0, 5), (10, 30)],
        height_ratios=[1, 3],
        width_ratios=[3, 5]
    )

    bax.plot(x, y1, label="Big sin")
    bax.plot(x, y2, label="Small sin")
    bax.plot(x, y3, label="Exponential 1")
    bax.plot(x, y4, '--', label="Exponential 2")

    bax.legend(loc="lower right")
    bax.set_title("Example for different scales for the x and y axis")

    return bax

#############################################################################
# Use the AutoLocator() ticker
# ----------------------------
plt.figure()
bax = make_plot()

# Then, we get the different axes created and set the ticks according to the
# axe x and y limits.
for i, ax in enumerate(bax.last_row):
    ax.xaxis.set_major_locator(ticker.AutoLocator())
    ax.set_xlabel('xscale {i}'.format(i=i))
for i, ax in enumerate(bax.first_col):
    ax.yaxis.set_major_locator(ticker.AutoLocator())
    ax.set_ylabel('yscale {i}'.format(i=i))

##############################################################################
# .. note:: It is not necessary to loop through all the axes since they all
#      share the same x and y limits in a given column or row.


##############################################################################
# Manually set the ticks
# ----------------------
# Since brokenaxes return normal matplotlib axes, you could also set them
# manually.
fig2 = plt.figure()
bax = make_plot()
bax.first_col[0].set_yticks([80, 85, 90, 95, 100])
bax.first_col[1].set_yticks([-100, -50, 0])
bax.last_row[0].set_xticks([0, 1, 2, 3, 4, 5])
bax.last_row[1].set_xticks([10, 20, 30])
plt.show()

9.2 `yellowbrick`

9.2.1 Pearson correlation

Python

# https://www.scikit-yb.org/en/latest/quickstart.html
import pandas as pd
from yellowbrick.datasets import load_bikeshare
X, y = load_bikeshare()
print(X.head())

#:>    season  year  month  hour  holiday  weekday  workingday  weather  temp  feelslike  humidity  windspeed
#:> 0       1     0      1     0        0        6           0        1  0.24     0.2879      0.81        0.0
#:> 1       1     0      1     1        0        6           0        1  0.22     0.2727      0.80        0.0
#:> 2       1     0      1     2        0        6           0        1  0.22     0.2727      0.80        0.0
#:> 3       1     0      1     3        0        6           0        1  0.24     0.2879      0.75        0.0
#:> 4       1     0      1     4        0        6           0        1  0.24     0.2879      0.75        0.0

Python

from yellowbrick.features import Rank2D
visualizer = Rank2D(algorithm="pearson")
visualizer.fit_transform(X)
visualizer.show()

9.2.2 Scatter diagram

Python

# https://www.scikit-yb.org/en/latest//quickstart-2.py
from yellowbrick.features import JointPlotVisualizer
visualizer = JointPlotVisualizer(columns=['temp', 'feelslike'])
visualizer.fit_transform(X, y)
visualizer.show()

9.2.3 Residuals plot

Python

# https://www.scikit-yb.org/en/latest//quickstart-3.py
from yellowbrick.regressor import ResidualsPlot
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# Create training and test sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1
)
visualizer = ResidualsPlot(LinearRegression())
visualizer.fit(X_train, y_train)
visualizer.score(X_test, y_test)
visualizer.show()

9.2.4 Discrimination threshold

Python

from yellowbrick.classifier import discrimination_threshold
from sklearn.linear_model import LogisticRegression
from yellowbrick.datasets import load_spam

X, y = load_spam()
visualizer = discrimination_threshold(
    LogisticRegression(multi_class="auto", solver="liblinear"), X, y
)

9.2.5 Intercluster distance

Python

# https://www.scikit-yb.org/en/latest//oneliners-17.py
from yellowbrick.datasets import load_nfl
from sklearn.cluster import MiniBatchKMeans
from yellowbrick.cluster import intercluster_distance

X, y = load_nfl()
visualizer = intercluster_distance(MiniBatchKMeans(5, random_state=777), X)

8 Advanced

Ch. 9 Beyond matplotlib

9.1 brokenaxis

9.1.1 Usage

9.1.2 Subplots

9.1.3 Log scales

9.1.4 Different scales

9.2 yellowbrick

9.2.1 Pearson correlation

9.2.2 Scatter diagram

9.2.3 Residuals plot

9.2.4 Discrimination threshold

9.2.5 Intercluster distance

Ch. 9 Beyond `matplotlib`

9.1 `brokenaxis`

9.2 `yellowbrick`