Ch. 9 Beyond matplotlib

Last update: Thu Nov 19 17:20:43 2020 -0600 (49b93b1)

R

library(reticulate)
reticulate::use_condaenv("r-python")

9.1 brokenaxes

9.1.1 Usage

Python

# https://github.com/bendichter/brokenaxes/blob/master/examples/plot_usage.py
# Basic usage: one figure whose x and y axes each show two separate ranges.
import matplotlib.pyplot as plt
from brokenaxes import brokenaxes
import numpy as np

fig = plt.figure(figsize=(5, 2))

# Each inner tuple is one (low, high) range passed for that axis.
bax = brokenaxes(
    xlims=((0, .1), (.4, .7)),
    ylims=((-1, .7), (.79, 1)),
    hspace=.05,
)

x = np.linspace(0, 1, 100)
# Same call for both curves; only the function and its legend label differ.
for curve, name in ((np.sin, 'sin'), (np.cos, 'cos')):
    bax.plot(x, curve(10 * x), label=name)

bax.legend(loc=3)  # 3 is matplotlib's numeric code for 'lower left'
bax.set_xlabel('time')
bax.set_ylabel('value')
plt.show()

9.1.2 Subplots

Python

# https://github.com/bendichter/brokenaxes/blob/master/examples/plot_subplots.py
# Two broken-axes plots in one figure, one per cell of a 2x1 GridSpec.
import matplotlib.pyplot as plt  # required by plt.show() at the end of this snippet
from brokenaxes import brokenaxes
from matplotlib.gridspec import GridSpec
import numpy as np

# Unpacking the 2x1 grid yields one subplot spec per cell.
sps1, sps2 = GridSpec(2,1)

# First cell: a dotted magenta sine curve.
bax = brokenaxes(xlims=((.1, .3),(.7, .8)), subplot_spec=sps1)
x = np.linspace(0, 1, 100)
bax.plot(x, np.sin(x*30), ls=':', color='m')

# Second cell: histogram of 1000 Poisson draws. The draws are unseeded, so
# the bars differ from run to run.
x = np.random.poisson(3, 1000)
bax = brokenaxes(xlims=((0, 2.5), (3, 6)), subplot_spec=sps2)
bax.hist(x, histtype='bar')
plt.show()

9.1.3 Log scales

Python

# https://github.com/bendichter/brokenaxes/blob/master/examples/plot_logscales.py
# Log scales
# ==========
# brokenaxes automatically computes the correct layout for a 1:1 scale. For
# logarithmic scales that layout has to be adapted, which is done via the
# `yscale` or `xscale` arguments.
import matplotlib.pyplot as plt
from brokenaxes import brokenaxes
import numpy as np

fig = plt.figure(figsize=(5, 5))

# Both axes use the same two ranges, so the limits are written once.
segments = ((1, 500), (600, 10000))
bax = brokenaxes(
    xlims=segments,
    ylims=segments,
    hspace=.15,
    xscale='log',
    yscale='log',
)

x = np.logspace(0.0, 4, 100)
bax.loglog(x, x, label='$y=x=10^{0}$ to $10^{4}$')
bax.legend(loc='best')

# Solid lines for the major grid, fainter dashed lines for the minor grid.
grid_styles = {
    'major': dict(ls='-'),
    'minor': dict(ls='--', alpha=0.4),
}
for which, style in grid_styles.items():
    bax.grid(axis='both', which=which, **style)

bax.set_xlabel('x')
bax.set_ylabel('y')
plt.show()

9.1.4 Different scales

Python

# https://github.com/bendichter/brokenaxes/blob/master/examples/plot_different_scales.py
# Different scales with brokenaxes
# ================================
# This example shows how to customize the scales and the ticks of each broken
# axes.
#############################################################################
# brokenaxes lets you choose the aspect ratio of each sub-axes via
# `height_ratios` and `width_ratios`, overriding the default 1:1 scale for all
# axes. However, by default the tick spacing is still identical for all axes.
# In this example, we present how to customize the ticks of your brokenaxes.
import numpy as np
import matplotlib.pyplot as plt
from brokenaxes import brokenaxes
import matplotlib.ticker as ticker

def make_plot():
    """Plot four curves on a 2x2 broken-axes layout and return it.

    The layout has two x ranges and two y ranges with unequal
    ``width_ratios`` and ``height_ratios``. No figure is passed to
    ``brokenaxes``; callers in this file create one with ``plt.figure()``
    right before calling (presumably the current figure is used -- confirm
    against the brokenaxes documentation).

    Returns:
        The object returned by ``brokenaxes(...)``, so that callers can
        adjust ticks and labels on its individual axes.
    """
    x = np.linspace(0, 5*2*np.pi, 300)
    y1 = np.sin(x)*100
    y2 = np.sin(x+np.pi)*5 + 90
    y3 = 30*np.exp(-x) - 50
    # x[0] is 0, so 6/x divides by zero there and y4[0] evaluates to -inf.
    # The computed values are left untouched; only NumPy's divide-by-zero
    # RuntimeWarning is silenced so the example runs cleanly.
    with np.errstate(divide="ignore"):
        y4 = 90 + (1-np.exp(6/x))

    bax = brokenaxes(
        ylims=[(-100, 0), (80, 100)],
        xlims=[(0, 5), (10, 30)],
        height_ratios=[1, 3],
        width_ratios=[3, 5]
    )

    bax.plot(x, y1, label="Big sin")
    bax.plot(x, y2, label="Small sin")
    bax.plot(x, y3, label="Exponential 1")
    bax.plot(x, y4, '--', label="Exponential 2")

    bax.legend(loc="lower right")
    bax.set_title("Example for different scales for the x and y axis")

    return bax

#############################################################################
# Use the AutoLocator() ticker
# ----------------------------
plt.figure()
bax = make_plot()

# Give every axes in the last row its own x-axis AutoLocator, and every axes
# in the first column its own y-axis AutoLocator, labelling each with its
# position in that row or column.
for i, ax in enumerate(bax.last_row):
    x_locator = ticker.AutoLocator()
    ax.xaxis.set_major_locator(x_locator)
    ax.set_xlabel(f'xscale {i}')
for i, ax in enumerate(bax.first_col):
    y_locator = ticker.AutoLocator()
    ax.yaxis.set_major_locator(y_locator)
    ax.set_ylabel(f'yscale {i}')

##############################################################################
# .. note:: It is not necessary to loop through all the axes since they all
#      share the same x and y limits in a given column or row.


##############################################################################
# Manually set the ticks
# ----------------------
# Since brokenaxes returns normal matplotlib axes, you can also set the ticks
# manually.
fig2 = plt.figure()
bax = make_plot()

# One tick list per axes, in the order the axes appear in first_col / last_row.
y_ticks = ([80, 85, 90, 95, 100], [-100, -50, 0])
x_ticks = ([0, 1, 2, 3, 4, 5], [10, 20, 30])
for idx, ticks in enumerate(y_ticks):
    bax.first_col[idx].set_yticks(ticks)
for idx, ticks in enumerate(x_ticks):
    bax.last_row[idx].set_xticks(ticks)
plt.show()

9.2 yellowbrick

9.2.1 Pearson correlation

Python

# https://www.scikit-yb.org/en/latest/quickstart.html
import pandas as pd
from yellowbrick.datasets import load_bikeshare
# X and y stay at top level because the following yellowbrick examples
# (Rank2D, JointPlotVisualizer, ResidualsPlot) reuse them.
X, y = load_bikeshare()
# Preview the first rows of X; the `#:>` lines below are the printed output.
print(X.head())
#:>    season  year  month  hour  holiday  weekday  workingday  weather  temp  feelslike  humidity  windspeed
#:> 0       1     0      1     0        0        6           0        1  0.24     0.2879      0.81        0.0
#:> 1       1     0      1     1        0        6           0        1  0.22     0.2727      0.80        0.0
#:> 2       1     0      1     2        0        6           0        1  0.22     0.2727      0.80        0.0
#:> 3       1     0      1     3        0        6           0        1  0.24     0.2879      0.75        0.0
#:> 4       1     0      1     4        0        6           0        1  0.24     0.2879      0.75        0.0

Python

from yellowbrick.features import Rank2D
# Build a Rank2D visualizer using the "pearson" algorithm, fit it on the
# bike-share features X loaded in the previous example, then display it.
visualizer = Rank2D(algorithm="pearson")
visualizer.fit_transform(X)
visualizer.show()

9.2.2 Scatter diagram

Python

# https://www.scikit-yb.org/en/latest//quickstart-2.py
from yellowbrick.features import JointPlotVisualizer
# Restrict the visualizer to the `temp` and `feelslike` columns; X and y are
# still the bike-share data loaded earlier on this page.
visualizer = JointPlotVisualizer(columns=['temp', 'feelslike'])
visualizer.fit_transform(X, y)
visualizer.show()

9.2.3 Residuals plot

Python

# https://www.scikit-yb.org/en/latest//quickstart-3.py
from yellowbrick.regressor import ResidualsPlot
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# Split into training and test sets with test_size=0.1. No random_state is
# given, so the split differs between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)

# Fit on the training set, score on the test set, then display the plot.
model = LinearRegression()
visualizer = ResidualsPlot(model)
visualizer.fit(X_train, y_train)
visualizer.score(X_test, y_test)
visualizer.show()

9.2.4 Discrimination threshold

Python

from yellowbrick.classifier import discrimination_threshold
from sklearn.linear_model import LogisticRegression
from yellowbrick.datasets import load_spam

# NOTE: this rebinds X and y, which held the bike-share data in the earlier
# examples, to the spam dataset.
X, y = load_spam()
# `multi_class` is intentionally not passed: "auto" is already the default,
# and recent scikit-learn releases deprecate the parameter, so passing it
# only triggers a FutureWarning.
visualizer = discrimination_threshold(
    LogisticRegression(solver="liblinear"), X, y
)

9.2.5 Intercluster distance

Python

# https://www.scikit-yb.org/en/latest//oneliners-17.py
from yellowbrick.datasets import load_nfl
from sklearn.cluster import MiniBatchKMeans
from yellowbrick.cluster import intercluster_distance

X, y = load_nfl()
# Five clusters, with random_state fixed at 777.
clusterer = MiniBatchKMeans(n_clusters=5, random_state=777)
visualizer = intercluster_distance(clusterer, X)