Ch. 1 Introduction
Last update: Thu Nov 19 17:20:43 2020 -0600 (49b93b1)
Remember that in the previous chapter we said that the best way to obtain reproducible results when writing Python code in Rmarkdown is to create stand-alone Python environments. The next code block is written in R: the call `reticulate::use_condaenv("r-python")` activates the Python environment `r-python`, which is then used to run the Python code when building the notebooks written in Rmarkdown. Later we will see how to create these environments.
R
# Load the reticulate package so that R can talk to Python.
library(reticulate)
# Select the conda environment named "r-python" for the Python code chunks.
reticulate::use_condaenv("r-python")
1.1 “Hello world”
The environment `r-python` already has the basic Python libraries, among them `numpy` and `matplotlib`. This is one of the simplest examples: plotting a sine wave computed from an evenly spaced `numpy` array.
Python
import matplotlib.pyplot as plt
import numpy as np
# Sample two seconds of "time" in 10 ms steps and evaluate a 1 Hz sine wave
# shifted up by one unit.
t = np.arange(0.0, 2.0, 0.01)
s = 1 + np.sin(2*np.pi*t)
plt.plot(t, s)

# Label the axes, add a title and a grid.
plt.xlabel('time (s)')
plt.ylabel('voltage (mV)')
plt.title('About as simple as it gets, folks')
plt.grid(True)

# Write the figure to disk before showing it on screen.
plt.savefig("test.png")
plt.show()
1.2 The parts of a plot
I love this plot because it helps you formulate the right question when you are looking for online assistance. Sooner or later, you will need to customize the \(x\) or \(y\) axis ticks so that they show specific data points instead of the defaults, or to get rid of the many \(x\)-axis labels that overlap one another.
Python
# https://matplotlib.org/gallery/showcase/anatomy.html#sphx-glr-gallery-showcase-anatomy-py
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import AutoMinorLocator, MultipleLocator, FuncFormatter
# Fix the random state so the scattered markers are reproducible.
np.random.seed(19680801)

# Two smooth curves plus random points drawn between them.
X = np.linspace(0.5, 3.5, 100)
Y1 = 3+np.cos(X)
Y2 = 1+np.cos(1+X/0.75)/2
Y3 = np.random.uniform(Y1, Y2, len(X))

# A square figure holding a single axes with a 1:1 data aspect ratio.
fig = plt.figure(figsize=(8, 8))
ax = fig.add_subplot(1, 1, 1, aspect=1)
def minor_tick(x, pos):
    """Format a minor tick label.

    Has the ``(x, pos)`` signature expected by ``FuncFormatter``.

    Args:
        x: Tick value.
        pos: Tick position; unused.

    Returns:
        An empty string when ``x`` is a whole number, otherwise ``x``
        formatted with two decimals.
    """
    # A zero remainder means x is a whole number: return no label for it.
    if not x % 1.0:
        return ""
    return "%.2f" % x
# Major ticks every 1.0 unit, with each major interval split into 4 minor ones.
ax.xaxis.set_major_locator(MultipleLocator(1.000))
ax.xaxis.set_minor_locator(AutoMinorLocator(4))
ax.yaxis.set_major_locator(MultipleLocator(1.000))
ax.yaxis.set_minor_locator(AutoMinorLocator(4))
# Label the minor ticks on the x axis only.
ax.xaxis.set_minor_formatter(FuncFormatter(minor_tick))

ax.set_xlim(0, 4)
#:> (0.0, 4.0)
ax.set_ylim(0, 4)
#:> (0.0, 4.0)

# Tick geometry: long major ticks, shorter minor ticks with small grey labels.
ax.tick_params(which='major', width=1.0)
ax.tick_params(which='major', length=10)
ax.tick_params(which='minor', width=1.0, labelsize=10)
ax.tick_params(which='minor', length=5, labelsize=10, labelcolor='0.25')

# Dashed grid with a negative zorder.
ax.grid(linestyle="--", linewidth=0.5, color='.25', zorder=-10)

# Blue line, red line, and markers only (linewidth=0) for the random points.
ax.plot(X, Y1, c=(0.25, 0.25, 1.00), lw=2, label="Blue signal", zorder=10)
ax.plot(X, Y2, c=(1.00, 0.25, 0.25), lw=2, label="Red signal")
ax.plot(X, Y3, linewidth=0,
        marker='o', markerfacecolor='w', markeredgecolor='k')

ax.set_title("Anatomy of a figure", fontsize=20, verticalalignment='bottom')
ax.set_xlabel("X axis label")
ax.set_ylabel("Y axis label")

ax.legend()
def circle(x, y, radius=0.15):
    """Draw a highlight circle on the module-level ``ax``.

    Args:
        x: X coordinate of the circle centre.
        y: Y coordinate of the circle centre.
        radius: Circle radius; defaults to 0.15.
    """
    from matplotlib.patches import Circle
    from matplotlib.patheffects import withStroke

    # clip_on=False lets the circle extend outside the axes area; the white
    # stroke gives it a halo.
    patch = Circle((x, y), radius, clip_on=False, zorder=10, linewidth=1,
                   edgecolor='black', facecolor=(0, 0, 0, .0125),
                   path_effects=[withStroke(linewidth=5, foreground='w')])
    ax.add_artist(patch)
def text(x, y, text):
    """Write a bold blue caption on the module-level ``ax``.

    Args:
        x: X coordinate of the caption anchor.
        y: Y coordinate of the caption anchor.
        text: Caption string.
    """
    # The caption is centred horizontally and hangs below the anchor point.
    ax.text(x, y, text, backgroundcolor="white",
            ha='center', va='top', weight='bold', color='blue')
# Each figure element gets a highlight circle and a caption 0.2 below it.

# Minor tick label
circle(0.50, -0.10)
text(0.50, -0.32, "Minor tick label")

# Major tick
circle(-0.03, 4.00)
text(0.03, 3.80, "Major tick")

# Minor tick
circle(0.00, 3.50)
text(0.00, 3.30, "Minor tick")

# Major tick label
circle(-0.15, 3.00)
text(-0.15, 2.80, "Major tick label")

# X Label
circle(1.80, -0.27)
text(1.80, -0.45, "X axis label")

# Y Label
circle(-0.27, 1.80)
text(-0.27, 1.6, "Y axis label")

# Title
circle(1.60, 4.13)
text(1.60, 3.93, "Title")

# Blue plot
circle(1.75, 2.80)
text(1.75, 2.60, "Line\n(line plot)")

# Red plot
circle(1.20, 0.60)
text(1.20, 0.40, "Line\n(line plot)")

# Scatter plot
circle(3.20, 1.75)
text(3.20, 1.55, "Markers\n(scatter plot)")

# Grid
circle(3.00, 3.00)
text(3.00, 2.80, "Grid")

# Legend
circle(3.70, 3.80)
text(3.70, 3.60, "Legend")

# Axes
circle(0.5, 0.5)
text(0.5, 0.3, "Axes")

# Figure
circle(-0.3, 0.65)
text(-0.3, 0.45, "Figure")
# Two arrows sharing the single "Spines" caption: the first carries the text,
# the second is an unlabeled arrow.
color = 'blue'
ax.annotate('Spines', xy=(4.0, 0.35), xycoords='data',
            xytext=(3.3, 0.5), textcoords='data',
            weight='bold', color=color,
            arrowprops=dict(arrowstyle='->',
                            connectionstyle="arc3",
                            color=color))

ax.annotate('', xy=(3.15, 0.0), xycoords='data',
            xytext=(3.45, 0.45), textcoords='data',
            weight='bold', color=color,
            arrowprops=dict(arrowstyle='->',
                            connectionstyle="arc3",
                            color=color))

# Credit line, right-aligned at x=4.0 and below the axes.
ax.text(4.0, -0.4, "Made with http://matplotlib.org",
        fontsize=10, ha="right", color='.5')

plt.show()
1.3 Can do business plots too …
Although they are not precisely the kind of plots I am interested in right now, all kinds of business plots are available in `matplotlib`, including the infamous pie chart.
Python
import numpy as np
import matplotlib.pyplot as plt
# Use the "ggplot" style sheet for every plot created below.
plt.style.use('ggplot')

# A 2x2 grid of axes, flattened so each panel has its own name.
fig, axes = plt.subplots(ncols=2, nrows=2)
ax1, ax2, ax3, ax4 = axes.ravel()

# scatter plot (Note: `plt.scatter` doesn't use default colors)
x, y = np.random.normal(size=(2, 200))
ax1.plot(x, y, 'o')

# sinusoidal lines with colors from default color cycle
L = 2*np.pi
x = np.linspace(0, L)
ncolors = len(plt.rcParams['axes.prop_cycle'])
shift = np.linspace(0, L, ncolors, endpoint=False)
for s in shift:
    ax2.plot(x, np.sin(x + s), '-')
ax2.margins(0)

# bar graphs
x = np.arange(5)
y1, y2 = np.random.randint(1, 25, size=(2, 5))
width = 0.25
ax3.bar(x, y1, width)
# The second series is shifted right by one bar width and uses the third
# color of the cycle.
ax3.bar(x + width, y2, width,
        color=list(plt.rcParams['axes.prop_cycle'])[2]['color'])
ax3.set_xticks(x + width)
ax3.set_xticklabels(['a', 'b', 'c', 'd', 'e'])

# circles with colors from default color cycle
for color in plt.rcParams['axes.prop_cycle']:
    xy = np.random.normal(size=2)
    ax4.add_patch(plt.Circle(xy, radius=0.3, color=color['color']))
ax4.axis('equal')
ax4.margins(0)

plt.show()
1.4 And real time …
Strip charts are a favorite for plotting real-time data, whether it comes from sensors or from any internet source.
Python
# https://matplotlib.org/gallery/lines_bars_and_markers/cohere.html#sphx-glr-gallery-lines-bars-and-markers-cohere-py
import numpy as np
import matplotlib.pyplot as plt
# Fixing random state for reproducibility
np.random.seed(19680801)

dt = 0.01
t = np.arange(0, 30, dt)
nse1 = np.random.randn(len(t))  # white noise 1
nse2 = np.random.randn(len(t))  # white noise 2

# Two signals with a coherent part at 10Hz and a random part
s1 = np.sin(2 * np.pi * 10 * t) + nse1
s2 = np.sin(2 * np.pi * 10 * t) + nse2

# Top panel: both signals against time, limited to x in [0, 2].
fig, axs = plt.subplots(2, 1)
axs[0].plot(t, s1, t, s2)
axs[0].set_xlim(0, 2)
axs[0].set_xlabel('time')
axs[0].set_ylabel('s1 and s2')
axs[0].grid(True)

# Bottom panel: coherence of s1 and s2 (NFFT=256, sampling frequency 1/dt).
cxy, f = axs[1].cohere(s1, s2, 256, 1. / dt)
axs[1].set_ylabel('coherence')

fig.tight_layout()
plt.show()
1.5 And also 3D …
Although 3D plots are generally not recommended in data science, if there is a compelling case where a 3D plot explains a discovery better than a 2D plot, then it is okay and justified. But the rule is not to abuse 3D: what you are trying to convey is information per square centimeter of graphics.
Python
# https://matplotlib.org/2.0.2/examples/mplot3d/contour3d_demo.html
from mpl_toolkits.mplot3d import axes3d
import matplotlib.pyplot as plt
from matplotlib import cm
# Create a figure with a single 3D axes.
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')

# Contour the sample surface from get_test_data, colored with the coolwarm
# colormap, and label the contour lines.
X, Y, Z = axes3d.get_test_data(0.05)
cset = ax.contour(X, Y, Z, cmap=cm.coolwarm)
ax.clabel(cset, fontsize=9, inline=1)

plt.show()