In [1]:
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

%load_ext version_information
%load_ext rpy2.ipython

Customizing Plots

In [2]:
# Shortened URL of the CSV file loaded below.
url = 'http://bit.ly/2b72LNj'
# Read the CSV at `url` into the DataFrame `df` that the plotting cells use.
df = pd.read_csv(url)
In [3]:
# Show the first rows of df to check the columns and values that were loaded.
df.head()
Out[3]:
model mpg cyl disp hp drat wt qsec vs am gear carb
0 Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
1 Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
2 Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
3 Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
4 Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2

Customizing matplotlib graphics

In [4]:
# Sort by weight so the connecting line is drawn in increasing x order.
# The trailing semicolon suppresses the textual repr of the returned line objects.
plt.plot('wt', 'mpg', '-o', data=df.sort_values(by='wt'));
_images/Customizing_Plots_5_0.png

Adding labels

In [5]:
# Weight-sorted line plot of mpg against wt.
plt.plot('wt', 'mpg', '-o', data=df.sort_values(by='wt'))
# Set the title and both axis labels on the current axes in one call.
plt.gca().set(title='MPG versus Weight', xlabel='Weight', ylabel='MPG');
_images/Customizing_Plots_7_0.png

Changing Axes Limits

In [6]:
# Weight-sorted line plot of mpg against wt.
plt.plot('wt', 'mpg', '-o', data=df.sort_values(by='wt'))
# Fix the visible range of each axis explicitly instead of relying on autoscaling.
plt.xlim(1, 6)
plt.ylim(0, 40);
_images/Customizing_Plots_9_0.png

Changing coordinate systems

In [7]:
# Weight-sorted line plot of mpg against wt.
plt.plot('wt', 'mpg', '-o', data=df.sort_values(by='wt'))
# Switch the y axis to a logarithmic scale.
plt.yscale('log')
# Place ten evenly spaced ticks from 10 to 100 on the y axis.
plt.yticks(np.linspace(start=10, stop=100, num=10));
_images/Customizing_Plots_11_0.png

Changing attributes of visual elements

In [8]:
# Style the line and its markers with explicit keyword properties:
# a dashed orange line with square markers that have a red face and a blue edge.
plt.plot('wt', 'mpg',
         data=df.sort_values(by='wt'),
         color='orange',
         linestyle='dashed',
         marker='s',
         markeredgecolor='blue',
         markeredgewidth=1,
         markerfacecolor='red');
_images/Customizing_Plots_13_0.png

Adding annotations

In [9]:
# Weight-sorted line plot of mpg against wt.
plt.plot('wt', 'mpg', '-o', data=df.sort_values(by='wt'))
# Free-standing red text placed at data coordinates (4.0, 30.0).
plt.text(4.0, 30.0, 'Interesting!', fontsize=15, color='red')
# A single-letter label drawn inside a semi-transparent yellow box.
plt.text(2.4, 32, 'A', bbox={'facecolor': 'yellow', 'alpha': 0.5})
# Red arrow starting at (4.3, 29) and extending by (dx, dy) = (-0.80, -3).
plt.arrow(x=4.3, y=29, dx=-0.80, dy=-3,
          head_length=0.9, head_width=0.3,
          facecolor='r', edgecolor='r', linewidth=0.75);
_images/Customizing_Plots_15_0.png

Adding legends

In [10]:
# Draw one weight-sorted line per column; each `label` becomes a legend entry.
for column, series_label in [('mpg', 'Fuel Consumption'), ('hp', 'Horsepower')]:
    plt.plot('wt', column, '-o', data=df.sort_values(by='wt'), label=series_label)
# Build the legend from the labels above and place it in the upper-left corner.
plt.legend(loc='upper left', fontsize=13);
_images/Customizing_Plots_17_0.png

Using styles

In [11]:
# List the names of the matplotlib styles available in this installation.
plt.style.available
Out[11]:
['classic',
 'seaborn-whitegrid',
 'bmh',
 'seaborn-paper',
 'ggplot',
 'seaborn-pastel',
 'seaborn-notebook',
 'seaborn-ticks',
 'seaborn-dark-palette',
 'seaborn-white',
 'seaborn-darkgrid',
 'dark_background',
 'grayscale',
 'fivethirtyeight',
 'seaborn-colorblind',
 'seaborn-talk',
 'seaborn-poster',
 'seaborn-deep',
 'seaborn-bright',
 'seaborn-muted',
 'seaborn-dark',
 'presentation']
In [12]:
# Apply the 'seaborn-white' style only to the plot drawn inside this block.
with plt.style.context('seaborn-white'):
    plt.plot('wt', 'mpg', '-o', data=df.sort_values(by='wt'))
_images/Customizing_Plots_20_0.png
In [13]:
# Apply the 'seaborn-dark-palette' style only to the plot drawn inside this block.
with plt.style.context('seaborn-dark-palette'):
    plt.plot('wt', 'mpg', '-o', data=df.sort_values(by='wt'))
_images/Customizing_Plots_21_0.png
In [14]:
# Apply the 'bmh' style only to the plot drawn inside this block.
with plt.style.context('bmh'):
    plt.plot('wt', 'mpg', '-o', data=df.sort_values(by='wt'))
_images/Customizing_Plots_22_0.png
In [15]:
# Draw the same plot with matplotlib's xkcd sketch mode active inside this block.
with plt.xkcd():
    plt.plot('wt', 'mpg', '-o', data=df.sort_values(by='wt'))
_images/Customizing_Plots_23_0.png

Customizing seaborn graphics

Since seaborn is built on top of matplotlib, customization options for matplotlib will also work with seaborn. However, seaborn plotting functions often give much more scope for customization.

In [16]:
# Swarm plot of mpg for each gear count, with points coloured by weight.
# x and y are passed by keyword: newer seaborn releases no longer accept them
# positionally, while the keyword form is accepted by older releases as well.
ax = sns.swarmplot(x='gear', y='mpg', hue='wt',
                   size=15, palette="BuGn_r",
                   data=df)
# Remove the legend that the `hue` variable adds to the axes.
ax.legend_.remove()
pass
_images/Customizing_Plots_25_0.png

Layout for Multiple Plots

Data aware grids

In [17]:
# One facet per (am, cyl) combination, with row/column titles in the margins.
g = sns.FacetGrid(df, row="am", col="cyl", margin_titles=True)
# Give every facet the same explicit category order. Without `order`, each
# facet positions only the carb values present in its own subset, so boxes in
# different facets can end up under the wrong shared tick labels.
g.map(sns.boxplot, 'carb', 'mpg', order=sorted(df['carb'].unique()))
pass
_images/Customizing_Plots_28_0.png
In [18]:
# One facet per (am, cyl) combination; axes are not shared, so each facet
# gets its own x and y range.
g = sns.FacetGrid(
    df,
    row='am',
    col='cyl',
    margin_titles=True,
    sharex=False,
    sharey=False,
)
# Draw the mpg distribution in each facet, with a rug of the individual values.
g.map(sns.distplot, 'mpg', rug=True, color='orange');
/Users/cliburn/anaconda2/envs/p3/lib/python3.5/site-packages/statsmodels/nonparametric/kdetools.py:20: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
  y = X[:m/2+1] + np.r_[0,X[m/2+1:],0]*1j
_images/Customizing_Plots_29_1.png

Pairwise Plots

In [19]:
# Keep only the five columns that are compared in the pairwise grid.
df1 = df[['mpg', 'hp', 'drat', 'wt', 'qsec']]
In [20]:
# Grid of axes with one row and one column per variable in df1.
g = sns.PairGrid(df1)
# Upper triangle: scatter plots of each pair of variables.
g.map_upper(plt.scatter)
# Lower triangle: kernel density estimates of each pair of variables.
g.map_lower(sns.kdeplot)
# Diagonal: density of each single variable, drawn with a thicker line and no legend.
g.map_diag(sns.kdeplot, lw=3, legend=False);
/Users/cliburn/anaconda2/envs/p3/lib/python3.5/site-packages/statsmodels/nonparametric/kdetools.py:20: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
  y = X[:m/2+1] + np.r_[0,X[m/2+1:],0]*1j
_images/Customizing_Plots_32_1.png

Several seaborn plots use these grids under the hood

In [21]:
# Regression of mpg on wt, drawn in a separate column for each value of `am`.
sns.lmplot(x='wt', y='mpg', col='am', data=df);
_images/Customizing_Plots_34_0.png

Laying Out Multiple Different Types of Plots

In [22]:
# Lay out five axes of different sizes on a 3x3 grid inside one 9x9-inch figure.
plt.figure(figsize=(9, 9))
ax1 = plt.subplot2grid((3, 3), (0, 0), colspan=3)   # top row, full width
ax2 = plt.subplot2grid((3, 3), (1, 0), colspan=2)   # middle row, left two columns
ax3 = plt.subplot2grid((3, 3), (1, 2), rowspan=2)   # right column, bottom two rows
ax4 = plt.subplot2grid((3, 3), (2, 0))              # bottom-left cell
ax5 = plt.subplot2grid((3, 3), (2, 1))              # bottom-middle cell

# x and y are passed by keyword: newer seaborn releases no longer accept them
# positionally, while the keyword form is accepted by older releases as well.
sns.regplot(x='wt', y='mpg', data=df, ax=ax1)
sns.violinplot(x='gear', y='hp', data=df, ax=ax2)
# `split=True` was dropped here: it only separates points by `hue` level, no
# `hue` is given, and newer seaborn releases no longer accept the argument.
sns.swarmplot(x='wt', y='gear', data=df, orient="h",
              size=10, alpha=0.8, palette='pastel', ax=ax3)
# NOTE(review): this two-positional-argument form is the bivariate call of the
# seaborn version this notebook was run with; newer releases expect
# x=/y= keywords instead - confirm against the installed version.
sns.kdeplot(df.wt, df.hp, ax=ax4)
sns.barplot(x='cyl', y='hp', data=df,
            palette=sns.light_palette('orange'), ax=ax5)
# Adjust spacing so titles and tick labels of neighbouring axes do not overlap.
plt.tight_layout()
_images/Customizing_Plots_36_0.png

Exercises

1a. Load the iris data set found in data/iris.csv into a DataFrame named iris.

  • How many rows and columns are there in iris?
  • Display the first 6 rows.
In [ ]:




1b. Make a regression plot of Sepal.Length (x-coordinate) by Sepal.Width (y-coordinate) using the lmplot function. Add a title “Bad regression”.

In [ ]:




1c. Create a new figure where you fit separate linear regressions of Sepal.Length (x-coordinate) by Sepal.Width for each species on the same plot.

In [ ]:




1d. Create a new figure where you have separate linear regression plots of Sepal.Length (x-coordinate) by Sepal.Width for each species. This figure should have 1 row and 3 columns.

In [ ]:




1e. Create a new figure with 2 rows and 2 columns, where each subplot shows a swarmplot comparing one of the 4 flower features (Sepal.Length, Sepal.Width, Petal.Length, Petal.Width) across Species.

Challenge: Can you do this in two lines of code?

In [ ]:




1f. Repeat the exercise in 1e, but change the titles of the subplots to “One” and “Two” for the first row and “Three”, “Four” for the second row.

In [ ]:




Version Information

In [23]:
%load_ext version_information
%version_information
The version_information extension is already loaded. To reload it, use:
  %reload_ext version_information
Out[23]:
SoftwareVersion
Python3.5.2 64bit [GCC 4.2.1 Compatible Apple LLVM 4.2 (clang-425.0.28)]
IPython5.0.0
OSDarwin 15.6.0 x86_64 i386 64bit
Tue Aug 16 09:06:03 2016 EDT
In [ ]: