In [1]:
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
%load_ext version_information
%load_ext rpy2.ipython
Customizing Plots¶
In [2]:
# Shortened link to the CSV file that every later cell plots from.
url = 'http://bit.ly/2b72LNj'
# read_csv accepts a URL directly: the file is downloaded and parsed into a DataFrame.
df = pd.read_csv(url)
In [3]:
# Preview the first rows to check the column names and value types.
df.head()
Out[3]:
model | mpg | cyl | disp | hp | drat | wt | qsec | vs | am | gear | carb | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Mazda RX4 | 21.0 | 6 | 160.0 | 110 | 3.90 | 2.620 | 16.46 | 0 | 1 | 4 | 4 |
1 | Mazda RX4 Wag | 21.0 | 6 | 160.0 | 110 | 3.90 | 2.875 | 17.02 | 0 | 1 | 4 | 4 |
2 | Datsun 710 | 22.8 | 4 | 108.0 | 93 | 3.85 | 2.320 | 18.61 | 1 | 1 | 4 | 1 |
3 | Hornet 4 Drive | 21.4 | 6 | 258.0 | 110 | 3.08 | 3.215 | 19.44 | 1 | 0 | 3 | 1 |
4 | Hornet Sportabout | 18.7 | 8 | 360.0 | 175 | 3.15 | 3.440 | 17.02 | 0 | 0 | 3 | 2 |
Customizing matplotlib
graphics¶
In [4]:
# Baseline figure: mpg against weight, markers joined by a solid line.
# Rows are ordered by weight first so the connecting line runs left to right.
# The trailing semicolon keeps the list-of-lines repr out of the cell output.
plt.plot('wt', 'mpg', '-o', data=df.sort_values(by='wt'));
Adding labels¶
In [5]:
# Same weight/mpg line plot, now with axis labels and a title.
plt.plot('wt', 'mpg', '-o', data=df.sort_values(by='wt'))
plt.xlabel('Weight')
plt.ylabel('MPG')
# Trailing semicolon suppresses the Text repr that plt.title returns.
plt.title('MPG versus Weight');
Changing Axes Limits¶
In [6]:
# Same weight/mpg line plot with the visible range of both axes fixed by hand.
plt.plot('wt', 'mpg', '-o', data=df.sort_values(by='wt'))
plt.xlim(1, 6)    # x axis spans 1 to 6
plt.ylim(0, 40);  # y axis spans 0 to 40; semicolon hides the returned limits
Changing coordinate systems¶
In [7]:
# Same weight/mpg line plot drawn on a logarithmic y axis.
plt.plot('wt', 'mpg', '-o', data=df.sort_values(by='wt'))
plt.yscale('log')
# Ten evenly spaced tick positions from 10 to 100 (10, 20, ..., 100).
plt.yticks(np.linspace(start=10, stop=100, num=10));
Changing attributes of visual elements¶
In [8]:
# Style the line and its markers through explicit keyword arguments.
# The long property names are used here; mec / mew / mfc are their short aliases.
plt.plot(
    'wt', 'mpg',
    data=df.sort_values(by='wt'),
    color='orange',            # line colour
    linestyle='--',            # dashed line
    marker='s',                # square markers
    markeredgecolor='blue',
    markeredgewidth=1,
    markerfacecolor='red'
);
Adding annotations¶
In [9]:
# Weight/mpg line plot decorated with free text, a boxed label and an arrow.
plt.plot('wt', 'mpg', '-o', data=df.sort_values(by='wt'))

# Plain text placed at data coordinates (4.0, 30.0).
plt.text(4.0, 30.0, 'Interesting!', color='red', fontsize=15)

# Text drawn inside a semi-transparent yellow box.
plt.text(2.4, 32, 'A', bbox={'facecolor': 'yellow', 'alpha': 0.5})

# Arrow starting at (4.3, 29) and extending by (-0.80, -3) in data units.
plt.arrow(4.3, 29, -0.80, -3,
          head_width=0.3, head_length=0.9,
          facecolor='r', edgecolor='r', linewidth=0.75);
Adding legends¶
In [10]:
# Draw two series against weight on the same axes; each `label` becomes a legend entry.
for column, series_label in (('mpg', 'Fuel Consumption'), ('hp', 'Horsepower')):
    plt.plot('wt', column, '-o', data=df.sort_values(by='wt'), label=series_label)

# Trailing semicolon suppresses the Legend repr.
plt.legend(loc='upper left', fontsize=13);
Using styles¶
In [11]:
# Names of the style sheets that can be passed to plt.style.context / plt.style.use.
plt.style.available
Out[11]:
['classic',
'seaborn-whitegrid',
'bmh',
'seaborn-paper',
'ggplot',
'seaborn-pastel',
'seaborn-notebook',
'seaborn-ticks',
'seaborn-dark-palette',
'seaborn-white',
'seaborn-darkgrid',
'dark_background',
'grayscale',
'fivethirtyeight',
'seaborn-colorblind',
'seaborn-talk',
'seaborn-poster',
'seaborn-deep',
'seaborn-bright',
'seaborn-muted',
'seaborn-dark',
'presentation']
In [12]:
# Apply the 'seaborn-white' style sheet only for the duration of the with-block.
# The plotting call must be indented so that it sits inside the style context.
with plt.style.context('seaborn-white'):
    plt.plot('wt', 'mpg', '-o', data = df.sort_values('wt'))
In [13]:
# Apply the 'seaborn-dark-palette' style sheet only for the duration of the with-block.
# The plotting call must be indented so that it sits inside the style context.
with plt.style.context('seaborn-dark-palette'):
    plt.plot('wt', 'mpg', '-o', data = df.sort_values('wt'))
In [14]:
# Apply the 'bmh' style sheet only for the duration of the with-block.
# The plotting call must be indented so that it sits inside the style context.
with plt.style.context('bmh'):
    plt.plot('wt', 'mpg', '-o', data = df.sort_values('wt'))
In [15]:
# plt.xkcd() is used as a context manager, so the hand-drawn look is limited
# to the plot created inside the with-block.
with plt.xkcd():
    plt.plot('wt', 'mpg', '-o', data = df.sort_values('wt'))
Customizing seaborn
graphics¶
Since seaborn
is built on top of matplotlib
, customization
options for matplotlib
will also work with seaborn
. However,
seaborn
plotting functions often give much more scope for
customization.
In [16]:
# Swarm plot of mpg within each gear count, with point colour driven by weight.
# x and y are passed by keyword: this works in every seaborn release, whereas
# positional data arguments are rejected by newer versions.
ax = sns.swarmplot(x='gear', y='mpg', hue='wt',
                   size=15, palette="BuGn_r",
                   data=df)
# The hue legend is not wanted on this figure, so remove it from the axes.
ax.legend_.remove()
Layout for Multiple Plots¶
Data aware grids¶
In [17]:
# Grid of box plots: one row per `am` value, one column per `cyl` value.
g = sns.FacetGrid(df, row="am", col="cyl", margin_titles=True)
# Categorical plotters work out their category positions from the data they are
# handed. Inside FacetGrid.map each facet only sees its own subset, so without
# an explicit `order` the same x position can stand for different `carb` values
# in different facets while the shared tick labels show just one mapping.
# Passing the full sorted category list keeps every facet aligned.
g.map(sns.boxplot, 'carb', 'mpg', order=sorted(df['carb'].unique()));
In [18]:
# Same am-by-cyl grid, but each facet keeps its own x and y axis ranges.
g = sns.FacetGrid(
    df,
    row="am",
    col="cyl",
    margin_titles=True,
    sharex=False,
    sharey=False
)
# Distribution of mpg per facet, with a rug of the individual observations.
g.map(sns.distplot, 'mpg', rug=True, color="orange");
/Users/cliburn/anaconda2/envs/p3/lib/python3.5/site-packages/statsmodels/nonparametric/kdetools.py:20: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
y = X[:m/2+1] + np.r_[0,X[m/2+1:],0]*1j
Pairwise Plots¶
In [19]:
# Keep every row but only the five columns used in the pairwise plots below.
df1 = df.loc[:, ['mpg', 'hp', 'drat', 'wt', 'qsec']]
In [20]:
# Pairwise grid over the columns of df1, with a different plot type per region.
g = sns.PairGrid(data=df1)
g.map_upper(plt.scatter)                      # above the diagonal: scatter plots
g.map_lower(sns.kdeplot)                      # below the diagonal: KDE of each pair
g.map_diag(sns.kdeplot, lw=3, legend=False);  # diagonal: KDE of each single column
/Users/cliburn/anaconda2/envs/p3/lib/python3.5/site-packages/statsmodels/nonparametric/kdetools.py:20: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
y = X[:m/2+1] + np.r_[0,X[m/2+1:],0]*1j
Several seaborn
plots use these grids under the hood¶
In [21]:
# Regression of mpg on weight, drawn in one column of panels per `am` value.
# The trailing semicolon suppresses the repr of the grid object that lmplot returns.
sns.lmplot(data=df, x='wt', y='mpg', col='am');
Laying Out Multiple Different Types of Plots¶
In [22]:
# Lay out five different plot types on a single 3x3 grid.
plt.figure(figsize=(9, 9))
ax1 = plt.subplot2grid((3, 3), (0, 0), colspan=3)  # whole top row
ax2 = plt.subplot2grid((3, 3), (1, 0), colspan=2)  # middle row, first two columns
ax3 = plt.subplot2grid((3, 3), (1, 2), rowspan=2)  # last column, lower two rows
ax4 = plt.subplot2grid((3, 3), (2, 0))             # bottom-left cell
ax5 = plt.subplot2grid((3, 3), (2, 1))             # bottom-middle cell

# x and y are passed by keyword throughout: that form is accepted by every
# seaborn release, while positional data arguments are rejected by newer ones.
sns.regplot(x='wt', y='mpg', data=df, ax=ax1)
sns.violinplot(x='gear', y='hp', data=df, ax=ax2)
# `split=True` was dropped: it only separates hue levels, and no hue is used
# here, so it had no effect (later seaborn versions no longer accept it).
sns.swarmplot(x='wt', y='gear', data=df, orient="h",
              size=10, alpha=0.8, palette='pastel', ax=ax3)
# NOTE(review): bivariate KDE is still called with two positional series because
# the keyword names differ between seaborn generations -- confirm the target
# version and switch to x=/y= if it is 0.11 or later.
sns.kdeplot(df.wt, df.hp, ax=ax4)
sns.barplot(x='cyl', y='hp', data=df,
            palette=sns.light_palette('orange'), ax=ax5)

# Adjust subplot spacing so labels and titles do not overlap.
plt.tight_layout()
Exercises¶
1a. Load the iris
data set found in data/iris.csv
into a
DataFrame named iris
.
- How many rows and columns are there in
iris
? - Display the first 6 rows.
In [ ]:
1b. Make a regression plot of Sepal.Length (x-coordinate) by
Sepal.Width (y-coordinate) using the lmplot
function. Add a title
“Bad regression”.
In [ ]:
1c. Create a new figure where you fit separate linear regressions of Sepal.Length (x-coordinate) by Sepal.Width for each species on the same plot.
In [ ]:
1d. Create a new figure where you have separate linear regression plots of Sepal.Length (x-coordinate) by Sepal.Width for each species. This figure should have 1 row and 3 columns.
In [ ]:
1e. Create a new figure with 2 rows and 2 columns, where each subplot
shows a swarmplot
comparing one of the 4 flower features
(Sepal.Length, Sepal.Width, Petal.Length, Petal.Width) across Species.
Challenge: Can you do this in two lines of code?
In [ ]:
1f. Repeat the exercise in 1e, but change the titles of the subplots to “One” and “Two” for the first row and “Three”, “Four” for the second row.
In [ ]:
Version Information¶
In [23]:
%load_ext version_information
%version_information
The version_information extension is already loaded. To reload it, use:
%reload_ext version_information
Out[23]:
Software | Version |
---|---|
Python | 3.5.2 64bit [GCC 4.2.1 Compatible Apple LLVM 4.2 (clang-425.0.28)] |
IPython | 5.0.0 |
OS | Darwin 15.6.0 x86_64 i386 64bit |
Tue Aug 16 09:06:03 2016 EDT |
In [ ]: