# Richard Waterman
# November 2023
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
from datetime import datetime, date
# Load the outpatient appointment data from the course "DataSets" folder.
os.chdir('C:\\Users\\water\\Dropbox (Penn)\\Teaching\\4770f2023\\DataSets')
op_data = pd.read_csv(
    "Outpatient.csv",
    parse_dates=['SchedDate', 'ApptDate'],  # Parse both date columns so they can be subtracted.
)
# Schedule lag: the appointment date minus the date on which it was scheduled.
op_data['ScheduleLag'] = op_data['ApptDate'] - op_data['SchedDate']
# Bare expression: shows the column names when run interactively.
op_data.columns
# Output:
# Index(['PID', 'SchedDate', 'ApptDate', 'Dept', 'Language', 'Sex', 'Age',
# 'Race', 'Status', 'ScheduleLag'],
# dtype='object')
# Start from the default seaborn theme, then set a default plot size for axes level plots.
sns.set()
sns.set(rc={'figure.figsize': (4.5, 3.18)})

# Create a new variable that holds the schedule lag in days.
op_data['SL'] = op_data['ScheduleLag'].dt.days

# A default boxplot. The trailing semi-colon is a trick to stop unwanted output on the terminal.
sns.boxplot(data=op_data, x='SL');
pass  # Another way to suppress unwanted text output from the plot commands.

# Note that the data argument (the DataFrame) is not quoted, but the x argument (a column name) is.
# "Flier" is the name for the outliers; fliersize is set to 1.0 here.
sns.boxplot(
    data=op_data,
    x='SL',
    color='red',
    fliersize=1.0,
);
pass
# Signature of Axes.boxplot, for reference:
# Axes.boxplot(self, x, notch=None, sym=None, vert=None, whis=None, positions=None, widths=None, patch_artist=None,
# bootstrap=None, usermedians=None, conf_intervals=None, meanline=None, showmeans=None, showcaps=None, showbox=None,
# showfliers=None, boxprops=None, labels=None, flierprops=None, medianprops=None, meanprops=None, capprops=None,
# whiskerprops=None, manage_ticks=True, autorange=False, zorder=None, *, data=None)
# Same red boxplot with notch=True and sym="" added; keep the returned object this time.
g = sns.boxplot(
    x='SL',
    data=op_data,
    color='red',
    fliersize=1.0,
    notch=True,
    sym="",
);
# Show what kind of object the boxplot call handed back.
print(type(g))
# Output: <class 'matplotlib.axes._subplots.AxesSubplot'>
# A boxplot again, this time from the figure level "catplot" command.
g = sns.catplot(
    data=op_data,
    x='SL',
    kind="box",
    height=2,
    aspect=2,
)
# The returned object has built in methods for tweaking the plot.
g.set_xlabels("Schedule lag")
g.fig.suptitle('Distribution of schedule lags')
# Show what kind of object catplot handed back.
print(type(g));
# Output: <class 'seaborn.axisgrid.FacetGrid'>
# A violin plot, from the same figure level "catplot" command (kind="violin").
g = sns.catplot(
    data=op_data,
    x='SL',
    kind="violin",
    height=4,
)
# Tweak the labels and title using the built in methods of the returned object.
g.set_xlabels("Schedule lag")
g.fig.suptitle('Distribution of schedule lags');
pass

# Distribution plot with the kernel density estimate removed and a rug plot added.
sns.displot(
    op_data['SL'],
    kde=False,
    rug=True,
    height=4,
    aspect=2,
);
pass
import os

# Move to the Images folder; the savefig calls below use relative file names.
os.chdir('C:\\Users\\water\\Dropbox (Penn)\\Teaching\\4770f2023\\Images')

# Distribution plot of the schedule lag, this time with the kernel density estimate.
sns.displot(
    op_data['SL'],
    kde=True,
    height=4,
    aspect=2,
);

# Save the plot three times with savefig, once per file format.
plt.savefig("output_{0}.png".format('OP'))  # png format.
plt.savefig("output_{0}.jpeg".format('OP'))  # jpeg format.
plt.savefig("output_{0}.svg".format('OP'))  # svg format.
#### Build the plot
# NOTE(review): new_dept is used here but is first assigned further down this file;
# presumably the statement that created it was lost -- confirm and restore it.
# Count plot of 'Dept'. In the palette string, "ch" stands for a "cube-helix" color palette.
g = sns.catplot(
    data=new_dept,
    x='Dept',
    kind="count",
    palette="ch:.25",
    height=3,
    aspect=2,
)
# Rotate the tick labels and right-align them.
g.set_xticklabels(rotation=45, horizontalalignment='right')
# This adds more white space to the bottom of the plot.
plt.subplots_adjust(bottom=0.4)
# Save with the savefig method.
plt.savefig("output_{0}.png".format('Dept'))
# Keep only the rows whose 'Dept' is in top_dept's index; all of the columns are kept now.
# NOTE(review): top_dept is not defined anywhere in this file -- confirm where it comes from.
in_top_dept = op_data['Dept'].isin(top_dept.index)
new_dept = op_data.loc[in_top_dept]

# One count plot per level of 'Sex', laid out in columns. Note the 'col' argument.
g = sns.catplot(
    data=new_dept,
    x='Dept',
    kind="count",
    palette="ch:.25",
    col="Sex",
    height=4,
    aspect=2,
)
g.set_xticklabels(rotation=45, horizontalalignment='right');
pass

# A grid of count plots: rows by 'Sex', columns by 'Status'. Note the 'row' and 'col' arguments.
g = sns.catplot(
    data=new_dept,
    x='Dept',
    kind="count",
    palette="ch:.25",
    row='Sex',
    col="Status",
    height=2,
    aspect=1.5,
)
g.set_xticklabels(rotation=45, horizontalalignment='right')
pass
# Just work with the frequencies here: the first five entries of the 'Dept' value counts.
dept_counts = op_data['Dept'].value_counts().head(5)
# This is a pandas plot, not a seaborn one.
dept_counts.plot.pie(figsize=(6, 6));
pass
# Possible values are: Accent, Accent_r, Blues, Blues_r, BrBG, BrBG_r, BuGn, BuGn_r, BuPu, BuPu_r, CMRmap, CMRmap_r, Dark2,
# Dark2_r, GnBu, GnBu_r, Greens, Greens_r, Greys, Greys_r, OrRd, OrRd_r, Oranges, Oranges_r, PRGn, PRGn_r, Paired, Paired_r,
# Pastel1, Pastel1_r, Pastel2, Pastel2_r, PiYG, PiYG_r, PuBu, PuBuGn, PuBuGn_r, PuBu_r, PuOr, PuOr_r, PuRd, PuRd_r, Purples,
# Purples_r, RdBu, RdBu_r, RdGy, RdGy_r, RdPu, RdPu_r, RdYlBu, RdYlBu_r, RdYlGn, RdYlGn_r, Reds, Reds_r, Set1, Set1_r, Set2,
# Set2_r, Set3, Set3_r, Spectral, Spectral_r, Wistia, Wistia_r, YlGn, YlGnBu, YlGnBu_r, YlGn_r, YlOrBr, YlOrBr_r, YlOrRd,
# YlOrRd_r, afmhot, afmhot_r, autumn, autumn_r, binary, binary_r, bone, bone_r, brg, brg_r, bwr, bwr_r, cividis, cividis_r, cool,
# cool_r, coolwarm, coolwarm_r, copper, copper_r, cubehelix, cubehelix_r, flag, flag_r, gist_earth, gist_earth_r, gist_gray,
# gist_gray_r, gist_heat, gist_heat_r, gist_ncar, gist_ncar_r, gist_rainbow, gist_rainbow_r, gist_stern, gist_stern_r, gist_yarg,
# gist_yarg_r, gnuplot, gnuplot2, gnuplot2_r, gnuplot_r, gray, gray_r, hot, hot_r, hsv, hsv_r, icefire, icefire_r, inferno,
# inferno_r, jet, jet_r, magma, magma_r, mako, mako_r, nipy_spectral, nipy_spectral_r, ocean, ocean_r, pink, pink_r, plasma,
# plasma_r, prism, prism_r, rainbow, rainbow_r, rocket, rocket_r, seismic, seismic_r, spring, spring_r, summer, summer_r, tab10,
# tab10_r, tab20, tab20_r, tab20b, tab20b_r, tab20c, tab20c_r, terrain, terrain_r, twilight, twilight_r, twilight_shifted,
# twilight_shifted_r, viridis, viridis_r, vlag, vlag_r, winter, winter_r
# Count plot of 'Dept' colored with custom_palette.
# NOTE(review): custom_palette and wharton_colors are not defined anywhere in this file --
# confirm where they are meant to be created.
g = sns.catplot(
    data=new_dept,
    x='Dept',
    kind="count",
    palette=custom_palette,
    height=4,
    aspect=2,
)
g.set_xticklabels(rotation=45, horizontalalignment='right');
pass

# The same count plot colored with wharton_colors, at a smaller height.
g = sns.catplot(
    data=new_dept,
    x='Dept',
    kind="count",
    palette=wharton_colors,
    height=3,
    aspect=2,
)
g.set_xticklabels(rotation=45, horizontalalignment='right');
pass
# Use a different palette: "flare".
sns.set_palette(sns.color_palette("flare"))

# Subset the data to the rows whose 'Dept' is in top_dept's index.
# NOTE(review): top_dept is not defined anywhere in this file -- confirm where it comes from.
new_dept = op_data.loc[op_data['Dept'].isin(top_dept.index)]

# The default "catplot" (no 'kind' given): schedule lag against department.
g = sns.catplot(
    data=new_dept,
    x='Dept',
    y="SL",
    height=4,
    aspect=2,
)
g.set_xticklabels(rotation=45, horizontalalignment='right');
pass

# Use a different palette: "Set2".
sns.set_palette(sns.color_palette("Set2"))

# The comparison boxplots (kind="box").
g = sns.catplot(
    data=new_dept,
    x='Dept',
    y="SL",
    kind="box",
    height=3,
    aspect=2,
)
g.set_xticklabels(rotation=45, horizontalalignment='right');
pass

# The comparison violin plots (kind="violin").
g = sns.catplot(
    data=new_dept,
    x='Dept',
    y="SL",
    kind="violin",
    height=3,
    aspect=2,
)
g.set_xticklabels(rotation=45, horizontalalignment='right');
pass

# Boxplots again, split by 'Status' within each department. Note the 'hue' argument.
sns.catplot(
    data=new_dept,
    x='Dept',
    y="SL",
    hue='Status',
    kind="box",
    height=3,
    aspect=3,
);
pass
# Set the size of the plot by creating the matplotlib figure and axes first.
f, ax = plt.subplots(1, 1, figsize=(10, 5))
# Note we are back to boxplot, drawn onto the axes created above via 'ax'.
sns.boxplot(
    data=new_dept,
    x='Dept',
    y="SL",
    hue='Status',
    ax=ax,
);
plt.savefig("output_{0}.png".format('Comps'))  # savefig method for png format.

# The 'bar' kind, with red error bars and errorbar=('ci', 95).
g = sns.catplot(
    data=new_dept,
    x='Dept',
    y="SL",
    kind="bar",
    hue='Status',
    height=4,
    aspect=3,
    errcolor="red",
    errorbar=('ci', 95),
)
g.set_ylabels("Average Schedule lag");
pass
# Go back to the DataSets folder and read in the car data.
os.chdir('C:\\Users\\water\\Dropbox (Penn)\\Teaching\\4770f2023\\DataSets')
car_data = pd.read_csv("Car08_just_499.csv")
# List the available columns.
print(car_data.columns)
# Output:
# Index(['Make/Model', 'MPG_City', 'MPG_Hwy', 'Weight(lb)', 'Seating',
# 'Horsepower', 'HP/Pound', 'Displacement', 'Cylinders', 'Origin',
# 'Transmission', 'EPA_Class', 'Length', 'Fuel', 'HEV', 'Turbocharger',
# 'Make', 'Model', 'GP1000M_City', 'GP1000M_Hwy'],
# dtype='object')
# This is a simple plot of the two variables.
# 'GP1000M_City' against 'Weight(lb)', with hue set by 'Transmission' and marker style by 'Cylinders'.
sns.relplot(
    data=car_data,
    x="Weight(lb)",
    y="GP1000M_City",
    hue="Transmission",
    style="Cylinders",
);
pass

# The same plot using the "copper" palette.
sns.relplot(
    data=car_data,
    x="Weight(lb)",
    y="GP1000M_City",
    hue="Transmission",
    style="Cylinders",
    palette="copper",
);
pass

# Add a size encoding for 'Horsepower' (sizes=(20, 200)) and switch to the "Reds" palette.
sns.relplot(
    data=car_data,
    x="Weight(lb)",
    y="GP1000M_City",
    hue="Transmission",
    size="Horsepower",
    sizes=(20, 200),
    style="Cylinders",
    palette="Reds",
    height=3,
    aspect=2,
);
pass

# Joint plot of the same two variables, using kind="kde".
sns.jointplot(
    data=car_data,
    x="Weight(lb)",
    y="GP1000M_City",
    color="red",
    kind="kde",
    height=5,
);
pass
# Change the style to "whitegrid" and set font_scale to 0.75.
sns.set(style="whitegrid", font_scale=0.75)

# Pair plot of a handful of the car columns, with hue set by 'Transmission'.
pair_columns = ['GP1000M_City', 'Weight(lb)', 'Horsepower', 'Length', 'Transmission']
tmp_data = car_data[pair_columns]
sns.pairplot(tmp_data, hue="Transmission", height=2);
plt.savefig("output_{0}.png".format('Cars'))  # savefig method for png format
# This is where the plot will be saved. Every backslash in the path is doubled so that
# none of them starts an escape sequence.
os.chdir('C:\\Users\\water\\Dropbox (Penn)\\Teaching\\4770f2023\\Notes\\images')

# Cut the schedule lag into 8 quantile-based levels (equal numbers in each category).
op_data['SL_8Cut'] = pd.qcut(op_data['SL'], 8)

# Controlling plot size using matplotlib. This returns a figure and an axes to plot on.
fig, ax1 = plt.subplots(figsize=(18, 9))
plt.xticks(fontsize=8)
plt.yticks(fontsize=8)
plt.rcParams['font.size'] = 12
plt.rcParams['text.color'] = 'black'

# NOTE(review): `mosaic` is not imported anywhere in this file -- presumably
# statsmodels.graphics.mosaicplot.mosaic; confirm and add the import before running.
mosaic(op_data, ['SL_8Cut', 'Status'], ax=ax1);  # Plots the mosaic plot on the axes "ax1".
plt.savefig("mosaic_{0}.png".format('OP'))  # savefig method for png format.
plt.close()  # This will stop the plot being displayed here.
pass