In [1]:
from rpy2.robjects import r
from rpy2.robjects import pandas2ri
def data(name):
    """Fetch an R object by name and convert it with rpy2's pandas converter.

    Parameters
    ----------
    name : str
        Name looked up in the embedded R session via ``r[name]``
        (for example ``'iris'``).

    Returns
    -------
    The result of applying the ``pandas2ri`` R-to-Python conversion to
    ``r[name]``.
    """
    # rpy2 3.0 renamed ``pandas2ri.ri2py`` to ``pandas2ri.rpy2py``. Prefer the
    # new name and fall back to the old one so the helper runs on both lines.
    convert = getattr(pandas2ri, 'rpy2py', None) or pandas2ri.ri2py
    return convert(r[name])
In [2]:
df = data('iris')
%time df.describe()
Wall time: 214 ms
Out[2]:
Sepal.Length Sepal.Width Petal.Length Petal.Width
count 150.000000 150.000000 150.000000 150.000000
mean 5.843333 3.057333 3.758000 1.199333
std 0.828066 0.435866 1.765298 0.762238
min 4.300000 2.000000 1.000000 0.100000
25% 5.100000 2.800000 1.600000 0.300000
50% 5.800000 3.000000 4.350000 1.300000
75% 6.400000 3.300000 5.100000 1.800000
max 7.900000 4.400000 6.900000 2.500000
In [3]:
# Dimensions of the frame as a (rows, columns) tuple.
df.shape
Out[3]:
(150, 5)
In [4]:
# Preview the first five rows (the default for head()).
df.head(n=5)
Out[4]:
Sepal.Length Sepal.Width Petal.Length Petal.Width Species
1 5.1 3.5 1.4 0.2 setosa
2 4.9 3.0 1.4 0.2 setosa
3 4.7 3.2 1.3 0.2 setosa
4 4.6 3.1 1.5 0.2 setosa
5 5.0 3.6 1.4 0.2 setosa
In [5]:
# Class distribution: number of rows for each value of 'Species'.
species_sizes = df.groupby('Species').size()
print(species_sizes)
Species
setosa        50
versicolor    50
virginica     50
dtype: int64
In [6]:
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
# Default figure size for subsequent plots, then the ggplot style sheet.
plt.rcParams.update({"figure.figsize": (12, 9)})
plt.style.use('ggplot')
# Box-and-whisker plots: one subplot per column on a 2x2 grid,
# with no axis shared between subplots.
df.plot.box(subplots=True, layout=(2, 2), sharex=False, sharey=False)
plt.show()
In [7]:
import seaborn as sns
# Grid of pairwise plots over the columns of df.
pair_grid = sns.pairplot(data=df)
plt.show()