In [1]:
# Hide warnings if there are any
import warnings
warnings.filterwarnings('ignore')
# R magic setting
%load_ext rpy2.ipython
%R require(tidyverse)
# SQL magic setting
%load_ext sql
%sql sqlite://
#%config SqlMagic.feedback = False
import pandas as pd
import seaborn as sns
import rpy2.robjects as ro
from rpy2.robjects import pandas2ri
In [2]:
%%R
#data(iris)
#iris = rename(iris,
#            Sepal_Length = Sepal.Length,
#            Sepal_Width = Sepal.Width,
#            Petal_Length = Petal.Length,
#            Petal_Width = Petal.Width    
#            )
#iris_db <- src_sqlite("iris_db.sqlite3", create = TRUE)
#copy_to(iris_db, iris, temporary = FALSE)
iris_db = src_sqlite("iris_db.sqlite3")
iris <- tbl(iris_db, "iris")
In [3]:
print(ro.r('iris %>% group_by(Species) %>% \
  summarise_each(funs(mean(., na.rm = TRUE)))'))
Source:   query [?? x 5]

Database: sqlite 3.11.1 [iris_db.sqlite3]



     Species Sepal_Length Sepal_Width Petal_Length Petal_Width

       <chr>        <dbl>       <dbl>        <dbl>       <dbl>

1     setosa        5.006       3.428        1.462       0.246

2 versicolor        5.936       2.770        4.260       1.326

3  virginica        6.588       2.974        5.552       2.026

In [4]:
print(ro.r(' \
iris_db%>% \
  tbl(sql( \
    " \
    SELECT * \
    FROM iris \
    " \
  )) \
           '))
Source:   query [?? x 5]

Database: sqlite 3.11.1 [iris_db.sqlite3]



   Sepal_Length Sepal_Width Petal_Length Petal_Width Species

          <dbl>       <dbl>        <dbl>       <dbl>   <chr>

1           5.1         3.5          1.4         0.2  setosa

2           4.9         3.0          1.4         0.2  setosa

3           4.7         3.2          1.3         0.2  setosa

4           4.6         3.1          1.5         0.2  setosa

5           5.0         3.6          1.4         0.2  setosa

6           5.4         3.9          1.7         0.4  setosa

7           4.6         3.4          1.4         0.3  setosa

8           5.0         3.4          1.5         0.2  setosa

9           4.4         2.9          1.4         0.2  setosa

10          4.9         3.1          1.5         0.1  setosa

# ... with more rows

In [5]:
%sql sqlite:///iris_db.sqlite3 
Out[5]:
u'Connected: None@iris_db.sqlite3'
In [6]:
%%sql
SELECT name FROM sqlite_master WHERE type='table'
Done.
Out[6]:
name
iris
sqlite_stat1
In [7]:
%%sql

-- Select everything
SELECT *
FROM iris
limit 5
Done.
Out[7]:
Sepal_Length Sepal_Width Petal_Length Petal_Width Species
5.1 3.5 1.4 0.2 setosa
4.9 3.0 1.4 0.2 setosa
4.7 3.2 1.3 0.2 setosa
4.6 3.1 1.5 0.2 setosa
5.0 3.6 1.4 0.2 setosa
In [8]:
iris = %sql SELECT * FROM iris
iris_df = iris.DataFrame()
iris_df.info()
Done.
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
Sepal_Length    150 non-null float64
Sepal_Width     150 non-null float64
Petal_Length    150 non-null float64
Petal_Width     150 non-null float64
Species         150 non-null object
dtypes: float64(4), object(1)
memory usage: 5.9+ KB
In [9]:
iris_df.groupby('Species').mean()
Out[9]:
Sepal_Length Sepal_Width Petal_Length Petal_Width
Species
setosa 5.006 3.428 1.462 0.246
versicolor 5.936 2.770 4.260 1.326
virginica 6.588 2.974 5.552 2.026
In [10]:
%matplotlib inline
sns.pairplot(iris_df, hue="Species")
Out[10]:
<seaborn.axisgrid.PairGrid at 0x1145c9e8>