In [11]:
library(tidyverse)
In [12]:
# Load the built-in iris data as a tibble; later cells reuse `df`.
df <- as_tibble(iris)

# Summarise the first four columns with base summary().
# To time this step, wrap the call in system.time(...).
summary(select(df, 1:4))
  Sepal.Length    Sepal.Width     Petal.Length    Petal.Width   
 Min.   :4.300   Min.   :2.000   Min.   :1.000   Min.   :0.100  
 1st Qu.:5.100   1st Qu.:2.800   1st Qu.:1.600   1st Qu.:0.300  
 Median :5.800   Median :3.000   Median :4.350   Median :1.300  
 Mean   :5.843   Mean   :3.057   Mean   :3.758   Mean   :1.199  
 3rd Qu.:6.400   3rd Qu.:3.300   3rd Qu.:5.100   3rd Qu.:1.800  
 Max.   :7.900   Max.   :4.400   Max.   :6.900   Max.   :2.500  
In [13]:
# Per-column descriptive statistics with pandas-style row labels.
#
# Args:
#   df: a data frame, tibble or list whose columns are numeric vectors.
#
# Returns:
#   A list-matrix built by rbind(): one column per column of `df`, and the
#   rows count, mean, std, min, 25%, 50%, 75%, max.
#
# Missing values are dropped for every statistic (including the quartiles), so
# a column containing NA is summarised over its non-missing entries only.
describe <- function(df) {
  rbind(
    # Number of non-missing observations in each column.
    count = lapply(df, function(column) NROW(na.omit(column))),
    mean  = lapply(df, mean, na.rm = TRUE),
    std   = lapply(df, sd, na.rm = TRUE),
    min   = lapply(df, min, na.rm = TRUE),
    # quantile() stops on NA unless na.rm = TRUE is passed explicitly.
    "25%" = lapply(df, quantile, probs = 0.25, na.rm = TRUE),
    "50%" = lapply(df, median, na.rm = TRUE),
    "75%" = lapply(df, quantile, probs = 0.75, na.rm = TRUE),
    max   = lapply(df, max, na.rm = TRUE)
  )
}
In [14]:
# Apply describe() to the first four columns of df.
# To time this step, wrap the call in system.time(...).
describe(select(df, 1:4))
      Sepal.Length  Sepal.Width  Petal.Length  Petal.Width
count 150           150          150           150
mean  5.843333      3.057333     3.758         1.199333
std   0.8280661     0.4358663    1.765298      0.7622377
min   4.3           2            1             0.1
25%   5.1           2.8          1.6           0.3
50%   5.8           3            4.35          1.3
75%   6.4           3.3          5.1           1.8
max   7.9           4.4          6.9           2.5
In [15]:
# Dimensions of df: number of rows, then number of columns.
dim(df)
  1. 150
  2. 5
In [16]:
# Preview the first rows of df.
head(df)
Sepal.Length Sepal.Width Petal.Length Petal.Width Species
5.1 3.5 1.4 0.2 setosa
4.9 3.0 1.4 0.2 setosa
4.7 3.2 1.3 0.2 setosa
4.6 3.1 1.5 0.2 setosa
5.0 3.6 1.4 0.2 setosa
5.4 3.9 1.7 0.4 setosa
In [17]:
# Class of every column of df, returned as a list named by column.
map(df,class)
$Sepal.Length
'numeric'
$Sepal.Width
'numeric'
$Petal.Length
'numeric'
$Petal.Width
'numeric'
$Species
'factor'
In [18]:
# Draw one base-graphics boxplot per column 1-4 of df on a 2 x 2 grid.
# par() returns the previous settings; restoring them afterwards keeps the
# 2 x 2 layout from leaking into later plots drawn on the same device.
old_par <- par(mfrow = c(2, 2))
for (i in 1:4) {
  boxplot(df[, i], main = names(df)[i])
}
par(old_par)
In [19]:
# Multiple plot function
#
# Prints several plot objects on a single page using a grid layout.
#
# ggplot objects can be passed in ..., or to plotlist (as a list of ggplot objects)
# - cols:   Number of columns in layout
# - layout: A matrix specifying the layout. If present, 'cols' is ignored.
# - file:   Not used by the function body; kept so existing calls still match.
#
# If the layout is something like matrix(c(1,2,3,3), nrow=2, byrow=TRUE),
# then plot 1 will go in the upper left, 2 will go in the upper right, and
# 3 will go all the way across the bottom.
#
# When no plots are supplied the function does nothing and returns NULL
# invisibly.
#
multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) {
  library(grid)

  # Make a list from the ... arguments and plotlist
  plots <- c(list(...), plotlist)
  numPlots <- length(plots)

  # Nothing to draw: return before building a layout or opening a page.
  if (numPlots == 0) {
    return(invisible(NULL))
  }

  # If layout is NULL, then use 'cols' to determine layout
  if (is.null(layout)) {
    # Rows needed to fit every plot into 'cols' columns; cells are numbered
    # column by column, so plot i lands in the cell holding the value i.
    numRows <- ceiling(numPlots / cols)
    layout <- matrix(seq_len(cols * numRows), ncol = cols, nrow = numRows)
  }

  if (numPlots == 1) {
    print(plots[[1]])
  } else {
    # Set up the page
    grid.newpage()
    pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))

    # Make each plot, in the correct location
    for (i in seq_len(numPlots)) {
      # Get the row/column positions of the layout cells assigned to plot i
      matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))

      print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,
                                      layout.pos.col = matchidx$col))
    }
  }
}

# Build a boxplot of one column of `data`, selected by position `idx`.
# The column is referenced through the .data pronoun, so the plot reads the
# values from its own `data` argument rather than from a global vector.
# The x axis carries the column name as its title; tick labels are hidden
# because the single x position (factor(0)) is only a placeholder.
column_boxplot <- function(data, idx) {
  col_name <- colnames(data)[idx]
  ggplot(data = data) +
    geom_boxplot(aes(x = factor(0), y = .data[[col_name]])) +
    xlab(col_name) +
    ylab("") +
    theme(axis.text.x = element_blank())
}

p1 <- column_boxplot(df, 1)
p2 <- column_boxplot(df, 2)
p3 <- column_boxplot(df, 3)
p4 <- column_boxplot(df, 4)

# multiplot() fills its layout column by column, so passing p1, p3, p2, p4
# with cols = 2 shows p1 and p2 on the top row and p3 and p4 on the bottom.
multiplot(p1, p3, p2, p4, cols=2)
In [20]:
library(GGally)
# GGally pairs plot of df restricted to columns 1-4.
ggpairs(df, columns=1:4)
In [ ]: