Shape Fun!

Programming Graphics in SAS (SGPLOT) and R (ggplot2)

CSJP

Bar Chart …

SAS - SGPLOT

/* Vertical bar chart of favorite flavor, with each bar split by age group. */
proc sgplot data=chocolate;
    title 'Favorite Chocolate Flavors by Age Group';
    /* Display text used in place of the raw variable name. */
    label FavoriteFlavor = 'Flavor of Chocolate';
    vbar FavoriteFlavor / group=AgeGroup;
run;

R - ggplot2

# Bar chart of flavor counts; bars are filled by age group and outlined in black.
ggplot(chocolate, aes(x = FavoriteFlavor, fill = AgeGroup)) +
    geom_bar(colour = "black") +
    scale_fill_brewer(palette = "Pastel1") +
    # Axis labels and title set in one call.
    labs(
        x = "Flavor of Chocolate",
        y = "Frequency",
        title = "Favorite Chocolate Flavors by Age Group"
    ) +
    theme(legend.position = "bottom")

Histogram/Density …

SAS - SGPLOT

/* Histogram of laps completed, overlaid with two density curves. */
proc sgplot data=bikerace;
  title 'Bicycle Criterium Results';
  histogram NumberLaps / showbins;
  /* TYPE= omitted: the procedure's default density curve is drawn. */
  density NumberLaps;
  /* Second curve: kernel density estimate. */
  density NumberLaps / type=kernel;
run;

R - ggplot2

# Density-scaled histogram of laps with a kernel density estimate and a
# normal curve using the sample mean and standard deviation.
# `breaks` is not defined here; it must already exist in the calling environment.
ggplot(bikerace, aes(x = NumberLaps)) +
    # NOTE(review): `..density..` is deprecated since ggplot2 3.4.0 in favour
    # of after_stat(density); kept so the example still runs on older versions.
    geom_histogram(aes(y = ..density..), breaks = breaks, right = TRUE,
        fill = "cornsilk", colour = "grey60", size = 0.5) +
    # Kernel density drawn as a filled area only. `colour = NA` suppresses the
    # outline; element_blank() is a theme element, not a colour value.
    geom_density(aes(fill = "#FF6666"), alpha = 0.2, colour = NA) +
    # Normal curve is drawn twice: once as a line, once as a shaded area.
    stat_function(fun = dnorm, args = list(mean = mean(bikerace$NumberLaps),
        sd = sd(bikerace$NumberLaps))) +
    stat_function(fun = dnorm, args = list(mean = mean(bikerace$NumberLaps),
        sd = sd(bikerace$NumberLaps)), geom = "area", alpha = 0.2,
        aes(fill = "#6666FF")) +
    # The constant fill strings act only as legend keys; relabel them here.
    scale_fill_discrete(labels = c("Normal", "Kernel")) +
    labs(fill = "Curves") +
    guides(fill = guide_legend(reverse = TRUE)) +
    theme(legend.position = "bottom") +
    xlim(15, 65) +
    ggtitle("Bicycle Criterium Results")

Box Plot …

SAS - SGPLOT

/* Box plots of laps completed, one box per racing division. */
PROC SGPLOT DATA=bikerace;
  /* CATEGORY= produces a separate box for each value of Division. */
  VBOX NumberLaps / CATEGORY = Division;
  TITLE 'Bicycle Criterium Results';
RUN;

R - ggplot2

# Box plot of laps per division, with each group's mean marked by a white
# filled point (shape 23).
ggplot(bikerace, aes(x = Division, y = NumberLaps, fill = Division)) +
    geom_boxplot(width = 0.5, outlier.size = 3, outlier.shape = 21) +
    # NOTE(review): `fun.y` was renamed `fun` in ggplot2 3.3.0; the old name is
    # kept so the example still runs on older versions.
    stat_summary(fun.y = "mean", geom = "point", shape = 23, size = 3,
        fill = "white") +
    scale_fill_brewer(palette = "Pastel2") +
    # Legend placement and title only matter if the fill legend is re-enabled.
    theme(legend.position = c(1, 1), legend.justification = c(1, 1)) +
    labs(fill = "Division\nAgeGroup") +
    # Fill duplicates the x axis, so its legend is hidden. "none" replaces the
    # deprecated logical form `fill = FALSE`.
    guides(fill = "none") +
    ggtitle("Bicycle Criterium Results")

Scatter Plot …

SAS - SGPLOT

/* Two scatter series plotted against game action: BSales and CSales. */
proc sgplot data=onionrings;
  title 'Onion Ring Sales vs. Game Action';
  /* Variable labels for the two plotted series. */
  label BSales = 'Sales in Bleachers'
        CSales = 'Sales at Stands';
  scatter x=Action y=BSales;
  scatter x=Action y=CSales;
  /* Explicit axis titles; x tick values fixed at 0, 10, ..., 40. */
  xaxis label='Hits + Runs' values=(0 to 40 by 10);
  yaxis label='Number Sold';
run;

R - ggplot2

# Scatter plot of Count against Action, with colour and point shape both
# keyed to Sales so the two legends merge into one.
ggplot(mydata, aes(x = Action, y = Count, colour = Sales, shape = Sales)) +
    geom_point(size = 3) +
    # Numeric breaks belong on a continuous scale; a discrete scale has no
    # matching levels to place them on.
    # Assumes Action is numeric (expand_limits(x = 45) suggests so) - confirm.
    scale_x_continuous(breaks = seq(0, 40, by = 10)) +
    expand_limits(x = 45) +
    xlab("Hits + Runs") +
    ylab("Number Sold") +
    # Identical labels on both scales keep the merged legend consistent.
    scale_shape_discrete(labels = c("Sales in Stands", "Sales at Bleachers")) +
    scale_colour_discrete(labels = c("Sales in Stands", "Sales at Bleachers")) +
    guides(shape = guide_legend(reverse = TRUE),
        colour = guide_legend(reverse = TRUE)) +
    theme(legend.position = "bottom") +
    ggtitle("Onion Ring Sales vs. Game Action")

Series/Line Plot …

SAS - SGPLOT

/* One line per city (IntFalls, Raleigh, Yuma) by month, with reference
   lines at 32 and 75 degrees. */
proc sgplot data=temperatures;
  /* The two quoted pieces are joined into a single title line. */
  title 'Temperatures by Month for International Falls,  '
        'Raleigh, and Yuma';
  series x=Month y=IntFalls;
  series x=Month y=Raleigh;
  series x=Month y=Yuma;
  /* Half-transparent horizontal reference lines, each with its own label. */
  refline 32 75 / transparency=0.5 label=('32 degrees' '75 degrees');
  /* Treat each Month value as its own category on the x axis. */
  xaxis type=discrete;
  yaxis label='Average High Temperature (F)';
run;

R - ggplot2

# Line chart of Temp by Month, one line per City (distinguished by both
# colour and line type), with faint reference lines at 32 and 75 degrees.
ggplot(mydata, aes(x = Month, y = Temp, linetype = City, colour = City)) +
    geom_line(size = 1) +
    scale_colour_brewer(palette = "Set1") +
    # Numeric breaks belong on continuous scales.
    # Assumes Month and Temp are numeric columns in `mydata` - confirm.
    scale_x_continuous(breaks = seq(0, 12, by = 1)) +
    scale_y_continuous(breaks = seq(0, 100, by = 20)) +
    expand_limits(y = 110) +
    # Constant intercepts are passed as a parameter rather than inside aes():
    # a mapped aesthetic is checked against the layer data and would also
    # inherit the plot-level colour/linetype mappings.
    geom_hline(yintercept = c(32, 75), alpha = 0.3) +
    # Text labels placed just above each reference line at the right edge.
    annotate("text", x = 12, y = 34.5, label = "32 degrees", alpha = 0.3) +
    annotate("text", x = 12, y = 77.5, label = "75 degrees", alpha = 0.3) +
    theme(legend.position = "bottom") +
    ggtitle("Temperatures by Month for International Falls, Raleigh, and Yuma")

Curve-Fitting Plot …

SAS - SGPLOT

/* Men's times by year with two fitted curves: a regression fit and a
   loess fit, each requested with the CLM option. */
proc sgplot data=Olympic1500;
  /* Double quotes are needed because the title contains an apostrophe. */
  title "Olympic Times For Men's 1500 Meter Run";
  /* NOMARKERS: the regression layer does not draw the data points. */
  reg x=Year y=Men / clm nolegclm nomarkers;
  loess x=Year y=Men / clm nolegclm;
  yaxis label='Time in Seconds';
run;

R - ggplot2

# Men's times by year: data points plus a linear-model smooth and a loess
# smooth, each with a shaded band.
# `muted()` is not defined here - presumably scales::muted; confirm the
# package is attached before running.
ggplot(Olympics1500, aes(x = Year, y = Men)) +
    geom_point(shape = 21, size = 3, colour = muted("green"),
        fill = "yellow") +
    # Mapping a constant colour inside aes() gives each fit a legend entry.
    stat_smooth(aes(colour = muted("yellow")), method = lm,
        fill = "grey60", size = 1) +
    stat_smooth(aes(colour = muted("blue")), method = loess,
        fill = "grey40", size = 1, alpha = 0.2) +
    scale_x_continuous(breaks = seq(1900, 2000, by = 25)) +
    # Replace the raw colour strings in the legend with readable names.
    scale_colour_discrete(name = "Fit", labels = c("Loess", "Regression")) +
    guides(colour = guide_legend(reverse = TRUE)) +
    theme(legend.position = "bottom") +
    labs(title = "Olympic Times for Men's 1500 Meter Run")

Panels …

SAS - PROC FREQ (frequency plot)

/* Crosstab of BusType by OnTimeOrLate, requesting a frequency plot in the
   GROUPHORIZONTAL two-way layout. */
proc freq data=bus;
  tables BusType * OnTimeOrLate
         / plots=freqplot(twoway=grouphorizontal);
run;

R - ggplot2

# Bar chart of OnTimeorLate counts, drawn as one panel per BusType.
ggplot(bus, aes(x = OnTimeorLate, fill = OnTimeorLate)) +
    geom_bar() +
    scale_fill_brewer(palette = "Pastel1") +
    # Panels laid out side by side, one column per bus type.
    facet_grid(. ~ BusType) +
    ylab("Frequency") +
    # Fill duplicates the x axis, so its legend is hidden. "none" replaces the
    # deprecated logical form `fill = FALSE`.
    guides(fill = "none") +
    ggtitle("Distribution of BusType by OnTimeorLate")