Recreate the graphs below by building them up layer by layer with ggplot2 commands.
Recreate the following plot of flight delays in Texas.
Use
library(maps)
texas <- map_data("state", "texas")
to retrieve the coordinates necessary to make a polygon in the shape of Texas.
# Texas flight-delay map: state outline plus one point per airport in feb13.

# Outline coordinates for Texas; supplies the long/lat columns drawn below.
texas <- map_data("state", "texas")

# Reusable bundle of plot components (outline + axis limits). ggplot2 adds
# every element of a list in turn, so the bundle is built with list() rather
# than c(), which only happens to produce a list for these objects.
texmap <- list(
  geom_polygon(data = texas, colour = "grey70", fill = NA),
  scale_x_continuous("", limits = c(-107, -93)),
  scale_y_continuous("", limits = c(25.9, 37))
)

ggplot(feb13, aes(long, lat)) +
  texmap +
  # Size encodes ntot; colour encodes the ratio ndelay / ntot.
  geom_point(aes(size = ntot, colour = ndelay / ntot)) +
  # Only airports with at least 100 delayed flights get a text label.
  geom_text(
    aes(label = origin),
    data = subset(feb13, ndelay >= 100),
    size = 4,
    hjust = 1.5
  ) +
  scale_area("total flights", to = c(1, 8)) +
  scale_colour_gradient("percent delayed")
Recreate the following plot of flights cancelled by size of the airport.
# Cancelled flights against total flights per airport, with IAH highlighted.
ggplot(feb13, aes(ntot, ncancel)) +
  # Large translucent red disc, added first so the regular point for IAH is
  # drawn on top of it.
  geom_point(
    data = subset(feb13, origin == "IAH"),
    size = 7,
    colour = alpha("red", 0.5)
  ) +
  geom_point() +
  geom_text(
    data = subset(feb13, origin == "IAH"),
    aes(label = origin),
    hjust = -.5
  ) +
  # Linear fit with its standard-error band; TRUE is spelled out because the
  # shorthand T is an ordinary variable that can be reassigned.
  geom_smooth(method = "lm", se = TRUE) +
  labs(y = "Number of flights cancelled", x = "Total number of flights")
Recreate the following map of flight delays for airports with 100 or more flights on Feb. 13th.
# Restrict feb13 to airports with long > -130 and lat > 20 (presumably the
# contiguous states, going by the variable name).
lower48 <- subset(feb13, long > -130 & lat > 20)

# Map of the remaining airports that have at least 100 flights, drawn over
# state borders. Point size follows ndelay; colour follows log(avgdelay).
ggplot(
  data = subset(lower48, ntot >= 100),
  mapping = aes(x = long, y = lat)
) +
  borders("state") +
  geom_point(mapping = aes(size = ndelay, colour = log(avgdelay)))
Recreate the following plot of cancellations by longitude.
# cperc against longitude; colour repeats cperc and size follows ntot.
ggplot(data = feb13, mapping = aes(x = long, y = cperc)) +
  geom_point(mapping = aes(colour = cperc, size = ntot)) +
  # Label the airports with cperc above 0.4 that lie west of longitude -100.
  geom_text(
    mapping = aes(label = origin),
    data = subset(feb13, cperc > 0.4 & long < -100),
    hjust = 1.2,
    angle = -45,
    colour = "orange"
  )
Recreate the following plot of flight volume by longitude.
Specify stat = "density"
to generate the ..density..
variable.
# Density of airports along longitude, with two reference lines and labels.
ggplot(data = feb13, mapping = aes(x = long, y = ntot)) +
  # stat = "density" computes the ..density.. variable used for the height.
  geom_area(mapping = aes(y = ..density..), stat = "density", alpha = 0.5) +
  geom_vline(xintercept = c(-118, -87)) +
  # Both city labels share every text setting, so they are drawn from one
  # small data frame; each label sits one degree west of its line.
  geom_text(
    mapping = aes(x = x, y = y, label = label),
    data = data.frame(
      x = c(-119, -88),
      y = c(0, 0),
      label = c("Los Angeles", "Chicago"),
      stringsAsFactors = FALSE
    ),
    size = 4,
    hjust = 0,
    vjust = 0,
    angle = 90
  )
Recreate the following chart for the 11 busiest airports.
Ensure you specified stringsAsFactors = F
when you read in the data set.
# Airports with more than 400 flights in feb13.
main <- subset(feb13, ntot > 400)

# One bar per airport. ntot is already the bar height, so the bars use
# stat = "identity" instead of the default stat, which counts rows and does
# not expect a y aesthetic. Fill colour follows cperc.
ggplot(main, aes(origin, ntot)) +
  geom_bar(aes(fill = cperc), stat = "identity") +
  # Rotate the airport codes so they do not overlap on the x axis.
  # NOTE(review): opts()/theme_text() are the older ggplot2 spellings; newer
  # releases use theme()/element_text() - confirm the installed version.
  opts(axis.text.x = theme_text(angle = 90, hjust = 1))
Include the words that appear on the graph.
Start by making the following data set
# Baby-names data; strings stay character so they can be matched by name.
names <- read.csv("baby-names.csv", header = TRUE, stringsAsFactors = FALSE)

# First names to look up in the data set.
class <- c(
  "Rakesh", "Luis", "Yanli", "Yen-yin", "Sarah", "Delma", "Chandra",
  "Elizabeth", "Kim-chi", "Amanda", "Thomas", "Caroline", "Da", "Christine",
  "Debra", "Christopher", "Justin", "Lisa", "Meng", "Emilian", "Rachel", "Lu",
  "Casper", "Jingjing", "Chengyong", "Ruo", "Zhongyu"
)

# Keep only the rows for those names, then collapse to one row per
# name/year pair holding the average of percent over the rows that share it.
class_names <- subset(names, name %in% class)
class_names <- ddply(
  class_names,
  c("name", "year"),
  summarise,
  percent = mean(percent)
)
# Area chart of percent by year, one filled area per name, plus a footnote.
ggplot(data = class_names, mapping = aes(x = year, y = percent)) +
  geom_area(mapping = aes(group = name, fill = name)) +
  # The footnote is a one-row data set; its x/y positions are picked up from
  # the plot-level mapping and only the label is mapped here.
  geom_text(
    mapping = aes(label = label),
    data = data.frame(
      year = 1925,
      percent = 0.10,
      label = "*some names did not appear in the dataset",
      stringsAsFactors = FALSE
    ),
    size = 4
  )
Include the words that appear on the graph.
Use the same data set as above
Consider using round_any()
to make continuous variables discrete.
# Box plots of percent per five-year bin, with a smoothed trend on top.
ggplot(class_names, aes(year, percent)) +
  # round_any(year, 5, floor) rounds each year down to a multiple of 5, which
  # turns the continuous year into the grouping variable for the boxes.
  geom_boxplot(aes(group = round_any(year, 5, floor))) +
  # Smoothed line without the standard-error band (FALSE spelled out, since
  # the shorthand F can be reassigned).
  geom_smooth(se = FALSE, size = 1) +
  # The two captions are placed with one-row data frames giving x and y.
  geom_text(
    aes(year, percent, label = "*blue line is a smoothed mean"),
    colour = "blue",
    data = data.frame(year = 1906, percent = 0.029),
    size = 4
  ) +
  geom_text(
    aes(year, percent, label = "Popularity of class names as a group"),
    data = data.frame(year = 1911, percent = 0.03),
    size = 4
  )
Create both graphs then comment on the merits of each one compared to the other.
# Graph 1: bars for each cut placed side by side within every clarity level.
ggplot(data = diamonds, mapping = aes(x = clarity)) +
  geom_bar(mapping = aes(fill = cut), position = "dodge")

# Graph 2: the same bars, but each cut gets its own row of panels.
ggplot(data = diamonds, mapping = aes(x = clarity)) +
  geom_bar(mapping = aes(fill = cut)) +
  facet_grid(cut ~ .)
Recreate the following pie chart.
To make a bar graph of the whole data set, specify aes(x = "", ...)
. What happens to the bar when you switch the x or y axis to polar coordinates?
# Pie chart of cut: a single bar (x is the constant "") filled by cut, then
# its y axis is mapped to the angle of a polar coordinate system.
ggplot(data = diamonds, mapping = aes(x = "", fill = cut)) +
  geom_bar(width = 1) +
  coord_polar(theta = "y")
Recreate the following density map.
Consider stat_density2d(..., contour = F)
.
# 2-d density of g against year, shown as shaded tiles.
ggplot(b, aes(year, g)) +
  # contour = FALSE makes the stat return the density grid itself, so each
  # tile can be filled by ..density.. (FALSE spelled out; F can be
  # reassigned).
  stat_density2d(geom = "tile", aes(fill = ..density..), contour = FALSE) +
  # Low density is black, high density is white.
  scale_fill_gradient(low = "black", high = "white")
Recreate the following line graph.
# Rows for the two teams, with the team codes replaced by display names.
yankees <- subset(b, team == "NYA")
yankees <- transform(yankees, team = "Yankees")
boston <- subset(b, team == "BOS")
boston <- transform(boston, team = "Red Sox")

# Both teams stacked into one data set.
yb <- rbind(yankees, boston)

# Total of r per year and team; missing values are skipped (TRUE spelled
# out, since the shorthand T can be reassigned).
yb_runs <- ddply(
  yb,
  c("year", "team"),
  summarise,
  total_runs = sum(r, na.rm = TRUE)
)
# Smoothed total runs per year for each team, with the years 1918 and 2004
# marked by vertical lines.
ggplot(yb_runs, aes(year, total_runs)) +
  geom_smooth(aes(colour = team)) +
  # The argument is named in full: `value` only works through partial
  # matching of `values`.
  scale_colour_manual(values = c("red", "blue")) +
  # The two years are constants, not columns of yb_runs, so xintercept is
  # set directly instead of being mapped inside aes().
  geom_vline(xintercept = c(1918, 2004)) +
  # Vertical captions placed one year to the left of each line.
  geom_text(
    aes(x, y, label = "Curse Begins"),
    data = data.frame(x = 1917, y = 400),
    size = 3,
    hjust = 0,
    vjust = 0,
    angle = 90
  ) +
  geom_text(
    aes(x, y, label = "Curse Ends"),
    data = data.frame(x = 2003, y = 400),
    size = 3,
    hjust = 0,
    vjust = 0,
    angle = 90
  )
Recreate the following line graph superimposed on the bar chart.
# Total of hr per year and team; missing values are skipped (TRUE spelled
# out, since the shorthand T can be reassigned).
yb_homeruns <- ddply(
  yb,
  c("year", "team"),
  summarise,
  total_hr = sum(hr, na.rm = TRUE)
)

# Dodged bars of the yearly totals with a smoothed line per team on top.
ggplot(yb_homeruns, aes(year, total_hr)) +
  # total_hr is already the bar height, hence stat = "identity".
  geom_bar(aes(fill = team), stat = "identity", position = "dodge") +
  # Translucent fills keep the lines readable over the bars. `values` is
  # named in full; `value` only works through partial matching.
  scale_fill_manual(values = alpha(c("red", "blue"), 0.4)) +
  geom_smooth(aes(colour = team)) +
  scale_colour_manual(values = c("red", "blue"))
Recreate the following overlapping areas graph.
Consider scale_fill_manual()
.
# Overlapping (not stacked) areas of yearly home-run totals per team.
ggplot(yb_homeruns, aes(year, total_hr)) +
  # position = "identity" draws both areas from the baseline so they overlap.
  geom_area(aes(fill = team), position = "identity") +
  # Translucent fills let the area behind show through. `values` is named in
  # full; `value` only works through partial matching.
  scale_fill_manual(values = alpha(c("red", "blue"), 0.4)) +
  # 1918 is a constant, not a column of yb_homeruns, so xintercept is set
  # directly instead of being mapped inside aes().
  geom_vline(xintercept = 1918) +
  # Caption just to the right of the line, slightly below the x axis.
  geom_text(
    aes(x, y, label = "Curse Begins"),
    data = data.frame(x = 1919, y = -10),
    size = 3,
    hjust = 0,
    vjust = 0
  )
Recreate the following box plots.
# Rows with 1918 < year <= 2004, tagged as curse years.
yb_curse <- transform(
  subset(yb, year > 1918 & year <= 2004),
  curse = "Curse years"
)

# All remaining years (1918 and earlier, or after 2004), tagged separately.
yb_noncurse <- transform(
  subset(yb, year <= 1918 | year > 2004),
  curse = "Non-curse Years"
)

# Replace yb with the tagged rows, curse years first.
yb <- rbind(yb_curse, yb_noncurse)

# Box plot of the ratio hr / r per team, one panel per curse label.
ggplot(data = yb, mapping = aes(x = team, y = hr / r)) +
  geom_boxplot() +
  facet_grid(. ~ curse)
Recreate the following map of countries baseball players have come from.
library(maps)

# Polygon coordinates for the world map.
world_map <- map_data("world")

# Rename the region column to "country" so it can serve as the merge key.
# The column is located by name rather than by position, so the rename does
# not depend on the column order of map_data()'s result.
# NOTE(review): assumes the column is called "region" - confirm against the
# installed ggplot2.
names(world_map)[names(world_map) == "region"] <- "country"

# Number of rows of p per country.
p_country <- ddply(p, "country", summarise, total = length(country))

# Full outer join, so countries missing from either side are kept.
p_map <- merge(p_country, world_map, by = "country", all = TRUE)

# merge() reorders rows; restore the drawing order of the polygon vertices.
p_map <- p_map[order(p_map$order), ]
# World map with each polygon filled by log(total).
ggplot(data = p_map, mapping = aes(x = long, y = lat)) +
  geom_polygon(
    mapping = aes(group = group, fill = log(total)),
    colour = "grey60",
    size = .3
  ) +
  # Limit the latitude range shown.
  ylim(-55, 85)
Recreate the following chart of the 10 most represented foreign countries in the combined dataset.
Use stat = "bin"
to generate the ..count..
variable.
# Combine the two data sets on their shared id column.
bp <- merge(b, p, by = "id")

# Row count per country, sorted from most to fewest rows.
bp_country <- ddply(bp, "country", summarise, total = length(country))
bp_country <- bp_country[order(-bp_country$total), ]

# Keep the countries ranked 2 to 11; rank 1 is skipped (presumably the home
# country, since the chart is about foreign countries - confirm).
bp_10 <- subset(bp, country %in% bp_country$country[2:11])

# Stacked areas of row counts per 10-year bin, one area per country.
ggplot(data = bp_10, mapping = aes(x = year)) +
  # stat = "bin" computes the ..count.. variable used for the height.
  geom_area(
    mapping = aes(y = ..count.., fill = country),
    stat = "bin",
    binwidth = 10,
    position = "stack"
  ) +
  opts(title = "10 most represented foreign countries in combined dataset") +
  xlab("year (bin = 10 years)")
Recreate the side by side bar charts.
# Drop rows whose bats value is the empty string.
bp_trimmed <- subset(bp, bats != "")

# Bar chart of throws, with one panel per value of bats.
ggplot(data = bp_trimmed, mapping = aes(x = throws)) +
  geom_bar() +
  facet_grid(. ~ bats) +
  opts(title = "Hand preference by batting preference")
Recreate the following scatterplot.
# so against height. Points are jittered horizontally and drawn nearly
# transparent so that dense regions remain readable.
ggplot(data = bp, mapping = aes(x = height, y = so)) +
  geom_jitter(
    position = position_jitter(width = 5),
    alpha = 0.05
  ) +
  # Limit the height range shown.
  xlim(60, 85)
Recreate the following comparison of players who have hit more than 60 homeruns in a season.
# hr against weight for the rows of bp with more than 60 home runs.
ggplot(subset(bp, hr > 60), aes(weight, hr)) +
  geom_point() +
  # Linear fit without the standard-error band (FALSE spelled out, since the
  # shorthand F can be reassigned).
  geom_smooth(method = "lm", se = FALSE) +
  # Label each point with "first last"; paste() separates with a single
  # space by default, so no sep argument is needed.
  geom_text(aes(label = paste(first, last)), hjust = -0.1) +
  xlim(203, 233) +
  opts(title = "Weight vs. performance among record holders")