ggplot2
Mon Lecture
Basics of ggplot2
Explore several geoms.
And a little data wrangling with dplyr
as needed!
Wed Lecture
geoms.
We could spend all semester on data viz principles.
Be thoughtful and iterate.
We could spend all semester on data viz principles.
Be thoughtful and iterate.
Let's spend some time considering the strengths and weaknesses of the graphs we encountered last time.
We could spend all semester on data viz principles.
Be thoughtful and iterate.
Let's spend some time considering the strengths and weaknesses of the graphs we encountered last time.
data: dataset that contains the data
geom: geometric shape that the data are mapped to
aesthetic: visual properties of the geom
coord: coordinate system
scale: controls how data are mapped to the visual values of the aesthetic
guide: legend to help user convert visual display back to the data
ggplot(data = ---, mapping = aes(---)) + geom_---(---) + coord_---() + scale_---_---() + ---
# Load library that has dataset of interest
library(mosaicData)
# Grab data
data(Births2015)
# Load tidyverse (which contains ggplot2)
library(tidyverse)
# Example code
ggplot(data = ---, mapping = aes(---)) + geom_---(---) + coord_---() + scale_---_---() + ---
# Create plot
ggplot(data = Births2015, mapping = aes(x = date, y = births)) + geom_point()
# Look at structure of data with dplyr function
glimpse(Births2015)
## Rows: 365## Columns: 8## $ date <date> 2015-01-01, 2015-01-02, 2015-01-03, 2015-01-04, 2015-01-…## $ births <dbl> 8068, 10850, 8328, 7065, 11892, 12425, 12141, 12094, 1186…## $ wday <ord> Thu, Fri, Sat, Sun, Mon, Tue, Wed, Thu, Fri, Sat, Sun, Mo…## $ year <dbl> 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 201…## $ month <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …## $ day_of_year <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17…## $ day_of_month <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17…## $ day_of_week <dbl> 5, 6, 7, 1, 2, 3, 4, 5, 6, 7, 1, 2, 3, 4, 5, 6, 7, 1, 2, …
# Create plot
ggplot(data = Births2015, mapping = aes(x = date, y = births, color = day_of_week)) + geom_point()
# Create plot
ggplot(data = Births2015, mapping = aes(x = date, y = births, color = wday)) + geom_point() + theme(legend.position = "bottom")
You can also adjust the aspect ratio with the R chunk option: fig.asp
What if we want to see the direction that the number of births takes over time for each day of the week?
geom?
# Create plot
ggplot(data = Births2015, mapping = aes(x = date, y = births, color = wday)) + geom_line() + theme(legend.position = "bottom")
# Create plot
ggplot(data = Births2015, mapping = aes(x = date, y = births, color = wday)) + geom_line() + geom_point() + theme(legend.position = "bottom")
library(lubridate)
ggplot(data = Births2015, mapping = aes(x = date, y = births, color = wday)) + geom_line() + geom_point() + theme(legend.position = "bottom") + coord_cartesian(xlim = as_date(c("2015-01-01", "2015-01-31")))
ggplot(data = Births2015, mapping = aes(x = date, y = births, color = "midnightblue")) + geom_point()
ggplot(data = Births2015, mapping = aes(x = date, y = births)) + geom_point(color = "midnightblue")
date is mapped to x
births is mapped to y
color
ggplot(data = Births2015, mapping = aes(x = date, y = births)) + geom_point(color = "#ff006e")
ggplot(data = Births2015, mapping = aes(x = date, y = births, color = wday)) + geom_point(color = "#ff006e") + geom_line() + theme(legend.position = "bottom")
geom layer
ggplot(data = Births2015, mapping = aes(x = date, y = births, color = wday)) + geom_line() + geom_point(color = "#ff006e") + theme(legend.position = "bottom")
Many are listed on the first page of the ggplot2 cheatsheet.
Can also ask R:
apropos("geom_")
## [1] "geom_abline" "geom_area" "geom_bar" ## [4] "geom_bin_2d" "geom_bin2d" "geom_blank" ## [7] "geom_boxplot" "geom_col" "geom_contour" ## [10] "geom_contour_filled" "geom_count" "geom_crossbar" ## [13] "geom_curve" "geom_density" "geom_density_2d" ## [16] "geom_density_2d_filled" "geom_density2d" "geom_density2d_filled" ## [19] "geom_dotplot" "geom_errorbar" "geom_errorbarh" ## [22] "geom_freqpoly" "geom_function" "geom_hex" ## [25] "geom_histogram" "geom_hline" "geom_jitter" ## [28] "geom_label" "geom_line" "geom_linerange" ## [31] "geom_map" "geom_path" "geom_point" ## [34] "geom_pointrange" "geom_polygon" "geom_qq" ## [37] "geom_qq_line" "geom_quantile" "geom_raster" ## [40] "geom_rect" "geom_ribbon" "geom_rug" ## [43] "geom_segment" "geom_sf" "geom_sf_label" ## [46] "geom_sf_text" "geom_smooth" "geom_spoke" ## [49] "geom_step" "geom_text" "geom_tile" ## [52] "geom_violin" "geom_vline" "update_geom_defaults"
ggplot(data = Births2015, mapping = aes(x = date, y = births, color = wday)) + geom_point() + geom_smooth(method = "lm", se = FALSE) + theme(legend.position = "bottom")
ggplot(data = Births2015, mapping = aes(x = date, y = births, color = wday)) + geom_point() + geom_smooth(se = FALSE) + theme(legend.position = "bottom")
ggplot(data = Births2015, mapping = aes(x = date, y = births, color = wday)) + geom_point() + geom_smooth(color = "black", se = FALSE) + theme(legend.position = "bottom")
Need a new dataset with more categorical variables
The Alison Bechdel Rule: A movie passes the test if (1) it has at least two women in it, (2) who talk to each other, (3) about something besides a man.
Movies from 1970 - 2013
movies <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-03-09/movies.csv') %>% filter(rated %in% c("R", "PG-13", "PG", "G"))
glimpse(movies)
## Rows: 1,549## Columns: 34## $ year <dbl> 2013, 2013, 2013, 2013, 2013, 2013, 2013, 2013, 2013, 20…## $ imdb <chr> "tt2024544", "tt1272878", "tt0453562", "tt1335975", "tt1…## $ title <chr> "12 Years a Slave", "2 Guns", "42", "47 Ronin", "A Good …## $ test <chr> "notalk-disagree", "notalk", "men", "men", "notalk", "ok…## $ clean_test <chr> "notalk", "notalk", "men", "men", "notalk", "ok", "ok", …## $ binary <chr> "FAIL", "FAIL", "FAIL", "FAIL", "FAIL", "PASS", "PASS", …## $ budget <dbl> 2.00e+07, 6.10e+07, 4.00e+07, 2.25e+08, 9.20e+07, 1.20e+…## $ domgross <chr> "53107035", "75612460", "95020213", "38362475", "6734919…## $ intgross <chr> "158607035", "132493015", "95020213", "145803842", "3042…## $ code <chr> "2013FAIL", "2013FAIL", "2013FAIL", "2013FAIL", "2013FAI…## $ budget_2013 <dbl> 2.00e+07, 6.10e+07, 4.00e+07, 2.25e+08, 9.20e+07, 1.20e+…## $ domgross_2013 <chr> "53107035", "75612460", "95020213", "38362475", "6734919…## $ intgross_2013 <chr> "158607035", "132493015", "95020213", "145803842", "3042…## $ period_code <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…## $ decade_code <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…## $ imdb_id <chr> "2024544", "1272878", "0453562", "1335975", "1606378", "…## $ plot <chr> "In the antebellum United States, Solomon Northup, a fre…## $ rated <chr> "R", "R", "PG-13", "PG-13", "R", "R", "PG-13", "PG-13", …## $ response <lgl> TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TR…## $ language <chr> "English", "English, Spanish", "English", "English, Japa…## $ country <chr> "USA, UK", "USA", "USA", "USA", "USA", "UK", "USA", "USA…## $ writer <chr> "John Ridley (screenplay), Solomon Northup (based on \"T…## $ metascore <dbl> 97, 55, 62, 29, 28, 55, 48, 33, 90, 58, 52, 78, 83, 53, …## $ imdb_rating <dbl> 8.3, 6.8, 7.6, 6.6, 5.4, 7.8, 5.7, 5.0, 7.5, 7.4, 6.2, 7…## $ director <chr> "Steve McQueen", "Baltasar Kormákur", "Brian Helgeland",…## $ released <chr> "08 Nov 2013", "02 Aug 2013", "12 
Apr 2013", "25 Dec 201…## $ actors <chr> "Chiwetel Ejiofor, Dwight Henry, Dickie Gravois, Bryan B…## $ genre <chr> "Biography, Drama, History", "Action, Comedy, Crime", "B…## $ awards <chr> "Won 3 Oscars. Another 131 wins & 137 nominations.", "1 …## $ runtime <chr> "134 min", "109 min", "128 min", "118 min", "98 min", "1…## $ type <chr> "movie", "movie", "movie", "movie", "movie", "movie", "m…## $ poster <chr> "http://ia.media-imdb.com/images/M/MV5BMjExMTEzODkyN15BM…## $ imdb_votes <dbl> 143446, 87301, 43608, 25735, 123837, 85871, 18973, 10826…## $ error <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
geoms for describing amounts/frequencies?
geom_bar
ggplot(data = movies, mapping = aes(x = binary)) + geom_bar()
geom_col
# First wrangle with dplyr
movies_ag <- count(movies, binary)
movies_ag
## # A tibble: 2 × 2
##   binary     n
##   <chr>  <int>
## 1 FAIL     863
## 2 PASS     686
ggplot(data = movies_ag, mapping = aes(x = binary, y = n)) + geom_col()
geom_point again
ggplot(data = movies_ag, mapping = aes(x = binary, y = n)) + geom_point(size = 4)
geom_point + geom_segment
ggplot(data = movies_ag, mapping = aes(x = binary, y = n)) + geom_segment(mapping = aes(xend = binary), yend = 0) + geom_point(size = 10, color = "orange") + ylim(c(0, 875))
geom_bar
ggplot(data = movies, mapping = aes(x = rated, fill = binary)) + geom_bar()
geom_bar
ggplot(data = movies, mapping = aes(x = rated, fill = binary)) + geom_bar(position = "fill")
geom_bar
ggplot(data = movies, mapping = aes(x = rated, fill = binary)) + geom_bar(position = "dodge")
geom_tile
movies_ag <- count(movies, rated, binary)
ggplot(data = movies_ag, mapping = aes(x = rated, y = binary, fill = n)) + geom_tile()
geom_tile
movies_ag <- count(movies, rated, binary)
ggplot(data = movies_ag, mapping = aes(x = rated, y = binary, fill = n)) + geom_tile() + scale_fill_viridis_c(direction = -1)
fill scale!
movies_ag <- group_by(movies, rated, binary) %>% summarize(mean_budget = mean(budget))
ggplot(data = movies_ag, mapping = aes(x = rated, y = binary, fill = mean_budget)) + geom_tile() + scale_fill_viridis_c(direction = -1) + theme(legend.position = "bottom")
geoms (graphs) for visualizing distributions?
geom_histogram
ggplot(movies, aes(x = budget)) + geom_histogram()
geom_histogram
ggplot(movies, aes(x = budget)) + geom_histogram(bins = 50, color = "white", fill = "darkcyan")
binwidth or bins arguments
geom_histogram
ggplot(movies, aes(x = budget, fill = binary)) + geom_histogram(bins = 50, color = "white")
geom_histogram
ggplot(movies, aes(x = budget, fill = binary)) + geom_histogram(bins = 50, alpha = 0.4, position = "identity")
ggplot(movies, aes(x = budget, fill = binary)) + geom_histogram(bins = 50) + facet_wrap(~binary) + guides(fill = "none")
ggplot(movies, aes(x = budget, fill = binary)) + geom_histogram() + facet_grid(rated ~ binary, scales = "free_y") + guides(fill = "none")
geom_density
ggplot(movies, aes(x = budget, fill = binary)) + geom_density(alpha = 0.4) + theme(legend.position = "bottom")
geom_density
ggplot(movies, aes(x = budget, fill = binary)) + geom_density(position = "fill") + theme(legend.position = "bottom")
geom_boxplot
ggplot(movies, aes(x = binary, y = budget)) + geom_boxplot()
geom_boxplot
ggplot(movies, aes(x = binary, y = budget)) + geom_boxplot(varwidth = TRUE, notch = TRUE)
varwidth do? notch = TRUE?
geom_boxplot
ggplot(movies, aes(x = binary, y = budget, fill = rated)) + geom_boxplot()
geom_violin
ggplot(movies, aes(x = binary, y = budget)) + geom_violin()
geom_violin
ggplot(movies, aes(x = binary, y = budget)) + geom_violin() + geom_jitter(alpha = .1, width = .1, color = "darkcyan")
Keyboard shortcuts
↑, ←, Pg Up, k | Go to previous slide |
↓, →, Pg Dn, Space, j | Go to next slide |
Home | Go to first slide |
End | Go to last slide |
Number + Return | Go to specific slide |
b / m / f | Toggle blackout / mirrored / fullscreen mode |
c | Clone slideshow |
p | Toggle presenter mode |
t | Restart the presentation timer |
?, h | Toggle this help |
s | Toggle scribble toolbox |
Esc | Back to slideshow |