library(tidyverse)
Notes D
Making plots with ggplot2: barplots and scatterplots
# Import the can_lang dataset from last class.
# Option 1: read a local copy (path is relative to the working directory).
# can_lang <- read.csv("can_lang.csv")
# Option 2: read the CSV directly from the canlang GitHub repository.
can_lang <- read.csv("https://raw.githubusercontent.com/ttimbers/canlang/master/inst/extdata/can_lang.csv")
Recall our last example:
# Build `ten_lang`: keep only the rows in the "Aboriginal languages" category,
# sort them from largest to smallest `mother_tongue` count, keep the two
# columns used for plotting, and take the first ten rows.
ten_lang <- can_lang %>%
  filter(category == "Aboriginal languages") %>%
  arrange(desc(mother_tongue)) %>%
  select(language, mother_tongue) %>%
  slice(1:10)
ggplot: barplots with geom_bar
# Bar chart of ten_lang: one bar per language on the x axis. With
# stat = "identity" the bar height is read directly from mother_tongue
# rather than being computed by counting rows.
ggplot(data = ten_lang) +
  geom_bar(
    mapping = aes(x = language, y = mother_tongue),
    stat = "identity"
  )
Are there any improvements we could make to this graph?
To better view text
# Two ways to draw the bars horizontally, so that the language names sit on
# the vertical axis.
# Variant 1: keep the original mapping and flip the coordinate system.
ggplot(data = ten_lang) +
  geom_bar(
    mapping = aes(x = language, y = mother_tongue),
    stat = "identity"
  ) +
  coord_flip()
# Variant 2: swap the aesthetics so the counts go on x and the languages on y.
ggplot(data = ten_lang) +
  geom_bar(
    mapping = aes(x = mother_tongue, y = language),
    stat = "identity"
  )
Labels, Colors, and Themes
# Horizontal bar chart with presentation tweaks:
#   - reorder() orders the languages on the y axis by their mother_tongue value
#   - bars are filled light blue
#   - axis labels and a multi-line title are set in a single labs() call
#   - theme_minimal() replaces the default theme
ggplot(data = ten_lang) +
  geom_bar(
    mapping = aes(x = mother_tongue, y = reorder(language, mother_tongue)),
    stat = "identity",
    fill = "lightblue"
  ) +
  labs(
    x = "Mother Tongue (Number of Canadian Residents)",
    y = "Language",
    title = "Ten Aboriginal Languages Most Often \n Reported by Canadian Residents \n as Their Mother Tongue"
  ) +
  theme_minimal()
ggplot: scatterplot with geom_point
# Scatterplot of the full can_lang data: one point per row, with
# most_at_home on the x axis and mother_tongue on the y axis.
ggplot(data = can_lang) +
  geom_point(mapping = aes(x = most_at_home, y = mother_tongue))
With labels
# Same scatterplot with descriptive two-line axis labels and all plot text
# set to size 12.
ggplot(data = can_lang) +
  geom_point(mapping = aes(x = most_at_home, y = mother_tongue)) +
  labs(
    x = "Language spoken most at home \n (number of Canadian residents)",
    y = "Mother tongue \n (number of Canadian residents)"
  ) +
  theme(text = element_text(size = 12))
Axis transformations
library(scales)
# Labelled scatterplot with both axes on a log10 scale. label_comma() (from
# scales) formats the axis tick labels with comma thousands separators.
ggplot(data = can_lang) +
  geom_point(mapping = aes(x = most_at_home, y = mother_tongue)) +
  scale_x_log10(labels = label_comma()) +
  scale_y_log10(labels = label_comma()) +
  labs(
    x = "Language spoken most at home \n (number of Canadian residents)",
    y = "Mother tongue \n (number of Canadian residents)"
  ) +
  theme(text = element_text(size = 12))
Using mutate to create new columns
# Add two columns to can_lang that express the mother_tongue and most_at_home
# counts as percentages of a fixed total.
# NOTE(review): 35151728 is presumably the total number of Canadian residents
# in the census this dataset comes from -- confirm the source of the figure.
total_residents <- 35151728
can_lang <- can_lang %>%
  mutate(
    mother_tongue_percent = (mother_tongue / total_residents) * 100,
    most_at_home_percent = (most_at_home / total_residents) * 100
  )
Scatterplot with percents
# Scatterplot of the percentage columns on log10 axes.
# Uses label_comma() for the tick labels, matching the earlier log-axis plot;
# the bare `comma` function is superseded in scales.
ggplot(can_lang, aes(x = most_at_home_percent, y = mother_tongue_percent)) +
  geom_point() +
  xlab("Language spoken most at home \n (percentage of Canadian residents)") +
  ylab("Mother tongue \n (percentage of Canadian residents)") +
  theme(text = element_text(size = 12)) +
  scale_x_log10(labels = label_comma()) +
  scale_y_log10(labels = label_comma())
Scatterplot with Percents and Colors
# Percentage scatterplot in which each point's colour and shape both encode
# its language category. The legend is placed above the plot with its entries
# stacked vertically.
# Uses label_comma() for the tick labels, matching the earlier log-axis plot;
# the bare `comma` function is superseded in scales.
ggplot(
  can_lang,
  aes(
    x = most_at_home_percent,
    y = mother_tongue_percent,
    color = category,
    shape = category
  )
) +
  geom_point() +
  xlab("Language spoken most at home \n (percentage of Canadian residents)") +
  ylab("Mother tongue \n (percentage of Canadian residents)") +
  theme(
    text = element_text(size = 12),
    legend.position = "top",
    legend.direction = "vertical"
  ) +
  scale_x_log10(labels = label_comma()) +
  scale_y_log10(labels = label_comma())