#LOAD PACKAGES
library(tidyverse)
library(maps)
#You may need to install the "devtools" package. This is because jpmap is hosted on Github rather than another server like CRAN.
#If you are prompted to update packages, you can hit <ENTER> to skip - if there is a required update, you will see a message
# install.packages("devtools")
::install_github("UchidaMizuki/jpmap")
devtoolslibrary(jpmap)
#LOAD DATA
<- map_data("world") world_map
STAT 118: Notes K
Maps with maps
and sf
Before we get started, some context:
R
is fantastic for spatial analysis (not covered in this class… look for classes related to spatial statistics)R
is great for interactive data visualization (vialeaflet
orshiny
… more on this on Thursday)R
is okay at spacial data visualization (creating maps).- There are many different packages in
R
for creating maps. I’ve found that different packages perform best for different maps. We will talk about a few different ones today. - If you have a highly map-centric project, there is nothing wrong with working in ArcGIS or QGIS if you find the mapping tools in R insufficient. There are many recent improvements with new packages (like
sp
,rgdal
andrgeos
) which profiles much of the functionality of GIS packages! Exciting! (not very beginner friendly - requires familiarity with GIS concepts)
- There are many different packages in
Using the maps
package
Perhaps the simplest approach to drawing maps is to use geom_polygon()
to draw boundaries for different regions.
The maps
package contains several built in maps: world (for all countries in the world), france, italy, nz, usa, state (usa state boundaries), and county (usa counties). The maps package isn’t particularly accurate or up-to-date, but it’s built into R so it’s an easy place to start.
To reference each map you use map_data("mapname")
.
#World Map
ggplot(world_map, aes(long, lat, group=group)) +
geom_polygon() +
coord_quickmap()
Note:
coord_quickmap()
adjusts the axes to ensure that longitude and latitude are rendered on the same scale. It is very important that this aspect ratio is maintained or a country may appear super stretched or super squished.- the
aes(group=group)
option – This is SUPER IMPORTANT, so R knows which things to connect together
What about subsetting the data?
#Subset to get Japan
<- map_data("world", region ="Japan")
japan
#Subset to get USA
<- map_data("world", region ="USA") usa
What if aspect ratio is not maintained?
# ASPECT RATIO NOT MAINTAINED.
ggplot(japan, aes(long, lat)) +
geom_polygon(aes(group=group)) +
theme_light() +
ggtitle("Japan - Aspect Ratio Not Maintained (not good)")
# ASPECT RATIO MAINTAINED
ggplot(japan, aes(long, lat)) +
geom_polygon(aes(group=group)) +
coord_quickmap() +
theme_light() +
ggtitle("Japan - Aspect Ratio Maintained (better)")
Japan with Prefectures
We will now plot Japan using ggplot, using the japan object we created from the map_data package. Note that this is when the format of japan is as a data frame.
#Plot of Japan
%>%
japanggplot(aes(x=long,y=lat))+
geom_polygon(aes(group=group)) +
coord_quickmap()
How to customize colors?
You can use either numeric codes, or the name of a color, when you specify the palette of your map.
#you can use numeric or character names https://r-charts.com/colors/
ggplot(japan, aes(long, lat)) +
geom_polygon(aes(group=group), fill ="#FFFFF0", color ="#000000") +
coord_quickmap() +
theme_light()
ggplot(japan, aes(long, lat)) +
geom_polygon(aes(group=group), fill ="ivory", color ="black") +
coord_quickmap() +
theme_light()
Using the sf
package
There are a few limitations to the approach outlined above, not least of which is the fact that the simple “longitude-latitude” data format is not typically used in real world mapping. Vector data for maps are typically encoded using the “simple features” standard produced by the Open Geospatial Consortium. The sf
package developed by Edzer Pebesma provides an excellent toolset for working with such data, and the geom_sf()
and coord_sf()
functions in ggplot2 are designed to work together with the sf
package.
#LOAD PACKAGES
#install.packages("sf") - note some students are getting a pop-up when they install the sf package for the first time. Select the "no" option when it pops up in your console.
library(sf)
Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
#some students are needing into install the rgeos package seperately as well
#library(rgeos)
#load japan prefecture map from the jpmap package
<-jpmap::prefecture
prefecture
#we need to do a little cleanup on prefecture names - make them all lower case
$pref_name<-tolower(prefecture$pref_name)
prefecture#and now make them all ASCII (non-accented) characters
$pref_name<-iconv(prefecture$pref_name, from = 'UTF-8', to = 'ASCII//TRANSLIT') prefecture
For our first example, we will create a prefecture map of Japan. Note that we need less code, because geom_sf knows how to plot sf objects without you having to tell R what to use for X/Y axes. Note that you also do not need coord_quickmap.
ggplot(data=prefecture)+
geom_sf(fill="ivory",color="black")+
theme_light()
We can even just plot a single prefecture of interest by filtering our sf object!
<-prefecture%>%
miyagifilter(pref_name=="miyagi")
ggplot(miyagi)+
geom_sf(fill="ivory",color="black")+
theme_light()
For our second example, we will be working with a dataset of Japanese census data tfrom the choroplethr
package.
#install.packages("choroplethr")
library(choroplethr)
#load the japan census data from the choroplethr package
data(df_japan_census)
#Let's take a peak at the census data
str(df_japan_census)
'data.frame': 47 obs. of 4 variables:
$ region : chr "aichi" "akita" "aomori" "chiba" ...
$ pop_2010 : num 7411000 1086000 1373000 6216000 1431000 ...
$ percent_pop_change_2005_2010: num 2.2 -5.2 -4.4 2.6 -2.5 -1.9 0.4 -3 -1.3 -0.8 ...
$ pop_density_km2_2010 : num 1434.8 93.3 142.4 1205.5 252.1 ...
#rename the prefecture. We need to do this in order to merge with the prefecture sf dataframe
<-df_japan_census%>%
df_japan_censusrename("pref_name"="region")
#Now, we can merge the census data with the prefecture sf map.
<-merge(prefecture,df_japan_census,by="pref_name") merged_prefs
You should notice that the merged_prefs
dataset is now saved in your R environment. This dataset contains information about total population of each prefecture in 2010, the precent population change from 2005-2010, and population density per \(km^2\) in 2010. Let’s take a look at that dataset.
Each row represents a prefecture in Japan This data frame contains the following columns:
pref_name
Name of each prefecture (without capitalization or accent characters)pref_code
Numeric code that is unique to each prefecturepref_name_ja
Name of each prefecture in Japanesepop_2010
Population of each prefecture in 2010pop_density_km2_2010
Population density of each prefecture in \(km^2\).geometry
The mapping information inside a special features. Here, each prefecture is a multipolygon.
Let’s begin by plotting the map as we did above, but filling in each sf multipolygon by population in 2010:
%>%
merged_prefsggplot()+
geom_sf(aes(fill=pop_2010))+
theme_light()+
ggtitle("Prefecture Population, 2010")+
labs(fill="Population (2010)")
Here are some options to customize the plot that you might be interested in:
Using RColorBrewer palette
library(RColorBrewer)
#color options https://r-graph-gallery.com/38-rcolorbrewers-palettes.html
%>%
merged_prefs ggplot() +
geom_sf(aes(fill = pop_2010)) +
theme_light() +
ggtitle("Prefecture Population, 2010") +
labs(fill="Population (2010)") +
scale_fill_gradientn(colors = brewer.pal(8,"Spectral") ) #customize colors
Using part of a RColorBrewer palette
%>%
merged_prefs ggplot() +
geom_sf(aes(fill = pop_2010)) +
theme_light() +
ggtitle("Prefecture Population, 2010") +
labs(fill="Population (2010)") +
scale_fill_gradientn(colors = brewer.pal(11,"Spectral")[5:11] ) #customize colors
Building your own color palette using scale_fill_gradientn
%>%
merged_prefs ggplot() +
geom_sf(aes(fill = pop_2010)) +
theme_light() +
ggtitle("Prefecture Population, 2010") +
labs(fill="Population (2010)")+
scale_fill_gradientn(colors = c("yellow","orange","red"))
A note about customizing colors:
- you should use a color scheme that is sequential (has order to it), when you are displaying continuous data
- you should use a color scheme that is categorical, when your data is in categories and isn’t ordered you should use a color scheme that is diverging, when want to put emphasis on two extremes and mid-range. For example, you might use a diverging palette from red to blue for political party affiliation in the US.
- pay attention to your map being color blind friendly (
RdYlGr
is the worst…) - as a general rule, try not to use blue to represent a land mass (let’s reserve that for bodies of water)
Adding labels
%>%
merged_prefs ggplot() +
geom_sf(aes(fill = pop_2010)) +
theme_light() +
ggtitle("Prefecture Population, 2010") +
labs(fill="Population (2010)",x="",y="")+
scale_fill_gradientn(colors = brewer.pal(8, "Spectral") ) + #customize colors
geom_sf_text(data = merged_prefs[merged_prefs$pop_2010 >7500000,], aes(label = pref_name), fontface="bold")