Choropleth Map in ggplot2 - r

I'm trying to reproduce the Choropleth Map given here with the code provided by Hadley.
library(ggplot2)
library(maps)
# First (and most annoying) task - get matching state and county variables
# for both datasets. And unfortauntely it's not quite right, as you can
# see from the finish product - some counties are missing.
unemp <- read.csv("unemployment09.csv", header = F, stringsAsFactors = F)
names(unemp) <- c("id", "state_fips", "county_fips", "name", "year",
"?", "?", "?", "rate")
unemp$county <- tolower(gsub(" County, [A-Z]{2}", "", unemp$name))
unemp$state <- gsub("^.*([A-Z]{2}).*$", "\\1", unemp$name)
county_df <- map_data("county")
names(county_df) <- c("long", "lat", "group", "order", "state_name", "county")
county_df$state <- state.abb[match(county_df$state_name, tolower(state.name))]
county_df$state_name <- NULL
state_df <- map_data("state")
# Combine together
choropleth <- merge(county_df, unemp, by = c("state", "county"))
choropleth <- choropleth[order(choropleth$order), ]
# Discretise rate to use with Brewer colour scheme - many options here
# choropleth$rate_d <- cut_number(choropleth$rate, 5)
# choropleth$rate_d <- cut_interval(choropleth$rate, 5)
# Nathan's choice is a little odd:
choropleth$rate_d <- cut(choropleth$rate, breaks = c(seq(0, 10, by = 2), 35))
# Once you have the data in the right format, recreating the plot is straight
# forward.
ggplot(choropleth, aes(long, lat, group = group)) +
geom_polygon(aes(fill = rate_d), colour = alpha("white", 1/2), size = 0.2) +
geom_polygon(data = state_df, colour = "white", fill = NA) +
scale_fill_brewer(pal = "PuRd")
But this code gives the following error:
Error in do.call("layer", list(mapping = mapping, data = data, stat = stat, :
could not find function "alpha"
Deleting alpha and using this code
ggplot(choropleth, aes(long, lat, group = group)) +
geom_polygon(aes(fill = rate_d), colour = "white", size = 0.2) +
geom_polygon(data = state_df, colour = "white", fill = NA) +
scale_fill_brewer(pal = "PuRd")
gives the following error:
Error in scale_map.discrete(scale, df[[j]]) : attempt to apply
non-function
How can I fix this problem?

Related

ggplot two gradient fills on one plot

I've been producing animated maps showing the progression of COVID case data. In the interest of producing a minimal example I have skinnied the code down to the below, which only produces one frame. In practice I also read a number of csv files. I've tried to eliminate that in this example, but there is still one with county population data. I have posted it at https://pastebin.com/jCD9tP0X
library(urbnmapr) # For map
library(ggplot2) # For map
library(dplyr) # For summarizing
library(tidyr) # For reshaping
library(stringr) # For padding leading zeros
library(ggrepel)
library(ggmap)
library(usmap)
library(gganimate)
library(magrittr)
library(gifski)
library(scales)
#first run setup tasks
#these can be commented out once the data frames are in place
###################begin first run only################################
#define census regions
NE_region <- c("ME","NH","VT","MA", "CT", "RI", "NY", "PA", "NJ")
ne_region_bases <-c("Hanscom AFB", "Rome, NY")
# Get COVID cases, available from:
url <- "https://static.usafacts.org/public/data/covid-19/covid_confirmed_usafacts.csv"
COV <- read.csv(url, stringsAsFactors = FALSE)
#sometimes there are encoding issues with the first column name
names(COV)[1] <- "countyFIPS"
Covid <- pivot_longer(COV, cols=starts_with("X"),
values_to="cases",
names_to=c("X","date_infected"),
names_sep="X") %>%
mutate(infected = as.Date(date_infected, format="%m.%d.%Y"),
countyFIPS = str_pad(as.character(countyFIPS), 5, pad="0"))
# Obtain map data for counties (to link with covid data) and states (for showing borders)
states_sf <- get_urbn_map(map = "states", sf = TRUE)
counties_sf <- get_urbn_map(map = "counties", sf = TRUE)
# Merge county map with total cases of cov
#use this line to produce animated maps
#pop_counties_cov <- inner_join(counties_sf, Covid, by=c("county_fips"="countyFIPS"))
#use this one for a single map of the latest data
pop_counties_cov <- inner_join(counties_sf, group_by(Covid, countyFIPS) %>%
summarise(cases=sum(cases)), by=c("county_fips"="countyFIPS"))
#read the county population data
#csv at https://pastebin.com/jCD9tP0X
counties_pop <- read.csv("countyPopulations.csv", header=TRUE, stringsAsFactors = FALSE)
#pad the single digit state FIPS states
counties_pop <- counties_pop %>% mutate(CountyFIPS=str_pad(as.character(CountyFIPS),5,pad="0"))
#merge the population and covid data by FIPS
pop_counties_cov$population <- counties_pop$Population[match(pop_counties_cov$county_fips,counties_pop$CountyFIPS)]
#calculate the infection rate
pop_counties_cov <- pop_counties_cov %>% mutate(infRate = (cases/population)*100)
#counties with 0 infections don't appear in the usafacts data, so didn't get a population
#set them to 0
pop_counties_cov$population[is.na(pop_counties_cov$population)] <- 0
pop_counties_cov$infRate[is.na(pop_counties_cov$infRate)] <- 0
plotDate="April14"
basepath = "your/output file/path/here/"
naColor = "white"
lowColor = "green"
midColor = "maroon"
highColor = "red"
baseFill = "dodgerblue4"
baseColor = "firebrick"
baseShape = 23
###################end first run only################################
###################Northeast Map################################
#filter out states
ne_pop_counties_cov <- pop_counties_cov %>% filter(state_abbv %in% NE_region)
ne_states_sf <- states_sf %>% filter(state_abbv %in% NE_region)
ne_counties_sf <- counties_sf %>% filter(state_abbv %in% NE_region)
#filter out bases
neBases <- structure(list(Base = c("Hanscom AFB", "Rome, NY"), longitude = c(-71.2743123,
-75.4557303),
latitude = c(42.4579955, 43.2128473),
personnel = c(2906L,822L),
longitude.1 = c(2296805.44531269, 1951897.82199569),
latitude.1 = c(128586.352781279, 99159.9145180969)),
row.names = c(NA, -2L), class = "data.frame")
p <- ne_pop_counties_cov %>%
ggplot() +
geom_sf(mapping = aes(fill = infRate, geometry=geometry), color = NA) +
geom_sf(data = ne_states_sf, fill = NA, color = "black", size = 0.25) +
coord_sf(datum = NA) +
scale_fill_gradient(name = "% Pop \nInfected", trans = "log",low=lowColor, high=highColor,
breaks=c(0, max(ne_pop_counties_cov$infRate)),
na.value = naColor) +
geom_point(data=neBases,
aes(x=longitude.1, y=latitude.1,size=personnel),
shape = baseShape,
color = baseColor,
fill = baseFill) +
theme_bw() +
labs(size='AFMC \nMil + Civ') +
theme(legend.position="bottom",
panel.border = element_blank(),
axis.title.x=element_blank(),
axis.title.y=element_blank())
print(p)
###################End Northeast Map################################
If you run this you should get a single frame...when I do the whole animation, here is the final frame
The diamonds represent the locations of air force bases we're interested in within the region, and they are sized by how many personnel are there.
What I have been asked to do is to make the diamonds the same size, but "color code" the fill based on the number of personnel. I don't think this is a good idea, but I'm not the boss.
I'm not sure how to have two gradient fills on a single plot?
If you want to place a second filling gradient, you can have the use of new_scale_fill function from ggnewscale package:
library(ggnewscale)
p <- ne_pop_counties_cov %>%
ggplot() +
geom_sf(mapping = aes(fill = infRate, geometry=geometry), color = NA) +
geom_sf(data = ne_states_sf, fill = NA, color = "black", size = 0.25) +
coord_sf(datum = NA) +
scale_fill_gradient(name = "% Pop \nInfected", trans = "log",low=lowColor, high=highColor,
breaks=c(0, max(ne_pop_counties_cov$infRate)),
na.value = naColor) +
new_scale_fill()+
geom_point(data=neBases,
aes(x=longitude.1, y=latitude.1,fill=personnel),
shape = baseShape,
color = "black",
#fill = baseFill,
size = 5) +
scale_fill_gradient(name = "AFMC \nMil + Civ",
low = "blue", high = "magenta",
breaks = c(1,max(neBases$personnel)))+
theme_bw() +
theme(legend.position="bottom",
panel.border = element_blank(),
axis.title.x=element_blank(),
axis.title.y=element_blank())
print(p)
Does it answer your question ?

R: scatter points using longitude/latitude

I want to fill the colour on a map. However, the plot doesn't come out as aspected.
How can I visualize the data with longitude and latitude?
install.packages("WDI")
install.packages("tidyverse")
library(WDI)
library(tidyverse)
literacy_globe <- WDI(country = "all", indicator = "SE.ADT.LITR.ZS", start = 2015, end = 2018, extra = TRUE)
literacy_globe <- na.omit(literacy_globe)
ggplot(literacy_globe, aes(x = longitude, y = latitude, group = iso3c)) +
geom_point(aes(fill = income), colour = "white")
I'd like the result similar to:
You can use the following code
#Loading the required packges
library(WDI)
library(tidyverse)
library(maptools)
library("ggplot2")
library("sf")
#Downloading the data
literacy_globe <- WDI(country = "all", indicator = "SE.ADT.LITR.ZS", start = 2015, end = 2018, extra = TRUE)
#Removing the NAs
literacy_globe_1 <- na.omit(literacy_globe)
#Saving the data as .csv file as your data contains blank cells which are not NAs
write.csv(literacy_globe_1, "literacy_globe_1.csv")
#Reading the data from .csv file
data <- read.csv("literacy_globe_1.csv")
#Removing the NAs
literacy_globe <- na.omit(data)
summary(literacy_globe)
head(literacy_globe,2)
#Mapping using ggplot2 package
data(wrld_simpl)
#sp to sf conversion
world <- st_as_sf(wrld_simpl)
# now create the map
ggplot(world) +
geom_sf(colour = "black", fill = NA) + coord_sf(expand = FALSE) +
theme_bw() + geom_point(aes(longitude, latitude),data= literacy_globe, colour=alpha("red",0.7))
For white fill of polygon and grey outside area, you can use
ggplot(world) +
geom_sf(colour = "black", fill = "white") + coord_sf(expand = FALSE) +
geom_point(aes(longitude, latitude),data= literacy_globe, colour=alpha("red",0.7))
Update
choropleth map
literacy_globe <- WDI(country = "all", indicator = "SE.ADT.LITR.ZS", start = 2015, end = 2018, extra = TRUE)
literacy_globe <- na.omit(literacy_globe)
summary(literacy_globe)
head(literacy_globe,2)
#Using ggplot2 package
data(wrld_simpl)
#fortify shape file to get into dataframe
wrld_simpl.f <- fortify(wrld_simpl, region = "NAME")
class(wrld_simpl.f)
head(wrld_simpl.f)
#merge with coefficients and reorder
merge.shp<-merge(wrld_simpl.f,literacy_globe, by.x = "id", by.y = "country", all.x=TRUE)
final.plot<-merge.shp[order(merge.shp$order), ]
head(final.plot, 2)
#basic plot
ggplot() +
geom_polygon(data = final.plot,
aes(x = long, y = lat, group = group, fill = income),
color = "black", size = 0.25)
I found another way to draw the hierarchy scatter on world map, but i were not so sure if it has some drawbakcs.
literacy_globe <- WDI(country = "all", indicator = "SE.ADT.LITR.ZS", start =
2015, end = 2018, extra = TRUE)
literacy_globe <- na.omit(literacy_globe)
lit.long <- literacy_globe$longitude
lit.lat <- literacy_globe$latitude
income <- literacy_globe$income
# prepare a NULL map
mp<-NULL
mapworld<-borders("world",colour = "gray50",fill="white")
#mp = empty map
#plot a map
mp <- ggplot() + mapworld + ylim(-60,90)
#geom_point plot the data on it
mp2 <- mp + geom_point(aes(x = lit.long, y = lit.lat), color = "darkblue",
fill = income) +
scale_size(range = c(1,1))

Overlay region borders in R map generates unwanted lines

I have already generated a simple map for Nigerian states, and now I would like to highlight in my map the borders for the Nigerian regions (that group Nigerian states).
When I add the layer for the borders with geom_polygon, they appear lines that do not correspond to region borders. I found a similar problem here Map county borders on to a ggmap
but this does not seem to be working for my case.
Here are the shapefiles and the database I am working on:
https://www.dropbox.com/sh/cek92s50jixowfx/AABwIVZKvtff8-9slhfCbxEca?dl=0
The code I am using is
#LOAD SHAPEFILES AND DATABASE
ng_dist <- readShapeSpatial("NGA_adm1.shp")
ng_dist_regions <- readShapeSpatial("NGA_adm_Region.shp")
NG_States <- read.csv("State_color_map.csv", header = TRUE, sep=",")
#VERIFY THE MAPS LOADED PROPERLY
plot(ng_dist)
plot(ng_dist_regions)
# STATE MAP - fortify and match shapefile and database IDs names
intersect(ng_dist$NAME_1, NG_States$STATE)
ng_dist <- fortify(ng_dist, region = "NAME_1")
ng_dist$id[which(ng_dist$id == "Akwa Ibom")] <- "Akwa-ibom"
ng_dist$id[which(ng_dist$id == "Nassarawa")] <- "Nasarawa"
ng_dist$id[which(ng_dist$id == "Cross River")] <- "C/river"
ng_dist$id[which(ng_dist$id == "Federal Capital Territory")] <- "FCT"
intersect(ng_dist$id, NG_States$STATE)
# REGION MAP - fortify
ng_dist_regions <- fortify(ng_dist_regions, region = "Region")
### Convert dummy variable to factor
NG_States$Abia <- as.factor(NG_States$Abia)
#PLOT MAP with coloured Abia State
cols <- c("0" = "#e6e6e6","1" = "#6eab27")
ABIA <- NG_States$Abia
Abia_map <- ggplot(NG_States, aes(fill = ABIA)) +
geom_map(data = NG_States, aes(map_id = NG_States$STATE, fill = ABIA), map = ng_dist, color = "black", size = 0.10) +
expand_limits(x = ng_dist$long, y = ng_dist$lat) +
theme_nothing(legend = FALSE) +
labs(title="Abia") +
coord_map() +
scale_fill_manual(name="", values=cols, labels=c("","Abia"))
Abia_map
#Add layer for region borders
d <- Abia_map +
geom_polygon(aes(x = ng_dist_regions$long, y = ng_dist_regions$lat, group = ng_dist_regions$id, fill = NA), data = ng_dist_regions, color = "red", size = 0.8)
d
Here is my result
Nigerian States and Regions
I have tried to add other options, such as coord_fixed() or expand_limits(x = ng_dist_regions$long, y = ng_dist_regions$lat), but I am quite basic R user and I don't know other solutions.
Using group, instead of id as group seems to solve the problem.
d <- Abia_map +
geom_path(aes(x = long, y = lat, group = group), data = ng_dist_regions, color = "red", size = 0.8, inherit.aes = FALSE)
d

How to add diagonal lines in NA value polygons using ggplot?

I'm working to plot the consolidated Z-value deviations (for a series of factors) from the national average for Pakistan on a fortified SPDF. For the purposes of this question, my data is irrelevant. I could provide it if necessary.
I am using ggplot to create my output where the command and result look something like this:
ggplot() + geom_polygon(data = plot.pakmod_sumZ, aes(x = long, y = lat, group = group, fill = SumZ.Cat), color = "black", size = 0.25, na.rm = TRUE) + scale_fill_manual(name = "Deviations from National Average", labels = c("-7", "-6", "-5", "-4", "-3", "-2", "-1", "Positive"), values = c("darkorange4","brown", "orangered1","tomato1","darkorange3","orange","yellow", "greenyellow"), na.value = "Grey", guide = guide_legend(reverse = TRUE)) + coord_map() + labs(x = NULL, y = NULL) + scale_x_discrete(breaks = NULL) + scale_y_discrete(breaks = NULL) + theme_minimal()
Deviations from National Average
I am trying to figure out now if it's possible to add diagonal lines in the polygons which have missing values and are coloured grey. Can this be done using ggplot?
This is an example I took from here. I opted to use the horizontal error bar geom. Mind that this isn't the only way of doing this.
library(ggplot2)
library(sp)
library(rgdal)
library(rgeos)
# create a local directory for the data
localDir <- "R_GIS_data"
if (!file.exists(localDir)) {
dir.create(localDir)
}
# download and unzip the data
url <- "ftp://www.ecy.wa.gov/gis_a/inlandWaters/wria.zip"
file <- paste(localDir, basename(url), sep='/')
if (!file.exists(file)) {
download.file(url, file)
unzip(file,exdir=localDir)
}
# create a layer name for the shapefiles (text before file extension)
layerName <- "WRIA_poly"
# read data into a SpatialPolygonsDataFrame object
dataProjected <- readOGR(dsn=localDir, layer=layerName)
dataProjected#data$id <- rownames(dataProjected#data)
# create a data.frame from our spatial object
watershedPoints <- fortify(dataProjected)
# merge the "fortified" data with the data from our spatial object
watershedDF <- merge(watershedPoints, dataProjected#data, by = "id")
dataProjected#data$id <- rownames(dataProjected#data)
watershedPoints <- fortify(dataProjected)
watershedDF <- merge(watershedPoints, dataProjected#data, by = "id")
ggWatershed <- ggplot(data = watershedDF, aes(x=long, y=lat, group = group, fill = WRIA_NM)) +
geom_polygon() +
geom_path(color = "white") +
scale_fill_hue(l = 40) +
coord_equal() +
theme(legend.position = "none", title = element_blank())
# Adding coordinates to the data part of SPDF. `sd` is the variable of interest
# which is beign plotted here. Each line extends sd away from long coordinate
dataProjected#data$sd <- rnorm(nrow(xy), mean = 50000, sd = 10000)
xy <- coordinates(dataProjected)
dataProjected#data$long <- xy[, 1]
dataProjected#data$lat <- xy[, 2]
ggWatershed +
geom_errorbarh(data = dataProjected#data, aes(group = id, xmin = long - sd, xmax = long + sd))

Using ggplot parameters in a custom function throws object not found error

I have written a function to load spatial data, extract data from an input dataset and merge this dataset with the spatial data. Then my function returns a map on which my cases get plotted.
My function works fine if I return my plot as the following:
(with fill = totalCases)
return ({
ggplot() +
geom_polygon(data = sl_adm2_Month, aes(x = long, y = lat, group = group,
fill = totalCases), colour = "white") +
geom_text(data = sl_adm2_months_names_DF, aes(label = NAME_2, x = long.1, y = lat.2, group = NAME_2), size = 3) +
# labs(title = paste("Ebola", str_sub(as.character(variable), 6, -1), "cases by district in Sierra Leone - until", format(as.Date(date), "%B %Y"))) +
xlab("") +
ylab("") +
theme_gray() +
theme(legend.position = "bottom")
})
However, my goal is to pass a parameter providing the value (= variable) for the fill parameter as you can see in my below code. But this throws the following error:
Error in eval(expr, envir, enclos) : object 'variable' not found
Here is my code:
plotMonths <- function(data, variable, date) {
# Reloading district polygons
sl_adm2_months <- readOGR("C:/Users/woba/Documents/Ordina/TFS-Projects/Ordina - Mail Analytics/Johnson/Wouter/03. GeoData map - R/Sierra Leone adm2", "SLE_adm2", verbose = TRUE, stringsAsFactors = FALSE)
sl_adm2_months_DF <- fortify(sl_adm2_months, region = "NAME_2")
# Getting the correct District names
colnames(sl_adm2_months_DF)[7] <- "District"
sl_adm2_months_DF$District <- ifelse(sl_adm2_months_DF$District == "Western Rural", "Western Area Rural", as.character(sl_adm2_months_DF$District))
sl_adm2_months_DF$District <- ifelse(sl_adm2_months_DF$District == "Western Urban", "Western Area Urban", as.character(sl_adm2_months_DF$District))
sl_adm2_months_DF$District <- as.factor(sl_adm2_months_DF$District)
#Extracting district names for plotting
sl_adm2_months_names_DF <- data.frame(long = coordinates(sl_adm2_months[, 1]), lat = coordinates(sl_adm2_months[, 2]))
sl_adm2_months_names_DF[, "ID_2"] <- sl_adm2_months#data[, "ID_2"]
sl_adm2_months_names_DF[, "NAME_2"] <- sl_adm2_months#data[, "NAME_2"]
# Subset May data
sl_Month <- data[data$Country == "Sierra Leone" & data$Date <= as.Date(date), ]
sl_Month <- droplevels(sl_Month)
sl_Month[is.na(sl_Month)] <- 0
confirmed <- ddply(sl_Month, .(Localite), function(x){max(x$cmlConfirmed.cases, na.rm = T)})
cases <- ddply(sl_Month, .(Localite), function(x){max(x$cmlCases, na.rm = T)})
deaths <- ddply(sl_Month, .(Localite), function(x){max(x$cmlDeaths, na.rm = T)})
sl_Month <- merge(cases, deaths, by = "Localite")
sl_Month <- merge(sl_Month, confirmed, by = "Localite")
sl_Month <- droplevels(sl_Month)
sl_Month <- droplevels(sl_Month)
colnames(sl_Month)<- c("District", "totalCases", "totalDeaths", "totalConfirmed")
sl_Month <- sl_Month[-which(sl_Month$District == "National"),]
# Merging Month data with District polygons
sl_adm2_Month <- merge(sl_adm2_months_DF, sl_Month, by = "District", all.x = TRUE)
sl_adm2_Month$totalCases <- as.numeric(sl_adm2_Month$totalCases)
sl_adm2_Month$totalDeaths <- as.numeric(sl_adm2_Month$totalDeaths)
sl_adm2_Month$totalConfirmed <- as.numeric(sl_adm2_Month$totalConfirmed)
#NA to 0 for values missing for districts
sl_adm2_Month[is.na(sl_adm2_Month)] <- 0
#Sorting
sl_adm2_Month <- sl_adm2_Month[order(sl_adm2_Month$District, sl_adm2_Month$order), ]
# Prints & Views
print(head(sl_Month))
View(sl_Month)
View(sl_adm2_Month)
Sys.setlocale("LC_TIME", "English")
# Plotting Cases
return ({
ggplot() +
geom_polygon(data = sl_adm2_Month, aes(x = long, y = lat, group = group,
fill = variable), colour = "white") +
geom_text(data = sl_adm2_months_names_DF, aes(label = NAME_2, x = long.1, y = lat.2, group = NAME_2), size = 3) +
# labs(title = paste("Ebola", str_sub(as.character(variable), 6, -1), "cases by district in Sierra Leone - until", format(as.Date(date), "%B %Y"))) +
xlab("") +
ylab("") +
theme_gray() +
theme(legend.position = "bottom")
})
}
# Plotting the months - variable = second input and must be IN c(totalDeaths, totalCases, totalConfirmed)
plotMonths(final_dataset, "totalCases", "2014-05-31")
I've read some similar questions on the forum but wasn't able to resolve my issue.
Any help on how to fix this is very welcome!
Using 'aes_string' instead of 'aes' solved my issue.
aes_string(x = "long", y = "lat", group = "group", fill = variable)
Explanation on the differences between aes & aes_string for the ggplot2 package can be found here:
What is the difference between aes and aes_string (ggplot2) in R
All credit goes to Axeman & Benjamin - their answers solved my issue!

Resources