ggplot map legend does not display consistently

ggplot map legend does not display consistently - r

I'll apologize in advance that the example below isn't "minimal" but I haven't been able to reproduce this behavior except in the particular instance of my full data set.
I asked this question before here and thought I had found the answer, but the behavior has returned and is vexing me. Basically I have a script that reads daily COVID-19 case numbers and produces maps where the counties are color-coded by the percent of the population infected. The script produces five maps, a national one and one for each of the four official census regions: northeast, midwest, south and west. To cut down on size, the below is just the national and widwest maps.
My original script actually produces animated gifs showing the spread of the disease, but they take a long time to render. The below version just gives a single plot of the most recent data and should run pretty quickly.
I've used a dput in the below script to avoid you having to read a file and geocode locations (I commented out the code) but there is still a large csv file of county populations that has to be read in. I have posted it at pastebin here.
library(urbnmapr) # For map
library(ggplot2) # For map
library(dplyr) # For summarizing
library(tidyr) # For reshaping
library(stringr) # For padding leading zeros
library(ggrepel)
library(ggmap)
library(usmap)
library(gganimate)
library(magrittr)
library(gifski)
library(ggnewscale)
#if using Microsoft R, update checkpoint to get latest packages
#checkpoint("2020-03-01")
#start the clock
ptm <- proc.time()
set.seed(42)
#first run setup tasks
#these can be commented out once the data frames are in place
###################begin first run only################################
#register_google(key = "your google map key here")
#AFMCbases<-read.csv("C:/Users/jerem/Desktop/Work/covid_maps/AFMCbases.csv", header=TRUE, stringsAsFactors = FALSE)
#geocode the place names
# for(i in 1:nrow(AFMCbases)){
# result <- geocode(AFMCbases$Base[i])
# AFMCbases$longitude[i] <- as.numeric(result[1])
# AFMCbases$latitude[i] <- as.numeric(result[2])
# }
#transform the lat/lons to appropriate map projection
# locations<-AFMCbases[,2:3]
# new_locations <- usmap_transform(locations)
# AFMCbases <- cbind(AFMCbases,new_locations[,3:4])
AFMCbases <- structure(list(Base = c("Gunter AFB", "Davis Monthan AFB", "Edwards AFB",
"Robins AFB", "Scott AFB", "Hanscom AFB", "Offutt AFB", "Holloman AFB",
"Kirtland AFB", "Rome, NY", "Wright-Patterson AFB", "Tinker AFB",
"Arnold AFB", "Joint Base San Antonio", "Hill AFB", "Arlington, VA",
"Eglin AFB"), longitude = c(-86.244558, -110.8592578, -117.8912085,
-83.591719, -89.8550095, -71.2743123, -95.9145568, -106.099291,
-106.5338817, -75.4557303, -84.0537448, -97.4158295, -86.0303306,
-98.4523675, -111.9826984, -77.1067698, -86.5533382), latitude = c(32.4083744,
32.1675525, 34.9240314, 32.6400014, 38.5415911, 42.4579955, 41.1242718,
32.8440404, 35.0539718, 43.2128473, 39.8137298, 35.4277, 35.3828616,
29.4512786, 41.10968, 38.8799697, 30.4635583), personnel = c(820L,
605L, 5317L, 14088L, 613L, 2906L, 177L, 699L, 1264L, 822L, 15299L,
16032L, 389L, 3443L, 13679L, 1157L, 8143L), longitude.1 = c(1292311.33608434,
-1025218.68277084, -1622487.54697885, 1533762.39465597, 881032.996527566,
2296805.44531269, 342224.203588191, -572424.401062999, -596268.294707156,
1951897.82199569, 1352969.1130143, 234917.935027853, 1263808.11814915,
151230.865464104, -1000093.31185121, 1953459.66491185, 1292835.72883446
), latitude.1 = c(-1293180.11438144, -1358896.37536667, -946347.80198453,
-1223833.19307048, -664025.051658055, 128586.352781279, -422393.887189579,
-1328730.76688869, -1081540.1543388, 99159.9145180969, -445535.143260001,
-1059563.46211616, -963250.657602903, -1722291.94024992, -359543.815036425,
-408019.910644083, -1511165.09243038)), class = "data.frame", row.names = c(NA,
-17L))
#define census regions
west_region <-c("WA", "OR","CA","NV","ID", "MT", "WY", "UT","CO", "AZ", "NM")
NE_region <- c("ME","NH","VT","MA", "CT", "RI", "NY", "PA", "NJ")
midwest_region <- c("ND", "SD", "NE", "KS", "MN", "IA", "MO", "WI", "IL","MI", "IN","OH")
south_region <- c("TX", "OK", "AR", "LA", "MS", "TN", "KY", "AL", "GA","FL","SC","NC","VA","WV","DC","MD","DE")
west_region_bases <- c("Davis Monthan AFB", "Edwards AFB","Holloman AFB","Kirtland AFB","Hill AFB")
south_region_bases <- c("Robins AFB","Tinker AFB", "Arnold AFB", "Joint Base San Antonio", "Arlington, VA", "Eglin AFB")
mw_region_bases <- c("Scott AFB", "Offutt AFB", "Wright-Patterson AFB")
ne_region_bases <-c("Hanscom AFB", "Rome, NY")
# Get COVID cases, available from:
url <- "https://static.usafacts.org/public/data/covid-19/covid_confirmed_usafacts.csv"
COV <- read.csv(url, stringsAsFactors = FALSE)
#sometimes there are encoding issues with the first column name
names(COV)[1] <- "countyFIPS"
Covid <- pivot_longer(COV, cols=starts_with("X"),
values_to="cases",
names_to=c("X","date_infected"),
names_sep="X") %>%
mutate(infected = as.Date(date_infected, format="%m.%d.%y"),
countyFIPS = str_pad(as.character(countyFIPS), 5, pad="0"))
# Obtain map data for counties (to link with covid data) and states (for showing borders)
states_sf <- get_urbn_map(map = "states", sf = TRUE)
counties_sf <- get_urbn_map(map = "counties", sf = TRUE)
# Merge county map with total cases of cov
#this is the line to use for making animations
#pop_counties_cov <- inner_join(counties_sf, Covid, by=c("county_fips"="countyFIPS")) %>%
#to make last frame only
pop_counties_cov <- inner_join(counties_sf, group_by(Covid, countyFIPS) %>%
summarise(cases=max(cases)), by=c("county_fips"="countyFIPS"))
#read the county population data
counties_pop <- read.csv("C:/Users/jerem/Desktop/Work/covid_maps/countyPopulations.csv", header=TRUE, stringsAsFactors = FALSE)
#pad the single digit state FIPS states
counties_pop <- counties_pop %>% mutate(CountyFIPS=str_pad(as.character(CountyFIPS),5,pad="0"))
#merge the population and covid data by FIPS
pop_counties_cov$population <- counties_pop$Population[match(pop_counties_cov$county_fips,counties_pop$CountyFIPS)]
#calculate the infection rate
pop_counties_cov <- pop_counties_cov %>% mutate(infRate = (cases/population)*100)
#counties with 0 infections don't appear in the usafacts data, so didn't get a population
#set them to 0
pop_counties_cov$population[is.na(pop_counties_cov$population)] <- 0
pop_counties_cov$infRate[is.na(pop_counties_cov$infRate)] <- 0
plotDate="April20"
basepath = "C:/your/output/file/path"
naColor = "white"
lowColor = "green"
midColor = "maroon"
highColor = "red"
baseFill = "dodgerblue4"
baseColor = "firebrick"
baseShape = 23
scaleLow = "magenta"
scaleHigh = "blue"
###################end first run only################################
###################National Map################################
p <- pop_counties_cov %>%
ggplot() +
geom_sf(mapping = aes(fill = infRate, geometry=geometry), color = NA) +
geom_sf(data = states_sf, fill = NA, color = "black", size = 0.25) +
coord_sf(datum = NA) +
scale_fill_gradient(name = "% Pop \nInfected", trans = "log",low=lowColor, high=highColor,
breaks=c(0, round(max(pop_counties_cov$infRate),1)),
na.value = naColor) +
new_scale_fill() +
geom_point(data=AFMCbases,
aes(x=longitude.1, y=latitude.1,fill=personnel),
shape= baseShape,
color = "black",
size = 3) +
scale_fill_gradient(name="AFMC \nMil + Civ",
low = scaleLow, high = scaleHigh,
breaks = c(1, max(AFMCbases$personnel)))+
theme_bw() +
theme(legend.position="bottom",
panel.border = element_blank(),
axis.title.x=element_blank(),
axis.title.y=element_blank()) +
labs(title=paste('Confirmed COVID-19 Cases: ', max(Covid$infected),sep=""),
subtitle='HQ AFMC/A9A \nData: usafacts.org')
# a <- p + transition_time(infected) +
# labs(title='Confirmed COVID-19 Cases: {frame_time}',
# subtitle='HQ AFMC/A9A \nData: usafacts.org')
#
# animate(a,
# device="png",
# renderer=file_renderer(paste(basepath,plotDate,"/national",sep=""),
# prefix="gganim_plot",
# overwrite=TRUE)
# )
#
# #make the national animated gif
# png_files <- list.files(paste(basepath,plotDate,"/national",sep=""), pattern = ".*png$", full.names = TRUE)
# st = format(Sys.time(), "%Y-%m-%d")
# gifName <- paste(basepath,plotDate,"/national/COVID-19-Cases-byCounty_",st,".gif",sep="")
# gifski(png_files, gif_file = gifName, width = 1000, height = 750, delay = 0.25, loop=FALSE)
#save the image
st = format(Sys.time(), "%Y-%m-%d")
SaveFilename = paste(basepath,plotDate,"/national/COVID-19-Cases-byCounty_",st,".png",sep="")
if(!dir.exists(paste(basepath,plotDate,"/national",sep=""))) dir.create(paste(basepath,plotDate,"/national",sep=""))
ggsave(filename=SaveFilename, plot = p, dpi = 300)
###################End National Map################################
###################Midwest Map################################
#filter out states
#neCovid <- Covid %>% filter(State %in% NE_region )
mw_pop_counties_cov <- pop_counties_cov %>% filter(state_abbv %in% midwest_region)
mw_states_sf <- states_sf %>% filter(state_abbv %in% midwest_region)
mw_counties_sf <- counties_sf %>% filter(state_abbv %in% midwest_region)
#filter out bases
mwBases <- AFMCbases %>% filter(Base %in% mw_region_bases)
p <- mw_pop_counties_cov %>%
ggplot() +
geom_sf(mapping = aes(fill = infRate, geometry=geometry), color = NA) +
geom_sf(data = mw_states_sf, fill = NA, color = "black", size = 0.25) +
coord_sf(datum = NA) +
scale_fill_gradient(name = "% Pop \nInfected", trans = "log",low=lowColor, high=highColor,
breaks=c(0, round(max(mw_pop_counties_cov$infRate),1)),
na.value = naColor) +
new_scale_fill() +
geom_point(data=mwBases,
aes(x=longitude.1, y=latitude.1,fill=personnel),
shape = baseShape,
color = "black",
size=3) +
scale_fill_gradient(name="AFMC \nMil + Civ",
low=scaleLow, high = scaleHigh,
breaks = c(1, max(mwBases$personnel)))+
theme_bw() +
theme(legend.position="bottom",
panel.border = element_blank(),
axis.title.x=element_blank(),
axis.title.y=element_blank()) +
labs(title=paste('Confirmed COVID-19 Cases: ', max(Covid$infected),sep=""),
subtitle='HQ AFMC/A9A \nData: usafacts.org')
# a <- p + transition_time(infected) +
# labs(title='Confirmed COVID-19 Cases: {frame_time}',
# subtitle='HQ AFMC/A9A \nData: usafacts.org')
#
# animate(a,
# device="png",
# renderer=file_renderer(paste(basepath,plotDate,"/midwest",sep=""),
# prefix="gganim_plot",
# overwrite=TRUE)
# )
#
# #make the midwest animated gif
# png_files <- list.files(paste(basepath,plotDate,"/midwest",sep=""), pattern = ".*png$", full.names = TRUE)
# st = format(Sys.time(), "%Y-%m-%d")
# gifName <- paste(basepath,plotDate,"/midwest/MW_COVID-19-Cases-byCounty_",st,".gif",sep="")
# gifski(png_files, gif_file = gifName, width = 1000, height = 750, delay = 0.25, loop=FALSE)
st = format(Sys.time(), "%Y-%m-%d")
SaveFilename = paste(basepath,plotDate,"/midwest/MW_COVID-19-Cases-byCounty_",st,".png",sep="")
if(!dir.exists(paste(basepath,plotDate,"/midwest",sep=""))) dir.create(paste(basepath,plotDate,"/midwest",sep=""))
ggsave(filename=SaveFilename, plot = p, dpi = 300)
###################End Midwest Map################################
This is the national map I got this morning when I ran the code
Note that there is a scale for the number of personnel at the bases (the colored diamonds) but there is no scale for the shading of the counties.
Here is the midwest map. You can see from the code that it is the same ggplot just with a dataset that is filtered down to the counties in the midwest region.
Now the scale is there. As mentioned in my previous question I thought that the answer had been something to do with the width of the image being insufficient to accommodate the scale. When I added a newline in the legend text to shorten it that appeared to do the trick. But now the legend is disappearing again, andmaking the output image wider has no effect. Plus, just by eyeball it would appear there is plenty of room in the national plot to accommodate the scale.
Another bizarre aspect is the behavior associated with rounding the breaks. Below is a west map where I applied no rounding to the breaks
scale_fill_gradient(name = "% Pop \nInfected",trans = "log", low=lowColor, high=highColor,
breaks=c(0, max(west_pop_counties_cov$infRate)),
na.value = naColor)
So the scale is back but it goes to 6 decimal places. If I try to round it to 2
scale_fill_gradient(name = "% Pop \nInfected",trans = "log", low=lowColor, high=highColor,
breaks=c(0, round(max(west_pop_counties_cov$infRate),2)),
na.value = naColor)
I get this map
which surely indicates the horizontal space isn't the issue...if it can accommodate 6 decimal places then surely there's room for 2?
I've spent as much time trying to figure out this inconsistent scale behavior as I spent writing the original script. I need these things to be consistent so that I can provide them as work products on a regular interval.

You can add manual labels and add some space to prevent the key to overlap with the title:
pop_counties_cov %>%
ggplot() +
geom_sf(mapping = aes(fill = infRate, geometry=geometry), color = NA) +
geom_sf(data = states_sf, fill = NA, color = "black", size = 0.25) +
coord_sf(datum = NA) +
scale_fill_gradient(name = "% Pop\nInfected ", trans = "log2", low=lowColor, high=highColor,
breaks=c(min(pop_counties_cov$infRate[pop_counties_cov$infRate!=0]), max(pop_counties_cov$infRate)),
labels = round(c(min(pop_counties_cov$infRate[pop_counties_cov$infRate!=0]),
max(pop_counties_cov$infRate)), 1),
na.value = naColor) +
new_scale_fill() +
geom_point(data=AFMCbases,
aes(x=longitude.1, y=latitude.1,fill=personnel),
shape= baseShape,
color = "black",
size = 3) +
scale_fill_gradient(name="AFMC \nMil + Civ",
low = scaleLow, high = scaleHigh,
breaks = c(1, max(AFMCbases$personnel)))+
theme_bw() +
theme(legend.position="bottom",
panel.border = element_blank(),
axis.title.x=element_blank(),
axis.title.y=element_blank()) +
labs(title=paste('Confirmed COVID-19 Cases: ', max(Covid$infected),sep=""),
subtitle='HQ AFMC/A9A \nData: usafacts.org')

Your issue is due to the presence of 0 values in your variable infRate which messed up with the log transformation in your scale_fill_gradient as observed by this Warning message:
Warning message: Transformation introduced infinite values in discrete y-axis
Here, you can find a way to circuwent that by setting limits and breaks argument using non-0 minimal value:
> summary(pop_counties_cov$infRate)
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.00000 0.01543 0.03993 0.09178 0.09043 2.87425
> summary(pop_counties_cov$infRate[pop_counties_cov$infRate != 0])
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.001537 0.023724 0.048169 0.105326 0.102350 2.874253
Setting these new limits (and removing rows with 0 values) give you this:
pop_counties_cov %>%
filter(infRate != 0) %>%
ggplot() +
geom_sf(mapping = aes(fill = infRate, geometry=geometry), color = NA) +
geom_sf(data = states_sf, fill = NA, color = "black", size = 0.25) +
coord_sf(datum = NA) +
scale_fill_gradient(name = "% Pop \nInfected", trans = "log",low=lowColor, high=highColor,
breaks=c(0.001,2.9),
na.value = naColor, limits = c(0.001,2.9)) +
new_scale_fill() +
geom_point(data=AFMCbases,
aes(x=longitude.1, y=latitude.1,fill=personnel),
shape= baseShape,
color = "black",
size = 3) +
scale_fill_gradient(name="AFMC \nMil + Civ",
low = scaleLow, high = scaleHigh,
breaks = c(1, max(AFMCbases$personnel)))+
theme_bw() +
theme(legend.position="bottom",
panel.border = element_blank(),
axis.title.x=element_blank(),
axis.title.y=element_blank()) +
labs(title=paste('Confirmed COVID-19 Cases: ', max(Covid$infected),sep=""),
subtitle='HQ AFMC/A9A \nData: usafacts.org')
Does it answer your question ?

Related

Cropping a map in the shape of a country boundary

I'm trying to download temperature data and visualise it using R. I used the raster package to download the temperature and ggplot2 to visualise it.
library(raster)
library(ggplot2)
library(magrittr)
tmax_data <- getData(name = "worldclim", var = "tmax", res = 10)
gain(tmax_data)=0.1
tmax_mean <- mean(tmax_data)
tmax_mean_df <- as.data.frame(tmax_mean, xy = TRUE, na.rm = TRUE)
tmax_mean_df %>%
ggplot(aes(x=x,y=y)) +
geom_raster(aes(fill = layer)) +
labs(title = "Mean monthly maximum temperatures",
subtitle = "For the years 1970-2000") +
xlab("Longitude") +
ylab("Latitude") +
scale_fill_continuous(name = "Temperature (°C)")
However, the dataset contains the temperature values of the whole world. ut I want to visualise specific countries. I can crop the map by defining a bounding box but I'd like to crop the map in the shape of the country (instead of a square). Are there any packages that allow this functionality? Maybe by passing the shapefile of a country and cropping the map in that shape?

You can use the sf package in combination with raster::crop and raster::mask. Here is a demonstration for France:
library(raster)
library(ggplot2)
library(magrittr)
library(sf)
tmax_data <- getData(name = "worldclim", var = "tmax", res = 10)
gain(tmax_data)=0.1
tmax_mean <- mean(tmax_data)
france_sf <- st_as_sf(maps::map(database = "france", plot = FALSE, fill = TRUE))
tmax_mean_france <- raster::crop(
raster::mask(tmax_mean, as_Spatial(france_sf)),
as_Spatial(france_sf)
)
tmax_mean_france_df <- as.data.frame(tmax_mean_france, xy = TRUE, na.rm = TRUE)
tmax_mean_france_df %>%
ggplot(aes(x=x,y=y)) +
geom_raster(aes(fill = layer)) +
labs(title = "Mean monthly maximum temperatures **in France**",
subtitle = "For the years 1970-2000") +
xlab("Longitude") +
ylab("Latitude") +
scale_fill_continuous(name = "Temperature (°C)")

how do i combine multiple data sources in ggplot using split and sapply?

this question is linked to a previous one answered by #Rui Barradas and #Duck, but i need more help. Previous link here:
how do i vectorise (automate) plot creation in R
Basically, I need to combine 3 datasets into one plot with a secondary y axis. All datasets need to be split by SITENAME and will facet wrap by Sampling.Year. I am using split and sapply. Being facet wrap the plots look something like this:
However, i'm now trying to add the two other data sources into the plots, to look something like this:
But i am struggling to add the two other data sources and get them to split by SITENAME. Her is my code so far...
Record plot format as a function to be applied to a split list df (ideally 'df' would be added as geom_line with a secondary y axis, and 'FF_start_dates' will be added as a vertical dashed line):
SITENAME_plot <- function(AllDates_TPAF){
ggplot(AllDates_TPAF, aes(DATE, Daily.Ave.PAF)) +
geom_point(aes(colour = Risk), size = 3) +
scale_colour_manual(values=c("Very Low" = "dark green","Low" = "light green",
"Moderate" = "yellow", "High" = "orange", "Very High" = "red"), drop = FALSE) +
labs(x = "Month", y = "Total PAF (% affected)") +
scale_x_date(breaks = "1 month", labels = scales::date_format("%B")) +
facet_wrap(~Sampling.Year, ncol = 1, scales = "free")+
scale_y_continuous(limits = c(0, 100), sec.axis = sec_axis(~., name = "Water level (m)")) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
theme(legend.text=element_text(size=15)) +
theme(axis.text=element_text(size=15),
axis.title=element_text(size=15,face="bold")) +
guides(color = guide_legend(reverse = TRUE))+
theme_bw() +
ggtitle(unique(AllDates_TPAF$SITENAME))
}
plot write function:
SITENAME_plot_write <- function(name, g, dir = "N:/abc/"){
flname <- file.path(dir, name)
flname <- paste0(flname, ".jpg")
png(filename = flname, width = 1500, height = 1000)
print(g)
dev.off()
flname
}
Apply function to list split by SITENAME:
sp1 <- split(AllDates_TPAF, AllDates_TPAF$SITENAME)
gg_list <- sapply(sp1, SITENAME_plot, simplify = FALSE)
mapply(SITENAME_plot_write, names(gg_list), gg_list, MoreArgs = list(dir = getwd()))
dev.off()
I have uploaded samples of all 3 datasets here: Sample Data
Apologies for not using gsub but there was too much data and I couldn't get it to work properly
thanks in advance for any help you can give, even if it is just to point me towards a web tutorial of some kind.

You can try next code. I used the data you shared. Just be careful with names of all datasets. Ideally, the key columns as DATE and Sampling.Year should be present in all dataframes before making the split. Also some variables as Risk was absent so I added an example var with same name. Here the code, I added a function for the plot you want:
library(tidyverse)
library(readxl)
#Data
df1 <- read_excel('Sample data.xlsx',1)
#Create var
df1$Risk <- c(rep(c("Very Low","Low","Moderate","High","Very High"),67),"Very High")
#Other data
df2 <- read_excel('Sample data.xlsx',2)
df3 <- read_excel('Sample data.xlsx',3)
#Split 1
L1 <- split(df1,df1$SITENAME)
L2 <- split(df2,df2$SITENAME)
L3 <- split(df3,df3$`Site Name`)
#Function to create plots
myplot <- function(x,y,z)
{
#Merge x and y
#Check for duplicates and avoid column
y <- y[!duplicated(paste(y$DATE,y$Sampling.Year)),]
y$SITENAME <- NULL
xy <- merge(x,y,by.x = c('Sampling.Year','DATE'),by.y = c('Sampling.Year','DATE'),all.x=T)
#Format to dates
xy$DATE <- as.Date(xy$DATE)
#Scale factor
scaleFactor <- max(xy$Daily.Ave.PAF) / max(xy$Height)
#Rename for consistency in names
names(z)[4] <- 'DATE'
#Format date
z$DATE <- as.Date(z$DATE)
#Plot
#Plot
G <- ggplot(xy, aes(DATE, Daily.Ave.PAF)) +
geom_point(aes(colour = Risk), size = 3) +
scale_colour_manual(values=c("Very Low" = "dark green","Low" = "light green",
"Moderate" = "yellow", "High" = "orange", "Very High" = "red"), drop = FALSE) +
scale_x_date(breaks = "1 month", labels = scales::date_format("%b %Y")) +
geom_line(aes(x=DATE,y=Height*scaleFactor))+
scale_y_continuous(name="Total PAF (% affected)", sec.axis=sec_axis(~./scaleFactor, name="Water level (m)"))+
labs(x = "Month") +
geom_vline(data = z,aes(xintercept = DATE),linetype="dashed")+
facet_wrap(~Sampling.Year, ncol = 1, scales = "free")+
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
theme(legend.text=element_text(size=15)) +
theme(axis.text=element_text(size=15),
axis.title=element_text(size=15,face="bold")) +
guides(color = guide_legend(reverse = TRUE))+
theme_bw() +
ggtitle(unique(xy$SITENAME))
return(G)
}
#Create a list of plots
Lplots <- mapply(FUN = myplot,x=L1,y=L2,z=L3,SIMPLIFY = FALSE)
#Now format names
vnames <- paste0(names(Lplots),'.png')
mapply(ggsave, Lplots,filename = vnames,width = 30,units = 'cm')
You will end up with plots like these saved in your dir:
Some dashed lines do not appear in plots because they were not present in the data you provided.

ggplot two gradient fills on one plot

I've been producing animated maps showing the progression of COVID case data. In the interest of producing a minimal example I have skinnied the code down to the below, which only produces one frame. In practice I also read a number of csv files. I've tried to eliminate that in this example, but there is still one with county population data. I have posted it at https://pastebin.com/jCD9tP0X
library(urbnmapr) # For map
library(ggplot2) # For map
library(dplyr) # For summarizing
library(tidyr) # For reshaping
library(stringr) # For padding leading zeros
library(ggrepel)
library(ggmap)
library(usmap)
library(gganimate)
library(magrittr)
library(gifski)
library(scales)
#first run setup tasks
#these can be commented out once the data frames are in place
###################begin first run only################################
#define census regions
NE_region <- c("ME","NH","VT","MA", "CT", "RI", "NY", "PA", "NJ")
ne_region_bases <-c("Hanscom AFB", "Rome, NY")
# Get COVID cases, available from:
url <- "https://static.usafacts.org/public/data/covid-19/covid_confirmed_usafacts.csv"
COV <- read.csv(url, stringsAsFactors = FALSE)
#sometimes there are encoding issues with the first column name
names(COV)[1] <- "countyFIPS"
Covid <- pivot_longer(COV, cols=starts_with("X"),
values_to="cases",
names_to=c("X","date_infected"),
names_sep="X") %>%
mutate(infected = as.Date(date_infected, format="%m.%d.%Y"),
countyFIPS = str_pad(as.character(countyFIPS), 5, pad="0"))
# Obtain map data for counties (to link with covid data) and states (for showing borders)
states_sf <- get_urbn_map(map = "states", sf = TRUE)
counties_sf <- get_urbn_map(map = "counties", sf = TRUE)
# Merge county map with total cases of cov
#use this line to produce animated maps
#pop_counties_cov <- inner_join(counties_sf, Covid, by=c("county_fips"="countyFIPS"))
#use this one for a single map of the latest data
pop_counties_cov <- inner_join(counties_sf, group_by(Covid, countyFIPS) %>%
summarise(cases=sum(cases)), by=c("county_fips"="countyFIPS"))
#read the county population data
#csv at https://pastebin.com/jCD9tP0X
counties_pop <- read.csv("countyPopulations.csv", header=TRUE, stringsAsFactors = FALSE)
#pad the single digit state FIPS states
counties_pop <- counties_pop %>% mutate(CountyFIPS=str_pad(as.character(CountyFIPS),5,pad="0"))
#merge the population and covid data by FIPS
pop_counties_cov$population <- counties_pop$Population[match(pop_counties_cov$county_fips,counties_pop$CountyFIPS)]
#calculate the infection rate
pop_counties_cov <- pop_counties_cov %>% mutate(infRate = (cases/population)*100)
#counties with 0 infections don't appear in the usafacts data, so didn't get a population
#set them to 0
pop_counties_cov$population[is.na(pop_counties_cov$population)] <- 0
pop_counties_cov$infRate[is.na(pop_counties_cov$infRate)] <- 0
plotDate="April14"
basepath = "your/output file/path/here/"
naColor = "white"
lowColor = "green"
midColor = "maroon"
highColor = "red"
baseFill = "dodgerblue4"
baseColor = "firebrick"
baseShape = 23
###################end first run only################################
###################Northeast Map################################
#filter out states
ne_pop_counties_cov <- pop_counties_cov %>% filter(state_abbv %in% NE_region)
ne_states_sf <- states_sf %>% filter(state_abbv %in% NE_region)
ne_counties_sf <- counties_sf %>% filter(state_abbv %in% NE_region)
#filter out bases
neBases <- structure(list(Base = c("Hanscom AFB", "Rome, NY"), longitude = c(-71.2743123,
-75.4557303),
latitude = c(42.4579955, 43.2128473),
personnel = c(2906L,822L),
longitude.1 = c(2296805.44531269, 1951897.82199569),
latitude.1 = c(128586.352781279, 99159.9145180969)),
row.names = c(NA, -2L), class = "data.frame")
p <- ne_pop_counties_cov %>%
ggplot() +
geom_sf(mapping = aes(fill = infRate, geometry=geometry), color = NA) +
geom_sf(data = ne_states_sf, fill = NA, color = "black", size = 0.25) +
coord_sf(datum = NA) +
scale_fill_gradient(name = "% Pop \nInfected", trans = "log",low=lowColor, high=highColor,
breaks=c(0, max(ne_pop_counties_cov$infRate)),
na.value = naColor) +
geom_point(data=neBases,
aes(x=longitude.1, y=latitude.1,size=personnel),
shape = baseShape,
color = baseColor,
fill = baseFill) +
theme_bw() +
labs(size='AFMC \nMil + Civ') +
theme(legend.position="bottom",
panel.border = element_blank(),
axis.title.x=element_blank(),
axis.title.y=element_blank())
print(p)
###################End Northeast Map################################
If you run this you should get a single frame...when I do the whole animation, here is the final frame
The diamonds represent the locations of air force bases we're interested in within the region, and they are sized by how many personnel are there.
What I have been asked to do is to make the diamonds the same size, but "color code" the fill based on the number of personnel. I don't think this is a good idea, but I'm not the boss.
I'm not sure how to have two gradient fills on a single plot?

If you want to place a second filling gradient, you can have the use of new_scale_fill function from ggnewscale package:
library(ggnewscale)
p <- ne_pop_counties_cov %>%
ggplot() +
geom_sf(mapping = aes(fill = infRate, geometry=geometry), color = NA) +
geom_sf(data = ne_states_sf, fill = NA, color = "black", size = 0.25) +
coord_sf(datum = NA) +
scale_fill_gradient(name = "% Pop \nInfected", trans = "log",low=lowColor, high=highColor,
breaks=c(0, max(ne_pop_counties_cov$infRate)),
na.value = naColor) +
new_scale_fill()+
geom_point(data=neBases,
aes(x=longitude.1, y=latitude.1,fill=personnel),
shape = baseShape,
color = "black",
#fill = baseFill,
size = 5) +
scale_fill_gradient(name = "AFMC \nMil + Civ",
low = "blue", high = "magenta",
breaks = c(1,max(neBases$personnel)))+
theme_bw() +
theme(legend.position="bottom",
panel.border = element_blank(),
axis.title.x=element_blank(),
axis.title.y=element_blank())
print(p)
Does it answer your question ?

Using ggplot parameters in a custom function throws object not found error

I have written a function to load spatial data, extract data from an input dataset and merge this dataset with the spatial data. Then my function returns a map on which my cases get plotted.
My function works fine if I return my plot as the following:
(with fill = totalCases)
return ({
ggplot() +
geom_polygon(data = sl_adm2_Month, aes(x = long, y = lat, group = group,
fill = totalCases), colour = "white") +
geom_text(data = sl_adm2_months_names_DF, aes(label = NAME_2, x = long.1, y = lat.2, group = NAME_2), size = 3) +
# labs(title = paste("Ebola", str_sub(as.character(variable), 6, -1), "cases by district in Sierra Leone - until", format(as.Date(date), "%B %Y"))) +
xlab("") +
ylab("") +
theme_gray() +
theme(legend.position = "bottom")
})
However, my goal is to pass a parameter providing the value (= variable) for the fill parameter as you can see in my below code. But this throws the following error:
Error in eval(expr, envir, enclos) : object 'variable' not found
Here is my code:
plotMonths <- function(data, variable, date) {
# Reloading district polygons
sl_adm2_months <- readOGR("C:/Users/woba/Documents/Ordina/TFS-Projects/Ordina - Mail Analytics/Johnson/Wouter/03. GeoData map - R/Sierra Leone adm2", "SLE_adm2", verbose = TRUE, stringsAsFactors = FALSE)
sl_adm2_months_DF <- fortify(sl_adm2_months, region = "NAME_2")
# Getting the correct District names
colnames(sl_adm2_months_DF)[7] <- "District"
sl_adm2_months_DF$District <- ifelse(sl_adm2_months_DF$District == "Western Rural", "Western Area Rural", as.character(sl_adm2_months_DF$District))
sl_adm2_months_DF$District <- ifelse(sl_adm2_months_DF$District == "Western Urban", "Western Area Urban", as.character(sl_adm2_months_DF$District))
sl_adm2_months_DF$District <- as.factor(sl_adm2_months_DF$District)
#Extracting district names for plotting
sl_adm2_months_names_DF <- data.frame(long = coordinates(sl_adm2_months[, 1]), lat = coordinates(sl_adm2_months[, 2]))
sl_adm2_months_names_DF[, "ID_2"] <- sl_adm2_months#data[, "ID_2"]
sl_adm2_months_names_DF[, "NAME_2"] <- sl_adm2_months#data[, "NAME_2"]
# Subset May data
sl_Month <- data[data$Country == "Sierra Leone" & data$Date <= as.Date(date), ]
sl_Month <- droplevels(sl_Month)
sl_Month[is.na(sl_Month)] <- 0
confirmed <- ddply(sl_Month, .(Localite), function(x){max(x$cmlConfirmed.cases, na.rm = T)})
cases <- ddply(sl_Month, .(Localite), function(x){max(x$cmlCases, na.rm = T)})
deaths <- ddply(sl_Month, .(Localite), function(x){max(x$cmlDeaths, na.rm = T)})
sl_Month <- merge(cases, deaths, by = "Localite")
sl_Month <- merge(sl_Month, confirmed, by = "Localite")
sl_Month <- droplevels(sl_Month)
sl_Month <- droplevels(sl_Month)
colnames(sl_Month)<- c("District", "totalCases", "totalDeaths", "totalConfirmed")
sl_Month <- sl_Month[-which(sl_Month$District == "National"),]
# Merging Month data with District polygons
sl_adm2_Month <- merge(sl_adm2_months_DF, sl_Month, by = "District", all.x = TRUE)
sl_adm2_Month$totalCases <- as.numeric(sl_adm2_Month$totalCases)
sl_adm2_Month$totalDeaths <- as.numeric(sl_adm2_Month$totalDeaths)
sl_adm2_Month$totalConfirmed <- as.numeric(sl_adm2_Month$totalConfirmed)
#NA to 0 for values missing for districts
sl_adm2_Month[is.na(sl_adm2_Month)] <- 0
#Sorting
sl_adm2_Month <- sl_adm2_Month[order(sl_adm2_Month$District, sl_adm2_Month$order), ]
# Prints & Views
print(head(sl_Month))
View(sl_Month)
View(sl_adm2_Month)
Sys.setlocale("LC_TIME", "English")
# Plotting Cases
return ({
ggplot() +
geom_polygon(data = sl_adm2_Month, aes(x = long, y = lat, group = group,
fill = variable), colour = "white") +
geom_text(data = sl_adm2_months_names_DF, aes(label = NAME_2, x = long.1, y = lat.2, group = NAME_2), size = 3) +
# labs(title = paste("Ebola", str_sub(as.character(variable), 6, -1), "cases by district in Sierra Leone - until", format(as.Date(date), "%B %Y"))) +
xlab("") +
ylab("") +
theme_gray() +
theme(legend.position = "bottom")
})
}
# Plotting the months - variable = second input and must be IN c(totalDeaths, totalCases, totalConfirmed)
plotMonths(final_dataset, "totalCases", "2014-05-31")
I've read some similar questions on the forum but wasn't able to resolve my issue.
Any help on how to fix this is very welcome!

Using 'aes_string' instead of 'aes' solved my issue.
aes_string(x = "long", y = "lat", group = "group", fill = variable)
Explanation on the differences between aes & aes_string for the ggplot2 package can be found here:
What is the difference between aes and aes_string (ggplot2) in R
All credit goes to Axeman & Benjamin - their answers solved my issue!

Choropleth Map in ggplot2

I'm trying to reproduce the Choropleth Map given here with the code provided by Hadley.
library(ggplot2)
library(maps)
# First (and most annoying) task - get matching state and county variables
# for both datasets. And unfortauntely it's not quite right, as you can
# see from the finish product - some counties are missing.
unemp <- read.csv("unemployment09.csv", header = F, stringsAsFactors = F)
names(unemp) <- c("id", "state_fips", "county_fips", "name", "year",
"?", "?", "?", "rate")
unemp$county <- tolower(gsub(" County, [A-Z]{2}", "", unemp$name))
unemp$state <- gsub("^.*([A-Z]{2}).*$", "\\1", unemp$name)
county_df <- map_data("county")
names(county_df) <- c("long", "lat", "group", "order", "state_name", "county")
county_df$state <- state.abb[match(county_df$state_name, tolower(state.name))]
county_df$state_name <- NULL
state_df <- map_data("state")
# Combine together
choropleth <- merge(county_df, unemp, by = c("state", "county"))
choropleth <- choropleth[order(choropleth$order), ]
# Discretise rate to use with Brewer colour scheme - many options here
# choropleth$rate_d <- cut_number(choropleth$rate, 5)
# choropleth$rate_d <- cut_interval(choropleth$rate, 5)
# Nathan's choice is a little odd:
choropleth$rate_d <- cut(choropleth$rate, breaks = c(seq(0, 10, by = 2), 35))
# Once you have the data in the right format, recreating the plot is straight
# forward.
ggplot(choropleth, aes(long, lat, group = group)) +
geom_polygon(aes(fill = rate_d), colour = alpha("white", 1/2), size = 0.2) +
geom_polygon(data = state_df, colour = "white", fill = NA) +
scale_fill_brewer(pal = "PuRd")
But this code gives the following error:
Error in do.call("layer", list(mapping = mapping, data = data, stat = stat, :
could not find function "alpha"
Deleting alpha and using this code
ggplot(choropleth, aes(long, lat, group = group)) +
geom_polygon(aes(fill = rate_d), colour = "white", size = 0.2) +
geom_polygon(data = state_df, colour = "white", fill = NA) +
scale_fill_brewer(pal = "PuRd")
gives the following error:
Error in scale_map.discrete(scale, df[[j]]) : attempt to apply
non-function
How can I fix this problem?

Develop Reference

r css asp.net wordpress firebase qt symfony nginx http apache-flex

ggplot map legend does not display consistently - r

Related

Cropping a map in the shape of a country boundary

how do i combine multiple data sources in ggplot using split and sapply?

ggplot two gradient fills on one plot

Using ggplot parameters in a custom function throws object not found error

Choropleth Map in ggplot2

Categories

Resources