ggplot two gradient fills on one plot - r

I've been producing animated maps showing the progression of COVID case data. In the interest of producing a minimal example I have skinnied the code down to the below, which only produces one frame. In practice I also read a number of csv files. I've tried to eliminate that in this example, but there is still one with county population data. I have posted it at https://pastebin.com/jCD9tP0X
library(urbnmapr) # For map
library(ggplot2) # For map
library(dplyr) # For summarizing
library(tidyr) # For reshaping
library(stringr) # For padding leading zeros
library(ggrepel)
library(ggmap)
library(usmap)
library(gganimate)
library(magrittr)
library(gifski)
library(scales)
# First-run setup tasks.
# Everything between the begin/end markers can be commented out once the data
# frames it creates already exist in the session.
################### begin first run only ################################
# Census region (state abbreviations) and the bases located in it.
NE_region <- c("ME", "NH", "VT", "MA", "CT", "RI", "NY", "PA", "NJ")
ne_region_bases <- c("Hanscom AFB", "Rome, NY")

# Get COVID cases, available from:
url <- "https://static.usafacts.org/public/data/covid-19/covid_confirmed_usafacts.csv"
COV <- read.csv(url, stringsAsFactors = FALSE)
# Sometimes there are encoding issues with the first column name.
names(COV)[1] <- "countyFIPS"

# Reshape the "X"-prefixed per-day columns into one row per county and day.
# Splitting each column name on "X" leaves the date text in `date_infected`.
Covid <- pivot_longer(
  COV,
  cols = starts_with("X"),
  values_to = "cases",
  names_to = c("X", "date_infected"),
  names_sep = "X"
) %>%
  mutate(
    # "%y" (two-digit year), assuming headers such as X1.22.20 -- with "%Y"
    # the trailing "20" would be read as the year 20 rather than 2020.
    infected = as.Date(date_infected, format = "%m.%d.%y"),
    # FIPS codes are matched as 5-character strings, so restore leading zeros.
    countyFIPS = str_pad(as.character(countyFIPS), 5, pad = "0")
  )

# Obtain map data for counties (to link with covid data) and states (for
# showing borders).
states_sf <- get_urbn_map(map = "states", sf = TRUE)
counties_sf <- get_urbn_map(map = "counties", sf = TRUE)

# Merge county map with total cases of cov.
# Use this line to produce animated maps:
# pop_counties_cov <- inner_join(counties_sf, Covid, by = c("county_fips" = "countyFIPS"))
# Use this one for a single map of the latest data.
# The per-day counts are presumably running totals (TODO confirm against the
# feed), so the latest figure per county is the maximum; summing the days
# would count every case once per day it has been on record.
pop_counties_cov <- inner_join(
  counties_sf,
  group_by(Covid, countyFIPS) %>%
    summarise(cases = max(cases)),
  by = c("county_fips" = "countyFIPS")
)

# Read the county population data.
# csv at https://pastebin.com/jCD9tP0X
counties_pop <- read.csv("countyPopulations.csv", header = TRUE, stringsAsFactors = FALSE)
# Pad the FIPS codes of single-digit states to 5 characters.
counties_pop <- counties_pop %>%
  mutate(CountyFIPS = str_pad(as.character(CountyFIPS), 5, pad = "0"))

# Attach the population to each county by FIPS code (NA where no match).
pop_counties_cov$population <- counties_pop$Population[
  match(pop_counties_cov$county_fips, counties_pop$CountyFIPS)
]
# Infection rate as a percentage of the county population.
pop_counties_cov <- pop_counties_cov %>%
  mutate(infRate = (cases / population) * 100)
# Counties with 0 infections don't appear in the usafacts data, so didn't get
# a population; set them to 0.
pop_counties_cov$population[is.na(pop_counties_cov$population)] <- 0
pop_counties_cov$infRate[is.na(pop_counties_cov$infRate)] <- 0

# Output location and plot styling shared by the maps.
plotDate <- "April14"
basepath <- "your/output file/path/here/"
naColor <- "white"
lowColor <- "green"
midColor <- "maroon"
highColor <- "red"
baseFill <- "dodgerblue4"
baseColor <- "firebrick"
baseShape <- 23
################### end first run only ################################
################### Northeast Map ################################
# Keep only the Northeast states and counties.
ne_pop_counties_cov <- pop_counties_cov %>% filter(state_abbv %in% NE_region)
ne_states_sf <- states_sf %>% filter(state_abbv %in% NE_region)
ne_counties_sf <- counties_sf %>% filter(state_abbv %in% NE_region)

# Bases in the region. `longitude.1` / `latitude.1` are the coordinates that
# get plotted -- presumably the lon/lat pairs already transformed to the map
# projection (TODO confirm).
neBases <- structure(
  list(
    Base = c("Hanscom AFB", "Rome, NY"),
    longitude = c(-71.2743123, -75.4557303),
    latitude = c(42.4579955, 43.2128473),
    personnel = c(2906L, 822L),
    longitude.1 = c(2296805.44531269, 1951897.82199569),
    latitude.1 = c(128586.352781279, 99159.9145180969)
  ),
  row.names = c(NA, -2L),
  class = "data.frame"
)

p <- ne_pop_counties_cov %>%
  ggplot() +
  # Counties shaded by infection rate, state outlines drawn on top.
  geom_sf(mapping = aes(fill = infRate, geometry = geometry), color = NA) +
  geom_sf(data = ne_states_sf, fill = NA, color = "black", size = 0.25) +
  coord_sf(datum = NA) +
  scale_fill_gradient(
    name = "% Pop \nInfected", trans = "log", low = lowColor, high = highColor,
    # A break at 0 has no finite position on a log scale, so label the
    # smallest non-zero rate and the largest rate instead.
    breaks = range(ne_pop_counties_cov$infRate[ne_pop_counties_cov$infRate > 0]),
    labels = function(b) signif(b, 2),
    na.value = naColor
  ) +
  # Bases as diamonds sized by personnel.
  geom_point(
    data = neBases,
    aes(x = longitude.1, y = latitude.1, size = personnel),
    shape = baseShape,
    color = baseColor,
    fill = baseFill
  ) +
  theme_bw() +
  labs(size = 'AFMC \nMil + Civ') +
  theme(
    legend.position = "bottom",
    panel.border = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_blank()
  )
print(p)
################### End Northeast Map ################################
If you run this you should get a single frame...when I do the whole animation, here is the final frame
The diamonds represent the locations of air force bases we're interested in within the region, and they are sized by how many personnel are there.
What I have been asked to do is to make the diamonds the same size, but "color code" the fill based on the number of personnel. I don't think this is a good idea, but I'm not the boss.
I'm not sure how to have two gradient fills on a single plot?

If you want a second fill gradient on the same plot, you can use the new_scale_fill() function from the ggnewscale package:
library(ggnewscale)
# Northeast map with two independent fill gradients: one for the county
# infection rate, one for the personnel count at each base.
p <- ne_pop_counties_cov %>%
  ggplot() +
  geom_sf(mapping = aes(fill = infRate, geometry = geometry), color = NA) +
  geom_sf(data = ne_states_sf, fill = NA, color = "black", size = 0.25) +
  coord_sf(datum = NA) +
  scale_fill_gradient(
    name = "% Pop \nInfected", trans = "log", low = lowColor, high = highColor,
    # A break at 0 has no finite position on a log scale, so label the
    # smallest non-zero rate and the largest rate instead.
    breaks = range(ne_pop_counties_cov$infRate[ne_pop_counties_cov$infRate > 0]),
    labels = function(b) signif(b, 2),
    na.value = naColor
  ) +
  # Everything added after this point gets its own, separate fill scale.
  new_scale_fill() +
  geom_point(
    data = neBases,
    aes(x = longitude.1, y = latitude.1, fill = personnel),
    shape = baseShape,
    color = "black",
    # fill = baseFill,
    size = 5
  ) +
  scale_fill_gradient(
    name = "AFMC \nMil + Civ",
    low = "blue", high = "magenta",
    # Label both ends of the observed range; a break below the smallest
    # personnel count falls outside the scale and is not drawn.
    breaks = range(neBases$personnel)
  ) +
  theme_bw() +
  theme(
    legend.position = "bottom",
    panel.border = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_blank()
  )
print(p)
Does it answer your question ?

Related

R alluvial plots with different width?

I am trying to make a certain alluvial plot with different widths specified in different columns. Let me try to explain it by drawing it, as I am not sure how to do this in ggalluvial.
Notice that the width of the flow from the Male box represents 3 units, while it represents 10 in box 3. Is it possible to create such graphs in ggalluvial? Or how can one construct such a graph in R?
I haven't drawn the other flows just to focus on the flow from male to 3.
Here is some data that could be used to create such a graph:
# Example input: one row per flow, with a gender and a value column for each
# of the two years (2018 and 2019).
test_data <- data.table(`2018 - Gender` = c("Male", "Female", "Female", "Male"),
`2018 - Value` = c(10, 20, 30, 20),
`2019 - Gender` = c("Male", "Female", "Male", "Female"),
`2019 - Value` = c(20, 30, 10, 10)
)
Notice that the column names determine the "columns" in the graphs (i.e. the x-axis). While the Gender variable determines the blocks. The value from 2018 is the starting width, while the value from 2019 is the ending width of the strata.
As some have pointed out that I need to put more focus on my question. The question is how to make flow graphs with different starting and ending width.
Perhaps the following dummy example gives you a better idea. Please check if your data is in alluvial form with is_alluvia_form(), before you plot it.
# Dummy alluvial example: 20 rows spread over four fortnights.
# Needs dplyr, ggplot2 and ggalluvial attached (geom_flow / geom_stratum).
# The inputs are given descriptive names so that base::c() and base::t() are
# not shadowed by data vectors.
codes <- c(LETTERS[1:4], LETTERS[2:6], LETTERS[3:7], LETTERS[3:8])
timeframes <- c(
  rep("Fortnight 1", 4), rep("Fortnight 2", 5),
  rep("Fortnight 3", 5), rep("Fortnight 4", 6)
)
sexes <- rep(c("Female", "Male"), 10)
# Rows assigned to the second organization.
agency2_rows <- c(2, 3, 4, 6, 11, 13)
# Seed the generator so the example produces the same frequencies every run.
set.seed(42)
freqs <- rnorm(20, 20, 99)

df <- data.frame(
  Timeframe = timeframes,
  Code = codes,
  Sex = sexes,
  Freq = round(abs(freqs))
) %>%
  mutate(Organization = ifelse(row_number() %in% agency2_rows, "Agencia2", "Agencia1"))

alluvial_data <- df %>%
  select(Organization, Timeframe, Code, Freq, Sex) %>%
  mutate(id = row_number()) %>%
  # Remove duplicates
  distinct(Organization, Timeframe, Code, Sex, .keep_all = TRUE)
# levels(alluvial_data$Timeframe)
# Convert Timeframe to Factor - Categorical Variable
alluvial_data$Timeframe <- as.factor(alluvial_data$Timeframe)
# Convert Code to String
alluvial_data$Code <- as.character(alluvial_data$Code)

library(RColorBrewer)
# Define the number of colors you want
nb.cols <- 10
mycolors <- colorRampPalette(brewer.pal(8, "Set2"))(nb.cols)
# brewer.pal() supplies at least 3 colours; asking for 2 only adds a warning.
mycolor2 <- colorRampPalette(brewer.pal(3, "Set2"))(nb.cols)

# Chart
ggplot(
  alluvial_data,
  aes(y = Freq, axis1 = Organization, axis2 = Timeframe, axis3 = Code, fill = Sex)
) +
  # scale_fill_brewer(type = "qual", palette = "Set2") +
  scale_x_discrete(
    limits = c("Organization", "Timeframe", "Code"),
    expand = c(0.05, 0.05)
  ) +
  scale_fill_manual(values = mycolors) +
  geom_flow(stat = "alluvium", lode.guidance = "frontback") + # , color = "grey"
  geom_stratum(width = 1 / 4, fill = "cyan", color = "grey") +
  geom_label(stat = "stratum", aes(label = after_stat(stratum))) +
  theme(legend.position = "bottom") +
  ggtitle("Organizations") +
  guides(fill = guide_legend(override.aes = list(color = mycolors[1:2]))) +
  labs(fill = NULL)

how do i combine multiple data sources in ggplot using split and sapply?

this question is linked to a previous one answered by #Rui Barradas and #Duck, but i need more help. Previous link here:
how do i vectorise (automate) plot creation in R
Basically, I need to combine 3 datasets into one plot with a secondary y axis. All datasets need to be split by SITENAME and will facet wrap by Sampling.Year. I am using split and sapply. Being facet wrap the plots look something like this:
However, i'm now trying to add the two other data sources into the plots, to look something like this:
But I am struggling to add the two other data sources and get them to split by SITENAME. Here is my code so far...
Record plot format as a function to be applied to a split list df (ideally 'df' would be added as geom_line with a secondary y axis, and 'FF_start_dates' will be added as a vertical dashed line):
# Build the PAF plot for one site.
#
# AllDates_TPAF: data frame for a single site; the code reads its DATE,
#   Daily.Ave.PAF, Risk, Sampling.Year and SITENAME columns.
# Returns a ggplot object with one facet (row) per Sampling.Year, titled with
# the site name(s) found in SITENAME.
SITENAME_plot <- function(AllDates_TPAF){
  # Fixed colour per risk level; drop = FALSE keeps unused levels in the legend.
  risk_colours <- c(
    "Very Low" = "dark green", "Low" = "light green",
    "Moderate" = "yellow", "High" = "orange", "Very High" = "red"
  )

  ggplot(AllDates_TPAF, aes(DATE, Daily.Ave.PAF)) +
    geom_point(aes(colour = Risk), size = 3) +
    scale_colour_manual(values = risk_colours, drop = FALSE) +
    labs(x = "Month", y = "Total PAF (% affected)") +
    scale_x_date(breaks = "1 month", labels = scales::date_format("%B")) +
    facet_wrap(~Sampling.Year, ncol = 1, scales = "free") +
    # The secondary axis mirrors the primary one (identity transform).
    scale_y_continuous(
      limits = c(0, 100),
      sec.axis = sec_axis(~., name = "Water level (m)")
    ) +
    guides(color = guide_legend(reverse = TRUE)) +
    # theme_bw() is a complete theme and replaces every earlier theme()
    # setting, so it has to come before the individual tweaks.
    theme_bw() +
    theme(
      axis.text = element_text(size = 15),
      axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
      axis.title = element_text(size = 15, face = "bold"),
      legend.text = element_text(size = 15)
    ) +
    ggtitle(unique(AllDates_TPAF$SITENAME))
}
plot write function:
# Render one plot to "<dir>/<name>.jpg" and return that path.
#
# name: file name without extension.
# g:    object to print onto the device (a ggplot in this script).
# dir:  output directory.
SITENAME_plot_write <- function(name, g, dir = "N:/abc/"){
  flname <- paste0(file.path(dir, name), ".jpg")
  # The file is named .jpg, so open a JPEG device; a png() device here would
  # write PNG data under a .jpg name.
  jpeg(filename = flname, width = 1500, height = 1000)
  # Close the device even if printing fails, so no device is left open.
  on.exit(dev.off(), add = TRUE)
  print(g)
  flname
}
Apply function to list split by SITENAME:
# One data frame per site, one plot per data frame, one file per plot.
sp1 <- split(AllDates_TPAF, AllDates_TPAF$SITENAME)
gg_list <- lapply(sp1, SITENAME_plot)
mapply(SITENAME_plot_write, names(gg_list), gg_list, MoreArgs = list(dir = getwd()))
# No dev.off() here: SITENAME_plot_write() closes the device it opens, and an
# extra call with no device open fails.
I have uploaded samples of all 3 datasets here: Sample Data
Apologies for not using gsub but there was too much data and I couldn't get it to work properly
thanks in advance for any help you can give, even if it is just to point me towards a web tutorial of some kind.
You can try next code. I used the data you shared. Just be careful with names of all datasets. Ideally, the key columns as DATE and Sampling.Year should be present in all dataframes before making the split. Also some variables as Risk was absent so I added an example var with same name. Here the code, I added a function for the plot you want:
library(tidyverse)
library(readxl)
# Data: first sheet of the workbook
df1 <- read_excel("Sample data.xlsx", sheet = 1)
# Create var: made-up Risk column cycling through the five levels
# (5 levels x 67 repeats + 1 extra value = 336 entries)
df1[["Risk"]] <- c(
  rep(c("Very Low", "Low", "Moderate", "High", "Very High"), times = 67),
  "Very High"
)
# Other data: second and third sheets
df2 <- read_excel("Sample data.xlsx", sheet = 2)
df3 <- read_excel("Sample data.xlsx", sheet = 3)
# Split 1: one list element per site in each dataset
L1 <- split(df1, df1[["SITENAME"]])
L2 <- split(df2, df2[["SITENAME"]])
L3 <- split(df3, df3[["Site Name"]])
#Function to create plots
# Build the combined plot for one site.
#
# x: PAF data for the site (reads Sampling.Year, DATE, Daily.Ave.PAF, Risk,
#    SITENAME).
# y: data with a Height column, keyed by Sampling.Year and DATE.
# z: data whose 4th column holds the dates drawn as dashed vertical lines
#    (selected by position -- TODO confirm the column order of the input).
# Returns a ggplot object with one facet (row) per Sampling.Year.
myplot <- function(x, y, z) {
  # Merge x and y: keep one y row per (DATE, Sampling.Year) and drop its
  # SITENAME so the merge does not produce a duplicated column.
  y <- y[!duplicated(paste(y$DATE, y$Sampling.Year)), ]
  y$SITENAME <- NULL
  xy <- merge(x, y, by = c("Sampling.Year", "DATE"), all.x = TRUE)
  # Format to dates
  xy$DATE <- as.Date(xy$DATE)
  # Scale factor that maps Height onto the PAF axis. The left join leaves NA
  # heights on dates without a y row, so missing values are ignored here.
  scaleFactor <- max(xy$Daily.Ave.PAF, na.rm = TRUE) / max(xy$Height, na.rm = TRUE)
  # Rename for consistency in names
  names(z)[4] <- "DATE"
  # Format date
  z$DATE <- as.Date(z$DATE)

  risk_colours <- c(
    "Very Low" = "dark green", "Low" = "light green",
    "Moderate" = "yellow", "High" = "orange", "Very High" = "red"
  )

  # Plot
  ggplot(xy, aes(DATE, Daily.Ave.PAF)) +
    geom_point(aes(colour = Risk), size = 3) +
    scale_colour_manual(values = risk_colours, drop = FALSE) +
    scale_x_date(breaks = "1 month", labels = scales::date_format("%b %Y")) +
    # Height is drawn on the primary axis after rescaling; the secondary axis
    # divides by the same factor so its labels read in the original units.
    geom_line(aes(x = DATE, y = Height * scaleFactor)) +
    scale_y_continuous(
      name = "Total PAF (% affected)",
      sec.axis = sec_axis(~ . / scaleFactor, name = "Water level (m)")
    ) +
    labs(x = "Month") +
    geom_vline(data = z, aes(xintercept = DATE), linetype = "dashed") +
    facet_wrap(~Sampling.Year, ncol = 1, scales = "free") +
    guides(color = guide_legend(reverse = TRUE)) +
    # theme_bw() is a complete theme and replaces every earlier theme()
    # setting, so it has to come before the individual tweaks.
    theme_bw() +
    theme(
      axis.text = element_text(size = 15),
      axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
      axis.title = element_text(size = 15, face = "bold"),
      legend.text = element_text(size = 15)
    ) +
    ggtitle(unique(xy$SITENAME))
}
# Create a list of plots: the i-th elements of L1, L2 and L3 are combined
# into the i-th plot
Lplots <- Map(myplot, x = L1, y = L2, z = L3)
# Now format names: one .png file per list element
vnames <- paste0(names(Lplots), ".png")
mapply(
  ggsave,
  plot = Lplots,
  filename = vnames,
  MoreArgs = list(width = 30, units = "cm")
)
You will end up with plots like these saved in your dir:
Some dashed lines do not appear in plots because they were not present in the data you provided.

ggplot map legend does not display consistently

I'll apologize in advance that the example below isn't "minimal" but I haven't been able to reproduce this behavior except in the particular instance of my full data set.
I asked this question before here and thought I had found the answer, but the behavior has returned and is vexing me. Basically I have a script that reads daily COVID-19 case numbers and produces maps where the counties are color-coded by the percent of the population infected. The script produces five maps, a national one and one for each of the four official census regions: northeast, midwest, south and west. To cut down on size, the below is just the national and midwest maps.
My original script actually produces animated gifs showing the spread of the disease, but they take a long time to render. The below version just gives a single plot of the most recent data and should run pretty quickly.
I've used a dput in the below script to avoid you having to read a file and geocode locations (I commented out the code) but there is still a large csv file of county populations that has to be read in. I have posted it at pastebin here.
library(urbnmapr) # For map
library(ggplot2) # For map
library(dplyr) # For summarizing
library(tidyr) # For reshaping
library(stringr) # For padding leading zeros
library(ggrepel)
library(ggmap)
library(usmap)
library(gganimate)
library(magrittr)
library(gifski)
library(ggnewscale)
#if using Microsoft R, update checkpoint to get latest packages
#checkpoint("2020-03-01")
#start the clock
ptm <- proc.time()
set.seed(42)
#first run setup tasks
#these can be commented out once the data frames are in place
###################begin first run only################################
#register_google(key = "your google map key here")
#AFMCbases<-read.csv("C:/Users/jerem/Desktop/Work/covid_maps/AFMCbases.csv", header=TRUE, stringsAsFactors = FALSE)
#geocode the place names
# for(i in 1:nrow(AFMCbases)){
# result <- geocode(AFMCbases$Base[i])
# AFMCbases$longitude[i] <- as.numeric(result[1])
# AFMCbases$latitude[i] <- as.numeric(result[2])
# }
#transform the lat/lons to appropriate map projection
# locations<-AFMCbases[,2:3]
# new_locations <- usmap_transform(locations)
# AFMCbases <- cbind(AFMCbases,new_locations[,3:4])
# dput() of the bases table: one row per base (17 rows) with its name,
# longitude/latitude, personnel count, and a second coordinate pair
# (longitude.1 / latitude.1) that the maps use as plotting positions --
# presumably the lon/lat values transformed to the map projection (TODO confirm).
AFMCbases <- structure(list(Base = c("Gunter AFB", "Davis Monthan AFB", "Edwards AFB",
"Robins AFB", "Scott AFB", "Hanscom AFB", "Offutt AFB", "Holloman AFB",
"Kirtland AFB", "Rome, NY", "Wright-Patterson AFB", "Tinker AFB",
"Arnold AFB", "Joint Base San Antonio", "Hill AFB", "Arlington, VA",
"Eglin AFB"), longitude = c(-86.244558, -110.8592578, -117.8912085,
-83.591719, -89.8550095, -71.2743123, -95.9145568, -106.099291,
-106.5338817, -75.4557303, -84.0537448, -97.4158295, -86.0303306,
-98.4523675, -111.9826984, -77.1067698, -86.5533382), latitude = c(32.4083744,
32.1675525, 34.9240314, 32.6400014, 38.5415911, 42.4579955, 41.1242718,
32.8440404, 35.0539718, 43.2128473, 39.8137298, 35.4277, 35.3828616,
29.4512786, 41.10968, 38.8799697, 30.4635583), personnel = c(820L,
605L, 5317L, 14088L, 613L, 2906L, 177L, 699L, 1264L, 822L, 15299L,
16032L, 389L, 3443L, 13679L, 1157L, 8143L), longitude.1 = c(1292311.33608434,
-1025218.68277084, -1622487.54697885, 1533762.39465597, 881032.996527566,
2296805.44531269, 342224.203588191, -572424.401062999, -596268.294707156,
1951897.82199569, 1352969.1130143, 234917.935027853, 1263808.11814915,
151230.865464104, -1000093.31185121, 1953459.66491185, 1292835.72883446
), latitude.1 = c(-1293180.11438144, -1358896.37536667, -946347.80198453,
-1223833.19307048, -664025.051658055, 128586.352781279, -422393.887189579,
-1328730.76688869, -1081540.1543388, 99159.9145180969, -445535.143260001,
-1059563.46211616, -963250.657602903, -1722291.94024992, -359543.815036425,
-408019.910644083, -1511165.09243038)), class = "data.frame", row.names = c(NA,
-17L))
# Census regions, each a vector of state abbreviations
west_region <- c("WA", "OR", "CA", "NV", "ID", "MT", "WY", "UT", "CO", "AZ", "NM")
NE_region <- c("ME", "NH", "VT", "MA", "CT", "RI", "NY", "PA", "NJ")
midwest_region <- c("ND", "SD", "NE", "KS", "MN", "IA", "MO", "WI", "IL", "MI", "IN", "OH")
south_region <- c(
  "TX", "OK", "AR", "LA", "MS", "TN", "KY", "AL", "GA",
  "FL", "SC", "NC", "VA", "WV", "DC", "MD", "DE"
)

# Bases belonging to each region (names as they appear in AFMCbases$Base)
west_region_bases <- c(
  "Davis Monthan AFB", "Edwards AFB", "Holloman AFB", "Kirtland AFB", "Hill AFB"
)
south_region_bases <- c(
  "Robins AFB", "Tinker AFB", "Arnold AFB",
  "Joint Base San Antonio", "Arlington, VA", "Eglin AFB"
)
mw_region_bases <- c("Scott AFB", "Offutt AFB", "Wright-Patterson AFB")
ne_region_bases <- c("Hanscom AFB", "Rome, NY")

# COVID case counts, downloaded from:
url <- "https://static.usafacts.org/public/data/covid-19/covid_confirmed_usafacts.csv"
COV <- read.csv(url, stringsAsFactors = FALSE)
# the first column name sometimes arrives with encoding artefacts
names(COV)[1] <- "countyFIPS"

# Long format: one row per county and day. Each "X"-prefixed column name is
# split on "X", which leaves the date text in date_infected.
Covid <- COV %>%
  pivot_longer(
    cols = starts_with("X"),
    values_to = "cases",
    names_to = c("X", "date_infected"),
    names_sep = "X"
  ) %>%
  mutate(
    infected = as.Date(date_infected, format = "%m.%d.%y"),
    countyFIPS = str_pad(as.character(countyFIPS), 5, pad = "0")
  )

# Map geometry: counties (joined to the case data) and states (borders only)
states_sf <- get_urbn_map(map = "states", sf = TRUE)
counties_sf <- get_urbn_map(map = "counties", sf = TRUE)

# Join the county map to the case data.
# For animations, join every day instead:
# pop_counties_cov <- inner_join(counties_sf, Covid, by=c("county_fips"="countyFIPS")) %>%
# For the last frame only, keep the largest count recorded per county.
pop_counties_cov <- inner_join(
  counties_sf,
  Covid %>%
    group_by(countyFIPS) %>%
    summarise(cases = max(cases)),
  by = c("county_fips" = "countyFIPS")
)

# County populations, with FIPS codes padded back to 5 characters
counties_pop <- read.csv(
  "C:/Users/jerem/Desktop/Work/covid_maps/countyPopulations.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)
counties_pop$CountyFIPS <- str_pad(as.character(counties_pop$CountyFIPS), 5, pad = "0")

# Look up each county's population by FIPS code (NA where there is no match)
pop_counties_cov$population <- counties_pop$Population[
  match(pop_counties_cov$county_fips, counties_pop$CountyFIPS)
]
# Infection rate as a percentage of the population
pop_counties_cov <- pop_counties_cov %>%
  mutate(infRate = (cases / population) * 100)
# Counties without a population match end up NA; treat both values as 0
pop_counties_cov$population[is.na(pop_counties_cov$population)] <- 0
pop_counties_cov$infRate[is.na(pop_counties_cov$infRate)] <- 0

# Output location and styling constants shared by all maps
plotDate <- "April20"
basepath <- "C:/your/output/file/path"
naColor <- "white"
lowColor <- "green"
midColor <- "maroon"
highColor <- "red"
baseFill <- "dodgerblue4"
baseColor <- "firebrick"
baseShape <- 23
scaleLow <- "magenta"
scaleHigh <- "blue"
################### end first run only ################################
###################National Map################################
# National map: counties filled by infRate, state borders on top, and the
# bases drawn as points with their own fill scale (personnel).
p <- pop_counties_cov %>%
ggplot() +
geom_sf(mapping = aes(fill = infRate, geometry=geometry), color = NA) +
geom_sf(data = states_sf, fill = NA, color = "black", size = 0.25) +
coord_sf(datum = NA) +
# First fill scale: log-transformed gradient for the infection rate.
# NOTE(review): the lower break is 0 while the scale is log-transformed, and
# infRate contains 0 values -- check whether this is what makes the legend
# come and go.
scale_fill_gradient(name = "% Pop \nInfected", trans = "log",low=lowColor, high=highColor,
breaks=c(0, round(max(pop_counties_cov$infRate),1)),
na.value = naColor) +
# Start a second, independent fill scale for the layers that follow.
new_scale_fill() +
geom_point(data=AFMCbases,
aes(x=longitude.1, y=latitude.1,fill=personnel),
shape= baseShape,
color = "black",
size = 3) +
scale_fill_gradient(name="AFMC \nMil + Civ",
low = scaleLow, high = scaleHigh,
breaks = c(1, max(AFMCbases$personnel)))+
theme_bw() +
theme(legend.position="bottom",
panel.border = element_blank(),
axis.title.x=element_blank(),
axis.title.y=element_blank()) +
# Title carries the most recent date present in the case data.
labs(title=paste('Confirmed COVID-19 Cases: ', max(Covid$infected),sep=""),
subtitle='HQ AFMC/A9A \nData: usafacts.org')
# a <- p + transition_time(infected) +
# labs(title='Confirmed COVID-19 Cases: {frame_time}',
# subtitle='HQ AFMC/A9A \nData: usafacts.org')
#
# animate(a,
# device="png",
# renderer=file_renderer(paste(basepath,plotDate,"/national",sep=""),
# prefix="gganim_plot",
# overwrite=TRUE)
# )
#
# #make the national animated gif
# png_files <- list.files(paste(basepath,plotDate,"/national",sep=""), pattern = ".*png$", full.names = TRUE)
# st = format(Sys.time(), "%Y-%m-%d")
# gifName <- paste(basepath,plotDate,"/national/COVID-19-Cases-byCounty_",st,".gif",sep="")
# gifski(png_files, gif_file = gifName, width = 1000, height = 750, delay = 0.25, loop=FALSE)
# Save the image as <basepath>/<plotDate>/national/COVID-19-Cases-byCounty_<date>.png
st <- format(Sys.time(), "%Y-%m-%d")
# file.path() puts a separator between basepath and plotDate; pasting them
# directly only works when basepath happens to end in "/".
SaveFilename <- file.path(
  basepath, plotDate, "national",
  paste0("COVID-19-Cases-byCounty_", st, ".png")
)
# recursive = TRUE also creates the <plotDate> folder when it does not exist yet.
if (!dir.exists(dirname(SaveFilename))) {
  dir.create(dirname(SaveFilename), recursive = TRUE)
}
ggsave(filename = SaveFilename, plot = p, dpi = 300)
###################End National Map################################
###################Midwest Map################################
# Same plot as the national map, restricted to the midwest states.
#filter out states
#neCovid <- Covid %>% filter(State %in% NE_region )
mw_pop_counties_cov <- pop_counties_cov %>% filter(state_abbv %in% midwest_region)
mw_states_sf <- states_sf %>% filter(state_abbv %in% midwest_region)
mw_counties_sf <- counties_sf %>% filter(state_abbv %in% midwest_region)
#filter out bases
mwBases <- AFMCbases %>% filter(Base %in% mw_region_bases)
p <- mw_pop_counties_cov %>%
ggplot() +
geom_sf(mapping = aes(fill = infRate, geometry=geometry), color = NA) +
geom_sf(data = mw_states_sf, fill = NA, color = "black", size = 0.25) +
coord_sf(datum = NA) +
# First fill scale: log-transformed gradient for the infection rate.
# NOTE(review): the lower break is 0 while the scale is log-transformed --
# same pattern as in the national map.
scale_fill_gradient(name = "% Pop \nInfected", trans = "log",low=lowColor, high=highColor,
breaks=c(0, round(max(mw_pop_counties_cov$infRate),1)),
na.value = naColor) +
# Start a second, independent fill scale for the layers that follow.
new_scale_fill() +
geom_point(data=mwBases,
aes(x=longitude.1, y=latitude.1,fill=personnel),
shape = baseShape,
color = "black",
size=3) +
scale_fill_gradient(name="AFMC \nMil + Civ",
low=scaleLow, high = scaleHigh,
breaks = c(1, max(mwBases$personnel)))+
theme_bw() +
theme(legend.position="bottom",
panel.border = element_blank(),
axis.title.x=element_blank(),
axis.title.y=element_blank()) +
# Title carries the most recent date present in the case data.
labs(title=paste('Confirmed COVID-19 Cases: ', max(Covid$infected),sep=""),
subtitle='HQ AFMC/A9A \nData: usafacts.org')
# a <- p + transition_time(infected) +
# labs(title='Confirmed COVID-19 Cases: {frame_time}',
# subtitle='HQ AFMC/A9A \nData: usafacts.org')
#
# animate(a,
# device="png",
# renderer=file_renderer(paste(basepath,plotDate,"/midwest",sep=""),
# prefix="gganim_plot",
# overwrite=TRUE)
# )
#
# #make the midwest animated gif
# png_files <- list.files(paste(basepath,plotDate,"/midwest",sep=""), pattern = ".*png$", full.names = TRUE)
# st = format(Sys.time(), "%Y-%m-%d")
# gifName <- paste(basepath,plotDate,"/midwest/MW_COVID-19-Cases-byCounty_",st,".gif",sep="")
# gifski(png_files, gif_file = gifName, width = 1000, height = 750, delay = 0.25, loop=FALSE)
# Save the image as <basepath>/<plotDate>/midwest/MW_COVID-19-Cases-byCounty_<date>.png
st <- format(Sys.time(), "%Y-%m-%d")
# file.path() puts a separator between basepath and plotDate; pasting them
# directly only works when basepath happens to end in "/".
SaveFilename <- file.path(
  basepath, plotDate, "midwest",
  paste0("MW_COVID-19-Cases-byCounty_", st, ".png")
)
# recursive = TRUE also creates the <plotDate> folder when it does not exist yet.
if (!dir.exists(dirname(SaveFilename))) {
  dir.create(dirname(SaveFilename), recursive = TRUE)
}
ggsave(filename = SaveFilename, plot = p, dpi = 300)
###################End Midwest Map################################
This is the national map I got this morning when I ran the code
Note that there is a scale for the number of personnel at the bases (the colored diamonds) but there is no scale for the shading of the counties.
Here is the midwest map. You can see from the code that it is the same ggplot just with a dataset that is filtered down to the counties in the midwest region.
Now the scale is there. As mentioned in my previous question I thought that the answer had been something to do with the width of the image being insufficient to accommodate the scale. When I added a newline in the legend text to shorten it that appeared to do the trick. But now the legend is disappearing again, and making the output image wider has no effect. Plus, just by eyeball it would appear there is plenty of room in the national plot to accommodate the scale.
Another bizarre aspect is the behavior associated with rounding the breaks. Below is a west map where I applied no rounding to the breaks
scale_fill_gradient(name = "% Pop \nInfected",trans = "log", low=lowColor, high=highColor,
breaks=c(0, max(west_pop_counties_cov$infRate)),
na.value = naColor)
So the scale is back but it goes to 6 decimal places. If I try to round it to 2
scale_fill_gradient(name = "% Pop \nInfected",trans = "log", low=lowColor, high=highColor,
breaks=c(0, round(max(west_pop_counties_cov$infRate),2)),
na.value = naColor)
I get this map
which surely indicates the horizontal space isn't the issue...if it can accommodate 6 decimal places then surely there's room for 2?
I've spent as much time trying to figure out this inconsistent scale behavior as I spent writing the original script. I need these things to be consistent so that I can provide them as work products on a regular interval.
You can add manual labels and add some space to prevent the key to overlap with the title:
# National map with hand-picked legend breaks: the smallest non-zero rate and
# the largest rate, each labelled with its value rounded to one decimal.
ggplot(pop_counties_cov) +
  geom_sf(mapping = aes(fill = infRate, geometry = geometry), color = NA) +
  geom_sf(data = states_sf, fill = NA, color = "black", size = 0.25) +
  coord_sf(datum = NA) +
  scale_fill_gradient(
    name = "% Pop\nInfected ",
    trans = "log2",
    low = lowColor,
    high = highColor,
    breaks = c(
      min(pop_counties_cov$infRate[pop_counties_cov$infRate != 0]),
      max(pop_counties_cov$infRate)
    ),
    labels = round(
      c(
        min(pop_counties_cov$infRate[pop_counties_cov$infRate != 0]),
        max(pop_counties_cov$infRate)
      ),
      1
    ),
    na.value = naColor
  ) +
  # Second, independent fill scale for the base markers
  new_scale_fill() +
  geom_point(
    data = AFMCbases,
    mapping = aes(x = longitude.1, y = latitude.1, fill = personnel),
    shape = baseShape,
    color = "black",
    size = 3
  ) +
  scale_fill_gradient(
    name = "AFMC \nMil + Civ",
    low = scaleLow,
    high = scaleHigh,
    breaks = c(1, max(AFMCbases$personnel))
  ) +
  theme_bw() +
  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    panel.border = element_blank(),
    legend.position = "bottom"
  ) +
  labs(
    title = paste0('Confirmed COVID-19 Cases: ', max(Covid$infected)),
    subtitle = 'HQ AFMC/A9A \nData: usafacts.org'
  )
Your issue is due to the presence of 0 values in your variable infRate which messed up with the log transformation in your scale_fill_gradient as observed by this Warning message:
Warning message: Transformation introduced infinite values in discrete y-axis
Here is a way to circumvent that: set the limits and breaks arguments using the smallest non-zero value as the lower bound:
> summary(pop_counties_cov$infRate)
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.00000 0.01543 0.03993 0.09178 0.09043 2.87425
> summary(pop_counties_cov$infRate[pop_counties_cov$infRate != 0])
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.001537 0.023724 0.048169 0.105326 0.102350 2.874253
Setting these new limits (and removing rows with 0 values) give you this:
# National map without the zero-rate counties, with the legend limits and
# breaks taken from the data instead of typed in by hand, so the same code
# keeps working as the rates change from day to day.
pop_counties_cov %>%
  filter(infRate != 0) %>%
  ggplot() +
  geom_sf(mapping = aes(fill = infRate, geometry = geometry), color = NA) +
  geom_sf(data = states_sf, fill = NA, color = "black", size = 0.25) +
  coord_sf(datum = NA) +
  scale_fill_gradient(
    name = "% Pop \nInfected", trans = "log", low = lowColor, high = highColor,
    # Smallest non-zero and largest rate: 0 cannot be placed on a log scale,
    # and a fixed upper limit would blank out counties that later exceed it.
    limits = range(pop_counties_cov$infRate[pop_counties_cov$infRate != 0]),
    breaks = range(pop_counties_cov$infRate[pop_counties_cov$infRate != 0]),
    # Two significant digits keep the legend labels short.
    labels = function(b) signif(b, 2),
    na.value = naColor
  ) +
  # Second, independent fill scale for the base markers.
  new_scale_fill() +
  geom_point(
    data = AFMCbases,
    aes(x = longitude.1, y = latitude.1, fill = personnel),
    shape = baseShape,
    color = "black",
    size = 3
  ) +
  scale_fill_gradient(
    name = "AFMC \nMil + Civ",
    low = scaleLow, high = scaleHigh,
    # Label both ends of the observed range; a break below the smallest
    # personnel count falls outside the scale and is not drawn.
    breaks = range(AFMCbases$personnel)
  ) +
  theme_bw() +
  theme(
    legend.position = "bottom",
    panel.border = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_blank()
  ) +
  labs(
    title = paste0('Confirmed COVID-19 Cases: ', max(Covid$infected)),
    subtitle = 'HQ AFMC/A9A \nData: usafacts.org'
  )
Does it answer your question ?

Dissolving maps polygon

I want to dissolve a polygon so I get only a line for the outline of the whole region instead of it being broken up by county.
install.packages (c("tidyverse","mapdata","maps","stringr","viridis"))
library(tidyverse)
library(mapdata)
library(maps)
library(stringr)
library(viridis)
# State outline of California.
california <- map_data("state", region = "california")
california1 <- ggplot() +
  geom_polygon(
    data = california,
    aes(x = long, y = lat, group = group),
    color = "black", fill = NA
  ) +
  coord_quickmap()

# California county lines
uscounties <- map_data("county")
ca_county <- uscounties %>% filter(region == "california")
# Counties that make up the central region. The county is spelled "alpine";
# a misspelled name matches nothing and silently drops the county.
central <- ca_county %>%
  filter(subregion %in% c(
    "alpine", "kings", "tulare", "fresno", "inyo", "kern", "madera"
  ))

# Draw the central counties over the state outline. (A polygon layer with no
# data of its own draws nothing, so none is added here.)
ca2 <- california1 +
  theme_void() +
  geom_polygon(
    data = central,
    aes(x = long, y = lat, group = group),
    fill = "white", color = "black"
  ) +
  # NOTE(review): y = 46.5 puts the label well above the plotted polygons --
  # confirm whether a latitude inside the region (e.g. 36.5) was intended.
  annotate("text", x = -119, y = 46.5, label = "Central", colour = "black")
ca2
Thanks in advance for the help!
I've answered a similar question before. Reworked it slightly for your use case, with explanations in annotated code below:
library(tidyverse)
library(maps)
# get map (as map object)
county_map <- map("county", regions = "california",
                  fill = TRUE, plot = FALSE)

# Lookup table, one row per polygon name ("region,subregion"), flagging the
# counties that belong to the central area. The county is spelled "alpine";
# a misspelled name matches nothing and silently leaves the county out.
county_map_match <- data.frame(name = county_map$names) %>%
  separate(name, c("region", "subregion"), sep = ",", remove = FALSE) %>%
  mutate(central = subregion %in% c("alpine", "kings", "tulare",
                                    "fresno", "inyo", "kern", "madera")) %>%
  column_to_rownames("name")

# convert to SpatialPolygonsDataFrame object (using maptools & sp packages)
county_map <- maptools::map2SpatialPolygons(county_map, ID = county_map$names)
county_map <- sp::SpatialPolygonsDataFrame(county_map, county_map_match)
rm(county_map_match)

# remove any invalidity (using rgeos package) before dissolving
rgeos::gIsValid(county_map) # check
county_map <- rgeos::gBuffer(county_map, byid = TRUE, width = 0)
rgeos::gIsValid(county_map) # check again (invalidities removed)

# dissolve by whether each polygon is part of central area
county_map <- maptools::unionSpatialPolygons(county_map, IDs = county_map$central)
county_map <- fortify(county_map)
# Keep every piece of the dissolved "TRUE" feature. Filtering on the group
# "TRUE.1" would keep only its first ring and drop any part of the region
# that does not touch the rest.
county_map <- county_map %>% filter(id == "TRUE")

# plot the dissolved central area (one outline per connected piece)
ggplot() +
  geom_polygon(data = county_map,
               aes(x = long, y = lat, group = group),
               fill = "white", colour = "black") +
  coord_map()

Choropleth Map in ggplot2

I'm trying to reproduce the Choropleth Map given here with the code provided by Hadley.
library(ggplot2)
library(maps)
# First (and most annoying) task - get matching state and county variables
# for both datasets. And unfortunately it's not quite right, as you can
# see from the finished product - some counties are missing.
unemp <- read.csv("unemployment09.csv", header = F, stringsAsFactors = F)
names(unemp) <- c("id", "state_fips", "county_fips", "name", "year",
"?", "?", "?", "rate")
# County key: the name with its " County, XX" suffix removed, lower-cased.
unemp$county <- tolower(gsub(" County, [A-Z]{2}", "", unemp$name))
# State key: a two-letter upper-case code captured from the name.
unemp$state <- gsub("^.*([A-Z]{2}).*$", "\\1", unemp$name)
county_df <- map_data("county")
names(county_df) <- c("long", "lat", "group", "order", "state_name", "county")
# Translate the lower-case state names of the map data to abbreviations.
county_df$state <- state.abb[match(county_df$state_name, tolower(state.name))]
county_df$state_name <- NULL
state_df <- map_data("state")
# Combine together
choropleth <- merge(county_df, unemp, by = c("state", "county"))
# Restore the drawing order of the polygon vertices after the merge.
choropleth <- choropleth[order(choropleth$order), ]
# Discretise rate to use with Brewer colour scheme - many options here
# choropleth$rate_d <- cut_number(choropleth$rate, 5)
# choropleth$rate_d <- cut_interval(choropleth$rate, 5)
# Nathan's choice is a little odd:
choropleth$rate_d <- cut(choropleth$rate, breaks = c(seq(0, 10, by = 2), 35))
# Once you have the data in the right format, recreating the plot is straight
# forward.
# NOTE(review): alpha() is not defined by anything this snippet visibly loads
# (presumably scales::alpha), and `pal` is presumably meant to be `palette` --
# confirm both against the installed ggplot2 version.
ggplot(choropleth, aes(long, lat, group = group)) +
geom_polygon(aes(fill = rate_d), colour = alpha("white", 1/2), size = 0.2) +
geom_polygon(data = state_df, colour = "white", fill = NA) +
scale_fill_brewer(pal = "PuRd")
But this code gives the following error:
Error in do.call("layer", list(mapping = mapping, data = data, stat = stat, :
could not find function "alpha"
Deleting alpha and using this code
# Same plot with a plain white outline instead of the alpha() call.
# NOTE(review): `pal` is presumably meant to be `palette` -- confirm against
# the scale_fill_brewer() documentation of the installed ggplot2 version.
ggplot(choropleth, aes(long, lat, group = group)) +
geom_polygon(aes(fill = rate_d), colour = "white", size = 0.2) +
geom_polygon(data = state_df, colour = "white", fill = NA) +
scale_fill_brewer(pal = "PuRd")
gives the following error:
Error in scale_map.discrete(scale, df[[j]]) : attempt to apply
non-function
How can I fix this problem?

Resources