Draw a line between 2 zip code on US map in R - r

I have been trying to draw a straight line connecting 2 zip code in the US. So far I've only been able to plot points of each zip code in the US, but can't draw the line between them
Here is what my data look like. I am trying to draw 8 points on the US map and connect them, with 4 lines. I tried using zip.plot but it would only point points, not drawing a line
df1 <- data.frame(trip = c(1,2,3,4), zip1 = c(55803,87112,55107,66006), zip2=c(12909,93703,12205,78210))
df1
trip zip1 zip2
1 1 55803 12909
2 2 87112 93703
3 3 55107 12205
4 4 66006 78210

Take a look at the code for zip.plot function, and you'll see it is straightforward. It will merge your zip code data with longitude and latitude data from data(zips). You'll notice it will plot points, but no function to connect them, and it doesn't return points plotted.
You could adapt a similar function that meets your needs. If you include library(muRL) you can load zip data by data(zips). After plotting the points, you can add lines to connect them based on trip variable.
For example, create a new function zip.plot.new:
library(muRL)
data(zips)
zip.plot.new <- function(data, map.type = "state", ...){
data.z <- merge(data, zips[,c("zip", "lat", "lon")], by.x = "zip", by.y = "zip", all.x = TRUE)
maps::map(map.type, ...)
points(data.z$lon, data.z$lat, cex = 1, col = "black", pch = 20)
mapply(lines, split(data.z$lon, data.z$trip), split(data.z$lat, data.z$trip))
}
This includes mapply(lines... to connect points by trip.
Then, you can use your data frame, convert to longer form, and call this new function:
library(tidyverse)
df1 %>%
pivot_longer(cols = starts_with("zip_"), names_to = c(".value", "group"), names_sep = "_") %>%
zip.plot.new(.)
Note that zip code 12909 was not matched in the data (appears not valid?).
Data
df1 <- data.frame(trip = c(1,2,3,4),
zip_1 = c("55803","87112","55107","66006"),
zip_2 = c("12909","93703","12205","78210"))
Edit: Here's a ggplot version:
library(ggmap)
library(maps)
library(ggplot2)
library(tidyverse)
MainStates <- map_data("state")
point_data <- df1 %>%
pivot_longer(cols = starts_with("zip_"), names_to = c(".value", "group"), names_sep = "_") %>%
mutate(zip = factor(zip, levels = levels(zips$zip))) %>%
left_join(zips)
ggplot() +
geom_polygon(data=MainStates, aes(x=long, y=lat, group=group), color = "black", fill = "white") +
geom_point(data = point_data, aes(x = lon, y = lat, group = trip)) +
geom_line(data = point_data, aes(x = lon, y = lat, group = trip)) +
coord_fixed(1.3) +
theme_nothing()

Related

R Plotting three timeseries in two facet_grids in ggplot

Is it possible to plot three timeseries in only two grids using ggplot and facet_grid()?
# Create some fake data
stock1 = cumprod(1+c(0, rnorm(99, 0, .05)))
stock2 = cumprod(1+c(0, rnorm(99, 0, .075)))
indicator = sample(1:50, 100, replace = TRUE)
date_seq = seq.Date(as.Date("2023-01-01"), length.out = 100, by = 1)
df = data.frame(date = date_seq, stock1 = stock1, stock2 = stock2, indicator = indicator)
Now I would like to see an upper graph with the two stocks and one lower graph with the indicator using facet_grid().
The only result I get is a three-grid plot
grid_df = pivot_longer(df, c(stock1, stock2, indicator), names_to = "underlying", values_to = "values")
ggplot(grid_df, aes(x = date, y = values, colour = underlying)) +
geom_line() +
facet_grid(vars(underlying), scales = "free")
I dont know how to group the two stocks to bring them into one grid.
Thanks for help!
You could add an extra column to your longer format data where you could combine the stocks 1 and 2 to one string called stocks and leave the indicator alone using an ifelse to assign them to the facet_grid like this:
library(ggplot2)
library(dplyr)
library(tidyr)
grid_df = pivot_longer(df, c(stock1, stock2, indicator), names_to = "underlying", values_to = "values") %>%
mutate(grids = ifelse(underlying == "indicator", "indicator", "stock"))
ggplot(grid_df, aes(x = date, y = values, colour = underlying)) +
geom_line() +
facet_grid(vars(grids), scales = "free")
Created on 2023-02-19 with reprex v2.0.2

Color sf points and linestring by numeric variable

I've got some GPS data (latlong) and I want to plot the GPS points and their connecting lines and color both by the time difference between the two GPS points. I've figured out how to color the points and convert the points to a LINESTRING but I can't figure out how to recolor the scale of the line.
I saw this post:
Color portions of sf LINESTRING by variable that shows how to break the linestrings into segments and color the segments by a categorical variable but as I have close to 100,000 observations I'd like to avoid just splitting my plot up into 99,999 pieces and also, my data is continuous.
Here's some toy data:
# Create some data points
fake_data = data.frame(Time = 1:6,
Long = c(-90.46200, -90.46160, -90.46170, -90.46150, -90.46100, -90.46240),
Lat = c(33.88540, 33.88750, 33.88520, 33.88340, 33.88540, 33.88150))
# Define as points
points = st_as_sf(fake_data, coords = c("Long", "Lat"), crs = 4326, remove = FALSE)
# Connect the dots
lines = points %>% summarize(do_union = FALSE) %>% st_cast("LINESTRING")
library(ggplot2)
# Plot
ggplot(data = points)+
geom_sf(aes(color = as.numeric(points$Time)))+
geom_sf(data = lines)+#, aes(color = numeric(points$Time[1:(length(points$Time)-1)])))+ #did not work
ylim(c(33.87, 33.89))+
xlim(c(-90.47, -90.45))+
scale_color_gradient(name = "Time", position="bottom" , low = "blue", high = "red")
Thank you!
I'm confident there are prettier ways to do this, but this works!
I needed to add in a group variable to use to generate linegroups. This was inspired by: https://stackoverflow.com/a/48979401/3642716 and their answer with how to solve for troops in the tidyverse dataset.
library(sf)
library(dplyr)
library(ggplot2)
# Create some data points
fake_data = data.frame(Time = 1:6,
Long = c(-90.46200, -90.46160, -90.46170, -90.46150, -90.46100, -90.46240),
Lat = c(33.88540, 33.88750, 33.88520, 33.88340, 33.88540, 33.88150),
group = 1)
# Define as points
points = st_as_sf(fake_data, coords = c("Long", "Lat"), crs = 4326, remove = FALSE)
# Connect the dots
lines <- fake_data
lines %<>% group_by(group) %>%
slice(rep(1:n(), each = 2)) %>%
slice(-c(1, n())) %>%
mutate(linegroup = lapply(1:(n()/2), function(x) rep(x, 2)) %>% unlist) %>%
ungroup %>%
group_by(linegroup) %>%
st_as_sf(coords = c("Long","Lat"), crs = 4326, remove = F) %>%
summarize( do_union = F) %>%
st_cast("LINESTRING")
# Plot
ggplot(data = points)+
geom_sf(aes(color = `Time`))+
geom_sf(data = lines, aes(color = `linegroup`))+#, aes(color = numeric(points$Time[1:(length(points$Time)-1)])))+ #did not work
ylim(c(33.881, 33.888))+
xlim(c(-90.463, -90.460))+
scale_color_gradient(name = "Time", position="bottom" , low = "blue", high = "red")
Looks like this:

How to display the ID of outliers on a boxplot

I want to display the IDs that have extreme values on a boxplot but I have no idea how to do it.
For example the IDs corresponding to the values 10, 98 and 120
Poids<-c(round(rnorm(100,65,10),1),10,53,120,98)
ID<-c(paste("A",1:26,sep = ""),paste("B",1:26,sep = ""),paste("C",1:26,sep = ""),
paste("D",1:26,sep = ""))
mydata<-data.frame(ID=ID,Poids=Poids)
Using tidyverse packages you can create a subset inside geom_text, here how:
Data
Poids <- c(round(rnorm(100,65,10),1),10,53,120,98)
ID <- c(paste("A",1:26,sep = ""),paste("B",1:26,sep = ""),paste("C",1:26,sep = ""),
paste("D",1:26,sep = ""))
mydata <- data.frame(ID=ID,Poids=Poids)
Setting values manually
Code
library(dplyr)
library(ggplot2)
mydata %>%
ggplot(aes(x = Poids))+
geom_boxplot()+
geom_text(
data = mydata %>% filter(Poids %in% c(10,98,120)),
mapping = aes(y = 0,label = ID),
nudge_y = .05
)
Output
Using boxplot outlier criteria
Code
# remotes::install_github("vbfelix/relper")
library(relper)
mydata %>%
ggplot(aes(x = Poids))+
geom_boxplot()+
geom_text(
data = mydata %>% filter(is_outlier(Poids)),
mapping = aes(y = 0,label = ID),
nudge_y = .05
)
Output

Difficulty in customising cartogram output in R Studio

I am able to produce a cartogram using cartogram::cartogram_cont() But then have difficulty in customising the styling.
I have used broom::tidy() and dplyr::left_join() to fortify the cartogram, but I think perhaps the tidy stage has interfered with the plotOrder. If possible, I will include the output cartograms.
I'm attempting to replicate this type of output, but within my locality. Plesae note that the dataset used for the weighting in cartogram_cont() is not particularly significant, just a proof of concept at this stage:
[R Graph Gallery][1]
[1]: https://www.r-graph-gallery.com/331-basic-cartogram/
Shapefile from: [Lle Shapefile Location][2]
[2]: http://lle.gov.wales/catalogue/item/LocalAuthorities/?lang=en
library(dplyr)
library(leaflet)
library(maptools)
library(cartogram)
library(devtools)
install_github("HanOostdijk/odataR" , build_vignettes = T)
library(odataR)
library(tidyr)
library(rgdal)
library(htmltools)
#Read in shapefile and transform shape
#dsn = folder name, layer = filename but drop the .shp
shapefile <- readOGR(dsn = "Wales Shapefile",
layer = "localauthoritiesPolygon") %>%
#Transform coordinate referencing system
spTransform(CRS("+init=epsg:4326"))
#Next step is to join an interesting dataset to the shapefile using dplyr, then pass this to the cartoram package to render.
#Gone for the teacher sickness dataset from Stats Wales. Noticed it's only up to 2017, wonder if they've stopped collecting.
teacher_sickness_data <- odataR_query('http://open.statswales.gov.wales/dataset/schw0001')
#Check values for join.
categories <- unique(teacher_sickness_data$Area_ItemName_ENG)
categories_shp <- shapefile#data$name_en
categories
categories_shp
#Teacher data has "All Welsh local authorities". Not contained in shapefile so remove.
UA_sickness_data <- teacher_sickness_data[-c(2, 4:6, 8, 9, 11:13, 15:17)] %>%
filter(Area_ItemName_ENG != "All Welsh local authorities")
#Perform join to shapefile
shapefile_1 <- shapefile %>%
merge(UA_sickness_data, by.x = "name_en", by.y = "Area_ItemName_ENG",
duplicateGeoms = TRUE)
#Shiny App will allow choice of inputs to achieve one row per polygon. However, for testing
#functionality with cartograph functions, perform test filtering.
data_filtered <- UA_sickness_data %>%
filter(Year_ItemName_ENG == 2017) %>%
filter(Type_ItemName_ENG == "Full-time") %>%
filter(Variable_ItemName_ENG == "Total days of sick leave")
test_merge <- shapefile %>%
merge(data_filtered, by.x = "name_en", by.y = "Area_ItemName_ENG")
nc_pal <- colorNumeric(palette = "Reds",
domain = log(test_merge#data$Data))
m <-test_merge %>%
leaflet() %>%
addProviderTiles(providers$CartoDB.Positron) %>%
addPolygons(weight = 1,
color = ~nc_pal(log(Data)),
label = ~name_en,
highlight = highlightOptions(weight = 3,
color = "crimson",
bringToFront = TRUE),
popup = ~ paste0(Variable_ItemName_ENG, "<br/>",
"<b/>",
Data))
m
wales_cart <- cartogram_cont(test_merge, "Data", itermax=5)
plot(wales_cart)
[![Wales_Cartogram][3]][3]
[3]: https://i.stack.imgur.com/2tsMC.png
library(tidyverse)
library(ggmap)
library(broom)
library(rgeos) #used for gBuffer
#Buffer allows to tidy cartogram based on factor of choice.
wales_cart_buffered <- gBuffer(wales_cart, byid=TRUE, width=0)
#tidy cartogram in order to pass to ggplot
spdf_fortified_wales <- tidy(wales_cart_buffered, region = "name_en")
#Now perform a join based on english UA names
spdf_fortified_wales_joined <- spdf_fortified_wales %>%
left_join(. , wales_cart#data, by=c("id"="name_en"))
ggplot() +
geom_polygon(data = spdf_fortified_wales_joined, aes(fill = Data, x = long, y = lat, group = "name_en") , size=0, alpha=0.9) +
coord_map() +
theme_void()
[![incorrect_ggplot][4]][4]
[4]: https://i.stack.imgur.com/as0Z4.png
ggplot() +
geom_polygon(data = spdf_fortified_wales_joined, aes(fill = Data, x = long, y = lat, group = "name_en") , size=0, alpha=0.9) +
coord_map() +
theme_void()
Success Criteria: Polygons are rendered correctly distorted and colour scale reflects weighting variable.

Create shaded polygons around points with ggplot2

I saw yesterday this beautiful map of McDonalds restaurants in USA. I wanted to replicate it for France (I found some data that can be downloaded here).
I have no problem plotting the dots:
library(readxl)
library(ggplot2)
library(raster)
#open data
mac_do_FR <- read_excel("./mcdo_france.xlsx")
mac_do_FR_df <- as.data.frame(mac_do_FR)
#get a map of France
mapaFR <- getData("GADM", country="France", level=0)
#plot dots on the map
ggplot() +
geom_polygon(data = mapaFR, aes(x = long, y = lat, group = group),
fill = "transparent", size = 0.1, color="black") +
geom_point(data = mac_do_FR_df, aes(x = lon, y = lat),
colour = "orange", size = 1)
I tried several methods (Thiessen polygons, heat maps, buffers), but the results I get are very poor. I can't figure out how the shaded polygons were plotted on the American map. Any pointers?
Here's my result, but it did take some manual data wrangling.
Step 1: Get geospatial data.
library(sp)
# generate a map of France, along with a fortified dataframe version for ease of
# referencing lat / long ranges
mapaFR <- raster::getData("GADM", country="France", level=0)
map.FR <- fortify(mapaFR)
# generate a spatial point version of the same map, defining your own grid size
# (a smaller size yields a higher resolution heatmap in the final product, but will
# take longer to calculate)
grid.size = 0.01
points.FR <- expand.grid(
x = seq(min(map.FR$long), max(map.FR$long), by = grid.size),
y = seq(min(map.FR$lat), max(map.FR$lat), by = grid.size)
)
points.FR <- SpatialPoints(coords = points.FR, proj4string = mapaFR#proj4string)
Step 2: Generate a voronoi diagram based on store locations, & obtain the corresponding polygons as a SpatialPolygonsDataFrame object.
library(deldir)
library(dplyr)
voronoi.tiles <- deldir(mac_do_FR_df$lon, mac_do_FR_df$lat,
rw = c(min(map.FR$long), max(map.FR$long),
min(map.FR$lat), max(map.FR$lat)))
voronoi.tiles <- tile.list(voronoi.tiles)
voronoi.center <- lapply(voronoi.tiles,
function(l) data.frame(x.center = l$pt[1],
y.center = l$pt[2],
ptNum = l$ptNum)) %>%
data.table::rbindlist()
voronoi.polygons <- lapply(voronoi.tiles,
function(l) Polygon(coords = matrix(c(l$x, l$y),
ncol = 2),
hole = FALSE) %>%
list() %>%
Polygons(ID = l$ptNum)) %>%
SpatialPolygons(proj4string = mapaFR#proj4string) %>%
SpatialPolygonsDataFrame(data = voronoi.center,
match.ID = "ptNum")
rm(voronoi.tiles, voronoi.center)
Step 3. Check which voronoi polygon each point on the map overlaps with, & calculate its distance to the corresponding nearest store.
which.voronoi <- over(points.FR, voronoi.polygons)
points.FR <- cbind(as.data.frame(points.FR), which.voronoi)
rm(which.voronoi)
points.FR <- points.FR %>%
rowwise() %>%
mutate(dist = geosphere::distm(x = c(x, y), y = c(x.center, y.center))) %>%
ungroup() %>%
mutate(dist = ifelse(is.na(dist), max(dist, na.rm = TRUE), dist)) %>%
mutate(dist = dist / 1000) # convert from m to km for easier reading
Step 4. Plot, adjusting the fill gradient parameters as needed. I felt the result of a square root transformation looks quite good for emphasizing distances close to a store, while a log transformation is rather too exaggerated, but your mileage may vary.
ggplot() +
geom_raster(data = points.FR %>%
mutate(dist = pmin(dist, 100)),
aes(x = x, y = y, fill = dist)) +
# optional. shows outline of France for reference
geom_polygon(data = map.FR,
aes(x = long, y = lat, group = group),
fill = NA, colour = "white") +
# define colour range, mid point, & transformation (if desired) for fill
scale_fill_gradient2(low = "yellow", mid = "red", high = "black",
midpoint = 4, trans = "sqrt") +
labs(x = "longitude",
y = "latitude",
fill = "Distance in km") +
coord_quickmap()

Resources