Create shaded polygons around points with ggplot2 - r

I saw yesterday this beautiful map of McDonalds restaurants in USA. I wanted to replicate it for France (I found some data that can be downloaded here).
I have no problem plotting the dots:
library(readxl)
library(ggplot2)
library(raster)
#open data
mac_do_FR <- read_excel("./mcdo_france.xlsx")
mac_do_FR_df <- as.data.frame(mac_do_FR)
#get a map of France
mapaFR <- getData("GADM", country="France", level=0)
#plot dots on the map
ggplot() +
geom_polygon(data = mapaFR, aes(x = long, y = lat, group = group),
fill = "transparent", size = 0.1, color="black") +
geom_point(data = mac_do_FR_df, aes(x = lon, y = lat),
colour = "orange", size = 1)
I tried several methods (Thiessen polygons, heat maps, buffers), but the results I get are very poor. I can't figure out how the shaded polygons were plotted on the American map. Any pointers?

Here's my result, but it did take some manual data wrangling.
Step 1: Get geospatial data.
library(sp)
# generate a map of France, along with a fortified dataframe version for ease of
# referencing lat / long ranges
mapaFR <- raster::getData("GADM", country="France", level=0)
map.FR <- fortify(mapaFR)
# generate a spatial point version of the same map, defining your own grid size
# (a smaller size yields a higher resolution heatmap in the final product, but will
# take longer to calculate)
grid.size = 0.01
points.FR <- expand.grid(
x = seq(min(map.FR$long), max(map.FR$long), by = grid.size),
y = seq(min(map.FR$lat), max(map.FR$lat), by = grid.size)
)
points.FR <- SpatialPoints(coords = points.FR, proj4string = mapaFR#proj4string)
Step 2: Generate a voronoi diagram based on store locations, & obtain the corresponding polygons as a SpatialPolygonsDataFrame object.
library(deldir)
library(dplyr)
voronoi.tiles <- deldir(mac_do_FR_df$lon, mac_do_FR_df$lat,
rw = c(min(map.FR$long), max(map.FR$long),
min(map.FR$lat), max(map.FR$lat)))
voronoi.tiles <- tile.list(voronoi.tiles)
voronoi.center <- lapply(voronoi.tiles,
function(l) data.frame(x.center = l$pt[1],
y.center = l$pt[2],
ptNum = l$ptNum)) %>%
data.table::rbindlist()
voronoi.polygons <- lapply(voronoi.tiles,
function(l) Polygon(coords = matrix(c(l$x, l$y),
ncol = 2),
hole = FALSE) %>%
list() %>%
Polygons(ID = l$ptNum)) %>%
SpatialPolygons(proj4string = mapaFR#proj4string) %>%
SpatialPolygonsDataFrame(data = voronoi.center,
match.ID = "ptNum")
rm(voronoi.tiles, voronoi.center)
Step 3. Check which voronoi polygon each point on the map overlaps with, & calculate its distance to the corresponding nearest store.
which.voronoi <- over(points.FR, voronoi.polygons)
points.FR <- cbind(as.data.frame(points.FR), which.voronoi)
rm(which.voronoi)
points.FR <- points.FR %>%
rowwise() %>%
mutate(dist = geosphere::distm(x = c(x, y), y = c(x.center, y.center))) %>%
ungroup() %>%
mutate(dist = ifelse(is.na(dist), max(dist, na.rm = TRUE), dist)) %>%
mutate(dist = dist / 1000) # convert from m to km for easier reading
Step 4. Plot, adjusting the fill gradient parameters as needed. I felt the result of a square root transformation looks quite good for emphasizing distances close to a store, while a log transformation is rather too exaggerated, but your mileage may vary.
ggplot() +
geom_raster(data = points.FR %>%
mutate(dist = pmin(dist, 100)),
aes(x = x, y = y, fill = dist)) +
# optional. shows outline of France for reference
geom_polygon(data = map.FR,
aes(x = long, y = lat, group = group),
fill = NA, colour = "white") +
# define colour range, mid point, & transformation (if desired) for fill
scale_fill_gradient2(low = "yellow", mid = "red", high = "black",
midpoint = 4, trans = "sqrt") +
labs(x = "longitude",
y = "latitude",
fill = "Distance in km") +
coord_quickmap()

Related

Create random points based in distance and boundary conditions

In my example, I have:
# Packages
library(sf)
library(ggplot2)
# Create some points
set.seed(1)
df <- data.frame(
gr = c(rep("a",5),rep("b",5)),
x = rnorm(10),
y = rnorm(10)
)
df <- st_as_sf(df,coords = c("x","y"),remove = F, crs = 4326)
df.laea = st_transform(
df,
crs = "+proj=laea +x_0=4600000 +y_0=4600000 +lon_0=0.13 +lat_0=0.24 +datum=WGS84 +units=m"
)
# Create a countour of the area
ch <- st_convex_hull(st_union(df.laea))
ggplot() +
geom_sf(data = ch, fill = "white", color = "black") +
geom_sf(data = df.laea,color = "black")
Now, I'd like to create 10 random points but the conditions are that this points must be inside the ch boundaries and a minimum distance of 10 meters of each df.laea points that exist inside this ch area.
Please, any help with it?
I think the only tricky thing here is that a simple st_difference() of your polygon and the buffered points will return ten polygons, each with one of the points removed. Thus you have to either use a for loop or reduce() to remove one buffered point after the other from the polygon. To use reduce() you have to transform the vector to a proper list of sf instead of an sfc vector. This is what I did below.
# Packages
library(sf)
library(ggplot2)
library(purrr)
ch_minus <- df.laea$geometry |>
st_buffer(10000) |>
{\(vec) map(seq_along(vec), \(x) vec[x])}() |> # Transform buffered points to reducible list
reduce(.init = ch, st_difference)
sampled_points <- st_sample(ch_minus, 10)
ch_minus |>
ggplot() +
geom_sf() +
geom_sf(data = sampled_points)
You can buffer the points by the distance you'd like, then intersect those polygons with the ch polygon. From there, use st_sample and the associated arguments to get the points you want.
Example code:
## buffer df.laea 10m
laea_buff <- st_buffer(df.laea, dist = 10000) #changed dist to 10km to make it noticable in plot
# area to sample from:
sample_area <- st_intersection(ch, laea_buff)
# sample above area, all within 10km of a point and inside the `ch` polygon
points <- st_sample(sample_area, size = 10)
#plotting:
ggplot() +
geom_sf(data = points, color = 'red') +
geom_sf(data = laea_buff, color = 'black', fill = NA) +
geom_sf(data = ch, color = 'black', fill = NA) +
geom_sf(data = sample_area, color = 'pink', fill = NA) +
geom_sf(data = df.laea, color = 'black', size = .5)
Created on 2023-02-14 by the reprex package (v2.0.1)
As a comment on the nice answer by shs: it is possible to first use a sf::st_combine() call on the df.laea object & merge the 10 points to a single multipoint geometry.
This, when buffered, will work as an input for the necessary sf::st_difference() call to form a sampling area with holes, removing the need for a for cycle / map & reduce call.
# Packages
library(sf)
library(ggplot2)
# Create some points
set.seed(1)
df <- data.frame(
gr = c(rep("a",5),rep("b",5)),
x = rnorm(10),
y = rnorm(10)
)
df <- st_as_sf(df,coords = c("x","y"),remove = F, crs = 4326)
df.laea = st_transform(
df,
crs = "+proj=laea +x_0=4600000 +y_0=4600000 +lon_0=0.13 +lat_0=0.24 +datum=WGS84 +units=m"
)
# merge 10 points to 1 multipoing
mod_laea <- df.laea %>%
st_combine()
# sampling area = difference between hull and buffered points
sampling_area <- mod_laea %>%
st_convex_hull() %>%
st_difference(st_buffer(mod_laea, 10000))
# sample over sampling area
sampled_points <- st_sample(sampling_area, 10)
# a visual overview
ggplot() +
geom_sf(data = sampling_area, fill = "white", color = "black") +
geom_sf(data = df.laea, color = "black") +
geom_sf(data = sampled_points, color = "red", pch = 4)

Using gBuffer from rgeos with correct projection

I want to show 15 mile radius circles around points in a map using gBuffer. As far as I can tell I have the points and the map in the same projection, but when I produce the circles on the map, they are too large. Here is my code. The tigerline files for the state and counties can be found at https://www.census.gov/cgi-bin/geo/shapefiles/index.php.
library(tidyverse)
library(rgdal)
library(rgeos)
library(ggplot2)
state <- readOGR('C:\\Users\\Mesonet\\Desktop\\map_folder\\tl_2020_us_state\\tl_2020_us_state.shp')
state <- state[which(state$STATEFP == '46'),]
state <- spTransform(state, CRS("+init=epsg:3857"))
counties <- readOGR('C:\\Users\\Mesonet\\Desktop\\map_folder\\tl_2020_us_county\\tl_2020_us_county.shp')
counties <- counties[which(counties$STATEFP == '46'),]
counties <- spTransform(counties, CRS("+init=epsg:3857"))
sites <- data.frame(Lon = c(-98.1096,-98.27935), Lat = c(43.9029, 43.717258))
coordinates(sites) <- ~Lon + Lat
proj4string(sites) <- CRS("+proj=longlat")
sites <- spTransform(sites, CRS = CRS("+init=epsg:3857"))
# Miles to meters conversion
mile2meter <- function(x){x * 1609.344}
# Buffer creation
site_buffer <- gBuffer(sites, width = mile2meter(15))
png('C:\\Users\\Mesonet\\Desktop\\map_folder\\new_test.png', height = 3000, width = 42*100, res = 100)
ggplot() + geom_path(counties, mapping = aes(x = long, y = lat, group = group), size = 1.75,
alpha = 0.45, col = 'darkgreen') + geom_path(state, mapping = aes(x = long, y = lat, group =
group), size = 0.8) + theme(axis.text = element_blank()) + geom_polygon(site_buffer, mapping
= aes(x = long, y = lat, group = group), fill = '#0000FF', alpha = 1, size = 2)
dev.off()
These two locations are 15.35 miles apart, but the plot shows two circles that overlap each other by a couple miles. I can't figure out why, since from what I can see everything is in the same projection, but I might be wrong. Thank you.

Make grid map from spatial data

I have spatial coordinates in a data frame where each row (Longitude, Latitude) corresponds to the occurrence of an event I am following. I tried to map these data but instead of using points, I want to create a grid with cells of a resolution of 5 nautical miles (~ 0.083333) and count the number of occurrences of the event is each cell and plot it.
This is the code I came to write with the help of some resources. But it doesn't look the way I expected it to be. Can you figure out what's I'm doing wrong? I attached the raw positions and the resulting map I get.
Here is the link to the data.
re_pi = read.csv(file = "~/Desktop/Events.csv")
gridx <- seq(from=-19,to=-10,by=0.083333)
gridy <- seq(from=20,to=29,by=0.083333)
xcell <- unlist(lapply(re_pi$LON,function(x) min(which(gridx>x))))
ycell <- unlist(lapply(re_pi$LAT,function(y) min(which(gridy>y))))
re_pi$cell <- (length(gridx) - 1) * ycell + xcell
rr = re_pi %>%
group_by(cell)%>%
summarise(Lat = mean(LAT),Lon = mean(LON),Freq = length(cell))
my_theme <- theme_bw() + theme(panel.ontop=TRUE, panel.background=element_blank())
my_cols <- scale_color_distiller(palette='Spectral')
my_fill <- scale_fill_distiller(palette='Spectral')
ggplot(rr, aes(y=Lat, x=Lon, fill=Effort)) + geom_tile(width=1.2, height=1.2) +
borders('world', xlim=range(rr$Lon), ylim=range(rr$Lat), colour='black') + my_theme + my_fill +
coord_quickmap(xlim=range(rr$Lon), ylim=range(rr$Lat))
Nice dataset, assume these are fishing vessel VMS data. Here may be one way to achieve your objective, heavily reliant on the tidyverse and by-passing raster and shapes.
library(tidyverse)
library(mapdata) # higher resolution maps
# poor man's gridding function
grade <- function (x, dx) {
if (dx > 1)
warning("Not tested for grids larger than one")
brks <- seq(floor(min(x)), ceiling(max(x)), dx)
ints <- findInterval(x, brks, all.inside = TRUE)
x <- (brks[ints] + brks[ints + 1])/2
return(x)
}
d <-
read_csv("https://raw.githubusercontent.com/abenmhamed/data/main/Events.csv") %>%
janitor::clean_names() %>%
# make a grid 0.01 x 0.01 longitude / latitude
mutate(lon = grade(lon, 0.01),
lat = grade(lat, 0.01)) %>%
group_by(lon, lat) %>%
count() %>%
# not much happening south of 21 and north of 26
filter(between(lat, 21, 26.25))
d %>%
ggplot() +
theme_bw() +
geom_tile(aes(lon, lat, fill = n)) +
scale_fill_viridis_c(option = "B", direction = -1) +
# only data within the data-bounds
borders(database = "worldHires",
xlim = range(d$lon), ylim = range(d$lat),
fill = "grey") +
labs(x = NULL, y = NULL, fill = "Effort") +
# limit plot
coord_quickmap(xlim = range(d$lon), ylim = range(d$lat)) +
# legends within plot
theme(legend.position = c(0.77, 0.26))
Here is my attempt using the sf package. First I imported your data and converted it to an sf object. Then, I created another sf object which includes the grids. I used the raster package and the sf package in order to create the grids. Once I had the two sf object, I counted how many data points exist in each grid and added the results as a new column in foo. Finally, I drew a graphic.
library(tidyverse)
library(sf)
library(raster)
library(viridis)
# Import the data and convert it to an sf object
mydata <- read_csv("https://raw.githubusercontent.com/abenmhamed/data/main/Events.csv") %>%
st_as_sf(coords = c("LON", "LAT"),
crs = 4326, agr = "constant")
# Create an sf object for the grid
gridx <- seq(from = -19,to = -10, by = 0.083333)
gridy <- seq(from = 20,to = 29, by = 0.083333)
foo <- raster(xmn = -19, xmx = -10,
ymn = 20, ymx = 29,
nrows = length(gridx),
ncols = length(gridy)) %>%
rasterToPolygons() %>%
st_as_sf(crs = 4326) %>%
mutate(group = 1:(length(gridx)*length(gridy))) %>%
st_cast("MULTIPOLYGON")
# Now count how many data points exist in each grid
mutate(foo,
count = lengths(st_intersects(x = foo, y = mydata))) -> foo
# Draw a graphic
ggplot() +
geom_sf(data = foo, aes(fill = count)) +
scale_fill_viridis(option = "D") -> g

Spatial network based on maximum distance in ggplot2

I would like to plot network matrix of regions in ggplot - I know that for ggplot we need data.frame in tidy format in order to plot it.
I am able to plot network based on number of neighbours in ggplot however when I need spatial network based on maximum distance I get an error when creating data frame for ggplot.
I provided example down bellow:
library(ggplot2)
library(sf)
library(spdep)
# Polygon data
URL <- "https://biogeo.ucdavis.edu/data/gadm3.6/Rsp/gadm36_DEU_1_sp.rds"
data <- readRDS(url(URL))
CORD <- rbind(
coordinates(data)
)
rownames(CORD) <- NULL
# Spatial Network based on number of neighbours
cns <- knearneigh(CORD, k = 5, longlat=T)
scnsn <- knn2nb(cns, row.names = NULL, sym = T)
cS <- nb2listw(scnsn)
data_df <- data.frame(CORD)
colnames(data_df) <- c("long", "lat")
# Creating dataframe from spatail network (neiresth neighbours) for ggplot plot
n = length(attributes(cS$neighbours)$region.id)
DA = data.frame(
from = rep(1:n,sapply(cS$neighbours,length)),
to = unlist(cS$neighbours),
weight = unlist(cS$weights)
)
DA = cbind(DA, data_df[DA$from,], data_df[DA$to,])
colnames(DA)[4:7] = c("long","lat","long_to","lat_to")
# ggplot of spatial network
ggplot(data, aes(x = long, y =lat))+
geom_polygon(aes(group = group), color = "red", fill = FALSE) +
geom_segment(data = DA, aes(xend = long_to, yend = lat_to), size=0.5, color = "royalblue") +
coord_map()
### Another type of network matrix - Maximum distance
nb200km <- dnearneigh(CORD, d1=0, d2=100, longlat=T)
summary(nb200km)
cS_distance <- nb2listw(nb200km, zero.policy = T)
# I need to recreate this plot in ggplot
plot(data)
plot(W, coordinates(data), add = T)
data_df <- data.frame(CORD)
colnames(data_df) <- c("long", "lat")
n = length(attributes(cS_distance$neighbours)$region.id)
DA = data.frame(
from = rep(1:n,sapply(cS_distance$neighboaurs,length)),
to = unlist(cS_distance$neighbours),
weight = unlist(cS_distance$weights)
)
DA = cbind(DA, data_df[DA$from,], data_df[DA$to,])
colnames(DA)[4:7] = c("long","lat","long_to","lat_to")
creating dataframe from cS object works, however creating a dataframe from cS_distance object returns an error.
I would like to ask how to solve the error and plot distance spatial network in ggplot.
I'm not sure if this is what you're looking for, but the problem seems to be that you have some regions with no neighbours in cS_distance, so DA$to contains some zero values. This means when you do data_df[DA$from,] it has more rows than data_df[DA$to,], and your code throws an error when you try to cbind them.
If you filter out the rows where DA$to is zero, you get this:
n = length(attributes(cS_distance$neighbours)$region.id)
from <- rep(1:n,sapply(cS_distance$neighbours,length))
to <- unlist(cS_distance$neighbours)[]
weight <- numeric(length(to))
weight[which(to != 0)] <- unlist(cS_distance$weights)
DA = data.frame(from = from, to = to, weight = weight)
DA <- DA[DA$to != 0,]
DA = cbind(DA, data_df[DA$from,], data_df[DA$to,])
colnames(DA)[4:7] = c("long","lat","long_to","lat_to")
# ggplot of spatial network
ggplot(data, aes(x = long, y =lat))+
geom_polygon(aes(group = group), color = "red", fill = NA) +
geom_segment(data = DA, aes(xend = long_to, yend = lat_to), size=0.5, color = "royalblue") +
coord_map()

Automatic Label Placement for GIS maps in R

I'm making GIS maps in R using the sf package (and related packages) to read in shapefiles, and ggplot2 (and friends) for plotting. This works fine, but I can find no way to (automatically/programmatically) create label placements for features such as rivers and roads. These features are typically linestrings, with irregular shapes. See image attached for example from wikimedia.
The ggrepel package works well for labeling points in an automated way, but this doesn't make much sense for other geographic features that aren't discrete Lat/Long points.
I could imagine doing this by placing individual text labels on each feature individually, but I'm looking for something more automated, if possible. I realize such automation isn't a trivial problem, but it's been solved before (ArcGIS apparently has a way of doing this with an extension called maplex, but I don't have access to the software, and I'd like to stay in R if possible).
Does anyone know of a way of doing this?
MWE here:
#MWE Linestring labeling
library(tidyverse)
library(sf)
library(ggrepel)
set.seed(120)
#pick a county from the built-in North Carolina dataset
BuncombeCounty <- st_read(system.file("shapes/", package="maptools"), "sids") %>%
filter(NAME == "Buncombe")
#pick 4 random points in that county
pts_sf <- data.frame(
x = seq(-82.3, -82.7, by=-0.1) %>%
sample(4),
y = seq(35.5, 35.7, by=0.05) %>%
sample(4),
placenames = c("A", "B", "C", "D")
) %>%
st_as_sf(coords = c("x","y"))
#link those points into a linestring
linestring_sf <- pts_sf %>%
st_coordinates() %>%
st_linestring()
st_cast("LINESTRING")
#plot them with labels, using geom_text_repel() from the `ggrepel` package
ggplot() +
geom_sf(data = BuncombeCounty) +
geom_sf(data = linestring_sf) +
geom_label_repel(data = pts_sf,
stat = "sf_coordinates",
aes(geometry = geometry,
label = placenames),
nudge_y = 0.05,
label.r = 0, #don't round corners of label boxes
min.segment.length = 0,
segment.size = 0.4,
segment.color = "dodgerblue")
I think I have something that might work for you. I've taken the liberty of changing your example to something a bit more realistic: a couple of random "rivers" made with smoothed random walks, each 100 points long:
library(tidyverse)
library(sf)
library(ggrepel)
BuncombeCounty <- st_read(system.file("shapes/", package = "maptools"), "sids") %>%
filter(NAME == "Buncombe")
set.seed(120)
x1 <- seq(-82.795, -82.285, length.out = 100)
y1 <- cumsum(runif(100, -.01, .01))
y1 <- predict(loess(y1 ~ x1, span = 0.1)) + 35.6
x2 <- x1 + 0.02
y2 <- cumsum(runif(100, -.01, .01))
y2 <- predict(loess(y2 ~ x2, span = 0.1)) + 35.57
river_1 <- data.frame(x = x1, y = y1) %>%
st_as_sf(coords = c("x", "y")) %>%
st_coordinates() %>%
st_linestring() %>%
st_cast("LINESTRING")
river_2 <- data.frame(x = x2, y = y2) %>%
st_as_sf(coords = c("x", "y")) %>%
st_coordinates() %>%
st_linestring() %>%
st_cast("LINESTRING")
We can plot them as per your example:
riverplot <- ggplot() +
geom_sf(data = BuncombeCounty) +
geom_sf(data = river_1, colour = "blue", size = 2) +
geom_sf(data = river_2, colour = "blue", size = 2)
riverplot
My solution is basically to extract points from the linestrings and label them. Like the picture at the top of your question, you might want multiple copies of each label along the length of the linestring, so if you want n labels you just extract n equally-spaced points.
Of course, you want to be able to label both rivers at once without the labels clashing, so you'll need to be able to pass multiple geographical features as a named list.
Here is a function that does all that:
linestring_labels <- function(linestrings, n)
{
do.call(rbind, mapply(function(linestring, label)
{
n_points <- length(linestring)/2
distance <- round(n_points / (n + 1))
data.frame(x = linestring[1:n * distance],
y = linestring[1:n * distance + n_points],
label = rep(label, n))
}, linestrings, names(linestrings), SIMPLIFY = FALSE)) %>%
st_as_sf(coords = c("x","y"))
}
So if we put the objects we want to label in a named list like this:
river_list <- list("River 1" = river_1, "River 2" = river_2)
Then we can do this:
riverplot +
geom_label_repel(data = linestring_labels(river_list, 3),
stat = "sf_coordinates",
aes(geometry = geometry, label = label),
nudge_y = 0.05,
label.r = 0, #don't round corners of label boxes
min.segment.length = 0,
segment.size = 0.4,
segment.color = "dodgerblue")
It's now much easier to do this using the geomtextpath package. Using the same example data as above, we can now do:
library(geomtextpath)
ggplot() +
geom_sf(data = BuncombeCounty, fill = "#DADABA") +
geom_textsf(data = river_1, size = 4, vjust = -1, text_smoothing = 30,
label = paste(rep("River 1", 3), collapse = "\t\t\t\t\t\t\t\t"),
linecolour = "blue3") +
geom_textsf(data = river_2, size = 4, vjust = -0.5, text_smoothing = 30,
label = paste(rep("River 2", 3), collapse = "\t\t\t\t\t\t\t\t"),
linecolour = "blue3")

Resources