create evenly spaced polylines over counties using R - r

I would like to create evenly spaced polylines going North to South with 50 mile spacing between each line and 10 miles long. Not sure if this is possible using sf package. In the example below, I would like to have the lines filling the counties across the state of Washington.
library(tigris)
library(leaflet)
states <- states(cb = TRUE)
counties<-counties(cb=TRUE)
counties<- counties%>%filter(STATEFP==53)
states<- states%>%filter(NAME=="Washington")
leaflet(states) %>%
addProviderTiles("CartoDB.Positron") %>%
addPolygons(fillColor = "white",
color = "black",
weight = 0.5) %>%
addPolygons(data=counties,color='red',fillColor = 'white')%>%
setView(-120.5, 47.3, zoom=8)
I've updated to include an image of what I'd like to do below.

You can create a multilinestring sf object from scratch by specifying coordinates.
You can get these coordinates from the extent (bounding box) of Washington, but you may also be interested in knowing how to create a grid, which I will demonstrate below because it may be helpful.
Copy and paste this reproducible example:
library(tidyverse)
library(tigris)
library(leaflet)
library(sf)
library(raster)
states <- states(cb = TRUE)
# subset for WA and transform to a meter-based CRS
states <- states %>%
filter(NAME == "Washington") %>%
st_transform(crs = 3857) # Mercator
# fifty miles in meters
fm <- 80467.2
# subset for Washington
states_sp <- as(states, "Spatial")
# create a grid, convert it to polygons to plot
grid <- raster(extent(states_sp),
resolution = c(fm, fm),
crs = proj4string(states_sp))
grid <- rasterToPolygons(grid)
plot(states_sp)
plot(grid, add = TRUE)
# find the top y coordinate and calculate 50 mile intervals moving south
ty <- extent(grid)[4] # y coordinate along northern WA edge
ty <- ty - (fm * 0:7) # y coordinates moving south at 10 mile intervals
# create a list of sf linestring objects
l <- vector("list", length(ty))
for(i in seq_along(l)){
l[[i]] <-
st_linestring(
rbind(
c(extent(grid)[1], ty[i]),
c(extent(grid)[2], ty[i])
)
)
}
# create the multilinestring, which expects a list of linestrings
ml <- st_multilinestring(l)
plot(states_sp)
plot(as(ml, "Spatial"), add = TRUE, col = "red")
As you can see, I switch back and forth between sf and sp objects using the functions as(sf_object, "Spatial") and st_as_sf(sp_object). Use these to transform the data to your needs.

Related

Plot an ellipse between two geographical foci in R

I am trying to find out the geographical area that is equidistant from two points, and to plot this as an ellipse.
I can produce plots for one point easily using st_buffer, and can find numerous R functions that will plot ellipse from a known centroid if I define the axis, but have not been able to find one that will plot an ellipse given two known foci and a defined distance.
The similar question here gets some way towards an answer, but is not readily applicable to geographic situations - Draw an ellipse based on its foci
My code is pretty simple at the moment, and given each coordinate with a 100km radius. However, I would like to find out all the positions that would be reachable by a 200km (or other defined distance) trip between both sites.
library(tidyverse)
library(sf)
#Give Coordinates
citylocations <- tibble::tribble(
~city, ~lon, ~lat,
"London", -0.1276, 51.5072,
"Birmingham", -1.8904, 52.4862,
)
citydflocations <- as.data.frame(citylocations)
#Convert to SF
citysflocations <- sf::st_as_sf(citydflocations, coords = c("lon","lat" ), crs = 4326)
#Convert location file to National Grid Planar
cityBNGsflocations <- citysflocations %>%
st_transform(citysflocations, crs = 27700)
#Produce circles with 100km buffer
dat_circles <- st_buffer(cityBNGsflocations, dist = 100000)
join_circles <- st_union(dat_circles) %>%
st_transform(4326)
plot(join_circles, col = 'lightblue')```
The function below should create buffers of varying distances for each of the two points it is given, finds the intersection the two buffers, unions the intersections, and finally returns a convex hull of those intersections. The output should be a near approximation of an ellipse with the two points as foci.
The straight-line(s) distance from one city to any edge of the polygon and then to the other city should equal the distance given in the function (200,000m in the example below).
It works on the data provided, but is fragile as there's no error checking or warning suppression. Make sure the dist argument is greater than the distance between the two points, and that the points have a crs that can use meters as a distance. (lat/lon might not work)
The example below only uses 20 points for the 'ellipse', but changing the function should be relatively straightforward.
library(sf)
#> Linking to GEOS 3.8.0, GDAL 3.0.4, PROJ 6.3.1; sf_use_s2() is TRUE
library(tidyverse)
#Give Coordinates
citylocations <- tibble::tribble(
~city, ~lon, ~lat,
"London", -0.1276, 51.5072,
"Birmingham", -1.8904, 52.4862,
)
citydflocations <- as.data.frame(citylocations)
#Convert to SF
citysflocations <- sf::st_as_sf(citydflocations, coords = c("lon","lat" ), crs = 4326)
#Convert location file to National Grid Planar
cityBNGsflocations <- citysflocations %>%
st_transform(citysflocations, crs = 27700)
#Produce circles with 100km buffer
dat_circles <- st_buffer(cityBNGsflocations, dist = 100000)
join_circles <- st_union(dat_circles) %>%
st_transform(4326)
#plot(join_circles, col = 'lightblue')
### the ellipse function using 20 buffers ####
ellipse_fn <- function(x_sf, y_sf, distance){
#set distance argument to meters, get sequence of distances for buffers
distance = units::set_units(distance, 'm')
dists_1 <- seq(units::set_units(0, 'm'), distance, length.out = 22)
# create empty sf object to place for loop objects in
# purrr would probably be better here
nrows <- 20
df <- st_sf(city = rep(NA, nrows), city.1 = rep(NA, nrows), geometry = st_sfc(lapply(1:nrows, function(x) st_geometrycollection())))
intersections <- for(i in 2:21){
buff_1 <- st_buffer(cityBNGsflocations[1,], dist = dists_1[i])
buff_2 <- st_buffer(cityBNGsflocations[2,], dist = distance - dists_1[i])
intersection <- st_intersection(buff_1, buff_2)
df[i-1,] <- intersection
}
df %>%
st_set_crs(st_crs(x_sf)) %>%
st_union() %>%
st_convex_hull()
}
### end ellipse function ###
# Using the ellipse function with 2 points & 200000m distance
ellipse_sf <- ellipse_fn(cityBNGsflocations[1,], cityBNGsflocations[2,], dist = 200000)
# You'll get lots of warnings here about spatial constance...
ggplot() +
geom_sf(data = ellipse_sf, fill = 'black', alpha = .2) +
geom_sf(data = cityBNGsflocations, color = 'red')
Created on 2022-06-03 by the reprex package (v2.0.1)
mapview plot of the cities & 'ellipse' on a map:

How to extract the largest polygon in a raster?

I can read a rastre and exrec polygones likes:
If I have this code to read a raster and shapefile:
library(raster)
library(geojsonsf)
library(sf)
library(exactextractr)
r <- raster(matrix(rnorm(10*12), nrow=10), xmn = -180, xmx= 180, ymn = -90, ymx= 90)
myurl <- "http://eric.clst.org/assets/wiki/uploads/Stuff/gz_2010_us_050_00_500k.json"
geo <- readLines(myurl)
geo <- paste0(geo, collapse = "")
system.time({ sf <- geojson_sf(geo)})
#add crs information for the raster 'r'
crs(r) <- 4326
# extract the 'r' raster value for each polygon 'NAME' in 'sf'
res <- do.call(rbind, exactextractr::exact_extract(r, sf, include_cols = 'NAME'))[-3]
It can be that several polygons are within one pixel and I need to extract not all polygons but only the largest polygon in a pixel.
Here's some code that does what I think you want which is to find the polygon that is largest for each of the raster cells. I have modified the code so rasters can be uniquely identified and then I use GEO_ID because NAME is not unique in the data (there are 31 Washingtons for example). I use dplyr to find the maximum coverage for each raster and mapview to view the results and convince myself that the code is working.
library(raster)
library(geojsonsf)
library(sf)
library(exactextractr)
library(mapview)
library(dplyr)
# Give the raster cells a unique identifier so we can use this to find which
# polygon is the maximum in a given raster
r <- raster(matrix(1:120, nrow=10), xmn = -180, xmx= 180, ymn = -90, ymx= 90)
crs(r) <- 4326
myurl <- "http://eric.clst.org/assets/wiki/uploads/Stuff/gz_2010_us_050_00_500k.json"
geo <- readLines(myurl)
geo <- paste0(geo, collapse = "")
sf <- geojson_sf(geo)
# Find the polygons that overlap with the raster cells
# the coverage fraction says how much overlap so
# simply find the largest whilst grouping ny the id
# of the raster
# Use GEO_ID because NAME has many duplicates
overlay <- do.call(rbind, exactextractr::exact_extract(r, sf, include_cols = c('GEO_ID', 'NAME')))
# Find the maximum in each raster
maximum_in_raster <- overlay %>% group_by(value) %>% top_n(1, coverage_fraction)
# Create a subset of polygons corresponding to the maxima
# This is where GEO_ID is important
maximum_polygons <- sf[sf$GEO_ID %in% maximum_in_raster$GEO_ID, 'NAME']
# Make a grid to display
sf_grid <- st_make_grid(r, n=c(12,10))
# Use mapview to have a look at the results to convince ourselves that it's working
mapview(maximum_polygons, alpha.regions=1) + mapview(sf_grid, col.regions='white')
Here's an example showing the maxima in the south west of the US.
The 4 polygons shown are Lake, Nye, San Bernardino and San Luis Obispo. The horizontal and vertical lines show the boundaries of the raster cells.
And to make the point about duplicate names, here is the code to count how many names appear in each raster cell.
counts <- overlay %>% count(value, NAME, sort = T)
head(counts)
value NAME n
1 33 Washington 13
2 23 Lincoln 12
3 33 Franklin 12
4 23 Washington 10
5 23 Douglas 9
6 23 Grant 9
Which shows 13 Washingtons in cell 33.

Filling road network data gaps

I have a motorway network with count points that can be matched to road links. However, they only match around half the osm links. The network is uni directional and it should be possible to assign data from joining links to the missing links.
I currently have a rather ugly and long solution based on a WHILE loop that sequentially fills the connecting links. However, I think a more elegant solution might be possible by using an sfnetwork or spatial lines network. The packages stplanr, sfnetwork and dodger closely match what I want to do, but all seem to focus on routing and origin destination data.
Below is a reproducible example that uses a small area of UK motorway network and removes a random sample of half the links and generates flow and speed data for the half remaining.
How do I fill in the missing links with data from either end of the missing links?
library(tidyverse)
library(mapview)
library(sf)
library(osmdata)
## define area to import osm data
x_max <- -2.31
x_min <- -2.38
y_max <- 51.48
y_min <- 51.51
##create a data frame to setup a polygon generation
df <- data.frame(X = c(x_min, x_max, x_max, x_min),
Y = c(y_max, y_max, y_min, y_min))
##generate a polygon of the area
rd_area <- df %>%
st_as_sf(coords = c("X", "Y"), crs = 4326) %>%
dplyr::summarise(geometry = st_combine(geometry)) %>%
st_cast("POLYGON")
##get osm geometry for motorway links for defined area
x <- opq(bbox = rd_area) %>%
add_osm_feature(key = c('highway'), value = c('motorway',
'motorway_link')) %>% osmdata_sf()
## extract line geometry, generate a unique segment ID and get rid of excess columns
rdz <- x$osm_lines %>%
mutate(seg_id = paste0("L", sprintf("%02d", 1:NROW(bicycle)))) %>%
select(seg_id)
## pretend we only have traffic counts and speeds for half the links
osm_dat <- rdz[c(3,4,5,7,11,14,15),]
## links without data
osm_nodat <- filter(rdz, !seg_id %in% osm_dat$seg_id)
## visualise links with data and without
mapview(osm_dat, color = "green")+mapview(osm_nodat, color = "red")
## make up some data to work with
pretend_counts <- st_centroid(osm_dat)
## assign some random annual average daily flow and speed averages
pretend_counts$aadt <- sample(200:600, nrow(pretend_counts))
pretend_counts$speed <- sample(40:80, nrow(pretend_counts))
Here is one quick and elegant solution from the Cyipt project https://github.com/cyipt/cyipt/blob/master/scripts/prep_data/get_traffic.R
It uses the code from the get.aadt.class function and uses Voroni polygons to give the flows and speeds to the nearest roads. However, it doesn't distribute, i.e. split the flows where one links meets two and it sometimes results in opposing directions having the same flows and speeds.
library(dismo) ## dismo package for voroni polygon generation
#Make voronoi polygons and convert to SF
voronoi <- dismo::voronoi(x = st_coordinates(pretend_counts))
voronoi <- as(voronoi, "sf")
st_crs(voronoi) <- st_crs(pretend_counts)
#Find Intersections of roads with vernoi polygons
inter <- st_intersects(osm_nodat,voronoi)
#Get aadt and ncycle values
osm_nodat$aadt <- as.numeric(lapply(1:nrow(osm_nodat),function(x){as.numeric(round(mean(pretend_counts$aadt[inter[[x]]])),0)}))
osm_nodat$speed <- as.numeric(lapply(1:nrow(osm_nodat),function(x){as.numeric(round(mean(pretend_counts$speed[inter[[x]]])),0)}))
#Remove Unneded Data
all_osm <- as.data.frame(rbind(osm_dat, osm_nodat))
st_geometry(all_osm) <- all_osm$geometry
flows <- dplyr::select(all_osm, aadt)
mapview(flows)

Looking for a polygon where a point is contained in R

I'm working with a dataframe containing longitude and latitude for each point. I have a shapefile containing mutually exclusive polygons. I would like to find the index of the polygon it where each point is contained. Is there a specific function that helps me achieve this? I've been trying with the sf package, but I'm open to doing it with another one. Any help is greatly appreciated.
I believe you may be looking for function sf::st_intersects() - in combination with sparse = TRUE setting it returns a list, which can be in this use case (points & a set of non-overlapping polygons) converted to a vector easily.
Consider this example, built on the North Carolina shapefile shipped with {sf}
library(sf)
# as shapefile included with sf package
shape <- st_read(system.file("shape/nc.shp", package="sf")) %>%
st_transform(4326) # WGS84 is a good default
# three semi random cities
cities <- data.frame(name = c("Raleigh", "Greensboro", "Wilmington"),
x = c(-78.633333, -79.819444, -77.912222),
y = c(35.766667, 36.08, 34.223333)) %>%
st_as_sf(coords = c("x", "y"), crs = 4326) # again, WGS84
# plot cities on full map
plot(st_geometry(shape))
plot(cities, add = T, pch = 4)
# this is your index
index_of_intersection <- st_intersects(cities, shape, sparse = T) %>%
as.numeric()
# plot on subsetted map to doublecheck
plot(st_geometry(shape[index_of_intersection, ]))
plot(cities, add = T, pch = 4)

Finding the radius of a circle that circumscribes a polygon

I am trying to find the best way of obtaining: the length of the longest line from the center of a polygon to its edge.
In the code below, I download the polygon data of the 75254 zip code located in Texas, USA. I then determine the location of its center with sf::st_centroid() and I plot the geometries using the tmap package.
# Useful packages
library(dplyr)
library(sf)
library(tigris)
library(tmap)
# Download polygon data
geo <- tigris::zctas(cb = TRUE, starts_with = "75254")
geo <- st_as_sf(geo)
# Determine the location of the polygon's center
geo_center <- st_centroid(geo)
# Plot geometries
tm_shape(geo) +
tm_polygons() +
tm_shape(geo_center) +
tm_dots(size = 0.1, col = "red")
Once again, is there an efficient way to determine the length of the line going from the center of the polygon all the way to the farthest point on the polygon's edge? In other words, how can I find the radius of the circle that perfectly circumscribes the polygon given that both the circle and the polygon have the same center?
Thank you very much for your help.
One point here, although I mentioned, st_bbox wouldn't work as the centroid of the bbox and the one of your shape are not the same, since the centroid is weighted. See here one approach based on the further distance to the points of the border, but you woud need to project your shape (currently is unprojected):
library(dplyr)
library(sf)
library(tigris)
library(tmap)
# Download polygon data
geo <- tigris::zctas(cb = TRUE, starts_with = "75254")
geo <- st_as_sf(geo)
st_crs(geo)
#> Coordinate Reference System:
#> EPSG: 4269
#> proj4string: "+proj=longlat +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +no_defs"
#Need to project
geo=st_transform(geo,3857)
# Determine the location of the polygon's center
geo_center <- st_centroid(geo)
#> Warning in st_centroid.sf(geo): st_centroid assumes attributes are constant over
#> geometries of x
plot(st_geometry(geo))
plot(st_geometry(geo_center), col="blue", add=TRUE)
#Cast to points
geopoints=st_cast(geo,"POINT")
#> Warning in st_cast.sf(geo, "POINT"): repeating attributes for all sub-geometries
#> for which they may not be constant
r=max(st_distance(geo_center,geopoints))
r
#> 3684.917 [m]
buffer=st_buffer(geo_center,dist=r)
plot(st_geometry(buffer), add=TRUE, border="green")
OP didn't ask for this, but in case anyone else would like to do this for multiple shapes, here's a version that builds on dieghernan's example to do that.
library(dplyr)
library(sf)
library(tigris)
library(tmap)
# Download polygon data
raw <- tigris::zctas(cb = TRUE, starts_with = "752")
geo <- raw %>%
st_as_sf() %>%
slice(1:5) %>%
st_transform(3857) %>%
arrange(GEOID10) # Sort on GEOID now so we don't have to worry about group_by resorting later
# Compute the convex hull
hull <- geo %>% st_convex_hull()
# Compute centroids
geo_center <- st_centroid(geo)
# Add centroid, then cast hull to points
hull_points <- hull %>%
mutate(centroid_geometry = geo_center$geometry) %>%
st_cast("POINT")
# Compute distance from centroid to all points in hull
hull_points$dist_to_centroid <- as.numeric(hull_points %>%
st_distance(hull_points$centroid_geometry, by_element = TRUE))
# Pick the hull point the furthest distance from the centroid
hull_max <- hull_points %>%
arrange(GEOID10) %>%
group_by(GEOID10) %>%
summarize(max_dist = max(dist_to_centroid)) %>%
ungroup()
# Draw a circle using that distance
geo_circumscribed <- geo_center %>% st_buffer(hull_max$max_dist)
# Plot the shape, the hull, the centroids, and the circumscribed circles
tm_shape(geo) +
tm_borders(col = "red") +
tm_shape(hull) +
tm_borders(col = "blue", alpha = 0.5) +
tm_shape(geo_center) +
tm_symbols(col = "red", size = 0.1) +
tm_shape(geo_circumscribed) +
tm_borders(col = "green")

Resources