Merge adjacent polygons in sf - r

I'm trying to merge groups of adjacent polygons, but I'm getting big multipolygons containing non-adjacent areas. In the code block below, plot(Matsuyama.sf) shows a large contiguous region plus a few islands, but I can't extract those as separate geometries. How do I get each contiguous area and its geometry?
library(sf)
library(tidyverse)
Matsuyama.sf <- st_read("https://geoshape.ex.nii.ac.jp/city/geojson/20210101/38/38201A1968.geojson")
Matsuyama.sf <- st_transform(Matsuyama.sf, crs = 4326)
plot(Matsuyama.sf)    # shows the mainland plus islands as one feature
st_area(Matsuyama.sf) # returns a single total area, not per-island areas
I can split the multipolygon into hundreds of single polygons, but the options below just lump them back together into one:
split.sf <- st_cast(Matsuyama.sf, "POLYGON")
clumps_1.sf <- st_join(split.sf, split.sf, join = st_intersects)
clumps_2.sf <- Matsuyama.sf %>% mutate(INTERSECT = st_intersects(.))
What am I missing?
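One way to get there (a minimal sketch, assuming the igraph package; untested against this particular dataset): treat the st_intersects() result on the split polygons as an adjacency list, take its connected components, and union each component back into a single geometry.
library(igraph)
split.sf <- st_cast(Matsuyama.sf, "POLYGON")  # one row per single polygon
adj <- st_intersects(split.sf)                # sparse adjacency list
clump_id <- components(graph_from_adj_list(adj))$membership
clumps.sf <- split.sf %>%
  group_by(clump = clump_id) %>%
  summarise()                                 # unions the geometries per group
st_area(clumps.sf)                            # one area per contiguous region
summarise() on a grouped sf object unions the geometries within each group, so each contiguous region comes back as its own (multi)polygon with its own area.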

Related

Create circular areas around a coordinate

I need to create circular areas around a coordinate to identify points inside this circular area. Any ideas on the easiest way to do this in R?
You can do this with the sf package. You'll need to bring in your data first; this differs depending on whether you have a spatial data file or are getting your data from tables. Then buffer your coordinate to get your circular polygon, and intersect that against your points to keep only those that overlap.
library(sf)
library(tidyverse)
## if from table data: read the CSV, then convert to sf points
CO_ORD <- st_as_sf(read.csv("co-ord-data-file.csv"), coords = c("Long", "Lat"), crs = 4326)
POINTS <- st_as_sf(read.csv("points-data-file.csv"), coords = c("Long", "Lat"), crs = 4326)
## if from a spatial data file
CO_ORD <- st_read(dsn = "co-ord-spatial-data.gdb", layer = "co-ord-layer")
POINTS <- st_read(dsn = "points-spatial-data.gdb", layer = "points-layer")
## buffer the coordinate to get the circular polygon
## (dist is in CRS units -- use a projected CRS for a true 500 m radius)
CO_ORD_BUFF <- st_buffer(x = CO_ORD, dist = 500)
## keep only the points that fall inside the buffer
OVERLAPPING_POINTS <- st_intersection(POINTS, CO_ORD_BUFF)
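If you want to keep the whole point records without merging in the buffer's attributes, st_filter() (available in newer versions of sf) is an alternative; a minimal sketch:
## spatial filter: keep points inside the buffer, columns unchanged
OVERLAPPING_POINTS <- st_filter(POINTS, CO_ORD_BUFF, .predicate = st_within)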

Filling road network data gaps

I have a motorway network with count points that can be matched to road links. However, they only match around half the osm links. The network is uni directional and it should be possible to assign data from joining links to the missing links.
I currently have a rather ugly and long solution based on a WHILE loop that sequentially fills the connecting links. However, I think a more elegant solution might be possible using an sfnetwork or spatial lines network. The stplanr, sfnetworks and dodgr packages closely match what I want to do, but all seem to focus on routing and origin-destination data.
Below is a reproducible example that takes a small area of the UK motorway network, removes a sample of half the links, and generates flow and speed data for the remaining half.
How do I fill in the missing links with data from either end of the missing links?
library(tidyverse)
library(mapview)
library(sf)
library(osmdata)
## define area to import osm data
x_max <- -2.31
x_min <- -2.38
y_max <- 51.51
y_min <- 51.48
##create a data frame to set up polygon generation
df <- data.frame(X = c(x_min, x_max, x_max, x_min),
                 Y = c(y_max, y_max, y_min, y_min))
##generate a polygon of the area
rd_area <- df %>%
  st_as_sf(coords = c("X", "Y"), crs = 4326) %>%
  dplyr::summarise(geometry = st_combine(geometry)) %>%
  st_cast("POLYGON")
##get osm geometry for motorway links for defined area
x <- opq(bbox = rd_area) %>%
  add_osm_feature(key = 'highway', value = c('motorway', 'motorway_link')) %>%
  osmdata_sf()
## extract line geometry, generate a unique segment ID and get rid of excess columns
rdz <- x$osm_lines %>%
  mutate(seg_id = paste0("L", sprintf("%02d", 1:n()))) %>%
  select(seg_id)
## pretend we only have traffic counts and speeds for half the links
osm_dat <- rdz[c(3,4,5,7,11,14,15),]
## links without data
osm_nodat <- filter(rdz, !seg_id %in% osm_dat$seg_id)
## visualise links with data and without
mapview(osm_dat, color = "green")+mapview(osm_nodat, color = "red")
## make up some data to work with
pretend_counts <- st_centroid(osm_dat)
## assign some random annual average daily flow and speed averages
pretend_counts$aadt <- sample(200:600, nrow(pretend_counts))
pretend_counts$speed <- sample(40:80, nrow(pretend_counts))
Here is one quick and elegant solution from the CyIPT project: https://github.com/cyipt/cyipt/blob/master/scripts/prep_data/get_traffic.R
It adapts the code from the get.aadt.class function and uses Voronoi polygons to give the flows and speeds to the nearest roads. However, it doesn't distribute, i.e. split, the flows where one link meets two, and it sometimes results in opposing directions having the same flows and speeds.
library(dismo) ## dismo package for Voronoi polygon generation
#Make Voronoi polygons and convert to sf
voronoi <- dismo::voronoi(x = st_coordinates(pretend_counts))
voronoi <- as(voronoi, "sf")
st_crs(voronoi) <- st_crs(pretend_counts)
#Find intersections of roads with Voronoi polygons
inter <- st_intersects(osm_nodat, voronoi)
#Get aadt and speed values: mean of the counts whose Voronoi polygon each road crosses
osm_nodat$aadt <- sapply(1:nrow(osm_nodat), function(x) round(mean(pretend_counts$aadt[inter[[x]]]), 0))
osm_nodat$speed <- sapply(1:nrow(osm_nodat), function(x) round(mean(pretend_counts$speed[inter[[x]]]), 0))
#Attach the observed values back onto the links that had counts
#(pretend_counts rows are the centroids of osm_dat, in the same order)
osm_dat$aadt <- pretend_counts$aadt
osm_dat$speed <- pretend_counts$speed
#Combine observed and imputed links
all_osm <- rbind(osm_dat, osm_nodat)
flows <- dplyr::select(all_osm, aadt)
mapview(flows)
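A topology-based variant (a rough sketch, not part of the CyIPT code): instead of taking the nearest count by Voronoi polygon, average the values of the data links that actually share an endpoint with each missing link. st_touches() gives those neighbours directly; missing links with no touching data link come back as NaN and would still need a sequential pass like the WHILE loop:
## for each no-data link, which data links share an endpoint with it?
nb <- st_touches(osm_nodat, osm_dat)
## average the neighbours' values (pretend_counts rows match osm_dat rows)
osm_nodat$aadt <- sapply(nb, function(i) round(mean(pretend_counts$aadt[i])))
osm_nodat$speed <- sapply(nb, function(i) round(mean(pretend_counts$speed[i])))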

Neighboring Zipcodes from State Polygons

First time posting on SO
I have a shapefile that has the geometries for each Zipcode along with state name. I want to figure out which zipcodes lie on the state borders.
The way I figured to achieve this is by combining all zipcodes for each state and leading to the geometry for a state and then finding the neighboring zipcodes for each state.
I combined the zipcodes into states using:
state_shape <- shapefile %>% group_by(State) %>% summarise(geometry = sf::st_union(geometry))
But then when I try to find the neighboring zipcodes using poly2nb
state_nb <- poly2nb(st_geometry(state_shape))
It gives me an Error:
Error in poly2nb(st_geometry(state_shape)) : Polygon geometries required
I understand that to find the border zipcodes I will have to pass the zipcode geometries to poly2nb, but the error persists.
Any help will be highly appreciated, also any other approaches to this problem are more than welcome.
Consider this example, built on the widely available North Carolina shapefile that is distributed with the {sf} package.
What the example does is:
creates a border line of North Carolina by first dissolving the counties, and then casting the resulting multipolygon to a multilinestring
runs sf::st_touches() on the counties and borderline with sparse set to false; the result is a logical vector that can be used to subset the original shapefile (filtering out the counties that share a border with the NC border)
presents the results in a graphical format, using {ggplot2}; the bordering counties are blue and the rest just blank for context
library(sf)
library(dplyr)
library(ggplot2)
# all NC counties (from shapefile distributed with {sf})
shape <- st_read(system.file("shape/nc.shp", package="sf"))
# border, via dplyr::summarise() & cast as a linestring
border <- shape %>%
  summarise() %>%
  st_cast("MULTILINESTRING")
# logical vector of length nrow(shape)
neighbours <- sf::st_touches(shape,
                             border,
                             sparse = F)
# report results
ggplot() +
  geom_sf(data = shape[neighbours, ], fill = "blue") +  # border counties
  geom_sf(data = shape, fill = NA, color = "grey45")    # all counties for context
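Mapped back onto the zipcode data, the same pattern would look roughly like this (a sketch: State is the grouping column named in the question, shapefile is the object it is read into), sidestepping poly2nb() entirely:
## dissolve zipcodes into states and cast the state outlines to lines
state_borders <- shapefile %>%
  group_by(State) %>%
  summarise() %>%
  st_cast("MULTILINESTRING") %>%
  st_union()
## zipcodes whose boundary lies on a state border
on_border <- st_touches(shapefile, state_borders, sparse = FALSE)[, 1]
border_zips <- shapefile[on_border, ]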

How to rasterize sf geometries tile by tile?

What I'm looking for is a way to rasterize (or fasterize) the geometries within each tile of a certain extent, step by step, and then join the parts of the rasterized geometries into complete raster objects.
## create sample data
# create a frame
library(sf)
ob <- st_sf(st_sfc(st_polygon(list(rbind(c(0,0), c(0,9), c(6,9), c(6,0), c(0,0))))))
# create tiles
library(GSIF)
tl <- getSpatialTiles(as(ob, 'Spatial'), block.x=3, overlap.percent=2)
plot(tl)
# create sample polygons
g <- st_sfc(st_point(c(1,2)), st_point(c(5,6)), st_point(c(2,4)),
            st_point(c(3,3)), st_point(c(3,4)), st_point(c(4,5)))
g.b <- st_buffer(g, 0.6)
p <- st_sf(value = ceiling(10*runif(6)),
           geometry = g.b)
plot(p, add=TRUE)
Cropping the polygons (or parts of polygons) that fall within each tile works fine, I guess. My actual goal is to process the data tile by tile. It looks like the following loop does this and also joins the parts of the polygons back into entire polygons. Well, the output (cr) is the same as the input (p)... I thought this step might be necessary in order to rasterize the resulting (parts of) polygons in the next step; I didn't expect the polygons to be joined. I was trying to build a loop based on the intermediate result, so the following lines were added:
# crop polygons (or parts) for each tile
result <- p
for(i in 1:length(tl)) { result <- rbind(result, st_crop(p, tl[i])) }
cr <- result[-(1:nrow(p)), ]  # drop the original polygons, keep only the cropped parts
library(scales)
plot(st_geometry(cr), col=alpha("white", 0.5), add=TRUE)
However, I struggle rasterizing the (intermediate) results.
Try this:
myF <- function(i){return(st_crop(p, tl[i]))}
cr <- do.call(rbind, lapply(1:length(tl), myF))
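The rasterization step the question actually asks about can follow the same pattern. Below is a hedged sketch assuming the raster package and a cell size of 0.1 (fasterize::fasterize(part, r, field = "value") should be a drop-in replacement for the rasterize() call): build one template raster over the whole frame so every tile shares the same origin and resolution, rasterize each cropped part against a cropped template, then merge the pieces.
library(raster)
## one template over the full 6 x 9 frame
template <- raster(extent(0, 6, 0, 9), res = 0.1)
ras_tiles <- lapply(1:length(tl), function(i) {
  part <- st_crop(p, tl[i])              # polygon parts falling in tile i
  if (nrow(part) == 0) return(NULL)      # skip tiles with no geometry
  r <- crop(template, tl[i])             # tile-sized template
  rasterize(as(part, "Spatial"), r, field = "value")
})
## reassemble; overlapping cells take the value of the first tile supplying them
full <- do.call(merge, Filter(Negate(is.null), ras_tiles))
plot(full)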

Label a point depending on which polygon contains it (NYC civic geospatial data)

I have the longitude and latitude of 5449 trees in NYC, as well as a shapefile for 55 different Neighborhood Tabulation Areas (NTAs). Each NTA has a unique NTACode in the shapefile, and I need to append a third column to the long/lat table telling me which NTA (if any) each tree falls under.
I've made some progress already using other point-in-polygon threads on stackoverflow, especially this one that looks at multiple polygons, but I'm still getting errors when trying to use gContains and don't know how I could check/label each tree for different polygons (I'm guessing some sort of sapply or for loop?).
Below is my code. Data/shapefiles can be found here: http://bit.ly/1BMJubM
library(rgdal)
library(rgeos)
library(ggplot2)
#import data
setwd("< path here >")
xy <- read.csv("lonlat.csv")
#import shapefile
map <- readOGR(dsn="CPI_Zones-NTA", layer="CPI_Zones-NTA", p4s="+init=epsg:25832")
map <- spTransform(map, CRS("+proj=longlat +datum=WGS84"))
#generate the polygons, though this doesn't seem to be generating all of the NTAs
nPolys <- sapply(map@polygons, function(x) length(x@Polygons))
region <- map[which(nPolys==max(nPolys)),]
plot(region, col="lightgreen")
#setting the region and points
region.df <- fortify(region)
points <- data.frame(long = xy$INTPTLON10,
                     lat  = xy$INTPTLAT10,
                     id   = c(1:5449),
                     stringsAsFactors = F)
#drawing the points / polygon overlay; currently only the points are appearing
ggplot(region.df, aes(x = long, y = lat, group = group)) +
  geom_polygon(fill = "lightgreen") +
  geom_path(colour = "grey50") +
  geom_point(data = points, aes(x = long, y = lat, group = NULL, color = id), size = 1) +
  xlim(-74.25, -73.7) +
  ylim(40.5, 40.92) +
  coord_fixed()
#this should check whether each tree falls into **any** of the NTAs, but I need it to specifically return **which** NTA
sapply(1:5449, function(i)
  list(id = points[i, ]$id,
       gContains(region, SpatialPoints(points[i, 1:2], proj4string = CRS(proj4string(region))))))
#this is something I tried earlier to see if writing a new column using the over() function could work, but I ended up with a column of NAs
pts <- SpatialPoints(xy)
nyc <- readShapeSpatial("< path to shapefile here >")
xy$nrow <- over(pts, SpatialPolygons(nyc@polygons), returnList = TRUE)
The NTAs we're checking for are these ones (visualized in GIS): http://bit.ly/1A3jEcE
Try simply:
ShapeFile <- readShapeSpatial("Shapefile.shp")
points <- data.frame(long = xy$INTPTLON10,
                     lat  = xy$INTPTLAT10,
                     stringsAsFactors = F)
dimnames(points)[[1]] <- seq(1, length(xy$INTPTLON10), 1)
points <- SpatialPoints(points)
df <- over(points, ShapeFile)
I omitted transformation of shapefile because this is not the main subject here.
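To get the third column the question asks for, bind the matched attribute back onto the tree table; NTACode is the field named in the question, and trees outside every NTA come back as NA:
xy$NTACode <- df$NTACode
For what it's worth, sf now does the whole lookup as a spatial join; a hedged sketch, assuming the shapefile reads in cleanly with st_read():
library(sf)
ntas <- st_transform(st_read(dsn = "CPI_Zones-NTA", layer = "CPI_Zones-NTA"), 4326)
trees <- st_as_sf(xy, coords = c("INTPTLON10", "INTPTLAT10"), crs = 4326)
trees <- st_join(trees, ntas["NTACode"], join = st_within)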
