Return Intersection Locations from SpatialLinesDataFrame - r

I am analyzing a road network file, and am trying to get coordinates (or spdf) that represents all the intersections. I have looked through sp, rgeos, & raster, but can't seem to find an appropriate solution that will take just 1 object and analyze its geometry for intersections.
The goal is to find all types of intersections.
Is there a package specifically for road network analysis that will do this? (If you know of something that will achieve this and more, e.g. sinuosity calculations, length, etc., I'm all ears.)
Simple SpatialLinesDataFrame:
library(sp)
library(rgeos)
## Roughly taken from the sp vignette:
l1 <- cbind(c(-79.81022, -79.80993), c(43.24589, 43.24654))
l2 <- cbind(c(-79.81022, -79.80993), c(43.24654, 43.24589))
l3 <- cbind(c(-79.81022, -79.80990), c(43.24589, 43.24589))
Sl1 <- Line(l1)
Sl2 <- Line(l2)
Sl3 <- Line(l3)
S1 <- Lines(list(Sl1), ID = "a")
S2 <- Lines(list(Sl2), ID = "b")
S3 <- Lines(list(Sl3), ID = "c")
Sl <- SpatialLines(list(S1, S2, S3))
## sample data: line lengths
df <- data.frame(len = sapply(1:length(Sl), function(i) gLength(Sl[i, ])))
rownames(df) <- sapply(1:length(Sl), function(i) Sl@lines[[i]]@ID)
## SpatialLines to SpatialLinesDataFrame
sampleLines <- SpatialLinesDataFrame(Sl, data = df)
plot(sampleLines, col = c("red", "blue", "green"))

Using the approach from "How to receive differences of intersecting SpatialLines in R?" below:
intersections <- gIntersects(Sl, byid = TRUE)
intersections[lower.tri(intersections, diag = TRUE)] <- NA
intersections <- reshape2::melt(intersections, na.rm = TRUE)
t(apply(intersections, 1,
function(x) coordinates(gIntersection(Sl[x[1]], Sl[x[2]]))))
# [,1] [,2]
# 4 -79.810075 43.246215
# 7 -79.810220 43.245890
# 8 -79.809930 43.245890
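If you need the result as spatial points rather than a bare coordinate matrix, here is a minimal sketch (my own addition, reusing the melted intersections table built above):
## hedged sketch: wrap the pairwise intersection coordinates in a
## SpatialPoints object so they can be plotted or exported
xy <- t(apply(intersections, 1,
function(x) coordinates(gIntersection(Sl[x[1]], Sl[x[2]]))))
intersectionPoints <- SpatialPoints(xy)
plot(sampleLines, col = c("red", "blue", "green"))
points(intersectionPoints, pch = 19)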

Related

Points in multiple polygons using R

Currently I have two data.frames, one of polygons (poly.x, poly.y, enum) and one of points (pt.x, pt.y) where enum is the id of the polygon. I am trying to determine which points belong to which polygons so I get a data.frame of (pt.x, pt.y, enum).
My first attempt uses point.in.polygon from the sp package and lapply functions to find which polygon(s) the point belongs to. While my code works, it takes a long time on large data sets.
My second attempt uses over, also from the sp package, cobbled together from questions on GIS Stack Exchange. While it is much faster, I cannot seem to get the correct output from over, as it returns just 1s and NAs.
Below I've included a simplified working example (npoly can be changed to test the speed of different methods) as well as my working attempt using sp::point.in.polygon and nonsensical output from my sp::over attempt. I'm not fussed which method I end up using as long as it's fast.
Any help would be much appreciated!
#-------------------------------------------
# Libraries
library(ggplot2) # sample plots
library(dplyr) # bind_rows(), etc
library(sp) # spatial data
# Sample data
npoly = 100
# polygons
localpolydf <- data.frame(
x = rep(c(0, 1, 1, 0), npoly) + rep(0:(npoly-1), each = 4),
y = rep(c(0, 0, 1, 1), npoly),
enum = rep(1:npoly, each = 4))
# points
offsetdf <- data.frame(
x = seq(min(localpolydf$x) - 0.5, max(localpolydf$x) + 0.5, by = 0.5),
y = runif(npoly*2 + 3, 0, 1))
# Sample plot
ggplot() +
geom_polygon(aes(x, y, group = enum),
localpolydf, fill = NA, colour = "black") +
geom_point(aes(x, y), offsetdf)
#-------------------------------------------
# Dplyr and lapply solution for point.in.polygon
ptm <- proc.time() # Start timer
# create lists
offsetlist <- split(offsetdf, rownames(offsetdf))
polygonlist <- split(localpolydf, localpolydf$enum)
# lapply over each pt in offsetlist
pts <- lapply(offsetlist, function(pt) {
# lapply over each polygon in polygonlist
ptpoly <- lapply(polygonlist, function(poly) {
data.frame(
enum = poly$enum[1],
ptin = point.in.polygon(pt[1,1], pt[1,2], poly$x, poly$y))
})
ptpoly <- bind_rows(ptpoly) %>% filter(ptin != 0)
if (nrow(ptpoly) == 0) return(data.frame(x = pt$x, y = pt$y, enum = NA, ptin = NA))
ptpoly$x = pt$x
ptpoly$y = pt$y
return(ptpoly[c("x", "y", "enum", "ptin")])
})
pts_apply <- bind_rows(pts)
proc.time() - ptm # end timer
#-------------------------------------------
# Attempted sp solution for over
ptm <- proc.time() # Start timer
# Split the dataframe into a list based on enum and then remove enum from df in the list
polygonlist <- split(localpolydf, localpolydf$enum)
polygonlist <- lapply(polygonlist, function(x) x[,c("x", "y")])
# Convert the list to Polygon, then create a Polygons object
polygonsp <- sapply(polygonlist, Polygon)
polygonsp <- Polygons(polygonsp, ID = 1)
polygonsp <- SpatialPolygons(list(polygonsp))
plot(polygonsp)
# Convert points to coordinates
offsetps <- offsetdf
coordinates(offsetps) <- ~x+y
points(offsetps$x, offsetps$y)
# Determine polygons points are in
pts_sp <- over(offsetps, polygonsp)
proc.time() - ptm # end timer
#===========================================
# Output
# Apply: point.in.polygon
> head(pts_apply)
x y enum ptin
1 -0.5 0.2218138 NA NA
2 4.0 0.9785541 4 2
3 4.0 0.9785541 5 2
4 49.0 0.3971479 49 2
5 49.0 0.3971479 50 2
6 49.5 0.1177206 50 1
user system elapsed
4.434 0.002 4.435
# SP: over
> head(pts_sp)
1 2 3 4 5 6
NA 1 1 NA 1 NA
user system elapsed
0.048 0.000 0.047
An alternative to using over is sf::st_intersection, as the sf package is becoming more and more popular.
Getting the data into sf objects took me a little bit of work, but if you are working with external data you can just read it in with st_read and it will already be in the correct form.
Here is one approach:
library(tidyverse)
library(sf)
# convert into st_polygon friendly format (all polygons must be closed)
# must be a nicer way to do this! (see the sketch after this pipeline)
localpoly <- localpolydf %>% split(localpolydf$enum) %>%
lapply(function(x) rbind(x,x[1,])) %>%
lapply(function(x) x[,1:2]) %>%
lapply(function(x) list(as.matrix(x))) %>%
lapply(function(x) st_polygon(x))
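As an aside on the "must be a nicer way" comment: one hedged alternative of mine (not part of the original answer) is to group the raw corner points by enum and take the convex hull of each group, which reproduces the squares here only because they are convex:
# build the polygons straight from the point table; st_convex_hull
# recovers each square because the shapes are convex
polys_alt <- localpolydf %>%
st_as_sf(coords = c("x", "y")) %>%
group_by(enum) %>%
summarise() %>%
st_convex_hull()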
# convert points into sf object
points <- st_as_sf(offsetdf,coords=c('x','y'),remove = F)
#convert polygons to sf object and add id column
polys <- localpoly %>% st_sfc() %>% st_sf(geom=.) %>%
mutate(id=factor(1:100))
#find intersection
joined <- polys %>% st_intersection(points)
# Sample plot
ggplot() + geom_sf(data=polys) +
geom_sf(data=joined %>% filter(id %in% c(1:10)),aes(col=id)) +
lims(x=c(0,10))
Note that to use geom_sf at the time of writing you will need to install the development version of ggplot2.
plot output:
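Since the original goal was a plain data.frame of (pt.x, pt.y, enum), a small follow-up sketch (my addition, assuming the joined object from above):
# drop the geometry column to get back an ordinary data.frame
result <- st_set_geometry(joined, NULL)
head(result)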
over returns an index of points inside a geometry. Perhaps something like this:
xy <- offsetps[names(na.omit(pts_sp == 1)), ]
plot(polygonsp, axes = 1, xlim = c(0, 10))
points(offsetps)
points(xy, col = "red")
After having another look, I realised Roman did pts_sp == 1 because I only had one ID for all of my squares, i.e. when I did ID = 1.
Once I fixed that, I was able to add an enum column from the polygon IDs. To handle points in multiple polygons I can use returnList = TRUE and add a few lines to convert the list to a data.frame, but it isn't necessary here (a sketch follows the code below).
# Attempted sp solution
ptm <- proc.time() # Start timer
# Split the dataframe into a list based on enum and then remove enum from df in the list
polygonlist <- split(localpolydf, localpolydf$enum)
# Convert the list to Polygon, then create a Polygons object
polygonsp <- sapply(polygonlist, function(poly){
Polygons(list(Polygon(poly[, c("x", "y")])), ID = poly[1, "enum"])
})
# polygonsp <- Polygons(polygonsp, ID = 1)
polygonsp <- SpatialPolygons(polygonsp)
plot(polygonsp)
# Convert points to coordinates
offsetps <- offsetdf
coordinates(offsetps) <- ~x+y
points(offsetps$x, offsetps$y)
# Determine polygons points are in
pts_sp <- over(offsetps, polygonsp)
pts_sp <- data.frame(
x = offsetps$x, y = offsetps$y,
enum = unique(localpolydf$enum)[pts_sp])
proc.time() - ptm # end timer
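And a minimal sketch of the returnList = TRUE variant mentioned above (my own illustration, assuming the objects from the code just run):
# one list element per point, holding the indices of all polygons it
# falls in; points in no polygon are dropped by rep(..., times = 0)
pts_list <- over(offsetps, polygonsp, returnList = TRUE)
pts_multi <- data.frame(
x = rep(coordinates(offsetps)[, 1], lengths(pts_list)),
y = rep(coordinates(offsetps)[, 2], lengths(pts_list)),
enum = unlist(pts_list))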

Reverse cluster analysis; identifying empty space or a lack of density in R with longitude and latitude?

I'm working on a project where I have a very large number of points, and I am looking to identify regions (defined by a lack of clustering) where the density of these points is statistically significantly lower relative to others. Normally a visual would be enough, but I have so many points that it is too difficult to tell where these empty spaces are, and a density heat map doesn't help me zero in on smaller regions. Maybe I'm missing something very simple here, but I'm hoping someone can at least send me in the right direction of where to look. Below is a quick-and-dirty reproducible sample: let's take these points from open data and map them to the borough file for NYC:
#libraries--------------------------
library(ggplot2)
library(ggmap)
library(sp)
library(jsonlite)
library(RJSONIO)
library(rgdal)
#call api data--------------------------
df = fromJSON("https://data.cityofnewyork.us/resource/24t3-xqyv.json?$query= SELECT Lat, Long_")
df <- data.frame(t(matrix(unlist(df),nrow=length(unlist(df[1])))))
names(df)[names(df) == 'X2'] = 'x'
names(df)[names(df) == 'X1'] = 'y'
df = df[, c("x", "y")]
df$x = as.numeric(as.character(df$x))
df$y = as.numeric(as.character(df$y))
df$x = round(df$x, 4)
df$y = round(df$y, 4)
df$x[df$x < -74.2] = NA
df$y[df$y < 40.5] = NA
df = na.omit(df)
#map data----------------------------
cd = readOGR("nybb.shp", layer = "nybb")
cd = spTransform(cd, CRS("+proj=longlat +datum=WGS84"))
cd_f = fortify(cd)
#map data
nyc = ggplot() +
geom_polygon(aes(x=long,
y=lat, group=group), fill='grey',
size=.2,color='black', data=cd_f, alpha=1)
nyc + geom_point(aes(x = x, y = y), data = df, size = 1)
#how would I go about finding the empty spaces? That is the regions where there are no clusters?
In this case there aren't a lot of points but for the sake of demonstration, how would I:
identify pockets of low density
potentially draw polygon boundaries on those pockets?
Appreciate the help!
One way to get polygonal areas of low density would be to construct the Dirichlet/Voronoi tessellation and choose the largest tiles.
Below I use spatstat and deldir (loaded by spatstat) to do this. It is not especially fast, so I don't know how well it will scale to many more points.
To use the results in ggmap and other spatial packages, you can convert back from owin and ppp to the spatial classes from sp and use spTransform to get long/lat coordinates (a sketch of that conversion follows at the end).
First load the packages:
library(maptools)
library(spatstat)
library(jsonlite)
library(rgdal) # needed for readOGR() below
Map and points in the coordinate system of the shapefile (note that I read the data in from local files downloaded from the web):
cd = readOGR("nybb.shp", layer = "nybb")
#> OGR data source with driver: ESRI Shapefile
#> Source: "nybb.shp", layer: "nybb"
#> with 5 features
#> It has 4 fields
df <- fromJSON("NYC_data.json")
df <- as.data.frame(matrix(as.numeric(unlist(df)), ncol = 2, byrow = TRUE))
df <- df[, c(2, 1)]
names(df) <- c("x", "y")
df <- df[df$x > -74.2 & df$y > 40.5, ]
coordinates(df) <- ~x+y
proj4string(df) <- CRS("+proj=longlat +datum=WGS84")
df2 <- spTransform(df, proj4string(cd))
Switch to spatstat classes:
cd2 <- as(cd, "SpatialPolygons")
W <- as(cd2, "owin")
X <- as(df2, "ppp")
Window(X) <- W
plot(X, main = "")
Compute Dirichlet tessellation and areas and plot the tessellation:
d <- dirichlet(X)
#> Warning: 96 duplicated points were removed
a <- tile.areas(d)
plot(d, main = "")
Combine the n_areas biggest areas of the tessellation:
n_areas <- 30
empty <- tess(tiles = d$tiles[tail(order(a), n = n_areas)])
empty2 <- as.owin(empty)
Plot the result:
plot(W, main = "")
plot(empty2, col = "red", add = TRUE)
plot(X, add = TRUE)
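Finally, a minimal sketch of the conversion back to sp classes mentioned at the start (my addition; assumes maptools is loaded and cd is the shapefile read earlier):
# coerce the low-density window back to SpatialPolygons and reproject
empty_sp <- as(empty2, "SpatialPolygons")
proj4string(empty_sp) <- proj4string(cd)
empty_ll <- spTransform(empty_sp, CRS("+proj=longlat +datum=WGS84"))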

Counting spatialpoints in gridcells

SO gurus!
I am trying to count the densities of surviving units in different grid cells.
I have two shapefiles with points from the two survey periods in question (one before and one after the mortality event). The intent is to see whether there is a difference in survival rates and to link the proportion of survival to a climatic variable obtained from the raster value of each grid cell. In the code snippet below I create a random raster and shapefiles.
packs = c('raster', 'rgdal', 'spatstat', 'sp' ,'dplyr')
sapply(packs, FUN = 'require', character.only = TRUE)
xy <- matrix(rnorm(1024),32,32) #Creating the desired raster
image(xy)
rast <- raster(xy)
extent(rast) <- c(36,37,-3,-2)
projection(rast) <- CRS("+proj=longlat +datum=WGS84")
points <- runifpoint(n =4000, c(36,37,-3,-2)) # Creating the points
x <- points$x
y <- points$y
values <- c(rep(1, 900), rep(0, 3100))
xy <- cbind(x, y)
points <- cbind(x, y, values)
points <- data.frame(points)
shp <- SpatialPointsDataFrame(coords = xy, data = data.frame(values) ) # creating shpfiles
projection(shp) <- CRS("+proj=longlat +datum=WGS84")
subs <- filter(points, values == 1)
suxy <- select(subs, x,y)
shpsub <- SpatialPointsDataFrame(coords = suxy, data = data.frame(subs$values)) # creating shpfiles
projection(shpsub) <- CRS("+proj=longlat +datum=WGS84")
When I attempt to extract the points, I use the following lines of code:
shp <- spTransform(shp, projection(rast)) # make sure they have same transformation
shpsub <- spTransform(shpsub, projection(rast))
XY <- xyFromCell(rast, cell = 1:ncell(rast))
v <- as.data.frame(rast) #Extract values from raster
XY <- data.frame(XY, v) # Creating a data frame containing coord., cellno and value
XY$cell <- c(1:ncell(rast))
cells <- cellFromXY(rast,shp) # find which cells the points are in
cells <- rle(cells) # returns a value and a length, fast for counting
cellsfound <- cellFromXY(rast,shpsub)
cellsfound <- rle(cellsfound)
Proportion <- data.frame(cell = cells$values, shp = cells$lengths)
test <- data.frame(cell = rep(NA,NROW(Proportion)), shpsub = rep(NA, NROW(Proportion)))
test$cell <- c(cellsfound$values, rep(NA, nrow(test) - length(cellsfound$values)))
test$shpsub <- c(cellsfound$lengths, rep(NA, NROW(test) - length(cellsfound$lengths)))
Proportion <- full_join(Proportion, test, by = "cell")
test.Proportion <- mutate(Proportion, Proportion = shpsub/shp) #Calculating Proportion
XY <- left_join(XY, test.Proportion, by = "cell") # Adding Proportion to coord and cell no.
XY <- na.omit(XY); XY <- XY[, -4]
As I see it, using rle() returns the same cells multiple times instead of counting the number of points within each individual cell, as was my intention; rle() only collapses consecutive runs of identical values. Can anyone please explain how to do this in a way that retrieves the number of occurrences in the individual cells?
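For what it's worth, a minimal sketch of one way to get true per-cell counts (my own suggestion, not an answer from the thread): table() counts all occurrences regardless of order, so it avoids the consecutive-runs limitation of rle(). It assumes rast, shp and shpsub from the code above:
# points per cell for all units and for survivors
counts_all <- table(cellFromXY(rast, shp))
counts_sub <- table(cellFromXY(rast, shpsub))
# join the two counts by cell number and compute the survival proportion
Proportion <- merge(
data.frame(cell = as.integer(names(counts_all)), shp = as.integer(counts_all)),
data.frame(cell = as.integer(names(counts_sub)), shpsub = as.integer(counts_sub)),
by = "cell", all.x = TRUE)
Proportion$Proportion <- Proportion$shpsub / Proportion$shp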

How to receive differences of intersecting SpatialLines in R?

How to receive a list of SpatialLines of the differences of the intersecting SpatialLines only?
create SpatialLines:
#from the sp vignette:
l1 = cbind(c(1,2,3,4),c(3,2,2,4))
rownames(l1) = letters[1:4]
l2 = cbind(c(2,2,3,3),c(3,2,2,5))
rownames(l2) = letters[1:4]
l3 = cbind(c(1,2,3,4),c(1,2,2,1))
rownames(l3) = letters[1:4]
Sl1 = Line(l1)
Sl2 = Line(l2)
Sl3 = Line(l3)
Ll1 = Lines(list(Sl1), ID="a")
Ll2 = Lines(list(Sl2), ID="b")
Ll3 = Lines(list(Sl3), ID="c")
Sl = SpatialLines(list(Ll1,Ll2,Ll3))
The resulting SpatialLines object ("Sl") shows intersections and differences.
Receiving the differences of all SpatialLines of the list can be achieved like this:
C = combn(1:length(Sl),2)
C2 = cbind(C,C[2:1,])
MyDiffs = apply(C2, 2, function(x){gDifference(Sl[x[1]], Sl[x[2]])})
see spacedman's answer to this question
I am looking for the differences of the intersecting SpatialLines only.
I was thinking about something like: if gIntersects() returns TRUE, then apply gDifference(). However, I can't find a way to do that in R.
Maybe there's a smarter solution...
Edit:
bogdata's answer works, but every difference appears twice.
Manipulating the matrix so that the lower triangular part is removed led to some doubled differences being kept while others were removed; note that gDifference(a, b) and gDifference(b, a) are generally different geometries, so the two results for a pair are not true duplicates.
library("reshape2")
# compute intersection matrix by ID
intersections <- gIntersects(Sl, byid=TRUE)
# set lower triangular part of matrix NA
intersections[lower.tri(intersections, diag = TRUE)] <- NA
# melt matrix into edge list (+remove NA)
intersections <- melt(intersections, na.rm=TRUE)
# compute differences
MyDiffs = apply(intersections, 1, function(x){gDifference(Sl[x[1]], Sl[x[2]])})
Any suggestions?
Just use gIntersects with byid=T and the melt function in reshape2:
library("reshape2")
# compute intersection matrix by ID
intersections <- gIntersects(Sl, byid=T)
# melt matrix into edge list
intersections <- melt(intersections)
# keep only intersecting lines, remove diagonals
intersections <- subset(intersections, Var1 != Var2 & value)
# compute differences
MyDiffs = apply(intersections, 1, function(x){gDifference(Sl[x[1]], Sl[x[2]])})

get coordinates of a patch in a raster map (raster package in R)

I have a raster map with many patches (clumps of contiguous cells with the same value). What I need to do is to obtain the coordinates of the center (or close to the center) of each patch.
I am very inexperienced with the raster package, but it seems I can get coordinates only if I know the position of the cells in the map. Is there any way to get coordinates given a value of the cells instead? Thank you.
If by patch you mean clumps, the raster package allows you to find, and isolate, clumps. Taking the clump() example from the raster package and extending it:
library(raster)
library(igraph)
detach("package:coin", unload=TRUE)
r <- raster(ncols=12, nrows=12)
set.seed(0)
r[] <- round(runif(ncell(r))*0.7 )
rc <- clump(r)
clump_id <- getValues(rc)
xy <- xyFromCell(rc,1:ncell(rc))
df <- data.frame(xy, clump_id, is_clump = rc[] %in% freq(rc, useNA = 'no')[,1])
df[df$is_clump == T, ]
plot(r)
plot(rc)
text(df[df$is_clump == T, 1:2], labels = df[df$is_clump == T, 3])
The result may not be as interesting as you might expect.
You can do it all over with directions = 4:
rc <- clump(r, directions = 4)
clump_id <- getValues(rc)
xy <- xyFromCell(rc,1:ncell(rc))
df <- data.frame(xy, clump_id, is_clump = rc[] %in% freq(rc, useNA = 'no')[,1])
df[df$is_clump == T, ]
to get a different set of clumps, and maybe clump 'centroids':
library(plyr) # for ddply()
dfm <- ddply(df[df$is_clump == T, ], .(clump_id), summarise, xm = mean(x), ym = mean(y))
plot(rc)
text(dfm[, 2:3], labels = dfm$clump_id)
Notes:
clump() will throw an error if the modeltools library is attached, so detach it first. modeltools is loaded by coin and possibly other statistical libraries.
You could take the mean of the coordinates of each patch:
# some dummy data
m <- matrix(c(
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,4,4,0,
0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,4,4,0,
0,0,0,0,0,0,0,1,1,1,1,1,1,1,0,0,0,0,0,4,4,0,
0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,
0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,
0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,
0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,
0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,
0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,
0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,
0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,
0,0,0,0,0,0,0,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,
0,0,2,3,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
0,0,2,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), nrow=20, byrow=T)
# create a raster
r <- raster(m)
# convert raster to points
p <- data.frame(rasterToPoints(r))
# filter out background
p <- p[p$layer > 0,]
# for each patch calc mean coordinates
sapply(split(p[, c("x", "y")], p$layer), colMeans)
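The sapply() call returns a 2 x n matrix with one column per patch value; a short follow-up of mine to reshape it into a tidy data.frame of centroids:
# transpose so each row is a patch, then label the columns
cent <- t(sapply(split(p[, c("x", "y")], p$layer), colMeans))
centers <- data.frame(patch = as.numeric(rownames(cent)),
x = cent[, "x"], y = cent[, "y"])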
