Filling road network data gaps - r

I have a motorway network with count points that can be matched to road links. However, they only match around half the osm links. The network is uni directional and it should be possible to assign data from joining links to the missing links.
I currently have a rather ugly and long solution based on a WHILE loop that sequentially fills the connecting links. However, I think a more elegant solution might be possible by using an sfnetwork or spatial lines network. The packages stplanr, sfnetwork and dodger closely match what I want to do, but all seem to focus on routing and origin destination data.
Below is a reproducible example that uses a small area of UK motorway network and removes a random sample of half the links and generates flow and speed data for the half remaining.
How do I fill in the missing links with data from either end of the missing links?
library(tidyverse)
library(mapview)
library(sf)
library(osmdata)
## define area to import osm data
x_max <- -2.31
x_min <- -2.38
y_max <- 51.48
y_min <- 51.51
##create a data frame to setup a polygon generation
df <- data.frame(X = c(x_min, x_max, x_max, x_min),
Y = c(y_max, y_max, y_min, y_min))
##generate a polygon of the area
rd_area <- df %>%
st_as_sf(coords = c("X", "Y"), crs = 4326) %>%
dplyr::summarise(geometry = st_combine(geometry)) %>%
st_cast("POLYGON")
##get osm geometry for motorway links for defined area
x <- opq(bbox = rd_area) %>%
add_osm_feature(key = c('highway'), value = c('motorway',
'motorway_link')) %>% osmdata_sf()
## extract line geometry, generate a unique segment ID and get rid of excess columns
rdz <- x$osm_lines %>%
mutate(seg_id = paste0("L", sprintf("%02d", 1:NROW(bicycle)))) %>%
select(seg_id)
## pretend we only have traffic counts and speeds for half the links
osm_dat <- rdz[c(3,4,5,7,11,14,15),]
## links without data
osm_nodat <- filter(rdz, !seg_id %in% osm_dat$seg_id)
## visualise links with data and without
mapview(osm_dat, color = "green")+mapview(osm_nodat, color = "red")
## make up some data to work with
pretend_counts <- st_centroid(osm_dat)
## assign some random annual average daily flow and speed averages
pretend_counts$aadt <- sample(200:600, nrow(pretend_counts))
pretend_counts$speed <- sample(40:80, nrow(pretend_counts))

Here is one quick and elegant solution from the Cyipt project https://github.com/cyipt/cyipt/blob/master/scripts/prep_data/get_traffic.R
It uses the code from the get.aadt.class function and uses Voroni polygons to give the flows and speeds to the nearest roads. However, it doesn't distribute, i.e. split the flows where one links meets two and it sometimes results in opposing directions having the same flows and speeds.
library(dismo) ## dismo package for voroni polygon generation
#Make voronoi polygons and convert to SF
voronoi <- dismo::voronoi(x = st_coordinates(pretend_counts))
voronoi <- as(voronoi, "sf")
st_crs(voronoi) <- st_crs(pretend_counts)
#Find Intersections of roads with vernoi polygons
inter <- st_intersects(osm_nodat,voronoi)
#Get aadt and ncycle values
osm_nodat$aadt <- as.numeric(lapply(1:nrow(osm_nodat),function(x){as.numeric(round(mean(pretend_counts$aadt[inter[[x]]])),0)}))
osm_nodat$speed <- as.numeric(lapply(1:nrow(osm_nodat),function(x){as.numeric(round(mean(pretend_counts$speed[inter[[x]]])),0)}))
#Remove Unneded Data
all_osm <- as.data.frame(rbind(osm_dat, osm_nodat))
st_geometry(all_osm) <- all_osm$geometry
flows <- dplyr::select(all_osm, aadt)
mapview(flows)

Related

Create circular areas around a coordinate

I need to create circular areas around a coordinate to identify points inside this circular area. Any ideas how can I do this the easiest way in R?
You can do this with the sf package. Will need to bring in your data, this differs if you have a spatail data file, or are getting your data from tables. Then buffer your co-ordinate to get your circular polygon, then intersect that against your points ot get only those that overlap.
library(sf)
library(tidyverse)
##if from table data###
CO_ORD <- st_as_sf(x = "co-ord-data-file.csv", coords=c("Long", "Lat"))
POINTS <- st_as_sf(x = "points-data-file.csv", coords=c("Long", "Lat"))
###if from spatail data file####
CO_ORD <- st_read(dsn="co-ord-spatial-data.gdb", layer="co-ord-layer")
POINTS <- st_read(dsn="poins-spatial-data.gdb", layer="points-layer")
CO_ORD_BUFF <- st_buffer(x = CO_ORD, dist=500)
OVERLAPPING_POINTS <- st_intersection(POINTS, CO_ORD_BUFF)

R: Heading/ Direction of Travel/ bearing from .gpx file - tmaptools

Using tmaptools package in R - How can I extract the 'Bearing' information from a .GPX track file. This appears in Garmin Basecamp but does not appear using tmaptools::read_GPX. Currently I use the below code. But surely there is a simpler way? Link to GPS Track: https://www.dropbox.com/s/02p3yyjkv9fmrni/Barron_Thomatis_2019_EOD.gpx?dl=0
library(tmaptools)
library(tmap)
library(sf)
library(tidyverse)
library(geosphere)
GPSTrack <- read_GPX("Barron_Thomatis_2019_EOD.gpx", layers = "track_points", as.sf = TRUE)
#
#Adjust GPS Track Data
#
#Extract Lat & Lon from Track geometery (c(lat, Lon))
GPSTrack_Pts <- st_coordinates(GPSTrack)
#Add X, Y Columns to Track
GPSTrack2 <- cbind(GPSTrack, GPSTrack_Pts)
#Create a coordinate vector by combining X & Y
coords <- cbind(GPSTrack2$X,GPSTrack2$Y)
#Convert GPS Track into SpatialPoints format for calculating Bearing
GPSTrack_SpPts <- SpatialPoints(coords)
#Create GPS Point Bearing, GPP point distance & GPS Time interval columns
empty <- st_as_sfc("POINT(EMPTY)")
GPSTrack2 <- GPSTrack2 %>%
st_set_crs(4326) %>% # will use great circle distance
mutate(
Bearing = bearing(coords))
#Convert Bearing to Course and Add as column
GPSTrack2 <- GPSTrack2 %>%
mutate(course = (Bearing + 360) %% 360) # add full circle, i.e. +360, and determine modulo for 360
I suggest you use lwgeom::st_geod_azimuth() for this task - it makes for somewhat more concise code.
Note that there is a challenge when adding the vector of bearings back to the spatial dataframe of points; it has by definition one element less than is the number of rows (you need two points to define a bearing).
One possibility of achieving that - if required - is by concatenating the vector with a single NA value representing the bearing of the very last point. By definition it has no azimuth, as there is no following point.
The azimuth values are objects of class units, originally in radians. Should the class create a problem (as it does with concatenating with the NA) you can easily convert it to a plain number via units::drop_units().
library(sf)
library(dplyr)
library(lwgeom)
points <- st_read("Barron_Thomatis_2019_EOD.gpx",
layer = "track_points",
quiet = T,
stringsAsFactors = F)
points <- points %>%
mutate(bearing = c(lwgeom::st_geod_azimuth(.) %>% units::drop_units(), NA))

sp::over(). Does the dot belong to one of the polygons identified with an OGRGeoJSON file?

I'm trying to get a boolleans vector, where for example, v[i] =1 tells me if an i-th point (latitude longitude pair, present inside a train dataframe) falls within one of the geographical areas identified by an OGRGeoJSON file.
The OGR file is structured roughly like this:
District 1: 24 polygonal
District 2: 4 polygonal
District 3: 27 polygonal
District 4: 18 polygonal
District 5: 34 polygonal
That's what I tried to do.
However, the results obtained are not correct because the polygonal that is generated is a mix of all the various areas present in the OGR file.
library(rgdal)
library(httr)
library(sp)
r <- GET('https://data.cityofnewyork.us/api/geospatial/tqmj-j8zm?method=export&format=GeoJSON')
nyc_neighborhoods <- readOGR(content(r,'text'), 'OGRGeoJSON', verbose = F)
#New York City polygonal
pol_lat <- c(nyc_neighborhoods_df$lat)
pol_long <- c(nyc_neighborhoods_df$long)
xy <- cbind(pol_lat, pol_long)
p = Polygon(xy)
ps = Polygons(list(p),1)
pol = SpatialPolygons(list(ps))
#Points to analyse (pair of coordinates)
ny_lat <- c(train$pickup_latitude, train$dropoff_latitude)
ny_long <- c(train$pickup_longitude, train$dropoff_longitude)
ny_coord <- cbind(ny_lat, ny_long)
pts <- SpatialPoints(ny_coord)
#Query: Does the point to analyze fall in or out NYC?
over(pts, pol, returnList = TRUE)
How can I fix this to get the correct result?
sp is an older package which is being phased out in favor of the newer "Simple Features" sf package. Let me know if you are open to using the pipe operator %>% from the magrittr package, as it works nicely with the sf package (as does dplyr and purrr).
Using sf, you could do:
library(sf)
# Replace this with the path to the geojson file
geojson_path <- "path/to/file.geojson"
boroughs <- sf::st_read(dsn = geojson_path, stringsAsFactors = FALSE)
Now making a very simple spatial point object to stand in for the "trains" data.
# Make test data.frame
test_df <-
data.frame(
# Random test point I chose, a couple of blocks from Central Park
a = "manhattan_point",
y = 40.771959,
x = -73.964128,
stringsAsFactors = FALSE)
# Turn the test_df into a spatial object
test_point <-
sf::st_as_sf(
test_df,
# The coords argument tells the st_as_sf function
# what columns store the longitude and latitude data
# which it uses to associate a spatial point to each
# row in the data.frame
coords = c("x", "y"),
crs = 4326 # WGS84
)
Now we are ready to determine what polygon(s) our point falls in:
# Get the sparse binary predicate. This will give a list with as
# many elements as there are spatial objects in the first argument,
# in this case, test_point, which has 1 element.
# It also has attributes which detail what the relationship is
# (intersection, in our case)
sparse_bin_pred <- sf::st_intersects(test_point, boroughs)
# Output the boro_name that matched. I think the package purrr
# offers some more intuitive ways to do this, but
lapply(
sparse_bin_pred,
function(x) boroughs$boro_name[x]
)
That last part outputs:
[[1]]
[1] "Manhattan"

Population density within polygons

So, I have some questions regarding the raster package in R. I have a raster with estimated population in each grid point. I also have a shapefile with polygons of regions. I want to find out the coordinates of the neighborhood with the highest population density within each regions. Supose that each neighborhood is a homogeneous square of 5 by 5 grid points.
The following toy example mimics my problem.
library(raster)
library(maptools)
set.seed(123)
data(wrld_simpl)
wrld_simpl <- st_as_sf(wrld_simpl)
contr_c_am <- wrld_simpl %>%
filter(SUBREGION ==13) %>%
filter(FIPS != "MX") %>%
select(NAME)
# Create a raster of population (sorry for the bad example spatial distribution)
r <- raster(xmn=-180, xmx=180, ymn=-90, ymx=90, res=0.1)
values(r) <- runif(ncell(r), 0, 100)
# keep only raster around the region of interest
r_small <- crop(r, extent(contr_c_am))
plot(r_small)
plot(st_geometry(contr_c_am), add = T)
raster_contr_c_am <- rasterize(contr_c_am, r)
raster_contr_c_am is the population grid and the name of the region is saved as an attribute.
Somehow I need to filter only grid points from one region, and probably use some funcion like focal() to find total nearby population.
focal(raster_contr_c_am, matrix(1,5,5),sum, pad = T, padValue = 0)
Then, I need to find which grid point has the highest value within each region, and save it's coordinates.
I hope my explanation is not too confusing,
Thanks for any help!
Here's an example that iterates over the shape defining the region, then uses the raster values within the region and the focal() function to find the maximum.
library(raster)
library(maptools)
library(sf)
library(dplyr)
set.seed(123)
data(wrld_simpl)
wrld_simpl <- st_as_sf(wrld_simpl)
contr_c_am <- wrld_simpl %>%
filter(SUBREGION ==13) %>%
filter(FIPS != "MX") %>%
select(NAME)
# Create a raster of population (sorry for the bad example spatial distribution)
r <- raster(xmn=-180, xmx=180, ymn=-90, ymx=90, res=0.1)
values(r) <- runif(ncell(r), 0, 100)
# keep only raster around the region of interest
r_small <- crop(r, extent(contr_c_am))
raster_contr_c_am <- rasterize(contr_c_am, r_small)
# function to find the max raster value using focal
# in a region
findMax <- function(region, raster) {
tt <- trim((mask(raster, region))) # focus on the region
ff <- focal(tt, w=matrix(1/25,nc=5,nr=5))
maximumCell <- which.max(ff) # find the maximum cell id
maximumvalue <- maxValue(ff) # find the maximum value
maximumx <- xFromCell(ff, maximumCell) # get the coordinates
maximumy <- yFromCell(ff, maximumCell)
# return a data frame
df <- data.frame(maximumx, maximumy, maximumvalue)
df
}
numberOfShapes <- nrow(contr_c_am)
ll <- lapply(1:numberOfShapes, function(s) findMax(region = contr_c_am[s,], raster = r_small))
merged <- do.call(rbind, ll)
maxpoints <- st_as_sf(merged, coords=c('maximumx', 'maximumy'), crs=crs(contr_c_am))
library(mapview) # optional but nice visualization - select layers to see if things look right
mapview(maxpoints) + mapview(r_small) + mapview(contr_c_am)
I've made an sf object so that it can be plotted with the other spatial objects. Using the mapview package, I get this.

Difficulty with gBuffer in R: Resulting buffer is incorrect size

The overall goal of this code is to generate random points within a circular buffer based around a single lat/long point which I will enter in as needed. My apparent issue is that the buffer generated from gBuffer is not the correct size/location and therefore the points are farther than desired from the input location.
I am attempting to create a 130 meter buffer around a point. To construct my code I have been using 44.55555, -68.55555. I am using decimal degrees lat/long as that is what my data is in.
I have tried multiple stackoverflow threads to find the answer including:
Buffer (geo)spatial points in R with gbuffer
Create buffer and count points in R
#Enter in the lat and Long
NestLat <- readline(prompt="Enter Nest Latitude:") #Use 44.55555
NestLong <- readline(prompt="Enter Nest Longitude:") #Use -68.55555
#Coordinate from text to spatial points
NestLat <- as.numeric(NestLat)
NestLong <- as.numeric(NestLong)
nestcoords <- cbind(NestLat, NestLong)
nestcoords_sp <- SpatialPoints(nestcoords, proj4string=CRS("+proj=longlat +datum=WGS84"))
nestcoords_sp <- spTransform(nestcoords_sp, CRS("+init=epsg:2960"))
#Create buffer to generate 3 random points within 130m of nest
nestbuffer130 <- gBuffer(nestcoords_sp, width = 130)
nestbuffer130 <- spTransform(nestbuffer130, CRS("+proj=longlat +datum=WGS84"))
randoms130 <- spsample(nestbuffer130, 3, type = "random")
randoms130 <- spTransform(randoms130, CRS("+proj=longlat +datum=WGS84"))
nestbuffer130spdf <- as(nestbuffer130, "SpatialPolygonsDataFrame")
randoms130 <- as(randoms130, "SpatialPointsDataFrame")
The final buffer seems to be a circle with radius of 335 and not placed in the correct location spatially.
How are you measuring radius? The code seems to work just fine if you just want the sample points in the right location. Here is your code slightly modified with the gDistance function showing that your points are within the buffer zone. SF is now the preferred spatial package for R and the syntax is clearer and easier. I added what your code would look like with the SF package.
library(rgeos)
library(sp)
#Enter in the lat and Long
NestLat <- 44.55555
NestLong <- -68.55555
#Coordinate from text to spatial points
NestLat <- as.numeric(NestLat)
NestLong <- as.numeric(NestLong)
nestcoords <- cbind(NestLat, NestLong)
nestcoords_sp <- SpatialPoints(nestcoords, proj4string=CRS("+proj=longlat +datum=WGS84"))
nestcoords_sp <- spTransform(nestcoords_sp, CRS("+init=epsg:2960"))
#Create buffer to generate 3 random points within 130m of nest
nestbuffer130 <- gBuffer(nestcoords_sp, width = 130)
randoms130 <- spsample(nestbuffer130, 3, type = "random")
nestbuffer130spdf <- as(nestbuffer130, "SpatialPolygonsDataFrame")
randoms130 <- as(randoms130, "SpatialPointsDataFrame")
# measure distance
gDistance(randoms130, nestcoords_sp, byid = T)
SF
library(sf)
# turn coordinates into spatial poitns using sf
NestLat <- 44.55555
NestLong <- -68.55555
nestPoints <- st_point(c(NestLong,NestLat)) %>%
st_sfc(crs = 4326) %>%
st_transform(crs = 2960)
mapview(randoms130) + nestPoints
#Create buffer to generate 3 random points within 130m of nest
nestbuffer130 <- st_buffer(nestPoints, dist = 130)
randoms130 <- st_sample(nestbuffer130, 3)
# measure distance between points
st_distance(nestPoints,randoms130)
nestbuffer130SF <- st_sf(data = data.frame(ID = 1:length(randoms130)),
geometry = randoms130, crs = st_crs(randoms130)) %>%
st_transform(crs = 4326)
# check data visually
library(mapview)
mapview(nestbuffer130SF) + nestPoints

Resources