Length of polygon in R - r

I want to calculate length of each polygon.
-Around each polygon I created points (st_sample),
-from combiantion of points I created all possible polyline,
-for polylines which are inside polygon I calucalted length,
-the longest polyline is my result (max length of poylgon).
I wrote code which got me results but it is really slow. Do you have some solution for improvment of my code? I know that with two loops I cannot expect some miracle about speed but I do not know how get results another way.
If nothing else mybe at least some alterntive solution for creating all polyline from combination of points for one polygon in one step without loop ? :)
thank you
library(sf)
library(data.table)
poly=st_read(system.file("shape/nc.shp", package="sf"))
poly=poly[1:10,]
poly=st_cast(poly,"POLYGON")
poly$max_length=0
##Combination of 10 points, withot repetiton
aa=CJ(1:10,1:10)
aa=aa[!duplicated(t(apply(aa[,.(V1, V2)], 1, sort))),][V1!=V2]
##for each polygon create sample of coordinates along line, from them I create polyline and calculated length for linestring which are inside polygon
for (ii in 1:nrow(poly)){
ncl=st_cast(poly[ii,],"LINESTRING")
##sample of point along line
ncp=st_cast(st_sample(ncl,10, type="regular", exact=T),"POINT")
##create empty sf
aaa=st_sf(st_sfc())
st_crs(aaa)="NAD27"
##for each combination of points create linestring and calculate length only for polylines which are inside polygon
for (i in 1:nrow(aa)){
aaa=rbind(aaa,st_sf(geometry=st_cast(st_union(ncp[t(aa[i])]),"LINESTRING")))
}
poly$max_length[ii]=as.numeric(max(st_length(aaa[unlist(st_contains(poly[ii,],aaa)),])))
}
Second attempt with running function inside data.table. One loop less but problem is probably second loop.
poly=st_read(system.file("shape/nc.shp", package="sf"))
poly=poly[1:10,]
poly=st_cast(poly,"POLYGON")
poly$max_length=0
##Combination of 10 points, withot repetiton
aa=CJ(1:10,1:10)
aa=aa[!duplicated(t(apply(aa[,.(V1, V2)], 1, sort))),][V1!=V2]
overFun <- function(x){
ncl=st_cast(x[,geometry],"LINESTRING")
##sample of point along line
ncp=st_cast(st_sample(ncl,40, type="regular", exact=T),"POINT")
##create empty sf
aaa=st_sf(st_sfc())
st_crs(aaa)="NAD27"
##for each combination pof points create linestring and calculate length
for (i in 1:nrow(aa)){
aaa=rbind(aaa,st_sf(geometry=st_cast(st_union(ncp[t(aa[i])]),"LINESTRING")))
}
as.numeric(max(st_length(aaa[unlist(st_contains(x[,geometry],aaa)),])))}
setDT(poly)
##run function inside data.table
poly[,max_length:=overFun(poly), by=seq(nrow(poly))]
Edit: I found some solution for my problem which is enough fast for my needs.
Using parallel library inside data.table with function which also work on a data.table. There is still question why some polyline are excluded with function st_contains (see picture upper). Maybe some problem with precision?
library(sf)
library(data.table)
poly=st_read(system.file("shape/nc.shp", package="sf"))
poly=st_cast(poly,"POLYGON")
setDT(poly)
##Combination of 10 points, withot repetiton
aa=CJ(1:10,1:10)
aa=aa[!duplicated(t(apply(aa[,.(V1, V2)], 1, sort))),][V1!=V2]
overFun <- function(x){
ncl=st_cast(poly[1,geometry],"LINESTRING")
##sample of point along line
ncp=st_cast(st_sample(ncl,10, type="regular", exact=T),"POINT")
df=data.table(ncp[aa[,V1]],ncp[aa[,V2]] )
df[,v3:=st_cast(st_union(st_as_sf(V1),st_as_sf(V2)),"LINESTRING"), by=seq(nrow(df))]
as.numeric(max(st_length(df[unlist(st_contains(poly[1,geometry], df$v3)),]$v3)))}
library(parallel)
cl <- makeCluster(detectCores() - 1)
clusterExport(cl, list("overFun","data.table","st_cast","CJ","poly","st_sample","st_sf","st_sfc","aa","st_length","st_union",
"st_as_sf","st_contains"))
system.time(poly[,c("max_length"):=.(clusterMap(cl, overFun, poly$geometry)),])
stopCluster(cl)

I encountered a similar problem and frankly have not found any ready-made solution.
I will use the same Ashe county from sf package.
library(sf)
library(dplyr)
shape <- st_read(system.file("shape/nc.shp", package="sf")) %>%
dplyr::filter(CNTY_ID == 1825) %>% # Keep only one polygon
st_transform(32617) # Reproject to WGS 84 / UTM zone 17N
Solution 1
What you can do with just dplyr, tidyr, and sf is to turn polygons into points and calculate the distance between all the points. From this variety, choose the maximal value. It would be a green line from your example figure.
library(tidyr)
shape %>%
st_cast("POINT") %>% # turn polygon into points
distinct() %>% # remove duplicates
st_distance() %>% # calculate distance matrix
as.data.frame() %>%
gather(point_id, dist) %>% # convert to long format
pull(dist) %>% # keep only distance column
max()
#> Warning in st_cast.sf(., "POINT"): repeating attributes for all sub-geometries
#> for which they may not be constant
#> 45865.15 [m]
Solution 2
You can also use the Momocs package. It was created for 2D morphometric analysis. While it wasn't essential to reproject our shape to UTM in the first case (sf can handle geographic coordinates), your polygon should be projected in the case of the Momocs package.
library(Momocs)
shape %>%
st_cast("POINT") %>% # Polygon to points
distinct() %>% # remove duplicates
st_coordinates() %>% # get coordinates matrix
coo_calliper() # calculate max length
#> Warning in st_cast.sf(., "POINT"): repeating attributes for all sub-geometries
#> for which they may not be constant
#> [1] 45865.15
Comments
There are several other functions in the Momocs package. For example, you can calculate the length of a shape based on their iniertia axis i.e. alignment to the x-axis. The coo_length will return you 44432.02 [m].
For example, one can apply several functions from the Momocs package to the coordinate matrix as following:
point_matrix <- shape %>%
st_cast("POINT") %>%
distinct() %>%
st_coordinates()
#> Warning in st_cast.sf(., "POINT"): repeating attributes for all sub-geometries
#> for which they may not be constant
funs <- list("length" = coo_length,
"width" = coo_width,
"elongation" = coo_elongation)
sapply(funs, function(fun, x) fun(x), x = point_matrix)
#> length width elongation
#> 4.443202e+04 3.921162e+04 1.174917e-01

If you are after circumference of your polygons consider this code:
library(sf)
library(dplyr)
shape <- st_read(system.file("shape/nc.shp", package="sf")) # included with sf package
lengths <- shape %>%
mutate(circumference = st_length(.)) %>%
st_drop_geometry() %>%
select(NAME, circumference)
head(lengths)
NAME circumference
1 Ashe 141665.4 [m]
2 Alleghany 119929.0 [m]
3 Surry 160497.7 [m]
4 Currituck 301515.3 [m]
5 Northampton 211953.8 [m]
6 Hertford 160892.0 [m]
If you have some holes inside and do not want them included in the circumference consider removing them via nngeo::st_remove_holes().

Related

How to make an equal area grid in r using the terra package

I want to make an equal area grid (400 square miles per grid cell) over Wisconsin. I am doing this using the code from this link: Creating an equal distance spatial grid in R.
But, this code isn't very flexible, and I also need the grid to be more than just polygons. I need it to be a shapefile. I like the Terra package, but am unable to figure out how to do this in the terra package. The WI shapefile can be downloaded from https://data-wi-dnr.opendata.arcgis.com/datasets/wi-dnr::wisconsin-state-boundary-24k/explore.
My code looks like this:
library(sf)
library(terra)
library(tidyverse)
wi_shape <- vect('C:\\Users\\ruben\\Downloads\\Wisconsin_State_Boundary_24K\\Wisconsin_State_Boundary_24K.shp')
plot(wi_shape)
wi_grid <- st_make_grid(wi_shape, square = T, cellsize = c(20 * 1609.344, 20 * 1609.344))
plot(wi_grid, add = T)
How do I define a grid that is centered on a lat/lon point, where the output is a shapefile that contains attributes for each grid cell? I'm not sure why this is so confusing to me. Thank you.
If your goal is to make a raster based on the extent of another spatial dataset (polygons in this case) you can do
library(terra)
wi <- vect('Wisconsin_State_Boundary_24K.shp')
r <- rast(wi, res=(20 * 1609.344))
You can turn these into polygons and write them to a file with
v <- as.polygons(r)
writeVector(v, "test.shp")
To define a lon/lat center for the grid, you could do the following.
Coordinates of an example lon/lat point projected to the crs of your polygons (Wisconsin Transverse Mercator).
center <- cbind(-90, 45) |> vect(crs="+proj=longlat")
cprj <- crds(project(center, wi))
res <- 20 * 1609.344
Create a single cells around that point and expand the raster:
e <- rep(cprj, each=2) + c(-res, res) / 2
x <- rast(ext(e), crs=crs(wi), ncol=1, nrow=1)
x <- extend(x, wi, snap="out")
The result
plot(as.polygons(x), border="blue")
lines(wi, col="red")
points(cprj, pch="x", cex=2)
I should also mention that you are not using an equal-area coordinate reference system. You can see the variation in cell sizes with
a <- cellSize(x)
But it is very small (less than 1%) relative to the average cell size
diff(minmax(a))
# area
#max 1690441
global(a, mean)
# mean
#area 1036257046
Let's try to tidy this a little bit.
[...] and I also need the grid to be more than just polygons. I need it to be a shapefile.
It's exactly the other way around from my point of view. Once you obtained a proper representation of a polygon, you can export it in whatever format you like (which is supported), e.g. an ESRI Shapefile.
I like the Terra package, but am unable to figure out how to do this in the terra package.
Maybe you did not notice, but actually you are not really using {terra} to create your grid, but {sf} (with SpatVector input from terra, which is accepted here).
library(sf)
#> Linking to GEOS 3.9.3, GDAL 3.5.2, PROJ 8.2.1; sf_use_s2() is TRUE
library(terra)
#> terra 1.6.33
wi_shape <- vect('Wisconsin_State_Boundary_24K.shp')
class(wi_shape)
#> [1] "SpatVector"
#> attr(,"package")
#> [1] "terra"
wi_grid <- st_make_grid(wi_shape, square = T, cellsize = c(20 * 1609.344, 20 * 1609.344))
class(wi_grid)
#> [1] "sfc_POLYGON" "sfc"
It's a minor adjustment, but basically, you can cut this dependency here for now. Also - although I'm not sure is this is the type of flexibility you are looking for - I found it very pleasing to work with {units} recently if you are about to do some conversion stuff like square miles in meters. In the end, once your code is running properly, you can substitute your hardcoded values by variables step by step and wrap a function out of this. This should not be a big deal in the end.
In order to shift your grid to be centered on a specific lat/lon point, you can leverage the offset attribute of st_make_grid(). However, since this only shifts the grid based on the original extent, you might lose coverage with this approach:
library(sf)
#> Linking to GEOS 3.9.3, GDAL 3.5.2, PROJ 8.2.1; sf_use_s2() is TRUE
wi_shape <- read_sf("Wisconsin_State_Boundary_24K.shp")
# area of 400 square miles
A <- units::as_units(400, "mi^2")
# boundary length in square meters to fit the metric projection
b <- sqrt(A)
units(b) <- "m"
# let's assume you wanted your grid to be centered on 45.5° N / 89.5° W
p <- c(-89.5, 45.5) |>
st_point() |>
st_sfc(crs = "epsg:4326") |>
st_transform("epsg:3071") |>
st_coordinates()
p
#> X Y
#> 1 559063.9 558617.2
# create an initial grid for centroid determination
wi_grid <- st_make_grid(wi_shape, cellsize = c(b, b), square = TRUE)
# determine the centroid of your grid created
wi_grid_centroid <- wi_grid |>
st_union() |>
st_centroid() |>
st_coordinates()
wi_grid_centroid
#> X Y
#> 1 536240.6 482603.9
# this should be your vector of displacement, expressed as the difference
delta <- wi_grid_centroid - p
delta
#> X Y
#> 1 -22823.31 -76013.3
# `st_make_grid(offset = ...)` requires lower left corner coordinates (x, y) of the grid,
# so you need some extent information which you can acquire via `st_bbox()`
bbox <- st_bbox(wi_grid)
# compute the adjusted lower left corner
llc_new <- c(st_bbox(wi_grid)["xmin"] + delta[1], st_bbox(wi_grid)["ymin"] + delta[2])
# create your grid with an offset
wi_grid_offset <- st_make_grid(wi_shape, cellsize = c(b, b), square = TRUE, offset = llc_new) |>
st_as_sf()
# append attributes
n <- dim(wi_grid_offset)[1]
wi_grid_offset[["id"]] <- paste0("A", 1:n)
wi_grid_offset[["area"]] <- st_area(wi_grid_offset) |> as.numeric()
# inspect
plot(st_geometry(wi_shape))
plot(st_geometry(wi_grid_offset), border = "red", add = TRUE)
If you wanted to export your polygon features ("grid") in shapefile format, simply make use of st_write(wi_grid_sf, "wi_grid_sf.shp").
PS: For this example you need none of the tidyverse stuff, so there is no need to load it.

Nearest adjacent polygon, distance and closest point to other polygons

I have multiple polygons in a dataset and I would like to:
Identify the nearest polygon to each polygon and what the distance between them is
Calculate the coordinates of where the nearest parts of the two polygons are (so I can draw a line and visually check the distances)
If the distance is 800 metres of less, join the polygons together to make multipart polygons
This code does half of my first ask and I know st_distance can do the latter. I was hoping for a solution that wouldn't need for a matrix of every distance between every polygon to be generated.
library(sf)
library(dplyr)
download.file("https://drive.google.com/uc?export=download&id=1-I4F2NYvFWkNqy7ASFNxnyrwr_wT0lGF" , destfile="ProximityAreas.zip")
unzip("ProximityAreas.zip")
Proximity_Areas <- st_read("Proximity_Areas.gpkg")
Nearest_UID <- st_nearest_feature(Proximity_Areas)
Proximity_Areas <- Proximity_Areas %>%
select(UID) %>%
mutate(NearUID = UID[Nearest_UID])
Is there a method of producing two outputs 1) an appended Proximity_Areas file that included the distance and XY coorindates for the nearest points for the UID and Neatest_UID and 2) a file that looks similar to the original Proximity_Areas file, just with merged polygons if the criteria is met?
Once you have created index of nearest neighbors you can calculate the connecting lines via a sf::st_nearest_points() call.
An interesting aspect is that if you make the call on geometries (not sf, but sfc objects) you do the calculation pairwise (i.e. not in a matrix way).
The call will return linestrings, which is very helpful since you can calculate their length and have two of your objectives (nearest points & distance) at a single call...
lines <- Proximity_Areas %>%
st_geometry() %>% # extact geometry
# create a line to nearest neighbour as geometry
st_nearest_points(st_geometry(Proximity_Areas)[Nearest_UID], pairwise =T) %>%
# make sf again (so it can hold data)
st_as_sf() %>%
# add some data - start, finish, lenght
mutate(start = Proximity_Areas$UID,
end = Proximity_Areas$UID[Nearest_UID],
distance = st_length(.))
glimpse(lines)
# Rows: 39
# Columns: 4
# $ x <LINESTRING [m]> LINESTRING (273421.5 170677..., LINESTRING (265535.1 166136..., LINESTRING (265363.3 1…
# $ start <chr> "U001", "U002", "U003", "U004", "U005", "U006", "U007", "U008", "U009", "U010", "U011", "U012", "…
# $ end <chr> "U026", "U010", "U013", "U033", "U032", "U014", "U028", "U036", "U011", "U008", "U028", "U030", "…
# $ distance [m] 317.84698 [m], 579.85131 [m], 529.67907 [m], 559.96441 [m], 0.00000 [m], 80.54011 [m], 754.94311 [m…
mapview::mapview(lines)
The part about joining close objects together is a bit tricky, since you don't know how many polygons you will end up with - you can have a polygon A that is far from C, but will end up merged since both are close to B. This does not vectorize easily and you are likely to end up running a while loop. For a possible approach consider this related answer Dissolving polygons by distance - R

How to determine if a point lies within an sf geometry that spans the dateline?

Using the R package sf, I'm trying to determine whether some points occur within the bounds of a shapefile (in this case, Hawai‘i's, EEZ). The shapefile in question can be found here. Unfortunately, the boundaries of the area in question span +/-180 longitude, which I think is what's messing me up. (I read on the sf website some business about spherical geometry in the new version, but I haven't been able to get that version to install. I think the polygons I'm dealing with are sufficiently "flat" to avoid any of those issues anyway). Part of the issue seems to be that my shapefile contains multiple geometries broken up by the dateline but I'm not sure how to combine them.
How do you tell, using sf, whether some points are inside of the bounds of some object in a shapefile (that happens to span the dateline)?
I have tried various combinations of st_shift_longitude to no avail. I have also tried transforming to what I think is a planar projection (2163), and that didn't work.
Here's how I'm currently trying to do this:
library(sf)
library(maps)
library(ggplot2)
library(tidyverse)
# this is the shapefile from the link above
eez_unshifted <- read_sf("USMaritimeLimitsAndBoundariesSHP/USMaritimeLimitsNBoundaries.shp") %>%
filter(OBJECTID == 1206) %>%
st_transform(4326)
eez_shifted <- read_sf("USMaritimeLimitsAndBoundariesSHP/USMaritimeLimitsNBoundaries.shp") %>%
filter(OBJECTID == 1206) %>%
st_transform(4326) %>%
st_shift_longitude()
# four points, in and out of the geometry, on either side of the dateline
pnts <- tibble(x=c(-171.952474,176.251978,179.006220,-167.922929),y=c(25.561970,17.442716,28.463375,15.991429)) %>%
st_as_sf(coords=c('x','y'),crs=st_crs(eez_unshifted))
# these all return false for every point
st_within(pnts,eez_unshifted)
st_within(st_shift_longitude(pnts),eez_unshifted)
st_within(pnts,eez_shifted)
st_within(st_shift_longitude(pnts),eez_shifted)
# these also all return false for every point
st_intersects(pnts,eez_unshifted)
st_intersects(st_shift_longitude(pnts),eez_unshifted)
st_intersects(pnts,eez_shifted)
st_intersects(st_shift_longitude(pnts),eez_shifted)
# plot the data just to show that it looks right
wrld2 <- st_as_sf(maps::map('world2', plot=F, fill=T))
ggplot() +
geom_sf(data=wrld2, fill='gray20',color="lightgrey",size=0.07) +
geom_sf(data=eez_shifted) +
geom_sf(data=st_shift_longitude(pnts)) +
coord_sf(xlim=c(100,290), ylim=c(-60,60)) +
xlab("Longitude") +
ylab("Latitude")
The answer is to make sure the geometry you're checking against is a polygon:
> eez_poly <- st_polygonize(eez_shifted)
> st_within(pnts,eez_poly)
although coordinates are longitude/latitude, st_within assumes that they are planar
Sparse geometry binary predicate list of length 4, where the predicate was `within'
1: 1
2: (empty)
3: 1
4: (empty)

Measuring the distance of all points along a line in R (linestring in sf)

I have a smoothed line (a simplified abstraction of a coastline) that is a linestring and I want to measure the length of the line at frequent intervals along it. I can create the smoothed line, and measure its length:
library(raster)
library(sf)
library(tidyverse)
library(rnaturalearth)
library(smoothr)
# create bounding box for the line
xmin=-80
xmax=-66
ymin=24
ymax=45
bbox <- extent(xmin, xmax, ymin, ymax)
# get coarse outline of the US
usamap <- rnaturalearth::ne_countries(scale = "small", country = "united states of america", returnclass = "sf")[1] %>%
st_cast("MULTILINESTRING")
# crop to extent of the bbox and get rid of a border line that isn't coastal
bbox2 <- st_set_crs(st_as_sf(as(raster::extent(-80, -74, 42, 45.5), "SpatialPolygons")), st_crs(usamap))
cropmap <- usamap %>%
st_crop(bbox) %>%
st_difference(bbox2)
# smooth the line
smoothmap <- cropmap %>%
smoothr::smooth(method="ksmooth", smoothness=8)
# measure the line length
st_length(smoothmap) # I get 1855956m
I'm going to be "snapping" sample sites to points along this line, and I need to know how far they are along the coastline. However, I can't figure out how to measure the length of the line at intervals along the coastline. The interval size isn't terribly important so long as it's at relatively fine resolution, perhaps every 1km or every 0.01 degree.
What I would like to produce is a dataframe with x,y columns containing the lat/lon of points along the line, and a "length" column containing the distance along the line (from the origin to that point). Here are some of the things I've tried:
Iterating over bounding boxes. I tried cropping the line with smaller and smaller bounding boxes (using regular intervals in lat/lon), but because the line bends "backward" around -76, 38, some of the cropped boxes don't encompass the complete line segment that I expected. This approach works for the top right half of the line, but not for the bottom left half--that just returns lengths of zero.
Cropping the extent before implementing the smoother, and then measuring the line. Since the smoother function does not produce the same shape if it is only measuring a segment of the original line, this doesn't actually measure distance along the same line.
Getting the coordinates of the linestring with st_coordinates, trimming off one row (one point on the line), and recasting the remaining coordinates as a linestring. This approach does not produce a single linestring but instead a chain of points (since st_cast doesn't know how to connect them again), so it can't be measured normally.
It would be ideal to "edit" the geometry of smoothmap to delete one row of coordinates at a time, repeatedly measure the line, and write out the end point coordinates and the line length to a dataframe. However, I'm not sure if it's possible to edit a sf object's coordinates without turning it into a dataframe.
If I understand your question, I think you can do this:
x <- as(smoothmap, "Spatial")
g <- geom(x)
d <- pointDistance(g[-nrow(g), c("x", "y")], g[-1, c("x", "y")], lonlat=TRUE)
gg <- data.frame(g[, c('x','y')], seglength=c(d, 0))
gg$lengthfromhere <- rev(cumsum(rev(gg[,"seglength"])))
head(gg)
# x y seglength lengthfromhere
#1 -67.06494 45.00000 70850.765 1855956
#2 -67.74832 44.58805 2405.180 1785105
#3 -67.77221 44.57474 2490.175 1782700
#4 -67.79692 44.56095 2577.254 1780210
#5 -67.82248 44.54667 2666.340 1777633
#6 -67.84890 44.53189 2757.336 1774967
tail(gg)
# x y seglength lengthfromhere
#539 -79.09383 33.34224 2580.481 111543.5
#540 -79.11531 33.32753 2542.557 108963.0
#541 -79.13648 33.31306 2512.564 106420.5
#542 -79.15739 33.29874 2479.949 103907.9
#543 -79.17802 33.28460 101427.939 101427.9
#544 -80.00000 32.68751 0.000 0.0
I believe you need sf::st_line_sample:
# Transform to metric
smoothmap_utm <- st_transform(smoothmap, 3857)
# Get samples at every kilometer
smoothmap_samples <- st_line_sample(smoothmap_utm, density = 1/1000)
# Transform back to a sf data.frame
smoothmaps_points <- map(smoothmap_samples, function(x) data.frame(geometry = st_geometry(x))) %>%
map_df(as.data.frame) %>% st_sf() %>%
st_cast("POINT") %>%
st_set_crs(3857) %>%
st_transform(4326)
mapview(smoothmaps_points) + mapview(smoothmap)
Getting to your desired output:
# Function to transform sf to lon,lat
sfc_as_cols <- function(x, names = c("lon","lat")) {
stopifnot(inherits(x,"sf") && inherits(sf::st_geometry(x),"sfc_POINT"))
ret <- sf::st_coordinates(x)
ret <- tibble::as_tibble(ret)
stopifnot(length(names) == ncol(ret))
x <- x[ , !names(x) %in% names]
ret <- setNames(ret,names)
ui <- dplyr::bind_cols(x,ret)
st_set_geometry(ui, NULL)
}
smoothmaps_points_xy <- sfc_as_cols(smoothmaps_points) %>%
mutate(dist = cumsum(c(0, rep(1000, times = n() - 1))))
smoothmaps_points_xy
lon lat dist
1 -67.06836 44.99794 0
2 -67.07521 44.99383 1000
3 -67.08206 44.98972 2000
4 -67.08891 44.98560 3000
5 -67.09575 44.98149 4000
6 -67.10260 44.97737 5000
Important
But if your ultimate goal is to get points distance in a path, I would recommend checking rgeos::gProject.

how to merge a shapefile with a dataframe with latitude/longitude data

I am struggling with the following issue
I have downloaded the PLUTO NYC Manhattan Shapefile for the NYC tax lots from here https://www1.nyc.gov/site/planning/data-maps/open-data/dwn-pluto-mappluto.page
I am able to read them in sf with a simple st_read
> mydf
Simple feature collection with 42638 features and 90 fields
geometry type: MULTIPOLYGON
dimension: XY
bbox: xmin: 971045.3 ymin: 188447.4 xmax: 1010027 ymax: 259571.5
epsg (SRID): NA
proj4string: +proj=lcc +lat_1=40.66666666666666 +lat_2=41.03333333333333 +lat_0=40.16666666666666 +lon_0=-74 +x_0=300000 +y_0=0 +datum=NAD83 +units=us-ft +no_defs
First 10 features:
Borough Block Lot CD CT2010 CB2010 SchoolDist Council ZipCode FireComp PolicePrct HealthCent HealthArea
1 MN 1545 52 108 138 4000 02 5 10028 E022 19 13 3700
My problem is the following: I have a dataframe as follows
> data_frame('lat' = c(40.785091,40.785091), 'lon' = c(-73.968285, -73.968285))
# A tibble: 2 x 2
lat lon
<dbl> <dbl>
1 40.785091 -73.968285
2 40.785091 -73.968285
I would like to merge this data to the mydf dataframe above, so that I can count how many latitude/longitude observations I have within each tax lot (remember, mydf is at the tax lot granularity), and plot the corresponding map of it. I need to do so using sf.
In essence something similar to
pol <- mydf %>% select(SchoolDist)
plot(pol)
but where the counts for each tax lot come from counting how many points in my latitude/longitude dataframe fall into them.
Of course, in my small example I just have 2 points in the same tax lot, so that would just highlight one single tax lot in the whole area. My real data contains a lot more points.
I think there is an easy way to do it, but I was not able to find it.
Thanks!
This is how I would do it with arbitrary polygon and point data. I wouldn't merge the two and instead just use a geometry predicate to get the counts that you want. Here we:
Use the built in nc dataset and transform to 3857 crs, which is projected rather than lat-long (avoids a warning in st_contains)
Create 1000 random points within the bounding box of nc, using st_bbox and runif. Note that st_as_sf can turn a data.frame with lat long columns into sf points.
Use lengths(st_contains(polygons, points) to get the counts of points per polygon. sgbp objects created by a geometry predicate are basically "for each geometry in sf x, what indices of geometries in sf y satisfy the predicate". So lengths1 effectively gives the number of points that satisfy the predicate for each geometry, in this case number of points contained within each polygon.
Once the counts are in the sf object as a column, we can just select and plot them with the plot.sf method.
For your data, simply replace nc with mydf and leave out the call to tibble, instead use your data.frame with the right lat long pairs.
library(tidyverse)
library(sf)
#> Linking to GEOS 3.6.1, GDAL 2.2.3, proj.4 4.9.3
nc <- system.file("shape/nc.shp", package="sf") %>%
read_sf() %>%
st_transform(3857)
set.seed(1000)
points <- tibble(
x = runif(1000, min = st_bbox(nc)[1], max = st_bbox(nc)[3]),
y = runif(1000, min = st_bbox(nc)[2], max = st_bbox(nc)[4])
) %>%
st_as_sf(coords = c("x", "y"), crs = 3857)
plot(nc$geometry)
plot(points$geometry, add = TRUE)
nc %>%
mutate(pt_count = lengths(st_contains(nc, points))) %>%
select(pt_count) %>%
plot()
Created on 2018-05-02 by the reprex package (v0.2.0).
I tried this on your data, but the intersection is empty for the both sets of points you provided. However, the code should work.
EDIT: Simplified group_by + mutate with add_count:
mydf = st_read("MN_Dcp_Mappinglot.shp")
xydf = data.frame(lat=c(40.758896,40.758896), lon=c(-73.985130, -73.985130))
xysf = st_as_sf(xydf, coords=c('lon', 'lat'), crs=st_crs(mydf))
## NB: make sure to st_transform both to common CRS, as Calum You suggests
xysf %>%
sf::st_intersection(mydf) %>%
dplyr::add_count(LOT)
Reproducible example:
nc = sf::st_read(system.file("shape/nc.shp", package="sf"))
ncxy = sf::st_as_sf(data.frame(lon=c(-80, -80.1, -82), lat=c(35.5, 35.5, 35.5)),
coords=c('lon', 'lat'), crs=st_crs(nc))
ncxy = ncxy %>%
sf::st_intersection(nc) %>%
dplyr::add_count(FIPS)
## a better approach
ncxy = ncxy %>%
sf::st_join(nc, join=st_intersects) %>%
dplyr::add_count(FIPS)
The new column n includes the total number of points per FIPS code.
ncxy %>% dplyr::group_by(FIPS) %>% dplyr::distinct(n)
> although coordinates are longitude/latitude, st_intersects assumes
that they are planar
# A tibble: 2 x 2
# Groups: FIPS [2]
FIPS n
<fctr> <int>
1 37123 2
2 37161 1
I'm not sure why your data results in an empty intersection, but since the code works on the example above there must be a separate issue.
HT: st_join approach from this answer.

Resources