How can i connect two linestrings?
It is possible to lay a very slim buffer around the lines and then connect them like so:
one_line <- lines %>%
st_buffer(0.05) %>%
st_intersection() %>%
st_union() %>%
st_cast('LINESTRING')
There are 2 problems with this:
a) below is a very small subset of my data containing one such disconnected line segment - if i use the above method on the small part it forms a complete polygon which, when converted to a linestring just makes a very narrow loop
b) if i use the whole data set it kind of works but creates lines at the approximate distance of the buffer around my original line. See picture below:
Blue & red are the edge lines while black would be the original.
I thought to simply average them out but when i convert the 2 lines to coordinates (st_coordinates()), the resulting tables have different lengths and are not in order.
I looked around but did not really find any useful answers.
Here is a dput of the geometry data:
lines <- structure(list(structure(list(structure(c(2880, 2880.92, 2881.72,
2882.47, 2883.17, 2883.84, 2884.5, 2894.05, 2894.69, 2895.29393034826,
340255.362641509, 340257.22, 340259.03, 340260.85, 340262.69,
340264.55, 340266.4, 340293.7, 340295.61, 340297.500995024), .Dim = c(10L,
2L)), structure(c(2907.22402724177, 2914.21353757771, 340330.886392736,
340350.2), .Dim = c(2L, 2L))), class = c("XY", "MULTILINESTRING",
"sfg")), structure(c(2895.3, 2896.82, 2897.26, 2897.72, 2907.2,
340297.52, 340302.26, 340303.58, 340304.89, 340330.82), .Dim = c(5L,
2L), class = c("XY", "LINESTRING", "sfg"))), n_empty = 0L, crs = structure(list(
input = "EPSG:31256", wkt = "PROJCRS[\"MGI / Austria GK East\",\n BASEGEOGCRS[\"MGI\",\n DATUM[\"Militar-Geographische Institut\",\n ELLIPSOID[\"Bessel 1841\",6377397.155,299.1528128,\n LENGTHUNIT[\"metre\",1]]],\n PRIMEM[\"Greenwich\",0,\n ANGLEUNIT[\"degree\",0.0174532925199433]],\n ID[\"EPSG\",4312]],\n CONVERSION[\"Austria Gauss-Kruger East\",\n METHOD[\"Transverse Mercator\",\n ID[\"EPSG\",9807]],\n PARAMETER[\"Latitude of natural origin\",0,\n ANGLEUNIT[\"degree\",0.0174532925199433],\n ID[\"EPSG\",8801]],\n PARAMETER[\"Longitude of natural origin\",16.3333333333333,\n ANGLEUNIT[\"degree\",0.0174532925199433],\n ID[\"EPSG\",8802]],\n PARAMETER[\"Scale factor at natural origin\",1,\n SCALEUNIT[\"unity\",1],\n ID[\"EPSG\",8805]],\n PARAMETER[\"False easting\",0,\n LENGTHUNIT[\"metre\",1],\n ID[\"EPSG\",8806]],\n PARAMETER[\"False northing\",-5000000,\n LENGTHUNIT[\"metre\",1],\n ID[\"EPSG\",8807]]],\n CS[Cartesian,2],\n AXIS[\"northing (X)\",north,\n ORDER[1],\n LENGTHUNIT[\"metre\",1]],\n AXIS[\"easting (Y)\",east,\n ORDER[2],\n LENGTHUNIT[\"metre\",1]],\n USAGE[\n SCOPE[\"Engineering survey, topographic mapping.\"],\n AREA[\"Austria east of 14°50'E of Greenwich (32°30'E of Ferro).\"],\n BBOX[46.56,14.83,49.02,17.17]],\n ID[\"EPSG\",31256]]"), class = "crs"), idx = structure(c(1,
2, 1, 1), .Dim = c(2L, 2L)), class = c("sfc_GEOMETRY", "sfc"), precision = 0, bbox = structure(c(xmin = 2880,
ymin = 340255.362641509, xmax = 2914.21353757771, ymax = 340350.2
), class = "bbox"), classes = c("MULTILINESTRING", "LINESTRING"
))
For the above example, you could cast to MULTIPOINT, then union, and cast to LINESTRING.
``` r
library(tidyverse) #overkill, but easier
library(sf)
library(patchwork) #to plot side-by-side
# load data from above
# lines <-
single_line <- lines %>%
st_as_sf() %>%
st_cast('MULTIPOINT') %>%
st_union() %>%
st_cast('LINESTRING')
head(single_line)
#> Geometry set for 1 feature
#> Geometry type: LINESTRING
#> Dimension: XY
#> Bounding box: xmin: 2880 ymin: 340255.4 xmax: 2914.214 ymax: 340350.2
#> Projected CRS: MGI / Austria GK East
#> LINESTRING (2880 340255.4, 2880.92 340257.2, 28...
p1 <- ggplot() +
geom_sf(data = st_as_sf(lines), col = c('red', 'blue')) + ggtitle('lines')
p2 <- ggplot() + geom_sf(data = single_line, col = 'black') + ggtitle('lines cast & unioned')
p3 <- p1 + p2
p3
Created on 2022-03-08 by the reprex package (v0.3.0)
This is an incomplete answer because it still requires some manual input but it can be generalized by implementing a few rules.
The idea is to find the places where the line is broken and then go to the start/endpoints of the closest line. Then make a line segment bridging the break and combining all the line segments.
library(lwgeom)
library(tidyverse)
library(sf)
line1 <- lines %>%
st_sf() %>%
st_combine() %>%
st_sf() %>%
st_line_merge() %>%
st_cast("LINESTRING")
### this is still some manual work which needs to be improved
newline1 <- c(st_startpoint(line1[3,]), st_endpoint(line1[2,])) %>%
st_combine() %>%
st_cast("MULTIPOINT") %>%
st_union() %>%
st_cast("LINESTRING") %>%
st_sf()
newline2 <- c(st_startpoint(line1[2,]), st_endpoint(line1[1,])) %>%
st_combine() %>%
st_cast("MULTIPOINT") %>%
st_union() %>%
st_cast("LINESTRING") %>%
st_sf()
line1[nrow(line1)+1,]<-newline1
line1[nrow(line1)+1,]<-newline2
###
line1_uni <- line1 %>%
st_sf() %>%
st_combine() %>%
st_sf() %>%
st_line_merge() %>%
st_cast("LINESTRING") %>%
st_sf()
line1_uni
Simple feature collection with 1 feature and 0 fields
Geometry type: LINESTRING
Dimension: XY
Bounding box: xmin: 2880 ymin: 340255.4 xmax: 2914.214 ymax: 340350.2
Projected CRS: MGI / Austria GK East
geometry
1 LINESTRING (2880 340255.4, ...
I've got a table in Postgis which contains the field beginpunt, a geometry field containing a single point.
Next I'll create a 100*100m square which contains the point.
In Postgis I'll use the query below and it works great.
select
*
,st_transform(
ST_MakeEnvelope(floor(st_x(st_transform(beginpunt,28992))/100) *100,
floor(st_y(st_transform(beginpunt,28992))/100) *100,
floor(st_x(st_transform(beginpunt,28992))/100) *100 +100,
floor(st_y(st_transform(beginpunt,28992))/100) *100 +100, 28992),4326) as cbs_begin
from cbs
For a specific reason I want to achieve the same without using Postgis, but do the whole excersise in R.
This is where I get stuck. I can create a single polygon, but my problem is doing it rowwise in a dataframe.
I have the same table (cbs) in R
for simplicity i created a few extra fields:
dput(cbs) #this is the input dataframe
cbs$beginpunt_rd<-st_transform(cbs$beginpunt,28992) #transform crs
cbs$begincbsx<-floor(st_coordinates(cbs$beginpunt_rd)[,1]/100)*100 #define origin x
cbs$begincbsy<-floor(st_coordinates(cbs$beginpunt_rd)[,2]/100)*100 #define origin y
cbs$pol<-NA #create an extra empty field in the dataframe
tbldata<-st_sfc() #create an empty table
for(i in 1:nrow(cbs)){
cbs_begin <- st_polygon(
list(
cbind(
rbind(cbs$begincbsx[i],cbs$begincbsx[i]+100,cbs$begincbsx[i]+100 , cbs$begincbsx[i],cbs$begincbsx[i]),
rbind(cbs$begincbsy[i],cbs$begincbsy[i],cbs$begincbsy[i]+100 , cbs$begincbsy[i]+100,cbs$begincbsy[i])
)
))
if(i<2){tbldata<-cbs[FALSE,]} #create empty dataframe
tbldata[nrow(tbldata) + 1, ] <- c(cbs[i,1:5],st_sfc(st_polygon(cbs_begin),crs=28992)) #add row to dataframe
}
dput(tbldata) #this is the output dataframe
The code above gives me a (new) dataframe with a field "pol" containing a list with the right coordinates,
but it's missing the step to make it a polygon or sf object.
I've seen some examples of how it could be done, but I can't get it to work
Edit: the input-table cbs
dput(cbs) result is:
structure(list(id = c(9594L, 9520L, 9492L, 83859L,
9438L, 9490L), beginpunt = structure(list(structure(c(5.0499337,
51.5676501), class = c("XY", "POINT", "sfg")), structure(c(5.0146573,
51.5818484), class = c("XY", "POINT", "sfg")), structure(c(5.08459,
51.557595), class = c("XY", "POINT", "sfg")), structure(c(5.0129685,
51.5865527), class = c("XY", "POINT", "sfg")), structure(c(5.0548874,
51.5164541), class = c("XY", "POINT", "sfg")), structure(c(5.0647628,
51.5812475), class = c("XY", "POINT", "sfg"))), class = c("sfc_POINT",
"sfc"), precision = 0, bbox = structure(c(xmin = 5.0129685, ymin = 51.5164541,
xmax = 5.08459, ymax = 51.5865527), class = "bbox"), crs = structure(list(
input = "EPSG:4326", wkt = "GEOGCRS[\"WGS 84\",\n DATUM[\"World Geodetic System 1984\",\n ELLIPSOID[\"WGS 84\",6378137,298.257223563,\n LENGTHUNIT[\"metre\",1]]],\n PRIMEM[\"Greenwich\",0,\n ANGLEUNIT[\"degree\",0.0174532925199433]],\n CS[ellipsoidal,2],\n AXIS[\"geodetic latitude (Lat)\",north,\n ORDER[1],\n ANGLEUNIT[\"degree\",0.0174532925199433]],\n AXIS[\"geodetic longitude (Lon)\",east,\n ORDER[2],\n ANGLEUNIT[\"degree\",0.0174532925199433]],\n USAGE[\n SCOPE[\"Horizontal component of 3D system.\"],\n AREA[\"World.\"],\n BBOX[-90,-180,90,180]],\n ID[\"EPSG\",4326]]"), class = "crs"), n_empty = 0L)), row.names = c(NA,
6L), class = "data.frame")
and the output/result table: tbldata
dput(tbldata)
structure(list(id = c(9594L, 9520L, 9492L, 83859L,
9438L, 9490L), beginpunt = structure(list(structure(c(5.0499337,
51.5676501), class = c("XY", "POINT", "sfg")), structure(c(5.0146573,
51.5818484), class = c("XY", "POINT", "sfg")), structure(c(5.08459,
51.557595), class = c("XY", "POINT", "sfg")), structure(c(5.0129685,
51.5865527), class = c("XY", "POINT", "sfg")), structure(c(5.0548874,
51.5164541), class = c("XY", "POINT", "sfg")), structure(c(5.0647628,
51.5812475), class = c("XY", "POINT", "sfg"))), precision = 0, bbox = structure(c(xmin = 5.0499337,
ymin = 51.5676501, xmax = 5.0499337, ymax = 51.5676501), class = "bbox"), crs = structure(list(
input = "EPSG:4326", wkt = "GEOGCRS[\"WGS 84\",\n DATUM[\"World Geodetic System 1984\",\n ELLIPSOID[\"WGS 84\",6378137,298.257223563,\n LENGTHUNIT[\"metre\",1]]],\n PRIMEM[\"Greenwich\",0,\n ANGLEUNIT[\"degree\",0.0174532925199433]],\n CS[ellipsoidal,2],\n AXIS[\"geodetic latitude (Lat)\",north,\n ORDER[1],\n ANGLEUNIT[\"degree\",0.0174532925199433]],\n AXIS[\"geodetic longitude (Lon)\",east,\n ORDER[2],\n ANGLEUNIT[\"degree\",0.0174532925199433]],\n USAGE[\n SCOPE[\"Horizontal component of 3D system.\"],\n AREA[\"World.\"],\n BBOX[-90,-180,90,180]],\n ID[\"EPSG\",4326]]"), class = "crs"), classes = character(0), n_empty = 0L, class = c("sfc_POINT",
"sfc")), beginpunt_rd = structure(list(structure(c(131616.180272543,
397688.855374183), class = c("XY", "POINT", "sfg")), structure(c(129178.446528786,
399280.343643684), class = c("XY", "POINT", "sfg")), structure(c(134014.355256952,
396559.663404012), class = c("XY", "POINT", "sfg")), structure(c(129064.081985984,
399804.302704657), class = c("XY", "POINT", "sfg")), structure(c(131933.668374674,
391991.666781811), class = c("XY", "POINT", "sfg")), structure(c(132651.016139436,
399196.932221466), class = c("XY", "POINT", "sfg"))), precision = 0, bbox = structure(c(xmin = 131616.180272543,
ymin = 397688.855374183, xmax = 131616.180272543, ymax = 397688.855374183
), class = "bbox"), crs = structure(list(input = "EPSG:28992",
wkt = "PROJCRS[\"Amersfoort / RD New\",\n BASEGEOGCRS[\"Amersfoort\",\n DATUM[\"Amersfoort\",\n ELLIPSOID[\"Bessel 1841\",6377397.155,299.1528128,\n LENGTHUNIT[\"metre\",1]]],\n PRIMEM[\"Greenwich\",0,\n ANGLEUNIT[\"degree\",0.0174532925199433]],\n ID[\"EPSG\",4289]],\n CONVERSION[\"RD New\",\n METHOD[\"Oblique Stereographic\",\n ID[\"EPSG\",9809]],\n PARAMETER[\"Latitude of natural origin\",52.1561605555556,\n ANGLEUNIT[\"degree\",0.0174532925199433],\n ID[\"EPSG\",8801]],\n PARAMETER[\"Longitude of natural origin\",5.38763888888889,\n ANGLEUNIT[\"degree\",0.0174532925199433],\n ID[\"EPSG\",8802]],\n PARAMETER[\"Scale factor at natural origin\",0.9999079,\n SCALEUNIT[\"unity\",1],\n ID[\"EPSG\",8805]],\n PARAMETER[\"False easting\",155000,\n LENGTHUNIT[\"metre\",1],\n ID[\"EPSG\",8806]],\n PARAMETER[\"False northing\",463000,\n LENGTHUNIT[\"metre\",1],\n ID[\"EPSG\",8807]]],\n CS[Cartesian,2],\n AXIS[\"easting (X)\",east,\n ORDER[1],\n LENGTHUNIT[\"metre\",1]],\n AXIS[\"northing (Y)\",north,\n ORDER[2],\n LENGTHUNIT[\"metre\",1]],\n USAGE[\n SCOPE[\"Engineering survey, topographic mapping.\"],\n AREA[\"Netherlands - onshore, including Waddenzee, Dutch Wadden Islands and 12-mile offshore coastal zone.\"],\n BBOX[50.75,3.2,53.7,7.22]],\n ID[\"EPSG\",28992]]"), class = "crs"), classes = character(0), n_empty = 0L, class = c("sfc_POINT",
"sfc")), begincbsx = c(131600, 129100, 134000, 129000, 131900,
132600), begincbsy = c(397600, 399200, 396500, 399800, 391900,
399100), pol = list(structure(c(131600, 131700, 131700, 131600,
131600, 397600, 397600, 397700, 397700, 397600), .Dim = c(5L,
2L)), structure(c(129100, 129200, 129200, 129100, 129100, 399200,
399200, 399300, 399300, 399200), .Dim = c(5L, 2L)), structure(c(134000,
134100, 134100, 134000, 134000, 396500, 396500, 396600, 396600,
396500), .Dim = c(5L, 2L)), structure(c(129000, 129100, 129100,
129000, 129000, 399800, 399800, 399900, 399900, 399800), .Dim = c(5L,
2L)), structure(c(131900, 132000, 132000, 131900, 131900, 391900,
391900, 392000, 392000, 391900), .Dim = c(5L, 2L)), structure(c(132600,
132700, 132700, 132600, 132600, 399100, 399100, 399200, 399200,
399100), .Dim = c(5L, 2L)))), row.names = c(NA, 6L), class = "data.frame")
The expected output is
You just have to enter the following line of code and it should work :
cbs <- st_sf(cbs, sf_column_name = "beginpunt", crs = 4326)
Output :
#> Simple feature collection with 6 features and 1 field
#> Geometry type: POINT
#> Dimension: XY
#> Bounding box: xmin: 5.012968 ymin: 51.51645 xmax: 5.08459 ymax: 51.58655
#> Geodetic CRS: WGS 84
#> id beginpunt
#> 1 9594 POINT (5.049934 51.56765)
#> 2 9520 POINT (5.014657 51.58185)
#> 3 9492 POINT (5.08459 51.55759)
#> 4 83859 POINT (5.012969 51.58655)
#> 5 9438 POINT (5.054887 51.51645)
#> 6 9490 POINT (5.064763 51.58125)
Created on 2021-09-28 by the reprex package (v2.0.1)
Edit 1
Based on #skaqqs' comment, I complete my answer without taking into account his suggestion which normally works very well... but only if the points are arranged in the right order to produce a valid polygon (which is normally the most frequent case).
The original order of the points in your dataframe does not produce a valid polygon: keeping the original order, we get intersecting lines. So, if we choose the classical solution proposed by #skaqqs and draw the polygon, here is the result:
cbs_standard <- st_combine(cbs)
(cbs_standard <- st_cast(cbs_standard, "POLYGON"))
#> Geometry set for 1 feature
#> Geometry type: POLYGON
#> Dimension: XY
#> Bounding box: xmin: 5.012968 ymin: 51.51645 xmax: 5.08459 ymax: 51.58655
#> Geodetic CRS: WGS 84
#> POLYGON ((5.049934 51.56765, 5.014657 51.58185,...
plot(cbs_standard)
To circumvent this difficulty, I suggest another solution by installing and loading the "concaveman" package.
So, please find below the (very simple) procedure to obtain a topologically valid polygon.
install.packages("concaveman")
library(concaveman)
(cbs_concaveman <- concaveman(cbs))
#> Simple feature collection with 1 feature and 0 fields
#> Geometry type: POLYGON
#> Dimension: XY
#> Bounding box: xmin: 5.013 ymin: 51.5165 xmax: 5.0846 ymax: 51.5866
#> Geodetic CRS: WGS 84
#> polygons
#> 1 POLYGON ((5.0147 51.5818, 5...
plot(cbs_concaveman)
Created on 2021-09-28 by the reprex package (v2.0.1)
Edit 2
O.K. we did not understand each other #GeoDude. To follow up on your comment, here is (I hope!) the solution to your problem. You just have to type these few lines of code and it should work:
z <- st_polygon()
for (i in seq(length(tbldata$pol))){
z[i] <- st_polygon(list(tbldata$pol[[i]]))
}
sfc <- st_sfc(z, crs = 28992)
tbldata <- st_sf(tbldata, geom = sfc, row.names = 1:length(z))
tbldata$pol <- NULL
tbldata
#> Simple feature collection with 6 features and 3 fields
#> Active geometry column: geom
#> Geometry type: POLYGON
#> Dimension: XY
#> Bounding box: xmin: 129000 ymin: 391900 xmax: 134100 ymax: 399900
#> Projected CRS: Amersfoort / RD New
#> id beginpunt beginpunt_rd begincbsx begincbsy
#> 1 9594 POINT (5.049934 51.56765) POINT (131616.2 397688.9) 131600 397600
#> 2 9520 POINT (5.014657 51.58185) POINT (129178.4 399280.3) 129100 399200
#> 3 9492 POINT (5.08459 51.55759) POINT (134014.4 396559.7) 134000 396500
#> 4 83859 POINT (5.012969 51.58655) POINT (129064.1 399804.3) 129000 399800
#> 5 9438 POINT (5.054887 51.51645) POINT (131933.7 391991.7) 131900 391900
#> 6 9490 POINT (5.064763 51.58125) POINT (132651 399196.9) 132600 399100
#> geom
#> 1 POLYGON ((131600 397600, 13...
#> 2 POLYGON ((131600 397600, 13...
#> 3 POLYGON ((131600 397600, 13...
#> 4 POLYGON ((131600 397600, 13...
#> 5 POLYGON ((131600 397600, 13...
#> 6 POLYGON ((131600 397600, 13...
Created on 2021-09-29 by the reprex package (v2.0.1)
And the expected output:
library(tmap)
tmap_mode("view")
#> tmap mode set to interactive viewing
tmap::tm_shape(tbldata) +
tm_basemap(server = "OpenStreetMap")+
tm_fill(NA) +
tm_borders(col="black")
Created on 2021-09-29 by the reprex package (v2.0.1)
Edit 3
As a follow-up to your comment #Geodude, please find the fix below. Besides, I removed the for loop to take into account #nniloc's remark (For this, I built the function polygon_list() that I have applied to tbldata$pol with sapply()).
polygon_list <- function(x){
st_polygon(list(x))
}
h <- st_polygon()
h <- sapply(tbldata$pol, polygon_list, simplify = FALSE)
sfc <- st_sfc(h, crs = 28992)
tbldata <- st_sf(tbldata, geom = sfc)
tbldata$pol <- NULL
tbldata
#> Simple feature collection with 6 features and 3 fields
#> Active geometry column: geom
#> Geometry type: POLYGON
#> Dimension: XY
#> Bounding box: xmin: 129000 ymin: 391900 xmax: 134100 ymax: 399900
#> Projected CRS: Amersfoort / RD New
#> id beginpunt beginpunt_rd begincbsx begincbsy
#> 1 9594 POINT (5.049934 51.56765) POINT (131616.2 397688.9) 131600 397600
#> 2 9520 POINT (5.014657 51.58185) POINT (129178.4 399280.3) 129100 399200
#> 3 9492 POINT (5.08459 51.55759) POINT (134014.4 396559.7) 134000 396500
#> 4 83859 POINT (5.012969 51.58655) POINT (129064.1 399804.3) 129000 399800
#> 5 9438 POINT (5.054887 51.51645) POINT (131933.7 391991.7) 131900 391900
#> 6 9490 POINT (5.064763 51.58125) POINT (132651 399196.9) 132600 399100
#> geom
#> 1 POLYGON ((131600 397600, 13...
#> 2 POLYGON ((129100 399200, 12...
#> 3 POLYGON ((134000 396500, 13...
#> 4 POLYGON ((129000 399800, 12...
#> 5 POLYGON ((131900 391900, 13...
#> 6 POLYGON ((132600 399100, 13...
And by checking one of the polygons, the problem appears to be solved:
tbldata$geom[[3]]
#> POLYGON ((134000 396500, 134100 396500, 134100 396600, 134000 396600, 134000 396500))
Created on 2021-10-01 by the reprex package (v2.0.1)
A tidyverse solution. More or less the same as #lovalery but skips the for loops.
library(tidyverse)
library(sf)
library(ggspatial)
df_sf <- tbldata %>%
mutate(pol2 = st_sfc(st_multipolygon(list(pol)))) %>%
select(-beginpunt, -beginpunt_rd) %>%
st_as_sf(crs = 28992)
# plot to verify
ggplot(df_sf) +
annotation_map_tile(zoom = 13) +
geom_sf(col = 'blue', fill = NA)
I import a csv of longitude and latitude coordinates and convert them to polygon shapefiles. I place a grid over the polygons and find the centroid of each grid square. I then extract the coordinates of the centroids and place it in a dataframe, but I need to be able to say which polygon a particular centroid is in.
#Create shapefile of polygons
polygon <- lapply(split(df, df$shape), function(x) { coords <-
as.matrix(cbind(x$longitude, x$latitude)); list(rbind(coords, coords[1,]))})
Coord_Ref <- st_crs(4326)
polygon <- st_sfc(st_multipolygon(x=polygon), crs = Coord_Ref)
polygon <- st_cast(polygon, "POLYGON")
#Create grid and centroids
PolygonBits <- st_make_grid(polygon, cellsize=0.0002)
PolygonBitCentroids <- st_centroid(st_make_grid(polygon, cellsize=0.0002))
#Extract coordinates and place them in dataframe
PolygonBitCentroids <- st_coordinates(PolygonBitCentroids)
PolygonBitCentroids <- as.data.frame(PolygonBitCentroids)
The first three rows of the PolygonBitCentroids dataframe looks as follows:
X Y
1 -0.0014 0.1990
2 -0.0012 0.1990
3 -0.0010 0.1990
But I need something like this:
X Y Shape
1 -0.0014 0.1990 Polygon 1
2 -0.0012 0.1990 Polygon 1
3 -0.0010 0.1990 Polygon 1
Reproducible data:
structure(list(shape = c("polygon 1", "polygon 1", "polygon 1",
"polygon 1", "polygon 2", "polygon 2", "polygon 2", "polygon 2",
"polygon 3", "polygon 3", "polygon 3", "polygon 3", "polygon 4",
"polygon 4", "polygon 4", "polygon 4"), longitude = c(0, 1, 1,
0, 1.5, 2, 2, 1.5, -2, -2, -1, -1, 0, 1, 1, 0), latitude = c(1,
1, 0, 0, 1, 1, 0, 0, -0.5, -2, -2, -0.5, 1.5, 1.5, 2, 2)), class =
"data.frame", row.names = c(NA,
-16L), spec = structure(list(cols = list(shape = structure(list(),
class = c("collector_character",
"collector")), longitude = structure(list(), class =
c("collector_double",
"collector")), latitude = structure(list(), class =
c("collector_double",
"collector"))), default = structure(list(), class =
c("collector_guess",
"collector")), skip = 1), class = "col_spec"))
The solution to this problem is to do point-in-polygon via st_join.
This is pretty straightforward with the tidyverse, and I'm sure you can use the following to translate to base R.
(I took the liberty to change your reproducible data slightly, since polygon 4 is not a valid polygon given that it only has 3 points):
First we create an sf from the xy dataframe
library(sf)
library(tidyverse)
polygons <- polygons %>%
st_as_sf(coords = c('longitude', 'latitude')) %>%
st_set_crs(4326)
When plotted, this looks like this
polygons <- polygons %>%
group_by(shape) %>%
summarise(do_union=FALSE) %>%
st_cast("POLYGON")
This correctly creates the polygons from the points.
a call to plot(polygons) produces the following plot:
(the do_union=FALSE argument is important because otherwise order is not preserved). Next we create a separate sf object for the grids:
grids <- polygons %>%
st_make_grid(cellsize = 0.2) %>%
st_centroid() %>%
st_sf()
Finally, we join the two sf objects using st_within`
grids %>% st_join(polygons, join = st_within)
What you get looks exactly as you asked for.
Simple feature collection with 92 features and 1 field
geometry type: POINT
dimension: XY
bbox: xmin: -1.9 ymin: -1.9 xmax: 1.9 ymax: 1.9
CRS: EPSG:4326
First 10 features:
shape geometry
1 <NA> POINT (-1.9 -1.9)
2 <NA> POINT (-1.1 -1.9)
3 <NA> POINT (-0.9 -1.9)
4 polygon 3 POINT (-1.9 -1.7)
5 <NA> POINT (-1.7 -1.7)
6 <NA> POINT (-1.3 -1.7)
7 polygon 3 POINT (-1.1 -1.7)
8 <NA> POINT (-0.9 -1.7)
9 polygon 3 POINT (-1.9 -1.5)
10 polygon 3 POINT (-1.7 -1.5)
If you plot the output, you'll get
Suppose t is:
t <- structure(list(structure(list(structure(c(-89.990791, -89.990772,
-89.990901, -89.99092, -89.990791, 30.727025, 30.727083, 30.727114,
30.727057, 30.727025), .Dim = c(5L, 2L))), class = c("XY", "POLYGON",
"sfg")), structure(list(structure(c(-89.991691, -89.991755, -89.991755,
-89.991691, -89.991691, 30.716004, 30.716004, 30.715916, 30.715916,
30.716004), .Dim = c(5L, 2L))), class = c("XY", "POLYGON", "sfg"
))), class = c("sfc_POLYGON", "sfc"), precision = 0, bbox = structure(c(xmin = -89.991755,
ymin = 30.715916, xmax = -89.990772, ymax = 30.727114), class = "bbox"), crs = structure(list(
epsg = 4326L, proj4string = "+proj=longlat +datum=WGS84 +no_defs"), class = "crs"), n_empty = 0L)
> t
Geometry set for 2 features
geometry type: POLYGON
dimension: XY
bbox: xmin: -89.99176 ymin: 30.71592 xmax: -89.99077 ymax: 30.72711
epsg (SRID): 4326
proj4string: +proj=longlat +datum=WGS84 +no_defs
POLYGON ((-89.99079 30.72703, -89.99077 30.7270...
POLYGON ((-89.99169 30.716, -89.99175 30.716, -...
How can I filter polygons by long/lat boundaries? Suppose I want to filter out any polygon that hax lat>30.72 (so to keep only the second polygon). Is there any specific function I can use to filter polygons?
I do not know if there is a ready-made function, but a rectangular “spatial filter”
is easy to build. Just define the “corners”, create a bbox from them, convert
to polygon and find which of your original polygons are contained / overlap
the "filter area".
Here is a quick-and-dirty example:
library(sf)
polys_sf<-st_read(system.file("shape/nc.shp", package="sf") ) %>%
st_transform(crs="+init=epsg:4326")
plot(st_geometry(polys_sf))
Define a “spatial filter”
xmin <- -80
xmax <- -76
ymin <- 34
ymax <- 36
create a polygon based on the filter. (You can use "NA" for some values, so if you want for example to filter only "on the left", you can set xmax to NA)
filt_bbox <- sf::st_bbox(c(xmin = ifelse(is.na(xmin), -180, xmin),
ymin = ifelse(is.na(ymin), -90, ymin),
xmax = ifelse(is.na(xmax), +180, xmax),
ymax = ifelse(is.na(ymax), +90, ymax)),
crs = st_crs(4326)) %>%
sf::st_as_sfc(.)
Now "filter" the original dataset based on the bbox polygon: Use st_within if you want to keep only the polys completely contained in the defined area
find_data <- sf::st_within(polys_sf, filt_bbox)
#> although coordinates are longitude/latitude, st_within assumes that they are planar
filt_data <- polys_sf[which(lengths(find_data) != 0), ]
plot(filt_bbox)
plot(st_geometry(filt_data), add = TRUE, reset = FALSE)
Use st_intersects if you want to keep all polys that intersect the defined area
find_data <- sf::st_intersects(polys_sf, filt_bbox)
#> although coordinates are longitude/latitude, st_intersects assumes that they are planar
filt_data <- polys_sf[which(lengths(find_data) != 0), ]
plot(st_geometry(filt_data))
plot(filt_bbox, add = TRUE)
(Clearly, this works if both your polys and the “filtering extent” are lat/long, otherwise
you have to take care of reprojecting, etcetera.)
Created on 2018-11-15 by the reprex package (v0.2.1)
You can do this a lot easier using a simple function:
filter_sf <- function(.data, xmin = NULL, xmax = NULL, ymin = NULL, ymax = NULL) {
bb <- sf::st_bbox(.data)
if (!is.null(xmin)) bb["xmin"] <- xmin
if (!is.null(xmax)) bb["xmax"] <- xmax
if (!is.null(ymin)) bb["ymin"] <- ymin
if (!is.null(ymax)) bb["ymax"] <- ymax
sf::st_filter(.data, sf::st_as_sfc(bb), .predicate = sf::st_within)
}
So now you can do:
t %>%
filter_sf(ymin = 30.72)
This method works for both POLYGON and MULTIPOLYGON geometries.