In R, I wish to convert the alpha shape polygon surrounding a bunch of points into one single spatial polygon object.
library(sf)
library(alphahull)
To start out, I create the point random points distribution
dat <- matrix(c(1,2,3,4,5, 3,3,5,6,9), ncol = 2)
I find the alpha shape covering the points (i.e. a polygon encompassing all points). I am particularly interested in this function as it has the feature to find a more or less tight polygon shape according to the given alpha
dat.ashape<- ashape(dat, alpha= 7)
I take the coordinates of the extreme
coords<- dat.ashape$x[dat.ashape$alpha.extreme,]
I make the last point same as the first (to have a closed shape)
coords<- rbind(coords, coords[1,])
To make things to work I need to order the point in sequence
coords<- cbind(coords, NA)
coords[,3]<- c(1, 5, 3, 2, 4, 6)
coords<- coords[order(coords[,3]),]
I create the simple spatial point feature from the coordinate matrix
dat.sf <- st_multipoint(coords, dim = "XYZ")
... and create the polygon
tst<- dat.sf %>% #
st_cast('POLYGON')
Finally, comparing the point and shape distribution and the polygon, I was able to build the polygon correctly, but this is rather easy with six points! (Because I made myself manually the right order)
plot(dat.ashape)
plot(tst, add=T, col=adjustcolor('red', alpha.f=.3), border=2)
In a more sophisticated example with say 100 points, I get stuck in the part where I should get the sequence of points right, before st_cast into polygon.
set.seed(1)
dat <- matrix(stats::rnorm(100), ncol = 2)
dat.ashape<- ashape(dat, alpha=7)
coords<- dat.ashape$x[dat.ashape$alpha.extreme,]
coords<- rbind(coords, coords[1,])
dat.sf <- st_multipoint(coords, dim = "XY")
tst <- dat.sf %>%
st_cast('POLYGON')
plot(dat.ashape)
plot(tst, add=T, col=adjustcolor('red', alpha.f=.3), col.line='red', border=2)
.... and I obviously do not get trick done.
I am grateful for any help!
OK, I was not happy with the concaveman. I really wanted the Delaunay triangulation as basis of my hull computation as I like alphahull a lot. Also, after reading this I wanted to find a (or my) viable way for converting the hull retrieved from alphahull package to a spatial polygon, which I could further use for my broader spatial analysis. Therefore I wrote the following function to do the job:
hull2poly <- function(my.ashape){
require(sf)
if(class(my.ashape) != "ashape") {stop('error, your input must be
ashape class')} else
my.edge<- data.frame(my.ashape$edges)[,c( 'x1', 'y1', 'x2', 'y2')]
x<- my.edge[,1:2]
y<- my.edge[,3:4]
my.edge2<- matrix(t(cbind(x,y)), byrow=T,ncol=2)
my.edge2<- as.data.frame(my.edge2)
names(my.edge2)<- c('x','y')
my.edge2$id <- unlist(lapply((1: (nrow(my.edge2)/2)),
FUN=function(x){c(rep(x,2))}))
start.edge<- 1
new.id<- start.edge
new.edges<- my.edge2[which(my.edge2$id== start.edge ),]
while(length(new.id)<= length(unique(my.edge2$id))-1){
internal.id<- new.id[length(new.id)]
edge <- my.edge2[which(my.edge2$id== internal.id ),]
where.to.search <- my.edge2[which(my.edge2$id %in% new.id ==F ),]
index1<- apply(where.to.search[,1:2], 1, function(x){x == edge[1,1:2]})
index1<- as.numeric(names(which(apply(index1,2, sum)>0)))[1]
index2<- apply(where.to.search[,1:2], 1, function(x){x == edge[2,1:2]})
index2<- as.numeric(names(which(apply(index2,2, sum)>0)))[1]
main.index<- c(index1, index2)
ifelse(all(!is.na(main.index)),
# yes
{flag<- c(T,T)
main.index<- main.index[2]
point.coord<- my.edge2[main.index,]
segment<- my.edge2[my.edge2$id==my.edge2[main.index,'id'],]
new.id<- c( new.id, my.edge2[main.index,]$id) },
# no
ifelse(which(!is.na(main.index))==1,
# yes
{flag<- c(T,F)
main.index<- main.index[flag]
point.coord<- my.edge2[main.index,]
segment<-
my.edge2[my.edge2$id==my.edge2[main.index,'id'],]
new.id<- c( new.id, my.edge2[main.index,]$id)},
# no
{flag<- c(F,T)
main.index<- main.index[flag]
point.coord<- my.edge2[main.index,]
segment<- my.edge2[my.edge2$id==my.edge2[main.index,'id'],]
new.id<- c( new.id, my.edge2[main.index,]$id)} ) )
index3<- t(apply(segment, 1, function(x){x ==point.coord}))
new.edges<- rbind(new.edges, rbind(point.coord, segment[which(apply(index3,1, sum)<3),]))
}
tst <- st_multipoint(as.matrix(new.edges), dim = "XYZ")
poly<- tst %>% #
st_cast('POLYGON')
return(poly)}
So, if you wish to give a try with a cloud of 1000 points:
library(alphahull)
set.seed(1)
dat <- matrix(stats::rnorm(1000), ncol = 2)
dat <- as.data.frame(dat)
dat.ashape<- ashape(dat, alpha= 2)
tmp<- hull2poly(dat.ashape)
plot(tmp)
I hope, it comes useful for someone.
Searching for further alternatives to packages alphahull, rgeos, sf for computing the hull surrounding a bunch of points, I finally found concaveman thanks to this post, which does the trick, being compatible with sf objects.
library(concaveman)
library(sf)
set.seed(1)
dat <- matrix(stats::rnorm(100), ncol = 2)
dat <- as.data.frame(dat)
names(dat)<- c('x', 'y')
dat.sf<-st_as_sf(dat, coords=c("x","y"))
polygon <- concaveman(dat.sf)
plot(dat.sf, pch=16)
plot(polygon, add=T, col=adjustcolor('red', alpha.f=.3), col.line='red', border=2)
The other answer wouldn't work for my dataset so I found another way.
First I used the coordinate pairs to create the line segments of the alpha shape. Then I just bound those line segments together and converted that to a polygon.
library(sp)
library(sf)
library(alphahull)
dat.ashape<- ashape(dat, alpha=7)
a<- data.frame(dat.ashape$edges)[,c( 'x1', 'y1', 'x2', 'y2')]
# create first line segment to initialize the object
i=1
line_obj <- sp::Line(cbind( c(a$x1[i], a$x2[i]),c(a$y1[i], a$y2[i]) ))
lines_obj <- sp::Lines(list(line_obj),ID=i)
myLines <- sp::SpatialLines(list(lines_obj))
for (i in 2:nrow(a)){
line_obj <- sp::Line(cbind( c(a$x1[i], a$x2[i]),c(a$y1[i], a$y2[i]) ))
lines_obj <- sp::Lines(list(line_obj),ID=i)
myLines <- rbind(myLines, sp::SpatialLines(list(lines_obj))) #bind the line to the rest
}
sfL<-as(myLines, "sf") #convert lines to sf
alphapoly = st_collection_extract(st_polygonize(st_union(sfL))) # union the lines and convert to polygon
apol<-as_Spatial(alphapoly) #if you want to convert back to sp
library(leaflet) #plot the line segments and polygon in Leaflet
leaflet() %>% addTiles() %>% addPolygons(data=apol) %>% addPolylines(data=myLines, color="yellow")
I want to calculate the distance between two points in two different datasets. I don't want to calculate the distance between all points - just to the nearest point of datasetB.
Some examples:
Dataset A - Persons
http://pastebin.com/HbaeqACi
Dataset B - Waterfeatures:
http://pastebin.com/UdDvNtHs
Dataset C - City:
http://pastebin.com/nATnkMRk
So...I want to calculate the distance of each person to the nearest waterfeature point.
I've already tried to work with the rgeos package and after struggling with some projections errors, I've got it to work. But this calculate (at least I assume it) all distances to every point, but, as already said, I've only interested in the distance to the nearest waterfeature point.
# load csv files
persons = read.csv("persons.csv", header = TRUE)
water = read.csv("water.csv", header = TRUE)
# change dataframes to SpatialPointDataFrame and assign a projection
library(sp)
library(rgeos)
coordinates(persons) <- c("POINT_X", "POINT_Y")
proj4string(persons) <- CRS("+proj=utm +datum=WGS84")
coordinates(water) <- c("POINT_X", "POINT_Y")
proj4string(water) <- CRS("+proj=utm +datum=WGS84")
# use rgoes package to calculate the distance
distance <- gDistance(persons, water, byid=TRUE)
# works, but calculates a huge number of distances
Is there any parameter, which I've missed. Or do I need to use another package or function? I've also looked at spatstat, which is able to calculate the distance to the nearest neighbor, but not of two different datasets: http://hosho.ees.hokudai.ac.jp/~kubo/Rdoc/library/spatstat/html/nndist.html
Edit:
The complete R-Script including plotting of the datasets:
library(RgoogleMaps)
library(ggplot2)
library(ggmap)
library(sp)
library(fossil)
#load data
persons = read.csv("person.csv", header = TRUE, stringsAsFactors=FALSE)
water = read.csv("water.csv", header =TRUE, stringsAsFactors=FALSE)
city = read.csv("city.csv", header =TRUE)
# plot data
persons_ggplot2 <- persons
city_ggplot2 <- city
water_ggplot2 <- water
gc <- geocode('new york, usa')
center <- as.numeric(gc)
G <- ggmap(get_googlemap(center = center, color = 'bw', scale = 1, zoom = 11, maptype = "terrain", frame=T), extent="device")
G1 <- G + geom_point(aes(x=POINT_X, y=POINT_Y ),data=city, shape = 22, color="black", fill = "yellow", size = 4) + geom_point(aes(x=POINT_X, y=POINT_Y ),data=persons, shape = 8, color="red", size=2.5) + geom_point(aes(x=POINT_X, y=POINT_Y ),data=water_ggplot2, color="blue", size=1)
plot(G1)
#### calculate distance
# Generate unique coordinates dataframe
UniqueCoordinates <- data.frame(unique(persons[,4:5]))
UniqueCoordinates$Id <- formatC((1:nrow(UniqueCoordinates)), width=3,flag=0)
# Generate a function that looks for the closest waterfeature for each id coordinates
NearestW <- function(id){
tmp <- UniqueCoordinates[UniqueCoordinates$Id==id, 1:2]
WaterFeatures <- rbind(tmp,water[,2:3])
tmp1 <- earth.dist(WaterFeatures, dist=TRUE)[1:(nrow(WaterFeatures)-1)]
tmp1 <- which.min(tmp1)
tmp1 <- water[tmp1,1]
tmp1 <- data.frame(tmp1, WaterFeature=tmp)
return(tmp1)
}
#apply to each id and the merge
CoordinatesWaterFeature <- ldply(UniqueCoordinates$Id, NearestW)
persons <- merge(persons, CoordinatesWaterFeature, by.x=c(4,5), by.y=c(2,3))
What about writing a function that looks for the nearest waterfeature for every person?
#requires function earth.dist from "fossil" package
require(fossil)
#load data
persons = read.csv("person.csv", header = TRUE, stringsAsFactors=FALSE)
water = read.csv("water.csv", header =TRUE, stringsAsFactors=FALSE)
#Generate unique coordinates dataframe
UniqueCoordinates <- data.frame(unique(persons[,4:5]))
UniqueCoordinates$Id <- formatC((1:nrow(UniqueCoordinates)), width=3,flag=0)
#Generate a function that looks for the closest waterfeature for each id coordinates
NearestW <- function(id){
tmp <- UniqueCoordinates[UniqueCoordinates$Id==id, 1:2]
WaterFeatures <- rbind(tmp,water[,2:3])
tmp1 <- earth.dist(WaterFeatures, dist=TRUE)[1:(nrow(WaterFeatures)-1)]
tmp1 <- min(tmp1)
tmp1 <- data.frame(tmp1, WaterFeature=tmp)
return(tmp1)
}
#apply to each id and the merge
CoordinatesWaterFeature <- ldply(UniqueCoordinates$Id, NearestW)
persons <- merge(persons, CoordinatesWaterFeature, by.x=c(4,5), by.y=c(2,3))
NOTE: I've added a stringsAsFactors parameter to the original read.csv , it make the merging easier at the end
NOTE:Column tmp1 notes the number of METERS to the nearest water feature
Maybe I'm a little too late, but you can use spatstat to compute distances between two different datasets. The command is nncross. The arguments you have to use are two objects of type ppp, which you can create using the as.ppp() function.
I have a raster map with many patches (clumps of continguous cells with the same value). What I need to do is to obtain the coordinates of the center (or close to the center) of each patch.
I am very unexperienced with raster package but it seems I can get coordinates only if I know the position of the cells in the map. Is there any way to get coordinates giving a value of the cells instead? Thank you
If by patch you mean clumps, Raster package allows you to find , and isolate, clumps. Taking the clump() raster package example, and extending it:
library(raster)
library(igraph)
detach("package:coin", unload=TRUE)
r <- raster(ncols=12, nrows=12)
set.seed(0)
r[] <- round(runif(ncell(r))*0.7 )
rc <- clump(r)
clump_id <- getValues(rc)
xy <- xyFromCell(rc,1:ncell(rc))
df <- data.frame(xy, clump_id, is_clump = rc[] %in% freq(rc, useNA = 'no')[,1])
df[df$is_clump == T, ]
plot(r)
plot(rc)
text(df[df$is_clump == T, 1:2], labels = df[df$is_clump == T, 3])
May not be as interesting as you could expect.
You do it all over with directions = 4
rc <- clump(r, directions = 4)
clump_id <- getValues(rc)
xy <- xyFromCell(rc,1:ncell(rc))
df <- data.frame(xy, clump_id, is_clump = rc[] %in% freq(rc, useNA = 'no')[,1])
df[df$is_clump == T, ]
to get
and maybe clump 'centroids'
dfm <- ddply(df[df$is_clump == T, ], .(clump_id), summarise, xm = mean(x), ym = mean(y))
plot(rc)
text(dfm[, 2:3], labels = dfm$clump_id)
Notes:
There will be an error if you try to use clump() without first
detach modeltools library. modeltools is called by coin and maybe
other statistical libraries.
You could take the mean of the coordinates of each patch:
# some dummy data
m <- matrix(c(
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,4,4,0,
0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,4,4,0,
0,0,0,0,0,0,0,1,1,1,1,1,1,1,0,0,0,0,0,4,4,0,
0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,
0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,
0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,
0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,
0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,
0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,
0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,
0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,
0,0,0,0,0,0,0,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,
0,0,2,3,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
0,0,2,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), nrow=20, byrow=T)
# create a raster
r <- raster(m)
# convert raster to points
p <- data.frame(rasterToPoints(r))
# filter out packground
p <- p[p$layer > 0,]
# for each patch calc mean coordinates
sapply(split(p[, c("x", "y")], p$layer), colMeans)