Optimizing a spatial query in R: crown-distributed biomass in a forest

I am trying to calculate the area of tree crowns overlapping square grid cells in a forest plot. Here is a reproducible example:
# A. Define objects
require(sp)
require(raster)
require(rgdal)
require(rgeos)
require(dismo)
radius=25 # max search radius around 10 x 10 m cells
res <- vector() # where to store results
# Create a fake set of trees with x,y coordinates and trunk diameter (=dbh)
set.seed(0)
survey <- data.frame(x=sample(99,1000,replace=T),y=sample(99,1000,replace=T),dbh=sample(100,1000,replace=T))
coordinates(survey) <- ~x+y
# Define 10 x 10 subplots
grid10 <- SpatialGrid(GridTopology(c(5,5),c(10,10),c(10,10)))
survey$subplot <- over(survey,grid10)
# B. Now find fraction of tree crown overlapping each subplot
for (i in 1:100) {
# Extract the centroid of the ith cell
centro <- expand.grid(x=seq(5,95,10),y=seq(5,95,10))[i,]
corner <- data.frame(x=c(centro$x-5,centro$x+5,centro$x+5,centro$x-5),y=c(centro$y-5,centro$y-5,centro$y+5,centro$y+5))
# Find trees within the max radius (defined above)
tem <- survey[which((centro$x-survey$x)^2+(centro$y-survey$y)^2<=radius^2),]
# Define tree crown based on tree diameter
tem$crownr <- exp(-.438+.658*log(tem$dbh/10)) # crown radius in meter
# Compute the distance from each tree to cell's borders
pDist <- vector()
for (k in 1:nrow(tem)) {
pDist[k] <- gDistance(tem[k,],SpatialPolygons(list(Polygons(list(Polygon(corner)),1))))
}
# Keep only trees whose distance to the cell is within their crown radius (i.e. the crown overlaps the cell)
overlap.trees <- tem[which(pDist<=tem$crownr),]
overlap.trees$crowna <-overlap.trees$crownr^2*pi # compute crown area
# Create polygons from overlapping crowns
c1 <- circles(coordinates(overlap.trees),overlap.trees$crownr, lonlat=F, dissolve=F)
crown <- polygons(c1)
Crown <- SpatialPolygonsDataFrame(polygons(c1),data=data.frame(dbh=overlap.trees$dbh,crown.area=overlap.trees$crowna))
# Create a fine grid points to retrieve the fraction of overlapping crowns
max.dist <- ceiling(sqrt(max((centro$x - overlap.trees$x)^2 + (centro$y - overlap.trees$y)^2))) # max distance to narrow the search
finegrid <- as.data.frame(expand.grid(x=seq(centro$x-max.dist,centro$x+max.dist,1),y=seq(centro$y-max.dist,centro$y+max.dist,1)))
coordinates(finegrid) <- ~ x+y
A <- extract(Crown,finegrid)
Crown@data$ID <- seq(1,length(crown),1)
B <- as.data.frame(table(A$poly.ID))
if (nrow(B)>0) {
B <- merge(B,Crown@data,by.x="Var1",by.y="ID",all.x=T)
B$overlap <- B$Freq/B$crown.area
B$overlap[B$overlap>1] <- 1
res[i] <- sum(B$overlap) } else {
res[i] <- 0 }
}
# C. Check the result
res # sum of crown fraction overlapping each cell (works fine)
This algorithm takes about 3 minutes to run for 100 cells. I have a large data set with 35,000 cells, so 350 * 3 = 1050 minutes, or 17.5 hours.
Any hints to speed up and/or optimize this algorithm?

After quick profiling with the profvis package, it seems some improvement is possible just by changing a few lines. This wasn't an exhaustive optimization, and I'm sure more improvements are still possible.
I changed
pDist <- vector()
for (k in 1:nrow(tem)) {
pDist[k] <- gDistance(tem[k,],SpatialPolygons(list(Polygons(list(Polygon(corner)),1))))
}
to
pDist <- rep(NA, nrow(tem))
my.poly <- SpatialPolygons(list(Polygons(list(Polygon(corner)),1)))
for (k in 1:nrow(tem)) {
pDist[k] <- gDistance(tem[k,], my.poly)
}
because there's no need to create the SpatialPolygons object on every iteration. This can be expensive, as the profiling showed.
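A further possible tweak, offered as an untested sketch: rgeos can compute all point-to-polygon distances in one vectorised call with byid = TRUE, which would remove the k loop entirely.
pDist <- as.numeric(gDistance(tem, my.poly, byid = TRUE)) # one distance per tree in tem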
Here is some code which should run in parallel.
# load only the necessary packages for the code until parSapplyLB
# LB stands for load-balancing, which means tasks are handed to whichever
# cores are idle. This is great if jobs take an uneven amount of time
# to run.
library(parallel)
library(sp)
system.time({
# prepare the cluster; the default is PSOCK on Windows, but FORK can be used on *nix
cl <- makeCluster(4)
# each worker is a fresh instance of vanilla R, so you need to load the
# necessary libraries on all the workers
clusterEvalQ(cl = cl, library(sp))
clusterEvalQ(cl = cl, library(raster))
clusterEvalQ(cl = cl, library(rgdal))
clusterEvalQ(cl = cl, library(rgeos))
clusterEvalQ(cl = cl, library(dismo))
radius <- 25 # max search radius around 10 x 10 m cells
# res <- rep(NA, 100) # where to store results
# Create a fake set of trees with x,y coordinates and trunk diameter (=dbh)
set.seed(0)
survey <- data.frame(x=sample(99,1000,replace=T),y=sample(99,1000,replace=T),dbh=sample(100,1000,replace=T))
coordinates(survey) <- ~x+y
# Define 10 x 10 subplots
grid10 <- SpatialGrid(GridTopology(c(5,5),c(10,10),c(10,10)))
survey$subplot <- over(survey,grid10)
# Export needed variables to workers
clusterExport(cl = cl, varlist = c("survey", "radius"))
# this function is your former for() loop, increase X = 1:100 to suit your needs
res <- parSapplyLB(cl = cl, X = 1:100, FUN = function(i, survey) {
# B. Now find fraction of tree crown overlapping each subplot
# Extract the centroid of the ith cell
centro <- expand.grid(x=seq(5,95,10),y=seq(5,95,10))[i,]
corner <- data.frame(x=c(centro$x-5,centro$x+5,centro$x+5,centro$x-5),y=c(centro$y-5,centro$y-5,centro$y+5,centro$y+5))
# Find trees within the max radius (defined above)
tem <- survey[which((centro$x-survey$x)^2+(centro$y-survey$y)^2<=radius^2),]
# Define tree crown based on tree diameter
tem$crownr <- exp(-.438+.658*log(tem$dbh/10)) # crown radius in meter
# Compute the distance from each tree to the cell's borders
pDist <- rep(NA, nrow(tem))
my.poly <- SpatialPolygons(list(Polygons(list(Polygon(corner)),1)))
for (k in 1:nrow(tem)) {
pDist[k] <- gDistance(tem[k,], my.poly)
}
# Keep only trees whose distance to the cell is within their crown radius (i.e. the crown overlaps the cell)
overlap.trees <- tem[which(pDist<=tem$crownr),]
overlap.trees$crowna <-overlap.trees$crownr^2*pi # compute crown area
# Create polygons from overlapping crowns
c1 <- circles(coordinates(overlap.trees),overlap.trees$crownr, lonlat=F, dissolve=F)
crown <- polygons(c1)
Crown <- SpatialPolygonsDataFrame(polygons(c1),data=data.frame(dbh=overlap.trees$dbh,crown.area=overlap.trees$crowna))
# Create a fine grid points to retrieve the fraction of overlapping crowns
max.dist <- ceiling(sqrt(max((centro$x - overlap.trees$x)^2 + (centro$y - overlap.trees$y)^2))) # max distance to narrow the search
finegrid <- as.data.frame(expand.grid(x=seq(centro$x-max.dist,centro$x+max.dist,1),y=seq(centro$y-max.dist,centro$y+max.dist,1)))
coordinates(finegrid) <- ~ x+y
A <- extract(Crown,finegrid)
Crown@data$ID <- seq(1,length(crown),1)
B <- as.data.frame(table(A$poly.ID))
if (nrow(B)>0) {
B <- merge(B,Crown@data,by.x="Var1",by.y="ID",all.x=T)
B$overlap <- B$Freq/B$crown.area
B$overlap[B$overlap>1] <- 1
res <- sum(B$overlap) } else {
res <- 0 }
}, survey = survey)
stopCluster(cl = cl)
})
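As a side note, on *nix one could build a FORK cluster instead; forked workers inherit the master's workspace and loaded packages, so the clusterEvalQ() and clusterExport() calls above become unnecessary. A minimal variant:
cl <- makeCluster(4, type = "FORK") # *nix only; workers share the parent R state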

For those interested in trees, crowns & biomass: a faster way of computing crown-distributed biomass in a forest stand was suggested to me (thanks to H. Muller-Landau). One needs to think on a stem-by-stem basis over a 1x1 m grid. The code hereafter takes 6 min to run vs. a couple of hours for the previous code. Hope it's of interest!
# Create a fake 1-ha forest stand:
trees <- data.frame(x=sample(99.5,1000,replace=T),y=sample(99.5,1000,replace=T),dbh=sample(100,1000,replace=T))
# Create a 1x1m cell matrix where to store the result
cdagb=matrix(0,nrow=100,ncol=100)
# Calculate the crown radius for every stem (fake allometry)
trees$crownradius = 2*trees$dbh^0.5
trees$agb = trees$dbh^2/100 # fake aboveground biomass, placeholder for a real allometry (needed below)
#Calculate the index of the 1x1 m quadrat in which the tree stem falls
trees$quadx=ceiling(trees$x)
trees$quady=ceiling(trees$y)
# Run the algo stem-by-stem
for (i in 1:nrow(trees)) {
# xdisp and ydisp are the integer x and y cell positions that should be checked to see whether the focal tree's crown overlaps them
xdisp=seq(ceiling(trees$quadx[i]-trees$crownradius[i]),floor((trees$quadx[i]+trees$crownradius[i])),1)
xdisp[xdisp>100] <- 100 + (100 - xdisp[xdisp>100]) # mirror indices beyond the plot edge back onto adjacent cells
xdisp[xdisp<1] <- -xdisp[xdisp<1] + 1 # same for indices below 1
ydisp=seq(ceiling(trees$quady[i]-trees$crownradius[i]),floor((trees$quady[i]+trees$crownradius[i])),1)
ydisp[ydisp>100] <- 100 + (100 - ydisp[ydisp>100])
ydisp[ydisp<1] <- -ydisp[ydisp<1] + 1
# Calculate the square of the x and y distances from the focal tree to the center of each of these cells
xdistsqr=(xdisp-trees$quadx[i])^2
ydistsqr=(ydisp-trees$quady[i])^2
nx=length(xdisp)
ny=length(ydisp)
# Calculate the distance from the center of each cell in the neighborhood to the focal tree
distmatrix=matrix(sqrt(rep(xdistsqr,each=ny)+rep(ydistsqr,nx)),nrow=nx,ncol=ny)
# include only the cells whose centers fall within the focal crown
incmatrix=ifelse(distmatrix<trees$crownradius[i],1,0)
ncells=sum(incmatrix)
agbpercell=trees$agb[i]/ncells # divide the tree's biomass evenly among the overlapped cells
addagbmatrix=incmatrix*agbpercell # reallocate biomass by cell
# add the biomass divided in square meter to each grid point
cdagb[xdisp,ydisp] = cdagb[xdisp,ydisp] + addagbmatrix
}
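A quick sanity check, assuming the fake agb column defined above: the biomass spread over the grid should add up to roughly the stand total (mirrored edge indices can collide during the matrix assignment, so small discrepancies are possible).
sum(cdagb) # total crown-distributed biomass over the 1x1 m grid
sum(trees$agb) # stand total biomass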

Related

How to iterate with dynamic changing the number edges and nodes?

I have a random graph with 10 nodes, 4 of which have degree zero.
I need to obtain a connected graph by 1) selecting a zero-degree node and the edge with the minimal feature (for example, a random number from a uniform distribution assigned to each edge), connecting the node to the graph by creating two edges incident to it and deleting that edge, and 2) repeating step 1 for all zero-degree nodes.
The original graph is on the left, the resulting one on the right.
My attempt is:
library(igraph)
######################################################################
set.seed(5)
g <- sample_gnm(10, 4)
xy <- cbind(runif(10), runif(10))
par(mfrow=c(1,2))
plot(g, vertex.size=5, layout=xy)
num_point <- length(V(g)[degree(g)==0])
for(k in 1:num_point){
points = V(g)[degree(g)==0]
for(i in 1:length(E(g))) { # loop over all edges
head <- get.edgelist(g)[i,][1]; h <- c(V(g)[head]$x, V(g)[head]$y)
tail <- get.edgelist(g)[i,][2]; t <- c(V(g)[tail]$x, V(g)[tail]$y)
d <- NULL
# loop over all points
for(j in points) d <- c(d, runif(1))
E(g)[i]$d <- min(d) # local min
E(g)[i]$p <- points[which(d == min(d))]
} # i
ei = which.min(E(g)$d) # edge with the global min
vi = E(g)[ei]$p
# head and tail of edge with global min
head <- get.edgelist(g)[E(g)[ei],][1]; tail <- get.edgelist(g)[E(g)[ei],][2]
g <- add_edges(g, c(head, V(g)[vi],
V(g)[vi],
tail));
g <- delete_edges(g, get.edge.ids(g, c(head, tail) ))
}
plot(g, vertex.size=5, layout=xy)
Question. How to organize the loop over all edges when the number of edges increases by 1 and the number of points decreases by 1 at every step? As one can see, I don't use the k variable explicitly.
Instead of a for loop, I think you can use repeat plus a termination condition, i.e., stop when there are no isolated vertices any more:
repeat {
points <- V(g)[degree(g) == 0]
for (i in 1:length(E(g))) { # loop over all edges
head <- get.edgelist(g)[i, ][1]
h <- c(V(g)[head]$x, V(g)[head]$y)
tail <- get.edgelist(g)[i, ][2]
t <- c(V(g)[tail]$x, V(g)[tail]$y)
d <- NULL
# loop over all points
for (j in points) d <- c(d, runif(1))
E(g)[i]$d <- min(d) # local min
E(g)[i]$p <- points[which(d == min(d))]
} # i
ei <- which.min(E(g)$d) # edge with the global min
vi <- E(g)[ei]$p
# head and tail of edge with global min
head <- get.edgelist(g)[E(g)[ei], ][1]
tail <- get.edgelist(g)[E(g)[ei], ][2]
g <- add_edges(g, c(
head, V(g)[vi],
V(g)[vi],
tail
))
g <- delete_edges(g, get.edge.ids(g, c(head, tail)))
if (sum(degree(g) == 0) == 0) {
break
}
}
I recommend using recursion for this and dropping the for loop; recursion for tree and graph structures will definitely make your life easier.
Answer (see the sketch below):
maintain a stack of all the leaf nodes
every time you iterate, empty your stack by matching the leaf node values
if there is a new value and the count of the stack differs from the old count, iterate again.
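A minimal recursive sketch in igraph terms (a hypothetical helper, not the poster's exact structure; the random edge choice merely stands in for the minimal-feature criterion):
library(igraph)
connect_isolated <- function(g) {
iso <- which(degree(g) == 0)
if (length(iso) == 0) return(g) # base case: no isolated vertices left
e <- sample(ecount(g), 1) # random edge choice stands in for the min-feature rule
ht <- ends(g, e, names = FALSE) # head and tail of the chosen edge
g <- delete_edges(g, e)
g <- add_edges(g, c(ht[1], iso[1], iso[1], ht[2])) # splice the isolated vertex in
connect_isolated(g) # recurse on the updated graph
}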

Algebra using rasters and data frames

I want to predict vegetation health using 2 remote sensing vegetation indices (VIs) for multiple tree stands across multiple months. I previously approached this by using a for() loop to iterate through a list of multi-band rasters and calculate the two VIs for each raster (month) using a given equation. I then used raster::extract() to extract the pixels corresponding to each stand. However, I now would like to include some additional variables in my predictions of vegetation health, and I am having trouble integrating them using the same method, as they are simply columns in a dataframe and not rasters. I'm open to different ways to do this; I just can't think of any.
example:
#Part 1: Loading libraries and creating some sample data
library(sf)
library(raster)
library(terra)
#polygons to generate random points into
v <- vect(system.file("ex/lux.shp", package="terra"))
v <- v[c(1:12)]
v_sf <- st_as_sf(v) # Convert 'SpatVector' into 'sf' object
#5 rasters (months) with 5 bands each
r <- rast(system.file("ex/elev.tif", package="terra"))
r <- rep(r, 5) * 1:5
names(r) <- paste0("band", 1:5)
ras_list <- list(r,r,r,r,r)
#generating some points (10 forest stands)
pnts <- st_sample(v_sf, size = 10, type = "random")
pnts<- as_Spatial(pnts)
#Part 2: Loop to predict vegetation health using two VI variables
vis <- list() #empty list to store NDVI rasters
for (i in seq_along(ras_list)) {
b <- ras_list[[i]]
#vegetation health = 1.23 + (0.45 * VI1) - (0.67 * VI2)
vis[[i]] <- 1.23 + 0.45*((b[[4]] + b[[3]] - b[[1]]) / (b[[4]] + b[[3]])) - 0.67*(b[[1]] * b[[3]] - b[[4]])
}
#Part 3: Loop to extract pixel values for each forest stand
vi_vals <- list() #empty list to store extracted pixel values
for (i in 1:length(vis)) {
n <- raster(vis[[i]])
vi_vals[[i]] <- raster::extract(n, pnts, method = "bilinear")
}
This method works fine but as I mentioned above, I now need to repeat the same process using a new equation which incorporates variables that can't be calculated from a raster. These values are simply 3 columns in a dataframe that are identified by a stand ID.
Let's first simplify your example a bit
Example data
library(terra)
v <- vect(system.file("ex/lux.shp", package="terra"))
r <- rast(system.file("ex/elev.tif", package="terra"))
r <- rep(r, 5) * 1:5
names(r) <- paste0("b", 1:5)
ras_list <- list(r,r,r,r,r)
set.seed(1)
pnts <- spatSample(v, 10, "random")
values(pnts) = data.frame(id=1:10, a=5:14, b=3:12, d=6:15)
Compute VI and extract
vis <- list()
for (i in seq_along(ras_list)) {
b <- ras_list[[i]]
vis[[i]] <- 1.23 + 0.45*((b[[4]] + b[[3]] - b[[1]]) / (b[[4]] + b[[3]])) - 0.67*(b[[1]] * b[[3]] - b[[4]])
}
vis <- rast(vis)
names(vis) = paste0("set", 1:5)
vi_vals <- extract(vis, pnts, method = "bilinear")
And now you can do something with the tree parameters
out <- vi_vals[,-1] * pnts$a + pnts$b / pnts$d
It would be more efficient to first extract the values and then apply the function
e <- list()
for (i in seq_along(ras_list)) {
x <- extract(ras_list[[i]], pnts, method="bilinear")[,-1]
e[[i]] = (1.23 + 0.45*((x$b4 + x$b3 - x$b1) / (x$b4 + x$b3)) - 0.67*(x$b1 * x$b3 - x$b4)) * pnts$a + pnts$b / pnts$d
}
e <- do.call(cbind, e)
The results are not exactly the same; the VI equation is nonlinear, so bilinearly interpolating the band values and then applying the equation is not identical to applying the equation to the rasters and interpolating the result.
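To quantify the discrepancy, the two result matrices can be compared directly (assuming out and e from the code above):
max(abs(as.matrix(out) - e)) # largest absolute difference between the two methods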

How can I use parallel processing on my function in R without this error popping up?

I've been having problems trying to parallelise my function. I've looked at a lot of the available tips online, but when I run the suggested code I receive the error:
Error in match.fun(FUN) : argument "FUN" is missing, with no default
In addition: Warning messages:
1: In .Internal(gc(verbose, reset, full)) :
closing unused connection 6...
My function is:
## tau-leap Gillespie algorithm function
tauLeapG <- function(beta, # transmission rate
theta, # dispersal scale
b=1, # kernel shape parameter, 1 for exponential
sigma=0, # asymptomatic period, used for outputting the time series
q0=0, # starting incidence if ppp is without marks
q.end=1, # stopping condition 1: incidence level
t.end=Inf, # stopping condition 2: time after first simulated time step
area.host=10, # surface area occupied by one host
delta.t=1, # time step
ppp, # point pattern as a ppp object, optionally with marks 1/0 for infected/healthy
dist.mat=NULL){ # distance matrix, if its computation is to be avoided here (e.g. for repeated calls)
## if the point pattern has no marks, generate some randomly that fits q0
if (is.null(marks(ppp))){
inf.start <- max(1, round(ppp$n * q0))
marks(ppp) <- sample(c(rep(FALSE, ppp$n-inf.start), rep(TRUE, inf.start)))
}
## compute distance matrix if not provided
if (is.null(dist.mat)){
## add the kernel computation that can be added directly on the dist matrix to reduce comp time
dist.mat <- exp(-pairdist(ppp)^b / theta^b)
diag(dist.mat) <- NA
}
## function that computes the infection event probability, based on the dispersal kernel
k.norm <- beta * area.host * (b/(2*pi*theta^2*gamma(2/b))) # constant part of the exponential power kernel
infection <- function(infected, dist){
inf <- matrix(k.norm * dist[infected,!infected],
nrow=sum(infected), byrow=FALSE)
inf[is.na(inf)] <- 0
inf
}
## starting time
time <- 0
## initiate the heavy dataframe that will aggregate all changes
df.big <- data.frame(time=0, who=which(ppp$marks), t(ppp$marks))
## computation loop
while (any(!ppp$marks) & time <= t.end & mean(ppp$marks) < q.end){
## infection event probability
events <- infection(ppp$marks, dist=dist.mat)
## random Poisson draws
new.infected <- which(!ppp$marks)[rpois(n=sum(!ppp$marks), lambda=apply(events, 2, sum) * delta.t) > 0]
## change marks of newly infected
ppp$marks[new.infected] <- TRUE
## increment time
time <- time + delta.t
## if some infection, increment the big dataframe
if (length(new.infected) > 0){
df.big <- rbind(df.big, data.frame(time=time, who=new.infected, t(ppp$marks)))
}
## print a dot per new infection
# cat(paste0(rep('.', length(new.infected)), collapse = '')) ## comment for quiet
}
## make a compact, time-only version of the big dataframe
times.i <- unique(df.big[,1])
times.d <- times.i + sigma
times <- sort(unique(c(times.i, times.d)))
infected <- sapply(times, FUN=function(t) sum(t >= df.big[,1]))
detectable <- sapply(times, FUN=function(t) sum(t >= df.big[,1] + sigma))
df.small <- data.frame(time=times, infected=infected, detectable=detectable)
## output the simplified time series, and the big one
list(df.small[df.small$time <= max(df.big$time),], df.big)
}
Then I create a landscape:
library('spatstat')
library('ggplot2')
library('dplyr')
library('reshape2')
n <- 1000 # number of hosts
dim <- 1000 # dimension of the landscape
landscape <- ppp(x=runif(n)*dim, y=runif(n)*dim,
window = owin(xrange = c(0, dim), yrange = c(0, dim)))
## give marks to the process, e.g. 1 infected randomly selected
marks(landscape) <- sample(c(TRUE, rep(FALSE, n-1)))
I am trying to parallelise this function below:
output <- tauLeapG(beta=1,
theta=.5,
b=.4,
sigma=sigma,
delta.t=10,
ppp=landscape)
I have tried:
install.packages("Parallel")
library("Parallel")
install.packages("doParallel")
library("doParallel")
no_cores <- detectCores(logical = TRUE)
cl<- makeCluster ( 4 , type = "SOCK" )
registerDoParallel(cl)
clusterExport(cl,list("tauLeapG","landscape"))
system.time(
output<-c(parApply(cl,landscape,1,fun=tauLeapG))
)
But this returns the error above. I am very new to parallel processing, so could somebody help me figure out how to parallelise this code? Thanks.
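Not a full answer, but a hedged sketch of the likely fixes: the package is parallel (lowercase, bundled with R, so nothing to install), and parApply's argument is spelled FUN, so passing fun=tauLeapG leaves FUN missing, which matches the error. Also, applying over a ppp object row by row is probably not intended; running independent replicates with parLapply may be closer to the goal:
library(parallel) # ships with base R; no install.packages() needed
cl <- makeCluster(4) # PSOCK is the default cluster type
clusterEvalQ(cl, library(spatstat)) # workers need spatstat for ppp/pairdist
clusterExport(cl, c("tauLeapG", "landscape"))
# FUN must be named exactly FUN (or passed by position); here, 4 replicates
outputs <- parLapply(cl, 1:4, function(i) tauLeapG(beta=1, theta=.5, b=.4, sigma=0, delta.t=10, ppp=landscape))
stopCluster(cl)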

Package for Divide Chain of tessellations in R, spatstat package?

I am trying to create pretty figures of clustered points. Is there a package which will create the divide chain between tessellations of points? Ideally it would be suitable for plotting in ggplot.
Here is some example code:
#DivideLineExample
library(spatstat)
W=owin(c(0,1),c(0,1)) # Set up the Window
p<-runifpoint(42, win=W) # Get random points
ll=cbind(p$x,p$y) # get lat/long for each point
zclust=kmeans(ll,centers=4) # Cluster the points spatially into 4 clusters
K<-pp<-D<-list()
plot(W,main="Clustered Points")
for (i in 1:4){ # this breaks up the points into separate ppp objects for each cluster
K[[i]]=ll[zclust$cluster==i,]
pp[[i]]=as.ppp(K[[i]],W)
plot(pp[[i]],col=i,add=TRUE,cex=1.5,pch=16)
D[[i]]=dirichlet(pp[[i]]) # This performs the Dirichlet Tessellation and plots
plot(D[[i]],col=i,add=TRUE)
}
This outputs as such:
http://imgur.com/CCXeOEB
What I'm looking for is this:
http://imgur.com/7nmtXjo
I know an algorithm exists.
Any ideas/alternatives?
I have written a function that I think will do what you want:
divchain <- function (X) {
stopifnot(is.ppp(X))
if(!is.multitype(X)) {
whinge <- paste(deparse(substitute(X)),
"must be a marked pattern with",
"factor valued marks.\n")
stop(whinge)
}
X <- unique(X, rule = "deldir", warn = TRUE)
w <- Window(X)
require(deldir)
dd <- deldir(X,z=marks(X),rw=c(w$xrange,w$yrange))
if (is.null(dd))
return(NULL)
ddd <- dd$dirsgs
sss <- dd$summary
z <- sss[["z"]]
rslt <- list()
nsgs <- nrow(ddd)
K <- 0
for (i in 1:nsgs) {
i1 <- ddd[i,5]
i2 <- ddd[i,6]
c1 <- z[i1]
c2 <- z[i2]
if(c1 != c2) {
K <- K+1
rslt[[K]] <- unlist(ddd[i,1:4])
}
}
class(rslt) <- "divchain"
attr(rslt,"rw") <- dd$rw
rslt
}
I have also written a plot method for class "divchain":
plot.divchain <- function(x,add=FALSE,...){
if(!add) {
rw <- attr(x,"rw")
plot(0,0,type="n",ann=FALSE,axes=FALSE,xlim=rw[1:2],ylim=rw[3:4])
bty <- list(...)$bty
box(bty=bty)
}
lapply(x,function(u){segments(u[1],u[2],u[3],u[4],...)})
invisible()
}
E.g.:
require(spatstat)
set.seed(42)
X <- runifpoint(50)
z <- factor(kmeans(with(X,cbind(x,y)),centers=4)$cluster)
marks(X) <- z
dcX <- divchain(X)
plot(dirichlet(X),border="brown",main="")
plot(X,chars=20,cols=1:4,add=TRUE)
plot(dcX,add=TRUE,lwd=3)
Let me know whether this is satisfactory. Sorry I can't help you with ggplot stuff; I don't do ggplot.
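For the ggplot side, one possible addition (a sketch, not part of the answer above): each divchain element is a plain numeric vector (x1, y1, x2, y2), so the segments can be stacked into a data frame for geom_segment().
library(ggplot2)
seg <- as.data.frame(do.call(rbind, dcX)) # one row per divide-chain segment
names(seg) <- c("x1", "y1", "x2", "y2")
ggplot(seg) + geom_segment(aes(x=x1, y=y1, xend=x2, yend=y2)) + coord_fixed()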
You could try a point-in-polygon test, for example with a Kirkpatrick data structure. Much easier is to divide the polygon horizontally or vertically. Source: http://www.personal.kent.edu/~rmuhamma/Compgeometry/MyCG/Voronoi/DivConqVor/divConqVor.htm

Offline orthogonal range counting implementation

It seems that a statistical problem that I am working on requires doing something known in computational geometry as "offline orthogonal range counting":
Suppose I have a set of n points (for the moment, in the plane). For every pair of points i and j, I would like to count the number of remaining points in the set that are in the rectangle whose diagonal is the segment with endpoints i and j. The overall output then is a vector of n(n-1) values each in [0, 1, 2, ... , n-2].
I've seen that a rich literature on the problem (or at least a very similar problem) exists, but I cannot find an implementation. I would prefer an R (a statistical computing language) package, but I guess that's asking too much. An open source C/C++ implementation will also work.
Thanks.
I hope I understand your problem well. Here is an implementation in R using the package geometry. I use the
mesh.drectangle function, which computes a signed distance from points p to the boundary of a rectangle. The algorithm:
I create all pairs of points using combn
for each pair of points, I compute the distance from its rectangle rect_p to the other points
if the distance < 0, the point lies inside the rectangle.
For example
library(geometry)
## I generate some data
set.seed(1234)
p.x <- sample(1:100,size=30,replace=T)
p.y <- sample(1:100,size=30,replace=T)
points <- cbind(p.x,p.y)
## the algorithm
ll <- combn(1:nrow(points),2,function(x){
x1<- p.x[x[1]]; y1 <- p.y[x[1]]
x2<- p.x[x[2]]; y2 <- p.y[x[2]]
p <- points[-x,]
d <- mesh.drectangle(p,x1,y1,x2,y2)
res <- NA
if (any(d < 0)) {
points.in = as.data.frame(p,ncol=2)[ d < 0 , ]
res <- list(n = nrow(points.in),
rect = list(x1=x1,x2=x2,y1=y1,y2=y2),
points.in = points.in)
}
res
},simplify=F)
ll <- ll[!is.na(ll)]
## the result
nn <- do.call(rbind,lapply(ll,'[[','n'))
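One caveat: pairs whose rectangle contains no other point return NA and are filtered out, so nn only covers the non-empty rectangles. If the full vector of counts (including zeros) is wanted, the NA branch above could instead return a zero count, for example:
res <- list(n = 0, rect = list(x1=x1,x2=x2,y1=y1,y2=y2), points.in = NULL)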
To visualize the results, I plot the rectangles containing 5 points, for example.
library(grid)
grid.newpage()
vp <- plotViewport(xscale = extendrange(p.x),
yscale = extendrange(p.y))
pushViewport(vp)
grid.xaxis()
grid.yaxis()
grid.points(x=points[,'p.x'],y=points[,'p.y'],pch='*')
cols <- rainbow(length(ll))
ll <- ll[nn == 5] ## here I plot only the rectangle with 5 points
lapply(seq_along(ll),function(i){
x <- ll[[i]]
col <- sample(cols,1)
x1<- x$rect$x1; x2<- x$rect$x2
y1<- x$rect$y1; y2<- x$rect$y2
grid.rect(x=(x1+x2)*.5,y=(y1+y2)*.5,
width= x2-x1,height = y2-y1,
default.units ='native',
gp=gpar(fill=col,col='red',alpha=0.2)
)
grid.points(x=x$points.in$p.x,y=x$points.in$p.y,pch=19,
gp=gpar(col=rep(col,x$n)))
}
)
upViewport()
