How to plot nicely-spaced data labels? - r

Labeling data points in a plot can get unwieldy:
Randomly sampling a few labels may disappoint:
What would be a nice way to pick a small set of nicely-spaced data labels? That is, to randomly pick representatives whose labels do not overlap.
# demo data
set.seed(123)
N <- 50
x <- runif(N)
y <- x + rnorm(N, 0, x)
data <- data.frame(x, y, labels=state.name)
# plot with labels
plot(x, y)
text(x, y, data$labels)
# plot a few labels
frame()
few_labels <- data[sample(N, 10), ]
plot(x,y)
with(few_labels, text(x,y,labels))

One way to do this is through clustering. Here is a solution with stats::hclust: agglomerate the data points into clusters, then pick one random observation from each cluster.
few_labels <- function(df, coord = 1:ncol(df), grp = 5) {
  require(dplyr)
  df$cl <- cutree(hclust(dist(df[, coord])), grp)
  few_labels <- df %>%
    group_by(cl) %>%
    do(sample_n(., 1))
  return(few_labels)
}
# demo data
set.seed(123)
N <- 50
x <- runif(N)
y <- x + rnorm(N, 0, x)
data <- data.frame(x, y, labels=state.name)
# plot a few labels
frame()
few_labels <- few_labels(data,coord=1:2,grp=12)
plot(x,y)
with(few_labels, text(x,y,labels))

For all labels:
xlims=c(-1,2)
plot(x,y,xlim=xlims)
#text(x,y,data$labels,pos = 2,cex=0.7)
library(plotrix)
spread.labels(x,y,data$labels,cex=0.7,ony=NA)

Another way is to pick a point at random, throw away all points too close to it, and repeat until no point is left:
radius <- .1 # radius of a ball containing the largest label
d <- as.matrix(dist(data[, c("x","y")], upper=TRUE, diag=TRUE))
remaining <- 1:N
spaced <- numeric()
while (length(remaining) > 0) {
  p <- if (length(remaining) > 1) sample(remaining, 1) else remaining
  spaced <- c(spaced, p)                                    # keep this point
  remaining <- setdiff(remaining, which(d[p, ] < 2*radius)) # drop all points within 2*radius of it
}
frame()
plot(x,y)
spaced_labels <- data[spaced, ]
with(spaced_labels, text(x,y,labels))

Related

Problem with theoretical density functions plotted to histograms in R

I ran simulations in R and plotted the results as histograms. There is no problem adding an empirical density to the histograms with the density() function. But for some reason I get very strange results when I plot the theoretical densities onto the histograms for comparison. Here are two example codes and two pictures. The blue theoretical pdfs are way off and I do not know why. Could someone with R skills point out my mistakes?
# generating 10000 samples thrice from U(0,1) distribution
# and sorting them for the statistics:
n <- 10000
samples1 <- data.frame('0'=c(rep(NA,4)))
samples2 <- data.frame('0'=c(rep(NA,10)))
samples3 <- data.frame('0'=c(rep(NA,10)))
for (i in 1:n) {
  new <- runif(4)
  samples1[, ncol(samples1) + 1] <- sort(new)
  colnames(samples1)[ncol(samples1)] <- i
  new <- runif(10)
  samples2[, ncol(samples2) + 1] <- sort(new)
  colnames(samples2)[ncol(samples2)] <- i
  new <- runif(10)
  samples3[, ncol(samples3) + 1] <- sort(new)
  colnames(samples3)[ncol(samples3)] <- i
}
# dropping the first (useless) columns:
samples1 <- samples1[-c(1)]
samples2 <- samples2[-c(1)]
samples3 <- samples3[-c(1)]
# selecting the statistics from the samples:
# X_2:4
stat24 <- rep(NA, n)
for (i in 1:n) {
  stat24[i] <- samples1[2, i]
}
# X_2:10
stat210 <- rep(NA, n)
for (i in 1:n) {
  stat210[i] <- samples2[2, i]
}
# X_10:10
stat1010 <- rep(NA, n)
for (i in 1:n) {
  stat1010[i] <- samples3[10, i]
}
# plotting the histograms and Beta pdfs:
hist(stat24, freq = FALSE)
lines(dbeta(stat24, 2, 5), col='blue')
lines(density(stat24), col='red')
The kth order statistic of an n-sample from U(0,1) follows a Beta(k, n-k+1) distribution, yet the theoretical pdf appears as the odd blue stroke on the right.
n <- 10000
random_variable_F <- rep(NA,n)
# generating 10000 samples of sizes 10 and 5 and computing F:
for (i in 1:n) {
  x <- rnorm(10, mean = 10, sd = sqrt(5))
  y <- rnorm(5, mean = 20, sd = sqrt(10))
  random_variable_F[i] <- (var(x)/5)/(var(y)/10) # (S1^2/sigma1^2)/(S2^2/sigma2^2)
}
#head(random_variable_F)
# plotting the histogram:
hist(random_variable_F, freq = F)
lines(density(random_variable_F), col='red')
lines(df(random_variable_F, 9, 4), col='blue')
The random variable F follows an F-distribution. The parameters are the sample sizes minus one, in this case 10-1=9 and 5-1=4. The theoretical curve is quite wild:
If you pass a single vector to lines, it assumes that this is a vector of y values you want to plot. It plots the first y value at x = 1, the second y value at x = 2, etc, all the way up to x = length(y). In your case, random_variable_F is an unordered random variable, and you are just plotting its sequential values at 1:10000 along the x axis.
Clearly, you want the function y = df(x) to be plotted, so you need to pass random_variable_F as the x values and df(random_variable_F) as the y values. You will also need to sort random_variable_F first to ensure the line is plotted from left to right:
hist(random_variable_F, freq = F)
lines(density(random_variable_F), col='red')
lines(sort(random_variable_F), df(sort(random_variable_F), 9, 4), col='blue')
Note that this doesn't happen when you plot lines(density(random_variable_F)), because density returns a list containing ordered x and y values rather than a single vector.
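The same fix applies to the first histogram; a minimal sketch (note: this also swaps in Beta(2, 3), i.e. Beta(k, n-k+1) for k=2 and n=4, in place of the Beta(2, 5) used above, and uses curve(), which evaluates the density on its own ordered grid):
hist(stat24, freq = FALSE)
lines(density(stat24), col = 'red')
curve(dbeta(x, 2, 3), add = TRUE, col = 'blue') # theoretical pdf of X_2:4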

How to find outer limits of contour lines for three-dimensional data in r

I would like to extend the example given here
How to plot a contour line showing where 95% of values fall within, in R and in ggplot2
to data with three dimensions (x, y and z), and instead of plotting the contour line I'd like to get the limits of the x, y and z values.
This is the example from the previous post.
library(ggplot2)
set.seed(1001)
d <- data.frame(x=rnorm(1000),y=rnorm(1000))
kd <- ks::kde(d, compute.cont=TRUE)
contour_95 <- with(kd, contourLines(x = eval.points[[1]], y = eval.points[[2]],
                                    z = estimate, levels = cont["5%"])[[1]])
contour_95 <- data.frame(contour_95)
ggplot(data = d, aes(x, y)) +
  geom_point() +
  geom_path(aes(x, y), data = contour_95) +
  theme_bw()
Then it's possible to get the limits of the contour like this:
range(contour_95$x)
range(contour_95$y)
I would love to know how to get the x, y and z ranges of 3-D contours at specified percentiles.
ks::kde can deal with higher dimensions, but contourLines() can't.
This is what I've tried...
set.seed(1001)
d <- data.frame(x=rnorm(1000), y=rnorm(1000), z=rnorm(1000))
kd <- ks::kde(d, compute.cont=TRUE)
#what kd$estimates are > 95th percentile?
#make function that can extract from 3d array
multi.which <- function(A) {
  if (is.vector(A)) return(which(A))
  d <- dim(A)
  T <- which(A) - 1
  nd <- length(d)
  t(sapply(T, function(t) {
    I <- integer(nd)
    I[1] <- t %% d[1]
    sapply(2:nd, function(j) {
      I[j] <<- (t %/% prod(d[1:(j-1)])) %% d[j]
    })
    I
  }) + 1)
}
# extract those estimates whose density exceeds the 95th percentile
ests <- multi.which(kd$estimate > kd$cont["5%"])
#make into a long dataframe with column number in the second column and row number in first column
col1=rep(1, nrow(ests))
col2=rep(2, nrow(ests))
col3=rep(3, nrow(ests))
rows=c(ests[,1], ests[,2], ests[,3])
cols=c(col1,col2,col3)
index=cbind(rows,cols)#this is the index so we can extract the coordinates in multi-D space
car::some(index)
#get coordinates with this function
fExtract <- function(dat, indexDat) {
  dat[as.matrix(indexDat)]
}
#pull three coordinates (x,y,z) from eval.points into 3 columns
eval.pts <- cbind(kd$eval.points[[1]], kd$eval.points[[2]], kd$eval.points[[3]])
v <- fExtract(eval.pts, index) #one long vector
#re-create the three columns of x, y and z coordinates of points at higher density than 95th percentile
x1 <- v[1:nrow(ests)]
y1 <- v[(nrow(ests)+1):(2*nrow(ests))]
z1 <- v[(2*nrow(ests)+1):(3*nrow(ests))]
#the three coordinates.
fin <- cbind(x1,y1,z1)
#get range of each dimension
range(x1)
range(y1)
range(z1)
But I'm not confident it's right.
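For what it's worth, a shorter route to those ranges is possible because which() can return array indices directly; a sketch under the same setup (an editorial suggestion, not part of the original post), replacing multi.which and the index bookkeeping:
idx <- which(kd$estimate > kd$cont["5%"], arr.ind = TRUE) # grid indices of cells above the cutoff
range(kd$eval.points[[1]][idx[, 1]]) # x range
range(kd$eval.points[[2]][idx[, 2]]) # y range
range(kd$eval.points[[3]][idx[, 3]]) # z range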

R plot heatmap of matrix with superposed line

I have a matrix whose values I want to plot. Using the image function it looks like this.
How can I plot a line over the image?
(In my case, I want to plot a line over the maximum values along the x axis.)
Edit
The image and the line I want to plot over are the output from the Bayesian Online Changepoint detection. Since it is quite short I'll share the whole code. The plotting part is at the end:
# Bayesian Online Changepoint Detection
# Adams, MacKay 2007
# http://hips.seas.harvard.edu/content/bayesian-online-changepoint-detection
#######################################
# Other python and matlab implementations
# https://github.com/JackKelly/bayesianchangepoint
# http://hips.seas.harvard.edu/content/bayesian-online-changepoint-detection
# http://www.inference.phy.cam.ac.uk/rpa23/cp/gaussdemo.m
# http://www.inference.phy.cam.ac.uk/rpa23/cp/studentpdf.m
# http://www.inference.phy.cam.ac.uk/rpa23/cp/constant_hazard.m
# Even more commented, but different paper:
# https://github.com/davyfeng/ipdata/blob/master/csv/bocpd/core/bocpd.m
# Generate data
x1 <- rnorm(100, 10.5, 0.1)
x2 <- rnorm(100, 1, 0.1)
x3 <- rnorm(100, -10, 0.1)
x4 <- rnorm(100, -1, 0.1)
x5 <- rnorm(100, 5, 0.1)
x6 <- rnorm(30, 1, 0.1)
x7 <- rnorm(100, 8, 0.1)
x <- c(x1,x2,x3,x4,x5, x6,x7)
##############
# Algorithm
##############
# Prepare the scaled and shifted student-t
dt.scaled.shifted <- function(x, m, s, df) stats::dt((x-m)/s, df)/s
# Prepare the Hazard function
hazard <- function(x, lambda = 200) { rep(1/lambda, length(x)) }
L <- length(x)
R <- matrix(rep(0,(L+1)*(L+1)), L+1, L+1)
R[1,1] <- 1 # for t=1 we are sure that p(r=1)=1
mu0 <- 0; kappa0 <- 1; alpha0 <- 1; beta0 <- 1
muT <- mu0
kappaT <- kappa0
alphaT <- alpha0
betaT <- beta0
maxes <- rep(0, L)
# Process data as they come in
for (t in 1:L) {
  # Evaluate predictive probability
  predprobs <- dt.scaled.shifted(x[t], muT, betaT*(kappaT+1)/(alphaT*kappaT), 2*alphaT)
  H <- hazard(x[1:t])
  # Calculate growth probabilities
  R[2:(t+1), t+1] <- R[1:t, t]*predprobs*(1-H)
  # Calculate changepoint (reset) probabilities
  R[1, t+1] <- sum(R[1:t, t]*predprobs*H)
  # Renormalize
  R[, t+1] <- R[, t+1] / sum(R[, t+1])
  # Update parameters for each possible run length;
  # keep the past ones since they will be used iteratively
  muT0 <- c(mu0, (kappaT*muT + x[t])/(kappaT+1))
  kappaT0 <- c(kappa0, kappaT+1)
  alphaT0 <- c(alpha0, alphaT + 0.5)
  betaT0 <- c(beta0, betaT + (kappaT * (x[t]-muT)^2)/(2*(kappaT+1)))
  muT <- muT0
  kappaT <- kappaT0
  alphaT <- alphaT0
  betaT <- betaT0
  # Store the maximum, to plot later
  maxes[t] <- which.max(R[, t])
}
# Plot results
par(mfrow=c(2,1))
plot(x, type='l')
image((-t(log(R))), col = grey(seq(0,1,length=256)), axes=T)
par(new=T)
plot(1:(dim(R)[1]-1), maxes,type='l', col="red")
On top is the original data; on the bottom, the probability that the current run has length y. The red line on the bottom should follow the dark shades.
I got it, I thought I had already tried par(new=T) but obviously I hadn't:
m <- matrix(rnorm(100,1,1),50,50)
image(m, col = grey(seq(0,1,length=256)))
par(new=T)
plot(seq(0,1, length=50), type='l', col="red", lwd=5)
Quick example simulating the whole process:
data <- vector()
for (i in 1:50) {
  data <- rbind(data, dpois(1:50, i^1.2))
}
maxes <- apply(data, 1, which.max)
image(-data, col = grey(seq(0,1,length=256)))
par(new=T)
plot(1:dim(data)[1], c(maxes),type='l')
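One caveat with the par(new=T) trick is that the two plots keep independent coordinate systems, so the overlay only lines up if the ranges happen to match. A sketch of an alternative (an editorial variation, not from the original answer) that stays in image()'s own 0..1 coordinates:
maxes <- apply(data, 1, which.max)
image(-data, col = grey(seq(0, 1, length = 256)))
# image() places the cell centres of a 50x50 matrix at seq(0, 1, ...) on both axes,
# so map row/column indices onto that grid instead of opening a second plot
lines((seq_len(nrow(data)) - 1)/(nrow(data) - 1),
      (maxes - 1)/(ncol(data) - 1), col = "red")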

R: Draw a polygon with conditional colour

I want to colour the area under a curve. The area with y > 0 should be red, the area with y < 0 should be green.
x <- c(1:4)
y <- c(0,1,-1,2,rep(0,4))
plot(y[1:4],type="l")
abline(h=0)
Using ifelse() does not work:
polygon(c(x,rev(x)),y,col=ifelse(y>0,"red","green"))
What I achieved so far is the following:
polygon(c(x,rev(x)),y,col="green")
polygon(c(x,rev(x)),ifelse(y>0,y,0),col="red")
But then the red area is too large. Do you have any ideas how to get the desired result?
If you want two different colors, you need two different polygons. You can either call polygon multiple times, or you can add NA values in your x and y vectors to indicate a new polygon. R will not automatically calculate the intersection for you. You must do that yourself. Here's how you could draw that with different colors.
x <- c(1,2,2.5,NA,2.5,3,4)
y <- c(0,1,0,NA,0,-1,0)
#calculate color based on most extreme y value
g <- cumsum(is.na(x))
gc <- ifelse(tapply(y, g, function(x) x[which.max(abs(x))]) > 0,
             "red", "green")
plot(c(1, 4),c(-1,1), type = "n")
polygon(x, y, col = gc)
abline(h=0)
In the more general case, it might not be as easy to split a polygon into different regions. There seems to be some support for this kind of operation in GIS packages, where it is more common. However, I've put together a somewhat general approach that may work for simple polygons.
First, I define a closure that will define a cutting line. The function will take a slope and y-intercept for a line and will return the functions we need to cut a polygon.
getSplitLine <- function(m = 1, b = 0) {
  force(m); force(b)
  classify <- function(x, y) {
    y >= m*x + b
  }
  intercepts <- function(x, y, class = classify(x, y)) {
    w <- which(diff(class) != 0)
    m2 <- (y[w+1]-y[w])/(x[w+1]-x[w])
    b2 <- y[w] - m2*x[w]
    ix <- (b2-b)/(m-m2)
    iy <- ix*m + b
    data.frame(x = ix, y = iy, idx = w+.5,
               dir = ((rank(ix, ties = "first")+1) %/% 2) %% 2 + 1)
  }
  plot <- function(...) {
    abline(b, m, ...)
  }
  list(
    intercepts = intercepts,
    classify = classify,
    plot = plot
  )
}
Now we will define a function to actually split a polygon using the splitter we've just defined.
splitPolygon <- function(x, y, splitter) {
  addnullrow <- function(x) if (!all(is.na(x[nrow(x), ]))) rbind(x, NA) else x
  rollup <- function(x, i = 1) rbind(x[(i+1):nrow(x), ], x[1:i, ])
  idx <- cumsum(is.na(x) | is.na(y))
  polys <- split(data.frame(x = x, y = y)[!is.na(x), ], idx[!is.na(x)])
  r <- lapply(polys, function(P) {
    x <- P$x; y <- P$y
    side <- splitter$classify(x, y)
    if (side[1] != side[length(side)]) {
      ints <- splitter$intercepts(c(x, x[1]), c(y, y[1]), c(side, side[1]))
    } else {
      ints <- splitter$intercepts(x, y, side)
    }
    sideps <- lapply(unique(side), function(ss) {
      pts <- data.frame(x = x[side==ss], y = y[side==ss],
                        idx = seq_along(x)[side==ss], dir = 0)
      mm <- rbind(pts, ints)
      mm <- mm[order(mm$idx), ]
      br <- cumsum(mm$dir != 0 & c(0, head(mm$dir, -1)) != 0 &
                   c(0, diff(mm$idx)) > 1)
      if (length(unique(br)) > 1) {
        mm <- rollup(mm, sum(br == br[1]))
      }
      br <- cumsum(c(FALSE, abs(diff(mm$dir*mm$dir)) == 3))
      do.call(rbind, lapply(split(mm, br), addnullrow))
    })
    pss <- rep(unique(side), sapply(sideps, nrow))
    ps <- do.call(rbind, lapply(sideps, addnullrow))[, c("x","y")]
    attr(ps, "side") <- pss
    ps
  })
  pss <- unname(unlist(lapply(r, attr, "side")))
  src <- rep(seq_along(r), sapply(r, nrow))
  r <- do.call(rbind, r)
  attr(r, "source") <- src
  attr(r, "side") <- pss
  r
}
The input is just the values of x and y as you would pass to polygon along with the cutter. It will return a data.frame with x and y values that can be used with polygon.
For example
x <- c(1,2,2.5,NA,2.5,3,4)
y <- c(1,-2,2,NA,-1,2,-2)
sl<-getSplitLine(0,0)
plot(range(x, na.rm=T),range(y, na.rm=T), type = "n")
p <- splitPolygon(x,y,sl)
g <- cumsum(c(F, is.na(head(p$y,-1))))
gc <- ifelse(attr(p, "side")[is.na(p$y)],
             "red", "green")
polygon(p, col=gc)
sl$plot(lty=2, col="grey")
This should work for simple concave polygons as well, even with sloped splitting lines. Here's another example:
x <- c(1,2,3,4,5,4,3,2)
y <- c(-2,2,1,2,-2,.5,-.5,.5)
sl<-getSplitLine(.5,-1.25)
plot(range(x, na.rm=T),range(y, na.rm=T), type = "n")
p <- splitPolygon(x,y,sl)
g <- cumsum(c(F, is.na(head(p$y,-1))))
gc <- ifelse(attr(p, "side")[is.na(p$y)],
             "red", "green")
polygon(p, col=gc)
sl$plot(lty=2, col="grey")
Right now things can get a bit messy when a vertex of the polygon falls directly on the splitting line. I may try to correct that in the future.
A faster, but less accurate, solution is to split the data frame into a list according to a grouping variable (e.g. above = red and below = blue). This is a pretty nice workaround for rather big datasets (I would say > 100 elements); for smaller chunks some discontinuity may be visible:
x <- 1:100
y1 <- sin(1:100/10)*0.8
y2 <- sin(1:100/10)*1.2
plot(x, y2, type='l')
lines(x, y1, col='red')
df <- data.frame(x=x, y1=y1, y2=y2)
df$pos_neg <- ifelse(df$y2-df$y1 > 0, 1, -1) # above (1) or below (-1) the other curve
# create the chunk numbers used to split the data frame into a list:
df$chunk <- c(1, cumsum(abs(diff(df$pos_neg)))/2 + 1) # the first element needs to be added
df$colors <- ifelse(df$pos_neg > 0, "red", "blue") # colors used to fill the polygons
# create lists to be plotted:
l <- split(df, df$chunk) # we should get 4 sub-lists
lapply(l, function(x) polygon(c(x$x,rev(x$x)),c(x$y2,rev(x$y1)),col=x$colors))
As I said, for smaller datasets some discontinuity may be visible where sharp changes occur between positive and negative areas, but if a horizontal line separates the two regions, or more points are plotted, the effect becomes negligible.
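If the gaps do matter, a hedged middle ground is to linearly interpolate extra points before chunking, so the splits fall near the true crossings (the 1000-point grid below is an arbitrary choice):
xs <- seq(min(x), max(x), length.out = 1000)
dense <- data.frame(x = xs,
                    y1 = approx(x, y1, xs)$y, # linear interpolation of both curves
                    y2 = approx(x, y2, xs)$y)
# then rebuild pos_neg, chunk and colors on 'dense' exactly as above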

How does one turn contour lines into filled contours?

Does anyone know of a way to turn the output of contourLines into polygons in order to plot them as filled contours, as with filled.contour? Is there an order in which the polygons must then be plotted so that all levels remain visible? Here is an example snippet of code that doesn't work:
#typical plot
filled.contour(volcano, color.palette = terrain.colors)
#try
cont <- contourLines(volcano)
fun <- function(x) x$level
LEVS <- sort(unique(unlist(lapply(cont, fun))))
COLS <- terrain.colors(length(LEVS))
contour(volcano)
for(i in seq(cont)){
  COLNUM <- match(cont[[i]]$level, LEVS)
  polygon(cont[[i]], col=COLS[COLNUM], border=NA) # border=NA (no border), not the string "NA"
}
contour(volcano, add=TRUE)
A solution that uses the raster package (which calls rgeos and sp). The output is a SpatialPolygonsDataFrame that will cover every value in your grid:
library('raster')
rr <- raster(t(volcano))
rc <- cut(rr, breaks= 10)
pols <- rasterToPolygons(rc, dissolve=T)
spplot(pols)
Here's a discussion that will show you how to simplify ('prettify') the resulting polygons.
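As a taste of that discussion, a minimal sketch using rgeos::gSimplify (the tolerance is an assumption to tune for your grid; gSimplify drops the attribute table, so it is re-attached before plotting):
library(rgeos)
ps <- gSimplify(pols, tol = 0.01, topologyPreserve = TRUE) # Douglas-Peucker-style simplification
spplot(SpatialPolygonsDataFrame(ps, pols@data))            # re-attach attributes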
Thanks to some inspiration from this site, I worked up a function to convert contour lines to filled contours. It's set-up to process a raster object and return a SpatialPolygonsDataFrame.
raster2contourPolys <- function(r, levels = NULL) {
  ## requires: raster, sp, rgeos (gBuffer etc.), maptools (ContourLines2SLDF)
  ## set-up levels
  levels <- sort(levels)
  plevels <- c(min(values(r), na.rm=TRUE), levels, max(values(r), na.rm=TRUE)) # pad with raster range
  llevels <- paste(plevels[-length(plevels)], plevels[-1], sep=" - ")
  llevels[1] <- paste("<", min(levels))
  llevels[length(llevels)] <- paste(">", max(levels))
  ## convert raster object to matrix so it can be fed into contourLines
  xmin <- extent(r)@xmin
  xmax <- extent(r)@xmax
  ymin <- extent(r)@ymin
  ymax <- extent(r)@ymax
  rx <- seq(xmin, xmax, length.out=ncol(r))
  ry <- seq(ymin, ymax, length.out=nrow(r))
  rz <- t(as.matrix(r))
  rz <- rz[,ncol(rz):1] # reshape
  ## get contour lines and convert to SpatialLinesDataFrame
  cat("Converting to contour lines...\n")
  cl <- contourLines(rx, ry, rz, levels=levels)
  cl <- ContourLines2SLDF(cl)
  ## extract coordinates to generate overall boundary polygon
  xy <- coordinates(r)[which(!is.na(values(r))),]
  i <- chull(xy)
  b <- xy[c(i,i[1]),]
  b <- SpatialPolygons(list(Polygons(list(Polygon(b, hole = FALSE)), "1")))
  ## add buffer around lines and cut boundary polygon
  cat("Converting contour lines to polygons...\n")
  bcl <- gBuffer(cl, width = 0.0001) # add small buffer so it cuts bounding poly
  cp <- gDifference(b, bcl)
  ## restructure and make polygon number the ID
  polys <- list()
  for(j in seq_along(cp@polygons[[1]]@Polygons)) {
    polys[[j]] <- Polygons(list(cp@polygons[[1]]@Polygons[[j]]), j)
  }
  cp <- SpatialPolygons(polys)
  cp <- SpatialPolygonsDataFrame(cp, data.frame(id=seq_along(cp)))
  ## cut the raster by levels
  rc <- cut(r, breaks=plevels)
  ## loop through each polygon, create internal buffer, select points and define overlap with raster
  cat("Adding attributes to polygons...\n")
  l <- character(length(cp))
  for(j in seq_along(cp)) {
    p <- cp[cp$id==j,]
    bp <- gBuffer(p, width = -max(res(r))) # use a negative buffer to obtain internal points
    if(!is.null(bp)) {
      xy <- SpatialPoints(coordinates(bp@polygons[[1]]@Polygons[[1]]))[1]
      l[j] <- llevels[extract(rc,xy)]
    } else {
      xy <- coordinates(gCentroid(p)) # buffer will not be calculated for smaller polygons, so grab centroid
      l[j] <- llevels[extract(rc,xy)]
    }
  }
  ## assign level to each polygon
  cp$level <- factor(l, levels=llevels)
  cp$min <- plevels[-length(plevels)][cp$level]
  cp$max <- plevels[-1][cp$level]
  cp <- cp[!is.na(cp$level),] # discard small polygons that did not capture a raster point
  df <- unique(cp@data[,c("level","min","max")]) # to be used after holes are defined
  df <- df[order(df$min),]
  row.names(df) <- df$level
  llevels <- df$level
  ## define depressions in higher levels (ie holes)
  cat("Defining holes...\n")
  spolys <- list()
  p <- cp[cp$level==llevels[1],] # add deepest layer
  p <- gUnaryUnion(p)
  spolys[[1]] <- Polygons(p@polygons[[1]]@Polygons, ID=llevels[1])
  for(i in seq(length(llevels)-1)) {
    p1 <- cp[cp$level==llevels[i+1],] # upper layer
    p2 <- cp[cp$level==llevels[i],] # lower layer
    x <- numeric(length(p2)) # grab one point from each of the deeper polygons
    y <- numeric(length(p2))
    id <- numeric(length(p2))
    for(j in seq_along(p2)) {
      xy <- coordinates(p2@polygons[[j]]@Polygons[[1]])[1,]
      x[j] <- xy[1]; y[j] <- xy[2]
      id[j] <- as.numeric(p2@polygons[[j]]@ID)
    }
    xy <- SpatialPointsDataFrame(cbind(x,y), data.frame(id=id))
    holes <- over(xy, p1)$id
    holes <- xy$id[which(!is.na(holes))]
    if(length(holes)>0) {
      p2 <- p2[p2$id %in% holes,] # keep the polygons over the shallower polygon
      p1 <- gUnaryUnion(p1) # simplify each group of polygons
      p2 <- gUnaryUnion(p2)
      p <- gDifference(p1, p2) # cut holes in p1
    } else { p <- gUnaryUnion(p1) }
    spolys[[i+1]] <- Polygons(p@polygons[[1]]@Polygons, ID=llevels[i+1]) # add level
  }
  cp <- SpatialPolygons(spolys, pO=seq_along(llevels), proj4string=CRS(proj4string(r))) # compile into final object
  cp <- SpatialPolygonsDataFrame(cp, df)
  cat("Done!")
  cp
}
It probably holds several inefficiencies, but it has worked well in the tests I've conducted using bathymetry data. Here's an example using the volcano data:
r <- raster(t(volcano))
l <- seq(100,200,by=10)
cp <- raster2contourPolys(r, levels=l)
cols <- terrain.colors(length(cp))
plot(cp, col=cols, border=cols, axes=TRUE, xaxs="i", yaxs="i")
contour(r, levels=l, add=TRUE)
box()
Building on the excellent work of Paul Regular, here is a version that should ensure exclusive polygons (i.e. no overlapping).
I've added a new argument fd for fairy dust to address an issue I discovered working with UTM-type coordinates. Basically, as I understand it, the algorithm works by sampling lateral points from the contour lines to determine which side is inside the polygon. The distance of the sample point from the line can create problems if it ends up e.g. behind another contour. So if your resulting polygons look wrong, try setting fd to values 10^±n until it looks very wrong or about right.
raster2contourPolys <- function(r, levels = NULL, fd = 1) {
  ## requires: raster, sp, rgeos, maptools (ContourLines2SLDF, spRbind)
  ## set-up levels
  levels <- sort(levels)
  plevels <- c(min(values(r)-1, na.rm=TRUE), levels, max(values(r)+1, na.rm=TRUE)) # pad with raster range
  llevels <- paste(plevels[-length(plevels)], plevels[-1], sep=" - ")
  llevels[1] <- paste("<", min(levels))
  llevels[length(llevels)] <- paste(">", max(levels))
  ## convert raster object to matrix so it can be fed into contourLines
  xmin <- extent(r)@xmin
  xmax <- extent(r)@xmax
  ymin <- extent(r)@ymin
  ymax <- extent(r)@ymax
  rx <- seq(xmin, xmax, length.out=ncol(r))
  ry <- seq(ymin, ymax, length.out=nrow(r))
  rz <- t(as.matrix(r))
  rz <- rz[,ncol(rz):1] # reshape
  ## get contour lines and convert to SpatialLinesDataFrame
  cat("Converting to contour lines...\n")
  cl0 <- contourLines(rx, ry, rz, levels = levels)
  cl <- ContourLines2SLDF(cl0)
  ## extract coordinates to generate overall boundary polygon
  xy <- coordinates(r)[which(!is.na(values(r))),]
  i <- chull(xy)
  b <- xy[c(i,i[1]),]
  b <- SpatialPolygons(list(Polygons(list(Polygon(b, hole = FALSE)), "1")))
  ## add buffer around lines and cut boundary polygon
  cat("Converting contour lines to polygons...\n")
  bcl <- gBuffer(cl, width = fd*diff(bbox(r)[1,])/3600000) # add small buffer so it cuts bounding poly
  cp <- gDifference(b, bcl)
  ## restructure and make polygon number the ID
  polys <- list()
  for(j in seq_along(cp@polygons[[1]]@Polygons)) {
    polys[[j]] <- Polygons(list(cp@polygons[[1]]@Polygons[[j]]), j)
  }
  cp <- SpatialPolygons(polys)
  cp <- SpatialPolygonsDataFrame(cp, data.frame(id=seq_along(cp)))
  # group by elev (replicate ids)
  # ids = sapply(slot(cl, "lines"), slot, "ID")
  # lens = sapply(1:length(cl), function(i) length(cl[i,]@lines[[1]]@Lines))
  ## cut the raster by levels
  rc <- cut(r, breaks=plevels)
  ## loop through each polygon, create internal buffer, select points and define overlap with raster
  cat("Adding attributes to polygons...\n")
  l <- character(length(cp))
  for(j in seq_along(cp)) {
    p <- cp[cp$id==j,]
    bp <- gBuffer(p, width = -max(res(r))) # use a negative buffer to obtain internal points
    if(!is.null(bp)) {
      xy <- SpatialPoints(coordinates(bp@polygons[[1]]@Polygons[[1]]))[1]
      l[j] <- llevels[raster::extract(rc,xy)]
    } else {
      xy <- coordinates(gCentroid(p)) # buffer will not be calculated for smaller polygons, so grab centroid
      l[j] <- llevels[raster::extract(rc,xy)]
    }
  }
  ## assign level to each polygon
  cp$level <- factor(l, levels=llevels)
  cp$min <- plevels[-length(plevels)][cp$level]
  cp$max <- plevels[-1][cp$level]
  cp <- cp[!is.na(cp$level),] # discard small polygons that did not capture a raster point
  df <- unique(cp@data[,c("level","min","max")]) # to be used after holes are defined
  df <- df[order(df$min),]
  row.names(df) <- df$level
  llevels <- df$level
  ## define depressions in higher levels (ie holes)
  cat("Defining holes...\n")
  spolys <- list()
  p <- cp[cp$level==llevels[1],] # add deepest layer
  p <- gUnaryUnion(p)
  spolys[[1]] <- Polygons(p@polygons[[1]]@Polygons, ID=llevels[1])
  for(i in seq(length(llevels)-1)) {
    p1 <- cp[cp$level==llevels[i+1],] # upper layer
    p2 <- cp[cp$level==llevels[i],] # lower layer
    x <- numeric(length(p2)) # grab one point from each of the deeper polygons
    y <- numeric(length(p2))
    id <- numeric(length(p2))
    for(j in seq_along(p2)) {
      xy <- coordinates(p2@polygons[[j]]@Polygons[[1]])[1,]
      x[j] <- xy[1]; y[j] <- xy[2]
      id[j] <- as.numeric(p2@polygons[[j]]@ID)
    }
    xy <- SpatialPointsDataFrame(cbind(x,y), data.frame(id=id))
    holes <- over(xy, p1)$id
    holes <- xy$id[which(!is.na(holes))]
    if(length(holes)>0) {
      p2 <- p2[p2$id %in% holes,] # keep the polygons over the shallower polygon
      p1 <- gUnaryUnion(p1) # simplify each group of polygons
      p2 <- gUnaryUnion(p2)
      p <- gDifference(p1, p2) # cut holes in p1
    } else { p <- gUnaryUnion(p1) }
    spolys[[i+1]] <- Polygons(p@polygons[[1]]@Polygons, ID=llevels[i+1]) # add level
  }
  cp <- SpatialPolygons(spolys, pO=seq_along(llevels), proj4string=CRS(proj4string(r))) # compile into final object
  ## make polygons exclusive (i.e. no overlapping)
  cpx <- gDifference(cp[1,], cp[2,], id=cp[1,]@polygons[[1]]@ID)
  for(i in 2:(length(cp)-1)) cpx <- spRbind(cpx, gDifference(cp[i,], cp[i+1,], id=cp[i,]@polygons[[1]]@ID))
  cp <- spRbind(cpx, cp[length(cp),])
  ## it's a wrap
  cp <- SpatialPolygonsDataFrame(cp, df)
  cat("Done!")
  cp
}
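For completeness, a usage sketch mirroring the earlier volcano example (the library calls and fd = 1 are assumptions; tune fd as described above if the polygons look wrong):
library(raster); library(rgeos); library(maptools)
r <- raster(t(volcano))
l <- seq(100, 200, by = 10)
cp <- raster2contourPolys(r, levels = l, fd = 1)
cols <- terrain.colors(length(cp))
plot(cp, col = cols, border = cols, axes = TRUE, xaxs = "i", yaxs = "i")
contour(r, levels = l, add = TRUE)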
