I've seen a figure in a paper (Perales & Mas, 2007; Plant Cell) and I'm interested in making a similar graph with my data in R.
I have some circadian gene expression data and I need to represent which is the phase (the maximum peak of expression of a certain gene) of some genes. The graph I'm refering to is like a clock in which you can see at what time a gene has its maximum peak of expression.
(C) Phase plot of TOC1:LUC and CAB2:LUC expression in wild-type and TMG plants under the indicated photoperiods. Phases (phase/period × 24 h) were plotted against the strength of the rhythm expressed as relative amplitude error. The rhythm strength is graphed from 0 (center of the plot) to 0.8 (periphery of the circle), which indicates robust and very weak rhythms, respectively.
## generate data
set.seed(1);
gen <- data.frame(gene=c(rep('TOC1',3),rep('CAB2',3)), plant=c(rep(NA,3),'WT','WT','TMG'), photoperiod=c('8:16','12:12','16:8','8:16','16:8','8:16'), hourmean=c(11,13.5,15,4,6.5,6.5), hoursd=c(0.25,0.25,0.25,0.4,0.15,0.4), strengthmean=c(0.25,0.2,0.25,0.32,0.35,0.4), strengthsd=c(0.035,0.03,0.035,0.02,0.03,0.02), num=c(20,20,20,5,10,10), stringsAsFactors=F );
df <- cbind(as.data.frame(lapply(gen[,c('gene','plant','photoperiod')],rep,gen$num)),hour=rnorm(sum(gen$num),rep(gen$hourmean,gen$num),rep(gen$hoursd,gen$num)),strength=rnorm(sum(gen$num),rep(gen$strengthmean,gen$num),rep(gen$strengthsd,gen$num)));
tau <- 2*pi;
## define point specifications per group
ptspec <- data.frame(gene=c('TOC1','TOC1','TOC1','CAB2','CAB2','CAB2'), plant=c(NA,NA,NA,'WT','WT','TMG'), photoperiod=c('8:16','12:12','16:8','8:16','16:8','8:16'), pch=c(22,22,22,21,21,24), col=c('black','red','blue','black','blue','red'), bg=c('white','white','white','black','blue','white'), cex=1.8, lwd=3, stringsAsFactors=F );
## define virtual plot margins and overall plot region
A <- 24;
R <- 0.8;
imar <- 0.25;
bmar <- 0.4;
xlim <- c(-R,R)*(1+imar);
ylim <- c(-R*(1+imar+bmar),R*(1+imar));
## define angular and radial tick parameters
atick <- seq(0,A,3)[-A/3-1];
rtick <- seq(0,R,0.2);
atickLen <- R/50;
atickLabelDist <- atickLen*6;
## plotting helper functions
circles <- function(x,y,r,n=1000,col,lty,lwd,...) {
comb <- cbind(x,y,r);
angles <- tau*0:n/n;
if (!missing(col) && !is.null(col)) col <- rep(col,len=nrow(comb));
if (!missing(lty) && !is.null(lty)) lty <- rep(lty,len=nrow(comb));
if (!missing(lwd) && !is.null(lwd)) lwd <- rep(lwd,len=nrow(comb));
for (i in 1:nrow(comb)) {
args <- list(
comb[i,'x']+comb[i,'r']*cos(angles),
comb[i,'y']+comb[i,'r']*sin(angles)
);
if (!missing(col)) if (is.null(col)) args['col'] <- list(NULL) else args$col <- col[i];
if (!missing(lty)) if (is.null(lty)) args['lty'] <- list(NULL) else args$lty <- lty[i];
if (!missing(lwd)) if (is.null(lwd)) args['lwd'] <- list(NULL) else args$lwd <- lwd[i];
do.call(lines, c(args,...) );
}; ## end for
}; ## end circles()
radials <- function(x,y,a,r,...) {
comb <- cbind(x,y,a,r);
segments(comb[,'x'],comb[,'y'],comb[,'x']+comb[,'r']*cos(comb[,'a']),comb[,'y']+comb[,'r']*sin(comb[,'a']),...);
}; ## end radials()
## main plot
par(mar=c(1,1,1,1)+0.1,xaxs='i',yaxs='i');
plot(NA,xlim=xlim,ylim=ylim,axes=F,xlab='',ylab='');
circles(0,0,rtick,col='#aaaaaa',lty=3);
circles(0,0,R,lwd=2);
radials(0,0,tau*atick/A,R,col='#aaaaaa');
radials(R*cos(tau*atick/A),R*sin(tau*atick/A),tau*atick/A,atickLen,lwd=2);
text((R+atickLabelDist)*cos(tau*atick/A),(R+atickLabelDist)*sin(tau*atick/A),(A-atick+6)%%A,family='sans',font=2,cex=2);
with(merge(df,ptspec)[nrow(df):1,],points(strength*cos(tau*(A-hour+6)%%A/A),strength*sin(tau*(A-hour+6)%%A/A),pch=pch,col=col,bg=bg,cex=cex,lwd=lwd));
## common legend precomputations
legendTopSpace <- R/10;
legendBotSpace <- R/10;
legendDivCut <- R/20;
legendTop <- -R-legendTopSpace;
legendBot <- ylim[1]+legendBotSpace;
legendDivTop <- legendTop-legendDivCut;
legendDivBot <- legendBot+legendDivCut;
legendDivLeftSpace <- R/20;
legendDivRightSpace <- R/10;
legendPtSpace <- R/15;
## legend 1
legend1Gene <- 'TOC1';
legend1PtSpec <- subset(ptspec,gene==legend1Gene);
legend1PtSpec <- legend1PtSpec[nrow(legend1PtSpec):1,];
legend1DivX <- -R+2/5*R;
segments(legend1DivX,legendDivBot,legend1DivX,legendDivTop,lwd=3);
text(legend1DivX-legendDivLeftSpace,(legendTop+legendBot)/2,legend1Gene,c(1,NA),family='sans',font=2,cex=1.5);
legend1PtX <- legend1DivX+legendDivRightSpace;
legend1PtYSpace <- (legendTop-legendBot)/(nrow(legend1PtSpec)+1);
legend1PtY <- seq(legendBot+legend1PtYSpace,legendTop-legend1PtYSpace,len=nrow(legend1PtSpec));
with(legend1PtSpec,points(rep(legend1PtX,nrow(legend1PtSpec)),legend1PtY,pch=pch,col=col,bg=bg,cex=cex,lwd=lwd));
legend1LabelX <- legend1PtX+legendPtSpace;
text(rep(legend1LabelX,nrow(legend1PtSpec)),legend1PtY,with(legend1PtSpec,ifelse(is.na(plant),photoperiod,paste(plant,photoperiod))),c(0,NA),family='sans',font=2,cex=1.5);
## legend 2
legend2Gene <- 'CAB2';
legend2PtSpec <- subset(ptspec,gene==legend2Gene);
legend2PtSpec <- legend2PtSpec[nrow(legend2PtSpec):1,];
legend2DivX <- 2/5*R;
segments(legend2DivX,legendDivBot,legend2DivX,legendDivTop,lwd=3);
text(legend2DivX-legendDivLeftSpace,(legendTop+legendBot)/2,legend2Gene,c(1,NA),family='sans',font=2,cex=1.5);
legend2PtX <- legend2DivX+legendDivRightSpace;
legend2PtYSpace <- (legendTop-legendBot)/(nrow(legend2PtSpec)+1);
legend2PtY <- seq(legendBot+legend2PtYSpace,legendTop-legend2PtYSpace,len=nrow(legend2PtSpec));
with(legend2PtSpec,points(rep(legend2PtX,nrow(legend2PtSpec)),legend2PtY,pch=pch,col=col,bg=bg,cex=cex,lwd=lwd));
legend2LabelX <- legend2PtX+legendPtSpace;
text(rep(legend2LabelX,nrow(legend2PtSpec)),legend2PtY,with(legend2PtSpec,ifelse(is.na(plant),photoperiod,paste(plant,photoperiod))),c(0,NA),family='sans',font=2,cex=1.5);
Related
I have run a short simulation and want to plot the outcomes of each simulation in terms of the "running sum" over parameter k. For reference, I want to end up with a plot that looks similar to the ones in this article:
https://www.pinnacle.com/en/betting-articles/Betting-Strategy/betting-bankroll-management/VDM2GY6UX3B552BG
The following is the code for the simulation:
## Simulating returns over k bets.
odds <- 1.5
k <- 100
return <- odds - 1
edge <- 0.04
pw <- 1/(odds/(1-edge))
pl <- 1-pw
nsims <- 10000
set.seed(42)
sims <- replicate(nsims, {
x <- sample(c(-1,return), k, TRUE, prob=c(pl, pw))
})
rownames(sims) <- c(1:k)
colnames(sims) <- c(1:nsims)
If I wasn't being clear in the description let me know.
Okay so here is how you can achieve the plot of the cumulative value over bets (I set nsims <- 10 so that the plot is readable).
First I generate the data :
## Simulating returns over k bets.
odds <- 1.5
k <- 100
return <- odds - 1
edge <- 0.04
pw <- 1/(odds/(1-edge))
pl <- 1-pw
nsims <- 10
set.seed(42)
sims <- replicate(nsims, {
x <- sample(c(-1,return), k, TRUE, prob=c(pl, pw))
})
rownames(sims) <- c(1:k)
colnames(sims) <- c(1:nsims)
Then I create a dataframe containing the results of the n simulations (10 here) :
df <- as.data.frame(sims)
What we want to plot is the cumulative sum, not the result at a specific bet so we iterate through the columns (i.e. the simulations) to have that value :
for (i in colnames(df)){
df[[i]] <- cumsum(df[[i]])
}
df <- mutate(df, bets = rownames(df))
output <- melt(df, id.vars = "bets", variable.name = 'simulation')
Now we can plot our data :
ggplot(output, aes(bets,value,group=simulation)) + geom_line(aes(colour = simulation))
I'm trying to follow the code and steps described on THIS page.
Which is in two parts:
Part 1
library(foreach)
library(doParallel)
library(data.table)
library(raster)
# Time the code
start <- proc.time()
if (!file.exists("./DataSets")) {
dir.create("./DataSets")
}
# Data Source:
# http://sedac.ciesin.columbia.edu/data/set/gpw-v3-population-count/data-download
# Format: .ascii, 1/2 degree, 2000
population.file <- "./Canada/VoteDensityRaster64Bit.tif"
# Load the raster file
population.raster <- raster(population.file)
# Convert the raster file to a points file
population.points <- rasterToPoints(population.raster)
all.data <- as.data.table(population.points)
setnames(all.data, c("x", "y", "population"))
# If you have your data in a CSV file, use this instead
# file <- "./DataSets/NBBuildingsWGS84.csv"
# all.data <- data.table(fread(file))
# The following are used to manipulate various data sets
# colnames(all.data) <- c("Name", "Mass", "Latitude", "Longitude") # Meteorites
# all.data$X <- as.numeric(all.data$X)
# all.data$Y <- as.numeric(all.data$Y)
# all.data$Mass <- as.numeric(all.data$Mass)
startEnd <- function(lats, lngs) {
# Find the "upper left" (NW) and "bottom right" (SE) coordinates
# of a set of data.
#
# Args:
# lats: A list of latitude coordinates
# lngs: A list of longitude coordinates
#
# Returns:
# A list of values corresponding to the northwest-most and
# southeast-most coordinates
# Convert to real number and remove NA values
lats <- na.omit(as.numeric(lats))
lngs <- na.omit(as.numeric(lngs))
topLat <- max(lats)
topLng <- min(lngs)
botLat <- min(lats)
botLng <- max(lngs)
return(c(topLat, topLng, botLat, botLng))
}
startEndVals <- startEnd(all.data$y, all.data$x)
remove(startEnd)
startLat <- startEndVals[1]
endLat <- startEndVals[3]
startLng <- startEndVals[2]
endLng <- startEndVals[4]
remove(startEndVals)
interval.v.num = 200.0
interval.h.num = 800.0
interval.v <- (startLat - endLat) / interval.v.num
interval.h <- (endLng - startLng) / interval.h.num
remove(num_intervals)
lat.list <- seq(startLat, endLat + interval.v, -1*interval.v)
# testLng <- -66.66152983 # Fredericton
# testLat <- 45.96538183 # Fredericton
# Prepare the data to be sent in
# If you have a value you want to sum, use this
data <- all.data[,list(x, y, population)]
# If you want to perform a count, use this
# data <- all.data[,list(x, y)]
# data[,Value:=1]
sumInsideSquare <- function(pointLat, pointLng, data) {
# Sum all the values that fall within a square on a map given a point,
# an interval of the map, and data that contains lat, lng and the values
# of interest
setnames(data, c("lng", "lat", "value"))
# Get data inside lat/lon boundaries
lng.interval <- c(pointLng, pointLng + interval.h)
lat.interval <- c(pointLat - interval.v, pointLat)
data <- data[lng %between% lng.interval][lat %between% lat.interval]
return(sum(data$value))
}
# Debugging
# squareSumTemp <- sumInsideSquare(testLat, testLng, interval, data)
# Given a start longitude and an end longitude, calculate an array of values
# corresponding to the sums for that latitude
calcSumLat <- function(startLng, endLng, lat, data) {
row <- c()
lng <- startLng
while (lng < endLng) {
row <- c(row, sumInsideSquare(lat, lng, data))
lng <- lng + interval.h
}
return(row)
}
# Debugging
# rowTemp <- calcSumLat(startLng, endLng, testLat, interval, data)
# write.csv(rowTemp, file = "Temp.csv", row.names = FALSE)
# Set up parallel computing with the number of cores you have
cl <- makeCluster(detectCores(), outfile = "./Progress.txt")
registerDoParallel(cl)
all.sums <- foreach(lat=lat.list, .packages=c("data.table")) %dopar% {
lat.data <- calcSumLat(startLng, endLng, lat, data)
# Progress indicator that works on Mac/Windows
print((startLat - lat)/(startLat - endLat)*100) # Prints to Progress.txt
lat.data
}
stopCluster(cl = cl)
# Convert to data frame
all.sums.table <- as.data.table(all.sums)
# Save to disk so I don't have to run it again
if (!file.exists("./GeneratedData")) {
dir.create("./GeneratedData")
}
output.file <- "./GeneratedData/VoteDensityHighRes.csv"
write.csv(all.sums.table, file = output.file, row.names = FALSE)
# End timer
totalTime <- proc.time() - start
print(totalTime)
# remove(cl, endLat, endLng, startLat, startLng, lat.list, start, calcSumLat, sumInsideSquare, interval)
Part 2
library(graphics)
library(tcltk)
library(pracma)
# Load the data generated by 01GenerateData.R
plot.data <- read.csv("GeneratedData/VoteDensityHighRes.csv", header=TRUE, stringsAsFactors=FALSE)
# Add padding above/below where there was data
# On top
top.padding <- 1:23
for (i in top.padding) {
plot.data <- cbind(0, plot.data)
}
# On bottom
bottom.padding <- 1:23
for (i in bottom.padding) {
plot.data <- cbind(plot.data, 0)
}
# On left
zero.row <- vector(mode="integer", length=dim(plot.data)[1])
left.padding <- 1:10
for (i in left.padding) {
plot.data <- rbind(zero.row, plot.data)
}
# On right
right.padding <- 1:10
for (i in left.padding) {
plot.data <- rbind(plot.data, zero.row)
}
max <- max(plot.data) # Max value in the data, used for scaling
plottingHeight <- 1000 # Arbitrary number that provides the graph's height
scaleFactor <- 300 # Discovered through trial and error to keep the graph in the boundaries
gap <- plottingHeight / length(plot.data) # Space between lines
# Output the file to a 36 inch by 24 inch SVG canvas
plot.width = 36
plot.height = 24
svg(filename = "./TestPlots/CanadaSG03.svg", pointsize=12, width=plot.width, height=plot.height)
# Create a blank plot
yVals <- as.vector(plot.data[[1]] / max * scaleFactor)
plot(0, 0, xlim=c(0, length(yVals)), ylim=c(0,1100), type="n", las=1, xlab=NA, ylab=NA, bty="n", axes=FALSE)
plotting.threshold <- 0.1
plot.length = length(plot.data)
# Progress bar
pb = tkProgressBar(title = "Plot Progress", label = "", min = 1, max = plot.length, initial = 1, width = 300)
# Plot each line
for (i in 1:plot.length) {
# Grabs a row of data
yVals <- as.vector(plot.data[[i]] / max * scaleFactor)
xVals <- c(0:(length(yVals) - 1))
yVals.smooth = savgol(yVals, 3, forder=4)
polygon(xVals, yVals.smooth + plottingHeight, border = NA, col = "#ffffff")
lines(xVals, yVals.smooth + plottingHeight, col="#cccccc", lwd=1.5)
# Plot the peaks with a darker line.
j <- 2 # Skip padding
while (j <= (length(yVals.smooth) - 2)) {
if ((yVals.smooth[j]) > plotting.threshold | (yVals.smooth[j+1]) > plotting.threshold) {
segments(xVals[j], yVals.smooth[j] + plottingHeight, xVals[j+1], yVals.smooth[j+1] + plottingHeight, col="#000000", lwd=1.5)
} else { } # Do nothing
j <- j + 1
}
plottingHeight <- plottingHeight - gap
# Update the progress bar
info <- sprintf("%d%% Complete", round(i / plot.length * 100))
setTkProgressBar(pb, i, title="Progress", info)
}
dev.off()
Sys.sleep(1)
close(pb) # Close the progress bar after a couple seconds
Everything runs perfect until this part of the code from the second part is running:
yVals <- as.vector(plot.data[[1]] / max * scaleFactor)
plot(0, 0, xlim=c(0, length(yVals)), ylim=c(0,1100), type="n", las=1,xlab=NA, ylab=NA, bty="n", axes=FALSE)
And I get the following error message:
Error in plot.new() : cairo error 'error while writing to output stream'
I'm using R 3.3.1 and Rstudio on windows 10, I've also try to run the code with R 2.15.3.
How can i fix this error?
I am trying to create pretty figures of clustered points. Is there a package which will create the divide chain between tessellations of points? Ideally it would be fit for plotting in ggplot.
Here is some example code:
#DivideLineExample
library(spatstat)
W=owin(c(0,1),c(0,1)) # Set up the Window
p<-runifpoint(42, win=W) # Get random points
ll=cbind(p$x,p$y) # get lat/long for each point
zclust=kmeans(ll,centers=4) # Cluster the points spatially into 4 clusters
K<-pp<-D<-list()
plot(W,main="Clustered Points")
for (i in 1:4){ # this breaks up the points into separate ppp objects for each cluster
K[[i]]=ll[zclust$cluster==i,]
pp[[i]]=as.ppp(K[[i]],W)
plot(pp[[i]],col=i,add=TRUE,cex=1.5,pch=16)
D[[i]]=dirichlet(pp[[i]]) # This performs the Dirichlet Tessellation and plots
plot(D[[i]],col=i,add=TRUE)
}
This outputs as such:
http://imgur.com/CCXeOEB
What I'm looking for is this:
http://imgur.com/7nmtXjo
I know an algorithm exists.
Any ideas/alternatives?
I have written a function that I think will do what you want:
divchain <- function (X) {
stopifnot(is.ppp(X))
if(!is.multitype(X)) {
whinge <- paste(deparse(substitute(X)),
"must be a marked pattern with",
"factor valued marks.\n")
stop(whinge)
}
X <- unique(X, rule = "deldir", warn = TRUE)
w <- Window(X)
require(deldir)
dd <- deldir(X,z=marks(X),rw=c(w$xrange,w$yrange))
if (is.null(dd))
return(NULL)
ddd <- dd$dirsgs
sss <- dd$summary
z <- sss[["z"]]
rslt <- list()
nsgs <- nrow(ddd)
K <- 0
for (i in 1:nsgs) {
i1 <- ddd[i,5]
i2 <- ddd[i,6]
c1 <- z[i1]
c2 <- z[i2]
if(c1 != c2) {
K <- K+1
rslt[[K]] <- unlist(ddd[i,1:4])
}
}
class(rslt) <- "divchain"
attr(rslt,"rw") <- dd$rw
rslt
}
I have also written a plot method for class "divchain":
plot.divchain <- function(x,add=FALSE,...){
if(!add) {
rw <- attr(x,"rw")
plot(0,0,type="n",ann=FALSE,axes=FALSE,xlim=rw[1:2],ylim=rw[3:4])
bty <- list(...)$bty
box(bty=bty)
}
lapply(x,function(u){segments(u[1],u[2],u[3],u[4],...)})
invisible()
}
E.g.:
require(spatstat)
set.seed(42)
X <- runifpoint(50)
z <- factor(kmeans(with(X,cbind(x,y)),centers=4)$cluster)
marks(X) <- z
dcX <- divchain(X)
plot(dirichlet(X),border="brown",main="")
plot(X,chars=20,cols=1:4,add=TRUE)
plot(dcX,add=TRUE,lwd=3)
Let me know whether this is satisfactory. Sorry I can't help you with ggplot stuff; I don't do ggplot.
You could try point in polygon test for example like kirkpatrick data structure. Much easier is to divide the polygon in horizontal or vertical. Source:http://www.personal.kent.edu/~rmuhamma/Compgeometry/MyCG/Voronoi/DivConqVor/divConqVor.htm
This is a slightly specific problem, so a bit of knowledge of R and of Bézier curves is required to be of help... (thanks if you do!!)
So I need some help with my R code: I have a series of discretely sampled observations and I am trying to fit a Bézier Curve of the 5th order through these points with simple LSS regression. I have some limitations on the position of the 6 control points:
A & B have the same Y-axis coordinate
B & C have the same X-axis coordinate
C & D have the same Y-axis coordinate
D & E have the same X-axis coordinate
E & F have the same Y-axis coordinate
A is located on the observation 2 turning points ago from the last
observation
The X-axis coordinate of the last observation is
somewhere between the X-axis coordinates of E and F
Like this image:
Say I have these data:
-0.01105
-0.01118
-0.01271
-0.01479
-0.01729
-0.01996
-0.02250
-0.02473
-0.02554
-0.02478
-0.02207
-0.01788
-0.01319
-0.00956
They have a "curvy" shape so a Bézier curve would fit: the result of my code is this image: the data are in red, the 5th order Bézier and its control points with their restrictions in blue:
Like this image:
So you see that I have some kind of solution, but this is the problem:
The X-axis location of right-most control point is always to the right of the last input data point, and to get an appropriate fit, I had to require a value of t (t goes from 0 to 1 in a Bézier) where t is at if the input data end (the "limit" variable in my code). How do I rewrite it so I don't have to do that anymore, and the horizontal spread of the t-values remains constant, also outside of the input data?
(given the restrictions on the control points, and maximizing the fit of the part of the curve that overlaps with the input data)
If you can help, please take a look at this R code, any help is .. much much appreciated and happy holidays!!
ps: what I call exampledata.csv in my code is just the data above.
getT <- function(x){
# Calculates length from origin of each point in the path.
# args:
# x : a one dimensional vector
# Returns:
# out : a vector of distances from the origin, as a percent of end point - start point distance
out <- cumsum(abs(diff(x)))
out <- c(0, out/ out[length(out)])
return(out)
}
cost_f <- function(X,Y,K){
pred <-K%*%X
c <- Y- pred
out <- list(loss= as.vector(t(c)%*%c), pred = pred)
return(out)
}
df <- read.csv('exampledata.csv')
T <- nrow(df)
df['d'] = 1:T
# # identify all turning points:
# turn_point <- c(1)
# for(i in 2:(T-1)){
# if( ( (df[i,'x'] < df[i-1,'x']) & (df[i,'x'] < df[i+1,'x'])) | ( (df[i,'x'] > df[i-1,'x']) & (df[i,'x'] > df[i+1,'x'])) ){
# turn_point <- c(turn_point, i)
# }
# }
fit_last_piece <- function(df){
limit <- .79
turn_point <- c(1)
for(i in 2:(T-1)){
if( ( (df[i,'x'] < df[i-1,'x']) & (df[i,'x'] < df[i+1,'x'])) | ( (df[i,'x'] > df[i-1,'x']) & (df[i,'x'] > df[i+1,'x'])) ){
turn_point <- c(turn_point, i)
}
}
nk <- length(turn_point) # number of turning points
data <- df[turn_point[nk-1]:nrow(df),]
end_x <- data$d[1]
end_y <- data$x[1]
constr_x <- matrix(c(1,0,0,0,0,0, # remember data is input column to column
0,1,1,0,0,0,
0,0,0,1,1,0,
0,0,0,0,0,1),nrow = 6, ncol = 4)
constr_y <- matrix(c(1,1,0,0,0,0,
0,0,1,1,0,0,
0,0,0,0,1,1),nrow = 6, ncol = 3)
M = matrix(c(-1,5,-10,10,-5,1,
5,-20,30,-20,5,0,
-10,30,-30,10,0,0,
10,-20,10,0,0,0,
-5,5,0,0,0,0,
1,0,0,0,0,0),nrow = 6, ncol = 6)
t_x = getT(data$d)*limit
T_x = cbind(t_x^5, t_x^4 ,t_x^3, t_x^2, t_x,rep(1,length(t_x)))
in_par <- ( tail(data$d,1)-data$d[1])*c(2/5,4/5,6/5) + data$d[1] # initial values of the intermediate x levels are at 1/3 and 2/3 midpoints
res_x <- optim(par = in_par, fn = function(par){cost_f(c(data$d[1], par[1],par[2], par[3]), data$d, T_x%*%M%*%constr_x)$loss})
#res_x <- optimize(f = function(par){cost_f(c(df$d[1],par,df$d[nrow(df)]), df$d, T_x%*%M%*%constr_x)$loss}, interval = c(df$d[1],df$d[nrow(df)]),tol = .Machine$double.eps^0.25)
optim_x <- c(data$d[1],res_x$par)
pred_x <- cost_f(optim_x, data$d, T_x%*%M%*%constr_x)$pred
t_y = getT(data$x)*limit
T_y = cbind(t_y^5, t_y^4,t_y^3, t_y^2, t_y,rep(1,length(t_y)))
in_par <- c()
res_y <- optim(par = c(data$x[floor(nrow(data)/2)],tail(data$x,1)), fn = function(par){cost_f(c(data$x[1],par[1],par[2]), data$x, T_y%*%M%*%constr_y)$loss})
optim_y <- c(data$x[1],res_y$par[1],res_y$par[2])
#pred_y <- cost_f(res_y$par, df$x, T_y%*%M%*%constr_y)$pred
pred_y <- cost_f(optim_y, data$x, T_y%*%M%*%constr_y)$pred
t_x_p <- c(t_x,seq(tail(t_x,1),1,length.out = 10))
T_x_p <- cbind(t_x_p^5, t_x_p^4 ,t_x_p^3, t_x_p^2, t_x_p,rep(1,length(t_x_p)))
t_y_p <- c(t_y,seq(tail(t_y,1),1,length.out = 10))
T_y_p <- cbind(t_y_p^5, t_y_p^4 ,t_y_p^3, t_y_p^2, t_y_p,rep(1,length(t_y_p)))
pred_x <- T_x_p%*%M%*%constr_x%*%optim_x
pred_y <- T_y_p%*%M%*%constr_y%*%optim_y
# this part is new:
plot(pred_x,pred_y, ylim = c(min(c(data$x, pred_y,res_y$par)), max(c(data$x, pred_y,res_y$par))),col="blue",type="b")
points(data$d,data$x,col = 'red',type="b")
points(pred_x[1],pred_y[1],pch=20,col='blue')
points(res_x$par[1],pred_y[1],pch=20,col='blue')
points(res_x$par[1],res_y$par[1],pch=20,col='blue')
points(res_x$par[2],res_y$par[1],pch=20,col='blue')
points(res_x$par[2],res_y$par[2],pch=20,col='blue')
points(res_x$par[3],res_y$par[2],pch=20,col='blue')
segments(pred_x[1],pred_y[1],res_x$par[1],pred_y[1],lty=3,col='blue')
segments(res_x$par[1],pred_y[1],res_x$par[1],res_y$par[1],lty=3,col='blue')
segments(res_x$par[1],res_y$par[1],res_x$par[2],res_y$par[1],lty=3,col='blue')
segments(res_x$par[2],res_y$par[1],res_x$par[2],res_y$par[2],lty=3,col='blue')
segments(res_x$par[2],res_y$par[2],res_x$par[3],res_y$par[2],lty=3,col='blue')
}
fit_last_piece(df)
Does anyone know of a way to turn the output of contourLines polygons in order to plot as filled contours, as with filled.contours. Is there an order to how the polygons must then be plotted in order to see all available levels? Here is an example snippet of code that doesn't work:
#typical plot
filled.contour(volcano, color.palette = terrain.colors)
#try
cont <- contourLines(volcano)
fun <- function(x) x$level
LEVS <- sort(unique(unlist(lapply(cont, fun))))
COLS <- terrain.colors(length(LEVS))
contour(volcano)
for(i in seq(cont)){
COLNUM <- match(cont[[i]]$level, LEVS)
polygon(cont[[i]], col=COLS[COLNUM], border="NA")
}
contour(volcano, add=TRUE)
A solution that uses the raster package (which calls rgeos and sp). The output is a SpatialPolygonsDataFrame that will cover every value in your grid:
library('raster')
rr <- raster(t(volcano))
rc <- cut(rr, breaks= 10)
pols <- rasterToPolygons(rc, dissolve=T)
spplot(pols)
Here's a discussion that will show you how to simplify ('prettify') the resulting polygons.
Thanks to some inspiration from this site, I worked up a function to convert contour lines to filled contours. It's set-up to process a raster object and return a SpatialPolygonsDataFrame.
raster2contourPolys <- function(r, levels = NULL) {
## set-up levels
levels <- sort(levels)
plevels <- c(min(values(r), na.rm=TRUE), levels, max(values(r), na.rm=TRUE)) # pad with raster range
llevels <- paste(plevels[-length(plevels)], plevels[-1], sep=" - ")
llevels[1] <- paste("<", min(levels))
llevels[length(llevels)] <- paste(">", max(levels))
## convert raster object to matrix so it can be fed into contourLines
xmin <- extent(r)#xmin
xmax <- extent(r)#xmax
ymin <- extent(r)#ymin
ymax <- extent(r)#ymax
rx <- seq(xmin, xmax, length.out=ncol(r))
ry <- seq(ymin, ymax, length.out=nrow(r))
rz <- t(as.matrix(r))
rz <- rz[,ncol(rz):1] # reshape
## get contour lines and convert to SpatialLinesDataFrame
cat("Converting to contour lines...\n")
cl <- contourLines(rx,ry,rz,levels=levels)
cl <- ContourLines2SLDF(cl)
## extract coordinates to generate overall boundary polygon
xy <- coordinates(r)[which(!is.na(values(r))),]
i <- chull(xy)
b <- xy[c(i,i[1]),]
b <- SpatialPolygons(list(Polygons(list(Polygon(b, hole = FALSE)), "1")))
## add buffer around lines and cut boundary polygon
cat("Converting contour lines to polygons...\n")
bcl <- gBuffer(cl, width = 0.0001) # add small buffer so it cuts bounding poly
cp <- gDifference(b, bcl)
## restructure and make polygon number the ID
polys <- list()
for(j in seq_along(cp#polygons[[1]]#Polygons)) {
polys[[j]] <- Polygons(list(cp#polygons[[1]]#Polygons[[j]]),j)
}
cp <- SpatialPolygons(polys)
cp <- SpatialPolygonsDataFrame(cp, data.frame(id=seq_along(cp)))
## cut the raster by levels
rc <- cut(r, breaks=plevels)
## loop through each polygon, create internal buffer, select points and define overlap with raster
cat("Adding attributes to polygons...\n")
l <- character(length(cp))
for(j in seq_along(cp)) {
p <- cp[cp$id==j,]
bp <- gBuffer(p, width = -max(res(r))) # use a negative buffer to obtain internal points
if(!is.null(bp)) {
xy <- SpatialPoints(coordinates(bp#polygons[[1]]#Polygons[[1]]))[1]
l[j] <- llevels[extract(rc,xy)]
}
else {
xy <- coordinates(gCentroid(p)) # buffer will not be calculated for smaller polygons, so grab centroid
l[j] <- llevels[extract(rc,xy)]
}
}
## assign level to each polygon
cp$level <- factor(l, levels=llevels)
cp$min <- plevels[-length(plevels)][cp$level]
cp$max <- plevels[-1][cp$level]
cp <- cp[!is.na(cp$level),] # discard small polygons that did not capture a raster point
df <- unique(cp#data[,c("level","min","max")]) # to be used after holes are defined
df <- df[order(df$min),]
row.names(df) <- df$level
llevels <- df$level
## define depressions in higher levels (ie holes)
cat("Defining holes...\n")
spolys <- list()
p <- cp[cp$level==llevels[1],] # add deepest layer
p <- gUnaryUnion(p)
spolys[[1]] <- Polygons(p#polygons[[1]]#Polygons, ID=llevels[1])
for(i in seq(length(llevels)-1)) {
p1 <- cp[cp$level==llevels[i+1],] # upper layer
p2 <- cp[cp$level==llevels[i],] # lower layer
x <- numeric(length(p2)) # grab one point from each of the deeper polygons
y <- numeric(length(p2))
id <- numeric(length(p2))
for(j in seq_along(p2)) {
xy <- coordinates(p2#polygons[[j]]#Polygons[[1]])[1,]
x[j] <- xy[1]; y[j] <- xy[2]
id[j] <- as.numeric(p2#polygons[[j]]#ID)
}
xy <- SpatialPointsDataFrame(cbind(x,y), data.frame(id=id))
holes <- over(xy, p1)$id
holes <- xy$id[which(!is.na(holes))]
if(length(holes)>0) {
p2 <- p2[p2$id %in% holes,] # keep the polygons over the shallower polygon
p1 <- gUnaryUnion(p1) # simplify each group of polygons
p2 <- gUnaryUnion(p2)
p <- gDifference(p1, p2) # cut holes in p1
} else { p <- gUnaryUnion(p1) }
spolys[[i+1]] <- Polygons(p#polygons[[1]]#Polygons, ID=llevels[i+1]) # add level
}
cp <- SpatialPolygons(spolys, pO=seq_along(llevels), proj4string=CRS(proj4string(r))) # compile into final object
cp <- SpatialPolygonsDataFrame(cp, df)
cat("Done!")
cp
}
It probably holds several inefficiencies, but it has worked well in the tests I've conducted using bathymetry data. Here's an example using the volcano data:
r <- raster(t(volcano))
l <- seq(100,200,by=10)
cp <- raster2contourPolys(r, levels=l)
cols <- terrain.colors(length(cp))
plot(cp, col=cols, border=cols, axes=TRUE, xaxs="i", yaxs="i")
contour(r, levels=l, add=TRUE)
box()
Building on the excellent work of Paul Regular, here is a version that should ensure exclusive polygons (i.e. no overlapping).
I've added a new argument fd for fairy dust to address an issue I discovered working with UTM-type coordinates. Basically as I understand the algorithm works by sampling lateral points from the contour lines to determine which side is inside the polygon. The distance of the sample point from the line can create problems if it ends up in e.g. behind another contour. So if your resulting polygons looks wrong try setting fd to values 10^±n until it looks very wrong or about right..
raster2contourPolys <- function(r, levels = NULL, fd = 1) {
## set-up levels
levels <- sort(levels)
plevels <- c(min(values(r)-1, na.rm=TRUE), levels, max(values(r)+1, na.rm=TRUE)) # pad with raster range
llevels <- paste(plevels[-length(plevels)], plevels[-1], sep=" - ")
llevels[1] <- paste("<", min(levels))
llevels[length(llevels)] <- paste(">", max(levels))
## convert raster object to matrix so it can be fed into contourLines
xmin <- extent(r)#xmin
xmax <- extent(r)#xmax
ymin <- extent(r)#ymin
ymax <- extent(r)#ymax
rx <- seq(xmin, xmax, length.out=ncol(r))
ry <- seq(ymin, ymax, length.out=nrow(r))
rz <- t(as.matrix(r))
rz <- rz[,ncol(rz):1] # reshape
## get contour lines and convert to SpatialLinesDataFrame
cat("Converting to contour lines...\n")
cl0 <- contourLines(rx, ry, rz, levels = levels)
cl <- ContourLines2SLDF(cl0)
## extract coordinates to generate overall boundary polygon
xy <- coordinates(r)[which(!is.na(values(r))),]
i <- chull(xy)
b <- xy[c(i,i[1]),]
b <- SpatialPolygons(list(Polygons(list(Polygon(b, hole = FALSE)), "1")))
## add buffer around lines and cut boundary polygon
cat("Converting contour lines to polygons...\n")
bcl <- gBuffer(cl, width = fd*diff(bbox(r)[1,])/3600000) # add small buffer so it cuts bounding poly
cp <- gDifference(b, bcl)
## restructure and make polygon number the ID
polys <- list()
for(j in seq_along(cp#polygons[[1]]#Polygons)) {
polys[[j]] <- Polygons(list(cp#polygons[[1]]#Polygons[[j]]),j)
}
cp <- SpatialPolygons(polys)
cp <- SpatialPolygonsDataFrame(cp, data.frame(id=seq_along(cp)))
# group by elev (replicate ids)
# ids = sapply(slot(cl, "lines"), slot, "ID")
# lens = sapply(1:length(cl), function(i) length(cl[i,]#lines[[1]]#Lines))
## cut the raster by levels
rc <- cut(r, breaks=plevels)
## loop through each polygon, create internal buffer, select points and define overlap with raster
cat("Adding attributes to polygons...\n")
l <- character(length(cp))
for(j in seq_along(cp)) {
p <- cp[cp$id==j,]
bp <- gBuffer(p, width = -max(res(r))) # use a negative buffer to obtain internal points
if(!is.null(bp)) {
xy <- SpatialPoints(coordinates(bp#polygons[[1]]#Polygons[[1]]))[1]
l[j] <- llevels[raster::extract(rc,xy)]
}
else {
xy <- coordinates(gCentroid(p)) # buffer will not be calculated for smaller polygons, so grab centroid
l[j] <- llevels[raster::extract(rc,xy)]
}
}
## assign level to each polygon
cp$level <- factor(l, levels=llevels)
cp$min <- plevels[-length(plevels)][cp$level]
cp$max <- plevels[-1][cp$level]
cp <- cp[!is.na(cp$level),] # discard small polygons that did not capture a raster point
df <- unique(cp#data[,c("level","min","max")]) # to be used after holes are defined
df <- df[order(df$min),]
row.names(df) <- df$level
llevels <- df$level
## define depressions in higher levels (ie holes)
cat("Defining holes...\n")
spolys <- list()
p <- cp[cp$level==llevels[1],] # add deepest layer
p <- gUnaryUnion(p)
spolys[[1]] <- Polygons(p#polygons[[1]]#Polygons, ID=llevels[1])
for(i in seq(length(llevels)-1)) {
p1 <- cp[cp$level==llevels[i+1],] # upper layer
p2 <- cp[cp$level==llevels[i],] # lower layer
x <- numeric(length(p2)) # grab one point from each of the deeper polygons
y <- numeric(length(p2))
id <- numeric(length(p2))
for(j in seq_along(p2)) {
xy <- coordinates(p2#polygons[[j]]#Polygons[[1]])[1,]
x[j] <- xy[1]; y[j] <- xy[2]
id[j] <- as.numeric(p2#polygons[[j]]#ID)
}
xy <- SpatialPointsDataFrame(cbind(x,y), data.frame(id=id))
holes <- over(xy, p1)$id
holes <- xy$id[which(!is.na(holes))]
if(length(holes)>0) {
p2 <- p2[p2$id %in% holes,] # keep the polygons over the shallower polygon
p1 <- gUnaryUnion(p1) # simplify each group of polygons
p2 <- gUnaryUnion(p2)
p <- gDifference(p1, p2) # cut holes in p1
} else { p <- gUnaryUnion(p1) }
spolys[[i+1]] <- Polygons(p#polygons[[1]]#Polygons, ID=llevels[i+1]) # add level
}
cp <- SpatialPolygons(spolys, pO=seq_along(llevels), proj4string=CRS(proj4string(r))) # compile into final object
## make polygons exclusive (i.e. no overlapping)
cpx = gDifference(cp[1,], cp[2,], id=cp[1,]#polygons[[1]]#ID)
for(i in 2:(length(cp)-1)) cpx = spRbind(cpx, gDifference(cp[i,], cp[i+1,], id=cp[i,]#polygons[[1]]#ID))
cp = spRbind(cpx, cp[length(cp),])
## it's a wrap
cp <- SpatialPolygonsDataFrame(cp, df)
cat("Done!")
cp
}