ggplot Heatmap and Density Plot Errors - r

This new post is in reference to a previous post (Heatmap in a Shiny App).
The sample dataset is found here: Sample Dataset used in the Example
The resulting density plot and the plot showing the maximum values in the dataset for each position do not seem to match up. The third ggplot has a few issues that I am unsure how to fix.
I set the scale of the third ggplot in scale_fill_gradientn for 0 to 100. However, the heatmap colors of the resulting plot are not the same color as what the scale should show. For example, the 94.251 should be a darker organge, but it doesn't appear on the chart.
Some of the text for the Max Values in the third ggplot are not matched up to the rectangles of coordinate locations. I am looking to fix this issue.
I would also like the density plot in the first ggplot to show a blend, similar to the blend that is shown in this sample density plot. I'm not really sure how to do that:
library(grid)
library(ggplot2)
sensor.data <- read.csv("Sample_Dataset.csv")
# Create position -> coord conversion
pos.names <- names(sensor.data)[ grep("*Pos",names(sensor.data)) ] # Get column names with "Pos" in them
mock.coords <<- list()
lapply(pos.names, function(name){
})
mock.coords <- list ("Position1"=data.frame("x"=0.1,"y"=0.2),
"Position2"=data.frame("x"=0.2,"y"=0.4),
"Position3"=data.frame("x"=0.3,"y"=0.6),
"Position4"=data.frame("x"=0.4,"y"=0.65),
"Position5"=data.frame("x"=0.5,"y"=0.75),
"Position6"=data.frame("x"=0.6,"y"=0.6),
"Position7"=data.frame("x"=0.7,"y"=0.6),
"Position8"=data.frame("x"=0.8,"y"=0.43),
"Position9"=data.frame("x"=0.9,"y"=0.27),
"Position10"=data.frame("x"=0.75,"y"=0.12))
# Change format of your data matrix
df.l <- list()
cnt <- 1
for (i in 1:nrow(sensor.data)){
for (j in 1:length(pos.names)){
name <- pos.names[j]
curr.coords <- mock.coords[[name]]
df.l[[cnt]] <- data.frame("x.pos"=curr.coords$x,
"y.pos"=curr.coords$y,
"heat" =sensor.data[i,j])
cnt <- cnt + 1
}
}
df <- do.call(rbind, df.l)
# Load image
library(jpeg)
download.file("http://www.expresspcb.com/wp-content/uploads/2015/06/PhotoProductionPCB_TL_800.jpg","pcb.jpg")
img <- readJPEG("/home/oskar/pcb.jpg")
g <- rasterGrob(img, interpolate=TRUE,width=1,height=1)
# Show overlay of image and heatmap
ggplot(data=df,aes(x=x.pos,y=y.pos,fill=heat)) +
annotation_custom(g, xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf) +
stat_density2d( alpha=0.2,aes(fill = ..level..), geom="polygon" ) +
scale_fill_gradientn(colours = rev( rainbow(3) )) +
scale_x_continuous(expand=c(0,0)) +
scale_y_continuous(expand=c(0,0)) +
ggtitle("Density")
# # Show where max temperature is
# dat.max = df[which.max(df$heat),]
#
# ggplot(data=coords,aes(x=x,y=y)) +
# annotation_custom(g, xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf) +
# geom_point(data=dat.max,aes(x=x.pos,y=y.pos), shape=21,size=5,color="black",fill="red") +
# geom_text(data=dat.max,aes(x=x.pos,y=y.pos,label=round(heat,3)),vjust=-1,color="red",size=10) +
# ggtitle("Max Temp Position")
# bin data manually
# Manually set number of rows and columns in the matrix containing sums of heat for each square in grid
nrows <- 30
ncols <- 30
# Define image coordinate ranges
x.range <- c(0,1) # x-coord range
y.range <- c(0,1) # x-coord range
# Create matrix and set all entries to 0
heat.density.dat <- matrix(nrow=nrows,ncol=ncols)
heat.density.dat[is.na(heat.density.dat)] <- 0
# Subdivide the coordinate ranges to n+1 values so that i-1,i gives a segments start and stop coordinates
x.seg <- seq(from=min(x.range),to=max(x.range),length.out=ncols+1)
y.seg <- seq(from=min(y.range),to=max(y.range),length.out=nrows+1)
# List to hold found values
a <- list()
cnt <- 1
for( ri in 2:(nrows+1)){
x.vals <- x.seg [c(ri-1,ri)]
for ( ci in 2:(ncols+1)){
# Get current segments, for example x.vals = [0.2, 0.3]
y.vals <- y.seg [c(ci-1,ci)]
# Find which of the entries in the data.frame that has x or y coordinates in the current grid
x.inds <- which( ( (df$x.pos >= min(x.vals)) & (df$x.pos <= max(x.vals)))==T )
y.inds <- which( ((df$y.pos >= min(y.vals)) & (df$y.pos <= max(y.vals)))==T )
# Find which entries has both x and y in current grid
inds <- intersect( x.inds , y.inds )
# If there's any such coordinates
if (length(inds) > 0){
# Append to list
a[[cnt]] <- data.frame("x.start"=min(x.vals), "x.stop"=max(x.vals),
"y.start"=min(y.vals), "y.stop"=max(y.vals),
"acc.heat"=sum(df$heat[inds],na.rm = T) )
print(length(df$heat[inds]))
# Increment counter variable
cnt <- cnt + 1
}
}
}
# Construct data.frame from list
heat.dens.df <- do.call(rbind,a)
# Plot again
ggplot(data=heat.dens.df,aes(x=x.start,y=y.start)) +
annotation_custom(g, xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf) +
geom_rect(data=heat.dens.df, aes(xmin=x.start, xmax=x.stop, ymin=y.start, ymax=y.stop, fill=acc.heat), alpha=0.5) +
scale_fill_gradientn(colours = rev( rainbow(3) )) +
scale_x_continuous(expand=c(0,0)) +
scale_y_continuous(expand=c(0,0))
mock.coords <- list ("Position1"=data.frame("x"=0.1,"y"=0.2),
"Position2"=data.frame("x"=0.2,"y"=0.4),
"Position3"=data.frame("x"=0.3,"y"=0.6),
"Position4"=data.frame("x"=0.4,"y"=0.65),
"Position5"=data.frame("x"=0.5,"y"=0.75),
"Position6"=data.frame("x"=0.6,"y"=0.6),
"Position7"=data.frame("x"=0.7,"y"=0.6),
"Position8"=data.frame("x"=0.8,"y"=0.43),
"Position9"=data.frame("x"=0.9,"y"=0.27),
"Position10"=data.frame("x"=0.75,"y"=0.12))
# Show where max temperature is
heat.dat <- sensor.data[pos.names]
# Get max for each position
max.df <- apply(heat.dat,2,max)
dat.max.l <- lapply(1:length(max.df), function(i){
h.val <- max.df[i]
c.name <- names(h.val)
c.coords <- mock.coords[[c.name]]
data.frame("x.pos"=c.coords$x, "y.pos"=c.coords$y,"heat"=h.val)
})
coords <- data.frame("x"=c(0,1),"y"=c(0,1))
dat.max <- do.call(rbind,dat.max.l)
ggplot(data=coords,aes(x=x,y=y)) +
annotation_custom(g, xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf) +
geom_point(data=dat.max,aes(x=x.pos,y=y.pos), shape=13,size=5,color="black",fill="red") +
geom_text(data=dat.max,aes(x=x.pos,y=y.pos,label=round(heat,3)),vjust=-1,color="red",size=10) +
geom_rect(data=heat.dens.df, aes(xmin=x.start, xmax=x.stop, ymin=y.start, ymax=y.stop, fill=acc.heat,x=NULL,y=NULL), alpha=0.5) +
scale_fill_gradientn(limits = c(0,100), colours = rev( rainbow(3) )) +
scale_x_continuous(expand=c(0,0)) +
scale_y_continuous(expand=c(0,0))

A couple of things.
To center the text, remove the vjust value in geom_text
In order to create a heatmap for this data we need some type of interpolation or smoothing since you only have data for 10 points (or you'll have a heatmap with just a few datapoints)
This could be a solution:
library(grid)
library(ggplot2)
sensor.data <- read.csv("/home/oskar/Downloads/Sample_Dataset.csv - Sample_Dataset.csv.csv")
# Create position -> coord conversion
pos.names <- names(sensor.data)[ grep("*Pos",names(sensor.data)) ] # Get column names with "Pos" in them
mock.coords <- list ("Position1"=data.frame("x"=0.1,"y"=0.2),
"Position2"=data.frame("x"=0.2,"y"=0.4),
"Position3"=data.frame("x"=0.3,"y"=0.6),
"Position4"=data.frame("x"=0.4,"y"=0.65),
"Position5"=data.frame("x"=0.5,"y"=0.75),
"Position6"=data.frame("x"=0.6,"y"=0.6),
"Position7"=data.frame("x"=0.7,"y"=0.6),
"Position8"=data.frame("x"=0.8,"y"=0.43),
"Position8.1"=data.frame("x"=0.85,"y"=0.49),
"Position9"=data.frame("x"=0.9,"y"=0.27),
"Position10"=data.frame("x"=0.75,"y"=0.12))
# Change format of your data matrix
df.l <- list()
cnt <- 1
for (i in 1:nrow(sensor.data)){
for (j in 1:length(pos.names)){
name <- pos.names[j]
curr.coords <- mock.coords[[name]]
df.l[[cnt]] <- data.frame("x.pos"=curr.coords$x,
"y.pos"=curr.coords$y,
"heat" =sensor.data[i,j])
cnt <- cnt + 1
}
}
df <- do.call(rbind, df.l)
# Load image
library(jpeg)
download.file("http://www.expresspcb.com/wp-content/uploads/2015/06/PhotoProductionPCB_TL_800.jpg","pcb.jpg")
img <- readJPEG("/home/oskar/pcb.jpg")
g <- rasterGrob(img, interpolate=TRUE,width=1,height=1)
# Manually set number of rows and columns in the matrix containing max of heat for each square in grid
nrows <- 50
ncols <- 50
# Define image coordinate ranges
x.range <- c(0,1) # x-coord range
y.range <- c(0,1) # x-coord range
x.bounds <- seq(from=min(x.range),to=max(x.range),length.out = ncols + 1)
y.bounds <- seq(from=min(y.range),to=max(y.range),length.out = nrows + 1)
# Create matrix and set all entries to 0
heat.max.dat <<- matrix(nrow=nrows,ncol=ncols)
lapply(1:length(mock.coords), function(i){
c <- mock.coords[[i]]
# calculate where in matrix this fits
x <- c$x; y <- c$y
x.ind <- findInterval(x, x.bounds)
y.ind <- findInterval(y, y.bounds)
heat.max.dat[x.ind,y.ind] <<- max(sensor.data[names(mock.coords)[i]])
})
heat.max.dat[is.na(heat.max.dat)]<-0
require(fields)
# Look at the image plots to see how the smoothing works
#image(heat.max.dat)
h.mat.interp <- image.smooth(heat.max.dat)
#image(h.mat.interp$z)
mat <- h.mat.interp$z
require(reshape2)
m.dat <- melt(mat)
# Change to propper coors, image is assumed to have coors between 0-1
m.dat$Var1 <- seq(from=min(x.range),to=max(x.range),length.out=ncols)[m.dat$Var1]
m.dat$Var2 <- seq(from=min(y.range),to=max(y.range),length.out=ncols)[m.dat$Var2]
# Show where max temperature is
heat.dat <- sensor.data[pos.names]
# Get max for each position
max.df <- apply(heat.dat,2,max)
dat.max.l <- lapply(1:length(max.df), function(i){
h.val <- max.df[i]
c.name <- names(h.val)
c.coords <- mock.coords[[c.name]]
data.frame("x.pos"=c.coords$x, "y.pos"=c.coords$y,"heat"=h.val)
})
dat.max <- do.call(rbind,dat.max.l)
coords <- data.frame("x"=c(0,1),"y"=c(0,1))
ggplot(data=coords,aes(x=x,y=y)) +
annotation_custom(g, xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf) +
geom_raster(data=m.dat,aes(x=Var1,y=Var2,fill=value), interpolate = TRUE, alpha=0.5) +
scale_fill_gradientn(colours = rev( rainbow(3) ),guide=FALSE) +
geom_text(data=dat.max,aes(x=x.pos,y=y.pos,label=round(heat,3)),vjust=0,color="white",size=5) +
scale_x_continuous(expand=c(0,0)) +
scale_y_continuous(expand=c(0,0))
In the end I get this

Related

Radial Visualization coordinate with ggplot2

I'm trying to obtain a radial coordinate plot using ggplot2 and the coord_polar(). I have use the drep package from R and obtain a radviz2d(), see NotMyRadViz. Since I desire to do it in ggplot I extracted all the necessary functions. Also I wish to obtain this using the coord_polar() and when I do it my variables rotate MyRadViz2D. Does anyone have an idea how to fix this?
library("dprep")
radviz2d(iris,name="Iris")
#-------------------- Extracted from mmnorm.R
n <- dim(iris)[1]
p <- dim(iris)[2]
classes <- iris[,p]
varnames <- colnames(iris)
dataset <- as.matrix(mmnorm(iris))
dataset <- dataset[,-p]
sumrows <- rowSums(dataset)
columns <- seq(0,p-2)
angles <- (2*pi*columns)
angles <- angles/(p-1)
cosines <- cos(angles)
sines <- sin(angles)
proj.x <- (dataset%*%cosines)
proj.x <- proj.x/sumrows
proj.y <- (dataset%*%sines)
proj.y <- proj.y/sumrows
#--------------- What I'd been doing
library("ggplot2")
library("reshape2")
dataset2 <- melt(dataset/sumrows)
classnumbers <- 1:length(unique(classes))
classes <- as.numeric(classes,drop=TRUE)
Projxy <- data.frame(Projx = proj.x, Projy = proj.y, Ids = classes)
Cos_Sin <- data.frame(Angle = angles,Cos=cosines,
Sin = sines, Varnames = varnames[-p])
theta <- angles * 180/pi
d2 <- data.frame(theta,Varnames=varnames[-p])
#------------ My ggplot2
ggplot(data=dataset2,aes(x = Var2, y = value)) + geom_blank() +
ylab("") + xlab("") + ggtitle(paste("RadViz 2D for", "Iris")) +
coord_polar("y",start = pi/2) +
geom_point(data = Projxy,aes(x = Projx, y = Projy,col = factor(Ids))) +
coord_polar() + theme(legend.position = "bottom")

Boundaries with ggplot2

I have produced the following plot using ggplot2. As you see I have 3 different classes colored as red black blue. I would like plot two curves on the two boundary that separate red points from black point and blue points from black points. Any ideas I am completely lost.
My code is:
datax=data.frame(x=y_data,y=x_data,
Diff_Motif_XY=factor(diff_motif,levels=c(1,0,‌​-1)),
size=factor(abs(diff_motif)))
#
p=ggplot(datax,aes(x,y))+
geom_point(aes(colour = Diff_Motif_XY,size=size))+
xlab(cond2)+
ylab(cond1)+
scale_colour_manual(values=c("red","black","blue"))
I got (way too) curious. I think it looks like the boundary is a hyperbola. One could calculate the optimal bounding hyperbola using something like optim, but it would be a fair amount of work and it might not converge.
# Generate some data because the OP did not provide any
npts <- 30000
l_data <- pmax(0,runif(npts,-10,20))
s_data <- (20-l_data + 10)/6
xstar <- -5.1
ystar <- -5.1
x_data <- pmax(0,l_data + rnorm(npts,0,s_data)) + xstar
y_data <- pmax(0,l_data + rnorm(npts,0,s_data)) + ystar
ha <- 6.0
hb <- 6.0
xy2 <- ((x_data-xstar)/ha)^2 - ((y_data-ystar)/hb)^2 + 0.8*rnorm(npts)
diff_motif <- ifelse(xy2>1,1,ifelse(-xy2<1,0,-1))
cond1 <- ""
cond2 <- ""
# We need this to plot our hyperbola
genhyperbola <- function( cx,cy,a,b,u0,u1,nu,swap=F)
{
# Generate a hyperbola through the parametric representation
# which uses sinh and cosh
# We generate nu segements from u0 to u1
# swap just swaps the x and y axes allowing for a north-south hyperbola (swap=T)
#
# https://en.wikipedia.org/wiki/Hyperbola
#
u <- seq(u0,u1,length.out=nu)
x <- a*cosh(u)
y <- b*sinh(u)
df <- data.frame(x=x,y=y)
df$x <- df$x + cx
df$y <- df$y + cy
if (swap){
# for north-south hyperbolas
tmp <- df$x
df$x <- df$y
df$y <- tmp
}
return(df)
}
hyp1 <- genhyperbola(xstar,ystar, ha,hb, 0,2.2,100, swap=F)
hyp2 <- genhyperbola(xstar,ystar, ha,hb, 0,2.2,100, swap=T)
datax=data.frame(x=x_data,y=y_data,
Diff_Motif_XY=factor(diff_motif,levels=c(1,0,-1)),
size=0)
eqlab1 <- sprintf("((x+%.1f)/%.1f)^{2}-((y+%.1f)/%.1f)^{2} == 1",xstar,ha,ystar,hb)
eqlab2 <- sprintf("((y+%.1f)/%.1f)^{2}-((x+%.1f)/%.1f)^{2} == 1",ystar,hb,xstar,ha)
#
p=ggplot(datax,aes(x,y))+
geom_point(aes(colour = Diff_Motif_XY),shape=".")+
geom_path(data=hyp1,aes(x,y),color=I("purple"),size=1)+
geom_path(data=hyp2,aes(x,y),color=I("brown"),size=1)+
xlab(cond2)+
ylab(cond1)+
scale_colour_manual(values=c("blue","black","red")) +
annotate('text', x=xstar+20, y=ystar+2,
label = eqlab1,parse = TRUE,size=6,color="purple") +
annotate('text', x=xstar+5, y=ystar+20,
label = eqlab2,parse = TRUE,size=6,color="brown")
print(p)
And here is the image:

geom_raster faceted plot with ggplot2: control row height

In the example below I have a dataset containing two experiments F1 and F2. A classification is performed based on F1 signal, and both F1 and F2 values are ordered accordingly. In this diagram, each facet has the same dimension although the number of rows is not the same (e.g class #7 contains only few elements compare to the other classes). I would like to modify the code to force row height to be the same across facets (facets would thus have various blank space below). Any hints would be greatly appreciated.
Thank you
library(ggplot2)
library(reshape2)
set.seed(123)
# let's create a fake dataset
nb.experiment <- 4
n.row <- 200
n.col <- 5
nb.class <- 7
d <- matrix(round(runif(n.row * n.col),2), nc=n.col)
colnames(d) <- sprintf("%02d", 1:5)
# These strings will be the row names of each heatmap
# in the subsequent facet plot
elements <- sample(replicate(n.row/2, rawToChar(as.raw(sample(65:90, 6, replace=T)))))
# let's create a data.frame d
d <- data.frame(d,
experiment = sort(rep(c("F1","F2"), n.row/2)),
elements= elements)
# Now we split the dataset by experiments
d.split <- split(d, d$experiment)
# Now we create classes (here using hierarchical clustering )
# based on F1 experiment
dist.mat <- as.dist(1-cor(t(d.split$F1[,1:5]), method="pearson"))
hc <- hclust(dist.mat)
cuts <- cutree(hc, nb.class)
levels(cuts) <- sprintf("Class %02d", 1:nb.experiment)
# We split F1 and F2 based on classification result
for(s in names(d.split)){
d.split[[s]] <- split(d.split[[s]], cuts)
}
# Data are melt (their is perhaps a better solution...)
# in order to use the ggplot function
dm <- melt(do.call('rbind',lapply(d.split, melt)), id.var=c( "experiment", "elements", "variable", "L1"))
dm <- dm[, -5]
colnames(dm) <- c("experiment","elements", "pos", "class", "exprs")
dm$class <- as.factor(dm$class)
levels(dm$class) <- paste("Class", levels(dm$class))
# Now we plot the data
p <- ggplot(dm, aes(x = pos, y = elements, fill = exprs))
p <- p + geom_raster()
p <- p + facet_wrap(~class +experiment , scales = "free", ncol = 2)
p <- p + theme_bw()
p <- p + theme(text = element_text(size=4))
p <- p + theme(text = element_text(family = "mono", face = "bold"))
print(p)
Use facet_grid instead of facet_wrap and set the space attribute:
ggplot(dm, aes(x = pos, y = elements, fill = exprs)) +
geom_raster() +
facet_grid(rowMeanClass ~ experiment , scales = "free", space = "free_y") +
theme_bw()

How can I create a custom colour scale using ggplot2 and geom_tile?

I would like to modify the colour gradient in order to match a set of predefined thresholds/cutpoints and colours. How can I do this?
Cutoff values: -0.103200, 0.007022, 0.094090, 0.548600
Colors: "#EDF8E9", "#BAE4B3", "#74C476", "#238B45"
#Create sample data
pp <- function (n,r=4) {
x <- seq(-r*pi, r*pi, len=n)
df <- expand.grid(x=x, y=x)
df$r <- sqrt(df$x^2 + df$y^2)
df$z <- cos(df$r^2)*exp(-df$r/6)
df
}
pp(20)->data
#create the plot
library(ggplo2)
p <- ggplot(pp(20), aes(x=x,y=y))
p + geom_tile(aes(fill=z))
#Generate custom colour ramp
library(RColorBrewer)
cols <- brewer.pal(4, "Greens")
You may try scale_fill_brewer. First, bin your z values:
df <- pp(20)
df$z_bin <- cut(df$z, breaks = c(-Inf, -0.103200, 0.007022, 0.094090, 0.548600))
Plot:
ggplot(data = df, aes(x = x, y = y, fill = z_bin)) +
geom_tile() +
scale_fill_brewer(palette = "Greens")
Use cut and match the bins to your colors. My code assumes -0.103200 is minimum of your vector (to sort the number of bins).
trh <- c(-0.103200, 0.007022, 0.094090, 0.548600, Inf)
colors <- c("#EDF8E9", "#BAE4B3", "#74C476", "#238B45")
x <- runif(30, min = -0.103200, max = 1)
xc <- cut(x, breaks = trh)
colors[xc]

How to plot a series of coordinates as rectangles using ggplot2 and R that that won't overlap?

I recently asked a question on SO about how to make rectangles from a series of coordinates, link is here.
The answer was perfect, and allows me to generate my rectangles really well:
# Sample data
plot.data <- data.frame(start.points=c(5, 32),
end.points=c(15, 51),
text.label=c("Sample A", "Sample B"))
plot.data$text.position <- (plot.data$start.points + plot.data$end.points)/2
# Plot using ggplot
library(ggplot2)
p <- ggplot(plot.data)
p + geom_rect(aes(xmin=start.points, xmax=end.points, ymin=0, ymax=3),
fill="yellow") +
theme_bw() + geom_text(aes(x=text.position, y=1.5, label=text.label)) +
labs(x=NULL, y=NULL)
However, I realized that my data often has overlapping coordinates, and I want to be able to visualize each individual span without washing out overlapping spans. So, let's use this as an example data set: 2-3, 5-10, 7-10
The current code will give something like:
---- -----------------
----| |----| |-------------
---- -----------------
However, I want to somehow change the code so that overlapping data will be visualized on a new track:
---- -----------------
----| |----| |-------------
---- -----------------
-------------
----------------| |---------
-------------
Sorry for the stupid ASCII art!
Does anyone have a suggestion? I wouldn't be adverse to independently generating several images and then stacking them, if that's easiest. Thanks!
You could compute sequences of non-overlapping intervals by hand, and space out the rectangles accordingly. Here it is with the intervals package: (note we assume your points are ordered by start.points -- this is easy to do)
library(intervals)
plot.data <- data.frame(start.points = c(1,2,4,6,8,11), end.points = c(3,5,9,10,12,13),
text.label = paste0('Sample ', LETTERS[1:6]))
plot.data$text.position <- (plot.data$start.points + plot.data$end.points)/2
overlap <- interval_overlap(tmp <- Intervals(c(plot.data$start.points, plot.data$end.points)), tmp)
# Find the next non-overlapping interval
nexts <- lapply(overlap, function(x) max(x) + 1)
non_overlaps <- list()
while(sum(sapply(nexts, Negate(is.na))) > 0) {
consec <- c()
i <- which(sapply(nexts, Negate(is.na)))[1]
# Find a stretch of consecutive non-overlapping intervals
while(!is.na(i) && i <= length(nexts) && !any(sapply(non_overlaps, function(y) i %in% y))) {
consec <- c(consec, i); i <- nexts[[i]]
}
non_overlaps <- append(non_overlaps, list(consec))
# Wipe out that stretch since we're no longer looking at it
nexts[consec] <- NA
}
# Squash remaining non-overlapping intervals -- the packing is not yet compact
i <- 1
while (i < length(non_overlaps)) {
ints1 <- non_overlaps[[i]]
ints1 <- Intervals(c(plot.data$start.points[ints1], plot.data$end.points[ints1]))
j <- i + 1
while(j <= length(non_overlaps)) {
ints2 <- Intervals(c(plot.data$start.points[non_overlaps[[j]]],
plot.data$end.points[non_overlaps[[j]]]))
iv <- interval_overlap(ints1, ints2)
if (length(c(iv, recursive = TRUE)) == 0) break;
j <- j + 1
}
if (j <= length(non_overlaps)) {
# we can merge non_overlaps[[i]] and non_overlaps[[j]]
non_overlaps[[i]] <- c(non_overlaps[[i]], non_overlaps[[j]])
non_overlaps[[j]] <- NULL
} else {
# we are done non_overlaps[[i]] -- nothing else can be squashed!
i <- i + 1
}
}
We now have
print(non_overlaps)
# [[1]]
# [1] 1 3 6
#
# [[2]]
# [1] 2 4 6
#
# [[3]]
# [1] 5
We can graph these non-overlapping intervals on separate heights.
ymin <- length(non_overlaps) - 1 - (sapply(seq_len(nrow(plot.data)),
function(ix) which(sapply(non_overlaps, function(y) ix %in% y))) - 1)
ymax <- ymin + 0.9
text.position.y <- ymin + 0.45
ymin <- ymin / length(non_overlaps) * 3 # rescale for display
ymax <- ymax / length(non_overlaps) * 3 # rescale for display
text.position.y <- text.position.y / length(non_overlaps) * 3
library(ggplot2)
p <- ggplot(plot.data)
p + geom_rect(aes(xmin=start.points, xmax=end.points, ymin=ymin, ymax=ymax),
fill="yellow") +
theme_bw() + geom_text(aes(x=text.position, y=text.position.y, label=text.label)) +
labs(x=NULL, y=NULL)
The final result:
Some more examples:
plot.data <- data.frame(start.points = c(1,3,5,7,9,11,13), end.points = c(4,6,8,10,12,14, 16), text.label = paste0('Sample ', LETTERS[1:7]))
plot.data <- data.frame(start.points = seq(1, 13, by = 4), end.points = seq(4, 16, by = 4), text.label = paste0('Sample ', LETTERS[1:4]))
set.seed(100); plot.data <- data.frame(start.points = tmp <- sort(runif(26, 1, 15)), end.points = tmp + runif(26, 1, 3), text.label = paste0('Sample ', LETTERS))
P.S. I apologize for the chicken scratch, but I did this rather hastily -- I am sure some of these operations can be performed more cleverly!

Resources