I have produced the following plot using ggplot2. As you see I have 3 different classes colored as red black blue. I would like plot two curves on the two boundary that separate red points from black point and blue points from black points. Any ideas I am completely lost.
My code is:
datax=data.frame(x=y_data,y=x_data,
Diff_Motif_XY=factor(diff_motif,levels=c(1,0,-1)),
size=factor(abs(diff_motif)))
#
p=ggplot(datax,aes(x,y))+
geom_point(aes(colour = Diff_Motif_XY,size=size))+
xlab(cond2)+
ylab(cond1)+
scale_colour_manual(values=c("red","black","blue"))
I got (way too) curious. I think it looks like the boundary is a hyperbola. One could calculate the optimal bounding hyperbola using something like optim, but it would be a fair amount of work and it might not converge.
# Generate some data because the OP did not provide any
npts <- 30000
l_data <- pmax(0,runif(npts,-10,20))
s_data <- (20-l_data + 10)/6
xstar <- -5.1
ystar <- -5.1
x_data <- pmax(0,l_data + rnorm(npts,0,s_data)) + xstar
y_data <- pmax(0,l_data + rnorm(npts,0,s_data)) + ystar
ha <- 6.0
hb <- 6.0
xy2 <- ((x_data-xstar)/ha)^2 - ((y_data-ystar)/hb)^2 + 0.8*rnorm(npts)
diff_motif <- ifelse(xy2>1,1,ifelse(-xy2<1,0,-1))
cond1 <- ""
cond2 <- ""
# We need this to plot our hyperbola
genhyperbola <- function( cx,cy,a,b,u0,u1,nu,swap=F)
{
# Generate a hyperbola through the parametric representation
# which uses sinh and cosh
# We generate nu segements from u0 to u1
# swap just swaps the x and y axes allowing for a north-south hyperbola (swap=T)
#
# https://en.wikipedia.org/wiki/Hyperbola
#
u <- seq(u0,u1,length.out=nu)
x <- a*cosh(u)
y <- b*sinh(u)
df <- data.frame(x=x,y=y)
df$x <- df$x + cx
df$y <- df$y + cy
if (swap){
# for north-south hyperbolas
tmp <- df$x
df$x <- df$y
df$y <- tmp
}
return(df)
}
hyp1 <- genhyperbola(xstar,ystar, ha,hb, 0,2.2,100, swap=F)
hyp2 <- genhyperbola(xstar,ystar, ha,hb, 0,2.2,100, swap=T)
datax=data.frame(x=x_data,y=y_data,
Diff_Motif_XY=factor(diff_motif,levels=c(1,0,-1)),
size=0)
eqlab1 <- sprintf("((x+%.1f)/%.1f)^{2}-((y+%.1f)/%.1f)^{2} == 1",xstar,ha,ystar,hb)
eqlab2 <- sprintf("((y+%.1f)/%.1f)^{2}-((x+%.1f)/%.1f)^{2} == 1",ystar,hb,xstar,ha)
#
p=ggplot(datax,aes(x,y))+
geom_point(aes(colour = Diff_Motif_XY),shape=".")+
geom_path(data=hyp1,aes(x,y),color=I("purple"),size=1)+
geom_path(data=hyp2,aes(x,y),color=I("brown"),size=1)+
xlab(cond2)+
ylab(cond1)+
scale_colour_manual(values=c("blue","black","red")) +
annotate('text', x=xstar+20, y=ystar+2,
label = eqlab1,parse = TRUE,size=6,color="purple") +
annotate('text', x=xstar+5, y=ystar+20,
label = eqlab2,parse = TRUE,size=6,color="brown")
print(p)
And here is the image:
Related
I am trying to create a plot where for each i there is a density graph and a histogram side by side. For this instance i = 1..3
The problem I have is creating the list to pass to grid.arrange. However I do it it seems to repeat itself somehow.
df:
x1 x2 x3
1 108.28 17.05 1484.10
2 152.36 16.59 750.33
3 95.04 10.91 766.42
4 65.45 14.14 1110.46
5 62.97 9.52 1031.29
6 263.99 25.33 195.26
7 265.19 18.54 193.83
8 285.06 15.73 191.11
9 92.01 8.10 1175.16
10 165.68 11.13 211.15
X <- df
mu.X <- colMeans(X)
cov.X <- cov(X)
eg <- eigen(cov.X)
myprinboot = function(
X,
iter = 10000,
alpha = 0.05,
prettyPlot = T
){
# Find the dimensions of X
nrX <- dim(X)[1]
nx <- dim(X)[2]
# Make matrices of suitable sizes to hold the booted parameter estimates
# lambda
# each cov matrix will have nx lambdas
lambda.mat <- matrix(NA, nr = nx, nc = iter)
# e vectors nx components each and one vector per eigen value
# Each cov matrix will therefore produce a nx X nx matrix of components
Y.mat <- matrix(NA, nr = nx, nc = iter * nx)
# For loop to fill the matrices created above
for (i in 1:iter)
{
# ind will contain random integers used to make random samples of the X matrix
# Must use number of rows nrX to index
ind <- sample(1:nrX,nrX,replace=TRUE)
# eigen will produce lambdas in decreasing order of size
# make an object then remove extract the list entries using $
eigvalvec <- eigen(cov(X[ind,]))
lambda.mat[,i] <- eigvalvec$values
colstart <- 1 + nx * (i - 1)
colend <- colstart + nx - 1
Y.mat[,colstart:colend] = eigvalvec$vectors
}
if(prettyPlot){
p <- list()
i <- 0
for(j in 1:(2*nx))
{
if (j %% 2 == 0){
p[[j]] <- ggplot(NULL, aes(lambda.mat[i,])) +
geom_histogram(color = 'black', fill = 'green', alpha = .5) +
xlab(substitute(lambda[i])) +
ggtitle(substitute(paste("Histogram of the pc variance ", lambda[i])))
} else {
i <- i + 1
p[[j]] <- ggplot(NULL, aes(lambda.mat[i,])) +
geom_density(fill = 'blue', alpha = .5) +
xlab((substitute(lambda[i]))) +
ggtitle(substitute(paste("Density plot of the pc variance ", lambda[i])))
}
do.call(grid.arrange, p)
}
do.call(grid.arrange, p)
} else {
layout(matrix(1:(2*nx),nr=nx,nc=2,byrow=TRUE))
for(i in 1:nx)
{
plot(density(lambda.mat[i,]),xlab=substitute(lambda[i]),
main=substitute(paste("Density plot of the pc variance ", lambda[i])
))
hist(lambda.mat[i,],xlab=substitute(lambda[i]),
main=substitute(paste("Histogram of the pc variance ", lambda[i])))
}
}
library(rgl)
plot3d(t(lambda.mat))
list(lambda.mat = lambda.mat, Y.mat = Y.mat)
}
pc <- myprinboot(X = Y, iter=1000, alpha=0.5)
Output
Anyone have any clue what I am doing wrong or is this just not possible?
I don't understand your code, Jay, as it seems to do lots of things and use both base and ggplot plotting, but if all you want is to create a combined histogram and density plot for each j, why not loop over j and inside that for j loop do something like this:
d <- your density plot created so that it depends on j only
h <- your histogram plot created so that it depends on j only
p[[j]] <- grid.arrange(d,h,ncol=2)
Then, when you come out of the loop, you'll have an object p which consists of a list of plots, with each plot consisting of a combination of density plot and histogram.
Then you could use the cowplot package (after installing it) to do something like this:
cowplot::plot_grid(plotlist = p, ncol = 2)
where the number of columns may need to be changed. See here for other ways to plot a list of plots: How do I arrange a variable list of plots using grid.arrange?
I don't know enough about your problem to understand why you treat the case of j even and j odd differently. But the underlying idea should be the same as what I suggested here.
I eventually got this working as follows.
getHist <- function(x, i){
lam <- paste('$\\lambda_', i, '$', sep='')
p <- qplot(x[i,],
geom="histogram",
fill = I('green'),
color = I('black'),
alpha = I(.5),
main=TeX(paste("Histogram of the pc variance ", lam, sep='')),
xlab=TeX(lam),
ylab="Count",
show.legend=F)
return(p)
}
getDens <- function(x, i){
lam <- paste('$\\lambda_', i, '$', sep='')
p <- qplot(x[i,],
geom="density",
fill = I('blue'),
alpha = I(.5),
main=TeX(paste("Density plot of the pc variance ", lam, sep='')),
xlab=TeX(lam),
ylab="Density",
show.legend=F)
return(p)
}
fp <- lapply(1:3, function(x) arrangeGrob(getHist(lambda.mat, x), getDens(lambda.mat, x), ncol=2))
print(marrangeGrob(fp, nrow = 3, ncol=1, top = textGrob("Lambda.mat Histogram and Density Plot",gp=gpar(fontsize=18))))
I am trying to calculate triangle area of pair of running points (X1, Y1,…. Xn, Yn) with a fix point (Cx,Cy) but without success. Can someone tell me please what is the problem so I can try to solve it?
Script:
library(ggplot2)
nElem <- 100
xData <- as.data.frame(seq(1,nElem,5))
yData <- as.data.frame(seq(5,nElem,5))
xyDATA<- cbind(xData,yData)
colnames(xyDATA) <- c("xCoord","yCoord")
Cx <- 10
Cy <- 1
ggplot(xyDATA) + geom_point(aes(x = xCoord, y = yCoord)) + geom_point(aes(x = Cx, y = Cy),colour="red",size=4)
for(i in 1:19)
{
Ax <- xyDATA[i,1]
Ay <- xyDATA[i,2]
Bx <- xyDATA[i+1,1]
By <- xyDATA[i+1,2]
s <- abs(0.5*((Ax*(By-Cy))+(Bx*(Cy-Ay))+(Cx*(Ay-By))))
# print(Ax)
# print(Ay)
# print(Bx)
# print(By)
print(s)
}
If you don't see the point graph drawn then you should modify ggplot line like:
p <- ggplot(xyDATA) + geom_point(aes(x = xCoord, y = yCoord)) +
geom_point(aes(x = Cx, y = Cy),colour="red",size=4)
print(p)
The print() method for the graph object produces the actual display.
Now the image is shown as:
I am running a simulation of mixture data. My function is harder than Gaussian distribution. Hence, here, I simplified my question to be in Gaussian form. That is, if I simulated a mixture data like this:
N=2000
U=runif(N, min=0,max=1)
X = matrix(NA, nrow=N, ncol=2)
for (i in 1:N){
if(U[i] < 0.7){
X[i,] <- rnorm(1,0.5,1)
} else {
X[i,] <- rnorm(1,3,5)
}
}
How can I have a scatter plot with different colour and shape (type of the plot point) for each cluster or distribution? I would like to have this manually since my function is hard and complex. I tried plot(X[,1],X[,2],col=c("red","blue")) but it does not work.
I think this is what you want. Note that I had to do a bit of guesswork here to figure out what was going on, because your example code seems to have an error in it, you weren't generating different x1 and x2 values in each row:
N=2000
U=runif(N, min=0,max=1)
X = matrix(NA, nrow = N, ncol=2)
for (i in 1:N){
if(U[i] < 0.7){
# You had rnorm(n=1, ...) which gives 2 identical values in each row
# Change that to 2 and you get different X1 and X2 values
X[i,] <- rnorm(2, 0.5, 1)
} else {
X[i,] <- rnorm(2, 3, 5)
}
}
df = data.frame(
source = ifelse(U < 0.7, "dist1", "dist2"),
x = X[, 1],
y = X[, 2]
)
library(ggplot2)
ggplot(df, aes(x = x, y = y, colour = source, shape = source)) +
geom_point()
Result:
Here's what I got, but I'm not sure if this what you are looking for - the location of the observations for both clusters are exactly the same.
library(tidyverse)
df <- data.frame(X = X, U = U)
df <- gather(df, key = cluster, value = X, -U)
ggplot(df, aes(x = X, y = U, colour = cluster)) + geom_point() + facet_wrap(~cluster)
EDIT: I don't seem to be understanding what you are looking to map onto a scatter plot, so I'll indicate how you need to shape your data in order to create a chart like the above with the proper X and Y coordinates:
head(df)
U cluster X
1 0.98345408 X.1 2.3296047
2 0.33939935 X.1 -0.6042917
3 0.66715421 X.1 -2.2673422
4 0.06093674 X.1 2.4007376
5 0.48162959 X.1 -2.3118850
6 0.50780007 X.1 -0.7307929
So you want one variable for the Y coordinate (I'm using variable U here), one variable for the X coordinate (using X here), and a 3rd variable that indicates whether the observation belongs to cluster 1 or cluster 2 (variable cluster here).
This new post is in reference to a previous post (Heatmap in a Shiny App).
The sample dataset is found here: Sample Dataset used in the Example
The resulting density plot and the plot showing the maximum values in the dataset for each position do not seem to match up. The third ggplot has a few issues that I am unsure how to fix.
I set the scale of the third ggplot in scale_fill_gradientn for 0 to 100. However, the heatmap colors of the resulting plot are not the same color as what the scale should show. For example, the 94.251 should be a darker organge, but it doesn't appear on the chart.
Some of the text for the Max Values in the third ggplot are not matched up to the rectangles of coordinate locations. I am looking to fix this issue.
I would also like the density plot in the first ggplot to show a blend, similar to the blend that is shown in this sample density plot. I'm not really sure how to do that:
library(grid)
library(ggplot2)
sensor.data <- read.csv("Sample_Dataset.csv")
# Create position -> coord conversion
pos.names <- names(sensor.data)[ grep("*Pos",names(sensor.data)) ] # Get column names with "Pos" in them
mock.coords <<- list()
lapply(pos.names, function(name){
})
mock.coords <- list ("Position1"=data.frame("x"=0.1,"y"=0.2),
"Position2"=data.frame("x"=0.2,"y"=0.4),
"Position3"=data.frame("x"=0.3,"y"=0.6),
"Position4"=data.frame("x"=0.4,"y"=0.65),
"Position5"=data.frame("x"=0.5,"y"=0.75),
"Position6"=data.frame("x"=0.6,"y"=0.6),
"Position7"=data.frame("x"=0.7,"y"=0.6),
"Position8"=data.frame("x"=0.8,"y"=0.43),
"Position9"=data.frame("x"=0.9,"y"=0.27),
"Position10"=data.frame("x"=0.75,"y"=0.12))
# Change format of your data matrix
df.l <- list()
cnt <- 1
for (i in 1:nrow(sensor.data)){
for (j in 1:length(pos.names)){
name <- pos.names[j]
curr.coords <- mock.coords[[name]]
df.l[[cnt]] <- data.frame("x.pos"=curr.coords$x,
"y.pos"=curr.coords$y,
"heat" =sensor.data[i,j])
cnt <- cnt + 1
}
}
df <- do.call(rbind, df.l)
# Load image
library(jpeg)
download.file("http://www.expresspcb.com/wp-content/uploads/2015/06/PhotoProductionPCB_TL_800.jpg","pcb.jpg")
img <- readJPEG("/home/oskar/pcb.jpg")
g <- rasterGrob(img, interpolate=TRUE,width=1,height=1)
# Show overlay of image and heatmap
ggplot(data=df,aes(x=x.pos,y=y.pos,fill=heat)) +
annotation_custom(g, xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf) +
stat_density2d( alpha=0.2,aes(fill = ..level..), geom="polygon" ) +
scale_fill_gradientn(colours = rev( rainbow(3) )) +
scale_x_continuous(expand=c(0,0)) +
scale_y_continuous(expand=c(0,0)) +
ggtitle("Density")
# # Show where max temperature is
# dat.max = df[which.max(df$heat),]
#
# ggplot(data=coords,aes(x=x,y=y)) +
# annotation_custom(g, xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf) +
# geom_point(data=dat.max,aes(x=x.pos,y=y.pos), shape=21,size=5,color="black",fill="red") +
# geom_text(data=dat.max,aes(x=x.pos,y=y.pos,label=round(heat,3)),vjust=-1,color="red",size=10) +
# ggtitle("Max Temp Position")
# bin data manually
# Manually set number of rows and columns in the matrix containing sums of heat for each square in grid
nrows <- 30
ncols <- 30
# Define image coordinate ranges
x.range <- c(0,1) # x-coord range
y.range <- c(0,1) # x-coord range
# Create matrix and set all entries to 0
heat.density.dat <- matrix(nrow=nrows,ncol=ncols)
heat.density.dat[is.na(heat.density.dat)] <- 0
# Subdivide the coordinate ranges to n+1 values so that i-1,i gives a segments start and stop coordinates
x.seg <- seq(from=min(x.range),to=max(x.range),length.out=ncols+1)
y.seg <- seq(from=min(y.range),to=max(y.range),length.out=nrows+1)
# List to hold found values
a <- list()
cnt <- 1
for( ri in 2:(nrows+1)){
x.vals <- x.seg [c(ri-1,ri)]
for ( ci in 2:(ncols+1)){
# Get current segments, for example x.vals = [0.2, 0.3]
y.vals <- y.seg [c(ci-1,ci)]
# Find which of the entries in the data.frame that has x or y coordinates in the current grid
x.inds <- which( ( (df$x.pos >= min(x.vals)) & (df$x.pos <= max(x.vals)))==T )
y.inds <- which( ((df$y.pos >= min(y.vals)) & (df$y.pos <= max(y.vals)))==T )
# Find which entries has both x and y in current grid
inds <- intersect( x.inds , y.inds )
# If there's any such coordinates
if (length(inds) > 0){
# Append to list
a[[cnt]] <- data.frame("x.start"=min(x.vals), "x.stop"=max(x.vals),
"y.start"=min(y.vals), "y.stop"=max(y.vals),
"acc.heat"=sum(df$heat[inds],na.rm = T) )
print(length(df$heat[inds]))
# Increment counter variable
cnt <- cnt + 1
}
}
}
# Construct data.frame from list
heat.dens.df <- do.call(rbind,a)
# Plot again
ggplot(data=heat.dens.df,aes(x=x.start,y=y.start)) +
annotation_custom(g, xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf) +
geom_rect(data=heat.dens.df, aes(xmin=x.start, xmax=x.stop, ymin=y.start, ymax=y.stop, fill=acc.heat), alpha=0.5) +
scale_fill_gradientn(colours = rev( rainbow(3) )) +
scale_x_continuous(expand=c(0,0)) +
scale_y_continuous(expand=c(0,0))
mock.coords <- list ("Position1"=data.frame("x"=0.1,"y"=0.2),
"Position2"=data.frame("x"=0.2,"y"=0.4),
"Position3"=data.frame("x"=0.3,"y"=0.6),
"Position4"=data.frame("x"=0.4,"y"=0.65),
"Position5"=data.frame("x"=0.5,"y"=0.75),
"Position6"=data.frame("x"=0.6,"y"=0.6),
"Position7"=data.frame("x"=0.7,"y"=0.6),
"Position8"=data.frame("x"=0.8,"y"=0.43),
"Position9"=data.frame("x"=0.9,"y"=0.27),
"Position10"=data.frame("x"=0.75,"y"=0.12))
# Show where max temperature is
heat.dat <- sensor.data[pos.names]
# Get max for each position
max.df <- apply(heat.dat,2,max)
dat.max.l <- lapply(1:length(max.df), function(i){
h.val <- max.df[i]
c.name <- names(h.val)
c.coords <- mock.coords[[c.name]]
data.frame("x.pos"=c.coords$x, "y.pos"=c.coords$y,"heat"=h.val)
})
coords <- data.frame("x"=c(0,1),"y"=c(0,1))
dat.max <- do.call(rbind,dat.max.l)
ggplot(data=coords,aes(x=x,y=y)) +
annotation_custom(g, xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf) +
geom_point(data=dat.max,aes(x=x.pos,y=y.pos), shape=13,size=5,color="black",fill="red") +
geom_text(data=dat.max,aes(x=x.pos,y=y.pos,label=round(heat,3)),vjust=-1,color="red",size=10) +
geom_rect(data=heat.dens.df, aes(xmin=x.start, xmax=x.stop, ymin=y.start, ymax=y.stop, fill=acc.heat,x=NULL,y=NULL), alpha=0.5) +
scale_fill_gradientn(limits = c(0,100), colours = rev( rainbow(3) )) +
scale_x_continuous(expand=c(0,0)) +
scale_y_continuous(expand=c(0,0))
A couple of things.
To center the text, remove the vjust value in geom_text
In order to create a heatmap for this data we need some type of interpolation or smoothing since you only have data for 10 points (or you'll have a heatmap with just a few datapoints)
This could be a solution:
library(grid)
library(ggplot2)
sensor.data <- read.csv("/home/oskar/Downloads/Sample_Dataset.csv - Sample_Dataset.csv.csv")
# Create position -> coord conversion
pos.names <- names(sensor.data)[ grep("*Pos",names(sensor.data)) ] # Get column names with "Pos" in them
mock.coords <- list ("Position1"=data.frame("x"=0.1,"y"=0.2),
"Position2"=data.frame("x"=0.2,"y"=0.4),
"Position3"=data.frame("x"=0.3,"y"=0.6),
"Position4"=data.frame("x"=0.4,"y"=0.65),
"Position5"=data.frame("x"=0.5,"y"=0.75),
"Position6"=data.frame("x"=0.6,"y"=0.6),
"Position7"=data.frame("x"=0.7,"y"=0.6),
"Position8"=data.frame("x"=0.8,"y"=0.43),
"Position8.1"=data.frame("x"=0.85,"y"=0.49),
"Position9"=data.frame("x"=0.9,"y"=0.27),
"Position10"=data.frame("x"=0.75,"y"=0.12))
# Change format of your data matrix
df.l <- list()
cnt <- 1
for (i in 1:nrow(sensor.data)){
for (j in 1:length(pos.names)){
name <- pos.names[j]
curr.coords <- mock.coords[[name]]
df.l[[cnt]] <- data.frame("x.pos"=curr.coords$x,
"y.pos"=curr.coords$y,
"heat" =sensor.data[i,j])
cnt <- cnt + 1
}
}
df <- do.call(rbind, df.l)
# Load image
library(jpeg)
download.file("http://www.expresspcb.com/wp-content/uploads/2015/06/PhotoProductionPCB_TL_800.jpg","pcb.jpg")
img <- readJPEG("/home/oskar/pcb.jpg")
g <- rasterGrob(img, interpolate=TRUE,width=1,height=1)
# Manually set number of rows and columns in the matrix containing max of heat for each square in grid
nrows <- 50
ncols <- 50
# Define image coordinate ranges
x.range <- c(0,1) # x-coord range
y.range <- c(0,1) # x-coord range
x.bounds <- seq(from=min(x.range),to=max(x.range),length.out = ncols + 1)
y.bounds <- seq(from=min(y.range),to=max(y.range),length.out = nrows + 1)
# Create matrix and set all entries to 0
heat.max.dat <<- matrix(nrow=nrows,ncol=ncols)
lapply(1:length(mock.coords), function(i){
c <- mock.coords[[i]]
# calculate where in matrix this fits
x <- c$x; y <- c$y
x.ind <- findInterval(x, x.bounds)
y.ind <- findInterval(y, y.bounds)
heat.max.dat[x.ind,y.ind] <<- max(sensor.data[names(mock.coords)[i]])
})
heat.max.dat[is.na(heat.max.dat)]<-0
require(fields)
# Look at the image plots to see how the smoothing works
#image(heat.max.dat)
h.mat.interp <- image.smooth(heat.max.dat)
#image(h.mat.interp$z)
mat <- h.mat.interp$z
require(reshape2)
m.dat <- melt(mat)
# Change to propper coors, image is assumed to have coors between 0-1
m.dat$Var1 <- seq(from=min(x.range),to=max(x.range),length.out=ncols)[m.dat$Var1]
m.dat$Var2 <- seq(from=min(y.range),to=max(y.range),length.out=ncols)[m.dat$Var2]
# Show where max temperature is
heat.dat <- sensor.data[pos.names]
# Get max for each position
max.df <- apply(heat.dat,2,max)
dat.max.l <- lapply(1:length(max.df), function(i){
h.val <- max.df[i]
c.name <- names(h.val)
c.coords <- mock.coords[[c.name]]
data.frame("x.pos"=c.coords$x, "y.pos"=c.coords$y,"heat"=h.val)
})
dat.max <- do.call(rbind,dat.max.l)
coords <- data.frame("x"=c(0,1),"y"=c(0,1))
ggplot(data=coords,aes(x=x,y=y)) +
annotation_custom(g, xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf) +
geom_raster(data=m.dat,aes(x=Var1,y=Var2,fill=value), interpolate = TRUE, alpha=0.5) +
scale_fill_gradientn(colours = rev( rainbow(3) ),guide=FALSE) +
geom_text(data=dat.max,aes(x=x.pos,y=y.pos,label=round(heat,3)),vjust=0,color="white",size=5) +
scale_x_continuous(expand=c(0,0)) +
scale_y_continuous(expand=c(0,0))
In the end I get this
I recently asked a question on SO about how to make rectangles from a series of coordinates, link is here.
The answer was perfect, and allows me to generate my rectangles really well:
# Sample data
plot.data <- data.frame(start.points=c(5, 32),
end.points=c(15, 51),
text.label=c("Sample A", "Sample B"))
plot.data$text.position <- (plot.data$start.points + plot.data$end.points)/2
# Plot using ggplot
library(ggplot2)
p <- ggplot(plot.data)
p + geom_rect(aes(xmin=start.points, xmax=end.points, ymin=0, ymax=3),
fill="yellow") +
theme_bw() + geom_text(aes(x=text.position, y=1.5, label=text.label)) +
labs(x=NULL, y=NULL)
However, I realized that my data often has overlapping coordinates, and I want to be able to visualize each individual span without washing out overlapping spans. So, let's use this as an example data set: 2-3, 5-10, 7-10
The current code will give something like:
---- -----------------
----| |----| |-------------
---- -----------------
However, I want to somehow change the code so that overlapping data will be visualized on a new track:
---- -----------------
----| |----| |-------------
---- -----------------
-------------
----------------| |---------
-------------
Sorry for the stupid ASCII art!
Does anyone have a suggestion? I wouldn't be adverse to independently generating several images and then stacking them, if that's easiest. Thanks!
You could compute sequences of non-overlapping intervals by hand, and space out the rectangles accordingly. Here it is with the intervals package: (note we assume your points are ordered by start.points -- this is easy to do)
library(intervals)
plot.data <- data.frame(start.points = c(1,2,4,6,8,11), end.points = c(3,5,9,10,12,13),
text.label = paste0('Sample ', LETTERS[1:6]))
plot.data$text.position <- (plot.data$start.points + plot.data$end.points)/2
overlap <- interval_overlap(tmp <- Intervals(c(plot.data$start.points, plot.data$end.points)), tmp)
# Find the next non-overlapping interval
nexts <- lapply(overlap, function(x) max(x) + 1)
non_overlaps <- list()
while(sum(sapply(nexts, Negate(is.na))) > 0) {
consec <- c()
i <- which(sapply(nexts, Negate(is.na)))[1]
# Find a stretch of consecutive non-overlapping intervals
while(!is.na(i) && i <= length(nexts) && !any(sapply(non_overlaps, function(y) i %in% y))) {
consec <- c(consec, i); i <- nexts[[i]]
}
non_overlaps <- append(non_overlaps, list(consec))
# Wipe out that stretch since we're no longer looking at it
nexts[consec] <- NA
}
# Squash remaining non-overlapping intervals -- the packing is not yet compact
i <- 1
while (i < length(non_overlaps)) {
ints1 <- non_overlaps[[i]]
ints1 <- Intervals(c(plot.data$start.points[ints1], plot.data$end.points[ints1]))
j <- i + 1
while(j <= length(non_overlaps)) {
ints2 <- Intervals(c(plot.data$start.points[non_overlaps[[j]]],
plot.data$end.points[non_overlaps[[j]]]))
iv <- interval_overlap(ints1, ints2)
if (length(c(iv, recursive = TRUE)) == 0) break;
j <- j + 1
}
if (j <= length(non_overlaps)) {
# we can merge non_overlaps[[i]] and non_overlaps[[j]]
non_overlaps[[i]] <- c(non_overlaps[[i]], non_overlaps[[j]])
non_overlaps[[j]] <- NULL
} else {
# we are done non_overlaps[[i]] -- nothing else can be squashed!
i <- i + 1
}
}
We now have
print(non_overlaps)
# [[1]]
# [1] 1 3 6
#
# [[2]]
# [1] 2 4 6
#
# [[3]]
# [1] 5
We can graph these non-overlapping intervals on separate heights.
ymin <- length(non_overlaps) - 1 - (sapply(seq_len(nrow(plot.data)),
function(ix) which(sapply(non_overlaps, function(y) ix %in% y))) - 1)
ymax <- ymin + 0.9
text.position.y <- ymin + 0.45
ymin <- ymin / length(non_overlaps) * 3 # rescale for display
ymax <- ymax / length(non_overlaps) * 3 # rescale for display
text.position.y <- text.position.y / length(non_overlaps) * 3
library(ggplot2)
p <- ggplot(plot.data)
p + geom_rect(aes(xmin=start.points, xmax=end.points, ymin=ymin, ymax=ymax),
fill="yellow") +
theme_bw() + geom_text(aes(x=text.position, y=text.position.y, label=text.label)) +
labs(x=NULL, y=NULL)
The final result:
Some more examples:
plot.data <- data.frame(start.points = c(1,3,5,7,9,11,13), end.points = c(4,6,8,10,12,14, 16), text.label = paste0('Sample ', LETTERS[1:7]))
plot.data <- data.frame(start.points = seq(1, 13, by = 4), end.points = seq(4, 16, by = 4), text.label = paste0('Sample ', LETTERS[1:4]))
set.seed(100); plot.data <- data.frame(start.points = tmp <- sort(runif(26, 1, 15)), end.points = tmp + runif(26, 1, 3), text.label = paste0('Sample ', LETTERS))
P.S. I apologize for the chicken scratch, but I did this rather hastily -- I am sure some of these operations can be performed more cleverly!