geom_raster faceted plot with ggplot2: control row height - r

In the example below I have a dataset containing two experiments F1 and F2. A classification is performed based on F1 signal, and both F1 and F2 values are ordered accordingly. In this diagram, each facet has the same dimension although the number of rows is not the same (e.g class #7 contains only few elements compare to the other classes). I would like to modify the code to force row height to be the same across facets (facets would thus have various blank space below). Any hints would be greatly appreciated.
Thank you
library(ggplot2)
library(reshape2)
set.seed(123)
# let's create a fake dataset
nb.experiment <- 4
n.row <- 200
n.col <- 5
nb.class <- 7
d <- matrix(round(runif(n.row * n.col),2), nc=n.col)
colnames(d) <- sprintf("%02d", 1:5)
# These strings will be the row names of each heatmap
# in the subsequent facet plot
elements <- sample(replicate(n.row/2, rawToChar(as.raw(sample(65:90, 6, replace=T)))))
# let's create a data.frame d
d <- data.frame(d,
experiment = sort(rep(c("F1","F2"), n.row/2)),
elements= elements)
# Now we split the dataset by experiments
d.split <- split(d, d$experiment)
# Now we create classes (here using hierarchical clustering )
# based on F1 experiment
dist.mat <- as.dist(1-cor(t(d.split$F1[,1:5]), method="pearson"))
hc <- hclust(dist.mat)
cuts <- cutree(hc, nb.class)
levels(cuts) <- sprintf("Class %02d", 1:nb.experiment)
# We split F1 and F2 based on classification result
for(s in names(d.split)){
d.split[[s]] <- split(d.split[[s]], cuts)
}
# Data are melt (their is perhaps a better solution...)
# in order to use the ggplot function
dm <- melt(do.call('rbind',lapply(d.split, melt)), id.var=c( "experiment", "elements", "variable", "L1"))
dm <- dm[, -5]
colnames(dm) <- c("experiment","elements", "pos", "class", "exprs")
dm$class <- as.factor(dm$class)
levels(dm$class) <- paste("Class", levels(dm$class))
# Now we plot the data
p <- ggplot(dm, aes(x = pos, y = elements, fill = exprs))
p <- p + geom_raster()
p <- p + facet_wrap(~class +experiment , scales = "free", ncol = 2)
p <- p + theme_bw()
p <- p + theme(text = element_text(size=4))
p <- p + theme(text = element_text(family = "mono", face = "bold"))
print(p)

Use facet_grid instead of facet_wrap and set the space attribute:
ggplot(dm, aes(x = pos, y = elements, fill = exprs)) +
geom_raster() +
facet_grid(rowMeanClass ~ experiment , scales = "free", space = "free_y") +
theme_bw()

Related

Use a gradient fill under a facet wrap of density curves in ggplot in R?

Similar questions have been asked before in other forms. Some can be found here and here. However, I cant seem to adapt them when using a facet wrap displaying multiple density plots.
I tried adapting the other examples, but failed... I also tried using the ggpattern package, but when there is a large amount of data, it takes several minutes on my machine to create a plot.
I am trying to create a gradient under the density curve... but with the gradient pointing down. Something like in the example image below:
Some example data to work with:
library(ggplot2)
set.seed(321)
# create data
varNames <- c("x1", "x2", "x3")
df <- data.frame(
var = sample(varNames, 100, replace = T),
val = runif(100)
)
# create plot
ggplot(df, aes(x = val)) +
geom_density(aes(colour = var, fill = var)) +
facet_wrap(~var) +
theme_bw() +
theme(legend.position = "none")
You can use teunbrand's function, but you will need to apply it to each facet. Here simply looping over it with lapply
library(tidyverse)
library(polyclip)
#> polyclip 1.10-0 built from Clipper C++ version 6.4.0
## This is teunbrands function copied without any change!!
## from https://stackoverflow.com/a/64695516/7941188
fade_polygon <- function(x, y, n = 100) {
poly <- data.frame(x = x, y = y)
# Create bounding-box edges
yseq <- seq(min(poly$y), max(poly$y), length.out = n)
xlim <- range(poly$x) + c(-1, 1)
# Pair y-edges
grad <- cbind(head(yseq, -1), tail(yseq, -1))
# Add vertical ID
grad <- cbind(grad, seq_len(nrow(grad)))
# Slice up the polygon
grad <- apply(grad, 1, function(range) {
# Create bounding box
bbox <- data.frame(x = c(xlim, rev(xlim)),
y = c(range[1], range[1:2], range[2]))
# Do actual slicing
slice <- polyclip::polyclip(poly, bbox)
# Format as data.frame
for (i in seq_along(slice)) {
slice[[i]] <- data.frame(
x = slice[[i]]$x,
y = slice[[i]]$y,
value = range[3],
id = c(1, rep(0, length(slice[[i]]$x) - 1))
)
}
slice <- do.call(rbind, slice)
})
# Combine slices
grad <- do.call(rbind, grad)
# Create IDs
grad$id <- cumsum(grad$id)
return(grad)
}
## now here starts the change, loop over your variables. I'm creating the data frame directly instead of keeping the density object
dens <- lapply(split(df, df$var), function(x) {
dens <- density(x$val)
data.frame(x = dens$x, y = dens$y)
}
)
## we need this one for the plot, but still need the list
dens_df <- bind_rows(dens, .id = "var")
grad <- bind_rows(lapply(dens, function(x) fade_polygon(x$x, x$y)), .id = "var")
ggplot(grad, aes(x, y)) +
geom_line(data = dens_df) +
geom_polygon(aes(alpha = value, group = id),
fill = "blue") +
facet_wrap(~var) +
scale_alpha_continuous(range = c(0, 1))
Created on 2021-12-05 by the reprex package (v2.0.1)

List of plots generated in ggplot2 using scale_color_gradientn have wrong coloring

I'm attempting to use library(scales) and scale_color_gradientn() to create a custom mapping of colors to a continuous variable, in an attempt to limit the effect of outliers on the coloring of my plot. This works for a single plot, but does not work when I use a loop to generate several plots and store them in a list.
Here is a minimal working example:
library(ggplot2)
library(scales)
data1 <- as.data.frame(cbind(x = rnorm(100),
y = rnorm(100),
v1 = rnorm(100, mean = 2, sd = 1),
v2 = rnorm(100, mean = -2, sd = 1)))
#add outliers
data1[1,"v1"] <- 200
data1[2,"v1"] <- -200
data1[1,"v2"] <- 50
data1[2,"v2"] <- -50
#define color palette
cols <- colorRampPalette(c("#3540FF","black","#FF3535"))(n = 100)
#simple color scale
col2 <- scale_color_gradient2(low = "#3540FF",
mid = "black",
high = "#FF3535"
)
#outlier-adjusted color scale
{
aa <- min(data1$v1)
bb <- quantile(data1$v1, 0.05)
cc <- quantile(data1$v1, 0.95)
dd <- max(data1$v1)
coln <- scale_color_gradientn(colors = cols[c(1,5,95,100)],
values = rescale(c(aa,bb,cc,dd),
limits = c(aa,dd))
)
}
Plots:
1. Plot with simple scales - outliers cause scales to stretch out.
ggplot(data1, aes(x = x, y = y, color = v1))+
geom_point()+
col2
2. Plot with outlier-adjusted scales - correct color scaling.
ggplot(data1, aes(x = x, y = y, color = v1))+
geom_point()+
coln
3. The scales for v1 do not work for v2 as the data is different.
ggplot(data1, aes(x = x, y = y, color = v2))+
geom_point()+
coln
#loop to produce list of plots each with own scale
{
plots <- list()
k <- 1
for (i in c("v1","v2")){
aa <- min(data1[,i])
bb <- quantile(data1[,i],0.05)
cc <- quantile(data1[,i], 0.95)
dd <- max(data1[,i])
colm <- scale_color_gradientn(colors = cols[c(1,5,95,100)],
values = rescale(c(aa,bb,cc,dd),
limits = c(aa,dd)))
plots[[k]] <- ggplot(data1, aes_string(x = "x",
y = "y",
color = i
))+
geom_point()+
colm
k <- k + 1
}
}
4. First plot has the wrong scales.
plots[[1]]
5. Second plot has the correct scales.
plots[[2]]
So I'm guessing this has something to do with the scale_color_gradientn() function being called when the plotting takes place, rather than within the loop.
If anyone can help with this, it'd be much appreciated. In base R I would bin the continuous data and assigning hex colors into a vector used for fill color, but I'm unsure how I can apply this within ggplot.
You need to use a closure (function with associated environment):
{
plots <- list()
k <- 1
for (i in c("v1", "v2")){
colm <- function() {
aa <- min(data1[, i])
bb <- quantile(data1[, i], 0.05)
cc <- quantile(data1[, i], 0.95)
dd <- max(data1[, i])
scale_color_gradientn(colors = cols[c(1, 5, 95, 100)],
values = rescale(c(aa, bb, cc, dd),
limits = c(aa, dd)))
}
plots[[k]] <- ggplot(data1, aes_string(x = "x",
y = "y",
color = i)) +
geom_point() +
colm()
k <- k + 1
}
}
plots[[1]]
plots[[2]]

Order heatmap rows in ggplot2 facet plot

I'm having a problem with faceted heatmap rendering in ggplot2. The idea is that I have several elements (these are genes in the real life) and several experiments (F1 and F2 in the example below). Using the F1 experiment, I'm able to create class of elements/genes based on their mean expression (high, ..., moderate, ..., low). In the heatmap produced through the example below, I would like to order each elements in each class (01, 02, 03, 04) based on its mean expression value in F1. Unfortunately, the elements appear in alphabetic order. I would be very happy to get some hints...
Best
library(ggplot2)
library(reshape2)
set.seed(123)
# let's create a fake dataset
nb.experiment <- 4
n.row <- 200
n.col <- 5
d <- matrix(round(runif(n.row * n.col),2), nc=n.col)
colnames(d) <- sprintf("%02d", 1:5)
# These strings will be the row names of each heatmap
# in the subsequent facet plot
elements <- sample(replicate(n.row/2, rawToChar(as.raw(sample(65:90, 6, replace=T)))))
# let's create a data.frame d
d <- data.frame(d,
experiment = sort(rep(c("F1","F2"), n.row/2)),
elements= elements)
# For elements related to experiment F1
# we artificially produce a gradient of values that will
# create elements with increasing row means
d[d$experiment =="F1",1:5] <- round(sweep(d[d$experiment =="F1",1:5],
1,
seq(from=1, 10, length.out = 100),
"+"), 2)
# For elements related to experiment F2
# we artificially produce a gradient of values that will
# create elements with decreasing row means
d[d$experiment =="F2",1:5] <- round(sweep(d[d$experiment =="F2",1:5],
1,
seq(from=10, 1, length.out = 100),
"+"), 2)
#print(d[d$experiment =="F1",1:5])
# Now we split the dataset by experiments
d.split <- split(d, d$experiment)
# For all experiments, we order elements based on the mean expression signal in
# F1.
row.means.F1 <- rowMeans(d.split$F1[,1:5])
pos <- order(row.means.F1)
for(s in names(d.split)){
d.split[[s]] <- d.split[[s]][pos,]
}
# We create several classes of elements based on their
# mean expression signal in F1.
cuts <- cut(1:nrow(d.split$F1), nb.experiment)
levels(cuts) <- sprintf("%02d", 1:nb.experiment)
for(s in names(d.split)){
d.split[[s]] <- split(d.split[[s]], cuts)
}
# Data are melt (their is perhaps a better solution...)
# in order to use the ggplot function
dm <- melt(do.call('rbind',lapply(d.split, melt)), id.var=c( "experiment", "elements", "variable", "L1"))
dm <- dm[, -5]
colnames(dm) <- c("experiment","elements", "pos", "rowMeanClass", "exprs")
# Now we plot the data
p <- ggplot(dm, aes(x = pos, y = elements, fill = exprs))
p <- p + geom_raster()
p <- p + facet_wrap(~rowMeanClass +experiment , scales = "free", ncol = 2)
p <- p + theme_bw()
p <- p + theme(text = element_text(size=4))
p <- p + theme(text = element_text(family = "mono", face = "bold"))
ggsave("RPlot_test.jpeg", p)
Using your advises I was able to find a solution (which implies to clearly specify the order of levels for the 'elements' factor). Thank you #hrbrmstr (and all others).
NB: I only added few lines compare to the original code that are denoted below with 'Added: begin' and 'Added: end' flags.
library(ggplot2)
library(reshape2)
set.seed(123)
# let's create a fake dataset
nb.experiment <- 4
n.row <- 200
n.col <- 5
d <- matrix(round(runif(n.row * n.col),2), nc=n.col)
colnames(d) <- sprintf("%02d", 1:5)
# These strings will be the row names of each heatmap
# in the subsequent facet plot
elements <- sample(replicate(n.row/2, rawToChar(as.raw(sample(65:90, 6, replace=T)))))
# let's create a data.frame d
d <- data.frame(d,
experiment = sort(rep(c("F1","F2"), n.row/2)),
elements= elements)
# For elements related to experiment F1
# we artificially produce a gradient of values that will
# create elements with increasing row means
d[d$experiment =="F1",1:5] <- round(sweep(d[d$experiment =="F1",1:5],
1,
seq(from=1, 10, length.out = 100),
"+"), 2)
# For elements related to experiment F2
# we artificially produce a gradient of values that will
# create elements with decreasing row means
d[d$experiment =="F2",1:5] <- round(sweep(d[d$experiment =="F2",1:5],
1,
seq(from=10, 1, length.out = 100),
"+"), 2)
#print(d[d$experiment =="F1",1:5])
# Now we split the dataset by experiments
d.split <- split(d, d$experiment)
# For all experiments, we order elements based on the mean expression signal in
# F1.
row.means.F1 <- rowMeans(d.split$F1[,1:5])
pos <- order(row.means.F1)
for(s in names(d.split)){
d.split[[s]] <- d.split[[s]][pos,]
}
## Added: begin ###
#Get the list of elements in proper order (based on row mean)
mean.order <- as.character(d.split$F1$elements)
## Added: end###
# We create several classes of elements based on their
# mean expression signal in F1.
cuts <- cut(1:nrow(d.split$F1), nb.experiment)
levels(cuts) <- sprintf("%02d", 1:nb.experiment)
for(s in names(d.split)){
d.split[[s]] <- split(d.split[[s]], cuts)
}
# Data are melt (their is perhaps a better solution...)
# in order to use the ggplot function
dm <- melt(do.call('rbind',lapply(d.split, melt)), id.var=c( "experiment", "elements", "variable", "L1"))
dm <- dm[, -5]
colnames(dm) <- c("experiment","elements", "pos", "rowMeanClass", "exprs")
## Added: begin###
#Ensure that dm$elements is an ordered factor with levels
# ordered as expected
dm$elements <- factor(dm$elements, levels = mean.order, ordered = TRUE)
## Added: end###
# Now we plot the data
p <- ggplot(dm, aes(x = pos, y = elements, fill = exprs))
p <- p + geom_raster()
p <- p + facet_wrap(~rowMeanClass +experiment , scales = "free", ncol = 2)
p <- p + theme_bw()
p <- p + theme(text = element_text(size=4))
p <- p + theme(text = element_text(family = "mono", face = "bold"))
ggsave("RPlot_test.jpeg", p)

Align gridArranged facetted ggplots

I've created a faceted plot, separately for three different groups in my data, like so:
df <- data.frame(x=rep(seq(0.05,1,by=0.05),times=40),
y=sample(c('A','B'),20*40,replace=TRUE),
id=rep(1:40,each=20),
group=c(rep(1,20*12),rep(2,20*12),rep(3,20*16)))
g1 <- ggplot(df[df$group==1,],aes(x,y,group=id))
g1 <- g1 + geom_line()
g1 <- g1 + facet_wrap(~id,ncol=3)
g2 <- ggplot(df[df$group==2,],aes(x,y,group=id))
g2 <- g2 + geom_line()
g2 <- g2 + facet_wrap(~id,ncol=3)
g3 <- ggplot(df[df$group==3,],aes(x,y,group=id))
g3 <- g3 + geom_line()
g3 <- g3 + facet_wrap(~id,ncol=3)
grid.arrange(g1,g2,g3,nrow=1)
which gives me this:
As you can see, the number of facets differs between the three groups which means that the facets in the three columns have different heights. Is there any way to harmonize this height in a non-fragile way (i.e. without me having to manually determine the heights of columns 2 and 3 that gives me facets that look like they have roughly the same height)?
Here's a solution with some guidance from this question.
library(ggplot2)
library(gridExtra)
ncol = 3
df <- data.frame(x=rep(seq(0.05,1,by=0.05),times=40),
y=factor(sample(c('A','B'),20*40,replace=TRUE), levels = c("A", "B")),
id=rep(1:40,each=20),
group=c(rep(1,20*12),rep(2,20*12),rep(3,20*16)))
max_cases <- max(table(unique(df[,c("id", "group")])$group))
# create phantom plots for everything in the containing rectangle to standardize labels
rect_dim <- ceiling(max_cases / ncol) * ncol
plots <- lapply(X=unique(df$group), FUN= function(i){
df_case <- subset(df, subset= group == i)
tot_case <- nrow(unique(df_case[,c("id", "group")]))
# create fill levels to pad the plots
fill_levels <- unlist(lapply(X=1:(rect_dim - tot_case), function(y){paste0(rep(x=" ", times=y), collapse="")}))
df_case$id.label <- ordered(df_case$id, levels = c(unique(df_case$id), fill_levels))
g_case <- ggplot(df_case,aes(x,y,group=id.label)) +
geom_line() +
facet_wrap(~id.label, ncol = ncol, drop=FALSE)
# whiteout the inner y axis elements to clean it up a bit
if(i != 1){
g_case <- g_case + theme(axis.text.y = element_text(color = "white"),
axis.title.y = element_text(color = "white"),
axis.ticks.y = element_line(color = "white"))
}
g_case <- ggplotGrob(g_case)
rm_me <- (tot_case:rect_dim)[-1]
# remove empty panels and layout
g_case$grobs[names(g_case$grobs) %in% c(paste0("panel", rm_me), paste0("strip_t.", rm_me))] <- NULL
g_case$layout <- g_case$layout[!(g_case$layout$name %in% c(paste0("panel-", rm_me), paste0("strip_t-", rm_me))),]
g_case
})
plots$nrow = 1
do.call("grid.arrange", plots)
It's a bit messy, but you can massage the gtables to have the same number of rows, and align them. Further refinement would locate the rows corresponding to plot panels, rather than assume that all plots have the same row sequence of panel - axes - etc.
library(gtable)
cbind_top = function(...){
pl <- list(...)
## test that only passing plots
stopifnot(do.call(all, lapply(pl, inherits, "gg")))
gl <- lapply(pl, ggplotGrob)
nrows <- sapply(gl, function(x) length(x$heights))
tallest <- max(nrows)
add_dummy <- function(x, n){
if(n == 0) return(x)
gtable_add_rows(x, rep(unit(0, "mm"), n), nrow(x)-2)
}
gl <- mapply(add_dummy, x=gl, n=tallest - nrows)
compare_unit <- function(u1,u2){
n <- length(u1)
stopifnot(length(u2) == n)
null1 <- sapply(u1, attr, "unit")
null2 <- sapply(u2, attr, "unit")
null12 <- null1 == "null" | null2 == "null"
both <- grid::unit.pmax(u1, u2)
both[null12] <- rep(list(unit(1,"null")), sum(null12))
both
}
bind2 <- function(x,y){
y$layout$l <- y$layout$l + ncol(x)
y$layout$r <- y$layout$r + ncol(x)
x$layout <- rbind(x$layout, y$layout)
x$widths <- gtable:::insert.unit(x$widths, y$widths)
x$colnames <- c(x$colnames, y$colnames)
x$heights <- compare_unit(x$heights, y$heights)
x$grobs <- append(x$grobs, y$grobs)
x
}
combined <- Reduce(bind2, gl[-1], gl[[1]])
grid::grid.newpage()
grid::grid.draw(combined)
}
cbind_top(g1,g2,g3)

Matrix of density plots with each plot overlaying two distributions

I have a data.frame with 5 columns and I'd like to generate a matrix of density plots, such that each density plot is an overlay of two density plots. (This is akin to plotmatrix, except that in my case, the number of non-NA value in each column differ from column to column and I want overlaid distributions rather than scatter plots).
My first attempt, which didn't work, is given below:
library(ggplot2)
library(reshape)
tmp1 <- data.frame(do.call(cbind, lapply(1:5, function(x) {
r <- rnorm(100)
r[sample(1:100, 20)] <- NA
return(r)
})))
ggplot( melt(tmp1), aes(x=value, fill=variable))+
geom_density(alpha=0.2, position="identity")+opts(legend.position = "none")+
facet_grid(variable ~ variable)
My second approach got me nearly there, but instead of 5 different colors, I only want to use two colors across all the plots. And, I'm sure there is a more elegant way to construct this expanded matrix:
tmp2 <- do.call(rbind, lapply(1:5, function(i) {
do.call(rbind, lapply(1:5, function(j) {
r <- rbind(data.frame(var=sprintf('X%d', i), val=tmp1[,i]),
data.frame(var=sprintf('X%d', j), val=tmp1[,j]))
r <- data.frame(xx=sprintf('X%d', i), yy=sprintf('X%d', j), r)
return(r)
}))
}))
ggplot(tmp2, aes(x=val, fill=var))+
geom_density(alpha=0.2, position="identity")+opts(legend.position = "none")+
facet_grid(xx ~ yy)
My solution was to manually loop through the pairs of columns and generate the overlaid density plots by hand, saving them to a list. I then arranged them in a grid using `grid.arrange' giving the image below.
But is it possible to achieve this using facet_grid instead?
The easiest way is to reshape your data with all permutations (5 * 5 = 25 of them).
require(gregmisc)
perm <- permutations(5, 2, paste0("X", 1:5), repeats.allowed=TRUE)
# instead of gregmisc + permutations, you can use expand.grid from base as:
# perm <- expand.grid(paste0("X", 1:5), paste0("X", 1:5))
o <- apply(perm, 1, function(idx) {
t <- tmp1[idx]
names(t) <- c("A", "B")
t$id1 <- idx[1]
t$id2 <- idx[2]
t
})
require(ggplot2)
require(reshape2)
o <- do.call(rbind, o)
o.m <- melt(o, c("id1", "id2"))
o.m$id1 <- factor(o.m$id1)
o.m$id2 <- factor(o.m$id2)
p <- ggplot(o.m, aes(x = value))
p <- p + geom_density(alpha = 0.2, position = "identity", aes(fill = variable))
p <- p + theme(legend.position = "none")
p <- p + facet_grid(id1 ~ id2)
p

Resources