Combining a heatmap and a dendrogram in a grob plot - r

I'm trying to plot a heatmap along with a row dendrogram, which I manipulated (pruned the number of branches), aligned using grid.draw.
Here's my data:
set.seed(10)
mat <- matrix(rnorm(24*10,mean=1,sd=2),nrow=24,ncol=10,dimnames=list(paste("g",1:24,sep=""),paste("my.expriment.sample",1:10,sep="")))
dend <- as.dendrogram(hclust(dist(mat)))
row.ord <- order.dendrogram(dend)
mat <- matrix(mat[row.ord,],nrow=24,ncol=10,
dimnames=list(rownames(mat)[row.ord],colnames(mat)))
mat.df <- reshape2::melt(mat,value.name="expr",varnames=c("gene","sample"))
The heatmap part of the plot:
require(ggplot2)
map.plot <- ggplot(mat.df,aes(x=sample,y=gene)) + geom_tile(aes(fill=expr)) +
scale_fill_gradient2("expr",high="darkred",low="darkblue") + theme_bw() +
theme(legend.key=element_blank(),legend.position="right", axis.text.y=element_blank(), axis.ticks.y=element_blank(),
panel.border=element_blank(), strip.background=element_blank(), axis.text.x=element_text(angle=45,hjust=1,vjust=1),
legend.text=element_text(size=5), legend.title=element_text(size=8), legend.key.size=unit(0.4,"cm"))
Which gives:
Notice the long column labels - that's similar to what I have in reality.
Here's how I manipulate and plot the dendrogram:
depth.cutoff <- 11
dend <- cut(dend,h=depth.cutoff)$upper
require(dendextend)
gg.dend <- as.ggdend(dend)
#change vertical segments that lead to leaves
distinctColors <- function(n) {
if (n <= 8) {
res <- brewer.pal(n, "Set2")
} else {
res <- hcl(h=seq(0,(n-1)/(n),length=n)*360,c=100,l=65,fixup=TRUE)
}
}
cluster.cols <- distinctColors(nrow(gg.dend$labels))
leaf.heights <- dplyr::filter(gg.dend$nodes,!is.na(leaf))$height
leaf.seqments.idx <- which(gg.dend$segments$yend %in% leaf.heights)
gg.dend$segments$yend[leaf.seqments.idx] <- max(gg.dend$segments$yend[leaf.seqments.idx])
gg.dend$segments$col[leaf.seqments.idx] <- cluster.cols
#change labels
gg.dend$labels$label <- 1:nrow(gg.dend$labels)
gg.dend$labels$y <- max(gg.dend$segments$yend[leaf.seqments.idx])
gg.dend$labels$x <- gg.dend$segments$x[leaf.seqments.idx]
gg.dend$labels$col <- cluster.cols
dend.plot <- ggplot(gg.dend,labels=F)+scale_y_reverse()+coord_flip()+annotate("text",size=10,hjust=0,x=gg.dend$label$x,y=gg.dend$label$y,label=gg.dend$label$label,colour=gg.dend$label$col)
which gives:
Trying to follow this example, I do:
require(gtable)
plot.grob <- ggplotGrob(dend.plot)
plot.grob <- gtable_add_cols(plot.grob,unit(1,"cm"))
plot.grob <- gtable_add_grob(plot.grob,ggplotGrob(map.plot),t=1,l=ncol(plot.grob),b=1,r=ncol(plot.grob))
grid.newpage()
grid.draw(plot.grob)
But this comes out messed up:
Any idea how to get dend.plot aligned with the heatmap part of map.plot such that the lower branch of dend.plot is bottom aligned with the the heatmap and not the bottom of the column labels?

cowplot is super good at aligning ggplots.
library(cowplot)
plot_grid(dend.plot, map.plot, align = 'h')
Also, try to have a bit shorter example (why do I need a super detailed theme call?), and make sure it actually runs in a clean session.

Related

add one legend with all variables for combined graphs

I'm trying to plot two graphs side-by-side with one common legend that incorporates all the variables between both graphs (some vars are different between the graphs).
Here's a mock example of what I've been attempting:
#make relative abundance values for n rows
makeData <- function(n){
n <- n
x <- runif(n, 0, 1)
y <- x / sum(x)
}
#make random matrices filled with relative abundance values
makeDF <- function(col, rw){
df <- matrix(ncol=col, nrow=rw)
for(i in 1:ncol(df)){
df[,i] <- makeData(nrow(df))
}
return(df)
}
#create df1 and assign col names
df1 <- makeDF(4, 5)
colSums(df1) #verify relative abundance values = 1
df1 <- as.data.frame(df1)
colnames(df1) <- c("taxa","s1", "s2", "s3")
df1$taxa <- c("ASV1", "ASV2", "ASV3", "ASV4", "ASV5")
#repeat for df2
df2 <- makeDF(4,5)
df2 <- as.data.frame(df2)
colnames(df2) <- c("taxa","s1", "s2", "s3")
df2$taxa <- c("ASV1", "ASV5", "ASV6", "ASV7", "ASV8")
# convert wide data format to long format -- for plotting
library(reshape2)
makeLong <- function(df){
df.long <- melt(df, id.vars="taxa",
measure.vars=grep("s\\d+", names(df), val=T),
variable.name="sample",
value.name="value")
return(df.long)
}
df1 <- makeLong(df1)
df2 <- makeLong(df2)
#generate distinct colours for each asv
taxas <- union(df1$taxa, df2$taxa)
library("RColorBrewer")
qual_col_pals = brewer.pal.info[brewer.pal.info$category == 'qual',]
colpals <- qual_col_pals[c("Set1", "Dark2", "Set3"),] #select colour palettes
col_vector = unlist(mapply(brewer.pal, colpals$maxcolors, rownames(colpals)))
taxa.col=sample(col_vector, length(taxas))
names(taxa.col) <- taxas
# plot using ggplot
library(ggplot2)
plotdf2 <- ggplot(df2, aes(x=sample, y=value, fill=taxa)) +
geom_bar(stat="identity")+
scale_fill_manual("ASV", values = taxa.col)
plotdf1 <- ggplot(df1, aes(x=sample, y=value, fill=taxa)) +
geom_bar(stat="identity")+
scale_fill_manual("ASV", values = taxa.col)
#combine plots to one figure and merge legend
library(ggpubr)
ggpubr::ggarrange(plotdf1, plotdf2, ncol=2, nrow=1, common.legend = T, legend="bottom")
(if you have suggestions on how to generate better mock data, by all means!)
When I run my code, I am able to get the two graphs in one figure, but the legend does not incorporate all variables from both plots:
I ideally would like to avoid having repeat variables in the legend, such as:
From what I've searched online, the legend only works when the variables are the same between graphs, but in my case I have similar and different variables.
Thanks for any help!
Maybe this is what you are looking for:
Convert your taxa variables to factor with the levels equal to your taxas variable, i.e. to include all levels from both datasets.
Add argument drop=FALSE to both scale_fill_manual to prevent dropping of unused factor levels.
Note: I only added the relevant parts of the code and set the seed to 42 at the beginning of the script.
set.seed(42)
df1$taxa <- factor(df1$taxa, taxas)
df2$taxa <- factor(df2$taxa, taxas)
# plot using ggplot
library(ggplot2)
plotdf2 <- ggplot(df2, aes(x=sample, y=value, fill=taxa)) +
geom_bar(stat="identity") +
scale_fill_manual("ASV", values = taxa.col, drop = FALSE)
plotdf1 <- ggplot(df1, aes(x=sample, y=value, fill=taxa)) +
geom_bar(stat="identity")+
scale_fill_manual("ASV", values = taxa.col, drop = FALSE)
#combine plots to one figure and merge legend
library(ggpubr)
ggpubr::ggarrange(plotdf1, plotdf2, ncol=2, nrow=1, common.legend = T, legend="bottom")

Multiple plot in R in a single page

I'm having trouble displaying the multiple graphs on the same page. I'm having a data frame with 18 numerical columns. For each column, I need to show its histogram and boxplot on the same page with a 4*9 grid. Following is what I tried. But I need to show it along with the boxplot as well. Through a for a loop if possible. Can someone please help me to do it.
library(gridExtra)
library(ggplot2)
p <- list()
for(i in 1:18){
x <- my_data[,i]
p[[i]] <- ggplot(gather(x), aes(value)) +
geom_histogram(bins = 10) +
facet_wrap(~key, scales = 'free_x')
}
do.call(grid.arrange,p)
I received the following graph.
When following is tried, I'm getting the graph in separate pages
library(dplyr)
dat2 <- my_data %>% mutate_all(scale)
# Boxplot from the R trees dataset
boxplot(dat2, col = rainbow(ncol(dat2)))
par(mfrow = c(2, 2)) # Set up a 2 x 2 plotting space
# Create the loop.vector (all the columns)
loop.vector <- 1:4
p <- list()
for (i in loop.vector) { # Loop over loop.vector
# store data in column.i as x
x <- my_data[,i]
# Plot histogram of x
p[[i]] <-hist(x,
main = paste("Question", i),
xlab = "Scores",
xlim = c(0, 100))
plot_grid(p, label_size = 12)
}
You can assemble the base R boxplot and the ggplot object generated with facet_wrap together using the R package patchwork:
library(ggplot2)
library(patchwork)
p <- ggplot(mtcars, aes(x = mpg)) +
geom_histogram() +
facet_wrap(~gear)
wrap_elements(~boxplot(split(mtcars$mpg, mtcars$gear))) / p
ggsave('test.png', width = 6, height = 8, units = 'in')

quarter circles ggplot-s with nonexisting points

I try to use ggplot to plot quarted circles to visualize contour plots but I get misconfigured plot using geom_area (following this tutorial on stacked area with ggplot2)
The code I tried reads
library(ggplot2)
library(dplyr)
N <- 1E2
r <- rev(c(1,2,4,7))
maxXY = max(r)+.25*max(r)
grupp <- c("0","0.25","0.5","0.75")
datalist = list()
plot(0,0,xlim=c(0,maxXY),ylim=c(0,maxXY))
for (i in 1:length(r)) {
quadX <- seq(from = 0,to = r[i],length.out = N) # calculate x coords
quadY <- sqrt(r[i]^2 - quadX^2) # calculate y coords
lines(quadX,quadY)
# data for ggplot
dat <- data.frame(X = quadX, Y = quadY)
dat$group <- grupp[i]
datalist[[i]] <- dat # add it to your list
}
DF = do.call(rbind, datalist)
# stacked area chart
p1 <- ggplot(DF, aes(x=X, y=Y, fill=group)) +
geom_area(alpha=0.6 , size=1, colour="black")
plot(p1)
and I get quarter circles plotted correctly with basic plot
but a weird one with geom_area
Any help would be very appreciated. MJS
EDIT: using Z.Lin's suggestions I get the correct plot, thanks!

Generate a multi-page pdf file of a grid of ggplot plots [duplicate]

This question already has answers here:
Plot over multiple pages
(3 answers)
Closed 3 years ago.
I am trying to generate a multi-page pdf of a grid of ggplots from a list of ggplots. I have tried very many ways to do this and have not succeeded. Here is a reproducible equivalent to what I have been working with:
library(ggplot2)
# generate a data frame w same structure as the one I'm working with
time <- c(1:10)
veclist <- list()
veclist[[1]] <- time
for (i in 2:25){
veclist[[i]] <- as.vector(c(runif(10,-2,2)))
}
d <- as.data.frame(do.call(rbind, veclist))
d <- as.data.frame(t(d))
colnames(d)[1] <- "time"
for (i in 2:length(d)){
colnames(d)[i] <- paste("name",i,sep=" ")
}
# for a common axis
numericvalues <- d[,2:length(d)]
# generate plot(s)
name_list = paste("`",names(d),"`",sep="")
plot_list = list()
for (i in 2:length(d)) {
p = ggplot(d, aes_string(x=name_list[[1]], y=name_list[[i]])) +
geom_point() +
labs(x="time",title=paste(strwrap(names(d[i]), width = 30),collapse = "\n")) +
theme(plot.title = element_text(size=10,hjust = 0.5),axis.text.x=element_text(size=6)) +
coord_cartesian(ylim = c(min(numericvalues, na.rm = TRUE), max(numericvalues, na.rm = TRUE)))
plot_list[[i]] = p
}
What I am looking for would generate a multi-page pdf grid of the plots in plot_list (ideally with 3 columns, 4 rows of plots per page).
A few things I have tried:
pdf("test.pdf")
do.call("marrangeGrob",c(plot_list,ncol=3,nrow=2))
produces an unreadable pdf file.
pdf("test.pdf")
do.call("grid.arrange",c(plot_list))
returns only 'grobs' allowed in "gList" error.
This one produces a multipage layout:
library(gridExtra)
...
plot_list = list()
for (i in 2:length(d)) {
p = ggplot(...)
plot_list[[i]] = ggplotGrob(p)
}
class(plot_list) <- c("arrangelist", class(plot_list))
ggsave("multipage.pdf", plot_list)
your plot list seems to have a missing item. Here's a minimal example
library(ggplot2)
pl <- replicate(13, ggplot(), simplify = FALSE)
ggsave("mp.pdf", gridExtra::marrangeGrob(grobs = pl, nrow=3, ncol=2))
Notes:
you were missing dev.off() hence the invalid pdf
do.call is no longer necessary in (m)arrangeGrob, use the grobs argument
Here's a solution with gridExtra and tidyr
(i) transform the wide data to long-format, and split into a list of data.frame based on each name:
library(tidyr)
df <- d %>% gather(var, val, -time)
df_list <- split(df, df$var)
(ii) plot for each name with lapply function
plots <- lapply(names(df_list), function(x){
ggplot(df_list[[x]], aes(time, val)) +
geom_point() +
labs(x="time", title=x)
})
(iii) using gridExtra to print 12 plots each on two pages of the pdf:
library(gridExtra)
pdf("something.pdf")
do.call(grid.arrange, c(plots[1:12], nrow=4))
do.call(grid.arrange, c(plots[13:24], nrow=4))
dev.off()

Is it possible to create a multiplot of data and images with ggplot?

I would like to know if it is possible with ggplot2 to create a multiple plot (like a facet) including images to look like that:
I don't know exactly how to arrange the image data to pass them to geom_raster() or how to include an image in a data frame...
what I have tried:
> img1 <- readPNG('1.png')
> img2 <- readPNG('2.png')
> img3 <- readPNG('3.png')
> test <- data.frame(c(1,2,3),c(5,2,7),c(img1,img2,img3))
> nrow(test)
[1] 4343040
I already have a problem here to build a data frame with the images inside... each 3 lines a repeated (I guess once per pixel).
I went finally with this solution which decompose the plot to a gtable, add one more line and insert images. Of course once decompose one can't add more layer to the plot, so it should be the very last operation. I don't think the gtable can be converted back to plot.
library(png)
library(gridExtra)
library(ggplot2)
library(gtable)
library(RCurl) # just to load image from URL
#Loading images
img0 <- readPNG(getURLContent('http://i.stack.imgur.com/3anUH.png'))
img1 <- readPNG(getURLContent('http://i.stack.imgur.com/3anUH.png'))
#Convert images to Grob (graphical objects)
grob0 <- rasterGrob(img0)
grob1 <- rasterGrob(img1)
# create the plot with data
p <- ggplot(subset(mpg,manufacturer=='audi'|manufacturer=='toyota'),aes(x=cty,y=displ))+facet_grid(year~manufacturer)+geom_point()
# convert the plot to gtable
mytable <- ggplot_gtable(ggplot_build(p))
#Add a line ssame height as line 4 after line 3
# use gtable_show_layout(mytable) to see where you want to put your line
mytable <- gtable_add_rows(mytable, mytable$height[[4]], 3)
# if needed mor row can be added
# (for example to keep consistent spacing with other graphs)
#Insert the grob in the cells of the new line
mytable <- gtable_add_grob(mytable,grob0,4,4, name = paste(runif(1)))
mytable <- gtable_add_grob(mytable,grob1,4,6, name = paste(runif(1)))
#rendering
grid.draw(mytable)
Note: In this example, I use twice the same image, but it is of course possible to have as much as needed.
Inspired by: How do you add a general label to facets in ggplot2?
This a rather peculiar question. Normally probably it's faster to "photoshop" one than rastering the plots into images etc (I meant, measured by how much human effort it takes).
But since your question has a fun component in it, below I provide one shoddy solution.
The starting point is the example you provided. I use some R code to extract the two faces, and then combine them with two plots (any plots). The plots will be saved to PNG, and then loaded into R as a rastered image.
Eventually you combine the four facets and make one final plot.
library(png)
library(ggplot2)
# load PNG file, and reduce dimension to 1.
# there's no sanity check to verify how many channels a PNG file has.
# potentially there can be 1 (grayscale), 3 (RGB) and 4 (RGBA).
# Here I assume that PNG file f has 4 channels.
load_png <- function(f) {
d <- readPNG(f)
# CCIR 601
rgb.weights <- c(0.2989, 0.5870, 0.1140)
grayscale <- matrix(apply(d[,,-4], -3,
function(rgb) rgb %*% rgb.weights),
ncol=dim(d)[2], byrow=T)
grayscale
}
# the image you provided as an example,
# used to extract the two emoicons
img <- load_png("3anUH.png")
# convert a grayscale matrix into a data.frame,
# facilitating plotting by ggplot
melt_grayscale <- function(d) {
w <- ncol(d)
h <- nrow(d)
coords <- expand.grid(1:w, 1:h)
df <- cbind(coords, as.vector(d))
names(df) <- c("x", "y", "gs")
# so that smallest Y is at the top
df$y <- h - df$y + 1
df
}
plot_grayscale <- function(d, melt=F) {
df <- melt_grayscale(d)
ggplot(df) + geom_raster(aes(x=x, y=y, fill=gs)) + scale_fill_continuous(low="#000000", high="#ffffff")
}
ggplot_blank <- function(x, y) {
# to plot a graph without any axis, grid and legend
# otherwise it would look weird when performing facet_wrap()
qplot(x, y + rnorm(10), size=15) +
theme(axis.line=element_blank(),
axis.text.x=element_blank(),
axis.text.y=element_blank(),
axis.ticks=element_blank(),
axis.title.x=element_blank(),
axis.title.y=element_blank(),
legend.position="none",
panel.background=element_blank(),
panel.border=element_blank(),
panel.grid.major=element_blank(),
panel.grid.minor=element_blank(),
plot.background=element_blank())
}
# extract the two faces
#offset <- c(50, 40)
img0 <- img[50:200, 40:190]
img1 <- img[210:360, 40:190]
plot_grayscale(img0)
plot_grayscale(img1)
At this point we have img0 that looks like: .
You can probably adjust the subsetting offset to getting a cleaner cut.
Next we want to draw two plots, and save the plot to PNGs which can be loaded later.
# now plot two PNGs using ggplot
png(file="p0.png", width=150, height=150)
ggplot_blank(1:10, 10:1 + rnorm(10))
dev.off()
png(file="p1.png", width=150, height=150)
ggplot_blank(1:10, rnorm(10, 10))
dev.off()
p0 <- load_png("p0.png")
p1 <- load_png("p1.png")
# combine PNG grayscale matrices together
# into a melted data.frame, but with an extra column to
# identify which panel does this pixel belong to.
combine.plots <- function(l) {
panel <- 0
do.call(rbind, lapply(l, function(m){
panel <<- panel + 1
cbind(melt_grayscale(m), panel)
}))
}
plots <- combine.plots(list(img0, img1, p0, p1))
ggplot(plots) + geom_raster(aes(x=x, y=y, fill=gs)) +
scale_fill_continuous(low="#000000", high="#ffffff") + facet_wrap(~panel)
Of course the obvious drawback is that you only have grayscale image with above example.
It would be much trickier if you want RGB color in your final plot.
You can probably do what GIF format is doing: indexed color.
Basically you:
Discretize your RGB values into 256 or 512 colors
Create a vector of the discretized colors
Replaces scale_fill_continous with scale_fill_manual and let values equal to the color vector you created above.
EDIT: Combine geom_raster with geom_point.
Above solution used png() function to first save the plot to a PNG file (involving rasterization), and then loaded the rasterized images into R. This process could potentially lead to resolution loss, if, say, the two images shown in the top panels are themselves of low resolution.
We can modify above solution to combine geom_point with geom_raster, where the former is used for rendering plots and the latter for rendering images.
The only issue here, (assuming that the two images to display have the same resolution, denoted by w x h, where w is the width and h is the height), is that facet_wrap will enforce all panels to have the same X/Y-limits.
So, we need to rescale the plots to the same limits (w x h) before plotting them.
Below is the modified R code for combining plots and images:
library(png)
library(ggplot2)
load_png <- function(f) {
d <- readPNG(f)
# CCIR 601
rgb.weights <- c(0.2989, 0.5870, 0.1140)
grayscale <- matrix(apply(d[,,-4], -3,
function(rgb) rgb %*% rgb.weights),
ncol=dim(d)[2], byrow=T)
grayscale
}
# the image you provided as an example,
# used to extract the two emoicons
img <- load_png("3anUH.png")
# convert a grayscale matrix into a data.frame,
# facilitating plotting by ggplot
melt_grayscale <- function(d) {
w <- ncol(d)
h <- nrow(d)
coords <- expand.grid(1:w, 1:h)
df <- cbind(coords, as.vector(d))
names(df) <- c("x", "y", "gs")
df$y <- h - df$y + 1
df
}
plot_grayscale <- function(d, melt=F) {
df <- melt_grayscale(d)
ggplot(df) + geom_raster(aes(x=x, y=y, fill=gs)) + scale_fill_continuous(low="#000000", high="#ffffff")
}
ggplot_blank <- function(x, y) {
# to plot a graph without any axis, grid and legend
qplot(x, y + rnorm(10), size=15) +
theme(axis.line=element_blank(),
axis.text.x=element_blank(),
axis.text.y=element_blank(),
axis.ticks=element_blank(),
axis.title.x=element_blank(),
axis.title.y=element_blank(),
legend.position="none",
panel.background=element_blank(),
panel.border=element_blank(),
panel.grid.major=element_blank(),
panel.grid.minor=element_blank(),
plot.background=element_blank())
}
# extract the two faces
#offset <- c(50, 40)
img0 <- img[50:200, 40:190]
img1 <- img[210:360, 40:190]
plot_grayscale(img0)
plot_grayscale(img1)
# now plot two PNGs using ggplot
png(file="p0.png", width=300, height=300)
ggplot_blank(1:10, 10:1 + rnorm(10))
dev.off()
png(file="p1.png", width=300, height=300)
ggplot_blank(1:10, rnorm(10, 10))
dev.off()
p0 <- load_png("p0.png")
p1 <- load_png("p1.png")
combine.plots <- function(l) {
panel <- 0
do.call(rbind, lapply(l, function(m){
panel <<- panel + 1
cbind(melt_grayscale(m), panel)
}))
}
rescale.plots <- function(x, y, w, h, panel) {
# need to rescale plots to the same scale (w x h)
x <- (x - min(x)) / (max(x) - min(x)) * w
y <- (y - min(y)) / (max(y) - min(y)) * h
data.frame(x=x, y=y, panel=panel)
}
imgs <- combine.plots(list(img0, img1))
# combine two plots, with proper rescaling
plots <- rbind(
rescale.plots(1:100, 100:1 + rnorm(100), 150, 150, panel=3),
rescale.plots(1:100, rnorm(100), 150, 150, panel=4)
)
ggplot() + geom_raster(data=imgs, aes(x=x, y=y, fill=gs)) + geom_point(data=plots, aes(x=x, y=y)) +
facet_wrap(~panel) + scale_fill_continuous(low="#000000", high="#ffffff")
which will give you:
You may not visually detect the immediate difference, but it will be obvious when you resize the output image in R.
Have you considered using gridExtra to combine a separate plot with the images you want?
For example;
# import packages needed
library(png)
library(gridExtra)
library(ggplot2)
# read image files
img1 <- readPNG('1.png')
img2 <- readPNG('2.png')
# convert images to objects that gridExtra can handle
image1 <- rasterGrob(img1, interpolate=TRUE)
image2 <- rasterGrob(img2, interpolate=TRUE)
# create whatever plot you want under the images;
test <- data.frame(x = c(1,2,3), y =c(5,2,7), facet = c("A", "B", "B"))
plot <- ggplot(test,aes(x=x, y= y)) + geom_point() + facet_wrap(~ facet)
# use grid.arrange to display the images and the plot on the same output.
grid.arrange(image1,image2,plot, ncol = 2)
You might have to play around a bit with grid.arrange to get the exact output.
# perhaps something like:
p = rectGrob()
grid.arrange(arrangeGrob(image1, image2, widths=c(1/2), ncol =2), plot, ncol=1)
adapted from an answer to this question.
Hope that helps.

Resources