I tried to plot a complex heatmap using ggplot2 with the cancer somatic mutation.
The data is here, and here is the code:
library(reshape2)
library(ggplot2)
library(scales)
library(gridExtra)
library(ggdendro)
library(zoo)
library(plyr)
#data process
mm8<-read.csv("mm8.csv",header=TRUE)
rownames(mm8)<-mm8$X
mm8<-mm8[,-2]
mm8[1:4,2:5]
#cluster from http://stackoverflow.com/questions/21474388/colorize-clusters-in-dendogram-with-ggplot2
df<-t(mm8)
df<-df[-1,]
cut <- 4 # Number of clusters
hc <- hclust(dist(df), "ave") # heirarchal clustering
dendr <- dendro_data(hc, type = "rectangle")
clust <- cutree(hc, k = cut) # find 'cut' clusters
clust.df <- data.frame(label = names(clust), cluster = clust)
# Split dendrogram into upper grey section and lower coloured section
height <- unique(dendr$segments$y)[order(unique(dendr$segments$y), decreasing = TRUE)]
cut.height <- mean(c(height[cut], height[cut-1]))
dendr$segments$line <- ifelse(dendr$segments$y == dendr$segments$yend &
dendr$segments$y > cut.height, 1, 2)
dendr$segments$line <- ifelse(dendr$segments$yend > cut.height, 1, dendr$segments$line)
# Number the clusters
dendr$segments$cluster <- c(-1, diff(dendr$segments$line))
change <- which(dendr$segments$cluster == 1)
for (i in 1:cut) dendr$segments$cluster[change[i]] = i + 1
dendr$segments$cluster <- ifelse(dendr$segments$line == 1, 1,
ifelse(dendr$segments$cluster == 0, NA, dendr$segments$cluster))
dendr$segments$cluster <- na.locf(dendr$segments$cluster)
# Consistent numbering between segment$cluster and label$cluster
clust.df$label <- factor(clust.df$label, levels = levels(dendr$labels$label))
clust.df <- arrange(clust.df, label)
clust.df$cluster <- factor((clust.df$cluster), levels = unique(clust.df$cluster), labels = (1:cut) + 1)
dendr[["labels"]] <- merge(dendr[["labels"]], clust.df, by = "label")
# Positions for cluster labels
n.rle <- rle(dendr$segments$cluster)
N <- cumsum(n.rle$lengths)
N <- N[seq(1, length(N), 2)] + 1
N.df <- dendr$segments[N, ]
N.df$cluster <- N.df$cluster - 1
# Plot the dendrogram
# Plot the dendrogram
p3<-ggplot() +
geom_segment(data = segment(dendr),
aes(x=x, y=y, xend=xend, yend=yend, size=factor(line), colour=factor(cluster)),
lineend = "square", show_guide = FALSE) +
scale_colour_manual(values = c("grey60", rainbow(cut))) +
scale_size_manual(values = c(.1, 1)) +
labs(x = NULL, y = NULL) +
theme(axis.line.y = element_blank(),
axis.ticks.y = element_blank(),
axis.text.y = element_blank(),
axis.title.y = element_blank(),
panel.background = element_blank(),
panel.grid = element_blank()) +
guides(fill = FALSE)+
theme(axis.ticks.x = element_blank(),
axis.text.x = element_blank(),
axis.title.x = element_blank(),
plot.background = element_blank())
#priparing a bar???
p4<-ggplot(clust.df,aes(x=label,y=1,fill=cluster))+geom_raster()+
theme(axis.line.y = element_blank(),
axis.ticks.y = element_blank(),
axis.text.y = element_blank(),
axis.title.y = element_blank(),
panel.background = element_blank(),
panel.grid = element_blank()) +
guides(fill = FALSE)+
theme(axis.ticks.x = element_blank(),
axis.text.x = element_blank(),
axis.title.x = element_blank(),
plot.background = element_blank())
#data for ggplot2 geom_raster
data.m = melt(mm8)
colnames(data.m)<-c("Var1", "Var2", "value")
head(data.m)
#plotting
p1 <- ggplot(data.m, aes(Var2, Var1)) + geom_raster(aes(fill = value),colour ="white")
p1<-p1 + theme(axis.ticks = element_blank(), axis.text = element_blank(),axis.title=element_blank(),plot.background = element_blank())
p2<-ggplot(data.m,aes(Var1,value*(-1)))+geom_bar(data.m, aes(fill=Var2),position="stack",stat="identity")+coord_flip()
p2<-ggplot(data.m,aes(Var1,value*(-1)))+geom_bar(data.m, aes(fill=Var2),position="stack",stat="identity")+coord_flip()+guides(fill = FALSE)+theme(axis.ticks.x = element_blank(), axis.text.x = element_blank(),axis.title.x = element_blank(),plot.background = element_blank())
#plotting 4 panels on a page
vplayout <- function(x, y) viewport(layout.pos.row = x, layout.pos.col = y)
#open graphic device
win.graph(width=860/72, height=450/72,pointsize = 12)
#plotting
grid.newpage()
pushViewport(viewport(layout = grid.layout(24, 50))) # 1 rows, 8 columns
#plotting
print(p2, vp = vplayout(5:24, 1:10))
print(p1, vp = vplayout(5:24, 10:50),newpage=FALSE)
print(p3, vp = vplayout(1:3, 9:47),newpage=FALSE)
print(p4, vp = vplayout(3:5, 10:46),newpage=FALSE)
#save
savePlot(filename="complex", type="emf")
dev.off()
And I got the picture like this:
1) How to automatically align P1,p3 and P4? For a sample on X, its features on p1, p3 and p4 are align automatically ?
2) Any good ideas to control the space between pancels? for example, reducing the space between p1 and p2, or p1 and p4.
3) How to reorder the samples on X-axis according to the cluster results? And how to control the order on X-axis for p1,p3,and p4 simultaneously?
Related
I have created the attached correlation matrix in R.
How can I limit the number of decimals to 0.01? I have tried to use "accuracy:0.01" next to value but it doesn't change anything.
Thanks!
My code is below:
library("reshape2")
melted_cormat <- melt(cormat)
head(melted_cormat)
get_lower_tri<-function(cormat){
cormat[upper.tri(cormat)] <- NA
return(cormat)
}
# Get upper triangle of the correlation matrix
get_upper_tri <- function(cormat){
cormat[lower.tri(cormat)]<- NA
return(cormat)
}
reorder_cormat <- function(cormat){
# Use correlation between variables as distance
dd <- as.dist((1-cormat)/2)
hc <- hclust(dd)
cormat <-cormat[hc$order, hc$order]
}
cormat <- reorder_cormat(cormat) # Reorder the correlation matrix
upper_tri <- get_upper_tri(cormat)
# Melt the correlation matrix
melted_cormat <- melt(upper_tri, na.rm = TRUE)
# Create a ggheatmap
ggheatmap <- ggplot(melted_cormat, aes(Var2, Var1, fill = value))+
geom_tile(color = "white")+
scale_fill_gradient2(low = "blue", high = "red", mid = "white",
midpoint = 0, limit = c(-1,1), space = "Lab",
name="Pearson\nCorrelation") +
theme_minimal()+ # minimal theme
theme(axis.text.x = element_text(angle = 45, vjust = 1,
size = 12, hjust = 1))+
coord_fixed()
# Print the heatmap
print(ggheatmap)
ggheatmap +
geom_text(aes(Var2, Var1, label = value), color = "black", size = 2) +
theme(
axis.title.x = element_blank(),
axis.title.y = element_blank(),
panel.grid.major = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.ticks = element_blank(),
legend.justification = c(1, 0),
legend.position = c(0.6, 0.7),
legend.direction = "horizontal")+
guides(fill = guide_colorbar(barwidth = 7, barheight = 1,
title.position = "top", title.hjust = 0.5))
How can I equalise the height of the grey panel and at the same time align all panels (even the flipped and reversed)? The problem is that due to the fact that the top plot does have no axis labels etc, the panel height is slightly larger.
library(ggplot2)
library(gridExtra)
library(grid)
# Plot 1
df1 <- data.frame(n = 1:10, y = rnorm(10, 5))
plot1 <- ggplot(df1, aes(x = n, y = y)) +
geom_bar(stat = "identity") +
labs(y = 'AU', x = NULL) +
theme(axis.title.x = element_blank(),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
plot.margin = unit(c(0.1, 0.1, 0.1, 0), "cm"))
# Plot 2
df2 <- data.frame(x = rnorm(100), y = rnorm(100))
plot2 <- ggplot(df2, aes(x = x, y = y)) +
geom_point() +
labs(y = 'AU', x = NULL) +
theme(axis.title.x = element_blank(),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
plot.margin = unit(c(0.1, 0.1, 0.1, 0), "cm"))
# Plot 3
df3 <- data.frame(x = rnorm(100))
plot3 <- ggplot(df3, aes(x = x)) +
coord_flip() +
geom_histogram() +
theme(axis.title.y = element_blank(),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
axis.title.x = element_blank(),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
plot.margin = unit(c(0.1, 0.1, 0.1, 0), "cm"))
# Plot 4
df4 <- data.frame(n = 1:10, y = rnorm(10, 4))
plot4 <- ggplot(df4, aes(x = n, y = y)) +
scale_y_reverse() +
geom_bar(stat = "identity") +
labs(y = 'AU', x = 'N') +
theme(plot.margin = unit(c(0.1, 0.1, 0.1, 0), "cm"))
# Get blank plot
blank <- grid.rect(gp=gpar(col="white"))
# Combine everything into one plot
## Get Grobs
gplot1 <- ggplotGrob(plot1)
gplot2 <- ggplotGrob(plot2)
gplot3 <- ggplotGrob(plot3)
gplot4 <- ggplotGrob(plot4)
# Align widths
maxWidth = grid::unit.pmax(gplot1$widths, gplot2$widths, gplot4$widths)
gplot1$widths <- as.list(maxWidth)
gplot2$widths <- as.list(maxWidth)
gplot4$widths <- as.list(maxWidth)
# Grid arrange
grid.arrange(grobs = list(gplot1, blank, gplot2, gplot3, gplot4, blank),
newpage = FALSE,
ncol = 2,
widths = c(12, 3))
Since I want the middle scatter plot's y-axis to be aligned to the flipped x-axis of the righthand side plot, I can't see how I can use rbind() as suggested here would work for here.
Also, setting the grob heights manually, e.g.
gplot4$heights<- gplot1$heights
just cuts of the labels. While, the reverse order (gplot1$heights<- gplot4$heights) introduces a big gap between the top and the middle plot.
I have a dataset with a lot of overlapping points and used ggplot to create a bubble plot to show that data. I need to add bars on my plot for the means of each group on the x axis (values can be 0, 1, or 2). I have tried to use geom_errorbar but haven't been able to get it to work with my data. Any help/suggestions would be greatly appreciated.
The following is my code and a script to generate fake data that is similar:
y <- seq(from=0, to=3.5, by=0.5)
x <- seq(from=0, to=2, by=1)
xnew <- sample(x, 100, replace=T)
ynew <- sample(y, 100, replace=T)
data <- data.frame(xnew,ynew)
data2 <- aggregate(data$xnew, by=list(x=data$xnew, y=data$ynew), length)
names(data2)[3] <- "Count"
ggplot(data2, aes(x = x, y = y)) +
geom_point(aes(size=Count)) +
labs(x = "Copies", y = "Score") +
aes(ymax=..y.., ymin=..y..) +
scale_x_continuous(breaks = seq(0, 2, 1)) +
scale_y_continuous(breaks = seq(0, 3, 0.5)) +
theme(legend.position = "bottom", legend.direction = "horizontal",
axis.line = element_line(size=1, colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.text.x = element_text(colour="black", size = 10),
axis.text.y = element_text(colour="black", size = 10))
I am not entirely sure that I understand your question correctly. It seems to me that in addition to the bubbles, you want to visualise the mean value of y for each value of x as a bar of some kind. (You mention error bars, but it seems that this is not a requirement, but just what you have tried. I will use geom_col() instead.)
I assume that you want to weigh the mean over y by the counts, i.e., sum(y * Count) / sum(Count). You can create a data frame that contains these values by using dplyr:
data2_mean
## # A tibble: 3 × 2
## x y
## <dbl> <dbl>
## 1 0 1.833333
## 2 1 1.750000
## 3 2 2.200000
When creating the plot, I use data2 as the data set for geom_point() and data2_mean as the data set for geom_col(). It is important to put the bars first, since the bubbles should be on top of the bars.
ggplot() +
geom_col(aes(x = x, y = y), data2_mean, fill = "gray60", width = 0.7) +
geom_point(aes(x = x, y = y, size = Count), data2) +
labs(x = "Copies", y = "Score") +
scale_x_continuous(breaks = seq(0, 2, 1)) +
scale_y_continuous(breaks = seq(0, 3, 0.5)) +
theme(legend.position = "bottom", legend.direction = "horizontal",
axis.line = element_line(size=1, colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.text.x = element_text(colour="black", size = 10),
axis.text.y = element_text(colour="black", size = 10))
Everything that I changed compared to your code comes before scale_x_continuous(). This produces the following plot:
Is this what you're after? I first calculated the group-level means using the dplyr package and then added line segments to your plot using geom_segment:
library(ggplot2)
library(dplyr)
data2 <- data2 %>% group_by(x) %>% mutate(mean.y = mean(y))
ggplot(data2, aes(x = x, y = y)) +
geom_point(aes(size=Count)) +
labs(x = "Copies", y = "Score") +
aes(ymax=..y.., ymin=..y..) +
scale_x_continuous(breaks = seq(0, 2, 1)) +
scale_y_continuous(breaks = seq(0, 3, 0.5)) +
theme(legend.position = "bottom", legend.direction = "horizontal",
axis.line = element_line(size=1, colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.text.x = element_text(colour="black", size = 10),
axis.text.y = element_text(colour="black", size = 10)) +
geom_segment(aes(y = mean.y, yend = mean.y, x = x -0.25, xend = x + 0.25))
I have code to make plots following:
My question is how to adjust the widths of 1st plot and last plot in the figure made?
dat=data.frame(x=rep(c("M","D"),each=60),y=rep(rep(c(4,6,8,10,12),each=12),2),z=runif(120,0,100),s=rep(c(1:4),each=3,len=120))
gp=lapply(split(dat,dat$y),function(dfr){
g=ggplot(data = dfr, aes(s,z)) +
geom_point(aes(shape=x,colour=x),size=4)+
ylim(0,100)+
xlab("Int segs")+
ggtitle(paste(dfr[1,2],"hours"))
return(g)})
tiff(file="Pas.tiff",width=60, height = 22,units="cm",res=300)
require(gridExtra)
rg=arrangeGrob(grobs=list(gp$`4` + theme(legend.position="none"),
gp$`6` + theme(legend.position="none",
axis.title.y = element_blank(),
axis.text.y = element_blank()),
gp$`8` + theme(legend.position="none",
axis.title.y = element_blank(),
axis.text.y = element_blank()),
gp$`10` + theme(legend.position="none",
axis.title.y = element_blank(),
axis.text.y = element_blank()),
gp$`12` + theme(axis.title.y = element_blank(),
axis.text.y = element_blank())),ncol=5,
top=textGrob("Pas rate",
gp=gpar(fontsize=20,fontface="bold"),
y = unit(.4, "cm")),
theme(margin(t=70,r=0.2,b=0.5,l=0.3,unit="mm")))
grid.newpage()
#grid.draw(cbind(lg, rg, size = "last"))
grid.draw(rg)
dev.off()
i'd use cbind() here (if facetting isn't an option)
gp[[1]] <- gp[[1]] + theme(legend.position="none")
gp[[5]] <- gp[[5]] + theme(axis.title.y = element_blank(),
axis.text.y = element_blank())
gp[c("6", "8", "10")] <- lapply(gp[c("6", "8", "10")], "+", e2 = theme(legend.position="none",
axis.title.y = element_blank(),
axis.text.y = element_blank()))
grid.newpage()
grid.draw(do.call(gridExtra::cbind.gtable, lapply(gp, ggplotGrob)))
I suggest you use gtable: you can extract legend and y.label from a first "dummy" plot, then plot each graph within a cell and with the same dimensions (having removed both legend and y.label from all of them, including 1st and last) to then add y.label and legend into separate cells.
require(ggplot2)
require(gtable)
require(grid)
library(dplyr)
library(scales)
dat <- data.frame(x=rep(c("M","D"),each=60),
y=rep(rep(c(4,6,8,10,12),each=12),2),
z=runif(120,0,100),
s=rep(c(1:4),each=3,len=120))
gp <- lapply(split(dat,dat$y),function(dfr){
g=ggplot(data = dfr, aes(s,z)) +
geom_point(aes(shape=x,colour=x),size=4)+
ylim(0,100)+
xlab("Int segs")+
ggtitle(paste(dfr[1,2],"hours"))
return(g)
})
tiff(file="Pas.tiff",width=60, height = 22,units="cm",res=300)
## Step 1:
## "dummy" plot, just to take y axis title/text and legend
dummy <- ggplotGrob(gp[[1]] + theme(panel.background = element_blank()))$grobs
legend <- dummy[[which(sapply(dummy, function(x) x$name) == "guide-box")]]
ytitle <- dummy[[grep("axis.title.y",sapply(dummy, function(x) x$name))]]
yticks <- dummy[[2]]
## Step 2:
## actual plots, all of them without y axis title/text and legend
pp <- 1
tab <- ggplotGrob(gp[[pp]] + theme(legend.position="none",
axis.title.y = element_blank(),
axis.text.y = element_blank()))
for(pp in 2:length(gp)){
tab <- gtable_add_cols(tab, unit(1,"null"))
tab <- gtable_add_grob(tab, ggplotGrob(gp[[pp]] + theme(legend.position="none",
axis.title.y = element_blank(),
axis.text.y = element_blank())),
t = 1, l = ncol(tab), b=nrow(tab), r=ncol(tab))
}
## Step 2:
## adding back ytitle, yticks and legend
## add narrow column to the left and put yticks labels withint
tab <- gtable_add_cols(tab, unit(1,"cm"), pos=0)
tab <- gtable_add_grob(tab, yticks,
t = 3, l = 2, b=nrow(tab)-3, r=1)
## add narrow column to the left and put y.axis label withint
ab <- gtable_add_cols(tab, unit(1,"cm"), pos=0)
tab <- gtable_add_grob(tab, ytitle,
t = 1, l = 1, b=nrow(tab), r=1)
## add narrow column to the right and put legend within
tab <- gtable_add_cols(tab, unit(1.5,"cm"))
tab <- gtable_add_grob(tab, legend,
t = 1, l = ncol(tab), b=nrow(tab), r=ncol(tab))
grid.newpage()
grid.draw(tab)
dev.off()
I wish to plot a number of tightly spaced graphs as illustrated by the following toy example:
library(ggplot2)
library(gridExtra)
set.seed(314159)
n <- 100
data <- data.frame(x = rnorm(n), y = rnorm(n), z = rep("dummy var", n))
p00 <- ggplot(data, aes(x)) + stat_density() + theme(plot.margin = unit(c(0,0,0,0), units = "lines" ), axis.text = element_blank(), axis.title = element_blank(), axis.ticks = element_blank()) + labs(x = NULL, y = NULL)
p01 <- ggplot(data, aes(x, y)) + geom_point() + theme(plot.margin = unit(c(0,0,0,0), units = "lines" ), axis.text = element_blank(), axis.title = element_blank(), axis.ticks = element_blank()) + labs(x = NULL, y = NULL)
p10 <- ggplot(data, aes(y, x)) + geom_point() + theme(plot.margin = unit(c(0,0,0,0), units = "lines" ), axis.text = element_blank(), axis.title = element_blank(), axis.ticks = element_blank()) + labs(x = NULL, y = NULL)
p11 <- ggplot(data, aes(y)) + stat_density() + theme(plot.margin = unit(c(0,0,0,0), units = "lines" ), axis.text = element_blank(), axis.title = element_blank(), axis.ticks = element_blank()) + labs(x = NULL, y = NULL)
grid.arrange(p00, p01, p10, p11, ncol = 2)
In spite of my best efforts, I have been unable to overcome a complication that arises when I attempt to do so after having removed the facet strips from my graphs. In the following example, I have added horizontal and vertical strips to each graph by faceting on a dummy variable:
p00 <- p00 + facet_grid(z ~ z)
p01 <- p01 + facet_grid(z ~ z)
p10 <- p10 + facet_grid(z ~ z)
p11 <- p11 + facet_grid(z ~ z)
grid.arrange(p00, p01, p10, p11, ncol = 2)
Next I remove the strips according to the procedure outlined in this post. However, the resulting graphs are rather widely spaced by comparison:
p00 <- p00 + theme(plot.margin = unit(c(0,0.5,0.5,0), units = "lines" ), strip.background = element_blank(), strip.text = element_blank())
p01 <- p01 + theme(plot.margin = unit(c(0,0.5,0.5,0), units = "lines" ), strip.background = element_blank(), strip.text = element_blank())
p10 <- p10 + theme(plot.margin = unit(c(0,0.5,0.5,0), units = "lines" ), strip.background = element_blank(), strip.text = element_blank())
p11 <- p11 + theme(plot.margin = unit(c(0,0.5,0.5,0), units = "lines" ), strip.background = element_blank(), strip.text = element_blank())
grid.arrange(p00, p01, p10, p11, ncol = 2)
Any suggestions on how to reduce the spacing between graphs would be much appreciated.
To remove all elements associated with the axes, in addition to the elements you have set to element_blank, the tick margins and tick lengths need to be set to zero. But space will remain for the facet strips. Setting the background and text to element_blank does not affect the height and width of the strips. To remove the strips, I use functions that manipulate the gtable layout. However, I think it is better to leave some white space between the plots. I have set a small plot margin to 0.2 lines.
library(ggplot2)
library(gridExtra)
set.seed(314159)
n <- 100
data <- data.frame(x = rnorm(n), y = rnorm(n), z1 = rep("dummy var", n), z2 = rep("dummy var", n))
theme = theme(plot.margin = unit(c(.2,.2,.2,.2), units = "lines"),
axis.text = element_blank(),
axis.title = element_blank(),
axis.ticks = element_blank(),
axis.ticks.length = unit(0, "lines"))
labs = labs(x = NULL, y = NULL)
p00 <- ggplot(data, aes(x)) + stat_density() + theme + labs + facet_grid(z1 ~ z2)
p01 <- ggplot(data, aes(x, y)) + geom_point() + theme + labs + facet_grid(z1 ~ z2)
p10 <- ggplot(data, aes(y, x)) + geom_point() + theme + labs + facet_grid(z1 ~ z2)
p11 <- ggplot(data, aes(y)) + stat_density() + theme + labs + facet_grid(z1 ~ z2)
This is where the gtable layout is manipulated.
# Get the four gtables (and the four plots) into a list
pList = list(p00, p01, p10, p11)
gList = lapply(pList, ggplotGrob)
# Remove the top strip from each plot
stripT <- subset(gList[[1]]$layout, grepl("strip-t", gList[[1]]$layout$name))
gList = lapply(gList, function(x) x[-stripT$t, ])
# Remove the right strip from each plot
stripR <- subset(gList[[1]]$layout, grepl("strip-r", gList[[1]]$layout$name))
gList = lapply(gList, function(x) x[, -stripR$r])
# Draw the revised plots
nCol <- floor(sqrt(length(pList)))
do.call(grid.arrange, c(gList, ncol = nCol))
Edit: Using revised data and plot.
library(grid)
data <- data.frame(x = rnorm(n), y = rnorm(n), z = rep("dummy var", n), u = seq(1, n) %% 2)
p01 <- ggplot(data, aes(x, y)) + geom_point() + theme + labs + facet_grid(z ~ u)
g = ggplotGrob(p01)
stripT = subset(g$layout, grepl("strip-t", g$layout$name))
g = g[-stripT$t, ]
stripR = subset(g$layout, grepl("strip-r", g$layout$name))
g = g[, -stripR$r]
grid.draw(g) # Still got the space between the facets
g$widths # where is the space? it's the 5.55 pt width
g$widths[[5]] = unit(0, "lines") # remove it completely
g$width
grid.draw(g)