Related
I'm having trouble right aligning horizontal y axis titles for multiple plots in R ggplot2. I have a main plot which is a dendrogram with leaf labels created using the ggdendro package, and I have multiple color bars below the main plot with titles to the left. If I use grid.arrange to place the plots on the same page, I'm able to get good vertical spacing between the plots, but I'm not able to right-align the y axis titles for the color bars consistently. If I use plot_grid, I can right-align the y axis titles consistently, but I'm having trouble getting appropriate vertical spacing between plots. Any help would be appreciated!
Update: Two suggested solutions work equally well so I'm accepting the first one as the answer. Using ggarrange from the egg package and using plot_grid with align = "v" instead of align = "hv" both fixed my problem.
Create main plot and color bars:
require(ggplot2)
require(gridExtra)
require(cowplot)
require(ggdendro)
# Cluster the states (average linkage) and record, for every state, its
# cluster (6 groups). Factor levels follow hc$order so that the tiles in the
# color bars appear in the same left-to-right order as the clustering.
hc <- hclust(dist(USArrests), "ave")
df <- data.frame(
  cluster = cutree(hc, 6),
  states = factor(hc$labels, levels = hc$labels[hc$order])
)

# Main plot: the dendrogram. The x limits match those of the color bars and
# expansion is switched off so the limits are used exactly as given.
p1_dendro <- dendro_data(hc)
p1 <- ggdendrogram(hc) +
  coord_cartesian(
    xlim = c(-1, nrow(df) + 1),
    ylim = c(-1, max(p1_dendro$segments$y)),
    expand = FALSE
  )

# Build one color bar: a single row of tiles filled by cluster, with a
# horizontal, right-justified y axis title and every other decoration removed.
#   data  - data frame with `states` and `cluster` columns
#   title - text of the y axis title
make_color_bar <- function(data, title) {
  ggplot(data, aes(states, y = 1, fill = factor(cluster))) +
    ylab(title) +
    geom_tile() +
    theme_minimal() +
    coord_cartesian(xlim = c(-1, nrow(data) + 1), expand = FALSE) +
    theme(
      axis.title.x = element_blank(),
      axis.title.y = element_text(angle = 0, vjust = 0.5, hjust = 1),
      axis.ticks = element_blank(),
      axis.text = element_blank(),
      legend.position = "none",
      line = element_blank()
    )
}

p2 <- make_color_bar(df, "y label")
p3 <- make_color_bar(df, "a longer y label")
grid.arrange approach:
# Convert the three plots to gtables so their column widths can be edited.
gp1 <- ggplotGrob(p1)
gp2 <- ggplotGrob(p2)
gp3 <- ggplotGrob(p3)

# Give gtable columns 2 to 5 the same width in all three plots: the
# element-wise maximum across the plots.
# NOTE(review): these are presumably the columns left of the panel (y axis
# title / axis) - confirm against the gtable layout.
shared_cols <- 2:5
maxWidth <- grid::unit.pmax(
  gp1$widths[shared_cols],
  gp2$widths[shared_cols],
  gp3$widths[shared_cols]
)
gp1$widths[shared_cols] <- as.list(maxWidth)
gp2$widths[shared_cols] <- as.list(maxWidth)
gp3$widths[shared_cols] <- as.list(maxWidth)

# Stack in one column; relative heights 8 : 1 : 1.
grid.arrange(gp1, gp2, gp3, ncol = 1, heights = c(8, 1, 1))
plot_grid approach:
# Stack the three plots in one column with cowplot, relative heights 8 : 1 : 1.
# NOTE: this is the call that shows the vertical-spacing problem; per the
# update at the top of this post, using align = "v" instead of align = "hv"
# fixed it.
plot_grid(p1, p2, p3, ncol = 1, align = "hv", axis = "tblr", rel_heights = c(8,1,1))
The egg package will get the job done:
require(ggplot2)
require(ggdendro)
# Cluster the states (average linkage) and record, for every state, its
# cluster (6 groups). Factor levels follow hc$order so that the tiles in the
# color bars appear in the same left-to-right order as the clustering.
hc <- hclust(dist(USArrests), "ave")
df <- data.frame(
  cluster = cutree(hc, 6),
  states = factor(hc$labels, levels = hc$labels[hc$order])
)

# Main plot: the dendrogram. The x limits match those of the color bars and
# expansion is switched off so the limits are used exactly as given.
p1_dendro <- dendro_data(hc)
p1 <- ggdendrogram(hc) +
  coord_cartesian(
    xlim = c(-1, nrow(df) + 1),
    ylim = c(-1, max(p1_dendro$segments$y)),
    expand = FALSE
  )

# Build one color bar: a single row of tiles filled by cluster, with a
# horizontal, right-justified y axis title and every other decoration removed.
#   data  - data frame with `states` and `cluster` columns
#   title - text of the y axis title
make_color_bar <- function(data, title) {
  ggplot(data, aes(states, y = 1, fill = factor(cluster))) +
    ylab(title) +
    geom_tile() +
    theme_minimal() +
    coord_cartesian(xlim = c(-1, nrow(data) + 1), expand = FALSE) +
    theme(
      axis.title.x = element_blank(),
      axis.title.y = element_text(angle = 0, vjust = 0.5, hjust = 1),
      axis.ticks = element_blank(),
      axis.text = element_blank(),
      legend.position = "none",
      line = element_blank()
    )
}

p2 <- make_color_bar(df, "y label")
p3 <- make_color_bar(df, "a longer y label")
Stack p1, p2 and p3 together using ggarrange()
# Needs the egg package; if it is missing, install it first with
# install.packages("egg", dependencies = TRUE)
library(egg)

# One column: the dendrogram takes 8 parts of the height, each color bar 1.
panel_heights <- c(8, 1, 1)
ggarrange(p1, p2, p3, ncol = 1, heights = panel_heights)
Created on 2020-08-06 by the reprex package (v0.3.0)
I'm doing an arrangement of 2x2 plots. The plots share the same axis, so I want to put them together, e.g.
This code:
library(ggplot2)
library(cowplot)
# Toy data: eleven points, Index 0..10 against Value 0..1000.
Index <- 0:10
Value <- seq(0, 1000, by = 1000 / 10)
DF <- data.frame(Index, Value)

# Line plot (linetype 2) with the panel aspect ratio fixed at 0.5.
plot <- ggplot(DF, aes(x = Index, y = Value)) +
  geom_line(linetype = 2) +
  theme(aspect.ratio = 0.5)

# The same plot four times, two per row, aligned in both directions.
plot_grid(
  plot, plot,
  plot, plot,
  align = "hv",
  ncol = 2
)
produces
But I'd like something like:
How can I achieve a similar result?
I think this is a case for the ggarrange() function from the egg package. Doing this with plot_grid() would require endless fiddling and isn't worth it.
(The technical reason is that plot_grid() keeps the total area for each plot in the grid constant, but if some plots have an x axis and others don’t then they take up different areas. One could try to circumvent this by using the rel_heights argument but there’s no good way to calculate the correct values for rel_heights, so it would be trial and error. By contrast, ggarrange() separately looks at the plot panel and the surrounding elements and makes sure the plot panels have the same size.)
Here is the code using ggarrange():
# Toy data: eleven points, Index 0..10 against Value 0..1000.
Index <- 0:10
Value <- seq(0, 1000, by = 1000 / 10)
DF <- data.frame(Index, Value)

# Base plot that all four panels start from.
pbase <- ggplot(DF, aes(x = Index, y = Value)) +
  geom_line(linetype = 2) +
  theme_bw()

# Theme fragment that blanks the x axis title, text and ticks. The top panels
# combine it with an x axis moved to the top, so nothing is left below them.
blank_x_axis <- theme(
  axis.title.x = element_blank(),
  axis.text.x = element_blank(),
  axis.ticks.x = element_blank()
)

# Top row: x axis on top (and blanked); no margin on the sides that touch a
# neighbouring panel.
ptopleft <- pbase +
  scale_x_continuous(position = "top") +
  theme(plot.margin = margin(5.5, 0, 0, 5.5)) +
  blank_x_axis

ptopright <- pbase +
  scale_y_continuous(position = "right") +
  scale_x_continuous(position = "top") +
  theme(plot.margin = margin(5.5, 5.5, 0, 0)) +
  blank_x_axis

# Bottom row: x axis kept; right-hand panel has its y axis on the right.
pbottomleft <- pbase +
  theme(plot.margin = margin(0, 0, 5.5, 5.5))

pbottomright <- pbase +
  scale_y_continuous(position = "right") +
  theme(plot.margin = margin(0, 5.5, 5.5, 0))

# Arrange the four panels two per row.
library(egg)
ggarrange(
  ptopleft, ptopright,
  pbottomleft, pbottomright,
  ncol = 2
)
Two comments:
To remove every last bit of space below the plot panel on the top plots, we need to move the x axis to the top, even though we're not showing it. This is a strange limitation of the theming mechanism. We can't fully get rid of just one axis.
I'm not a big fan of shared axis titles, as in your example. I think each axis should have a title. If you want shared axis titles, why not use the faceting mechanism?
You can set a slightly different plot.margin for each plot, then combine the plots with grid.arrange and add the shared axis labels there.
library(ggplot2)
library(grid)
library(gridExtra)
# Toy data: eleven points, Index 0..10 against Value 0..1000.
Index <- 0:10
Value <- seq(0, 1000, by = 1000 / 10)
DF <- data.frame(Index, Value)

# Everything the four panels have in common: the line, a minimal theme with a
# panel border, fixed aspect ratio, and no axis titles or ticks.
panel_base <- ggplot(DF, aes(x = Index, y = Value)) +
  geom_line(linetype = 2) +
  theme_minimal() +
  theme(
    aspect.ratio = 0.5,
    panel.border = element_rect(fill = NA),
    axis.title = element_blank(),
    axis.ticks = element_blank()
  )

# Theme fragments: hide the x axis text (top row only) and set the plot
# margin in points, given as top, right, bottom, left.
no_x_text <- theme(axis.text.x = element_blank())
margin_pt <- function(top, right, bottom, left) {
  theme(plot.margin = unit(c(top, right, bottom, left), "pt"))
}

# Top row: no x axis text; negative bottom margin.
plot1 <- panel_base +
  no_x_text +
  margin_pt(5.5, 5.8, -50, 5.5)
plot2 <- panel_base +
  no_x_text +
  margin_pt(5.5, 5.5, -50, 5.5) +
  scale_y_continuous(position = "right")

# Bottom row: x axis text kept; negative top and bottom margins.
plot3 <- panel_base +
  margin_pt(-50, 5.8, -50, 5.5)
plot4 <- panel_base +
  margin_pt(-50, 5.5, -50, 5.5) +
  scale_y_continuous(position = "right")

# Arrange two per row and add the shared axis labels around the grid.
grid.arrange(
  grobs = list(plot1, plot2, plot3, plot4),
  ncol = 2,
  bottom = "Index",
  left = "Value",
  right = "Value"
)
final plot
I have a dataset with a lot of overlapping points and used ggplot to create a bubble plot to show that data. I need to add bars on my plot for the means of each group on the x axis (values can be 0, 1, or 2). I have tried to use geom_errorbar but haven't been able to get it to work with my data. Any help/suggestions would be greatly appreciated.
The following is my code and a script to generate fake data that is similar:
# Possible values: scores 0, 0.5, ..., 3.5 and copy numbers 0, 1, 2.
y <- seq(from = 0, to = 3.5, by = 0.5)
x <- seq(from = 0, to = 2, by = 1)

# Draw 100 random (copies, score) pairs with replacement. TRUE is spelled
# out because T is an ordinary variable that can be reassigned.
xnew <- sample(x, 100, replace = TRUE)
ynew <- sample(y, 100, replace = TRUE)
data <- data.frame(xnew, ynew)

# Collapse to one row per distinct (x, y) pair and count how many
# observations fall on it; the count drives the bubble size.
data2 <- aggregate(data$xnew, by = list(x = data$xnew, y = data$ynew), length)
names(data2)[3] <- "Count"
# Look of the plot: legend below in one row, solid black axis lines, and no
# grid, panel border or panel background.
bubble_theme <- theme(
  legend.position = "bottom",
  legend.direction = "horizontal",
  axis.line = element_line(size = 1, colour = "black"),
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank(),
  panel.border = element_blank(),
  panel.background = element_blank(),
  axis.text.x = element_text(colour = "black", size = 10),
  axis.text.y = element_text(colour = "black", size = 10)
)

# Bubble plot: one point per (x, y) pair, sized by the number of observations.
ggplot(data2, aes(x = x, y = y)) +
  geom_point(aes(size = Count)) +
  labs(x = "Copies", y = "Score") +
  aes(ymax = ..y.., ymin = ..y..) +
  scale_x_continuous(breaks = seq(0, 2, 1)) +
  scale_y_continuous(breaks = seq(0, 3, 0.5)) +
  bubble_theme
I am not entirely sure that I understand your question correctly. It seems to me that in addition to the bubbles, you want to visualise the mean value of y for each value of x as a bar of some kind. (You mention error bars, but it seems that this is not a requirement, but just what you have tried. I will use geom_col() instead.)
I assume that you want to weigh the mean over y by the counts, i.e., sum(y * Count) / sum(Count). You can create a data frame that contains these values by using dplyr:
library(dplyr)

# Count-weighted mean of y for every value of x:
# sum(y * Count) / sum(Count). summarise() returns one row per x.
data2_mean <- data2 %>%
  group_by(x) %>%
  summarise(y = sum(y * Count) / sum(Count))

# Example output (the data are random, so the exact values will differ):
data2_mean
## # A tibble: 3 × 2
## x y
## <dbl> <dbl>
## 1 0 1.833333
## 2 1 1.750000
## 3 2 2.200000
When creating the plot, I use data2 as the data set for geom_point() and data2_mean as the data set for geom_col(). It is important to put the bars first, since the bubbles should be on top of the bars.
# Look of the plot: legend below in one row, solid black axis lines, and no
# grid, panel border or panel background.
bubble_theme <- theme(
  legend.position = "bottom",
  legend.direction = "horizontal",
  axis.line = element_line(size = 1, colour = "black"),
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank(),
  panel.border = element_blank(),
  panel.background = element_blank(),
  axis.text.x = element_text(colour = "black", size = 10),
  axis.text.y = element_text(colour = "black", size = 10)
)

# Bars (from data2_mean) are added before the bubbles (from data2) so that the
# bubbles are drawn on top of the bars.
ggplot() +
  geom_col(
    mapping = aes(x = x, y = y),
    data = data2_mean,
    fill = "gray60",
    width = 0.7
  ) +
  geom_point(
    mapping = aes(x = x, y = y, size = Count),
    data = data2
  ) +
  labs(x = "Copies", y = "Score") +
  scale_x_continuous(breaks = seq(0, 2, 1)) +
  scale_y_continuous(breaks = seq(0, 3, 0.5)) +
  bubble_theme
Everything that I changed compared to your code comes before scale_x_continuous(). This produces the following plot:
Is this what you're after? I first calculated the group-level means using the dplyr package and then added line segments to your plot using geom_segment:
library(ggplot2)
library(dplyr)
# Mean score per x group. data2 holds one row per distinct (x, y) pair plus
# its Count, so the group mean has to be weighted by Count; a plain mean(y)
# would only average the distinct y values. ungroup() drops the grouping so
# later operations on data2 are not silently done per group.
data2 <- data2 %>%
  group_by(x) %>%
  mutate(mean.y = weighted.mean(y, Count)) %>%
  ungroup()

# Bubble plot plus a short horizontal segment at each group's mean, spanning
# x - 0.25 to x + 0.25. The former aes(ymax = ..y.., ymin = ..y..) line was
# dropped: neither geom_point nor geom_segment uses ymin/ymax.
ggplot(data2, aes(x = x, y = y)) +
  geom_point(aes(size = Count)) +
  geom_segment(aes(y = mean.y, yend = mean.y, x = x - 0.25, xend = x + 0.25)) +
  labs(x = "Copies", y = "Score") +
  scale_x_continuous(breaks = seq(0, 2, 1)) +
  scale_y_continuous(breaks = seq(0, 3, 0.5)) +
  theme(
    legend.position = "bottom",
    legend.direction = "horizontal",
    axis.line = element_line(size = 1, colour = "black"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    panel.background = element_blank(),
    axis.text.x = element_text(colour = "black", size = 10),
    axis.text.y = element_text(colour = "black", size = 10)
  )
I tried to plot a complex heatmap of cancer somatic mutation data using ggplot2.
The data is here, and here is the code:
library(reshape2)
library(ggplot2)
library(scales)
library(gridExtra)
library(ggdendro)
library(zoo)
library(plyr)
# Data processing ----
# Read the table from mm8.csv; the first line of the file holds the headers.
mm8 <- read.csv("mm8.csv", header = TRUE)

# Use the X column as row names, then drop the second column.
rownames(mm8) <- mm8$X
mm8 <- mm8[, -2]

# Quick look at rows 1-4, columns 2-5.
mm8[1:4, 2:5]
# Cluster the data and annotate the dendrogram segments with cluster numbers.
# Adapted from http://stackoverflow.com/questions/21474388/colorize-clusters-in-dendogram-with-ggplot2
# Transpose mm8 and drop the first row of the transposed matrix, so that
# dist() below computes distances between the remaining rows of df.
df<-t(mm8)
df<-df[-1,]
cut <- 4 # Number of clusters
hc <- hclust(dist(df), "ave") # hierarchical clustering (average linkage)
dendr <- dendro_data(hc, type = "rectangle")
clust <- cutree(hc, k = cut) # find 'cut' clusters
clust.df <- data.frame(label = names(clust), cluster = clust)
# Split dendrogram into upper grey section and lower coloured section
# height: the distinct segment y values, sorted from largest to smallest.
height <- unique(dendr$segments$y)[order(unique(dendr$segments$y), decreasing = TRUE)]
# cut.height: midpoint between the cut-th and (cut-1)-th largest heights.
cut.height <- mean(c(height[cut], height[cut-1]))
# line = 1 for horizontal segments (y == yend) lying above cut.height, and
# for any segment whose lower end (yend) is above cut.height; 2 otherwise.
dendr$segments$line <- ifelse(dendr$segments$y == dendr$segments$yend &
dendr$segments$y > cut.height, 1, 2)
dendr$segments$line <- ifelse(dendr$segments$yend > cut.height, 1, dendr$segments$line)
# Number the clusters
# diff(line) equals 1 exactly where `line` goes from 1 to 2; those rows are
# stored in `change` and the first `cut` of them are numbered 2 .. cut + 1.
dendr$segments$cluster <- c(-1, diff(dendr$segments$line))
change <- which(dendr$segments$cluster == 1)
for (i in 1:cut) dendr$segments$cluster[change[i]] = i + 1
# Segments with line == 1 get cluster 1; rows still at 0 become NA and are
# then filled with the last preceding non-NA value by na.locf().
dendr$segments$cluster <- ifelse(dendr$segments$line == 1, 1,
ifelse(dendr$segments$cluster == 0, NA, dendr$segments$cluster))
dendr$segments$cluster <- na.locf(dendr$segments$cluster)
# Consistent numbering between segment$cluster and label$cluster
# NOTE(review): this relies on dendr$labels$label being a factor (levels() is
# NULL for a character vector) - confirm for the installed ggdendro version.
clust.df$label <- factor(clust.df$label, levels = levels(dendr$labels$label))
clust.df <- arrange(clust.df, label)
# Relabel clusters in order of first appearance as 2 .. cut + 1.
clust.df$cluster <- factor((clust.df$cluster), levels = unique(clust.df$cluster), labels = (1:cut) + 1)
dendr[["labels"]] <- merge(dendr[["labels"]], clust.df, by = "label")
# Positions for cluster labels
# Run-length encode the segment cluster numbers, take the end index of every
# other run (1st, 3rd, ...) plus one, and keep those segment rows in N.df.
n.rle <- rle(dendr$segments$cluster)
N <- cumsum(n.rle$lengths)
N <- N[seq(1, length(N), 2)] + 1
N.df <- dendr$segments[N, ]
N.df$cluster <- N.df$cluster - 1
# Plot the dendrogram: one segment per dendrogram edge, with line width taken
# from `line` and colour from `cluster`; "grey60" is the first colour, followed
# by `cut` rainbow colours. All axes, grid and backgrounds are removed.
# show.legend replaces the deprecated show_guide argument, and "none"
# replaces the deprecated guides(fill = FALSE).
p3 <- ggplot() +
  geom_segment(
    data = segment(dendr),
    aes(
      x = x, y = y, xend = xend, yend = yend,
      size = factor(line), colour = factor(cluster)
    ),
    lineend = "square",
    show.legend = FALSE
  ) +
  scale_colour_manual(values = c("grey60", rainbow(cut))) +
  scale_size_manual(values = c(.1, 1)) +
  labs(x = NULL, y = NULL) +
  guides(fill = "none") +
  theme(
    axis.line.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.text.y = element_blank(),
    axis.title.y = element_blank(),
    axis.ticks.x = element_blank(),
    axis.text.x = element_blank(),
    axis.title.x = element_blank(),
    panel.background = element_blank(),
    panel.grid = element_blank(),
    plot.background = element_blank()
  )
# Preparing the cluster bar: one raster cell per label, filled by cluster,
# with no legend, axes, grid or backgrounds.
# "none" replaces the deprecated guides(fill = FALSE).
p4 <- ggplot(clust.df, aes(x = label, y = 1, fill = cluster)) +
  geom_raster() +
  guides(fill = "none") +
  theme(
    axis.line.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.text.y = element_blank(),
    axis.title.y = element_blank(),
    axis.ticks.x = element_blank(),
    axis.text.x = element_blank(),
    axis.title.x = element_blank(),
    panel.background = element_blank(),
    panel.grid = element_blank(),
    plot.background = element_blank()
  )
# Data for ggplot2 geom_raster: reshape mm8 to long format and name the
# three resulting columns.
data.m <- melt(mm8)
colnames(data.m) <- c("Var1", "Var2", "value")
head(data.m)

# Heatmap: Var2 on x, Var1 on y, filled by value; no axes or plot background.
p1 <- ggplot(data.m, aes(Var2, Var1)) +
  geom_raster(aes(fill = value), colour = "white") +
  theme(
    axis.ticks = element_blank(),
    axis.text = element_blank(),
    axis.title = element_blank(),
    plot.background = element_blank()
  )

# Side bar chart: negated values stacked per Var1, flipped to horizontal.
# The data frame is inherited from ggplot(); the first positional argument
# of geom_bar() is `mapping`, so passing data.m there is an error.
# The plot is built once (the original assigned p2 twice in a row), and
# "none" replaces the deprecated guides(fill = FALSE).
p2 <- ggplot(data.m, aes(Var1, value * (-1))) +
  geom_bar(aes(fill = Var2), position = "stack", stat = "identity") +
  coord_flip() +
  guides(fill = "none") +
  theme(
    axis.ticks.x = element_blank(),
    axis.text.x = element_blank(),
    axis.title.x = element_blank(),
    plot.background = element_blank()
  )
# Plotting 4 panels on a page
# vplayout(x, y): viewport covering rows `x` and columns `y` of the enclosing
# grid layout. grid functions are called with the grid:: prefix because the
# grid package is not attached by the library() calls of this script.
vplayout <- function(x, y) {
  grid::viewport(layout.pos.row = x, layout.pos.col = y)
}

# Open graphic device.
# NOTE: win.graph() and savePlot(type = "emf") are only available on Windows.
win.graph(width = 860 / 72, height = 450 / 72, pointsize = 12)

# Start a blank page and split it into a 24-row by 50-column layout.
grid::grid.newpage()
grid::pushViewport(grid::viewport(layout = grid::grid.layout(24, 50)))

# Place each plot in its block of layout cells (rows, columns).
print(p2, vp = vplayout(5:24, 1:10))
print(p1, vp = vplayout(5:24, 10:50), newpage = FALSE)
print(p3, vp = vplayout(1:3, 9:47), newpage = FALSE)
print(p4, vp = vplayout(3:5, 10:46), newpage = FALSE)

# Save the page, then close the device.
savePlot(filename = "complex", type = "emf")
dev.off()
And I got the picture like this:
1) How can p1, p3 and p4 be aligned automatically, so that for each sample on the x axis its features line up across p1, p3 and p4?
2) Are there any good ways to control the space between panels, for example to reduce the space between p1 and p2, or between p1 and p4?
3) How can the samples on the x axis be reordered according to the clustering results, and how can that order be applied to p1, p3 and p4 simultaneously?
I wish to plot a number of tightly spaced graphs as illustrated by the following toy example:
library(ggplot2)
library(gridExtra)
# Reproducible toy data: two standard-normal columns and a constant dummy
# column that is used later for faceting.
set.seed(314159)
n <- 100
data <- data.frame(x = rnorm(n), y = rnorm(n), z = rep("dummy var", n))

# Decoration shared by all four panels: zero plot margin and no axis text,
# titles, ticks or labels.
bare_panel <- list(
  theme(
    plot.margin = unit(c(0, 0, 0, 0), units = "lines"),
    axis.text = element_blank(),
    axis.title = element_blank(),
    axis.ticks = element_blank()
  ),
  labs(x = NULL, y = NULL)
)

# Densities on the diagonal, scatter plots off the diagonal.
p00 <- ggplot(data, aes(x)) + stat_density() + bare_panel
p01 <- ggplot(data, aes(x, y)) + geom_point() + bare_panel
p10 <- ggplot(data, aes(y, x)) + geom_point() + bare_panel
p11 <- ggplot(data, aes(y)) + stat_density() + bare_panel

grid.arrange(p00, p01, p10, p11, ncol = 2)
In spite of my best efforts, I have been unable to overcome a complication that arises when I attempt to do so after having removed the facet strips from my graphs. In the following example, I have added horizontal and vertical strips to each graph by faceting on a dummy variable:
# Facet a panel on the dummy variable z in both directions.
with_dummy_facets <- function(p) {
  p + facet_grid(z ~ z)
}

p00 <- with_dummy_facets(p00)
p01 <- with_dummy_facets(p01)
p10 <- with_dummy_facets(p10)
p11 <- with_dummy_facets(p11)

grid.arrange(p00, p01, p10, p11, ncol = 2)
Next I remove the strips according to the procedure outlined in this post. However, the resulting graphs are rather widely spaced by comparison:
# Theme that blanks the strip background and text and sets the plot margin
# to 0 (top), 0.5 (right), 0.5 (bottom), 0 (left) lines.
blank_strips <- theme(
  plot.margin = unit(c(0, 0.5, 0.5, 0), units = "lines"),
  strip.background = element_blank(),
  strip.text = element_blank()
)

p00 <- p00 + blank_strips
p01 <- p01 + blank_strips
p10 <- p10 + blank_strips
p11 <- p11 + blank_strips

grid.arrange(p00, p01, p10, p11, ncol = 2)
Any suggestions on how to reduce the spacing between graphs would be much appreciated.
To remove all elements associated with the axes, in addition to the elements you have set to element_blank, the tick margins and tick lengths need to be set to zero. But space will remain for the facet strips. Setting the background and text to element_blank does not affect the height and width of the strips. To remove the strips, I use functions that manipulate the gtable layout. However, I think it is better to leave some white space between the plots. I have set a small plot margin to 0.2 lines.
library(ggplot2)
library(gridExtra)
# Reproducible toy data with two constant dummy columns (z1, z2) for faceting.
set.seed(314159)
n <- 100
data <- data.frame(
  x = rnorm(n),
  y = rnorm(n),
  z1 = rep("dummy var", n),
  z2 = rep("dummy var", n)
)

# Shared decoration. The objects are named `theme` and `labs`, like the
# ggplot2 functions that create them; calls such as theme(...) still reach
# the functions.
#  - a small 0.2-line margin on every side
#  - no axis text, titles or ticks, and zero tick length so the ticks take
#    up no space
theme <- theme(
  plot.margin = unit(rep(.2, 4), units = "lines"),
  axis.text = element_blank(),
  axis.title = element_blank(),
  axis.ticks = element_blank(),
  axis.ticks.length = unit(0, "lines")
)
labs <- labs(x = NULL, y = NULL)

# Densities on the diagonal, scatter plots off the diagonal, all faceted
# on the dummy variables.
p00 <- ggplot(data, aes(x)) +
  stat_density() + theme + labs + facet_grid(z1 ~ z2)
p01 <- ggplot(data, aes(x, y)) +
  geom_point() + theme + labs + facet_grid(z1 ~ z2)
p10 <- ggplot(data, aes(y, x)) +
  geom_point() + theme + labs + facet_grid(z1 ~ z2)
p11 <- ggplot(data, aes(y)) +
  stat_density() + theme + labs + facet_grid(z1 ~ z2)
This is where the gtable layout is manipulated.
# Collect the four plots in a list and convert each one to a gtable.
pList <- list(p00, p01, p10, p11)
gList <- lapply(pList, ggplotGrob)

# Top strip: find its layout entry in the first gtable, then delete that
# gtable row from every plot.
first_layout <- gList[[1]]$layout
stripT <- first_layout[grepl("strip-t", first_layout$name), ]
gList <- lapply(gList, function(g) g[-stripT$t, ])

# Right strip: same idea, looked up after the top strip is gone, deleting
# the gtable column instead.
first_layout <- gList[[1]]$layout
stripR <- first_layout[grepl("strip-r", first_layout$name), ]
gList <- lapply(gList, function(g) g[, -stripR$r])

# Draw the revised plots in a grid with floor(sqrt(number of plots)) columns.
nCol <- floor(sqrt(length(pList)))
grid.arrange(grobs = gList, ncol = nCol)
Edit: Using revised data and plot.
library(grid)
# Revised data: u alternates between 1 and 0, so faceting on it gives two
# panels side by side.
data <- data.frame(
  x = rnorm(n),
  y = rnorm(n),
  z = rep("dummy var", n),
  u = seq(1, n) %% 2
)
p01 <- ggplot(data, aes(x, y)) +
  geom_point() + theme + labs + facet_grid(z ~ u)

# Convert to a gtable and drop the top strip row and the right strip column.
g <- ggplotGrob(p01)
stripT <- subset(g$layout, grepl("strip-t", g$layout$name))
g <- g[-stripT$t, ]
stripR <- subset(g$layout, grepl("strip-r", g$layout$name))
g <- g[, -stripR$r]

# grid.draw() paints onto the current page, so start a fresh page first.
grid.newpage()
grid.draw(g) # Still got the space between the facets

g$widths # where is the space? it's the 5.55 pt width
g$widths[[5]] <- unit(0, "lines") # remove it completely
g$widths # full element name; `g$width` only worked through partial matching

grid.newpage()
grid.draw(g)