Getting even figure widths in grid.arrange - r

I'm trying to plot three figures using grid.arrange of R'sgridExtra package. I want them to appear as 3 columns in one row, where the left most figure should have the y-axis but no legend, the middle figure no y-axis and no legend, and the right most figure should have no y-axis but should include the legend. That way the legend and y-axis, which are identical to all figures, appear only once.
Here are the data - they relate to gene ontology enrichment tests:
First, the color scheme of the legend - a color for each enrichment p-value range:
color.order <- c("#7d4343","#B20000","#C74747","#E09898","#EBCCD6","#C8C8C8")
names(color.order) <- c("(0-0.05]","(0.05-0.1]","(0.1-0.15]","(0.15-0.2]","(0.2-0.25]","(0.25-1]")
Then the figure data.frames:
df.g1 <- data.frame(category=c("C1-up","C1-down","C2-up","C2-down"),
p.value=c(0.4833,0.5114,0.3487,0.6522),log10.p.value=c(3.157832,2.912393,4.575481,1.856192),
col=c("(0.25-1]","(0.25-1]","(0.25-1]","(0.25-1]"),
col.cat=c("(0.25-1]","(0.25-1]","(0.25-1]","(0.25-1]"))
df.g2 <- data.frame(category=c("C1-up","C1-down","C2-up","C2-down"),
p.value=c(0.5345,0.4819,0.9986,0.0013),log10.p.value=c(2.720522905,3.170430737,0.006084383,28.860566477),
col=c("(0.25-1]","(0.25-1]","(0.25-1]","(0-0.05]"),
col.cat=c("(0.25-1]","(0.25-1]","(0.25-1]","(0-0.05]"))
df.g3 <- data.frame(category=c("C1-up","C1-down","C2-up","C2-down"),
p.value=c(0.2262,0.7703,0.9926,0.0080),log10.p.value=c(6.45507399,1.13340102,0.03225729,20.96910013),
col=c("(0.2-0.25]","(0.25-1]","(0.25-1]","(0-0.05]"),
col.cat=c("(0.2-0.25]","(0.25-1]","(0.25-1]","(0-0.05]"))
Putting them together in a list:
df.list <- list(g1=df.g1,g2=df.g2,g3=df.g3)
This is for the legend which associates p-value ranges with colors:
color.order <- c("#7d4343","#B20000","#C74747","#E09898","#EBCCD6","#C8C8C8")
names(color.order) <- c("(0-0.05]","(0.05-0.1]","(0.1-0.15]","(0.15-0.2]","(0.2-0.25]","(0.25-1]")
And the plot creation code:
library(ggplot2)
library(gridExtra)
ggplot.list <- vector(mode="list", length(df.list))
for(g in 1:length(df.list))
{
if(g==1){ #draw y-axis but no legend
ggplot.list[[g]] <- ggplot(df.list[[g]], aes(y=log10.p.value,x=category,fill=col))+
scale_fill_manual(drop=FALSE,values=color.order,name="Enrichment P-value",guide=F)+
geom_bar(stat="identity",width=0.2)+scale_y_continuous(limits=c(0,30),labels=c(seq(0,20,10)," >30"),expand=c(0,0))+
theme_bw()+theme(panel.border=element_blank(),axis.text=element_text(size=8),axis.title=element_text(size=8,face="bold"))+coord_flip()+theme(plot.margin=unit(c(0.1,1,0.1,0.1),"cm"),axis.title.y = element_text(size=8),axis.title.x = element_text(size=8))+labs(x="Category",y="-10log10(P-value)")+ggtitle(names(df.list)[g])
} else if(g==2){ #no y-axis and no legend
ggplot.list[[g]] <- ggplot(df.list[[g]], aes(y=log10.p.value,x=category,fill=col))+
scale_fill_manual(drop=FALSE,values=color.order,name="Enrichment P-value",guide=F)+
geom_bar(stat="identity",width=0.2)+scale_y_continuous(limits=c(0,30),labels = c(seq(0,20,10)," >30"),expand=c(0,0))+
theme_bw()+theme(panel.border=element_blank(),axis.text=element_text(size=8),axis.title=element_text(size=8,face="bold"))+coord_flip()+theme(plot.margin=unit(c(0.1,1,0.1,0.1),"cm"),axis.title.y = element_blank(),axis.text.y=element_blank(),axis.title.x = element_text(size=8))+labs(y="-10log10(P-value)")+ggtitle(names(df.list)[g])
} else if(g==3){ #only legend
ggplot.list[[g]] <- ggplot(df.list[[g]], aes(y=log10.p.value,x=category,fill=col))+
scale_fill_manual(drop=FALSE,values=color.order,name="Enrichment P-value")+
geom_bar(stat="identity",width=0.2)+scale_y_continuous(limits=c(0,30),labels = c(seq(0,20,10)," >30"),expand=c(0,0))+
theme_bw()+theme(panel.border=element_blank(),axis.text=element_text(size=8),axis.title=element_text(size=8,face="bold"))+coord_flip()+theme(plot.margin=unit(c(0.1,1,0.1,0.1),"cm"),axis.title.y = element_blank(),axis.text.y=element_blank(),axis.title.x = element_text(size=8))+labs(y="-10log10(P-value)")+ggtitle(names(df.list)[g])
}
}
This gives me almost what I need:
My problem is that the three figures have different widths. So my question is how do I make the widths identical?

This data seems tailor-made for faceting:
library(dplyr)
library(ggplot2)
color.order <- c("#7d4343","#B20000","#C74747","#E09898","#EBCCD6","#C8C8C8")
names(color.order) <- c("(0-0.05]","(0.05-0.1]","(0.1-0.15]","(0.15-0.2]","(0.2-0.25]","(0.25-1]")
df <- bind_rows(df.list, .id="grp")
df <- mutate(df, col=factor(col, levels=names(color.order)))
gg <- ggplot(df, aes(y=log10.p.value, x=category, fill=col))
gg <- gg + geom_bar(stat="identity", width=0.2)
gg <- gg + scale_y_continuous(limits=c(0,30), labels=c(seq(0,20,10)," >30"), expand=c(0,0))
gg <- gg + scale_fill_manual(drop=FALSE, values=color.order, name="Enrichment P-value")
gg <- gg + coord_flip()
gg <- gg + facet_wrap(~grp)
gg <- gg + labs(x="Category", y="-10log10(P-value)")
gg <- gg + theme_bw()
gg <- gg + theme(panel.border=element_blank(),
panel.margin=margin(1,1,1,1, unit="cm"),
axis.text=element_text(size=8),
axis.title=element_text(size=8,face="bold"),
axis.title.y=element_text(size=8),
axis.title.x=element_text(size=8),
strip.background=element_blank(),
plot.margin=margin(0.1, 0.1, 0.1, 0.1, unit="cm"))
gg

Related

Modifying Aesthetics - ggplot2

I am trying to learn ggplot2 and have made below plots:
Using this code:
library(ggplot2); library(gridExtra)
gg <- ggplot(mydata,aes(x=Level))
plot1 <- gg + geom_line(aes(y=Experience,colour="xp"),size=1) +
labs(title="xp")
g <- ggplot(mydata,aes(x=Level))
plot2 <- g + geom_line(aes(y=Experience,colour="xp"),size=1) + geom_line(aes(y=Accu,colour="accu"),size=1) +
labs(title="xp vs Accumulated")
grid.arrange(plot1,plot2,ncol=2)
Where mydata is a data frame containing 3 columns (Level, xp and accu) and 30 rows.
What I am wondering is:
How to get the y-axis on the left-hand plot to have the same form as the
right-hand plot.
How to make the color of "xp" the same in both plots
without removing the descriptions of what the lines represent.
How about this (with some random data)?
library(ggplot2)
library(gridExtra)
library(scales)
gg <- ggplot(mydata,aes(x=Level))
plot1 <- gg + geom_line(aes(y=Experience,colour="xp"),size=1) +
labs(title="xp") + scale_y_continuous(labels = comma) +
scale_colour_manual(values = c("red"))
g <- ggplot(mydata,aes(x=Level))
plot2 <- g + geom_line(aes(y=Experience,colour="xp"),size=1) +
geom_line(aes(y=Accu,colour="accu"),size=1) +
labs(title="xp vs Accumulated") + scale_y_continuous(labels = comma) +
scale_colour_manual(values = c("blue", "red"))
grid.arrange(plot1,plot2,ncol=2)

How do I combine multiple probability density functions into one ggplot?

I'm a complete R noob and I'm trying to combine multiple beta distributions into a single ggplot. Here is what I've got so far....
#to create a continuous probability function
x=seq(0,1,.01)
q=dbeta(x,2.2,1.7)
z=dbeta(x,3.3,1.2)
df=data.frame(x,q,z)
t=ggplot(df, aes(x)) + # basic graphical object
geom_line(aes(y=q), colour="red") + # first layer
geom_line(aes(y=z), colour="green")+ # second layer
xlab("X value")+ylab("PDF")+
ggtitle("Beta Probability Distribution Functions")
Where did I go wrong?
Here's an alternate approach to the plotting:
library(tidyr)
library(ggplot2)
set.seed(1492) # reproducible
x <- seq(0, 1, .01)
q <- dbeta(x, 2.2, 1.7)
z <- dbeta(x, 3.3, 1.2)
df <- data.frame(x, q, z)
Make it into a long data frame so we can use some inherent properties of ggplot2 w/r/t aesthetics.
df <- gather(df, func, val, -x)
Now, we can use aesthetic mapping vs hard-coding values and doing multiple geom_line()s.
gg <- ggplot(df, aes(x=x, y=val, group=func))
gg <- gg + geom_line(aes(color=func))
Tighten up the y axis limits a bit
gg <- gg + scale_y_continuous(expand=c(0, 0))
We can actually get a legend now (you could also remove it and manually label the lines with geom_text())
gg <- gg + scale_color_manual(name="Beta params",
values=c("#b2182b", "#4393c3"),
labels=c("α=2.2, β=1.7", "α=3.3, β=1.2"))
Combine multiple label statements into one.
gg <- gg + labs(x="x value", y="PDF",
title="Beta Probability Distribution Functions")
Remove some chart junk.
gg <- gg + theme_bw()
gg <- gg + theme(panel.border=element_blank())
gg <- gg + theme(axis.line=element_line(size=0.15, color="#2b2b2b"))
gg

Stacked bubble chart, "bottom aligned"

New to programming and first time post.
I'm trying to create a stacked bubble chart to display how a population breaks down into it's proportions. My aim is to write this as a function so that I can use it repeatedly easily, but I need to get the meat of the code sorted before turning it to a function.
This is the type of plot I would like:
This is the code I've tried so far:
library(ggplot2)
# some data
observations = c(850, 500, 200, 50)
plot_data = data.frame(
"x" = rep.int(1,length(observations))
,"y" = rep.int(1,length(observations))
, "size" = rep.int(1,length(observations))
,"colour" = c(1:length(observations))
)
# convert to percentage for relative sizes
for (i in 1:length(observations))
{
plot_data$size[i] = (observations[i]/max(observations))*100
}
ggplot(plot_data,aes(x = x, y = y)) +
geom_point(aes(size = size, color = colour)) +
scale_size_identity() +
scale_y_continuous (limits = c(0.5, 1.5)) +
theme(legend.position = "none")
This produces a bullseye type image.
My approach has been to try and work out how the circle radii are calculated, and then update the y value in the for loop for each entry such that all the circles touch at the base - this is where I have been failing.
So my question:
How can I work out what the y coordinates for each circle needs to be?
Thank you for any help and hints.
I think this simplifies the answer that Henrick found:
circle <- function(center, radius, group) {
th <- seq(0, 2*pi, len=200)
data.frame(group=group,
x=center[1] + radius*cos(th),
y=center[2] + radius*sin(th))
}
# Create a named vector for your values
obs <- c(Org1=500, Org2=850, Org3=50, Org4=200)
# this reverse sorts them (so the stacked layered circles work)
# and makes it a list
obs <- as.list(rev(sort(obs)))
# need the radii
rads <- lapply(obs, "/", 2)
# need the max
x <- max(sapply(rads, "["))
# build a data frame of created circles
do.call(rbind.data.frame, lapply(1:length(rads), function(i) {
circle(c(x, rads[[i]]), rads[[i]], names(rads[i]))
})) -> dat
# make the plot
gg <- ggplot(dat)
gg <- gg + geom_polygon(aes(x=x, y=y, group=group, fill=group),
color="black")
gg <- gg + coord_equal()
gg <- gg + ggthemes::theme_map()
gg <- gg + theme(legend.position="right")
gg
You can tweak the guides/colors with standard ggplot functions.

Display two parallel axes on a ggplot (R)

Let's say we have a simple plot of the following kind.
library(ggplot2)
df = data.frame(y=c(0,1.1,2.3,3.1,2.9,5.8,6,7.4,8.2,9.1),x=seq(1,100, length.out=10))
ggplot(df,aes(x=x,y=y)) + geom_point()
x perfectly correlates with z. The relation is: Constant=x^2*z=1.23
therefore I could rewrite the data.frame like this:
df = cbind(df,1.23/df$x^2)
The question is:
How can I display both variables xand zone the x-axis? It could be one at the bottom and one at the top of the graph or both at the bottom.
Here's a dangerous attempt. Previous version with a log-scale was just wrong.
library(ggplot2)
df = data.frame(y=c(0,1.1,2.3,3.1,2.9,5.8,6,7.4,8.2,9.1),
x=seq(1,100, length.out=10))
df$z = 1.23/df$x^2
## let's at least remove the gridlines
p1 <- ggplot(df,aes(x=x,y=y)) + geom_point() +
scale_x_continuous(expand=c(0,0)) +
theme(panel.grid.major=element_blank(),
panel.grid.minor = element_blank())
## make sure both plots have expand = c(0,0)
## otherwise data and top-axis won't necessarily be aligned...
p2 <- ggplot(df,aes(x=z,y=y)) + geom_point() +
scale_x_continuous(expand=c(0,0))
library(gtable)
g1 <- ggplotGrob(p1)
g2 <- ggplotGrob(p2)
tmp <- gtable_filter(g2, pattern="axis-b")
## ugly tricks to extract and reshape the axis
axis <- tmp[["grobs"]][[1]][["children"]][["axis"]] # corrupt the children
axis$layout <- axis$layout[2:1,]
axis$grobs[[1]][["y"]] <- axis$grobs[[1]][["y"]] - unit(1,"npc") + unit(0.15,"cm")
## back to "normality"
g1 <- gtable_add_rows(g1, sum(tmp$heights), 2)
gtableAddGrobs <- gtable_add_grob # alias, making sure #!hadley doesn't see this
g1 <- gtableAddGrobs(g1,
grobs=list(gtable_filter(g2, pattern="xlab"),axis),
t=c(1,3), l=4)
grid.newpage()
grid.draw(g1)
A both-on-the-bottom approach can be done with the excellent cowplot library.
library(ggplot2)
library(cowplot)
data <- data.frame(temp_c=runif(100, min=-5, max=30), outcome=runif(100))
plot <- ggplot(data) +
geom_point(aes(x=temp_c, y=outcome)) +
theme_classic() +
labs(x='Temperature (Celsius)')
x2plot <- ggplot(data) +
geom_point(aes(x=temp_c, y=outcome)) +
theme_classic() +
scale_x_continuous(label=function(x){round(x*(9/5) + 32)}) +
labs(x='Temperature (Fahrenehit)')
x <- get_x_axis(x2plot)
xl <- get_plot_component(x2plot, "xlab-b")
plot_grid(plot, ggdraw(x), ggdraw(xl), align='v', axis='rl', ncol=1,
rel_heights=c(0.8, 0.05, 0.05))

ggplot() lines transparency

How to change the transparency level of lines in ggplot() diagram (i.e. histogram, line plot, etc.)?
For instance consider the code below:
data <- data.frame(a=rnorm(100), b = rnorm(100,.5,1.2))
data <- melt(data)
colnames(data) <- c("Category", "Intensity")
p <- ggplot(data, aes(x=Intensity))
p <- p + geom_density(aes(color=Category), size=2, alpha=.4)
print(p)
I expected the lines would be transparent (as alpha=.4), but they're not.
Simply following #baptiste's directions,
data <- data.frame(a=rnorm(100), b = rnorm(100,.5,1.2))
data <- melt(data)
colnames(data) <- c("Category", "Intensity")
p <- ggplot(data, aes(x=Intensity))
p + geom_line(aes(color=Category), stat="density", size=2, alpha=0.4)

Resources