Putting expressions (plotmath) in the legend key labels - r

How can I put expressions (plotmath) into the legend key labels of the following plot?
I am aware of How to use Greek symbols in ggplot2? and the link therein, but whenever I use scale_..._manual function, I obtain a second legend (see below).
require(ggplot2)
require(reshape2)
require(plyr)
## parameters
d <- c(2, 5, 10, 20, 50, 100)
tau <- c("t1", "t2", "t3")
fam <- c("f1", "f2", "f3", "f4", "f5")
meth <- c("m1", "m2", "m3", "m4")
## lengths
nd <- length(d)
ntau <- length(tau)
nfam <- length(fam)
nmeth <- length(meth)
## build result array containing the measurements
arr <- array(rep(NA, nd*ntau*nfam*nmeth), dim=c(nd, ntau, nfam, nmeth),
dimnames=list(d=d, tau=tau, fam=fam, meth=meth))
for(i in 1:nd){
for(j in 1:ntau){
for(k in 1:nfam){
for(l in 1:nmeth){
arr[i,j,k,l] <- i+j+k+l+runif(1)
}
}
}
}
## create molten data
mdf <- reshape2:::melt.array(arr, formula = . ~ d + tau + fam + meth) # create molten data frame
mdf$tau. <- factor(mdf$tau, levels=tau, labels=paste("tau==", tau, sep="")) # expression for tau
mdf$fam. <- factor(mdf$fam, levels=fam, labels=paste("alpha==", fam, sep="")) # expression for family
meth.labs <- lapply(1:nmeth, function(i) bquote(gamma==.(i))) # expression for methods
## plot
ggplot(mdf, aes(x=d, y=value, shape=meth, linetype=meth)) + geom_line() +
geom_point() + facet_grid(fam. ~ tau., scales="free_y", labeller=label_parsed) +
## scale_linetype_manual(values=1:4, breaks=meth, labels=meth.labs) + # problem: adds another legend
scale_x_continuous(trans="log10", breaks=d, labels=d) +
scale_y_continuous(trans="log10")

If I use both scale_*_manual functions I get a single legend with the expression:
ggplot(mdf, aes(x=d, y=value, shape=meth, linetype=meth)) + geom_line() +
geom_point() + facet_grid(fam. ~ tau., scales="free_y", labeller=label_parsed) +
## scale_linetype_manual(values=1:4, breaks=meth, labels=meth.labs) + # problem: adds another legend
scale_x_continuous(trans="log10", breaks=d, labels=d) +
scale_y_continuous(trans="log10") +
scale_linetype_manual(breaks = c('m1','m2','m3','m4'),values = 1:4,labels = meth.labs) +
scale_shape_manual(breaks = c('m1','m2','m3','m4'),values = 1:4,labels = meth.labs)

Related

Getting a variable to pass into function in R (ggplot2)

I'm trying to plot a graph between two columns of data from the data frame called "final". I want the p value and r^2 value to show up on the graph.
I'm using this function and code, but it gives me the error "cannot find y value"
library(ggplot2)
lm_eqn <- function(final, x, y){
m <- lm(final[,y] ~ final[,x])
output <- paste("r.squared = ", round(summary(m)$adj.r.squared, digits = 4), " | p.value = ", formatC(summary(m)$coefficients[8], format = "e", digits = 4))
return(output)
}
output_plot <- lm_eqn(final, x, y)
p1 <- ggplot(final, aes(x=ENSG00000153563, y= ENSG00000163599)) + geom_point() + geom_smooth(method=lm, se=FALSE) + labs(x = "CD8A", y = "CTLA-4") + ggtitle("CD8 v/s CTLA-4", subtitle = paste("Linear Regression of Expression |", output_plot))
How do I get both columns of data x and y to flow through the function and for the graph to plot with the p value and residual value printed on the graph?
Thanks in advance.
When you call function for output_plot generation you have to use the same ENS... variables as in your plot. After simplifying slightly function, should work now
library(stats)
library(ggplot2)
lm_eqn <- function(x, y){
m <- lm(y ~ x)
output <- paste("r.squared = ", round(summary(m)$adj.r.squared, digits = 4), " | p.value = ", formatC(summary(m)$coefficients[8], format = "e", digits = 4))
return(output)
}
x <-c(1,2,5,2,3,6,7,0)
y <-c(2,3,5,9,8,3,3,1)
final <- data_frame(x,y)
output_plot <- lm_eqn(x, y)
p1 <- ggplot(final, aes(x=x, y= y)) + geom_point() + geom_smooth(method=lm, se=FALSE) + labs(x = "x", y = "y") + ggtitle("CD8 v/s CTLA-4", subtitle = paste("Linear Regression of Expression |", output_plot))

How can I add significant brackets between facets of a faceted R plot using ggplot?

I’m using the following code to plot longitudinal data with the facet_grid() option. I would like to indicate significant group differences between the facet grids using brackets and asterisks. However, so far I could only add text/lines within the individual grids, and not between them.
for(i in seq_along(varlist)){
p <- ggplot(data = Plot, aes(x = Timepoint , y = eval(parse(text = varlist[i])),
group = Sub_ID, colour = Subgroup)) + geom_point() +
geom_line(linetype = "dashed")
r <- p + stat_smooth(aes(group = 1, method = "lm")) + stat_summary(aes(group = 1),
geom = "point", fun.y = mean, shape = 17, size = 5) + facet_grid(. ~ Subgroup)
ggsave(filename=paste(varlist[i],"_by_subgroup.jpg", sep=""),width = 10, height = 7.5)
}
Load the libraries
require(data.table)
require(ggplot2)
require(gtable)
Make toy data
data0 <- data.table(iris)[,list(Mean.Sepal.Length=mean(.SD[,Sepal.Length]),Mean.Petal.Length=mean(.SD[,Petal.Length])),by=list(Species)]
data1 <- melt(data0,id.vars="Species")
## ## Draw the bars
p <- ggplot(data=data1,aes(x=variable,y=value,fill=variable)) +
geom_bar(stat="identity") +
facet_grid(~Species) +
scale_x_discrete(breaks=NULL)
p
Draw the brackets and asterisks
## make function to rescale the coordinates to npc
scale_to_npc <- function(x, range) scales::rescale(c(range, x), c(0,1))[-c(1,2)]
scale_x <- function(x,facet,ranges){scale_to_npc(x,ranges[[facet]][["x.range"]])}
scale_y <- function(y,facet,ranges){scale_to_npc(y,ranges[[facet]][["y.range"]])}
## build grobs and get the ranges
gb <- ggplot_build(p)
g <- ggplot_gtable(gb)
## gtable_show_layout(g)
ranges <- gb$panel$ranges
## get and rescale the coordinates
y1 <- data1[variable=="Mean.Petal.Length",min(value)]
y3 <- data1[,max(value)]
y4 <- data1[variable=="Mean.Petal.Length",max(value)]
data2 <- data.frame(x.=c(2,2,2,2,1.5),y.=c(y1,y3*1.01,y3*1.01,y4,y3*1.01),facet=c(1,1,3,3,2))
data2b <- data.frame(
x=mapply(scale_x,data2[,1],data2[,3],MoreArgs=list(ranges =ranges)),
y=mapply(scale_y,data2[,2],data2[,3],MoreArgs=list(ranges=ranges))
)
## draw the brackets and asterisks
g <- gtable_add_grob(g, moveToGrob(data2b[1,1],data2b[1,2]),t=4,l=4,b=4,r=4)
g <- gtable_add_grob(g, lineToGrob(data2b[2,1],data2b[2,2]),t=4.5,l=4,b=4,r=4)
g <- gtable_add_grob(g, moveToGrob(data2b[2,1],data2b[2,2]),t=4.5,l=4,b=4,r=4)
g <- gtable_add_grob(g, lineToGrob(data2b[3,1],data2b[3,2]),t=4,l=8,b=4,r=8)
g <- gtable_add_grob(g, moveToGrob(data2b[3,1],data2b[3,2]),t=4,l=8,b=4,r=8)
g <- gtable_add_grob(g, lineToGrob(data2b[4,1],data2b[4,2]),t=4.5,l=8,b=4,r=8)
g <- gtable_add_grob(g, textGrob("***",data2b[5,1],data2b[5,2]),t=4,l=4,b=4,r=8)
## turn clip off to allow the line across panels
g$layout$clip <- "off"
grid.newpage()
grid.draw(g)

geom_raster faceted plot with ggplot2: control row height

In the example below I have a dataset containing two experiments F1 and F2. A classification is performed based on F1 signal, and both F1 and F2 values are ordered accordingly. In this diagram, each facet has the same dimension although the number of rows is not the same (e.g class #7 contains only few elements compare to the other classes). I would like to modify the code to force row height to be the same across facets (facets would thus have various blank space below). Any hints would be greatly appreciated.
Thank you
library(ggplot2)
library(reshape2)
set.seed(123)
# let's create a fake dataset
nb.experiment <- 4
n.row <- 200
n.col <- 5
nb.class <- 7
d <- matrix(round(runif(n.row * n.col),2), nc=n.col)
colnames(d) <- sprintf("%02d", 1:5)
# These strings will be the row names of each heatmap
# in the subsequent facet plot
elements <- sample(replicate(n.row/2, rawToChar(as.raw(sample(65:90, 6, replace=T)))))
# let's create a data.frame d
d <- data.frame(d,
experiment = sort(rep(c("F1","F2"), n.row/2)),
elements= elements)
# Now we split the dataset by experiments
d.split <- split(d, d$experiment)
# Now we create classes (here using hierarchical clustering )
# based on F1 experiment
dist.mat <- as.dist(1-cor(t(d.split$F1[,1:5]), method="pearson"))
hc <- hclust(dist.mat)
cuts <- cutree(hc, nb.class)
levels(cuts) <- sprintf("Class %02d", 1:nb.experiment)
# We split F1 and F2 based on classification result
for(s in names(d.split)){
d.split[[s]] <- split(d.split[[s]], cuts)
}
# Data are melt (their is perhaps a better solution...)
# in order to use the ggplot function
dm <- melt(do.call('rbind',lapply(d.split, melt)), id.var=c( "experiment", "elements", "variable", "L1"))
dm <- dm[, -5]
colnames(dm) <- c("experiment","elements", "pos", "class", "exprs")
dm$class <- as.factor(dm$class)
levels(dm$class) <- paste("Class", levels(dm$class))
# Now we plot the data
p <- ggplot(dm, aes(x = pos, y = elements, fill = exprs))
p <- p + geom_raster()
p <- p + facet_wrap(~class +experiment , scales = "free", ncol = 2)
p <- p + theme_bw()
p <- p + theme(text = element_text(size=4))
p <- p + theme(text = element_text(family = "mono", face = "bold"))
print(p)
Use facet_grid instead of facet_wrap and set the space attribute:
ggplot(dm, aes(x = pos, y = elements, fill = exprs)) +
geom_raster() +
facet_grid(rowMeanClass ~ experiment , scales = "free", space = "free_y") +
theme_bw()

How to create faceted linear regression plot using GGPLOT

I have a data frame created the following way.
library(ggplot2)
x <- data.frame(letters[1:10],abs(rnorm(10)),abs(rnorm(10)),type="x")
y <- data.frame(letters[1:10],abs(rnorm(10)),abs(rnorm(10)),type="y")
# in reality the number of row could be larger than 10 for each x and y
all <- rbind(x,y)
colnames(all) <- c("name","val1","val2","type")
What I want to do is to create a faceted ggplot that looks roughly like this:
Hence each facet above is the correlation plot of the following:
# Top left facet
subset(all,type=="x")$val1
subset(all,type=="y")$val1
# Top right facet
subset(all,type=="x")$val1
subset(all,type=="y")$val2
# ...etc..
But I'm stuck with the following code:
p <- ggplot(all, aes(val1, val2))+ geom_smooth(method = "lm") + geom_point() +
facet_grid(type ~ )
# Calculate correlation for each group
cors <- ddply(all, c(type ~ ), summarise, cor = round(cor(val1, val2), 2))
p + geom_text(data=cors, aes(label=paste("r=", cor, sep="")), x=0.5, y=0.5)
What's the right way to do it?
Some of your code was incorrect. This works for me:
p <- ggplot(all, aes(val1, val2))+ geom_smooth(method = "lm") + geom_point() +
facet_grid(~type)
# Calculate correlation for each group
cors <- ddply(all, .(type), summarise, cor = round(cor(val1, val2), 2))
p + geom_text(data=cors, aes(label=paste("r=", cor, sep="")), x=1, y=-0.25)
Edit: Following OP's comment and edit. The idea is to re-create the data with all four combinations and then facet.
# I consider the type in your previous data to be xx and yy
dat <- data.frame(val1 = c(rep(all$val1[all$type == "x"], 2),
rep(all$val1[all$type == "y"], 2)),
val2 = rep(all$val2, 2),
grp1 = rep(c("x", "x", "y", "y"), each=10),
grp2 = rep(c("x", "y", "x", "y"), each=10))
p <- ggplot(dat, aes(val1, val2)) + geom_point() + geom_smooth(method = "lm") +
facet_grid(grp1 ~ grp2)
cors <- ddply(dat, .(grp1, grp2), summarise, cor = round(cor(val1, val2), 2))
p + geom_text(data=cors, aes(label=paste("r=", cor, sep="")), x=1, y=-0.25)
Since your data is not in the appropriate format, some reshaping is necessary before it can be plotted.
Firstly, reshape the data to the long format:
library(reshape2)
allM <- melt(all[-1], id.vars = "type")
Split the values along type and val1 vs. val2:
allList <- split(allM$value, interaction(allM$type, allM$variable))
Create a list of all combinations:
allComb <- unlist(lapply(c(1, 3),
function(x)
lapply(c(2 ,4),
function(y)
do.call(cbind, allList[c(x, y)]))),
recursive = FALSE)
Create a new dataset:
allNew <- do.call(rbind,
lapply(allComb, function(x) {
tmp <- as.data.frame(x)
tmp <- (within(tmp, {xval <- names(tmp)[1];
yval <- names(tmp)[2]}))
names(tmp)[1:2] <- c("x", "y")
tmp}))
Plot:
library(ggplot2)
p <- ggplot(allNew, aes(x = x, y = y)) +
geom_smooth(method = "lm") +
geom_point() +
facet_grid(yval ~ xval)
# Calculate correlation for each group
library(plyr)
cors <- ddply(allNew, .(yval, xval), summarise, cor = round(cor(x, y), 2))
p + geom_text(data=cors, aes(label=paste("r=", cor, sep="")), x=0.5, y=0.5)
There is an additional package ggpubr available now addressing exactly this issue with the stat_cor() function.
library(tidyverse)
library(ggpubr)
ggplot(all, aes(val1, val2))+
geom_smooth(method = "lm") +
geom_point() +
facet_grid(~type) +
stat_cor()

ggplot2 boxplot: horizontal bar at median?

I would like to make a ggplot2 boxplot more meaningful by adding a thick bar at the median (so that if the median is equal to either of the lower or upper quartiles, it can be detected to which it is equal). I came across a recent post of Kohske:
Can I get boxplot notches in ggplot2?
but I didn't know how to give the "crossbar" a "height". Then I tried
to use a rectangle but it didn't work either. Here is a minimal example:
require(ggplot2)
require(reshape2)
require(plyr)
set.seed(1)
## parameters
p1 <- c(5, 20, 100)
p2 <- c("f1", "f2", "f3", "f4", "f5")
p3 <- c("g1","g2","g3","g4","g5")
N <- 1000
## lengths
l1 <- length(p1)
l2 <- length(p2)
l3 <- length(p3)
## build result array containing the measurements
arr <- array(rep(NA, l1*l2*l3*N), dim=c(l1, l2, l3, N),
dimnames=list(
p1=p1,
p2=p2,
p3=p3,
N=1:N))
for(i in 1:l1){
for(j in 1:l2){
for(k in 1:l3){
arr[i,j,k,] <- i+j+k+runif(N, min=-4, max=4)
}
}
}
arr <- arr + rexp(3*5*5*N)
## create molten data
mdf <- melt(arr, formula = . ~ p1 + p2 + p3 + N) # create molten data frame
## confidence interval calculated by `boxplot.stats`
f <- function(x){
ans <- boxplot.stats(x)
data.frame(x=x, y=ans$stats[3], ymin=ans$conf[1], ymax=ans$conf[2])
}
## (my poor) trial
ggplot(mdf, aes(x=p3, y=value)) + geom_boxplot(outlier.shape=1) +
stat_summary(fun.data=f, geom="rectangle", colour=NA, fill="black",
xmin=x-0.36, xmax=x+0.36, ymin=max(y-0.2, ymin), ymax=min(y+0.2,
ymax)) + facet_grid(p2 ~ p1, scales = "free_y")
**SOLUTION** (after the discussion with Kohske below):
f <- function(x, height){
ans <- median(x)
data.frame(y=ans, ymin=ans-height/2, ymax=ans+height/2)
}
p <- ggplot(mdf, aes(x=p3, y=value)) + geom_boxplot(outlier.shape=1) +
stat_summary(fun.data=f, geom="crossbar", height=0.5, colour=NA,
fill="black", width=0.78) +
facet_grid(p2 ~ p1, scales = "free_y")
pdf()
print(p)
dev.off()
**UPDATE** Hmmm... it's not that trivial. The following example shows that the "height" of the crossbar should be adapted to the y-axis scale, otherwise it might be overseen.
require(ggplot2)
require(reshape2)
require(plyr)
set.seed(1)
## parameters
p1 <- c(5, 20, 100)
p2 <- c("f1", "f2", "f3", "f4", "f5")
p3 <- c("g1","g2","g3","g4","g5")
N <- 1000
## lengths
l1 <- length(p1)
l2 <- length(p2)
l3 <- length(p3)
## build result array containing the measurements
arr <- array(rep(NA, l1*l2*l3*N), dim=c(l1, l2, l3, N),
dimnames=list(
p1=p1,
p2=p2,
p3=p3,
N=1:N))
for(i in 1:l1){
for(j in 1:l2){
for(k in 1:l3){
arr[i,j,k,] <- i+j^4+k+runif(N, min=-4, max=4)
}
}
}
arr <- arr + rexp(3*5*5*N)
arr[1,2,5,] <- arr[1,2,5,]+30
arr[1,5,3,] <- arr[1,5,3,]+100
## create molten data
mdf <- melt(arr, formula = . ~ p1 + p2 + p3 + N) # create molten data frame
f <- function(x, height){
ans <- median(x)
data.frame(y=ans, ymin=ans-height/2, ymax=ans+height/2)
}
## plot
p <- ggplot(mdf, aes(x=p3, y=value)) + geom_boxplot(outlier.shape=1) +
stat_summary(fun.data=f, geom="crossbar", height=0.7, colour=NA,
fill="black", width=0.78) +
facet_grid(p2 ~ p1, scales = "free_y")
pdf()
print(p)
dev.off()
here is an example:
f <- function(x, height) {
ans <- median(x)
data.frame(ymin = ans-height/2, ymax = ans+height/2, y = ans)
}
df <- data.frame(x=gl(2,6), y=c(1,1,1,1,3,3, 1,1,3,3,3,3))
ggplot(df, aes(x, y)) + geom_boxplot() +
stat_summary(fun.data = f, geom = "crossbar", height = 0.1,
colour = NA, fill = "skyblue", width = 0.8, alpha = 0.5)
if you just want to change the apparence, then here is a quick hack, I don't recommend though,
df <- data.frame(x=gl(2,6), y=c(c(1,1,1,1,3,3), c(1,1,3,3,3,3)*10))
ggplot(df, aes(x, y)) + geom_boxplot() + facet_grid(x~.)
gs <- grid.gget("geom_boxplot", grep = T)
if (inherits(gs, "grob")) gs <- list(gs)
gss <- llply(gs, function(g) g$children[[length(g$children)]])
l_ply(gss, function(g) grid.edit(g$name, grep=T, just = c("left", "center"), height = unit(0.05, "native"), gp = gpar(fill = "skyblue", alpha = 0.5, col = NA)))

Resources