R shared legend error - r

I'm using a shared legend in R, which is using this code:
g <- ggplotGrob(plots[[1]] + theme(legend.position="bottom"))$grobs
legend <- g[[which(sapply(g, function(x) x$name) == "guide-box")]]
Reference: Combining grid_arrange_shared_legend() and facet_wrap_labeller() in R
What is "structure(c(3L,3L,……))" in my legend? How to avoid it?
My code:
library(RColorBrewer)
library(ggplot2)
library(gridExtra)
wq <- read.csv('wineQualityReds.csv')
quality_factor <- factor(wq$quality)
plot_three <- function(feature) {
ggplot(data=wq,
aes_string(x = feature, y = wq$pH, color = quality_factor)) +
geom_tile() +
scale_color_brewer(palette='Blues') +
geom_smooth(method = loess, aes(group = 1)) + theme(legend.position = "none")
}
ph1 <- plot_three("volatile.acidity") +
labs(x='volatile.acidity(g / dm^3)', y='pH')
ph2 <- plot_three("citric.acid") +
labs(x='citric.acid(g / dm^3)', y='pH')
ph3 <- plot_three("chlorides") +
labs(x='chlorides(g / dm^3)', y='pH')
ph4 <- plot_three("free.sulfur.dioxide") +
labs(x='free.sulfur.dioxide(mg / dm^3)', y='pH')
ph5 <- plot_three("sulphates") +
labs(x='sulphates(g / dm^3)', y='pH')
ph6 <- plot_three("alcohol") +
labs(x='alcohol(% by volume)', y='pH')
g <- ggplotGrob(ph1 + theme(legend.position="bottom"))$grobs
legend <- g[[which(sapply(g, function(x) x$name) == "guide-box")]]
grid.arrange(arrangeGrob(ph1, ph2, ph3, ph4, ph5, ph6, ncol =3),
legend, top = "Relationship between pH and features via different quality", heights=c(9,1))
Dataset: redwineQuality
https://s3.amazonaws.com/udacity-hosted-downloads/ud651/wineQualityReds.csv

Related

Combining `for loop` and ggplotGrob

I just wanted to plot multiple facets using ggplot2 in combination with for loop and found the solution in this postggplot2-plots-over-multiple-pages.
However, I want to modify appearances of these facets with ggplotGrop for reducing the strip size of facets after this for loop.
I am providing here reproducible example of previous question for only plotting facets
library(ggplot2)
library(vcd) # For the Baseball data
data(Baseball)
pdf("baseball.pdf", 7, 5)
aa<- for (i in seq(1, length(unique(Baseball$team87)), 6)) {
print(ggplot(Baseball[Baseball$team87 %in% levels(Baseball$team87)[i:(i+5)], ],
aes(hits86, sal87)) +
geom_point() +
facet_wrap(~ team87) +
scale_y_continuous(limits=c(0, max(Baseball$sal87, na.rm=TRUE))) +
scale_x_continuous(limits=c(0, max(Baseball$hits86))) +
theme_bw())
}
dev.off()
want to implement ggplotGrob to reduce the strip size.
library(gridExtra)
library(grid)
g = ggplotGrob(aa)
pos = c(unique(subset(g$layout, grepl("panel", g$layout$name), select = t)))
for(i in pos) g$heights[i-1] = unit(0.4,"cm")
grobs = which(grepl("strip", g$layout$name))
for(i in grobs) g$grobs[[i]]$heights <- unit(1, "npc")
grid.draw(g)
dev.off()
Error in plot_clone(plot) : attempt to apply non-function
I just wonder how to implement ggplotGrop to that for loop.
The main probleme is that you're using ggplotGrob on an the wrong object.
You have to use it inside each loop.
Then you must grid.arrange to make the multipage pdf
First method: with a trick as ggplotGrob create a blank page
pdf("baseball.pdf", 7, 5)
for (i in seq(1, length(unique(Baseball$team87)), 6)) {
temp <- ggplot(Baseball[Baseball$team87 %in% levels(Baseball$team87)[i:(i+5)], ],
aes(hits86, sal87)) +
geom_point(na.rm=TRUE) + ## to avoid warnings
facet_wrap(~ team87) +
scale_y_continuous(limits=c(0, max(Baseball$sal87, na.rm=TRUE))) +
scale_x_continuous(limits=c(0, max(Baseball$hits86))) +
theme_bw()
pdf(file=NULL) ## because ggplotGrob will create a blank page
g <- ggplotGrob(temp)
pos = c(unique(subset(g$layout, grepl("panel", g$layout$name), select = t)))
for(i in pos) g$heights[i-1] = unit(0.4,"cm")
grobs = which(grepl("strip", g$layout$name))
for(i in grobs) g$grobs[[i]]$heights <- unit(1, "npc")
dev.off() ## to close the fake device
grid.arrange(g)
}
dev.off()
Second method: to avoid using fake device
plotlist <- list()
j <- 1
for (i in seq(1, length(unique(Baseball$team87)), 6)) {
temp <- ggplot(Baseball[Baseball$team87 %in% levels(Baseball$team87)[i:(i+5)], ],
aes(hits86, sal87)) +
geom_point(na.rm=TRUE) +
facet_wrap(~ team87) +
scale_y_continuous(limits=c(0, max(Baseball$sal87, na.rm=TRUE))) +
scale_x_continuous(limits=c(0, max(Baseball$hits86))) +
theme_bw()
g <- ggplotGrob(temp)
pos = c(unique(subset(g$layout, grepl("panel", g$layout$name), select = t)))
for(i in pos) g$heights[i-1] = unit(0.4,"cm")
grobs = which(grepl("strip", g$layout$name))
for(i in grobs) g$grobs[[i]]$heights <- unit(1, "npc")
plotlist[[j]] <- g
j <- j+1
}
pdf("baseball.pdf", 7, 5)
for (i in (1:length(plotlist))) {
grid.arrange(plotlist[[i]])
}
dev.off()
Actually, you can even use grid.arrange and ggplotGrob, without using facet to make all more customisable.

Combining grid_arrange_shared_legend() and facet_wrap_labeller() in R

I am trying to combine grid_arrange_shared_legend() and facet_wrap_labeller() in R. More specifically, I want to draw a figure including two ggplot figures with multiple panels each and have a common legend. I further want to italicize part of the facet strip labels. The former is possible with the grid_arrange_shared_legend() function introduced here, and the latter can be achieved with the facet_wrap_labeller() function here. However, I have not been successful in combining the two.
Here's an example.
library("ggplot2")
set.seed(1)
d <- data.frame(
f1 = rep(LETTERS[1:3], each = 100),
f2 = rep(letters[1:3], 100),
v1 = runif(3 * 100),
v2 = rnorm(3 * 100)
)
p1 <- ggplot(d, aes(v1, v2, color = f2)) + geom_point() + facet_wrap(~f1)
p2 <- ggplot(d, aes(v1, v2, color = f2)) + geom_smooth() + facet_wrap(~f1)
I can place p1 and p2 in the same figure and have a common legend using grid_arrange_shared_legend() (slightly modified from the original).
grid_arrange_shared_legend <- function(...) {
plots <- list(...)
g <- ggplotGrob(plots[[1]] + theme(legend.position = "right"))$grobs
legend <- g[[which(sapply(g, function(x) x$name) == "guide-box")]]
lheight <- sum(legend$width)
grid.arrange(
do.call(arrangeGrob, lapply(plots, function(x)
x + theme(legend.position = "none"))),
legend,
ncol = 2,
widths = unit.c(unit(1, "npc") - lheight, lheight))
}
grid_arrange_shared_legend(p1, p2)
Here's what I get.
It is possible to italicize part of the strip label by facet_wrap_labeller().
facet_wrap_labeller <- function(gg.plot,labels=NULL) {
require(gridExtra)
g <- ggplotGrob(gg.plot)
gg <- g$grobs
strips <- grep("strip_t", names(gg))
for(ii in seq_along(labels)) {
modgrob <- getGrob(gg[[strips[ii]]], "strip.text",
grep=TRUE, global=TRUE)
gg[[strips[ii]]]$children[[modgrob$name]] <- editGrob(modgrob,label=labels[ii])
}
g$grobs <- gg
class(g) = c("arrange", "ggplot",class(g))
g
}
facet_wrap_labeller(p1,
labels = c(
expression(paste("A ", italic(italic))),
expression(paste("B ", italic(italic))),
expression(paste("C ", italic(italic)))
)
)
However, I cannot combine the two in a straightforward manner.
p3 <- facet_wrap_labeller(p1,
labels = c(
expression(paste("A ", italic(italic))),
expression(paste("B ", italic(italic))),
expression(paste("C ", italic(italic)))
)
)
p4 <- facet_wrap_labeller(p2,
labels = c(
expression(paste("A ", italic(italic))),
expression(paste("B ", italic(italic))),
expression(paste("C ", italic(italic)))
)
)
grid_arrange_shared_legend(p3, p4)
# Error in plot_clone(p) : attempt to apply non-function
Does anyone know how to modify either or both of the functions so that they can be combined? Or is there any other way to achieve the goal?
You need to pass the gtable instead of the ggplot,
library(gtable)
library("ggplot2")
library(grid)
set.seed(1)
d <- data.frame(
f1 = rep(LETTERS[1:3], each = 100),
f2 = rep(letters[1:3], 100),
v1 = runif(3 * 100),
v2 = rnorm(3 * 100)
)
p1 <- ggplot(d, aes(v1, v2, color = f2)) + geom_point() + facet_wrap(~f1)
p2 <- ggplot(d, aes(v1, v2, color = f2)) + geom_smooth() + facet_wrap(~f1)
facet_wrap_labeller <- function(g, labels=NULL) {
gg <- g$grobs
strips <- grep("strip_t", names(gg))
for(ii in seq_along(labels)) {
oldgrob <- getGrob(gg[[strips[ii]]], "strip.text",
grep=TRUE, global=TRUE)
newgrob <- editGrob(oldgrob,label=labels[ii])
gg[[strips[ii]]]$children[[oldgrob$name]] <- newgrob
}
g$grobs <- gg
g
}
combined_fun <- function(p1, p2, labs1) {
g1 <- ggplotGrob(p1 + theme(legend.position = "right"))
g2 <- ggplotGrob(p2 + theme(legend.position = "none"))
g1 <- facet_wrap_labeller(g1, labs1)
legend <- gtable_filter(g1, "guide-box", trim = TRUE)
g1p <- g1[,-(ncol(g1)-1)]
lw <- sum(legend$width)
g12 <- rbind(g1p, g2, size="first")
g12$widths <- unit.pmax(g1p$widths, g2$widths)
g12 <- gtable_add_cols(g12, widths = lw)
g12 <- gtable_add_grob(g12, legend,
t = 1, l = ncol(g12), b = nrow(g12))
g12
}
test <- combined_fun(p1, p2, labs1 = c(
expression(paste("A ", italic(italic))),
expression(paste("B ", italic(italic))),
expression(paste("C ", italic(italic)))
)
)
grid.draw(test)

Putting expressions (plotmath) in the legend key labels

How can I put expressions (plotmath) into the legend key labels of the following plot?
I am aware of How to use Greek symbols in ggplot2? and the link therein, but whenever I use scale_..._manual function, I obtain a second legend (see below).
require(ggplot2)
require(reshape2)
require(plyr)
## parameters
d <- c(2, 5, 10, 20, 50, 100)
tau <- c("t1", "t2", "t3")
fam <- c("f1", "f2", "f3", "f4", "f5")
meth <- c("m1", "m2", "m3", "m4")
## lengths
nd <- length(d)
ntau <- length(tau)
nfam <- length(fam)
nmeth <- length(meth)
## build result array containing the measurements
arr <- array(rep(NA, nd*ntau*nfam*nmeth), dim=c(nd, ntau, nfam, nmeth),
dimnames=list(d=d, tau=tau, fam=fam, meth=meth))
for(i in 1:nd){
for(j in 1:ntau){
for(k in 1:nfam){
for(l in 1:nmeth){
arr[i,j,k,l] <- i+j+k+l+runif(1)
}
}
}
}
## create molten data
mdf <- reshape2:::melt.array(arr, formula = . ~ d + tau + fam + meth) # create molten data frame
mdf$tau. <- factor(mdf$tau, levels=tau, labels=paste("tau==", tau, sep="")) # expression for tau
mdf$fam. <- factor(mdf$fam, levels=fam, labels=paste("alpha==", fam, sep="")) # expression for family
meth.labs <- lapply(1:nmeth, function(i) bquote(gamma==.(i))) # expression for methods
## plot
ggplot(mdf, aes(x=d, y=value, shape=meth, linetype=meth)) + geom_line() +
geom_point() + facet_grid(fam. ~ tau., scales="free_y", labeller=label_parsed) +
## scale_linetype_manual(values=1:4, breaks=meth, labels=meth.labs) + # problem: adds another legend
scale_x_continuous(trans="log10", breaks=d, labels=d) +
scale_y_continuous(trans="log10")
If I use both scale_*_manual functions I get a single legend with the expression:
ggplot(mdf, aes(x=d, y=value, shape=meth, linetype=meth)) + geom_line() +
geom_point() + facet_grid(fam. ~ tau., scales="free_y", labeller=label_parsed) +
## scale_linetype_manual(values=1:4, breaks=meth, labels=meth.labs) + # problem: adds another legend
scale_x_continuous(trans="log10", breaks=d, labels=d) +
scale_y_continuous(trans="log10") +
scale_linetype_manual(breaks = c('m1','m2','m3','m4'),values = 1:4,labels = meth.labs) +
scale_shape_manual(breaks = c('m1','m2','m3','m4'),values = 1:4,labels = meth.labs)

ggplot2:scatterplots for all possible combinations of variables

I want to plot graphs for all possible combinations of variables. My code is below:
set.seed(12345)
a <- data.frame(Glabel=LETTERS[1:7], A=rnorm(7, mean = 0, sd = 1), B=rnorm(7, mean = 0, sd = 1), C=rnorm(7, mean = 0, sd = 1))
T <- data.frame(Tlabel=LETTERS[11:20], A=rnorm(10, mean = 0, sd = 1), B=rnorm(10, mean = 0, sd = 1), C=rnorm(10, mean = 0, sd = 1))
library(ggplot2)
for(i in 2:(ncol(a)-1))
{
for(j in (i+1):ncol(a))
{
r <- 0.08
p <- ggplot(data=a, mapping=aes(x=a[, i], y=a[, j])) + geom_point() + theme_bw()
p <- p + geom_text(data=a, mapping=aes(x=a[, i], y=a[, j], label=Glabel),
size=3, vjust=1.35, colour="black")
p <- p + geom_segment(data = T, aes(xend = T[ ,i], yend=T[ ,j]),
x=0, y=0, colour="black",
arrow=arrow(angle=25, length=unit(0.25, "cm")))
p <- p + geom_text(data=T, aes(x=T[ ,i], y=T[ ,j], label=Tlabel), size=3, vjust=0, colour="red")
dev.new()
print(p)
}
}
This code works fine. But the method used here is not recommended (See #baptiste comment) and does not work in function. I want to know what is the best and recommended way to accomplish this task. Thanks in advance for your help.
Alright this is garbage but the best I could do. It's super inefficient as it recreates a partial data with each loop through lapply. Maybe someone else has something better:
MAT <- outer(names(df)[-1], names(df)[-1], paste)
combs <- sapply(MAT[lower.tri(MAT)], function(x) strsplit(x, " "))
ind <- lapply(combs, function(x) match(x, names(df)))
plotter <- function(cn) { #start junky function
NAMES <- colnames(df)[cn]
df2 <- df[cn]
names(df2)<- c('x1', 'x2')
p <- ggplot(data=df2, aes(x1, x2)) + geom_point() + theme_bw() +
scale_x_continuous(name=NAMES[1]) +
scale_y_continuous(name=NAMES[2])
dev.new()
print(p)
} #end of junky function
lapply(ind, function(x) plotter(cn=x))
EDIT: This is a bit better:
x <- match(names(df)[-1], names(df))
MAT <- outer(x, x, paste)
combs <- t(sapply(MAT[lower.tri(MAT)], function(x) as.numeric(unlist(strsplit(x, " ")))))
plotter <- function(cn) {
NAMES <- colnames(df)[cn]
df2 <- df[cn]
names(df2)<- c('x1', 'x2')
p <- ggplot(data=df2, aes(x1, x2)) + geom_point() + theme_bw() +
scale_x_continuous(name=NAMES[1]) +
scale_y_continuous(name=NAMES[2])
dev.new()
print(p)
}
apply(combs, 1, function(x) plotter(cn=x))

ggplot2 boxplot: horizontal bar at median?

I would like to make a ggplot2 boxplot more meaningful by adding a thick bar at the median (so that if the median is equal to either of the lower or upper quartiles, it can be detected to which it is equal). I came across a recent post of Kohske:
Can I get boxplot notches in ggplot2?
but I didn't know how to give the "crossbar" a "height". Then I tried
to use a rectangle but it didn't work either. Here is a minimal example:
require(ggplot2)
require(reshape2)
require(plyr)
set.seed(1)
## parameters
p1 <- c(5, 20, 100)
p2 <- c("f1", "f2", "f3", "f4", "f5")
p3 <- c("g1","g2","g3","g4","g5")
N <- 1000
## lengths
l1 <- length(p1)
l2 <- length(p2)
l3 <- length(p3)
## build result array containing the measurements
arr <- array(rep(NA, l1*l2*l3*N), dim=c(l1, l2, l3, N),
dimnames=list(
p1=p1,
p2=p2,
p3=p3,
N=1:N))
for(i in 1:l1){
for(j in 1:l2){
for(k in 1:l3){
arr[i,j,k,] <- i+j+k+runif(N, min=-4, max=4)
}
}
}
arr <- arr + rexp(3*5*5*N)
## create molten data
mdf <- melt(arr, formula = . ~ p1 + p2 + p3 + N) # create molten data frame
## confidence interval calculated by `boxplot.stats`
f <- function(x){
ans <- boxplot.stats(x)
data.frame(x=x, y=ans$stats[3], ymin=ans$conf[1], ymax=ans$conf[2])
}
## (my poor) trial
ggplot(mdf, aes(x=p3, y=value)) + geom_boxplot(outlier.shape=1) +
stat_summary(fun.data=f, geom="rectangle", colour=NA, fill="black",
xmin=x-0.36, xmax=x+0.36, ymin=max(y-0.2, ymin), ymax=min(y+0.2,
ymax)) + facet_grid(p2 ~ p1, scales = "free_y")
**SOLUTION** (after the discussion with Kohske below):
f <- function(x, height){
ans <- median(x)
data.frame(y=ans, ymin=ans-height/2, ymax=ans+height/2)
}
p <- ggplot(mdf, aes(x=p3, y=value)) + geom_boxplot(outlier.shape=1) +
stat_summary(fun.data=f, geom="crossbar", height=0.5, colour=NA,
fill="black", width=0.78) +
facet_grid(p2 ~ p1, scales = "free_y")
pdf()
print(p)
dev.off()
**UPDATE** Hmmm... it's not that trivial. The following example shows that the "height" of the crossbar should be adapted to the y-axis scale, otherwise it might be overseen.
require(ggplot2)
require(reshape2)
require(plyr)
set.seed(1)
## parameters
p1 <- c(5, 20, 100)
p2 <- c("f1", "f2", "f3", "f4", "f5")
p3 <- c("g1","g2","g3","g4","g5")
N <- 1000
## lengths
l1 <- length(p1)
l2 <- length(p2)
l3 <- length(p3)
## build result array containing the measurements
arr <- array(rep(NA, l1*l2*l3*N), dim=c(l1, l2, l3, N),
dimnames=list(
p1=p1,
p2=p2,
p3=p3,
N=1:N))
for(i in 1:l1){
for(j in 1:l2){
for(k in 1:l3){
arr[i,j,k,] <- i+j^4+k+runif(N, min=-4, max=4)
}
}
}
arr <- arr + rexp(3*5*5*N)
arr[1,2,5,] <- arr[1,2,5,]+30
arr[1,5,3,] <- arr[1,5,3,]+100
## create molten data
mdf <- melt(arr, formula = . ~ p1 + p2 + p3 + N) # create molten data frame
f <- function(x, height){
ans <- median(x)
data.frame(y=ans, ymin=ans-height/2, ymax=ans+height/2)
}
## plot
p <- ggplot(mdf, aes(x=p3, y=value)) + geom_boxplot(outlier.shape=1) +
stat_summary(fun.data=f, geom="crossbar", height=0.7, colour=NA,
fill="black", width=0.78) +
facet_grid(p2 ~ p1, scales = "free_y")
pdf()
print(p)
dev.off()
here is an example:
f <- function(x, height) {
ans <- median(x)
data.frame(ymin = ans-height/2, ymax = ans+height/2, y = ans)
}
df <- data.frame(x=gl(2,6), y=c(1,1,1,1,3,3, 1,1,3,3,3,3))
ggplot(df, aes(x, y)) + geom_boxplot() +
stat_summary(fun.data = f, geom = "crossbar", height = 0.1,
colour = NA, fill = "skyblue", width = 0.8, alpha = 0.5)
if you just want to change the apparence, then here is a quick hack, I don't recommend though,
df <- data.frame(x=gl(2,6), y=c(c(1,1,1,1,3,3), c(1,1,3,3,3,3)*10))
ggplot(df, aes(x, y)) + geom_boxplot() + facet_grid(x~.)
gs <- grid.gget("geom_boxplot", grep = T)
if (inherits(gs, "grob")) gs <- list(gs)
gss <- llply(gs, function(g) g$children[[length(g$children)]])
l_ply(gss, function(g) grid.edit(g$name, grep=T, just = c("left", "center"), height = unit(0.05, "native"), gp = gpar(fill = "skyblue", alpha = 0.5, col = NA)))

Resources