ggplot2:scatterplots for all possible combinations of variables - r

I want to plot graphs for all possible combinations of variables. My code is below:
set.seed(12345)
a <- data.frame(Glabel=LETTERS[1:7], A=rnorm(7, mean = 0, sd = 1), B=rnorm(7, mean = 0, sd = 1), C=rnorm(7, mean = 0, sd = 1))
T <- data.frame(Tlabel=LETTERS[11:20], A=rnorm(10, mean = 0, sd = 1), B=rnorm(10, mean = 0, sd = 1), C=rnorm(10, mean = 0, sd = 1))
library(ggplot2)
for(i in 2:(ncol(a)-1))
{
for(j in (i+1):ncol(a))
{
r <- 0.08
p <- ggplot(data=a, mapping=aes(x=a[, i], y=a[, j])) + geom_point() + theme_bw()
p <- p + geom_text(data=a, mapping=aes(x=a[, i], y=a[, j], label=Glabel),
size=3, vjust=1.35, colour="black")
p <- p + geom_segment(data = T, aes(xend = T[ ,i], yend=T[ ,j]),
x=0, y=0, colour="black",
arrow=arrow(angle=25, length=unit(0.25, "cm")))
p <- p + geom_text(data=T, aes(x=T[ ,i], y=T[ ,j], label=Tlabel), size=3, vjust=0, colour="red")
dev.new()
print(p)
}
}
This code works fine. But the method used here is not recommended (See #baptiste comment) and does not work in function. I want to know what is the best and recommended way to accomplish this task. Thanks in advance for your help.

Alright this is garbage but the best I could do. It's super inefficient as it recreates a partial data with each loop through lapply. Maybe someone else has something better:
MAT <- outer(names(df)[-1], names(df)[-1], paste)
combs <- sapply(MAT[lower.tri(MAT)], function(x) strsplit(x, " "))
ind <- lapply(combs, function(x) match(x, names(df)))
plotter <- function(cn) { #start junky function
NAMES <- colnames(df)[cn]
df2 <- df[cn]
names(df2)<- c('x1', 'x2')
p <- ggplot(data=df2, aes(x1, x2)) + geom_point() + theme_bw() +
scale_x_continuous(name=NAMES[1]) +
scale_y_continuous(name=NAMES[2])
dev.new()
print(p)
} #end of junky function
lapply(ind, function(x) plotter(cn=x))
EDIT: This is a bit better:
x <- match(names(df)[-1], names(df))
MAT <- outer(x, x, paste)
combs <- t(sapply(MAT[lower.tri(MAT)], function(x) as.numeric(unlist(strsplit(x, " ")))))
plotter <- function(cn) {
NAMES <- colnames(df)[cn]
df2 <- df[cn]
names(df2)<- c('x1', 'x2')
p <- ggplot(data=df2, aes(x1, x2)) + geom_point() + theme_bw() +
scale_x_continuous(name=NAMES[1]) +
scale_y_continuous(name=NAMES[2])
dev.new()
print(p)
}
apply(combs, 1, function(x) plotter(cn=x))

Related

How to show integers when using ggplot2::geom_smooth()

In the example below, how can I round the x label to even numbers? I cant convert them as factors first, because then geom_smooth does not work
library(ggplot2)
set.seed(32)
df <- data.frame(a = as.integer(rnorm(250, 2, 0.1)))
df$b <- df$a + rnorm(250)
df$id = 1
df_2 <- df
df_2$id <- 2
df_tot <- rbind(df, df_2)
ggplot(df_tot, aes(x = a, y = b)) +
geom_smooth() +
facet_wrap(~id)
If we want even numbers, an option is to add labels as a function in scale_x_continuous
library(ggplot2)
ggplot(df_tot, aes(x = a, y = b)) +
geom_smooth() +
facet_wrap(~id) +
scale_x_continuous(labels = function(x) seq(2, length.out = length(x)))

How to format ggplot `geom_text` with formula, getting unwanted "c(...)"

In my ggplot2 code below, I want to show the formula for a linear-regression fit on my plot with geom_text, but I get unwanted c before the values of a and b, how do I prevent this?
p <- ggplot(data=Algae, aes(x=a254, y=DOC))+
geom_point(color="blue",stat="identity") +
geom_smooth(method="lm",se=FALSE,color="red",formula=y~x)
model.lm <- lm(DOC~a254, data=Algae)
l <- list(a=format(coef(model.lm)[1], digits=4),
b=format(coef(model.lm)[2], digits=4),
r2=format(summary(model.lm)$r.squared, digits=4),
p=format(summary(model.lm)$coefficients[2,4], digits=4))
eq <- substitute(italic(DOC) == a - b %*% italic(a254)~","~italic(R)^2~"="~r2~",
"~italic(P)~"="~p, l)
p1 <- p + geom_text(aes(x =6, y = 0, label = as.character(as.expression(eq))), parse = TRUE)
p1
The reason for this is that you first format() your data into character format and then try to calculate with strings. You could solve the problem this way:
First, it is more convenient to transform your list into a data.frame, using:
d <- as.data.frame(l)
The values should be converted back to numeric, since you yet want to do arithmetics inside the formula:
d[] <- lapply(d, function(x) as.numeric(as.character(x)))
Then it should work fine:
eq <- substitute(italic(Sepal.Length) == a - b %*% italic(Petal.Length)~","~italic(R)^2~"="~r2~",
"~italic(P)~"="~p, d)
p + geom_text(aes(x =5, y = 0, label = as.character(as.expression(eq))), parse = TRUE)
You could also use annotate() to add the formula to the plot, which might look a little nicer:
p + annotate('text', 7, 4,
label=paste("italic(Sepal.Length)==", d$a, "~-~", d$b, "~x~",
"~italic(Petal.Length)~';'~italic(r)^2==", d$r2,
"~italic(P)==", d$p),
parse=TRUE,
hjust=1, size=3.5)
Yielding:
Data:
library(ggplot2)
p <- ggplot(data=iris, aes(x=Petal.Length, y=Sepal.Length)) +
geom_point(color="blue", stat="identity") +
geom_smooth(method="lm", se=FALSE, color="red", formula=y~x)
model.lm <- lm(Sepal.Length ~ Petal.Length, data=iris)
l <- list(a=format(coef(model.lm)[1], digits=4),
b=format(coef(model.lm)[2], digits=4),
r2=format(summary(model.lm)$r.squared, digits=4),
p=format(summary(model.lm)$coefficients[2, 4], digits=4))

Combining grid_arrange_shared_legend() and facet_wrap_labeller() in R

I am trying to combine grid_arrange_shared_legend() and facet_wrap_labeller() in R. More specifically, I want to draw a figure including two ggplot figures with multiple panels each and have a common legend. I further want to italicize part of the facet strip labels. The former is possible with the grid_arrange_shared_legend() function introduced here, and the latter can be achieved with the facet_wrap_labeller() function here. However, I have not been successful in combining the two.
Here's an example.
library("ggplot2")
set.seed(1)
d <- data.frame(
f1 = rep(LETTERS[1:3], each = 100),
f2 = rep(letters[1:3], 100),
v1 = runif(3 * 100),
v2 = rnorm(3 * 100)
)
p1 <- ggplot(d, aes(v1, v2, color = f2)) + geom_point() + facet_wrap(~f1)
p2 <- ggplot(d, aes(v1, v2, color = f2)) + geom_smooth() + facet_wrap(~f1)
I can place p1 and p2 in the same figure and have a common legend using grid_arrange_shared_legend() (slightly modified from the original).
grid_arrange_shared_legend <- function(...) {
plots <- list(...)
g <- ggplotGrob(plots[[1]] + theme(legend.position = "right"))$grobs
legend <- g[[which(sapply(g, function(x) x$name) == "guide-box")]]
lheight <- sum(legend$width)
grid.arrange(
do.call(arrangeGrob, lapply(plots, function(x)
x + theme(legend.position = "none"))),
legend,
ncol = 2,
widths = unit.c(unit(1, "npc") - lheight, lheight))
}
grid_arrange_shared_legend(p1, p2)
Here's what I get.
It is possible to italicize part of the strip label by facet_wrap_labeller().
facet_wrap_labeller <- function(gg.plot,labels=NULL) {
require(gridExtra)
g <- ggplotGrob(gg.plot)
gg <- g$grobs
strips <- grep("strip_t", names(gg))
for(ii in seq_along(labels)) {
modgrob <- getGrob(gg[[strips[ii]]], "strip.text",
grep=TRUE, global=TRUE)
gg[[strips[ii]]]$children[[modgrob$name]] <- editGrob(modgrob,label=labels[ii])
}
g$grobs <- gg
class(g) = c("arrange", "ggplot",class(g))
g
}
facet_wrap_labeller(p1,
labels = c(
expression(paste("A ", italic(italic))),
expression(paste("B ", italic(italic))),
expression(paste("C ", italic(italic)))
)
)
However, I cannot combine the two in a straightforward manner.
p3 <- facet_wrap_labeller(p1,
labels = c(
expression(paste("A ", italic(italic))),
expression(paste("B ", italic(italic))),
expression(paste("C ", italic(italic)))
)
)
p4 <- facet_wrap_labeller(p2,
labels = c(
expression(paste("A ", italic(italic))),
expression(paste("B ", italic(italic))),
expression(paste("C ", italic(italic)))
)
)
grid_arrange_shared_legend(p3, p4)
# Error in plot_clone(p) : attempt to apply non-function
Does anyone know how to modify either or both of the functions so that they can be combined? Or is there any other way to achieve the goal?
You need to pass the gtable instead of the ggplot,
library(gtable)
library("ggplot2")
library(grid)
set.seed(1)
d <- data.frame(
f1 = rep(LETTERS[1:3], each = 100),
f2 = rep(letters[1:3], 100),
v1 = runif(3 * 100),
v2 = rnorm(3 * 100)
)
p1 <- ggplot(d, aes(v1, v2, color = f2)) + geom_point() + facet_wrap(~f1)
p2 <- ggplot(d, aes(v1, v2, color = f2)) + geom_smooth() + facet_wrap(~f1)
facet_wrap_labeller <- function(g, labels=NULL) {
gg <- g$grobs
strips <- grep("strip_t", names(gg))
for(ii in seq_along(labels)) {
oldgrob <- getGrob(gg[[strips[ii]]], "strip.text",
grep=TRUE, global=TRUE)
newgrob <- editGrob(oldgrob,label=labels[ii])
gg[[strips[ii]]]$children[[oldgrob$name]] <- newgrob
}
g$grobs <- gg
g
}
combined_fun <- function(p1, p2, labs1) {
g1 <- ggplotGrob(p1 + theme(legend.position = "right"))
g2 <- ggplotGrob(p2 + theme(legend.position = "none"))
g1 <- facet_wrap_labeller(g1, labs1)
legend <- gtable_filter(g1, "guide-box", trim = TRUE)
g1p <- g1[,-(ncol(g1)-1)]
lw <- sum(legend$width)
g12 <- rbind(g1p, g2, size="first")
g12$widths <- unit.pmax(g1p$widths, g2$widths)
g12 <- gtable_add_cols(g12, widths = lw)
g12 <- gtable_add_grob(g12, legend,
t = 1, l = ncol(g12), b = nrow(g12))
g12
}
test <- combined_fun(p1, p2, labs1 = c(
expression(paste("A ", italic(italic))),
expression(paste("B ", italic(italic))),
expression(paste("C ", italic(italic)))
)
)
grid.draw(test)

How to create faceted linear regression plot using GGPLOT

I have a data frame created the following way.
library(ggplot2)
x <- data.frame(letters[1:10],abs(rnorm(10)),abs(rnorm(10)),type="x")
y <- data.frame(letters[1:10],abs(rnorm(10)),abs(rnorm(10)),type="y")
# in reality the number of row could be larger than 10 for each x and y
all <- rbind(x,y)
colnames(all) <- c("name","val1","val2","type")
What I want to do is to create a faceted ggplot that looks roughly like this:
Hence each facet above is the correlation plot of the following:
# Top left facet
subset(all,type=="x")$val1
subset(all,type=="y")$val1
# Top right facet
subset(all,type=="x")$val1
subset(all,type=="y")$val2
# ...etc..
But I'm stuck with the following code:
p <- ggplot(all, aes(val1, val2))+ geom_smooth(method = "lm") + geom_point() +
facet_grid(type ~ )
# Calculate correlation for each group
cors <- ddply(all, c(type ~ ), summarise, cor = round(cor(val1, val2), 2))
p + geom_text(data=cors, aes(label=paste("r=", cor, sep="")), x=0.5, y=0.5)
What's the right way to do it?
Some of your code was incorrect. This works for me:
p <- ggplot(all, aes(val1, val2))+ geom_smooth(method = "lm") + geom_point() +
facet_grid(~type)
# Calculate correlation for each group
cors <- ddply(all, .(type), summarise, cor = round(cor(val1, val2), 2))
p + geom_text(data=cors, aes(label=paste("r=", cor, sep="")), x=1, y=-0.25)
Edit: Following OP's comment and edit. The idea is to re-create the data with all four combinations and then facet.
# I consider the type in your previous data to be xx and yy
dat <- data.frame(val1 = c(rep(all$val1[all$type == "x"], 2),
rep(all$val1[all$type == "y"], 2)),
val2 = rep(all$val2, 2),
grp1 = rep(c("x", "x", "y", "y"), each=10),
grp2 = rep(c("x", "y", "x", "y"), each=10))
p <- ggplot(dat, aes(val1, val2)) + geom_point() + geom_smooth(method = "lm") +
facet_grid(grp1 ~ grp2)
cors <- ddply(dat, .(grp1, grp2), summarise, cor = round(cor(val1, val2), 2))
p + geom_text(data=cors, aes(label=paste("r=", cor, sep="")), x=1, y=-0.25)
Since your data is not in the appropriate format, some reshaping is necessary before it can be plotted.
Firstly, reshape the data to the long format:
library(reshape2)
allM <- melt(all[-1], id.vars = "type")
Split the values along type and val1 vs. val2:
allList <- split(allM$value, interaction(allM$type, allM$variable))
Create a list of all combinations:
allComb <- unlist(lapply(c(1, 3),
function(x)
lapply(c(2 ,4),
function(y)
do.call(cbind, allList[c(x, y)]))),
recursive = FALSE)
Create a new dataset:
allNew <- do.call(rbind,
lapply(allComb, function(x) {
tmp <- as.data.frame(x)
tmp <- (within(tmp, {xval <- names(tmp)[1];
yval <- names(tmp)[2]}))
names(tmp)[1:2] <- c("x", "y")
tmp}))
Plot:
library(ggplot2)
p <- ggplot(allNew, aes(x = x, y = y)) +
geom_smooth(method = "lm") +
geom_point() +
facet_grid(yval ~ xval)
# Calculate correlation for each group
library(plyr)
cors <- ddply(allNew, .(yval, xval), summarise, cor = round(cor(x, y), 2))
p + geom_text(data=cors, aes(label=paste("r=", cor, sep="")), x=0.5, y=0.5)
There is an additional package ggpubr available now addressing exactly this issue with the stat_cor() function.
library(tidyverse)
library(ggpubr)
ggplot(all, aes(val1, val2))+
geom_smooth(method = "lm") +
geom_point() +
facet_grid(~type) +
stat_cor()

ggplot2 boxplot: horizontal bar at median?

I would like to make a ggplot2 boxplot more meaningful by adding a thick bar at the median (so that if the median is equal to either of the lower or upper quartiles, it can be detected to which it is equal). I came across a recent post of Kohske:
Can I get boxplot notches in ggplot2?
but I didn't know how to give the "crossbar" a "height". Then I tried
to use a rectangle but it didn't work either. Here is a minimal example:
require(ggplot2)
require(reshape2)
require(plyr)
set.seed(1)
## parameters
p1 <- c(5, 20, 100)
p2 <- c("f1", "f2", "f3", "f4", "f5")
p3 <- c("g1","g2","g3","g4","g5")
N <- 1000
## lengths
l1 <- length(p1)
l2 <- length(p2)
l3 <- length(p3)
## build result array containing the measurements
arr <- array(rep(NA, l1*l2*l3*N), dim=c(l1, l2, l3, N),
dimnames=list(
p1=p1,
p2=p2,
p3=p3,
N=1:N))
for(i in 1:l1){
for(j in 1:l2){
for(k in 1:l3){
arr[i,j,k,] <- i+j+k+runif(N, min=-4, max=4)
}
}
}
arr <- arr + rexp(3*5*5*N)
## create molten data
mdf <- melt(arr, formula = . ~ p1 + p2 + p3 + N) # create molten data frame
## confidence interval calculated by `boxplot.stats`
f <- function(x){
ans <- boxplot.stats(x)
data.frame(x=x, y=ans$stats[3], ymin=ans$conf[1], ymax=ans$conf[2])
}
## (my poor) trial
ggplot(mdf, aes(x=p3, y=value)) + geom_boxplot(outlier.shape=1) +
stat_summary(fun.data=f, geom="rectangle", colour=NA, fill="black",
xmin=x-0.36, xmax=x+0.36, ymin=max(y-0.2, ymin), ymax=min(y+0.2,
ymax)) + facet_grid(p2 ~ p1, scales = "free_y")
**SOLUTION** (after the discussion with Kohske below):
f <- function(x, height){
ans <- median(x)
data.frame(y=ans, ymin=ans-height/2, ymax=ans+height/2)
}
p <- ggplot(mdf, aes(x=p3, y=value)) + geom_boxplot(outlier.shape=1) +
stat_summary(fun.data=f, geom="crossbar", height=0.5, colour=NA,
fill="black", width=0.78) +
facet_grid(p2 ~ p1, scales = "free_y")
pdf()
print(p)
dev.off()
**UPDATE** Hmmm... it's not that trivial. The following example shows that the "height" of the crossbar should be adapted to the y-axis scale, otherwise it might be overseen.
require(ggplot2)
require(reshape2)
require(plyr)
set.seed(1)
## parameters
p1 <- c(5, 20, 100)
p2 <- c("f1", "f2", "f3", "f4", "f5")
p3 <- c("g1","g2","g3","g4","g5")
N <- 1000
## lengths
l1 <- length(p1)
l2 <- length(p2)
l3 <- length(p3)
## build result array containing the measurements
arr <- array(rep(NA, l1*l2*l3*N), dim=c(l1, l2, l3, N),
dimnames=list(
p1=p1,
p2=p2,
p3=p3,
N=1:N))
for(i in 1:l1){
for(j in 1:l2){
for(k in 1:l3){
arr[i,j,k,] <- i+j^4+k+runif(N, min=-4, max=4)
}
}
}
arr <- arr + rexp(3*5*5*N)
arr[1,2,5,] <- arr[1,2,5,]+30
arr[1,5,3,] <- arr[1,5,3,]+100
## create molten data
mdf <- melt(arr, formula = . ~ p1 + p2 + p3 + N) # create molten data frame
f <- function(x, height){
ans <- median(x)
data.frame(y=ans, ymin=ans-height/2, ymax=ans+height/2)
}
## plot
p <- ggplot(mdf, aes(x=p3, y=value)) + geom_boxplot(outlier.shape=1) +
stat_summary(fun.data=f, geom="crossbar", height=0.7, colour=NA,
fill="black", width=0.78) +
facet_grid(p2 ~ p1, scales = "free_y")
pdf()
print(p)
dev.off()
here is an example:
f <- function(x, height) {
ans <- median(x)
data.frame(ymin = ans-height/2, ymax = ans+height/2, y = ans)
}
df <- data.frame(x=gl(2,6), y=c(1,1,1,1,3,3, 1,1,3,3,3,3))
ggplot(df, aes(x, y)) + geom_boxplot() +
stat_summary(fun.data = f, geom = "crossbar", height = 0.1,
colour = NA, fill = "skyblue", width = 0.8, alpha = 0.5)
if you just want to change the apparence, then here is a quick hack, I don't recommend though,
df <- data.frame(x=gl(2,6), y=c(c(1,1,1,1,3,3), c(1,1,3,3,3,3)*10))
ggplot(df, aes(x, y)) + geom_boxplot() + facet_grid(x~.)
gs <- grid.gget("geom_boxplot", grep = T)
if (inherits(gs, "grob")) gs <- list(gs)
gss <- llply(gs, function(g) g$children[[length(g$children)]])
l_ply(gss, function(g) grid.edit(g$name, grep=T, just = c("left", "center"), height = unit(0.05, "native"), gp = gpar(fill = "skyblue", alpha = 0.5, col = NA)))

Resources