I've written a function that generates a survival plot to my liking using ggplot2. I would like the plot to be customizable, but because I am making a panel of three plots, I can't simply return the plot object to add additional customizations.
I've successfully managed to accomplish what I want using character strings (see the function at the end of the question), but I was wondering if there was a better way to do this; using the character string seems kind of foreign.
For example, the function currently allows me to do this:
require(survival)
fit <- survfit(Surv(time, status) ~ x, data=aml)
ggSurvGraph(fit, times=seq(0, 60, by=12), offset.scale=1, xlim=c(0, 60),
gg_expr="ylab('Percentage Survived') + xlab('Time Elapsed') +
scale_y_continuous(breaks=seq(0, 1.0, by=.25), labels=seq(0, 100, by=25))")
ggexpr gets added to the plot by
if (!missing(gg_expr)) .plot <- eval(parse(text=paste(".plot + ", gg_expr, sep="")))
But it seems to me it would be more natural to pass an expression in gg_expr rather than a character string. Such as this:
require(survival)
fit <- survfit(Surv(time, status) ~ x, data=aml)
ggSurvGraph(fit, times=seq(0, 60, by=12), offset.scale=1, xlim=c(0, 60),
gg_expr=ylab('Percentage Survived') + xlab('Time Elapsed') +
scale_y_continuous(breaks=seq(0, 1.0, by=.25), labels=seq(0, 100, by=25)))
Is there a better way to approach this? (a copy of what the plot looks like follows the function code)
ggSurvGraph <- function(object, times, cum.inc=FALSE, conf.bar=TRUE,
offset.scale=1, n.risk=FALSE, n.event=FALSE,
xlim, gg_expr){
require(stringr)
require(plyr)
require(ggplot2)
require(gridExtra)
require(survival)
#**************************************************************
#*** Parameter checking
error.count <- 0
error.msg <- NULL
#*** 'object' should be either a 'survfit' object or a 'data.frame'
if (!(any(class(object) %in% c("survfit","data.frame")))){
error.count <- error.count + 1
error.msg <- c(error.msg, str_c(error.count, ": \'object\' must be either a survfit object or a data frame", sep=""))
}
#*** When 'object' is a data frame, it must have the columns in 'req.col'
#*** This is a feature that was added so that we could make survival graphs with PROC LIFETEST output
req.col <- c("time","surv","lower","upper","n.risk","n.event")
if ("data.frame" %in% class(object) && !any(req.col %in% names(object))){
miss.col <- str_c("\'", req.col[!req.col %in% names(object)], "\'", sep="", collapse=", ")
error.count <- error.count + 1
error.msg <- c(error.msg, str_c(error.count, ": data frame \'object\' is missing columns ", miss.col, sep=""))
}
#*** Stop the function if any parameter checks failed
if (error.count){
stop(str_c(error.msg, collapse="\n"))
}
#********************************************************************
#*** Prepare the data for plotting
#*** Create data frame from survfit object
if ("survfit" %in% class(object)) survData <- createSurvivalFrame(object) else survData <- object
if (is.null(survData$strata)) survData$strata <- factor(1)
if (cum.inc) survData <- transform(survData,
surv = 1-surv,
lower = 1-lower,
upper = 1-upper)
survData <- ddply(survData,
"strata",
transform,
cum.evt = cumsum(n.event))
#*** Generate offset values
if(nlevels(survData$strata)>1){
offset <- seq.int(-1*ceiling(nlevels(survData$strata)/2),ceiling(nlevels(survData$strata)/2),length.out=nlevels(survData$strata)+1)
offset <- offset[offset!=0]
offset <- offset[order(abs(offset))] * offset.scale
}
else offset <- 0
offset <- data.frame(strata = levels(survData$strata), offset = offset)
survData <- merge(survData, offset, by="strata")
#*************************************************************
#* Limit to 'times' argument
extractSurvTimes <- function(df, reportTime=times){
.out <- df[sapply(reportTime, function(t) max(which(df$time <= t))), ]
.out$reportTime <- reportTime
return(.out)
}
survData <- transform(survData, reportTime = time)
survTimes <- if (missing(times)) survData
else do.call("rbind", lapply(levels(survData$strata), function(x) extractSurvTimes(subset(survData, strata==x))))
if (missing(xlim)) xlim <- c(0, max(survData$time, na.rm=TRUE))
#*************************************************************
#*** Create Plot
#*** Creates a blank plot for a spacer between survival plot and risk/event data
blank.pic <- ggplot(survData, aes(time, surv)) +
geom_blank() + theme_bw() +
theme(axis.text.x = element_blank(), axis.text.y = element_blank(),
axis.title.x = element_blank(), axis.title.y = element_blank(),
axis.ticks = element_blank(),
panel.grid = element_blank(), panel.border = element_blank())
#*** Create the survival plot
if (nlevels(survData$strata) > 1){
.plot <- ggplot(survData, aes(x=time, y=surv, colour=strata)) + geom_step()
}
else{
.plot <- ggplot(survData, aes(x=time, y=surv)) + geom_step()
}
.plot <- .plot + scale_x_continuous(limits = xlim)
#*** Add Confidence bars
if (conf.bar){
.plot <- .plot +
geom_segment(data=survTimes, aes(x=reportTime + offset, xend=reportTime + offset, y=lower, yend=upper))
}
if (!missing(gg_expr)) .plot <- eval(parse(text=paste(".plot + ", gg_expr, sep="")))
riskTable <- survTimes
riskTable <- melt(riskTable[, c("reportTime", "strata", "n.risk", "cum.evt")],
c("reportTime", "strata"))
riskTable <- transform(riskTable,
y.pos = ifelse(variable %in% "n.risk", 1, 0))
.risk <- ggplot(survData, aes(x=time, y=surv)) +
geom_text(data=riskTable, aes(x=reportTime, y=rev(variable), label=value), size=3.5, hjust=0) +
theme_bw() +
# scale_y_discrete(breaks = as.character(levels(riskTable$strata)),
# labels = levels(riskTable$variable)) +
theme(axis.text.x = element_blank(),
axis.title.x = element_blank(), axis.title.y = element_blank(),
axis.ticks = element_blank(),
panel.grid = element_blank(), panel.border = element_blank()) +
scale_x_continuous(limits = xlim) +
scale_y_discrete(labels=c("N Event", "N at Risk"))
if (nlevels(riskTable$strata) > 1) .risk <- .risk + facet_wrap(~ strata, ncol=1)
grid.arrange(.plot + theme(plot.margin = unit(c(1,1,0,.5), "lines"), legend.position="bottom"),
blank.pic + theme(plot.margin = unit(c(0,0,0,0), "lines")),
.risk + theme(plot.margin = unit(c(0,1,0,0), "lines")),
clip = FALSE, nrow = 3,
ncol = 1, heights = unit(c(.70, .04, .35),c("null", "null", "null")))
}
I don't understand your question, but I have a feeling the list() syntax might help,
p = qplot(1:10, 1:10)
p + list(ylab("label"),
scale_x_continuous(),
geom_line())
Abstract
This answer allows you to add ggplot objects to the return value of your function in standard ggplot form. Here, we add coord_cartesian to the first graph of a function that returns two graphs (yours would return a graph and a table, but same idea):
my_plots() + coord_cartesian(ylim=c(0, 5))
Also, note you could add any ggplot object. We just chose coord_cartesian because it was convenient. In your case, you would modify ggSurvGraph to operate like my_plots, which should be pretty simple.
Details
The strategy relies on the function not using grid.arrange, but rather, on having the print method for the object your function returns use grid.arrange. I made up a function that makes a barplot and a scatter plot, but I think it illustrates the point well.
library(gridExtra)
library(ggplot2)
my_plots <- function() {
df <- data.frame(x=1:10, y=(1:10)/10)
gg1 <- ggplot(df) + geom_point(aes(x=x, y=y))
gg2 <- ggplot(df) + geom_bar(aes(x=x, y=y), stat="identity")
structure(list(gg1, gg2), class="myplots")
}
The key here is what I'm returning is a list of the ggplot objects, with a custom class myplots here. Then, I can just define a print method for that class, and have grid.arrange do it's thing:
print.myplots <- function(x, ...) {
do.call(grid.arrange, x)
}
my_plots() outputs the graphs through the print method (note the key point here is that you have two ggplot objects; I realize for yours one of them is a table, but the net result is the same):
my_plots()
And now, I can define a + method, which adds whatever the second operand is to the first value in my list (so in your case, this would only affect the graph, not the table):
`+.myplots` <- function(e1, e2) {
e1[[1]] <- e1[[1]] + e2
invisible(e1)
}
Now, we can use as illustrated in the abstract (notice how the Y axis changed for the first graph):
my_plots() + coord_cartesian(ylim=c(0, 5))
Related
I want to create a plot with custom axis tick spacing. What I want to achieve is similar to this plot:
I can specify axis tick locations using breaks argument, but I can't change the distance between them.
dat <- data.frame(x = runif(100), y = runif(100))
ggplot(dat, aes(x,y)) + geom_point() + scale_x_continuous(breaks=c(0,0.1,0.2,0.4,0.8,1)) + scale_y_continuous(breaks=c(0,0.1,0.2,0.4,0.8,1))
What I essentially want is to focus on a specific interval (say 0:0.2) and have bigger spacing for this interval and squish the rest (0.2:1).
Right now I do that by creating two graphs for my desired intervals, and glue them together with grid.arrange, but I was wondering if there was a solution that enables me to generate the plot in one go.
This is my current solution:
q1<-ggplot(dat, aes(x,y)) + geom_point() + ylim(c(0.2,1)) + xlim(c(0,0.2))+ theme(axis.text.x = element_blank(), axis.title.x = element_blank(), axis.ticks.x=element_blank())
q2<-ggplot(dat, aes(x,y)) + geom_point() + ylim(c(0.2,1)) + xlim(c(0.2,1))+ theme(axis.text = element_blank(), axis.title = element_blank(), axis.ticks=element_blank())
q3<-ggplot(dat, aes(x,y)) + geom_point() + ylim(c(0,0.2)) + xlim(c(0,0.2))
q4<-ggplot(dat, aes(x,y)) + geom_point() + ylim(c(0.2,1)) + xlim(c(0.2,1))+ theme(axis.text.y = element_blank(), axis.title.y = element_blank(), axis.ticks.y=element_blank())
grid.arrange(q1,q2,q3,q4)
Ok first I'll have to make the obligatory comment that squishing part of the data in a way that disconnects the position on the plot from a direct connection to the data is not a good idea in general.
That said, here is how you can do it. We can make a function factory that produces a transformation object with the scales factor. The function factory accepts a range it should squish and a factor by how much to squish the data. I haven't tested it exhaustively, but I think it works correctly.
library(ggplot2)
library(scales)
squish_trans <- function(range, factor = 10) {
force(range)
force(factor)
forward <- function(x) {
test_between <- x > range[1] & x < range[2]
test_over <- x >= range[2]
between <- ((x - range[1]) / factor) + range[1]
over <- (x - range[2] + diff(range) / factor) + range[1]
ifelse(test_over, over,
ifelse(test_between, between, x))
}
reverse <- function(x) {
test_between <- x > range[1] & x < range[1] + diff(range) / factor
test_over <- x >= range[1] + diff(range) / factor
between <- ((x - range[1]) * factor) + range[1]
over <- (x - range[1]) - diff(range) / factor + range[2]
ifelse(test_over, over,
ifelse(test_between, between, x))
}
trans_new(
"squish_trans",
transform = forward,
inverse = reverse
)
}
Now we simply run the function factory as trans argument with the range you want to squish. You can notice that the 0.2-1 range (80% of data range) is now 0.08/0.28 ~= 0.28 (~28%) of the axis range because we squish with a factor 10.
dat <- data.frame(x = runif(100), y = runif(100))
ggplot(dat, aes(x,y)) + geom_point() +
scale_x_continuous(breaks=c(0,0.1,0.2,0.4,0.8,1),
trans = squish_trans(c(0.2, Inf))) +
scale_y_continuous(breaks=c(0,0.1,0.2,0.4,0.8,1),
trans = squish_trans(c(0.2, Inf)))
Created on 2021-02-05 by the reprex package (v1.0.0)
dat <- data.frame(x = runif(100), y = runif(100))
ggplot(dat, aes(x,y)) +
geom_point() +
scale_x_continuous(breaks=c(0,0.1,0.2,0.4,0.8,1)) +
scale_y_continuous(breaks=c(0,0.1,0.2,0.4,0.8,1))
dat$condx <- ifelse(dat$x > 0.2, "x2", "x1")
dat$condy <- ifelse(dat$y > 0.2, "y1", "y2")
dat$condxy <- paste(dat$condx, dat$condy)
ggplot(dat, aes(x, y, group=condxy)) +
geom_point() +
scale_x_continuous(breaks=c(0,0.05,0.1,0.15,0.2,0.4,0.6,0.8,1)) +
scale_y_continuous(breaks=c(0,0.05,0.1,0.15,0.2,0.4,0.6,0.8,1)) +
facet_grid(condy~condx, scales="free")
(Related to Two scales in the same axis)
Regards,
I have a longitudinal data which I pass to ggplot with facet_wrap and a subsetting of the dataframe. I wish to 'function'-alize this and I am running into trouble. I have seen similar posts to this, but not one with facet_wrap and data subsetting inside the function. For example, I have used information from this post in the past to do simple graphs. Below I show the section of code for generating dummy data and then plotting the graphic. That works OK. It's when I try to use the function call that I get the error message:
Your help is greatly appreciated. Thank you!!
Error: Faceting variables must have at least one value
# Test
# Generate data
DF <- expand.grid(Time=(0:10), variable=as.factor(c("ux0", "ux1", "ux2")), model=as.factor(c("Model 1", "Model 2", "Model 3")))
DF$value <- DF$Time + rnorm(nrow(DF), 0, 20)
# load libraries
library(ggplot2)
library(scales)
# Define themes
My_Theme = theme_bw()+
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
aspect.ratio = 1,
axis.title=element_text(size=7),
axis.text.x=element_text(size=rel(0.6)),
axis.text.y=element_text(size=rel(0.6)),
strip.text = element_text(size = 6))
#Plot
my.plot =
ggplot(subset(DF, variable %in% "ux1")) +
geom_line(aes(x=Time, y=value)) +
facet_wrap( ~ model, ncol=3) +
labs(x = "Time [s]", y = expression(paste("U"[X],","[1]))) +
My_Theme
print(my.plot)
#Now try with function
makePlots <- function(data, subSetVar, subSetval, xVar, yVar, facetVar,
xLabel, yLabel){
# Common Theme to all plots
My_Theme = theme_bw()+
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
aspect.ratio = 1,
axis.title=element_text(size=7),
axis.text.x=element_text(size=rel(0.6)),
axis.text.y=element_text(size=rel(0.6)),
strip.text = element_text(size = 6))
my.plot =
ggplot(subset(data, subSetVar %in% subSetval)) +
geom_line(aes(x=xVar, y=yVar)) +
facet_wrap(facetVar, ncol=3) +
labs(x = xLabel, y = yLabel) +
My_Theme
# Output to Plots window in RStudio
print(my.plot)
}
my.plot <- makePlots(DF, "variable", "ux1", "Time", "value", "model",
"Time [s]", expression(paste("U"[X],","[1])))
'''
In order to pass character strings as variable into ggplot, you need to make some changes in my.plot part before wrapping it into a function.
For subset the dataset, you need to pass the names of the column with [[ ]] in order to make it work. For the definition of x and y, you can use aes_string (https://ggplot2.tidyverse.org/reference/aes_.html). Finally, for facetting, pass your character vector as a formula (as explained in this post: Passing string variable facet_wrap() in ggplot using R).
my.plot =
ggplot(subset(data, data[[subSetVar]] %in% subSetval)) +
geom_line(aes_string(x=xVar, y=yVar)) +
facet_wrap(as.formula(paste("~", facetVar)), ncol=3) +
labs(x = xLabel, y = yLabel) +
My_Theme
Then, it should work and get you the same graph :
my.plot <- makePlots(DF, "variable", "ux1", "Time", "value", "model",
"Time [s]", expression(paste("U"[X],","[1])))
Does it answer your question ?
Following up on this question and for the sake of completeness, I modified the accepted answer and customized the resulting plot, but I am still facing some important problems.
To sum up, I am doing boxplots reflecting significance of Kruskal-Wallis and pairwise Wilcoxon test comparisons.
I want to replace the p-value numbers with asterisks, and show only the significant comparisons, reducing vertical spacing to the max.
Basically I want to do this, but with the added problem of facets, that messes everything up.
So far I have worked on a very decent MWE, but it still shows problems...
library(reshape2)
library(ggplot2)
library(gridExtra)
library(tidyverse)
library(data.table)
library(ggsignif)
library(RColorBrewer)
data(iris)
iris$treatment <- rep(c("A","B"), length(iris$Species)/2)
mydf <- melt(iris, measure.vars=names(iris)[1:4])
mydf$treatment <- as.factor(mydf$treatment)
mydf$variable <- factor(mydf$variable, levels=sort(levels(mydf$variable)))
mydf$both <- factor(paste(mydf$treatment, mydf$variable), levels=(unique(paste(mydf$treatment, mydf$variable))))
# Change data to reduce number of statistically significant differences
set.seed(2)
mydf <- mydf %>% mutate(value=rnorm(nrow(mydf)))
##
##FIRST TEST BOTH
#Kruskal-Wallis
addkw <- as.data.frame(mydf %>% group_by(Species) %>%
summarize(p.value = kruskal.test(value ~ both)$p.value))
#addkw$p.adjust <- p.adjust(addkw$p.value, "BH")
a <- combn(levels(mydf$both), 2, simplify = FALSE)
#new p.values
pv.final <- data.frame()
for (gr in unique(mydf$Species)){
for (i in 1:length(a)){
tis <- a[[i]] #variable pair to test
as <- subset(mydf, Species==gr & both %in% tis)
pv <- wilcox.test(value ~ both, data=as)$p.value
ddd <- data.table(as)
asm <- as.data.frame(ddd[, list(value=mean(value)), by=list(both=both)])
asm2 <- dcast(asm, .~both, value.var="value")[,-1]
pf <- data.frame(group1=paste(tis[1], gr), group2=paste(tis[2], gr), mean.group1=asm2[,1], mean.group2=asm2[,2], FC.1over2=asm2[,1]/asm2[,2], p.value=pv)
pv.final <- rbind(pv.final, pf)
}
}
#pv.final$p.adjust <- p.adjust(pv.final$p.value, method="BH")
pv.final$map.signif <- ifelse(pv.final$p.value > 0.05, "", ifelse(pv.final$p.value > 0.01,"*", "**"))
cols <- colorRampPalette(brewer.pal(length(unique(mydf$Species)), "Set1"))
myPal <- cols(length(unique(mydf$Species)))
#Function to get a list of plots to use as "facets" with grid.arrange
plot.list=function(mydf, pv.final, addkw, a, myPal){
mylist <- list()
i <- 0
for (sp in unique(mydf$Species)){
i <- i+1
mydf0 <- subset(mydf, Species==sp)
addkw0 <- subset(addkw, Species==sp)
pv.final0 <- pv.final[grep(sp, pv.final$group1), ]
num.signif <- sum(pv.final0$p.value <= 0.05)
P <- ggplot(mydf0,aes(x=both, y=value)) +
geom_boxplot(aes(fill=Species)) +
stat_summary(fun.y=mean, geom="point", shape=5, size=4) +
facet_grid(~Species, scales="free", space="free_x") +
scale_fill_manual(values=myPal[i]) + #WHY IS COLOR IGNORED?
geom_text(data=addkw0, hjust=0, size=4.5, aes(x=0, y=round(max(mydf0$value, na.rm=TRUE)+0.5), label=paste0("KW p=",p.value))) +
geom_signif(test="wilcox.test", comparisons = a[which(pv.final0$p.value<=0.05)],#I can use "a"here
map_signif_level = F,
vjust=0,
textsize=4,
size=0.5,
step_increase = 0.05)
if (i==1){
P <- P + theme(legend.position="none",
axis.text.x=element_text(size=20, angle=90, hjust=1),
axis.text.y=element_text(size=20),
axis.title=element_blank(),
strip.text.x=element_text(size=20,face="bold"),
strip.text.y=element_text(size=20,face="bold"))
} else{
P <- P + theme(legend.position="none",
axis.text.x=element_text(size=20, angle=90, hjust=1),
axis.text.y=element_blank(),
axis.ticks.y=element_blank(),
axis.title=element_blank(),
strip.text.x=element_text(size=20,face="bold"),
strip.text.y=element_text(size=20,face="bold"))
}
#WHY USING THE CODE BELOW TO CHANGE NUMBERS TO ASTERISKS I GET ERRORS?
#P2 <- ggplot_build(P)
#P2$data[[3]]$annotation <- rep(subset(pv.final0, p.value<=0.05)$map.signif, each=3)
#P <- plot(ggplot_gtable(P2))
mylist[[sp]] <- list(num.signif, P)
}
return(mylist)
}
p.list <- plot.list(mydf, pv.final, addkw, a, myPal)
y.rng <- range(mydf$value)
# Get the highest number of significant p-values across all three "facets"
height.factor <- 0.3
max.signif <- max(sapply(p.list, function(x) x[[1]]))
# Lay out the three plots as facets (one for each Species), but adjust so that y-range is same for each facet. Top of y-range is adjusted using max_signif.
png(filename="test.png", height=800, width=1200)
grid.arrange(grobs=lapply(p.list, function(x) x[[2]] +
scale_y_continuous(limits=c(y.rng[1], y.rng[2] + height.factor*max.signif))),
ncol=length(unique(mydf$Species)), top="Random title", left="Value") #HOW TO CHANGE THE SIZE OF THE TITLE AND THE Y AXIS TEXT?
#HOW TO ADD A COMMON LEGEND?
dev.off()
It produces the following plot:
As you can see there are some problems, most obviously:
1- Coloring does not work for some reason
2- I do not seem to be able to change the annotation with the asterisks
I want something more like this (mockup):
So we need to:
1- Make coloring work
2- Show asterisks instead of numbers
...and for the win:
3- Make a common legend
4- Place Kruskal-Wallis line on top
5- Change the size (and alignment) of the title and y axis text
IMPORTANT NOTES
I would appreciate my code is left as intact as possible even if it isn't the prettiest, cause I still have to make use of intermediate objects like "CNb" or "pv.final".
The solution should be easily transferable to other cases; please consider testing "variable" alone, instead of "both"... In this case we have 6 "facets" (vertically and horizontally) and everything gets even more screwed up...
I made this other MWE:
##NOW TEST MEASURE, TO GET VERTICAL AND HORIZONTAL FACETS
addkw <- as.data.frame(mydf %>% group_by(treatment, Species) %>%
summarize(p.value = kruskal.test(value ~ variable)$p.value))
#addkw$p.adjust <- p.adjust(addkw$p.value, "BH")
a <- combn(levels(mydf$variable), 2, simplify = FALSE)
#new p.values
pv.final <- data.frame()
for (tr in levels(mydf$treatment)){
for (gr in levels(mydf$Species)){
for (i in 1:length(a)){
tis <- a[[i]] #variable pair to test
as <- subset(mydf, treatment==tr & Species==gr & variable %in% tis)
pv <- wilcox.test(value ~ variable, data=as)$p.value
ddd <- data.table(as)
asm <- as.data.frame(ddd[, list(value=mean(value, na.rm=T)), by=list(variable=variable)])
asm2 <- dcast(asm, .~variable, value.var="value")[,-1]
pf <- data.frame(group1=paste(tis[1], gr, tr), group2=paste(tis[2], gr, tr), mean.group1=asm2[,1], mean.group2=asm2[,2], FC.1over2=asm2[,1]/asm2[,2], p.value=pv)
pv.final <- rbind(pv.final, pf)
}
}
}
#pv.final$p.adjust <- p.adjust(pv.final$p.value, method="BH")
# set signif level
pv.final$map.signif <- ifelse(pv.final$p.value > 0.05, "", ifelse(pv.final$p.value > 0.01,"*", "**"))
plot.list2=function(mydf, pv.final, addkw, a, myPal){
mylist <- list()
i <- 0
for (sp in unique(mydf$Species)){
for (tr in unique(mydf$treatment)){
i <- i+1
mydf0 <- subset(mydf, Species==sp & treatment==tr)
addkw0 <- subset(addkw, Species==sp & treatment==tr)
pv.final0 <- pv.final[grep(paste(sp,tr), pv.final$group1), ]
num.signif <- sum(pv.final0$p.value <= 0.05)
P <- ggplot(mydf0,aes(x=variable, y=value)) +
geom_boxplot(aes(fill=Species)) +
stat_summary(fun.y=mean, geom="point", shape=5, size=4) +
facet_grid(treatment~Species, scales="free", space="free_x") +
scale_fill_manual(values=myPal[i]) + #WHY IS COLOR IGNORED?
geom_text(data=addkw0, hjust=0, size=4.5, aes(x=0, y=round(max(mydf0$value, na.rm=TRUE)+0.5), label=paste0("KW p=",p.value))) +
geom_signif(test="wilcox.test", comparisons = a[which(pv.final0$p.value<=0.05)],#I can use "a"here
map_signif_level = F,
vjust=0,
textsize=4,
size=0.5,
step_increase = 0.05)
if (i==1){
P <- P + theme(legend.position="none",
axis.text.x=element_blank(),
axis.text.y=element_text(size=20),
axis.title=element_blank(),
axis.ticks.x=element_blank(),
strip.text.x=element_text(size=20,face="bold"),
strip.text.y=element_text(size=20,face="bold"))
}
if (i==4){
P <- P + theme(legend.position="none",
axis.text.x=element_text(size=20, angle=90, hjust=1),
axis.text.y=element_text(size=20),
axis.title=element_blank(),
strip.text.x=element_text(size=20,face="bold"),
strip.text.y=element_text(size=20,face="bold"))
}
if ((i==2)|(i==3)){
P <- P + theme(legend.position="none",
axis.text.x=element_blank(),
axis.text.y=element_blank(),
axis.title=element_blank(),
axis.ticks.x=element_blank(),
axis.ticks.y=element_blank(),
strip.text.x=element_text(size=20,face="bold"),
strip.text.y=element_text(size=20,face="bold"))
}
if ((i==5)|(i==6)){
P <- P + theme(legend.position="none",
axis.text.x=element_text(size=20, angle=90, hjust=1),
axis.text.y=element_blank(),
#axis.ticks.y=element_blank(), #WHY SPECIFYING THIS GIVES ERROR?
axis.title=element_blank(),
axis.ticks.y=element_blank(),
strip.text.x=element_text(size=20,face="bold"),
strip.text.y=element_text(size=20,face="bold"))
}
#WHY USING THE CODE BELOW TO CHANGE NUMBERS TO ASTERISKS I GET ERRORS?
#P2 <- ggplot_build(P)
#P2$data[[3]]$annotation <- rep(subset(pv.final0, p.value<=0.05)$map.signif, each=3)
#P <- plot(ggplot_gtable(P2))
sptr <- paste(sp,tr)
mylist[[sptr]] <- list(num.signif, P)
}
}
return(mylist)
}
p.list2 <- plot.list2(mydf, pv.final, addkw, a, myPal)
y.rng <- range(mydf$value)
# Get the highest number of significant p-values across all three "facets"
height.factor <- 0.5
max.signif <- max(sapply(p.list2, function(x) x[[1]]))
# Lay out the three plots as facets (one for each Species), but adjust so that y-range is same for each facet. Top of y-range is adjusted using max_signif.
png(filename="test2.png", height=800, width=1200)
grid.arrange(grobs=lapply(p.list2, function(x) x[[2]] +
scale_y_continuous(limits=c(y.rng[1], y.rng[2] + height.factor*max.signif))),
ncol=length(unique(mydf$Species)), top="Random title", left="Value") #HOW TO CHANGE THE SIZE OF THE TITLE AND THE Y AXIS TEXT?
#HOW TO ADD A COMMON LEGEND?
dev.off()
That produces the following plot:
Now the color problem becomes more striking, the facet heights are uneven, and something should be done with the redundant facet strip texts too.
I am stuck at this point, so would appreciate any help. Sorry for the long question, but I think it is almost there! Thanks!!
You can try following. As your code is really busy and for me too complicated to understand, I suggest a different approach. I tried to avoid loops and to use the tidyverse as much as possible. Thus, first I created your data. Then calculated kruskal wallis tests as this was not possible within ggsignif. Afterwards I will plot all p.values using geom_signif. Finally, insignificant ones will be removed and a step increase is added.
1- Make coloring work done
2- Show asterisks instead of numbers done
...and for the win:
3- Make a common legend done
4- Place Kruskal-Wallis line on top done, I placed the values at the bottom
5- Change the size (and alignment) of the title and y axis text done
library(tidyverse)
library(ggsignif)
# 1. your data
set.seed(2)
df <- as.tbl(iris) %>%
mutate(treatment=rep(c("A","B"), length(iris$Species)/2)) %>%
gather(key, value, -Species, -treatment) %>%
mutate(value=rnorm(n())) %>%
mutate(key=factor(key, levels=unique(key))) %>%
mutate(both=interaction(treatment, key, sep = " "))
# 2. Kruskal test
KW <- df %>%
group_by(Species) %>%
summarise(p=round(kruskal.test(value ~ both)$p.value,2),
y=min(value),
x=1) %>%
mutate(y=min(y))
# 3. Plot
P <- df %>%
ggplot(aes(x=both, y=value)) +
geom_boxplot(aes(fill=Species)) +
facet_grid(~Species) +
ylim(-3,7)+
theme(axis.text.x = element_text(angle=45, hjust=1)) +
geom_signif(comparisons = combn(levels(df$both),2,simplify = F),
map_signif_level = T) +
stat_summary(fun.y=mean, geom="point", shape=5, size=4) +
xlab("") +
geom_text(data=KW,aes(x, y=y, label=paste0("KW p=",p)),hjust=0) +
ggtitle("Plot") + ylab("This is my own y-lab")
# 4. remove not significant values and add step increase
P_new <- ggplot_build(P)
P_new$data[[2]] <- P_new$data[[2]] %>%
filter(annotation != "NS.") %>%
group_by(PANEL) %>%
mutate(index=(as.numeric(group[drop=T])-1)*0.5) %>%
mutate(y=y+index,
yend=yend+index) %>%
select(-index) %>%
as.data.frame()
# the final plot
plot(ggplot_gtable(P_new))
and similar approach using two facets
# --------------------
# 5. Kruskal
KW <- df %>%
group_by(Species, treatment) %>%
summarise(p=round(kruskal.test(value ~ both)$p.value,2),
y=min(value),
x=1) %>%
ungroup() %>%
mutate(y=min(y))
# 6. Plot with two facets
P <- df %>%
ggplot(aes(x=key, y=value)) +
geom_boxplot(aes(fill=Species)) +
facet_grid(treatment~Species) +
ylim(-5,7)+
theme(axis.text.x = element_text(angle=45, hjust=1)) +
geom_signif(comparisons = combn(levels(df$key),2,simplify = F),
map_signif_level = T) +
stat_summary(fun.y=mean, geom="point", shape=5, size=4) +
xlab("") +
geom_text(data=KW,aes(x, y=y, label=paste0("KW p=",p)),hjust=0) +
ggtitle("Plot") + ylab("This is my own y-lab")
# 7. remove not significant values and add step increase
P_new <- ggplot_build(P)
P_new$data[[2]] <- P_new$data[[2]] %>%
filter(annotation != "NS.") %>%
group_by(PANEL) %>%
mutate(index=(as.numeric(group[drop=T])-1)*0.5) %>%
mutate(y=y+index,
yend=yend+index) %>%
select(-index) %>%
as.data.frame()
# the final plot
plot(ggplot_gtable(P_new))
Edit.
Regarding to your p.adjust needs, you can set up a function on your own and calling it directly within geom_signif().
wilcox.test.BH.adjusted <- function(x,y,n){
tmp <- wilcox.test(x,y)
tmp$p.value <- p.adjust(tmp$p.value, n = n,method = "BH")
tmp
}
geom_signif(comparisons = combn(levels(df$both),2,simplify = F),
map_signif_level = T, test = "wilcox.test.BH.adjusted",
test.args = list(n=8))
The challenge is to know how many independet tests you will have in the end. Then you can set the n by your own. Here I used 8. But this is maybe wrong.
Constructing ggplots in a loop has always been known to produce confusing results, and for the explanation of point 1 I'll refer to this question and many others. There's also a hint there about evaluating the ggplot object on the spot, e.g. via print.
Re point 2, you were close, a bit of debugging with trial and error helped. Here's the complete code for plot.list:
plot.list=function(mydf, pv.final, addkw, a, myPal){
mylist <- list()
i <- 0
for (sp in unique(mydf$Species)){
i <- i+1
mydf0 <- subset(mydf, Species==sp)
addkw0 <- subset(addkw, Species==sp)
pv.final0 <- pv.final[grep(sp, pv.final$group1), ]
num.signif <- sum(pv.final0$p.value <= 0.05)
P <- ggplot(mydf0,aes(x=both, y=value)) +
geom_boxplot(aes(fill=Species)) +
stat_summary(fun.y=mean, geom="point", shape=5, size=4) +
facet_grid(~Species, scales="free", space="free_x") +
scale_fill_manual(values=myPal[i]) +
geom_text(data=addkw0, hjust=0, size=4.5, aes(x=0, y=round(max(mydf0$value, na.rm=TRUE)+0.5), label=paste0("KW p=",p.value))) +
geom_signif(test="wilcox.test", comparisons = a[which(pv.final0$p.value<=0.05)],#I can use "a"here
map_signif_level = F,
vjust=0,
textsize=4,
size=0.5,
step_increase = 0.05)
if (i==1){
P <- P + theme(legend.position="none",
axis.text.x=element_text(size=20, angle=90, hjust=1),
axis.text.y=element_text(size=20),
axis.title=element_blank(),
strip.text.x=element_text(size=20,face="bold"),
strip.text.y=element_text(size=20,face="bold"))
} else{
P <- P + theme(legend.position="none",
axis.text.x=element_text(size=20, angle=90, hjust=1),
axis.text.y=element_blank(),
axis.ticks.y=element_blank(),
axis.title=element_blank(),
strip.text.x=element_text(size=20,face="bold"),
strip.text.y=element_text(size=20,face="bold"))
}
P2 <- ggplot_build(P)
P2$data[[4]]$annotation <- rep(subset(pv.final0, p.value<=0.05)$map.signif, each=3)
P <- ggplot_gtable(P2)
mylist[[sp]] <- list(num.signif, P)
}
return(mylist)
}
Note that we can no longer modify the plot via ggplot semantics, since we already applied ggplot_build/ggplot_gtable, so scale modification is no longer possible. If you want to preserve it, move it inside the plot.list function. So, changing to
grid.arrange(grobs=lapply(p.list, function(x) x[[2]]),
ncol=length(unique(mydf$Species)), top="Random title", left="Value")
yields
That's not a complete solution, of course, but I hope that helps.
I would like to re-order the elements in a legend, as they appear top to bottom in an R ggplot. That is: I'd like the order dictated by comparing the Y value at the right most point X axis point. In the following data, I'd like the legend to read from the top: bush, foo, baz, bar.
Update: following #alexwhan comments, I have added the data to the script.
Update 2: this is now exactly what I was hoping for, thanks to #thomas-kern on #R (bosie) irc.freenode. The trick was to add both, i.e.
scale_linetype_discrete(breaks = ord$Variant) + scale_shape_discrete(breaks = ord$Variant)
Here's my R:
library(plyr)
library(ggplot2)
require(grid)
args <- commandArgs(trailingOnly = TRUE)
lines <- "
X,Variant,Y
1,foo,123
1,bar,134
1,baz,135
1,bush,136
2,foo,221
2,bar,104
2,baz,155
2,bush,336
"
con <- textConnection(lines)
DF <- read.csv(con, header=TRUE)
close(con)
cdata <- ddply(DF, .(Variant,X), summarise, N = length(Y), mean=round(mean(Y),2), sd=round(sd(Y),2), se=round(sd(Y)/sqrt(length(Y)),2))
ord <- cdata[cdata$X == max(cdata$X),]
ord <- ord[order(ord$Variant, decreasing=T),]
pdf("out.pdf")
none <- element_blank()
bp <- ggplot(cdata, aes(x=X, y=mean, group=Variant)) + xlab("X label") + geom_line(aes(linetype=Variant)) + geom_point(aes(shape=Variant)) + ylab("Y Value") + labs(title = "mytitle") + scale_linetype_discrete(breaks = ord$Variant) + scale_shape_discrete(breaks = ord$Variant)
print(bp + theme(legend.justification=c(1,0), legend.position=c(1,0), legend.key.width=unit(3,"line"), legend.title=element_blank(), text = element_text(size=18)) + theme(panel.background = element_rect(fill='white', colour='black')) + theme(panel.grid.major = none, panel.grid.minor = none))
dev.off()
This generates exactly what I'm after:
It really helps if you provide the data your plot is made with. Here's an example of how to approach with some data I made up:
dat <- data.frame(x = c(1,2), y = rnorm(8), group = rep(c("bar", "baz", "bush", "foo"), each = 2))
ord <- dat[dat$x == max(dat$x),]
ord <- ord[order(ord$y, decreasing=T),]
ggplot(dat, aes(x, y)) + geom_point(aes(shape = group)) + geom_line(aes(group = group)) +
scale_shape_discrete(breaks = ord$group)
I am building charts that have two lines in the axis text. The first line contains the group name, the second line contains that group population. I build my axis labels as a single character string with the format "LINE1 \n LINE2". Is it possible to assign different font faces and sizes to LINE1 and LINE2, even though they are contained within a single character string? I would like LINE1 to be large and bolded, and LINE2 to be small and unbolded.
Here's some sample code:
Treatment <- rep(c('T','C'),each=2)
Gender <- rep(c('Male','Female'),2)
Response <- sample(1:100,4)
test_df <- data.frame(Treatment, Gender, Response)
xbreaks <- levels(test_df$Gender)
xlabels <- paste(xbreaks,'\n',c('POP1','POP2'))
hist <- ggplot(test_df, aes(x=Gender, y=Response, fill=Treatment, stat="identity"))
hist + geom_bar(position = "dodge") + scale_y_continuous(limits = c(0,
100), name = "") + scale_x_discrete(labels=xlabels, breaks = xbreaks) +
opts(
axis.text.x = theme_text(face='bold',size=12)
)
I tried this, but the result was one large, bolded entry, and one small, unbolded entry:
hist + geom_bar(position = "dodge") + scale_y_continuous(limits = c(0,
100), name = "") + scale_x_discrete(labels=xlabels, breaks = xbreaks) +
opts(
axis.text.x = theme_text(face=c('bold','plain'),size=c('15','10'))
)
Another possible solution is to create separate chart elements, but I don't think that ggplot2 has a 'sub-axis label' element available...
Any help would be very much appreciated.
Cheers,
Aaron
I also think that I could not to make the graph by only using ggplot2 features.
I would use grid.text and grid.gedit.
require(ggplot2)
Treatment <- rep(c('T','C'), each=2)
Gender <- rep(c('Male','Female'), 2)
Response <- sample(1:100, 4)
test_df <- data.frame(Treatment, Gender, Response)
xbreaks <- levels(test_df$Gender)
xlabels <- paste(xbreaks,'\n',c('',''))
hist <- ggplot(test_df, aes(x=Gender, y=Response, fill=Treatment,
stat="identity"))
hist + geom_bar(position = "dodge") +
scale_y_continuous(limits = c(0, 100), name = "") +
scale_x_discrete(labels=xlabels, breaks = xbreaks) +
opts(axis.text.x = theme_text(face='bold', size=12))
grid.text(label="POP1", x = 0.29, y = 0.06)
grid.text(label="POP2", x = 0.645, y = 0.06)
grid.gedit("GRID.text", gp=gpar(fontsize=8))
Please try to tune a code upon according to your environment (e.g. the position of sub-axis labels and the fontsize).
I found another simple solution below:
require(ggplot2)
Treatment <- rep(c('T','C'),each=2)
Gender <- rep(c('Male','Female'),2)
Response <- sample(1:100,4)
test_df <- data.frame(Treatment, Gender, Response)
xbreaks <- levels(test_df$Gender)
xlabels[1] <- expression(atop(bold(Female), scriptstyle("POP1")))
xlabels[2] <- expression(atop(bold(Male), scriptstyle("POP2")))
hist <- ggplot(test_df, aes(x=Gender, y=Response, fill=Treatment,
stat="identity"))
hist +
geom_bar(position = "dodge") +
scale_y_continuous(limits = c(0, 100), name = "") +
scale_x_discrete(label = xlabels, breaks = xbreaks) +
opts(
axis.text.x = theme_text(size = 12)
)
All,
Using Triad's cheat this is the closest I was able to get to solution on this one. Let me know if you have any questions:
library(ggplot2)
spacing <- 0 #We can adjust how much blank space we have beneath the chart here
labels1= paste('Group',c('A','B','C','D'))
labels2 = rep(paste(rep('\n',spacing),collapse=''),length(labels1))
labels <- paste(labels1,labels2)
qplot(1:4,1:4, geom="blank") +
scale_x_continuous(breaks=1:length(labels), labels=labels) + xlab("")+
opts(plot.margin = unit(c(1, 1, 3, 0.5), "lines"),
axis.text.x = theme_text(face='bold', size=14))
xseq <- seq(0.15,0.9,length.out=length(labels)) #Assume for now that 0.15 and 0.9 are constant plot boundaries
sample_df <- data.frame(group=rep(labels1,each=2),subgroup=rep(c('a','b'),4),pop=sample(1:10,8))
popLabs <- by(sample_df,sample_df$group,function(subData){
paste(paste(subData$subgroup,' [n = ', subData$pop,']',sep=''),collapse='\n')
})
gridText <- paste("grid.text(label='\n",popLabs,"',x=",xseq,',y=0.1)',sep='')
sapply(gridText, function(x){ #Evaluate parsed character string for each element of gridText
eval(parse(text=x))
})
grid.gedit("GRID.text", gp=gpar(fontsize=12))
Cheers,
Aaron