Modify x-axis labels in each facet - r

I have this chart - I would like to add to each label the text N=xx to denote the number of observations. I know how to do this and I have done that on charts with no facets.
When I tried that on the faceted chart it did not work, (I got the same N on the open tick on all 3 charts, the same N on the Restricted, etc.)
I hope someone can point the way to a solution, how do I control the elements on a given facet?
library(ggplot2)
library(scales)
stat_sum_single <- function(fun, geom="point", ...) {
stat_summary(fun.y=fun, fill="red", geom=geom, size = 5, shape=24)
}
set.seed(1)
data1 <- data.frame(Physicians_In=sample(1:3,100,replace=T),Physicians_Out=sample(1:3,100,replace=T),share=runif(100,0,1))
data1$Physicians_In <- factor(data1$Physicians_In,levels=c(1,2,3),labels=c("Open","Restricted","Closed"))
data1$Physicians_Out <- factor(data1$Physicians_Out,levels=c(1,2,3),labels=c("Open","Restricted","Closed"))
access_ch3 <- ggplot(data1,aes(x=Physicians_In,y=share,fill=Physicians_In))+geom_boxplot()+stat_sum_single(mean)
access_ch3 <- access_ch3 +geom_jitter(position = position_jitter(width = .2),color="blue")+theme_bw()
access_ch3 <- access_ch3 + theme(legend.position="none") +scale_y_continuous("Gammagard Share",labels=percent)
gpo_labs5 <- paste(gsub("/","-\n",names(table(data1$Physicians_Out)),fixed=T),"\n(N=",table(data1$Physicians_Out),")",sep="")
access_ch3 <- access_ch3 + scale_x_discrete("Physician Access (In Hospital)",labels=gpo_labs5)
access_ch3 <- access_ch3 +facet_grid(.~Physicians_Out,labeller=label_both)
access_ch3
I tried creating the 9 labels and passing that vector to the scale_x_discrete element, that just recycled the first 3, so it also did not solve the issue.

With the same data I followed a four step approach.
First: subsetting the data
open <- subset(data1, Physicians_Out == "Open")
restr <- subset(data1, Physicians_Out == "Restricted")
closed <- subset(data1, Physicians_Out == "Closed")
Second: creating the labels for the different subsets
labs.open <- paste(gsub("/","-\n",names(table(open$Physicians_In)),fixed=T),
"\n(N=",table(open$Physicians_In),")",sep="")
labs.restr <- paste(gsub("/","-\n",names(table(restr$Physicians_In)),fixed=T),
"\n(N=",table(restr$Physicians_In),")",sep="")
labs.closed <- paste(gsub("/","-\n",names(table(closed$Physicians_In)),fixed=T),
"\n(N=",table(closed$Physicians_In),")",sep="")
Third: creating a theme for removing the y-axis labels & text for the 2nd & 3rd sub-graphs
mytheme <- theme(
axis.title.y = element_blank(),
axis.text.y = element_blank(),
axis.ticks.y = element_blank()
)
Finally: creating the graph
p1 <- ggplot(open,aes(x=Physicians_In,y=share,fill=Physicians_In)) +
geom_boxplot() + stat_sum_single(mean) +
geom_jitter(position = position_jitter(width = .2),color="blue") +
guides(fill=FALSE) +
ggtitle(paste("Physician Access (Out): Open\nN = (", nrow(open), ")\n")) +
scale_y_continuous("Gammagard Share",labels=percent) +
scale_x_discrete("\nPhysician Access (In Hospital)",labels=labs.open) +
theme_bw()
p2 <- ggplot(restr,aes(x=Physicians_In,y=share,fill=Physicians_In)) +
geom_boxplot() + stat_sum_single(mean) +
geom_jitter(position = position_jitter(width = .2),color="blue") +
guides(fill=FALSE) +
ggtitle(paste("Physician Access (Out): Restricted\nN = (", nrow(restr), ")\n")) +
scale_x_discrete("\nPhysician Access (In Hospital)",labels=labs.restr) +
theme_bw() + mytheme
p3 <- ggplot(closed,aes(x=Physicians_In,y=share,fill=Physicians_In)) +
geom_boxplot() + stat_sum_single(mean) +
geom_jitter(position = position_jitter(width = .2),color="blue") +
guides(fill=FALSE) +
ggtitle(paste("Physician Access (Out): Closed\nN = (", nrow(closed), ")\n")) +
scale_x_discrete("\nPhysician Access (In Hospital)",labels=labs.closed) +
theme_bw() + mytheme
library(gridExtra)
grid.arrange(p1, p2, p3, ncol=3)
Which gives the following result:

It is not exactly what you want to do , but I think this can be helpful ( at least a good start)
library(ggplot2)
library(plyr)
data1 <- ddply(data1,.(Physicians_Out,Physicians_In),transform,label = length(share))
ggplot(data1,aes(x=Physicians_In,y=share,fill=Physicians_In))+
geom_boxplot() +
stat_sum_single(mean) +
facet_grid(.~Physicians_Out,labeller=label_both,scales='free_x') +
stat_summary(fun.y=min,aes(label=paste0('N=',label)),geom='text',col='blue',cex=5)

Related

Variable geom_text is overwritten when plots saved in list [duplicate]

This question already has an answer here:
List for Multiple Plots from Loop (ggplot2) - List elements being overwritten
(1 answer)
Closed 2 years ago.
I am trying to organize several dozens of plots using ggarrange, so I have setup a loop in which I save each plot in a list. Each plot differs from each other with different data, title, etc. Everything works perfectly until I try to use geom_text to place some text inside the plot. When the plots are saved in the list, each plot inherits the geom_text from the last plot in the list. I don't know how to avoid this.
my.list=vector("list", length = 2);
dt=data.table(x=c(1,100,100000),y=c(1,100,100000))
plotname=c('first','second')
for (i in 1:length(my.list)) {
my.list[[i]]=ggplot(data = dt, aes(x = x, y = y )) + geom_point(size=1.5,aes(color=c('red'))) + labs(x=NULL, y=NULL)
+ scale_color_manual(values='red')
+ theme_bw() + theme(panel.background = element_rect(fill='light grey', colour='black'),legend.position = "none")
+ geom_text(inherit.aes=FALSE,aes(x=500, y=100000, label=paste0('NRMSE:',i))) + ggtitle(paste0(plotname[i])) + coord_equal()
+ geom_abline(slope=1)
+ scale_y_log10(breaks = c(1,10,100,1000,10000,100000),limits=c(1,100000))
+ scale_x_log10(breaks = c(1,10,100,1000,10000,1000000),limits=c(1,100000))
+ labs(x=NULL, y=NULL)
+ theme_bw() + theme(panel.background = element_rect(fill='light grey', colour='black'),legend.position = "none")
}
after this I do
plotosave=ggarrange(plotlist=my.list)
Using lapply instead of forloop works fine:
my.list <- lapply(1:2, function(i) {
ggplot(data = dt, aes(x = x, y = y )) +
geom_point(size=1.5) +
labs(x=NULL, y=NULL) +
theme_bw() +
theme(panel.background = element_rect(fill='light grey', colour='black'),
legend.position = "none") +
geom_text(inherit.aes=FALSE,aes(x=50000, y=100000,
label=paste0('NRMSE:',i))) +
ggtitle(paste0(plotname[i]))
})
ggarrange(plotlist = my.list)
Note: the issue is not with ggarrange.
Roland:
The plot is build when you print the ggplot object. Anything that is not part of the data passed will be taken from the enclosing environment at exactly that time point. If you use the iterator of a for loop in the plot, it has its last value then (or any value you change it to later on). lapply avoids the issue because of the stuff explained in the Note in its documentation.
Related post:
the problem is that ggplot() waits until you print the plot to resolve the variables in the aes() command.
I don't exactly know why this occurs but if you remove aes from geom_text it works.
library(ggplot2)
my.list = vector("list", length = 2)
dt = data.table::data.table(x=c(1,100,100000),y=c(1,100,100000))
plotname = c('first','second')
for (i in 1:length(my.list)) {
my.list[[i]]= ggplot(data = dt, aes(x = x, y = y )) +
geom_point(size=1.5) +
labs(x=NULL, y=NULL) +
theme_bw() +
theme(panel.background = element_rect(fill='light grey', colour='black'),
legend.position = "none") +
geom_text(x=50000, y=100000, label=paste0('NRMSE:',i)) +
ggtitle(paste0(plotname[i]))
}
plotosave = ggpubr::ggarrange(plotlist=my.list)

Circular tree with heatmap

This question is quite trivial but I cannot be handled nicely with.
I'm trying to plot a circular tree with a side heatmap.
I'm using ggtree but any approach ggplo2 based is welcome.
The problems that I'm not understanding well the gheatmap function.
I want:
1- names AFTER the heatmap
2- 2 text columns after heatmap (for while may have the same value, but I need to know how to add it )
3- heatmap columns name nicely handled, should we remove the columns name and use different colors scales for each? wherever the solution falls might better than the way it is now
library(tidyverse)
library(ggtree)
library(treeio)
library(tidytree)
beast_file <- system.file("examples/MCC_FluA_H3.tree", package="ggtree")
beast_tree <- read.beast(beast_file)
genotype_file <- system.file("examples/Genotype.txt", package="ggtree")
genotype <- read.table(genotype_file, sep="\t", stringsAsFactor=F)
colnames(genotype) <- sub("\\.$", "", colnames(genotype))
p <- ggtree(beast_tree, mrsd="2013-01-01",layout = "fan", open.angle = -270) +
geom_treescale(x=2008, y=1, offset=2) +
geom_tiplab(size=2)
gheatmap(p, genotype, offset=5, width=0.5, font.size=3,
colnames_angle=-45, hjust=0) +
scale_fill_manual(breaks=c("HuH3N2", "pdm", "trig"),
values=c("steelblue", "firebrick", "darkgreen"), name="genotype")
Thanks in advance
UPDATE:
I found a better way to plot the name of heatmap columns.
Also, I found that the simplification of the data was useful to
clean up a little the tip labels.
Now, I just need to add two text columns after heatmap.
p <- ggtree(beast_tree)
gheatmap(
p, genotype, colnames=TRUE,
colnames_angle=90,
colnames_offset_y = 5,
colnames_position = "top",
) +
scale_fill_manual(breaks=c("HuH3N2", "pdm", "trig"),
values=c("steelblue", "firebrick", "darkgreen"), name="genotype")
UPDATE 2:
A very bad improvement
I just used ggplot to create the label and merge with patchwork
library(patchwork)
p$data %>%
ggplot(aes(1, y= y, label = label )) +
geom_text(size=2) +
xlim(NA, 1) +
theme_classic() +
theme(axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.title.y=element_blank(),
axis.text.y=element_blank(),
axis.ticks.y=element_blank()) -> adText
pp + adText
The answer according #xiangpin at GitHub.
Big offset value to geom_tiplabel:
p <- ggtree(beast_tree)
p1 <- gheatmap(
p, genotype, colnames=TRUE,
colnames_angle=-45,
colnames_offset_y = 5,
colnames_position = "bottom",
width=0.3,
hjust=0, font.size=2) +
scale_fill_manual(breaks=c("HuH3N2", "pdm", "trig"),
values=c("steelblue", "firebrick", "darkgreen"), name="genotype") +
geom_tiplab(align = TRUE, linesize=0, offset = 7, size=2) +
xlim_tree(xlim=c(0, 36)) +
scale_y_continuous(limits = c(-1, NA))
p1
Using ggtreeExtra:
library(ggtreeExtra)
library(ggtree)
library(treeio)
library(ggplot2)
beast_file <- system.file("examples/MCC_FluA_H3.tree", package="ggtree")
genotype_file <- system.file("examples/Genotype.txt", package="ggtree")
tree <- read.beast(beast_file)
genotype <- read.table(genotype_file, sep="\t")
colnames(genotype) <- sub("\\.$", "", colnames(genotype))
genotype$ID <- row.names(genotype)
dat <- reshape2::melt(genotype, id.vars="ID", variable.name = "type", value.name="genotype", factorsAsStrings=FALSE)
dat$genotype <- unlist(lapply(as.vector(dat$genotype),function(x)ifelse(nchar(x)==0,NA,x)))
p <- ggtree(tree) + geom_treescale()
p2 <- p + geom_fruit(data=dat,
geom=geom_tile,
mapping=aes(y=ID, x=type, fill=genotype),
color="white") +
scale_fill_manual(values=c("steelblue", "firebrick", "darkgreen"),
na.translate=FALSE) +
geom_axis_text(angle=-45, hjust=0, size=1.5) +
geom_tiplab(align = TRUE, linesize=0, offset = 6, size=2) +
xlim_tree(xlim=c(0, 36)) +
scale_y_continuous(limits = c(-1, NA))
p2

ggplot2 - change line color depending on variable name

I would like to plot the values of several tables. Each of these tables have a different/unknown number of variables/columns.
I am using the following code in order to plot the data:
library(ggplot2)
library(reshape2)
#data <- read.table("jony.csv", header = TRUE, sep = ";", fill = TRUE)
data <- read.table(text="MONTH;GFDL.ESM2M_HBV_IWW_WFDisi;GFDL.ESM2M_SWIM;GFDL.ESM2M_WaterGAP3;GFDL.ESM2M_HYPE;GFDL.ESM2M_VIC;month_mean;q70
1;853.455161290323;550.116774193548;746.965913978495;469.31688172043;546.64752688172;633.300451612903;452.931661075269
2;1037.55011792453;632.34445754717;805.189285714286;567.411202830189;763.929245283019;761.284861859839;452.931661075269
3;782.714301075269;447.378494623656;561.674193548387;422.475483870968;591.257634408602;561.100021505376;452.931661075269
", header = TRUE, sep = ";", fill = TRUE)
jony <- melt(data, id.vars="MONTH")
p <- ggplot(jony, aes(MONTH,value, col=variable))
p + geom_line(size = 0.1) +
geom_hline(aes(yintercept = 0), linetype="dotted") +
ylab("Runoff [m3/s]") +
xlab("Month") +
theme_bw() +
theme(legend.key = element_blank())+
scale_color_discrete(name='Models GCM_HM') +
ggtitle("Jony")
So with this code, ggplot2 assign automatically a color for each of my variables. My problem is that I would like to assign manually a color just for the last two variables "month_mean" and "q70". I have tried different ways, but it seems that then I need to assign manually a color for each of my variables (which is not making sens in my case because I have way too many data to threat and the number of variables is not constant). Does anyone knows a workaround in order to assign manually a color for those two variables?
Maybe use some sort of a helper function, e.g.
p <- ggplot(jony, aes(MONTH,value, col=variable)) +
geom_line(size = 0.1) +
geom_hline(aes(yintercept = 0), linetype="dotted") +
ylab("Runoff [m3/s]") +
xlab("Month") +
theme_bw() +
theme(legend.key = element_blank())+
scale_color_discrete(name='Models GCM_HM') +
ggtitle("Jony")
f <- function(x,cols,pal=rainbow) {
stopifnot(names(cols) %in% x)
pal <- pal(length(x)-length(cols))
names(pal) <- setdiff(x, names(cols))
pal <- c(pal, cols)
return(pal)
}
p + scale_color_manual(
values = f(levels(jony$variable), c("month_mean"="black", "q70"="cyan"), grey.colors )
)
Probably room for improvement, but...

How to get plots in several pdf pages using ggplot2

I need help to process graphs into multiple pdf pages. Here is my current code:
file <- read.csv(file="file.csv")
library(ggplot2)
library(gridExtra)
library(plyr)
gg1 <- ggplot() +
geom_line(aes(x=TIME, y=var1, colour = "z1"), file) +
geom_line(aes(x=TIME, y=var2, colour = "z2"), file) +
geom_point(aes(x=TIME, y=var3), file) + facet_wrap( ~ ID, ncol=5)+
xlab("x") +
ylab("Y") +
ggtitle(" x ") + scale_colour_manual(name="Legend",
values=c(z1="red", z2 ="blue")) + theme(legend.position="bottom")
gg10 = do.call(marrangeGrob, c(gg1, list(nrow=4, ncol=4)))
ggsave("need10.pdf", gg10)
Here is the image created, without splitting my images
I wish to have a code to get my plots in a 4 by 4 layout in multiple pages. The last two lines of my code need adjustment and I do not know how to fix it myself.
The ggplus wrapper appears to do what you want. I changed a couple of things in the code block below from your original: facet_wrap is commented out, and file is moved to ggplot so that it doesn't have to be re-specified in each geom_*:
gg1 <- ggplot(file) +
geom_line(aes(x=TIME, y=var1, colour = "z1")) +
geom_line(aes(x=TIME, y=var2, colour = "z2")) +
geom_point(aes(x=TIME, y=var3)) +
# facet_wrap( ~ ID, ncol=5) +
xlab("x") +
ylab("Y") +
ggtitle(" x ") +
scale_colour_manual(name="Legend",
values=c(z1="red", z2 ="blue"),
labels=c("X","Y")) +
theme(legend.position="bottom")
devtools::install_github("guiastrennec/ggplus")
library(ggplus)
pdf("need10.pdf")
gg10 <- facet_multiple(plot=gg1, facets="ID", ncol = 4, nrow = 4)
dev.off()
4 years later...
since ggplus is deprecated, you might consider using ggforce. You could use any of the relevant facet_* options as described in the documentation. E.g., facet_matrix:
# Standard use:
ggplot(mpg) +
geom_point(aes(x = .panel_x, y = .panel_y)) +
facet_matrix(vars(displ, cty, hwy))

Ordering ggplot legend by the final value in a data frame

I would like to re-order the elements in a legend, as they appear top to bottom in an R ggplot. That is: I'd like the order dictated by comparing the Y value at the right most point X axis point. In the following data, I'd like the legend to read from the top: bush, foo, baz, bar.
Update: following #alexwhan comments, I have added the data to the script.
Update 2: this is now exactly what I was hoping for, thanks to #thomas-kern on #R (bosie) irc.freenode. The trick was to add both, i.e.
scale_linetype_discrete(breaks = ord$Variant) + scale_shape_discrete(breaks = ord$Variant)
Here's my R:
library(plyr)
library(ggplot2)
require(grid)
args <- commandArgs(trailingOnly = TRUE)
lines <- "
X,Variant,Y
1,foo,123
1,bar,134
1,baz,135
1,bush,136
2,foo,221
2,bar,104
2,baz,155
2,bush,336
"
con <- textConnection(lines)
DF <- read.csv(con, header=TRUE)
close(con)
cdata <- ddply(DF, .(Variant,X), summarise, N = length(Y), mean=round(mean(Y),2), sd=round(sd(Y),2), se=round(sd(Y)/sqrt(length(Y)),2))
ord <- cdata[cdata$X == max(cdata$X),]
ord <- ord[order(ord$Variant, decreasing=T),]
pdf("out.pdf")
none <- element_blank()
bp <- ggplot(cdata, aes(x=X, y=mean, group=Variant)) + xlab("X label") + geom_line(aes(linetype=Variant)) + geom_point(aes(shape=Variant)) + ylab("Y Value") + labs(title = "mytitle") + scale_linetype_discrete(breaks = ord$Variant) + scale_shape_discrete(breaks = ord$Variant)
print(bp + theme(legend.justification=c(1,0), legend.position=c(1,0), legend.key.width=unit(3,"line"), legend.title=element_blank(), text = element_text(size=18)) + theme(panel.background = element_rect(fill='white', colour='black')) + theme(panel.grid.major = none, panel.grid.minor = none))
dev.off()
This generates exactly what I'm after:
It really helps if you provide the data your plot is made with. Here's an example of how to approach with some data I made up:
dat <- data.frame(x = c(1,2), y = rnorm(8), group = rep(c("bar", "baz", "bush", "foo"), each = 2))
ord <- dat[dat$x == max(dat$x),]
ord <- ord[order(ord$y, decreasing=T),]
ggplot(dat, aes(x, y)) + geom_point(aes(shape = group)) + geom_line(aes(group = group)) +
scale_shape_discrete(breaks = ord$group)

Resources