How to add multiple level of x-axis in ggplot [duplicate] - r

This question already has answers here:
Multirow axis labels with nested grouping variables
(7 answers)
Closed 6 years ago.
variable <- c("PM10","SO2","NO","NO2","PM10","SO2","NO","NO2","PM10","SO2","NO","NO2","PM10","SO2","NO","NO2","PM10","SO2","NO","NO2","PM10","SO2","NO","NO2")
sex <- c("male","male","male","male","female","female","female","female",
"male","male","male","male","female","female","female","female",
"male","male","male","male","female","female","female","female")
exposureperiod <- c("P1","P1","P1","P1","P1","P1","P1","P1",
"P2","P2","P2","P2","P2","P2","P2","P2",
"P3","P3","P3","P3","P3","P3","P3","P3")
set.seed(100)
coef <- runif(24, -2, 2)
coef_lb <- coef - 0.3
coef_ub <- coef + 0.3
df <- data.frame(variable,sex,exposureperiod,coef,coef_lb,coef_ub)
df$variable <- factor(df$variable,levels=c("PM10","SO2","NO","NO2"))
levels(df$variable) <- c("PM[10]","SO[2]", "NO", "NO[2]")
df$exposureperiod <- factor(df$exposureperiod,levels=c("P1","P2","P3"))
df$sex <- factor(df$sex,levels=c("male","female"))
df <- df[order(df$variable,df$sex),]
df$aux <- c(1,2,3,
5,6,7,
11,12,13,
15,16,17,
21,22,23,
25,26,27,
31,32,33,
35,36,37)
library(ggplot2)
plot <- ggplot(data = df, aes(x = aux, y = coef)) +
geom_pointrange(aes(ymin=coef_lb,ymax=coef_ub),shape="none") +
geom_point(aes(shape = exposureperiod)) +
scale_shape_discrete(name ="Exposure period",
breaks=c("P1", "P2","P3"),
labels=c("P1","P2","P3")) +
scale_x_continuous("Sex and Pollutant",breaks=c(2,6,12,16,22,26,32,36),
labels=c("Boys","Girls","Boys","Girls","Boys","Girls","Boys","Girls")) +
scale_y_continuous("Mean Difference in Tanner Stage",
limits=c(-3, 3),
breaks=round(seq(-3, 3, by = 0.5),1)) +
geom_hline(yintercept=0,alpha=1,linetype="dashed") +
theme(axis.text.x = element_text()) +
theme_bw(base_size = 16,base_family="Arial") +
theme(legend.text.align = 0,
legend.title = element_text(face="plain"),
legend.key = element_blank(),
legend.position = "bottom") +
guides(shape= guide_legend(nrow = 3,byrow = TRUE)) +
theme(text = element_text(colour = "black",face="plain"),
axis.title.y = element_text(face="plain"),
axis.title.x = element_text(face="plain"),
axis.text.x = element_text(face="plain",hjust = 0),
axis.text.y = element_text(face="plain")) +
theme(panel.background = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
axis.line = element_line(colour = "black"))+
theme(axis.ticks = element_line(size = 1))
plot
With the above script, I got the graph as below.
But I want to add another level of x-axis, which indicate the PM10, SO2, NO, and NO2, like the below graph. (To illustrate, I added those pollutants manually.) And of course, the x-axis title and legend should move down accordingly.
I used facet before, but I want to avoid the gap between pollutants generate by facet.
Thank you.

You could try faceting the plot
plot <- plot + facet_wrap(~variable)

Related

stat_fit_glance and generalized additive models (GAM) error

I am trying to add the p-value and R2 from mgcv::gam results to ggplot with facets. The sample dataframe and code are below. Is there a way to successfully paste the p-value and R2 on the ggplots?
DF <- data.frame(Site = rep(LETTERS[20:24], each = 4),
Region = rep(LETTERS[14:18], each = 4),
time = rep(LETTERS[1:10], each = 10),
group = rep(LETTERS[1:4], each = 10),
value1 = runif(n = 1000, min = 10, max = 15),
value2 = runif(n = 1000, min = 100, max = 150))
DF$time <- as.numeric(DF$time)
GAMFORMULA <- y ~ s(x,bs="cr",k=3)
plot1 <- ggplot(data=DF,
aes(x=time, y=value2)) +
geom_point(col="gray", alpha=0.8,
name="") +
geom_line(col="gray", alpha=0.8,
name="",aes(group=group)) +
geom_smooth(se=T, col="darkorange", alpha=0.8,
name="", fill="orange",
method="gam",formula=GAMFORMULA) +
theme_bw() +
theme(strip.text.x = element_text(size=10),
strip.text.y = element_text(size=10, face="bold", angle=0),
strip.background = element_rect(colour="black", fill="gray90"),
axis.text.x = element_text(size=10), # remove x-axis text
axis.text.y = element_text(size=10), # remove y-axis text
axis.ticks = element_blank(), # remove axis ticks
axis.title.x = element_text(size=18), # remove x-axis labels
axis.title.y = element_text(size=25), # remove y-axis labels
panel.background = element_blank(),
panel.grid.major = element_blank(), #remove major-grid labels
panel.grid.minor = element_blank(), #remove minor-grid labels
plot.background = element_blank()) +
labs(y="Value", x="Time", title = "") +
stat_fit_glance(method = "gam",
method.args = list(formula = GAMFORMULA),
aes(label = sprintf('R^2~"="~%.3f~~italic(p)~"="~%.2f',
stat(..r.squared..),stat(..p.value..))),
parse = TRUE)
plot1 + facet_wrap(Site~group, scales="free_y", ncol=3)
Error in sprintf("R^2~\"=\"~%.3f~~italic(p)~\"=\"~%.2f", r.squared, p.value) :
object 'r.squared' not found
My answer explains why stat_fit_glance() cannot be used to add r.sq to a plot, but I am afraid is does not provide an alternative approach.
stat_fit_glance() is a wrapper on broom:glance() that fits the model and passes the model fit object to broom:glance(). In the case of gam(), broom:glance() does not return an estimate for R2 and consequently also stat_fit_glance() is unable to return it.
To see what computed values are available one can use geom_debug() from package 'gginnards'.
library(ggpmisc)
library(gginnards)
library(mgcv)
DF <- data.frame(Site = rep(LETTERS[20:24], each = 4),
Region = rep(LETTERS[14:18], each = 4),
time = rep(LETTERS[1:10], each = 10),
group = rep(LETTERS[1:4], each = 10),
value1 = runif(n = 1000, min = 10, max = 15),
value2 = runif(n = 1000, min = 100, max = 150))
DF$time <- as.numeric(DF$time)
GAMFORMULA <- y ~ s(x,bs="cr",k=3)
plot1 <- ggplot(data=DF,
aes(x=time, y=value2)) +
geom_point(col="gray", alpha=0.8,
name="") +
geom_line(col="gray", alpha=0.8,
name="",aes(group=group)) +
geom_smooth(se=T, col="darkorange", alpha=0.8,
name="", fill="orange",
method="gam",formula=GAMFORMULA) +
theme_bw() +
theme(strip.text.x = element_text(size=10),
strip.text.y = element_text(size=10, face="bold", angle=0),
strip.background = element_rect(colour="black", fill="gray90"),
axis.text.x = element_text(size=10), # remove x-axis text
axis.text.y = element_text(size=10), # remove y-axis text
axis.ticks = element_blank(), # remove axis ticks
axis.title.x = element_text(size=18), # remove x-axis labels
axis.title.y = element_text(size=25), # remove y-axis labels
panel.background = element_blank(),
panel.grid.major = element_blank(), #remove major-grid labels
panel.grid.minor = element_blank(), #remove minor-grid labels
plot.background = element_blank()) +
labs(y="Value", x="Time", title = "") +
stat_fit_glance(method = "gam",
method.args = list(formula = GAMFORMULA),
# aes(label = sprintf('R^2~"="~%.3f~~italic(p)~"="~%.2f',
# stat(..r.squared..),stat(..p.value..))),
# parse = TRUE)
geom = "debug")
plot1 + facet_wrap(Site~group, scales="free_y", ncol=3)
Shown above are the values returned by stat_fit_glance() for the first two panels in the plot.
Note: There does not seem to be agreement on whether R-square is meaningful for GAM. However the summary() method for gam does return an adjusted R-square estimate as member r.sq.

Ho to add the count values calculated in the geom_histogram on top of the bars in R

I'd like to add the count values calculated in the geom_histogram function on ggplot2. I've put the ggplot2 into a loop so I can produce multiple plots, in my case 30 but for ease, here is a dummy set for only four plots. Facet wrap didn't work as the geom density was pooling the data across all factors before calculating proportions, rather than within a factor/variable. To produce this plot, I've essentially mixed a whole bunch of code from various sources so credit to them.
library(dplyr)
library(ggplot2)
library(ggridges)
library(reshape2)
library(gridExtra)
#Make the data#
df.fact <- data.frame("A"=rnorm(400, mean = 350, sd=160),"B"=rnorm(400, mean = 300, sd=100), "C"=rnorm(400, mean = 200, sd=80), names=rep(factor(LETTERS[23:26]), 100))
df.test<-melt(df.fact, id.vars = "names", value.name = "Length2")
names(df.test)[names(df.test) =="variable"] <- "TSM.FACT"
#Create the plotlist##
myplots <- list()
#Loop for plots##
for(i in 1:(length(unique(df.test$names)))){
p1 <- eval(substitute(
ggplot(data=df.test[df.test$names == levels(df.test$names)[i],], aes(x=Length2, group=TSM.FACT, colour = TSM.FACT, fill=TSM.FACT)) +
geom_histogram(aes( y = stat(width*density)), position = "dodge", binwidth = 50, alpha =0.4, show.legend=T)+
ggtitle(paste0(levels(df.test$names)[i]))+
geom_density_line(stat="density", aes(y=(..count..)/sum(..count..)*50), alpha=0.3, size=0.5, show.legend=F) +
geom_vline(data=ddply(df.test[df.test$names == levels(df.test$names)[i],], ~ TSM.FACT, numcolwise(mean)), mapping=aes(xintercept = Length2, group=TSM.FACT, colour=TSM.FACT), linetype=2, size=1, show.legend=F) +
scale_y_continuous(labels = percent_format()) +
ylab("relative frequency") +
scale_color_manual(values= c("#00B2EE", "#1E90FF", "#104E8B")) +
scale_fill_manual(values= c("#00B2EE", "#1E90FF", "#104E8B")) +
theme_bw() + theme(
plot.title = element_text(lineheight=0.5, hjust= 0.5, size=10),
strip.text.y = element_text(hjust = 1, angle = 0),
strip.text.x = element_text(size=10, vjust = 0.9),
strip.text=element_text(margin = margin(t=0.3,r=1,b=0.3,l=1), size=8, debug = F, vjust=0.2),
strip.background = element_blank(),
axis.text.x = element_text(size=8, angle=0, vjust=0.2, margin = margin(t=0.3,r=0.1,b=0.3,l=0.1)),
axis.title.x=element_blank(),
axis.title.y=element_blank(),
axis.line.x=element_line(colour="black"),
axis.line.y=element_line(colour="black"),
panel.grid.minor = element_blank(),
panel.border=element_blank(),
panel.background=element_blank(),
legend.position=(c(0.9,0.9)),
legend.title = element_blank(),
legend.key = element_blank()),
list(i = i)))
print(i)
print(p1)
myplots[[i]] <- p1
plot(p1)
}
#Join the plots
panelplot=grid.arrange(plotlist = myplots, grobs = myplots, shared.legend=T)
Unfortunately I am unable to reproduce your example. I can recommend adding a column that includes the sum of each bar (let's name it "Bar")
The required addition to the ggplot code then involves:
geom_text(aes(label = Bar), position = position_stack(vjust = 1)) +
The text height above the bar can be adjusted with vjust

Mean per group on a bubble plot with ggplot

I have a dataset with a lot of overlapping points and used ggplot to create a bubble plot to show that data. I need to add bars on my plot for the means of each group on the x axis (values can be 0, 1, or 2). I have tried to use geom_errorbar but haven't been able to get it to work with my data. Any help/suggestions would be greatly appreciated.
The following is my code and a script to generate fake data that is similar:
y <- seq(from=0, to=3.5, by=0.5)
x <- seq(from=0, to=2, by=1)
xnew <- sample(x, 100, replace=T)
ynew <- sample(y, 100, replace=T)
data <- data.frame(xnew,ynew)
data2 <- aggregate(data$xnew, by=list(x=data$xnew, y=data$ynew), length)
names(data2)[3] <- "Count"
ggplot(data2, aes(x = x, y = y)) +
geom_point(aes(size=Count)) +
labs(x = "Copies", y = "Score") +
aes(ymax=..y.., ymin=..y..) +
scale_x_continuous(breaks = seq(0, 2, 1)) +
scale_y_continuous(breaks = seq(0, 3, 0.5)) +
theme(legend.position = "bottom", legend.direction = "horizontal",
axis.line = element_line(size=1, colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.text.x = element_text(colour="black", size = 10),
axis.text.y = element_text(colour="black", size = 10))
I am not entirely sure that I understand your question correctly. It seems to me that in addition to the bubbles, you want to visualise the mean value of y for each value of x as a bar of some kind. (You mention error bars, but it seems that this is not a requirement, but just what you have tried. I will use geom_col() instead.)
I assume that you want to weigh the mean over y by the counts, i.e., sum(y * Count) / sum(Count). You can create a data frame that contains these values by using dplyr:
data2_mean
## # A tibble: 3 × 2
## x y
## <dbl> <dbl>
## 1 0 1.833333
## 2 1 1.750000
## 3 2 2.200000
When creating the plot, I use data2 as the data set for geom_point() and data2_mean as the data set for geom_col(). It is important to put the bars first, since the bubbles should be on top of the bars.
ggplot() +
geom_col(aes(x = x, y = y), data2_mean, fill = "gray60", width = 0.7) +
geom_point(aes(x = x, y = y, size = Count), data2) +
labs(x = "Copies", y = "Score") +
scale_x_continuous(breaks = seq(0, 2, 1)) +
scale_y_continuous(breaks = seq(0, 3, 0.5)) +
theme(legend.position = "bottom", legend.direction = "horizontal",
axis.line = element_line(size=1, colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.text.x = element_text(colour="black", size = 10),
axis.text.y = element_text(colour="black", size = 10))
Everything that I changed compared to your code comes before scale_x_continuous(). This produces the following plot:
Is this what you're after? I first calculated the group-level means using the dplyr package and then added line segments to your plot using geom_segment:
library(ggplot2)
library(dplyr)
data2 <- data2 %>% group_by(x) %>% mutate(mean.y = mean(y))
ggplot(data2, aes(x = x, y = y)) +
geom_point(aes(size=Count)) +
labs(x = "Copies", y = "Score") +
aes(ymax=..y.., ymin=..y..) +
scale_x_continuous(breaks = seq(0, 2, 1)) +
scale_y_continuous(breaks = seq(0, 3, 0.5)) +
theme(legend.position = "bottom", legend.direction = "horizontal",
axis.line = element_line(size=1, colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.text.x = element_text(colour="black", size = 10),
axis.text.y = element_text(colour="black", size = 10)) +
geom_segment(aes(y = mean.y, yend = mean.y, x = x -0.25, xend = x + 0.25))

Subscript and width restrictions in x-axis tick labels in ggplot2

This is currently my code for the figure above
ggplot(AllData, aes(Year, AGResiduals, fill=Type)) +
geom_boxplot(outlier.size=0) +
scale_fill_manual(values=c("skyblue4", "skyblue"),
name="Male Type",
labels=c("Guarders","Sneakers")) +
labs(x=NULL, y = "Residual of Accessory Gland Mass x Total Mass") +
scale_x_discrete(limits=c("2007","2008","2010","2011","2013","2014","2015"),
labels=str_wrap(c("2007 (nG=37, nS=8)","2008 (nG=4, nS=6)","2010 (nG=31, nS=6)","2011 (nG=55, nS=5)","2013 (nG=202, nS=24)","2014 (nG=63)","2015 (nG=59, nS=3)"),
width=6)) +
theme(plot.title = element_text(size = rel(1.4)),
axis.title = element_text(size = rel(1.2)),
axis.text.x = element_text(size = rel(1.5)),
axis.text.y = element_text(size = rel(1.5)),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black"))
I want to make the "G" and "S"'s in each x-axis tick labels subscript (they designate sample size for two different groups, G and S).
Writing
expression(2007 (n[G]=37, n[S]=8)
works, but only if I remove the preceding
str_wrap
code for some reason.
I need to constrain the width of the text for each x-axis tick label, so I need to retain str_wrap or use line breaks within the expression function somehow.
I also can't replace my list of labels with a factor since I have to set limits on the years I want to show.
Can someone please help on how to make a 3-line x-axis tick label that allows for subscript?
I couldn't find a way to display expressions on multiple lines, but you could try rotating the labels:
library(stringr)
library(ggplot2)
library(scales)
library(dplyr)
n <- 100
y <- as.character(sample(2007:2015,n,replace=T))
t <- sample(c("Guarders","Guarders","Sneakers"),n,replace=T)
r <- rnorm(n,10,20)
nsk <- sum(t=="Sneakers")
r[ t=="Sneakers" ] <- rnorm(nsk,1,5)
AllData <- data.frame(Year=y,AGResiduals=r,Type=t)
sdf <- AllData %>% group_by( Year ) %>%
summarize( n=n(), ng=sum(Type=="Guarders") )
fmts <- rep("%s (n[G]==%d) ~~ (n[S]==%d)",nrow(sdf))
labs2 <- do.call(sprintf,list(fmts,sdf$Year,sdf$ng, sdf$n-sdf$ng ) )
ex2 <- parse(text=labs2)
ggplot(AllData, aes(Year, AGResiduals, fill=Type)) +
geom_boxplot(outlier.size=0) +
scale_fill_manual(values=c("skyblue4", "skyblue"),
name="Male Type",
labels=c("Guarders","Sneakers")) +
labs(x=NULL, y = "Residual of Accessory Gland Mass x Total Mass") +
scale_x_discrete(limits=c("2007","2008","2010","2011","2013","2014","2015"),
labels=ex2) +
theme(plot.title = element_text(size = rel(1.4)),
axis.title = element_text(size = rel(1.2)),
axis.text.x = element_text(size = rel(1.0),angle=-30,hjust=0),
axis.text.y = element_text(size = rel(1.5)),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black"))
Yields this:
Rawr made a suggestion that allows you to get two, but not three lines. Since it doesn't require rotation, I am entering it as a second solution.
This:
library(stringr)
library(ggplot2)
library(scales)
library(dplyr)
set.seed(23456)
n <- 100
y <- as.character(sample(2007:2015,n,replace=T))
t <- sample(c("Guarders","Guarders","Sneakers"),n,replace=T)
r <- rnorm(n,10,20)
nsk <- sum(t=="Sneakers")
r[ t=="Sneakers" ] <- rnorm(nsk,1,5)
AllData <- data.frame(Year=y,AGResiduals=r,Type=t)
sdf <- AllData %>% group_by( Year ) %>%
summarize( n=n(), ng=sum(Type=="Guarders") )
fmts <- rep("atop(%s, n[G]==%d ~~ n[S]==%d)",nrow(sdf)) # two rows
labs2 <- do.call(sprintf,list(fmts,sdf$Year,sdf$ng, sdf$n-sdf$ng ) )
ex2 <- parse(text=labs2)
ggplot(AllData, aes(Year, AGResiduals, fill=Type)) +
geom_boxplot(outlier.size=0) +
scale_fill_manual(values=c("skyblue4", "skyblue"),
name="Male Type",
labels=c("Guarders","Sneakers")) +
labs(x=NULL, y = "Residual of Accessory Gland Mass x Total Mass") +
scale_x_discrete(limits=c("2007","2008","2010","2011","2013","2014","2015"),
labels=ex2) +
theme(plot.title = element_text(size = rel(1.4)),
axis.title = element_text(size = rel(1.2)),
axis.text.x = element_text(size = rel(1.0),angle=0,hjust=0),
axis.text.y = element_text(size = rel(1.5)),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black"))
yields this:

R: remove extra x-axis value (ggplot2)

I am using bar graph with ggplot2 library and when number of values (dates in my case) on x-axis is high, additional blank value ticks appear on the beginig and the end of axis. Please, is there a way how to remove these? Reproducible code and image below.
library(ggplot2)
a <- runif(28, 2.0, 7.5)
b <- seq(as.Date("1910/1/1"), as.Date("1910/1/28"), "days")
ds = data.frame(a, b)
p <-
ggplot(data=ds, aes(b, a), environment = environment()) +
theme(panel.grid.major.x = element_blank(), panel.grid.minor = element_blank(),
panel.grid.major.y = element_line(color="grey"),
panel.background = element_blank(), panel.border = element_rect(fill= NA, colour = "grey")) +
geom_bar(width=.4,stat="identity") +
xlab(" ") + ylab(" ") +
theme(text = element_text(size=20), axis.text.x = element_text(angle=90), axis.text = element_text(color="black"),
legend.key = element_rect(fill="white")) +
scale_x_date(breaks = date_breaks("1 day"), labels = date_format("%d.%B %y"))
print(p)
When I put breaks = b instead of breaks = date_breaks("1 day"), it solves the problem. Also you need library(scales) to run your example now.

Resources