I'm trying to replace the facet_wrap titles on a ggplot bar plot with expressions, but I'm having no luck. I've tried here and here but neither seem to be working for me.
The whole dataset is quite large, so here's some dummy data to illustrate the problem.
library(tidyr)
library(ggplot2)
data<-data.frame(species = rep(c("oak", "elm", "ash"), each = 5),
resp_1 = (runif(15, 1,100)),
resp_2 = (runif(15, 1,100)),
resp_3 = (runif(15, 1,100)),
resp_4 = (runif(15, 1,100)),
resp_5 = (runif(15, 1,100)))
### transform to longform with tidyr
data_2 <- gather(data, response, result, resp_1:resp_5, factor_key=TRUE)
### plot with ggplot2
ggplot(data_2, aes(x = species, y = result, fill = species))+
geom_bar(stat = 'sum')+
facet_wrap(~ response)
### here are the labels I'd like to see on the facets
oxygen <-expression ("Oxygen production (kg/yr)")
runoff <-expression("Avoided runoff " ~ (m ^{3} /yr))
co <- expression("CO removal (g/yr)")
o3 <- expression("O"[3]~" removal (g/yr)")
no2 <- expression("NO"[2]~" removal (g/yr)")
labels <- c(oxygen, runoff, co, o3, no2)
### this doesn't work
ggplot(data_2, aes(x = species, y = result, fill = species))+
geom_bar(stat = 'sum')+
facet_wrap(~ response, labeller = labeller(response = labels))
### close, but doesn't work
levels(data_2$response)<-labels
ggplot(data_2, aes(x = species, y = result, fill = species))+
geom_bar(stat = 'sum')+
facet_wrap(~ response, labeller = labeller(response = labels))
### produces an error
ggplot(data_2, aes(x = species, y = result, fill = species))+
geom_bar(stat = 'sum')+
facet_wrap(~ response, labeller = label_parsed)
I'd also like to get rid of the second legend in grey titled "n".
Right now your expression names don't match up to the values used as the facets. So I'd recommend storing your labels in an expression
labels <- expression(
resp_1 = "Oxygen production (kg/yr)",
resp_2 = "Avoided runoff " ~ (m ^{3} /yr),
resp_3 = "CO removal (g/yr)",
resp_4 = "O"[3]~" removal (g/yr)",
resp_5 = "NO"[2]~" removal (g/yr)"
)
And then you can write your own labeler function to extract the correct value
ggplot(data_2, aes(x = species, y = result, fill = species))+
geom_bar(stat = 'sum', show.legend = c(size=FALSE))+
facet_wrap(~ response, labeller = function(x) {
list(as.list(labels)[x$response])
})
We've also used show.legend = c(size=FALSE) to turn off the n legend
Use as_labeller and label_parsed. Ref
library(tidyr)
library(ggplot2)
data <- data.frame(species = rep(c("oak", "elm", "ash"), each = 5),
resp_1 = (runif(15, 1, 100)),
resp_2 = (runif(15, 1, 100)),
resp_3 = (runif(15, 1, 100)),
resp_4 = (runif(15, 1, 100)),
resp_5 = (runif(15, 1, 100)))
data_2 <- gather(data, response, result, resp_1:resp_5, factor_key = TRUE)
# setup the labels
reponse_names <- c(
`resp_1` = "Oxygen~production~(kg*yr^{-1})",
`resp_2` = "Avoided~runoff~(m^{3}*yr^{-1})",
`resp_3` = "CO~removal~(g*yr^{-1})",
`resp_4` = "O[3]~removal~(g*yr^{-1})",
`resp_5` = "NO[2]~removal~(g*yr^{-1})"
)
# plot
ggplot(data_2, aes(x = species, y = result, fill = species))+
geom_bar(stat = 'sum')+
facet_wrap(
~ response,
labeller = labeller(response = as_labeller(reponse_names, label_parsed))
) +
guides(size = "none")
Created on 2021-04-30 by the reprex package (v2.0.0)
Related
Here's my R code
ggplot(dat = Table, aes(x = Group, y = value, fill = Type)) +
geom_boxplot(alpha=0.08)+
geom_jitter()+
scale_fill_brewer(palette="Spectral")+
theme_minimal()
Like you can see the dots are in the middle of the boxplots. What can I add in geom_jitter to have each point in the righ boxplot and not in the middle like this ? I also tried geom_point, it gave the same result !
Thanks to the help now It works, but I wanted to add a line to connect the dots and I got this.. can someone tell how to really connect the dots with lines
I think if you group by interaction(Group, Type) and use position_jitterdodge() you should get what you're looking for.
ggplot(mtcars, aes(as.character(am), mpg, color = as.character(vs),
group = interaction(as.character(vs), as.character(am)))) +
geom_boxplot() +
geom_jitter(position = position_jitterdodge()) # same output with geom_point()
Edit - here's an example with manual jittering applied to data where the each subject appears once in each Group.
I looked for a built-in way to do this, and this answer comes close, but I couldn't get it to work in terms of using position_jitterdodge with position defined by the groups of Group/Type, but line grouping defined by id alone and not by Group/Type. Both aesthetics (position adjustment and series identification) rely on the same group parameter, but they each need a different value for it.
Table = data.frame(id = 1:4,
value = rnorm(8),
Group = rep(c("a","b"), each = 4),
Type = c("1", "2"))
library(dplyr)
Table %>%
mutate(x = as.numeric(as.factor(Group)) +
0.2 * scale(as.numeric(as.factor(Type))) +
rnorm(n(), sd = 0.06)) %>%
ggplot(aes(x = Group, y = value, fill = Type, group = interaction(Group, Type))) +
geom_boxplot(alpha=0.2)+
geom_point(aes(x = x)) +
geom_line(aes(x = x, group = id), alpha = 0.1) +
scale_fill_brewer(palette="Spectral")+
theme_minimal()
Best to use position_dodge instead if you want them to line up:
library(ggplot2)
Table <- tibble::tibble(
Group = rep(c("A", "B"), each = 20),
Type = factor(rep(c(1:2, 1:2), each = 10)),
value = rnorm(40, mean = 10)
)
ggplot(dat = Table, aes(x = Group, y = value, fill = Type)) +
geom_boxplot(alpha=0.08)+
geom_point(position = position_dodge(width = 0.75))+
scale_fill_brewer(palette="Spectral")+
theme_minimal()
To add a line, make sure group = ID goes in both the geom_point and geom_line calls:
library(ggplot2)
Table <- tibble::tibble(
Group = rep(c("A", "B"), each = 20),
Type = factor(rep(c(1:2, 1:2), each = 10)),
ID = factor(rep(1:20, times = 2)),
value = rnorm(40, mean = 10)
)
ggplot(dat = Table, aes(x = Group, y = value, fill = Type)) +
geom_boxplot(alpha = 0.08) +
geom_point(aes(group = ID), position = position_dodge(width = 0.75))+
geom_line(aes(group = ID), position = position_dodge(width = 0.75), colour = "grey")+
scale_fill_brewer(palette = "Spectral") +
theme_minimal()
Is there a way to first change the facet label from 1:3 to something like c(good, bad, ugly). Also, i would like to add R2 value to each of the facet. Below is my code- i tried a few things but didn't succeed.
DF = data.frame(SUB = rep(1:3, each = 100), Ob = runif(300, 50,100), S1 = runif(300, 75,95), S2 = runif(300, 40,90),
S3 = runif(300, 35,80),S4 = runif(300, 55,100))
FakeData = gather(DF, key = "Variable", value = "Value", -c(SUB,Ob))
ggplot(FakeData, aes(x = Ob, y = Value))+
geom_point()+ geom_smooth(method="lm") + facet_grid(Variable ~ SUB, scales = "free_y")+
theme_bw()
Here is the figure that i am getting using above code.
I tried below code to change the facet_label but it didn't work
ggplot(FakeData, SUB = factor(SUB, levels = c("Good", "Bad","Ugly")), aes(x = Ob, y = Value))+
geom_point()+ geom_smooth(method="lm") + facet_grid(Variable ~ SUB, scales = "free_y")+
theme_bw()
I do not have any idea how to add R2 to the facets. Is there any efficient way of computing and R2 to the facets?
You can use ggpubr::stat_cor() to easily add correlation coefficients to your plot.
library(dplyr)
library(ggplot2)
library(ggpubr)
FakeData %>%
mutate(SUB = factor(SUB, labels = c("good", "bad", "ugly"))) %>%
ggplot(aes(x = Ob, y = Value)) +
geom_point() +
geom_smooth(method = "lm") +
facet_grid(Variable ~ SUB, scales = "free_y") +
theme_bw() +
stat_cor(aes(label = after_stat(rr.label)), color = "red", geom = "label")
If you don't want to use functions from other packages and only want to use ggplot2, you will need to compute the R2 for each SUB and Variable combination, and then add to your plot with geom_text or geom_label. Here is one way to do it.
library(tidyverse)
set.seed(1)
DF = data.frame(SUB = rep(1:3, each = 100), Ob = runif(300, 50,100), S1 = runif(300, 75,95), S2 = runif(300, 40,90),
S3 = runif(300, 35,80),S4 = runif(300, 55,100))
FakeData = gather(DF, key = "Variable", value = "Value", -c(SUB,Ob))
FakeData_lm <- FakeData %>%
group_by(SUB, Variable) %>%
nest() %>%
# Fit linear model
mutate(Mod = map(data, ~lm(Value ~ Ob, data = .x))) %>%
# Get the R2
mutate(R2 = map_dbl(Mod, ~round(summary(.x)$r.squared, 3)))
ggplot(FakeData, aes(x = Ob, y = Value))+
geom_point()+
geom_smooth(method="lm") +
# Add label
geom_label(data = FakeData_lm,
aes(x = Inf, y = Inf,
label = paste("R2 = ", R2, sep = " ")),
hjust = 1, vjust = 1) +
facet_grid(Variable ~ SUB, scales = "free_y") +
theme_bw()
The following answer makes use of package 'ggpmisc' (version >= 0.5.0 for the second example). In addition, I simply used a call to factor() within aes() to set the labels.
library(tidyverse)
library(ggpmisc)
DF = data.frame(SUB = rep(1:3, each = 100), Ob = runif(300, 50,100), S1 = runif(300, 75,95), S2 = runif(300, 40,90),
S3 = runif(300, 35,80),S4 = runif(300, 55,100))
FakeData = gather(DF, key = "Variable", value = "Value", -c(SUB,Ob))
# As asked in the question
# Ensuring that the R^2 label does not overlap the observations
ggplot(FakeData, aes(x = Ob, y = Value)) +
geom_point()+
geom_smooth(method = "lm") +
stat_poly_eq() +
scale_y_continuous(expand = expansion(mult = c(0.1, 0.33))) +
facet_grid(Variable ~ factor(SUB,
levels = 1:3,
labels = c("good", "bad", "ugly")),
scales = "free_y") +
theme_bw()
# As asked in a comment, adding P-value
ggplot(FakeData, aes(x = Ob, y = Value))+
geom_point()+
geom_smooth(method = "lm") +
stat_poly_eq(mapping = use_label(c("R2", "P")), p.digits = 2) +
scale_y_continuous(expand = expansion(mult = c(0.1, 0.33))) +
facet_grid(Variable ~ factor(SUB,
levels = 1:3,
labels = c("good", "bad", "ugly")),
scales = "free_y")+
theme_bw()
And the plot from the second example adding P to the label.
Note: With older versions of 'ggpmisc' which lack function use_label() the mapping can be written as aes(label = paste(after_stat(rr.label), after_stat(p.label), sep = "*\", \"*") in the same way as when using 'ggpubr'.
Package 'ggpubr' includes code copied from 'ggpmisc' without acknowledgenment, which explains why some statistics are so similar between the two packages. 'ggpmisc' is more narrowly focused but the statistics in it have been much improved after they were taken into 'ggpubr'. 'ggpmisc' is actively maintained while maintenance of 'ggpubr' seems currently stalled.
I'm trying to display the equations on the plot using the stat_poly_eq function of ggpmisc.
My problem is how to change the y= ... in the equation, by y1=... and y2=... by referring to the key argument.
I tried to add the eq.with.lhs argument in the mapping but it does not recognize the argument.
I tried to pass a vector to the eq.with.lhs argument but it overlapped both elements in each equation...
Do you have a better idea?
In the last case, I could use geom_text after calculating the equation coefficients myself, but it seemed to be a less efficient way to solve the problem.
Here is a reprex of my problem.
data <- data.frame(x = rnorm(20)) %>%
mutate(y1 = 1.2*x + rnorm(20, sd=0.2),
y2 = 0.9*x + rnorm(20, sd=0.3)) %>%
gather(value = value, key = key, -x)
ggplot(data, aes(x = x, y = value)) +
geom_point(aes(shape = key, colour = key)) +
stat_poly_eq(aes(label = ..eq.label.., colour = key),
formula = y ~ poly(x, 1, raw = TRUE),
eq.x.rhs = "x",
# eq.with.lhs = c(paste0(expression(y[1]), "~`=`~"),
# paste0(expression(y[2]), "~`=`~")),
eq.with.lhs = paste0(expression(y[ind]), "~`=`~"),
parse = TRUE) +
ylab(NULL)
I'm not really sure if it's possible to do it through ggpmisc, but you can change the data once the plot is built, like so:
library(tidyverse)
library(ggpmisc)
data <- data.frame(x = rnorm(20)) %>%
mutate(y1 = 1.2*x + rnorm(20, sd=0.2),
y2 = 0.9*x + rnorm(20, sd=0.3)) %>%
gather(value = value, key = key, -x)
p <- ggplot(data, aes(x = x, y = value)) +
geom_point(aes(shape = key, colour = key)) +
stat_poly_eq(aes(label = ..eq.label.., colour = key),
formula = y ~ poly(x, 1, raw = TRUE),
eq.x.rhs = "x",
eq.with.lhs = paste0(expression(y), "~`=`~"),
parse = TRUE) +
ylab(NULL)
temp <- ggplot_build(p)
temp$data[[2]]$label <- temp$data[[2]]$label %>%
fct_relabel(~ str_replace(.x, "y", paste0("y[", 1:2, "]")))
grid::grid.newpage()
grid::grid.draw(ggplot_gtable(temp))
I am currently trying to customize my plot with the goal to have a plot like this:
If I try to specify the color or linetype in either aes() or mapping = aes(), I get two different smooths. One for each class. This makes sense, because the smoothing will be applied once for each type.
If I use group = 1 in the aestetics, I will get one line, also one color/linetype.
But I can not find a solution to have one smooth line with different colors/linetypes for each class.
My code:
ggplot(df2, aes(x = dateTime, y = capacity)) +
#geom_line(size = 0) +
stat_smooth(geom = "area", method = "loess", show.legend = F,
mapping = aes(x = dateTime, y = capacity, fill = type, color = type, linetype = type)) +
scale_color_manual(values = c(col_fill, col_fill)) +
scale_fill_manual(values = c(col_fill, col_fill2))
The result for my data:
Reproduceable code:
File: enter link description here (I can not make this file shorter and copy it hear, else I get errors with smoothing for too few data points)
df2 <- read.csv("tmp.csv")
df2$dateTime <- as.POSIXct(df2$dateTime, format = "%Y-%m-%d %H:%M:%OS")
col_lines <- "#8DA8C5"
col_fill <- "#033F77"
col_fill2 <- "#E5E9F2"
ggplot(df2, aes(x = dateTime, y = capacity)) +
stat_smooth(geom = "area", method = "loess", show.legend = F,
mapping = aes(x = dateTime, y = capacity, fill = type, color = type, linetype = type)) +
scale_color_manual(values = c(col_fill, col_fill)) +
scale_fill_manual(values = c(col_fill, col_fill2))
I would suggest to model the data outside the plotting function and then plot it with ggplot. I used the pipes (%>%) and mutate from the tidyversefor convenient reasons, but you don't have to. Also, I prefer to have a line and a fill separated to avoid the dashed line on the right side of your plot.
df2$index <- as.numeric(df2$dateTime) #create an index for the loess model
model <- loess(capacity ~ index, data = df2) #model the capacity
plot <- df2 %>% mutate(capacity_predicted = predict(model)) %>% # use the predicted data for the capacity
ggplot(aes(x = dateTime, y = capacity_predicted)) +
geom_ribbon(aes(ymax = capacity_predicted, ymin = 0, fill = type, group = type)) +
geom_line(aes( color = type, linetype = type)) +
scale_color_manual(values = c(col_fill, col_fill)) +
scale_fill_manual(values = c(col_fill, col_fill2)) +
theme_minimal() +
theme(legend.position = "none")
plot
Please tell me if it works (I don't have the original data to test it), and if you would like a version without tidyverse functions.
EDIT:
Not very clean, but a smoother curve can be obtained with this code:
df3 <- data.frame(index = seq(min(df2$index), max(df2$index), length.out = 300),
type = "historic", stringsAsFactors = F)
modelling_date_index <- 1512562500
df3$type[df3$index <= modelling_date_index] = "predict"
plot <- df3 %>% mutate(capacity_predicted = predict(model, newdata = index),
dateTime = as.POSIXct(index, origin = '1970-01-01')) %>%
# arrange(dateTime) %>%
ggplot(aes(x = dateTime, y = capacity_predicted)) +
geom_ribbon(aes(ymax = capacity_predicted, ymin = 0, fill = type, group =
type)) +
geom_line(aes( color = type, linetype = type)) +
scale_color_manual(values = c(col_fill, col_fill)) +
scale_fill_manual(values = c(col_fill, col_fill2)) +
theme_minimal()+
theme(legend.position = "none")
plot
When using ggplot2 to make faceted plots, I'm having trouble getting individual labels in each facet when I also specify a grouping parameter. Without specifying group = ..., things work fine, but I'm trying to make plots of paired data that emphasize the before vs. after treatment changes.
Here is an example:
library(tidyr)
library(ggplot2)
set.seed(253)
data <- data.frame(Subject = LETTERS[1:10],
Day1.CompoundA = rnorm(10, 4, 2),
Day2.CompoundA = rnorm(10, 7, 2),
Day1.CompoundB = rnorm(10, 5, 2),
Day2.CompoundB = rnorm(10, 5.5, 2))
# Compare concentration of compounds by day
A <- t.test(data$Day1.CompoundA, data$Day2.CompoundA, paired = TRUE)
B <- t.test(data$Day1.CompoundB, data$Day2.CompoundB, paired = TRUE)
data.long <- gather(data, key = DayCompound, value = Concentration, -Subject) %>%
separate(DayCompound, c("Day", "Compound"))
# text to annotate graphs
graphLabels <- data.frame(Compound = c("CompoundA", "CompoundB"),
Pval = paste("p =", c(signif(A$p.value, 2),
signif(B$p.value, 2))))
Ok, now that the data are set up, I can make a boxplot just fine:
ggplot(data.long, aes(x = Day, y = Concentration)) +
geom_boxplot() +
facet_wrap(~ Compound) +
geom_text(data = graphLabels, aes(x = 1.5, y = 10, label = Pval))
But if I want to show line plots that emphasize the paired nature of the data by showing each subject in a different color, the facet labels don't work.
ggplot(data.long, aes(x = Day, y = Concentration, color = Subject, group = Subject)) +
geom_point() + geom_line() +
facet_wrap(~ Compound) +
geom_text(data = graphLabels, aes(x = 1.5, y = 10, label = Pval))
# Error in eval(expr, envir, enclos) : object 'Subject' not found
Any suggestions?
When you map aesthetics (i.e. aes(...,color = Subject)) in the top level ggplot() call, those mappings are passed on to each layer, which means that each layer expects data to have variables by those names.
You either need to specify the data and mapping separately in each layer, or unmap them explicitly:
ggplot(data.long, aes(x = Day, y = Concentration, color = Subject, group = Subject)) +
geom_point() + geom_line() +
facet_wrap(~ Compound) +
geom_text(data = graphLabels, aes(x = 1.5, y = 10, label = Pval,color = NULL,group= NULL))
There is also an inherit.aes argument that you can set to FALSE in any layer you don't want pulling in those other mappings, e.g.
ggplot(data.long, aes(x = Day, y = Concentration, color = Subject, group = Subject)) +
geom_point() + geom_line() +
facet_wrap(~ Compound) +
geom_text(data = graphLabels, aes(x = 1.5, y = 10, label = Pval),inherit.aes = FALSE)