Label ggplot with group names and their equation, possibly with ggpmisc? - r

I would like to label my plot, possibly using the equation method from ggpmisc to give an informative label that links to the colour and equation (then I can remove the legend altogether). For example, in the plot below, I would ideally have the factor levels of 4, 6 and 8 in the equation LHS.
library(tidyverse)
library(ggpmisc)
df_mtcars <- mtcars %>% mutate(factor_cyl = as.factor(cyl))
p <- ggplot(df_mtcars, aes(x = wt, y = mpg, group = factor_cyl, colour= factor_cyl))+
geom_smooth(method="lm")+
geom_point()+
stat_poly_eq(formula = my_formula,
label.x = "centre",
#eq.with.lhs = paste0(expression(y), "~`=`~"),
eq.with.lhs = paste0("Group~factor~level~here", "~Cylinders:", "~italic(hat(y))~`=`~"),
aes(label = paste(..eq.label.., sep = "~~~")),
parse = TRUE)
p
There is a workaround by modifying the plot afterwards using the technique described here, but surely there is something simpler?
p <- ggplot(df_mtcars, aes(x = wt, y = mpg, group = factor_cyl, colour= factor_cyl))+
geom_smooth(method="lm")+
geom_point()+
stat_poly_eq(formula = my_formula,
label.x = "centre",
eq.with.lhs = paste0(expression(y), "~`=`~"),
#eq.with.lhs = paste0("Group~factor~level~here", "~Cylinders:", "~italic(hat(y))~`=`~"),
aes(label = paste(..eq.label.., sep = "~~~")),
parse = TRUE)
p
# Modification of equation LHS technique from:
# https://stackoverflow.com/questions/56376072/convert-gtable-into-ggplot-in-r-ggplot2
temp <- ggplot_build(p)
temp$data[[3]]$label <- temp$data[[3]]$label %>%
fct_relabel(~ str_replace(.x, "y", paste0(c("8","6","4"),"~cylinder:", "~~italic(hat(y))" )))
class(temp)
#convert back to ggplot object
#https://stackoverflow.com/questions/56376072/convert-gtable-into-ggplot-in-r-ggplot2
#install.packages("ggplotify")
library("ggplotify")
q <- as.ggplot(ggplot_gtable(temp))
class(q)
q

This first example puts the label to the right of the equation, and is partly manual. On the other hand it is very simple to code. Why this works is because group is always present in the data as seen by layer functions (statistics and geoms).
library(tidyverse)
library(ggpmisc)
df_mtcars <- mtcars %>% mutate(factor_cyl = as.factor(cyl))
my_formula <- y ~ x
p <- ggplot(df_mtcars, aes(x = wt, y = mpg, group = factor_cyl, colour = factor_cyl)) +
geom_smooth(method="lm")+
geom_point()+
stat_poly_eq(formula = my_formula,
label.x = "centre",
eq.with.lhs = "italic(hat(y))~`=`~",
aes(label = paste(stat(eq.label), "*\", \"*",
c("4", "6", "8")[stat(group)],
"~cylinders.", sep = "")),
label.x.npc = "right",
parse = TRUE) +
scale_colour_discrete(guide = FALSE)
p
In fact with a little bit of additional juggling one can achieve almost an answer to the question. We need to add the lhs by pasting it explicitly in aes() so that we can add also paste text to its left based on a computed variable.
library(tidyverse)
library(ggpmisc)
df_mtcars <- mtcars %>% mutate(factor_cyl = as.factor(cyl))
my_formula <- y ~ x
p <- ggplot(df_mtcars, aes(x = wt, y = mpg, group = factor_cyl, colour = factor_cyl)) +
geom_smooth(method="lm")+
geom_point()+
stat_poly_eq(formula = my_formula,
label.x = "centre",
eq.with.lhs = "",
aes(label = paste("bold(\"", c("4", "6", "8")[stat(group)],
" cylinders: \")*",
"italic(hat(y))~`=`~",
stat(eq.label),
sep = "")),
label.x.npc = "right",
parse = TRUE) +
scale_colour_discrete(guide = FALSE)
p

What about a manual solution where you can add your equation as geom_text ?
Pros: Highly customization / Cons: Need to be manually edited based on your equation
Here, using your example and the linear regression:
library(tidyverse)
df_label <- df_mtcars %>% group_by(factor_cyl) %>%
summarise(Inter = lm(mpg~wt)$coefficients[1],
Coeff = lm(mpg~wt)$coefficients[2]) %>% ungroup() %>%
mutate(ypos = max(df_mtcars$mpg)*(1-0.05*row_number())) %>%
mutate(Label2 = paste(factor_cyl,"~Cylinders:~", "italic(y)==",round(Inter,2),ifelse(Coeff <0,"-","+"),round(abs(Coeff),2),"~italic(x)",sep =""))
# A tibble: 3 x 5
factor_cyl Inter Coeff ypos Label2
<fct> <dbl> <dbl> <dbl> <chr>
1 4 39.6 -5.65 32.2 4~Cylinders:~italic(y)==39.57-5.65~italic(x)
2 6 28.4 -2.78 30.5 6~Cylinders:~italic(y)==28.41-2.78~italic(x)
3 8 23.9 -2.19 28.8 8~Cylinders:~italic(y)==23.87-2.19~italic(x)
Now, you can pass it in ggplot2:
ggplot(df_mtcars,aes(x = wt, y = mpg, group = factor_cyl, colour= factor_cyl))+
geom_smooth(method="lm")+
geom_point()+
geom_text(data = df_label,
aes(x = 2.5, y = ypos,
label = Label2, color = factor_cyl),
hjust = 0, show.legend = FALSE, parse = TRUE)

An alternative to labelling with the equation is to label with the fitted line. Here is an approach adapted from an answer on a related question here
#example of loess for multiple models
#https://stackoverflow.com/a/55127487/4927395
library(tidyverse)
library(ggpmisc)
df_mtcars <- mtcars %>% mutate(cyl = as.factor(cyl))
models <- df_mtcars %>%
tidyr::nest(-cyl) %>%
dplyr::mutate(
# Perform loess calculation on each CpG group
m = purrr::map(data, lm,
formula = mpg ~ wt),
# Retrieve the fitted values from each model
fitted = purrr::map(m, `[[`, "fitted.values")
)
# Apply fitted y's as a new column
results <- models %>%
dplyr::select(-m) %>%
tidyr::unnest()
#find final x values for each group
my_last_points <- results %>% group_by(cyl) %>% summarise(wt = max(wt, na.rm=TRUE))
#Join dataframe of predictions to group labels
my_last_points$pred_y <- left_join(my_last_points, results)
# Plot with loess line for each group
ggplot(results, aes(x = wt, y = mpg, group = cyl, colour = cyl)) +
geom_point(size=1) +
geom_smooth(method="lm",se=FALSE)+
geom_text(data = my_last_points, aes(x=wt+0.4, y=pred_y$fitted, label = paste0(cyl," Cylinders")))+
theme(legend.position = "none")+
stat_poly_eq(formula = "y~x",
label.x = "centre",
eq.with.lhs = paste0(expression(y), "~`=`~"),
aes(label = paste(..eq.label.., sep = "~~~")),
parse = TRUE)

Related

How to plot a single regression line but colour points by a different factor in ggplot2 R?

The scatterplot is colour-coded by factor z. By default, ggplot2 also pots the regression lines by factor. I want to plot a single regression line passing through the data. How do I achiece this?
x <- c(1:50)
y <- rnorm(50,4,1)
z <- rep(c("P1", "P2"), each = 25)
df <- data.frame(x,y,z)
my.formula = y ~ x
ggplot(aes(x = x, y = y, color = z), data = df) +
geom_point() + scale_fill_manual(values=c("purple", "blue")) +
geom_smooth(method="lm", formula = y ~ x ) +
stat_poly_eq(formula = my.formula, aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")), parse = TRUE, size = 2.5, col = "black")+
theme_classic()
If I undertand you correctly, you can assign group = 1 in the aes to plot just one regression line. You can use the following code:
library(tidyverse)
library(ggpmisc)
my.formula = y ~ x
ggplot(aes(x = x, y = y, color = z, group = 1), data = df) +
geom_point() + scale_fill_manual(values=c("purple", "blue")) +
geom_smooth(method="lm", formula = y ~ x ) +
stat_poly_eq(formula = my.formula, aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")), parse = TRUE, size = 2.5, col = "black")+
theme_classic()
Output:

How to adjust the position of regression equation on ggplot?

I would like to add the regression line and R^2 to my ggplot. I am fitting the regression line to different categories and for each category I am getting a unique equation. I'd like to set the position of equations for each category manually. i.e. Finding the max expression of y for each group and printing the equation at ymax + 1.
Here is my code:
library(ggpmisc)
df <- data.frame(x = c(1:100))
df$y <- 20 * c(0, 1) + 3 * df$x + rnorm(100, sd = 40)
df$group <- factor(rep(c("A", "B"), 50))
df <- df %>% group_by(group) %>% mutate(ymax = max(y))
my.formula <- y ~ x
df %>%
group_by(group) %>%
do(tidy(lm(y ~ x, data = .)))
p <- ggplot(data = df, aes(x = x, y = y, colour = group)) +
geom_smooth(method = "lm", se=FALSE, formula = my.formula) +
stat_poly_eq(formula = my.formula,
aes(x = x , y = ymax + 1, label = paste(..eq.label.., ..rr.label.., sep = "~~~")),
parse = TRUE) +
geom_point()
p
Any suggestion how to do this?
Also is there any way I can only print the slope of the equation. (remove the intercept from plot)?
Thanks,
I'm pretty sure that setting adjusting stat_poly_eq() with the geom argument will get what you want. Doing so will center the equations, leaving the left half of each clipped, so we use hjust = 0 to left-adjust the equations. Finally, depending on your specific data, the equations may be overlapping each other, so we use the position argument to have ggplot attempt to separate them.
This adjusted call should get you started, I hope:
p <- ggplot(data = df, aes(x = x, y = y, colour = group)) +
geom_smooth(method = "lm", se=FALSE, formula = my.formula) +
stat_poly_eq(
formula = my.formula,
geom = "text", # or 'label'
hjust = 0, # left-adjust equations
position = position_dodge(), # in case equations now overlap
aes(x = x , y = ymax + 1, label = paste(..eq.label.., ..rr.label.., sep = "~~~")),
parse = TRUE) +
geom_point()
p

Using formulae on facet_wrap in ggplot2

I'm trying to replace the facet_wrap titles on a ggplot bar plot with expressions, but I'm having no luck. I've tried here and here but neither seem to be working for me.
The whole dataset is quite large, so here's some dummy data to illustrate the problem.
library(tidyr)
library(ggplot2)
data<-data.frame(species = rep(c("oak", "elm", "ash"), each = 5),
resp_1 = (runif(15, 1,100)),
resp_2 = (runif(15, 1,100)),
resp_3 = (runif(15, 1,100)),
resp_4 = (runif(15, 1,100)),
resp_5 = (runif(15, 1,100)))
### transform to longform with tidyr
data_2 <- gather(data, response, result, resp_1:resp_5, factor_key=TRUE)
### plot with ggplot2
ggplot(data_2, aes(x = species, y = result, fill = species))+
geom_bar(stat = 'sum')+
facet_wrap(~ response)
### here are the labels I'd like to see on the facets
oxygen <-expression ("Oxygen production (kg/yr)")
runoff <-expression("Avoided runoff " ~ (m ^{3} /yr))
co <- expression("CO removal (g/yr)")
o3 <- expression("O"[3]~" removal (g/yr)")
no2 <- expression("NO"[2]~" removal (g/yr)")
labels <- c(oxygen, runoff, co, o3, no2)
### this doesn't work
ggplot(data_2, aes(x = species, y = result, fill = species))+
geom_bar(stat = 'sum')+
facet_wrap(~ response, labeller = labeller(response = labels))
### close, but doesn't work
levels(data_2$response)<-labels
ggplot(data_2, aes(x = species, y = result, fill = species))+
geom_bar(stat = 'sum')+
facet_wrap(~ response, labeller = labeller(response = labels))
### produces an error
ggplot(data_2, aes(x = species, y = result, fill = species))+
geom_bar(stat = 'sum')+
facet_wrap(~ response, labeller = label_parsed)
I'd also like to get rid of the second legend in grey titled "n".
Right now your expression names don't match up to the values used as the facets. So I'd recommend storing your labels in an expression
labels <- expression(
resp_1 = "Oxygen production (kg/yr)",
resp_2 = "Avoided runoff " ~ (m ^{3} /yr),
resp_3 = "CO removal (g/yr)",
resp_4 = "O"[3]~" removal (g/yr)",
resp_5 = "NO"[2]~" removal (g/yr)"
)
And then you can write your own labeler function to extract the correct value
ggplot(data_2, aes(x = species, y = result, fill = species))+
geom_bar(stat = 'sum', show.legend = c(size=FALSE))+
facet_wrap(~ response, labeller = function(x) {
list(as.list(labels)[x$response])
})
We've also used show.legend = c(size=FALSE) to turn off the n legend
Use as_labeller and label_parsed. Ref
library(tidyr)
library(ggplot2)
data <- data.frame(species = rep(c("oak", "elm", "ash"), each = 5),
resp_1 = (runif(15, 1, 100)),
resp_2 = (runif(15, 1, 100)),
resp_3 = (runif(15, 1, 100)),
resp_4 = (runif(15, 1, 100)),
resp_5 = (runif(15, 1, 100)))
data_2 <- gather(data, response, result, resp_1:resp_5, factor_key = TRUE)
# setup the labels
reponse_names <- c(
`resp_1` = "Oxygen~production~(kg*yr^{-1})",
`resp_2` = "Avoided~runoff~(m^{3}*yr^{-1})",
`resp_3` = "CO~removal~(g*yr^{-1})",
`resp_4` = "O[3]~removal~(g*yr^{-1})",
`resp_5` = "NO[2]~removal~(g*yr^{-1})"
)
# plot
ggplot(data_2, aes(x = species, y = result, fill = species))+
geom_bar(stat = 'sum')+
facet_wrap(
~ response,
labeller = labeller(response = as_labeller(reponse_names, label_parsed))
) +
guides(size = "none")
Created on 2021-04-30 by the reprex package (v2.0.0)

How can I change the colour of the column header in geom_table() in R?

I'm using the geom_table() function from the ggpmisc package to add a table legend to my figure. I want to remove the grey colour from the first row with the column headers.
library(ggpmisc)
library(tidyverse)
mtcars %>%
group_by(cyl) %>%
summarize(wt = mean(wt), mpg = mean(mpg)) %>%
ungroup() %>%
mutate(wt = sprintf("%.2f", wt),
mpg = sprintf("%.1f", mpg)) -> tb
df <- tibble(x = 5.45, y = 34, tb = list(tb))
ggplot(mtcars, aes(wt, mpg, colour = factor(cyl))) +
geom_point() +
geom_table(data = df, aes(x = x, y = y, label = tb),
table.theme = ttheme_gtbw)
You can set the theme using arguments that are passed from ggpmisc to the corresponding ttheme function from gridExtra (Description of some of the possible options). If I understand your question correctly you want the background of the first row in your table to be white. You can achieve this using the following code to build your plot:
ggplot(mtcars, aes(wt, mpg, colour = factor(cyl))) +
geom_point() +
geom_table(data = df, aes(x = x, y = y, label = tb),
table.theme = ttheme_gtbw(colhead = list(bg_params = list(fill = "white"))))

How to display different y labels in the equations using stat_poly_eq of ggpmisc

I'm trying to display the equations on the plot using the stat_poly_eq function of ggpmisc.
My problem is how to change the y= ... in the equation, by y1=... and y2=... by referring to the key argument.
I tried to add the eq.with.lhs argument in the mapping but it does not recognize the argument.
I tried to pass a vector to the eq.with.lhs argument but it overlapped both elements in each equation...
Do you have a better idea?
In the last case, I could use geom_text after calculating the equation coefficients myself, but it seemed to be a less efficient way to solve the problem.
Here is a reprex of my problem.
data <- data.frame(x = rnorm(20)) %>%
mutate(y1 = 1.2*x + rnorm(20, sd=0.2),
y2 = 0.9*x + rnorm(20, sd=0.3)) %>%
gather(value = value, key = key, -x)
ggplot(data, aes(x = x, y = value)) +
geom_point(aes(shape = key, colour = key)) +
stat_poly_eq(aes(label = ..eq.label.., colour = key),
formula = y ~ poly(x, 1, raw = TRUE),
eq.x.rhs = "x",
# eq.with.lhs = c(paste0(expression(y[1]), "~`=`~"),
# paste0(expression(y[2]), "~`=`~")),
eq.with.lhs = paste0(expression(y[ind]), "~`=`~"),
parse = TRUE) +
ylab(NULL)
I'm not really sure if it's possible to do it through ggpmisc, but you can change the data once the plot is built, like so:
library(tidyverse)
library(ggpmisc)
data <- data.frame(x = rnorm(20)) %>%
mutate(y1 = 1.2*x + rnorm(20, sd=0.2),
y2 = 0.9*x + rnorm(20, sd=0.3)) %>%
gather(value = value, key = key, -x)
p <- ggplot(data, aes(x = x, y = value)) +
geom_point(aes(shape = key, colour = key)) +
stat_poly_eq(aes(label = ..eq.label.., colour = key),
formula = y ~ poly(x, 1, raw = TRUE),
eq.x.rhs = "x",
eq.with.lhs = paste0(expression(y), "~`=`~"),
parse = TRUE) +
ylab(NULL)
temp <- ggplot_build(p)
temp$data[[2]]$label <- temp$data[[2]]$label %>%
fct_relabel(~ str_replace(.x, "y", paste0("y[", 1:2, "]")))
grid::grid.newpage()
grid::grid.draw(ggplot_gtable(temp))

Resources