Create dynamic labels for geom_smooth lines - r

I have a changing df and I am grouping different values c.
With ggplot2 I plot them with the following code to get a scatterplott with multiple linear regression lines (geom_smooth)
ggplot(aes(x = a, y = b, group = c)) +
geom_point(shape = 1, aes(color = c), alpha = alpha) +
geom_smooth(method = "lm", aes(group = c, color = c), se = F)
Now I want to display on each geom_smooth line in the plot a label with the value of the group c.
This has to be dynamic, because I can not write new code when my df changes.
Example: my df looks like this
a b c
----------------
1.6 24 100
-1.4 43 50
1 28 100
4.3 11 50
-3.45 5.2 50
So in this case I would get 3 geom_smooth lines in the plot with different colors.
Now I simply want to add a text label to the plot with "100" next to the geom_smooth with the group c = 100 and a text label with "50"to the line for the group c = 50, and so on... as new groups get introduced in the df, new geom_smooth lines are plotted and need to be labeled.
the whole code for the plot:
ggplot(aes(x = a, y = b, group = c), data = df, na.rm = TRUE) +
geom_point(aes(color = GG, size = factor(c)), alpha=0.3) +
scale_x_continuous(limits = c(-200,2300))+
scale_y_continuous(limits = c(-1.8,1.5))+
geom_hline(yintercept=0, size=0.4, color="black") +
scale_color_distiller(palette="YlGnBu", na.value="white") +
geom_smooth(method = "lm", aes(group = factor(GG), color = GG), se = F) +
geom_label_repel(data = labelInfo, aes(x= max, y = predAtMax, label = label, color = label))

You can probably do it if you pick the location you want the lines labelled. Below, I set them to label at the far right end of each line, and used ggrepel to avoid overlapping labels:
library(ggplot2)
library(ggrepel)
library(dplyr)
set.seed(12345)
df <-
data.frame(
a = rnorm(100,2,0.5)
, b = rnorm(100, 20, 5)
, c = factor(sample(c(50,100,150), 100, TRUE))
)
labelInfo <-
split(df, df$c) %>%
lapply(function(x){
data.frame(
predAtMax = lm(b~a, data=x) %>%
predict(newdata = data.frame(a = max(x$a)))
, max = max(x$a)
)}) %>%
bind_rows
labelInfo$label = levels(df$c)
ggplot(
df
, aes(x = a, y = b, color = c)
) +
geom_point(shape = 1) +
geom_smooth(method = "lm", se = F) +
geom_label_repel(data = labelInfo
, aes(x= max
, y = predAtMax
, label = label
, color = label))

This method might work for you. It uses ggplot_build to access the rightmost point in the actual geom_smooth lines to add a label by it. Below is an adaptation that uses Mark Peterson's example.
library(ggplot2)
library(ggrepel)
library(dplyr)
set.seed(12345)
df <-
data.frame(
a = rnorm(100,2,0.5)
, b = rnorm(100, 20, 5)
, c = factor(sample(c(50,100,150), 100, TRUE))
)
p <-
ggplot(df, aes(x = a, y = b, color = c)) +
geom_point(shape = 1) +
geom_smooth(method = "lm", se = F)
p.smoothedmaxes <-
ggplot_build(p)$data[[2]] %>%
group_by( group) %>%
filter( x == max(x))
p +
geom_text_repel( data = p.smoothedmaxes,
mapping = aes(x = x, y = y, label = round(y,2)),
col = p.smoothedmaxes$colour,
inherit.aes = FALSE)

This came up for me today and I landed on this solution with data = ~fn()
library(tidyverse)
library(broom)
mpg |>
ggplot(aes(x = displ, y = hwy, colour = class, label = class)) +
geom_count(alpha = 0.1) +
stat_smooth(alpha = 0.6, method = lm, geom = "line", se = FALSE) +
geom_text(
aes(y = .fitted), size = 3, hjust = 0, nudge_x = 0.1,
data = ~{
nest_by(.x, class) |>
summarize(broom::augment(lm(hwy ~ displ, data = data))) |>
slice_max(order_by = displ, n = 1)
}
) +
scale_x_continuous(expand = expansion(add = c(0, 1))) +
theme_minimal()
Or do it with a function
#' #examples
#' last_lm_points(df = mpg, formula = hwy~displ, group = class)
last_lm_points <- function(df, formula, group) {
# df <- mpg; formula <- as.formula(hwy~displ); group <- sym("class");
x_arg <- formula[[3]]
df |>
nest_by({{group}}) |>
summarize(broom::augment(lm(formula, data = data))) |>
slice_max(order_by = get(x_arg), n = 1)
}
mpg |>
ggplot(aes(displ, hwy, colour = class, label = class)) +
geom_count(alpha = 0.1) +
stat_smooth(alpha = 0.6, method = lm, geom = "line", se = FALSE) +
geom_text(
aes(y = .fitted), size = 3, hjust = 0, nudge_x = 0.1,
data = ~last_lm_points(.x, hwy~displ, class)
) +
scale_x_continuous(expand = expansion(add = c(0, 1))) +
theme_minimal()

Related

How to manually change line size and alpha values for ggplot2 lines (separated by factor)?

I want to create a graph where I can change the line size for each line c(1,2,3) and the alpha values for each line c(0.5,0.6,0.7). I tried to use scale_size_manual but it didn't make any difference. Any ideas on how to proceed?
var <- c("T","T","T","M","M","M","A","A","A")
val <- rnorm(12,4,5)
x <- c(1:12)
df <- data.frame(var,val,x)
ggplot(aes(x= x , y = val, color = var, group = var), data = df) +
scale_color_manual(values = c("grey","blue","black")) + geom_smooth(aes(x = x, y = val), formula = "y ~ x", method = "loess",se = FALSE, size = 1) + scale_x_continuous(breaks=seq(1, 12, 1), limits=c(1, 12)) + scale_size_manual(values = c(1,2,3))
To set the size and alpha values for your lines you have to map on aesthetics. Otherwise scale_size_manual will have no effect:
library(ggplot2)
ggplot(aes(x = x, y = val, color = var, group = var), data = df) +
scale_color_manual(values = c("grey", "blue", "black")) +
geom_smooth(aes(x = x, y = val, size = var, alpha = var), formula = "y ~ x", method = "loess", se = FALSE) +
scale_x_continuous(breaks = seq(1, 12, 1), limits = c(1, 12)) +
scale_size_manual(values = c(1, 2, 3)) +
scale_alpha_manual(values = c(.5, .6, .7))

How do I place insets at exact positions on a ggplot and set the colors of geom_segment?

I'm creating an illustration of how loess works. My two queries are at the end of this question. First, setup:
library(tidyverse)
data(melanoma, package = "lattice")
mela <- as_tibble(melanoma)
tric = function(x) if_else(abs(x) < 1, (1 - abs(x)^3)^3, 0)
scl = function(x) (x - min(x))/(max(x) - min(x))
mela1 <- mela %>%
slice(1:9) %>%
mutate(dist = abs(year - year[5]),
scaled = scl(dist),
weight = tric(scaled)
)
mod1 <- lm(incidence ~ year, data = mela1, weights = weight) %>%
augment(., mela1)
mela2 <- mela %>%
slice(10:18) %>%
mutate(dist = abs(year - year[5]),
scaled = scl(dist),
weight = tric(scaled)
)
mod2 <- lm(incidence ~ year, data = mela2, weights = weight) %>%
augment(., mela2)
mela3 <- mela %>%
slice(19:27) %>%
mutate(dist = abs(year - year[5]),
scaled = scl(dist),
weight = tric(scaled)
)
mod3 <- lm(incidence ~ year, data = mela3, weights = weight) %>%
augment(., mela3)
mela4 <- mela %>%
slice(28:37) %>%
mutate(dist = abs(year - year[5]),
scaled = scl(dist),
weight = tric(scaled)
)
mod4 <- lm(incidence ~ year, data = mela4, weights = weight) %>%
augment(., mela4)
The main plot:
col <- rainbow_hcl(start = 12, 4, l = 20)
colB <- rainbow_hcl(start = 12, 4, l = 100)
main <- ggplot(data = mela, aes(x = year, y = incidence)) +
# segment 1
geom_segment(
aes(x = 1936, xend = 1944, y = 2.115717, yend = 2.115717)) +
# segment 2
geom_segment(
aes(x = 1945, xend = 1953, y = 3.473217, yend = 3.473217)) +
# segment 3
geom_segment(
aes(x = 1954, xend = 1962, y = 1.170247, yend = 1.170247)) +
# segment 4
geom_segment(
aes(x = 1963, xend = 1972, y = 2.7, yend = 2.7)) +
geom_point(data = mod1, color = col[1], shape = 1) +
geom_point(data = mod2, color = col[2], shape = 0) +
geom_point(data = mod3, color = col[4], shape = 5) +
geom_point(data = mod4, color = col[3], shape = 2) +
geom_line(data = mod1, aes(x = year, y = .fitted), color = col[1]) +
geom_line(data = mod2, aes(x = year, y = .fitted), color = col[2]) +
geom_line(data = mod3, aes(x = year, y = .fitted), color = col[4]) +
geom_line(data = mod4, aes(x = year, y = .fitted), color = col[3]) +
scale_x_continuous(breaks = c(1940, 1949, 1958, 1967))
Insets
inset1 <- ggplot(data = mod1, aes(x = year, y = weight)) +
geom_line(color = col[1]) +
geom_area(fill = colB[1]) +
theme_void()
inset2 <- ggplot(data = mod2, aes(x = year, y = weight)) +
geom_line(color = col[12) +
geom_area(fill = colB[2]) +
theme_void()
inset3 <- ggplot(data = mod3, aes(x = year, y = weight)) +
geom_line(color = col[3]) +
geom_area(fill = colB[3]) +
theme_void()
inset4 <- ggplot(data = mod4, aes(x = year, y = weight)) +
geom_line(color = col[4]) +
geom_area(fill = colB[4]) +
theme_void()
Question 1: How do I place the four insets so that the y = 0 of the weight function is at the height of the corresponding geom_segment? I would like the inset heights = 2 in the main figure coordinates.
Question 2: How do I set the color of each segment to the color of the corresponding inset?
Not sure whether I got everything right. But I tried my best. (; You could simplify your code considerably
... by binding you models data into one dataframe and also the data for the segments.
... mapping on aesthetics and setting the colors and shape via some named vectors and scale_xxx_manual
For your insets there is no need to make separate plots and trying to put them into the main plot. You could simply add them via an additional geom_line and a geom_ribbon. To get the heights of the segments join the segments data to the models data so that you can set the starting value for the geom_ribbon according to the y value of the segment
library(tidyverse)
library(broom)
library(colorspace)
col <- setNames(col, c("mod1", "mod2", "mod4", "mod3"))
colB <- setNames(colB, c("mod1", "mod2", "mod4", "mod3"))
shapes <- setNames(c(1, 0, 5, 2), c("mod1", "mod2", "mod3", "mod4"))
mods <- list(mod1 = mod1, mod2 = mod2, mod3 = mod3, mod4 = mod4) %>%
bind_rows(.id = "mod")
# segments data
dseg <- tribble(
~mod, ~x, ~xend, ~y,
"mod1", 1936, 1944, 2.115717,
"mod2", 1945, 1953, 3.473217,
"mod3", 1954, 1962, 1.170247,
"mod4", 1963, 1972, 2.7,
)
main <- ggplot(data = mela, aes(x = year, y = incidence)) +
geom_segment(data = dseg, aes(x = x, xend = xend, y = y, yend = y, color = mod)) +
geom_point(data = mods, aes(color = mod, shape = mod)) +
geom_line(data = mods, aes(x = year, y = .fitted, color = mod)) +
scale_color_manual(values = col) +
scale_shape_manual(values = shapes) +
scale_x_continuous(breaks = c(1940, 1949, 1958, 1967)) +
guides(color = FALSE, shape = FALSE, fill = FALSE)
mods1 <- left_join(mods, select(dseg, mod, y), by = "mod")
# Add insets
main +
geom_line(data = mods1, aes(x = year, y = weight + y, color = mod, group = mod)) +
geom_ribbon(data = mods1, aes(x = year, ymin = y, ymax = weight + y, fill = mod, group = mod)) +
scale_fill_manual(values = colB)

Is there a way to label each cluster generated by lda()

using lda() and ggplot2 I can make a canonical plot with confidence ellipses. Is there a way to add labels for each group on the plot (labeling each cluster with a group from figure legend)?
# for the universality lda(Species~., data=iris) would be analogous
m.lda <- lda(Diet ~ ., data = b)
m.sub <- b %>% dplyr::select(-Diet) %>% as.matrix
CVA.scores <- m.sub %*% m.lda$scaling
m.CV <- data.frame(CVA.scores)
m.CV$Diet <- b$Diet
m.cva.plot <-
ggplot(m.CV, aes(x = LD1, y = LD2)) +
geom_point(aes(color=Diet), alpha=0.5) +
labs(x = "CV1", y = "CV2") +
coord_fixed(ratio=1)
chi2 = qchisq(0.05,2, lower.tail=FALSE)
CIregions.mean.and.pop <-
m.CV %>%
group_by(Diet) %>%
summarize(CV1.mean = mean(LD1),
CV2.mean = mean(LD2),
mean.radii = sqrt(chi2/n()),
popn.radii = sqrt(chi2))
m.cva.plot2 <-
m.cva.plot +
geom_circle(data = CIregions.mean.and.pop,
mapping = aes(x0 = CV1.mean, y0 = CV2.mean, r = mean.radii),
inherit.aes = FALSE) +
geom_circle(data = CIregions.mean.and.pop,
mapping = aes(x0 = CV1.mean, y0 = CV2.mean, r = popn.radii),
linetype = "dashed",
inherit.aes = FALSE)
The labels can be placed with either geom_text or geom_label. In the case below I will use geom_label, with the y coordinate adjusted by adding popn.radii the radii of the outer circles.
The code in the question is adapted to use built-in data set iris, like the question itself says.
m.cva.plot2 +
geom_label(data = CIregions.mean.and.pop,
mapping = aes(x = CV1.mean,
y = CV2.mean + popn.radii,
label = Species),
label.padding = unit(0.20, "lines"),
label.size = 0)
Reproducible code
library(dplyr)
library(ggplot2)
library(ggforce)
library(MASS)
b <- iris
m.lda <- lda(Species~., data=iris) #would be analogous
#m.lda <- lda(Diet ~ ., data = b)
m.sub <- b %>% dplyr::select(-Species) %>% as.matrix
CVA.scores <- m.sub %*% m.lda$scaling
m.CV <- data.frame(CVA.scores)
m.CV$Species <- b$Species
m.cva.plot <-
ggplot(m.CV, aes(x = LD1, y = LD2)) +
geom_point(aes(color=Species), alpha=0.5) +
labs(x = "CV1", y = "CV2") +
coord_fixed(ratio=1)
chi2 = qchisq(0.05,2, lower.tail=FALSE)
CIregions.mean.and.pop <-
m.CV %>%
group_by(Species) %>%
summarize(CV1.mean = mean(LD1),
CV2.mean = mean(LD2),
mean.radii = sqrt(chi2/n()),
popn.radii = sqrt(chi2))
m.cva.plot2 <-
m.cva.plot +
geom_circle(data = CIregions.mean.and.pop,
mapping = aes(x0 = CV1.mean, y0 = CV2.mean, r = mean.radii),
inherit.aes = FALSE) +
geom_circle(data = CIregions.mean.and.pop,
mapping = aes(x0 = CV1.mean, y0 = CV2.mean, r = popn.radii),
linetype = "dashed",
inherit.aes = FALSE)

Side by side geom_boxplot for the high and low values of multiple variables in R?

I am trying to create boxplot that would compare ob vs A and B at multiple location (I.e., Start, Mid, End) for Top and Low values (10% in this case). I am trying to use the gather, facet_wrap, grid.arrange, ggplot functionality in R but can not put things together. here is my code so far- I would appreciate help moving forward.
library(tidyverse)
library(gridExtra)
DF_1 = data.frame(Ob = runif(100, 10,80), A = runif(100, 5, 90), B = runif(100, 3,85), loc = rep("Start",100))
DF_2 = data.frame(Ob = runif(100, 10,80), A = runif(100, 5, 90), B = runif(100, 3,85), loc = rep("Mid",100))
DF_3 = data.frame(Ob = runif(100, 10,80), A = runif(100, 5, 90), B = runif(100, 3,85), loc = rep("End",100))
DF_1_Top = DF_1[order(DF_1$Ob,decreasing = TRUE),][1:10,]
DF_1_Low = DF_1[order(DF_1$Ob,decreasing = FALSE),][1:10,]
DF_2_Top = DF_2[order(DF_2$Ob,decreasing = TRUE),][1:10,]
DF_2_Low = DF_2[order(DF_2$Ob,decreasing = FALSE),][1:10,]
DF_3_Top = DF_1[order(DF_3$Ob,decreasing = TRUE),][1:10,]
DF_3_Low = DF_1[order(DF_3$Ob,decreasing = FALSE),][1:10,]
DF_Top = rbind(DF_1_Top, DF_2_Top, DF_3_Top)
DF_Low = rbind(DF_1_Low, DF_2_Low, DF_3_Low)
DF_T = gather(DF_Top, key = "Variable", value = "Value", - "loc")
DF_L = gather(DF_Low, key = "Variable", value = "Value", - "loc")
P1 = ggplot(DF_T, aes(x = Variable, y = Value))+
geom_boxplot()+facet_wrap(~loc, nrow = 1)
P2 = ggplot(DF_L, aes(x = Variable, y = Value))+
geom_boxplot()+facet_wrap(~loc, nrow = 1)
grid.arrange(P1,P2, nrow = 2)
Here is a manually drawn figure that i would like to achieve
You could stack all of your data into a single data frame and create a single graph. For example:
d = bind_rows(High=DF_Top, Low=DF_Low, .id='source') %>%
mutate(source=factor(source, levels=c("High","Low")))
d %>%
gather(key, value, Ob:B) %>%
mutate(key = fct_relevel(key, "Ob")) %>%
ggplot(aes(key, value)) +
geom_hline(yintercept=0) +
geom_boxplot() +
facet_grid(source ~ loc, switch="x") +
labs(x="", y="") +
scale_y_continuous(expand=expand_scale(mult=c(0.0, 0.02))) +
theme_classic() +
theme(strip.placement="outside",
strip.background.x=element_rect(colour=NA, fill=NA),
strip.text.x=element_text(size=11, face="bold"))
Responding to your comment, I'm not wild about moving the key labels to a legend, but...
d %>%
gather(key, value, Ob:B) %>%
mutate(key = fct_relevel(key, "Ob")) %>%
ggplot(aes(loc, value, colour=key)) +
geom_hline(yintercept=0) +
geom_boxplot() +
facet_grid(source ~ ., switch="x") +
labs(x="", y="", colour="") +
scale_y_continuous(expand=expand_scale(mult=c(0.0, 0.02))) +
theme_classic() +
theme(legend.position="bottom",
legend.box.margin=margin(t=-20))

how to display text on several lines with geom_text in faceting wrap

This is my df :
df <- data.frame(annee = rep(c(2003,2004), times = 1, each = 3), sps = c("a", "b", "c"), nb = 1:3)
I create a column containing my labels :
df$labels <- paste("nb", df$sps, "=", df$nb)
Then I do my plot :
ggplot(df, aes(nb)) +
geom_density(aes(fill = sps, colour = sps), alpha = 0.1) +
facet_wrap(~ annee) +
geom_text(data=df, aes(x=8, y=2.5, label= labels), colour="black", inherit.aes=FALSE, parse=FALSE)
But I have a problem with my text in each facet : I would like to have 3 lines (one for each sps).
I tried with the symbol "\n" but I failed in trying to obtain :
"nb a = 1 \n nb b = 2 \n nb c = 3" for each year
Thanks for help
You will have to concatenate what you want broken into several lines into one single string.
newdf <- aggregate(labels ~ annee, data = df, FUN = paste, collapse = "\n")
ggplot(df, aes(nb)) +
geom_density(aes(fill = sps, colour = sps), alpha = 0.1) +
facet_wrap(~ annee) +
geom_text(data = newdf, aes(x = 8, y = 2, label = labels), color = "black") +
scale_x_continuous(limits = c(0, 11)) +
scale_y_continuous(limits = c(0, 2.25))
You can achieve what you want by creating a separate data.frame for your labels:
library(tidyverse)
df <- data.frame(annee = rep(c(2003,2004),
times = 1, each = 3),
sps = c("a", "b", "c"),
nb = 1:3)
# create labels in separate data.frame
label_df <- df %>%
mutate(labels = paste("nb", sps, "=", nb)) %>%
group_by(annee) %>%
summarise(labels = paste(labels, collapse = "\n")) %>%
mutate(x = 6.5,
y = 2.2)
ggplot(df, aes(nb)) +
geom_density(aes(fill = sps, colour = sps), alpha = 0.1) +
facet_wrap(~annee) +
geom_text(data = label_df, aes(x = x, y = y, label = labels)) +
coord_cartesian(ylim = c(0, 2.4), xlim = c(1, 8))

Resources