Is there a way to label each cluster generated by lda() - r

using lda() and ggplot2 I can make a canonical plot with confidence ellipses. Is there a way to add labels for each group on the plot (labeling each cluster with a group from figure legend)?
# for the universality lda(Species~., data=iris) would be analogous
m.lda <- lda(Diet ~ ., data = b)
m.sub <- b %>% dplyr::select(-Diet) %>% as.matrix
CVA.scores <- m.sub %*% m.lda$scaling
m.CV <- data.frame(CVA.scores)
m.CV$Diet <- b$Diet
m.cva.plot <-
ggplot(m.CV, aes(x = LD1, y = LD2)) +
geom_point(aes(color=Diet), alpha=0.5) +
labs(x = "CV1", y = "CV2") +
coord_fixed(ratio=1)
chi2 = qchisq(0.05,2, lower.tail=FALSE)
CIregions.mean.and.pop <-
m.CV %>%
group_by(Diet) %>%
summarize(CV1.mean = mean(LD1),
CV2.mean = mean(LD2),
mean.radii = sqrt(chi2/n()),
popn.radii = sqrt(chi2))
m.cva.plot2 <-
m.cva.plot +
geom_circle(data = CIregions.mean.and.pop,
mapping = aes(x0 = CV1.mean, y0 = CV2.mean, r = mean.radii),
inherit.aes = FALSE) +
geom_circle(data = CIregions.mean.and.pop,
mapping = aes(x0 = CV1.mean, y0 = CV2.mean, r = popn.radii),
linetype = "dashed",
inherit.aes = FALSE)

The labels can be placed with either geom_text or geom_label. In the case below I will use geom_label, with the y coordinate adjusted by adding popn.radii the radii of the outer circles.
The code in the question is adapted to use built-in data set iris, like the question itself says.
m.cva.plot2 +
geom_label(data = CIregions.mean.and.pop,
mapping = aes(x = CV1.mean,
y = CV2.mean + popn.radii,
label = Species),
label.padding = unit(0.20, "lines"),
label.size = 0)
Reproducible code
library(dplyr)
library(ggplot2)
library(ggforce)
library(MASS)
b <- iris
m.lda <- lda(Species~., data=iris) #would be analogous
#m.lda <- lda(Diet ~ ., data = b)
m.sub <- b %>% dplyr::select(-Species) %>% as.matrix
CVA.scores <- m.sub %*% m.lda$scaling
m.CV <- data.frame(CVA.scores)
m.CV$Species <- b$Species
m.cva.plot <-
ggplot(m.CV, aes(x = LD1, y = LD2)) +
geom_point(aes(color=Species), alpha=0.5) +
labs(x = "CV1", y = "CV2") +
coord_fixed(ratio=1)
chi2 = qchisq(0.05,2, lower.tail=FALSE)
CIregions.mean.and.pop <-
m.CV %>%
group_by(Species) %>%
summarize(CV1.mean = mean(LD1),
CV2.mean = mean(LD2),
mean.radii = sqrt(chi2/n()),
popn.radii = sqrt(chi2))
m.cva.plot2 <-
m.cva.plot +
geom_circle(data = CIregions.mean.and.pop,
mapping = aes(x0 = CV1.mean, y0 = CV2.mean, r = mean.radii),
inherit.aes = FALSE) +
geom_circle(data = CIregions.mean.and.pop,
mapping = aes(x0 = CV1.mean, y0 = CV2.mean, r = popn.radii),
linetype = "dashed",
inherit.aes = FALSE)

Related

Problem with reference to variable in R function

I'd like to write a function to do ANOVA in batch, but there's a problem I can't solve. The problem is with the variable reference. The whole code is written correctly, because everything is calculated "on foot", but when I try to insert this code into the function, it doesn't work. Can someone take a look and point out where the problem is?
library(tidyverse)
library(ggpubr)
library(rstatix)
library(ggprism)
df <- data.frame(
stringsAsFactors = FALSE,
id = c(1,2,3,4,5,1,2,3,4,5,1,2,3,4,5,1,2,3,4,5),
treat = c("o","o","o","o","o","j","j","j","j","j","z","z","z","z","z","w","w","w","w","w"),
vo2 = c("47.48","42.74","45.23","51.65","49.11","51.00","43.82","49.88","54.61","52.20","51.31",
"47.56","50.69","54.88","55.01","51.89","46.10","50.98","53.62","52.77"))
df$vo2 <- as.numeric(df$vo2)
df$treat <- factor(df$treat)
Everything works fine in the code below...
# Summary
group_by(df, treat) %>%
summarise(
N = n(),
Mean = mean(vo2, na.rm = TRUE),
Sd = sd(vo2, na.rm = TRUE))
# ANOVA
res.aov <- anova_test(dv = vo2, wid = id, within = treat, data = df)
get_anova_table(res.aov, correction = c("auto"))
# Pairwise comparisons
pwc <- df %>%
pairwise_t_test(vo2 ~ treat, paired = TRUE, conf.level = 0.95,
detailed = TRUE, p.adjust.method = "bonferroni")
pwc
pwc <- pwc %>% add_xy_position(x = "treat")
ggplot(df, aes(x = treat, y = vo2)) +
stat_boxplot(aes(x = treat, y = vo2, color = treat), geom = 'errorbar', coef=1.5, width=0.4, linetype = 1) +
geom_boxplot(aes(x = treat, y = vo2, color = treat, fill = treat)) +
geom_jitter(aes(x = treat, y = vo2, color = treat, fill = treat), width = 0.2) +
stat_summary(fun = mean, geom = "point", shape = 0, size = 2, color = "black", stroke = 1) +
#xlab(deparse(substitute(x))) + ylab(deparse(substitute(y))) +
theme_prism(base_size = 14) + scale_x_discrete(guide = "prism_bracket") +
scale_fill_prism(palette = "floral") + scale_colour_prism(palette = "floral") +
scale_y_continuous(expand = expansion(mult = c(0.02, 0.05))) +
stat_pvalue_manual(pwc, tip.length = 0, hide.ns = TRUE) +
labs(
subtitle = get_test_label(res.aov, detailed = TRUE),
caption = get_pwc_label(pwc)) +
theme(legend.position = "NULL")
The result of this code is the chart below
But the function doesn't work...
The fix for "deparse(substitute(x)" only partially solved the problem, and I get a warning:
1: In mean.default(~"vo2", na.rm = TRUE): argument is not numeric or logical: returning an
2: In var(if (is.vector(x) || is.factor(x)) x else as.double(x), na.rm = na.rm) : NAs introduced by coercion.
Below is the full code for this function:
my_function <- function(df, x, y) {
x <- deparse(substitute(x))
y <- deparse(substitute(y))
formula <- as.formula(paste0(y, "~", x))
# Summary
a <- group_by(df, {{x}}) %>%
summarise(
N = n(),
Mean = mean({{y}}, na.rm = TRUE),
Sd = sd({{y}}, na.rm = TRUE)
)
# ANOVA
res.aov <<- anova_test(dv = {{y}}, wid = id, within = {{x}}, data = df)
b <- get_anova_table(res.aov, correction = c("auto"))
# Pairwise comparisons
pwc <- pairwise_t_test(data = df, formula, paired = TRUE, conf.level = 0.95,
detailed = TRUE, p.adjust.method = "bonferroni")
pwc2 <<- pwc %>% add_xy_position(x = {{x}})
# Plot
d <- ggplot(df, aes(x = {{x}}, y = {{y}})) +
stat_boxplot(aes(x = {{x}}, y = {{y}}, color = {{x}}), geom = 'errorbar', coef=1.5, width=0.4, linetype = 1) +
geom_boxplot(aes(x = {{x}}, y = {{y}}, color = {{x}}, fill = {{x}})) +
geom_jitter(aes(x = {{x}}, y = {{y}}, color = {{x}}, fill = {{x}}), width = 0.2) +
stat_summary(fun = mean, geom = "point", shape = 0, size = 2, color = "black", stroke = 1) +
stat_pvalue_manual(pwc2, tip.length = 0, hide.ns = TRUE) +
xlab(deparse(substitute(x))) + ylab(deparse(substitute(y))) +
scale_x_discrete(guide = "prism_bracket") +
theme_prism(base_size = 14) +
theme(legend.position = "NULL")
#ggsave(paste0(deparse(substitute(x)), "_",
# deparse(substitute(y)), ".png"), width=160, height=90, units="mm", dpi=600)
output <- list(a,b,pwc2,d)
return(output)
}
my_function(df, treat, vo2)
I have a huge request for tips on how to solve this.
The problem lies in the formula. Try adding the following code:
x1 <- deparse(substitute(x))
y1 <- deparse(substitute(y))
formula <- as.formula(paste0(y1, "~", x1))
# Pairwise comparisons
pwc <- pairwise_t_test(data=df, formula, paired = TRUE, conf.level = 0.95,
detailed = TRUE, p.adjust.method = "bonferroni")

Using a function within mapply to create and save scatterplots

I am trying to use mapply to automate saving scatterplots from ggplot to a folder.
To do this I have created lists of my x variable and y variable, as well as a list of the grouping variable I would like to colour my points by.
I then tried creating a function, and calling the function with mapply but the only output saved is a single blank image of the last variable in the list. Below is an example dataset.
df <- data.frame("ID" = 1:16)
df$VarA <- c(1,1,1,1,1,1,1,1,1,1,1,14,NA_real_,NA_real_,NA_real_,16)
df$VarB <- c(10,0,0,0,12,12,12,12,0,14,NA_real_,14,16,16,16,16)
df$VarC <- c(10,12,14,16,10,12,14,16,10,12,14,16,10,12,14,16)
df$VarD <- c(10,12,14,16,10,12,14,16,10,12,14,16,10,12,14,16)
df$ControlVarA <- factor(c("Group_1","Group_1","Group_1","Group_1","Group_1", "Group_1",
"Group_2","Group_2","Group_2","Group_2","Group_2","Group_2",
"Group_2","Group_2","Group_2","Group_2"))
df$ControlVarB <- factor(c("Group_1","Group_1","Group_1","Group_1","Group_1", "Group_1",
"Group_1","Group_1","Group_1","Group_1","Group_2","Group_2",
"Group_2","Group_2","Group_2","Group_2"))
df$ControlVarC <- factor(c("Group_2","Group_2","Group_2","Group_2","Group_1", "Group_1",
"Group_2","Group_2","Group_2","Group_2","Group_2","Group_2",
"Group_2","Group_2","Group_2","Group_2"))
Below is the code I used to call the lists for the x, y and colouring variable.
x_lists <- df %>% select(VarA:VarB) %>% colnames(.)
y_lists <- df %>% select(VarC:VarD) %>% colnames(.)
ControlVar_list <- df %>% select(contains("ControlVar")) %>% colnames(.)
Below is the function I have created and the mapply
save_plots <- function(dataset, x, y, z) {
first_plot <- ggplot(dataset) +
geom_point(data = subset(dataset, .data[[z]] == 'Group_1'),
aes(x = .data[[x]], y = .data[[y]], color = .data[[z]], size = 3)) +
geom_point(data = subset(dataset, .data[[z]] == 'Group_2'),
aes(x = .data[[x]], y = .data[[y]], color = .data[[z]], size = 3)) +
geom_smooth(aes(x = .data[[x]], y = .data[[y]], size = 0), method = "lm", colour="black", size=0.5) +
stat_cor(aes(x = .data[[x]], y = .data[[y]], color = .data[[z]],
label = ..rr.label..),
label.y.npc="top", label.x.npc = "left", method = "pearson",
size = 5) +
scale_color_manual(values = c("#C5BEC9", "#F2642b", "#F2642b")) +
labs(title = "test",
x = "VarA",
y = "VarB",
colour = "") +
guides(size = FALSE, color = FALSE) +
theme_bw(base_size = 14)
ggsave(sprintf("C:\\Documents\\%s.tiff", y), first_plot)
grDevices::dev.off()
}
mapply(save_plots, x_lists, y_lists, ControlVar_list, MoreArgs = list(dataset = df))
.data will not work with base R subset function. Try using dplyr::filter
library(tidyverse)
library(ggpubr)
save_plots <- function(dataset, x, y, z) {
first_plot <- ggplot(dataset) +
geom_point(data = filter(dataset, .data[[z]] == 'Group_1'),
aes(x = .data[[x]], y = .data[[y]], color = .data[[z]], size = 3)) +
geom_point(data = filter(dataset, .data[[z]] == 'Group_2'),
aes(x = .data[[x]], y = .data[[y]], color = .data[[z]], size = 3)) +
geom_smooth(aes(x = .data[[x]], y = .data[[y]], size = 0), method = "lm", colour="black", size=0.5) +
stat_cor(aes(x = .data[[x]], y = .data[[y]], color = .data[[z]],
label = ..rr.label..),
label.y.npc="top", label.x.npc = "left", method = "pearson",
size = 5) +
scale_color_manual(values = c("#C5BEC9", "#F2642b", "#F2642b")) +
labs(title = "test",
x = "VarA",
y = "VarB",
colour = "") +
guides(size = FALSE, color = FALSE) +
theme_bw(base_size = 14)
ggsave(sprintf("%s.tiff", y), first_plot)
grDevices::dev.off()
}

How to change the colors of the dots in the graph? ggpubr package

I am using the ggerrorplot () function of the ggpubr package to create the graph below. My question is whether there is any way to change the colors of the dots without changing the color of the point that represents the mean and standard deviation? Observe the image:
My code:
# loading packages
library(ggpubr)
# Create data frame
GROUP <- c()
TEST <- c()
VALUE <- c()
for (i in 0:100) {
gp <- c('Group1','Group2','Group1 and Group2')
ts <- c('Test1','Test2')
GROUP <- append(GROUP, sample(gp, 1))
TEST <- append(TEST, sample(ts, 1))
VALUE <- append(VALUE, sample(1:200, 1))
}
df <- data.frame(GROUP, TEST, VALUE)
# Seed
set.seed(123)
# Plot
ggerrorplot(df, x = "GROUP", y = "VALUE",
desc_stat = "mean_sd",
add = c("jitter"),
color = "TEST",
palette = "jco",
add.params = list(size = 0.2),
order = c('Group1','Group2','Group1 and Group2')
) +
labs(x = '', y = 'Values\n') +
theme(legend.title = element_blank())
Can you accomplish this by simply passing in color to add.params?
# loading packages
library(ggpubr)
#> Loading required package: ggplot2
# Create data frame
GROUP <- c()
TEST <- c()
VALUE <- c()
for (i in 0:100) {
gp <- c('Group1','Group2','Group1 and Group2')
ts <- c('Test1','Test2')
GROUP <- append(GROUP, sample(gp, 1))
TEST <- append(TEST, sample(ts, 1))
VALUE <- append(VALUE, sample(1:200, 1))
}
df <- data.frame(GROUP, TEST, VALUE)
# Seed
set.seed(123)
# Plot
ggerrorplot(df, x = "GROUP", y = "VALUE",
desc_stat = "mean_sd",
add = c("jitter"),
color = "TEST",
palette = "jco",
add.params = list(size = 0.2, color = "red"),
order = c('Group1','Group2','Group1 and Group2')
) +
labs(x = '', y = 'Values\n') +
theme(legend.title = element_blank())
Created on 2021-03-10 by the reprex package (v0.3.0)
Another potential workaround - replicate the plot using ggplot() and geom_linerange(), e.g.
library(ggpubr)
library(ggsci)
library(cowplot)
# Create data frame
GROUP <- c()
TEST <- c()
VALUE <- c()
for (i in 0:100) {
gp <- c('Group1','Group2','Group1 and Group2')
ts <- c('Test1','Test2')
GROUP <- append(GROUP, sample(gp, 1))
TEST <- append(TEST, sample(ts, 1))
VALUE <- append(VALUE, sample(1:200, 1))
}
df <- data.frame(GROUP, TEST, VALUE)
# Seed
set.seed(123)
data_summary <- function(data, varname, groupnames){
require(plyr)
summary_func <- function(x, col){
c(mean = mean(x[[col]], na.rm=TRUE),
sd = sd(x[[col]], na.rm=TRUE))
}
data_sum<-ddply(data, groupnames, .fun=summary_func,
varname)
data_sum <- rename(data_sum, c("mean" = varname))
return(data_sum)
}
df2 <- data_summary(df, varname = "VALUE", groupnames = c("TEST", "GROUP"))
# Plot
p1 <- ggplot(df, aes(x = factor(GROUP, levels = c('Group1','Group2','Group1 and Group2')),
y = VALUE, color = TEST)) +
geom_jitter(shape = 21, fill = "black", stroke = 0,
position = position_jitterdodge(jitter.width = 0.2)) +
geom_linerange(data = df2, aes(ymin=VALUE-sd, ymax=VALUE+sd),
position=position_dodge(width = .75)) +
geom_point(data = df2, aes(y = VALUE), size = 3,
position = position_dodge(width = 0.75)) +
scale_color_jco() +
labs(x = '', y = 'Values\n') +
theme_classic(base_size = 14) +
theme(legend.title = element_blank(),
legend.position = "top")
p2 <- ggerrorplot(df, x = "GROUP", y = "VALUE",
desc_stat = "mean_sd",
add = c("jitter"),
color = "TEST",
palette = "jco",
add.params = list(size = 0.2),
order = c('Group1','Group2','Group1 and Group2')
) +
labs(x = '', y = 'Values\n') +
theme(legend.title = element_blank())
cowplot::plot_grid(p1, p2, nrow = 1, ncol = 2, labels = "AUTO")
When you plot them side-by-side you can see that they aren't exactly the same, but this might work for you nonetheless.
Edit
An advantage of this approach is that you can adjust the 'fill' scale separately if you don't want all the dots to be the same colour, but you do want them to be different to the lines, e.g.
p1 <- ggplot(df, aes(x = factor(GROUP, levels = c('Group1','Group2','Group1 and Group2')),
y = VALUE, color = TEST)) +
geom_jitter(aes(fill = TEST), shape = 21, stroke = 0,
position = position_jitterdodge(jitter.width = 0.2)) +
geom_linerange(data = df2, aes(ymin=VALUE-sd, ymax=VALUE+sd),
position=position_dodge(width = .75)) +
geom_point(data = df2, aes(y = VALUE), size = 3,
position = position_dodge(width = 0.75)) +
scale_color_jco() +
scale_fill_npg() +
labs(x = '', y = 'Values\n') +
theme_classic(base_size = 14) +
theme(legend.title = element_blank(),
legend.position = "top")
p2 <- ggerrorplot(df, x = "GROUP", y = "VALUE",
desc_stat = "mean_sd",
add = c("jitter"),
color = "TEST",
palette = "jco",
add.params = list(size = 0.2),
order = c('Group1','Group2','Group1 and Group2')
) +
labs(x = '', y = 'Values\n') +
theme(legend.title = element_blank())
cowplot::plot_grid(p1, p2, nrow = 1, ncol = 2, labels = "AUTO")

Generating multiple geom_smooth lines of data samples

Attempting to build a new geom function here that will take a sample of points from a dataset by group, and fit a number of local regressions through the individual subsets. This would generate multiple local regression lines as samples of a full dataset. In the end generating something akin to this:
Though I'm continuing to get errors with the function I've built below (with reprex). Any assistance is appreciated. Thank you!
library(ggplot2)
library(dplyr)
geom_mline <- function(mapping = NULL, data = NULL, stat = "mline",
position = "identity", show.legend = NA,
inherit.aes = TRUE, na.rm = TRUE,
SPAN = .9, N_size = 50, N_LOESS = 50, ...) {
layer(
geom = geomMline,
mapping = mapping,
data = data,
stat = stat,
position = position,
show.legend = show.legend,
inherit.aes = inherit.aes,
params = list(SPAN=SPAN,
N_size=N_size,
N_LOESS=N_LOESS,
...)
)
}
geomMline <- ggproto("geomMline", GeomLine,
required_aes = c("x", "y"),
default_aes = aes(colour = "black", size = 0.5, linetype = 1, alpha = NA)
)
stat_mline <- function(mapping = NULL, data = NULL, geom = "line",
position = "identity", show.legend = NA, inherit.aes = TRUE,
SPAN = .9, N_size = 50, N_LOESS = 50, ...) {
layer(
stat = StatMline,
data = data,
mapping = mapping,
geom = geom,
position = position,
show.legend = show.legend,
inherit.aes = inherit.aes,
params = list(SPAN=SPAN,
N_size=N_size,
N_LOESS=N_LOESS,
...
)
)
}
StatMline <- ggproto("StatMline", Stat,
required_aes = c("x", "y"),
compute_group = function(self, data, scales, params,
SPAN = .9, N_size = 50, N_LOESS = 50) {
tf <- tempfile(fileext=".png")
png(tf)
plot.new()
colnames(data) <- c("x", "variable", "y")
LOESS_DF <- data.frame(y = seq(min(data$x),
max(data$x),
length.out = 50))
for(i in 1:N_LOESS){
# sample N_size points
df_sample <- sample_n(data, N_size)
# fit a loess
xx <- df_sample$x
yy <- df_sample$y
tp_est <- loess(yy ~ xx , span = SPAN)
# predict accross range of x using loess model
loess_vec <- data.frame(
predict(tp_est, newdata =
data.frame(xx = seq(min(data$x), max(data$x), length.out = 500))))
colnames(loess_vec) <- as.character(i)
# repeat x times
LOESS_DF <- cbind(LOESS_DF,loess_vec)
#str(LOESS_DF)
}
invisible(dev.off())
unlink(tf)
data.frame(reshape2::melt(LOESS_DF, id = "y"))
}
)
# dummy data
library(reshape2)
x <- seq(1,1000,1)
y1 <- rnorm(n = 1000,mean = x*2^1.1, sd = 200)
y2 <- rnorm(n = 1000,mean = x*1, sd = 287.3)
y3 <- rnorm(n = 1000,mean = x*1.1, sd = 100.1)
data <- data.frame(x , y1, y2, y3)
data <- melt(data, id.vars = "x")
str(data)
ggplot(data,aes(x,value,group = variable, color = va
riable))+geom_point()
ggplot(data = data, aes(x = x, y = value, group=variable, color = variable)) +
#geom_point(color="black") +
#geom_smooth(se=FALSE, linetype="dashed", size=0.5) +
#stat_mline(SPAN = .2, N_size = 50, N_LOESS = 5)
geom_mline(SPAN = .2, N_size = 50, N_LOESS = 5)
#data <- subset(data, variable == "y2")
You could use the existing geom_smooth geom and use lapply to generate geom_smooth calls from multiple random samples from the original data frame. For example:
# Fake data
set.seed(2)
dat = data.frame(x = runif(100, 0, 10))
dat$y = 2*dat$x - 0.5*dat$x^2 - 5 + rnorm(100, 0, 5)
ggplot(dat, aes(x, y)) +
geom_point() +
lapply(1:10, function(i) {
geom_smooth(data=dat[sample(1:nrow(dat), 20), ], se=FALSE)
})
Or, keeping it all in the tidyverse:
library(tidyverse)
ggplot(dat, aes(x, y)) +
geom_point() +
map(1:10, ~geom_smooth(data=dat[sample(1:nrow(dat), 20), ], se=FALSE))
Here's a way to plot the quantiles within ggplot. I'm not sure if it's possible to get stat_quantile to plot a ribbon. To get that, you might have to calculate the quantile regression outside of ggplot and add use geom_ribbon to add the values.
ggplot(dat, aes(x, y)) +
geom_point() +
geom_quantile(quantiles=c(0.1, 0.5, 0.9), formula=y ~ poly(x, 2),
aes(color=factor(..quantile..), size=factor(..quantile..))) +
scale_color_manual(values=c("red","blue","red")) +
scale_size_manual(values=c(1,2,1)) +
labs(colour="Quantile") +
guides(colour=guide_legend(reverse=TRUE), size=FALSE) +
theme_classic()

Create dynamic labels for geom_smooth lines

I have a changing df and I am grouping different values c.
With ggplot2 I plot them with the following code to get a scatterplott with multiple linear regression lines (geom_smooth)
ggplot(aes(x = a, y = b, group = c)) +
geom_point(shape = 1, aes(color = c), alpha = alpha) +
geom_smooth(method = "lm", aes(group = c, color = c), se = F)
Now I want to display on each geom_smooth line in the plot a label with the value of the group c.
This has to be dynamic, because I can not write new code when my df changes.
Example: my df looks like this
a b c
----------------
1.6 24 100
-1.4 43 50
1 28 100
4.3 11 50
-3.45 5.2 50
So in this case I would get 3 geom_smooth lines in the plot with different colors.
Now I simply want to add a text label to the plot with "100" next to the geom_smooth with the group c = 100 and a text label with "50"to the line for the group c = 50, and so on... as new groups get introduced in the df, new geom_smooth lines are plotted and need to be labeled.
the whole code for the plot:
ggplot(aes(x = a, y = b, group = c), data = df, na.rm = TRUE) +
geom_point(aes(color = GG, size = factor(c)), alpha=0.3) +
scale_x_continuous(limits = c(-200,2300))+
scale_y_continuous(limits = c(-1.8,1.5))+
geom_hline(yintercept=0, size=0.4, color="black") +
scale_color_distiller(palette="YlGnBu", na.value="white") +
geom_smooth(method = "lm", aes(group = factor(GG), color = GG), se = F) +
geom_label_repel(data = labelInfo, aes(x= max, y = predAtMax, label = label, color = label))
You can probably do it if you pick the location you want the lines labelled. Below, I set them to label at the far right end of each line, and used ggrepel to avoid overlapping labels:
library(ggplot2)
library(ggrepel)
library(dplyr)
set.seed(12345)
df <-
data.frame(
a = rnorm(100,2,0.5)
, b = rnorm(100, 20, 5)
, c = factor(sample(c(50,100,150), 100, TRUE))
)
labelInfo <-
split(df, df$c) %>%
lapply(function(x){
data.frame(
predAtMax = lm(b~a, data=x) %>%
predict(newdata = data.frame(a = max(x$a)))
, max = max(x$a)
)}) %>%
bind_rows
labelInfo$label = levels(df$c)
ggplot(
df
, aes(x = a, y = b, color = c)
) +
geom_point(shape = 1) +
geom_smooth(method = "lm", se = F) +
geom_label_repel(data = labelInfo
, aes(x= max
, y = predAtMax
, label = label
, color = label))
This method might work for you. It uses ggplot_build to access the rightmost point in the actual geom_smooth lines to add a label by it. Below is an adaptation that uses Mark Peterson's example.
library(ggplot2)
library(ggrepel)
library(dplyr)
set.seed(12345)
df <-
data.frame(
a = rnorm(100,2,0.5)
, b = rnorm(100, 20, 5)
, c = factor(sample(c(50,100,150), 100, TRUE))
)
p <-
ggplot(df, aes(x = a, y = b, color = c)) +
geom_point(shape = 1) +
geom_smooth(method = "lm", se = F)
p.smoothedmaxes <-
ggplot_build(p)$data[[2]] %>%
group_by( group) %>%
filter( x == max(x))
p +
geom_text_repel( data = p.smoothedmaxes,
mapping = aes(x = x, y = y, label = round(y,2)),
col = p.smoothedmaxes$colour,
inherit.aes = FALSE)
This came up for me today and I landed on this solution with data = ~fn()
library(tidyverse)
library(broom)
mpg |>
ggplot(aes(x = displ, y = hwy, colour = class, label = class)) +
geom_count(alpha = 0.1) +
stat_smooth(alpha = 0.6, method = lm, geom = "line", se = FALSE) +
geom_text(
aes(y = .fitted), size = 3, hjust = 0, nudge_x = 0.1,
data = ~{
nest_by(.x, class) |>
summarize(broom::augment(lm(hwy ~ displ, data = data))) |>
slice_max(order_by = displ, n = 1)
}
) +
scale_x_continuous(expand = expansion(add = c(0, 1))) +
theme_minimal()
Or do it with a function
#' #examples
#' last_lm_points(df = mpg, formula = hwy~displ, group = class)
last_lm_points <- function(df, formula, group) {
# df <- mpg; formula <- as.formula(hwy~displ); group <- sym("class");
x_arg <- formula[[3]]
df |>
nest_by({{group}}) |>
summarize(broom::augment(lm(formula, data = data))) |>
slice_max(order_by = get(x_arg), n = 1)
}
mpg |>
ggplot(aes(displ, hwy, colour = class, label = class)) +
geom_count(alpha = 0.1) +
stat_smooth(alpha = 0.6, method = lm, geom = "line", se = FALSE) +
geom_text(
aes(y = .fitted), size = 3, hjust = 0, nudge_x = 0.1,
data = ~last_lm_points(.x, hwy~displ, class)
) +
scale_x_continuous(expand = expansion(add = c(0, 1))) +
theme_minimal()

Resources