I have created a basketball animation using gganimate, as seen below. You can get the full animation on my blog post (Look at animation under "Paul Pierce Isolation").
Now, there is one problem with this: once the seconds reach single digits, it shows only one digit (7). Its doing what its told to do, but I want it to display (07) so that it closely matches a NBA broadcast.
I'm thinking of using transformers from the glue package, but I'm not sure how I'd do that.
Here is my code:
# Function to grab jersey numbers
grab_jersey <- function(player_id) {
swish_url <- paste0("https://www.swishanalytics.com/nba/players/player?id=", player_id)
swish <- read_html(swish_url)
result <- swish %>%
html_node(".mobile-hide") %>%
html_text() %>%
# Extract out numeric
parse_number()
result
}
## Read in dataset
e.dat_test <- read_csv("https://raw.githubusercontent.com/howardbaek/nba-animation/master/test_df.csv")
# Replace _ent with jersey numbers
a1_ent_jersey <- e.dat_test %>%
pull(a1_ent) %>%
first() %>%
grab_jersey()
a2_ent_jersey <- e.dat_test %>%
pull(a2_ent) %>%
first() %>%
grab_jersey()
a3_ent_jersey <- e.dat_test %>%
pull(a3_ent) %>%
first() %>%
grab_jersey()
a4_ent_jersey <- e.dat_test %>%
pull(a4_ent) %>%
first() %>%
grab_jersey()
a5_ent_jersey <- e.dat_test %>%
pull(a5_ent) %>%
first() %>%
grab_jersey()
h1_ent_jersey <- e.dat_test %>%
pull(h1_ent) %>%
first() %>%
grab_jersey()
h2_ent_jersey <- e.dat_test %>%
pull(h2_ent) %>%
first() %>%
grab_jersey()
h3_ent_jersey <- e.dat_test %>%
pull(h3_ent) %>%
first() %>%
grab_jersey()
h4_ent_jersey <- e.dat_test %>%
pull(h4_ent) %>%
first() %>%
grab_jersey()
h5_ent_jersey <- e.dat_test %>%
pull(h5_ent) %>%
first() %>%
grab_jersey()
# Mutate jersey number columns
e.dat_test <- e.dat_test %>%
mutate(a1_ent_jersey = a1_ent_jersey,
a2_ent_jersey = a2_ent_jersey,
a3_ent_jersey = a3_ent_jersey,
a4_ent_jersey = a4_ent_jersey,
a5_ent_jersey = a5_ent_jersey,
h1_ent_jersey = h1_ent_jersey,
h2_ent_jersey = h2_ent_jersey,
h3_ent_jersey = h3_ent_jersey,
h4_ent_jersey = h4_ent_jersey,
h5_ent_jersey = h5_ent_jersey) %>%
mutate(quarter_processed = case_when(
quarter == 1 ~ "1ST",
quarter == 2 ~ "2ND",
quarter == 3 ~ "3RD",
quarter == 4 ~ "4TH",
TRUE ~ "NA"
)) %>%
mutate(game_clock_minutes = game_clock %/% 60) %>%
mutate(game_clock_seconds = game_clock %% 60)
possid_quarter <- e.dat_test %>%
pull(quarter_processed) %>%
first()
# Save animation as object
anim <- fullcourt() +
# Home Players + Jersey Numbers
geom_point(data = e.dat_test, aes(x = h1_x, y = h1_y, group = possID), size = 6, color = "lightskyblue1") +
geom_text(data = e.dat_test, aes(x = h1_x, y = h1_y, group = possID, label = h1_ent_jersey), color = 'black', alpha = 0.3) +
geom_point(data = e.dat_test, aes(x = h2_x, y = h2_y, group = possID), size = 6, color = "lightskyblue1") +
geom_text(data = e.dat_test, aes(x = h2_x, y = h2_y, group = possID, label = h2_ent_jersey), color = 'black', alpha = 0.3) +
geom_point(data = e.dat_test, aes(x = h3_x, y = h3_y, group = possID), size = 6, color = "lightskyblue1") +
geom_text(data = e.dat_test, aes(x = h3_x, y = h3_y, group = possID, label = h3_ent_jersey), color = 'black', alpha = 0.3) +
geom_point(data = e.dat_test, aes(x = h4_x, y = h4_y, group = possID), size = 6, color = "lightskyblue1") +
geom_text(data = e.dat_test, aes(x = h4_x, y = h4_y, group = possID, label = h4_ent_jersey), color = 'black', alpha = 0.3) +
geom_point(data = e.dat_test, aes(x = h5_x, y = h5_y, group = possID), size = 6, color = "lightskyblue1") +
geom_text(data = e.dat_test, aes(x = h5_x, y = h5_y, group = possID, label = h5_ent_jersey), color = 'black', alpha = 0.3) +
# Away Players
geom_point(data = e.dat_test, aes(x = a1_x, y = a1_y, group = possID), size = 6, color = "salmon1") +
geom_text(data = e.dat_test, aes(x = a1_x, y = a1_y, group = possID, label = a1_ent_jersey), color = 'black', alpha = 0.3) +
geom_point(data = e.dat_test, aes(x = a2_x, y = a2_y, group = possID), size = 6, color = "salmon1") +
geom_text(data = e.dat_test, aes(x = a2_x, y = a2_y, group = possID, label = a2_ent_jersey), color = 'black', alpha = 0.3) +
geom_point(data = e.dat_test, aes(x = a3_x, y = a3_y, group = possID), size = 6, color = "salmon1") +
geom_text(data = e.dat_test, aes(x = a3_x, y = a3_y, group = possID, label = a3_ent_jersey), color = 'black', alpha = 0.3) +
geom_point(data = e.dat_test, aes(x = a4_x, y = a4_y, group = possID), size = 6, color = "salmon1") +
geom_text(data = e.dat_test, aes(x = a4_x, y = a4_y, group = possID, label = a4_ent_jersey), color = 'black', alpha = 0.3) +
geom_point(data = e.dat_test, aes(x = a5_x, y = a5_y, group = possID), size = 6, color = "salmon1") +
geom_text(data = e.dat_test, aes(x = a5_x, y = a5_y, group = possID, label = a5_ent_jersey), color = 'black', alpha = 0.3) +
# Ball
geom_point(data = e.dat_test, aes(x = x, y = y, group = possID), size = 3, color = "gold") +
transition_time(time = -game_clock) +
ggtitle(paste0(possid_quarter, " ", "{-frame_time %/% 60}", ":", "{round(-frame_time %% 60, 0)}")) +
theme(plot.title = element_text(hjust = 0.5))
anim
Related
I have below ggplot:
library(ggplot2)
data = rbind(data.frame('val' = c(10, 30, 15), 'name' = c('A', 'B', 'C'), group = 'gr1'), data.frame('val' = c(30, 40, 12), 'name' = c('A', 'B', 'C'), group = 'gr2'))
ggplot(data, # Draw barplot with grouping & stacking
aes(x = group,
y = val,
fill = name)) +
geom_bar(stat = "identity",
position = "stack", width = .1)
With this, I am getting below plot
However, I want to connect these bars with a curved area where the area would be equal to the value of the corresponding bar-component. A close example could be like,
Is there any way to achieve this with ggplot?
Any pointer will be very helpful.
This is something like an alluvial plot. There are various extension packages that could help you create such a plot, but it is possible to do it in ggplot directly using a bit of data manipulation.
library(tidyverse)
alluvia <- data %>%
group_by(name) %>%
summarize(x = seq(1, 2, 0.01),
val = pnorm(x, 1.5, 0.15) * diff(val) + first(val))
ggplot(data,
aes(x = as.numeric(factor(group)),
y = val,
fill = name)) +
geom_bar(stat = "identity",
position = "stack", width = .1) +
geom_area(data = alluvia, aes(x = x), position = "stack", alpha = 0.5) +
scale_x_continuous(breaks = 1:2, labels = levels(factor(data$group)),
name = "Group", expand = c(0.25, 0.25)) +
scale_fill_brewer(palette = "Set2") +
theme_light(base_size = 20)
EDIT
A more generalized solution for more than 2 groups would be
library(tidyverse)
alluvia <- data %>%
mutate(group = as.numeric(factor(group)),
name = factor(name)) %>%
arrange(group) %>%
group_by(name) %>%
mutate(next_group = lead(group),
next_val = lead(val)) %>%
filter(!is.na(next_val)) %>%
group_by(name, group) %>%
summarise(x = seq(group + 0.01, next_group - 0.01, 0.01),
val = (next_val - val) * pnorm(x, group + 0.5, 0.15) + val)
ggplot(data,
aes(x = as.numeric(factor(group)),
y = val,
fill = name)) +
geom_bar(stat = "identity",
position = "stack", width = .1) +
geom_area(data = alluvia, aes(x = x), position = "stack", alpha = 0.5) +
scale_x_continuous(breaks = seq(length(unique(data$group))),
labels = levels(factor(data$group)),
name = "Group", expand = c(0.25, 0.25)) +
scale_fill_brewer(palette = "Set2") +
theme_light(base_size = 20)
How can I do to insert in the "X axis" the months abbreviations ("xi") instead of the numbers?
I need to switch in the X axis the numbers for months abbreviations ("xi").
Reproductive example
library(ggplot2)
library(dplyr)
x<-c("2014-06","2014-07","2014-08","2014-09","2014-10","2014-11","2014-12")
xi<-c("Jun","Jul","Aug","Sep","Oct","Nov","Dez")
values.observed<-c(3.698,2.132,2.716,4.279,3.918,4.493,4.265)
values.estimated<-c(2.670,2.689,3.078,3.735,3.963,4.238,4.315)
yii<-c(0.629,1.394,1.957,2.677,2.913,3.190,3.299)
yiii<-c(4.567,3.982,4.185,4.785,4.996,5.279,5.349)
df<-data.frame(x,xi,values.observed,values.estimated,yii,yiii)
Year <- seq(min(as.integer(df$x)), max(as.integer(df$x)), by = 1)
df %>%
mutate(x = as.integer(x)) %>%
tidyr::pivot_longer(
cols = starts_with('values'),
names_to = 'group',
values_to = 'values'
) %>%
mutate(group = ifelse(group == "values.observed", "observed", "estimated")) %>%
ggplot(aes(x = x, y = values)) +
geom_line(aes(color = group), size=1.3) +
geom_ribbon(aes(ymin = yii, ymax = yiii), alpha = 0.3, show.legend = FALSE) +
scale_color_manual(values = c(observed = 'green', estimated = 'red'))+
scale_x_continuous(breaks = Year, labels = Year) +
ylab("X") +
xlab("Months") +
theme(axis.text.x = element_text(angle = -15, vjust = 0))
You can group the first geom_line with group and force the second geom_ribbon to take use as.numeric(xi) :
df$xi = factor(df$xi,levels=df$xi)
df %>%
tidyr::pivot_longer(
cols = starts_with('values'),
names_to = 'group',
values_to = 'values'
) %>%
mutate(group = ifelse(group == "values.observed", "observed", "estimated")) %>%
ggplot() +
geom_line(aes(x = xi, y = values,color = group,group = group), size=1.3) +
geom_ribbon(aes(x = as.numeric(xi),y = values,
ymin = yii, ymax = yiii), alpha = 0.3, show.legend = FALSE) +
scale_color_manual(values = c(observed = 'green', estimated = 'red'))+
ylab("X") +
xlab("Months") +
theme(axis.text.x = element_text(angle = -15, vjust = 0))
Or with what you have done, just provide the labels:
labels = split(as.character(df$xi),as.integer(df$xi))
df %>%
mutate(x = as.integer(x)) %>%
tidyr::pivot_longer(
cols = starts_with('values'),
names_to = 'group',
values_to = 'values'
) %>%
mutate(group = ifelse(group == "values.observed", "observed", "estimated")) %>%
ggplot(aes(x = x, y = values)) +
geom_line(aes(color = group), size=1.3) +
geom_ribbon(aes(ymin = yii, ymax = yiii), alpha = 0.3, show.legend = FALSE) +
scale_color_manual(values = c(observed = 'green', estimated = 'red'))+
scale_x_continuous(breaks = as.numeric(names(labels)), labels = labels) +
ylab("X") +
xlab("Months") +
theme(axis.text.x = element_text(angle = -15, vjust = 0))
I'm creating an illustration of how loess works. My two queries are at the end of this question. First, setup:
library(tidyverse)
data(melanoma, package = "lattice")
mela <- as_tibble(melanoma)
tric = function(x) if_else(abs(x) < 1, (1 - abs(x)^3)^3, 0)
scl = function(x) (x - min(x))/(max(x) - min(x))
mela1 <- mela %>%
slice(1:9) %>%
mutate(dist = abs(year - year[5]),
scaled = scl(dist),
weight = tric(scaled)
)
mod1 <- lm(incidence ~ year, data = mela1, weights = weight) %>%
augment(., mela1)
mela2 <- mela %>%
slice(10:18) %>%
mutate(dist = abs(year - year[5]),
scaled = scl(dist),
weight = tric(scaled)
)
mod2 <- lm(incidence ~ year, data = mela2, weights = weight) %>%
augment(., mela2)
mela3 <- mela %>%
slice(19:27) %>%
mutate(dist = abs(year - year[5]),
scaled = scl(dist),
weight = tric(scaled)
)
mod3 <- lm(incidence ~ year, data = mela3, weights = weight) %>%
augment(., mela3)
mela4 <- mela %>%
slice(28:37) %>%
mutate(dist = abs(year - year[5]),
scaled = scl(dist),
weight = tric(scaled)
)
mod4 <- lm(incidence ~ year, data = mela4, weights = weight) %>%
augment(., mela4)
The main plot:
col <- rainbow_hcl(start = 12, 4, l = 20)
colB <- rainbow_hcl(start = 12, 4, l = 100)
main <- ggplot(data = mela, aes(x = year, y = incidence)) +
# segment 1
geom_segment(
aes(x = 1936, xend = 1944, y = 2.115717, yend = 2.115717)) +
# segment 2
geom_segment(
aes(x = 1945, xend = 1953, y = 3.473217, yend = 3.473217)) +
# segment 3
geom_segment(
aes(x = 1954, xend = 1962, y = 1.170247, yend = 1.170247)) +
# segment 4
geom_segment(
aes(x = 1963, xend = 1972, y = 2.7, yend = 2.7)) +
geom_point(data = mod1, color = col[1], shape = 1) +
geom_point(data = mod2, color = col[2], shape = 0) +
geom_point(data = mod3, color = col[4], shape = 5) +
geom_point(data = mod4, color = col[3], shape = 2) +
geom_line(data = mod1, aes(x = year, y = .fitted), color = col[1]) +
geom_line(data = mod2, aes(x = year, y = .fitted), color = col[2]) +
geom_line(data = mod3, aes(x = year, y = .fitted), color = col[4]) +
geom_line(data = mod4, aes(x = year, y = .fitted), color = col[3]) +
scale_x_continuous(breaks = c(1940, 1949, 1958, 1967))
Insets
inset1 <- ggplot(data = mod1, aes(x = year, y = weight)) +
geom_line(color = col[1]) +
geom_area(fill = colB[1]) +
theme_void()
inset2 <- ggplot(data = mod2, aes(x = year, y = weight)) +
geom_line(color = col[12) +
geom_area(fill = colB[2]) +
theme_void()
inset3 <- ggplot(data = mod3, aes(x = year, y = weight)) +
geom_line(color = col[3]) +
geom_area(fill = colB[3]) +
theme_void()
inset4 <- ggplot(data = mod4, aes(x = year, y = weight)) +
geom_line(color = col[4]) +
geom_area(fill = colB[4]) +
theme_void()
Question 1: How do I place the four insets so that the y = 0 of the weight function is at the height of the corresponding geom_segment? I would like the inset heights = 2 in the main figure coordinates.
Question 2: How do I set the color of each segment to the color of the corresponding inset?
Not sure whether I got everything right. But I tried my best. (; You could simplify your code considerably
... by binding you models data into one dataframe and also the data for the segments.
... mapping on aesthetics and setting the colors and shape via some named vectors and scale_xxx_manual
For your insets there is no need to make separate plots and trying to put them into the main plot. You could simply add them via an additional geom_line and a geom_ribbon. To get the heights of the segments join the segments data to the models data so that you can set the starting value for the geom_ribbon according to the y value of the segment
library(tidyverse)
library(broom)
library(colorspace)
col <- setNames(col, c("mod1", "mod2", "mod4", "mod3"))
colB <- setNames(colB, c("mod1", "mod2", "mod4", "mod3"))
shapes <- setNames(c(1, 0, 5, 2), c("mod1", "mod2", "mod3", "mod4"))
mods <- list(mod1 = mod1, mod2 = mod2, mod3 = mod3, mod4 = mod4) %>%
bind_rows(.id = "mod")
# segments data
dseg <- tribble(
~mod, ~x, ~xend, ~y,
"mod1", 1936, 1944, 2.115717,
"mod2", 1945, 1953, 3.473217,
"mod3", 1954, 1962, 1.170247,
"mod4", 1963, 1972, 2.7,
)
main <- ggplot(data = mela, aes(x = year, y = incidence)) +
geom_segment(data = dseg, aes(x = x, xend = xend, y = y, yend = y, color = mod)) +
geom_point(data = mods, aes(color = mod, shape = mod)) +
geom_line(data = mods, aes(x = year, y = .fitted, color = mod)) +
scale_color_manual(values = col) +
scale_shape_manual(values = shapes) +
scale_x_continuous(breaks = c(1940, 1949, 1958, 1967)) +
guides(color = FALSE, shape = FALSE, fill = FALSE)
mods1 <- left_join(mods, select(dseg, mod, y), by = "mod")
# Add insets
main +
geom_line(data = mods1, aes(x = year, y = weight + y, color = mod, group = mod)) +
geom_ribbon(data = mods1, aes(x = year, ymin = y, ymax = weight + y, fill = mod, group = mod)) +
scale_fill_manual(values = colB)
I want text labels were above or under of bar cap depending on where is more space for them. Now it's always down which is not always looks good:
Here is my code:
library(tidyr)
library(ggplot2)
library(dplyr)
library(stringr)
library(purrr)
numa.nodes <- tibble (
numa_name = c("numa_01","numa_01","numa_01","numa_01","numa_01","numa_01","numa_02","numa_02","numa_02","numa_02"),
counter_name =c("cpu01","cpu02","cpu03","cpu04","memory_used","memory_total","cpu01","cpu02","memory_used","memory_total"),
value = c(sample(0:100,4), sample(0:32,1), 32, sample(0:100,1), sample(0:100,1), sample(0:128,1), 128)
)
numa.nodes <- numa.nodes %>% add_row(
numa_name = c("numa_03","numa_03","numa_03","numa_03","numa_03","numa_03","numa_04","numa_04","numa_04","numa_04"),
counter_name =c("cpu01","cpu02","cpu03","cpu04","memory_used","memory_total","cpu01","cpu02","memory_used","memory_total"),
value = c(sample(0:100,4), sample(0:32,1), 32, sample(0:100,1), sample(0:100,1), sample(0:128,1), 128)
)
numa.nodes <- numa.nodes %>% add_row(
numa_name = c("numa_05","numa_05","numa_05","numa_05","numa_05","numa_05","numa_05"),
counter_name =c("cpu01","cpu02","cpu03","cpu04","cpu05","memory_used","memory_total"),
value = c(sample(1:100,5), sample(1:64,1), 64)
)
numa.nodes <- numa.nodes %>% mutate(counter_name=factor(counter_name,levels = unique(counter_name),ordered = T))
memory_columns <- numa.nodes %>% filter(counter_name=='memory_total')
memory_y_scale <- max(memory_columns$value, na.rm = TRUE) + 6
plot_numa = function(num){
df = numa.nodes %>% filter(str_detect(numa_name, num))
cpu_plot = df %>%
filter(str_detect(counter_name, "cpu")) %>%
ggplot(aes(x = counter_name)) +
geom_col(aes(y = 100), fill = "white", color = "black") +
geom_col(aes(y = value), fill = "#00AFBB", color = "black") +
geom_text(aes(y = value, label = paste0(value,"%")), nudge_y = 5, color = "black") +
theme_bw() +
labs(x = "CPU", y = "")
memory_plot = df %>%
filter(str_detect(counter_name, "memory")) %>%
pivot_wider(names_from = counter_name, values_from = value) %>%
ggplot(aes(x = "") ) +
geom_col(aes(y = memory_total), fill = "white", color = "black") +
geom_col(aes(y = memory_used), fill = "#FC4E07", color = "black") +
geom_text(aes(label = paste(memory_total, "GB"), y = memory_total), nudge_y = 5, color = "black") +
geom_text(aes(label = paste(memory_used, "GB"), y = memory_used), nudge_y = -3, color = "black") +
theme_bw() +
ylim(0, memory_y_scale) +
labs(x = "Memory", y = "")
ggpubr::ggarrange(cpu_plot, memory_plot, ncol = 2) %>% ggpubr::annotate_figure(top = paste("NUMA",num))
}
numa_numbers <- unique(numa.nodes$numa_name) %>% str_remove ("numa_")
ggpubr::ggarrange(plotlist = map(.x = numa_numbers, .f = ~plot_numa(num = .x)))
I tried to change this line:
geom_text(aes(label = paste(memory_used, "GB"), y = memory_used), nudge_y = -3, color = "black")
to something like that:
geom_text(aes(label = paste(memory_used, "GB"), y = memory_used),nudge_y = ifelse( (memory_total-memory_used) > 10, 5, -3)
, color = "black")
But I've got an error:
Error in ifelse((memory_total - memory_used) > 10, 5, -3) :
object 'memory_total' not found
Is there a better way to print labels optimal way?
What am I doing wrong?
How to change color of label to more contrast ie black on white, white on red?
Think of it this way: The nudge value will be different (potentially) for every observation in your data frame. That means that this is something that should be handled within aes(), where stuff is designed to change with your data, rather than nudge_y, which is designed to be a constant (and complains if used otherwise).
So, the solution is to do away entirely with nudge_y and build your ifelse() statement directly into aes(y=...).
In this case, here's the replacement for that particular geom_text() line:
# to see the same plot posted here, put this at the top of your code
set.seed(7331)
...
# plot code...
... +
geom_text(aes(
label = paste(memory_used, "GB"),
y = ifelse((memory_total-memory_used > 10), memory_used + 5, memory_used - 3)),
color = "black") +
I have a changing df and I am grouping different values c.
With ggplot2 I plot them with the following code to get a scatterplott with multiple linear regression lines (geom_smooth)
ggplot(aes(x = a, y = b, group = c)) +
geom_point(shape = 1, aes(color = c), alpha = alpha) +
geom_smooth(method = "lm", aes(group = c, color = c), se = F)
Now I want to display on each geom_smooth line in the plot a label with the value of the group c.
This has to be dynamic, because I can not write new code when my df changes.
Example: my df looks like this
a b c
----------------
1.6 24 100
-1.4 43 50
1 28 100
4.3 11 50
-3.45 5.2 50
So in this case I would get 3 geom_smooth lines in the plot with different colors.
Now I simply want to add a text label to the plot with "100" next to the geom_smooth with the group c = 100 and a text label with "50"to the line for the group c = 50, and so on... as new groups get introduced in the df, new geom_smooth lines are plotted and need to be labeled.
the whole code for the plot:
ggplot(aes(x = a, y = b, group = c), data = df, na.rm = TRUE) +
geom_point(aes(color = GG, size = factor(c)), alpha=0.3) +
scale_x_continuous(limits = c(-200,2300))+
scale_y_continuous(limits = c(-1.8,1.5))+
geom_hline(yintercept=0, size=0.4, color="black") +
scale_color_distiller(palette="YlGnBu", na.value="white") +
geom_smooth(method = "lm", aes(group = factor(GG), color = GG), se = F) +
geom_label_repel(data = labelInfo, aes(x= max, y = predAtMax, label = label, color = label))
You can probably do it if you pick the location you want the lines labelled. Below, I set them to label at the far right end of each line, and used ggrepel to avoid overlapping labels:
library(ggplot2)
library(ggrepel)
library(dplyr)
set.seed(12345)
df <-
data.frame(
a = rnorm(100,2,0.5)
, b = rnorm(100, 20, 5)
, c = factor(sample(c(50,100,150), 100, TRUE))
)
labelInfo <-
split(df, df$c) %>%
lapply(function(x){
data.frame(
predAtMax = lm(b~a, data=x) %>%
predict(newdata = data.frame(a = max(x$a)))
, max = max(x$a)
)}) %>%
bind_rows
labelInfo$label = levels(df$c)
ggplot(
df
, aes(x = a, y = b, color = c)
) +
geom_point(shape = 1) +
geom_smooth(method = "lm", se = F) +
geom_label_repel(data = labelInfo
, aes(x= max
, y = predAtMax
, label = label
, color = label))
This method might work for you. It uses ggplot_build to access the rightmost point in the actual geom_smooth lines to add a label by it. Below is an adaptation that uses Mark Peterson's example.
library(ggplot2)
library(ggrepel)
library(dplyr)
set.seed(12345)
df <-
data.frame(
a = rnorm(100,2,0.5)
, b = rnorm(100, 20, 5)
, c = factor(sample(c(50,100,150), 100, TRUE))
)
p <-
ggplot(df, aes(x = a, y = b, color = c)) +
geom_point(shape = 1) +
geom_smooth(method = "lm", se = F)
p.smoothedmaxes <-
ggplot_build(p)$data[[2]] %>%
group_by( group) %>%
filter( x == max(x))
p +
geom_text_repel( data = p.smoothedmaxes,
mapping = aes(x = x, y = y, label = round(y,2)),
col = p.smoothedmaxes$colour,
inherit.aes = FALSE)
This came up for me today and I landed on this solution with data = ~fn()
library(tidyverse)
library(broom)
mpg |>
ggplot(aes(x = displ, y = hwy, colour = class, label = class)) +
geom_count(alpha = 0.1) +
stat_smooth(alpha = 0.6, method = lm, geom = "line", se = FALSE) +
geom_text(
aes(y = .fitted), size = 3, hjust = 0, nudge_x = 0.1,
data = ~{
nest_by(.x, class) |>
summarize(broom::augment(lm(hwy ~ displ, data = data))) |>
slice_max(order_by = displ, n = 1)
}
) +
scale_x_continuous(expand = expansion(add = c(0, 1))) +
theme_minimal()
Or do it with a function
#' #examples
#' last_lm_points(df = mpg, formula = hwy~displ, group = class)
last_lm_points <- function(df, formula, group) {
# df <- mpg; formula <- as.formula(hwy~displ); group <- sym("class");
x_arg <- formula[[3]]
df |>
nest_by({{group}}) |>
summarize(broom::augment(lm(formula, data = data))) |>
slice_max(order_by = get(x_arg), n = 1)
}
mpg |>
ggplot(aes(displ, hwy, colour = class, label = class)) +
geom_count(alpha = 0.1) +
stat_smooth(alpha = 0.6, method = lm, geom = "line", se = FALSE) +
geom_text(
aes(y = .fitted), size = 3, hjust = 0, nudge_x = 0.1,
data = ~last_lm_points(.x, hwy~displ, class)
) +
scale_x_continuous(expand = expansion(add = c(0, 1))) +
theme_minimal()