ggplot2: How to get geom_text() to play nice with facet_grid()? - r

So I'm trying to plot a couple of curves using ggplot(), and I would like to have each curve sitting in its own plot in a facet_grid. All of this works fine.
The problem is that I'd also like to annotate each curve with the x value corresponding to its peak y value. I tried implementing this with geom_text() as shown below, but it doesn't quite work. It's clearly printing something onto the plot, but not what I hoped for, which is that each plot has its corresponding x value printed on it at the location (x, max(y)).
I suspect I've not implemented the ifelse() correctly, but I'm not experienced enough with R to figure out what exactly the problem is.
Any suggestions on where I'm going wrong?
Output:
Data + code:
library(ggplot2)

# First curve: normal density with sd = 1, tagged with the facet label "z".
x <- seq(5, 15, length.out = 1000)
y <- dnorm(x, mean = 10, sd = 1)
z <- rep("z", times = 1000)

# Second curve: same x grid, sd = 2, tagged with the facet label "z1".
x1 <- seq(5, 15, length.out = 1000)
y1 <- dnorm(x1, mean = 10, sd = 2)
z1 <- rep("z1", times = 1000)

# Stack both curves into one long data frame (2000 rows).
x <- c(x, x1)
y <- c(y, y1)
z <- c(z, z1)
df <- data.frame(x, y, z)

# One facet per curve. The text layer is the attempt to print x at each peak:
# it labels the rows where y equals max(y) and leaves all other labels empty.
ggplot(data = df, aes(x, y)) +
  geom_line() +
  facet_grid(. ~ z) +
  geom_text(
    data = df,
    aes(x, y, label = ifelse(y == max(y), as.numeric(x), '')),
    inherit.aes = FALSE,
    hjust = 0,
    vjust = 0
  )
Edit: the output I'm expecting is something like this:

You need to fix two things.
(1) calculate max per z
(2) avoid duplicate y_values
The following code should fix both:
library(dplyr)

# Flag exactly one peak row per facet. Grouping by z makes the maximum
# per-curve, and which.max() picks a single row even when several rows share
# the maximal y. All rows are kept, so geom_line() still draws the full curve.
# The label is built as character explicitly (x rounded to 2 decimals) instead
# of relying on ifelse() coercing a number and "" to a common type.
df2 <- df %>%
  group_by(z) %>%
  mutate(
    y_label = ifelse(
      row_number() == which.max(y),
      as.character(round(x, 2)),
      ""
    )
  ) %>%
  ungroup() %>%
  as.data.frame()

ggplot(data = df2, aes(x, y)) +
  geom_line() +
  facet_grid(. ~ z) +
  geom_text(aes(label = y_label), hjust = 0, vjust = 0)

You need to provide geom_text a data.frame with data for z and z1.
x y z
z 9.994995 0.3989373 z
z1 9.994995 0.1994705 z1
How to get that? Well, here's one way.
# Split the data by facet variable, keep the row with the largest y from each
# piece, and bind those rows into one data frame (one row per level of z).
df.split <- split(df, f = df$z)
# lapply() always returns a list, unlike sapply(), whose return type depends
# on its input; drop = FALSE keeps each pick a one-row data frame.
peak_rows <- lapply(
  df.split,
  function(piece) piece[which.max(piece$y), , drop = FALSE]
)
df.max <- do.call(rbind, peak_rows)
which you can then plot
# Draw both curves and print, at each peak, the x value where it occurs.
# The text layer uses df.max (the per-facet peak rows) instead of the full data.
ggplot(data = df, aes(x, y)) +
  geom_line() +
  geom_text(data = df.max, aes(x = x, y = y, label = round(x, 2))) +
  facet_grid(. ~ z)

Get the means and maxes for each z:
# Peak height of each curve: used as the label's y position.
Ys <- summarise(group_by(df, z), maxY = max(y))
# Mean of x within each curve: used as the label's x position and text.
Xs <- summarise(group_by(df, z), meanX = mean(x))
Plot with the geom_text
# Combine the per-facet label positions, joining explicitly on the facet key
# so the join does not depend on (or print a message about) implicit matching.
ggplot(data = df, aes(x, y)) +
  geom_line() +
  geom_text(
    data = left_join(Xs, Ys, by = "z"),
    aes(meanX, maxY, label = meanX)
  ) +
  facet_grid(. ~ z)
Or more succinctly
# Same plot, with the per-facet label positions summarised inline.
ggplot(data = df, aes(x, y)) +
  geom_line() +
  geom_text(
    data = summarise(group_by(df, z), maxY = max(y), meanX = mean(x)),
    mapping = aes(meanX, maxY, label = meanX)
  ) +
  facet_grid(. ~ z)

Related

How to show integers when using ggplot2::geom_smooth()

In the example below, how can I make the x-axis labels show only even numbers? I can't convert them to factors first, because then geom_smooth does not work.
library(ggplot2)
set.seed(32)

# Integer-valued predictor plus a noisy response.
df <- data.frame(a = as.integer(rnorm(250, 2, 0.1)))
df$b <- df$a + rnorm(250)

# Duplicate the data under a second id so the plot has two facets.
df$id <- 1
df_2 <- transform(df, id = 2)
df_tot <- rbind(df, df_2)

ggplot(df_tot, aes(x = a, y = b)) +
  geom_smooth() +
  facet_wrap(~id)
If we want even numbers, one option is to pass a function to scale_x_continuous:
library(ggplot2)

# Return the even integers lying inside the axis limits.
# limits: numeric vector of length 2 (lower, upper), as passed by the scale.
# Setting the breaks, rather than relabelling the default ones, keeps every
# label equal to the value at which it is drawn.
even_breaks <- function(limits) {
  lo <- 2 * ceiling(limits[1] / 2)
  hi <- 2 * floor(limits[2] / 2)
  if (lo > hi) {
    return(numeric(0)) # no even integer falls inside the limits
  }
  seq(lo, hi, by = 2)
}

ggplot(df_tot, aes(x = a, y = b)) +
  geom_smooth() +
  facet_wrap(~id) +
  scale_x_continuous(breaks = even_breaks)

How to put plotmath labels in ggplot facets

We often want individual regression equations in ggplot facets. The best way to do this is build the labels in a dataframe and then add them manually. But what if the labels contain plotmath, e.g., superscripts?
Here is a way to do it. The plotmath is converted to a string and then parsed by ggplot. The test_eqn function is taken from another Stackoverflow post, I'll link it when I find it again. Sorry about that.
library(ggplot2)
library(dplyr)
# Fit y = a * x^b and return its plotmath label and fitted values.
#
# y, x: numeric vectors of equal length.
# Returns a list with
#   eq   - the label as a string, suitable for geom_text(parse = TRUE);
#   pred - the fitted values on the original y scale.
test_eqn <- function(y, x) {
  # The power law is fitted as a straight line in log-log space.
  fit <- lm(log(y) ~ log(x))
  coefs <- unname(coef(fit))

  # Substitute the formatted coefficients into the expression, then keep the
  # text of its argument so it can be stored in a character column.
  label_call <- substitute(
    expression(Y == a ~ X^~b),
    list(
      a = format(exp(coefs[1]), digits = 3),
      b = format(coefs[2], digits = 3)
    )
  )

  list(
    eq = as.character(label_call)[2],
    pred = exp(predict(fit)) # back-transform from log(y) to y
  )
}
set.seed(123)
x <- runif(20)
y <- runif(20)
test_eqn(x, y)$eq
#> [1] "Y == \"0.57\" ~ X^~\"0.413\""

# Assign each point to a random facet, then attach per-facet columns: the
# equation label, the label position (group means of x and y) and the fitted
# curve. The result stays grouped by f.
data <- data.frame(x = x, y = y, f = sample(c("A", "B"), 20, replace = TRUE)) %>%
  group_by(f) %>%
  mutate(
    label = test_eqn(y, x)$eq,
    labelx = mean(x),
    labely = mean(y),
    pred = test_eqn(y, x)$pred
  )
# Points, fitted curve and equation label per facet. The label layer uses
# slice(data, 1) so that each label is drawn once rather than once per row;
# parse = TRUE makes geom_text() interpret the label string as plotmath.
ggplot(data, aes(x = x, y = y)) +
  geom_point() +
  geom_line(aes(y = pred), colour = "red") +
  geom_text(
    data = slice(data, 1),
    mapping = aes(x = labelx, y = labely, label = label),
    parse = TRUE
  ) +
  facet_wrap("f")
Created on 2021-10-20 by the reprex package (v2.0.1)

draw vertical lines in ggplot with faceting

I have line plots y vs x. y is sigmoid and varies from 0 to 1.
determine the value of x where y = 0.5 or very close by interpolation.
draw vertical line at x where y = 0.5
library(tidyverse)
# Grid of x values shared by both curves
x <- seq(from = -5, to = 5, by = 0.1)

# First sigmoid, shifted by a random offset
error_term <- runif(1, min = -2, max = 2)
y1 <- 1 / (1 + exp(-x + error_term))

# Second sigmoid, shifted by a fresh random offset
error_term <- runif(1, min = -2, max = 2)
y2 <- 1 / (1 + exp(-x + error_term))

# Stack the two curves
y <- c(y1, y2)
x <- c(x, x)

# Category marker: 0 for the first curve, 1 for the second (101 points each)
a <- rep(c(0, 1), each = 101)

tbl <- tibble(x = x, a = a, y = y)

# TASK
# 1. determine values of x at which y = 0.5 for all categories and store them in variable x0
# 2. Use x0 to draw vertical lines in plots at x where y is 0.5

# One panel per category, stacked vertically
ggplot(tbl, aes(x = x, y = y)) +
  geom_line() +
  facet_grid(a ~ .) +
  theme_bw()
This really isn't something built in to ggplot so you'll need to summarize the data yourself prior to plotting. You can write a helper function and then create the data you need for the lines
# Find the x at which the curve through the points (x, y) reaches `target`.
#
# x, y:   numeric vectors of equal length describing the curve.
# target: the y value to locate (default 0.5).
#
# The curve is interpolated with approxfun() and the squared distance to
# `target` is minimised over the observed range of x. Returns one number.
find_intersect <- function(x, y, target = 0.5) {
  # Build the interpolator once rather than on every objective evaluation.
  curve_at <- approxfun(x, y)
  squared_miss <- function(z) (curve_at(z) - target)^2
  optimize(squared_miss, interval = range(x))$minimum
}
# One crossing position per level of `a`, stored in column xint.
line_data <- summarize(group_by(tbl, a), xint = find_intersect(x, y))
Then plot with
# Curves plus one vertical line per panel at the position stored in line_data.
ggplot(tbl, aes(x = x, y = y)) +
  geom_line() +
  geom_vline(data = line_data, mapping = aes(xintercept = xint)) +
  facet_grid(a ~ .) +
  theme_bw()

How to draw means and error bars on axes in ggplot2 R

I'd like to plot the means and error bars on the axes of my qplot in R.
Here I provide an example of what I mean:
As you can see on the axes in yellow are drawn means and error bars. I'd like to have that on my qplot.
Consider this subset of data:
x <- c(2.037820, 3.247560, 1.259053, 4.200520, 1.960179, 6.247880, 2.830693, 5.565390, 4.476610,
       4.627420, 2.500470, 4.156422, 2.855426, 9.210740, 2.663490, 4.412452, 3.270280, 2.838081,
       1.705650, 5.440690, 3.014000, 3.513820, 3.002930, 2.453080, 2.787320, 0.979227, 2.815368)
y <- c(2.855820, 3.332350, 1.991730, 3.688240, 3.565680, 3.525511, 4.451860, 3.233950, 6.125230,
       4.039360, 5.043330, 3.194650, 7.419020, 7.389600, 2.734740, 4.456250, 3.037665, 5.147140,
       3.184790, 3.595890, 5.457550, 1.527680, 2.848046, 1.418289, 3.996330, 4.516640, 2.884100)

# Scatter plot with a diagonal reference segment spanning the panel.
# ggplot() + geom_point() replaces the deprecated qplot() shortcut.
fp <- ggplot2::ggplot(data.frame(x = x, y = y), ggplot2::aes(x, y)) +
  ggplot2::geom_point() +
  ggplot2::annotate("segment", x = -Inf, xend = Inf, y = -Inf, yend = Inf)

# Add marginal density curves on both axes.
ggExtra::ggMarginal(fp, type = "density", margins = "both")
It should give you a plot like this:
Now, how do I draw my means and error bars? The axis() function used in base R plots doesn't work in ggplot2.
I appreciate any suggestion, even if it requires to change packages or approach the problem differently.
Thank you!
Maybe not exactly what you are looking for, but it might be a starting point for you to continue working on:
# library() stops with an error if a package is missing; require() would only
# return FALSE and let the script fail later with a less helpful message.
library(ggplot2)
library(dplyr)

df <- data.frame(x = x, y = y)

# Mean of x and the end points of mean +/- 1.96 standard errors.
# `x = m` comes last on purpose: sd(x) above must still see the raw values.
dferrx <- df %>%
  summarise(
    m = mean(x),
    lo = m - 1.96 * sd(x) / sqrt(n()),
    hi = m + 1.96 * sd(x) / sqrt(n()),
    x = m
  )

# Same summary for y; the trailing `y = m` gives the row a y column, which the
# error-bar layer below inherits from the plot-level aes().
dferry <- df %>%
  summarise(
    m = mean(y),
    lo = m - 1.96 * sd(y) / sqrt(n()),
    hi = m + 1.96 * sd(y) / sqrt(n()),
    y = m
  )

# Scatter plot with one vertical interval drawn at x = 0 and one horizontal
# interval drawn at y = 0.
ggplot(df, aes(x = x, y = y)) +
  geom_point() +
  annotate("segment", x = -Inf, xend = Inf, y = -Inf, yend = Inf) +
  geom_errorbar(data = dferry, aes(x = 0, ymin = lo, ymax = hi)) +
  geom_errorbarh(data = dferrx, aes(y = 0, xmin = lo, xmax = hi))
You might have already solved it, but, just in case, here is the code for an error bar for each point, following Wietze314's contribution:
library(tidyverse)

example_DF <- tibble(
  x = c(2.037820, 3.247560, 1.259053, 4.200520, 1.960179, 6.247880, 2.830693, 5.565390, 4.476610,
        4.627420, 2.500470, 4.156422, 2.855426, 9.210740, 2.663490, 4.412452, 3.270280, 2.838081,
        1.705650, 5.440690, 3.014000, 3.513820, 3.002930, 2.453080, 2.787320, 0.979227, 2.815368),
  y = c(2.855820, 3.332350, 1.991730, 3.688240, 3.565680, 3.525511, 4.451860, 3.233950, 6.125230,
        4.039360, 5.043330, 3.194650, 7.419020, 7.389600, 2.734740, 4.456250, 3.037665, 5.147140,
        3.184790, 3.595890, 5.457550, 1.527680, 2.848046, 1.418289, 3.996330, 4.516640, 2.884100)
)

# Mean of x and the end points of mean +/- 1.96 standard errors.
# `x = m` comes last so that sd(x) still sees the raw values.
dferrx <- example_DF %>%
  summarise(
    m = mean(x),
    lo = m - 1.96 * sd(x) / sqrt(n()),
    hi = m + 1.96 * sd(x) / sqrt(n()),
    x = m
  )

# Same summary for y.
dferry <- example_DF %>%
  summarise(
    m = mean(y),
    lo = m - 1.96 * sd(y) / sqrt(n()),
    hi = m + 1.96 * sd(y) / sqrt(n()),
    y = m
  )

# Half-width of each interval (1.96 standard errors). lo and hi are interval
# END POINTS, so adding/subtracting them to each point would give lopsided
# bars; the per-point bar must use the half-width instead.
x_err <- dferrx$hi - dferrx$m
y_err <- dferry$hi - dferry$m

# Scatter plot with a symmetric error bar around every point in both directions.
ggplot(example_DF, aes(x = x, y = y)) +
  geom_point() +
  annotate("segment", x = -Inf, xend = Inf, y = -Inf, yend = Inf) +
  geom_errorbar(aes(ymin = y - y_err, ymax = y + y_err)) +
  geom_errorbarh(aes(xmin = x - x_err, xmax = x + x_err))
It would benefit from a lot of esthetic tinkering, but there it is.

Plot Grouped bar graph with calculated standard deviation in ggplot

I feel like this should be really easy to do, but I'm having a really hard time figuring this out.
I have a data frame
# Five categories with three random measurements each.
type <- letters[1:5]
x <- rnorm(5)
y <- rnorm(5)
z <- rnorm(5)

# One standard deviation per variable; data.frame() repeats it on every row.
xsd <- sd(x)
ysd <- sd(y)
zsd <- sd(z)

df <- data.frame(type, x, y, z, xsd, ysd, zsd)
print(df)
type x y z xsd ysd zsd
1 a -1.16788106 0.2260430 -1.16788106 0.8182508 0.7321015 0.9016335
2 b -0.09955193 -0.6647980 -0.09955193 0.8182508 0.7321015 0.9016335
3 c -0.87901053 -0.4269936 -0.87901053 0.8182508 0.7321015 0.9016335
4 d -0.87861339 -1.3669793 -0.87861339 0.8182508 0.7321015 0.9016335
5 e 0.84350228 0.4702580 0.84350228 0.8182508 0.7321015 0.9016335
and I need a grouped bar graph of the mean of x, y, and z by type with error bars showing the standard deviation for each variable. The standard deviation is in different columns: xsd, ysd and zsd.
I need to plot the mean in the y axis, type grouping the x, y, z variables in the x axis.
I tried using gather(), to rearrange the data, but I'm not having any success...
Let ggplot2 do the calculations for you:
# One-time setup (run manually): mean_sdl() needs the Hmisc package.
# The package name is case-sensitive.
# install.packages("Hmisc")
library(tidyverse)

type <- c("a", "b", "c", "d", "e")
x <- rnorm(5, 10, 5)
y <- rnorm(5, 8, 3)
z <- rnorm(5, 2, 4)
df <- data.frame(type, x, y, z)

# Long format: one row per (type, variable) pair, with columns
# type / variable / value. pivot_longer() replaces the superseded gather().
df_long <- df %>%
  pivot_longer(cols = x:z, names_to = "variable", values_to = "value")

# Bars show the mean of each variable and the error bars show mean +/- 1 sd
# (mult = 1). `fun` replaces the deprecated `fun.y` argument.
ggplot(df_long, aes(x = variable, y = value, fill = variable)) +
  stat_summary(fun = "mean", geom = "col") +
  stat_summary(
    fun.data = mean_sdl,
    geom = "errorbar",
    width = .5,
    fun.args = list(mult = 1)
  )
This example should help:
library(tidyverse)

# 50 observations; the five type labels are recycled, giving 10 rows per type.
type <- c("a", "b", "c", "d", "e")
x <- rnorm(50, 20, 5)
y <- rnorm(50, 25, 1)
z <- rnorm(50, 40, 1)
df <- data.frame(type, x, y, z)
df

# Reshape to long format (the variable name goes into column `x`), compute the
# mean and sd per type/variable, and draw dodged bars with +/- 1 sd error bars.
# pivot_longer() replaces the superseded gather(); .groups = "drop" returns an
# ungrouped summary and avoids the regrouping message.
df %>%
  pivot_longer(cols = -type, names_to = "x", values_to = "value") %>%
  group_by(type, x) %>%
  summarise(MEAN = mean(value), SD = sd(value), .groups = "drop") %>%
  ggplot(aes(x, MEAN, fill = type)) +
  geom_col(position = "dodge") +
  geom_errorbar(aes(ymin = MEAN - SD, ymax = MEAN + SD), position = "dodge")

Resources