How to highlight a point along a curve in ggplot - r

I have the below code to plot a probit model comparing the chance of success based on a maximum temperature value. Seems to work well, I'm happy with the plot. But I'm hoping to highlight the point along the curve where the probability is 50%, and then draw a line down to the x-axis to determine (and show) this value as well. Also hoping to include confidence intervals for this estimate. Any help would be greatly appreciated!
data <- data.frame(MaxTemp = c(53.2402, 59.01004,51.42602,41.53883,44.70763,53.90285,51.130318,54.5929,43.697559,49.772446,54.902222,52.720528,58.782608,47.680374,48.30313,56.10921,57.660324,46.387924,60.503147,53.803177,52.27771,58.58555,55.74136,49.04505,46.816269,52.58295,52.751373,56.209747,51.733894,51.424305,50.74564,47.046513,53.030407,56.68752,56.639351,53.526585,51.562313),
Success=c(1,1,1,0,0,1,1,1,0,0,1,1,1,0,0,1,1,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,1,1))
TempProbitModel <- glm(Success ~ MaxTemp, data=data, family=binomial(link="logit"))
temp.data <- data.frame(MaxTemp = seq(40, 62, 0.5))
predicted.data <- as.data.frame(predict(TempProbitModel, newdata = temp.data, type="link", se=TRUE))
new.data <- cbind(temp.data, predicted.data)
std <- qnorm(0.95 / 2 + 0.5)
new.data$ymin <- TempProbitModel$family$linkinv(new.data$fit - std * new.data$se)
new.data$ymax <- TempProbitModel$family$linkinv(new.data$fit + std * new.data$se)
new.data$fit <- TempProbitModel$family$linkinv(new.data$fit)
(TempProb <- ggplot(data, aes(x=MaxTemp, y=Success)) +
geom_point() +
geom_ribbon(data=new.data, aes(y=fit, ymin=ymin, ymax=ymax), alpha=0.5) +
geom_line(data=new.data, aes(y=fit)) +
labs(x="Peak Temperature", y="Probability of Success") )

Find the closest value to y = 0.5:
closest_value <- which(abs(new.data$fit - 0.5) == min(abs(new.data$fit - 0.5)))
Calculate slope at this point:
slope_at_closest_value <- (new.data[closest_value, "MaxTemp"] - new.data[closest_value - 1, "MaxTemp"]) /( new.data[closest_value, "fit"] - new.data[closest_value - 1, "fit"])
x_value <- new.data[closest_value - 1, "MaxTemp"] + slope_at_closest_value * (0.5 - new.data[closest_value - 1, "fit"])
Use this x_value to draw a vertical line:
ggplot(data, aes(x=MaxTemp, y=Success)) +
geom_point() +
geom_ribbon(data=new.data, aes(y=fit, ymin=ymin, ymax=ymax), alpha=0.5) +
geom_line(data=new.data, aes(y=fit)) +
labs(x="Peak Temperature", y="Probability of Success") +
geom_vline(xintercept = x_value, color="red")
This draws the following plot:
The confidence interval can be drawn accordingly.

An another way of getting this point is to use approxfun function.
f <- approxfun(new.data$fit,new.data$MaxTemp, rule = 2)
f(0.5)
[1] 49.39391
So now, if you are plotting it:
library(ggplot2)
ggplot(data, aes(x = MaxTemp, y = Success))+
geom_point()+
geom_ribbon(data=new.data, aes(y=fit, ymin=ymin, ymax=ymax), alpha=0.5) +
geom_line(data=new.data, aes(y=fit)) +
labs(x="Peak Temperature", y="Probability of Success") +
geom_point(x = f(0.5), y = 0.5, size = 3, color = "red")+
geom_vline(xintercept = f(0.5), linetype = "dashed", color = "red")+
geom_hline(yintercept = 0.5, linetype = "dashed", color = "red")

Related

R gglot2 want to reflect different colors in a line

I used R language to perform regression analysis/smoothing on the known data. The red line is the fitted regression curve, and the green background is the 95% confidence interval.
Now I want to set different colors for the red line and green confidence interval in this picture by time period (red solid line and gray confidence interval before 2020, green dotted line and blue confidence interval from 2020 to 2030), but I don't know how to set the code.
Here is my code.
data <- data.frame(
  year = c(2003:2030),
  number = c(40.84,49.2354,51.5988,53.9622,56.3256,64.79,61.0524,63.4158,65.7792,68.1426,76.4,72.8694,75.2328,77.5962,79.9596,76.36,71.99236,71.10268,70.39136,69.80928,69.31588,68.89523,68.53132,68.21586,67.93846,67.69439,67.47674,67.28291),
  stringsAsFactors = FALSE
)
p3 <- ggplot(data, aes(x=year, y=number)) +
  geom_point() +
  geom_smooth(method = lm, formula = y ~ splines::bs(x, 3), color="red", fill="#69b3a2", se=TRUE) +
  theme_ipsum()
p3
This figure is what I have drawn so far.
This figure is what I want in the end.
Thanks!!
This is another try:
library(tidyverse)
library(hrbrthemes)
data %>%
mutate(year_dich = ifelse(year<2015, "<2020", ">=2020")) %>%
ggplot(aes(x=year, y=number, group=year_dich)) +
geom_point() +
geom_smooth(method = lm, formula = y ~ splines::bs(x, 3), se=TRUE,
aes(color = ifelse(data$year<2015, "red", "green"),
fill = ifelse(data$year<2015, "blue", "black"),
linetype = ifelse(data$year<2015, "dashed", "solid"))
)+
theme_ipsum()+
theme(legend.position = "none")
Often people try to do too much inside ggplot - when you have very specific requirements it is best to just work out what you want to plot, then draw it:
library(ggplot2)
library(hrbrthemes)
mod <- lm(formula = number ~ splines::bs(year, 3), data = data)
pred <- as.data.frame(predict(mod, se = TRUE)[1:2])
pred$year <- 2003:2030
pred$number <- pred$fit
pred$pre <- pred$year < 2020
ggplot(pred, aes(x=year, y=number)) +
geom_point(data = data) +
geom_line(aes(color = pre, linetype = pre), size = 1.2) +
geom_ribbon(aes(ymin = fit - 1.96 * se.fit, ymax = fit + 1.96 * se.fit,
fill = pre), alpha = 0.2) +
scale_fill_manual(values = c("dodgerblue", "gray")) +
scale_color_manual(values = c("forestgreen", "red")) +
scale_linetype_manual(values = c(2, 1)) +
theme_ipsum() +
theme(legend.position = "none")
If you want an interpretable formula for the line (since the actual formula for a spline regression is complicated and not easy to recover), you could try a simple polynomial regression, which seems to fit your data about the same as a spline regression:
mod <- lm(number ~ poly(year, 3), data = data)
pred <- as.data.frame(predict(mod, se.fit = TRUE)[1:2])
pred$year <- 2003:2030
pred$number <- pred$fit
pred$pre <- pred$year < 2020
ggplot(pred, aes(x=year, y=number)) +
geom_point(data = data) +
geom_line(aes(color = pre, linetype = pre), size = 1.2) +
geom_ribbon(aes(ymin = fit - 1.96 * se.fit, ymax = fit + 1.96 * se.fit,
fill = pre), alpha = 0.2) +
scale_fill_manual(values = c("dodgerblue", "gray")) +
scale_color_manual(values = c("forestgreen", "red")) +
scale_linetype_manual(values = c(2, 1)) +
theme_ipsum() +
theme(legend.position = "none") +
annotate("text", x = 2020, y = 45,
label = paste("number =",
paste0(format(coef(mod), digits = 3),
c("", " * year", " * year\u00b2", " * year\u00b3"),
collapse = " + ")), fontface = "bold")
Where we can see the equation added as an annotation, retrieved from the regression equation with:
paste("number =",
paste0(format(coef(mod), digits = 3),
c("", " * year", " * year\u00b2", " * year\u00b3"),
collapse = " + "))
#> [1] "number = 66.5 + 27.4 * year + -34.3 * year² + 10.0 * year³"

Bunched up x axis ticks on multi panelled plot in ggplot

I am attempting to make a multi-panelled plot from three individual plots (see images).However, I am unable to rectify the bunched x-axis tick labels when the plots are in the multi-panel format. Following is the script for the individual plots and the multi-panel:
Individual Plot:
NewDat [[60]]
EstRes <- NewDat [[60]]
EstResPlt = ggplot(EstRes,aes(Distance3, `newBa`))+geom_line() + scale_x_continuous(n.breaks = 10, limits = c(0, 3500))+ scale_y_continuous(n.breaks = 10, limits = c(0,25))+ xlab("Distance from Core (μm)") + ylab("Ba:Ca concentration(μmol:mol)") + geom_hline(yintercept=2.25, linetype="dashed", color = "red")+ geom_vline(xintercept = 1193.9, linetype="dashed", color = "grey")+ geom_vline(xintercept = 1965.5, linetype="dashed", color = "grey") + geom_vline(xintercept = 2616.9, linetype="dashed", color = "grey") + geom_vline(xintercept = 3202.8, linetype="dashed", color = "grey")+ geom_vline(xintercept = 3698.9, linetype="dashed", color = "grey")
EstResPlt
Multi-panel plot:
MultiP <- grid.arrange(MigrPlt,OcResPlt,EstResPlt, nrow =1)
I have attempted to include:
MultiP <- grid.arrange(MigrPlt,OcResPlt,EstResPlt, nrow =1)+
theme(axis.text.x = element_text (angle = 45)) )
MultiP
but have only received errors. It's not necessary for all tick marks to be included. An initial, mid and end value is sufficient and therefore they would not need to all be included or angled. I'm just not sure how to do this. Assistance would be much appreciated.
There are several options to resolve the crowded axes. Let's consider the following example which parallels your case. The default labelling strategy wouldn't overcrowd the x-axis.
library(ggplot2)
library(patchwork)
library(scales)
df <- data.frame(
x = seq(0, 3200, by = 20),
y = cumsum(rnorm(161))
)
p <- ggplot(df, aes(x, y)) +
geom_line()
(p + p + p) / p &
scale_x_continuous(
name = "Distance (um)"
)
However, because you've given n.breaks = 10 to the scale, it becomes crowded. So a simple solution would just be to remove that.
(p + p + p) / p &
scale_x_continuous(
n.breaks = 10,
name = "Distance (um)"
)
Alternatively, you could convert the micrometers to millimeters, which makes the labels less wide.
(p + p + p) / p &
scale_x_continuous(
n.breaks = 10,
labels = label_number(scale = 1e-3, accuracy = 0.1),
name = "Distance (mm)"
)
Yet another alternative is to put breaks only every n units, in the case below, a 1000. This happens to coincide with omitting n.breaks = 10 by chance.
(p + p + p) / p &
scale_x_continuous(
breaks = breaks_width(1000),
name = "Distance (um)"
)
Created on 2021-11-02 by the reprex package (v2.0.1)
I thought it would be better to show with an example.
What I mean was, you made MigrPlt, OcResPlt, EstResPlt each with ggplot() +...... For plot that you want to rotate x axis, add + theme(axis.text.x = element_text (angle = 45)).
For example, in iris data, only rotate x axis text for a like
a <- ggplot(iris, aes(Sepal.Width, Sepal.Length)) +
geom_point() +
theme(axis.text.x = element_text (angle = 45))
b <- ggplot(iris, aes(Petal.Width, Petal.Length)) +
geom_point()
gridExtra::grid.arrange(a,b, nrow = 1)

Adding uncertainty bands to a smooth spline in a scatterplot

I have a following scatterplot with a smooth spline
a<-rep(1:50,len=500)
b<-sample(0:5000,500)
c<-round(seq(0,600,len=500))
data_frame<-as.data.frame(cbind(a,b,c))
names(data_frame)<-c("ID","toxin_level","days_to_event")
plot(data_frame$days_to_event,data_frame$toxin_level, xlim=c(600,0),xlab="days before the event",ylab="Toxin level",type="p")
abline(v=0,col="red")
x <- data_frame$days_to_event
y <- data_frame$toxin_level
fit.sp = smooth.spline(y ~ x, nknots=20)
lines(fit.sp, col="blue")
This is the resulting plot
I was wondernig if it is possible to somehow add confidence bands to this curve? I deally I would like it to be in a transparent blue, but any color including gray is OK.
Updated: using scale_x_reverse to match your graph more precisely...
How about this using ggplot2?
library(ggplot2)
ggplot(data_frame, aes(x = days_to_event, y = toxin_level)) + geom_point() +
geom_vline(xintercept = 0, color = "red") + scale_x_reverse() +
xlab("Days before the event") + ylab("Toxin Level") +
geom_smooth(method = lm, se = TRUE)
Which gives this:
Or to match your question a bit more:
ggplot(data_frame, aes(x = days_to_event, y = toxin_level)) + geom_point(shape = 1) +
geom_vline(xintercept = 0, color = "red") + scale_x_reverse() +
xlab("Days before the event") + ylab("Toxin Level") +
geom_smooth(method = lm, se = TRUE, color = "blue", fill = "lightblue") +
theme_bw()

Limiting the x-axis range of geom_line (defined by slope and intercept)

library(ggplot2)
##
df <- as.data.frame(matrix(rnorm(60*2, mean=3,sd=1), 60, 2))
colnames(df) <- c("A", "B")
cf1 <- coef(lm(B~A, data=df))
##
ggplot(df, aes(A,B)) +
geom_point() +
stat_smooth(method = "lm", color="red", fill="red", alpha=0.1, fullrange=TRUE) +
#xlim(0,6)+
geom_abline(intercept = cf1[1], slope = cf1[2], lty="dashed", col="green")
I want to limit geom_line to the same range as stat_smooth (which seems to be defined by xmax/xmin).
The xlim argument did not help (this was proposed here). In the real life application, the geom_line slope and intercept will be extracted from model updates, so they will be slightly different. Thank you.
I think this is one way to get what you are looking for:
min_x <- min(df$A)
min_y <- unname(cf1[1])
max_x <- max(df$A)
max_y <- min_y + unname(cf1[2]) * max_x
##
p <- ggplot(df, aes(A,B)) +
geom_point() +
stat_smooth(
method = "lm", color = "red",
fill = "red", alpha = 0.1,
fullrange = TRUE)
##
R> p + geom_segment(
aes(x = min_x, y = min_y,
xend = max_x, yend = max_y),
linetype = "dashed",
color = "green")
This requires a little extra effort as you are calculating the endpoint coordinates by hand, rather than just passing the slope and intercept values to the function, but it does not seem like geom_abline allows you to set its domain.

Overlay barplot with negative binomial/poisson distribution

How can I overlay my barplot on real data with the estimated negative binomial density function using the same mean and variance?
library(data.table)
library(ggplot2)
temp <- data.table(cbind(V1=c(1,2,3,4,5,9), N=c(50,40,30,20,10,2)))
ggplot(temp, aes(x=V1, y= N)) +
geom_histogram(stat="identity", binwidth = 2.5) +
scale_y_continuous(breaks=c(0, 100, 200, max(temp$N))) +
scale_x_continuous(breaks=c(0, 100, 200, max(temp$V1))) +
theme(panel.grid.minor.x=element_blank(),
panel.grid.major.x=element_blank()
)
I tried to add stat_function(fun = dnbinom, args = list(size=1, mu = mean(temp$V1)), color="red") but all I see is a red line on the abscissa. Same for dpois (with lambda=mean(temp$V1)) and dnorm (with mean = mean(temp$V1), sd = sd(temp$V1)).
Maybe my parametrization is wrong?
#mmk is correct: normalization is the key. Here's how you can achieve what you want:
#simplest normalization
temp$Nmod <- temp$N / sum(temp$N)
#alternative normalization
#temp$Nmod <- temp$N / sqrt(sum(temp$N * temp$N))
temp$pois <- dpois(temp$V1, lambda = mean(temp$V1))
temp$nbinom <- dnbinom(temp$V1, mu = mean(temp$V1), size = 1)
ggplot(temp, aes(x=V1, y= Nmod)) +
geom_histogram(stat="identity", binwidth = 2.5) +
theme(panel.grid.minor.x=element_blank(),
panel.grid.major.x=element_blank()) +
geom_line(aes(y = pois), col = "red") +
geom_line(aes(y = nbinom), col = "blue")

Resources