Given a data frame, Data of the form
x y
1 250 1.00000000
2 345 0.03567766
3 290 0.16654457
4 260 0.58363858
5 270 0.38754579
6 280 0.24713065
7 290 0.17142857
8 300 0.11709402
9 310 0.09047619
10 320 0.06439560
11 330 0.05098901
I am able to derive and plot a fit for the data with
library(ggplot2)
Data$x2<-Data$x^2
quadratic.model <- lm(log(Data$y) ~ Data$x + Data$x2)
fun_quad <- function(x){return(exp(
quadratic.model$coef[[3]] * x ^ 2 +
quadratic.model$coef[[2]] * x +
quadratic.model$coef[[1]]
))}
chartObj <- ggplot() +
stat_function(
fun = fun_quad,
aes(color = factor(0)),
size = 1.3,
linetype = "dotdash"
)+
geom_point(data = Data,
aes(x = x, y = y, fill = factor(0)),
color = "black", shape = 22, stroke = 0.7, size = 2.2) +
coord_trans(y = 'log10',
limx = c(250,350), limy = c(.025,1))+
theme_bw() +
guides(fill=F,color=F,linetype=F)
chartObj
which renders
.
I also tried plotting the CI using confint and geom_ribbon.
ribbon.ymin <- function(x){return(exp(
confint(quadratic.model)[[3]]*x^2 +
confint(quadratic.model)[[2]]*x +
confint(quadratic.model)[[1]]
))}
ribbon.ymax <- function(x){return(exp(
confint(quadratic.model)[[6]]*x^2 +
confint(quadratic.model)[[5]]*x +
confint(quadratic.model)[[4]]
))}
ribbonData <- as.data.frame(cbind(x = seq(250,350,.01)))
attach(ribbonData)
ribbonData$ymin <- ribbon.ymin(x)
ribbonData$ymax <- ribbon.ymax(x)
ribbonData$y <- fun_quad(x)
detach(ribbonData)
head(ribbonData)
chartObj <- chartObj +
geom_ribbon( data = ribbonData,
aes(x = x, y = 0:0,
ymin = ymin, ymax = ymax,
color = factor(0),fill = factor(0)),
alpha = 0.3)
however, this renders as below, which again feels obviously incorrect.
So, how do I plot the confidence interval associated with the function described by quadratic.model?
Update
I think that I've found nearly what I am looking for with the use of the predict command, specifically, shown below, however this is still leaves a bit to be desired, particularly the unevenness of the edges of the produced ribbon.
Data$x2<-Data$x^2
quadratic.model <- lm(log(Data$y) ~ Data$x + Data$x2)
fun_quad <- function(x){return(exp(
quadratic.model$coef[[3]] * x ^ 2 +
quadratic.model$coef[[2]] * x +
quadratic.model$coef[[1]]
))}
ribbonData<-predict(quadratic.model,data.frame(x=Data$x),interval="predict",level=.95)
# "predict" used over "confidence" in this example to show the rough edges better.
ribbonData<-as.data.frame(cbind(x=Data$x,fit=ribbonData[,1],lower=ribbonData[,2],upper=ribbonData[,3]))
ribbonData[,2:4]<-exp(ribbonData[,2:4])
chartObj <- ggplot() +
geom_ribbon( data = ribbonData,
aes(x = x, y = fit,
ymin = lower, ymax = upper,
color = factor(0),fill = factor(0)),
alpha = 0.3) +
stat_function(
fun = fun_quad,
aes(color = factor(0)),
size = 1.3,
linetype = "dotdash"
)+
geom_point(data = Data,
aes(x = x, y = y, fill = factor(0)),
color = "black", shape = 22, stroke = 0.7, size = 2.2) +
coord_trans(y = 'log10',
limx = c(250,350), limy = c(.025,1))+
theme_bw() +
guides(fill=F,color=F,linetype=F)
Is there a better way to represent the information presented by the plot above? To smooth out the rough edges of the ribbon?
It might "feel obviously incorrect", but it plots what it's been asked. The whole interval cannot be seen because limx and limy have been set:
ribbon <- function(x, level = 0.95) {
data.frame(
x,
ymin = exp(
confint(quadratic.model, level = level)[[3]] * x ^ 2 +
confint(quadratic.model, level = level)[[2]] * x +
confint(quadratic.model, level = level)[[1]]
),
ymax = exp(
confint(quadratic.model, level = level)[[6]]*x^2 +
confint(quadratic.model, level = level)[[5]]*x +
confint(quadratic.model, level = level)[[4]]
)
)
}
chartObj +
coord_trans(y = 'log10') +
geom_ribbon(data = ribbon(seq(250, 350, .01), level = 0.95),
aes(x = x, ymin = ymin, ymax = ymax,
color = factor(0), fill = factor(0)),
alpha = 0.3)
(NB: My answer is strictly about programming with ggplot2 and says nothing about the statistical validity of exponentiating a confidence interval).
Edit in response to OP's updated question (smooth out edges of ribbon).
predict() over more points:
quadratic.model <- lm(log(y) ~ x + x2, data = Data)
ribbonData <- data.frame(x = seq(250, 350, 0.01), x2 = seq(250, 350, 0.01) ^ 2)
ribbonData <- cbind(
ribbonData,
predict(quadratic.model, ribbonData,
interval = "prediction", level = 0.95)
)
# "predict" used over "confidence" in this example to show the rough edges better.
ribbonData[, 3:5] <- exp(ribbonData[, 3:5])
ggplot() +
geom_ribbon( data = ribbonData,
aes(x = x, y = fit,
ymin = lwr, ymax = upr,
color = factor(0),fill = factor(0)),
alpha = 0.3) +
stat_function(
fun = fun_quad,
aes(color = factor(0)),
size = 1.3,
linetype = "dotdash"
) +
geom_point(data = Data,
aes(x = x, y = y, fill = factor(0)),
color = "black", shape = 22, stroke = 0.7, size = 2.2) +
coord_trans(y = 'log10',
limx = c(250, 350), limy = c(.025, 1)) +
theme_bw() +
guides(fill = F, color = F, linetype = F)
Related
I'm attempting to create a composite plot in r, the code for which is below:
#Adding initial data
ggp <- ggplot(NULL, aes(x = date, y = covid)) +
geom_spline(data = onsdf,
aes(x = date, y = covid, colour = "ONS Modelled Estimates"), nknots = 90, size = 1.3) +
geom_spline(data = gvtdf,
aes(x = date, y = covid, colour = "Gvt Reported Positive Tests"), nknots = 90, size = 1.3)
#Creating function to add stringency bars
barfunction <- function(date1, date2, alpha){
a <- annotate(geom = "rect",
xmin = as.Date(date1), xmax = as.Date(date2), ymin = 0, ymax = Inf, alpha = alpha, fill = "red")
return(a)
}
#Adding lockdown stringency bars
ggp <- ggp +
barfunction("2020-05-03", "2020-06-01", 0.5) +
barfunction("2020-06-01", "2020-06-15", 0.4) +
barfunction("2020-06-15", "2020-09-14", 0.3) +
barfunction("2020-09-14", "2020-11-05", 0.3) +
barfunction("2020-11-05", "2020-12-02", 0.5) +
barfunction("2020-12-02", "2021-01-06", 0.4) +
barfunction("2021-01-06", "2021-03-29", 0.5) +
barfunction("2021-03-29", "2021-04-12", 0.4) +
barfunction("2021-04-12", "2021-05-17", 0.3) +
barfunction("2021-05-17", "2021-07-19", 0.2) +
barfunction("2021-07-19", "2021-12-08", 0.1) +
barfunction("2021-12-08", "2022-02-24", 0.2) +
#Adding plot labels
ggp <- ggp + labs(title = "Estimated Total Covid-19 Cases vs Reported Positive Cases",
subtitle = "From ONS and HMGvt datasets",
x = "Date (year - month)", y = "Covid Levels") +
scale_y_continuous(labels = scales::comma) +
scale_x_date(limits = as.Date(c("2020-05-03", NA ))) +
scale_colour_manual(name = "Measurement Method",
values = c("ONS Modelled Estimates"="purple",
"Gvt Reported Positive Tests" = "blue"))
The output of this code looks like this:
Rendered graph
As you can see, I have a very repetitive function (barfunction) in this code that I would like to change. I thought the best way to do this was to convert the data barfunction() was applying to the graph into a dataframe, and then try to use a function on said data frame. Here is a head of the data frame (called strindf)
date1 date2 alpha
2020-05-03 2020-06-01 0.5
2020-06-01 2020-06-15 0.4
2020-06-15 2020-09-14 0.3
2020-09-14 2020-11-05 0.3
I initially tried to use apply() to add the strindf data to my plot, however I got an error message (Error in as.Date(date2) : argument "date2" is missing, with no default). Here is how I implemented it into the original code
ggptest <- ggplot(NULL, aes(x = date, y = covid)) +
geom_spline(data = onsdf,
aes(x = date, y = covid, colour = "ONS Modelled Estimates"), nknots = 90, size = 1.3) +
geom_spline(data = gvtdf,
aes(x = date, y = covid, colour = "Gvt Reported Positive Tests"), nknots = 90, size = 1.3) +
apply(strindf, MARGIN = 1 , barfunction) +
theme_minimal() +
scale_y_continuous(labels = scales::comma) +
scale_x_date(limits = as.Date(c("2020-05-03", NA ))) +
scale_colour_manual(name = "Legend",
I'm quite new to r so I'm a bit stumped, does anyone have any suggestions?
Thanks in advance!
Your idea was right. But you have chosen the wrong function from the apply family of functions. As you have a function of multiple arguments use mapply or as I do below purrr::pmap:
Using some fake random example data:
library(ggplot2)
library(ggformula)
barfunction <- function(date1, date2, alpha) {
annotate(geom = "rect", xmin = as.Date(date1), xmax = as.Date(date2), ymin = 0, ymax = Inf, alpha = alpha, fill = "red")
}
ggplot(NULL, aes(x = date, y = covid)) +
geom_spline(data = df, aes(colour = "ONS Modelled Estimates"), nknots = 90, size = 1.3) +
purrr::pmap(strindf, barfunction) +
theme_minimal() +
scale_y_continuous(labels = scales::comma) +
scale_x_date(limits = as.Date(c("2020-05-03", NA))) +
scale_colour_manual(
name = "Measurement Method",
values = c(
"ONS Modelled Estimates" = "purple",
"Gvt Reported Positive Tests" = "blue"
)
)
#> Warning: Removed 123 rows containing non-finite values (stat_spline).
DATA
set.seed(123)
df <- data.frame(
date = seq.Date(as.Date("2020-01-01"), as.Date("2020-12-31"), by = "day"),
covid = runif(366)
)
strindf <- structure(list(date1 = c(
"2020-05-03", "2020-06-01", "2020-06-15",
"2020-09-14"
), date2 = c(
"2020-06-01", "2020-06-15", "2020-09-14",
"2020-11-05"
), alpha = c(0.5, 0.4, 0.3, 0.3)), class = "data.frame", row.names = c(
NA,
-4L
))
I am making errorbar plot with different linetype
library(ggplot2)
library(plyr)
# Create dataset:
DF <- data.frame(
group = rep(c("a", "b", "c", "d"),each=10),
Ydata = c(seq(1,10,1),seq(5,50,5),seq(20,11,-1),seq(0.3,3,0.3)),
Xdata = c(seq(1,10,1),seq(5,50,5),seq(20,11,-1),seq(0.3,3,0.3)))
# Summarise data:
subDF <- ddply(DF, .(group), summarise,
X = mean(Xdata), Y = mean(Ydata),
X_sd = sd(Xdata, na.rm = T), Y_sd = sd(Ydata))
# Plot data with error bars:
ggplot(subDF, aes(x = X, y = Y,linetype = group)) +
geom_errorbar(aes(x = X,
ymin = (Y-Y_sd),
ymax = (Y+Y_sd)),
width = 1, size = 0.5) +
geom_point(cex = 3) +
scale_linetype_manual(values = c("solid","twodash","longdash","longdash"))
This give me the following plot, but I want the end whiskers to be solid. Anyone could help?
One option to achieve your desired result would be to switch to geom_linerange and add the whiskers via geom_segment like so:
library(ggplot2)
width <- .3
# Plot data with error bars:
ggplot(subDF, aes(x = X, y = Y, linetype = group)) +
geom_segment(aes(
x = X - width, xend = X + width,
y = Y - Y_sd, yend = Y - Y_sd
),
size = 0.5, linetype = "solid"
) +
geom_segment(aes(
x = X - width, xend = X + width,
y = Y + Y_sd, yend = Y + Y_sd
),
size = 0.5, linetype = "solid"
) +
geom_linerange(aes(
x = X,
ymin = (Y - Y_sd),
ymax = (Y + Y_sd)
),
size = 0.5
) +
geom_point(cex = 3) +
scale_linetype_manual(values = c("solid", "twodash", "longdash", "longdash"))
I have a kind of "time series", with different measures taken at regular points on the same individuals.
I want to graphically represent 2 of these time series on the same graph (no problem with that), and add a background which depends on a third factor.
Here a reproducible example of what I've done:
df <- data.frame(
x = seq(1, 20),
y = sample(c(1:10), 20, replace = TRUE),
z = sample(c(1:10), 20, replace = TRUE),
w = sample(c("yes", "no"), 20, replace = TRUE)
)
ggplot(df) +
geom_line(aes(x = x, y = y), color = 'darkorange') +
geom_line(aes(x = x, y = z), color = 'royalblue') +
geom_raster(aes(x = x, y = 5, fill = w, alpha = w)) +
scale_alpha_ordinal(range = c(0, 0.8)) +
scale_fill_manual(values = c("gray32", "gray32"))
Which give me almost what I want excepted that I would like my raster to cover my whole y-axis window.
Any idea?
Thank you!
I think it's simplest to use geom_rect here:
ggplot(df) +
geom_line(aes(x = x, y = y), color = 'darkorange') +
geom_line(aes(x = x, y = z), color = 'royalblue') +
geom_rect(aes(xmin = x - 0.5, xmax = x + 0.5,
ymin = -Inf, ymax = Inf, fill = w, alpha = w)) +
scale_alpha_ordinal(range = c(0, 0.8)) +
scale_fill_manual(values = c("gray32", "gray32"))
It's probably also possible with geom_tile and geom_raster, but I couldn't get the range to cover the whole vertical space without also fiddling with coord_cartesian.
What I have here are two graphs "PlotA" and "PlotB", however I want a combined graph with geom_pointranges showing points, geom_line showing the line and geom_ribbon showing the standard deviation.
water <- c(35,40,42,46,48,50)
depth <- c(1,2,3,4,5,6)
sd <- c(10,10,10,10,10,10)
dataA <- data.frame(depth, water, sd)
from <- c(0.5, 1.5, 2.5, 3.5, 4.5, 5.5)
to <- c(1.5, 2.5, 3.5, 4.5, 5.5, 6.5)
depth1 <- c(1,2,3,4,5,6)
water1 <- c(40,32,50,55,62,30)
dataB <- data.frame(from,to,depth1, water1)
# Load necessary packages
require(ggplot2)
# Plotting Started
#PlotA
ggplot(data=dataA, aes(x = water, y = depth), na.rm=T) +
geom_path(size=0.4, color="black")+
geom_pointrange(data=dataB, aes(water1, depth1, ymin=from, ymax=to), size=0.1, color='black') +
scale_y_reverse(lim = c(10,0), breaks = seq(0,10,1)) +
theme_bw(12) +
scale_x_continuous(lim =c(0,100), breaks = seq(0,100,20))
#PlotB
ggplot() + geom_ribbon(data=dataA, aes(x=depth, y=water, ymin = water - sd, ymax = water + sd), alpha=0.3, fill='grey12') + coord_flip() +
scale_x_reverse(lim = c(10,0), breaks = seq(0,10,1)) + theme_bw(12) +
scale_y_continuous(lim =c(0,100), breaks = seq(0,100,20))
coord_flip is difficult to use well in the middle of a plot. I strongly recommend debugging plots without it and then adding it as the last step.
I think this is what you're looking for. If not, please describe your desired result in more detail.
ggplot(data = dataA, aes(x = depth, y = water)) +
geom_ribbon(
data = dataA,
aes(
x = depth,
ymin = water - sd,
ymax = water + sd
),
alpha = 0.3,
fill = 'grey12'
) +
geom_path(size = 0.4, color = "black") +
geom_point(
data = dataB,
aes(x = depth1, y = water1),
size = 0.1,
color = 'black'
) +
geom_errorbarh(
data = dataB,
aes(
x = depth1,
xmin = from,
xmax = to,
y = water1
),
size = 0.1,
height = 0
) +
theme_bw(12) +
scale_x_reverse(lim = c(10, 0), breaks = seq(0, 10, 1)) +
scale_y_continuous(lim = c(0, 100), breaks = seq(0, 100, 20)) +
coord_flip()
This question already has answers here:
Control transparency of smoother and confidence interval
(2 answers)
Closed 5 years ago.
I am using alpha to set transparency for the smoothing line in ggplot but instead I get transparency only in the Error band that surrounds the fitted line.
My code is the following:
z1 <- rnorm(10)
z2 <- z1 ^ 2
error <- rnorm(10, 0.25)
y <- 1 + 0.5 * z1 + error
data1 <- data.table(y, z1, z2)
ggplot(data1) +
geom_point(aes(x = z1, y = y), color = "blue", size = 3) +
geom_point(aes(x = z2, y = y), color = "red", size = 3) +
geom_smooth(method = lm, aes(x = z1, y = y), color = "blue", size = 2, alpha = 0.1) +
geom_smooth(method = lm, aes(x = z2, y = y), color = "red", size = 2, alpha = 0.1)
The output is this:
How can I set explicitly the transparency of the regression line as well?
Your advice will be appreciated.
You can use geom_line for more fine-grained control of the regression line.
ggplot(data1) +
geom_point(aes(x = z1, y = y), color = "blue", size = 3) +
geom_point(aes(x = z2, y = y), color = "red", size = 3) +
geom_line(stat = "smooth", method = lm, aes(x = z1, y = y), color = "blue", size = 2, alpha = 0.1) +
geom_line(stat = "smooth", method = lm, aes(x = z2, y = y), color = "red", size = 2, alpha = 0.1) +
geom_smooth(method = lm, aes(x = z1, y = y), color = NA, size = 2, alpha = 0.1) +
geom_smooth(method = lm, aes(x = z2, y = y), color = NA, size = 2, alpha = 0.1)