Turn off scatter plot and print only regression line - r

I'm trying to fit a regression curve to my data. My code generates the plot and curve that I want, however, I don't need the scatter plot--only the line. If I comment out the plot, my code fails. Is there a way to (bypass, turn-off, hide) the scatter plot?
Ultimately, I will need to compare multiple regression curves on my graph and the scatter charts become distracting. Also, my R2 shows NULL. Is there a coefficient for R2?
Code below.
# get underlying plot
y <- dataset$'Fuel Consumed 24h'
x <-dataset$'Performance Speed'
plot(x, y, xlab = "Performance Speed", ylab = "24h Fuel Consumption")
# polynomial
f <- function(x,a,b,d) {(a*x^2) + (b*x) + d}
fit <- nls(y ~ f(x,a,b,d), start = c(a=1, b=1, d=1))
co <- round(coef(fit), 2)
r2 <- format(summary(fit)$r.squared, digits = 3)
curve(f(x, a=co[1], b=co[2], d=co[3]), add = TRUE, col="red", lwd=2)
eq <- paste0("Fuel = ", co[1], "PS^2 ", ifelse(sign(co[2]) == 1, " + ", " - "), abs(co[2]), "PS +", co[3], " R2 = ", r2)
# print equation
mtext(eq, 3, line =-2)
mylabel = bquote(italic(R)^2 == .(format(r2, digits = 3)))
text(x = 1, y = 2.5, r2)

Here is an example with cars data
Fit:
data(cars)
f <- function(x,a,b,d) {(a*x^2) + (b*x) + d}
fit <- nls(dist ~ f(speed,a,b,d), data = cars, start = c(a=1, b=1, d=1))
goodness of fit: (as user20650 pointed out R2 makes little sense for a non linear model, perhaps a better metric is RMSE)
rmse <- function(error)
{
sqrt(mean(error^2))
}
error <- predict(fit, cars) - cars$dist
rms <- rmse(error)
eq <- paste0("Fuel = ", co[1], "PS^2 ", ifelse(sign(co[2]) == 1, " + ", " - "), abs(co[2]), "PS +", co[3], " RMSE = ", round(rms, 2))
plot (no need to call plot at all - add other curves with add = T):
curve(f(x, a=co[1], b=co[2], d=co[3]), col="red", lwd=2, from = min(cars$speed), to = max(cars$speed))
mtext(eq, 3, line =-2)
to add another curve:
f2 = function(x, a, b) {a + b*x}
co2 = coef(lm(dist ~ speed, data = cars))
curve(f2(x, a = co2[1], b = co2[2]), col="blue", lwd=2, add = T)
EDIT: as per user20650 suggestion (the nls is really not needed since the poly and nls curves are the same)
co3 = coef(lm(dist ~ poly(speed, 2, raw=TRUE), data = cars))
curve(f3(x, a = co3[1], b = co3[2], c = co3[3]), col="grey", lty = 2, lwd=2, add = T)
legend("topleft", legend = c("nls", "lm", "poly"), col = c("red", "blue", "grey"), lty =c(1,1,2))

Related

lines function returning too many random lines

I have a weird problem with drawing a graph with confidence intervals for predictions.
Here is my code:
rm(list = ls())
cat("\014")
set.seed(1)
file.name <- "testRegresji.pdf"
count = 20
pdf(file.name)
x <- runif(count, 0, 2)
y <- x + rnorm(count)
model <-lm(y ~ x)
xlab.label <- paste("y = ", format(model$coeff[1], digits = 4),
"+", format(model$coeff[2], digits = 4),
"* x + e")
plot(x, y, xlab = xlab.label, ylab = "", main = paste("n = ", count), col = 8)
matlines(x, predict(model, interval = "confidence"),
type = 'l', lty = c(1, 2, 2), col = "black")
abline(0, 1, col = grey(0.4), lwd = 3)
dev.off()
shell.exec(paste(getwd(), "/", file.name, sep = ""))
The resulting graph looks very weird with too many random lines for confidence intervals, although the result of predict function are correct.
Here's the screenshot of the graph:
What could be the issue? Thanks a lot for any help!

How to find doubling time of cells with scatterplot in R?

I'm trying to calculate the doubling time of cells using a scatterplot. This is my dataframe
df = data.frame("x" = 1:5, "y" = c(246, 667, 1715, 4867, 11694))
and I've graphed this dataframe using this code
plot(df$x, df$y, xlab = "days", ylab = "cells mL -1")
Does anyone know how to calculate the doubling time of these cells using the graph? the equation for doubling time is (ln(2)/rate constant)
Plot log2(y) vs. x suppressing the Y axis so that we can build a nicer one. We also improved the Y axis label slightly. Then use axis to build a pretty axis and calculate the doubling time. Note that the formula for doubling time in the question works if the rate constant is the slope of the log(y) ~ x regression line but if we use the regression log2(y) ~ x, i.e. log2 instead of log, then the correct formula is just 1/slope. We show both below.
plot(df$x, log2(df$y), xlab = "days", ylab = "cells/mL", yaxt = "n")
s <- 1:round(log2(max(df$y)))
axis(2, s, parse(text = sprintf("2^%d", s)))
fm <- lm(log2(y) ~ x, df)
abline(fm)
doubling.time <- 1/coef(fm)[[2]]
doubling.time
## [1] 0.7138163
log(2)/coef(lm(log(y) ~ x, df))[[2]] # same
## [1] 0.7138163
legend("topleft", paste("doubling time:", round(doubling.time, 3), "days"), bty = "n")
You can visualize the constant rate of change with ggplot2 by scaling the y-axis accordingly:
library(dplyr)
library(ggplot2)
library(broom)
library(scales)
df = data.frame("x" = 1:5, "y" = c(246, 667, 1715, 4867, 11694))
fit <- lm(data = df, log2(y) ~ x)
tidy_fit <- tidy(fit) %>%
mutate(x = 3, y = 2048)
ggplot(df, aes(x = x, y = y)) +
geom_point() +
scale_y_continuous(name = "log2(y)",
trans = 'log2',
breaks = trans_breaks("log2", function(x) 2^x),
labels = trans_format("log2", math_format(2^.x))) +
geom_smooth(method = "lm", se = FALSE) +
geom_text(tidy_fit,
mapping = aes(
x = x,
y = y,
label = paste0("log2(y) = ", round(estimate[1], 2), " + ", round(estimate[2], 2), "x",
"\n", "Doubling Time: ", round(1 / tidy_fit$estimate[2], 2), " Days")
),
nudge_x = -1,
nudge_y = 0.5,
hjust = 0)
Created on 2020-02-03 by the reprex package (v0.3.0)
You can plot the points to show the exponential rise and then linearize the function by applying log2 to the y values. With that you can plot and do a linear fit:
df = data.frame("x" = 1:5, "y" = c(246, 667, 1715, 4867, 11694))
plot(df) # plot not displayed
plot(df$x, log2(df$y))
abline(lm(log2(y)~x,df))
lm(log2(y)~x,df)
#-------------------
Call:
lm(formula = log2(y) ~ x, data = df)
Coefficients:
(Intercept) x
6.563 1.401 #the x-coefficient is the slope of the line
#---------------------
log(2)/1.4
#[1] 0.4951051
Checking with the original (not-displayed plot that does look like a sensible estimate of doubling time. Be sure to cite this posting if this happens to be a homework problem.
If I were tasked with using the original graph, first draw an exponential curve by hand. I would then draw two horizontal lines at y= 2000 and y=4000 and then drop perpendicular lines from their intersections with the curve and read off the difference in x values on the horizontal axis.That is what I meant by my comment above that I "checked" the log2/x-coef value for sensibility.

After fitting the cumulative distribution in R creating the normal distribution from fitted parameters

After successfully fitting my cumulative data with Gompertz function, I need to create normal distribution from fitted function.
This is the code so far:
df <- data.frame(x = c(0.01,0.011482,0.013183,0.015136,0.017378,0.019953,0.022909,0.026303,0.0302,0.034674,0.039811,0.045709,0.052481,0.060256,0.069183,0.079433,0.091201,0.104713,0.120226,0.138038,0.158489,0.18197,0.20893,0.239883,0.275423,0.316228,0.363078,0.416869,0.47863,0.549541,0.630957,0.724436,0.831764,0.954993,1.096478,1.258925,1.44544,1.659587,1.905461,2.187762,2.511886,2.884031,3.311311,3.801894,4.365158,5.011872,5.754399,6.606934,7.585776,8.709636,10,11.481536,13.182567,15.135612,17.378008,19.952623,22.908677,26.30268,30.199517,34.673685,39.810717,45.708819,52.480746,60.255959,69.183097,79.432823,91.201084,104.712855,120.226443,138.038426,158.489319,181.970086,208.929613,239.883292,275.42287,316.227766,363.078055,416.869383,478.630092,549.540874,630.957344,724.43596,831.763771,954.992586,1096.478196),
y = c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.00044816,0.00127554,0.00221488,0.00324858,0.00438312,0.00559138,0.00686054,0.00817179,0.00950625,0.01085188,0.0122145,0.01362578,0.01514366,0.01684314,0.01880564,0.02109756,0.0237676,0.02683182,0.03030649,0.0342276,0.03874555,0.04418374,0.05119304,0.06076553,0.07437854,0.09380666,0.12115065,0.15836926,0.20712933,0.26822017,0.34131335,0.42465413,0.51503564,0.60810697,0.69886817,0.78237651,0.85461023,0.91287236,0.95616228,0.98569093,0.99869001,0.99999999,0.99999999,0.99999999,0.99999999,0.99999999,0.99999999,0.99999999,0.99999999,0.99999999,0.99999999,0.99999999,0.99999999,0.99999999))
library(drc)
fm <- drm(y ~ x, data = df, fct = G.3())
options(scipen = 10) #to avoid scientific notation in x axis
plot(df$x, predict(fm),type = "l", log = "x",col = "blue",
main = "Cumulative function distribution",xlab = "x", ylab = "y")
points(df,col = "red")
legend("topleft", inset = .05,legend = c("exp","fit")
,lty = c(NA,1), col = c("red", "blue"), pch = c(1,NA), lwd=1, bty = "n")
summary(fm)
And this is the following plot:
My idea is now to transform somehow this cumulative fit to the normal distribution. Is there any idea how could I do that?
While your original intention might be non-parametric, I suggest using parametric estimation method: method of moments, which is widely used for problems like this, because you have a certain parametric distribution (normal distribution) to fit. The idea is quite simple, from the fitted cumulative distribution function, you can calculate the mean (E1 in my code) and variance (square of SD in my code), and then the problem is solved, because normal distribution can be totally determined by mean and variance.
df <- data.frame(x=c(0.01,0.011482,0.013183,0.015136,0.017378,0.019953,0.022909,0.026303,0.0302,0.034674,0.039811,0.045709,0.052481,0.060256,0.069183,0.079433,0.091201,0.104713,0.120226,0.138038,0.158489,0.18197,0.20893,0.239883,0.275423,0.316228,0.363078,0.416869,0.47863,0.549541,0.630957,0.724436,0.831764,0.954993,1.096478,1.258925,1.44544,1.659587,1.905461,2.187762,2.511886,2.884031,3.311311,3.801894,4.365158,5.011872,5.754399,6.606934,7.585776,8.709636,10,11.481536,13.182567,15.135612,17.378008,19.952623,22.908677,26.30268,30.199517,34.673685,39.810717,45.708819,52.480746,60.255959,69.183097,79.432823,91.201084,104.712855,120.226443,138.038426,158.489319,181.970086,208.929613,239.883292,275.42287,316.227766,363.078055,416.869383,478.630092,549.540874,630.957344,724.43596,831.763771,954.992586,1096.478196),
y=c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.00044816,0.00127554,0.00221488,0.00324858,0.00438312,0.00559138,0.00686054,0.00817179,0.00950625,0.01085188,0.0122145,0.01362578,0.01514366,0.01684314,0.01880564,0.02109756,0.0237676,0.02683182,0.03030649,0.0342276,0.03874555,0.04418374,0.05119304,0.06076553,0.07437854,0.09380666,0.12115065,0.15836926,0.20712933,0.26822017,0.34131335,0.42465413,0.51503564,0.60810697,0.69886817,0.78237651,0.85461023,0.91287236,0.95616228,0.98569093,0.99869001,0.99999999,0.99999999,0.99999999,0.99999999,0.99999999,0.99999999,0.99999999,0.99999999,0.99999999,0.99999999,0.99999999,0.99999999,0.99999999))
library(drc)
fm <- drm(y ~ x, data = df, fct = G.3())
options(scipen = 10) #to avoid scientific notation in x axis
plot(df$x, predict(fm),type="l", log = "x",col="blue", main="Cumulative distribution function",xlab="x", ylab="y")
points(df,col="red")
E1 <- sum((df$x[-1] + df$x[-length(df$x)]) / 2 * diff(predict(fm)))
E2 <- sum((df$x[-1] + df$x[-length(df$x)]) ^ 2 / 4 * diff(predict(fm)))
SD <- sqrt(E2 - E1 ^ 2)
points(df$x, pnorm((df$x - E1) / SD), col = "green")
legend("topleft", inset = .05,legend= c("exp","fit","method of moment")
,lty = c(NA,1), col = c("red", "blue", "green"), pch = c(1,NA), lwd=1, bty="n")
summary(fm)
And the estimation results:
## > E1 (mean of fitted normal distribution)
## [1] 65.78474
## > E2 (second moment of fitted normal distribution)
##[1] 5792.767
## > SD (standard deviation of fitted normal distribution)
## [1] 38.27707
## > SD ^ 2 (variance of fitted normal distribution)
## [1] 1465.134
Edit: updated method to calculate moments from cdf fitted by drc. The function moment defined below calculates moment estimation using the moment formula for continuous r.v. E(X ^ k) = k * \int x ^ {k - 1} (1 - cdf(x)) dx. These are the best estimation I can get from the fitted cdf. And the fit is not very good when x is near zero because of the reason in original datasets as I discussed in comments.
df <- data.frame(x=c(0.01,0.011482,0.013183,0.015136,0.017378,0.019953,0.022909,0.026303,0.0302,0.034674,0.039811,0.045709,0.052481,0.060256,0.069183,0.079433,0.091201,0.104713,0.120226,0.138038,0.158489,0.18197,0.20893,0.239883,0.275423,0.316228,0.363078,0.416869,0.47863,0.549541,0.630957,0.724436,0.831764,0.954993,1.096478,1.258925,1.44544,1.659587,1.905461,2.187762,2.511886,2.884031,3.311311,3.801894,4.365158,5.011872,5.754399,6.606934,7.585776,8.709636,10,11.481536,13.182567,15.135612,17.378008,19.952623,22.908677,26.30268,30.199517,34.673685,39.810717,45.708819,52.480746,60.255959,69.183097,79.432823,91.201084,104.712855,120.226443,138.038426,158.489319,181.970086,208.929613,239.883292,275.42287,316.227766,363.078055,416.869383,478.630092,549.540874,630.957344,724.43596,831.763771,954.992586,1096.478196),
y=c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.00044816,0.00127554,0.00221488,0.00324858,0.00438312,0.00559138,0.00686054,0.00817179,0.00950625,0.01085188,0.0122145,0.01362578,0.01514366,0.01684314,0.01880564,0.02109756,0.0237676,0.02683182,0.03030649,0.0342276,0.03874555,0.04418374,0.05119304,0.06076553,0.07437854,0.09380666,0.12115065,0.15836926,0.20712933,0.26822017,0.34131335,0.42465413,0.51503564,0.60810697,0.69886817,0.78237651,0.85461023,0.91287236,0.95616228,0.98569093,0.99869001,0.99999999,0.99999999,0.99999999,0.99999999,0.99999999,0.99999999,0.99999999,0.99999999,0.99999999,0.99999999,0.99999999,0.99999999,0.99999999))
library(drc)
fm <- drm(y ~ x, data = df, fct = G.3())
moment <- function(k){
f <- function(x){
x ^ (k - 1) * pmax(0, 1 - predict(fm, data.frame(x = x)))
}
k * integrate(f, lower = min(df$x), upper = max(df$x))$value
}
E1 <- moment(1)
E2 <- moment(2)
SD <- sqrt(E2 - E1 ^ 2)
I was thinking of the cumdiff (for lack of a better term). The link helped a lot.
EDIT
plot(df$x[-1], Mod(df$y[-length(df$y)]-df$y[-1]), log = "x", type = "b",
main = "Normal distribution for original data",
xlab = "x", ylab = "y")
yielding:
ADDITION
In order to get the Gaussian from the fittedfunction:
df$y_pred<-predict(fm)
plot(df$x[-1], Mod(df$y_pred[-length(df$y_pred)]-df$y_pred[-1]), log = "x",
type = "b", main="Normal distribution for fitted function",
xlab = "x", lab = "y")
yielding:

Include weibull fit in ggsurvplot

I would like to fit a weibull curve to some event data and then include the fitted weibull curve in a survival plot plotted by survminer::ggsurvplot. Any ideas of how?
Here is an example to work on:
A function for simulating weibull data:
# N = sample size
# lambda = scale parameter in h0()
# rho = shape parameter in h0()
# beta = fixed effect parameter
# rateC = rate parameter of the exponential distribution of C
simulWeib <- function(N, lambda, rho, beta, rateC)
{
# covariate --> N Bernoulli trials
x <- sample(x=c(0, 1), size=N, replace=TRUE, prob=c(0.5, 0.5))
# Weibull latent event times
v <- runif(n=N)
Tlat <- (- log(v) / (lambda * exp(x * beta)))^(1 / rho)
# censoring times
C <- rexp(n=N, rate=rateC)
# follow-up times and event indicators
time <- pmin(Tlat, C)
status <- as.numeric(Tlat <= C)
# data set
data.frame(id=1:N,
time=time,
status=status,
x=x)
}
generate data
set.seed(1234)
betaHat <- rep(NA, 1e3)
for(k in 1:1e3)
{
dat <- simulWeib(N=100, lambda=0.01, rho=1, beta=-0.6, rateC=0.001)
fit <- coxph(Surv(time, status) ~ x, data=dat)
betaHat[k] <- fit$coef
}
#Estimate a survival function
survfit(Surv(as.numeric(time), x)~1, data=dat) -> out0
#plot
library(survminer)
ggsurvplot(out0, data = dat, risk.table = TRUE)
gg1 <- ggsurvplot(
out0, # survfit object with calculated statistics.
data = dat, # data used to fit survival curves.
risk.table = TRUE, # show risk table.
pval = TRUE, # show p-value of log-rank test.
conf.int = TRUE, # show confidence intervals for
# point estimaes of survival curves.
xlim = c(0,2000), # present narrower X axis, but not affect
# survival estimates.
break.time.by = 500, # break X axis in time intervals by 500.
ggtheme = theme_minimal(), # customize plot and risk table with a theme.
risk.table.y.text.col = T, # colour risk table text annotations.
risk.table.y.text = FALSE,
surv.median.line = "hv",
color = "darkgreen",
conf.int.fill = "lightblue",
title = "Survival probability",# show bars instead of names in text annotations
# in legend of risk table
)
gg1
As far as I see this, it is not possible do it with ggsurvplot at this moment.
I created an issue requesting this feature: https://github.com/kassambara/survminer/issues/276
You can plot survivor curves of a weibull model with ggplot2 like this:
library("survival")
wbmod <- survreg(Surv(time, status) ~ x, data = dat)
s <- seq(.01, .99, by = .01)
t_0 <- predict(wbmod, newdata = data.frame(x = 0),
type = "quantile", p = s)
t_1 <- predict(wbmod, newdata = data.frame(x = 1),
type = "quantile", p = s)
smod <- data.frame(time = c(t_0, t_1),
surv = rep(1 - s, times = 2),
strata = rep(c(0, 1), each = length(s)),
upper = NA, lower = NA)
head(surv_summary(cm))
library("ggplot2")
ggplot() +
geom_line(data = smod, aes(x = time, y = surv, color = factor(strata))) +
theme_classic()
However to my knowledge you cannot use survminer (yet):
library("survminer")
# wrong:
ggsurvplot(smod)
# does not work:
gg1$plot + geom_line(data = smod, aes(x = time, y = surv, color = factor(strata)))
The following works for me. Probably the credit goes to Heidi filling a feature request.
Hope, someone finds this useful.
library(survminer)
library(tidyr)
s <- with(lung,Surv(time,status))
sWei <- survreg(s ~ as.factor(sex),dist='weibull',data=lung)
fKM <- survfit(s ~ sex,data=lung)
pred.sex1 = predict(sWei, newdata=list(sex=1),type="quantile",p=seq(.01,.99,by=.01))
pred.sex2 = predict(sWei, newdata=list(sex=2),type="quantile",p=seq(.01,.99,by=.01))
df = data.frame(y=seq(.99,.01,by=-.01), sex1=pred.sex1, sex2=pred.sex2)
df_long = gather(df, key= "sex", value="time", -y)
p = ggsurvplot(fKM, data = lung, risk.table = T)
p$plot = p$plot + geom_line(data=df_long, aes(x=time, y=y, group=sex))

how to find 95% confidence bands for predicting mean y per value of x and 95% prediction bands for predicting individual y values [duplicate]

If I have 10 values, each of which has a fitted value F, and an upper and lower confidence interval U and L:
set.seed(0815)
F <- runif(10, 1, 2)
L <- runif(10, 0, 1)
U <- runif(10, 2, 3)
How can I show these 10 fitted values and their confidence intervals in the same plot like the one below in R?
Here is a plotrix solution:
set.seed(0815)
x <- 1:10
F <- runif(10,1,2)
L <- runif(10,0,1)
U <- runif(10,2,3)
require(plotrix)
plotCI(x, F, ui=U, li=L)
And here is a ggplot solution:
set.seed(0815)
df <- data.frame(x =1:10,
F =runif(10,1,2),
L =runif(10,0,1),
U =runif(10,2,3))
require(ggplot2)
ggplot(df, aes(x = x, y = F)) +
geom_point(size = 4) +
geom_errorbar(aes(ymax = U, ymin = L))
UPDATE:
Here is a base solution to your edits:
set.seed(1234)
x <- rnorm(20)
df <- data.frame(x = x,
y = x + rnorm(20))
plot(y ~ x, data = df)
# model
mod <- lm(y ~ x, data = df)
# predicts + interval
newx <- seq(min(df$x), max(df$x), length.out=100)
preds <- predict(mod, newdata = data.frame(x=newx),
interval = 'confidence')
# plot
plot(y ~ x, data = df, type = 'n')
# add fill
polygon(c(rev(newx), newx), c(rev(preds[ ,3]), preds[ ,2]), col = 'grey80', border = NA)
# model
abline(mod)
# intervals
lines(newx, preds[ ,3], lty = 'dashed', col = 'red')
lines(newx, preds[ ,2], lty = 'dashed', col = 'red')
Here is a solution using functions plot(), polygon() and lines().
set.seed(1234)
df <- data.frame(x =1:10,
F =runif(10,1,2),
L =runif(10,0,1),
U =runif(10,2,3))
plot(df$x, df$F, ylim = c(0,4), type = "l")
#make polygon where coordinates start with lower limit and
# then upper limit in reverse order
polygon(c(df$x,rev(df$x)),c(df$L,rev(df$U)),col = "grey75", border = FALSE)
lines(df$x, df$F, lwd = 2)
#add red lines on borders of polygon
lines(df$x, df$U, col="red",lty=2)
lines(df$x, df$L, col="red",lty=2)
Now use example data provided by OP in another question:
Lower <- c(0.418116841, 0.391011834, 0.393297710,
0.366144073,0.569956636,0.224775521,0.599166016,0.512269587,
0.531378573, 0.311448219, 0.392045751,0.153614913, 0.366684097,
0.161100849,0.700274810,0.629714150, 0.661641288, 0.533404093,
0.412427559, 0.432905333, 0.525306427,0.224292061,
0.28893064,0.099543648, 0.342995605,0.086973739,0.289030388,
0.081230826,0.164505624, -0.031290586,0.148383474,0.070517523,0.009686605,
-0.052703529,0.475924192,0.253382210, 0.354011010,0.130295355,0.102253218,
0.446598823,0.548330752,0.393985810,0.481691632,0.111811248,0.339626541,
0.267831909,0.133460254,0.347996621,0.412472322,0.133671128,0.178969601,0.484070587,
0.335833224,0.037258467, 0.141312363,0.361392799,0.129791998,
0.283759439,0.333893418,0.569533076,0.385258093,0.356201955,0.481816148,
0.531282473,0.273126565,0.267815691,0.138127486,0.008865700,0.018118398,0.080143484,
0.117861634,0.073697418,0.230002398,0.105855042,0.262367348,0.217799352,0.289108011,
0.161271889,0.219663224,0.306117717,0.538088622,0.320711912,0.264395149,0.396061543,
0.397350946,0.151726970,0.048650180,0.131914718,0.076629840,0.425849394,
0.068692279,0.155144797,0.137939059,0.301912657,-0.071415593,-0.030141781,0.119450922,
0.312927614,0.231345972)
Upper.limit <- c(0.6446223,0.6177311, 0.6034427, 0.5726503,
0.7644718, 0.4585430, 0.8205418, 0.7154043,0.7370033,
0.5285199, 0.5973728, 0.3764209, 0.5818298,
0.3960867,0.8972357, 0.8370151, 0.8359921, 0.7449118,
0.6152879, 0.6200704, 0.7041068, 0.4541011, 0.5222653,
0.3472364, 0.5956551, 0.3068065, 0.5112895, 0.3081448,
0.3745473, 0.1931089, 0.3890704, 0.3031025, 0.2472591,
0.1976092, 0.6906118, 0.4736644, 0.5770463, 0.3528607,
0.3307651, 0.6681629, 0.7476231, 0.5959025, 0.7128883,
0.3451623, 0.5609742, 0.4739216, 0.3694883, 0.5609220,
0.6343219, 0.3647751, 0.4247147, 0.6996334, 0.5562876,
0.2586490, 0.3750040, 0.5922248, 0.3626322, 0.5243285,
0.5548211, 0.7409648, 0.5820070, 0.5530232, 0.6863703,
0.7206998, 0.4952387, 0.4993264, 0.3527727, 0.2203694,
0.2583149, 0.3035342, 0.3462009, 0.3003602, 0.4506054,
0.3359478, 0.4834151, 0.4391330, 0.5273411, 0.3947622,
0.4133769, 0.5288060, 0.7492071, 0.5381701, 0.4825456,
0.6121942, 0.6192227, 0.3784870, 0.2574025, 0.3704140,
0.2945623, 0.6532694, 0.2697202, 0.3652230, 0.3696383,
0.5268808, 0.1545602, 0.2221450, 0.3553377, 0.5204076,
0.3550094)
Fitted.values<- c(0.53136955, 0.50437146, 0.49837019,
0.46939721, 0.66721423, 0.34165926, 0.70985388, 0.61383696,
0.63419092, 0.41998407, 0.49470927, 0.26501789, 0.47425695,
0.27859380, 0.79875525, 0.73336461, 0.74881668, 0.63915795,
0.51385774, 0.52648789, 0.61470661, 0.33919656, 0.40559797,
0.22339000, 0.46932536, 0.19689011, 0.40015996, 0.19468781,
0.26952645, 0.08090917, 0.26872696, 0.18680999, 0.12847285,
0.07245286, 0.58326799, 0.36352329, 0.46552867, 0.24157804,
0.21650915, 0.55738088, 0.64797691, 0.49494416, 0.59728999,
0.22848680, 0.45030036, 0.37087676, 0.25147426, 0.45445930,
0.52339711, 0.24922310, 0.30184215, 0.59185198, 0.44606040,
0.14795374, 0.25815819, 0.47680880, 0.24621212, 0.40404398,
0.44435727, 0.65524894, 0.48363255, 0.45461258, 0.58409323,
0.62599114, 0.38418264, 0.38357103, 0.24545011, 0.11461756,
0.13821664, 0.19183886, 0.23203127, 0.18702881, 0.34030391,
0.22090140, 0.37289121, 0.32846615, 0.40822456, 0.27801706,
0.31652008, 0.41746184, 0.64364785, 0.42944100, 0.37347037,
0.50412786, 0.50828681, 0.26510696, 0.15302635, 0.25116438,
0.18559609, 0.53955941, 0.16920626, 0.26018389, 0.25378867,
0.41439675, 0.04157232, 0.09600163, 0.23739430, 0.41666762,
0.29317767)
Assemble into a data frame (no x provided, so using indices)
df2 <- data.frame(x=seq(length(Fitted.values)),
fit=Fitted.values,lwr=Lower,upr=Upper.limit)
plot(fit~x,data=df2,ylim=range(c(df2$lwr,df2$upr)))
#make polygon where coordinates start with lower limit and then upper limit in reverse order
with(df2,polygon(c(x,rev(x)),c(lwr,rev(upr)),col = "grey75", border = FALSE))
matlines(df2[,1],df2[,-1],
lwd=c(2,1,1),
lty=1,
col=c("black","red","red"))
Here is part of my program related to plotting confidence interval.
1. Generate the test data
ads = 1
require(stats); require(graphics)
library(splines)
x_raw <- seq(1,10,0.1)
y <- cos(x_raw)+rnorm(len_data,0,0.1)
y[30] <- 1.4 # outlier point
len_data = length(x_raw)
N <- len_data
summary(fm1 <- lm(y~bs(x_raw, df=5), model = TRUE, x =T, y = T))
ht <-seq(1,10,length.out = len_data)
plot(x = x_raw, y = y,type = 'p')
y_e <- predict(fm1, data.frame(height = ht))
lines(x= ht, y = y_e)
Result
2. Fitting the raw data using B-spline smoother method
sigma_e <- sqrt(sum((y-y_e)^2)/N)
print(sigma_e)
H<-fm1$x
A <-solve(t(H) %*% H)
y_e_minus <- rep(0,N)
y_e_plus <- rep(0,N)
y_e_minus[N]
for (i in 1:N)
{
tmp <-t(matrix(H[i,])) %*% A %*% matrix(H[i,])
tmp <- 1.96*sqrt(tmp)
y_e_minus[i] <- y_e[i] - tmp
y_e_plus[i] <- y_e[i] + tmp
}
plot(x = x_raw, y = y,type = 'p')
polygon(c(ht,rev(ht)),c(y_e_minus,rev(y_e_plus)),col = rgb(1, 0, 0,0.5), border = NA)
#plot(x = x_raw, y = y,type = 'p')
lines(x= ht, y = y_e_plus, lty = 'dashed', col = 'red')
lines(x= ht, y = y_e)
lines(x= ht, y = y_e_minus, lty = 'dashed', col = 'red')
Result
Some addition to the previous answers. It is nice to regulate the density of the polygon to avoid obscuring the data points.
library(MASS)
attach(Boston)
lm.fit2 = lm(medv~poly(lstat,2))
plot(lstat,medv)
new.lstat = seq(min(lstat), max(lstat), length.out=100)
preds <- predict(lm.fit2, newdata = data.frame(lstat=new.lstat), interval = 'prediction')
lines(sort(lstat), fitted(lm.fit2)[order(lstat)], col='red', lwd=3)
polygon(c(rev(new.lstat), new.lstat), c(rev(preds[ ,3]), preds[ ,2]), density=10, col = 'blue', border = NA)
lines(new.lstat, preds[ ,3], lty = 'dashed', col = 'red')
lines(new.lstat, preds[ ,2], lty = 'dashed', col = 'red')
Please note that you see the prediction interval on the picture, which is several times wider than the confidence interval. You can read here the detailed explanation of those two types of interval estimates.

Resources