Leave One Out Cross Validation - R - KNN - r

I have a daraset with three columns. I'm trying to implement LOOCV for KNN regression. I 'm able to do it with the LOOCV library but I'm unable to write a manual code like a function. How can I write this?
Here is how my data looks
A1 A2 A3
a b m
c d n
. . .
. . .
. . .
I've tried the sample below:
loocv_tmp <- matrix(NA, nrow = n_train, ncol = length(df))
for (k in 1:n_train) {
train_xy <- xy[-k, ]
test_xy <- xy[k, ]
x <- train_xy$x
y <- train_xy$y
fitted_models <- apply(t(df), 2, function(degf) lm(y ~ ns(x, df = degf)))
pred <- mapply(function(obj, degf) predict(obj, data.frame(x = test_xy$x)),
fitted_models, df)
loocv_tmp[k, ] <- (test_xy$y - pred)^2
}
loocv <- colMeans(loocv_tmp)
plot(df, mse, type = "l", lwd = 2, col = gray(.4), ylab = "Prediction error",
xlab = "Flexibilty (spline's degrees of freedom [log scaled])",
main = "Leave-One-Out Cross-Validation", ylim = c(.1, .8), log = "x")
lines(df, cv, lwd = 2, col = "steelblue2", lty = 2)
lines(df, loocv, lwd = 2, col = "darkorange")
legend(x = "topright", legend = c("Training error", "10-fold CV error", "LOOCV error"),
lty = c(1, 2, 1), lwd = rep(2, 3), col = c(gray(.4), "steelblue2", "darkorange"),
text.width = .3, cex = .85)
But I want something like
lcv(train.data, train.label, K, numfold)
Any suggestion on how can I write the lcv function to perform LOOCV.
Thanks in advance.

Related

lines function returning too many random lines

I have a weird problem with drawing a graph with confidence intervals for predictions.
Here is my code:
rm(list = ls())
cat("\014")
set.seed(1)
file.name <- "testRegresji.pdf"
count = 20
pdf(file.name)
x <- runif(count, 0, 2)
y <- x + rnorm(count)
model <-lm(y ~ x)
xlab.label <- paste("y = ", format(model$coeff[1], digits = 4),
"+", format(model$coeff[2], digits = 4),
"* x + e")
plot(x, y, xlab = xlab.label, ylab = "", main = paste("n = ", count), col = 8)
matlines(x, predict(model, interval = "confidence"),
type = 'l', lty = c(1, 2, 2), col = "black")
abline(0, 1, col = grey(0.4), lwd = 3)
dev.off()
shell.exec(paste(getwd(), "/", file.name, sep = ""))
The resulting graph looks very weird with too many random lines for confidence intervals, although the result of predict function are correct.
Here's the screenshot of the graph:
What could be the issue? Thanks a lot for any help!

Problem analysing LME outcomes when I reduce the number of factors in a variable

I have a variable with 4 factors that I put into the LME model as an independent variable (Status).
The outcome in lme gives me 1 factor as a reference value and the other 3 referenced to that one. I have tried to apply 'emmeans' to get multiple comparisons as I am interested on knowing how each factor was behaving against each other, but it was not possible (perhaps because this data is longitudinal?). So I have created subsets in which each "status" variable only have 2 factors. The problem comes when I try to interpret the outcomes, as it seems that for example the factor A sometimes gets better and sometimes gets worse. (see pictures)
Can anyone help me?
This is the code I've used to create the graphs:
x <- c(0:600)/100
Hdata <- data.frame(Time = x, Status = "H", Init.Age = 43)
Ndata <- data.frame(Time = x, Status = "N", Init.Age = 45)
Fdata <- data.frame(Time = x, Status = "F", Init.Age = 40)
Adata <- data.frame(Time = x, Status = "A", Init.Age = 45)
yH <- predict(mod2.lme, newdata = Hdata, level = 0)
yN <- predict(mod2.lme, newdata = Ndata, level = 0)
yF <- predict(mod2.lme, newdata = Fdata, level = 0)
yA <- predict(mod2.lme, newdata = Adata, level = 0)
summary(long1$Ratio_a)
plot(x, yH, ylim = c(0.3, 0.6), ylab = "Average Pit Depth/Slope Disk Area", xlab = "Time (years)", type = "l")
lines(x, yF, col = "blue")
lines(x, yA, col = "red")
lines(x, yN, col = "green")
legend("bottomleft", legend = c("H", "A", "F", "N"), col = c("black", "red", "blue", "green"), lty =1, cex = 0.8, bty ="n")
Thanks a lot!
Lili

Enter an item from the menu, or 0 to exit

I am running a code from a hierarchical modelling in ecology book, from Kéry and Royle (2015), to learn about this topic. But I am getting this warning: No suitable frames for recover(). I would like to know what is the meaning of this stetment.
Here is the code:
cat(file = "ANCOVA2.txt","
model {
# Priors
for(k in 1:4){
alpha[k] ~ dnorm(0, 1.0E-06) # Priors for intercepts
beta[k] ~ dnorm(0, 1.0E-06) # Priors for slopes
}
tau <- pow(sd, -2)
sd ~ dunif(0, 1000) # Prior for dispersion on sd scale
# Likelihood
for (i in 1:M){
Cmean[i] ~ dnorm(mu[i], tau) # precision tau = 1 / variance
mu[i] <- alpha[facFor[i]] + beta[facFor[i]] * elev[i]
}
# Derived quantities: comparison of slopes
for(k in 1:4){
diff.vs1[k] <- beta[k] - beta[1] # Differences relative to beta[1]
diff.vs2[k] <- beta[k] - beta[2] # ... relative to beta[2]
diff.vs3[k] <- beta[k] - beta[3] # ... relative to beta[3]
diff.vs4[k] <- beta[k] - beta[4] # ... relative to beta[4]
}
}
")
# Initial values
inits <- function() list(alpha = rnorm(4,,10), beta = rnorm(4,,10))
# Parameters monitored
params <- c("alpha", "beta", "sd", "diff.vs1", "diff.vs2", "diff.vs3",
"diff.vs4")
# MCMC settings
ni <- 6000 ; nt <- 1 ; nb <- 1000 ; nc <- 3
out4J <- jags(win.data, inits, params, "ANCOVA2.txt", n.chains = nc,
n.thin = nt, n.iter = ni, n.burnin = nb)
traceplot(out4J)
print(out4J, 2)
#plot the predicted response as a function of the explanatory variables
facFor and elev
plot(elev[facFor==1], Cmean[facFor==1], col = "red", ylim = c(0, 20), xlab =
"Elevation",
ylab = "", frame.plot = F)
points(elev[facFor==2], Cmean[facFor==2], col = "blue")
points(elev[facFor==3], Cmean[facFor==3], col = "green")
points(elev[facFor==4], Cmean[facFor==4], col = "black")
attach.bugs(out4J) # Allows to directly address the sims.list
#No sé si esta función se puede usar con JAGSUI, tal vez con rjags
str(diff.vs3)
par(mfrow = c(1, 3), mar = c(5,5,3,2), cex.lab = 1.5, cex.axis = 1.5)
hist(diff.vs3[,1], col = "grey", breaks = 100, main = "", freq=F, ylim = c(0,
0.8))
abline(v = 1, lwd = 3, col = "red")
hist(diff.vs3[,2], col = "grey", breaks = 100, main = "", freq=F, ylim = c(0,
0.8))
abline(v = 1, lwd = 3, col = "red")
hist(diff.vs3[,4], col = "grey", breaks = 100, main = "", freq=F, ylim = c(0,
0.8))
abline(v = 1, lwd = 3, col = "red")
diff.vs3
# Prob. difference greater than 1
mean(diff.vs3[,1] > 1)
mean(diff.vs3[,2] > 1)
mean(diff.vs3[,4] > 1)
Thank you very much!
Regards
Antonela

Add the new regression line but keep the regression lines from previous runs in R

Background
I have a function called TPN (R code is below the picture). When you run this function, it produces two plots (see picture below). The bottom-row plot samples from the top-row plot and then adds a red regression line. Each time you run the TPN function, the bottom-row plot produces a new red-colored regression line.
Question
In the bottom-row plot, I was wondering if there is a way I could KEEP the regression lines from previous runs each time I run the TPN function (see picture below)?
That is, each time that I run a new TPN function the regression line from a previous run is kept in its place (probably in a color other than "red" for distinction purposes), and the new regression line is just added to he bottom-row plot?
############## Input Values #################
TPN = function( each.sub.pop.n = 150,
sub.pop.means = 20:10,
predict.range = 10:0,
sub.pop.sd = .75,
n.sample = 2 ) {
#############################################
par( mar = c(2, 4.1, 2.1, 2.1) )
m = matrix( c(1, 2), nrow = 2, ncol = 1 ); layout(m)
set.seed(2460986)
Vec.rnorm <- Vectorize(function(n, mean, sd) rnorm(n, mean, sd), 'mean')
y <- c( Vec.rnorm(each.sub.pop.n, sub.pop.means, sub.pop.sd) )
set.seed(NULL)
x <- rep(predict.range, each = each.sub.pop.n)
plot(x, y, ylim = range(y)) ## Top-Row Plot
sample <- lapply(split(y, x), function(z) sample(z, n.sample, replace = TRUE))
sample <- data.frame(y = unlist(sample),
x = as.numeric(rep(names(sample), each = n.sample)))
x = sample$x ; y = sample$y
plot(x, y, ylim = range(y)) #### BOTTOM-ROW PLOT
abline(lm(y ~ x), col = 'red') # Regression Line
}
## TEST HERE:
TPN()
It ain't that easy. I made another function and edit the first one as well.
To summarize what I have done:
I made the first function to set par(new = TRUE) at the end of it. Also, set the color for points in the bottom row plot to be white only for formatting. You can get rid of col = 'white', bg = 'white' if you wish.
Then, in the second function top row plot does not get plotted and yaxis won't be added to the bottom row plot from each "test".
Look below:
############## Input Values #################
TPN = function( each.sub.pop.n = 150,
sub.pop.means = 20:10,
predict.range = 10:0,
sub.pop.sd = .75,
n.sample = 2 ) {
#############################################
par( mar = c(2, 4.1, 2.1, 2.1) )
m = matrix( c(1, 2), nrow = 2, ncol = 1 ); layout(m)
set.seed(2460986)
Vec.rnorm <- Vectorize(function(n, mean, sd) rnorm(n, mean, sd), 'mean')
y <- c( Vec.rnorm(each.sub.pop.n, sub.pop.means, sub.pop.sd) )
set.seed(NULL)
x <- rep(predict.range, each = each.sub.pop.n)
par(new = FALSE)
plot(x, y, ylim = range(y)) ## Top-Row Plot
sample <- lapply(split(y, x), function(z) sample(z, n.sample, replace = TRUE))
sample <- data.frame(y = unlist(sample),
x = as.numeric(rep(names(sample), each = n.sample)))
x = sample$x ; y = sample$y
plot(x, y, ylim = range(y), col = 'white', bg = 'white') #### BOTTOM-ROW PLOT
abline(lm(y ~ x), col = 'red') # Regression Line
par(new = TRUE)
}
The second one does not plot the top row one:
############## Input Values #################
TPN2 = function( each.sub.pop.n = 150,
sub.pop.means = 20:10,
predict.range = 10:0,
sub.pop.sd = .75,
n.sample = 2 ) {
#############################################
par( mar = c(2, 4.1, 2.1, 2.1) )
m = matrix( c(1, 2), nrow = 2, ncol = 1 ); layout(m)
set.seed(2460986)
Vec.rnorm <- Vectorize(function(n, mean, sd) rnorm(n, mean, sd), 'mean')
y <- c( Vec.rnorm(each.sub.pop.n, sub.pop.means, sub.pop.sd) )
set.seed(NULL)
x <- rep(predict.range, each = each.sub.pop.n)
#par(new = FALSE) #comment-out
#plot(x, y, ylim = range(y)) ##Top-Row Plot #comment-out
sample <- lapply(split(y, x), function(z) sample(z, n.sample, replace = TRUE))
sample <- data.frame(y = unlist(sample),
x = as.numeric(rep(names(sample), each = n.sample)))
x = sample$x ; y = sample$y
plot(x, y, ylim = range(y), axes = FALSE, col = 'white', bg = 'white') ##BOTTOM-ROW PLOT
abline(lm(y ~ x), col = 'blue') # Regression Line
par(new = TRUE)
}
Then your test would be like this:
## TEST HERE:
TPN()
TPN2()
TPN2()
TPN2()
This is the output:
A simple way to do what you want is to change your main effect (currently none) to return an accumulation of previous regressions and your side effect (plotting) to loop through these previous regressions (in blue) in addition to the current one (in red).
Another tip: you can use the abline(reg=lm(y~x)) argument and just accumulate the lm objects in a list. It's not necessary to store coefficients and intercepts separately as suggested in the other answer. Keeping the lm objects is also a good idea in case you want to go back and look at average R-squared, etc. -- you couldn't do that using only the coefficients.
Your new function could look like:
TPN.accum <- function( each.sub.pop.n = 150,
sub.pop.means = 20:10,
predict.range = 10:0,
sub.pop.sd = .75,
n.sample = 2,
lm.history = list() # the accumulator
){
par( mar = c(2, 4.1, 2.1, 2.1) )
m <- matrix( c(1, 2), nrow = 2, ncol = 1 ); layout(m)
set.seed(2460986)
Vec.rnorm <- Vectorize(function(n, mean, sd) rnorm(n, mean, sd), 'mean')
y <- c( Vec.rnorm(each.sub.pop.n, sub.pop.means, sub.pop.sd) )
set.seed(NULL)
x <- rep(predict.range, each = each.sub.pop.n)
plot(x, y, ylim = range(y)) ### Top-Row Plot
sample <- lapply(split(y, x), function(z) sample(z, n.sample, replace = TRUE))
sample <- data.frame(y = unlist(sample),
x = as.numeric(rep(names(sample), each = n.sample)))
x <- sample$x ; y <- sample$y
lm.current <- lm(y~x) # the current regression
plot(x, y, ylim = range(y)) ### Bottom-Row Plot
abline(reg = lm.current, col = 'red') # plot current regression (red)
for( i in seq_along(lm.history) ){
abline(reg=lm.history[[i]], col='blue') # plot any previous regressions (blue)
}
return(c(lm.history, list(lm.current))) # append current regression to accumulator
}
To initialize it and then run it repeatedly, just do something like:
tpn.history <- TPN.accum()
for (i in 1:5) tpn.history <- TPN.accum(lm.history=tpn.history)
And your output will look like:
I propose two possibilities:
Use par(mfg) to define on which panel to draw, so that you can add new points or lines on any of the two. For the color, I propose to add options saying if this is the first plot or the last plot of the series.
Store the coefficients of the abline to be used on other plots.
Use par(mfg)
I used some transparent color so that we do not see all superimposition of each iteration. Depending on what you want to achieve, you can modify this.
############## Input Values #################
TPN <- function(each.sub.pop.n = 150,
sub.pop.means = 20:10,
predict.range = 10:0,
sub.pop.sd = .75,
n.sample = 2,
plot = TRUE,
first = FALSE,
last = FALSE) {
#############################################
if (plot & first) {
plot.new()
m <- matrix( c(1, 2), nrow = 2, ncol = 1 ); layout(m)
par( mar = c(2, 4.1, 2.1, 2.1) )
}
set.seed(2460986)
Vec.rnorm <- Vectorize(function(n, mean, sd) rnorm(n, mean, sd), 'mean')
y <- c( Vec.rnorm(each.sub.pop.n, sub.pop.means, sub.pop.sd) )
set.seed(NULL)
x <- rep(predict.range, each = each.sub.pop.n)
if (plot) {
par(mfg = c(1,1)) ## Top-Row Plot
if (first) {
plot(x, y, ylim = range(y), col = "transparent")
} else if (last) {
plot(x, y, ylim = range(y))
}
}
sample <- lapply(split(y, x), function(z) sample(z, n.sample, replace = TRUE))
sample <- data.frame(y = unlist(sample),
x = as.numeric(rep(names(sample), each = n.sample)))
x = sample$x ; y = sample$y
if (plot) {
par(mfg = c(2,1)) #### BOTTOM-ROW PLOT
if (first) {
plot(x, y, ylim = range(y), col = "transparent")
}
if (last) {
points(x, y)
}
abline(lm(y ~ x), col = c('blue', 'red')[(last) + 1]) # Regression Line
}
}
## TEST HERE:
n <- 10
for (i in 1:n) {
TPN(first = ifelse(i == 1, TRUE, FALSE), last = ifelse(i == n, TRUE, FALSE))
}
Store the abline coefficients
There is no need of transparent color here because, a new plot is created for each iteration.
############## Input Values #################
TPN <- function(each.sub.pop.n = 150,
sub.pop.means = 20:10,
predict.range = 10:0,
sub.pop.sd = .75,
n.sample = 2,
plot = TRUE,
coefs = FALSE,
coefsup = NULL) {
#############################################
if (plot) {
m <- matrix( c(1, 2), nrow = 2, ncol = 1 )
layout(m)
par( mar = c(2, 4.1, 2.1, 2.1) )
}
set.seed(2460986)
Vec.rnorm <- Vectorize(function(n, mean, sd) rnorm(n, mean, sd), 'mean')
y <- c( Vec.rnorm(each.sub.pop.n, sub.pop.means, sub.pop.sd) )
set.seed(NULL)
x <- rep(predict.range, each = each.sub.pop.n)
if (plot) {
plot(x, y, ylim = range(y))
}
sample <- lapply(split(y, x), function(z) sample(z, n.sample, replace = TRUE))
sample <- data.frame(y = unlist(sample),
x = as.numeric(rep(names(sample), each = n.sample)))
x = sample$x ; y = sample$y
if (plot) {
plot(x, y, ylim = range(y))
# Add the previous lines if exists
if (!is.null(coefsup)) {
apply(coefsup, 1, function(x) abline(a = x[1], b = x[2], col = "blue"))
}
abline(lm(y ~ x), col = 'red') # Regression Line
}
if (coefs) {return(coef(lm(y ~ x)))}
}
# TEST with coefs
n <- 10
coefsup <- NULL
for (i in 1:n) {
coefsup <- rbind(coefsup, TPN(coefs = TRUE, coefsup = coefsup))
}
In both cases, the output is what you expect:

How can I plot data with confidence intervals?

If I have 10 values, each of which has a fitted value F, and an upper and lower confidence interval U and L:
set.seed(0815)
F <- runif(10, 1, 2)
L <- runif(10, 0, 1)
U <- runif(10, 2, 3)
How can I show these 10 fitted values and their confidence intervals in the same plot like the one below in R?
Here is a plotrix solution:
set.seed(0815)
x <- 1:10
F <- runif(10,1,2)
L <- runif(10,0,1)
U <- runif(10,2,3)
require(plotrix)
plotCI(x, F, ui=U, li=L)
And here is a ggplot solution:
set.seed(0815)
df <- data.frame(x =1:10,
F =runif(10,1,2),
L =runif(10,0,1),
U =runif(10,2,3))
require(ggplot2)
ggplot(df, aes(x = x, y = F)) +
geom_point(size = 4) +
geom_errorbar(aes(ymax = U, ymin = L))
UPDATE:
Here is a base solution to your edits:
set.seed(1234)
x <- rnorm(20)
df <- data.frame(x = x,
y = x + rnorm(20))
plot(y ~ x, data = df)
# model
mod <- lm(y ~ x, data = df)
# predicts + interval
newx <- seq(min(df$x), max(df$x), length.out=100)
preds <- predict(mod, newdata = data.frame(x=newx),
interval = 'confidence')
# plot
plot(y ~ x, data = df, type = 'n')
# add fill
polygon(c(rev(newx), newx), c(rev(preds[ ,3]), preds[ ,2]), col = 'grey80', border = NA)
# model
abline(mod)
# intervals
lines(newx, preds[ ,3], lty = 'dashed', col = 'red')
lines(newx, preds[ ,2], lty = 'dashed', col = 'red')
Here is a solution using functions plot(), polygon() and lines().
set.seed(1234)
df <- data.frame(x =1:10,
F =runif(10,1,2),
L =runif(10,0,1),
U =runif(10,2,3))
plot(df$x, df$F, ylim = c(0,4), type = "l")
#make polygon where coordinates start with lower limit and
# then upper limit in reverse order
polygon(c(df$x,rev(df$x)),c(df$L,rev(df$U)),col = "grey75", border = FALSE)
lines(df$x, df$F, lwd = 2)
#add red lines on borders of polygon
lines(df$x, df$U, col="red",lty=2)
lines(df$x, df$L, col="red",lty=2)
Now use example data provided by OP in another question:
Lower <- c(0.418116841, 0.391011834, 0.393297710,
0.366144073,0.569956636,0.224775521,0.599166016,0.512269587,
0.531378573, 0.311448219, 0.392045751,0.153614913, 0.366684097,
0.161100849,0.700274810,0.629714150, 0.661641288, 0.533404093,
0.412427559, 0.432905333, 0.525306427,0.224292061,
0.28893064,0.099543648, 0.342995605,0.086973739,0.289030388,
0.081230826,0.164505624, -0.031290586,0.148383474,0.070517523,0.009686605,
-0.052703529,0.475924192,0.253382210, 0.354011010,0.130295355,0.102253218,
0.446598823,0.548330752,0.393985810,0.481691632,0.111811248,0.339626541,
0.267831909,0.133460254,0.347996621,0.412472322,0.133671128,0.178969601,0.484070587,
0.335833224,0.037258467, 0.141312363,0.361392799,0.129791998,
0.283759439,0.333893418,0.569533076,0.385258093,0.356201955,0.481816148,
0.531282473,0.273126565,0.267815691,0.138127486,0.008865700,0.018118398,0.080143484,
0.117861634,0.073697418,0.230002398,0.105855042,0.262367348,0.217799352,0.289108011,
0.161271889,0.219663224,0.306117717,0.538088622,0.320711912,0.264395149,0.396061543,
0.397350946,0.151726970,0.048650180,0.131914718,0.076629840,0.425849394,
0.068692279,0.155144797,0.137939059,0.301912657,-0.071415593,-0.030141781,0.119450922,
0.312927614,0.231345972)
Upper.limit <- c(0.6446223,0.6177311, 0.6034427, 0.5726503,
0.7644718, 0.4585430, 0.8205418, 0.7154043,0.7370033,
0.5285199, 0.5973728, 0.3764209, 0.5818298,
0.3960867,0.8972357, 0.8370151, 0.8359921, 0.7449118,
0.6152879, 0.6200704, 0.7041068, 0.4541011, 0.5222653,
0.3472364, 0.5956551, 0.3068065, 0.5112895, 0.3081448,
0.3745473, 0.1931089, 0.3890704, 0.3031025, 0.2472591,
0.1976092, 0.6906118, 0.4736644, 0.5770463, 0.3528607,
0.3307651, 0.6681629, 0.7476231, 0.5959025, 0.7128883,
0.3451623, 0.5609742, 0.4739216, 0.3694883, 0.5609220,
0.6343219, 0.3647751, 0.4247147, 0.6996334, 0.5562876,
0.2586490, 0.3750040, 0.5922248, 0.3626322, 0.5243285,
0.5548211, 0.7409648, 0.5820070, 0.5530232, 0.6863703,
0.7206998, 0.4952387, 0.4993264, 0.3527727, 0.2203694,
0.2583149, 0.3035342, 0.3462009, 0.3003602, 0.4506054,
0.3359478, 0.4834151, 0.4391330, 0.5273411, 0.3947622,
0.4133769, 0.5288060, 0.7492071, 0.5381701, 0.4825456,
0.6121942, 0.6192227, 0.3784870, 0.2574025, 0.3704140,
0.2945623, 0.6532694, 0.2697202, 0.3652230, 0.3696383,
0.5268808, 0.1545602, 0.2221450, 0.3553377, 0.5204076,
0.3550094)
Fitted.values<- c(0.53136955, 0.50437146, 0.49837019,
0.46939721, 0.66721423, 0.34165926, 0.70985388, 0.61383696,
0.63419092, 0.41998407, 0.49470927, 0.26501789, 0.47425695,
0.27859380, 0.79875525, 0.73336461, 0.74881668, 0.63915795,
0.51385774, 0.52648789, 0.61470661, 0.33919656, 0.40559797,
0.22339000, 0.46932536, 0.19689011, 0.40015996, 0.19468781,
0.26952645, 0.08090917, 0.26872696, 0.18680999, 0.12847285,
0.07245286, 0.58326799, 0.36352329, 0.46552867, 0.24157804,
0.21650915, 0.55738088, 0.64797691, 0.49494416, 0.59728999,
0.22848680, 0.45030036, 0.37087676, 0.25147426, 0.45445930,
0.52339711, 0.24922310, 0.30184215, 0.59185198, 0.44606040,
0.14795374, 0.25815819, 0.47680880, 0.24621212, 0.40404398,
0.44435727, 0.65524894, 0.48363255, 0.45461258, 0.58409323,
0.62599114, 0.38418264, 0.38357103, 0.24545011, 0.11461756,
0.13821664, 0.19183886, 0.23203127, 0.18702881, 0.34030391,
0.22090140, 0.37289121, 0.32846615, 0.40822456, 0.27801706,
0.31652008, 0.41746184, 0.64364785, 0.42944100, 0.37347037,
0.50412786, 0.50828681, 0.26510696, 0.15302635, 0.25116438,
0.18559609, 0.53955941, 0.16920626, 0.26018389, 0.25378867,
0.41439675, 0.04157232, 0.09600163, 0.23739430, 0.41666762,
0.29317767)
Assemble into a data frame (no x provided, so using indices)
df2 <- data.frame(x=seq(length(Fitted.values)),
fit=Fitted.values,lwr=Lower,upr=Upper.limit)
plot(fit~x,data=df2,ylim=range(c(df2$lwr,df2$upr)))
#make polygon where coordinates start with lower limit and then upper limit in reverse order
with(df2,polygon(c(x,rev(x)),c(lwr,rev(upr)),col = "grey75", border = FALSE))
matlines(df2[,1],df2[,-1],
lwd=c(2,1,1),
lty=1,
col=c("black","red","red"))
Here is part of my program related to plotting confidence interval.
1. Generate the test data
ads = 1
require(stats); require(graphics)
library(splines)
x_raw <- seq(1,10,0.1)
y <- cos(x_raw)+rnorm(len_data,0,0.1)
y[30] <- 1.4 # outlier point
len_data = length(x_raw)
N <- len_data
summary(fm1 <- lm(y~bs(x_raw, df=5), model = TRUE, x =T, y = T))
ht <-seq(1,10,length.out = len_data)
plot(x = x_raw, y = y,type = 'p')
y_e <- predict(fm1, data.frame(height = ht))
lines(x= ht, y = y_e)
Result
2. Fitting the raw data using B-spline smoother method
sigma_e <- sqrt(sum((y-y_e)^2)/N)
print(sigma_e)
H<-fm1$x
A <-solve(t(H) %*% H)
y_e_minus <- rep(0,N)
y_e_plus <- rep(0,N)
y_e_minus[N]
for (i in 1:N)
{
tmp <-t(matrix(H[i,])) %*% A %*% matrix(H[i,])
tmp <- 1.96*sqrt(tmp)
y_e_minus[i] <- y_e[i] - tmp
y_e_plus[i] <- y_e[i] + tmp
}
plot(x = x_raw, y = y,type = 'p')
polygon(c(ht,rev(ht)),c(y_e_minus,rev(y_e_plus)),col = rgb(1, 0, 0,0.5), border = NA)
#plot(x = x_raw, y = y,type = 'p')
lines(x= ht, y = y_e_plus, lty = 'dashed', col = 'red')
lines(x= ht, y = y_e)
lines(x= ht, y = y_e_minus, lty = 'dashed', col = 'red')
Result
Some addition to the previous answers. It is nice to regulate the density of the polygon to avoid obscuring the data points.
library(MASS)
attach(Boston)
lm.fit2 = lm(medv~poly(lstat,2))
plot(lstat,medv)
new.lstat = seq(min(lstat), max(lstat), length.out=100)
preds <- predict(lm.fit2, newdata = data.frame(lstat=new.lstat), interval = 'prediction')
lines(sort(lstat), fitted(lm.fit2)[order(lstat)], col='red', lwd=3)
polygon(c(rev(new.lstat), new.lstat), c(rev(preds[ ,3]), preds[ ,2]), density=10, col = 'blue', border = NA)
lines(new.lstat, preds[ ,3], lty = 'dashed', col = 'red')
lines(new.lstat, preds[ ,2], lty = 'dashed', col = 'red')
Please note that you see the prediction interval on the picture, which is several times wider than the confidence interval. You can read here the detailed explanation of those two types of interval estimates.

Resources