Warning message in predict.gam - why is this happening? - r

Reference: Page 22 of, http://www.statoek.wiso.uni-goettingen.de/mitarbeiter/ogi/pub/r_workshop.pdf
My question is self-contained in this post though. The data is in http://134.76.173.220/R_workshop
car <- read.table("car.dat", header = TRUE)
attach(car)
library(mgcv)
fit <- gam(MPG~s(SP))
plot(HP, MPG)
x <- seq(0, 350, length = 500)
y <- predict(fit, data.frame(HP = x))
lines(x, y, col = "red", lwd = 2)
Error im receiving:
Warning message:
In predict.gam(fit, data.frame(HP = x)) :
not all required variables have been supplied in newdata!

It seems to work if you scrap the 'attach' adjust the code accordingly and use SP as suggested by #GeorgeDontas
fit <- gam(MPG~s(SP), data=car)
plot(car$HP, car$MPG)
x <- seq(0, 350, length = 500)
y <- predict(fit, data.frame(SP = x))
lines(x, y, col = "red", lwd = 2)

I think it is supposed to be MPG~HP as that is a good fit and SP is poor (with default parameters)
par(mfrow=c(1,2))
fit <- gam(MPG~s(SP), data=car)
plot(car$SP, car$MPG)
x <- seq(0, 350, length = 500)
y <- predict(fit, data.frame(SP = x))
lines(x, y, col = "red", lwd = 2)
fit <- gam(MPG~s(HP), data=car)
plot(car$HP, car$MPG)
x <- seq(0, 350, length = 500)
y <- predict(fit, data.frame(HP = x))
lines(x, y, col = "red", lwd = 2)

Related

Call objects based on one class with the different attributes

Is it possible to call objects with different attributes based on one class?
For example here i want to have zlab attribute for one object and not for another one.
I tried to use do.call with structure, but it doesn't work, as I thought. Or maybe it should be constructed in some other way?
chart <- function(domain, n, f, col, zlab, ...) {
x <- y <- seq(min(domain), max(domain), length.out = n)
fun <- function(x, y) {}
body(fun) <- substitute(f)
do.call(
lapply, structure(list(x = x, y = y, f = fun,
col=col, zlab = zlab, ...), ...),
class = "myChart")
}
# plot method
plot.myChart <- function(x, ...) {
z = outer(x$x, x$y, x$f)
persp(x$x, x$y, z, col = x$col, zlab = x$zlab, ...)
}
# object call
chr1 <- chart(c(-6, 6),
n = 30,
sqrt(x^2 + y^2),
col = 'blue',
zlab = "Height")
chr2 <- chart(c(-5, 5),
n = 30,
x^2 + y^2,
col = 'green')
plot(chr1)
plot(chr2)
You can simply add defaults for each argument, so that if any are skipped, they have a sensible fallback. This is a bit harder with f, but can be done inside the function using a combination of missing, match.call and quote:
chart <- function(domain = c(-1, 1), n = 10, f, col = 'gray90', zlab = '', ...)
{
x <- y <- seq(min(domain), max(domain), length.out = n)
f <- if(missing(f)) quote(x + y) else match.call()$f
structure(list(x = x, y = y, f = as.function(c(alist(x=, y=), f)),
col = col, zlab = zlab, ...),
class = "myChart")
}
# plot method
plot.myChart <- function(x, ...) {
z = outer(x$x, x$y, x$f)
persp(x$x, x$y, z, col = x$col, zlab = x$zlab, ...)
}
# object call
chr1 <- chart(c(-6, 6),
n = 30,
sqrt(x^2 + y^2),
col = 'blue',
zlab = "Height")
chr2 <- chart(c(-5, 5),
n = 30,
x^2 + y^2,
col = 'green')
plot(chr1)
plot(chr2)
Note we even get a result with no arguments passed to chart at all.
chr3 <- chart()
plot(chr3)
Created on 2022-05-30 by the reprex package (v2.0.1)
Maybe this helps
chart <- function(domain, n, f, col, zlab, ...) {
x <- y <- seq(min(domain), max(domain), length.out = n)
fun <- function(x, y) {}
body(fun) <- substitute(f)
structure(c(list(x = x, y = y, f = fun, col=col, zlab = zlab),
c(...)), class = "myChart")
}
-testing
chr1 <- chart(c(-6, 6),
n = 30,
sqrt(x^2 + y^2),
col = 'blue',
zlab = "Height")
chr2 <- chart(c(-5, 5),
n = 30,
x^2 + y^2,
zlab = NULL,
col = 'green')
plot(chr1)
plot(chr2)

Plot vectors of gradient descent in R

I've code gradient descent algorithm in R and now I'm trying to "draw" the path of the vectors.
I've got draw points in my contour plot, but it's not correct because nobody knows what happened first.
In my algorith always I have an previous state P=(Xi,Yi) and a later state L=(Xi+1,Yi+1), so, How can I draw the vector PL in a contour or a persp plot?
I only got this with contour, where the red point is the convergence:
The same for persp:
Thanks all!
EDIT:
Graphics can be obtanined respectively:
f<-function(u,v){
u*u*exp(2*v)+4*v*v*exp(-2*u)-4*u*v*exp(v-u)
}
x = seq(-2, 2, by = 0.5)
y = seq(-2, 2, by = 0.5)
z <- outer(x,y,f)
#Contour plot
contour(x,y,z)
#Persp plot
persp(x, y, z, phi = 25, theta = 55, xlim=c(-2,2), ylim=c(-2,2),
xlab = "U", ylab = "V",
main = "F(u,v)", col="yellow", ticktype = "detailed"
) -> res
Taking Himmelblau's function as a test example:
f <- function(x, y) { (x^2+y-11)^2 + (x+y^2-7)^2 }
Its partial derivatives:
dx <- function(x,y) {4*x**3-4*x*y-42*x+4*x*y-14}
dy <- function(x,y) {4*y**3+2*x**2-26*y+4*x*y-22}
Running the gradient descent:
# gradient descent parameters
num_iter <- 100
learning_rate <- 0.001
x_val <- 6
y_val <- 6
updates_x <- vector("numeric", length = num_iter)
updates_y <- vector("numeric", length = num_iter)
updates_z <- vector("numeric", length = num_iter)
# parameter updates
for (i in 1:num_iter) {
dx_val = dx(x_val,y_val)
dy_val = dy(x_val,y_val)
x_val <- x_val-learning_rate*dx_val
y_val <- y_val-learning_rate*dx_val
z_val <- f(x_val, y_val)
updates_x[i] <- x_val
updates_y[i] <- y_val
updates_z[i] <- z_val
}
Plotting:
x <- seq(-6, 6, length = 100)
y <- x
z <- outer(x, y, f)
plt <- persp(x, y, z,
theta = -50-log(i), phi = 20+log(i),
expand = 0.5,
col = "lightblue", border = 'lightblue',
axes = FALSE, box = FALSE,
ltheta = 60, shade = 0.90
)
points(trans3d(updates_x[1:i], updates_y[1:i], updates_z[1:i],pmat = plt),
col = c(rep('white', num_iter-1), 'blue'),
pch = 16,
cex = c(rep(0.5, num_iter-1), 1))
There's a trick to plotting points using persp, as mentioned in ?persp. By employing the power of trans3d, you can successfully put points and lines on a perspective plot.
f<-function(u,v){
u*u*exp(2*v)+4*v*v*exp(-2*u)-4*u*v*exp(v-u)
}
x = seq(-2, 2, by = 0.5)
y = seq(-2, 2, by = 0.5)
z <- scale(outer(x,y,f))
view <- persp(x, y, z, phi = 30, theta = 30, xlim=c(-2,2), ylim=c(-2,2),
xlab = "X", ylab = "Y", zlab = "Z", scale = FALSE,
main = "F(u,v)", col="yellow", ticktype = "detailed")
set.seed(2)
pts <- data.frame(x = sample(x, 3),
y = sample(y, 3),
z = sample(z, 3))
points(trans3d(x = pts$x, y = pts$y, z = pts$z, pmat = view), pch = 16)
lines(trans3d(x = pts$x, y = pts$y, z = pts$z, pmat = view))

R function doesn't do what I want

I don't inderstand why my function doesn't work, can you help me to find my error.
VehiculeFunction <- function(data){
my.data <- data[data$GAMME =="M1",]
ma.col = rgb(red = 0.1,blue = 1,green = 0.1, alpha = 0.2)
X <- my.data$GMF.24
Y <- my.data$Cout.24
X11()
plot(X, Y, pch=20, las = 1, col = ma.col, xlim = c(0, 10), ylim = c(0,10))
identify(X, Y, labels = my.data$NITG, cex = 0.7)
}
This one works perfectly, and when I add two variables it returns "numeric(0)"
VehiculeFunction <- function(data, x, y){
my.data <- data[data$GAMME =="M1",]
ma.col = rgb(red = 0.1,blue = 1,green = 0.1, alpha = 0.2)
X <- my.data$x
Y <- my.data$y
X11()
plot(X, Y, pch=20, las = 1, col = ma.col, xlim = c(0, 10), ylim = c(0,10))
identify(X, Y, labels = my.data$NITG, cex = 0.7)
}
VehiculeFunction(data.vehicule, GMF.24, Cout.24)
numeric(0)
thank you Oleg, and if i want to add a condition on another variable of my dataframe, i want to take all the three first of "RANG_NITG_PROJET_K"
my.data1 <- my.data[my.data$RANG_NITG_PROJET_K == 1|2|3,] ?
but | think it's false because when i do this
my.data1 <- data.vehicule[data.vehicule$RANG_NITG_PROJET_K == 1,]
my.data2 <- data.vehicule[data.vehicule$RANG_NITG_PROJET_K == 2,]
my.data3 <- data.vehicule[data.vehicule$RANG_NITG_PROJET_K == 3,]
my.data <- rbind(my.data1, my.data2, my.data3)
it gives me two different dataframe ?
Try the following. You cannot access my.data columns (or list elements) using the $ operator in this case and you need to pass strings for x and y:
VehiculeFunction <- function(data, x, y, gamme){
my.data <- data[data$GAMME == gamme,]
ma.col = rgb(red = 0.1,blue = 1,green = 0.1, alpha = 0.2)
X <- my.data[[x]] # <- change from $ to [[]]
Y <- my.data[[y]] # <- change from $ to [[]]
X11()
plot(X, Y, pch=20, las = 1, col = ma.col, xlim = c(0, 10), ylim = c(0,10))
identify(X, Y, labels = my.data$NITG, cex = 0.7)
}
VehiculeFunction(data.vehicule, "GMF.24", "Cout.24", "M1") # <- change to strings

how to find 95% confidence bands for predicting mean y per value of x and 95% prediction bands for predicting individual y values [duplicate]

If I have 10 values, each of which has a fitted value F, and an upper and lower confidence interval U and L:
set.seed(0815)
F <- runif(10, 1, 2)
L <- runif(10, 0, 1)
U <- runif(10, 2, 3)
How can I show these 10 fitted values and their confidence intervals in the same plot like the one below in R?
Here is a plotrix solution:
set.seed(0815)
x <- 1:10
F <- runif(10,1,2)
L <- runif(10,0,1)
U <- runif(10,2,3)
require(plotrix)
plotCI(x, F, ui=U, li=L)
And here is a ggplot solution:
set.seed(0815)
df <- data.frame(x =1:10,
F =runif(10,1,2),
L =runif(10,0,1),
U =runif(10,2,3))
require(ggplot2)
ggplot(df, aes(x = x, y = F)) +
geom_point(size = 4) +
geom_errorbar(aes(ymax = U, ymin = L))
UPDATE:
Here is a base solution to your edits:
set.seed(1234)
x <- rnorm(20)
df <- data.frame(x = x,
y = x + rnorm(20))
plot(y ~ x, data = df)
# model
mod <- lm(y ~ x, data = df)
# predicts + interval
newx <- seq(min(df$x), max(df$x), length.out=100)
preds <- predict(mod, newdata = data.frame(x=newx),
interval = 'confidence')
# plot
plot(y ~ x, data = df, type = 'n')
# add fill
polygon(c(rev(newx), newx), c(rev(preds[ ,3]), preds[ ,2]), col = 'grey80', border = NA)
# model
abline(mod)
# intervals
lines(newx, preds[ ,3], lty = 'dashed', col = 'red')
lines(newx, preds[ ,2], lty = 'dashed', col = 'red')
Here is a solution using functions plot(), polygon() and lines().
set.seed(1234)
df <- data.frame(x =1:10,
F =runif(10,1,2),
L =runif(10,0,1),
U =runif(10,2,3))
plot(df$x, df$F, ylim = c(0,4), type = "l")
#make polygon where coordinates start with lower limit and
# then upper limit in reverse order
polygon(c(df$x,rev(df$x)),c(df$L,rev(df$U)),col = "grey75", border = FALSE)
lines(df$x, df$F, lwd = 2)
#add red lines on borders of polygon
lines(df$x, df$U, col="red",lty=2)
lines(df$x, df$L, col="red",lty=2)
Now use example data provided by OP in another question:
Lower <- c(0.418116841, 0.391011834, 0.393297710,
0.366144073,0.569956636,0.224775521,0.599166016,0.512269587,
0.531378573, 0.311448219, 0.392045751,0.153614913, 0.366684097,
0.161100849,0.700274810,0.629714150, 0.661641288, 0.533404093,
0.412427559, 0.432905333, 0.525306427,0.224292061,
0.28893064,0.099543648, 0.342995605,0.086973739,0.289030388,
0.081230826,0.164505624, -0.031290586,0.148383474,0.070517523,0.009686605,
-0.052703529,0.475924192,0.253382210, 0.354011010,0.130295355,0.102253218,
0.446598823,0.548330752,0.393985810,0.481691632,0.111811248,0.339626541,
0.267831909,0.133460254,0.347996621,0.412472322,0.133671128,0.178969601,0.484070587,
0.335833224,0.037258467, 0.141312363,0.361392799,0.129791998,
0.283759439,0.333893418,0.569533076,0.385258093,0.356201955,0.481816148,
0.531282473,0.273126565,0.267815691,0.138127486,0.008865700,0.018118398,0.080143484,
0.117861634,0.073697418,0.230002398,0.105855042,0.262367348,0.217799352,0.289108011,
0.161271889,0.219663224,0.306117717,0.538088622,0.320711912,0.264395149,0.396061543,
0.397350946,0.151726970,0.048650180,0.131914718,0.076629840,0.425849394,
0.068692279,0.155144797,0.137939059,0.301912657,-0.071415593,-0.030141781,0.119450922,
0.312927614,0.231345972)
Upper.limit <- c(0.6446223,0.6177311, 0.6034427, 0.5726503,
0.7644718, 0.4585430, 0.8205418, 0.7154043,0.7370033,
0.5285199, 0.5973728, 0.3764209, 0.5818298,
0.3960867,0.8972357, 0.8370151, 0.8359921, 0.7449118,
0.6152879, 0.6200704, 0.7041068, 0.4541011, 0.5222653,
0.3472364, 0.5956551, 0.3068065, 0.5112895, 0.3081448,
0.3745473, 0.1931089, 0.3890704, 0.3031025, 0.2472591,
0.1976092, 0.6906118, 0.4736644, 0.5770463, 0.3528607,
0.3307651, 0.6681629, 0.7476231, 0.5959025, 0.7128883,
0.3451623, 0.5609742, 0.4739216, 0.3694883, 0.5609220,
0.6343219, 0.3647751, 0.4247147, 0.6996334, 0.5562876,
0.2586490, 0.3750040, 0.5922248, 0.3626322, 0.5243285,
0.5548211, 0.7409648, 0.5820070, 0.5530232, 0.6863703,
0.7206998, 0.4952387, 0.4993264, 0.3527727, 0.2203694,
0.2583149, 0.3035342, 0.3462009, 0.3003602, 0.4506054,
0.3359478, 0.4834151, 0.4391330, 0.5273411, 0.3947622,
0.4133769, 0.5288060, 0.7492071, 0.5381701, 0.4825456,
0.6121942, 0.6192227, 0.3784870, 0.2574025, 0.3704140,
0.2945623, 0.6532694, 0.2697202, 0.3652230, 0.3696383,
0.5268808, 0.1545602, 0.2221450, 0.3553377, 0.5204076,
0.3550094)
Fitted.values<- c(0.53136955, 0.50437146, 0.49837019,
0.46939721, 0.66721423, 0.34165926, 0.70985388, 0.61383696,
0.63419092, 0.41998407, 0.49470927, 0.26501789, 0.47425695,
0.27859380, 0.79875525, 0.73336461, 0.74881668, 0.63915795,
0.51385774, 0.52648789, 0.61470661, 0.33919656, 0.40559797,
0.22339000, 0.46932536, 0.19689011, 0.40015996, 0.19468781,
0.26952645, 0.08090917, 0.26872696, 0.18680999, 0.12847285,
0.07245286, 0.58326799, 0.36352329, 0.46552867, 0.24157804,
0.21650915, 0.55738088, 0.64797691, 0.49494416, 0.59728999,
0.22848680, 0.45030036, 0.37087676, 0.25147426, 0.45445930,
0.52339711, 0.24922310, 0.30184215, 0.59185198, 0.44606040,
0.14795374, 0.25815819, 0.47680880, 0.24621212, 0.40404398,
0.44435727, 0.65524894, 0.48363255, 0.45461258, 0.58409323,
0.62599114, 0.38418264, 0.38357103, 0.24545011, 0.11461756,
0.13821664, 0.19183886, 0.23203127, 0.18702881, 0.34030391,
0.22090140, 0.37289121, 0.32846615, 0.40822456, 0.27801706,
0.31652008, 0.41746184, 0.64364785, 0.42944100, 0.37347037,
0.50412786, 0.50828681, 0.26510696, 0.15302635, 0.25116438,
0.18559609, 0.53955941, 0.16920626, 0.26018389, 0.25378867,
0.41439675, 0.04157232, 0.09600163, 0.23739430, 0.41666762,
0.29317767)
Assemble into a data frame (no x provided, so using indices)
df2 <- data.frame(x=seq(length(Fitted.values)),
fit=Fitted.values,lwr=Lower,upr=Upper.limit)
plot(fit~x,data=df2,ylim=range(c(df2$lwr,df2$upr)))
#make polygon where coordinates start with lower limit and then upper limit in reverse order
with(df2,polygon(c(x,rev(x)),c(lwr,rev(upr)),col = "grey75", border = FALSE))
matlines(df2[,1],df2[,-1],
lwd=c(2,1,1),
lty=1,
col=c("black","red","red"))
Here is part of my program related to plotting confidence interval.
1. Generate the test data
ads = 1
require(stats); require(graphics)
library(splines)
x_raw <- seq(1,10,0.1)
y <- cos(x_raw)+rnorm(len_data,0,0.1)
y[30] <- 1.4 # outlier point
len_data = length(x_raw)
N <- len_data
summary(fm1 <- lm(y~bs(x_raw, df=5), model = TRUE, x =T, y = T))
ht <-seq(1,10,length.out = len_data)
plot(x = x_raw, y = y,type = 'p')
y_e <- predict(fm1, data.frame(height = ht))
lines(x= ht, y = y_e)
Result
2. Fitting the raw data using B-spline smoother method
sigma_e <- sqrt(sum((y-y_e)^2)/N)
print(sigma_e)
H<-fm1$x
A <-solve(t(H) %*% H)
y_e_minus <- rep(0,N)
y_e_plus <- rep(0,N)
y_e_minus[N]
for (i in 1:N)
{
tmp <-t(matrix(H[i,])) %*% A %*% matrix(H[i,])
tmp <- 1.96*sqrt(tmp)
y_e_minus[i] <- y_e[i] - tmp
y_e_plus[i] <- y_e[i] + tmp
}
plot(x = x_raw, y = y,type = 'p')
polygon(c(ht,rev(ht)),c(y_e_minus,rev(y_e_plus)),col = rgb(1, 0, 0,0.5), border = NA)
#plot(x = x_raw, y = y,type = 'p')
lines(x= ht, y = y_e_plus, lty = 'dashed', col = 'red')
lines(x= ht, y = y_e)
lines(x= ht, y = y_e_minus, lty = 'dashed', col = 'red')
Result
Some addition to the previous answers. It is nice to regulate the density of the polygon to avoid obscuring the data points.
library(MASS)
attach(Boston)
lm.fit2 = lm(medv~poly(lstat,2))
plot(lstat,medv)
new.lstat = seq(min(lstat), max(lstat), length.out=100)
preds <- predict(lm.fit2, newdata = data.frame(lstat=new.lstat), interval = 'prediction')
lines(sort(lstat), fitted(lm.fit2)[order(lstat)], col='red', lwd=3)
polygon(c(rev(new.lstat), new.lstat), c(rev(preds[ ,3]), preds[ ,2]), density=10, col = 'blue', border = NA)
lines(new.lstat, preds[ ,3], lty = 'dashed', col = 'red')
lines(new.lstat, preds[ ,2], lty = 'dashed', col = 'red')
Please note that you see the prediction interval on the picture, which is several times wider than the confidence interval. You can read here the detailed explanation of those two types of interval estimates.

How can I plot data with confidence intervals?

If I have 10 values, each of which has a fitted value F, and an upper and lower confidence interval U and L:
set.seed(0815)
F <- runif(10, 1, 2)
L <- runif(10, 0, 1)
U <- runif(10, 2, 3)
How can I show these 10 fitted values and their confidence intervals in the same plot like the one below in R?
Here is a plotrix solution:
set.seed(0815)
x <- 1:10
F <- runif(10,1,2)
L <- runif(10,0,1)
U <- runif(10,2,3)
require(plotrix)
plotCI(x, F, ui=U, li=L)
And here is a ggplot solution:
set.seed(0815)
df <- data.frame(x =1:10,
F =runif(10,1,2),
L =runif(10,0,1),
U =runif(10,2,3))
require(ggplot2)
ggplot(df, aes(x = x, y = F)) +
geom_point(size = 4) +
geom_errorbar(aes(ymax = U, ymin = L))
UPDATE:
Here is a base solution to your edits:
set.seed(1234)
x <- rnorm(20)
df <- data.frame(x = x,
y = x + rnorm(20))
plot(y ~ x, data = df)
# model
mod <- lm(y ~ x, data = df)
# predicts + interval
newx <- seq(min(df$x), max(df$x), length.out=100)
preds <- predict(mod, newdata = data.frame(x=newx),
interval = 'confidence')
# plot
plot(y ~ x, data = df, type = 'n')
# add fill
polygon(c(rev(newx), newx), c(rev(preds[ ,3]), preds[ ,2]), col = 'grey80', border = NA)
# model
abline(mod)
# intervals
lines(newx, preds[ ,3], lty = 'dashed', col = 'red')
lines(newx, preds[ ,2], lty = 'dashed', col = 'red')
Here is a solution using functions plot(), polygon() and lines().
set.seed(1234)
df <- data.frame(x =1:10,
F =runif(10,1,2),
L =runif(10,0,1),
U =runif(10,2,3))
plot(df$x, df$F, ylim = c(0,4), type = "l")
#make polygon where coordinates start with lower limit and
# then upper limit in reverse order
polygon(c(df$x,rev(df$x)),c(df$L,rev(df$U)),col = "grey75", border = FALSE)
lines(df$x, df$F, lwd = 2)
#add red lines on borders of polygon
lines(df$x, df$U, col="red",lty=2)
lines(df$x, df$L, col="red",lty=2)
Now use example data provided by OP in another question:
Lower <- c(0.418116841, 0.391011834, 0.393297710,
0.366144073,0.569956636,0.224775521,0.599166016,0.512269587,
0.531378573, 0.311448219, 0.392045751,0.153614913, 0.366684097,
0.161100849,0.700274810,0.629714150, 0.661641288, 0.533404093,
0.412427559, 0.432905333, 0.525306427,0.224292061,
0.28893064,0.099543648, 0.342995605,0.086973739,0.289030388,
0.081230826,0.164505624, -0.031290586,0.148383474,0.070517523,0.009686605,
-0.052703529,0.475924192,0.253382210, 0.354011010,0.130295355,0.102253218,
0.446598823,0.548330752,0.393985810,0.481691632,0.111811248,0.339626541,
0.267831909,0.133460254,0.347996621,0.412472322,0.133671128,0.178969601,0.484070587,
0.335833224,0.037258467, 0.141312363,0.361392799,0.129791998,
0.283759439,0.333893418,0.569533076,0.385258093,0.356201955,0.481816148,
0.531282473,0.273126565,0.267815691,0.138127486,0.008865700,0.018118398,0.080143484,
0.117861634,0.073697418,0.230002398,0.105855042,0.262367348,0.217799352,0.289108011,
0.161271889,0.219663224,0.306117717,0.538088622,0.320711912,0.264395149,0.396061543,
0.397350946,0.151726970,0.048650180,0.131914718,0.076629840,0.425849394,
0.068692279,0.155144797,0.137939059,0.301912657,-0.071415593,-0.030141781,0.119450922,
0.312927614,0.231345972)
Upper.limit <- c(0.6446223,0.6177311, 0.6034427, 0.5726503,
0.7644718, 0.4585430, 0.8205418, 0.7154043,0.7370033,
0.5285199, 0.5973728, 0.3764209, 0.5818298,
0.3960867,0.8972357, 0.8370151, 0.8359921, 0.7449118,
0.6152879, 0.6200704, 0.7041068, 0.4541011, 0.5222653,
0.3472364, 0.5956551, 0.3068065, 0.5112895, 0.3081448,
0.3745473, 0.1931089, 0.3890704, 0.3031025, 0.2472591,
0.1976092, 0.6906118, 0.4736644, 0.5770463, 0.3528607,
0.3307651, 0.6681629, 0.7476231, 0.5959025, 0.7128883,
0.3451623, 0.5609742, 0.4739216, 0.3694883, 0.5609220,
0.6343219, 0.3647751, 0.4247147, 0.6996334, 0.5562876,
0.2586490, 0.3750040, 0.5922248, 0.3626322, 0.5243285,
0.5548211, 0.7409648, 0.5820070, 0.5530232, 0.6863703,
0.7206998, 0.4952387, 0.4993264, 0.3527727, 0.2203694,
0.2583149, 0.3035342, 0.3462009, 0.3003602, 0.4506054,
0.3359478, 0.4834151, 0.4391330, 0.5273411, 0.3947622,
0.4133769, 0.5288060, 0.7492071, 0.5381701, 0.4825456,
0.6121942, 0.6192227, 0.3784870, 0.2574025, 0.3704140,
0.2945623, 0.6532694, 0.2697202, 0.3652230, 0.3696383,
0.5268808, 0.1545602, 0.2221450, 0.3553377, 0.5204076,
0.3550094)
Fitted.values<- c(0.53136955, 0.50437146, 0.49837019,
0.46939721, 0.66721423, 0.34165926, 0.70985388, 0.61383696,
0.63419092, 0.41998407, 0.49470927, 0.26501789, 0.47425695,
0.27859380, 0.79875525, 0.73336461, 0.74881668, 0.63915795,
0.51385774, 0.52648789, 0.61470661, 0.33919656, 0.40559797,
0.22339000, 0.46932536, 0.19689011, 0.40015996, 0.19468781,
0.26952645, 0.08090917, 0.26872696, 0.18680999, 0.12847285,
0.07245286, 0.58326799, 0.36352329, 0.46552867, 0.24157804,
0.21650915, 0.55738088, 0.64797691, 0.49494416, 0.59728999,
0.22848680, 0.45030036, 0.37087676, 0.25147426, 0.45445930,
0.52339711, 0.24922310, 0.30184215, 0.59185198, 0.44606040,
0.14795374, 0.25815819, 0.47680880, 0.24621212, 0.40404398,
0.44435727, 0.65524894, 0.48363255, 0.45461258, 0.58409323,
0.62599114, 0.38418264, 0.38357103, 0.24545011, 0.11461756,
0.13821664, 0.19183886, 0.23203127, 0.18702881, 0.34030391,
0.22090140, 0.37289121, 0.32846615, 0.40822456, 0.27801706,
0.31652008, 0.41746184, 0.64364785, 0.42944100, 0.37347037,
0.50412786, 0.50828681, 0.26510696, 0.15302635, 0.25116438,
0.18559609, 0.53955941, 0.16920626, 0.26018389, 0.25378867,
0.41439675, 0.04157232, 0.09600163, 0.23739430, 0.41666762,
0.29317767)
Assemble into a data frame (no x provided, so using indices)
df2 <- data.frame(x=seq(length(Fitted.values)),
fit=Fitted.values,lwr=Lower,upr=Upper.limit)
plot(fit~x,data=df2,ylim=range(c(df2$lwr,df2$upr)))
#make polygon where coordinates start with lower limit and then upper limit in reverse order
with(df2,polygon(c(x,rev(x)),c(lwr,rev(upr)),col = "grey75", border = FALSE))
matlines(df2[,1],df2[,-1],
lwd=c(2,1,1),
lty=1,
col=c("black","red","red"))
Here is part of my program related to plotting confidence interval.
1. Generate the test data
ads = 1
require(stats); require(graphics)
library(splines)
x_raw <- seq(1,10,0.1)
y <- cos(x_raw)+rnorm(len_data,0,0.1)
y[30] <- 1.4 # outlier point
len_data = length(x_raw)
N <- len_data
summary(fm1 <- lm(y~bs(x_raw, df=5), model = TRUE, x =T, y = T))
ht <-seq(1,10,length.out = len_data)
plot(x = x_raw, y = y,type = 'p')
y_e <- predict(fm1, data.frame(height = ht))
lines(x= ht, y = y_e)
Result
2. Fitting the raw data using B-spline smoother method
sigma_e <- sqrt(sum((y-y_e)^2)/N)
print(sigma_e)
H<-fm1$x
A <-solve(t(H) %*% H)
y_e_minus <- rep(0,N)
y_e_plus <- rep(0,N)
y_e_minus[N]
for (i in 1:N)
{
tmp <-t(matrix(H[i,])) %*% A %*% matrix(H[i,])
tmp <- 1.96*sqrt(tmp)
y_e_minus[i] <- y_e[i] - tmp
y_e_plus[i] <- y_e[i] + tmp
}
plot(x = x_raw, y = y,type = 'p')
polygon(c(ht,rev(ht)),c(y_e_minus,rev(y_e_plus)),col = rgb(1, 0, 0,0.5), border = NA)
#plot(x = x_raw, y = y,type = 'p')
lines(x= ht, y = y_e_plus, lty = 'dashed', col = 'red')
lines(x= ht, y = y_e)
lines(x= ht, y = y_e_minus, lty = 'dashed', col = 'red')
Result
Some addition to the previous answers. It is nice to regulate the density of the polygon to avoid obscuring the data points.
library(MASS)
attach(Boston)
lm.fit2 = lm(medv~poly(lstat,2))
plot(lstat,medv)
new.lstat = seq(min(lstat), max(lstat), length.out=100)
preds <- predict(lm.fit2, newdata = data.frame(lstat=new.lstat), interval = 'prediction')
lines(sort(lstat), fitted(lm.fit2)[order(lstat)], col='red', lwd=3)
polygon(c(rev(new.lstat), new.lstat), c(rev(preds[ ,3]), preds[ ,2]), density=10, col = 'blue', border = NA)
lines(new.lstat, preds[ ,3], lty = 'dashed', col = 'red')
lines(new.lstat, preds[ ,2], lty = 'dashed', col = 'red')
Please note that you see the prediction interval on the picture, which is several times wider than the confidence interval. You can read here the detailed explanation of those two types of interval estimates.

Resources