Confidence Intervals while using nls() - r

I am writing a code to model the inactivation of bacteria. I was able to get the model using nls(). Now, when I am trying to calculate and plot the confidence and prediction intervals, I am unable to get a proper fit. The lower CI and PI are being plotted awry and I do not know how to rectify this issue.
My code:
Xo <- c(0, 0.461, 0.978, 2.087)
Y <- c(0, -2.891, -4.197, -4.489)
dat <- data.frame(Xo, Y, colClass="numeric")
f <- function(Xo, n, k) {-k*(Xo)^n}
m <- nls(Y~f(Xo, n, k), data=dat, start=c(n=2, k=20), trace=TRUE)
summary(m)
yfitted <- predict(m)
plot(Xo, yfitted)
lines(Xo, yfitted)
plot(dat$Xo, yfitted, xlim=c(0, 5), ylim=c(-5, 0))
y.conf <- predictNLS(m, interval="confidence", alpha =0.05, nsim=10000)$summary
y.pred <- predictNLS(m, interval="prediction", alpha=0.05, nsim=10000)$summary
matlines(Xo, y.conf[, c("Sim.2.5%", "Sim.97.5%")], col="red", lty="dashed")
matlines(Xo, y.pred[, c("Sim.2.5%", "Sim.97.5%")], col="blue", lty="solid")

Related

How to get the correct output when using batch forecasting for the monthly time series with the Mcomp package?

I'm trying to do batch forecasting for the monthly time series with the Mcomp package. I've prepared a code but I am not getting any output.
library(forecast)
library(Mcomp)
Using the seq function, as I need to select the particular time series which ends with 7.
tsset <- (seq(1507, 2797, 10))
tsset
horizon <- 18
fit1<-array(0,130)
for (tsi in 1:130){
y <- tsset[[tsi]]$x
yt <- head(y, length(y) - horizon)
yv <- tail(y, horizon)
for(i in 1:130){
fit1 <-c(ets(yt))
}
print(fit1)
}
Here is how you get the prediction for the last 18 points, given the first 112 points in the time series (you don't need loops):
tsset<- seq(1507, 2797, 10) + 10*runif(130) # add noise
horizon <- 18
y <- tsset
n <- length(y)
yt <- head(y, n - horizon)
#yv <- tail(y, horizon)
fit1 <- ets(yt)
yv1 <- forecast(fit1, h=horizon)
start <- n - horizon + 1
plot(start:n, yv, type='l', col='red', lty=1, xlab='t', ylab='y(t)')
lines(start:n, yv1$mean, col='blue', lty=2)
lines(start:n, yv1$upper[,2], col='green', lty=2)
lines(start:n, yv1$lower[,2], col='green', lty=2)
legend("topleft", legend=c("original", "forecast", "95% CI"),
col=c("red", "blue", "green"), lty=c(1,2,2), cex=0.8)

Plot the likelihood of weibull

I would like to plot the likelihood function of a size 1000 weibull sample with a sequence of shape parameter theta. I have used standardised weibull so the scale lambda is 1. However the output is a horizontal straight line.
n<-1000
lik <- function(theta, x){
K<- length(theta)
n<- length(x)
out<- rep(0,K)
for(k in 1:K){
out[k] <- prod(dweibull(x, shape= theta[k], scale=1))
}
return(out)
}
theta<-seq(0.01, 10, by = 0.01)
x <- rweibull(n, shape= 0.5, scale= 1)
plot(theta, lik(theta, x), type="l", lwd=2)
There is nothing really wrong about what you have done but computers struggle to calculate the product of many small numbers and so can end up as zero (even 0.99^1000 = 4^-5). And so it is easier to log transform and then sum. (As the log transform is a monotonic increasing function maximising the log-likelihood is the same as maximising the likelihood).Thus change
prod(dweibull(x, shape= theta[k], scale=1))
to
sum(dweibull(x, shape= theta[k], scale=1, log=TRUE))
The other minor change is to plot the likelihood witihin a reasonable range of theta so that
you can see the curve.
Working code:
set.seed(1)
n<-1000
lik <- function(theta, x){
K <- length(theta)
n <- length(x)
out <- rep(0,K)
for(k in 1:K){
out[k] <- sum(dweibull(x, shape= theta[k], scale=1, log=TRUE))
}
return(out)
}
popTheta = 0.5
theta = seq(0.01, 1.5, by = 0.01)
x = rweibull(n, shape=popTheta, scale= 1)
plot(theta, lik(theta, x), type="l", lwd=2)
abline(v=popTheta)
theta[which.max( lik(theta, x))]

R - Regression plane for linear mixed-effects model with 3 explicative variables

I'm struggling to plot the regression plane of a linear mixed-effects model with 3 explicative variables. I used the code here, which is for a z~x+y model, and tried to adapt it for a z~x+y+c model :
library(nlme)
library(plot3D)
data <- read.table('data-plot-3D.csv', dec=',', sep=';', header=TRUE)
test <- lme(z~x+y+c,
data=data,
random=~1|s, method="ML")
grid.lines = nrow(data)
x.pred <- seq(min(data$x), max(data$x), length.out = grid.lines)
y.pred <- seq(min(data$y), max(data$y), length.out = grid.lines)
c.pred <- seq(min(data$c), max(data$c), length.out = grid.lines)
xyc <- expand.grid(x=x.pred,
y=y.pred,
c=c.pred)
z.pred <- matrix(predict(test, newdata=xyc, level=0),
nrow=grid.lines, ncol=grid.lines)
fitpoints <- as.vector(predict(test, data))
scatter3D(data$x, data$y, data$z,
pch=18, cex=2, theta=20, phi=20, ticktype="detailed",
xlab="x", ylab = "y", zlab="z",
surf=list(x=x.pred,
y=y.pred,
z=z.pred,
facets = NA,
fit = fitpoints),
main="Model 3D plot")
But the figure is wrong : the plane does not fit at all. I have several hypothesis that could explain this result, for example the level=0 parameter when it comes to predict values, or some properties of linear mixed-effects models that I don't know. If I understood correctly, the level=0 parameter is needed because the lme model includes some noise and my plotting data do not consider it (maybe I'm wrong).
When I simplify the model to a simple linear model (lm instead of lme), with a removal of the 3d variable (z~x+y), I have a correct regression plane :
library(nlme)
library(plot3D)
data <- read.table('data-plot-3D.csv', dec=',', sep=';', header=TRUE)
test <- lm(z~x+y, data=data)
grid.lines = nrow(data)
x.pred <- seq(min(data$x), max(data$x), length.out = grid.lines)
y.pred <- seq(min(data$y), max(data$y), length.out = grid.lines)
xy <- expand.grid(x=x.pred,
y=y.pred)
z.pred <- matrix(predict(test, newdata=xy),
nrow=grid.lines, ncol=grid.lines)
fitpoints <- predict(test)
scatter3D(data$x, data$y, data$z,
pch=18, cex=2, theta=20, phi=20, ticktype="detailed",
xlab="senescence", ylab = "cambium", zlab="budburst",
surf=list(x=x.pred,
y=y.pred,
z=z.pred,
facets = NA,
fit = fitpoints),
main="Good plane (lm z~x+y)")
How can I solve that problem ? The data and the pictures are below. I have also planned to color the points and the plane according to the c variable instead of along the z axis, but I guess it is less important (I'll ask another question maybe)
s;z;x;y;c
B;103,5104;259,9426;250;0,289661996
F;100,6323;292,9711;250;0,522028213
F;105,2674;293,2956;257;0,429718346
F;103,2945;289,3861;257;0,499746521
B;100,3786;283,8451;271;0,210084525
B;100,392;284,7398;271;0,261014107
F;106,0284;295,9574;271;0,674816959
F;105,2831;280,6875;271;0,592056388
F;109,1873;284,9641;271;0,607971883
B;100,1647;253,8401;271;0,327859183
B;100,1124;259,4129;271;0,270563403
B;101,2346;261,3593;271;0,238732415
B;100,9574;285,814;277;0,261014107
F;105,1347;294,102;277;0,461549335
B;102,3565;302,9692;277;0,33422538
F;102,6884;297,6776;284;0,410619753
F;109,0465;288,4876;284;0,44881694
Q;110,0359;318,2187;284;0,630253575
Q;108,2855;285,7081;311;0,413802852
F;113,5328;308,7703;311;0,385154962
F;105,1965;266,3839;311;0,585690191
Q;112,9197;314,6168;311;0,604788784
Q;105,5601;284,8472;311;0,506112719
Q;109,5241;309,2628;319;0,36287327
Q;109,5802;315,2328;319;0,461549335
F;108,3161;285,0408;319;0,397887358
Q;110,3092;317,1796;328;0,423352149
Q;110,5984;297,9926;328;0,620704278

R: Contour plot for each component of a fitted bivariate mixture

(Please note: I'm using R for only two days now.)
I have a dataset data that looks like this:
plot(data, pch=20, xlim=c(-2,3), ylim=c(-1,2))
I'm using the mixsmsn package to fit a mixture of bivariate skew-normal distributions:
sn2 <- smsn.mmix(data, nu=3, g=2, get.init=TRUE, criteria=TRUE, group=TRUE, family="Skew.normal", error=1e-08, iter.max=10000)
I can plot it like this (why pch=20 doesn't work?):
mix.contour(data, sn2, pch=20, xlim=c(-2,3), ylim=c(-1,2), levels=c(0.1,0.25,0.5))
How can I achieve the following?
I'd want to draw a contour separately for each component at half its height. That is, say it's a mixture distribution of the form p f_1(x,y) + (1-p) f_2(x,y) (f_i being the pdf of the _i_th skew-normal component); I'd want to draw (on a scatter plot) a contour of the f_1 component at half its height, and a second contour related to f_2 at half its height; I'd like the result to look like this:
Using the fMultivar package, I came up with this:
X <- data
sn2 <- smsn.mmix(X, nu=3, g=2, get.init=TRUE, criteria=TRUE, group=TRUE, family="Skew.normal", error=1e-08, iter.max=10000)
mu1 <- sn2$mu[[1]]
sigma1 <- sn2$Sigma[[1]]
alpha1 <- c(sn2$shape[[1]][1], sn2$shape[[1]][2])
p1 <- sn2$pii[[1]]
mu2 <- sn2$mu[[2]]
sigma2 <- sn2$Sigma[[2]]
alpha2 <- c(sn2$shape[[2]][1], sn2$shape[[2]][2])
p2 <- sn2$pii[[2]]
N <- 101
x <- seq(min(X[, 1]), max(X[, 1]), l=N)
y <- seq(min(X[, 2]), max(X[, 2]), l=N)
u <- grid2d(x, y)$x
v <- grid2d(x, y)$y
XY <- cbind(u, v)
Z1 <- matrix(p1*dmsn(XY, mu1, sigma1, alpha1), ncol=N)
Z2 <- matrix(p2*dmsn(XY, mu2, sigma2, alpha2), ncol=N)
c1 <- 0.5*max(Z1)
c2 <- 0.5*max(Z2)
plot(X, pch=20, xlim=c(-2,3), ylim=c(-1,2))
contour(x, y, Z1, add=TRUE, col="red", lwd=3, levels=c(c1), labels="")
contour(x, y, Z2, add=TRUE, col="green", lwd=3, levels=c(c2), labels="")

How to calculate moments of a distribution specified by x, y coordinates

Imagine I have a simple dataframe of x, y coordinates.
dta_example <- data.frame(
x=c(0,1,2,3,4,5),
y=c(0.1, 0.4, 0.5, 0.6, 0.3, 0.1)
)
plot(NULL, xlim=c(0, 6), ylim=c(0,1), xlab="x", ylab="f(x)")
polygon(
x=c(dta_example$x[1], dta_example$x, dta_example$x[length(dta_example$x)]),
y=c(0, dta_example$y, 0),
col="red"
)
points(dta_example, pch=16)
How would I go about using the above to produce an empirical probability distribution that I could then characterise in terms of mean, sd, skewness, kurtosis etc? Thanks,
Jon
I would recommend the use approxfun on your data. Also, I would add 0s to your data beforehand:
dta_example <- rbind(c(0,0), dta_example, c(0,0))
First create a function corresponding to your data
f <- approxfun(dta_example$x, dta_example$y)
Compute numerically the $n$-th moment
n <- 3
xmin <- min(dta_example$x)
xmax <- max(dta_example$x)
m <- integrate(function(x) x^n*f(x), lower=xmin, upper=xmax)
m
# 47.1216 with absolute error < 0.002
EDIT: An example with a simple triangular distribution.
dat <- data.frame(x = c(-1, 0, 1), y = c(0, 1, 0))
f <- approxfun(dat$x, dat$y)
Plot of the distribution
plot(f, xlim=c(-2,2), col="red") ; grid()
Check that the integra between -1 and +1 is equal to one
integrate(f, lower=-1, upper=+1)
Compute mean and variance
integrate(function(x) x*f(x), lower=-1, upper=+1)
integrate(function(x) x^2*f(x), lower=-1, upper=+1)

Resources