Confidence intervals in R, repeated 1000 times

I have run the code below to obtain 1000 confidence intervals, but it doesn't produce output for lambda_jk and beta_jk, and hence I cannot obtain jack_lambda and jack_beta.
library(bootstrap)
library(maxLik)
est<-NULL
set.seed(20)
lambda <- 0.02
beta <- 0.5
alpha <- 0.10
n <- 40
N <- 1000
lambda_hat <- NULL
beta_hat <- NULL
lambda_jk<-NULL
beta_jk<-NULL
cp <- NULL
jack_lambda <- matrix(NA, nrow = N, ncol = 2)
jack_beta <- matrix(NA, nrow = N, ncol = 2)
for(i in 1:N){
  u <- runif(n)
  c_i <- rexp(n, 0.0001)
  t_i <- (log(1 - (1 / lambda) * log(1 - u))) ^ (1 / beta)
  s_i <- 1 * (t_i < c_i)
  t <- pmin(t_i, c_i)
  data <- data.frame(t, s_i)
  LLF <- function(para, y) {
    lambda <- para[1]
    beta <- para[2]
    e <- y[,2]*log(lambda*y[,1]^(beta-1)*beta*exp(y[,1]^beta)*exp(lambda*(1-exp(y[,1]^beta))))
    r <- (1-y[,2])*log(exp(lambda*(1-exp(y[,1]^beta))))
    f <- sum(e + r)
    return(f)
  }
  mle <- maxLik(LLF, y = data, start = c(para = c(0.02, 0.5))) ### Obtain MLE based on the simulated data
  lambda_hat[i] <- mle$estimate[1] #estimate for parameter 1
  beta_hat[i] <- mle$estimate[2] #estimate for parameter 2
  est <- rbind(est, mle$estimate)
  ### statistic function for jackknife()
  jack <- matrix(0, nrow = n, ncol = 2)
  for(i in 1:n){
    fit.jack <- maxLik(logLik = LLF, y = data[-i,], method = "NR", start = c(0.02, 0.5))
    jack[i,] <- coef(fit.jack) #delete-one estimates
  }
  estjack <- rbind(jack)
  meanlambda = mean(estjack[,1])
  meanbeta = mean(estjack[,2])
  lambda_jk[i] = lambda_hat[i] - (n-1)*(meanlambda - lambda_hat[i]) #jackknife estimate
  beta_jk[i] = beta_hat[i] - (n-1)*(meanbeta - beta_hat[i])
  SElambda <- sqrt(var(estjack[,1])/n-1) #std error
  SEbeta <- sqrt(var(estjack[,2])/n-1)
  #confidence interval
  jack_lambda[i,] <- lambda_jk[i] + c(-1,1)*qt((1-alpha)/2, n-1)*SElambda
  jack_beta[i,] <- beta_jk[i] + c(-1,1)*qt((1-alpha)/2, n-1)*SEbeta
}
(I would appreciate any ideas.)
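A note on the likely cause, offered tentatively: the inner delete-one loop for(i in 1:n) reuses the index i of the outer for(i in 1:N) loop, so by the end of each replicate i has been reset to n = 40 and the results are only ever written to position 40, which is why lambda_jk, beta_jk, jack_lambda and jack_beta come back essentially empty. Two smaller issues: sqrt(var(estjack[,1])/n-1) is parsed as sqrt(var(...)/n - 1) rather than sqrt(var(...)/(n-1)), and qt((1-alpha)/2, n-1) is the 45th percentile, not the 95th. The sketch below is not a drop-in fix, just an illustration of one way to restructure the delete-one step: jackknife_ci is a hypothetical helper meant to be called inside the existing for(i in 1:N) loop, and it uses the textbook jackknife standard error rather than the formula in the question.

library(maxLik)

# Delete-one jackknife CI for (lambda, beta) on ONE simulated data set.
# "data" has columns t and s_i as in the question; "mle_full" is the maxLik
# fit on the full sample.
jackknife_ci <- function(data, mle_full, alpha = 0.10) {
  LLF <- function(para, y) {
    lambda <- para[1]; beta <- para[2]
    e <- y[, 2] * log(lambda * y[, 1]^(beta - 1) * beta *
                        exp(y[, 1]^beta) * exp(lambda * (1 - exp(y[, 1]^beta))))
    r <- (1 - y[, 2]) * log(exp(lambda * (1 - exp(y[, 1]^beta))))
    sum(e + r)
  }
  n <- nrow(data)
  jack <- matrix(NA_real_, nrow = n, ncol = 2)
  for (j in seq_len(n)) {                                   # j, not the outer i
    fit_j <- maxLik(LLF, y = data[-j, ], method = "NR", start = c(0.02, 0.5))
    jack[j, ] <- coef(fit_j)                                # delete-one estimates
  }
  theta_hat <- coef(mle_full)                               # full-sample MLE
  theta_bar <- colMeans(jack)
  theta_jk  <- n * theta_hat - (n - 1) * theta_bar          # bias-corrected jackknife estimate
  se_jk     <- sqrt((n - 1) / n * colSums(sweep(jack, 2, theta_bar)^2))  # jackknife SE
  crit      <- qt(1 - alpha / 2, df = n - 1)                # 95th percentile for a 90% CI
  list(lambda = theta_jk[1] + c(-1, 1) * crit * se_jk[1],
       beta   = theta_jk[2] + c(-1, 1) * crit * se_jk[2])
}

Inside the simulation loop this would be used roughly as ci <- jackknife_ci(data, mle); jack_lambda[i, ] <- ci$lambda; jack_beta[i, ] <- ci$beta, so nothing ever touches the outer index i.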

Related

How can I fix the while loop problem in R?

I wrote the code below; sometimes it returns a proper value, but sometimes it fails to return a value for a long time.
It looks like the while loop is running into an infinite loop, but I can't work out how to fix it.
I've already searched for information about while loops, and I believe I wrote mine properly, but I can't see why it sometimes runs fine and sometimes doesn't.
Could you please give me advice or a proper modification?
Thank you.
rm(list=ls())
library(readxl)
library(dplyr)
library(ggplot2)
library(MASS)
# Mean Vector, Covariance Matrix Construction
mu <- c(0,0,0)
mu <- t(mu)
mu <- t(mu)
mu
# Construct 40 random variables for Phase II
mu2 <- c(1, 2, 1)
mu2 <- t(mu2)
mu2 <- t(mu2)
mu2
Sigma <- matrix(c(1, 0.9, 0.9, 0.9, 1, 0.9, 0.9, 0.9, 1), 3)
Sigma
getResult <- function(Result) {
  # Construct 50 Random Variables for Phase I
  Obs <- mvrnorm(50, mu = mu, Sigma = Sigma)
  VecT2 <- apply(Obs, 2, mean)
  VecT2 <- round(VecT2, 3)
  ST2 <- cov(Obs)
  ST2 <- round(ST2, 3)
  Obs <- as.matrix(Obs)
  T2All <- rep(0, nrow(Obs))
  for(i in 1:nrow(Obs)) {
    T2All[i] = t(Obs[i, ] - VecT2) %*% solve(ST2) %*% (Obs[i, ] - VecT2)
  }
  # Construct Control Limit
  Alpha <- 0.005
  M <- nrow(Obs)
  M
  p <- ncol(Obs)
  p
  UCL <- ((p * (M-1) * (M + 1))) / ((M - p) * M) * qf((1-Alpha), p, (M-p))
  UCL <- round(UCL, 3)
  Compare <- which(T2All > UCL)
  # Repeat while there are out-of-control points in Phase I, eliminating them
  while(isTRUE(Compare > UCL)) {
    Obs <- Obs[-Compare,]
    Alpha <- 0.005
    M <- nrow(Obs)
    p <- ncol(Obs)
    UCL <- ((p * (M-1) * (M + 1))) / ((M - p) * M) * qf((1-Alpha), p, (M-p))
    Compare <- which(T2All > UCL)
  }
  UCL <- round(UCL, 3)
  # Prepare observations for two cases: Variable 20_1 and Variable 20_2
  Obs20_1 <- mvrnorm(20, mu = mu, Sigma = Sigma)
  Obs20_2 <- mvrnorm(20, mu = mu2, Sigma = Sigma)
  Obs40 <- rbind(Obs20_1, Obs20_2)
  Obs40 <- as.matrix(Obs40)
  T2 <- rep(0, nrow(Obs40))
  for(i in 1:nrow(Obs40)) {
    T2[i] = t(Obs40[i, ] - mu) %*% solve(Sigma) %*% (Obs40[i, ] - mu)
  }
  Result <- which(T2 > UCL)[1]
  # Repeat when an out-of-control signal occurs in the ARL0 section
  while(isTRUE(Result < 20)) {
    Obs20_1 <- mvrnorm(20, mu = mu, Sigma = Sigma)
    Obs40 <- rbind(Obs20_1, Obs20_2)
    Obs40 <- as.matrix(Obs40)
    T2 <- rep(0, nrow(Obs40))
    for(i in 1:nrow(Obs40)) {
      T2[i] = t(Obs40[i, ] - mu) %*% solve(Sigma) %*% (Obs40[i, ] - mu)
    }
    Result <- which(T2 > UCL)[1]
  }
  Result
}
# Result
Final <- replicate(n = 200, expr = getResult(Result))
Final <- Final - 20
Final
mean(Final)
You could try using a for loop instead of a while loop.
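Building on that suggestion, and stated cautiously: the hang is most likely in the Phase I cleaning loop, because Compare holds the row indices returned by which(), not T² values, and T2All is never recomputed after rows are dropped, so the while condition can never change. Below is a small self-contained sketch of that step with a bounded loop that recomputes T² on every pass; phase1_t2 is an illustrative helper name, not part of any package.

library(MASS)
set.seed(1)

mu    <- c(0, 0, 0)
Sigma <- matrix(c(1, 0.9, 0.9, 0.9, 1, 0.9, 0.9, 0.9, 1), 3)
Alpha <- 0.005

# Hotelling T^2 of every row against the current Phase I estimates,
# plus the matching control limit.
phase1_t2 <- function(Obs, Alpha) {
  M <- nrow(Obs); p <- ncol(Obs)
  T2  <- mahalanobis(Obs, colMeans(Obs), cov(Obs))
  UCL <- (p * (M - 1) * (M + 1)) / ((M - p) * M) * qf(1 - Alpha, p, M - p)
  list(T2 = T2, UCL = UCL)
}

Obs <- mvrnorm(50, mu = mu, Sigma = Sigma)

# Bounded cleaning loop: recompute T^2 after every removal and stop as soon
# as no point exceeds the control limit, so it cannot spin forever.
for (pass in 1:50) {
  chk <- phase1_t2(Obs, Alpha)
  out <- which(chk$T2 > chk$UCL)
  if (length(out) == 0) break              # Phase I is in control
  Obs <- Obs[-out, , drop = FALSE]
}
chk$UCL                                    # control limit from the cleaned Phase I data

The same bounded-loop idea (a for loop with an early break, or a repeat with an iteration cap) can also guard the second while loop that regenerates Obs20_1 for the ARL0 check.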

Jackknife in R to obtain interval estimates

I have a question about how to use the jackknife via the bootstrap package. I want to obtain interval estimates from the jackknife method.
I've tried running the code below, but it produces no results for my parameter estimates.
rm(list=ls())
library(bootstrap)
library(maxLik)
set.seed(20)
lambda <- 0.02
beta <- 0.5
alpha <- 0.10
n <- 40
N <- 1000
lambda_hat <- NULL
beta_hat <- NULL
cp <- NULL
jack_lambda <- matrix(NA, nrow = N, ncol = 2)
jack_beta <- matrix(NA, nrow = N, ncol = 2)
### group all data frames generated by the for loop into a list of data frames
data_full <- list()
for(i in 1:N){
  u <- runif(n)
  c_i <- rexp(n, 0.0001)
  t_i <- (log(1 - (1 / lambda) * log(1 - u))) ^ (1 / beta)
  s_i <- 1 * (t_i < c_i)
  t <- pmin(t_i, c_i)
  data_full[[i]] <- data.frame(u, t_i, c_i, s_i, t)
}
### statistic function for jackknife()
estjack <- function(data, j) {
  data <- data[j, ]
  data0 <- data[which(data$s_i == 0), ] #uncensored data
  data1 <- data[which(data$s_i == 1), ] #right censored data
  data
  LLF <- function(para) {
    t1 <- data$t_i
    lambda <- para[1]
    beta <- para[2]
    e <- s_i*log(lambda*t1^(beta-1)*beta*exp(t1^beta)*exp(lambda*(1-exp(t1^beta))))
    r <- (1-s_i)*log(exp(lambda*(1-exp(t1^beta))))
    f <- sum(e + r)
    return(f)
  }
  mle <- maxLik(LLF, start = c(para = c(0.02, 0.5)))
  lambda_hat[i] <- mle$estimate[1]
  beta_hat[i] <- mle$estimate[2]
  return(c(lambda_hat[i], beta_hat[i]))
}
jackknife_resample <- list()
for(i in 1:N) {
  jackknife_resample[[i]] <- data_full[[i]][-i]
  results <- jackknife(jackknife_resample, estjack, R = 1000)
  jack_lambda[i,] <- lambda_hat[i] + c(-1,1)*qt(alpha/2, n-1, lower.tail = FALSE)*results$jack.se
  jack_beta[i,] <- beta_hat[i] + c(-1,1)*qt(alpha/2, n-1, lower.tail = FALSE)*results$jack.se
}
I couldn't get the parameter estimates from the MLE step, and hence I couldn't proceed to the next step. Can anyone help?
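A sketch that may help, rather than a definitive fix: jackknife() in the bootstrap package takes a vector as its first argument and a scalar-valued statistic theta(x, ...); it performs the leave-one-out itself, has no R argument, and forwards its ... arguments to theta. A common pattern is therefore to pass the row indices 1:n and subset the data frame inside the statistic. The code below shows that pattern for a single simulated data set under the question's model; theta_fn, dat and which_par are illustrative names.

library(bootstrap)
library(maxLik)

# Scalar statistic for jackknife(): refit the MLE on the rows selected by idx
# and return one parameter (which_par = 1 for lambda, 2 for beta).
theta_fn <- function(idx, dat, which_par) {
  t1 <- dat$t_i[idx]
  s1 <- dat$s_i[idx]
  LLF <- function(para) {
    lambda <- para[1]; beta <- para[2]
    e <- s1 * log(lambda * t1^(beta - 1) * beta * exp(t1^beta) *
                    exp(lambda * (1 - exp(t1^beta))))
    r <- (1 - s1) * log(exp(lambda * (1 - exp(t1^beta))))
    sum(e + r)
  }
  coef(maxLik(LLF, start = c(0.02, 0.5)))[which_par]
}

# One simulated data set, generated as in the question.
set.seed(20)
lambda <- 0.02; beta <- 0.5; alpha <- 0.10; n <- 40
u   <- runif(n)
c_i <- rexp(n, 0.0001)
t_i <- (log(1 - (1 / lambda) * log(1 - u)))^(1 / beta)
dat <- data.frame(t_i, s_i = 1 * (t_i < c_i))

# jackknife() deletes one element of 1:n at a time and hands the rest to theta_fn.
jk_lambda <- jackknife(1:n, theta_fn, dat = dat, which_par = 1)
jk_beta   <- jackknife(1:n, theta_fn, dat = dat, which_par = 2)

crit <- qt(1 - alpha / 2, n - 1)
theta_fn(1:n, dat, 1) + c(-1, 1) * crit * jk_lambda$jack.se   # interval for lambda
theta_fn(1:n, dat, 2) + c(-1, 1) * crit * jk_beta$jack.se     # interval for beta

To get 1000 intervals this would sit inside the existing for(i in 1:N) loop over data_full, with row i of jack_lambda and jack_beta filled in from the two intervals.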

Objective function in optim evaluates to length 3 not 1

I am new to R and am trying to find the optimal values of 3 parameters via indirect inference from a simulated panel data set, but I get the error "objective function in optim evaluates to length 3 not 1". I checked past posts, but the one I found didn't address the problem I am facing.
The code works if I only try one parameter instead of 3. Here is the code:
#Generating data
modelp <- function(Y, alpha, N, T){
  Yt <- Y[,2:T]
  Ylag <- Y[,1:(T-1)]
  Alpha <- alpha[,2:T]
  yt <- matrix(t(Yt), (T-1)*N, 1)
  ylag <- matrix(t(Ylag), (T-1)*N, 1)
  alph <- matrix(t(Alpha), (T-1)*N, 1)
  rho.ind <- rep(NA,N)
  sigma_u <- rep(NA,N)
  sigma_a <- rep(NA,N)
  for(n in 1:N){
    sigma_u[n] <- sigma(lm(yt~alph+ylag))
    sigma_a[n] <- lm(yt~alph+ylag)$coef[2] #
    (diag(vcov((lm(yt~alph+ylag)$coef),complete=TRUE)))[2] #
    rho.ind[n] <- lm(yt~alph+ylag)$coef[3]
  }
  param <- matrix(NA,1,3)
  param[1] <- mean(sum(rho.ind))
  param[2] <- mean(sum(sigma_u))
  param[3] <- mean(sum(sigma_a))
  return(param)
}
## Function to estimate parameters
H.theta <- function(param.s){
  set.seed(tmp.seed) #set seed
  param.s.tmp <- matrix(0,1,3)
  for(s in 1:H){
    eps.s <- matrix(rnorm(N*T), N, T) #white noise errors
    eps0.s <- matrix(rnorm(N*T), N, 1) #error for initial condition
    alph.s <- matrix(rnorm(N*T), N, T)
    Y.s <- matrix(0, N, T)
    ys.lag <- eps0.s
    for(t in 1:T){ #Simulating the AR(1) process data
      ys <- alph.s[,t] + param.s[1] * ys.lag + eps.s[,t] # [n,1:t]
      Y.s[,t] <- ys
      ys.lag <- ys
    }
    param.s.tmp <- param.s.tmp + modelp(Y.s, alph.s, N, T)
    param.s[2] <- param.s.tmp[2]
    param.s[3] <- mean(var(alph.s)) #param.s.tmp[3]
  }
  return( (param.data - param.s.tmp/H)^2 )
  #return(param.s[1])
}
#Results for T = 10 & H = 10, N=100
nrep <- 10
rho <- 0.9
sigma_u <- 1
sigma_a <- 1.5
param <- matrix(NA,1,3)
param[1] <- rho
param[2] <- sigma_u
param[3] <- sigma_u
s.mu <- 0 # Mean
s.ep <- 0.5 #White Noise -initial conditions
Box <- cbind(rep(100,1), c(20), rep(c(5),1))
r.simu.box <- matrix(0, nrep, nrow(Box))
r.data.box <- matrix(0, nrep, nrow(Box))
for(k in 1:nrow(Box)){
  N <- Box[k,1] #Number of individuals in panel
  T <- Box[k,2] #Length of Panel
  H <- Box[k,3] # Number of simulation paths
  p.data <- matrix(NA,nrep,3)
  p.simu <- matrix(NA,nrep,3)
  est <- matrix(NA,1,3)
  for(i in 1:nrep){
    mu <- matrix(rnorm(N)*s.mu, N, 1)
    eps <- matrix(rnorm(N*T)*s.ep, N, T)
    eps0 <- matrix(rnorm(N*T)*s.ep, N, 1)
    alph <- matrix(rnorm(N), N, T)
    Y <- matrix(0, N, T)
    y.lag <- (1-param[1])*mu + eps0
    for(t in 1:T){
      y <- alph[,t] + param[1]*y.lag + eps[,t]
      Y[,t] <- y
      y.lag <- y
    }
    param.data <- modelp(Y, alph, N, T) #Actual data
    p.data[i,1:3] <- param.data
    tmp.seed <- 3864 + i + 100*(k-1) #Simulated data
    x0 <- c(0.5, 0, 0)
    est[i] <- optim(x0, H.theta, method = "BFGS", hessian = TRUE)$par
    p.simu[i,1:3] <- est[i]
    if(i%%10==0) print(c("Finished the (",i,")-th replication"))
  }
}
mean(p.data[,1])- mean(p.simu[,1])
mean(p.data[,2])- mean(p.simu[,2])
sqrt(mean((p.data[1]-p.simu[1])^2))
I expect to get three values. Any help or suggestion will be greatly appreciated.
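For what it's worth, the message itself is mechanical: optim() minimises a scalar, but H.theta returns (param.data - param.s.tmp/H)^2, which has length 3. The usual remedy is to collapse the three squared deviations into a single criterion, for example their (possibly weighted) sum, and read all three estimates back from $par; in the code above that would mean returning sum((param.data - param.s.tmp/H)^2) from H.theta and storing the result row-wise, e.g. p.simu[i, 1:3] <- optim(x0, H.theta, method = "BFGS", hessian = TRUE)$par. A tiny self-contained illustration with made-up names (target, obj_vec, obj_scalar):

# A three-parameter least-squares toy: the vector-valued objective reproduces
# the error, the scalar sum of squares does not.
target <- c(0.9, 1.0, 1.5)

obj_vec    <- function(p) (target - p)^2        # length 3 -> optim() complains
obj_scalar <- function(p) sum((target - p)^2)   # length 1 -> fine

# optim(c(0.5, 0, 0), obj_vec, method = "BFGS")   # "objective function ... length 3 not 1"
fit <- optim(c(0.5, 0, 0), obj_scalar, method = "BFGS", hessian = TRUE)
fit$par                                           # all three estimates at once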

Coverage probability for an unspecified CDF

I used the following R code to determine the coverage probability.
theta <- seq(0,1, length = 100)
CD_theta <- function(y, p, n){
  1 - pbinom(y, size = n, prob = p) + 1/2*dbinom(y, size = n, prob = p)
}
y <- 5
n <- 100
phat <- y/n
mytheta <- CD_theta(5, theta, 100)
set.seed(650)
ci <- list()
n <- 100
B <- 1000
result = rep(NA, B)
all_confInt <- function(B) {
  for (i in 1:B){
    boot.sample <- sample(mytheta, replace = TRUE)
    lower <- theta[which.min(abs(boot.sample - .025))]
    upper <- theta[which.min(abs(boot.sample - .975))]
    ci[[i]] <- data.frame(lowerCI = lower, upperCI = upper)
    intervals <- unlist(ci)
  }
  return(intervals)
}
df <- data.frame(matrix(all_confInt(B), nrow=B, byrow=T))
colnames(df)[1] <- "Lower"
colnames(df)[2] <- "Upper"
names(df)
dim(df)
mean(df$Lower < phat & df$Upper > phat)*100
However, I obtained 6.4%, which is far too low. Why am I getting such a low percentage? Is there a problem in the R function?
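A cautious observation: sample(mytheta, replace = TRUE) scrambles the pairing between each theta value and its CD value, so the resulting "intervals" no longer track the confidence distribution, and coverage is being judged against phat rather than a parameter the data were generated from. Coverage is normally estimated by drawing a fresh y from Binomial(n, p_true) in each replicate, rebuilding the interval from that y, and counting how often it contains p_true. Below is a minimal sketch of that recipe for this CD-based interval; p_true, theta_grid and covered are illustrative names.

CD_theta <- function(y, p, n) {
  1 - pbinom(y, size = n, prob = p) + 1/2 * dbinom(y, size = n, prob = p)
}

n          <- 100
p_true     <- 0.05                # parameter the data are actually generated from
theta_grid <- seq(0, 1, length.out = 1000)
B          <- 1000

set.seed(650)
covered <- logical(B)
for (i in 1:B) {
  y_sim <- rbinom(1, size = n, prob = p_true)       # fresh data each replicate
  H     <- CD_theta(y_sim, theta_grid, n)           # CD evaluated over the grid
  lower <- theta_grid[which.min(abs(H - 0.025))]    # ~2.5% crossing
  upper <- theta_grid[which.min(abs(H - 0.975))]    # ~97.5% crossing
  covered[i] <- lower < p_true && p_true < upper
}
mean(covered) * 100                                 # estimated coverage, in percent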

Predicting binary response probabilities from gamlss R object

I want to predict binary class probabilities/class labels from a model fitted with the gamlss R function. How can the predict function be used to get them?
I have the following sample code:
library(gamlss)
X1 <- rnorm(500)
X2 <- sample(c("A","C","D","E"),500, replace = TRUE)
Y <- ifelse(X1>0.2& X2=="A",1,0)
n <- 500
training <- sample(1:n, 400)
testing <- (1:n)[-training]
fit <- gamlss(Y[training]~pcat(X2[training],Lp=1)+ri(X1[training],Lp=1),family=BI())
pred <- predict(fit,newdata = data.frame(X1,X2)[testing,],type = "response")
Error in predict.gamlss(fit, newdata = data.frame(X1, X2)[testing, ], :
define the original data using the option data
Any idea?
You need to define the original data using the data option of gamlss:
library(gamlss)
set.seed(1)
n <- 500
X1 <- rnorm(n)
X2 <- sample(c("A","C","D","E"), n, replace = TRUE)
Y <- ifelse(X1>0.2 & X2=="A", 1, 0)
dtset <- data.frame(X1, X2, Y)
training <- sample(1:n, 400)
XYtrain <- dtset[training,]
XYtest <- dtset[-training,]
fit <- gamlss(Y ~ pcat(X2, Lp=1) + ri(X1, Lp=1), family=BI(), data=XYtrain)
pred <- predict(fit, type="response", newdata=XYtest)
Unfortunately, predict now generates a new error message:
Error in if (p != ap) stop("the dimensions of the penalty matrix and of the design matrix are incompatible") : argument is of length zero
This problem can be solved by modifying the gamlss.ri function used by predict.gamlss:
gamlss.ri <- function (x, y, w, xeval = NULL, ...)
{
  regpen <- function(sm, D, P0, lambda) {
    for (it in 1:iter) {
      RD <- rbind(R, sqrt(lambda) * sqrt(omega.) * D)
      svdRD <- svd(RD)
      rank <- sum(svdRD$d > max(svdRD$d) * .Machine$double.eps^0.8)
      np <- min(p, N)
      U1 <- svdRD$u[1:np, 1:rank]
      y1 <- t(U1) %*% Qy
      beta <- svdRD$v[, 1:rank] %*% (y1/svdRD$d[1:rank])
      dm <- max(abs(sm - beta))
      sm <- beta
      omega. <- c(1/(abs(sm)^(2 - Lp) + kappa^2))
      if (dm < c.crit)
        break
    }
    HH <- (svdRD$u)[1:p, 1:rank] %*% t(svdRD$u[1:p, 1:rank])
    edf <- sum(diag(HH))
    fv <- X %*% beta
    row.names(beta) <- namesX
    out <- list(fv = fv, beta = beta, edf = edf, omega = omega.)
  }
  fnGAIC <- function(lambda, k) {
    fit <- regpen(sm, D, P0, lambda)
    fv <- fit$fv
    GAIC <- sum(w * (y - fv)^2) + k * fit$edf
    GAIC
  }
  X <- if (is.null(xeval))
    as.matrix(attr(x, "X"))
  else as.matrix(attr(x, "X"))[seq(1, length(y)), , drop = FALSE] # Added drop=FALSE
  namesX <- as.character(attr(x, "namesX"))
  D <- as.matrix(attr(x, "D"))
  order <- as.vector(attr(x, "order"))
  lambda <- as.vector(attr(x, "lambda"))
  df <- as.vector(attr(x, "df"))
  Lp <- as.vector(attr(x, "Lp"))
  kappa <- as.vector(attr(x, "kappa"))
  iter <- as.vector(attr(x, "iter"))
  k <- as.vector(attr(x, "k"))
  c.crit <- as.vector(attr(x, "c.crit"))
  method <- as.character(attr(x, "method"))
  gamlss.env <- as.environment(attr(x, "gamlss.env"))
  startLambdaName <- as.character(attr(x, "NameForLambda"))
  N <- sum(w != 0)
  n <- nrow(X)
  p <- ncol(X)
  aN <- nrow(D)
  ap <- ncol(D)
  qrX <- qr(sqrt(w) * X, tol = .Machine$double.eps^0.8)
  R <- qr.R(qrX)
  Q <- qr.Q(qrX)
  Qy <- t(Q) %*% (sqrt(w) * y)
  if (p != ap)
    stop("the dimensions of the penalty matrix and of the design matrix are incompatible")
  P0 <- diag(p) * 1e-06
  sm <- rep(0, p)
  omega. <- rep(1, p)
  tau2 <- sig2 <- NULL
  lambdaS <- get(startLambdaName, envir = gamlss.env)
  if (lambdaS >= 1e+07)
    lambda <- 1e+07
  if (lambdaS <= 1e-07)
    lambda <- 1e-07
  if (is.null(df) && !is.null(lambda) || !is.null(df) && !is.null(lambda)) {
    fit <- regpen(sm, D, P0, lambda)
    fv <- fit$fv
  }
  else if (is.null(df) && is.null(lambda)) {
    lambda <- lambdaS
    switch(method, ML = {
      for (it in 1:20) {
        fit <- regpen(sm, D, P0, lambda)
        gamma. <- D %*% as.vector(fit$beta) * sqrt(fit$omega)
        fv <- X %*% fit$beta
        sig2 <- sum(w * (y - fv)^2)/(N - fit$edf)
        tau2 <- sum(gamma.^2)/(fit$edf - order)
        lambda.old <- lambda
        lambda <- sig2/tau2
        if (abs(lambda - lambda.old) < 1e-04 || lambda > 1e+05) break
      }
    }, GAIC = {
      lambda <- nlminb(lambda, fnGAIC, lower = 1e-07, upper = 1e+07, k = k)$par
      fit <- regpen(sm, D, P0, lambda)
      fv <- fit$fv
      assign(startLambdaName, lambda, envir = gamlss.env)
    }, )
  }
  else {
    edf1_df <- function(lambda) {
      edf <- sum(1/(1 + lambda * UDU$values))
      (edf - df)
    }
    Rinv <- solve(R)
    S <- t(D) %*% D
    UDU <- eigen(t(Rinv) %*% S %*% Rinv)
    lambda <- if (sign(edf1_df(0)) == sign(edf1_df(1e+05)))
      1e+05
    else uniroot(edf1_df, c(0, 1e+05))$root
    fit <- regpen(sm, D, P0, lambda)
    fv <- fit$fv
  }
  waug <- as.vector(c(w, rep(1, nrow(D))))
  xaug <- as.matrix(rbind(X, sqrt(lambda) * D))
  lev <- hat(sqrt(waug) * xaug, intercept = FALSE)[1:n]
  var <- lev/w
  coefSmo <- list(coef = fit$beta, lambda = lambda, edf = fit$edf,
                  sigb2 = tau2, sige2 = sig2, sigb = if (is.null(tau2)) NA else sqrt(tau2),
                  sige = if (is.null(sig2)) NA else sqrt(sig2), fv = as.vector(fv),
                  se = sqrt(var), Lp = Lp)
  class(coefSmo) <- "ri"
  if (is.null(xeval)) {
    list(fitted.values = as.vector(fv), residuals = y - fv,
         var = var, nl.df = fit$edf - 1, lambda = lambda,
         coefSmo = coefSmo)
  }
  else {
    ll <- dim(as.matrix(attr(x, "X")))[1]
    nx <- as.matrix(attr(x, "X"))[seq(length(y) + 1, ll), ]
    pred <- drop(nx %*% fit$beta)
    pred
  }
}
# Replace "gamlss.ri" in the package "gamlss"
assignInNamespace("gamlss.ri", gamlss.ri, pos="package:gamlss")
pred <- predict(fit, type="response", newdata=XYtest)
print(head(pred))
# [1] 2.220446e-16 2.220446e-16 2.220446e-16 4.142198e-12 2.220446e-16 2.220446e-16
