Jackknife in R to obtain interval estimates - r

I have a question on how to use the jackknife using the bootstrap package. I want to obtain the interval estimate for the jackknife method.
I've tried running the code below, but no results for my parameter estimate.
rm(list=ls())
library(bootstrap)
library(maxLik)
set.seed(20)
lambda <- 0.02
beta <- 0.5
alpha <- 0.10
n <- 40
N <- 1000
lambda_hat <- NULL
beta_hat <- NULL
cp <- NULL
jack_lambda <- matrix(NA, nrow = N, ncol = 2)
jack_beta <- matrix(NA, nrow = N, ncol = 2)
### group all data frame generated from for loop into a list of data frame
data_full <- list()
for(i in 1:N){
u <- runif(n)
c_i <- rexp(n, 0.0001)
t_i <- (log(1 - (1 / lambda) * log(1 - u))) ^ (1 / beta)
s_i <- 1 * (t_i < c_i)
t <- pmin(t_i, c_i)
data_full[[i]] <- data.frame(u, t_i, c_i, s_i, t)
}
### statistic function for jackknife()
estjack <- function(data, j) {
data <- data[j, ]
data0 <- data[which(data$s_i == 0), ] #uncensored data
data1 <- data[which(data$s_i == 1), ] #right censored data
data
LLF <- function(para) {
t1 <- data$t_i
lambda <- para[1]
beta <- para[2]
e <- s_i*log(lambda*t1^(beta-1)*beta*exp(t1^beta)*exp(lambda*(1-exp(t1^beta))))
r <- (1-s_i)*log(exp(lambda*(1-exp(t1^beta))))
f <- sum(e + r)
return(f)
}
mle <- maxLik(LLF, start = c(para = c(0.02, 0.5)))
lambda_hat[i] <- mle$estimate[1]
beta_hat[i] <- mle$estimate[2]
return(c(lambda_hat[i], beta_hat[i]))
}
jackknife_resample<-list()
for(i in 1:N) {
jackknife_resample[[i]]<-data_full[[i]][-i]
results <- jackknife(jackknife_resample, estjack,R=1000)
jack_lambda[i,]<-lambda_hat[i]+c(-1,1)*qt(alpha/2,n-1,lower.tail = FALSE)*results$jack.se
jack_beta[i,]<-beta_hat[i]+c(-1,1)*qt(alpha/2,n-1,lower.tail = FALSE)*results$jack.se
}```
I couldn't get the parameter estimate that run in MLE and hence couldn't proceed to the next step. Can anyone help?

Related

How can I fix the while loop problem in R?

I wrote the code as like below, and sometime it gets proper value but sometime it could not give me the value for a long time.
I guess it looks like it has infinite problem with while function but I couldn't get it how to fix it.
I've already tried to search about the while loop but I guess I wrote proeprly but I couldn't get it why it sometime run properly and sometime run not.
Could you please give me advice or the proper modification?
Thank you.
rm(list=ls())
library(readxl)
library(dplyr)
library(ggplot2)
library(MASS)
# Mean Vector, Covariance Matrix Construction
mu <- c(0,0,0)
mu <- t(mu)
mu <- t(mu)
mu
# Construct 40 random variables for Phase II
mu2 <- c(1, 2, 1)
mu2 <- t(mu2)
mu2 <- t(mu2)
mu2
Sigma <- matrix(c(1, 0.9, 0.9, 0.9, 1, 0.9, 0.9, 0.9, 1), 3)
Sigma
getResult <- function(Result) {
# Construct 50 Random Variables for Phase I
Obs <- mvrnorm(50, mu = mu, Sigma = Sigma)
VecT2 <- apply(Obs, 2, mean)
VecT2 <- round(VecT2, 3)
ST2 <- cov(Obs)
ST2 <- round(ST2, 3)
Obs <- as.matrix(Obs)
T2All <- rep(0, nrow(Obs))
for(i in 1:nrow(Obs)) {
T2All[i] = t(Obs[i, ] - VecT2) %*% solve(ST2) %*% (Obs[i, ] - VecT2)
}
# Construct Control Limit
Alpha <- 0.005
M <- nrow(Obs)
M
p <- ncol(Obs)
p
UCL <- ((p * (M-1) * (M + 1))) / ((M - p) * M) * qf((1-Alpha), p, (M-p))
UCL <- round(UCL, 3)
Compare <- which(T2All > UCL)
# Repeat when is there are Out of Control in Phase I with eliminating it
while(isTRUE(Compare > UCL)) {
Obs <- Obs[-Compare,]
Alpha <- 0.005
M <- nrow(Obs)
p <- ncol(Obs)
UCL <- ((p * (M-1) * (M + 1))) / ((M - p) * M) * qf((1-Alpha), p, (M-p))
Compare <- which(T2All > UCL)
}
UCL <- round(UCL, 3)
# Prepare Observations two types of cases with Variable 20_1, Variable 20_2
Obs20_1 <- mvrnorm(20, mu = mu, Sigma = Sigma)
Obs20_2 <- mvrnorm(20, mu = mu2, Sigma = Sigma)
Obs40 <- rbind(Obs20_1, Obs20_2)
Obs40 <- as.matrix(Obs40)
T2 <- rep(0, nrow(Obs40))
for(i in 1:nrow(Obs40)) {
T2[i] = t(Obs40[i, ] - mu) %*% solve(Sigma) %*% (Obs40[i, ] - mu)
}
Result <- which(T2 > UCL)[1]
# Repeat when Out of Control occur in ARL0 section
while(isTRUE(Result < 20)) {
Obs20_1 <- mvrnorm(20, mu = mu, Sigma = Sigma)
Obs40 <- rbind(Obs20_1, Obs20_2)
Obs40 <- as.matrix(Obs40)
T2 <- rep(0, nrow(Obs40))
for(i in 1:nrow(Obs40)) {
T2[i] = t(Obs40[i, ] - mu) %*% solve(Sigma) %*% (Obs40[i, ] - mu)
}
Result <- which(T2 > UCL)[1]
}
Result
}
# Result
Final <- replicate(n = 200, expr = getResult(Result))
Final <- Final - 20
Final
mean(Final)
You could try using a for loop instead of a while loop.

Confidence interval in R for 1000 times

I have run the code below to obtain 1000 confidence intervals but it doesn't give an output for lambda_jk and beta_jk. And hence I cannot obtain the jack_lambda and jack_beta.
library(bootstrap)
library(maxLik)
est<-NULL
set.seed(20)
lambda <- 0.02
beta <- 0.5
alpha <- 0.10
n <- 40
N <- 1000
lambda_hat <- NULL
beta_hat <- NULL
lambda_jk<-NULL
beta_jk<-NULL
cp <- NULL
jack_lambda <- matrix(NA, nrow = N, ncol = 2)
jack_beta <- matrix(NA, nrow = N, ncol = 2)
for(i in 1:N){
u <- runif(n)
c_i <- rexp(n, 0.0001)
t_i <- (log(1 - (1 / lambda) * log(1 - u))) ^ (1 / beta)
s_i <- 1 * (t_i < c_i)
t <- pmin(t_i, c_i)
data<- data.frame(t,s_i)
LLF <- function(para,y) {
lambda <- para[1]
beta <- para[2]
e <- y[,2]*log(lambda*y[,1]^(beta-1)*beta*exp(y[,1]^beta)*exp(lambda*(1-exp(y[,1]^beta))))
r <- (1-y[,2])*log(exp(lambda*(1-exp(y[,1]^beta))))
f <- sum(e + r)
return(f)
}
mle <- maxLik(LLF, y=data,start = c(para = c(0.02, 0.5))) ### Obtain MLE based on the simulated data
lambda_hat[i] <- mle$estimate[1] #estimate for parameter 1
beta_hat[i] <- mle$estimate[2] #estimate for parameter 2
est<-rbind(est,mle$estimate)
### statistic function for jackknife()
jack<-matrix(0, nrow = n, ncol = 2)
for(i in 1:n){
fit.jack<-maxLik(logLik=LLF,y=data[-i,],method="NR",start=c(0.02, 0.5))
jack[i,]<-coef(fit.jack) #delete-one estimates
}
estjack<-rbind(jack)
meanlambda = mean(estjack[,1])
meanbeta = mean(estjack[,2])
lambda_jk[i] =lambda_hat[i]-(n-1)*(meanlambda-lambda_hat[i]) #jackknife estimate
beta_jk[i] = beta_hat[i]-(n-1)*(meanbeta-beta_hat[i])
SElambda<-sqrt(var(estjack[,1])/n-1) #std error
SEbeta<-sqrt(var(estjack[,2])/n-1)
#confidence interval
jack_lambda[i,] <- lambda_jk[i]+c(-1,1)*qt((1-alpha)/2,n-1)*SElambda
jack_beta[i,] <- beta_jk[i]+c(-1,1)*qt((1-alpha)/2,n-1)*SEbeta
}
(I am very appreciate with any ideas)

Optimising nested for loops in R

I tried to speed the below code but without any success.
I read about Rfast package but I also fail in implementing that package.
Is there any way to optimise the following code in R?
RI<-function(y,x,a,mu,R=500,t=500){
x <- as.matrix(x)
dm <- dim(x)
n <- dm[1]
bias1 <- bias2 <- bias3 <- numeric(t)
b1 <- b2<- b3 <- numeric(R)
### Outliers in Y ######
for (j in 1:t) {
for (i in 1:R) {
id <- sample(n, a * n)
z <- y
z[id] <- rnorm(id, mu)
b1[i] <- var(coef(lm(z ~., data = as.data.frame(x))))
b2[i] <- var(coef(rlm(z ~ ., data = data.frame(x), maxit = 2000, method = "MM")))
b3[i] <- var(coef(rlm(z ~ ., data = data.frame(x), psi = psi.huber,maxit = 300)))
}
bias1[j] <- sum(b1) ; bias2[j] <- sum(b2); bias3[j] <- sum(b3)
}
bias <- cbind("lm" = bias1,"MM-rlm" = bias2, "H-rlm" = bias3)
colMeans(bias)
}
#######################################
p <- 5
n <- 200
x<- matrix(rnorm(n * p), ncol = p)
y<-rnorm(n)
a=0.2
mu <-10
#######################################
RI(y,x,a,mu)

Objective function in optim evaluates to length 3 not 1

I am new to R and trying to find the optimal values of 3 parameters via indirect inference from a simulated panel data set, but getting an error "objective function in optim evaluates to length 3 not 1". I tried to check past posts, but the one I found didn't address the problem I am facing.
The code works if I only try for one parameter instead of 3. Here is the code:
#Generating data
modelp <- function(Y,alpha,N,T){
Yt <- Y[,2:T]
Ylag <- Y[,1:(T-1)]
Alpha <- alpha[,2:T]
yt <- matrix(t(Yt), (T-1)*N, 1)
ylag <- matrix(t(Ylag), (T-1)*N, 1)
alph <- matrix(t(Alpha), (T-1)*N, 1)
rho.ind <- rep(NA,N)
sigma_u <- rep(NA,N)
sigma_a <- rep(NA,N)
for(n in 1:N){
sigma_u[n] <- sigma(lm(yt~alph+ylag))
sigma_a[n] <- lm(yt~alph+ylag)$coef[2] #
(diag(vcov((lm(yt~alph+ylag)$coef),complete=TRUE)))[2] #
rho.ind[n] <- lm(yt~alph+ylag)$coef[3]
}
param <- matrix(NA,1,3)
param[1]<- mean(sum(rho.ind))
param[2]<- mean(sum(sigma_u))
param[3]<- mean(sum(sigma_a))
return(param)
}
## Function to estimate parameters
H.theta <- function(param.s){
set.seed(tmp.seed) #set seed
param.s.tmp <- matrix(0,1,3)
for(s in 1:H){
eps.s <- matrix(rnorm(N*T), N, T) #white noise erros
eps0.s <- matrix(rnorm(N*T), N, 1) #error for initial condition
alph.s <- matrix(rnorm(N*T),N,T)
Y.s <- matrix( 0, N, T)
ys.lag <- eps0.s
for(t in 1:T){ #Simulating the AR(1) process data
ys <- alph.s[,t]+param.s[1] * ys.lag + eps.s[,t] # [n,1:t]
Y.s[,t] <- ys
ys.lag <- ys
}
param.s.tmp <- param.s.tmp + modelp(Y.s, alph.s,N, T)
param.s[2] <- param.s.tmp[2]
param.s[3] <- mean(var(alph.s)) #param.s.tmp[3]
}
return( (param.data - param.s.tmp/H)^2 )
#return(param.s[1])
}
#Results for T = 10 & H = 10, N=100
nrep <-10
rho <-0.9
sigma_u <- 1
sigma_a <- 1.5
param <- matrix(NA,1,3)
param[1] <- rho
param[2] <- sigma_u
param[3] <- sigma_u
s.mu <- 0 # Mean
s.ep <- 0.5 #White Noise -initial conditions
Box <- cbind(rep(100,1),c(20),rep(c(5),1))
r.simu.box <- matrix(0,nrep,nrow(Box))
r.data.box <- matrix(0,nrep,nrow(Box))
for(k in 1:nrow(Box)){
N <- Box[k,1] #Number of individuals in panel
T <- Box[k,2] #Length of Panel
H <- Box[k,3] # Number of simulation paths
p.data <-matrix(NA,nrep,3)
p.simu <-matrix(NA,nrep,3)
est <- matrix(NA,1,3)
for(i in 1:nrep){
mu <- matrix(rnorm(N )*s.mu, N, 1)
eps <- matrix(rnorm(N*T)*s.ep, N, T)
eps0 <- matrix(rnorm(N*T)*s.ep, N, 1)
alph <- matrix(rnorm(N ), N, T)
Y <- matrix( 0, N, T)
y.lag <- (1-param[1])*mu + eps0
for(t in 1:T){
y <- alph[,t]+param[1]*y.lag +eps[,t]
Y[,t] <- y
y.lag <- y
}
param.data <- modelp(Y,alph,N,T) #Actual data
p.data[i,1:3] <- param.data
tmp.seed <- 3864+i+100*(k-1) #Simulated data
x0 <- c(0.5, 0,0)
est[i] <- optim(x0, H.theta,method = "BFGS", hessian = TRUE)$par
p.simu[i,1:3] <- est[i]
if(i%%10==0) print(c("Finished the (",i,")-th replication"))
}
}
mean(p.data[,1])- mean(p.simu[,1])
mean(p.data[,2])- mean(p.simu[,2])
sqrt(mean((p.data[1]-p.simu[1])^2))
I expect to get three values. Any help or suggestion will be greatly appreciated.

Coverage probability for an unspecified CDF

I used the following r code to determine the coverage probability.
theta <- seq(0,1, length = 100)
CD_theta <- function(y, p, n){
1 - pbinom (y, size = n, prob = p) + 1/2*dbinom(y, size = n, prob = p)
}
y <- 5
n <- 100
phat <- y/n
mytheta <- CD_theta(5, theta, 100)
set.seed(650)
ci <- list()
n <- 100
B <- 1000
result = rep(NA, B)
all_confInt <- function(B) {
for (i in 1:B){
boot.sample <- sample(mytheta, replace = TRUE)
lower <- theta[which.min(abs(boot.sample - .025))]
upper <- theta[which.min(abs(boot.sample - .975))]
ci[[i]] <- data.frame(lowerCI = lower, upperCI = upper)
intervals <- unlist(ci)
}
return(intervals)
}
df <- data.frame(matrix(all_confInt(B), nrow=B, byrow=T))
colnames(df)[1] <- "Lower"
colnames(df)[2] <- "Upper"
names(df)
dim(df)
mean(df$Lower < phat & df$Upper > phat)*100
However, I obtained 6.4% which is too low. Why am I getting really lower percentage?. Is there any problem in the r function?

Resources