Coverage probability for an unspecified CDF - r

I used the following r code to determine the coverage probability.
theta <- seq(0,1, length = 100)
CD_theta <- function(y, p, n){
1 - pbinom (y, size = n, prob = p) + 1/2*dbinom(y, size = n, prob = p)
}
y <- 5
n <- 100
phat <- y/n
mytheta <- CD_theta(5, theta, 100)
set.seed(650)
ci <- list()
n <- 100
B <- 1000
result = rep(NA, B)
all_confInt <- function(B) {
for (i in 1:B){
boot.sample <- sample(mytheta, replace = TRUE)
lower <- theta[which.min(abs(boot.sample - .025))]
upper <- theta[which.min(abs(boot.sample - .975))]
ci[[i]] <- data.frame(lowerCI = lower, upperCI = upper)
intervals <- unlist(ci)
}
return(intervals)
}
df <- data.frame(matrix(all_confInt(B), nrow=B, byrow=T))
colnames(df)[1] <- "Lower"
colnames(df)[2] <- "Upper"
names(df)
dim(df)
mean(df$Lower < phat & df$Upper > phat)*100
However, I obtained 6.4% which is too low. Why am I getting really lower percentage?. Is there any problem in the r function?

Related

Confidence interval in R for 1000 times

I have run the code below to obtain 1000 confidence intervals but it doesn't give an output for lambda_jk and beta_jk. And hence I cannot obtain the jack_lambda and jack_beta.
library(bootstrap)
library(maxLik)
est<-NULL
set.seed(20)
lambda <- 0.02
beta <- 0.5
alpha <- 0.10
n <- 40
N <- 1000
lambda_hat <- NULL
beta_hat <- NULL
lambda_jk<-NULL
beta_jk<-NULL
cp <- NULL
jack_lambda <- matrix(NA, nrow = N, ncol = 2)
jack_beta <- matrix(NA, nrow = N, ncol = 2)
for(i in 1:N){
u <- runif(n)
c_i <- rexp(n, 0.0001)
t_i <- (log(1 - (1 / lambda) * log(1 - u))) ^ (1 / beta)
s_i <- 1 * (t_i < c_i)
t <- pmin(t_i, c_i)
data<- data.frame(t,s_i)
LLF <- function(para,y) {
lambda <- para[1]
beta <- para[2]
e <- y[,2]*log(lambda*y[,1]^(beta-1)*beta*exp(y[,1]^beta)*exp(lambda*(1-exp(y[,1]^beta))))
r <- (1-y[,2])*log(exp(lambda*(1-exp(y[,1]^beta))))
f <- sum(e + r)
return(f)
}
mle <- maxLik(LLF, y=data,start = c(para = c(0.02, 0.5))) ### Obtain MLE based on the simulated data
lambda_hat[i] <- mle$estimate[1] #estimate for parameter 1
beta_hat[i] <- mle$estimate[2] #estimate for parameter 2
est<-rbind(est,mle$estimate)
### statistic function for jackknife()
jack<-matrix(0, nrow = n, ncol = 2)
for(i in 1:n){
fit.jack<-maxLik(logLik=LLF,y=data[-i,],method="NR",start=c(0.02, 0.5))
jack[i,]<-coef(fit.jack) #delete-one estimates
}
estjack<-rbind(jack)
meanlambda = mean(estjack[,1])
meanbeta = mean(estjack[,2])
lambda_jk[i] =lambda_hat[i]-(n-1)*(meanlambda-lambda_hat[i]) #jackknife estimate
beta_jk[i] = beta_hat[i]-(n-1)*(meanbeta-beta_hat[i])
SElambda<-sqrt(var(estjack[,1])/n-1) #std error
SEbeta<-sqrt(var(estjack[,2])/n-1)
#confidence interval
jack_lambda[i,] <- lambda_jk[i]+c(-1,1)*qt((1-alpha)/2,n-1)*SElambda
jack_beta[i,] <- beta_jk[i]+c(-1,1)*qt((1-alpha)/2,n-1)*SEbeta
}
(I am very appreciate with any ideas)

Jackknife in R to obtain interval estimates

I have a question on how to use the jackknife using the bootstrap package. I want to obtain the interval estimate for the jackknife method.
I've tried running the code below, but no results for my parameter estimate.
rm(list=ls())
library(bootstrap)
library(maxLik)
set.seed(20)
lambda <- 0.02
beta <- 0.5
alpha <- 0.10
n <- 40
N <- 1000
lambda_hat <- NULL
beta_hat <- NULL
cp <- NULL
jack_lambda <- matrix(NA, nrow = N, ncol = 2)
jack_beta <- matrix(NA, nrow = N, ncol = 2)
### group all data frame generated from for loop into a list of data frame
data_full <- list()
for(i in 1:N){
u <- runif(n)
c_i <- rexp(n, 0.0001)
t_i <- (log(1 - (1 / lambda) * log(1 - u))) ^ (1 / beta)
s_i <- 1 * (t_i < c_i)
t <- pmin(t_i, c_i)
data_full[[i]] <- data.frame(u, t_i, c_i, s_i, t)
}
### statistic function for jackknife()
estjack <- function(data, j) {
data <- data[j, ]
data0 <- data[which(data$s_i == 0), ] #uncensored data
data1 <- data[which(data$s_i == 1), ] #right censored data
data
LLF <- function(para) {
t1 <- data$t_i
lambda <- para[1]
beta <- para[2]
e <- s_i*log(lambda*t1^(beta-1)*beta*exp(t1^beta)*exp(lambda*(1-exp(t1^beta))))
r <- (1-s_i)*log(exp(lambda*(1-exp(t1^beta))))
f <- sum(e + r)
return(f)
}
mle <- maxLik(LLF, start = c(para = c(0.02, 0.5)))
lambda_hat[i] <- mle$estimate[1]
beta_hat[i] <- mle$estimate[2]
return(c(lambda_hat[i], beta_hat[i]))
}
jackknife_resample<-list()
for(i in 1:N) {
jackknife_resample[[i]]<-data_full[[i]][-i]
results <- jackknife(jackknife_resample, estjack,R=1000)
jack_lambda[i,]<-lambda_hat[i]+c(-1,1)*qt(alpha/2,n-1,lower.tail = FALSE)*results$jack.se
jack_beta[i,]<-beta_hat[i]+c(-1,1)*qt(alpha/2,n-1,lower.tail = FALSE)*results$jack.se
}```
I couldn't get the parameter estimate that run in MLE and hence couldn't proceed to the next step. Can anyone help?

Deep NN for multivariate regression

I implemented this simple NN but even when making it do all the interactions it fails to converge and the MSE remains very high
I tried to change the number of iterations and the learning rate but it doesn't work
rm(list=ls())
data <- read.csv("C:/Users/Mikele/Documents/Uni/IA AI & Machine Learning/R/11_23_2018/wine.csv",sep = ',',header = FALSE)
x <- data[,1:11]
y <- as.matrix(data[,12])
y_matrix <- matrix(rep(0,length(y)),nrow = length(y), ncol = 6)
k <-1
for (w in 1:length(y))
{
temp <- y[k] - 2
y_matrix[k,temp] <-1
k <- k + 1
}
hl <- c(40, 30, 20)
iter <- 1000
lr <- 0.1
## add in intercept
x_1 <- as.matrix(cbind(rep(1, nrow(x)),x))
## set error array
error <- rep(0, iter)
## set up weights
## the +1 is to add in the intercept/bias parameter
W1 <- matrix(runif(ncol(x_1)*hl[1], -1, 1), nrow = ncol(x_1))
W2 <- matrix(runif((hl[1]+1)*hl[2], -1, 1), nrow = hl[1]+1)
W3 <- matrix(runif((hl[2]+1)*hl[3], -1, 1), nrow = hl[2]+1)
W4 <- matrix(runif((hl[3]+1)*ncol(y), -1, 1), nrow = hl[3]+1)
for(k in 1:iter)
{
# calculate the hidden and output layers using X and hidden layer as inputs
# hidden layer 1 and 2 have a column of ones appended for the bias term
hidden1 <- cbind(matrix(1, nrow = nrow(x_1)), sigm(x_1 %*% W1))
hidden2 <- cbind(matrix(1, nrow = nrow(x_1)), sigm(hidden1 %*% W2))
hidden3 <- cbind(matrix(1, nrow = nrow(x_1)), sigm(hidden2 %*% W3))
y_hat <- sigm(hidden3 %*% W4)
# calculate the gradient and back prop the errors
# see theory above
y_hat_del <- (y-y_hat)*(d.sigm(y_hat))
hidden3_del <- y_hat_del %*% t(W4)*d.sigm(hidden3)
hidden2_del <- hidden3_del[,-1] %*% t(W3)*d.sigm(hidden2)
hidden1_del <- hidden2_del[,-1] %*% t(W2)*d.sigm(hidden1)
# update the weights
W4 <- W4 + lr*t(hidden3) %*% y_hat_del
W3 <- W3 + lr*t(hidden2) %*% hidden3_del[,-1]
W2 <- W2 + lr*t(hidden1) %*% hidden2_del[,-1]
W1 <- W1 + lr*t(x_1) %*% hidden1_del[,-1]
error[k] <- 1/nrow(y)*sum((y-y_hat)^2)
if((k %% (10^4+1)) == 0) cat("mse:", error[k], "\n")
}
# plot loss
xvals <- seq(1, iter, length = 100)
print(qplot(xvals, error[xvals], geom = "line", main = "MSE", xlab = "Iteration"))
no error message but I can't understand how to make a deep NN for
Multivariate Linear Regression
in addition I divided the ys into a 6-column matrix (the maximum and minimum of the initial dataset) now there is someone who can help me understand why not cover and in any case the final results are all concentrated on column 4?

Objective function in optim evaluates to length 3 not 1

I am new to R and trying to find the optimal values of 3 parameters via indirect inference from a simulated panel data set, but getting an error "objective function in optim evaluates to length 3 not 1". I tried to check past posts, but the one I found didn't address the problem I am facing.
The code works if I only try for one parameter instead of 3. Here is the code:
#Generating data
modelp <- function(Y,alpha,N,T){
Yt <- Y[,2:T]
Ylag <- Y[,1:(T-1)]
Alpha <- alpha[,2:T]
yt <- matrix(t(Yt), (T-1)*N, 1)
ylag <- matrix(t(Ylag), (T-1)*N, 1)
alph <- matrix(t(Alpha), (T-1)*N, 1)
rho.ind <- rep(NA,N)
sigma_u <- rep(NA,N)
sigma_a <- rep(NA,N)
for(n in 1:N){
sigma_u[n] <- sigma(lm(yt~alph+ylag))
sigma_a[n] <- lm(yt~alph+ylag)$coef[2] #
(diag(vcov((lm(yt~alph+ylag)$coef),complete=TRUE)))[2] #
rho.ind[n] <- lm(yt~alph+ylag)$coef[3]
}
param <- matrix(NA,1,3)
param[1]<- mean(sum(rho.ind))
param[2]<- mean(sum(sigma_u))
param[3]<- mean(sum(sigma_a))
return(param)
}
## Function to estimate parameters
H.theta <- function(param.s){
set.seed(tmp.seed) #set seed
param.s.tmp <- matrix(0,1,3)
for(s in 1:H){
eps.s <- matrix(rnorm(N*T), N, T) #white noise erros
eps0.s <- matrix(rnorm(N*T), N, 1) #error for initial condition
alph.s <- matrix(rnorm(N*T),N,T)
Y.s <- matrix( 0, N, T)
ys.lag <- eps0.s
for(t in 1:T){ #Simulating the AR(1) process data
ys <- alph.s[,t]+param.s[1] * ys.lag + eps.s[,t] # [n,1:t]
Y.s[,t] <- ys
ys.lag <- ys
}
param.s.tmp <- param.s.tmp + modelp(Y.s, alph.s,N, T)
param.s[2] <- param.s.tmp[2]
param.s[3] <- mean(var(alph.s)) #param.s.tmp[3]
}
return( (param.data - param.s.tmp/H)^2 )
#return(param.s[1])
}
#Results for T = 10 & H = 10, N=100
nrep <-10
rho <-0.9
sigma_u <- 1
sigma_a <- 1.5
param <- matrix(NA,1,3)
param[1] <- rho
param[2] <- sigma_u
param[3] <- sigma_u
s.mu <- 0 # Mean
s.ep <- 0.5 #White Noise -initial conditions
Box <- cbind(rep(100,1),c(20),rep(c(5),1))
r.simu.box <- matrix(0,nrep,nrow(Box))
r.data.box <- matrix(0,nrep,nrow(Box))
for(k in 1:nrow(Box)){
N <- Box[k,1] #Number of individuals in panel
T <- Box[k,2] #Length of Panel
H <- Box[k,3] # Number of simulation paths
p.data <-matrix(NA,nrep,3)
p.simu <-matrix(NA,nrep,3)
est <- matrix(NA,1,3)
for(i in 1:nrep){
mu <- matrix(rnorm(N )*s.mu, N, 1)
eps <- matrix(rnorm(N*T)*s.ep, N, T)
eps0 <- matrix(rnorm(N*T)*s.ep, N, 1)
alph <- matrix(rnorm(N ), N, T)
Y <- matrix( 0, N, T)
y.lag <- (1-param[1])*mu + eps0
for(t in 1:T){
y <- alph[,t]+param[1]*y.lag +eps[,t]
Y[,t] <- y
y.lag <- y
}
param.data <- modelp(Y,alph,N,T) #Actual data
p.data[i,1:3] <- param.data
tmp.seed <- 3864+i+100*(k-1) #Simulated data
x0 <- c(0.5, 0,0)
est[i] <- optim(x0, H.theta,method = "BFGS", hessian = TRUE)$par
p.simu[i,1:3] <- est[i]
if(i%%10==0) print(c("Finished the (",i,")-th replication"))
}
}
mean(p.data[,1])- mean(p.simu[,1])
mean(p.data[,2])- mean(p.simu[,2])
sqrt(mean((p.data[1]-p.simu[1])^2))
I expect to get three values. Any help or suggestion will be greatly appreciated.

Finding a single missing value that yields a correlation value

I'm trying to impute a missing value of one variable such that it'll yield a given correlation value
library(MASS)
mat <- mvrnorm(49, mu = c(0,5), Sigma = matrix(c(1,0.05,.05,1), ncol = 2), empirical = TRUE)
cor50row <- function(x,y, rho){
y_lnt <- length(y)
x[length(x) +1] <- mean(x)
val <- seq(-1000,0, .01)
for(indx in val){
y[y_lnt + 1] <- indx
if(rho - cor(x,y) < 1e-6){
break
}
}
return(cbind(x,y))
}
a <- cor50row(x = mat[,1], y= mat[,2], rho = .06)
So the idea is to find the missing value of the y variable that increases the correlation by .01

Resources