How to get the output of each iteration - R

I have to run 1000 iterations of this SIMPLS function to get the coefficient values. My problem is how to get the coefficient values for each iteration. Can I print the output of each iteration?
n = 10
k = 20
a = 2
coef = matrix(0, nrow = 20, ncol = 10)
for (i in 1:1000) {
  t[, i] = matrix(rnorm(n %*% a, 0, 1), ncol = a)                            # n x a
  p[, i] = matrix(rnorm(k %*% a, 0, 1), ncol = a)                            # k x a
  B[, i] = matrix(rnorm(k, 0, 0.001), nrow = k, ncol = 1)                    # k x 1
  e[, i] = matrix(rcauchy(n, location = 0, scale = 1), nrow = n, ncol = 1)   # standard Cauchy
  x[, i] = t %*% t(p)                                                        # explanatory variable xi
  y[, i] = (t %*% (t(p) %*% B)) + e                                          # response variable yi
  simpls <- function(y, x, a) {
    n <- nrow(x)
    k <- ncol(x)
    m <- NCOL(y)
    y <- matrix(y)
    Ps <- matrix(0, k, a)
    Cs <- matrix(0, m, a)
    Rs <- matrix(0, k, a)
    Ts <- matrix(0, n, a)
    mx <- apply(x, 2, mean)
    sdx <- apply(x, 2, sd)
    x <- sapply(1:k, function(i) (x[, i] - mx[i]))
    my <- apply(y, 2, mean)
    sdy <- apply(y, 2, sd)
    y <- sapply(1:m, function(i) (y[, i] - my[i]))
    S <- t(x) %*% y
    Snew <- S
    for (i in 1:a) {
      rs <- svd(Snew)$u[, 1, drop = FALSE]
      rs <- rs / norm(rs, type = "F")
      ts <- x %*% rs
      ts <- ts / norm(ts, type = "F")
      ps <- t(x) %*% ts
      cs <- t(y) %*% ts
      Rs[, i] <- rs
      Ts[, i] <- ts
      Ps[, i] <- ps
      Cs[, i] <- cs
      Snew <- Snew - Ps[, 1:i] %*% solve(t(Ps[, 1:i]) %*% Ps[, 1:i]) %*% t(Ps[, 1:i]) %*% Snew
    }
    coef[, i] <- matrix(drop(Rs %*% (solve(t(Ps) %*% Rs) %*% t(Cs))))
    yfit <- x %*% coef
    orgyfit <- yfit + my
    res <- y - yfit
    SSE <- sum((y - yfit)^2)
    scale <- sqrt(SSE / (n - a))
    stdres <- sapply(1:m, function(i) (res[, i] - mean(res[, i])) / sqrt(var(res[, i])))
    hatt <- diag(Ts %*% solve(t(Ts) %*% Ts) %*% t(Ts))
    result <- list(coef = coef, fit = orgyfit, res = res, SSE = SSE, scale = scale,
                   stdres = stdres, leverage = hatt, Ts = Ts, Rs = Rs, Ps = Ps, Cs = Cs)
  }
}
print(coef)

You can just add your coef to a vector for every iteration. I've created an example here:
coef_vector <- NULL
for (i in 1:10) {
  loop_coef <- i * 2
  coef_vector <- c(coef_vector, loop_coef)
}
Result:
> coef_vector
[1] 2 4 6 8 10 12 14 16 18 20
>
Of course, if your coef is more complex than a single number, you can add it to a list instead of a vector.
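To carry the same idea over to the SIMPLS loop above, a sketch could look like the following. Two assumptions on my part: simpls() is defined once, outside the loop, and its final lines are changed to build a local coefficient vector, e.g. coef <- drop(Rs %*% solve(t(Ps) %*% Rs) %*% t(Cs)), put it into the result list, and end with result, so each call returns one set of k coefficients.

nsim <- 1000
n <- 10; k <- 20; a <- 2

coef_mat <- matrix(0, nrow = k, ncol = nsim)  # one column of coefficients per iteration
fits <- vector("list", nsim)                  # optional: keep every full simpls() fit

for (i in 1:nsim) {
  t <- matrix(rnorm(n * a), n, a)                          # n x a scores
  p <- matrix(rnorm(k * a), k, a)                          # k x a loadings
  B <- matrix(rnorm(k, 0, 0.001), k, 1)                    # k x 1 true coefficients
  e <- matrix(rcauchy(n, location = 0, scale = 1), n, 1)   # standard Cauchy errors
  x <- t %*% t(p)                                          # explanatory variables (n x k)
  y <- t %*% (t(p) %*% B) + e                              # response (n x 1)

  fit <- simpls(y, x, a)
  fits[[i]] <- fit
  coef_mat[, i] <- fit$coef                                # store this iteration's coefficients
}

coef_mat[, 1]        # coefficients from iteration 1
rowMeans(coef_mat)   # average coefficient over all 1000 iterations

Printing inside the loop also works (print(fit$coef)), but storing the columns keeps every result available after the loop finishes.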

Related

Why do I get the error "number of items to replace is not a multiple of replacement length" when running the sppba function of the WRS2 package?

I would be super grateful for some help. I don't have a coding background and I am confused by the error message I am getting when running the sppb functions of the WRS2 package. These functions perform a robust mixed ANOVA using bootstrapping.
sppba(formula = score ~ my_between_variable * my_within_variable, id = participant_code, data = df_long_T2)
Error in xmat[, k] <- x[[kv]] :
number of items to replace is not a multiple of replacement length
I get the same error for all three sppb functions. The functions look the same except that instead of sppba the others say sppbb and sppbi. I don't even know what the functions are trying to replace. The functions work for me with other data.
The classes of all the things involved seem fine: score is numeric, order_supplement and time are factors, participant_code is character, df_long_T2 is a dataframe. I have 120 participants, 61 in one group and 59 in the other, with two observations per participant. There are no NAs in the columns involved.
traceback() just gives me the one line of code above and the error message.
debug() gives me this, and I don't know what to make of it:
"Debug location is approximate because location is not available"
function (formula, id, data, est = "mom", avg = TRUE, nboot = 500, MDIS = FALSE, ...)
{
    if (missing(data)) {
        mf <- model.frame(formula)
    }
    else {
        mf <- model.frame(formula, data)
    }
    cl <- match.call()
    est <- match.arg(est, c("mom", "onestep", "median"), several.ok = FALSE)
    mf1 <- match.call()
    m <- match(c("formula", "data", "id"), names(mf1), 0L)
    mf1 <- mf1[c(1L, m)]
    mf1$drop.unused.levels <- TRUE
    mf1[[1L]] <- quote(stats::model.frame)
    mf1 <- eval(mf1, parent.frame())
    random1 <- mf1[, "(id)"]
    depvar <- colnames(mf)[1]
    if (all(length(table(random1)) == table(mf[, 3]))) {
        ranvar <- colnames(mf)[3]
        fixvar <- colnames(mf)[2]
    }
    else {
        ranvar <- colnames(mf)[2]
        fixvar <- colnames(mf)[3]
    }
    MC <- FALSE
    K <- length(table(mf[, ranvar]))
    J <- length(table(mf[, fixvar]))
    p <- J * K
    grp <- 1:p
    est <- get(est)
    fixsplit <- split(mf[, depvar], mf[, fixvar])
    indsplit <- split(mf[, ranvar], mf[, fixvar])
    dattemp <- mapply(split, fixsplit, indsplit, SIMPLIFY = FALSE)
    data <- do.call(c, dattemp)
    x <- data
    jp <- 1 - K
    kv <- 0
    kv2 <- 0
    for (j in 1:J) {
        jp <- jp + K
        xmat <- matrix(NA, ncol = K, nrow = length(x[[jp]]))
        for (k in 1:K) {
            kv <- kv + 1
            xmat[, k] <- x[[kv]]
        }
        xmat <- elimna(xmat)
        for (k in 1:K) {
            kv2 <- kv2 + 1
            x[[kv2]] <- xmat[, k]
        }
    }
    xx <- x
    nvec <- NA
    jp <- 1 - K
    for (j in 1:J) {
        jp <- jp + K
        nvec[j] <- length(x[[jp]])
    }
    bloc <- matrix(NA, nrow = J, ncol = nboot)
    mvec <- NA
    ik <- 0
    for (j in 1:J) {
        x <- matrix(NA, nrow = nvec[j], ncol = K)
        for (k in 1:K) {
            ik <- ik + 1
            x[, k] <- xx[[ik]]
            if (!avg)
                mvec[ik] <- est(xx[[ik]])
        }
        tempv <- apply(x, 2, est)
        data <- matrix(sample(nvec[j], size = nvec[j] * nboot, replace = TRUE), nrow = nboot)
        bvec <- matrix(NA, ncol = K, nrow = nboot)
        for (k in 1:K) {
            temp <- x[, k]
            bvec[, k] <- apply(data, 1, rmanogsub, temp, est)
        }
        if (avg) {
            mvec[j] <- mean(tempv)
            bloc[j, ] <- apply(bvec, 1, mean)
        }
        if (!avg) {
            if (j == 1)
                bloc <- bvec
            if (j > 1)
                bloc <- cbind(bloc, bvec)
        }
    }
    if (avg) {
        d <- (J^2 - J)/2
        con <- matrix(0, J, d)
        id <- 0
        Jm <- J - 1
        for (j in 1:Jm) {
            jp <- j + 1
            for (k in jp:J) {
                id <- id + 1
                con[j, id] <- 1
                con[k, id] <- 0 - 1
            }
        }
    }
    if (!avg) {
        MJK <- K * (J^2 - J)/2
        JK <- J * K
        MJ <- (J^2 - J)/2
        cont <- matrix(0, nrow = J, ncol = MJ)
        ic <- 0
        for (j in 1:J) {
            for (jj in 1:J) {
                if (j < jj) {
                    ic <- ic + 1
                    cont[j, ic] <- 1
                    cont[jj, ic] <- 0 - 1
                }
            }
        }
        tempv <- matrix(0, nrow = K - 1, ncol = MJ)
        con1 <- rbind(cont[1, ], tempv)
        for (j in 2:J) {
            con2 <- rbind(cont[j, ], tempv)
            con1 <- rbind(con1, con2)
        }
        con <- con1
        if (K > 1) {
            for (k in 2:K) {
                con1 <- push(con1)
                con <- cbind(con, con1)
            }
        }
    }
    if (!avg)
        bcon <- t(con) %*% t(bloc)
    if (avg)
        bcon <- t(con) %*% (bloc)
    tvec <- t(con) %*% mvec
    tvec <- tvec[, 1]
    tempcen <- apply(bcon, 1, mean)
    vecz <- rep(0, ncol(con))
    bcon <- t(bcon)
    temp = bcon
    for (ib in 1:nrow(temp)) temp[ib, ] = temp[ib, ] - tempcen + tvec
    bcon <- rbind(bcon, vecz)
    if (!MDIS) {
        if (!MC)
            dv = pdis(bcon, center = tvec)
    }
    if (MDIS) {
        smat <- var(temp)
        bcon <- rbind(bcon, vecz)
        chkrank <- qr(smat)$rank
        if (chkrank == ncol(smat))
            dv <- mahalanobis(bcon, tvec, smat)
        if (chkrank < ncol(smat)) {
            smat <- ginv(smat)
            dv <- mahalanobis(bcon, tvec, smat, inverted = T)
        }
    }
    bplus <- nboot + 1
    sig.level <- 1 - sum(dv[bplus] >= dv[1:nboot])/nboot
    tvec1 <- data.frame(Estimate = tvec)
    if (avg) {
        tnames <- apply(combn(levels(mf[, fixvar]), 2), 2, paste0, collapse = "-")
        rownames(tvec1) <- tnames
    }
    else {
        fixcomb <- apply(combn(levels(mf[, fixvar]), 2), 2, paste0, collapse = "-")
        rnames <- levels(mf[, ranvar])
        tnames <- as.vector(t(outer(rnames, fixcomb, paste)))
        rownames(tvec1) <- tnames
    }
    result <- list(test = tvec1, p.value = sig.level, contrasts = con, call = cl)
    class(result) <- c("spp")
    result
}
I expected to get an output like this:
## Test statistics:
## Estimate
## time1-time2 0.3000
##
## Test whether the corresponding population parameters are the same:
## p-value: 0.37
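One structural check worth adding: in the failing line xmat[, k] <- x[[kv]], the matrix xmat is sized from a single cell of the design, so the assignment appears to require every within-subject cell inside a given between-group level to contain the same number of observations. A quick diagnostic sketch, using the placeholder column names from the call above (swap in the real names):

# rows per between-group x within-subject cell; unequal counts inside the
# same between-group level would reproduce the replacement-length error
table(df_long_T2$my_between_variable, df_long_T2$my_within_variable)

# observations per participant (should be exactly 2 with two time points)
table(table(df_long_T2$participant_code))

# NAs in the modelling columns
colSums(is.na(df_long_T2[, c("score", "my_between_variable",
                             "my_within_variable", "participant_code")]))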

Jackknife in R to obtain interval estimates

I have a question on how to use the jackknife from the bootstrap package. I want to obtain an interval estimate using the jackknife method.
I've tried running the code below, but I get no results for my parameter estimates.
rm(list = ls())
library(bootstrap)
library(maxLik)
set.seed(20)
lambda <- 0.02
beta <- 0.5
alpha <- 0.10
n <- 40
N <- 1000
lambda_hat <- NULL
beta_hat <- NULL
cp <- NULL
jack_lambda <- matrix(NA, nrow = N, ncol = 2)
jack_beta <- matrix(NA, nrow = N, ncol = 2)

### collect the data frames generated by the for loop into a list
data_full <- list()
for (i in 1:N) {
  u <- runif(n)
  c_i <- rexp(n, 0.0001)
  t_i <- (log(1 - (1 / lambda) * log(1 - u)))^(1 / beta)
  s_i <- 1 * (t_i < c_i)
  t <- pmin(t_i, c_i)
  data_full[[i]] <- data.frame(u, t_i, c_i, s_i, t)
}

### statistic function for jackknife()
estjack <- function(data, j) {
  data <- data[j, ]
  data0 <- data[which(data$s_i == 0), ]  # uncensored data
  data1 <- data[which(data$s_i == 1), ]  # right-censored data
  data
  LLF <- function(para) {
    t1 <- data$t_i
    lambda <- para[1]
    beta <- para[2]
    e <- s_i * log(lambda * t1^(beta - 1) * beta * exp(t1^beta) * exp(lambda * (1 - exp(t1^beta))))
    r <- (1 - s_i) * log(exp(lambda * (1 - exp(t1^beta))))
    f <- sum(e + r)
    return(f)
  }
  mle <- maxLik(LLF, start = c(para = c(0.02, 0.5)))
  lambda_hat[i] <- mle$estimate[1]
  beta_hat[i] <- mle$estimate[2]
  return(c(lambda_hat[i], beta_hat[i]))
}

jackknife_resample <- list()
for (i in 1:N) {
  jackknife_resample[[i]] <- data_full[[i]][-i]
  results <- jackknife(jackknife_resample, estjack, R = 1000)
  jack_lambda[i, ] <- lambda_hat[i] + c(-1, 1) * qt(alpha / 2, n - 1, lower.tail = FALSE) * results$jack.se
  jack_beta[i, ] <- beta_hat[i] + c(-1, 1) * qt(alpha / 2, n - 1, lower.tail = FALSE) * results$jack.se
}
I couldn't get the parameter estimates from the MLE step, and hence couldn't proceed further. Can anyone help?
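Not a full fix, but for reference, bootstrap::jackknife() expects a data vector (or a vector of row indices) plus a statistic that returns a single number; it recomputes that statistic with one observation left out at a time and returns jack.se, jack.bias and jack.values. A minimal sketch for one simulated data set, reusing data_full, alpha and the log-likelihood from the script above (the helper fit_param and the 90% t-interval are my own assumptions, not part of the original code):

dat <- data_full[[1]]   # one simulated data set from the script above
n   <- nrow(dat)        # 40 observations

# statistic for jackknife(): refit the MLE on the rows in idx and
# return one parameter (which = 1 -> lambda, which = 2 -> beta)
fit_param <- function(idx, data, which) {
  d <- data[idx, ]
  LLF <- function(para) {
    lambda <- para[1]; beta <- para[2]
    t1 <- d$t_i; s_i <- d$s_i
    e <- s_i * log(lambda * t1^(beta - 1) * beta * exp(t1^beta) *
                     exp(lambda * (1 - exp(t1^beta))))
    r <- (1 - s_i) * log(exp(lambda * (1 - exp(t1^beta))))
    sum(e + r)
  }
  coef(maxLik(LLF, start = c(0.02, 0.5)))[which]
}

jack_l <- jackknife(1:n, fit_param, dat, which = 1)   # leave-one-out over rows
jack_b <- jackknife(1:n, fit_param, dat, which = 2)

# full-sample estimates and 90% jackknife t-intervals (alpha = 0.10 above)
est_lambda <- fit_param(1:n, dat, 1)
est_beta   <- fit_param(1:n, dat, 2)
ci_lambda  <- est_lambda + c(-1, 1) * qt(alpha / 2, n - 1, lower.tail = FALSE) * jack_l$jack.se
ci_beta    <- est_beta   + c(-1, 1) * qt(alpha / 2, n - 1, lower.tail = FALSE) * jack_b$jack.se

The same block can then sit inside the for (i in 1:N) loop, filling jack_lambda[i, ] and jack_beta[i, ] for each simulated data set.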

Combining vectors with the sample function in R

I need your help: I need to combine two vectors (z and Num1, or z and Num2) so that z contributes 10 elements and Num1 (or Num2) contributes 90 elements to the final vector.
Code that I have now:
I <- seq(1:100)
NA1 <- vector()
NA2 <- vector()
z <- rep(NA, 10)
Num1 <- rnorm(100)
Num2 <- rnorm(100)
vect_1 <- sample(c(Num1, z))
vect_2 <- sample(c(Num2, z))
vect_1_NA <- is.na(vect_1)
vect_2_NA <- is.na(vect_2)
for (i in I) {
  if (vect_1_NA[i] == TRUE)
    NA1 <- append(NA1, i)
}
for (i in I) {
  if (vect_2_NA[i] == TRUE)
    NA2 <- append(NA2, i)
}
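If I understood the goal correctly (an assumption: the final vector should have length 100, holding 90 values drawn from Num1 and the 10 NAs from z), a more direct construction avoids the loops, and which(is.na(...)) recovers the NA positions:

set.seed(42)                              # only for a reproducible example
Num1 <- rnorm(100)
z <- rep(NA, 10)

vect_1 <- sample(c(sample(Num1, 90), z))  # 90 values from Num1 + 10 NAs, shuffled
length(vect_1)                            # 100
NA1 <- which(is.na(vect_1))               # indices of the NA entries, no loop needed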

Objective function in optim evaluates to length 3 not 1

I am new to R and trying to find the optimal values of 3 parameters via indirect inference from a simulated panel data set, but I am getting the error "objective function in optim evaluates to length 3 not 1". I checked past posts, but the one I found didn't address the problem I am facing.
The code works if I only try one parameter instead of 3. Here is the code:
# Generating data
modelp <- function(Y, alpha, N, T) {
  Yt <- Y[, 2:T]
  Ylag <- Y[, 1:(T - 1)]
  Alpha <- alpha[, 2:T]
  yt <- matrix(t(Yt), (T - 1) * N, 1)
  ylag <- matrix(t(Ylag), (T - 1) * N, 1)
  alph <- matrix(t(Alpha), (T - 1) * N, 1)
  rho.ind <- rep(NA, N)
  sigma_u <- rep(NA, N)
  sigma_a <- rep(NA, N)
  for (n in 1:N) {
    sigma_u[n] <- sigma(lm(yt ~ alph + ylag))
    sigma_a[n] <- lm(yt ~ alph + ylag)$coef[2] #
    (diag(vcov((lm(yt ~ alph + ylag)$coef), complete = TRUE)))[2] #
    rho.ind[n] <- lm(yt ~ alph + ylag)$coef[3]
  }
  param <- matrix(NA, 1, 3)
  param[1] <- mean(sum(rho.ind))
  param[2] <- mean(sum(sigma_u))
  param[3] <- mean(sum(sigma_a))
  return(param)
}

## Function to estimate parameters
H.theta <- function(param.s) {
  set.seed(tmp.seed)                       # set seed
  param.s.tmp <- matrix(0, 1, 3)
  for (s in 1:H) {
    eps.s <- matrix(rnorm(N * T), N, T)    # white noise errors
    eps0.s <- matrix(rnorm(N * T), N, 1)   # error for initial condition
    alph.s <- matrix(rnorm(N * T), N, T)
    Y.s <- matrix(0, N, T)
    ys.lag <- eps0.s
    for (t in 1:T) {                       # simulating the AR(1) process data
      ys <- alph.s[, t] + param.s[1] * ys.lag + eps.s[, t]  # [n, 1:t]
      Y.s[, t] <- ys
      ys.lag <- ys
    }
    param.s.tmp <- param.s.tmp + modelp(Y.s, alph.s, N, T)
    param.s[2] <- param.s.tmp[2]
    param.s[3] <- mean(var(alph.s))        # param.s.tmp[3]
  }
  return((param.data - param.s.tmp / H)^2)
  # return(param.s[1])
}

# Results for T = 10 & H = 10, N = 100
nrep <- 10
rho <- 0.9
sigma_u <- 1
sigma_a <- 1.5
param <- matrix(NA, 1, 3)
param[1] <- rho
param[2] <- sigma_u
param[3] <- sigma_u
s.mu <- 0    # mean
s.ep <- 0.5  # white noise - initial conditions
Box <- cbind(rep(100, 1), c(20), rep(c(5), 1))
r.simu.box <- matrix(0, nrep, nrow(Box))
r.data.box <- matrix(0, nrep, nrow(Box))
for (k in 1:nrow(Box)) {
  N <- Box[k, 1]  # number of individuals in panel
  T <- Box[k, 2]  # length of panel
  H <- Box[k, 3]  # number of simulation paths
  p.data <- matrix(NA, nrep, 3)
  p.simu <- matrix(NA, nrep, 3)
  est <- matrix(NA, 1, 3)
  for (i in 1:nrep) {
    mu <- matrix(rnorm(N) * s.mu, N, 1)
    eps <- matrix(rnorm(N * T) * s.ep, N, T)
    eps0 <- matrix(rnorm(N * T) * s.ep, N, 1)
    alph <- matrix(rnorm(N), N, T)
    Y <- matrix(0, N, T)
    y.lag <- (1 - param[1]) * mu + eps0
    for (t in 1:T) {
      y <- alph[, t] + param[1] * y.lag + eps[, t]
      Y[, t] <- y
      y.lag <- y
    }
    param.data <- modelp(Y, alph, N, T)     # actual data
    p.data[i, 1:3] <- param.data
    tmp.seed <- 3864 + i + 100 * (k - 1)    # simulated data
    x0 <- c(0.5, 0, 0)
    est[i] <- optim(x0, H.theta, method = "BFGS", hessian = TRUE)$par
    p.simu[i, 1:3] <- est[i]
    if (i %% 10 == 0) print(c("Finished the (", i, ")-th replication"))
  }
}
mean(p.data[, 1]) - mean(p.simu[, 1])
mean(p.data[, 2]) - mean(p.simu[, 2])
sqrt(mean((p.data[1] - p.simu[1])^2))
I expect to get three values. Any help or suggestion will be greatly appreciated.
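One way to read the error: optim() requires fn to return a single scalar, while H.theta() returns the 1 x 3 matrix (param.data - param.s.tmp/H)^2. A minimal sketch of a scalar wrapper, keeping H.theta exactly as written above (summing the squared deviations is my assumption here, not necessarily the intended indirect-inference criterion):

H.theta.scalar <- function(param.s) {
  dev <- H.theta(param.s)   # 1 x 3 matrix of squared deviations
  sum(dev)                  # single number for optim() to minimise
}

fit <- optim(c(0.5, 0, 0), H.theta.scalar, method = "BFGS", hessian = TRUE)
fit$par                     # the three estimated parameters

Since $par is a length-3 vector, the container would also need a row per replication, e.g. est <- matrix(NA, nrep, 3) with est[i, ] <- fit$par, rather than est[i].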

How to fix "object not found" in R when the object is declared

I am trying to take a derivative of a double sum function. I am running into this error:
Error in deriv.f.1(X = X.data, y = y.vec, alpha = alpha.vector[1, ]) :
object 'L_D_grad' not found
I have tried moving the {} brackets around and double-checking whether I missed a closing/opening bracket or have an extra one, but the error persists.
# Generate Sample Data
gen.sample <- function(n) {
  x <- rnorm(n, 5, 10)
  y <- ifelse(x < 2.843, 1, -1)
  return(data.frame(x, y))
}
##
deriv.f.1 <- function(X, y, alpha) {
  N <- length(X)
  L_D_grad < numeric(N)
  xy.alpha.sum <- numeric(N)
  for (k in 1:N) {
    for (l in 1:N) {
      if (l == k) {
        xy.alpha.sum[l] = 0
      } else {
        xy.alpha.sum[l] <- alpha[l] * y[k] * y[l] * X[k] * X[l]
      }
    }
    L_D_grad[k] <- 1 - sum(xy.alpha.sum) - alpha[k] * (y[k])^2 * (X[k])^2
  }
  return(L_D_grad)
}
## Illustration
set.seed(4997)
options(digits = 4, scipen = -4)
sample.data <- gen.sample(n = N)
X.data <- sample.data$x
y.vec <- sample.data$y
alpha.vector <- matrix(rep(seq(from = -5, to = 5, length.out = N), N * N),
                       ncol = N, nrow = N, byrow = TRUE)
alpha_vec <- alpha.vector[1, ]
deriv.f.1(X = X.data, y = y.vec, alpha = alpha_vec)
Thanks in advance!
Here is my code:
# Generate Sample Data
gen.sample <- function(n) {
  x <- rnorm(n, 5, 10)
  y <- ifelse(x < 2.843, 1, -1)
  return(data.frame(x, y))
}
##
deriv.f.1 <- function(X, y, alpha) {
  N <- length(X)
  L_D_grad <- numeric(N)
  xy.alpha.sum <- numeric(N)
  for (k in 1:N) {
    for (l in 1:N) {
      if (l == k) {
        xy.alpha.sum[l] = 0
      } else {
        xy.alpha.sum[l] <- alpha[l] * y[k] * y[l] * X[k] * X[l]
      }
    }
    L_D_grad[k] <- 1 - sum(xy.alpha.sum) - alpha[k] * (y[k])^2 * (X[k])^2
  }
  return(L_D_grad)
}
## Illustration
set.seed(4997)
options(digits = 4, scipen = -4)
N = 10
sample.data <- gen.sample(n = N)
X.data <- sample.data$x
y.vec <- sample.data$y
alpha.vector <- matrix(rep(seq(from = -5, to = 5, length.out = N), N * N),
                       ncol = N, nrow = N, byrow = TRUE)
alpha_vec <- alpha.vector[1, ]
deriv.f.1(X = X.data, y = y.vec, alpha = alpha_vec)
Where:
#sample.data
#x y
#1 -5.303e+00 1
#2 1.493e+01 -1
#3 9.797e+00 -1
#4 1.991e+01 -1
#5 -1.454e+01 1
#6 1.423e+01 -1
#7 1.025e+01 -1
#8 5.455e+00 -1
#9 3.719e+00 -1
#10 2.021e+01 -1
And deriv.f.1(X = X.data, y = y.vec, alpha = alpha_vec)
# -1.271e+01 -3.759e+01 -2.432e+01 -5.046e+01 -3.659e+01 -3.577e+01 -2.548e+01 -1.310e+01
# -8.612e+00 -5.123e+01
I made two changes:
Assign N a value: N = 10
Correct the assignment of L_D_grad: L_D_grad <- numeric(N)
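The second change matters because < on its own is a comparison, not an assignment, so the original line never creates L_D_grad at all. A tiny illustration with a throwaway object name:

x < numeric(3)    # comparison: fails with "object 'x' not found" because x does not exist yet
x <- numeric(3)   # assignment: creates x as c(0, 0, 0)
x[1] <- 5         # indexing now works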
