I am trying to run a multivariate regression with different layers in a RasterStack using focal {raster} or localFun {raster}. With the help of a similar post and the raster reference manual, my code works fine with single RasterLayers as input (see reproducible, albeit probably 'clunky', example below). However, I would like to do this using different layers in a RasterStack as described in {SECTION2} of the code below.
I would very much appreciate any advice.
Thank you
CODE:
library(raster)
#%%%%%%%%%%%%%%%%%%%%%
## SECTION1
#%%%%%%%%%%%%%%%%%%%%%
# create test data
set.seed(0)
resp = expl = raster(nrow=10, ncol=10)
# response variable
resp = setValues(resp,runif(100,min=15,max=45))
# explanatory variable
expl = setValues(expl,runif(100,min=2,max=6))
expl = expl * resp
resp[1:5] = NA; expl[1:5] = NA # add some NA values
par(mfrow=c(1,2))
plot(resp); plot(expl)
#..............................................................
# check global lm() results
data1.df = na.omit(as.data.frame(stack(list(resp=resp,expl=expl))))
head(data1.df)
data1.lm = lm(resp ~ expl, data=data1.df)
(data1.lmSum = summary(data1.lm))
data1.lmSum$coefficients[1];data1.lmSum$coefficients[2];data1.lmSum$coefficients[8]
data1.lmSum$r.squared
data1.lmSum$sigma
# pf(data1.lmSum$fstatistic[1], data1.lmSum$fstatistic[2], data1.lmSum$fstatistic[3],lower.tail = FALSE)
#..............................................................
# lm function for focal {raster} with RasterLayers
# output coefficients, r-squared, residual standard error and p-value(F stat)
# Calculate focal ("moving window") weight
fw = focalWeight(resp, 2, "Gauss")
# focal regression:
lm.focal = function(x, y, ...) {
if(all(is.na(x) & is.na(y))) {NA}
else {
m = lm(y~x)
summary(m)$r.squared #r-squared
# summary(m)$coefficients #intercept and slope together
#---> Error in setValues(x, value) : cannot use a matrix with these dimensions
# summary(m)$coefficients[1] #intercept
# summary(m)$coefficients[2] #slope
# summary(m)$coefficients[8] #p-value
# summary(m)$sigma #residual standard error
}
}
#---> How to output all at once?
lm.focal.out1 = localFun(resp, expl, w=fw, fun=lm.focal, na.rm=TRUE)
plot(lm.focal.out1)
#%%%%%%%%%%%%%%%%%%%%%
## SECTION2
#%%%%%%%%%%%%%%%%%%%%%
# create test data
set.seed(1)
resp = expl1 = expl2 = expl3 = expl4 = raster(nrow=10, ncol=10)
# x1 response variable
resp = setValues(resp,runif(100,min=15,max=45))
# x3 explanatory variables
expl1 = setValues(expl,runif(100,min=2,max=6))
expl1 = expl1 * resp
expl2 = expl1 * resp/runif(100,min=1,max=4)
expl3 = ((expl1 * resp) / 1.5 )/10
expl4 = ((expl1 * resp) / runif(100,min=0.5,max=2))/100
# add some NA values
resp[1:5] = NA; expl1[1:5] = NA; expl2[1:5] = NA; expl3[1:5] = NA; expl4[1:5] = NA
#stack RasterLayers
stack1 = stack(list(resp=resp,expl1=expl1,expl2=expl2,expl3=expl3,expl4=expl4))
# par(mfrow=c(1,1))
plot(stack1)
#..............................................................
# check global lm() results
stack1.df = na.omit(as.data.frame(stack1))
head(stack1.df)
stack1.lm = lm(resp ~ expl1+expl2+expl3+expl4, data=stack1.df)
(stack1.lmSum = summary(stack1.lm))
stack1.lmSum$coefficients[1]
stack1.lmSum$coefficients[2];stack1.lmSum$coefficients[3];stack1.lmSum$coefficients[4];stack1.lmSum$coefficients[5]
stack1.lmSum$r.squared
stack1.lmSum$sigma
pf(stack1.lmSum$fstatistic[1], stack1.lmSum$fstatistic[2], stack1.lmSum$fstatistic[3],lower.tail = FALSE)
#..............................................................
# lm function for focal {raster} with RasterStack
# output coefficients, r-squared, residual standard error and p-value(F stat)
# Calculate focal ("moving window") weight
fw.s = focalWeight(stack1, 2, "Gauss")
# focal regression with raster stack:
lm.focal.stack = function(x, ...) {
if(all(is.na(x) )) {NA}
else {
m = lm(x[1]~x[2]+x[3]+x[4]+x[5])
summary(m)$r.squared #r-squared
# summary(m)$coefficients #intercept and slope together
#---> Error in setValues(x, value) : cannot use a matrix with these dimensions
# summary(m)$coefficients[1] #intercept
# summary(m)$coefficients[2] #slope
# pf(summary(m)$fstatistic[1], summary(m)$fstatistic[2], summary(m)$fstatistic[3],lower.tail = FALSE) #p-value
# summary(m)$sigma #residual standard error
}
}
#---> How to output all at once?
lm.focal.stack.out1 <- focal(stack1, w=fw.s, fun=lm.focal.stack, na.rm=TRUE)
#---> unable to find an inherited method for function ‘focal’ for signature ‘"RasterStack"’
#plot(lm.focal.stack.out1)
#-----------------------------------------------------------
> sessionInfo()
R version 3.3.1 (2016-06-21)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows >= 8 x64 (build 9200)
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] raster_2.5-8 sp_1.2-3
loaded via a namespace (and not attached):
[1] rgdal_1.1-10 tools_3.3.1 Rcpp_0.12.5 grid_3.3.1 lattice_0.20-33
Not sure if you still need this answered, but I had the same issue and made a function called localFunStack to do the job of vector output from the local function as a rasterStack object, with a little hack to get the right layer names:
# localFun modified to write out a layer stack
localFunStack <- function(x, y, ngb=5, fun, ...) {
compareRaster(x,y)
rasterList <- list()
nc1 <- 1:(ngb*ngb)
nc2 <- ((ngb*ngb)+1):(2*(ngb*ngb))
if (canProcessInMemory(x, n=2*ngb)) {
vx <- getValuesFocal(x, 1, nrow(x), ngb=ngb)
vy <- getValuesFocal(y, 1, nrow(y), ngb=ngb)
v <- apply(cbind(vx, vy), 1, function(x, ...) fun(x[nc1], x[nc2], ...))
for (j in 1:nrow(v)) {
if (length(rasterList) < j) {
rasterList[[j]] <- raster(x)
}
values(rasterList[[j]]) <- v[j,]
}
}
else {
tr <- blockSize(out)
pb <- pbCreate(tr$n, label='localFun', ...)
for (i in 1:tr$n) {
vx <- getValuesFocal(x, tr$row[i], tr$nrows[i], ngb=ngb)
vy <- getValuesFocal(y, tr$row[i], tr$nrows[i], ngb=ngb)
v <- apply(cbind(vx, vy), 1, function(x, ...) fun(x[nc1], x[nc2], ...))
for (j in 1:nrow(v)) {
if (length(rasterList) < j) {
rasterList[[j]] <- raster(x)
}
rasterList[[j]] <- writeValues(rasterList[[j]], v[j,], tr$row[i])
}
}
}
return(stack(rasterList))
}
# local regression function
lm.focal <- function(x, y, ...) {
if(all(is.na(x) & is.na(y)) || all(is.na(x)) || all(is.na(y))) {rep(NA, 8)}
else {
m <- lm(y~x)
coef <- summary(m)$coef
if (nrow(coef) == 1) { # Add NAs for cases where the response is constant
coef <- rbind(coef, rep(NA, 4))
rownames(coef) <- rownames(nm)
}
coef <- as.vector(coef)
names(coef) <- c( outer( rownames(nm), colnames(nm) ,FUN=paste ,sep=" "))
coef
# summary(m)$r.squared #r-squared
# summary(m)$sigma #residual standard error
}
}
lm.focal.out = localFunStack(expl, resp, ngb=5, fun=lm.focal, na.rm=TRUE)
m <- lm(resp ~ expl)
nm <- summary(m)$coefficients
names(lm.focal.out) <- c(outer(rownames(nm), colnames(nm), FUN=paste, sep=" "))
plot(lm.focal.out)
Related
As the title suggest, I have seen some user mentioned that .lm.fit() functions has an advantage of more speed than a regular lm(), but when i look deeper at .lm.fit() in help, it is supposed to be a fitter functions, it returns a set of list instead of a model, which makes me to think is it still possible to extract components like R squared, Adj R Squared, and lastly do a predict() out of it?
Below is sample data and executions:
test_dat <- data.frame(y = rnorm(780, 20, 10))
for(b in 1:300){
name_var <- paste0("x",b)
test_dat[[name_var]] <- rnorm(780, 0.01 * b, 5)
}
tic()
obj_lm <- lm(y ~ ., data = test_dat)
print(class(obj_lm))
print(summary(obj_lm)$r.squared)
print(summary(obj_lm)$adj.r.squared)
predict(obj_lm)
toc() #approximately 0.4 seconds
tic()
datm <- as.matrix(test_dat)
obj_lm_fit <- .lm.fit(cbind(1,datm[,-1]), datm[,1])
print(class(obj_lm_fit))
toc() #approximately 0.2 seconds
Functions predict and resid are generic and since .lm.fit returns an object of class "list", all you have to do is to write methods implementing the definitions of what you want. Below are methods to compute fitted values, residuals and R^2.
set.seed(2023) # make the results reproducible
test_dat <- data.frame(y = rnorm(780, 20, 10))
for(b in 1:300){
name_var <- paste0("x",b)
test_dat[[name_var]] <- rnorm(780, 0.01 * b, 5)
}
obj_lm <- lm(y ~ ., data = test_dat)
datm <- as.matrix(test_dat)
obj_lm_fit <- .lm.fit(cbind(1,datm[,-1]), datm[,1])
#------------------------------------------------------------------------
# the methods for objects of class "list"
#
fitted.list <- function(object, X) {
X %*% coef(object)
}
resid.list <- residuals.list <- function(object, X, y) {
y_fitted <- fitted(object, X)
y - y_fitted
}
rsquared <- function(x, ...) UseMethod("rsquared")
rsquared.default <- function(x, ...) {
summary(x)$r.squared
}
rsquared.list <- function(object, X, y) {
e <- resid.list(object, X, y)
1 - sum(e^2)/sum( (y - mean(y))^2 )
}
rsquared(obj_lm_fit, cbind(1,datm[,-1]), datm[,1])
#> [1] 0.3948863
rsquared(obj_lm)
#> [1] 0.3948863
Created on 2023-01-03 with reprex v2.0.2
Edit 1
Added method to also calculate adj.R2
adj_rsquared_list <- function(object, X, y){
r2 <- rsquared.list(object, X, y)
k <- ncol(X) - 1
n <- nrow(X)
rate_of_error <- (1 - r2) * (n - 1) / (n - k - 1)
adj_r2 <- 1 - rate_of_error
return(adj_r2)
}
adj_rsquared_list(obj_lm_fit, cbind(1,datm[,-1]), datm[,1])
#> [1] 0.01590061
Edit 2
After the edit by Jovan, I have changed fitted.list above to use coef(), a function that extracts the first arguments list member "coefficients", if it exists, and rewrote the default and list methods of rsquared to accept a adj argument. The code to compute the adjusted R^2 is a copy&paste of Jovan's code.
rsquared <- function(x, ...) UseMethod("rsquared")
rsquared.default <- function(x, adj = FALSE, ...) {
if(adj) {
summary(x)$adj.r.squared
} else summary(x)$r.squared
}
rsquared.list <- function(object, X, y, adj = FALSE) {
e <- resid.list(object, X, y)
r2 <- 1 - sum(e^2)/sum( (y - mean(y))^2 )
if(adj) {
k <- ncol(X) - 1
n <- nrow(X)
rate_of_error <- (1 - r2) * (n - 1) / (n - k - 1)
adj_r2 <- 1 - rate_of_error
adj_r2
} else r2
}
# same as above
rsquared(obj_lm_fit, cbind(1,datm[,-1]), datm[,1])
#> [1] 0.3948863
rsquared(obj_lm)
#> [1] 0.3948863
# new, `adj = TRUE`
rsquared(obj_lm_fit, cbind(1,datm[,-1]), datm[,1], adj = TRUE)
#> [1] 0.01590061
rsquared(obj_lm, adj = TRUE)
#> [1] 0.01590061
Created on 2023-01-03 with reprex v2.0.2
library(GLMsData)
data(fluoro)
lambda <- seq(-2,2,0.5)
lm.out <- list()
for(i in length(lambda)){
if(i != 0){
y <- (fluoro$Dose^lambda-1)/lambda
} else {
y <- log(fluoro$Dose)
}
lm.out[[i]] <- lm(y[i]~Time, data = fluoro, na.exclude = T)
}
print(lm.out)
Error in model.frame.default(formula = y[i] ~ Time, data = fluoro, drop.unused.levels = TRUE) : variable lengths differ (found for 'Time')
I am trying to use various transformations of the response variable and fit these corresponding models, and obtain residual plots for each model.
I need a help. Thanks
Here is a corrected version of the for loop in the question.
data(fluoro, package = "GLMsData")
lambda <- seq(-2, 2, 0.5)
lm.out <- list()
for(i in 1:length(lambda)){
if(lambda[i] != 0){
y <- (fluoro$Dose^lambda[i]-1)/lambda[i]
} else {
y <- log(fluoro$Dose)
}
lm.out[[i]] <- lm(y ~ Time, data = fluoro, na.action = na.exclude)
}
print(lm.out)
And a version with a boxcox function defined and used in a lapply loop.
boxcox <- function(x, lambda, na.rm = FALSE){
if(na.rm) x <- x[!is.na(x)]
if(lambda == 0){
log(x)
} else {
(x^lambda - 1)/lambda
}
}
lm_out2 <- lapply(lambda, \(l){
lm(boxcox(Dose, lambda = l) ~ Time, data = fluoro, na.action = na.exclude)
})
Check that both ways above produce the same results.
coef_list <- sapply(lm.out, coef)
coef_list2 <- sapply(lm_out2, coef)
identical(coef_list, coef_list2)
#[1] TRUE
smry_list <- lapply(lm.out, summary)
smry_list2 <- lapply(lm_out2, summary)
pval_list <- sapply(smry_list, \(fit) fit$coefficients[, "Pr(>|t|)"])
pval_list2 <- sapply(smry_list2, \(fit) fit$coefficients[, "Pr(>|t|)"])
identical(pval_list, pval_list2)
#[1] TRUE
R2_list <- sapply(smry_list, "[[", "r.squared")
R2_list2 <- sapply(smry_list2, "[[", "r.squared")
identical(R2_list, R2_list2)
#[1] TRUE
I am trying to get lapply to run models (more specifically path models from the piecewiseSEM package). These path models use separate models (from the nlme package) and then are combine to build the final path model. I've had to utilize some custom made functions from this post to get the models to work. However, now when I try and run the path model using lapply-created objects the models do not run. However they run perfectly fine when not using lapply. I want to use lapply because I want to also utilize Parlapply later on. Here is a reproducible example:
This is the code that I used for custom functions for the package nlme:
library(nlme)
library(piecewiseSEM)
#### corHaversine - spatial correlation with haversine distance
# Calculates the geodesic distance between two points specified by radian latitude/longitude using Haversine formula.
# output in km
haversine <- function(x0, x1, y0, y1) {
a <- sin( (y1 - y0)/2 )^2 + cos(y0) * cos(y1) * sin( (x1 - x0)/2 )^2
v <- 2 * asin( min(1, sqrt(a) ) )
6371 * v
}
# function to compute geodesic haversine distance given two-column matrix of longitude/latitude
# input is assumed in form decimal degrees if radians = F
# note fields::rdist.earth is more efficient
haversineDist <- function(xy, radians = F) {
if (ncol(xy) > 2) stop("Input must have two columns (longitude and latitude)")
if (radians == F) xy <- xy * pi/180
hMat <- matrix(NA, ncol = nrow(xy), nrow = nrow(xy))
for (i in 1:nrow(xy) ) {
for (j in i:nrow(xy) ) {
hMat[j,i] <- haversine(xy[i,1], xy[j,1], xy[i,2], xy[j,2])
}
}
as.dist(hMat)
}
## for most methods, machinery from corSpatial will work without modification
Initialize.corHaversine <- nlme:::Initialize.corSpatial
recalc.corHaversine <- nlme:::recalc.corSpatial
Variogram.corHaversine <- nlme:::Variogram.corSpatial
corFactor.corHaversine <- nlme:::corFactor.corSpatial
corMatrix.corHaversine <- nlme:::corMatrix.corSpatial
coef.corHaversine <- nlme:::coef.corSpatial
"coef<-.corHaversine" <- nlme:::"coef<-.corSpatial"
## Constructor for the corHaversine class
corHaversine <- function(value = numeric(0), form = ~ 1, mimic = "corSpher", nugget = FALSE, fixed = FALSE) {
spClass <- "corHaversine"
attr(value, "formula") <- form
attr(value, "nugget") <- nugget
attr(value, "fixed") <- fixed
attr(value, "function") <- mimic
class(value) <- c(spClass, "corStruct")
value
} # end corHaversine class
environment(corHaversine) <- asNamespace("nlme")
Dim.corHaversine <- function(object, groups, ...) {
if (missing(groups)) return(attr(object, "Dim"))
val <- Dim.corStruct(object, groups)
val[["start"]] <- c(0, cumsum(val[["len"]] * (val[["len"]] - 1)/2)[-val[["M"]]])
## will use third component of Dim list for spClass
names(val)[3] <- "spClass"
val[[3]] <- match(attr(object, "function"), c("corSpher", "corExp", "corGaus", "corLin", "corRatio"), 0)
val
}
environment(Dim.corHaversine) <- asNamespace("nlme")
## getCovariate method for corHaversine class
getCovariate.corHaversine <- function(object, form = formula(object), data) {
if (is.null(covar <- attr(object, "covariate"))) { # if object lacks covariate attribute
if (missing(data)) { # if object lacks data
stop("need data to calculate covariate")
}
covForm <- getCovariateFormula(form)
if (length(all.vars(covForm)) > 0) { # if covariate present
if (attr(terms(covForm), "intercept") == 1) { # if formula includes intercept
covForm <- eval(parse(text = paste("~", deparse(covForm[[2]]),"-1",sep=""))) # remove intercept
}
# can only take covariates with correct names
if (length(all.vars(covForm)) > 2) stop("corHaversine can only take two covariates, 'lon' and 'lat'")
if ( !all(all.vars(covForm) %in% c("lon", "lat")) ) stop("covariates must be named 'lon' and 'lat'")
covar <- as.data.frame(unclass(model.matrix(covForm, model.frame(covForm, data, drop.unused.levels = TRUE) ) ) )
covar <- covar[,order(colnames(covar), decreasing = T)] # order as lon ... lat
}
else {
covar <- NULL
}
if (!is.null(getGroupsFormula(form))) { # if groups in formula extract covar by groups
grps <- getGroups(object, data = data)
if (is.null(covar)) {
covar <- lapply(split(grps, grps), function(x) as.vector(dist(1:length(x) ) ) ) # filler?
}
else {
giveDist <- function(el) {
el <- as.matrix(el)
if (nrow(el) > 1) as.vector(haversineDist(el))
else numeric(0)
}
covar <- lapply(split(covar, grps), giveDist )
}
covar <- covar[sapply(covar, length) > 0] # no 1-obs groups
}
else { # if no groups in formula extract distance
if (is.null(covar)) {
covar <- as.vector(dist(1:nrow(data) ) )
}
else {
covar <- as.vector(haversineDist(as.matrix(covar) ) )
}
}
if (any(unlist(covar) == 0)) { # check that no distances are zero
stop("cannot have zero distances in \"corHaversine\"")
}
}
covar
} # end method getCovariate
environment(getCovariate.corHaversine) <- asNamespace("nlme")
Here is the reproducible example/problem with the mtcars dataset:
set.seed(42) ## for sake of reproducibility
mtcars <- within(mtcars, {
lon <- runif(nrow(mtcars))
lat <- runif(nrow(mtcars))
})
#this makes a list of dataframes
empty_list<-replicate(n = 10,
expr = mtcars,
simplify = F)
#doing it the lapply method
model1<-lapply(empty_list, FUN = function(i)
nlme::gls(disp ~ wt,
correlation = corHaversine(form=~lon+lat,mimic="corSpher"),
data = i)
)
model2<-lapply(empty_list, FUN = function(i)
nlme::gls(wt ~ hp,
correlation = corHaversine(form=~lon+lat,mimic="corSpher"),
data = i)
)
model1.2<-psem(model1[[1]],model2[[1]], data = empty_list[[1]])
summary(model1.2, .progressBar = F, standardize = "scale")
This results in this error:
Error in max(sapply(nm[dfdetect], nrow)) :
invalid 'type' (list) of argument
But when I do this without lapply, it works out fine:
model3<-nlme::gls(disp ~ wt,
correlation = corHaversine(form=~lon+lat,mimic="corSpher"),
data = empty_list[[1]])
model4<-nlme::gls(wt ~ hp,
correlation = corHaversine(form=~lon+lat,mimic="corSpher"),
data = empty_list[[1]])
model3.4<-psem(model3, model4)
summary(model3.4, .progressBar = F, standardize = "scale")
all,
I'm trying to use JAGS model in R. The R package is "R2jags".
I'm confused with the error like beblow.
Compiling model graph
Resolving undeclared variables
Allocating nodes
Deleting model
Error in jags.model(model.file, data = data, inits = init.values, n.chains = n.chains, :
RUNTIME ERROR:
Non-conforming parameters in function inprod
The data processing part,
library(R2jags)
data <- read.table("H:/mlp2020/data109.csv",sep = ",",check.names = F,stringsAsFactors = F,header = T)
rownames(data) <- data[,1]
data <- data[,-1]
z <- as.vector(data$group)
z <- z+1
Y_input <- data[,c(1:1356)]
X_input <- data[,c(1358:1368)]
N <- 1702 #nrow(Y_input)
P <- 1356 #ncol(Y_input)
R <- 11 #ncol(X_input)
Y <- Y_input
X <- X_input
jags.data <- list("X","Y","z","N","P","R")
jags.params <- c("phi","delta")
jags.inits <- function(){
list("phi"=rbinom(1,1,0.1),"delta"=rbinom(1,1,0.1))
}
my model code is like,
model <- function(){
for (j in 1:P){
for (i in 1:N){
k[i,j] <- 1+phi[j]*z[i] #k=1 phi[j]=0,k=2 phi[j]=1 z[i]=1,k=3 phi[j]=1 z[i]=2
}
phi[j] ~ dbern(w)
}
for (j in 1:P){
for (i in 1:N){
Y[i,j] ~ dnorm(mu[i,j], tau[j])
mu[i,j] = e_1[j] + e_2[j]*equals(k[i,j],2) + inprod(X[i,],beta)
}
for (r in 1:R){
beta[r,j]<-delta[r,j]*beta_0[j]
#spike and slab pior for beta
delta[r,j] ~ dbern(t)
}
#spike and slab pior for beta
beta_0[j] ~ dnorm(0,tau_beta[j])
tau_beta[j] ~ dgamma(2, 15) # input
e_1[j]~dnorm(0,tau_1[j]) #tau_mu[j]=1
tau_1[j] ~ dgamma(2, 15) # input
e_2[j]~dnorm(0,tau_2[j]) #tau_mu[j]=1
tau_2[j] ~ dgamma(2, 15) # input
sigma[j] <- 1.0/sqrt(tau[j])
# input
tau[j] ~ dgamma(10, 1)
}
# t ~ dbeta(a_t,b_t) # input
# w ~ dbeta(a_w,b_w) # input
w ~ dbeta(1,0.1)
t ~ dbeta(0.4,1.6)
}
jagsfit <- jags(data=jags.data, inits=jags.inits, jags.params,
n.iter=10,
model.file=model)
Any help would be greatly appreciated!
inprod is the dot product. Its two arguments must be vectors of the same lengths, but the second argument in your code is a matrix.
I have not checked your model, but perhaps you want inprod(X[i,],beta[,j]).
I am using a ratio of means to estimate the population, T = mean(xbar)/ mean(u_bar) using bootstrapping method and I'm getting this error
message, Error in z[, 1] : incorrect number of dimensions.
pop = read_xlsx("US_pop.xlsx")
attach(pop)
z = cbind(X,U)
T = function(z)
{
T = (mean(z[,1]) / mean(z[,2])) ## I am using a ratio of means to estimate the population = mean(xbar)/ mean(u_bar)
}
T_stat = T(z)
nBoot = 2e5 # number of bootstrapping sample
Tboot = matrix(0,nBoot,2) # est. T for each sample
set.seed(123)
for (i in 1:nBoot)
{
{
Tboot[i] = T(sample(z,replace = TRUE))
}
}
The error of the code in the question is in
sample(z, replace = TRUE)
z is a matrix but when sample is applied to it z is seen as a vector and the return value is no longer of class "matrix" and "array":
class(z)
#[1] "matrix" "array"
class(sample(z, replace = TRUE))
#[1] "numeric"
Therefore, to subset z[, 1] in the function is not to subset the same z but the function argument of the same name. And this is what sample returned, a numeric vector.
Here is the function and its call, corrected. Tested with the data at the end.
STAT <- function(z) mean(z[, 1])/mean(z[, 2])
T_stat <- STAT(z)
nBoot <- 2e5
Tboot <- numeric(nBoot)
set.seed(123)
for (i in 1:nBoot) {
j <- sample(nrow(z), replace = TRUE)
Tboot[i] = STAT(z[j, ])
}
T_stat
#[1] 1.826662
mean(Tboot)
#[1] 1.878934
A simpler alternative is function boot in base package boot. The function is modified in order to have an index variable i as argument.
STAT2 <- function(z, i) mean(z[i, 1])/mean(z[i, 2])
set.seed(123)
b <- boot::boot(z, STAT2, R = nBoot)
b
#
#ORDINARY NONPARAMETRIC BOOTSTRAP
#
#
#Call:
#boot::boot(data = z, statistic = STAT2, R = nBoot)
#
#
#Bootstrap Statistics :
# original bias std. error
#t1* 1.826662 0.05249037 0.4181259
mean(b$t)
#[1] 1.879153
Test data creation code
set.seed(2020)
X <- rexp(20, rate = 1/4)
U <- rexp(20, rate = 1/2)
z <- cbind(X, U)