Adding AR(1) term to multiple regressions in R - r

I am running 503 separate regressions, each with its own dependent variable, the same 3 independent variables, and 1 AR(1) term.
Data:
# Simulated example data: 50 consecutive daily dates plus a 50 x 506 block of
# uniform random numbers (503 dependent variables followed by three factors).
set.seed(333)
df <- data.frame(
  seq(from = as.Date("2017/1/1"), to = as.Date("2017/2/19"), by = "days"),
  matrix(runif(50 * 506), nrow = 50, ncol = 506)
)
names(df) <- c("Date", paste0("var", seq_len(503)), "mktrf", "smb", "hml")
I create the AR(1) process as follows, using a function called lagpad:
# Shift the rows of a vector or matrix by k positions, padding with NA.
#
# x: an atomic vector or a matrix. A data frame is converted with as.matrix()
#    (column names are dropped, as they are for matrix input).
# k: number of positions to shift. k > 0 pushes values down (a lag: element t
#    receives the value from t - k); k < 0 pulls values up (a lead); k == 0
#    returns the values unchanged. If abs(k) is at least the number of rows,
#    every value is NA.
#
# Returns a vector when x was a vector, otherwise a matrix with the same
# dimensions as x.
lagpad <- function(x, k = 1) {
  was_vector <- is.vector(x)
  if (was_vector) {
    x <- matrix(x)
  } else if (is.data.frame(x)) {
    # matrix() on a data frame would recycle its list of columns cell by cell.
    x <- unname(as.matrix(x))
  } else {
    x <- matrix(x, nrow(x))
  }

  n <- nrow(x)
  # Never shift by more rows than exist, so the output keeps n rows.
  shift <- min(abs(k), n)
  pad <- matrix(NA, nrow = shift, ncol = ncol(x))

  if (k > 0) {
    kept <- x[seq_len(n - shift), , drop = FALSE]
    x <- rbind(pad, kept)
  } else {
    kept <- x[shift + seq_len(n - shift), , drop = FALSE]
    x <- rbind(kept, pad)
  }

  if (was_vector) x[seq_along(x)] else x
}
Then I store my necessary variables for regression:
All the independent variables (the three regressors mktrf, smb, hml)
x = df[,505:507]
All the dependent variables (var1 to var503)
y <- df[,2:504]
AR(1) process
y_lag <- lagpad(y, -1)
Fit all the models
list_models_AR= lapply(y, function(y)
with(x, lm(y ~ mktrf + smb + hml + y_lag, na.action = na.exclude)))
I'm having trouble figuring out how to use lapply in this case, since there are multiple components of y_lag that need to be called, one for each regression.

I am not sure of your lag term, it seems to be actually the next y-column. If so you can create 502 models as follows:
# Fit one model per column of y, using the next column of y as the fourth
# regressor. The last column has no "next" column, so ncol(y) - 1 models are
# fitted. seq_len() yields an empty sequence (no models) when y has a single
# column, instead of the c(1, 0) that 1:(ncol(y) - 1) would produce.
list_models_AR <- lapply(seq_len(ncol(y) - 1), function(i) {
  lm(y[, i] ~ x[, 1] + x[, 2] + x[, 3] + y[, i + 1], na.action = na.exclude)
})

Related

An equivalent for the excel If error for lme4 glmer in r

I'm writing a loop that fits models across several datasets and outcome variables, and I'm saving the results in a matrix. In the current version, I account for potential errors such as missing covariates. The more data I include, the more kinds of errors I get that I need to account for in the loop. I would like to modify the code below so that it records "NA" whenever the model stops due to an error, regardless of the error type.
I would appreciate any thoughts.
# Fit a random-intercept logistic model for every (dataset, outcome) pair and
# store the outcome name and its ICC in one two-column matrix per dataset.
# NOTE(review): `datasets` holds strings here; presumably the real code stores
# the data frames themselves -- confirm.
datasets <- list('data_01','data_02','data_03','data_04')
outcomes <- list('var_01','var_02','var_03','var_04','var_05')
# One result slot per dataset, each filled with an NA matrix below.
results <- vector("list", length(datasets))
for (i in 1:length(datasets)) {
results [[i]] <- matrix(NA, nrow=length(outcomes), ncol=2)
}
for (j in seq_along(outcomes)) {
# NOTE(review): `surveys` is not defined anywhere in this snippet; presumably
# `datasets` is meant.
for (i in seq_along(surveys)) {
# Outcome column missing from this dataset: record the name and the string "NA".
if ("TRUE" %in% (!(outcomes[[j]] %in% names(datasets[[i]]))))
{
results[[i]][j, 1] <- outcomes[[j]]
results[[i]][j, 2] <- "NA"
}
else
{
results[[i]][j, 1] <- outcomes[[j]]
# NOTE(review): the formula is one-sided -- outcomes[[j]] is never used as the
# response.
fit <- glmer(~ RS_AGE + RS_MARITAL + (1|FW_ID) + (1|RS_CLID), data = datasets[[i]], family =
binomial, nAGQ=0, control = glmerControl(optimizer = "nloptwrap"))
# Squared "theta" values from the fit, used as variance components below.
SI <- getME(fit,"theta")^2
# Share of the second component in the total; 3.29 is presumably pi^2/3, the
# logistic residual variance -- confirm.
ICC <- SI[[2]] /(SI[[1]]+SI[[2]]+3.29)
results[[i]][j, 2] <- ICC
}
}
}
Without the data I can't test, but this should work:
# Fit a random-intercept logistic model for every (dataset, outcome) pair and
# store the outcome name and its ICC in one two-column matrix per dataset.
# A missing outcome column or a failed fit is recorded as NA.
# NOTE(review): `datasets` holds strings here; names() of a string is NULL, so
# every outcome takes the "missing" branch. Presumably the real code stores the
# data frames themselves -- confirm.
datasets <- list('data_01','data_02','data_03','data_04')
outcomes <- list('var_01','var_02','var_03','var_04','var_05')

# One (outcome name, ICC) matrix per dataset. Storing the name in column 1
# turns the whole matrix into character, so the ICC is stored as text too.
results <- vector("list", length(datasets))
for (i in seq_along(datasets)) {
  results[[i]] <- matrix(NA, nrow = length(outcomes), ncol = 2)
}

for (j in seq_along(outcomes)) {
  for (i in seq_along(datasets)) {
    results[[i]][j, 1] <- outcomes[[j]]

    # Outcome column not present in this dataset: nothing to fit.
    if (any(!(outcomes[[j]] %in% names(datasets[[i]])))) {
      results[[i]][j, 2] <- NA
      next
    }

    # Build "<outcome> ~ RS_AGE + RS_MARITAL + (1|FW_ID) + (1|RS_CLID)".
    form <- reformulate(c("RS_AGE", "RS_MARITAL", "(1|FW_ID)", "(1|RS_CLID)"),
                        response = outcomes[[j]])
    # try() turns any fitting error into a "try-error" object instead of
    # stopping the loop.
    fit <- try(glmer(form, data = datasets[[i]], family = binomial, nAGQ = 0,
                     control = glmerControl(optimizer = "nloptwrap")))
    if (inherits(fit, "try-error")) {
      results[[i]][j, 2] <- NA
    } else {
      SI <- getME(fit, "theta")^2
      # 3.29 is presumably pi^2/3, the logistic residual variance -- confirm.
      ICC <- SI[[2]] / (SI[[1]] + SI[[2]] + 3.29)
      results[[i]][j, 2] <- ICC
    }
  }
}
Try replacing
fit <- glmer(~ RS_AGE + RS_MARITAL + (1|FW_ID) + (1|RS_CLID), data = datasets[[i]], family =
binomial, nAGQ=0, control = glmerControl(optimizer = "nloptwrap"))
SI <- getME(fit,"theta")^2
ICC <- SI[[2]] /(SI[[1]]+SI[[2]]+3.29)
results[[i]][j, 2] <- ICC
by
# Use the current outcome as the response; with a one-sided formula glmer()
# would fail on every iteration and every ICC would be NA.
form <- reformulate(c("RS_AGE", "RS_MARITAL", "(1|FW_ID)", "(1|RS_CLID)"),
                    response = outcomes[[j]])
# On any error, tryCatch() returns the condition object instead of stopping.
fit <- tryCatch(
  glmer(form, data = datasets[[i]], family = binomial, nAGQ = 0,
        control = glmerControl(optimizer = "nloptwrap")),
  error = function(e) e
)
if (inherits(fit, "error")) {
  ICC <- NA
} else {
  SI <- getME(fit, "theta")^2
  ICC <- SI[[2]] / (SI[[1]] + SI[[2]] + 3.29)
}
# Store the value in both cases so that failed fits show up as NA.
results[[i]][j, 2] <- ICC

I am trying to apply various transformations to the response variable, fit the corresponding model for each one, and obtain residual plots for each model.

library(GLMsData)
data(fluoro)
lambda <- seq(-2,2,0.5)
lm.out <- list()
for(i in length(lambda)){
if(i != 0){
y <- (fluoro$Dose^lambda-1)/lambda
} else {
y <- log(fluoro$Dose)
}
lm.out[[i]] <- lm(y[i]~Time, data = fluoro, na.exclude = T)
}
print(lm.out)
Error in model.frame.default(formula = y[i] ~ Time, data = fluoro, drop.unused.levels = TRUE) : variable lengths differ (found for 'Time')
I am trying to use various transformations of the response variable and fit these corresponding models, and obtain residual plots for each model.
I need a help. Thanks
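Here is a corrected version of the for loop in the question. The original had four problems: `for(i in length(lambda))` runs only once (with i equal to the length) instead of once per value; `i != 0` tests the loop index rather than `lambda[i]`; `y[i]` is a single number, which is why the variable lengths differ from `Time`; and `na.exclude = T` is not an argument of `lm` (the argument is `na.action = na.exclude`).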
# Fit one linear model of the Box-Cox transformed Dose on Time for each
# value of lambda (the log transform is used when lambda is 0).
data(fluoro, package = "GLMsData")
lambda <- seq(-2, 2, 0.5)
lm.out <- vector("list", length(lambda))
for (i in seq_along(lambda)) {
  y <- if (lambda[i] == 0) {
    log(fluoro$Dose)
  } else {
    (fluoro$Dose^lambda[i] - 1) / lambda[i]
  }
  lm.out[[i]] <- lm(y ~ Time, data = fluoro, na.action = na.exclude)
}
print(lm.out)
And a version with a boxcox function defined and used in a lapply loop.
# Box-Cox power transform of x.
#
# x:      numeric vector to transform.
# lambda: power parameter; 0 selects the log transform.
# na.rm:  if TRUE, NA values are dropped from x before transforming.
#
# Returns log(x) when lambda is 0, otherwise (x^lambda - 1) / lambda.
boxcox <- function(x, lambda, na.rm = FALSE) {
  values <- if (na.rm) x[!is.na(x)] else x
  if (lambda != 0) {
    return((values^lambda - 1) / lambda)
  }
  log(values)
}
lm_out2 <- lapply(lambda, \(l){
lm(boxcox(Dose, lambda = l) ~ Time, data = fluoro, na.action = na.exclude)
})
Check that both ways above produce the same results.
coef_list <- sapply(lm.out, coef)
coef_list2 <- sapply(lm_out2, coef)
identical(coef_list, coef_list2)
#[1] TRUE
smry_list <- lapply(lm.out, summary)
smry_list2 <- lapply(lm_out2, summary)
pval_list <- sapply(smry_list, \(fit) fit$coefficients[, "Pr(>|t|)"])
pval_list2 <- sapply(smry_list2, \(fit) fit$coefficients[, "Pr(>|t|)"])
identical(pval_list, pval_list2)
#[1] TRUE
R2_list <- sapply(smry_list, "[[", "r.squared")
R2_list2 <- sapply(smry_list2, "[[", "r.squared")
identical(R2_list, R2_list2)
#[1] TRUE

Non-conforming parameters in function inprod

all,
I'm trying to use JAGS model in R. The R package is "R2jags".
I'm confused by the error shown below.
Compiling model graph
Resolving undeclared variables
Allocating nodes
Deleting model
Error in jags.model(model.file, data = data, inits = init.values, n.chains = n.chains, :
RUNTIME ERROR:
Non-conforming parameters in function inprod
The data processing part,
library(R2jags)
data <- read.table("H:/mlp2020/data109.csv",sep = ",",check.names = F,stringsAsFactors = F,header = T)
rownames(data) <- data[,1]
data <- data[,-1]
z <- as.vector(data$group)
z <- z+1
Y_input <- data[,c(1:1356)]
X_input <- data[,c(1358:1368)]
N <- 1702 #nrow(Y_input)
P <- 1356 #ncol(Y_input)
R <- 11 #ncol(X_input)
Y <- Y_input
X <- X_input
jags.data <- list("X","Y","z","N","P","R")
jags.params <- c("phi","delta")
# Initial values for one chain.
# The model indexes phi as phi[j] (j in 1:P) and delta as delta[r,j]
# (r in 1:R, j in 1:P), so the initial values need those shapes rather than
# single draws. P and R are read from the enclosing environment.
jags.inits <- function() {
  list(
    "phi" = rbinom(P, 1, 0.1),
    "delta" = matrix(rbinom(R * P, 1, 0.1), nrow = R, ncol = P)
  )
}
my model code is like,
# JAGS model written as an R function.
# Uses the data objects X, Y, z, N, P and R; loops j over 1:P and i over 1:N.
model <- function(){
for (j in 1:P){
for (i in 1:N){
# k is 1 when phi[j] = 0; otherwise k = 1 + z[i]
k[i,j] <- 1+phi[j]*z[i] # k=1: phi[j]=0; k=2: phi[j]=1, z[i]=1; k=3: phi[j]=1, z[i]=2
}
phi[j] ~ dbern(w)
}
for (j in 1:P){
for (i in 1:N){
Y[i,j] ~ dnorm(mu[i,j], tau[j])
# e_2[j] only contributes when k[i,j] equals 2.
# NOTE(review): beta is indexed as beta[r,j] below, so `beta` here is the
# whole matrix while X[i,] is a single row; presumably beta[,j] is intended.
mu[i,j] = e_1[j] + e_2[j]*equals(k[i,j],2) + inprod(X[i,],beta)
}
for (r in 1:R){
beta[r,j]<-delta[r,j]*beta_0[j]
# spike and slab prior for beta: delta[r,j] switches beta[r,j] on or off
delta[r,j] ~ dbern(t)
}
# spike and slab prior for beta: shared slab value for column j
beta_0[j] ~ dnorm(0,tau_beta[j])
tau_beta[j] ~ dgamma(2, 15) # input
e_1[j]~dnorm(0,tau_1[j]) # tau_mu[j]=1
tau_1[j] ~ dgamma(2, 15) # input
e_2[j]~dnorm(0,tau_2[j]) # tau_mu[j]=1
tau_2[j] ~ dgamma(2, 15) # input
# sigma[j] is derived from the precision tau[j]
sigma[j] <- 1.0/sqrt(tau[j])
# input
tau[j] ~ dgamma(10, 1)
}
# Alternative priors with external hyperparameters, currently disabled:
# t ~ dbeta(a_t,b_t) # input
# w ~ dbeta(a_w,b_w) # input
w ~ dbeta(1,0.1)
t ~ dbeta(0.4,1.6)
}
jagsfit <- jags(data=jags.data, inits=jags.inits, jags.params,
n.iter=10,
model.file=model)
Any help would be greatly appreciated!
inprod is the dot product. Its two arguments must be vectors of the same lengths, but the second argument in your code is a matrix.
I have not checked your model, but perhaps you want inprod(X[i,],beta[,j]).

Rolling Window Regression - Error Calculation

I have a data set and want to essentially fit a linear model with a rolling time window, find the fitted values and calculate the errors in the estimate. I have functions which calculate the error and I have the start of the algorithm, but I keep getting null time series with the algorithm below. Can anybody spot a fix for it?
# Rolling-window OLS: fit lm() on ten consecutive 142-row blocks of `data`,
# then score predictions for the following block with `measure`.
#
# data:      row-indexable table (converted with as.data.frame per window).
# measure:   function called as measure(predicted, actual).
# predict:   column selecting the response.
# predictor: column selecting the regressor.
#
# NOTE(review): the function returns the last fitted model (`mod`); the
# `error` vector filled in the loop is discarded.
rollerOLS <- function(data, measure, predict, predictor){
error <- c()
m <- dim(data)[1]
for(i in 1:(floor(m/142)-10)){
# Training rows: 1 + 142*(i-1) up to 142*(i+9), i.e. ten blocks of 142 rows.
data.new <- as.data.frame(data[c((1+(142*(i-1))):((i+9)*142)),])
# `:` binds tighter than `-`, so the whole range is shifted down by one:
# rows 142*(i+9) up to 142*(i+10) - 1 (the first one is also the last
# training row).
data.pred <- as.data.frame(data[c((1+(142*(i+9))):((i+10)*142)-1),])
n <- dim(data.new)[1]
k <- dim(data.pred)[1]
# Pair each response value with the regressor value from the next row.
x <- data.new[-1,predictor]
y <- data.new[-n, predict]
mod <- lm(y ~ x)
# NOTE(review): newdata has no column named `x`, so predict.lm presumably
# does not use these rows for the regressor -- confirm.
ts <- predict.lm(mod, newdata = data.frame(data.pred[, predictor]), interval="none")
actual <- data.pred[-k,predict]
error[i] <- measure(ts, actual)
}
return(mod)
}
Note that 142 is specific to my data set.
The problem was in the ts line and here is the fix.
# Rolling-window OLS: fit lm() on `train_windows` consecutive blocks of
# `window` rows, predict the following block from the fitted coefficients,
# and score the prediction with `measure`.
#
# data:          row-indexable table (converted with as.data.frame per window).
# measure:       function called as measure(predicted, actual); expected to
#                return a single number.
# predict:       column selecting the response.
# predictor:     column selecting the regressor.
# window:        rows per block (default 142, the value used originally).
# train_windows: number of blocks used for fitting (default 10).
#
# Returns a numeric vector with one error value per rolling fit; it is empty
# when `data` has too few rows for a single fit.
rollerOLS <- function(data, measure, predict, predictor,
                      window = 142, train_windows = 10) {
  n_fits <- floor(nrow(data) / window) - train_windows
  if (n_fits < 1) {
    return(numeric(0))
  }

  error <- numeric(n_fits)
  for (i in seq_len(n_fits)) {
    first_train <- 1 + window * (i - 1)
    last_train <- (i - 1 + train_windows) * window
    data.new <- as.data.frame(data[first_train:last_train, ])

    # As in the original indexing, the prediction block starts on the last
    # training row and stops one row short of the next block boundary.
    pred_rows <- ((last_train + 1):(last_train + window)) - 1
    data.pred <- as.data.frame(data[pred_rows, ])

    n <- nrow(data.new)
    k <- nrow(data.pred)

    # Pair each response value with the regressor value from the next row.
    x <- data.new[-1, predictor]
    y <- data.new[-n, predict]
    mod <- lm(y ~ x)

    # Predict by hand from the intercept and slope, using the same one-row
    # offset between regressor and response as in the fit.
    betas <- coef(mod)
    ts <- betas[1] + betas[2] * data.pred[-1, predictor]
    actual <- data.pred[-k, predict]
    error[i] <- measure(ts, actual)
  }
  error
}

JAGS post calculation and ifelse/step

I'm relatively new to JAGS and am running it through the R package jagsUI. I am building occupancy models, but want to summarize results as I go. So I have a matrix of 0s and 1s:
mat1 <- matrix(rbinom(10*10,1,.5),10,10)
y=mat1
That I want to run through the following model:
# Bundle data and summarize data bundle
str( win.data <- list(y = mat1, M = nrow(mat1), T = ncol(mat1)) )
# Specify model in BUGS language
sink("model.txt")
cat("
model {
# Priors
psi0 ~ dunif(0, 1)
p ~ dunif(0, 1)
for(t in 1:(T-1)){
rho[t] ~ dunif(-1,1)
}
beta0 ~ dnorm(0, 0.1)
# Likelihood
for (i in 1:M) { # Loop over sites
z[i,1] ~ dbern(psi0) # State model
y[i,1] ~ dbern(z[i,1]*p)
for (j in 2:T) { # Loop over replicate surveys
logit(psi[i,j])<- beta0 + rho[j-1]*z[i,j-1]
z[i,j] ~ dbern(psi[i,j])
y[i,j] ~ dbern(z[i,j]*p) # Observation model
}
}
# Derived quantities
coln[i,j] <- ifelse(z[i,j]-z[i,j-1]==1,1,0) # colonized
ext[i,j] <- ifelse(z[i,j-1]-z[i,j]==1,1,0) # went extinct
tot.coln[,j] <- sum(coln[,j]) # sum of colonized each survey
tot.ext[,j] <- sum(ext[,j]) # sum of extinctions each survey
Nocc[,j] <- sum(z[,j]) # total sites occupied each survey
coln.rate[,j] <- tot.coln[,j]/Nocc[,j]
ext.rate[,j] <- tot.ext[,j]/Nocc[,j]
}
",fill = TRUE)
sink()
# Initial values
zst <- apply(y, 1, max, na.rm=TRUE) # Avoid data/model/inits conflict
y<- as.matrix(y)
zst<- y
inits <- function(){list(z = zst)}
# Parameters monitored
params <- c("psi0", "p", "beta0", "coln.rate", "ext.rate")
# MCMC settings
ni <- 2000 ; nt <- 1 ; nb <- 1000 ; nc <- 3
# Call JAGS and summarize posteriors
library(jagsUI)
fm <- jags(win.data, inits, params, "model.txt", n.chains = nc,
n.thin = nt, n.iter = ni, n.burnin = nb)
print(fm, dig = 3)
The model runs except for the piece after "# Derived quantities". Basically I want to calculate the rate of change from 0 to 1 and from 1 to 0 in each survey. A couple of my thoughts on why it doesn't work. 1) z[i,j] isn't really 0s and 1s. 2) the calculations shouldn't go under Derived quantities. 3) ifelse from the JAGS manual isn't doing what I think.
I also tried using the "step" function replacing the first two lines after Derived quantities with:
coln[i,j] <- step(z[i,j]-z[i,j-1]-0.5) # colonized
ext[i,j] <- step(z[i,j-1]-z[i,j]-0.5) # went extinct
But no luck there. Any ideas?
You are indexing i and j here without looping through them. To make this work you would need to set it up within another nested for loop. Also, your extinction calculation was incorrect.
for(j in 2:T){
for(i in 1:M){
coln[i,j-1] <- ifelse(z[i,j]-z[i,j-1]==1,1,0) # colonized
ext[i,j-1] <- ifelse(z[i,j]-z[i,j-1]==-1,1,0) # went extinct
}
tot.coln[j-1] <- sum(coln[,j-1]) # sum of colonized each survey
tot.ext[j-1] <- sum(ext[,j-1]) # sum of extinctions each survey
Nocc[j-1] <- sum(z[,j-1]) # total sites occupied each survey
coln.rate[j-1] <- tot.coln[j-1]/Nocc[j-1]
ext.rate[j-1] <- tot.ext[j-1]/Nocc[j-1]
}

Resources