Problem with function/ replacement has length zero - r

I'm trying to run the following function proposed by Lai et al., 2021, that aims to compare fit differences in non-nested models with categorical AVs. The Models look like this:
Mod1 <- '
# Measurement models
# Predictor Variables
A =~ NoEPA2 + ChEPA2 + SvEPA2
N =~ NoEPN2 + ChEPN2 + SvEPN2
# Outcome Variables
I =~ DE_VORH + AN_VORH + AINTMAX
EX =~ C_VORH + O_VORH + AD_MAX
# Control variables
AGE =~ age
age ~~ 0*age
SEX =~ sex
sex ~~ 0*sex
EDU =~ edu
edu ~~ 0* edu
#Error correlation A, N, E
NoEPA1 ~~ NoEPN1
ChEPA1 ~~ ChEPN1
SvEPA1 ~~ SvEPN1
# Correlations DV
A ~~ N
I ~~ EX
# Paths
I ~ A + N + AGE + SEX + EDU
EX ~ A + N + AGE + SEX + EDU
'
Sem2 <- sem(Mod1,
data=a,
estimator = "WLSMV",
conditional.x = FALSE,
mimic = "Mplus",
ordered = c("DE_VORH", "AN_VORH","AINTMAX","O_VORH", "C_VORH","AD_MAX"))
summary(sem2,
fit.measures = TRUE,
standardize = TRUE,
rsquare = TRUE,
estimates = TRUE,
ci = FALSE)
Mod2 <- '
# Measurement models
# Predictor Variables
A =~ NoEPA1 + ChEPA1 + SvEPA1
N =~ NoEPN1 + ChEPN1 + SvEPN1
E =~ MxStEPEM + ChEPEM1 + SvEPEM1
# Outcome Variables
I =~ DE_VORH + AN_VORH + AINTMAX
EX =~ C_VORH + O_VORH + AD_MAX
# Control variables
AGE =~ age
age ~~ 0*age
SEX =~ sex
sex ~~ 0*sex
EDU =~ edu
edu ~~ 0* edu
#Error correlation A, N, E
NoEPA1 ~~ NoEPN1 + MxStEPEM
NoEPN1 ~~ MxStEPEM
ChEPA1 ~~ ChEPN1 + ChEPEM1
ChEPN1 ~~ ChEPEM1
SvEPA1 ~~ SvEPN1 + SvEPEM1
SvEPN1 ~~ SvEPEM1
# Correlations DV
A ~~ N + E
N ~~ E
I ~~ EX
# Paths
I ~ A + N + E + AGE + SEX + EDU
EX ~ A + N + E + AGE + SEX + EDU
'
sem3a <- sem(Mod2,
data=a,
estimator = "WLSMV",
conditional.x = FALSE,
mimic = "Mplus",
ordered = c("DE_VORH", "AN_VORH", "AINTMAX", "O_VORH","C_VORH","AD_MAX"))
summary(sem3a,
fit.measures = TRUE,
standardize = TRUE,
rsquare = TRUE,
estimates = TRUE,
ci = FALSE)
The function I want to apply looks like this:
## The function below returns point estimate and standard error for
## ∆RMSEA, ∆CFI, and ∆SRMR between two competing models A & B given categorical data.
## The two models do not need to be nested.
# fitA = Fitted 'lavaan' model object for Model A
# fitB = Fitted 'lavaan' model object for Model B
# fitZ = Fitted 'lavaan' model object for the baseline model for CFI
fit.diff.cat <- function(fitA, fitB){
######################################
# Internal functions
######################################
# Rearrange the model-implied correlation matrix of 'fitB' so that its columns and rows
# are in the same order as that in the model-implied correlation matrix of 'fitA'
rearrange.P.theta <- function(fitA, fitB){
R <- inspect(fitA, "sampstat")$'cov'
p <- dim(R)[1]
R <- as.matrix(R, p, p)
P.theta.A <- inspect(fitA, "cov.ov")
P.theta.B0 <- inspect(fitB, "cov.ov")
target.var.names <- rownames(R)
current.var.names <- rownames(P.theta.B0)
P.theta.B <- matrix(NA, p, p)
rownames(P.theta.B) <- colnames(P.theta.B) <- target.var.names
for (i.row in 1:p){
for(i.col in 1:p){
row.name <- target.var.names[i.row]
col.name <- target.var.names[i.col]
pick.row <- which(current.var.names==row.name)
pick.col <- which(current.var.names==col.name)
P.theta.B[i.row, i.col] <- P.theta.B0[pick.row, pick.col]
}
}
return(P.theta.B)
}# End of rearrange.P.theta()
# Rearrange the model-implied thresholds of 'fitB' so that its names
# are in the same order as that in the model-implied thresholds 'fitA'
rearrange.thresh <- function(fitA, fitB){
thresh <- inspect(fitA, "sampstat")$th
thresh.B0 <- inspect(fitB, "th")
target.var.names <- names(thresh)
current.var.names <- names(thresh.B0)
n.thresh <- length(thresh)
thresh.B <- rep(NA, n.thresh)
names(thresh.B) <- target.var.names
for (i in 1:n.thresh){
name <- target.var.names[i]
pick.name <- which(current.var.names==name)
thresh.B[i] <- thresh.B0[pick.name]
}
return(thresh.B)
}# End of rearrange.thresh()
# Rearrange the Delta matrix of 'fitB' so that its rows are
# in the same order as that in the Delta matrix of 'fitA'.
# Delta = derivative of P(theta) wrt theta
rearrange.Delta <- function(fitA, fitB){
Delta.B0 <- lavaan:::computeDelta(fitB#Model)[[1]]
n.theta <- dim(Delta.B0)[2]
thresh <- inspect(fitA, "sampstat")$th
thresh.B0 <- inspect(fitB, "th")
target.var.names <- names(thresh)
current.var.names <- names(thresh.B0)
n.thresh <- length(thresh)
Delta.th <- matrix(NA, n.thresh, n.theta)
rownames(Delta.th) <- target.var.names
for (i in 1:n.thresh){
name <- target.var.names[i]
pick.name <- which(current.var.names==name)
Delta.th[i,] <- Delta.B0[pick.name,]
}
P.theta.B0 <- inspect(fitB, "cov.ov")
R <- inspect(fitA, "sampstat")$'cov'
p <- dim(R)[1]
target.var.names <- rownames(R)
current.var.names <- rownames(P.theta.B0)
n.rho <- p*(p-1)/2
current.matrix <- matrix(NA, p, p)
current.matrix[lower.tri(current.matrix, diag=FALSE)] <- 1:n.rho
pick.vech <- rep(NA, n.rho)
j <- 1
for(i.col in 1:(p-1)){
for(i.row in (i.col+1):p){
row.name <- target.var.names[i.row]
col.name <- target.var.names[i.col]
pick.row <- which(current.var.names==row.name)
pick.col <- which(current.var.names==col.name)
if(pick.row >= pick.col) pick.vech[j] <- current.matrix[pick.row, pick.col]
if(pick.row < pick.col) pick.vech[j] <- current.matrix[pick.col, pick.row]
j <- j+1
}
}
Delta.rho <- matrix(NA, n.rho, n.theta)
for(i in 1:n.rho){
pick <- pick.vech[i] + n.thresh
Delta.rho[i,] <- Delta.B0[pick,]
}
Delta.B <- rbind(Delta.th, Delta.rho)
return(Delta.B)
}# End of rearrange.Delta()
######################################
# Main function
######################################
H.A <- inspect(fitA, "hessian")*2
H.B <- inspect(fitB, "hessian")*2
H.A.inv <- try(chol2inv(chol(H.A)), TRUE)
H.B.inv <- try(chol2inv(chol(H.B)), TRUE)
if(class(H.A.inv)=="matrix" & class(H.B.inv)=="matrix"){
n <- inspect(fitA, "nobs")
dA <- as.numeric(fitmeasures(fitA, "df"))
dB <- as.numeric(fitmeasures(fitB, "df"))
P.B <- rearrange.P.theta(fitA, fitB)
p <- dim(P.B)[1]
rho.B <- lav_matrix_vech(P.B, diagonal = FALSE)
thresh.B <- rearrange.thresh(fitA, fitB)
estB <- c(thresh.B, rho.B)
m <- inspect(fitA, "wls.obs")
estA <- inspect(fitA, "wls.est")
eA <- m - estA
eB <- m - estB
Gamma <- inspect(fitA, "gamma")
DeltaA <- lavaan:::computeDelta(fitA#Model)[[1]]
DeltaB <- rearrange.Delta(fitA, fitB)
p1 <- dim(DeltaA)[1]
g.A <- 2*t(eA)
K.A <- (-2)*t(DeltaA)
T.A <- 2*diag(1, p1)
Q.A <- T.A - t(K.A)%*%H.A.inv%*%K.A
G.A <- t(eA) %*% eA
G.A.bc0 <- G.A - sum(diag(Q.A%*%Gamma))/(2*n)
G.A.bc <- ifelse(G.A.bc0 < 0, 0, G.A.bc0)
g.B <- 2*t(eB)
K.B <- (-2)*t(DeltaB)
T.B <- 2*diag(1, p1)
Q.B <- T.B - t(K.B)%*%H.B.inv%*%K.B
G.B <- t(eB) %*% eB
G.B.bc0 <- G.B - sum(diag(Q.B%*%Gamma))/(2*n)
G.B.bc <- ifelse(G.B.bc0 < 0, 0, G.B.bc0)
R <- inspect(fitA, "sampstat")$cov
r <- lav_matrix_vech(R, diagonal = FALSE)
k <- length(r)
G.Z <- t(r) %*% r
G.Z.bc0 <- G.Z - sum(diag(Gamma))/n
G.Z.bc <- ifelse(G.Z.bc0 < 0, 0, G.Z.bc0)
G.A1 <- ifelse(G.A.bc > 0, G.A.bc, G.A)
G.B1 <- ifelse(G.B.bc > 0, G.B.bc, G.B)
G.Z1 <- ifelse(G.Z.bc > 0, G.Z.bc, G.Z)
## RMSEA diff
rmsea.AB <- sqrt(G.A.bc/dA) - sqrt(G.B.bc/dB)
J.rmsea.1 <- cbind( 1/(2*sqrt(dA*G.A1)), -1/(2*sqrt(dB*G.B1)) )
J.rmsea.2 <- rbind(g.A, g.B)
J.rmsea <- J.rmsea.1 %*% J.rmsea.2
var.rmsea.AB <- J.rmsea %*% Gamma %*% t(J.rmsea) / n
se.rmsea.AB <- sqrt(var.rmsea.AB)
## CFI diff
cfi.AB <- (G.B.bc - G.A.bc) / G.Z.bc
n.thresh <- length(fitted(fitA)$th)
r1 <- c(rep(0, n.thresh), r)
J.cfi.1 <- cbind( -1/G.Z1, 1/G.Z1, (G.A1-G.B1)/G.Z1^2 )
J.cfi.2 <- rbind(g.A, g.B, 2*t(r1) )
J.cfi <- J.cfi.1 %*% J.cfi.2
var.cfi.AB <- J.cfi %*% Gamma %*% t(J.cfi) / n
se.cfi.AB <- sqrt(var.cfi.AB)
## SRMR diff
srmr.AB <- sqrt(G.A.bc/k) - sqrt(G.B.bc/k)
J.srmr.1 <- cbind( 1/(2*sqrt(k*G.A1)), -1/(2*sqrt(k*G.B1)) )
J.srmr.2 <- rbind(g.A, g.B)
J.srmr <- J.srmr.1 %*% J.srmr.2
var.srmr.AB <- J.srmr %*% Gamma %*% t(J.srmr) / n
se.srmr.AB <- sqrt(var.srmr.AB)
#######
output <- c(rmsea.AB, se.rmsea.AB,
cfi.AB, se.cfi.AB,
srmr.AB, se.srmr.AB)
names(output) <- c("rmsea.AB", "se.rmsea.AB",
"cfi.AB", "se.cfi.AB",
"srmr.AB", "se.srmr.AB")
}# End of if Hessian is positive definite
else{output <- rep(NA, 6)}
return(output)
}
When I enter my fitted models, the following error is returned:
Error in P.theta.B[i.row, i.col] <- P.theta.B0[pick.row, pick.col] :
replacement has length zero<
Now I tried to run the commands step by step and it seems that this error is produced at the beginning of the internal functions running this step:
P.theta.B[i.row, i.col] <- P.theta.B0[pick.row, pick.col]
Any ideas on how to troubleshoot there? I'm left with a lot of questionmarks.

Related

How can I use try catch for nls function in R

I am doing a regression for a Quadric Linear function. I got two option is to use either nlsLM and nls2. However, for some dataset, the use of nlsLM casing some problem such as: singular gradient matrix at initial parameter estimates or they ran in to an infinitie loop. I want to use the try catch to deal with this issue. Can anyone help me out? Thanks everyone in advance.
Here is the full code:
# Packages needed for estimaton of Ideal trajectory - nonlinear regression
#-------------------------------------------------------------------------------
library("minpack.lm")
library("nlstools")
library("nlsMicrobio")
library("stats")
library("tseries") #runs test for auto correlation
#Use NLS2
library(proto)
library(nls2)
################################################################
# Set working directory
setwd("C:/Users/Kevin Le/PycharmProjects/Pig Data Black Box - Copy")
#load dataset
load("Data/JRPData_TTC.Rdata") #load dataset created in MissingData.step
ID <- 5470
#Create a new dataframe which will store Data after ITC estimation
#Dataframe contains ITC parameters
ITC.param.pos2 <- data.frame(ANIMAL_ID=factor(),
X0=double(),
Y1=double(),
Y2=double(),
Ylast=double(),
a=double(),
b=double(),
c=double(),
d=double(),
stringsAsFactors=FALSE)
#Dataframe contains data points on the ITC
Data.remain <- data.frame(ANIMAL_ID=character(),
Age=double(),
obs.CFI=double(),
tt=double(),
ttt=double(),
stringsAsFactors=FALSE)
#===============================================================
# For loop for automatically estimating ITC of all pigs
#===============================================================
IDC <- seq_along(ID) # 17, 23, 52, 57, 116
for (idc in IDC){
# idc = 1
i <- ID[idc]
Data <- No.NA.Data.1[No.NA.Data.1$ANIMAL_ID == i,]
idc1 <- unique(as.numeric(Data$idc.1))
####### Create data frame of x (Age) and y (CFI) ########
x <- as.numeric(Data$Age.plot)
Y <- as.numeric(Data$CFI.plot)
Z <- as.numeric(Data$DFI.plot)
Data.xy <- as.data.frame(cbind(x,Y))
#Initial parameteres for parameter estimation
X0.0 <- x[1]
Xlast <- x[length(x)]
##################################################################
# 1. reparametrization CFI at X0 = 0
#function used for reparametrization in MAPLE
# solve({
# 0=a+b*X_0+c*X_0**2,
# DFIs=b+2*c*Xs,CFIs=a+b*Xs+c*Xs**2},
# {a,b,c});
# a = -X0*(2*CFIs*Xs-CFIs*X0-Xs^2*DFIs+Xs*DFIs*X0)/(Xs^2-2*X0*Xs+X0^2)
# b = (-Xs^2*DFIs+DFIs*X0^2+2*CFIs*Xs)/(Xs^2-2*X0*Xs+X0^2)
# c = -(CFIs-Xs*DFIs+X0*DFIs)/(Xs^2-2*X0*Xs+X0^2)
# 2. with the source of the function abcd and pred
##################################################################
#Provide set of initial parameters
Xs.1 <- round(seq(X0.0 + 1, Xlast - 1, len = 30), digits = 0)
X0.1 <- rep(X0.0, length(Xs.1))
DFIs.1 <- NULL
CFIs.1 <- NULL
for(A in seq_along(Xs.1)){
DFIs2 <- Data[Data$Age.plot == Xs.1[A],]$DFI.plot
CFIs2 <- Data[Data$Age.plot == Xs.1[A],]$CFI.plot
DFIs.1 <- c(DFIs.1, DFIs2)
CFIs.1 <- c(CFIs.1, CFIs2)
}
st1 <- data.frame(cbind(X0.1, Xs.1, DFIs.1, CFIs.1))
names(st1) <- c("X0","Xs", "DFIs","CFIs")
#RUN NLS2 to find optimal initial parameters
st2 <- nls2(Y ~ nls.func.2(X0, Xs, DFIs, CFIs),
Data.xy,
start = st1,
# weights = weight,
# trace = T,
algorithm = "brute-force")
par_init <- coef(st2); par_init
#--------------------------------------------
# Create empty lists to store data after loop
#--------------------------------------------
par <- list()
AC.res <- list()
AC.pvalue <- NULL
data2 <- list()
data3 <- list()
param <- data.frame(rbind(par_init))
par.abcd <- data.frame(rbind(abcd.2(as.vector(par_init))))
param.2 <- data.frame(X0=double(),
Xs=double(),
DFIs=double(),
CFIs=double(),
a=double(),
b=double(),
c=double(),
stringsAsFactors=FALSE)
j <- 2
AC_pvalue <- 0
AC.pvalue[1] <- AC_pvalue
datapointsleft <- as.numeric(dim(Data)[1])
dpl <- datapointsleft #vector of all dataponitsleft at each step
#-------------------------------------------------------------------------------
# Start the procedure of Non Linear Regression
#-------------------------------------------------------------------------------
while ((AC_pvalue<=0.05) && datapointsleft >= 20){
weight <- 1/Y^2
# ---------------- NON linear reg applied to log(Y) ---------------------------------
st2 <- nls2(Y ~ nls.func.2(X0, Xs, DFIs, CFIs),
Data.xy,
start = st1,
weights = weight,
trace = F,
algorithm = "brute-force")
par_init <- coef(st2)
par_init
# st1 <- st1[!(st1$Xs == par_init[2]),]
nls.CFI <- nlsLM(Y ~ nls.func.2(X0, Xs, DFIs, CFIs),
Data.xy,
control = list(tol = 1e-2, printEval = TRUE, maxiter = 1024),
start = list(X0 = par_init[1], Xs = par_init[2],
DFIs = par_init[3], CFIs = par_init[4]),
weights = weight,
algorithm = "port",
lower = c(-10000,X0.0+1, -10000, -10000),
upper = c(10000, Xlast-1, 10000, 10000),
trace = F)
# nls.CFI <- nls2(Y ~ nls.func.2(X0, Xs, DFIs, CFIs),
# Data.xy,
# start = list(X0 = par_init[1], Xs = par_init[2],
# DFIs = par_init[3], CFIs = par_init[4]),
# weights = weight,
# control = nls.control(warnOnly = TRUE),
# trace = T,
# algorithm = "port",
# lower = c(-100000000,X0.0+1, -1000000000, -1000000000),
# upper = c(1000000000, Xlast-1, 1000000000, 1000000000))
# nls.CFI <- nlsLM(Y ~ nls.func.2(X0, Xs, DFIs, CFIs),
# Data.xy,
# control = nls.control(warnOnly = TRUE),
# start = list(X0 = par_init[1], Xs = par_init[2],
# DFIs = par_init[3], CFIs = par_init[4]),
# weights = weight,
# algorithm = "port",
# lower = c(-1000000000,X0.0+1, -1000000000, -1000000000),
# upper = c(1000000000, Xlast-1, 1000000000, 1000000000),
# trace = F)
#--------RESULTS analysis GOODNESS of fit
#estimate params
par[[j]] <- coef(nls.CFI)
par.abcd[j,] <- abcd.2(as.vector(coef(nls.CFI) )) #calculation of a, b, c and d
param[j,] <- par[[j]]
param.2[j-1,] <- cbind(param[j,], par.abcd[j,])
#summary
# summ = overview((nls.CFI)) #summary
#residuals
res1 <- nlsResiduals(nls.CFI) #residuals
res2 <- nlsResiduals(nls.CFI)$resi1
res <- res2[, 2]
AC.res <- test.nlsResiduals(res1)
AC.pvalue[j] <- AC.res$p.value
#---------Check for negative residuals----------
#Add filtration step order to data
Step <- rep(j - 1, length(x))
#create a new dataset with predicted CFI included
Data.new <- data.frame(cbind(x, Z, Y, pred.func.2(par[[j]],x)[[1]], res, Step))
names(Data.new) <- c("Age", "Observed_DFI","Observed_CFI", "Predicted_CFI", "Residual", "Step")
# plot(Data.new$Age, Data.new$Predicted_CFI, type = "l", col = "black",lwd = 2,
# ylim = c(0, max(Data.new$Predicted_CFI, Data.new$Observed_CFI)))
# lines(Data.new$Age, Data.new$Observed_CFI, type = "p", cex = 1.5)
#
#remove negative res
Data.pos <- Data.new[!Data.new$Residual<0,]
# lines(Data.pos$Age, Data.pos$Predicted_CFI, type = "l", col = j-1, lwd = 2)
# lines(Data.pos$Age, Data.pos$Observed_CFI, type = "p", col = j, cex = 1.5)
#restart
#Criteria to stop the loop when the estimated parameters are equal to initial parameters
# Crite <- sum(param.2[dim(param.2)[1],c(1:4)] == par_init)
datapointsleft <- as.numeric(dim(Data.pos)[1])
par_init <- par[[j]]
AC_pvalue <- AC.pvalue[j]
j <- j+1
x <- Data.pos$Age
Y <- Data.pos$Observed_CFI
Z <- Data.pos$Observed_DFI
Data.xy <- as.data.frame(cbind(x,Y))
dpl <- c(dpl, datapointsleft)
dpl
#Create again the grid
X0.0 <- x[1]
Xlast <- x[length(x)]
#Xs
if(par_init[2] -15 <= X0.0){
Xs.1 <- round(seq(X0.0 + 5, Xlast - 5, len = 30), digits = 0)
} else if(par_init[2] + 5 >= Xlast){
Xs.1 <- round(seq(par_init[2]-10, par_init[2]-1, len = 6), digits = 0)
} else{
Xs.1 <- round(seq(par_init[2]-5, par_init[2] + 5, len = 6), digits = 0)
}
#
X0.1 <- rep(X0.0, length(Xs.1))
DFIs.1 <- NULL
CFIs.1 <- NULL
for(A in seq_along(Xs.1)){
DFIs2 <- Data[Data$Age.plot == Xs.1[A],]$DFI.plot
CFIs2 <- Data[Data$Age.plot == Xs.1[A],]$CFI.plot
DFIs.1 <- c(DFIs.1, DFIs2)
CFIs.1 <- c(CFIs.1, CFIs2)
}
st1 <- data.frame(cbind(X0.1, Xs.1, DFIs.1, CFIs.1))
if(X0.0 <= par_init[2] && Xlast >=par_init[2]){
st1 <- rbind(st1, par_init)
}
names(st1) <- c("X0","Xs", "DFIs","CFIs")
}
} # end FOR loop
Here is the data file. I have exported my data into the .Rdata for an easier import.: https://drive.google.com/file/d/1GVMarNKWMEyz-noSp1dhzKQNtu2uPS3R/view?usp=sharing
In this file, the set id: 5470 will have this error: singular gradient matrix at initial parameter estimates in this part:
nls.CFI <- nlsLM(Y ~ nls.func.2(X0, Xs, DFIs, CFIs),
Data.xy,
control = list(tol = 1e-2, printEval = TRUE, maxiter = 1024),
start = list(X0 = par_init[1], Xs = par_init[2],
DFIs = par_init[3], CFIs = par_init[4]),
weights = weight,
algorithm = "port",
lower = c(-10000,X0.0+1, -10000, -10000),
upper = c(10000, Xlast-1, 10000, 10000),
trace = F)
The complementary functions (file Function.R):
abcd.2 <- function(P){
X0 <- P[1]
Xs <- P[2]
DFIs <- P[3]
CFIs <- P[4]
a <- -X0*(2*CFIs*Xs-CFIs*X0-Xs^2*DFIs+Xs*DFIs*X0)/(Xs^2-2*X0*Xs+X0^2)
b <- (-Xs^2*DFIs+DFIs*X0^2+2*CFIs*Xs)/(Xs^2-2*X0*Xs+X0^2)
c <- -(CFIs-Xs*DFIs+X0*DFIs)/(Xs^2-2*X0*Xs+X0^2)
pp <- as.vector(c(a, b, c))
return(pp)
}
#--------------------------------------------------------------
# NLS function
#--------------------------------------------------------------
nls.func.2 <- function(X0, Xs, DFIs, CFIs){
pp <- c(X0, Xs, DFIs, CFIs)
#calculation of a, b and c using these new parameters
c <- abcd.2(pp)[3]
b <- abcd.2(pp)[2]
a <- abcd.2(pp)[1]
ind1 <- as.numeric(x < Xs)
return (ind1*(a+b*x+c*x^2)+(1-ind1)*((a+b*(Xs)+c*(Xs)^2)+(b+2*c*(Xs))*(x-(Xs))))
}
#--------------------------------------------------------------
# Fit new parameters to a quadratic-linear function of CFI
#--------------------------------------------------------------
pred.func.2 <- function(pr,age){
#
X0 <- pr[1]
Xs <- pr[2]
DFIs <- pr[3]
CFIs <- pr[4]
#
x <- age
#calculation of a, b and c using these new parameters
c <- abcd.2(pr)[3]
b <- abcd.2(pr)[2]
a <- abcd.2(pr)[1]
#
ind1 <- as.numeric(x < Xs)
#
results <- list()
cfi <- ind1*(a+b*x+c*x^2)+(1-ind1)*((a+b*(Xs)+c*(Xs)^2)+(b+2*c*(Xs))*(x-(Xs))) #CFI
dfi <- ind1*(b+2*c*x) + (1 - ind1)*(b+2*c*(Xs)) #DFI
results[[1]] <- cfi
results[[2]] <- dfi
return (results)
}
#---------------------------------------------------------------------------------------------------------------
# Quadratic-linear function of CFI curve and its 1st derivative (DFI) with original parameters (only a, b and c)
#---------------------------------------------------------------------------------------------------------------
pred.abcd.2 <- function(pr,age){
#
a <- pr[1]
b <- pr[2]
c <- pr[3]
x <- age
#calculation of a, b and c using these new parameters
#
ind1 <- as.numeric(x < Xs)
#
results <- list()
cfi <- ind1*(a+b*x+c*x^2)+(1-ind1)*((a+b*(Xs)+c*(Xs)^2)+(b+2*c*(Xs))*(x-(Xs))) #CFI
dfi <- ind1*(b+2*c*x) + (1 - ind1)*(b+2*c*(Xs)) #DFI
results[[1]] <- cfi
results[[2]] <- dfi
return (results)
}
Updated: I did review my logic from the previous step and found that my data is a bit messed up because of it. I have fixed it. The case where a set f data ran into an infinite loop has no longer exists, but this error is still there however: singular gradient matrix at initial parameter estimates.

Jackknife in R to obtain interval estimates

I have a question on how to use the jackknife using the bootstrap package. I want to obtain the interval estimate for the jackknife method.
I've tried running the code below, but no results for my parameter estimate.
rm(list=ls())
library(bootstrap)
library(maxLik)
set.seed(20)
lambda <- 0.02
beta <- 0.5
alpha <- 0.10
n <- 40
N <- 1000
lambda_hat <- NULL
beta_hat <- NULL
cp <- NULL
jack_lambda <- matrix(NA, nrow = N, ncol = 2)
jack_beta <- matrix(NA, nrow = N, ncol = 2)
### group all data frame generated from for loop into a list of data frame
data_full <- list()
for(i in 1:N){
u <- runif(n)
c_i <- rexp(n, 0.0001)
t_i <- (log(1 - (1 / lambda) * log(1 - u))) ^ (1 / beta)
s_i <- 1 * (t_i < c_i)
t <- pmin(t_i, c_i)
data_full[[i]] <- data.frame(u, t_i, c_i, s_i, t)
}
### statistic function for jackknife()
estjack <- function(data, j) {
data <- data[j, ]
data0 <- data[which(data$s_i == 0), ] #uncensored data
data1 <- data[which(data$s_i == 1), ] #right censored data
data
LLF <- function(para) {
t1 <- data$t_i
lambda <- para[1]
beta <- para[2]
e <- s_i*log(lambda*t1^(beta-1)*beta*exp(t1^beta)*exp(lambda*(1-exp(t1^beta))))
r <- (1-s_i)*log(exp(lambda*(1-exp(t1^beta))))
f <- sum(e + r)
return(f)
}
mle <- maxLik(LLF, start = c(para = c(0.02, 0.5)))
lambda_hat[i] <- mle$estimate[1]
beta_hat[i] <- mle$estimate[2]
return(c(lambda_hat[i], beta_hat[i]))
}
jackknife_resample<-list()
for(i in 1:N) {
jackknife_resample[[i]]<-data_full[[i]][-i]
results <- jackknife(jackknife_resample, estjack,R=1000)
jack_lambda[i,]<-lambda_hat[i]+c(-1,1)*qt(alpha/2,n-1,lower.tail = FALSE)*results$jack.se
jack_beta[i,]<-beta_hat[i]+c(-1,1)*qt(alpha/2,n-1,lower.tail = FALSE)*results$jack.se
}```
I couldn't get the parameter estimate that run in MLE and hence couldn't proceed to the next step. Can anyone help?

Calculate stderr, t-value, p-value, predict value for linear regression

I'm fitting linear models with MatrixModels:::lm.fit.sparse and MatrixModels::glm4 (also sparse).
However, these functions return coeff, residuals and fitted.values only.
What's the fastest and easiest way to get/calculate another values such as stderr, t-value, p-value, predict value?
I use the data from MatrixModels:::lm.fit.sparse example.
I built a custom function summary_sparse to perform a summary for this model.
All matrix operations are performed with Matrix package.
Results are compared with dense type model.
Note lm.fit.sparse have to be evaluated with method = "chol" to get proper results.
Functions:
summary_sparse <- function(l, X) {
XXinv <- Matrix::chol2inv(Matrix::chol(Matrix::crossprod(X)))
se <- sqrt(Matrix::diag(XXinv*sum(l$residuals**2)/(nrow(X)-ncol(X))))
ts <- l$coef/se
pvals <- 2*c(1 - pnorm(abs(ts)))
list(coef = l$coef, se = se, t = ts, p = pvals)
}
predict_sparse <- function(X, coef) {
X %*% coef
}
Application:
dd <- expand.grid(a = as.factor(1:3),
b = as.factor(1:4),
c = as.factor(1:2),
d= as.factor(1:8))
n <- nrow(dd <- dd[rep(seq_len(nrow(dd)), each = 10), ])
set.seed(17)
dM <- cbind(dd, x = round(rnorm(n), 1))
## randomly drop some
n <- nrow(dM <- dM[- sample(n, 50),])
dM <- within(dM, { A <- c(2,5,10)[a]
B <- c(-10,-1, 3:4)[b]
C <- c(-8,8)[c]
D <- c(10*(-5:-2), 20*c(0, 3:5))[d]
Y <- A + B + A*B + C + D + A*D + C*x + rnorm(n)/10
wts <- sample(1:10, n, replace=TRUE)
rm(A,B,C,D)
})
X <- Matrix::sparse.model.matrix( ~ (a+b+c+d)^2 + c*x, data = dM)
Xd <- as(X,"matrix")
fmDense <- lm(dM[,"Y"]~Xd-1)
ss <- summary(fmDense)
r1 <- MatrixModels:::lm.fit.sparse(X, y = dM[,"Y"], method = "chol")
f <- summary_sparse(r1, X)
all.equal(do.call(cbind, f), ss$coefficients, check.attributes = F)
#TRUE
all.equal(predict_sparse(X, r1$coef)#x, predict(fmDense), check.attributes = F, check.names=F)
#TRUE

How could I solve Dimension mismatch in Jags model.?

I'm super new in bayesian analysis and I'm trying to practice with an example for Classic Capture-recapture models: Mh2
This is my code
nind <- dim(venados)[1]
K <- 43
ntraps <- 13
M <- 150
nz <- M - nind
Yaug <- array(0, dim = c(M, ntraps, K))
Yaug[1:nind,,] <- venados
y <- apply(Yaug, c(1,3), sum)
y[y > 1] <- 1
Bundle data
data1 <- list(y = y, nz = nz, nind = nind, K = K, sup = Buffer)
# Model JAGS
sink("Mh2_jags.txt")
cat("
model{
# Priors
p0 ~ dunif(0,1)
mup <- log(p0/(1-p0))
sigmap ~ dunif(0,10)
taup <- 1/(sigmap*sigmap)
psi ~ dunif(0,1)
# Likelihood
for (i in 1:(nind+nz)) {
z[i] ~ dbern(psi)
lp[i] ~ dnorm(mup,taup)
logit(p[i]) <- lp[i]
y[i] ~ dbin(mu[i],K)
} # i
N <- sum(z[1:(nind+nz)])
D <- N/sup*100
} # modelo
",fill = TRUE)
sink()
# Inicial values
inits <- function(){list(z = as.numeric(y >= 1), psi = 0.6, p0 = runif(1), sigmap = runif(1, 0.7, 1.2), lp = rnorm(M, -0.2))}
params1 <- c("p0","sigmap","psi","N","D")
# MCMC
ni <- 10000; nt <- 1; nb <- 1000; nc <- 3
# JAGS and posteriors
fM2 <- jags(data1, inits, params1, "Mh2_jags.txt", n.chains = nc, n.thin = nt, n.iter = ni, n.burnin = nb)
I received this error message
Processing function input.......
Done.
Compiling model graph
Resolving undeclared variables
Deleting model
Error in jags.model(file = model.file, data = data, inits = inits, n.chains = n.chains, :
RUNTIME ERROR:
Compilation error on line 16.
Dimension mismatch in subset expression of y
I have read that some letters as s and n have to be changed. However,
I do not know what to do. Please if you could give an advice.
Thank you very much
The issue is because y is two dimensional but the model assumes it is one dimensional. If you are assuming that the secondary surveys are i.i.d. Bernoulli trials (and each session had K trials)n then you would just need to take the sum of the rows of the y matrix. Assuming this is the case then you just need to modify a couple lines at the top of this script.
nind <- dim(venados)[1]
K <- 43
ntraps <- 13
M <- 150
nz <- M - nind
Yaug <- array(0, dim = c(M, ntraps, K))
Yaug[1:nind,,] <- venados
y <- apply(Yaug, c(1,3), sum)
y[y > 1] <- 1
# Take the rowSum
y_vector <- rowSums(y)
# Use y_vector instead of y
data1 <- list(y = y_vector, nz = nz, nind = nind, K = K, sup = Buffer)
Conversely, if you wanted to include covariates for the observational process (and those covariates vary by survey) you would use the matrix y and modify the model.
sink("Mh2_jags_Kloop.txt")
cat("
model{
# Priors
p0 ~ dunif(0,1)
mup <- log(p0/(1-p0))
sigmap ~ dunif(0,10)
taup <- 1/(sigmap*sigmap)
psi ~ dunif(0,1)
# Likelihood
for (i in 1:(nind+nz)) {
z[i] ~ dbern(psi)
lp[i] ~ dnorm(mup,taup)
logit(p[i]) <- lp[i]
# Loop over K surveys
for(j in 1:K){
y[i,j] ~ dbern(p[i]*z[i])
}
} # i
N <- sum(z[1:(nind+nz)])
D <- N/sup*100
} # modelo
",fill = TRUE)
sink()
Finally, you don't specify what mu is within the model. I think you want it to be p, but you also need to link the latent state model to the observational state model (if z=0 then that individual cannot be sampled. In this case you would interpret psi as the probability that nind+nz individuals are at your site.
# Model JAGS
sink("Mh2_jags.txt")
cat("
model{
# Priors
p0 ~ dunif(0,1)
mup <- log(p0/(1-p0))
sigmap ~ dunif(0,10)
taup <- 1/(sigmap*sigmap)
psi ~ dunif(0,1)
# Likelihood
for (i in 1:(nind+nz)) {
z[i] ~ dbern(psi)
lp[i] ~ dnorm(mup,taup)
logit(p[i]) <- lp[i]
y[i] ~ dbin(p[i] * z[i],K)
} # i
N <- sum(z[1:(nind+nz)])
D <- N/sup*100
} # modelo
",fill = TRUE)
sink()

Issue with calculating marginal effects for an ordered logit model in R with ocME

I am attempting to estimate an ordered logit model incl. the marginal effects in R through following the code from this tutorial. I am using polr from the MASS package to estimate the model and ocME from the erer package to attempt to calculate the marginal effects.
Estimating the model is no problem.
logitModelSentiment90 <- polr(availability_90_ord ~ mean_sentiment, data = data, Hess = T,
method = "logistic")
However, I run into an issue with ocME which generates the error message below:
ocME(logitModelSentiment90)
Error in eval(predvars, data, env) :
numeric 'envir' arg not of length one
The documentation below for ocME states that the object that should be used needs to come from the polr function which seems to be exactly what I am doing.
ocME(w, rev.dum = TRUE, digits = 3)
w = an ordered probit or logit model object estimated by polr from the MASS library.
So can anybody help me to understand what I am doing wrong? I have published a subset of my data with the two variables for the model here. In R I have the DV set up as a factor variable, the IV is continuous.
Side note:
I can pass the calculation to Stata from R with RStata to calculate the marginal effects without any problems. But I don't want to have to do this on a regular basis so I want to understand what is causing the issue with R and ocME.
stata("ologit availability_90_ord mean_sentiment
mfx", data.in = data)
. ologit availability_90_ord mean_sentiment
Iteration 0: log likelihood = -15379.121
Iteration 1: log likelihood = -15378.742
Iteration 2: log likelihood = -15378.742
Ordered logistic regression Number of obs = 11,901
LR chi2(1) = 0.76
Prob > chi2 = 0.3835
Log likelihood = -15378.742 Pseudo R2 = 0.0000
------------------------------------------------------------------------------
avail~90_ord | Coef. Std. Err. z P>|z| [95% Conf. Interval]
-------------+----------------------------------------------------------------
mean_senti~t | .0044728 .0051353 0.87 0.384 -.0055922 .0145379
-------------+----------------------------------------------------------------
/cut1 | -1.14947 .0441059 -1.235916 -1.063024
/cut2 | -.5286239 .042808 -.6125261 -.4447217
/cut3 | .3127556 .0426782 .2291079 .3964034
------------------------------------------------------------------------------
. mfx
Marginal effects after ologit
y = Pr(availability_90_ord==1) (predict)
= .23446398
------------------------------------------------------------------------------
variable | dy/dx Std. Err. z P>|z| [ 95% C.I. ] X
---------+--------------------------------------------------------------------
mean_s~t | -.0008028 .00092 -0.87 0.384 -.002609 .001004 7.55768
------------------------------------------------------------------------------
Your model has only one explanatory variable (mean_sentiment) and this seems to be a problem for ocME. Try for example to add a second variable to the model:
logitModelSentiment90 <- polr(availability_90_ord ~ mean_sentiment + I(mean_sentiment^2),
data = data, Hess = T, method = "logistic")
ocME(logitModelSentiment90)
# effect.0 effect.1 effect.2 effect.3
# mean_sentiment -0.004 -0.001 0 0.006
# I(mean_sentiment^2) 0.000 0.000 0 0.000
With minor modifications ocME can correctly run also with one independent variable.
Try the following myocME function
myocME <- function (w, rev.dum = TRUE, digits = 3)
{
if (!inherits(w, "polr")) {
stop("Need an ordered choice model from 'polr()'.\n")
}
if (w$method != "probit" & w$method != "logistic") {
stop("Need a probit or logit model.\n")
}
lev <- w$lev
J <- length(lev)
x.name <- attr(x = w$terms, which = "term.labels")
x2 <- w$model[, x.name, drop=FALSE]
ww <- paste("~ 1", paste("+", x.name, collapse = " "), collapse = " ")
x <- model.matrix(as.formula(ww), data = x2)[, -1, drop=FALSE]
x.bar <- as.matrix(colMeans(x))
b.est <- as.matrix(coef(w))
K <- nrow(b.est)
xb <- t(x.bar) %*% b.est
z <- c(-10^6, w$zeta, 10^6)
pfun <- switch(w$method, probit = pnorm, logistic = plogis)
dfun <- switch(w$method, probit = dnorm, logistic = dlogis)
V2 <- vcov(w)
V3 <- rbind(cbind(V2, 0, 0), 0, 0)
ind <- c(1:K, nrow(V3) - 1, (K + 1):(K + J - 1), nrow(V3))
V4 <- V3[ind, ]
V5 <- V4[, ind]
f.xb <- dfun(z[1:J] - c(xb)) - dfun(z[2:(J + 1)] - c(xb))
me <- b.est %*% matrix(data = f.xb, nrow = 1)
colnames(me) <- paste("effect", lev, sep = ".")
se <- matrix(0, nrow = K, ncol = J)
for (j in 1:J) {
u1 <- c(z[j] - xb)
u2 <- c(z[j + 1] - xb)
if (w$method == "probit") {
s1 <- -u1
s2 <- -u2
}
else {
s1 <- 1 - 2 * pfun(u1)
s2 <- 1 - 2 * pfun(u2)
}
d1 <- dfun(u1) * (diag(1, K, K) - s1 * (b.est %*% t(x.bar)))
d2 <- -1 * dfun(u2) * (diag(1, K, K) - s2 * (b.est %*%
t(x.bar)))
q1 <- dfun(u1) * s1 * b.est
q2 <- -1 * dfun(u2) * s2 * b.est
dr <- cbind(d1 + d2, q1, q2)
V <- V5[c(1:K, K + j, K + j + 1), c(1:K, K + j, K + j +
1)]
cova <- dr %*% V %*% t(dr)
se[, j] <- sqrt(diag(cova))
}
colnames(se) <- paste("SE", lev, sep = ".")
rownames(se) <- colnames(x)
if (rev.dum) {
for (k in 1:K) {
if (identical(sort(unique(x[, k])), c(0, 1))) {
for (j in 1:J) {
x.d1 <- x.bar
x.d1[k, 1] <- 1
x.d0 <- x.bar
x.d0[k, 1] <- 0
ua1 <- z[j] - t(x.d1) %*% b.est
ub1 <- z[j + 1] - t(x.d1) %*% b.est
ua0 <- z[j] - t(x.d0) %*% b.est
ub0 <- z[j + 1] - t(x.d0) %*% b.est
me[k, j] <- pfun(ub1) - pfun(ua1) - (pfun(ub0) -
pfun(ua0))
d1 <- (dfun(ua1) - dfun(ub1)) %*% t(x.d1) -
(dfun(ua0) - dfun(ub0)) %*% t(x.d0)
q1 <- -dfun(ua1) + dfun(ua0)
q2 <- dfun(ub1) - dfun(ub0)
dr <- cbind(d1, q1, q2)
V <- V5[c(1:K, K + j, K + j + 1), c(1:K, K +
j, K + j + 1)]
se[k, j] <- sqrt(c(dr %*% V %*% t(dr)))
}
}
}
}
t.value <- me/se
p.value <- 2 * (1 - pt(abs(t.value), w$df.residual))
out <- list()
for (j in 1:J) {
out[[j]] <- round(cbind(effect = me[, j], error = se[,
j], t.value = t.value[, j], p.value = p.value[, j]),
digits)
}
out[[J + 1]] <- round(me, digits)
names(out) <- paste("ME", c(lev, "all"), sep = ".")
result <- listn(w, out)
class(result) <- "ocME"
return(result)
}
and run the following code:
logitModelSentiment90 <- polr(availability_90_ord ~ mean_sentiment,
data = data, Hess = T, method = "logistic")
myocME(logitModelSentiment90)
# effect.0 effect.1 effect.2 effect.3
# mean_sentiment -0.001 0 0 0.001

Resources