eta squared - kruskal wallis in R - different results - r

Tomczak and Tomczak's (2014) formula to calculate the eta squared for the Kruskal-Wallis H-test using the following code:
x <- Data$text
H <- unname(kruskal.test(x ~ Data$group)$statistic)
n <- sum(table(x, Data$group))
k <- unname(res$parameter)+1
eta_squared <- (H-k+1)/(n - k)
print(eta_squared)
For reproducibility purposes here is the data:
x <- c(2,2,3,3,3,3,3,4,5,6,6,6,7,7,8,8,9,10,11,11,13,9,10,11,12,19,19,23,26,30,8,14,16,24,26,43,46)
group1 <- rep("group1", 21)
group2 <- rep("group2", 9)
group3 <- rep("group3", 7)
df <- data.frame(group = c(group1, group2, group3), result = c(x))
However, when comparing the findings with the results from the package rstatix, it sometimes gives different results so I am not sure which one I should report. I looked at the source code and I cannot tell what might be the difference. What is the source of the difference?
library(rstatix)
kruskal_effsize(
Data,
x ~ group,
ci = FALSE,
conf.level = 0.95,
ci.type = "perc",
nboot = 1000
)

I'm not getting your results. First revising your initial code to use df:
res <- kruskal.test(result~group, df)
H <- unname(res$statistic)
n <- sum(table(df$result, df$group))
k <- unname(res$parameter)+1
(eta_squared <- (H-k+1)/(n - k))
# [1] 0.5812849
Now the other computation:
kruskal_effsize(df, x ~ group, ci = FALSE, conf.level = 0.95,
ci.type = "perc", nboot = 1000)
# A tibble: 1 x 5
# .y. n effsize method magnitude
# * <chr> <int> <dbl> <chr> <ord>
# 1 x 37 0.581 eta2[H] large

Related

Bayesian in R: Dimension mismatch in values supplied for betaA

I am working with a matrix that is 35 rows and 16 columns. I am trying to run a Bayesian Multistate Model but something in my model code prevents it from working. When I run the code in R, I get the error message:
`
Error in checkForRemoteErrors(val) :
3 nodes produced errors; first error: RUNTIME ERROR:
Dimension mismatch in values supplied for betaA
`
Any help is appreciated and my code is below:
# psi = movement probability
# phi = apparent survival
# p = detection probability
# o = occurrence probability
# load libraries
library(jagsUI)
library(lattice)
library(coda)
library("R2WinBUGS")
library("R2jags")
library(zoo)
devtools::install_github("bstaton1/postpack")`
# initializing functions####
known.state.ms <- function(ms, notseen){
#notseen: label for 'not seen'
state <- ms
state[state==notseen] <- NA
for (i in 1:dim(ms)[1]){
m <- min(which(!is.na(state[i,])))
state[i,m] <- NA
}
return(state)
`}`
#i = 1
#ch = CHY[i,]
#first = f[i]`
z_inits = function(ch, first) {
nt = length(ch)
to_fill = which(ch == 4 & 1:nt >= first)
to_keep = which(ch != 4 & 1:nt >= first)
known = ch; known[to_fill] = NA
unknown = rep(NA, nt)
known_alive = rep(NA, nt)
unknown[to_fill] = 2
for (t in 1:nt) {
known_alive[t] = ifelse(any(!is.na(known[t:nt])), 1, 0)
}
last_known_alive = max(which(known_alive == 1))
if (last_known_alive < 16) {
dead = rep(0, nt)
for (t in (last_known_alive + 1):nt) {
dead[t] = sample(c(0,1), size = 1, prob = matrix(c(0.9, 0.1, 0, 1), 2,
2, byrow = T)[dead[t-1] + 1,])
}
unknown[dead == 1] = 4
}
unknown
}
`
# import data
dat <- read.csv("bass_encounter_history_0.csv")
covs <- read.csv("depth.csv")
depth = covs[,1]
histories <- unlist(lapply(dat$history, function(x) strsplit(x,split="")))
CH <- t(matrix(histories,nrow=16,ncol=35))
CH <- gsub("0",4,CH)
CH <- gsub("A",1,CH)
CH <- gsub("B",2,CH)
CH <- gsub("C",3,CH)
CH <- matrix(as.numeric(CH),nrow=35,ncol=16)
# Built the model####
nind= nrow(CH)
n.occasions = ncol(CH)
f=c(1,1,1,2,2,2,2,2,3,3,3,4,4,4,4,4,4,1,1,1,3,1,1,1,3,3,3,3,5,3,5,6,6,6,6) # initial tagging week
jags_model = function() {
# -------------------------------------------------
# Parameters:
# phiA: survival probability at site A
# phiB: survival probability at site B
# phiC: survival probability at site C
# psiA[1,t]: probability of staying in site A
# psiA[2,t]: movement probability from site A to site B
# psiB[1,t]: movement probability from site B to site A
# psiB[2,t]: probability of staying in site B
# psiB[3,t]: movement probability from site B to site C
# psiC[1,t]: probability of staying in site C
# psiC[2,t]: movement probability from site C to site B
# betaA[i]: the effect of standardized flow on movement probabilities at site A
# betaB[i]: the effect of standardized flow on movement probabilities at site B
# betaC[i]: the effect of standardized flow on movement probabilities at site C
# wA,B,C: the variable weight of the betas, 1 = essential, 0 = insignificant
# pA: recapture probability at site A
# pB: recapture probability at site B
# pC: recapture probability at site C
# -------------------------------------------------
# States (S):
# 1 alive at A
# 2 alive at B
# 3 alive at C
# 4 dead
# Observations (O):
# 1 seen at A
# 2 seen at B
# 3 seen at C
# 4 not seen
# -------------------------------------------------
# Priors and constraints
# Survival and recapture: uniform
phiA ~ dunif(0, 1)
phiB ~ dunif(0, 1)
phiC ~ dunif(0, 1)
pA ~ dunif(0, 1)
pB ~ dunif(0, 1)
pC ~ dunif(0, 1)
wA ~ dbern(.5)
for(i in 1:3){
wB[i] ~ dbern(.5)
}
wC ~ dbern(.5)
for(t in 1:(n.occasions-1)){
logit(psiA[1,t]) <- muA + wA*betaA*x[t]
psiA[2,t] <- 1 - psiA[1,t]
logit(psiC[1,t]) <- muC + wC*betaC*x[t]
psiC[2,t] <- 1 - psiC[1,t]
for(i in 1:3){
b[i,t] <- exp(muB[i] + wB[i]*betaB[i]*x[t])
psiB[i,t] <- b[i,t]/sum(b[,t])
}
}
muA ~ dt(0, 1/1.566^2, 7.763)
muC ~ dt(0, 1/1.566^2, 7.763)
mean.psiA <- 1/(1+exp(-muA))
#it's not really the mean - it's the probability of staying in A at mean value of x (only b/c x is z- transformed)
mean.psiC <- 1/(1+exp(-muC))
betaA ~ dt(0, 1/1.566^2, 7.763)
betaC ~ dt(0, 1/1.566^2, 7.763)
for(i in 1:2){
muB[i] ~ dt(0, 1/1.566^2, 7.763)
betaB[i] ~ dt(0, 1/1.566^2, 7.763)
}
muB[3] <- 0
betaB[3] <- 0
# PREDICTED TRANSITION PROBS FOR PLOTTING
for(r in 1:n.depth){
for(i in 1:3){
pred.b[i,r] <- exp(muB[i] + wB[i]*betaB[i]*depthseq[r])
pred.psiB[i,r] <- pred.b[i,r]/sum(pred.b[,r])
}
logit(pred.psiA[1,r]) <- muA + wA*betaA*depthseq[r]
pred.psiA[2,r] <- 1 - pred.psiA[1,r]
logit(pred.psiC[1,r]) <- muC + wC*betaC*depthseq[r]
pred.psiC[2,r] <- 1 - pred.psiC[1,r]
}
# Define probabilities of state S(t+1) given S(t)
for (t in 1:(n.occasions-1)){
ps[1,t,1] <- phiA * psiA[1,t]
ps[1,t,2] <- phiA * psiA[2,t]
ps[1,t,3] <- 0
ps[1,t,4] <- 1-phiA
ps[2,t,1] <- phiB * psiB[1,t]
ps[2,t,2] <- phiB * psiB[2,t]
ps[2,t,3] <- phiB * psiB[3,t]
ps[2,t,4] <- 1-phiB
ps[3,t,1] <- 0
ps[3,t,2] <- phiC * psiC[2,t]
ps[3,t,3] <- phiC * psiC[1,t] # switch these so the coefs talk about prob(stay in C)
ps[3,t,4] <- 1-phiC
ps[4,t,1] <- 0
ps[4,t,2] <- 0
ps[4,t,3] <- 0
ps[4,t,4] <- 1
# Define probabilities of O(t) given S(t)
po[1,t,1] <- pA
po[1,t,2] <- 0
po[1,t,3] <- 0
po[1,t,4] <- 1-pA
po[2,t,1] <- 0
po[2,t,2] <- pB
po[2,t,3] <- 0
po[2,t,4] <- 1-pB
po[3,t,1] <- 0
po[3,t,2] <- 0
po[3,t,3] <- pC
po[3,t,4] <- 1-pC
po[4,t,1] <- 0
po[4,t,2] <- 0
po[4,t,3] <- 0
po[4,t,4] <- 1
} #t
# Likelihood
for (i in 1:nind){
# Define latent state at first capture
z[i,f[i]] <- y[i,f[i]]
for (t in (f[i]+1):n.occasions){
# State process: draw S(t) given S(t-1)
z[i,t] ~ dcat(ps[z[i,t-1], t-1,])
# Observation process: draw O(t) given S(t)
y[i,t] ~ dcat(po[z[i,t], t-1,])
} #t
} #i
}
jags_file = "invasiondepthmodel.txt"
postpack::write_model(jags_model, jags_file)
# Configure the model settings and initial values ####
depthseq = seq(min(depth),max(depth),length.out=100)
n.depth=length(depthseq)
#compile jags data object
jags_data <- list(y = CH, x= depth, depthseq=depthseq, n.depth=n.depth, f = f,
n.occasions = n.occasions, nind = nind, z = known.state.ms(CH, 4))
#specify initial values
jags_inits <- function(i){list(
muA = runif(1,-1,1),
muB = c(runif(2,-1,1),NA),
muC = runif(1,-1,1),
wA= rbinom(3, 1, 0.5),
wB= rbinom(3, 1, 0.5),
wC= rbinom(3, 1, 0.5),
betaA = runif(2,-1,1),
betaB = c(runif(2,-1,1),NA),
betaC = runif(2,-1,1),
phiA = runif(1, 0.5, 1),
phiB = runif(1, 0.5, 1),
phiC = runif(1, 0.5, 1),
pA = runif(1, 0.5, 1),
pB = runif(1, 0.5, 1),
pC = runif(1, 0.5, 1),
z = t(sapply(1:nind, function(i) z_inits(CH[i,], f[i])))
)
}
# Parameters monitored
jags_params <- c("phiA","phiB","phiC",
"psiA","psiB","psiC",
"wA","wB","wC",
"muA","muB","muC",
"betaA","betaB","betaC",
"pA","pB","pC",
"pred.psiA","pred.psiB","pred.psiC")
jags_dims = c(
na = 10000, # number of samples in adapting phase
ni = 40000, # number of post-burn-in samples per chain
nb = 40000, # number of burn-in samples
nt = 20, # thinning rate
nc = 3, # number of chains,
parallel = T # run chains in parallel?
); with(as.list(jags_dims), ni/nt * nc)
inits = lapply(1:jags_dims["nc"], jags_inits)
# Run the model in JAGS #####
starttime = Sys.time()
cat("MCMC Started: ", format(starttime), "\n")
post = jagsUI::jags.basic(
data = jags_data,
model.file = jags_file,
inits = inits,
parameters.to.save = jags_params,
n.adapt = jags_dims["na"],
n.iter = sum(jags_dims[c("ni", "nb")]),
n.thin = jags_dims["nt"],
n.burnin = jags_dims["nb"],
n.chains = jags_dims["nc"],
parallel = jags_dims["parallel"],
verbose = F
)
I was expecting the dimensions between the matrix and betaA values to match up. However, it seems like they are not.
In the model, betaA is a scalar. In the model code, you have betaA*x[t] and in the prior: betaA ~ dt(0, 1/1.566^2, 7.763) both indicating a single value. However, in the initial values, it is a vector of length 2: betaA = runif(2,-1,1). You either need to define it as a vector in the model or pass a single value in the inits.

How can I use try catch for nls function in R

I am doing a regression for a Quadric Linear function. I got two option is to use either nlsLM and nls2. However, for some dataset, the use of nlsLM casing some problem such as: singular gradient matrix at initial parameter estimates or they ran in to an infinitie loop. I want to use the try catch to deal with this issue. Can anyone help me out? Thanks everyone in advance.
Here is the full code:
# Packages needed for estimaton of Ideal trajectory - nonlinear regression
#-------------------------------------------------------------------------------
library("minpack.lm")
library("nlstools")
library("nlsMicrobio")
library("stats")
library("tseries") #runs test for auto correlation
#Use NLS2
library(proto)
library(nls2)
################################################################
# Set working directory
setwd("C:/Users/Kevin Le/PycharmProjects/Pig Data Black Box - Copy")
#load dataset
load("Data/JRPData_TTC.Rdata") #load dataset created in MissingData.step
ID <- 5470
#Create a new dataframe which will store Data after ITC estimation
#Dataframe contains ITC parameters
ITC.param.pos2 <- data.frame(ANIMAL_ID=factor(),
X0=double(),
Y1=double(),
Y2=double(),
Ylast=double(),
a=double(),
b=double(),
c=double(),
d=double(),
stringsAsFactors=FALSE)
#Dataframe contains data points on the ITC
Data.remain <- data.frame(ANIMAL_ID=character(),
Age=double(),
obs.CFI=double(),
tt=double(),
ttt=double(),
stringsAsFactors=FALSE)
#===============================================================
# For loop for automatically estimating ITC of all pigs
#===============================================================
IDC <- seq_along(ID) # 17, 23, 52, 57, 116
for (idc in IDC){
# idc = 1
i <- ID[idc]
Data <- No.NA.Data.1[No.NA.Data.1$ANIMAL_ID == i,]
idc1 <- unique(as.numeric(Data$idc.1))
####### Create data frame of x (Age) and y (CFI) ########
x <- as.numeric(Data$Age.plot)
Y <- as.numeric(Data$CFI.plot)
Z <- as.numeric(Data$DFI.plot)
Data.xy <- as.data.frame(cbind(x,Y))
#Initial parameteres for parameter estimation
X0.0 <- x[1]
Xlast <- x[length(x)]
##################################################################
# 1. reparametrization CFI at X0 = 0
#function used for reparametrization in MAPLE
# solve({
# 0=a+b*X_0+c*X_0**2,
# DFIs=b+2*c*Xs,CFIs=a+b*Xs+c*Xs**2},
# {a,b,c});
# a = -X0*(2*CFIs*Xs-CFIs*X0-Xs^2*DFIs+Xs*DFIs*X0)/(Xs^2-2*X0*Xs+X0^2)
# b = (-Xs^2*DFIs+DFIs*X0^2+2*CFIs*Xs)/(Xs^2-2*X0*Xs+X0^2)
# c = -(CFIs-Xs*DFIs+X0*DFIs)/(Xs^2-2*X0*Xs+X0^2)
# 2. with the source of the function abcd and pred
##################################################################
#Provide set of initial parameters
Xs.1 <- round(seq(X0.0 + 1, Xlast - 1, len = 30), digits = 0)
X0.1 <- rep(X0.0, length(Xs.1))
DFIs.1 <- NULL
CFIs.1 <- NULL
for(A in seq_along(Xs.1)){
DFIs2 <- Data[Data$Age.plot == Xs.1[A],]$DFI.plot
CFIs2 <- Data[Data$Age.plot == Xs.1[A],]$CFI.plot
DFIs.1 <- c(DFIs.1, DFIs2)
CFIs.1 <- c(CFIs.1, CFIs2)
}
st1 <- data.frame(cbind(X0.1, Xs.1, DFIs.1, CFIs.1))
names(st1) <- c("X0","Xs", "DFIs","CFIs")
#RUN NLS2 to find optimal initial parameters
st2 <- nls2(Y ~ nls.func.2(X0, Xs, DFIs, CFIs),
Data.xy,
start = st1,
# weights = weight,
# trace = T,
algorithm = "brute-force")
par_init <- coef(st2); par_init
#--------------------------------------------
# Create empty lists to store data after loop
#--------------------------------------------
par <- list()
AC.res <- list()
AC.pvalue <- NULL
data2 <- list()
data3 <- list()
param <- data.frame(rbind(par_init))
par.abcd <- data.frame(rbind(abcd.2(as.vector(par_init))))
param.2 <- data.frame(X0=double(),
Xs=double(),
DFIs=double(),
CFIs=double(),
a=double(),
b=double(),
c=double(),
stringsAsFactors=FALSE)
j <- 2
AC_pvalue <- 0
AC.pvalue[1] <- AC_pvalue
datapointsleft <- as.numeric(dim(Data)[1])
dpl <- datapointsleft #vector of all dataponitsleft at each step
#-------------------------------------------------------------------------------
# Start the procedure of Non Linear Regression
#-------------------------------------------------------------------------------
while ((AC_pvalue<=0.05) && datapointsleft >= 20){
weight <- 1/Y^2
# ---------------- NON linear reg applied to log(Y) ---------------------------------
st2 <- nls2(Y ~ nls.func.2(X0, Xs, DFIs, CFIs),
Data.xy,
start = st1,
weights = weight,
trace = F,
algorithm = "brute-force")
par_init <- coef(st2)
par_init
# st1 <- st1[!(st1$Xs == par_init[2]),]
nls.CFI <- nlsLM(Y ~ nls.func.2(X0, Xs, DFIs, CFIs),
Data.xy,
control = list(tol = 1e-2, printEval = TRUE, maxiter = 1024),
start = list(X0 = par_init[1], Xs = par_init[2],
DFIs = par_init[3], CFIs = par_init[4]),
weights = weight,
algorithm = "port",
lower = c(-10000,X0.0+1, -10000, -10000),
upper = c(10000, Xlast-1, 10000, 10000),
trace = F)
# nls.CFI <- nls2(Y ~ nls.func.2(X0, Xs, DFIs, CFIs),
# Data.xy,
# start = list(X0 = par_init[1], Xs = par_init[2],
# DFIs = par_init[3], CFIs = par_init[4]),
# weights = weight,
# control = nls.control(warnOnly = TRUE),
# trace = T,
# algorithm = "port",
# lower = c(-100000000,X0.0+1, -1000000000, -1000000000),
# upper = c(1000000000, Xlast-1, 1000000000, 1000000000))
# nls.CFI <- nlsLM(Y ~ nls.func.2(X0, Xs, DFIs, CFIs),
# Data.xy,
# control = nls.control(warnOnly = TRUE),
# start = list(X0 = par_init[1], Xs = par_init[2],
# DFIs = par_init[3], CFIs = par_init[4]),
# weights = weight,
# algorithm = "port",
# lower = c(-1000000000,X0.0+1, -1000000000, -1000000000),
# upper = c(1000000000, Xlast-1, 1000000000, 1000000000),
# trace = F)
#--------RESULTS analysis GOODNESS of fit
#estimate params
par[[j]] <- coef(nls.CFI)
par.abcd[j,] <- abcd.2(as.vector(coef(nls.CFI) )) #calculation of a, b, c and d
param[j,] <- par[[j]]
param.2[j-1,] <- cbind(param[j,], par.abcd[j,])
#summary
# summ = overview((nls.CFI)) #summary
#residuals
res1 <- nlsResiduals(nls.CFI) #residuals
res2 <- nlsResiduals(nls.CFI)$resi1
res <- res2[, 2]
AC.res <- test.nlsResiduals(res1)
AC.pvalue[j] <- AC.res$p.value
#---------Check for negative residuals----------
#Add filtration step order to data
Step <- rep(j - 1, length(x))
#create a new dataset with predicted CFI included
Data.new <- data.frame(cbind(x, Z, Y, pred.func.2(par[[j]],x)[[1]], res, Step))
names(Data.new) <- c("Age", "Observed_DFI","Observed_CFI", "Predicted_CFI", "Residual", "Step")
# plot(Data.new$Age, Data.new$Predicted_CFI, type = "l", col = "black",lwd = 2,
# ylim = c(0, max(Data.new$Predicted_CFI, Data.new$Observed_CFI)))
# lines(Data.new$Age, Data.new$Observed_CFI, type = "p", cex = 1.5)
#
#remove negative res
Data.pos <- Data.new[!Data.new$Residual<0,]
# lines(Data.pos$Age, Data.pos$Predicted_CFI, type = "l", col = j-1, lwd = 2)
# lines(Data.pos$Age, Data.pos$Observed_CFI, type = "p", col = j, cex = 1.5)
#restart
#Criteria to stop the loop when the estimated parameters are equal to initial parameters
# Crite <- sum(param.2[dim(param.2)[1],c(1:4)] == par_init)
datapointsleft <- as.numeric(dim(Data.pos)[1])
par_init <- par[[j]]
AC_pvalue <- AC.pvalue[j]
j <- j+1
x <- Data.pos$Age
Y <- Data.pos$Observed_CFI
Z <- Data.pos$Observed_DFI
Data.xy <- as.data.frame(cbind(x,Y))
dpl <- c(dpl, datapointsleft)
dpl
#Create again the grid
X0.0 <- x[1]
Xlast <- x[length(x)]
#Xs
if(par_init[2] -15 <= X0.0){
Xs.1 <- round(seq(X0.0 + 5, Xlast - 5, len = 30), digits = 0)
} else if(par_init[2] + 5 >= Xlast){
Xs.1 <- round(seq(par_init[2]-10, par_init[2]-1, len = 6), digits = 0)
} else{
Xs.1 <- round(seq(par_init[2]-5, par_init[2] + 5, len = 6), digits = 0)
}
#
X0.1 <- rep(X0.0, length(Xs.1))
DFIs.1 <- NULL
CFIs.1 <- NULL
for(A in seq_along(Xs.1)){
DFIs2 <- Data[Data$Age.plot == Xs.1[A],]$DFI.plot
CFIs2 <- Data[Data$Age.plot == Xs.1[A],]$CFI.plot
DFIs.1 <- c(DFIs.1, DFIs2)
CFIs.1 <- c(CFIs.1, CFIs2)
}
st1 <- data.frame(cbind(X0.1, Xs.1, DFIs.1, CFIs.1))
if(X0.0 <= par_init[2] && Xlast >=par_init[2]){
st1 <- rbind(st1, par_init)
}
names(st1) <- c("X0","Xs", "DFIs","CFIs")
}
} # end FOR loop
Here is the data file. I have exported my data into the .Rdata for an easier import.: https://drive.google.com/file/d/1GVMarNKWMEyz-noSp1dhzKQNtu2uPS3R/view?usp=sharing
In this file, the set id: 5470 will have this error: singular gradient matrix at initial parameter estimates in this part:
nls.CFI <- nlsLM(Y ~ nls.func.2(X0, Xs, DFIs, CFIs),
Data.xy,
control = list(tol = 1e-2, printEval = TRUE, maxiter = 1024),
start = list(X0 = par_init[1], Xs = par_init[2],
DFIs = par_init[3], CFIs = par_init[4]),
weights = weight,
algorithm = "port",
lower = c(-10000,X0.0+1, -10000, -10000),
upper = c(10000, Xlast-1, 10000, 10000),
trace = F)
The complementary functions (file Function.R):
abcd.2 <- function(P){
X0 <- P[1]
Xs <- P[2]
DFIs <- P[3]
CFIs <- P[4]
a <- -X0*(2*CFIs*Xs-CFIs*X0-Xs^2*DFIs+Xs*DFIs*X0)/(Xs^2-2*X0*Xs+X0^2)
b <- (-Xs^2*DFIs+DFIs*X0^2+2*CFIs*Xs)/(Xs^2-2*X0*Xs+X0^2)
c <- -(CFIs-Xs*DFIs+X0*DFIs)/(Xs^2-2*X0*Xs+X0^2)
pp <- as.vector(c(a, b, c))
return(pp)
}
#--------------------------------------------------------------
# NLS function
#--------------------------------------------------------------
nls.func.2 <- function(X0, Xs, DFIs, CFIs){
pp <- c(X0, Xs, DFIs, CFIs)
#calculation of a, b and c using these new parameters
c <- abcd.2(pp)[3]
b <- abcd.2(pp)[2]
a <- abcd.2(pp)[1]
ind1 <- as.numeric(x < Xs)
return (ind1*(a+b*x+c*x^2)+(1-ind1)*((a+b*(Xs)+c*(Xs)^2)+(b+2*c*(Xs))*(x-(Xs))))
}
#--------------------------------------------------------------
# Fit new parameters to a quadratic-linear function of CFI
#--------------------------------------------------------------
pred.func.2 <- function(pr,age){
#
X0 <- pr[1]
Xs <- pr[2]
DFIs <- pr[3]
CFIs <- pr[4]
#
x <- age
#calculation of a, b and c using these new parameters
c <- abcd.2(pr)[3]
b <- abcd.2(pr)[2]
a <- abcd.2(pr)[1]
#
ind1 <- as.numeric(x < Xs)
#
results <- list()
cfi <- ind1*(a+b*x+c*x^2)+(1-ind1)*((a+b*(Xs)+c*(Xs)^2)+(b+2*c*(Xs))*(x-(Xs))) #CFI
dfi <- ind1*(b+2*c*x) + (1 - ind1)*(b+2*c*(Xs)) #DFI
results[[1]] <- cfi
results[[2]] <- dfi
return (results)
}
#---------------------------------------------------------------------------------------------------------------
# Quadratic-linear function of CFI curve and its 1st derivative (DFI) with original parameters (only a, b and c)
#---------------------------------------------------------------------------------------------------------------
pred.abcd.2 <- function(pr,age){
#
a <- pr[1]
b <- pr[2]
c <- pr[3]
x <- age
#calculation of a, b and c using these new parameters
#
ind1 <- as.numeric(x < Xs)
#
results <- list()
cfi <- ind1*(a+b*x+c*x^2)+(1-ind1)*((a+b*(Xs)+c*(Xs)^2)+(b+2*c*(Xs))*(x-(Xs))) #CFI
dfi <- ind1*(b+2*c*x) + (1 - ind1)*(b+2*c*(Xs)) #DFI
results[[1]] <- cfi
results[[2]] <- dfi
return (results)
}
Updated: I did review my logic from the previous step and found that my data is a bit messed up because of it. I have fixed it. The case where a set f data ran into an infinite loop has no longer exists, but this error is still there however: singular gradient matrix at initial parameter estimates.

Monte Carlo simulations for VAR models

I've been trying to estimate VAR models using Monte Carlo Simulation. I have 3 endogenous variables. I need some guidance regarding this.
First of all, I want to add an outlier as a percentage of the sample size.
Second (second simulation for same model), I want to add multivariate contaminated normal distribution like 0.9N (0, I) + 0.1((0,0,0)',(100, 100, 100)) instead of outlier.
Could you tell me how to do these?
Thank you.
RR <- function(n, out){
# n is number of observations
k <- 3 # Number of endogenous variables
p <- 2 # Number of lags
# add outlier
n[1]<- n[1]+out
# Generate coefficient matrices
B1 <- matrix(c(.1, .3, .4, .1, -.2, -.3, .03, .1, .1), k) # Coefficient matrix of lag 1
B2 <- matrix(c(0, .2, .1, .07, -.4, -.1, .5, 0, -.1), k) # Coefficient matrix of lag 2
M <- cbind(B1, B2) # Companion form of the coefficient matrices
# Generate series
DT <- matrix(0, k, n + 2*p) # Raw series with zeros
for (i in (p + 1):(n + 2*p)){ # Generate series with e ~ N(0,1)
DT[, i] <- B1%*%DT[, i-1] + B2%*%DT[, i-2] + rnorm(k, 0, 1)
}
DT <- ts(t(DT[, -(1:p)])) # Convert to time series format
#names <- c("V1", "V2", "V3") # Rename variables
colnames(DT) <- c("Y1", "Y2", "Y3")
#plot.ts(DT) # Plot the series
# estimate VECM
vecm1 <- VECM(DT, lag = 2, r = 2, include = "const", estim ="ML")
vecm2 <- VECM(DT, lag = 2, r = 1, include = "const", estim ="ML")
# mse
mse1 <- mean(vecm1$residuals^2)
mse2 <- mean(vecm2$residuals^2)
#param_list <- unname(param_list)
return(list("mse1" = mse1, "mse2" = mse2, "mse3" = mse3))
}
# defined the parameter grids(define the parameters ranges we want to run our function with)
n_grid = c(50, 80, 200, 400)
out_grid = c(0 ,5, 10)
# collect parameter grids in a list (to enter it into the Monte Carlo function)
prml = list("n" = n_grid, "out" = out_grid)
# run simulation
RRS <- MonteCarlo(func = RR, nrep = 1000, param_list = prml)
summary(RRS)
# make table:
rows = "n"
cols = "out"
MakeTable(output = RRS, rows = rows, cols = cols)

Optimize function in r

Here is my code:
cee = abs(qnorm(.5*0.1)) # Bonferroni threshold for achieving study-wide significance = 0.1
p.value = (simAll %>% select("p.value"))
p.value1 <- as.numeric(unlist(p.value))
# we use "cee" so R does not get confused with the function 'c'
betahat = log(OR) # Reported OR
z = sign(betahat)*abs(qnorm(0.5*p.value1)) # Reported p-value = 5.7e-4, which we convert to a z-value
###################################################
# THE PROPOSED APPROACH #
###################################################
se = betahat/z # standard error of betahat
mutilde1 = optimize(f=conditional.like,c(-20,20),maximum=T,z=z,cee=cee)$maximum # the conditional mle
The p.value is the p-values for 1000 simulations, same as OR, for the "se“ part, I can get 1000 different se values there. But for the mutilde1 line, there is an error exist: "Error in optimize(f = conditional.like, c(-20, 20), maximum = T, z = z, :
invalid function value in 'optimize'"
How can I fix the issue?
The conditional.like() function:
conditional.like=function(mu,cee,z){
like=dnorm(z-mu)/(pnorm(mu-cee)+pnorm(-cee-mu))
return((abs(z)>cee)*like) }
The simALL is a table looks like this (total 1000 lines):
# A tibble: 1,000 x 6
id term estimate std.error statistic p.value
<int> <chr> <dbl> <dbl> <dbl> <dbl>
1 1 .x 0.226 0.127 1.78 0.0747
2 2 .x 0.137 0.127 1.08 0.280
3 3 .x 0.304 0.127 2.38 0.0171
4 4 .x 0.497 0.128 3.87 0.000111
OR (total 1000 lines):
> OR
[1] 1.5537098 1.0939850 1.4491432 1.6377551 1.1646904 1.3387534 1.6377551 1.5009351 1.7918552
Also, here is my overall code:
library(tidyverse)
library(broom)
# create a tibble with an id column for each simulation and x wrapped in list()
sim <- tibble(id = 1:1000,
x = list(rbinom(1000,1,0.5))) %>%
# to generate z, pr, y, k use map and map2 from the purrr package to loop over the list column x
# `~ ... ` is similar to `function(.x) {...}`
# `.x` represents the variable you are using map on
mutate(z = map(x, ~ log(1.3) * .x),
pr = map(z, ~ 1 / (1 + exp(-.x))),
y = map(pr, ~ rbinom(1000, 1, .x)),
k = map2(x, y, ~ glm(.y ~ .x, family="binomial")),
# use broom::tidy to get the model summary in form of a tibble
sum = map(k, broom::tidy)) %>%
# select id and sum and unnest the tibbles
select(id, sum) %>%
unnest(cols = c(sum))
simOR <- sim %>%
# drop the intercepts and every .x with a p < 0.05
filter(term !="(Intercept)",
p.value < 0.05)
sim
j1=exp(simOR %>% select("estimate"))
OR1=as.numeric(unlist(j1))
mean(OR1)
simAll <- sim %>%
filter(term !="(Intercept)")
j <- exp(simAll %>% select("estimate"))
OR2 <- as.numeric(unlist(j))
mean(OR2)
simOR2 <- sim %>%
filter(term !="(Intercept)",
p.value < 0.005)
j2 <- exp(simOR2 %>% select("estimate"))
OR3 <- as.numeric(unlist(j2))
mean(OR3)
#op <- par(mfrow = c(3, 1))
hga=hist(OR2, main = NULL, freq = T, breaks = 10) #OR2:Overall OR
hgb=hist(OR1, freq = T,col=2,breaks=10, main="OR:p-value<0.05") #OR1:p-value<0.05
hgc=hist(OR3, freq = T,col=2,breaks=10, main="OR:p-value<0.005") #OR3:p-value<0.005
plot(hga,col=rgb(0,1,0,0.5),main = "OR",xlim=c(0.8,2),ylim=c(0,250))
plot(hgb, add = TRUE,col=rgb(0,0,0.8,0.5),xlim=c(0.8,2),ylim=c(0,250))
plot(hgc, add = TRUE,col=rgb(1,0,0,0.5),xlim=c(0.8,2))
abline(v = mean(OR2), lwd = 4, col = 3)
abline(v = mean(OR3), lwd = 4, col=2)
text(1.65,240,"1.31",col=1)
arrows(1.5,240,1.31,240,length=0.1,col=1,lwd=2)
abline(v = mean(OR1), lwd = 4, col=4)
text(2.1,220,"1.43",col=4)
arrows(1.98,220,1.43,220,length=0.1,col=4,lwd=2)
text(2.1,220,"1.55",col=2)
arrows(1.98,220,1.55,220,length=0.1,col=2,lwd=2)
#########################################
## THE FUNCTIONS BELOW ARE USED TO OBTAIN THE
## BIAS-CORRECTED ESTIMATES
#########################################
conditional.like=function(mu,cee,z){
like=dnorm(z-mu)/(pnorm(mu-cee)+pnorm(-cee-mu))
return((abs(z)>cee)*like) }
conditional.like.z=function(mu,cee,z){
return(conditional.like(mu,cee,z)*mu)
}
#########################################
## THE FUNCTIONS BELOW ARE USED TO OBTAIN THE
## BIAS-CORRECTED CONFIDENCE INTERVAL
#########################################
ptruncnorm.lower=function(z,mu,cee,alpha){
A=pnorm(-cee+mu)+pnorm(-cee-mu)
term1=pnorm(z-mu)
term2=pnorm(-cee-mu)
term3=pnorm(-cee-mu)+pnorm(z-mu)-pnorm(cee-mu)
result=(1/A)*(term1*(z<= -cee)+term2*(abs(z)<cee)+term3*(z>=cee))
return(result-(alpha/2))
}
ptruncnorm.upper=function(z,mu,cee,alpha){
A=pnorm(-cee+mu)+pnorm(-cee-mu)
term1=pnorm(z-mu)
term2=pnorm(-cee-mu)
term3=pnorm(-cee-mu)+pnorm(z-mu)-pnorm(cee-mu)
result=(1/A)*(term1*(z<= -cee)+term2*(abs(z)<cee)+term3*(z>=cee))
return(result-(1-alpha/2))
}
find.lowerz=function(mu,z,cee,alpha){
lowerz=uniroot(ptruncnorm.lower,lower=-20,upper=20,mu=mu,cee=cee,alpha=alpha)$root
return(lowerz-z)
}
find.upperz=function(mu,z,cee,alpha){
upperz=uniroot(ptruncnorm.upper,lower=-20,upper=20,mu=mu,cee=cee,alpha=alpha)$root
return(upperz-z)
}
getCI=function(z,cee,alpha){
uppermu=uniroot(find.lowerz,interval=c(-15,15),cee=cee,z=z,alpha=alpha)$root
lowermu=uniroot(find.upperz,interval=c(-15,15),cee=cee,z=z,alpha=alpha)$root
out=list(lowermu,uppermu)
names(out)=c("lowermu","uppermu")
return(out)
}
source("GW-functions.R")# YOU READ IN THE FUNCTIONS FOR OUR METHOD
cee=abs(qnorm(.5*0.1)) # Bonferroni threshold for achieving study-wide significance = 0.1
p.value=(simAll %>% select("p.value"))
p.value1 <- as.numeric(unlist(p.value))
# we use "cee" so R does not get confused with the function 'c'
betahat=log(OR) # Reported OR
z=sign(betahat)*abs(qnorm(0.5*p.value1)) # Reported p-value = 5.7e-4, which we convert to a z-value
###################################################
# THE PROPOSED APPROACH #
###################################################
se=betahat/z # standard error of betahat
mutilde1=optimize(f=conditional.like,c(-20,20),maximum=T,z=z,cee=cee)$maximum

How to simulate PCA Data?

I am trying to simulate PCA Data as follows:
q <- 5 # no. of PCs
p <- 20 # no. of variables
n <- 2000 # no. of individuals
eps <- 0.05 # error standard deviation
# Eigenvalues
Sig <- seq(3, 1, length.out = q)^2
Lambda <- diag(Sig)
# Matrix of Principal Components
H <- rmvnorm(n = n, mean = rep(0, q), sigma = Lambda)
# Add gaussian noise
E <- matrix(rnorm(n*p, sd = sqrt(eps)), ncol = p)
# Data matrix
Y <- H %*% t(Amat) + E
# Perform PCA
summary(m1 <- prcomp(Y, scale = T)) # and so on...
However, I have no idea how to create the matrix of Loadings Amat in a meaningful way.
Thanks for any help I receive from you and I appreciate it!
This is not using the same structure as the OP, but it simulates a PCA with 4 different groups (which could be species) which each have 3 "traits" (each of the trait have different means and sd based on some biological data found in the literature for example).
set.seed(123) # setting this so the random results will be repeatable
library(MASS)
# Simulating 3 traits for 4 different species
n = 200 # number of "individuals"
# Generate the groups
Amat1 = MASS::mvrnorm(n, mu = c(11.2,11.8,9.91), Sigma = diag(c(1.31,1.01,1.02)))
Amat2 = MASS::mvrnorm(n, mu = c(7.16,8.54,6.82), Sigma = diag(c(0.445,0.546,0.350)))
Amat3 = MASS::mvrnorm(n, mu = c(15.6,14.6,13.5), Sigma = diag(c(1.43,0.885,0.990)))
Amat4 = MASS::mvrnorm(n, mu = c(8.65,14.1,8.24), Sigma = diag(c(0.535,0.844,0.426)))
# Combine the data
Amat = rbind(Amat1,Amat2,Amat3,Amat4)
# Make group data
Amat.gr = cbind(Amat, gl(4,k=n,labels = c(1,2,3,4)))
# Calculate the covariance matrix for each group
by(Amat.gr[,1:3],INDICES = Amat.gr[,4],FUN = cov) # calculate covariance matrix for all groups
# Plot the result
summary(m1 <- prcomp(Amat, scale= T))
# biplot(m1, xlabs=rep(".", nrow(Amat)), cex = 2)
plot(vegan::scores(m1), asp = 1, pch = 19, col = gl(4,k=n,labels = c(1,2,3,4)))
plot(Amat[,1],Amat[,2], pch = 19, col = gl(4,k=n,labels = c(1,2,3,4)))
The plot on the left shows the PCA and on the right the raw data.
I added a toy example with data to show what is the algorithm to compute a PCA in R from Legendre and Legendre 2012.
# Generate vectors (example from Legendre and Legendre 2012)
v1 = c(2,3,5,7,9)
v2 = c(1,4,0,6,2)
# If you want to play with sample size
# n = 100
# v1 = rnorm(n = n, mean = mean(v1), sd = sd(v1))
# v2 = rnorm(n = n, mean = mean(v2), sd = sd(v2))
# Get the y matrix
y = cbind(v1,v2)
# Centered y matrix
yc = apply(y, 2, FUN = function(x) x-mean(x))
# Dispersion matrix
s = 1/(nrow(y)-1)*t(yc) %*% yc
# Compute the single value decomposition to get the eigenvectors and
ev = svd(s)$v
# get the principal components
f = yc %*% ev
# This gives the identity matrix
round(t(svd(s)$v) %*% svd(s)$v,2)
# these are the eigen values
svd(s)$d
-svd(yc)$v #p. 104
plot(f, pch = 19); abline(h=0,v=0, lty = 3)

Resources