R for loop that passes only once though a vector - r

I would like to concatenate an equation for a logistic model first followed by a linear model.
For model 1, o1=p1+p2+p3 (binomial will be input to family parameter in glm function)
For model 2, o2=p1+p2+p3 (gaussian will be input to family parameter in glm function)
In the real life example, there will be many more models.
Here is the basic scenario:
outcome <- c("o1", "o2")
predictor <- c("p1", "p2", "p3")
link=c("binomial", "gaussian")
try <- function(outcomes, predictors) {
for(o in outcome) {
eq <- paste(o, "~")
for(p in predictor) {
eq=paste0(eq, p, "+")
}
# remove extra +
eq <- substr(eq,1,nchar(eq)-1)
# model will go here
eq <- lapply(link, function(x) paste0(x, " - ", eq))
print(eq)
}
}
try(outcomes=outcome, predictors=predictor)
Output:
[[1]]
[1] "binomial - o1 ~p1+p2+p3"
[[2]]
[1] "gaussian - o1 ~p1+p2+p3"
[[1]]
[1] "binomial - o2 ~p1+p2+p3"
[[2]]
[1] "gaussian - o2 ~p1+p2+p3"
Instead, I want:
[1] "binomial - o1 ~p1+p2+p3"
[1] "gaussian - o2 ~p1+p2+p3"

We can do
try1 <- function(outcomes, predictors) {
Map(function(x, y) paste(y, '-',
deparse(reformulate(predictors, x))), outcomes, link)
}
-testing
try1(outcomes=outcome, predictors=predictor)
#$o1
#[1] "binomial - o1 ~ p1 + p2 + p3"
#$o2
#[1] "gaussian - o2 ~ p1 + p2 + p3"

Maybe try can be written like below
try <- function(outcomes, predictors) {
as.list(
paste0(
do.call(
paste,
c(data.frame(link, outcomes), sep = " - ")
),
paste0(" ~ ", paste0(predictors, collapse = " + "))
)
)
}
such that
> try(outcome, predictor)
[[1]]
[1] "binomial - o1 ~ p1 + p2 + p3"
[[2]]
[1] "gaussian - o2 ~ p1 + p2 + p3"

Related

Problem with function/ replacement has length zero

I'm trying to run the following function proposed by Lai et al., 2021, that aims to compare fit differences in non-nested models with categorical AVs. The Models look like this:
Mod1 <- '
# Measurement models
# Predictor Variables
A =~ NoEPA2 + ChEPA2 + SvEPA2
N =~ NoEPN2 + ChEPN2 + SvEPN2
# Outcome Variables
I =~ DE_VORH + AN_VORH + AINTMAX
EX =~ C_VORH + O_VORH + AD_MAX
# Control variables
AGE =~ age
age ~~ 0*age
SEX =~ sex
sex ~~ 0*sex
EDU =~ edu
edu ~~ 0* edu
#Error correlation A, N, E
NoEPA1 ~~ NoEPN1
ChEPA1 ~~ ChEPN1
SvEPA1 ~~ SvEPN1
# Correlations DV
A ~~ N
I ~~ EX
# Paths
I ~ A + N + AGE + SEX + EDU
EX ~ A + N + AGE + SEX + EDU
'
Sem2 <- sem(Mod1,
data=a,
estimator = "WLSMV",
conditional.x = FALSE,
mimic = "Mplus",
ordered = c("DE_VORH", "AN_VORH","AINTMAX","O_VORH", "C_VORH","AD_MAX"))
summary(sem2,
fit.measures = TRUE,
standardize = TRUE,
rsquare = TRUE,
estimates = TRUE,
ci = FALSE)
Mod2 <- '
# Measurement models
# Predictor Variables
A =~ NoEPA1 + ChEPA1 + SvEPA1
N =~ NoEPN1 + ChEPN1 + SvEPN1
E =~ MxStEPEM + ChEPEM1 + SvEPEM1
# Outcome Variables
I =~ DE_VORH + AN_VORH + AINTMAX
EX =~ C_VORH + O_VORH + AD_MAX
# Control variables
AGE =~ age
age ~~ 0*age
SEX =~ sex
sex ~~ 0*sex
EDU =~ edu
edu ~~ 0* edu
#Error correlation A, N, E
NoEPA1 ~~ NoEPN1 + MxStEPEM
NoEPN1 ~~ MxStEPEM
ChEPA1 ~~ ChEPN1 + ChEPEM1
ChEPN1 ~~ ChEPEM1
SvEPA1 ~~ SvEPN1 + SvEPEM1
SvEPN1 ~~ SvEPEM1
# Correlations DV
A ~~ N + E
N ~~ E
I ~~ EX
# Paths
I ~ A + N + E + AGE + SEX + EDU
EX ~ A + N + E + AGE + SEX + EDU
'
sem3a <- sem(Mod2,
data=a,
estimator = "WLSMV",
conditional.x = FALSE,
mimic = "Mplus",
ordered = c("DE_VORH", "AN_VORH", "AINTMAX", "O_VORH","C_VORH","AD_MAX"))
summary(sem3a,
fit.measures = TRUE,
standardize = TRUE,
rsquare = TRUE,
estimates = TRUE,
ci = FALSE)
The function I want to apply looks like this:
## The function below returns point estimate and standard error for
## ∆RMSEA, ∆CFI, and ∆SRMR between two competing models A & B given categorical data.
## The two models do not need to be nested.
# fitA = Fitted 'lavaan' model object for Model A
# fitB = Fitted 'lavaan' model object for Model B
# fitZ = Fitted 'lavaan' model object for the baseline model for CFI
fit.diff.cat <- function(fitA, fitB){
######################################
# Internal functions
######################################
# Rearrange the model-implied correlation matrix of 'fitB' so that its columns and rows
# are in the same order as that in the model-implied correlation matrix of 'fitA'
rearrange.P.theta <- function(fitA, fitB){
R <- inspect(fitA, "sampstat")$'cov'
p <- dim(R)[1]
R <- as.matrix(R, p, p)
P.theta.A <- inspect(fitA, "cov.ov")
P.theta.B0 <- inspect(fitB, "cov.ov")
target.var.names <- rownames(R)
current.var.names <- rownames(P.theta.B0)
P.theta.B <- matrix(NA, p, p)
rownames(P.theta.B) <- colnames(P.theta.B) <- target.var.names
for (i.row in 1:p){
for(i.col in 1:p){
row.name <- target.var.names[i.row]
col.name <- target.var.names[i.col]
pick.row <- which(current.var.names==row.name)
pick.col <- which(current.var.names==col.name)
P.theta.B[i.row, i.col] <- P.theta.B0[pick.row, pick.col]
}
}
return(P.theta.B)
}# End of rearrange.P.theta()
# Rearrange the model-implied thresholds of 'fitB' so that its names
# are in the same order as that in the model-implied thresholds 'fitA'
rearrange.thresh <- function(fitA, fitB){
thresh <- inspect(fitA, "sampstat")$th
thresh.B0 <- inspect(fitB, "th")
target.var.names <- names(thresh)
current.var.names <- names(thresh.B0)
n.thresh <- length(thresh)
thresh.B <- rep(NA, n.thresh)
names(thresh.B) <- target.var.names
for (i in 1:n.thresh){
name <- target.var.names[i]
pick.name <- which(current.var.names==name)
thresh.B[i] <- thresh.B0[pick.name]
}
return(thresh.B)
}# End of rearrange.thresh()
# Rearrange the Delta matrix of 'fitB' so that its rows are
# in the same order as that in the Delta matrix of 'fitA'.
# Delta = derivative of P(theta) wrt theta
rearrange.Delta <- function(fitA, fitB){
Delta.B0 <- lavaan:::computeDelta(fitB#Model)[[1]]
n.theta <- dim(Delta.B0)[2]
thresh <- inspect(fitA, "sampstat")$th
thresh.B0 <- inspect(fitB, "th")
target.var.names <- names(thresh)
current.var.names <- names(thresh.B0)
n.thresh <- length(thresh)
Delta.th <- matrix(NA, n.thresh, n.theta)
rownames(Delta.th) <- target.var.names
for (i in 1:n.thresh){
name <- target.var.names[i]
pick.name <- which(current.var.names==name)
Delta.th[i,] <- Delta.B0[pick.name,]
}
P.theta.B0 <- inspect(fitB, "cov.ov")
R <- inspect(fitA, "sampstat")$'cov'
p <- dim(R)[1]
target.var.names <- rownames(R)
current.var.names <- rownames(P.theta.B0)
n.rho <- p*(p-1)/2
current.matrix <- matrix(NA, p, p)
current.matrix[lower.tri(current.matrix, diag=FALSE)] <- 1:n.rho
pick.vech <- rep(NA, n.rho)
j <- 1
for(i.col in 1:(p-1)){
for(i.row in (i.col+1):p){
row.name <- target.var.names[i.row]
col.name <- target.var.names[i.col]
pick.row <- which(current.var.names==row.name)
pick.col <- which(current.var.names==col.name)
if(pick.row >= pick.col) pick.vech[j] <- current.matrix[pick.row, pick.col]
if(pick.row < pick.col) pick.vech[j] <- current.matrix[pick.col, pick.row]
j <- j+1
}
}
Delta.rho <- matrix(NA, n.rho, n.theta)
for(i in 1:n.rho){
pick <- pick.vech[i] + n.thresh
Delta.rho[i,] <- Delta.B0[pick,]
}
Delta.B <- rbind(Delta.th, Delta.rho)
return(Delta.B)
}# End of rearrange.Delta()
######################################
# Main function
######################################
H.A <- inspect(fitA, "hessian")*2
H.B <- inspect(fitB, "hessian")*2
H.A.inv <- try(chol2inv(chol(H.A)), TRUE)
H.B.inv <- try(chol2inv(chol(H.B)), TRUE)
if(class(H.A.inv)=="matrix" & class(H.B.inv)=="matrix"){
n <- inspect(fitA, "nobs")
dA <- as.numeric(fitmeasures(fitA, "df"))
dB <- as.numeric(fitmeasures(fitB, "df"))
P.B <- rearrange.P.theta(fitA, fitB)
p <- dim(P.B)[1]
rho.B <- lav_matrix_vech(P.B, diagonal = FALSE)
thresh.B <- rearrange.thresh(fitA, fitB)
estB <- c(thresh.B, rho.B)
m <- inspect(fitA, "wls.obs")
estA <- inspect(fitA, "wls.est")
eA <- m - estA
eB <- m - estB
Gamma <- inspect(fitA, "gamma")
DeltaA <- lavaan:::computeDelta(fitA#Model)[[1]]
DeltaB <- rearrange.Delta(fitA, fitB)
p1 <- dim(DeltaA)[1]
g.A <- 2*t(eA)
K.A <- (-2)*t(DeltaA)
T.A <- 2*diag(1, p1)
Q.A <- T.A - t(K.A)%*%H.A.inv%*%K.A
G.A <- t(eA) %*% eA
G.A.bc0 <- G.A - sum(diag(Q.A%*%Gamma))/(2*n)
G.A.bc <- ifelse(G.A.bc0 < 0, 0, G.A.bc0)
g.B <- 2*t(eB)
K.B <- (-2)*t(DeltaB)
T.B <- 2*diag(1, p1)
Q.B <- T.B - t(K.B)%*%H.B.inv%*%K.B
G.B <- t(eB) %*% eB
G.B.bc0 <- G.B - sum(diag(Q.B%*%Gamma))/(2*n)
G.B.bc <- ifelse(G.B.bc0 < 0, 0, G.B.bc0)
R <- inspect(fitA, "sampstat")$cov
r <- lav_matrix_vech(R, diagonal = FALSE)
k <- length(r)
G.Z <- t(r) %*% r
G.Z.bc0 <- G.Z - sum(diag(Gamma))/n
G.Z.bc <- ifelse(G.Z.bc0 < 0, 0, G.Z.bc0)
G.A1 <- ifelse(G.A.bc > 0, G.A.bc, G.A)
G.B1 <- ifelse(G.B.bc > 0, G.B.bc, G.B)
G.Z1 <- ifelse(G.Z.bc > 0, G.Z.bc, G.Z)
## RMSEA diff
rmsea.AB <- sqrt(G.A.bc/dA) - sqrt(G.B.bc/dB)
J.rmsea.1 <- cbind( 1/(2*sqrt(dA*G.A1)), -1/(2*sqrt(dB*G.B1)) )
J.rmsea.2 <- rbind(g.A, g.B)
J.rmsea <- J.rmsea.1 %*% J.rmsea.2
var.rmsea.AB <- J.rmsea %*% Gamma %*% t(J.rmsea) / n
se.rmsea.AB <- sqrt(var.rmsea.AB)
## CFI diff
cfi.AB <- (G.B.bc - G.A.bc) / G.Z.bc
n.thresh <- length(fitted(fitA)$th)
r1 <- c(rep(0, n.thresh), r)
J.cfi.1 <- cbind( -1/G.Z1, 1/G.Z1, (G.A1-G.B1)/G.Z1^2 )
J.cfi.2 <- rbind(g.A, g.B, 2*t(r1) )
J.cfi <- J.cfi.1 %*% J.cfi.2
var.cfi.AB <- J.cfi %*% Gamma %*% t(J.cfi) / n
se.cfi.AB <- sqrt(var.cfi.AB)
## SRMR diff
srmr.AB <- sqrt(G.A.bc/k) - sqrt(G.B.bc/k)
J.srmr.1 <- cbind( 1/(2*sqrt(k*G.A1)), -1/(2*sqrt(k*G.B1)) )
J.srmr.2 <- rbind(g.A, g.B)
J.srmr <- J.srmr.1 %*% J.srmr.2
var.srmr.AB <- J.srmr %*% Gamma %*% t(J.srmr) / n
se.srmr.AB <- sqrt(var.srmr.AB)
#######
output <- c(rmsea.AB, se.rmsea.AB,
cfi.AB, se.cfi.AB,
srmr.AB, se.srmr.AB)
names(output) <- c("rmsea.AB", "se.rmsea.AB",
"cfi.AB", "se.cfi.AB",
"srmr.AB", "se.srmr.AB")
}# End of if Hessian is positive definite
else{output <- rep(NA, 6)}
return(output)
}
When I enter my fitted models, the following error is returned:
Error in P.theta.B[i.row, i.col] <- P.theta.B0[pick.row, pick.col] :
replacement has length zero<
Now I tried to run the commands step by step and it seems that this error is produced at the beginning of the internal functions running this step:
P.theta.B[i.row, i.col] <- P.theta.B0[pick.row, pick.col]
Any ideas on how to troubleshoot there? I'm left with a lot of questionmarks.

Variable parts formula for Shiny

In this part of my Shiny app, I'll do a 'linear model' (lm()) regression, using the variables the user selects. There are three inputs:
input$lmTrendFun is a selectInput(), with the options c("Linear", "Exponential", "Logarithmic", "Quadratic", "Cubic"):
selectInput("lmTrendFun", "Select the model for your trend line.",
choices = c("Linear", "Exponential", "Logarithmic", "Quadratic", "Cubic"))
The second input is input$lmDep, and it's a selectInput() too. I created a updateSelectInput first inside an observe() reactive function, so the choices are the column names from the imported tibble.
The third input is input$lmInd and it's a checkboxGroupInput(), the choices being all the column names other than the one that's already the input$lmInd.
From that I want this output: the lm() (or rather, summary.lm() or summary(lm())) result for those variables. If I knew which they were, it would be simple:
if(input$lmTrendFun == "Linear"){
form <- yname ~ x1 + x2
}else if(input$lmTrendFun == "Exponential"){
form <- yname~ exp(x1) + exp(x2)
}else if(input$lmTrendFun == "Logarithmic"){
form <- yname~ log(x1) + log(x2)
}else if(input$lmTrendFun == "Quadratic"){
form <- yname ~ poly(x1, 2) + poly(x2, 2)
}else if(input$lmTrendFun == "Cubic"){
form <- y ~ poly(x1, 3) + poly(x2, 3)
}
[...]
lm(form, data = .)
where the data (.) has the columns yname, x1 and x2.
However, I don't. So I believe I need some more generic function that can create the formula. How can this be done?
formulizer <- function() as.formula(paste0( input$lmDep, "~", switch(input$lmTrendFun,
Linear = paste0(input$lmInd, collapse=" + "),
Logarithmic = paste0("exp(", input$lmInd,")", collapse=" + "),
Quadratic = paste0("poly(", input$lmInd,", 2)", collapse=" + "),
Cubic = paste0("poly(", input$lmInd,", 3)", collapse=" + ") )))
> input <- list(lmInd=paste0("V", 1:5), lmTrendFun="Linear", lmDep="Vp")
> formulaizer()
Vp ~ V1 + V2 + V3 + V4 + V5
<environment: 0x7fad1cf63d48>
> input <- list(lmInd=paste0("V", 1:5), lmTrendFun="Logarithmic", lmDep="Vp")
> formulizer()
Vp ~ exp(V1) + exp(V2) + exp(V3) + exp(V4) + exp(V5)
<environment: 0x7fad01e694d0>
> input <- list(lmInd=paste0("V", 1:5), lmTrendFun="Quadratic", lmDep="Vp")
> formulizer()
Vp ~ poly(V1, 2) + poly(V2, 2) + poly(V3, 2) + poly(V4, 2) +
poly(V5, 2)
<environment: 0x7fad01f51d20>
> input <- list(lmInd=paste0("V", 1:5), lmTrendFun="Cubic", lmDep="Vp")
> formulizer()
Vp ~ poly(V1, 3) + poly(V2, 3) + poly(V3, 3) + poly(V4, 3) +
poly(V5, 3)
<environment: 0x7fad01f59690>
Consider switch with vectorized paste0 to build terms with transformations and then pass terms into reformulate. Adjust below inputs to actual Shiny variables:
dep_term <- ...
ind_terms <- ...
form <- switch(input$lmTrendFun,
Linear = reformulate(ind_terms, response="yname"),
Exponential = reformulate(paste0("exp(", ind_terms, ")"), response=dep_term),
Logarithmic = reformulate(paste0("log(", ind_terms, ")"), response=dep_term),
Quadratic = reformulate(paste0("poly(", ind_terms, ", 2)"), response=dep_term),
Cubic = reformulate(paste0("poly(", ind_terms, ", 3)"), response=dep_term)
)
Online Demo

How can I use `I()` with `paste0`?

I have multiple dataframes and I would like to evaluate (multiple) different models on each. MWE
df1 <- data.frame(A3 = c(-5, 5, 1),
B3 = c(0, 10, 1))
df2 <- data.frame(A4 = c(5, 15, 1))
B4 = c(10, 20, 1))
myfun <- function(arg1, arg2){ # arg1 =1 or 2
if (arg2 == 1){
eqn <- paste0("A", arg1+2) ~ paste0("B", arg1+2) + I(as.name(paste0("B", arg1+2))^2)
} else {
eqn <- paste0("A", arg1+2) ~ paste0("B", arg1+2) + I(as.name(paste0("B", arg1+2))^2) +I(as.name(paste0("B", arg1+2))^3)
}
return (lm(formula = eqn, data = eval(as.name(paste0("df", arg1)))
)
)
}
For example if I run myfun(1,2) I would like to get lm(A4 ~ B4 + I(B4^2) + I(B4^3), data = df2). But whatever I try I get the following error message Error in (paste0("B", arg1 +2))^2 : non-numeric argument to binary operator. From what I read in ?I, I imagine this is because R isolates whatever is passed into I(), so it doesn't realize I am trying to transform a variable: is that what is going on, and is it something I can fix? Also, is there a better way to estimate multiple models quickly? All the similar questions I found used the same data.frame across models, while I have to account for the response (and predictor) variables coming from different dataframes for different models.
Maybe this is what you are looking for:
The issue is that your are doing a math operation on a string, i.e with (paste0("B", arg1 +2))^2 you try to square a string, that's why you get the error. Inytead you can simply glue you formula together as a string an d converted it to a formula via as.formula:
df1 <- data.frame(A3 = c(-5, 5, 1),
B3 = c(0, 10, 1))
df2 <- data.frame(A4 = c(5, 15, 1))
B4 = c(10, 20, 1)
myfun <- function(arg1, arg2){ # arg1 =1 or 2
if (arg2 == 1){
eqn <- paste0("A", arg1+2, " ~ B", arg1+2," + I(B", arg1+2, "^2)")
} else {
eqn <- paste0("A", arg1+2, " ~ B", arg1+2," + I(B", arg1+2, "^2) + I(B", arg1+2, "^3)")
}
return (lm(formula = as.formula(eqn), data = eval(as.name(paste0("df", arg1)))
)
)
}
myfun(2, 1)
#>
#> Call:
#> lm(formula = as.formula(eqn), data = eval(as.name(paste0("df",
#> arg1))))
#>
#> Coefficients:
#> (Intercept) B4 I(B4^2)
#> 0.84795 0.12281 0.02924
An option is also to construct the formula with glue
myfun <- function(arg1, arg2){
eqn <- switch(arg2,
`1` = glue::glue("A{arg1 + 2}~ B{arg1+2} + I(B{arg1+2}^2)"),
glue::glue("A{arg1 + 2}~ B{arg1+2}",
"+ I(B{arg1+2}^2) + I(B{arg1+2}^3)")
)
model <- lm(eqn, data = get(paste0('df', arg1), envir = .GlobalEnv))
model$call <- as.formula(eqn)
return(model)
}
myfun(2, 1)
#Call:
#A4 ~ B4 + I(B4^2)
#Coefficients:
#(Intercept) B4 I(B4^2)
# 0.84795 0.12281 0.02924

Creating complicated formula's with a function

I am using the following function to make formula's, where I can simply assign vector of variable names, where the function makes sure everything is in the right place and double variable names are excluded:
formula <- function(depvar, indepvars, instruments=NULL, othervars=NULL) {
x <- c(indepvars, instruments, othervars)
totvars <- unique(x)
totvars <- x[!x %in% depvar]
formula <- as.formula(
paste(depvar, paste(totvars, collapse = " + "), sep = " ~ "))
return(formula)
}
indepvars <- c("indepvarA", "indepvarB", "indepvarC")
instruments <- c("IV_A", "IV_B")
# lm
formula("depvar", indepvars)
# 1st stage - IV's for indepvarC
formula("indepvarC", indepvars, instruments)
However, I want the option to write a more complicated formula (an ivreg formula), namely:
depvar ~ instrumentedvar + indepvars | instrumentvars + indepvars
I have been trying the following:
formula <- function(depvar, indepvars, instruments=NULL, instrumentedvar=NULL, othervars=NULL, twostage=NULL) {
x <- c(indepvars, instruments, othervars)
totvars <- unique(x)
totvars <- x[!x %in% depvar]
if (is.null(twostage)) {
formula <- as.formula(
paste(depvar, paste(totvars, collapse = " + "), sep = " ~ "))
} else {
totvarsB <- totvars[!totvars %in% instrumentedvar]
totvarsB <- c(as.character(totvarsB), as.character(instruments))
formula <- as.formula(
paste(depvar, paste(paste(totvars, collapse = " + "), paste("|", paste(totvarsB, collapse = " + " )), sep = " ~ ")))
}
return(formula)
}
indepvars <- c("indepvarA", "indepvarB", "indepvarC")
instruments <- c("IV_A", "IV_B")
instrumentedvar <- "indepvarC"
formula("indepvarC", indepvars, instruments, twostage=1)
But I cannot seem to get it right.
Define reform which takes a vector of names and outputs a string in which they are connected with plus signs. Then use sprintf to generate the final string and convert that using as.formula:
reform <- function(x) paste(x, collapse = " + ")
makeFo <- function(lhs, rhs1, rhs2 = NULL, env = parent.frame()) {
s <- sprintf("%s ~ %s", lhs, reform(c(rhs1, rhs2)))
if (!missing(rhs2)) s <- sprintf("%s | %s", s, reform(rhs2))
as.formula(s, env = env)
}
# test
makeFo("y", c("x1", "x2"))
## y ~ x1 + x2
makeFo("y", c("x1", "x2"), c("u1", "u2"))
## y ~ x1 + x2 + u1 + u2 | u1 + u2

Regression function with variable number of arguments in r

I have composed a function to calculate VIF for nls regression models. It looks like this:
function (a,b,c,d,e,f,g) {
VIFa <- 1/(1- (R2 <- summary(lm(a ~ b + c + d + e + f + g))$r.square))
PMa <- ifelse (sqrt(VIFa) > 2, "JE", "NI")
VIFb <- 1/(1- (R2 <- summary(lm(b ~ a + c + d + e + f + g))$r.square))
PMb <- ifelse (sqrt(VIFb) > 2, "JE", "NI")
VIFc <- 1/(1- (R2 <- summary(lm(c ~ a + b + d + e + f + g))$r.square))
PMc <- ifelse (sqrt(VIFc) > 2, "JE", "NI")
VIFd <- 1/(1- (R2 <- summary(lm(d ~ a + b + c + e + f + g))$r.square))
PMd <- ifelse (sqrt(VIFd) > 2, "JE", "NI")
VIFe <- 1/(1- (R2 <- summary(lm(e ~ a + b + c + d + f + g))$r.square))
PMe <- ifelse (sqrt(VIFe) > 2, "JE", "NI")
VIFf <- 1/(1- (R2 <- summary(lm(f ~ a + b + c + d + e + g))$r.square))
PMf <- ifelse (sqrt(VIFf) > 2, "JE", "NI")
VIFg <- 1/(1- (R2 <- summary(lm(g ~ a + b + c + d + e + f))$r.square))
PMg <- ifelse (sqrt(VIFg) > 2, "JE", "NI")
rezultat <- data.frame(c(VIFa, VIFb, VIFc, VIFd, VIFe, VIFf, VIFg),
c(PMa, PMb, PMc, PMd, PMe, PMf, PMg))
names(rezultat) <- c("VIF", "Multikolinearnost")
return(as.matrix.data.frame(rezultat))
}
Where a,b,c,d,e,f,g are variables that are used in certain model. VIFa is Variance Inflation Factor of 'a' variable, and PMa is a logical value showing whether variance inflation can cause major discrepancy in model (JE = yes) or not (NO = not).
My question is how to make this function suitable for any number of arguments?
I already tried to use lapply function, however I could not find a way to use each variable once as dependent and all others as independent (for any number of variables).
Try the following:
regapply <- function(l) {
ids <- names(l)
n <- length(l)
vifs <- numeric(n)
pms <- character(n)
for (i in seq_along(l)) {
f <- parse(text=sprintf("%s ~ %s", ids[i], paste(ids[-i], collapse=" + ")))[[1]]
vifs[i] <- 1/(1- (R2 <- summary(lm(f, data=l))$r.square))
pms[i] <- ifelse (sqrt(vifs[i]) > 2, "JE", "NI")
}
data.frame(var=ids, vif=vifs, pm=pms)
}
Call the above on a named list (e.g. a data frame), for example:
regapply(iris[-5])
## var vif pm
## 1 Sepal.Length 7.072722 JE
## 2 Sepal.Width 2.100872 NI
## 3 Petal.Length 31.261498 JE
## 4 Petal.Width 16.090175 JE
Explanation: parse(text=sprintf("%s ~ %s", ids[i], paste(ids[-i], collapse=" + ")))[[1]] creates a series of formulas. In the above example we have:
Sepal.Length ~ Sepal.Width + Petal.Length + Petal.Width
Sepal.Width ~ Sepal.Length + Petal.Length + Petal.Width
Petal.Length ~ Sepal.Length + Sepal.Width + Petal.Width
Petal.Width ~ Sepal.Length + Sepal.Width + Petal.Length
Here is another solution using an ellipsis for the varying number of arguments.
The regression models that you apply contain solely additive terms (see my comment on your question). In this case, you can loop over the variables (e.g. in a data.frame).
vif <- function(...){
dat <- data.frame(...)
n <- ncol(dat)
out <- data.frame(VIF=numeric(n), MK=numeric(n))
for(ii in 1:n){
dv <- colnames(dat)[ii]
iv <- colnames(dat)[-ii]
fml <- as.formula(paste(dv,paste(iv,collapse="+"),sep="~"))
VIF <- 1/(1- (R2 <- summary(lm(fml))$r.square))
KM <- ifelse(sqrt(VIF)>2, "JE", "NI")
out[ii,] <- c(round(VIF,5),KM)
}
return(out)
}
Works for both variables and data.frames as input.
a <- c(1,2,3,4,5)
b <- c(1,3,2,2,4)
x <- c(3,3,2,4,5)
dat <- data.frame(a,b,x)
# > vif(a,b,x)
# VIF MK
# 1 2.47059 NI
# 2 2.06471 NI
# 3 2.06471 NI
# > vif(dat)
# VIF MK
# 1 2.47059 NI
# 2 2.06471 NI
# 3 2.06471 NI
Cheers!

Resources