remove spaces added with using as.formula - r

I create a formula from text, but the resulting formula contains unwanted spaces. Is there a way to prevent this.
# text in which variables for formula are stored
mainEffectText ="age sex bmi cohort"
interactionText="age*cohort"
# get text into R
mainEffects <- read.table(textConnection(mainEffectText))
mainEffects <- t(mainEffects)
mainEffects <- as.character(mainEffects[,1])
interactions <- read.table(textConnection(interactionText))
interactions <- t(interactions)
interactions <- as.character(interactions[,1])
# put all terms into one vector
allTerms <- c(mainEffects,interactions)
# create formula
form <- as.formula(paste("Surv(time, event) ~ ",paste(allTerms, collapse=" +")))
This gives the following with spaces added to the interaction terms:
form
# Surv(time, event) ~ age + sex + bmi + cohort + age * cohort

The spaces are not really a problem but anyways try this:
# inputs
mainEffectText <- "age sex bmi cohort"
interactionText <- "age*cohort"
lhs <- "Surv(time, event)"
both <- paste(mainEffectText, interactionText)
paste0(gsub(" ", "", lhs), "~", gsub(" ", "+", both))
giving:
"Surv(time,event)~age+sex+bmi+cohort+age*cohort"
If the inputs are of this form (lhs is as above), then use:
# inputs
mainEffects <- c("age", "sex", "bmi", "cohort")
interactions <- "age*cohort"
both <- paste(paste(mainEffects, collapse = " "),
paste(interactions, collapse = " "))
paste0(gsub(" ", "", lhs), "~", gsub(" ", "+", both))

Related

In the Stargazer package in R, how can one add a line separator for the resulting output table?

I have two regressions to report in stargazer in R, with add.lines() adding a predicted table at the end. My table currently looks like:
but I want to add a line right below "Predicted Values on x Values", so that it is its own row like we have in the "Observations" row. Is there a way to do this?
Code to generate regression data:
x <- 1:100
y <- rnorm(100, 4*x, 5)
mod1 <- lm(y ~ x)
mod2 <- lm(y ~ 1)
se1 <- summary(mod1)$coef[2,2]
se2 <- summary(mod2)$coef[1,2]
mod1.pred1 <- predict(mod1, newdata=data.frame(x=1))
mod2.pred1 <- predict(mod2, newdata=data.frame(x=1))
mod1.pred2 <- predict(mod1, newdata=data.frame(x=2))
mod2.pred2 <- predict(mod2, newdata=data.frame(x=2))
Stargazer output with table:
stargazer(mod1, mod2, se = list(se1, se2), out="Results.html", notes="Two Models", keep.stat="n", type = "text",
table.layout ="ldmc#-t-s-a=n",
add.lines = list(
c("Predicted Values on x Values"),
c("", "", "", ""), # add empty list element
c("Predict when x=1",mod1.pred1,mod2.pred1),
c("Predict when x=2",mod1.pred2,mod2.pred2)),
add.lines.separator = c(1) # add separator after fourth element)

How to calculate Standardized Mean Difference for Table1 Package in R?

I am using the package "table1" to create a fancy table one with extra column containing the standardized mean difference of continuous variables in my dataset.
The SMD should be a combination between the treatment and control groups stratified for a given variable within the table.
I am struggling to figure out a good way of doing this and would love some help creating the function to calculate SMD.
Here is some sample code:
f <- function(x, n, ...) factor(sample(x, n, replace=T, ...), levels=x)
set.seed(427)
n <- 146
dat <- data.frame(id=1:n)
dat$treat <- f(c("Placebo", "Treated"), n, prob=c(1, 2)) # 2:1 randomization
dat$age <- sample(18:65, n, replace=TRUE)
dat$sex <- f(c("Female", "Male"), n, prob=c(.6, .4)) # 60% female
dat$wt <- round(exp(rnorm(n, log(70), 0.23)), 1)
# Add some missing data
dat$wt[sample.int(n, 5)] <- NA
label(dat$age) <- "Age"
label(dat$sex) <- "Sex"
label(dat$wt) <- "Weight"
label(dat$treat) <- "Treatment Group"
units(dat$age) <- "years"
units(dat$wt) <- "kg"
my.render.cont <- function(x) {
with(stats.apply.rounding(stats.default(x), digits=2), c("",
"Mean (SD)"=sprintf("%s (± %s)", MEAN, SD)))
}
my.render.cat <- function(x) {
c("", sapply(stats.default(x), function(y) with(y,
sprintf("%d (%0.0f %%)", FREQ, PCT))))
}
#My attempt at an SMD function
smd_value <- function(x, ...) {
x <- x[-length(x)] # Remove "overall" group
# Construct vectors of data y, and groups (strata) g
y <- unlist(x)
g <- factor(rep(1:length(x), times=sapply(x, length)))
if (is.numeric(y) & g==1) {
# For numeric variables, calculate SMD
smd_val1 <- (mean(y)/sd(y))
} else if (is.numeric(y) & g==2) {
# For numeric variables, calculate SMD
smd_val2 <- (mean(y)/sd(y))
} else {print("--")
}
smd_val <- smdval2 - smdval1
}
table1(~ age + sex + wt | treat, data=dat, render.continuous=my.render.cont, render.categorical=my.render.cat, extra.col=list(`SMD`=smd_value))
I get the following error:
"Error in if (is.numeric(y) & g == 1) { : the condition has length > 1"
Any insight into a potential solution?
Thanks!
Here you go!
# Install Packages---------------------------------------------------
library(stddiff)
library(cobalt)
library(table1)
library(Hmisc)
#Using 'mtcars' as an example
my_data<-mtcars
# Format variables--------------------------------------------------------------
# amd - Transmission (0 = automatic; 1 = manual)
my_data$am <-factor(my_data$am,
levels = c(0,1),
labels =c("Automatic","Manual"))
label(my_data$am) <-"Transmission Type" #adding a label for the variable
# vs - Engine (0 = V-shaped, 1 = Straight)
my_data$vs <-factor(my_data$vs,
levels = c(0,1),
labels =c("V-shaped","Straight"))
label(my_data$vs) <-"Engine"
# Adding a label to the numeric variables
label(my_data$mpg)<-"Miles per gallon"
label(my_data$hp)<-"Horsepower"
# SMD FUNCTION------------------------------------------------------------------
SMD_value <- function(x, ...) {
# Construct vectors of data y, and groups (strata) g
y <- unlist(x)
g <- factor(rep(1:length(x), times=sapply(x, length)))
if (is.numeric(y)) {
# For numeric variables
try({a<-data.frame(y)
a$g<-g
smd<-(as.data.frame(stddiff.numeric(data=a,gcol = "g", vcol = "y")))$stddiff
},silent=TRUE)
} else {
# For categorical variables
try({
a<-data.frame(y)
a$g<-g
smd<-(abs((bal.tab(a, treat = "g",data=a,binary="std",continuous =
"std",s.d.denom = "pooled",stats=c("mean.diffs"))$Balance)$Diff.Un))
},silent=TRUE)
}
c("",format(smd,digits=2)) #Formatting number of digits
}
# CONTINUOUS VARIABLES FORMATTING-----------------------------------------------
my.render.cont <- function(x) {
with(stats.default(x),
c("",
"Mean (SD)" = sprintf("%s (%s)",
round_pad(MEAN, 1),
round_pad(SD, 1)),
"Median (IQR)" = sprintf("%s (%s, %s)",
round_pad(MEDIAN, 1),
round_pad(Q1, 1),
round_pad(Q3, 1)))
)}
# Creating the final table-----------------------------------------------------
Table1<-table1(~ vs + mpg + hp | am,
data=my_data,
overall = FALSE,
render.continuous = my.render.cont,
extra.col=list(`SMD`=SMD_value)) #SMD Column
Table1 #displays final table

Variable parts formula for Shiny

In this part of my Shiny app, I'll do a 'linear model' (lm()) regression, using the variables the user selects. There are three inputs:
input$lmTrendFun is a selectInput(), with the options c("Linear", "Exponential", "Logarithmic", "Quadratic", "Cubic"):
selectInput("lmTrendFun", "Select the model for your trend line.",
choices = c("Linear", "Exponential", "Logarithmic", "Quadratic", "Cubic"))
The second input is input$lmDep, and it's a selectInput() too. I created a updateSelectInput first inside an observe() reactive function, so the choices are the column names from the imported tibble.
The third input is input$lmInd and it's a checkboxGroupInput(), the choices being all the column names other than the one that's already the input$lmInd.
From that I want this output: the lm() (or rather, summary.lm() or summary(lm())) result for those variables. If I knew which they were, it would be simple:
if(input$lmTrendFun == "Linear"){
form <- yname ~ x1 + x2
}else if(input$lmTrendFun == "Exponential"){
form <- yname~ exp(x1) + exp(x2)
}else if(input$lmTrendFun == "Logarithmic"){
form <- yname~ log(x1) + log(x2)
}else if(input$lmTrendFun == "Quadratic"){
form <- yname ~ poly(x1, 2) + poly(x2, 2)
}else if(input$lmTrendFun == "Cubic"){
form <- y ~ poly(x1, 3) + poly(x2, 3)
}
[...]
lm(form, data = .)
where the data (.) has the columns yname, x1 and x2.
However, I don't. So I believe I need some more generic function that can create the formula. How can this be done?
formulizer <- function() as.formula(paste0( input$lmDep, "~", switch(input$lmTrendFun,
Linear = paste0(input$lmInd, collapse=" + "),
Logarithmic = paste0("exp(", input$lmInd,")", collapse=" + "),
Quadratic = paste0("poly(", input$lmInd,", 2)", collapse=" + "),
Cubic = paste0("poly(", input$lmInd,", 3)", collapse=" + ") )))
> input <- list(lmInd=paste0("V", 1:5), lmTrendFun="Linear", lmDep="Vp")
> formulaizer()
Vp ~ V1 + V2 + V3 + V4 + V5
<environment: 0x7fad1cf63d48>
> input <- list(lmInd=paste0("V", 1:5), lmTrendFun="Logarithmic", lmDep="Vp")
> formulizer()
Vp ~ exp(V1) + exp(V2) + exp(V3) + exp(V4) + exp(V5)
<environment: 0x7fad01e694d0>
> input <- list(lmInd=paste0("V", 1:5), lmTrendFun="Quadratic", lmDep="Vp")
> formulizer()
Vp ~ poly(V1, 2) + poly(V2, 2) + poly(V3, 2) + poly(V4, 2) +
poly(V5, 2)
<environment: 0x7fad01f51d20>
> input <- list(lmInd=paste0("V", 1:5), lmTrendFun="Cubic", lmDep="Vp")
> formulizer()
Vp ~ poly(V1, 3) + poly(V2, 3) + poly(V3, 3) + poly(V4, 3) +
poly(V5, 3)
<environment: 0x7fad01f59690>
Consider switch with vectorized paste0 to build terms with transformations and then pass terms into reformulate. Adjust below inputs to actual Shiny variables:
dep_term <- ...
ind_terms <- ...
form <- switch(input$lmTrendFun,
Linear = reformulate(ind_terms, response="yname"),
Exponential = reformulate(paste0("exp(", ind_terms, ")"), response=dep_term),
Logarithmic = reformulate(paste0("log(", ind_terms, ")"), response=dep_term),
Quadratic = reformulate(paste0("poly(", ind_terms, ", 2)"), response=dep_term),
Cubic = reformulate(paste0("poly(", ind_terms, ", 3)"), response=dep_term)
)
Online Demo

R Step function looks for data in global environment, not inside defined function

I have a problem with step forward regression and My understanding is that i don't pass argument Data correctly.
I have the function:
ForwardStep <- function(df,yName, Xs, XsMin) {
Data <- df[, c(yName,Xs)]
fit <- glm(formula = paste(yName, " ~ ", paste0(XsMin, collapse = " + ")),
data = Data, family = binomial(link = "logit") )
ScopeFormula <- list(lower = paste(yName, " ~ ", paste0(XsMin, collapse = " + ")),
upper = paste(yName, " ~ ", paste0(Xs, collapse = " + ")))
result <- step(fit, direction = "forward", scope = ScopeFormula, trace = 1 )
return(result)
}
When I try to run it with following arguments
df <- data.frame(Y= rep(c(0,1),25),time = rpois(50,2), x1 = rnorm(50, 0,1),
x2 = rnorm(50,.5,2), x3 = rnorm(50,0,1))
yName = "Y"
Xs <- c("x1","x2","x3")
XsMin <- 1
res <- ForwardStep(df,Yname,Xs,XsMin)
I am getting an Error:
Error in is.data.frame(data) : object 'Data' not found
But if I first define Data in Global Env it works perfectly fine.
Data <- df[, c(yName,Xs)]
res <- ForwardStep(df,Yname,Xs,XsMin)
I guess that I have wrong implementation of function step however I don't exactly know how to do it the right way.
You need to realize that formulas always have an associated environment, see help("formula"). One should never pass text to the formula parameter of model functions, never ever. If you do that, you will encounter scoping issues sooner or later. Usually, I'd recommend computing on the language instead, but you can also create the formulas from text in the correct scope:
ForwardStep <- function(df,Yname, Xs, XsMin) {
Data <- df[, c(Yname,Xs)]
f1 <- as.formula(paste(Yname, " ~ ", paste0(XsMin, collapse = " + ")))
fit <- glm(formula = f1,
data = Data, family = binomial(link = "logit") )
f2 <- as.formula(paste(Yname, " ~ ", paste0(XsMin, collapse = " + ")))
f3 <- as.formula(paste(Yname, " ~ ", paste0(Xs, collapse = " + ")))
ScopeFormula <- list(lower = f2,
upper = f3)
step(fit, direction = "forward", scope = ScopeFormula, trace = 1)
}
df <- data.frame(Y= rep(c(0,1),25),time = rpois(50,2), x1 = rnorm(50, 0,1),
x2 = rnorm(50,.5,2), x3 = rnorm(50,0,1))
YName = "Y"
Xs <- c("x1","x2","x3")
XsMin <- 1
res <- ForwardStep(df,YName,Xs,XsMin)
#Start: AIC=71.31
#Y ~ 1
#
# Df Deviance AIC
#<none> 69.315 71.315
#+ x1 1 68.661 72.661
#+ x3 1 68.797 72.797
#+ x2 1 69.277 73.277
(Public service announcement: step-wise regression is a garbage generator. There are better statistical techniques available.)

Creating complicated formula's with a function

I am using the following function to make formula's, where I can simply assign vector of variable names, where the function makes sure everything is in the right place and double variable names are excluded:
formula <- function(depvar, indepvars, instruments=NULL, othervars=NULL) {
x <- c(indepvars, instruments, othervars)
totvars <- unique(x)
totvars <- x[!x %in% depvar]
formula <- as.formula(
paste(depvar, paste(totvars, collapse = " + "), sep = " ~ "))
return(formula)
}
indepvars <- c("indepvarA", "indepvarB", "indepvarC")
instruments <- c("IV_A", "IV_B")
# lm
formula("depvar", indepvars)
# 1st stage - IV's for indepvarC
formula("indepvarC", indepvars, instruments)
However, I want the option to write a more complicated formula (an ivreg formula), namely:
depvar ~ instrumentedvar + indepvars | instrumentvars + indepvars
I have been trying the following:
formula <- function(depvar, indepvars, instruments=NULL, instrumentedvar=NULL, othervars=NULL, twostage=NULL) {
x <- c(indepvars, instruments, othervars)
totvars <- unique(x)
totvars <- x[!x %in% depvar]
if (is.null(twostage)) {
formula <- as.formula(
paste(depvar, paste(totvars, collapse = " + "), sep = " ~ "))
} else {
totvarsB <- totvars[!totvars %in% instrumentedvar]
totvarsB <- c(as.character(totvarsB), as.character(instruments))
formula <- as.formula(
paste(depvar, paste(paste(totvars, collapse = " + "), paste("|", paste(totvarsB, collapse = " + " )), sep = " ~ ")))
}
return(formula)
}
indepvars <- c("indepvarA", "indepvarB", "indepvarC")
instruments <- c("IV_A", "IV_B")
instrumentedvar <- "indepvarC"
formula("indepvarC", indepvars, instruments, twostage=1)
But I cannot seem to get it right.
Define reform which takes a vector of names and outputs a string in which they are connected with plus signs. Then use sprintf to generate the final string and convert that using as.formula:
reform <- function(x) paste(x, collapse = " + ")
makeFo <- function(lhs, rhs1, rhs2 = NULL, env = parent.frame()) {
s <- sprintf("%s ~ %s", lhs, reform(c(rhs1, rhs2)))
if (!missing(rhs2)) s <- sprintf("%s | %s", s, reform(rhs2))
as.formula(s, env = env)
}
# test
makeFo("y", c("x1", "x2"))
## y ~ x1 + x2
makeFo("y", c("x1", "x2"), c("u1", "u2"))
## y ~ x1 + x2 + u1 + u2 | u1 + u2

Resources