crr output list- remove df$ from coefficients? - r

I am using the cmprsk package to create a series of regressions. In the real models I used, I specified my models in the same way that is shown in the example that produces mel2 below. My problem is, I want the Melanoma$ in front of the coefficients to go away, as happens if I had specified the model like in mel1. Is there a way to delete that data frame prefix out of the object without re-running it?
library(cmprsk)
data(Melanoma, package = "MASS")
head(Melanoma)
mel1 <- crr(ftime = Melanoma$time, fstatus = Melanoma$status, cov1 = Melanoma[, c("sex", "age")], cencode = 2)
covs2 <- model.matrix(~ Melanoma$sex + Melanoma$age)[, -1]
mel2 <- crr(ftime = Melanoma$time, fstatus = Melanoma$status, cov1 = covs2, cencode = 2)
What I want:
What I have:

You could use the data argument in model.matrix, and wrap the crr call in with(Melanoma, ...)
covs2 <- model.matrix(~ sex + age, data = Melanoma)[, -1]
mel2 <- with(Melanoma, crr(ftime = time, fstatus = status,
cov1 = covs2, cencode = 2))
mel2$coef
#> sex age
#> 0.58838573 0.01259388
If you are stuck with existing models like this:
covs2 <- model.matrix(~ Melanoma$sex + Melanoma$age)[, -1]
mel2 <- crr(ftime = Melanoma$time, fstatus = Melanoma$status,
cov1 = covs2, cencode = 2)
You could simply rename the coefficients like this
names(mel2$coef) <- c("sex", "age")
mel2
#> convergence: TRUE
#> coefficients:
#> sex age
#> 0.58840 0.01259
#> standard errors:
#> [1] 0.271800 0.009301
#> two-sided p-values:
#> sex age
#> 0.03 0.18

Related

Extract categorical coeffients and all p-values from a mixed model into a data table

Here is a reproduceable code and sample data
I want to achieve a final data table with 3 columns: 1. exposure quantile 2. OR/RR 3. PV
set.seed(42)
n <- 100
dat = data.frame(ID = rep(c(1:25),times=4 ) ,
Score = rnorm(n, mean=0.3, sd=0.8))
dat = dat %>%
group_by(ID)%>%
dplyr::mutate(exposure1 = rep(c(rnorm(1, mean=6, sd=1.8))),
exposure2 = rep(c(rnorm(1, mean=3, sd=0.6))),
age = rep(c(rnorm(1, mean=40, sd=15))))%>%
ungroup()%>%
dplyr::mutate(exposure1_quantile = cut(exposure1, breaks = 4, labels = c("Q1","Q2","Q3","Q4")),
exposure2_quantile = cut(exposure2, breaks = 4, labels = c("Q1","Q2","Q3","Q4")))
exposures_var = c("exposure1_quantile","exposure2_quantile")
exposure_var_labels("exposure1 Q1","exposure1 Q2 ", "exposure 1 Q3",
"exposure2 Q1","exposure2 Q2 ", "exposure2 Q3")
age="age"
outcome = "Score"
exposure_data_table = c()
for(i in 1:length(exposures_var)){
exp = exposures_var[i]
fixed_effects_formula = paste0(outcome, "~",exp,"+",age)
fixed_effects_formula = as.formula(fixed_effects_formula)
mixedmodel = lme(fixed =fixed_effects_formula, random = ~1|ID, data=dat, method = "ML")
for(m in 2:4){
v = mixedmodel$coefficients$fixed[m]
vector = c(exp , v)
#P=p value for every quantile (HOW TO ADD?)
#exposure_name = exposure_var_labels[?] (HOW TO ADD LABEL)
exposure_data_table = rbind(exposure_data_table, vector)
}
}
exposure_data_table = as.data.table(exposure_data_table)
colnames(exposure_data_table)=c("Exposure","RR")#,"pv")
view(exposure_data_table)
I first used anova to try and get the pvalue but it didnt work.
I think a tidymodels approach using lme would work well here:
library(nlme)
library(tidymodels)
library(multilevelmod)
library(data.table)
lme_spec <-
linear_reg() %>%
set_engine("lme", random = ~ 1 | ID)
Map(function(exp) {
fixed_effects_formula <- as.formula(paste0("Score~",exp,"+ age +", 0))
lme_spec %>%
fit(fixed_effects_formula, data = dat) %>%
broom.mixed::tidy() %>%
filter(effect == "fixed", grepl("exposure", term)) %>%
select(term, estimate, std.error, p.value)
}, exposures_var) %>%
bind_rows() %>%
as.data.table()
#> term estimate std.error p.value
#> 1: exposure1_quantileQ1 -0.16147364 0.3532834 0.6525497
#> 2: exposure1_quantileQ2 0.22318505 0.2719366 0.4214784
#> 3: exposure1_quantileQ3 0.24976757 0.3484126 0.4817411
#> 4: exposure1_quantileQ4 0.14177064 0.4020702 0.7280757
#> 5: exposure2_quantileQ1 0.28976458 0.4191198 0.4972840
#> 6: exposure2_quantileQ2 0.19907863 0.2699164 0.4693496
#> 7: exposure2_quantileQ3 0.35040767 0.2827229 0.2295436
#> 8: exposure2_quantileQ4 -0.09587234 0.3533819 0.7889412
Created on 2022-08-07 by the reprex package (v2.0.1)

huxreg - exponentiated coefficients and updated standard errors

I am running some CRR models from the package cmprsk. I am working towards outputting the results using huxreg. I have used tidy_args to get exponentiated coefficients, per this q&a, but it didn't have an answer on updating the standard errors along with the exp(coef)'s. SE's are really the value I want (because it matches the other tables I made using different regression analysis, and I want to carry that theme through the paper). Any advice on how I can do a workaround to get them?
library(cmprsk)
data(Melanoma, package = "MASS")
head(Melanoma)
covs1 <- model.matrix(~ Melanoma$sex)[, -1]
covs2 <- model.matrix(~ Melanoma$sex + Melanoma$age)[, -1]
covs3 <- model.matrix(~ Melanoma$sex*Melanoma$age)[, -1]
mel1 <- crr(ftime = Melanoma$time, fstatus = Melanoma$status, cov1 = covs1, cencode = 2)
mel2 <- crr(ftime = Melanoma$time, fstatus = Melanoma$status, cov1 = covs2, cencode = 2)
mel3 <- crr(ftime = Melanoma$time, fstatus = Melanoma$status, cov1 = covs3, cencode = 2)
summary(mel1)
summary(mel2)
summary(mel3)
huxreg(mel1, mel2, mel3, tidy_args = list(exponentiate = TRUE))

tbl_regression(), plm, and mice - Error: Tibble columns must have compatible sizes

I am trying to print a regression model using tbl_regression() on a plm object with multiply imputed data. I've found that I can print the regression table if the plm has one independent variable, but not if it has two or more independent variables.
I understand that the below error message is common, but I don't understand what it means in the context of tbl_regression and multiply imputed data. Is there a bug in gtsummary, or is something wrong with my code?
library(mice, warn.conflicts = FALSE)
library(mitools)
library(missMethods)
library(plm)
library(gtsummary)
options(scipen=999)
set.seed(12345)
data("Grunfeld")
df <- delete_MCAR(Grunfeld, p = 0.3, cols_mis = c(3:5))
imp <- mice::mice(df, m = 5, print = FALSE)
implist <- imputationList(
lapply(1:imp$m, function(n) mice::complete(imp, action = n)))
fit1 <- lapply(implist$imputations, function(x){ plm(inv ~ value, data = x, model = "within", index = c("firm", "year"))})
#tbl_regression(as.mira(fit1)) # works
fit2 <- lapply(implist$imputations, function(x){ plm(inv ~ value + capital, data = x, model = "within", index = c("firm", "year"))})
tbl_regression(as.mira(fit2)) # does not work
#> pool_and_tidy_mice(): Tidying mice model with
#> `mice::pool(x) %>% mice::tidy(exponentiate = FALSE, conf.int = TRUE, conf.level = 0.95)`
#> Error: Tibble columns must have compatible sizes.
#> * Size 2: Existing data.
#> * Size 3: Column `variable`.
#> ℹ Only values of size one are recycled.
Thank you! This code now works under the current version of gtsummary.
library(mice, warn.conflicts = FALSE)
library(mitools)
library(missMethods)
library(plm)
library(gtsummary)
options(scipen=999)
set.seed(12345)
mice::version(pkg = "gtsummary")
#> [1] "gtsummary 1.5.1.9001 /Library/Frameworks/R.framework/Versions/4.1/Resources/library"
data("Grunfeld")
df <- delete_MCAR(Grunfeld, p = 0.3, cols_mis = c(3:5))
imp <- mice::mice(df, m = 5, print = FALSE)
implist <- imputationList(
lapply(1:imp$m, function(n) mice::complete(imp, action = n)))
fit2 <- lapply(implist$imputations, function(x){ plm(inv ~ value + capital, data = x, model = "within", index = c("firm", "year"))})
tbl_regression(as.mira(fit2))
#> pool_and_tidy_mice(): Tidying mice model with
#> `mice::pool(x) %>% mice::tidy(exponentiate = FALSE, conf.int = TRUE, conf.level = 0.95)`

create a reusable part in R formula

That is not a duplicate of this (Edit and reuse the formula part of the call for a model in R) but rather an extension.
So let's say I have several regression models to test. I also have a set of socio demographic variables that I want to add to each model to control for them.
m1 <- lm(a~b)
m1_full <- lm(a~b+age+gender+income)
m2 <- lm(c~d)
m2_full <- lm(c~d+age+gender+income)
Is there a way to store (age+gender+income) and add it to some models?
Something like that (in pseudocode):
ses <- age+gender+income
m1 <- lm(a~b)
m1_full <- lm(a~b+... ses)
m2 <- lm(c~d)
m2_full <- lm(c~d+...ses)
I guess you can just paste everything together into a formula. Here's a function that would do the whole process:
set.seed(0)
df <- data.frame(age = rpois(200,45),
gender = rbinom(200,1,0.5),
income = rnorm(200,30000,10000),
a = rnorm(200),
b = rnorm(200),
c = rnorm(200),
d = rnorm(200))
ses <- c("age", "gender", "income")
get_lm_model <- function(df, outcome_var, pred_var, ses) {
fm <- as.formula(paste(outcome_var, "~", pred_var, "+", paste(ses, collapse = " + ")))
lm(fm, data = df)
}
get_lm_model(df, "a", "b", ses)
#>
#> Call:
#> lm(formula = fm, data = df)
#>
#> Coefficients:
#> (Intercept) b age gender income
#> 3.345e-01 -9.516e-02 -3.748e-03 -7.033e-02 -6.718e-06
get_lm_model(df, "c", "d", ses)
#>
#> Call:
#> lm(formula = fm, data = df)
#>
#> Coefficients:
#> (Intercept) d age gender income
#> 2.775e-01 5.570e-02 -4.564e-03 -5.359e-02 -5.898e-06
Created on 2021-10-26 by the reprex package (v2.0.1)

Making a function for matching on multiple dependent variables, purrr

I want to estimate the matched treatment effect using the Matching package on multiple dependent variables.
For just a single dependent variable, I can run the below which returns what I want:
library(carData)
library(purrr)
library(tidyverse)
library(Matching)
matching_df <- Mroz %>%
mutate(wc = case_when(wc == "yes" ~ "TRUE",
wc == "no" ~ "FALSE")) %>%
drop_na(k5, k618, age, wc, hc, lfp)
matching_df$wc <- as.logical(matching_df$wc)
ps1 <- glm(wc ~ k5 + k618 + age + hc,
family = binomial, data = matching_df)
pscore <- ps1$fitted.values
matching_df <- cbind(matching_df, pscore)
Y <- matching_df$lfp
Tr <- as.logical(matching_df$wc)
psm1 <- Matching::Match(
Y = Y,
Tr = Tr,
X = pscore,
estimand = "ATT",
M = 1,
replace = TRUE,
caliper = 0.05,
version = "fast")
summary(psm1)
Estimate... 0.17479
SE......... 0.044963
T-stat..... 3.8873
p.val...... 0.00010135
Original number of observations.............. 753
Original number of treated obs............... 212
Matched number of observations............... 207
Matched number of observations (unweighted). 1074
Caliper (SDs)........................................ 0.05
Number of obs dropped by 'exact' or 'caliper' 5
But when I try and make a function using purrr:map_dfr so I can repeat this operation for multiple dependent variables, it returns an error. This is my attempt at the function:
vars <- c("lfp", "lwg", "inc")
names(vars) <- vars
matching_fcn <- function(.x){
matching_df <- Mroz %>%
mutate(wc = case_when(wc == "yes" ~ "TRUE",
wc == "no" ~ "FALSE")) %>%
drop_na(k5, k618, age, wc, hc, .x)
matching_df$wc <- as.logical(matching_df$wc)
ps1 <- glm(wc ~ k5 + k618 + age + hc,
family = binomial, data = matching_df)
pscore <- ps1$fitted.values
matching_df <- cbind(matching_df, pscore)
Y <- matching_df$.x
Tr <- as.logical(matching_df$wc)
psm1 <- Matching::Match(
Y = Y,
Tr = Tr,
X = pscore,
estimand = "ATT",
M = 1,
replace = TRUE,
caliper = 0.05,
version = "fast")
summary(psm1)
}
purrr::map_dfr(
.x = all_of(vars),
.f = matching_fcn)
Error: All columns in a tibble must be vectors.
x Column `lfp` is a `summary.Match` object.
x Column `lwg` is a `summary.Match` object.
x Column `inc` is a `summary.Match` object.
Run `rlang::last_error()` to see where the error occurred.
Ultimately, I would like a tibble which includes the name of the dependent variable in one column, then the estimate, se, T-stat, and p.val that are returned by the Matching::Match function in other columns
The summary(psm1) can't be put into a tibble. So choose some values of psm1 and make your own. Further, drop_na is no good idea and will bias your results.
library(Matching)
vars <- c("dependent_var_1", "dependent_var_2", "dependent_var_3")
names(vars) <- vars
matching_fcn <- function(.x){
# matching_df <- matching_df %>%
# drop_na(covar_1, covar_2, covar_3, covar_4, covar_5, covar_6, covar_7, treat_1, .x)
ps1 <- glm(treat_1 ~ covar_1 + covar_2 + covar_3 + covar_4 + covar_5 + covar_6 + covar_7,
family = binomial, data = matching_df)
pscore <- ps1$fitted.values
matching_df <- cbind(matching_df, pscore)
Y <- matching_df[[.x]]
Tr <- matching_df$treat_1
psm1 <- Matching::Match(
Y = Y,
Tr = Tr,
X = pscore,
estimand = "ATT",
M = 1,
replace = TRUE,
caliper = 0.05,
version = "fast")
p <- 1 - pnorm(abs(psm1$est.noadj/psm1$se.standard))
with(psm1, tibble(dv=.x, est=est.noadj, se=se.standard, p=p, ndrops=ndrops))
}
Usage and result
library(dplyr)
library(tidyr)
purrr::map_df(
.x = tidyselect::all_of(vars),
.f = matching_fcn)
# # A tibble: 3 × 5
# dv est se p ndrops
# <chr> <dbl> <dbl> <dbl> <dbl>
# 1 dependent_var_1 0.652 0.231 0.00238 8
# 2 dependent_var_2 -0.216 0.188 0.125 8
# 3 dependent_var_3 -0.506 0.249 0.0210 8
Data
v <- c('covar_1', 'covar_2', 'covar_3', 'covar_4', 'covar_5', 'covar_6',
'covar_7', 'treat_1', 'dependent_var_1', 'dependent_var_2', 'dependent_var_3')
set.seed(830595665)
matching_df <- data.frame(matrix(rnorm(100*length(v)), 100, length(v), dimnames=list(c(), v)))
matching_df$treat_1 <- +(matching_df$treat_1 > 0)

Resources