I want to generate several tbl_summary tables by looping (with lapply) over similar categorical variables (var1, var2, var3), passing each one to the "by = " argument, and then give each resulting table an object name, e.g. "tbl_var1", "tbl_var2" and "tbl_var3".
# Placeholder names of the columns to stratify by.
dflist <- c("var1", "var2", "var3")

# One summary table per entry of `dflist`; the result is an unnamed list.
vartbls <- lapply(dflist, function(by_var) {
  tbl_summary(
    select(trial, age, grade, response, trt),
    by = by_var,
    label = list(age ~ "Patient Age"),
    statistic = list(all_continuous() ~ "{mean} ({sd})"),
    digits = list(age ~ c(0, 1))
  )
})
Here is a reprex with a working version of your function with code to set the names:
library(gtsummary)
# Columns of `trial` to stratify by; one table is built per column.
dflist <- c("age", "grade")

# Build one tbl_summary per stratifying column.
vartbls <- lapply(dflist, function(x) {
  trial %>%
    select(age, grade, response, trt) %>%
    tbl_summary(
      # `x` is a character string living outside the data, so wrap it in
      # all_of() to make the selection unambiguous.
      by = dplyr::all_of(x),
      label = list(age ~ "Patient Age"),
      statistic = list(all_continuous() ~ "{mean} ({sd})"),
      digits = list(age ~ c(0, 1))
    )
})

# Name every table after its stratifying column: "tbl_age", "tbl_grade".
names(vartbls) <- paste0("tbl_", dflist)
Here is a version using {purrr} and setting names before iterating:
library(gtsummary)
library(purrr)
# Named list of summary tables: the names ("tbl_trt", "tbl_grade") are set
# before iterating, so purrr::map() carries them over to the result.
result <-
  c("trt", "grade") %>%
  purrr::set_names(paste0("tbl_", .)) %>%
  purrr::map(
    .,
    ~ trial %>%
      select(age, grade, response, trt) %>%
      tbl_summary(
        # `.x` is a character string, not a column of the data; all_of()
        # makes the selection unambiguous.
        by = dplyr::all_of(.x),
        label = list(age ~ "Patient Age"),
        statistic = list(all_continuous() ~ "{mean} ({sd})"),
        digits = list(age ~ c(0, 1))
      )
  )
Related
I've got the following reprex
library(tidyverse)
library(gtsummary)
set.seed(50)

# Simulated data: 100 rows, 50 for each of the two years.
dat <- data.frame(
  exposed = sample(c("Unexposed", "Exposed"), 100, TRUE),
  year = rep(c(1985, 1986), each = 50),
  Age = rnorm(100, 85, 1),
  Transit = sample(c("Bus", "Train", "Walk", "Car"), 100, TRUE)
)

# Summary of Age and Transit by exposure status for a single stratum.
summarise_by_exposure <- function(stratum) {
  tbl_summary(
    stratum,
    by = exposed,
    include = c(Age, Transit),
    statistic = list(Age ~ "{mean} ± {sd}"),
    digits = Age ~ 1,
    label = Age ~ "Age, mean ± SD"
  )
}

# One summary per year, level-only column headers, all footnotes removed.
dat %>%
  tbl_strata(strata = year, .tbl_fun = summarise_by_exposure) %>%
  modify_header(all_stat_cols() ~ "**{level}**") %>%
  modify_footnote(update = everything() ~ NA)
which produces this table:
but when I try to add a new, separate footnote, the previous one gets overwritten
# Reproduction of the footnote problem: the same stratified table as before,
# with three row-level footnotes attached to the label column.
dat %>%
tbl_strata(strata = year,
~ .x %>%
tbl_summary(
by = exposed,
include = c(Age, Transit),
statistic = list(Age ~ "{mean} ± {sd}"),
digits = Age ~ 1,
label = Age ~ "Age, mean ± SD"
)) %>%
modify_header(all_stat_cols() ~ "**{level}**") %>%
# Footnote 1 targets the label cell of the rows where variable == "Age".
modify_table_styling(columns = label,
rows = variable == "Age",
footnote = "Footnote 1") %>%
# Footnotes 2 and 3 both target the same cell: the label column in the row
# where label == "Transit".
modify_table_styling(columns = label,
rows = label == "Transit",
footnote = "Footnote 2") %>%
modify_table_styling(columns = label,
rows = label == "Transit",
footnote = "Footnote 3") %>%
# Applied last, to every column.
modify_footnote(update = everything() ~ NA)
and my table looks like this.
I've tried using modify_footnote as described here but I don't understand the documentation for how to get the footnotes out of the columns and into the rows.
The final output should look something like this.
For example, I have two groups of data, Drug A and Drug B. I would like to add a column with the number of observations of each variable for Drug A only. How can I do that? I can't find a way to do it using add_n.
The code for producing example table:
# Example table: age, grade and response summarised by treatment arm.
tbl_summary_ex2 <-
  tbl_summary(
    data = select(trial, age, grade, response, trt),
    by = trt,
    label = list(age ~ "Patient Age"),
    statistic = list(all_continuous() ~ "{mean} ({sd})"),
    digits = list(age ~ c(0, 1))
  )
Here is one way to do it:
library(gtsummary)
packageVersion("gtsummary")
#> [1] '1.6.1'
# Table restricted to Drug A: add the N column, then hide the statistic
# columns so that only the N column is contributed to the merged table.
tbl_summary_ex1 <-
  select(dplyr::filter(trial, trt == "Drug A"), age, grade, response) %>%
  tbl_summary(
    label = list(age ~ "Patient Age"),
    statistic = list(all_continuous() ~ "{mean} ({sd})"),
    digits = list(age ~ c(0, 1))
  ) %>%
  add_n(col_label = "**Drug A N**") %>%
  modify_column_hide(all_stat_cols())
# Table with both treatment arms side by side.
tbl_summary_ex2 <-
  tbl_summary(
    data = select(trial, age, grade, response, trt),
    by = trt,
    label = list(age ~ "Patient Age"),
    statistic = list(all_continuous() ~ "{mean} ({sd})"),
    digits = list(age ~ c(0, 1))
  )
# Combine the Drug A N column with the by-treatment table, without spanning
# headers.
tbl_final <- tbl_merge(
  list(tbl_summary_ex1, tbl_summary_ex2),
  tab_spanner = FALSE
)
Created on 2022-08-19 by the reprex package (v2.0.1)
I want to create a function that automatically generates tables of summary statistics when I pass it different column names. I am trying to write such a function for gtsummary. I have tried enquo and deparse, but neither seems to help. Can somebody please point out what I am doing wrong here?
# Attempted wrapper around gtsummary::tbl_summary() that takes the column to
# summarise as a bare name. The question reports that this version fails with
# an error from the `type=` argument.
get_stats <- function (var2) {
# Capture the bare column name as a quosure.
var2 <- dplyr::enquo(var2)
# `grp_val` is assigned here but never referenced again in this function.
grp_val <- deparse(substitute(var2))
# `df` is not a parameter of this function; it is looked up outside of it.
df %>%
gtsummary::tbl_summary(.,
by = trt,
missing = "no",
type =
list(!!var2 ~ "continuous2"),
statistic = list(
# The element name below is the literal string "{{var2}}"; nothing is
# substituted into it.
"{{var2}}" = c(
"{N_nonmiss}",
"{mean} ({sd})",
"{median} ({p25}, {p75})",
"{min}, {max}"
)
)
,
digits = !!var2 ~ c(0, 1, 1, 1)
)
}
The error I keep getting is Error: Error in type= argument input. Select from ‘age’, ‘trt’.
When I use this with the trial data without parsing anything it works fine.
# Hard-coded version of the table: `age` summarised on four lines
# (continuous2) by treatment arm, with missing values not shown.
trial %>%
  dplyr::select(age, trt) %>%
  # Pass the function itself; `as.character()` would call it with no input
  # and hand an empty character vector to mutate_if().
  dplyr::mutate_if(is.factor, as.character) %>%
  gtsummary::tbl_summary(
    by = trt,
    missing = "no",
    type = list(age ~ "continuous2"),
    statistic = list(
      "age" = c(
        "{N_nonmiss}",
        "{mean} ({sd})",
        "{median} ({p25}, {p75})",
        "{min}, {max}"
      )
    ),
    digits = age ~ c(0, 1, 1, 1)
  )
Expected output from the code
Using rlang::as_name and named lists you could do:
library(gtsummary)
#' Summarise one continuous column by treatment arm
#'
#' Builds a gtsummary table in which the chosen column is shown as a
#' multi-line ("continuous2") summary, split by the `trt` column.
#'
#' @param df A data frame containing a `trt` column and the column named in
#'   `var2`.
#' @param var2 Bare (unquoted) name of the column to summarise.
#' @return The object returned by `gtsummary::tbl_summary()`.
get_stats <- function(df, var2) {
  # Turn the bare column name into a string so it can be used as the name of
  # the list elements passed to type=, statistic= and digits=.
  var2_str <- rlang::as_name(rlang::enquo(var2))

  # One summary line per element.
  stat_lines <- c(
    "{N_nonmiss}",
    "{mean} ({sd})",
    "{median} ({p25}, {p75})",
    "{min}, {max}"
  )

  gtsummary::tbl_summary(
    df,
    by = trt,
    missing = "no",
    type = setNames(list("continuous2"), var2_str),
    statistic = setNames(list(stat_lines), var2_str),
    # One digits value per summary line above.
    digits = setNames(list(c(0, 1, 1, 1)), var2_str)
  )
}
# Example call: summarise `age` by treatment arm.
trial %>%
  select(age, trt) %>%
  # Pass the function itself; `as.character()` would call it with no input
  # and hand an empty character vector to mutate_if().
  dplyr::mutate_if(is.factor, as.character) %>%
  get_stats(age)
I'm trying to add the effect size of the Wilcoxon test to a summary table using the add_stat function of the "gtsummary" package.
My data looks like:
# Group membership: ten "FND" rows followed by ten "HC" rows.
Type <- rep(c("FND", "HC"), each = 10)

# Component scores, listed as FND rows first and HC rows second.
Component1 <- c(
  2, 3, 2, 2, 1, 0, 1, 2, 1, 2,
  1, 0, 0, 0, 1, 1, 2, 0, 1, 1
)
Component2 <- c(
  1, 3, 3, 3, 2, 0, 2, 3, 3, 2,
  2, 0, 0, 0, 0, 1, 2, 1, 1, 0
)
Component3 <- c(
  0, 1, 3, 2, 0, 1, 2, 2, 0, 1,
  0, 0, 1, 1, 0, 1, 1, 0, 0, 0
)

data_components <- data.frame(
  Type = Type,
  Component1 = Component1,
  Component2 = Component2,
  Component3 = Component3
)
# Summary of the three components by group, with p-values, a spanning header
# over both group columns and bold variable labels.
data_components_tbl <-
  tbl_summary(
    data_components,
    by = Type,
    # Treat the components as continuous so that means are reported.
    type = list(
      Component1 ~ "continuous",
      Component2 ~ "continuous",
      Component3 ~ "continuous"
    ),
    statistic = list(
      all_continuous() ~ "{mean} ({sd})",
      all_categorical() ~ "{n} / {N} ({p}%)"
    ),
    digits = all_continuous() ~ 2,
    label = list(
      Component1 ~ "Subjective sleep quality",
      Component2 ~ "Sleep latency",
      Component3 ~ "Sleep duration"
    )
  ) %>%
  add_p(pvalue_fun = ~style_pvalue(.x, digits = 2)) %>%
  modify_header(update = list(label ~ "**Variable**")) %>%
  modify_spanning_header(c("stat_1", "stat_2") ~ "**Group**") %>%
  modify_footnote(all_stat_cols() ~ "Mean (SD)") %>%
  bold_labels()

data_components_tbl
I've tried with this function:
# Attempted helper for add_stat(): takes the data plus the names of the
# variable and the grouping column, and returns the `effsize` element of the
# rstatix::wilcox_effsize() result. The question reports that this version
# does not work.
my_ES_test <- function(data, variable, by, ...) {
# The formula is built from the extracted column values
# (data[[variable]], data[[by]]) rather than from the column names.
(data%>%
rstatix::wilcox_effsize(data[[variable]] ~ as.factor(data[[by]])))$effsize
}
# Attempt to add the effect size as an extra column via add_stat().
data_components_tbl <- data_components %>%
tbl_summary(
by = Type,
type = list(Component1 ~ "continuous", Component2 ~ "continuous", Component3 ~ "continuous"), # treat the components as continuous so that means are reported
statistic = list(all_continuous() ~ "{mean} ({sd})",
all_categorical() ~ "{n} / {N} ({p}%)"),
digits = all_continuous() ~ 2)%>%
add_p(pvalue_fun = ~style_pvalue(.x, digits = 2)) %>%
# NOTE: the right-hand side below is a call with no arguments,
# `my_ES_test()`, not the function object `my_ES_test`.
add_stat(fns = everything() ~ my_ES_test()) %>%
modify_header(update = list(label ~ "**Variable**")) %>%
modify_spanning_header(c("stat_1", "stat_2") ~ "**Group**") %>%
modify_footnote(
all_stat_cols() ~ "Mean (SD)")%>%
bold_labels()
# Print the table.
data_components_tbl
I think I didn't use the right syntax for the my_ES_test function. Is there any way to do this?
Thanks for help!
Dear aylaxla
I made a slight modification to your ES function. See below!
library(gtsummary)
packageVersion("gtsummary")
#> [1] '1.4.0.9000'
# Wilcoxon effect size of `variable` across the groups in `by`.
# `variable` and `by` are column names given as strings; arguments passed
# through `...` are accepted and not used.
my_ES_test <- function(data, variable, by, ...) {
  # Build the formula "<variable> ~ <by>" from the two column names.
  model_formula <- as.formula(glue::glue("{variable} ~ {by}"))
  effsize_result <- rstatix::wilcox_effsize(data, model_formula)
  effsize_result$effsize
}
my_ES_test(trial, "age", "trt")
#> Effect size (r)
#> 0.02633451
# Summary by treatment arm, with the Wilcoxon effect size added as an extra
# column for the continuous variables.
tbl <-
  select(trial, age, marker, trt) %>%
  tbl_summary(
    by = trt,
    statistic = all_continuous() ~ "{mean} ({sd})",
    missing = "no"
  ) %>%
  add_stat(fns = all_continuous() ~ my_ES_test) %>%
  modify_header(add_stat_1 ~ "**Wilcoxon ES**")
Created on 2021-04-26 by the reprex package (v2.0.0)
When I do a tbl_stack, I'd like to show the total N of the combined tables in the tbl_stack in the header. At the moment the result appears to show the N of the first table in the stack.
# Table for grade I patients. The result has to be stored under this name
# because the tbl_stack() call further down refers to `tbl_summary_ex2`.
tbl_summary_ex2 <-
  trial %>%
  select(age, grade, response, trt) %>%
  filter(grade == "I") %>%
  tbl_summary(
    by = trt,
    label = list(age ~ "Patient Age"),
    statistic = list(all_continuous() ~ "{mean} ({sd})"),
    digits = list(age ~ c(0, 1))
  )
# Table for the remaining grades.
tbl_summary_ex2a <-
  select(trial, age, grade, response, trt) %>%
  filter(grade %in% c("II", "III", "IV")) %>%
  tbl_summary(
    by = trt,
    label = list(age ~ "Patient Age"),
    statistic = list(all_continuous() ~ "{mean} ({sd})"),
    digits = list(age ~ c(0, 1))
  )

# Stack the two tables on top of each other.
tbl_stack(tbls = list(tbl_summary_ex2, tbl_summary_ex2a))
Thanks for any tips,
Jeff
Yes, as the documentation of tbl_stack() indicates, the headers are retained from the first gtsummary in the stack. You can use the modify_header() function to change the headers, however. Additionally, these gtsummary tables have an internal object, .$df_by, that saves the Ns from each of your tables. You can sum the Ns across tables using these internal data frames. Example below doing this programmatically, but if it's easier you could simply hard code the Ns.
library(gtsummary)
library(tidyverse)
# Grade I patients; `grade` itself is left out of the table.
tbl_summary_ex2 <-
  select(trial, age, grade, response, trt) %>%
  filter(grade == "I") %>%
  tbl_summary(
    by = trt,
    label = list(age ~ "Patient Age"),
    statistic = list(all_continuous() ~ "{mean} ({sd})"),
    digits = list(age ~ c(0, 1)),
    include = -grade
  )
# Patients with grade II, III or IV; `grade` itself is left out of the table.
tbl_summary_ex2a <-
  select(trial, age, grade, response, trt) %>%
  filter(grade %in% c("II", "III", "IV")) %>%
  tbl_summary(
    by = trt,
    label = list(age ~ "Patient Age"),
    statistic = list(all_continuous() ~ "{mean} ({sd})"),
    digits = list(age ~ c(0, 1)),
    include = -grade
  )
# Calculate the summed Ns from both tables and turn them into a list of
# header formulas, one per `by` column, for modify_header().
list_N <-
  tbl_summary_ex2$df_by %>%
  bind_rows(tbl_summary_ex2a$df_by) %>%
  select(by_col, by, n) %>%
  group_by(by_col, by) %>%
  # Drop the grouping explicitly instead of depending on whatever grouping
  # summarise() leaves behind by default.
  summarise(n = sum(n), .groups = "drop") %>%
  # Work one row at a time so that each header string becomes its own
  # formula.
  rowwise() %>%
  mutate(
    header_update = list(
      as.formula(str_glue("{by_col} ~ '**{by}**, N = {n}'"))
    )
  ) %>%
  pull(header_update)
list_N
# Stack the two tables under group headers, then replace the column headers
# with the ones carrying the combined Ns.
modify_header(
  tbl_stack(
    tbls = list(tbl_summary_ex2, tbl_summary_ex2a),
    group_header = c("Grade I", "Grade > I")
  ),
  list_N
)