Related
I want to create a function that would automatically generate the tables with summary statistics when i parse different column names. I am trying to create a function for gtsummary I have tried enquo and deparse but both don't seem to help. Can somebody please guide me in what I am doing wrong here.
get_stats <- function (var2) {
var2 <- dplyr::enquo(var2)
grp_val <- deparse(substitute(var2))
df %>%
gtsummary::tbl_summary(.,
by = trt,
missing = "no",
type =
list(!!var2 ~ "continuous2"),
statistic = list(
"{{var2}}" = c(
"{N_nonmiss}",
"{mean} ({sd})",
"{median} ({p25}, {p75})",
"{min}, {max}"
)
)
,
digits = !!var2 ~ c(0, 1, 1, 1)
)
}
The error I keep getting is Error: Error in type= argument input. Select from ‘age’, ‘trt’.
When I use this with the trial data without parsing anything it works fine.
trial %>%
dplyr::select(age, trt) %>%
dplyr::mutate_if(is.factor, as.character()) %>%
gtsummary::tbl_summary(
by = trt,
missing = "no",
type =
list(age ~ "continuous2"),
statistic = list(
"age" = c(
"{N_nonmiss}",
"{mean} ({sd})",
"{median} ({p25}, {p75})",
"{min}, {max}"
))
,
digits = age ~ c(0, 1, 1, 1)
)
Expected output from the code
Using rlang::as_name and named lists you could do:
library(gtsummary)
get_stats <- function(df, var2) {
var2_str <- rlang::as_name(rlang::enquo(var2))
df %>%
gtsummary::tbl_summary(.,
by = trt,
missing = "no",
type = setNames(list(c("continuous2")), var2_str),
statistic = setNames(list(c(
"{N_nonmiss}",
"{mean} ({sd})",
"{median} ({p25}, {p75})",
"{min}, {max}"
)), var2_str
),
digits = setNames(list(c(0, 1, 1, 1)), var2_str),
)
}
trial %>%
select(age, trt) %>%
dplyr::mutate_if(is.factor, as.character()) %>%
get_stats(age)
I am trying to figure out how to add customized columns for labels when using gtsummary -- for example I want to add a column with headings for each summary statistics that I have. I don't want this in the characteristic column, I want this to be on the left of the characteristic group.
I am not sure what is the best way to achieve this is using gtsummary, I have the rest of the table but would need the column with the modifiable header in yellow.
This is the code I have so far:
library(tidyverse)
library(gtsummary)
trial %>%
dplyr::select(age, trt) %>%
gtsummary::tbl_summary(.,
by = trt,
missing = "no",
type = age ~ "continuous2",
statistic = age ~ c(
"{N_nonmiss}",
"{mean} ({sd})",
"{median} ({p25}, {p75})",
"{min}, {max}"),
digits = age ~ c(0, 1, 1, 1),
label = age ~ " ") %>%
gtsummary::add_overall() %>%
# This will add the Total column
gtsummary::add_stat_label(label = age ~ c("N",
"Mean (SD)",
"Median (Q1, Q3)",
"Min, Max")) %>%
gtsummary::modify_header(
label ~ "Summary Statistics",
stat_0 ~ "Total",
stat_1 ~ "Drug A",
stat_2 ~ "Drug B"
) %>%
gtsummary::modify_table_body(~ .x %>% dplyr::relocate(stat_0, .after = stat_2))
You're so close! See code example below :)
library(gtsummary)
library(tidyverse)
packageVersion("gtsummary")
#> [1] '1.5.2.9026'
trial %>%
dplyr::select(age, trt) %>%
gtsummary::tbl_summary(
by = trt,
missing = "no",
type = age ~ "continuous2",
statistic = age ~ c(
"{N_nonmiss}",
"{mean} ({sd})",
"{median} ({p25}, {p75})",
"{min}, {max}"),
digits = age ~ c(0, 1, 1, 1)
) %>%
gtsummary::add_overall(last = TRUE) %>%
modify_table_body(
~ .x %>%
mutate(
new_label = ifelse(row_type == "label", label, ""),
label = ifelse(row_type == "label", "", label),
.before = label
)
) %>%
modify_header(
new_label ~ "Characteristic",
label ~ "Summary Statistics",
stat_0 ~ "Total",
stat_1 ~ "Drug A",
stat_2 ~ "Drug B"
) %>%
modify_column_alignment(new_label, "left") %>%
as_kable()
Characteristic
Summary Statistics
Drug A
Drug B
Total
Age
N
91
98
189
Mean (SD)
47.0 (14.7)
47.4 (14.0)
47.2 (14.3)
Median (IQR)
46.0 (37, 59.0)
48.0 (39, 56.0)
47.0 (38, 57.0)
Range
6.0, 78.0
9.0, 83.0
6.0, 83.0
Created on 2022-04-21 by the reprex package (v2.0.1)
enter image description hereI have tried adding the confidence intervals in gtsummry but I get an error #>Error: Dimension of 'a1' and the added statistic do not match. Expecting statistic to be length 2. I successfully managed to add the intervals when I don't stratified by any variable. The code is as below-sorry if its too verbose.
#---- Libraries
library(gtsummary)
library(tidyverse)
#---- Data
set.seed(2021)
df <- tibble(
a1 = factor(ifelse(sign(rnorm(30))==-1, 0, 1), labels = c("No", "Yes")),
a2 = factor(ifelse(sign(rnorm(30))==-1, 0, 1), labels = c("No", "Yes")),
gender = gl(2, 15, labels = c("Males", "Females")),
b2 = gl(3, 10, labels = c("Primary", "Secondary", "Tertiary")),
c1 = gl(3, 10, labels = c("15-19", "20-24", "25-30")),
outcome = factor(ifelse(sign(rnorm(30))==-1, 0, 1), labels = c("No", "Yes")),
weight = runif(30, 1, 12)
)
#---- Function to calculate CIs
categorical_ci <- function(variable, tbl, ...) {
filter(tbl$meta_data, variable == .env$variable) %>%
pluck("df_stats", 1) %>%
mutate(
# calculate and format 95% CI
prop_ci = map2(n, N, ~prop.test(.x, .y)$conf.int %>%
style_percent(symbol = TRUE)),
ci = map_chr(prop_ci, ~glue::glue("{.x[1]}, {.x[2]}"))
) %>%
pull(ci)
}
#---- tblsummary with stratified by gender
t1 <- df %>%
select(gender, a1, a2) %>%
tbl_summary(by = gender, statistic = everything() ~ "{n} {p}%",
type = everything() ~ "categorical")
t1 %>%
add_stat(
fns = everything() ~ "categorical_ci",
location = "level",
header = "**95% CI**"
) %>%
modify_footnote(everything() ~ NA)
There is a similar question here: https://community.rstudio.com/t/tbl-summary-function/100113/6
library(gtsummary)
ll <- function(x) t.test(x)$conf.int[[1]] # Lower 95% CI of mean
ul <- function(x) t.test(x)$conf.int[[2]] # Upper 95% CI of mean
# create table 1
table <-
trial %>%
select(trt, age) %>%
tbl_summary(
by = trt,
statistic = all_continuous() ~ "{mean} ({ll} — {ul})",
missing = "no",
digits = all_continuous() ~ 2
) %>%
modify_footnote(all_stat_cols() ~ "Mean (95% CI)")
#---- Libraries
library(gtsummary)
library(flextable)
library(tidyverse)
#---- Data
set.seed(2021)
df <- tibble(
a1 = factor(ifelse(sign(rnorm(30))==-1, 0, 1), labels = c("No", "Yes")),
a2 = factor(ifelse(sign(rnorm(30))==-1, 0, 1), labels = c("No", "Yes")),
gender = gl(2, 15, labels = c("Males", "Females")),
b2 = gl(3, 10, labels = c("Primary", "Secondary", "Tertiary")),
c1 = gl(3, 10, labels = c("15-19", "20-24", "25-30")),
outcome = factor(ifelse(sign(rnorm(30))==-1, 0, 1), labels = c("No", "Yes")),
weight = runif(30, 1, 12)
)
#---- Solution ----
tbl <-
df %>%
select(a1, a2, gender) %>%
tbl_summary(missing = "no", by = gender, type = everything() ~ "categorical",
percent = "row") %>%
add_n() %>%
modify_footnote(everything() ~ NA)
myci <- tbl$meta_data %>%
filter(summary_type %in% c("categorical", "dichotomous")) %>%
select(summary_type, var_label, df_stats) %>%
unnest(df_stats) %>%
mutate(
conf.low = (p - qnorm(0.975) * sqrt(p * (1 - p) / N)) %>%
style_percent(symbol = TRUE),
conf.high =( p + qnorm(0.975) * sqrt(p * (1 - p) / N)) %>%
style_percent(symbol = TRUE),
ci = str_glue("{conf.low}, {conf.high}"),
label = coalesce(variable_levels, var_label),
row_type = ifelse(summary_type == "dichotomous", "label", "level")
) %>%
select(by, variable, row_type, label, ci) %>%
pivot_wider(names_from = "by", values_from = "ci") %>%
rename(Male_ci = Males, Female_ci = Females)
tbl %>%
modify_table_body(
left_join,
myci,
by = c("variable", "row_type", "label")
) %>%
modify_table_header(
Male_ci,
hide = FALSE,
label = "**95% CI Males**"
) %>%
modify_table_header(
Female_ci,
hide = FALSE,
label = "**95% CI Females**"
)
I am trying to group some rows/variables (both categorical and continuous) to help with the table readability in a large dataset.
Here is the dummy dataset:
library(gtsummary)
library(tidyverse)
library(gt)
set.seed(11012021)
# Create Dataset
PIR <-
tibble(
siteidn = sample(c("1324", "1329", "1333", "1334"), 5000, replace = TRUE, prob = c(0.2, 0.45, 0.15, 0.2)) %>% factor(),
countryname = sample(c("NZ", "Australia"), 5000, replace = TRUE, prob = c(0.3, 0.7)) %>% factor(),
hospt = sample(c("Metropolitan", "Rural"), 5000, replace = TRUE, prob = c(0.65, 0.35)) %>% factor(),
age = rnorm(5000, mean = 60, sd = 20),
apache2 = rnorm(5000, mean = 18.5, sd=10),
apache3 = rnorm(5000, mean = 55, sd=20),
mechvent = sample(c("Yes", "No"), 5000, replace = TRUE, prob = c(0.4, 0.6)) %>% factor(),
sex = sample(c("Female", "Male"), 5000, replace = TRUE) %>% factor(),
patient = TRUE
) %>%
mutate(patient_id = row_number())%>%
group_by(
siteidn) %>% mutate(
count_site = row_number() == 1L) %>%
ungroup()%>%
group_by(
patient_id) %>% mutate(
count_pt = row_number() == 1L) %>%
ungroup()
Then I use the following code to generate my table:
t1 <- PIR %>%
select(patientn = count_pt, siten = count_site, age, sex, apache2, apache3, apache2, mechvent, countryname) %>%
tbl_summary(
by = countryname,
missing = "no",
statistic = list(
patientn ~ "{n}",
siten ~ "{n}",
age ~ "{mean} ({sd})",
apache2 ~ "{mean} ({sd})",
mechvent ~ "{n} ({p}%)",
sex ~ "{n} ({p}%)",
apache3 ~ "{mean} ({sd})"),
label = list(
siten = "Number of ICUs",
patientn = "Number of Patients",
age = "Age",
apache2 = "APACHE II Score",
mechvent = "Mechanical Ventilation",
sex = "Sex",
apache3 = "APACHE III Score")) %>%
modify_header(stat_by = "**{level}**") %>%
add_overall(col_label = "**Overall**")
t2 <- PIR %>%
select(patientn = count_pt, siten = count_site, age, sex, apache2, apache3, apache2, mechvent, hospt) %>%
tbl_summary(
by = hospt,
missing = "no",
statistic = list(
patientn ~ "{n}",
siten ~ "{n}",
age ~ "{mean} ({sd})",
apache2 ~ "{mean} ({sd})",
mechvent ~ "{n} ({p}%)",
sex ~ "{n} ({p}%)",
apache3 ~ "{mean} ({sd})"),
label = list(
siten = "Number of ICUs",
patientn = "Number of Patients",
age = "Age",
apache2 = "APACHE II Score",
mechvent = "Mechanical Ventilation",
sex = "Sex",
apache3 = "APACHE III Score")) %>%
modify_header(stat_by = "**{level}**")
tbl <-
tbl_merge(
tbls = list(t1, t2),
tab_spanner = c("**Country**", "**Hospital Type**")
) %>%
modify_spanning_header(stat_0_1 ~ NA) %>%
modify_footnote(everything() ~ NA)
This produces the following table:
I would like to group certain rows together for ease of reading. Ideally, I would like the table to look like this:
I have attempted using the gt package, with the following code:
tbl <-
tbl_merge(
tbls = list(t1, t2),
tab_spanner = c("**Country**", "**Hospital Type**")
) %>%
modify_spanning_header(stat_0_1 ~ NA) %>%
modify_footnote(everything() ~ NA) %>%
as_gt() %>%
gt::tab_row_group(
group = "Severity of Illness Scores",
rows = 7:8) %>%
gt::tab_row_group(
group = "Patient Demographics",
rows = 3:6) %>%
gt::tab_row_group(
group = "Numbers",
rows = 1:2)
This produces the desired table:
There are a couple of issues I'm having with the way that I'm doing this.
When I try to use the row names (variables), an error message comes up (Can't subset columns that don't exist...). Is there a way to do this by using the variable names? With larger tables, I am getting into some trouble with using the row numbers method of assigning row names. This is particularly true when there is a single variable that loses its place as it's moved to the end to account for the grouped rows.
Is there a way to do this prior to piping into tbl_summary? Although I like the output of this table, I use Word as my output document for statistical reports and would like the ability to be able to format the tables in Word if need be (or by my collaborators). I usually use gtsummary::as_flextable for table output.
Thanks again,
Ben
When I try to use the row names (variables), an error message comes up (Can't subset columns that don't exist...). Is there a way to do this by using the variable names? With larger tables, I am getting into some trouble with using the row numbers method of assigning row names. This is particularly true when there is a single variable that loses its place as it's moved to the end to account for the grouped rows.
There are two ways to go about this, 1. build separate tables for each group, then stack them, and 2. add a grouping column to .$table_body then group the tibble by the new variable.
library(gtsummary)
library(dplyr)
packageVersion("gtsummary")
#> '1.3.6'
# Method 1 - Stack separate tables
t1 <- trial %>% select(age) %>% tbl_summary()
t2 <- trial %>% select(grade) %>% tbl_summary()
tbl1 <-
tbl_stack(
list(t1, t2),
group_header = c("Demographics", "Tumor Characteristics")
) %>%
modify_footnote(all_stat_cols() ~ NA)
# Method 2 - build a grouping variable
tbl2 <-
trial %>%
select(age, grade) %>%
tbl_summary() %>%
modify_table_body(
mutate,
groupname_col = case_when(variable == "age" ~ "Deomgraphics",
variable == "grade" ~ "Tumor Characteristics")
)
2.Is there a way to do this prior to piping into tbl_summary? Although I like the output of this table, I use Word as my output document for statistical reports and would like the ability to be able to format the tables in Word if need be (or by my collaborators). I usually use gtsummary::as_flextable for table output.
The examples above modify the table before exporting to gt format, so you can export these example to flextable. However, flextable does not have the same built-in header row functionality (or at least I am unaware of it, and don't use it in as_flex_table()), and the output would look like the table below. I recommend installing the dev version of gt from GitHub and export to RTF (supported by Word)--they've made many updates to RTF output in the last months, and it may work for you.
I think I might have a solution for this (thanks, obviously, to Daniel Sjoberg and team providing us with the modify_table_body function)
All you need to do is edit the underlying data frame to add a variable with your desired grouping row using modify_table_body, and then put it in the position you want it to be in, like this:
library(gtsummary)
library(dplyr)
packageVersion("gtsummary")
trial%>%
select(age, stage, grade)%>%
tbl_summary()%>%
modify_table_body(
~.x %>%
# add your variable
rbind(
tibble(
variable="Demographics",
var_type=NA,
var_label = "Demographics",
row_type="label",
label="Demographics",
stat_0= NA))%>% # expand the components of the tibble as needed if you have more columns
# can add another one
rbind(
tibble(
variable="Tumor characteristics",
var_type=NA,
var_label = "Tumor characteristics",
row_type="label",
label="Tumor characteristics",
stat_0= NA))%>%
# specify the position you want these in
arrange(factor(variable, levels=c("Demographics",
"age",
"Tumor characteristics",
"stage",
"grade"))))%>%
# and you can then indent the actual variables
modify_column_indent(columns=label, rows=variable%in%c("age",
"stage",
"grade"))%>%
# and double indent their levels
modify_column_indent(columns=label, rows= (variable%in%c("stage",
"grade")
& row_type=="level"),
double_indent=T)
When I do a tbl_stack, I'd like to show the total N of the combined tables in the tbl_stack in the header. At the moment the result appears to show the N of the first table in the stack.
trial %>%
select(age, grade, response, trt) %>%
filter(grade == "I") %>%
tbl_summary(
by = trt,
label = list(age ~ "Patient Age"),
statistic = list(all_continuous() ~ "{mean} ({sd})"),
digits = list(age ~ c(0, 1))
)
tbl_summary_ex2a <-
trial %>%
select(age, grade, response, trt) %>%
filter(grade %in% c("II", "III", "IV")) %>%
tbl_summary(
by = trt,
label = list(age ~ "Patient Age"),
statistic = list(all_continuous() ~ "{mean} ({sd})"),
digits = list(age ~ c(0, 1))
)
tbl_stack(tbls=list(tbl_summary_ex2, tbl_summary_ex2a))
Thanks for any tips,
Jeff
Yes, as the documentation of tbl_stack() indicates, the headers are retained from the first gtsummary in the stack. You can use the modify_header() function to change the headers, however. Additionally, these gtsummary tables have an internal object, .$df_by, that saves the Ns from each of your tables. You can sum the Ns across tables using these internal data frames. Example below doing this programmatically, but if it's easier you could simply hard code the Ns.
library(gtsummary)
library(tidyverse)
tbl_summary_ex2 <-
trial %>%
select(age, grade, response, trt) %>%
filter(grade == "I") %>%
tbl_summary(
by = trt,
label = list(age ~ "Patient Age"),
statistic = list(all_continuous() ~ "{mean} ({sd})"),
digits = list(age ~ c(0, 1)),
include = -grade
)
tbl_summary_ex2a <-
trial %>%
select(age, grade, response, trt) %>%
filter(grade %in% c("II", "III", "IV")) %>%
tbl_summary(
by = trt,
label = list(age ~ "Patient Age"),
statistic = list(all_continuous() ~ "{mean} ({sd})"),
digits = list(age ~ c(0, 1)),
include = -grade
)
# calculate the sum total Ns from both tables
list_N <-
tbl_summary_ex2$df_by %>%
bind_rows(tbl_summary_ex2a$df_by) %>%
select(by_col, by, n) %>%
group_by(by_col, by) %>%
summarise(n = sum(n)) %>%
mutate(
header_update =
str_glue("{by_col} ~ '**{by}**, N = {n}'") %>%
as.formula() %>%
list()
) %>%
pull(header_update)
list_N
tbl_stack(
tbls=list(tbl_summary_ex2, tbl_summary_ex2a),
group_header = c("Grade I", "Grade > I")
) %>%
modify_header(list_N)