How to get number of unique observations using tbl_summary - r

In the data below, I have 7 patients who have undergone surgeries for 9 conditions; two of the patients have undergone surgery on the same day for two different reasons. When I try to summarise this data using the tbl_summary function, it shows the total number of observations as 9. I want to add the total number of patients, which is n = 7. I do not understand how to add that information to the table. Could you please help me with this?
library(data.table)
library(tidyverse)
library(gtsummary)
# Example data: 9 surgery records for 7 participants (participants 3 and 5
# each appear twice).
participant.index <- c(1, 2, 3, 3, 4, 5, 5, 6, 7)
repeat.instance <- c(1, 1, 1, 1, 1, 1, 1, 1, 1)
indication.surgery <- c(
  "ibs", "infection", "infection", "renalstones", "ibs", "infection",
  "infection", "ibs", "tumour"
)
date.surgery <- c(
  "2019-01-10", "2019-01-01", "2018-01-01", "2018-01-01", "2017-09-10",
  "2000-09-09", "2015-01-10", "2015-01-10", "2006-09-09"
)
mydata <- data.table(
  participant.index, repeat.instance, indication.surgery, date.surgery
)

# Summarise the indication column. The opening parenthesis after
# `tbl_summary` is required for the arguments below to parse as one call.
mydata %>%
  select(indication.surgery) %>%
  tbl_summary(
    label = list(indication.surgery ~ "Indication for Surgery"),
    statistic = list(all_categorical() ~ "{n} ({p}%)"),
    missing_text = "(Missing Observations)",
    percent = c("cell")
  ) %>%
  modify_table_styling(spanning_header = "Indications for Surgery") %>%
  modify_caption("**Table 1. Indication for Surgery**") %>%
  modify_footnote(all_stat_cols() ~ "Number of observations, Frequency (%)")

Sure! If you add an indicator that marks the first row of each participant index and include that new variable in the summary table, the number of TRUE values is the number of unique participants. Example below!
library(data.table)
library(tidyverse)
library(gtsummary)
# Example data, one row per surgery.
participant.index <- c(1, 2, 3, 3, 4, 5, 5, 6, 7)
repeat.instance <- rep(1, 9)
indication.surgery <- c("ibs", "infection", "infection", "renalstones", "ibs",
                        "infection", "infection", "ibs", "tumour")
date.surgery <- c("2019-01-10", "2019-01-01", "2018-01-01", "2018-01-01",
                  "2017-09-10", "2000-09-09", "2015-01-10", "2015-01-10",
                  "2006-09-09")

# Flag the first row of every participant so that the TRUE values can later be
# counted as unique participants; the flag is placed as the first column.
mydata <-
  data.table(participant.index, repeat.instance,
             indication.surgery, date.surgery) %>%
  group_by(participant.index) %>%
  mutate(first.participant.index = row_number() == 1L, .before = 1L) %>%
  ungroup()
# Summarise the first-row flag (shown as a plain count) together with the
# surgery indication (shown as count and cell percentage).
mydata %>%
  select(first.participant.index, indication.surgery) %>%
  tbl_summary(
    label = list(
      first.participant.index ~ "No. Unique Participants",
      indication.surgery ~ "Indication for Surgery"
    ),
    statistic = list(
      all_categorical() ~ "{n} ({p}%)",
      first.participant.index ~ "{n}"
    ),
    missing_text = "(Missing Observations)",
    percent = "cell"
  ) %>%
  modify_table_styling(spanning_header = "Indications for Surgery") %>%
  modify_caption("**Table 1. Indication for Surgery**") %>%
  modify_footnote(all_stat_cols() ~ "Number of observations, Frequency (%)")

Related

Getting multiple footnotes on the same line with gtsummary

I've got the following reprex
library(tidyverse)
library(gtsummary)
# Reproducible example data: 100 rows, 50 per year.
set.seed(50)
dat <- data.frame(
  exposed = sample(c("Unexposed", "Exposed"), 100, TRUE),
  year = rep(c(1985, 1986), each = 50),
  Age = rnorm(100, 85, 1),
  Transit = sample(c("Bus", "Train", "Walk", "Car"), 100, TRUE)
)

# One tbl_summary per year, split by exposure; every footnote is then cleared.
dat %>%
  tbl_strata(
    strata = year,
    ~ .x %>%
      tbl_summary(
        by = exposed,
        include = c(Age, Transit),
        statistic = list(Age ~ "{mean} ± {sd}"),
        digits = Age ~ 1,
        label = Age ~ "Age, mean ± SD"
      )
  ) %>%
  modify_header(all_stat_cols() ~ "**{level}**") %>%
  modify_footnote(update = everything() ~ NA)
which produces this table:
but when I try to add a new, separate footnote, the previous one gets overwritten
# Stratified summary of `dat` by year and exposure, followed by three
# row-level footnotes requested through modify_table_styling().
dat %>%
tbl_strata(strata = year,
~ .x %>%
tbl_summary(
by = exposed,
include = c(Age, Transit),
statistic = list(Age ~ "{mean} ± {sd}"),
digits = Age ~ 1,
label = Age ~ "Age, mean ± SD"
)) %>%
modify_header(all_stat_cols() ~ "**{level}**") %>%
# Footnote 1 targets the `label` column on rows where variable == "Age".
modify_table_styling(columns = label,
rows = variable == "Age",
footnote = "Footnote 1") %>%
# Footnotes 2 and 3 use the identical column/row selection
# (label == "Transit"), i.e. both are requested for the same cell.
modify_table_styling(columns = label,
rows = label == "Transit",
footnote = "Footnote 2") %>%
modify_table_styling(columns = label,
rows = label == "Transit",
footnote = "Footnote 3") %>%
# Sets the footnote of every column selected by everything() to NA.
modify_footnote(update = everything() ~ NA)
and my table looks like this.
I've tried using modify_footnote as described here but I don't understand the documentation for how to get the footnotes out of the columns and into the rows.
The final output should look something like this.

How to dynamically add variables to gtsummary::tbl_summary() to calculate statistics for continuous variables

I want to create a function that automatically generates tables of summary statistics when I pass different column names. I am trying to create a wrapper function around gtsummary; I have tried enquo and deparse, but neither seems to help. Can somebody please point out what I am doing wrong here?
# Attempted wrapper around gtsummary::tbl_summary() that should summarise the
# column passed as `var2` as a "continuous2" variable, split by `trt`.
# NOTE(review): `df` is not a parameter; it is looked up outside the function.
get_stats <- function (var2) {
# Capture the argument as a quosure.
var2 <- dplyr::enquo(var2)
# NOTE(review): `grp_val` is assigned here but not used again in this function.
grp_val <- deparse(substitute(var2))
df %>%
gtsummary::tbl_summary(.,
by = trt,
missing = "no",
type =
list(!!var2 ~ "continuous2"),
# NOTE(review): the list name below is the literal string "{{var2}}";
# base list() does not interpolate it into the column name.
statistic = list(
"{{var2}}" = c(
"{N_nonmiss}",
"{mean} ({sd})",
"{median} ({p25}, {p75})",
"{min}, {max}"
)
)
,
digits = !!var2 ~ c(0, 1, 1, 1)
)
}
The error I keep getting is Error: Error in type= argument input. Select from ‘age’, ‘trt’.
When I use this with the trial data and the column names written out directly (without passing anything), it works fine.
# Reference call with the column names written out literally.
trial %>%
  dplyr::select(age, trt) %>%
  # Pass the function itself: `as.character()` (with parentheses) is evaluated
  # immediately and hands `character(0)` to mutate_if() instead of a function.
  dplyr::mutate_if(is.factor, as.character) %>%
  gtsummary::tbl_summary(
    by = trt,
    missing = "no",
    type = list(age ~ "continuous2"),
    statistic = list(
      "age" = c(
        "{N_nonmiss}",
        "{mean} ({sd})",
        "{median} ({p25}, {p75})",
        "{min}, {max}"
      )
    ),
    digits = age ~ c(0, 1, 1, 1)
  )
Expected output from the code
Using rlang::as_name and named lists you could do:
library(gtsummary)
#' Summarise one continuous column of `df` by treatment arm
#'
#' @param df A data frame containing a column `trt` and the column named by
#'   `var2`.
#' @param var2 Unquoted name of the column to summarise as "continuous2".
#' @return The object returned by `gtsummary::tbl_summary()`.
get_stats <- function(df, var2) {
  # Convert the bare column name to a string so it can be used as a list name.
  var2_str <- rlang::as_name(rlang::enquo(var2))

  # One statistic line per element.
  cont2_stats <- c(
    "{N_nonmiss}",
    "{mean} ({sd})",
    "{median} ({p25}, {p75})",
    "{min}, {max}"
  )

  # Named lists are used instead of formulas so the column name can be
  # supplied programmatically. The former trailing comma (an empty extra
  # argument) and the redundant `.` placeholder are dropped.
  df %>%
    gtsummary::tbl_summary(
      by = trt,
      missing = "no",
      type = setNames(list("continuous2"), var2_str),
      statistic = setNames(list(cont2_stats), var2_str),
      digits = setNames(list(c(0, 1, 1, 1)), var2_str)
    )
}
# Example call. `as.character` is passed as a function; the original
# `as.character()` evaluated to `character(0)` before mutate_if() saw it.
trial %>%
  select(age, trt) %>%
  dplyr::mutate_if(is.factor, as.character) %>%
  get_stats(age)

Using summary_row() values to calculate group percentage with {gt} package?

I am trying to calculate the percentage for an entire group while using the summary_rows() function from the {gt} package. The problem I am encountering is how to create a function that uses summary_rows() values to calculate a percentage rowwise.
Sample df:
# Toy enrolment data: three courses in a single division and department.
df <- tibble(
  division = rep("Science", 3),
  department = rep("Biology", 3),
  course_num = c("101", "201", "301"),
  widthraws = c(1, 2, 3),
  unsucessful = rep(0, 3),
  successfull = c(1, 3, 4),
  total_enrolled = c(2, 5, 7),
  percent_successful = c(0.50, 0.60, 0.57)
)
Sample of gt table:
# Group by division/department and add a "total" summary row that sums
# columns 4 to 7.
df %>%
  group_by(division, department) %>%
  gt() %>%
  summary_rows(
    groups = TRUE,
    columns = 4:7,
    missing_text = " ",
    # Trailing comma after `fns` is kept exactly as in the original call.
    fns = list(total = ~ sum(.)),
  )
What I would want is the total row of the percent_successful column to be .57. Open to other ideas that would help me achieve this too.
Compute the overall percentage outside the pipeline, then add it to the table with a second summary_rows() layer:
library(gt)
library(dplyr)
# Overall success share across all courses, rounded to two decimals.
total_success_perc <- with(
  df,
  round(sum(successfull) / sum(total_enrolled), 2)
)

# First layer sums columns 4 to 7; second layer writes the precomputed share
# into column 8 of the same "total" row.
df %>%
  group_by(division, department) %>%
  gt() %>%
  summary_rows(
    groups = TRUE,
    columns = 4:7,
    missing_text = " ",
    # Trailing comma after `fns` is kept exactly as in the original call.
    fns = list(total = ~ sum(.)),
  ) %>%
  summary_rows(
    groups = TRUE,
    columns = 8,
    missing_text = " ",
    fns = list(total = ~ c(total_success_perc))
  )
-output
library(tidyverse)
library(gt)
# Toy enrolment data: three courses in a single division and department.
df <- tibble(
  division = rep("Science", 3),
  department = rep("Biology", 3),
  course_num = c("101", "201", "301"),
  widthraws = c(1, 2, 3),
  unsucessful = rep(0, 3),
  successfull = c(1, 3, 4),
  total_enrolled = c(2, 5, 7),
  percent_successful = c(0.50, 0.60, 0.57)
)
# Add four summary rows (total, max, min, median) over the columns from
# `widthraws` through `percent_successful`.
df %>%
  group_by(division, department) %>%
  gt() %>%
  summary_rows(
    groups = TRUE,
    columns = c(widthraws:percent_successful),
    missing_text = " ",
    fns = list(
      total = ~ sum(.),
      max = ~ max(.),
      min = ~ min(.),
      # Row label was misspelled "medain".
      median = ~ median(.)
    )
  )

How do you include confidence intervals for proportions in gtsummary with by?

I have tried adding confidence intervals in gtsummary, but I get the error `#> Error: Dimension of 'a1' and the added statistic do not match. Expecting statistic to be length 2.` I successfully managed to add the intervals when I don't stratify by any variable. The code is below; sorry if it's too verbose.
#---- Libraries
library(gtsummary)
library(tidyverse)
#---- Data
# Simulated data, 30 rows. The random draws are made in the order
# a1, a2, outcome, weight.
set.seed(2021)
df <- tibble(
  a1 = factor(
    ifelse(sign(rnorm(30)) == -1, 0, 1),
    labels = c("No", "Yes")
  ),
  a2 = factor(
    ifelse(sign(rnorm(30)) == -1, 0, 1),
    labels = c("No", "Yes")
  ),
  gender = gl(2, 15, labels = c("Males", "Females")),
  b2 = gl(3, 10, labels = c("Primary", "Secondary", "Tertiary")),
  c1 = gl(3, 10, labels = c("15-19", "20-24", "25-30")),
  outcome = factor(
    ifelse(sign(rnorm(30)) == -1, 0, 1),
    labels = c("No", "Yes")
  ),
  weight = runif(30, 1, 12)
)
#---- Function to calculate CIs
# Helper intended for add_stat(): builds a formatted confidence-interval string
# for each row of the summary statistics of one variable.
#   variable: name of the variable to look up in tbl$meta_data.
#   tbl:      object with a `meta_data` element holding a `df_stats` list column.
#   ...:      accepted but not used in the body.
# Returns the character vector `ci`, one "lower, upper" string per df_stats row.
# NOTE(review): when the table is stratified with `by`, df_stats presumably has
# one row per level and stratum, so more values come back than there are level
# rows — confirm against the reported length error.
categorical_ci <- function(variable, tbl, ...) {
# Keep the meta_data row of the requested variable; `.env$variable` refers to
# the function argument rather than the column of the same name.
filter(tbl$meta_data, variable == .env$variable) %>%
# Take the first element of the df_stats list column.
pluck("df_stats", 1) %>%
mutate(
# calculate and format 95% CI
prop_ci = map2(n, N, ~prop.test(.x, .y)$conf.int %>%
style_percent(symbol = TRUE)),
# Collapse the two formatted bounds into a single string.
ci = map_chr(prop_ci, ~glue::glue("{.x[1]}, {.x[2]}"))
) %>%
pull(ci)
}
#---- tblsummary with stratified by gender
# Summary of a1 and a2 by gender, both treated as categorical.
t1 <- df %>%
  select(gender, a1, a2) %>%
  tbl_summary(
    by = gender,
    statistic = everything() ~ "{n} {p}%",
    type = everything() ~ "categorical"
  )

# Add the CI helper as a level-wise statistic column and clear the footnotes.
t1 %>%
  add_stat(
    fns = everything() ~ "categorical_ci",
    location = "level",
    header = "**95% CI**"
  ) %>%
  modify_footnote(everything() ~ NA)
There is a similar question here: https://community.rstudio.com/t/tbl-summary-function/100113/6
library(gtsummary)
# Lower bound of the confidence interval reported by a one-sample t.test(x)
# (95% by default).
ll <- function(x) {
  ci <- t.test(x)$conf.int
  ci[[1]]
}
# Upper bound of the confidence interval reported by a one-sample t.test(x)
# (95% by default).
ul <- function(x) {
  ci <- t.test(x)$conf.int
  ci[[2]]
}
# create table 1
# Age by treatment arm, shown as the mean with the bounds computed by the
# user-defined ll() and ul() helpers.
table <- trial %>%
  select(trt, age) %>%
  tbl_summary(
    by = trt,
    statistic = all_continuous() ~ "{mean} ({ll} — {ul})",
    missing = "no",
    digits = all_continuous() ~ 2
  ) %>%
  modify_footnote(all_stat_cols() ~ "Mean (95% CI)")
#---- Libraries
library(gtsummary)
library(flextable)
library(tidyverse)
#---- Data
# Simulated data, 30 rows. The random draws are made in the order
# a1, a2, outcome, weight.
set.seed(2021)
df <- tibble(
  a1 = factor(
    ifelse(sign(rnorm(30)) == -1, 0, 1),
    labels = c("No", "Yes")
  ),
  a2 = factor(
    ifelse(sign(rnorm(30)) == -1, 0, 1),
    labels = c("No", "Yes")
  ),
  gender = gl(2, 15, labels = c("Males", "Females")),
  b2 = gl(3, 10, labels = c("Primary", "Secondary", "Tertiary")),
  c1 = gl(3, 10, labels = c("15-19", "20-24", "25-30")),
  outcome = factor(
    ifelse(sign(rnorm(30)) == -1, 0, 1),
    labels = c("No", "Yes")
  ),
  weight = runif(30, 1, 12)
)
#---- Solution ----
# Base table: a1 and a2 by gender with row percentages, an N column added,
# and all footnotes cleared.
tbl <- df %>%
  select(a1, a2, gender) %>%
  tbl_summary(
    missing = "no",
    by = gender,
    type = everything() ~ "categorical",
    percent = "row"
  ) %>%
  add_n() %>%
  modify_footnote(everything() ~ NA)
# Build one formatted CI string per variable level and gender from the
# statistics stored in tbl$meta_data, reshaped to one column per gender.
# NOTE(review): relies on the internal layout of tbl$meta_data/df_stats
# (columns p, N, by, variable_levels) — confirm for the installed gtsummary.
myci <- tbl$meta_data %>%
filter(summary_type %in% c("categorical", "dichotomous")) %>%
select(summary_type, var_label, df_stats) %>%
# Expand the df_stats list column into regular rows.
unnest(df_stats) %>%
mutate(
# Normal-approximation bounds: p -/+ qnorm(0.975) * sqrt(p * (1 - p) / N),
# formatted as percentages.
conf.low = (p - qnorm(0.975) * sqrt(p * (1 - p) / N)) %>%
style_percent(symbol = TRUE),
conf.high =( p + qnorm(0.975) * sqrt(p * (1 - p) / N)) %>%
style_percent(symbol = TRUE),
ci = str_glue("{conf.low}, {conf.high}"),
# label/row_type are built so they can serve as join keys later.
label = coalesce(variable_levels, var_label),
row_type = ifelse(summary_type == "dichotomous", "label", "level")
) %>%
select(by, variable, row_type, label, ci) %>%
# One CI column per level of `by`.
pivot_wider(names_from = "by", values_from = "ci") %>%
rename(Male_ci = Males, Female_ci = Females)
# Join the CI columns onto the table body, then give them headers.
# modify_header() replaces the deprecated modify_table_header(); assigning a
# header to a column also un-hides it.
tbl %>%
  modify_table_body(
    left_join,
    myci,
    by = c("variable", "row_type", "label")
  ) %>%
  modify_header(
    Male_ci = "**95% CI Males**",
    Female_ci = "**95% CI Females**"
  )

Find predictions for linear model that is grouped_by

I would like to get predicted values based on a model I fit to a training set of data. I have done this before, but now I have a grouping factor and it is throwing me off. I want to predict biomass based on population for each environment.
library(tidyverse)
# Fit one quadratic model of biomass on population per environment; do()
# stores each fitted model in the list column `model`.
fit_mods <- df %>%
  group_by(environ) %>%
  do(model = lm(biomass ~ poly(population, 2), data = .))
Ultimately, I will want to find at which population biomass is the greatest. Usually I would do this by creating a grid and running the model on my new values and finding the max value, but I'm blanking on how to do this with the grouping. Usual way:
# Build a prediction grid spanning the observed population range and try to
# predict from the grouped fit.
min_pop <- min(df$population)
max_pop <- max(df$population)
# 1000 evenly spaced values crossed with two environment labels.
# NOTE(review): the grid column is named `new`, while the model formula uses
# `population`; the labels here are "A"/"B" whereas the dummy data uses
# "a"/"b" — confirm which is intended.
grid_pop <- expand.grid(new = (seq(from = min_pop,
to = max_pop,
length.out = 1000)),
environ = c("A", "B"))
#This is what I did with ungrouped data, but doesn't work now.
# NOTE(review): `fit_mods` is the per-group result built with do(), not a
# single fitted model object.
pred_pop <- predict(object = fit_mods,
newdata = grid_pop,
interval = "predict")
Here is some dummy data:
# Dummy data: five population/biomass observations for each of two
# environments.
df <- data.frame(
  environ = rep(c("a", "b"), each = 5),
  population = c(2, 3, 4, 5, 6, 3, 4, 5, 6, 7),
  biomass = c(1, 2.2, 3.5, 4.1, 3.8, 2.5, 3.6, 4.3, 5.2, 5.1)
)
In a tidyverse many models approach you could do it the following way:
library(tidyverse)
# Many-models approach: one row per environment holding the nested data, the
# fitted quadratic model, a 1000-point population grid over that
# environment's observed range, and the predictions with prediction intervals.
fit_mods <- df %>%
  # Name the list column explicitly; the unnamed form `nest(-environ)` is
  # deprecated in tidyr and triggers a warning.
  nest(data = -environ) %>%
  mutate(
    models = map(data, ~ lm(biomass ~ poly(population, 2), data = .x)),
    min_pop = map_dbl(data, ~ min(.x$population)),
    max_pop = map_dbl(data, ~ max(.x$population)),
    new = map2(
      min_pop, max_pop,
      ~ tibble(population = seq(from = .x, to = .y, length.out = 1000))
    ),
    # "prediction" is spelled out instead of relying on partial matching of
    # "predict".
    pred = map2(
      models, new,
      ~ predict(object = .x, newdata = .y, interval = "prediction")
    )
  )

Resources