I am trying to combine two different stratified tables made with tbl_strata() and tbl_summary() from the gtsummary package. I want to stratify by the same variable in both tables, but use different variables in the "by" argument of tbl_summary(). When I combine the tables using tbl_stack(), the column headers from the second table are lost. Is there any way to combine the tables and keep the column headers from both tables?
See reproducible example below
# Reproducible example: two stratified summary tables that share the same
# strata variable (cut) but use a different `by` variable in tbl_summary().
library(gtsummary)
library(tidyverse)
data("diamonds")

# Table 1: price split by color, stratified by cut.
table1 <- diamonds %>%
  filter(
    cut %in% c("Ideal", "Premium"),
    color %in% c("E", "I")
  ) %>%
  mutate(color = factor(color)) %>%
  tbl_strata(
    strata = cut,
    .tbl_fun = ~ .x %>%
      tbl_summary(by = color, include = price)
  )

# Table 2: price split by clarity, stratified by cut.
table2 <- diamonds %>%
  filter(
    cut %in% c("Ideal", "Premium"),
    clarity %in% c("SI1", "SI2")
  ) %>%
  mutate(clarity = factor(clarity)) %>%
  tbl_strata(
    strata = cut,
    .tbl_fun = ~ .x %>%
      tbl_summary(by = clarity, include = price)
  )

# Stack both tables, each under its own group header.
tbl_stack(
  list(table1, table2),
  group_header = c("Table 1", "Table 2")
)
UPDATE:
Below is a (rough) example of the output I would like to be able to get from tbl_stack():
# Rough mock-up of the desired output: both stratified tables are converted to
# tibbles, their original headers are pushed down into the first data row, and
# the stacked result is rendered with flextable.
library(gtsummary)
library(tidyverse)
library(gt)
data("diamonds")
library(flextable)

# Table 1 as a tibble (price by color, stratified by cut).
table1_tibble <- diamonds %>%
  filter(
    cut %in% c("Ideal", "Premium"),
    color %in% c("E", "I")
  ) %>%
  mutate(color = factor(color)) %>%
  tbl_strata(
    strata = cut,
    .tbl_fun = ~ .x %>%
      tbl_summary(by = color, include = price)
  ) %>%
  as_tibble()

# Keep the original column names as the first row, switch to fixed column
# names shared by both tibbles, and put a "Table 1" title row on top.
table1_tibble <- rbind(colnames(table1_tibble), table1_tibble) %>%
  rename(
    label = colnames(table1_tibble)[1],
    premium_1 = colnames(table1_tibble)[2],
    premium_2 = colnames(table1_tibble)[3],
    ideal_1 = colnames(table1_tibble)[4],
    ideal_2 = colnames(table1_tibble)[5]
  ) %>%
  add_row(label = "Table 1", .before = 1)

# Table 2 as a tibble (price by clarity, stratified by cut).
table2_tibble <- diamonds %>%
  filter(
    cut %in% c("Ideal", "Premium"),
    clarity %in% c("SI1", "SI2")
  ) %>%
  mutate(clarity = factor(clarity)) %>%
  tbl_strata(
    strata = cut,
    .tbl_fun = ~ .x %>%
      tbl_summary(by = clarity, include = price)
  ) %>%
  as_tibble()

# Same header handling as for table 1, with a "Table 2" title row.
table2_tibble <- rbind(colnames(table2_tibble), table2_tibble) %>%
  rename(
    label = colnames(table2_tibble)[1],
    premium_1 = colnames(table2_tibble)[2],
    premium_2 = colnames(table2_tibble)[3],
    ideal_1 = colnames(table2_tibble)[4],
    ideal_2 = colnames(table2_tibble)[5]
  ) %>%
  add_row(label = "Table 2", .before = 1)

# Stack the two tibbles and merge the header cells in column pairs 2:3 and 4:5.
bind_rows(table1_tibble, table2_tibble) %>%
  flextable() %>%
  merge_at(i = 1, j = 2:3, part = "header") %>%
  merge_at(i = 1, j = 4:5, part = "header") %>%
  set_header_labels(label = "", premium_1 = "Premium", ideal_1 = "Ideal")
Hope this clarifies matters.
Best regards,
Martin
Related
I have the following data and table:
library(gt)
library(dplyr)

# Per-colour summary (mean, sd, n) of the simulated measurements in `a`.
a <- rnorm(21, mean = 112, sd = 12)
colour <- rep(c("Blue", "Red", "Green"), 7)
data <- data.frame(colour, a) %>%
  group_by(colour) %>%
  summarise(mean = mean(a), sd = sd(a), n = n()) %>%
  mutate(grp = html("[H<sub>2</sub>O]")) %>%
  rename(cat = colour)

# Per-day summary (mean, sd, n) of the simulated measurements in `b`.
b <- rnorm(21, mean = 60, sd = 12)
day <- rep(c("2", "4", "6"), 7)
data2 <- data.frame(day, b) %>%
  group_by(day) %>%
  # Summarise this data frame's own column `b`. The previous `mean(a)` /
  # `sd(a)` picked up the global vector `a`, giving the same value per day.
  summarise(mean = mean(b), sd = sd(b), n = n()) %>%
  mutate(grp = html("[H<sub>2</sub>O] Additition <br> (Days)")) %>%
  rename(cat = day)

# Variant 1: row groups from `grp`, with `cat` as the row-name (stub) column.
bind_rows(data, data2) %>%
  group_by(grp) %>%
  gt(rowname_col = "cat")

# Variant 2: row groups from `grp`, shown as a column.
bind_rows(data, data2) %>%
  group_by(grp) %>%
  gt() %>%
  tab_options(row_group.as_column = TRUE)
The row group labels appear literally as '[H<sub>2</sub>O]', rather than being rendered as [H2O] with a subscript, etc. It is likely that I am using html() incorrectly and that it needs to be used with another package/function. I have also tried using cols_label(), but it doesn't recognise these as columns in the data frame.
Is there also a way to have the row group column vertically centered, rather than at the top where it currently is? And how do you make these row groups bold?
The html function won't work outside of a gt table, so you'll have to create the row groups using tab_row_group and add the html labels there.
# Use plain-text group keys in the data; the HTML labels are attached later
# inside the gt table via tab_row_group().
data <- data.frame(colour, a) %>%
  group_by(colour) %>%
  summarise(mean = mean(a), sd = sd(a), n = n()) %>%
  mutate(grp = "color") %>%
  rename(cat = colour)

data2 <- data.frame(day, b) %>%
  group_by(day) %>%
  # Summarise this data frame's own column `b`. The previous `mean(a)` /
  # `sd(a)` picked up the global vector `a`, giving the same value per day.
  summarise(mean = mean(b), sd = sd(b), n = n()) %>%
  mutate(grp = "day") %>%
  rename(cat = day)

bind_rows(data, data2) %>%
  gt() %>%
  # One row group per key, labelled with html() inside the gt pipeline.
  tab_row_group(
    label = html("[H<sub>2</sub>O]"),
    rows = grp == "color"
  ) %>%
  tab_row_group(
    label = html("[H<sub>2</sub>O] Additition <br> (Days)"),
    rows = grp == "day"
  ) %>%
  # The helper key column is only needed to define the groups.
  cols_hide(grp)
I have a data table that I created in R to compare the percentage between a population and sample. Here is the script I created for the table:
# Share of each degree category in the population, largest share first,
# followed by a totals row.
team_pop <- team_new %>%
  group_by(degree) %>%
  count() %>%
  ungroup() %>%
  mutate(pop = n / sum(n)) %>%
  arrange(desc(pop)) %>%
  adorn_totals()

# Same computation for the sample.
team_sample <- sample_final %>%
  group_by(degree) %>%
  count() %>%
  ungroup() %>%
  mutate(sam = n / sum(n)) %>%
  arrange(desc(sam)) %>%
  adorn_totals()

# Join the sample shares onto the population shares and show both as whole
# percentages in a plain table (no info line, paging or search box).
team_pop %>%
  select(-n) %>%
  left_join(
    team_sample %>%
      select(degree, sam),
    by = "degree"
  ) %>%
  datatable(
    rownames = FALSE,
    colnames = c(
      "Degree" = "degree",
      "Population" = "pop",
      "Sample" = "sam"
    ),
    options = list(
      info = FALSE,
      paging = FALSE,
      searching = FALSE
    )
  ) %>%
  formatPercentage(2, digits = 0) %>%
  formatPercentage(3, digits = 0)
Here is what the data looks like:
Degree
Population
Sample
Medium
45%
43%
Low
35%
37%
High
20%
20%
But this is how I want my data to be ordered in my data table:
Degree
Population
Sample
High
20%
20%
Medium
45%
43%
Low
35%
37%
Just wondering if someone could please help me order my categories based on the Degree column from High to Low. I have tried the R function "sort" using the formula below, but it still orders my data based on the highest number first:
# Second attempt from the question: the same pipeline as before, except that
# the arrange(desc(...)) step is replaced by a sort() call. Kept exactly as
# posted because this is the version the question reports as not giving the
# desired High / Medium / Low order.
team_pop <- team_new %>%
group_by(degree) %>%
count() %>%
ungroup() %>%
mutate(pop = n/sum(n)) %>%
# NOTE(review): "degreasing" looks like a typo for "decreasing" — confirm.
sort(degree, degreasing = FALSE) %>%
adorn_totals()
# Same attempt applied to the sample data.
team_sample <- sample_final %>%
group_by(degree) %>%
count() %>%
ungroup() %>%
mutate(sam = n/sum(n)) %>%
sort(degree, degreasing = FALSE) %>%
adorn_totals()
# Join the sample shares onto the population shares by degree and display
# columns 2 and 3 as whole percentages.
datatable(
team_pop %>%
select(-n) %>%
left_join(team_sample %>%
select(degree,
sam),
by = "degree"),
rownames = FALSE,
colnames = c(
"Degree"= "degree",
"Population" = "pop",
"Sample" = "sam"),
options = list(info = FALSE,
paging = FALSE,
searching = FALSE)
) %>%
formatPercentage(2, digits = 0) %>%
formatPercentage(3, digits = 0)
Make your degree variable a factor and then arrange based on degree.
# Order the rows High -> Medium -> Low by turning `degree` into a factor with
# explicit levels. The column in team_pop is lower-case `degree` ("Degree" is
# only the display name passed to datatable()). "Total" is kept as the last
# level so the row appended by adorn_totals() is not converted to NA.
team_pop <- team_pop %>%
  mutate(
    degree = factor(degree, levels = c("High", "Medium", "Low", "Total"))
  ) %>%
  arrange(degree)
I am trying to create a 95% minimum convex polygon using sf in R. My code works fine as long as I only group my data on 1 variable, but when I group on two variables, the output loses its sf class and becomes a grouped_df instead.
Here is a toy dataset as an example
library(dplyr)
set.seed(12)

# Toy data: 20 random points carrying an ID and a year, converted to an sf
# object using long/lat as coordinates.
toy <- tibble::tibble(
  ID = rep(c(1, 2), each = 10),
  year = rep(c(1, 2), 10),
  lat = runif(20, 1, 10),
  long = runif(20, 1, 10)
) %>%
  sf::st_as_sf(coords = c("long", "lat"))

# Grouping by a single variable: per ID, keep the points whose distance to the
# group centroid is within the 95th percentile, then take the convex hull.
toy %>%
  group_by(ID) %>%
  summarize(.groups = "keep") %>%
  mutate(cent = sf::st_centroid(geometry)) %>%
  sf::st_cast(to = "POINT") %>%
  mutate(
    dist = sf::st_distance(geometry, cent, by_element = TRUE)
  ) %>%
  filter(dist <= quantile(dist, .95)) %>%
  summarize() %>%
  sf::st_convex_hull() %>%
  class()
This gives the output I want. But when I try to group by two variables, the result loses the sf class.
# Same pipeline as above, but grouped by both ID and year; class() is used to
# inspect what kind of object comes out at the end.
toy %>%
  group_by(ID, year) %>%
  summarize(.groups = "keep") %>%
  mutate(cent = sf::st_centroid(geometry)) %>%
  sf::st_cast(to = "POINT") %>%
  mutate(
    dist = sf::st_distance(geometry, cent, by_element = TRUE)
  ) %>%
  filter(dist <= quantile(dist, .95)) %>%
  summarize() %>%
  sf::st_convex_hull() %>%
  class()
Is there something in my code keeping me from being able to group on two variables?
It's because the second summarize is regrouping on ID. You need a .groups = "keep" there to pass the same grouping through, and then convert the result back to an sf object. Alternatively, you could create a grouping variable with mutate(grp = paste0(ID, year)) and then do group_by(grp).
# Two-variable version: keep the (ID, year) grouping through the second
# summarize() as well, then convert the result back to an sf object.
toy %>%
  group_by(ID, year) %>%
  summarize(.groups = "keep") %>%
  mutate(cent = sf::st_centroid(geometry)) %>%
  sf::st_cast(to = "POINT") %>%
  mutate(dist = sf::st_distance(geometry, cent, by_element = TRUE)) %>%
  filter(dist <= quantile(dist, .95)) %>%
  summarize(.groups = "keep") %>%
  sf::st_convex_hull() %>%
  # Namespaced like every other sf call here: only dplyr is attached in this
  # example, so a bare st_sf() would not be found.
  sf::st_sf()
I have a data frame and want to modify the y-axis by ordering my data according to a variable from another object. I tried to use fct_reorder from forcats, but it didn't work.
My code:
library(tidyverse)
library(ggridges)
library(zoo)

# Query the API and walk the parsed response down to the spreadsheet URL
# (results -> first element -> arquivo -> url).
url <- httr::GET(
  "https://xx9p7hp1p7.execute-api.us-east-1.amazonaws.com/prod/PortalGeral",
  httr::add_headers(
    "X-Parse-Application-Id" = "unAFkcaNDeXajurGB7LChj8SgQYS2ptm"
  )
) %>%
  httr::content() %>%
  `[[`("results") %>%
  `[[`(1) %>%
  `[[`("arquivo") %>%
  `[[`("url")

# Keep rows with no municipality information and coerce every column whose
# name contains "Acumulado", "Novos" or "novos" to numeric.
data <- openxlsx::read.xlsx(url) %>%
  filter(is.na(municipio), is.na(codmun)) %>%
  mutate_at(vars(contains(c("Acumulado", "Novos", "novos"))), ~ as.numeric(.))
# Column 8 is converted from an Excel date number to a Date.
data[, 8] <- openxlsx::convertToDate(data[, 8])

# States ordered by new cases on the day their 7-day rolling mean peaks.
bigger_state <- data %>%
  group_by(estado) %>%
  mutate(
    diasposdez = 1:n(),
    # The argument is spelled `align`; the former `allign` is not a
    # rollmean() argument, so the requested right alignment was not applied.
    mm7d = rollmean(casosNovos, 7, fill = NA, align = "right")
  ) %>%
  filter(data == data[which.max(mm7d)], !is.na(estado)) %>%
  arrange(desc(casosNovos)) %>%
  pull(estado)

# Ridgeline plot per state from 2020-05-01 onwards. The fct_reorder() call in
# aes() is left as posted: it is the call that raises the error shown below.
data %>%
  group_by(estado) %>%
  mutate(
    height = rollmean(casosNovos, k = 7, align = "right", fill = NA) /
      sum(casosNovos),
    estado = as_factor(estado)
  ) %>%
  filter(data >= "2020-05-01") %>%
  ggplot(
    aes(
      x = data,
      y = fct_reorder(.f = estado, .x = bigger_state),
      height = scales::rescale(height)
    )
  ) +
  geom_ridgeline() +
  scale_x_date(
    date_breaks = "2 weeks",
    date_labels = "%d/%b/%Y"
  )
Error:
Error in fct_reorder(.f = estado, .x = bigger_state) :
length(f) == length(.x) is not TRUE
Just use factor(estado, bigger_state) instead of fct_reorder(.f = estado, .x = bigger_state). You are trying to match up all values with the factor levels, which gives you the error.
I want to create a summary table for some dichotomous variables using the expss package. Since the variables are dichotomous, one of the two levels would be sufficient to "show the picture".
I tried to use the function tab_net_cells, but was not able to get the right results. Here is some example code with BrCa (breast cancer) coded as 1 or 0. I only want to show the number of patients with breast cancer, not those without.
# Example data: BrCa is a 0/1 indicator with two missing values; `group`
# alternates between 1 and 2.
df <- data.frame(
  BrCa = c(1, 1, 1, 0, 0, 0, NA, NA, 0, 0),
  group = rep(c(1, 2), times = 5)
)

# Full table: column percentages and case counts for every level of BrCa.
df %>%
  expss::tab_cols(group) %>%
  expss::tab_cells(BrCa) %>%
  expss::tab_stat_cpct(total_row_position = "none", label = "%") %>%
  expss::tab_stat_cases(total_row_position = "none", label = "N") %>%
  expss::tab_pivot(stat_position = "inside_rows")

# Attempt with tab_net_cells() to restrict the table to BrCa equal to 1.
df %>%
  expss::tab_cols(group) %>%
  expss::tab_cells(BrCa) %>%
  expss::tab_net_cells("BrCa" = eq(1)) %>%
  expss::tab_stat_cpct(total_row_position = "none", label = "%") %>%
  expss::tab_stat_cases(total_row_position = "none", label = "N") %>%
  expss::tab_pivot(stat_position = "inside_rows")
The simplest way is to filter the resulting table:
# Same example data as in the question.
df <- data.frame(
  BrCa = c(1, 1, 1, 0, 0, 0, NA, NA, 0, 0),
  group = rep(c(1, 2), times = 5)
)

# Build the full table, then keep only the rows whose label contains "1".
df %>%
  expss::tab_cols(group) %>%
  expss::tab_cells(BrCa) %>%
  expss::tab_stat_cpct(total_row_position = "none", label = "%") %>%
  expss::tab_stat_cases(total_row_position = "none", label = "N") %>%
  expss::tab_pivot(stat_position = "inside_rows") %>%
  expss::where(grepl(1, row_labels))
Another way is to use mean and sum instead of cpct and cases:
df %>%
  expss::tab_cols(group) %>%
  # The mean of the 0/1 indicator scaled by 100 is the percentage with BrCa.
  expss::tab_cells(BrCa * 100) %>%
  expss::tab_stat_mean(label = "%") %>%
  # Switch back to the unscaled indicator before summing; summing BrCa * 100
  # would report 100 times the number of patients.
  # NOTE(review): the two statistics now come from differently labelled
  # cells, so their row labels may differ — confirm the layout is acceptable.
  expss::tab_cells(BrCa) %>%
  expss::tab_stat_sum(label = "N") %>%
  expss::tab_pivot(stat_position = "inside_rows")