Tbl_Summary Chi Squared Testing - r

I have created a summary table for some data using tbl_summary(). The table sums numerical values in columns grouped by a factor, producing a 2x2 table.
I'd like to use tbl_summary()'s built-in statistics to calculate the p-value using a chi-squared test, but I can't tell whether this is possible. Adding the add_p() line gives me a p-value for each row, which is incorrect:
library(gtsummary)
library(tidyverse)
# Example data: a logical grouping flag and two numeric answer-count columns.
test <- data.frame("With_assistant" = c(TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE),
"correct_answers" = c(2,4,5,6,1,2,7,2,1,2,3),
"incorrect_answers" = c(1,2,1,5,3,1,2,5,3,2,4))
# Summarise each count column as its sum within each With_assistant group.
test %>%
tbl_summary(
by = With_assistant,
type = list(c(correct_answers, incorrect_answers) ~ "continuous"),
statistic = list(c(correct_answers, incorrect_answers) ~ "{sum}")
) %>%
# Requests "chisq.test" for every variable; as described in the question this
# yields one p-value per row rather than a single test of the 2x2 table of sums.
add_p(test = everything () ~ "chisq.test")

Would you be okay with adding the result of a chisq.test() to the tbl object as a note?
library(gtsummary)
library(tidyverse)
# Example data: grouping flag plus per-row correct/incorrect answer counts.
test <- data.frame(
  With_assistant = c(TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE),
  correct_answers = c(2, 4, 5, 6, 1, 2, 7, 2, 1, 2, 3),
  incorrect_answers = c(1, 2, 1, 5, 3, 1, 2, 5, 3, 2, 4)
)

# Total correct and incorrect answers within each With_assistant group.
t <- summarize(
  group_by(test, With_assistant),
  sum_cor = sum(correct_answers),
  sum_inc = sum(incorrect_answers)
)
# A tibble: 2 × 3
With_assistant sum_cor sum_inc
<lgl> <dbl> <dbl>
1 FALSE 15 15
2 TRUE 20 14
# Test only the 2x2 table of sums. Passing the whole tibble to chisq.test()
# would coerce the logical With_assistant column to 0/1 and treat it as a third
# column of counts, so the result would be a 2x3 test rather than the intended
# 2x2 one. chisq.test() keeps its defaults here (continuity correction for 2x2).
chi <- chisq.test(as.matrix(t[, c("sum_cor", "sum_inc")]))

# Build the summary table (sum of each count column per group) and report the
# test result as a footnote on the statistic columns.
test %>%
  tbl_summary(
    by = With_assistant,
    type = list(c(correct_answers, incorrect_answers) ~ "continuous"),
    statistic = list(c(correct_answers, incorrect_answers) ~ "{sum}")
  ) %>%
  modify_footnote(
    all_stat_cols() ~ paste0(
      "Chi Square ", round(chi$statistic, 2),
      " with p = ", round(chi$p.value, 2)
    )
  )
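Note that the grouping column must be excluded from the table passed to chisq.test(): calling chisq.test() on the full tibble coerces the logical With_assistant column to 0/1 and tests a 2x3 table, which is where a statistic of 1.37 comes from. The test statistic of your four numbers from the 2x2 contingency table is about 0.21 (p ≈ 0.65, using R's default continuity correction).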

Related

Sum of a column in tbl_summary?

I'm trying to create a table using tbl_summary() that contains sums of columns (a sum of the correct test scores and incorrect test scores), however it seems to keep treating my continuous variables as categorical?
I have tried specifying the type as continuous with no luck.
What I'm aiming for:
library(gtsummary)
library(tidyverse)
# Example data: grouping flag plus per-row correct/incorrect answer counts.
test <- data.frame(
  With_assistant = c(TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE),
  correct_answers = c(2, 4, 5, 6, 1, 2, 7, 2, 1, 2, 3),
  incorrect_answers = c(1, 2, 1, 5, 3, 1, 2, 5, 3, 2, 4)
)

# Desired result: one row per With_assistant value with the column totals.
output <- summarize(
  group_by(test, With_assistant),
  total_correct = sum(correct_answers, na.rm = TRUE),
  total_incorrect = sum(incorrect_answers, na.rm = TRUE)
)
output
Table_1
I've tried the below:
library(gtsummary)
library(tidyverse)
# Example data: a logical grouping flag and two numeric answer-count columns.
test <- data.frame("With_assistant" = c(TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE),
"correct_answers" = c(2,4,5,6,1,2,7,2,1,2,3),
"incorrect_answers" = c(1,2,1,5,3,1,2,5,3,2,4))
# First attempt: no type= override is given for the count columns.
output <- test %>%
tbl_summary(
by = With_assistant,
# NOTE(review): {n} is a bare braced expression here, not a string; the working
# answer further down passes the statistic as a quoted string such as "{sum}".
statistic = all_continuous() ~ {n}
)
Produces a count of each result as below:
Table_2
library(gtsummary)
library(tidyverse)
# Example data: a logical grouping flag and two numeric answer-count columns.
test <- data.frame("With_assistant" = c(TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE),
"correct_answers" = c(2,4,5,6,1,2,7,2,1,2,3),
"incorrect_answers" = c(1,2,1,5,3,1,2,5,3,2,4))
# Second attempt: the count columns are now declared continuous.
output <- test %>%
tbl_summary(
by = With_assistant,
type = c(correct_answers, incorrect_answers) ~ "continuous",
# NOTE(review): the statistic is still the bare {n} rather than a quoted string;
# the error reported for this snippet says a character value is expected.
statistic = all_continuous() ~ {n},
percent = "column",
missing = "no"
) %>%
# NOTE(review): the table is piped into print() while `output`, the name being
# assigned by this same statement, is also passed as an argument - confirm intent.
print(output)
Produces an error "Error: Error processing statistic argument for element 'Anatomy_yes'. Expecting a character as the passed value."
You almost made it. Check the documentation again: you need to set the type option explicitly, since by default numeric variables with fewer than 10 unique values are treated as categorical.
library(gtsummary)
library(tidyverse)
# Example data: grouping flag plus per-row correct/incorrect answer counts.
test <- data.frame(
  With_assistant = c(TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE),
  correct_answers = c(2, 4, 5, 6, 1, 2, 7, 2, 1, 2, 3),
  incorrect_answers = c(1, 2, 1, 5, 3, 1, 2, 5, 3, 2, 4)
)

# Declare both count columns as continuous so they are not tabulated as
# categorical levels.
tbl_summary(
  test,
  by = With_assistant,
  type = list(c(correct_answers, incorrect_answers) ~ "continuous")
)
Is this what you like to achieve?
With statistic option, specified for the sum:
# Same table, but report the sum of each count column within each group.
tbl_summary(
  test,
  by = With_assistant,
  type = list(c(correct_answers, incorrect_answers) ~ "continuous"),
  statistic = list(c(correct_answers, incorrect_answers) ~ "{sum}")
)

How to convert a list of tbl_regression objects to a single tbl_regression object?

I am trying to add significance stars to a gtsummary table that combines multiple models, but I'm getting an error that I don't know how to resolve.
library(gtsummary)
library(tidyverse)
# Create a list of tbl_regression objects
# For each right-hand side, build the formula `vs ~ <rhs>`, fit a binomial
# (logit) glm on mtcars and wrap the fit in tbl_regression().
models <- c("disp", "disp + hp") %>%
map(
~ paste("vs", .x, sep = " ~ ") %>%
as.formula() %>%
glm(data = mtcars,
family = binomial(link = "logit")) %>%
tbl_regression(exponentiate = TRUE))
# Try to add significance stars
# `models` is the list returned by map(), so the whole list (not a single
# table) is piped in here; this is the call that raises the error shown below.
models %>%
add_significance_stars(
pattern = "{estimate}{stars}",
thresholds = c(0.001, 0.01, 0.05),
hide_ci = TRUE,
hide_p = TRUE,
hide_se = FALSE
)
#> Error: Error in argument 'x='. Expecting object of class 'tbl_regression', or 'tbl_uvregression'
It seems that the list models is not an object of class tbl_regression, and therefore cannot be passed to add_significance_stars(). How can I fix this problem?
Your models object is a list of gtsummary tables. Hence, as you did when you created the tables you have to use map to loop over the list to add significance stars to each table:
library(gtsummary)
library(tidyverse)
# Apply the star formatting to each table in the list, one table at a time.
map(
  models,
  function(tbl) {
    add_significance_stars(
      tbl,
      pattern = "{estimate}{stars}",
      thresholds = c(0.001, 0.01, 0.05),
      hide_ci = TRUE,
      hide_p = TRUE,
      hide_se = FALSE
    )
  }
)
As you indicate, models is a list of objects of class tbl_regression. You can just use lapply, like this:
# Replace each table in `models` with its starred version; the named arguments
# after the function are forwarded by lapply() to add_significance_stars().
models <- lapply(
  models,
  add_significance_stars,
  pattern = "{estimate}{stars}",
  thresholds = c(0.001, 0.01, 0.05),
  hide_ci = TRUE,
  hide_p = TRUE,
  hide_se = FALSE
)
Or you could include the call to add_significance_stars() in the original map()

Add Chi test to plot_stackfrq in R

I need to add a chi-squared test to plot_stackfrq() in R, but I didn't see any parameter of the function that accepts chi.test or similar.
This is my code:
library(tidyr)
library(data.table)
# Number the rows within each Educ_level, then spread Insurance into one column
# per Educ_level value.
# NOTE(review): `homes` is not defined in this snippet, and %>% / mutate() /
# plot_stackfrq() are not provided by the two libraries loaded above -
# presumably dplyr and sjPlot are attached elsewhere; confirm.
homes2 <- homes %>% mutate(rn = rowid(Educ_level)) %>%
pivot_wider(names_from = Educ_level, values_from = Insurance)
# Stacked frequency plot of three of the resulting columns.
plot_stackfrq(homes2[,c("High Scholl","College","Elementary")],
geom.colors = c("YlOrBr"))
I was able to resolve it with plot_xtab(), which showed the chi-squared test as I needed.
# Stacked cross-tab plot; show.summary = TRUE is presumably what adds the
# chi-squared summary the author mentions - confirm against the plot_xtab() docs.
# NOTE(review): homes2 was built with pivot_wider(names_from = Educ_level,
# values_from = Insurance), so it may not contain Educ_label / Insurance
# columns - confirm which data frame these two vectors should come from.
plot_xtab(homes2$Educ_label, homes2$Insurance, bar.pos = "stack", show.total = FALSE, margin = "row", coord.flip = TRUE, show.n = FALSE, show.summary = TRUE,
geom.colors = c("YlOrBr"))

Creating a user-defined total with grand_summary_rows()

I have a table that looks like this:
category family amount
<chr> <chr> <chr>
1 SALES ONLINE SALES 47
2 SALES IN STORE 72
3 COGS LABOR 28
4 COGS TAXES 35
5 COGS WORKERS COMP 24
6 COGS BENEFITS 33
7 EXPENSE AUTOMOBILE 44
8 EXPENSE RENT 12
9 EXPENSE TELEPHONE 26
I am trying to create a gt table from this so I have created this code:
library(tidyverse)
library(gt)
# Raw columns for the example table: group, line item and amount.
category <- rep(c("SALES", "COGS", "EXPENSE"), times = c(2, 4, 3))
family <- c(
  "ONLINE SALES", "IN STORE",
  "LABOR", "TAXES", "WORKERS COMP", "BENEFITS",
  "AUTOMOBILE", "RENT", "TELEPHONE"
)
amount <- c(47, 72, 28, 35, 24, 33, 44, 12, 26)

# gt table with `family` as row labels, grouped by `category`, and a Total
# summary row under every group.
output <- tibble(category, family, amount) %>%
  gt(rowname_col = "family", groupname_col = "category") %>%
  row_group_order(c("SALES", "COGS", "EXPENSE")) %>%
  summary_rows(
    groups = TRUE,
    columns = "amount",
    fns = list(Total = ~sum(., na.rm = TRUE))
  )
output
How do you get the overall total of SALES-COGS-EXPENSE using the grand_summary_rows() function while all of the amounts are still positive?
if I have correctly understood your request you can use the code below:
# Same grouped table as in the question, plus a grand summary row.
output2 <- as_tibble(cbind(category,family,amount)) %>%
mutate(amount= as.numeric(amount)) %>%
gt(rowname_col = 'family', groupname_col = 'category') %>%
# Per-group "Total" row: sum of amount within each category group.
summary_rows(groups = TRUE,
columns = 'amount',
fns = list(
Total = ~sum(.,na.rm = TRUE)
)) %>%
# "Grand Total" row computed with a plain sum(), i.e. every amount is added;
# it does not subtract COGS and EXPENSE from SALES.
grand_summary_rows(
columns = c("family","category","amount"),
fns = list(
"Grand Total" = ~sum(.,na.rm = TRUE)),
formatter = fmt_number,
use_seps = FALSE
)
output2
################### EDIT ###################
The only way I found from the documentation is to create a custom aggregation function. Here's the full working example with the output printscreen.
#' Net total (SALES - COGS - EXPENSE) from a gt table object
#'
#' Reads both the `category` and `amount` columns from the table stored in the
#' object's `_data` element, so the result does not depend on a separate global
#' `category` vector happening to line up with the rows.
#'
#' @param data A gt table object (or any list) whose `_data` element is a data
#'   frame with `category` and `amount` columns. Assumes gt keeps the original
#'   columns in `_data` - confirm for the installed gt version.
#' @return A single number: sum of SALES amounts minus the COGS and EXPENSE sums.
customFunc <- function(data) {
  tbl <- data[["_data"]]
  # %in% never yields NA, and na.rm matches the per-group Total rows.
  total_for <- function(group) {
    sum(tbl$amount[tbl$category %in% group], na.rm = TRUE)
  }
  total_for("SALES") - total_for("COGS") - total_for("EXPENSE")
}
# Build the grouped gt object once and keep it in `data`, because the grand
# summary below needs the whole table rather than a single column.
data <- as_tibble(cbind(category,family,amount)) %>%
mutate(amount= as.numeric(amount)) %>%
gt(rowname_col = 'family', groupname_col = 'category')
output3 <- data %>%
# Per-group "Total" row: sum of amount within each category group.
summary_rows(groups = TRUE,
columns = 'amount',
fns = list(
Total = ~sum(.,na.rm = TRUE)
)) %>%
grand_summary_rows(
columns = c("family","category","amount"),
fns = list(
# The formula ignores `.` and instead calls customFunc() on the gt object
# `data` created above, which returns SALES - COGS - EXPENSE.
"Grand Total" = ~customFunc(data)),
formatter = fmt_number,
use_seps = FALSE
)
output3

Order labels by month name chronologically in R plotly [duplicate]

I'm using the plotly package in R to build an R Shiny dashboard. I want to order my pie chart in a custom order (non-alphabetic, non-descending/ascending order). For some reason I can't find how to achieve this.
Help would be highly appreciated!
# Get Manufacturer
# Manufacturer = first word of each row name; vapply() guarantees a character
# vector regardless of input.
mtcars$manuf <- vapply(
  strsplit(rownames(mtcars), " "),
  function(parts) parts[[1]],
  character(1)
)
# Number of cars per manufacturer.
df <- mtcars %>%
  group_by(manuf) %>%
  summarize(count = n())
# Create custom order: second half of the manufacturers first, then the first half.
customOrder <- c(df$manuf[12:22], df$manuf[1:11])
# Order data frame
df <- df %>% slice(match(customOrder, manuf))
# Create factor whose levels follow the current row order.
df$manuf <- factor(df$manuf, levels = df[["manuf"]])
# Plot
df %>%
  plot_ly(labels = ~manuf, values = ~count) %>%
  add_pie(hole = 0.6) %>%
  layout(
    title = "Donut charts using Plotly", showlegend = FALSE,
    xaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE),
    yaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE)
  )
Ok, the answer is apparently twofold. Firstly, there is an argument in plot_ly, asking to sort the data on values (default is TRUE) or work with the custom order. Change this to FALSE.
Then, secondly, the order (clockwise) is different from the order in the data frame. The pie starts in the top right corner, and continues counterclockwise.
Hence, the following solves the problem:
# Get Manufacturer
# Manufacturer = first word of each row name; vapply() guarantees a character
# vector regardless of input.
mtcars$manuf <- vapply(
  strsplit(rownames(mtcars), " "),
  function(parts) parts[[1]],
  character(1)
)
# Number of cars per manufacturer.
df <- mtcars %>%
  group_by(manuf) %>%
  summarize(count = n())
# Create custom order: second half of the manufacturers first, then the first half.
customOrder <- c(df$manuf[12:22], df$manuf[1:11])
# Adjust customOrder to deal with pie: keep the first slice and reverse the
# rest. Negative indexing stays correct even for a single-element order.
customOrder <- c(customOrder[1], rev(customOrder[-1]))
# Order data frame
df <- df %>% slice(match(customOrder, manuf))
# Create factor whose levels follow the current row order.
df$manuf <- factor(df$manuf, levels = df[["manuf"]])
# Plot; sort = FALSE keeps the data-frame order instead of sorting by value.
df %>%
  plot_ly(labels = ~manuf, values = ~count, sort = FALSE) %>%
  add_pie(hole = 0.6) %>%
  layout(
    title = "Donut charts using Plotly", showlegend = FALSE,
    xaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE),
    yaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE)
  )

Resources