How to create subheadings using summary_table R qwraps2? - r

I want to produce a table of summary stats with summary_table. This needs to have subheadings.
I have been following this example:
https://cran.r-project.org/web/packages/qwraps2/vignettes/summary-statistics.html
stats_summary1 <-
list("Sex (female)" =
list("number (%)" = ~ qwraps2::n_perc(.data$sex=="F", digits = 1)),
"Age" =
list("min" = ~ min(.data$age_d, digits = 1),
"max" = ~ max(.data$age_d, digits = 1),
"median (IQR)" = ~ qwraps2::median_iqr(.data$age_d, digits = 1)),
"Disease" =
list("A" = ~ qwraps2::n_perc(.data$d==1, digits = 1),
"B" = ~ qwraps2::n_perc(.data$d==2, digits = 1),
"C" = ~ qwraps2::n_perc(.data$d==3, digits = 1),
"D" = ~ qwraps2::n_perc(.data$d==4, digits = 1),
"E" = ~ qwraps2::n_perc(.data$d==5, digits = 1)),
"Disease duration" =
list("min" = ~ min(.data$dis_dur, digits = 1),
"max" = ~ max(.data$dis_dur, digits = 1),
"median (IQR)" = ~ qwraps2::median_iqr(.data$dis_dur, digits = 1))
)
whole <- summary_table(df, stats_summary1)
The table output does not include the subheadings, e.g. "Disease". It also prints the percentage with a backslash before the percent sign, e.g. 65\%

First, I will create an example dataset to match the provided summary
set.seed(42)
df <- data.frame(sex = sample(c("M", "F"), size = 100, replace = TRUE),
age_d = runif(100, 18, 80),
d = sample(1:5, size = 100, replace = TRUE),
dis_dur = runif(100, 20, 43),
stringsAsFactors = FALSE)
str(df)
#> 'data.frame': 100 obs. of 4 variables:
#> $ sex : chr "M" "M" "M" "M" ...
#> $ age_d : num 56.8 31.5 31.4 42.1 76.4 ...
#> $ d : int 2 1 4 3 5 2 4 2 3 2 ...
#> $ dis_dur: num 33.3 21.7 23.8 37 30.9 ...
load and attach the qwraps2 namespace
library(qwraps2)
by default qwraps2 formats output in LaTeX. To have the default switched to
markdown set the following option
options(qwraps2_markup = "markdown")
Update: as of version 0.5.0 of qwraps2, the use of the .data pronoun is
no longer needed or recommended.
# Summary specification passed to summary_table(): a named list of row
# groups, each of which is a named list of one-sided formulas (one per row).
stats_summary1 <-
list("Sex (female)" =
list("number (%)" = ~ qwraps2::n_perc(sex=="F", digits = 1)),
"Age" =
# NOTE(review): base min()/max() have no `digits` argument, so `digits = 1`
# is absorbed by `...` and compared as one more data value. That is why the
# recorded output below shows min = 1 and an unrounded max. If one-decimal
# formatting is intended, something like
# qwraps2::frmt(min(age_d), digits = 1) is presumably what was meant --
# confirm before changing, since the recorded output would need regenerating.
list("min" = ~ min(age_d, digits = 1),
"max" = ~ max(age_d, digits = 1),
"median (IQR)" = ~ qwraps2::median_iqr(age_d, digits = 1)),
"Disease" =
# One row per disease code (1-5), each reported as count and percentage.
list("A" = ~ qwraps2::n_perc(d==1, digits = 1),
"B" = ~ qwraps2::n_perc(d==2, digits = 1),
"C" = ~ qwraps2::n_perc(d==3, digits = 1),
"D" = ~ qwraps2::n_perc(d==4, digits = 1),
"E" = ~ qwraps2::n_perc(d==5, digits = 1)),
"Disease duration" =
# NOTE(review): same `digits = 1` caveat as in the "Age" group applies to
# min()/max() here.
list("min" = ~ min(dis_dur, digits = 1),
"max" = ~ max(dis_dur, digits = 1),
"median (IQR)" = ~ qwraps2::median_iqr(dis_dur, digits = 1))
)
whole <- summary_table(df, stats_summary1)
whole
#>
#>
#> | |df (N = 100) |
#> |:-------------------------|:-----------------|
#> |**Sex (female)** | |
#> | number (%) |56 (56.0%) |
#> |**Age** | |
#> | min |1 |
#> | max |77.6816968536004 |
#> | median (IQR) |52.8 (33.7, 64.8) |
#> |**Disease** | |
#> | A |26 (26.0%) |
#> | B |20 (20.0%) |
#> | C |15 (15.0%) |
#> | D |21 (21.0%) |
#> | E |18 (18.0%) |
#> |**Disease duration** | |
#> | min |1 |
#> | max |42.5464100171812 |
#> | median (IQR) |31.0 (25.1, 35.8) |
This should resolve the issue with the backslash before the percent sign
(an escape needed for LaTeX). Make sure you have the results = "asis" chunk
option set so the table will render nicely in your final document.
As for the omitted subheadings, the qwraps2_summary_table object is a
character matrix with the class attribute set accordingly and has the
additional attribute rgroups which is used by the printing methods to
format the output correctly.
str(whole)
#> 'qwraps2_summary_table' chr [1:12, 1] "56 (56.0%)" "1" "77.6816968536004" ...
#> - attr(*, "dimnames")=List of 2
#> ..$ : chr [1:12] "number (%)" "min" "max" "median (IQR)" ...
#> ..$ : chr "df (N = 100)"
#> - attr(*, "rgroups")= Named int [1:4] 1 3 5 3
#> ..- attr(*, "names")= chr [1:4] "Sex (female)" "Age" "Disease" "Disease duration"
#> - attr(*, "n")= int 100
Created on 2020-09-14 by the reprex package (v0.3.0)
devtools::session_info()
#> ─ Session info ───────────────────────────────────────────────────────────────
#> setting value
#> version R version 4.0.2 (2020-06-22)
#> os macOS Catalina 10.15.6
#> system x86_64, darwin17.0
#> ui X11
#> language (EN)
#> collate en_US.UTF-8
#> ctype en_US.UTF-8
#> tz America/Denver
#> date 2020-09-14
#>
#> ─ Packages ───────────────────────────────────────────────────────────────────
#> package * version date lib source
#> assertthat 0.2.1 2019-03-21 [1] CRAN (R 4.0.0)
#> backports 1.1.9 2020-08-24 [1] CRAN (R 4.0.2)
#> callr 3.4.4 2020-09-07 [1] CRAN (R 4.0.2)
#> cli 2.0.2 2020-02-28 [1] CRAN (R 4.0.0)
#> crayon 1.3.4 2017-09-16 [1] CRAN (R 4.0.0)
#> desc 1.2.0 2018-05-01 [1] CRAN (R 4.0.0)
#> devtools 2.3.1 2020-07-21 [1] CRAN (R 4.0.2)
#> digest 0.6.25 2020-02-23 [1] CRAN (R 4.0.0)
#> ellipsis 0.3.1 2020-05-15 [1] CRAN (R 4.0.0)
#> evaluate 0.14 2019-05-28 [1] CRAN (R 4.0.0)
#> fansi 0.4.1 2020-01-08 [1] CRAN (R 4.0.0)
#> fs 1.5.0 2020-07-31 [1] CRAN (R 4.0.2)
#> glue 1.4.2 2020-08-27 [1] CRAN (R 4.0.2)
#> highr 0.8 2019-03-20 [1] CRAN (R 4.0.0)
#> htmltools 0.5.0 2020-06-16 [1] CRAN (R 4.0.0)
#> knitr 1.29 2020-06-23 [1] CRAN (R 4.0.0)
#> magrittr 1.5 2014-11-22 [1] CRAN (R 4.0.0)
#> memoise 1.1.0 2017-04-21 [1] CRAN (R 4.0.0)
#> pkgbuild 1.1.0 2020-07-13 [1] CRAN (R 4.0.2)
#> pkgload 1.1.0 2020-05-29 [1] CRAN (R 4.0.0)
#> prettyunits 1.1.1 2020-01-24 [1] CRAN (R 4.0.0)
#> processx 3.4.4 2020-09-03 [1] CRAN (R 4.0.2)
#> ps 1.3.4 2020-08-11 [1] CRAN (R 4.0.2)
#> qwraps2 * 0.5.0 2020-08-31 [1] local
#> R6 2.4.1 2019-11-12 [1] CRAN (R 4.0.0)
#> Rcpp 1.0.5 2020-07-06 [1] CRAN (R 4.0.0)
#> remotes 2.2.0 2020-07-21 [1] CRAN (R 4.0.2)
#> rlang 0.4.7 2020-07-09 [1] CRAN (R 4.0.2)
#> rmarkdown 2.3 2020-06-18 [1] CRAN (R 4.0.0)
#> rprojroot 1.3-2 2018-01-03 [1] CRAN (R 4.0.0)
#> sessioninfo 1.1.1 2018-11-05 [1] CRAN (R 4.0.0)
#> stringi 1.5.3 2020-09-09 [1] CRAN (R 4.0.2)
#> stringr 1.4.0 2019-02-10 [1] CRAN (R 4.0.0)
#> testthat 2.3.2 2020-03-02 [1] CRAN (R 4.0.0)
#> usethis 1.6.1 2020-04-29 [1] CRAN (R 4.0.0)
#> withr 2.2.0 2020-04-20 [1] CRAN (R 4.0.0)
#> xfun 0.17 2020-09-09 [1] CRAN (R 4.0.2)
#> yaml 2.2.1 2020-02-01 [1] CRAN (R 4.0.0)
#>
#> [1] /Library/Frameworks/R.framework/Versions/4.0/Resources/library

Your example is not really reproducible since your data is not provided.
What worked for me was the following:
Instead of using the summary statistics functions from qwraps2, I use the ones from the carpenter package. This is because in PDF documents rendered through the bookdown package, qwraps2::mean_sd() renders the "±" sign as the literal text &plusmn; in the table.
Instead, carpenter::stat_meanSD() avoids the "±" sign and uses brackets for the SD instead. You could do another workaround with paste() though if you prefer the "±" sign.
The fact that qwraps2 is ignoring the top-level headings like Sex (female), Age etc. in your example is very annoying. My workaround here is to manually re-introduce these headings through the kableExtra package. In your example this would be something like this:
.
options(qwraps2_markup = "markdown")
summary_table(df, stats_summary1) %>%
kableExtra::pack_rows("Sex (female)", 1, 1) %>%
kableExtra::pack_rows("Age", 2, 4) %>%
kableExtra::pack_rows("Disease", 5, 9) %>%
kableExtra::pack_rows("Disease duration", 10, 12)

Related

DiagrammeR - arrow problems

I am trying to create a flowchart using this excellent tutorial: https://dannyjnwong.github.io/STROBE-CONSORT-Diagrams-in-R/
library(DiagrammeR)
library(DiagrammeRsvg) #Needed if you want to export the image
library(rsvg) #Needed if you want to export the image
#Set the values which will go into each label.
a1 <- 'Total available patients\n(n = x)'
b1 <- ''
c1 <- ''
d1 <- 'Included for analysis\n(n = x - y - z)'
e1 <- 'Data linked with\nexternal dataset'
a2 <- ''
b2 <- 'Excluded because of\nexclusion criteria (n = y)'
c2 <- 'Excluded because of\nmissing values (n = z)'
d2 <- ''
e2 <- ''
#Create a node dataframe
ndf <- create_node_df(
n = 10,
label = c(a1, b1, c1, d1, e1, #Column 1
a2, b2, c2, d2, e2), #Column 2
style = c('solid', 'invis', 'invis', 'solid', 'solid', #Column 1
'invis', 'solid', 'solid', 'invis', 'invis'), #Column 2
shape = c('box', 'point', 'point', 'box', 'box', #Column 1
'plaintext', 'box', 'box', 'point', 'point'), #Column 2
width = c(3, 0.001, 0.001, 3, 3, #Column 1
2, 2.5, 2.5, 0.001, 0.001), #Column 2
height = c(1, 0.001, 0.001, 1, 1, #Column 1
1, 1, 1, 0.001, 0.001), #Column 2
fontsize = c(rep(14, 10)),
fontname = c(rep('Helvetica', 10)),
penwidth = 1.5,
fixedsize = 'true')
#Create an edge dataframe
edf <- create_edge_df(
from = c(1, 2, 3, 4, #Column 1
6, 7, 8, 9, #Column 2
2, 3 #Horizontals
),
to = c(2, 3, 4, 5, #Column 1
7, 8, 9, 10, #Column 2
7, 8 #Horizontals
),
arrowhead = c('none', 'none', 'normal', 'normal', #Column 1
'none', 'none', 'none', 'none', #Column 2
'normal', 'normal' #Horizontals
),
color = c('black', 'black', 'black', 'black', #Column 1
'#00000000', '#00000000', '#00000000', '#00000000', #Column 2
'black', 'black' #Horizontals
),
constraint = c(rep('true', 8), #Columns
rep('false', 2) #Horizontals
)
)
g <- create_graph(ndf,
edf,
attr_theme = NULL)
render_graph(g)
The result should look like this:
But instead it looks like this when I run the code on my computer:
Anyone eager to help?
Best regards,
H
Maybe a bit late, but better than never.
You have to create two nodes with width = 0 and height = 0, from which the arrows can originate horizontally. They need to be defined in subgraphs (with rank = same) in order to be on a horizontal axis with the boxes they point to.
# Render a PRISMA-style flow chart from Graphviz DOT source.
#
# Layout notes:
# * a-e are the labelled boxes; x1 and x2 are zero-sized point nodes that sit
#   on the vertical spine and act as branch points.
# * Each exclusion box is placed on the same rank as its branch point
#   (b with x1, c with x2), so the edge between them is drawn horizontally.
# * splines=ortho asks Graphviz to route edges with right-angle segments.
# * Braces must balance: one `}` closes each subgraph and a single final `}`
#   closes `digraph prisma{`.
DiagrammeR::grViz("digraph prisma{
node [shape=box, fontsize = 12, fontname = 'Helvetica', width = 2];
a; b; c; d; e
# create 2 nodes without box around
node [shape = point, width = 0, height = 0]
x1; x2
graph [splines=ortho, nodesep=1, dpi = 72]
# Labels
a [label = 'Total available patients\n(n = x)'];
b [label = 'Excluded because of\nexclusion criteria (n = y)'];
c [label = 'Excluded because of\nmissing values (n = z)'];
d [label = 'Included for analysis\n(n = x - y - z)']
e [label = 'Data linked with\nexternal dataset']
# Edge definitions
a -> x1 [arrowhead='none']
x1 -> x2 [arrowhead='none']
x1 -> b
x2 -> c
x2 -> d
d -> e
# Make subgraph definition so arrow is horzontal
subgraph {
rank = same; b; x1;
}
subgraph {
rank = same; c; x2;
}
}
")
Created on 2021-08-06 by the reprex package (v2.0.1)
Session info
sessioninfo::session_info()
#> ─ Session info ───────────────────────────────────────────────────────────────
#> setting value
#> version R version 4.0.3 (2020-10-10)
#> os macOS Catalina 10.15.7
#> system x86_64, darwin17.0
#> ui X11
#> language (EN)
#> collate de_CH.UTF-8
#> ctype de_CH.UTF-8
#> tz Europe/Zurich
#> date 2021-08-06
#>
#> ─ Packages ───────────────────────────────────────────────────────────────────
#> package * version date lib source
#> backports 1.2.1 2020-12-09 [1] CRAN (R 4.0.2)
#> callr 3.7.0 2021-04-20 [1] CRAN (R 4.0.2)
#> cli 3.0.0 2021-06-30 [1] CRAN (R 4.0.2)
#> crayon 1.4.1 2021-02-08 [1] CRAN (R 4.0.2)
#> curl 4.3.2 2021-06-23 [1] CRAN (R 4.0.2)
#> DiagrammeR 1.0.6.1 2020-05-08 [1] CRAN (R 4.0.2)
#> digest 0.6.27 2020-10-24 [1] CRAN (R 4.0.2)
#> ellipsis 0.3.2 2021-04-29 [1] CRAN (R 4.0.2)
#> evaluate 0.14 2019-05-28 [1] CRAN (R 4.0.0)
#> fansi 0.5.0 2021-05-25 [1] CRAN (R 4.0.2)
#> fs 1.5.0 2020-07-31 [1] CRAN (R 4.0.2)
#> glue 1.4.2 2020-08-27 [1] CRAN (R 4.0.2)
#> highr 0.9 2021-04-16 [1] CRAN (R 4.0.2)
#> htmltools 0.5.1.1 2021-01-22 [1] CRAN (R 4.0.2)
#> htmlwidgets 1.5.2 2020-10-03 [1] CRAN (R 4.0.2)
#> httr 1.4.2 2020-07-20 [1] CRAN (R 4.0.2)
#> jsonlite 1.7.2 2020-12-09 [1] CRAN (R 4.0.2)
#> knitr 1.33 2021-04-24 [1] CRAN (R 4.0.2)
#> lifecycle 1.0.0 2021-02-15 [1] CRAN (R 4.0.2)
#> magrittr 2.0.1 2020-11-17 [1] CRAN (R 4.0.2)
#> mime 0.11 2021-06-23 [1] CRAN (R 4.0.2)
#> pillar 1.6.1 2021-05-16 [1] CRAN (R 4.0.2)
#> pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.0.0)
#> processx 3.5.2 2021-04-30 [1] CRAN (R 4.0.2)
#> ps 1.6.0 2021-02-28 [1] CRAN (R 4.0.2)
#> purrr 0.3.4 2020-04-17 [1] CRAN (R 4.0.0)
#> R6 2.5.0 2020-10-28 [1] CRAN (R 4.0.2)
#> RColorBrewer 1.1-2 2014-12-07 [1] CRAN (R 4.0.0)
#> reprex 2.0.1 2021-08-05 [1] CRAN (R 4.0.3)
#> rlang 0.4.11 2021-04-30 [1] CRAN (R 4.0.2)
#> rmarkdown 2.9 2021-06-15 [1] CRAN (R 4.0.2)
#> rstudioapi 0.13 2020-11-12 [1] CRAN (R 4.0.2)
#> sessioninfo 1.1.1 2018-11-05 [1] CRAN (R 4.0.0)
#> stringi 1.7.3 2021-07-16 [1] CRAN (R 4.0.2)
#> stringr 1.4.0 2019-02-10 [1] CRAN (R 4.0.2)
#> styler 1.3.2 2020-02-23 [1] CRAN (R 4.0.2)
#> tibble 3.1.2 2021-05-16 [1] CRAN (R 4.0.2)
#> utf8 1.2.1 2021-03-12 [1] CRAN (R 4.0.2)
#> vctrs 0.3.8 2021-04-29 [1] CRAN (R 4.0.2)
#> visNetwork 2.0.9 2019-12-06 [1] CRAN (R 4.0.2)
#> webshot 0.5.2 2019-11-22 [1] CRAN (R 4.0.0)
#> withr 2.4.2 2021-04-18 [1] CRAN (R 4.0.2)
#> xfun 0.24 2021-06-15 [1] CRAN (R 4.0.2)
#> xml2 1.3.2 2020-04-23 [1] CRAN (R 4.0.0)
#> yaml 2.2.1 2020-02-01 [1] CRAN (R 4.0.0)
#>
#> [1] /Library/Frameworks/R.framework/Versions/4.0/Resources/library

Calculating upper and lower confidence intervals by group in dplyr summarise()

I am trying to make a table that shows N (number of observations), percent frequency (of answers > 0), and the lower and upper confidence intervals for percent frequency, and I want to group this by type.
Example of data
dat <- data.frame(
"type" = c("B","B","A","B","A","A","B","A","A","B","A","A","A","B","B","B"),
"num" = c(3,0,0,9,6,0,4,1,1,5,6,1,3,0,0,0)
)
Expected output (with values filled in):
Type N Percent Lower 95% CI Upper 95% CI
A
B
Attempt
library(dplyr)
library(qwraps2)
table<-dat %>%
group_by(type) %>%
summarise(N=n(),
mean.ci = mean_ci(dat$num),
"Percent"=n_perc(num > 0))
This worked to get N and percent frequency, but returned an error: "Column must be length 1 (a summary value), not 3" when I added in mean_ci
The second code I tried, found here:
table2<-dat %>%
group_by(type) %>%
summarise(N.num=n(),
mean.num = mean(dat$num),
sd.num = sd(dat$num),
"Percent"=n_perc(num > 0)) %>%
mutate(se.num = sd.num / sqrt(N.num),
lower.ci = 100*(mean.num - qt(1 - (0.05 / 2), N.num - 1) * se.num),
upper.ci = 100*(mean.num + qt(1 - (0.05 / 2), N.num - 1) * se.num))
# A tibble: 2 x 8
# type N.num mean.num sd.num Percent se.num lower.ci upper.ci
# <fct> <int> <dbl> <dbl> <chr> <dbl> <dbl> <dbl>
#1 A 8 2.44 2.83 "6 (75.00\\%)" 1.00 7.35 480.
#2 B 8 2.44 2.83 "4 (50.00\\%)" 1.00 7.35 480.
This gave me an output, but the confidence intervals are not logical.
The output of mean_ci is a vector of length 3. This is maybe unexpected because the package has added a print method so that when you see this in the console it looks like a single character value and not a numeric length > 1 vector. But, you can see the underlying data structure by looking at str.
mean_ci(dat$num) %>% str
# 'qwraps2_mean_ci' Named num [1:3] 2.44 1.05 3.82
# - attr(*, "names")= chr [1:3] "mean" "lcl" "ucl"
# - attr(*, "alpha")= num 0.05
In summarize, each element of each column of the output needs to be length 1, so providing a length 3 object for summarize to put in a single "cell" (column element) results in an error. A workaround is to put the length 3 vector in a list, so that it is now a length 1 list. Then you can use unnest_wider to separate it into 3 columns (and therefore making the table "wider")
library(tidyverse)
dat %>%
group_by(type) %>%
summarise( N=n(),
mean.ci = list(mean_ci(num)),
"Percent"= n_perc(num > 0)) %>%
unnest_wider(mean.ci)
# # A tibble: 2 x 6
# type N mean lcl ucl Percent
# <fct> <int> <dbl> <dbl> <dbl> <chr>
# 1 A 8 2.25 0.523 3.98 "6 (75.00\\%)"
# 2 B 8 2.62 0.344 4.91 "4 (50.00\\%)"
IceCreamToucan’s answer is very good. I’m posting this answer to offer a
different way to present the information.
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
library(qwraps2)
dat <- data.frame("type" = c("B","B","A","B","A","A","B","A","A","B","A","A","A","B","B","B"),
"num" = c(3,0,0,9,6,0,4,1,1,5,6,1,3,0,0,0))
When building the dplyr::summarize call you can use the qwraps2::frmtci
call to format the output of qwraps2::mean_ci into a character string of
length one.
I would also recommend using the data pronoun .data so you can be explicit
about the variables to summarize.
dat %>%
dplyr::group_by(type) %>%
dplyr::summarize(N = n(),
mean.ci = qwraps2::frmtci(qwraps2::mean_ci(.data$num)),
Percent = qwraps2::n_perc(.data$num > 0))
#> `summarise()` ungrouping output (override with `.groups` argument)
#> # A tibble: 2 x 4
#> type N mean.ci Percent
#> <chr> <int> <chr> <chr>
#> 1 A 8 2.25 (0.52, 3.98) "6 (75.00\\%)"
#> 2 B 8 2.62 (0.34, 4.91) "4 (50.00\\%)"
Created on 2020-09-15 by the reprex package (v0.3.0)
devtools::session_info()
#> ─ Session info ───────────────────────────────────────────────────────────────
#> setting value
#> version R version 4.0.2 (2020-06-22)
#> os macOS Catalina 10.15.6
#> system x86_64, darwin17.0
#> ui X11
#> language (EN)
#> collate en_US.UTF-8
#> ctype en_US.UTF-8
#> tz America/Denver
#> date 2020-09-15
#>
#> ─ Packages ───────────────────────────────────────────────────────────────────
#> package * version date lib source
#> assertthat 0.2.1 2019-03-21 [1] CRAN (R 4.0.0)
#> backports 1.1.9 2020-08-24 [1] CRAN (R 4.0.2)
#> callr 3.4.4 2020-09-07 [1] CRAN (R 4.0.2)
#> cli 2.0.2 2020-02-28 [1] CRAN (R 4.0.0)
#> crayon 1.3.4 2017-09-16 [1] CRAN (R 4.0.0)
#> desc 1.2.0 2018-05-01 [1] CRAN (R 4.0.0)
#> devtools 2.3.1 2020-07-21 [1] CRAN (R 4.0.2)
#> digest 0.6.25 2020-02-23 [1] CRAN (R 4.0.0)
#> dplyr * 1.0.2 2020-08-18 [1] CRAN (R 4.0.2)
#> ellipsis 0.3.1 2020-05-15 [1] CRAN (R 4.0.0)
#> evaluate 0.14 2019-05-28 [1] CRAN (R 4.0.0)
#> fansi 0.4.1 2020-01-08 [1] CRAN (R 4.0.0)
#> fs 1.5.0 2020-07-31 [1] CRAN (R 4.0.2)
#> generics 0.0.2 2018-11-29 [1] CRAN (R 4.0.0)
#> glue 1.4.2 2020-08-27 [1] CRAN (R 4.0.2)
#> highr 0.8 2019-03-20 [1] CRAN (R 4.0.0)
#> htmltools 0.5.0 2020-06-16 [1] CRAN (R 4.0.0)
#> knitr 1.29 2020-06-23 [1] CRAN (R 4.0.0)
#> lifecycle 0.2.0 2020-03-06 [1] CRAN (R 4.0.0)
#> magrittr 1.5 2014-11-22 [1] CRAN (R 4.0.0)
#> memoise 1.1.0 2017-04-21 [1] CRAN (R 4.0.0)
#> pillar 1.4.6 2020-07-10 [1] CRAN (R 4.0.2)
#> pkgbuild 1.1.0 2020-07-13 [1] CRAN (R 4.0.2)
#> pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.0.0)
#> pkgload 1.1.0 2020-05-29 [1] CRAN (R 4.0.0)
#> prettyunits 1.1.1 2020-01-24 [1] CRAN (R 4.0.0)
#> processx 3.4.4 2020-09-03 [1] CRAN (R 4.0.2)
#> ps 1.3.4 2020-08-11 [1] CRAN (R 4.0.2)
#> purrr 0.3.4 2020-04-17 [1] CRAN (R 4.0.0)
#> qwraps2 * 0.5.0 2020-09-14 [1] local
#> R6 2.4.1 2019-11-12 [1] CRAN (R 4.0.0)
#> Rcpp 1.0.5 2020-07-06 [1] CRAN (R 4.0.0)
#> remotes 2.2.0 2020-07-21 [1] CRAN (R 4.0.2)
#> rlang 0.4.7 2020-07-09 [1] CRAN (R 4.0.2)
#> rmarkdown 2.3 2020-06-18 [1] CRAN (R 4.0.0)
#> rprojroot 1.3-2 2018-01-03 [1] CRAN (R 4.0.0)
#> sessioninfo 1.1.1 2018-11-05 [1] CRAN (R 4.0.0)
#> stringi 1.5.3 2020-09-09 [1] CRAN (R 4.0.2)
#> stringr 1.4.0 2019-02-10 [1] CRAN (R 4.0.0)
#> testthat 2.3.2 2020-03-02 [1] CRAN (R 4.0.0)
#> tibble 3.0.3 2020-07-10 [1] CRAN (R 4.0.2)
#> tidyselect 1.1.0 2020-05-11 [1] CRAN (R 4.0.0)
#> usethis 1.6.1 2020-04-29 [1] CRAN (R 4.0.0)
#> utf8 1.1.4 2018-05-24 [1] CRAN (R 4.0.0)
#> vctrs 0.3.4 2020-08-29 [1] CRAN (R 4.0.2)
#> withr 2.2.0 2020-04-20 [1] CRAN (R 4.0.0)
#> xfun 0.17 2020-09-09 [1] CRAN (R 4.0.2)
#> yaml 2.2.1 2020-02-01 [1] CRAN (R 4.0.0)
#>
#> [1] /Library/Frameworks/R.framework/Versions/4.0/Resources/library

Table R Markdown qwraps2 and knitr::kable. Column missing

I have the following dummy data.frame
set.seed(12345)
df<-data.frame(var1=floor(runif(10,1000000,5000000)), group=rep(c("A","B"),5), event=rep(c("Yes","No"),5))
And would like to create a summary table of it. I tried to use qwraps2
As follows:
summary<-list("VAlue1" =
list("min" = ~ min(.data$var1),
"max" = ~ max(.data$var1),
"mean (sd)" = ~ qwraps2::mean_sd(.data$var1)),
"Group" =
list("Yes" = ~ qwraps2::n_perc0(.data$group == "A"),
"No" = ~ qwraps2::n_perc0(.data$group == "B")))
knitr::kable(
qwraps2::summary_table(dplyr::group_by(df, event),summary )
)
The output is unfortunately missing the variable to look at:
| |event: No (N = 5) |event: Yes (N = 5) |
|:---------|:--------------------------------|:----------------------------------|
|min |2591303 |1315253 |
|max |4232714 |4711820 |
|mean (sd) |3,456,579.40 ± 672,665.35 |3,029,844.00 ± 1,572,709.32 |
|Yes |0 (0) |5 (100) |
|No |5 (100) |0 (0) |
How do I incorporate the category "Value1" and "Group"?
Thank you!
suggestions for other packages are welcome, too.
The object returned by summary_table is a character matrix with the
additional S3 class qwraps2_summary_table. The row group names Value1 and
Group are not explicitly part of the character matrix; they are stored in
the rgroups attribute. The print method for the qwraps2_summary_table object
builds the table as needed for the appropriate markup language, LaTeX or
markdown.
Two edits to the example posted will get the table you are looking for:
Add options(qwraps2_markup = "markdown") to your script. The default
mark up language is LaTeX, setting this option changes the default to
markdown.
Do not wrap summary_table inside of knitr::kable: this prevents the
needed print method from being called.
options(qwraps2_markup = "markdown")
set.seed(12345)
df <- data.frame(var1 = floor(runif(10,1000000,5000000)),
group = rep(c("A","B"),5),
event = rep(c("Yes","No"),5))
summary <- list("Value1" =
list("min" = ~ min(var1),
"max" = ~ max(var1),
"mean (sd)" = ~ qwraps2::mean_sd(var1)),
"Group" =
list("Yes" = ~ qwraps2::n_perc0(group == "A"),
"No" = ~ qwraps2::n_perc0(group == "B")))
tab <- qwraps2::summary_table(df, summaries = summary, by = "event")
str(tab)
#> 'qwraps2_summary_table' chr [1:5, 1:2] "1665487" "4958947" ...
#> - attr(*, "dimnames")=List of 2
#> ..$ : chr [1:5] "min" "max" "mean (sd)" "Yes" ...
#> ..$ : chr [1:2] "No (N = 5)" "Yes (N = 5)"
#> - attr(*, "rgroups")= Named int [1:2] 3 2
#> ..- attr(*, "names")= chr [1:2] "Value1" "Group"
tab
#>
#>
#> | |No (N = 5) |Yes (N = 5) |
#> |:----------------------|:----------------------------------|:--------------------------------|
#> |**Value1** | | |
#> | min |1665487 |2300381 |
#> | max |4958947 |4043929 |
#> | mean (sd) |3,741,784.20 ± 1,370,520.00 |3,392,933.80 ± 782,295.15 |
#> |**Group** | | |
#> | Yes |0 (0) |5 (100) |
#> | No |5 (100) |0 (0) |
Created on 2020-09-15 by the reprex package (v0.3.0)
devtools::session_info()
#> ─ Session info ───────────────────────────────────────────────────────────────
#> setting value
#> version R version 4.0.2 (2020-06-22)
#> os macOS Catalina 10.15.6
#> system x86_64, darwin17.0
#> ui X11
#> language (EN)
#> collate en_US.UTF-8
#> ctype en_US.UTF-8
#> tz America/Denver
#> date 2020-09-15
#>
#> ─ Packages ───────────────────────────────────────────────────────────────────
#> package * version date lib source
#> assertthat 0.2.1 2019-03-21 [1] CRAN (R 4.0.0)
#> backports 1.1.9 2020-08-24 [1] CRAN (R 4.0.2)
#> callr 3.4.4 2020-09-07 [1] CRAN (R 4.0.2)
#> cli 2.0.2 2020-02-28 [1] CRAN (R 4.0.0)
#> crayon 1.3.4 2017-09-16 [1] CRAN (R 4.0.0)
#> desc 1.2.0 2018-05-01 [1] CRAN (R 4.0.0)
#> devtools 2.3.1 2020-07-21 [1] CRAN (R 4.0.2)
#> digest 0.6.25 2020-02-23 [1] CRAN (R 4.0.0)
#> ellipsis 0.3.1 2020-05-15 [1] CRAN (R 4.0.0)
#> evaluate 0.14 2019-05-28 [1] CRAN (R 4.0.0)
#> fansi 0.4.1 2020-01-08 [1] CRAN (R 4.0.0)
#> fs 1.5.0 2020-07-31 [1] CRAN (R 4.0.2)
#> glue 1.4.2 2020-08-27 [1] CRAN (R 4.0.2)
#> highr 0.8 2019-03-20 [1] CRAN (R 4.0.0)
#> htmltools 0.5.0 2020-06-16 [1] CRAN (R 4.0.0)
#> knitr 1.29 2020-06-23 [1] CRAN (R 4.0.0)
#> magrittr 1.5 2014-11-22 [1] CRAN (R 4.0.0)
#> memoise 1.1.0 2017-04-21 [1] CRAN (R 4.0.0)
#> pkgbuild 1.1.0 2020-07-13 [1] CRAN (R 4.0.2)
#> pkgload 1.1.0 2020-05-29 [1] CRAN (R 4.0.0)
#> prettyunits 1.1.1 2020-01-24 [1] CRAN (R 4.0.0)
#> processx 3.4.4 2020-09-03 [1] CRAN (R 4.0.2)
#> ps 1.3.4 2020-08-11 [1] CRAN (R 4.0.2)
#> qwraps2 0.5.0 2020-09-14 [1] local
#> R6 2.4.1 2019-11-12 [1] CRAN (R 4.0.0)
#> Rcpp 1.0.5 2020-07-06 [1] CRAN (R 4.0.0)
#> remotes 2.2.0 2020-07-21 [1] CRAN (R 4.0.2)
#> rlang 0.4.7 2020-07-09 [1] CRAN (R 4.0.2)
#> rmarkdown 2.3 2020-06-18 [1] CRAN (R 4.0.0)
#> rprojroot 1.3-2 2018-01-03 [1] CRAN (R 4.0.0)
#> sessioninfo 1.1.1 2018-11-05 [1] CRAN (R 4.0.0)
#> stringi 1.5.3 2020-09-09 [1] CRAN (R 4.0.2)
#> stringr 1.4.0 2019-02-10 [1] CRAN (R 4.0.0)
#> testthat 2.3.2 2020-03-02 [1] CRAN (R 4.0.0)
#> usethis 1.6.1 2020-04-29 [1] CRAN (R 4.0.0)
#> withr 2.2.0 2020-04-20 [1] CRAN (R 4.0.0)
#> xfun 0.17 2020-09-09 [1] CRAN (R 4.0.2)
#> yaml 2.2.1 2020-02-01 [1] CRAN (R 4.0.0)
#>
#> [1] /Library/Frameworks/R.framework/Versions/4.0/Resources/library

The output of the qwraps2 code is not as expected

I am trying to get the summary table output for one of the datasets but the output is not in the form of a tidy table
options(qwraps2_markup = "markdown")
age_summary <- list("Age" =
list("Min" = ~min(.data$Age),
"Max" = ~max(.data$Age),
"Mean" = ~mean_sd(.data$Age)))
age_tab <- summary_table(insurance, age_summary)
age_tab
When I knit the RMarkdown file, the summary table is similar to the one that comes as an output in the Console and not the expected formatted summary table.
The object generated by qwraps2::summary_table is a character matrix with
the class attribute qwraps2_summary_table. The
qwraps2:::print.qwraps2_summary_table and qwraps2:::print.qable methods
are responsible for the way the table is presented in the output. Chunk
options will be responsible for how the table is rendered in the output
document.
Update: as of qwraps2 version 0.5.0 the use of the .data is no longer
needed or recommended.
options(qwraps2_markup = "markdown")
library(qwraps2)
eg_data <- data.frame(Age = rnorm(1000, mean = 54, sd = 10))
age_summary <- list("Age" =
list(
"Min" = ~ min(Age),
"Max" = ~ max(Age),
"Mean" = ~ mean_sd(Age)
)
)
age_table <- summary_table(eg_data, age_summary)
Take a look at the structure of age_table
str(age_table)
#> 'qwraps2_summary_table' chr [1:3, 1] "25.1137149669314" "83.5664804448924" ...
#> - attr(*, "dimnames")=List of 2
#> ..$ : chr [1:3] "Min" "Max" "Mean"
#> ..$ : chr "eg_data (N = 1,000)"
#> - attr(*, "rgroups")= Named int 3
#> ..- attr(*, "names")= chr "Age"
#> - attr(*, "n")= int 1000
As noted above, the object is a 3 x 1 character matrix of class
qwraps2_summary_table. To see the return in the R console:
print.default(age_table)
#> eg_data (N = 1,000)
#> Min "25.1137149669314"
#> Max "83.5664804448924"
#> Mean "53.75 ± 9.94"
#> attr(,"rgroups")
#> Age
#> 3
#> attr(,"n")
#> [1] 1000
#> attr(,"class")
#> [1] "qwraps2_summary_table" "matrix" "array"
Since options(qwraps2_markup = "markdown") has been set, the printing
method will return a markdown table
age_table
#>
#>
#> | |eg_data (N = 1,000) |
#> |:-----------------|:-------------------|
#> |**Age** | |
#> | Min |25.1137149669314 |
#> | Max |83.5664804448924 |
#> | Mean |53.75 ± 9.94 |
Make sure you have the results = "asis" chunk option set in your .Rmd file
so that the table will render correctly in your output document.
Created on 2020-09-14 by the reprex package (v0.3.0)
devtools::session_info()
#> ─ Session info ───────────────────────────────────────────────────────────────
#> setting value
#> version R version 4.0.2 (2020-06-22)
#> os macOS Catalina 10.15.6
#> system x86_64, darwin17.0
#> ui X11
#> language (EN)
#> collate en_US.UTF-8
#> ctype en_US.UTF-8
#> tz America/Denver
#> date 2020-09-14
#>
#> ─ Packages ───────────────────────────────────────────────────────────────────
#> package * version date lib source
#> assertthat 0.2.1 2019-03-21 [1] CRAN (R 4.0.0)
#> backports 1.1.9 2020-08-24 [1] CRAN (R 4.0.2)
#> callr 3.4.4 2020-09-07 [1] CRAN (R 4.0.2)
#> cli 2.0.2 2020-02-28 [1] CRAN (R 4.0.0)
#> crayon 1.3.4 2017-09-16 [1] CRAN (R 4.0.0)
#> desc 1.2.0 2018-05-01 [1] CRAN (R 4.0.0)
#> devtools 2.3.1 2020-07-21 [1] CRAN (R 4.0.2)
#> digest 0.6.25 2020-02-23 [1] CRAN (R 4.0.0)
#> ellipsis 0.3.1 2020-05-15 [1] CRAN (R 4.0.0)
#> evaluate 0.14 2019-05-28 [1] CRAN (R 4.0.0)
#> fansi 0.4.1 2020-01-08 [1] CRAN (R 4.0.0)
#> fs 1.5.0 2020-07-31 [1] CRAN (R 4.0.2)
#> glue 1.4.2 2020-08-27 [1] CRAN (R 4.0.2)
#> highr 0.8 2019-03-20 [1] CRAN (R 4.0.0)
#> htmltools 0.5.0 2020-06-16 [1] CRAN (R 4.0.0)
#> knitr 1.29 2020-06-23 [1] CRAN (R 4.0.0)
#> magrittr 1.5 2014-11-22 [1] CRAN (R 4.0.0)
#> memoise 1.1.0 2017-04-21 [1] CRAN (R 4.0.0)
#> pkgbuild 1.1.0 2020-07-13 [1] CRAN (R 4.0.2)
#> pkgload 1.1.0 2020-05-29 [1] CRAN (R 4.0.0)
#> prettyunits 1.1.1 2020-01-24 [1] CRAN (R 4.0.0)
#> processx 3.4.4 2020-09-03 [1] CRAN (R 4.0.2)
#> ps 1.3.4 2020-08-11 [1] CRAN (R 4.0.2)
#> qwraps2 * 0.5.0 2020-09-14 [1] local
#> R6 2.4.1 2019-11-12 [1] CRAN (R 4.0.0)
#> Rcpp 1.0.5 2020-07-06 [1] CRAN (R 4.0.0)
#> remotes 2.2.0 2020-07-21 [1] CRAN (R 4.0.2)
#> rlang 0.4.7 2020-07-09 [1] CRAN (R 4.0.2)
#> rmarkdown 2.3 2020-06-18 [1] CRAN (R 4.0.0)
#> rprojroot 1.3-2 2018-01-03 [1] CRAN (R 4.0.0)
#> sessioninfo 1.1.1 2018-11-05 [1] CRAN (R 4.0.0)
#> stringi 1.5.3 2020-09-09 [1] CRAN (R 4.0.2)
#> stringr 1.4.0 2019-02-10 [1] CRAN (R 4.0.0)
#> testthat 2.3.2 2020-03-02 [1] CRAN (R 4.0.0)
#> usethis 1.6.1 2020-04-29 [1] CRAN (R 4.0.0)
#> withr 2.2.0 2020-04-20 [1] CRAN (R 4.0.0)
#> xfun 0.17 2020-09-09 [1] CRAN (R 4.0.2)
#> yaml 2.2.1 2020-02-01 [1] CRAN (R 4.0.0)
#>
#> [1] /Library/Frameworks/R.framework/Versions/4.0/Resources/library

Error when using summary_table in qwraps2. Not correct data.frame?

I'm quite new using R, so hopefully this isn't too basic..
I'm trying to create a summary table, using qwraps2. Having followed the well-written tutorial without problems (qwraps2) I run into an error when applying my own dataset.
library(qwraps2)
summary_tbl1 <-
list("Gender" =
list("Female" = ~ qwraps2::n_perc0(.data$gender == 0),
"Male" = ~ qwraps2::n_perc0(.data$gender == 1)),
"Mean age (sd)" = ~ qwraps2::mean_sd(.data$inage),
"Age categories" =
list("65-74" = ~ qwraps2::n_perc0(.data$age_cat == 1),
"75-84" = ~ qwraps2::n_perc0(.data$age_cat == 2),
"> 85" = ~ qwraps2::n_perc0(.data$age_cat == 3))
)
#making the overall column
c_overall <- summary_table(my_dataset, summary_tbl1)
Error: x must be a formula Call rlang::last_error() to see a backtrace
The backtrace reads as follows:
12. stop(cnd)
11. rlang::abort(x)
10. rlang::f_rhs(y)
9. FUN(X[[i]], ...)
8. lapply(s, function(y) { rlang::f_rhs(y) })
7. FUN(X[[i]], ...)
6. lapply(summaries, function(s) { lapply(s, function(y) { rlang::f_rhs(y) })...
5. eval(lhs, parent, parent)
4. eval(lhs, parent, parent)
3. lapply(summaries, function(s) { lapply(s, function(y) { rlang::f_rhs(y) })...
2. summary_table.data.frame(new_dataset, summary_tbl1)
1. summary_table(new_dataset, summary_tbl1)
I have converted the dataset to a data.frame using as.data.frame, as that is what summary_table requires, from what I can understand.
My dataset is imported from STATA (Haven package), could that be the answer, and if that is case - any ideas on how to overcome?
Or could it be related to the size of my dataset (80.300 obs)?
Thanks in advance
Added the summary readout:
summary_tbl1
$`Gender`
$`Gender`$`Female`
~qwraps2::n_perc0(.data$gender == 0)
$`Gender`$Male
~qwraps2::n_perc0(.data$gender == 1)
$`Mean age (sd)`
~qwraps2::mean_sd(.data$inage)
$`Age categories`
$`Age categories`$`65-74`
~qwraps2::n_perc0(.data$age_cat == 1)
$`Age categories`$`75-84`
~qwraps2::n_perc0(.data$age_cat == 2)
$`Age categories`$`> 85`
~qwraps2::n_perc0(.data$age_cat == 3)
it looks like you've collected everything into a single list called 'Gender'. it appears your data format is gender[(male,female), mean_age, age_categories()]. so you have a list called genders containing an unnamed list, a numeric names mean_age, and a named list called age_categories.
@phi’s answer is correct. To explain in more detail: the summary is expected
to be a list of lists. That is, a list where each element is a list.
Let’s look at the structure of the provided summary: (EDIT: omitting
the .data pronoun as it is no longer recommended as of qwraps2 version
0.5.0, released 1 Sept 2020).
summary_tbl1 <-
list("Gender" =
list("Female" = ~ qwraps2::n_perc0(gender == 0),
"Male" = ~ qwraps2::n_perc0(gender == 1)
),
"Mean age (sd)" = ~ qwraps2::mean_sd(inage),
"Age categories" =
list("65-74" = ~ qwraps2::n_perc0(age_cat == 1),
"75-84" = ~ qwraps2::n_perc0(age_cat == 2),
"> 85" = ~ qwraps2::n_perc0(age_cat == 3)
)
)
str(summary_tbl1, max.level = 1)
#> List of 3
#> $ Gender :List of 2
#> $ Mean age (sd) :Class 'formula' language ~qwraps2::mean_sd(inage)
#> .. ..- attr(*, ".Environment")=<environment: R_GlobalEnv>
#> $ Age categories:List of 3
The first and third elements are lists, but the second element is a formula.
The correct specification for the summary is:
summary_tbl1 <-
list("Gender" =
list("Female" = ~ qwraps2::n_perc0(gender == 0),
"Male" = ~ qwraps2::n_perc0(gender == 1)),
"inage" =
list("Mean age (sd)" = ~ qwraps2::mean_sd(inage)),
"Age categories" =
list("65-74" = ~ qwraps2::n_perc0(age_cat == 1),
"75-84" = ~ qwraps2::n_perc0(age_cat == 2),
"> 85" = ~ qwraps2::n_perc0(age_cat == 3))
)
str(summary_tbl1, max.level = 1)
#> List of 3
#> $ Gender :List of 2
#> $ inage :List of 1
#> $ Age categories:List of 3
Created on 2020-09-01 by the reprex package (v0.3.0)
devtools::session_info()
#> ─ Session info ───────────────────────────────────────────────────────────────
#> setting value
#> version R version 4.0.2 (2020-06-22)
#> os macOS Catalina 10.15.6
#> system x86_64, darwin17.0
#> ui X11
#> language (EN)
#> collate en_US.UTF-8
#> ctype en_US.UTF-8
#> tz America/Denver
#> date 2020-09-01
#>
#> ─ Packages ───────────────────────────────────────────────────────────────────
#> package * version date lib source
#> assertthat 0.2.1 2019-03-21 [1] CRAN (R 4.0.0)
#> backports 1.1.9 2020-08-24 [1] CRAN (R 4.0.2)
#> callr 3.4.3 2020-03-28 [1] CRAN (R 4.0.0)
#> cli 2.0.2 2020-02-28 [1] CRAN (R 4.0.0)
#> crayon 1.3.4 2017-09-16 [1] CRAN (R 4.0.0)
#> desc 1.2.0 2018-05-01 [1] CRAN (R 4.0.0)
#> devtools 2.3.1 2020-07-21 [1] CRAN (R 4.0.2)
#> digest 0.6.25 2020-02-23 [1] CRAN (R 4.0.0)
#> ellipsis 0.3.1 2020-05-15 [1] CRAN (R 4.0.0)
#> evaluate 0.14 2019-05-28 [1] CRAN (R 4.0.0)
#> fansi 0.4.1 2020-01-08 [1] CRAN (R 4.0.0)
#> fs 1.5.0 2020-07-31 [1] CRAN (R 4.0.2)
#> glue 1.4.2 2020-08-27 [1] CRAN (R 4.0.2)
#> highr 0.8 2019-03-20 [1] CRAN (R 4.0.0)
#> htmltools 0.5.0 2020-06-16 [1] CRAN (R 4.0.0)
#> knitr 1.29 2020-06-23 [1] CRAN (R 4.0.0)
#> magrittr 1.5 2014-11-22 [1] CRAN (R 4.0.0)
#> memoise 1.1.0 2017-04-21 [1] CRAN (R 4.0.0)
#> pkgbuild 1.1.0 2020-07-13 [1] CRAN (R 4.0.2)
#> pkgload 1.1.0 2020-05-29 [1] CRAN (R 4.0.0)
#> prettyunits 1.1.1 2020-01-24 [1] CRAN (R 4.0.0)
#> processx 3.4.3 2020-07-05 [1] CRAN (R 4.0.0)
#> ps 1.3.4 2020-08-11 [1] CRAN (R 4.0.2)
#> R6 2.4.1 2019-11-12 [1] CRAN (R 4.0.0)
#> remotes 2.2.0 2020-07-21 [1] CRAN (R 4.0.2)
#> rlang 0.4.7 2020-07-09 [1] CRAN (R 4.0.2)
#> rmarkdown 2.3 2020-06-18 [1] CRAN (R 4.0.0)
#> rprojroot 1.3-2 2018-01-03 [1] CRAN (R 4.0.0)
#> sessioninfo 1.1.1 2018-11-05 [1] CRAN (R 4.0.0)
#> stringi 1.4.6 2020-02-17 [1] CRAN (R 4.0.0)
#> stringr 1.4.0 2019-02-10 [1] CRAN (R 4.0.0)
#> testthat 2.3.2 2020-03-02 [1] CRAN (R 4.0.0)
#> usethis 1.6.1 2020-04-29 [1] CRAN (R 4.0.0)
#> withr 2.2.0 2020-04-20 [1] CRAN (R 4.0.0)
#> xfun 0.16 2020-07-24 [1] CRAN (R 4.0.2)
#> yaml 2.2.1 2020-02-01 [1] CRAN (R 4.0.0)
#>
#> [1] /Library/Frameworks/R.framework/Versions/4.0/Resources/library

Resources