gtsummary modified cross tab - r

[![enter image description here][2]][2][![i need help in writing gstummary r code to produce following table output.dummy table shown in above table][2]][2]
I need help writing gtsummary R code to produce the following table output. A dummy table is shown in the table above.
[![enter image description here][2]][2]
library(gtsummary)
[![enter image description here][2]][2]
[![enter image description here][3]][3]
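| id | age | sex | country | edu | ln | ivds | n2 | p5 |
|----|-----|-----|---------|-----|----|------|----|----|
| 1  | a   | M   | eng     | x   | 45 | 15   | 40 | 15 |
| 2  | a   | M   | eng     | x   | 23 | 26   | 70 | 15 |
| 4  | a   | M   | eng     | x   | 26 | 36   | 35 | 40 |
| 5  | b   | F   | eng     | x   | 26 | 25   | 36 | 47 |
| 6  | b   | F   | wal     | y   | 45 | 45   | 60 | 12 |
| 7  | b   | M   | wal     | y   | 60 | 25   | 36 | 15 |
| 8  | c   | M   | wal     | y   | 70 | 08   | 25 | 36 |
| 9  | c   | F   | sco     | z   | 80 | 25   | 36 | 15 |
| 10 | c   | F   | sco     | z   | 90 | 25   | 26 | 39 |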
structure(list(id = 1:15, age = structure(c(1L, 1L, 2L, 1L, 2L,
2L, 2L, 3L, 3L, 3L, 1L, 1L, 2L, 1L, 2L), .Label = c("a", "b",
"c"), class = "factor"), sex = structure(c(2L, 1L, 2L, 2L, 2L,
1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 1L), .Label = c("F", "M"), class = "factor"),
country = structure(c(1L, 1L, 1L, 1L, 3L, 3L, 3L, 2L, 2L,
2L, 1L, 1L, 1L, 1L, 3L), .Label = c("eng", "scot", "wale"
), class = "factor"), edu = structure(c(1L, 1L, 1L, 2L, 2L,
2L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 2L, 2L), .Label = c("x",
"y", "z"), class = "factor"), lon = c(45L, 23L,
25L, 45L, 70L, 69L, 90L, 50L, 62L, 45L, 23L, 25L, 45L, 70L,
69L), is = c(15L, 26L, 36L, 34L, 2L, 4L, 5L, 8L, 9L,
15L, 26L, 36L, 34L, 2L, 4L), n2 = c(40L, 70L, 50L, 60L,
30L, 25L, 80L, 89L, 10L, 40L, 70L, 50L, 60L, 30L, 25L), p5 = c(15L,
20L, 36L, 48L, 25L, 36L, 28L, 15L, 25L, 15L, 20L, 36L, 48L,
25L, 36L)), row.names = c(NA, 15L), class = "data.frame")
[

I made a table similar to what you have above (more similar to the table you had before you updated it). But I think it'll get you most of the way there.
The type of table you're requesting is something that is in the works. In the meantime, you will need to use the bstfun::tbl_2way_summary() function. This function lives in another package while we work to make it better before integrating it into gtsummary.
library(bstfun) # install with `remotes::install_github("ddsjoberg/bstfun")`
library(gtsummary)
packageVersion("gtsummary")
#> [1] '1.4.1'

# helper column holding the same value on every row; it is passed as `col`
# to tbl_2way_summary() below
trial2 <- trial %>% mutate(constant = TRUE)

# first block of rows: one 2-way summary per continuous variable, with `grade`
# on the rows; the per-variable tables are then merged side by side
tbls_row1 <-
  c("age", "marker", "ttdeath") %>%
  purrr::map(
    function(cont_var) {
      tbl_2way_summary(
        data = trial2, row = grade, col = constant, con = all_of(cont_var),
        statistic = "{mean} ({sd}) - {min}, {max}"
      ) %>%
        modify_header(stat_1 = paste0("**", cont_var, "**"))
    }
  ) %>%
  tbl_merge() %>%
  modify_spanning_header(everything() ~ NA)

# second block of rows: same construction with `stage` on the rows
tbls_row2 <-
  c("age", "marker", "ttdeath") %>%
  purrr::map(
    function(cont_var) {
      tbl_2way_summary(
        data = trial2, row = stage, col = constant, con = all_of(cont_var),
        statistic = "{mean} ({sd}) - {min}, {max}"
      ) %>%
        modify_header(stat_1 = paste0("**", cont_var, "**"))
    }
  ) %>%
  tbl_merge() %>%
  modify_spanning_header(everything() ~ NA)

# place the grade block on top of the stage block
tbl_stacked <- tbl_stack(list(tbls_row1, tbls_row2))

# n (%) column for the two categorical row variables
tbl_summary_stats <-
  trial2 %>%
  tbl_summary(
    include = c(grade, stage),
    missing = "no"
  ) %>%
  modify_header(stat_0 ~ "**n (%)**") %>%
  modify_footnote(everything() ~ NA)

# join the n (%) column to the stacked statistics, clear the spanning headers
# added by the merge, then set the grouped column headers
tbl_final <-
  tbl_merge(list(tbl_summary_stats, tbl_stacked)) %>%
  modify_spanning_header(everything() ~ NA) %>%
  modify_spanning_header(
    list(
      c(stat_1_1_2, stat_1_2_2) ~ "**Group 1**",
      stat_1_3_2 ~ "**Group 2**"
    )
  )
Created on 2021-07-10 by the reprex package (v2.0.0)

Related

labelling of ordered factor variable

I am trying to produce a univariate output table using the gtsummary package.
structure(list(id = 1:10, age = structure(c(3L, 3L, 2L, 3L, 2L,
2L, 2L, 1L, 1L, 1L), .Label = c("c", "b", "a"), class = c("ordered",
"factor")), sex = structure(c(2L, 1L, 2L, 2L, 2L, 1L, 1L, 1L,
1L, 2L), .Label = c("F", "M"), class = "factor"), country = structure(c(1L,
1L, 1L, 1L, 3L, 3L, 3L, 2L, 2L, 2L), .Label = c("eng", "scot",
"wale"), class = "factor"), edu = structure(c(1L, 1L, 1L, 2L,
2L, 2L, 3L, 3L, 3L, 3L), .Label = c("x", "y", "z"), class = "factor"),
lungfunction = c(45L, 23L, 25L, 45L, 70L, 69L, 90L, 50L,
62L, 45L), ivdays = c(15L, 26L, 36L, 34L, 2L, 4L, 5L, 8L,
9L, 15L), no2 = c(40L, 70L, 50L, 60L, 30L, 25L, 80L, 89L,
10L, 40L), pm25 = c(15L, 20L, 36L, 48L, 25L, 36L, 28L, 15L,
25L, 15L)), row.names = c(NA, 10L), class = "data.frame")
...
library(gtsummary)
# Univariate linear regressions of lungfunction on each selected covariate
publication_dummytable1_sum %>%
  select(sex, age, lungfunction, ivdays) %>%
  tbl_uvregression(
    method = lm,
    y = lungfunction,
    pvalue_fun = function(p) style_pvalue(p, digits = 3)
  ) %>%
  add_global_p() %>% # add global p-value
  bold_p() %>% # bold p-values under a given threshold
  bold_labels()
...
When I run this code I get the output below. The issue is the labeling of the ordered factor variable (age). R chooses its own labeling for the ordered factor variable. Is it possible to tell R not to choose its own labeling for ordered factor variables?
I want output like the following:
Like many other people, I think you might be misunderstanding the meaning of an "ordered" factor in R. All factors in R are ordered, in a sense; the estimates etc. are typically printed, plotted, etc. in the order of the levels vector. Specifying that a factor is of type ordered has two major effects:
it allows you to evaluate inequalities on the levels of the factor (e.g. you can filter(age > "b"))
the contrasts are set by default to orthogonal polynomial contrasts, which is where the L (linear) and Q (quadratic) labels come from: see e.g. this CrossValidated answer for more details.
If you want this variable treated in the same way as a regular factor (so that the estimates are made for differences of groups from the baseline level, i.e. treatment contrasts), you can:
convert back to an unordered factor (e.g. factor(age, ordered=FALSE))
specify that you want to use treatment contrasts in your model (in base R you would specify contrasts = list(age = "contr.treatment"))
set options(contrasts = c(unordered = "contr.treatment", ordered = "contr.treatment")) (the default for ordered is "contr.poly")
If you have an unordered ("regular") factor and the levels are not in the order you want, you can reset the level order by specifying the levels explicitly, e.g.
# Reset the level order explicitly. The lambda hands `levels` straight to
# factor() rather than through across()'s `...`, which dplyr has deprecated.
mutate(across(
  age,
  ~ factor(.x, levels = c("0-10 years", "11-20 years", "21-30 years", "30-40 years"))
))
R sets the factors in alphabetical order by default, which is sometimes not what you want (but I can't think of a case where the order would be 'random' ...)
The easiest way to remove the odd labelling for the ordered variables, is to remove the ordered class from these factor variables. Example below!
library(gtsummary)
library(tidyverse)
packageVersion("gtsummary")
#> [1] '1.4.2'
publication_dummytable1_sum <-
structure(list(id = 1:10, age = structure(c(3L, 3L, 2L, 3L, 2L,
2L, 2L, 1L, 1L, 1L), .Label = c("c", "b", "a"), class = c("ordered",
"factor")), sex = structure(c(2L, 1L, 2L, 2L, 2L, 1L, 1L, 1L,
1L, 2L), .Label = c("F", "M"), class = "factor"), country = structure(c(1L,
1L, 1L, 1L, 3L, 3L, 3L, 2L, 2L, 2L), .Label = c("eng", "scot",
"wale"), class = "factor"), edu = structure(c(1L, 1L, 1L, 2L,
2L, 2L, 3L, 3L, 3L, 3L), .Label = c("x", "y", "z"), class = "factor"),
lungfunction = c(45L, 23L, 25L, 45L, 70L, 69L, 90L, 50L,
62L, 45L), ivdays = c(15L, 26L, 36L, 34L, 2L, 4L, 5L, 8L,
9L, 15L), no2 = c(40L, 70L, 50L, 60L, 30L, 25L, 80L, 89L,
10L, 40L), pm25 = c(15L, 20L, 36L, 48L, 25L, 36L, 28L, 15L,
25L, 15L)), row.names = c(NA, 10L), class = "data.frame") |>
as_tibble()
# R labels the order factors like this in lm()
lm(lungfunction ~ age, publication_dummytable1_sum)
#>
#> Call:
#> lm(formula = lungfunction ~ age, data = publication_dummytable1_sum)
#>
#> Coefficients:
#> (Intercept) age.L age.Q
#> 51.17 -10.37 -15.11
# Same univariate regression table, after turning every ordered factor back
# into a plain factor so lm() reports per-level estimates instead of .L/.Q terms
tbl <-
  publication_dummytable1_sum %>%
  # remove ordered class
  mutate(across(where(is.ordered), function(col) factor(col, ordered = FALSE))) %>%
  select(sex, age, lungfunction, ivdays) %>%
  tbl_uvregression(
    method = lm,
    y = lungfunction,
    pvalue_fun = function(p) style_pvalue(p, digits = 3)
  )
Created on 2021-07-22 by the reprex package (v2.0.0)

Get rows from a column per group based on a condition

I have a data.frame as shown below:
Basic requirement is to find average of "n" number of "value" after certain date per group.
For ex:, user provides:
Certain Date = Failure Date
n = 4
Hence, for A, the average would be (60+70+80+100)/4 ; ignoring NAs
and for B, the average would be (80+90+100)/3. Note for B, n=4 does not happen as there are only 3 values after the satisfied condition failuredate = valuedate.
Here is the dput:
structure(list(Name = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("A",
"B"), class = "factor"), FailureDate = structure(c(1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L), .Label = c("1/5/2020", "1/7/2020"), class = "factor"), ValueDate = structure(c(1L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 2L, 1L, 3L, 4L, 5L, 6L, 7L,
8L, 9L, 10L, 2L), .Label = c("1/1/2020", "1/10/2020", "1/2/2020",
"1/3/2020", "1/4/2020", "1/5/2020", "1/6/2020", "1/7/2020", "1/8/2020",
"1/9/2020"), class = "factor"), Value = c(10L, 20L, 30L, 40L,
NA, 60L, 70L, 80L, NA, 100L, 10L, 20L, 30L, 40L, 50L, 60L, 70L,
80L, 90L, 100L)), class = "data.frame", row.names = c(NA, -20L
))
We could create an index with cumsum after grouping by 'Name', extract the 'Value' elements and get the mean
library(dplyr)
n <- 4
df1 %>%
  type.convert(as.is = TRUE) %>%
  group_by(Name) %>%
  summarise(
    # cumsum() switches to 1 on the row where FailureDate equals ValueDate;
    # lag() shifts that by one row so only rows after the match are kept
    Ave = Value[lag(cumsum(FailureDate == ValueDate), default = 0) > 0] |>
      na.omit() |>
      head(n) |>
      mean(na.rm = TRUE)
  )
# A tibble: 2 x 2
# Name Ave
# <chr> <dbl>
#1 A 77.5
#2 B 90
You can convert factor dates to the Date object and then compute averages of "n" numbers after FailureDate per group. Note that "n" numbers should exclude NA, so tidyr::drop_na() is used here.
library(dplyr)
# number of non-missing values to average after the failure date
n <- 4
df %>%
  # parse the factor date columns as Date so they can be compared with `>`
  mutate(across(contains("Date"), ~ as.Date(.x, format = "%m/%d/%Y"))) %>%
  # drop missing values first so they do not count towards the n values
  tidyr::drop_na(Value) %>%
  group_by(Name) %>%
  # head() returns fewer than n values when a group has fewer available
  summarise(mean = mean(head(Value[ValueDate > FailureDate], n), na.rm = TRUE))
# # A tibble: 2 x 2
# Name mean
# <fct> <dbl>
# 1 A 77.5
# 2 B 90
You can try this:
library(dplyr)
# number of non-missing values to average after the failure date
n <- 4
df %>%
  # flag the row where the value date equals the failure date
  mutate(condition = as.character(FailureDate) == as.character(ValueDate)) %>%
  group_by(Name) %>%
  # running count of matches: 0 before the failure date, 1 from it onwards
  mutate(condition = cumsum(condition)) %>%
  filter(condition == 1) %>%
  # drop the failure-date row itself, keeping only the rows after it
  slice(-1) %>%
  filter(!is.na(Value)) %>%
  # keep at most n rows per group
  slice_head(n = n) %>%
  summarise(mean_col = mean(Value))
> df
# A tibble: 2 x 2
Name mean_col
<fct> <dbl>
1 A 77.5
2 B 90

replacement missing values by groups in R

How can i replace the missing values for each group separately?
The reproducible example:
mydata=structure(list(group1 = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), group.2 = c(1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L), x1 = c(20L, 4L, 91L, NA, 94L, 69L, 38L,
NA, 29L, 69L, 55L, 86L, 81L, 11L, NA, 12L, 65L, 90L, 74L, NA,
49L, 90L), x2 = c(44L, 94L, NA, 1L, 67L, NA, 73L, 22L, 44L, 24L,
NA, 54L, 70L, 65L, 97L, 10L, 97L, NA, 74L, 97L, 34L, 29L)), class = "data.frame", row.names = c(NA,
-22L))
Now i found how to replace the missing values without groups.
library(dplyr)
# Replace missing values in the x1 column(s) with the overall column mean
mydata %>%
  mutate(across(
    starts_with("x1"),
    ~ ifelse(is.na(.x) & is.numeric(.x), mean(.x, na.rm = TRUE), .x)
  ))
But i need to replace for each groups (group1,group2) separately.
edit to small dataset
structure(list(group1 = c(1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 2L), group.2 = c(1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L,
2L, 2L, 2L), x1 = c(63L, 67L, 57L, NA, 65L, 75L, 57L, 80L, 42L,
NA, 35L, 80L), x2 = c(46L, 1L, NA, 41L, 80L, NA, 74L, 73L, NA,
13L, 83L, NA)), class = "data.frame", row.names = c(NA, -12L))
mydata=structure(list(group1 = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), group2 = c(1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L), x1 = c(20L, 4L, 91L, NA, 94L, 69L, 38L,
NA, 29L, 69L, 55L, 86L, 81L, 11L, NA, 12L, 65L, 90L, 74L, NA,
49L, 90L), x2 = c(44L, 94L, NA, 1L, 67L, NA, 73L, 22L, 44L, 24L,
NA, 54L, 70L, 65L, 97L, 10L, 97L, NA, 74L, 97L, 34L, 29L)), class = "data.frame", row.names = c(NA,
-22L))
library(tidyverse)
mydata %>%
  unite(group, group1, group2) %>% # combine groups
  mutate(id = row_number()) %>% # add the row number as an id (useful when reshaping)
  pivot_longer(-c(group, id), names_to = "var", values_to = "value") %>% # reshape data
  group_by(group, var) %>% # for each group combination and variable
  mutate(value = ifelse(is.na(value), mean(value, na.rm = TRUE), value)) %>% # replace NAs with mean
  ungroup() %>% # forget the grouping before reshaping back
  pivot_wider(names_from = var, values_from = value) %>% # reshape again
  arrange(id) %>% # keep order of original dataset
  select(-id) %>% # remove id
  separate(group, c("group1", "group2")) # split the groups again
# # A tibble: 22 x 4
# group1 group2 x1 x2
# <chr> <chr> <dbl> <dbl>
# 1 1 1 20 44
# 2 1 2 4 94
# 3 1 1 91 61.3
# 4 1 2 36.5 1
# 5 1 1 94 67
# 6 1 2 69 39
# 7 1 1 38 73
# 8 1 2 36.5 22
# 9 2 1 29 44
# 10 2 2 69 24
# # ... with 12 more rows

R:splitting the dataset into groups for forecast

I have a dataset and I need to produce a daily forecast split by groups.
The group is client+stuff
ts <- read.csv("C:/Users/Admin/Desktop/mydat.csv",sep=";", dec=",")
here mydat
structure(list(Data = structure(c(1L, 3L, 5L, 6L, 7L, 8L, 9L,
10L, 11L, 12L, 13L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L,
29L, 30L, 31L, 32L, 33L, 2L, 4L, 14L, 15L, 16L, 17L, 18L, 19L,
20L, 21L, 22L, 23L, 24L, 25L, 26L), .Label = c("01.04.2017",
"01.06.2017", "02.04.2017", "02.06.2017", "03.04.2017", "04.04.2017",
"05.04.2017", "06.04.2017", "07.04.2017", "08.04.2017", "09.04.2017",
"10.04.2017", "11.04.2017", "12.05.2017", "13.05.2017", "14.05.2017",
"15.05.2017", "16.05.2017", "17.05.2017", "18.05.2017", "19.05.2017",
"20.05.2017", "21.05.2017", "22.05.2017", "23.05.2017", "24.05.2017",
"25.05.2017", "26.05.2017", "27.05.2017", "28.05.2017", "29.05.2017",
"30.05.2017", "31.05.2017"), class = "factor"), client = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("Horns and hooves", "Kornev & Co."
), class = "factor"), stuff = structure(c(1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L), .Label = c("chickens", "hooves", "Oysters"), class = "factor"),
Продажи = c(374L, 12L, 120L, 242L, 227L, 268L, 280L, 419L,
12L, 172L, 336L, 117L, 108L, 150L, 90L, 117L, 116L, 146L,
120L, 211L, 213L, 67L, 146L, 118L, 152L, 122L, 201L, 497L,
522L, 65L, 268L, 441L, 247L, 348L, 445L, 477L, 62L, 226L,
476L, 306L)), .Names = c("Data", "client", "stuff", "Продажи"
), class = "data.frame", row.names = c(NA, -40L))
of course I can manually separate three datasets
horns and hooves + hooves
Horns and hooves + chickens
Kornev & Co. + oysters
but what to do in the case when I have a huge dataset and there are hundreds of groups. Do not manually split.
Is it possible to split it in R into groups and then perform a forecast?
the code for forecast is simple
The first i do so
library(forecast)
library(lubridate)
# Multi-seasonal series (weekly and yearly periods) built from the sales column.
# `$` is case-sensitive, so the column is spelled `Sales` here to match the
# other snippets on this page.
# NOTE(review): the dput above names this column "Продажи" - rename it to
# `Sales` after import, or adjust the name here.
msts <- msts(ts$Sales, seasonal.periods = c(7, 365.25), start = decimal_date(as.Date("2017-05-12")))
plot(msts, main = "sales", xlab = "Year", ylab = "sales")
# fit TBATS and plot the fitted model
tbats <- tbats(msts)
plot(tbats, main = "Multiple Season Decomposition")
sp <- predict(tbats, h = 14) # 14 days forecast
plot(sp, main = "TBATS Forecast", include = 14)
print(sp)
If the result does not suit me, I perform the forecast via dummy variables instead:
# Weekly-frequency series regressed on trend and seasonal dummies
tsw <- ts(
  ts$Sales,
  start = decimal_date(as.Date("2017-05-12")),
  frequency = 7
)
View(tsw)
mytslm <- tslm(tsw ~ trend + season)
print(mytslm)

# 14-step forecast from the regression itself
regressionForecast <- forecast(mytslm, h = 14)
regressionF <- as.numeric(regressionForecast$mean)

# 14-step forecast of the regression residuals from an automatically chosen ARIMA
residarima1 <- auto.arima(mytslm$residuals)
residualsArimaForecast <- forecast(residarima1, h = 14)
residualsF <- as.numeric(residualsArimaForecast$mean)

# final forecast: regression forecast plus forecast residuals
forecastR <- regressionF + residualsF
print(forecastR)
You can use split to split the data into groups by a combination of factors, in this case columns client and stuff.
# One data frame per client/stuff combination
group_list <- split(mydat, list(mydat$client, mydat$stuff))
# split() also creates entries for combinations that never occur; drop those.
# vapply() always returns an integer vector, even when group_list is empty.
group_list <- group_list[vapply(group_list, nrow, integer(1)) > 0L]
Then you can use this list and lapply any function you want. The following is how you would perform your first forecast. Note that I have separated the forecast code from the plots code and that each step of the forecast is done by one function, first apply function msts and produce a list of such objects, then apply function tbats and produce another list.
# Build a multi-seasonal time series (periods 7 and 365.25) from the `Sales`
# column of one group's data frame, starting at 2017-05-12.
fun_msts <- function(ts) {
  series_start <- decimal_date(as.Date("2017-05-12"))
  msts(ts$Sales, seasonal.periods = c(7, 365.25), start = series_start)
}
# Fit a TBATS model to the series `m` and return its 14-step-ahead prediction.
fun_sp <- function(m) {
  tbats_fit <- tbats(m)
  predict(tbats_fit, h = 14) # 14 days forecast
}
# one msts object per group, then one TBATS prediction per msts object
msts_list <- lapply(X = group_list, FUN = fun_msts)
sp_list <- lapply(X = msts_list, FUN = fun_sp)
Now if you want to, you can plot the results. In order to do that, define two other functions to be lapplyed.
# Plot one sales series; when `new.window` is TRUE a new graphics device is
# opened with windows() first.
plot_msts <- function(m, new.window = TRUE) {
  if (new.window) {
    windows()
  }
  plot(m, main = "Sales", xlab = "Year", ylab = "Sales")
}
# Plot one TBATS prediction, showing the last 14 observations; when
# `new.window` is TRUE a new graphics device is opened with windows() first.
plot_sp <- function(sp, new.window = TRUE) {
  if (new.window) {
    windows()
  }
  plot(sp, main = "TBATS Forecast", include = 14)
}
# draw every group's series, then every group's forecast
lapply(X = msts_list, FUN = plot_msts)
lapply(X = sp_list, FUN = plot_sp)
In these functions a new graphics device is opened with the function windows(). If you are not using Microsoft Windows, or if you want to open another type of device, change that instruction but keep the if(new.window).
EDIT.
As for the regression with dummy variables, you can do the following.
# Fit a trend + seasonal-dummy regression to the `Sales` column of `x`.
#   start: start date of the series, as a string accepted by as.Date()
#   freq:  frequency passed to ts()
# Returns the fitted tslm model.
fun_tslm <- function(x, start = "2017-05-12", freq = 7) {
  start_decimal <- decimal_date(as.Date(start))
  tsw <- ts(x[["Sales"]], start = start_decimal, frequency = freq)
  #View(tsw)
  tslm(tsw ~ trend + season)
}
# Combine the h-step forecast of the fitted model `x` with an h-step forecast
# of its residuals from an auto.arima() fit.
# Returns a numeric vector: regression forecast plus residual forecast.
fun_forecast <- function(x, h = 14) {
  regression_part <- as.numeric(forecast(x, h = h)$mean)
  residual_model <- auto.arima(x[["residuals"]])
  residual_part <- as.numeric(forecast(residual_model, h = h)$mean)
  regression_part + residual_part
}
# one regression model per group, then one combined forecast per model
tslm_list <- lapply(X = group_list, FUN = fun_tslm)
fore_list <- lapply(X = tslm_list, FUN = fun_forecast)

R ggplot2 - How to plot 2 boxplots on the same x value

suppose I have two boxplots.
# Trial1 by scenario, filled by condition, with the points dodged the same way
trial1 <- ggplot(
  completionTime,
  aes(x = Scenario, y = Trial1, fill = Condition)
)
trial1 +
  geom_boxplot() +
  geom_point(position = position_dodge(width = 0.75)) +
  ylim(0, 160)
# same plot for Trial2
trial2 <- ggplot(
  completionTime,
  aes(x = Scenario, y = Trial2, fill = Condition)
)
trial2 +
  geom_boxplot() +
  geom_point(position = position_dodge(width = 0.75)) +
  ylim(0, 160)
How can I plot trial 1 and trial 2 on the same plot and same respective X? they have the same range of y.
I looked at geom_boxplot(position="identity"), but that plots the two conditions(fill) on the same X.
I want to plot two y column on the same X.
Edit: the dataset
User Condition Scenario Trial1 Trial2
1 1 ME a 67 41
2 1 ME b 70 42
3 1 ME c 40 15
4 1 ME d 65 23
5 1 ME e 45 45
6 1 SE a 100 34
7 1 SE b 54 23
8 1 SE c 70 23
9 1 SE d 56 15
10 1 SE e 30 20
11 2 ME a 42 23
12 2 ME b 22 12
13 2 ME c 28 8
14 2 ME d 22 8
15 2 ME e 38 37
16 2 SE a 59 18
17 2 SE b 65 14
18 2 SE c 75 7
19 2 SE d 37 9
20 2 SE e 31 7
dput()
structure(list(User = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), Condition = structure(c(1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L), .Label = c("ME", "SE"), class = "factor"), Scenario =
structure(c(1L,
2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L,
3L, 4L, 5L), .Label = c("a", "b", "c", "d", "e"), class = "factor"),
Trial1 = c(67L, 70L, 40L, 65L, 45L, 100L, 54L, 70L, 56L,
30L, 42L, 22L, 28L, 22L, 38L, 59L, 65L, 75L, 37L, 31L), Trial2 = c(41L,
42L, 15L, 23L, 45L, 34L, 23L, 23L, 15L, 20L, 23L, 12L, 8L,
8L, 37L, 18L, 14L, 7L, 9L, 7L)), .Names = c("User", "Condition",
"Scenario", "Trial1", "Trial2"), class = "data.frame", row.names = c(NA,
-20L))
You could try using interaction to combine two of your factors and plot against a third. For example, assuming you want to fill by condition as in your original code:
library(tidyr)
library(ggplot2)
completionTime %>%
  # stack every column except the identifiers into long format: one row per
  # measurement, with its source column name in `trial`
  pivot_longer(
    -c(Scenario, Condition, User),
    names_to = "trial",
    values_to = "value"
  ) %>%
  # one x position per Scenario/trial combination, boxes filled by Condition
  ggplot(aes(interaction(Scenario, trial), value)) +
  geom_boxplot(aes(fill = Condition))
Result:

Resources