Related
I have a dataframe that looks like the following.
consistent admire trust judge
3 3 2 4
5 1 3 6
2 4 5 1
I can run the regressions I need simultaneously using the following code. In the actual dataset, there are many more than 3 variables.
variables <- c("admire", "trust", "judge")
form <- paste("consistent ~ ",variables,"")
model <- form %>%
set_names(variables) %>%
map(~lm(as.formula(.x), data = df))
map(model, summary)
This yields the output for the 3 following regressions.
summary(lm(consistent ~ admire, df))
summary(lm(consistent ~ trust, df))
summary(lm(consistent ~ judge, df))
I would like a list of the variables with significant p-values at p < 0.05. For example, if "admire" was significant and "judge" was significant, the output I am looking for would be something like:
admire, judge
Is there a way to do this that allows me to also run several regressions simultaneously? This question offers a similar answer, but I don't know how to apply it when I have several regressions.
Data:
structure(list(consistent = c(1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0,
1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0,
1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1,
0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1,
1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1,
1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0,
1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1,
1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1,
0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1,
1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0,
0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0,
1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0,
1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0,
0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1,
1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0,
0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0,
0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1,
1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0,
1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0,
1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1,
0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1,
1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0), admire = c(7,
3, 1, 1, 3, 5, 5, 6, 7, 1, 4, 2, 5, 3, 3, 1, 3, 1, 2, 1, 5, 5,
3, 1, 5, 3, 5, 4, 5, 1, 6, 1, 6, 2, 1, 4, 1, 1, 3, 2, 1, 5, 1,
7, 1, 4, 1, 4, 2, 2, 4, 2, 4, 1, 5, 5, 1, 2, 6, 6, 1, 1, 3, 5,
5, 1, 5, 7, 2, 4, 5, 1, 4, 4, 3, 5, 6, 1, 5, 2, 1, 5, 6, 2, 3,
3, 5, 6, 1, 4, 4, 6, 4, 4, 4, 6, 5, 4, 1, 2, 5, 4, 2, 4, 6, 1,
3, 7, 4, 4, 3, 2, 7, 5, 3, 2, 1, 2, 2, 5, 7, 3, 5, 4, 6, 2, 2,
4, 4, 5, 5, 1, 5, 6, 1, 2, 4, 7, 1, 4, 5, 4, 2, 4, 1, 4, 3, 4,
7, 5, 6, 3, 1, 1, 7, 1, 6, 4, 1, 1, 2, 1, 1, 6, 3, 1, 4, 4, 7,
2, 1, 5, 3, 3, 7, 4, 5, 1, 3, 7, 5, 4, 1, 1, 1, 5, 2, 1, 1, 4,
1, 5, 4, 5, 1, 4, 4, 4, 7, 1, 1, 2, 5, 2, 4, 2, 4, 6, 4, 2, 6,
5, 6, 7, 4, 4, 5, 1, 5, 7, 1, 7, 2, 7, 3, 6, 2, 5, 7, 3, 5, 4,
1, 4, 1, 5, 1, 1, 6, 6, 7, 3, 4, 1, 6, 4, 1, 6, 7, 5, 4, 2, 6,
5, 5, 4, 1, 2, 6, 1, 5, 3, 1, 1, 1, 7, 7, 3, 5, 1, 5, 1, 7, 2,
5, 4, 2, 1, 4, 1, 1, 5, 5, 4, 5, 2, 4, 5, 5, 1, 4, 4, 1, 3, 4,
2, 7, 6, 6, 4, 3, 6, 1, 6, 1, 1, 4, 7, 7, 1, 3, 1, 4, 2, 2, 6,
1, 2, 1, 1, 1, 4, 2, 5, 4, 1, 4, 2, 5, 5, 2, 1, 6, 1, 2, 3, 4,
1, 7, 2, 2, 4, 5, 1, 6, 2, 5, 1, 5, 6, 2, 5, 1, 1, 7, 4, 5, 6,
1, 4, 5, 2, 4, 4, 6, 4, 4, 2, 6, 1, 1, 2, 6, 1, 3, 5, 5, 3, 7,
5, 6, 4, 3, 4, 7, 5, 4, 2, 1, 5, 7, 2, 6, 3, 1, 2, 4, 3, 5, 4,
1, 6, 1, 3, 1, 1, 1, 4, 3, 3, 1, 1, 1, 6, 4, 1, 1, 1, 1, 4, 1,
6, 4, 4, 4, 4, 1, 5, 2, 4, 5, 4, 4, 3, 3, 6, 7, 3, 2, 4, 2, 5,
1, 4, 5, 4, 1, 2, 4, 1), trust = c(7, 4, 2, 2, 3, 4, 6, 6, 7,
1, 4, 5, 5, 4, 1, 1, 2, 2, 1, 1, 6, 6, 4, 1, 3, 6, 5, 4, 6, 1,
5, 1, 6, 1, 2, 5, 1, 1, 4, 1, 1, 5, 1, 7, 1, 4, 4, 5, 3, 4, 5,
3, 5, 2, 6, 5, 3, 2, 6, 6, 1, 1, 3, 5, 5, 1, 5, 7, 2, 4, 6, 1,
4, 4, 4, 6, 6, 3, 5, 6, 1, 6, 5, 2, 2, 2, 5, 7, 1, 5, 3, 7, 3,
5, 4, 6, 6, 5, 2, 1, 6, 5, 2, 6, 5, 1, 2, 7, 6, 5, 3, 3, 4, 7,
4, 2, 1, 3, 4, 7, 6, 2, 6, 5, 7, 3, 2, 4, 5, 5, 5, 1, 2, 7, 1,
1, 5, 4, 1, 4, 6, 6, 2, 4, 2, 4, 1, 5, 7, 6, 7, 3, 2, 1, 7, 1,
4, 4, 1, 2, 4, 1, 1, 6, 3, 1, 4, 3, 7, 2, 2, 6, 4, 5, 7, 5, 7,
2, 4, 7, 4, 3, 1, 1, 1, 5, 2, 4, 1, 4, 1, 5, 4, 5, 1, 6, 5, 4,
6, 1, 1, 2, 6, 2, 4, 4, 4, 5, 6, 1, 5, 5, 5, 6, 4, 4, 5, 5, 6,
7, 1, 7, 3, 7, 5, 6, 3, 5, 7, 4, 5, 4, 2, 3, 1, 4, 5, 1, 5, 4,
7, 3, 5, 1, 6, 6, 1, 4, 6, 5, 4, 3, 7, 6, 5, 4, 1, 1, 6, 1, 5,
3, 1, 1, 1, 7, 7, 3, 4, 1, 4, 1, 7, 2, 4, 2, 2, 2, 4, 1, 1, 5,
4, 6, 5, 2, 4, 5, 4, 1, 6, 4, 1, 4, 4, 3, 7, 5, 6, 4, 4, 6, 2,
6, 1, 2, 4, 7, 7, 1, 1, 1, 4, 2, 2, 6, 2, 4, 1, 2, 1, 6, 2, 6,
4, 1, 6, 3, 5, 4, 3, 1, 6, 1, 2, 3, 5, 1, 6, 1, 3, 4, 5, 2, 6,
2, 5, 1, 3, 7, 1, 4, 1, 1, 7, 5, 6, 5, 1, 5, 5, 1, 4, 3, 7, 4,
4, 1, 7, 1, 1, 4, 6, 1, 4, 5, 5, 4, 7, 6, 7, 4, 4, 4, 4, 4, 4,
1, 1, 5, 6, 2, 7, 4, 2, 4, 5, 4, 5, 4, 1, 5, 1, 2, 1, 1, 4, 4,
3, 4, 3, 1, 2, 6, 5, 1, 1, 1, 2, 4, 1, 7, 4, 4, 5, 6, 2, 5, 3,
4, 5, 4, 4, 3, 3, 6, 7, 4, 4, 3, 2, 5, 1, 5, 5, 5, 2, 2, 3, 1
), judge = c(1, 5, 6, 3, 6, 3, 4, 5, 4, 1, 3, 2, 3, 2, 4, 3,
4, 2, 5, 4, 3, 3, 4, 4, 7, 5, 4, 4, 1, 3, 6, 2, 3, 2, 5, 2, 3,
4, 2, 4, 4, 3, 4, 4, 1, 4, 1, 2, 3, 1, 2, 2, 3, 5, 3, 5, 5, 3,
1, 4, 4, 2, 5, 4, 3, 1, 5, 4, 4, 5, 2, 2, 2, 7, 3, 3, 1, 1, 5,
3, 3, 1, 2, 5, 2, 3, 5, 4, 3, 4, 3, 2, 1, 3, 4, 4, 5, 5, 3, 2,
2, 3, 2, 4, 1, 1, 4, 2, 2, 3, 3, 2, 4, 4, 6, 1, 7, 4, 2, 3, 4,
1, 2, 4, 4, 5, 2, 1, 3, 2, 2, 1, 1, 7, 2, 3, 5, 5, 1, 2, 2, 5,
6, 5, 1, 1, 1, 4, 1, 5, 4, 3, 6, 1, 4, 1, 3, 4, 6, 1, 2, 4, 3,
3, 4, 7, 1, 3, 1, 2, 2, 3, 2, 3, 5, 3, 4, 2, 6, 3, 1, 1, 1, 1,
4, 2, 2, 4, 4, 5, 4, 2, 1, 6, 7, 5, 2, 2, 4, 5, 6, 1, 5, 2, 4,
5, 5, 2, 2, 3, 4, 5, 2, 2, 4, 1, 3, 4, 4, 4, 2, 3, 1, 4, 4, 3,
2, 3, 1, 4, 2, 4, 4, 1, 5, 4, 4, 4, 4, 6, 1, 3, 5, 7, 2, 6, 1,
5, 7, 5, 4, 2, 3, 6, 3, 1, 1, 2, 2, 5, 5, 2, 5, 4, 4, 5, 4, 4,
3, 7, 4, 4, 4, 2, 5, 3, 6, 5, 4, 4, 4, 6, 4, 5, 5, 1, 5, 2, 6,
4, 4, 1, 1, 4, 6, 1, 7, 1, 5, 2, 5, 4, 2, 3, 2, 6, 3, 2, 2, 1,
1, 5, 4, 1, 1, 4, 1, 5, 1, 4, 3, 2, 3, 4, 1, 6, 1, 2, 1, 3, 5,
5, 2, 1, 3, 4, 2, 4, 5, 4, 6, 3, 4, 6, 7, 6, 2, 4, 6, 2, 4, 5,
1, 4, 1, 3, 2, 4, 1, 6, 4, 3, 1, 3, 4, 5, 1, 6, 1, 5, 1, 3, 3,
1, 3, 4, 2, 4, 1, 1, 2, 2, 2, 3, 1, 6, 5, 4, 1, 7, 5, 6, 5, 2,
3, 5, 4, 3, 4, 5, 7, 1, 5, 2, 5, 1, 3, 4, 3, 5, 1, 4, 2, 3, 4,
1, 7, 5, 5, 2, 1, 2, 5, 6, 5, 5, 3, 1, 3, 1, 4, 1, 5, 2, 3, 5,
6, 4, 4, 3, 2, 4, 1, 3, 4, 3, 4, 4, 1, 5)), row.names = c(NA,
-450L), class = c("tbl_df", "tbl", "data.frame"))
To fit many many simple linear regression models, I recommend Fast pairwise simple linear regression between variables in a data frame. Hmm... looks like I need to collect those functions in an R package...
## suppose your data frame is `df`
## response variable (LHS) in column 1
## independent variable (RHS) in other columns
out <- general_paired_simpleLM(df[1], df[-1])
# LHS RHS alpha beta beta.se beta.tv beta.pv
#1 consistent admire -0.1458455 0.18754326 0.008324192 22.529906 1.040756e-75
#2 consistent trust -0.2211250 0.19565589 0.007721387 25.339475 1.531499e-88
#3 consistent judge 0.3484851 0.04824981 0.014182420 3.402086 7.287372e-04
# sig R2 F.fv F.pv
#1 0.3430602 0.53118295 507.59665 1.040756e-75
#2 0.3212008 0.58902439 642.08902 1.531499e-88
#3 0.4946862 0.02518459 11.57419 7.287372e-04
To get what you want:
with(out, RHS[beta.pv < 0.05])
#[1] "admire" "trust" "judge"
I want to bootstrap my dataset for multiple regression. Unfortunately I get this error message:
"number of items to replace is not a multiple of replacement length"
I suspect that the factors in my regression formula may be problematic.
What could I do to solve my problem?
My code is as following (I read Andy Field´s Discovering Statistics using R):
BootReg <- function(data, indices, formula) {
d <- data[indices,]
fit <- lm(formula, data=d)
return(coef(fit))
}
bootResults <-boot(statistic = BootReg, formula = TICS_Skala1 ~HSPhoch + HSPhoch*extra.c
+ psy + sex + age.c, data = mod.reg.data, R = 2000)
psy (psychiatric disease), sex and HSPhoch (high sensory-processing sensitivity) are factors. TICS_Skala1, extra.c, age.c are continuos variables.
my sample data:
> dput(head(mod.reg.data, 20))
structure(list(neo_01 = c(3, 4, 3, 0, 4, 4, 3, 2, 3, 1, 4, 2,
3, 3, 1, 2, 3, 4, 0, 2), neo_03 = c(1, 1, 1, 3, 1, 2, 0, 0, 0,
0, 0, 0, 1, 3, 1, 1, 1, 1, 3, 1), neo_04 = c(2, 4, 3, 0, 4, 3,
4, 3, 2, 3, 3, 3, 3, 4, 2, 4, 3, 4, 3, 3), neo_08 = c(3, 0, 1,
2, 3, 3, 4, 3, 2, 1, 2, 4, 0, 3, 1, 1, 3, 1, 3, 1), neo_12 = c(3,
1, 1, 2, 2, 2, 4, 1, 1, 2, 1, 4, 1, 3, 1, 1, 3, 2, 3, 2), neo_13 = c(3,
2, 2, 4, 3, 3, 3, 2, 2, 1, 2, 3, 0, 3, 1, 0, 2, 3, 0, 2), neo_16 = c(3,
1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 3, 0, 2, 0, 0, 0, 0, 2, 1), neo_17 = c(2,
1, 3, 0, 1, 1, 1, 4, 3, 1, 2, 2, 2, 3, 1, 0, 2, 0, 2, 2), neo_18 = c(2,
3, 4, 0, 4, 3, 4, 3, 3, 1, 3, 2, 4, 2, 3, 4, 3, 4, 2, 2), neo_21 = c(3,
0, 1, 2, 1, 2, 1, 1, 1, 1, 1, 3, 0, 4, 1, 0, 0, 0, 4, 1), neo_26 = c(3,
0, 0, 0, 2, 1, 3, 0, 1, 1, 0, 2, 3, 3, 0, 0, 1, 1, 4, 1), neo_27 = c(3,
3, 4, 3, 3, 3, 3, 3, 3, 3, 3, 2, 3, 4, 3, 3, 3, 3, 2, 2), TICS_1 = c(3,
0, 3, 2, 2, 1, 3, 3, 1, 2, 0, 4, 2, 3, 2, 3, 4, 1, 3, 2), TICS_2 = c(3,
1, 1, 1, 1, 2, 0, 0, 0, 0, 0, 4, 3, 1, 1, 1, 2, 1, 2, 1), TICS_3 = c(2,
1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 3, 1, 2, 0, 1, 1, 0, 1, 0), TICS_4 = c(2,
0, 2, 0, 1, 2, 1, 3, 0, 0, 0, 4, 1, 2, 1, 2, 1, 1, 2, 2), TICS_5 = c(2,
3, 2, 1, 2, 2, 2, 2, 0, 2, 1, 2, 2, 2, 2, 1, 1, 1, 2, 1), TICS_6 = c(3,
2, 2, 4, 2, 2, 1, 3, 1, 1, 1, 2, 2, 2, 2, 1, 1, 2, 1, 2), TICS_7 = c(3,
3, 2, 2, 2, 2, 0, 3, 1, 2, 1, 4, 2, 0, 2, 1, 4, 1, 0, 1), TICS_8 =c(NA,
NA, NA, NA, NA, NA, NA, NA, 1, 1, 0, 4, 3, 1, 1, 3, 3, 2, 1,
2), TICS_9 = c(NA, NA, NA, NA, NA, NA, NA, NA, 0, 3, 2, 2, 1,
3, 0, 1, 3, 1, 1, 2), TICS_10 = c(2, 2, 0, 0, 2, 3, 0, 2, 1,
1, 2, 2, 1, 0, 0, 1, 1, 2, 2, 1), TICS_11 = c(1, 2, 1, 0, 1,
1, 0, 0, 0, 0, 2, 4, 1, 0, 0, 0, 0, 1, 1, 0), TICS_12 = c(2,
2, 1, 0, 1, 1, 1, 3, 1, 1, 1, 4, 2, 2, 2, 3, 3, 1, 2, 3), TICS_13=
c(1, 1, 3, 0, 2, 3, 2, 1, 1, 2, 1, 2, 2, 3, 2, 2, 1, 2, 2, 2),
TICS_14= c(4, 1, 1, 0, 1, 1, 3, 4, 0, 2, 0, 4, 2, 3, 0, 1, 3, 1, 1,
1), TICS_15= c(3, 1, 1, 3, 0, 2, 0, 2, 0, 2, 1, 2, 0, 1, 1, 1, 0, 0,
0, 1), ICS_16= c(4, 2, 1, 3, 3, 2, 1, 2, 1, 1, 1, 3, 1, 3, 1, 2, 3,
1, 2, 1), TICS_17= c(3, 0, 2, 2, 1, 2, 2, 3, 0, 1, 1, 2, 1, 2, 2, 3,
1, 1, 1, 2), TICS_18= c(3, 0, 1, 2, 0, 1, 1, 0, 0, 1, 0, 4, 2, 2, 0,
0, 1, 0, 2, 0), TICS_19= c(4, 2, 2, 2, 2, 2, 0, 2, 1, 2, 1, 4, 3, 2,
1, 1, 1, 0, 1, 2), TICS_20= c(2, 0, 2, 0, 0, 0, 1, 0, 1, 1, 0, 4, 1,
1, 0, 0, 1, 0, 2, 0), TICS_21= c(2, 1, 1, 0, 2, 3, 0, 1, 0, 1, 3, 2,
2, 1, 2, 1, 1, 1, 3, 0), TICS_22= c(3, 0, 1, 2, 2, 3, 1, 4, 0, 1, 1,
2, 3, 1, 1, 2, 3, 2, 0, 3), TICS_24= c(2, 0, 0, 1, 0, 0, 2, 0, 1, 1,
0, 2, 0, 0, 0, 1, 1, 0, 0, 1), TICS_25= c(4, 0, 1, 2, 2, 2, 4, 2, 1,
1, 0, 3, 0, 2, 0, 1, 2, 1, 2, 1), TICS_26= c(3, 0, 2, 2, 0, 1, 1, 0,
0, 1, 0, 2, 0, 2, 0, 0, 0, 0, 0, 1), TICS_27= c(3,
1, 4, 2, 3, 3, 4, 4, 0, 1, 0, 3, 2, 3, 2, 3, 2, 2, 4, 3), TICS_28=
c(3, 2, 2, 1, 1, 2, 1, 2, 1, 1, 0, 4, 1, 2, 1, 0, 1, 0, 0, 2),
TICS_29= c(2, 0, 1, 0, 2, 2, 1, 0, 1, 0, 0, 4, 1, 1, 0, 1, 0, 0, 1,
1), TICS_30= c(2, 1, 3, 1, 2, 2, 1, 0, 1, 1, 1, 3, 2, 0, 1, 0, 1, 2,
2, 2), TICS_31= c(2, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 3, 2, 1, 0, 0, 1,
0, 2, 1), TICS_32= c(4, 1, 1, 0, 1, 2, 1, 4, 0, 3, 0, 3, 3, 2, 1, 2,
2, 2, 3, 3), TICS_33= c(2,
1, 0, 2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 2, 0, 0, 0, 1, 1, 1), TICS_34=
c(1, 3, 0, 0, 2, 1, 1, 1, 0, 0, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0),
TICS_35= c(1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 2, 0, 1, 0, 1, 1, 0, 4,
1), TICS_36= c(4, 1, 2, 3, 3, 2, 4, 1, 0, 1, 2, 3, 1, 3, 0, 1, 1, 0,
2, 1), TICS_37= c(1, 1, 2, 0, 2, 3, 3, 0, 1, 2, 1, 2, 1, 0, 2, 2, 1,
1, 2, 1), TICS_38= c(3, 0, 3, 1, 2, 2, 2, 3, 0, 2, 0, 4, 0, 2, 1, 2,
2, 1, 1, 2), TICS_39= c(1, 1, 2, 2, 3, 1, 1, 2, 1, 1, 1, 4, 1, 1, 1,
1, 3, 0, 0, 3), TICS_40= c(2, 0, 2, 0, 3, 2, 1, 2, 0, 0, 0, 3, 2, 2,
0, 1, 2, 0, 0, 1), TICS_41= c(2, 2, 0, 0, 2, 3, 1, 1, 0, 1, 3, 1, 2,
0, 1, 0, 0, 1, 2, 0), TICS_42= c(1, 2, 0, 0, 2, 1, 0, 0, 0, 1, 1, 2,
1, 1, 1, 0, 0, 0, 0, 0), TICS_43= c(4,
1, 1, 2, 2, 3, 3, 3, 0, 2, 1, 4, 3, 2, 1, 1, 3, 1, 2, 3), TICS_44=
c(3, 0, 2, 1, 2, 2, 3, 3, 0, 1, 0, 4, 1, 3, 0, 2, 2, 1, 3, 1),
TICS_45= c(2,
0, 1, 2, 0, 1, 0, 2, 0, 1, 0, 2, 0, 2, 0, 0, 0, 0, 0, 1), TICS_46=
c(2, 1, 0, 1, 2, 2, 1, 0, 0, 3, 1, 4, 3, 1, 1, 0, 1, 1, 2, 1),
TICS_47= c(3,
1, 2, 1, 2, 2, 1, 1, 1, 2, 0, 3, 1, 2, 1, 2, 1, 1, 4, 1), TICS_48=
c(1,
2, 3, 1, 2, 3, 1, 1, 0, 2, 2, 4, 2, 3, 2, 2, 1, 0, 2, 0), TICS_49=
c(1,
3, 2, 2, 1, 2, 2, 1, 0, 1, 1, 4, 3, 0, 1, 2, 4, 1, 0, 3), TICS_50=
c(3,
0, 3, 1, 1, 2, 4, 3, 0, 2, 0, 4, 2, 3, 2, 2, 2, 2, 2, 3), TICS_51=
c(1,
2, 0, 0, 2, 1, 0, 0, 0, 0, 1, 2, 1, 0, 1, 0, 0, 0, 0, 0), TICS_52=
c(2,
1, 3, 0, 1, 1, 1, 1, 0, 1, 0, 2, 0, 3, 0, 0, 0, 0, 0, 1), TICS_53=
c(2,
2, 2, 0, 2, 3, 1, 1, 0, 2, 2, 3, 2, 2, 2, 1, 1, 1, 2, 1), TICS_54=
c(3,
0, 3, 2, 2, 2, 3, 3, 1, 2, 0, 4, 0, 2, 0, 2, 2, 0, 2, 1), TICS_55=
c(2,
0, 0, 1, 0, 1, 2, 0, 0, 1, 0, 4, 0, 1, 0, 1, 1, 0, 2, 0), TICS_56=
c(4,
3, 1, 0, 2, 0, 0, 0, 1, 0, 1, 2, 1, 1, 1, 0, 0, 0, 2, 0), TICS_57=
c(2,
1, 1, 0, 2, 1, 0, 0, 1, 1, 1, 4, 3, 0, 0, 1, 1, 0, 0, 2), HSPS_1 =
c(3,
4, 3, 3, 4, 2, 4, 2, 4, 2, 3, 4, 2, 2, 4, 2, 3, 3, 5, 2), HSPS_2 =
c(4,
4, 3, 5, 5, 3, 2, 4, 5, 5, 3, 4, 3, 4, 4, 2, 4, 3, 4, 3), HSPS_3 =
c(4,
4, 4, 3, 3, 4, 3, 3, 3, 3, 3, 5, 3, 4, 5, 3, 3, 3, 4, 2), HSPS_4 =
c(4,
2, 1, 4, 2, 3, 5, 3, 5, 2, 3, 3, 3, 4, 3, 3, 4, 2, 5, 2), HSPS_5 =
c(2,
2, 2, 4, 3, 3, 3, 1, 4, 3, 3, 4, 3, 2, 4, 3, 4, 3, 5, 1), HSPS_6 =
c(4,
3, 1, 3, 4, 3, 3, 3, 3, 2, 1, 1, 1, 3, 5, 3, 3, 1, 1, 2), HSPS_7 =
c(4,
3, 1, 3, 4, 2, 3, 1, 4, 3, 2, 4, 1, 1, 5, 3, 3, 1, 5, 1), HSPS_8 =
c(4,
3, 5, 5, 4, 5, 5, 3, 4, 4, 3, 3, 2, 4, 4, 3, 4, 3, 3, 3), HSPS_9 =
c(3,
2, 2, 5, 3, 3, 4, 1, 5, 2, 2, 4, 1, 2, 4, 4, 3, 1, 5, 2), HSPS_10=
c(4,
4, 5, 4, 4, 4, 3, 1, 4, 3, 3, 4, 2, 1, 5, 3, 4, 4, 3, 2), HSPS_11=
c(3,
2, 2, 3, 2, 2, 3, 1, 3, 2, 4, 5, 1, 3, 3, 3, 3, 2, 3, 2), HSPS_12=
c(4,
4, 5, 5, 4, 5, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 4, 4, 5, 4), HSPS_13=
c(3,
2, 3, 2, 2, 2, 5, 2, 3, 2, 3, 4, 3, 3, 3, 3, 4, 2, 5, 2), HSPS_14=
c(3,
2, 2, 3, 3, 3, 5, 3, 3, 2, 3, 3, 2, 3, 2, 3, 3, 2, 4, 2), HSPS_15=
c(4,
4, 2, 3, 4, 3, 3, 3, 4, 2, 3, 3, 5, 2, 4, 2, 3, 3, 3, 2), HSPS_16=
c(2,
2, 1, 5, 2, 3, 2, 2, 3, 3, 3, 5, 2, 3, 3, 3, 2, 2, 5, 2), HSPS_17=
c(4,
3, 4, 5, 3, 4, 4, 2, 4, 3, 5, 4, 4, 4, 5, 4, 5, 2, 5, 4), HSPS_18=
c(2,
2, 1, 2, 1, 2, 2, 1, 3, 2, 2, 5, 2, 1, 4, 3, 2, 1, 5, 1), HSPS_19=
c(3,
2, 2, 4, 2, 2, 3, 1, 4, 2, 2, 4, 1, 1, 4, 3, 2, 2, 5, 2), HSPS_20=
c(4,
4, 4, 3, 4, 3, 5, 3, 3, 3, 4, 3, 3, 4, 4, 3, 5, 3, 5, 2), HSPS_21=
c(3,
3, 4, 5, 3, 3, 5, 2, 4, 2, 3, 5, 4, 4, 3, 2, 3, 2, 5, 2), HSPS_22=
c(3,
5, 5, 4, 5, 4, 3, 2, 4, 3, 3, 5, 3, 2, 4, 2, 4, 3, 5, 2), HSPS_23=
c(2,
2, 1, 4, 2, 3, 4, 3, 3, 2, 2, 5, 3, 3, 3, 3, 3, 2, 5, 3), HSPS_24=
c(3,
2, 2, 3, 3, 3, 3, 2, 4, 2, 3, 5, 4, 2, 4, 4, 4, 3, 4, 2), HSPS_25=
c(3,
2, 2, 5, 3, 3, 5, 1, 4, 2, 3, 5, 3, 2, 4, 3, 3, 2, 5, 2), HSPS_26=
c(2,
1, 1, 3, 3, 3, 3, 2, 3, 2, 2, 5, 2, 2, 3, 3, 3, 2, 5, 2), HSPS_27=
c(2,
2, 1, 4, 3, 2, 3, 4, 3, 1, 4, 1, 1, 3, 4, 2, 3, 2, 5, 3), sex =
structure(c(2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 1L), .Label = c("m", "w", "d"), class = "factor"), Bildung =
structure(c(6L,
5L, 5L, 6L, 6L, 6L, 5L, 6L, 5L, 6L, 6L, 4L, 6L, 5L, 5L, 6L, 6L,
5L, 5L, 6L), .Label = c("kein", "Haupt", "mittlereR", "Fachabi",
"Abi", "Studium"), class = "factor"), job = structure(c(6L, 2L,
2L, 2L, 2L, 6L, 2L, 6L, 5L, 2L, 2L, 1L, 6L, 2L, 2L, 2L, 6L, 2L,
2L, 6L), .Label = c("hausl", "Student", "Azubi", "Suchend", "Rente",
"berufstaetig"), class = "factor"), age = c(23, 24, 21, 70, 25,
29, 22, 25, 57, 24, 25, 30, 31, 20, 28, 27, 26, 21, 24, 53),
VPN = 1:20, consent = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label =
c("ja",
"nein"), class = "factor"), psy = c(0, 0, 1, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0), HSPS = c(86, 75,
69, 102, 85, 82, 97, 59, 100, 68, 80, 106, 68, 73, 105, 79,
91, 63, 119, 59), neuro = c(16, 3, 4, 10, 10, 11, 12, 5,
5, 5, 5, 16, 5, 18, 4, 3, 8, 5, 19, 7), extra = c(15, 17,
19, 7, 19, 17, 18, 17, 16, 10, 17, 14, 15, 19, 11, 13, 16,
18, 9, 13), TICS_Skala1 = c(23, 1, 22, 11, 14, 16, 22, 25,
2, 11, 1, 29, 9, 20, 10, 19, 16, 9, 18, 16), TICS_Skala2 = c(14,
12, 11, 9, 11, 10, 4, 10, 5, 8, 5, 24, 13, 5, 6, 6, 14, 2,
1, 13), TICS_Skala3 = c(21, 6, 10, 5, 12, 14, 11, 20, 3,
11, 4, 27, 20, 13, 7, 13, 20, 11, 11, 18), TICS_Skala4 = c(13,
14, 13, 2, 16, 23, 10, 9, 3, 13, 15, 18, 14, 11, 13, 10,
7, 9, 17, 6), TICS_Skala5 = c(12, 2, 6, 5, 3, 5, 8, 3, 4,
6, 0, 18, 3, 7, 1, 6, 6, 1, 13, 3), TICS_Skala6 = c(10, 2,
3, 4, 4, 6, 3, 0, 0, 5, 2, 15, 10, 5, 2, 1, 5, 2, 8, 3),
TICS_Skala7 = c(15, 5, 9, 13, 4, 8, 4, 9, 1, 6, 2, 11, 2,
12, 3, 2, 1, 3, 2, 7), TICS_Skala8 = c(8, 10, 3, 0, 11, 7,
2, 1, 2, 2, 7, 20, 7, 2, 2, 2, 1, 1, 2, 3), TICS_Skala9 = c(12,
3, 4, 8, 8, 6, 9, 5, 2, 6, 5, 11, 3, 11, 1, 5, 9, 3, 7, 5
), TICS_Skala10 = c(32, 5, 18, 16, 19, 18, 21, 16, 5, 17,
7, 39, 12, 24, 3, 15, 20, 6, 25, 14), neuro.c = c(6.08921933085502,
-6.91078066914498, -5.91078066914498, 0.089219330855018,
0.089219330855018, 1.08921933085502, 2.08921933085502,
-4.91078066914498,
-4.91078066914498, -4.91078066914498, -4.91078066914498,
6.08921933085502, -4.91078066914498, 8.08921933085502,
-5.91078066914498,
-6.91078066914498, -1.91078066914498, -4.91078066914498,
9.08921933085502, -2.91078066914498), extra.c = c(5.21003717472119,
7.21003717472119, 9.21003717472119, -2.78996282527881,
9.21003717472119,
7.21003717472119, 8.21003717472119, 7.21003717472119,
6.21003717472119,
0.21003717472119, 7.21003717472119, 4.21003717472119,
5.21003717472119,
9.21003717472119, 1.21003717472119, 3.21003717472119,
6.21003717472119,
8.21003717472119, -0.78996282527881, 3.21003717472119), age.c =
c(-15.4460966542751,
-14.4460966542751, -17.4460966542751, 31.5539033457249,
-13.4460966542751,
-9.4460966542751, -16.4460966542751, -13.4460966542751,
18.5539033457249,
-14.4460966542751, -13.4460966542751, -8.4460966542751,
-7.4460966542751,
-18.4460966542751, -10.4460966542751, -11.4460966542751,
-12.4460966542751, -17.4460966542751, -14.4460966542751,
14.5539033457249), HSP.c = c(-1.92936802973978, -12.9293680297398,
-18.9293680297398, 14.0706319702602, -2.92936802973978,
-5.92936802973978,
9.07063197026022, -28.9293680297398, 12.0706319702602,
-19.9293680297398,
-7.92936802973978, 18.0706319702602, -19.9293680297398,
-14.9293680297398,
17.0706319702602, -8.92936802973978, 3.07063197026022,
-24.9293680297398,
31.0706319702602, -28.9293680297398), HSPhoch = c(1, 0, 0,
1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0)), row.names =
c(NA, 20L), class = "data.frame")
I have a dataset of chess positions made up of 100 groups, with each group taking one of 50 positions ("Position_number") and one of two colours ("stm_white"). I want to run a linear regression for each Position_number subsample, where stm_white is the explanatory variable and stm_perform is the outcome variable. Then, I want to display the coefficient of stm_white and the associated confidence interval for each regression in a forest plot. The idea is to be able to easily see which Position_number subsample gives significant coefficients for stm_white, and to compare coefficients across positions. For example, the plot would have 50 y-axis categories labelled with each position number, the x-axis would represent the coefficient range, and the plot would display a horizontal confidence bar for each position number.
Where I'm stuck:
Getting the confidence interval bounds for each regression
Plotting each of the 50 coefficients (with confidence intervals) on one plot. (I think this is called a forest plot?)
This is how I current get a list of the coefficients for each regression:
fits <- by(df, df[,"Position_number"],
function(x) lm(stm_perform ~ stm_white, data = x))
# Combine coefficients from each model
do.call("rbind", lapply(fits, coef))
And here is a sample of 10 positions (apologies if there's a better way to show reproducible data):
>dput(droplevels(dfMWE[,c("Position_number","stm_white","stm_perform")]))
structure(list(Position_number = c(0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10), stm_white = c(0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1), stm_perform = c(0.224847134350316, -0.252000458803946,
0.263005239459311, -0.337712202569111, 0.525880930891169, -0.5,
0.514387184165999, 0.520136722035817, -0.471249436107731, -0.557311633762293,
-0.382774969095054, -0.256365477992672, -0.592466230584332, 0.420100239642119,
0.35728693116738, -0.239203909010858, 0.492804918290949, -0.377349804212738,
0.498560888290847, 0.650604627933873, 0.244481117928803, 0.225852022298169,
0.448376452689039, 0.305090287270497, 0.275461757157464, 0.0232950364735793,
-0.117225030904946, 0.103523492101814, 0.098301745397805, 0.435599509759579,
-0.323024628921732, -0.790798102797238, 0.326223812111678, -0.331305043692668,
0.300230596737942, -0.340292005855252, 0.196181480575316, -0.0606495585093978,
0.789844179758131, -0.0862623926308338, -0.560150145231903, 0.697345078589853,
-0.425719796345476, 0.65321716721887, -0.878090073942596, 0.393712176214572,
0.636076899687882, 0.530184680003902, -0.567228844342952, 0.767024918145021,
-0.207303615824231, -0.332581578126777, -0.511510891217792, 0.227871326531416,
-0.0140876421179904, -0.891010911045765, -0.617225030904946,
-0.335142021445235, -0.517262524432376, 0.676301669492737, 0.375998241382333,
-0.0882899718631629, -0.154706189382, -0.108431333126633, 0.204584592662721,
0.475554538879339, 0.0840205872617279, -0.403370826694226, -0.74253555894307,
0.182570385474772, -0.484175014735265, -0.332581578126777, -0.427127748605496,
0.474119069108831, -0.0668284645696687, -0.0262098994728823,
-0.255269593134965, -0.313699742316688, -0.485612815834001, 0.302654921410147,
-0.425719796345476, 0.65321716721887, 0.393712176214572, 0.60766106412682,
0.530184680003902, 0.384135895746244, 0.564400490240421, 0.767024918145021,
0.702182602090521, 0.518699777929559, -0.281243170101218, -0.283576305897061,
0.349395372066127, -0.596629173305774, 0.0849108889395813, -0.264122555898524,
0.593855385236178, -0.418698521631085, 0.269754586702576, -0.719919005947152,
0.510072446927438, -0.0728722513945044, -0.0849108889395813,
0.0650557537775339, 0.063669188530584, -0.527315973006493, -0.716423694102939,
-0.518699777929559, 0.349395372066127, -0.518699777929559, 0.420100239642119,
-0.361262250888275, 0.431358608116332, 0.104596852632671, 0.198558626418023,
0.753386077785615, 0.418698521631085, -0.492804918290949, -0.636076899687882,
-0.294218640287997, 0.617225030904946, -0.333860575416878, -0.544494573083008,
-0.738109032540419, -0.192575818328721, -0.442688366237707, 0.455505426916992,
0.13344335621046, 0.116471711943561, 0.836830966002895, -0.125024693001636,
0.400603203290743, -0.363923100312118, -0.157741327529574, -0.281243170101218,
-0.326223812111678, -0.548774335859742, 0.104058949158278, -0.618584122089031,
-0.148779202375097, -0.543066492022212, -0.790798102797238, -0.541637702714763,
0.166337530816562, -0.431358608116332, -0.471249436107731, -0.531618297828107,
-0.135452994588696, 0.444109038883147, -0.309993792719686, 0.472684026993507,
-0.672509643334985, -0.455505426916992, -0.0304828450187082,
-0.668694956307332, 0.213036720610531, -0.370611452782498, -0.100361684849949,
-0.167940159469667, -0.256580594295053, 0.41031649686005, 0.544494573083008,
-0.675040201040299, 0.683816314193659, 0.397841906825283, 0.384135895746244,
0.634743335052317, 0.518699777929559, -0.598013765769344, -0.524445461120661,
-0.613136820153143, 0.12949974225673, -0.337712202569111, -0.189904841395243,
0.588289971863163, 0.434184796930767, -0.703385003471829, 0.505756208411145,
0.445530625978324, -0.167137309739621, 0.437015271896404, -0.550199353253537,
-0.489927553072562, -0.791748837508184, 0.434184796930767, 0.264122555898524,
-0.282408276808469, -0.574280203654524, 0.167940159469667, -0.439849854768097,
-0.604912902007957, 0.420100239642119, 0.35728693116738, 0.239220254140668,
-0.276612130560829, -0.25746444105693, 0.593855385236178, -0.632070012100074,
0.314483587504712, 0.650604627933873, -0.226860086923233, -0.702182602090521,
0.25746444105693, -0.174474012638818, 0.0166045907672774, 0.535915926945102,
0.141635395826102, 0.420100239642119, 0.557311633762293, 0.593855385236178,
0.6961287704296, 0.0444945730830079, -0.234005329233511, 0.448376452689039,
-0.86655664378954, 0.22107824319756, 0.148051654147426, 0.543066492022212,
-0.448376452689039, 0.373300918333268)), row.names = c(NA, -220L
), groups = structure(list(Position_number = c(0, 1, 2, 3, 4,
5, 6, 7, 8, 9, 10), .rows = structure(list(1:20, 21:40, 41:60,
61:80, 81:100, 101:120, 121:140, 141:160, 161:180, 181:200,
201:220), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, 11L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
confint() can get you the confidence interval of a model.
forestplot() from the forestplot R package can make you a forest plot.
library(dplyr)
library(forestplot)
results <- lapply(unique(df$Position_number), function(pos) {
fit = filter(df, Position_number == pos) %>%
lm(data = ., stm_perform ~ stm_white)
stm_white_lm_index = 2 # the second term in lm() output is "stm_white"
coefficient = coef(fit)[stm_white_lm_index]
lb = confint(fit)[stm_white_lm_index,1] # lower bound confidence
ub = confint(fit)[stm_white_lm_index,2] # upper bound confidence
output = data.frame(Position_number = pos, coefficient, lb, ub)
return(output)
}) %>% bind_rows() # bind_rows() combines output from each model in the list
with(results, forestplot(Position_number, coefficient, lb, ub))
The forest plot shows the "Position_number" labels on the left and the regression coefficients of "stm_white" with the 95% confidence intervals plotted. You can further customize the plot. See forestplot::forestplot() or this introduction by Max Gordon for details.
I want to split a percentage histogram (that integrates to 100%) into two facets using facet_grid. However, when splitting to facets, each facet by itself doesn't integrate to 100%. This kind of question has been resolved here in the past, but I cannot translate that solution to my current situation where x is a factor, and thus a histogram using stat(density) doesn't work.
My Data
Dataframe with two columns. equipment denotes whether a household has enough equipment for homeschooling, and children_n denotes number of children.
library(tidyverse)
library(magrittr)
df <-
structure(list(equipment = c(1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1,
0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0,
1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0,
1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1,
1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0,
0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1,
0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1,
1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0,
1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1,
1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1,
0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1,
0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0,
1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0,
0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0,
1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1,
1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1,
1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1,
0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1,
1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1,
1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1,
1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,
1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1,
1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0,
0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1,
1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1,
1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1,
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0,
1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0,
1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1,
0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0,
0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1,
0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0,
0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0,
1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1,
1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1,
0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0,
0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1,
1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1,
1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1,
1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0,
1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1,
1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1), children_n = c(4,
4, 2, 2, 2, 1, 1, 3, 2, 3, 3, 7, 3, 2, 1, 2, 1, 1, 3, 3, 3, 2,
3, 3, 3, 2, 4, 3, 1, 2, 3, 4, 4, 1, 2, 5, 2, 8, 1, 2, 1, 2, 2,
3, 4, 3, 3, 3, 3, 2, 3, 2, 2, 4, 3, 3, 3, 4, 3, 1, 1, 2, 1, 1,
2, 1, 3, 3, 2, 3, 3, 3, 4, 2, 2, 2, 3, 5, 2, 2, 2, 2, 1, 2, 4,
3, 4, 3, 3, 1, 2, 3, 3, 3, 2, 4, 4, 3, 1, 3, 2, 2, 2, 3, 1, 1,
1, 3, 1, 2, 2, 2, 3, 6, 3, 2, 2, 6, 3, 4, 3, 2, 3, 3, 2, 2, 2,
3, 2, 3, 3, 6, 3, 1, 4, 3, 4, 9, 1, 1, 3, 4, 2, 2, 1, 2, 3, 1,
3, 3, 6, 4, 1, 3, 2, 2, 3, 2, 3, 2, 4, 3, 1, 3, 3, 2, 3, 2, 2,
4, 2, 2, 3, 3, 3, 1, 3, 3, 2, 4, 2, 7, 3, 3, 3, 2, 2, 2, 4, 3,
1, 1, 3, 4, 1, 4, 3, 4, 3, 3, 2, 3, 3, 3, 2, 3, 3, 2, 3, 3, 3,
3, 1, 1, 2, 2, 4, 2, 3, 3, 2, 2, 1, 2, 5, 2, 2, 2, 5, 3, 2, 2,
4, 2, 1, 3, 4, 4, 3, 3, 4, 3, 3, 1, 3, 2, 1, 8, 2, 3, 2, 3, 3,
2, 3, 3, 1, 3, 3, 4, 2, 3, 3, 3, 2, 6, 1, 2, 2, 2, 2, 2, 2, 4,
3, 5, 4, 1, 2, 2, 2, 4, 2, 3, 3, 1, 3, 2, 1, 2, 2, 3, 3, 3, 3,
1, 3, 4, 2, 1, 3, 4, 2, 1, 3, 4, 3, 4, 2, 3, 3, 2, 7, 1, 2, 1,
3, 2, 2, 2, 2, 3, 3, 3, 2, 3, 1, 2, 2, 3, 2, 4, 3, 2, 3, 3, 5,
3, 5, 3, 5, 1, 2, 1, 4, 1, 4, 2, 2, 3, 2, 2, 2, 3, 2, 3, 3, 3,
3, 4, 3, 8, 3, 1, 2, 3, 3, 2, 1, 3, 2, 2, 3, 3, 4, 4, 2, 2, 3,
1, 2, 3, 2, 3, 3, 2, 1, 3, 3, 2, 3, 3, 3, 4, 1, 2, 3, 3, 3, 4,
2, 1, 3, 4, 2, 3, 3, 2, 2, 2, 2, 2, 3, 3, 3, 1, 3, 3, 1, 1, 3,
2, 1, 3, 2, 4, 1, 3, 2, 3, 2, 2, 2, 4, 1, 2, 3, 2, 3, 2, 2, 1,
3, 1, 3, 1, 3, 3, 2, 1, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 3, 4, 1,
2, 4, 2, 4, 2, 2, 2, 1, 3, 2, 1, 1, 4, 3, 4, 3, 2, 2, 2, 3, 7,
3, 1, 3, 3, 3, 2, 1, 3, 2, 3, 3, 2, 4, 1, 1, 1, 4, 3, 3, 4, 3,
8, 2, 4, 5, 3, 2, 3, 1, 2, 1, 2, 2, 3, 1, 4, 3, 2, 2, 3, 3, 3,
3, 1, 2, 1, 2, 3, 3, 2, 2, 2, 2, 3, 3, 4, 5, 3, 2, 2, 2, 3, 1,
3, 3, 4, 2, 1, 3, 3, 3, 4, 2, 1, 2, 1, 2, 2, 3, 3, 4, 1, 1, 6,
3, 2, 2, 2, 6, 3, 3, 2, 2, 1, 4, 2, 3, 3, 3, 2, 2, 3, 3, 2, 4,
6, 1, 1, 1, 1, 3, 9, 4, 2, 3, 2, 2, 2, 4, 3, 3, 4, 1, 2, 6, 3,
3, 3, 2, 2, 3, 4, 2, 3, 2, 2, 3, 2, 3, 4, 7, 2, 3, 3, 2, 3, 2,
3, 4, 3, 3, 3, 2, 2, 2, 1, 3, 4, 2, 1, 3, 4, 1, 3, 4, 4, 3, 3,
3, 3, 3, 2, 3, 3, 3, 5, 3, 3, 5, 2, 2, 1, 1, 2, 2, 2, 3, 1, 3,
2, 2, 2, 4, 2, 2, 2, 4, 1, 3, 4, 3, 3, 4, 3, 2, 1, 3, 4, 8, 1,
2, 3, 3, 3, 3, 2, 3, 3, 1, 3, 4, 2, 3, 2, 6, 3, 1, 2, 2, 2, 2,
2, 4, 3, 5, 1, 2, 2, 2, 4, 2, 3, 3, 1, 1, 2, 2, 3, 3, 2, 3, 3,
3, 3, 1, 4, 4, 2, 3, 3, 1, 4, 3, 4, 2, 3, 3, 2, 7, 1, 4, 1, 2,
2, 3, 2, 5, 2, 3, 2, 3, 1, 3, 2, 2, 3, 2, 4, 2, 3, 3, 3, 3, 1,
5, 5, 1, 1, 2, 3, 1, 4, 2, 2, 3, 2, 2, 2, 3, 3, 3, 3, 2, 3, 4,
8, 3, 2, 3, 1, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 4, 2, 3, 2, 1, 3,
2, 3, 3, 2, 3, 3, 2, 3, 2, 3, 3, 1, 1, 2, 4, 3, 4, 3, 1, 3, 4,
2, 3, 3, 2, 2, 2, 2, 2, 3, 3, 3, 1, 3, 3, 2, 1, 1, 4, 1, 3, 2,
1, 2, 3, 3, 2, 2, 2, 4, 2, 1, 3, 2, 3, 2, 1, 3, 1, 3, 1, 3, 3,
2, 1, 2, 3, 2, 3, 1, 2, 2, 2, 3, 3, 2, 3, 1, 3, 3, 3, 3, 2, 4,
2, 4, 4, 1, 2, 1, 2, 1, 3, 3, 3, 2, 3, 3, 4, 2, 2, 3, 2, 1, 2,
2, 1, 1, 3, 1, 2, 3, 3, 3, 2, 1, 1, 1, 2, 1, 2, 5, 1, 2, 1, 4,
2, 2, 2, 1, 4, 2, 3, 3, 3, 2, 4, 5, 4, 2, 4, 2, 3, 1, 4, 3, 3,
2, 3, 3, 2, 3, 2, 1, 3, 2, 4, 2, 3, 4, 1, 2, 3, 1, 3, 3, 4, 2,
2, 2, 3, 3, 2, 1, 2, 2, 1, 3, 1, 3, 1, 1, 1, 3, 2, 2, 4, 3, 4,
3, 3, 4, 1, 1, 3, 3, 2, 3, 2, 3, 2, 1, 3, 3, 1, 5, 1, 1, 2, 4,
2, 3, 5, 4, 1, 3, 2, 1, 2, 2, 4, 3, 4, 2, 2, 1, 3, 2, 4, 2, 3,
3, 2, 3, 2, 1, 2, 3, 4)), row.names = c(NA, -1059L), class = c("tbl_df",
"tbl", "data.frame"))
df
## # A tibble: 1,059 x 2
## equipment children_n
## <dbl> <dbl>
## 1 1 4
## 2 0 4
## 3 1 2
## 4 1 2
## 5 0 2
## 6 1 1
## 7 1 1
## 8 1 3
## 9 1 2
## 10 1 3
## # ... with 1,049 more rows
In cases where number of children is above 6, I want to collapse those cases to one category of "6+".
df %<>%
mutate_at(vars(children_n), as.character) %>%
mutate_at(vars(children_n), recode, "9" = "6_plus", "8" = "6_plus", "7" = "6_plus", "6" = "6_plus") %>%
mutate_at(vars(children_n), fct_relevel, "1", "2", "3", "4", "5", "6_plus")
glimpse(df)
## Rows: 1,059
## Columns: 2
## $ equipment <dbl> 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, ...
## $ children_n <fct> 4, 4, 2, 2, 2, 1, 1, 3, 2, 3, 3, 6_plus, 3, 2, 1, 2, 1, 1, 3, 3, 3, 2, 3, 3, 3, 2, 4, 3, 1, 2, 3, 4, 4, 1, 2, 5, 2, 6_plus, 1, 2, 1, 2,...
Now I want to plot the proportion of number of children in two separate panels: one panel for families who have enough equipment, and another panel for families who don't have enough equipment:
df %>%
ggplot(data = ., aes(x = children_n, y = equipment)) +
geom_histogram(aes(y = (..count..)/sum(..count..)), stat = "count" , fill = "darkblue") +
geom_text(aes(label = scales::percent(((..count..)/sum(..count..)), accuracy = 1),
y = ((..count..)/sum(..count..)) ), stat= "count", vjust = -.5, color = "darkblue") +
scale_y_continuous(labels = scales::percent) +
facet_grid(~ equipment, labeller = as_labeller(c("1" = "have enough equipment",
"0" = "don't have enough equipment")))
This gives two panels that *DON'T* integrate to 100% independently:
Trying to solve the problem
I found this question that describes the same intention and problem. The chosen solution suggests defining the geom_histogram as density so it integrates to 100%. But this won't work in my case because stat(density) asks that the x variable will be continuous, unlike my case where x is a factor.
df %>%
ggplot(data = ., aes(x = children_n, y = equipment)) +
geom_histogram(aes(y = stat(density) * 6), binwidth = 6, fill = "darkblue") +
facet_grid(~ equipment, labeller = as_labeller(c("1" = "have enough equipment",
"0" = "don't have enough equipment")))
Error: StatBin requires a continuous x variable: the x variable is
discrete. Perhaps you want stat="count"?
Other approaches suggest using ..PANEL.. while others are strongly against it.
How can I get the two facets to show percents that independently integrate to 100%, in a proper way?
This could be achieved like so:
Map the facetting variable on the group aes
Use e.g. tapply to get the total number per group or facet
BTW: I have put the code for the normalization inside a helper function to reduce the code duplication and readability
library(tidyverse)
library(magrittr)
df %<>%
mutate_at(vars(children_n), as.character) %>%
mutate_at(vars(children_n), recode, "9" = "6_plus", "8" = "6_plus", "7" = "6_plus", "6" = "6_plus") %>%
mutate_at(vars(children_n), fct_relevel, "1", "2", "3", "4", "5", "6_plus")
help <- function(count, group) {
count / tapply(count, group, sum)[group]
}
df %>%
ggplot(data = ., aes(x = children_n, y = equipment, group = equipment)) +
geom_histogram(aes(y = help(..count.., ..group..)), stat = "count" , fill = "darkblue") +
geom_text(aes(label = scales::percent(help(..count.., ..group..), accuracy = 1),
y = help(..count.., ..group..) ), stat= "count", vjust = -.5, color = "darkblue") +
scale_y_continuous(labels = scales::percent) +
facet_grid(~ equipment, labeller = as_labeller(c("1" = "have enough equipment",
"0" = "don't have enough equipment")))
#> Warning: Ignoring unknown parameters: binwidth, bins, pad
I have a dataframe with credits of participants for several items. I would like to calculate the item difficulty of those items. With the eRm Package I can calculate the difficulty for the different categories of each item:
Some data:
x1 <- c(1, 2, 1, 0, 3, 3, 0, 4, 4, 1, 0, 3, 2, 0, 4, 1, NA, 1, 1, NA, 0, 1, 2, 1, 1, 3, 0, 2, 1, 0)
x2 <- c(0, 1, 0, 3, 2, 0, 1, 2, 2, NA, 0, 1, 2, 2, NA, 1, 2, 1, 2, 1, 0, 2, 3, 0, 1, 1, 0, 1, 1, 3)
x3 <- c(NA, NA, 3, 0, 1, 2, 0, 1, 1, NA, 3, 0, 1, 2, 0, 1, 2, 1, 0, 1, 3, 1, 3, 0, 1, 1, 0, 1, 1, 0)
x4 <- c(3, 0, 2, 2, 3, 2, 1, 2, 0, 0, 1, 0, 1, 1, 0, 1, 2, 1, 1, 2, 0, 1, 1, 2, 1, 1, 0, 1, 1, 0)
x5 <- c(1, NA, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, NA, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0)
dat <- data.frame(x1, x2, x3, x4, x5)
library("eRm")
Calculation of category difficulties:
PCM(dat)
PCM(dat)$etapar
I do not need the difficulty for the categories, but for the whole item. How can I calculate the overall difficulty of each item?
Thank you very much in advance!