I have a dataset of chess positions made up of 100 groups, with each group taking one of 50 positions ("Position_number") and one of two colours ("stm_white"). I want to run a linear regression for each Position_number subsample, where stm_white is the explanatory variable and stm_perform is the outcome variable. Then, I want to display the coefficient of stm_white and the associated confidence interval for each regression in a forest plot. The idea is to be able to easily see which Position_number subsample gives significant coefficients for stm_white, and to compare coefficients across positions. For example, the plot would have 50 y-axis categories labelled with each position number, the x-axis would represent the coefficient range, and the plot would display a horizontal confidence bar for each position number.
Where I'm stuck:
Getting the confidence interval bounds for each regression
Plotting each of the 50 coefficients (with confidence intervals) on one plot. (I think this is called a forest plot?)
This is how I currently get a list of the coefficients for each regression:
# Fit stm_perform ~ stm_white separately on each Position_number subset.
fits <- by(
  data = df,
  INDICES = df[, "Position_number"],
  FUN = function(x) lm(stm_perform ~ stm_white, data = x)
)

# Stack the per-position coefficient vectors into one matrix
# (one row per position).
do.call(rbind, lapply(fits, coef))
And here is a sample of 10 positions (apologies if there's a better way to show reproducible data):
>dput(droplevels(dfMWE[,c("Position_number","stm_white","stm_perform")]))
structure(list(Position_number = c(0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10), stm_white = c(0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1), stm_perform = c(0.224847134350316, -0.252000458803946,
0.263005239459311, -0.337712202569111, 0.525880930891169, -0.5,
0.514387184165999, 0.520136722035817, -0.471249436107731, -0.557311633762293,
-0.382774969095054, -0.256365477992672, -0.592466230584332, 0.420100239642119,
0.35728693116738, -0.239203909010858, 0.492804918290949, -0.377349804212738,
0.498560888290847, 0.650604627933873, 0.244481117928803, 0.225852022298169,
0.448376452689039, 0.305090287270497, 0.275461757157464, 0.0232950364735793,
-0.117225030904946, 0.103523492101814, 0.098301745397805, 0.435599509759579,
-0.323024628921732, -0.790798102797238, 0.326223812111678, -0.331305043692668,
0.300230596737942, -0.340292005855252, 0.196181480575316, -0.0606495585093978,
0.789844179758131, -0.0862623926308338, -0.560150145231903, 0.697345078589853,
-0.425719796345476, 0.65321716721887, -0.878090073942596, 0.393712176214572,
0.636076899687882, 0.530184680003902, -0.567228844342952, 0.767024918145021,
-0.207303615824231, -0.332581578126777, -0.511510891217792, 0.227871326531416,
-0.0140876421179904, -0.891010911045765, -0.617225030904946,
-0.335142021445235, -0.517262524432376, 0.676301669492737, 0.375998241382333,
-0.0882899718631629, -0.154706189382, -0.108431333126633, 0.204584592662721,
0.475554538879339, 0.0840205872617279, -0.403370826694226, -0.74253555894307,
0.182570385474772, -0.484175014735265, -0.332581578126777, -0.427127748605496,
0.474119069108831, -0.0668284645696687, -0.0262098994728823,
-0.255269593134965, -0.313699742316688, -0.485612815834001, 0.302654921410147,
-0.425719796345476, 0.65321716721887, 0.393712176214572, 0.60766106412682,
0.530184680003902, 0.384135895746244, 0.564400490240421, 0.767024918145021,
0.702182602090521, 0.518699777929559, -0.281243170101218, -0.283576305897061,
0.349395372066127, -0.596629173305774, 0.0849108889395813, -0.264122555898524,
0.593855385236178, -0.418698521631085, 0.269754586702576, -0.719919005947152,
0.510072446927438, -0.0728722513945044, -0.0849108889395813,
0.0650557537775339, 0.063669188530584, -0.527315973006493, -0.716423694102939,
-0.518699777929559, 0.349395372066127, -0.518699777929559, 0.420100239642119,
-0.361262250888275, 0.431358608116332, 0.104596852632671, 0.198558626418023,
0.753386077785615, 0.418698521631085, -0.492804918290949, -0.636076899687882,
-0.294218640287997, 0.617225030904946, -0.333860575416878, -0.544494573083008,
-0.738109032540419, -0.192575818328721, -0.442688366237707, 0.455505426916992,
0.13344335621046, 0.116471711943561, 0.836830966002895, -0.125024693001636,
0.400603203290743, -0.363923100312118, -0.157741327529574, -0.281243170101218,
-0.326223812111678, -0.548774335859742, 0.104058949158278, -0.618584122089031,
-0.148779202375097, -0.543066492022212, -0.790798102797238, -0.541637702714763,
0.166337530816562, -0.431358608116332, -0.471249436107731, -0.531618297828107,
-0.135452994588696, 0.444109038883147, -0.309993792719686, 0.472684026993507,
-0.672509643334985, -0.455505426916992, -0.0304828450187082,
-0.668694956307332, 0.213036720610531, -0.370611452782498, -0.100361684849949,
-0.167940159469667, -0.256580594295053, 0.41031649686005, 0.544494573083008,
-0.675040201040299, 0.683816314193659, 0.397841906825283, 0.384135895746244,
0.634743335052317, 0.518699777929559, -0.598013765769344, -0.524445461120661,
-0.613136820153143, 0.12949974225673, -0.337712202569111, -0.189904841395243,
0.588289971863163, 0.434184796930767, -0.703385003471829, 0.505756208411145,
0.445530625978324, -0.167137309739621, 0.437015271896404, -0.550199353253537,
-0.489927553072562, -0.791748837508184, 0.434184796930767, 0.264122555898524,
-0.282408276808469, -0.574280203654524, 0.167940159469667, -0.439849854768097,
-0.604912902007957, 0.420100239642119, 0.35728693116738, 0.239220254140668,
-0.276612130560829, -0.25746444105693, 0.593855385236178, -0.632070012100074,
0.314483587504712, 0.650604627933873, -0.226860086923233, -0.702182602090521,
0.25746444105693, -0.174474012638818, 0.0166045907672774, 0.535915926945102,
0.141635395826102, 0.420100239642119, 0.557311633762293, 0.593855385236178,
0.6961287704296, 0.0444945730830079, -0.234005329233511, 0.448376452689039,
-0.86655664378954, 0.22107824319756, 0.148051654147426, 0.543066492022212,
-0.448376452689039, 0.373300918333268)), row.names = c(NA, -220L
), groups = structure(list(Position_number = c(0, 1, 2, 3, 4,
5, 6, 7, 8, 9, 10), .rows = structure(list(1:20, 21:40, 41:60,
61:80, 81:100, 101:120, 121:140, 141:160, 161:180, 181:200,
201:220), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, 11L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
confint() can get you the confidence interval of a model.
forestplot() from the forestplot R package can make you a forest plot.
library(dplyr)
library(forestplot)
# For every position, regress stm_perform on stm_white and keep the
# stm_white coefficient together with its confidence interval.
# `results` has one row per position with the columns:
#   Position_number, coefficient, lb (lower bound), ub (upper bound).
results <- lapply(unique(df$Position_number), function(pos) {
  fit <- lm(stm_perform ~ stm_white, data = filter(df, Position_number == pos))

  # Select the term by name rather than by position, and compute the
  # interval once instead of once per bound.
  ci <- confint(fit, parm = "stm_white")

  data.frame(
    Position_number = pos,
    # unname() keeps the term label out of the data frame's row names
    coefficient = unname(coef(fit)["stm_white"]),
    lb = ci[1, 1], # lower confidence bound
    ub = ci[1, 2]  # upper confidence bound
  )
}) %>%
  bind_rows() # combine the one-row data frames from each model

# Forest plot: position labels on the left, coefficient with its interval.
with(results, forestplot(Position_number, coefficient, lb, ub))
The forest plot shows the "Position_number" labels on the left and the regression coefficients of "stm_white" with the 95% confidence intervals plotted. You can further customize the plot. See forestplot::forestplot() or this introduction by Max Gordon for details.
I want to split a percentage histogram (that integrates to 100%) into two facets using facet_grid. However, when splitting to facets, each facet by itself doesn't integrate to 100%. This kind of question has been resolved here in the past, but I cannot translate that solution to my current situation where x is a factor, and thus a histogram using stat(density) doesn't work.
My Data
Dataframe with two columns. equipment denotes whether a household has enough equipment for homeschooling, and children_n denotes number of children.
library(tidyverse)
library(magrittr)
df <-
structure(list(equipment = c(1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1,
0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0,
1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0,
1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1,
1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0,
0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1,
0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1,
1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0,
1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1,
1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1,
0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1,
0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0,
1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0,
0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0,
1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1,
1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1,
1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1,
0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1,
1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1,
1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1,
1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,
1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1,
1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0,
0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1,
1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1,
1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1,
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0,
1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0,
1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1,
0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0,
0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1,
0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0,
0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0,
1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1,
1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1,
0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0,
0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1,
1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1,
1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1,
1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0,
1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1,
1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1), children_n = c(4,
4, 2, 2, 2, 1, 1, 3, 2, 3, 3, 7, 3, 2, 1, 2, 1, 1, 3, 3, 3, 2,
3, 3, 3, 2, 4, 3, 1, 2, 3, 4, 4, 1, 2, 5, 2, 8, 1, 2, 1, 2, 2,
3, 4, 3, 3, 3, 3, 2, 3, 2, 2, 4, 3, 3, 3, 4, 3, 1, 1, 2, 1, 1,
2, 1, 3, 3, 2, 3, 3, 3, 4, 2, 2, 2, 3, 5, 2, 2, 2, 2, 1, 2, 4,
3, 4, 3, 3, 1, 2, 3, 3, 3, 2, 4, 4, 3, 1, 3, 2, 2, 2, 3, 1, 1,
1, 3, 1, 2, 2, 2, 3, 6, 3, 2, 2, 6, 3, 4, 3, 2, 3, 3, 2, 2, 2,
3, 2, 3, 3, 6, 3, 1, 4, 3, 4, 9, 1, 1, 3, 4, 2, 2, 1, 2, 3, 1,
3, 3, 6, 4, 1, 3, 2, 2, 3, 2, 3, 2, 4, 3, 1, 3, 3, 2, 3, 2, 2,
4, 2, 2, 3, 3, 3, 1, 3, 3, 2, 4, 2, 7, 3, 3, 3, 2, 2, 2, 4, 3,
1, 1, 3, 4, 1, 4, 3, 4, 3, 3, 2, 3, 3, 3, 2, 3, 3, 2, 3, 3, 3,
3, 1, 1, 2, 2, 4, 2, 3, 3, 2, 2, 1, 2, 5, 2, 2, 2, 5, 3, 2, 2,
4, 2, 1, 3, 4, 4, 3, 3, 4, 3, 3, 1, 3, 2, 1, 8, 2, 3, 2, 3, 3,
2, 3, 3, 1, 3, 3, 4, 2, 3, 3, 3, 2, 6, 1, 2, 2, 2, 2, 2, 2, 4,
3, 5, 4, 1, 2, 2, 2, 4, 2, 3, 3, 1, 3, 2, 1, 2, 2, 3, 3, 3, 3,
1, 3, 4, 2, 1, 3, 4, 2, 1, 3, 4, 3, 4, 2, 3, 3, 2, 7, 1, 2, 1,
3, 2, 2, 2, 2, 3, 3, 3, 2, 3, 1, 2, 2, 3, 2, 4, 3, 2, 3, 3, 5,
3, 5, 3, 5, 1, 2, 1, 4, 1, 4, 2, 2, 3, 2, 2, 2, 3, 2, 3, 3, 3,
3, 4, 3, 8, 3, 1, 2, 3, 3, 2, 1, 3, 2, 2, 3, 3, 4, 4, 2, 2, 3,
1, 2, 3, 2, 3, 3, 2, 1, 3, 3, 2, 3, 3, 3, 4, 1, 2, 3, 3, 3, 4,
2, 1, 3, 4, 2, 3, 3, 2, 2, 2, 2, 2, 3, 3, 3, 1, 3, 3, 1, 1, 3,
2, 1, 3, 2, 4, 1, 3, 2, 3, 2, 2, 2, 4, 1, 2, 3, 2, 3, 2, 2, 1,
3, 1, 3, 1, 3, 3, 2, 1, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 3, 4, 1,
2, 4, 2, 4, 2, 2, 2, 1, 3, 2, 1, 1, 4, 3, 4, 3, 2, 2, 2, 3, 7,
3, 1, 3, 3, 3, 2, 1, 3, 2, 3, 3, 2, 4, 1, 1, 1, 4, 3, 3, 4, 3,
8, 2, 4, 5, 3, 2, 3, 1, 2, 1, 2, 2, 3, 1, 4, 3, 2, 2, 3, 3, 3,
3, 1, 2, 1, 2, 3, 3, 2, 2, 2, 2, 3, 3, 4, 5, 3, 2, 2, 2, 3, 1,
3, 3, 4, 2, 1, 3, 3, 3, 4, 2, 1, 2, 1, 2, 2, 3, 3, 4, 1, 1, 6,
3, 2, 2, 2, 6, 3, 3, 2, 2, 1, 4, 2, 3, 3, 3, 2, 2, 3, 3, 2, 4,
6, 1, 1, 1, 1, 3, 9, 4, 2, 3, 2, 2, 2, 4, 3, 3, 4, 1, 2, 6, 3,
3, 3, 2, 2, 3, 4, 2, 3, 2, 2, 3, 2, 3, 4, 7, 2, 3, 3, 2, 3, 2,
3, 4, 3, 3, 3, 2, 2, 2, 1, 3, 4, 2, 1, 3, 4, 1, 3, 4, 4, 3, 3,
3, 3, 3, 2, 3, 3, 3, 5, 3, 3, 5, 2, 2, 1, 1, 2, 2, 2, 3, 1, 3,
2, 2, 2, 4, 2, 2, 2, 4, 1, 3, 4, 3, 3, 4, 3, 2, 1, 3, 4, 8, 1,
2, 3, 3, 3, 3, 2, 3, 3, 1, 3, 4, 2, 3, 2, 6, 3, 1, 2, 2, 2, 2,
2, 4, 3, 5, 1, 2, 2, 2, 4, 2, 3, 3, 1, 1, 2, 2, 3, 3, 2, 3, 3,
3, 3, 1, 4, 4, 2, 3, 3, 1, 4, 3, 4, 2, 3, 3, 2, 7, 1, 4, 1, 2,
2, 3, 2, 5, 2, 3, 2, 3, 1, 3, 2, 2, 3, 2, 4, 2, 3, 3, 3, 3, 1,
5, 5, 1, 1, 2, 3, 1, 4, 2, 2, 3, 2, 2, 2, 3, 3, 3, 3, 2, 3, 4,
8, 3, 2, 3, 1, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 4, 2, 3, 2, 1, 3,
2, 3, 3, 2, 3, 3, 2, 3, 2, 3, 3, 1, 1, 2, 4, 3, 4, 3, 1, 3, 4,
2, 3, 3, 2, 2, 2, 2, 2, 3, 3, 3, 1, 3, 3, 2, 1, 1, 4, 1, 3, 2,
1, 2, 3, 3, 2, 2, 2, 4, 2, 1, 3, 2, 3, 2, 1, 3, 1, 3, 1, 3, 3,
2, 1, 2, 3, 2, 3, 1, 2, 2, 2, 3, 3, 2, 3, 1, 3, 3, 3, 3, 2, 4,
2, 4, 4, 1, 2, 1, 2, 1, 3, 3, 3, 2, 3, 3, 4, 2, 2, 3, 2, 1, 2,
2, 1, 1, 3, 1, 2, 3, 3, 3, 2, 1, 1, 1, 2, 1, 2, 5, 1, 2, 1, 4,
2, 2, 2, 1, 4, 2, 3, 3, 3, 2, 4, 5, 4, 2, 4, 2, 3, 1, 4, 3, 3,
2, 3, 3, 2, 3, 2, 1, 3, 2, 4, 2, 3, 4, 1, 2, 3, 1, 3, 3, 4, 2,
2, 2, 3, 3, 2, 1, 2, 2, 1, 3, 1, 3, 1, 1, 1, 3, 2, 2, 4, 3, 4,
3, 3, 4, 1, 1, 3, 3, 2, 3, 2, 3, 2, 1, 3, 3, 1, 5, 1, 1, 2, 4,
2, 3, 5, 4, 1, 3, 2, 1, 2, 2, 4, 3, 4, 2, 2, 1, 3, 2, 4, 2, 3,
3, 2, 3, 2, 1, 2, 3, 4)), row.names = c(NA, -1059L), class = c("tbl_df",
"tbl", "data.frame"))
df
## # A tibble: 1,059 x 2
## equipment children_n
## <dbl> <dbl>
## 1 1 4
## 2 0 4
## 3 1 2
## 4 1 2
## 5 0 2
## 6 1 1
## 7 1 1
## 8 1 3
## 9 1 2
## 10 1 3
## # ... with 1,049 more rows
In cases where number of children is above 6, I want to collapse those cases to one category of "6+".
# Collapse 6, 7, 8 and 9 children into a single "6_plus" category and store
# children_n as a factor whose levels run "1", "2", ..., "5", "6_plus".
df <- df %>%
  mutate(
    children_n = as.character(children_n),
    children_n = recode(
      children_n,
      "9" = "6_plus", "8" = "6_plus", "7" = "6_plus", "6" = "6_plus"
    ),
    children_n = fct_relevel(children_n, "1", "2", "3", "4", "5", "6_plus")
  )

glimpse(df)
## Rows: 1,059
## Columns: 2
## $ equipment <dbl> 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, ...
## $ children_n <fct> 4, 4, 2, 2, 2, 1, 1, 3, 2, 3, 3, 6_plus, 3, 2, 1, 2, 1, 1, 3, 3, 3, 2, 3, 3, 3, 2, 4, 3, 1, 2, 3, 4, 4, 1, 2, 5, 2, 6_plus, 1, 2, 1, 2,...
Now I want to plot the proportion of number of children in two separate panels: one panel for families who have enough equipment, and another panel for families who don't have enough equipment:
# Share of households per number of children, one panel per equipment value.
# Bar heights are count / sum(count); as noted below, the panels do not
# add up to 100% independently with this normalisation.
ggplot(data = df, mapping = aes(x = children_n, y = equipment)) +
  geom_histogram(
    mapping = aes(y = (..count..) / sum(..count..)),
    stat = "count",
    fill = "darkblue"
  ) +
  geom_text(
    mapping = aes(
      label = scales::percent(((..count..) / sum(..count..)), accuracy = 1),
      y = ((..count..) / sum(..count..))
    ),
    stat = "count",
    vjust = -.5,
    color = "darkblue"
  ) +
  scale_y_continuous(labels = scales::percent) +
  facet_grid(
    ~ equipment,
    labeller = as_labeller(c(
      "1" = "have enough equipment",
      "0" = "don't have enough equipment"
    ))
  )
This gives two panels that *DON'T* integrate to 100% independently:
Trying to solve the problem
I found this question that describes the same intention and problem. The chosen solution suggests defining the geom_histogram as a density so that it integrates to 100%. But this won't work in my case because stat(density) requires the x variable to be continuous, whereas in my case x is a factor.
# Attempt with a density-scaled histogram. This stops with the StatBin error
# shown below because children_n is a factor, not a continuous variable.
ggplot(data = df, mapping = aes(x = children_n, y = equipment)) +
  geom_histogram(
    mapping = aes(y = stat(density) * 6),
    binwidth = 6,
    fill = "darkblue"
  ) +
  facet_grid(
    ~ equipment,
    labeller = as_labeller(c(
      "1" = "have enough equipment",
      "0" = "don't have enough equipment"
    ))
  )
Error: StatBin requires a continuous x variable: the x variable is
discrete. Perhaps you want stat="count"?
Other approaches suggest using ..PANEL.. while others are strongly against it.
How can I get the two facets to show percents that independently integrate to 100%, in a proper way?
This could be achieved like so:
Map the facetting variable on the group aes
Use e.g. tapply to get the total number per group or facet
BTW: I have put the normalization code inside a helper function to reduce code duplication and improve readability
library(tidyverse)
library(magrittr)
# Collapse 6, 7, 8 and 9 children into a single "6_plus" category and store
# children_n as a factor whose levels run "1", "2", ..., "5", "6_plus".
df <- df %>%
  mutate(
    children_n = as.character(children_n),
    children_n = recode(
      children_n,
      "9" = "6_plus", "8" = "6_plus", "7" = "6_plus", "6" = "6_plus"
    ),
    children_n = fct_relevel(children_n, "1", "2", "3", "4", "5", "6_plus")
  )
# Normalise counts within groups.
#
# Arguments:
#   count  numeric vector of counts.
#   group  vector of the same length giving the group of each count.
# Returns:
#   A numeric vector the same length as `count`, where each element is its
#   count divided by the total count of its group, so every group sums to 1.
#
# ave() returns the group total aligned with each element, so this works for
# any group labels. Indexing a tapply() result by the group ids only works
# when the ids are exactly 1..k.
#
# NOTE(review): the name `help` masks utils::help() while it is defined; it is
# kept because the plotting code calls it by this name.
help <- function(count, group) {
  count / ave(count, group, FUN = sum)
}
# Same bar chart, but equipment is also mapped to the group aesthetic so that
# help() can divide each bar's count by the total of its own group.
# NOTE: geom_histogram(stat = "count") triggers the "Ignoring unknown
# parameters" warning shown below.
ggplot(
  data = df,
  mapping = aes(x = children_n, y = equipment, group = equipment)
) +
  geom_histogram(
    mapping = aes(y = help(..count.., ..group..)),
    stat = "count",
    fill = "darkblue"
  ) +
  geom_text(
    mapping = aes(
      label = scales::percent(help(..count.., ..group..), accuracy = 1),
      y = help(..count.., ..group..)
    ),
    stat = "count",
    vjust = -.5,
    color = "darkblue"
  ) +
  scale_y_continuous(labels = scales::percent) +
  facet_grid(
    ~ equipment,
    labeller = as_labeller(c(
      "1" = "have enough equipment",
      "0" = "don't have enough equipment"
    ))
  )
#> Warning: Ignoring unknown parameters: binwidth, bins, pad
I'm looking at specifying inverse probability weights associated with each data point in a data frame (not analytical weights) for a mixed effect logistic regression for use vs. available GPS locations from animals. My question is closely related to this post: (Inverse probability weights in r), but for a mixed-effect model. The svyglm function specifies the correct weights, but the survey package doesn't allow for random effects, and lme4 uses analytical weights. I looked into the coxme package as an alternative but the help file indicates that weights are specified as per lm, which uses analytical weights.
Is there a package/function implemented in r to specify sampling weights for mixed effects, or a way to specify sampling weights using the coxme package?
example data:
data2 <- structure(list(Use = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0), Status = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), AnimalID = c(1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5), St.A1k = c(0.029627, 0.043414,
0.113816, 0.000000, 0.020241, 0.000000,0.000000, 0.007334, 0.000000,
0.046055, 0.028454, 0.042828, 0.018480, 0.106776, 0.018480, 0.046641,
0.033148, 0.039308, 0.035494, 0.000000, 0.004987, 0.051335, 0.046935,
0.018774, 0.000000, 0.043708, 0.014667, 0.080375, 0.000000, 0.015254,
0.000000, 0.053388, 0.055148, 0.036668, 0.006160, 0.016720, 0.029041,
0.057788, 0.023174, 0.022294, 0.031388, 0.043414, 0.005573, 0.000000,
0.024054,0.000000, 0.000000, 0.074215, 0.021121, 0.016720, 0.028454,
0.042828, 0.018480, 0.106776, 0.018480, 0.046641, 0.033148, 0.039308,
0.035494, 0.000000, 0.000000, 0.053388, 0.055148, 0.036668, 0.006160,
0.016720, 0.029041, 0.057788, 0.023174, 0.022294,0.031388, 0.043414,
0.005573, 0.000000, 0.024054, 0.000000, 0.000000, 0.074215, 0.021121,
0.016720, 0.029627, 0.043414, 0.113816, 0.000000, 0.020241, 0.000000,
0.000000, 0.007334, 0.000000, 0.046055, 0.029627, 0.043414, 0.113816,
0.000000, 0.020241, 0.000000, 0.000000, 0.007334, 0.000000, 0.046055),
InvWeight = c(1.332636, 1.248722, 1.248722, 1.248722, 1.179661, 1, 1, 1, 1,
1, 1.060296, 1.060296, 1.249593, 1.248595, 1.248626, 1, 1, 1, 1, 1,
1.294132, 1.740839, 1.740839, 2.377546, 2.377546, 1, 1, 1, 1, 1,
2.378091,2.378091, 2.378091, 2.378091, 1.060295, 1, 1, 1, 1, 1, 1.060296,
1.060296, 1.249593, 1.248595, 1.248626, 1, 1, 1, 1, 1, 2.378091, 2.378091,
2.378091, 2.378091, 1.060295, 1, 1, 1, 1, 1, 2.378091, 2.378091, 2.378091,
2.378091, 1.060295, 1, 1, 1, 1, 1, 1.294132,1.740839, 1.740839, 2.377546,
2.377546, 1, 1, 1, 1, 1, 1.332636, 1.248722, 1.248722, 1.248722, 1.179661,
1, 1, 1, 1, 1, 1.060296,1.060296, 1.249593, 1.248595, 1.248626, 1, 1, 1, 1,
1)),.Names = c("Use", "Status", "AnimalID", "St.A1k", "InvWeight"), class =
c("tbl_df", "tbl", "data.frame"), row.names = c(NA, -100L))
GLM with sampling weights
# Survey design with no clustering (ids = ~1) and InvWeight as the sampling
# weight of each row; `ids` is spelled out instead of relying on partial
# argument matching of `id`.
des2 <- svydesign(ids = ~1, weights = ~InvWeight, data = data2)

# Logistic regression of Use on St.A1k fitted under that design.
glm.sampling.weights <- svyglm(Use ~ St.A1k, family = binomial, design = des2)
summary(glm.sampling.weights)
GLM (not sampling weights)
# Ordinary logistic regression with InvWeight passed as prior weights.
# `weights` is spelled out in full instead of relying on partial argument
# matching of `weight`.
glm.w <- glm(
  Use ~ St.A1k,
  family = binomial,
  weights = InvWeight,
  data = data2
)
summary(glm.w)
(from the glm help file: "For a binomial GLM prior weights are used to give the number of trials when the response is the proportion of successes").
Mixed-effects Cox model (coxme)
# Cox model with a random intercept per animal, using Status as the time and
# Use as the event indicator. `weights` is spelled out in full instead of
# relying on partial argument matching of `weight`.
cox.w <- coxme(
  Surv(Status, Use) ~ St.A1k + (1 | AnimalID),
  weights = InvWeight,
  data = data2
)
summary(cox.w)