How to plot a partition layout/table - r

I want to plot a partition layout a.k.a. partition table in R. This type of diagram plots hierarchical data horizontally from left to right with nodes as space-filling tiles of proportional height and with tile-adjacency as connections. The x-axis represents hierarchy levels and the y-axis whatever metric used.
Flawed makeshift example cobbled in Excel:
It could also be compared to:
a horizontal uniform-depth icicle plot
a non-nested treemap
a table with variable-height cells
This type of diagram is available in D3.js.
How can I do it in R? Is it possible with ggplot2?
EDIT: Here's my actual data. I need emp_est (not a count) to be the y-axis. emp_est for N-level NAICS code equals the sum of emp_est of daughter N-1-level NAICS codes.
structure(list(naics_level = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3), naics = c("62----", "44----", "54----", "72----", "61----",
"52----", "31----", "56----", "42----", "51----", "81----", "23----",
"55----", "48----", "71----", "53----", "22----", "99----", "11----",
"21----", "541///", "722///", "611///", "622///", "561///", "621///",
"624///", "445///", "551///", "623///", "238///", "423///", "524///",
"522///", "523///", "511///", "813///", "424///", "334///", "448///",
"812///", "531///", "713///", "452///", "441///", "518///", "721///",
"236///", "446///", "444///", "311///", "485///", "811///", "517///",
"332///", "454///", "325///", "453///", "333///", "519///", "484///",
"339///", "237///", "451///", "711///", "221///", "492///", "425///",
"447///", "481///", "442///", "493///", "532///", "336///", "562///",
"488///", "443///", "323///", "326///", "335///", "712///", "515///",
"512///", "327///", "337///", "313///", "322///", "312///", "533///",
"521///", "315///", "331///", "321///", "314///", "316///", "487///",
"525///", "324///", "212///", "115///", "483///", "486///", "114///",
"113///", "213///", "211///", "7225//", "6113//", "6221//", "5511//",
"4451//", "6241//", "5613//", "5415//", "5221//", "5417//", "6211//",
"5416//", "5413//", "5241//", "6231//", "2382//", "5617//", "5239//",
"5112//", "4481//", "7139//", "6111//", "6216//", "6214//", "5411//",
"5182//", "7211//", "5412//", "6244//", "3345//", "6232//", "4234//",
"5242//", "4461//", "8131//", "7223//", "8121//", "4411//", "4441//",
"4521//", "6212//", "5616//", "5313//", "6233//", "6223//", "5419//",
"4244//", "8111//", "4236//", "6116//", "5171//", "5111//", "2383//",
"5191//", "6213//", "5231//", "5614//", "2362//", "2361//", "5311//",
"5418//", "4541//", "4529//", "3344//", "3254//", "4251//", "2381//",
"8129//", "4471//", "4511//", "4921//", "5611//", "6222//", "2389//",
"4931//", "4811//", "2211//", "4431//", "4242//", "4238//", "3391//",
"3231//", "3118//", "8139//", "4841//", "3327//", "8123//", "4854//",
"7224//", "8134//", "5222//", "6215//", "6219//", "4413//", "8133//",
"2371//", "4243//", "4453//", "6243//", "3261//", "4422//", "4532//",
"4543//", "5615//", "4842//", "5619//", "3364//", "6242//", "8132//",
"3399//", "5312//", "4539//", "3332//", "7113//", "3359//", "4231//",
"4851//", "4239//", "7121//", "2373//", "4482//", "4237//", "4452//",
"5179//", "4859//", "3119//", "4853//", "5121//", "5621//", "3339//",
"6112//", "4249//", "4241//", "6117//", "5629//", "4233//", "3329//",
"3323//", "5324//", "5151//", "4248//", "4483//", "5223//", "4885//",
"5321//", "4421//", "4512//", "3328//", "5414//", "7111//", "4881//",
"4232//", "3113//", "4533//", "7112//", "5612//", "6115//", "3333//",
"6239//", "5172//", "3115//", "3116//", "3222//", "3121//", "5322//",
"8122//", "8112//", "3363//", "6114//", "4247//", "2212//", "4884//",
"3342//", "8113//", "4235//", "4246//", "5331//", "3341//", "4442//",
"3372//", "3133//", "3262//", "5211//", "3114//", "3251//", "3353//",
"8114//", "3117//", "4531//", "3252//", "3132//", "4412//", "3152//",
"3334//", "3371//", "3219//", "7131//", "5152//", "3273//", "3255//",
"3335//", "3321//", "3324//", "3259//", "3322//", "3272//", "2379//",
"4922//", "5622//", "7115//", "3315//", "3149//", "4855//", "3279//",
"3162//", "4872//", "2372//", "4812//", "3351//", "4852//", "2213//",
"3336//", "3366//", "3141//", "7212//", "3362//", "5259//", "4542//",
"3241//", "4883//", "3256//", "2123//", "3221//", "4831//", "7114//",
"3369//", "3379//", "3313//", "1152//", "3346//", "5323//", "3343//",
"7132//", "3274//", "3314//", "4862//", "4889//", "7213//", "3271//",
"5122//", "3169//", "3352//", "4871//", "3331//", "3161//", "3326//",
"3312//", "3159//", "4245//", "5174//", "1133//", "3361//", "3253//",
"3325//", "3112//", "1141//", "1153//", "2131//", "1142//", "3211//",
"3111//", "2111//", "5232//", "3365//", "1151//", "1132//", "3131//",
"3212//", "4832//", "1131//", "2122//", "3122//", "3311//", "4879//"
), emp_est = c(444491, 266068, 242031, 217912, 172492, 156366,
150651, 141859, 106269, 103451, 94438, 93862, 78951, 61390, 42552,
38517, 8786, 644, 475, 373, 242031, 193891, 172492, 145566, 134867,
132980, 92554, 79489, 78951, 73391, 63089, 59167, 56680, 53411,
44938, 42428, 41420, 38574, 35544, 35191, 34789, 29626, 29104,
27284, 25031, 24318, 24021, 20156, 19473, 19472, 18503, 18446,
18229, 17982, 16842, 14807, 13685, 12597, 11880, 11531, 11199,
10983, 10617, 10365, 9554, 8786, 8565, 8528, 8025, 7760, 7578,
7390, 7369, 7308, 6992, 6955, 6756, 6511, 6428, 5969, 3894, 3649,
3543, 2642, 2586, 2413, 2154, 1871, 1522, 1256.5, 1145, 1043,
1038, 988, 799, 645, 324, 319, 291, 282, 282, 148, 108, 85, 58,
24, 168952, 123448, 122772, 78951, 70361, 59968, 52588, 47253,
44865, 43270, 40371, 39138, 37926, 36667, 35557, 35527, 34236,
33878, 30390, 28710, 28005, 26446, 26006, 25954, 24481, 24318,
23538, 23091, 22838, 22786, 20565, 20463, 20013, 19473, 18890,
18796, 18617, 18271, 17994, 17856, 17654, 17590, 15429, 15258,
15038, 14706, 14407, 13729, 13324, 12736, 12354, 12038, 11949,
11531, 11385, 11049, 10961, 10105, 10051, 9894, 9756, 9507, 9428,
9329, 9296, 8528, 8059, 8033, 8025, 7895, 7846, 7786, 7756, 7554,
7390, 7258, 6823, 6756, 6702, 6689, 6661, 6511, 6443, 6412, 6351,
6332, 6317, 6272, 6143, 6056, 5918, 5827, 5783, 5689, 5663, 5526,
5381, 5379, 5242, 5138, 5047, 4979, 4977, 4905, 4848, 4742, 4569,
4506, 4399, 4322, 4303, 4287, 4274, 4237, 4212, 4152, 4057, 4045,
3894, 3855, 3810, 3802, 3749, 3611, 3599, 3551, 3493, 3418, 3289,
3182, 3138, 3092, 3063, 3020, 2994, 2859, 2819, 2795, 2739, 2722,
2701, 2671, 2628, 2617, 2607, 2531, 2470, 2467, 2410, 2322, 2316,
2302, 2273, 2220, 2066, 2059, 2054, 2029, 2011, 1939, 1901, 1876,
1870, 1868, 1851, 1822, 1795, 1698, 1650, 1627, 1578, 1567, 1547,
1540, 1531, 1523, 1522, 1515, 1478, 1330, 1322, 1290, 1256.5,
1251, 1174, 1171, 1165, 1112, 1111, 1109, 1084, 1071, 1067, 1035,
1021, 992, 944, 927, 925, 909, 908, 827, 823, 820, 809.5, 801,
719, 719, 709, 672, 661, 645, 630, 618, 596, 547, 517, 502, 482,
395, 385, 358, 355, 343, 342, 340, 324, 323, 319, 311, 304, 288,
286, 274, 257, 238, 235, 209, 206, 197, 172, 170, 155, 154, 148,
148, 144, 141, 133, 125, 111, 104, 96, 94, 92, 90, 80, 78, 78,
78, 77, 77, 73, 73, 68, 66, 66, 58, 41.5, 38, 28, 24, 14.5, 13,
12, 9.5, 9.5, 8, 8, 2.5, 2.5, 2.5, 2.5, 2.5)), .Names = c("naics_level",
"naics", "emp_est"), row.names = c(NA, -390L), class = c("tbl_df",
"tbl", "data.frame"))

Here's one simple example in ggplot2:
library(ggplot2)

# Toy hierarchy: one row per leaf observation. The first 1, 2 and 3 characters
# of the leaf label are used as its level-1, level-2 and level-3 node.
d <- data.frame(
  level3 = c(rep("aaa", 4), "aab", rep("aba", 2), "abb", "aca", "acb"),
  stringsAsFactors = FALSE
)
d$level2 <- substr(d$level3, 1, 2)
d$level1 <- substr(d$level3, 1, 1)
d$id <- seq_len(nrow(d))  # seq_len() yields an empty index for a 0-row frame

# Long format: one row per (leaf, level) pair.
# pivot_longer() is the current replacement for the superseded gather().
d2 <- tidyr::pivot_longer(
  d,
  cols = -id,
  names_to = "level",
  values_to = "label"
)

# One full-width stacked bar per level; each node's tile height is the number
# of rows carrying that label, and the label is centred inside its tile.
ggplot(d2, aes(level, group = label, fill = level)) +
  geom_bar(position = "stack", col = 1, width = 1) +
  geom_text(
    aes(label = label),
    position = position_stack(vjust = 0.5),
    stat = "count"
  )

Related

mixed model for paired data - 4 level outcome / 3 times

What is the correct test to evaluate the difference between 3 RX readers of the same 50 patients? The RX outcome is a categorical value with 4 ordered levels. The 3 readers read the RX at the same time.
May I use a mixed model for paired data with patients as random effect?
I'm really in difficulty...
I've tried the repolr package for repeated ordinal scores:
repolr(formula = rsna ~ factor(reader) ,
subjects = "Patient" , data = mydata ,
categories = 4, times=c(1,2,3), po.test=TRUE,fixed=FALSE)
but I obtain this error:
Error in ord.expand(space = space, formula = formula, times = times, poly = poly, :
data: model frame and formula mismatch in model.matrix()
Here are the data:
mydata<- data.frame(Patient = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50),
reader = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3),
rsna = c(1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 4, 4, 1, 1, 2, 2, 2, 1, 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 2, 1, 1, 2, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 4, 4, 4, 2, 1, 3, 2, 1, 1, 1, 2, 1, 3, 1, 1, 1, 3, 1, 1, 1, 1, 1, 4, 2, 2, 1, 1))
rsna is my outcome with 4 levels, Patients are the 50 patients, reader are the 3 readers.
How can I fix it?
If you turn the reader variable into a factor in the dataset first, it works:
mydata<- data.frame(Patient = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50),
reader = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3),
rsna = c(1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 4, 4, 1, 1, 2, 2, 2, 1, 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 2, 1, 1, 2, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 4, 4, 4, 2, 1, 3, 2, 1, 1, 1, 2, 1, 3, 1, 1, 1, 3, 1, 1, 1, 1, 1, 4, 2, 2, 1, 1))
library(repolr)

# Store reader as a factor in the data itself, so the model formula can refer
# to the plain column name instead of wrapping it in factor().
mydata[["reader"]] <- as.factor(mydata[["reader"]])

# Repeated ordinal scores: 4 outcome categories, 3 readings per patient.
repolr(
  formula = rsna ~ reader,
  subjects = "Patient",
  data = mydata,
  categories = 4,
  times = c(1, 2, 3),
  po.test = TRUE,
  fixed = FALSE
)
#>
#> repolr: 2016-02-26 version 3.4
#>
#> Call:
#> repolr(formula = rsna ~ reader, subjects = "Patient", data = mydata,
#> times = c(1, 2, 3), categories = 4, po.test = TRUE, fixed = FALSE)
#>
#> Coefficients:
#> cuts1|2 cuts2|3 cuts3|4 reader2 reader3
#> 1.1761 2.1395 2.9071 0.6600 -0.4768
Created on 2022-06-07 by the reprex package (v2.0.1)

What is another way to compute tertiles using tidyverse (or any other package) in R?

I have the WVS 6th wave data frame. I computed the outgroup trust index (outgroup_index) and I want to divide this vector into 3 groups according to tertiles.
I use base R functions to do that:
# Recoding will be based on tertiles.
# Find the tertile break points (0, 1/3, 2/3 and 1 quantiles) of the index.
tertiles <- quantile(filtered_df$outgroup_index, probs = (0:3) / 3)
# Cut the target variable at those break points. include.lowest = TRUE keeps
# the minimum value inside the first bin; TRUE is spelled out because `T` is
# an ordinary variable that can be reassigned.
filtered_df$index_recoded <- cut(
  filtered_df$outgroup_index,
  breaks = tertiles,
  include.lowest = TRUE
)
But I am wondering about other possible and more neat ways to do it (preferably using dplyr/tidyverse or any other packages)?
Data:
structure(list(V2 = structure(c(643, 643, 643, 643, 643, 643,
643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643,
643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643,
643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643,
643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643,
643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643,
643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643,
643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643,
643, 643, 643, 643), label = "Country/region", format.spss = "F4.0", labels = c(`Not asked in survey` = -4,
Algeria = 12, Azerbaijan = 31, Argentina = 32, Australia = 36,
Armenia = 51, Brazil = 76, Belarus = 112, Chile = 152, China = 156,
`Taiwan ROC` = 158, Colombia = 170, Cyprus = 196, Ecuador = 218,
Estonia = 233, Georgia = 268, Palestine = 275, Germany = 276,
Ghana = 288, Haiti = 332, `Hong Kong SAR` = 344, India = 356,
Iraq = 368, Japan = 392, Kazakhstan = 398, Jordan = 400, `South Korea` = 410,
Kuwait = 414, Kyrgyzstan = 417, Lebanon = 422, Libya = 434, Malaysia = 458,
Mexico = 484, Morocco = 504, Netherlands = 528, `New Zealand` = 554,
Nigeria = 566, Pakistan = 586, Peru = 604, Philippines = 608,
Poland = 616, Qatar = 634, Romania = 642, Russia = 643, Rwanda = 646,
Singapore = 702, Slovenia = 705, `South Africa` = 710, Zimbabwe = 716,
Spain = 724, Sweden = 752, Thailand = 764, `Trinidad and Tobago` = 780,
Tunisia = 788, Turkey = 792, Ukraine = 804, Egypt = 818, `United States` = 840,
Uruguay = 858, Uzbekistan = 860, Yemen = 887), class = c("haven_labelled",
"vctrs_vctr", "double")), V105 = structure(c(4, 3, 3, 4, 3, 4,
4, 4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 2, 2, 2, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 2, 2, 2, 1, 1,
2, 4, 2, 2, 2, 1, 2, 1, 4, 2, 1, 4, 2, 3, 3, 2, 3, 2, 3, 2, 3,
2, 2, 3, 3, 3, 3, 3, 3, NA, 3, 3, 4, 2, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 2, 2, 3, 3, 3, 2, 3, NA), label = "Trust: People you meet for the first time (B)", format.spss = "F3.0", labels = c(`SE:Inapplicable ; RU:Inappropriate response; HT: Dropped out` = -5,
`Not asked` = -4, `Not applicable` = -3, `No answer` = -2, `Don<U+00B4>t know` = -1,
`Trust completely` = 1, `Trust somewhat` = 2, `Do not trust very much` = 3,
`Do not trust at all` = 4), class = c("haven_labelled", "vctrs_vctr",
"double")), V106 = structure(c(3, 2, NA, 4, 2, 4, 4, 3, 3, 4,
3, 3, 4, 4, 4, 4, NA, NA, NA, NA, 3, 2, 2, 2, 2, 2, 2, 3, 3,
3, 3, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 2, 3, 2, 2, 2, 1, 1, 2, 2,
2, 2, 1, 1, 2, 1, 4, 2, 1, 4, 2, 3, 3, 2, 2, 2, 3, 2, 3, 2, 2,
NA, 3, NA, 3, 3, 3, 2, 3, 3, 4, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 2, 2, 2, 3, 2, 2, 2, 3), label = "Trust: People of another religion (B)", format.spss = "F3.0", labels = c(`DE,SE:Inapplicable ; RU:Inappropriate response; HT: Dropped` = -5,
`Not asked` = -4, `Not applicable` = -3, `No answer` = -2, `Don<U+00B4>t know` = -1,
`Trust completely` = 1, `Trust somewhat` = 2, `Do not trust very much` = 3,
`Do not trust at all` = 4), class = c("haven_labelled", "vctrs_vctr",
"double")), V107 = structure(c(3, 4, NA, 4, 2, 4, 4, 3, 3, 4,
3, 3, 4, 4, 4, 4, 3, 2, NA, NA, 3, 2, 2, 2, 2, 2, 2, 3, 3, 3,
3, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 1, 2, 2, 2, 2, 1, 1, 2, 1, 2,
2, 1, 1, 2, 1, 4, 2, 1, 3, 2, 3, 2, 2, 2, 2, 3, 2, 3, 2, 2, NA,
3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2, 3, 2, 3, 2, 2, 2, 3), label = "Trust: People of another nationality (B)", format.spss = "F3.0", labels = c(`DE,SE:Inapplicable ; RU:Inappropriate response; HT: Dropped` = -5,
`Not asked` = -4, `Not applicable` = -3, `No answer` = -2, `Don<U+00B4>t know` = -1,
`Trust completely` = 1, `Trust somewhat` = 2, `Do not trust very much` = 3,
`Do not trust at all` = 4), class = c("haven_labelled", "vctrs_vctr",
"double")), V248 = structure(c(9, 8, 5, 8, 8, 8, 8, 9, 7, 9,
9, 5, 5, 6, 5, 5, 5, 5, 5, 4, 9, 9, 4, 9, 9, 3, 6, 9, 8, 9, 9,
9, NA, 9, 5, 9, 5, 7, 9, 5, 5, 9, 9, 8, 9, 9, 5, 5, 5, 9, 9,
8, 5, 8, 9, 9, 5, 8, 9, 9, 9, 7, 7, 5, 4, 6, 9, 6, 6, 9, 9, 5,
6, 7, 5, 4, 7, 7, 5, 5, 5, 5, 8, 9, 8, 9, 9, 9, 9, 9, 9, 9, 5,
9, 9, 5, 9, 8, 9, 5, 5), label = "Highest educational level attained", format.spss = "F3.0", labels = c(`AU: Inapplicable (No-school education) DE,SE:Inapplicable ;` = -5,
`Not asked` = -4, `Not applicable` = -3, `No answer` = -2, `Don<U+00B4>t know` = -1,
`No formal education` = 1, `Incomplete primary school` = 2, `Complete primary school` = 3,
`Incomplete secondary school: technical/ vocational type` = 4,
`Complete secondary school: technical/ vocational type` = 5,
`Incomplete secondary school: university-preparatory type` = 6,
`Complete secondary school: university-preparatory type` = 7,
`Some university-level education, without degree` = 8, `University - level education, with degree` = 9
), class = c("haven_labelled", "vctrs_vctr", "double")), V59 = structure(c(9,
5, 6, 8, 6, 7, NA, 8, 5, 3, 4, 7, 2, 1, 1, 6, 8, 6, NA, NA, 1,
5, NA, 6, 1, 2, 9, 5, 6, NA, NA, 3, 6, 6, 4, NA, 6, 6, NA, NA,
3, 9, 8, 10, 9, 6, 10, 9, 8, 9, 9, 10, 6, 4, 4, 6, 4, 10, 3,
3, 4, 3, 5, 4, 7, 3, 3, 4, 3, 7, 4, 6, 4, 1, 1, 6, 1, 1, 6, 1,
1, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 7, 3, 1, 5, 6, 7, 2, 4, 5
), label = "Satisfaction with financial situation of household", format.spss = "F3.0", labels = c(`HT: Dropped out survey;DE,SE:Inapplicable ; RU:Inappropriate` = -5,
`Not asked` = -4, `No answer` = -2, `Don<U+00B4>t know` = -1,
Dissatisfied = 1, `2` = 2, `3` = 3, `4` = 4, `5` = 5, `6` = 6,
`7` = 7, `8` = 8, `9` = 9, Satisfied = 10), class = c("haven_labelled",
"vctrs_vctr", "double")), V237 = structure(c(3, 2, 2, 2, NA,
1, 2, 2, 1, 2, 2, 2, 2, 3, 2, 1, 1, 3, 2, 2, NA, 2, 2, 3, 4,
2, 2, 1, NA, 1, 1, 1, NA, NA, NA, 1, NA, 1, 1, NA, 2, 1, 2, 1,
1, 1, 1, 1, 1, 1, 2, 1, 2, 1, 2, 1, 1, 1, 1, 1, 2, 2, 2, 3, 3,
2, 3, 2, 1, 2, 3, 2, 2, 2, NA, 2, 2, 4, 2, 2, 2, 1, 1, 2, 1,
2, 3, 2, 2, 1, 2, 2, 2, 3, 3, 2, 3, 2, 2, NA, 3), label = "Family savings during past year", format.spss = "F3.0", labels = c(`DE,SE:Inapplicable ; RU:Inappropriate response; BH: Missing;` = -5,
`Not asked` = -4, `Not applicable` = -3, `No answer` = -2, `Don<U+00B4>t know` = -1,
`Save money` = 1, `Just get by` = 2, `Spent some savings and borrowed money` = 3,
`Spent savings and borrowed money` = 4), class = c("haven_labelled",
"vctrs_vctr", "double")), V105_rec = c(1, 2, 2, 1, 2, 1, 1, 1,
1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 3, 3, 3, 4, 4, 3, 1,
3, 3, 3, 4, 3, 4, 1, 3, 4, 1, 3, 2, 2, 3, 2, 3, 2, 3, 2, 3, 3,
2, 2, 2, 2, 2, 2, NA, 2, 2, 1, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 3, 3, 2, 2, 2, 3, 2, NA), V106_rec = c(2, 3, NA, 1, 3,
1, 1, 2, 2, 1, 2, 2, 1, 1, 1, 1, NA, NA, NA, NA, 2, 3, 3, 3,
3, 3, 3, 2, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 3, 3,
3, 4, 4, 3, 3, 3, 3, 4, 4, 3, 4, 1, 3, 4, 1, 3, 2, 2, 3, 3, 3,
2, 3, 2, 3, 3, NA, 2, NA, 2, 2, 2, 3, 2, 2, 1, 3, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 3, 3, 3, 2), V107_rec = c(2,
1, NA, 1, 3, 1, 1, 2, 2, 1, 2, 2, 1, 1, 1, 1, 2, 3, NA, NA, 2,
3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 4,
3, 3, 3, 3, 4, 4, 3, 4, 3, 3, 4, 4, 3, 4, 1, 3, 4, 2, 3, 2, 3,
3, 3, 3, 2, 3, 2, 3, 3, NA, 2, 3, 2, 2, 2, 3, 2, 2, 2, 3, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 2, 3, 2, 3, 3, 3, 2), outgroup_index = c(1.66666666666667,
2, 2, 1, 2.66666666666667, 1, 1, 1.66666666666667, 1.66666666666667,
1, 1.66666666666667, 2, 1, 1, 1, 1, 1.5, 2.5, 2, 2, 2, 3, 3,
3, 3, 3, 2.66666666666667, 2, 2, 2, 2, 1.33333333333333, 1.33333333333333,
2, 2, 2, 2, 2, 2, 2, 2, 2.66666666666667, 2, 3, 3, 3, 4, 4, 3,
2.66666666666667, 3, 3, 3.66666666666667, 4, 3, 4, 1, 3, 4, 1.33333333333333,
3, 2, 2.33333333333333, 3, 2.66666666666667, 3, 2, 3, 2, 3, 3,
2, 2, 2.5, 2, 2, 2, 3, 2, 2, 1.33333333333333, 3, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 3, 2.66666666666667, 2.66666666666667, 2,
2.66666666666667, 3, 2.66666666666667, 2), V59_rec = structure(c(5,
3, 3, 4, 3, 4, NA, 4, 3, 2, 2, 4, 1, 1, 1, 3, 4, 3, NA, NA, 1,
3, NA, 3, 1, 1, 5, 3, 3, NA, NA, 2, 3, 3, 2, NA, 3, 3, NA, NA,
2, 5, 4, 5, 5, 3, 5, 5, 4, 5, 5, 5, 3, 2, 2, 3, 2, 5, 2, 2, 2,
2, 3, 2, 4, 2, 2, 2, 2, 4, 2, 3, 2, 1, 1, 3, 1, 1, 3, 1, 1, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 4, 2, 1, 3, 3, 4, 1, 2, 3), labels = c(`Not satisfied at all` = 1,
`Rather not satisfied` = 2, `Neither satisfied, nor not satisfied` = 3,
`Rather satisfied` = 4, Satisfied = 5), class = c("haven_labelled",
"vctrs_vctr", "double")), V248_dummy = structure(c(1, 1, 0, 1,
1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1,
0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1,
0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1,
0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0), labels = c(`A university education and higher` = 1,
`No university education` = 0), class = c("haven_labelled", "vctrs_vctr",
"double")), V237_rec = structure(c(3, 2, 2, 2, NA, 1, 2, 2, 1,
2, 2, 2, 2, 3, 2, 1, 1, 3, 2, 2, NA, 2, 2, 3, 3, 2, 2, 1, NA,
1, 1, 1, NA, NA, NA, 1, NA, 1, 1, NA, 2, 1, 2, 1, 1, 1, 1, 1,
1, 1, 2, 1, 2, 1, 2, 1, 1, 1, 1, 1, 2, 2, 2, 3, 3, 2, 3, 2, 1,
2, 3, 2, 2, 2, NA, 2, 2, 3, 2, 2, 2, 1, 1, 2, 1, 2, 3, 2, 2,
1, 2, 2, 2, 3, 3, 2, 3, 2, 2, NA, 3), labels = c(`Save money` = 1,
`Just get by` = 2, `Spent savings and borrowed money` = 3), class = c("haven_labelled",
"vctrs_vctr", "double"))), row.names = c(NA, -101L), class = c("tbl_df",
"tbl", "data.frame"), label = "filelabel")
A bit unintuitive, but ggplot2 has the functionality you are looking for.
# cut_number() bins by quantiles (roughly equal counts per bin), which is what
# quantile() followed by cut(..., include.lowest = TRUE) does. cut_interval()
# would instead make equal-width bins, which match tertiles only when the
# quantile break points happen to be evenly spaced.
# Requires dplyr to be attached for %>% and mutate().
filtered_df %>%
  mutate(index_recoded = ggplot2::cut_number(outgroup_index, 3))
And to verify the levels are the same:
# smaller dput would be nice
# NOTE(review): `Data` presumably stands for the dput() object posted in the
# question; it is not defined in this snippet.
start <- Data
all(
  {
    # Base R: cut at the empirical tertile break points.
    filtered_df <- start
    tertiles <- quantile(filtered_df$outgroup_index, c(0:3) / 3)
    filtered_df$index_recoded <- with(
      filtered_df,
      cut(outgroup_index,
        tertiles,
        include.lowest = TRUE
      )
    )
    filtered_df$index_recoded
  } == {
    # ggplot2: cut_number() is the quantile-based binner. cut_interval() makes
    # equal-width bins, which agree with tertiles only when the quantile break
    # points happen to be evenly spaced.
    tv_df <- start
    tv_df %>%
      mutate(index_recoded = ggplot2::cut_number(outgroup_index, 3)) %>%
      pull(index_recoded)
  }
)
[1] TRUE
cut has a simpler syntax if you want to divide the data into a fixed number of equal-width intervals (note that these are not quantile-based tertiles in general).
filtered_df$index_recoded <- cut(filtered_df$outgroup_index, 3)
You can also use it with labels = FALSE to get 1, 2 and 3 as output.
filtered_df$index_recoded <- cut(filtered_df$outgroup_index, 3, labels = FALSE)

How to convert igraph file in row/colums?

I would like to convert the information I have into a normal list of axes with nodes, but I don't know how to do it. The raw data printed with "dput" looks like this. If someone knows how to convert this list into something easier to use, I would appreciate it. I can visualise the graph with "plot", but to edit it I need more precise information.
library(igraph)
dput (net2$graph_pajek)
structure(list(30, FALSE, c(1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2,
3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 6, 7, 13, 13,
14, 15, 16, 18, 20, 20, 21, 27, 27, 27, 27, 29, 2, 2, 2, 2, 2,
2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5,
6, 6, 7, 8, 8, 9, 9, 9, 10, 10, 10, 10, 10, 11, 11, 12, 12, 12,
13, 13, 13, 14, 14, 14, 15, 15, 15, 16, 18, 18, 18, 19, 20, 20,
21, 21, 23, 24, 25, 26, 27, 27, 27, 29, 3, 3, 3, 3, 3, 3, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5,
5, 5, 5, 5, 6, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 10,
10, 10, 10, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
12, 12, 12, 12, 12, 12, 13, 13, 14, 14, 15, 15, 15, 15, 15), list(c(1, 0, 1), structure(list(), .Names = character(0)),
list(name = c("A", "B", "C",
"D", "E", "F", "G", "H",
"I", "J", "K",
"L", "M", "N",
"O", "P", "Q", "R",
"S", "T", "U",
"V", "W", "X", "Y", "Z",
"AB", "AC", "AD", "AE"
), deg = c(248, 532, 855, 574, 1761, 261, 229, 216, 554,
628, 774, 223, 502, 295, 266, 910, 227, 312, 364, 260, 294,
741, 227, 471, 392, 376, 292, 295, 212, 287), size = c(2.,
6, 9, 6, 20,
2, 2, 2, 6,
7, 8, 2, 7,
3, 3, 10, 2,
3, 4, 2, 3.,
8, 2, 5, 4,
4, 3, 3, 2,
3), label.cex = c(0.7, 0.7, 0.7, 0.7, 0.7, 0.7,
0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7,
0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7
), id = c("A", "B", "C",
"D", "E", "F", "G", "H",
"I", "J", "K",
"L", "M", "N",
"O", "P", "Q", "R",
"S", "T", "U",
"V", "W", "X", "Y", "Z",
"AB", "AC", "AD", "AE"
)), list(num = c(4, 4, 4, 4, 7, 7, 7, 7, 7, 7, 7, 3, 3, 3,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 3, 3, 3, 1, 1, 2,
2, 1, 1, 1, 1, 2, 2, 1, 4, 4, 4, 4, 1, 7, 7, 7, 7, 7, 7,
7, 6, 6, 6, 6, 6, 6, 12, 12, 12, 12, 12, 12, 12, 12, 12,
12, 12, 12, 1, 2, 2, 1, 2, 2, 3, 3, 3, 5, 5, 5, 5, 5, 2,
2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 3, 3, 3, 1, 2,
2, 2, 2, 1, 1, 1, 1, 3, 3, 3, 1, 6, 6, 6, 6, 6, 6, 40, 40,
40, 40, 40, 40, 40, 40, 40), weight = c(4, 4, 4, 4,
7, 7, 7, 7, 7, 7, 7, 3, 3, 3, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 3, 3, 3, 1, 1, 2, 2, 1, 1, 1, 1, 2, 2, 1, 4,
4, 4, 4, 1, 7, 7, 7, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, 12, 12,
12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 1, 2, 2, 1, 2, 2,
3, 3, 3, 5, 5, 5, 5, 5, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 1, 3, 3, 3, 1, 2, 2, 2, 2, 1, 1, 1, 1, 3, 3, 3,
1, 6, 6, 6, 6, 6, 6, 40, 40, 40, 40, 40, 40, 40, 40, 40,
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
40, 7, 7, 7, 7, 7, 7, 7, 1, 3, 3, 3, 7, 7, 7, 7, 7, 7, 7,
4, 4, 4, 4, 4, 4, 4, 4, 1, 18, 18))), <environment>), class = "igraph")
Are you looking for something like get.data.frame (also available under the newer name as_data_frame)?
> get.data.frame(net)
from to weight
1 A B 0.63502922
2 B C 0.79410173
3 C D 0.90802625
4 D E 0.09408188
5 E F 0.16450634
6 F G 0.75931882
7 G H 0.30409658
8 H I 0.23990324
9 I J 0.84762277
10 A J 0.88657718
data
Since I cannot reproduce the example in your post, I created a dummy example graph, net, as shown below:
# Dummy graph: a ring of 10 vertices named "A".."J", with one uniform
# random weight drawn per edge.
net <- make_ring(10)
net <- set_vertex_attr(
  net,
  name = "name",
  value = LETTERS[seq_len(vcount(net))]
)
net <- set_edge_attr(net, name = "weight", value = runif(ecount(net)))
To clarify a couple things:
The igraph file is not a plot per se, but a graph structure (as in, nodes and edges).
igraph has functions for plotting graphs, but there is no single and standard way of plotting a graph - instead, different algorithms can be used to determine visually-ideal ways of displaying them, and these algorithms oftentimes rely on random initializations.
The outputs from the plotting functions of igraph are only relevant in terms of R's base plot drawing logic. AFAIK they don't use an intermediate format in which the coordinates are held in a user-comprehensible structure. You can nevertheless manage lots of aspects of how they are drawn - see ?igraph::igraph.plotting.

R Multiple T-test: Grouping factor must have 2 variables

I'm trying to compare a control group with an experimental group on a range of variables to show that they are similar (baseline).
I thus need to do multiple t-tests (unpaired / Welch t-test). My data is in a long format with the first variable called "Group", containing either the number 1 or the number 2. There are some missing values in some of my other variables, but they are pretty random.
So when I run t-test manually using this line of code:
# Two-sample t-test of variable_1 split by Group; both columns are looked up in df.
t.test(variable_1 ~ Group,df)
it works.
I then tried to do it all at once using this line of code:
# For each of columns 2..71 of df, run t.test(column ~ df$Group) and keep only the p-value.
sapply(df[,2:71], function(i) t.test(i ~ df$Group)$p.value)
But I get the following error:
grouping factor must have exactly 2 levels
Could anyone help?
Here is what the structure looks like
structure(list(Group = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 2, 2), EM_Accuracy_Time_Airport = c(3, 3, 0,
1, 1, 2, 2, 1, 1, 3, 3, 2, 2, 2, 1, 3, 1, 3, 1, 1), EM_Accuracy_Place_Airport = c(2,
2, 1, 2, 1, 2, 2, 1, 1, 2, 0, 2, 2, 0, 2, 2, 2, 1, 1, 1), EM_Accuracy_Expl_Airport = c(2,
2, 2, 0, 2, 2, 2, 1, 2, 2, 2, 2, 2, 0, 0, 1, 0, 2, 2, 1), EM_Accuracy_Death_Airport = c(0,
2, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0), EM_Accuracy_Time_Metro = c(3,
1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 2, 1, 3, 1, 1, 2, 1, 3, 3), EM_Accuracy_Death_Metro = c(3,
0, 1, 0, 1, 1, 0, 0, 0, 3, 0, 0, 1, 0, 3, 1, 1, 1, 0, 0), EM_Accuracy_PC_Time_Airpot = c(100,
100, 0, 33.3333333333333, 33.3333333333333, 66.6666666666667,
66.6666666666667, 33.3333333333333, 33.3333333333333, 100, 100,
66.6666666666667, 66.6666666666667, 66.6666666666667, 33.3333333333333,
100, 33.3333333333333, 100, 33.3333333333333, 33.3333333333333
), EM_Accuracy_PC_Place_Airport = c(100, 100, 50, 100, 50, 100,
100, 50, 50, 100, 0, 100, 100, 0, 100, 100, 100, 50, 50, 50),
EM_Accuracy_PC_Expl_Airport = c(100, 100, 100, 0, 100, 100,
100, 50, 100, 100, 100, 100, 100, 0, 0, 50, 0, 100, 100,
50), EM_Accuracy_PC_Death_Airport = c(0, 66.6666666666667,
0, 0, 33.3333333333333, 66.6666666666667, 0, 0, 0, 0, 0,
0, 66.6666666666667, 0, 0, 0, 100, 0, 0, 0), EM_Accuracy_PC_Time_Metro = c(100,
33.3333333333333, 0, 0, 33.3333333333333, 33.3333333333333,
0, 33.3333333333333, 33.3333333333333, 33.3333333333333,
33.3333333333333, 66.6666666666667, 33.3333333333333, 100,
33.3333333333333, 33.3333333333333, 66.6666666666667, 33.3333333333333,
100, 100), EM_Accuracy_PC_Death_Metro = c(100, 0, 33.3333333333333,
0, 33.3333333333333, 33.3333333333333, 0, 0, 0, 100, 0, 0,
33.3333333333333, 0, 100, 33.3333333333333, 33.3333333333333,
33.3333333333333, 0, 0), EM_ACCURACY_PC = c(83.3333333333333,
66.6666666666667, 30.5555555555556, 22.2222222222222, 47.2222222222222,
66.6666666666666, 44.4444444444444, 27.7777777777778, 36.1111111111111,
72.2222222222222, 38.8888888888889, 55.5555555555555, 66.6666666666666,
27.7777777777778, 44.4444444444444, 52.7777777777778, 55.5555555555556,
52.7777777777778, 47.2222222222222, 38.8888888888889), EM_Certainty_Time_Airport = c(3,
1, 1, 1, 2, 2, 1, 1, 2, 3, 3, 2, 2, 2, 4, 2, 3, 3, 2, 2),
EM_Certainty__Place_Airport = c(3, 4, 2, 2, 2, 2, 4, 1, 3,
4, 4, 4, 4, 3, 3, 4, 4, 3, 2, 3), EM_Certainty__Expl_Airport = c(4,
2, 3, 1, 2, 3, 2, 1, 2, 4, 1, 3, 2, 2, 1, 3, 1, 2, 2, 3),
EM_Certainty__Death_Airport = c(1, 1, NA, 1, 2, 1, 3, 1,
2, 3, NA, 3, 2, 1, 2, 1, 1, 1, 4, 4), EM_Certainty__Time_Metro = c(3,
3, 1, 1, 2, 2, 2, 1, 3, 2, 3, 2, 3, 2, 2, 2, 3, 1, 2, 2),
EM_Certainty__Death_Metro = c(2, 1, 1, NA, 2, 1, 1, 1, 2,
1, NA, 3, 2, 1, 1, 1, 1, 1, 1, 4), EM_CERTAINTY = c(2.66666666666667,
2, 1.6, 1.2, 2, 1.83333333333333, 2.16666666666667, 1, 2.33333333333333,
2.83333333333333, 2.75, 2.83333333333333, 2.5, 1.83333333333333,
2.16666666666667, 2.16666666666667, 2.16666666666667, 1.83333333333333,
2.16666666666667, 3), EM_CONFIDENCE = c(5, 5, 1, 2, 2, 4,
5, 2, 3, 4, 5, 5, 3, 3, 4, 4, 3, 2, 3, 2), FBM_CONFIDENCE = c(4,
6, 7, 7, 5, 4, 2, 7, 5, 6, 6, 7, 6, 7, 3, 6, 6, 4, 5, 6),
FBM_Vividness_Time = c(3, 3, 1, 4, 3, 2, 4, 3, 4, 4, 1, 3,
4, 4, 3, 3, 3, 2, 4, 3), FBM_Vividness_How = c(4, 4, 2, 4,
4, 3, 4, 4, 4, 4, 3, 4, 3, 4, 4, 4, 4, 4, 4, 4), FBM_Vividness_Where = c(4,
4, 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4),
FBM_Vividness_WithWhom = c(4, 4, 3, 4, 3, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4), FBM_Vividness_WereDoing = c(4,
4, 1, 4, 3, 4, 4, 4, 4, 4, 1, 4, 4, 4, 4, 4, 4, 4, 4, 4),
FBM_Vividness_Did_After = c(4, 4, 3, 4, 2, 3, 4, 4, 2, 4,
1, 4, 4, 4, 3, 4, 4, 3, 4, 4), FBM_VIVIDNESS = c(3.83333333333333,
3.83333333333333, 2, 4, 3.16666666666667, 3.33333333333333,
4, 3.83333333333333, 3.66666666666667, 4, 2.33333333333333,
3.83333333333333, 3.83333333333333, 4, 3.66666666666667,
3.83333333333333, 3.83333333333333, 3.5, 4, 3.83333333333333
), FBM_Details_NB_T2 = c(3, 5, 0, 5, 5, 5, 2, 5, 1, 5, 3,
5, 5, 5, 2, 4, 2, 3, 5, 5), P_Novelty_5 = c(5, 6.2, 6.5,
5.6, 4.8, 5.4, 4, 4.2, 4.4, 5.8, 3.4, 5.8, 6, 5.8, 3.8, 6.4,
6.8, 6.6, 7, 3), P_Suprise_emotion = c(6, 6, 6, 6, 4, 5,
1, 7, 1, 5, 4, 5, 7, 7, 6, 4, 7, 7, 2, 5), P_Surprise_Expected = c(1,
3, 5, 2, 4, 3, 6, 2, 2, 1, 6, 4, 3, 1, 5, 1, 1, 1, 5, 4),
P_Surprise_Unbelievable = c(5, 4, 1, 6, 4, 4, 2, 7, 1, 4,
1, 6, 7, 7, 6, 3, 7, 7, 5, 3), `P_Consequence-Importance_5` = c(5.6,
4.8, 3.4, 5, 4.8, 4, 5, 5.4, 3, 5.2, 6.8, 5.4, 4, 4.4, 6,
3.8, 4, 4.8, 5, 5.2), P_Emotional_Intensity_4 = c(5.25, 5.75,
3, 4.75, 4.75, 6, 4, 5.25, 2.5, 5.5, 7, 6.5, 5.75, 6.75,
6.75, 6, 6.25, 6, 5, 2.5), P_Social_Sharing_6 = c(3.66666666666667,
3.83333333333333, 3.4, 3.16666666666667, 3, 3.33333333333333,
3.8, 3.16666666666667, 2.16666666666667, 4.16666666666667,
4, 4.5, 4.5, 4.33333333333333, 4, 3.16666666666667, 3.66666666666667,
4, NA, NA), P_Media_3 = c(4.66666666666667, 4, 3, 2.66666666666667,
2.66666666666667, 2.33333333333333, 3, 2.33333333333333,
2.33333333333333, 3.33333333333333, 4.33333333333333, 5,
4.33333333333333, 5, 4, 2, 3, 3.33333333333333, 2, 1.66666666666667
), P_Ruminations = c(3, NA, 3, 2, 4, NA, 4, 2, 1, 4, 4, 4,
2, 4, 2, 3, 3, 3, 4, 3), P_Novelty_Common_rev = c(6, 7, 7,
7, 4, 6, 4, 7, 2, 6, 3, 7, 7, 7, 3, 6, 7, 7, 7, 3), P_Novelty_Unusual = c(2,
5, 7, 7, 3, 5, 3, 3, 5, 6, 1, 4, 7, 1, 4, 6, 6, 6, 7, 2),
P_Novelty_Special = c(6, 6, NA, 6, 5, 5, 4, 3, 5, 4, 1, 5,
6, 7, 4, 6, 7, 7, 7, 3), P_Novelty_Singular = c(4, 6, 5,
1, 5, 5, 4, 1, 3, 6, 5, 6, 4, 7, 3, 7, 7, 6, 7, 2), P_Novelty_Ordinary_rev = c(7,
7, 7, 7, 7, 6, 5, 7, 7, 7, 7, 7, 6, 7, 5, 7, 7, 7, 7, 5),
P_Consequence = c(6, 7, 5, 4, 5, 4, 5, 3, 5, 5, 7, 5, 5,
2, 6, 6, 1, 4, 6, 3), P_Importance_self = c(4, 3, 3, 4, 4,
3, 5, 6, 1, 5, 7, 5, 3, 3, 5, 2, 2, 4, 5, 3), `P_Importance_friends&family` = c(4,
4, 3, 4, 4, 4, 4, 6, 1, 5, 6, 5, 3, 3, 5, 2, 6, 4, 5, 10),
P_Importance_Belgium = c(7, 5, 3, 7, 6, 5, 6, 7, 3, 7, 7,
7, 5, 7, 7, 5, 6, 7, 6, 6), P_Importance_International = c(7,
5, 3, 6, 5, 4, 5, 5, 5, 4, 7, 5, 4, 7, 7, 4, 5, 5, 3, 4),
P_Emotional_Intensity_Upset = c(4, 5, NA, 3, 3, 5, 3, 5,
2, 5, 7, 5, 5, 6, 7, 6, 6, 5, 5, 3), P_Emotional_Intensity_Indiferent_rev = c(7,
7, 5, 7, 6, 7, 4, 6, 4, 7, 7, 7, 7, 7, 7, 7, 7, 7, NA, 4),
P_Emotional_Intensity_Affected = c(6, 6, 3, 5, 5, 6, 5, 6,
2, 5, 7, 7, 5, 7, 7, 6, 6, 6, NA, 2), P_Emotional_Intensity_Shaken = c(4,
5, 1, 4, 5, 6, 4, 4, 2, 5, 7, 7, 6, 7, 6, 5, 6, 6, 5, 1),
P_Rehearsal_Media_TV = c(5, 3, NA, 3, 2, 3, NA, 1, 1, 4,
3, 5, 5, 5, 2, 3, 2, 2, 2, 2), P_Rehearsal_Media_Internet = c(4,
4, 1, 3, 2, 2, 2, 4, 3, 2, 5, 5, 3, 5, 5, 1, 5, 4, 2, 1),
P_Rehearsal_Media_Social_Networks = c(5, 5, 5, 2, 4, 2, 4,
2, 3, 4, 5, 5, 5, 5, 5, 2, 2, 4, 2, 2), P_Social_Sharing_How_Often = c(4,
5, 4, 4, 4, 3, 3, 3, 3, 5, 4, 5, 5, 5, 5, 3, 4, 4, 5, NA),
P_Social_Sharing_With_How_Many_People = c(5, 4, NA, 3, 3,
3, 3, 3, 2, 5, 3, 5, 5, 3, 5, 3, 3, 4, 3, NA), PK_Shops_YN = c(0,
1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1),
PK_Comic = c(0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0,
0, 0, 0, 1, 0), PK_Hotel = c(0, 0, 0, 0, 1, 0, 0, 0, 0, 1,
0, 0, 1, 1, 0, 0, 0, 0, 0, 0), PK_Decoration_Maelbeek = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1),
PK_Stations_before_after_Maelbeek = c(0, 0.5, 0, 0, 0, 0,
0, 0, 0.5, 1, 0, 0, 0.5, 0.5, 0, 0, 0.5, 0, 0.5, 0), PK_TOTAL_PC = c(0,
50, 0, 40, 40, 40, 20, 0, 10, 60, 20, 40, 90, 70, 20, 0,
30, 20, 70, 40), SI_Attachment_BXL = c(6, 4, 1, 4, 2, 5,
1, 6, 5, 4, 2, 6, 6, 7, 1, 3, 6, 4, 5, 4), SI_Pride_BXL = c(1,
2, 1, 2, 1, 2, 1, 5, 1, 6, 1, 1, 7, 7, 1, 2, 6, 1, 3, 3),
SI_Attachment_Belgium = c(7, 3, 5, 5, 4, 6, 7, 6, 5, 6, 7,
7, 7, 7, 5, 6, 7, 6, 4, 2), SI_Pride_Belgium = c(7, 2, 6,
4, 2, 6, 4, 5, 1, 5, 1, 6, 7, 7, 5, 7, 7, 6, 2, 2), SI_Attachment_EU = c(6,
4, 2, 5, 4, 4, 5, 4, 7, 4, 1, 6, 7, 7, 5, 4, 6, 6, 2, 6),
SI_Pride_EU = c(7, 1, 1, 4, 3, 4, 4, 4, 1, 4, 1, 6, 7, 7,
4, 3, 6, 6, 2, 4)), .Names = c("Group", "EM_Accuracy_Time_Airport",
"EM_Accuracy_Place_Airport", "EM_Accuracy_Expl_Airport", "EM_Accuracy_Death_Airport",
"EM_Accuracy_Time_Metro", "EM_Accuracy_Death_Metro", "EM_Accuracy_PC_Time_Airpot",
"EM_Accuracy_PC_Place_Airport", "EM_Accuracy_PC_Expl_Airport",
"EM_Accuracy_PC_Death_Airport", "EM_Accuracy_PC_Time_Metro",
"EM_Accuracy_PC_Death_Metro", "EM_ACCURACY_PC", "EM_Certainty_Time_Airport",
"EM_Certainty__Place_Airport", "EM_Certainty__Expl_Airport",
"EM_Certainty__Death_Airport", "EM_Certainty__Time_Metro", "EM_Certainty__Death_Metro",
"EM_CERTAINTY", "EM_CONFIDENCE", "FBM_CONFIDENCE", "FBM_Vividness_Time",
"FBM_Vividness_How", "FBM_Vividness_Where", "FBM_Vividness_WithWhom",
"FBM_Vividness_WereDoing", "FBM_Vividness_Did_After", "FBM_VIVIDNESS",
"FBM_Details_NB_T2", "P_Novelty_5", "P_Suprise_emotion", "P_Surprise_Expected",
"P_Surprise_Unbelievable", "P_Consequence-Importance_5", "P_Emotional_Intensity_4",
"P_Social_Sharing_6", "P_Media_3", "P_Ruminations", "P_Novelty_Common_rev",
"P_Novelty_Unusual", "P_Novelty_Special", "P_Novelty_Singular",
"P_Novelty_Ordinary_rev", "P_Consequence", "P_Importance_self",
"P_Importance_friends&family", "P_Importance_Belgium", "P_Importance_International",
"P_Emotional_Intensity_Upset", "P_Emotional_Intensity_Indiferent_rev",
"P_Emotional_Intensity_Affected", "P_Emotional_Intensity_Shaken",
"P_Rehearsal_Media_TV", "P_Rehearsal_Media_Internet", "P_Rehearsal_Media_Social_Networks",
"P_Social_Sharing_How_Often", "P_Social_Sharing_With_How_Many_People",
"PK_Shops_YN", "PK_Comic", "PK_Hotel", "PK_Decoration_Maelbeek",
"PK_Stations_before_after_Maelbeek", "PK_TOTAL_PC", "SI_Attachment_BXL",
"SI_Pride_BXL", "SI_Attachment_Belgium", "SI_Pride_Belgium",
"SI_Attachment_EU", "SI_Pride_EU"), row.names = c(NA, -20L), class = c("tbl_df",
"tbl", "data.frame"))
The error you get means that there's a problem in your dataset, with at least one of your variables.
Here's a process to help you spot problematic variables:
library(tidyverse)
# Count the non-missing observations of every variable within each group and
# list the variables for which EITHER group has fewer than two of them:
# t.test() cannot be computed for those columns.
df %>%
  group_by(Group) %>%                                   # for each group value
  summarise(across(everything(), ~ sum(!is.na(.x)))) %>%  # count non NA values
  pivot_longer(-Group, names_to = "var", values_to = "value") %>%  # reshape
  pivot_wider(
    names_from = Group,
    values_from = value,
    names_prefix = "Group_"
  ) %>%                                                 # one column per group
  filter(Group_1 < 2 | Group_2 < 2) %>%                 # problematic variables
  arrange(var)                                          # alphabetical, as below
# # A tibble: 5 x 3
# var Group_1 Group_2
# <chr> <int> <int>
# 1 P_Emotional_Intensity_Affected 18 1
# 2 P_Emotional_Intensity_Indiferent_rev 18 1
# 3 P_Social_Sharing_6 18 0
# 4 P_Social_Sharing_How_Often 18 1
# 5 P_Social_Sharing_With_How_Many_People 17 1
A count of 0 in one group will throw an error about the grouping factor needing exactly two levels.
A count of 1 in one group will throw an error about not having enough observations in that group.
After spotting those you have to treat them accordingly and then your original t.test code should work.
So my problem was just missing data in one variable.
However, if you are looking to run multiple t-tests on data in a long format, this line of code works:
# vapply() insists on exactly one numeric p-value per column (sapply() does not).
vapply(df[, 2:71], function(i) t.test(i ~ df$Group)$p.value, numeric(1))

How to specify predictor matrix for stan data block?

Dear Stack Overflow community, I want to use the variables w1 to w10 as the predictor matrix `matrix[N, W] weights;` in my Stan model. I am not certain how to accomplish that.
data frame
# Example data: 4 individuals (id) x 5 imputations (imput) = 20 rows.
# Columns that are constant within an individual are written with rep();
# the outer parentheses print the data frame after assigning it.
(dat <- data.frame(
  id    = rep(c(1, 2, 3, 4), each = 5),
  imput = rep(c(1, 2, 3, 4, 5), times = 4),
  A     = rep(c(1, 0), each = 10),
  B     = rep(c(1, 0, 1, 0), each = 5),
  Pass  = rep(c(278, 100, 153, 79), each = 5),
  Fail  = c(
    740, 743, 742, 743, 740,
    7581, 7581, 7581, 7581, 7581,
    1231, 1232, 1235, 1235, 1232,
    1731, 1732, 1731, 1731, 1731
  ),
  W_1  = c(4, 3, 4, 3, 3, 1, 2, 1, 2, 1, 12, 12, 11, 12, 12, 3, 5, 3, 3, 3),
  W_2  = rep(c(3, 1, 12, 3), each = 5),
  W_3  = c(4, 3, 3, 3, 3, 1, 2, 1, 1, 1, 12, 12, 11, 12, 12, 3, 3, 3, 3, 3),
  W_4  = c(3, 3, 4, 3, 3, 1, 1, 1, 2, 1, 12, 12, 13, 12, 12, 3, 2, 3, 3, 3),
  W_5  = c(3, 3, 3, 3, 3, 1, 0, 1, 1, 1, 12, 12, 12, 12, 12, 3, 3, 3, 3, 3),
  W_6  = c(4, 3, 3, 3, 3, 1, 1, 1, 1, 1, 12, 12, 12, 12, 12, 3, 3, 3, 3, 3),
  W_7  = rep(c(3, 1, 12, 3), each = 5),
  W_8  = c(3, 3, 3, 3, 3, 1, 1, 1, 1, 1, 15, 12, 12, 12, 12, 3, 3, 3, 3, 3),
  W_9  = c(3, 3, 3, 4, 3, 1, 1, 1, 1, 1, 12, 12, 12, 12, 12, 2, 3, 3, 3, 3),
  W_10 = c(3, 3, 4, 3, 3, 1, 1, 1, 1, 1, 12, 10, 12, 12, 12, 3, 3, 3, 3, 3)
))
creating list
# Extract the model inputs from `dat` as stand-alone vectors.
N <- nrow(dat)
ncases <- dat[["Pass"]]
nn <- dat[["Fail"]] + dat[["Pass"]]  # total = Fail + Pass
A <- dat[["A"]]
B <- dat[["B"]]
id <- dat[["id"]]
imput <- dat[["imput"]]

# One vector per weight column W_1 .. W_10.
w_1 <- dat[["W_1"]]
w_2 <- dat[["W_2"]]
w_3 <- dat[["W_3"]]
w_4 <- dat[["W_4"]]
w_5 <- dat[["W_5"]]
w_6 <- dat[["W_6"]]
w_7 <- dat[["W_7"]]
w_8 <- dat[["W_8"]]
w_9 <- dat[["W_9"]]
w_10 <- dat[["W_10"]]

# Named list of inputs; `imput` is stored under the name P.
dat1 <- list(
  N = N,
  ncases = ncases,
  A = A,
  B = B,
  id = id,
  P = imput,
  nn = nn,
  w1 = w_1,
  w2 = w_2,
  w3 = w_3,
  w4 = w_4,
  w5 = w_5,
  w6 = w_6,
  w7 = w_7,
  w8 = w_8,
  w9 = w_9,
  w10 = w_10
)
data block
// Data inputs for the model; the R list `dat1` is built to supply them.
data{
int N; // number of observations (rows)
int ncases[N]; // Pass count per row (dat$Pass on the R side)
int A[N]; // A value per row (0/1 in the example data)
int B[N]; // B value per row (0/1 in the example data)
int nn[N]; // Fail + Pass per row (computed on the R side)
int id[N]; // individual id
int W[N]; // NOTE(review): declared as a length-N int array, yet used below as the column count of `weights`; that use looks like it needs a single int -- confirm
int P[N]; // imputation index per row (dat$imput on the R side)
matrix[N, W] weights; // matrix of weights with N rows and W columns
}
Thank you in advance for any help.
If W in the data block is actually an int (rather than a vector; i.e., W is the number of columns in weights), then I would expect this to do what you need:
# Bind the ten weight vectors column-wise into the N x W matrix that the Stan
# declaration `matrix[N, W] weights;` expects.
weights <- cbind(w_1, w_2, w_3, w_4, w_5, w_6, w_7, w_8, w_9, w_10)
dat1 <- list(
  N = N,
  ncases = ncases, A = A, B = B, id = id, P = imput, nn = nn,
  W = ncol(weights),  # derived from the matrix so the two cannot disagree
  weights = weights
)

Resources