Related
Is it possible to randomly sample patients by group so that they have similar distributions based on other variables? To me, this sounds like a matching problem, but there's no "treatment" here, so I'm not sure if the concept applies.
Sample data:
structure(list(id = c(8350L, 22543L, 24144L, 9392L, 27648L, 2943L,
34686L, 27153L, 11143L, 15209L, 11952L, 22669L, 8211L, 27765L,
28671L, 9693L, 30274L, 25807L, 14839L, 22400L, 24494L, 6540L,
6861L, 31825L, 34190L, 19606L, 21077L, 5037L, 25943L, 20530L,
23730L, 34774L, 7210L, 2051L, 28410L, 18318L, 34848L, 26596L,
8973L, 24885L, 9652L, 8387L, 16168L, 36893L, 24048L, 17769L,
1273L, 22734L, 36796L, 25497L, 28300L, 166L, 21172L, 20026L,
16265L, 1699L, 33140L, 23997L, 10216L, 27408L, 6813L, 10196L,
15015L, 2748L, 34979L, 21763L, 27438L, 6255L, 17047L, 30593L,
30723L, 7914L, 218L, 20134L, 29952L, 27126L, 3795L, 1367L, 33585L,
5940L, 26250L, 22519L, 35611L, 26168L, 26848L, 21276L, 8971L,
22554L, 16655L, 5315L, 18121L, 32526L, 21513L, 9262L, 36882L,
7408L, 18873L, 17238L, 15216L, 23667L, 30138L, 2978L, 25451L,
2492L, 30983L, 7677L, 22880L, 29674L, 7093L, 24910L, 20839L,
18176L, 23031L, 17197L, 4613L, 35801L, 30822L, 3889L, 11752L,
11314L, 22317L, 12825L, 17433L, 4407L, 3986L, 10173L, 32409L,
2697L, 3410L, 26834L, 3203L, 5474L, 34678L, 35336L, 19462L, 15835L,
7888L, 27897L, 9245L, 16524L, 13316L, 21604L, 30458L, 9191L,
1220L, 1779L, 1724L, 26382L, 11566L, 21310L, 12600L, 25063L,
30912L, 31189L, 9480L, 16804L, 2372L, 26238L, 20113L, 33753L,
32711L, 11543L, 10578L, 4475L, 13187L, 23395L, 35342L, 6903L,
26905L, 12026L, 5697L, 15352L, 33985L, 1132L, 15806L, 13611L,
29930L, 15896L, 6057L, 10849L, 12944L, 25561L, 3328L, 27481L,
28790L, 3260L, 24986L, 22177L, 26580L, 11639L, 2256L, 4839L,
22805L, 616L, 6702L, 18360L, 4439L, 1300L, 33779L, 24940L, 10043L,
21268L, 35127L, 36621L, 17618L, 6688L, 15937L, 31057L, 2144L,
30866L, 12500L, 29753L, 36497L, 21247L, 9481L, 36465L, 20665L,
15017L, 21234L, 34258L, 576L, 31187L, 4528L, 15314L, 3657L, 24489L,
33871L, 106L, 24916L, 2524L, 17469L, 2799L, 13311L, 26585L, 7131L,
21401L, 6191L, 22338L, 11647L, 11681L, 22744L, 14000L, 5356L,
2892L, 24481L, 24116L, 21461L, 13992L, 22751L, 11129L, 8802L,
29963L, 4660L, 29020L, 20843L, 21796L, 3607L, 10692L, 29168L,
25034L, 3307L, 35010L, 20280L, 31894L, 7276L, 24259L, 34059L,
35867L, 11165L, 16010L, 34082L, 26586L, 30958L, 25030L, 34851L,
29185L, 25721L, 8968L, 29427L, 20213L, 34667L, 28721L, 21472L,
17132L, 35247L, 9798L, 36826L, 21226L, 28335L, 16077L, 2654L,
20466L, 21324L, 36969L, 22553L, 5895L, 16514L, 10644L, 4376L,
13592L, 11206L, 32440L, 13413L, 31416L, 22540L, 15986L, 11506L,
16928L, 18652L, 17858L, 13522L, 8566L, 10665L, 29442L, 28219L,
22549L, 2209L, 8017L, 6066L, 21718L, 21930L, 11540L, 4100L, 35236L,
240L, 24900L, 425L, 26880L, 21409L, 18885L, 5803L, 33335L, 25597L,
12547L, 8930L, 4328L, 17360L, 4696L, 25198L, 26469L, 14679L,
1691L, 32989L, 6099L, 14427L, 31797L, 23408L, 29296L, 23928L,
31889L, 31737L, 6420L, 11304L, 34798L, 20785L, 9806L, 35018L,
35008L, 1450L, 3246L, 15123L, 19603L, 8519L, 32012L, 3397L, 11682L,
27102L, 18022L, 20408L, 15836L, 18284L, 12897L, 29580L, 14510L,
23925L, 28821L, 35825L, 14922L, 36643L, 10948L, 4220L, 23791L,
65L, 35772L, 1423L, 29386L, 755L, 23627L, 27201L, 12353L, 3578L,
1914L, 35373L, 16702L, 13057L, 3021L, 27531L, 1990L, 205L, 21559L,
29081L, 26301L, 18894L, 3088L, 9782L, 10522L, 12570L, 8948L,
36240L, 33943L, 33022L, 2750L, 32649L, 30134L, 13920L, 11498L,
8314L, 16849L, 15559L, 22529L, 31406L, 5680L, 17908L, 14931L,
2122L, 2581L, 33546L, 12143L, 17220L, 16713L, 7454L, 13659L,
15973L, 20116L, 27689L, 35285L, 36106L, 21834L, 29850L, 29030L,
7957L, 31698L, 12307L, 23642L, 5615L, 12016L, 1161L, 15291L,
32738L, 1089L, 32988L, 33382L, 3642L, 18661L, 35584L, 8009L,
24000L, 30587L, 25870L, 19944L, 34970L, 29983L, 24774L, 28702L,
21199L, 17292L, 29831L, 476L, 18881L, 29923L, 31476L, 4570L,
31081L, 10544L, 3373L, 13435L, 22651L, 17861L, 3818L, 35387L,
11459L, 35637L, 308L, 35697L, 12696L, 15175L, 7990L, 16691L,
19494L, 9008L, 30695L, 28889L, 446L, 22178L, 13000L, 26166L,
15431L, 19332L, 35991L, 2840L), race_f = structure(c(1L, 1L,
1L, 1L, 1L, 1L, 1L, 4L, 2L, 3L, 4L, 1L, 1L, 3L, 3L, 3L, 3L, 1L,
3L, 1L, 3L, 3L, 1L, 1L, 3L, 2L, 2L, 1L, 4L, 5L, 1L, 4L, 1L, 1L,
5L, 1L, 1L, 3L, 2L, 3L, 3L, 1L, 1L, 1L, 2L, 1L, 3L, 2L, 1L, 1L,
2L, 1L, 3L, 1L, 2L, 1L, 1L, 1L, 2L, 3L, 3L, 1L, 1L, 3L, 3L, 3L,
1L, 1L, 1L, 3L, 3L, 2L, 1L, 1L, 3L, 4L, 4L, 1L, 1L, 3L, 1L, 2L,
3L, 4L, 1L, 1L, 1L, 3L, 1L, 1L, 5L, 3L, 1L, 1L, 3L, 2L, 1L, 1L,
3L, 1L, 4L, 1L, 1L, 3L, 1L, 4L, 3L, 1L, 1L, 1L, 1L, 2L, 1L, 2L,
3L, 3L, 4L, 4L, 1L, 2L, 1L, 4L, 3L, 3L, 3L, 1L, 1L, 1L, 3L, 1L,
1L, 1L, 1L, 3L, 3L, 3L, 2L, 3L, 1L, 4L, 5L, 1L, 4L, 3L, 3L, 3L,
1L, 2L, 1L, 2L, 2L, 4L, 1L, 1L, 2L, 3L, 1L, 1L, 1L, 4L, 1L, 5L,
2L, 1L, 2L, 3L, 1L, 5L, 1L, 3L, 1L, 1L, 3L, 1L, 1L, 3L, 3L, 3L,
1L, 4L, 4L, 3L, 2L, 4L, 2L, 1L, 3L, 3L, 1L, 4L, 3L, 3L, 3L, 1L,
1L, 4L, 1L, 4L, 2L, 3L, 3L, 1L, 3L, 3L, 1L, 1L, 1L, 4L, 4L, 1L,
3L, 4L, 1L, 3L, 1L, 1L, 4L, 3L, 4L, 1L, 3L, 1L, 2L, 4L, 3L, 3L,
1L, 1L, 3L, 1L, 5L, 1L, 1L, 1L, 3L, 1L, 3L, 3L, 2L, 1L, 4L, 3L,
3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
3L, 1L, 1L, 1L, 4L, 1L, 4L, 3L, 1L, 3L, 2L, 1L, 1L, 2L, 3L, 1L,
4L, 2L, 3L, 1L, 3L, 4L, 1L, 1L, 3L, 1L, 3L, 3L, 1L, 1L, 1L, 2L,
1L, 1L, 1L, 5L, 4L, 3L, 1L, 3L, 3L, 1L, 3L, 3L, 4L, 1L, 1L, 3L,
1L, 3L, 3L, 1L, 1L, 1L, 4L, 1L, 3L, 1L, 3L, 2L, 1L, 3L, 1L, 4L,
1L, 4L, 3L, 3L, 2L, 3L, 3L, 1L, 1L, 4L, 1L, 1L, 2L, 1L, 1L, 1L,
4L, 1L, 1L, 3L, 3L, 1L, 4L, 3L, 3L, 4L, 1L, 3L, 1L, 5L, 3L, 4L,
1L, 4L, 4L, 1L, 3L, 4L, 1L, 4L, 1L, 1L, 1L, 3L, 2L, 1L, 2L, 4L,
1L, 1L, 5L, 4L, 1L, 1L, 4L, 3L, 3L, 1L, 3L, 1L, 1L, 1L, 1L, 2L,
1L, 3L, 3L, 3L, 1L, 2L, 3L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 3L, 1L,
1L, 3L, 4L, 1L, 1L, 2L, 5L, 3L, 3L, 1L, 1L, 4L, 1L, 4L, 1L, 4L,
2L, 3L, 3L, 1L, 1L, 1L, 4L, 1L, 4L, 3L, 1L, 1L, 1L, 1L, 3L, 1L,
3L, 1L, 1L, 1L, 1L, 4L, 3L, 4L, 3L, 3L, 3L, 2L, 3L, 1L, 1L, 1L,
3L, 5L, 2L, 5L, 1L, 1L, 4L, 3L, 1L, 3L, 2L, 1L, 1L, 5L, 1L, 3L,
3L, 4L, 1L, 1L, 1L, 2L, 5L, 1L, 1L, 4L, 3L, 1L, 1L, 1L, 2L, 1L,
1L, 3L, 3L, 1L, 1L, 4L, 3L, 2L, 4L, 4L, 1L, 1L, 2L, 3L, 1L, 3L,
3L, 1L), .Label = c("White", "Black", "Hispanic", "Asian", "Other"
), class = "factor"), cops2_avg_12mo = c(82.9166666666667, 66,
23.3333333333333, 28, 9.33333333333333, 69.9166666666667, 6,
33.3333333333333, 0, 12, 102, NA, 66, 6, 45, 58.5, 10, 55.9166666666667,
19.5, 6, 10, 234.666666666667, 28, 23, 51.5833333333333, 10,
38, 123.5, 0, 24, 10, 0, 73, 10, 25, 6, 20, 13.4166666666667,
13.8333333333333, 8, 14.8333333333333, 53.5, 42, NA, 57.1666666666667,
0, 24.6666666666667, 10, NA, 54.6666666666667, 38.75, 41, 22,
0.833333333333333, 13, 113.083333333333, 27.3333333333333, 9,
33.1666666666667, 18.75, 57.75, 30, 60.3333333333333, 23.1666666666667,
37, 16.5, 0, 145.5, 45, 31.3333333333333, 0, 10, 187.5, 27.4166666666667,
10, 54.9166666666667, 78.8333333333333, 103.75, 6.66666666666667,
30.4166666666667, 10, 10, 24.6666666666667, 10, 118.333333333333,
61.25, 17, 10, 28, 51, 6, 32.0833333333333, 80.75, 8.83333333333333,
NA, 10, 74.25, 42.25, 47, 60, 41.6666666666667, 19.0833333333333,
98.5, 73.5, 10, 6.66666666666667, 49.8333333333333, 10, 79.8333333333333,
10, 42, 95.8333333333333, 130.583333333333, 5.41666666666667,
47.25, 6, 8, 17.8333333333333, 10, 73.9166666666667, 10, 8, 27.8333333333333,
125.916666666667, 134.166666666667, 88, 10, 58, 62.5, 10.3333333333333,
28.8333333333333, 100.083333333333, 35.5, 0, 0, 10, 105, 7.33333333333333,
35, 9.66666666666667, 10, 4.16666666666667, 10, 8.33333333333333,
70.6666666666667, 28.4166666666667, 38.1666666666667, 8, 101.5,
26.75, 61.1666666666667, 14, 95.5833333333333, 35, 65, 0, 51.75,
57.5, 10, 13.6666666666667, 10, 67.5, 10, 62.3333333333333, 72.6666666666667,
10, 45.5, 20.8333333333333, 31, 84.5, 10, 98.1666666666667, 47.5,
56, 126, 14, 10, 10, 8, 36, 111.5, 54.5, 45.5, 8, 37.5, 84.8333333333333,
39.1666666666667, 56.25, 37.9166666666667, 37.75, 27, 55.6666666666667,
10, 34, 5.83333333333333, 37, 80.0833333333333, 57, 102.166666666667,
12.6666666666667, 10, 19.3333333333333, 10, NA, 51, 25.9166666666667,
14, 36.9090909090909, 38.6666666666667, 0, 6.33333333333333,
NA, 31, 43, 26.5, 10, 34.4166666666667, 77.1666666666667, 10,
10, 89.9166666666667, 59, 37, 77.3333333333333, 64, 52, 19.6666666666667,
66.5, 24, 106.083333333333, 29.6666666666667, 38.1666666666667,
6.66666666666667, 10, 16.75, NA, 86.75, 1, 14, 20.3333333333333,
8, 21, 38.9166666666667, 50.8333333333333, 57.5, 29, 0, 26.5,
51.9166666666667, 71.25, 42.6666666666667, 82, 58.0833333333333,
11.3333333333333, 82, 9.5, 78.6666666666667, 102.5, 71, 10, 70.6666666666667,
NA, 33.8333333333333, 61.25, 87, 36.5, 10, 40.4166666666667,
51.8333333333333, 23, 9.66666666666667, 44.5, 8, 10, 4.16666666666667,
0, 48.8333333333333, 49.25, 15, 70, 10, 6, 10, 34.8333333333333,
108.75, 36, NA, 31, 51, 69.5, 122.5, 48, 43.5833333333333, NA,
10, 20, 80.75, 54.75, 106.916666666667, 53.5, 90.6666666666667,
8.33333333333333, 85.5, 40.5833333333333, 5.5, 10, 61.3333333333333,
69.8333333333333, 10, 51, 0, 49.0833333333333, 13.6666666666667,
13.3333333333333, 5.83333333333333, 33.8333333333333, 14.4166666666667,
11.25, 14, 6, 14.5833333333333, 36, 21, 10, 29.5833333333333,
13, 34, 10, 2.5, 10, 211.916666666667, 19.75, 7.33333333333333,
6, 59.6666666666667, 30.25, 34.25, 16.1666666666667, 10, NA,
NA, 97, 75, 26.5, 8, 32.25, 0, 39, 37, 165.333333333333, 45,
33.1666666666667, 21, 10, 57, 70.3333333333333, 10, 10, 62, 79.1666666666667,
38, 26.1666666666667, 13, 8, 69.6666666666667, 40.5, 100, 0.833333333333333,
8, 82.5, 10, 19.8333333333333, 20.0833333333333, 8, 25.8333333333333,
16.75, 10, 36, NA, 12.8333333333333, 31.4166666666667, 10, 61.4166666666667,
14, 67.5, 3, 83.1666666666667, 48, 43.75, 35.4166666666667, 73,
44.1666666666667, 8, 29.75, 10, 10, 62.6666666666667, 26.9166666666667,
29.6666666666667, 10, NA, 15, 19.4166666666667, 112, 29, 3, 33.5,
62.5, 10, 84.6666666666667, 8, 84.4166666666667, 81.5, 56.1666666666667,
10, 101.416666666667, 16, 10, 19.6666666666667, 60, 73.6666666666667,
74.9166666666667, 21, 5, 15.0833333333333, 17.0833333333333,
17.5, 46, 61.8333333333333, 115.333333333333, 92, 30, 0, 22.75,
16.6666666666667, 15, 15, 10, NA, 56.25, 54, 10, 40, 9.83333333333333,
10.9166666666667, 22.25, 84.75, 80, 1.66666666666667, 99.8333333333333,
10, 38.6666666666667, 169.75, 35.0833333333333, 8, 78.5, 6.33333333333333,
21, 10, 42, 105.166666666667, 162.416666666667, 14, 69.25, 35.8333333333333,
13, 5.83333333333333, 34, 51, 12.75, 44.3333333333333, 39.5,
10, 23, 46.8333333333333, 89.9166666666667, 15, 28, 128.416666666667,
10, 91.6666666666667, 3.5, 54, 23, NA, 29.75, 37.1666666666667,
12.6666666666667, 31.9166666666667, 23, 0, 11, 67.9166666666667,
3.16666666666667, 8.33333333333333, 51, NA, 10, 0, 58.8333333333333
), AGE = c(86, 82, 83, 92, 45, 81, 52, 64, 71, 96, 79, 64, 76,
37, 81, 79, 72, 79, 74, 46, 45, 71, 89, 76, 53, 48, 52, 77, 63,
52, 57, 62, 84, 88, 55, 69, 67, 63, 67, 51, 86, 53, 65, 59, 71,
60, 70, 20, 78, 62, 58, 73, 68, 71, 66, 72, 71, 65, 95, 67, 79,
70, 86, 77, 81, 54, 44, 66, 80, 71, 30, 77, 67, 75, 48, 65, 83,
85, 70, 70, 74, 58, 81, 28, 78, 66, 79, 47, 74, 41, 74, 58, 73,
55, 53, 56, 84, 74, 62, 85, 68, 47, 78, 72, 57, 56, 64, 55, 86,
76, 77, 58, 74, 55, 71, 61, 74, 62, 65, 75, 81, 68, 39, 58, 65,
76, 27, 79, 86, 61, 87, 52, 72, 58, 53, 69, 78, 65, 81, 69, 66,
68, 61, 72, 74, 80, 88, 46, 53, 77, 89, 83, 41, 67, 83, 62, 90,
70, 60, 62, 33, 78, 80, 62, 81, 37, 55, 90, 81, 73, 67, 97, 32,
71, 70, 69, 46, 57, 60, 79, 79, 56, 75, 60, 52, 78, 61, 51, 70,
67, 71, 36, 53, 70, 53, 74, 89, 78, 70, 56, 58, 83, 50, 77, 70,
50, 75, 53, 86, 65, 45, 63, 62, 78, 65, 69, 75, 79, 71, 56, 88,
63, 72, 85, 68, 72, 45, 81, 46, 70, 84, 71, 82, 63, 57, 77, 70,
42, 87, 84, 61, 64, 79, 53, 65, 64, 69, 68, 71, 89, 49, 70, 82,
63, 79, 65, 64, 54, 73, 36, 80, 38, 68, 62, 84, 80, 65, 73, 91,
59, 35, 80, 67, 68, 65, 47, 60, 67, 72, 81, 22, 35, 58, 57, 68,
94, 38, 77, 75, 73, 78, 71, 78, 53, 58, 61, 77, 44, 95, 53, 72,
68, 72, 73, 78, 41, 75, 80, 60, 53, 68, 79, 80, 74, 25, 79, 55,
68, 85, 64, 72, 78, 78, 71, 73, 82, 73, 73, 58, 69, 58, 72, 78,
56, 74, 67, 66, 72, 38, 58, 62, 77, 81, 37, 46, 88, 55, 76, 50,
57, 72, 39, 56, 29, 76, 77, 36, 31, 70, 70, 70, 54, 74, 47, 81,
46, 81, 55, 53, 70, 28, 71, 79, 68, 78, 81, 30, 83, 43, 70, 79,
47, 94, 60, 64, 82, 81, 92, 57, 90, 86, 58, 61, 69, 50, 64, 79,
56, 76, 52, 55, 53, 85, 89, 64, 86, 58, 82, 64, 74, 45, 64, 71,
75, 61, 79, 82, 63, 81, 60, 70, 79, 63, 59, 80, 53, 80, 41, 83,
67, 90, 60, 82, 74, 75, 52, 62, 35, 53, 49, 71, 69, 73, 67, 44,
77, 81, 96, 52, 75, 30, 83, 74, 56, 62, 78, 63, 63, 62, 71, 62,
89, 83, 77, 66, 64, 24, 96, 63, 51, 65, 71, 50, 68, 83, 82, 90,
91, 84, 90, 76, 62, 79, 20, 75, 79, 80, 62, 62, 71, 51, 81, 84,
65, 65, 55, 65, 51, 26, 70)), row.names = c(NA, -500L), class = c("tbl_df",
"tbl", "data.frame"))
I'm hoping to sample by race_f so that the different race groups are similar in AGE and cops2_avg_12mo. Is this at all possible? Thank you!
The answer depends on whether you want to ensure that their AGE and cops2_avg_12mo values always fall within a specific range - in which case you would simply create a subset of your data frame containing only the patients whose AGE and cops2_avg_12mo are within that range. I think this is the safer thing to do in terms of quality control. You can view a plot of the two columns of your data (AGE and cops2_avg_12mo) to get an idea of what ranges of values most of the patients fall into:
# Plot the AGE and cops2_avg_12mo columns of x to eyeball the value ranges
# most patients fall into (x is presumably the sample data frame - confirm).
plot(x[,c("AGE", "cops2_avg_12mo")])
Pick ranges for these values that contain enough patients to sample from. (I don't know how many samples you need). Basically, draw a box in the dot plot which contains enough patients to sample from.
So once you determine the ranges/boundaries of the box, just create indexes like so:
# Logical row mask for the chosen "box" of covariate values.
# `[[` extracts each column as a plain vector, so `idx` is an ordinary logical
# vector whether `x` is a base data.frame or a tibble (`x[, "AGE"]` on a tibble
# stays a one-column tibble). Rows with a missing value in either column are
# excluded explicitly so the mask never contains NA.
idx <- !is.na(x[["AGE"]]) & !is.na(x[["cops2_avg_12mo"]]) &
  x[["AGE"]] > 50 & x[["AGE"]] < 75 &
  x[["cops2_avg_12mo"]] > 0 & x[["cops2_avg_12mo"]] < 75
then get the subset of your data:
# Keep only the rows selected by the mask; every column is retained.
subsetX <- x[idx, ]
After you create that subset, you can randomly sample using R's sample() function. If you want to sample equally from each race, call sample() on the subsetX data one race at a time to get n samples per race:
# Draw n patients (rows) at random, without replacement, from one race group.
# sample() applied directly to a data frame samples its columns, not its rows,
# so sample row positions and index the rows instead. which() drops rows whose
# race_f is NA. n must not exceed the number of rows in the group.
asian <- subsetX[which(subsetX[["race_f"]] == "Asian"), ]
asian[sample.int(nrow(asian), n, replace = FALSE), ]
Alternatively, if you are OK with sampling patients that have outlier values (though I feel this will produce more variation in your results), then you can create a histogram of each of the columns - for example, AGE. Get the histogram bin counts and divide them by the total number of patients to get a probability distribution. Then create a vector the same length as the number of patients, where each value is the probability calculated for the bin that patient belongs to (found by getting the bin indexes when calculating the histogram). Finally, pass that vector into the sample() function as the prob argument so that values are sampled with their specified probability.
I've been working on the following dataset
> dput(db_analysis)
structure(list(ID = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28,
29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76,
77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92,
93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106,
107, 108, 109, 110, 111, 112, 113), GROUP = structure(c(2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("CONTROL",
"TRAINING"), class = "factor"), Gender = c(1, 0, 1, 0, 1, 0,
0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1,
0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1,
0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1,
1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1,
1, 0), Age = c(74, 76, 81, 74, 69, 72, 75, 83, 78, 72, 82, 68,
72, 72, 73, 80, 69, 72, 70, 80, 75, 80, 78, 74, 82, 74, 80, 82,
78, 81, 66, 71, 70, 79, 78, 73, 72, 77, 77, 71, 83, 74, 70, 71,
77, 69, 67, 64, 79, 71, 77, 77, 73, 67, 68, 79, 81, 67, 84, 75,
80, 73, 68, 74, 77, 79, 79, 72, 73, 78, 76, 78, 77, 74, 78, 77,
77, 82, 77, 70, 77, 81, 79, 75, 74, 78, 69, 77, 73, 77, 70, 79,
70, 72, 77, 72, 71, 71, 73, 81, 70, 72, 68, 70, 73, 82, 81, 73,
76, 85, 70, 77, 65), Education = c(18, 4, 8, 5, 8, 11, 5, 5,
4, 8, 8, 12, 5, 18, 13, 5, 13, 13, 5, 5, 13, 5, 3, 8, 17, 5,
8, 5, 5, 8, 17, 8, 18, 18, 13, 13, 13, 13, 15, 17, 8, 5, 5, 13,
8, 5, 11, 13, 8, 8, 8, 5, 13, 8, 5, 17, 8, 12, 13, 5, 8, 8, 8,
5, 3, 8, 18, 5, 8, 13, 8, 5, 17, 8, 5, 17, 5, 8, 11, 8, 8, 5,
12, 3, 8, 8, 8, 13, 5, 5, 8, 8, 13, 5, 5, 8, 13, 5, 8, 12, 5,
13, 12, 8, 5, 17, 5, 5, 5, 8, 13, 10, 8), ADAS_CogT0 = c(14.66,
15.33, 17.33, 19, 7.66, 12.6, 18.67, 14.99, 17.99, 17.33, 13.66,
16.99, 10.66, 9.66, 14.99, 15.66, 13.33, 4.33, 14.33, 15.99,
16.33, 10.66, 14.66, 10.66, 19.33, 17.66, 15.99, 20.66, 20.6,
17, 10.33, 6.33, 6.66, 19.99, 13.33, 24.33, 12.33, 10.33, 12.33,
9.66, 10.99, 13.99, 23, 6.32, 11.32, 13.99, 14.66, 8.99, 14.33,
9.99, 7.33, 15.66, 14, 7.99, 23.32, 14.66, 9.99, 5.66, 6.99,
11.66, 10.33, 6.99, 19.32, NA, 10, 17.66, 13.66, 10.32, NA, NA,
8.66, 9, 6.99, 14.99, 9.66, 13.66, 15.32, 12, 14, 13.66, 11.99,
15.66, 16, 15, 16.99, 20, 11, 7.99, 8.33, 8.32, 14.99, 18.66,
10.33, 11.99, 9.32, 17, 14.33, 14.66, 16.6, 9.99, NA, 17.66,
18.66, NA, 19, 11.9, 16.66, 9.33, 10, 13.99, 7.66, 8.66, 9.32
), ADAS_CogT7 = c(16, 9.32, 21.33, 17, 8.32, 11, 14.99, 10.99,
17, 18.33, 13.32, 14.34, 8.99, 7, 11.99, 15.33, 6.99, 5.33, 12.32,
13, 21.32, 7.99, 13.33, 11.99, 17.32, 16.32, 16.33, 14.66, 18.99,
17.33, 7.99, 9.33, 10.99, NA, 12.99, 16.33, 21.66, 9, 9.34, 8.66,
8.33, 13.66, 15.66, 6.66, 10.99, 13.33, 13.33, 7.99, 11.99, 11.32,
7.33, 9.66, 6.99, NA, 15.99, 15.66, 14.66, 6.32, 7, 11, 14, 10.33,
24.66, NA, 14.99, NA, 15.99, 9.32, NA, NA, 9.99, 9.33, 7.66,
17.33, 10.32, 16, 17, 12.99, 15, 14.33, 10, 14.99, 19, 13.99,
19.33, NA, 10, 6.99, 11.66, 6.66, 14.33, 16, 8.66, 10, NA, 20,
14.99, 19.66, 26.66, 8.99, NA, 14.99, 20.99, NA, 17.99, 12.33,
19, 11.33, 10.66, 16.66, 11.33, 9.66, 6.99)), row.names = c(NA,
-113L), class = c("tbl_df", "tbl", "data.frame"))
>
ADAS_CogT0 and ADAS_CogT7 are scores on a psychological test gathered at time 0 and at time 7, in both a CONTROL group and a TRAINING group. Since I need to fit a model for the variable at time 7 corrected for its initial score, and also evaluate the effects of TIME, GROUP and TIME*GROUP, I've written the following code:
# Reshape to long format: one row per subject and time point.
# Rows with any missing value in the selected columns are dropped before
# pivoting, so each remaining ID contributes both a time-0 and a time-7 score.
db_long <- db_analysis %>%
  dplyr::select(ID, GROUP, Age, Gender, ADAS_CogT0, ADAS_CogT7, Education) %>%
  na.omit() %>%
  pivot_longer(
    cols = c(ADAS_CogT0, ADAS_CogT7),
    names_to = "time",
    values_to = "score"
  ) %>%
  mutate(
    ID = factor(ID),
    # Relabel the former column names as the time points "0" and "7".
    time = factor(
      time,
      levels = c("ADAS_CogT0", "ADAS_CogT7"),
      labels = c("0", "7")
    )
  )
And fitted this model
# Build the predictor summary for db_long and register it by name in the
# "datadist" option.
dd <- datadist(db_long)
options(datadist = "dd")

# OLS fit of score on the time-by-group interaction plus Age.
# NOTE(review): x = TRUE / y = TRUE presumably keep the design matrix and
# response in the fit object - confirm against the rms documentation.
ols_fit <- ols(score ~ time * GROUP + Age, data = db_long, x = TRUE, y = TRUE)
However, the output does not reflect what I needed to calculate.
Effects Response : score
Factor Low High Diff. Effect S.E. Lower 0.95 Upper 0.95
Age 71.25 78 6.75 0.62149 0.58269 -0.52749 1.770500
time - 7:0 1.00 2 NA -1.40080 0.44476 -2.27770 -0.523760
GROUP - CONTROLLO:TRAINING 2.00 1 NA -1.66800 0.79710 -3.23980 -0.096255
Adjusted to: time=0 GROUP=TRAINING
How should I manipulate the dataset to fit the model so that I obtain an evaluation of the score at time 7 as a dependent variable of time, group and time*group, adjusted for its initial score at time 0?
I've tried to analyze simple slopes in a moderated regression
using the library named interactions,
but it turns out it doesn't work.
I've already searched on Google, but it seems no one has had the same problem I have.
# One-time setup: install the interactions package with its dependencies.
install.packages("interactions", dependencies = TRUE)
library(interactions)

# Main-effects model, then the model that adds the malehappy x femalehappy
# interaction; anova() compares the two fits.
out1 <- lm(timetogether ~ malehappy + femalehappy, data = df)
out2 <- lm(timetogether ~ malehappy * femalehappy, data = df)
summary(out1)
summary(out2)
anova(out1, out2)

# Simple slopes of malehappy with femalehappy as the moderator.
sim_slopes(out2, pred = "malehappy", modx = "femalehappy")
When I call the function sim_slopes(out2, pred...)
it returns
"Error in isFALSE(row.names) : could not find function "isFALSE""
Some might run sim_slopes() without any error.
but not for me...
What should I do, to resolve it, or to check it?
Thank you
and here, is the output of dput(df)
structure(list(malehappy = structure(c(62, 53, 55, 36, 60, 50, 45,
53, 48, 50, 63, 46, 72, 40, 40, 30, 49, 49, 45, 59, 46.1513513513514,
51, 36, 47, 53, 65, 46, 39, 41, 56, 54, 41, 36, 46.1513513513514, 51,
50, 47, 56, 44, 42, 61, 44, 47, 55, 57, 55, 32, 62, 53, 60, 59, 65,
49, 49, 60, 56, 67, 54, 46.1513513513514, 46.1513513513514,
46.1513513513514, 34, 57, 61, 73, 42, 84, 46.1513513513514, 47, 43, 46.1513513513514, 59, 40, 42, 49, 55, 46, 56, 50, 48, 57, 50, 53, 46.1513513513514, 50, 46.1513513513514, 61, 64, 48, 42, 31, 71, 54, 29, 45, 56, 53, 56, 47, 48, 39, 58, 51, 48, 54, 52, 57, 89, 53, 53,
44, 53, 40, 47, 40, 47, 54, 69, 60, 56, 47, 65, 50, 29, 58, 50,
46.1513513513514, 39, 66, 50, 46.1513513513514, 47, 38, 50, 70, 36, 59, 71, 41, 54, 18, 46.1513513513514, 38, 29, 71, 46.1513513513514,
51, 46, 48, 61, 52, 41, 48, 44, 37, 43, 54, 56, 44, 55, 51, 64, 52,
38, 48, 60, 45, 43, 44, 39, 54, 56, 47, 53, 51, 43, 49, 50, 56, 41,
37, 49, 59, 60, 72, 31, 58, 52, 49, 58, 60, 52, 47, 65, 63, 67,
46.1513513513514, 54, 60,
46.1513513513514, 52, 43, 45, 26, 50, 40, 35, 43, 38, 40, 53, 36, 62, 30, 30, 46.1513513513514, 39, 39, 35, 49, 34, 41, 26, 37, 43, 55, 36,
29, 31, 46, 44, 31, 26, 28, 41, 40, 37, 46, 34,
46.1513513513514, 51, 34, 37, 45, 47, 45, 22, 52, 43, 50, 49, 55, 39, 39, 50, 46, 46.1513513513514, 44, 46.1513513513514, 43,
46.1513513513514, 24, 47, 51, 63, 32, 74, 24, 37, 33, 42, 49, 30, 32, 39, 45, 36, 46, 40, 46.1513513513514, 47, 40, 43, 58, 40, 47, 51, 54,
38, 32, 21, 61, 44, 19, 35, 46, 43, 46, 37, 38, 29, 48, 41, 38, 44,
42, 47, 79, 43, 43, 34, 43, 30, 37, 30, 37, 44, 59, 50, 46,
46.1513513513514, 55, 40, 19, 48, 40, 37, 29, 56, 40, 49, 37, 28, 46.1513513513514, 60, 26, 49, 61, 31, 44, 8, 36, 28, 19, 61, 38, 41, 36, 38, 51, 42, 31, 38, 34, 27, 33, 44, 46, 46.1513513513514,
46.1513513513514, 46.1513513513514, 54, 42, 28, 38, 50, 35, 46.1513513513514, 34, 29, 46.1513513513514, 46, 37, 43, 41, 33, 39, 40, 46, 31, 27, 39, 49, 46.1513513513514, 62, 46.1513513513514, 48,
42, 39, 48, 50, 42, 37, 55, 53, 57, 44, 44, 50, 52), imputed = c(21L,
34L, 59L, 60L, 61L, 68L, 71L, 84L, 86L, 127L, 131L, 142L, 146L, 197L,
200L, 216L, 240L, 257L, 259L, 261L, 280L, 321L, 334L, 359L, 360L,
361L, 368L, 371L, 384L, 386L), class = "impute"), femalehappy =
structure(c(59, 54, 51, 35, 50, 55.5978260869565, 45, 59, 49, 63, 53,
57, 65, 38, 45, 45, 34, 48, 35, 89, 45, 53, 46, 30, 54, 59, 31, 44,
37, 55, 46, 63, 41, 43, 57, 65, 41, 67, 52, 55, 69, 41, 55, 37, 50,
39, 23, 63, 63, 47, 53, 52, 37, 51, 52, 34, 58, 55, 55.5978260869565,
60, 55.5978260869565, 42, 42, 55.5978260869565, 55, 39, 71,
55.5978260869565, 41, 51, 38, 38, 44, 72, 57, 44, 45, 57, 56, 43, 55.5978260869565, 51, 46, 64, 64, 65, 74, 58, 54, 51, 45, 61, 56, 39, 48, 49, 57, 56, 39, 51, 35, 42, 49, 43, 43, 53, 64, 67, 43, 54, 49,
57, 43, 44, 57, 48, 64, 56, 57, 69, 55.5978260869565, 65, 65, 37, 52,
50, 55.5978260869565, 55.5978260869565, 61, 57, 55.5978260869565, 46,
62, 55, 66, 50, 70, 63, 44, 62, 36, 55.5978260869565, 23, 47, 54,
55.5978260869565, 41, 40, 57, 40, 61, 45, 57, 30, 40, 42, 55.5978260869565, 57, 45, 44, 46, 48, 33, 45, 49, 55, 47, 40, 47, 42, 60, 55.5978260869565, 38, 55.5978260869565, 41, 55, 36, 52, 50, 36,
44, 50, 59, 59, 55.5978260869565, 49, 62, 57, 37, 59, 63, 43, 38, 63,
53, 58, 60, 47, 49, 55.5978260869565, 69, 64, 61, 45, 60, 61, 55, 69,
59, 73, 63, 67, 75, 48, 55, 55.5978260869565, 44, 58, 45, 99, 55, 63,
56, 40, 64, 69, 55.5978260869565, 54, 47, 65, 56, 73, 51, 53, 67, 75,
51, 77, 62, 55.5978260869565, 79, 51, 65, 47, 60, 49, 33, 73, 73,
55.5978260869565, 63, 62, 47, 61, 62, 44, 68, 65, 55.5978260869565, 70, 55.5978260869565, 52, 52, 64, 65, 49, 81, 48, 51, 61, 48, 48, 54,
55.5978260869565, 67, 54, 55, 67, 66, 55.5978260869565, 55.5978260869565, 61, 56, 74, 74, 75, 84, 68, 64, 61, 55, 71, 66, 49, 58, 59, 67, 66, 49, 61, 45, 52, 59, 53, 53, 55.5978260869565, 74, 77,
53, 64, 59, 67, 53, 54, 67, 58, 74, 66, 67, 79, 57, 75, 75, 47, 62,
60, 57, 42, 71, 67, 63, 56, 72, 65, 76, 60, 80, 73, 54, 72, 46, 57,
33, 57, 64, 72, 51, 50, 67, 50, 71, 55, 67, 40, 50, 52, 56, 67,
55.5978260869565, 54, 55.5978260869565, 58, 43, 55.5978260869565, 59, 65, 57, 55.5978260869565, 57, 52, 70, 56, 48, 65, 51, 65, 46, 62, 60,
46, 55.5978260869565, 60, 69, 69, 84, 59, 72, 67, 47, 69, 73, 53, 48,
73, 63, 68, 70, 57, 59, 72), imputed = c(6L, 59L, 61L, 64L, 68L, 81L,
121L, 127L, 128L, 131L, 142L, 146L, 157L, 172L, 174L, 185L, 200L,
216L, 227L, 240L, 250L, 259L, 261L, 274L, 280L, 281L, 306L, 359L,
361L, 364L, 368L, 381L), class = "impute"), timetogether =
structure(c(132, 89, 86, 19, 96, 74, 47, 91.7415143603133, 62, 104,
114, 76, 195, 27, 39, 18, 30, 63, 28, 91.7415143603133, 45, 79, 29,
18, 89, 145, 20, 34, 26, 101, 69, 70, 25, 32, 93, 107, 43, 136, 60,
59, 165, 37, 73, 43, 89, 49, 6, 146, 91.7415143603133, 85,
91.7415143603133, 115, 36, 71, 103, 35, 145, 93, 37, 104, 69, 91.7415143603133, 64, 114, 152, 31, 91.7415143603133, 20, 43, 54, 43, 51, 36, 87, 85, 65, 50, 109, 85, 48, 89, 74, 67, 178, 105, 136, 186,
138, 75, 51, 19, 172, 96, 14, 55, 84, 98, 91.7415143603133, 38, 68,
22, 64, 70, 49, 60, 82, 132, 277, 60, 89, 54, 98, 36, 51, 57, 58,
122, 142, 118, 146, 57, 165, 109, 13, 95, 70, 55, 17, 153, 88, 103,
52, 58, 82, 190, 36, 162, 184, 38, 91.7415143603133, 0, 56, 5, 17,
139, 90, 48, 39, 82, 61, 103, 41, 82, 16, 26, 38, 68, 108, 45, 66,
61, 98, 29, 34, 64, 114, 51, 35, 51, 30, 109, 74, 35, 89, 50,
91.7415143603133, 34, 75, 85, 26, 31, 67, 122, 128, 237, 21, 130, 95, 36, 123, 141, 55, 37, 158, 116, 145, 109, 72, 92, 91.7415143603133,
164, 113, 120, 47, 137, 100, 73, 119, 88, 111, 157, 87, 231, 57, 59,
23, 78, 91, 71, 205, 71, 103, 41, 70, 116, 181, 70, 54, 60, 130, 108,
63, 43, 51, 111, 111, 79, 147, 75, 65, 179, 69, 87, 97, 127, 101, 47,
171, 124, 130, 139, 163, 81, 95, 142, 95, 185, 118, 66, 121, 96, 39,
113, 151, 206, 63, 325, 41, 79, 71, 90, 110, 56, 69, 101, 109, 79,
134, 103, 82, 125, 99, 103, 211, 110, 150, 194, 175, 93, 66, 25, 214,
120, 26, 78, 121, 120, 132, 78, 91, 55, 114, 100, 83, 103, 108, 148,
91.7415143603133, 102, 115, 74, 120, 59, 83, 59, 84, 134, 189, 150, 91.7415143603133, 85, 193, 114, 28, 131, 98, 83, 55, 188, 105, 138, 81, 49, 102, 223, 42, 172, 222, 61, 132, 0, 81, 54, 16, 191, 96, 90,
74, 91.7415143603133, 119, 91.7415143603133, 62, 97, 64, 50, 67, 106,
133, 71, 109, 96, 91.7415143603133, 84, 51, 89, 149,
91.7415143603133, 68, 74, 53, 128, 116, 76, 113, 91, 70, 80, 98, 121, 60, 48, 92, 149, 157, 262, 21, 151, 114, 81, 148, 164, 97, 78, 188,
158, 186, 126, 91.7415143603133, 136, 174), imputed = c(8L, 20L, 49L,
51L, 62L, 67L, 98L, 140L, 176L, 200L, 308L, 320L, 349L, 351L, 362L,
367L, 398L), class = "impute"),
kids = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
), .Label = c("nokids", "kids"), class = "factor")), .Names = c("malehappy", "femalehappy", "timetogether", "kids"), row.names =
c(NA, -400L ), class = "data.frame")
It is related to the R version. The function isFALSE has been built into base R since version 3.5. I was getting the same error with the package "jjtools" when using R 3.4. After upgrading R to version 3.6, the problem was gone.
How do I calculate Brier scores for a generalised linear mixed-effects model with binomial errors when my response variable is a cbind of successes/failures?
I have tried the DescTools and scoring packages but I am having trouble understanding the help files for the BrierScore and brierscore functions...
Here is what I have done so far with some sample data.
#Data
df <- structure(list(Dose = c(20, 14, 14, 20, 0, 0, 14, 14, 14, 16,
10, 20, 20, 20, 16, 10, 10, 0, 16, 16, 16, 10, 0, 12, 10, 12,
12, 0, 0, 20, 12, 16, 10, 12, 12, 0, 14, 14, 16, 0, 14, 20, 16,
20, 14, 12, 12, 20, 20, 0, 0, 14, 12, 10, 10, 20, 16, 16, 14,
10, 10, 10, 20, 16, 10, 0, 12, 12, 0, 12, 16, 14, 16, 14, 0,
0, 12, 20, 0, 12, 14, 14, 0, 0, 20, 20, 20, 14, 14, 10, 10, 20,
16, 16, 0, 12, 10, 10, 10, 16, 16, 12, 20, 10, 12, 12, 16, 14,
0, 16, 20, 12, 14, 10, 10, 0, 0, 12, 12, 10, 10, 0, 0, 0, 14,
12, 12, 20, 20, 14, 14, 14, 12, 20, 20, 20, 16, 16, 14, 10, 10,
16, 16, 16), Success = c(100, 91, 87, 100, 0, 0, 91, 96, 89,
96, 82, 99, 99, 99, 92, 59, 45, 0, 100, 95, 100, 83, 8, 82, 63,
98, 74, 9, 0, 99, 78, 98, 53, 96, 52, 0, 62, 85, 98, 4, 89, 99,
99, 97, 82, 80, 91, 99, 96, 0, 0, 95, 80, 68, 74, 100, 97, 93,
87, 34, 32, 47, 99, 96, 86, 15, 93, 86, 0, 77, 89, 80, 98, 96,
31, 0, 61, 100, 0, 84, 88, 97, 0, 0, 99, 100, 100, 92, 88, 46,
51, 99, 97, 100, 0, 93, 61, 91, 57, 76, 95, 50, 98, 16, 87, 93,
87, 88, 11, 92, 98, 60, 96, 0, 64, 72, 0, 74, 77, 0, 62, 0, 0,
0, 84, 47, 69, 98, 100, 61, 90, 79, 11, 100, 98, 100, 98, 93,
91, 58, 58, 93, 78, 69), Failure = c(0, 9, 13, 0, 100, 100, 9,
4, 11, 4, 18, 1, 1, 1, 8, 41, 55, 100, 0, 5, 0, 17, 92, 18, 37,
2, 26, 91, 100, 1, 22, 2, 47, 4, 48, 100, 38, 15, 2, 96, 11,
1, 1, 3, 18, 20, 9, 1, 4, 100, 100, 5, 20, 32, 26, 0, 3, 7, 13,
66, 68, 53, 1, 4, 14, 85, 7, 14, 100, 23, 11, 20, 2, 4, 69, 100,
39, 0, 100, 16, 12, 3, 100, 100, 1, 0, 0, 8, 12, 54, 49, 1, 3,
0, 100, 7, 39, 9, 43, 24, 5, 50, 2, 84, 13, 7, 13, 12, 89, 8,
2, 40, 4, 100, 36, 28, 100, 26, 23, 100, 38, 100, 100, 100, 16,
53, 31, 2, 0, 39, 10, 21, 89, 0, 2, 0, 2, 7, 9, 42, 42, 7, 22,
31), Rep = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L)), row.names = 433:576, class = "data.frame")
# Model fitting: binomial mixed model on the aggregated (Success, Failure)
# counts, with Dose as a fixed effect and a random intercept for each Rep
fit <- lme4::glmer(cbind(Success, Failure) ~ Dose + (1|factor(Rep)), family = binomial, data = df)
# Brier scores, attempt 1: pass the fitted model object straight to DescTools
DescTools::BrierScore(fit)
# Returns the following error:
# Error in mean(resp * (1 - pred)^2 + (1 - resp) * pred^2)
# Brier scores, attempt 2: pass the fitted model object straight to scoring
scoring::brierscore(fit)
# Returns the following error:
# argument "pred" is missing, with no default
Is it possible to calculate Brier scores for a mixed-effects model? If so, how?
I suppose you can simply drop the cbind construction, reshape the data into long format (one row per trial), and use the predict/response interface of DescTools::BrierScore().
Something like this might work:
# DescTools provides Untable() and BrierScore()
library(DescTools)

# Total the Success and Failure counts within each Dose x Rep cell
tt <- aggregate(
  df[, c("Success", "Failure")],
  by = as.list(df[, c("Dose", "Rep")]),
  FUN = sum
)

# Stack the two count columns: one row per cell and outcome, where
# Success becomes a 1/0 indicator and Freq holds the matching count
tt_long <- reshape(
  tt,
  idvar = c("Dose", "Rep"),
  varying = c("Success", "Failure"),
  direction = "long",
  timevar = "Success",
  v.names = "Freq",
  times = c(1, 0)
)

# Expand the frequency table so that each counted trial is a row of its own
d.frm <- Untable(tt_long)

# Refit the mixed model on the binary 1/0 response
fit <- lme4::glmer(
  Success ~ Dose + (1 | Rep),
  family = binomial,
  data = d.frm
)

# Brier score from the fitted probabilities and the observed outcomes
fitted_prob <- predict(fit, type = "response")
BrierScore(pred = fitted_prob, resp = d.frm$Success)
I have a data frame of 200 individuals. Using dplyr, I would like to randomly select half of them and create a variable called 'sex' that is set to male for those 100 individuals and to female for the remaining 100. A reproducible example of the data set is available below.
# dput() prints a text representation of `input` (presumably the
# structure(...) call shown below) and returns `input`, which is stored in `df`
df <- dput(input)
structure(list(id = 1:200, age = c(6L, 4L, 4L, 6L, 1L, 5L, 3L,
1L, 0L, 0L, 0L, 5L, 5L, 5L, 3L, 4L, 4L, 2L, 2L, 3L, 3L, 4L, 6L,
4L, 4L, 0L, 4L, 6L, 1L, 5L, 2L, 6L, 2L, 2L, 0L, 3L, 1L, 6L, 0L,
2L, 5L, 3L, 5L, 3L, 1L, 6L, 6L, 0L, 4L, 5L, 0L, 5L, 3L, 6L, 1L,
2L, 1L, 1L, 4L, 2L, 1L, 2L, 0L, 4L, 3L, 3L, 6L, 2L, 1L, 2L, 5L,
0L, 5L, 2L, 5L, 3L, 3L, 3L, 2L, 5L, 1L, 0L, 0L, 1L, 6L, 3L, 1L,
5L, 6L, 4L, 4L, 4L, 0L, 6L, 6L, 3L, 4L, 6L, 5L, 2L, 5L, 6L, 2L,
2L, 4L, 0L, 4L, 6L, 5L, 6L, 0L, 6L, 2L, 1L, 5L, 5L, 5L, 5L, 3L,
1L, 6L, 3L, 1L, 1L, 3L, 4L, 2L, 4L, 2L, 0L, 5L, 0L, 3L, 1L, 1L,
2L, 0L, 5L, 2L, 3L, 6L, 5L, 2L, 6L, 0L, 0L, 6L, 6L, 1L, 4L, 2L,
0L, 4L, 1L, 3L, 6L, 3L, 4L, 3L, 0L, 1L, 6L, 6L, 5L, 4L, 1L, 1L,
6L, 0L, 1L, 2L, 1L, 1L, 2L, 0L, 4L, 1L, 2L, 2L, 2L, 1L, 6L, 5L,
3L, 2L, 3L, 5L, 2L, 3L, 4L, 5L, 0L, 6L, 5L, 1L, 4L, 5L, 3L, 5L,
5L), x = c(21, 9, 31, 55, 5, 63, 63, 3, 13, 21, 53, 77, 5, 67,
63, 31, 17, 5, 21, 45, 79, 3, 7, 43, 27, 1, 63, 11, 37, 33, 27,
53, 71, 73, 97, 87, 77, 17, 85, 91, 49, 87, 89, 61, 65, 17, 71,
33, 53, 85, 49, 41, 75, 85, 79, 75, 23, 63, 89, 31, 29, 47, 75,
63, 65, 27, 27, 71, 89, 29, 25, 49, 91, 91, 39, 65, 45, 99, 53,
21, 29, 81, 35, 7, 27, 81, 93, 41, 79, 83, 31, 51, 33, 75, 15,
69, 7, 29, 7, 35, 87, 93, 57, 13, 91, 87, 95, 77, 7, 37, 81,
99, 83, 69, 85, 5, 77, 69, 55, 7, 39, 5, 41, 1, 63, 25, 13, 39,
97, 73, 25, 49, 35, 95, 59, 75, 23, 35, 67, 73, 91, 83, 79, 9,
27, 89, 79, 53, 89, 69, 95, 57, 11, 45, 63, 5, 25, 61, 3, 89,
1, 61, 85, 75, 67, 73, 63, 77, 43, 31, 69, 39, 47, 59, 75, 45,
57, 73, 5, 85, 57, 13, 91, 69, 79, 89, 13, 33, 15, 23, 89, 85,
39, 87, 7, 97, 57, 5, 61, 85), y = c(41, 57, 29, 59, 83, 77,
35, 73, 99, 69, 85, 23, 85, 11, 63, 97, 73, 47, 57, 73, 77, 1,
91, 17, 71, 57, 11, 3, 81, 31, 5, 41, 69, 93, 3, 11, 45, 97,
81, 87, 43, 9, 53, 61, 11, 63, 59, 33, 49, 89, 87, 79, 47, 59,
41, 25, 47, 13, 69, 11, 93, 83, 91, 85, 13, 95, 13, 37, 99, 35,
11, 63, 19, 99, 71, 55, 5, 21, 43, 59, 49, 15, 99, 15, 75, 77,
53, 51, 91, 45, 83, 21, 29, 35, 3, 27, 97, 95, 29, 53, 55, 41,
45, 31, 75, 37, 15, 47, 3, 1, 99, 55, 81, 37, 1, 41, 51, 45,
27, 83, 9, 69, 13, 81, 91, 55, 51, 31, 17, 97, 1, 47, 35, 7,
53, 59, 5, 51, 7, 5, 93, 63, 95, 51, 33, 43, 75, 67, 59, 89,
49, 83, 21, 49, 5, 5, 19, 45, 29, 41, 25, 3, 9, 1, 73, 53, 43,
99, 69, 41, 21, 3, 3, 13, 39, 21, 55, 75, 91, 31, 79, 17, 43,
91, 73, 11, 75, 15, 49, 77, 77, 23, 83, 47, 51, 53, 57, 99, 35,
15)), row.names = c(NA, -200L), class = "data.frame", .Names = c("id",
"age", "x", "y"))
I'm new to using dplyr, so I'm not exactly sure how to perform this operation. I'm thinking it would look something like this:
# Draw a random half of the rows (use sample_frac or sample_n to select
# 100 individuals)
sampled_half <- sample_frac(df, 0.5)
# Label every sampled individual as male
new_df <- mutate(sampled_half, sex = "male")
but obviously that just results in a new data frame. Is there a way to select 100 males from the original data frame, then use something like an ifelse statement to assign the rest as female?
If you absolutely need a 50/50 distribution between male and female, you could run with dplyr:
# Draw 100 distinct individuals at random and label them as male,
# keeping only the join key and the new label
males <- sample_n(df, 100, replace = FALSE)
males <- mutate(males, sex = "male")
males <- select(males, id, sex)

# Join the labels back onto the full data; individuals that were not
# drawn have a missing label and are assigned female
dfs <- right_join(males, df, by = "id")
dfs <- mutate(dfs, sex = if_else(is.na(sex), "female", "male"))
results:
# Count the individuals per sex to confirm the 50/50 split
table(dfs$sex)
female male
100 100