Is it possible to randomly sample patients by group so that they have similar distributions based on other variables? To me, this sounds like a matching problem, but there's no "treatment" here, so I'm not sure if the concept applies.
Sample data:
structure(list(id = c(8350L, 22543L, 24144L, 9392L, 27648L, 2943L,
34686L, 27153L, 11143L, 15209L, 11952L, 22669L, 8211L, 27765L,
28671L, 9693L, 30274L, 25807L, 14839L, 22400L, 24494L, 6540L,
6861L, 31825L, 34190L, 19606L, 21077L, 5037L, 25943L, 20530L,
23730L, 34774L, 7210L, 2051L, 28410L, 18318L, 34848L, 26596L,
8973L, 24885L, 9652L, 8387L, 16168L, 36893L, 24048L, 17769L,
1273L, 22734L, 36796L, 25497L, 28300L, 166L, 21172L, 20026L,
16265L, 1699L, 33140L, 23997L, 10216L, 27408L, 6813L, 10196L,
15015L, 2748L, 34979L, 21763L, 27438L, 6255L, 17047L, 30593L,
30723L, 7914L, 218L, 20134L, 29952L, 27126L, 3795L, 1367L, 33585L,
5940L, 26250L, 22519L, 35611L, 26168L, 26848L, 21276L, 8971L,
22554L, 16655L, 5315L, 18121L, 32526L, 21513L, 9262L, 36882L,
7408L, 18873L, 17238L, 15216L, 23667L, 30138L, 2978L, 25451L,
2492L, 30983L, 7677L, 22880L, 29674L, 7093L, 24910L, 20839L,
18176L, 23031L, 17197L, 4613L, 35801L, 30822L, 3889L, 11752L,
11314L, 22317L, 12825L, 17433L, 4407L, 3986L, 10173L, 32409L,
2697L, 3410L, 26834L, 3203L, 5474L, 34678L, 35336L, 19462L, 15835L,
7888L, 27897L, 9245L, 16524L, 13316L, 21604L, 30458L, 9191L,
1220L, 1779L, 1724L, 26382L, 11566L, 21310L, 12600L, 25063L,
30912L, 31189L, 9480L, 16804L, 2372L, 26238L, 20113L, 33753L,
32711L, 11543L, 10578L, 4475L, 13187L, 23395L, 35342L, 6903L,
26905L, 12026L, 5697L, 15352L, 33985L, 1132L, 15806L, 13611L,
29930L, 15896L, 6057L, 10849L, 12944L, 25561L, 3328L, 27481L,
28790L, 3260L, 24986L, 22177L, 26580L, 11639L, 2256L, 4839L,
22805L, 616L, 6702L, 18360L, 4439L, 1300L, 33779L, 24940L, 10043L,
21268L, 35127L, 36621L, 17618L, 6688L, 15937L, 31057L, 2144L,
30866L, 12500L, 29753L, 36497L, 21247L, 9481L, 36465L, 20665L,
15017L, 21234L, 34258L, 576L, 31187L, 4528L, 15314L, 3657L, 24489L,
33871L, 106L, 24916L, 2524L, 17469L, 2799L, 13311L, 26585L, 7131L,
21401L, 6191L, 22338L, 11647L, 11681L, 22744L, 14000L, 5356L,
2892L, 24481L, 24116L, 21461L, 13992L, 22751L, 11129L, 8802L,
29963L, 4660L, 29020L, 20843L, 21796L, 3607L, 10692L, 29168L,
25034L, 3307L, 35010L, 20280L, 31894L, 7276L, 24259L, 34059L,
35867L, 11165L, 16010L, 34082L, 26586L, 30958L, 25030L, 34851L,
29185L, 25721L, 8968L, 29427L, 20213L, 34667L, 28721L, 21472L,
17132L, 35247L, 9798L, 36826L, 21226L, 28335L, 16077L, 2654L,
20466L, 21324L, 36969L, 22553L, 5895L, 16514L, 10644L, 4376L,
13592L, 11206L, 32440L, 13413L, 31416L, 22540L, 15986L, 11506L,
16928L, 18652L, 17858L, 13522L, 8566L, 10665L, 29442L, 28219L,
22549L, 2209L, 8017L, 6066L, 21718L, 21930L, 11540L, 4100L, 35236L,
240L, 24900L, 425L, 26880L, 21409L, 18885L, 5803L, 33335L, 25597L,
12547L, 8930L, 4328L, 17360L, 4696L, 25198L, 26469L, 14679L,
1691L, 32989L, 6099L, 14427L, 31797L, 23408L, 29296L, 23928L,
31889L, 31737L, 6420L, 11304L, 34798L, 20785L, 9806L, 35018L,
35008L, 1450L, 3246L, 15123L, 19603L, 8519L, 32012L, 3397L, 11682L,
27102L, 18022L, 20408L, 15836L, 18284L, 12897L, 29580L, 14510L,
23925L, 28821L, 35825L, 14922L, 36643L, 10948L, 4220L, 23791L,
65L, 35772L, 1423L, 29386L, 755L, 23627L, 27201L, 12353L, 3578L,
1914L, 35373L, 16702L, 13057L, 3021L, 27531L, 1990L, 205L, 21559L,
29081L, 26301L, 18894L, 3088L, 9782L, 10522L, 12570L, 8948L,
36240L, 33943L, 33022L, 2750L, 32649L, 30134L, 13920L, 11498L,
8314L, 16849L, 15559L, 22529L, 31406L, 5680L, 17908L, 14931L,
2122L, 2581L, 33546L, 12143L, 17220L, 16713L, 7454L, 13659L,
15973L, 20116L, 27689L, 35285L, 36106L, 21834L, 29850L, 29030L,
7957L, 31698L, 12307L, 23642L, 5615L, 12016L, 1161L, 15291L,
32738L, 1089L, 32988L, 33382L, 3642L, 18661L, 35584L, 8009L,
24000L, 30587L, 25870L, 19944L, 34970L, 29983L, 24774L, 28702L,
21199L, 17292L, 29831L, 476L, 18881L, 29923L, 31476L, 4570L,
31081L, 10544L, 3373L, 13435L, 22651L, 17861L, 3818L, 35387L,
11459L, 35637L, 308L, 35697L, 12696L, 15175L, 7990L, 16691L,
19494L, 9008L, 30695L, 28889L, 446L, 22178L, 13000L, 26166L,
15431L, 19332L, 35991L, 2840L), race_f = structure(c(1L, 1L,
1L, 1L, 1L, 1L, 1L, 4L, 2L, 3L, 4L, 1L, 1L, 3L, 3L, 3L, 3L, 1L,
3L, 1L, 3L, 3L, 1L, 1L, 3L, 2L, 2L, 1L, 4L, 5L, 1L, 4L, 1L, 1L,
5L, 1L, 1L, 3L, 2L, 3L, 3L, 1L, 1L, 1L, 2L, 1L, 3L, 2L, 1L, 1L,
2L, 1L, 3L, 1L, 2L, 1L, 1L, 1L, 2L, 3L, 3L, 1L, 1L, 3L, 3L, 3L,
1L, 1L, 1L, 3L, 3L, 2L, 1L, 1L, 3L, 4L, 4L, 1L, 1L, 3L, 1L, 2L,
3L, 4L, 1L, 1L, 1L, 3L, 1L, 1L, 5L, 3L, 1L, 1L, 3L, 2L, 1L, 1L,
3L, 1L, 4L, 1L, 1L, 3L, 1L, 4L, 3L, 1L, 1L, 1L, 1L, 2L, 1L, 2L,
3L, 3L, 4L, 4L, 1L, 2L, 1L, 4L, 3L, 3L, 3L, 1L, 1L, 1L, 3L, 1L,
1L, 1L, 1L, 3L, 3L, 3L, 2L, 3L, 1L, 4L, 5L, 1L, 4L, 3L, 3L, 3L,
1L, 2L, 1L, 2L, 2L, 4L, 1L, 1L, 2L, 3L, 1L, 1L, 1L, 4L, 1L, 5L,
2L, 1L, 2L, 3L, 1L, 5L, 1L, 3L, 1L, 1L, 3L, 1L, 1L, 3L, 3L, 3L,
1L, 4L, 4L, 3L, 2L, 4L, 2L, 1L, 3L, 3L, 1L, 4L, 3L, 3L, 3L, 1L,
1L, 4L, 1L, 4L, 2L, 3L, 3L, 1L, 3L, 3L, 1L, 1L, 1L, 4L, 4L, 1L,
3L, 4L, 1L, 3L, 1L, 1L, 4L, 3L, 4L, 1L, 3L, 1L, 2L, 4L, 3L, 3L,
1L, 1L, 3L, 1L, 5L, 1L, 1L, 1L, 3L, 1L, 3L, 3L, 2L, 1L, 4L, 3L,
3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
3L, 1L, 1L, 1L, 4L, 1L, 4L, 3L, 1L, 3L, 2L, 1L, 1L, 2L, 3L, 1L,
4L, 2L, 3L, 1L, 3L, 4L, 1L, 1L, 3L, 1L, 3L, 3L, 1L, 1L, 1L, 2L,
1L, 1L, 1L, 5L, 4L, 3L, 1L, 3L, 3L, 1L, 3L, 3L, 4L, 1L, 1L, 3L,
1L, 3L, 3L, 1L, 1L, 1L, 4L, 1L, 3L, 1L, 3L, 2L, 1L, 3L, 1L, 4L,
1L, 4L, 3L, 3L, 2L, 3L, 3L, 1L, 1L, 4L, 1L, 1L, 2L, 1L, 1L, 1L,
4L, 1L, 1L, 3L, 3L, 1L, 4L, 3L, 3L, 4L, 1L, 3L, 1L, 5L, 3L, 4L,
1L, 4L, 4L, 1L, 3L, 4L, 1L, 4L, 1L, 1L, 1L, 3L, 2L, 1L, 2L, 4L,
1L, 1L, 5L, 4L, 1L, 1L, 4L, 3L, 3L, 1L, 3L, 1L, 1L, 1L, 1L, 2L,
1L, 3L, 3L, 3L, 1L, 2L, 3L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 3L, 1L,
1L, 3L, 4L, 1L, 1L, 2L, 5L, 3L, 3L, 1L, 1L, 4L, 1L, 4L, 1L, 4L,
2L, 3L, 3L, 1L, 1L, 1L, 4L, 1L, 4L, 3L, 1L, 1L, 1L, 1L, 3L, 1L,
3L, 1L, 1L, 1L, 1L, 4L, 3L, 4L, 3L, 3L, 3L, 2L, 3L, 1L, 1L, 1L,
3L, 5L, 2L, 5L, 1L, 1L, 4L, 3L, 1L, 3L, 2L, 1L, 1L, 5L, 1L, 3L,
3L, 4L, 1L, 1L, 1L, 2L, 5L, 1L, 1L, 4L, 3L, 1L, 1L, 1L, 2L, 1L,
1L, 3L, 3L, 1L, 1L, 4L, 3L, 2L, 4L, 4L, 1L, 1L, 2L, 3L, 1L, 3L,
3L, 1L), .Label = c("White", "Black", "Hispanic", "Asian", "Other"
), class = "factor"), cops2_avg_12mo = c(82.9166666666667, 66,
23.3333333333333, 28, 9.33333333333333, 69.9166666666667, 6,
33.3333333333333, 0, 12, 102, NA, 66, 6, 45, 58.5, 10, 55.9166666666667,
19.5, 6, 10, 234.666666666667, 28, 23, 51.5833333333333, 10,
38, 123.5, 0, 24, 10, 0, 73, 10, 25, 6, 20, 13.4166666666667,
13.8333333333333, 8, 14.8333333333333, 53.5, 42, NA, 57.1666666666667,
0, 24.6666666666667, 10, NA, 54.6666666666667, 38.75, 41, 22,
0.833333333333333, 13, 113.083333333333, 27.3333333333333, 9,
33.1666666666667, 18.75, 57.75, 30, 60.3333333333333, 23.1666666666667,
37, 16.5, 0, 145.5, 45, 31.3333333333333, 0, 10, 187.5, 27.4166666666667,
10, 54.9166666666667, 78.8333333333333, 103.75, 6.66666666666667,
30.4166666666667, 10, 10, 24.6666666666667, 10, 118.333333333333,
61.25, 17, 10, 28, 51, 6, 32.0833333333333, 80.75, 8.83333333333333,
NA, 10, 74.25, 42.25, 47, 60, 41.6666666666667, 19.0833333333333,
98.5, 73.5, 10, 6.66666666666667, 49.8333333333333, 10, 79.8333333333333,
10, 42, 95.8333333333333, 130.583333333333, 5.41666666666667,
47.25, 6, 8, 17.8333333333333, 10, 73.9166666666667, 10, 8, 27.8333333333333,
125.916666666667, 134.166666666667, 88, 10, 58, 62.5, 10.3333333333333,
28.8333333333333, 100.083333333333, 35.5, 0, 0, 10, 105, 7.33333333333333,
35, 9.66666666666667, 10, 4.16666666666667, 10, 8.33333333333333,
70.6666666666667, 28.4166666666667, 38.1666666666667, 8, 101.5,
26.75, 61.1666666666667, 14, 95.5833333333333, 35, 65, 0, 51.75,
57.5, 10, 13.6666666666667, 10, 67.5, 10, 62.3333333333333, 72.6666666666667,
10, 45.5, 20.8333333333333, 31, 84.5, 10, 98.1666666666667, 47.5,
56, 126, 14, 10, 10, 8, 36, 111.5, 54.5, 45.5, 8, 37.5, 84.8333333333333,
39.1666666666667, 56.25, 37.9166666666667, 37.75, 27, 55.6666666666667,
10, 34, 5.83333333333333, 37, 80.0833333333333, 57, 102.166666666667,
12.6666666666667, 10, 19.3333333333333, 10, NA, 51, 25.9166666666667,
14, 36.9090909090909, 38.6666666666667, 0, 6.33333333333333,
NA, 31, 43, 26.5, 10, 34.4166666666667, 77.1666666666667, 10,
10, 89.9166666666667, 59, 37, 77.3333333333333, 64, 52, 19.6666666666667,
66.5, 24, 106.083333333333, 29.6666666666667, 38.1666666666667,
6.66666666666667, 10, 16.75, NA, 86.75, 1, 14, 20.3333333333333,
8, 21, 38.9166666666667, 50.8333333333333, 57.5, 29, 0, 26.5,
51.9166666666667, 71.25, 42.6666666666667, 82, 58.0833333333333,
11.3333333333333, 82, 9.5, 78.6666666666667, 102.5, 71, 10, 70.6666666666667,
NA, 33.8333333333333, 61.25, 87, 36.5, 10, 40.4166666666667,
51.8333333333333, 23, 9.66666666666667, 44.5, 8, 10, 4.16666666666667,
0, 48.8333333333333, 49.25, 15, 70, 10, 6, 10, 34.8333333333333,
108.75, 36, NA, 31, 51, 69.5, 122.5, 48, 43.5833333333333, NA,
10, 20, 80.75, 54.75, 106.916666666667, 53.5, 90.6666666666667,
8.33333333333333, 85.5, 40.5833333333333, 5.5, 10, 61.3333333333333,
69.8333333333333, 10, 51, 0, 49.0833333333333, 13.6666666666667,
13.3333333333333, 5.83333333333333, 33.8333333333333, 14.4166666666667,
11.25, 14, 6, 14.5833333333333, 36, 21, 10, 29.5833333333333,
13, 34, 10, 2.5, 10, 211.916666666667, 19.75, 7.33333333333333,
6, 59.6666666666667, 30.25, 34.25, 16.1666666666667, 10, NA,
NA, 97, 75, 26.5, 8, 32.25, 0, 39, 37, 165.333333333333, 45,
33.1666666666667, 21, 10, 57, 70.3333333333333, 10, 10, 62, 79.1666666666667,
38, 26.1666666666667, 13, 8, 69.6666666666667, 40.5, 100, 0.833333333333333,
8, 82.5, 10, 19.8333333333333, 20.0833333333333, 8, 25.8333333333333,
16.75, 10, 36, NA, 12.8333333333333, 31.4166666666667, 10, 61.4166666666667,
14, 67.5, 3, 83.1666666666667, 48, 43.75, 35.4166666666667, 73,
44.1666666666667, 8, 29.75, 10, 10, 62.6666666666667, 26.9166666666667,
29.6666666666667, 10, NA, 15, 19.4166666666667, 112, 29, 3, 33.5,
62.5, 10, 84.6666666666667, 8, 84.4166666666667, 81.5, 56.1666666666667,
10, 101.416666666667, 16, 10, 19.6666666666667, 60, 73.6666666666667,
74.9166666666667, 21, 5, 15.0833333333333, 17.0833333333333,
17.5, 46, 61.8333333333333, 115.333333333333, 92, 30, 0, 22.75,
16.6666666666667, 15, 15, 10, NA, 56.25, 54, 10, 40, 9.83333333333333,
10.9166666666667, 22.25, 84.75, 80, 1.66666666666667, 99.8333333333333,
10, 38.6666666666667, 169.75, 35.0833333333333, 8, 78.5, 6.33333333333333,
21, 10, 42, 105.166666666667, 162.416666666667, 14, 69.25, 35.8333333333333,
13, 5.83333333333333, 34, 51, 12.75, 44.3333333333333, 39.5,
10, 23, 46.8333333333333, 89.9166666666667, 15, 28, 128.416666666667,
10, 91.6666666666667, 3.5, 54, 23, NA, 29.75, 37.1666666666667,
12.6666666666667, 31.9166666666667, 23, 0, 11, 67.9166666666667,
3.16666666666667, 8.33333333333333, 51, NA, 10, 0, 58.8333333333333
), AGE = c(86, 82, 83, 92, 45, 81, 52, 64, 71, 96, 79, 64, 76,
37, 81, 79, 72, 79, 74, 46, 45, 71, 89, 76, 53, 48, 52, 77, 63,
52, 57, 62, 84, 88, 55, 69, 67, 63, 67, 51, 86, 53, 65, 59, 71,
60, 70, 20, 78, 62, 58, 73, 68, 71, 66, 72, 71, 65, 95, 67, 79,
70, 86, 77, 81, 54, 44, 66, 80, 71, 30, 77, 67, 75, 48, 65, 83,
85, 70, 70, 74, 58, 81, 28, 78, 66, 79, 47, 74, 41, 74, 58, 73,
55, 53, 56, 84, 74, 62, 85, 68, 47, 78, 72, 57, 56, 64, 55, 86,
76, 77, 58, 74, 55, 71, 61, 74, 62, 65, 75, 81, 68, 39, 58, 65,
76, 27, 79, 86, 61, 87, 52, 72, 58, 53, 69, 78, 65, 81, 69, 66,
68, 61, 72, 74, 80, 88, 46, 53, 77, 89, 83, 41, 67, 83, 62, 90,
70, 60, 62, 33, 78, 80, 62, 81, 37, 55, 90, 81, 73, 67, 97, 32,
71, 70, 69, 46, 57, 60, 79, 79, 56, 75, 60, 52, 78, 61, 51, 70,
67, 71, 36, 53, 70, 53, 74, 89, 78, 70, 56, 58, 83, 50, 77, 70,
50, 75, 53, 86, 65, 45, 63, 62, 78, 65, 69, 75, 79, 71, 56, 88,
63, 72, 85, 68, 72, 45, 81, 46, 70, 84, 71, 82, 63, 57, 77, 70,
42, 87, 84, 61, 64, 79, 53, 65, 64, 69, 68, 71, 89, 49, 70, 82,
63, 79, 65, 64, 54, 73, 36, 80, 38, 68, 62, 84, 80, 65, 73, 91,
59, 35, 80, 67, 68, 65, 47, 60, 67, 72, 81, 22, 35, 58, 57, 68,
94, 38, 77, 75, 73, 78, 71, 78, 53, 58, 61, 77, 44, 95, 53, 72,
68, 72, 73, 78, 41, 75, 80, 60, 53, 68, 79, 80, 74, 25, 79, 55,
68, 85, 64, 72, 78, 78, 71, 73, 82, 73, 73, 58, 69, 58, 72, 78,
56, 74, 67, 66, 72, 38, 58, 62, 77, 81, 37, 46, 88, 55, 76, 50,
57, 72, 39, 56, 29, 76, 77, 36, 31, 70, 70, 70, 54, 74, 47, 81,
46, 81, 55, 53, 70, 28, 71, 79, 68, 78, 81, 30, 83, 43, 70, 79,
47, 94, 60, 64, 82, 81, 92, 57, 90, 86, 58, 61, 69, 50, 64, 79,
56, 76, 52, 55, 53, 85, 89, 64, 86, 58, 82, 64, 74, 45, 64, 71,
75, 61, 79, 82, 63, 81, 60, 70, 79, 63, 59, 80, 53, 80, 41, 83,
67, 90, 60, 82, 74, 75, 52, 62, 35, 53, 49, 71, 69, 73, 67, 44,
77, 81, 96, 52, 75, 30, 83, 74, 56, 62, 78, 63, 63, 62, 71, 62,
89, 83, 77, 66, 64, 24, 96, 63, 51, 65, 71, 50, 68, 83, 82, 90,
91, 84, 90, 76, 62, 79, 20, 75, 79, 80, 62, 62, 71, 51, 81, 84,
65, 65, 55, 65, 51, 26, 70)), row.names = c(NA, -500L), class = c("tbl_df",
"tbl", "data.frame"))
I'm hoping to sample by race_f so that the different race groups are similar in AGE and cops2_avg_12mo. Is this at all possible? Thank you!
The answer depends on whether you want to ensure that their AGE/cops2_avg_12mo values always fall within a specific range - in which case you would simply create a subset of your data frame containing only the patients whose AGE and cops2_avg_12mo are within that range. I do think that this is the safer thing to do in terms of quality control. You can view a plot of the two columns of your data (AGE and cops2_avg_12mo) to get an idea of what ranges of values most of the patients fall into:
# Scatter plot of the two matching variables (AGE first, cops2_avg_12mo second).
plot(x[, c("AGE", "cops2_avg_12mo"), drop = FALSE])
Pick ranges for these values that contain enough patients to sample from. (I don't know how many samples you need). Basically, draw a box in the dot plot which contains enough patients to sample from.
So once you determine the ranges/boundaries of the box, just create indexes like so:
# Logical row mask for patients inside the chosen AGE / cops2_avg_12mo box.
# `[[` extracts each column as a plain vector; with a tibble, x[, "AGE"] is a
# one-column tibble and the comparisons would give a logical matrix instead.
# The is.na() terms turn missing values into FALSE so no NA reaches the mask.
age <- x[["AGE"]]
cops <- x[["cops2_avg_12mo"]]
idx <- !is.na(age) & !is.na(cops) &
  age > 50 & age < 75 &
  cops > 0 & cops < 75
then get the subset of your data:
# Keep only the rows selected by the logical mask; all columns are retained.
subsetX <- x[idx, ]
After you create that subset, you can randomly sample using R's sample() function. If you want to do sampling from each race equally, then call sample() with the subsetX data, with each race selected at a time, to get n samples at a time:
# sample() applied to a data frame draws *columns* (it treats the data frame
# as a list), so draw n row positions instead and use them to pull n distinct
# patients from the chosen race group. Errors if the group has fewer than n rows.
asian <- subsetX[subsetX[["race_f"]] == "Asian", ]
asian[sample.int(nrow(asian), n, replace = FALSE), ]
Alternatively, if you are OK with sampling patients that have outlier values (although I expect this will produce more variation in your results), you can weight the sampling instead. Create a histogram of each of the columns - for example, AGE - and divide the histogram bin counts by the total number of patients to get a probability distribution. Then create a vector the same length as the number of patients, where each value is the probability calculated for the bin that patient belongs to (found from the bin indexes obtained when calculating the histogram). Finally, pass that vector into the sample() function as the prob input argument so that values are sampled with their specified probability.
Related
This question already has answers here:
Changing the line type in the ggplot legend
(2 answers)
ggplot2 for grayscale printouts
(3 answers)
Closed 7 months ago.
I am trying to make a time-series graph with multiple y values. I would like to change the shape of the different variables so some are solid, some are dashed etc. I would also like all the colors to be on greyscale.
Does anyone know how I can accomplish this?
I know how to melt my data so that I can plot them all together by the value of the variables but right now I cannot get the shapes to change or the greyscale. Thank you in advance.
# Current attempt: one line per `variable`, distinguished by colour only.
ggplot(melted_data, aes(x = Distance, y = value)) +
  geom_line(aes(color = variable))
data <- structure(list(Distance = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27,
28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75,
76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91,
92, 93, 94, 95, 96, 97, 98, 99, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42,
43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58,
59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,
91, 92, 93, 94, 95, 96, 97, 98, 99, 1, 2, 3, 4, 5, 6, 7, 8, 9,
10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57,
58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73,
74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 1, 2, 3, 4, 5, 6, 7,
8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55,
56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71,
72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87,
88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99), variable = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L), .Label = c("Mg",
"Mn", "Zn", "Ba"), class = "factor"), value = c(0.903247645,
0.912560748, 0.896003508, 0.909572697, 0.883631829, 0.905722594,
0.892465355, 0.909271173, 0.880506202, 0.889278401, 0.878534542,
0.959209459, 0.913303825, 0.929893977, 0.97778374, 0.9885554,
0.929716333, 1.028422583, 1.025638955, 1.011352651, 1.041343955,
1.092562951, 1.129761801, 1.088857171, 1.107257284, 1.116728405,
1.103053734, 1.041662037, 1.134182243, 1.104550315, 1.086952767,
1.106004784, 1.057688595, 1.034347579, 1.04641385, 1.139270945,
1.048446018, 1.033827731, 1.075554754, 1.029893202, 1.074749532,
1.001626205, 0.977053541, 0.987467665, 0.999540478, 0.945184816,
0.959677178, 0.962807712, 0.967023936, 1.024286493, 0.881264816,
0.967181342, 1.000316876, 0.956168258, 1.003214572, 1.00047837,
0.940103474, 0.929875987, 0.928227112, 0.982410241, 0.983035162,
0.976666772, 1.019755049, 1.075189042, 0.975380543, 0.981316782,
0.986876269, 1.026690916, 1.052379934, 1.001547298, 0.979888683,
1.008209647, 0.976098272, 0.944479556, 0.996767684, 1.018077758,
1.028862706, 1.08510417, 1.08963868, 1.048481179, 1.139954126,
1.107066353, 1.122920581, 1.23904326, 1.19449336, 1.179971969,
1.165865352, 1.068804094, 1.099436469, 1.073307737, 1.07045113,
1.101007051, 1.011962649, 1.11202545, 1.097883672, 1.05361424,
0.993283703, 1.046635444, 1.04951188, 0.086720869, 0.113119382,
0.088197332, 0.081547788, 0.079373211, 0.07888827, 0.072865285,
0.079637996, 0.066314774, 0.097585729, 0.185034982, 0.214466904,
0.294317625, 0.481389256, 0.531196058, 0.715842439, 0.865098887,
0.987242052, 1.081028291, 1.240920518, 1.313524957, 1.543771699,
1.78495042, 1.746572555, 2.048760527, 2.101438775, 1.967474033,
2.000286925, 2.014020838, 1.924470659, 1.75696549, 1.786681246,
1.633290961, 1.455799758, 1.315346538, 1.435348984, 1.27887702,
1.152818928, 1.095127218, 0.987502349, 1.062278922, 0.898540082,
0.83617998, 0.889057689, 0.825563648, 0.788347646, 0.790973555,
0.775541228, 0.815063004, 0.848723108, 0.66783059, 0.672629631,
0.747809615, 0.72338158, 0.666220438, 0.664051795, 0.597260657,
0.689282162, 0.663808452, 0.678551141, 0.672917354, 0.686199986,
0.724202364, 0.746195474, 0.686135659, 0.654148537, 0.713488795,
0.72446665, 0.699529989, 0.630120423, 0.661767463, 0.663290351,
0.705879842, 0.709399338, 0.76228353, 0.714368918, 0.720561695,
0.837036666, 0.923882149, 1.014163852, 1.221410703, 1.315825246,
1.368054705, 1.641746627, 1.630198312, 1.698589629, 1.562956393,
1.427322658, 1.53964983, 1.574583495, 1.527101216, 1.380123116,
1.28649445, 1.29251968, 1.330565441, 1.317758525, 1.19292313,
1.217953538, 1.218591815, 0.746612627, 0.818368055, 0.696689824,
0.748702805, 0.717457681, 0.766243608, 0.805305259, 0.855909762,
0.803357905, 0.889646097, 0.854456208, 1.067795473, 1.051422575,
1.17061972, 1.138440648, 1.052796919, 1.040998633, 1.161739158,
1.025956799, 0.971567748, 1.072911493, 0.952121155, 1.040392714,
1.069745522, 1.068549198, 1.090194087, 1.214584829, 1.157485471,
1.245813376, 1.336359991, 1.204038397, 1.126255292, 1.131057736,
0.922042386, 1.037566449, 1.100852394, 1.121842367, 0.998657748,
1.006938923, 1.002800377, 0.897387497, 0.93902937, 0.889327622,
0.802133735, 0.855245047, 0.860702407, 0.704324249, 0.905827093,
0.760155095, 0.760247698, 0.655991619, 0.677006743, 0.668001976,
0.623410532, 0.569302474, 0.523713794, 0.690042836, 0.539115342,
0.528696218, 0.57851915, 0.60294784, 0.581392042, 0.65277069,
0.65620614, 0.625397246, 0.697647782, 0.6180657, 0.632326126,
0.684659215, 0.606197513, 0.630134281, 0.637151517, 0.574538208,
0.605993607, 0.533522181, 0.544522236, 0.577535469, 0.573427383,
0.672984155, 0.735286828, 0.7532343, 0.881292245, 0.801132661,
1.122761046, 1.137397845, 1.173190388, 1.138033979, 1.126494557,
1.144871399, 1.087042815, 0.981750792, 0.992888445, 0.955352455,
1.074357698, 1.027127808, 1.083248059, 1.010304962, 1.037776316,
1.052809984, 0.742734852, 0.839492568, 0.743899849, 0.817080816,
0.773569657, 0.735728339, 0.715168283, 0.78077814, 0.694280484,
0.773303425, 0.768041196, 0.883401699, 0.818274274, 0.715927964,
0.696938222, 0.832246446, 0.73089346, 0.790965216, 0.799717389,
0.865896893, 0.946771069, 0.954212275, 1.023740345, 1.027036123,
1.086336263, 1.064542815, 0.9463809, 0.924081609, 0.999832641,
0.911277648, 0.922871168, 0.953134033, 0.786732115, 0.802026729,
0.832863371, 0.863952475, 0.817833153, 0.748586924, 0.72095701,
0.738213943, 0.672736744, 0.704947698, 0.531743532, 0.634123809,
0.683548549, 0.733277161, 0.608993729, 0.752162246, 0.568705823,
0.643172511, 0.597251486, 0.655514695, 0.583437677, 0.557676441,
0.646713866, 0.527005047, 0.578023512, 0.576281064, 0.600923204,
0.578475648, 0.551957027, 0.585007991, 0.623858699, 0.630936819,
0.636198589, 0.565476603, 0.658861425, 0.577557604, 0.629178306,
0.646092809, 0.566079299, 0.60953767, 0.680135261, 0.500802233,
0.704656678, 0.61109605, 0.645344144, 0.667139888, 0.734969576,
0.780062983, 0.783090234, 0.83005691, 0.905356723, 0.933746319,
0.947613375, 0.923115827, 0.873482691, 0.746883952, 0.850273618,
0.795256154, 0.800825928, 0.772630039, 0.749567395, 0.7823457,
0.772609842, 0.736269985, 0.699705666, 0.716860238, 0.65909369
)), row.names = c(NA, -396L), class = "data.frame")
You can use the linetype parameter within the aesthetics:
# Map `variable` to both colour and linetype so every series gets its own
# dash pattern, and swap the default hue palette for a grey scale so the
# figure is greyscale as requested. Both aesthetics share one legend because
# they map the same variable.
ggplot(data, aes(x = Distance, y = value)) +
  geom_line(aes(color = variable, linetype = variable)) +
  scale_color_grey()
I am trying to remove the outliers from various variables at the same time in my dataset but with the function used it seems that when it finds one outlier it turns the whole row into NA.
That's a problem because I have to apply the same process to a larger dataset and I am worried that it will considerably reduce my sample...
So I would like to turn only the cell that contains the outlier into NA, without turning the whole row into NA. Is that possible?
Thank you for your input
#function used for outliers
# Tukey-fence outlier test.
# x: numeric vector.
# Returns a logical vector parallel to x: TRUE where a value lies more than
# 1.5 * IQR above the third quartile or below the first quartile. Quartiles
# are computed with NA removed; NA entries of x give NA in the result.
outliers <- function(x) {
  quartiles <- quantile(x, probs = c(.25, .75), na.rm = TRUE)
  fence_width <- (quartiles[2] - quartiles[1]) * 1.5
  too_high <- x > quartiles[2] + fence_width
  too_low <- x < quartiles[1] - fence_width
  too_high | too_low
}
# Drop every row that holds an outlier in any of the given columns.
# dflinear: data frame to filter.
# cols:     names of the (numeric) columns to screen; defaults to all columns.
# Returns the data frame with outlier rows removed. Columns are screened one
# after another, so later columns are assessed on the rows that survived the
# earlier ones.
remove_outliers <- function(dflinear, cols = names(dflinear)) {
  for (col in cols) {
    flagged <- outliers(dflinear[[col]])
    # outliers() returns NA for missing values, and an NA in a logical row
    # index produces an all-NA row. `%in% TRUE` maps NA to FALSE, so rows
    # with a missing value are kept unchanged instead of being blanked out.
    keep <- !(flagged %in% TRUE)
    # drop = FALSE keeps a one-column data frame from collapsing to a vector.
    dflinear <- dflinear[keep, , drop = FALSE]
  }
  dflinear
}
# Screen only the four lab measurements; other columns are left unchecked.
dflinear_without_outliers <- remove_outliers(
  dflinear,
  cols = c("insuline", "glucose", "hdl", "ldl")
)
#Reproducible sample below
dflinear<- structure(list(id = structure(c("SA01", "SA02", "SA03", "SA04",
"SA05", "SA06", "SA07", "SA08", "SA09", "SA10", "SA11", "SA12",
"SA13", "SA14", "SA15", "SA16", "SA17", "SA18", "SA19", "SA20",
"SA21", "SA22", "SA23", "SA24", "SA25", "SA26", "SA27", "SA28",
"SA29", "SA30", "SA31", "SA32", "SA33", "SA34", "SA35", "SA36",
"SA37", "SA38", "SA39", "SA40", "SA41", "SA42", "SA43", "SA44",
"SA45", "SA46", "SA47", "SA48", "SA49", "SA50", "SA51", "SA52",
"SA53", "SA54", "SA56", "SA57", "SA58", "SA59", "SA60", "SA61",
"SA62", "SA63", "SA64", "SA65", "SA66", "SA67", "SA68", "SA69",
"SA72", "SA73", "SA74", "SA75", "SA76", "SA77", "SA78", "SA79",
"SA80", "SA81", "SA82", "SA83", "SA84", "SA85", "SA86", "SA87",
"SA88", "SA89", "SA90", "SA92", "SA93", "SA94", "SA95", "SA96",
"SA97", "SA99", "SA100", "SA101", "SA102", "SA103", "SA104",
"SA105", "SA107", "SA108", "SA109", "SA110", "SA111", "SA112",
"SA113", "SA114", "SA115", "SA116", "SA118", "SC01", "SC02",
"SC03", "SC04", "SC05", "SC06", "SC07", "SC08", "SC09", "SC10",
"SC11", "SC12", "SC13", "SC14", "SC15", "SC16", "SC17", "SC18",
"SC19", "SC20", "SC21", "SC22", "SC23", "SC24", "SC25", "SC26",
"SC27", "SC28", "SC29", "SC30", "SC31", "SC32", "SC33", "SC34",
"SC35", "SC36", "SC37", "SC38", "M01", "M02", "M03", "M04", "M05",
"M06", "M07", "M08", "M09", "M10", "M11", "M12", "M13", "M14",
"M15", "M16", "M17", "M18", "M19", "M20", "M21", "M22", "M23",
"M24", "M25", "M26", "M27", "M28", "M29", "M30", "M31", "M32",
"M33", "M34", "M35", "M36", "M37", "M38", "M39", "M40", "M41",
"M42", "M43", "M44", "M45", "M46", "M47", "M48", "M49", "M50",
"M51", "M52", "M53", "SA01", "SA02", "SA03", "SA04", "SA05",
"SA06", "SA07", "SA08", "SA09", "SA10", "SA11", "SA12", "SA13",
"SA14", "SA15", "SA16", "SA17", "SA18", "SA19", "SA20", "SA21",
"SA22", "SA23", "SA24", "SA25", "SA26", "SA27", "SA28", "SA29",
"SA30", "SA31", "SA32", "SA33", "SA34", "SA35", "SA36", "SA37",
"SA38", "SA39", "SA40", "SA41", "SA42", "SA43", "SA44", "SA45",
"SA46", "SA47", "SA48", "SA49", "SA50", "SA51", "SA52", "SA53",
"SA54", "SA56", "SA57", "SA58", "SA59", "SA60", "SA61", "SA62",
"SA63", "SA64", "SA65", "SA66", "SA67", "SA68", "SA69", "SA72",
"SA73", "SA74", "SA75", "SA76", "SA77", "SA78", "SA79", "SA80",
"SA81", "SA82", "SA83", "SA84", "SA85", "SA86", "SA87", "SA88",
"SA89", "SA90", "SA92", "SA93", "SA94", "SA95", "SA96", "SA97",
"SA99", "SA100", "SA101", "SA102", "SA103", "SA104", "SA105",
"SA107", "SA108", "SA109", "SA110", "SA111", "SA112", "SA113",
"SA114", "SA115", "SA116", "SA118", "SC01", "SC02", "SC03", "SC04",
"SC05", "SC06", "SC07", "SC08", "SC09", "SC10", "SC11", "SC12",
"SC13", "SC14", "SC15", "SC16", "SC17", "SC18", "SC19", "SC20",
"SC21", "SC22", "SC23", "SC24", "SC25", "SC26", "SC27", "SC28",
"SC29", "SC30", "SC31", "SC32", "SC33", "SC34", "SC35", "SC36",
"SC37", "SC38", "M01", "M02", "M03", "M04", "M05", "M06", "M07",
"M08", "M09", "M10", "M11", "M12", "M13", "M14", "M15", "M16",
"M17", "M18", "M19", "M20", "M21", "M22", "M23", "M24", "M25",
"M26", "M27", "M28", "M29", "M30", "M31", "M32", "M33", "M34",
"M35", "M36", "M37", "M38", "M39", "M40", "M41", "M42", "M43",
"M44", "M45", "M46", "M47", "M48", "M49", "M50", "M51", "M52",
"M53"), label = "Code of PrevenGo", format.spss = "A5", display_width = 12L),
group = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L), .Label = c("Metab", "SA", "SC"), class = "factor"),
sex = structure(c(2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L,
2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L,
1L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 2L,
1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L,
2L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L,
1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 1L,
2L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L,
2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L,
1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 2L,
1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 2L,
1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L,
1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L,
1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L,
1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 1L,
1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L,
1L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 1L,
1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L,
1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L,
2L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 2L,
2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L,
1L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L,
1L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L), .Label = c("F", "M"), class = "factor"),
time = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L), insuline = structure(c(9, 4.1, 3.3, 9.4, 22.9, 16.2,
8.7, 16.7, 21.2, 21, 12.8, 7.3, 38.4, 20.2, 19.6, 6.4, 18.9,
12.1, 8.2, 17, 15.6, 12.5, 19.1, 13.7, 8, 20.1, 19.8, 6.8,
15.4, 14.7, 11.9, 8.8, 7.9, 51.2, 10.8, 8.1, 28.6, 8.6, 27.9,
13.3, 9, 16.3, 13.3, 5.8, 27.3, 4.2, 8.2, 9.9, 20.1, 11.7,
8.7, 18.1, 10.9, 27.4, 14.6, 29.1, 10.2, 20.2, 9.7, 12.3,
18.2, 1.9, 11.6, 14.6, 7.9, 11.2, 13.8, 21.2, 23.8, 18, 23.5,
21.4, 11.4, 12, 6.6, 13.5, 10.4, 25.3, 56.8, 10.7, 21.5,
8.5, 30.2, 5.3, 7.5, 15.9, 11.6, 22.4, 25.2, 6.1, 15.1, 9.3,
24.3, 30.8, 8.9, 9.8, 34.1, 13.4, 23.1, 21.1, 4.8, 20.1,
38.5, 16.1, 34.1, 16.1, 17.7, 41.4, 20.4, 21.5, 36.3, 15.9,
8.8, 6.1, 29, 4, 23.1, 36.8, 16.4, 15.5, 28.8, 15.9, NA,
7.1, 6.1, 10, 9.1, 25.2, 19.1, 6.9, 14.7, 23.1, 19.3, 12.3,
7.3, 5.9, 8, 0.5, 9, 4, 10.4, 21.4, 14.6, 8.8, 24.5, 5.3,
9.8, 17.6, 10.2, 10.7, 23, 14.5, 4.6, 33.3, 23.3, 7.2, 3.7,
13.1, 6.7, 20, 7.5, 9.2, 4.5, 2.1, 7.7, 11.7, 7.6, 22.5,
8.8, 5.1, 14.8, 15.1, 18.8, 24.3, 14, 17.2, 16.2, 23.6, 17.4,
16.5, 12.1, 15.3, 11.4, 8.7, 22.6, 10.5, 7.4, 15.1, 13.1,
24.6, 19.3, 19.7, 14.1, 5.9, 19.7, 14.9, 5.9, 17.2, 16.9,
6.2, 11.2, 4.1, 10, 3.7, 3.6, 11.6, 16.9, NA, 8, 17.3, NA,
18.3, 4, 3.1, 26.4, 12.9, 17.9, 10.3, 22.5, NA, NA, 23.4,
15.1, NA, 11.9, 27, 6.2, NA, 21.5, 11.6, 15.8, 8.6, 15.2,
10.1, 20.6, 21.7, 45.3, 8.3, 19.5, 29.2, 21.5, 11.4, 9.5,
31.8, 35.3, 11.2, 15.4, NA, 8.5, 22.6, 14.3, NA, 11.8, 11.4,
4.2, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 35.8, NA, NA,
NA, NA, NA, 19.7, 42.8, 30.6, 12.2, 5.2, 4.9, 20.4, NA, 23.5,
NA, 13.6, 19.4, 6.9, 16.7, 7.2, 14.7, 59.2, 22, 41.4, 18.1,
10.5, 19.8, 17.4, NA, 25.9, NA, 8.3, 25.9, 5.7, 17.1, 25.2,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 10.3, 9.1, 14.8,
13.7, 6.2, 17.9, 13.9, 14.6, 70.4, 23.6, 13.8, 15.2, 9.9,
14, 27.6, 14.3, 23.7, 11, 12.1, 13.5, 21, NA, 7.2, 12.3,
4.4, 6.2, 3.9, 15, 9.6, NA, 9, 10.3, NA, 13.3, 6, 11.3, 17.6,
8.5, 10, NA, 11.8, 10.4, 26.2, NA, 10, 5.7, 16.3, 4.7, 20.3,
7.7, 14.6, 9.4, 6.3, 10, 11.1, 6.7, 42.5, NA, NA, NA, 7.7,
18.6, NA, 16.7, 25.4, 21.8, 26.8, 10.2, 13.8, 11.6, 19.1,
8.3, 3.8, 31.1, NA, 7.1, 11.1, 8.7, 19, 16, 31.8, 11.7, 3.4,
17.6, 12.3, 5.1, 17.5, 6.7, 3.8, 16.6, 6.1), format.spss = "F4.2", display_width = 11L),
glucose = structure(c(90, 95, 79, 85, 95, 97, 86, 74, 88,
95, 94, 88, 86, 94, 86, 95, 97, 88, 88, 88, 83, 103, 79,
67, 88, 79, 90, 79, 97, 94, 85, 83, 88, 97, 81, 95, 92, 94,
99, 79, 83, 92, 81, 92, 79, 94, 83, 79, 81, 92, 86, 95, 92,
95, 92, 85, 94, 81, 86, 85, 99, 92, 85, 72, 86, 81, 79, 86,
97, 88, 92, 97, 83, 103, 97, 95, 85, 77, 77, 83, 99, 90,
77, 77, 83, 92, 88, 83, 88, 86, 88, 97, 101, 99, 88, 101,
94, 86, 85, 83, 86, 88, 92, 94, 94, 90, 160, 94, 83, 95,
97, 88, 88, 95, 90, 92, 113, 104, 85, 101, 91.8, 99, 94,
85, 85, 83, 86, 88, 95, 79, 101, 92, 83, 90, 85, 95, 88,
79, 90, 79, 94, 99, 83, 85, 85, 77, 99, 81, 92, 86.4, 95.4,
82.8, 73.8, 81, 90, 82.8, 79.2, 90, 82.8, 91.8, 90, 84.6,
84.6, 84.6, 77.4, 77.4, 75.6, 88.2, 79.2, 92, 90, 113, 81,
81, 81, 84.6, 88.2, 73.8, 81, 81, 82.8, 79.2, 70.2, 91.8,
97.2, 82.8, 70.2, 91.8, 93.6, 86.4, 93.6, 73.8, 95.4, 81,
97.2, 77.4, 90, 82.8, 86.4, 88.2, 88.2, 73.8, 90, 92, 83,
86, 99, NA, 86, 81, NA, 99, 83, 86, 76, 90, 85, 90, 92, NA,
NA, 79, 79, NA, 86, 81, 88, NA, 90, 86, 92, 85, 92, 83, 92,
90, 92, 95, 94, 88, 90, 86, 88, 101, 95, 92, 81, NA, 92,
90, 81, NA, 90, 81, 88, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, 85, NA, NA, NA, NA, NA, 85, 88, 86, 88, 106, 101, 88,
NA, 79, NA, 85, 99, 92, 79, 88, 88, 95, 81, 86, 77, 81, 92,
97, NA, 86, NA, 88, 94, 81, 86, 85, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, 85, 88, 95, 83, 92, 112, 94, 95, 108,
97, 90, 88, 86, 97, 95, 88, 90, 88, 77, 94, 81, NA, 79, 83,
95, 88, 81, 92, 92, NA, 88, 86, NA, 85, 85, 97, 81, 88, 90,
NA, 77.4, 94, 83, NA, 95, 85, 92, 83, 95, 88, 94, 94, 88,
77, 90, 86, 92, NA, NA, NA, 95, 92, NA, 90, 103, 90, 85,
92, 83, 81, 94, 81, 79, 94, NA, 92, 99, 95, 84, 95, 72, 90,
79, 97.5, 85, 88, 79, 81, 72, 85, 88), format.spss = "F4.2", display_width = 11L),
hdl = structure(c(54, 55, 48, 38, 46, 50, 45, 38, 50, 43,
39, 32, 35, 34, 40, 48, 53, 33, 42, 34, 41, 48, 51, 38, 53,
38, 37, 44, 37, 33, 54, 47, 51, 39, 44, 54, 32, 53, 39, 36,
58, 41, 34, 43, 40, 49, 49, 50, 37, 36, 54, 47, 35, 40, 50,
44, 40, 43, 45, 41, 34, 50, 46, 46, 50, 53, 53, 45, 37, 70,
51, 55, 51, 58, 58, 49, 44, 37, 32, 64, 41, 63, 46, 55, 46,
65, 43, 55, 42, 56, 39, 50, 38, 46, 45, 53, 53, 39, 45, 47,
48, 32, 45, 45, 36, 60, 30, 43, 43, 57, 36, 56, 45, 40, 40,
61, 50, 29, 55, 38, 35, 47, 42, 50, 46, 26, 60, 33, 36, 34,
44, 59, 45, 44, 55, 45, 53, 38, 50, 40, 57, 46, 48, 45, 43,
49, 53, 39, 46, 39, 36, 39, 36, 42, 40, 50, 63, 46, 45, 39,
43, 30, 57, 46, 40, 39, 39, 53, 40, 54, 56, 40, 37, 48, 43,
29, 46, 45, 82, 31, 34, 37, 41, 63, 34, 50, 37, 51, 36, 42,
41, 34, 55, 40, 42, 60, 36, 38, 52, 57, 48, 48, 46, 47, 50,
41, 48, NA, 40, 45, NA, 43, 58, 42, 48, 44, 46, 47, 55, NA,
NA, 38, 52, NA, 53, 31, 51, NA, 32, 51, 41, 38, 57, 36, 50,
41, 60, 65, 39, 52, 36, 36, 49, 43, 34, 44, 41, NA, 50, 52,
37, NA, 58, 45, 34, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
46, NA, NA, NA, NA, NA, 59, 55, 50, 46, 58, 58, 42, NA, 31,
NA, 48, 43, 66, 55, 51, 41, 50, 38, 46, 41, 43, 38, 48, NA,
46, NA, 56, 44, 46, 48, 49, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, 63, 41, 39, 46, 58, 53, 33, 53, 48, 33, 44, 46,
49, 48, 44, 55, 44, 39, 32, 46, 50, NA, 47, 53, 39, 51, 61,
48, 32, NA, 42, 46, NA, 49, 48, 52, 39, 40, 38, NA, 31, 46,
48, NA, 51, 58, 43, 49, 43, 65, 41, 61, 49, 35, 37, 36, 58,
NA, NA, NA, 38, 45, NA, 58, 31, 49, 52, 65, 32, 45, 39, 37,
41, 34, NA, 42, 51, 39, 48, 36, 35, 55, 38, 48, 53, 41, 39,
49, 63, 41, 47), label = "HDL-Cholesterol", format.spss = "F3.2", display_width = 11L),
ldl = structure(c(100, 104, 171, 153, 107, 152, 87, 101,
70, 137, 96, 95, 98, 94, 92, 102, 63, 104, 62, 75, 125, 117,
114, 132, 112, 146, 121, 91, 113, 120, 96, 96, 95, 87, 96,
134, 98, 92, 88, 101, 133, 113, 77, 128, 97, 169, 136, 96,
74, 59, 121, 66, 109, 103, 116, 86, 87, 124, 88, 94, 77,
98, 90, 133, 79, 78, 98, 129, 62, 62, 96, 72, 85, 98, 101,
132, 69, 196, 76, 125, 105, 108, 89, 108, 123, 51, 92, 50,
121, 105, 80, 103, 59, 96, 89, 65, 77, 90, 92, 65, 123, 96,
80, 128, 92, 124, 96, 83, 120, 145, 114, 134, 116, 65, 91,
103, 84, 123, 99, 96, 61, 82, 85, 116, 116, 113, 121, 69,
82, 100, 108, 99, 144, 152, 158, 128, 112, 89, 119, 61, 99,
147, 109, 121, 92, 115, 95, 62, 72, 130, 96, 76, 117, 96,
108, 131, 120, 67, 99, 105, 63, 63, 103, 128, 92, 120, 146,
106, 103, 94, 85, 122, 111, 102, 143, 74, 87, 80, 67, 140,
85, 87, 101, 94, 122, 124, 82, 150, 92, 84, 119, 98, 89,
97, 117, 122, 111, 86, 90, 110, 107, 150, 103, 94, 149, 159,
91, NA, 109, 126, NA, 167, 77, 90, 103, 80, 68, 75, 55, NA,
NA, 74, 113, NA, 102, 116, 84, NA, 66, 85, 114, 111, 101,
95, 92, 86, 96, 90, 92, 77, 91, 108, 86, 118, 85, 127, 99,
NA, 160, 80, 63, NA, 123, 86, 94, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, 106, NA, NA, NA, NA, NA, 70, 85, 70, 96,
102, 117, 101, NA, 146, NA, 94, 122, 122, 94, 110, 121, 39,
72, 48, 109, 110, 60, 95, NA, 83, NA, 79, 87, 113, 103, 55,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 157, 103, 56,
92, 114, 78, 97, 106, 117, 61, 72, 83, 91, 122, 106, 103,
89, 51, 89, 153, 90, NA, 132, 132, 110, 84, 84, 96, 72, NA,
104, 122, NA, 80, 113, 106, 62, 72, 121, NA, 102, 125, 130,
NA, 111, 119, 66, 109, 119, 91, 92, 120, 160, 93, 117, 126,
88, NA, NA, NA, 115, 100, NA, 200, 79, 95, 99, 89, 123, 108,
82, 108, 81, 103, NA, 103, 149, 116, 115, 122, 95, 106, 89,
128, 118, 123, 51, 90, 130, 119, 120), label = "LDL-Cholesterol", format.spss = "F4.2", display_width = 11L)), row.names = c(NA,
-404L), class = c("tbl_df", "tbl", "data.frame"), reshapeLong = list(
varying = list(c("age_1", "age_2"), c("whz_1", "whz_2"),
c("haz_1", "haz_2"), c("waz_1", "waz_2"), c("zbmi_1",
"zbmi_2"), c("wc_1", "wc_2"), c("abc_1", "abc_2"), c("PA_1",
"PA_2"), c("PAextra_1", "PAextra_2"), c("TVweekdays_1",
"TVweekdays_2"), c("TVweekend_1", "TVweekend_2"), c("kidmed_1",
"kidmed_2"), c("totalcholesterol_1", "totalcholesterol_2"
), c("ldl_1", "ldl_2"), c("hdl_1", "hdl_2"), c("triglycerides_1",
"triglycerides_2"), c("glucose_1", "glucose_2"), c("insuline_1",
"insuline_2"), c("hba1c_1", "hba1c_2"), c("homair_1",
"homair_2"), c("fatmass_1", "fatmass_2"), c("energykcal_1",
"energykcal_2"), c("protein_1", "protein_2"), c("proteinpc_1",
"proteinpc_2"), c("carbohydrates_1", "carbohydrates_2"
), c("carbohydratespc_1", "carbohydratespc_2"), c("sugar_1",
"sugar_2"), c("sugarpc_1", "sugarpc_2"), c("starch_1",
"starch_2"), c("fruitportions_1", "fruitportions_2"),
c("vegetablesportions_1", "vegetablesportions_2"), c("vegetalfiber_1",
"vegetalfiber_2"), c("solublefiber_1", "solublefiber_2"
), c("insolublefiber_1", "insolublefiber_2"), c("lipids_1",
"lipids_2"), c("lipidspc_1", "lipidspc_2"), c("sfa_1",
"sfa_2"), c("sfapc_1", "sfapc_2"), c("mufa_1", "mufa_2"
), c("mufapc_1", "mufapc_2"), c("pufa_1", "pufa_2"),
c("pufapc_1", "pufapc_2"), c("cholesterolintake_1", "cholesterolintake_2"
)), v.names = c("age", "whz", "haz", "waz", "zbmi", "wc",
"abc", "PA", "PAextra", "TVweekdays", "TVweekend", "kidmed",
"totalcholesterol", "ldl", "hdl", "triglycerides", "glucose",
"insuline", "hba1c", "homair", "fatmass", "energykcal", "protein",
"proteinpc", "carbohydrates", "carbohydratespc", "sugar",
"sugarpc", "starch", "fruitportions", "vegetablesportions",
"vegetalfiber", "solublefiber", "insolublefiber", "lipids",
" lipidspc", "sfa", "sfapc", "mufa", "mufapc", "pufa", "pufapc",
"cholesterolintake"), idvar = c("id", "group"), timevar = "time"))
You can drop the outliers by changing your remove_outlier function to this:
#' Replace outlying values with NA, one column at a time.
#'
#' @param dflinear A data frame (or tibble) to clean.
#' @param cols Character vector of column names to process; defaults to
#'   every column of `dflinear`.
#' @return `dflinear` with the flagged entries of `cols` set to `NA`.
#'
#' Relies on `outliers()`, which is defined elsewhere; it is assumed to
#' return a logical vector as long as its input -- TODO confirm.
remove_outliers <- function(dflinear, cols = names(dflinear)) {
  for (col in cols) {
    values <- dflinear[[col]]
    flagged <- outliers(values)
    # Blank the flagged entries in place instead of rebuilding the column
    # with ifelse(): ifelse() takes its attributes from the test vector, so
    # it would drop the column's own attributes (labels, formats) and turn
    # factors into their integer codes. A missing flag still produces NA,
    # exactly as ifelse(flag, NA, value) did.
    values[is.na(flagged) | flagged] <- NA
    dflinear[[col]] <- values
  }
  dflinear
}
But I would think very carefully about whether this is a good approach to outlier detection and removal. This procedure removes values that look like regular parts of the distribution. With a lot of values, you would expect some of them to fall outside the range from Q1 − 1.5·IQR to Q3 + 1.5·IQR.
E.g., this is the normal Q-Q plot (qqnorm) for the ldl variable. There don't appear to be any problematic values at all, really, but your procedure throws out the top five values and the lowest value:
I have this function that allows me to create multiple graphs on various variables of the dataset.
However, in the output the y-axis is always labelled with the name of the list, "varlist", instead of the name of each variable in the list, i.e. insuline, glucose, hdl and ldl.
How can I do that? Thank you.
# Multiple box plot per group per time
library(ggplot2)
# Give the eight columns of dflinear their expected names
names(dflinear) <- c(
  "id", "group", "sex", "time",
  "insuline", "glucose", "hdl", "ldl"
)
# Collect the four outcome columns in a list, each element named after
# the column it was taken from; the function below is applied to this list
varlist <- list(
  insuline = dflinear$insuline,
  glucose = dflinear$glucose,
  hdl = dflinear$hdl,
  ldl = dflinear$ldl
)
# Create the function boxplot
# A() takes one element of the list as `varlist`, reads the data frame
# `dflinear` from the calling environment, and returns a ggplot box plot
# with 'time' on x and 'group' as fill.
# NOTE(review): only ggplot2 is loaded in this snippet; %>% and group_by()
# presumably come from dplyr being attached elsewhere -- confirm.
A <- function (varlist) {
# NOTE(review): 'group' is passed to group_by() as a quoted string, not as
# the bare column name -- verify that this groups as intended.
dflinear %>% group_by('group')%>%
# y is given the literal string 'varlist', not the value of the argument,
# so every plot gets "varlist" as its y-axis title.
ggplot(mapping = aes_string(x='time', y='varlist', fill='group')) +
geom_boxplot()
}
# Apply it to the whole list and graph the plots
# lapply() calls A once per list element and returns the plots as a list;
# the bare `plots` on the last line prints all of them.
plots<-lapply(varlist, FUN = A)
plots
Reproducible dataset
dflinear<- structure(list(id = structure(c("SA01", "SA02", "SA03", "SA04",
"SA05", "SA06", "SA07", "SA08", "SA09", "SA10", "SA11", "SA12",
"SA13", "SA14", "SA15", "SA16", "SA17", "SA18", "SA19", "SA20",
"SA21", "SA22", "SA23", "SA24", "SA25", "SA26", "SA27", "SA28",
"SA29", "SA30", "SA31", "SA32", "SA33", "SA34", "SA35", "SA36",
"SA37", "SA38", "SA39", "SA40", "SA41", "SA42", "SA43", "SA44",
"SA45", "SA46", "SA47", "SA48", "SA49", "SA50", "SA51", "SA52",
"SA53", "SA54", "SA56", "SA57", "SA58", "SA59", "SA60", "SA61",
"SA62", "SA63", "SA64", "SA65", "SA66", "SA67", "SA68", "SA69",
"SA72", "SA73", "SA74", "SA75", "SA76", "SA77", "SA78", "SA79",
"SA80", "SA81", "SA82", "SA83", "SA84", "SA85", "SA86", "SA87",
"SA88", "SA89", "SA90", "SA92", "SA93", "SA94", "SA95", "SA96",
"SA97", "SA99", "SA100", "SA101", "SA102", "SA103", "SA104",
"SA105", "SA107", "SA108", "SA109", "SA110", "SA111", "SA112",
"SA113", "SA114", "SA115", "SA116", "SA118", "SC01", "SC02",
"SC03", "SC04", "SC05", "SC06", "SC07", "SC08", "SC09", "SC10",
"SC11", "SC12", "SC13", "SC14", "SC15", "SC16", "SC17", "SC18",
"SC19", "SC20", "SC21", "SC22", "SC23", "SC24", "SC25", "SC26",
"SC27", "SC28", "SC29", "SC30", "SC31", "SC32", "SC33", "SC34",
"SC35", "SC36", "SC37", "SC38", "M01", "M02", "M03", "M04", "M05",
"M06", "M07", "M08", "M09", "M10", "M11", "M12", "M13", "M14",
"M15", "M16", "M17", "M18", "M19", "M20", "M21", "M22", "M23",
"M24", "M25", "M26", "M27", "M28", "M29", "M30", "M31", "M32",
"M33", "M34", "M35", "M36", "M37", "M38", "M39", "M40", "M41",
"M42", "M43", "M44", "M45", "M46", "M47", "M48", "M49", "M50",
"M51", "M52", "M53", "SA01", "SA02", "SA03", "SA04", "SA05",
"SA06", "SA07", "SA08", "SA09", "SA10", "SA11", "SA12", "SA13",
"SA14", "SA15", "SA16", "SA17", "SA18", "SA19", "SA20", "SA21",
"SA22", "SA23", "SA24", "SA25", "SA26", "SA27", "SA28", "SA29",
"SA30", "SA31", "SA32", "SA33", "SA34", "SA35", "SA36", "SA37",
"SA38", "SA39", "SA40", "SA41", "SA42", "SA43", "SA44", "SA45",
"SA46", "SA47", "SA48", "SA49", "SA50", "SA51", "SA52", "SA53",
"SA54", "SA56", "SA57", "SA58", "SA59", "SA60", "SA61", "SA62",
"SA63", "SA64", "SA65", "SA66", "SA67", "SA68", "SA69", "SA72",
"SA73", "SA74", "SA75", "SA76", "SA77", "SA78", "SA79", "SA80",
"SA81", "SA82", "SA83", "SA84", "SA85", "SA86", "SA87", "SA88",
"SA89", "SA90", "SA92", "SA93", "SA94", "SA95", "SA96", "SA97",
"SA99", "SA100", "SA101", "SA102", "SA103", "SA104", "SA105",
"SA107", "SA108", "SA109", "SA110", "SA111", "SA112", "SA113",
"SA114", "SA115", "SA116", "SA118", "SC01", "SC02", "SC03", "SC04",
"SC05", "SC06", "SC07", "SC08", "SC09", "SC10", "SC11", "SC12",
"SC13", "SC14", "SC15", "SC16", "SC17", "SC18", "SC19", "SC20",
"SC21", "SC22", "SC23", "SC24", "SC25", "SC26", "SC27", "SC28",
"SC29", "SC30", "SC31", "SC32", "SC33", "SC34", "SC35", "SC36",
"SC37", "SC38", "M01", "M02", "M03", "M04", "M05", "M06", "M07",
"M08", "M09", "M10", "M11", "M12", "M13", "M14", "M15", "M16",
"M17", "M18", "M19", "M20", "M21", "M22", "M23", "M24", "M25",
"M26", "M27", "M28", "M29", "M30", "M31", "M32", "M33", "M34",
"M35", "M36", "M37", "M38", "M39", "M40", "M41", "M42", "M43",
"M44", "M45", "M46", "M47", "M48", "M49", "M50", "M51", "M52",
"M53"), label = "Code of PrevenGo", format.spss = "A5", display_width = 12L),
group = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L), .Label = c("Metab", "SA", "SC"), class = "factor"),
sex = structure(c(2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L,
2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L,
1L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 2L,
1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L,
2L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L,
1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 1L,
2L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L,
2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L,
1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 2L,
1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 2L,
1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L,
1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L,
1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L,
1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 1L,
1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L,
1L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 1L,
1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L,
1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L,
2L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 2L,
2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L,
1L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L,
1L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L), .Label = c("F", "M"), class = "factor"),
time = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L), insuline = structure(c(9, 4.1, 3.3, 9.4, 22.9, 16.2,
8.7, 16.7, 21.2, 21, 12.8, 7.3, 38.4, 20.2, 19.6, 6.4, 18.9,
12.1, 8.2, 17, 15.6, 12.5, 19.1, 13.7, 8, 20.1, 19.8, 6.8,
15.4, 14.7, 11.9, 8.8, 7.9, 51.2, 10.8, 8.1, 28.6, 8.6, 27.9,
13.3, 9, 16.3, 13.3, 5.8, 27.3, 4.2, 8.2, 9.9, 20.1, 11.7,
8.7, 18.1, 10.9, 27.4, 14.6, 29.1, 10.2, 20.2, 9.7, 12.3,
18.2, 1.9, 11.6, 14.6, 7.9, 11.2, 13.8, 21.2, 23.8, 18, 23.5,
21.4, 11.4, 12, 6.6, 13.5, 10.4, 25.3, 56.8, 10.7, 21.5,
8.5, 30.2, 5.3, 7.5, 15.9, 11.6, 22.4, 25.2, 6.1, 15.1, 9.3,
24.3, 30.8, 8.9, 9.8, 34.1, 13.4, 23.1, 21.1, 4.8, 20.1,
38.5, 16.1, 34.1, 16.1, 17.7, 41.4, 20.4, 21.5, 36.3, 15.9,
8.8, 6.1, 29, 4, 23.1, 36.8, 16.4, 15.5, 28.8, 15.9, NA,
7.1, 6.1, 10, 9.1, 25.2, 19.1, 6.9, 14.7, 23.1, 19.3, 12.3,
7.3, 5.9, 8, 0.5, 9, 4, 10.4, 21.4, 14.6, 8.8, 24.5, 5.3,
9.8, 17.6, 10.2, 10.7, 23, 14.5, 4.6, 33.3, 23.3, 7.2, 3.7,
13.1, 6.7, 20, 7.5, 9.2, 4.5, 2.1, 7.7, 11.7, 7.6, 22.5,
8.8, 5.1, 14.8, 15.1, 18.8, 24.3, 14, 17.2, 16.2, 23.6, 17.4,
16.5, 12.1, 15.3, 11.4, 8.7, 22.6, 10.5, 7.4, 15.1, 13.1,
24.6, 19.3, 19.7, 14.1, 5.9, 19.7, 14.9, 5.9, 17.2, 16.9,
6.2, 11.2, 4.1, 10, 3.7, 3.6, 11.6, 16.9, NA, 8, 17.3, NA,
18.3, 4, 3.1, 26.4, 12.9, 17.9, 10.3, 22.5, NA, NA, 23.4,
15.1, NA, 11.9, 27, 6.2, NA, 21.5, 11.6, 15.8, 8.6, 15.2,
10.1, 20.6, 21.7, 45.3, 8.3, 19.5, 29.2, 21.5, 11.4, 9.5,
31.8, 35.3, 11.2, 15.4, NA, 8.5, 22.6, 14.3, NA, 11.8, 11.4,
4.2, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 35.8, NA, NA,
NA, NA, NA, 19.7, 42.8, 30.6, 12.2, 5.2, 4.9, 20.4, NA, 23.5,
NA, 13.6, 19.4, 6.9, 16.7, 7.2, 14.7, 59.2, 22, 41.4, 18.1,
10.5, 19.8, 17.4, NA, 25.9, NA, 8.3, 25.9, 5.7, 17.1, 25.2,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 10.3, 9.1, 14.8,
13.7, 6.2, 17.9, 13.9, 14.6, 70.4, 23.6, 13.8, 15.2, 9.9,
14, 27.6, 14.3, 23.7, 11, 12.1, 13.5, 21, NA, 7.2, 12.3,
4.4, 6.2, 3.9, 15, 9.6, NA, 9, 10.3, NA, 13.3, 6, 11.3, 17.6,
8.5, 10, NA, 11.8, 10.4, 26.2, NA, 10, 5.7, 16.3, 4.7, 20.3,
7.7, 14.6, 9.4, 6.3, 10, 11.1, 6.7, 42.5, NA, NA, NA, 7.7,
18.6, NA, 16.7, 25.4, 21.8, 26.8, 10.2, 13.8, 11.6, 19.1,
8.3, 3.8, 31.1, NA, 7.1, 11.1, 8.7, 19, 16, 31.8, 11.7, 3.4,
17.6, 12.3, 5.1, 17.5, 6.7, 3.8, 16.6, 6.1), format.spss = "F4.2", display_width = 11L),
glucose = structure(c(90, 95, 79, 85, 95, 97, 86, 74, 88,
95, 94, 88, 86, 94, 86, 95, 97, 88, 88, 88, 83, 103, 79,
67, 88, 79, 90, 79, 97, 94, 85, 83, 88, 97, 81, 95, 92, 94,
99, 79, 83, 92, 81, 92, 79, 94, 83, 79, 81, 92, 86, 95, 92,
95, 92, 85, 94, 81, 86, 85, 99, 92, 85, 72, 86, 81, 79, 86,
97, 88, 92, 97, 83, 103, 97, 95, 85, 77, 77, 83, 99, 90,
77, 77, 83, 92, 88, 83, 88, 86, 88, 97, 101, 99, 88, 101,
94, 86, 85, 83, 86, 88, 92, 94, 94, 90, 160, 94, 83, 95,
97, 88, 88, 95, 90, 92, 113, 104, 85, 101, 91.8, 99, 94,
85, 85, 83, 86, 88, 95, 79, 101, 92, 83, 90, 85, 95, 88,
79, 90, 79, 94, 99, 83, 85, 85, 77, 99, 81, 92, 86.4, 95.4,
82.8, 73.8, 81, 90, 82.8, 79.2, 90, 82.8, 91.8, 90, 84.6,
84.6, 84.6, 77.4, 77.4, 75.6, 88.2, 79.2, 92, 90, 113, 81,
81, 81, 84.6, 88.2, 73.8, 81, 81, 82.8, 79.2, 70.2, 91.8,
97.2, 82.8, 70.2, 91.8, 93.6, 86.4, 93.6, 73.8, 95.4, 81,
97.2, 77.4, 90, 82.8, 86.4, 88.2, 88.2, 73.8, 90, 92, 83,
86, 99, NA, 86, 81, NA, 99, 83, 86, 76, 90, 85, 90, 92, NA,
NA, 79, 79, NA, 86, 81, 88, NA, 90, 86, 92, 85, 92, 83, 92,
90, 92, 95, 94, 88, 90, 86, 88, 101, 95, 92, 81, NA, 92,
90, 81, NA, 90, 81, 88, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, 85, NA, NA, NA, NA, NA, 85, 88, 86, 88, 106, 101, 88,
NA, 79, NA, 85, 99, 92, 79, 88, 88, 95, 81, 86, 77, 81, 92,
97, NA, 86, NA, 88, 94, 81, 86, 85, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, 85, 88, 95, 83, 92, 112, 94, 95, 108,
97, 90, 88, 86, 97, 95, 88, 90, 88, 77, 94, 81, NA, 79, 83,
95, 88, 81, 92, 92, NA, 88, 86, NA, 85, 85, 97, 81, 88, 90,
NA, 77.4, 94, 83, NA, 95, 85, 92, 83, 95, 88, 94, 94, 88,
77, 90, 86, 92, NA, NA, NA, 95, 92, NA, 90, 103, 90, 85,
92, 83, 81, 94, 81, 79, 94, NA, 92, 99, 95, 84, 95, 72, 90,
79, 97.5, 85, 88, 79, 81, 72, 85, 88), format.spss = "F4.2", display_width = 11L),
hdl = structure(c(54, 55, 48, 38, 46, 50, 45, 38, 50, 43,
39, 32, 35, 34, 40, 48, 53, 33, 42, 34, 41, 48, 51, 38, 53,
38, 37, 44, 37, 33, 54, 47, 51, 39, 44, 54, 32, 53, 39, 36,
58, 41, 34, 43, 40, 49, 49, 50, 37, 36, 54, 47, 35, 40, 50,
44, 40, 43, 45, 41, 34, 50, 46, 46, 50, 53, 53, 45, 37, 70,
51, 55, 51, 58, 58, 49, 44, 37, 32, 64, 41, 63, 46, 55, 46,
65, 43, 55, 42, 56, 39, 50, 38, 46, 45, 53, 53, 39, 45, 47,
48, 32, 45, 45, 36, 60, 30, 43, 43, 57, 36, 56, 45, 40, 40,
61, 50, 29, 55, 38, 35, 47, 42, 50, 46, 26, 60, 33, 36, 34,
44, 59, 45, 44, 55, 45, 53, 38, 50, 40, 57, 46, 48, 45, 43,
49, 53, 39, 46, 39, 36, 39, 36, 42, 40, 50, 63, 46, 45, 39,
43, 30, 57, 46, 40, 39, 39, 53, 40, 54, 56, 40, 37, 48, 43,
29, 46, 45, 82, 31, 34, 37, 41, 63, 34, 50, 37, 51, 36, 42,
41, 34, 55, 40, 42, 60, 36, 38, 52, 57, 48, 48, 46, 47, 50,
41, 48, NA, 40, 45, NA, 43, 58, 42, 48, 44, 46, 47, 55, NA,
NA, 38, 52, NA, 53, 31, 51, NA, 32, 51, 41, 38, 57, 36, 50,
41, 60, 65, 39, 52, 36, 36, 49, 43, 34, 44, 41, NA, 50, 52,
37, NA, 58, 45, 34, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
46, NA, NA, NA, NA, NA, 59, 55, 50, 46, 58, 58, 42, NA, 31,
NA, 48, 43, 66, 55, 51, 41, 50, 38, 46, 41, 43, 38, 48, NA,
46, NA, 56, 44, 46, 48, 49, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, 63, 41, 39, 46, 58, 53, 33, 53, 48, 33, 44, 46,
49, 48, 44, 55, 44, 39, 32, 46, 50, NA, 47, 53, 39, 51, 61,
48, 32, NA, 42, 46, NA, 49, 48, 52, 39, 40, 38, NA, 31, 46,
48, NA, 51, 58, 43, 49, 43, 65, 41, 61, 49, 35, 37, 36, 58,
NA, NA, NA, 38, 45, NA, 58, 31, 49, 52, 65, 32, 45, 39, 37,
41, 34, NA, 42, 51, 39, 48, 36, 35, 55, 38, 48, 53, 41, 39,
49, 63, 41, 47), label = "HDL-Cholesterol", format.spss = "F3.2", display_width = 11L),
ldl = structure(c(100, 104, 171, 153, 107, 152, 87, 101,
70, 137, 96, 95, 98, 94, 92, 102, 63, 104, 62, 75, 125, 117,
114, 132, 112, 146, 121, 91, 113, 120, 96, 96, 95, 87, 96,
134, 98, 92, 88, 101, 133, 113, 77, 128, 97, 169, 136, 96,
74, 59, 121, 66, 109, 103, 116, 86, 87, 124, 88, 94, 77,
98, 90, 133, 79, 78, 98, 129, 62, 62, 96, 72, 85, 98, 101,
132, 69, 196, 76, 125, 105, 108, 89, 108, 123, 51, 92, 50,
121, 105, 80, 103, 59, 96, 89, 65, 77, 90, 92, 65, 123, 96,
80, 128, 92, 124, 96, 83, 120, 145, 114, 134, 116, 65, 91,
103, 84, 123, 99, 96, 61, 82, 85, 116, 116, 113, 121, 69,
82, 100, 108, 99, 144, 152, 158, 128, 112, 89, 119, 61, 99,
147, 109, 121, 92, 115, 95, 62, 72, 130, 96, 76, 117, 96,
108, 131, 120, 67, 99, 105, 63, 63, 103, 128, 92, 120, 146,
106, 103, 94, 85, 122, 111, 102, 143, 74, 87, 80, 67, 140,
85, 87, 101, 94, 122, 124, 82, 150, 92, 84, 119, 98, 89,
97, 117, 122, 111, 86, 90, 110, 107, 150, 103, 94, 149, 159,
91, NA, 109, 126, NA, 167, 77, 90, 103, 80, 68, 75, 55, NA,
NA, 74, 113, NA, 102, 116, 84, NA, 66, 85, 114, 111, 101,
95, 92, 86, 96, 90, 92, 77, 91, 108, 86, 118, 85, 127, 99,
NA, 160, 80, 63, NA, 123, 86, 94, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, 106, NA, NA, NA, NA, NA, 70, 85, 70, 96,
102, 117, 101, NA, 146, NA, 94, 122, 122, 94, 110, 121, 39,
72, 48, 109, 110, 60, 95, NA, 83, NA, 79, 87, 113, 103, 55,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 157, 103, 56,
92, 114, 78, 97, 106, 117, 61, 72, 83, 91, 122, 106, 103,
89, 51, 89, 153, 90, NA, 132, 132, 110, 84, 84, 96, 72, NA,
104, 122, NA, 80, 113, 106, 62, 72, 121, NA, 102, 125, 130,
NA, 111, 119, 66, 109, 119, 91, 92, 120, 160, 93, 117, 126,
88, NA, NA, NA, 115, 100, NA, 200, 79, 95, 99, 89, 123, 108,
82, 108, 81, 103, NA, 103, 149, 116, 115, 122, 95, 106, 89,
128, 118, 123, 51, 90, 130, 119, 120), label = "LDL-Cholesterol", format.spss = "F4.2", display_width = 11L)), row.names = c(NA,
-404L), class = c("tbl_df", "tbl", "data.frame"), reshapeLong = list(
varying = list(c("age_1", "age_2"), c("whz_1", "whz_2"),
c("haz_1", "haz_2"), c("waz_1", "waz_2"), c("zbmi_1",
"zbmi_2"), c("wc_1", "wc_2"), c("abc_1", "abc_2"), c("PA_1",
"PA_2"), c("PAextra_1", "PAextra_2"), c("TVweekdays_1",
"TVweekdays_2"), c("TVweekend_1", "TVweekend_2"), c("kidmed_1",
"kidmed_2"), c("totalcholesterol_1", "totalcholesterol_2"
), c("ldl_1", "ldl_2"), c("hdl_1", "hdl_2"), c("triglycerides_1",
"triglycerides_2"), c("glucose_1", "glucose_2"), c("insuline_1",
"insuline_2"), c("hba1c_1", "hba1c_2"), c("homair_1",
"homair_2"), c("fatmass_1", "fatmass_2"), c("energykcal_1",
"energykcal_2"), c("protein_1", "protein_2"), c("proteinpc_1",
"proteinpc_2"), c("carbohydrates_1", "carbohydrates_2"
), c("carbohydratespc_1", "carbohydratespc_2"), c("sugar_1",
"sugar_2"), c("sugarpc_1", "sugarpc_2"), c("starch_1",
"starch_2"), c("fruitportions_1", "fruitportions_2"),
c("vegetablesportions_1", "vegetablesportions_2"), c("vegetalfiber_1",
"vegetalfiber_2"), c("solublefiber_1", "solublefiber_2"
), c("insolublefiber_1", "insolublefiber_2"), c("lipids_1",
"lipids_2"), c("lipidspc_1", "lipidspc_2"), c("sfa_1",
"sfa_2"), c("sfapc_1", "sfapc_2"), c("mufa_1", "mufa_2"
), c("mufapc_1", "mufapc_2"), c("pufa_1", "pufa_2"),
c("pufapc_1", "pufapc_2"), c("cholesterolintake_1", "cholesterolintake_2"
)), v.names = c("age", "whz", "haz", "waz", "zbmi", "wc",
"abc", "PA", "PAextra", "TVweekdays", "TVweekend", "kidmed",
"totalcholesterol", "ldl", "hdl", "triglycerides", "glucose",
"insuline", "hba1c", "homair", "fatmass", "energykcal", "protein",
"proteinpc", "carbohydrates", "carbohydratespc", "sugar",
"sugarpc", "starch", "fruitportions", "vegetablesportions",
"vegetalfiber", "solublefiber", "insolublefiber", "lipids",
" lipidspc", "sfa", "sfapc", "mufa", "mufapc", "pufa", "pufapc",
"cholesterolintake"), idvar = c("id", "group"), timevar = "time"))
Instead of making your varlist a list of vectors, you could simply pass a character vector with the names of the columns you want to plot. Then use aes_string(..., y = varlist) inside your function and you will automatically get the name of the variable as the y-axis title:
# Multiple box plot per group per time
library(ggplot2)
library(dplyr)
# Names of the columns to plot. Each element is named after itself, so the
# list returned by lapply() below is keyed by variable name.
varlist <- setNames(nm = c("insuline", "glucose", "hdl", "ldl"))
# Create the function boxplot
#
# Draws a box plot of one outcome column against time, filled by group.
#   varlist: name of the column to put on the y axis, as a single string
#            (the argument keeps its original name for compatibility).
#   data:    data frame holding `time`, `group` and the outcome column;
#            defaults to the global `dflinear`, so A(name) works as before.
# Returns a ggplot object whose y-axis title is the column name.
A <- function(varlist, data = dflinear) {
  # No dplyr::group_by() step is needed: ggplot2 forms the boxes from the
  # aesthetics, not from dplyr grouping, and group_by("group") would group
  # on the literal string "group" rather than on the column.
  # NOTE(review): `time` is stored as integer (1L/2L); if one box per time
  # point is wanted, x may need to be "factor(time)" -- confirm against the
  # intended plot.
  ggplot(data, mapping = aes_string(x = "time", y = varlist, fill = "group")) +
    geom_boxplot()
}
# Apply it to the whole list and graph the plots
# lapply() keeps the names of varlist, so plots$insuline, plots$ldl, ...
# can be used as well as positional indexing.
plots <- lapply(varlist, FUN = A)
plots[[1]]
I have a data frame of 200 individuals, and using dplyr I would like to randomly select half of them, create a variable called 'sex,' and assign 100 with sex as male. For the remaining 100 individuals, I would like to assign the sex as female. A reproducible example of the data set is available below.
df <- dput(input)
structure(list(id = 1:200, age = c(6L, 4L, 4L, 6L, 1L, 5L, 3L,
1L, 0L, 0L, 0L, 5L, 5L, 5L, 3L, 4L, 4L, 2L, 2L, 3L, 3L, 4L, 6L,
4L, 4L, 0L, 4L, 6L, 1L, 5L, 2L, 6L, 2L, 2L, 0L, 3L, 1L, 6L, 0L,
2L, 5L, 3L, 5L, 3L, 1L, 6L, 6L, 0L, 4L, 5L, 0L, 5L, 3L, 6L, 1L,
2L, 1L, 1L, 4L, 2L, 1L, 2L, 0L, 4L, 3L, 3L, 6L, 2L, 1L, 2L, 5L,
0L, 5L, 2L, 5L, 3L, 3L, 3L, 2L, 5L, 1L, 0L, 0L, 1L, 6L, 3L, 1L,
5L, 6L, 4L, 4L, 4L, 0L, 6L, 6L, 3L, 4L, 6L, 5L, 2L, 5L, 6L, 2L,
2L, 4L, 0L, 4L, 6L, 5L, 6L, 0L, 6L, 2L, 1L, 5L, 5L, 5L, 5L, 3L,
1L, 6L, 3L, 1L, 1L, 3L, 4L, 2L, 4L, 2L, 0L, 5L, 0L, 3L, 1L, 1L,
2L, 0L, 5L, 2L, 3L, 6L, 5L, 2L, 6L, 0L, 0L, 6L, 6L, 1L, 4L, 2L,
0L, 4L, 1L, 3L, 6L, 3L, 4L, 3L, 0L, 1L, 6L, 6L, 5L, 4L, 1L, 1L,
6L, 0L, 1L, 2L, 1L, 1L, 2L, 0L, 4L, 1L, 2L, 2L, 2L, 1L, 6L, 5L,
3L, 2L, 3L, 5L, 2L, 3L, 4L, 5L, 0L, 6L, 5L, 1L, 4L, 5L, 3L, 5L,
5L), x = c(21, 9, 31, 55, 5, 63, 63, 3, 13, 21, 53, 77, 5, 67,
63, 31, 17, 5, 21, 45, 79, 3, 7, 43, 27, 1, 63, 11, 37, 33, 27,
53, 71, 73, 97, 87, 77, 17, 85, 91, 49, 87, 89, 61, 65, 17, 71,
33, 53, 85, 49, 41, 75, 85, 79, 75, 23, 63, 89, 31, 29, 47, 75,
63, 65, 27, 27, 71, 89, 29, 25, 49, 91, 91, 39, 65, 45, 99, 53,
21, 29, 81, 35, 7, 27, 81, 93, 41, 79, 83, 31, 51, 33, 75, 15,
69, 7, 29, 7, 35, 87, 93, 57, 13, 91, 87, 95, 77, 7, 37, 81,
99, 83, 69, 85, 5, 77, 69, 55, 7, 39, 5, 41, 1, 63, 25, 13, 39,
97, 73, 25, 49, 35, 95, 59, 75, 23, 35, 67, 73, 91, 83, 79, 9,
27, 89, 79, 53, 89, 69, 95, 57, 11, 45, 63, 5, 25, 61, 3, 89,
1, 61, 85, 75, 67, 73, 63, 77, 43, 31, 69, 39, 47, 59, 75, 45,
57, 73, 5, 85, 57, 13, 91, 69, 79, 89, 13, 33, 15, 23, 89, 85,
39, 87, 7, 97, 57, 5, 61, 85), y = c(41, 57, 29, 59, 83, 77,
35, 73, 99, 69, 85, 23, 85, 11, 63, 97, 73, 47, 57, 73, 77, 1,
91, 17, 71, 57, 11, 3, 81, 31, 5, 41, 69, 93, 3, 11, 45, 97,
81, 87, 43, 9, 53, 61, 11, 63, 59, 33, 49, 89, 87, 79, 47, 59,
41, 25, 47, 13, 69, 11, 93, 83, 91, 85, 13, 95, 13, 37, 99, 35,
11, 63, 19, 99, 71, 55, 5, 21, 43, 59, 49, 15, 99, 15, 75, 77,
53, 51, 91, 45, 83, 21, 29, 35, 3, 27, 97, 95, 29, 53, 55, 41,
45, 31, 75, 37, 15, 47, 3, 1, 99, 55, 81, 37, 1, 41, 51, 45,
27, 83, 9, 69, 13, 81, 91, 55, 51, 31, 17, 97, 1, 47, 35, 7,
53, 59, 5, 51, 7, 5, 93, 63, 95, 51, 33, 43, 75, 67, 59, 89,
49, 83, 21, 49, 5, 5, 19, 45, 29, 41, 25, 3, 9, 1, 73, 53, 43,
99, 69, 41, 21, 3, 3, 13, 39, 21, 55, 75, 91, 31, 79, 17, 43,
91, 73, 11, 75, 15, 49, 77, 77, 23, 83, 47, 51, 53, 57, 99, 35,
15)), row.names = c(NA, -200L), class = "data.frame", .Names = c("id",
"age", "x", "y"))
I'm new to using dplyr, so I'm not exactly sure how to perform this operation. I'm thinking it would look something like this:
# Draw a random half of the rows (sample_frac here; sample_n would also select
# the 100 individuals) and mark every sampled row as male
new_df <- mutate(sample_frac(df, 0.5), sex = "male")
but obviously that just results in a new data frame. Is there a way to select 100 males from the original data frame, then use something like an ifelse statement to assign the rest as female?
If you absolutely need a 50/50 distribution between male and female, you could run with dplyr:
# Label a random half of the rows "male" and every other row "female".
# The draw is made on row positions without replacement, so it does not rely
# on id being unique, and the number of males follows the size of df
# (n() %/% 2, i.e. 100 of the 200 rows here) instead of being hard-coded.
# Rows keep their original order; sex is appended as the last column.
dfs <- df %>%
  mutate(
    sex = if_else(
      row_number() %in% sample.int(n(), n() %/% 2L),
      "male",
      "female"
    )
  )
results:
# Count how many rows received each label
table(dfs$sex)
female male
100 100
I have two plots from two different data frames.
The dput() output for data frame 1 is as follows:
ppv_npv2 <- structure(list(pred.prob = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27,
28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
44, 45, 46, 47, 48, 49, 50, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27,
28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
44, 45, 46, 47, 48, 49, 50, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27,
28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
44, 45, 46, 47, 48, 49, 50), variable = structure(c(1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L), .Label = c("ppv_2.5", "ppv_50", "ppv_97.5"), class = "factor"),
value = c(4.8, 9.3, 13.4, 17.2, 20.8, 24.2, 27.3, 30.3, 33.1,
35.7, 38.2, 40.5, 42.8, 44.9, 46.9, 48.8, 50.6, 52.3, 54,
55.6, 57.1, 58.5, 59.9, 61.2, 62.5, 63.7, 64.9, 66, 67.1,
68.2, 69.2, 70.2, 71.1, 72, 72.9, 73.8, 74.6, 75.4, 76.2,
76.9, 77.7, 78.4, 79, 79.7, 80.4, 81, 81.6, 82.2, 82.8, 83.3,
7.2, 13.6, 19.3, 24.4, 28.9, 33, 36.8, 40.2, 43.3, 46.2,
48.9, 51.3, 53.6, 55.7, 57.7, 59.6, 61.3, 62.9, 64.5, 65.9,
67.3, 68.6, 69.8, 70.9, 72, 73.1, 74.1, 75, 75.9, 76.8, 77.6,
78.4, 79.2, 79.9, 80.6, 81.3, 82, 82.6, 83.2, 83.8, 84.3,
84.8, 85.4, 85.9, 86.3, 86.8, 87.3, 87.7, 88.1, 88.5, 11.7,
21.1, 28.8, 35.3, 40.8, 45.5, 49.7, 53.3, 56.4, 59.3, 61.8,
64.1, 66.2, 68.1, 69.8, 71.4, 72.9, 74.2, 75.5, 76.6, 77.7,
78.7, 79.7, 80.5, 81.4, 82.2, 82.9, 83.6, 84.3, 84.9, 85.5,
86, 86.6, 87.1, 87.6, 88.1, 88.5, 88.9, 89.3, 89.7, 90.1,
90.5, 90.8, 91.1, 91.5, 91.8, 92.1, 92.4, 92.6, 92.9)),
.Names =c("pred.prob","variable", "value"), row.names = c(NA, -150L),
class = "data.frame")
The plot that I have created comes from the following code:
# Line chart of value against pred.prob from ppv_npv2: the ppv_50 series is
# drawn in red (linetype 2), the ppv_2.5 and ppv_97.5 series in blue
# (linetype 4). Each series is its own layer fed with the matching rows.
p1 <- ggplot(data = ppv_npv2, mapping = aes(x = pred.prob, y = value)) +
  geom_line(
    data = subset(ppv_npv2, variable == "ppv_50"),
    colour = "red", linetype = 2
  ) +
  geom_line(
    data = subset(ppv_npv2, variable == "ppv_2.5"),
    colour = "blue", linetype = 4
  ) +
  geom_line(
    data = subset(ppv_npv2, variable == "ppv_97.5"),
    colour = "blue", linetype = 4
  ) +
  theme_classic() +
  # Axis titles; the "\n" padding separates the titles from the tick labels
  labs(
    y = "Predicted positive predictive value (%) \n",
    x = "\n Prevalence (%)"
  ) +
  scale_x_continuous(limits = c(0, 50), breaks = seq(0, 50, 2)) +
  scale_y_continuous(
    limits = c(0, 100), breaks = seq(0, 100, 10), expand = c(0, 0)
  ) +
  # Text sizes for tick labels and axis titles, set in a single theme() call
  theme(
    axis.text.x = element_text(size = 12, hjust = .5, vjust = .8, face = "plain"),
    axis.text.y = element_text(size = 12, hjust = .5, vjust = .8, face = "plain"),
    axis.title.x = element_text(size = 14, face = "bold"),
    axis.title.y = element_text(size = 14, face = "bold")
  )
p1
The dput for the second data frame is
dat <- structure(list(PPV = c(57, 89, 19, 52, 52, 62, 63, 46, 31, 52,
54, 13, 17, 47, 48, 52, 96, 88, 64, 33, 62, 77, 75, 72), Prevalence = c(19,
35, 12, 16, 24, 6, 28, 13, 8, 19, 30, 6, 8, 20, 11, 25, 29, 55,
46, 13, 16, 22, 23, 20), total = c(939L, 323L, 306L, 703L, 137L,
833L, 360L, 317L, 440L, 2072L, 209L, 386L, 142L, 358L, 167L,
503L, 180L, 233L, 342L, 478L, 4870L, 1104L, 1813L, 1567L),
Author = structure(c(1L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 9L, 10L, 11L, 12L,
15L,18L, 19L, 8L, 14L, 16L, 17L, 21L, 20L, 20L, 13L, 10L),
.Label = c("Aldous",
"Bahrmann", "Body", "Christ ", "Collinson", "Eggers", "Freund",
"Giannitis", "Hammerer-Lercher", "Hoeller", "Inoue", "Invernizi",
"Keller", "Khan", "Lotze", "Melki ", "Normann", "Santalol", "Sebbane",
"Shah", "Thelin "), class = "factor"), Study.assay = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L), .Label = c("TnI", "TnT"), class = "factor")),
.Names = c("PPV", "Prevalence", "total", "Author", "Study.assay"),
class ="data.frame", row.names = c(NA, -24L))
And the plot from dataframe 2 is as follows
# Bubble chart of the rows of dat: one point per row at (Prevalence, PPV),
# bubble area scaled by total, and the Author name printed at the same spot.
#
# Changes from the earlier version of this snippet:
#  * aes() refers to bare column names instead of dat$...; ggplot2 looks the
#    names up in the data argument, which keeps layers and facets consistent.
#  * the stray guide=F argument to ggplot() is gone (it had no effect).
#  * each axis gets exactly one scale; previously a first pair of scales was
#    added and then replaced by the second pair, which only produced
#    "scale already present" messages.
p2 <- ggplot(dat, aes(x = Prevalence, y = PPV, size = total, label = Author)) +
  geom_point(colour = "white", fill = "red", shape = 21) +
  scale_size_area(max_size = 10) +
  # Fixed label size; setting it here overrides the size = total mapping
  geom_text(size = 2.5) +
  theme_classic() +
  ylab("Predicted positive predictive value (%) \n") +
  xlab("\n Prevalence (%)") +
  # NOTE: the 0-50 x limits exclude any row with Prevalence above 50 (dat has
  # one at 55); ggplot2 drops such rows with a warning.
  scale_x_continuous(limits = c(0, 50), breaks = seq(0, 50, 2)) +
  scale_y_continuous(
    limits = c(0, 100), breaks = seq(0, 100, 10), expand = c(0, 0)
  ) +
  theme(
    axis.text.x = element_text(size = 12, hjust = .5, vjust = .8, face = "plain"),
    axis.text.y = element_text(size = 12, hjust = .5, vjust = .8, face = "plain")
  ) +
  theme(
    axis.title.x = element_text(size = 14, face = "bold"),
    axis.title.y = element_text(size = 14, face = "bold")
  ) +
  # Hide the size legend
  theme(legend.position = "none")
p2
As you can see both plots have the same axis and limits. I have two questions:
a) Can I overlay plot 2 onto plot 1?
b) Can I make the bubbles on plot 2 more transparent and choose colours by the factor dat$Study.assay (green and purple)?
Many thanks in advance - have spent a day researching this but no solution yet.
Here's a start using your data,
# Overlay of both data sets in one plot: the series of ppv_npv2 as lines
# (one per level of variable), and the rows of dat on top as semi-transparent
# bubbles sized by total, coloured by Study.assay and labelled by Author.
# ggplot() is called empty because every layer supplies its own data frame.
# The outer parentheses print the plot as well as assigning it.
(plot2 <- ggplot() +
  geom_line(
    data = ppv_npv2,
    aes(pred.prob, value, group = variable, colour = variable)
  ) +
  # label is not an aesthetic of geom_point (mapping it only triggers an
  # "unknown aesthetics" warning), so it is mapped in geom_text alone
  geom_point(
    data = dat,
    aes(Prevalence, PPV, size = total, colour = Study.assay),
    alpha = 0.4
  ) +
  # Constant label size: size is set here, so it is not mapped in aes()
  geom_text(
    data = dat,
    aes(Prevalence, PPV, label = Author),
    size = 3, hjust = -1, vjust = 0
  )
)
It's not the orthodox ggplot2 way, but it's a start.