R, extrapolate average scores from graph - r
I have a graph like this:
With data that created it like this:
test<-structure(list(study_id = c(1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 13,
13, 13, 13, 13, 34, 34, 34, 34, 34, 40, 40, 40, 40, 40, 44, 44,
44, 44, 44, 47, 47, 47, 47, 47, 49, 49, 49, 49, 49, 51, 51, 51,
51, 51, 61, 61, 61, 61, 61, 66, 66, 66, 66, 66, 67, 67, 67, 67,
67, 68, 68, 68, 68, 68, 72, 72, 72, 72, 72, 75, 75, 75, 75, 75,
80, 80, 80, 80, 80, 84, 84, 84, 84, 84, 86, 86, 86, 86, 86, 94,
94, 94, 94, 94, 95, 95, 95, 95, 95, 101, 101, 101, 101, 101,
105, 105, 105, 105, 105, 111, 111, 111, 111, 111, 117, 117, 117,
117, 117, 123, 123, 123, 123, 123, 124, 124, 124, 124, 124, 125,
125, 125, 125, 125, 126, 126, 126, 126, 126, 131, 131, 131, 131,
131, 145, 145, 145, 145, 145, 153, 153, 153, 153, 153, 154, 154,
154, 154, 154, 155, 155, 155, 155, 155, 156, 156, 156, 156, 156,
161, 161, 161, 161, 161, 162, 162, 162, 162, 162, 166, 166, 166,
166, 166, 167, 167, 167, 167, 167, 169, 169, 169, 169, 169, 172,
172, 172, 172, 172, 175, 175, 175, 175, 175, 179, 179, 179, 179,
179, 180, 180, 180, 180, 180, 184, 184, 184, 184, 184, 185, 185,
185, 185, 185, 188, 188, 188, 188, 188, 190, 190, 190, 190, 190,
192, 192, 192, 192, 192, 194, 194, 194, 194, 194, 195, 195, 195,
195, 195, 197, 197, 197, 197, 197, 199, 199, 199, 199, 199, 203,
203, 203, 203, 203, 207, 207, 207, 207, 207, 210, 210, 210, 210,
210, 211, 211, 211, 211, 211, 212, 212, 212, 212, 212, 217, 217,
217, 217, 217, 221, 221, 221, 221, 221, 223, 223, 223, 223, 223,
227, 227, 227, 227, 227, 228, 228, 228, 228, 228, 229, 229, 229,
229, 229, 239, 239, 239, 239, 239, 244, 244, 244, 244, 244, 253,
253, 253, 253, 253, 256, 256, 256, 256, 256, 257, 257, 257, 257,
257, 259, 259, 259, 259, 259, 266, 266, 266, 266, 266, 272, 272,
272, 272, 272, 275, 275, 275, 275, 275, 277, 277, 277, 277, 277,
278, 278, 278, 278, 278, 284, 284, 284, 284, 284, 288, 288, 288,
288, 288, 290, 290, 290, 290, 290, 291, 291, 291, 291, 291, 292,
292, 292, 292, 292, 294, 294, 294, 294, 294, 295, 295, 295, 295,
295, 296, 296, 296, 296, 296, 299, 299, 299, 299, 299, 300, 300,
300, 300, 300, 301, 301, 301, 301, 301, 303, 303, 303, 303, 303,
305, 305, 305, 305, 305, 306, 306, 306, 306, 306, 307, 307, 307,
307, 307, 309, 309, 309, 309, 309, 313, 313, 313, 313, 313, 315,
315, 315, 315, 315, 316, 316, 316, 316, 316, 320, 320, 320, 320,
320, 324, 324, 324, 324, 324, 331, 331, 331, 331, 331, 336, 336,
336, 336, 336, 337, 337, 337, 337, 337, 348, 348, 348, 348, 348,
349, 349, 349, 349, 349, 352, 352, 352, 352, 352, 353, 353, 353,
353, 353, 367, 367, 367, 367, 367, 373, 373, 373, 373, 373, 382,
382, 382, 382, 382, 387, 387, 387, 387, 387, 388, 388, 388, 388,
388, 389, 389, 389, 389, 389, 392, 392, 392, 392, 392, 398, 398,
398, 398, 398, 401, 401, 401, 401, 401, 402, 402, 402, 402, 402,
404, 404, 404, 404, 404, 405, 405, 405, 405, 405, 410, 410, 410,
410, 410, 411, 411, 411, 411, 411, 412, 412, 412, 412, 412, 413,
413, 413, 413, 413, 414, 414, 414, 414, 414, 415, 415, 415, 415,
415, 420, 420, 420, 420, 420, 428, 428, 428, 428, 428, 431, 431,
431, 431, 431, 433, 433, 433, 433, 433, 434, 434, 434, 434, 434,
436, 436, 436, 436, 436), Time = structure(c(1L, 2L, 3L, 4L,
5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L,
1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L,
2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L,
3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L,
4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L,
5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L,
1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L,
2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L,
3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L,
4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L,
5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L,
1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L,
2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L,
3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L,
4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L,
5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L,
1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L,
2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L,
3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L,
4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L,
5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L,
1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L,
2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L,
3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L,
4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L,
5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L,
1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L,
2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L,
3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L,
4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L,
5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L,
1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L,
2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L,
3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L,
4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L,
5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L,
1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L,
2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L,
3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L,
4L, 5L), .Label = c("1", "2", "3", "4", "5"), class = "factor"),
Score = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, 3, NA, NA, NA, NA, 0, 0,
NA, NA, NA, NA, NA, NA, NA, NA, 4, 7, NA, NA, NA, NA, NA,
NA, NA, NA, 4, NA, NA, NA, NA, 0, NA, NA, NA, NA, 0, NA,
NA, NA, NA, 0, 0, 7, 8, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, 5, 7, NA, NA, NA, 0, NA, NA,
NA, NA, 0, 5, 8, NA, NA, 7, 8, NA, NA, NA, 0, 0, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, 4, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, 0, NA, NA, NA, NA, 4, NA, NA, NA, NA, NA, NA, NA, NA,
NA, 2, 8, 8, NA, NA, 3, NA, NA, NA, NA, 1, NA, NA, NA, NA,
0, 9, NA, NA, NA, 2, NA, NA, NA, NA, NA, NA, NA, NA, NA,
2, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0, 0, 1, 5, 5, NA,
NA, NA, NA, NA, 3, 4, 4, NA, NA, 0, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, 0, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0, NA,
NA, NA, NA, 0, 0, 0, 1, 1, 9, 9, NA, NA, NA, NA, NA, NA,
NA, NA, 0, 2, 5, 5, NA, NA, NA, NA, NA, NA, 0, 0, 0, 0, 0,
0, NA, NA, NA, NA, NA, NA, NA, NA, NA, 6, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0, NA, NA, NA, NA,
0, NA, NA, NA, NA, 7, NA, NA, NA, NA, 5, NA, NA, NA, NA,
NA, NA, NA, NA, NA, 7, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, 0, 4, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0, 1, 1, 1, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, 8, 8, NA, NA, NA, 0,
NA, NA, NA, NA, 0, NA, NA, NA, NA, 0, 3, NA, NA, NA, 6, NA,
NA, NA, NA, NA, NA, NA, NA, NA, 5, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 7, NA,
NA, NA, NA, 0, 0, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, 3, 8, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, 0, NA, NA, NA, NA, 5, 5,
5, NA, NA, 0, NA, NA, NA, NA, 2, 7, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, 0, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, 0, 0, NA, NA, NA, NA, NA, NA, NA, NA, 0, 3, NA, NA,
NA, 0, NA, NA, NA, NA, 7, 7, 8, NA, NA, 0, NA, 0, NA, NA,
2, 4, 4, NA, NA), TimeBetweenScans = c(NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
316, NA, NA, NA, NA, 113, 139, NA, NA, NA, NA, NA, NA, NA,
NA, 335, 660, NA, NA, NA, NA, NA, NA, NA, NA, 104, NA, NA,
NA, NA, 7, NA, NA, NA, NA, 42, NA, NA, NA, NA, 30, 84, 467,
826, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, 643, 1794, NA, NA, NA, 404, NA, NA, NA, NA, 40,
221, 394, NA, NA, 171, 320, NA, NA, NA, 51, 227, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, 449, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, 56, NA, NA, NA, NA, 104, NA, NA, NA, NA, NA, NA,
NA, NA, NA, 79, 989, 1097, NA, NA, 116, NA, NA, NA, NA, 65,
NA, NA, NA, NA, 39, 411, NA, NA, NA, 1193, NA, NA, NA, NA,
NA, NA, NA, NA, NA, 142, NA, NA, NA, NA, NA, NA, NA, NA,
NA, 106, 216, 266, 497, 575, NA, NA, NA, NA, NA, 221, 474,
796, NA, NA, 18, NA, NA, NA, NA, 87, 1565, NA, NA, NA, NA,
NA, NA, NA, NA, 36, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 207,
529, NA, NA, NA, NA, NA, NA, NA, NA, 125, NA, NA, NA, NA,
137, 372, 941, 1102, 1225, 927, 1006, NA, NA, NA, NA, NA,
NA, NA, NA, 63, 429, 533, 567, NA, NA, NA, NA, NA, NA, 156,
447, 470, 1204, 1266, 32, NA, NA, NA, NA, NA, NA, NA, NA,
NA, 411, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, 201, NA, NA, NA, NA, 160, NA, NA, NA, NA, 166, NA,
NA, NA, NA, 459, NA, NA, NA, NA, NA, NA, NA, NA, NA, 212,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 50,
313, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, 312, 530, 783, 1574, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, 1627, 1706, NA, NA, NA, 354,
NA, NA, NA, NA, 33, NA, NA, NA, NA, 62, 130, NA, NA, NA,
1416, NA, NA, NA, NA, 121, NA, NA, NA, NA, 842, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, 24, 64, 82, 122, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, 250, NA, NA, NA, NA, 174, 300, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, 216, 264, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 17,
NA, NA, NA, NA, 214, 268, 388, NA, NA, 24, NA, NA, NA, NA,
149, 382, NA, NA, NA, NA, NA, NA, NA, NA, 8, NA, NA, NA,
NA, 91, 188, NA, NA, NA, NA, NA, NA, NA, NA, 72, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 9, 38, NA,
NA, NA, NA, NA, NA, NA, NA, 13, 138, NA, NA, NA, 42, NA,
NA, NA, NA, 771, 1200, 1512, NA, NA, 113, 166, 180, NA, NA,
122, 475, 640, NA, NA), Groups = c(NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Two",
NA, NA, NA, NA, "Zero", "Zero", NA, NA, NA, NA, NA, NA, NA,
NA, "Two", "Two", NA, NA, NA, NA, NA, NA, NA, NA, "Two",
NA, NA, NA, NA, "Zero", NA, NA, NA, NA, "Zero", NA, NA, NA,
NA, "Two", "Two", "Two", "Two", NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, "Two", "Two", NA, NA,
NA, "One", NA, NA, NA, NA, "Two", "Two", "Two", NA, NA, "Two",
"Two", NA, NA, NA, "Zero", "Zero", NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Two", NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Zero",
NA, NA, NA, NA, "Two", NA, NA, NA, NA, NA, NA, NA, NA, NA,
"Two", "Two", "Two", NA, NA, "Two", NA, NA, NA, NA, "Two",
NA, NA, NA, NA, "Two", "Two", NA, NA, NA, "One", NA, NA,
NA, NA, NA, NA, NA, NA, NA, "Two", NA, NA, NA, NA, NA, NA,
NA, NA, NA, "Two", "Two", "Two", "Two", "Two", NA, NA, NA,
NA, NA, "Two", "Two", "Two", NA, NA, "Zero", NA, NA, NA,
NA, "Zero", "Zero", NA, NA, NA, NA, NA, NA, NA, NA, "Zero",
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, "Zero", "Zero", NA, NA,
NA, NA, NA, NA, NA, NA, "Zero", NA, NA, NA, NA, "One", "One",
"One", "One", "One", "Two", "Two", NA, NA, NA, NA, NA, NA,
NA, NA, "Two", "Two", "Two", "Two", NA, NA, NA, NA, NA, NA,
"One", "One", "One", "One", "One", "Zero", NA, NA, NA, NA,
NA, NA, NA, NA, NA, "Two", NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, "Zero", NA, NA, NA, NA, "Zero", NA,
NA, NA, NA, "Two", NA, NA, NA, NA, "Two", NA, NA, NA, NA,
NA, NA, NA, NA, NA, "Two", NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, "Two", "Two", NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "One",
"One", "One", "One", NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, "Two", "Two", NA, NA, NA, "One", NA, NA, NA, NA,
"Zero", NA, NA, NA, NA, "Two", "Two", NA, NA, NA, "Two",
NA, NA, NA, NA, "Zero", NA, NA, NA, NA, "Two", NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, "Two", NA, NA, NA, NA, "Zero", "One", NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, "Two", "Two", NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, "Zero", NA, NA, NA, NA, "Two", "Two", "Two", NA, NA,
"Zero", NA, NA, NA, NA, "Two", "Two", NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, "Zero", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, "Zero", "Zero", NA, NA, NA, NA, NA, NA, NA,
NA, "Zero", "Two", NA, NA, NA, "Zero", NA, NA, NA, NA, "Two",
"Two", "Two", NA, NA, "One", "One", "One", NA, NA, "Two",
"Two", "Two", NA, NA)), class = c("spec_tbl_df", "tbl_df",
"tbl", "data.frame"), row.names = c(NA, -630L), spec = structure(list(
cols = list(study_id = structure(list(), class = c("collector_double",
"collector")), Time = structure(list(), class = c("collector_double",
"collector")), Score = structure(list(), class = c("collector_double",
"collector")), TimeBetweenScans = structure(list(), class = c("collector_double",
"collector")), Groups = structure(list(), class = c("collector_character",
"collector"))), default = structure(list(), class = c("collector_guess",
"collector")), skip = 1L), class = "col_spec"))
And the code that created the graph is shown below. I grouped by study_id so that a dotted line is drawn connecting all the scores from each individual patient, so each line is one person.
# Plot each scan's score against the time since the post-op scan.
# Points are coloured by Time and shaped by Groups; because the plot is
# grouped by study_id, geom_line() draws one dotted black line per patient.
ggplot(
  data = test,
  mapping = aes(
    x = TimeBetweenScans,
    y = Score,
    group = study_id,
    color = Time,
    shape = Groups
  )
) +
  geom_point(size = 3) +
  geom_line(color = "Black", linetype = "dotted") +
  labs(
    title = "Oulu Score vs Time",
    y = "Oulu Score",
    x = "Time from Post-Op Scan to Follow Up Scan",
    color = "Follow-up Scan"
  )
I was asked to get the "average" score at different timeframes. I.e. the average score at 1 year followup (TimeBetweenScans = "365"), 2 years, 3 years, and 4 years.
So for instance, eyeballing it, you'd take all the dotted lines that cross this red line I drew at the 1 year mark, figure out where they were in the Y axis when they crossed that line, and average their "score".
If I had rows that contained '365' in the "TimeBetweenScans" column, I'd write something like:
# Average score of the scans taken exactly 365 days after the post-op scan.
# TimeBetweenScans is numeric, so compare against the number 365 rather than
# the string "365"; na.rm = TRUE keeps a missing Score from turning the
# whole mean into NA.
test %>%
  filter(TimeBetweenScans == 365) %>%
  summarise(MeanScore = mean(Score, na.rm = TRUE))
That code would select only the data right at the year mark and average the y axis score for me. But since 365 isn't actually ever in a row, and it only exists when those dotted lines cross it, I need to extrapolate what it WOULD be for that person at '365'.
Does that make sense?
If so, how can I do it?
Here is an idea.
I filtered the nearest days around the desired time (year_in_days) for each study_id. Then I calculated a regression line between these points and predicted the Score for the year_in_days. In a last step I calculated the mean over all predictions.
You might get a lot of warnings while filtering, because many study_id groups won't have any values at all - just NA.
Code
# Time you are looking for, in days since the post-op scan.
# 100 reproduces the output shown below; use 365, 730, ... for the
# 1-year, 2-year, ... follow-up marks.
year_in_days <- 100

test %>%
  # Keep only scans that were actually recorded. Dropping rows with a missing
  # time or score *first* guarantees the interpolation below never ends up
  # with a single usable point for a patient.
  filter(!is.na(TimeBetweenScans), !is.na(Score)) %>%
  group_by(study_id) %>%
  # Keep the patients whose dotted line reaches the target time: at least two
  # distinct scan times, with the target inside (or on the edge of) the
  # observed range. Patients without scans on both sides are dropped here,
  # so no min()/max() is ever taken over an empty set.
  filter(
    n_distinct(TimeBetweenScans) >= 2,
    min(TimeBetweenScans) <= year_in_days,
    max(TimeBetweenScans) >= year_in_days
  ) %>%
  # Per patient: read the score off the straight line joining the nearest
  # scan before and the nearest scan after the target time (linear
  # interpolation). A scan taken exactly at the target time is used as is.
  # ties = mean averages scores if a patient has two scans on the same day.
  summarise(
    x = approx(TimeBetweenScans, Score, xout = year_in_days, ties = mean)$y
  ) %>%
  ungroup() %>%
  # calculate mean score over all per-patient estimates
  summarise(mean(x))
Output
# A tibble: 1 x 1
`mean(x)`
<dbl>
1 1.14
Related
How to produce histograms from large table set of mammals. new to coding
I am trying to create a histogram for the geographic range size of mammals. I have data columns with geographic range size and different mammal names. hist1 = hist(x = dat1$Range_Area_km2, breaks = 100, col = "blue", border = "green", main = "Geographic Range Size of Mammals", # add a title xlab = "Km2", # change axis labels ylab = "Frequency", xlim = c(0, 65000000), # change the axis limits ylim = c(0, 200)) I am expecting a histogram of geographic range size of all mammals. structure(list(rownames = 1:20, MSW05_Binomial = c("Melomys_rubicola", "Peromyscus_dickeyi", "Mogera_uchidai", "Pteropus_pelewensis", "Mysateles_garridoi", "Neotoma_anthonyi", "Microtus_breweri", "Hypsugo_lophurus", "Neotoma_martinensis", "Pteropus_howensis", "Crocidura_dhofarensis", "Mesocapromys_angelcabrerai", "Ochotona_gaoligongensis", "Myotis_abei", "Rhynchomys_isarogensis", "Calomyscus_tsolovi", "Monodelphis_unistriata", "Geocapromys_ingrahami", "Hesperoptenus_gaskelli", "Neotoma_bunkeri"), MSW05_Order = c("Rodentia", "Rodentia", "Soricomorpha", "Chiroptera", "Rodentia", "Rodentia", "Rodentia", "Chiroptera", "Rodentia", "Chiroptera", "Soricomorpha", "Rodentia", "Lagomorpha", "Chiroptera", "Rodentia", "Rodentia", "Didelphimorphia", "Rodentia", "Chiroptera", "Rodentia"), MSW05_Family = c("Muridae", "Cricetidae", "Talpidae", "Pteropodidae", "Capromyidae", "Cricetidae", "Cricetidae", "Vespertilionidae", "Cricetidae", "Pteropodidae", "Soricidae", "Capromyidae", "Ochotonidae", "Vespertilionidae", "Muridae", "Calomyscidae", "Didelphidae", "Capromyidae", "Vespertilionidae", "Cricetidae"), AdultBodyMass_g = c(100, 28.6, NA, NA, NA, 195.4, NA, NA, 240.16, 232.92, NA, NA, NA, NA, 122.29, NA, 55.3, 733.52, NA, 375), LitterSize = c(NA, NA, NA, NA, NA, NA, 4.5, NA, NA, 0.98, NA, NA, NA, NA, NA, NA, NA, 1.04, NA, NA), Range_Area_km2 = c(0.000187, 0.00725, 0.03, 0.06, 0.49, 0.66, 1.85, 2.7, 2.8, 3.16, 3.52, 3.55, 4.89, 5.11, 5.28, 6.78, 6.86, 8.36, 8.89, 9.15), Precip_Mean_mm = c(NA, NA, NA, NA, NA, NA, 
NA, 290, NA, NA, 7, 111, 195, 52, 331, 18, 107, NA, 237, NA), Temp_Mean_degC = c(NA, NA, NA, NA, NA, NA, NA, 24.6, NA, NA, 21.1, 24.6, 11.3, -0.9, 25.2, 17.1, 17.9, NA, 20.6, NA), ActivityCycle = c(NA, 1L, 2L, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 2L, NA, 1L, 1L, NA, NA), DietBreadth = c(NA, NA, 1L, NA, 3L, NA, 1L, NA, NA, NA, NA, 3L, NA, NA, 1L, NA, 1L, 2L, NA, NA), HabitatBreadth = c(NA, NA, 1L, NA, 1L, NA, 2L, 1L, NA, 1L, NA, 1L, 1L, 1L, NA, NA, 1L, 2L, 1L, NA), Terrestriality = c(NA, NA, 1L, NA, 2L, NA, 1L, 2L, NA, 2L, NA, 2L, 1L, 2L, NA, NA, 1L, 2L, 2L, NA), TrophicLevel = c(NA, NA, 3L, NA, 2L, NA, 1L, NA, NA, NA, NA, 2L, NA, NA, 3L, NA, 3L, 1L, NA, NA)), row.names = c(NA, 20L), class = "data.frame") >
Normalisation and simplification of a plot
I'd like to make a graph that captures the impact of the intervention. As you can see in the plot below, the y-axis of my figure has different scales (e.g. values), making comparison difficult. Is it possible to normalise the y axis (0 and 1) on both figures? Is it also possible to create a plot that shows the intervention rate per day? For Monday, for example, display a black line without SEDM and a red line with Sample code: df %>% group_by(Day, Time = ceiling_date(as.POSIXct(Time), '10 minutes')) %>% summarise(kW= mean(kW)) %>% ungroup df$Day<- factor(df$Day,levels = c( "Monday", "Tuesday","Wednesday","Thursday","Friday","Saturday", "Sunday")) #as.POSIXct(df$Time,format="%H:%M") ggplot(transform(df, Time = as.POSIXct(Time)), aes(x = Time,y =kW, group=Day)) + geom_smooth(aes(color=Day)) + #geom_boxplot()+ labs(x="", y="kW", title="Monthly electricity consumption without SEDM (House 3)") + scale_x_datetime(date_labels = '%H:%M', date_breaks = '2 hours',expand = expansion(mult = c(0, 0)))+ #facet_wrap(~Day, ncol=1 )+ theme_bw()+ theme(axis.text.x = element_text(angle = 90, hjust = 1,family="Times", face="bold", size=12, color="black"), axis.title.x = element_text(family="Times", face="bold", size=16, color="black"), axis.text.y = element_text(family="Times", face="bold", size=12, color="black"), axis.title.y = element_text(family="Times", face="bold", size=16, color="black"), strip.text = element_text(size=15, face="bold"), plot.title = element_text(size=20, face="bold"))+ theme(legend.title = element_blank(), legend.text = element_text(family="Times", color = "black", size = 16,face="bold"), legend.position="top", legend.box = "horizontal", plot.title = element_text(hjust = 0.5))+ guides(fill = guide_legend(nrow = 1)) Plot: Data: #for the first plot (without SEDM) first 300 structure(list(structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"), class = "factor"), structure(c(41220, 41280, 41340, 41400, 41460, 41520, 41580, 41640, 41700, 41760, 41820, 41880, 41940, 42000, 42060, 42120, 42180, 42240, 42300, 42360, 42420, 42480, 42540, 42600, 42660, 42720, 42780, 42840, 42900, 42960, 43020, 43080, 43140, 43200, 43260, 43320, 43380, 43440, 43500, 43560, 43620, 43680, 43740, 43800, 43860, 43920, 43980, 44040, 44100, 44160, 44220, 44280, 44340, 44400, 44460, 44520, 44580, 44640, 44700, 44760, 44820, 44880, 44940, 45000, 45060, 45120, 45180, 45240, 45300, 45360, 45420, 45480, 45540, 45600, 45660, 45720, 45780, 45840, 45900, 45960, 46020, 46080, 46140, 46200, 46260, 46320, 46380, 46440, 46500, 46560, 46620, 46680, 46740, 46800, 46860, 46920, 46980, 47040, 47100, 47160, 47220, 47280, 47340, 47400, 47460, 47520, 47580, 47640, 47700, 47760, 47820, 47880, 47940, 48000, 48060, 48120, 48180, 48240, 48300, 48360, 
48420, 48480, 48540, 48600, 48660, 48720, 48780, 48840, 48900, 48960, 49020, 49080, 49140, 49200, 49260, 49320, 49380, 49440, 49500, 49560, 49620, 49680, 49740, 49800, 49860, 49920, 49980, 50040, 50100, 50160, 50220, 50280, 50340, 50400, 50460, 50520, 50580, 50640, 50700, 50760, 50820, 50880, 50940, 51000, 51060, 51120, 51180, 51240, 51300, 51360, 51420, 51480, 51540, 51600, 51660, 51720, 51780, 51840, 51900, 51960, 52020, 52080, 52140, 52200, 52260, 52320, 52380, 52440, 52500, 52560, 52620, 52680, 52740, 52800, 52860, 52920, 52980, 53040, 53100, 53160, 53220, 53280, 53340, 53400, 53460, 53520, 53580, 53640, 53700, 53760, 53820, 53880, 53940, 54000, 54060, 54120, 54180, 54240, 54300, 54360, 54420, 54480, 54540, 54600, 54660, 54720, 54780, 54840, 54900, 54960, 55020, 55080, 55140, 55200, 55260, 55320, 55380, 55440, 55500, 55560, 55620, 55680, 55740, 55800, 55860, 55920, 55980, 56040, 56100, 56160, 56220, 56280, 56340, 56400, 56460, 56520, 56580, 56640, 56700, 56760, 56820, 56880, 56940, 57000, 57060, 57120, 57180, 57240, 57300, 57360, 57420, 57480, 57540, 57600, 57660, 57720, 57780, 57840, 57900, 57960, 58020, 58080, 58140, 58200, 58260, 58320, 58380, 58440, 58500, 58560, 58620, 58680, 58740, 58800, 58860, 58920, 58980, 59040, 59100, 59160), class = c("hms", "difftime"), units = "secs"), c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2.951667, 41.055, 10.733333, 2.951667, 3.22, 4.83, 
3.22, 3.22, 3.22, 2.951667, 3.488333, 13.416667, 3.22, 3.22, 13.416667, 43.738333, 3.22, 3.22, 3.22, 3.488333, 3.22, 3.22, 3.488333, 2.951667, 11.806667, 3.756667, 45.348333, 4.025, 4.025, 4.293333, 4.83, 4.83, 15.026667, 4.561667, 46.153333, 4.025, 4.025, 3.756667, 4.025, 4.293333, 44.275, 44.811667, 3.756667, 3.756667, 3.756667, 3.756667, 3.756667, 3.756667, 3.756667, 3.756667, 3.756667, 44.006667, 3.756667, 3.756667, 3.756667, 3.488333, 3.756667, 3.756667, 44.006667, 44.006667, 3.756667, 3.756667, 4.293333, 3.756667, 4.293333, 4.561667, 4.83, 4.025, 4.025, 24.418333, 4.561667, 4.025, 4.293333, 32.736667, 9.928333, 1.073333, 1.341667, 1.073333, 0.805, 20.93, 41.591667, 0.805, 0.805, 1.073333, 1.341667, 1.61, 2.415, 43.201667), c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), ccames = c("Day", "Time", "kW", NA, NA, NA, NA, NA, NA), row.names = c(NA, 300L ), class = "data.frame") #for the second plot (with SEDM) first 500 structure(list(Day = structure(c(5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L), .Label = c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"), class = "factor"), Time = structure(c(43380, 43440, 43500, 43560, 43620, 43680, 43740, 43800, 43860, 43920, 65700, 65760, 65820, 65880, 65940, 66000, 66060, 66120, 66180, 66240, 66300, 66360, 66420, 66480, 66540, 66600, 66660, 66720, 66780, 66840, 66900, 66960, 67020, 67080, 67140, 67200, 67260, 67320, 67380, 67440, 67500, 67560, 67620, 67680, 67740, 67800, 67860, 67920, 67980, 68040, 68100, 68160, 68220, 68280, 68340, 68400, 68460, 68520, 68580, 68640, 68700, 68760, 68820, 68880, 68940, 69000, 69060, 69120, 69180, 69240, 69300, 69360, 69420, 69480, 69540, 69600, 69660, 69720, 69780, 69840, 69900, 69960, 70020, 70080, 70140, 70200, 70260, 70320, 70380, 70440, 70500, 70560, 70620, 70680, 70740, 70800, 70860, 70920, 70980, 71040, 71100, 71160, 71220, 71280, 71340, 71400, 71460, 71520, 71580, 71640, 71700, 71760, 71820, 71880, 71940, 72000, 72060, 72120, 72180, 72240, 72300, 72360, 72420, 72480, 72540, 72600, 72660, 72720, 72780, 72840, 72900, 72960, 73020, 
73080, 73140, 73200, 73260, 73320, 73380, 73440, 73500, 73560, 73620, 73680, 73740, 73800, 73860, 73920, 73980, 74040, 74100, 74160, 74220, 74280, 74340, 74400, 74460, 74520, 74580, 74640, 74700, 74760, 74820, 74880, 74940, 75000, 75060, 75120, 75180, 75240, 75300, 75360, 75420, 75480, 75540, 75600, 75660, 75720, 75780, 75840, 75900, 75960, 76020, 76080, 76140, 76200, 76260, 76320, 76380, 76440, 76500, 76560, 76620, 76680, 76740, 76800, 76860, 76920, 76980, 77040, 77100, 77160, 77220, 77280, 77340, 77400, 77460, 77520, 77580, 77640, 77700, 77760, 77820, 77880, 77940, 78000, 78060, 78120, 78180, 78240, 78300, 78360, 78420, 78480, 78540, 78600, 78660, 78720, 78780, 78840, 78900, 78960, 79020, 79080, 79140, 79200, 79260, 79320, 79380, 79440, 79500, 79560, 79620, 79680, 79740, 79800, 79860, 79920, 79980, 80040, 80100, 80160, 80220, 80280, 80340, 80400, 80460, 80520, 80580, 80640, 80700, 80760, 80820, 80880, 80940, 81000, 81060, 81120, 81180, 81240, 81300, 81360, 81420, 81480, 81540, 81600, 81660, 81720, 81780, 81840, 81900, 81960, 82020, 82080, 82140, 82200, 82260, 82320, 82380, 82440, 82500, 82560, 82620, 82680, 82740, 82800, 82860, 82920, 82980, 83040), class = c("hms", "difftime"), units = "secs"), kW = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6.976667, 6.976667, 7.245, 7.245, 7.245, 7.245, 7.245, 7.245, 7.245, 7.245, 7.245, 7.245, 7.245, 7.245, 7.245, 7.245, 7.245, 7.245, 7.245, 7.245, 13.416667, 30.053333, 4.025, 4.025, 4.025, 4.025, 42.396667, 41.591667, 4.025, 4.025, 4.025, 4.025, 4.025, 4.025, 4.293333, 4.025, 4.025, 4.293333, 4.025, 4.025, 23.881667, 23.881667, 4.293333, 4.025, 4.293333, 42.665, 43.201667, 29.785, 4.293333, 16.636667, 4.025, 4.025, 4.025, 4.025, 4.025, 4.025, 4.293333, 5.903333, 5.635, 5.903333, 31.126667, 5.635, 5.635, 5.635, 5.635, 5.635, 5.903333, 5.635, 5.635, 5.635, 39.176667, 60.106667, 75.67, 75.67, 76.475, 30.321667, 6.976667, 7.781667, 7.781667, 39.713333, 39.713333, 39.713333, 31.663333, 31.663333, 31.663333, 18.515, 31.663333, 37.298333, 
49.91, 59.301667, 24.15, 28.711667, 30.59, 35.956667, 38.64, 41.055, 24.955, 42.665, 52.325, 43.47, 31.395, 50.983333, 52.325, 40.518333, 44.543333, 50.178333, 41.591667, 50.715, 54.74, 50.983333, 65.741667, 48.031667, 41.591667, 50.715, 28.98, 5.366667, 7.781667, 28.98, 16.368333, 4.561667, 4.561667, 2.683333, 2.951667, 2.951667, 2.951667, 2.951667, 2.951667, 2.951667, 2.951667, 2.951667, 2.951667, 2.951667, 2.951667, 2.951667, 2.951667, 2.951667, 2.951667, 2.951667, 2.951667, 2.951667, 2.951667, 2.951667, 2.951667, 2.951667, 2.951667, 2.951667, 2.951667, 2.951667, 2.951667, 2.683333, 2.683333, 2.415, 2.415, 3.756667, 3.756667, 3.756667, 4.293333, 5.366667, 5.635, 5.366667, 5.366667, 4.83, 4.83, 4.561667, 3.22, 2.951667, 2.951667, 3.22, 3.22, 3.22, 3.22, 2.951667, 2.951667, 2.951667, 2.951667, 2.951667, 2.951667, 2.951667, 2.951667, 2.951667, 2.951667, 2.951667, 2.951667, 2.951667, 2.951667, 2.951667, 2.951667, 2.951667, 2.951667, 2.951667, 2.951667, 2.415, 2.415, 2.415, 2.415, 2.146667, 2.415, 2.146667, 2.415, 2.415, 2.415, 2.146667, 2.415, 2.415, 2.415, 2.415, 2.951667, 2.415, 2.415, 2.415, 2.415, 2.415, 2.415, 2.415, 2.415, 2.415, 2.415, 2.415, 2.415, 2.415, 2.415, 2.415, 2.415, 2.415, 2.415, 2.415, 2.683333, 2.683333, 2.683333, 2.683333, 2.683333, 2.415, 2.683333, 3.488333, 2.951667, 3.756667, 3.756667, 3.488333, 3.488333, 4.025, 4.025, 2.415, 2.415, 1.878333, 1.878333, 2.146667, 2.415, 1.878333, 2.146667, 2.146667, 2.146667, 1.878333, 1.878333, 1.878333, 1.878333, 1.878333, 1.878333, 1.878333, 1.878333, 1.878333, 1.878333, 1.878333, 1.878333, 1.878333, 3.488333, 3.22, 2.951667, 2.683333, 2.951667, 2.951667, 2.951667, 2.951667, 2.951667, 2.951667, 2.951667, 2.951667, 2.951667, 2.951667, 2.951667, 2.951667, 2.951667, 2.951667, 2.951667, 2.951667, 2.951667, 2.683333, 2.951667, 2.951667, 2.951667, 2.683333)), row.names = c(NA, 300L), class = "data.frame")
You could scale your "kW" variable with min and max like this: library(ggplot2) df$Day<- factor(df$Day,levels = c( "Monday", "Tuesday","Wednesday","Thursday","Friday","Saturday", "Sunday")) #as.POSIXct(df$Time,format="%H:%M") maxs <- max(df$kW) mins <- min(df$kW) df$kW <- scale(df$kW, center = mins, scale = maxs-mins) ggplot(transform(df, Time = as.POSIXct(Time)), aes(x = Time,y =kW, group=Day)) + geom_smooth(aes(color=Day)) + #geom_boxplot()+ labs(x="", y="kW", title="Monthly electricity consumption without SEDM (House 3)") + scale_x_datetime(date_labels = '%H:%M', date_breaks = '2 hours',expand = expansion(mult = c(0, 0)))+ #facet_wrap(~Day, ncol=1 )+ theme_bw()+ theme(axis.text.x = element_text(angle = 90, hjust = 1,family="Times", face="bold", size=12, color="black"), axis.title.x = element_text(family="Times", face="bold", size=16, color="black"), axis.text.y = element_text(family="Times", face="bold", size=12, color="black"), axis.title.y = element_text(family="Times", face="bold", size=16, color="black"), strip.text = element_text(size=15, face="bold"), plot.title = element_text(size=20, face="bold"))+ theme(legend.title = element_blank(), legend.text = element_text(family="Times", color = "black", size = 16,face="bold"), legend.position="top", legend.box = "horizontal", plot.title = element_text(hjust = 0.5))+ guides(fill = guide_legend(nrow = 1)) Output:
R Problems with glm-model due to missing values
I have problems with putting my data into a glm model. I think the problem is because I have many missing values in my data (below). I tried this so far: baseformula = as.formula(df) glm(baseformula, data = df, family = poisson(link = "log"), na.action = na.exclude) I am getting an Error: Error in glm.fit(x = numeric(0), y = integer(0), weights = NULL, start = NULL, : object 'fit' not found Can somebody help me with this? When a variable is NA in my formula, I just want the glm to ignore the NAs and use these variables the same as variables without NA. structure(list(V1 = c(0L, 1L, 3L, 0L, 0L, 0L, 2L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 2L, 0L, 0L, 0L, 0L, 0L, 2L, 0L, 0L, 1L, 5L, 1L, 1L, 0L, 1L, 0L, 0L, 1L, 3L, 0L, 1L, 0L), V48 = c(97.33, 96.88, 85.33, 83.75, 75.58, 86.13, 83, 95.75, 88.46, 80.25, 75, 67.17, 69.33, 64.08, 70.75, 78.46, 85.58, 83.42, 96.17, 76.5, 76.42, 65.38, 69.79, 68.38, 84.67, 89.67, 91.29, 80.54, 64.63, 72.29, 76.54, 65.33, 96.92, 91.38, 88.92, 80.63, 85.5, 76.38, 76.21, 78.29, 89.29, 87.04, 78.67), V49 = c(-0.9, -0.1, 0, 0.9, -0.2, -6.3, -4.9, -1.2, -0.3, -1.4, 7.3, 10.5, 10.8, 17.5, 10.8, 9.2, 7.3, 8.2, 10.2, 8.5, 10.4, 25.6, 26.7, 28, 20.1, 20.2, 15.7, 15.3, 21.6, 24.8, 22.4, 27.1, 14.3, 13.8, 17.1, 19.5, 22.9, 21.9, 17.2, 18.9, 16.3, 14.2, 18.5), V58 = c(0.16208333, -0.02576069, -0.24859501, -0.39733779, -0.35568168, -0.13908246, -0.11529523, -0.07094469, 0.07592036, 0.13803538, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), V59 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0.40727943, 0.44007391, 0.50582446, 0.59001139, 0.55057958, 0.53888617, 0.55019019, 0.42592698, 0.347516, 0.52019593, 0.69611622, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), V61 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0.04555282, 0.16109391, 0.13651381, -0.02339007, 
-0.24799358, -0.14477839, -0.0845835, -0.13505766, -0.06910931, 0.05876354, 0.11372484, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), V68 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0.01575957, -0.19924471, -0.39083879, -0.26620543, -0.10669409, -0.05650572, 0.06644096, 0.24769837, -0.11404654, -0.49358358, -0.27725445, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), V71 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, -0.1563703, -0.23797044, -0.37304736, -0.27425744, -0.02347071, 0.36391633, 0.44316418, 0.21940339, 0.02321926, -0.01531807, -0.05197635, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), V73 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, -0.46298985, -0.7644245, -0.82771396, -0.81243484, -0.75591058, -0.55440085, -0.35516327, -0.05602486, -0.12290976, -0.14458255, -0.17033091 ), V77 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, -0.04571093, 0.25592819, 0.35649173, 0.3507695, 0.30446594, 0.36505183, 0.54215354, 0.47808018, 0.40325075, 0.32091592, 0.09212919 )), .Names = c("V1", "V48", "V49", "V58", "V59", "V61", "V68", "V71", "V73", "V77"), row.names = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 60L, 61L, 62L, 63L, 64L, 65L, 66L, 67L, 68L, 69L, 70L, 152L, 153L, 154L, 155L, 156L, 157L, 158L, 159L, 160L, 161L, 162L, 244L, 245L, 246L, 247L, 248L, 249L, 250L, 251L, 252L, 253L, 254L), class = "data.frame")
ggplot - bar chart with multiple y-variables
I'm trying to create a bar chart with three y-variables (Total_US_received, Total_US_required, Total_US_received_from.CERF) using ggplot2. All three y-variables are measured on the same scale (US$). So far I've created a bar chart with Total_US_received as the y-variable and Disaster_category as the x-variable using this code: ggplot(Template.2006.2017.text, aes(Disaster_category, y=Total_US_received)) + geom_bar(stat ="identity", fill="lightblue") + coord_flip() However, every attempt I've made to include the other two y-variables in the graph has failed. How can I include the other two variables in the graph? A follow-up question: Can I make the graph show the mean of every category of the x-variable (Disaster_subtype) without NAs instead of the total sum? Here's my data in dput (compressed version): structure(list(Disaster_category = structure(c(1L, 15L, 17L, 15L, 5L, 8L, 13L, 8L, 2L, 8L, 2L, 3L, 8L, 2L, 8L, 2L, 10L, 5L, 7L, 8L, 15L, 2L, 8L, 2L, 15L, 15L, 8L, 15L, 2L, 17L, 2L, 7L, 2L, 8L, 2L, 3L, 2L, 8L, 8L, 2L, 8L, 17L, 2L, 3L, 8L, 8L, 2L, 8L, 8L, 8L, 2L, 8L, 3L, 2L, 3L, 2L, 8L, 2L, 3L, 8L, 2L, 8L, 2L, 15L, 5L, 8L, 13L, 8L, 15L, 2L, 8L, 2L, 3L, 2L, 3L, 15L, 8L, 3L, 2L, 3L, 8L, 2L, 3L, 2L, 8L, 2L, 8L, 15L, 2L, 8L, 8L, 5L, 2L, 8L, 2L, 3L, 2L, 17L, 2L, 17L, 2L, 4L, 5L, 8L, 8L, 2L, 8L, 15L, 2L, 15L, 15L, 7L, 2L, 8L, 2L, 15L, 15L, 7L, 8L, 17L, 2L, 15L, 8L, 2L, 17L, 2L, 3L, 8L, 2L, 5L, 2L, 8L, 2L, 8L, 8L, 15L, 2L, 8L, 2L, 15L, 8L, 2L, 15L, 8L, 7L, 8L, 15L, 2L, 8L, 8L, 7L, 13L, 8L, 2L, 8L, 2L, 8L, 8L, 3L, 2L, 13L, 2L, 3L, 8L, 2L, 15L, 15L, 8L, 15L, 2L, 5L, 3L, 3L, 8L, 3L, 2L, 8L, 8L, 3L, 2L, 8L, 2L, 15L, 2L, 17L, 2L, 5L, 2L, 8L, 2L, 15L, 2L, 3L, 8L, 8L, 2L, 8L, 8L, 2L, 3L), .Label = c("", " ", "Disease", "Disease related disaster", "Drought", "Drought & storm", "Extreme temperature / fire", "Flood", "Flood & drought", "Insect infestation", "Insect infestation & drought", "Landslide & flood", "Landslide / mudslide", "Other", "Storm", "Storm & flood", 
"Winter"), class = "factor"), Total_US_received_from.CERF = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 678307.8333, 678307.8333, 678307.8333, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1110469.5, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1905355, NA, NA, NA, NA, NA, NA, NA, NA, NA, 2493246, 2493246, 2493246, 2493246, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 333333.3333, 333333.3333, 333333.3333, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 9365420, NA, NA, 14321419, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), Total_US_received = cotal_US_required = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 20502064.83, 20502064.83, 20502064.83, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 3070192, NA, NA, NA, NA, NA, NA, NA, NA, NA, 49955895.25, 49955895.25, 49955895.25, 49955895.25, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 333333.3333, 333333.3333, 333333.3333, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA)), row.names = c(NA, 200L), class = "data.frame")
You can transform your data into long format and then plot it: library(tidyr) library(ggplot2) my_data %>% gather(Total_US_category, Total_US, Total_US_received, Total_US_required, Total_US_received_from.CERF) %>% ggplot(aes(Disaster_category, y = Total_US, fill = Total_US_category)) + geom_col(position = position_dodge()) + coord_flip() If you want to plot the mean value per disaster category, you can first summarize the data with dplyr: library(dplyr) my_data_sum <- my_data %>% gather(Total_US_category, Total_US, Total_US_received, Total_US_required, Total_US_received_from.CERF) %>% group_by(Disaster_category, Total_US_category) %>% summarize(Total_US_mean = mean(Total_US, na.rm = T)) my_data_sum # A tibble: 33 x 3 # Groups: Disaster_category [?] # Disaster_category Total_US_category Total_US_mean # <fct> <chr> <dbl> # 1 "" Total_US_received NaN # 2 "" Total_US_received_from.CERF NaN # 3 "" Total_US_required NaN # 4 " " Total_US_received NaN # 5 " " Total_US_received_from.CERF NaN # 6 " " Total_US_required NaN # 7 Disease Total_US_received NaN # 8 Disease Total_US_received_from.CERF NaN # 9 Disease Total_US_required NaN # 10 Disease related disaster Total_US_received NaN # ... with 23 more rows And then plot the data: ggplot(my_data_sum, aes(Disaster_category, y = Total_US_mean, fill = Total_US_category)) + geom_col(position = position_dodge()) + coord_flip()
How to get conditional weighted means for several columns
For the following dataframe: eu <- structure(list(land = structure(c(1L, 4L, 5L, 12L, 9L, 13L, 16L, 18L, 27L, 10L, 25L, 21L, 28L, 19L, 8L, 26L, 6L, 3L, 15L, 14L, 11L, 17L, 20L, 23L, 24L, 2L, 22L, 7L), .Label = c("Belgie", "Bulgarije", "Cyprus", "Denemarken", "Duitsland", "Estland", "Europese Unie", "Finland", "Frankrijk", "Griekenland", "Hongarije", "Ierland", "Italie", "Letland", "Litouwen", "Luxemburg", "Malta", "Nederland", "Oostenrijk", "Polen", "Portugal", "Roemenie", "Slovenie", "Slowakije", "Spanje", "Tsjechie", "Verenigd Koninkrijk", "Zweden"), class = "factor"), `1979` = c(91.36, 47.82, 65.73, 63.61, 60.71, 85.65, 88.91, 58.12, 32.35, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 61.99), `1981` = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, 81.48, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), `1984` = c(92.09, 52.38, 56.76, 47.56, 56.72, 82.47, 88.79, 50.88, 32.57, 80.59, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 58.98), `1987` = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 68.52, 72.42, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), `1989` = c(90.73, 46.17, 62.28, 68.28, 48.8, 81.07, 87.39, 47.48, 36.37, 80.03, 54.71, 51.1, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 58.41), `1994` = c(90.66, 52.92, 60.02, 43.98, 52.71, 73.6, 88.55, 35.69, 36.43, 73.18, 59.14, 35.54, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 56.67), `1995` = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 41.63, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), `1996` = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 67.73, 57.6, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), `1999` = c(91.05, 50.46, 45.19, 50.21, 46.76, 69.76, 87.27, 30.02, 24, 70.25, 63.05, 39.93, 38.84, 49.4, 30.14, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 49.51), `2004` = c(90.81, 47.89, 43, 58.58, 42.76, 71.72, 91.35, 39.26, 38.52, 63.22, 45.14, 38.6, 37.85, 42.43, 
39.43, 28.3, 26.83, 72.5, 48.38, 41.34, 38.5, 82.39, 20.87, 28.35, 16.97, NA, NA, 45.47), `2007` = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 29.22, 29.47, NA), `2009` = c(90.39, 59.54, 43.3, 58.64, 40.63, 65.05, 90.75, 36.75, 34.7, 52.61, 44.9, 36.78, 45.53, 45.97, 40.3, 28.2, 43.9, 59.4, 20.98, 53.7, 36.31, 78.79, 24.53, 28.33, 19.64, 38.99, 27.67, 43), inwoners = c(11161642, 5602628, 80523746, 4591087, 65578819, 59685227, 537039, 16779575, 63896071, 11062508, 46727890, 10487289, 9555893, 8451860, 5426674, 10516125, 1320174, 865878, 2971905, 2023825, 9908798, 421364, 38533299, 2058821, 5410836, 7284552, 20020074, 501403599), plicht = structure(c(1L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("ja", "nee"), class = "factor")), .Names = c("land", "1979", "1981", "1984", "1987", "1989", "1994", "1995", "1996", "1999", "2004", "2007", "2009", "inwoners", "plicht"), row.names = c(NA, -28L), class = "data.frame") I need conditional column means. I can do that with: verplicht <- c("Europese Unie (stemplicht)", colMeans(eu[eu$plicht=="ja",c(2:13)], na.rm=TRUE), NA) vrij <- c("Europese Unie (geen stemplicht)", colMeans(eu[eu$plicht=="nee",c(2:13)], na.rm=TRUE), NA) eu2 <- rbind(eu, verplicht, vrij) However, I need weighted column means with country population (the inwoners column) as the weights. I tried to do that with: verplicht <- c("Europese Unie (stemplicht)", lapply(eu[eu$plicht=="ja",c(2:13)], weighted.mean(x, eu[eu$plicht=="ja",14], na.rm=TRUE)), NA) but that resulted in the following error: Error in weighted.mean.default(x, eu[eu$plicht == "ja", 14], na.rm = TRUE) : 'x' and 'w' must have the same length I understand what the error message is saying, but don't know how to solve this. Any suggestions?
The problem is with how you're using lapply. Here's the correct code: lapply(eu[eu$plicht=='ja',2:13], weighted.mean, eu[eu$plicht=='ja','inwoners'], na.rm=TRUE) lapply(eu[eu$plicht=='nee',2:13], weighted.mean, eu[eu$plicht=='nee','inwoners'], na.rm=TRUE) Notice how weighted.mean is passed to lapply as the function itself, with the weights and na.rm=TRUE forwarded to it as extra arguments, rather than being called inside an anonymous function that takes x as its argument. You could equivalently do: lapply(eu[eu$plicht=='ja',2:13], function(x) weighted.mean(x, eu[eu$plicht=='ja','inwoners'], na.rm=TRUE)) lapply(eu[eu$plicht=='nee',2:13], function(x) weighted.mean(x, eu[eu$plicht=='nee','inwoners'], na.rm=TRUE)) But your current code mixes these two ways of using lapply: it calls weighted.mean(x, ...) directly without wrapping it in function(x), so x does not refer to the column being processed.
If inwoners is the population, then > (weights <- with(eu, inwoners/sum(inwoners))) # [1] 0.0111303968 0.0055869443 0.0802983327 0.0045782350 0.0653952416 # [6] 0.0595181478 0.0005355356 0.0167326033 0.0637172042 0.0110315403 # [11] 0.0465970828 0.0104579315 0.0095291428 0.0084282004 0.0054114829 # [16] 0.0104866868 0.0013164784 0.0008634541 0.0029635856 0.0020181596 # [21] 0.0098810599 0.0004201845 0.0384254312 0.0020530577 0.0053956892 # [26] 0.0072641601 0.0199640310 0.5000000000 and the weighted mean of the 2004 column, for example, is > weighted.mean(eu$`2004`, w = weights, na.rm = TRUE) # [1] 45.31782 To get the weighted mean of each of the year columns for when plicht == 'ja', > s <- subset(eu, plicht == "ja") > w2 <- weights[as.numeric(rownames(s))] > newDF <- do.call(rbind, lapply(2:13, function(i){ data.frame(wtMean.ja = weighted.mean(s[,i], w = w2, na.rm = TRUE)) })) > rownames(newDF) <- names(s)[2:13] > newDF # wtMean.ja # 1979 86.56735 # 1981 81.48000 # 1984 83.56127 # 1987 68.52000 # 1989 72.30636 # 1994 69.86950 # 1995 NaN # 1996 NaN # 1999 69.28708 # 2004 63.17060 # 2007 NaN # 2009 58.99465