Related
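I want to draw a split-violin plot, as described here, for each gene column of df (third column onwards; the first two columns are pathologic_stage and subtype). The x-axis would be pathologic_stage, and the fill (the two halves of each violin) would be subtype.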
My code fails with the error: "data must be a <data.frame>, or an object coercible by fortify(), not a character vector."
devtools::install_github("psyteachr/introdataviz")
library(introdataviz)
Plot:
# Draw one split-violin plot per gene column: pathologic stage on the x-axis,
# with each violin split (and filled) by subtype.
#
# The two grouping columns are excluded by name, and the loop runs over the
# remaining column *names*. The earlier version looped over 1:nrow(...), i.e.
# over row counts, and its index started at the grouping columns.
gene_cols <- setdiff(names(df), c("pathologic_stage", "subtype"))

for (gene in gene_cols) {
  # ggplot() needs the whole data frame as `data`. Passing df[, i] hands it a
  # bare vector (a character vector for column 1), which is what raised the
  # fortify() error. Inside aes(), columns are referenced by name; the gene
  # column, chosen at run time, is looked up with .data[[gene]].
  #
  # The y scale has no fixed limits/breaks: expression ranges differ widely
  # between genes, and fixed limits would drop every value outside them.
  # The x scale has no hard-coded labels: the stage values label themselves,
  # so labels cannot be misassigned if a stage is absent from the data.
  p <- ggplot(
    df,
    aes(x = pathologic_stage, y = .data[[gene]], fill = subtype)
  ) +
    geom_split_violin(alpha = .4, trim = FALSE) +
    geom_boxplot(width = .2, alpha = .6, fatten = NULL, show.legend = FALSE) +
    stat_summary(
      fun.data = "mean_se", geom = "pointrange", show.legend = FALSE,
      position = position_dodge(.175)
    ) +
    scale_x_discrete(name = "Stage") +
    scale_y_continuous(name = "Gene Expression") +
    scale_fill_brewer(palette = "Dark2", name = "Subtype") +
    ggtitle(gene) +
    theme_minimal()

  # Explicit print() is required for plots created inside a for loop.
  print(p)
}
Traceback:
Error in `fortify()`:
! `data` must be a <data.frame>, or an object coercible by `fortify()`, not a
character vector.
Run `rlang::last_error()` to see where the error occurred.
> rlang::last_error()
<error/rlang_error>
Error in `fortify()`:
! `data` must be a <data.frame>, or an object coercible by `fortify()`, not a
character vector.
---
Backtrace:
1. base::print(...)
3. ggplot2:::ggplot.default(...)
5. ggplot2:::fortify.default(data, ...)
Run `rlang::last_trace()` to see the full context.
Data:
dput(df[1:50,1:50])
> dput(df[1:50,1:50])
structure(list(pathologic_stage = c("stage i", "stage i", "stage i",
"stage i", "stage i", "stage iii", "stage i", "stage i", "stage i",
"stage i", "stage i", "stage iii", "stage i", "stage i", "stage i",
"stage i", "stage i", "stage i", "stage ii", "stage i", "stage i",
"stage i", "stage i", "stage i", "stage i", "stage iii", "stage ii",
"stage ii", "stage ii", "stage i", "stage i", "stage ii", "stage iv",
"stage iv", "stage i", "stage iii", "stage iv", "stage iv", "stage iii",
"stage iv", "stage i", "stage i", "stage i", "stage iii", "stage i",
"stage iii", "stage i", "stage i", "stage ii", "stage i"), subtype = c("KIRP",
"KIRP", "KIRP", "KIRP", "KIRP", "KIRP", "KIRC", "KIRC", "KIRC",
"KIRC", "KIRC", "KIRC", "KIRC", "KIRC", "KIRC", "KIRC", "KIRP",
"KIRP", "KIRP", "KIRP", "KIRP", "KIRP", "KIRP", "KIRP", "KIRP",
"KIRC", "KIRC", "KIRC", "KIRC", "KIRC", "KIRC", "KIRP", "KIRC",
"KIRC", "KIRC", "KIRC", "KIRC", "KIRC", "KIRC", "KIRC", "KIRC",
"KIRC", "KIRC", "KIRC", "KIRC", "KIRC", "KIRP", "KIRC", "KIRP",
"KIRC"), A1BG = c(66.4361, 39.6938, 77.9754, 53.1898, 191.8009,
50.8509, 12.4224, 28.5205, 80.5208, 19.1515, 53.2646, 21.3555,
33.4962, 42.2477, 16.9054, 58.6972, 35.6614, 109.9437, 14.429,
70.2027, 59.4344, 22.4731, 28.4957, 19.4994, 25.8724, 298.569,
148.3682, 139.7575, 61.7103, 220.857, 13.2709, 7.5077, 106.505,
129.3458, 28.7289, 80.3571, 18.543, 40.3925, 72.5155, 54.2779,
7.6608, 45.0186, 15.8792, 29.0844, 24.1362, 24.808, 127.0924,
51.6359, 16.2703, 40.4216), A1CF = c(0.374, 0.6125, 1.1703, 23.663,
2.9283, 47.0016, 126.8116, 316.5775, 315.9734, 190.5771, 127.0227,
335.9949, 59.898, 466.994, 595.8235, 136.7737, 0, 0, 0.7762,
86.9447, 0.9457, 12.178, 8.3199, 41.1204, 72.8039, 1.4406, 224.4191,
88.0664, 9.8863, 38.624, 5.9625, 271.4374, 5.5, 0, 106.4724,
73.3766, 168.3626, 6.6141, 141.568, 50.2988, 247.236, 36.9128,
173.1216, 42.394, 51.7409, 571.1754, 3.9402, 276.699, 0.6258,
171.743), A2BP1 = c(0, 1.2249, 2.3406, 0.4733, 5.8565, 1.6207,
0, 0, 0.3495, 0, 0, 0, 0.4248, 0.5456, 0, 0, 0.8703, 2.0471,
0.7762, 1.2938, 0, 0.4684, 0, 0, 3.6101, 1.9208, 0, 0, 0, 2.414,
2.1295, 1.2338, 1, 23.2415, 0, 0.6494, 1.8501, 0.7781, 0.564,
0.498, 0, 0.3729, 0, 0.3563, 0.9582, 0, 26.2683, 0, 0, 0), A2LD1 = c(53.1638,
150.1945, 175.7929, 133.7056, 90.0146, 220.5429, 189.9586, 1274.5478,
79.3569, 61.2079, 69.818, 62.3596, 247.3534, 73.4943, 211.3834,
118.192, 141.7798, 85.4555, 70.9227, 61.1951, 121.9658, 75.5082,
121.9032, 51.8594, 67.5331, 80.4898, 87.5234, 143.2993, 183.569,
147.924, 93.6542, 67.6619, 43.075, 37.83, 66.9431, 128.0065,
53.3858, 115.3026, 161.2126, 66.1355, 661.2867, 38.8255, 58.9311,
59.1628, 91.8562, 2430.7561, 50.8291, 142.4078, 74.3054, 187.11
), A2ML1 = c(0.374, 10.4119, 0, 0.4733, 0, 0.4052, 0, 1.426,
0, 0.3236, 0, 0, 0.8496, 0.5456, 0, 2.887, 0, 0, 0, 0.2588, 0,
0.9368, 0.416, 0, 1.2034, 0, 0, 0, 4.9432, 0.6035, 0, 0, 0, 0.447,
0, 1.9481, 0.9251, 1.9453, 1.128, 3.4861, 0, 0.3729, 0, 0.3563,
0, 2.3627, 0, 0, 4.3805, 0), A2M = c(29835.2992, 9396.2578, 10393.2768,
7446.9238, 22716.3543, 4080.9887, 14629.3271, 22602.4385, 27446.5851,
37196.4554, 53143.7176, 43071.7036, 42157.158, 36148.0142, 27739.0492,
19953.5583, 2267.7372, 48050.0768, 15309.9757, 11263.9665, 3457.7381,
6895.8267, 2168.5237, 6824.1776, 2439.627, 29292.8595, 33466.0932,
13926.7709, 10773.4355, 11296.4092, 13068.9566, 3865.4719, 19986.845,
7094.0991, 54466.2931, 44946.9091, 46699.9352, 7469.8126, 21914.4162,
26395.3586, 53265.4096, 41444.7614, 43623.3424, 38610.7374, 30405.7241,
19550.9096, 4950.5106, 57936.7379, 6543.7985, 66622.8908), A4GALT = c(1418.1002,
915.6331, 2365.7109, 2372.9295, 3014.6413, 802.269, 1449.2754,
1129.8871, 994.058, 1440.1676, 1554.2071, 1281.1242, 1143.5854,
1046.0993, 1333.0483, 1711.6564, 1312.8808, 2354.1453, 1895.4109,
1403.0185, 3031.7681, 1605.6206, 866.9336, 1641.2396, 2241.8773,
1029.0516, 1906.7916, 1368.2195, 1059.8122, 1135.7876, 365.4174,
1640.3455, 1205.5, 347.282, 1556.7386, 1881.8182, 1510.1758,
1173.413, 1451.7766, 863.5458, 1630.3648, 1073.8255, 1345.0813,
1833.274, 957.8463, 310.1004, 1830.2413, 1349.0291, 179.5995,
1475.9358), A4GNT = c(22.8123, 0, 2.3406, 5.6791, 5.8565, 4.8622,
27.9503, 32.7986, 45.4387, 28.7969, 33.1715, 25.5509, 10.1954,
20.4583, 17.3901, 67.4847, 2.611, 5.6295, 13.1949, 25.3589, 1.4185,
8.8993, 32.8636, 5.3635, 120.3369, 8.6435, 5.1354, 5.1053, 2.4716,
4.828, 0, 99.3214, 17, 0, 0, 6.4935, 42.0907, 15.5625, 44.5572,
27.3904, 11.8395, 2.2371, 0, 6.7688, 10.8592, 8.2693, 10.5073,
1.9417, 0, 8.0601), AAA1 = c(0.7479, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0.8496, 0, 0, 0, 0, 0, 0, 0, 0.4728, 0, 0, 0, 0, 0.4802,
0, 0, 0, 0, 0.4259, 0, 1, 0.447, 0, 0, 0.4625, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0), AAAS = c(624.1586, 746.5932, 1053.8327,
953.1472, 1301.6105, 1170.1783, 695.1346, 648.8414, 479.2031,
540.0224, 643.2039, 785.436, 622.3449, 581.2875, 607.2269, 850.5955,
1018.7119, 577.7892, 568.1576, 755.0734, 760.3062, 1000, 785.3986,
1072.1097, 1236.4621, 964.7059, 543.8439, 516.2731, 516.0652,
739.2879, 838.586, 1070.9439, 1093.5, 838.0356, 620.9022, 847.4026,
832.5624, 1299.4693, 1143.26, 890.9363, 554.714, 471.6629, 581.7196,
978.9811, 638.7771, 578.2634, 2372.0243, 498.5437, 754.6934,
487.9485), AACSL = c(0.374, 61.2464, 22.8204, 35.9678, 21.9619,
21.4749, 70.911, 84.1355, 67.1094, 37.2095, 23.0583, 21.7183,
8.921, 74.1953, 3.7061, 42.223, 15.6658, 1.5353, 33.7635, 24.0651,
1.4185, 53.8642, 24.5437, 48.8677, 80.0241, 3.3613, 49.3003,
2.5526, 18.784, 72.42, 3.4072, 42.5663, 43.5, 0, 8.2189, 16.8831,
40.7031, 7.3922, 25.9447, 32.8685, 82.8763, 100.6711, 17.8156,
46.669, 23.9541, 27.1707, 21.6713, 151.4563, 153.9424, 37.5107
), AACS = c(593.8669, 1303.3226, 813.9263, 739.7066, 740.8492,
1232.577, 271.7391, 391.6815, 510.3111, 603.7638, 473.301, 425.9342,
392.9482, 596.563, 556.4821, 852.7607, 1024.369, 663.7666, 881.3428,
798.8045, 708.768, 894.6136, 832.406, 1222.8844, 806.8592, 411.5246,
809.3465, 583.9183, 905.5858, 339.7707, 896.0818, 825.4164, 955,
586.178, 680.3026, 611.039, 567.5301, 1069.9223, 329.9492, 429.2829,
543.571, 424.3102, 319.907, 522.2658, 592.4658, 519.1967, 591.036,
640.7767, 546.9337, 390.2968), AADACL2 = c(0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0.6035, 0, 0, 2, 0, 0.3736, 0, 0, 0, 0, 1.494, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0), AADACL3 = c(0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0.4248, 0, 0, 0, 0, 0, 1.5523, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 3.5, 0, 0, 0, 0.4625, 0, 1.128, 0, 0, 0, 0,
0, 0.3194, 0, 0, 0, 0, 0), AADACL4 = c(0, 0, 1.1703, 0, 0, 0,
0, 0, 0, 0, 0, 0.2555, 0, 0, 0, 0, 0, 0, 105.1712, 0, 0, 0, 0,
0, 0, 0.9604, 0, 0, 0.4943, 1.8105, 0, 0, 0, 0.2235, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.4854, 0, 0), AADAC = c(0, 0,
0, 0, 0, 0, 0, 0.9507, 0, 0, 0, 38.3264, 0.4248, 0.2728, 1.9956,
0.3609, 0, 0, 11.6426, 0, 0, 0, 0, 0, 0.6017, 0.4802, 0, 1.2763,
0, 0, 2.1295, 0, 0.5, 0.2235, 2.6151, 257.7922, 0.4625, 0, 1.128,
3.9841, 0.3482, 0.7457, 0, 0.7125, 0, 0, 0.6567, 0, 0, 0), AADAT = c(68.0628,
124.3301, 70.8016, 45.433, 20.4978, 185.1702, 172.8778, 107.9026,
70.9542, 96.421, 63.9159, 99.9042, 162.7018, 66.8303, 104.3404,
77.2284, 100.9574, 34.8004, 10.0902, 39.8497, 188.6581, 68.3841,
127.2945, 85.8164, 106.4982, 81.1525, 22.0824, 35.0989, 104.7949,
65.178, 72.8279, 123.3806, 86.5, 126.9345, 61.6419, 107.7922,
149.3987, 49.411, 93.0626, 57.7689, 129.886, 122.2968, 80.945,
123.6195, 73.7788, 42.5281, 169.4303, 73.7864, 723.4043, 48.3608
), AAGAB = c(674.6447, 1476.0374, 985.9567, 1247.0421, 1244.5095,
826.5802, 702.381, 451.0992, 634.7431, 499.2538, 447.411, 531.4596,
856.4146, 494.2717, 666.8092, 576.6871, 1191.0357, 592.6305,
939.9437, 1205.064, 1584.444, 1091.3349, 1051.2194, 962.4553,
783.3935, 513.8055, 669.1488, 752.3931, 1244.1918, 553.4098,
2583.4753, 858.7292, 808, 1601.8772, 549.547, 498.0519, 508.7882,
542.7424, 393.683, 690.239, 510.1419, 413.1245, 472.8892, 722.1233,
1029.0699, 716.4796, 663.2737, 501.4563, 1657.0713, 434.0076),
AAK1 = c(732.2364, 1728.9848, 1579.2861, 1894.4628, 1161.0542,
2443.6791, 1035.1967, 1074.7475, 1162.5306, 1274.5046, 814.3204,
887.8952, 881.4783, 1433.1697, 1143.7531, 1019.4875, 1471.2794,
1201.6377, 1818.958, 1536.0232, 1534.3243, 1779.8595, 1978.0563,
1733.0155, 1841.1552, 1137.0948, 728.72, 565.4116, 1789.4217,
310.1992, 1095.8262, 1837.1376, 828, 1489.4687, 752.405,
947.4026, 827.012, 1209.9849, 956.5708, 1176.2948, 905.7195,
955.6301, 824.1673, 984.6812, 1169.6009, 1481.9846, 1195.206,
1325.7282, 1187.7347, 770.9835), AAMP = c(2327.5991, 3045.1692,
2152.7209, 2275.4378, 1932.6501, 3489.8703, 2151.1387, 1601.9014,
1656.0643, 1712.6055, 1910.5987, 2684.382, 1446.8989, 1904.5281,
1981.8972, 2886.6835, 1613.577, 1887.9222, 3891.336, 1933.4847,
2602.9138, 1873.0679, 1818.3142, 2419.5471, 1957.2804, 1505.4022,
3482.8604, 2273.1334, 1731.0924, 2270.9716, 1430.5792, 1304.7502,
3140.5, 1152.0197, 2074.1571, 1589.6104, 2739.1304, 2360.8323,
1771.0096, 2385.9562, 2149.5604, 1339.299, 2126.646, 3134.3071,
2141.5003, 2288.8364, 1322.6071, 1639.8058, 2118.8986, 2173.448
), AANAT = c(2.6178, 1.8374, 1.1703, 1.4198, 1.4641, 2.4311,
0, 0, 0, 0.6471, 0, 0.511, 0, 0.2728, 0, 2.1653, 0.8703,
2.0471, 0, 0.5175, 0.9457, 0.4684, 1.248, 1.1919, 0.6017,
2.401, 0.5135, 1.9145, 0.4943, 1.8105, 1.2777, 0.6169, 1.5,
1.3409, 0.3736, 0, 0.9251, 1.1672, 2.2561, 1.494, 0.3482,
0, 0, 0, 0.6388, 1.772, 1.3134, 0, 0.6258, 0.62), AARS2 = c(525.718,
641.2249, 668.8122, 645.9063, 680.8199, 584.2747, 262.412,
502.4219, 523.2366, 524.7859, 475.2063, 464.9505, 246.8097,
561.6312, 422.7582, 626.817, 455.544, 327.5128, 570.4589,
599.0258, 477.0827, 659.9485, 618.9923, 541.1144, 422.3827,
400.9604, 409.2952, 205.4882, 560.0445, 232.3476, 497.0017,
478.6798, 623.49, 387.0049, 305.2059, 779.6818, 662.8076,
341.1963, 681.6977, 750.9861, 588.46, 527.9605, 612.6995,
425.7214, 452.5736, 596.5682, 743.3328, 487.3544, 555.6946,
417.878), AARSD1 = c(597.9806, 989.7412, 1583.9672, 924.7515,
2733.5286, 754.8622, 638.1988, 520.4991, 611.3247, 399.5972,
499.1909, 679.9106, 383.1776, 468.6307, 386.8577, 765.7885,
787.6414, 442.6817, 580.1882, 414.7987, 620.3493, 666.5105,
697.2076, 641.8355, 707.5812, 652.1008, 690.7177, 636.8858,
348.4923, 601.6898, 353.9182, 562.6157, 708, 286.273, 586.1586,
728.5714, 735.4302, 1082.3723, 866.8923, 628.9841, 413.3368,
590.9769, 653.3695, 804.0613, 838.0756, 341.4058, 1069.1184,
433.0097, 473.0914, 541.5795), AARS = c(1833.2087, 3809.5238,
4739.0287, 3400.8519, 3606.1493, 2477.7147, 1309.5238, 1984.0761,
2380.6361, 2323.8112, 2363.2686, 2918.4286, 2183.9422, 2660.3928,
2749.3407, 2912.306, 4083.1158, 2761.5148, 2589.6963, 3280.8689,
3703.6555, 3992.5059, 3298.0084, 3389.7497, 1912.7557, 1920.2881,
2473.2315, 1499.6809, 2709.8369, 1821.3639, 3267.4617, 2870.4503,
3894, 3654.9528, 3225.927, 2321.4286, 2848.7512, 7012.0765,
1807.1066, 2709.6614, 3043.7886, 2310.2163, 2445.0039, 4618.0976,
4203.7923, 2442.4099, 1290.4285, 2641.2621, 2016.2703, 3129.8148
), AASDHPPT = c(790.5123, 1154.494, 666.2493, 776.1477, 342.6061,
1030.7942, 1660.4555, 907.9026, 1339.0423, 1158.67, 984.2233,
1213.9253, 1235.7689, 1284.2335, 1423.7047, 755.9906, 1221.497,
786.5916, 1135.9115, 981.7507, 855.3444, 943.2881, 1168.53,
1003.5757, 625.1504, 609.3637, 1113.8785, 1341.4167, 1309.9357,
1214.2426, 661.8399, 617.52, 948.5, 997.5798, 1234.2991,
797.4026, 1054.0703, 504.2252, 668.3587, 882.4701, 964.2204,
1136.4653, 1102.6336, 1217.638, 1085.2823, 1321.9138, 406.5014,
983.4951, 833.5419, 1053.7084), AASDH = c(214.2857, 250.4976,
148.6249, 229.0582, 121.5227, 257.2934, 277.9503, 295.1872,
262.1461, 329.061, 267.3948, 253.4653, 361.9371, 288.3252,
283.0874, 209.3107, 219.7563, 219.0379, 387.6977, 214.5154,
322.4682, 212.6464, 203.8376, 199.6424, 116.1252, 106.6026,
238.7983, 96.3625, 247.1577, 153.2891, 362.862, 140.037,
220.5, 347.0585, 223.4052, 157.7922, 192.4144, 62.25, 159.0525,
171.8127, 301.9065, 266.2192, 260.2634, 161.7385, 187.4811,
193.1483, 289.6076, 233.4951, 317.2716, 226.924), AASS = c(427.0755,
305.0069, 473.3762, 1046.8528, 266.4714, 1009.7245, 414.0787,
675.4605, 589.654, 367.5647, 453.8835, 310.1884, 880.2039,
700.7638, 1040.5531, 228.0765, 1614.4473, 283.0092, 468.8076,
392.545, 515.3817, 684.3091, 888.5653, 816.4482, 2000, 418.2473,
209.0127, 58.7109, 551.656, 441.7622, 566.4395, 2349.1672,
294, 223.6996, 238.7223, 533.1169, 534.2276, 253.6689, 1132.5437,
457.1713, 328.7194, 469.0529, 416.7312, 217.6701, 276.9099,
142.9415, 2046.2978, 207.2816, 451.8148, 400.527), AATF = c(1334.3306,
1346.1951, 1459.9181, 1310.9323, 1692.5329, 1314.4246, 1440.9938,
1126.5597, 1240.1258, 913.7347, 1110.0324, 1635.2603, 745.5395,
1126.2957, 844.4159, 1340.6712, 860.3133, 1223.132, 2095.6631,
941.901, 1384.4381, 1051.5222, 1034.1636, 1017.2825, 836.3418,
1105.8824, 1010.6561, 570.5169, 847.7509, 656.6083, 551.9591,
1179.5188, 1446.5, 964.7466, 1332.5862, 991.5584, 1271.5079,
3250.2295, 1002.2561, 1498.008, 1168.6254, 1619.3139, 1367.1572,
1391.8774, 1324.185, 1135.8535, 1058.6111, 1287.3786, 1419.8999,
1453.6154), AATK = c(127.5243, 14.6991, 12.873, 8.992, 5.8712,
15.3971, 54.3478, 95.0683, 86.3334, 95.7739, 337.3786, 106.8029,
120.2209, 161.0038, 91.5117, 153.8217, 3.4813, 117.1955,
17.4639, 6.2103, 18.9131, 7.4941, 9.5679, 25.0298, 8.4236,
80.1921, 58.0306, 40.2042, 15.8181, 132.1666, 35.3492, 30.8452,
41, 53.1873, 74.7175, 281.3182, 181.3136, 41.6297, 283.1359,
118.0279, 46.6614, 77.9269, 206.8164, 134.7667, 37.3685,
46.6627, 9.8506, 145.6311, 43.8048, 162.7528), ABAT = c(142.1092,
846.4247, 1145.1141, 1233.7908, 563.6896, 5905.1864, 1161.4907,
360.7843, 339.0423, 480.1637, 239.0777, 633.1523, 234.4945,
718.767, 3055.2348, 166.3659, 2442.9939, 83.4186, 34.9277,
2107.8917, 1005.7035, 1555.0351, 3974.8323, 1581.6448, 2007.2202,
258.8235, 232.1222, 709.6362, 2171.5274, 642.7278, 809.6252,
3375.694, 207.5, 254.0924, 104.6045, 562.987, 391.7669, 263.0064,
881.5567, 639.4422, 703.0556, 144.6682, 386.9094, 1405.415,
836.798, 346.1311, 193.7285, 328.6408, 4592.6158, 233.1241
), ABCA10 = c(23.2199, 9.7994, 7.0217, 19.4037, 7.3206, 17.0178,
23.8095, 48.4848, 19.5841, 23.2964, 19.0129, 34.5193, 22.5149,
35.7774, 11.9735, 24.908, 10.4439, 7.6766, 22.8971, 8.798,
23.6414, 3.7471, 8.7359, 7.7473, 9.627, 31.6927, 3.0813,
0, 1.9773, 65.7815, 52.385, 8.6366, 20, 12.7381, 5.6038,
44.1558, 68.4551, 5.4469, 154.5685, 24.4024, 7.3126, 50.3356,
8.1332, 3.2063, 3.5133, 1.1813, 105.0731, 7.2816, 692.8661,
3.1001), ABCA11P = c(58.1862, 93.9335, 27.7882, 59.1671,
38.6091, 48.9425, 65.9834, 80.1806, 87.3471, 70.9439, 118.1594,
57.7962, 116.7842, 90.6137, 58.6558, 56.6402, 24.2776, 45.5937,
83.2871, 46.6319, 82.6928, 34.8197, 51.2215, 51.9249, 68.1528,
69.2341, 36.0149, 28.6918, 36.871, 113.8443, 83.7095, 47.3288,
81.95, 62.2493, 35.7037, 119.5779, 104.7086, 51.7842, 93.5759,
133.2769, 35.0205, 100.3691, 73.993, 29.1557, 61.6516, 38.4406,
217.0087, 58.5, 347.7597, 76.5558), ABCA12 = c(11.5931, 124.3301,
89.526, 141.0317, 178.6237, 1205.0243, 120.6004, 220.0832,
99.266, 252.7007, 67.9612, 665.602, 160.1529, 25.9138, 95.2177,
123.0603, 57.8764, 22.5179, 553.0222, 752.7445, 42.5544,
530.2108, 396.8592, 471.9905, 11.432, 85.9544, 6.6761, 15.3159,
665.3485, 255.8841, 4.2589, 14.1888, 38, 2.0113, 98.6271,
84.4156, 413.9685, 66.1407, 73.3221, 33.8645, 104.4659, 83.5198,
266.8474, 352.3335, 248.8037, 1377.4365, 6.5671, 80.5825,
51.3141, 66.0312), ABCA13 = c(25.4301, 1.2249, 12.873, 0,
5.8565, 0, 2.0704, 0, 0.3495, 0.9707, 0, 1.022, 4.6729, 0.5456,
3.9912, 4.6914, 6.0923, 28.6592, 541.3796, 112.3036, 4.2554,
14.0515, 22.8797, 4.7676, 361.0108, 0.4802, 0.5135, 0, 17.301,
1.8105, 16.6099, 22.2085, 10.5, 9.386, 0.7472, 1.2987, 4.1628,
0, 2.2561, 25.3984, 1.0447, 0, 1.5492, 0.7125, 69.6267, 1.772,
11.8207, 1.9417, 41.9274, 0.31), ABCA17P = c(36.2752, 12.8617,
8.7771, 4.7326, 1.4641, 29.5786, 36.2319, 13.7849, 17.8259,
10.0304, 4.4498, 26.8285, 12.3195, 4.0917, 2.5657, 39.336,
8.2681, 9.7236, 102.4546, 17.0784, 8.9837, 3.7471, 3.328,
4.1716, 5.4152, 13.4454, 4.6219, 16.5922, 4.4488, 89.318,
19.1652, 1.2338, 42.5, 23.2415, 5.9774, 42.8571, 37.9278,
12.45, 671.1788, 46.3147, 20.1967, 18.6428, 4.6476, 8.9063,
6.3878, 25.3987, 19.0445, 23.7864, 1.2516, 12.0902), ABCA1 = c(378.7509,
1455.1891, 1033.3528, 1713.6346, 856.5154, 2420.0324, 900.0776,
6174.8782, 2993.3555, 5553.7859, 1983.3738, 3218.343, 1494.0357,
3179.1135, 3256.4806, 2258.1992, 1283.2768, 461.1054, 877.0505,
2667.3136, 670.9276, 1710.5012, 1582.4284, 1420.7211, 1433.8147,
1946.1945, 1939.1295, 1381.6209, 3498.7296, 1465.9022, 2007.5128,
917.9272, 1577.985, 3137.639, 3958.8568, 3062.8312, 1044.3663,
1159.399, 2689.1371, 1853.0378, 2365.77, 3466.0291, 1497.2657,
913.0638, 2721.4748, 2339.622, 486.6196, 2814.034, 498.1227,
1388.4926), ABCA2 = c(4267.3897, 1250.0383, 1385.6056, 1640.7951,
1060.0293, 1760.5348, 942.029, 2243.1373, 2808.8081, 1837.1763,
2047.7346, 1324.0498, 954.9703, 1930.9875, 1299.4085, 3079.0328,
1367.7111, 1968.782, 1856.6023, 2517.5151, 3804.8406, 1973.7705,
2435.6508, 1859.9523, 1364.0193, 1726.2905, 1586.8533, 479.8979,
1128.0277, 1065.178, 1576.661, 907.4645, 1307.5, 1616.4032,
1230.9704, 1818.1818, 1482.4237, 433.4158, 1600.6768, 1769.9203,
1440.585, 2171.8867, 1596.4369, 868.5429, 1585.4448, 746.013,
2508.6193, 3495.1456, 458.6984, 1278.4624), ABCA3 = c(1893.4181,
1465.013, 3856.0562, 3091.3393, 3431.918, 2689.222, 3052.795,
2074.391, 2635.7917, 1536.5886, 1269.0129, 2005.2379, 823.7043,
1402.0731, 1331.3378, 2961.7467, 3307.6588, 924.7697, 1757.6404,
2490.8624, 1433.1393, 4549.4145, 3365.3996, 2532.1812, 1309.2659,
1638.8956, 1541.1478, 1419.2725, 1534.8492, 1106.2161, 882.0273,
1491.0549, 2005, 446.2819, 1206.6872, 1635.0649, 1496.7623,
2689.9793, 1532.4309, 2035.8566, 1663.4456, 1413.8702, 570.1007,
1497.6844, 2466.3185, 3694.0343, 1451.3216, 2634.9515, 56.9462,
1292.4126), ABCA4 = c(881.825, 3.0623, 0.5851, 0.9465, 1.4641,
2.0259, 3.6232, 6.1794, 4.1943, 5.5005, 11.3269, 6.8988,
22.5149, 3.8189, 5.9867, 2.887, 0, 263.5619, 580.9644, 2.0701,
1.4185, 0.4684, 1.248, 0, 3.0084, 7.2029, 1.5406, 0.6382,
3.4602, 4.828, 9.7956, 1.2338, 2.5, 22.7946, 5.6038, 26.6234,
3.7003, 106.9922, 6.7682, 12.9482, 4.5269, 4.1014, 7.7459,
2.85, 4.1521, 0.5907, 0.6567, 9.2233, 41.3016, 7.7501), ABCA5 = c(730.7068,
355.2289, 297.835, 539.044, 169.8389, 378.0389, 890.2692,
849.4355, 726.6585, 900.4688, 439.7249, 792.3092, 627.0178,
741.3639, 399.9715, 631.1729, 644.0383, 318.3214, 267.003,
248.931, 560.3002, 385.4801, 394.3633, 402.2646, 486.7629,
650.6603, 523.3021, 232.291, 467.6223, 947.4955, 1552.8109,
475.0154, 348.5, 857.925, 399.7385, 816.8831, 1063.3673,
122.5547, 1443.2882, 478.5857, 454.4267, 679.3438, 608.4431,
327.0395, 310.4457, 282.9297, 1832.2115, 695.6311, 646.3079,
379.4466), ABCA6 = c(10.0972, 20.8238, 32.1826, 26.0293,
2.9283, 5.2674, 8.7992, 62.7451, 15.0297, 26.532, 47.7346,
17.3746, 44.6049, 56.192, 28.7934, 32.4792, 50.0435, 6.653,
219.6565, 98.5891, 7.0924, 11.2412, 18.7198, 36.3528, 4.2118,
32.1729, 3.5948, 40.2042, 7.4147, 32.589, 43.4412, 5.5521,
47, 4.0226, 66.4986, 38.3117, 18.5014, 11.2828, 43.9932,
32.3705, 44.2239, 76.4355, 23.6251, 6.4125, 439.4787, 552.2741,
42.0292, 213.1068, 20.6508, 8.0601), ABCA7 = c(142.8571,
464.2474, 815.6817, 783.7198, 1194.7291, 406.8071, 93.6853,
182.5312, 189.4443, 262.084, 198.6246, 196.4867, 104.9278,
390.3437, 84.6697, 328.4013, 358.5727, 345.957, 497.1379,
375.4666, 387.2455, 699.2974, 670.168, 538.1406, 419.3742,
1285.9544, 160.226, 49.1385, 498.2699, 106.8196, 255.5366,
302.8994, 183.5, 402.4806, 134.865, 348.0519, 221.0916, 723.6566,
329.9492, 839.1434, 90.8853, 133.1096, 60.031, 87.2818, 304.3773,
69.1081, 919.3893, 129.6117, 234.6683, 86.1815), ABCA8 = c(80.0299,
358.2912, 31.0123, 701.3725, 125.9151, 3.6467, 6.2112, 119.3108,
1.3981, 62.4472, 100.3236, 49.0578, 136.3636, 178.6688, 118.0244,
69.6499, 29.5909, 23.0297, 194.431, 147.7543, 49.174, 44.9649,
1329.1041, 1386.77, 359.8075, 36.9748, 13.3522, 33.8226,
879.387, 80.2655, 121.8058, 77.1129, 42, 17.8781, 206.2202,
5.8442, 149.8612, 16.7297, 75.0141, 14.4422, 167.4937, 105.1454,
263.749, 42.394, 83.6798, 2.3627, 283.6973, 59.2233, 150.8135,
142.9125), ABCA9 = c(8.2274, 11.6368, 17.5541, 8.992, 2.9283,
11.3452, 35.7143, 50.3862, 30.7585, 21.6786, 45.712, 28.106,
64.1461, 60.0109, 41.6221, 32.8401, 7.3977, 4.0942, 58.989,
15.5258, 15.6033, 3.2787, 1.664, 8.3433, 4.2118, 45.1381,
1.0271, 19.783, 0, 21.726, 52.385, 0, 51.5, 2.0113, 32.5021,
104.5455, 51.3414, 14.3953, 137.6199, 49.8008, 21.9378, 87.994,
13.9427, 8.5501, 63.2389, 2.3627, 51.2231, 12.6214, 15.0188,
3.1001), ABCB10 = c(429.6933, 408.5132, 263.3119, 309.0393,
207.9063, 521.8801, 893.3747, 526.6786, 503.3205, 626.0895,
475.3236, 646.4388, 565.4206, 413.257, 858.9552, 415.0126,
525.2393, 384.3398, 446.6867, 449.4731, 384.4085, 529.7424,
388.9553, 386.174, 250.3008, 412.485, 457.0548, 532.8653,
458.2304, 828.0024, 251.2777, 212.8316, 434.5, 269.7357,
502.1014, 401.9481, 508.7882, 380.8924, 271.8556, 371.0159,
690.5197, 423.5645, 488.3811, 440.684, 412.0112, 805.0797,
262.6826, 549.5146, 409.2616, 505.3088), ABCB11 = c(0, 0.6125,
0, 0, 0, 0, 0, 0, 0, 0.6471, 0, 2.8106, 0.4248, 0.8183, 0.2851,
0, 0, 0.5118, 6.9855, 0.5175, 0, 0.4684, 0, 0.5959, 0, 0,
0.5135, 0, 0.4943, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.564, 0, 0,
0.3729, 0, 0, 0, 0, 0, 0.9709, 0, 0), ABCB1 = c(83.3957,
410.9631, 1382.0948, 3798.8642, 2136.164, 6457.4554, 1570.911,
634.1058, 871.7232, 947.385, 383.8997, 779.0482, 852.1665,
1000, 2371.8908, 1003.2479, 426.4578, 100.8188, 3182.6914,
1995.8468, 107.8046, 8507.26, 3411.1591, 2440.4052, 5239.4705,
354.8619, 1671.0746, 183.1525, 1926.8413, 654.1943, 399.4889,
4606.4158, 796.5, 73.7471, 738.5822, 312.987, 518.0389, 5323.9336,
403.8353, 109.5618, 967.3544, 470.5444, 341.5957, 432.8465,
2066.1246, 2427.6432, 3770.1527, 1274.7573, 84.4806, 75.3313
), ABCB4 = c(14.5849, 39.8101, 25.7461, 62.4704, 177.1596,
97.6499, 219.9793, 21.8657, 14.3307, 49.8283, 25.89, 47.7803,
29.7366, 29.7327, 39.6265, 137.4955, 17.4064, 29.1709, 16.6877,
152.1532, 1.4185, 76.815, 233.3732, 392.1335, 320.0963, 91.2365,
97.5735, 17.2304, 42.0168, 12.6735, 9.7956, 133.868, 208,
6.4808, 41.4682, 195.4545, 6.013, 272.7329, 175.4089, 36.3546,
16.0181, 18.6428, 35.6313, 18.8814, 105.3982, 515.6527, 199.6388,
516.9903, 2.5031, 288.9251)), row.names = c("TCGA-2Z-A9J9-01A",
"TCGA-2Z-A9JO-01A", "TCGA-2Z-A9JP-01A", "TCGA-2Z-A9JR-01A", "TCGA-2Z-A9JT-01A",
"TCGA-5P-A9JU-01A", "TCGA-A3-3306-01A", "TCGA-A3-3322-01A", "TCGA-A3-3324-01A",
"TCGA-A3-3331-01A", "TCGA-A3-3349-01A", "TCGA-A3-3352-01A", "TCGA-A3-3365-01A",
"TCGA-A3-3370-01A", "TCGA-A3-3373-01A", "TCGA-A3-A6NI-01A", "TCGA-A4-7584-01A",
"TCGA-A4-7828-01A", "TCGA-A4-7915-01A", "TCGA-A4-7997-01A", "TCGA-A4-8312-01A",
"TCGA-A4-8517-01A", "TCGA-A4-8630-01A", "TCGA-A4-A4ZT-01A", "TCGA-A4-A5DU-01A",
"TCGA-AK-3426-01A", "TCGA-AK-3429-01A", "TCGA-AK-3451-01A", "TCGA-AK-3456-01A",
"TCGA-AK-3458-01A", "TCGA-AK-3465-01A", "TCGA-AL-3468-01A", "TCGA-B0-4691-01A",
"TCGA-B0-4699-01A", "TCGA-B0-4823-01A", "TCGA-B0-4843-01A", "TCGA-B0-4845-01A",
"TCGA-B0-5084-01A", "TCGA-B0-5085-01A", "TCGA-B0-5092-01A", "TCGA-B0-5119-01A",
"TCGA-B0-5120-01A", "TCGA-B0-5693-01A", "TCGA-B0-5696-01A", "TCGA-B0-5697-01A",
"TCGA-B0-5701-01A", "TCGA-B1-A47N-01A", "TCGA-B2-5633-01A", "TCGA-B3-4104-01A",
"TCGA-B4-5834-01A"), class = "data.frame")
Desired output (sample):
I have the following nested data frame. I'd like to aggregate all the values in the data column (a table nested inside this data frame), and then mutate 6 corresponding variables in df.test to store these aggregated values.
structure(list(Unipro.ID = c("Q13177", "Q8TD19"), Gene.name = c("PAK2",
"NEK9"), Mod.site = c("Q13177_278", "Q8TD19_81"), Mod.site2 = c("PAK2_278",
"NEK9_81"), `mod.or.not(Y/N)` = c("Y", "Y"), `kinase.or.not(Y/N)` = c("Y",
"Y"), data = list(structure(list(`First Scan` = c(18638, 18640,
18699, 18889, 18890, 19117, 19277, 19387, 19395, 19495, 19502,
19576, 19650, 19726, 19802, 19879, 19956, 20034, 20111, 20189,
20266, 20342, 20420, 20497, 20574, 20670, 20727, 20803, 20918,
20956, 21033, 21147, 21185, 21263, 21377, 21416, 21492, 21607,
21646, 21762, 21840, 21879, 21994, 22072, 22113, 22240, 22298,
22356, 22473, 22530, 22703, 22760, 22820, 22936, 23012, 23165,
23243, 23281, 23396, 23472, 23572, 23590, 23665, 23741, 23760,
23894, 23970, 23990, 24122, 24200, 24372, 24428, 24568, 24605,
24661, 24702, 24860, 25051, 25116, 25457, 25459, 25470, 25522,
25806, 26042, 26288, 26516, 27123, 27344, 27573, 27867, 28024,
28224, 29830, 30443, 32854), TMT126 = c(0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 683.603289421257, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 1058.63721546934, 5105.06075558397, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 786.877999584963, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 1158.61443528477, 0, 0, 0, 0, 0, 790.552967308115,
1769.24251582031, 0, 0, 4230.5333458008, 0, 754.027149866911,
0, 0, 937.667214973757, 0, 0, 16065.4787548108, 0, 1033.79325384522,
0, 1545.17012178734, 22490.6473715619, 0, 12391.863457809, 1694.80635977683,
1669.92554702357, 2915.19207246094, 1305.76068893333, 1218.70367092158,
1724.48555844726, 1220.71902820389, 1142.29342043404, 15089.1510828024,
786.642299340822, 0, 0, 803.014098225646), TMT127N = c(0, 0,
0, 0, 0, 0, 0, 865.592199462889, 0, 0, 959.490090490725, 0, 0,
0, 3632.481907304, 0, 0, 0, 0, 0, 2281.36683418381, 0, 0, 1148.88409380685,
2282.96815473633, 0, 737.964767776176, 0, 0, 714.336386722035,
1336.78857436523, 0, 992.266982382272, 0, 8167.51299697265, 1481.83583737793,
0, 0, 0, 0, 0, 994.583403515622, 0, 3822.09582135578, 730.363546764861,
0, 0, 1601.84151123047, 0, 0, 0, 0, 1248.0926793457, 0, 2349.57136347657,
0, 925.784469760195, 0, 0, 0, 0, 0, 0, 0, 2869.82125708008, 779.87640303955,
0, 4092.33059296876, 0, 0, 0, 1133.40195924235, 0, 0, 0, 15684.5063332031,
0, 0, 0, 2583.52765913085, 24540.0170683595, 1238.62926386719,
11428.895775, 1483.56418256835, 1332.67443391114, 1825.25018671875,
1123.61925747071, 1030.31205838623, 1533.51140185547, 2153.17955595703,
1008.89100509033, 12831.9995203125, 1684.62781442872, 0, 1136.51165412598,
0), TMT128C = c(1611.91088437501, 1786.68805515069, 0, 1823.53765170799,
874.032758349609, 0, 0, 1177.50190689896, 646.466474431654, 1499.41582602539,
1255.4240426748, 1783.93500465848, 5996.15327152243, 8208.12153691405,
21348.7184882813, 33456.1981237878, 46046.4520991817, 5945.01241588586,
6798.69927773436, 18048.0166453125, 3490.51877753905, 7744.45986702618,
11529.8069519532, 3396.17701757812, 4651.27894687501, 14325.2306858641,
1561.40410561524, 0, 9592.47815625001, 2089.33504892578, 3367.76334082031,
10669.6799841273, 1439.18170268554, 0, 9310.75753007808, 1700.90959455201,
0, 6206.48464218753, 3397.72012818408, 0, 7079.64974935495, 1845.17109756826,
1177.34360229492, 6097.26335625001, 1754.74566738282, 1392.17984296875,
7026.71143068691, 2193.1615467585, 2836.98221749547, 6301.54294276034,
0, 3623.93436796874, 1312.70517940113, 0, 4724.16524276671, 0,
3946.52595689374, 2970.83282951206, 865.842519287108, 3283.01703994418,
0, 0, 1262.2417984723, 3482.3909258789, 4262.98461721732, 725.868942626953,
3139.75638657914, 6559.00482506506, 719.443235961914, 1980.26911301631,
735.412520327744, 2538.58699423828, 0, 0, 3345.73545624628, 17476.7195602775,
0, 3061.52193035407, 0, 3370.09478829905, 23476.7103351433, 1087.0128899699,
11912.6099077086, 1957.73038989257, 3158.21345361328, 3399.21823401529,
2593.01804501953, 934.40066220703, 2667.04373535038, 2390.35165488282,
1286.71902949219, 13590.0106023424, 1817.04336728435, 720.287557226125,
916.984646430313, 0), TMT129N = c(5236.22537109374, 5253.53062265625,
2574.52071173515, 6331.35667763673, 2909.4254578125, 2260.77584003907,
2299.19597588428, 2146.42980395508, 1524.82949099121, 4594.55742216798,
2945.06894956055, 3353.94465205077, 17265.8083640626, 58543.518953906,
42651.7719375001, 85288.5246984373, 147283.5420375, 16268.584838086,
31273.8547945313, 64984.9260843748, 15376.6111124999, 18807.9172359374,
41234.0440359375, 7038.37527421877, 14994.4310613281, 35007.7380726561,
5527.67209306639, 8827.66559818178, 32485.2518203124, 6771.8957270508,
10744.4285308594, 21706.4754328126, 7351.69939775389, 7286.74206159669,
31413.8810906249, 5402.31355253905, 7530.87224590352, 22346.2738640626,
7845.50157832029, 8598.84479102795, 13646.5870857421, 3948.41240126953,
4709.55193242188, 14502.506803125, 5527.37162548826, 4609.58657929687,
17346.4145701172, 3806.28572124023, 6695.23604707033, 10312.3288382812,
5030.34046109047, 12238.5130183594, 4008.11956391603, 3366.81581267473,
10163.4030287109, 5657.196356792, 9102.44150449222, 3997.46163222657,
3410.17096083986, 7459.02988359372, 2817.01574699993, 3228.9395395409,
1668.4944914795, 8344.1621935547, 6186.76663623048, 4037.21344028321,
4953.59359042969, 6163.53660527345, 2403.32564355468, 7143.26629951174,
1674.84291965332, 6149.94359912111, 2371.20030146484, 2708.46439392749,
8739.23083300782, 17708.4475347656, 2940.52380873895, 6755.28018515623,
2318.903026372, 5194.7135689453, 37003.6496414061, 1967.3172459961,
11835.060703125, 6357.79309687501, 6929.07336386721, 4922.18842499999,
5119.39548720702, 5098.90275791014, 5251.26713525389, 3925.45210825196,
3679.82248959962, 14610.4555587891, 3124.13869614258, 1892.91264873047,
2424.18607792969, 1217.02803508301), TMT130C = c(8534.51942065459,
7915.5220132809, 5427.32003829632, 6570.76122664215, 4059.66840776324,
6061.80182135341, 4680.93892861494, 3924.40329255471, 1651.02954082353,
9963.97493859979, 2095.70639501953, 6791.52962729408, 29089.1005494669,
84188.5986999335, 79102.1477485965, 111076.559352845, 228355.265937366,
31033.9549731673, 34443.9266034838, 100401.13011289, 20754.0266423186,
29334.7579023722, 70572.5366141732, 12419.2333360566, 17842.2049581652,
61518.6150214895, 10110.7438562966, 17502.4708992952, 61357.4708251869,
9995.86560411634, 17530.0854369149, 53203.79872126, 11560.0754183956,
13608.3534926174, 46828.7269257867, 11019.0036476036, 12751.4728461078,
26876.5493340097, 15817.3356058367, 13322.0303678209, 28346.2564980389,
6207.28749858749, 9509.23448457102, 36589.1961503305, 10263.6194964664,
7564.4130620443, 29223.5021208951, 7015.27604618795, 8985.64276558643,
18886.2813530505, 7104.5605795249, 17804.4468985063, 6812.45760606556,
6606.50436956103, 18488.5028791766, 5863.36827860186, 13255.0296116594,
6453.52502342659, 5515.76995189115, 13991.8058338637, 3809.92839574631,
5061.53991882257, 5991.94758913396, 11334.5748098943, 6206.71330898436,
5219.26272399663, 10963.7722168793, 7489.68230742191, 3361.11015884112,
9301.91002246388, 2802.03791217282, 10320.7319848397, 2509.93049810913,
3062.67670544402, 12115.818468418, 22596.7672476562, 5210.67744771832,
12678.5528095273, 3674.33047767757, 10158.3722420368, 38620.8279703125,
4299.66130273203, 16002.3112734375, 9597.19786319193, 10816.7672483378,
10449.5441895035, 8882.17509875803, 5726.84640941102, 8043.21098886215,
7002.83669129089, 4997.59224291582, 17214.8268832032, 4203.51644882184,
2044.40880964859, 2570.59274774035, 1621.14734118819), TMT131 = c(9418.17036445313,
8878.68274218749, 8378.77222617187, 5645.64330000002, 4753.99889121092,
4420.32600410157, 5180.99550117188, 4787.09710488281, 4537.51669335937,
11573.2264957031, 3011.21359365234, 6855.67281093751, 33135.919678125,
78639.2005781249, 89222.1922968751, 117395.8545375, 221719.7701125,
31420.3548656251, 46548.5491968752, 99822.2995687501, 21221.19759375,
31903.8316523437, 65884.2084140624, 12792.6073300781, 32494.3474710937,
55493.9358187499, 10832.2051148437, 16887.821878125, 56011.2740859373,
11016.5575394531, 15928.4518980469, 45726.2466890625, 10025.5855992188,
21737.7840796876, 59706.8427656249, 9716.2620714844, 18071.6204882813,
35495.48536875, 19939.4042976562, 13032.988340625, 32501.3787140625,
7199.69156542971, 10150.1181632812, 40466.1293437499, 10645.1514984375,
11109.5601890625, 28522.8374835938, 5834.91936972658, 6796.13434628906,
23190.480225, 9266.24059453124, 29075.5950890626, 7621.3969927734,
9322.90506210938, 22800.4644656249, 7069.82396484372, 15978.9590683593,
6530.85736874999, 6011.4814611328, 17323.3141382813, 4554.70168183593,
5415.33720234375, 6123.79340332032, 14385.484575, 10146.5210988281,
3506.75934082032, 13551.8667152343, 9222.76466718747, 5501.55972480469,
13922.8457027344, 4250.15196503907, 9017.17755468752, 3575.14872451172,
5562.49420546876, 17743.048228125, 24449.2857210939, 5704.12143984373,
15124.9334800781, 5079.61518925782, 10921.1189097656, 35210.8651640626,
4445.87090449217, 15901.6300136718, 11364.1122761719, 9222.57776601566,
8388.0708205078, 9823.76574609376, 6197.90128417968, 8764.95285703127,
9621.21499453124, 6218.11219218749, 15561.3999234375, 4677.02849882812,
3778.64337919922, 2264.6449546875, 2498.85718242187)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -96L)), structure(list(
`First Scan` = c(16668, 16732, 16751, 16894, 16948, 16970,
17120, 17177, 17210, 17367, 17403, 17406, 17633, 17638, 17833,
17917, 18052, 18090, 18149, 18285, 18338, 18376, 18397, 18504,
18561, 18605, 18642, 18662, 18702, 18753, 18755, 18792, 18932,
18983, 19022, 19210, 19238, 19249, 19438, 19586, 19669, 19707,
19742, 19898, 19937, 20040, 20131, 20172, 20302, 20361, 20405,
20613, 20633, 20790, 20962, 21004, 21148, 21187, 21382, 21454,
21455, 21585, 21726, 21729, 21966, 22000, 22197, 22267, 22585,
22789, 23382, 23725), TMT126 = c(1882.9047259105, 3449.99254746093,
1024.2977771073, 5158.15467082831, 15759.4817531249, 5163.23808916981,
23084.5250109376, 11158.1465572494, 2542.16341682018, 6236.75949960936,
2540.56668486328, 4643.57579941405, 3676.30715654298, 2933.73809824218,
0, 3710.6456292118, 4085.0294783203, 675.688557714844, 1950.23111088867,
2205.57922470703, 8296.83276855471, 2672.59435136718, 3034.29849873047,
45624.8063390625, 22890.978928125, 3676.17324550782, 804.357999226392,
2339.07756328126, 2307.76013144531, 21624.511528125, 1518.1371897772,
10635.9306925781, 2680.27191738282, 11041.5375597656, 7065.95542382811,
8761.31611523439, 1801.56362783203, 7668.99884179686, 7505.24644894218,
1129.2046734375, 5552.81345566407, 6607.18650630685, 2071.91356259765,
5463.23402109373, 5149.03205670459, 1219.86494693985, 4910.47049139314,
3155.11395608923, 1398.27528804432, 6326.38970151936, 3541.72186757812,
4198.17516679688, 1440.07868518477, 0, 3465.05218769531,
961.89374368008, 2280.46133583984, 2356.01639560548, 0, 2526.26822314453,
1853.50988980811, 1310.57137792969, 1419.80935503652, 1688.71382270508,
1531.11097573243, 1821.7144977539, 1352.28257709961, 2137.31568546879,
1951.55034804646, 1311.16770682422, 0, 1720.31448781044),
TMT127N = c(2771.68523671875, 5795.16860742187, 1532.51465844727,
4997.19921035158, 25241.5731433593, 7411.63585078126, 35598.7388882813,
11632.0612353516, 3981.85727929688, 8431.46010673831, 4482.49877431641,
6676.69583173827, 4001.42575561522, 3282.20197690431, 1573.56998100586,
3935.30607905274, 7116.32664960937, 2307.64589311523, 4102.16486528321,
4772.48413124998, 14428.697888086, 3332.28787353515, 5485.6754455078,
91312.7273437499, 37781.7556101563, 4225.51941313476, 1839.82785454101,
4019.30935473634, 3973.05389443358, 32996.0530066406, 2037.37071027832,
20023.8569953125, 3530.19987890626, 16245.3700406251, 11744.1997277344,
14524.1289123047, 2272.03076718749, 7951.62546621093, 11169.627975,
2088.36168669433, 8426.40678837892, 5546.88783515623, 825.817551295166,
11964.5538246094, 5850.03650859376, 1951.7206411377, 6372.5401716797,
2720.9217121582, 2928.83477036133, 7842.02046181641, 4470.19431181639,
5518.54197597655, 4886.56620761718, 2598.45832382813, 5150.38777236328,
1764.69598916016, 4431.15874072266, 5851.29773701172, 3535.65924873047,
4506.44162783202, 3948.30497885742, 3474.47286108398, 4899.0403397461,
3668.68391484374, 1715.68195283204, 1517.17190141602, 2988.61810048828,
3049.25398110351, 3346.69140673828, 1629.18274760742, 1652.68488032226,
3111.11825932616), TMT128C = c(3546.93682792969, 5443.04733562549,
2406.94690743869, 7409.37070198117, 24566.1728096405, 7605.80923058386,
37319.8058793624, 13554.5726592211, 4322.44607961136, 10647.2819212435,
5886.92810075651, 8753.17848761496, 6553.53110235196, 4646.29586668501,
1968.9492260545, 4154.15849160926, 7670.78133562564, 2768.92031395084,
3603.54312466775, 4079.51357784559, 16511.9767178725, 3885.32503013637,
4789.39730709742, 75477.2031668271, 39696.3689381062, 6235.92097131154,
2324.05125227432, 4163.9149784173, 5315.6094992347, 31155.5901563382,
1816.81292346028, 16308.3860112984, 4287.50902664535, 15935.5273428347,
12014.8172808907, 13939.1329255981, 3921.1247165634, 11449.7683673977,
13003.5107817882, 2150.04455439802, 10889.4027696227, 6249.74721525204,
3815.90910915446, 9774.84044695728, 5968.13675351661, 3450.75965045113,
6496.88961226217, 4185.18567482653, 3493.2378779832, 6249.25306422371,
4966.6136218745, 7318.69851904571, 4807.8110810617, 1540.76095735796,
6832.23484476677, 2965.60747444571, 4139.87531020355, 4987.87787026309,
3610.04293593895, 5899.23647575952, 5796.31857744029, 3008.6643611543,
5433.73871616973, 2796.2558469155, 2533.88330506493, 3404.51135442734,
1989.89578135028, 3963.71640004554, 1855.68417729492, 1858.78970920819,
1801.24628626465, 1521.81316450196), TMT129N = c(5603.3715375,
7466.48746787111, 5737.87805507811, 10125.7479275391, 27223.0979882812,
11989.9738019531, 44015.6930296875, 17091.8407880859, 7938.03088417966,
12459.8042378906, 7051.86322177736, 10014.4530556641, 7992.80055556639,
5937.58603710939, 3499.28743828124, 8939.26554726561, 11082.9714445312,
3852.90205781249, 4749.96272050781, 6339.30068320313, 18824.0227183594,
5960.32649501952, 4809.15115634765, 84223.8693140624, 36909.3952734375,
7050.83312226562, 2210.35349326172, 5805.73487548829, 6812.68262490236,
29434.3397519532, 2379.38804296874, 20404.2909726562, 5217.36262587891,
19563.0384855469, 13420.0282283203, 12340.821178125, 3700.34673647461,
10695.0951164062, 15392.6850773437, 2774.59824887696, 12177.5412128907,
7713.37043554689, 3815.38668457032, 11456.5398427734, 11993.45544375,
3913.54135283203, 9596.82833613281, 5349.46382695312, 3289.20360688477,
8314.06658349611, 5784.25196894531, 7882.8373013672, 5610.97252675783,
3071.51274536133, 7498.057575293, 3661.76029086913, 6717.39973330079,
6506.75830341797, 4466.70268945312, 5969.45083388673, 7538.86863662107,
3945.00036079102, 7081.96355947265, 3515.28444770509, 4725.6043602539,
4937.22756269533, 3392.35801362304, 5545.30827919922, 4755.80240244142,
3465.95602353517, 2446.60899624023, 3763.70442260743), TMT130C = c(10983.9436123188,
9211.23508829853, 10557.3906937124, 10405.446046875, 28290.6707273437,
16687.4414721715, 47013.3588375, 24663.9287051498, 10819.8059091653,
12112.6441605469, 12221.0968142757, 14143.6632554762, 8870.24921835937,
10222.1638647599, 4387.39878905819, 7903.13580132335, 12659.3879214159,
4977.95176757538, 6713.09197134976, 8619.08616316083, 25601.7183693629,
9371.93738344518, 8431.95007587771, 96391.5124218752, 44929.6342171874,
7378.82902968752, 3945.11645284669, 8071.18750848737, 7751.84701707912,
38104.5129609375, 3638.03642401144, 20403.32641875, 7808.49545553976,
23539.4361984374, 18752.01165285, 20811.394168736, 5824.4538993184,
18893.4697542139, 12674.1161636719, 2807.67847851562, 12264.333778125,
11014.4240599255, 6251.73090996096, 13488.8996818318, 15964.1844357857,
5828.08199702709, 11239.0816570313, 8253.48396721728, 4132.48828007812,
13706.3856985815, 7642.39674902346, 8508.96873105467, 8428.07027860162,
5015.03055391062, 9927.36947998573, 4157.77188427735, 6591.46973203123,
10252.0377530773, 5571.05266915471, 8466.28209717723, 9830.67984953953,
6387.25448143305, 8227.90628531475, 6429.95347392001, 5927.99930589914,
6179.43049557376, 4779.11406556715, 5853.8402419922, 5984.37583438957,
2653.83845332031, 4653.86852737383, 4421.67880346553), TMT131 = c(13383.1659585937,
10027.0432195313, 16206.8438039063, 16355.4699128906, 29514.0965624999,
32393.8092585938, 53039.4243328127, 23782.9527632812, 21228.6799054688,
15934.2437460937, 11102.0789214844, 15037.0606933593, 13194.4500703125,
9207.67683515628, 6735.74908359374, 12899.2203527343, 13469.5999195312,
6745.45593691405, 7115.74944609379, 11411.3147414063, 27412.2978679687,
11739.3367394532, 8886.12224414064, 96689.4600375001, 45453.11203125,
7980.59650781252, 6961.24482539061, 10250.2241015625, 13306.6513335937,
39924.4417171876, 6483.56406328127, 22704.3346148437, 9771.777984375,
24691.8792656249, 18723.0097054687, 16477.3002410156, 7680.46611621096,
17120.4574535157, 18478.2819375, 4914.95838925784, 14805.4535964844,
11972.9694691407, 6757.70631679689, 12590.9775105469, 19122.4749375,
7233.17976562498, 14106.4787144532, 11431.40191875, 7873.65249257815,
16542.6268992187, 8767.54336992188, 11536.5155554687, 10534.8182027344,
6824.66757714841, 13787.927071875, 7553.96639121096, 7917.68129238283,
12813.6645140625, 7735.42080351564, 7866.51996796874, 12833.8237371094,
7160.45798144533, 11867.8380820313, 7840.4843214844, 6481.4439357422,
8916.9703347656, 7413.33213457034, 12091.0962304687, 10392.2846542969,
5704.91811914064, 5982.57808242186, 6364.45633183592)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -72L))), `# of PSMs` = c(96,
72)), row.names = c(NA, -2L), groups = structure(list(Unipro.ID = c("Q13177",
"Q8TD19"), Gene.name = c("PAK2", "NEK9"), Mod.site = c("Q13177_278",
"Q8TD19_81"), Mod.site2 = c("PAK2_278", "NEK9_81"), `mod.or.not(Y/N)` = c("Y",
"Y"), `kinase.or.not(Y/N)` = c("Y", "Y"), .rows = structure(list(
1L, 2L), ptype = integer(0), class = c("vctrs_list_of", "vctrs_vctr",
"list"))), row.names = c(NA, -2L), class = c("tbl_df", "tbl",
"data.frame"), .drop = TRUE), class = c("grouped_df", "tbl_df",
"tbl", "data.frame"))
Here is the final data frame that I want (of course, the NA would be replaced by the sum values).
Besides this, how can I call pivot_longer on the 6 TMT columns in all of the nested tables?
Does anyone know how to do that? Thanks for any help!
# Collapse every nested table to a single row of column sums, spread those
# sums into top-level columns, discard the summed scan numbers and ungroup.
df %>%
  mutate(
    data = map(
      data,
      function(tbl) summarise(tbl, across(.cols = where(is.numeric), .fns = sum))
    )
  ) %>%
  unnest_wider(col = data) %>%
  select(-`First Scan`) %>%
  ungroup()
# A tibble: 2 x 13
Unipro.ID Gene.name Mod.site Mod.site2 `mod.or.not(Y/N)` `kinase.or.not(Y/N)` TMT126
<chr> <chr> <chr> <chr> <chr> <chr> <dbl>
1 Q13177 PAK2 Q13177_278 PAK2_278 Y Y 1.00e5
2 Q8TD19 NEK9 Q8TD19_81 NEK9_81 Y Y 3.56e5
# ... with 6 more variables: TMT127N <dbl>, TMT128C <dbl>, TMT129N <dbl>,
# TMT130C <dbl>, TMT131 <dbl>, # of PSMs <dbl>
You can do summarise(across(starts_with("TMT"), sum)) after calling unnest(data):
df <- structure(list(Unipro.ID = c("Q13177", "Q8TD19"), Gene.name = c(
"PAK2",
"NEK9"
), Mod.site = c("Q13177_278", "Q8TD19_81"), Mod.site2 = c(
"PAK2_278",
"NEK9_81"
), `mod.or.not(Y/N)` = c("Y", "Y"), `kinase.or.not(Y/N)` = c(
"Y",
"Y"
), data = list(structure(list(`First Scan` = c(
18638, 18640,
18699, 18889, 18890, 19117, 19277, 19387, 19395, 19495, 19502,
19576, 19650, 19726, 19802, 19879, 19956, 20034, 20111, 20189,
20266, 20342, 20420, 20497, 20574, 20670, 20727, 20803, 20918,
20956, 21033, 21147, 21185, 21263, 21377, 21416, 21492, 21607,
21646, 21762, 21840, 21879, 21994, 22072, 22113, 22240, 22298,
22356, 22473, 22530, 22703, 22760, 22820, 22936, 23012, 23165,
23243, 23281, 23396, 23472, 23572, 23590, 23665, 23741, 23760,
23894, 23970, 23990, 24122, 24200, 24372, 24428, 24568, 24605,
24661, 24702, 24860, 25051, 25116, 25457, 25459, 25470, 25522,
25806, 26042, 26288, 26516, 27123, 27344, 27573, 27867, 28024,
28224, 29830, 30443, 32854
), TMT126 = c(
0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 683.603289421257, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 1058.63721546934, 5105.06075558397, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 786.877999584963, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 1158.61443528477, 0, 0, 0, 0, 0, 790.552967308115,
1769.24251582031, 0, 0, 4230.5333458008, 0, 754.027149866911,
0, 0, 937.667214973757, 0, 0, 16065.4787548108, 0, 1033.79325384522,
0, 1545.17012178734, 22490.6473715619, 0, 12391.863457809, 1694.80635977683,
1669.92554702357, 2915.19207246094, 1305.76068893333, 1218.70367092158,
1724.48555844726, 1220.71902820389, 1142.29342043404, 15089.1510828024,
786.642299340822, 0, 0, 803.014098225646
), TMT127N = c(
0, 0,
0, 0, 0, 0, 0, 865.592199462889, 0, 0, 959.490090490725, 0, 0,
0, 3632.481907304, 0, 0, 0, 0, 0, 2281.36683418381, 0, 0, 1148.88409380685,
2282.96815473633, 0, 737.964767776176, 0, 0, 714.336386722035,
1336.78857436523, 0, 992.266982382272, 0, 8167.51299697265, 1481.83583737793,
0, 0, 0, 0, 0, 994.583403515622, 0, 3822.09582135578, 730.363546764861,
0, 0, 1601.84151123047, 0, 0, 0, 0, 1248.0926793457, 0, 2349.57136347657,
0, 925.784469760195, 0, 0, 0, 0, 0, 0, 0, 2869.82125708008, 779.87640303955,
0, 4092.33059296876, 0, 0, 0, 1133.40195924235, 0, 0, 0, 15684.5063332031,
0, 0, 0, 2583.52765913085, 24540.0170683595, 1238.62926386719,
11428.895775, 1483.56418256835, 1332.67443391114, 1825.25018671875,
1123.61925747071, 1030.31205838623, 1533.51140185547, 2153.17955595703,
1008.89100509033, 12831.9995203125, 1684.62781442872, 0, 1136.51165412598,
0
), TMT128C = c(
1611.91088437501, 1786.68805515069, 0, 1823.53765170799,
874.032758349609, 0, 0, 1177.50190689896, 646.466474431654, 1499.41582602539,
1255.4240426748, 1783.93500465848, 5996.15327152243, 8208.12153691405,
21348.7184882813, 33456.1981237878, 46046.4520991817, 5945.01241588586,
6798.69927773436, 18048.0166453125, 3490.51877753905, 7744.45986702618,
11529.8069519532, 3396.17701757812, 4651.27894687501, 14325.2306858641,
1561.40410561524, 0, 9592.47815625001, 2089.33504892578, 3367.76334082031,
10669.6799841273, 1439.18170268554, 0, 9310.75753007808, 1700.90959455201,
0, 6206.48464218753, 3397.72012818408, 0, 7079.64974935495, 1845.17109756826,
1177.34360229492, 6097.26335625001, 1754.74566738282, 1392.17984296875,
7026.71143068691, 2193.1615467585, 2836.98221749547, 6301.54294276034,
0, 3623.93436796874, 1312.70517940113, 0, 4724.16524276671, 0,
3946.52595689374, 2970.83282951206, 865.842519287108, 3283.01703994418,
0, 0, 1262.2417984723, 3482.3909258789, 4262.98461721732, 725.868942626953,
3139.75638657914, 6559.00482506506, 719.443235961914, 1980.26911301631,
735.412520327744, 2538.58699423828, 0, 0, 3345.73545624628, 17476.7195602775,
0, 3061.52193035407, 0, 3370.09478829905, 23476.7103351433, 1087.0128899699,
11912.6099077086, 1957.73038989257, 3158.21345361328, 3399.21823401529,
2593.01804501953, 934.40066220703, 2667.04373535038, 2390.35165488282,
1286.71902949219, 13590.0106023424, 1817.04336728435, 720.287557226125,
916.984646430313, 0
), TMT129N = c(
5236.22537109374, 5253.53062265625,
2574.52071173515, 6331.35667763673, 2909.4254578125, 2260.77584003907,
2299.19597588428, 2146.42980395508, 1524.82949099121, 4594.55742216798,
2945.06894956055, 3353.94465205077, 17265.8083640626, 58543.518953906,
42651.7719375001, 85288.5246984373, 147283.5420375, 16268.584838086,
31273.8547945313, 64984.9260843748, 15376.6111124999, 18807.9172359374,
41234.0440359375, 7038.37527421877, 14994.4310613281, 35007.7380726561,
5527.67209306639, 8827.66559818178, 32485.2518203124, 6771.8957270508,
10744.4285308594, 21706.4754328126, 7351.69939775389, 7286.74206159669,
31413.8810906249, 5402.31355253905, 7530.87224590352, 22346.2738640626,
7845.50157832029, 8598.84479102795, 13646.5870857421, 3948.41240126953,
4709.55193242188, 14502.506803125, 5527.37162548826, 4609.58657929687,
17346.4145701172, 3806.28572124023, 6695.23604707033, 10312.3288382812,
5030.34046109047, 12238.5130183594, 4008.11956391603, 3366.81581267473,
10163.4030287109, 5657.196356792, 9102.44150449222, 3997.46163222657,
3410.17096083986, 7459.02988359372, 2817.01574699993, 3228.9395395409,
1668.4944914795, 8344.1621935547, 6186.76663623048, 4037.21344028321,
4953.59359042969, 6163.53660527345, 2403.32564355468, 7143.26629951174,
1674.84291965332, 6149.94359912111, 2371.20030146484, 2708.46439392749,
8739.23083300782, 17708.4475347656, 2940.52380873895, 6755.28018515623,
2318.903026372, 5194.7135689453, 37003.6496414061, 1967.3172459961,
11835.060703125, 6357.79309687501, 6929.07336386721, 4922.18842499999,
5119.39548720702, 5098.90275791014, 5251.26713525389, 3925.45210825196,
3679.82248959962, 14610.4555587891, 3124.13869614258, 1892.91264873047,
2424.18607792969, 1217.02803508301
), TMT130C = c(
8534.51942065459,
7915.5220132809, 5427.32003829632, 6570.76122664215, 4059.66840776324,
6061.80182135341, 4680.93892861494, 3924.40329255471, 1651.02954082353,
9963.97493859979, 2095.70639501953, 6791.52962729408, 29089.1005494669,
84188.5986999335, 79102.1477485965, 111076.559352845, 228355.265937366,
31033.9549731673, 34443.9266034838, 100401.13011289, 20754.0266423186,
29334.7579023722, 70572.5366141732, 12419.2333360566, 17842.2049581652,
61518.6150214895, 10110.7438562966, 17502.4708992952, 61357.4708251869,
9995.86560411634, 17530.0854369149, 53203.79872126, 11560.0754183956,
13608.3534926174, 46828.7269257867, 11019.0036476036, 12751.4728461078,
26876.5493340097, 15817.3356058367, 13322.0303678209, 28346.2564980389,
6207.28749858749, 9509.23448457102, 36589.1961503305, 10263.6194964664,
7564.4130620443, 29223.5021208951, 7015.27604618795, 8985.64276558643,
18886.2813530505, 7104.5605795249, 17804.4468985063, 6812.45760606556,
6606.50436956103, 18488.5028791766, 5863.36827860186, 13255.0296116594,
6453.52502342659, 5515.76995189115, 13991.8058338637, 3809.92839574631,
5061.53991882257, 5991.94758913396, 11334.5748098943, 6206.71330898436,
5219.26272399663, 10963.7722168793, 7489.68230742191, 3361.11015884112,
9301.91002246388, 2802.03791217282, 10320.7319848397, 2509.93049810913,
3062.67670544402, 12115.818468418, 22596.7672476562, 5210.67744771832,
12678.5528095273, 3674.33047767757, 10158.3722420368, 38620.8279703125,
4299.66130273203, 16002.3112734375, 9597.19786319193, 10816.7672483378,
10449.5441895035, 8882.17509875803, 5726.84640941102, 8043.21098886215,
7002.83669129089, 4997.59224291582, 17214.8268832032, 4203.51644882184,
2044.40880964859, 2570.59274774035, 1621.14734118819
), TMT131 = c(
9418.17036445313,
8878.68274218749, 8378.77222617187, 5645.64330000002, 4753.99889121092,
4420.32600410157, 5180.99550117188, 4787.09710488281, 4537.51669335937,
11573.2264957031, 3011.21359365234, 6855.67281093751, 33135.919678125,
78639.2005781249, 89222.1922968751, 117395.8545375, 221719.7701125,
31420.3548656251, 46548.5491968752, 99822.2995687501, 21221.19759375,
31903.8316523437, 65884.2084140624, 12792.6073300781, 32494.3474710937,
55493.9358187499, 10832.2051148437, 16887.821878125, 56011.2740859373,
11016.5575394531, 15928.4518980469, 45726.2466890625, 10025.5855992188,
21737.7840796876, 59706.8427656249, 9716.2620714844, 18071.6204882813,
35495.48536875, 19939.4042976562, 13032.988340625, 32501.3787140625,
7199.69156542971, 10150.1181632812, 40466.1293437499, 10645.1514984375,
11109.5601890625, 28522.8374835938, 5834.91936972658, 6796.13434628906,
23190.480225, 9266.24059453124, 29075.5950890626, 7621.3969927734,
9322.90506210938, 22800.4644656249, 7069.82396484372, 15978.9590683593,
6530.85736874999, 6011.4814611328, 17323.3141382813, 4554.70168183593,
5415.33720234375, 6123.79340332032, 14385.484575, 10146.5210988281,
3506.75934082032, 13551.8667152343, 9222.76466718747, 5501.55972480469,
13922.8457027344, 4250.15196503907, 9017.17755468752, 3575.14872451172,
5562.49420546876, 17743.048228125, 24449.2857210939, 5704.12143984373,
15124.9334800781, 5079.61518925782, 10921.1189097656, 35210.8651640626,
4445.87090449217, 15901.6300136718, 11364.1122761719, 9222.57776601566,
8388.0708205078, 9823.76574609376, 6197.90128417968, 8764.95285703127,
9621.21499453124, 6218.11219218749, 15561.3999234375, 4677.02849882812,
3778.64337919922, 2264.6449546875, 2498.85718242187
)), class = c(
"tbl_df",
"tbl", "data.frame"
), row.names = c(NA, -96L)), structure(list(
`First Scan` = c(
16668, 16732, 16751, 16894, 16948, 16970,
17120, 17177, 17210, 17367, 17403, 17406, 17633, 17638, 17833,
17917, 18052, 18090, 18149, 18285, 18338, 18376, 18397, 18504,
18561, 18605, 18642, 18662, 18702, 18753, 18755, 18792, 18932,
18983, 19022, 19210, 19238, 19249, 19438, 19586, 19669, 19707,
19742, 19898, 19937, 20040, 20131, 20172, 20302, 20361, 20405,
20613, 20633, 20790, 20962, 21004, 21148, 21187, 21382, 21454,
21455, 21585, 21726, 21729, 21966, 22000, 22197, 22267, 22585,
22789, 23382, 23725
), TMT126 = c(
1882.9047259105, 3449.99254746093,
1024.2977771073, 5158.15467082831, 15759.4817531249, 5163.23808916981,
23084.5250109376, 11158.1465572494, 2542.16341682018, 6236.75949960936,
2540.56668486328, 4643.57579941405, 3676.30715654298, 2933.73809824218,
0, 3710.6456292118, 4085.0294783203, 675.688557714844, 1950.23111088867,
2205.57922470703, 8296.83276855471, 2672.59435136718, 3034.29849873047,
45624.8063390625, 22890.978928125, 3676.17324550782, 804.357999226392,
2339.07756328126, 2307.76013144531, 21624.511528125, 1518.1371897772,
10635.9306925781, 2680.27191738282, 11041.5375597656, 7065.95542382811,
8761.31611523439, 1801.56362783203, 7668.99884179686, 7505.24644894218,
1129.2046734375, 5552.81345566407, 6607.18650630685, 2071.91356259765,
5463.23402109373, 5149.03205670459, 1219.86494693985, 4910.47049139314,
3155.11395608923, 1398.27528804432, 6326.38970151936, 3541.72186757812,
4198.17516679688, 1440.07868518477, 0, 3465.05218769531,
961.89374368008, 2280.46133583984, 2356.01639560548, 0, 2526.26822314453,
1853.50988980811, 1310.57137792969, 1419.80935503652, 1688.71382270508,
1531.11097573243, 1821.7144977539, 1352.28257709961, 2137.31568546879,
1951.55034804646, 1311.16770682422, 0, 1720.31448781044
),
TMT127N = c(
2771.68523671875, 5795.16860742187, 1532.51465844727,
4997.19921035158, 25241.5731433593, 7411.63585078126, 35598.7388882813,
11632.0612353516, 3981.85727929688, 8431.46010673831, 4482.49877431641,
6676.69583173827, 4001.42575561522, 3282.20197690431, 1573.56998100586,
3935.30607905274, 7116.32664960937, 2307.64589311523, 4102.16486528321,
4772.48413124998, 14428.697888086, 3332.28787353515, 5485.6754455078,
91312.7273437499, 37781.7556101563, 4225.51941313476, 1839.82785454101,
4019.30935473634, 3973.05389443358, 32996.0530066406, 2037.37071027832,
20023.8569953125, 3530.19987890626, 16245.3700406251, 11744.1997277344,
14524.1289123047, 2272.03076718749, 7951.62546621093, 11169.627975,
2088.36168669433, 8426.40678837892, 5546.88783515623, 825.817551295166,
11964.5538246094, 5850.03650859376, 1951.7206411377, 6372.5401716797,
2720.9217121582, 2928.83477036133, 7842.02046181641, 4470.19431181639,
5518.54197597655, 4886.56620761718, 2598.45832382813, 5150.38777236328,
1764.69598916016, 4431.15874072266, 5851.29773701172, 3535.65924873047,
4506.44162783202, 3948.30497885742, 3474.47286108398, 4899.0403397461,
3668.68391484374, 1715.68195283204, 1517.17190141602, 2988.61810048828,
3049.25398110351, 3346.69140673828, 1629.18274760742, 1652.68488032226,
3111.11825932616
), TMT128C = c(
3546.93682792969, 5443.04733562549,
2406.94690743869, 7409.37070198117, 24566.1728096405, 7605.80923058386,
37319.8058793624, 13554.5726592211, 4322.44607961136, 10647.2819212435,
5886.92810075651, 8753.17848761496, 6553.53110235196, 4646.29586668501,
1968.9492260545, 4154.15849160926, 7670.78133562564, 2768.92031395084,
3603.54312466775, 4079.51357784559, 16511.9767178725, 3885.32503013637,
4789.39730709742, 75477.2031668271, 39696.3689381062, 6235.92097131154,
2324.05125227432, 4163.9149784173, 5315.6094992347, 31155.5901563382,
1816.81292346028, 16308.3860112984, 4287.50902664535, 15935.5273428347,
12014.8172808907, 13939.1329255981, 3921.1247165634, 11449.7683673977,
13003.5107817882, 2150.04455439802, 10889.4027696227, 6249.74721525204,
3815.90910915446, 9774.84044695728, 5968.13675351661, 3450.75965045113,
6496.88961226217, 4185.18567482653, 3493.2378779832, 6249.25306422371,
4966.6136218745, 7318.69851904571, 4807.8110810617, 1540.76095735796,
6832.23484476677, 2965.60747444571, 4139.87531020355, 4987.87787026309,
3610.04293593895, 5899.23647575952, 5796.31857744029, 3008.6643611543,
5433.73871616973, 2796.2558469155, 2533.88330506493, 3404.51135442734,
1989.89578135028, 3963.71640004554, 1855.68417729492, 1858.78970920819,
1801.24628626465, 1521.81316450196
), TMT129N = c(
5603.3715375,
7466.48746787111, 5737.87805507811, 10125.7479275391, 27223.0979882812,
11989.9738019531, 44015.6930296875, 17091.8407880859, 7938.03088417966,
12459.8042378906, 7051.86322177736, 10014.4530556641, 7992.80055556639,
5937.58603710939, 3499.28743828124, 8939.26554726561, 11082.9714445312,
3852.90205781249, 4749.96272050781, 6339.30068320313, 18824.0227183594,
5960.32649501952, 4809.15115634765, 84223.8693140624, 36909.3952734375,
7050.83312226562, 2210.35349326172, 5805.73487548829, 6812.68262490236,
29434.3397519532, 2379.38804296874, 20404.2909726562, 5217.36262587891,
19563.0384855469, 13420.0282283203, 12340.821178125, 3700.34673647461,
10695.0951164062, 15392.6850773437, 2774.59824887696, 12177.5412128907,
7713.37043554689, 3815.38668457032, 11456.5398427734, 11993.45544375,
3913.54135283203, 9596.82833613281, 5349.46382695312, 3289.20360688477,
8314.06658349611, 5784.25196894531, 7882.8373013672, 5610.97252675783,
3071.51274536133, 7498.057575293, 3661.76029086913, 6717.39973330079,
6506.75830341797, 4466.70268945312, 5969.45083388673, 7538.86863662107,
3945.00036079102, 7081.96355947265, 3515.28444770509, 4725.6043602539,
4937.22756269533, 3392.35801362304, 5545.30827919922, 4755.80240244142,
3465.95602353517, 2446.60899624023, 3763.70442260743
), TMT130C = c(
10983.9436123188,
9211.23508829853, 10557.3906937124, 10405.446046875, 28290.6707273437,
16687.4414721715, 47013.3588375, 24663.9287051498, 10819.8059091653,
12112.6441605469, 12221.0968142757, 14143.6632554762, 8870.24921835937,
10222.1638647599, 4387.39878905819, 7903.13580132335, 12659.3879214159,
4977.95176757538, 6713.09197134976, 8619.08616316083, 25601.7183693629,
9371.93738344518, 8431.95007587771, 96391.5124218752, 44929.6342171874,
7378.82902968752, 3945.11645284669, 8071.18750848737, 7751.84701707912,
38104.5129609375, 3638.03642401144, 20403.32641875, 7808.49545553976,
23539.4361984374, 18752.01165285, 20811.394168736, 5824.4538993184,
18893.4697542139, 12674.1161636719, 2807.67847851562, 12264.333778125,
11014.4240599255, 6251.73090996096, 13488.8996818318, 15964.1844357857,
5828.08199702709, 11239.0816570313, 8253.48396721728, 4132.48828007812,
13706.3856985815, 7642.39674902346, 8508.96873105467, 8428.07027860162,
5015.03055391062, 9927.36947998573, 4157.77188427735, 6591.46973203123,
10252.0377530773, 5571.05266915471, 8466.28209717723, 9830.67984953953,
6387.25448143305, 8227.90628531475, 6429.95347392001, 5927.99930589914,
6179.43049557376, 4779.11406556715, 5853.8402419922, 5984.37583438957,
2653.83845332031, 4653.86852737383, 4421.67880346553
), TMT131 = c(
13383.1659585937,
10027.0432195313, 16206.8438039063, 16355.4699128906, 29514.0965624999,
32393.8092585938, 53039.4243328127, 23782.9527632812, 21228.6799054688,
15934.2437460937, 11102.0789214844, 15037.0606933593, 13194.4500703125,
9207.67683515628, 6735.74908359374, 12899.2203527343, 13469.5999195312,
6745.45593691405, 7115.74944609379, 11411.3147414063, 27412.2978679687,
11739.3367394532, 8886.12224414064, 96689.4600375001, 45453.11203125,
7980.59650781252, 6961.24482539061, 10250.2241015625, 13306.6513335937,
39924.4417171876, 6483.56406328127, 22704.3346148437, 9771.777984375,
24691.8792656249, 18723.0097054687, 16477.3002410156, 7680.46611621096,
17120.4574535157, 18478.2819375, 4914.95838925784, 14805.4535964844,
11972.9694691407, 6757.70631679689, 12590.9775105469, 19122.4749375,
7233.17976562498, 14106.4787144532, 11431.40191875, 7873.65249257815,
16542.6268992187, 8767.54336992188, 11536.5155554687, 10534.8182027344,
6824.66757714841, 13787.927071875, 7553.96639121096, 7917.68129238283,
12813.6645140625, 7735.42080351564, 7866.51996796874, 12833.8237371094,
7160.45798144533, 11867.8380820313, 7840.4843214844, 6481.4439357422,
8916.9703347656, 7413.33213457034, 12091.0962304687, 10392.2846542969,
5704.91811914064, 5982.57808242186, 6364.45633183592
)
), class = c(
"tbl_df",
"tbl", "data.frame"
), row.names = c(NA, -72L))), `# of PSMs` = c(
96,
72
)), row.names = c(NA, -2L), groups = structure(list(Unipro.ID = c(
"Q13177",
"Q8TD19"
), Gene.name = c("PAK2", "NEK9"), Mod.site = c(
"Q13177_278",
"Q8TD19_81"
), Mod.site2 = c("PAK2_278", "NEK9_81"), `mod.or.not(Y/N)` = c(
"Y",
"Y"
), `kinase.or.not(Y/N)` = c("Y", "Y"), .rows = structure(list(
1L, 2L
), ptype = integer(0), class = c(
"vctrs_list_of", "vctrs_vctr",
"list"
))), row.names = c(NA, -2L), class = c(
"tbl_df", "tbl",
"data.frame"
), .drop = TRUE), class = c(
"grouped_df", "tbl_df",
"tbl", "data.frame"
))
library(tidyverse)
# Expand the nested tables into rows, total every TMT channel within the
# existing grouping, then reshape so each channel becomes its own row.
df %>%
  unnest(cols = data) %>%
  summarise(across(.cols = starts_with("TMT"), .fns = sum)) %>%
  pivot_longer(
    cols = starts_with("TMT"),
    names_to = "name",
    values_to = "value"
  ) %>%
  # keep only identifier columns plus name/value so the printout stays narrow
  select(
    all_of(c("Unipro.ID", "Gene.name", "Mod.site", "Mod.site2", "name", "value"))
  )
#> `summarise()` has grouped output by 'Unipro.ID', 'Gene.name', 'Mod.site', 'Mod.site2', 'mod.or.not(Y/N)'. You can override using the `.groups` argument.
#> Adding missing grouping variables: `mod.or.not(Y/N)`
#> # A tibble: 12 × 7
#> # Groups: Unipro.ID, Gene.name, Mod.site, Mod.site2, mod.or.not(Y/N) [2]
#> `mod.or.not(Y/N)` Unipro.ID Gene.name Mod.site Mod.site2 name value
#> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
#> 1 Y Q13177 PAK2 Q13177_278 PAK2_278 TMT126 100372.
#> 2 Y Q13177 PAK2 Q13177_278 PAK2_278 TMT127N 127769.
#> 3 Y Q13177 PAK2 Q13177_278 PAK2_278 TMT128C 435779.
#> 4 Y Q13177 PAK2 Q13177_278 PAK2_278 TMT129N 1189693.
#> 5 Y Q13177 PAK2 Q13177_278 PAK2_278 TMT130C 1869744.
#> 6 Y Q13177 PAK2 Q13177_278 PAK2_278 TMT131 2008380.
#> 7 Y Q8TD19 NEK9 Q8TD19_81 NEK9_81 TMT126 355683.
#> 8 Y Q8TD19 NEK9 Q8TD19_81 NEK9_81 TMT127N 572770.
#> 9 Y Q8TD19 NEK9 Q8TD19_81 NEK9_81 TMT128C 594897.
#> 10 Y Q8TD19 NEK9 Q8TD19_81 NEK9_81 TMT129N 716943.
#> 11 Y Q8TD19 NEK9 Q8TD19_81 NEK9_81 TMT130C 904627.
#> 12 Y Q8TD19 NEK9 Q8TD19_81 NEK9_81 TMT131 1075259.
Created on 2021-09-19 by the reprex package (v2.0.1)
I am trying to merge / join two objects together, one an xts object and the other a list. I want to keep the data as an xts object (that is, merge the second data set into the first one posted below). Basically, Data 2 consists of predicted probabilities from Data 1.
I have tried creating Data 2 as a data.frame, unlisting it, and then merging it with by = "index(data)", but it fails.
The dimensions of the data are the following:
dim(dat)
212 4
prob_predictions <- as.data.frame(unlist(probs))
dim(prob_predictions)
191 1
I am also trying subset(index(df1) %in% index(df2))
Data 1:
dat <- structure(c(0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1,
1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,
0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0,
1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1,
0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0,
0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1,
1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0,
1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1,
1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1,
0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1,
0, 1, 1, 1, 1, 1, 1157.4779907, 1161.2739868, 1165.064978, 1162.5039794,
1152.5029784, 1143.5659789, 1131.9999755, 1115.114978, 1101.3089843,
1088.9449828, 1077.7859863, 1067.7619873, 1059.9439942, 1058.2339967,
1062.8999879, 1065.9739869, 1071.7789918, 1084.3059937, 1094.9029908,
1101.5380006, 1106.801001, 1106.7830079, 1105.7230103, 1105.3360108,
1104.5960206, 1104.4260255, 1106.363025, 1109.688025, 1111.763025,
1113.7510255, 1118.2270265, 1126.2330201, 1131.9140137, 1132.8030029,
1133.0679931, 1131.1919921, 1123.4999877, 1109.6529845, 1098.5239806,
1085.2169738, 1070.7239746, 1058.9449829, 1046.018982, 1037.3779847,
1030.1209901, 1023.8139955, 1019.6099977, 1018.9979982, 1016.8410036,
1018.3280031, 1021.1230043, 1020.8710024, 1024.0220033, 1030.0970094,
1034.7910035, 1040.7799927, 1047.371991, 1052.5719849, 1051.4059814,
1051.5269836, 1052.2799865, 1052.3579894, 1050.2929931, 1046.6079956,
1041.8380005, 1035.4400025, 1032.9650025, 1031.6990113, 1035.0920167,
1041.2500184, 1047.0030091, 1053.8240052, 1062.1109986, 1066.3029907,
1072.0419922, 1077.5289917, 1079.3439941, 1081.8229858, 1083.4049804,
1083.0979735, 1081.2649779, 1079.0049803, 1075.0169798, 1073.8739867,
1074.1959837, 1078.2869871, 1085.5799925, 1091.5880003, 1098.3030028,
1102.7200072, 1106.8830077, 1112.3160033, 1120.2160033, 1126.9150023,
1133.6280028, 1136.9040038, 1140.320996, 1143.1609985, 1146.4569946,
1149.8369995, 1153.297998, 1152.7800049, 1150.6940064, 1147.6130005,
1143.8229981, 1140.1619995, 1135.5619995, 1129.0449951, 1124.4880005,
1122.7390015, 1122.5960084, 1125.3989991, 1128.9430054, 1136.8930054,
1144.3530029, 1151.173999, 1158.3080078, 1167.6070068, 1173.8760009,
1178.3499999, 1183.494995, 1193.018994, 1203.9989867, 1212.4839843,
1217.4519897, 1221.0399902, 1222.8859863, 1225.2989868, 1229.2179931,
1233.0979858, 1235.0249878, 1234.4389893, 1232.6299927, 1230.7069947,
1230.6179932, 1232.1449952, 1234.6289918, 1234.0659913, 1232.0999879,
1229.8249879, 1228.1249879, 1224.0649903, 1220.2369874, 1215.8649903,
1214.1689942, 1214.8499878, 1213.7549926, 1217.246997, 1220.5099975,
1222.2329955, 1221.1559935, 1219.641992, 1216.0529905, 1211.9979856,
1206.3969847, 1199.9509886, 1193.1179808, 1185.7209715, 1179.0619749,
1172.8479857, 1169.2699828, 1167.7309814, 1169.2739868, 1169.3999878,
1170.2729858, 1171.0019897, 1172.7689941, 1174.7, 1176.7939942,
1180.7199952, 1184.6089966, 1187.7949951, 1185.9269897, 1185.0529907,
1182.6129883, 1178.0299805, 1168.1029786, 1156.5709717, 1148.2319702,
1137.9259643, 1130.0429687, 1121.3169677, 1113.2949707, 1107.2059692,
1102.4249755, 1098.911975, 1095.860974, 1097.485974, 1093.6249755,
1086.4079772, 1077.9009704, 1074.0089783, 1072.2119812, 1068.344989,
1062.2379822, 1057.449994, 1061.7179994, 1060.4010072, 1059.8690125,
1061.7240113, 1061.7080201, 1058.3970215, 1057.8680176, 1058.2380127,
1056.2290161, 1053.2240112, 1047.6460082, 1041.7940063, 1040.0410034,
1040.6190063, 1045.6369994, 1050.1010009, 1128.81199335, 1132.72894074524,
1136.05951315045, 1133.75860942184, 1126.33398461976, 1121.97836475121,
1114.98804010824, 1104.18156200269, 1097.85760647863, 1093.48449548066,
1089.54311267298, 1087.65328775174, 1087.83107177539, 1088.49478389202,
1089.82480075944, 1091.87386411569, 1093.27921086657, 1096.47071830785,
1100.97350704044, 1102.6227005604, 1102.82339384036, 1099.6516439508,
1097.67720586025, 1097.0346199688, 1096.8465665432, 1098.06499020575,
1100.72546732901, 1106.37447415482, 1111.91023852103, 1114.41117237617,
1117.75201214987, 1120.7832448975, 1122.20674347869, 1120.07466752834,
1117.94469547802, 1115.36710590868, 1109.05404401262, 1100.7222309638,
1096.19725287201, 1087.52132174134, 1079.62024328978, 1075.06498573838,
1068.53212719186, 1063.28239822121, 1059.64979029538, 1056.61743493392,
1051.89577236878, 1048.42474757175, 1046.82620161254, 1044.26846536373,
1043.14861247194, 1041.82684176033, 1041.46047397363, 1044.57471778567,
1047.19426428227, 1051.05194873158, 1053.13842609047, 1054.50142846281,
1051.21367146635, 1048.35332113622, 1047.56157998039, 1045.89381512512,
1043.17345339892, 1042.61503488473, 1040.8783653719, 1039.24423257458,
1040.09811147224, 1041.49734266536, 1042.67950374485, 1046.49669481677,
1051.36081397707, 1055.8274040745, 1060.05336092454, 1061.8797055984,
1063.77402125569, 1065.18506361229, 1065.29696088731, 1066.65724613614,
1066.94988745651, 1068.16322588922, 1069.21815580453, 1069.83166801363,
1068.92578972661, 1068.81857632408, 1070.35871095988, 1075.03883372561,
1081.15799613269, 1086.72961878672, 1091.50584604513, 1094.58719261226,
1097.09031664919, 1100.22361887307, 1103.94707859945, 1106.8845033995,
1111.19264545669, 1115.10382303224, 1120.66155045774, 1125.17569412844,
1129.42943430668, 1132.1180628489, 1134.34300733948, 1133.43510749763,
1132.00890306928, 1129.33948182459, 1127.89952841272, 1126.73290894484,
1126.80215199772, 1124.52480561698, 1124.50054032013, 1125.99287400392,
1128.66498590831, 1130.96736496466, 1133.15142772993, 1137.94462318423,
1142.78989202382, 1146.70132945013, 1151.6631122644, 1155.87424490588,
1158.8347892958, 1161.3181459343, 1165.5259415596, 1173.38822864916,
1181.98934506353, 1190.21226039081, 1194.81109273454, 1197.18527342649,
1199.09715310016, 1201.08885375729, 1203.47563187564, 1205.40271083986,
1207.24721647416, 1210.57795500043, 1213.91433880992, 1217.26535187564,
1219.20293598272, 1220.70837160341, 1222.74566726023, 1221.94893752116,
1220.47665680486, 1218.61792387106, 1217.58479016906, 1216.06433348629,
1215.23248801141, 1214.29415629603, 1214.89947702975, 1217.46333121739,
1218.76682576811, 1221.6747517902, 1223.33620352446, 1222.84608328404,
1220.3845515427, 1217.15554472911, 1212.80167770729, 1208.2329423066,
1204.08123494406, 1201.53635399701, 1197.84907704491, 1195.70439885016,
1193.49731600729, 1189.93090962564, 1187.19653451844, 1185.66257561192,
1185.77756793459, 1183.90255822654, 1182.89945696687, 1183.06617763669,
1182.8208264332, 1183.94646343956, 1184.8534641596, 1185.84933033488,
1187.20748792203, 1188.70677011993, 1186.75278639422, 1183.95251873763,
1180.62084752452, 1176.63980928409, 1167.55220563799, 1159.14913329151,
1154.47587831137, 1148.54960418648, 1145.95250178776, 1143.07035314131,
1137.82269769928, 1133.88338944221, 1130.76687940009, 1128.18812336199,
1120.80925075608, 1118.40550744598, 1113.93545635589, 1104.9968430839,
1098.44571145686, 1096.38135988954, 1093.86884942387, 1090.43277224064,
1085.63821926534, 1082.79744209722, 1083.80625856415, 1083.6723314628,
1082.00354027587, 1077.87272739245, 1073.8896151646, 1071.01060743464,
1070.41054586943, 1069.56096911996, 1064.84087682282, 1061.11888950636,
1058.87994622004, 1055.5466184848, 1054.88694005768, 1053.88913948076,
1056.96921953021, 1059.95310805114, 77.1228859956622, 81.0362538530292,
78.8404654349793, 46.4728298378735, 33.7103494024937, 38.1634534707235,
33.5520386736078, 26.2429467891094, 30.5979953728327, 30.5979953728327,
31.2223518673486, 33.7665461425831, 36.6962580582319, 37.7398082531122,
40.5860776927095, 41.0627097257687, 40.7556533339627, 52.526559398101,
67.2093345204357, 57.3558861837519, 61.809628052695, 65.0522479908148,
60.3356537763659, 59.9025026642582, 60.6951031882524, 60.0950548232381,
59.3846485649388, 64.6199416069941, 64.1051430716001, 55.6515339908006,
58.7835089189351, 55.0890845598537, 48.1838706704649, 46.0064642542491,
48.4030879681908, 55.5793562399467, 43.3339041496164, 35.5089178322478,
42.157901440901, 32.5975281088021, 28.6602735068277, 26.9110067493817,
23.5372731683978, 27.6575715257538, 27.7636741048428, 28.4241344813052,
27.7437779358905, 33.8748748481366, 38.0173561927228, 37.3614293051309,
46.7027642395441, 51.6960358269122, 46.2684476430283, 67.9712504992444,
67.4307596718059, 65.3539239654913, 69.3859268680975, 65.8884694613497,
48.7463489665683, 48.3776103610145, 58.1513743683333, 53.5784372311078,
46.4319595892114, 54.1515204375632, 48.0571628692748, 48.6571396623733,
52.2995925118996, 44.9774509790143, 45.2591195805464, 48.7943143049565,
56.0044804919092, 57.6982718090011, 75.947686211121, 66.6475291255686,
63.2031704734223, 66.0494138822722, 66.2641524590373, 64.6800962380417,
66.0941051628946, 68.6330617447997, 62.298871330898, 58.4734193157287,
52.329016147723, 43.5650542408412, 44.6973713488007, 56.9666746925596,
61.477502601121, 70.1850582389349, 68.3785649248245, 64.1672444920065,
68.1060250901431, 67.2130080618559, 73.8468747118516, 69.6113702464934,
73.1570958144156, 74.8830412236628, 85.4049570826199, 81.7882678868151,
79.8159292966814, 65.9053697697576, 57.9091367119927, 44.4025529377091,
43.2388424796772, 42.7803356293289, 47.7057738515549, 44.7755737074884,
45.7557906780512, 40.016244653124, 41.4992896665767, 46.6336286507843,
44.3657650232027, 45.4718259236287, 45.2372613787558, 56.9881807801438,
58.8717301068573, 68.2039283244873, 73.5215112680329, 78.8594307629251,
73.0335410836162, 71.845824268758, 73.323376014074, 89.1748677280385,
88.8275948061702, 88.079358554904, 72.9197089804835, 66.5774741060939,
65.5905607795046, 60.3560855296636, 60.5351059532554, 61.4085229097936,
58.076745639994, 63.2173375817626, 67.2733875032827, 68.7459719049055,
59.9037653356146, 44.6491666372171, 40.4929666577831, 30.2655738215587,
36.0522832244009, 40.7505784647263, 45.517250253278, 41.5835266382263,
41.3526668380199, 41.539756712543, 48.3189167794286, 49.8415866657383,
44.5858982397584, 50.0675010891207, 50.5139938354098, 44.9097955003298,
37.4247186375495, 41.3952548987526, 39.6467050713014, 39.3953595896288,
36.8289128008105, 42.8772642627352, 37.5760511024063, 42.0791664435174,
36.4236440580649, 25.1434697637668, 29.0666072154372, 25.3668839063101,
34.1040319281821, 34.1351918720353, 42.138526061446, 49.3942545777117,
53.2282422165058, 60.0907410718325, 59.6946479180297, 56.5126081396889,
64.5584522103826, 61.6638469740838, 48.5567687748239, 50.4491176695018,
45.8595330253583, 39.1134283844586, 22.2017732449298, 24.6509068125481,
33.7409449463083, 27.0354908046699, 36.9033514343542, 31.849732552439,
28.384694400023, 30.2843907497844, 30.2566110685775, 30.1702095862,
28.1229085893699, 39.7891005017724, 37.8236546439287, 33.4844836408483,
42.9231744072258, 49.6425369989148, 43.9761986844232, 44.7318583977582,
37.1424843378588, 40.8120228103859, 50.807226927847, 47.9214803669887,
44.995279725301, 41.3197867616665, 47.7401787161256, 40.9599257198947,
48.8101085201251, 58.7773921954413, 46.8976151314924, 38.7370234461344,
43.0052200556536, 42.7247275761847, 51.7764243779359, 47.5063348907638,
48.4623219235214, 51.3175593621287), class = c("xts", "zoo"), .indexCLASS = "Date", .indexTZ = "UTC", tclass = "Date", tzone = "UTC", src = "yahoo", updated = structure(1545418313.79923, class = c("POSIXct",
"POSIXt")), index = structure(c(1517356800, 1517443200, 1517529600,
1517788800, 1517875200, 1517961600, 1518048000, 1518134400, 1518393600,
1518480000, 1518566400, 1518652800, 1518739200, 1519084800, 1519171200,
1519257600, 1519344000, 1519603200, 1519689600, 1519776000, 1519862400,
1519948800, 1520208000, 1520294400, 1520380800, 1520467200, 1520553600,
1520812800, 1520899200, 1520985600, 1521072000, 1521158400, 1521417600,
1521504000, 1521590400, 1521676800, 1521763200, 1522022400, 1522108800,
1522195200, 1522281600, 1522627200, 1522713600, 1522800000, 1522886400,
1522972800, 1523232000, 1523318400, 1523404800, 1523491200, 1523577600,
1523836800, 1523923200, 1524009600, 1524096000, 1524182400, 1524441600,
1524528000, 1524614400, 1524700800, 1524787200, 1525046400, 1525132800,
1525219200, 1525305600, 1525392000, 1525651200, 1525737600, 1525824000,
1525910400, 1525996800, 1526256000, 1526342400, 1526428800, 1526515200,
1526601600, 1526860800, 1526947200, 1527033600, 1527120000, 1527206400,
1527552000, 1527638400, 1527724800, 1527811200, 1528070400, 1528156800,
1528243200, 1528329600, 1528416000, 1528675200, 1528761600, 1528848000,
1528934400, 1529020800, 1529280000, 1529366400, 1529452800, 1529539200,
1529625600, 1529884800, 1529971200, 1530057600, 1530144000, 1530230400,
1530489600, 1530576000, 1530748800, 1530835200, 1531094400, 1531180800,
1531267200, 1531353600, 1531440000, 1531699200, 1531785600, 1531872000,
1531958400, 1532044800, 1532304000, 1532390400, 1532476800, 1532563200,
1532649600, 1532908800, 1532995200, 1533081600, 1533168000, 1533254400,
1533513600, 1533600000, 1533686400, 1533772800, 1533859200, 1534118400,
1534204800, 1534291200, 1534377600, 1534464000, 1534723200, 1534809600,
1534896000, 1534982400, 1535068800, 1535328000, 1535414400, 1535500800,
1535587200, 1535673600, 1536019200, 1536105600, 1536192000, 1536278400,
1536537600, 1536624000, 1536710400, 1536796800, 1536883200, 1537142400,
1537228800, 1537315200, 1537401600, 1537488000, 1537747200, 1537833600,
1537920000, 1538006400, 1538092800, 1538352000, 1538438400, 1538524800,
1538611200, 1538697600, 1538956800, 1539043200, 1539129600, 1539216000,
1539302400, 1539561600, 1539648000, 1539734400, 1539820800, 1539907200,
1540166400, 1540252800, 1540339200, 1540425600, 1540512000, 1540771200,
1540857600, 1540944000, 1541030400, 1541116800, 1541376000, 1541462400,
1541548800, 1541635200, 1541721600, 1541980800, 1542067200, 1542153600,
1542240000, 1542326400, 1542585600, 1542672000, 1542758400, 1542931200,
1543190400, 1543276800, 1543363200, 1543449600, 1543536000), tzone = "UTC", tclass = "Date"), .Dim = c(212L,
4L), .Dimnames = list(NULL, c("y", "x1", "x2", "x3")))
Data 2:
probs <- list(c(`2018-03-02` = 0.420560689116128), c(`2018-03-05` = 0.711907085203791),
c(`2018-03-06` = 0.751215601254221), c(`2018-03-07` = 0.547750838805509),
c(`2018-03-08` = 0.593534104939348), c(`2018-03-09` = 0.527119339702333),
c(`2018-03-12` = 0.0723935802517959), c(`2018-03-13` = 0.147386150528317),
c(`2018-03-14` = 0.0110208387470279), c(`2018-03-15` = 0.448885780391426),
c(`2018-03-16` = 0.390256452877606), c(`2018-03-19` = 0.455819375756108),
c(`2018-03-20` = 0.373370265212439), c(`2018-03-21` = 0.285897356504093),
c(`2018-03-22` = 0.28174860433147), c(`2018-03-23` = 0.411629206184693),
c(`2018-03-26` = 0.602595990820247), c(`2018-03-27` = 0.914769125894976),
c(`2018-03-28` = 0.567802770307713), c(`2018-03-29` = 1.06192517431111),
c(`2018-04-02` = 1.26133164688967), c(`2018-04-03` = 0.765505424480004),
c(`2018-04-04` = 0.599705106418312), c(`2018-04-05` = 0.783480703900658),
c(`2018-04-06` = 0.58925226449577), c(`2018-04-09` = 0.581888802979559),
c(`2018-04-10` = 0.69731544035042), c(`2018-04-11` = 0.811834990407441),
c(`2018-04-12` = 0.40358830120227), c(`2018-04-13` = 0.732777147245629),
c(`2018-04-16` = 0.351131289433891), c(`2018-04-17` = 0.858564500915946),
c(`2018-04-18` = 0.398843971991605), c(`2018-04-19` = 0.232313329163781),
c(`2018-04-20` = 0.511838387056659), c(`2018-04-23` = 0.470527532699864),
c(`2018-04-24` = 0.75300602307451), c(`2018-04-25` = 1.7150087789751),
c(`2018-04-26` = 0.71773864194899), c(`2018-04-27` = 0.718150438655544),
c(`2018-04-30` = 0.60025651434085), c(`2018-05-01` = 0.516209030172394),
c(`2018-05-02` = 0.597683826516939), c(`2018-05-03` = 0.602186621767186),
c(`2018-05-04` = 0.837921470849356), c(`2018-05-07` = 0.801165076649731),
c(`2018-05-08` = 0.795649126872325), c(`2018-05-09` = 0.669657050275692),
c(`2018-05-10` = 0.432442105016062), c(`2018-05-11` = 0.241267377515004),
c(`2018-05-14` = 0.188621015982501), c(`2018-05-15` = -0.18303573453157),
c(`2018-05-16` = 0.152752056639494), c(`2018-05-17` = 0.514420711650191),
c(`2018-05-18` = 0.156339542628494), c(`2018-05-21` = 0.49960357405928),
c(`2018-05-22` = 0.711680804214282), c(`2018-05-23` = 0.460762316943894),
c(`2018-05-24` = 0.550255124643012), c(`2018-05-25` = 0.988972200664625),
c(`2018-05-29` = 0.846476234446601), c(`2018-05-30` = 0.50919230317936),
c(`2018-05-31` = 0.723166021404194), c(`2018-06-01` = 0.835744277153273),
c(`2018-06-04` = 0.525528654060598), c(`2018-06-05` = 0.620015291063136),
c(`2018-06-06` = 0.176251554900159), c(`2018-06-07` = 0.136995698782173),
c(`2018-06-08` = 0.0522471325520983), c(`2018-06-11` = 0.191934977257687),
c(`2018-06-12` = 0.429006006600039), c(`2018-06-13` = 0.559913925676721),
c(`2018-06-14` = 0.672077559523321), c(`2018-06-15` = 0.76032956322575),
c(`2018-06-18` = 0.92544891831305), c(`2018-06-19` = 0.464788295902493),
c(`2018-06-20` = 0.726125381878034), c(`2018-06-21` = 0.68716814788161),
c(`2018-06-22` = 1.05616788651542), c(`2018-06-25` = 0.868015147566888),
c(`2018-06-26` = 0.644906830154394), c(`2018-06-27` = 0.208974693095105),
c(`2018-06-28` = 0.138107192015791), c(`2018-06-29` = 0.166994363352344),
c(`2018-07-02` = -0.0251122167094578), c(`2018-07-03` = -0.177699586946273),
c(`2018-07-05` = -0.326862202836793), c(`2018-07-06` = 0.0201775156053099),
c(`2018-07-09` = 0.426881152297873), c(`2018-07-10` = 0.744979059597089),
c(`2018-07-11` = 0.41243241951448), c(`2018-07-12` = 0.502955033278997),
c(`2018-07-13` = 0.354841947278703), c(`2018-07-16` = 0.139218821204331),
c(`2018-07-17` = -0.202178270120416), c(`2018-07-18` = 0.128150204047404),
c(`2018-07-19` = -0.0686370513411129), c(`2018-07-20` = -0.0431244526756323),
c(`2018-07-23` = -0.0752612046047743), c(`2018-07-24` = 0.341756550136681),
c(`2018-07-25` = -0.0195230327845111), c(`2018-07-26` = 0.330832699288839),
c(`2018-07-27` = 0.828860453502682), c(`2018-07-30` = 0.709201233874251),
c(`2018-07-31` = 0.140975580247175), c(`2018-08-01` = 0.0294266713325949),
c(`2018-08-02` = -0.0309860327622304), c(`2018-08-03` = 0.328432490391903),
c(`2018-08-06` = 0.246261721432661), c(`2018-08-07` = 0.155023310480069),
c(`2018-08-08` = 0.264199818076836), c(`2018-08-09` = 0.619737926626806),
c(`2018-08-10` = 0.192071322493496), c(`2018-08-13` = 0.0642317026042935),
c(`2018-08-14` = -0.0489313867619856), c(`2018-08-15` = 0.44527540734492),
c(`2018-08-16` = 0.250841276758185), c(`2018-08-17` = 0.166886285921369),
c(`2018-08-20` = 0.14501608322908), c(`2018-08-21` = 0.299868775235037),
c(`2018-08-22` = 0.220950088232105), c(`2018-08-23` = 0.556141681401253),
c(`2018-08-24` = 0.332399640608902), c(`2018-08-27` = 0.31601373354441),
c(`2018-08-28` = 0.536228568950966), c(`2018-08-29` = 0.461926306171486),
c(`2018-08-30` = 0.454570840347704), c(`2018-08-31` = 0.226220781244086),
c(`2018-09-04` = 0.285296899508341), c(`2018-09-05` = 0.444687467396326),
c(`2018-09-06` = 0.576303992437837), c(`2018-09-07` = 0.904209619267055),
c(`2018-09-10` = 1.22770238231023), c(`2018-09-11` = 0.936841682214141),
c(`2018-09-12` = 1.05849630327662), c(`2018-09-13` = 0.637323180199244),
c(`2018-09-14` = 0.932850919631093), c(`2018-09-17` = 0.541733974001656),
c(`2018-09-18` = 0.36306729369803), c(`2018-09-19` = 0.642546689499425),
c(`2018-09-20` = 0.916481643939073), c(`2018-09-21` = 0.614426300203428),
c(`2018-09-24` = 0.508766547811766), c(`2018-09-25` = 0.338267252107242),
c(`2018-09-26` = 0.455102005496381), c(`2018-09-27` = 0.0598758629606586),
c(`2018-09-28` = 0.291185862573928), c(`2018-10-01` = 0.579664627999074),
c(`2018-10-02` = 0.543265160634182), c(`2018-10-03` = 0.518432853545448),
c(`2018-10-04` = 0.541374575635435), c(`2018-10-05` = 1.12461397204987),
c(`2018-10-08` = 0.701101617497031), c(`2018-10-09` = 0.610361538453297),
c(`2018-10-10` = 0.393709447767468), c(`2018-10-11` = 0.326895135027112),
c(`2018-10-12` = 0.9286505914526), c(`2018-10-15` = 1.56529837818139),
c(`2018-10-16` = 0.91095701538889), c(`2018-10-17` = 0.953883434361834),
c(`2018-10-18` = 0.653974845171448), c(`2018-10-19` = 0.417176906768297),
c(`2018-10-22` = 0.586281560385281), c(`2018-10-23` = 0.381443970694156),
c(`2018-10-24` = 0.510316238211796), c(`2018-10-25` = 0.100366157783568),
c(`2018-10-26` = -0.421064599732561), c(`2018-10-29` = 0.656960441730497),
c(`2018-10-30` = 0.494031922279178), c(`2018-10-31` = 0.597524731270041),
c(`2018-11-01` = 0.674525019989957), c(`2018-11-02` = 0.589723916825039),
c(`2018-11-05` = 0.438332501211451), c(`2018-11-06` = 0.699001705555669),
c(`2018-11-07` = 0.63181540579542), c(`2018-11-08` = 0.149239387764848),
c(`2018-11-09` = 0.272625539714475), c(`2018-11-12` = 0.524676229501674),
c(`2018-11-13` = 0.956295144895798), c(`2018-11-14` = 0.386944846002602),
c(`2018-11-15` = 0.582054913107624), c(`2018-11-16` = 0.263083233959702),
c(`2018-11-19` = -0.055491287398159), c(`2018-11-20` = 0.700720198889711),
c(`2018-11-21` = 1.24722134579426), c(`2018-11-23` = 0.88799587539072),
c(`2018-11-26` = 0.61561373514364), c(`2018-11-27` = 0.228747410170771),
c(`2018-11-28` = 0.710493869896566), c(`2018-11-29` = 0.639454784579899),
c(`2018-11-30` = 0.553917730992259))
The following works. You either unlist or create a named vector of probs. Then you create an xts object of this, but you need to use as.Date on the names of the probs (or rownames from the data.frame) otherwise you try to merge a character value with the date values of the dat xts.
library(xts)
# Flatten the list of one-element named vectors into a single named numeric
# vector; each name is the prediction date as a character string.
vals <- unlist(probs)
# A data.frame also works, but then the dates live in the row names, so
# as.Date(rownames(...)) must be used instead of as.Date(names(...)):
# vals_df <- data.frame(unlist(probs))
# need to set the names as dates otherwise they are characters
probs_xts <- xts(vals, as.Date(names(vals)))
# Merging on the shared Date index keeps the result an xts object.
tail(merge(dat, probs_xts))
y x1 x2 x3 probs_xts
2018-11-23 0 1047.646 1058.880 43.00522 0.8879959
2018-11-26 1 1041.794 1055.547 42.72473 0.6156137
2018-11-27 1 1040.041 1054.887 51.77642 0.2287474
2018-11-28 1 1040.619 1053.889 47.50633 0.7104939
2018-11-29 1 1045.637 1056.969 48.46232 0.6394548
2018-11-30 1 1050.101 1059.953 51.31756 0.5539177
I have some time series data called dat and what I am trying to do is to split it into training and test on a rolling basis.
Say we have 100 days in total, I want to train the model on the first 20 days and test on the next 10 days (so using 30 days for train & test). Then move from day 2 until day 22 (training on 20 days) and then test on the next 10 days (22 - 32). Then do the same but begin on day 3 and train until day 23, and test on the next 10 observations until 33. Keep going until the final model begins on day 70 and trains until 90, tests on the last 10 observations.
I am trying to make it so that the number of days can change, i.e. the total days can be 1000, 1250, 87 etc.
I have a function which trains a logistic model on some data, but the training data expands as the days increase, so it's not exactly what I am after.
If I can create the different training and test splits, then using the rollapply function might give the results I am after.
EDIT: I am not sure if it would be better/ or interesting to train on the first 20 days and then test just on the next 1 day instead of 10 days.
Code:
# Fit a GLM on the first `model_len` rows of `dat` and predict the row that
# immediately follows the training window.
#
# model_len: number of leading rows used for fitting.
# dat:       object coercible by data.frame(); its columns are renamed to
#            y, x1, x2, x3.
# ...:       accepted but not used.
#
# `formula` is not defined inside this function; it is resolved from the
# enclosing environment when the function is called.
# NOTE(review): no `family` is passed, so glm() falls back to its default
# (gaussian) -- confirm this is intended for a model described as logistic.
#
# Returns the result of predict() for row `model_len + 1`.
myfun <- function(model_len, dat, ...) {
  frame <- setNames(data.frame(dat), c("y", "x1", "x2", "x3"))
  train_rows <- 1:model_len
  holdout <- frame[model_len + 1, ]
  fit <- glm(formula, data = frame[train_rows, ])
  predict(fit, holdout)
}
# Run myfun once per training-window length from 1 to 50 rows.
sapply(seq_len(50), myfun, dat = dat)
Data:
dat <- structure(c(0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1,
1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,
0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0,
1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1,
0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0,
0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1,
1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0,
1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1,
1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1,
0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1,
0, 1, 1, 1, 1, 1, 1157.4779907, 1161.2739868, 1165.064978, 1162.5039794,
1152.5029784, 1143.5659789, 1131.9999755, 1115.114978, 1101.3089843,
1088.9449828, 1077.7859863, 1067.7619873, 1059.9439942, 1058.2339967,
1062.8999879, 1065.9739869, 1071.7789918, 1084.3059937, 1094.9029908,
1101.5380006, 1106.801001, 1106.7830079, 1105.7230103, 1105.3360108,
1104.5960206, 1104.4260255, 1106.363025, 1109.688025, 1111.763025,
1113.7510255, 1118.2270265, 1126.2330201, 1131.9140137, 1132.8030029,
1133.0679931, 1131.1919921, 1123.4999877, 1109.6529845, 1098.5239806,
1085.2169738, 1070.7239746, 1058.9449829, 1046.018982, 1037.3779847,
1030.1209901, 1023.8139955, 1019.6099977, 1018.9979982, 1016.8410036,
1018.3280031, 1021.1230043, 1020.8710024, 1024.0220033, 1030.0970094,
1034.7910035, 1040.7799927, 1047.371991, 1052.5719849, 1051.4059814,
1051.5269836, 1052.2799865, 1052.3579894, 1050.2929931, 1046.6079956,
1041.8380005, 1035.4400025, 1032.9650025, 1031.6990113, 1035.0920167,
1041.2500184, 1047.0030091, 1053.8240052, 1062.1109986, 1066.3029907,
1072.0419922, 1077.5289917, 1079.3439941, 1081.8229858, 1083.4049804,
1083.0979735, 1081.2649779, 1079.0049803, 1075.0169798, 1073.8739867,
1074.1959837, 1078.2869871, 1085.5799925, 1091.5880003, 1098.3030028,
1102.7200072, 1106.8830077, 1112.3160033, 1120.2160033, 1126.9150023,
1133.6280028, 1136.9040038, 1140.320996, 1143.1609985, 1146.4569946,
1149.8369995, 1153.297998, 1152.7800049, 1150.6940064, 1147.6130005,
1143.8229981, 1140.1619995, 1135.5619995, 1129.0449951, 1124.4880005,
1122.7390015, 1122.5960084, 1125.3989991, 1128.9430054, 1136.8930054,
1144.3530029, 1151.173999, 1158.3080078, 1167.6070068, 1173.8760009,
1178.3499999, 1183.494995, 1193.018994, 1203.9989867, 1212.4839843,
1217.4519897, 1221.0399902, 1222.8859863, 1225.2989868, 1229.2179931,
1233.0979858, 1235.0249878, 1234.4389893, 1232.6299927, 1230.7069947,
1230.6179932, 1232.1449952, 1234.6289918, 1234.0659913, 1232.0999879,
1229.8249879, 1228.1249879, 1224.0649903, 1220.2369874, 1215.8649903,
1214.1689942, 1214.8499878, 1213.7549926, 1217.246997, 1220.5099975,
1222.2329955, 1221.1559935, 1219.641992, 1216.0529905, 1211.9979856,
1206.3969847, 1199.9509886, 1193.1179808, 1185.7209715, 1179.0619749,
1172.8479857, 1169.2699828, 1167.7309814, 1169.2739868, 1169.3999878,
1170.2729858, 1171.0019897, 1172.7689941, 1174.7, 1176.7939942,
1180.7199952, 1184.6089966, 1187.7949951, 1185.9269897, 1185.0529907,
1182.6129883, 1178.0299805, 1168.1029786, 1156.5709717, 1148.2319702,
1137.9259643, 1130.0429687, 1121.3169677, 1113.2949707, 1107.2059692,
1102.4249755, 1098.911975, 1095.860974, 1097.485974, 1093.6249755,
1086.4079772, 1077.9009704, 1074.0089783, 1072.2119812, 1068.344989,
1062.2379822, 1057.449994, 1061.7179994, 1060.4010072, 1059.8690125,
1061.7240113, 1061.7080201, 1058.3970215, 1057.8680176, 1058.2380127,
1056.2290161, 1053.2240112, 1047.6460082, 1041.7940063, 1040.0410034,
1040.6190063, 1045.6369994, 1050.1010009, 1128.81199335, 1132.72894074524,
1136.05951315045, 1133.75860942184, 1126.33398461976, 1121.97836475121,
1114.98804010824, 1104.18156200269, 1097.85760647863, 1093.48449548066,
1089.54311267298, 1087.65328775174, 1087.83107177539, 1088.49478389202,
1089.82480075944, 1091.87386411569, 1093.27921086657, 1096.47071830785,
1100.97350704044, 1102.6227005604, 1102.82339384036, 1099.6516439508,
1097.67720586025, 1097.0346199688, 1096.8465665432, 1098.06499020575,
1100.72546732901, 1106.37447415482, 1111.91023852103, 1114.41117237617,
1117.75201214987, 1120.7832448975, 1122.20674347869, 1120.07466752834,
1117.94469547802, 1115.36710590868, 1109.05404401262, 1100.7222309638,
1096.19725287201, 1087.52132174134, 1079.62024328978, 1075.06498573838,
1068.53212719186, 1063.28239822121, 1059.64979029538, 1056.61743493392,
1051.89577236878, 1048.42474757175, 1046.82620161254, 1044.26846536373,
1043.14861247194, 1041.82684176033, 1041.46047397363, 1044.57471778567,
1047.19426428227, 1051.05194873158, 1053.13842609047, 1054.50142846281,
1051.21367146635, 1048.35332113622, 1047.56157998039, 1045.89381512512,
1043.17345339892, 1042.61503488473, 1040.8783653719, 1039.24423257458,
1040.09811147224, 1041.49734266536, 1042.67950374485, 1046.49669481677,
1051.36081397707, 1055.8274040745, 1060.05336092454, 1061.8797055984,
1063.77402125569, 1065.18506361229, 1065.29696088731, 1066.65724613614,
1066.94988745651, 1068.16322588922, 1069.21815580453, 1069.83166801363,
1068.92578972661, 1068.81857632408, 1070.35871095988, 1075.03883372561,
1081.15799613269, 1086.72961878672, 1091.50584604513, 1094.58719261226,
1097.09031664919, 1100.22361887307, 1103.94707859945, 1106.8845033995,
1111.19264545669, 1115.10382303224, 1120.66155045774, 1125.17569412844,
1129.42943430668, 1132.1180628489, 1134.34300733948, 1133.43510749763,
1132.00890306928, 1129.33948182459, 1127.89952841272, 1126.73290894484,
1126.80215199772, 1124.52480561698, 1124.50054032013, 1125.99287400392,
1128.66498590831, 1130.96736496466, 1133.15142772993, 1137.94462318423,
1142.78989202382, 1146.70132945013, 1151.6631122644, 1155.87424490588,
1158.8347892958, 1161.3181459343, 1165.5259415596, 1173.38822864916,
1181.98934506353, 1190.21226039081, 1194.81109273454, 1197.18527342649,
1199.09715310016, 1201.08885375729, 1203.47563187564, 1205.40271083986,
1207.24721647416, 1210.57795500043, 1213.91433880992, 1217.26535187564,
1219.20293598272, 1220.70837160341, 1222.74566726023, 1221.94893752116,
1220.47665680486, 1218.61792387106, 1217.58479016906, 1216.06433348629,
1215.23248801141, 1214.29415629603, 1214.89947702975, 1217.46333121739,
1218.76682576811, 1221.6747517902, 1223.33620352446, 1222.84608328404,
1220.3845515427, 1217.15554472911, 1212.80167770729, 1208.2329423066,
1204.08123494406, 1201.53635399701, 1197.84907704491, 1195.70439885016,
1193.49731600729, 1189.93090962564, 1187.19653451844, 1185.66257561192,
1185.77756793459, 1183.90255822654, 1182.89945696687, 1183.06617763669,
1182.8208264332, 1183.94646343956, 1184.8534641596, 1185.84933033488,
1187.20748792203, 1188.70677011993, 1186.75278639422, 1183.95251873763,
1180.62084752452, 1176.63980928409, 1167.55220563799, 1159.14913329151,
1154.47587831137, 1148.54960418648, 1145.95250178776, 1143.07035314131,
1137.82269769928, 1133.88338944221, 1130.76687940009, 1128.18812336199,
1120.80925075608, 1118.40550744598, 1113.93545635589, 1104.9968430839,
1098.44571145686, 1096.38135988954, 1093.86884942387, 1090.43277224064,
1085.63821926534, 1082.79744209722, 1083.80625856415, 1083.6723314628,
1082.00354027587, 1077.87272739245, 1073.8896151646, 1071.01060743464,
1070.41054586943, 1069.56096911996, 1064.84087682282, 1061.11888950636,
1058.87994622004, 1055.5466184848, 1054.88694005768, 1053.88913948076,
1056.96921953021, 1059.95310805114, 77.1228859956622, 81.0362538530292,
78.8404654349793, 46.4728298378735, 33.7103494024937, 38.1634534707235,
33.5520386736078, 26.2429467891094, 30.5979953728327, 30.5979953728327,
31.2223518673486, 33.7665461425831, 36.6962580582319, 37.7398082531122,
40.5860776927095, 41.0627097257687, 40.7556533339627, 52.526559398101,
67.2093345204357, 57.3558861837519, 61.809628052695, 65.0522479908148,
60.3356537763659, 59.9025026642582, 60.6951031882524, 60.0950548232381,
59.3846485649388, 64.6199416069941, 64.1051430716001, 55.6515339908006,
58.7835089189351, 55.0890845598537, 48.1838706704649, 46.0064642542491,
48.4030879681908, 55.5793562399467, 43.3339041496164, 35.5089178322478,
42.157901440901, 32.5975281088021, 28.6602735068277, 26.9110067493817,
23.5372731683978, 27.6575715257538, 27.7636741048428, 28.4241344813052,
27.7437779358905, 33.8748748481366, 38.0173561927228, 37.3614293051309,
46.7027642395441, 51.6960358269122, 46.2684476430283, 67.9712504992444,
67.4307596718059, 65.3539239654913, 69.3859268680975, 65.8884694613497,
48.7463489665683, 48.3776103610145, 58.1513743683333, 53.5784372311078,
46.4319595892114, 54.1515204375632, 48.0571628692748, 48.6571396623733,
52.2995925118996, 44.9774509790143, 45.2591195805464, 48.7943143049565,
56.0044804919092, 57.6982718090011, 75.947686211121, 66.6475291255686,
63.2031704734223, 66.0494138822722, 66.2641524590373, 64.6800962380417,
66.0941051628946, 68.6330617447997, 62.298871330898, 58.4734193157287,
52.329016147723, 43.5650542408412, 44.6973713488007, 56.9666746925596,
61.477502601121, 70.1850582389349, 68.3785649248245, 64.1672444920065,
68.1060250901431, 67.2130080618559, 73.8468747118516, 69.6113702464934,
73.1570958144156, 74.8830412236628, 85.4049570826199, 81.7882678868151,
79.8159292966814, 65.9053697697576, 57.9091367119927, 44.4025529377091,
43.2388424796772, 42.7803356293289, 47.7057738515549, 44.7755737074884,
45.7557906780512, 40.016244653124, 41.4992896665767, 46.6336286507843,
44.3657650232027, 45.4718259236287, 45.2372613787558, 56.9881807801438,
58.8717301068573, 68.2039283244873, 73.5215112680329, 78.8594307629251,
73.0335410836162, 71.845824268758, 73.323376014074, 89.1748677280385,
88.8275948061702, 88.079358554904, 72.9197089804835, 66.5774741060939,
65.5905607795046, 60.3560855296636, 60.5351059532554, 61.4085229097936,
58.076745639994, 63.2173375817626, 67.2733875032827, 68.7459719049055,
59.9037653356146, 44.6491666372171, 40.4929666577831, 30.2655738215587,
36.0522832244009, 40.7505784647263, 45.517250253278, 41.5835266382263,
41.3526668380199, 41.539756712543, 48.3189167794286, 49.8415866657383,
44.5858982397584, 50.0675010891207, 50.5139938354098, 44.9097955003298,
37.4247186375495, 41.3952548987526, 39.6467050713014, 39.3953595896288,
36.8289128008105, 42.8772642627352, 37.5760511024063, 42.0791664435174,
36.4236440580649, 25.1434697637668, 29.0666072154372, 25.3668839063101,
34.1040319281821, 34.1351918720353, 42.138526061446, 49.3942545777117,
53.2282422165058, 60.0907410718325, 59.6946479180297, 56.5126081396889,
64.5584522103826, 61.6638469740838, 48.5567687748239, 50.4491176695018,
45.8595330253583, 39.1134283844586, 22.2017732449298, 24.6509068125481,
33.7409449463083, 27.0354908046699, 36.9033514343542, 31.849732552439,
28.384694400023, 30.2843907497844, 30.2566110685775, 30.1702095862,
28.1229085893699, 39.7891005017724, 37.8236546439287, 33.4844836408483,
42.9231744072258, 49.6425369989148, 43.9761986844232, 44.7318583977582,
37.1424843378588, 40.8120228103859, 50.807226927847, 47.9214803669887,
44.995279725301, 41.3197867616665, 47.7401787161256, 40.9599257198947,
48.8101085201251, 58.7773921954413, 46.8976151314924, 38.7370234461344,
43.0052200556536, 42.7247275761847, 51.7764243779359, 47.5063348907638,
48.4623219235214, 51.3175593621287), class = c("xts", "zoo"), .indexCLASS = "Date", .indexTZ = "UTC", tclass = "Date", tzone = "UTC", src = "yahoo", updated = structure(1544977543.47594, class = c("POSIXct",
"POSIXt")), index = structure(c(1517356800, 1517443200, 1517529600,
1517788800, 1517875200, 1517961600, 1518048000, 1518134400, 1518393600,
1518480000, 1518566400, 1518652800, 1518739200, 1519084800, 1519171200,
1519257600, 1519344000, 1519603200, 1519689600, 1519776000, 1519862400,
1519948800, 1520208000, 1520294400, 1520380800, 1520467200, 1520553600,
1520812800, 1520899200, 1520985600, 1521072000, 1521158400, 1521417600,
1521504000, 1521590400, 1521676800, 1521763200, 1522022400, 1522108800,
1522195200, 1522281600, 1522627200, 1522713600, 1522800000, 1522886400,
1522972800, 1523232000, 1523318400, 1523404800, 1523491200, 1523577600,
1523836800, 1523923200, 1524009600, 1524096000, 1524182400, 1524441600,
1524528000, 1524614400, 1524700800, 1524787200, 1525046400, 1525132800,
1525219200, 1525305600, 1525392000, 1525651200, 1525737600, 1525824000,
1525910400, 1525996800, 1526256000, 1526342400, 1526428800, 1526515200,
1526601600, 1526860800, 1526947200, 1527033600, 1527120000, 1527206400,
1527552000, 1527638400, 1527724800, 1527811200, 1528070400, 1528156800,
1528243200, 1528329600, 1528416000, 1528675200, 1528761600, 1528848000,
1528934400, 1529020800, 1529280000, 1529366400, 1529452800, 1529539200,
1529625600, 1529884800, 1529971200, 1530057600, 1530144000, 1530230400,
1530489600, 1530576000, 1530748800, 1530835200, 1531094400, 1531180800,
1531267200, 1531353600, 1531440000, 1531699200, 1531785600, 1531872000,
1531958400, 1532044800, 1532304000, 1532390400, 1532476800, 1532563200,
1532649600, 1532908800, 1532995200, 1533081600, 1533168000, 1533254400,
1533513600, 1533600000, 1533686400, 1533772800, 1533859200, 1534118400,
1534204800, 1534291200, 1534377600, 1534464000, 1534723200, 1534809600,
1534896000, 1534982400, 1535068800, 1535328000, 1535414400, 1535500800,
1535587200, 1535673600, 1536019200, 1536105600, 1536192000, 1536278400,
1536537600, 1536624000, 1536710400, 1536796800, 1536883200, 1537142400,
1537228800, 1537315200, 1537401600, 1537488000, 1537747200, 1537833600,
1537920000, 1538006400, 1538092800, 1538352000, 1538438400, 1538524800,
1538611200, 1538697600, 1538956800, 1539043200, 1539129600, 1539216000,
1539302400, 1539561600, 1539648000, 1539734400, 1539820800, 1539907200,
1540166400, 1540252800, 1540339200, 1540425600, 1540512000, 1540771200,
1540857600, 1540944000, 1541030400, 1541116800, 1541376000, 1541462400,
1541548800, 1541635200, 1541721600, 1541980800, 1542067200, 1542153600,
1542240000, 1542326400, 1542585600, 1542672000, 1542758400, 1542931200,
1543190400, 1543276800, 1543363200, 1543449600, 1543536000), tzone = "UTC", tclass = "Date"), .Dim = c(212L,
4L), .Dimnames = list(NULL, c("y", "x1", "x2", "x3")))
EDIT:
Just so I understand the function output a little better.
I set:
, n_train = 5
, n_test = 1
and get the following final 3 outputs:
[[203]]
2018-11-16 2018-11-19 2018-11-20 2018-11-21 2018-11-23 2018-11-26
1.00045650 0.08862828 0.61874897 1.00620776 0.67800147 0.60795702
[[204]]
2018-11-19 2018-11-20 2018-11-21 2018-11-23 2018-11-26 2018-11-27
0.05759443 0.69372082 0.93025186 0.72564291 0.60694731 0.98584268
[[205]]
2018-11-20 2018-11-21 2018-11-23 2018-11-26 2018-11-27 2018-11-28
0.8507988 0.8028078 0.7412901 0.6416496 0.9538837 1.0095700
Are these the predicted probabilities of the event happening? How can we have 1.0095700 as one of the probabilities?
Secondly, since n_train = 5 and n_test = 1, does the last output tell me that the first 5 results are the predicted probabilities on the training data and the 6th result is the prediction on the test data, i.e. 2018-11-28 = 1.0095700?
Is the same true for result 204, i.e. 2018-11-27 = 0.98584268?
I am not sure how you intend to use such a function, but you can wrap some of the code in an extra function, where you compute the training and testing indexes. For example, like so
#' Fit a GLM on the training rows and predict the test rows
#'
#' @param fm Model formula passed to `glm()`.
#' @param dat Data holding the variables used in `fm`; it is subset by row.
#' @param train_index Row indices of `dat` used to fit the model.
#' @param test_index Row indices of `dat` for which predictions are returned.
#' @param family Error distribution and link passed to `glm()`. The default,
#'   `gaussian()`, is what `glm()` uses when no family is given, so existing
#'   calls behave as before. A Gaussian fit is an ordinary linear model and
#'   its predictions are not bounded; pass `binomial()` for a 0/1 response
#'   when the predictions must be probabilities between 0 and 1.
#' @return Predictions on the response scale, one per row in `test_index`.
myfun <- function(fm, dat, train_index, test_index, family = gaussian()) {
  # drop = FALSE keeps the subsets two-dimensional even for a single row or
  # a single column, so glm() and predict() always receive tabular data.
  fit <- glm(fm, family = family, data = dat[train_index, , drop = FALSE])
  predict(fit, newdata = dat[test_index, , drop = FALSE], type = "response")
}
#' Fit and predict over rolling train/test windows
#'
#' Slides a window over the rows of `dat` one row at a time. In each window
#' the first `n_train` rows are used for fitting and the following `n_test`
#' rows are predicted, both by calling `myfun()`.
#'
#' @param dat Tabular data with a column named `y`. Every other column is
#'   used as a predictor in the formula `y ~ <all other columns>`.
#' @param n_train Number of consecutive rows used to fit each model.
#' @param n_test Number of rows, directly after the training rows, that are
#'   predicted in each window.
#' @return A list with one element per window holding the value returned by
#'   `myfun()` for that window. The list is empty when `dat` has fewer than
#'   `n_train + n_test` rows.
wrapper_myfun <- function(dat, n_train = 20, n_test = 10) {
  stopifnot("y" %in% names(dat), n_train >= 1, n_test >= 1)

  predictors <- setdiff(names(dat), "y")
  if (length(predictors) == 0L) {
    # With no predictor columns "y~" would not parse; fit an intercept only.
    predictors <- "1"
  }
  f_ <- formula(paste0("y~", paste(predictors, collapse = " + ")))

  stride <- n_train + n_test
  # A window starting at row i covers rows i .. i + stride - 1, so the last
  # valid start is nrow - stride + 1. seq_len() gives an empty sequence when
  # the data is too short, where seq(1, n) would count down through 0 and
  # negative row indices.
  n_windows <- max(dim(dat)[1] - stride + 1, 0)
  start_position <- seq_len(n_windows)

  # Exactly n_train training rows, immediately followed by n_test test rows.
  train_index_list <- lapply(
    start_position,
    function(i) seq(i, length.out = n_train)
  )
  test_index_list <- lapply(
    start_position,
    function(i) seq(i + n_train, length.out = n_test)
  )

  mapply(
    myfun,
    train_index = train_index_list,
    test_index = test_index_list,
    MoreArgs = list(fm = f_, dat = dat),
    SIMPLIFY = FALSE
  )
}
You can further optimize this code.
Choosing between 1 and 10 time periods for test purposes depends on application quite a bit.
HTH
I have a dataset that consists of rows of 896 SNP in a genome. Then I have a probability value of each SNP coming from a specific founder. The founders are grouped in groups of 8.
I want to check for each SNP probability if the maximum value has a tie, so check for the highest value in each group of 8 founders and see if it is repeated. I am only interested in 2-way ties. Then I want to get the value of the tie, the SNP in which it appears and which founders have it.
The problem is that I have the data given to me in a way that all founder groups are in columns repeated next to each other with a layout like this :
SNP P1_1 P1_2 P1_3 P1_4 P1_5 P1_6 P1_7 P1_8 ... Pn_1 Pn_2 Pn_3 Pn_4 Pn_5 Pn_6 Pn_7 Pn_8
Asd
FDA
FDG
GDE
GBD
SDF
I have worked it out by using loops, but in total for my dataset it takes 17 hours(!!!) to go through it, 896 SNPs and 600+ groups of 8 founders each.
Is there any way to make it more efficient? I was thinking of something like melt(), with the final dataset arranged in groups of 8 columns. Would that be possible, and if so, would it make it more efficient?
Here is what I have worked out so far:
# Read the wide SNP-by-founder probability table and reshape it to long
# format (one row per SNP/founder pair), then label every row with its
# founder position, founder group and SNP number.

# library() stops right here if reshape2 is missing; require() would only
# warn and let the script fail later at melt().
library(reshape2)
A4 <- read.csv("sample.csv", header = TRUE)
SNP_count <- nrow(A4)
A5 <- melt(A4)

# The labels below assume melt() stacked the founder columns one after the
# other, SNP_count rows per column, and that founders come in groups of 8.
stopifnot(nrow(A5) %% (SNP_count * 8) == 0)
n_groups <- nrow(A5) / SNP_count / 8

# Give single index from 1-8 to each FOUNDER
A5[, 4] <- rep(rep(seq_len(8), each = SNP_count), times = n_groups)
# Give a grouping number to each row so we know in which FOUNDER group it belongs
A5[, 5] <- rep(seq_len(n_groups), each = SNP_count * 8)
# Number the SNPs 1..SNP_count within every founder column
A5[, 6] <- rep(seq_len(SNP_count), times = n_groups * 8)
colnames(A5) <- c("SNP", "FOUNDER", "PROB", "FOUNDER_ID", "GROUP_ID", "SNP_ID")
# Find every (founder group, SNP) cell whose highest probability is shared by
# exactly two founders, and collect one row per such cell in `maxes`.
#
# Each row of `maxes` holds, in order: the probabilities of the cell, the SNP
# name, the number of tied maxima (always 2), the positions of the two tied
# values within the cell, the tied maximum itself, and the name of the first
# founder column of the cell. Numbers and names are combined with c(), so
# `maxes` is a character matrix. It is NULL when no cell has a two-way tie.

# Pull the columns out once instead of re-extracting them for every cell.
prob <- A5$PROB
snp_name <- as.character(A5[[1]])
founder_name <- as.character(A5[[2]])

# Row positions of every cell, gathered in one pass over A5. Scanning all of
# A5 with two logical masks per cell is what made the nested loops so slow.
# The cells come out with SNP_ID varying fastest inside each GROUP_ID, which
# is the order the nested loops visited them.
cell_rows <- split(
  seq_len(nrow(A5)),
  list(A5$SNP_ID, A5$GROUP_ID),
  drop = TRUE
)

# unname() so that rbind() below does not turn the cell labels into row names.
tie_rows <- lapply(unname(cell_rows), function(rows) {
  e <- prob[rows]
  top <- which(e == max(e))
  if (length(top) != 2L) {
    return(NULL)
  }
  print(e)
  c(
    e, snp_name[rows[1]], length(top), top[1], top[2], max(e),
    founder_name[rows[1]]
  )
})

# Bind all rows at once; growing `maxes` with rbind() inside a loop copies the
# whole matrix on every tie found.
maxes <- do.call(rbind, Filter(Negate(is.null), tie_rows))
From testing, it seems that the line that makes it really slow is this:
e<-A5$PROB[A5$GROUP_ID==x & A5$SNP_ID==i]
Any help from anyone here would be amazingly appreciated.
dput results given below
structure(list(SNP = structure(c(571L, 690L, 7L, 204L, 317L,
39L, 559L, 657L, 221L, 470L, 576L, 49L, 413L, 460L, 606L, 348L,
111L, 129L, 30L, 290L, 291L, 767L, 96L, 172L, 38L, 48L, 54L,
144L, 273L, 644L), .Label = c("BobWhite_c11298_512", "BobWhite_c11935_137",
"BobWhite_c12428_371", "BobWhite_c1740_97", "BobWhite_c17852_511",
"BobWhite_c17879_519", "BobWhite_c18256_105", "BobWhite_c18408_199",
"BobWhite_c18593_955", "BobWhite_c1895_1953", "BobWhite_c21397_148",
"BobWhite_c28950_147", "BobWhite_c29419_116", "BobWhite_c30009_285",
"BobWhite_c30232_154", "BobWhite_c31500_144", "BobWhite_c34548_96",
"BobWhite_c34866_232", "BobWhite_c35093_176", "BobWhite_c35303_192",
"BobWhite_c42102_237", "BobWhite_c43213_184", "BobWhite_c43681_334",
"BobWhite_c45534_535", "BobWhite_c46361_331", "BobWhite_c47722_613",
"BobWhite_c5337_225", "BobWhite_c8340_511", "BobWhite_c9704_273",
"BobWhite_c9961_402", "BobWhite_c9992_862a", "BobWhite_rep_c49102_169",
"BobWhite_rep_c55551_53", "BobWhite_s65081_93", "BobWhite_s67516_159",
"BS00000445_51", "BS00001478_51", "BS00003733_51", "BS00003932_51",
"BS00003935_51", "BS00003956_51", "BS00003964_51", "BS00003971_51",
"BS00006822_51", "BS00007502_51", "BS00009649_51", "BS00009657_51",
"BS00009793_51", "BS00010059_51", "BS00010204_51", "BS00010531_51",
"BS00010854_51", "BS00011231_51", "BS00011516_51", "BS00011612_51",
"BS00012551_51", "BS00013584_51", "BS00017988_51", "BS00020459_51",
"BS00021694_51", "BS00021871_51", "BS00021909_51", "BS00021981_51",
"BS00022016_51", "BS00022029_51", "BS00022148_51", "BS00022159_51",
"BS00022182_51", "BS00022256_51", "BS00022273_51", "BS00022368_51",
"BS00022419_51", "BS00022424_51", "BS00022459_51", "BS00022516_51",
"BS00022528_51", "BS00022586_51", "BS00022703_51", "BS00022746_51",
"BS00022804_51", "BS00022845_51", "BS00022882_51", "BS00022884_51",
"BS00022908_51", "BS00022968_51", "BS00022989_51", "BS00023026_51",
"BS00023028_51", "BS00023189_51", "BS00023222_51", "BS00023337_51",
"BS00023511_51", "BS00025191_51", "BS00026189_51", "BS00026396_51",
"BS00027516_51", "BS00028660_51", "BS00029361_51", "BS00029569_51",
"BS00030652_51", "BS00030876_51", "BS00032025_51", "BS00032524_51",
"BS00032525_51", "BS00033658_51", "BS00034334_51", "BS00035364_51",
"BS00035576_51", "BS00036168_51", "BS00036492_51", "BS00037189_51",
"BS00037400_51", "BS00037537_51", "BS00038663_51", "BS00038664_51",
"BS00039357_51", "BS00039498_51", "BS00040798_51", "BS00041121_51",
"BS00041462_51", "BS00041742_51", "BS00042191_51", "BS00045327_51",
"BS00047668_51", "BS00047836_51", "BS00048031_51", "BS00048633_51",
"BS00048757_51", "BS00049420_51", "BS00049636_51", "BS00049637_51",
"BS00049977_51", "BS00055211_51", "BS00056089_51", "BS00057444_51",
"BS00057445_51", "BS00057523_51", "BS00057524_51", "BS00060029_51",
"BS00060391_51", "BS00061173_51", "BS00061179_51", "BS00062645_51",
"BS00062673_51", "BS00062808_51", "BS00063163_51", "BS00063300_51",
"BS00063696_51", "BS00064039_51", "BS00064422_51", "BS00064487_51",
"BS00064692_51", "BS00064703_51", "BS00065468_51", "BS00065673_51",
"BS00065734_51", "BS00065840_51", "BS00065932_51", "BS00065956_51",
"BS00066230_51", "BS00066475_51", "BS00066585_51", "BS00066714_51",
"BS00066800_51", "BS00067190_51", "BS00067215_51", "BS00067216_51",
"BS00067224_51", "BS00067499_51", "BS00067711_51", "BS00067748_51",
"BS00067777_51", "BS00067940_51", "BS00067997_51", "BS00068094_51",
"BS00068508_51", "BS00068520_51", "BS00069355_51", "BS00070464_51",
"BS00070511_51", "BS00070870_51", "BS00071422_51", "BS00071823_51",
"BS00071948_51", "BS00072186_51", "BS00072296_51", "BS00073009_51",
"BS00073525_51", "BS00073854_51", "BS00074617_51", "BS00074926_51",
"BS00075119_51", "BS00075515_51", "BS00075598_51", "BS00076772_51",
"BS00077819_51", "BS00078430_51", "BS00078669_51", "BS00080546_51",
"BS00080879_51", "BS00081418_51", "BS00081475_51", "BS00081610_51",
"BS00081981_51", "BS00082982_51", "BS00083279_51", "BS00083398_51",
"BS00084158_51", "BS00084159_51", "BS00085967_51", "BS00086050_51",
"BS00086051_51", "BS00086247_51", "BS00088755_51", "BS00088756_51",
"BS00089400_51", "BS00090225_51", "BS00090405_51", "BS00091002_51",
"BS00091887_51", "BS00092728_51", "BS00092735_51", "BS00093252_51",
"BS00093377_51", "BS00093871_51", "BS00093889_51", "BS00094366_51",
"BS00094523_51", "BS00095228_51", "BS00095423_51", "BS00096642_51",
"BS00097265_51", "BS00098840_51", "BS00099980_51", "BS00100626_51",
"BS00101363_51", "BS00101401_51", "BS00104401_51", "BS00105963_51",
"BS00105964_51", "BS00106008_51", "BS00106932_51", "BS00107444_51",
"BS00109084_51", "BS00109700_51", "BS00109935_51", "BS00110266_51",
"BS00110405_51", "BS00110452_51", "BS00110550_51", "BS00110564_51",
"BS00110638_51a", "CAP11_c1022_117", "CAP11_c1022_66", "CAP11_c3472_60",
"CAP11_c6193_232", "CAP11_rep_c6920_161", "CAP12_c1840_108",
"CAP12_c1860_280", "CAP7_c1238_51", "CAP7_c2141_129", "CAP7_c3178_52",
"CAP7_c3367_68", "CAP7_c915_121", "CAP7_rep_c12537_81", "CAP8_c1393_327",
"CAP8_c359_95", "CAP8_c665_242", "CAP8_c665_409", "CAP8_rep_c3652_80",
"CAP8_rep_c6382_237", "D_contig22919_290", "D_F5XZDLF02GN47Z_208",
"D_GBB4FNX02FX302_98", "Ex_c104581_457", "Ex_c15087_564", "Ex_c17586_108",
"Ex_c18484_2026", "Ex_c18484_2048", "Ex_c24554_1583", "Ex_c45438_377",
"Ex_c5858_1681", "Ex_c5858_953", "Ex_c66357_866", "Ex_c67794_487",
"Ex_c8871_1318", "Ex_c9685_1264", "Excalibur_c10079_1585", "Excalibur_c10383_432",
"Excalibur_c11079_749", "Excalibur_c11079_923", "Excalibur_c11505_155",
"Excalibur_c12875_864", "Excalibur_c13242_1178", "Excalibur_c13709_2568",
"Excalibur_c1529_1644", "Excalibur_c18089_1116", "Excalibur_c20250_313",
"Excalibur_c21051_515", "Excalibur_c24123_1597", "Excalibur_c24123_165",
"Excalibur_c2419_531", "Excalibur_c24354_465", "Excalibur_c24829_189",
"Excalibur_c25239_283", "Excalibur_c26754_433", "Excalibur_c29205_537",
"Excalibur_c29600_173", "Excalibur_c31571_136", "Excalibur_c33545_134",
"Excalibur_c39248_485", "Excalibur_c39309_171", "Excalibur_c39508_88",
"Excalibur_c39808_453", "Excalibur_c40694_473", "Excalibur_c41477_1272",
"Excalibur_c42951_136", "Excalibur_c42978_149", "Excalibur_c46082_440",
"Excalibur_c47078_512", "Excalibur_c47907_517", "Excalibur_c49743_97",
"Excalibur_c5097_1468", "Excalibur_c52446_519", "Excalibur_c5561_1013",
"Excalibur_c56240_176", "Excalibur_c60452_196", "Excalibur_c60581_62",
"Excalibur_c62042_175", "Excalibur_c64302_103", "Excalibur_c6501_477",
"Excalibur_c9000_209", "Excalibur_c91154_164", "Excalibur_c91154_175",
"Excalibur_c96921_206", "Excalibur_c9811_131", "Excalibur_c98205_83",
"Excalibur_rep_c103091_266", "Excalibur_rep_c103408_632a", "Excalibur_rep_c104884_1325",
"Excalibur_rep_c104884_417", "Excalibur_rep_c105085_102", "Excalibur_rep_c105978_544",
"Excalibur_rep_c110501_525", "Excalibur_rep_c112985_337", "Excalibur_rep_c116073_259",
"Excalibur_rep_c67269_496", "Excalibur_rep_c69004_1008", "Excalibur_s113043_59",
"GENE_1464_73", "GENE_1549_110", "GENE_1606_307", "GENE_1634_405",
"GENE_1661_228", "GENE_1674_631", "GENE_1820_661", "GENE_1826_718",
"GENE_1838_79", "GENE_2052_186", "GENE_2087_300", "GENE_3343_183",
"GENE_3665_61", "GENE_3939_576", "GENE_3939_653", "GENE_4252_246",
"GENE_4795_75", "IAAV1155", "IAAV1334", "IAAV1410", "IAAV1523",
"IAAV2646", "IAAV3826", "IAAV3900", "IAAV4190", "IAAV4286", "IAAV4781",
"IAAV488", "IAAV5030", "IAAV5207", "IAAV5370", "IAAV5507", "IAAV5729",
"IAAV5821", "IAAV5984", "IAAV6317", "IAAV6474", "IAAV6676", "IAAV6974",
"IAAV7454", "IAAV7930", "IAAV8170", "IAAV8218", "IAAV8351", "IAAV8407",
"IAAV8768", "IAAV8924", "IAAV902", "IAAV9044", "IAAV9068", "IACX11403",
"IACX2728", "IACX2831", "IACX464", "IACX5909", "IACX5968", "IACX6054",
"IACX6065", "IACX6092", "IACX6277", "IACX8519", "Jagger_c367_427",
"Jagger_c5842_90", "Jagger_c6722_104", "Jagger_c765_61", "Jagger_c791_62",
"JD_c1187_1398", "JG_c3077_225", "Ku_c103671_362", "Ku_c12191_1202",
"Ku_c13307_1116", "Ku_c14313_1194", "Ku_c17569_905", "Ku_c18096_552",
"Ku_c19456_645", "Ku_c26872_269", "Ku_c28597_514", "Ku_c41007_116",
"Ku_c4231_940", "Ku_c56370_1155", "Ku_c64839_240", "Ku_c68484_1276",
"Ku_c69970_624", "Ku_c69970_631", "Ku_c70534_1215", "Ku_c73010_143",
"Kukri_c10210_1387", "Kukri_c10210_1700", "Kukri_c10511_94",
"Kukri_c10751_1031", "Kukri_c10751_158", "Kukri_c10751_264",
"Kukri_c10751_860", "Kukri_c10977_990", "Kukri_c11709_874", "Kukri_c12079_204",
"Kukri_c12212_182", "Kukri_c13793_1152", "Kukri_c14012_358",
"Kukri_c14029_117", "Kukri_c14971_367", "Kukri_c15950_244", "Kukri_c17313_667",
"Kukri_c17966_561", "Kukri_c20889_526", "Kukri_c2123_1254", "Kukri_c23388_695",
"Kukri_c23833_181", "Kukri_c2454_59", "Kukri_c25564_185", "Kukri_c28650_111",
"Kukri_c28917_96", "Kukri_c30370_79", "Kukri_c31546_66", "Kukri_c33640_640",
"Kukri_c34195_357", "Kukri_c34845_242", "Kukri_c35426_507", "Kukri_c36418_392",
"Kukri_c3673_2442", "Kukri_c41361_186", "Kukri_c4324_74", "Kukri_c4568_1708",
"Kukri_c46833_105", "Kukri_c47643_920", "Kukri_c49927_151", "Kukri_c51247_322",
"Kukri_c51666_401", "Kukri_c5459_334", "Kukri_c54593_543", "Kukri_c54729_181",
"Kukri_c55381_67", "Kukri_c62467_362", "Kukri_c6288_364", "Kukri_c63259_151",
"Kukri_c63361_97", "Kukri_c64268_101", "Kukri_c68006_282", "Kukri_c80104_809",
"Kukri_c82097_197", "Kukri_c82117_279", "Kukri_c8242_730", "Kukri_c8400_2315",
"Kukri_c8465_54", "Kukri_c97654_90", "Kukri_rep_c102953_304",
"Kukri_rep_c103783_1380", "Kukri_rep_c105819_114", "Kukri_rep_c107195_111",
"Kukri_rep_c109051_116", "Kukri_rep_c110312_376", "Kukri_rep_c112061_617",
"Kukri_rep_c114028_94", "Kukri_rep_c69028_139", "Kukri_rep_c69028_1398",
"Kukri_rep_c69028_347", "Kukri_rep_c69970_717", "Kukri_rep_c70441_132",
"Kukri_rep_c70479_411", "Kukri_rep_c71916_1548", "Kukri_rep_c73936_154",
"Kukri_rep_c86903_184", "Kukri_rep_c87640_135", "Kukri_rep_c89183_282",
"Kukri_rep_c89509_83", "Kukri_s110037_62", "Kukri_s117946_404",
"Ra_c11263_2353", "Ra_c14565_1056", "Ra_c1619_250", "Ra_c1619_432",
"Ra_c20211_1415", "Ra_c23771_496", "Ra_c38505_544", "Ra_c38505_555",
"Ra_c3994_598", "Ra_c42858_503", "Ra_c42858_91", "Ra_c48730_581",
"Ra_c5515_1723", "Ra_c5515_2396", "Ra_c5515_2469", "Ra_c70087_485",
"Ra_c72106_227", "Ra_c73278_1234", "Ra_c88203_1055", "Ra_c88203_455",
"Ra_c88203_468", "RAC875_c10194_673", "RAC875_c10194_910", "RAC875_c10286_1247",
"RAC875_c10430_672", "RAC875_c14684_1128", "RAC875_c15109_106",
"RAC875_c1619_120", "RAC875_c1666_126", "RAC875_c17907_181",
"RAC875_c18570_893", "RAC875_c19138_402", "RAC875_c19570_671",
"RAC875_c20134_535", "RAC875_c22333_504", "RAC875_c23470_220",
"RAC875_c2464_274", "RAC875_c24982_202", "RAC875_c26514_127",
"RAC875_c27323_867", "RAC875_c2786_1097", "RAC875_c30443_966",
"RAC875_c3141_214", "RAC875_c31572_251", "RAC875_c31572_281",
"RAC875_c3427_734", "RAC875_c34649_380", "RAC875_c35074_452",
"RAC875_c36922_829", "RAC875_c371_251", "RAC875_c37118_595",
"RAC875_c39728_187", "RAC875_c45016_79", "RAC875_c46323_1996",
"RAC875_c46403_277", "RAC875_c47550_437", "RAC875_c47976_291",
"RAC875_c50787_113", "RAC875_c50864_1921", "RAC875_c51781_238",
"RAC875_c52195_324", "RAC875_c5310_1729", "RAC875_c53864_310",
"RAC875_c55313_89", "RAC875_c58159_521", "RAC875_c6445_275",
"RAC875_c66845_466", "RAC875_c66892_58", "RAC875_c67309_634",
"RAC875_c67998_96", "RAC875_c68056_81", "RAC875_c75663_115",
"RAC875_c787_431", "RAC875_c79551_167", "RAC875_c8010_155", "RAC875_c8174_1034",
"RAC875_c82493_67", "RAC875_c842_1476", "RAC875_c842_1516", "RAC875_c99055_69",
"RAC875_rep_c105428_270", "RAC875_rep_c108615_475", "RAC875_rep_c109228_400",
"RAC875_rep_c109288_122", "RAC875_rep_c109433_782", "RAC875_rep_c111243_125",
"RAC875_rep_c111698_383", "RAC875_rep_c111952_436", "RAC875_rep_c117959_132",
"RAC875_rep_c69730_522", "RAC875_rep_c70746_582", "RAC875_rep_c75557_177",
"RAC875_rep_c75768_245", "RAC875_rep_c77065_1532", "RAC875_rep_c77148_311",
"RAC875_rep_c81701_424", "RAC875_rep_c90531_283", "RAC875_s120038_124",
"RFL_Contig102_119", "RFL_Contig1175_354", "RFL_Contig1435_886",
"RFL_Contig1488_671", "RFL_Contig1798_1606", "RFL_Contig2394_439",
"RFL_Contig4282_1420", "RFL_Contig4399_956", "RFL_Contig4403_1034",
"RFL_Contig4921_2420", "RFL_Contig497_1114", "RFL_Contig5153_2667",
"RFL_Contig5153_958", "RFL_Contig5871_1650", "TA001038_0975",
"TA001383_0516", "TA002103_1262", "TA002702_0616", "TA003179_0872",
"TA003281_2379", "TA003550_0145", "TA003589_0518", "TA005161_0899",
"TA005199_0585", "TA005216_0272", "TA015264_0958", "Tdurum_contig10307_375",
"Tdurum_contig11121_739", "Tdurum_contig11121_834", "Tdurum_contig12008_842",
"Tdurum_contig13011_381", "Tdurum_contig13548_158", "Tdurum_contig25520_363",
"Tdurum_contig27982_568", "Tdurum_contig28699_54", "Tdurum_contig31235_99",
"Tdurum_contig33100_127", "Tdurum_contig42495_389", "Tdurum_contig45726_1116",
"Tdurum_contig5009_349", "Tdurum_contig5009_392", "Tdurum_contig5009_735",
"Tdurum_contig50376_375", "Tdurum_contig50392_1355", "Tdurum_contig50577_620",
"Tdurum_contig50617_713", "Tdurum_contig51995_377", "Tdurum_contig51995_474",
"Tdurum_contig55443_1361", "Tdurum_contig56373_348", "Tdurum_contig56731_335",
"Tdurum_contig57139_318", "Tdurum_contig57693_581", "Tdurum_contig57753_138",
"Tdurum_contig58326_467", "Tdurum_contig59531_892", "Tdurum_contig59531_914",
"Tdurum_contig59585_576", "Tdurum_contig59585_656", "Tdurum_contig59782_86",
"Tdurum_contig61299_55", "Tdurum_contig62502_90", "Tdurum_contig62557_263",
"Tdurum_contig63129_238", "Tdurum_contig67686_1149", "Tdurum_contig67686_1204",
"Tdurum_contig67686_633", "Tdurum_contig67686_792", "Tdurum_contig67686_851",
"Tdurum_contig68305_703", "Tdurum_contig68305_796", "Tdurum_contig68855_91",
"Tdurum_contig697_73", "Tdurum_contig76105_124", "Tdurum_contig76105_201",
"Tdurum_contig83209_316", "Tdurum_contig83663_371", "Tdurum_contig91865_242",
"Tdurum_contig93570_848", "Tdurum_contig9912_216", "Tdurum_contig9912_228",
"Tdurum_contig9912_451", "tplb0050h15_1287", "tplb0055d10_1024",
"wsnp_BE426222A_Ta_2_1", "wsnp_BE443568A_Ta_2_1", "wsnp_BE443568A_Ta_2_2",
"wsnp_BE443995B_Ta_2_2", "wsnp_BE490613A_Ta_2_1", "wsnp_BE494474A_Ta_2_2",
"wsnp_BE604885A_Ta_2_1", "wsnp_BE604885A_Ta_2_2", "wsnp_BF293133A_Ta_2_2",
"wsnp_BF429272A_Ta_2_1", "wsnp_BG262734A_Ta_2_3", "wsnp_BG262734A_Ta_2_7",
"wsnp_BM137927A_Ta_2_1", "wsnp_BQ171931A_Ta_2_1", "wsnp_CAP11_c2438_1258747",
"wsnp_CAP11_c318_261649", "wsnp_CAP11_rep_c4157_1965583", "wsnp_CAP11_rep_c4226_1995152",
"wsnp_CAP12_c15_9559", "wsnp_CAP12_rep_c8867_3720285", "wsnp_CAP8_c6939_3242530",
"wsnp_Ex_c10014_16477392", "wsnp_Ex_c10272_16842803", "wsnp_Ex_c10630_17338753",
"wsnp_Ex_c10667_17387885", "wsnp_Ex_c11039_17902115", "wsnp_Ex_c11085_17973016",
"wsnp_Ex_c11229_18163892", "wsnp_Ex_c11397_18400400", "wsnp_Ex_c1141_2191485",
"wsnp_Ex_c1149_2206471", "wsnp_Ex_c11807_18960045", "wsnp_Ex_c12269_19597341",
"wsnp_Ex_c12269_19597415", "wsnp_Ex_c12341_19693570", "wsnp_Ex_c12354_19711297",
"wsnp_Ex_c12750_20243224", "wsnp_Ex_c12948_20511479", "wsnp_Ex_c1335_2556442",
"wsnp_Ex_c13802_21639096", "wsnp_Ex_c14202_22144844", "wsnp_Ex_c14202_22145136",
"wsnp_Ex_c14202_22145805", "wsnp_Ex_c14340_22315611", "wsnp_Ex_c14400_22381382",
"wsnp_Ex_c14400_22381548", "wsnp_Ex_c14420_22402673", "wsnp_Ex_c15100_23284023",
"wsnp_Ex_c15269_23491104", "wsnp_Ex_c15269_23492289", "wsnp_Ex_c1533_2930233",
"wsnp_Ex_c1538_2937905", "wsnp_Ex_c15475_23756906", "wsnp_Ex_c15475_23757972",
"wsnp_Ex_c15674_24004513", "wsnp_Ex_c15674_24005648", "wsnp_Ex_c16079_24507688",
"wsnp_Ex_c1660_3159173", "wsnp_Ex_c16615_25147492", "wsnp_Ex_c18596_27457344",
"wsnp_Ex_c18637_27508578", "wsnp_Ex_c20273_29326769", "wsnp_Ex_c2148_4035913",
"wsnp_Ex_c22435_31629303", "wsnp_Ex_c22888_32105519", "wsnp_Ex_c24085_33332723",
"wsnp_Ex_c24731_33983680", "wsnp_Ex_c2502_4675968", "wsnp_Ex_c25132_34396655",
"wsnp_Ex_c26887_36107413", "wsnp_Ex_c27150_36365659", "wsnp_Ex_c28679_37784954",
"wsnp_Ex_c28930_38008757", "wsnp_Ex_c29742_38738725", "wsnp_Ex_c32003_40728918",
"wsnp_Ex_c3478_6369892", "wsnp_Ex_c35073_43285821", "wsnp_Ex_c35457_43602830",
"wsnp_Ex_c361_707953", "wsnp_Ex_c361_708712", "wsnp_Ex_c37208_45002588",
"wsnp_Ex_c41283_48140956", "wsnp_Ex_c41283_48141201", "wsnp_Ex_c47763_52874806",
"wsnp_Ex_c47907_52974924", "wsnp_Ex_c48136_53140385", "wsnp_Ex_c5047_8963671",
"wsnp_Ex_c51776_55603135", "wsnp_Ex_c53364_56625806", "wsnp_Ex_c55051_57706127",
"wsnp_Ex_c5623_9891427", "wsnp_Ex_c5623_9891516", "wsnp_Ex_c5623_9891584",
"wsnp_Ex_c56525_58609595", "wsnp_Ex_c57322_59083238", "wsnp_Ex_c57322_59084809",
"wsnp_Ex_c57322_59084950", "wsnp_Ex_c5929_10402147", "wsnp_Ex_c5997_10512308",
"wsnp_Ex_c60462_60905848", "wsnp_Ex_c6217_10848574", "wsnp_Ex_c6833_11782875",
"wsnp_Ex_c742_1458743", "wsnp_Ex_c763_1503467", "wsnp_Ex_c8409_14170476",
"wsnp_Ex_c88767_80001420", "wsnp_Ex_c8884_14841846", "wsnp_Ex_c9145_15214903",
"wsnp_Ex_c943_1808232", "wsnp_Ex_c943_1808577", "wsnp_Ex_c9458_15679797",
"wsnp_Ex_c9468_15696542", "wsnp_Ex_c9483_15722127", "wsnp_Ex_c9510_15761235",
"wsnp_Ex_rep_c101340_86719115", "wsnp_Ex_rep_c101340_86719239",
"wsnp_Ex_rep_c101942_87217430", "wsnp_Ex_rep_c102478_87635370",
"wsnp_Ex_rep_c106152_90334299", "wsnp_Ex_rep_c108072_91444417",
"wsnp_Ex_rep_c66357_64540369", "wsnp_Ex_rep_c66357_64540428",
"wsnp_Ex_rep_c66685_65003087", "wsnp_Ex_rep_c66685_65003254",
"wsnp_Ex_rep_c66685_65003625", "wsnp_Ex_rep_c66867_65267909",
"wsnp_Ex_rep_c66907_65324299", "wsnp_Ex_rep_c67349_65914945",
"wsnp_Ex_rep_c67460_66057400", "wsnp_Ex_rep_c67588_66227926",
"wsnp_Ex_rep_c67635_66291944", "wsnp_Ex_rep_c67635_66292308",
"wsnp_Ex_rep_c67635_66292689", "wsnp_Ex_rep_c67727_66398596",
"wsnp_Ex_rep_c67786_66472568", "wsnp_Ex_rep_c69034_67934852",
"wsnp_Ex_rep_c69034_67935465", "wsnp_Ex_rep_c69314_68244036",
"wsnp_Ex_rep_c69314_68244502", "wsnp_Ex_rep_c69577_68526990",
"wsnp_Ex_rep_c69752_68711460", "wsnp_Ex_rep_c69864_68823765",
"wsnp_Ex_rep_c69864_68824236", "wsnp_Ex_rep_c69864_68824319",
"wsnp_JD_c1187_1731186", "wsnp_JD_c29019_23208078", "wsnp_JD_c3034_4017676",
"wsnp_JD_c5699_6859527", "wsnp_JD_c7532_8615717", "wsnp_JD_c8207_9234643",
"wsnp_JD_c9434_10274598a", "wsnp_JD_c968_1427139", "wsnp_Ku_c10468_17301042",
"wsnp_Ku_c11052_18135847", "wsnp_Ku_c18497_27803432", "wsnp_Ku_c19456_28944589",
"wsnp_Ku_c23901_33846711", "wsnp_Ku_c30545_40369365", "wsnp_Ku_c32404_42016343",
"wsnp_Ku_c3286_6111360", "wsnp_Ku_c38911_47455674", "wsnp_Ku_c38911_47455924",
"wsnp_Ku_c40218_48484410", "wsnp_Ku_c4568_8243646", "wsnp_Ku_c4568_8243775",
"wsnp_Ku_c4886_8753646", "wsnp_Ku_c5243_9344536", "wsnp_Ku_c5359_9531713",
"wsnp_Ku_c5378_9559013", "wsnp_Ku_c7811_13387117", "wsnp_Ku_c8400_14280021",
"wsnp_Ku_c9433_15811664", "wsnp_Ku_rep_c104691_91086871", "wsnp_Ku_rep_c69970_69476502",
"wsnp_Ku_rep_c70479_70079622", "wsnp_Ku_rep_c71761_71496470",
"wsnp_Ra_c10669_17515792", "wsnp_Ra_c11291_18338838", "wsnp_Ra_c132_291198",
"wsnp_Ra_c16053_24607526", "wsnp_Ra_c16278_24893033", "wsnp_Ra_c16846_25598885",
"wsnp_Ra_c19079_28210829", "wsnp_Ra_c19079_28210937", "wsnp_Ra_c27831_37346894",
"wsnp_Ra_c29280_38672141", "wsnp_Ra_c35889_44345459", "wsnp_Ra_c44141_50623811",
"wsnp_Ra_c4858_8709000", "wsnp_Ra_c7280_12576178", "wsnp_Ra_rep_c106523_90273922",
"wsnp_Ra_rep_c72670_70836439a", "wsnp_RFL_Contig2011_1216801",
"wsnp_RFL_Contig2767_2518373", "wsnp_RFL_Contig3866_4228783",
"wsnp_RFL_Contig429_4978628", "wsnp_RFL_Contig4734_5671036",
"wsnp_RFL_Contig4814_5829093"), class = "factor"), MEL_004.3_haplotype1 = c(0.00033,
1e-05, 0, 0, 0, 0, 0, 0, 0.00013, 0.00013, 0.00014, 0.00022,
0.00022, 0.00022, 0.00022, 0.00027, 0.00027, 0.00023, 0.00023,
0.00023, 0.00022, 0.00022, 0.00015, 0.00014, 3e-05, 3e-05, 2e-05,
1e-05, 1e-05, 0), MEL_004.3_haplotype2 = c(0.00033, 1e-05, 0,
0, 0, 0, 0, 0, 0.00013, 0.00013, 0.00014, 0.00022, 0.00022, 0.00022,
0.00022, 0.00027, 0.00027, 0.00023, 0.00023, 0.00023, 0.00022,
0.00022, 0.00015, 0.00014, 3e-05, 3e-05, 2e-05, 1e-05, 1e-05,
0), MEL_004.3_haplotype3 = c(0.00033, 1e-05, 0, 0, 0, 0, 0, 0,
0.00013, 0.00013, 0.00014, 0.00022, 0.00022, 0.00022, 0.00022,
0.00027, 0.00027, 0.00023, 0.00023, 0.00023, 0.00022, 0.00022,
0.00015, 0.00014, 3e-05, 3e-05, 2e-05, 1e-05, 1e-05, 0), MEL_004.3_haplotype4 = c(0.00033,
1e-05, 0, 0, 0, 4e-05, 4e-05, 4e-05, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.00021), MEL_004.3_haplotype5 = c(0.00033,
1e-05, 0, 0, 0, 4e-05, 4e-05, 4e-05, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), MEL_004.3_haplotype6 = c(0.96403,
0.96583, 0.96858, 0.97124, 0.97155, 0.97417, 0.97446, 0.97749,
0.98022, 0.9804, 0.98058, 0.98303, 0.98316, 0.9833, 0.98345,
0.98632, 0.98946, 0.99245, 0.99259, 0.99272, 0.99286, 0.993,
0.9958, 0.9961, 0.99927, 0.99939, 0.99948, 0.9996, 0.99972, 0.99973
), MEL_004.3_haplotype7 = c(0, 0, 0, 0, 0, 4e-05, 4e-05, 4e-05,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2e-05), MEL_004.3_haplotype8 = c(0.03431, 0.03407, 0.03141, 0.02873,
0.02844, 0.0257, 0.0254, 0.02237, 0.0194, 0.01919, 0.01897, 0.01631,
0.01616, 0.01602, 0.01586, 0.01286, 0.00971, 0.00683, 0.0067,
0.00658, 0.00646, 0.00633, 0.00374, 0.00346, 0.00065, 0.00055,
0.00045, 0.00034, 0.00024, 3e-05), MEL_005.3_haplotype1 = c(0.21684,
0.21698, 0.21721, 0.21746, 0.21741, 0.21704, 0.21694, 0.21582,
0.21465, 0.21458, 0.21448, 0.21345, 0.2134, 0.21333, 0.21328,
0.21213, 0.21094, 0.20988, 0.20983, 0.20977, 0.20973, 0.20969,
0.20874, 0.20864, 0.20762, 0.20758, 0.20754, 0.20752, 0.20747,
0.20744), MEL_005.3_haplotype2 = c(0.03821, 0.03799, 0.03566,
0.03332, 0.03305, 0.0306, 0.03031, 0.02743, 0.02462, 0.02441,
0.02421, 0.02168, 0.02154, 0.0214, 0.02124, 0.01837, 0.01534,
0.01254, 0.01241, 0.01229, 0.01218, 0.01205, 0.00953, 0.00925,
0.00652, 0.0064, 0.0063, 0.0062, 0.0061, 0.00599), MEL_005.3_haplotype3 = c(0.72948,
0.73078, 0.73836, 0.74606, 0.74668, 0.75236, 0.75276, 0.75674,
0.76033, 0.76059, 0.76085, 0.76416, 0.76434, 0.76454, 0.76474,
0.76859, 0.77275, 0.77666, 0.77684, 0.777, 0.77718, 0.77736,
0.78097, 0.78137, 0.7854, 0.78554, 0.7857, 0.78584, 0.78599,
0.78614), MEL_005.3_haplotype4 = c(0.0089, 0.0086, 0.00586, 0.00311,
0.0028, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0), MEL_005.3_haplotype5 = c(0.00588, 0.00558,
0.00281, 1e-05, 2e-05, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), MEL_005.3_haplotype6 = c(0.00033,
1e-05, 0, 0, 0, 0, 0, 0, 0.00018, 0.00019, 2e-04, 3e-04, 3e-04,
0.00031, 0.00032, 0.00038, 0.00038, 0.00033, 0.00032, 0.00031,
0.00031, 3e-04, 0.00021, 2e-04, 4e-05, 2e-05, 2e-05, 1e-05, 1e-05,
0), MEL_005.3_haplotype7 = c(3e-05, 5e-05, 6e-05, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0), MEL_005.3_haplotype8 = c(0.00033, 1e-05, 0, 0, 0, 0, 0,
0, 0.00022, 0.00024, 0.00025, 4e-04, 0.00041, 0.00042, 0.00042,
0.00053, 6e-04, 6e-04, 6e-04, 6e-04, 6e-04, 6e-04, 0.00054, 0.00053,
0.00043, 0.00043, 0.00042, 0.00042, 0.00041, 0.00041), MEL_005.4_haplotype1 = c(0.16705,
0.16711, 0.16662, 0.16614, 0.16603, 0.16508, 0.16492, 0.16332,
0.16169, 0.16156, 0.16144, 0.16, 0.15991, 0.15983, 0.15975, 0.15811,
0.1564, 0.15485, 0.15479, 0.15472, 0.15464, 0.15458, 0.1532,
0.15304, 0.15154, 0.15148, 0.15142, 0.15138, 0.15132, 0.15126
), MEL_005.4_haplotype2 = c(0.03823, 0.03801, 0.03569, 0.03335,
0.03308, 0.0306, 0.03032, 0.02746, 0.02463, 0.02443, 0.02423,
0.0217, 0.02156, 0.02141, 0.02126, 0.01837, 0.01535, 0.01256,
0.01243, 0.0123, 0.01218, 0.01206, 0.00955, 0.00925, 0.0065,
0.00641, 0.00632, 0.0062, 0.0061, 0.006), MEL_005.4_haplotype3 = c(0.77923,
0.78063, 0.78893, 0.79736, 0.79805, 0.80432, 0.80476, 0.80922,
0.81328, 0.81357, 0.81386, 0.8176, 0.81781, 0.81801, 0.81825,
0.82258, 0.82726, 0.83167, 0.83187, 0.83205, 0.83224, 0.83244,
0.8365, 0.83695, 0.84147, 0.84164, 0.84181, 0.84198, 0.84215,
0.8423), MEL_005.4_haplotype4 = c(0.00889, 0.00861, 0.00587,
0.00312, 0.0028, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0), MEL_005.4_haplotype5 = c(0.00589,
0.00559, 0.00282, 1e-05, 2e-05, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), MEL_005.4_haplotype6 = c(0.00033,
1e-05, 0, 0, 0, 0, 0, 0, 0.00018, 0.00019, 2e-04, 3e-04, 3e-04,
0.00031, 0.00031, 0.00037, 0.00037, 0.00032, 0.00031, 0.00031,
0.00031, 3e-04, 0.00021, 2e-04, 4e-05, 2e-05, 2e-05, 1e-05, 1e-05,
0), MEL_005.4_haplotype7 = c(3e-05, 5e-05, 6e-05, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0)), .Names = c("SNP", "MEL_004.3_haplotype1", "MEL_004.3_haplotype2",
"MEL_004.3_haplotype3", "MEL_004.3_haplotype4", "MEL_004.3_haplotype5",
"MEL_004.3_haplotype6", "MEL_004.3_haplotype7", "MEL_004.3_haplotype8",
"MEL_005.3_haplotype1", "MEL_005.3_haplotype2", "MEL_005.3_haplotype3",
"MEL_005.3_haplotype4", "MEL_005.3_haplotype5", "MEL_005.3_haplotype6",
"MEL_005.3_haplotype7", "MEL_005.3_haplotype8", "MEL_005.4_haplotype1",
"MEL_005.4_haplotype2", "MEL_005.4_haplotype3", "MEL_005.4_haplotype4",
"MEL_005.4_haplotype5", "MEL_005.4_haplotype6", "MEL_005.4_haplotype7"
), row.names = c(NA, 30L), class = "data.frame")
Thank you
You can do this efficiently with the dplyr package, after first gathering all probabilities into a single column (very similar to what you were doing with melt):
library(dplyr)
library(tidyr)
# For every SNP and group, find the founders tied for the highest
# probability, keeping only exact two-way ties.
pairs <- A4 %>%
  # Wide -> long: every non-SNP column name goes into `individual`,
  # its probabilities into `value`.
  gather(key = individual, value = value, -SNP) %>%
  # Split the old column name at "_haplotype" into `group` and `founder`.
  separate(
    col = individual,
    into = c("group", "founder"),
    sep = "_haplotype"
  ) %>%
  group_by(SNP, group) %>%
  # Keep the row(s) holding the per-group maximum ...
  filter(value == max(value)) %>%
  # ... and then only the groups where exactly two rows share it.
  filter(n() == 2) %>%
  ungroup() %>%
  arrange(SNP)
(By the way, the ungroup and arrange aren't strictly necessary here: it is just nice to have it sorted by SNP when you view the output).
The subset of data you posted had no 2-way ties for first place. But we could add 5 ties artificially with:
# Overwrite rows 1-5 of columns 2 and 3 with the same value to create ties.
A4[seq_len(5), c(2, 3)] <- 0.999
At this point, the output would look like:
Source: local data frame [10 x 4]
SNP group founder value
(fctr) (chr) (chr) (dbl)
1 BobWhite_c18256_105 MEL_004.3 1 0.999
2 BobWhite_c18256_105 MEL_004.3 2 0.999
3 BS00081981_51 MEL_004.3 1 0.999
4 BS00081981_51 MEL_004.3 2 0.999
5 Excalibur_c42951_136 MEL_004.3 1 0.999
6 Excalibur_c42951_136 MEL_004.3 2 0.999
7 RAC875_c46403_277 MEL_004.3 1 0.999
8 RAC875_c46403_277 MEL_004.3 2 0.999
9 Tdurum_contig83209_316 MEL_004.3 1 0.999
10 Tdurum_contig83209_316 MEL_004.3 2 0.999
This shows the pairs of probabilities that were tied within a SNP and group. Incidentally, you might prefer to reorganize this into one row per tie, which could be done with tidyr's spread:
# Reshape the tied rows so each SNP/group tie occupies a single row with
# the two founders side by side.
pair_ties <- pairs %>%
  group_by(SNP, group) %>%
  # Label the two rows of each group "founder1" and "founder2".
  mutate(column = paste0("founder", 1:2)) %>%
  # Turn those labels into columns holding the founder identifiers.
  spread(key = column, value = founder)
Which would have output:
Source: local data frame [5 x 5]
SNP group value founder1 founder2
(fctr) (chr) (dbl) (chr) (chr)
1 BobWhite_c18256_105 MEL_004.3 0.999 1 2
2 BS00081981_51 MEL_004.3 0.999 1 2
3 Excalibur_c42951_136 MEL_004.3 0.999 1 2
4 RAC875_c46403_277 MEL_004.3 0.999 1 2
5 Tdurum_contig83209_316 MEL_004.3 0.999 1 2
As a quick explanation of how this works: it relies on the dplyr package, which defines a grammar for data manipulation in terms of sequential steps, piped together with the %>% "pipe" operator. One of the best intros is here. It is worth reading in general for what each function does. In this case, imagine it happening in four stages:
# Stage 1: reshape to long format and split each former column name
# at "_haplotype" into `group` and `founder`.
x1 <- A4 %>%
  gather(key = individual, value = value, -SNP) %>%
  separate(
    col = individual,
    into = c("group", "founder"),
    sep = "_haplotype"
  )
These steps do the same thing as melt in your code: they gather all the probabilities into a single column called value, and move the column names into a column called individual alongside it. They then separate the individual column into a "group" column (your 600+ groups) and a "founder" column (1-8).
# Stage 2: within each SNP/group combination, keep only the row(s)
# whose value equals that combination's maximum.
x2 <- group_by(x1, SNP, group) %>%
  filter(value == max(value))
This says you want to look within each group made up of a unique SNP and group. This is taking the place of your nested for loops above. Within each group, it filters for rows where the value column is as large as the maximum.
# Stage 3: x2 is still grouped, so n() counts rows per SNP/group;
# keep only the groups that have exactly two rows.
x3 <- filter(x2, n() == 2)
The data frame is still grouped and now contains only the rows tied for first place within each group; this step keeps only the groups of size 2, i.e. the two-way ties. At this point, the work is done, and the last two steps just sort it by SNP to make it more readable:
# Stage 4: drop the grouping and sort by SNP for readability.
result <- ungroup(x3) %>%
  arrange(SNP)
Incidentally, to estimate how long this would take on your data, I simulated some data of the same size as yours:
# Simulate a data set with the same dimensions as the real one so the
# run time of the pipeline can be estimated.
groups <- 600
founders <- 8
SNPs <- 896
# One column per group x founder combination; each column holds `SNPs`
# random draws from 0.1, 0.2, ..., 1.0.
vals <- as.data.frame(
  replicate(groups * founders, sample(10, SNPs, replace = TRUE) / 10)
)
# Name the columns "<group>_haplotype<founder>" so that
# separate(..., sep = "_haplotype") splits them into group and founder.
# A bare "_" separator would leave the names unsplit and the founder
# column empty.
names(vals) <- paste0(
  "P", rep(seq_len(groups), each = founders),
  "_haplotype", seq_len(founders)
)
SNPnames <- paste("SNP", seq_len(SNPs))
A4 <- cbind(SNP = SNPnames, vals)
On this A4, the code took about 30 seconds to run on my machine.