How to generate a frequency table from an array - r

I have an array like the following:
structure(c(0, 191, 235, 196, 311, 240, 246, 236, 222, 345, 369,
447, 289, 274, 331, 368, 371, 344, 335, 403, 378, 367, 384, 364,
191, 0, 230, 207, 336, 151, 291, 324, 306, 340, 389, 461, 345,
322, 341, 367, 369, 356, 334, 395, 396, 392, 414, 387, 235, 230,
0, 254, 309, 253, 300, 346, 305, 375, 400, 466, 379, 372, 367,
387, 382, 370, 363, 445, 384, 361, 386, 356, 196, 207, 254, 0,
298, 195, 263, 244, 223, 352, 377, 444, 348, 316, 333, 356, 367,
347, 322, 400, 378, 357, 370, 367, 311, 336, 309, 298, 0, 326,
257, 240, 259, 205, 320, 357, 331, 339, 191, 298, 262, 223, 220,
311, 273, 216, 256, 317, 240, 151, 253, 195, 326, 0, 263, 308,
277, 303, 382, 457, 347, 294, 321, 358, 374, 340, 302, 376, 386,
373, 399, 379, 246, 291, 300, 263, 257, 263, 0, 264, 240, 265,
283, 368, 263, 265, 268, 336, 324, 292, 262, 372, 354, 345, 359,
355, 236, 324, 346, 244, 240, 308, 264, 0, 116, 307, 343, 412,
296, 266, 299, 313, 320, 312, 308, 356, 320, 341, 353, 324, 222,
306, 305, 223, 259, 277, 240, 116, 0, 280, 351, 428, 305, 263,
308, 335, 350, 332, 314, 392, 362, 352, 372, 362, 345, 340, 375,
352, 205, 303, 265, 307, 280, 0, 226, 303, 278, 287, 229, 349,
312, 289, 238, 280, 361, 290, 326, 353, 369, 389, 400, 377, 320,
382, 283, 343, 351, 226, 0, 290, 277, 265, 280, 344, 332, 360,
332, 379, 391, 365, 370, 412, 447, 461, 466, 444, 357, 457, 368,
412, 428, 303, 290, 0, 309, 346, 281, 345, 313, 336, 354, 334,
349, 348, 337, 382, 289, 345, 379, 348, 331, 347, 263, 296, 305,
278, 277, 309, 0, 147, 305, 333, 329, 355, 316, 339, 365, 358,
385, 371, 274, 322, 372, 316, 339, 294, 265, 266, 263, 287, 265,
346, 147, 0, 279, 303, 320, 346, 299, 318, 366, 358, 373, 378,
331, 341, 367, 333, 191, 321, 268, 299, 308, 229, 280, 281, 305,
279, 0, 201, 153, 172, 185, 254, 261, 228, 252, 316, 368, 367,
387, 356, 298, 358, 336, 313, 335, 349, 344, 345, 333, 303, 201,
0, 146, 235, 278, 287, 228, 299, 279, 235, 371, 369, 382, 367,
262, 374, 324, 320, 350, 312, 332, 313, 329, 320, 153, 146, 0,
184, 251, 233, 229, 264, 241, 273, 344, 356, 370, 347, 223, 340,
292, 312, 332, 289, 360, 336, 355, 346, 172, 235, 184, 0, 157,
202, 183, 193, 181, 249, 335, 334, 363, 322, 220, 302, 262, 308,
314, 238, 332, 354, 316, 299, 185, 278, 251, 157, 0, 171, 221,
178, 220, 262, 403, 395, 445, 400, 311, 376, 372, 356, 392, 280,
379, 334, 339, 318, 254, 287, 233, 202, 171, 0, 149, 195, 168,
218, 378, 396, 384, 378, 273, 386, 354, 320, 362, 361, 391, 349,
365, 366, 261, 228, 229, 183, 221, 149, 0, 130, 127, 136, 367,
392, 361, 357, 216, 373, 345, 341, 352, 290, 365, 348, 358, 358,
228, 299, 264, 193, 178, 195, 130, 0, 98, 175, 384, 414, 386,
370, 256, 399, 359, 353, 372, 326, 370, 337, 385, 373, 252, 279,
241, 181, 220, 168, 127, 98, 0, 146, 364, 387, 356, 367, 317,
379, 355, 324, 362, 353, 412, 382, 371, 378, 316, 235, 273, 249,
262, 218, 136, 175, 146, 0), .Dim = c(24L, 24L))
and want to create a frequency table.
The code I have used:
ages <- c(-0.01,100,200,300,400,500,600)
factorx <- factor(cut(tdc,breaks=ages,include.lowest=TRUE))
xout <- as.data.frame(table(factorx))
gives
factorx Freq
[-0.01,100] 26
(100,200] 52
(200,300] 168
(300,400] 308
(400,500] 22
which is right, except it excludes the interval 500 to 600. But the real problem is that the absolute frequencies are wrong as the sum should be equal to 25*25=625 but in xout is 576.
I guess there is a problem in the code.

Related

Having trouble separating data using Tidyverse in R

I have a dataset consisting of 20 plant genotypes with measurements of LAI, V1, V2, V3, V4, V5 being taken at three growth stages (1, 2, 3).
I need to separate the data in R (using the tidyverse package) into columns of genotype, stage, and mesurement (consisting of LAI:V5). The code that I have tried does not work; how could I go about doing this? Here is what I have tried:
#Open packages
library(readr)
library(tidyr)
library(dplyr)
#Dataset:
dataset <- structure(list(plot = c(101, 102, 103, 104, 105, 106, 107, 108,
109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 101,
102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114,
115, 116, 117, 118, 119, 120, 101, 102, 103, 104, 105, 106, 107,
108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120
), genotype = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
15, 16, 17, 18, 19, 20, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
13, 14, 15, 16, 17, 18, 19, 20, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
11, 12, 13, 14, 15, 16, 17, 18, 19, 20), stage = c(1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3), LAI = c(822, 763,
551, 251, 800, 761, 343, 593, 997, 261, 19, 429, 566, 574, 174,
356, 891, 918, 948, 782, 902, 383, 704, 157, 358, 453, 723, 644,
308, 149, 504, 437, 348, 165, 128, 305, 778, 516, 347, 212, 792,
423, 565, 828, 106, 605, 603, 551, 145, 393, 914, 919, 672, 628,
143, 103, 906, 717, 18, 324), V1 = c(52, 556, 222, 534, 953,
346, 635, 84, 592, 444, 34, 340, 343, 188, 554, 397, 315, 643,
376, 101, 663, 42, 360, 645, 718, 883, 266, 225, 674, 797, 726,
259, 829, 701, 601, 206, 325, 963, 292, 985, 954, 828, 839, 541,
301, 312, 187, 59, 563, 577, 961, 239, 147, 203, 421, 690, 542,
412, 812, 19), V2 = c(354, 719, 45, 376, 921, 243, 256, 316,
384, 450, 166, 850, 784, 291, 889, 389, 925, 157, 37, 528, 847,
942, 624, 387, 680, 380, 848, 745, 49, 69, 864, 649, 125, 117,
911, 947, 212, 628, 162, 165, 395, 437, 102, 136, 446, 51, 106,
141, 886, 373, 113, 186, 233, 937, 698, 202, 89, 623, 731, 474
), V3 = c(18, 87, 692, 888, 681, 134, 774, 619, 544, 32, 804,
993, 147, 352, 825, 490, 196, 794, 900, 796, 617, 160, 688, 947,
665, 122, 386, 968, 772, 836, 696, 806, 925, 410, 949, 546, 303,
550, 359, 285, 167, 605, 780, 419, 925, 822, 142, 4, 648, 18,
867, 204, 617, 5, 251, 198, 316, 205, 660, 680), V4 = c(728,
266, 678, 958, 946, 248, 425, 777, 86, 340, 527, 766, 161, 187,
129, 881, 149, 888, 811, 118, 379, 22, 953, 940, 520, 200, 557,
438, 401, 25, 55, 155, 73, 834, 614, 933, 235, 759, 852, 29,
475, 356, 992, 765, 593, 703, 929, 823, 466, 717, 86, 607, 730,
7, 416, 727, 400, 904, 503, 881), V5 = c(550, 785, 954, 852,
718, 295, 208, 2, 36, 185, 726, 540, 476, 994, 720, 532, 401,
525, 504, 868, 414, 878, 808, 550, 740, 9, 936, 570, 477, 516,
561, 648, 686, 906, 387, 621, 461, 323, 829, 948, 964, 853, 943,
805, 349, 254, 979, 784, 246, 444, 71, 883, 345, 973, 546, 120,
310, 347, 732, 308)), class = "data.frame", row.names = c(NA,
-60L))
Code I have tried....
data <- gather(dataset, LAI, V1, V2, V3, V4, V5, -plot)
....provides these results (a sample of the resulting dataset):
plot genotype stage LAI V1
1 101 1 1 V2 354
2 102 2 1 V2 719
3 103 3 1 V2 45
4 104 4 1 V2 376
5 105 5 1 V2 921
6 106 6 1 V2 243
7 107 7 1 V2 256
8 108 8 1 V2 316
9 109 9 1 V2 384
10 110 10 1 V2 450
11 111 11 1 V2 166
12 112 12 1 V2 850
13 113 13 1 V2 784
14 114 14 1 V2 291
The outcome needs to be like this:
correct_format <- data.frame(genotype = c(1,
2,
3,
4,
5,
6),
stage = c(1,
1,
1,
1,
1,
1),
measurement = c("LAI",
"LAI",
"LAI",
"LAI",
"LAI",
"LAI"),
value = c(822,
763,
551,
251,
800,
761)
Perhaps, we need
library(dplyr)
library(tidyr)
dataset %>%
select(-plot) %>%
pivot_longer(cols = LAI:V5, names_to = 'measurement') %>%
arrange(measurement)

How to automatically delete noise (artifacts) in time series data in R

If I have the following data:
structure(list(x = c(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28,
29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 40, 41, 42, 43, 44, 45,
46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77,
78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93,
94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107,
108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120,
121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133,
134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146,
147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172,
173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185,
186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198,
199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211,
212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224,
225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237,
238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250,
251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263,
264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276,
277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289,
290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302,
303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315,
316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328,
329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341,
342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354,
355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367,
368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 380,
381, 382, 383, 384, 385, 386, 387, 388, 389, 390, 391, 392, 393,
394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404, 405, 406,
407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419,
420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432,
433, 434, 435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445,
446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458,
459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471,
472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 483, 484,
485, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497,
498, 499, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509, 510,
511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 522, 523,
524, 525, 526, 527, 528, 529, 530, 531, 532, 533, 534, 535, 536,
537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549,
550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 562,
563, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573, 574, 575,
576, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 588,
589, 590, 591, 592, 593, 594, 595, 596, 597, 598, 599, 600, 601,
602, 603, 604, 605, 606, 607, 608, 609, 610, 611, 612, 613, 614,
615, 616, 617, 618, 619, 620, 621, 622, 623, 624, 625, 626, 627,
628, 629, 630, 631, 632, 633, 634, 635, 636, 637, 638, 639, 640,
641, 642, 643, 644, 645, 646, 647, 648, 649, 650, 651, 652, 653,
654, 655, 656, 657, 658, 659, 660, 661, 662, 663, 664, 665, 666,
667, 668, 669, 670, 671, 672, 673, 674, 675, 676, 677, 678, 679,
680, 681, 682, 683, 684, 685, 686, 687, 688, 689, 690, 691, 692,
693, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705,
706, 707, 708, 709, 710, 711, 712, 713, 714, 715, 716, 717, 718,
719, 720, 721, 722, 723, 724, 725, 726, 727, 728, 729, 730, 731,
732, 733, 734, 735, 736, 737, 738, 739, 740, 741, 742, 743, 744,
745, 746, 747, 748, 749, 750, 751, 752, 753, 754, 755, 756, 757,
758, 759, 760, 761, 762, 763, 764, 765, 766, 767, 768, 769, 770,
771, 772, 773, 774, 775, 776, 777, 778, 779, 780, 781, 782, 783,
784, 785, 786, 787, 788, 789, 790, 791, 792, 793, 794, 795, 796,
797, 798, 799, 800, 801, 802, 803, 804, 805, 806, 807, 808, 809,
810, 811, 812, 813, 814, 815, 816, 817, 818, 819, 820, 821, 822,
823, 824, 825, 826, 827, 828, 829, 830, 831, 832, 833, 834, 835,
836, 837, 838, 839, 840, 841, 842, 843, 844, 845, 846, 847, 848,
849, 850, 851, 852, 853, 854, 855, 856, 857, 858, 859, 860, 861,
862, 863, 864, 865, 866, 867, 868, 869, 870, 871, 872, 873, 874,
875, 876, 877, 878, 879, 880, 881, 882, 883, 884, 885, 886, 887,
888, 889, 890, 891, 892, 893, 894, 895, 896, 897, 898, 899, 900,
901, 902, 903, 904, 905, 906, 907, 908, 909, 910, 911, 912, 913,
914, 915, 916, 917, 918, 919, 920, 921, 922, 923, 924, 925, 926,
927, 928, 929, 930, 931, 932, 933, 934, 935, 936, 937, 938, 939,
940, 941, 942, 943, 944, 945, 946, 947, 948, 949, 950, 951, 952,
953, 954, 955, 956, 957, 958, 959, 960, 961, 962, 963, 964, 965,
966, 967, 968, 969, 970, 971, 972, 973, 974, 975, 976, 977, 978,
979, 980, 981, 982, 983, 984, 985, 986, 987, 988, 989, 990, 991,
992, 993, 994, 995, 996, 997, 998, 999), y = c(99, 100, 97, 95,
94, 92, 90, 91, 91, 92, 95, 99, 99, 100, 101, 101, 101, 102,
104, 106, 107, 107, 108, 108, 109, 109, 109, 109, 113, 114, 117,
117, 117, 117, 117, 117, 117, 117, 117, 146, 124, 124, 125, 123,
126, 111, 110, 109, 108, 121, 122, 122, 123, 124, 124, 124, 123,
122, 120, 120, 120, 121, 122, 123, 123, 125, 125, 126, 126, 126,
126, 126, 125, 125, 125, 126, 127, 127, 129, 130, 131, 131, 131,
131, 130, 130, 129, 128, 128, 128, 128, 128, 127, 127, 127, 127,
127, 127, 128, 130, 131, 132, 132, 132, 131, 131, 131, 131, 132,
132, 133, 132, 132, 127, 126, 125, 125, 128, 128, 129, 130, 130,
130, 131, 131, 132, 132, 132, 132, 131, 130, 129, 129, 128, 128,
128, 128, 129, 129, 129, 128, 128, 128, 127, 127, 126, 126, 126,
127, 127, 127, 127, 126, 126, 126, 127, 127, 128, 128, 128, 128,
128, 129, 129, 130, 130, 129, 130, 131, 131, 131, 131, 130, 131,
131, 130, 130, 130, 130, 129, 129, 129, 130, 130, 131, 132, 133,
134, 134, 134, 134, 133, 132, 130, 128, 126, 126, 127, 127, 130,
131, 132, 133, 135, 136, 135, 134, 131, 130, 130, 130, 132, 134,
135, 135, 135, 134, 132, 131, 130, 128, 127, 126, 126, 127, 128,
128, 128, 129, 130, 130, 131, 131, 131, 131, 130, 129, 127, 126,
126, 126, 126, 127, 127, 128, 128, 130, 130, 131, 131, 130, 129,
129, 128, 128, 128, 129, 130, 131, 132, 132, 132, 131, 131, 131,
132, 132, 132, 131, 130, 129, 128, 128, 129, 129, 130, 129, 129,
129, 129, 130, 130, 130, 130, 130, 129, 129, 128, 128, 127, 127,
126, 127, 129, 130, 131, 132, 135, 135, 135, 134, 133, 130, 130,
130, 130, 130, 130, 132, 134, 135, 134, 133, 132, 132, 132, 131,
131, 131, 131, 131, 132, 132, 132, 131, 130, 130, 130, 130, 130,
130, 131, 132, 133, 134, 135, 135, 135, 135, 135, 135, 135, 135,
135, 135, 136, 135, 135, 135, 135, 134, 133, 132, 132, 132, 132,
132, 133, 134, 133, 133, 133, 133, 133, 134, 134, 134, 135, 135,
135, 135, 136, 136, 136, 136, 135, 135, 135, 135, 134, 135, 136,
137, 138, 138, 138, 139, 138, 138, 137, 137, 136, 136, 136, 136,
136, 136, 135, 134, 133, 132, 132, 132, 132, 131, 131, 131, 132,
132, 133, 133, 133, 133, 132, 133, 133, 133, 133, 133, 134, 135,
136, 136, 136, 136, 135, 134, 133, 132, 131, 131, 132, 132, 132,
132, 132, 132, 132, 132, 132, 132, 133, 133, 133, 132, 132, 132,
132, 135, 135, 136, 136, 136, 136, 136, 136, 135, 135, 136, 137,
137, 136, 136, 136, 137, 137, 137, 137, 138, 138, 138, 137, 136,
135, 133, 132, 132, 132, 133, 134, 135, 135, 136, 136, 136, 136,
135, 134, 134, 134, 134, 134, 134, 119, 118, 118, 134, 135, 135,
122, 122, 122, 84, 84, 84, 84, 55, 58, 74, 141, 140, 139, 133,
131, 128, 128, 127, 128, 130, 132, 132, 132, 132, 131, 130, 130,
131, 132, 133, 135, 136, 136, 136, 135, 134, 132, 131, 130, 131,
132, 132, 134, 135, 136, 136, 136, 136, 134, 134, 134, 133, 135,
136, 136, 135, 135, 135, 135, 135, 135, 136, 137, 138, 139, 138,
137, 136, 136, 135, 135, 134, 134, 133, 133, 133, 133, 133, 135,
136, 136, 137, 137, 136, 136, 135, 135, 135, 135, 135, 134, 132,
132, 132, 133, 135, 135, 134, 134, 132, 131, 131, 131, 132, 135,
135, 135, 135, 135, 135, 134, 134, 133, 133, 133, 132, 133, 133,
133, 134, 135, 137, 137, 137, 136, 134, 133, 132, 132, 132, 132,
133, 134, 135, 136, 136, 136, 137, 136, 136, 136, 136, 135, 135,
135, 135, 135, 135, 135, 135, 135, 134, 134, 133, 133, 132, 132,
132, 132, 132, 132, 132, 133, 135, 139, 140, 141, 141, 140, 139,
137, 135, 133, 133, 133, 133, 134, 135, 135, 135, 135, 136, 135,
135, 135, 135, 135, 135, 135, 135, 136, 136, 137, 138, 140, 140,
139, 139, 137, 135, 136, 134, 134, 134, 134, 134, 134, 134, 134,
134, 134, 134, 134, 134, 134, 134, 135, 135, 136, 136, 137, 137,
137, 137, 135, 135, 135, 135, 135, 135, 135, 135, 135, 134, 134,
133, 133, 132, 132, 132, 132, 132, 132, 132, 132, 132, 133, 133,
133, 133, 133, 133, 133, 133, 135, 137, 137, 137, 136, 135, 135,
135, 135, 135, 135, 135, 135, 135, 135, 136, 136, 136, 79, 79,
74, 66, 66, 63, 103, 102, 138, 138, 139, 138, 137, 121, 121,
121, 121, 121, 109, 109, 110, 109, 109, 73, 73, 73, 73, 48, 48,
40, 40, 39, 38, 70, 130, 140, 140, 140, 139, 139, 138, 138, 138,
138, 138, 138, 138, 139, 140, 140, 140, 81, 81, 82, 128, 143,
143, 142, 141, 138, 136, 136, 135, 135, 135, 136, 136, 136, 136,
137, 137, 137, 137, 137, 136, 136, 136, 135, 135, 135, 135, 135,
135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 134, 135,
135, 136, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 139,
140, 141, 141, 140, 139, 139, 137, 137, 137, 137, 137, 136, 136,
136, 136, 136, 140, 141, 143, 145, 126, 125, 111, 110, 120, 119,
133, 118, 119, 108, 98, 99, 108, 108, 108, 120, 135, 135, 135,
137, 119, 119, 119, 97, 97, 97, 71, 70, 70, 70, 70, 45, 45, 49,
52, 52, 50, 50, 50, 87, 80, 112, 123, 108, 120, 120, 120, 122,
122, 110, 122, 120, 99, 99, 99, 99, 108, 121, 136, 137, 138,
138, 140, 140, 142, 142, 141, 140, 115, 115, 85, 85, 85, 85,
85, 85, 102, 142, 142, 142, 141, 139, 136, 120, 120, 108, 99,
91, 100, 100, 110, 110, 110, 122, 122, 137, 137, 100, 100, 100
)), row.names = c(NA, -999L), class = c("tbl_df", "tbl", "data.frame"
), .Names = c("x", "y"))
Plot:
The red points represent the ones that are considered noise (or artifacts). What I need to do is to detect these points through a function and delete them.
Of course, these red points are not 100% accurate, I am just giving an example to show the data I want to keep and what I want to delete.
I tried applying a butterworth filter but this is not what I need. It does not work. Any ideas?
So, assuming your data was assigned to DF, we can approximate the relationship between x an y via loess:
plot(y ~ x, data = DF)
span_parameter <- 0.15
outlier_threshold <- 1.2
lo <- loess.smooth(DF$x, DF$y, span = span_parameter)
lines(lo$x, lo$y, lwd = 3)
lines(lo$x, lo$y * outlier_threshold, lwd = 3, col = 2)
lines(lo$x, lo$y / outlier_threshold, lwd = 3, col = 2)
You can use approxfun to decide which points are outliers or not:
f1 <- approxfun(lo$x, lo$y * outlier_threshold)
(wh1 <- which(DF$y > f1(DF$x)))
# [1] 40 959 960 961 962 963 964 965 966 967 977 978 979 980 981 982 995 996
f2 <- approxfun(lo$x, lo$y / outlier_threshold)
(wh2 <- which(DF$y < f2(DF$x)))
# [1] 503 504 505 506 507 508 509 772 773 774 775 776 777 778 779 790 791 792 793
# [20] 794 795 796 797 798 799 800 801 802 803 804 805 823 824 825 910 911 923 924
# [39] 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 970 971 972
# [58] 973 974 975
Adjust span_parameter and outlier_threshold to your convenience.
EDIT:
My response was more of a general one, but it seems your data could be better approximated by a log(log()):
DF$x <- DF$x + 1
logEstimate <- lm(y~log(log(50+x))-1,data=DF)
plot(y ~ x, data = DF)
lines(DF$x, logEstimate$fitted.values, lwd = 3)
lines(DF$x, logEstimate$fitted.values * outlier_threshold, lwd = 3, col = 2)
lines(DF$x, logEstimate$fitted.values / outlier_threshold, lwd = 3, col = 2)

How to create list-columns from list in R?

# Sample data
df <- tibble(id=1:2, xml_str=c("<?xml version='1.0'?><!DOCTYPE svg PUBLIC '-//W3C//DTD SVG 1.1//EN' 'http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd'><svg version='1.1' xmlns='http://www.w3.org/2000/svg'>'/>\n<path fill='none' stroke='#ff0000' stroke-width='5' d='M171, 160 L171, 160, 168, 159, 164, 159, 163, 159, 162, 159, 161, 159, 161, 158, 162, 158, 162, 157, 163, 156, 165, 156'/>'/>\n<path fill='none' stroke='#ff0000' stroke-width='5' d='M172, 226 L172, 226, 171, 213, 170, 212, 171, 212, 172, 212, 173, 212, 173, 211, 172, 211, 171, 211, 171, 212, 171, 215'/>\n<path fill='none' stroke='#ff0000' stroke-width='5' d='M153, 94 L153, 94, 150, 90, 150, 89, 150, 88, 150, 87, 150, 86, 150, 85, 150, 84, 150, 82, 150, 81, 150, 80, 150, 79'/>'/>'/>'/>\n<path fill='none' stroke='#ff0000' stroke-width='5' d='M346, 84 L346, 84, 346, 79, 347, 78, 347, 77, 348, 77, 348, 76, 348, 75, 348, 76, 348, 77, 349, 77, 348, 78'/>\n<path fill='none' stroke='#ff0000' stroke-width='5' d='M314, 67 L314, 67, 311, 76, 309, 76, 308, 77, 307, 77, 307, 76, 306, 76, 305, 76, 305, 77, 306, 77, 307, 77, 306, 77, 305, 79, 304, 80'/>\n<path fill='none' stroke='#ff0000' stroke-width='5' d='M313, 57 L313, 57, 321, 56, 321, 57, 321, 58'/>\n<path fill='none' stroke='#ff0000' stroke-width='5' d='M332, 58 L332, 58, 332, 57, 331, 57, 333, 57, 334, 57, 335, 57, 336, 58, 337, 58, 338, 58, 339, 58, 340, 58, 341, 58, 341, 59, 340, 60, 339, 60, 338, 60, 337, 60, 336, 60, 335, 60, 334, 60, 333, 60, 332, 60, 331, 60, 331, 59, 333, 58, 334, 58'/></svg>", "<?xml version='1.0'?><!DOCTYPE svg PUBLIC '-//W3C//DTD SVG 1.1//EN' 'http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd'><svg version='1.1' xmlns='http://www.w3.org/2000/svg'>\n<path fill='none' stroke='#ff0000' stroke-width='5' d='M315, 80 L315, 80, 321, 79, 320, 79, 318, 79, 317, 79'/>\n<path fill='none' stroke='#ff0000' stroke-width='5' d='M334, 83 L334, 83, 334, 82'/>\n<path fill='none' stroke='#ff0000' stroke-width='5' d='M315, 80 L315, 80, 315, 82, 315, 83, 315, 84, 315, 85'/>\n<path fill='none' stroke='#ff0000' stroke-width='5' d='M315, 72 L315, 72'/>\n<path fill='none' stroke='#ff0000' stroke-width='5' d='M315, 69 L315, 69, 315, 70'/>\n<path fill='none' stroke='#ff0000' stroke-width='5' d='M332, 66 L332, 66, 332, 67'/>\n<path fill='none' stroke='#ff0000' stroke-width='5' d='M315, 56 L315, 56'/>\n<path fill='none' stroke='#ff0000' stroke-width='5' d='M315, 66 L315, 66, 315, 67'/>\n<path fill='none' stroke='#ff0000' stroke-width='5' d='M315, 72 L315, 72'/>\n<path fill='none' stroke='#ff0000' stroke-width='5' d='M332, 72 L332, 72, 333, 75'/>\n<path fill='none' stroke='#ff0000' stroke-width='5' d='M315, 72 L315, 72'/>\n<path fill='none' stroke='#ff0000' stroke-width='5' d='M334, 73 L334, 73, 333, 73'/></svg>"))
df <- df %>%
rowwise() %>%
mutate(nodes = (xml_str %>% read_xml() %>% xml_find_all(., "//#d") %>% as_list()))
With the data frame above, I want to extract all path-element d-nodes from the xml string and store them as a list in the same data frame, but I get Column nodes must be length 1 (the group size), not 7
The piping used in the mutate statement does return a single list.
I can leave out the 'rowwise()', but that simply expects length 2 instead of 1.
What am I missing here?
It's not exactly the way you're doing it, but you can use str_extract_all and regex to pull out the relevant string as a list of comma-separated strings
ans <-
df %>%
dplyr::mutate(dnodes = stringr::str_extract_all(xml_str, "(?<=[d]=')[^']+(?='\\/)"))
ans$dnodes
# [[1]]
# [1] "M171, 160 L171, 160, 168, 159, 164, 159, 163, 159, 162, 159, 161, 159, 161, 158, 162, 158, 162, 157, 163, 156, 165, 156"
# [2] "M172, 226 L172, 226, 171, 213, 170, 212, 171, 212, 172, 212, 173, 212, 173, 211, 172, 211, 171, 211, 171, 212, 171, 215"
# [3] "M153, 94 L153, 94, 150, 90, 150, 89, 150, 88, 150, 87, 150, 86, 150, 85, 150, 84, 150, 82, 150, 81, 150, 80, 150, 79"
# [4] "M346, 84 L346, 84, 346, 79, 347, 78, 347, 77, 348, 77, 348, 76, 348, 75, 348, 76, 348, 77, 349, 77, 348, 78"
# [5] "M314, 67 L314, 67, 311, 76, 309, 76, 308, 77, 307, 77, 307, 76, 306, 76, 305, 76, 305, 77, 306, 77, 307, 77, 306, 77, 305, 79, 304, 80"
# [6] "M313, 57 L313, 57, 321, 56, 321, 57, 321, 58"
# [7] "M332, 58 L332, 58, 332, 57, 331, 57, 333, 57, 334, 57, 335, 57, 336, 58, 337, 58, 338, 58, 339, 58, 340, 58, 341, 58, 341, 59, 340, 60, 339, 60, 338, 60, 337, 60, 336, 60, 335, 60, 334, 60, 333, 60, 332, 60, 331, 60, 331, 59, 333, 58, 334, 58"
# [[2]]
# [1] "M315, 80 L315, 80, 321, 79, 320, 79, 318, 79, 317, 79" "M334, 83 L334, 83, 334, 82"
# [3] "M315, 80 L315, 80, 315, 82, 315, 83, 315, 84, 315, 85" "M315, 72 L315, 72"
# [5] "M315, 69 L315, 69, 315, 70" "M332, 66 L332, 66, 332, 67"
# [7] "M315, 56 L315, 56" "M315, 66 L315, 66, 315, 67"
# [9] "M315, 72 L315, 72" "M332, 72 L332, 72, 333, 75"
# [11] "M315, 72 L315, 72" "M334, 73 L334, 73, 333, 73"
You can convert to list of a vector of strings with
ans <-
df %>%
dplyr::mutate(dnodes = stringr::str_extract_all(xml_str, "(?<=[d]=')[^']+(?='\\/)")) %>%
dplyr::mutate(dnodes = purrr::map(dnodes, ~unlist(strsplit(paste(.x, collapse=", "), ", "))))
ans$dnodes
# [[1]]
# [1] "M171" "160 L171" "160" "168" "159" "164" "159" "163" "159" "162"
# [11] "159" "161" "159" "161" "158" "162" "158" "162" "157" "163"
# [21] "156" "165" "156" "M172" "226 L172" "226" "171" "213" "170" "212"
# [31] "171" "212" "172" "212" "173" "212" "173" "211" "172" "211"
# [41] "171" "211" "171" "212" "171" "215" "M153" "94 L153" "94" "150"
# [51] "90" "150" "89" "150" "88" "150" "87" "150" "86" "150"
# [61] "85" "150" "84" "150" "82" "150" "81" "150" "80" "150"
# etc
Does this do what you want? I usually wrap the right side of my mutate(name = right_side) in list() to accomplish this.
df <- df %>%
mutate(nodes = list(xml_str %>% read_xml() %>% xml_find_all(., "//#d")))
class(df$nodes)
"list"
class(df$nodes[[1]])
"xml_nodeset"
Not sure if you want the xml_nodeset objects or perhaps CPak's solution with actual strings is better for you.

remove every 4 rows between two numbers

for example, I have a data frame with one column containing numbers. these is how it looks.
head(c1)
c
1 300
2 302
3 304
4 306
5 308
6 310
Here is the sample data frame.
c1 <- structure(list(c = c(300, 302, 304, 306, 308, 310, 312, 314,
316, 318, 320, 322, 324, 326, 328, 330, 332, 334, 336, 338, 340,
342, 344, 346, 348, 350, 352, 354, 356, 358, 360, 362, 364, 366,
368, 370, 372, 374, 376, 378, 380, 382, 384, 386, 388, 390, 392,
394, 396, 398, 400)), .Names = "c", row.names = c(NA, -51L), class = "data.frame")
I want to delete the rows between 300 to 310 and 310 to 320 and so on..
I want to have a dataframe like these
300
310
320
330
340
350
.
.
.
400
Any ideas how to do these, I found how to remove every nth row, but not every four rows between two numbers
You can make use of the modulo operator %%. If you want the result as an atomic vector, you can run
c1$c[c1$c %% 10 == 0]
or if you want it as a data.frame with 1 column, you can use
c1[c1$c %% 10 == 0, , drop=FALSE]

K Shortest Path in R: igraph

I have to find the K Shortest Path,However the below code i tried gives the same path when i choose different K Values and the distance computed is not correct.
My dataset is my.graph with class igraph
dput(my.graph)
structure(list(169, FALSE, c(22, 1, 2, 1, 2, 3, 114, 3, 4, 5,
4, 5, 6, 6, 7, 7, 8, 9, 8, 110, 78, 159, 9, 159, 30, 11, 13,
160, 11, 66, 160, 138, 14, 13, 14, 15, 81, 16, 15, 17, 16, 17,
18, 18, 19, 130, 19, 62, 62, 23, 42, 22, 22, 22, 23, 24, 161,
24, 25, 25, 26, 64, 26, 28, 161, 29, 28, 29, 47, 48, 53, 142,
31, 30, 32, 31, 32, 33, 33, 34, 35, 118, 34, 36, 35, 37, 36,
37, 38, 39, 38, 162, 40, 39, 40, 41, 41, 42, 43, 44, 43, 44,
45, 45, 46, 47, 46, 47, 47, 49, 48, 49, 50, 51, 50, 52, 51, 52,
53, 60, 53, 54, 53, 55, 54, 56, 55, 57, 56, 57, 58, 58, 59, 59,
60, 60, 60, 63, 162, 62, 62, 63, 64, 65, 65, 66, 166, 68, 163,
164, 69, 165, 68, 70, 69, 71, 70, 71, 72, 72, 73, 112, 73, 74,
75, 74, 76, 75, 76, 77, 78, 77, 78, 110, 78, 79, 80, 79, 146,
80, 81, 82, 81, 81, 82, 137, 164, 84, 85, 84, 86, 85, 86, 87,
87, 164, 165, 89, 89, 90, 90, 91, 92, 91, 93, 92, 93, 94, 95,
94, 165, 95, 163, 97, 97, 98, 99, 98, 99, 100, 101, 100, 101,
102, 102, 163, 104, 166, 105, 104, 106, 105, 106, 107, 108, 107,
109, 108, 109, 166, 110, 110, 125, 116, 112, 113, 112, 112, 114,
113, 114, 115, 114, 126, 115, 116, 117, 118, 117, 119, 118, 118,
120, 119, 120, 121, 121, 122, 123, 122, 124, 168, 141, 123, 124,
125, 125, 125, 126, 140, 140, 128, 128, 129, 130, 129, 130, 130,
131, 131, 132, 133, 132, 134, 133, 134, 135, 135, 136, 137, 136,
137, 137, 139, 138, 139, 168, 143, 140, 140, 141, 142, 158, 167,
143, 167, 144, 145, 144, 145, 146, 146, 146, 148, 148, 149, 149,
150, 151, 150, 152, 151, 153, 152, 153, 154, 154, 155, 156, 155,
156, 157, 157, 158, 158, 158, 159, 160, 159, 160, 160, 160, 161,
161, 162, 162, 163, 163, 163, 164, 164, 164, 165, 165, 165, 166,
166, 166, 167, 167, 168, 168), c(0, 0, 1, 0, 1, 2, 2, 2, 3, 4,
3, 4, 5, 5, 6, 6, 7, 8, 7, 9, 9, 9, 8, 10, 10, 10, 11, 11, 10,
12, 12, 12, 13, 11, 13, 14, 14, 15, 14, 16, 15, 16, 17, 17, 18,
19, 18, 19, 20, 20, 21, 21, 0, 21, 20, 23, 23, 23, 24, 24, 25,
26, 25, 27, 27, 28, 27, 28, 29, 29, 29, 30, 30, 10, 31, 30, 31,
32, 32, 33, 34, 34, 33, 35, 34, 36, 35, 36, 37, 38, 37, 38, 39,
38, 39, 40, 40, 21, 42, 43, 42, 43, 44, 44, 45, 46, 45, 29, 46,
48, 29, 48, 49, 50, 49, 51, 50, 51, 52, 53, 52, 53, 29, 54, 53,
55, 54, 56, 55, 56, 57, 57, 58, 58, 59, 53, 59, 61, 61, 20, 19,
61, 26, 64, 64, 12, 67, 67, 67, 68, 68, 68, 67, 69, 68, 70, 69,
70, 71, 71, 72, 72, 72, 73, 74, 73, 75, 74, 75, 76, 77, 76, 77,
78, 9, 78, 79, 78, 80, 79, 80, 81, 80, 14, 81, 82, 83, 83, 84,
83, 85, 84, 85, 86, 86, 87, 88, 88, 88, 89, 89, 90, 91, 90, 92,
91, 92, 93, 94, 93, 95, 94, 96, 96, 96, 97, 98, 97, 98, 99, 100,
99, 100, 101, 101, 102, 103, 103, 104, 103, 105, 104, 105, 106,
107, 106, 108, 107, 108, 109, 9, 78, 110, 111, 111, 112, 72,
111, 113, 112, 113, 114, 2, 115, 114, 111, 116, 117, 116, 118,
117, 34, 119, 118, 119, 120, 120, 121, 122, 121, 123, 123, 123,
122, 123, 124, 124, 110, 115, 126, 127, 127, 127, 128, 129, 128,
129, 19, 130, 130, 131, 132, 131, 133, 132, 133, 134, 134, 135,
136, 135, 136, 82, 138, 12, 138, 139, 139, 127, 126, 123, 30,
142, 142, 139, 143, 143, 144, 143, 144, 145, 80, 145, 147, 147,
148, 148, 149, 150, 149, 151, 150, 152, 151, 152, 153, 153, 154,
155, 154, 155, 156, 156, 157, 142, 157, 9, 159, 10, 12, 11, 159,
23, 27, 61, 38, 96, 67, 102, 68, 83, 87, 95, 88, 68, 67, 109,
103, 142, 143, 123, 139), c(3, 1, 4, 2, 7, 5, 10, 8, 11, 9, 13,
12, 15, 14, 18, 16, 22, 17, 28, 25, 33, 26, 34, 32, 38, 35, 40,
37, 41, 39, 43, 42, 46, 44, 52, 0, 53, 51, 54, 49, 57, 55, 59,
58, 62, 60, 66, 63, 67, 65, 73, 24, 75, 72, 76, 74, 78, 77, 82,
79, 84, 80, 86, 83, 87, 85, 90, 88, 93, 89, 94, 92, 96, 95, 97,
50, 100, 98, 101, 99, 103, 102, 106, 104, 107, 68, 108, 105,
110, 69, 111, 109, 114, 112, 116, 113, 117, 115, 122, 70, 120,
118, 124, 121, 126, 123, 128, 125, 129, 127, 131, 130, 133, 132,
135, 119, 136, 134, 140, 47, 139, 48, 141, 137, 142, 61, 144,
143, 145, 29, 152, 147, 154, 150, 156, 153, 157, 155, 159, 158,
162, 160, 165, 163, 167, 164, 168, 166, 171, 169, 174, 20, 172,
170, 177, 175, 179, 176, 183, 36, 182, 180, 184, 181, 189, 187,
191, 188, 192, 190, 194, 193, 198, 197, 200, 199, 203, 201, 205,
202, 206, 204, 209, 207, 211, 208, 214, 213, 217, 215, 218, 216,
221, 219, 222, 220, 224, 223, 229, 226, 231, 228, 232, 230, 235,
233, 237, 234, 238, 236, 240, 19, 241, 173, 246, 161, 247, 244,
249, 245, 252, 6, 250, 248, 254, 251, 255, 243, 258, 256, 261,
81, 260, 257, 263, 259, 264, 262, 266, 265, 269, 267, 273, 268,
274, 270, 277, 242, 276, 275, 278, 253, 282, 281, 285, 283, 287,
45, 286, 284, 289, 288, 292, 290, 294, 291, 295, 293, 297, 296,
300, 298, 302, 185, 301, 299, 304, 31, 305, 303, 309, 279, 308,
280, 310, 272, 311, 71, 314, 307, 318, 316, 319, 317, 321, 178,
322, 320, 324, 323, 326, 325, 329, 327, 331, 328, 333, 330, 334,
332, 336, 335, 339, 337, 340, 338, 342, 341, 344, 312, 345, 343,
346, 21, 348, 23, 350, 27, 349, 30, 351, 347, 352, 56, 353, 64,
355, 91, 354, 138, 357, 148, 356, 212, 358, 225, 359, 149, 360,
186, 361, 195, 364, 151, 363, 196, 362, 210, 365, 146, 367, 227,
366, 239, 368, 313, 369, 315, 370, 271, 371, 306), c(3, 1, 52,
0, 4, 2, 7, 5, 252, 6, 10, 8, 11, 9, 13, 12, 15, 14, 18, 16,
22, 17, 174, 20, 240, 19, 346, 21, 28, 25, 73, 24, 348, 23, 33,
26, 350, 27, 145, 29, 304, 31, 349, 30, 34, 32, 38, 35, 183,
36, 40, 37, 41, 39, 43, 42, 46, 44, 140, 47, 287, 45, 54, 49,
139, 48, 53, 51, 97, 50, 57, 55, 352, 56, 59, 58, 62, 60, 142,
61, 66, 63, 353, 64, 67, 65, 107, 68, 110, 69, 122, 70, 75, 72,
311, 71, 76, 74, 78, 77, 82, 79, 84, 80, 261, 81, 86, 83, 87,
85, 90, 88, 93, 89, 355, 91, 94, 92, 96, 95, 100, 98, 101, 99,
103, 102, 106, 104, 108, 105, 111, 109, 114, 112, 116, 113, 117,
115, 120, 118, 124, 121, 135, 119, 126, 123, 128, 125, 129, 127,
131, 130, 133, 132, 136, 134, 141, 137, 354, 138, 144, 143, 152,
147, 357, 148, 365, 146, 154, 150, 359, 149, 364, 151, 156, 153,
157, 155, 159, 158, 162, 160, 246, 161, 165, 163, 167, 164, 168,
166, 171, 169, 172, 170, 177, 175, 241, 173, 179, 176, 182, 180,
321, 178, 184, 181, 302, 185, 189, 187, 360, 186, 191, 188, 192,
190, 194, 193, 361, 195, 198, 197, 363, 196, 200, 199, 203, 201,
205, 202, 206, 204, 209, 207, 211, 208, 362, 210, 214, 213, 356,
212, 217, 215, 218, 216, 221, 219, 222, 220, 224, 223, 358, 225,
229, 226, 367, 227, 231, 228, 232, 230, 235, 233, 237, 234, 238,
236, 366, 239, 277, 242, 247, 244, 255, 243, 249, 245, 250, 248,
254, 251, 278, 253, 258, 256, 260, 257, 263, 259, 264, 262, 266,
265, 269, 267, 273, 268, 274, 270, 310, 272, 370, 271, 276, 275,
309, 279, 282, 281, 308, 280, 285, 283, 286, 284, 289, 288, 292,
290, 294, 291, 295, 293, 297, 296, 300, 298, 301, 299, 305, 303,
314, 307, 371, 306, 344, 312, 368, 313, 318, 316, 369, 315, 319,
317, 322, 320, 324, 323, 326, 325, 329, 327, 331, 328, 333, 330,
334, 332, 336, 335, 339, 337, 340, 338, 342, 341, 345, 343, 351,
347), c(0, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 18, 20, 20, 22,
24, 26, 28, 30, 32, 34, 34, 34, 38, 40, 42, 44, 46, 46, 48, 50,
52, 54, 56, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76, 78, 80, 82,
84, 88, 90, 92, 94, 96, 98, 102, 104, 106, 108, 110, 112, 114,
118, 118, 122, 124, 126, 128, 130, 130, 132, 134, 136, 138, 140,
142, 144, 146, 148, 150, 154, 156, 158, 162, 164, 164, 166, 168,
170, 172, 172, 174, 176, 178, 180, 182, 184, 186, 186, 188, 190,
192, 194, 196, 198, 198, 200, 202, 204, 206, 208, 210, 214, 214,
218, 220, 224, 226, 228, 230, 234, 236, 238, 240, 242, 244, 246,
250, 252, 252, 254, 256, 260, 262, 264, 266, 268, 270, 272, 276,
278, 280, 284, 286, 288, 290, 292, 294, 298, 298, 300, 302, 304,
306, 308, 310, 312, 314, 316, 318, 322, 326, 332, 336, 340, 346,
352, 358, 364, 368, 372), c(0, 4, 6, 10, 12, 14, 16, 18, 20,
22, 28, 34, 38, 44, 46, 50, 52, 54, 56, 58, 62, 66, 70, 70, 74,
76, 78, 80, 84, 86, 92, 96, 98, 100, 102, 106, 108, 110, 112,
116, 118, 120, 120, 122, 124, 126, 128, 130, 130, 132, 134, 136,
138, 140, 144, 146, 148, 150, 152, 154, 156, 156, 160, 160, 160,
162, 162, 162, 168, 174, 176, 178, 180, 184, 186, 188, 190, 192,
194, 198, 200, 204, 206, 208, 212, 214, 216, 218, 220, 224, 226,
228, 230, 232, 234, 236, 238, 242, 244, 246, 248, 250, 252, 254,
258, 260, 262, 264, 266, 268, 270, 272, 276, 278, 280, 282, 284,
286, 288, 290, 292, 294, 296, 298, 304, 306, 306, 308, 312, 314,
316, 318, 320, 322, 324, 326, 328, 330, 330, 332, 336, 336, 336,
340, 344, 346, 348, 348, 350, 352, 354, 356, 358, 360, 362, 364,
366, 368, 370, 370, 372, 372, 372, 372, 372, 372, 372, 372, 372,
372), list(c(1, 0, 1), structure(list(), .Names = character(0)),
structure(list(name = c("1", "2", "3", "4", "5", "6", "7",
"8", "9", "10", "11", "12", "13", "14", "15", "16", "17",
"18", "19", "20", "21", "22", "23", "24", "25", "26", "27",
"28", "29", "30", "31", "32", "33", "34", "35", "36", "37",
"38", "39", "40", "41", "42", "43", "44", "45", "46", "47",
"48", "49", "50", "51", "52", "53", "54", "55", "56", "57",
"58", "59", "60", "61", "62", "63", "64", "65", "66", "67",
"68", "69", "70", "71", "72", "73", "74", "75", "76", "77",
"78", "79", "80", "81", "82", "83", "84", "85", "86", "87",
"88", "89", "90", "91", "92", "93", "94", "95", "96", "97",
"98", "99", "100", "101", "102", "103", "104", "105", "106",
"107", "108", "109", "110", "111", "112", "113", "114", "115",
"116", "117", "118", "119", "120", "121", "122", "123", "124",
"125", "126", "127", "128", "129", "130", "131", "132", "133",
"134", "135", "136", "137", "138", "139", "140", "141", "142",
"143", "144", "145", "146", "147", "148", "149", "150", "151",
"152", "153", "154", "155", "156", "157", "158", "159", "160",
"161", "162", "163", "164", "165", "166", "167", "168", "169"
)), .Names = "name"), structure(list(DIST_KM_CNT = c(4.89,
1.45, 2.36, 1.45, 2.36, 1.18, 0, 1.18, 0.89, 1.47, 0.89,
1.47, 1.16, 1.16, 1.2, 1.2, 1.02, 0.79, 1.02, 0, 0, 1, 0.79,
0, 0.98, 1.03, 1.15, 0, 1.03, 1.35, 0.95, 0, 0.99, 1.15,
0.99, 1.53, 0, 1.22, 1.53, 1.37, 1.22, 1.37, 1.23, 1.23,
1.1, 0, 1.1, 1.38, 1.69, 3.49, 3.16, 1.38, 4.89, 1.38, 3.49,
1.51, 0, 1.51, 1.39, 1.39, 1.78, 0.947, 1.78, 1.17, 2.12,
3.26, 1.17, 3.26, 1.43, 0, 0, 15.58, 1.11, 0.98, 1.09, 1.11,
1.09, 1.43, 1.43, 1.15, 1.11, 0, 1.15, 1.13, 1.11, 1.96,
1.13, 1.96, 1.86, 2.48, 1.86, 0, 1.44, 2.48, 1.44, 2.38,
2.38, 3.16, 2.41, 1.691, 2.41, 1.691, 1.54, 1.54, 1.65, 4.14,
1.65, 1.43, 4.14, 0.572, 0, 0.572, 0.455, 0.558, 0.455, 0.54,
0.558, 0.54, 0.682, 0.638, 0.682, 0.42, 0, 0.624, 0.42, 0.47,
0.624, 0.895, 0.47, 0.895, 0.493, 0.493, 0.703, 0.703, 0.553,
0.638, 0.553, 4.52, 1.94, 1.69, 1.38, 4.52, 0.947, 2.647,
2.647, 1.35, 0, 1.66, 0, 0, 1.05, 0, 1.66, 1.31, 1.05, 1.54,
1.31, 1.54, 1.72, 1.72, 1.24, 0, 1.24, 0.94, 1.57, 0.94,
1.15, 1.57, 1.15, 0.77, 0.95, 0.77, 0.95, 0, 0, 1.38, 0.6,
1.38, 11.42, 0.6, 0.72, 2.64, 0.72, 0, 2.64, 0, 0.82, 0.708,
0.467, 0.708, 0.59, 0.467, 0.59, 0.828, 0.828, 1.047, 0.77,
0.517, 0.517, 0.897, 0.897, 0.727, 0.602, 0.727, 0.481, 0.602,
0.481, 0.726, 0.602, 0.726, 0.92, 0.602, 0.986, 0.44, 0.44,
0.513, 0.548, 0.513, 0.548, 0.721, 0.513, 0.721, 0.513, 0.564,
0.564, 0.937, 0.412, 0.576, 0.542, 0.412, 0.567, 0.542, 0.567,
0.497, 0.426, 0.497, 0.379, 0.426, 0.379, 0.987, 0, 0, 0.614,
1.321, 1.327, 0.912, 0, 1.327, 1.735, 0.912, 1.735, 1.577,
0, 1.188, 1.577, 1.321, 1.017, 1.057, 1.017, 1.239, 1.057,
0, 0.732, 1.239, 0.732, 0.877, 0.877, 1.548, 0.816, 1.548,
0.806, 0, 11.5, 0.816, 0.806, 0.689, 0.689, 0.614, 1.188,
1.357, 2.496, 1.028, 1.028, 1.432, 0.93, 1.432, 0.93, 0,
0.794, 0.794, 0.811, 1.395, 0.811, 1.323, 1.395, 1.323, 1.385,
1.385, 0.774, 1.53, 0.774, 1.53, 0, 0.841, 0, 0.841, 1.317,
7.75, 2.496, 1.357, 11.5, 15.58, 0.75, 0.905, 7.75, 1.317,
0.89, 0.593, 0.89, 0.593, 0.555, 11.42, 0.555, 1.18, 1.18,
0.87, 0.87, 2.63, 1.21, 2.63, 1.6, 1.21, 1.26, 1.6, 1.26,
1.09, 1.09, 1.12, 1.58, 1.12, 1.58, 1.42, 1.42, 0.54, 0.75,
0.54, 1, 1.03, 0, 0.95, 0, 1.03, 0, 2.12, 1.94, 0, 0.986,
0, 0.937, 0, 0.82, 1.047, 0.92, 0.77, 0, 0, 0.987, 0.576,
0.905, 1.317, 0, 1.317)), .Names = "DIST_KM_CNT")), <environment>), class = "igraph")
K Shortest Path logic
# find k shortest paths
k.shortest.paths <- function(graph, from, to, k){
# first shortest path
k0 <- get.shortest.paths(graph,from,to, output='both')
# number of currently found shortest paths
kk <- 1
# list of alternatives
variants <- list()
# shortest variants
shortest.variants <- list(list(g=graph, path=k0$epath, vert=k0$vpath, dist=shortest.paths(graph,from,to)))
# until k shortest paths are found
while(kk<k){
# take last found shortest path
last.variant <- shortest.variants[[length(shortest.variants)]]
# calculate all alternatives
variants <- calculate.variants(variants, last.variant, from, to)
# find shortest alternative
sp <- select.shortest.path(variants)
# add to list, increase kk, remove shortest path from list of alternatives
shortest.variants[[length(shortest.variants)+1]] <- list(g=variants[[sp]]$g, path=variants[[sp]]$variants$path, vert=variants[[sp]]$variants$vert, dist=variants[[sp]]$variants$dist)
kk <- kk+1
variants <- variants[-sp]
}
return(shortest.variants)
}
# found all alternative routes
calculate.variants <- function(variants, variant, from, to){
# take graph from current path
g <- variant$g
# iterate through edges, removing one each iterations
for (j in unlist(variant$path)){
newgraph <- delete.edges(g, j) # remove adge
sp <- get.shortest.paths(newgraph,from,to, output='both') # calculate shortest path
spd <- shortest.paths(newgraph,from,to) # calculate length
if (spd != Inf){ # the the path is found
if (!contains.path(variants, sp$vpath)) # add to list, unless it already contains the same path
{
variants[[length(variants)+1]] <- list(g=newgraph, variants=list(path=sp$epath, vert=sp$vpath, dist=spd))
}
}
}
return(variants)
}
# does a list contain this path?
contains.path <- function(variants, variant){
return( any( unlist( lapply( variants, function(x){ identical(x$variant$vert,variant) } ) ) ) )
}
# which path from the list is the shortest?
select.shortest.path <- function(variants){
return( which.min( unlist( lapply( variants, function(x){x$variants$dist} ) ) ) )
}
The results are below with Same Path and and the distance computed is also not correct.I am not sure about where i am making the mistake
library(igraph)
k.shortest.paths(my.graph, from = 37, to = 8, k = 2)
[[1]]
[[1]]$g
IGRAPH UN-- 169 372 --
+ attr: name (v/c), DIST_KM_CNT (e/n)
+ edges (vertex names):
[1] 1 --23 1 --2 2 --3 1 --2 2 --3 3 --4 3 --115 3 --4 4 --5
[10] 5 --6 4 --5 5 --6 6 --7 6 --7 7 --8 7 --8 8 --9 9 --10
[19] 8 --9 10--111 10--79 10--160 9 --10 11--160 11--31 11--12 12--14
[28] 12--161 11--12 13--67 13--161 13--139 14--15 12--14 14--15 15--16
[37] 15--82 16--17 15--16 17--18 16--17 17--18 18--19 18--19 19--20
[46] 20--131 19--20 20--63 21--63 21--24 22--43 22--23 1 --23 22--23
[55] 21--24 24--25 24--162 24--25 25--26 25--26 26--27 27--65 26--27
[64] 28--29 28--162 29--30 28--29 29--30 30--48 30--49 30--54 31--143
+ ... omitted several edges
[[1]]$path
[[1]]$path[[1]]
+ 11/372 edges (vertex names):
[1] 36--37 35--36 34--35 33--34 32--33 31--32 11--31 11--160 10--160
[10] 9 --10 8 --9
[[1]]$vert
[[1]]$vert[[1]]
+ 12/169 vertices, named:
[1] 37 36 35 34 33 32 31 11 160 10 9 8
[[1]]$dist
8
37 11
[[2]]
[[2]]$g
IGRAPH UN-- 169 371 --
+ attr: name (v/c), DIST_KM_CNT (e/n)
+ edges (vertex names):
[1] 1 --23 1 --2 2 --3 1 --2 2 --3 3 --4 3 --115 3 --4 4 --5
[10] 5 --6 4 --5 5 --6 6 --7 6 --7 7 --8 7 --8 8 --9 9 --10
[19] 8 --9 10--111 10--79 10--160 9 --10 11--160 11--31 11--12 12--14
[28] 12--161 11--12 13--67 13--161 13--139 14--15 12--14 14--15 15--16
[37] 15--82 16--17 15--16 17--18 16--17 17--18 18--19 18--19 19--20
[46] 20--131 19--20 20--63 21--63 21--24 22--43 22--23 1 --23 22--23
[55] 21--24 24--25 24--162 24--25 25--26 25--26 26--27 27--65 26--27
[64] 28--29 28--162 29--30 28--29 29--30 30--48 30--49 30--54 31--143
+ ... omitted several edges
[[2]]$path
[[2]]$path[[1]]
+ 11/371 edges (vertex names):
[1] 36--37 35--36 34--35 33--34 32--33 31--32 11--31 11--160 10--160
[10] 9 --10 8 --9
[[2]]$vert
[[2]]$vert[[1]]
+ 12/169 vertices, named:
[1] 37 36 35 34 33 32 31 11 160 10 9 8
[[2]]$dist
8
37 11
I know this is like 2 years late but hopefully this will be useful for other people who needs an implementation of yen's algorithm in R.
library(igraph)
library(tidyverse)
#'#return the shortest path as a list of vertices or NULL if there is no path between src and dest
shortest_path <- function(graph, src, dest){
path <- suppressWarnings(get.shortest.paths(graph, src, dest))
path <- names(path$vpath[[1]])
if (length(path)==1) NULL else path
}
#'#return the sum of the weights of all the edges in the given path
path_weight <- function(path, graph) sum(E(graph, path=path)$weight)
#'#description sorts a list of paths based on the weight of the path
sort_paths <- function(graph, paths) paths[paths %>% sapply(path_weight, graph) %>% order]
#'#description creates a list of edges that should be deleted
find_edges_to_delete <- function(A,i,rootPath){
edgesToDelete <- NULL
for (p in A){
rootPath_p <- p[1:i]
if (all(rootPath_p == rootPath)){
edge <- paste(p[i], ifelse(is.na(p[i+1]),p[i],p[i+1]), sep = '|')
edgesToDelete[length(edgesToDelete)+1] <- edge
}
}
unique(edgesToDelete)
}
#returns the k shortest path from src to dest
#sometimes it will return less than k shortest paths. This occurs when the max possible number of paths are less than k
k_shortest_yen <- function(graph, src, dest, k){
if (src == dest) stop('src and dest can not be the same (currently)')
#accepted paths
A <- list(shortest_path(graph, src, dest))
if (k == 1) return (A)
#potential paths
B <- list()
for (k_i in 2:k){
prev_path <- A[[k_i-1]]
num_nodes_to_loop <- length(prev_path)-1
for(i in 1:num_nodes_to_loop){
spurNode <- prev_path[i]
rootPath <- prev_path[1:i]
edgesToDelete <- find_edges_to_delete(A, i,rootPath)
t_g <- delete.edges(graph, edgesToDelete)
#for (edge in edgesToDelete) t_g <- delete.edges(t_g, edge)
spurPath <- shortest_path(t_g,spurNode, dest)
if (!is.null(spurPath)){
total_path <- list(c(rootPath[-i], spurPath))
if (!total_path %in% B) B[length(B)+1] <- total_path
}
}
if (length(B) == 0) break
B <- sort_paths(graph, B)
A[k_i] <- B[1]
B <- B[-1]
}
A
}
#===================Test==========================#
edgeList <- tibble(from=character(), to=character(), weight = numeric())
edgeList[nrow(edgeList)+1,] <-list('c','d',3)
edgeList[nrow(edgeList)+1,] <-list('d','f',4)
edgeList[nrow(edgeList)+1,] <-list('f','h',1)
edgeList[nrow(edgeList)+1,] <-list('c','e',2)
edgeList[nrow(edgeList)+1,] <-list('e','d',1)
edgeList[nrow(edgeList)+1,] <-list('e','f',2)
edgeList[nrow(edgeList)+1,] <-list('e','g',3)
edgeList[nrow(edgeList)+1,] <-list('g','h',2)
edgeList[nrow(edgeList)+1,] <-list('f','g',2)
graph <- graph.data.frame(edgeList)
#k_shortest.yen(graph, 'c','c',7) #expect error
#expect all 7 paths
k_shortest_yen(graph,'c','h',7)
I had the same problem and then i noticed that there are a error in the code. The function identical in function contains.path were not returning the correct value. I simply changed the code of identical(x$variant$vert,variant) to identical(unlist(x$variant$vert),unlist(variant)). And now the code is reporting all routings and no duplicates are present.

Resources