Remove steps in time series - r
I am analyzing lizards increase of temperature. Problem is that the animals sometime move moving the thermometer we are using for the measurements. This results in spikes (few measurements extremely high or low) and steps (sudden offsets of the curve). I wanted to clean my curves from this noise, removing the spikes and aligning the two parts of the curve separated by each step. I guess the spikes can be easily smoothed but what about the steps?
Examples of time series with steps marked in yellow and spikes in red
This is a sample from my dataframe
structure(list(ID = structure(c(183L, 183L, 183L, 183L, 183L,
183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L,
183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L,
183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L,
183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L,
183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L,
183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L,
183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L,
183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L,
183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L,
183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L,
183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L,
183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L,
183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L,
183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L,
183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L,
183L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L,
194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L,
194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L,
194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L,
194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L,
194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L,
194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L,
194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L,
194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L,
194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L,
194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L,
194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L,
194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L,
194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L,
194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L,
194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L,
194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L,
194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L,
194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L,
194L, 194L, 194L), .Label = c("101", "102", "104", "106", "107",
"109", "110", "111", "112", "113", "114", "115", "116", "118",
"119", "121", "122", "123", "124", "125", "126", "128", "129",
"130", "132", "133", "134", "157", "158", "161", "163", "164",
"165", "166", "167", "168", "169", "170", "171", "172", "173",
"174", "175", "177", "178", "179", "180", "181", "182", "183",
"186", "187", "188", "189", "191", "192", "193", "194", "195",
"196", "198", "199", "201", "202", "203", "204", "205", "207",
"208", "209", "210", "211", "213", "214", "215", "218", "219",
"220", "221", "222", "223", "224", "225", "226", "227", "228",
"229", "230", "233", "235", "307", "308", "310", "311", "312",
"313", "314", "315", "316", "317", "318", "319", "320", "321",
"322", "323", "324", "325", "326", "327", "328", "329", "330",
"331", "332", "333", "334", "335", "336", "339", "340", "341",
"343", "344", "346", "347", "348", "349", "350", "351", "352",
"353", "354", "355", "356", "357", "358", "58", "59", "60", "61",
"62", "63", "64", "65", "66", "67", "68", "69", "70", "71", "72",
"73", "74", "75", "76", "78", "79", "80", "81", "82", "83", "84",
"85", "86", "87", "88", "89", "90", "91", "92", "93", "94", "95",
"96", "97", "98", "99", "F04000017", "F04001300", "F04060000",
"F04070000", "F04080000", "F05000017", "F05000020", "F05060000",
"F05070000", "FSUM", "M03100000", "M04000016", "M04090000", "M04100000",
"M05000016", "M05000018", "M05080000", "M05090000", "M05100000",
"MSUM"), class = "factor"), TIME = c(1600, 0, 180, 280, 1270,
190, 1570, 100, 630, 110, 1530, 790, 90, 650, 910, 1520, 460,
270, 710, 1240, 1610, 550, 470, 1320, 360, 1220, 860, 540, 290,
1330, 1180, 170, 1700, 990, 1060, 420, 10, 330, 1110, 1160, 890,
260, 620, 1140, 1420, 380, 300, 1650, 1430, 200, 490, 1590, 150,
430, 720, 950, 730, 1660, 1260, 740, 810, 780, 1370, 960, 1130,
1210, 1670, 1010, 760, 1280, 1450, 770, 1070, 1000, 1030, 690,
80, 1040, 1050, 160, 1350, 1230, 1630, 640, 1390, 1460, 1100,
850, 1120, 390, 1170, 980, 320, 590, 520, 1090, 560, 870, 1020,
750, 140, 700, 70, 1500, 340, 1290, 530, 840, 230, 370, 1440,
1200, 1480, 30, 1400, 210, 1300, 480, 450, 1580, 1560, 410, 1360,
900, 670, 1540, 50, 500, 400, 1380, 930, 580, 1680, 600, 1340,
240, 1190, 570, 1640, 940, 350, 1550, 830, 310, 880, 1080, 920,
1490, 610, 220, 1470, 800, 820, 40, 1250, 130, 60, 680, 20, 970,
1620, 440, 1690, 120, 510, 1410, 660, 250, 1310, 1150, 1510,
1230, 1540, 320, 1380, 680, 410, 480, 580, 130, 670, 1200, 1900,
1410, 1470, 1950, 400, 870, 970, 1740, 500, 1300, 1220, 240,
660, 1100, 20, 490, 590, 900, 820, 1910, 920, 690, 1290, 1710,
750, 1860, 1840, 880, 1250, 2070, 640, 1580, 1310, 800, 1000,
2090, 1760, 330, 760, 40, 600, 1800, 200, 1730, 2080, 470, 30,
1490, 2030, 860, 1620, 250, 1880, 790, 90, 360, 1520, 460, 180,
1770, 1450, 1940, 1370, 1180, 290, 2060, 1240, 1500, 1480, 510,
980, 1610, 1630, 950, 1980, 990, 140, 60, 110, 1680, 10, 550,
1700, 1750, 1270, 1690, 230, 2010, 1070, 910, 300, 1850, 1140,
1350, 930, 170, 1650, 1170, 1820, 1040, 1420, 120, 1150, 50,
960, 1560, 1790, 630, 280, 740, 1890, 2020, 370, 2040, 2050,
310, 380, 1920, 340, 1030, 430, 1330, 1670, 1960, 830, 1590,
420, 1400, 560, 1360, 1460, 1440, 1120, 1870, 1720, 1130, 730,
390, 850, 520, 1090, 1550, 100, 1930, 1010, 440, 210, 1390, 720,
1190, 450, 770, 1320, 220, 1640, 570, 2100, 1210, 650, 2000,
840, 1340, 1830, 530, 1110, 1260, 890, 700, 80, 810, 1060, 260,
1510, 1600, 70, 1430, 1280, 1530, 1020, 540, 940, 1050, 710,
1660, 1570, 270, 610, 620, 780, 1780, 350, 1970, 0, 1160, 1990,
190, 150, 1080, 160, 1810), LIZ = c(33.88628, 16.01848, 19.07537,
20.7521, 31.20864, 19.28933, 33.81645, 17.74926, 25.17476, 17.96775,
35.05947, 27.00651, 17.59508, 25.4069, 28.208, 34.99299, 22.88728,
20.59275, 26.18355, 31.03587, 33.947, 24.10997, 23.02703, 33.7095,
22.17608, 30.91595, 27.75973, 23.97134, 20.93754, 33.80886, 30.68742,
18.91375, 33.92175, 29.09865, 29.63962, 22.70535, 16.21696, 21.63548,
30.08059, 30.53127, 27.98175, 20.43409, 25.05877, 30.36036, 34.46184,
22.44901, 21.10243, 33.94136, 34.52333, 19.46398, 23.28305, 33.80731,
18.61317, 22.47734, 26.25653, 28.72091, 26.38854, 33.93099, 31.15667,
26.51461, 27.30298, 26.90585, 34.10386, 28.83213, 30.1994, 30.83049,
33.89071, 29.26265, 26.67329, 32.40561, 34.64661, 26.80018, 29.67405,
29.16813, 29.42029, 25.87777, 17.42933, 29.50874, 29.60253, 18.75925,
33.97794, 30.99272, 33.94607, 25.2893, 34.24567, 34.68945, 30.0688,
27.65221, 30.13411, 22.59461, 30.63778, 28.96221, 21.41397, 24.58403,
23.68083, 29.85481, 24.22983, 27.8342, 29.36583, 26.60143, 18.46704,
26.03891, 17.26645, 34.9014, 21.84469, 33.39574, 23.82, 27.57049,
19.93821, 22.30472, 34.60674, 30.77051, 34.79948, 16.55761, 34.33158,
19.62092, 33.46234, 23.15645, 22.68871, 33.79304, 33.88377, 22.99619,
34.04042, 28.1129, 25.6251, 34.21122, 16.89074, 23.43516, 22.78885,
34.17764, 28.47507, 24.45472, 33.8742, 24.72735, 33.90453, 20.10577,
30.73417, 24.35771, 33.93048, 28.62741, 21.99781, 33.97049, 27.48719,
21.25535, 27.92438, 29.80396, 28.35731, 34.8615, 24.93956, 19.783,
34.76926, 27.19699, 27.40897, 16.72252, 31.08047, 18.32881, 17.0416,
25.72955, 16.38935, 28.8926, 33.97636, 22.53829, 33.86432, 18.22302,
23.55972, 34.38583, 25.50873, 20.27311, 33.6045, 30.43855, 34.93784,
35.288345, 36.257075, 25.391195, 36.09727, 30.813695, 27.044815,
28.04567, 29.548165, 20.606055, 30.69551, 35.129335, 37.32639,
36.37374, 36.308585, 37.41784, 26.86707, 32.548155, 33.36402,
37.37834, 28.385865, 35.88449, 35.231535, 23.53661, 30.580155,
34.58997, 16.9712, 28.21824, 29.69837, 32.803, 32.16565, 37.3345,
32.96087, 30.917135, 35.835545, 37.40881, 31.57274, 37.28267,
37.3545, 32.635805, 35.639945, 37.55586, 30.322245, 36.361875,
35.93363, 32.083515, 33.9122, 37.57389, 37.38447, 25.60093, 31.70867,
17.74926, 29.83628, 37.32308, 22.486975, 37.37677, 37.54575,
27.8517, 17.37036, 36.13542, 37.58466, 32.4728, 36.84912, 23.79395,
37.2999, 32.03574, 19.402135, 27.835535, 36.185425, 27.68629,
21.91643, 37.37574, 36.612895, 37.38406, 36.11211, 35.03594,
24.74032, 37.55308, 35.4775, 36.152685, 36.125095, 28.51215,
33.614975, 36.71473, 36.859965, 33.190425, 37.57786, 33.82105,
20.911715, 18.444165, 19.9942, 36.89602, 16.551, 29.161815, 37.4295,
37.38779, 35.75298, 36.91141, 23.29514, 37.59916, 34.393665,
32.88079, 24.885135, 37.30579, 34.812115, 36.05718, 33.03272,
21.730805, 36.877595, 34.980885, 37.34123, 34.19463, 36.436875,
20.27451, 34.880975, 18.10635, 33.268755, 36.24434, 37.33012,
30.18474, 24.530635, 31.445215, 37.32058, 37.6003, 36.417575,
37.57818, 37.56227, 25.15269, 26.548085, 37.34238, 25.799625,
34.12894, 27.14156, 36.0024, 36.89349, 37.48383, 32.23698, 36.386895,
27.013425, 36.295295, 29.29934, 36.091595, 36.642735, 36.5601,
34.706135, 37.27491, 37.38146, 34.758915, 31.35675, 26.709435,
32.374665, 28.69313, 34.52595, 36.22561, 19.717425, 37.35049,
33.991875, 27.288855, 22.76699, 36.179625, 31.25419, 35.08752,
27.507965, 31.821405, 35.97357, 23.037415, 36.86924, 29.408195,
37.60048, 35.18184, 30.45653, 37.58567, 32.27861, 36.04241, 37.34733,
28.86293, 34.64831, 35.684655, 32.71743, 31.01652, 19.07221,
32.1161, 34.32663, 24.059185, 36.16283, 36.413085, 18.77961,
36.49902, 35.7932, 36.214135, 34.05778, 29.01832, 33.111965,
34.2606, 31.14351, 36.88914, 36.326515, 24.30303, 29.97399, 30.103995,
31.92968, 37.36615, 26.01068, 37.52906, 16.12443, 34.929045,
37.59149, 22.175545, 21.19651, 34.4613, 21.473975, 37.33004),
COP = c(22.39478, 22.68049, 22.27224, 22.19685, 22.16573,
22.26254, 22.34891, 22.41931, 22.15782, 22.39973, 22.31492,
22.13082, 22.44822, 22.16036, 22.13963, 22.3111, 22.15529,
22.20868, 22.14849, 22.16546, 22.40751, 22.15873, 22.15807,
22.15667, 22.17006, 22.16576, 22.13619, 22.15633, 22.18792,
22.15968, 22.15784, 22.28282, 22.39207, 22.15544, 22.15307,
22.15378, 22.65061, 22.18484, 22.14693, 22.1517, 22.14945,
22.22223, 22.15782, 22.15069, 22.18498, 22.17095, 22.18882,
22.44182, 22.18867, 22.25752, 22.16363, 22.37156, 22.33576,
22.14907, 22.15658, 22.15133, 22.1547, 22.43926, 22.16738,
22.15422, 22.13264, 22.14114, 22.17317, 22.15817, 22.14417,
22.16643, 22.43323, 22.14401, 22.14921, 22.15957, 22.22265,
22.14089, 22.14463, 22.14593, 22.14691, 22.14661, 22.47322,
22.15314, 22.15694, 22.3083, 22.16482, 22.16823, 22.42285,
22.16284, 22.17225, 22.2391, 22.1465, 22.12234, 22.14539,
22.15492, 22.14823, 22.15392, 22.18663, 22.14773, 22.16047,
22.14056, 22.15791, 22.14152, 22.14251, 22.1509, 22.35001,
22.14744, 22.49713, 22.28185, 22.18089, 22.15846, 22.15929,
22.12327, 22.23335, 22.16951, 22.20456, 22.16193, 22.25876,
22.60373, 22.17513, 22.25065, 22.15608, 22.16255, 22.14552,
22.36192, 22.3395, 22.15159, 22.16513, 22.14421, 22.15804,
22.31474, 22.5485, 22.16658, 22.15816, 22.16993, 22.14259,
22.14321, 22.4235, 22.16097, 22.16565, 22.22893, 22.16082,
22.14911, 22.43463, 22.14959, 22.16988, 22.32926, 22.12439,
22.18906, 22.14787, 22.143, 22.14448, 22.26607, 22.16002,
22.24653, 22.25201, 22.13589, 22.13117, 22.578, 22.16947,
22.36273, 22.51722, 22.15239, 22.62479, 22.15932, 22.41646,
22.14778, 22.41367, 22.38414, 22.16356, 22.18521, 22.16367,
22.22729, 22.16007, 22.15065, 22.29892, 21.72296, 21.62249,
22.017515, 21.693785, 21.70153, 21.87866, 21.81236, 21.732425,
22.36322, 21.70375, 21.72846, 21.91553, 21.68792, 21.660505,
22.00908, 21.89052, 21.763825, 21.760305, 21.83179, 21.79684,
21.71059, 21.726415, 22.149185, 21.70785, 21.75106, 22.571055,
21.804205, 21.7291, 21.761135, 21.747215, 21.92869, 21.76272,
21.705115, 21.712995, 21.83027, 21.708885, 21.83844, 21.84054,
21.768415, 21.71803, 22.11175, 21.70465, 21.60251, 21.70955,
21.742635, 21.753505, 22.13507, 21.82251, 21.999255, 21.717505,
22.53302, 21.72247, 21.82229, 22.22044, 21.829, 22.12267,
21.82473, 22.55087, 21.6558, 22.09163, 21.76516, 21.593285,
22.13412, 21.86871, 21.73596, 22.43231, 21.943965, 21.636055,
21.836155, 22.26584, 21.82428, 21.672995, 21.98196, 21.694165,
21.73181, 22.066775, 22.10273, 21.71976, 21.64993, 21.65895,
21.788935, 21.759365, 21.59079, 21.59272, 21.762905, 22.04811,
21.755645, 22.34494, 22.4932, 22.397445, 21.59399, 22.582895,
21.750065, 21.84207, 21.82297, 21.712635, 21.593685, 22.164485,
22.07698, 21.7517, 21.760385, 22.04753, 21.8382, 21.741645,
21.69029, 21.759415, 22.28582, 21.590015, 21.73355, 21.83193,
21.75102, 21.68036, 22.378585, 21.73784, 22.51333, 21.75792,
21.609515, 21.82392, 21.70592, 22.084645, 21.708, 21.89372,
22.08329, 21.93241, 22.10217, 22.10085, 22.030495, 21.91551,
21.94469, 21.979055, 21.75095, 21.858405, 21.698245, 21.59609,
22.02914, 21.755635, 21.593795, 21.86841, 21.689295, 21.739825,
21.691495, 21.66596, 21.674615, 21.74691, 21.84458, 21.82984,
21.741525, 21.710355, 21.90157, 21.762305, 21.77891, 21.750345,
21.61608, 22.41471, 21.95989, 21.7551, 21.84604, 22.198325,
21.691115, 21.709805, 21.732425, 21.84097, 21.723055, 21.70937,
22.179165, 21.58997, 21.732705, 22.13691, 21.725925, 21.70574,
22.06154, 21.755495, 21.692555, 21.83686, 21.77328, 21.74777,
21.716135, 21.761175, 21.709325, 22.451175, 21.749225, 21.750545,
22.117925, 21.636325, 21.59097, 22.47463, 21.67836, 21.715015,
21.63479, 21.75122, 21.762415, 21.764115, 21.751305, 21.70731,
21.591635, 21.606505, 22.09898, 21.71826, 21.71403, 21.727555,
21.81842, 21.963065, 22.03673, 22.597065, 21.73344, 22.05635,
22.243885, 22.325185, 21.75266, 22.3091, 21.82619), BLK = c(28.64989,
15.80412, 20.22131, 22.01231, 28.0591, 20.41488, 28.57393,
18.42224, 25.81396, 18.66597, 28.51571, 26.66224, 18.17439,
25.95201, 27.20529, 28.51703, 24.38048, 21.85311, 26.30937,
28.02257, 28.6773, 25.22704, 24.48749, 28.1236, 23.18583,
27.98485, 27.00689, 25.14024, 22.16927, 28.1335, 27.89662,
20.00687, 28.67155, 27.44175, 27.62627, 23.92489, 16.09932,
22.78544, 27.75227, 27.83675, 27.13084, 21.69284, 25.74085,
27.80579, 28.28594, 23.44893, 22.33534, 28.7166, 28.29154,
20.61311, 24.68904, 28.60215, 19.59335, 24.03528, 26.36296,
27.34839, 26.40381, 28.7205, 28.04637, 26.45016, 26.76595,
26.62852, 28.18898, 27.38003, 27.79384, 27.96629, 28.71652,
27.47686, 26.54292, 28.06953, 28.37088, 26.5854, 27.64778,
27.45691, 27.54817, 26.17697, 17.91728, 27.57453, 27.59667,
19.79719, 28.15391, 28.01052, 28.70145, 25.8918, 28.19863,
28.40318, 27.73245, 26.95575, 27.77656, 23.56018, 27.86381,
27.42115, 22.64232, 25.52273, 24.96468, 27.69186, 25.29434,
27.05567, 27.51177, 26.4991, 19.37665, 26.24547, 17.66023,
28.45211, 22.92771, 28.08868, 25.05819, 26.9147, 21.16986,
23.31623, 28.31888, 27.94109, 28.41742, 16.63277, 28.21869,
20.81001, 28.10906, 24.58388, 24.26295, 28.59208, 28.56833,
23.80437, 28.16685, 27.16407, 26.06582, 28.53314, 17.15336,
24.78023, 23.69012, 28.19161, 27.27431, 25.4426, 28.70709,
25.60821, 28.15239, 21.35028, 27.92147, 25.36057, 28.71294,
27.31714, 23.05455, 28.55633, 26.8609, 22.49237, 27.09901,
27.67287, 27.23569, 28.43207, 25.6761, 20.99499, 28.41302,
26.72662, 26.81219, 16.89905, 28.04126, 19.14027, 17.39888,
26.11992, 16.37336, 27.39998, 28.69902, 24.15099, 28.69344,
18.90929, 24.86793, 28.25387, 26.01419, 21.52158, 28.12068,
27.81882, 28.48342, 27.96163, 28.197645, 22.70333, 28.117945,
26.073955, 23.85303, 24.593665, 25.413525, 19.11083, 26.00622,
27.90615, 27.98023, 28.14475, 28.161525, 28.08333, 23.7368,
27.01432, 27.341995, 27.83233, 24.77127, 28.08601, 27.96112,
21.39113, 25.949465, 27.68429, 15.858655, 24.68182, 25.48676,
27.12372, 26.823595, 27.98649, 27.17823, 26.13994, 28.074105,
27.80747, 26.44518, 27.88772, 27.88865, 27.054655, 27.983055,
28.14709, 25.80761, 28.22729, 28.087765, 26.741425, 27.415515,
28.16615, 27.82628, 22.852945, 26.515965, 16.55507, 25.558165,
27.86497, 20.59904, 27.82338, 28.15537, 24.503635, 16.208215,
28.18434, 28.12301, 26.98152, 28.238175, 21.57631, 27.93972,
26.69019, 18.11697, 23.249475, 28.194915, 24.400325, 20.18436,
27.83363, 28.152575, 28.05914, 28.1046, 27.87944, 22.246695,
28.13895, 27.97113, 28.17759, 28.17134, 24.852555, 27.36403,
28.231645, 28.240475, 27.28467, 28.09842, 27.38207, 19.33762,
17.208895, 18.63795, 28.32022, 15.478795, 25.178505, 27.81531,
27.83335, 28.025865, 28.32901, 21.198015, 28.12642, 27.594495,
27.149095, 22.40141, 27.87721, 27.77537, 28.07577, 27.214415,
19.976595, 28.24768, 27.87019, 27.87417, 27.512335, 28.1487,
18.87582, 27.808485, 16.88221, 27.304755, 28.22269, 27.85485,
25.751105, 22.085525, 26.402235, 27.96385, 28.12362, 23.3511,
28.13134, 28.13854, 22.55042, 23.483435, 27.99603, 22.988955,
27.479825, 24.074565, 28.06897, 28.300515, 28.08899, 26.850145,
28.21175, 23.961985, 28.148655, 25.25804, 28.084975, 28.15017,
28.142015, 27.74106, 27.89856, 27.81585, 27.762185, 26.34704,
23.61241, 26.9402, 24.945815, 27.660965, 28.20952, 18.388425,
28.03487, 27.44823, 24.182, 20.80328, 28.138395, 26.294595,
27.8952, 24.294155, 26.569235, 28.076645, 21.003065, 28.221055,
25.336215, 28.17753, 27.934355, 25.886285, 28.10892, 26.890965,
28.06394, 27.89246, 25.030615, 27.71521, 27.994955, 27.091235,
26.189505, 17.829845, 26.78325, 27.566845, 21.749355, 28.181235,
28.216485, 17.523135, 28.145175, 28.05206, 28.198345, 27.46945,
25.099975, 27.266245, 27.54595, 26.236675, 28.27499, 28.230255,
21.91532, 25.631615, 25.69132, 26.62857, 27.83423, 23.119855,
28.08352, 15.10413, 27.84174, 28.10417, 20.39175, 19.55529,
27.62871, 19.76947, 27.87254)), row.names = c(39410L, 39411L,
39412L, 39413L, 39414L, 39415L, 39416L, 39417L, 39418L, 39419L,
39420L, 39421L, 39422L, 39423L, 39424L, 39425L, 39426L, 39427L,
39428L, 39429L, 39430L, 39431L, 39432L, 39433L, 39434L, 39435L,
39436L, 39437L, 39438L, 39439L, 39440L, 39441L, 39442L, 39443L,
39444L, 39445L, 39446L, 39447L, 39448L, 39449L, 39450L, 39451L,
39452L, 39453L, 39454L, 39455L, 39456L, 39457L, 39458L, 39459L,
39460L, 39461L, 39462L, 39463L, 39464L, 39465L, 39466L, 39467L,
39468L, 39469L, 39470L, 39471L, 39472L, 39473L, 39474L, 39475L,
39476L, 39477L, 39478L, 39479L, 39480L, 39481L, 39482L, 39483L,
39484L, 39485L, 39486L, 39487L, 39488L, 39489L, 39490L, 39491L,
39492L, 39493L, 39494L, 39495L, 39496L, 39497L, 39498L, 39499L,
39500L, 39501L, 39502L, 39503L, 39504L, 39505L, 39506L, 39507L,
39508L, 39509L, 39510L, 39511L, 39512L, 39513L, 39514L, 39515L,
39516L, 39517L, 39518L, 39519L, 39520L, 39521L, 39522L, 39523L,
39524L, 39525L, 39526L, 39527L, 39528L, 39529L, 39530L, 39531L,
39532L, 39533L, 39534L, 39535L, 39536L, 39537L, 39538L, 39539L,
39540L, 39541L, 39542L, 39543L, 39544L, 39545L, 39546L, 39547L,
39548L, 39549L, 39550L, 39551L, 39552L, 39553L, 39554L, 39555L,
39556L, 39557L, 39558L, 39559L, 39560L, 39561L, 39562L, 39563L,
39564L, 39565L, 39566L, 39567L, 39568L, 39569L, 39570L, 39571L,
39572L, 39573L, 39574L, 39575L, 39576L, 39577L, 39578L, 39579L,
39580L, 41926L, 41927L, 41928L, 41929L, 41930L, 41931L, 41932L,
41933L, 41934L, 41935L, 41936L, 41937L, 41938L, 41939L, 41940L,
41941L, 41942L, 41943L, 41944L, 41945L, 41946L, 41947L, 41948L,
41949L, 41950L, 41951L, 41952L, 41953L, 41954L, 41955L, 41956L,
41957L, 41958L, 41959L, 41960L, 41961L, 41962L, 41963L, 41964L,
41965L, 41966L, 41967L, 41968L, 41969L, 41970L, 41971L, 41972L,
41973L, 41974L, 41975L, 41976L, 41977L, 41978L, 41979L, 41980L,
41981L, 41982L, 41983L, 41984L, 41985L, 41986L, 41987L, 41988L,
41989L, 41990L, 41991L, 41992L, 41993L, 41994L, 41995L, 41996L,
41997L, 41998L, 41999L, 42000L, 42001L, 42002L, 42003L, 42004L,
42005L, 42006L, 42007L, 42008L, 42009L, 42010L, 42011L, 42012L,
42013L, 42014L, 42015L, 42016L, 42017L, 42018L, 42019L, 42020L,
42021L, 42022L, 42023L, 42024L, 42025L, 42026L, 42027L, 42028L,
42029L, 42030L, 42031L, 42032L, 42033L, 42034L, 42035L, 42036L,
42037L, 42038L, 42039L, 42040L, 42041L, 42042L, 42043L, 42044L,
42045L, 42046L, 42047L, 42048L, 42049L, 42050L, 42051L, 42052L,
42053L, 42054L, 42055L, 42056L, 42057L, 42058L, 42059L, 42060L,
42061L, 42062L, 42063L, 42064L, 42065L, 42066L, 42067L, 42068L,
42069L, 42070L, 42071L, 42072L, 42073L, 42074L, 42075L, 42076L,
42077L, 42078L, 42079L, 42080L, 42081L, 42082L, 42083L, 42084L,
42085L, 42086L, 42087L, 42088L, 42089L, 42090L, 42091L, 42092L,
42093L, 42094L, 42095L, 42096L, 42097L, 42098L, 42099L, 42100L,
42101L, 42102L, 42103L, 42104L, 42105L, 42106L, 42107L, 42108L,
42109L, 42110L, 42111L, 42112L, 42113L, 42114L, 42115L, 42116L,
42117L, 42118L, 42119L, 42120L, 42121L, 42122L, 42123L, 42124L,
42125L, 42126L, 42127L, 42128L, 42129L, 42130L, 42131L, 42132L,
42133L, 42134L, 42135L, 42136L), class = "data.frame")
My problem is actually very similar to the one that this person had in python
Remove jumps like peaks and steps in timeseries but I haven't been able to find something similar for R
EDIT: I actually found something similar R-related in this question https://stats.stackexchange.com/questions/139660/detecting-changes-in-time-series-r-example?newreg=f119230044de4802a9f0f6f4e4637d8f
The solution using tsoutliers looks applicable to my problem but so far it didn't work very well. I am not sure about what I am doing wrong.
tso(dat.ts, types = c("LS","TC"))
flattens completely my curve
tso(dat.ts, types = c("LS","TC"), discard.method = "bottom-up")
Moves my curve in the right direction but not enough to fix the problem.
Any approach using the bottom-up method to discard outliers gives this result, any approac using the en-masse approach flattens the curve.
There are a couple methods attempted in the code below attempting to capture the changes: by percent change and by using a rolling median method.
First import a couple useful libraries. Also changing your data into a tibble called 'lizard_data'
library(tidyverse)
library(RcppRoll)
lizard_data <- tibble(your_data)
Then using ggplot2, we can visualize all the time series.
lizard_data %>%
pivot_longer(names_to = 'key', values_to = 'value', cols = c(LIZ, COP, BLK)) %>%
arrange(TIME) %>%
ggplot(aes(x = TIME, y = value, color = key))+
geom_line()+
facet_wrap(~ID, nrow = 2)
Then we remove the adjustments by finding the delta, and we can use two different methods to smooth the time series. First, with a percentile method (changing the top 5% and bottom 5% of values to the median) and second, with the rolling median (if the max change is ten percent higher or lower than the median, replacing the value with the median).
ld_w_change <- lizard_data %>%
pivot_longer(names_to = 'key', values_to = 'value', cols = c(LIZ, COP, BLK)) %>%
group_by(ID, key) %>%
arrange(TIME) %>%
mutate(lag = lag(value),
raw_change = (value-lag),
#using the percentile change method
med_raw_change = median(raw_change, na.rm = T),
q_05 = quantile(raw_change,.05, na.rm = T),
q_95 = quantile(raw_change,.95, na.rm = T),
adj_raw_change = if_else(raw_change > q_95 | raw_change < q_05, med_raw_change, raw_change),
normalized_change = if_else(is.na(adj_raw_change), 0,adj_raw_change),
initial_value = first(value),
roll_raw_change = cumsum(normalized_change),
new_value_pct = initial_value + roll_raw_change,
# using the rolling median method
rolling_median_change = roll_median(raw_change, n = 5, align = "right", na.rm = T, fill = NA),
adj_median_change = case_when(raw_change > rolling_median_change*1.1 | raw_change < rolling_median_change*.9 ~ rolling_median_change,
is.na(rolling_median_change) & is.na(raw_change) ~ 0,
T ~ raw_change),
normalized_med_change = cumsum(adj_median_change),
new_value_roll = initial_value + normalized_med_change
)
I personally prefer the median adjustment, it seems to preserve the shape of the data better than the percentile method. To compare these methods visually, we can plot them side-by-side:
ld_w_change %>%
pivot_longer(names_to = 'method', values_to = 'adjusted_temp', cols = c(new_value_pct, new_value_roll)) %>%
ggplot(aes(x = TIME, y =adjusted_temp, color = key))+
geom_line()+
facet_wrap(ID ~ method, nrow = 2)
Related
Creating window intervals of the positions in R
I am creating an interval list of the positions, by window 4 and step 1. pos <- subs$variable intervals <- paste0(pos[seq(1, n, by=1)],":", pos[seq(4, n, by=1)]) intervals I get are: [1] "92:107" "101:120" "106:132" "107:136" "120:140" "132:146" "136:147" "140:152" "146:166" "147:167" "152:174" [12] "166:186" "167:187" "174:189" "186:204" "187:228" "189:229" "204:107" "228:120" "229:132" subs <- structure(list(variable = c(92, 101, 106, 107, 120, 132, 136, 140, 146, 147, 152, 166, 167, 174, 186, 187, 189, 204, 228, 229 ), covMean = c(11355.658, 11450.079, 11479.711, 11495.132, 11612.053, 11580.158, 11421.684, 11288.105, 11278, 11239.763, 11236.895, 10425.526, 10386.789, 10233.816, 9523.132, 9503.316, 9450.158, 8532.763, 7795.368, 7656.895), emboss = c(1.3717151, 1.3828546, 1.3880071, 1.3879077, 1.3959816, 1.3830276, 1.3724465, 1.363247, 1.337794, 1.3334625, 1.3097811, 1.2826296, 1.2811749, 1.2676601, 1.2193303, 1.2136416, 1.2044259, 1.1649542, 1.1163399, 1.1125204 )), row.names = c(56L, 62L, 65L, 67L, 77L, 82L, 87L, 95L, 97L, 114L, 119L, 133L, 154L, 156L, 169L, 173L, 186L, 190L, 212L, 214L ), class = "data.frame") Why it starts iterating from the beginning, I want it to stop at the last position.
seq(1, n, by=1) and seq(4, n, by=1) are not of same length, the values are recycled. Try : n <- length(pos) intervals <- paste(pos[1:(n-3)],pos[4:n], sep =':') intervals # [1] "92:107" "101:120" "106:132" "107:136" "120:140" "132:146" "136:147" "140:152" "146:166" #[10] "147:167" "152:174" "166:186" "167:187" "174:189" "186:204" "187:228" "189:229"
How to find a specific point on a model in R?
I am working with a CSV dataset called combinedDataset, which I found a model for using k-fold validation procedure. My x value for the model is hour meter reading while my y value is cumulative cost. Here's a dput of my combinedDataset: structure(list(Unit.ID = c(925L, 967L, 1054L, 967L, 1054L, 967L, 1160L, 1054L, 1160L, 967L, 967L, 1054L, 1160L, 967L, 1054L, 1160L, 967L, 1160L, 1054L, 1054L, 967L, 1160L, 1054L, 967L, 1160L, 1054L, 967L, 1160L, 1054L, 164L, 967L, 967L, 1160L, 1054L, 164L, 967L, 164L, 1160L, 164L, 1054L, 967L, 164L, 1054L, 967L, 1054L, 164L, 967L, 164L, 164L, 1054L, 967L, 164L, 967L, 164L, 1054L, 164L, 925L, 164L, 967L, 1054L, 1054L, 925L, 925L, 164L, 165L, 164L, 1054L, 967L, 164L, 165L, 967L, 164L, 164L, 165L, 1054L, 967L, 967L, 165L, 164L, 1054L, 967L, 165L, 967L, 165L, 164L, 967L, 164L, 967L, 164L, 967L, 164L, 967L, 164L, 1054L, 164L, 164L, 164L, 164L, 164L, 164L, 164L), Hour.Meter.Reading = c(34L, 381L, 532L, 600L, 732L, 783L, 796L, 947L, 1016L, 1038L, 1200L, 1282L, 1290L, 1388L, 1481L, 1528L, 1579L, 1671L, 1704L, 1728L, 1755L, 1906L, 1926L, 1936L, 2031L, 2063L, 2136L, 2205L, 2293L, 2321L, 2342L, 2382L, 2425L, 2505L, 2524L, 2576L, 2704L, 2731L, 2777L, 2811L, 2873L, 2960L, 2997L, 3080L, 3170L, 3175L, 3264L, 3371L, 3386L, 3425L, 3485L, 3570L, 3690L, 3740L, 3746L, 3854L, 3863L, 3976L, 3990L, 3991L, 4078L, 4103L, 4106L, 4138L, 4138L, 4216L, 4249L, 4253L, 4305L, 4326L, 4353L, 4483L, 4489L, 4489L, 4500L, 4580L, 4581L, 4652L, 4721L, 4742L, 4784L, 4805L, 4828L, 4943L, 4947L, 4954L, 4968L, 5298L, 5316L, 5407L, 5533L, 5628L, 5712L, 5747L, 5951L, 6165L, 6194L, 6439L, 6636L, 6702L, 6918L), Labour.Cost = c(1102.5, 4270, 542.5, 2730, 682.5, 3097.5, 336, 871.5, 525, 2695, 1837.5, 1092, 1995, 2572.5, 1092, 924, 840, 1575, 693, 693, 560, 2100, 7959, 2747.5, 1092, 1764, 2030, 5355, 7434, 315, 1890, 2688, 504, 3024, 805, 1701, 577.5, 777, 6440, 1281, 588, 4910, 1470, 1911, 3738, 4140, 9219, 525, 1995, 1239, 1491, 2292.5, 4389, 2012.5, 1134, 945, 490, 3307.5, 714, 756, 1302, 297.5, 875, 1872.5, 1435, 1767.5, 2037, 3108, 1645, 1067.5, 3087, 1452.5, 11777.5, 5670, 4872, 2916, 4158, 5350, 2817.5, 84, 1596, 3865, 714, 910, 4112.5, 1197, 3622.5, 714, 3675, 4767, 3150, 2142, 2436, 210, 1974, 3843, 14532, 2373, 2919, 7098, 2205), Parts.Cost = c(657.6733, 6451.9113, 2235.8885, 6729.7326, 8357.0427, 9224.9012, 1957.0181, 6890.5315, 3156.4815, 2009.3578, 4555.0977, 3458.6842, 1546.2183, 6249.232, 4430.8058, 3835.5721, 3415.2062, 4868.2379, 2151.4558, 2233.2055, 2554.7489, 7433.8141, 2563.289, 3348.7162, 2173.6179, 1940.2806, 4404.6421, 5626.8595, 10553.4599, 12.62, 11405.5704, 2554.2787, 1907.3543, 12625.7525, 243.5735, 6104.7416, 405.959, 3609.1684, 4647.767, 12842.3638, 489.477, 9961.5883, 1706.0572, 2381.7686, 15177.0692, 5416.7948, 16538.1428, 253.3975, 1390.5058, 8699.7549, 7759.8042, 5128.0276, 8556.2625, 5760.523, 1923.699, 628.643, 158.4313, 14481.7111, 3796.3243, 11671.4333, 7140.2504, 1326.837, 441.0999, 2866.2141, 4229.31, 2935.825, 7452.8686, 11683.7093, 2644.1532, 418.679, 11665.8066, 523.9236, 18247.2776, 8115.265, 25011.6846, 13727.0801, 31786.6422, 6064.3123, 10599.0455, 119.4423, 1228.3541, 3587.7566, 3666.517, 472.1537, 1968.7669, 1417.8506, 8023.1254, 5831.6884, 14873.8008, 10193.2736, 6442.1719, 7525.4562, 4378.1336, 1691.4286, 12144.6891, 13094.8609, 20582.1682, 2544.103, 16934.6748, 17344.5551, 8912.7088), Total.Cost = c(1760.1733, 10721.9113, 2778.3885, 9459.7326, 9039.5427, 12322.4012, 2293.0181, 7762.0315, 3681.4815, 4704.3578, 6392.5977, 4550.6842, 3541.2183, 8821.732, 5522.8058, 4759.5721, 4255.2062, 6443.2379, 2844.4558, 2926.2055, 3114.7489, 9533.8141, 10522.289, 6096.2162, 3265.6179, 3704.2806, 6434.6421, 10981.8595, 17987.4599, 327.62, 13295.5704, 5242.2787, 2411.3543, 15649.7525, 1048.5735, 7805.7416, 983.459, 4386.1684, 11087.767, 14123.3638, 1077.477, 14871.5883, 3176.0572, 4292.7686, 18915.0692, 9556.7948, 25757.1428, 778.3975, 3385.5058, 9938.7549, 9250.8042, 7420.5276, 12945.2625, 7773.023, 3057.699, 1573.643, 648.4313, 17789.2111, 4510.3243, 12427.4333, 8442.2504, 1624.337, 1316.0999, 4738.7141, 5664.31, 4703.325, 9489.8686, 14791.7093, 4289.1532, 1486.179, 14752.8066, 1976.4236, 30024.7776, 13785.265, 29883.6846, 16643.0801, 35944.6422, 11414.3123, 13416.5455, 203.4423, 2824.3541, 7452.7566, 4380.517, 1382.1537, 6081.2669, 2614.8506, 11645.6254, 6545.6884, 18548.8008, 14960.2736, 9592.1719, 9667.4562, 6814.1336, 1901.4286, 14118.6891, 16937.8609, 35114.1682, 4917.103, 19853.6748, 24442.5551, 11117.7088), Cumulative.Cost = c(1760.1733, 12482.0846, 15260.4731, 24720.2057, 33759.7484, 46082.1496, 48375.1677, 56137.1992, 59818.6807, 64523.0385, 70915.6362, 75466.3204, 79007.5387, 87829.2707, 93352.0765, 98111.6486, 102366.8548, 108810.0927, 111654.5485, 114580.754, 117695.5029, 127229.317, 137751.606, 143847.8222, 147113.4401, 150817.7207, 157252.3628, 168234.2223, 186221.6822, 186549.3022, 199844.8726, 205087.1513, 207498.5056, 223148.2581, 224196.8316, 232002.5732, 232986.0322, 237372.2006, 248459.9676, 262583.3314, 263660.8084, 278532.3967, 281708.4539, 286001.2225, 304916.2917, 314473.0865, 340230.2293, 341008.6268, 344394.1326, 354332.8875, 363583.6917, 371004.2193, 383949.4818, 391722.5048, 394780.2038, 396353.8468, 397002.2781, 414791.4892, 419301.8135, 431729.2468, 440171.4972, 441795.8342, 443111.9341, 447850.6482, 453514.9582, 458218.2832, 467708.1518, 482499.8611, 486789.0143, 488275.1933, 503027.9999, 505004.4235, 535029.2011, 548814.4661, 578698.1507, 595341.2308, 631285.873, 642700.1853, 656116.7308, 656320.1731, 659144.5272, 666597.2838, 670977.8008, 672359.9545, 678441.2214, 681056.072, 692701.6974, 699247.3858, 717796.1866, 732756.4602, 742348.6321, 752016.0883, 758830.2219, 760731.6505, 774850.3396, 791788.2005, 826902.3687, 831819.4717, 851673.1465, 876115.7016, 887233.4104)), class = "data.frame", row.names = c(NA, -101L)) Here's the code I used to find the model: set.seed(123) idx <- sample(1:nrow(combinedDataset), nrow(combinedDataset)) view(idx) test_size2 <- floor(nrow(combinedDataset)*0) #multiplied by 0 implies nothing is being tested because the whole model is getting trained train <- combinedDataset[-idx[1:test_size2],] view(test) view(train) train_X <- train$Hour.Meter.Reading train_y <- train$Cumulative.Cost X <- train_X y <- train_y poly_order <- 2 Model <- lm(y~poly(X, poly_order)) print(Model) and here's the code and a picture of the plot for the model: X_new = seq(min(X), max(X), 1) y_new <- predict(Model, data.frame(X = X_new)) plot.new() plot(combinedDataset$Hour.Meter.Reading, combinedDataset$Cumulative.Cost, col = "blue") lines(X_new, y_new, col="red", type = "l", lwd = 2) legend(5, 95, legend=c("samples", "fitted model"), col = c("blue", "red"), lty = c(-1, 1), pch = c(1, -1)) Using this model, I am trying to predict the “Cumulative Cost” when “Hour Meter Reading” are 4000 and 8000, accordingly. Is there a function that lets me find those specific values out of my model? I've tried plugging in 4000 and 8000 into the X value using the equation that came out of the printing my model, but I got insanely high numbers which don't seem right and don't fit correctly into the plot.
Trouble finding the MSE value during K-fold cross validation procedure
I am currently doing a K-fold cross validation procedure to determine the best model (linear or quadratic) for this data is. My data comes from a CSV dataset called combinedData which I've pasted a dput for below: structure(list(Unit.ID = c(925L, 967L, 1054L, 967L, 1054L, 967L, 1160L, 1054L, 1160L, 967L, 967L, 1054L, 1160L, 967L, 1054L, 1160L, 967L, 1160L, 1054L, 1054L, 967L, 1160L, 1054L, 967L, 1160L, 1054L, 967L, 1160L, 1054L, 164L, 967L, 967L, 1160L, 1054L, 164L, 967L, 164L, 1160L, 164L, 1054L, 967L, 164L, 1054L, 967L, 1054L, 164L, 967L, 164L, 164L, 1054L, 967L, 164L, 967L, 164L, 1054L, 164L, 925L, 164L, 967L, 1054L, 1054L, 925L, 925L, 164L, 165L, 164L, 1054L, 967L, 164L, 165L, 967L, 164L, 164L, 165L, 1054L, 967L, 967L, 165L, 164L, 1054L, 967L, 165L, 967L, 165L, 164L, 967L, 164L, 967L, 164L, 967L, 164L, 967L, 164L, 1054L, 164L, 164L, 164L, 164L, 164L, 164L, 164L), Hour.Meter.Reading = c(34L, 381L, 532L, 600L, 732L, 783L, 796L, 947L, 1016L, 1038L, 1200L, 1282L, 1290L, 1388L, 1481L, 1528L, 1579L, 1671L, 1704L, 1728L, 1755L, 1906L, 1926L, 1936L, 2031L, 2063L, 2136L, 2205L, 2293L, 2321L, 2342L, 2382L, 2425L, 2505L, 2524L, 2576L, 2704L, 2731L, 2777L, 2811L, 2873L, 2960L, 2997L, 3080L, 3170L, 3175L, 3264L, 3371L, 3386L, 3425L, 3485L, 3570L, 3690L, 3740L, 3746L, 3854L, 3863L, 3976L, 3990L, 3991L, 4078L, 4103L, 4106L, 4138L, 4138L, 4216L, 4249L, 4253L, 4305L, 4326L, 4353L, 4483L, 4489L, 4489L, 4500L, 4580L, 4581L, 4652L, 4721L, 4742L, 4784L, 4805L, 4828L, 4943L, 4947L, 4954L, 4968L, 5298L, 5316L, 5407L, 5533L, 5628L, 5712L, 5747L, 5951L, 6165L, 6194L, 6439L, 6636L, 6702L, 6918L), Labour.Cost = c(1102.5, 4270, 542.5, 2730, 682.5, 3097.5, 336, 871.5, 525, 2695, 1837.5, 1092, 1995, 2572.5, 1092, 924, 840, 1575, 693, 693, 560, 2100, 7959, 2747.5, 1092, 1764, 2030, 5355, 7434, 315, 1890, 2688, 504, 3024, 805, 1701, 577.5, 777, 6440, 1281, 588, 4910, 1470, 1911, 3738, 4140, 9219, 525, 1995, 1239, 1491, 2292.5, 4389, 2012.5, 1134, 945, 490, 3307.5, 714, 756, 1302, 297.5, 875, 1872.5, 1435, 1767.5, 2037, 3108, 1645, 1067.5, 3087, 1452.5, 11777.5, 5670, 4872, 2916, 4158, 5350, 2817.5, 84, 1596, 3865, 714, 910, 4112.5, 1197, 3622.5, 714, 3675, 4767, 3150, 2142, 2436, 210, 1974, 3843, 14532, 2373, 2919, 7098, 2205), Parts.Cost = c(657.6733, 6451.9113, 2235.8885, 6729.7326, 8357.0427, 9224.9012, 1957.0181, 6890.5315, 3156.4815, 2009.3578, 4555.0977, 3458.6842, 1546.2183, 6249.232, 4430.8058, 3835.5721, 3415.2062, 4868.2379, 2151.4558, 2233.2055, 2554.7489, 7433.8141, 2563.289, 3348.7162, 2173.6179, 1940.2806, 4404.6421, 5626.8595, 10553.4599, 12.62, 11405.5704, 2554.2787, 1907.3543, 12625.7525, 243.5735, 6104.7416, 405.959, 3609.1684, 4647.767, 12842.3638, 489.477, 9961.5883, 1706.0572, 2381.7686, 15177.0692, 5416.7948, 16538.1428, 253.3975, 1390.5058, 8699.7549, 7759.8042, 5128.0276, 8556.2625, 5760.523, 1923.699, 628.643, 158.4313, 14481.7111, 3796.3243, 11671.4333, 7140.2504, 1326.837, 441.0999, 2866.2141, 4229.31, 2935.825, 7452.8686, 11683.7093, 2644.1532, 418.679, 11665.8066, 523.9236, 18247.2776, 8115.265, 25011.6846, 13727.0801, 31786.6422, 6064.3123, 10599.0455, 119.4423, 1228.3541, 3587.7566, 3666.517, 472.1537, 1968.7669, 1417.8506, 8023.1254, 5831.6884, 14873.8008, 10193.2736, 6442.1719, 7525.4562, 4378.1336, 1691.4286, 12144.6891, 13094.8609, 20582.1682, 2544.103, 16934.6748, 17344.5551, 8912.7088), Total.Cost = c(1760.1733, 10721.9113, 2778.3885, 9459.7326, 9039.5427, 12322.4012, 2293.0181, 7762.0315, 3681.4815, 4704.3578, 6392.5977, 4550.6842, 3541.2183, 8821.732, 5522.8058, 4759.5721, 4255.2062, 6443.2379, 2844.4558, 2926.2055, 3114.7489, 9533.8141, 10522.289, 6096.2162, 3265.6179, 3704.2806, 6434.6421, 10981.8595, 17987.4599, 327.62, 13295.5704, 5242.2787, 2411.3543, 15649.7525, 1048.5735, 7805.7416, 983.459, 4386.1684, 11087.767, 14123.3638, 1077.477, 14871.5883, 3176.0572, 4292.7686, 18915.0692, 9556.7948, 25757.1428, 778.3975, 3385.5058, 9938.7549, 9250.8042, 7420.5276, 12945.2625, 7773.023, 3057.699, 1573.643, 648.4313, 17789.2111, 4510.3243, 12427.4333, 8442.2504, 1624.337, 1316.0999, 4738.7141, 5664.31, 4703.325, 9489.8686, 14791.7093, 4289.1532, 1486.179, 14752.8066, 1976.4236, 30024.7776, 13785.265, 29883.6846, 16643.0801, 35944.6422, 11414.3123, 13416.5455, 203.4423, 2824.3541, 7452.7566, 4380.517, 1382.1537, 6081.2669, 2614.8506, 11645.6254, 6545.6884, 18548.8008, 14960.2736, 9592.1719, 9667.4562, 6814.1336, 1901.4286, 14118.6891, 16937.8609, 35114.1682, 4917.103, 19853.6748, 24442.5551, 11117.7088), Cumulative.Cost = c(1760.1733, 12482.0846, 15260.4731, 24720.2057, 33759.7484, 46082.1496, 48375.1677, 56137.1992, 59818.6807, 64523.0385, 70915.6362, 75466.3204, 79007.5387, 87829.2707, 93352.0765, 98111.6486, 102366.8548, 108810.0927, 111654.5485, 114580.754, 117695.5029, 127229.317, 137751.606, 143847.8222, 147113.4401, 150817.7207, 157252.3628, 168234.2223, 186221.6822, 186549.3022, 199844.8726, 205087.1513, 207498.5056, 223148.2581, 224196.8316, 232002.5732, 232986.0322, 237372.2006, 248459.9676, 262583.3314, 263660.8084, 278532.3967, 281708.4539, 286001.2225, 304916.2917, 314473.0865, 340230.2293, 341008.6268, 344394.1326, 354332.8875, 363583.6917, 371004.2193, 383949.4818, 391722.5048, 394780.2038, 396353.8468, 397002.2781, 414791.4892, 419301.8135, 431729.2468, 440171.4972, 441795.8342, 443111.9341, 447850.6482, 453514.9582, 458218.2832, 467708.1518, 482499.8611, 486789.0143, 488275.1933, 503027.9999, 505004.4235, 535029.2011, 548814.4661, 578698.1507, 595341.2308, 631285.873, 642700.1853, 656116.7308, 656320.1731, 659144.5272, 666597.2838, 670977.8008, 672359.9545, 678441.2214, 681056.072, 692701.6974, 699247.3858, 717796.1866, 732756.4602, 742348.6321, 752016.0883, 758830.2219, 760731.6505, 774850.3396, 791788.2005, 826902.3687, 831819.4717, 851673.1465, 876115.7016, 887233.4104)), class = "data.frame", row.names = c(NA, -101L)) So far, I've created all the models I need (K=5) for both linear and quadratic models and I am at the stage where I am trying to calculate the MSE and R squarred values. Here's the code for the process below: #linear model (Model 1) k-validation #splitting the testing data into 5 k folds set.seed(123) idx <- sample(1:nrow(combinedDataset), nrow(combinedDataset)) view(idx) test_size <- floor(nrow(combinedDataset)*0.2) test1 <- combinedDataset[idx[1:test_size],] train1 <- combinedDataset[-idx[1:test_size],] view(test1) view(train1) train_X1 <- train1$Hour.Meter.Reading train_y1 <- train1$Cumulative.Cost test_X1 <- test1$Hour.Meter.Reading test_y1 <- test1$Cumulative.Cost X1 <- train_X1 y1 <- train_y1 #Create the 5 linear model equations poly_order <- 1 Model1 <- lm(y1~poly(X1, poly_order)) print(Model1) #Calculate MSE test_yhat1 <- predict(Model1, data.frame(X1 = test_X1)) MSE1 <- mean((test_y1-test_yhat1)^2) print(MSE1) But for the last part of the code where I am calculating the MSE value for the first model, I keep getting this error: Error: variable 'poly(X1, poly_order)' was fitted with type "nmatrix.1" but type "nmatrix.2" was supplied In addition: Warning message: In Z/rep(sqrt(norm2[-1L]), each = length(x)) : longer object length is not a multiple of shorter object length I have no idea what that code means or how to fix it. I've checked over my code multiple times but I haven't noticed anything wrong with my Model1. Edit: Made the code shorter
Plot slope and intercept of a model with log and percentage transformations
This is the structure of my data > dput(test) structure(list(MAT = c(4.9, 4.9, 15.5, 14.1, 14.1, 14.1, 11.5, 11.5, 11.5, 17, 6.1, 2.7, 2.2, 2.2, 14.1, 14.1, 14.1, 9.5, 9.5, 9.5, 9.5, 9.3, 8.3, 8.266666651, 8.266666651, 4.3, 4.3, 22.3, 14.1, 14.1, 14.1, 8.5, 8.5, 8.5, 8.5, 21.5, 21.5, 3.8, 3.8, 6, 6, 6, 6, 6), es = c(0.29603085763985, 0.421393627439682, 0.189653473156549, 0.226685054608428, 0.291373762079697, 0.166533544378467, 0.250586529054368, 0.146320008054403, 0.199565119644333, -0.0819047677231083, 0.15963948187092, -0.154628141843561, 0.201121044198443, 0.0867981239977565, 0.543870310978598, 0.34547921143505, 0.37557241352574, -0.287318919407836, 0.207937483228907, 0.190143660810163, 0.276182673435993, 0.128596803172119, 0.454753165843559, 0.399237234440439, 0.32075358541748, 0.362664873575803, -0.0865925288159671, 0.51290512543514, 0.186308318839249, 0.147936083867325, 0.243792477087184, 0.625169403695832, 0.110317782120045, 0.217836235313289, 0.171468156841181, 0.50548821117127, 0.164418265301427, -0.00246305543239786, 0.325552346507191, 0.381240606108843, 0.19337350462531, 0.0408803528990759, 0.321815078821239, 0.307642815014319), var = c(0.00496277337027962, 0.0130962311273343, 0.0180149624217804, 0.0134568083459063, 0.00139708925143695, 0.000725862546533828, 0.00670831011660164, 0.0190783110089115, 0.0641568910090007, 0.0121596544795352, 0.0653909966557582, 0.0514610437228611, 0.0231592619167496, 0.0108989891148006, 0.0588577146414195, 0.0695760532112402, 0.0744256820906048, 0.00997789089155498, 0.00928124381998638, 0.0145009450673482, 0.00652956018299188, 0.0111886178917916, 0.0265943757419349, 0.142676904340634, 0.110705177803624, 0.0576538348777718, 0.0625171635976251, 0.0131652117394448, 0.00947904166717649, 0.00813569411386797, 0.00444289889858652, 0.0673007030900184,0.00545169559098343, 0.240046081413733, 0.00561125010476281, 0.0185516235174018, 0.0179989506841957, 0.0496806959944248, 0.022478393723115, 0.0521209786580004, 0.282298667080106, 0.0151428845076692, 0.00992945920656693, 0.0145544965304081), MAP = c(810, 810, 1140, 1750, 1750, 1750, 1034, 1034, 1034, 720, 645, 645, 645, 645, 1000, 1000, 1000, 691, 691, 691, 691, 1134, 1750, 1326, 1326, 1140, 1140, 1310, 1750, 1750, 1750, 1003, 1003, 1003, 1003, 1750, 1750, 1750, 1750, 1750, 1750, 1750, 1750, 1750), CO2dif = c(162L, 162L, 190L, 165L, 165L, 165L, 200L, 200L, 200L, 150L, 335L, 335L, 335L, 335L, 348L, 348L, 348L, 200L, 200L, 200L, 200L, 220L, 350L, 350L, 350L, 350L, 350L, 350L, 180L, 180L, 180L, 130L, 130L, 130L, 130L, 320L, 320L, 360L, 360L, 345L, 345L, 350L, 348L, 348L)), row.names = c(NA, -44L), class = "data.frame", .Names = c("MAT", "es", "var", "MAP", "CO2dif")) I run model selection using meta-analysis, and the best model to predict the effects size is: library(metafor) summary(rma(es, var, data=test ,control=list(stepadj=.5), mods= ~ 1 + log(MAT) + MAP + CO2dif + log(MAT):CO2dif, knha=TRUE)) Model Results: estimate se tval pval ci.lb ci.ub intrcpt 1.2556 0.3719 3.3758 0.0017 0.5033 2.0080 ** log(MAT) -0.5740 0.1694 -3.3882 0.0016 -0.9167 -0.2313 ** MAP 0.0001 0.0001 2.5181 0.0160 0.0000 0.0003 * CO2dif -0.0042 0.0013 -3.2932 0.0021 -0.0067 -0.0016 ** log(MAT):CO2dif 0.0020 0.0005 3.7500 0.0006 0.0009 0.0031 *** Now I want to plot es vs MAT, with an example with this model, assuming that MAP=1200 mm and CO2dif=350 MAPi <- 1200 CO2i <- 350 make_pct <- function(x) (exp(x) - 1) * 100 ggplot(test, aes(x = log(MAT), y = make_pct(es))) + geom_abline(aes(intercept = make_pct(1.2556 + 0.0001 * MAPi - 0.0042 * CO2i), slope = make_pct(log(0.0020 * CO2i)) - make_pct(log(0.5740))) , color = "red", size=0.8) + geom_point() + theme_classic() Effect size (es) is in log format, and I want percentage, so I transform it with the function make_pct. MAT, on the other hand, has to be log-transformed in the plot as indicated in the model output. Is the slope of the ggplot above correct with the log and percentage transformations? It seems to me that the slope is rather low. I am not very familiar with this type of plots and transformations, so any tips are welcome. Thanks
The relationship between exp(es)-1 and the explanatory variable log(MAT) is not linear. For a given set of values of MAP and CO2dif, this relationship is of the form: y = exp(es)-1 = k1*exp(k2*log(MAT)). This function can be plotted as follows: library(metafor) library(ggplot2) modfit <- rma(es, var, data=test ,control=list(stepadj=.5), mods= ~ 1 + MAP + log(MAT)*CO2dif, knha=TRUE) pars <- coef(modfit) MAPi <- 1200 CO2i <- 350 make_pct <- function(x) (exp(x) - 1) * 100 mod_fun <- function(MAP, MAT, CO2dif, pars) { y <- pars[1]+pars[2]*MAP+pars[3]*log(MAT)+ pars[4]*CO2dif+pars[5]*log(MAT)*CO2dif make_pct(y) } test$ESpct <- mod_fun(MAPi, test$MAT, CO2i, coef(modfit)) ggplot(test, aes(x = log(MAT), y = make_pct(es))) + geom_line(aes(y=ESpct), color = "red", size=0.8) + geom_point() + theme_classic()
How to replicate a Monthly Cycle Chart in R
I'd like to output a chart similar to the one represented on this page (on the right) using R and any package that would make it look good: http://processtrends.com/pg_charts_monthly_cycle_chart.htm Anyone up to the challenge? :) Thanks!
The stats package in R base already has a function to do this. Here is my one-liner and the output that it produces monthplot(redata, labels = month.abb, ylab = 'Listings') Building on this an example of using monthplot for a weekly cycle plot is here (gives full R code and source data): http://figshare.com/figures/index.php/OpenURL_Router_Data:_Total_Requests_by_Weekday monthplot(ts(sdf$values, frequency = 7, start=c(12,5)), labels = dow, ylab = "No. requests / day", xlab = "Weekday") which gives this weekly cycle plot: Example of monthplot to create a graph showing a weekly cycle http://figshare.com/figures/images/a/a7/Total_requests_by_weekday_01_Apr_to_31_Jul_2011.jpeg
Of course no graphical challenge will be complete without a ggplot solution. The tricky bit is to use ddply to summarise the monthly averages, and to pass this as data to a separate layer to ggplot. library(lubridate) library(plyr) library(ggplot2) df$month <- factor(month(df$dates), levels=1:12, labels=month.abb, ordered=TRUE) df$year <- year(df$dates) hline.data <- ddply(df, .(month), summarize, avgvalue=mean(values)) ggplot() + geom_line(aes(x=year, y=values, group=month), data=df, colour="blue") + geom_hline(aes(yintercept=avgvalue), data=hline.data, colour="blue", size=2) + facet_grid(~month) + opts(axis.text.x = theme_blank()) + xlab("") The data: df <- structure(list(dates = structure(c(8415, 8446, 8474, 8505, 8535, 8566, 8596, 8627, 8658, 8688, 8719, 8749, 8780, 8811, 8839, 8870, 8900, 8931, 8961, 8992, 9023, 9053, 9084, 9114, 9145, 9176, 9204, 9235, 9265, 9296, 9326, 9357, 9388, 9418, 9449, 9479, 9510, 9541, 9570, 9601, 9631, 9662, 9692, 9723, 9754, 9784, 9815, 9845, 9876, 9907, 9935, 9966, 9996, 10027, 10057, 10088, 10119, 10149, 10180, 10210, 10241, 10272, 10300, 10331, 10361, 10392, 10422, 10453, 10484, 10514, 10545, 10575, 10606, 10637, 10665, 10696, 10726, 10757, 10787, 10818, 10849, 10879, 10910, 10940, 10971, 11002, 11031, 11062, 11092, 11123, 11153, 11184, 11215, 11245, 11276, 11306, 11337, 11368, 11396, 11427, 11457, 11488, 11518, 11549, 11580, 11610, 11641, 11671, 11702, 11733, 11761, 11792, 11822, 11853, 11883, 11914, 11945, 11975, 12006, 12036, 12067, 12098, 12126, 12157, 12187, 12218, 12248, 12279, 12310, 12340, 12371, 12401, 12432, 12463, 12492, 12523, 12553, 12584, 12614, 12645, 12676, 12706, 12737, 12767, 12798, 12829, 12857, 12888, 12918, 12949, 12979, 13010, 13041, 13071, 13102, 13132), class = "Date"), values = c(1093, 1182, 1299, 1372, 1319, 1362, 1239, 1162, 1059, 921, 815, 720, 835, 853, 1034, 1030, 1240, 1388, 1429, 1319, 1231, 1184, 1076, 825, 991, 1093, 854, 808, 1079, 1092, 1220, 1251, 1130, 1131, 1052, 951, 950, 1006, 1112, 1119, 1250, 1322, 1347, 1310, 1215, 1128, 1035, 992, 1079, 1018, 1112, 1224, 1323, 1344, 1326, 1267, 1171, 1075, 916, 932, 888, 904, 939, 1018, 1140, 1174, 1285, 1311, 1298, 1231, 1091, 1088, 991, 1028, 1177, 1322, 1322, 1398, 1389, 1174, 1196, 1115, 756, 496, 693, 673, 748, 777, 820, 948, 966, 1027, 960, 865, 767, 675, 765, 732, 613, 632, 659, 705, 684, 734, 715, 626, 551, 487, 500, 536, 575, 595, 736, 798, 832, 797, 792, 726, 650, 584, 567, 524, 574, 571, 591, 657, 699, 756, 867, 795, 760, 685, 609, 588, 521, 581, 614, 623, 668, 702, 777, 697, 647, 562, 523, 508, 493, 504, 534, 586, 621, 620, 636, 600, 549, 557)), .Names = c("dates", "values"), row.names = c(NA, -156L), class = "data.frame")
An awful piece of R coding here by me, but it might give you some ideas on how to do it: This was the data I used taken from the excel file on the linked site: > dput(redata) structure(c(1093L, 1182L, 1299L, 1372L, 1319L, 1362L, 1239L, 1162L, 1059L, 921L, 815L, 720L, 835L, 853L, 1034L, 1030L, 1240L, 1388L, 1429L, 1319L, 1231L, 1184L, 1076L, 825L, 991L, 1093L, 854L, 808L, 1079L, 1092L, 1220L, 1251L, 1130L, 1131L, 1052L, 951L, 950L, 1006L, 1112L, 1119L, 1250L, 1322L, 1347L, 1310L, 1215L, 1128L, 1035L, 992L, 1079L, 1018L, 1112L, 1224L, 1323L, 1344L, 1326L, 1267L, 1171L, 1075L, 916L, 932L, 888L, 904L, 939L, 1018L, 1140L, 1174L, 1285L, 1311L, 1298L, 1231L, 1091L, 1088L, 991L, 1028L, 1177L, 1322L, 1322L, 1398L, 1389L, 1174L, 1196L, 1115L, 756L, 496L, 693L, 673L, 748L, 777L, 820L, 948L, 966L, 1027L, 960L, 865L, 767L, 675L, 765L, 732L, 613L, 632L, 659L, 705L, 684L, 734L, 715L, 626L, 551L, 487L, 500L, 536L, 575L, 595L, 736L, 798L, 832L, 797L, 792L, 726L, 650L, 584L, 567L, 524L, 574L, 571L, 591L, 657L, 699L, 756L, 867L, 795L, 760L, 685L, 609L, 588L, 521L, 581L, 614L, 623L, 668L, 702L, 777L, 697L, 647L, 562L, 523L, 508L, 493L, 504L, 534L, 586L, 621L, 620L, 636L, 600L, 549L, 557L ), .Dim = 12:13, .Dimnames = list(c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"), c("X1993", "X1994", "X1995", "X1996", "X1997", "X1998", "X1999", "X2000", "X2001", "X2002", "X2003", "X2004", "X2005"))) And here's my woeful coding... so much cleanup is possible here, but it is a quick test of the possibilities. monthnames <- c( "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" ) # size of window windows(w=6,h=3) # margins par( mar=c(5.1,5.1,2.1,2.1), cex.axis=0.7 ) # set up plot with the number of categories and the y limits # yaxs="i" sets the yaxis as having no separation from the corner point ylimlp <- c(0,max(redata))*1.06 plot(1:156, type="n", xaxt="n", ylim=ylimlp, ann=FALSE, yaxs="i", xaxs="i", bty="l", las="1") abline(v=seq(13,156,13),lty=1,col="grey") title(xlab="Month", col.lab=rgb(0,0,0), font.lab=2, cex.lab=0.75) title(ylab="Listings", col.lab=rgb(0,0,0), font.lab=2, cex.lab=0.75) lines(redata[1,],type="l",pch=NA,lwd=1,col="grey") lines(c(rep(NA,13*1),redata[2,]),type="l",pch=NA,lwd=1,col="grey") lines(c(rep(NA,13*2),redata[3,]),type="l",pch=NA,lwd=1,col="grey") lines(c(rep(NA,13*3),redata[4,]),type="l",pch=NA,lwd=1,col="grey") lines(c(rep(NA,13*4),redata[5,]),type="l",pch=NA,lwd=1,col="grey") lines(c(rep(NA,13*5),redata[6,]),type="l",pch=NA,lwd=1,col="grey") lines(c(rep(NA,13*6),redata[7,]),type="l",pch=NA,lwd=1,col="grey") lines(c(rep(NA,13*7),redata[8,]),type="l",pch=NA,lwd=1,col="grey") lines(c(rep(NA,13*8),redata[9,]),type="l",pch=NA,lwd=1,col="grey") lines(c(rep(NA,13*9),redata[10,]),type="l",pch=NA,lwd=1,col="grey") lines(c(rep(NA,13*10),redata[11,]),type="l",pch=NA,lwd=1,col="grey") lines(c(rep(NA,13*11),redata[12,]),type="l",pch=NA,lwd=1,col="grey") redatamonthmean <- apply(redata,1,mean) lines(rep(redatamonthmean[1],13),type="l",pch=NA,lwd=1,col="black") lines(c(rep(NA,13*1),rep(redatamonthmean[2],13)),type="l",pch=NA,lwd=1,col="black") lines(c(rep(NA,13*2),rep(redatamonthmean[3],13)),type="l",pch=NA,lwd=1,col="black") lines(c(rep(NA,13*3),rep(redatamonthmean[4],13)),type="l",pch=NA,lwd=1,col="black") lines(c(rep(NA,13*4),rep(redatamonthmean[5],13)),type="l",pch=NA,lwd=1,col="black") lines(c(rep(NA,13*5),rep(redatamonthmean[6],13)),type="l",pch=NA,lwd=1,col="black") lines(c(rep(NA,13*6),rep(redatamonthmean[7],13)),type="l",pch=NA,lwd=1,col="black") lines(c(rep(NA,13*7),rep(redatamonthmean[8],13)),type="l",pch=NA,lwd=1,col="black") lines(c(rep(NA,13*8),rep(redatamonthmean[9],13)),type="l",pch=NA,lwd=1,col="black") lines(c(rep(NA,13*9),rep(redatamonthmean[10],13)),type="l",pch=NA,lwd=1,col="black") lines(c(rep(NA,13*10),rep(redatamonthmean[11],13)),type="l",pch=NA,lwd=1,col="black") lines(c(rep(NA,13*11),rep(redatamonthmean[12],13)),type="l",pch=NA,lwd=1,col="black") mtext(monthnames[1], side=1, cex=0.7, at=6.5) mtext(monthnames[2], side=1, cex=0.7, at=6.5*3) mtext(monthnames[3], side=1, cex=0.7, at=6.5*5) mtext(monthnames[4], side=1, cex=0.7, at=6.5*7) mtext(monthnames[5], side=1, cex=0.7, at=6.5*9) mtext(monthnames[6], side=1, cex=0.7, at=6.5*11) mtext(monthnames[7], side=1, cex=0.7, at=6.5*13) mtext(monthnames[8], side=1, cex=0.7, at=6.5*15) mtext(monthnames[9], side=1, cex=0.7, at=6.5*17) mtext(monthnames[10], side=1, cex=0.7, at=6.5*19) mtext(monthnames[11], side=1, cex=0.7, at=6.5*21) mtext(monthnames[12], side=1, cex=0.7, at=6.5*23) And the example image