Trouble finding the MSE value during K-fold cross validation procedure - r

I am currently doing a K-fold cross validation procedure to determine the best model (linear or quadratic) for this data is. My data comes from a CSV dataset called combinedData which I've pasted a dput for below:
structure(list(Unit.ID = c(925L, 967L, 1054L, 967L, 1054L, 967L,
1160L, 1054L, 1160L, 967L, 967L, 1054L, 1160L, 967L, 1054L, 1160L,
967L, 1160L, 1054L, 1054L, 967L, 1160L, 1054L, 967L, 1160L, 1054L,
967L, 1160L, 1054L, 164L, 967L, 967L, 1160L, 1054L, 164L, 967L,
164L, 1160L, 164L, 1054L, 967L, 164L, 1054L, 967L, 1054L, 164L,
967L, 164L, 164L, 1054L, 967L, 164L, 967L, 164L, 1054L, 164L,
925L, 164L, 967L, 1054L, 1054L, 925L, 925L, 164L, 165L, 164L,
1054L, 967L, 164L, 165L, 967L, 164L, 164L, 165L, 1054L, 967L,
967L, 165L, 164L, 1054L, 967L, 165L, 967L, 165L, 164L, 967L,
164L, 967L, 164L, 967L, 164L, 967L, 164L, 1054L, 164L, 164L,
164L, 164L, 164L, 164L, 164L), Hour.Meter.Reading = c(34L, 381L,
532L, 600L, 732L, 783L, 796L, 947L, 1016L, 1038L, 1200L, 1282L,
1290L, 1388L, 1481L, 1528L, 1579L, 1671L, 1704L, 1728L, 1755L,
1906L, 1926L, 1936L, 2031L, 2063L, 2136L, 2205L, 2293L, 2321L,
2342L, 2382L, 2425L, 2505L, 2524L, 2576L, 2704L, 2731L, 2777L,
2811L, 2873L, 2960L, 2997L, 3080L, 3170L, 3175L, 3264L, 3371L,
3386L, 3425L, 3485L, 3570L, 3690L, 3740L, 3746L, 3854L, 3863L,
3976L, 3990L, 3991L, 4078L, 4103L, 4106L, 4138L, 4138L, 4216L,
4249L, 4253L, 4305L, 4326L, 4353L, 4483L, 4489L, 4489L, 4500L,
4580L, 4581L, 4652L, 4721L, 4742L, 4784L, 4805L, 4828L, 4943L,
4947L, 4954L, 4968L, 5298L, 5316L, 5407L, 5533L, 5628L, 5712L,
5747L, 5951L, 6165L, 6194L, 6439L, 6636L, 6702L, 6918L), Labour.Cost = c(1102.5,
4270, 542.5, 2730, 682.5, 3097.5, 336, 871.5, 525, 2695, 1837.5,
1092, 1995, 2572.5, 1092, 924, 840, 1575, 693, 693, 560, 2100,
7959, 2747.5, 1092, 1764, 2030, 5355, 7434, 315, 1890, 2688,
504, 3024, 805, 1701, 577.5, 777, 6440, 1281, 588, 4910, 1470,
1911, 3738, 4140, 9219, 525, 1995, 1239, 1491, 2292.5, 4389,
2012.5, 1134, 945, 490, 3307.5, 714, 756, 1302, 297.5, 875, 1872.5,
1435, 1767.5, 2037, 3108, 1645, 1067.5, 3087, 1452.5, 11777.5,
5670, 4872, 2916, 4158, 5350, 2817.5, 84, 1596, 3865, 714, 910,
4112.5, 1197, 3622.5, 714, 3675, 4767, 3150, 2142, 2436, 210,
1974, 3843, 14532, 2373, 2919, 7098, 2205), Parts.Cost = c(657.6733,
6451.9113, 2235.8885, 6729.7326, 8357.0427, 9224.9012, 1957.0181,
6890.5315, 3156.4815, 2009.3578, 4555.0977, 3458.6842, 1546.2183,
6249.232, 4430.8058, 3835.5721, 3415.2062, 4868.2379, 2151.4558,
2233.2055, 2554.7489, 7433.8141, 2563.289, 3348.7162, 2173.6179,
1940.2806, 4404.6421, 5626.8595, 10553.4599, 12.62, 11405.5704,
2554.2787, 1907.3543, 12625.7525, 243.5735, 6104.7416, 405.959,
3609.1684, 4647.767, 12842.3638, 489.477, 9961.5883, 1706.0572,
2381.7686, 15177.0692, 5416.7948, 16538.1428, 253.3975, 1390.5058,
8699.7549, 7759.8042, 5128.0276, 8556.2625, 5760.523, 1923.699,
628.643, 158.4313, 14481.7111, 3796.3243, 11671.4333, 7140.2504,
1326.837, 441.0999, 2866.2141, 4229.31, 2935.825, 7452.8686,
11683.7093, 2644.1532, 418.679, 11665.8066, 523.9236, 18247.2776,
8115.265, 25011.6846, 13727.0801, 31786.6422, 6064.3123, 10599.0455,
119.4423, 1228.3541, 3587.7566, 3666.517, 472.1537, 1968.7669,
1417.8506, 8023.1254, 5831.6884, 14873.8008, 10193.2736, 6442.1719,
7525.4562, 4378.1336, 1691.4286, 12144.6891, 13094.8609, 20582.1682,
2544.103, 16934.6748, 17344.5551, 8912.7088), Total.Cost = c(1760.1733,
10721.9113, 2778.3885, 9459.7326, 9039.5427, 12322.4012, 2293.0181,
7762.0315, 3681.4815, 4704.3578, 6392.5977, 4550.6842, 3541.2183,
8821.732, 5522.8058, 4759.5721, 4255.2062, 6443.2379, 2844.4558,
2926.2055, 3114.7489, 9533.8141, 10522.289, 6096.2162, 3265.6179,
3704.2806, 6434.6421, 10981.8595, 17987.4599, 327.62, 13295.5704,
5242.2787, 2411.3543, 15649.7525, 1048.5735, 7805.7416, 983.459,
4386.1684, 11087.767, 14123.3638, 1077.477, 14871.5883, 3176.0572,
4292.7686, 18915.0692, 9556.7948, 25757.1428, 778.3975, 3385.5058,
9938.7549, 9250.8042, 7420.5276, 12945.2625, 7773.023, 3057.699,
1573.643, 648.4313, 17789.2111, 4510.3243, 12427.4333, 8442.2504,
1624.337, 1316.0999, 4738.7141, 5664.31, 4703.325, 9489.8686,
14791.7093, 4289.1532, 1486.179, 14752.8066, 1976.4236, 30024.7776,
13785.265, 29883.6846, 16643.0801, 35944.6422, 11414.3123, 13416.5455,
203.4423, 2824.3541, 7452.7566, 4380.517, 1382.1537, 6081.2669,
2614.8506, 11645.6254, 6545.6884, 18548.8008, 14960.2736, 9592.1719,
9667.4562, 6814.1336, 1901.4286, 14118.6891, 16937.8609, 35114.1682,
4917.103, 19853.6748, 24442.5551, 11117.7088), Cumulative.Cost = c(1760.1733,
12482.0846, 15260.4731, 24720.2057, 33759.7484, 46082.1496, 48375.1677,
56137.1992, 59818.6807, 64523.0385, 70915.6362, 75466.3204, 79007.5387,
87829.2707, 93352.0765, 98111.6486, 102366.8548, 108810.0927,
111654.5485, 114580.754, 117695.5029, 127229.317, 137751.606,
143847.8222, 147113.4401, 150817.7207, 157252.3628, 168234.2223,
186221.6822, 186549.3022, 199844.8726, 205087.1513, 207498.5056,
223148.2581, 224196.8316, 232002.5732, 232986.0322, 237372.2006,
248459.9676, 262583.3314, 263660.8084, 278532.3967, 281708.4539,
286001.2225, 304916.2917, 314473.0865, 340230.2293, 341008.6268,
344394.1326, 354332.8875, 363583.6917, 371004.2193, 383949.4818,
391722.5048, 394780.2038, 396353.8468, 397002.2781, 414791.4892,
419301.8135, 431729.2468, 440171.4972, 441795.8342, 443111.9341,
447850.6482, 453514.9582, 458218.2832, 467708.1518, 482499.8611,
486789.0143, 488275.1933, 503027.9999, 505004.4235, 535029.2011,
548814.4661, 578698.1507, 595341.2308, 631285.873, 642700.1853,
656116.7308, 656320.1731, 659144.5272, 666597.2838, 670977.8008,
672359.9545, 678441.2214, 681056.072, 692701.6974, 699247.3858,
717796.1866, 732756.4602, 742348.6321, 752016.0883, 758830.2219,
760731.6505, 774850.3396, 791788.2005, 826902.3687, 831819.4717,
851673.1465, 876115.7016, 887233.4104)), class = "data.frame", row.names = c(NA,
-101L))
So far, I've created all the models I need (K=5) for both linear and quadratic models and I am at the stage where I am trying to calculate the MSE and R squarred values. Here's the code for the process below:
#linear model (Model 1) k-validation
#splitting the testing data into 5 k folds
set.seed(123)
idx <- sample(1:nrow(combinedDataset), nrow(combinedDataset))
view(idx)
test_size <- floor(nrow(combinedDataset)*0.2)
test1 <- combinedDataset[idx[1:test_size],]
train1 <- combinedDataset[-idx[1:test_size],]
view(test1)
view(train1)
train_X1 <- train1$Hour.Meter.Reading
train_y1 <- train1$Cumulative.Cost
test_X1 <- test1$Hour.Meter.Reading
test_y1 <- test1$Cumulative.Cost
X1 <- train_X1
y1 <- train_y1
#Create the 5 linear model equations
poly_order <- 1
Model1 <- lm(y1~poly(X1, poly_order))
print(Model1)
#Calculate MSE
test_yhat1 <- predict(Model1, data.frame(X1 = test_X1))
MSE1 <- mean((test_y1-test_yhat1)^2)
print(MSE1)
But for the last part of the code where I am calculating the MSE value for the first model, I keep getting this error:
Error: variable 'poly(X1, poly_order)' was fitted with type "nmatrix.1" but type "nmatrix.2" was supplied
In addition: Warning message:
In Z/rep(sqrt(norm2[-1L]), each = length(x)) :
longer object length is not a multiple of shorter object length
I have no idea what that code means or how to fix it. I've checked over my code multiple times but I haven't noticed anything wrong with my Model1.
Edit: Made the code shorter

Related

R: how to get rid of line from first point to last point

Here's the data I am working with.
https://www.dropbox.com/s/dn0dom094epuj2k/test.dat?dl=0
I am simply reading this data in and plotting it:
plot(x=phases, y=mag.lambdas, main=paste("Set D Light Curve",sep=" "), type = "l", pch=3, col="purple",
xlab=expression("Phase"), ylab=expression("Absolute Magnitude (mags)"), cex.main=1.60, cex.lab=1.50, ceb.axis=1.80)
Yet, when I do so, I get a straight line from the first point to the last point.
How do I get rid of this line?
Just a minor fix sorting the data using order().
dat <- read.table('test.dat', header=TRUE)
o <- order(dat$phases)
op <- par(mar=c(5, 5, 4, 3)+.1) ## to fix the ylab issue
with(dat, plot(x=phases[o], y=mag.lambdas[o],
main=paste("Set D Light Curve", sep=" "),
type="l", pch=3, col="purple", xlab=expression("Phase"),
ylab=expression("Absolute Magnitude (mags)"), cex.main=1.60,
cex.lab=1.50, cex.axis=1.80))
par(op) ## reset to defaults
Example data (reduced):
dat <- structure(list(phases = c(0.109120693761022, 0.185397836784117,
0.249938230401199, 0.30822778363147, 0.363435269696223, 0.455084249279825,
0.543552820242301, 0.655747752044217, 0.774866453366206, 0.856206594036075,
0.982585270988032, 0.100415444179982, 0.178014804373087, 0.243589226128645,
0.302336457676285, 0.357806463203636, 0.438138755151517, 0.534438861334455,
0.642183464272524, 0.765461100969384, 0.848464475619498, 0.965588333338646,
0.0917064615210546, 0.170566091284324, 0.237176977834675, 0.296375445141231,
0.352149626454385, 0.420152568949603, 0.525048346872039, 0.628312433306401,
0.75484614531608, 0.840001508109705, 0.947550129709331), mag.lambdas = c(-2.60908535740988,
-2.60768038551774, -2.73345412091621, -2.79093891301878, -2.85664469201418,
-2.9379744328431, -3.07326641740626, -3.17106458057106, -2.97427279239455,
-2.8699631559454, -2.68495601145474, -2.61243148043341, -2.60591330325558,
-2.6375863091666, -2.78291529193605, -2.85146326816402, -2.93295354167362,
-3.0573674843861, -3.1712588336948, -3.03203368817616, -2.87787161464618,
-2.69569380800697, -2.61638329170682, -2.60465158131301, -2.63317322633531,
-2.77584913218085, -2.84624680934001, -2.93274737295461, -3.04100660609613,
-3.16794769204494, -3.05057297981154, -2.88664628320297, -2.79770401055239
)), row.names = c(100L, 200L, 300L, 400L, 500L, 600L, 700L, 800L,
900L, 1000L, 1100L, 1200L, 1300L, 1400L, 1500L, 1600L, 1700L,
1800L, 1900L, 2000L, 2100L, 2200L, 2300L, 2400L, 2500L, 2600L,
2700L, 2800L, 2900L, 3000L, 3100L, 3200L, 3300L), class = "data.frame")

Remove steps in time series

I am analyzing lizards increase of temperature. Problem is that the animals sometime move moving the thermometer we are using for the measurements. This results in spikes (few measurements extremely high or low) and steps (sudden offsets of the curve). I wanted to clean my curves from this noise, removing the spikes and aligning the two parts of the curve separated by each step. I guess the spikes can be easily smoothed but what about the steps?
Examples of time series with steps marked in yellow and spikes in red
This is a sample from my dataframe
structure(list(ID = structure(c(183L, 183L, 183L, 183L, 183L,
183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L,
183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L,
183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L,
183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L,
183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L,
183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L,
183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L,
183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L,
183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L,
183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L,
183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L,
183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L,
183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L,
183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L,
183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L, 183L,
183L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L,
194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L,
194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L,
194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L,
194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L,
194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L,
194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L,
194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L,
194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L,
194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L,
194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L,
194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L,
194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L,
194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L,
194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L,
194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L,
194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L,
194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L,
194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L, 194L,
194L, 194L, 194L), .Label = c("101", "102", "104", "106", "107",
"109", "110", "111", "112", "113", "114", "115", "116", "118",
"119", "121", "122", "123", "124", "125", "126", "128", "129",
"130", "132", "133", "134", "157", "158", "161", "163", "164",
"165", "166", "167", "168", "169", "170", "171", "172", "173",
"174", "175", "177", "178", "179", "180", "181", "182", "183",
"186", "187", "188", "189", "191", "192", "193", "194", "195",
"196", "198", "199", "201", "202", "203", "204", "205", "207",
"208", "209", "210", "211", "213", "214", "215", "218", "219",
"220", "221", "222", "223", "224", "225", "226", "227", "228",
"229", "230", "233", "235", "307", "308", "310", "311", "312",
"313", "314", "315", "316", "317", "318", "319", "320", "321",
"322", "323", "324", "325", "326", "327", "328", "329", "330",
"331", "332", "333", "334", "335", "336", "339", "340", "341",
"343", "344", "346", "347", "348", "349", "350", "351", "352",
"353", "354", "355", "356", "357", "358", "58", "59", "60", "61",
"62", "63", "64", "65", "66", "67", "68", "69", "70", "71", "72",
"73", "74", "75", "76", "78", "79", "80", "81", "82", "83", "84",
"85", "86", "87", "88", "89", "90", "91", "92", "93", "94", "95",
"96", "97", "98", "99", "F04000017", "F04001300", "F04060000",
"F04070000", "F04080000", "F05000017", "F05000020", "F05060000",
"F05070000", "FSUM", "M03100000", "M04000016", "M04090000", "M04100000",
"M05000016", "M05000018", "M05080000", "M05090000", "M05100000",
"MSUM"), class = "factor"), TIME = c(1600, 0, 180, 280, 1270,
190, 1570, 100, 630, 110, 1530, 790, 90, 650, 910, 1520, 460,
270, 710, 1240, 1610, 550, 470, 1320, 360, 1220, 860, 540, 290,
1330, 1180, 170, 1700, 990, 1060, 420, 10, 330, 1110, 1160, 890,
260, 620, 1140, 1420, 380, 300, 1650, 1430, 200, 490, 1590, 150,
430, 720, 950, 730, 1660, 1260, 740, 810, 780, 1370, 960, 1130,
1210, 1670, 1010, 760, 1280, 1450, 770, 1070, 1000, 1030, 690,
80, 1040, 1050, 160, 1350, 1230, 1630, 640, 1390, 1460, 1100,
850, 1120, 390, 1170, 980, 320, 590, 520, 1090, 560, 870, 1020,
750, 140, 700, 70, 1500, 340, 1290, 530, 840, 230, 370, 1440,
1200, 1480, 30, 1400, 210, 1300, 480, 450, 1580, 1560, 410, 1360,
900, 670, 1540, 50, 500, 400, 1380, 930, 580, 1680, 600, 1340,
240, 1190, 570, 1640, 940, 350, 1550, 830, 310, 880, 1080, 920,
1490, 610, 220, 1470, 800, 820, 40, 1250, 130, 60, 680, 20, 970,
1620, 440, 1690, 120, 510, 1410, 660, 250, 1310, 1150, 1510,
1230, 1540, 320, 1380, 680, 410, 480, 580, 130, 670, 1200, 1900,
1410, 1470, 1950, 400, 870, 970, 1740, 500, 1300, 1220, 240,
660, 1100, 20, 490, 590, 900, 820, 1910, 920, 690, 1290, 1710,
750, 1860, 1840, 880, 1250, 2070, 640, 1580, 1310, 800, 1000,
2090, 1760, 330, 760, 40, 600, 1800, 200, 1730, 2080, 470, 30,
1490, 2030, 860, 1620, 250, 1880, 790, 90, 360, 1520, 460, 180,
1770, 1450, 1940, 1370, 1180, 290, 2060, 1240, 1500, 1480, 510,
980, 1610, 1630, 950, 1980, 990, 140, 60, 110, 1680, 10, 550,
1700, 1750, 1270, 1690, 230, 2010, 1070, 910, 300, 1850, 1140,
1350, 930, 170, 1650, 1170, 1820, 1040, 1420, 120, 1150, 50,
960, 1560, 1790, 630, 280, 740, 1890, 2020, 370, 2040, 2050,
310, 380, 1920, 340, 1030, 430, 1330, 1670, 1960, 830, 1590,
420, 1400, 560, 1360, 1460, 1440, 1120, 1870, 1720, 1130, 730,
390, 850, 520, 1090, 1550, 100, 1930, 1010, 440, 210, 1390, 720,
1190, 450, 770, 1320, 220, 1640, 570, 2100, 1210, 650, 2000,
840, 1340, 1830, 530, 1110, 1260, 890, 700, 80, 810, 1060, 260,
1510, 1600, 70, 1430, 1280, 1530, 1020, 540, 940, 1050, 710,
1660, 1570, 270, 610, 620, 780, 1780, 350, 1970, 0, 1160, 1990,
190, 150, 1080, 160, 1810), LIZ = c(33.88628, 16.01848, 19.07537,
20.7521, 31.20864, 19.28933, 33.81645, 17.74926, 25.17476, 17.96775,
35.05947, 27.00651, 17.59508, 25.4069, 28.208, 34.99299, 22.88728,
20.59275, 26.18355, 31.03587, 33.947, 24.10997, 23.02703, 33.7095,
22.17608, 30.91595, 27.75973, 23.97134, 20.93754, 33.80886, 30.68742,
18.91375, 33.92175, 29.09865, 29.63962, 22.70535, 16.21696, 21.63548,
30.08059, 30.53127, 27.98175, 20.43409, 25.05877, 30.36036, 34.46184,
22.44901, 21.10243, 33.94136, 34.52333, 19.46398, 23.28305, 33.80731,
18.61317, 22.47734, 26.25653, 28.72091, 26.38854, 33.93099, 31.15667,
26.51461, 27.30298, 26.90585, 34.10386, 28.83213, 30.1994, 30.83049,
33.89071, 29.26265, 26.67329, 32.40561, 34.64661, 26.80018, 29.67405,
29.16813, 29.42029, 25.87777, 17.42933, 29.50874, 29.60253, 18.75925,
33.97794, 30.99272, 33.94607, 25.2893, 34.24567, 34.68945, 30.0688,
27.65221, 30.13411, 22.59461, 30.63778, 28.96221, 21.41397, 24.58403,
23.68083, 29.85481, 24.22983, 27.8342, 29.36583, 26.60143, 18.46704,
26.03891, 17.26645, 34.9014, 21.84469, 33.39574, 23.82, 27.57049,
19.93821, 22.30472, 34.60674, 30.77051, 34.79948, 16.55761, 34.33158,
19.62092, 33.46234, 23.15645, 22.68871, 33.79304, 33.88377, 22.99619,
34.04042, 28.1129, 25.6251, 34.21122, 16.89074, 23.43516, 22.78885,
34.17764, 28.47507, 24.45472, 33.8742, 24.72735, 33.90453, 20.10577,
30.73417, 24.35771, 33.93048, 28.62741, 21.99781, 33.97049, 27.48719,
21.25535, 27.92438, 29.80396, 28.35731, 34.8615, 24.93956, 19.783,
34.76926, 27.19699, 27.40897, 16.72252, 31.08047, 18.32881, 17.0416,
25.72955, 16.38935, 28.8926, 33.97636, 22.53829, 33.86432, 18.22302,
23.55972, 34.38583, 25.50873, 20.27311, 33.6045, 30.43855, 34.93784,
35.288345, 36.257075, 25.391195, 36.09727, 30.813695, 27.044815,
28.04567, 29.548165, 20.606055, 30.69551, 35.129335, 37.32639,
36.37374, 36.308585, 37.41784, 26.86707, 32.548155, 33.36402,
37.37834, 28.385865, 35.88449, 35.231535, 23.53661, 30.580155,
34.58997, 16.9712, 28.21824, 29.69837, 32.803, 32.16565, 37.3345,
32.96087, 30.917135, 35.835545, 37.40881, 31.57274, 37.28267,
37.3545, 32.635805, 35.639945, 37.55586, 30.322245, 36.361875,
35.93363, 32.083515, 33.9122, 37.57389, 37.38447, 25.60093, 31.70867,
17.74926, 29.83628, 37.32308, 22.486975, 37.37677, 37.54575,
27.8517, 17.37036, 36.13542, 37.58466, 32.4728, 36.84912, 23.79395,
37.2999, 32.03574, 19.402135, 27.835535, 36.185425, 27.68629,
21.91643, 37.37574, 36.612895, 37.38406, 36.11211, 35.03594,
24.74032, 37.55308, 35.4775, 36.152685, 36.125095, 28.51215,
33.614975, 36.71473, 36.859965, 33.190425, 37.57786, 33.82105,
20.911715, 18.444165, 19.9942, 36.89602, 16.551, 29.161815, 37.4295,
37.38779, 35.75298, 36.91141, 23.29514, 37.59916, 34.393665,
32.88079, 24.885135, 37.30579, 34.812115, 36.05718, 33.03272,
21.730805, 36.877595, 34.980885, 37.34123, 34.19463, 36.436875,
20.27451, 34.880975, 18.10635, 33.268755, 36.24434, 37.33012,
30.18474, 24.530635, 31.445215, 37.32058, 37.6003, 36.417575,
37.57818, 37.56227, 25.15269, 26.548085, 37.34238, 25.799625,
34.12894, 27.14156, 36.0024, 36.89349, 37.48383, 32.23698, 36.386895,
27.013425, 36.295295, 29.29934, 36.091595, 36.642735, 36.5601,
34.706135, 37.27491, 37.38146, 34.758915, 31.35675, 26.709435,
32.374665, 28.69313, 34.52595, 36.22561, 19.717425, 37.35049,
33.991875, 27.288855, 22.76699, 36.179625, 31.25419, 35.08752,
27.507965, 31.821405, 35.97357, 23.037415, 36.86924, 29.408195,
37.60048, 35.18184, 30.45653, 37.58567, 32.27861, 36.04241, 37.34733,
28.86293, 34.64831, 35.684655, 32.71743, 31.01652, 19.07221,
32.1161, 34.32663, 24.059185, 36.16283, 36.413085, 18.77961,
36.49902, 35.7932, 36.214135, 34.05778, 29.01832, 33.111965,
34.2606, 31.14351, 36.88914, 36.326515, 24.30303, 29.97399, 30.103995,
31.92968, 37.36615, 26.01068, 37.52906, 16.12443, 34.929045,
37.59149, 22.175545, 21.19651, 34.4613, 21.473975, 37.33004),
COP = c(22.39478, 22.68049, 22.27224, 22.19685, 22.16573,
22.26254, 22.34891, 22.41931, 22.15782, 22.39973, 22.31492,
22.13082, 22.44822, 22.16036, 22.13963, 22.3111, 22.15529,
22.20868, 22.14849, 22.16546, 22.40751, 22.15873, 22.15807,
22.15667, 22.17006, 22.16576, 22.13619, 22.15633, 22.18792,
22.15968, 22.15784, 22.28282, 22.39207, 22.15544, 22.15307,
22.15378, 22.65061, 22.18484, 22.14693, 22.1517, 22.14945,
22.22223, 22.15782, 22.15069, 22.18498, 22.17095, 22.18882,
22.44182, 22.18867, 22.25752, 22.16363, 22.37156, 22.33576,
22.14907, 22.15658, 22.15133, 22.1547, 22.43926, 22.16738,
22.15422, 22.13264, 22.14114, 22.17317, 22.15817, 22.14417,
22.16643, 22.43323, 22.14401, 22.14921, 22.15957, 22.22265,
22.14089, 22.14463, 22.14593, 22.14691, 22.14661, 22.47322,
22.15314, 22.15694, 22.3083, 22.16482, 22.16823, 22.42285,
22.16284, 22.17225, 22.2391, 22.1465, 22.12234, 22.14539,
22.15492, 22.14823, 22.15392, 22.18663, 22.14773, 22.16047,
22.14056, 22.15791, 22.14152, 22.14251, 22.1509, 22.35001,
22.14744, 22.49713, 22.28185, 22.18089, 22.15846, 22.15929,
22.12327, 22.23335, 22.16951, 22.20456, 22.16193, 22.25876,
22.60373, 22.17513, 22.25065, 22.15608, 22.16255, 22.14552,
22.36192, 22.3395, 22.15159, 22.16513, 22.14421, 22.15804,
22.31474, 22.5485, 22.16658, 22.15816, 22.16993, 22.14259,
22.14321, 22.4235, 22.16097, 22.16565, 22.22893, 22.16082,
22.14911, 22.43463, 22.14959, 22.16988, 22.32926, 22.12439,
22.18906, 22.14787, 22.143, 22.14448, 22.26607, 22.16002,
22.24653, 22.25201, 22.13589, 22.13117, 22.578, 22.16947,
22.36273, 22.51722, 22.15239, 22.62479, 22.15932, 22.41646,
22.14778, 22.41367, 22.38414, 22.16356, 22.18521, 22.16367,
22.22729, 22.16007, 22.15065, 22.29892, 21.72296, 21.62249,
22.017515, 21.693785, 21.70153, 21.87866, 21.81236, 21.732425,
22.36322, 21.70375, 21.72846, 21.91553, 21.68792, 21.660505,
22.00908, 21.89052, 21.763825, 21.760305, 21.83179, 21.79684,
21.71059, 21.726415, 22.149185, 21.70785, 21.75106, 22.571055,
21.804205, 21.7291, 21.761135, 21.747215, 21.92869, 21.76272,
21.705115, 21.712995, 21.83027, 21.708885, 21.83844, 21.84054,
21.768415, 21.71803, 22.11175, 21.70465, 21.60251, 21.70955,
21.742635, 21.753505, 22.13507, 21.82251, 21.999255, 21.717505,
22.53302, 21.72247, 21.82229, 22.22044, 21.829, 22.12267,
21.82473, 22.55087, 21.6558, 22.09163, 21.76516, 21.593285,
22.13412, 21.86871, 21.73596, 22.43231, 21.943965, 21.636055,
21.836155, 22.26584, 21.82428, 21.672995, 21.98196, 21.694165,
21.73181, 22.066775, 22.10273, 21.71976, 21.64993, 21.65895,
21.788935, 21.759365, 21.59079, 21.59272, 21.762905, 22.04811,
21.755645, 22.34494, 22.4932, 22.397445, 21.59399, 22.582895,
21.750065, 21.84207, 21.82297, 21.712635, 21.593685, 22.164485,
22.07698, 21.7517, 21.760385, 22.04753, 21.8382, 21.741645,
21.69029, 21.759415, 22.28582, 21.590015, 21.73355, 21.83193,
21.75102, 21.68036, 22.378585, 21.73784, 22.51333, 21.75792,
21.609515, 21.82392, 21.70592, 22.084645, 21.708, 21.89372,
22.08329, 21.93241, 22.10217, 22.10085, 22.030495, 21.91551,
21.94469, 21.979055, 21.75095, 21.858405, 21.698245, 21.59609,
22.02914, 21.755635, 21.593795, 21.86841, 21.689295, 21.739825,
21.691495, 21.66596, 21.674615, 21.74691, 21.84458, 21.82984,
21.741525, 21.710355, 21.90157, 21.762305, 21.77891, 21.750345,
21.61608, 22.41471, 21.95989, 21.7551, 21.84604, 22.198325,
21.691115, 21.709805, 21.732425, 21.84097, 21.723055, 21.70937,
22.179165, 21.58997, 21.732705, 22.13691, 21.725925, 21.70574,
22.06154, 21.755495, 21.692555, 21.83686, 21.77328, 21.74777,
21.716135, 21.761175, 21.709325, 22.451175, 21.749225, 21.750545,
22.117925, 21.636325, 21.59097, 22.47463, 21.67836, 21.715015,
21.63479, 21.75122, 21.762415, 21.764115, 21.751305, 21.70731,
21.591635, 21.606505, 22.09898, 21.71826, 21.71403, 21.727555,
21.81842, 21.963065, 22.03673, 22.597065, 21.73344, 22.05635,
22.243885, 22.325185, 21.75266, 22.3091, 21.82619), BLK = c(28.64989,
15.80412, 20.22131, 22.01231, 28.0591, 20.41488, 28.57393,
18.42224, 25.81396, 18.66597, 28.51571, 26.66224, 18.17439,
25.95201, 27.20529, 28.51703, 24.38048, 21.85311, 26.30937,
28.02257, 28.6773, 25.22704, 24.48749, 28.1236, 23.18583,
27.98485, 27.00689, 25.14024, 22.16927, 28.1335, 27.89662,
20.00687, 28.67155, 27.44175, 27.62627, 23.92489, 16.09932,
22.78544, 27.75227, 27.83675, 27.13084, 21.69284, 25.74085,
27.80579, 28.28594, 23.44893, 22.33534, 28.7166, 28.29154,
20.61311, 24.68904, 28.60215, 19.59335, 24.03528, 26.36296,
27.34839, 26.40381, 28.7205, 28.04637, 26.45016, 26.76595,
26.62852, 28.18898, 27.38003, 27.79384, 27.96629, 28.71652,
27.47686, 26.54292, 28.06953, 28.37088, 26.5854, 27.64778,
27.45691, 27.54817, 26.17697, 17.91728, 27.57453, 27.59667,
19.79719, 28.15391, 28.01052, 28.70145, 25.8918, 28.19863,
28.40318, 27.73245, 26.95575, 27.77656, 23.56018, 27.86381,
27.42115, 22.64232, 25.52273, 24.96468, 27.69186, 25.29434,
27.05567, 27.51177, 26.4991, 19.37665, 26.24547, 17.66023,
28.45211, 22.92771, 28.08868, 25.05819, 26.9147, 21.16986,
23.31623, 28.31888, 27.94109, 28.41742, 16.63277, 28.21869,
20.81001, 28.10906, 24.58388, 24.26295, 28.59208, 28.56833,
23.80437, 28.16685, 27.16407, 26.06582, 28.53314, 17.15336,
24.78023, 23.69012, 28.19161, 27.27431, 25.4426, 28.70709,
25.60821, 28.15239, 21.35028, 27.92147, 25.36057, 28.71294,
27.31714, 23.05455, 28.55633, 26.8609, 22.49237, 27.09901,
27.67287, 27.23569, 28.43207, 25.6761, 20.99499, 28.41302,
26.72662, 26.81219, 16.89905, 28.04126, 19.14027, 17.39888,
26.11992, 16.37336, 27.39998, 28.69902, 24.15099, 28.69344,
18.90929, 24.86793, 28.25387, 26.01419, 21.52158, 28.12068,
27.81882, 28.48342, 27.96163, 28.197645, 22.70333, 28.117945,
26.073955, 23.85303, 24.593665, 25.413525, 19.11083, 26.00622,
27.90615, 27.98023, 28.14475, 28.161525, 28.08333, 23.7368,
27.01432, 27.341995, 27.83233, 24.77127, 28.08601, 27.96112,
21.39113, 25.949465, 27.68429, 15.858655, 24.68182, 25.48676,
27.12372, 26.823595, 27.98649, 27.17823, 26.13994, 28.074105,
27.80747, 26.44518, 27.88772, 27.88865, 27.054655, 27.983055,
28.14709, 25.80761, 28.22729, 28.087765, 26.741425, 27.415515,
28.16615, 27.82628, 22.852945, 26.515965, 16.55507, 25.558165,
27.86497, 20.59904, 27.82338, 28.15537, 24.503635, 16.208215,
28.18434, 28.12301, 26.98152, 28.238175, 21.57631, 27.93972,
26.69019, 18.11697, 23.249475, 28.194915, 24.400325, 20.18436,
27.83363, 28.152575, 28.05914, 28.1046, 27.87944, 22.246695,
28.13895, 27.97113, 28.17759, 28.17134, 24.852555, 27.36403,
28.231645, 28.240475, 27.28467, 28.09842, 27.38207, 19.33762,
17.208895, 18.63795, 28.32022, 15.478795, 25.178505, 27.81531,
27.83335, 28.025865, 28.32901, 21.198015, 28.12642, 27.594495,
27.149095, 22.40141, 27.87721, 27.77537, 28.07577, 27.214415,
19.976595, 28.24768, 27.87019, 27.87417, 27.512335, 28.1487,
18.87582, 27.808485, 16.88221, 27.304755, 28.22269, 27.85485,
25.751105, 22.085525, 26.402235, 27.96385, 28.12362, 23.3511,
28.13134, 28.13854, 22.55042, 23.483435, 27.99603, 22.988955,
27.479825, 24.074565, 28.06897, 28.300515, 28.08899, 26.850145,
28.21175, 23.961985, 28.148655, 25.25804, 28.084975, 28.15017,
28.142015, 27.74106, 27.89856, 27.81585, 27.762185, 26.34704,
23.61241, 26.9402, 24.945815, 27.660965, 28.20952, 18.388425,
28.03487, 27.44823, 24.182, 20.80328, 28.138395, 26.294595,
27.8952, 24.294155, 26.569235, 28.076645, 21.003065, 28.221055,
25.336215, 28.17753, 27.934355, 25.886285, 28.10892, 26.890965,
28.06394, 27.89246, 25.030615, 27.71521, 27.994955, 27.091235,
26.189505, 17.829845, 26.78325, 27.566845, 21.749355, 28.181235,
28.216485, 17.523135, 28.145175, 28.05206, 28.198345, 27.46945,
25.099975, 27.266245, 27.54595, 26.236675, 28.27499, 28.230255,
21.91532, 25.631615, 25.69132, 26.62857, 27.83423, 23.119855,
28.08352, 15.10413, 27.84174, 28.10417, 20.39175, 19.55529,
27.62871, 19.76947, 27.87254)), row.names = c(39410L, 39411L,
39412L, 39413L, 39414L, 39415L, 39416L, 39417L, 39418L, 39419L,
39420L, 39421L, 39422L, 39423L, 39424L, 39425L, 39426L, 39427L,
39428L, 39429L, 39430L, 39431L, 39432L, 39433L, 39434L, 39435L,
39436L, 39437L, 39438L, 39439L, 39440L, 39441L, 39442L, 39443L,
39444L, 39445L, 39446L, 39447L, 39448L, 39449L, 39450L, 39451L,
39452L, 39453L, 39454L, 39455L, 39456L, 39457L, 39458L, 39459L,
39460L, 39461L, 39462L, 39463L, 39464L, 39465L, 39466L, 39467L,
39468L, 39469L, 39470L, 39471L, 39472L, 39473L, 39474L, 39475L,
39476L, 39477L, 39478L, 39479L, 39480L, 39481L, 39482L, 39483L,
39484L, 39485L, 39486L, 39487L, 39488L, 39489L, 39490L, 39491L,
39492L, 39493L, 39494L, 39495L, 39496L, 39497L, 39498L, 39499L,
39500L, 39501L, 39502L, 39503L, 39504L, 39505L, 39506L, 39507L,
39508L, 39509L, 39510L, 39511L, 39512L, 39513L, 39514L, 39515L,
39516L, 39517L, 39518L, 39519L, 39520L, 39521L, 39522L, 39523L,
39524L, 39525L, 39526L, 39527L, 39528L, 39529L, 39530L, 39531L,
39532L, 39533L, 39534L, 39535L, 39536L, 39537L, 39538L, 39539L,
39540L, 39541L, 39542L, 39543L, 39544L, 39545L, 39546L, 39547L,
39548L, 39549L, 39550L, 39551L, 39552L, 39553L, 39554L, 39555L,
39556L, 39557L, 39558L, 39559L, 39560L, 39561L, 39562L, 39563L,
39564L, 39565L, 39566L, 39567L, 39568L, 39569L, 39570L, 39571L,
39572L, 39573L, 39574L, 39575L, 39576L, 39577L, 39578L, 39579L,
39580L, 41926L, 41927L, 41928L, 41929L, 41930L, 41931L, 41932L,
41933L, 41934L, 41935L, 41936L, 41937L, 41938L, 41939L, 41940L,
41941L, 41942L, 41943L, 41944L, 41945L, 41946L, 41947L, 41948L,
41949L, 41950L, 41951L, 41952L, 41953L, 41954L, 41955L, 41956L,
41957L, 41958L, 41959L, 41960L, 41961L, 41962L, 41963L, 41964L,
41965L, 41966L, 41967L, 41968L, 41969L, 41970L, 41971L, 41972L,
41973L, 41974L, 41975L, 41976L, 41977L, 41978L, 41979L, 41980L,
41981L, 41982L, 41983L, 41984L, 41985L, 41986L, 41987L, 41988L,
41989L, 41990L, 41991L, 41992L, 41993L, 41994L, 41995L, 41996L,
41997L, 41998L, 41999L, 42000L, 42001L, 42002L, 42003L, 42004L,
42005L, 42006L, 42007L, 42008L, 42009L, 42010L, 42011L, 42012L,
42013L, 42014L, 42015L, 42016L, 42017L, 42018L, 42019L, 42020L,
42021L, 42022L, 42023L, 42024L, 42025L, 42026L, 42027L, 42028L,
42029L, 42030L, 42031L, 42032L, 42033L, 42034L, 42035L, 42036L,
42037L, 42038L, 42039L, 42040L, 42041L, 42042L, 42043L, 42044L,
42045L, 42046L, 42047L, 42048L, 42049L, 42050L, 42051L, 42052L,
42053L, 42054L, 42055L, 42056L, 42057L, 42058L, 42059L, 42060L,
42061L, 42062L, 42063L, 42064L, 42065L, 42066L, 42067L, 42068L,
42069L, 42070L, 42071L, 42072L, 42073L, 42074L, 42075L, 42076L,
42077L, 42078L, 42079L, 42080L, 42081L, 42082L, 42083L, 42084L,
42085L, 42086L, 42087L, 42088L, 42089L, 42090L, 42091L, 42092L,
42093L, 42094L, 42095L, 42096L, 42097L, 42098L, 42099L, 42100L,
42101L, 42102L, 42103L, 42104L, 42105L, 42106L, 42107L, 42108L,
42109L, 42110L, 42111L, 42112L, 42113L, 42114L, 42115L, 42116L,
42117L, 42118L, 42119L, 42120L, 42121L, 42122L, 42123L, 42124L,
42125L, 42126L, 42127L, 42128L, 42129L, 42130L, 42131L, 42132L,
42133L, 42134L, 42135L, 42136L), class = "data.frame")
My problem is actually very similar to the one that this person had in python
Remove jumps like peaks and steps in timeseries but I haven't been able to find something similar for R
EDIT: I actually found something similar R-related in this question https://stats.stackexchange.com/questions/139660/detecting-changes-in-time-series-r-example?newreg=f119230044de4802a9f0f6f4e4637d8f
The solution using tsoutliers looks applicable to my problem but so far it didn't work very well. I am not sure about what I am doing wrong.
tso(dat.ts, types = c("LS","TC"))
flattens completely my curve
tso(dat.ts, types = c("LS","TC"), discard.method = "bottom-up")
Moves my curve in the right direction but not enough to fix the problem.
Any approach using the bottom-up method to discard outliers gives this result, any approac using the en-masse approach flattens the curve.
There are a couple methods attempted in the code below attempting to capture the changes: by percent change and by using a rolling median method.
First import a couple useful libraries. Also changing your data into a tibble called 'lizard_data'
library(tidyverse)
library(RcppRoll)
lizard_data <- tibble(your_data)
Then using ggplot2, we can visualize all the time series.
lizard_data %>%
pivot_longer(names_to = 'key', values_to = 'value', cols = c(LIZ, COP, BLK)) %>%
arrange(TIME) %>%
ggplot(aes(x = TIME, y = value, color = key))+
geom_line()+
facet_wrap(~ID, nrow = 2)
Then we remove the adjustments by finding the delta, and we can use two different methods to smooth the time series. First, with a percentile method (changing the top 5% and bottom 5% of values to the median) and second, with the rolling median (if the max change is ten percent higher or lower than the median, replacing the value with the median).
ld_w_change <- lizard_data %>%
pivot_longer(names_to = 'key', values_to = 'value', cols = c(LIZ, COP, BLK)) %>%
group_by(ID, key) %>%
arrange(TIME) %>%
mutate(lag = lag(value),
raw_change = (value-lag),
#using the percentile change method
med_raw_change = median(raw_change, na.rm = T),
q_05 = quantile(raw_change,.05, na.rm = T),
q_95 = quantile(raw_change,.95, na.rm = T),
adj_raw_change = if_else(raw_change > q_95 | raw_change < q_05, med_raw_change, raw_change),
normalized_change = if_else(is.na(adj_raw_change), 0,adj_raw_change),
initial_value = first(value),
roll_raw_change = cumsum(normalized_change),
new_value_pct = initial_value + roll_raw_change,
# using the rolling median method
rolling_median_change = roll_median(raw_change, n = 5, align = "right", na.rm = T, fill = NA),
adj_median_change = case_when(raw_change > rolling_median_change*1.1 | raw_change < rolling_median_change*.9 ~ rolling_median_change,
is.na(rolling_median_change) & is.na(raw_change) ~ 0,
T ~ raw_change),
normalized_med_change = cumsum(adj_median_change),
new_value_roll = initial_value + normalized_med_change
)
I personally prefer the median adjustment, it seems to preserve the shape of the data better than the percentile method. To compare these methods visually, we can plot them side-by-side:
ld_w_change %>%
pivot_longer(names_to = 'method', values_to = 'adjusted_temp', cols = c(new_value_pct, new_value_roll)) %>%
ggplot(aes(x = TIME, y =adjusted_temp, color = key))+
geom_line()+
facet_wrap(ID ~ method, nrow = 2)

How to find a specific point on a model in R?

I am working with a CSV dataset called combinedDataset, which I found a model for using k-fold validation procedure. My x value for the model is hour meter reading while my y value is cumulative cost. Here's a dput of my combinedDataset:
structure(list(Unit.ID = c(925L, 967L, 1054L, 967L, 1054L, 967L,
1160L, 1054L, 1160L, 967L, 967L, 1054L, 1160L, 967L, 1054L, 1160L,
967L, 1160L, 1054L, 1054L, 967L, 1160L, 1054L, 967L, 1160L, 1054L,
967L, 1160L, 1054L, 164L, 967L, 967L, 1160L, 1054L, 164L, 967L,
164L, 1160L, 164L, 1054L, 967L, 164L, 1054L, 967L, 1054L, 164L,
967L, 164L, 164L, 1054L, 967L, 164L, 967L, 164L, 1054L, 164L,
925L, 164L, 967L, 1054L, 1054L, 925L, 925L, 164L, 165L, 164L,
1054L, 967L, 164L, 165L, 967L, 164L, 164L, 165L, 1054L, 967L,
967L, 165L, 164L, 1054L, 967L, 165L, 967L, 165L, 164L, 967L,
164L, 967L, 164L, 967L, 164L, 967L, 164L, 1054L, 164L, 164L,
164L, 164L, 164L, 164L, 164L), Hour.Meter.Reading = c(34L, 381L,
532L, 600L, 732L, 783L, 796L, 947L, 1016L, 1038L, 1200L, 1282L,
1290L, 1388L, 1481L, 1528L, 1579L, 1671L, 1704L, 1728L, 1755L,
1906L, 1926L, 1936L, 2031L, 2063L, 2136L, 2205L, 2293L, 2321L,
2342L, 2382L, 2425L, 2505L, 2524L, 2576L, 2704L, 2731L, 2777L,
2811L, 2873L, 2960L, 2997L, 3080L, 3170L, 3175L, 3264L, 3371L,
3386L, 3425L, 3485L, 3570L, 3690L, 3740L, 3746L, 3854L, 3863L,
3976L, 3990L, 3991L, 4078L, 4103L, 4106L, 4138L, 4138L, 4216L,
4249L, 4253L, 4305L, 4326L, 4353L, 4483L, 4489L, 4489L, 4500L,
4580L, 4581L, 4652L, 4721L, 4742L, 4784L, 4805L, 4828L, 4943L,
4947L, 4954L, 4968L, 5298L, 5316L, 5407L, 5533L, 5628L, 5712L,
5747L, 5951L, 6165L, 6194L, 6439L, 6636L, 6702L, 6918L), Labour.Cost = c(1102.5,
4270, 542.5, 2730, 682.5, 3097.5, 336, 871.5, 525, 2695, 1837.5,
1092, 1995, 2572.5, 1092, 924, 840, 1575, 693, 693, 560, 2100,
7959, 2747.5, 1092, 1764, 2030, 5355, 7434, 315, 1890, 2688,
504, 3024, 805, 1701, 577.5, 777, 6440, 1281, 588, 4910, 1470,
1911, 3738, 4140, 9219, 525, 1995, 1239, 1491, 2292.5, 4389,
2012.5, 1134, 945, 490, 3307.5, 714, 756, 1302, 297.5, 875, 1872.5,
1435, 1767.5, 2037, 3108, 1645, 1067.5, 3087, 1452.5, 11777.5,
5670, 4872, 2916, 4158, 5350, 2817.5, 84, 1596, 3865, 714, 910,
4112.5, 1197, 3622.5, 714, 3675, 4767, 3150, 2142, 2436, 210,
1974, 3843, 14532, 2373, 2919, 7098, 2205), Parts.Cost = c(657.6733,
6451.9113, 2235.8885, 6729.7326, 8357.0427, 9224.9012, 1957.0181,
6890.5315, 3156.4815, 2009.3578, 4555.0977, 3458.6842, 1546.2183,
6249.232, 4430.8058, 3835.5721, 3415.2062, 4868.2379, 2151.4558,
2233.2055, 2554.7489, 7433.8141, 2563.289, 3348.7162, 2173.6179,
1940.2806, 4404.6421, 5626.8595, 10553.4599, 12.62, 11405.5704,
2554.2787, 1907.3543, 12625.7525, 243.5735, 6104.7416, 405.959,
3609.1684, 4647.767, 12842.3638, 489.477, 9961.5883, 1706.0572,
2381.7686, 15177.0692, 5416.7948, 16538.1428, 253.3975, 1390.5058,
8699.7549, 7759.8042, 5128.0276, 8556.2625, 5760.523, 1923.699,
628.643, 158.4313, 14481.7111, 3796.3243, 11671.4333, 7140.2504,
1326.837, 441.0999, 2866.2141, 4229.31, 2935.825, 7452.8686,
11683.7093, 2644.1532, 418.679, 11665.8066, 523.9236, 18247.2776,
8115.265, 25011.6846, 13727.0801, 31786.6422, 6064.3123, 10599.0455,
119.4423, 1228.3541, 3587.7566, 3666.517, 472.1537, 1968.7669,
1417.8506, 8023.1254, 5831.6884, 14873.8008, 10193.2736, 6442.1719,
7525.4562, 4378.1336, 1691.4286, 12144.6891, 13094.8609, 20582.1682,
2544.103, 16934.6748, 17344.5551, 8912.7088), Total.Cost = c(1760.1733,
10721.9113, 2778.3885, 9459.7326, 9039.5427, 12322.4012, 2293.0181,
7762.0315, 3681.4815, 4704.3578, 6392.5977, 4550.6842, 3541.2183,
8821.732, 5522.8058, 4759.5721, 4255.2062, 6443.2379, 2844.4558,
2926.2055, 3114.7489, 9533.8141, 10522.289, 6096.2162, 3265.6179,
3704.2806, 6434.6421, 10981.8595, 17987.4599, 327.62, 13295.5704,
5242.2787, 2411.3543, 15649.7525, 1048.5735, 7805.7416, 983.459,
4386.1684, 11087.767, 14123.3638, 1077.477, 14871.5883, 3176.0572,
4292.7686, 18915.0692, 9556.7948, 25757.1428, 778.3975, 3385.5058,
9938.7549, 9250.8042, 7420.5276, 12945.2625, 7773.023, 3057.699,
1573.643, 648.4313, 17789.2111, 4510.3243, 12427.4333, 8442.2504,
1624.337, 1316.0999, 4738.7141, 5664.31, 4703.325, 9489.8686,
14791.7093, 4289.1532, 1486.179, 14752.8066, 1976.4236, 30024.7776,
13785.265, 29883.6846, 16643.0801, 35944.6422, 11414.3123, 13416.5455,
203.4423, 2824.3541, 7452.7566, 4380.517, 1382.1537, 6081.2669,
2614.8506, 11645.6254, 6545.6884, 18548.8008, 14960.2736, 9592.1719,
9667.4562, 6814.1336, 1901.4286, 14118.6891, 16937.8609, 35114.1682,
4917.103, 19853.6748, 24442.5551, 11117.7088), Cumulative.Cost = c(1760.1733,
12482.0846, 15260.4731, 24720.2057, 33759.7484, 46082.1496, 48375.1677,
56137.1992, 59818.6807, 64523.0385, 70915.6362, 75466.3204, 79007.5387,
87829.2707, 93352.0765, 98111.6486, 102366.8548, 108810.0927,
111654.5485, 114580.754, 117695.5029, 127229.317, 137751.606,
143847.8222, 147113.4401, 150817.7207, 157252.3628, 168234.2223,
186221.6822, 186549.3022, 199844.8726, 205087.1513, 207498.5056,
223148.2581, 224196.8316, 232002.5732, 232986.0322, 237372.2006,
248459.9676, 262583.3314, 263660.8084, 278532.3967, 281708.4539,
286001.2225, 304916.2917, 314473.0865, 340230.2293, 341008.6268,
344394.1326, 354332.8875, 363583.6917, 371004.2193, 383949.4818,
391722.5048, 394780.2038, 396353.8468, 397002.2781, 414791.4892,
419301.8135, 431729.2468, 440171.4972, 441795.8342, 443111.9341,
447850.6482, 453514.9582, 458218.2832, 467708.1518, 482499.8611,
486789.0143, 488275.1933, 503027.9999, 505004.4235, 535029.2011,
548814.4661, 578698.1507, 595341.2308, 631285.873, 642700.1853,
656116.7308, 656320.1731, 659144.5272, 666597.2838, 670977.8008,
672359.9545, 678441.2214, 681056.072, 692701.6974, 699247.3858,
717796.1866, 732756.4602, 742348.6321, 752016.0883, 758830.2219,
760731.6505, 774850.3396, 791788.2005, 826902.3687, 831819.4717,
851673.1465, 876115.7016, 887233.4104)), class = "data.frame", row.names = c(NA,
-101L))
Here's the code I used to find the model:
set.seed(123)
idx <- sample(1:nrow(combinedDataset), nrow(combinedDataset))
view(idx)
test_size2 <- floor(nrow(combinedDataset)*0) #multiplied by 0 implies nothing is being tested because the whole model is getting trained
train <- combinedDataset[-idx[1:test_size2],]
view(test)
view(train)
train_X <- train$Hour.Meter.Reading
train_y <- train$Cumulative.Cost
X <- train_X
y <- train_y
poly_order <- 2
Model <- lm(y~poly(X, poly_order))
print(Model)
and here's the code and a picture of the plot for the model:
X_new = seq(min(X), max(X), 1)
y_new <- predict(Model, data.frame(X = X_new))
plot.new()
plot(combinedDataset$Hour.Meter.Reading, combinedDataset$Cumulative.Cost, col = "blue")
lines(X_new, y_new, col="red", type = "l", lwd = 2)
legend(5, 95, legend=c("samples", "fitted model"),
col = c("blue", "red"), lty = c(-1, 1), pch = c(1, -1))
Using this model, I am trying to predict the “Cumulative Cost” when “Hour Meter Reading” are 4000 and 8000, accordingly. Is there a function that lets me find those specific values out of my model?
I've tried plugging in 4000 and 8000 into the X value using the equation that came out of the printing my model, but I got insanely high numbers which don't seem right and don't fit correctly into the plot.

Stacking multiple plots, vertically with the same x axis but different Y axes in R

I have a data.frame with multiple time series vectors against a date:time vector. I would like to plot all of the relevant vectors, vertically stacked on separate graphs with the same X axis but unique Y axes. A graph similar to this one:
my data looks like this:
dt <- structure(list(DEPTH = c(156, 156.5, 157.4, 158.15, 158.8, 159.2,
159.75, 160.35, 160.85, 161.1, 161.6, 162.05, 162.5, 162.65,
163.15, 163.45, 163.55, 163.8, 163.65, 163.75, 163.8, 163.8,
163.75, 164.45, 164.8, 165.35, 165.65, 165.75, 166.1, 166.75,
167, 167.2, 167.65, 168, 168.8, 169.3, 169.7, 170.2, 170.65,
170.9, 171.45, 171.65, 172, 172.1, 172.25, 173, 173.4, 173.9,
174.2, 174.6, 175, 175.25, 175.45, 175.9, 176.25, 176.7, 177,
177.15, 177.5, 178, 178.5, 179.05, 179.2, 180.7, 181.05, 181.25,
181.5, 181.7, 182.1, 182.3, 182.35, 182.75, 183.1, 183.65, 184.3,
184.6, 185.1, 185.15, 185.3, 185.15, 185.25, 185.3, 185.15),
Smooth.Vert.Speed = c(-0.550000000000011, -0.5, -0.900000000000006,
-0.75, -0.650000000000006, -0.399999999999977, -0.550000000000011,
-0.599999999999994, -0.5, -0.25, -0.5, -0.450000000000017,
-0.449999999999989, -0.150000000000006, -0.5, -0.299999999999983,
-0.100000000000023, -0.25, 0.150000000000006, -0.0999999999999943,
-0.0500000000000114, 0, 0.0500000000000114, -0.699999999999989,
-0.350000000000023, -0.549999999999983, -0.300000000000011,
-0.0999999999999943, -0.349999999999994, -0.650000000000006,
-0.25, -0.199999999999989, -0.450000000000017, -0.349999999999994,
-0.800000000000011, -0.5, -0.399999999999977, -0.5, -0.450000000000017,
-0.25, -0.549999999999983, -0.200000000000017, -0.349999999999994,
-0.0999999999999943, -0.150000000000006, -0.75, -0.400000000000006,
-0.5, -0.299999999999983, -0.400000000000006, -0.400000000000006,
-0.25, -0.199999999999989, -0.450000000000017, -0.349999999999994,
-0.449999999999989, -0.300000000000011, -0.150000000000006,
-0.349999999999994, -0.5, -0.5, -0.550000000000011, -0.149999999999977,
-1.5, -0.350000000000023, -0.199999999999989, -0.25, -0.199999999999989,
-0.400000000000006, -0.200000000000017, -0.049999999999983,
-0.400000000000006, -0.349999999999994, -0.550000000000011,
-0.650000000000006, -0.299999999999983, -0.5, -0.0500000000000114,
-0.150000000000006, 0.150000000000006, -0.0999999999999943,
-0.0500000000000114, 0.150000000000006), DIVE_SURF = c("dive21",
"dive21", "dive21", "dive21", "dive21", "dive21", "dive21",
"dive21", "dive21", "dive21", "dive21", "dive21", "dive21",
"dive21", "dive21", "dive21", "dive21", "dive21", "dive21",
"dive21", "dive21", "dive21", "dive21", "dive21", "dive21",
"dive21", "dive21", "dive21", "dive21", "dive21", "dive21",
"dive21", "dive21", "dive21", "dive21", "dive21", "dive21",
"dive21", "dive21", "dive21", "dive21", "dive21", "dive21",
"dive21", "dive21", "dive21", "dive21", "dive21", "dive21",
"dive21", "dive21", "dive21", "dive21", "dive21", "dive21",
"dive21", "dive21", "dive21", "dive21", "dive21", "dive21",
"dive21", "dive21", "dive21", "dive21", "dive21", "dive21",
"dive21", "dive21", "dive21", "dive21", "dive21", "dive21",
"dive21", "dive21", "dive21", "dive21", "dive21", "dive21",
"dive21", "dive21", "dive21", "dive21"), X = c(2050L, 2062L,
2026L, 2078L, 2058L, 2076L, 2050L, 2068L, 2060L, 2078L, 2058L,
2088L, 2080L, 2065L, 2088L, 2076L, 2084L, 2105L, 2084L, 2102L,
2123L, 2096L, 2074L, 2054L, 2090L, 2089L, 2080L, 2078L, 2068L,
2092L, 2084L, 2082L, 2094L, 2056L, 2062L, 2067L, 2082L, 2084L,
2091L, 2058L, 2076L, 2098L, 2104L, 2090L, 2058L, 2050L, 2080L,
2074L, 2074L, 2082L, 2070L, 2088L, 2062L, 2062L, 2082L, 2086L,
2070L, 2081L, 2092L, 2058L, 2060L, 2076L, 2094L, 2083L, 2072L,
2107L, 2104L, 2066L, 2110L, 2104L, 2072L, 2076L, 2065L, 2042L,
2066L, 2093L, 2080L, 2083L, 2108L, 2107L, 2086L, 2096L, 2126L
), Y = c(2036L, 2000L, 2049L, 1966L, 2042L, 2078L, 2072L,
2055L, 2036L, 2128L, 2044L, 2112L, 2066L, 2051L, 2102L, 2060L,
2054L, 2043L, 2034L, 2086L, 1980L, 2076L, 2003L, 2033L, 2107L,
1992L, 2028L, 2027L, 2024L, 2005L, 2050L, 2010L, 1944L, 2010L,
2046L, 2020L, 2088L, 2086L, 2034L, 2066L, 2060L, 2152L, 2044L,
2078L, 2040L, 2067L, 2080L, 2072L, 2073L, 2028L, 2066L, 2082L,
2030L, 2042L, 1990L, 2076L, 2054L, 2064L, 2016L, 2048L, 2029L,
2008L, 2090L, 2038L, 2026L, 2096L, 2002L, 2025L, 2001L, 2098L,
2061L, 2022L, 2054L, 2064L, 2043L, 2090L, 2042L, 2086L, 2073L,
2066L, 2040L, 2081L, 2087L), Z = c(2488L, 2484L, 2490L, 2486L,
2488L, 2492L, 2498L, 2490L, 2492L, 2484L, 2491L, 2494L, 2497L,
2493L, 2488L, 2493L, 2494L, 2484L, 2486L, 2487L, 2478L, 2490L,
2478L, 2493L, 2490L, 2486L, 2488L, 2486L, 2488L, 2482L, 2488L,
2480L, 2480L, 2488L, 2490L, 2490L, 2490L, 2489L, 2492L, 2490L,
2486L, 2480L, 2488L, 2491L, 2486L, 2488L, 2488L, 2494L, 2490L,
2488L, 2492L, 2498L, 2484L, 2491L, 2480L, 2491L, 2497L, 2487L,
2482L, 2490L, 2490L, 2478L, 2488L, 2492L, 2492L, 2482L, 2484L,
2489L, 2482L, 2484L, 2485L, 2492L, 2488L, 2493L, 2487L, 2490L,
2492L, 2488L, 2490L, 2487L, 2484L, 2486L, 2478L)), .Names = c("DEPTH",
"Smooth.Vert.Speed", "DIVE_SURF", "X", "Y", "Z"), row.names = 7222:7304, class = "data.frame")
and I am looking to plot DEPTH, X, Y and Z on separate graphs with a common X axis.
I agree with #PaulHiemstra, ggplot2 is the way to go.
Assuming Smooth.Vert.Speed is the common x-axis variable against which you want to plot DEPTH, X, Y and Z...
library(ggplot2)
library(reshape2)
# Add time variable as per #BenBolker's suggestion
dt$time <- seq(nrow(dt))
# Use melt to reshape data so values and variables are in separate columns
dt.df <- melt(dt, measure.vars = c("DEPTH", "X", "Y", "Z"))
ggplot(dt.df, aes(x = time, y = value)) +
geom_line(aes(color = variable)) +
facet_grid(variable ~ ., scales = "free_y") +
# Suppress the legend since color isn't actually providing any information
opts(legend.position = "none")
Just to be different, let me mention a solution involving neither lattice nor ggplot2 -- I posted this to Romain's R Graph Gallery a few years back as entry 65 with the code here. It just stacks the graphs up, using par() settings to keep them stacked.
Note that the vertical sizes are different by choice, they could easily be of the same height as well.
If you want to be old-fashioned you can use lattice. Unlike #aaronwolen I assumed there was a missing time variable in the data set, so I made one up:
dt$time <- seq(nrow(dt))
library(reshape2)
mm <- melt(subset(dt,select=c(time,DEPTH,X,Y,Z)),id.var="time")
library(lattice)
xyplot(value~time|variable,data=mm,type="l",
scales=list(y=list(relation="free")),
layout=c(1,4))
I've actually figured out another interesting way of doing this with the zoo library:
library(zoo)
z <- with(dt, zoo(cbind(DEPTH, X, Y, Z),as.POSIXct(time)))
plot.zoo(z, ylab=c("Depth (m)", "Pitch Angle (degrees)", "Swaying Acceleration (m/s^2)", "Heaving Acceleration (m/s^2)"), col=c("black", "blue", "darkred", "darkgreen"),
xlab = c("Time"), lwd=2, ylim=list((rev(range(dt$DEPTH))), c(-90,90), c(-10,10), c(-10,10)))
So within a zoo plot you can create new axis labels as a list form and all plots can have different colours.
Please read this example:
Generate example data:
dt = read_table("Time A B C D
10:12:54 2376.2 1.462 3.462 48
10:12:55 2410 1.462 3.462 48
10:12:56 2400 1.462 3.462 48
10:12:57 2409 1.462 3.462 48.6
10:12:58 2400 1.462 3.462 48.6
10:12:59 2385.1 1.462 3.462 46.6
10:13:00 2400 1.462 3.462 46.6
10:13:01 2410 1.462 3.462 46.6
10:13:02 2400 1.462 3.462 46.6
10:13:03 2106 1.463 3.463 46.6
10:13:04 2406 1.463 3.463 44.8
10:13:05 2376.2 1.463 3.463 44.8
10:13:06 2406 1.463 3.463 44.8
10:13:07 2400 1.463 3.463 44.8")
dt$Time=as.POSIXct(dt$Time)
If you want to plot it quickly, try this:
library(foqat)
geom_ts_batch(dt, panelgap=4)
If you want to plot it with more degree of freedom, try this:
library(foqat)
library(patchwork)
blankx=theme(axis.title.x=element_blank(),axis.text.x=element_blank(),axis.ticks.x=element_blank())
p2=geom_ts(dt, yl=2, llist=2, lcc="blue", yllab="A")+blankx
p3=geom_ts(dt, yl=3, llist=3, lcc="red", yllab="B")+blankx
p4=geom_ts(dt, yl=4, llist=4, lcc="green", yllab="C")+blankx
p5=geom_ts(dt, yl=5, llist=5, lcc="grey", yllab="D", xlab="Time")
p2/p3/p4/p5

How to replicate a Monthly Cycle Chart in R

I'd like to output a chart similar to the one represented on this page (on the right) using R and any package that would make it look good:
http://processtrends.com/pg_charts_monthly_cycle_chart.htm
Anyone up to the challenge? :)
Thanks!
The stats package in R base already has a function to do this. Here is my one-liner and the output that it produces
monthplot(redata, labels = month.abb, ylab = 'Listings')
Building on this an example of using monthplot for a weekly cycle plot is here (gives full R code and source data): http://figshare.com/figures/index.php/OpenURL_Router_Data:_Total_Requests_by_Weekday
monthplot(ts(sdf$values, frequency = 7, start=c(12,5)), labels = dow, ylab = "No. requests / day", xlab = "Weekday")
which gives this weekly cycle plot:
Example of monthplot to create a graph showing a weekly cycle http://figshare.com/figures/images/a/a7/Total_requests_by_weekday_01_Apr_to_31_Jul_2011.jpeg
Of course no graphical challenge will be complete without a ggplot solution. The tricky bit is to use ddply to summarise the monthly averages, and to pass this as data to a separate layer to ggplot.
library(lubridate)
library(plyr)
library(ggplot2)
df$month <- factor(month(df$dates), levels=1:12, labels=month.abb, ordered=TRUE)
df$year <- year(df$dates)
hline.data <- ddply(df, .(month), summarize, avgvalue=mean(values))
ggplot() +
geom_line(aes(x=year, y=values, group=month), data=df, colour="blue") +
geom_hline(aes(yintercept=avgvalue), data=hline.data, colour="blue", size=2) +
facet_grid(~month) +
opts(axis.text.x = theme_blank()) +
xlab("")
The data:
df <- structure(list(dates = structure(c(8415, 8446, 8474, 8505, 8535,
8566, 8596, 8627, 8658, 8688, 8719, 8749, 8780, 8811, 8839, 8870,
8900, 8931, 8961, 8992, 9023, 9053, 9084, 9114, 9145, 9176, 9204,
9235, 9265, 9296, 9326, 9357, 9388, 9418, 9449, 9479, 9510, 9541,
9570, 9601, 9631, 9662, 9692, 9723, 9754, 9784, 9815, 9845, 9876,
9907, 9935, 9966, 9996, 10027, 10057, 10088, 10119, 10149, 10180,
10210, 10241, 10272, 10300, 10331, 10361, 10392, 10422, 10453,
10484, 10514, 10545, 10575, 10606, 10637, 10665, 10696, 10726,
10757, 10787, 10818, 10849, 10879, 10910, 10940, 10971, 11002,
11031, 11062, 11092, 11123, 11153, 11184, 11215, 11245, 11276,
11306, 11337, 11368, 11396, 11427, 11457, 11488, 11518, 11549,
11580, 11610, 11641, 11671, 11702, 11733, 11761, 11792, 11822,
11853, 11883, 11914, 11945, 11975, 12006, 12036, 12067, 12098,
12126, 12157, 12187, 12218, 12248, 12279, 12310, 12340, 12371,
12401, 12432, 12463, 12492, 12523, 12553, 12584, 12614, 12645,
12676, 12706, 12737, 12767, 12798, 12829, 12857, 12888, 12918,
12949, 12979, 13010, 13041, 13071, 13102, 13132), class = "Date"),
values = c(1093, 1182, 1299, 1372, 1319, 1362, 1239, 1162,
1059, 921, 815, 720, 835, 853, 1034, 1030, 1240, 1388, 1429,
1319, 1231, 1184, 1076, 825, 991, 1093, 854, 808, 1079, 1092,
1220, 1251, 1130, 1131, 1052, 951, 950, 1006, 1112, 1119,
1250, 1322, 1347, 1310, 1215, 1128, 1035, 992, 1079, 1018,
1112, 1224, 1323, 1344, 1326, 1267, 1171, 1075, 916, 932,
888, 904, 939, 1018, 1140, 1174, 1285, 1311, 1298, 1231,
1091, 1088, 991, 1028, 1177, 1322, 1322, 1398, 1389, 1174,
1196, 1115, 756, 496, 693, 673, 748, 777, 820, 948, 966,
1027, 960, 865, 767, 675, 765, 732, 613, 632, 659, 705, 684,
734, 715, 626, 551, 487, 500, 536, 575, 595, 736, 798, 832,
797, 792, 726, 650, 584, 567, 524, 574, 571, 591, 657, 699,
756, 867, 795, 760, 685, 609, 588, 521, 581, 614, 623, 668,
702, 777, 697, 647, 562, 523, 508, 493, 504, 534, 586, 621,
620, 636, 600, 549, 557)), .Names = c("dates", "values"), row.names = c(NA,
-156L), class = "data.frame")
An awful piece of R coding here by me, but it might give you some ideas on how to do it:
This was the data I used taken from the excel file on the linked site:
> dput(redata)
structure(c(1093L, 1182L, 1299L, 1372L, 1319L, 1362L, 1239L,
1162L, 1059L, 921L, 815L, 720L, 835L, 853L, 1034L, 1030L, 1240L,
1388L, 1429L, 1319L, 1231L, 1184L, 1076L, 825L, 991L, 1093L,
854L, 808L, 1079L, 1092L, 1220L, 1251L, 1130L, 1131L, 1052L,
951L, 950L, 1006L, 1112L, 1119L, 1250L, 1322L, 1347L, 1310L,
1215L, 1128L, 1035L, 992L, 1079L, 1018L, 1112L, 1224L, 1323L,
1344L, 1326L, 1267L, 1171L, 1075L, 916L, 932L, 888L, 904L, 939L,
1018L, 1140L, 1174L, 1285L, 1311L, 1298L, 1231L, 1091L, 1088L,
991L, 1028L, 1177L, 1322L, 1322L, 1398L, 1389L, 1174L, 1196L,
1115L, 756L, 496L, 693L, 673L, 748L, 777L, 820L, 948L, 966L,
1027L, 960L, 865L, 767L, 675L, 765L, 732L, 613L, 632L, 659L,
705L, 684L, 734L, 715L, 626L, 551L, 487L, 500L, 536L, 575L, 595L,
736L, 798L, 832L, 797L, 792L, 726L, 650L, 584L, 567L, 524L, 574L,
571L, 591L, 657L, 699L, 756L, 867L, 795L, 760L, 685L, 609L, 588L,
521L, 581L, 614L, 623L, 668L, 702L, 777L, 697L, 647L, 562L, 523L,
508L, 493L, 504L, 534L, 586L, 621L, 620L, 636L, 600L, 549L, 557L
), .Dim = 12:13, .Dimnames = list(c("Jan", "Feb", "Mar", "Apr",
"May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"), c("X1993",
"X1994", "X1995", "X1996", "X1997", "X1998", "X1999", "X2000",
"X2001", "X2002", "X2003", "X2004", "X2005")))
And here's my woeful coding... so much cleanup is possible here, but it is a quick test of the possibilities.
monthnames <- c(
"Jan",
"Feb",
"Mar",
"Apr",
"May",
"Jun",
"Jul",
"Aug",
"Sep",
"Oct",
"Nov",
"Dec"
)
# size of window
windows(w=6,h=3)
# margins
par(
mar=c(5.1,5.1,2.1,2.1),
cex.axis=0.7
)
# set up plot with the number of categories and the y limits
# yaxs="i" sets the yaxis as having no separation from the corner point
ylimlp <- c(0,max(redata))*1.06
plot(1:156, type="n", xaxt="n", ylim=ylimlp, ann=FALSE, yaxs="i", xaxs="i", bty="l", las="1")
abline(v=seq(13,156,13),lty=1,col="grey")
title(xlab="Month", col.lab=rgb(0,0,0), font.lab=2, cex.lab=0.75)
title(ylab="Listings", col.lab=rgb(0,0,0), font.lab=2, cex.lab=0.75)
lines(redata[1,],type="l",pch=NA,lwd=1,col="grey")
lines(c(rep(NA,13*1),redata[2,]),type="l",pch=NA,lwd=1,col="grey")
lines(c(rep(NA,13*2),redata[3,]),type="l",pch=NA,lwd=1,col="grey")
lines(c(rep(NA,13*3),redata[4,]),type="l",pch=NA,lwd=1,col="grey")
lines(c(rep(NA,13*4),redata[5,]),type="l",pch=NA,lwd=1,col="grey")
lines(c(rep(NA,13*5),redata[6,]),type="l",pch=NA,lwd=1,col="grey")
lines(c(rep(NA,13*6),redata[7,]),type="l",pch=NA,lwd=1,col="grey")
lines(c(rep(NA,13*7),redata[8,]),type="l",pch=NA,lwd=1,col="grey")
lines(c(rep(NA,13*8),redata[9,]),type="l",pch=NA,lwd=1,col="grey")
lines(c(rep(NA,13*9),redata[10,]),type="l",pch=NA,lwd=1,col="grey")
lines(c(rep(NA,13*10),redata[11,]),type="l",pch=NA,lwd=1,col="grey")
lines(c(rep(NA,13*11),redata[12,]),type="l",pch=NA,lwd=1,col="grey")
redatamonthmean <- apply(redata,1,mean)
lines(rep(redatamonthmean[1],13),type="l",pch=NA,lwd=1,col="black")
lines(c(rep(NA,13*1),rep(redatamonthmean[2],13)),type="l",pch=NA,lwd=1,col="black")
lines(c(rep(NA,13*2),rep(redatamonthmean[3],13)),type="l",pch=NA,lwd=1,col="black")
lines(c(rep(NA,13*3),rep(redatamonthmean[4],13)),type="l",pch=NA,lwd=1,col="black")
lines(c(rep(NA,13*4),rep(redatamonthmean[5],13)),type="l",pch=NA,lwd=1,col="black")
lines(c(rep(NA,13*5),rep(redatamonthmean[6],13)),type="l",pch=NA,lwd=1,col="black")
lines(c(rep(NA,13*6),rep(redatamonthmean[7],13)),type="l",pch=NA,lwd=1,col="black")
lines(c(rep(NA,13*7),rep(redatamonthmean[8],13)),type="l",pch=NA,lwd=1,col="black")
lines(c(rep(NA,13*8),rep(redatamonthmean[9],13)),type="l",pch=NA,lwd=1,col="black")
lines(c(rep(NA,13*9),rep(redatamonthmean[10],13)),type="l",pch=NA,lwd=1,col="black")
lines(c(rep(NA,13*10),rep(redatamonthmean[11],13)),type="l",pch=NA,lwd=1,col="black")
lines(c(rep(NA,13*11),rep(redatamonthmean[12],13)),type="l",pch=NA,lwd=1,col="black")
mtext(monthnames[1], side=1, cex=0.7, at=6.5)
mtext(monthnames[2], side=1, cex=0.7, at=6.5*3)
mtext(monthnames[3], side=1, cex=0.7, at=6.5*5)
mtext(monthnames[4], side=1, cex=0.7, at=6.5*7)
mtext(monthnames[5], side=1, cex=0.7, at=6.5*9)
mtext(monthnames[6], side=1, cex=0.7, at=6.5*11)
mtext(monthnames[7], side=1, cex=0.7, at=6.5*13)
mtext(monthnames[8], side=1, cex=0.7, at=6.5*15)
mtext(monthnames[9], side=1, cex=0.7, at=6.5*17)
mtext(monthnames[10], side=1, cex=0.7, at=6.5*19)
mtext(monthnames[11], side=1, cex=0.7, at=6.5*21)
mtext(monthnames[12], side=1, cex=0.7, at=6.5*23)
And the example image

Resources