Related
I have a data set with 7 features including the date column where my dependent variables are NORTH and YORKSANDTHEHUMBER and the rest are independent variables. I want to automate the process where I take my first dependent feature NORTH and run it against all the independent variables in a univariate manner so that the first model gives me the result for NORTH and x1, second for NORTH and x2 and so on via using for loop but I couldn't make the sense. Can anyone please guide me in this?
Data:
structure(list(Date = structure(c(289094400, 297043200, 304992000,
312854400, 320716800, 328665600, 336614400, 344476800, 352252800,
360201600, 368150400, 376012800, 383788800, 391737600, 399686400,
407548800, 415324800, 423273600, 431222400, 439084800, 446947200,
454896000, 462844800, 470707200, 478483200, 486432000, 494380800,
502243200, 510019200, 517968000, 525916800, 533779200, 541555200,
549504000, 557452800, 565315200, 573177600, 581126400, 589075200,
596937600, 604713600, 612662400, 620611200, 628473600, 636249600,
644198400, 652147200, 660009600, 667785600, 675734400, 683683200,
691545600, 699408000, 707356800, 715305600, 723168000, 730944000,
738892800, 746841600, 754704000, 762480000, 770428800, 778377600,
786240000, 794016000, 801964800, 809913600, 817776000, 825638400,
833587200, 841536000, 849398400, 857174400, 865123200, 873072000,
880934400, 888710400, 896659200, 904608000, 912470400, 920246400,
928195200, 936144000, 944006400, 951868800, 959817600, 967766400,
975628800, 983404800, 991353600, 999302400, 1007164800, 1014940800,
1022889600, 1030838400, 1038700800, 1046476800, 1054425600, 1062374400,
1070236800, 1078099200, 1086048000, 1093996800, 1101859200, 1109635200,
1117584000, 1125532800, 1133395200, 1141171200, 1149120000, 1157068800,
1164931200, 1172707200, 1180656000, 1188604800, 1196467200, 1204329600,
1212278400, 1220227200, 1228089600, 1235865600, 1243814400, 1251763200,
1259625600, 1267401600, 1275350400, 1283299200, 1291161600, 1298937600,
1306886400, 1314835200, 1322697600, 1330560000, 1338508800, 1346457600,
1354320000, 1362096000, 1370044800, 1377993600, 1385856000, 1393632000,
1401580800, 1409529600, 1417392000, 1425168000, 1433116800, 1441065600,
1448928000, 1456790400, 1464739200, 1472688000, 1480550400, 1488326400,
1496275200, 1504224000, 1512086400, 1519862400, 1527811200, 1535760000,
1543622400, 1551398400, 1559347200, 1567296000, 1575158400, 1583020800,
1590969600, 1598918400, 1606780800, 1614556800, 1622505600, 1630454400,
1638316800), class = c("POSIXct", "POSIXt"), tzone = "UTC"),
Industrialproduction = c(8.2, 8.79, 0.94, 1.53, -3.18, -8.66,
-8.96, -11.93, -8.14, -4.5, 1.53, 2.06, 2.39, 2.02, 2.01,
1.68, 2.16, 2.15, 3.77, 5.95, 3.58, 0.81, -1.58, -1.72, 3.62,
9.78, 8.51, 3.49, 1.97, -1.02, 1.92, 6.13, 3.87, 3.54, 2.76,
4.19, 4.73, 4.84, 6.64, 3.88, 2.05, 1.36, 0.53, 1.47, 1.61,
3.22, -1.45, -2.76, -3.83, -5.06, -4.01, -1.76, -0.27, -0.82,
2.23, 0.69, 1.38, 2.07, 2.32, 4.1, 4.61, 5.68, 6.13, 5.91,
2.85, 1.66, 1, 0.37, 2.52, 1.26, 1.24, 1.48, 0.37, 2.24,
2.7, 4.38, 7.6, 3.89, 0.84, -0.82, -0.46, 5.61, 9.48, 5.06,
1.95, 2.1, 1.08, 6.27, 1.46, 2.28, 3.21, 3.37, 12.94, -1.06,
-2.07, -6.22, -5.19, 6.65, 6.78, 4.35, -2.69, -1.31, -2.08,
3.44, -3.08, -0.92, -1.62, -0.91, 8.32, 2.57, 4.33, 2.44,
1.52, -1.3, -4.94, -3.97, -3.59, -1.83, 1.77, -1.86, -4.86,
-5.07, -7.55, -5.37, -0.33, -1.2, -0.11, -1.11, -8.39, -5.4,
-5.52, -4.16, 0.12, -0.7, -0.58, -0.59, 0.48, 3.87, 5.29,
7.91, 7.21, -0.45, -2.23, -1.86, 4.19, 5.9, 5.94, 2.45, 0,
-0.75, -1.08, 1.63, -3.28, -0.22, 3.49, 1.07, 1.53, 5.3,
4.21, 6.14, 10.24, 2.26, 0.71, -1.3, -8.9, -12.36, -5.02,
-2.83, 3.76, 9.86, 1.9, 0.94), Householdconsumption = c(30.09,
32.53, 33.35, 35.23, 37.18, 37.59, 38.89, 39.82, 41.56, 42.7,
43.74, 45.03, 46.19, 46.95, 48.29, 49.84, 51.26, 52.15, 53.5,
54.36, 55.4, 56.7, 57.05, 58.88, 60.09, 61.44, 63.27, 64.74,
66.63, 68.35, 69.55, 70.81, 72.3, 74.29, 76.65, 78.82, 81.51,
83.81, 86.53, 88.4, 90.29, 92.46, 93.95, 95.99, 97.85, 100.83,
102.42, 104.05, 106.08, 107.79, 109.33, 110.63, 111.71, 113.52,
114.9, 116.02, 118.31, 119.4, 122.27, 124.05, 125.13, 125.99,
127.59, 129.19, 130.16, 132.29, 135.06, 136.61, 139.34, 142.14,
144.59, 146.95, 149.43, 151.71, 155.34, 156.37, 158.39, 160.69,
164.47, 164.41, 167.54, 169.48, 170.09, 172.51, 176.26, 177.61,
179.44, 180.28, 182.96, 184.01, 186.83, 186.34, 188.79, 190.18,
191.94, 194.56, 196.46, 198.86, 201.75, 203.09, 205.24, 208.26,
210.84, 213.9, 216.18, 217.54, 220.61, 222.9, 223.67, 227.66,
230.62, 232.57, 234.8, 237.82, 241.91, 244.47, 248.84, 248.63,
248.14, 243.9, 241.46, 239.04, 240.72, 243.03, 241.87, 248,
249.95, 251.91, 254.92, 254.81, 257.17, 261.11, 262.28, 265.29,
266.74, 271.42, 274.28, 277.61, 282.48, 282.94, 285.76, 290.21,
292.88, 294.9, 296.07, 299.14, 302.58, 302.82, 309.63, 313.2,
318.64, 320.87, 323.41, 325.57, 326.56, 329.67, 335.95, 337.61,
341.08, 345.09, 346.16, 350.18, 350.23, 347.89, 339.85, 270.86,
325.65, 320.28, 311.3, 341.24, 354.61, 361.47), Investmentgrowth = c(17.3,
22.73, 25.8, 29.99, 21.59, 15.49, 11.11, 6.04, 4.23, 4.42,
4.28, 3.51, 6.53, 8.81, 10.52, 12.63, 14.6, 8.04, 7.42, 10.72,
11.15, 16.11, 15.45, 11.36, 18.41, 8.32, 8.99, 8.18, 0.86,
5.04, 9.07, 14.27, 11.11, 19.61, 23.14, 19.47, 27.16, 24.6,
17.45, 16.17, 20.57, 17.01, 17.76, 15.36, 8.28, 7.05, 2.92,
2.83, -3.08, -4.32, -7.48, -6.69, -3.71, -4.64, -3.87, -4.88,
-1.72, -0.38, 1.97, 4.65, 2.84, 2.98, 3.68, 2.88, 0.69, 3.5,
4.91, 5.66, 11.3, 13.85, 10.87, 4.01, -5.63, -8.06, -3.81,
3.94, 10.74, 9.14, 3.83, 3.36, 3.29, 3.24, 7.59, 3.43, 7.05,
13.14, 1.12, 7.68, 4.22, 1.34, 9.27, 0.78, 0.66, -1.52, 4.17,
12.34, 11.74, 5.2, 1.89, -1.56, 2.26, 5.89, 5.79, 4.84, 3.44,
7.15, 7.27, 7.31, 6.11, 5.7, 8.15, 6.96, 7.79, 10.05, 2.71,
9.61, 4.63, 2.72, 1.13, -6.1, -8.98, -14.36, -9.8, -11.41,
-3.13, 1.28, 3.81, 9.18, 1.62, 2.05, 2.14, 2.03, 7.32, 3.88,
0.09, 3.44, -1.27, 6.8, 10.41, 5.73, 12.93, 7.89, 6.8, 7.92,
8.2, 9.32, 6.18, 7.39, 5.22, 6.07, 9.44, 5.64, 6.8, 7.2,
4.77, 6.83, 3.74, 1.63, 2.59, 1.17, 4.39, 3.28, 3.78, 2.18,
-1.93, -19.78, -7.51, -2.54, -0.99, 23.33, 6.54, 4.25), ConsumerPriceIndex = c(24.88,
25.94, 27.55, 28.28, 29.79, 31.39, 31.92, 32.55, 33.55, 34.94,
35.55, 36.48, 37.02, 38.14, 38.14, 38.45, 38.73, 39.54, 40.1,
40.49, 40.76, 41.57, 41.99, 42.35, 43.25, 44.46, 44.46, 44.74,
45.06, 45.58, 45.81, 46.42, 46.88, 47.49, 47.72, 48.14, 48.44,
49.43, 49.83, 50.33, 50.82, 52.02, 52.42, 53.11, 53.91, 55.6,
56.69, 57.09, 57.59, 60.27, 60.67, 61.27, 61.67, 62.56, 62.56,
62.86, 63.16, 64.05, 64.45, 64.35, 64.55, 65.35, 65.45, 65.64,
66.24, 67.04, 67.34, 67.63, 68.03, 68.63, 68.93, 69.13, 69.13,
69.82, 70.22, 70.32, 70.7, 71.3, 71.5, 71.8, 71.9, 72.3,
72.4, 72.6, 72.3, 72.9, 73.1, 73.2, 73, 74.1, 74.1, 74, 74.1,
74.6, 74.8, 75.2, 75.3, 75.4, 75.9, 76.2, 76.1, 76.6, 76.7,
77.4, 77.5, 78.1, 78.6, 78.9, 78.9, 80.1, 80.5, 81.3, 81.4,
82, 81.9, 83, 83.4, 85.2, 86.1, 85.5, 85.8, 86.7, 87.1, 88,
88.7, 89.5, 89.8, 91.2, 92.2, 93.3, 94.4, 95.1, 95.4, 95.5,
96.5, 97.6, 98.1, 98.3, 99.1, 99.6, 99.7, 100.2, 100.3, 100.1,
99.7, 100.2, 100.2, 100.3, 100.2, 100.6, 101.1, 101.9, 102.7,
103.5, 104.3, 105, 105.1, 105.9, 106.6, 107.1, 107, 107.9,
108.4, 108.5, 108.6, 108.8, 109.2, 109.4, 109.7, 111.4, 112.4,
114.7), NORTH = c(4.06976744186047, 5.51675977653633, 7.2799470549305,
4.75015422578655, 4.59363957597172, 3.15315315315317, 1.2008733624454,
-0.377562028047452, -0.108283703302655, 0.650406504065032,
0.969305331179318, 0.106666666666688, 3.09003729355352, 2.11886304909562,
2.32793522267207, 5.68743818001977, -1.46934955545156, 3.95611702127658,
5.19438987619354, -0.0912012507600199, 2.81677896109541,
3.97412590369087, 1.30118326353028, 3.31553807249226, 1.32872294960955,
2.93700394923507, 0.908853875665812, 1.81241002546971, -1.3414545718222,
4.81772747317361, -3.4743890895067, 4.63823913990992, 0.857370960463727,
1.78620594713658, 0.527472527472524, -4.05973562947765, -0.136726966764838,
3.16657890117607, 5.95161125667812, 8.01002055498458, 10.5501040737437,
13.4138468987035, 2.93371279497212, 8.84291046495554, -6.87764606265876,
2.90741287990725, 3.71548486856639, 1.23317430567388, -1.1153443739474,
4.31313207880924, -1.64273763383666, 0.751373343751978, -3.21877014345816,
1.16314882913623, -3.59065232516701, -4.65283582701413, 4.98489115166134,
3.18459755147199, -3.72875180849018, 2.20137289784552, -4.22488416879167,
-0.706371260732776, -2.33320725244584, -2.77596063540517,
9.48636128308308, -2.15172116987927, -5.71766285746257, 1.92271571537407,
0.655934629757954, 4.01517293049256, -2.89270965830984, 3.910032505864,
-1.31616434600239, 1.51533020314829, 3.09793915477058, 1.00146317751519,
-0.516295759142123, 4.36356154298765, -0.254418667464494,
-1.38015492270122, -0.375369475589906, 3.79511767246943,
1.67693295616696, 0.197127124553074, -1.01758464617007, 5.70477696100394,
-1.37564670926045, 1.39335708665185, 2.29473337483174, -1.40489357721877,
10.7514355294201, -0.403985348024547, -0.0106181613732362,
10.6504339189417, 7.72602065226992, 6.66622841015428, 7.3618861388054,
7.20852539277177, 7.17954849482943, 5.47999408979134, 9.96115783870405,
6.960515961579, 4.82626274289161, -0.428385428540776, 1.6283388103162,
2.07440844957785, -0.707412409361252, -4.9247119657169, 4.3311229522328,
2.53158682305453, -0.8800288960527, 2.40275362264064, 0.67520264383003,
3.97711266595697, 0.00749650524863867, -0.990038901876062,
-0.63991866618197, -2.00199671222057, -5.15098853828302,
-3.65317386916235, -4.67277715297035, -0.564594703469009,
3.29526766976492, 0.0888482310529472, -0.524228981506815,
3.04012050839788, -1.53185447929528, -0.338917708381546,
-2.5450727924491, 3.36238295093309, -0.918735392055365, -0.766840492430499,
-0.767135363240273, 0.0468961039030733, 1.51618073336643,
-2.02356670927575, -1.11584500803018, 2.45568937824186, 0.989863990072745,
-0.4214032191629, 2.8219393653178, 4.51474479784726, -2.49049271581373,
-0.41346860604498, 3.13864420514751, -0.0877964623534655,
-0.674347043417658, -0.143267961613368, -0.243406512930108,
0.0402054219496719, 0.12912750657269, 0.168664845016241,
-0.713623226415894, 1.49163339466038, 1.57747101133233, -2.10536689354583,
3.12980292320487, -0.90833324273064, -1.71375697178543, 0.582188469928239,
2.89692448021907, 0.0768238907010953, -1.53392147948349,
1.23622644511851, -0.0506227154778281, 0.327869614383542,
2.62019966395382, 3.48629495563575, 0.593740862165774, 4.09560684327741,
2.32207959691005, 0.506809670097958), YORKSANDTHEHUMBER = c(4.0121120363361,
5.45851528384282, 9.52380952380951, 6.04914933837431, 3.03030303030299,
5.42099192618225, 2.78993435448577, -0.53219797764768, 1.97966827180309,
1.15424973767052, 0.466804979253115, -1.96179659266907, 2.42232754081095,
0.719794344473031, -0.306278713629415, 3.37941628264209,
2.74393263992076, 3.91920555341303, 1.91585099967527, 0.892125625853447,
2.91888477848958, 3.78293078507868, 0.109815847271484, 6.83486625601216,
0.722691730511011, 3.56008625759656, -0.227160867754524,
2.69419041475355, -1.17134094520194, 2.78546324684064, 1.01487759630426,
1.54843356139717, 4.15602836879435, 4.43619773934357, -0.309698451507728,
-1.45519947678222, -1.09839057574248, 9.08267346664877, 11.8913598474363,
13.9511229623114, 9.71243848306475, 7.66524473371739, 6.46801731884651,
-2.26736490763654, -4.35729847494552, -2.93870179974964,
-7.72353426221536, -7.01127302722023, 2.02543627323513, 2.51245245873873,
0.712134856164617, -2.74951902189779, 3.20525370229387, -2.17225212432703,
0.304311135936791, -5.21962007478405, -1.22771231792975,
5.62676205566459, -0.0988236572110239, 0.865912760888606,
-3.71050647202427, 1.5475703474865, -3.43233328040058, -2.86288061069106,
-0.551968808874026, 2.05442655433966, 0.388675938226524,
-2.60493926554792, -2.23312255163324, 5.04817095211292, 1.43656632546456,
2.53687507970646, -2.37376845704496, 4.95419269721737, 2.5486061891899,
-0.64046817419928, 1.75846231104579, 0.542834308795226, -0.322606591645488,
-2.67961743436791, 3.57498650723638, 2.89743475977992, 1.28567849851333,
1.828392232888, -0.335580970541442, 5.34860062451308, -2.98213938289875,
3.55468980520775, 2.76514398982056, 3.45832186518539, 1.32470422187813,
2.79428923624948, 3.8093136923264, 9.02544568216825, 7.65854560247412,
11.0775256253873, -0.658987130155868, 10.726463566155, 5.35747018223358,
4.66387144397987, 5.14763674355188, 10.581371911713, 3.46926043870116,
-0.000369065205607915, 0.924675325682334, 3.681119585314,
-0.0731638011738147, 0.690177922935143, 1.33427941484383,
2.65734876034112, 1.62515008951355, 1.48038293242949, 0.494192527588077,
2.39510739408179, 0.818557817036399, -1.1083492547105, -1.89465779498896,
-3.74953204588813, -3.7238074999174, -4.9788025925358, -4.65464963206228,
3.34588197167384, 2.20886725349025, 1.99954661835316, -0.777545762347822,
3.58681336123701, -2.96757202302368, -3.36310924643208, 2.01483012871867,
2.4154475314586, -0.642314624781054, -2.0920093049768, -1.73904001349183,
1.69071701857513, 0.201962934561265, -2.66472457335063, 0.323680874793625,
1.37879437405697, 3.26467995053582, 2.21645486418079, -0.646736928898328,
2.06516965491332, 1.8250141624007, -1.68545096699093, -0.818973277015041,
4.05215303886115, -1.16233786449552, -1.56747999678074, 0.67708495662531,
2.92754908797974, 1.50505329502891, -1.12667258046976, -0.765034978617734,
2.67854615526131, -0.306294171526678, 0.175047038539941,
1.56451236885344, 0.618844724791642, 3.34585295985361, -1.76420421213768,
-0.079420811764984, 1.56942028744185, 0.407910173531572,
-0.268243129544691, 2.57107118459526, -0.758721256899304,
3.03713057699041, 2.68699850192726, 1.88666482868311, 4.78697689266296,
2.43248653386118, 1.27252711337855)), row.names = c(NA, -172L
), class = "data.frame")
Code:
library(tseries)
library(dplyr)
# ARDL MODELING AND FORECASTING
in_sampleARDL <- data %>%
dplyr::filter(Date < '2020-03-01')
out_sampleARDL <-data %>%
dplyr::filter(Date >= '2020-03-01')
auto_ardl(NORTH~Householdconsumption,
data = in_sampleARDL, max_order = 4, selection = 'BIC')
pred1 <-forecast(ardlDlm(formula = NORTH ~ diff(Householdconsumption),
data = in_sampleARDL, p =3)
, x =out_sampleARDL$NORTH, h = 4)
error1 = out_sampleARDL$NORTH[1:2]- pred1[["forecasts"]]
mean(error1^2)
auto_ardl(NORTH~Industrialproduction,
data = in_sampleARDL, max_order = 4, selection = 'BIC')
pred2 <-forecast(ardlDlm(formula = NORTH ~ Industrialproduction,
data = in_sampleARDL, p =3)
, x =out_sampleARDL$NORTH, h = 4)
error2 = out_sampleARDL$NORTH[1:4]- pred2[["forecasts"]]
mean(error2^2)
I have a series of x and y values that I've used to build a linear model.
I can use predict() to find a value of y from a known value of x, but I'm struggling to calculate x from a known value of y. I've seen a few posts that talk about using the approx() function, but I can't figure out how to implement it for my use case. The idea is to write a function that takes a numerical value of y as an input and returns the expected value of x that it would correspond to, ideally with a prediction interval, eg "The expected value of x is 38.90, plus or minus 0.7", or something like that.
Here's my data:
> dput(x)
c(4.66, 5.53, 5.62, 5.85, 6.26, 6.91, 7.04, 7.32, 7.43, 7.85,
8.1, 8.3, 8.34, 8.53, 8.69, 8.7, 8.73, 8.76, 8.96, 9.06, 9.42,
9.78, 10.3, 10.82, 10.98, 11.07, 11.09, 11.32, 11.75, 12.1, 12.46,
12.5, 12.99, 13.02, 13.28, 13.43, 13.96, 14, 14.07, 14.29, 14.57,
14.66, 15.21, 15.56, 15.97, 16.44, 16.8, 17.95, 18.33, 18.62,
18.92, 19.49, 19.9, 19.92, 20.14, 20.18, 21.19, 22.7, 23.25,
23.48, 23.49, 23.58, 23.7, 23.83, 23.83, 23.97, 24.05, 24.14,
24.15, 24.19, 24.32, 24.62, 24.9, 24.92, 25, 25.06, 25.31, 25.36,
25.86, 25.9, 25.95, 25.99, 26.08, 26.2, 26.27, 26.39, 26.5, 26.51,
26.68, 26.78, 26.82, 26.92, 26.92, 27.05, 27.05, 27.07, 27.32,
27.6, 27.77, 27.8, 27.91, 27.96, 27.97, 28.04, 28.05, 28.15,
28.2, 28.28, 28.37, 28.51, 28.53, 28.53, 28.66, 28.68, 28.72,
28.74, 28.82, 28.83, 28.83, 28.86, 28.89, 28.91, 29.04, 29.2,
29.35, 29.4, 29.42, 29.48, 29.53, 29.65, 29.67, 29.69, 29.7,
29.72, 29.93, 29.97, 30.03, 30.08, 30.09, 30.11, 30.18, 30.62,
30.66, 30.78, 31, 31.32, 31.43, 31.47, 31.69, 31.96, 32.33, 32.5,
32.5, 32.58, 32.7, 32.92, 33.2, 33.6, 33.72, 33.77, 33.95, 34.02,
34.08, 34.42, 34.79, 34.91, 34.99, 35.08, 35.15, 35.49, 35.6,
35.6, 35.74, 35.8, 36.05, 36.17, 36.3, 36.37, 36.84, 37.31, 37.95,
38.75, 38.78, 38.81, 38.9, 39.21, 39.31, 39.5, 42.68, 43.92,
43.95, 44.64, 45.7, 45.95, 46.25, 46.8, 49.08, 50.33, 51.23,
52.76, 53.06, 62)
> dput(y)
c(11.91, 13.491, 13.708, 13.984, 14.624, 15.688, 15.823, 16.105,
16.387, 17.004, 17.239, 17.498, 17.686, 17.844, 17.997, 18.044,
18.003, 18.191, 18.332, 18.25, 18.778, 19.237, 19.693, 20.177,
20.441, 20.876, 20.512, 20.894, 21.493, 21.539, 21.951, 21.763,
22.498, 22.451, 22.744, 22.785, 23.409, 23.314, 23.408, 23.567,
23.849, 23.978, 24.472, 24.678, 25.236, 25.547, 25.676, 26.81,
26.83, 27.275, 27.331, 27.844, 28.009, 28.244, 28.497, 28.555,
29.067, 30.412, 30.788, 30.965, 31.058, 31.423, 31.346, 31.118,
31.252, 31.258, 31.399, 31.605, 31.552, 31.881, 31.822, 31.91,
32.333, 32.174, 32.222, 32.704, 32.445, 32.557, 32.993, 32.845,
32.997, 32.909, 32.911, 33.121, 33.191, 33.156, 33.426, 33.332,
33.52, 33.526, 33.697, 33.379, 33.849, 33.726, 33.538, 33.885,
33.961, 34.284, 34.208, 33.896, 34.278, 34.355, 34.276, 34.267,
34.399, 34.507, 34.492, 34.531, 34.695, 34.642, 34.872, 34.772,
34.813, 34.942, 34.883, 34.948, 34.719, 34.983, 34.99, 35.136,
35.007, 34.026, 35.148, 35.201, 35.459, 35.418, 35.236, 35.411,
35.459, 35.5, 35.665, 35.724, 35.636, 35.667, 35.747, 35.788,
35.882, 35.9, 35.83, 36.106, 36.029, 36.364, 36.358, 36.517,
37.005, 36.74, 36.963, 36.634, 37.04, 37.48, 37.581, 37.78, 37.686,
38.262, 37.998, 37.986, 38.498, 39.296, 38.467, 38.779, 38.885,
38.72, 39.038, 38.932, 39.719, 39.654, 39.367, 40.072, 39.707,
39.742, 39.919, 40.054, 40.189, 40.197, 40.154, 40.383, 42.146,
40.595, 40.971, 41.441, 41.964, 42.328, 42.463, 42.627, 42.633,
42.721, 42.786, 42.857, 45.318, 45.665, 46.406, 46.335, 47.663,
47.181, 48.074, 48.109, 49.931, 50.377, 51.053, 52.451, 53.004,
65.889)
> model <- lm(y ~ poly(x,3,raw=TRUE))
> model
Call:
lm(formula = y ~ poly(x, 3, raw = TRUE))
Coefficients:
(Intercept) poly(x, 3, raw = TRUE)1 poly(x, 3, raw = TRUE)2 poly(x, 3, raw = TRUE)3
6.6096981 1.4736619 -0.0238935 0.0002445
Since you have fitted a low order polynomial in ordinary form (raw = TRUE), you can use polyroot to directly find x given y.
## pc: polynomial coefficients in increasing order
solvePC <- function (pc, y) {
pc[1] <- pc[1] - y
## all roots, including complex ones
roots <- polyroot(pc)
## keep real roots
Re(roots)[abs(Im(roots)) / Mod(roots) < 1e-10]
}
y0 <- 38.9 ## example y-value
x0 <- solvePC(coef(model), y0)
#[1] 34.28348
plot(x, y, col = 8)
lines(x, model$fitted, lwd = 2)
abline(h = y0)
abline(v = x0)
To get an interval estimate, we can use sampling methods.
## polyfit: an ordinary polynomial regression model fitted by lm()
rootCI <- function (polyfit, y, nSamples = 1000, level = 0.05) {
## sample regression coefficients from their joint distribution
pc <- MASS::mvrnorm(nSamples, coef(polyfit), vcov(polyfit))
## for each row (a sample), call solvePC()
roots <- apply(pc, 1, solvePC, y)
## confidence interval
quantile(roots, prob = c(0.5 * level, 1 - 0.5 * level))
}
## 95% confidence interval
rootCI(model, y = y0)
# 2.5% 97.5%
#34.17981 34.38828
You can use optim:
Predict the y values given x:
pred_y <- function(x)predict(model, data.frame(x))
pred_y(x = 10)
[1] 19.20145
Now to predict x given y, we do:
pred_x <- function(y) optim(1, \(x) (y-pred_y(x))^2, method='BFGS')[[1]]
pred_x(19.20145)
[1] 10
The uniroot function is intended for this type of problem.
#coefficients for the model
coeff <- c(6.6096981, 1.4736619, -0.0238935, 0.0002445)
#define the equation which one needs the root of
modely <- function(x, y) {
# could use the predict function here
my<-coeff[1] + coeff[2]*x + coeff[3]*x**2 + coeff[4]*x**3
y-my
}
#use the uniroot functiion
#In this example y=10
uniroot(modely, lower=-100, upper=100, y=10)
$root
[1] 2.391022
$f.root
[1] -1.208443e-08
$iter
[1] 10
$init.it
[1] NA
$estim.prec
[1] 6.103516e-05
In this case for y=10, x = 2.391022
I have used hclust in the TSclust package to do agglomerative hierarchical clustering. My question is, Can I get the dissimlarity (distance) matrix back from hclust? I wanted the values of the distance to rank which variable is closer to a single variable in the group of variables.
example: If (x1,x2, x3,x4,x5,x6,x7,x8,x9,x10) are the variables used to form the distance matrix, then what I wanted is the distance between x3 and the rest of variables (x3x1,x3x2,x3x4,x3x5, and so on). Can we do that? Here is the code and reproducible data.
Data:
structure(list(x1 = c(186.41, 100.18, 12.3, 14.38, 25.97, 0.06,
0, 6.17, 244.06, 19.26, 256.18, 255.69, 121.88, 75, 121.45, 11.34,
34.68, 3.09, 34.3, 26.13, 111.31), x2 = c(327.2, 8.05, 4.23,
6.7, 3.12, 1.91, 37.03, 39.17, 140.06, 83.72, 263.29, 261.22,
202.48, 23.27, 2.87, 7.17, 14.48, 3.41, 5.95, 70.56, 91.58),
x3 = c(220.18, 126.14, 98.59, 8.56, 0.5, 0.9, 17.45, 191.1,
164.64, 224.36, 262.86, 237.75, 254.88, 42.05, 9.12, 0.04,
12.22, 0.61, 61.86, 114.08, 78.94), x4 = c(90.74, 26.11,
47.86, 10.86, 3.74, 23.69, 61.79, 68.12, 87.92, 171.76, 260.98,
266.62, 96.27, 57.15, 78.89, 16.73, 6.59, 49.44, 57.21, 202.2,
67.17), x5 = c(134.09, 27.06, 7.44, 4.53, 17, 47.66, 95.96,
129.53, 40.23, 157.37, 172.61, 248.56, 160.84, 421.94, 109.93,
22.77, 2.11, 49.18, 64.13, 52.61, 180.87), x6 = c(173.17,
46.68, 6.54, 3.05, 0.35, 0.12, 5.09, 72.46, 58.19, 112.31,
233.77, 215.82, 100.63, 65.84, 2.69, 0.01, 3.63, 12.93, 66.55,
28, 61.74), x7 = c(157.22, 141.81, 19.98, 116.18, 16.55,
122.3, 62.67, 141.84, 78.3, 227.27, 340.22, 351.38, 147.73,
0.3, 56.12, 33.2, 5.51, 54.4, 82.98, 152.66, 218.26), x8 = c(274.08,
51.92, 54.86, 15.37, 0.31, 0.05, 36.3, 162.04, 171.78, 181.39,
310.73, 261.55, 237.99, 123.99, 1.92, 0.74, 0.23, 18.51,
7.68, 65.55, 171.33), x9 = c(262.71, 192.34, 2.75, 21.68,
1.69, 3.92, 0.09, 9.33, 120.36, 282.92, 236.7, 161.59, 255.44,
126.44, 7.63, 2.04, 1.02, 0.12, 5.87, 146.25, 134.11), x10 = c(82.71,
44.09, 1.52, 2.63, 4.38, 28.64, 168.43, 80.62, 20.36, 39.29,
302.31, 247.52, 165.73, 18.27, 2.67, 1.77, 23.13, 53.47,
53.14, 46.61, 86.29)), class = "data.frame", row.names = c(NA,
-21L))
Code:
as.ts(cdata)
library(dplyr) # data wrangling
library(ggplot2) # grammar of graphics
library(ggdendro) # dendrograms
library(TSclust) # cluster time series
cluster analysis
dist_ts <- TSclust::diss(SERIES = t(cdata), METHOD = "INT.PER") # note the data frame must be transposed
hc <- stats::hclust(dist_ts, method="complete") # method can be also "average" or diana (for DIvisive ANAlysis Clustering)
hcdata <- ggdendro::dendro_data(hc)
names_order <- hcdata$labels$label
# Use the following to remove labels from dendogram so not doubling up - but good for checking hcdata$labels$label <- ""
hcdata%>%ggdendro::ggdendrogram(., rotate=FALSE, leaf_labels=FALSE)
I believe the object you are looking for is stored in the variable dist_ts:
dist_ts <- TSclust::diss(SERIES = t(cdata), METHOD = "INT.PER")
print(dist_ts)
Here is my data
[![enter image description here][1]][1]
my code
library(ggplot2)
library(reshape)
dt1 =read.csv("C:/Users/My DELL/Documents/R_data/machine learning/dt1.csv")
head(dt1)
dt1$month <- seq(nrow(dt1))
library(reshape2)
mm <- melt(subset(dt1,select=c(month,EgbeNa,UrejeNa,EroNa,RefNa,EgbeMg,UrejeMg,EroMg,RefMg
)),id.var="month")
head(mm)
library(lattice)
xyplot(value ~ month|variable,data=mm,type="l",
scales=list(y=list(relation="free")),
layout=c(1,8))
dt_repr = structure(list(Date = c("01-11-17", "01-12-17", "01-01-18", "01-02-18",
"01-03-18", "01-04-18", "01-05-18", "01-06-18", "01-07-18", "01-08-18",
"01-09-18", "01-10-18", "01-11-18", "01-12-18", "01-01-19", "01-02-19",
"01-03-19", "01-04-19", "01-05-19", "01-06-19", "01-07-19", "01-08-19",
"01-09-19", "01-10-19"), month = 1:24, EgbeNa = c(27.4, 29.25,
31.1, 20.4, 13.55, 14, 16.25, 18.5, 24.95, 16.2, 30.15, 28.6,
35.1, 36.5, 28.45, 31.5, 38.1, 28, 32.55, 30.5, 33.2, 30.8, 13,
24.3), UrejeNa = c(10.45, 9, 7.55, 13.35, 11.6, 12.475, 20.1625,
27.85, 21.5, 32.05, 17.65, 15.15, 25.7, 18.8, 26.85, 20.65, 23.5,
26.45, 30.2, 25.75, 28.3, 31.45, 44.4, 39.6), EroNa = c(44.45,
40.55, 36.65, 43, 39.825, 36.825, 44.1, 51.65, 44.2, 56.1, 61.3,
66.05, 15.75, 19.15, 13.05, 12.2, 21.7, 17.9, 14.6, 33.3, 21.2,
19.6, 32.7, 25.1), RefNa = c(10.55, 9.75, 12.35, 19.65, 10.6,
13.74, 22.62, 25.82, 20.4, 31.2, 16.95, 14.25, 15.03, 17.15,
12.75, 13.5, 20.45, 16.8, 15.5, 25.4, 19.5, 19.8, 26.7, 25.1),
EgbeMg = c(4.118, 4.7155, 5.313, 4.4865, 5.1535, 5.1295,
5.113, 5.103, 5.721, 5.285, 3.8575, 4.128, 5.4205, 6.2975,
5.134, 5.4605, 5.124, 4.203, 5.2635, 5.135, 6.092, 5.575,
4.139, 4.8645), UrejeMg = c(3.6655, 3.977, 4.288, 4.192,
4.676, 4.434, 4.7005, 4.966, 5.3895, 5.7165, 4.881, 4.1015,
3.743, 6.132, 6.0785, 6.1775, 6.3135, 6.028, 5.739, 6.126,
4.5155, 4.716, 5.2165, 5.678), EroMg = c(2.472, 2.31425,
2.1565, 2.2115, 2.184, 2.135, 4.135, 6.2005, 5.457, 5.981,
5.784, 5.885, 5.406, 5.248, 4.967, 4.449, 5.058, 5.1675,
5.667, 6.966, 5.17, 4.8965, 7.201, 6.538), RefMg = c(3.75,
3.87, 4.82, 4.132, 3.98, 4.23, 4.57, 5.01, 5.02, 4.67, 4.18,
4.51, 5.21, 5.18, 4.76, 4.29, 4.95, 5.07, 5.45, 5.86, 5.11,
4.79, 6.01, 5.24)), class = "data.frame", row.names = c(NA,
-24L)) #This data is reproducible
and the output
I want to use Date as my x-axis, the Date covers 24 months. It starts at 01-11-17 and ends at 01-10-19. Anyone can help please.
It is difficult to provide answers without using your data. You need to provide your data in a usable format as #r2evans says above. However, you can convert your Date row, which appears to be a string, to Date type and use that as your X-axis. You can format how the date should be displayed by adding the format in the scales list.
For example, in your case:
...
scales=list(
y=list(relation="free"),
x = list(format = "%m-%Y") # or whatever format you need
),
...
or whatever format you need.
Here is one way how you could achieve your task:
library(tidyverse)
library(lubridate)
library(lattice)
df <- dt_repr %>%
pivot_longer(
cols = c(-Date, -month),
names_to = "names",
values_to = "values"
) %>%
mutate(Date = dmy(Date))
xyplot(values ~ Date|names,data=df,type="l",
scales=list(y=list(relation="free")),
layout=c(1,8))
I got the solution using this set of instruction:
#From Painless way to install a new version of R?
Run in the old version of R (or via RStudio)
setwd("C:/Temp/")
packages <- installed.packages()[,"Package"]
save(packages, file="Rpackages")
# INSTALL NEW R VERSION
if(!require(installr)) { install.packages("installr"); require(installr)} #load / install+load installr
# See here for more on installr: https://www.r-statistics.com/2013/03/updating-r-from-r-on-windows-using-the-installr-package/
# step by step functions:
check.for.updates.R() # tells you if there is a new version of R or not.
install.R() # download and run the latest R installer
# Install library - run in the new version of R. This calls package names and installs them from repos, thus all packages should be correct to the most recent version
setwd("C:/Temp/")
load("Rpackages")
for (p in setdiff(packages, installed.packages()[,"Package"]))
install.packages(p)
# Installr includes a package migration tool but this simply copies packages, it does not update them
copy.packages.between.libraries() # copy your packages to the newest R installation from the one version before it (if ask=T, it will ask you between which two versions to perform the copying)
Then all the error messages are gone, the missing packages tidyverse and ggplot2 came back and I have my desired plot with expected x axis
I have a large time series of actual and forecasted electricity generation values for each quarter of the hour, that looks like this:
df<-structure(list(DATETIME = structure(c(1604188800, 1604189700,
1604190600, 1604191500, 1604192400, 1604193300, 1604194200, 1604195100,
1604196000, 1604196900, 1604197800, 1604198700, 1604199600, 1604200500,
1604201400, 1604202300, 1604203200, 1604204100, 1604205000, 1604205900,
1604206800, 1604207700, 1604208600, 1604209500, 1604210400, 1604211300,
1604212200, 1604213100, 1604214000, 1604214900, 1604215800, 1604216700,
1604217600, 1604218500, 1604219400, 1604220300, 1604221200, 1604222100,
1604223000, 1604223900, 1604224800, 1604225700, 1604226600, 1604227500,
1604228400, 1604229300, 1604230200, 1604231100, 1604232000, 1604232900,
1604233800, 1604234700, 1604235600, 1604236500, 1604237400, 1604238300,
1604239200, 1604240100, 1604241000, 1604241900, 1604242800, 1604243700,
1604244600, 1604245500, 1604246400, 1604247300, 1604248200, 1604249100,
1604250000, 1604250900, 1604251800, 1604252700, 1604253600, 1604254500,
1604255400, 1604256300, 1604257200, 1604258100, 1604259000, 1604259900,
1604260800, 1604261700, 1604262600, 1604263500, 1604264400, 1604265300,
1604266200, 1604267100, 1604268000, 1604268900, 1604269800, 1604270700,
1604271600, 1604272500, 1604273400, 1604274300, 1604275200, 1604276100,
1604277000, 1604277900, 1604278800, 1604279700, 1604280600, 1604281500,
1604282400, 1604283300, 1604284200, 1604285100, 1604286000, 1604286900,
1604287800, 1604288700, 1604289600, 1604290500, 1604291400, 1604292300,
1604293200, 1604294100, 1604295000, 1604295900, 1604296800, 1604297700,
1604298600, 1604299500, 1604300400, 1604301300, 1604302200, 1604303100,
1604304000, 1604304900, 1604305800, 1604306700, 1604307600, 1604308500,
1604309400, 1604310300, 1604311200, 1604312100, 1604313000, 1604313900,
1604314800, 1604315700, 1604316600, 1604317500, 1604318400, 1604319300,
1604320200, 1604321100, 1604322000, 1604322900, 1604323800, 1604324700,
1604325600, 1604326500, 1604327400, 1604328300, 1604329200, 1604330100,
1604331000, 1604331900, 1604332800, 1604333700, 1604334600, 1604335500,
1604336400, 1604337300, 1604338200, 1604339100, 1604340000, 1604340900,
1604341800, 1604342700, 1604343600, 1604344500, 1604345400, 1604346300,
1604347200, 1604348100, 1604349000, 1604349900, 1604350800, 1604351700,
1604352600, 1604353500, 1604354400, 1604355300, 1604356200, 1604357100,
1604358000, 1604358900, 1604359800, 1604360700, 1604361600, 1604362500,
1604363400, 1604364300, 1604365200, 1604366100, 1604367000, 1604367900,
1604368800, 1604369700, 1604370600, 1604371500, 1604372400, 1604373300,
1604374200, 1604375100, 1604376000, 1604376900, 1604377800, 1604378700,
1604379600, 1604380500, 1604381400, 1604382300, 1604383200, 1604384100,
1604385000, 1604385900, 1604386800, 1604387700, 1604388600, 1604389500,
1604390400, 1604391300, 1604392200, 1604393100, 1604394000, 1604394900,
1604395800, 1604396700, 1604397600, 1604398500, 1604399400, 1604400300,
1604401200, 1604402100, 1604403000, 1604403900, 1604404800, 1604405700,
1604406600, 1604407500, 1604408400, 1604409300, 1604410200, 1604411100,
1604412000, 1604412900, 1604413800, 1604414700, 1604415600, 1604416500,
1604417400, 1604418300, 1604419200, 1604420100, 1604421000, 1604421900,
1604422800, 1604423700, 1604424600, 1604425500, 1604426400, 1604427300,
1604428200, 1604429100, 1604430000, 1604430900, 1604431800, 1604432700,
1604433600, 1604434500, 1604435400, 1604436300, 1604437200, 1604438100,
1604439000, 1604439900, 1604440800, 1604441700, 1604442600, 1604443500,
1604444400, 1604445300, 1604446200, 1604447100, 1604448000, 1604448900,
1604449800, 1604450700, 1604451600, 1604452500, 1604453400, 1604454300,
1604455200, 1604456100, 1604457000, 1604457900, 1604458800, 1604459700,
1604460600, 1604461500, 1604462400, 1604463300, 1604464200, 1604465100,
1604466000, 1604466900, 1604467800, 1604468700, 1604469600, 1604470500,
1604471400, 1604472300, 1604473200, 1604474100, 1604475000, 1604475900,
1604476800, 1604477700, 1604478600, 1604479500, 1604480400, 1604481300,
1604482200, 1604483100, 1604484000, 1604484900, 1604485800, 1604486700,
1604487600, 1604488500, 1604489400, 1604490300, 1604491200, 1604492100,
1604493000, 1604493900, 1604494800, 1604495700, 1604496600, 1604497500,
1604498400, 1604499300, 1604500200, 1604501100, 1604502000, 1604502900,
1604503800, 1604504700, 1604505600, 1604506500, 1604507400, 1604508300,
1604509200, 1604510100, 1604511000, 1604511900, 1604512800, 1604513700,
1604514600, 1604515500, 1604516400, 1604517300, 1604518200, 1604519100,
1604520000, 1604520900, 1604521800, 1604522700, 1604523600, 1604524500,
1604525400, 1604526300, 1604527200, 1604528100, 1604529000, 1604529900,
1604530800, 1604531700, 1604532600, 1604533500, 1604534400, 1604535300,
1604536200, 1604537100, 1604538000, 1604538900, 1604539800, 1604540700,
1604541600, 1604542500, 1604543400, 1604544300, 1604545200, 1604546100,
1604547000, 1604547900, 1604548800, 1604549700, 1604550600, 1604551500,
1604552400, 1604553300, 1604554200, 1604555100, 1604556000, 1604556900,
1604557800, 1604558700, 1604559600, 1604560500, 1604561400, 1604562300,
1604563200, 1604564100, 1604565000, 1604565900, 1604566800, 1604567700,
1604568600, 1604569500, 1604570400, 1604571300, 1604572200, 1604573100,
1604574000, 1604574900, 1604575800, 1604576700, 1604577600, 1604578500,
1604579400, 1604580300, 1604581200, 1604582100, 1604583000, 1604583900,
1604584800, 1604585700, 1604586600, 1604587500, 1604588400, 1604589300,
1604590200, 1604591100, 1604592000, 1604592900, 1604593800, 1604594700,
1604595600, 1604596500, 1604597400, 1604598300, 1604599200, 1604600100,
1604601000, 1604601900, 1604602800, 1604603700, 1604604600, 1604605500,
1604606400, 1604607300, 1604608200, 1604609100, 1604610000, 1604610900,
1604611800, 1604612700, 1604613600, 1604614500, 1604615400, 1604616300,
1604617200, 1604618100, 1604619000, 1604619900, 1604620800, 1604621700,
1604622600, 1604623500, 1604624400, 1604625300, 1604626200, 1604627100,
1604628000, 1604628900, 1604629800, 1604630700, 1604631600, 1604632500,
1604633400, 1604634300, 1604635200, 1604636100, 1604637000, 1604637900,
1604638800, 1604639700, 1604640600, 1604641500, 1604642400, 1604643300,
1604644200, 1604645100, 1604646000, 1604646900, 1604647800, 1604648700,
1604649600, 1604650500, 1604651400, 1604652300, 1604653200, 1604654100,
1604655000, 1604655900, 1604656800, 1604657700, 1604658600, 1604659500,
1604660400, 1604661300, 1604662200, 1604663100, 1604664000, 1604664900,
1604665800, 1604666700, 1604667600, 1604668500, 1604669400, 1604670300,
1604671200, 1604672100, 1604673000, 1604673900, 1604674800, 1604675700,
1604676600, 1604677500, 1604678400, 1604679300, 1604680200, 1604681100,
1604682000, 1604682900, 1604683800, 1604684700, 1604685600, 1604686500,
1604687400, 1604688300, 1604689200, 1604690100, 1604691000, 1604691900,
1604692800, 1604693700, 1604694600, 1604695500, 1604696400, 1604697300,
1604698200, 1604699100, 1604700000, 1604700900, 1604701800, 1604702700,
1604703600, 1604704500, 1604705400, 1604706300, 1604707200, 1604708100,
1604709000, 1604709900, 1604710800, 1604711700, 1604712600, 1604713500,
1604714400, 1604715300, 1604716200, 1604717100, 1604718000, 1604718900,
1604719800, 1604720700, 1604721600, 1604722500, 1604723400, 1604724300,
1604725200, 1604726100, 1604727000, 1604727900, 1604728800, 1604729700,
1604730600, 1604731500, 1604732400, 1604733300, 1604734200, 1604735100,
1604736000, 1604736900, 1604737800, 1604738700, 1604739600, 1604740500,
1604741400, 1604742300, 1604743200, 1604744100, 1604745000, 1604745900,
1604746800, 1604747700, 1604748600, 1604749500, 1604750400, 1604751300,
1604752200, 1604753100, 1604754000, 1604754900, 1604755800, 1604756700,
1604757600, 1604758500, 1604759400, 1604760300, 1604761200, 1604762100,
1604763000, 1604763900, 1604764800, 1604765700, 1604766600, 1604767500,
1604768400, 1604769300, 1604770200, 1604771100, 1604772000, 1604772900,
1604773800, 1604774700, 1604775600, 1604776500, 1604777400, 1604778300,
1604779200, 1604780100, 1604781000, 1604781900, 1604782800, 1604783700,
1604784600, 1604785500, 1604786400, 1604787300, 1604788200, 1604789100,
1604790000, 1604790900, 1604791800, 1604792700), class = c("POSIXct",
"POSIXt"), tzone = "UTC"), Actual = c(5.718, 5.844, 5.971, 5.925,
5.414, 5.432, 5.521, 5.513, 5.524, 6.182, 6.947, 7.133, 7.746,
7.058, 6.615, 9.665, 12.015, 12.029, 11.957, 12.981, 16.03, 16.821,
16.897, 15.383, 14.029, 14.642, 10.721, 9.187, 8.466, 6.89, 5.916,
7.977, 11.577, 12.432, 14.191, 14.655, 16.77, 17.376, 16.656,
16.659, 16.249, 15.771, 15.969, 15.623, 14.488, 14.506, 14.37,
13.399, 11.806, 10.78, 9.962, 10.093, 8.922, 10.099, 9.832, 8.406,
7.077, 6.514, 5.942, 6.502, 5.731, 5.276, 7.513, 7.141, 6.74,
6.36, 7.061, 8.619, 8.845, 9.808, 9.702, 10.079, 8.703, 7.287,
7.239, 7.768, 7.338, 7.11, 6.975, 7.35, 6.209, 6.441, 6.641,
5.892, 5.148, 4.533, 4.223, 3.89, 3.498, 2.941, 3.06, 3.244,
3.521, 3.703, 3.314, 3.507, 3.705, 3.073, 2.472, 2.344, 2.695,
2.729, 2.652, 2.582, 2.731, 2.759, 2.866, 3.283, 3.138, 3.009,
3.126, 3.389, 3.205, 3.39, 3.942, 4.029, 4.186, 4.282, 4.335,
4.026, 3.863, 3.772, 3.25, 3.282, 3.332, 3.18, 2.929, 3.054,
3.579, 3.886, 3.145, 2.984, 3.305, 3.535, 3.59, 4.079, 4.141,
3.957, 3.786, 3.505, 3.101, 2.892, 2.501, 2.243, 2.151, 1.96,
1.907, 1.911, 1.901, 1.96, 1.854, 1.88, 2.25, 2.252, 2.154, 2.11,
2.081, 2.27, 2.545, 2.864, 2.854, 3.257, 3.8, 4.158, 3.732, 3.771,
4.19, 4.964, 4.323, 5.286, 4.407, 4.795, 5.225, 5.726, 5.732,
5.836, 6.799, 6.322, 6.262, 5.851, 5.172, 5.007, 5.641, 5.812,
4.964, 4.531, 5.07, 5.403, 5.176, 5.122, 5.618, 6.368, 5.941,
6.191, 6.094, 6.703, 6.977, 6.391, 6.664, 6.671, 6.65, 6.687,
7.659, 8.364, 8.088, 7.329, 7.634, 7.981, 8.941, 8.922, 8.898,
8.921, 8.65, 9.39, 8.253, 7.656, 7.588, 6.226, 5.269, 4.915,
4.918, 4.594, 4.873, 4.414, 4.384, 3.967, 3.432, 3.287, 3.48,
3.374, 3.476, 4.54, 6.395, 6.67, 6.094, 5.212, 5.289, 4.99, 4.113,
3.32, 3.09, 2.25, 2.42, 2.812, 2.544, 2.466, 2.892, 3.04, 2.665,
2.303, 2.162, 1.896, 1.704, 1.683, 1.788, 1.841, 2.162, 2.288,
2.108, 1.943, 1.582, 1.347, 1.256, 1.301, 1.649, 1.615, 1.669,
1.855, 1.952, 2.354, 2.513, 2.314, 2.314, 2.623, 2.646, 2.655,
2.795, 2.829, 3.225, 3.759, 4.926, 6.119, 6.206, 6.608, 6.237,
5.74, 6.116, 8.257, 9.366, 9.073, 8.124, 6.595, 5, 4.428, 4.25,
4.662, 6.04, 7.415, 6.713, 6.646, 6.287, 6.502, 6.023, 5.789,
6.211, 7.477, 8.396, 9.687, 11.208, 9.911, 9.085, 8.758, 8.411,
8.321, 8.393, 8.796, 9.682, 9.908, 9.637, 10.39, 11.094, 12.521,
15.086, 14.875, 15.56, 15.396, 15.365, 16.238, 17.188, 17.16,
17.902, 15.802, 14.354, 12.045, 11.883, 12.716, 13.031, 11.346,
12.645, 13.082, 14.082, 14.606, 15.297, 15.215, 14.762, 15.61,
17.65, 17.997, 17.933, 17.884, 17.323, 17.169, 19.862, 23.073,
25.928, 27.872, 28.236, 30.207, 29.643, 28.742, 28.017, 25.973,
26.97, 28.061, 26.099, 25.133, 23.174, 20.483, 19.969, 21.094,
23.736, 26.382, 29.764, 33.129, 36.769, 38.491, 35.788, 35.61,
37.34, 35.794, 35.368, 34.635, 33.84, 33.404, 32.614, 30.159,
32.87, 33.89, 34.26, 33.309, 34.895, 34.596, 35.942, 37.642,
38.688, 39.047, 39.552, 40.228, 41.329, 42.208, 43.073, 41.838,
40.083, 40.6, 41.215, 41.782, 41.454, 41.65, 42.493, 42.297,
42.925, 43.51, 42.537, 42.897, 42.832, 42.54, 43.833, 44.497,
43.118, 42.778, 41.69, 40.37, 40.367, 43.267, 44.982, 46.84,
46.205, 46.376, 43.302, 40.977, 41.712, 42.041, 42.881, 43.467,
43.469, 45.591, 42.482, 45.196, 44.643, 43.234, 43.791, 45.037,
44.114, 43.488, 42.825, 41.382, 41.102, 39.78, 40.251, 40.823,
41.788, 43.272, 42.782, 41.288, 42.616, 44.222, 47.462, 48.783,
48.18, 48.067, 47.608, 46.76, 49.739, 47.404, 47.416, 44.787,
46.945, 49.951, 51.052, 50.017, 51.615, 51.965, 53.686, 54.802,
57.408, 57.979, 57.367, 57.363, 53.659, 50.096, 47.254, 43.275,
43.44, 42.811, 43.405, 47.903, 51.26, 52.405, 55.277, 54.637,
54.626, 54.327, 53.697, 53.366, 54.955, 52.65, 51.526, 52.107,
53.781, 50.12, 51.497, 54.426, 55.57, 54.662, 50.37, 54.62, 58.996,
60.24, 57.378, 56.576, 56.589, 58.439, 59.768, 59.263, 56.691,
59.286, 59.953, 60.097, 55.379, 49.2, 46.915, 52.828, 53.813,
52.989, 53.829, 53.209, 54.683, 55.321, 56.154, 55.016, 54.061,
53.296, 53.772, 52.342, 51.84, 52.607, 53.034, 54.727, 55.591,
54.567, 53.198, 50.51, 49.453, 49.908, 48.499, 47.923, 49.602,
48.64, 51.539, 52.353, 51.426, 50.687, 48.629, 46.685, 48.598,
48.944, 48.143, 46.887, 47.518, 46.229, 45.746, 46.078, 47.155,
46.933, 49.07, 49.243, 48.567, 48.352, 48.28, 48.954, 49.512,
48.704, 49.681, 50.053, 50.957, 49.603, 47.978, 50, 50.963, 50.415,
50.266, 50.155, 50.662, 51.918, 53.166, 52.792, 54.156, 55.07,
53.702, 54.07, 54.098, 54.46, 53.783, 54.023, 53.997, 55.079,
54.819, 54.935, 53.642, 54.181, 54.903, 56.18, 56.016, 55.307,
53.558, 52.03, 54.562, 58.919, 58.351, 57.536, 59.128, 60.585,
60.938, 60.858, 60.539, 59.114, 57.326, 58.094, 58.355, 58.507,
57.85, 55.829, 57.801, 59.946, 58.755, 56.767, 55.223, 55.452,
58.029, 60.207, 60.816, 60.403, 59.365, 58.132, 57.989, 59.469,
58.991, 58.748, 58.43, 57.926, 58.671, 58.192, 58.335, 58.222,
58.802, 58.458, 58.537, 59.014, 59.015, 57.656, 56.055, 53.788,
54.659, 53.911, 54.722, 56.387, 57.327, 57.251, 57.596, 56.207,
54.747, 53.934, 55.108, 55.669, 57.554, 57.824, 56.961, 54.605,
54.705, 54.599, 54.044, 53.627), Forecasted = c(4.843, 4.843,
4.843, 4.843, 3.695, 3.695, 3.695, 3.695, 3.313, 3.313, 3.313,
3.313, 3.198, 3.198, 3.198, 3.198, 3.74, 3.74, 3.74, 3.74, 5.115,
5.115, 5.115, 5.115, 7.01, 7.01, 7.01, 7.01, 9.236, 9.236, 9.236,
9.236, 16.695, 16.695, 16.695, 16.695, 28.66, 28.66, 28.66, 28.66,
34.778, 34.778, 34.778, 34.778, 35.03, 35.03, 35.03, 35.03, 32.805,
32.805, 32.805, 32.805, 28.593, 28.593, 28.593, 28.593, 22.283,
22.283, 22.283, 22.283, 19.164, 19.164, 19.164, 19.164, 17.839,
17.839, 17.839, 17.839, 15.454, 15.454, 15.454, 15.454, 12.195,
12.195, 12.195, 12.195, 8.568, 8.568, 8.568, 8.568, 7.477, 7.477,
7.477, 7.477, 6.314, 6.314, 6.314, 6.314, 5.763, 5.763, 5.763,
5.763, 4.615, 4.615, 4.615, 4.615, 3.57, 3.57, 3.57, 3.57, 3.438,
3.438, 3.438, 3.438, 3.618, 3.618, 3.618, 3.618, 2.983, 2.983,
2.983, 2.983, 2.513, 2.513, 2.513, 2.513, 2.075, 2.075, 2.075,
2.075, 2.015, 2.015, 2.015, 2.015, 2.315, 2.315, 2.315, 2.315,
2.325, 2.325, 2.325, 2.325, 2.363, 2.363, 2.363, 2.363, 2.058,
2.058, 2.058, 2.058, 1.455, 1.455, 1.455, 1.455, 1.878, 1.878,
1.878, 1.878, 2.165, 2.165, 2.165, 2.165, 2.633, 2.633, 2.633,
2.633, 3.279, 3.279, 3.279, 3.279, 2.935, 2.935, 2.935, 2.935,
2.833, 2.833, 2.833, 2.833, 2.89, 2.89, 2.89, 2.89, 3.952, 3.952,
3.952, 3.952, 4.037, 4.037, 4.037, 4.037, 4.311, 4.311, 4.311,
4.311, 4.242, 4.242, 4.242, 4.242, 2.478, 2.478, 2.478, 2.478,
6.285, 6.285, 6.285, 6.285, 6.803, 6.803, 6.803, 6.803, 7.54,
7.54, 7.54, 7.54, 8.383, 8.383, 8.383, 8.383, 8.86, 8.86, 8.86,
8.86, 8.65, 8.65, 8.65, 8.65, 8.11, 8.11, 8.11, 8.11, 7.783,
7.783, 7.783, 7.783, 6.995, 6.995, 6.995, 6.995, 5.98, 5.98,
5.98, 5.98, 5.22, 5.22, 5.22, 5.22, 5.023, 5.023, 5.023, 5.023,
7.343, 7.343, 7.343, 7.343, 7.29, 7.29, 7.29, 7.29, 7.315, 7.315,
7.315, 7.315, 7.63, 7.63, 7.63, 7.63, 7.875, 7.875, 7.875, 7.875,
7.993, 7.993, 7.993, 7.993, 5.315, 5.315, 5.315, 5.315, 5.787,
5.787, 5.787, 5.787, 5.097, 5.097, 5.097, 5.097, 6.878, 6.878,
6.878, 6.878, 6.337, 6.337, 6.337, 6.337, 4.025, 4.025, 4.025,
4.025, 2.665, 2.665, 2.665, 2.665, 4.573, 4.573, 4.573, 4.573,
6.248, 6.248, 6.248, 6.248, 5.534, 5.534, 5.534, 5.534, 6.268,
6.268, 6.268, 6.268, 5.964, 5.964, 5.964, 5.964, 5.23, 5.23,
5.23, 5.23, 7.155, 7.155, 7.155, 7.155, 9.775, 9.775, 9.775,
9.775, 12.565, 12.565, 12.565, 12.565, 11.868, 11.868, 11.868,
11.868, 11.473, 11.473, 11.473, 11.473, 10.698, 10.698, 10.698,
10.698, 12.933, 12.933, 12.933, 12.933, 13.148, 13.148, 13.148,
13.148, 12.845, 12.845, 12.845, 12.845, 11.588, 11.588, 11.588,
11.588, 16.374, 16.374, 16.374, 16.374, 19.543, 19.543, 19.543,
19.543, 21.938, 21.938, 21.938, 21.938, 22.425, 22.425, 22.425,
22.425, 22.738, 22.738, 22.738, 22.738, 28.695, 28.695, 28.695,
28.695, 29.352, 29.352, 29.352, 29.352, 34.76, 34.76, 34.76,
34.76, 31.81, 31.81, 31.81, 31.81, 33.791, 33.791, 33.791, 33.791,
35.23, 35.23, 35.23, 35.23, 35.308, 35.308, 35.308, 35.308, 35.13,
35.13, 35.13, 35.13, 37.11, 37.11, 37.11, 37.11, 37.073, 37.073,
37.073, 37.073, 40.596, 40.596, 40.596, 40.596, 40.7, 40.7, 40.7,
40.7, 45.062, 45.062, 45.062, 45.062, 43.892, 43.892, 43.892,
43.892, 44.33, 44.33, 44.33, 44.33, 42.638, 42.638, 42.638, 42.638,
45.95, 45.95, 45.95, 45.95, 50.997, 50.997, 50.997, 50.997, 50.458,
50.458, 50.458, 50.458, 49.61, 49.61, 49.61, 49.61, 50.95, 50.95,
50.95, 50.95, 49.273, 49.273, 49.273, 49.273, 46.993, 46.993,
46.993, 46.993, 47.368, 47.368, 47.368, 47.368, 48.526, 48.526,
48.526, 48.526, 51.974, 51.974, 51.974, 51.974, 61.343, 61.343,
61.343, 61.343, 61.38, 61.38, 61.38, 61.38, 61.763, 61.763, 61.763,
61.763, 61.96, 61.96, 61.96, 61.96, 59.295, 59.295, 59.295, 59.295,
59.98, 59.98, 59.98, 59.98, 59.233, 59.233, 59.233, 59.233, 56.633,
56.633, 56.633, 56.633, 57.81, 57.81, 57.81, 57.81, 63.66, 63.66,
63.66, 63.66, 67.34, 67.34, 67.34, 67.34, 67.59, 67.59, 67.59,
67.59, 68.453, 68.453, 68.453, 68.453, 66.291, 66.291, 66.291,
66.291, 62.495, 62.495, 62.495, 62.495, 61.142, 61.142, 61.142,
61.142, 51.96, 51.96, 51.96, 51.96, 48.465, 48.465, 48.465, 48.465,
51.932, 51.932, 51.932, 51.932, 54.498, 54.498, 54.498, 54.498,
56.787, 56.787, 56.787, 56.787, 58.053, 58.053, 58.053, 58.053,
57.649, 57.649, 57.649, 57.649, 54.156, 54.156, 54.156, 54.156,
39.908, 39.908, 39.908, 39.908, 40.138, 40.138, 40.138, 40.138,
41.303, 41.303, 41.303, 41.303, 41.175, 41.175, 41.175, 41.175,
39.023, 39.023, 39.023, 39.023, 39.09, 39.09, 39.09, 39.09, 39.238,
39.238, 39.238, 39.238, 44.308, 44.308, 44.308, 44.308, 41.595,
41.595, 41.595, 41.595, 45.503, 45.503, 45.503, 45.503, 45.773,
45.773, 45.773, 45.773, 46.703, 46.703, 46.703, 46.703, 51.37,
51.37, 51.37, 51.37, 50.908, 50.908, 50.908, 50.908, 46.036,
46.036, 46.036, 46.036, 43.526, 43.526, 43.526, 43.526, 38.05,
38.05, 38.05, 38.05, 34.29, 34.29, 34.29, 34.29, 34.306, 34.306,
34.306, 34.306, 37.668, 37.668, 37.668, 37.668, 41.563, 41.563,
41.563, 41.563, 41.171, 41.171, 41.171, 41.171, 42.225, 42.225,
42.225, 42.225, 43.463, 43.463, 43.463, 43.463)), row.names = c(NA,
672L), class = "data.frame")
I want to calculate the mean absolute percentage error for each (calendar) week of the dataset and I have tried several ways, such as the function apply.weekly, but none of them seems to work.
apply.weekly(df, function(x) mean(abs((df$Actual-df$Forecasted)/df$Actual)))
I keep getting as outcome the same value for each quarter of the dataset, and not an outcome for each week.
Do you have any ideas on how to implement this calculation?
Thank you in advance for your help.
Perhaps like so:
library(lubridate)
library(magrittr)
df %>% group_by( isoweek( DATETIME ), year( DATETIME ) ) %>%
summarise( DateCount = n_distinct(date(DATETIME)), MeanError = mean( abs(Forecasted-Actual)/Actual ) )
It outputs:
`isoweek(DATETIME)` `year(DATETIME)` DateCount MeanError
<dbl> <dbl> <int> <dbl>
1 44 2020 1 0.858
2 45 2020 6 0.357
With the updated data source:
df <- read.delim( "https://raw.githubusercontent.com/Argiro1983/data-set/main/test1", sep=";" ) %>%
as_tibble
df %<>% mutate( DATETIME = dmy_hm(DATETIME) )
df %>% group_by( year( DATETIME ), isoweek( DATETIME ) ) %>%
summarise( DateCount = n_distinct(date(DATETIME)), MeanError = mean( abs(Forecasted-Actual)/Actual ) )
Output:
`year(DATETIME)` `isoweek(DATETIME)` DateCount MeanError
<dbl> <dbl> <int> <dbl>
1 2020 44 1 0.858
2 2020 45 7 0.321
3 2020 46 7 0.505
4 2020 47 7 0.439
5 2020 48 7 0.309
6 2020 49 7 0.545
7 2020 50 7 0.342
8 2020 51 7 0.357
9 2020 52 7 0.204
10 2020 53 4 0.120
# … with 14 more rows
Dates should work fine if you set them up correctly.