Yearly seasonal sums for DJF - r

I want to compute seasonal sums following the meteorological definition of DJF (December, January, February), which means the December values come from year x-1.
There is already a suggestion that uses the packages seas and zoo for my kind of problem: Link to the reference. Can I use a loop over the time index of my zoo object to get the winter sums for each year and for the different columns? My sample data already contains only the winter months:
structure(c(0.335767631885527, 0.329964137686826, 0.324867678295622,
0.346234032749876, 0.315486588076342, 0.373440783616547, 0.393108355980974,
0.310526442402042, 0.955068399718777, 0.959654624426492, 0.293930575800507,
0.350949140946517, 0.657761387039141, 0.53822087533681, 0.296938223280703,
0.318325593619261, 0.827528522109129, 0.914084376992577, 0.914209302937996,
0.913163846516007, 0.776698687524975, 0.597284692104539, 0.91488961230643,
0.28945161773974, 0.282895617679457, 0.28492139335934, 0.928492227792593,
0.287740157404564, 0.93011080075256, 0.32787462005944, 0.809245564874419,
0.299095322129539, 0.302473955104931, 0.453458703894119, 0.331724139938735,
0.314265997270211, 0.378968117507553, 0.344955599135117, 0.961200295699775,
1.07300929383762, 0.339365254133058, 0.421999171190298, 0.351276824906379,
0.36810350819186, 0.364237601690115, 0.425751222495895, 1.2000504740503,
0.401585883450189, 0.393244206959102, 0.412013522316855, 1.40622761554481,
1.43010692801434, 1.45452312391606, 1.44102848262452, 0.583854512560274,
0.453530324821785, 0.836929179095723, 0.485649439571136, 1.45323622566975,
1.42066532567401, 1.55192692063172, 1.69545734226667, 1.59084952877426,
0.536277991651981, 0.878100994910164, 1.80588869793109, 0.612726668114702,
1.49557275883036, 1.83080789724595, 0.859368961826519, 1.3537163175202,
0.795003445956722, 1.68510799767645, 1.94219078558463, 0.678911636490617,
1.98538116097216, 1.39431924099171, 0.716178198907659, 0.897864731079577,
0.739754008960108, 1.32647638785145, 1.27550346512974, 1.57782298324095,
1.17541538713537, 1.08141388070016, 2.81373485339402, 0.841584582588819,
2.98872530454666, 1.93484656658214, 3.01625884992721, 0.902448663673698,
0.361944635028181, 1.03795562218241, 0.961881521906292, 0.704732279822006,
0.894256898010956, 0.307197052425753, 0.620230669033494, 0.900835004143219,
0.336503062729966, 0.376726235662507, 0.323019953443342, 0.291097473211189,
0.583926906347703, 0.540940525007957, 0.906358816314195, 0.372788957369332,
0.335375002309946, 0.914209302937996, 0.328320596067713, 0.659589829678685,
0.68859386616471, 0.91488961230643, 0.902977019532625, 0.739324647975471,
0.603576498397486, 0.690375139214112, 0.603004583921208, 0.659868379563069,
0.292376232645021, 0.562401086780579, 0.298131207627614, 0.299095322129539,
0.302473955104931, 0.705840069893102, 0.993644273952054, 0.425326528868129,
0.400345928302124, 0.361221494378293, 0.328750601711733, 0.55820945179875,
0.748093576785292, 0.345188978576, 0.351315165819748, 0.357626992140137,
0.517538802067647, 1.04751086637289, 0.385695811626645, 0.385612146149294,
0.397271280188057, 0.550298801906058, 1.28131889629393, 0.82396230266283,
1.03189532043667, 0.502923809446499, 1.13388533378536, 0.821249922028902,
0.496130920693478, 0.491056299113018, 0.861144623672965, 0.498763665924562,
0.912165347541201, 0.64869230436972, 1.32528603957948, 1.75339437114229,
1.78285803283739, 1.11217610098546, 0.597795159831033, 1.00740416004752,
0.739549658487185, 0.607139331936484, 1.35734916834937, 1.43608105985186,
1.80042779869959, 1.18905308118327, 1.70456429994882, 0.905541925940458,
2.22398340066076, 2.16944665030202, 2.29546486372867, 1.85605245367111,
1.1239234690604, 2.50480944519147, 1.02954245959557, 0.975126362552554,
2.14223132835323, 2.91282474285556, 2.66863827732602, 0.933593864631134,
2.70815814163342, 2.87351062547491, 0.335329222971355, 0.934907402460015,
0.57591904762801, 0.907224647738403, 0.320417497402957, 0.766767831651282,
0.861903342837008, 0.303464733511709, 0.709698376015027, 0.308598232977547,
0.293930575800507, 0.29130992351097, 0.28896933229556, 0.45769807141885,
0.468340431926149, 0.830040974016766, 0.282420179745874, 0.477428977916008,
0.733418492651481, 0.822348309121175, 0.280392410026905, 0.542239475756514,
0.281077879631808, 0.281845318148658, 0.42849080424256, 0.295089908538224,
0.747925637213591, 0.929814463524078, 0.310954657683433, 0.292376232645021,
0.64500798819687, 0.690255336889303, 0.364309565584761, 0.306129346468766,
0.311371964852598, 0.915461004824963, 0.397063771122394, 1.0404933625801,
0.483845551843616, 0.333807374425717, 0.402255447456447, 0.453946781602374,
0.394538152500142, 0.357626992140137, 0.364237601690115, 0.372020526598045,
0.37823224873185, 0.389581791596903, 0.393244206959102, 0.401126173348066,
0.563948059226945, 0.625538021242673, 0.80823517471131, 0.440809452269821,
0.753920921570439, 0.571583127323145, 0.463092290982252, 0.576935449307388,
0.482901053437729, 1.40965077473646, 1.25183016539419, 0.856169846501004,
1.72377824975207, 0.536277991651981, 1.13652692119597, 1.24290457699823,
1.64437171023011, 1.87302947654355, 0.594841647571458, 2.04410190051534,
1.62571002130845, 1.13052139459963, 0.836130011762252, 1.85233449007414,
2.38839794838805, 1.09920265799031, 1.94766079436355, 1.66770758466983,
1.27453119791191, 2.57917818578189, 1.13896219096471, 2.74804359878488,
1.69823856330245, 0.935150681359782, 1.74656095016161, 0.835168244061429,
0.841584582588819, 0.856635868155615, 0.972724285567558, 2.42939239419398,
0.96325679668782, 0.640892567004161, 1.03795562218241, 0.949309568900219,
0.316910844084317, 0.311204732481577, 0.307197052425753, 0.303464733511709,
0.779574150582344, 0.296830513889512, 0.335960010735195, 0.4390886067335,
0.28896933229556, 0.306902835898889, 0.926150657204963, 0.388532344331494,
0.495283643343666, 0.916064063737401, 0.281013296117892, 0.913163846516007,
0.912928724576721, 0.438926937515807, 0.59117658733228, 0.517844090756594,
0.704234100156676, 0.913848110190877, 0.423829975580762, 0.795497269555325,
0.289917958593354, 0.292376232645021, 0.295114252321699, 0.345353147959634,
0.854886103409894, 0.62965115658928, 0.776701146370991, 0.446059142229343,
0.326457042618417, 0.568752212327844, 0.325374322793979, 0.374762702815228,
0.333807374425717, 0.420206697512664, 0.399408381034396, 0.456977698650331,
0.357626992140137, 0.596680957599271, 1.29550961397828, 1.24265117031916,
0.580164026815441, 0.393244206959102, 0.401126173348066, 0.443462006528755,
0.417630422649225, 0.426247823064678, 0.505363855323395, 0.494595916530596,
1.12922054709106, 0.482617341273223, 0.650774092876326, 0.5452273225038,
1.61305811763483, 1.66701808699342, 0.514281824935098, 0.525174470147384,
1.6850349371761, 1.78354241230912, 1.83460579403794, 1.86582069105335,
1.40279004365455, 0.594841647571458, 0.691585610303159, 0.619623644706909,
2.06846657922012, 0.710726446010795, 0.997307890433014, 2.40064963745822,
2.22161516025196, 1.79188547652641, 2.19553900228869, 2.1816869110449,
2.1984531582332, 2.55364304827728, 0.918827215513173, 0.930267750935017,
0.798812034349413, 0.830829315142733, 1.13089106389005, 1.00204606351463,
1.07126361979325, 0.871799972892206, 1.28166129954517), .Dim = c(181L,
2L), .Dimnames = list(NULL, NULL), index = structure(c(699, 700,
701, 702, 703, 704, 705, 706, 707, 708, 709, 710, 711, 712, 713,
714, 715, 716, 717, 718, 719, 720, 721, 722, 723, 724, 725, 726,
727, 728, 729, 730, 731, 732, 733, 734, 735, 736, 737, 738, 739,
740, 741, 742, 743, 744, 745, 746, 747, 748, 749, 750, 751, 752,
753, 754, 755, 756, 757, 758, 759, 760, 761, 762, 763, 764, 765,
766, 767, 768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778,
779, 780, 781, 782, 783, 784, 785, 786, 787, 788, 789, 1065,
1066, 1067, 1068, 1069, 1070, 1071, 1072, 1073, 1074, 1075, 1076,
1077, 1078, 1079, 1080, 1081, 1082, 1083, 1084, 1085, 1086, 1087,
1088, 1089, 1090, 1091, 1092, 1093, 1094, 1095, 1096, 1097, 1098,
1099, 1100, 1101, 1102, 1103, 1104, 1105, 1106, 1107, 1108, 1109,
1110, 1111, 1112, 1113, 1114, 1115, 1116, 1117, 1118, 1119, 1120,
1121, 1122, 1123, 1124, 1125, 1126, 1127, 1128, 1129, 1130, 1131,
1132, 1133, 1134, 1135, 1136, 1137, 1138, 1139, 1140, 1141, 1142,
1143, 1144, 1145, 1146, 1147, 1148, 1149, 1150, 1151, 1152, 1153,
1154), class = "Date"), class = "zoo")

library(hydroTSM)
dm2seasonal(df, FUN=sum, season="DJF")
I've used the package hydroTSM. The package can also be used for other seasons (MAM, JJA, SON) and with other functions (e.g. mean). You can compute the yearly seasonal sums for every column in your matrix (df). seas does the same for every column, but you have to write your own loop to get yearly seasonal sums I guess. mkseas() from seas will compute the sum over all winter months in your timeseries.

Related

How to solve error when using adorn_totals function in R?

I get the following message of error when using janitor::adorn_totals("row"):
"Error in adorn_totals(., "row") :
trying to re-add a totals dimension that is already been added"
Here is the head of my dataset :
structure(list(code_1 = c("M01", "C03", "M99", "C05", "O01",
"C07"), regroupement_elsan = c("Gastro", "Ophtalmo", "Divers médecine",
"Gynéco", "Accouchements", "bouche et dents"), actes_2019 = c(9179,
5589, 6024, 4150, 4028, 3458), actes_2020 = c(7933, 4167, 3740,
2994, 3348, 2206), actes_2021 = c(6504, 5505, 4682, 3376, 3226,
3035), sejours_2019 = c(1631, 2502, 1028, 852, 1455, 1288), sejours_2020 = c(1335,
1819, 726, 574, 1371, 801), sejours_2021 = c(1109, 2416, 825,
657, 1259, 1106), tx_0_nuit_2019 = c("3.92397302268547", "90.7673860911271",
"32.9766536964981", "57.5117370892019", "0.206185567010309",
"98.9130434782609"), tx_0_nuit_2020 = c("3.29588014981273", "92.9081913139087",
"47.1074380165289", "59.581881533101", "0.291757840991977", "99.250936329588"
), tx_0_nuit_2021 = c("3.6068530207394", "95.4470198675497",
"18.3030303030303", "60.2739726027397", "0.158856235107228",
"98.7341772151899"), pourcentage = c(5.37796226165473, 4.55191916519208,
3.87140518282095, 2.79151300666457, 2.66748251170021, 2.50955034811226
), pourcentage_cumule = c(78.4062908267046, 82.9582099918967,
86.8296151747176, 89.6211281813822, 92.2886106930824, 94.7981610411947
)), row.names = c(NA, -6L), class = c("tabyl", "tbl_df", "tbl",
"data.frame"), core = structure(list(code_1 = c("M01b", "C01",
"C02", "C04", "M01", "C03", "M99", "C05", "O01", "C07", "C08",
"C99", "C98", "C10", "C06", "M03", "O02", "M02", "M04", "C01b",
"O03", "S99", "***", "C10b", "M05", "M98", "O04"), regroupement_elsan = c("Endoscopies
digestives",
"Ortho (+ rhumato et rachis)", "Chirurgie digestive", "Uro-néphro",
"Gastro", "Ophtalmo", "Divers médecine", "Gynéco", "Accouchements",
"bouche et dents", "Tissus mou et chir plastique", "Divers chir",
"Chir esth et hors sécu", "Chir thoracique et vasculaire", "ORL Stomato sf bouche et
dent",
"Pneumologie", "Obstétrique autre (hors IVG)", "Cardio Vasc (médecine)",
"Neurologie", "Rachis", "IVG", "Séances autres", "Autres", "Chir thoracique",
"Soins palliatifs", "Vasculaire interventionnel", "Néo nat"),
actes_2019 = c(36079, 29520, 14618, 6515, 9179, 5589, 6024,
4150, 4028, 3458, 2137, 2180, 575, 449, 866, 388, 294, 311,
714, 395, 292, 1842, 10, 0, 4, 0, 1), actes_2020 = c(30192,
25451, 12845, 7376, 7933, 4167, 3740, 2994, 3348, 2206, 2107,
1477, 575, 437, 337, 897, 193, 218, 267, 308, 118, 737, 8,
4, 0, 11, 5), actes_2021 = c(42333, 24055, 13735, 8196, 6504,
5505, 4682, 3376, 3226, 3035, 2571, 1134, 689, 511, 352,
272, 181, 161, 138, 106, 82, 61, 18, 8, 7, 0, 0), sejours_2019 = c(6992,
5493, 2577, 1221, 1631, 2502, 1028, 852, 1455, 1288, 540,
397, 236, 158, 260, 63, 148, 101, 90, 44, 246, 1820, 4, 0,
1, 0, 1), sejours_2020 = c(5811, 4946, 2220, 1220, 1335,
1819, 726, 574, 1371, 801, 554, 269, 221, 140, 94, 42, 109,
79, 58, 34, 98, 720, 2, 1, 0, 1, 5), sejours_2021 = c(7922,
5144, 2523, 1451, 1109, 2416, 825, 657, 1259, 1106, 649,
264, 278, 162, 111, 51, 108, 69, 30, 21, 77, 54, 7, 1, 2,
0, 0), tx_0_nuit_2019 = c("96.0955377574371", "63.5718186783179",
"41.4435389988359", "36.2817362817363", "3.92397302268547",
"90.7673860911271", "32.9766536964981", "57.5117370892019",
"0.206185567010309", "98.9130434782609", "72.5925925925926",
"53.904282115869", "13.9830508474576", "96.2025316455696",
"50.7692307692308", "42.8571428571429", "85.1351351351351",
"72.2772277227723", "11.1111111111111", "4.54545454545455",
"100,0", "100,0", "100,0", "0,0", "0,0", "0,0", "0,0"), tx_0_nuit_2020 =
c("96.0936155567028",
"67.3069146785281", "40.5855855855856", "34.344262295082",
"3.29588014981273", "92.9081913139087", "47.1074380165289",
"59.581881533101", "0.291757840991977", "99.250936329588",
"76.3537906137184", "49.814126394052", "11.7647058823529",
"99.2857142857143", "53.1914893617021", "16.6666666666667",
"74.3119266055046", "81.0126582278481", "25.8620689655172",
"8.82352941176471", "98.9795918367347", "100,0", "100,0",
"100,0", "0,0", "0,0", "20,0"), tx_0_nuit_2021 = c("96.7053774299419",
"73.2892690513219", "51.0503369005153", "41.9021364576154",
"3.6068530207394", "95.4470198675497", "18.3030303030303",
"60.2739726027397", "0.158856235107228", "98.7341772151899",
"83.9753466872111", "60.2272727272727", "50,0", "94.4444444444444",
"72.972972972973", "1.96078431372549", "81.4814814814815",
"85.5072463768116", "43.3333333333333", "52.3809523809524",
"100,0", "100,0", "100,0", "100,0", "0,0", "0,0", "0,0")), row.names = c(NA,
-27L), class = "data.frame"), tabyl_type = "two_way", totals = "row")
And the code I tried :
library(janitor)
autres %>%
adorn_totals("row")
Could anyone help ? I had indeed used the adorn_totals function on the dataframe used to generate the dataframe "autres", but I made sure the row "total" isn't in the dataframe "autres" anymore.
With the object you have shared as x:
x %>%
untabyl() %>%
adorn_totals()
Why it works:
You can see at the end of the object you shared, tabyl_type = "two_way", totals = "row". Those attributes are stored with the data.frame you're working with. When you try to adorn_totals() a second time, janitor checks this and errors.
When you call untabyl() it strips those attributes. Then adorn_totals() succeeds.
I notice you have a cumulative percentage column. If desired, you can control exactly which columns get a totals value in adorn_totals() - see ?adorn_totals and the ... argument for how, and here's an example: https://stackoverflow.com/a/69759313.

How to plot two groups of values?

These are my four sets of mean values:
meanf1hindi = c(253, 297, 377, 426, 476, 518, 560, 620, 657, 697)
meanf2hindi = c(850, 887, 1017, 1080, 1197, 1342, 1694, 1820, 2265)
meanf1tamil = c(260, 304, 390, 435, 483, 527, 563, 628, 670, 704)
meanf2tamil = c(891, 826, 1018, 1068, 1188, 1355, 1709, 1834, 1976, 2303)
I would like to make a linear graph of meanf1hindi and meanf2hindi together, and do the same with meanf1tamil and meanf2tamil.
This is what I did so far, and don't know how to proceed further:
plot(meanf1hindi, meanf2hindi)
Error in xy.coords(x, y, xlabel, ylabel, log) :
'x' and 'y' lengths differ
You get the error because your vectors have different lengths. You can make the two vectors the same length by removing one value from the longer vector; in this case, remove one value from meanf1hindi like this:
> length(meanf1hindi)
[1] 10
> length(meanf2hindi)
[1] 9
plot(meanf1hindi[-1], meanf2hindi)
Output:

how to extract fitted values in a forecast model after multiple model simulations

This is my original df and fitted model
library(tsibble)
library(tibble)
library(ISOweek)
library(fable)
library(forecast)
library(fpp3)
library(dplyr)
library(tidyverse)
Original.df <- structure(list(YearWeek = c("201901", "201902", "201903", "201904",
"201905", "201906", "201907", "201908", "201909", "201910", "201911",
"201912", "201913", "201914", "201915", "201916", "201917", "201918",
"201919", "201920", "201921", "201922", "201923", "201924", "201925",
"201926", "201927", "201928", "201929", "201930", "201931", "201932",
"201933", "201934", "201935", "201936", "201937", "201938", "201939",
"201940", "201941", "201942", "201943", "201944", "201945", "201946",
"201947", "201948", "201949", "201950", "201951", "201952", "202001",
"202002", "202003", "202004", "202005", "202006", "202007", "202008",
"202009", "202010", "202011", "202012", "202013", "202014", "202015",
"202016", "202017", "202018", "202019", "202020", "202021", "202022",
"202023", "202024", "202025", "202026", "202027", "202028", "202029",
"202030", "202031", "202032", "202033", "202034", "202035", "202036",
"202037", "202038", "202039", "202040", "202041", "202042", "202043",
"202044", "202045", "202046", "202047", "202048", "202049", "202050",
"202051", "202052", "202053", "202101", "202102", "202103", "202104",
"202105", "202106", "202107", "202108", "202109", "202110", "202111",
"202112", "202113", "202114", "202115", "202116", "202117", "202118",
"202119", "202120", "202121", "202122", "202123", "202124", "202125",
"202126", "202127", "202128", "202129", "202130", "202131", "202132",
"202133", "202134", "202135", "202136", "202137", "202138", "202139",
"202140", "202141", "202142", "202143"), Shipment = c(418, 1442,
1115, 1203, 1192, 1353, 1191, 1411, 933, 1384, 1362, 1353, 1739,
1751, 1595, 1380, 1711, 2058, 1843, 1602, 2195, 2159, 2009, 1812,
2195, 1763, 821, 1892, 1781, 2071, 1789, 1789, 1732, 1384, 1435,
1247, 1839, 2034, 1963, 1599, 1596, 1548, 1084, 1350, 1856, 1882,
1979, 1021, 1311, 2031, 1547, 591, 724, 1535, 1268, 1021, 1269,
1763, 1275, 1411, 1847, 1379, 1606, 1473, 1180, 926, 800, 840,
1375, 1755, 1902, 1921, 1743, 1275, 1425, 1088, 1416, 1168, 842,
1185, 1570, 1435, 1209, 1470, 1368, 1926, 1233, 1189, 1245, 1465,
1226, 887, 1489, 1369, 1358, 1179, 1200, 1226, 1066, 823, 1913,
2308, 1842, 910, 794, 1098, 1557, 1417, 1851, 1876, 1010, 160,
1803, 1607, 1185, 1347, 1700, 981, 1191, 1058, 1464, 1513, 1333,
1169, 1294, 978, 962, 1254, 987, 1290, 758, 436, 579, 636, 614,
906, 982, 649, 564, 502, 274, 473, 506, 902, 639, 810, 398, 488
), Production = c(0, 198, 1436, 1055, 1396, 1330, 1460, 1628,
1513, 1673, 1737, 1274, 1726, 1591, 2094, 1411, 2009, 1909, 1759,
1693, 1748, 1455, 2078, 1717, 1737, 1886, 862, 1382, 1779, 1423,
1460, 1454, 1347, 1409, 1203, 1235, 1397, 1563, 1411, 1455, 1706,
688, 1446, 1336, 1618, 1404, 1759, 746, 1560, 1665, 1317, 0,
441, 1390, 1392, 1180, 1477, 1265, 1485, 1495, 1543, 1584, 1575,
1609, 1233, 1420, 908, 1008, 1586, 1392, 1385, 1259, 1010, 973,
1053, 905, 1101, 1196, 891, 1033, 925, 889, 1136, 1058, 1179,
1047, 967, 900, 904, 986, 1014, 945, 1030, 1066, 1191, 1143,
1292, 574, 1174, 515, 1296, 1315, 1241, 0, 0, 1182, 1052, 1107,
1207, 1254, 1055, 258, 1471, 1344, 1353, 1265, 1444, 791, 1397,
1186, 1264, 1032, 949, 1059, 954, 798, 956, 1074, 1136, 1209,
975, 833, 994, 1127, 1153, 1202, 1234, 1336, 1484, 1515, 1151,
1175, 976, 1135, 1272, 869, 1900, 1173), Net.Production.Qty = c(22,
188, 1428, 1031, 1382, 1368, 1456, 1578, 1463, 1583, 1699, 1318,
1582, 1537, 2118, 1567, 1961, 1897, 1767, 1603, 1666, 1419, 2186,
1621, 1677, 1840, 698, 1290, 1411, 927, 1754, 1222, 1411, 1549,
1491, 1359, 1179, 1945, 1463, 1465, 1764, 764, 810, 1308, 1830,
1542, 1695, 544, 1482, 1673, 1659, 0, 445, 1358, 1364, 1224,
1417, 1239, 1387, 1595, 1469, 1624, 1643, 1763, 1217, 1456, 568,
1290, 1666, 1428, 1327, 773, 1118, 1231, 1143, 921, 1083, 1124,
935, 903, 937, 849, 1132, 1032, 1143, 1081, 891, 886, 880, 1002,
1072, 969, 1000, 996, 1243, 1183, 1306, 650, 1226, 553, 1306,
1379, 1359, 0, 0, 1182, 988, 1099, 1173, 1244, 1039, 254, 1425,
1318, 1385, 1221, 1364, 739, 1397, 1112, 1160, 924, 971, 1015,
978, 828, 868, 994, 1090, 1165, 783, 887, 934, 1023, 1045, 1114,
1052, 1186, 1456, 1401, 1249, 779, 430, 1625, 1498, 883, 1860,
1101), isoweek = c("2019-W01-1", "2019-W02-1", "2019-W03-1",
"2019-W04-1", "2019-W05-1", "2019-W06-1", "2019-W07-1", "2019-W08-1",
"2019-W09-1", "2019-W10-1", "2019-W11-1", "2019-W12-1", "2019-W13-1",
"2019-W14-1", "2019-W15-1", "2019-W16-1", "2019-W17-1", "2019-W18-1",
"2019-W19-1", "2019-W20-1", "2019-W21-1", "2019-W22-1", "2019-W23-1",
"2019-W24-1", "2019-W25-1", "2019-W26-1", "2019-W27-1", "2019-W28-1",
"2019-W29-1", "2019-W30-1", "2019-W31-1", "2019-W32-1", "2019-W33-1",
"2019-W34-1", "2019-W35-1", "2019-W36-1", "2019-W37-1", "2019-W38-1",
"2019-W39-1", "2019-W40-1", "2019-W41-1", "2019-W42-1", "2019-W43-1",
"2019-W44-1", "2019-W45-1", "2019-W46-1", "2019-W47-1", "2019-W48-1",
"2019-W49-1", "2019-W50-1", "2019-W51-1", "2019-W52-1", "2020-W01-1",
"2020-W02-1", "2020-W03-1", "2020-W04-1", "2020-W05-1", "2020-W06-1",
"2020-W07-1", "2020-W08-1", "2020-W09-1", "2020-W10-1", "2020-W11-1",
"2020-W12-1", "2020-W13-1", "2020-W14-1", "2020-W15-1", "2020-W16-1",
"2020-W17-1", "2020-W18-1", "2020-W19-1", "2020-W20-1", "2020-W21-1",
"2020-W22-1", "2020-W23-1", "2020-W24-1", "2020-W25-1", "2020-W26-1",
"2020-W27-1", "2020-W28-1", "2020-W29-1", "2020-W30-1", "2020-W31-1",
"2020-W32-1", "2020-W33-1", "2020-W34-1", "2020-W35-1", "2020-W36-1",
"2020-W37-1", "2020-W38-1", "2020-W39-1", "2020-W40-1", "2020-W41-1",
"2020-W42-1", "2020-W43-1", "2020-W44-1", "2020-W45-1", "2020-W46-1",
"2020-W47-1", "2020-W48-1", "2020-W49-1", "2020-W50-1", "2020-W51-1",
"2020-W52-1", "2020-W53-1", "2021-W01-1", "2021-W02-1", "2021-W03-1",
"2021-W04-1", "2021-W05-1", "2021-W06-1", "2021-W07-1", "2021-W08-1",
"2021-W09-1", "2021-W10-1", "2021-W11-1", "2021-W12-1", "2021-W13-1",
"2021-W14-1", "2021-W15-1", "2021-W16-1", "2021-W17-1", "2021-W18-1",
"2021-W19-1", "2021-W20-1", "2021-W21-1", "2021-W22-1", "2021-W23-1",
"2021-W24-1", "2021-W25-1", "2021-W26-1", "2021-W27-1", "2021-W28-1",
"2021-W29-1", "2021-W30-1", "2021-W31-1", "2021-W32-1", "2021-W33-1",
"2021-W34-1", "2021-W35-1", "2021-W36-1", "2021-W37-1", "2021-W38-1",
"2021-W39-1", "2021-W40-1", "2021-W41-1", "2021-W42-1", "2021-W43-1"
), date = structure(c(17896, 17903, 17910, 17917, 17924, 17931,
17938, 17945, 17952, 17959, 17966, 17973, 17980, 17987, 17994,
18001, 18008, 18015, 18022, 18029, 18036, 18043, 18050, 18057,
18064, 18071, 18078, 18085, 18092, 18099, 18106, 18113, 18120,
18127, 18134, 18141, 18148, 18155, 18162, 18169, 18176, 18183,
18190, 18197, 18204, 18211, 18218, 18225, 18232, 18239, 18246,
18253, 18260, 18267, 18274, 18281, 18288, 18295, 18302, 18309,
18316, 18323, 18330, 18337, 18344, 18351, 18358, 18365, 18372,
18379, 18386, 18393, 18400, 18407, 18414, 18421, 18428, 18435,
18442, 18449, 18456, 18463, 18470, 18477, 18484, 18491, 18498,
18505, 18512, 18519, 18526, 18533, 18540, 18547, 18554, 18561,
18568, 18575, 18582, 18589, 18596, 18603, 18610, 18617, 18624,
18631, 18638, 18645, 18652, 18659, 18666, 18673, 18680, 18687,
18694, 18701, 18708, 18715, 18722, 18729, 18736, 18743, 18750,
18757, 18764, 18771, 18778, 18785, 18792, 18799, 18806, 18813,
18820, 18827, 18834, 18841, 18848, 18855, 18862, 18869, 18876,
18883, 18890, 18897, 18904, 18911, 18918, 18925), class = "Date")), row.names = c(NA,
148L), class = "data.frame")
# Converting the df to accommodate leap years for weekly observations
Original.df <- Original.df %>%
mutate(
isoweek =stringr::str_replace(YearWeek, "^(\\d{4})(\\d{2})$", "\\1-W\\2-1"),
date = ISOweek::ISOweek2date(isoweek)
)
# creating test and train data
Original.train.df <- Original.df %>%
filter(date >= "2018-12-31", date <= "2021-03-29")
Original.test.df <- Original.df %>%
filter(date >= "2021-04-05", date <= "2021-10-25")
# splitting the original train data to contain only Week, Dependent and Independent variables
Total.train.df<-Original.train.df %>%
mutate(Week.1 = yearweek(ISOweek::ISOweek(date))) %>%
select(-YearWeek, -Production, -date,-isoweek) %>%
as_tsibble(index = Week.1)
#Fitting forecast model(Arima with Fourier terms) to Net.Production.qty
fit_all_models.Prod.1 <- list()
for(K in seq(25)){
fit.Prod.1 <- Total.train.df %>%
model(ARIMA(Net.Production.Qty ~ fourier(K = K),stepwise = FALSE, approximation = FALSE))
names(fit.Prod.1) <- paste0("arima_", K)
fit_all_models.Prod.1 <- bind_cols(fit_all_models.Prod.1, fit.Prod.1)
}
glance(fit_all_models.Prod.1) %>% arrange(AICc) %>% select(.model:BIC)
best_model.Prod.1 <- glance(fit_all_models.Prod.1) %>%
filter(AICc == min(AICc)) %>%
select(.model) %>%
as.character
#Forecasting Net.Production.Qty for 30 steps using the fitted model above-Model.1
Forecast.Net.Prod.1<-fit_all_models.Prod.1 %>%
select(all_of(best_model.Prod.1)) %>%
forecast(h = 30)
#To extract fitted values from the model which has min AICc
fitted.Prod.1<-fit.Prod.1 %>%
filter(AICc == min(AICc)) %>% fitted()
As you can see from the last step above, I'm trying to extract the fitted values from the model with the minimum AICc, but it is not working.
If anyone could help me get the fitted values from the model above with the minimum AICc, that would be really helpful.
Thank you
You are almost there:
# your code .....
# get the fitted based on the selection in best_model.Prod.1
fitted.Prod.1 <- fit_all_models.Prod.1 %>%
select(all_of(best_model.Prod.1)) %>%
fitted()
fitted.Prod.1
# A tsibble: 118 x 3 [1W]
# Key: .model [1]
.model Week.1 .fitted
<chr> <week> <dbl>
1 arima_13 2019 W01 21.0
2 arima_13 2019 W02 486.
3 arima_13 2019 W03 1007.
4 arima_13 2019 W04 965.
5 arima_13 2019 W05 1012.
6 arima_13 2019 W06 1088.
7 arima_13 2019 W07 1175.
8 arima_13 2019 W08 1166.
9 arima_13 2019 W09 1305.
10 arima_13 2019 W10 1613.
# ... with 108 more rows

From 201906141930 to "2019-06-14 19:30:00"

I have a data frame with a column dateHourMinute that I need to convert to POSIXct to make a plot.
For example this dateHourMinute 201906141930, I'd like to get: 2019-06-14 19:30:00 as a POSIXct element.
data:
structure(list(dateHourMinute = c("201906141930", "201906141931",
"201906141932", "201906141933", "201906141934", "201906141935",
"201906141936", "201906141937", "201906141938", "201906141939",
"201906141940", "201906141941", "201906141942", "201906141943",
"201906141944", "201906141945", "201906141946", "201906141947",
"201906141948", "201906141949", "201906141950", "201906141951",
"201906141952", "201906141953", "201906141954", "201906141955",
"201906141956", "201906141957", "201906141958", "201906141959",
"201906142000", "201906142001", "201906142002", "201906142003",
"201906142004", "201906142005", "201906142006", "201906142007",
"201906142008", "201906142009", "201906142010", "201906142011",
"201906142012", "201906142013", "201906142014", "201906142015",
"201906142016", "201906142017", "201906142018", "201906142019",
"201906142020", "201906142021", "201906142022", "201906142023",
"201906142024", "201906142025", "201906142026", "201906142027",
"201906142028", "201906142029", "201906142030", "201906142031",
"201906142032", "201906142033", "201906142034", "201906142035",
"201906142036", "201906142037", "201906142038", "201906142039",
"201906142040", "201906142041", "201906142042", "201906142043",
"201906142044", "201906142045", "201906142046", "201906142047",
"201906142048", "201906142049", "201906142050", "201906142051",
"201906142052", "201906142053", "201906142054", "201906142055",
"201906142056", "201906142057", "201906142058", "201906142059",
"201906142100", "201906142101", "201906142102", "201906142103",
"201906142104", "201906142105", "201906142106", "201906142107",
"201906142108", "201906142109", "201906142110", "201906142111",
"201906142112", "201906142113", "201906142114", "201906142115",
"201906142116", "201906142117", "201906142118", "201906142119",
"201906142120", "201906142121", "201906142122", "201906142123",
"201906142124", "201906142125", "201906142126", "201906142127",
"201906142128", "201906142129", "201906142130", "201906142131",
"201906142132", "201906142133", "201906142134", "201906142135",
"201906142136", "201906142137", "201906142138", "201906142139",
"201906142140", "201906142141", "201906142142", "201906142143",
"201906142144", "201906142145", "201906142146", "201906142147",
"201906142148", "201906142149", "201906142150", "201906142151",
"201906142152", "201906142153", "201906142154", "201906142155"
), users = c(2894, 2969, 3031, 2912, 2845, 2837, 2832, 2731,
2784, 2681, 2682, 2614, 2569, 2551, 2580, 2588, 2574, 2458, 2419,
2504, 2430, 2401, 2322, 2252, 2329, 2374, 2201, 2142, 2163, 2133,
2087, 2078, 2053, 2206, 2093, 2091, 2045, 2059, 1945, 1943, 1951,
1972, 1899, 1822, 1841, 1906, 1778, 2148, 3297, 2098, 1801, 1650,
1630, 1626, 1674, 1647, 1633, 1671, 1757, 1862, 1968, 2045, 2119,
2396, 2513, 2394, 2375, 2492, 2488, 2381, 2417, 2337, 2243, 2211,
1999, 2021, 2037, 2418, 2254, 2050, 2004, 1944, 1802, 1718, 1726,
1725, 1641, 1657, 1592, 1604, 1551, 1553, 1486, 1481, 1518, 1479,
1310, 1317, 1329, 1259, 1255, 1259, 1407, 1352, 1250, 1250, 1223,
1149, 1103, 1108, 1025, 1165, 1870, 1452, 1418, 1469, 1522, 1303,
1147, 1060, 1004, 1001, 1003, 983, 894, 870, 882, 863, 832, 790,
819, 732, 751, 752, 694, 692, 926, 862, 755, 736, 796, 803, 771,
869, 745, 709)), row.names = c(NA, -146L), totals = list(list(
users = "2016665")), minimums = list(list(users = "1")), maximums = list(
list(users = "11863")), isDataGolden = TRUE, rowCount = 2875L, class = "data.frame")
You could use :
df$dateHourMinute <- as.POSIXct(df$dateHourMinute,format = "%Y%m%d%H%M", tz = "UTC")
#Or with `strptime`
#df$dateHourMinute <- strptime(df$dateHourMinute, format = "%Y%m%d%H%M", tz = "UTC")
head(df)
# dateHourMinute users
#1 2019-06-14 19:30:00 2894
#2 2019-06-14 19:31:00 2969
#3 2019-06-14 19:32:00 3031
#4 2019-06-14 19:33:00 2912
#5 2019-06-14 19:34:00 2845
#6 2019-06-14 19:35:00 2837
Or with lubridate
# Parse the "YYYYMMDDHHMM" strings into POSIXct (ymd_hm() defaults to UTC)
df$dateHourMinute <- lubridate::ymd_hm(df$dateHourMinute)

Split time-series between any interval

I have a time series at 10-minute intervals. I want the sub-series covering the interval from 23:10:00 to 00:00:00. Here is the dput of the data:
df<-structure(c(994, 1019, 1381, 843, 1105, 1120, 869, 2216, 1741,
1737, 1727, 1462, 1564, 418, 281, 280, 277, 311, 242, 221, 328,
359, 410, 436, 359, 1738, 2075, 1766, 1812, 1810, 1246, 323,
250, 272, 283, 286, 252, 1671, 1695, 1687, 1646, 1257, 1632,
277, 305, 292, 261, 309, 304, 209, 210, 225, 201, 197, 247, 264,
238, 260, 254, 263, 226, 624, 1955, 1561, 1231, 976, 1213, 167,
1037, 1269, 1619, 1749, 1674, 1123, 1695, 2164, 1780, 1732, 1715,
283, 230, 291, 281, 137, 1358, 1630, 1626, 1889, 1635, 1591,
1606, 2024, 1783, 1752, 613, 301, 933, 1823, 1831, 1810, 1895,
1876, 1222, 1952, 1288, 282, 261, 296, 839, 1831, 1799, 1950,
2085, 1921, 1862, 1885, 1869, 1909, 1896, 1843), .Dim = c(120L,
1L), .Dimnames = list(NULL, "value"), index = structure(c(1430764200,
1430847600, 1430848200, 1430848800, 1430849400, 1430850000, 1430850600,
1430934000, 1430934600, 1430935200, 1430935800, 1430936400, 1430937000,
1431020400, 1431021000, 1431021600, 1431022200, 1431022800, 1431023400,
1431106800, 1431107400, 1431108000, 1431108600, 1431109200, 1431109800,
1431193200, 1431193800, 1431194400, 1431195000, 1431195600, 1431196200,
1431279600, 1431280200, 1431280800, 1431281400, 1431282000, 1431282600,
1431366000, 1431366600, 1431367200, 1431367800, 1431368400, 1431369000,
1431452400, 1431453000, 1431453600, 1431454200, 1431454800, 1431455400,
1431538800, 1431539400, 1431540000, 1431540600, 1431541200, 1431541800,
1431625200, 1431625800, 1431626400, 1431627000, 1431627600, 1431628200,
1431711600, 1431712200, 1431712800, 1431713400, 1431714000, 1431714600,
1431798000, 1431798600, 1431799200, 1431799800, 1431800400, 1431801000,
1431884400, 1431885000, 1431885600, 1431886200, 1431886800, 1431887400,
1431970800, 1431971400, 1431972000, 1431972600, 1431973200, 1431973800,
1432057200, 1432057800, 1432058400, 1432059000, 1432059600, 1432060200,
1432143600, 1432144200, 1432144800, 1432145400, 1432146000, 1432146600,
1432230000, 1432230600, 1432231200, 1432231800, 1432232400, 1432233000,
1432316400, 1432317000, 1432317600, 1432318200, 1432318800, 1432319400,
1432402800, 1432403400, 1432404000, 1432404600, 1432405200, 1432405800,
1432489200, 1432489800, 1432490400, 1432491000, 1432491600), tclass = c("POSIXct",
"POSIXt"), tzone = "Asia/Kolkata"), .indexCLASS = c("POSIXct",
"POSIXt"), .indexTZ = "Asia/Kolkata", tclass = c("POSIXct", "POSIXt"
), tzone = "Asia/Kolkata", class = c("xts", "zoo"))
Required output is:
Is there any existing function which can do this? I tried split.xts, but was not able to get required form.
You could use xts with only base R or use chained expressions with dplyr and tidyr. Base R's unstack and tidyr's spread both take two columns of data containing key-value pairs and arrange them as separate columns of values for each unique key value. Code would look like:
# base R version
library(xts)
df2 <- unstack(data.frame(value=coredata(df), time = format(index(df), "%H:%M")),
value ~ time)[,c(2:6,1)]
# version using chained expressions with dplyr and tidyr
library(xts)
library(dplyr)
library(tidyr)
# Reshape to one row per date and one column per time of day.
# After spread() the columns are: Index, 00:00, 23:10, 23:20, 23:30, 23:40,
# 23:50 (keys are sorted), so columns 3:7 followed by 2 give the chronological
# order 23:10 ... 23:50, 00:00 -- matching the base R version's c(2:6, 1).
df3 <- df %>% fortify.zoo() %>%
  mutate(time=format(Index, "%H:%M"), Index=format(Index, "%Y-%m-%d") ) %>%
  spread(key=time, value=value) %>%
  select(c(3:7,2))

Resources