Related
I have been using dplyr::summarise to sum daily data by month and have just spent a few hours finally figuring out that for whatever reason the decimal places in my raw data was throwing the function off.
Basically, the monthly sums for 'temp1' (which is the 'Mod' data) are wrong (the digits look right but the decimal place is in the wrong place) until I apply a rounding function (rounded to 7 or less digits fixes the problem it seems, 8 just reproduces the problem)
temp1 <- as.data.frame(read.csv("data/ModObs.csv"))
temp1$Date <- as.Date(temp1$Date, "%d/%m/%Y")
((((temp1$Obs <- round(temp1$Obs, 7))))#this line fixes the problem
((((temp1$Mod <- round(temp1$Mod, 7))))#this line fixes the problem
temp1$yearmonth <- lubridate::floor_date(as.Date(temp1$Date), 'month')
temp2 <- dplyr::group_by(temp1, yearmonth)
temp2 <- dplyr::summarise(temp2, Obs = sum(Obs, na.rm=TRUE))
temp1 <- dplyr::group_by(temp1, yearmonth)
temp1 <- dplyr::summarise(temp1, Mod = sum(Mod, na.rm=TRUE))
My question is, is there a way of using dplyr::summarise that means I would not have encountered this issue (which took me quite a while to figure out and solve)?
dput with error:
dput(temp1)
structure(list(yearmonth = structure(c(0, 31, 59, 90, 120, 151,
181, 212, 243, 273, 304, 334, 365, 396, 424, 455, 485, 516, 546,
577, 608, 638, 669, 699, 730, 761, 790, 821, 851, 882, 912, 943,
974, 1004, 1035, 1065, 1096, 1127, 1155, 1186, 1216, 1247, 1277,
1308, 1339, 1369, 1400, 1430, 1461, 1492, 1520, 1551, 1581, 1612,
1642, 1673, 1704, 1734, 1765, 1795, 1826, 1857, 1885, 1916, 1946,
1977, 2007, 2038, 2069, 2099, 2130, 2160, 2191, 2222, 2251, 2282,
2312, 2343, 2373, 2404, 2435, 2465, 2496, 2526, 2557, 2588, 2616,
2647, 2677, 2708, 2738, 2769, 2800, 2830, 2861, 2891, 2922, 2953,
2981, 3012, 3042, 3073, 3103, 3134, 3165, 3195, 3226, 3256, 3287,
3318, 3346, 3377, 3407, 3438, 3468, 3499, 3530, 3560, 3591, 3621,
3652, 3683, 3712, 3743, 3773, 3804, 3834, 3865, 3896, 3926, 3957,
3987, 4018, 4049, 4077, 4108, 4138, 4169, 4199, 4230, 4261, 4291,
4322, 4352, 4383, 4414, 4442, 4473, 4503, 4534, 4564, 4595, 4626,
4656, 4687, 4717, 4748, 4779, 4807, 4838, 4868, 4899, 4929, 4960,
4991, 5021, 5052, 5082, 5113, 5144, 5173, 5204, 5234, 5265, 5295,
5326, 5357, 5387, 5418, 5448, 5479, 5510, 5538, 5569, 5599, 5630,
5660, 5691, 5722, 5752, 5783, 5813, 5844, 5875, 5903, 5934, 5964,
5995, 6025, 6056, 6087, 6117, 6148, 6178, 6209, 6240, 6268, 6299,
6329, 6360, 6390, 6421, 6452, 6482, 6513, 6543, 6574, 6605, 6634,
6665, 6695, 6726, 6756, 6787, 6818, 6848, 6879, 6909, 6940, 6971,
6999, 7030, 7060, 7091, 7121, 7152, 7183, 7213, 7244, 7274), class = "Date"),
Obs = c(29.5, 1.6, 225.9, 305, 485.9, 392.6, 86.7, 422.1,
262.6, 22.6, 12.7, 40.3, 96.5, 1.4, 0, 40.1, 251.9, 494.4,
181.6, 86, 69.6, 128.4, 560.9, 55.8, 14.2, 94.1, 8.9, 28.2,
10.3, 1.4, 2.6, 9, 14.2, 5.6, 9.5, 1.6, 0, 195.1, 24.3, 18.9,
26, 34.3, 68.1, 144.1, 213.3, 99.2, 36.1, 17, 19.5, 13.3,
0, 352.7, 2812.7, 82, 311.9, 314.2, 397.5, 783.7, 388.7,
40.4, 1.3, 0, 2.1, 2.9, 2.1, 8.6, 1.2, 260.8, 494.4, 1125.1,
983.3, 20.9, 3.2, 0, 0.1, 1.1, 0.6, 32.9, 16.2, 124.1, 204.3,
340.6, 212.6, 42.8, 56.7, 2.2, 22.6, 78.3, 100.2, 786.4,
990.7, 330.2, 119.9, 2.7, 3.5, 5.7, 9.4, 17.6, 104.2, 168.9,
43, 118.4, 69.7, 479.3, 435.4, 101.4, 52.9, 104, 28.6, 0.8,
1.8, 13.1, 54.4, 26.2, 6.3, 24.8, 30.7, 53.9, 1.4, 0, 4.6,
0, 1.5, 14.5, 67.3, 217.4, 460.5, 251.2, 72.3, 29.3, 57.7,
29.7, 1.4, 1, 30.4, 3.6, 164.2, 378.4, 861.1, 982.5, 116.5,
68.4, 33.9, 1.5, 52.3, 7.5, 18.2, 70.4, 75.9, 158.9, 26.1,
2.3, 6, 8.4, 0.1, 43.9, 8.3, 0, 0.7, 8.6, 38.3, 24.2, 110.1,
164.1, 239, 120.8, 23.9, 24.7, 1.8, 1.4, 54.7, 75.6, 11.6,
19.6, 69.4, 199.7, 648.3, 260.9, 53.5, 4.5, 0.5, 0, 0.6,
60.9, 9, 130.9, 61.7, 539.5, 222.1, 31.6, 19.8, 288.6, 83.3,
2, 1.8, 104.4, 214, 108.3, 504.2, 152.6, 110.2, 103.3, 14.7,
128.2, 3.5, 2.5, 7.2, 47.4, 73.6, 116.2, 150.6, 161.1, 58.8,
32.9, 12.1, 33.3, 31.3, 0.7, 39.7, 0.3, 26, 102.2, 55.2,
46.3, 62.3, 15.4, 200.2, 98.9, 35.5, 0.4, 80.3, 286.5, 348.8,
646.5, 340.3, 1048.5, 558.6, 365.5, 129.4, 3.7), Mod = c(58.456732574,
0.647399496, 106.77816386, 267.838017351, 599.939323463,
250.80934844, 113.281660213, 241.663996002, 127.530387061,
52.687410089, 84.890244021, 41.364802773, 59.23208781, 8.497558874,
0.672761812, 15.465132304, 358.926445816, 399.9093607, 97.971842098,
42.72450411, 78.475537521, 267.696647395, 1499.730009232,
164.134543701, 15.739950594, 117.176571603, 0.29960511, 33.153451885,
71.35707594, 1.976493212, 38.99406048, 58.699745671, 88.893788732,
55.590919209, 17.675911123, 0.323688533, 0.802922429, 255.339027286,
86.973361482, 56.672316286, 195.494804037, 113.402888496,
88.016557451, 146.313739207, 141.11162499, 309.49712486,
42.342303882, 32.801816137, 9.804984811, 14.876734504, 0.741273571,
432.148407136, 2516.875488309, 47.539316029, 269.405152962,
183.64372206, 154.563624943, 467.720012557, 153.054373772,
32.514885627, 1.830055421, 0.066762771, 1.044433442, 1.346976081,
17.458179607, 49.907434727, 53.305731876, 353.57856375, 310.529543548,
962.398015832, 344.181844335, 30.810939684, 8.040785393,
0.377896164, 0.798674902, 0.801987649, 0.691369382, 13.928109124,
12.703685263, 137.85141766, 211.000002457, 444.374773665,
187.06473363, 56.579158088, 79.307114494, 1.185915374, 7.450495202,
350.92445957, 168.333585374, 1103.415013415, 530.738230571,
97.400577403, 120.218466778, 26.53863178, 4.95759286, 1.415953207,
1.349259407, 7.598631896, 31.687964985, 111.63067543, 253.033200389,
260.084267318, 174.328538378, 435.075601539, 266.057507136,
169.491413576, 370.501536962, 325.734910145, 52.804905885,
0.637640491, 0.749393501, 61.044014158, 236.352010674, 69.236802018,
50.981912279, 113.755615714, 68.015519965, 228.376481539,
1.35494224, 0.370446501, 1.782754512, 0.200525121, 0.567715904,
2.562157517, 66.360280078, 537.409598471, 548.857756317,
131.818783821, 79.38907511, 216.219977069, 86.85853468, 91.042441797,
1.310072508, 4.6784498, 49.359916771, 12.139100379, 792.36086926,
360.543361637, 674.819587278, 417.14625705, 71.213853069,
88.470327459, 26.501678301, 0.660446628, 1.983262203, 0.053393889,
2.116900185, 103.974625465, 146.1406309, 241.456322328, 41.761031962,
19.165518836, 61.329157567, 77.461701504, 0.717057613, 60.244865985,
2.038796249, 0.024612503, 1.15999722, 0.987871135, 66.752973657,
101.602951298, 207.507552152, 351.874694806, 239.490966404,
373.402963887, 83.392418938, 96.713198206, 2.471314963, 1.789748376,
23.642411238, 274.437164678, 27.196302352, 122.879115856,
203.243972815, 261.450286079, 674.337097864, 201.592587766,
66.457305017, 14.265446489, 11.688820111, 0.287104024, 0.498545345,
10.595714786, 64.36811409, 147.944544256, 105.263660789,
348.781394762, 115.965911604, 81.556952547, 35.877763907,
293.156577573, 122.052605838, 0.891615203, 0.201455399, 17.693507458,
191.862026713, 93.030313466, 379.074639489, 88.590763754,
138.225716958, 438.407332197, 11.107850781, 175.835916749,
0.793580574, 0.755295219, 1.253581528, 2.175241521, 126.829190302,
167.624256025, 261.538659971, 143.633607733, 58.216055381,
101.857571372, 37.192461414, 112.344312062, 100.262190061,
2.378722279, 4.946631624, 0.435401092, 51.481605801, 155.384067186,
153.115869623, 128.316180053, 153.07003862, 80.585921934,
221.89445498, 62.047224666, 105.157734971, 0.203008456, 51.168132113,
310.567488885, 238.329914336, 783.724869869, 193.016695288,
431.815627948, 143.854730373, 358.082995503, 84.620059176,
1.146042245)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-240L))
Your data is exactly the same before and after the rounding.
library(dplyr)
temp1 <- as.data.frame(read.csv("C:/Users/xxx/Documents/modops.csv"))
temp1$Date <- as.Date(temp1$Date, "%d/%m/%Y")
temp1$yearmonth <- lubridate::floor_date(as.Date(temp1$Date), 'month')
notfixed <- dplyr::group_by(temp1, yearmonth) %>% summarise(Mod = sum(Mod, na.rm=TRUE))
temp1$Obs <- round(temp1$Obs, 7)#this line fixes the problem
fixed <- dplyr::group_by(temp1, yearmonth) %>% summarise(Mod = sum(Mod, na.rm=TRUE))
> identical(fixed, notfixed)
[1] TRUE
I have a data frame with 4000 columns and daily observations sorted by time. I want to create new columns that lag all existing columns 50 times in the past. So for a column Y create 50 additional columns that are Y-1day,Y-2days,Y-3days...Y-50days.
So far I've wrapped the following loop which does what I need to make.
The issue is that it's not very fast. Is there a more efficient way I can test?
for(i in 2:ncol(Data)){
for(j in 1:50){
Data<- slide(Data, Var = names(Data[i]), slideBy = -j)
}}
I'm attaching a snapshot of my data frame for reproducible example:
structure(list(time = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28,
29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76,
77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92,
93, 94, 95, 96, 97, 98, 99, 100), A = c(17.081545, 16.630901,
16.623749, 16.258942, 16.244635, 16.165951, 15.886981, 15.865522,
15.529327, 15.772532, 16.04435, 15.779685, 15.915594, 15.593705,
15.336195, 15.593705, 15.736767, 15.736767, 15.457797, 15.815451,
16.108727, 16.237482, 15.808297, 16.058655, 16.53791, 16.988556,
16.516453, 16.480686, 16.967096, 17.181688, 17.446352, 17.11731,
16.952789, 16.8598, 16.795422, 16.437769, 16.587982, 16.845493,
17.167381, 17.510729, 17.410587, 17.474964, 17.246065, 17.703863,
17.424892, 17.174536, 17.103004, 16.695278, 16.93133, 16.638054,
16.115879, 16.20887, 15.987124, 16.151646, 16.151646, 16.115879,
16.173105, 16.101574, 16.080114, 15.9299, 15.879828, 15.786839,
15.314735, 15.27897, 15.493563, 15.436337, 15.286123, 15.121602,
15.27897, 14.88555, 14.785408, 14.592275, 14.785408, 14.856938,
14.670959, 15.243204, 15.09299, 15.250358, 15.264664, 15.18598,
14.771102, 14.842632, 15, 15.150214, 15.200286, 15.078684, 15.379113,
15.658083, 15.636623, 15.879828, 15.715307, 15.729613, 15.422031,
16.080114, 16.39485, 16.502146, 16.74535, 16.902718, 17.088697,
16.831188), AAP = c(29.033333, 28.84, 28.893333, 28.866667, 28.700001,
28.799999, 28.973333, 28.866667, 28.806667, 28.973333, 29.713333,
29.033333, 28.626667, 28.546667, 28.173334, 28.166666, 28.24,
28.553333, 28.366667, 28.733334, 28.833334, 28.9, 29.166666,
29.846666, 30.08, 30.093334, 29.673334, 29.860001, 30.053333,
30.186666, 29.833334, 29.673334, 34.533333, 33.82, 33.373333,
33.633335, 33.593334, 33.833332, 33.586666, 33.946667, 34.66,
34.599998, 34.84, 34.779999, 34.093334, 33.713333, 33.560001,
33.933334, 33.086666, 33.139999, 33.279999, 33.200001, 33.259998,
32.466667, 32.713333, 32.686668, 33.053333, 33.806667, 33.333332,
33.613335, 33.633335, 33.799999, 34.206665, 34.5, 34.166668,
34.206665, 33.933334, 34, 34.373333, 33.700001, 33.173332, 32.633335,
32.639999, 34.013332, 33.566666, 34.053333, 34.053333, 34.826668,
35.106667, 35.68, 35.653332, 35.566666, 35.380001, 35.419998,
35.966667, 36.573334, 36.673332, 36.486668, 36.286667, 36.099998,
35.433334, 35.419998, 35.84, 36.533333, 36.779999, 38.98, 39.633335,
39.646667, 39.486668, 39.433334), AAPL = c(4.520714, 4.567143,
4.607143, 4.610714, 4.946429, 4.925714, 4.611429, 4.675714, 4.985714,
5.014286, 5.046429, 4.991428, 5.032857, 5.035, 5.054286, 5.146429,
5.160714, 5.188571, 5.284286, 5.492857, 5.537857, 5.687857, 5.557857,
5.631429, 5.638571, 5.778572, 5.624286, 5.597143, 5.800714, 6.045,
6.315, 6.437857, 6.272143, 6.200714, 6.092143, 6.302143, 6.352143,
6.356429, 6.408571, 6.357143, 6.302857, 5.97, 6.115714, 6.107143,
5.79, 5.621428, 5.69, 5.752857, 5.76, 5.851429, 5.882857, 6.035714,
6.137143, 6.242857, 6.118571, 6.078571, 6.071429, 6.075714, 5.964286,
6.114286, 5.952857, 5.841429, 5.87, 5.984286, 6.047143, 6.222857,
6.248571, 5.988572, 6.094285, 5.862857, 5.322857, 5.05, 5.088572,
5.298572, 5.072857, 5.311429, 5.071429, 5.282857, 5.17, 5.135714,
5.077143, 5.151429, 5.204286, 5.172857, 5.307143, 5.24, 5.32,
5.281428, 5.202857, 5.087143, 4.875714, 4.967143, 5.078571, 5.051429,
5.12, 5.364286, 5.364286, 5.68, 5.671429, 5.682857), ABC = c(14.5375,
14.4225, 14.395, 14.5175, 14.475, 14.475, 14.51, 14.515, 14.275,
14.3175, 14.4875, 14.375, 14.5025, 14.2525, 14.3925, 14.13, 14.47,
14.365, 14.5925, 14.57, 14.74, 14.71, 14.995, 14.9, 14.8625,
15.0325, 14.78, 14.875, 15.085, 15.0525, 15.4275, 15.3075, 14.9225,
15, 14.7025, 14.7975, 15, 15, 14.975, 15.3775, 15.435, 15.5325,
15.6625, 15.6575, 15.695, 15.1275, 15.1025, 15.0775, 15.265,
15.0325, 14.905, 15.1975, 15.215, 15.2025, 15.1025, 15.3775,
15.2775, 13.5075, 13.5275, 13.95, 14.3225, 14.09, 14.4275, 14.735,
14.6475, 14.8, 14.4575, 14.62, 14.7525, 14.7, 14.9, 15.125, 14.83,
14.9525, 14.825, 14.9625, 15, 14.975, 14.9675, 15.0975, 15.0875,
15.32, 15.5125, 15.38, 15.51, 15.575, 15.7475, 15.9975, 15.9175,
15.895, 15.955, 15.98, 16.209999, 16.459999, 16.5725, 16.514999,
16.4925, 16.5, 16.495001, 16.4825), ABMD = c(15.01, 14.98, 14.69,
14.52, 14.29, 14.42, 14.31, 14.17, 12.45, 12.05, 11.87, 11.97,
11.41, 11.16, 11.06, 11.2, 11.1, 11.57, 11.43, 11.88, 11.58,
11.12, 11.16, 11.32, 10.97, 10.88, 10.72, 10.3, 10.75, 10.25,
10.29, 10.41, 10.02, 10.05, 10.08, 10, 10.24, 10.89, 10.7, 10.8,
10.66, 10.71, 11.12, 11.18, 11.2, 10.95, 11.07, 11.12, 11.3,
11.19, 10.83, 10.56, 10.37, 10.47, 10.33, 10.17, 10.51, 10.4,
10.56, 10.74, 10.58, 10.6, 10.57, 10.71, 11.23, 11.28, 11.51,
11.15, 10.98, 10.98, 11.05, 10.76, 10.96, 11.1, 10.62, 11.1,
10.53, 10.69, 10.65, 10.73, 10.15, 10.15, 9.52, 9.6, 9.6, 9.52,
9.47, 9.44, 9.35, 9.27, 9.13, 8.92, 9.26, 9.45, 9.97, 10.25,
10.28, 9.99, 10.16, 10.17), ABT = c(22.392265, 22.166759, 21.912466,
22.40666, 22.790501, 23.011208, 22.588984, 22.517014, 22.085194,
22.19075, 22.089993, 22.09479, 21.95085, 22.061205, 22.037214,
22.027618, 22.018023, 21.811708, 21.720547, 21.600595, 21.854891,
21.898071, 21.907667, 21.840496, 21.874083, 21.725344, 21.667768,
21.581404, 22.166759, 22.305902, 22.488226, 22.469034, 22.339487,
22.26272, 21.802113, 21.946053, 22.243528, 22.200346, 22.066002,
22.051607, 22.099588, 22.075598, 22.267517, 22.382669, 22.310699,
22.02282, 22.209942, 22.070801, 22.128376, 21.907667, 21.792517,
21.365494, 21.336706, 21.048826, 20.996048, 21.39908, 21.562212,
21.677364, 21.95085, 22.430651, 22.368277, 22.161963, 22.157164,
22.646561, 22.843279, 23.19833, 22.963228, 22.91045, 22.98242,
23.049591, 23.169542, 23.927626, 23.500605, 23.111965, 22.69454,
23.078381, 22.824085, 22.920046, 23.001612, 23.255905, 23.073582,
23.586967, 23.692524, 23.634949, 23.850859, 23.601362, 23.519796,
23.543785, 23.438231, 23.634949, 23.567776, 23.395048, 23.735706,
23.706919, 23.678129, 23.529392, 23.452623, 23.366261, 23.351866,
23.145552), ACN = c(26.370001, 25.75, 25.65, 25.42, 26.610001,
26.959999, 26.5, 26.389999, 26.18, 26.290001, 26.1, 26, 25.67,
25.16, 24.9, 25.200001, 25.4, 25.68, 25.6, 26.049999, 25.99,
25.83, 25.48, 25.73, 25.77, 25.85, 25.51, 25.42, 25.200001, 24.639999,
24.9, 25.049999, 24.51, 24.9, 24.799999, 24.709999, 24.48, 25.15,
25.549999, 25.59, 25.42, 25.110001, 25.370001, 25.49, 25.32,
25.17, 24.950001, 24.459999, 24.48, 23.98, 24.030001, 23.950001,
23.66, 24.01, 24.280001, 24.299999, 24.4, 24.57, 24.16, 24.559999,
24.15, 24.440001, 24.35, 24.860001, 24.969999, 24.889999, 23.700001,
23.34, 23.440001, 23.120001, 22.860001, 22.5, 22.57, 22.440001,
21.9, 21.959999, 21.75, 21.85, 21.549999, 21.469999, 21.620001,
21.700001, 21.969999, 22.1, 22.1, 21.82, 22, 22.08, 21.860001,
21.92, 21.99, 22.049999, 22.01, 22.049999, 22.5, 22.790001, 22.719999,
22.76, 22.67, 22.34), ADBE = c(30.844999, 30.030001, 29.865,
29.370001, 29.389999, 29.41, 29.059999, 29.49, 29.110001, 29.115,
29.190001, 28.940001, 29.035, 28.535, 27.695, 27.790001, 28.004999,
28.084999, 27.74, 28.450001, 28.950001, 31.145, 31.709999, 31.995001,
31.76, 31.85, 31.295, 31.34, 31.85, 31.735001, 32.455002, 32.299999,
31.535, 31.415001, 30.754999, 30.875, 30.695, 30.715, 30.875,
31.17, 31.174999, 31.174999, 31.885, 32.535, 32.474998, 32.255001,
32.654999, 32.209999, 32.669998, 32.27, 31.594999, 31.945, 33.904999,
33.349998, 33.18, 33.134998, 33.27, 33.555, 33.110001, 33.865002,
33.584999, 33.380001, 33.290001, 33.424999, 34.049999, 34.195,
33.630001, 33.400002, 33.450001, 32.535, 31.74, 30.33, 27.385,
29.049999, 28.625, 29.77, 30.145, 30.02, 29.559999, 29.225, 29.235001,
29.735001, 28.575001, 28.645, 28.775, 28.459999, 28.85, 29.334999,
28.76, 28.965, 28.889999, 29.049999, 29.955, 29.889999, 30.549999,
31.059999, 31.115, 31.360001, 32.419998, 32.759998), ADI = c(36.389999,
35.400002, 35.560001, 35.5, 35.549999, 35.41, 35.080002, 35.560001,
35.099998, 35.639999, 36.07, 35.139999, 34.650002, 34.470001,
34.049999, 34.299999, 34.880001, 34.830002, 34.740002, 35.889999,
35.990002, 36.009998, 35.240002, 37.52, 37.52, 38.02, 37.18,
36.830002, 38.049999, 37.599998, 37.32, 37.130001, 36.700001,
36.299999, 36.5, 36.59, 37.32, 37.5, 36.720001, 38, 37.709999,
36.93, 37.119999, 37.049999, 36.950001, 36.919998, 37.849998,
37.130001, 37.209999, 36.57, 35.919998, 36.02, 35.830002, 35.709999,
35.830002, 36.23, 35.799999, 35.66, 35.119999, 36.330002, 36.139999,
35.709999, 35.599998, 35.310001, 35.41, 36.09, 35.669998, 35.34,
34.93, 34.099998, 33.650002, 32.84, 33.360001, 33.849998, 33.419998,
34.349998, 33.799999, 33.700001, 33.52, 33.360001, 33.52, 34.110001,
33.849998, 33.669998, 34.560001, 34.619999, 34.619999, 34.549999,
34.130001, 34.060001, 34.310001, 35.490002, 36.419998, 36.700001,
36.860001, 36.889999, 37.080002, 36.529999, 36.849998, 36.290001
)), row.names = c(NA, 100L), class = "data.frame")
We can use shift from data.table which can take a vector of values for n
library(data.table)
setDT(Data)
out <- Data[, shift(.SD, n = 1:50), .SDcols = -1]
names(out) <- paste0(rep(names(Data)[-1], each = 50), "_", 1:50, "days")
Data[, names(out) := out][]
I am trying to perform 6 months forecasting over production data for three power plants, I built my data as an hts object that has 3 levels. However, when I am performing the forecast function and then try to see the accuracy using test data I get the following error: "Error in x - fcasts: non-conformable arrays"
Furthermore, when I try to apply the "arima" as a forecasting method on the hts object I get the following (the warning message is repeated 9 times, as I have 9 time series in the hts object):
forecasts <- forecast(data,h = 6 , method = "bu" , fmethod = "arima")
I used the following instructions to get the hts object:
and the data has the following structure:
I am not sure where I am going wrong. Anyone can help with some thoughts??
Thank you!
The data:
structure(list(LarGroup1 = c(188.3, 187.2, 94.7, 109.2, 202.7,
146.6, 121.9, 151.3, 111.1, 103.4, 188.1, 168.1, 233.9, 230.7,
187.1, 0, 98.9, 173.5, 149.4, 168.6, 4.7, 14.8, 91.8, 166.5,
170.5, 123.6, 85.2, 64.4), LarGroup2 = c(159.1, 127.7, 210.3,
199.8, 113, 143.4, 144.5, 83.8, 41.6, 35.1, 95.2, 178.2, 241.1,
236.4, 181.9, 194.3, 196.1, 92.4, 154.6, 78.9, 35.7, 0, 74.5,
75.1, 140, 142.5, 3.8, 17.5), RibGroup1 = c(49.4, 102.4, 50.8,
118.8, 108.4, 139.5, 121.7, 69.6, 53.4, 28, 113.3, 96.3, 70.8,
124.4, 54.4, 128.7, 63.3, 2.1, 41.3, 0.4, 0.6, 0, 5.4, 57.9,
9.9, 30, 221, 167.2), RibGroup2 = c(32.7, 32, 98.1, 6.3, 85.5,
96.6, 41.1, 44.9, 50.4, 27.3, 0, 45.4, 199.1, 179.2, 86.1, 0,
58.4, 43.3, 41.8, 42.1, 22.1, 11.8, 71.8, 112, 204.1, 40.9, 24.5,
210.9), RibGroup3 = c(90.8, 15.4, 10.5, 124.4, 33.9, 8.4, 38.3,
56.9, 13.5, 0, 32.6, 132.8, 160.7, 168.7, 60.7, 131.9, 110.8,
29.2, 131.3, 62.1, 6.1, 0, 0, 3.4, 23.9, 192.7, 165.5, 0), SinGroup1 = c(235.2,
225.4, 226.1, 234.4, 222.1, 232.3, 233.4, 201.9, 195.3, 209.4,
233.6, 223.6, 222.2, 232, 224, 149.8, 201.6, 220.2, 203.1, 212.1,
71.9, 82.3, 183.2, 210.6, 198.6, 230.8, 218, 163.2), SinGroup2 = c(233.4,
225.6, 227, 51.6, 76, 230.7, 233.1, 202.7, 200.2, 207.2, 228.4,
226.2, 183.9, 230.4, 222.3, 227.7, 177.9, 152, 218.6, 210.6,
80.9, 63.2, 188.1, 209.5, 233.2, 210.1, 226.5, 200.5), SinGroup3 = c(233.2,
188.5, 226.9, 234.7, 222.8, 234.6, 220.6, 156.4, 209.2, 218.7,
232.9, 226.1, 215.4, 231, 222.7, 222.7, 183.7, 203.8, 216.8,
112, 0, 39.6, 180.8, 203.6, 221.1, 228.9, 202.8, 186.7), SinGroup4 = c(218,
215.5, 226.8, 235.6, 223.6, 234.8, 234.9, 69.3, 192, 207.8, 235.2,
217.2, 235.1, 231.8, 223.5, 230.5, 225.6, 220.1, 220, 211.9,
114.8, 44.5, 158.5, 206.3, 231.8, 179, 225.3, 198.6)), class = "data.frame", row.names = c(NA,
-28L))
In the accuracy function, you need to include test data, not training data. You ask for 6 steps ahead, but your test data only consists of 4 time periods.
The seasonal differencing error suggests you are using an old version of the forecast package. Please update your packages.
The following code works using current CRAN packages (forecast v8.4, hts v
library(hts)
Production_data <- data.frame(
LarGroup1 = c(
188.3, 187.2, 94.7, 109.2, 202.7,
146.6, 121.9, 151.3, 111.1, 103.4, 188.1, 168.1, 233.9, 230.7,
187.1, 0, 98.9, 173.5, 149.4, 168.6, 4.7, 14.8, 91.8, 166.5,
170.5, 123.6, 85.2, 64.4
), LarGroup2 = c(
159.1, 127.7, 210.3,
199.8, 113, 143.4, 144.5, 83.8, 41.6, 35.1, 95.2, 178.2, 241.1,
236.4, 181.9, 194.3, 196.1, 92.4, 154.6, 78.9, 35.7, 0, 74.5,
75.1, 140, 142.5, 3.8, 17.5
), RibGroup1 = c(
49.4, 102.4, 50.8,
118.8, 108.4, 139.5, 121.7, 69.6, 53.4, 28, 113.3, 96.3, 70.8,
124.4, 54.4, 128.7, 63.3, 2.1, 41.3, 0.4, 0.6, 0, 5.4, 57.9,
9.9, 30, 221, 167.2
), RibGroup2 = c(
32.7, 32, 98.1, 6.3, 85.5,
96.6, 41.1, 44.9, 50.4, 27.3, 0, 45.4, 199.1, 179.2, 86.1, 0,
58.4, 43.3, 41.8, 42.1, 22.1, 11.8, 71.8, 112, 204.1, 40.9, 24.5,
210.9
), RibGroup3 = c(
90.8, 15.4, 10.5, 124.4, 33.9, 8.4, 38.3,
56.9, 13.5, 0, 32.6, 132.8, 160.7, 168.7, 60.7, 131.9, 110.8,
29.2, 131.3, 62.1, 6.1, 0, 0, 3.4, 23.9, 192.7, 165.5, 0
), SinGroup1 = c(
235.2,
225.4, 226.1, 234.4, 222.1, 232.3, 233.4, 201.9, 195.3, 209.4,
233.6, 223.6, 222.2, 232, 224, 149.8, 201.6, 220.2, 203.1, 212.1,
71.9, 82.3, 183.2, 210.6, 198.6, 230.8, 218, 163.2
), SinGroup2 = c(
233.4,
225.6, 227, 51.6, 76, 230.7, 233.1, 202.7, 200.2, 207.2, 228.4,
226.2, 183.9, 230.4, 222.3, 227.7, 177.9, 152, 218.6, 210.6,
80.9, 63.2, 188.1, 209.5, 233.2, 210.1, 226.5, 200.5
), SinGroup3 = c(
233.2,
188.5, 226.9, 234.7, 222.8, 234.6, 220.6, 156.4, 209.2, 218.7,
232.9, 226.1, 215.4, 231, 222.7, 222.7, 183.7, 203.8, 216.8,
112, 0, 39.6, 180.8, 203.6, 221.1, 228.9, 202.8, 186.7
), SinGroup4 = c(
218,
215.5, 226.8, 235.6, 223.6, 234.8, 234.9, 69.3, 192, 207.8, 235.2,
217.2, 235.1, 231.8, 223.5, 230.5, 225.6, 220.1, 220, 211.9,
114.8, 44.5, 158.5, 206.3, 231.8, 179, 225.3, 198.6
)
)
Production_data_ts <- ts(Production_data, frequency = 12, start = c(2016, 7))
Production_data_hts <- hts(Production_data_ts, characters = c(3, 6))
data <- window(Production_data_hts, start = c(2016, 7), end = c(2018, 6))
test <- window(Production_data_hts, start = c(2018, 7), end = c(2018, 10))
forecasts <- forecast(data, h = 4, method = "bu")
accuracy(forecasts, test)
I have a data frame with Lat Lon mean_wind and wind_dir in each grid cells.
I am trying to make a spatial plot with mean wind in background and wind direction as arrow on each grid cells.
I have tried following on sample data-frame wind.dt
win.plt<- ggplot(wind.dt,aes(x=Lon,y=Lat))+
#Mean wind plot : OK
geom_tile(aes(fill=mean_wind),alpha=1)+
geom_tile(aes(color=mean_wind), fill=NA) +
scale_fill_gradientn(colours=(brewer.pal(9,rev("RdYlGn"))))+
scale_color_gradientn(colours=(brewer.pal(9,rev("RdYlGn"))),guide=F)
#Wind Direction : doesnot work
geom_segment(arrow = arrow(),aes(yend = Lon + wind_dir, xend = Lat + wind_dir))
win.plt
wind.dt<-structure(list(Lon = c(170.25, 171, 171.75, 172.5, 173.25, 174,
174.75, 175.5, 176.25, 177, 177.75, 178.5, 179.25, 180, 180.75,
181.5, 182.25, 183, 183.75, 184.5, 185.25, 186, 186.75, 187.5,
188.25, 189, 189.75, 190.5, 191.25, 192, 192.75, 193.5, 194.25,
170.25, 171, 171.75, 172.5, 173.25, 174, 174.75, 175.5, 176.25,
177, 177.75, 178.5, 179.25, 180, 180.75, 181.5, 182.25, 183,
183.75, 184.5, 185.25, 186, 186.75, 187.5, 188.25, 189, 189.75,
190.5, 191.25, 192, 192.75, 193.5, 194.25, 170.25, 171, 171.75,
172.5, 173.25, 174, 174.75, 175.5, 176.25, 177, 177.75, 178.5,
179.25, 180, 180.75, 181.5, 182.25, 183, 183.75, 184.5, 185.25,
186, 186.75, 187.5, 188.25, 189, 189.75, 190.5, 191.25, 192,
192.75, 193.5, 194.25, 170.25, 171, 171.75, 172.5, 173.25, 174,
174.75, 175.5, 176.25, 177, 177.75, 178.5, 179.25, 180, 180.75,
181.5, 182.25, 183, 183.75, 184.5, 185.25, 186, 186.75, 187.5,
188.25, 189, 189.75, 190.5, 191.25, 192, 192.75, 193.5, 194.25,
170.25, 171, 171.75, 172.5, 173.25, 174, 174.75, 175.5, 176.25,
177, 177.75, 178.5, 179.25, 180, 180.75, 181.5, 182.25, 183,
183.75, 184.5, 185.25, 186, 186.75, 187.5, 188.25, 189, 189.75,
190.5, 191.25, 192, 192.75, 193.5, 194.25, 170.25, 171, 171.75,
172.5, 173.25, 174, 174.75, 175.5, 176.25, 177, 177.75, 178.5,
179.25, 180, 180.75, 181.5, 182.25, 183, 183.75, 184.5, 185.25,
186, 186.75, 187.5, 188.25, 189, 189.75, 190.5, 191.25, 192,
192.75, 193.5, 194.25), Lat = c(14.25, 14.25, 14.25, 14.25, 14.25,
14.25, 14.25, 14.25, 14.25, 14.25, 14.25, 14.25, 14.25, 14.25,
14.25, 14.25, 14.25, 14.25, 14.25, 14.25, 14.25, 14.25, 14.25,
14.25, 14.25, 14.25, 14.25, 14.25, 14.25, 14.25, 14.25, 14.25,
14.25, 13.5, 13.5, 13.5, 13.5, 13.5, 13.5, 13.5, 13.5, 13.5,
13.5, 13.5, 13.5, 13.5, 13.5, 13.5, 13.5, 13.5, 13.5, 13.5, 13.5,
13.5, 13.5, 13.5, 13.5, 13.5, 13.5, 13.5, 13.5, 13.5, 13.5, 13.5,
13.5, 13.5, 12.75, 12.75, 12.75, 12.75, 12.75, 12.75, 12.75,
12.75, 12.75, 12.75, 12.75, 12.75, 12.75, 12.75, 12.75, 12.75,
12.75, 12.75, 12.75, 12.75, 12.75, 12.75, 12.75, 12.75, 12.75,
12.75, 12.75, 12.75, 12.75, 12.75, 12.75, 12.75, 12.75, 12, 12,
12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 11.25,
11.25, 11.25, 11.25, 11.25, 11.25, 11.25, 11.25, 11.25, 11.25,
11.25, 11.25, 11.25, 11.25, 11.25, 11.25, 11.25, 11.25, 11.25,
11.25, 11.25, 11.25, 11.25, 11.25, 11.25, 11.25, 11.25, 11.25,
11.25, 11.25, 11.25, 11.25, 11.25, 10.5, 10.5, 10.5, 10.5, 10.5,
10.5, 10.5, 10.5, 10.5, 10.5, 10.5, 10.5, 10.5, 10.5, 10.5, 10.5,
10.5, 10.5, 10.5, 10.5, 10.5, 10.5, 10.5, 10.5, 10.5, 10.5, 10.5,
10.5, 10.5, 10.5, 10.5, 10.5, 10.5), mean_wind = c(8.34, 8.33,
8.31, 8.29, 8.27, 8.24, 8.22, 8.2, 8.19, 8.16, 8.14, 8.13, 8.1,
8.08, 8.06, 8.02, 7.99, 7.96, 7.93, 7.89, 7.85, 7.81, 7.78, 7.73,
7.7, 7.67, 7.63, 7.62, 7.6, 7.58, 7.56, 7.53, 7.54, 8.65, 8.64,
8.61, 8.59, 8.56, 8.53, 8.51, 8.48, 8.46, 8.43, 8.41, 8.39, 8.38,
8.37, 8.33, 8.31, 8.28, 8.24, 8.2, 8.15, 8.12, 8.07, 8.03, 8.01,
7.97, 7.94, 7.92, 7.89, 7.87, 7.85, 7.85, 7.83, 7.8, 8.85, 8.84,
8.81, 8.8, 8.77, 8.74, 8.72, 8.69, 8.67, 8.65, 8.63, 8.61, 8.59,
8.58, 8.55, 8.54, 8.5, 8.46, 8.44, 8.4, 8.37, 8.33, 8.29, 8.26,
8.21, 8.18, 8.16, 8.13, 8.12, 8.09, 8.06, 8.06, 8.03, 9.01, 8.99,
8.96, 8.94, 8.91, 8.89, 8.86, 8.83, 8.82, 8.79, 8.78, 8.77, 8.75,
8.75, 8.73, 8.7, 8.68, 8.66, 8.63, 8.59, 8.55, 8.52, 8.47, 8.43,
8.4, 8.38, 8.35, 8.32, 8.31, 8.29, 8.26, 8.25, 8.23, 9.07, 9.06,
9.04, 9.01, 8.99, 8.97, 8.94, 8.92, 8.91, 8.9, 8.89, 8.88, 8.88,
8.87, 8.86, 8.84, 8.83, 8.8, 8.75, 8.74, 8.7, 8.67, 8.63, 8.59,
8.57, 8.53, 8.52, 8.51, 8.47, 8.47, 8.45, 8.42, 8.41, 9.1, 9.08,
9.06, 9.04, 9.02, 9, 8.98, 8.97, 8.96, 8.96, 8.95, 8.95, 8.97,
8.96, 8.96, 8.94, 8.91, 8.89, 8.86, 8.84, 8.8, 8.76, 8.73, 8.69,
8.67, 8.64, 8.63, 8.63, 8.61, 8.59, 8.57, 8.54, 8.53), wind_dir = c(81.27,
81.34, 81.38, 81.44, 81.47, 81.34, 81.31, 81.51, 81.56, 81.46,
81.54, 81.53, 81.42, 81.53, 81.66, 81.76, 81.86, 81.96, 82.02,
82.28, 82.65, 82.77, 83.07, 83.46, 83.78, 84.15, 84.52, 84.92,
85.39, 85.87, 86.15, 86.38, 86.53, 81.34, 81.34, 81.38, 81.31,
81.2, 81.25, 81.39, 81.36, 81.31, 81.4, 81.47, 81.48, 81.59,
81.64, 81.58, 81.62, 81.75, 81.98, 82.13, 82.26, 82.52, 82.77,
82.97, 83.15, 83.49, 83.74, 84.23, 84.78, 85.04, 85.49, 85.73,
86.05, 86.35, 81.5, 81.41, 81.32, 81.28, 81.32, 81.31, 81.24,
81.17, 81.28, 81.33, 81.24, 81.3, 81.44, 81.46, 81.55, 81.76,
81.8, 81.88, 82.11, 82.31, 82.4, 82.61, 82.88, 82.95, 83.29,
83.59, 83.93, 84.46, 84.8, 85.26, 85.47, 85.78, 86.11, 81.3,
81.29, 81.29, 81.28, 81.32, 81.22, 81.24, 81.32, 81.31, 81.23,
81.34, 81.47, 81.37, 81.42, 81.5, 81.6, 81.78, 81.98, 82.06,
82.26, 82.49, 82.52, 82.7, 82.79, 83.05, 83.46, 83.79, 84.18,
84.5, 84.91, 85.23, 85.49, 85.7, 81.31, 81.33, 81.28, 81.19,
81.26, 81.29, 81.36, 81.24, 81.16, 81.18, 81.23, 81.23, 81.23,
81.47, 81.5, 81.55, 81.73, 81.99, 82.14, 82.18, 82.41, 82.46,
82.63, 82.83, 82.97, 83.27, 83.62, 84.01, 84.34, 84.64, 85.01,
85.38, 85.55, 81.14, 81.14, 81.1, 81.15, 81.2, 81.1, 81.14, 81.06,
81.21, 81.26, 81.13, 81.16, 81.17, 81.22, 81.28, 81.63, 81.71,
81.77, 82.13, 82.22, 82.37, 82.48, 82.56, 82.7, 82.92, 83.19,
83.43, 83.74, 84.15, 84.59, 84.89, 85.22, 85.39)), row.names = c(NA,
-198L), .Names = c("Lon", "Lat", "mean_wind", "wind_dir"), class = c("tbl_df",
"tbl", "data.frame"))
geom_spoke was made for this particular sort of plot. Cleaned up a little,
library(ggplot2)
ggplot(wind.dt,
aes(x = Lon ,
y = Lat,
fill = mean_wind,
angle = wind_dir,
radius = scales::rescale(mean_wind, c(.2, .8)))) +
geom_raster() +
geom_spoke(arrow = arrow(length = unit(.05, 'inches'))) +
scale_fill_distiller(palette = "RdYlGn") +
coord_equal(expand = 0) +
theme(legend.position = 'bottom',
legend.direction = 'horizontal')
Adjust scaling and sizes as desired.
Edit: Controlling the number of arrows
To adjust the number of arrows, a quick-and-dirty route is to subset one of the aesthetics passed to geom_spoke with a recycling vector that will cause some rows to be dropped, e.g.
library(ggplot2)
ggplot(wind.dt,
aes(x = Lon ,
y = Lat,
fill = mean_wind,
angle = wind_dir[c(TRUE, NA, NA, NA, NA)], # causes some values not to plot
radius = scales::rescale(mean_wind, c(.2, .8)))) +
geom_raster() +
geom_spoke(arrow = arrow(length = unit(.05, 'inches'))) +
scale_fill_distiller(palette = "RdYlGn") +
coord_equal(expand = 0) +
theme(legend.position = 'bottom',
legend.direction = 'horizontal')
#> Warning: Removed 158 rows containing missing values (geom_spoke).
This depends on your data frame being in order and is not infinitely flexible, but if it gets you a nice plot with minimal effort, can be useless nonetheless.
A more robust approach is to make a subsetted data frame for use by geom_spoke, say, selecting every other value of Lon and Lat, here using recycling subsetting on a vector of distinct values:
library(dplyr)
wind.arrows <- wind.dt %>%
filter(Lon %in% sort(unique(Lon))[c(TRUE, FALSE)],
Lat %in% sort(unique(Lat))[c(TRUE, FALSE)])
ggplot(wind.dt,
aes(x = Lon ,
y = Lat,
fill = mean_wind,
angle = wind_dir,
radius = scales::rescale(mean_wind, c(.2, .8)))) +
geom_raster() +
geom_spoke(data = wind.arrows, # this is the only difference in the plotting code
arrow = arrow(length = unit(.05, 'inches'))) +
scale_fill_distiller(palette = "RdYlGn") +
coord_equal(expand = 0) +
theme(legend.position = 'bottom',
legend.direction = 'horizontal')
This approach makes getting (and scaling) a grid fairly easy, but getting a diamond pattern will take a bit more logic:
wind.arrows <- wind.dt %>%
filter(( Lon %in% sort(unique(Lon))[c(TRUE, FALSE)] &
Lat %in% sort(unique(Lat))[c(TRUE, FALSE)] ) |
( Lon %in% sort(unique(Lon))[c(FALSE, TRUE)] &
Lat %in% sort(unique(Lat))[c(FALSE, TRUE)] ))
Good afternoon,
I have a simple dataset with 2 columns with wind direction and data. I am trying to reproduce a plot similar to the following:
After searching in the stackoverflow I have come across the "circular" package 'rose-diag' function.
however when I try it with the following codes:
x= dat$Test
plot(x)
rose.diag(x, bins=24, main="test", prop=2, axes = F)
I get the following:
which shows wrong directions. Could someone please advise what I am doing wrong here and any suggestion to rectify the error?
I also found some examples in ggplot but could not workout with what I am trying to do. any suggestion would be really appreciated too with any alternative method available just to learn other ways of doing this type of plots. thanks
my sample input data that I am testing with are:
> dput(dat)
structure(list(wd = c(7.5, 22.5, 37.5, 52.5, 67.5, 82.5, 97.5,
112.5, 127.5, 142.5, 157.5, 172.5, 187.5, 202.5, 217.5, 232.5,
247.5, 262.5, 277.5, 292.5, 307.5, 322.5, 337.5, 352.5), Test = c(10.82,
6.75, 6.57, 6.52, 8.48, 9.66, 15.36, 18.97, 29.14, 36.56, 38.65,
44.23, 51.99, 50.83, 51.93, 50.27, 49.35, 52.67, 54.05, 49.69,
43.73, 29.83, 18.94, 17.33)), .Names = c("wd", "Test"), class = "data.frame", row.names = c(NA,
-24L))
Here some example code:
df <- data.frame(wd = c(7.5, 22.5, 37.5, 52.5, 67.5, 82.5, 97.5,
112.5, 127.5, 142.5, 157.5, 172.5, 187.5, 202.5, 217.5, 232.5,
247.5, 262.5, 277.5, 292.5, 307.5, 322.5, 337.5, 352.5),
Test = c(10.82,
6.75, 6.57, 6.52, 8.48, 9.66, 15.36, 18.97, 29.14, 36.56, 38.65,
44.23, 51.99, 50.83, 51.93, 50.27, 49.35, 52.67, 54.05, 49.69,
43.73, 29.83, 18.94, 17.33))
ggplot(df, aes(wd, Test)) +
geom_bar(width=15, stat='identity', color='grey') +
coord_polar()