I have been using dplyr::summarise to sum daily data by month, and I have just spent a few hours figuring out that, for whatever reason, the decimal places in my raw data were throwing the function off.
Basically, the monthly sums for 'temp1' (which is the 'Mod' data) are wrong (the digits look right but the decimal point is in the wrong place) until I apply a rounding function (rounding to 7 or fewer digits seems to fix the problem; rounding to 8 just reproduces it).
# Read the daily Mod/Obs series and total each column by calendar month.
# read.csv() already returns a data.frame, so no as.data.frame() is needed.
temp1 <- read.csv("data/ModObs.csv")
temp1$Date <- as.Date(temp1$Date, format = "%d/%m/%Y")
temp1$Obs <- round(temp1$Obs, 7) # this line fixes the problem
temp1$Mod <- round(temp1$Mod, 7) # this line fixes the problem
# Key every row by the first day of its month (Date is already a Date here).
temp1$yearmonth <- lubridate::floor_date(temp1$Date, "month")
# Monthly totals of the Obs column.
temp2 <- dplyr::group_by(temp1, yearmonth)
temp2 <- dplyr::summarise(temp2, Obs = sum(Obs, na.rm = TRUE))
# Monthly totals of the Mod column; this overwrites the daily data in temp1.
temp1 <- dplyr::group_by(temp1, yearmonth)
temp1 <- dplyr::summarise(temp1, Mod = sum(Mod, na.rm = TRUE))
My question is, is there a way of using dplyr::summarise that means I would not have encountered this issue (which took me quite a while to figure out and solve)?
dput with error:
dput(temp1)
structure(list(yearmonth = structure(c(0, 31, 59, 90, 120, 151,
181, 212, 243, 273, 304, 334, 365, 396, 424, 455, 485, 516, 546,
577, 608, 638, 669, 699, 730, 761, 790, 821, 851, 882, 912, 943,
974, 1004, 1035, 1065, 1096, 1127, 1155, 1186, 1216, 1247, 1277,
1308, 1339, 1369, 1400, 1430, 1461, 1492, 1520, 1551, 1581, 1612,
1642, 1673, 1704, 1734, 1765, 1795, 1826, 1857, 1885, 1916, 1946,
1977, 2007, 2038, 2069, 2099, 2130, 2160, 2191, 2222, 2251, 2282,
2312, 2343, 2373, 2404, 2435, 2465, 2496, 2526, 2557, 2588, 2616,
2647, 2677, 2708, 2738, 2769, 2800, 2830, 2861, 2891, 2922, 2953,
2981, 3012, 3042, 3073, 3103, 3134, 3165, 3195, 3226, 3256, 3287,
3318, 3346, 3377, 3407, 3438, 3468, 3499, 3530, 3560, 3591, 3621,
3652, 3683, 3712, 3743, 3773, 3804, 3834, 3865, 3896, 3926, 3957,
3987, 4018, 4049, 4077, 4108, 4138, 4169, 4199, 4230, 4261, 4291,
4322, 4352, 4383, 4414, 4442, 4473, 4503, 4534, 4564, 4595, 4626,
4656, 4687, 4717, 4748, 4779, 4807, 4838, 4868, 4899, 4929, 4960,
4991, 5021, 5052, 5082, 5113, 5144, 5173, 5204, 5234, 5265, 5295,
5326, 5357, 5387, 5418, 5448, 5479, 5510, 5538, 5569, 5599, 5630,
5660, 5691, 5722, 5752, 5783, 5813, 5844, 5875, 5903, 5934, 5964,
5995, 6025, 6056, 6087, 6117, 6148, 6178, 6209, 6240, 6268, 6299,
6329, 6360, 6390, 6421, 6452, 6482, 6513, 6543, 6574, 6605, 6634,
6665, 6695, 6726, 6756, 6787, 6818, 6848, 6879, 6909, 6940, 6971,
6999, 7030, 7060, 7091, 7121, 7152, 7183, 7213, 7244, 7274), class = "Date"),
Obs = c(29.5, 1.6, 225.9, 305, 485.9, 392.6, 86.7, 422.1,
262.6, 22.6, 12.7, 40.3, 96.5, 1.4, 0, 40.1, 251.9, 494.4,
181.6, 86, 69.6, 128.4, 560.9, 55.8, 14.2, 94.1, 8.9, 28.2,
10.3, 1.4, 2.6, 9, 14.2, 5.6, 9.5, 1.6, 0, 195.1, 24.3, 18.9,
26, 34.3, 68.1, 144.1, 213.3, 99.2, 36.1, 17, 19.5, 13.3,
0, 352.7, 2812.7, 82, 311.9, 314.2, 397.5, 783.7, 388.7,
40.4, 1.3, 0, 2.1, 2.9, 2.1, 8.6, 1.2, 260.8, 494.4, 1125.1,
983.3, 20.9, 3.2, 0, 0.1, 1.1, 0.6, 32.9, 16.2, 124.1, 204.3,
340.6, 212.6, 42.8, 56.7, 2.2, 22.6, 78.3, 100.2, 786.4,
990.7, 330.2, 119.9, 2.7, 3.5, 5.7, 9.4, 17.6, 104.2, 168.9,
43, 118.4, 69.7, 479.3, 435.4, 101.4, 52.9, 104, 28.6, 0.8,
1.8, 13.1, 54.4, 26.2, 6.3, 24.8, 30.7, 53.9, 1.4, 0, 4.6,
0, 1.5, 14.5, 67.3, 217.4, 460.5, 251.2, 72.3, 29.3, 57.7,
29.7, 1.4, 1, 30.4, 3.6, 164.2, 378.4, 861.1, 982.5, 116.5,
68.4, 33.9, 1.5, 52.3, 7.5, 18.2, 70.4, 75.9, 158.9, 26.1,
2.3, 6, 8.4, 0.1, 43.9, 8.3, 0, 0.7, 8.6, 38.3, 24.2, 110.1,
164.1, 239, 120.8, 23.9, 24.7, 1.8, 1.4, 54.7, 75.6, 11.6,
19.6, 69.4, 199.7, 648.3, 260.9, 53.5, 4.5, 0.5, 0, 0.6,
60.9, 9, 130.9, 61.7, 539.5, 222.1, 31.6, 19.8, 288.6, 83.3,
2, 1.8, 104.4, 214, 108.3, 504.2, 152.6, 110.2, 103.3, 14.7,
128.2, 3.5, 2.5, 7.2, 47.4, 73.6, 116.2, 150.6, 161.1, 58.8,
32.9, 12.1, 33.3, 31.3, 0.7, 39.7, 0.3, 26, 102.2, 55.2,
46.3, 62.3, 15.4, 200.2, 98.9, 35.5, 0.4, 80.3, 286.5, 348.8,
646.5, 340.3, 1048.5, 558.6, 365.5, 129.4, 3.7), Mod = c(58.456732574,
0.647399496, 106.77816386, 267.838017351, 599.939323463,
250.80934844, 113.281660213, 241.663996002, 127.530387061,
52.687410089, 84.890244021, 41.364802773, 59.23208781, 8.497558874,
0.672761812, 15.465132304, 358.926445816, 399.9093607, 97.971842098,
42.72450411, 78.475537521, 267.696647395, 1499.730009232,
164.134543701, 15.739950594, 117.176571603, 0.29960511, 33.153451885,
71.35707594, 1.976493212, 38.99406048, 58.699745671, 88.893788732,
55.590919209, 17.675911123, 0.323688533, 0.802922429, 255.339027286,
86.973361482, 56.672316286, 195.494804037, 113.402888496,
88.016557451, 146.313739207, 141.11162499, 309.49712486,
42.342303882, 32.801816137, 9.804984811, 14.876734504, 0.741273571,
432.148407136, 2516.875488309, 47.539316029, 269.405152962,
183.64372206, 154.563624943, 467.720012557, 153.054373772,
32.514885627, 1.830055421, 0.066762771, 1.044433442, 1.346976081,
17.458179607, 49.907434727, 53.305731876, 353.57856375, 310.529543548,
962.398015832, 344.181844335, 30.810939684, 8.040785393,
0.377896164, 0.798674902, 0.801987649, 0.691369382, 13.928109124,
12.703685263, 137.85141766, 211.000002457, 444.374773665,
187.06473363, 56.579158088, 79.307114494, 1.185915374, 7.450495202,
350.92445957, 168.333585374, 1103.415013415, 530.738230571,
97.400577403, 120.218466778, 26.53863178, 4.95759286, 1.415953207,
1.349259407, 7.598631896, 31.687964985, 111.63067543, 253.033200389,
260.084267318, 174.328538378, 435.075601539, 266.057507136,
169.491413576, 370.501536962, 325.734910145, 52.804905885,
0.637640491, 0.749393501, 61.044014158, 236.352010674, 69.236802018,
50.981912279, 113.755615714, 68.015519965, 228.376481539,
1.35494224, 0.370446501, 1.782754512, 0.200525121, 0.567715904,
2.562157517, 66.360280078, 537.409598471, 548.857756317,
131.818783821, 79.38907511, 216.219977069, 86.85853468, 91.042441797,
1.310072508, 4.6784498, 49.359916771, 12.139100379, 792.36086926,
360.543361637, 674.819587278, 417.14625705, 71.213853069,
88.470327459, 26.501678301, 0.660446628, 1.983262203, 0.053393889,
2.116900185, 103.974625465, 146.1406309, 241.456322328, 41.761031962,
19.165518836, 61.329157567, 77.461701504, 0.717057613, 60.244865985,
2.038796249, 0.024612503, 1.15999722, 0.987871135, 66.752973657,
101.602951298, 207.507552152, 351.874694806, 239.490966404,
373.402963887, 83.392418938, 96.713198206, 2.471314963, 1.789748376,
23.642411238, 274.437164678, 27.196302352, 122.879115856,
203.243972815, 261.450286079, 674.337097864, 201.592587766,
66.457305017, 14.265446489, 11.688820111, 0.287104024, 0.498545345,
10.595714786, 64.36811409, 147.944544256, 105.263660789,
348.781394762, 115.965911604, 81.556952547, 35.877763907,
293.156577573, 122.052605838, 0.891615203, 0.201455399, 17.693507458,
191.862026713, 93.030313466, 379.074639489, 88.590763754,
138.225716958, 438.407332197, 11.107850781, 175.835916749,
0.793580574, 0.755295219, 1.253581528, 2.175241521, 126.829190302,
167.624256025, 261.538659971, 143.633607733, 58.216055381,
101.857571372, 37.192461414, 112.344312062, 100.262190061,
2.378722279, 4.946631624, 0.435401092, 51.481605801, 155.384067186,
153.115869623, 128.316180053, 153.07003862, 80.585921934,
221.89445498, 62.047224666, 105.157734971, 0.203008456, 51.168132113,
310.567488885, 238.329914336, 783.724869869, 193.016695288,
431.815627948, 143.854730373, 358.082995503, 84.620059176,
1.146042245)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-240L))
Your data is exactly the same before and after the rounding.
library(dplyr)
# Compute the monthly Mod totals twice -- once before and once after the
# rounding step -- so the two results can be compared.
temp1 <- as.data.frame(read.csv("C:/Users/xxx/Documents/modops.csv"))
temp1$Date <- as.Date(temp1$Date, "%d/%m/%Y")
temp1$yearmonth <- lubridate::floor_date(as.Date(temp1$Date), "month")
notfixed <- temp1 %>%
  group_by(yearmonth) %>%
  summarise(Mod = sum(Mod, na.rm = TRUE))
# NOTE(review): only Obs is rounded here while the totals below are taken over
# Mod -- confirm this is the comparison that was intended.
temp1$Obs <- round(temp1$Obs, 7) # this line fixes the problem
fixed <- temp1 %>%
  group_by(yearmonth) %>%
  summarise(Mod = sum(Mod, na.rm = TRUE))
> identical(fixed, notfixed)
[1] TRUE
Related
I'm trying to compare a distribution against the theoretical normal distribution, and I would like to show the deciles in red in the Q-Q plot.
This is the numeric array I'm using (as a df).
structure(list(v1 = c(84.01, 86.88, 48.5, 80.28, 63.85, 81.95,
8.94, 59.16, 35.16, 49.6, 56.16, 19.71, 63.03, 64.82, 2.92, 46.69,
42, 63.83, 53.29, 67.79, 15.87, 37.95, 52.99, 52.11, 66.14, 68.61,
61.19, 44.9, 54.8, 61.81, 80.9, 26.17, 12.7, 57.51, 73.41, 54.25,
55.61, 55.64, 57.69, 67.77, 23.21, 74.23, 29.43, 52.08, 41.1,
58.65, 55.25, 79.08, 71.39, 32.29, 66.9, 60.52, 100, 43.34, 47.69,
65.64, 75.9, 59.95, 55.12, 37.57, 60.76, 79.82, 55.67, 43.17,
56.56, 39.87, 65.32, 53.82, 57.76, 26.88, 62.83, 42.71, 73.82,
55.02, 73.52, 68.9, 60.82, 52.16, 61.34, 57.59, 19.71, 74.17,
63.93, 59.88, 55.47, 43.19, 45.17, 64.23, 99.18, 53.25, 58, 73.66,
100, 58.83, 86.6, 54.83, 58.13, 68.57, 56.17, 59.73, 47.02, 33.97,
60.2, 60.53, 47.18, 54.72, 49.71, 35.29, 62.25, 44.07, 46.35,
17.29, 26.93, 60.26, 77.35, 59.18, 71.83, 72.01, 58.78, 29.14,
62.56, 23.41, 39.82, 56.27, 46.55, 60.05, 80.74, 15.99, 58.64,
37.43, 60.38, 48.77, 54.41, 49.4, 36.05, 35.17, 80.04, 31.64,
56.2, 39.35, 10.61, 75.99, 51.69, 59.65, 52.58, 64.76, 76.07,
43.96, 66.95, 29.45, 43.18, 30.99, 51.22, 30.45, 73.49, 58.92,
67.22, 51.68, 62.56, 64.29, 46.11, 72.86, 66.33, 56.27, 62.04,
49.68, 46.44, 49.3, 66.93, 43.86, 12.72, 44.43, 60.62, 33.98,
83.57, 50.37, 36.96, 54.04, 30.47, 28.46, 48.35, 35.64, 71.47,
76.1, 7.32, 3.36, 83.27, 52.11, 90.43, 22.85, 7.9, 52.19, 41.96,
23.22, 42.86, 30.83, 54.66, 63.9, 46.04, 43.69, 84.14, 50.09,
65.29, 42.9, 72.92, 60.2, 45.36, 51.76, 58.7, 36.84, 77.72, 21.04,
53.21, 68.87, 12.22, 63.93, 18.57, 74.93, 65.3, 79.18, 38.03,
55.69, 73.93, 63.33, 93.97, 34.99, 45.87, 61.12, 47.95, 40.82,
54.1, 69.17, 54.9, 41.72, 76.39, 61.75, 79.3, 29.62, 41.29, 33.33,
52.31, 31.04, 65.94, 23.29, 64.27, 63.71, 73.43, 68.94, 68.85,
68.39, 39.13, 65.16, 61.77, 88.54, 76.29, 78.85, 44.97, 12.35,
90.31, 63.24, 77.85, 58.5, 81.05, 85.38, 74.16, 78.56, 54.86,
71.15, 62.7, 64.49, 27.32, 45.71, 56.29, 60.76, 90, 63.91, 71.72,
20.42, 45.02, 14.21, 54.64, 68.88, 71.67, 37.67, 53.88, 58.8,
33.86, 57.98, 57.84, 73.74, 39.83, 40.69, 54.66, 18.08, 43.79,
31.05, 89.34, 46.65, 24.99, 64.33, 51.87, 65.17, 40.04, 69.41,
13.89, 61.99, 44.14, 65.46, 47.14, 27.88, 52.46, 12.69, 45.41,
84.13, 59.48, 40.2, 40.76, 42.66, 35.56, 57.52, 41.17, 60.02,
44.58, 1.76, 35.8, 70.61, 20.74, 1.78, 60.11, 70.64, 26.04, 33.58,
61.88, 12.38, 48.17, 27.09, 75.23, 28.85, 53.25, 44.19, 64.56,
17.12, 51.22, 66.02, 43.06, 58.71, 55.65, 27.52, 45.28, 44.83,
74.25, 58.35, 23.1, 58.52, 54.1, 57.74, 47.61, 17.25, 57.44,
70.38, 43.69, 89.84, 72.9, 57.53, 33.53, 18.08, 41.71, 52.07,
46.63, 42.64, 41.98, 27.74, 72.13, 61.92, 83.65, 22.42, 62.47,
13.49, 68.39, 48.94, 72.79, 49.76, 63.05, 85, 45.58, 59.58, 57.94,
65.93, 65.26, 35.56, 65.71, 62.17, 21.07, 53.33, 72.97, 36.32,
55.38, 68.96, 66.11, 56.05, 70.68, 89.05, 73.7, 85.43, 88.74,
51.16, 49.83, 63.85, 45.61, 29.15, 49.93, 62.9, 61.39, 69.17,
6.61, 63.66, 76.21, 56.85, 49.86, 68.41, 75.94, 67.88, 62.25,
26.8, 74.27, 76.48, 46.25, 73.84, 47.21, 60.76, 42.17, 45.73,
41.4, 67.04, 48.28, 63.73, 65.9, 36.62, 74.91, 50.11, 24.21,
91.45, 78.71, 100, 27.59, 78.42, 38.69, 21.68, 0, 44.85, 59.28,
45.75, 36.84, 73.91, 41.8, 66, 74.88, 59.25, 54.7, 44.39, 40.95,
42.03, 66.93, 38, 72.08, 48, 41.7, 72.1, 64.23, 14.18, 43.53,
57.81, 52.68, 45.73, 97.12, 18.98, 57.12, 48.33, 18.94, 60.6,
52.05, 67.09, 72.89, 64.16, 57.79, 45.62, 55.02, 48.86, 60.74,
7.35, 56.31, 60.51, 49, 67.56, 84.62, 15.98, 54.67, 65.72, 65.79,
70.55, 74.18, 55.72, 57.89, 54.67, 39.83, 21.2, 78)), class = "data.frame", row.names = c(NA,
-507L))
This is to generate the Q-Q plot
# Normal Q-Q plot of v1 together with its reference line.
ggplot(example, aes(sample = v1)) +
  stat_qq() +
  stat_qq_line()
# + add deciles colour='red' ?
How can I colour as red the deciles?
First create deciles using the ntile function. After that you can color the first decile by filtering that data and color them red. You can use the following code:
library(tidyverse)
# Tag every observation with its decile (1 = lowest tenth of v1).
example <- example %>% mutate(decile = ntile(v1, 10))
# Build the point colours in increasing order of v1, matching the
# [order(example$v1)] reordering of the original answer: the first decile is
# red, everything else black.
point_col <- ifelse(example$decile == 1, "red", "black")[order(example$v1)]
# Supply the data once and refer to bare column names inside aes(); using
# `example$v1` inside aes() is discouraged by ggplot2.
ggplot(example, aes(sample = v1)) +
  geom_qq(colour = point_col) +
  geom_qq_line()
Output:
Thanks to @Quinten's answer I managed to make the deciles stand out from the other Q-Q points. I added the ntile() function and calculated the change points (the actual decile boundaries); here is the solution:
library(tidyverse)
# Tag each row with its decile, sort by v1, and set difs to 1 on the first row
# and on every row where the decile index differs by one from the previous row.
example <- example %>%
  mutate(decile = ntile(v1, 10)) %>%
  arrange(v1) %>%
  mutate(
    difs = decile - lag(decile),
    difs = ifelse(is.na(difs), 1, difs)
  )
# Rows with difs == 1 are drawn red, opaque and larger; all other rows are
# black, faint and smaller. Index 1 = ordinary row, index 2 = difs == 1.
ggplot(example, aes(sample = v1)) +
  geom_qq(
    col = c("black", "red")[(example$difs == 1) + 1L],
    alpha = c(0.33, 1)[(example$difs == 1) + 1L],
    size = c(1, 1.5)[(example$difs == 1) + 1L]
  ) +
  geom_qq_line()
I'm looking for help trying to fit a sigmoidal curve to this data. It is a naturally occurring dataset and should be sigmoidal in its nature.
Any help much appreciated.
x <- c(10.90, 15.80, 12.80, 12.70, 13.90, 8.45, 9.28, 7.13, 12.00, 10.20, 10.60, 23.70, 8.04, 9.72, 19.30, 9.75, 10.10, 9.84, 9.18, 9.61, 7.92, 13.80, 11.70, 7.30, 10.60, 20.60, 8.03, 12.60, 11.90, 13.30, 8.89, 6.20, 11.80, 13.80, 8.90, 8.53, 16.30, 15.50, 11.40, 9.11, 8.15, 8.82, 8.85, 7.26, 13.40, 10.70, 8.83, 12.50, 11.10, 8.70, 12.30, 10.40, 16.60, 11.40, 10.10, 8.55, 13.60, 8.71, 12.00, 10.50, 7.40, 18.00, 12.40, 26.30, 11.70, 12.10, 8.49, 6.51, 11.40, 33.20, 8.46, 13.00, 9.76, 11.40, 9.14, 11.00, 8.08, 20.90, 12.50, 9.52, 9.99, 17.50, 10.40, 8.56, 11.40, 15.80, 13.00, 16.20, 9.20, 8.28, 15.10, 9.41, 12.60, 8.28, 7.87, 10.90, 13.40, 21.80, 11.40, 12.70,11.20, 14.8, 9.42, 7.68, 10.90, 11.00, 7.99, 17.20)
y <- c(94.4, 98.5, 97.9, 97.1, 97.5, 94.1, 93.3, 90.6, 95.6, 96.3, 95.3, 99.1, 92.5, 95.9, 99.2, 95.9, 94.2, 95.2, 95.0, 95.2, 92.1, 97.4, 97.1, 92.2, 92.4, 98.8, 92.7, 97.5, 96.8, 95.3, 87.2, 82.5, 96.4, 98.4, 93.4, 89.7, 97.5, 98.8, 97.1, 93.4, 90.7, 93.7, 93.2, 93.2, 97.6, 96.7, 94.0, 97.1, 94.9, 94.3, 96.8, 96.4, 98.0, 96.1, 96.4, 93.9, 96.8, 92.9, 97.0, 96.6, 82.8, 98.5, 97.4, 99.4, 96.2, 96.8, 90.5, 84.7, 95.9, 100.0, 93.9, 96.0, 92.4, 96.7, 95.0, 96.2, 89.8, 97.7, 96.9, 96.9, 95.8, 98.7, 95.3, 92.5, 95.8, 98.8, 97.2, 98.6, 93.6, 93.3, 99.0, 95.3, 96.7, 91.6, 91.0, 96.7, 96.8, 99.0, 96.7, 97.5, 95.7, 97.0, 92.8, 93.1, 94.6, 97.9, 92.6, 98.5)
You can use geom_smooth with method = "nls"
library(ggplot2)
# The question supplies bare vectors `x` and `y`; collect them in a data frame
# for ggplot(). (`data` was never defined, and ggplot2 alone does not attach
# the `%>%` pipe, so the plot is built with a direct ggplot() call.)
data <- data.frame(x = x, y = y)
ggplot(data, aes(x = x, y = y)) +
  geom_point() +
  # Logistic curve a / (1 + exp(-b * (x - c))) fitted by nls() with the "port"
  # algorithm from the given start values; se = FALSE draws no confidence band.
  geom_smooth(
    method = "nls", se = FALSE,
    formula = y ~ a / (1 + exp(-b * (x - c))),
    method.args = list(
      start = c(a = 98, b = -1.5, c = 1.5),
      algorithm = "port"
    ),
    color = "blue"
  )
Alternatively, you could use the self-starting model SSlogis.
# Same plot using the self-starting logistic model SSlogis, so no start values
# are passed to nls().
# `data` was never defined in this snippet; build it from the question's x and
# y vectors and call ggplot() directly instead of relying on `%>%`.
data <- data.frame(x = x, y = y)
ggplot(data, aes(x = x, y = y)) +
  geom_point() +
  geom_smooth(
    method = "nls", se = FALSE,
    formula = y ~ SSlogis(x, Asym, xmid, scal),
    color = "blue"
  )
I have a data frame with 4000 columns and daily observations sorted by time. I want to create new columns that lag all existing columns 50 times in the past. So for a column Y create 50 additional columns that are Y-1day,Y-2days,Y-3days...Y-50days.
So far I've written the following loop, which does what I need.
The issue is that it's not very fast. Is there a more efficient approach I could try?
# Add 50 shifted copies (slideBy = -1 ... -50) of every column except the first.
# Iterating over names(Data)[-1] runs zero times when Data has a single column,
# whereas 2:ncol(Data) would count down (2, 1). The name vector is evaluated
# once, before the loop adds any columns to Data.
for (col_name in names(Data)[-1]) {
  for (j in seq_len(50)) {
    Data <- slide(Data, Var = col_name, slideBy = -j)
  }
}
I'm attaching a snapshot of my data frame for reproducible example:
structure(list(time = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28,
29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76,
77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92,
93, 94, 95, 96, 97, 98, 99, 100), A = c(17.081545, 16.630901,
16.623749, 16.258942, 16.244635, 16.165951, 15.886981, 15.865522,
15.529327, 15.772532, 16.04435, 15.779685, 15.915594, 15.593705,
15.336195, 15.593705, 15.736767, 15.736767, 15.457797, 15.815451,
16.108727, 16.237482, 15.808297, 16.058655, 16.53791, 16.988556,
16.516453, 16.480686, 16.967096, 17.181688, 17.446352, 17.11731,
16.952789, 16.8598, 16.795422, 16.437769, 16.587982, 16.845493,
17.167381, 17.510729, 17.410587, 17.474964, 17.246065, 17.703863,
17.424892, 17.174536, 17.103004, 16.695278, 16.93133, 16.638054,
16.115879, 16.20887, 15.987124, 16.151646, 16.151646, 16.115879,
16.173105, 16.101574, 16.080114, 15.9299, 15.879828, 15.786839,
15.314735, 15.27897, 15.493563, 15.436337, 15.286123, 15.121602,
15.27897, 14.88555, 14.785408, 14.592275, 14.785408, 14.856938,
14.670959, 15.243204, 15.09299, 15.250358, 15.264664, 15.18598,
14.771102, 14.842632, 15, 15.150214, 15.200286, 15.078684, 15.379113,
15.658083, 15.636623, 15.879828, 15.715307, 15.729613, 15.422031,
16.080114, 16.39485, 16.502146, 16.74535, 16.902718, 17.088697,
16.831188), AAP = c(29.033333, 28.84, 28.893333, 28.866667, 28.700001,
28.799999, 28.973333, 28.866667, 28.806667, 28.973333, 29.713333,
29.033333, 28.626667, 28.546667, 28.173334, 28.166666, 28.24,
28.553333, 28.366667, 28.733334, 28.833334, 28.9, 29.166666,
29.846666, 30.08, 30.093334, 29.673334, 29.860001, 30.053333,
30.186666, 29.833334, 29.673334, 34.533333, 33.82, 33.373333,
33.633335, 33.593334, 33.833332, 33.586666, 33.946667, 34.66,
34.599998, 34.84, 34.779999, 34.093334, 33.713333, 33.560001,
33.933334, 33.086666, 33.139999, 33.279999, 33.200001, 33.259998,
32.466667, 32.713333, 32.686668, 33.053333, 33.806667, 33.333332,
33.613335, 33.633335, 33.799999, 34.206665, 34.5, 34.166668,
34.206665, 33.933334, 34, 34.373333, 33.700001, 33.173332, 32.633335,
32.639999, 34.013332, 33.566666, 34.053333, 34.053333, 34.826668,
35.106667, 35.68, 35.653332, 35.566666, 35.380001, 35.419998,
35.966667, 36.573334, 36.673332, 36.486668, 36.286667, 36.099998,
35.433334, 35.419998, 35.84, 36.533333, 36.779999, 38.98, 39.633335,
39.646667, 39.486668, 39.433334), AAPL = c(4.520714, 4.567143,
4.607143, 4.610714, 4.946429, 4.925714, 4.611429, 4.675714, 4.985714,
5.014286, 5.046429, 4.991428, 5.032857, 5.035, 5.054286, 5.146429,
5.160714, 5.188571, 5.284286, 5.492857, 5.537857, 5.687857, 5.557857,
5.631429, 5.638571, 5.778572, 5.624286, 5.597143, 5.800714, 6.045,
6.315, 6.437857, 6.272143, 6.200714, 6.092143, 6.302143, 6.352143,
6.356429, 6.408571, 6.357143, 6.302857, 5.97, 6.115714, 6.107143,
5.79, 5.621428, 5.69, 5.752857, 5.76, 5.851429, 5.882857, 6.035714,
6.137143, 6.242857, 6.118571, 6.078571, 6.071429, 6.075714, 5.964286,
6.114286, 5.952857, 5.841429, 5.87, 5.984286, 6.047143, 6.222857,
6.248571, 5.988572, 6.094285, 5.862857, 5.322857, 5.05, 5.088572,
5.298572, 5.072857, 5.311429, 5.071429, 5.282857, 5.17, 5.135714,
5.077143, 5.151429, 5.204286, 5.172857, 5.307143, 5.24, 5.32,
5.281428, 5.202857, 5.087143, 4.875714, 4.967143, 5.078571, 5.051429,
5.12, 5.364286, 5.364286, 5.68, 5.671429, 5.682857), ABC = c(14.5375,
14.4225, 14.395, 14.5175, 14.475, 14.475, 14.51, 14.515, 14.275,
14.3175, 14.4875, 14.375, 14.5025, 14.2525, 14.3925, 14.13, 14.47,
14.365, 14.5925, 14.57, 14.74, 14.71, 14.995, 14.9, 14.8625,
15.0325, 14.78, 14.875, 15.085, 15.0525, 15.4275, 15.3075, 14.9225,
15, 14.7025, 14.7975, 15, 15, 14.975, 15.3775, 15.435, 15.5325,
15.6625, 15.6575, 15.695, 15.1275, 15.1025, 15.0775, 15.265,
15.0325, 14.905, 15.1975, 15.215, 15.2025, 15.1025, 15.3775,
15.2775, 13.5075, 13.5275, 13.95, 14.3225, 14.09, 14.4275, 14.735,
14.6475, 14.8, 14.4575, 14.62, 14.7525, 14.7, 14.9, 15.125, 14.83,
14.9525, 14.825, 14.9625, 15, 14.975, 14.9675, 15.0975, 15.0875,
15.32, 15.5125, 15.38, 15.51, 15.575, 15.7475, 15.9975, 15.9175,
15.895, 15.955, 15.98, 16.209999, 16.459999, 16.5725, 16.514999,
16.4925, 16.5, 16.495001, 16.4825), ABMD = c(15.01, 14.98, 14.69,
14.52, 14.29, 14.42, 14.31, 14.17, 12.45, 12.05, 11.87, 11.97,
11.41, 11.16, 11.06, 11.2, 11.1, 11.57, 11.43, 11.88, 11.58,
11.12, 11.16, 11.32, 10.97, 10.88, 10.72, 10.3, 10.75, 10.25,
10.29, 10.41, 10.02, 10.05, 10.08, 10, 10.24, 10.89, 10.7, 10.8,
10.66, 10.71, 11.12, 11.18, 11.2, 10.95, 11.07, 11.12, 11.3,
11.19, 10.83, 10.56, 10.37, 10.47, 10.33, 10.17, 10.51, 10.4,
10.56, 10.74, 10.58, 10.6, 10.57, 10.71, 11.23, 11.28, 11.51,
11.15, 10.98, 10.98, 11.05, 10.76, 10.96, 11.1, 10.62, 11.1,
10.53, 10.69, 10.65, 10.73, 10.15, 10.15, 9.52, 9.6, 9.6, 9.52,
9.47, 9.44, 9.35, 9.27, 9.13, 8.92, 9.26, 9.45, 9.97, 10.25,
10.28, 9.99, 10.16, 10.17), ABT = c(22.392265, 22.166759, 21.912466,
22.40666, 22.790501, 23.011208, 22.588984, 22.517014, 22.085194,
22.19075, 22.089993, 22.09479, 21.95085, 22.061205, 22.037214,
22.027618, 22.018023, 21.811708, 21.720547, 21.600595, 21.854891,
21.898071, 21.907667, 21.840496, 21.874083, 21.725344, 21.667768,
21.581404, 22.166759, 22.305902, 22.488226, 22.469034, 22.339487,
22.26272, 21.802113, 21.946053, 22.243528, 22.200346, 22.066002,
22.051607, 22.099588, 22.075598, 22.267517, 22.382669, 22.310699,
22.02282, 22.209942, 22.070801, 22.128376, 21.907667, 21.792517,
21.365494, 21.336706, 21.048826, 20.996048, 21.39908, 21.562212,
21.677364, 21.95085, 22.430651, 22.368277, 22.161963, 22.157164,
22.646561, 22.843279, 23.19833, 22.963228, 22.91045, 22.98242,
23.049591, 23.169542, 23.927626, 23.500605, 23.111965, 22.69454,
23.078381, 22.824085, 22.920046, 23.001612, 23.255905, 23.073582,
23.586967, 23.692524, 23.634949, 23.850859, 23.601362, 23.519796,
23.543785, 23.438231, 23.634949, 23.567776, 23.395048, 23.735706,
23.706919, 23.678129, 23.529392, 23.452623, 23.366261, 23.351866,
23.145552), ACN = c(26.370001, 25.75, 25.65, 25.42, 26.610001,
26.959999, 26.5, 26.389999, 26.18, 26.290001, 26.1, 26, 25.67,
25.16, 24.9, 25.200001, 25.4, 25.68, 25.6, 26.049999, 25.99,
25.83, 25.48, 25.73, 25.77, 25.85, 25.51, 25.42, 25.200001, 24.639999,
24.9, 25.049999, 24.51, 24.9, 24.799999, 24.709999, 24.48, 25.15,
25.549999, 25.59, 25.42, 25.110001, 25.370001, 25.49, 25.32,
25.17, 24.950001, 24.459999, 24.48, 23.98, 24.030001, 23.950001,
23.66, 24.01, 24.280001, 24.299999, 24.4, 24.57, 24.16, 24.559999,
24.15, 24.440001, 24.35, 24.860001, 24.969999, 24.889999, 23.700001,
23.34, 23.440001, 23.120001, 22.860001, 22.5, 22.57, 22.440001,
21.9, 21.959999, 21.75, 21.85, 21.549999, 21.469999, 21.620001,
21.700001, 21.969999, 22.1, 22.1, 21.82, 22, 22.08, 21.860001,
21.92, 21.99, 22.049999, 22.01, 22.049999, 22.5, 22.790001, 22.719999,
22.76, 22.67, 22.34), ADBE = c(30.844999, 30.030001, 29.865,
29.370001, 29.389999, 29.41, 29.059999, 29.49, 29.110001, 29.115,
29.190001, 28.940001, 29.035, 28.535, 27.695, 27.790001, 28.004999,
28.084999, 27.74, 28.450001, 28.950001, 31.145, 31.709999, 31.995001,
31.76, 31.85, 31.295, 31.34, 31.85, 31.735001, 32.455002, 32.299999,
31.535, 31.415001, 30.754999, 30.875, 30.695, 30.715, 30.875,
31.17, 31.174999, 31.174999, 31.885, 32.535, 32.474998, 32.255001,
32.654999, 32.209999, 32.669998, 32.27, 31.594999, 31.945, 33.904999,
33.349998, 33.18, 33.134998, 33.27, 33.555, 33.110001, 33.865002,
33.584999, 33.380001, 33.290001, 33.424999, 34.049999, 34.195,
33.630001, 33.400002, 33.450001, 32.535, 31.74, 30.33, 27.385,
29.049999, 28.625, 29.77, 30.145, 30.02, 29.559999, 29.225, 29.235001,
29.735001, 28.575001, 28.645, 28.775, 28.459999, 28.85, 29.334999,
28.76, 28.965, 28.889999, 29.049999, 29.955, 29.889999, 30.549999,
31.059999, 31.115, 31.360001, 32.419998, 32.759998), ADI = c(36.389999,
35.400002, 35.560001, 35.5, 35.549999, 35.41, 35.080002, 35.560001,
35.099998, 35.639999, 36.07, 35.139999, 34.650002, 34.470001,
34.049999, 34.299999, 34.880001, 34.830002, 34.740002, 35.889999,
35.990002, 36.009998, 35.240002, 37.52, 37.52, 38.02, 37.18,
36.830002, 38.049999, 37.599998, 37.32, 37.130001, 36.700001,
36.299999, 36.5, 36.59, 37.32, 37.5, 36.720001, 38, 37.709999,
36.93, 37.119999, 37.049999, 36.950001, 36.919998, 37.849998,
37.130001, 37.209999, 36.57, 35.919998, 36.02, 35.830002, 35.709999,
35.830002, 36.23, 35.799999, 35.66, 35.119999, 36.330002, 36.139999,
35.709999, 35.599998, 35.310001, 35.41, 36.09, 35.669998, 35.34,
34.93, 34.099998, 33.650002, 32.84, 33.360001, 33.849998, 33.419998,
34.349998, 33.799999, 33.700001, 33.52, 33.360001, 33.52, 34.110001,
33.849998, 33.669998, 34.560001, 34.619999, 34.619999, 34.549999,
34.130001, 34.060001, 34.310001, 35.490002, 36.419998, 36.700001,
36.860001, 36.889999, 37.080002, 36.529999, 36.849998, 36.290001
)), row.names = c(NA, 100L), class = "data.frame")
We can use shift from data.table which can take a vector of values for n
library(data.table)
# Convert Data to a data.table by reference.
setDT(Data)
# One shifted copy of every column except the first, for each n in 1..50.
out <- Data[, shift(.SD, n = seq_len(50)), .SDcols = -1]
# Name the results <column>_<n>days, with n cycling 1..50 within each column.
names(out) <- paste0(
  rep(names(Data)[-1], each = 50), "_", seq_len(50), "days"
)
# Append the new columns to Data by reference and print the result.
Data[, (names(out)) := out][]
I am trying to perform 6 months forecasting over production data for three power plants, I built my data as an hts object that has 3 levels. However, when I am performing the forecast function and then try to see the accuracy using test data I get the following error: "Error in x - fcasts: non-conformable arrays"
Furthermore, when I try to apply the "arima" as a forecasting method on the hts object I get the following (the warning message is repeated 9 times, as I have 9 time series in the hts object):
# Forecast 6 steps ahead from the hts object `data`, with method "bu" and ARIMA
# as the forecasting method.
forecasts <- forecast(
  data,
  h = 6,
  method = "bu",
  fmethod = "arima"
)
I used the following instructions to get the hts object:
and the data has the following structure:
I am not sure where I am going wrong. Anyone can help with some thoughts??
Thank you!
The data:
structure(list(LarGroup1 = c(188.3, 187.2, 94.7, 109.2, 202.7,
146.6, 121.9, 151.3, 111.1, 103.4, 188.1, 168.1, 233.9, 230.7,
187.1, 0, 98.9, 173.5, 149.4, 168.6, 4.7, 14.8, 91.8, 166.5,
170.5, 123.6, 85.2, 64.4), LarGroup2 = c(159.1, 127.7, 210.3,
199.8, 113, 143.4, 144.5, 83.8, 41.6, 35.1, 95.2, 178.2, 241.1,
236.4, 181.9, 194.3, 196.1, 92.4, 154.6, 78.9, 35.7, 0, 74.5,
75.1, 140, 142.5, 3.8, 17.5), RibGroup1 = c(49.4, 102.4, 50.8,
118.8, 108.4, 139.5, 121.7, 69.6, 53.4, 28, 113.3, 96.3, 70.8,
124.4, 54.4, 128.7, 63.3, 2.1, 41.3, 0.4, 0.6, 0, 5.4, 57.9,
9.9, 30, 221, 167.2), RibGroup2 = c(32.7, 32, 98.1, 6.3, 85.5,
96.6, 41.1, 44.9, 50.4, 27.3, 0, 45.4, 199.1, 179.2, 86.1, 0,
58.4, 43.3, 41.8, 42.1, 22.1, 11.8, 71.8, 112, 204.1, 40.9, 24.5,
210.9), RibGroup3 = c(90.8, 15.4, 10.5, 124.4, 33.9, 8.4, 38.3,
56.9, 13.5, 0, 32.6, 132.8, 160.7, 168.7, 60.7, 131.9, 110.8,
29.2, 131.3, 62.1, 6.1, 0, 0, 3.4, 23.9, 192.7, 165.5, 0), SinGroup1 = c(235.2,
225.4, 226.1, 234.4, 222.1, 232.3, 233.4, 201.9, 195.3, 209.4,
233.6, 223.6, 222.2, 232, 224, 149.8, 201.6, 220.2, 203.1, 212.1,
71.9, 82.3, 183.2, 210.6, 198.6, 230.8, 218, 163.2), SinGroup2 = c(233.4,
225.6, 227, 51.6, 76, 230.7, 233.1, 202.7, 200.2, 207.2, 228.4,
226.2, 183.9, 230.4, 222.3, 227.7, 177.9, 152, 218.6, 210.6,
80.9, 63.2, 188.1, 209.5, 233.2, 210.1, 226.5, 200.5), SinGroup3 = c(233.2,
188.5, 226.9, 234.7, 222.8, 234.6, 220.6, 156.4, 209.2, 218.7,
232.9, 226.1, 215.4, 231, 222.7, 222.7, 183.7, 203.8, 216.8,
112, 0, 39.6, 180.8, 203.6, 221.1, 228.9, 202.8, 186.7), SinGroup4 = c(218,
215.5, 226.8, 235.6, 223.6, 234.8, 234.9, 69.3, 192, 207.8, 235.2,
217.2, 235.1, 231.8, 223.5, 230.5, 225.6, 220.1, 220, 211.9,
114.8, 44.5, 158.5, 206.3, 231.8, 179, 225.3, 198.6)), class = "data.frame", row.names = c(NA,
-28L))
In the accuracy function, you need to include test data, not training data. You ask for 6 steps ahead, but your test data only consists of 4 time periods.
The seasonal differencing error suggests you are using an old version of the forecast package. Please update your packages.
The following code works using current CRAN packages (forecast v8.4 and the current hts release):
library(hts)
Production_data <- data.frame(
LarGroup1 = c(
188.3, 187.2, 94.7, 109.2, 202.7,
146.6, 121.9, 151.3, 111.1, 103.4, 188.1, 168.1, 233.9, 230.7,
187.1, 0, 98.9, 173.5, 149.4, 168.6, 4.7, 14.8, 91.8, 166.5,
170.5, 123.6, 85.2, 64.4
), LarGroup2 = c(
159.1, 127.7, 210.3,
199.8, 113, 143.4, 144.5, 83.8, 41.6, 35.1, 95.2, 178.2, 241.1,
236.4, 181.9, 194.3, 196.1, 92.4, 154.6, 78.9, 35.7, 0, 74.5,
75.1, 140, 142.5, 3.8, 17.5
), RibGroup1 = c(
49.4, 102.4, 50.8,
118.8, 108.4, 139.5, 121.7, 69.6, 53.4, 28, 113.3, 96.3, 70.8,
124.4, 54.4, 128.7, 63.3, 2.1, 41.3, 0.4, 0.6, 0, 5.4, 57.9,
9.9, 30, 221, 167.2
), RibGroup2 = c(
32.7, 32, 98.1, 6.3, 85.5,
96.6, 41.1, 44.9, 50.4, 27.3, 0, 45.4, 199.1, 179.2, 86.1, 0,
58.4, 43.3, 41.8, 42.1, 22.1, 11.8, 71.8, 112, 204.1, 40.9, 24.5,
210.9
), RibGroup3 = c(
90.8, 15.4, 10.5, 124.4, 33.9, 8.4, 38.3,
56.9, 13.5, 0, 32.6, 132.8, 160.7, 168.7, 60.7, 131.9, 110.8,
29.2, 131.3, 62.1, 6.1, 0, 0, 3.4, 23.9, 192.7, 165.5, 0
), SinGroup1 = c(
235.2,
225.4, 226.1, 234.4, 222.1, 232.3, 233.4, 201.9, 195.3, 209.4,
233.6, 223.6, 222.2, 232, 224, 149.8, 201.6, 220.2, 203.1, 212.1,
71.9, 82.3, 183.2, 210.6, 198.6, 230.8, 218, 163.2
), SinGroup2 = c(
233.4,
225.6, 227, 51.6, 76, 230.7, 233.1, 202.7, 200.2, 207.2, 228.4,
226.2, 183.9, 230.4, 222.3, 227.7, 177.9, 152, 218.6, 210.6,
80.9, 63.2, 188.1, 209.5, 233.2, 210.1, 226.5, 200.5
), SinGroup3 = c(
233.2,
188.5, 226.9, 234.7, 222.8, 234.6, 220.6, 156.4, 209.2, 218.7,
232.9, 226.1, 215.4, 231, 222.7, 222.7, 183.7, 203.8, 216.8,
112, 0, 39.6, 180.8, 203.6, 221.1, 228.9, 202.8, 186.7
), SinGroup4 = c(
218,
215.5, 226.8, 235.6, 223.6, 234.8, 234.9, 69.3, 192, 207.8, 235.2,
217.2, 235.1, 231.8, 223.5, 230.5, 225.6, 220.1, 220, 211.9,
114.8, 44.5, 158.5, 206.3, 231.8, 179, 225.3, 198.6
)
)
# Monthly series starting in July 2016 (28 rows, so it ends in October 2018).
Production_data_ts <- ts(
  Production_data,
  start = c(2016, 7),
  frequency = 12
)
# Build the hierarchy from the column names: the first 3 characters and then
# the next 6 characters define the levels.
Production_data_hts <- hts(Production_data_ts, characters = c(3, 6))
# Training window: July 2016 - June 2018.
data <- window(
  Production_data_hts,
  start = c(2016, 7),
  end = c(2018, 6)
)
# Test window: July 2018 - October 2018, i.e. 4 periods.
test <- window(
  Production_data_hts,
  start = c(2018, 7),
  end = c(2018, 10)
)
# The horizon h matches the 4 periods available in the test window.
forecasts <- forecast(data, method = "bu", h = 4)
accuracy(forecasts, test)
Good afternoon,
I have a simple dataset with 2 columns with wind direction and data. I am trying to reproduce a plot similar to the following:
After searching on Stack Overflow I came across the rose.diag function in the "circular" package.
however when I try it with the following codes:
# NOTE(review): Test appears to hold the value for each direction and wd the
# direction itself, so the rose below is built from values rather than from
# angles -- confirm whether that is what was intended.
x <- dat[["Test"]]
plot(x)
rose.diag(x, bins = 24, main = "test", prop = 2, axes = FALSE)
I get the following:
which shows wrong directions. Could someone please advise what I am doing wrong here and any suggestion to rectify the error?
I also found some examples using ggplot but could not work out how to adapt them to what I am trying to do. Any suggestions would be really appreciated, including alternative methods, so I can learn other ways of making this type of plot. Thanks.
my sample input data that I am testing with are:
> dput(dat)
structure(list(wd = c(7.5, 22.5, 37.5, 52.5, 67.5, 82.5, 97.5,
112.5, 127.5, 142.5, 157.5, 172.5, 187.5, 202.5, 217.5, 232.5,
247.5, 262.5, 277.5, 292.5, 307.5, 322.5, 337.5, 352.5), Test = c(10.82,
6.75, 6.57, 6.52, 8.48, 9.66, 15.36, 18.97, 29.14, 36.56, 38.65,
44.23, 51.99, 50.83, 51.93, 50.27, 49.35, 52.67, 54.05, 49.69,
43.73, 29.83, 18.94, 17.33)), .Names = c("wd", "Test"), class = "data.frame", row.names = c(NA,
-24L))
Here is some example code:
df <- data.frame(wd = c(7.5, 22.5, 37.5, 52.5, 67.5, 82.5, 97.5,
112.5, 127.5, 142.5, 157.5, 172.5, 187.5, 202.5, 217.5, 232.5,
247.5, 262.5, 277.5, 292.5, 307.5, 322.5, 337.5, 352.5),
Test = c(10.82,
6.75, 6.57, 6.52, 8.48, 9.66, 15.36, 18.97, 29.14, 36.56, 38.65,
44.23, 51.99, 50.83, 51.93, 50.27, 49.35, 52.67, 54.05, 49.69,
43.73, 29.83, 18.94, 17.33))
# ggplot2 must be attached for this snippet to run on its own.
library(ggplot2)
# Polar bar chart: one bar of width 15 per wd value, bar height taken from
# Test. geom_col() is the direct form of geom_bar(stat = "identity").
ggplot(df, aes(x = wd, y = Test)) +
  geom_col(width = 15, colour = "grey") +
  coord_polar()