fable from distribution to confidence interval - r

I managed to use fable to produce a forecast and got the result as a distribution.
Could I have some guidance on how to convert this distribution into 80% and 95% confidence intervals? Thank you!
You can use the sample code below to get the distribution:
# Fit a seasonal ARIMA model to the log of the monthly USAccDeaths series and
# forecast 12 months ahead. `result` is a fable whose `value` column holds the
# forecast distributions.
result <- USAccDeaths %>%
  as_tsibble() %>%
  model(arima = ARIMA(log(value) ~ pdq(0, 1, 1) + PDQ(0, 1, 1))) %>%
  forecast(h = 12)

The hilo() function allows you to extract confidence intervals from a forecast distribution. It can be used on either the distribution vector, or the fable itself.
library(tidyverse)
library(fable)
# Forecast 12 months ahead with a seasonal ARIMA model on log(value).
result <- USAccDeaths %>%
  as_tsibble() %>%
  model(arima = ARIMA(log(value) ~ pdq(0, 1, 1) + PDQ(0, 1, 1))) %>%
  forecast(h = 12)
# Apply hilo() to the distribution column to add an 80% interval column.
mutate(result, `80%` = hilo(value, 80))
#> # A fable: 12 x 5 [1M]
#> # Key: .model [1]
#> .model index value .mean `80%`
#> <chr> <mth> <dist> <dbl> <hilo>
#> 1 arima 1979 Jan t(N(9, 0.0014)) 8290. [ 7899.082, 8689.169]80
#> 2 arima 1979 Feb t(N(8.9, 0.0018)) 7453. [ 7055.860, 7859.100]80
#> 3 arima 1979 Mar t(N(9, 0.0022)) 8276. [ 7789.719, 8774.054]80
#> 4 arima 1979 Apr t(N(9.1, 0.0025)) 8584. [ 8036.304, 9144.752]80
#> 5 arima 1979 May t(N(9.2, 0.0029)) 9499. [ 8849.860, 10166.302]80
#> 6 arima 1979 Jun t(N(9.2, 0.0033)) 9900. [ 9180.375, 10639.833]80
#> 7 arima 1979 Jul t(N(9.3, 0.0037)) 10988. [10145.473, 11857.038]80
#> 8 arima 1979 Aug t(N(9.2, 0.0041)) 10132. [ 9315.840, 10974.140]80
#> 9 arima 1979 Sep t(N(9.1, 0.0045)) 9138. [ 8368.585, 9933.124]80
#> 10 arima 1979 Oct t(N(9.1, 0.0049)) 9391. [ 8567.874, 10243.615]80
#> 11 arima 1979 Nov t(N(9.1, 0.0052)) 8863. [ 8056.754, 9699.824]80
#> 12 arima 1979 Dec t(N(9.1, 0.0056)) 9356. [ 8474.732, 10271.739]80
# Apply hilo() to the fable itself: one <hilo> column is added per level.
hilo(result, level = c(80, 95))
#> # A tsibble: 12 x 6 [1M]
#> # Key: .model [1]
#> .model index value .mean `80%`
#> <chr> <mth> <dist> <dbl> <hilo>
#> 1 arima 1979 Jan t(N(9, 0.0014)) 8290. [ 7899.082, 8689.169]80
#> 2 arima 1979 Feb t(N(8.9, 0.0018)) 7453. [ 7055.860, 7859.100]80
#> 3 arima 1979 Mar t(N(9, 0.0022)) 8276. [ 7789.719, 8774.054]80
#> 4 arima 1979 Apr t(N(9.1, 0.0025)) 8584. [ 8036.304, 9144.752]80
#> 5 arima 1979 May t(N(9.2, 0.0029)) 9499. [ 8849.860, 10166.302]80
#> 6 arima 1979 Jun t(N(9.2, 0.0033)) 9900. [ 9180.375, 10639.833]80
#> 7 arima 1979 Jul t(N(9.3, 0.0037)) 10988. [10145.473, 11857.038]80
#> 8 arima 1979 Aug t(N(9.2, 0.0041)) 10132. [ 9315.840, 10974.140]80
#> 9 arima 1979 Sep t(N(9.1, 0.0045)) 9138. [ 8368.585, 9933.124]80
#> 10 arima 1979 Oct t(N(9.1, 0.0049)) 9391. [ 8567.874, 10243.615]80
#> 11 arima 1979 Nov t(N(9.1, 0.0052)) 8863. [ 8056.754, 9699.824]80
#> 12 arima 1979 Dec t(N(9.1, 0.0056)) 9356. [ 8474.732, 10271.739]80
#> # … with 1 more variable: `95%` <hilo>
To extract the numerical values from a <hilo> object, you can use the unpack_hilo() function, or obtain each part using <hilo>$lower, <hilo>$upper and <hilo>$level.
# Compute both intervals, then split the 80% <hilo> column into separate
# numeric `80%_lower` and `80%_upper` columns.
intervals <- hilo(result, level = c(80, 95))
unpack_hilo(intervals, "80%")
#> # A tsibble: 12 x 7 [1M]
#> # Key: .model [1]
#> .model index value .mean `80%_lower` `80%_upper`
#> <chr> <mth> <dist> <dbl> <dbl> <dbl>
#> 1 arima 1979 Jan t(N(9, 0.0014)) 8290. 7899. 8689.
#> 2 arima 1979 Feb t(N(8.9, 0.0018)) 7453. 7056. 7859.
#> 3 arima 1979 Mar t(N(9, 0.0022)) 8276. 7790. 8774.
#> 4 arima 1979 Apr t(N(9.1, 0.0025)) 8584. 8036. 9145.
#> 5 arima 1979 May t(N(9.2, 0.0029)) 9499. 8850. 10166.
#> 6 arima 1979 Jun t(N(9.2, 0.0033)) 9900. 9180. 10640.
#> 7 arima 1979 Jul t(N(9.3, 0.0037)) 10988. 10145. 11857.
#> 8 arima 1979 Aug t(N(9.2, 0.0041)) 10132. 9316. 10974.
#> 9 arima 1979 Sep t(N(9.1, 0.0045)) 9138. 8369. 9933.
#> 10 arima 1979 Oct t(N(9.1, 0.0049)) 9391. 8568. 10244.
#> 11 arima 1979 Nov t(N(9.1, 0.0052)) 8863. 8057. 9700.
#> 12 arima 1979 Dec t(N(9.1, 0.0056)) 9356. 8475. 10272.
#> # … with 1 more variable: `95%` <hilo>
Created on 2020-04-08 by the reprex package (v0.3.0)

Related

How to forecast multiple time series in R

I have a dataset that contains multiple series (50 products, one per column). Each column holds the daily sales of one product.
I want to forecast these products using ets. I wrote the code below, but when I run it I get only one time series and some information that I do not understand. Thanks in advance :)
# Read the sales table; with header=FALSE the first row is treated as data.
y<- read.csv("QAO2.csv", header=FALSE, fileEncoding = "latin1")
# Drop the first column and build a multivariate ts. `f` and `s` partially
# match ts()'s `frequency` and `start` arguments (12 periods per year,
# starting at period 1 of 2007).
y <- ts(y[,-1],f=12,s=c(2007, 1))
ns <- ncol(y)
# NOTE(review): the loop has no braces, so only the next statement
# (`fit.ets <- ets(y[,i])`) is the loop body. Everything after it runs once,
# after the loop finishes, using the fit for the last column only.
for(i in 1:ns)
fit.ets <- ets(y[,i])
print(fit.ets)
f.ets <- forecast(fit.ets,h=12)
print(f.ets)
plot(f.ets)
This is what the fable package is designed to do. Here is an example using 50 series of monthly data from 2007. Although you say you have daily data, the code you provide assumes monthly data (frequency 12).
library(fable)
library(dplyr)
library(tidyr)
library(ggplot2)
# Simulate 50 monthly series (175 observations each, starting January 2007)
# and reshape them into a long tsibble with columns Month, key and Sales.
y <- matrix(rnorm(175 * 50), ncol = 50) %>%
  ts(frequency = 12, start = c(2007, 1)) %>%
  as_tsibble() %>%
  rename(Month = index, Sales = value)
y
#> # A tsibble: 8,750 x 3 [1M]
#> # Key: key [50]
#> Month key Sales
#> <mth> <chr> <dbl>
#> 1 2007 Jan Series 1 1.06
#> 2 2007 Feb Series 1 0.495
#> 3 2007 Mar Series 1 0.332
#> 4 2007 Apr Series 1 0.157
#> 5 2007 May Series 1 -0.120
#> 6 2007 Jun Series 1 -0.0846
#> 7 2007 Jul Series 1 -0.743
#> 8 2007 Aug Series 1 0.714
#> 9 2007 Sep Series 1 1.73
#> 10 2007 Oct Series 1 -0.212
#> # … with 8,740 more rows
fit.ets <- y %>% model(ETS(Sales))
fit.ets
#> # A mable: 50 x 2
#> # Key: key [50]
#> key `ETS(Sales)`
#> <chr> <model>
#> 1 Series 1 <ETS(A,N,N)>
#> 2 Series 10 <ETS(A,N,N)>
#> 3 Series 11 <ETS(A,N,N)>
#> 4 Series 12 <ETS(A,N,N)>
#> 5 Series 13 <ETS(A,N,N)>
#> 6 Series 14 <ETS(A,N,N)>
#> 7 Series 15 <ETS(A,N,N)>
#> 8 Series 16 <ETS(A,N,N)>
#> 9 Series 17 <ETS(A,N,N)>
#> 10 Series 18 <ETS(A,N,N)>
#> # … with 40 more rows
f.ets <- forecast(fit.ets, h=12)
f.ets
#> # A fable: 600 x 5 [1M]
#> # Key: key, .model [50]
#> key .model Month Sales .mean
#> <chr> <chr> <mth> <dist> <dbl>
#> 1 Series 1 ETS(Sales) 2021 Aug N(-0.028, 1.1) -0.0279
#> 2 Series 1 ETS(Sales) 2021 Sep N(-0.028, 1.1) -0.0279
#> 3 Series 1 ETS(Sales) 2021 Oct N(-0.028, 1.1) -0.0279
#> 4 Series 1 ETS(Sales) 2021 Nov N(-0.028, 1.1) -0.0279
#> 5 Series 1 ETS(Sales) 2021 Dec N(-0.028, 1.1) -0.0279
#> 6 Series 1 ETS(Sales) 2022 Jan N(-0.028, 1.1) -0.0279
#> 7 Series 1 ETS(Sales) 2022 Feb N(-0.028, 1.1) -0.0279
#> 8 Series 1 ETS(Sales) 2022 Mar N(-0.028, 1.1) -0.0279
#> 9 Series 1 ETS(Sales) 2022 Apr N(-0.028, 1.1) -0.0279
#> 10 Series 1 ETS(Sales) 2022 May N(-0.028, 1.1) -0.0279
#> # … with 590 more rows
# Keep only the forecasts for "Series 1" and plot them, passing the original
# data `y` to autoplot() and adding a title.
autoplot(filter(f.ets, key == "Series 1"), y) +
  labs(title = "Series 1")
Created on 2021-08-05 by the reprex package (v2.0.0)

Extracting Confidence Intervals from Forecast Fable

I'm encountering an issue attempting to extract the 90/95% confidence intervals resulting from a forecast model built from a key variable holding 5 groups across a total of 4 forecasting models.
The primary problem is that I'm not familiar with how R treats and works with dist and hilo object types.
The original tsibble has a structure of 60 months for each of the 5 groups (300 observations)
>groups
# A tsibble: 300 x 3 [1M]
# Key: Group [5]
Month Group Measure
<mth> <chr> <dbl>
1 2016 May Group1 8.75
2 2016 Jun Group1 8.5
3 2016 Jul Group1 7
4 2016 Aug Group1 10
5 2016 Sep Group1 2
6 2016 Oct Group1 6
7 2016 Nov Group1 8
8 2016 Dec Group1 0
9 2017 Jan Group1 16
10 2017 Feb Group1 9
... with 290 more rows
I form a model with different forecast methods, as well as a combination model:
groups%>%model(ets=ETS(Measure),
mean=MEAN(Measure),
snaive=SNAIVE(Measure))%>%mutate(combination=(ets+mean+snaive)/3)->groups_avg
This results in a mable of the structure
>groups_avg
# A mable: 5 x 5
# Key: Group [5]
Group ets mean snaive combination
<chr> <model> <mode> <model> <model>
1 Group1 <ETS(A,N,N)> <MEAN> <SNAIVE> <COMBINATION>
2 Group2 <ETS(A,N,N)> <MEAN> <SNAIVE> <COMBINATION>
3 Group3 <ETS(M,N,N)> <MEAN> <SNAIVE> <COMBINATION>
4 Group4 <ETS(A,N,N)> <MEAN> <SNAIVE> <COMBINATION>
5 Group5 <ETS(A,N,N)> <MEAN> <SNAIVE> <COMBINATION>
Which I then forecast out 6 months
groups_avg%>%forecast(h=6,level=c(90,95))->groups_fc
I then generate what I expect the output tsibble to be:
> # Convert the forecast distributions in groups_fc into 90% and 95% interval columns
> groups_fc %>% hilo(level = c(90, 95)) -> groups_hilo
> groups_hilo
# A tsibble: 120 x 7 [1M]
# Key: Group, .model [20]
Group .model Month Measure .mean `90%` `95%`
<chr> <chr> <mth> <dist> <dbl> <hilo> <hilo>
1 CapstoneLaw ets 2021 May N(12, 21) 11.6 [4.1332418, 19.04858]90 [ 2.704550, 20.47727]95
2 CapstoneLaw ets 2021 Jun N(12, 21) 11.6 [4.0438878, 19.13793]90 [ 2.598079, 20.58374]95
3 CapstoneLaw ets 2021 Jul N(12, 22) 11.6 [3.9555794, 19.22624]90 [ 2.492853, 20.68897]95
4 CapstoneLaw ets 2021 Aug N(12, 22) 11.6 [3.8682807, 19.31354]90 [ 2.388830, 20.79299]95
5 CapstoneLaw ets 2021 Sep N(12, 23) 11.6 [3.7819580, 19.39986]90 [ 2.285970, 20.89585]95
6 CapstoneLaw ets 2021 Oct N(12, 23) 11.6 [3.6965790, 19.48524]90 [ 2.184235, 20.99758]95
7 CapstoneLaw mean 2021 May N(8, 21) 7.97 [0.3744124, 15.56725]90 [-1.080860, 17.02253]95
8 CapstoneLaw mean 2021 Jun N(8, 21) 7.97 [0.3744124, 15.56725]90 [-1.080860, 17.02253]95
9 CapstoneLaw mean 2021 Jul N(8, 21) 7.97 [0.3744124, 15.56725]90 [-1.080860, 17.02253]95
10 CapstoneLaw mean 2021 Aug N(8, 21) 7.97 [0.3744124, 15.56725]90 [-1.080860, 17.02253]95
# ... with 110 more rows
As I've done with more simply structured forecasts, I tried to write these forecast results to a csv.
> write.csv(groups_hilo,dir)
Error: Can't convert <hilo> to <character>.
Run `rlang::last_error()` to see where the error occurred.
But I am quite lost on how to coerce the generated 90/95% confidence intervals into a format that I can export. Has anyone encountered this issue?
Please let me know if I should include any more information!

How to apply Fable/Forecast (in R) to this database?

I am trying to forecast multiple time series with the fable package in R. It seems to be the most efficient way to do it, but I am very new to R, so I'm currently running into a lot of problems. I just wanted to ask for advice and ideas. I have already found how to do it using just the forecast package, but in a way that requires a lot of extra steps.
My data is an Excel file with 5701 columns and 50 rows. Each column has the name of a product in the first row, and the next 49 values are numbers representing the sales from January 2017 to January 2021. First, how do I transform that table into a tsibble? I know I need to do that in order to work with fable, but I'm stuck on such a simple step. Then I would like to have as output a table with the monthly forecast for the next 3 semesters (April 2021 to September 2022) with Product|Date|Model Arima(values)|error of arima(value/values)|model ETS|Error of ETS|model Naive|error of naive..etc. My main objective is to get a table with product|best prediction for april2021/september2021|best prediction for october2021/march2022|best prediction for april2022/september2022|
What I was doing was using this code:
newdata <- read_excel("ALLINCOLUMNS.xlsx")
Fcast <- ts(newdata[,1:5701], start= c(1), end=c(49), frequency=12)
output <- lapply(Fcast, function(x) forecast(auto.arima(x)))
prediction <- as.data.frame(output)
write.table(prediction, file= "C:\\Users\\thega\\OneDrive\\Documentos\\finalprediction.csv",sep=",")
Which gave me, by default, something in the format |product1.Point.Forecast||Product1.Lo.80||Product1.Hi.80|Product1.Lo.95|Product1.Hi.95|Product2.Point.Forecast|...|Product5701.Hi.95|... Anyway, I don't need the 80 and 95 intervals, and they make it more difficult for me to work with the result in Excel. How can I get something in the format:
|point forecast product 1|point forecast product 2|....|point forecast product 5701|, showing only the forecast? I know that I have to use level=NULL in the forecast function, but it is not working in the ways I have tried. I was planning to write some code to delete those columns, but that is less elegant. Finally, is there any way to show all the errors for the methods in a column? I want to add the best method to my table, so I need to verify which has the lowest error.
The {fable} package works best when data is in a tidy format. In your case, the products should be represented across rows instead of columns.
You can read more about what tidy data is here: https://r4ds.had.co.nz/tidy-data.html
Once you've done that, you can also read about tidy data for time series here: https://otexts.com/fpp3/tsibbles.html
Without having your dataset, I can only guess that your Fcast object (the ts() data) looks something like this:
Fcast <- cbind(mdeaths,fdeaths)
Fcast
#> mdeaths fdeaths
#> Jan 1974 2134 901
#> Feb 1974 1863 689
#> Mar 1974 1877 827
#> Apr 1974 1877 677
#> May 1974 1492 522
#> Jun 1974 1249 406
#> Jul 1974 1280 441
#> and so on ...
That is, each of your products has its own column (and you have 5701 products, not just the 2 I'll use in this example).
If you already have the data in a ts object, you can use as_tsibble(<ts>) to convert it to a tidy time series dataset.
library(tsibble)
as_tsibble(Fcast, pivot_longer = TRUE)
#> # A tsibble: 144 x 3 [1M]
#> # Key: key [2]
#> index key value
#> <mth> <chr> <dbl>
#> 1 1974 Jan fdeaths 901
#> 2 1974 Feb fdeaths 689
#> 3 1974 Mar fdeaths 827
#> 4 1974 Apr fdeaths 677
#> 5 1974 May fdeaths 522
#> 6 1974 Jan mdeaths 2134
#> 7 1974 Feb mdeaths 1863
#> 8 1974 Mar mdeaths 1877
#> 9 1974 Apr mdeaths 1877
#> 10 1974 May mdeaths 1492
Created on 2021-02-25 by the reprex package (v0.3.0)
Setting pivot_longer = TRUE will collect the columns into a long format. This format is suitable for the {fable} package. We now have a key column which stores the series name (product ID for your data), and the values are stored in the value column.
With the data in an appropriate format, we can now use auto ARIMA() and forecast() to obtain forecasts:
library(fable)
#> Loading required package: fabletools
as_tsibble(Fcast, pivot_longer = TRUE) %>%
model(ARIMA(value)) %>%
forecast()
#> # A fable: 48 x 5 [1M]
#> # Key: key, .model [2]
#> key .model index value .mean
#> <chr> <chr> <mth> <dist> <dbl>
#> 1 fdeaths ARIMA(value) 1980 Jan N(825, 6184) 825.
#> 2 fdeaths ARIMA(value) 1980 Feb N(820, 6184) 820.
#> 3 fdeaths ARIMA(value) 1980 Mar N(767, 6184) 767.
#> 4 fdeaths ARIMA(value) 1980 Apr N(605, 6184) 605.
#> 5 fdeaths ARIMA(value) 1980 May N(494, 6184) 494.
#> 6 fdeaths ARIMA(value) 1980 Jun N(423, 6184) 423.
#> 7 fdeaths ARIMA(value) 1980 Jul N(414, 6184) 414.
#> 8 fdeaths ARIMA(value) 1980 Aug N(367, 6184) 367.
#> 9 fdeaths ARIMA(value) 1980 Sep N(376, 6184) 376.
#> 10 fdeaths ARIMA(value) 1980 Oct N(442, 6184) 442.
#> # … with 38 more rows
Created on 2021-02-25 by the reprex package (v0.3.0)
You can also compute forecasts from other models by specifying several models in model().
Fcast <- cbind(mdeaths,fdeaths)
library(tsibble)
#>
#> Attaching package: 'tsibble'
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, union
library(fable)
#> Loading required package: fabletools
as_tsibble(Fcast, pivot_longer = TRUE) %>%
model(arima = ARIMA(value), ets = ETS(value), snaive = SNAIVE(value)) %>%
forecast()
#> # A fable: 144 x 5 [1M]
#> # Key: key, .model [6]
#> key .model index value .mean
#> <chr> <chr> <mth> <dist> <dbl>
#> 1 fdeaths arima 1980 Jan N(825, 6184) 825.
#> 2 fdeaths arima 1980 Feb N(820, 6184) 820.
#> 3 fdeaths arima 1980 Mar N(767, 6184) 767.
#> 4 fdeaths arima 1980 Apr N(605, 6184) 605.
#> 5 fdeaths arima 1980 May N(494, 6184) 494.
#> 6 fdeaths arima 1980 Jun N(423, 6184) 423.
#> 7 fdeaths arima 1980 Jul N(414, 6184) 414.
#> 8 fdeaths arima 1980 Aug N(367, 6184) 367.
#> 9 fdeaths arima 1980 Sep N(376, 6184) 376.
#> 10 fdeaths arima 1980 Oct N(442, 6184) 442.
#> # … with 134 more rows
Created on 2021-02-25 by the reprex package (v0.3.0)
The .model column now identifies the model used to produce each forecast, of which there are 3 models.
If you want to focus on point forecasts side by side, you can tidyr::pivot_wider() the forecast .mean values across several columns.
library(tsibble)
library(fable)
library(tidyr)
# Two example series, one per column.
Fcast <- cbind(mdeaths, fdeaths)
# Fit three models per series, forecast, then spread the point forecasts
# (.mean) into one column per model.
as_tsibble(Fcast, pivot_longer = TRUE) %>%
  model(
    arima = ARIMA(value),
    ets = ETS(value),
    snaive = SNAIVE(value)
  ) %>%
  forecast() %>%
  as_tibble() %>%
  pivot_wider(
    id_cols = c("key", "index"),
    names_from = ".model",
    values_from = ".mean"
  )
#> # A tibble: 48 x 5
#> key index arima ets snaive
#> <chr> <mth> <dbl> <dbl> <dbl>
#> 1 fdeaths 1980 Jan 825. 789. 821
#> 2 fdeaths 1980 Feb 820. 812. 785
#> 3 fdeaths 1980 Mar 767. 746. 727
#> 4 fdeaths 1980 Apr 605. 592. 612
#> 5 fdeaths 1980 May 494. 479. 478
#> 6 fdeaths 1980 Jun 423. 413. 429
#> 7 fdeaths 1980 Jul 414. 394. 405
#> 8 fdeaths 1980 Aug 367. 355. 379
#> 9 fdeaths 1980 Sep 376. 365. 393
#> 10 fdeaths 1980 Oct 442. 443. 411
#> # … with 38 more rows
Created on 2021-02-25 by the reprex package (v0.3.0)
You can learn how to evaluate accuracy of these models/forecasts here: https://otexts.com/fpp3/accuracy.html

Aggregating forecasts using Fable

Issue:
Using fable I can easily produce forecasts on a time series with a grouped structure, and can even use Fable's aggregate_key/ reconcile syntax to produce a coherent top-level forecast. However I'm unable to easily access the aggregate forecasts using this method, and the alternative I'm using involves abandoning the fable (forecast table) structure. Can anyone tell me if there's an easier/intended way to do this using the package? As you can see in the examples, I'm able to get there using other methods, but I'd like to know if there's a better way. Any help gratefully received!
Approach 1:
My efforts to summarise the forecast without using aggregate_key/ reconcile have been mainly using dplyr's group_by and summarise, however the prediction interval for the forecast is formatted as a normal distribution object, which doesn't seem to support summing using this method. To get around this I've been using hilo and unpack_hilo to extract bounds for different prediction intervals, which can then be summed using the usual method. However I'd really like to retain the fable structure and the distribution objects, which is impossible using this method.
Approach 2:
The alternative, using aggregate_key/ reconcile only seems to support aggregation using min_trace. I understand that this method is for optimum reconciliation, whereas what I want is a simple bottom-up aggregate forecast. It feels like there should be an easy way to get bottom-up forecasts using this syntax, but I haven't found one so far. Moreover, even using min_trace I'm unsure how to access the aggregate forecast itself as you can see in the example!
Example using approach 1:
library(fable)
#> Loading required package: fabletools
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
lung_deaths_agg <- as_tsibble(cbind(mdeaths, fdeaths))
fc_1 <- lung_deaths_agg %>%
model(lm = TSLM(value ~ trend() + season())) %>%
forecast()
fc_1
#> # A fable: 48 x 5 [1M]
#> # Key: key, .model [2]
#> key .model index value .mean
#> <chr> <chr> <mth> <dist> <dbl>
#> 1 fdeaths lm 1980 Jan N(794, 5940) 794.
#> 2 fdeaths lm 1980 Feb N(778, 5940) 778.
#> 3 fdeaths lm 1980 Mar N(737, 5940) 737.
#> 4 fdeaths lm 1980 Apr N(577, 5940) 577.
#> 5 fdeaths lm 1980 May N(456, 5940) 456.
#> 6 fdeaths lm 1980 Jun N(386, 5940) 386.
#> 7 fdeaths lm 1980 Jul N(379, 5940) 379.
#> 8 fdeaths lm 1980 Aug N(335, 5940) 335.
#> 9 fdeaths lm 1980 Sep N(340, 5940) 340.
#> 10 fdeaths lm 1980 Oct N(413, 5940) 413.
#> # ... with 38 more rows
fc_1 %>%
hilo() %>%
unpack_hilo(c(`80%`, `95%`)) %>%
as_tibble() %>%
group_by(index) %>%
summarise(across(c(.mean, ends_with("upper"), ends_with("lower")), sum))
#> `summarise()` ungrouping output (override with `.groups` argument)
#> # A tibble: 24 x 6
#> index .mean `80%_upper` `95%_upper` `80%_lower` `95%_lower`
#> <mth> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 1980 Jan 2751. 3089. 3267. 2414. 2236.
#> 2 1980 Feb 2687. 3024. 3202. 2350. 2171.
#> 3 1980 Mar 2535. 2872. 3051. 2198. 2020.
#> 4 1980 Apr 2062. 2399. 2577. 1725. 1546.
#> 5 1980 May 1597. 1934. 2113. 1260. 1082.
#> 6 1980 Jun 1401. 1738. 1916. 1064. 885.
#> 7 1980 Jul 1343. 1680. 1858. 1006. 827.
#> 8 1980 Aug 1200. 1538. 1716. 863. 685.
#> 9 1980 Sep 1189. 1527. 1705. 852. 674.
#> 10 1980 Oct 1482. 1819. 1998. 1145. 967.
#> # ... with 14 more rows
Example using approach 2:
fc_2 <- lung_deaths_agg %>%
aggregate_key(key, value = sum(value)) %>%
model(lm = TSLM(value ~ trend() + season())) %>%
reconcile(lm = min_trace(lm)) %>%
forecast()
fc_2
#> # A fable: 72 x 5 [1M]
#> # Key: key, .model [3]
#> key .model index value .mean
#> <chr> <chr> <mth> <dist> <dbl>
#> 1 fdeaths lm 1980 Jan N(794, 5606) 794.
#> 2 fdeaths lm 1980 Feb N(778, 5606) 778.
#> 3 fdeaths lm 1980 Mar N(737, 5606) 737.
#> 4 fdeaths lm 1980 Apr N(577, 5606) 577.
#> 5 fdeaths lm 1980 May N(456, 5606) 456.
#> 6 fdeaths lm 1980 Jun N(386, 5606) 386.
#> 7 fdeaths lm 1980 Jul N(379, 5606) 379.
#> 8 fdeaths lm 1980 Aug N(335, 5606) 335.
#> 9 fdeaths lm 1980 Sep N(340, 5606) 340.
#> 10 fdeaths lm 1980 Oct N(413, 5606) 413.
#> # ... with 62 more rows
fc_2 %>% as_tibble() %>% select(key) %>% slice(50:55)
#> # A tibble: 6 x 1
#> key
#> <chr>
#> 1 <aggregated>
#> 2 <aggregated>
#> 3 <aggregated>
#> 4 <aggregated>
#> 5 <aggregated>
#> 6 <aggregated>
fc_2 %>% as_tibble() %>% select(key) %>% filter(key == "<aggregated>")
#> # A tibble: 0 x 1
#> # ... with 1 variable: key <chr>
Approach 1:
Working with distributions requires more care (than numbers) when adding things together. More specifically, the mean of a Normal distribution can be added without issue:
library(distributional)
mean(dist_normal(2,3) + dist_normal(4,1))
#> [1] 6
mean(dist_normal(2,3)) + mean(dist_normal(4,1))
#> [1] 6
Created on 2020-07-03 by the reprex package (v0.3.0)
However the quantiles (used to produce your 80% and 95% intervals) cannot:
library(distributional)
quantile(dist_normal(2,3) + dist_normal(4,1), 0.9)
#> [1] 10.05262
quantile(dist_normal(2,3), 0.9) + quantile(dist_normal(4,1), 0.9)
#> [1] 11.12621
Created on 2020-07-03 by the reprex package (v0.3.0)
If you want to aggregate distributions, you'll need to compute the sum on the distribution itself:
library(fable)
library(dplyr)
# Forecast each series separately with a linear trend + seasonal model.
lung_deaths_agg <- as_tsibble(cbind(mdeaths, fdeaths))
fc_1 <- lung_deaths_agg %>%
  model(lm = fable::TSLM(value ~ trend() + season())) %>%
  forecast()
# Aggregate across series by summing the forecast distributions themselves,
# then take the mean of the summed distribution as the point forecast.
summarise(fc_1, value = sum(value), .mean = mean(value))
#> # A fable: 24 x 3 [1M]
#> index value .mean
#> <mth> <dist> <dbl>
#> 1 1980 Jan N(2751, 40520) 2751.
#> 2 1980 Feb N(2687, 40520) 2687.
#> 3 1980 Mar N(2535, 40520) 2535.
#> 4 1980 Apr N(2062, 40520) 2062.
#> 5 1980 May N(1597, 40520) 1597.
#> 6 1980 Jun N(1401, 40520) 1401.
#> 7 1980 Jul N(1343, 40520) 1343.
#> 8 1980 Aug N(1200, 40520) 1200.
#> 9 1980 Sep N(1189, 40520) 1189.
#> 10 1980 Oct N(1482, 40520) 1482.
#> # … with 14 more rows
Created on 2020-07-03 by the reprex package (v0.3.0)
Note that this will require the development versions of fabletools (>=0.2.0.9000) and distributional (>=0.1.0.9000) as I have added new features to make this example work.
Approach 2:
Experimental support for bottom up reconciliation is available using fabletools:::bottom_up(). This is currently an internal function as I'm still working on some details of how reconciliation can be done more generally in fabletools.
Matching aggregated values should be done with is_aggregated().
fc_2 <- lung_deaths_agg %>%
aggregate_key(key, value = sum(value)) %>%
model(lm = TSLM(value ~ trend() + season())) %>%
reconcile(lm = min_trace(lm)) %>%
forecast()
fc_2 %>%
filter(is_aggregated(key))
#> # A fable: 24 x 5 [1M]
#> # Key: key, .model [1]
#> key .model index value .mean
#> <chr> <chr> <mth> <dist> <dbl>
#> 1 <aggregated> lm 1980 Jan N(2751, 24989) 2751.
#> 2 <aggregated> lm 1980 Feb N(2687, 24989) 2687.
#> 3 <aggregated> lm 1980 Mar N(2535, 24989) 2535.
#> 4 <aggregated> lm 1980 Apr N(2062, 24989) 2062.
#> 5 <aggregated> lm 1980 May N(1597, 24989) 1597.
#> 6 <aggregated> lm 1980 Jun N(1401, 24989) 1401.
#> 7 <aggregated> lm 1980 Jul N(1343, 24989) 1343.
#> 8 <aggregated> lm 1980 Aug N(1200, 24989) 1200.
#> 9 <aggregated> lm 1980 Sep N(1189, 24989) 1189.
#> 10 <aggregated> lm 1980 Oct N(1482, 24989) 1482.
#> # … with 14 more rows
Created on 2020-07-03 by the reprex package (v0.3.0)
Comparing an aggregated vector with "<aggregated>" is ambiguous, as your key's character value may be "<aggregated>" without the value being <aggregated>. I've now updated fabletools to match "<aggregated>" with aggregated values with a warning and hint, so this code now gives:
fc_2 %>%
filter(key == "<aggregated>")
#> Warning: <aggregated> character values have been converted to aggregated values.
#> Hint: If you're trying to compare aggregated values, use `is_aggregated()`.
#> # A fable: 24 x 5 [1M]
#> # Key: key, .model [1]
#> key .model index value .mean
#> <chr> <chr> <mth> <dist> <dbl>
#> 1 <aggregated> lm 1980 Jan N(2751, 24989) 2751.
#> 2 <aggregated> lm 1980 Feb N(2687, 24989) 2687.
#> 3 <aggregated> lm 1980 Mar N(2535, 24989) 2535.
#> 4 <aggregated> lm 1980 Apr N(2062, 24989) 2062.
#> 5 <aggregated> lm 1980 May N(1597, 24989) 1597.
#> 6 <aggregated> lm 1980 Jun N(1401, 24989) 1401.
#> 7 <aggregated> lm 1980 Jul N(1343, 24989) 1343.
#> 8 <aggregated> lm 1980 Aug N(1200, 24989) 1200.
#> 9 <aggregated> lm 1980 Sep N(1189, 24989) 1189.
#> 10 <aggregated> lm 1980 Oct N(1482, 24989) 1482.
#> # … with 14 more rows
Created on 2020-07-03 by the reprex package (v0.3.0)

How to generate future forecasts with dates using auto.arima in R?

I would like to generate forecasts using auto.arima, but I don't see future dates populated. How can I get future forecasts with dates? I have weekly data and want to generate forecasts up to Dec 2020.
I am using the forecast package in R.
fit <- auto.arima(zoo_ts)
fcast <- forecast(fit, h=83)
I need weekly forecasts from July 2019, with dates at weekly intervals. I am not providing any data. It would be great if anyone could share how to do this.
The forecast package uses ts objects, which are not great for weekly data. The time index is stored numerically in terms of years. So 2019.5385 means week 28 of 2019 (as 28/52 = 0.5385).
An alternative is to use the fable and tsibble packages. Here is an example using weekly data.
library(tsibble)
library(fable)
library(fpp3) # For the data
# Fit an automatically selected ARIMA model to the weekly Barrels series
fit <- model(us_gasoline, arima = ARIMA(Barrels))
# Forecast 83 weeks ahead and print the resulting fable
fcast <- fit %>% forecast(h = 83)
fcast
#> # A fable: 83 x 4 [1W]
#> # Key: .model [1]
#> .model Week Barrels .distribution
#> <chr> <week> <dbl> <dist>
#> 1 arima 2017 W04 8.30 N(8.3, 0.072)
#> 2 arima 2017 W05 8.44 N(8.4, 0.077)
#> 3 arima 2017 W06 8.53 N(8.5, 0.082)
#> 4 arima 2017 W07 8.59 N(8.6, 0.086)
#> 5 arima 2017 W08 8.48 N(8.5, 0.091)
#> 6 arima 2017 W09 8.49 N(8.5, 0.096)
#> 7 arima 2017 W10 8.61 N(8.6, 0.101)
#> 8 arima 2017 W11 8.52 N(8.5, 0.106)
#> 9 arima 2017 W12 8.58 N(8.6, 0.111)
#> 10 arima 2017 W13 8.47 N(8.5, 0.115)
#> # … with 73 more rows
The time index is stored in weeks here. This can be converted to a date using as.Date:
# Convert weekly index to a date
fcast %>% mutate(date = as.Date(Week))
#> # A fable: 83 x 5 [1W]
#> # Key: .model [1]
#> .model Week Barrels .distribution date
#> <chr> <week> <dbl> <dist> <date>
#> 1 arima 2017 W04 8.30 N(8.3, 0.072) 2017-01-23
#> 2 arima 2017 W05 8.44 N(8.4, 0.077) 2017-01-30
#> 3 arima 2017 W06 8.53 N(8.5, 0.082) 2017-02-06
#> 4 arima 2017 W07 8.59 N(8.6, 0.086) 2017-02-13
#> 5 arima 2017 W08 8.48 N(8.5, 0.091) 2017-02-20
#> 6 arima 2017 W09 8.49 N(8.5, 0.096) 2017-02-27
#> 7 arima 2017 W10 8.61 N(8.6, 0.101) 2017-03-06
#> 8 arima 2017 W11 8.52 N(8.5, 0.106) 2017-03-13
#> 9 arima 2017 W12 8.58 N(8.6, 0.111) 2017-03-20
#> 10 arima 2017 W13 8.47 N(8.5, 0.115) 2017-03-27
#> # … with 73 more rows
Created on 2019-10-16 by the reprex package (v0.3.0)

Resources