indicate ts frequency by group in R - r

timeseries=structure(list(Data = structure(c(10L, 14L, 18L, 22L, 26L, 29L,
32L, 35L, 38L, 1L, 4L, 7L, 11L, 15L, 19L, 23L, 27L, 30L, 33L,
36L, 39L, 2L, 5L, 8L, 12L, 16L, 20L, 24L, 28L, 31L, 34L, 37L,
40L, 3L, 6L, 9L, 13L, 17L, 21L, 25L), .Label = c("01.01.2018",
"01.01.2019", "01.01.2020", "01.02.2018", "01.02.2019", "01.02.2020",
"01.03.2018", "01.03.2019", "01.03.2020", "01.04.2017", "01.04.2018",
"01.04.2019", "01.04.2020", "01.05.2017", "01.05.2018", "01.05.2019",
"01.05.2020", "01.06.2017", "01.06.2018", "01.06.2019", "01.06.2020",
"01.07.2017", "01.07.2018", "01.07.2019", "01.07.2020", "01.08.2017",
"01.08.2018", "01.08.2019", "01.09.2017", "01.09.2018", "01.09.2019",
"01.10.2017", "01.10.2018", "01.10.2019", "01.11.2017", "01.11.2018",
"01.11.2019", "01.12.2017", "01.12.2018", "01.12.2019"), class = "factor"),
client = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L), .Label = c("Horns", "Kornev"), class = "factor"), stuff = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("chickens",
"hooves", "Oysters"), class = "factor"), Sales = c(374L,
12L, 120L, 242L, 227L, 268L, 280L, 419L, 12L, 172L, 336L,
117L, 108L, 150L, 90L, 117L, 116L, 146L, 120L, 211L, 213L,
67L, 146L, 118L, 152L, 122L, 201L, 497L, 522L, 65L, 268L,
441L, 247L, 348L, 445L, 477L, 62L, 226L, 476L, 306L)), .Names = c("Data",
"client", "stuff", "Sales"), class = "data.frame", row.names = c(NA,
-40L))
here three groups
Kornev Oysters 01.03.2018 - 01.06.2019
Horns hooves 01.07.2019 - 01.07.2020
Horns chickens 01.04.2017 - 01.02.2018
As we can see these groups have different time start.
in my code
create forecast by group
# first the grouping variable
timeseries$group <- paste0(timeseries$client,timeseries$stuff)
# now the list
listed <- split(timeseries,timeseries$group)
# to have a smaller output, I post the str(listed)
library("forecast")
library("lubridate")
listed_ts <- lapply(listed,
function(x) ts(x[["Sales"]], start = c(2017, 1), frequency = 12) )
listed_ts
listed_arima <- lapply(listed_ts,function(x) auto.arima(x) )
#Now the forecast for each arima:
listed_forecast <- lapply(listed_arima,function(x) forecast(x,5) )
listed_forecast
do.call(rbind,listed_forecast)
get initial values
lapply(listed_arima, fitted)
lapply(listed_arima, function(x) fitted(x))
listed_arima <- lapply(listed_ts, auto.arima)
The row where indicate start -
listed_ts <- lapply(listed,
function(x) ts(x[["Sales"]], start = c(2017, 1), frequency = 12) )
but some groups can start from 2018.start = c(2018, 3)
How to do that for each group individually indicated the starting time!?
I can't indicate start time for each group separately manually, cause can be more than 100 groups

This might be of help
# determine all groups
groups <- unique(timeseries$group)
# find starting date per group and save them as a list of elements c('YEAR','Month')
timeseries$date <- as.Date(as.character(timeseries$Data), '%d.%m.%Y')
timeseries <- timeseries[order(timeseries$date),]
start_dates <- format(timeseries$date[match(groups, timeseries$group)], "%Y %m")
start_dates <- strsplit(start_dates, ' ')
# Back to your code
# now the list
listed <- split(timeseries,timeseries$group)
# Edited the lapply funcion in order to consider the starting dates
# to have a smaller output, I post the str(listed)
listed_ts <- lapply(seq_along(listed),
function(k) ts(listed[[k]][["Sales"]], start = as.integer(start_dates[[k]]), frequency = 12) )
Explanation
Figure out for each group what the starting date is and save it as a vector c('Year','Month')
All those starting dates are saved in a list start_dates
modify the lapply function in order to consider the starting dates

Related

Splitting one column into two with kable / kableExtra in R

this is my first so please be patient with me.
I want to split one column of a tibble into two columns depending on the value of a third column.
My table looks like this so far
Wertetabelle <- tibble(DAT$Tag, DAT$Lauf, DAT$Replikate, DAT$Wert) %>% group_by(DAT$Lauf)
Wertetabelle %>%
mutate_all(linebreak) %>%
kable(booktabs = T, digits = 2,
caption = "Rohdaten der PCR Messungen",
col.names = linebreak(c("Tag", " Lauf", "Replikat", "Wert"), align = "r")) %>%
kable_styling(latex_options = c("striped", "hold_position"))
This, unfortunately, gives me a very long table. The column "Wert" has at least 80 values.
So depending on the "Replikat" column which has two values (1:2) I could split up "Wert" into two columns with 40 values each.
Unfortunately, the group_by doesn't work, it seems.
Do you have any idea?
Tag has 20 values 1:20
Lauf has 2 values 1:2
Replikat has 2 values 1:2
Wert is numeric
Best
Werek
as requested please find the results of dput(.)
structure(list(`DAT$Tag` = structure(c(1L, 2L, 3L, 4L, 5L, 6L,
7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L,
20L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L,
14L, 15L, 16L, 17L, 18L, 19L, 20L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L,
1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L,
15L, 16L, 17L, 18L, 19L, 20L), .Label = c("1", "2", "3", "4",
"5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15",
"16", "17", "18", "19", "20"), class = "factor"), `DAT$Lauf` = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("1",
"2"), class = "factor"), `DAT$Replikate` = structure(c(1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("1",
"2"), class = "factor"), `DAT$Wert` = c(242L, 243L, 247L, 249L,
246L, 244L, 241L, 245L, 243L, 244L, 252L, 249L, 242L, 246L, 247L,
240L, 241L, 244L, 241L, 247L, 246L, 242L, 239L, 241L, 242L, 245L,
246L, 245L, 239L, 246L, 251L, 248L, 240L, 249L, 248L, 238L, 244L,
244L, 239L, 240L, 245L, 238L, 241L, 250L, 243L, 251L, 245L, 243L,
244L, 247L, 247L, 251L, 251L, 248L, 245L, 239L, 245L, 237L, 247L,
245L, 246L, 238L, 240L, 245L, 240L, 247L, 247L, 245L, 245L, 239L,
241L, 246L, 245L, 240L, 246L, 242L, 248L, 242L, 245L, 242L)), row.names = c(NA,
-80L), groups = structure(list(`DAT$Lauf` = structure(1:2, .Label = c("1",
"2"), class = "factor"), .rows = structure(list(c(1L, 2L, 3L,
4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L,
18L, 19L, 20L, 41L, 42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L,
51L, 52L, 53L, 54L, 55L, 56L, 57L, 58L, 59L, 60L), c(21L, 22L,
23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L, 35L,
36L, 37L, 38L, 39L, 40L, 61L, 62L, 63L, 64L, 65L, 66L, 67L, 68L,
69L, 70L, 71L, 72L, 73L, 74L, 75L, 76L, 77L, 78L, 79L, 80L)), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = 1:2, class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))

Forecast: list of length 3 not meaningful when calculate MAPE in R

In this data
timeseries=structure(list(Data = structure(c(10L, 14L, 18L, 22L, 26L, 29L,
32L, 35L, 38L, 1L, 4L, 7L, 11L, 15L, 19L, 23L, 27L, 30L, 33L,
36L, 39L, 2L, 5L, 8L, 12L, 16L, 20L, 24L, 28L, 31L, 34L, 37L,
40L, 3L, 6L, 9L, 13L, 17L, 21L, 25L), .Label = c("01.01.2018",
"01.01.2019", "01.01.2020", "01.02.2018", "01.02.2019", "01.02.2020",
"01.03.2018", "01.03.2019", "01.03.2020", "01.04.2017", "01.04.2018",
"01.04.2019", "01.04.2020", "01.05.2017", "01.05.2018", "01.05.2019",
"01.05.2020", "01.06.2017", "01.06.2018", "01.06.2019", "01.06.2020",
"01.07.2017", "01.07.2018", "01.07.2019", "01.07.2020", "01.08.2017",
"01.08.2018", "01.08.2019", "01.09.2017", "01.09.2018", "01.09.2019",
"01.10.2017", "01.10.2018", "01.10.2019", "01.11.2017", "01.11.2018",
"01.11.2019", "01.12.2017", "01.12.2018", "01.12.2019"), class = "factor"),
client = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L), .Label = c("Horns", "Kornev"), class = "factor"), stuff = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("chickens",
"hooves", "Oysters"), class = "factor"), Sales = c(374L,
12L, 120L, 242L, 227L, 268L, 280L, 419L, 12L, 172L, 336L,
117L, 108L, 150L, 90L, 117L, 116L, 146L, 120L, 211L, 213L,
67L, 146L, 118L, 152L, 122L, 201L, 497L, 522L, 65L, 268L,
441L, 247L, 348L, 445L, 477L, 62L, 226L, 476L, 306L)), .Names = c("Data",
"client", "stuff", "Sales"), class = "data.frame", row.names = c(NA,
-40L))
Create forecast by group
# first the grouping variable
timeseries$group <- paste0(timeseries$client,timeseries$stuff)
# determine all groups
groups <- unique(timeseries$group)
# find starting date per group and save them as a list of elements c('YEAR','Month')
timeseries$date <- as.Date(as.character(timeseries$Data), '%d.%m.%Y')
timeseries <- timeseries[order(timeseries$date),]
start_dates <- format(timeseries$date[match(groups, timeseries$group)], "%Y %m")
start_dates <- strsplit(start_dates, ' ')
# now the list
listed <- split(timeseries,timeseries$group)
str(listed)
# Edited the lapply funcion in order to consider the starting dates
# to have a smaller output, I post the str(listed)
library("forecast")
library("lubridate")
listed_ts <- lapply(seq_along(listed),
function(k) ts(listed[[k]][["Sales"]], start = as.integer(start_dates[[k]]), frequency = 12) )
listed_ts
listed_arima <- lapply(listed_ts,function(x) auto.arima(x,allowmean = F ))
#Now the forecast for each arima:
listed_forecast <- lapply(listed_arima,function(x) forecast(x,5) )
listed_forecast
do.call(rbind,listed_forecast)
lapply(listed_arima, fitted)
#As a side comment, note that the solution is equivalent to
lapply(listed_arima, function(x) fitted(x))
#For the same reason you may also use AIC Metrix
listed_arima <- lapply(listed_ts, auto.arima)
So I want calculate MAPE using library("MLmetrics")
Let's check help
?MAPE(y_pred, y_true)
y_true is timeseries data and y_pred is the result of lapply(listed_arima, fitted)
So I do so
MAPE(lapply(listed_arima, fitted), timeseries)
and get the error
Error in Ops.data.frame(y_true, y_pred) : list of length 3 not meaningful
What's wrong? Why I can't calculate MAPE metrics using this function from library("MLmetric)?
How for each group i can calculate MAPE?
So as output i want data frame like in my example
How to reach this output?
What you need is
mapply(MAPE, lapply(listed_arima, fitted), split(timeseries$Sales, timeseries$group))
# [1] 3.4659421 0.8926123 0.2577634
In this way we apply MAPE to each pair of elements of lists lapply(listed_arima, fitted) and split(timeseries$Sales, timeseries$group).
The issue with
MAPE(lapply(listed_arima, fitted), timeseries)
is that lapply(listed_arima, fitted) is a list while the first argument to MAPE has to be a vector, and also that timeseries is a data frame rather than just a single column.

Display The sequence of statistical criteria for the time series by groups in data.frame using R

data sample
timeseries=structure(list(Data = structure(c(10L, 14L, 18L, 22L, 26L, 29L,
32L, 35L, 38L, 1L, 4L, 7L, 11L, 15L, 19L, 23L, 27L, 30L, 33L,
36L, 39L, 2L, 5L, 8L, 12L, 16L, 20L, 24L, 28L, 31L, 34L, 37L,
40L, 3L, 6L, 9L, 13L, 17L, 21L, 25L), .Label = c("01.01.2018",
"01.01.2019", "01.01.2020", "01.02.2018", "01.02.2019", "01.02.2020",
"01.03.2018", "01.03.2019", "01.03.2020", "01.04.2017", "01.04.2018",
"01.04.2019", "01.04.2020", "01.05.2017", "01.05.2018", "01.05.2019",
"01.05.2020", "01.06.2017", "01.06.2018", "01.06.2019", "01.06.2020",
"01.07.2017", "01.07.2018", "01.07.2019", "01.07.2020", "01.08.2017",
"01.08.2018", "01.08.2019", "01.09.2017", "01.09.2018", "01.09.2019",
"01.10.2017", "01.10.2018", "01.10.2019", "01.11.2017", "01.11.2018",
"01.11.2019", "01.12.2017", "01.12.2018", "01.12.2019"), class = "factor"),
client = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L), .Label = c("Horns", "Kornev"), class = "factor"), stuff = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("chickens",
"hooves", "Oysters"), class = "factor"), Sales = c(374L,
12L, 120L, 242L, 227L, 268L, 280L, 419L, 12L, 172L, 336L,
117L, 108L, 150L, 90L, 117L, 116L, 146L, 120L, 211L, 213L,
67L, 146L, 118L, 152L, 122L, 201L, 497L, 522L, 65L, 268L,
441L, 247L, 348L, 445L, 477L, 62L, 226L, 476L, 306L)), .Names = c("Data",
"client", "stuff", "Sales"), class = "data.frame", row.names = c(NA,
-40L))
I need check timeseries on trend and seasonal using not acf function,
but some criterions for it. For serias of each group Cleint+Stuff.
#test adf
library("tseries")
adf.test(timeseries$Sales)
then
Seasonal Mann-Kendall Trend Test
library("trend")
res <- smk.test(timeseries$Sales)
and
#Cox and Stuart Trend Test
cs.test(timeseries$Sales)
The result of these tests should be in data.frame format for each group
How it can be done?
Edit
w=structure(list(Sales = c(18175L, 20015L, 48049L, 62826L, 34804L,
33105L, 38384L, 42316L, 44577L, 24939L, 15908L, 24859L, 13879L,
18739L, 13202L, 29653L, 30371L, 29638L, 5495L, 56932L, 1091L,
5906L, 8229L, 239L, 102L, 8L, 263L, 26L), group = c(1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L)), .Names = c("Sales", "group"
), class = "data.frame", row.names = c(NA, -28L))
transform to ts object
w=ts(mydat$new,frequency = 12,start=c(2015,1))
library(dplyr);w %>% group_by(group) %>% summarise(stat = cs.test(Sales)$statistic, pval = cs.test(Sales)$p.value)

R:splitting the dataset into groups for forecast

i have dataset and i have to perform daily forecast splited by groups.
The group is client+stuff
ts <- read.csv("C:/Users/Admin/Desktop/mydat.csv",sep=";", dec=",")
here mydat
structure(list(Data = structure(c(1L, 3L, 5L, 6L, 7L, 8L, 9L,
10L, 11L, 12L, 13L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L,
29L, 30L, 31L, 32L, 33L, 2L, 4L, 14L, 15L, 16L, 17L, 18L, 19L,
20L, 21L, 22L, 23L, 24L, 25L, 26L), .Label = c("01.04.2017",
"01.06.2017", "02.04.2017", "02.06.2017", "03.04.2017", "04.04.2017",
"05.04.2017", "06.04.2017", "07.04.2017", "08.04.2017", "09.04.2017",
"10.04.2017", "11.04.2017", "12.05.2017", "13.05.2017", "14.05.2017",
"15.05.2017", "16.05.2017", "17.05.2017", "18.05.2017", "19.05.2017",
"20.05.2017", "21.05.2017", "22.05.2017", "23.05.2017", "24.05.2017",
"25.05.2017", "26.05.2017", "27.05.2017", "28.05.2017", "29.05.2017",
"30.05.2017", "31.05.2017"), class = "factor"), client = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("Horns and hooves", "Kornev & Co."
), class = "factor"), stuff = structure(c(1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L), .Label = c("chickens", "hooves", "Oysters"), class = "factor"),
Продажи = c(374L, 12L, 120L, 242L, 227L, 268L, 280L, 419L,
12L, 172L, 336L, 117L, 108L, 150L, 90L, 117L, 116L, 146L,
120L, 211L, 213L, 67L, 146L, 118L, 152L, 122L, 201L, 497L,
522L, 65L, 268L, 441L, 247L, 348L, 445L, 477L, 62L, 226L,
476L, 306L)), .Names = c("Data", "client", "stuff", "Продажи"
), class = "data.frame", row.names = c(NA, -40L))
of course I can manually separate three datasets
horns and hooves + hooves
Horns and hooves + chickens
Kornev & Co. + oysters
but what to do in the case when I have a huge dataset and there are hundreds of groups. Do not manually split.
Is it possible to split it in R into groups and then perform a forecast?
the code for forecast is simple
The first i do so
library(forecast)
library(lubridate)
msts <- msts(ts$sales,seasonal.periods = c(7,365.25),start = decimal_date(as.Date("2017-05-12")))
plot(msts, main="sales", xlab="Year", ylab="sales")
tbats <- tbats(msts)
plot(tbats, main="Multiple Season Decomposition")
sp<- predict(tbats,h=14) #14 days forecast
plot(sp, main = "TBATS Forecast", include=14)
print(sp)
if the result does not suit me, I'm perform forecast via dummy variables
tsw <- ts(ts$Sales, start = decimal_date(as.Date("2017-05-12")), frequency = 7)
View(tsw)
mytslm <- tslm(tsw ~ trend + season)
print(mytslm)
residarima1 <- auto.arima(mytslm$residuals)
residualsArimaForecast <- forecast(residarima1, h=14)
residualsF <- as.numeric(residualsArimaForecast$mean)
regressionForecast <- forecast(mytslm,h=14)
regressionF <- as.numeric(regressionForecast$mean)
forecastR <- regressionF+residualsF
print(forecastR)
You can use split to split the data into groups by a combination of factors, in this case columns client and stuff.
group_list <- split(mydat, list(mydat$client, mydat$stuff))
group_list <- group_list[sapply(group_list, function(x) nrow(x) != 0)]
Then you can use this list and lapply any function you want. The following is how you would perform your first forecast. Note that I have separated the forecast code from the plots code and that each step of the forecast is done by one function, first apply function msts and produce a list of such objects, then apply function tbats and produce another list.
fun_msts <- function(ts){
msts(ts$Sales, seasonal.periods = c(7,365.25), start = decimal_date(as.Date("2017-05-12")))
}
fun_sp <- function(m){
tbats <- tbats(m)
predict(tbats, h=14) #14 days forecast
}
msts_list <- lapply(group_list, fun_msts)
sp_list <- lapply(msts_list, fun_sp)
Now if you want to, you can plot the results. In order to do that, define two other functions to be lapplyed.
plot_msts <- function(m, new.window = TRUE){
if(new.window) windows()
plot(m, main="Sales", xlab="Year", ylab="Sales")
}
plot_sp <- function(sp, new.window = TRUE){
if(new.window) windows()
plot(sp, main = "TBATS Forecast", include = 14)
}
lapply(msts_list, plot_msts)
lapply(sp_list, plot_sp)
In these functions a new graphic device is open with function windows. If you are not using Microsoft Windows or if you want to open another type of device, change that instruction but keep the if(new.window).
EDIT.
As for the regression with dummy variables, you can do the following.
fun_tslm <- function(x, start = "2017-05-12", freq = 7){
tsw <- ts(x[["Sales"]], start = decimal_date(as.Date(start)), frequency = freq)
#View(tsw)
mytslm <- tslm(tsw ~ trend + season)
mytslm
}
fun_forecast <- function(x, h = 14){
residarima1 <- auto.arima(x[["residuals"]])
residualsArimaForecast <- forecast(residarima1, h = h)
residualsF <- as.numeric(residualsArimaForecast$mean)
regressionForecast <- forecast(x, h = h)
regressionF <- as.numeric(regressionForecast$mean)
forecastR <- regressionF + residualsF
forecastR
}
tslm_list <- lapply(group_list, fun_tslm)
fore_list <- lapply(tslm_list, fun_forecast)

In R, how do I order within a single column so one category is ascending and one is descending?

I am generating multiple experimental designs of different sizes and shapes. This is done using a function dependent on the agricolae package (I’ve included it below). To generate practical data sheets for field operations I need to order the data frame by Row, then for odd Rows sort the Range ascending and for even Rows sort it descending.
Using sort, order, rep and seq I have been able to find a simple solution to this. Any suggestions are greatly appreciated!
So the data frame will go from something like this:
df1 <- structure(list(Block = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L), Range = c(1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L), Row = c(1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 5L, 5L, 5L,
5L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 9L, 9L, 9L,
9L, 10L, 10L, 10L, 10L, 11L, 11L, 11L, 11L, 12L, 12L, 12L, 12L
), Plot = c(101L, 201L, 301L, 401L, 102L, 202L, 302L, 402L, 103L,
203L, 303L, 403L, 104L, 204L, 304L, 404L, 105L, 205L, 305L, 405L,
106L, 206L, 306L, 406L, 107L, 207L, 307L, 407L, 108L, 208L, 308L,
408L, 109L, 209L, 309L, 409L, 110L, 210L, 310L, 410L, 111L, 211L,
311L, 411L, 112L, 212L, 312L, 412L), Entry.Num = c(14L, 26L,
18L, 4L, 52L, 17L, 41L, 47L, 40L, 30L, 21L, 12L, 9L, 2L, 8L,
36L, 25L, 43L, 15L, 6L, 33L, 48L, 54L, 37L, 9L, 18L, 8L, 41L,
48L, 28L, 7L, 47L, 54L, 38L, 46L, 23L, 19L, 1L, 3L, 27L, 36L,
14L, 12L, 33L, 16L, 24L, 31L, 2L)), .Names = c("Block", "Range",
"Row", "Plot", "Entry.Num"), class = "data.frame", row.names = c(NA,
-48L))
To something like this:
df2 <- structure(list(Block = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L), Range = c(1L, 2L, 3L, 4L, 4L, 3L,
2L, 1L, 1L, 2L, 3L, 4L, 4L, 3L, 2L, 1L, 1L, 2L, 3L, 4L, 4L, 3L,
2L, 1L, 1L, 2L, 3L, 4L, 4L, 3L, 2L, 1L, 1L, 2L, 3L, 4L, 4L, 3L,
2L, 1L, 1L, 2L, 3L, 4L, 4L, 3L, 2L, 1L), Row = c(1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 5L, 5L, 5L,
5L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 9L, 9L, 9L,
9L, 10L, 10L, 10L, 10L, 11L, 11L, 11L, 11L, 12L, 12L, 12L, 12L
), Plot = c(101L, 201L, 301L, 401L, 402L, 302L, 202L, 102L, 103L,
203L, 303L, 403L, 404L, 304L, 204L, 104L, 105L, 205L, 305L, 405L,
406L, 306L, 206L, 106L, 107L, 207L, 307L, 407L, 408L, 308L, 208L,
108L, 109L, 209L, 309L, 409L, 410L, 310L, 210L, 110L, 111L, 211L,
311L, 411L, 412L, 312L, 212L, 112L), Entry.Num = c(14L, 26L,
18L, 4L, 47L, 41L, 17L, 52L, 40L, 30L, 21L, 12L, 36L, 8L, 2L,
9L, 25L, 43L, 15L, 6L, 37L, 54L, 48L, 33L, 9L, 18L, 8L, 41L,
47L, 7L, 28L, 48L, 54L, 38L, 46L, 23L, 27L, 3L, 1L, 19L, 36L,
14L, 12L, 33L, 2L, 31L, 24L, 16L)), .Names = c("Block", "Range",
"Row", "Plot", "Entry.Num"), class = "data.frame", row.names = c(NA,
-48L))
In case you're interested, this is the trial design function. There is undoubtedly a more elegant way to do this but I am not particularly good at R:
Trial.Design <- function(Total.Entries, Rows.per.Block, Ranges.per.Block, Trial.Name){
library(agricolae)
library(reshape2)
#########################################################################################
# Generate a trial design #
#########################################################################################
total.trt <- Total.Entries
if(total.trt%%2) # If the variety number is uneven it will return the following error message
stop("WARNING: Variety number is uneven! Subsequent script will not work correctly!")
blocks <- 4 # This is fixed, we are unlikely to use a different block number in any trial.
trt<-c(1:total.trt) # You could in theory have the variety names here.
# This function from agricolae generates a statistically sound trial design.
outdesign <-design.rcbd(trt, blocks, serie=0,continue=TRUE,986,"Wichmann-Hill") # seed for ranomization = 986
# This uses an agricolae function to print the "field book" of the trial.
book <-outdesign$book # field book
#########################################################################################
# Generate blocking in two directions #
#########################################################################################
# The following generates an appropriately blocked map. The idea is block in two directions.
# We use this design so that the blocking structure captures field trends both down and across the field.
Block.Rows <- Rows.per.Block
Block.Ranges <- Ranges.per.Block
ifelse(total.trt==Block.Rows*Block.Ranges, "Entry number is okay",
stop("WARNING: Block is uneven and/or does not equal entry number! Subsequent script will not work correctly!"))
Block <- matrix(rep(1, times=total.trt))
Range <- matrix(rep(1:Block.Rows, times=Block.Ranges))
Row <- matrix(rep(1:Block.Ranges, each=Block.Rows))
Block.1 <- cbind(Block, Range)
Block.1 <- cbind(Block.1, Row)
Block <- matrix(rep(3, times=total.trt))
Range <- matrix(rep((Block.Rows+1):(Block.Rows*2), times=Block.Ranges))
Row <- matrix(rep(1:Block.Ranges, each=Block.Rows))
Block.3 <- cbind(Block, Range)
Block.3 <- cbind(Block.3, Row)
Block <- matrix(rep(2, times=total.trt))
Range <- matrix(rep(1:Block.Rows, times=Block.Ranges))
Row <- matrix(rep((Block.Ranges+1):(Block.Ranges*2), each=Block.Rows))
Block.2 <- cbind(Block, Range)
Block.2 <- cbind(Block.2, Row)
Block <- matrix(rep(4, times=total.trt))
Range <- matrix(rep((Block.Rows+1):(Block.Rows*2), times=Block.Ranges))
Row <- matrix(rep((Block.Ranges+1):(Block.Ranges*2), each=Block.Rows))
Block.4 <- cbind(Block, Range)
Block.4 <- cbind(Block.4, Row)
# The following adds the coordinates generated above to our field book.
Field.book <- rbind(Block.1, Block.2)
Field.book <- rbind(Field.book, Block.3)
Field.book <- rbind(Field.book, Block.4)
Plots <- as.matrix(rep(1:(total.trt*4)))
Field.book <- cbind(Plots, Field.book)
# Generate temporary Range names.
colnames(Field.book) <- c("plots", "block", "range", "row")
Field.book <- as.data.frame(Field.book)
Field.book$range <- as.numeric(Field.book$range)
Field.book$row <- as.numeric(Field.book$row)
# This joins the experimental design generated by agricolae to the plot layout generated above.
Field.book <- join(Field.book, book, by= c("plots","block"))
# Generate better Range names.
colnames(Field.book) <- c("Plot.Num", "Block", "Range", "Row", "Entry.Num")
# Create Plot coordinates.
Field.book$Plot <- (Field.book$Range * 100) + Field.book$Row
# Reorders the Ranges to something more intuitive.
# I drop the 'plot number' Range generated by agricolae because I don't think it is useful or necessary in our case.
Field.book <- Field.book[c("Block", "Range", "Row", "Plot", "Entry.Num")]
# Sort the plots by Range and Row.
Field.book <- Field.book[order(Field.book$Range, Field.book$Row),]
Field.book <<- Field.book
# Convert the Ranges to factors to allow for conversion to a 'wide' format.
Field.book$Block <- as.factor(Field.book$Block)
Field.book$Range <- as.factor(Field.book$Range)
Field.book$Row <- as.factor(Field.book$Row)
Field.book$Plot <- as.factor(Field.book$Plot)
#########################################################################################
# Generate plot maps #
#########################################################################################
# This function rotates the design if it's deemed necessary.
# rotate <- function(x) t(apply(x, 2, rev))
Field.design.num <- dcast(Field.book, Row ~ Range, value.var = "Entry.Num")
Field.design.num$Row <- as.numeric(Field.design.num$Row)
Field.design.num <- Field.design.num[order(-Field.design.num$Row),]
Field.book$Plot <- as.factor(Field.book$Plot)
colnames(Field.design.num)[2:ncol(Field.design.num)] <- paste("Row", colnames(Field.design.num[,c(2:ncol(Field.design.num))]), sep = "-")
Field.design.num$Row <- sub("^", "Range-", Field.design.num$Row)
#rotate(Field.design.num)
Field.design.num <<- Field.design.num
Field.design.plot <- dcast(Field.book, Row ~ Range, value.var = "Plot")
Field.design.plot$Row <- as.numeric(Field.design.plot$Row)
Field.design.plot <- Field.design.plot[order(-Field.design.plot$Row),]
Field.book$Plot <- as.factor(Field.book$Plot)
colnames(Field.design.plot)[2:ncol(Field.design.plot)] <- paste("Row", colnames(Field.design.plot[,c(2:ncol(Field.design.plot))]), sep = "-")
Field.design.plot$Row <- sub("^", "Range-", Field.design.plot$Row)
#rotate(Field.design.plot)
Field.design.plot <<- Field.design.plot
Field.design.Block <- dcast(Field.book, Row ~ Range, value.var = "Block")
Field.design.Block$Row <- as.numeric(Field.design.Block$Row)
Field.design.Block <- Field.design.Block[order(-Field.design.Block$Row),]
Field.book$Block <- as.factor(Field.book$Block)
colnames(Field.design.Block)[2:ncol(Field.design.Block)] <- paste("Row", colnames(Field.design.Block[,c(2:ncol(Field.design.Block))]), sep = "-")
Field.design.Block$Row <- sub("^", "Range-", Field.design.Block$Row)
#rotate(Field.design.Block)
Field.design.Block <<- Field.design.Block
#########################################################################################
# Write the files #
#########################################################################################
write.csv(Field.book, paste("Field Book",Trial.Name,".csv"), row.names=FALSE)
write.csv(Field.design.num, paste("Field map Entry",Trial.Name,".csv"), row.names=FALSE)
write.csv(Field.design.plot, paste("Field map Plots",Trial.Name,".csv"), row.names=FALSE)
write.csv(Field.design.Block, paste("Field map Blocks",Trial.Name,".csv"), row.names=FALSE)
#########################################################################################
}
# The parameters are:
# The total number of entires/varieties in a replicate (NOTE: The number of entries must be an even number).
# The number of rows in an individual block/replicate.
# The number of ranges in an individual block/replicate.
# (NOTE: The number of rows and ranges must multiply to give the number of entries.)
# The trial name is what will be written to your working directory.
Total.Entries = 54
Rows.per.Block = 9
Ranges.per.Block = 6
Trial.Name = "Example"
Trial.Design (Total.Entries, Rows.per.Block, Ranges.per.Block, Trial.Name)
The magic of order awaits you:
df1[order(df1$Row, c(-1,1)[df1$Row %% 2 + 1] * df1$Range ),]
Essentially what this does is order by Row, then by Range, multiplied by -1 if it is even. x %% 2 can be used to check for odd/even status.
all.equal(
df1[order(df1$Row, c(-1,1)[df1$Row %% 2 + 1] * df1$Range ),],
df2,
check.attributes=FALSE
)
#[1] TRUE

Resources