Neuralnet package in R: denormalize data

I'm trying to predict stock prices using R's "neuralnet" package. How can I denormalize the model output at the end to recover the actual price? Here is the code:
library("quantmod")
library("neuralnet")
startDate <- as.Date('2009-01-01')
endDate <- as.Date('2014-01-01')
getSymbols("^AAPL",src="yahoo",from=startDate,to=endDate)
# DataSet
RSI3 <- RSI(Op(AAPL),n=3)
EMA5 <- EMA(Op(AAPL),n=5)
EMAcross <- Op(AAPL)-EMA5
MACD <- MACD(Op(AAPL),fast = 12, slow = 26, signal = 9)
MACDsignal <- MACD[,2]
BB <- BBands(Op(AAPL),n=20,sd=2)
BBp <- BB[,4]
Price <- Cl(AAPL)-Op(AAPL)
DataSet <- data.frame(RSI3,EMAcross,MACDsignal,BBp,Price)
DataSet <- DataSet[-c(1:33),]
colnames(DataSet) <- c("RSI3","EMAcross","MACDsignal","BollingerB","Price")
# Normalize function
Normalized <- function(x) {(x-min(x))/(max(x)-min(x))}
NormalizedData <- as.data.frame(lapply(DataSet,Normalized))
# Training and test sets
TrainingSet <- NormalizedData[1:816,]
TestSet <- NormalizedData[817:1225 ,]
# ANN
nn1 <- neuralnet(Price~RSI3+EMAcross+MACDsignal+BollingerB,data=TrainingSet, hidden=3, learningrate=.001,algorithm="backprop")
predictedPrice=compute(nn1,TestSet[,1:4])
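Since each column was min-max scaled with its own min and max, you can invert the scaling on the predictions by saving the target's min and max before normalizing. A minimal sketch (the names priceMin/priceMax are mine; note that compute() returns a list whose fitted values sit in $net.result):
priceMin <- min(DataSet$Price)
priceMax <- max(DataSet$Price)
normalizedPrediction <- predictedPrice$net.result
# invert x_norm = (x - min)/(max - min)  =>  x = x_norm*(max - min) + min
actualPrediction <- normalizedPrediction * (priceMax - priceMin) + priceMin
head(actualPrediction)
Keep in mind that Price here is Cl(AAPL) - Op(AAPL), i.e. a daily open-to-close change, so the denormalized values are changes, not price levels.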

Related

R Expanding Window RandomForest, Accuracy Not Dropping Off With Increases in Lag

I am trying to make a binary prediction (predicting QQQ states) using 16 input variables. My data set spans 2001-2022; the target is X0, the state 5 days ahead.
First, I use cross validation with an 80-20 train/test split on data from 2001-2017 to test the accuracy of a potential model.
However, since I want the model to make forward predictions, I train it on the 2001-2017 data and make a chronological prediction for the 2018-2022 data. Understandably, the accuracy drops off.
To improve the accuracy, I run an expanding-window prediction model, where I keep retraining on all prior available observations to predict the next state in the data set, incrementing the training set by one date for each model. The output is a 2018-2022 prediction of states where each date's state was predicted using a different training set. Ideally this also helps the model adapt to new market conditions/phases. The accuracy improves.
However, when I change the lags, I notice that the accuracy does not drop off as the lag increases…
The code has been checked extensively and the lags for each dataset appear legitimate. This leads to the question: what is wrong with my model? Might there be a model better suited for our purposes? It also makes me wonder why there is such variability in the Sharpe ratio for each model; is the 15th lag having the highest Sharpe purely coincidental? One theory was that the training set is quite comprehensive, so the model is good at making near-term predictions regardless of lag. However, when I took the lags to an extreme, the accuracy still did not drop off.
Should I try using a different model? Any advice or guidance would be greatly appreciated. Please see my code below (the commented-out loop is the expanding-window RandomForest application).
library(ggplot2)
library(BatchGetSymbols)
library(data.table)
library(plyr)
library(quantmod)
library(PerformanceAnalytics)
defaultW <- getOption("warn")
options(warn = -1)
library(dplyr)
library(caret)
library(ranger)
### Data Import ######
states_full <- read.csv(file = "rolling_qqq_hidden_states_full_five_back.csv")
states_full$formatted_date <- as.Date(states_full$formatted_date)
states_full <- states_full[!duplicated(states_full$formatted_date),]
tickers <- c("QQQ", "^VXN")
l.out <- BatchGetSymbols(tickers = tickers,
first.date = states_full$formatted_date[1],
last.date = states_full$formatted_date[nrow(states_full)]+1, do.cache=FALSE, be.quiet = TRUE)
price_data <- data.frame(l.out$df.tickers$price.adjusted,l.out$df.tickers$ret.adjusted.prices, l.out$df.tickers$ref.date, l.out$df.tickers$ticker)
colnames(price_data) <- c("Value", "Daily Return", "Date", "Label")
QQQ_full <- price_data[which(price_data$Label == "QQQ"),]
# Make sure dates match
mylist <- c()
for (i in 1:nrow(QQQ_full)){
if (sum(QQQ_full$Date[i] == states_full$formatted_date) != 1){
mylist <- c(mylist, i)
}
}
if(length(mylist) > 0){
QQQ_full <- QQQ_full[-mylist,]
}
mylist <- c()
for (i in 1:nrow(states_full)){
if (sum(states_full$formatted_date[i] == QQQ_full$Date) != 1){
mylist <- c(mylist, i)
}
}
if(length(mylist) > 0){
states_full <- states_full[-mylist,]
}
# split the data into 2001-2017, 2018-2022
states_01_17 <- states_full[1:which(states_full$formatted_date == "2017-12-29"),]
states_17_22 <- states_full[(nrow(states_01_17)+1):nrow(states_full),]
QQQ_01_17<- QQQ_full[1:which(QQQ_full$Date == "2017-12-29"),]
QQQ_17_22 <- QQQ_full[(which(QQQ_full$Date == "2017-12-29")+1):nrow(QQQ_full),]
# build QQQ portfolio
QQQ_portfolio <- as.data.frame(matrix(nrow = nrow(QQQ_17_22) , ncol = 3))
colnames(QQQ_portfolio) <- c("Value", "Date", "Label")
QQQ_portfolio$Value <- 100
QQQ_portfolio$Label <- "QQQ Portfolio"
QQQ_portfolio$Date <- QQQ_17_22$Date
for(m in 2:nrow(QQQ_portfolio)){
QQQ_portfolio$Value[m] <- QQQ_portfolio$Value[m-1] * (1+QQQ_17_22$`Daily Return`[m])
}
# build non-lagged states portfolio
states_portfolio <- as.data.frame(matrix(nrow = nrow(QQQ_17_22) , ncol = 3))
colnames(states_portfolio) <- c("Value", "Date", "Label")
states_portfolio$Value <- 100
states_portfolio$Label <- "0 Lag RandomForest Prediction of MSDR"
states_portfolio$Date <- QQQ_17_22$Date
for(i in 2:nrow(states_portfolio)){
if (states_17_22$X0[i-1] == 1){
states_portfolio$Value[i] <- states_portfolio$Value[i-1] * (1+QQQ_17_22$`Daily Return`[i])
} else {
states_portfolio$Value[i] <- states_portfolio$Value[i-1]
}
}
# Calculate non-lagged sharpe as benchmark
#states_portfolio_returns <- data.frame(Delt(states_portfolio$Value)[-1])
#states_portfolio_returns_xts <- xts(states_portfolio_returns,states_portfolio$Date[-1])
#as.numeric(SharpeRatio.annualized(states_portfolio_returns_xts))
# bind portfolios together for plotting
port_comp <- rbind(QQQ_portfolio,states_portfolio)
# data set that will hold performance metrics
loop_output <- as.data.frame(matrix(0, nrow = 22, ncol = 8))
colnames(loop_output) <- c("Lag", "Cross Validation Accuracy 01-17","Forward Accuracy 18-22","Sharpe", "Average 1YR Rolling Sharpe",
"Median 1YR Rolling Sharpe","Min 1YR Rolling Sharpe","Max 1YR Rolling Sharpe")
# read macro data
macro_full <- data.frame(read.csv("macroindicators3.csv"))
for (j in 2:ncol(macro_full)){
macro_full[j] <- as.numeric(nafill(macro_full[,j], type = "locf"))
}
macro_full$Date <- as.Date(macro_full[,1], "%m/%d/%Y")
macro_full <- macro_full[,-1]
macro_full <- macro_full[-1,]
# Remove NA columns, can try with more columns values later...
macro_no_na_full <- macro_full[,colSums(is.na(macro_full))==0]
# make sure dates match
mylist <- c()
for (k in 1:nrow(states_full)){
if (sum(states_full$formatted_date[k] == macro_full$Date) != 1){
mylist <- c(mylist, k)
}
}
if(length(mylist) > 0){
states_full <- states_full[-mylist,]
}
mylist <- c()
for (l in 1:nrow(macro_full)){
if (sum(macro_full$Date[l] == states_full$formatted_date) != 1){
mylist <- c(mylist, l)
}
}
if(length(mylist) > 0){
macro_full <- macro_full[-mylist,]
}
# states are a factor
states_full$X0 <- as.factor(states_full$X0)
set.seed(42)
for (i in 1:22){ # loop_output has 22 rows; lags run from 5 to 900
if (i <= 8){
lag = i*5 # increment lag by 5 until 40
} else if (i <= 14){
lag = 40 + (i-8)*10 # increment lag by 10 until 100
} else {
lag = 100+(i-14)*100 # increment lag by 100 until 900
}
print(lag)
#Save lag
loop_output$Lag[i] <- lag
#Create a lagged data frame
full <- cbind(macro_no_na_full[1:(nrow(macro_no_na_full)-lag),], states_full[(lag+1):nrow(states_full),])
full_01_17 <- full[1:(which(full$Date == "2017-12-29")-lag),]
full_17_22 <- full[-(1:nrow(full_01_17)),]
# save version with dates to verify lags later
full_w_dates <- full
full_01_17_w_dates <- full_01_17
full_17_22_w_dates <- full_17_22
# remove dates for ml
full <- full[,-c(17,18)]
full_01_17 <- full_01_17[,-c(17,18)]
full_17_22 <- full_17_22[,-c(17,18)]
# this is just for cross validation model
x_01_17 <- data.frame(full_01_17[,-ncol(full_01_17)])
y_01_17 <- full_01_17$X0
# run cross validation model
train=sample(nrow(full_01_17),nrow(full_01_17)*.8,replace=FALSE) #Train/Test
rf.reg = ranger(y = y_01_17[train], x= x_01_17[train,] ,mtry=round(sqrt(ncol(x_01_17))),num.trees=200,importance = "impurity")
y.rf.pred = predict(rf.reg, x_01_17[-train,])$predictions # Predict with bagging
# cross validation model accuracy
rf.acc = mean(y.rf.pred==y_01_17[-train]) # Directly compute the accuracy
#rf.acc
#table(y.rf.pred,y_01_17[-train])
loop_output$`Cross Validation Accuracy 01-17`[i] <- rf.acc
# Expanding window models - takes a while
# prediction <- as.data.frame(matrix(0,nrow = nrow(full_17_22), ncol= 2)) # data set to store predictions
# prediction$V1 <- as.factor(c(0,1))[1] # store predictions as a factor
# previous = 0 # progress bar
# for(a in nrow(full_01_17):(nrow(full)-1)){ #expanding window starts with 2001-2017, next iteration is 2001-2017+1day
# progress = (a-nrow(full_01_17))/(nrow(full)-1-nrow(full_01_17)) # progress bar
# progress = round_any(progress, 0.01) # progress bar
# if (progress != previous){ # progress bar
# print(progress) # progress bar
# }
# previous = progress # progress bar
# rf.reg = ranger(full$X0[1:a]~.,data=full[1:a,],mtry=round(sqrt(ncol(x_01_17))),num.tree=800,importance = 'impurity') # ranger model
# y.rf.pred = predict(rf.reg, full[a+1,])$predictions # make the prediction on the a+1 observation
# prediction$V1[a-nrow(full_01_17)+1] <- y.rf.pred #save the prediction
# prediction$V2<-as.Date(prediction$V2) # save the date so we can verify lags
# prediction$V2[a-nrow(full_01_17)+1] <- as.Date(full_w_dates$formatted_date[a+1])
# if (a == nrow(full)-1) message("Done!") # gives a status update
# }
#
# write.csv(prediction, paste(lag,"lagprediction.csv", sep = "")) # save the prediction so we don't have to rerun
####
### to read in results from already completed backtests
prediction <- read.csv(paste(lag,"lagprediction.csv", sep = ""))[2]
###
full_17_22_w_pred <- full_17_22_w_dates
full_17_22_w_pred$prediction <- prediction$V1
# Evaluate the accuracy
rf.acc = mean(full_17_22_w_pred$prediction==full_17_22_w_pred$X0)
loop_output$`Forward Accuracy 18-22`[i] <- rf.acc
# build a portfolio out of the predicted states
portfolio <- as.data.frame(matrix(0,nrow = nrow(full_17_22), ncol= 3))
colnames(portfolio) <- c("Value", "Date", "Label")
portfolio$Date <- full_17_22_w_pred$formatted_date
portfolio$Value <- 100
portfolio$Label <- paste(lag,"Lag RandomForest Prediction of MSDR", sep = " ")
for(b in 2:nrow(portfolio)){
if (full_17_22_w_pred$prediction[b-1] == 1){
portfolio$Value[b] <- portfolio$Value[b-1] * (1+QQQ_17_22$`Daily Return`[b])
} else {
portfolio$Value[b] <- portfolio$Value[b-1]
}
}
# save it to dataset containing port
port_comp <- rbind(port_comp, portfolio)
# calculate Sharpe
portfolio_returns <- data.frame(Delt(portfolio$Value)[-1])
portfolio_returns_xts <- xts(portfolio_returns, portfolio$Date[-1])
loop_output$Sharpe[i] <- as.numeric(SharpeRatio.annualized(portfolio_returns_xts))
# rolling sharpe
mylist <- c()
for (z in 1:(nrow(portfolio_returns)-252)){
portfolio_xts_rolling <- portfolio_returns_xts[z:(z+252)]
mylist <- c(mylist, as.numeric(SharpeRatio.annualized(portfolio_xts_rolling)))
}
loop_output$`Average 1YR Rolling Sharpe`[i]<- mean(mylist)
loop_output$`Median 1YR Rolling Sharpe`[i]<- median(mylist)
loop_output$`Min 1YR Rolling Sharpe`[i]<- min(mylist)
loop_output$`Max 1YR Rolling Sharpe`[i]<- max(mylist)
}
options(warn = defaultW)
# plot output
ggplot(port_comp, aes(x = Date, y = Value, color = Label, group = Label)) + geom_line()
#loop_output_v1 <- rbind(loop_output_v1, loop_output)
loop_output_v1
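One quick diagnostic (my suggestion, not part of the original post): if the state series is persistent or class-imbalanced, a classifier can look accurate at any lag simply by tracking the base rate, which would explain accuracy that refuses to decay. Comparing each lag's forward accuracy against two naive baselines makes that visible; this sketch reuses full_17_22_w_pred from the loop above:
# accuracy from always predicting the most common state
majority_acc <- max(table(full_17_22_w_pred$X0)) / nrow(full_17_22_w_pred)
# accuracy from predicting that the next state equals the current one
persistence_acc <- mean(head(as.character(full_17_22_w_pred$X0), -1) ==
tail(as.character(full_17_22_w_pred$X0), -1))
majority_acc; persistence_acc
If the RandomForest's forward accuracy at large lags is no better than these, the flat accuracy curve reflects the structure of the states rather than predictive power in the lagged features.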

Extract element from nested list of rolling_origin R

I want to extract .metrics (RMSE) from a rolling-origin forecast resampling (a 52 x 5 tibble) by the "id" column, which consists of the slices. Replicating code is given below. Here is my attempt:
metric <- resamples_fitted$.resample_results
metric
all <- metric[[1]][[".metrics"]]
res <- unlist(all)
estimate <- res[ grepl(".estimate", names(res))]
I want to get the ".estimate" by "slices" in the data frame; for each slice, there will be one RMSE. Here is the full code:
library(tidymodels)
library(modeltime)
library(modeltime.resample)
library(tidyverse)
library(timetk)
library(resample)
resample_spec <- rolling_origin(
data = m750,
initial = 200,
assess = 3,
cumulative = TRUE,
skip = 1,
overlap = 0 )
resamples_fitted <- m750_models %>%
modeltime_fit_resamples(
resamples = resample_spec,
control = control_resamples(verbose = FALSE)
)
resamples_fitted
metric <- resamples_fitted$.resample_results
metric
all <- metric[[1]][[".metrics"]]
res <- unlist(all)
estimate <- res[ grepl(".estimate", names(res))]
After spending some time, I found a solution; it may not be very elegant, but it solves the problem.
metric <- resamples_fitted$.resample_results
metric
all <- metric[[1]][[".metrics"]]
res <- unlist(all)
estimate <- res[ grepl(".estimate", names(res))]
typeof(estimate)
dat <- as.data.frame(sapply(estimate, as.numeric))
data <- dat[complete.cases(dat), ]
all_id <- data.frame(metric[[1]]$id)
al <- cbind(all_id,data)
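For reference, the same result can be reached more directly with tidyr::unnest, which avoids the unlist/grepl round trip. A sketch, assuming the .metrics tibbles follow the usual yardstick layout with .metric and .estimate columns:
library(dplyr)
library(tidyr)
rmse_by_slice <- resamples_fitted$.resample_results[[1]] %>%
select(id, .metrics) %>%   # keep the slice id and its metrics tibble
unnest(.metrics) %>%       # one row per metric per slice
filter(.metric == "rmse") %>%
select(id, .estimate)
rmse_by_slice
This yields one row per slice with its RMSE, keyed by id.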

Applying piecewise linear models for multiple years

I have daily rainfall data which I have converted to yearwise cumulative values using the following code:
library(tidyverse); library(segmented); library(seas); library(SiZer)
## get mscdata from "seas" packages
data(mscdata)
dat <- (mksub(mscdata, id=1108447))
dat$julian.date <- as.numeric(format(dat$date, "%j"))
## generate cumulative sum of rain by year
df <- dat %>% group_by(year) %>% mutate(rain_cs = cumsum(rain)) %>% ungroup
Then I want to divide every year into two parts (before day 210 and after day 210) and apply the piecewise linear model from SiZer to identify yearwise breakpoints. I am able to do it for a single year like this:
data <- subset(df, year == 1975)
sub1 <- filter(data, julian.date < 210)
sub2 <- filter(data, julian.date > 210)
sub1.mod <- piecewise.linear(x= sub1$julian.date, y = sub1$rain_cs,
middle = 1,
CI = T,
bootstrap.samples = 1000)
sub1.mod
sub2.mod <- piecewise.linear(x= sub2$julian.date, y = sub2$rain_cs,
CI = T,
bootstrap.samples = 1000)
sub2.mod
Now, how can I dynamically fit the piecewise linear model for all the years?
You can try using a function and base R, creating a list and then saving the models. I include in the last line a way to export all the models outside the list:
library(tidyverse); library(segmented); library(seas); library(SiZer)
## get mscdata from "seas" packages
data(mscdata)
dat <- (mksub(mscdata, id=1108447))
dat$julian.date <- as.numeric(format(dat$date, "%j"))
## generate cumulative sum of rain by year
df <- dat %>% group_by(year) %>% mutate(rain_cs = cumsum(rain)) %>% ungroup
#Create list
Listyear <- split(df,df$year)
#Function for year process
model_function<-function(x)
{
data <- x
sub1 <- filter(data, julian.date < 210)
sub2 <- filter(data, julian.date > 210)
sub1.mod <- piecewise.linear(x= sub1$julian.date, y = sub1$rain_cs,
middle = 1,
CI = T,
bootstrap.samples = 1000)
sub1.mod
sub2.mod <- piecewise.linear(x= sub2$julian.date, y = sub2$rain_cs,
CI = T,
bootstrap.samples = 1000)
sub2.mod
#Group elements
list.model <- list(v1=sub1.mod,v2=sub2.mod)
names(list.model)<-paste0(c("sub.mod1.","sub.mod2."),unique(x$year))
return(list.model)
}
#Iterate over all models
z1 <- lapply(Listyear,model_function)
#Export elements to envir
lapply(z1,list2env,.GlobalEnv)
You will end up with z1:
$`1975`
$`1975`$sub.mod1.1975
[1] "Threshold alpha: 85.0000277968913"
[1] ""
[1] "Model coefficients: Beta[0], Beta[1], Beta[2]"
(Intercept) x w
26.730070 3.376754 -2.406744
Change.Point Initial.Slope Slope.Change Second.Slope
2.5% 82.87297 3.259395 -2.515015 0.9283611
97.5% 87.90540 3.478656 -2.273062 1.0153773
$`1975`$sub.mod2.1975
[1] "Threshold alpha: 274.000071675723"
[1] ""
[1] "Model coefficients: Beta[0], Beta[1], Beta[2]"
(Intercept) x w
-37.968273 2.150220 5.115431
Change.Point Initial.Slope Slope.Change Second.Slope
2.5% 272.0000 1.969573 4.750341 7.057207
97.5% 276.0001 2.371539 5.468130 7.504963
And by running last line you will get the models in the global environment:
I hope this can help.
Code for exporting to csv.
I include an additional function that takes some results from the models and creates data frames, so the output can easily be exported to .csv after some adjustments to the lists. The function is next:
model_export<-function(x)
{
data <- x
sub1 <- filter(data, julian.date < 210)
sub2 <- filter(data, julian.date > 210)
sub1.mod <- piecewise.linear(x= sub1$julian.date, y = sub1$rain_cs,
middle = 1,
CI = T,
bootstrap.samples = 1000)
sub1.mod
sub2.mod <- piecewise.linear(x= sub2$julian.date, y = sub2$rain_cs,
CI = T,
bootstrap.samples = 1000)
sub2.mod
#Group elements for models
#Model 1
modelname <- rep('sub1.mod',2)
year <- rep(unique(x$year),2)
changepoint <- rep(sub1.mod$change.point,2)
coefs <- as.data.frame(t(sub1.mod$model$coefficients))
intervals <- as.data.frame(sub1.mod$intervals)
intervals <- cbind(data.frame(confidence=rownames(intervals)),intervals)
rownames(intervals)<-NULL
#Build DF
DF1 <- data.frame(modelname,year,changepoint,coefs,intervals)
#Model 2
modelname <- rep('sub2.mod',2)
changepoint <- rep(sub2.mod$change.point,2)
coefs <- as.data.frame(t(sub2.mod$model$coefficients))
intervals <- as.data.frame(sub2.mod$intervals)
intervals <- cbind(data.frame(confidence=rownames(intervals)),intervals)
rownames(intervals)<-NULL
#Build DF
DF2 <- data.frame(modelname,year,changepoint,coefs,intervals)
#Bind DFs
DFG <- rbind(DF1,DF2)
return(DFG)
}
Then you can apply:
#Apply new function to list
z2 <- lapply(Listyear,model_export)
#DF to export
MyDF <- do.call(rbind,z2)
#Export
write.csv(MyDF,file='Myfile.csv')
I used it for two years, saving the results in MyDF and then exporting them to a .csv file. As a consideration, if rbind does not work for any reason, you could try rbind.fill() from the plyr package.
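As a side note (my own variation, not part of the answer above), the split/lapply/do.call chain for the export step can be collapsed with purrr, reusing the same model_export function:
library(purrr)
# split by year, run model_export on each piece, and row-bind the results
MyDF <- df %>%
group_split(year) %>%
map_dfr(model_export)
write.csv(MyDF, file = 'Myfile.csv')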

Optimizing using R

The data is:
name <- c("Gen1","Gen2","Gen3")
QuantityE <- c(200,100,50)
PriceE <- c(10,12,50)
QuantityAS <- c(100,50,10)
PriceAS <- c(1,5,7)
mydata <- data.frame(name, QuantityE, PriceE , QuantityAS,PriceAS )
I have the following objective function:
Minimize the total cost, i.e. the sum over generators of
((PriceE * QuantityE) + (PriceAS * QuantityAS))
Subject to constraints:
Total QuantityE = 300
Total QuantityAS = 0.06* QuantityE
What is the best approach to use, or what can I read up on to solve the problem?
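For reference, here is the problem written out as a linear program (my transcription of the question, with per-generator caps taken from the data):
$$
\begin{aligned}
\min_{q^E,\, q^{AS}} \quad & \sum_{i=1}^{3} \left( \text{PriceE}_i\, q^E_i + \text{PriceAS}_i\, q^{AS}_i \right) \\
\text{s.t.} \quad & \sum_i q^E_i = 300, \qquad \sum_i q^{AS}_i = 0.06 \times 300 = 18, \\
& 0 \le q^E_i \le \text{QuantityE}_i, \qquad 0 \le q^{AS}_i \le \text{QuantityAS}_i .
\end{aligned}
$$
Because both the objective and all constraints are linear in the decision variables, this is a standard LP, and lpSolve (used in the code below) is a natural fit.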
For completeness: after some reading, I found the right way to code the LP.
There are neater ways of doing it, but this works for me.
name <- c("Gen1","Gen2","Gen3")
QuantityE <- c(200,100,50)
PriceE <- c(10,12,50)
QuantityAS <- c(100,50,10)
PriceAS <- c(1,5,7)
mydata <- data.frame(name, QuantityE, PriceE , QuantityAS,PriceAS )
#System Data
EnergyDemand <- 300 #Total QuantityE
CRRequired <- 0.06*EnergyDemand #Total Quantity AS
library(lpSolve)
#Set up objective function; the prices will be the coefficients
obj.fun <- as.vector(stack(mydata[,c(3,5)])[1])
##Set up the constraints matrix
#This will set up individual quantityE and quantityAS coef's
D <- diag(1, NROW(obj.fun),NROW(obj.fun))
#This sets up coefficients with the ability to combine QuantityAS and QuantityE
E <- diag(1, NROW(name),NROW(name))
FA <- cbind(E,E)
#This sets up the coefficients for all quantityE
G <- matrix(c(rep(1,NROW(name)),rep(0,NROW(name))),1)
#This sets up the coefficients for all quantityAS
H <- matrix(c(rep(0,NROW(name)),rep(1,NROW(name))),1)
#This combines the above constraints into one matrix
constr <- rbind(D,FA,G,H)
#Set up directional constraints. All except the last 2 are <=
#This allows flexibility in choosing volumes
# The last two have to be equal to for Energy and AS demand
constr.dir <- c(rep("<=",NROW(constr)-2), rep("=",2))
#This sets up the rhs numbers for the matrix above
rhs <- c(QuantityE, QuantityAS, pmax(QuantityE, QuantityAS), EnergyDemand,CRRequired)
#This is the algorithm parameters
prod.sol <- lp("min", obj.fun, constr, constr.dir, rhs, compute.sens = TRUE)
a <- matrix(prod.sol$solution, nrow= length(name)) #decision variables values
rownames(a) <- name
colnames(a) <- c("Energy MW", "AS MW")
#This is the Summary of results
print(mydata) #This gives the initial dataset
a # This gives the combination of quantity used from Gen's
prod.sol #This gives the optimal minimized cost

How to forecast a time series with CART models

I'm using the rpart library in R to try to forecast electricity consumption in Australia (an example from the book Introductory Time Series with R):
library(rpart)
www <- "http://staff.elena.aut.ac.nz/Paul-Cowpertwait/ts/cbe.dat"
CBE <- read.table(www, header = T)
Elec.ts <- ts(CBE[, 3], start = 1958, freq = 12)
plot(cbind(Elec.ts))
fit <- rpart(elec~elec, method="anova", data=CBE)
pre <- predict(fit)
Elec.predict <- ts(pre[], start = 1958, freq = 12)
plot(cbind(Elec.ts,Elec.predict ))
It's really simple: the R program does not run if I try to create a model using the elec data itself.
Am I using it wrong?
How can I use this library properly?
I solved the problem with the script below. rpart fails here because the formula elec ~ elec leaves no valid predictor; the usual workaround is to regress the series on lagged copies of itself, which is what the script does. I have created a GitHub site with all the information about the script and the time series data: http://alvarojoao.github.io/timeseriesExamples
library(caret)
library(ggplot2)
library(pls)
library(data.table)
library(rpart)
library(bst)
library(plyr)
nLag <- 12
khorizon <- 1
www <- "./databases/elec.dat"
CBE <- read.table(www, header = T)
base <- CBE
variable <- 'elec'
base$elec = (base$elec-min(base$elec))/(max(base$elec)-min(base$elec))
base <- setDT(base)[, paste0(variable, 1:nLag) := shift(elec, 1:nLag)][]
base <- base[(nLag+1):nrow(base),]
Elec.ts <- ts(CBE[, 1], start = 1958, freq = 12)
acf(CBE$elec)
plot(cbind(Elec.ts))
timeSlices <- createTimeSlices(1:nrow(base),
initialWindow =nrow(base)*2/3, horizon = khorizon , fixedWindow = FALSE)
str(timeSlices,max.level = 1)
trainSlices <- timeSlices[[1]]
testSlices <- timeSlices[[2]]
predTest <- numeric(0) # container for out-of-sample predictions
trueTest <- numeric(0) # container for the matching actuals
for(i in 1:length(trainSlices)){
plsFitTime <- train(elec ~ .,
data = base[trainSlices[[i]],],
method = "treebag"
)
pred <- predict(plsFitTime,base[testSlices[[i]],])
true <- base$elec[testSlices[[i]]]
predTest <- c(predTest, pred) # accumulate predictions instead of overwriting them
trueTest <- c(trueTest, true) # accumulate the matching actuals
}
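With the accumulated vectors, the out-of-sample error can be summarized directly. A small follow-up sketch (my addition; note the values are still on the normalized 0-1 scale used above):
rmse <- sqrt(mean((predTest - trueTest)^2)) # RMSE over all rolling test slices
rmse
plot(trueTest, type = "l"); lines(predTest, col = "red") # quick visual check of the fit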
