Building an LSTM for Stock Prediction in R

I am building my first LSTM model using keras in R, and I believe my problem is with my input_shape. I would appreciate your help.
I am using closing stock returns at time t to predict returns at t+1, so I believe my input shape should equal 1.
Here is my code:
# LSTM Model
model <- keras::keras_model_sequential() %>%
  layer_lstm(units = 50, return_sequences = TRUE, dropout = 0.2,
             weights = 0.01, input_shape = 1) %>%
  layer_lstm(units = 100, return_sequences = FALSE, dropout = 0.2,
             weights = 0.01) %>%
  layer_dense(units = 1, activation = 'linear')
model %>% compile(
  optimizer = optimizer_rmsprop(),
  loss = "mse"
)
However, when I run this I get the following error message:
Error in py_call_impl(callable, dots$args, dots$keywords) :
  ValueError: Input 0 is incompatible with layer lstm_12: expected ndim=3, found ndim=2
Any ideas on what I am doing wrong and suggestions on what I should do?
Thank you for your time.
I believe the information I provided should be enough to answer my question. If not, here is my full code to get the data and the results:
# Download the stock price and normalize to returns
BRI_Price <- quantmod::getSymbols("BBRI.JK", src = "yahoo",
                                  from = as.Date("2015-01-01"),
                                  to = as.Date("2017-02-17"),
                                  by = "day", auto.assign = FALSE)
Returns_Closing <- quantmod::dailyReturn(BRI_Price$BBRI.JK.Close)
# Training and testing data
train_size <- round(.9 * nrow(Returns_Closing))
test_size <- nrow(Returns_Closing) - train_size
train <- Returns_Closing[1:train_size, ]
test <- Returns_Closing[train_size:nrow(Returns_Closing), ]
# Reshape into X = t and Y = t + 1
x_train <- Returns_Closing[1:train_size, ]
y_train <- Returns_Closing[2:(train_size + 1)]
x_test <- Returns_Closing[train_size:nrow(Returns_Closing), ]
y_test <- Returns_Closing[468:nrow(Returns_Closing), ]
# LSTM Model
model <- keras::keras_model_sequential() %>%
  layer_lstm(units = 50, return_sequences = TRUE, dropout = 0.2,
             weights = 0.01, input_shape = 1) %>%
  layer_lstm(units = 100, return_sequences = FALSE, dropout = 0.2,
             weights = 0.01) %>%
  layer_dense(units = 1, activation = 'linear')
model %>% compile(
  optimizer = optimizer_rmsprop(),
  loss = "mse"
)
# Train the model
Train_Model <- model %>% fit(
  x = x_train,
  batch_size = 100,
  epochs = 1,
  validation_data = list(x_test, y_test)
)
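For what it's worth, the error itself points at the fix: the LSTM expects a 3-D input of shape (samples, timesteps, features), while a 2-D xts column is being passed, and input_shape should be the pair (timesteps, features) rather than 1. A minimal sketch of the reshape, assuming one timestep and one feature per sample (the scalar weights = 0.01 argument is dropped here, since weights expects a list of weight arrays, and fit() is given the y targets the original call left out):
# Sketch only: reshape the 2-D returns into the 3-D array layer_lstm() expects
x_train_arr <- array(as.numeric(x_train), dim = c(length(x_train), 1, 1))
x_test_arr  <- array(as.numeric(x_test),  dim = c(length(x_test), 1, 1))

model <- keras::keras_model_sequential() %>%
  layer_lstm(units = 50, return_sequences = TRUE, dropout = 0.2,
             input_shape = c(1, 1)) %>%   # (timesteps, features)
  layer_lstm(units = 100, return_sequences = FALSE, dropout = 0.2) %>%
  layer_dense(units = 1, activation = 'linear')
model %>% compile(optimizer = optimizer_rmsprop(), loss = "mse")

Train_Model <- model %>% fit(
  x = x_train_arr, y = as.numeric(y_train),  # x and y lengths must match
  batch_size = 100, epochs = 1,
  validation_data = list(x_test_arr, as.numeric(y_test))
)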

Related

Parameter adjustment of a deep neural network using the kerastuneR package

What is the cause of the following code error?
library(magrittr)
x_data <- matrix(data = runif(500, 0, 1), nrow = 50, ncol = 5)
y_data <- ifelse(runif(50, 0, 1) > 0.6, 1L, 0L) %>% as.matrix()
x_data2 <- matrix(data = runif(500, 0, 1), nrow = 50, ncol = 5)
y_data2 <- ifelse(runif(50, 0, 1) > 0.6, 1L, 0L) %>% as.matrix()
library(keras)
library(tensorflow)
library(kerastuneR)
build_model = function(hp) {
  model = keras_model_sequential()
  model %>%
    layer_dense(units = hp$Int('units',
                               min_value = 32,
                               max_value = 512,
                               step = 32),
                input_shape = ncol(x_data),
                activation = 'relu') %>%
    layer_dense(units = 1, activation = 'softmax') %>%
    compile(
      optimizer = tf$keras$optimizers$Adam(
        hp$Choice('learning_rate',
                  values = c(1e-2, 1e-3, 1e-4))),
      loss = 'binary_crossentropy',
      metrics = 'accuracy')
  return(model)
}
tuner = RandomSearch(
  build_model,
  objective = 'val_accuracy',
  max_trials = 5,
  executions_per_trial = 3,
  directory = 'my_dir',
  project_name = 'helloworld')
tuner %>% search_summary()
tuner %>% fit_tuner(x_data, y_data,
                    epochs = 5,
                    validation_data = list(x_data2, y_data2))
result = kerastuneR::plot_tuner(tuner)
best_5_models = tuner %>% get_best_models(5)
best_5_models[[1]] %>% plot_keras_model()
Error in py_call_impl(callable, dots$args, dots$keywords) :
ValueError: Objective value missing in metrics reported to the Oracle, expected: ['val_accuracy'], found: dict_keys(['loss', 'acc', 'val_loss', 'val_acc'])
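For what it's worth, the error message itself suggests a name mismatch: the tuner is told to optimize 'val_accuracy', but this Keras version reports the metric as 'val_acc'. A minimal sketch of the likely fix, changing only the objective name:
# Sketch only: make the objective match the metric name Keras actually reports
tuner = RandomSearch(
  build_model,
  objective = 'val_acc',   # was 'val_accuracy'
  max_trials = 5,
  executions_per_trial = 3,
  directory = 'my_dir',
  project_name = 'helloworld')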

How to implement Bayesian optimization with Keras tuneR

I am hoping to run Bayesian optimization for my neural network via keras tuner.
I have the following code so far:
build_model <- function(hp) {
  model <- keras_model_sequential()
  model %>%
    layer_dense(units = hp$Int('units', min_value = 10, max_value = 50, step = 10),
                activation = "relu",
                input_shape = dim(X_pca_scores_scaled)[[2]]) %>%
    layer_dropout(rate = hp$Float('rate', min_value = 0, max_value = 0.5, step = 0.1)) %>%
    layer_dense(units = hp$Int('units', min_value = 0, max_value = 50, step = 10),
                activation = "relu") %>%
    layer_dropout(rate = hp$Float('rate', min_value = 0, max_value = 0.5, step = 0.1)) %>%
    layer_dense(units = 1) %>%
    compile(
      optimizer = "adam",
      loss = "mse",
      metrics = c("mae"))
  return(model)
}
tuner <- kerastuneR::BayesianOptimization(
  objective = 'mae',
  max_trials = 30)
stop_early <- callback_early_stopping(monitor = "mae",
                                      patience = 5,
                                      min_delta = 0.25,
                                      mode = "min")
tuner %>% fit_tuner(np_array(X_pca_scores_scaled),
                    np_array(train_targets),
                    epochs = 30,
                    callbacks = c(stop_early))
The above code will lead to the following error:
Error in py_get_attr_impl(x, name, silent) :
AttributeError: 'BayesianOptimizationOracle' object has no attribute 'search'
I'm not sure what an oracle is, so I only know that the problem is somewhere in my implementation related to it.
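For what it's worth, the traceback hints that a BayesianOptimizationOracle (the search-strategy object) was constructed rather than a tuner that can actually run a search, which is what happens when no model-building function is supplied. A minimal sketch of the likely fix, assuming kerastuneR::BayesianOptimization accepts a hypermodel argument as its Python counterpart does:
# Sketch only: pass the model-building function so a real tuner is created
tuner <- kerastuneR::BayesianOptimization(
  hypermodel = build_model,
  objective = 'mae',
  max_trials = 30)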

'keras_model_sequential()' runs forever

I am working on a project where I am building an LSTM model for GDP growth forecasting. When I try to build the model, 'keras_model_sequential()' gets stuck and runs forever! I am confused: I installed both the keras and tensorflow packages, but it still hangs. R keeps running on the first line of this code sample.
lstm_model <- keras_model_sequential()
lstm_model %>%
  # 1st LSTM layer
  layer_lstm(units = 20,                      # size of the layer
             batch_input_shape = c(1, 5, 1),  # batch size, timesteps, features
             return_sequences = TRUE,         # preserve the sequence
             stateful = TRUE) %>%
  # Dropout layer
  layer_dropout(rate = 0.3) %>%
  # 2nd LSTM layer
  layer_lstm(units = 20,
             return_sequences = TRUE,
             stateful = TRUE) %>%
  layer_dropout(rate = 0.3) %>%
  # Final dense/output layer
  time_distributed(keras::layer_dense(units = 1))
# Use the Adam optimizer, mean absolute error as the loss, and track accuracy
lstm_model %>%
  compile(loss = 'mae', optimizer = 'adam', metrics = 'accuracy')
# Summary of the model
summary(lstm_model)
# Fit the model
lstm_model %>% fit(
  x = x_train_arr,
  y = y_train_arr,
  batch_size = 1,
  epochs = 20,
  verbose = 0,
  shuffle = FALSE
)
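For what it's worth, when the very first keras_model_sequential() call hangs, the model code above is usually not the culprit; a common cause is a missing or half-installed Python backend that R waits on indefinitely. A minimal check, assuming the standard installer shipped with the keras R package:
library(keras)
# One-time setup: installs the Python-side TensorFlow/Keras the R package drives;
# a hung first call often means this step never completed.
install_keras()
# Then confirm the backend actually loads before building any models
tensorflow::tf_config()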

Create a generator that shuffles training data for Keras in R / train a Keras model on a low-spec computer

I have a dataset similar to MNIST (200,000 rows of 784 pixels + 1 categorical output, 785 columns in total) and I want to train an MLP and a CNN using the keras library in R (in RStudio). I'm currently using a computer with 32 GB of RAM and an Intel i7-8700 @ 3.2 GHz, and I have no problem training these neural networks with the fit() function from keras (training time ~4 minutes). However, when I execute the same script on my laptop (8 GB of RAM, Intel i5-6300 @ 2.3 GHz), it cannot even finish one epoch in less than 10 minutes.
I work as a laboratory professor at a university, and I'm worried that my students will not be able to run the script with that dataset on their laptops for lack of computing power. My idea was to fit the same models using a generator with the fit_generator() function and load only part of the dataset in each call to the generator (to use less memory than loading the entire dataset, and hopefully train faster). However, this produced some unexpected results. The accuracy reached by fit() is ~98.8% on training (120,000 rows) and ~98.4% on test (80,000 rows), but with fit_generator() it is ~1.05% on the same training set and ~1.01% on the same test set. I have found related issues here, here, here and here, and it seems the problem is that fit_generator() does not shuffle the training data, so the network is always trained on the same batches (e.g. the same gradients during backpropagation), which do not represent the whole dataset well, hence the poor accuracy. I also trained the model with fit() but with the shuffle argument set to FALSE, and the accuracy fell to 0.1%, which confirms that shuffling the training data is crucial for training this model.
My questions are:
Is it a good idea to use a generator to avoid problems/reduce training time on a computer with lower specifications, or is there a better solution?
I'm training the models on the entire dataset by setting the steps_per_epoch argument to ceil(nrow(train_dataset)/batch_size), so the same data should be used by fit() and fit_generator(), except for the shuffling part, right?
If a generator that loads part of the dataset is a good way to train the models on low-spec computers, how can I shuffle the training data effectively with a generator?
All the generators I've seen take the entire dataset and produce a batch of samples in each call, or do not shuffle the data. I have created a generator with the code below. Its arguments are: datafile, the file with the data (training or test); batch_size, the size of the batch to produce in each call; mlp, whether to shape the data for an MLP or a CNN; val, whether to start producing batches at a different index for validation data; and shuffle, whether to shuffle the data. My idea for shuffling was to create a random index and read only one row of the file for each number in the index (using the skip and nrows arguments of read.table()). This is extremely inefficient due to the many calls to read.table():
data_generator <- function(datafile, batch_size = 128, mlp = TRUE,
                           val = TRUE, shuffle = TRUE) {
  nrow_file <- R.utils::countLines(datafile) - 1
  if (!val) {
    skip <- 0
  } else {
    skip <- nrow_file / 2
  }
  function() {
    # Calculate the rows to read in this call
    rows_to_read <- batch_size
    if (skip + batch_size > nrow_file) {
      rows_to_read <- nrow_file - skip
    }
    if (shuffle) {
      index <- sample(1:nrow_file, size = batch_size, replace = FALSE)
    } else {
      index <- (skip + 1):(skip + rows_to_read)
    }
    # Load only the rows that we want to use in training
    trData <- as.list(numeric(batch_size))
    for (i in index) {
      ii <- i - 1
      trData[[which(i == index)]] <- read.table(datafile, sep = ";", header = TRUE,
                                                skip = ii, nrows = 1)
    }
    trData <- do.call("rbind", trData)
    # Advance the file offset for the next batch
    skip <<- skip + batch_size
    if (skip >= nrow_file) {
      skip <<- 0
    }
    # Build inputs and output
    y_train <- trData[, 1]
    x_train <- trData[, -1]
    if (mlp) {
      # Return data as is for the MLP
      list(data.matrix(x_train), data.matrix(y_train))
    } else {
      # Return data reshaped for the CNN
      list(array_reshape(data.matrix(x_train), c(nrow(x_train), 28, 28, 1)),
           data.matrix(y_train))
    }
  }
}
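As an aside on the shuffling question: when the data does fit in memory, epoch-level shuffling is usually done by permuting an index vector once per epoch and slicing consecutive batches from it, which avoids re-reading the file entirely. A minimal sketch, with hypothetical in-memory x (feature matrix) and y (one-column label matrix):
# Sketch only: shuffle once per epoch, then serve consecutive slices
in_memory_generator <- function(x, y, batch_size = 128) {
  idx_order <- sample(nrow(x))  # fresh permutation for the first epoch
  pos <- 1
  function() {
    if (pos + batch_size - 1 > nrow(x)) {
      idx_order <<- sample(nrow(x))  # epoch exhausted: reshuffle
      pos <<- 1
    }
    idx <- idx_order[pos:(pos + batch_size - 1)]
    pos <<- pos + batch_size
    list(x[idx, , drop = FALSE], y[idx, , drop = FALSE])
  }
}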
The code I've used to train the MLP model (the CNN is analogous) is:
Without the generator:
MLP_model <- keras_model_sequential()
MLP_model %>%
  layer_dense(units = 500, activation = 'relu', input_shape = c(784),
              kernel_regularizer = regularizer_l2(l = 0.0001),
              bias_regularizer = regularizer_l2(l = 0.0001)) %>%
  layer_dropout(rate = 0.4, seed = 150) %>%
  layer_batch_normalization() %>%
  layer_dense(units = 300, activation = 'relu',
              kernel_regularizer = regularizer_l2(l = 0.001),
              bias_regularizer = regularizer_l2(l = 0.001)) %>%
  layer_dropout(rate = 0.3, seed = 150) %>%
  layer_batch_normalization() %>%
  layer_dense(units = 10, activation = 'softmax',
              kernel_regularizer = regularizer_l2(l = 0.001),
              bias_regularizer = regularizer_l2(l = 0.001))
MLP_model %>% compile(
  loss = loss_categorical_crossentropy,
  optimizer = optimizer_adam(),
  metrics = c('accuracy')
)
history <- MLP_model %>% fit(
  x_train_mlp, y_train,
  epochs = 20, batch_size = 124,
  validation_split = 0.2,
  shuffle = TRUE
)
With the generator:
MLP_model <- keras_model_sequential()
MLP_model %>%
  layer_dense(units = 500, activation = 'relu', input_shape = c(784),
              kernel_regularizer = regularizer_l2(l = 0.0001),
              bias_regularizer = regularizer_l2(l = 0.0001)) %>%
  layer_dropout(rate = 0.4, seed = 150) %>%
  layer_batch_normalization() %>%
  layer_dense(units = 300, activation = 'relu',
              kernel_regularizer = regularizer_l2(l = 0.001),
              bias_regularizer = regularizer_l2(l = 0.001)) %>%
  layer_dropout(rate = 0.3, seed = 150) %>%
  layer_batch_normalization() %>%
  layer_dense(units = 10, activation = 'softmax',
              kernel_regularizer = regularizer_l2(l = 0.001),
              bias_regularizer = regularizer_l2(l = 0.001))
MLP_model %>% compile(
  loss = loss_categorical_crossentropy,
  optimizer = optimizer_adam(),
  metrics = c('accuracy')
)
history <- MLP_model %>% fit_generator(
  data_generator(traindatafile,
                 batch_size = 128, mlp = TRUE, val = FALSE),
  steps_per_epoch = round((R.utils::countLines(traindatafile) - 1) / 128),
  epochs = 10)
Thanks in advance!
Answering my own question, and thanks to @user12728748 for the comments: I changed the generator to read random samples from the file:
data_generator <- function(datafile, batch_size = 128,
                           mlp, val,
                           shuffle = TRUE, validation_split = 0) {
  nrow_file <- py_eval(paste("sum(1 for line in open('", datafile, "'))", sep = '')) - 1
  skip <- 0
  if (val) {
    nrow_file <- validation_split * nrow_file
  } else {
    nrow_file <- (1 - validation_split) * nrow_file
  }
  if (nrow_file > 0) {
    function() {
      # Calculate the rows to read in this call
      rows_to_read <- batch_size
      if (skip + batch_size > nrow_file) {
        rows_to_read <- nrow_file - skip
      }
      if (shuffle) {
        index <- sample(1:nrow_file, size = batch_size, replace = FALSE)
      } else {
        index <- (skip + 1):(skip + rows_to_read)
      }
      # Create rows to skip
      if (val) {
        # In validation, skip training rows and validation rows not in index
        rows_to_skip <- c(1:ifelse(validation_split > 0,
                                   ((1 - validation_split) * nrow_file / validation_split),
                                   1),
                          setdiff(1:nrow_file, index) +
                            (1 - validation_split) * nrow_file / validation_split)
      } else {
        # In training, skip validation rows and training rows not in index
        rows_to_skip <- c(ifelse(validation_split > 0,
                                 nrow_file + 1, 0):ifelse(validation_split > 0,
                                                          nrow_file / (1 - validation_split), 0),
                          setdiff(1:nrow_file, index))
        if (rows_to_skip[1] == 0) rows_to_skip <- rows_to_skip[-1]
      }
      trData <- import("pandas")$read_csv(datafile,
                                          skiprows = rows_to_skip,
                                          sep = ";")
      # Advance the offset for the next batch
      skip <<- skip + batch_size
      if (skip >= nrow_file) {
        skip <<- 0
      }
      # Build inputs and output
      y_train <- to_categorical(trData[, 1], num_classes = 10)
      x_train <- trData[, -1]
      if (mlp) {
        # Return data as is for the MLP
        list(data.matrix(x_train), data.matrix(y_train))
      } else {
        # Return data reshaped for the CNN
        list(array_reshape(data.matrix(x_train), c(nrow(x_train), 28, 28, 1)),
             data.matrix(y_train))
      }
    }
  } else {
    NULL
  }
}
I have added a validation_split argument to set the fraction of the training data to use for validation.
However, using the generator to train the model does not seem to improve training time on the low-spec computer, but it uses almost 4 GB less RAM (especially when training with small batches of ~128 samples), and the computer stays usable for other tasks while the code runs, without crashing.
Here is the code to train and evaluate an MLP model using the generator:
MLP_model <- keras_model_sequential()
MLP_model %>%
  layer_dense(units = 500, activation = 'relu', input_shape = c(784),
              kernel_regularizer = regularizer_l2(l = 0.0001),
              bias_regularizer = regularizer_l2(l = 0.0001)) %>%
  layer_dropout(rate = 0.4, seed = 150) %>%
  layer_batch_normalization() %>%
  layer_dense(units = 300, activation = 'relu',
              kernel_regularizer = regularizer_l2(l = 0.001),
              bias_regularizer = regularizer_l2(l = 0.001)) %>%
  layer_dropout(rate = 0.3, seed = 150) %>%
  layer_batch_normalization() %>%
  layer_dense(units = 10, activation = 'softmax',
              kernel_regularizer = regularizer_l2(l = 0.001),
              bias_regularizer = regularizer_l2(l = 0.001))
MLP_model %>% compile(
  loss = loss_categorical_crossentropy,
  optimizer = optimizer_adam(),
  metrics = c('accuracy')
)
validation_split <- 0.2
# Count the data rows once (excluding the header)
n_train <- py_eval(paste("sum(1 for line in open('", traindatafile, "'))", sep = '')) - 1
n_test <- py_eval(paste("sum(1 for line in open('", testdatafile, "'))", sep = '')) - 1
history <- MLP_model %>% fit_generator(
  data_generator(traindatafile,
                 batch_size_train, mlp = TRUE,
                 val = FALSE, validation_split = validation_split),
  steps_per_epoch = round((1 - validation_split) * n_train / batch_size_train),
  validation_data = data_generator(traindatafile,
                                   batch_size_train, mlp = TRUE,
                                   val = TRUE, validation_split = validation_split),
  validation_steps = round(validation_split * n_train / batch_size_train),
  callbacks = c(early_stopping),
  epochs = 10)
MLP_metrics_train <- MLP_model %>%
  evaluate_generator(data_generator(traindatafile,
                                    batch_size_eval, mlp = TRUE,
                                    val = FALSE, shuffle = FALSE),
                     steps = ceiling(n_train / batch_size_eval))
MLP_metrics_test <- MLP_model %>%
  evaluate_generator(data_generator(testdatafile,
                                    batch_size_eval, mlp = TRUE,
                                    val = FALSE, shuffle = FALSE),
                     steps = ceiling(n_test / batch_size_eval))
y_pred_mlp <- MLP_model %>%
  predict_generator(data_generator(testdatafile,
                                   batch_size_eval, mlp = TRUE,
                                   val = FALSE, shuffle = FALSE),
                    steps = ceiling(n_test / batch_size_eval))
Hope this helps someone!

How to make sure inputs can be divided by the batch size in a stateful LSTM?

I'm having some issues training a network using stateful LSTMs.
Given the code below, I'm getting the following error message:
Error in py_call_impl(callable, dots$args, dots$keywords) :
ValueError: In a stateful network, you should only pass inputs with a number of samples that can be divided by the batch size. Found: 9384 samples
Input is sent from an external application, so I cannot control the exact number of samples sent. What would be the best way to ensure that the input can always be divided by the batch size?
neural.train = function(model, XY)
{
  XY <- as.matrix(XY)
  X <- XY[, -ncol(XY)]
  Y <- XY[, ncol(XY)]
  Y <<- ifelse(Y > 0, 1, 0)
  dropout <- 0.3
  batchSize <- 64
  newModel <- keras_model_sequential()
  newModel %>%
    layer_lstm(batch_input_shape = c(batchSize, 30, 19), units = 72,
               return_sequences = TRUE, stateful = TRUE,
               dropout = dropout, recurrent_dropout = dropout) %>%
    # layer_dense(units = 20) %>%
    # layer_lstm(units = 50, return_sequences = TRUE, stateful = TRUE,
    #            dropout = dropout, recurrent_dropout = dropout) %>%
    layer_lstm(units = 16, dropout = dropout, recurrent_dropout = dropout,
               return_sequences = FALSE, stateful = TRUE) %>%
    layer_dense(units = 8) %>%
    layer_batch_normalization() %>%
    layer_dense(units = 1, activation = 'relu')
  newModel %>% compile(
    optimizer = optimizer_rmsprop(lr = 0.001),
    loss = 'binary_crossentropy',
    metrics = c('accuracy')
  )
  # X_conv <- matrix(c(X[1, 1:10], X[1, 11:20]), ncol = 10, nrow = 2)
  ar <- array(X, c(dim(X)[1], 30, 19))
  # newModel %>% fit(X, Y, epochs = 20, batch_size = 100,
  #                  validation_split = 0.2, shuffle = TRUE, callbacks = reduce_lr)
  newModel %>% fit(ar, Y, epochs = 100, batch_size = batchSize,
                   validation_split = 0.2, shuffle = FALSE)
  Models[[model]] <<- serialize_model(newModel, include_optimizer = TRUE)
}
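For what it's worth, since the sample count comes from an external application, the usual workaround is to trim (or pad) the data to the nearest suitable multiple of the batch size before fitting. A minimal sketch of trimming, applied to the ar and Y built above; because validation_split = 0.2 is used, the trim is to a multiple of 5 * batchSize so that both the 80% training part and the 20% validation part stay divisible by the batch size:
# Sketch only: drop trailing samples that do not fill whole batches
chunk  <- 5 * batchSize                  # 80/20 split => multiple of 5 batches
usable <- (dim(ar)[1] %/% chunk) * chunk
ar <- ar[1:usable, , , drop = FALSE]
Y  <- Y[1:usable]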
