I'm trying to train a 1D-CNN on a small dataset, which has only 3000+ samples. The lookback parameter for the generator is set to 7. The R session aborts after a few seconds of training.
However, when the lookback parameter is set to any value other than 7 (such as 5, 9 or 10), the 1D-CNN model works. I also tried GRU, LSTM, and fully connected layers with the lookback parameter set to 7, and everything works fine.
My environment is as follows: R 3.6.1, RStudio 1.5.5001, TensorFlow 2.0.0, Keras 2.2.5.0.
The code is shown below:
lookback <- 7
step <- 1
delay <- 1
batch_size <- 32
nbfeature <- 31
generator_WeeklyTotal <- function(data, lookback, delay, min_index, max_index,
                                  shuffle = FALSE, batch_size, step = 1) {
  if (is.null(max_index)) max_index <- nrow(data) - delay - 1
  i <- min_index + lookback
  function() {
    if (shuffle) {
      rows <- sample(c((min_index+lookback):max_index), size = batch_size)
    } else {
      if (i + batch_size >= max_index)
        i <<- min_index + lookback
      rows <- c(i:min(i+batch_size, max_index))
      i <<- i + length(rows)
    }
    samples <- array(0, dim = c(length(rows),
                                lookback / step,
                                nbfeature))
    targets <- array(0, dim = c(length(rows)))
    for (j in 1:length(rows)) {
      indices <- seq(rows[[j]] - lookback, rows[[j]],
                     length.out = dim(samples)[[2]])
      samples[j,,] <- data[indices,60:90]
      targets[[j]] <- data[rows[[j]] + delay,6]
    }
    list(samples, targets)
  }
}
train_gen <- generator_WeeklyTotal(
  data = DLtrain,
  lookback = lookback,
  delay = delay,
  min_index = 1+lookback,
  max_index = 2922+lookback,
  shuffle = FALSE,
  step = step,
  batch_size = batch_size
)
val_gen <- generator_WeeklyTotal(
  data = DLtrain,
  lookback = lookback,
  delay = delay,
  min_index = 2923+lookback,
  max_index = 3287+lookback,
  step = step,
  batch_size = batch_size
)
test_gen <- generator_WeeklyTotal(
  data = DLtrain,
  lookback = lookback,
  delay = delay,
  min_index = 3288+lookback,
  max_index = 3652+lookback,
  step = step,
  batch_size = batch_size
)
val_steps <- (3287+lookback - 2923+lookback - lookback) / batch_size
test_steps <- (3652+lookback - 3288+lookback - lookback) / batch_size
model_conv1d <- keras_model_sequential() %>%
  layer_conv_1d(filters = 64, kernel_size = 5, activation = "relu", padding = "same",
                input_shape = list(NULL, nbfeature)) %>%
  layer_max_pooling_1d(pool_size = 3) %>%
  layer_conv_1d(filters = 64, kernel_size = 5, activation = "relu", padding = "same") %>%
  layer_max_pooling_1d(pool_size = 3) %>%
  layer_conv_1d(filters = 64, kernel_size = 5, activation = "relu", padding = "same") %>%
  layer_global_max_pooling_1d() %>%
  layer_dense(units = 1)
I wonder why the R session aborts when I set the lookback parameter to 7 but works with other values. Is there any way to solve this problem? It is necessary to set lookback to 7 because I am predicting from weekly data.
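One way to narrow this down (a sketch of my own; it assumes DLtrain and the objects defined above are in the workspace) is to pull a single batch from the generator and confirm the shapes and values that actually reach the network when lookback is 7:
batch <- train_gen()
str(batch)         # should be a list of (samples, targets)
dim(batch[[1]])    # expected: roughly batch_size x (lookback / step) x nbfeature
anyNA(batch[[1]])  # quick check for missing values in the batch
For what it is worth, each layer_max_pooling_1d(pool_size = 3) divides the time dimension by roughly three, so a very short input sequence can collapse to length zero inside the model; that is worth ruling out as well.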
Related
I want to train a regression model with "keras_model_sequential" in R. To find the best parameters over a grid search I used "tuning_run", but I got this error:
training run 1/128 (flags = list(0.05, 66, "relu", 8, 10, 0.001, 0.2))
Error in sink(file = output_file, type = "output", split = TRUE) :
sink stack is full
Calls: tuning_run ... with_changed_file_copy -> force -> sink -> .handleSimpleError -> h
I should mention that a folder named "runs" was created in the data and script path, containing a lot of subfolders whose names look like dates. Maybe this is the reason.
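One thing worth trying before re-running the script below (a sketch of my own, not a confirmed fix): if an earlier tuning_run() call was interrupted, it may leave output sinks open, so closing them and clearing stale runs before starting a new grid search sometimes helps. sink.number() and sink() are base R; clean_runs() comes from tfruns.
# close any output diversions left open by an interrupted run
while (sink.number() > 0) sink()
# optionally remove incomplete runs (same call that is commented out further down)
# clean_runs(ls_runs(completed == FALSE))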
library(plyr)
library(boot)
library(keras)
library(tensorflow)
library(kerasR)
library(tidyverse)
library(tfruns)
library(MLmetrics)
df= mainlist[[1]] # data: a 33*31 dataframe (33 samples and 31 features, where the last column is the target)
x = (length(df))-1
print(x)
df1 = df[, 2:x]
#normalization
df2 = df[, 2:length(df)]
normalize <- function(x) {
return ((x - min(x)) / (max(x) - min(x)))
}
maxmindf <- as.data.frame(lapply(df2, normalize))
attach(maxmindf)
df_norm<-as.matrix(maxmindf)
# Determine sample size
ind <- sample(2, nrow(df_norm), replace=TRUE, prob=c(0.80, 0.20))
# Split the data(peaks)
training <- df_norm[ind==1, 1:ncol(df_norm)-1]
test1 <- df_norm[ind==2, 1:ncol(df_norm)-1]
training_target <- df_norm[ind==1, ncol(df_norm)]
test1_target <- df_norm[ind==2, ncol(df_norm)]
#number of nodes in the first hidden layer
u1_1 = ceiling((1/2) * (ncol(training)+1))
u2_1 = ceiling(1* (ncol(training)+1))
u3_1 = ceiling((2/3) * (ncol(training)+1))
u4_1 = ceiling(2*(ncol(training)))
####a) Declaring the flags for hyperparameters
FLAGS = flags(
flag_numeric("dropout1", 0.05),
flag_integer("units",u1_1),
flag_string("activation1", "relu"),
flag_integer("batchsize1",8),
flag_integer("Epoch1",50),
flag_numeric("learning_rate", 0.01),
flag_numeric("val_split",0.2),
flag_numeric("reg_l1",0.001)
)
# ####b) Defining the DNN model
build_model <- function() {
  model <- keras_model_sequential()
  model %>%
    layer_dense(units = FLAGS$units, activation = FLAGS$activation1, input_shape = c(dim(training)[2])) %>%
    layer_dropout(rate = FLAGS$dropout1) %>%
    layer_dense(units = 1, activation = "linear")
  #####c) Compiling the DNN model
  model %>% compile(
    loss = "mse",
    optimizer = optimizer_adam(FLAGS$learning_rate),
    metrics = c("mse"))
  model
}
model<-build_model()
model %>% summary()
print_dot_callback <- callback_lambda(
on_epoch_end = function(epoch, logs) {
if (epoch %% 80 == 0) cat("\n")
cat(".")})
early_stop <- callback_early_stopping(monitor = "val_loss", mode='min',patience =20)
###########d) Fitting the DNN model#################
model_Final<-build_model()
model_fit_Final<-model_Final %>% fit(
training,
training_target,
epochs =FLAGS$Epoch1, batch_size = FLAGS$batchsize1,
shuffled=F,
validation_split = FLAGS$val_split,
verbose=0,
callbacks = list(early_stop, print_dot_callback)
)
################a) Inner cross-validation##########################
nCVI=5
Hyperpar = data.frame() #the results of each combination of hyperparameters resulting from each inner partition will be saved
for (i in 1:nCVI){ #do it to choose best parameters
print("I is:")
print(i)
Sam_per=sample(1:nrow(training),nrow(training))
X_trII=training[Sam_per,]
y_trII=training_target[Sam_per]
# print(head(X_trII, 3))
print("----------------------")
print(head(y_trII,3))
############b) Grid search using the tuning_run() function of tfruns package########
runs.sp<-tuning_run(paste0("train.R")
,runs_dir = '_tuningE1'
,flags=list(dropout1 = c(0,0.05),
units = c(u1_1, u2_1),
activation1 = ("relu"),
batchsize1 = c(8, 16),
Epoch1 = c(10,50),
learning_rate = c(0.001),
val_split = c(0.2)),
sample = 0.2,
confirm = FALSE,
echo =F)
# clean_runs(ls_runs(completed == FALSE))
#####c) Saving each combination of hyperparameters in the Hyperpar data.frame
runs.sp = runs.sp[order(runs.sp$flag_units,runs.sp$flag_dropout1, runs.sp$flag_batchsize1, runs.sp$flag_Epoch1),]
runs.sp$grid_length = 1:nrow(runs.sp) #we save the grid length and also the important parameters
Parameters = data.frame(grid_length=runs.sp$grid_length,
metric_val_mse=runs.sp$metric_val_mse,
flag_dropout1=runs.sp$flag_dropout1,
flag_units=runs.sp$flag_units,
flag_batchsize1=runs.sp$flag_batchsize1,
epochs_completed=runs.sp$epochs_completed,
flag_learning_rate=runs.sp$flag_learning_rate,
flag_activation1=runs.sp$flag_activation1)
Hyperpar = rbind(Hyperpar,data.frame(Parameters)) #we saved the important parameters
}
#####d) Summarizing the five inner folds by hyperparameter combination
#the average prediction performance is obtained for each inner fold
Hyperpar %>%
group_by(grid_length) %>%
summarise(val_mse=mean(metric_val_mse),
dropout1=mean(flag_dropout1),
units=mean(flag_units),
batchsize1=mean(flag_batchsize1),
learning_rate=mean(flag_learning_rate),
epochs=mean( epochs_completed)) %>%
select(grid_length,val_mse,dropout1,units,batchsize1,
learning_rate, epochs) %>%
mutate_if(is.numeric, funs(round(., 3)))
Hyperpar_Opt = Hyperpar
######e) ############ select the best combination of hyperparameters
Min = min(Hyperpar_Opt$val_mse)
pos_opt = which(Hyperpar_Opt$val_mse==Min)
pos_opt=pos_opt[1]
Optimal_Hyper=Hyperpar_Opt[pos_opt,]
#####Selecting the best hyperparameters
Drop_O = Optimal_Hyper$dropout1
Epoch_O = round(Optimal_Hyper$epochs,0)
Units_O = round(Optimal_Hyper$units,0)
activation_O = unique(Hyperpar$flag_activation1)
batchsize_O = round(Optimal_Hyper$batchsize1,0)
lr_O = Optimal_Hyper$learning_rate
print_dot_callback <- callback_lambda(
on_epoch_end = function(epoch, logs) {
if (epoch %% 20 == 0) cat("\n")
cat(".")})
#refitting the model with optimal values
model_Sec<-keras_model_sequential()
model_Sec %>%
layer_dense(units =Units_O , activation =activation_O, input_shape =
c(dim(training)[2])) %>%
layer_dropout(rate =Drop_O) %>%
layer_dense(units =1, activation =activation_O)
model_Sec %>% compile(
loss = "mean_squared_error",
optimizer = optimizer_adam(lr=lr_O),
metrics = c("mean_squared_error"))
# fit the model with our data
ModelFited<-model_Sec %>% fit(
X_trII, y_trII,
epochs=Epoch_O, batch_size =batchsize_O, #####validation_split=0.2,
early_stop,
verbose=0
,callbacks=list(print_dot_callback)
)
#############g) Prediction of testing set ##########################
Yhat=model_Sec%>% predict(test1)
y_p=Yhat
y_p_tst =as.numeric(y_p)
#y_tst=y[tst_set]
plot(test1_target,y_p_tst)
MSE=mean((test1_target - y_p_tst)^2)
Do you have any ideas?
Thanks in advance.
I have a dataset similar to MNIST (200,000 rows of 784 pixels + 1 categorical output (785 columns)) and I want to train an MLP and a CNN using the Keras library in R (in RStudio). I'm currently using a computer with 32 GB of RAM and an Intel i7-8700 @ 3.2 GHz, and I have no problem training these neural networks with the fit() function from Keras (training time ~4 minutes). However, when I execute the same script on my laptop (8 GB of RAM with an Intel i5-6300 @ 2.3 GHz) it cannot even finish an epoch in less than 10 minutes.
I work as a laboratory professor at a university and I'm worried that my students will not be able to run the script with that dataset on their laptops due to lack of computing power. My idea was to fit the same models using a generator with the fit_generator() function and load part of the dataset instead of the whole dataset in each call to the generator (in order to use less memory than loading the entire dataset, resulting in faster training). However, this produces some unexpected results. The accuracy reached with the fit() function is ~98.8% on training (120,000 rows) and ~98.4% on test (80,000 rows), but with the fit_generator() function it is ~1.05% on the same training set and ~1.01% on the same test set. I have found related issues here, here, here and here, and it seems that the problem is that fit_generator() does not shuffle the training data, so the network is always trained on the same batches (e.g. the same gradients when computing backpropagation) which do not represent the entire dataset well, resulting in that poor accuracy. I trained the model using fit() but with the shuffle argument set to FALSE, and the accuracy fell to 0.1%, which confirms that shuffling the training data is crucial for training the model.
My questions are:
Is it a good idea to use a generator to avoid problems/reduce training time when using a computer with lower specifications, or is there a better solution?
I'm training the models on the entire dataset by setting the steps_per_epoch argument to ceiling(nrow(train_dataset) / batch_size), so it should use the same data with fit() and fit_generator(), except for the "shuffling" part, right?
If a generator that loads part of the dataset is a good solution for training the models on low-spec computers, how can I shuffle the training data in an effective way using a generator?
All the generators I've seen take the entire dataset and produce a batch of samples in each call, or do not shuffle the data. I have created a generator with the code below. Its arguments are: datafile, the file with the data (training or test data); batch_size, the size of the batch to produce in each call; mlp, whether to process the data for an MLP or a CNN; val, whether to start producing batches at a different index for validation data; and shuffle, whether to shuffle the data. My idea for shuffling was to create a random index and read only one row of the file for each number in the index (using the skip and nrows arguments of read.table()). This is extremely inefficient due to the many calls to read.table():
data_generator <- function(datafile, batch_size = 128, mlp = TRUE, val = TRUE, shuffle = TRUE) {
  nrow_file <- R.utils::countLines(datafile) - 1
  if (!val) {
    skip <- 0
  } else {
    skip <- nrow_file / 2
  }
  function() {
    # Calculate the rows to read in this epoch
    rows_to_read <- batch_size
    if (skip + batch_size > nrow_file) {
      rows_to_read <- nrow_file - skip
    }
    if (shuffle) {
      index <- sample(c(1:nrow_file), size = batch_size, replace = F)
    } else {
      index <- (skip + 1):(skip + rows_to_read)
    }
    # Load only the rows that we want to use in training
    trData <- as.list(numeric(batch_size))
    for (i in index) {
      ii <- i - 1
      trData[[which(i == index)]] <- read.table(datafile, sep = ";", header = TRUE,
                                                skip = ii, nrows = 1)
    }
    trData <- do.call("rbind", trData)
    # Upload the rows to train
    skip <<- skip + batch_size
    if (skip >= nrow_file) {
      skip <<- 0
    }
    # Build inputs and output
    y_train <- trData[,1]
    x_train <- trData[,-1]
    if (mlp) {
      # Return data as is for mlp
      list(data.matrix(x_train), data.matrix(y_train))
    } else {
      # Return data reshaped for CNN
      list(array_reshape(data.matrix(x_train), c(nrow(x_train), 28, 28, 1)),
           data.matrix(y_train))
    }
  }
}
The code I've used to train the MLP model (analogous to CNN) is:
Without generator
MLP_model <- keras_model_sequential()
MLP_model %>%
layer_dense(units = 500, activation = 'relu', input_shape = c(784),
kernel_regularizer = regularizer_l2(l = 0.0001),
bias_regularizer = regularizer_l2(l = 0.0001)) %>%
layer_dropout(rate = 0.4, seed = 150) %>%
layer_batch_normalization() %>%
layer_dense(units = 300, activation = 'relu',
kernel_regularizer = regularizer_l2(l = 0.001),
bias_regularizer = regularizer_l2(l = 0.001)) %>%
layer_dropout(rate = 0.3, seed = 150) %>%
layer_batch_normalization() %>%
layer_dense(units = 10, activation = 'softmax',
kernel_regularizer = regularizer_l2(l = 0.001),
bias_regularizer = regularizer_l2(l = 0.001))
MLP_model %>% compile(
loss = loss_categorical_crossentropy,
optimizer = optimizer_adam(),
metrics = c('accuracy')
)
history <- MLP_model %>% fit(
x_train_mlp, y_train,
epochs = 20, batch_size = 124,
validation_split = 0.2,
shuffle = TRUE
)
With generator:
MLP_model <- keras_model_sequential()
MLP_model %>%
layer_dense(units = 500, activation = 'relu', input_shape = c(784),
kernel_regularizer = regularizer_l2(l = 0.0001),
bias_regularizer = regularizer_l2(l = 0.0001)) %>%
layer_dropout(rate = 0.4, seed = 150) %>%
layer_batch_normalization() %>%
layer_dense(units = 300, activation = 'relu',
kernel_regularizer = regularizer_l2(l = 0.001),
bias_regularizer = regularizer_l2(l = 0.001)) %>%
layer_dropout(rate = 0.3, seed = 150) %>%
layer_batch_normalization() %>%
layer_dense(units = 10, activation = 'softmax',
kernel_regularizer = regularizer_l2(l = 0.001),
bias_regularizer = regularizer_l2(l = 0.001))
MLP_model %>% compile(
loss = loss_categorical_crossentropy,
optimizer = optimizer_adam(),
metrics = c('accuracy')
)
history <- MLP_model %>% fit_generator(
data_generator(traindatafile,
batch_size = 128, mlp = TRUE, val = FALSE),
steps_per_epoch = round((R.utils::countLines(traindatafile)-1) / (128)),
epochs = 10)
Thanks in advance!
Answering my own question, and thanks to @user12728748 for the comments: I changed the generator to read random samples from the file:
data_generator <- function(datafile, batch_size = 128,
                           mlp, val,
                           shuffle = TRUE, validation_split = 0) {
  nrow_file <- py_eval(paste("sum(1 for line in open('", datafile, "'))", sep = '')) - 1
  skip <- 0
  if (val) {
    nrow_file <- validation_split * nrow_file
  } else {
    nrow_file <- (1 - validation_split) * nrow_file
  }
  if (nrow_file > 0) {
    function() {
      # Calculate the rows to read in this epoch
      rows_to_read <- batch_size
      if (skip + batch_size > nrow_file) {
        rows_to_read <- nrow_file - skip
      }
      if (shuffle) {
        index <- sample(c(1:nrow_file), size = batch_size, replace = F)
      } else {
        index <- (skip + 1):(skip + rows_to_read)
      }
      # Create rows to skip
      if (val) {
        # in validation, skip training rows and validation rows that are not found in index
        rows_to_skip <- c(1:ifelse(validation_split > 0, ((1 - validation_split) * nrow_file / validation_split), 1),
                          setdiff(1:nrow_file, index) + (1 - validation_split) * nrow_file / validation_split)
      } else {
        # in training, skip validation rows and training rows that are not found in index
        rows_to_skip <- c(ifelse(validation_split > 0,
                                 nrow_file + 1, 0):ifelse(validation_split > 0,
                                                          nrow_file / (1 - validation_split), 0),
                          setdiff(1:nrow_file, index))
        if (rows_to_skip[1] == 0) rows_to_skip <- rows_to_skip[-1]
      }
      trData <- import("pandas")$read_csv(datafile,
                                          skiprows = rows_to_skip,
                                          sep = ";")
      # Upload the rows to train
      skip <<- skip + batch_size
      if (skip >= nrow_file) {
        skip <<- 0
      }
      # Build inputs and output
      y_train <- to_categorical(trData[,1], num_classes = 10)
      x_train <- trData[,-1]
      if (mlp) {
        # Return data as is for mlp
        list(data.matrix(x_train), data.matrix(y_train))
      } else {
        # Return data reshaped for CNN
        list(array_reshape(data.matrix(x_train), c(nrow(x_train), 28, 28, 1)),
             data.matrix(y_train))
      }
    }
  } else {
    NULL
  }
}
I have added a validation_split argument to set the fraction of the training data that we want to use for validation.
However, using the generator to train the model does not seem to improve training time on the low-spec computer, but it uses almost 4 GB less RAM (especially when training with small batches of ~128 samples), and you can use the computer for other tasks while the code runs without crashing the program.
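A cheaper way to get shuffled batches without reading single rows at random (my own sketch, not part of the original approach) is to shuffle the file once on disk and then let a sequential, non-shuffling generator read contiguous blocks. This assumes the file fits in memory at least once and that traindatafile is the semicolon-separated file with a header used above; the output file name is made up:
# one-off shuffle of the data rows, keeping the header line in place
lines <- readLines(traindatafile)
set.seed(150)
writeLines(c(lines[1], sample(lines[-1])), "train_shuffled.csv")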
Here I leave the code to train and evaluate an MLP model using the generator:
MLP_model <- keras_model_sequential()
MLP_model %>%
layer_dense(units = 500, activation = 'relu', input_shape = c(784),
kernel_regularizer = regularizer_l2(l = 0.0001),
bias_regularizer = regularizer_l2(l = 0.0001)) %>%
layer_dropout(rate = 0.4, seed = 150) %>%
layer_batch_normalization() %>%
layer_dense(units = 300, activation = 'relu',
kernel_regularizer = regularizer_l2(l = 0.001),
bias_regularizer = regularizer_l2(l = 0.001)) %>%
layer_dropout(rate = 0.3, seed = 150) %>%
layer_batch_normalization() %>%
layer_dense(units = 10, activation = 'softmax',
kernel_regularizer = regularizer_l2(l = 0.001),
bias_regularizer = regularizer_l2(l = 0.001))
MLP_model %>% compile(
loss = loss_categorical_crossentropy,
optimizer = optimizer_adam(),
metrics = c('accuracy')
)
validation_split <- 0.2
history <- MLP_model %>% fit_generator(
data_generator(traindatafile,
batch_size_train, mlp = TRUE,
val = FALSE, validation_split = validation_split),
steps_per_epoch = round((1 - validation_split) * (py_eval(paste("sum(1 for line in open('", traindatafile, "'))", sep = '')) - 1) / (batch_size_train)),
validation_data = data_generator(traindatafile,
batch_size_train, mlp = TRUE,
val = TRUE, validation_split = validation_split),
validation_steps = round((validation_split) * (py_eval(paste("sum(1 for line in open('", traindatafile, "'))", sep = '')) - 1) / (batch_size_train)),
callbacks = c(early_stopping),
epochs = 10)
MLP_metrics_train <- MLP_model %>%
evaluate_generator(data_generator(traindatafile,
batch_size_eval,
mlp = TRUE,
val = FALSE,
shuffle = FALSE),
steps = ceiling((py_eval(paste("sum(1 for line in open('", traindatafile, "'))", sep = '')) - 1) / (batch_size_eval)))
MLP_metrics_test <- MLP_model %>%
evaluate_generator(data_generator(testdatafile,
batch_size_eval,
mlp = TRUE,
val = FALSE,
shuffle = FALSE),
steps = ceiling((py_eval(paste("sum(1 for line in open('", testdatafile, "'))", sep = '')) - 1) / (batch_size_eval)))
y_pred_mlp <- MLP_model %>%
predict_generator(data_generator(testdatafile,
batch_size_eval,
mlp = TRUE,
val = FALSE,
shuffle = FALSE),
steps = ceiling((py_eval(paste("sum(1 for line in open('", testdatafile, "'))", sep = '')) - 1) / (batch_size_eval)))
Hope this helps someone!
I'm using the R interface to Keras (for the first time) with a timeseries_generator, per the reprex below. How do I get fit_generator to complete all epochs?
I'm looking at custom generators as a possible alternative, but would ideally like to use the timeseries_generator assuming it is appropriate to my intended usage.
The intent is to predict the actual values several steps ahead by using the "feature" and three lagged copies of the actuals as predictors.
I'm assuming that differencing is not required since the feature is expected to indicate trend.
library(tidyverse)
library(keras)
# Function to generate multiple lags
# See: https://gist.github.com/romainfrancois/469ed8224ba4be48befec28cb1e1ff80
many_lags <- function(var, n = 10) {
var <- enquo(var)
indices <- seq_len(n)
map(indices, ~ quo(lag(!!var, !!.x))) %>%
set_names(sprintf("lag_%s_%02d", rlang::quo_text(var), indices))
}
# --- Simulate time-series
set.seed(42)
lags <- 3
samples <- 1000
ts <- tibble(time = 1:(1000 + lags)) %>%
mutate(
actual = seq(0.5, (1000 + lags) / 2, by = 0.5) + arima.sim(model = list(
ar = c(0.2, 0.3),
ma = 0.2,
order = c(2, 1, 1)
), 999 + lags) %>% as.double(),
feature = seq(0.5, (1000 + lags) / 2, by = 0.5)
) %>%
mutate(!!!many_lags(actual, lags)) %>%
tail(samples)
ts %>% ggplot(aes(time, actual)) +
geom_line()
# --- Centre & scale predictors
test_window <- 200
temp <- ts %>%
head(-test_window)
col_mean <- map_dbl(temp, mean) %>% replace(c(1:2), 0)
col_sd <- map_dbl(temp, sd) %>% replace(c(1:2), 1)
train_data <- ts %>%
head(-test_window) %>%
scale(col_mean, col_sd)
test_data <- ts %>%
tail(test_window) %>%
scale(col_mean, col_sd)
# --- Create generator objects
batch_size = 5
length <- 5 # forecast timesteps
train_gen <- timeseries_generator(
train_data[, 3:6],
targets = train_data[, 2],
length = length,
sampling_rate = 1,
stride = 1,
start_index = 1,
end_index = 600,
shuffle = FALSE,
reverse = FALSE,
batch_size = batch_size
)
val_gen <- timeseries_generator(
train_data[, 3:6],
targets = train_data[, 2],
length = length,
sampling_rate = 1,
stride = 1,
start_index = 601,
end_index = 800,
shuffle = FALSE,
reverse = FALSE,
batch_size = batch_size
)
test_gen <- timeseries_generator(
test_data[, 3:6],
targets = test_data[, 2],
length = length,
sampling_rate = 1,
stride = 1,
start_index = 1,
end_index = 200,
shuffle = FALSE,
reverse = FALSE,
batch_size = batch_size
)
# --- Train the model
model <- keras_model_sequential() %>%
layer_gru(
units = ncol(train_data[, 3:6]),
batch_size = batch_size,
input_shape = list(NULL, ncol(train_data[, 3:6]))
) %>%
layer_dense(units = 1)
summary(model)
#> Model: "sequential"
#> ___________________________________________________________________________
#> Layer (type) Output Shape Param #
#> ===========================================================================
#> gru (GRU) (5, 4) 108
#> ___________________________________________________________________________
#> dense (Dense) (5, 1) 5
#> ===========================================================================
#> Total params: 113
#> Trainable params: 113
#> Non-trainable params: 0
#> ___________________________________________________________________________
model %>% compile(loss = "mse",
optimizer = optimizer_rmsprop(),
metrics = list("mae"))
history <- model %>%
fit_generator(
train_gen,
steps_per_epoch = floor((train_gen$end_index - train_gen$start_index + 1) / batch_size),
epochs = 100,
validation_data = val_gen,
validation_steps = floor((val_gen$end_index - val_gen$start_index + 1) / batch_size)
)
Created on 2019-09-12 by the reprex package (v0.3.0)
I get the following messages:
2019-09-12 15:42:44.613462: I tensorflow/core/platform/cpu_feature_guard.cc:142] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
Epoch 1/100
119/119 [==============================] - 1s 10ms/step - loss: 20351.4386 - mean_absolute_error: 120.7280 - val_loss: 77980.3295 - val_mean_absolute_error: 279.1719
Epoch 2/100
WARNING:tensorflow:Your dataset iterator ran out of data; interrupting training. Make sure that your iterator can generate at least `steps_per_epoch * epochs` batches (in this case, 11900 batches). You may need to use the repeat() function when building your dataset.
0/119 [..............................] - ETA: 0s - loss: 0.0000e+00 - mean_absolute_error: 120.7280 - val_loss: 0.0000e+00 - val_mean_absolute_error: 0.0000e+00
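Since a custom generator is mentioned above as a possible alternative, here is a minimal closure-based sketch in the style of the generators used elsewhere on this page (my own addition; train_data, length and batch_size are the objects from the reprex, column 2 is the target and columns 3:6 are the predictors). Because it wraps around when it reaches end_index, it never runs out of batches, although I cannot say for certain that this is the only cause of the warning above:
make_gen <- function(data, length, start_index, end_index, batch_size) {
  i <- start_index + length
  function() {
    # wrap around once the window would run past end_index
    if (i + batch_size - 1 > end_index) i <<- start_index + length
    rows <- i:(i + batch_size - 1)
    i <<- i + batch_size
    samples <- array(0, dim = c(batch_size, length, 4))
    targets <- array(0, dim = c(batch_size))
    for (j in seq_along(rows)) {
      samples[j, , ] <- data[(rows[j] - length):(rows[j] - 1), 3:6]
      targets[j] <- data[rows[j], 2]
    }
    list(samples, targets)
  }
}
train_gen_custom <- make_gen(train_data, length, 1, 600, batch_size)
It could be passed to fit_generator() with steps_per_epoch = floor((600 - length) / batch_size), i.e. 119 steps for the values above.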
I built a Variational Autoencoder using Keras in R.
I noticed that if I train the model in a regular R session everything works great,
but when I train the model in a Shiny session it goes terribly wrong:
When the Shiny session gets to the line which trains the model:
history<- vae %>% fit(
x_train, x_train,
shuffle = TRUE,
epochs = 25,
batch_size = batch_size,
validation_data = list(x_test, x_test)
)
there is no feedback on the epochs and the whole computer gets stuck.
(I don't get any errors, the computer just freezes.)
Is there a special configuration to set when using Keras with Shiny?
Edit:
I use the Variational Autoencoder for dimensionality reduction.
Here is my function; I call it from the Shiny server:
get_variational_autoencoder<- function(data_set,
reduce_to = 2){
library(keras)
use_condaenv("r-tensorflow")
# Data preparation ---------------------------------
row_names<- row.names(data_set)
data_set<- normalize(data_set)
row.names(data_set)<- row_names
partition<- data_partition(data_set, .80)
x_train <- partition["train_set"]$train_set
x_test <- partition["test_set"]$test_set
# Parameters ---------------------------------------
batch_size = DEAFULT_BATCH_SIZE_FOR_AUTOANCODER
original_dim = dim(data_set)[2]
latent_dim = reduce_to
intermediate_dim = (2/3)*original_dim + latent_dim
nb_epoch = DEAFULT_NUMBER_OF_EPOCH_FOR_AUTOANCODER
epsilon_std = DEAFULT_EPSILON_FOR_AUTOANCODER
#encoder
# input layer
x <- layer_input(shape = c(original_dim))
# hidden intermediate, lower-res
h <- layer_dense(x, intermediate_dim, activation = "relu")
# latent var 1, 2-dim (mainly for plotting!): mean
z_mean <- layer_dense(h, latent_dim)
# latent var 2, 2-dim: variance
z_log_var <- layer_dense(h, latent_dim)
sampling <- function(arg){
z_mean <- arg[, 1:(latent_dim)]
z_log_var <- arg[, (latent_dim + 1):(2 * latent_dim)]
epsilon <- k_random_normal(
shape = c(k_shape(z_mean)[[1]]),
mean=0.,
stddev=epsilon_std
)
z_mean + k_exp(z_log_var/2)*epsilon
}
z <- layer_concatenate(list(z_mean, z_log_var)) %>%
layer_lambda(sampling)
# hidden intermediate, higher-res
decoder_h <- layer_dense(units = intermediate_dim, activation = "relu")
# decoder for the mean, high-res again
decoder_mean <- layer_dense(units = original_dim, activation = "sigmoid")
h_decoded <- decoder_h(z)
x_decoded_mean <- decoder_mean(h_decoded)
# the complete model, from input to decoded output
vae <- keras_model(x, x_decoded_mean)
# encoder, from inputs to latent space
encoder <- keras_model(x, z_mean)
# generator, from latent space to reconstructed inputs
decoder_input <- layer_input(shape = latent_dim)
h_decoded_2 <- decoder_h(decoder_input)
x_decoded_mean_2 <- decoder_mean(h_decoded_2)
generator <- keras_model(decoder_input, x_decoded_mean_2)
vae_loss <- function(x, x_decoded_mean){
xent_loss <- (original_dim/1.0)*loss_binary_crossentropy(x,
x_decoded_mean)
kl_loss <- -0.5*k_mean(1 + z_log_var - k_square(z_mean) -
k_exp(z_log_var), axis = -1L)
xent_loss + kl_loss
}
vae %>% compile(optimizer = "rmsprop", loss = vae_loss, metrics =
c('accuracy'))
# Model training ---------------------------------------------------------
history<- vae %>% fit(
x_train, x_train,
shuffle = TRUE,
epochs = 25,
batch_size = batch_size,
validation_data = list(x_test, x_test)
)
data_set_after_vae <- keras::predict(encoder, data_set, batch_size =
batch_size)
vae_result<- list ("v_autoencoder" = vae,
"data_set_after_vae" = data_set_after_vae %>%
as_data_frame(),
"history" = history,
"encoder" = encoder)
return (vae_result)
}
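This does not address the freeze itself, but here is a hedged sketch for at least getting per-epoch feedback when fit() is called from Shiny (it assumes the training call happens in server code and that keras and shiny are attached; run_training and its arguments are made-up names):
run_training <- function(vae, x_train, x_test, batch_size, epochs = 25) {
  shiny::withProgress(message = "Training VAE", value = 0, {
    # report progress from Keras back to the Shiny progress bar after every epoch
    progress_cb <- callback_lambda(
      on_epoch_end = function(epoch, logs) {
        shiny::incProgress(1 / epochs, detail = paste("epoch", epoch + 1))
      }
    )
    vae %>% fit(
      x_train, x_train,
      shuffle = TRUE,
      epochs = epochs,
      batch_size = batch_size,
      validation_data = list(x_test, x_test),
      verbose = 0,  # avoid flooding the Shiny console with the text progress bar
      callbacks = list(progress_cb)
    )
  })
}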
I have been reading Deep Learning with R, and in chapter 6 generators are introduced. The following generator, which yields (samples, targets), works without issue in fit_generator or evaluate_generator:
generator <- function(data, lookback, delay, min_index, max_index,
                      shuffle = FALSE, batch_size = 60, step = 1) {
  if (is.null(max_index))
    max_index <- nrow(data) - delay - 1
  i <- min_index + lookback
  function() {
    if (shuffle) {
      rows <- sample(c((min_index+lookback):max_index), size = batch_size)
    } else {
      if (i + batch_size >= max_index)
        i <<- min_index + lookback
      rows <- c(i:min(i + batch_size - 1, max_index))
      rows
      length(rows)
      i <<- i + length(rows)
    }
    samples <- array(0, dim = c(length(rows),
                                lookback / step,
                                dim(data)[[-1]]))
    targets <- array(0, dim = c(length(rows)))
    for (j in 1:length(rows)) {
      indices <- seq(rows[[j]] - lookback, rows[[j]],
                     length.out = dim(samples)[[2]])
      samples[j,,] <- data[indices,]
      targets[[j]] <- data[rows[[j]] + delay, 9]
    }
    list(samples, targets)
  }
}
test_gen <- generator(
data,
lookback = lookback,
delay = delay,
min_index = validation_index+1,
max_index = NULL,
step = step,
batch_size = batch_size
)
## no issues here
test_steps <- (nrow(data) - validation_index+1 - lookback) / batch_size
perf <- my_model %>% evaluate_generator(test_gen, steps = test_steps)
However, when trying to change the generator to yield only samples:
generator_pred <- function(data, lookback, delay, min_index, max_index,
                           shuffle = FALSE, batch_size = 60, step = 1) {
  if (is.null(max_index))
    max_index <- nrow(data) - delay - 1
  i <- min_index + lookback
  function() {
    if (shuffle) {
      rows <- sample(c((min_index+lookback):max_index), size = batch_size)
    } else {
      if (i + batch_size >= max_index)
        i <<- min_index + lookback
      rows <- c(i:min(i + batch_size - 1, max_index))
      rows
      length(rows)
      i <<- i + length(rows)
    }
    samples <- array(0, dim = c(length(rows),
                                lookback / step,
                                dim(data)[[-1]]))
    for (j in 1:length(rows)) {
      indices <- seq(rows[[j]] - lookback, rows[[j]],
                     length.out = dim(samples)[[2]])
      samples[j,,] <- data[indices,]
    }
    samples
  }
}
test_gen_pred <- generator_pred(
data,
lookback = lookback,
delay = delay,
min_index = validation_index+1,
max_index = NULL,
step = step,
batch_size = batch_size
)
test_steps <- (nrow(data) - validation_index+1 - lookback) / batch_size
predict_generator(my_model, test_gen_pred, steps = test_steps)
I get an error about the equivalent Python generator returning an array that is used in a comparison:
Exception in thread Thread-1064:
Traceback (most recent call last):
File "C:\Users\PBORDE~1\AppData\Local\CONTIN~1\ANACON~1\envs\R-TENS~1\lib\threading.py", line 916, in _bootstrap_inner
self.run()
File "C:\Users\PBORDE~1\AppData\Local\CONTIN~1\ANACON~1\envs\R-TENS~1\lib\threading.py", line 864, in run
self._target(*self._args, **self._kwargs)
File "C:\Users\PBORDE~1\AppData\Local\CONTIN~1\ANACON~1\envs\R-TENS~1\lib\site-packages\keras\utils\data_utils.py", line 579, in data_generator_task
generator_output = next(self._generator)
File "C:/Users/pbordeaux/Documents/R/win-library/3.4/reticulate/python\rpytools\generator.py", line 23, in __next__
return self.next()
File "C:/Users/pbordeaux/Documents/R/win-library/3.4/reticulate/python\rpytools\generator.py", line 39, in next
if (res == self.completed):
ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()
I read that the generator must return the same object that predict_on_batch takes as input. I ran the following successfully:
test_gen_pred <- generator_pred(
data,
lookback = lookback,
delay = delay,
min_index = validation_index+1,
max_index = NULL,
step = step,
batch_size = batch_size
)
t <- test_gen_pred()
predict_on_batch(my_model, t)
Is the generator interface not being implemented correctly? I checked that test_gen_pred() returns a tensor with the correct shape when called, and it does; I can call predict_on_batch successfully on its output.
You can also use series_generator() from the kerasgenerator package, which provides a return_target option if you want to use it for prediction.
A quick example:
First, set up the supervised settings:
# example data
data <- data.frame(
x = runif(80),
y = runif(80),
z = runif(80)
)
# variables
x <- c("x", "y")
y <- 2:3
# supervise settings
lookback <- 10
timesteps <- 10
# number of train sample
train_length <- 40
# data settings
batch_size <- 10
# train row indices
train_end <- nrow(data)
train_start <- train_end - train_length + 1
# number of steps to see full data
train_steps <- train_length / batch_size
Then you could define the generators like this:
# import libs
library(kerasgenerator)
# train generator
train_gen <- series_generator(
data = data,
y = y,
x = x,
lookback = lookback,
timesteps = timesteps,
start_index = train_start,
end_index = train_end,
batch_size = batch_size,
return_target = TRUE
)
# predict generator
predict_gen <- series_generator(
data = data,
y = y,
x = x,
lookback = lookback,
timesteps = timesteps,
start_index = train_start,
end_index = train_end,
batch_size = batch_size,
return_target = FALSE
)
Let's try the data generators on an example model:
# import libs
library(keras)
# initiate a sequential model
model <- keras_model_sequential()
# define the model
model %>%
# layer lstm
layer_lstm(
name = "lstm",
input_shape = list(timesteps, length(x)),
units = 16,
dropout = 0.1,
recurrent_dropout = 0.1,
return_sequences = FALSE
) %>%
# layer output
layer_dense(
name = "output",
units = length(y)
)
# compile the model
model %>% compile(
optimizer = "rmsprop",
loss = "mse"
)
# model summary
summary(model)
# set number of epochs
epochs <- 10
# model fitting
history <- model %>% fit_generator(
generator = train_gen,
steps_per_epoch = train_steps,
epochs = epochs
)
# history plot
plot(history)
# evaluate on train dataset
model %>% evaluate_generator(
generator = train_gen,
steps = train_steps
)
# predict on train dataset
model %>% predict_generator(
generator = predict_gen,
steps = train_steps
)
If you are interested in forecasting, it also provides forecast_generator(). See the vignettes for a full example.
Disclaimer: I'm the author of the package.
I have been looking for the exact same answer for a few days now, and finally got it to work by making my pred_generator return a list (instead of the samples directly)!
In your case:
generator_pred <- function(data, lookback, delay, min_index, max_index,
shuffle = FALSE, batch_size = 60, step = 1) {
<...>
list(samples)
}
}
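A quick sanity check (my own addition; it assumes the generators defined earlier in the question are in scope, and note that calling a generator advances its internal counter):
str(test_gen())        # evaluation generator: a list of two elements (samples and targets)
str(test_gen_pred())   # prediction generator, after the fix: a list with a single element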