partition of data
set.seed(1234)
ind <- sample(2, nrow(bronx_data), replace = T, prob = c(.7,.3))
train <- bronx_data[ind==1,2:11]
test <- bronx_data[ind==2,2:11]
train_target <- bronx_data[ind==1,1]
test_target <- bronx_data[ind==2,1]
normalize my data**
m <- colMeans(train)
s <- apply(train, 2, sd)
train <- scale(train, center = m, scale = s)
test <- scale(test, center = m, scale = s) # use same mean and sd obtained form train data
Model
This is my model
library(keras)
model <- keras_model_sequential()
model %>%
layer_dense(units = 5, activation = 'relu', input_shape = c(10)) %>%
layer_dense(units = 1)
I get good output but the problem I am having is un-scaling the data. Someone please HELP. I am new coder.
I've tried
unscale(vals, norm.data, col.ids)
and got the following error
Error in scale.default(data, center = FALSE, scale = 1/scale) : length of 'scale' must equal the number of columns of 'x'
Related
I'm learning keras, and would like to see the predicted numbers that are returned. The model has a number of items returned, but none of them seem to be the predicted values.
df <- MASS::Boston
index <- sample(c(TRUE, FALSE), nrow(df), replace=TRUE, prob=c(0.7,0.3))
train_features <- Boston[index,]
test_features <- Boston[!index,]
train_labels <- Boston$medv[index]
test_labels <- Boston$medv[!index]
train_features <- scale(train_features)
train_features <- train_features[,1:ncol(train_features)]
test_features <- scale(test_features)
test_features <- test_features[,1:ncol(test_features)]
mean <- apply(train_features, 2, mean)
sd <- apply(train_features, 2, sd)
train_data <- scale(train_features, center = mean, scale = sd)
test_data <- scale(test_features, center = mean, scale = sd)
train_targets <- Boston$medv[index]
test_targets <- Boston$medv[!index]
Here is where the model is built:
build_model <- function() {
model <- keras_model_sequential() %>%
layer_dense(64, activation = "relu") %>%
layer_dense(64, activation = "relu") %>%
layer_dense(1)
model %>% compile(optimizer = "rmsprop",
loss = "mse",
metrics = "mse")
model
}
Next we set up five folds, and track all_scores:
k <- 5
fold_id <- sample(rep(1:k, length.out = nrow(train_data)))
num_epochs <- 100
all_scores <- numeric()
for (i in 1:k) {
cat("Processing fold #", i, "\n")
val_indices <- which(fold_id == i)
val_data <- train_data[val_indices, ]
val_targets <- train_targets[val_indices]
partial_train_data <- train_data[-val_indices, ]
partial_train_targets <- train_targets[-val_indices]
model <- build_model()
model %>% fit (
partial_train_data,
partial_train_targets,
epochs = num_epochs,
batch_size = 16,
verbose = 0
)
results <- model %>%
evaluate(val_data, val_targets, verbose = 0)
all_scores[[i]] <- results[['mse']]
}
keras.RMSE <- sqrt(mean(all_scores))
However, none of the variables seem to have the predicted values. A few examples:
all_scores is a set of RMSE scores (which I also want)
val_targets appears to be the wrong dimensions
model$fit does not return a value or set of values
model$predict generates predicted values, but those have already been generated, and I can't locate them.
How are the predicted values returned in a keras model?
I want to train a regression model by "keras_model_sequential" in R. For finding the best parameters over the grid search I have used "tuning_run". But I got this error:
training run 1/128 (flags = list(0.05, 66, "relu", 8, 10, 0.001, 0.2))
Error in sink(file = output_file, type = "output", split = TRUE) :
sink stack is full
Calls: tuning_run ... with_changed_file_copy -> force -> sink -> .handleSimpleError -> h
I need to mention, that a folder named "runs" was created in the data and script path which has a lot of subfolders whose name is like a date format. maybe this is the reason.
library(plyr)
library(boot)
library(keras)
library(tensorflow)
library(kerasR)
library(tidyverse)
library(tfruns)
library(MLmetrics)
df= mainlist[[1]] # data which is a 33*31 dataframe (33 samples and 31 features which last column is target)
x = (length(df))-1
print(x)
df1 = df[, 2:x]
#normalization
df2 = df[, 2:length(df)]
normalize <- function(x) {
return ((x - min(x)) / (max(x) - min(x)))
}
maxmindf <- as.data.frame(lapply(df2, normalize))
attach(maxmindf)
df_norm<-as.matrix(maxmindf)
# Determine sample size
ind <- sample(2, nrow(df_norm), replace=TRUE, prob=c(0.80, 0.20))
# Split the data(peaks)
training <- df_norm[ind==1, 1:ncol(df_norm)-1]
test1 <- df_norm[ind==2, 1:ncol(df_norm)-1]
training_target <- df_norm[ind==1, ncol(df_norm)]
test1_target <- df_norm[ind==2, ncol(df_norm)]
#number of nodes in the first hidden layer
u1_1 = ceiling((1/2) * (ncol(training)+1))
u2_1 = ceiling(1* (ncol(training)+1))
u3_1 = ceiling((2/3) * (ncol(training)+1))
u4_1 = ceiling(2*(ncol(training)))
####a) Declaring the flags for hyperparameters
FLAGS = flags(
flag_numeric("dropout1", 0.05),
flag_integer("units",u1_1),
flag_string("activation1", "relu"),
flag_integer("batchsize1",8),
flag_integer("Epoch1",50),
flag_numeric("learning_rate", 0.01),
flag_numeric("val_split",0.2),
flag_numeric("reg_l1",0.001)
)
# ####b) Defining the DNN model
build_model<-function() {
model <- keras_model_sequential()
model %>%
layer_dense(units = FLAGS$units, activation = FLAGS$activation1, input_shape = c(dim(training)[2])) %>%
layer_dropout(rate = FLAGS$dropout1) %>%
layer_dense(units=1, activation ="linear")
#####c) Compiling the DNN model
model %>% compile(
loss = "mse",
optimizer =optimizer_adam(FLAGS$learning_rate),
metrics = c("mse"))
model
}
model<-build_model()
model %>% summary()
print_dot_callback <- callback_lambda(
on_epoch_end = function(epoch, logs) {
if (epoch %% 80 == 0) cat("\n")
cat(".")})
early_stop <- callback_early_stopping(monitor = "val_loss", mode='min',patience =20)
###########d) Fitting the DNN model#################
model_Final<-build_model()
model_fit_Final<-model_Final %>% fit(
training,
training_target,
epochs =FLAGS$Epoch1, batch_size = FLAGS$batchsize1,
shuffled=F,
validation_split = FLAGS$val_split,
verbose=0,
callbacks = list(early_stop, print_dot_callback)
)
################a) Inner cross-validation##########################
nCVI=5
Hyperpar = data.frame() #the results of each combination of hyperparameters resulting from each inner partition will be saved
for (i in 1:nCVI){ #do it to choose best parameters
print("I is:")
print(i)
Sam_per=sample(1:nrow(training),nrow(training))
X_trII=training[Sam_per,]
y_trII=training_target[Sam_per]
# print(head(X_trII, 3))
print("----------------------")
print(head(y_trII,3))
############b) Grid search using the tuning_run() function of tfruns package########
runs.sp<-tuning_run(paste0("train.R")
,runs_dir = '_tuningE1'
,flags=list(dropout1 = c(0,0.05),
units = c(u1_1, u2_1),
activation1 = ("relu"),
batchsize1 = c(8, 16),
Epoch1 = c(10,50),
learning_rate = c(0.001),
val_split = c(0.2)),
sample = 0.2,
confirm = FALSE,
echo =F)
# clean_runs(ls_runs(completed == FALSE))
#####c) Saving each combination of hyperparameters in the Hyperpar data.frame
runs.sp = runs.sp[order(runs.sp$flag_units,runs.sp$flag_dropout1, runs.sp$flag_batchsize1, runs.sp$flag_Epoch1),]
runs.sp$grid_length = 1:nrow(runs.sp) #we save the grid lenght and also important parameters
Parameters = data.frame(grid_length=runs.sp$grid_length,
metric_val_mse=runs.sp$metric_val_mse,
flag_dropout1=runs.sp$flag_dropout1,
flag_units=runs.sp$flag_units,
flag_batchsize1=runs.sp$flag_batchsize1,
epochs_completed=runs.sp$epochs_completed,
flag_learning_rate=runs.sp$flag_learning_rate,
flag_activation1=runs.sp$flag_activation1)
Hyperpar = rbind(Hyperpar,data.frame(Parameters)) #we saved the important parameters
}
#####d) Summarizing the five inner fold by hyperparameter combination
#the average prediction performance is obtained for each inner fold
Hyperpar %>%
group_by(grid_length) %>%
summarise(val_mse=mean(metric_val_mse),
dropout1=mean(flag_dropout1),
units=mean(flag_units),
batchsize1=mean(flag_batchsize1),
learning_rate=mean(flag_learning_rate),
epochs=mean( epochs_completed)) %>%
select(grid_length,val_mse,dropout1,units,batchsize1,
learning_rate, epochs) %>%
mutate_if(is.numeric, funs(round(., 3)))
Hyperpar_Opt = Hyperpar
######e) ############ select the best combinition of hyperparameters
Min = min(Hyperpar_Opt$val_mse)
pos_opt = which(Hyperpar_Opt$val_mse==Min)
pos_opt=pos_opt[1]
Optimal_Hyper=Hyperpar_Opt[pos_opt,]
#####Selecting the best hyperparameters
Drop_O = Optimal_Hyper$dropout1
Epoch_O = round(Optimal_Hyper$epochs,0)
Units_O = round(Optimal_Hyper$units,0)
activation_O = unique(Hyperpar$flag_activation1)
batchsize_O = round(Optimal_Hyper$batchsize1,0)
lr_O = Optimal_Hyper$learning_rate
print_dot_callback <- callback_lambda(
on_epoch_end = function(epoch, logs) {
if (epoch %% 20 == 0) cat("\n")
cat(".")})
#refitting the model with optimal values
model_Sec<-keras_model_sequential()
model_Sec %>%
layer_dense(units =Units_O , activation =activation_O, input_shape =
c(dim(training)[2])) %>%
layer_dropout(rate =Drop_O) %>%
layer_dense(units =1, activation =activation_O)
model_Sec %>% compile(
loss = "mean_squared_error",
optimizer = optimizer_adam(lr=lr_O),
metrics = c("mean_squared_error"))
# fit the model with our data
ModelFited<-model_Sec %>% fit(
X_trII, y_trII,
epochs=Epoch_O, batch_size =batchsize_O, #####validation_split=0.2,
early_stop,
verbose=0
,callbacks=list(print_dot_callback)
)
#############g) Prediction of testing set ##########################
Yhat=model_Sec%>% predict(test1)
y_p=Yhat
y_p_tst =as.numeric(y_p)
#y_tst=y[tst_set]
plot(test1_target,y_p_tst)
MSE=mean((test1_target - y_p_tst)^2)
Do you have any ideas?
Thanks in advance.
I have a question regarding applying a neural network in categorical data.
1- I have one output which is numeric (Connection.Duration)
2- I have 5 inputs, 4 of them (EVSE.ID, User.ID, Fee, Day) are categorical and 1 (Time) is numeric.
I want to apply a neural network to predict the Connection.Duration. I do not know the correct command to use for categorical data. I used model.matrix but I did not how to continue with the new data frame (m) which contains the categorical data.
I would like to ask for help please.
data$Fee <- as.factor(data$Fee)
data$EVSE.ID <- as.factor(data$EVSE.ID)
data$User.ID <- as.factor(data$User.ID)
data$Day <- as.factor(data$Day)
data$Time <- as.factor(data$Time)
data$Connection.Duration <- as.factor(data$Connection.Duration)
m <- model.matrix(Connection.Duration ~ EVSE.ID+Time+Day+Fee+User.ID,
data= data)
# Neural Networks
n <- neuralnet(Connection.Duration ~ EVSE.ID+Time+Day+Fee+User.ID,
data = m,
hidden=c(100,60))
# Data partition
set.seed(1234)
ind <- sample(2, nrow(m), replace = TRUE, prob = c(0.7, 0.3))
training <- m[ind==1,1:5]
testing <- m[ind==2,1:5]
trainingtarget <- m[ind==1, 6]
testingtarget <- m[ind==2, 6]
# Normalize
m <- colMeans(training)
s <- apply(training, 2, sd)
training <- scale(training, center = m, scale = s)
testing <- scale(testing, center = m, scale = s)
# Create Model
model <- keras_model_sequential()
model %>%
layer_dense(units = 5, activation = 'relu', input_shape = c(5)) %>%
layer_dense(units = 1)
# Compile
model %>% compile(loss= 'mse',
optimizer= 'rmsprop',
metrics='mae')
# Fit model
mymodel <- model %>%
fit(training,
trainingtarget,
epochs= 100,
batch_size = 32,
validation_split = 0.2)
# Evaluate
model %>% evaluate(testing, testingtarget)
pred <- model %>% predict(testing)
mean(testingtarget- pred^2)
plot(testingtarget, pred)
# Fine-tune Model
model <- keras_model_sequential()
model %>%
layer_dense(units = 100, activation = 'relu', input_shape = c(5)) %>%
layer_dropout(rate = 0.4) %>%
layer_dense(units = 60, activation = 'relu', input_shape = c(5)) %>%
layer_dropout(rate = 0.2) %>%
layer_dense(units = 1)
# Compile
model %>% compile(loss= 'mse',
optimizer= optimizer_rmsprop(lr=0.0001),
metrics='mae')
# Fit model
mymodel <- model %>%
fit(training,
trainingtarget,
epochs= 100,
batch_size = 32,
validation_split = 0.2)
# Evaluate
model %>% evaluate(testing, testingtarget)
pred <- model %>% predict(testing)
mean(testingtarget- pred^2)
plot(testingtarget, pred)
What you're looking for is called "one hot encoding". There are functions in tensorflow/keras to help out with the encoding.
But otherwise, I would try to do it up front. I would not rely on model.matrix as it doesn't give you quite what you want.
You can easily write your own function, but here's an example using the mltools package:
library(data.table)
library(mltools)
one_hot(data.table(x = factor(letters), n = 1:26))
Note: it requires data.table rather than data.frame but you can convert your data back and forth.
This is a rather lengthy one, so please bear with me, unfortunately enough the error occurs right at the very end...I cannot predict on the unseen test set!
I would like to perform text classification with word embeddings (that I have trained on my data set) that are embedded into neural networks.
I simply have column with textual descriptions = input and four different price classes = target.
For a reproducible example, here are the necessary data set and the word embedding:
DF: https://www.dropbox.com/s/it0jsbv8e7nkryt/DF.csv?dl=0
WordEmb: https://www.dropbox.com/s/ia5fmio2e0plwkr/WordEmb.txt?dl=0
And here my code:
set.seed(2077)
DF = read.delim("DF.csv", header = TRUE, sep = ",",
dec = ".", stringsAsFactors = FALSE)
DF <- DF[,-1]
# parameters
max_num_words = 9000 # simply see number of observations
validation_split = 0.3
embedding_dim = 300
##### Data Preparation #####
# split into training and test set
set.seed(2077)
n <- nrow(DF)
shuffled <- DF[sample(n),]
# Split the data in train and test
train <- shuffled[1:round(0.7 * n),]
test <- shuffled[(round(0.7 * n) + 1):n,]
rm(n, shuffled)
# predictor/target variable
x_train <- train$Description
x_test <- test$Description
y_train <- train$Price_class
y_test <- test$Price_class
### encode target variable ###
# One hot encode training target values
trainLabels <- to_categorical(y_train)
trainLabels <- trainLabels[, 2:5]
# One hot encode test target values
testLabels <- keras::to_categorical(y_test)
testLabels <- testLabels[, 2:5]
### encode predictor variable ###
# pad sequences
tokenizer <- text_tokenizer(num_words = max_num_words)
# finally, vectorize the text samples into a 2D integer tensor
set.seed(2077)
tokenizer %>% fit_text_tokenizer(x_train)
train_data <- texts_to_sequences(tokenizer, x_train)
tokenizer %>% fit_text_tokenizer(x_test)
test_data <- texts_to_sequences(tokenizer, x_test)
# determine average length of document -> set as maximal sequence length
seq_mean <- stri_count(train_data, regex="\\S+")
mean((seq_mean))
max_sequence_length = 70
# This turns our lists of integers into a 2D integer tensor of shape`(samples, maxlen)`
x_train <- keras::pad_sequences(train_data, maxlen = max_sequence_length)
x_test <- keras::pad_sequences(test_data, maxlen = max_sequence_length)
word_index <- tokenizer$word_index
Encoding(names(word_index)) <- "UTF-8"
#### PREPARE EMBEDDING MATRIX ####
embeddings_index <- new.env(parent = emptyenv())
lines <- readLines("WordEmb.txt")
for (line in lines) {
values <- strsplit(line, ' ', fixed = TRUE)[[1]]
word <- values[[1]]
coefs <- as.numeric(values[-1])
embeddings_index[[word]] <- coefs
}
embedding_dim <- 300
embedding_matrix <- array(0,c(max_num_words, embedding_dim))
for(word in names(word_index)){
index <- word_index[[word]]
if(index < max_num_words){
embedding_vector <- embeddings_index[[word]]
if(!is.null(embedding_vector)){
embedding_matrix[index+1,] <- embedding_vector
}
}
}
##### Convolutional Neural Network #####
# load pre-trained word embeddings into an Embedding layer
# note that we set trainable = False so as to keep the embeddings fixed
num_words <- min(max_num_words, length(word_index) + 1)
embedding_layer <- keras::layer_embedding(
input_dim = num_words,
output_dim = embedding_dim,
weights = list(embedding_matrix),
input_length = max_sequence_length,
trainable = FALSE
)
# train a 1D convnet with global maxpooling
sequence_input <- layer_input(shape = list(max_sequence_length), dtype='int32')
preds <- sequence_input %>%
embedding_layer %>%
layer_conv_1d(filters = 128, kernel_size = 1, activation = 'relu') %>%
layer_max_pooling_1d(pool_size = 5) %>%
layer_conv_1d(filters = 128, kernel_size = 1, activation = 'relu') %>%
layer_max_pooling_1d(pool_size = 5) %>%
layer_conv_1d(filters = 128, kernel_size = 1, activation = 'relu') %>%
layer_max_pooling_1d(pool_size = 2) %>%
layer_flatten() %>%
layer_dense(units = 128, activation = 'relu') %>%
layer_dense(units = 4, activation = 'softmax')
model <- keras_model(sequence_input, preds)
model %>% compile(
loss = 'categorical_crossentropy',
optimizer = 'adam',
metrics = c('acc')
)
model %>% keras::fit(
x_train,
trainLabels,
batch_size = 1024,
epochs = 20,
validation_split = 0.3
)
Now here is where I get stuck:
I cannot use the results of the NN to predict on the unseen test data set:
# Predict the classes for the test data
classes <- model %>% predict_classes(x_test, batch_size = 128)
I get this error:
Error in py_get_attr_impl(x, name, silent) :
AttributeError: 'Model' object has no attribute 'predict_classes'
Afterwards, I'd proceed like this:
# Confusion matrix
table(y_test, classes)
# Evaluate on test data and labels
score <- model %>% evaluate(x_val, testLabels, batch_size = 128)
# Print the score
print(score)
For now the actual accuracy does not really matter since this is only a small example of my data set.
I know this is a long one but AAANNY help would be very muuuch appreciated.
I'm trying to build a toy model to demonstrate how LSTMs can predict the next few iteration of a sequence. My code runs without any errors until the last line.
# Simulating dummy data
seq <- data.frame(x_train = (seq(0,8, 1)/10), y_train = (seq(0,8, 1)/10))
seq$x_train <- seq$x_train + 0.1
# Reshaping
x_train <- array_reshape(seq$x_train, dim = c(9,1,1))
y_train <- array_reshape(seq$y_train, dim = c(9,1))
# Checking dimensions
dim(x_train); dim(y_train)
# Building the model
m <- keras_model_sequential()
m %>%
layer_lstm(units = 10, input_shape =c(9,1), batch_size = 9, return_sequences = T, stateful = T) %>%
layer_dense(units = 1)
summary(m)
# Compiling
m %>% compile(loss = "mse", optimizer = "adam")
This is where the issue arises -
for (i in 1:9) {
m %>% fit(object = x_train, y_train, batch_size = 1, shuffle = FALSE)
m %>% reset_states()
}
I get the following error, and I'm not sure why:
Error: $ operator is invalid for atomic vectors
Anyone know what I'm doing wrong?