Set class weights in Keras in R when there are multiple outputs

I'm using the keras package in R to fit a neural network model. The model I'm working on has two outputs: output1 is continuous (for regression) and output2 is binary (for classification).
Since we have a very imbalanced dataset for the classification problem (output2), I want to assign different class weights to deal with the imbalance, but we obviously don't need to do that for output1 (the regression).
Here is the sample code for the NN model that I'm working on:
input <- layer_input(shape = c(32, 24))
output <- input %>%
  layer_lstm(units = 64, dropout = 0.2, recurrent_dropout = 0.2)
pred1 <- output %>%
  layer_dense(units = 1, name = "output1")
pred2 <- output %>%
  layer_dense(units = 1, activation = "sigmoid", name = "output2")
model <- keras_model(
  input,
  list(pred1, pred2)
)
summary(model)
model %>% compile(
  optimizer = "rmsprop",
  loss = list(
    output1 = "mse",
    output2 = "binary_crossentropy"
  ),
  loss_weights = list(
    output1 = 0.25,
    output2 = 10
  )
)
history <- model %>% fit(
  train_x, list(output1 = train_y1, output2 = train_y2),
  epochs = 10,
  batch_size = 5000,
  class_weight = ???,
  validation_data = list(valid_x, list(output1 = valid_y1, output2 = valid_y2))
)
If we had just one binary output, I know the class weights could be assigned with:
class_weight = list("0" = 1, "1" = 100),
but that no longer works when we have two outputs and we only want to assign the weights to one of them. I guess I need to somehow specify the name of the binary output in class_weight so that the weights apply only to output2, but I don't know how to do that in R.
Does anyone know how to assign class weights to the binary output only when we have two outputs (one regression, one classification)? Thank you very much for the help!
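One thing that may be worth trying (not confirmed for every Keras/TensorFlow version; older multi-output APIs accepted it, newer ones may reject it) is passing a named list keyed by the output layer's name, so the class weights apply only to output2:
# Hedged sketch: class weights only for the classification head "output2".
# If this form of class_weight is rejected by your Keras/TensorFlow version,
# per-sample weights passed via sample_weight are the usual fallback.
history <- model %>% fit(
  train_x, list(output1 = train_y1, output2 = train_y2),
  epochs = 10,
  batch_size = 5000,
  class_weight = list(output2 = list("0" = 1, "1" = 100)),
  validation_data = list(valid_x, list(output1 = valid_y1, output2 = valid_y2))
)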

Related

How to fix "sink stack is full" over training a model in R

I want to train a regression model with keras_model_sequential in R. To find the best parameters over a grid search I used tuning_run, but I got this error:
training run 1/128 (flags = list(0.05, 66, "relu", 8, 10, 0.001, 0.2))
Error in sink(file = output_file, type = "output", split = TRUE) :
sink stack is full
Calls: tuning_run ... with_changed_file_copy -> force -> sink -> .handleSimpleError -> h
I should mention that a folder named "runs" was created in the data/script path, containing many subfolders whose names look like dates; maybe this is the reason.
library(plyr)
library(boot)
library(keras)
library(tensorflow)
library(kerasR)
library(tidyverse)
library(tfruns)
library(MLmetrics)
df = mainlist[[1]]  # a 33*31 data frame (33 samples, 31 features; the last column is the target)
x = length(df) - 1
print(x)
df1 = df[, 2:x]
# normalization
df2 = df[, 2:length(df)]
normalize <- function(x) {
  return((x - min(x)) / (max(x) - min(x)))
}
maxmindf <- as.data.frame(lapply(df2, normalize))
attach(maxmindf)
df_norm <- as.matrix(maxmindf)
# Determine sample size
ind <- sample(2, nrow(df_norm), replace = TRUE, prob = c(0.80, 0.20))
# Split the data (peaks)
training <- df_norm[ind == 1, 1:(ncol(df_norm) - 1)]
test1 <- df_norm[ind == 2, 1:(ncol(df_norm) - 1)]
training_target <- df_norm[ind == 1, ncol(df_norm)]
test1_target <- df_norm[ind == 2, ncol(df_norm)]
# number of nodes in the first hidden layer
u1_1 = ceiling((1/2) * (ncol(training) + 1))
u2_1 = ceiling(1 * (ncol(training) + 1))
u3_1 = ceiling((2/3) * (ncol(training) + 1))
u4_1 = ceiling(2 * ncol(training))
#### a) Declaring the flags for hyperparameters
FLAGS = flags(
  flag_numeric("dropout1", 0.05),
  flag_integer("units", u1_1),
  flag_string("activation1", "relu"),
  flag_integer("batchsize1", 8),
  flag_integer("Epoch1", 50),
  flag_numeric("learning_rate", 0.01),
  flag_numeric("val_split", 0.2),
  flag_numeric("reg_l1", 0.001)
)
#### b) Defining the DNN model
build_model <- function() {
  model <- keras_model_sequential()
  model %>%
    layer_dense(units = FLAGS$units, activation = FLAGS$activation1,
                input_shape = c(dim(training)[2])) %>%
    layer_dropout(rate = FLAGS$dropout1) %>%
    layer_dense(units = 1, activation = "linear")
  #### c) Compiling the DNN model
  model %>% compile(
    loss = "mse",
    optimizer = optimizer_adam(FLAGS$learning_rate),
    metrics = c("mse"))
  model
}
model <- build_model()
model %>% summary()
print_dot_callback <- callback_lambda(
  on_epoch_end = function(epoch, logs) {
    if (epoch %% 80 == 0) cat("\n")
    cat(".")
  })
early_stop <- callback_early_stopping(monitor = "val_loss", mode = "min", patience = 20)
########### d) Fitting the DNN model #################
model_Final <- build_model()
model_fit_Final <- model_Final %>% fit(
  training,
  training_target,
  epochs = FLAGS$Epoch1, batch_size = FLAGS$batchsize1,
  shuffle = FALSE,
  validation_split = FLAGS$val_split,
  verbose = 0,
  callbacks = list(early_stop, print_dot_callback)
)
################ a) Inner cross-validation ##########################
nCVI = 5
Hyperpar = data.frame()  # results of each hyperparameter combination from each inner partition will be saved here
for (i in 1:nCVI) {  # repeat to choose the best parameters
  print("I is:")
  print(i)
  Sam_per = sample(1:nrow(training), nrow(training))
  X_trII = training[Sam_per, ]
  y_trII = training_target[Sam_per]
  # print(head(X_trII, 3))
  print("----------------------")
  print(head(y_trII, 3))
  ############ b) Grid search using the tuning_run() function of the tfruns package ########
  runs.sp <- tuning_run(paste0("train.R"),
                        runs_dir = '_tuningE1',
                        flags = list(dropout1 = c(0, 0.05),
                                     units = c(u1_1, u2_1),
                                     activation1 = c("relu"),
                                     batchsize1 = c(8, 16),
                                     Epoch1 = c(10, 50),
                                     learning_rate = c(0.001),
                                     val_split = c(0.2)),
                        sample = 0.2,
                        confirm = FALSE,
                        echo = FALSE)
  # clean_runs(ls_runs(completed == FALSE))
  ##### c) Saving each combination of hyperparameters in the Hyperpar data frame
  runs.sp = runs.sp[order(runs.sp$flag_units, runs.sp$flag_dropout1, runs.sp$flag_batchsize1, runs.sp$flag_Epoch1), ]
  runs.sp$grid_length = 1:nrow(runs.sp)  # save the grid length along with the important parameters
  Parameters = data.frame(grid_length = runs.sp$grid_length,
                          metric_val_mse = runs.sp$metric_val_mse,
                          flag_dropout1 = runs.sp$flag_dropout1,
                          flag_units = runs.sp$flag_units,
                          flag_batchsize1 = runs.sp$flag_batchsize1,
                          epochs_completed = runs.sp$epochs_completed,
                          flag_learning_rate = runs.sp$flag_learning_rate,
                          flag_activation1 = runs.sp$flag_activation1)
  Hyperpar = rbind(Hyperpar, data.frame(Parameters))  # save the important parameters
}
##### d) Summarizing the five inner folds by hyperparameter combination
# the average prediction performance is obtained for each inner fold
Hyperpar %>%
  group_by(grid_length) %>%
  summarise(val_mse = mean(metric_val_mse),
            dropout1 = mean(flag_dropout1),
            units = mean(flag_units),
            batchsize1 = mean(flag_batchsize1),
            learning_rate = mean(flag_learning_rate),
            epochs = mean(epochs_completed)) %>%
  select(grid_length, val_mse, dropout1, units, batchsize1,
         learning_rate, epochs) %>%
  mutate_if(is.numeric, funs(round(., 3)))
Hyperpar_Opt = Hyperpar
###### e) Select the best combination of hyperparameters
Min = min(Hyperpar_Opt$val_mse)
pos_opt = which(Hyperpar_Opt$val_mse == Min)
pos_opt = pos_opt[1]
Optimal_Hyper = Hyperpar_Opt[pos_opt, ]
##### Selecting the best hyperparameters
Drop_O = Optimal_Hyper$dropout1
Epoch_O = round(Optimal_Hyper$epochs, 0)
Units_O = round(Optimal_Hyper$units, 0)
activation_O = unique(Hyperpar$flag_activation1)
batchsize_O = round(Optimal_Hyper$batchsize1, 0)
lr_O = Optimal_Hyper$learning_rate
print_dot_callback <- callback_lambda(
  on_epoch_end = function(epoch, logs) {
    if (epoch %% 20 == 0) cat("\n")
    cat(".")
  })
# refitting the model with optimal values
model_Sec <- keras_model_sequential()
model_Sec %>%
  layer_dense(units = Units_O, activation = activation_O,
              input_shape = c(dim(training)[2])) %>%
  layer_dropout(rate = Drop_O) %>%
  layer_dense(units = 1, activation = activation_O)
model_Sec %>% compile(
  loss = "mean_squared_error",
  optimizer = optimizer_adam(lr = lr_O),
  metrics = c("mean_squared_error"))
# fit the model with our data
ModelFited <- model_Sec %>% fit(
  X_trII, y_trII,
  epochs = Epoch_O, batch_size = batchsize_O,  ##### validation_split = 0.2,
  verbose = 0,
  callbacks = list(early_stop, print_dot_callback)
)
############# g) Prediction of the testing set ##########################
Yhat = model_Sec %>% predict(test1)
y_p = Yhat
y_p_tst = as.numeric(y_p)
# y_tst = y[tst_set]
plot(test1_target, y_p_tst)
MSE = mean((test1_target - y_p_tst)^2)
Do you have any ideas?
Thanks in advance.
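Not part of the original post, but one workaround that is sometimes suggested for "sink stack is full" is to close any output sinks left open by a previous interrupted tuning_run() before starting a new one; a minimal sketch using base R only:
# Hedged sketch: sink.number() reports how many output diversions are open;
# sink() closes the most recent one. Run this before calling tuning_run() again.
while (sink.number() > 0) sink()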

Fixing initial weights when training Keras model in R

I want to fix the weights of the model I'm training so I get reproducible results for a report. The problem is that on different runs I get similar but slightly different results, and training sometimes takes 600 epochs, sometimes 3500, using callback_early_stopping monitoring the validation MSE with a min_delta of 0.00003.
Overall I'm happy enough with the results of all the runs, but I need to find out whether there's a way to get reproducible results by fixing the weights.
I tried setting the seed at various points in the process (before creating the model, before compiling it, and before training) but nothing seems to work. Is there any way to do it?
BATCH <- nrow(x_train)
SHAPE <- ncol(x_train)
# Create a neural network model
set.seed(42)
model <- keras_model_sequential()
model %>%
  layer_dense(units = 12, activation = "relu", input_shape = c(SHAPE)) %>%
  layer_dense(units = 24, activation = "relu") %>%
  layer_dense(units = 1, activation = "linear")
# print model summary
print(summary(model))
# initialise early stopping callback and optimiser
early_stoping <- callback_early_stopping(
  monitor = "val_loss",
  min_delta = 0.00003,
  patience = 50,
  restore_best_weights = TRUE
)
optim <- optimizer_adam(learning_rate = 0.00005)
set.seed(42)
model %>% compile(
  optimizer = optim,
  loss = "mse",
  metrics = c("mse", "mae")
)
# fit model
set.seed(42)
val_data <- list(x_val = x_val, y_val = y_val)
hist <- model %>% fit(
  x = x_train,
  y = y_train,
  batch_size = BATCH,
  epochs = 6000,
  validation_data = val_data,
  shuffle = FALSE,
  callbacks = early_stoping
)
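Not from the original question, but since set.seed() only seeds R's own RNG and not TensorFlow's, one approach worth trying (assuming a reasonably recent tensorflow R package) is tensorflow::set_random_seed(), which seeds R, NumPy and TensorFlow together; full determinism may also require running on the CPU:
library(tensorflow)
library(keras)
# Hedged sketch: seed everything once, before the model is created.
# disable_gpu = TRUE avoids non-deterministic GPU kernels (at the cost of speed).
set_random_seed(42, disable_gpu = TRUE)
model <- keras_model_sequential() %>%
  layer_dense(units = 12, activation = "relu", input_shape = c(SHAPE)) %>%
  layer_dense(units = 24, activation = "relu") %>%
  layer_dense(units = 1, activation = "linear")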

Built Variational Autoencoder using Keras in R and Shiny

I built a variational autoencoder using Keras in R.
I noticed that if I train the model in a regular R session everything works great,
but when I train the model in a Shiny session it goes terribly wrong:
when the Shiny session gets to the line that trains the model:
history <- vae %>% fit(
  x_train, x_train,
  shuffle = TRUE,
  epochs = 25,
  batch_size = batch_size,
  validation_data = list(x_test, x_test)
)
there is no feedback on the epochs and the whole computer gets stuck
(I don't get any errors, the computer just crashes).
Is there a special configuration to set when using Keras with Shiny?
Edit:
I use the variational autoencoder for dimensionality reduction;
here is my function, which I call from the Shiny server:
get_variational_autoencoder <- function(data_set,
                                        reduce_to = 2) {
  library(keras)
  use_condaenv("r-tensorflow")
  # Data preparation ---------------------------------
  row_names <- row.names(data_set)
  data_set <- normalize(data_set)
  row.names(data_set) <- row_names
  partition <- data_partition(data_set, .80)
  x_train <- partition["train_set"]$train_set
  x_test <- partition["test_set"]$test_set
  # Parameters ---------------------------------------
  batch_size = DEAFULT_BATCH_SIZE_FOR_AUTOANCODER
  original_dim = dim(data_set)[2]
  latent_dim = reduce_to
  intermediate_dim = (2/3) * original_dim + latent_dim
  nb_epoch = DEAFULT_NUMBER_OF_EPOCH_FOR_AUTOANCODER
  epsilon_std = DEAFULT_EPSILON_FOR_AUTOANCODER
  # encoder
  # input layer
  x <- layer_input(shape = c(original_dim))
  # hidden intermediate, lower-res
  h <- layer_dense(x, intermediate_dim, activation = "relu")
  # latent var 1, 2-dim (mainly for plotting!): mean
  z_mean <- layer_dense(h, latent_dim)
  # latent var 2, 2-dim: variance
  z_log_var <- layer_dense(h, latent_dim)
  sampling <- function(arg) {
    z_mean <- arg[, 1:(latent_dim)]
    z_log_var <- arg[, (latent_dim + 1):(2 * latent_dim)]
    epsilon <- k_random_normal(
      shape = c(k_shape(z_mean)[[1]]),
      mean = 0.,
      stddev = epsilon_std
    )
    z_mean + k_exp(z_log_var / 2) * epsilon
  }
  z <- layer_concatenate(list(z_mean, z_log_var)) %>%
    layer_lambda(sampling)
  # hidden intermediate, higher-res
  decoder_h <- layer_dense(units = intermediate_dim, activation = "relu")
  # decoder for the mean, high-res again
  decoder_mean <- layer_dense(units = original_dim, activation = "sigmoid")
  h_decoded <- decoder_h(z)
  x_decoded_mean <- decoder_mean(h_decoded)
  # the complete model, from input to decoded output
  vae <- keras_model(x, x_decoded_mean)
  # encoder, from inputs to latent space
  encoder <- keras_model(x, z_mean)
  # generator, from latent space to reconstructed inputs
  decoder_input <- layer_input(shape = latent_dim)
  h_decoded_2 <- decoder_h(decoder_input)
  x_decoded_mean_2 <- decoder_mean(h_decoded_2)
  generator <- keras_model(decoder_input, x_decoded_mean_2)
  vae_loss <- function(x, x_decoded_mean) {
    xent_loss <- (original_dim / 1.0) * loss_binary_crossentropy(x, x_decoded_mean)
    kl_loss <- -0.5 * k_mean(1 + z_log_var - k_square(z_mean) - k_exp(z_log_var), axis = -1L)
    xent_loss + kl_loss
  }
  vae %>% compile(optimizer = "rmsprop", loss = vae_loss, metrics = c('accuracy'))
  # Model training ---------------------------------------------------------
  history <- vae %>% fit(
    x_train, x_train,
    shuffle = TRUE,
    epochs = 25,
    batch_size = batch_size,
    validation_data = list(x_test, x_test)
  )
  data_set_after_vae <- predict(encoder, data_set, batch_size = batch_size)
  vae_result <- list("v_autoencoder" = vae,
                     "data_set_after_vae" = data_set_after_vae %>% as_data_frame(),
                     "history" = history,
                     "encoder" = encoder)
  return(vae_result)
}
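Not from the original post, but one configuration that is often worth trying when fit() hangs inside Shiny is to silence the console and metrics output Keras streams during training, since the Shiny session owns the R console. A minimal sketch: verbose and view_metrics are standard keras fit() arguments; the progress callback and the shiny::incProgress() call are assumptions about how one might surface feedback in the UI (they require the call to be wrapped in shiny::withProgress()):
history <- vae %>% fit(
  x_train, x_train,
  shuffle = TRUE,
  epochs = 25,
  batch_size = batch_size,
  validation_data = list(x_test, x_test),
  verbose = 0,           # no per-epoch console output inside the Shiny session
  view_metrics = FALSE,  # no live metrics viewer
  callbacks = list(
    # hypothetical progress hook, assuming this runs inside shiny::withProgress()
    callback_lambda(on_epoch_end = function(epoch, logs) {
      shiny::incProgress(1 / 25, detail = paste("epoch", epoch + 1))
    })
  )
)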

Validation accuracy much lower than training accuracy in Keras for text classification

I am new to Keras and trying to create a model. The issue is that my training accuracy is around 80 percent, but the validation accuracy is drastically lower at 15 percent. I have 545 rows in my dataset, and I have normalized all the input features. Any help on what can be tweaked would be really appreciated.
Sharing the complete data and code here
https://drive.google.com/open?id=1g8Cmw2bmAI9DnOU-rB4sjsOeBuFp6NUy
# Normalize data
data[, 1:(ncol(data) - 1)] = normalize(data[, 1:(ncol(data) - 1)])
data[, ncol(data)] = as.numeric(data[, ncol(data)]) - 1
set.seed(128)
ind = sample(2, nrow(data), replace = T, prob = c(0.7, 0.3))
training = data[ind == 1, 1:(ncol(data) - 1)]
test = data[ind == 2, 1:(ncol(data) - 1)]
traintarget = data[ind == 1, ncol(data)]
testtarget = data[ind == 2, ncol(data)]
# One hot encoding
trainLabels = to_categorical(traintarget)
testLabels = to_categorical(testtarget)
print(testLabels)
model = keras_model_sequential()
model %>%
  layer_dense(units = 150, activation = 'relu', input_shape = c(520)) %>%
  layer_dense(units = 50, activation = 'relu') %>%
  layer_dense(units = 9, activation = 'softmax')
model %>%
  compile(loss = 'categorical_crossentropy', optimizer = 'adam', metrics = 'accuracy')
history = model %>%
  fit(training,
      trainLabels,
      epochs = 300,
      batch_size = 32,
      validation_split = 0.2)
prob = model %>%
  predict_proba(test)
pred = model %>%
  predict_classes(test)
table2 = table(Predicted = pred, Actual = testtarget)
cbind(prob, pred, testtarget)
Simply put, when your model succeeds on training but not on validation, it is overfitting. The best way to combat this is (a) making sure that your inputs actually predict the outputs, because otherwise a large enough model will just memorize the historical data, and (b) adding a dropout layer to your network (see the sketch below). Finally, 500-odd training samples is a little low for training a neural network.
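As a sketch of the second suggestion (not in the original answer), the model above with dropout layers inserted between the dense layers might look like this; the 0.3 and 0.2 rates are illustrative assumptions:
model = keras_model_sequential()
model %>%
  layer_dense(units = 150, activation = 'relu', input_shape = c(520)) %>%
  layer_dropout(rate = 0.3) %>%   # randomly drop 30% of activations during training
  layer_dense(units = 50, activation = 'relu') %>%
  layer_dropout(rate = 0.2) %>%
  layer_dense(units = 9, activation = 'softmax')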

How to get the labels of clusters after using autoencoder in R

I'm a beginner in machine learning. I'm doing clustering using an autoencoder in R (the h2o package). So far I've written the following code:
mydata = h2o.importFile(path = mfile)
NN_model = h2o.deeplearning(
  x = 2:9,
  training_frame = mydata,
  hidden = c(2),
  epochs = 100,
  activation = "Tanh",
  autoencoder = TRUE
)
train_supervised_features = h2o.deepfeatures(NN_model, mydata, layer = 1)
For my data there are not too many columns (as you can see in the code, only 8 variables for now), but lots of rows.
Then I plotted the 2 nodes in train_supervised_features obtained above, and I got a plot like this:
It is clear that there are 8 clusters in my data (right?)
My question is: how can I extract the labels from the autoencoder results? I want to add the labels to the original data and plot it in colors using these labels, something like this:
Since the autoencoder doesn't have any notion of "clusters", you would have to call h2o.kmeans() on the 2D dataset first to get the cluster assignments. Then you can score the dataset with that k-means model, using h2o.predict(model, 2Ddata), and you'll get a cluster label for each row.
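A minimal sketch of that suggestion, assuming the train_supervised_features frame from the question and 8 clusters (the exact column indices of the two deep features are an assumption here):
library(h2o)
# k-means on the 2-D deep-feature frame produced by h2o.deepfeatures()
km_model <- h2o.kmeans(training_frame = train_supervised_features, x = 1:2, k = 8)
# score the same frame to get one cluster label per row
cluster_labels <- h2o.predict(km_model, train_supervised_features)
# attach the labels to the original data for a colored plot
mydata_labeled <- h2o.cbind(mydata, cluster_labels)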
Basically, an AE can be a good DNN for reconstructing the input; however, you can also use the latent layer for clustering.
The important steps:
1- To initialize the cluster assignment you may apply simple k-means to your data and label it;
2- Train your AE to learn features;
3- Extract the output of the layer before the latent layer, which contains the learned representation of your data;
4- Cluster that representation with k-means.
I hope this helps.
Here is an example I have put together in Keras:
library(keras)
library(caret)
library(tidyverse)
c(c(xtrain, ytrain), c(xtest, ytest)) %<-% dataset_mnist()
xtrain = xtrain/255
xtest = xtest/255
input_size = dim(xtrain)[2]*dim(xtrain)[3]
latent_size = 10
print(input_size)
x_train = array_reshape(xtrain, dim=c(dim(xtrain)[1], input_size))
x_test = array_reshape(xtest, dim=c(dim(xtest)[1], input_size))
x <- rbind(x_test, x_train)  # x_test and x_train are already scaled to [0, 1] above
# Encoder
encoder_input = layer_input(shape = input_size)
encoder_output = encoder_input %>%
  layer_dense(units = 256, activation = "relu") %>%
  layer_activation_leaky_relu() %>%
  layer_dense(units = latent_size) %>%
  layer_activation_leaky_relu()
encoderoder = keras_model(encoder_input, encoder_output)
summary(encoderoder)
# Decoder
decoder_input = layer_input(shape = latent_size)
decoder_output = decoder_input %>%
  layer_dense(units = 256, activation = "relu") %>%
  layer_activation_leaky_relu() %>%
  layer_dense(units = input_size, activation = "relu") %>%
  layer_activation_leaky_relu()
decoderoder = keras_model(decoder_input, decoder_output)
summary(decoderoder)
# Autoencoder
autoencoderoder_input = layer_input(shape = input_size)
autoencoderoder_output = autoencoderoder_input %>%
  encoderoder() %>%
  decoderoder()
autoencoderoder = keras_model(autoencoderoder_input, autoencoderoder_output)
summary(autoencoderoder)
autoencoderoder %>% compile(optimizer="rmsprop", loss="binary_crossentropy")
autoencoderoder %>% fit(x_train,x_train, epochs=20, batch_size=256)
encoderoded_imgs = encoderoder %>% predict(x_test)
decoderoded_imgs = decoderoder %>% predict(encoderoded_imgs)
# Images plot
pred_images = array_reshape(decoderoded_imgs, dim = c(dim(decoderoded_imgs)[1], 28, 28))
n = 10
op = par(mfrow = c(12, 2), mar = c(1, 0, 0, 0))
for (i in 1:n) {
  plot(as.raster(pred_images[i,,]))
  plot(as.raster(xtest[i,,]))
}
# Saving trained Net
autoencoderoder_weights <- autoencoderoder %>%
  keras::get_weights()
keras::save_model_weights_hdf5(object = autoencoderoder, filepath = '..../autoencoderoder_weights.hdf5', overwrite = TRUE)
encoderoder_model <- keras_model(inputs = encoder_input, outputs = encoderoder$output)
encoderoder_model %>% keras::load_model_weights_hdf5(filepath = "..../autoencoderoder_weights.hdf5", skip_mismatch = TRUE, by_name = TRUE)
encoderoder_model %>% compile(
  loss = 'mean_squared_error',
  optimizer = 'adam',
  metrics = c('accuracy')
)
embeded_points <- encoderoder_model %>%
  keras::predict_on_batch(x = x_train)
summary(encoderoder_model)
# Getting layer
layer_name <- "dense_1"
intermediate_layer_model <- keras_model(inputs = encoderoder_model$input,
                                        outputs = get_layer(encoderoder_model, layer_name)$output)
intermediate_output <- predict(intermediate_layer_model, x)
# Clustering latent space
km <- stats::kmeans(intermediate_output, centers = 10L, nstart = 20L)
labPrediction <- km$cluster
plot(labPrediction)
# The End
The cluster labels are available in the labPrediction vector.
For reference:
https://www.datatechnotes.com/2020/02/how-to-build-simple-autoencoder-with-keras-in-r.html
