I have followed online tutorial about image recognition using Keras in R ending up with the following code:
library(keras)
view_list <- c("Inside", "Outside")
output_n <- length(view_list)
# image size to scale down to (original images are 100 x 100 px)
img_width <- 20
img_height <- 20
target_size <- c(img_width, img_height)
# RGB = 3 channels
channels <- 3
train_image_files_path <- "C:/Users/Tomek/Desktop/Photos"
valid_image_files_path <- "C:/Users/Tomek/Desktop/Photos valid"
test_image_files_path <- "C:/Users/Tomek/Desktop/Photos test"
# optional data augmentation
train_data_gen = image_data_generator(rescale = 1/255 )
# Validation data shouldn't be augmented! But it should also be scaled.
valid_data_gen <- image_data_generator(rescale = 1/255)
test_data_gen <- image_data_generator(rescale = 1/255)
# training images
train_image_array_gen <- flow_images_from_directory(train_image_files_path,
train_data_gen,
target_size = target_size,
class_mode = "categorical",
classes = view_list,
seed = 42)
# validation images
valid_image_array_gen <- flow_images_from_directory(valid_image_files_path,
valid_data_gen,
target_size = target_size,
class_mode = "categorical",
classes = view_list,
seed = 42)
# test images
test_image_array_gen <- flow_images_from_directory(test_image_files_path,
test_data_gen,
target_size = target_size,
class_mode = "categorical",
classes = view_list,
seed = 42)
cat("Number of images per class:")
table(factor(train_image_array_gen$classes))
train_image_array_gen$class_indices
views_classes_indices <- train_image_array_gen$class_indices
save(views_classes_indices, file = "C:/Users/Tomek/Desktop/views_classes_indices.RData")
# number of training samples
train_samples <- train_image_array_gen$n
# number of validation samples
valid_samples <- valid_image_array_gen$n
# number of test samples
test_samples <- test_image_array_gen$n
# define batch size and number of epochs
batch_size <- 1
epochs <- 10
# initialise model
model <- keras_model_sequential()
# add layers
model %>%
layer_conv_2d(filter = 32, kernel_size = c(3,3), padding = "same", input_shape = c(img_width, img_height, channels)) %>%
layer_activation("relu") %>%
# Second hidden layer
layer_conv_2d(filter = 16, kernel_size = c(3,3), padding = "same") %>%
layer_activation_leaky_relu(0.5) %>%
layer_batch_normalization() %>%
# Use max pooling
layer_max_pooling_2d(pool_size = c(2,2)) %>%
layer_dropout(0.25) %>%
# Flatten max filtered output into feature vector
# and feed into dense layer
layer_flatten() %>%
layer_dense(100) %>%
layer_activation("relu") %>%
layer_dropout(0.5) %>%
# Outputs from dense layer are projected onto output layer
layer_dense(output_n) %>%
layer_activation("softmax")
# compile
model %>% compile(
loss = "categorical_crossentropy",
optimizer = optimizer_rmsprop(lr = 0.0001, decay = 1e-6),
metrics = "accuracy"
)
summary(model)
# fit
hist <- model %>% fit_generator(
# training data
train_image_array_gen,
# epochs
steps_per_epoch = as.integer(train_samples / batch_size),
epochs = epochs,
# validation data
validation_data = valid_image_array_gen,
validation_steps = as.integer(valid_samples / batch_size),
# print progress
verbose = 2,
callbacks = list(
# save best model after every epoch
callback_model_checkpoint("C:/Users/Tomek/Desktop/views_checkpoints.h5", save_best_only = TRUE),
# only needed for visualising with TensorBoard
callback_tensorboard(log_dir = "C:/Users/Tomek/Desktop/keras/logs")
)
)
plot(hist)
#prediction
a <- model %>% predict_generator(test_image_array_gen, steps = 5, verbose = 1, workers = 1)
a <- round(a, digits = 4)
The classification model (with two output classes) seems to work quite nicely. The accuracy on the train and the validation sets is equal to ~99% and ~95% respectively. However, I am not sure about the results of predictions on the test set. It looks like the predictions for observations are shuffled and I am not able to find a way to check which prediction refers to which image(observation). I have seen some threads on that issue: github medium 1 medium 2.
Nevertheless, I am really new to Keras and Python and I have hard time applying the suggested solutions in R. What is the easiest way to track which prediction refers to which image from the test set in predict_generator output?
I figured it out and the answer is simple. The shuffling is caused by argument shuffle which by default is set to true. After changing it, predictions correspond to the order of test_image_array_gen$filenames However, bear in mind that the order of predictions (and filenames) is different than the one on Windows which may be a bit confusing.
Order in Windows: Photo 1 Photo 2 ... Photo 10 Photo 11
Order in R: Photo 1 Photo 10 Photo 11 ... Photo 2
# test images
test_image_array_gen <- flow_images_from_directory(test_image_files_path,
test_data_gen,
target_size = target_size,
class_mode = "categorical",
classes = view_list,
seed = 42,
shuffle = FALSE)
#prediction
a <- model %>% predict_generator(test_image_array_gen, steps = ceiling(test_samples/32), verbose = 1, workers = 1)
#bind predictions with photos names
b <- cbind.data.frame(a, test_image_array_gen$filenames)
Related
I want to train a regression model by "keras_model_sequential" in R. For finding the best parameters over the grid search I have used "tuning_run". But I got this error:
training run 1/128 (flags = list(0.05, 66, "relu", 8, 10, 0.001, 0.2))
Error in sink(file = output_file, type = "output", split = TRUE) :
sink stack is full
Calls: tuning_run ... with_changed_file_copy -> force -> sink -> .handleSimpleError -> h
I need to mention, that a folder named "runs" was created in the data and script path which has a lot of subfolders whose name is like a date format. maybe this is the reason.
library(plyr)
library(boot)
library(keras)
library(tensorflow)
library(kerasR)
library(tidyverse)
library(tfruns)
library(MLmetrics)
df= mainlist[[1]] # data which is a 33*31 dataframe (33 samples and 31 features which last column is target)
x = (length(df))-1
print(x)
df1 = df[, 2:x]
#normalization
df2 = df[, 2:length(df)]
normalize <- function(x) {
return ((x - min(x)) / (max(x) - min(x)))
}
maxmindf <- as.data.frame(lapply(df2, normalize))
attach(maxmindf)
df_norm<-as.matrix(maxmindf)
# Determine sample size
ind <- sample(2, nrow(df_norm), replace=TRUE, prob=c(0.80, 0.20))
# Split the data(peaks)
training <- df_norm[ind==1, 1:ncol(df_norm)-1]
test1 <- df_norm[ind==2, 1:ncol(df_norm)-1]
training_target <- df_norm[ind==1, ncol(df_norm)]
test1_target <- df_norm[ind==2, ncol(df_norm)]
#number of nodes in the first hidden layer
u1_1 = ceiling((1/2) * (ncol(training)+1))
u2_1 = ceiling(1* (ncol(training)+1))
u3_1 = ceiling((2/3) * (ncol(training)+1))
u4_1 = ceiling(2*(ncol(training)))
####a) Declaring the flags for hyperparameters
FLAGS = flags(
flag_numeric("dropout1", 0.05),
flag_integer("units",u1_1),
flag_string("activation1", "relu"),
flag_integer("batchsize1",8),
flag_integer("Epoch1",50),
flag_numeric("learning_rate", 0.01),
flag_numeric("val_split",0.2),
flag_numeric("reg_l1",0.001)
)
# ####b) Defining the DNN model
build_model<-function() {
model <- keras_model_sequential()
model %>%
layer_dense(units = FLAGS$units, activation = FLAGS$activation1, input_shape = c(dim(training)[2])) %>%
layer_dropout(rate = FLAGS$dropout1) %>%
layer_dense(units=1, activation ="linear")
#####c) Compiling the DNN model
model %>% compile(
loss = "mse",
optimizer =optimizer_adam(FLAGS$learning_rate),
metrics = c("mse"))
model
}
model<-build_model()
model %>% summary()
print_dot_callback <- callback_lambda(
on_epoch_end = function(epoch, logs) {
if (epoch %% 80 == 0) cat("\n")
cat(".")})
early_stop <- callback_early_stopping(monitor = "val_loss", mode='min',patience =20)
###########d) Fitting the DNN model#################
model_Final<-build_model()
model_fit_Final<-model_Final %>% fit(
training,
training_target,
epochs =FLAGS$Epoch1, batch_size = FLAGS$batchsize1,
shuffled=F,
validation_split = FLAGS$val_split,
verbose=0,
callbacks = list(early_stop, print_dot_callback)
)
################a) Inner cross-validation##########################
nCVI=5
Hyperpar = data.frame() #the results of each combination of hyperparameters resulting from each inner partition will be saved
for (i in 1:nCVI){ #do it to choose best parameters
print("I is:")
print(i)
Sam_per=sample(1:nrow(training),nrow(training))
X_trII=training[Sam_per,]
y_trII=training_target[Sam_per]
# print(head(X_trII, 3))
print("----------------------")
print(head(y_trII,3))
############b) Grid search using the tuning_run() function of tfruns package########
runs.sp<-tuning_run(paste0("train.R")
,runs_dir = '_tuningE1'
,flags=list(dropout1 = c(0,0.05),
units = c(u1_1, u2_1),
activation1 = ("relu"),
batchsize1 = c(8, 16),
Epoch1 = c(10,50),
learning_rate = c(0.001),
val_split = c(0.2)),
sample = 0.2,
confirm = FALSE,
echo =F)
# clean_runs(ls_runs(completed == FALSE))
#####c) Saving each combination of hyperparameters in the Hyperpar data.frame
runs.sp = runs.sp[order(runs.sp$flag_units,runs.sp$flag_dropout1, runs.sp$flag_batchsize1, runs.sp$flag_Epoch1),]
runs.sp$grid_length = 1:nrow(runs.sp) #we save the grid lenght and also important parameters
Parameters = data.frame(grid_length=runs.sp$grid_length,
metric_val_mse=runs.sp$metric_val_mse,
flag_dropout1=runs.sp$flag_dropout1,
flag_units=runs.sp$flag_units,
flag_batchsize1=runs.sp$flag_batchsize1,
epochs_completed=runs.sp$epochs_completed,
flag_learning_rate=runs.sp$flag_learning_rate,
flag_activation1=runs.sp$flag_activation1)
Hyperpar = rbind(Hyperpar,data.frame(Parameters)) #we saved the important parameters
}
#####d) Summarizing the five inner fold by hyperparameter combination
#the average prediction performance is obtained for each inner fold
Hyperpar %>%
group_by(grid_length) %>%
summarise(val_mse=mean(metric_val_mse),
dropout1=mean(flag_dropout1),
units=mean(flag_units),
batchsize1=mean(flag_batchsize1),
learning_rate=mean(flag_learning_rate),
epochs=mean( epochs_completed)) %>%
select(grid_length,val_mse,dropout1,units,batchsize1,
learning_rate, epochs) %>%
mutate_if(is.numeric, funs(round(., 3)))
Hyperpar_Opt = Hyperpar
######e) ############ select the best combinition of hyperparameters
Min = min(Hyperpar_Opt$val_mse)
pos_opt = which(Hyperpar_Opt$val_mse==Min)
pos_opt=pos_opt[1]
Optimal_Hyper=Hyperpar_Opt[pos_opt,]
#####Selecting the best hyperparameters
Drop_O = Optimal_Hyper$dropout1
Epoch_O = round(Optimal_Hyper$epochs,0)
Units_O = round(Optimal_Hyper$units,0)
activation_O = unique(Hyperpar$flag_activation1)
batchsize_O = round(Optimal_Hyper$batchsize1,0)
lr_O = Optimal_Hyper$learning_rate
print_dot_callback <- callback_lambda(
on_epoch_end = function(epoch, logs) {
if (epoch %% 20 == 0) cat("\n")
cat(".")})
#refitting the model with optimal values
model_Sec<-keras_model_sequential()
model_Sec %>%
layer_dense(units =Units_O , activation =activation_O, input_shape =
c(dim(training)[2])) %>%
layer_dropout(rate =Drop_O) %>%
layer_dense(units =1, activation =activation_O)
model_Sec %>% compile(
loss = "mean_squared_error",
optimizer = optimizer_adam(lr=lr_O),
metrics = c("mean_squared_error"))
# fit the model with our data
ModelFited<-model_Sec %>% fit(
X_trII, y_trII,
epochs=Epoch_O, batch_size =batchsize_O, #####validation_split=0.2,
early_stop,
verbose=0
,callbacks=list(print_dot_callback)
)
#############g) Prediction of testing set ##########################
Yhat=model_Sec%>% predict(test1)
y_p=Yhat
y_p_tst =as.numeric(y_p)
#y_tst=y[tst_set]
plot(test1_target,y_p_tst)
MSE=mean((test1_target - y_p_tst)^2)
Do you have any ideas?
Thanks in advance.
I have been trying to build a multi-input model in keras. One input branch would be images and the second one some metaData for the corresponding images.
For the images I need a generator function which would input batches of images. The metaData is in a tabular form.
Now I am wondering how I should pass the data to the model so the right image would be processed with the respective metaData Information. For your Information this will be a regression Task.
The Input Data I have:
Images in dir1/
Data Frame with the path and features.
path feature1 feature2 target
image1.jpg 23.5 100 16
image2.jpg 25.0 88 33
The code I have for now:
generator function for Images:
train_datagen <- image_data_generator(rescale = 1/255)
train_generator <- flow_images_from_dataframe(
dataframe = joined_path_with_metadata,
directory = 'data_dir',
x_col = "path",
y_col = "train",
generator = train_datagen,
target_size = c(150, 150),
batch_size = 20,
color_mode = 'rgb',
class_mode = "sparse"
)
model definition:
vision_model <- keras_model_sequential()
vision_model %>%
layer_conv_2d(filters = 64,
kernel_size = c(3, 3),
activation = 'relu',
padding = 'same',
input_shape = c(150, 150, 3)) %>%
layer_max_pooling_2d(pool_size = c(2, 2)) %>%
layer_flatten()
# Now let's get a tensor with the output of our vision model:
image_input <- layer_input(shape = c(150, 150, 3))
encoded_image <- image_input %>% vision_model
# ANN for tabular data
tabular_input <- layer_input(shape = ncol(dataframe), dtype = 'float32')
mlp_model <- tabular_input %>%
layer_dense(
units = 16,
kernel_initializer = "uniform",
activation = "relu") # Dropout to prevent overfitting
layer_dropout(rate = 0.1) %>%
layer_dense(
units = 32,
kernel_initializer = "uniform",
activation = "relu") %>%
# concatenate the metadata and the image vector then
# train a linear regression on it
output <- layer_concatenate(c(mlp_model, encoded_image)) %>%
layer_dense(units = 1, activation='linear')
# This is the final model:
vqa_model <- keras_model(inputs = c(image_input, tabular_input), outputs = output)
compile:
vqa_model %>% compile(
optimizer = 'adam',
loss = 'mean_squared_error',
metrics = c('mean_squared_error')
)
and the last step would be to fit the model. I am not sure how to do this to make sure that the first row of features will be taken as the metadata of the Images which are read in in batches.
This is a rather lengthy one, so please bear with me, unfortunately enough the error occurs right at the very end...I cannot predict on the unseen test set!
I would like to perform text classification with word embeddings (that I have trained on my data set) that are embedded into neural networks.
I simply have column with textual descriptions = input and four different price classes = target.
For a reproducible example, here are the necessary data set and the word embedding:
DF: https://www.dropbox.com/s/it0jsbv8e7nkryt/DF.csv?dl=0
WordEmb: https://www.dropbox.com/s/ia5fmio2e0plwkr/WordEmb.txt?dl=0
And here my code:
set.seed(2077)
DF = read.delim("DF.csv", header = TRUE, sep = ",",
dec = ".", stringsAsFactors = FALSE)
DF <- DF[,-1]
# parameters
max_num_words = 9000 # simply see number of observations
validation_split = 0.3
embedding_dim = 300
##### Data Preparation #####
# split into training and test set
set.seed(2077)
n <- nrow(DF)
shuffled <- DF[sample(n),]
# Split the data in train and test
train <- shuffled[1:round(0.7 * n),]
test <- shuffled[(round(0.7 * n) + 1):n,]
rm(n, shuffled)
# predictor/target variable
x_train <- train$Description
x_test <- test$Description
y_train <- train$Price_class
y_test <- test$Price_class
### encode target variable ###
# One hot encode training target values
trainLabels <- to_categorical(y_train)
trainLabels <- trainLabels[, 2:5]
# One hot encode test target values
testLabels <- keras::to_categorical(y_test)
testLabels <- testLabels[, 2:5]
### encode predictor variable ###
# pad sequences
tokenizer <- text_tokenizer(num_words = max_num_words)
# finally, vectorize the text samples into a 2D integer tensor
set.seed(2077)
tokenizer %>% fit_text_tokenizer(x_train)
train_data <- texts_to_sequences(tokenizer, x_train)
tokenizer %>% fit_text_tokenizer(x_test)
test_data <- texts_to_sequences(tokenizer, x_test)
# determine average length of document -> set as maximal sequence length
seq_mean <- stri_count(train_data, regex="\\S+")
mean((seq_mean))
max_sequence_length = 70
# This turns our lists of integers into a 2D integer tensor of shape`(samples, maxlen)`
x_train <- keras::pad_sequences(train_data, maxlen = max_sequence_length)
x_test <- keras::pad_sequences(test_data, maxlen = max_sequence_length)
word_index <- tokenizer$word_index
Encoding(names(word_index)) <- "UTF-8"
#### PREPARE EMBEDDING MATRIX ####
embeddings_index <- new.env(parent = emptyenv())
lines <- readLines("WordEmb.txt")
for (line in lines) {
values <- strsplit(line, ' ', fixed = TRUE)[[1]]
word <- values[[1]]
coefs <- as.numeric(values[-1])
embeddings_index[[word]] <- coefs
}
embedding_dim <- 300
embedding_matrix <- array(0,c(max_num_words, embedding_dim))
for(word in names(word_index)){
index <- word_index[[word]]
if(index < max_num_words){
embedding_vector <- embeddings_index[[word]]
if(!is.null(embedding_vector)){
embedding_matrix[index+1,] <- embedding_vector
}
}
}
##### Convolutional Neural Network #####
# load pre-trained word embeddings into an Embedding layer
# note that we set trainable = False so as to keep the embeddings fixed
num_words <- min(max_num_words, length(word_index) + 1)
embedding_layer <- keras::layer_embedding(
input_dim = num_words,
output_dim = embedding_dim,
weights = list(embedding_matrix),
input_length = max_sequence_length,
trainable = FALSE
)
# train a 1D convnet with global maxpooling
sequence_input <- layer_input(shape = list(max_sequence_length), dtype='int32')
preds <- sequence_input %>%
embedding_layer %>%
layer_conv_1d(filters = 128, kernel_size = 1, activation = 'relu') %>%
layer_max_pooling_1d(pool_size = 5) %>%
layer_conv_1d(filters = 128, kernel_size = 1, activation = 'relu') %>%
layer_max_pooling_1d(pool_size = 5) %>%
layer_conv_1d(filters = 128, kernel_size = 1, activation = 'relu') %>%
layer_max_pooling_1d(pool_size = 2) %>%
layer_flatten() %>%
layer_dense(units = 128, activation = 'relu') %>%
layer_dense(units = 4, activation = 'softmax')
model <- keras_model(sequence_input, preds)
model %>% compile(
loss = 'categorical_crossentropy',
optimizer = 'adam',
metrics = c('acc')
)
model %>% keras::fit(
x_train,
trainLabels,
batch_size = 1024,
epochs = 20,
validation_split = 0.3
)
Now here is where I get stuck:
I cannot use the results of the NN to predict on the unseen test data set:
# Predict the classes for the test data
classes <- model %>% predict_classes(x_test, batch_size = 128)
I get this error:
Error in py_get_attr_impl(x, name, silent) :
AttributeError: 'Model' object has no attribute 'predict_classes'
Afterwards, I'd proceed like this:
# Confusion matrix
table(y_test, classes)
# Evaluate on test data and labels
score <- model %>% evaluate(x_val, testLabels, batch_size = 128)
# Print the score
print(score)
For now the actual accuracy does not really matter since this is only a small example of my data set.
I know this is a long one but AAANNY help would be very muuuch appreciated.
I built a Variational Autoencoder using Keras in R,
I noticed that if I train the model on regular R session all work great,
but when I train the model on Shiny session it goes terribly wrong:
When Shiny session gets to the line which train the model:
history<- vae %>% fit(
x_train, x_train,
shuffle = TRUE,
epochs = 25,
batch_size = batch_size,
validation_data = list(x_test, x_test)
)
there is no feedback on epoch and the all computer get stuck.
(I don't get any errors just crushing computer)
Is there special configuration to set when using Keras on Shiny?
Edit:
I use the Variational Autoencoder for dimensionality reduction,
here is my function, I use it from shiny server:
get_variational_autoencoder<- function(data_set,
reduce_to = 2){
library(keras)
use_condaenv("r-tensorflow")
# Data preparation ---------------------------------
row_names<- row.names(data_set)
data_set<- normalize(data_set)
row.names(data_set)<- row_names
partition<- data_partition(data_set, .80)
x_train <- partition["train_set"]$train_set
x_test <- partition["test_set"]$test_set
# Parameters ---------------------------------------
batch_size = DEAFULT_BATCH_SIZE_FOR_AUTOANCODER
original_dim = dim(data_set)[2]
latent_dim = reduce_to
intermediate_dim = (2/3)*original_dim + latent_dim
nb_epoch = DEAFULT_NUMBER_OF_EPOCH_FOR_AUTOANCODER
epsilon_std = DEAFULT_EPSILON_FOR_AUTOANCODER
#encoder
# input layer
x <- layer_input(shape = c(original_dim))
# hidden intermediate, lower-res
h <- layer_dense(x, intermediate_dim, activation = "relu")
# latent var 1, 2-dim (mainly for plotting!): mean
z_mean <- layer_dense(h, latent_dim)
# latent var 2, 2-dim: variance
z_log_var <- layer_dense(h, latent_dim)
sampling <- function(arg){
z_mean <- arg[, 1:(latent_dim)]
z_log_var <- arg[, (latent_dim + 1):(2 * latent_dim)]
epsilon <- k_random_normal(
shape = c(k_shape(z_mean)[[1]]),
mean=0.,
stddev=epsilon_std
)
z_mean + k_exp(z_log_var/2)*epsilon
}
z <- layer_concatenate(list(z_mean, z_log_var)) %>%
layer_lambda(sampling)
# hidden intermediate, higher-res
decoder_h <- layer_dense(units = intermediate_dim, activation = "relu")
# decoder for the mean, high-res again
decoder_mean <- layer_dense(units = original_dim, activation = "sigmoid")
h_decoded <- decoder_h(z)
x_decoded_mean <- decoder_mean(h_decoded)
# the complete model, from input to decoded output
vae <- keras_model(x, x_decoded_mean)
# encoder, from inputs to latent space
encoder <- keras_model(x, z_mean)
# generator, from latent space to reconstructed inputs
decoder_input <- layer_input(shape = latent_dim)
h_decoded_2 <- decoder_h(decoder_input)
x_decoded_mean_2 <- decoder_mean(h_decoded_2)
generator <- keras_model(decoder_input, x_decoded_mean_2)
vae_loss <- function(x, x_decoded_mean){
xent_loss <- (original_dim/1.0)*loss_binary_crossentropy(x,
x_decoded_mean)
kl_loss <- -0.5*k_mean(1 + z_log_var - k_square(z_mean) -
k_exp(z_log_var), axis = -1L)
xent_loss + kl_loss
}
vae %>% compile(optimizer = "rmsprop", loss = vae_loss, metrics =
c('accuracy'))
# Model training ---------------------------------------------------------
history<- vae %>% fit(
x_train, x_train,
shuffle = TRUE,
epochs = 25,
batch_size = batch_size,
validation_data = list(x_test, x_test)
)
data_set_after_vae <- keras::predict(encoder, data_set, batch_size =
batch_size)
vae_result<- list ("v_autoencoder" = vae,
"data_set_after_vae" = data_set_after_vae %>%
as_data_frame(),
"history" = history,
"encoder" = encoder)
return (vae_result)
}
I'm a beginner in machine learning. I'm doing clustering using autoencoder in R (h2o package). For now, I've done the following codes:
`mydata = h2o.importFile(path = mfile)
NN_model = h2o.deeplearning(
x = 2:9,
training_frame = mydata,
hidden = c(2),
epochs = 100,
activation = "Tanh",
autoencoder = TRUE
)
train_supervised_features = h2o.deepfeatures(NN_model, mydata, layer=1)`
For my data, there are not too many columns (as you can see in the codes, only 8 variables now), but lots of rows.
Then I plot the 2 nodes in "train_supervised_features" obtained above. And I got the plot like this
It is clear that there are 8 clusters of my data (right?)
My question is, how can I extract the labels from the autoencoder results? I want to add the labels to original data, and plot in colors using these labels, something like this:
Since the autoencoder doesn't have any idea about "clusters", you would have to call h2o.kmeans() on the 2D dataset first, to get the cluster assignments. Then you can score the dataset using that k-means model, using h2o.predict(model, 2Ddata), and you'll get a cluster label for each row.
Basically, AE can be a good DNN for reinstructing the input, however, you can use the latent layer for clustering.
The important steps:
1- For initializing the cluster assignment you may apply simple k-means for your data and label them;
2- Train your AE to learn features;
3- Try to extract the output of the layer before the latent layer which includes the trained space of your data;
4- Cluster the data with a k-means.
I hope it helps,
Here is an example I have provided in Keras:
library(keras)
library(caret)
library(tidyverse)
c(c(xtrain, ytrain), c(xtest, ytest)) %<-% dataset_mnist()
xtrain = xtrain/255
xtest = xtest/255
input_size = dim(xtrain)[2]*dim(xtrain)[3]
latent_size = 10
print(input_size)
x_train = array_reshape(xtrain, dim=c(dim(xtrain)[1], input_size))
x_test = array_reshape(xtest, dim=c(dim(xtest)[1], input_size))
x <- rbind( x_test, x_train )/255.0
# Encoder
encoder_input = layer_input(shape = input_size)
encoder_output = encoder_input %>%
layer_dense(units=256, activation = "relu") %>%
layer_activation_leaky_relu() %>%
layer_dense(units=latent_size) %>%
layer_activation_leaky_relu()
encoderoder = keras_model(encoder_input, encoder_output)
summary(encoderoder)
# Decoder
decoder_input = layer_input(shape = latent_size)
decoder_output = decoder_input %>%
layer_dense(units=256, activation = "relu") %>%
layer_activation_leaky_relu() %>%
layer_dense(units = input_size, activation = "relu") %>%
layer_activation_leaky_relu()
decoderoder = keras_model(decoder_input, decoder_output)
summary(decoderoder)
# Autoencoder
autoencoderoder_input = layer_input(shape = input_size)
autoencoderoder_output = autoencoderoder_input %>%
encoderoder() %>%
decoderoder()
autoencoderoder = keras_model(autoencoderoder_input, autoencoderoder_output)
summary(autoencoderoder)
autoencoderoder %>% compile(optimizer="rmsprop", loss="binary_crossentropy")
autoencoderoder %>% fit(x_train,x_train, epochs=20, batch_size=256)
encoderoded_imgs = encoderoder %>% predict(x_test)
decoderoded_imgs = decoderoder %>% predict(encoderoded_imgs)
# Images plot
pred_images = array_reshape(decoderoded_imgs, dim=c(dim(decoderoded_imgs)[1], 28, 28))
n = 10
op = par(mfrow=c(12,2), mar=c(1,0,0,0))
for (i in 1:n)
{
plot(as.raster(pred_images[i,,]))
plot(as.raster(xtest[i,,]))
}
# Saving trained Net
autoencoderoder_weights <- autoencoderoder %>%
keras::get_weights()
keras::save_model_weights_hdf5(object = autoencoderoder,filepath = '..../autoencoderoder_weights.hdf5',overwrite = TRUE)
encoderoder_model <- keras_model(inputs = encoder_input, outputs = encoderoder$output)
encoderoder_model %>% keras::load_model_weights_hdf5(filepath = "..../autoencoderoder_weights.hdf5",skip_mismatch = TRUE,by_name = TRUE)
encoderoder_model %>% compile(
loss='mean_squared_error',
optimizer='adam',
metrics = c('accuracy')
)
embeded_points <-
encoderoder_model %>%
keras::predict_on_batch(x = x_train)
summary(encoderoder_model)
# Getting layer
layer_name<-"dense_1"
intermediate_layer_model <- keras_model(inputs = encoderoder_model$input, outputs = get_layer(encoderoder_model, layer_name)$output)
intermediate_output <- predict(intermediate_layer_model, x)
# Clustering latent space
km <- stats::kmeans( intermediate_output, centers = 10L, nstart = 20L )
labPrediction <- km$cluster
plot(labPrediction)
# The End
labels are available in "labPrediction" file
For the reference:
https://www.datatechnotes.com/2020/02/how-to-build-simple-autoencoder-with-keras-in-r.html