Unfortunately, I have a problem with my code in R. I am trying to use GA to tune up hyperparameters, but I received null values, so it is impossible to train svm.
Do you have any idea how to solve the problem?
library(caret)
library(GA)
library(e1071)
Iris <- iris
fit_fun <- function(params){
model <- train(Species ~ ., data = iris, method = "svmRadial",
trControl = trainControl(method = "cv", number = 5),
tuneGrid = data.frame(C = params[1], sigma = params[2]))
return(model$results[which.min(model$results[,"Accuracy"]),"Accuracy"])
}
param_grid <- expand.grid(C = c(0.1, 1, 10), sigma = c(0.1, 1, 10))
set.seed(123)
best_params <- ga(type = "real-valued", fitness = fit_fun, lower = as.numeric(param_grid[1,]),
upper = as.numeric(param_grid[nrow(param_grid),]), maxiter = 20, popSize = 50)
best_cost <- attributes(best_params)$parameters[1]
best_sigma <- attributes(best_params)$parameters[2]
model <- svm(Species ~ ., data = iris, cost = best_cost,
sigma = best_sigma, type = "C-classification")
**Error in svm.default(x, y, scale = scale, ..., na.action = na.action) :
‘cost’ must not be NULL!**
Thank You in advance.
Related
I would like to see what is wrong with my code when I use the classification methods. My accuracy is very high, and one is 1. I think I did something wrong with my code. Could you please revise the code and tell me code if it is wrong? Since I am very confused about it. I want to predict wine type and wine quality.
The dataset is from -> http://archive.ics.uci.edu/ml/datasets/Wine+Quality
My code:
library(party)
library(tidyverse)
library(RCurl)
library(psych)
library(ggplot2)
library(GGally)
library(mlbench)
library(e1071)
library(caret)
library(rpart)
library(dplyr)
redwine_df<-read.csv("winequality-red.csv")
whitewine_df<-read.csv("winequality-white.csv")
#add categorical values to both sets
redwine_df['wine_type'] <- 'red_wine'
whitewine_df['wine_type'] <- 'white_wine'
is.data.frame(redwine_df)
is.data.frame(whitewine_df)
#merge sets of red wine and white wine
wine <- rbind(redwine_df, whitewine_df)
#change to tibble
wine_tibble<-as_tibble(wine)
wine_tibble
#check the columns
names(wine_tibble)
#dimension
dim(wine_tibble)
#summary
length(which(wine_tibble==0))#just the column citric.acid has 0
summary(wine_tibble)
#drop duplicated values of sets
colSums(is.na(wine_tibble))#is there any na values
summary(duplicated(wine_tibble))#is there any duplicated values
wine_clean <- unique(wine_tibble)
summary(duplicated(wine_clean))
dim(wine_clean)
#Prediction
#Data Preparations - Training and Test Data
w1<-wine_clean %>% mutate(quality_rank =
case_when(quality <= 5 ~ "Poor",
quality == 6 ~ "Normal",
quality >= 7~ "Excellent"))
set.seed(2000)
w1$quality_rank <-as.factor(w1$quality_rank)
#Predict the Wine Quality
inTrain <- createDataPartition(y = w1$quality_rank, p = .8, list = FALSE)
quality_train <- w1 %>% slice(inTrain)
quality_test <- w1 %>% slice(-inTrain)
quality_index <- createFolds(quality_train$quality_rank, k = 10)
quality_train
#1. 1. Conditional Inference Tree (Decision Tree)
install.packages("party")
library(party)
ctreeFit <- quality_train %>% train(quality_rank ~ .,
method = "ctree",
data = .,
tuneLength = 5,
trControl = trainControl(method = "cv", indexOut =
quality_index))
ctreeFit
plot(ctreeFit$finalModel)
#2.Linear Support Vector Machine
svmFit<- svm(quality_rank ~ .,
data = trainset,
type = "C-classification",
kernel = "linear",
scale = FALSE)
svmFit
svmFit$finalModel
#3. C 4.5 Decision Tree
install.packages("RWeka")
library(RWeka)
C45Fit <- quality_train %>% train(quality_rank ~ .,
method = "J48",
data = .,
tuneLength = 5,
trControl = trainControl(method = "cv", indexOut =
quality_index))
C45Fit
C45Fit$finalModel
#4. K-Nearest Neighbors
knnFit <- quality_train %>% train(quality_rank ~ .,
method = "knn",
data = .,
preProcess = "scale",
tuneLength = 5,
tuneGrid=data.frame(k = 1:10),
trControl = trainControl(method = "cv", indexOut =
quality_index))
knnFit
knnFit$finalModel
#5. Naïve Bayes Classifiers
install.packages("klaR")
library(klaR)
NBayesFit <- quality_train %>% train(quality_rank ~ .,
method = "nb",
data = .,
tuneLength = 5,
trControl = trainControl(method = "cv", indexOut =
quality_index))
NBayesFit
#Compare the models for wine quality
resamps <- resamples(list(
ctree = ctreeFit,
C45 = C45Fit,
KNN = knnFit,
NBayes = NBayesFit,
SVM = svmFit))
resamps
#Applying the Chosen Model to the Test Data
summary(resamps)
library(lattice)
bwplot(resamps, layout = c(3, 1))
pr <- predict(knnFit, quality_train)
pr
confusionMatrix(pr, reference = quality_train$quality_rank)
#Predict Wine Type
w2<-wine_clean
w2$wine_type <-as.factor(w2$wine_type)
type_inTrain <- createDataPartition(y = w2$wine_type, p = .9, list = FALSE)
type_train <- w2 %>% slice(type_inTrain)
type_test <- w2 %>% slice(-type_inTrain)
type_index <- createFolds(type_train$wine_type, k = 10)
type_train
#1. Conditional Inference Tree (Decision Tree)
ctreeFit2 <- type_train %>% train(wine_type ~ .,
method = "ctree",
data = .,
tuneLength = 5,
trControl = trainControl(method = "cv", indexOut =
type_index))
ctreeFit2
plot(ctreeFit2$finalModel)
#2.Linear Support Vector Machine
svmFit2 <- type_train %>% train(wine_type ~.,
method = "svmLinear",
data = .,
tuneLength = 5,
trControl = trainControl(method = "cv", indexOut =
type_index))
svmFit2
svmFit2$finalModel
#3. C 4.5 Decision Tree
C45Fit2 <- type_train %>% train(wine_type ~ .,
method = "J48",
data = .,
tuneLength = 5,
trControl = trainControl(method = "cv", indexOut =
type_index))
C45Fit2
C45Fit2$finalModel
#4. K-Nearest Neighbors
knnFit2 <- type_train %>% train(wine_type ~ .,
method = "knn",
data = .,
preProcess = "scale",
tuneLength = 5,
tuneGrid=data.frame(k = 1:10),
trControl = trainControl(method = "cv", indexOut =
type_index))
knnFit2
knnFit2$finalModel
#5. Naïve Bayes Classifiers
NBayesFit2 <- type_train %>% train(wine_type ~ .,
method = "nb",
data = .,
tuneLength = 5,
trControl = trainControl(method = "cv", indexOut =
type_index))
NBayesFit2
#Compare the models for white wine
resamp <- resamples(list(
ctree2 = ctreeFit2,
C452 = C45Fit2,
KNN2 = knnFit2,
NBayes2 = NBayesFit2,
SVM2 = svmFit2
))
resamp
#Applying the Chosen Model to the Test Data
summary(resamp)
library(lattice)
bwplot(resamp, layout = c(3, 1))
pr2 <- predict(svmFit2, type_train)
pr2
confusionMatrix(pr2, reference = type_train$wine_type)
I have tried to changed the train/test spilt to 50%/50?, I changed the set.seed to 123 instead of 2000, but the answer is still the same.
I'm having doubts during the hyperparameters tune step. I think I might be making some confusion.
I split my dataset into training (70%), validation (15%) and testing (15%). Below is the code used for regression with Random Forest.
1. Training
I perform the initial training with the dataset, as follows:
rf_model <- ranger(y ~.,
date = train ,
num.trees = 500,
mtry = 5,
min.node.size = 100,
importance = "impurity")
I get the R squared and the RMSE using the actual and predicted data from the training set.
pred_rf <- predict(rf_model,train)
pred_rf <- data.frame(pred = pred_rf, obs = train$y)
RMSE_rf <- RMSE(pred_rf$pred, pred_rf$obs)
R2_rf <- (color(pred_rf$pred, pred_rf$obs)) ^2
2. Parameter optimization
Using a parameter grid, the best model is chosen based on performance.
hyper_grid <- expand.grid(mtry = seq(3, 12, by = 4),
sample_size = c(0.5,1),
min.node.size = seq(20, 500, by = 100),
MSE = as.numeric(NA),
R2 = as.numeric(NA),
OOB_RMSE = as.numeric(NA)
)
And I perform the search for the best model according to the smallest OOB error, for example.
for (i in 1:nrow(hyper_grid)) {
model <- ranger(formula = y ~ .,
date = train,
num.trees = 500,
mtry = hyper_grid$mtry[i],
sample.fraction = hyper_grid$sample_size[i],
min.node.size = hyper_grid$min.node.size[i],
importance = "impurity",
replace = TRUE,
oob.error = TRUE,
verbose = TRUE
)
hyper_grid$OOB_RMSE[i] <- sqrt(model$prediction.error)
hyper_grid[i, "MSE"] <- model$prediction.error
hyper_grid[i, "R2"] <- model$r.squared
hyper_grid[i, "OOB_RMSE"] <- sqrt(model$prediction.error)
}
Choose the best performing model
x <- hyper_grid[which.min(hyper_grid$OOB_RMSE), ]
The final model:
rf_fit_model <- ranger(formula = y ~ .,
date = train,
num.trees = 100,
mtry = x$mtry,
sample.fraction = x$sample_size,
min.node.size = x$min.node.size,
oob.error = TRUE,
verbose = TRUE,
importance = "impurity"
)
Perform model prediction with validation data
rf_predict_val <- predict(rf_fit_model, validation)
rf_predict_val <- as.data.frame(rf_predict_val[1])
names(rf_predict_val) <- "pred"
rf_predict_val <- data.frame(pred = rf_predict_val, obs = validation$y)
RMSE_rf_fit <- RMSE rf_predict_val$pred, rf_predict_val$obs)
R2_rf_fit <- (cor(rf_predict_val$pred, rf_predict_val$obs)) ^ 2
Well, now I wonder if I should replicate the model evaluation with the test data.
The fact is that the validation data is being used only as a "test" and is not effectively helping to validate the model.
I've used cross validation in other methods, but I'd like to do it more manually. One of the reasons is that the CV via caret is very slow.
I'm in the right way?
Code using Caret, but very slow:
ctrl <- trainControl(method = "repeatedcv",
repeats = 10)
grid <- expand.grid(interaction.depth = seq(1, 7, by = 2),
n.trees = 1000,
shrinkage = c(0.01,0.1),
n.minobsinnode = 50)
gbmTune <- train(y ~ ., data = train,
method = "gbm",
tuneGrid = grid,
verbose = TRUE,
trControl = ctrl)
I am trying to predict the times table training a neural network. However, I couldn't really get how preProcess argument works in train function in Caret.
In the docs, it says:
The preProcess class can be used for many operations on predictors, including centering and scaling.
When we set preProcess like below,
tt.cv <- train(product ~ .,
data = tt.train,
method = 'neuralnet',
tuneGrid = tune.grid,
trControl = train.control,
linear.output = TRUE,
algorithm = 'backprop',
preProcess = 'range',
learningrate = 0.01)
Does it mean that the train function preprocesses (normalizes) the training data passed, in this case tt.train?
After the training is done, when we are trying to predict, do we pass normalized inputs to the predict function or are inputs normalized in the function because we set the preProcess parameter?
# Do we do
predict(tt.cv, tt.test)
# or
predict(tt.cv, tt.normalized.test)
And from the quote above, it seems that when we use preProcess, outputs are not normalized this way in training, how do we go about normalizing outputs? Or do we just normalize the training data beforehand like below and then pass it to the train function?
preProc <- preProcess(tt, method = 'range')
tt.preProcessed <- predict(preProc, tt)
tt.preProcessed.train <- tt.preProcessed[indexes,]
tt.preProcessed.test <- tt.preProcessed[-indexes,]
The whole code:
library(caret)
library(neuralnet)
# Create the dataset
tt = data.frame(multiplier = rep(1:10, times = 10), multiplicand = rep(1:10, each = 10))
tt = cbind(tt, data.frame(product = tt$multiplier * tt$multiplicand))
# Splitting
indexes = createDataPartition(tt$product,
times = 1,
p = 0.7,
list = FALSE)
tt.train = tt[indexes,]
tt.test = tt[-indexes,]
# Pre-process
preProc <- preProcess(tt, method = c('center', 'scale'))
tt.preProcessed <- predict(preProc, tt)
tt.preProcessed.train <- tt.preProcessed[indexes,]
tt.preProcessed.test <- tt.preProcessed[-indexes,]
# Train
train.control <- trainControl(method = "repeatedcv",
number = 10,
repeats = 3,
savePredictions = TRUE)
tune.grid <- expand.grid(layer1 = 8,
layer2 = 0,
layer3 = 0)
tt.cv <- train(product ~ .,
data = tt.train,
method = 'neuralnet',
tuneGrid = tune.grid,
trControl = train.control,
algorithm = 'backprop',
learningrate = 0.01,
stepmax = 100000,
preProcess = c('center', 'scale'),
lifesign = 'minimal',
threshold = 0.01)
I am using Bayesian optimization to tune the parameters of SVM for regression problem. In the following code, what should be the value of init_grid_dt = initial_grid ? I got the upper and lower bounds of the sigma and C parameters of SVM, but dont know what should be the initial-grid?
In one of the example on the web, they took a random search results as input to the initial grid. The code is as follow:
ctrl <- trainControl(method = "repeatedcv", repeats = 5)
svm_fit_bayes <- function(logC, logSigma) {
## Use the same model code but for a single (C, sigma) pair.
txt <- capture.output(
mod <- train(y ~ ., data = train_dat,
method = "svmRadial",
preProc = c("center", "scale"),
metric = "RMSE",
trControl = ctrl,
tuneGrid = data.frame(C = exp(logC), sigma = exp(logSigma)))
)
list(Score = -getTrainPerf(mod)[, "TrainRMSE"], Pred = 0)
}
lower_bounds <- c(logC = -5, logSigma = -9)
upper_bounds <- c(logC = 20, logSigma = -0.75)
bounds <- list(logC = c(lower_bounds[1], upper_bounds[1]),
logSigma = c(lower_bounds[2], upper_bounds[2]))
## Create a grid of values as the input into the BO code
initial_grid <- rand_search$results[, c("C", "sigma", "RMSE")]
initial_grid$C <- log(initial_grid$C)
initial_grid$sigma <- log(initial_grid$sigma)
initial_grid$RMSE <- -initial_grid$RMSE
names(initial_grid) <- c("logC", "logSigma", "Value")
library(rBayesianOptimization)
ba_search <- BayesianOptimization(svm_fit_bayes,
bounds = bounds,
init_grid_dt = initial_grid,
init_points = 0,
n_iter = 30,
acq = "ucb",
kappa = 1,
eps = 0.0,
verbose = TRUE)
I have a data set called value that have four variables (ER is the dependent variable) and 400 observations (after removing N/A). I tried to divide the dataset into training and test sets and train the model using linear regression in the caret package. But I always get the errors:
In lm.fit(x, y, offset = offset, singular.ok = singular.ok, ... :
extra argument ‘trcontrol’ is disregarded.
Below is my code:
ctrl_lm <- trainControl(method = "cv", number = 5, verboseIter = FALSE)
value_rm = na.omit(value)
set.seed(1)
datasplit <- createDataPartition(y = value_rm[[1]], p = 0.8, list = FALSE)
train.value <- value_rm[datasplit,]
test.value <- value_rm[-datasplit,]
lmCVFit <- train(ER~., data = train.value, method = "lm",
trcontrol = ctrl_lm, metric = "Rsquared")
predictedVal <- predict(lmCVFit, test.value)
modelvalues <- data.frame(obs = test.value$ER, pred = predictedVal)
lmcv.out = defaultSummary(modelvalues)
The right sintax is trControl, not trcontrol. Try this:
library(caret)
set.seed(1)
n <- 100
value <- data.frame(ER=rnorm(n), X=matrix(rnorm(3*n),ncol=3))
ctrl_lm <- trainControl(method = "cv", number = 5, verboseIter = FALSE)
value_rm = na.omit(value)
set.seed(1)
datasplit <- createDataPartition(y = value_rm[[1]], p = 0.8, list = FALSE)
train.value <- value_rm[datasplit,]
test.value <- value_rm[-datasplit,]
lmCVFit <- train(ER~., data = train.value, method = "lm",
trControl = ctrl_lm, metric = "Rsquared")
predictedVal <- predict(lmCVFit, test.value)
modelvalues <- data.frame(obs = test.value$ER, pred = predictedVal)
( lmcv.out <- defaultSummary(modelvalues) )
# RMSE Rsquared MAE
# 1.2351006 0.1190862 1.0371477