Same accuracy for SVM classification - r

I am using the car evaluation dataset from UCI and trying to apply SVM classification to it. After creating the model, when I calculate accuracy from the confusion matrix, I get the same accuracy every time, even if I change the SVM parameters. My code is below.
require("e1071");
#Code to read data from csv and convert to numeric
car_data <- read.csv("car.data.csv",header = TRUE,sep = ",",quote = "\"");
#backup original data to other data frame
car_data_bkp <- car_data;
car_data$buying<-as.numeric(car_data$buying);
car_data$maint<-as.numeric(car_data$maint);
car_data$doors<-as.numeric(car_data$doors);
car_data$persons<-as.numeric(car_data$persons);
car_data$lug_boot<-as.numeric(car_data$lug_boot);
car_data$safety<-as.numeric(car_data$safety);
car_data$class<-as.numeric(car_data$class);
#scaling of data
maxs = apply(car_data, MARGIN = 2, max);
mins = apply(car_data, MARGIN = 2, min);
scaled = as.data.frame(scale(car_data, center = mins, scale = maxs - mins));
#sampling of data for train and testing
trainIndex <- sample(1:nrow(scaled), 0.8 * nrow(scaled));
train <- scaled[trainIndex, ];
test <- scaled[-trainIndex, ];
n <- names(train);
f <- as.formula(paste("class ~", paste(n[!n %in% "class"], collapse = " + ")));
svm_model <- svm(formula=f,train,cross = 2,tolerance= 0.00001, cost = 1000,gamma=1);
summary(svm_model);
svm.pred <- predict(svm_model, test[,-7],type = "class");
conf_tab <- table(pred = svm.pred, true = test[,7]);
conf_tab;
#calculate accuracy from the confusion table
sum(diag(conf_tab))/sum(conf_tab);
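No answer is recorded here, but for reference, here is a minimal sketch (my own, assuming the same car.data.csv file and column names as above) that keeps class as a factor so svm() performs classification rather than regression, and reads accuracy off the confusion table:
library(e1071)
car_data <- read.csv("car.data.csv", header = TRUE, stringsAsFactors = TRUE)
car_data$class <- as.factor(car_data$class)  # factor response => C-classification
set.seed(1)  # hypothetical seed, only for a reproducible split
idx <- sample(seq_len(nrow(car_data)), 0.8 * nrow(car_data))
train <- car_data[idx, ]
test <- car_data[-idx, ]
fit <- svm(class ~ ., data = train, kernel = "radial", cost = 10, gamma = 0.1)
pred <- predict(fit, newdata = test)
tab <- table(pred = pred, true = test$class)
sum(diag(tab)) / sum(tab)  # accuracy from the confusion table
With a factor response, changing cost and gamma should now move this accuracy; with a numeric, scaled class column, svm() silently falls back to regression.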

Related

SVM Plot e1071 plots single color

I am trying to plot my SVM classification using the e1071 library. However, the classification plot shows only a single color for my parameters. Even when I change the parameters selected to create the 2D classification plot, it is still wrong.
require(caTools)
library(caret)
dataset <-read.csv("income_evaluation.csv")
# fnlwgt column removed since it is not necessary
df_income <- subset(dataset,select=-c(fnlwgt))
# turn binary attribute into 0 and 1
df_income$income <-ifelse(df_income$income==" >50K",1,0)
# inspect native.country and count values coded as ' ?' (missing) in each column
df_income$native.country
apply(X=df_income,2,FUN=function(x) length(which(x==' ?')))
# handling missing values
#define function to calculate mode
find_mode <- function(x) {
u <- unique(x)
tab <- tabulate(match(x, u))
u[tab == max(tab)]
}
mod_workclass_df = find_mode(df_income$workclass)
mod_occupation_df = find_mode(df_income$occupation)
mod_country_df = find_mode(df_income$native.country)
# replacing the missing values with the mode values
df_income$workclass[df_income$workclass == ' ?'] <- mod_workclass_df
df_income$occupation[df_income$occupation == ' ?'] <- mod_occupation_df
df_income$native.country[df_income$native.country == ' ?'] <- mod_country_df
# one hot encoding for train set
dmy <- dummyVars(" ~ .", data = df_income, fullRank = T)
df_income <- data.frame(predict(dmy, newdata = df_income))
# sampling
set.seed(101)
sample = sample.split(df_income$income, SplitRatio = .75)
trainingSet = subset(df_income, sample == TRUE)
testSet = subset(df_income, sample == FALSE)
# isolate y variable
Y_train <- trainingSet$income
Y_test <- testSet$income
# isolate x variables
X_test <- subset(testSet,select=-c(income))
# evaluation of svm
library(e1071)
svm_classifier = svm(formula=income ~ .,data=trainingSet,type="C-classification",kernel="radial",scale=TRUE,cost=10)
Y_pred = predict(svm_classifier,newdata= X_test)
confusionMatrix(table(Y_test,Y_pred))
# cross validation
# in creating the folds we specify the target feature (dependent variable) and # of folds
folds = createFolds(trainingSet$income, k = 10)
# in cv we are going to apply a created function to our 'folds'
cv = lapply(folds, function(x) { # start of function
# in the next two lines we separate the training set into its 10 pieces
training_fold = trainingSet[-x, ] # training fold = training set minus (-) its sub test fold
test_fold = trainingSet[x, ] # here we describe the test fold individually
# now train the classifier on the training_fold
classifier = svm(formula = income ~ ., data = training_fold,
type = "C-classification", kernel = "radial", scale = TRUE, cost = 10)
Y_pred = predict(classifier, newdata = test_fold[-97])
cm = table(test_fold[, 97], Y_pred)
accuracy = (cm[1,1] + cm[2,2]) / (cm[1,1] + cm[2,2] + cm[1,2] + cm[2,1])
return(accuracy)
})
accuracy = mean(as.numeric(cv))
accuracy
trainingSet$income <-as.factor(trainingSet$income)
# Visualising the Training set results
plot(svm_classifier,trainingSet,education.num~age)
library(ggplot2)
svm_classifier
table(predicted=svm_classifier$fitted,actual=trainingSet$income)
My code is above and the plot is below. I could not find out why the background is only one color and why no red region appears in the background.
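No answer is recorded for this question, but for reference: plot.svm() draws class-colored decision regions for classification fits, and when the data have more than two predictors, every predictor not on the plot axes must be held fixed through the slice argument, otherwise the visible plane may all fall into a single predicted class. A minimal sketch on the built-in iris data (an illustration of the API, not the asker's income data):
library(e1071)
data(iris)
fit <- svm(Species ~ Sepal.Length + Sepal.Width + Petal.Length,
data = iris, type = "C-classification", kernel = "radial", cost = 10)
# Decision regions over two chosen features; the remaining predictor is
# pinned at its median through `slice`.
plot(fit, iris, Sepal.Length ~ Sepal.Width,
slice = list(Petal.Length = median(iris$Petal.Length)))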

Calculating accuracy of train and testing data set

I'm confused while calculating the train data prediction and the test data prediction, since the values come out the same every time and the test accuracy is not very accurate.
library(dbplyr)
library(tidyverse)
library(caret)
# Placeholder for test & train accuracy
train_Data_prediction=rep(1,100)
test_Data_prediction=rep(1,100)
# Execute 100 times and later average the accuracy
for(count in c(1:100))
{
data_train <- read.csv("parktraining.csv",FALSE,",")
data_train = as.matrix(data_train)
x_index=ncol(data_train)
data_Without_lable <- data_train[,-x_index]
lable <- data_train[,x_index]
Train_mean = apply(data_Without_lable,2,mean)
Train_sd = apply(data_Without_lable,2,sd)
Train_offsets <- t(t(data_Without_lable) - Train_mean)
Train_scaled_data <- t(t(Train_offsets) / Train_sd)
positive_ids = which(data_train[,x_index] == 1)
negative_ids = which(data_train[,x_index] == 0)
positive_data = Train_scaled_data[positive_ids,]
negative_data = Train_scaled_data[negative_ids,]
pos_Mean = apply(positive_data,2,mean)
positive_sd=apply(positive_data,2,sd)
neg_Mean = apply(negative_data,2,mean)
negative_sd=apply(negative_data,2,sd)
tested_data <- read.csv("parktesting.csv",FALSE,",")
tested_data = as.matrix(tested_data)
testing_data = tested_data[,-x_index]
predict=function(testing_data_row){
target=0;
# Use dnorm() for the class-conditional normal densities and add the log prior
p_pos=sum(log(dnorm(testing_data_row,pos_Mean,positive_sd))) +
log(length(positive_ids)/length(lable))
p_neg=sum(log(dnorm(testing_data_row,neg_Mean,negative_sd))) +
log(length(negative_ids)/length(lable))
if(p_pos>p_neg){
target=1
}else{
target=0
}
return(target)
}
test_mean = apply(testing_data,2,mean)
test_sd = apply(testing_data,2,sd)
test_offset <- t(t(testing_data) - test_mean)
test_scaled_data <- t(t(test_offset) / test_sd)
test_prediction <- apply(test_scaled_data,1,predict)
target=tested_data[,x_index]
target
test_Data_prediction[count]=length(which((test_prediction == target)==TRUE))/length(target)
test_Data_prediction
#Predict for train data ( optional, output not printed )
train_prediction =apply(Train_scaled_data,1,predict)
train_Data_prediction[count]=length(which((train_prediction == lable)==TRUE))/length(lable)
}
test_Data_prediction
train_Data_prediction
print(paste("Average Train Data Accuracy:",mean(train_Data_prediction)*100.0,sep = " "))
print(paste("Average Test Data Accuracy:",mean(test_Data_prediction)*100.0,sep = " "))

What can be the cause for the difference in MAE outcome from deep learning with R between these datasets?

I’m trying to replicate the deep learning example below with the same Boston housing dataset from another source.
https://jjallaire.github.io/deep--with-r-notebooks/notebooks/3.6-predicting-house-prices.nb.html
Originally the data source is:
library(keras)
dataset <- dataset_boston_housing()
Alternatively I try to use:
library(mlbench)
data(BostonHousing)
The differences between the datasets are:
- the dataset from mlbench contains column names.
- the dataset from keras is already split between test and train.
- the set from keras is organised as lists containing matrices, while the dataset from mlbench is a data frame.
- the fourth column contains a categorical variable "chas", which could not be preprocessed from the mlbench dataset while it can be from the keras dataset. To compare apples with apples, I deleted this column from both datasets.
In order to compare both datasets, I merged the train and test sets from keras into one dataset. After this I compared the merged keras dataset with the mlbench one using summary(), and they are identical for every feature (min, max, median, mean).
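As an aside, a minimal sketch of that merge-and-compare step (my own reconstruction, assuming the list layout returned by dataset_boston_housing(); column 4 is chas in both sources):
library(keras)
library(mlbench)
boston <- dataset_boston_housing()
# Stack the keras train and test feature matrices and drop chas (column 4).
keras_all <- rbind(boston$train$x, boston$test$x)[, -4]
data(BostonHousing)
mlbench_all <- BostonHousing[, !(names(BostonHousing) %in% c("medv", "chas"))]
summary(keras_all)    # compare per-feature min / max / median / mean
summary(mlbench_all)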
Since the dataset from keras is already split between test and train (80-20), I can only use one training set for the deep learning process. This training set gives a validation_mae of around 2.5. See this graph:
If I partition the data from mlbench at 0.8 to construct a training set of similar size, run the deep learning code, and do this several times, I never reach a validation_mae of around 2.5; the range is between 4 and 6. An example of the output is this graph:
Does someone know what can be the cause for this difference?
Code with dataset from keras:
library(keras)
dataset <- dataset_boston_housing()
c(c(train_data, train_targets), c(test_data, test_targets)) %<-% dataset
train_data <- train_data[,-4]
test_data <- test_data[,-4]
mean <- apply(train_data, 2, mean)
std <- apply(train_data, 2, sd)
train_data <- scale(train_data, center = mean, scale = std)
test_data <- scale(test_data, center = mean, scale = std)
# After this line the code is the same for both code examples.
# =========================================
# Because we will need to instantiate the same model multiple times,
# we use a function to construct it.
build_model <- function() {
model <- keras_model_sequential() %>%
layer_dense(units = 64, activation = "relu",
input_shape = dim(train_data)[[2]]) %>%
layer_dense(units = 64, activation = "relu") %>%
layer_dense(units = 1)
model %>% compile(
optimizer = "rmsprop",
loss = "mse",
metrics = c("mae")
)
}
k <- 4
indices <- sample(1:nrow(train_data))
folds <- cut(1:length(indices), breaks = k, labels = FALSE)
num_epochs <- 100
all_scores <- c()
for (i in 1:k) {
cat("processing fold #", i, "\n")
# Prepare the validation data: data from partition # k
val_indices <- which(folds == i, arr.ind = TRUE)
val_data <- train_data[val_indices,]
val_targets <- train_targets[val_indices]
# Prepare the training data: data from all other partitions
partial_train_data <- train_data[-val_indices,]
partial_train_targets <- train_targets[-val_indices]
# Build the Keras model (already compiled)
model <- build_model()
# Train the model (in silent mode, verbose=0)
model %>% fit(partial_train_data, partial_train_targets,
epochs = num_epochs, batch_size = 1, verbose = 0)
# Evaluate the model on the validation data
results <- model %>% evaluate(val_data, val_targets, verbose = 0)
all_scores <- c(all_scores, results$mean_absolute_error)
}
all_scores
mean(all_scores)
# Some memory clean-up
k_clear_session()
num_epochs <- 500
all_mae_histories <- NULL
for (i in 1:k) {
cat("processing fold #", i, "\n")
# Prepare the validation data: data from partition # k
val_indices <- which(folds == i, arr.ind = TRUE)
val_data <- train_data[val_indices,]
val_targets <- train_targets[val_indices]
# Prepare the training data: data from all other partitions
partial_train_data <- train_data[-val_indices,]
partial_train_targets <- train_targets[-val_indices]
# Build the Keras model (already compiled)
model <- build_model()
# Train the model (in silent mode, verbose=0)
history <- model %>% fit(
partial_train_data, partial_train_targets,
validation_data = list(val_data, val_targets),
epochs = num_epochs, batch_size = 1, verbose = 1
)
mae_history <- history$metrics$val_mean_absolute_error
all_mae_histories <- rbind(all_mae_histories, mae_history)
}
average_mae_history <- data.frame(
epoch = seq(1:ncol(all_mae_histories)),
validation_mae = apply(all_mae_histories, 2, mean)
)
library(ggplot2)
ggplot(average_mae_history, aes(x = epoch, y = validation_mae)) + geom_line()
Code with the dataset from mlbench (after the line with "=====", the code is the same as in the example above):
library(dplyr)
library(mlbench)
library(groupdata2)
data(BostonHousing)
parts <- partition(BostonHousing, p = 0.2)
test_data <- parts[[1]]
train_data <- parts[[2]]
train_targets <- train_data$medv
test_targets <- test_data$medv
train_data$medv <- NULL
test_data$medv <- NULL
train_data$chas <- NULL
test_data$chas <- NULL
mean <- apply(train_data, 2, mean)
std <- apply(train_data, 2, sd)
train_data <- scale(train_data, center = mean, scale = std)
test_data <- scale(test_data, center = mean, scale = std)
library(keras)
# After this line the code is the same for both code examples.
# =========================================
build_model <- function() {
model <- keras_model_sequential() %>%
layer_dense(units = 64, activation = "relu",
input_shape = dim(train_data)[[2]]) %>%
layer_dense(units = 64, activation = "relu") %>%
layer_dense(units = 1)
model %>% compile(
optimizer = "rmsprop",
loss = "mse",
metrics = c("mae")
)
}
k <- 4
indices <- sample(1:nrow(train_data))
folds <- cut(1:length(indices), breaks = k, labels = FALSE)
num_epochs <- 100
all_scores <- c()
for (i in 1:k) {
cat("processing fold #", i, "\n")
# Prepare the validation data: data from partition # k
val_indices <- which(folds == i, arr.ind = TRUE)
val_data <- train_data[val_indices,]
val_targets <- train_targets[val_indices]
# Prepare the training data: data from all other partitions
partial_train_data <- train_data[-val_indices,]
partial_train_targets <- train_targets[-val_indices]
# Build the Keras model (already compiled)
model <- build_model()
# Train the model (in silent mode, verbose=0)
model %>% fit(partial_train_data, partial_train_targets,
epochs = num_epochs, batch_size = 1, verbose = 0)
# Evaluate the model on the validation data
results <- model %>% evaluate(val_data, val_targets, verbose = 0)
all_scores <- c(all_scores, results$mean_absolute_error)
}
all_scores
mean(all_scores)
# Some memory clean-up
k_clear_session()
num_epochs <- 500
all_mae_histories <- NULL
for (i in 1:k) {
cat("processing fold #", i, "\n")
# Prepare the validation data: data from partition # k
val_indices <- which(folds == i, arr.ind = TRUE)
val_data <- train_data[val_indices,]
val_targets <- train_targets[val_indices]
# Prepare the training data: data from all other partitions
partial_train_data <- train_data[-val_indices,]
partial_train_targets <- train_targets[-val_indices]
# Build the Keras model (already compiled)
model <- build_model()
# Train the model (in silent mode, verbose=0)
history <- model %>% fit(
partial_train_data, partial_train_targets,
validation_data = list(val_data, val_targets),
epochs = num_epochs, batch_size = 1, verbose = 1
)
mae_history <- history$metrics$val_mean_absolute_error
all_mae_histories <- rbind(all_mae_histories, mae_history)
}
average_mae_history <- data.frame(
epoch = seq(1:ncol(all_mae_histories)),
validation_mae = apply(all_mae_histories, 2, mean)
)
library(ggplot2)
ggplot(average_mae_history, aes(x = epoch, y = validation_mae)) + geom_line()
Thank you!
Writing here because I can't comment: I checked the mlbench dataset and its documentation says it contains the 14 columns of the original Boston dataset plus 5 additional columns. I'm not sure whether you might have a faulty dataset, since you state that there are no differences in the column counts of the datasets.
Another guess might be that the second example's graph comes from a model that is stuck in a local minimum. To get more comparable models, you might want to work with the same seeds, so that the initializations of the weights etc. are the same and you get the same results.
Hope this helps, and feel free to ask.
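On the seeding suggestion, a minimal sketch (assuming a tensorflow R package recent enough to provide set_random_seed()) of fixing both the R-side and the TensorFlow-side random number generators before partitioning and training:
library(tensorflow)
set.seed(101)                     # R RNG: controls sample() / partition()
tensorflow::set_random_seed(101)  # TF RNG: controls weight initialization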

Predict Logistf

I'm using an R package called logistf to fit a logistic regression, and I saw that this package has no predict function for new data (and the generic predict() does not work with it), so I found code that shows how to do this with new data:
fit<-logistf(Tax ~ L20+L24+L28+L29+L31+L32+L33+L36+S10+S15+S16+S17+S20, data=trainData)
betas <- coef(fit)
X <- model.matrix(fit, data=testData)
probs <- 1 / (1 + exp(-X %*% betas))
I want to make a cross-validation version of this, using fit$predict and the probabilities that probs generates for me. Has anyone done something like this before?
The other thing I want to know is about fit$predict: I'm fitting a binary logistic regression and this function returns many values; are these values for class 0 or class 1, and how can I tell? Thanks.
While the code that you wrote works perfectly, there is a more concise way of getting seemingly the same results with the brglm package:
brglm_model <- brglm(formula = response ~ predictor , family = "binomial", data = train )
brglm_pred <- predict(object = brglm_model, newdata = test , type = "response")
About the CV, you have to write a few lines of code, I guess:
library(dplyr) # for filter() in the loop below
#Setting the number of folds, and number of instances in each fold
n_folds <- 5
fold_size <- nrow(dataset) %/% n_folds
residual <- nrow(dataset) %% n_folds
#label the instances based on the number of folds
cv_labels <- c(rep(1,fold_size),rep(2,fold_size), rep(3,fold_size), rep(4,fold_size), rep(5,fold_size), rep(5,residual))
# the error term would differ based on each threshold value
t_seq <- seq(0.1,0.9,by = 0.1)
index_mat <- matrix(ncol = (n_folds+1) , nrow = length(t_seq))
index_mat[,1] <- t_seq
# the main loop for calculation of the CV error on each fold
for (i in 1:n_folds){
train <- dataset %>% filter(cv_labels != i)
test <- dataset %>% filter(cv_labels == i )
brglm_cv_model <- brglm(formula = response_var ~ . , family = "binomial", data = train )
brglm_cv_pred <- predict(object = brglm_cv_model, newdata = test , type = "response")
# error formula that you want, e.g. misclassification
counter <- 0
for (treshold in t_seq ) {
counter <- counter + 1
conf_mat <- table( factor(test$response_var) , factor(brglm_cv_pred>treshold, levels = c("FALSE","TRUE") ))
sen <- conf_mat[2,2]/sum(conf_mat[2,])
# other indices can be computed as follows
#spec <- conf_mat[1,1]/sum(conf_mat[1,])
#prec <- conf_mat[2,2]/sum(conf_mat[,2])
#F1 <- (2*prec * sen)/(prec+sen)
#accuracy <- (conf_mat[1,1]+conf_mat[2,2])/sum(conf_mat)
#here I am only interested in sensitivity
index_mat[counter,(i+1)] <- sen
}
}
# final data.frame would be the mean of sensitivity over each threshold value
final_mat <- matrix(nrow = length(t_seq), ncol = 2 )
final_mat[,1] <- t_seq
final_mat[,2] <- apply(X = index_mat[,-1] , MARGIN = 1 , FUN = mean)
final_mat <- data.frame(final_mat)
colnames(final_mat) <- c("treshold","sensitivity")
#why not have a look at the CV sensitivity of the model over the threshold values?
library(ggplot2)
ggplot(data = final_mat) +
geom_line(aes(x = treshold, y = sensitivity ), color = "blue")
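Alternatively, if you would rather stay with logistf itself, here is a minimal sketch (a hypothetical helper of my own that just wraps the betas / model.matrix trick from the question) that can be called on each held-out fold of a CV loop:
# Hypothetical helper: predicted probabilities (of the event, class 1)
# for a fitted logistf model on new data.
predict_logistf <- function(fit, newdata) {
X <- model.matrix(fit, data = newdata)      # same design matrix as in training
as.vector(1 / (1 + exp(-X %*% coef(fit))))  # inverse logit of the linear predictor
}
For example, probs <- predict_logistf(fit, testData) reproduces the probs computed in the question.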

Prediction on Neural Network in R

I want to get the accuracy or the RMSE of the prediction result of a neural network. I started using a confusion matrix, but as indicated by previous answers, the confusion matrix only gives valid results for non-continuous variables.
Is there any way I can get the accuracy or the error rate of a neural network prediction?
As an example here is the code I've got until now:
library(nnet)
library(caret)
library(e1071)
data(rock)
newformula <- perm ~ area + peri + shape
y <- rock[, "perm"]
x <- rock[!colnames(rock)%in% "perm"]
original <- datacol(rock,"perm")
nnclas_model <- nnet(newformula, data = rock, size = 4, decay = 0.0001, maxit = 500)
nnclas_prediction <- predict(nnclas_model, x)
nnclas_tab <- table(nnclas_prediction, y)
rmse <- sqrt(mean((original - nnclas_prediction)^2))
Does anyone know how I can make this work, or how I can get the accuracy or the RMSE of the neural network prediction?
Any help will be deeply appreciated.
I don't know about "nnet", but I have used the "neuralnet" library and am able to get the RMSE. Here is my full code: Just change the data for training_Data and testing_Data as per your requirements and in place of "Channel" give what is your classification attribute
dat <- read.csv("Give path of your data file here")
summary(dat)
cleandata <- dat
cleandata <- na.omit(cleandata)
#scaling
apply(cleandata,MARGIN = 2, FUN = function(x)sum(is.na(x)))
maxs = apply(cleandata, MARGIN = 2, max)
mins = apply(cleandata, MARGIN = 2, min)
scaledData = as.data.frame(scale(cleandata, center = mins, scale = maxs - mins))
summary(scaledData)
#Splitting data in 80:20 ratio
train = sample(1:nrow(scaledData), nrow(scaledData)*0.8)
test = -train
training_Data = scaledData[train,]
testing_Data = scaledData[test,]
dim(training_Data)
dim(testing_Data)
#neural net
library(neuralnet)
n <- names(training_Data)
f <- as.formula(paste("Channel ~", paste(n[!n %in% "Channel"], collapse = " + ")))
neuralnet_Model <- neuralnet(f,data = training_Data, hidden = c(2,1))
plot(neuralnet_Model)
neuralnet_Model$result.matrix
pred_neuralnet<-compute(neuralnet_Model,testing_Data[,2:8])
pred_neuralnet.scaled <- pred_neuralnet$net.result * (max(cleandata$Channel) - min(cleandata$Channel)) + min(cleandata$Channel)
real.values <- (testing_Data$Channel)*(max(cleandata$Channel)-min(cleandata$Channel))+min(cleandata$Channel)
MSE.neuralnetModel <- sum((real.values - pred_neuralnet.scaled)^2)/nrow(testing_Data)
MSE.neuralnetModel
plot(real.values, pred_neuralnet.scaled, col='red',main='Real vs predicted',pch=18,cex=0.7)
abline(0,1,lwd=2)
legend('bottomright',legend='NN',pch=18,col='red', bty='n')
As mentioned in the comments, confusion matrices are for classification problems. If you meant to classify perm according to its levels, then the following code should work for you.
library(nnet)
library(caret)
library(e1071)
data(rock)
rock$perm <- as.factor(rock$perm)
nnclas_model <- nnet(perm ~ area + peri + shape, data = rock,
size = 4, decay = 0.0001, maxit = 500)
x <- rock[, 1:3]
y <- rock[, 4]
yhat <- predict(nnclas_model, x, type = 'class')
confusionMatrix(as.factor(yhat), y)
If you mean to treat perm as continuous, the confusion matrix doesn't make any sense. You should think in terms of mean-squared error instead.
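And if perm is meant to stay continuous, a minimal sketch (my own, using nnet's linout = TRUE so the network has a linear output unit and does regression) of reporting RMSE instead:
library(nnet)
data(rock)
# Fit nnet as a regressor: linout = TRUE gives a linear output unit.
nnreg_model <- nnet(perm ~ area + peri + shape, data = rock,
size = 4, decay = 0.0001, maxit = 500, linout = TRUE)
yhat <- predict(nnreg_model, rock[, c("area", "peri", "shape")])
rmse <- sqrt(mean((rock$perm - yhat)^2))
rmse
In practice you would scale the inputs and the response before fitting, as the neuralnet answer above does; this sketch only shows where the RMSE comes from.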
