I have this dataset:
"Density","bodyfat","Age","state"
1.0708,12.3,23,normal
1.0853,6.1,22,slim
1.0414,25.3,22,fat
1.0751,10.4,26,normal
I wrote this code:
library(rpart)
# Fix the RNG seed so the random train/test assignment below is reproducible.
set.seed(1234)
# Draw a label of 1 or 2 for every row of mydata with probabilities 0.7 / 0.3.
# mydata is never created in this snippet; presumably it holds the dataset
# shown above — confirm.
ind <- sample(2,nrow(mydata),replace=TRUE, prob= c(0.7,0.3))
trainData <- mydata[ind==1,]
testData <- mydata[ind==2,]
# myFormula is defined here but is not passed to rpart() below.
myFormula <- state ~ bodyfat
# NOTE(review): no data= argument is supplied, so rpart() has no data frame in
# which to look up `state` and `bodyfat`; this is consistent with the first
# error quoted below. Passing data = trainData is presumably what was
# intended — confirm.
albero <- rpart(state ~ bodyfat)
# A single new observation to classify with the fitted tree.
newdata <- data.frame(Density=1.0515,bodyfat=11.1,Age=24)
newdata
predict(albero,newdata,type="class")
print(albero)
This code does not work, and I get these two errors:
albero <- rpart(state~bodyfat)
Error in eval(expr,envir,enclos): object "state" not found
and:
predict(albero,newdata,type="class")
Error in match.arg(type): 'arg' should be one of "response","node","prob"
Related
When I type the name of my tree model, I keep getting this error, and I have no idea what it means.
Here is my code, OJ is a dataset contained in package "ISLR"
library(tree)
library(ISLR)
library(tidyverse)
library(caTools)
library(randomForest)
library(MASS)
library(rpart)
library(gbm)
library(glmnet)
# 8.4.9
## a. split the data into train and test
OJ <- OJ
set.seed(2)
# Split on the outcome column using a SplitRatio of 800/1070.
train2 <- sample.split(OJ$Purchase, SplitRatio = 800 / 1070)
# TRUE/FALSE are used instead of T/F, which are ordinary variables that can
# be reassigned.
OJ_train <- subset(OJ, train2 == TRUE)
OJ_test <- subset(OJ, train2 == FALSE)
## b. fit a tree model
OJ_train_tree <- tree(Purchase ~ ., data = OJ_train)
summary(OJ_train_tree)
## c. a closer look at the model detail (here is the error)
OJ_train_tree
And the error I got is
OJ_train_tree
Error in cat(x, ..., sep = sep) :
argument 1 (type 'list') cannot be handled by 'cat'
birth <- import("smoker_data1.xlsx")
## Splitting the dataset in test and train datasets
# sample.split() expects the vector of outcome labels. Given the whole data
# frame it returns one flag per column, which subset() then recycles over
# the rows, so the split is made on the outcome column instead.
mysplit <- sample.split(birth$smoke, SplitRatio = 0.65)
train <- subset(birth, mysplit == TRUE)
test <- subset(birth, mysplit == FALSE)
## Build Random Forest model on the training set
mod1 <- randomForest(smoke ~ ., data = train)
Error message: Error in y - ymean : non-numeric argument to binary operator
I think the best way is to check the data type of the smoke variable first.
If it is not already a factor, try converting it with as.factor().
library(readxl)
birth <- read_excel("smoker_data1.xlsx")
# Convert the outcome to a factor before splitting, so that both subsets
# carry the same factor levels and randomForest() fits a classifier rather
# than attempting a regression on a non-numeric column.
birth$smoke <- as.factor(birth$smoke)
## Splitting the dataset in test and train datasets
# sample.split() takes the vector of outcome labels, not the data frame.
mysplit <- sample.split(birth$smoke, SplitRatio = 0.65)
train <- subset(birth, mysplit == TRUE)
test <- subset(birth, mysplit == FALSE)
## Build Random Forest model on the training set
mod1 <- randomForest(smoke ~ ., data = train)
I already tried this with the data you gave; you just need to specify the data types correctly before calling the randomForest function.
# Coerce each column to the type used for modelling: the four measurement
# columns become numeric, the two categorical columns become factors.
for (col in c("baby_wt", "mother_a", "gestation", "mother_wt")) {
  data1[[col]] <- as.numeric(data1[[col]])
}
for (col in c("income", "smoke")) {
  data1[[col]] <- as.factor(data1[[col]])
}
library(caret)
library(randomForest)

# Names of every column except the outcome; used below to size mtry and to
# select the model inputs by name.
predictors <- names(data1)[!names(data1) %in% "smoke"]

# 70/30 split of the rows, partitioned on the outcome column.
inTrainingSet <- createDataPartition(data1$smoke, p = 0.7, list = FALSE)
train <- data1[inTrainingSet, ]
test <- data1[-inTrainingSet, ]

# mtry is derived from the number of predictors (the previous expression
# referenced an undefined object `x`); floor() keeps it a whole number of
# variables. `probability` is not a randomForest() argument and is omitted.
m.rf <- randomForest(
  smoke ~ .,
  data = train,
  mtry = floor(sqrt(length(predictors))),
  ntree = 5000,
  importance = TRUE,
  proximity = TRUE
)
m.rf
#############################################
# Test Performance
#############################################
# Select the inputs by name instead of by column position, and request
# predicted class labels through predict()'s `type` argument.
m.pred <- predict(m.rf, test[predictors], type = "response")
m.table <- table(m.pred, test$smoke)
confusionMatrix(m.table)
I'd like to build a logistic regression model using the caret package.
This is my code.
library(caret)
# Simulated data: a 0/1 integer response and one normally distributed predictor.
df <- data.frame(response = sample(0:1, 200, replace=TRUE), predictor = rnorm(200,10,45))
outcomeName <-"response"
# All column names other than the outcome (here only "predictor").
predictors <- names(df)[!(names(df) %in% outcomeName)]
index <- createDataPartition(df$response, p=0.75, list=FALSE)
trainSet <- df[ index,]
testSet <- df[-index,]
# NOTE(review): the outcome is passed first and the predictor second, and with
# a single predictor trainSet[,predictors] collapses to a plain vector with no
# column names — consistent with the error quoted below.
model_glm <- train(trainSet[,outcomeName], trainSet[,predictors], method='glm', family="binomial", data = trainSet)
I get the error Error: Please use column names for x.
I receive the same error when I replace trainSet[,predictors] with the column name predictors.
Unfortunately, R has a nasty behavior: subsetting a single column with df[,1] silently converts the result to a vector, and because you have only one predictor, you ran into it. You can keep the result as a data.frame with either
# drop = FALSE keeps the single selected column as a data.frame.
trainSet[,predictors, drop = FALSE]
or
# Single-bracket indexing by column name returns a data.frame.
trainSet[predictors]
BTW, there are two additional issues with the code:
The first argument should be the predictors, not the response.
For logistic regression with caret, the response needs to be a factor.
The full code should be:
library(caret)

# Simulated data: a binary response and one numeric predictor.
df <- data.frame(
  response = sample(0:1, 200, replace = TRUE),
  predictor = rnorm(200, 10, 45)
)
# The response must be a factor for logistic regression with caret.
df$response <- as.factor(df$response)

outcomeName <- "response"
predictors <- names(df)[!(names(df) %in% outcomeName)]

index <- createDataPartition(df$response, p = 0.75, list = FALSE)
trainSet <- df[index, ]
testSet <- df[-index, ]

# x/y interface: predictors as a data.frame (single-bracket indexing keeps the
# column name), outcome as a vector. `data` belongs to the formula interface
# of train(), so it is not passed here.
model_glm <- train(
  trainSet[predictors],
  trainSet[[outcomeName]],
  method = "glm",
  family = "binomial"
)
*changed trainSet[,outcomeName] to trainSet[[outcomeName]] for more explicit transformation to vector
I had the same problem,
# x/y call to train(): xx is iris without column 5, yy is column 5; `data` is
# passed in addition to x and y.
`head(iris)
xx <- iris[,-5]
yy <- iris[,5]
rf.imp <- train(x = xx, y = yy, method = "rf", data = iris); rf.imp`
I am trying to apply simple binomial logistic regression to JSON data I downloaded from Kaggle:
https://www.kaggle.com/c/two-sigma-connect-rental-listing-inquiries/data
I changed the values of the interest_level column to 1 if the value is "high" and 0 otherwise.
This is my first time using glm so any help is welcome.
library(rjson)
library(dplyr)
library(purrr)
library(nnet)
json.data <- fromJSON(file = "train.json")
# Row-bind the elements of the parsed list, transpose the result, and convert
# it to a data frame.
json.data = as.data.frame(t(do.call(rbind, json.data)))
#head(json.data)
#colnames(json.data)
x <- json.data$interest_level
# Recode interest_level in place: 1 where it equals "high", 0 otherwise.
for (i in 1:length(x)){
if (json.data$interest_level[i] =="high"){
json.data$interest_level[i] <- 1
}else {json.data$interest_level[i] <- 0}
}
# Randomly pick half of the row indices for training; the rest form the test set.
indexes = sample(1:nrow(json.data), size=0.5*nrow(json.data))
train.data <- json.data[indexes,]
test.data <- json.data[-indexes,]
# NOTE(review): the left-hand side of the formula is the data frame train.data
# itself rather than a response column, which is the variable the error quoted
# below complains about. Presumably interest_level was meant to be the
# response — confirm.
model <- glm(train.data~interest_level,family=binomial(link='logit'),data=train.data)
I'm getting this error message:
Error in model.frame.default(formula = train.data ~ interest_level, data = train.data, : invalid type (list) for variable 'train.data'
My testing data and my training data have different factor levels. I tried to merge the levels, but it doesn't work.
library(mgcv)
library(ff)
# The training data is read with read.csv.ffdf(), the test data with read.csv().
myData <- read.csv.ffdf(file = "myFile.csv")
myData$myVar <- as.factor(myData$myVar)
testData <- read.csv(file = "test.csv")
testData$myVar <- as.factor(testData$myVar)
# Model the column `dependent` on all other columns.
form <- dependent ~ .
model <- gam(form, data=myData)
# Attempt to extend the levels recorded on the fitted model for myVar with the
# levels found in the test data before predicting.
model$xlevels[["myVar"]] <- union(model$xlevels[["myVar"]], levels(testData$myVar))
predictedData <- predict(model, newdata=testData)
then R gives me this error:
Error in predict.gam(model, newdata = testData) : 1001, 1213,1231 not in original fit
Calls: predict -> predict.gam