Fit a no intercept binary model in caret [duplicate] - r

Performing logistic regression in R using the caret package and trying to force a zero intercept such that probability at x=0 is .5. In other forms of regression, it seems like you can turn the intercept off using tunegrid, but that has no functionality for logistic regression. Any ideas?
model <- train(y ~ 0+ x, data = data, method = "glm", family = binomial(link="probit"),
trControl = train.control)
And yes, I "know" that the probability at x=0 should be .5, and thus trying to force it.

There's a vignette on how to set up a custom model for caret. So in the solution below, you can also see why the intercept persist:
library(caret)
glm_wo_intercept = getModelInfo("glm",regex=FALSE)[[1]]
if you look at the fit, there's a line that does:
glm_wo_intercept$fit
....
modelArgs <- c(list(formula = as.formula(".outcome ~ ."), data = dat), theDots)
...
So the intercept is there by default. You can change this line and run caret on this modified model:
glm_wo_intercept$fit = function(x, y, wts, param, lev, last, classProbs, ...) {
dat <- if(is.data.frame(x)) x else as.data.frame(x)
dat$.outcome <- y
if(length(levels(y)) > 2) stop("glm models can only use 2-class outcomes")
theDots <- list(...)
if(!any(names(theDots) == "family"))
{
theDots$family <- if(is.factor(y)) binomial() else gaussian()
}
if(!is.null(wts)) theDots$weights <- wts
# change the model here
modelArgs <- c(list(formula = as.formula(".outcome ~ 0+."), data = dat), theDots)
out <- do.call("glm", modelArgs)
out$call <- NULL
out
}
We fit the model:
data = data.frame(y=factor(runif(100)>0.5),x=rnorm(100))
model <- train(y ~ 0+ x, data = data, method = glm_wo_intercept,
family = binomial(),trControl = trainControl(method = "cv",number=3))
predict(model,data.frame(x=0),type="prob")
FALSE TRUE
1 0.5 0.5

Related

How to use tensor function of gam model in train function of caret package in r?

I am using gam method with both spline and tensor interaction functions(s and ti) inside the train function (for test and train).
I know for spline functions in gam we can use method = "gam" in train function. for example:
fit <- gam(Y ~ s(x1) + s(x2) + s(x3) , data=df)
Prediction_gam <- as.numeric(predict(fit , data=df , type = "response"))
can be changed to follow for test and train in caret package:
fit_train <- train(Y ~ x1 + x2 + x3 , data = train_df, method = "gam", trControl = train.control)
but I don't know how to add tensor interaction function of gam in train function for example:
fit <- gam(Y ~ s(x1) + s(x2) + s(x3) + ti(x1,x2) , data=df)
any suggestion would be appreciated.
the full codes are as follow:
library(caret)
df <- data.frame(Y=rnorm(100), x1=rnorm(100),x2=rnorm(100), x3=rnorm(100))
df <- as.data.frame(do.call(cbind, df))
set.seed(1)
training.samples <- df$x1%>%createDataPartition(p = 0.8, list = FALSE)
train_df <- df[training.samples, ]
test_df <- df[-training.samples, ]
train.control <- trainControl(method = "repeatedcv", number = 10, repeats = 2)
fit_train <- train(Y ~ x1 + x2 + x3 , data = train_df, method = "gam", trControl = train.control)
Prediction_train <- as.numeric(predict(fit_train , data=train_df , type = "raw"))
Prediction_test <- as.numeric(predict(fit_train , newdata =test_df , type = "raw"))

Logistic Regression in Caret - No Intercept?

Performing logistic regression in R using the caret package and trying to force a zero intercept such that probability at x=0 is .5. In other forms of regression, it seems like you can turn the intercept off using tunegrid, but that has no functionality for logistic regression. Any ideas?
model <- train(y ~ 0+ x, data = data, method = "glm", family = binomial(link="probit"),
trControl = train.control)
And yes, I "know" that the probability at x=0 should be .5, and thus trying to force it.
There's a vignette on how to set up a custom model for caret. So in the solution below, you can also see why the intercept persist:
library(caret)
glm_wo_intercept = getModelInfo("glm",regex=FALSE)[[1]]
if you look at the fit, there's a line that does:
glm_wo_intercept$fit
....
modelArgs <- c(list(formula = as.formula(".outcome ~ ."), data = dat), theDots)
...
So the intercept is there by default. You can change this line and run caret on this modified model:
glm_wo_intercept$fit = function(x, y, wts, param, lev, last, classProbs, ...) {
dat <- if(is.data.frame(x)) x else as.data.frame(x)
dat$.outcome <- y
if(length(levels(y)) > 2) stop("glm models can only use 2-class outcomes")
theDots <- list(...)
if(!any(names(theDots) == "family"))
{
theDots$family <- if(is.factor(y)) binomial() else gaussian()
}
if(!is.null(wts)) theDots$weights <- wts
# change the model here
modelArgs <- c(list(formula = as.formula(".outcome ~ 0+."), data = dat), theDots)
out <- do.call("glm", modelArgs)
out$call <- NULL
out
}
We fit the model:
data = data.frame(y=factor(runif(100)>0.5),x=rnorm(100))
model <- train(y ~ 0+ x, data = data, method = glm_wo_intercept,
family = binomial(),trControl = trainControl(method = "cv",number=3))
predict(model,data.frame(x=0),type="prob")
FALSE TRUE
1 0.5 0.5

r: coefficients from glmnet and caret are different for the same lambda

I've read a few Q&As about this, but am still not sure I understand, why the coefficients from glmnet and caret models based on the same sample and the same hyper-parameters are slightly different. Would greatly appreciate an explanation!
I am using caret to train a ridge regression:
library(ISLR)
Hitters = na.omit(Hitters)
x = model.matrix(Salary ~ ., Hitters)[, -1] #Dropping the intercept column.
y = Hitters$Salary
set.seed(0)
train = sample(1:nrow(x), 7*nrow(x)/10)
library(caret)
set.seed(0)
train_control = trainControl(method = 'cv', number = 10)
grid = 10 ^ seq(5, -2, length = 100)
tune.grid = expand.grid(lambda = grid, alpha = 0)
ridge.caret = train(x[train, ], y[train],
method = 'glmnet',
trControl = train_control,
tuneGrid = tune.grid)
ridge.caret$bestTune
# alpha is 0 and best lambda is 242.0128
Now, I use the lambda (and alpha) found above to train a ridge regression for the whole data set. At the end, I extract the coefficients:
ridge_full <- train(x, y,
method = 'glmnet',
trControl = trainControl(method = 'none'),
tuneGrid = expand.grid(
lambda = ridge.caret$bestTune$lambda, alpha = 0)
)
coef(ridge_full$finalModel, s = ridge.caret$bestTune$lambda)
Finally, using exactly the same alpha and lambda, I try to fit the same ridge regression using glmnet package - and extract coefficients:
library(glmnet)
ridge_full2 = glmnet(x, y, alpha = 0, lambda = ridge.caret$bestTune$lambda)
coef(ridge_full2)
The reason is the fact the exact lambda you specified was not used by caret. You can check this by:
ridge_full$finalModel$lambda
closest values are 261.28915 and 238.07694.
When you do
coef(ridge_full$finalModel, s = ridge.caret$bestTune$lambda)
where s is 242.0128 the coefficients are interpolated from the coefficients actually calculated.
Wheres when you provide lambda to the glmnet call the model returns exact coefficients for that lambda which differ only slightly from the interpolated ones caret returns.
Why this happens:
when you specify one alpha and one lambda for a fit on all of the data caret will actually fit:
fit = function(x, y, wts, param, lev, last, classProbs, ...) {
numLev <- if(is.character(y) | is.factor(y)) length(levels(y)) else NA
theDots <- list(...)
if(all(names(theDots) != "family")) {
if(!is.na(numLev)) {
fam <- ifelse(numLev > 2, "multinomial", "binomial")
} else fam <- "gaussian"
theDots$family <- fam
}
## pass in any model weights
if(!is.null(wts)) theDots$weights <- wts
if(!(class(x)[1] %in% c("matrix", "sparseMatrix")))
x <- Matrix::as.matrix(x)
modelArgs <- c(list(x = x,
y = y,
alpha = param$alpha),
theDots)
out <- do.call(glmnet::glmnet, modelArgs)
if(!is.na(param$lambda[1])) out$lambdaOpt <- param$lambda[1]
out
}
this was taken from here.
in your example this translates to
fit <- glmnet::glmnet(x, y,
alpha = 0)
lambda <- unique(fit$lambda)
these lambda values correspond to ridge_full$finalModel$lambda:
all.equal(lambda, ridge_full$finalModel$lambda)
#output
TRUE

Training nnet and avNNet models with caret when the output has negatives

My question is about the typical feed-forward single-hidden-layer backprop neural network, as implemented in package nnet, and trained with train() in package caret. This is related to this question but in the context of the nnet and caret packages in R.
I demonstrate the problem with a simple regression example where Y = sin(X) + small error:
raw Y ~ raw X: predicted outputs are uniformly zero where raw Y < 0.
scaled Y (to 0-1) ~ raw X: solution looks great; see code below.
The code is as follows
library(nnet)
X <- t(t(runif(200, -pi, pi)))
Y <- t(t(sin(X))) # Y ~ sin(X)
Y <- Y + rnorm(200, 0, .05) # Add a little noise
Y_01 <- (Y - min(Y))/diff(range(Y)) # Y linearly transformed to have range 0-1.
plot(X,Y)
plot(X, Y_01)
dat <- data.frame(cbind(X, Y, Y_01)); names(dat) <- c("X", "Y", "Y_01")
head(dat)
plot(dat)
nnfit1 <- nnet(formula = Y ~ X, data = dat, maxit = 2000, size = 8, decay = 1e-4)
nnpred1 <- predict(nnfit1, dat)
plot(X, nnpred1)
nnfit2 <- nnet(formula = Y_01 ~ X, data = dat, maxit = 2000, size = 8, decay = 1e-4)
nnpred2 <- predict(nnfit2, dat)
plot(X, nnpred2)
When using train() in caret, there is a preProcess option but it only scales the inputs. train(..., method = "nnet", ...) appears to be using the raw Y values; see code below.
library(caret)
ctrl <- trainControl(method = "cv", number = 10)
nnet_grid <- expand.grid(.decay = 10^seq(-4, -1, 1), .size = c(8))
nnfit3 <- train(Y ~ X, dat, method = "nnet", maxit = 2000,
trControl = ctrl, tuneGrid = nnet_grid, preProcess = "range")
nnfit3
nnpred3 <- predict(nnfit3, dat)
plot(X, nnpred3)
Of course, I could linearly transform the Y variable(s) to have a positive range, but then my predictions will be on the wrong scale. Though this is only a minor headache, I'm wondering if there is a better solution for training nnet or avNNet models with caret when the output has negative values.
This was answered on cross validated here by user topepo
The relevant part of their answer is:
Since Y is roughly between -1 and 1 you should also use linout = TRUE in your nnet and train calls.

Confidence intervals for predictions from logistic regression

In R predict.lm computes predictions based on the results from linear regression and also offers to compute confidence intervals for these predictions. According to the manual, these intervals are based on the error variance of fitting, but not on the error intervals of the coefficient.
On the other hand predict.glm which computes predictions based on logistic and Poisson regression (amongst a few others) doesn't have an option for confidence intervals. And I even have a hard time imagining how such confidence intervals could be computed to provide a meaningful insight for Poisson and logistic regression.
Are there cases in which it is meaningful to provide confidence intervals for such predictions? How can they be interpreted? And what are the assumptions in these cases?
The usual way is to compute a confidence interval on the scale of the linear predictor, where things will be more normal (Gaussian) and then apply the inverse of the link function to map the confidence interval from the linear predictor scale to the response scale.
To do this you need two things;
call predict() with type = "link", and
call predict() with se.fit = TRUE.
The first produces predictions on the scale of the linear predictor, the second returns the standard errors of the predictions. In pseudo code
## foo <- mtcars[,c("mpg","vs")]; names(foo) <- c("x","y") ## Working example data
mod <- glm(y ~ x, data = foo, family = binomial)
preddata <- with(foo, data.frame(x = seq(min(x), max(x), length = 100)))
preds <- predict(mod, newdata = preddata, type = "link", se.fit = TRUE)
preds is then a list with components fit and se.fit.
The confidence interval on the linear predictor is then
critval <- 1.96 ## approx 95% CI
upr <- preds$fit + (critval * preds$se.fit)
lwr <- preds$fit - (critval * preds$se.fit)
fit <- preds$fit
critval is chosen from a t or z (normal) distribution as required (I forget exactly now which to use for which type of GLM and what the properties are) with the coverage required. The 1.96 is the value of the Gaussian distribution giving 95% coverage:
> qnorm(0.975) ## 0.975 as this is upper tail, 2.5% also in lower tail
[1] 1.959964
Now for fit, upr and lwr we need to apply the inverse of the link function to them.
fit2 <- mod$family$linkinv(fit)
upr2 <- mod$family$linkinv(upr)
lwr2 <- mod$family$linkinv(lwr)
Now you can plot all three and the data.
preddata$lwr <- lwr2
preddata$upr <- upr2
ggplot(data=foo, mapping=aes(x=x,y=y)) + geom_point() +
stat_smooth(method="glm", method.args=list(family=binomial)) +
geom_line(data=preddata, mapping=aes(x=x, y=upr), col="red") +
geom_line(data=preddata, mapping=aes(x=x, y=lwr), col="red")
I stumbled upon Liu WenSui's method that uses bootstrap or simulation approach to solve that problem for Poisson estimates.
Example from the Author
pkgs <- c('doParallel', 'foreach')
lapply(pkgs, require, character.only = T)
registerDoParallel(cores = 4)
data(AutoCollision, package = "insuranceData")
df <- rbind(AutoCollision, AutoCollision)
mdl <- glm(Claim_Count ~ Age + Vehicle_Use, data = df, family = poisson(link = "log"))
new_fake <- df[1:5, 1:2]
boot_pi <- function(model, pdata, n, p) {
odata <- model$data
lp <- (1 - p) / 2
up <- 1 - lp
set.seed(2016)
seeds <- round(runif(n, 1, 1000), 0)
boot_y <- foreach(i = 1:n, .combine = rbind) %dopar% {
set.seed(seeds[i])
bdata <- odata[sample(seq(nrow(odata)), size = nrow(odata), replace = TRUE), ]
bpred <- predict(update(model, data = bdata), type = "response", newdata = pdata)
rpois(length(bpred), lambda = bpred)
}
boot_ci <- t(apply(boot_y, 2, quantile, c(lp, up)))
return(data.frame(pred = predict(model, newdata = pdata, type = "response"), lower = boot_ci[, 1], upper = boot_ci[, 2]))
}
boot_pi(mdl, new_fake, 1000, 0.95)
sim_pi <- function(model, pdata, n, p) {
odata <- model$data
yhat <- predict(model, type = "response")
lp <- (1 - p) / 2
up <- 1 - lp
set.seed(2016)
seeds <- round(runif(n, 1, 1000), 0)
sim_y <- foreach(i = 1:n, .combine = rbind) %dopar% {
set.seed(seeds[i])
sim_y <- rpois(length(yhat), lambda = yhat)
sdata <- data.frame(y = sim_y, odata[names(model$x)])
refit <- glm(y ~ ., data = sdata, family = poisson)
bpred <- predict(refit, type = "response", newdata = pdata)
rpois(length(bpred),lambda = bpred)
}
sim_ci <- t(apply(sim_y, 2, quantile, c(lp, up)))
return(data.frame(pred = predict(model, newdata = pdata, type = "response"), lower = sim_ci[, 1], upper = sim_ci[, 2]))
}
sim_pi(mdl, new_fake, 1000, 0.95)

Resources