R Jags Multinomial error "length mismatch in node:setValue" - r

I am trying to run a multinomial model in rjags and I keep getting this error
Error in jags.model(model.file, data = data, inits = init.values,
n.chains = n.chains, :
Error in node dmulti(b,639)
Length mismatch in Node::setValue
Here is the code that creates the error:
n = c(294, 307,38)
m= c(288, 332,19)
x=1
y=1
z=1
model_string <-"model {
for(j in 1:3) {
n[j] ~ dmulti( a[1:3], 639)
m[j] ~ dmulti(b[1:3], 639)
}
a[1:3] ~ ddirich(c(x,y,z))
b[1:3]~ ddirich(c(x,y,z))
prob = step(b[1]-a[1])
RD = b[1]-a[1]
}"
jags.param <- c("a[1]","b[1]", "prob", "RD")
jags.data <- list(n=n,m=m, x=x, y=y,z=z)
jagsfit4 <- jags(data=jags.data, n.chains = 4, inits=NULL, parameters.to.save = jags.param,
model.file=textConnection(model_string), n.thin = 1 ,n.iter=5000,
n.burnin=1500, DIC=TRUE)
Thank you!

Related

Random forest error in eval(predvars, data, env) : object not found

numcols <- colnames(Filter(is.numeric, income.df))
for (i in numcols) {
ifelse(sum(is.na(train.df[[i]]))/nrow(train.df) > 0.01,
train.df[[i]][is.na(train.df[[i]])] <- median(income.df[[i]], na.rm = TRUE),
train.df <- train.df[!is.na(train.df[[i]]), ])
}
for (i in numcols) {
ifelse(sum(is.na(valid.df[[i]]))/nrow(valid.df) > 0.01,
valid.df[[i]][is.na(valid.df[[i]])] <- median(income.df[[i]], na.rm = TRUE),
valid.df <- valid.df[!is.na(valid.df[[i]]), ])
}
library(randomForest)
income.rf <- randomForest(income_Yes ~ ., data = income.df, ntree = 500,
mtry = 4, nodesize = 5, importance = TRUE)
Getting error message using this code "Error in eval(predvars, data, env) :
object 'workclass2_ Local-gov' not found" which is my first categorical predictor in my data set. If I remove the variable, it just moves onto the next categorical variable and says that one is not found.

Invalid parent values jagsUI using beta distribution

I am trying to run a model in jags using jagsUI and I am getting this error message:
Error in jags.model(file = model.file, data = data, inits = inits, n.chains = n.chains, :
Error in node rec_sim[1]
Invalid parent values
I don't understand where is my mistake, any help is welcome.
Here is the data for my model
# Data
set.seed(1234)
year<-c(2017,2016,2008,2007)
n<-c(27,57,37,31)
preg<-c(5,24,12,12)
not.preg<-c(22,33,25,19)
prop.preg<-preg/n
mean.preg<-mean(prop.preg) #media de hembras preñadas de mis datos
fem.prop<-0.6 #proporción de sexos
sup<-0.607 #supervivencia
gua.den<-rlnorm(100,2.7,0.8)
recr<- function (ab2) {(mean.preg*fem.prop*sup*ab2)}
var<-0.1
#número de añales
rec2<- rlnorm(100, log(recr(gua.den),var))
#recruitment calculus
dens.an<-c(4.08,1.20,5.25,3.67)
real.ab.tot<- c(19.07,22.09,27.54,33.39)
ab.ant<-c(22.77,19.07,15.93,27.54)
an.fun<- mean.preg*ab.ant*fem.prop*sup
fem_rat<-gua.den*fem.prop
rec_sim<-rec2/fem_rat
rec_dens<-dens.an/(real.ab.tot*fem.prop)
rec_fun<-an.fun/real.ab.tot*fem.prop
And here is my model
library(jagsUI)
cat(file="rick.bug","
model{
for(i in 1:100){
rec_sim[i]~dbeta(a[i],b)
a[i]<-alpha*gua.den[i]*exp((-beta)*gua.den[i])
}
#priors
alpha ~dnorm(0,0.00001)
beta ~dnorm(0,0.00001)
b~dgamma(5,3)
}
")
d.jags<-c("rec_sim","gua.den")
params<-c("alpha","beta","logsigma")
inits <- function() {list(alpha = runif(1,0.1, 1),
beta = runif(1, 0.1, 1))}
ni<-1000; nc<-1; nt<-1; nb<-500
rick.sim <- jags(data = d.jags, parameters.to.save = params, model.file = "rick.bug",
n.chains = nc, n.iter = ni, n.burnin = nb, n.thin = nt)
print(rick.sim)

Problem adjusting a linear model with JAGS

I am trying to adjust a linear model with JAGS but I'm having trouble with the code. I'm writing:
library(R2jags)
library(BEST)
base<-data.table::data.table(read.csv("/Users/franco/Documents/Todo/UNAM/Facultad\ de\ Ciencias/Asignaturas\ Actuaría/Análisis\ Bayesiano\ de\ Datos/Tareas/Tarea-Examen\ 2/FootballLeague.csv"))
X <- cbind(1,as.matrix(base[,-c(1,2,12)]))
y <-as.matrix(base[,2])
n <- length(y)
m <- ncol(X)
model.jags <- function(){
tau ~ dgamma(0.01, 0.01)
for(i in 1:m){
beta[i] ~ dnorm(0,0.001)
}
for (i in 1:n){
y[i] ~ dnorm(x[i,]%*%beta,tau)
}
sigma <- pow(tau,-1)
}
jags.params <- c("beta","sigma")
jags.modelo <- jags(model.file=model.jags,parameters.to.save=jags.params,
data = list('n' = n,
'y' = y,
'x' = X,
'm'=m),
n.chains = 2,
n.thin=1,
DIC=FALSE,
n.burnin = 10000,
n.iter = 20000)
And R throws this error:
Error in jags.model(model.file, data = data, inits = init.values,
n.chains = n.chains, : RUNTIME ERROR: Compilation error on line 8.
Dimension mismatch in subset expression of y.
I don't know which is the error :/ Can someone help me, please.

Custom classification threshold for GBM

I'm trying to create a custom GBM model that tunes the classification threshold for a binary classification problem. There is a nice example provided on the caret website here, but when I try to apply something similar to GBM I receive the following error:
Error in { : task 1 failed - "argument 1 is not a vector"
Unfortunately, I have no idea where the error is and the error isn't very helpful.
Here's an example, with the code that I've used for defining the custom GBM
library(caret)
library(gbm)
library(pROC)
#### DEFINE A CUSTOM GBM MODEL FOR PROBABILITY THRESHOLD TUNING ####
## Get the model code for the original gbm method from caret
customGBM <- getModelInfo("gbm", regex = FALSE)[[1]]
customGBM$type <- c("Classification")
## Add the threshold (i.e. class cutoff) as another tuning parameter
customGBM$parameters <- data.frame(parameter = c("n.trees", "interaction.depth", "shrinkage",
"n.minobsinnode", "threshold"),
class = rep("numeric", 5),
label = c("# Boosting Iterations", "Max Tree Depth", "Shrinkage",
"Min. Terminal Node Size", "Probability Cutoff"))
## Customise the tuning grid:
## Some paramters are fixed. Will give a tuning grid of 2,500 values if len = 100
customGBM$grid <- function(x, y, len = NULL, search = "grid") {
if (search == "grid") {
grid <- expand.grid(n.trees = seq(50, 250, 50),
interaction.depth = 2, ### fix interaction depth at 2
shrinkage = 0.0001, ### fix learning rate at 0.0001
n.minobsinnode = seq(2, 10, 2),
threshold = seq(.01, .99, length = len))
} else {
grid <- expand.grid(n.trees = floor(runif(len, min = 1, max = 5000)),
interaction.depth = sample(1:10, replace = TRUE, size = len),
shrinkage = runif(len, min = .001, max = .6),
n.minobsinnode = sample(5:25, replace = TRUE, size = len),
threshold = runif(1, 0, size = len))
grid <- grid[!duplicated(grid),] ### remove any duplicated rows in the training grid
}
grid
}
## Here we fit a single gbm model and loop over the threshold values to get predictions from the
## same gbm model.
customGBM$loop = function(grid) {
library(plyr)
loop <- ddply(grid, c("n.trees", "shrinkage", "interaction.depth", "n.minobsinnode"),
function(x) c(threshold = max(x$threshold)))
submodels <- vector(mode = "list", length = nrow(loop))
for (i in seq(along = loop$threshold)) {
index <- which(grid$n.trees == loop$n.trees[i] &
grid$interaction.depth == loop$interaction.depth[i] &
grid$shrinkage == loop$shrinkage[i] &
grid$n.minobsinnode == loop$n.minobsinnode[i])
cuts <- grid[index, "threshold"]
submodels[[i]] <- data.frame(threshold = cuts[cuts != loop$threshold[i]])
}
list(loop = loop, submodels = submodels)
}
## Fit the model independent of the threshold parameter
customGBM$fit = function(x, y, wts, param, lev, last, classProbs, ...) {
theDots <- list(...)
if (any(names(theDots) == "distribution")) {
modDist <- theDots$distribution
theDots$distribution <- NULL
} else {
if (is.numeric(y)) {
stop("This works only for 2-class classification problems")
} else modDist <- if (length(lev) == 2) "bernoulli" else
stop("This works only for 2-class classification problems")
}
# if (length(levels(y)) != 2)
# stop("This works only for 2-class problems")
## check to see if weights were passed in (and availible)
if (!is.null(wts)) theDots$w <- wts
if (is.factor(y) && length(lev) == 2) y <- ifelse(y == lev[1], 1, 0)
modArgs <- list(x = x,
y = y,
interaction.depth = param$interaction.depth,
n.trees = param$n.trees,
shrinkage = param$shrinkage,
n.minobsinnode = param$n.minobsinnode,
distribution = modDist)
do.call("gbm.fit", modArgs)
}
## Now get a probability prediction and use different thresholds to
## get the predicted class
customGBM$predict = function(modelFit, newdata, submodels = NULL) {
out <- predict(modelFit, newdata, n.trees = modelFit$tuneValue$n.trees,
type = "response")#[, modelFit$obsLevels[1]]
out[is.nan(out)] <- NA
class1Prob <- ifelse(out >= modelFit$tuneValue$threshold,
modelFit$obsLevels[1],
modelFit$obsLevels[2])
## Raise the threshold for class #1 and a higher level of
## evidence is needed to call it class 1 so it should
## decrease sensitivity and increase specificity
out <- ifelse(class1Prob >= modelFit$tuneValue$threshold,
modelFit$obsLevels[1],
modelFit$obsLevels[2])
if (!is.null(submodels)) {
tmp2 <- out
out <- vector(mode = "list", length = length(submodels$threshold))
out[[1]] <- tmp2
for (i in seq(along = submodels$threshold)) {
out[[i + 1]] <- ifelse(class1Prob >= submodels$threshold[[i]],
modelFit$obsLevels[1],
modelFit$obsLevels[2])
}
}
out
}
## The probabilities are always the same but we have to create
## mulitple versions of the probs to evaluate the data across
## thresholds
customGBM$prob = function(modelFit, newdata, submodels = NULL) {
out <- predict(modelFit, newdata, type = "response",
n.trees = modelFit$tuneValue$n.trees)
out[is.nan(out)] <- NA
out <- cbind(out, 1 - out)
colnames(out) <- modelFit$obsLevels
if (!is.null(submodels)) {
tmp <- predict(modelFit, newdata, type = "response", n.trees = submodels$n.trees)
tmp <- as.list(as.data.frame(tmp))
lapply(tmp, function(x, lvl) {
x <- cbind(x, 1 - x)
colnames(x) <- lvl
x}, lvl = modelFit$obsLevels)
out <- c(list(out), tmp)
}
out
}
fourStats <- function (data, lev = levels(data$obs), model = NULL) {
## This code will get use the area under the ROC curve and the
## sensitivity and specificity values using the current candidate
## value of the probability threshold.
out <- c(twoClassSummary(data, lev = levels(data$obs), model = NULL))
## The best possible model has sensitivity of 1 and specificity of 1.
## How far are we from that value?
coords <- matrix(c(1, 1, out["Spec"], out["Sens"]),
ncol = 2,
byrow = TRUE)
colnames(coords) <- c("Spec", "Sens")
rownames(coords) <- c("Best", "Current")
c(out, Dist = dist(coords)[1])
}
And then some code showing how to use the custom model
set.seed(949)
trainingSet <- twoClassSim(500, -9)
mod1 <- train(Class ~ ., data = trainingSet,
method = customGBM, metric = "Dist",
maximize = FALSE, tuneLength = 10,
trControl = trainControl(method = "cv", number = 5,
classProbs = TRUE,
summaryFunction = fourStats))
The model appears to run, but finishes with the error from above. If someone could please help me with customising the GBM model to tune the GBM parameters, and the probability threshold for the classes that would be great.

R Crashes when training using caret and method = gamLoess

When I run the code below, R crashes. If I comment out the tuneGrid line in the call to train, there is no crash. I've tried this with another dataset, and still crash R. Crash message is
R Session Aborted
R encountered a fatal error
The session was terminated
Start new session.
The code is:
library(splines)
library(foreach)
library(gam)
library(lattice)
library(ggplot2)
library(caret)
# crashes when I uncomment the tuneGrid = tuneGrid line
Set_seed_seed <- 100
data_set <- diamonds[, c(1, 5, 6, 7, 8, 9, 10)]
data_set <- data_set[1:1000,]
formula <- price ~ carat + depth + table + x + y + z
training_control <- trainControl(method = "cv", allowParallel = FALSE)
tune_grid <- expand.grid(span = seq(0.1, 0.9, length = 9), degree = seq(1, 2, length = 2))
set.seed(Set_seed_seed)
GAM_model <- train(formula,
data = data_set,
method = "gamLoess",
tuneGrid = tune_grid,
trControl = training_control
)
This occurred in R3.2.1 and 3.2.2 using R Studio.
In R gui, also get crashes.
It is a bug in the gam package. I alerted Trevor Hastie on March 3, 2014 about it:
library(gam)
set.seed(1)
x <- rnorm(1000)
y <- x^2+0.1*rnorm(1000)
tdat <- data.frame(y = y, x = x)
m1 <- gam(y ~ lo(x, span = .5, degree = 2), data = tdat)
That works fine but as I fit multiple models a seg fault occurs (but only
with loess and degree = 2).
This will produce it for me:
for(i in 1:10) m1 <- gam(y ~ lo(x, span = .5, degree = 2), data = tdat)
I verified that the problem exists. I debugged the program and found that the program gets stuck as shown. This is a bug with the foreach package
train(formula, data=data_set, ...)
useMethod("train") # train(); namespace:caret
train(x, y, weight = w, ...) train.formula(); # namespace:caret
useMethod("train") # train(); namespace:caret
nominalTrainWorkflow(x = x, ...) # train.default(); namespace:caret
result <- foreach(iter = , ...) # nominalTrainWorkflow(); namespace:caret
e <- getDoSeq() # %op%; namespace:foreach
list(fun = doSeq, data=NULL) # getDoSeq(); namespace:foreach
e$fun(obj, substitute(ex), parent.frame(), e$data) # %op%; namespace:foreach
tryCatch(accumulator(list(r), i) # e$fun; namespace:foreach

Resources