plm cross-validation runtime error - r

I am trying to cross-validate a panel data analysis, but I am having trouble with the prediction function. The code below is the section of the CV function where the problem occurs. When I run this part of the function I get the error message shown below the code.
compare <- data.frame()
train.model <- list()
for (f in 1:numoffolds)
{
  # Fit a two-way fixed-effects model on every fold except fold f
  train.model[[f]] <- plm(logit(II1) ~ ., data = select(filter(train, cv != f), -Incomegroup, -cv),
                          model = "within", effect = "twoways", index = c("Year", "Country"))
  # Predict on the held-out fold and store predictions next to the observed values
  II1.p <- predict(train.model[[f]], newdata = select(filter(train, cv == f), -Country, -Year, -cv, -Incomegroup), type = "response")
  II1 <- filter(train, cv == f)$II1
  compare <- rbind(compare, data.frame(II1.p = II1.p, II1 = II1))
}
Error in crossprod(beta, t(X)) : non-conformable arguments
Called from: crossprod(beta, t(X))
I've had a look at the prediction function, and the X and beta terms have conflicting dimensions. Does anyone have a suggestion as to what I can do to overcome this problem?
function (object, newdata = NULL, ...)
{
    tt <- terms(object)
    if (is.null(newdata)) {
        result <- fitted(object, ...)
    }
    else {
        Terms <- delete.response(tt)
        m <- model.frame(Terms, newdata)
        X <- model.matrix(Terms, m)
        beta <- coef(object)
        result <- as.numeric(crossprod(beta, t(X)))
    }
    result
}
I've been thinking about resorting to other methods such as tree-based regression, but I wanted to use plm to make a specific point. Any helpful comments and/or code would be greatly appreciated.
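One likely cause (my diagnosis, not confirmed in the thread): for a "within" model, coef() returns only the slope coefficients, while model.matrix() built from newdata includes an intercept column, so beta and t(X) cannot conform. A minimal sketch of one workaround on the built-in Grunfeld data: fit the equivalent least-squares dummy-variable (LSDV) model with lm(), which carries the fixed effects explicitly and whose predict() handles newdata.
library(plm)
data("Grunfeld", package = "plm")

# Two-way within estimator: coef() holds only the slopes
fe <- plm(inv ~ value + capital, data = Grunfeld,
          model = "within", effect = "twoways",
          index = c("firm", "year"))

# LSDV equivalent: same slopes (up to numerical tolerance),
# but predict() works on new data
lsdv <- lm(inv ~ value + capital + factor(firm) + factor(year),
           data = Grunfeld)

all.equal(unname(coef(fe)), unname(coef(lsdv)[2:3]))  # same slopes
predict(lsdv, newdata = head(Grunfeld))               # conformable now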

Related

Question regarding k fold cross validation for KNN using R

I am trying to fit 5-fold cross-validation for several values of k, using the OJ data set from the ISLR package. My code so far is as follows:
library(ISLR)
library(class)
ks <- c(1:5)
err.rate.test <- numeric(length = 5)
folds <- cut(seq(1, nrow(OJ)), breaks = 5, labels = FALSE)
for (j in seq(along = ks)) {
  set.seed(123)
  cv.knn <- sapply(1:5, FUN = function(i) {
    testID <- which(folds == i, arr.ind = TRUE)
    test.X <- OJ[testID, 3]
    test.Y <- OJ[testID, 1]
    train.X <- OJ[-testID, 3]
    train.Y <- OJ[-testID, 1]
    knn.test <- knn(data.frame(train.X), data.frame(test.X), train.Y, k = ks[j])
    cv.test.est <- mean(knn.test != test.Y)
    return(cv.test.est)
  })
  err.rate.test[j] <- mean(cv.knn)
}
err.rate.test
[1] 0.3757009 0.3757009 0.3757009 0.3757009 0.3757009
The code doesn't give any errors, but for some reason my test error rate is the same for every value of k. That seems weird to me, so I assume there is something wrong with my code. Can anyone help me figure it out? Thank you.
Remove the set.seed(123) call inside the loop; it is what causes the repeated error rates.
set.seed is used for reproducibility: it fixes the random number stream, so any random steps (here, knn's random tie-breaking between equidistant neighbours) behave identically on every pass through the loop, resulting in the same predictions and therefore the same error rates.
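A sketch of how to apply this (my rearrangement, not from the original answer): drop the call inside the loop and, if reproducibility is still wanted, seed once before it.
library(ISLR)
library(class)

ks <- 1:5
err.rate.test <- numeric(length(ks))
folds <- cut(seq_len(nrow(OJ)), breaks = 5, labels = FALSE)

set.seed(123)  # seed once, outside the loop
for (j in seq_along(ks)) {
  cv.knn <- sapply(1:5, function(i) {
    testID <- which(folds == i)
    pred <- knn(data.frame(OJ[-testID, 3]), data.frame(OJ[testID, 3]),
                OJ[-testID, 1], k = ks[j])
    mean(pred != OJ[testID, 1])
  })
  err.rate.test[j] <- mean(cv.knn)
}
err.rate.test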

R issue: performing lm and then a boxcox to find a proper lambda value

I have a dataset of climate data in a data.frame (columns are measuring stations, rows indicate time of measurement), and I'm trying to find the proper lambda values for a Yeo-Johnson transform to limit the impact of skewness on a principal component analysis.
Obviously, the first step is to get log-likelihoods to find the best lambda. I use the following, where i is the index of a column:
getYeoJohsnonLambda <- function(myClimateData, cols, lambda_min, lambda_max, eps)
...
lambda <- seq(lambda_min, lambda_max, eps)
for(i in cols)
{
  formula <- as.formula(paste("myClimateData$", colnames(myClimateData)[i], "~1"))
  currentModel <- lm(formula, myClimateData)
  print(currentModel)
  myboxCox <- boxCox(currentModel, lambda = lambda, family = "yjPower", plotit = FALSE)
  ...
}
When I try to call it on a climateData time series, which could be, for example:
climateData <- data.frame(c(8.2,6.83,5.46,4.1,3.73,3.36,3,3,3,3,3.7), c(0,0.66,1.33,2,2,2,2,2,2,2,1.6))
I get this error : Error in is.data.frame(data) : object 'myClimateData' not found
This is weird, as lm seems to find it and returns a correct fit, and myClimateData should be found since it is one of the arguments of the function, right?
Sadly, it seems the problem comes from the boxCox function rather than from your getYeoJohsnonLambda function. As BrodieG pointed out in a related question, boxCox uses parent.frame as an argument to eval, which the documentation considers bad practice.
One way to solve this is to build the models before the call, as suggested in Adam Quek's answer:
library(car)
climateData <- data.frame(c(8.2,6.83,5.46,4.1,3.73,3.36,3,3,3,3,3.7), c(0,0.66,1.33,2,2,2,2,2,2,2,1.6))
names(climateData) <- c("a","b")

# Build the lm fits outside the function, in an environment boxCox can see
modelList <- list()
for(k in 1:ncol(climateData)) {
  modelList[[k]] <- lm(as.formula(paste0(names(climateData)[k], "~1")), data = climateData)
}

getYeoJohnsonLambda <- function(myClimateData, cols, lambda_min, lambda_max, eps)
{
  # Recommended values: lambda_min = -0.5, lambda_max = 2.0, eps = 0.1
  myboxCox <- list()
  lmd <- seq(lambda_min, lambda_max, eps)
  for(i in cols)
  {
    cat("Creating model for column #", i, "\n")
    currentModel <- modelList[[i]]
    myboxCox[[i]] <- boxCox(currentModel, lambda = lmd, family = "yjPower", plotit = FALSE)
  }
  return(myboxCox)
}
test <- getYeoJohnsonLambda(climateData, c(1,2), -0.5, 2, 0.1)
Another solution (arguably cleaner): use yeo.johnson from the VGAM package:
library(VGAM)
getYeoJohnsonLambda_VGAM <- function(myClimateData, cols, lambda_min, lambda_max, eps)
{
  # Recommended values: lambda_min = -0.5, lambda_max = 2.0, eps = 0.1
  lmd <- seq(lambda_min, lambda_max, eps)
  # Note: transform the function's argument, not the global climateData,
  # and restrict it to the requested columns
  return(apply(myClimateData[, cols], 2, yeo.johnson, lambda = lmd))
}
test2 <- getYeoJohnsonLambda_VGAM(climateData, c(1,2), -0.5, 2, 0.1)
Here's a solution without troubleshooting the function getYeoJohsnonLambda:
iris.dat <- iris[-5]
vars <- names(iris.dat)
lmd <- seq(.1, 1, .1)  # lambda_min, lambda_max, eps
all.form <- lapply(vars, function(x) as.formula(paste0(x, "~ 1")))
all.lm <- lapply(all.form, lm, data = iris.dat)

library(car)  # boxCox with family = "yjPower" lives in car, not MASS
all.bcox <- lapply(all.form, boxCox, data = iris.dat,
                   lambda = lmd, family = "yjPower", plotit = FALSE)

compute function in neuralnet R package not working when reproduced

I trained a model in neuralnet and I am trying to figure out how to compute the results in Excel. Using the compute function called from the package, everything works fine. But when I step into the source code (F2 in RStudio, or the GitHub page) and run the function body myself, it stops at the relist() call with the error:
Error in relist(weights, nrow.weights, ncol.weights) :
unused argument (ncol.weights)
I think the problem is the relist() function, but I do not know how to transform the weights without it, and the neuralnet package does not export its own version of relist(). If you skip the relist line you get the following error: Error in neurons[[i]] %*% weights[[i]] : non-conformable arguments, because the weights weren't reshaped correctly. I tried the same thing on my own data and got the same error.
library(neuralnet)
library(MASS)  # Cars93 lives in MASS

normalize <- function(x) {
  return((x - min(x)) / (max(x) - min(x)))
}

newdf <- Cars93
newdf <- na.omit(newdf)
newdf <- newdf[complete.cases(newdf), ]
newdf$Cylinders <- as.numeric(levels(newdf$Cylinders))[newdf$Cylinders]
newdf$Horsepower <- normalize(newdf$Horsepower)
newdf$EngineSize <- normalize(newdf$EngineSize)
newdf$Cylinders <- normalize(newdf$Cylinders)

smp_size <- floor(0.75 * nrow(newdf))
set.seed(12)
train_ind <- sample(seq_len(nrow(newdf)), size = smp_size)
train <- newdf[train_ind, ]
test <- newdf[-train_ind, ]

carsNN <- neuralnet(Horsepower ~ Cylinders + EngineSize,
                    data = train, hidden = c(1))
cars_results <- compute(carsNN, test[11:12])
# This is the source code as shown via F2 in RStudio and on GitHub
sourceCodeCompute <- function (x, covariate, rep = 1)
{
  nn <- x
  linear.output <- nn$linear.output
  weights <- nn$weights[[rep]]
  nrow.weights <- sapply(weights, nrow)
  ncol.weights <- sapply(weights, ncol)
  weights <- unlist(weights)
  if (any(is.na(weights)))
    weights[is.na(weights)] <- 0
  weights <- relist(weights, nrow.weights, ncol.weights)
  length.weights <- length(weights)
  covariate <- as.matrix(cbind(1, covariate))
  act.fct <- nn$act.fct
  neurons <- list(covariate)
  if (length.weights > 1)
    for (i in 1:(length.weights - 1)) {
      temp <- neurons[[i]] %*% weights[[i]]
      act.temp <- act.fct(temp)
      neurons[[i + 1]] <- cbind(1, act.temp)
    }
  temp <- neurons[[length.weights]] %*% weights[[length.weights]]
  if (linear.output)
    net.result <- temp
  else net.result <- act.fct(temp)
  list(neurons = neurons, net.result = net.result)
}
sourceCodeCompute(carsNN, test[11:12])
You are using the wrong relist function. Try explicitly calling neuralnet:::relist, which is the (unexported) version that is picked up automatically inside the package namespace.
(I don't know how this question relates to Excel.)
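A minimal sketch of the fix (the one-line change is mine; the diagnosis is the answer's): outside the package namespace, the unqualified relist() call resolves to utils::relist, whose second argument is skeleton, hence the "unused argument (ncol.weights)" error. Qualifying the call restores the internal version:
# In the copied sourceCodeCompute(), replace
#   weights <- relist(weights, nrow.weights, ncol.weights)
# with the namespace-qualified internal function:
weights <- neuralnet:::relist(weights, nrow.weights, ncol.weights)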

Updated: Parallel computing using R results in "attempt to replicate an object of type 'closure'"

I have set up a Metropolis-Hastings algorithm, and now I am trying to run it using parallel computing. I have set up a single-chain function:
library(parallel)
library(foreach)
library(mvtnorm)
library(doParallel)

n <- 100
mX <- 1:n
vY <- rnorm(n)
chains <- 4
iter <- n
p <- 2

# Log-likelihood
post <- function(y, theta) dmvnorm(t(y), rep(0, length(y)),
  theta[1] * exp(-abs(matrix(rep(mX, n), n) - matrix(rep(mX, each = n), n)) / theta[2]),
  log = TRUE)
geninits <- function() list(theta = runif(p, 0, 1))
dist <- 0.01
jump <- function(x, dist) exp(log(x) + rmvnorm(1, rep(0, p), diag(rep(dist, p))))

MCsingle <- function() { # This is part of a larger function, so no inputs are needed
  inits <- geninits()
  theta.post <- matrix(NA, nrow = p, ncol = iter)
  for (i in 1:p) theta.post[i, 1] <- inits$theta[i]
  for (t in 2:iter) {
    theta_star <- c(jump(theta.post[, t-1], dist))
    pstar <- post(vY, theta = theta_star)  # post is the log-likelihood using dmvnorm
    pprev <- post(vY, theta = theta.post[, t-1])
    r <- min(exp(pstar - pprev), 1)
    accept <- rbinom(1, 1, prob = r)
    if (accept == 1) {
      theta.post[, t] <- theta_star
    } else {
      theta.post[, t] <- theta.post[, t-1]
    }
  }
  return(theta.post)
}
This returns a p x iter matrix, with p parameters and iter iterations. I then run the chains in parallel:
cl <- makeCluster(4)
registerDoParallel(cl)
posterior <- foreach(c = 1:chains) %dopar% {
  MCsingle()
}
UPDATE: When I tried to simplify the problem, the code suddenly seemed to work. Even when I purposely introduced errors, the code ran perfectly and the results were as wanted. So, for others with similar problems: unfortunately I cannot give an answer.
A follow-up question:
My initial purpose was to build this into an entire function, such that
MCmulti <- function(mX, vY, iter, chains) {
  posterior <- foreach(c = 1:chains) %dopar% {
    MCsingle()
  }
  return(posterior)
}
but the foreach loop does not seem to find all the required functions:
Error in FUN() : task 1 failed - "could not find function "geninits""
Can anybody explain how to make custom functions available inside a foreach loop? Should I pass them in as arguments, i.e. MCmulti <- function(FUN, ...) FUN() and call MCmulti(MCsingle, ...)?
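One common fix (a suggestion of mine, not from the original thread): when %dopar% is called inside a function, foreach's automatic export only captures that function's environment, so globals like MCsingle and geninits never reach the workers. The .export and .packages arguments ship them explicitly. A minimal sketch under that assumption:
MCmulti <- function(mX, vY, iter, chains) {
  posterior <- foreach(c = 1:chains,
                       # ship the helper functions and globals to the workers
                       .export = c("MCsingle", "geninits", "jump", "post",
                                   "vY", "mX", "iter", "p", "dist"),
                       # load mvtnorm on each worker for dmvnorm/rmvnorm
                       .packages = "mvtnorm") %dopar% {
    MCsingle()
  }
  return(posterior)
}
Passing the function in as an argument (MCmulti <- function(FUN, ...)) also works, since arguments are serialized to the workers along with the task.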

How does the function multinom from R package nnet compute the multinomial probability weights?

I know the theoretical answer to the question in my title, which is discussed here or in this previous question on Stack Overflow. My problem is that, even allowing for some numerical rounding, the probability weights I compute using the coefficients fitted by the R function multinom are quite different from the weights obtained directly from the same function (through predict(fit, newdata = dat, "probs")). I tried to compute these weights numerically in both Java and R, and both implementations give the same results, which are in fact different from the values returned by predict.
Do you know how I may discover the implementation of the function predict(..., "probs") for the R function multinom?
I first install nnet and open the help page for the nnet function. I see that the function creates an nnet object.
I try predict.nnet, but nothing comes up. This means either the package is not loaded, the function doesn't exist, or it's hidden. methods("predict") reveals that the method is in fact hidden (indicated by the *).
> methods("predict")
[1] predict.ar* predict.Arima* predict.arima0* predict.glm
[5] predict.HoltWinters* predict.lm predict.loess* predict.mlm
[9] predict.multinom* predict.nls* predict.nnet* predict.poly
[13] predict.ppr* predict.prcomp* predict.princomp* predict.smooth.spline*
[17] predict.smooth.spline.fit* predict.StructTS*
Calling this function explicitly reveals its code.
> nnet:::predict.nnet
function (object, newdata, type = c("raw", "class"), ...)
{
    if (!inherits(object, "nnet"))
        stop("object not of class \"nnet\"")
    type <- match.arg(type)
    if (missing(newdata))
        z <- fitted(object)
    else {
        if (inherits(object, "nnet.formula")) {
            newdata <- as.data.frame(newdata)
            rn <- row.names(newdata)
            Terms <- delete.response(object$terms)
            m <- model.frame(Terms, newdata, na.action = na.omit,
                xlev = object$xlevels)
            if (!is.null(cl <- attr(Terms, "dataClasses")))
                .checkMFClasses(cl, m)
            keep <- match(row.names(m), rn)
            x <- model.matrix(Terms, m, contrasts = object$contrasts)
            xint <- match("(Intercept)", colnames(x), nomatch = 0L)
            if (xint > 0L)
                x <- x[, -xint, drop = FALSE]
        }
        else {
            if (is.null(dim(newdata)))
                dim(newdata) <- c(1L, length(newdata))
            x <- as.matrix(newdata)
            if (any(is.na(x)))
                stop("missing values in 'x'")
            keep <- 1L:nrow(x)
            rn <- rownames(x)
        }
        ntr <- nrow(x)
        nout <- object$n[3L]
        .C(VR_set_net, as.integer(object$n), as.integer(object$nconn),
            as.integer(object$conn), rep(0, length(object$wts)),
            as.integer(object$nsunits), as.integer(0L), as.integer(object$softmax),
            as.integer(object$censored))
        z <- matrix(NA, nrow(newdata), nout, dimnames = list(rn,
            dimnames(object$fitted.values)[[2L]]))
        z[keep, ] <- matrix(.C(VR_nntest, as.integer(ntr), as.double(x),
            tclass = double(ntr * nout), as.double(object$wts))$tclass,
            ntr, nout)
        .C(VR_unset_net)
    }
    switch(type, raw = z, class = {
        if (is.null(object$lev)) stop("inappropriate fit for class")
        if (ncol(z) > 1L) object$lev[max.col(z)] else object$lev[1L +
            (z > 0.5)]
    })
}
<bytecode: 0x0000000009305fd8>
<environment: namespace:nnet>
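The same trick answers the question as asked (my addition, applying the technique above): predict.multinom is hidden in nnet's namespace too, so it can be inspected the same way.
# Inspect the method the question actually asks about
nnet:::predict.multinom

# Or, without knowing which namespace hides it:
getAnywhere("predict.multinom")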
