Write results from stepAIC to a table - r

I am trying to write r2, rmse, coefficients, and standardized coefficients from stepAIC to a .CSV file:
NO3_lmres_ClimateOnly <- data.frame()
for (i in unique(Data$SeasAlltxt)){
print (i)
subdata1 <- subset(Data, SeasAlltxt == i)
for (j in unique(Data$ALSCIDtxtall)){
subdata2 <- subset(subdata1, ALSCIDtxtall == j)
fit <- lm(NO3resid~Avg94NO3+MaxDepth_m+MaxDepthDOY+FirstZeroDOY+PeakToGone+PRISMppt+PRISMtmax, data = subdata2, na.action = na.omit)
step <- stepAIC(fit, direction="both")
rmse <- round(sqrt(mean(resid(step)^2)), 3)
r2 <- round(summary(step)$r.squared, 3)
coefs <- summary(step)$coefficients
stdcoefs <- lm.beta(step)
stdcoefs <- unname(stdcoefs)
params <- names(stdcoefs)
tempvalues <- data.frame(i,j,rmse,r2,coefs,stdcoefs,params)
colnames(tempvalues) <- c('SeasAlltxt', 'ALSCIDtxtall', 'rmse', 'r2', 'coef', 'stdcoef','param')
NO3_lmres_ClimateOnly <- rbind(NO3_lmres_ClimateOnly,tempvalues)
}
}
write.csv(NO3_lmres_ClimateOnly, file = "NO3_ClimateOnly_stats.csv")
However, the above code produces this error:
Error in data.frame(i, j, rmse, r2, coefs, stdcoefs, params) :
arguments imply differing number of rows: 1, 3, 2, 0
I would also like to write the p-value associated with each parameter to the output table.
Any suggestions for how to accomplish this?

Maybe you want to change your code:
for (j in unique(subdata1$ALSCIDtxtall))
...
coefs <- summary(step)$coefficients[,1]
...
tempvalues <- data.frame(t(c(i,j,rmse,r2,coefs,stdcoefs,params)),stringsAsFactors=F)
colnames(tempvalues ) <- c('SeasAlltxt', 'ALSCIDtxtall', 'rmse', 'r2', names(coefs), paste('stdcoef:',params),params)
Bud the final rbind will give you an error when stepAIC select different number of coefficients.
Think of using a list instead:
Define cont=1 outside the for
then, change the following lines:
tempvalues <- data.frame(t(c(i,j,rmse,r2,coefs,stdcoefs,params)),stringsAsFactors=F)
colnames(tempvalues ) <- c('SeasAlltxt', 'ALSCIDtxtall', 'rmse', 'r2', names(coefs), paste('stdcoef:',params),params)
NO3_lmres_ClimateOnly[[cont]] <- tempvalues
cont=cont+1
Good luck!!

Related

parallelizing lapply with parLapply does not recognize objects even though I suppied them

I am trying to parallelize something with parLapply. I am exporting all necessary information to the cores, but somehow I am getting an error saying that it cannot find the object 'market_time' (first line of the function that is called in parLapply. However, this object is just a column of the data table 'dt' that I export to the cores.
library('data.table')
library('numDeriv')
library('snow')
cores=detectCores()
cl <- makeCluster(cores[1], type = 'PSOCK')
markets <- unique(dt[, market_time])
R = 10000
nu_p <- rnorm(n = R, -2, 0.5)
nu_xr <- rnorm(n = R, 2, 0.5)
nu_xm <- rnorm(n = R, 2, 0.5)
nu_xj <- rnorm(n = R, 2, 0.5)
clusterExport(cl,c('dt','nu_p','nu_xr','nu_xm','nu_xj')
temp <- parLapply(cl, markets,calc_mc_w, dt=dt,nu_p=nu_p,nu_xr= nu_xr,
nu_xm=nu_xm,nu_xj=nu_xj)
where the function calc_mc_w calls:
calc_mc_w <- function(m, dt,nu_p,nu_xr,nu_xm,nu_xj){
dt_mkt = dt[market_time==m,]
market_time <- dt_mkt[, market_time]
x_m <- dt_mkt[, x_m]
x_j <- dt_mkt[, x_j]
x_r <- dt_mkt[, x_r]
p <- as.matrix(dt_mkt[, p])
xi <- dt_mkt[, xi]
p <- as.matrix(dt_mkt[, p])
jacobian <- jacobian(function(x){calc_shares(x, x_m, x_j, x_r, xi, nu_p,
nu_xm, nu_xj, nu_xr,
market_time)},p)
output <- dt_mkt[,c('prod','market','time','retailer')]
#Get a system of equations with as many equations as unknowns
retailers = unique(dt_mkt[, retailer])
temp <- lapply(retailers,calc_mc_w_r,dt_mkt = dt_mkt, jacobian = jacobian)
temp <- rbindlist(temp)
output <- merge(output,temp,by.x = c('prod','retailer'),
by.y = c('prod','retailer'), allow.cartesian=TRUE)
output
}
calc_mc_w_r <- function(r, dt_mkt, jacobian){
dt_r = dt_mkt[retailer == r,]
result <- dt_r[,c('prod','retailer')]
rows = (dt_mkt[,'retailer']== r)
jacobian_r = jacobian[rows,rows]
result <- result[,mc_w := solve(jacobian_r, dt_r[,shares]+ jacobian_r %*% dt_r[,p])]
result
}
The error I get is:
Error in checkForRemoteErrors(val) :
2 nodes produced errors; first error: object 'market_time' not found
If instead, I do not export the data table dt, but instead each column of it, I get the same error but just for 'jacobian' which is something that I calculate in the function (I do not want to calculate it across the whole dataset as it is super costly, which is why I just want to calculate it on each subset).

neuralnet in R always produces the same result

I created a simple neural network. The issue is whatever I done for my network it classifies things perfectly.Always get accuracy, sensitivity, specificity as 1.
I need to know that I have made something wrong
following is my code.
#maxs <- apply(new_columns, 2, max)
#mins <- apply(new_columns, 2, min)
#scaled <- as.data.frame(scale(new_columns, center = mins, scale = maxs - mins))
#train_ <- scaled[index,]
#test_ <- scaled[-index,]
# Notice the coercion happens before doing anything else.
#.new_columns <- new_columns %>% mutate(yyes = as.factor(yyes))
set.seed(3033)
intrain <- createDataPartition(y = new_columns$yyes, p= 0.7, list = FALSE)
train_ <- new_columns[intrain,]
test_ <- new_columns[-intrain,]
n <- names(train_)
f <- as.formula(paste("yyes ~", paste(n[!n %in% "yyes"], collapse = " + ")))
nn <- neuralnet(f,data=train_,hidden=c(3,2),algorithm = "rprop+",threshold=0.1,stepmax = 1e+06)
pr.nn <- compute(nn,test_)
pr.nn_ <- pr.nn$net.result*(max(new_columns$yyes)-min(new_columns$yyes))+min(new_columns$yyes)
test.r <- (test_$yyes)*(max(new_columns$yyes)-min(new_columns$yyes))+min(new_columns$yyes)
test_pred_binary_neu <- +(pr.nn_ >= 0.5)
MSE.nn <- sum((test.r - pr.nn_)^2)/nrow(test_)
caret::confusionMatrix(factor(test_pred_binary_neu),factor(test.r))
summary(new_columns)
xtab_neu <- table(test_pred_binary_neu, test.r)
confusionMatrix(xtab_neu, positive = "1")
Kindly help to to sort this.
following are my statistic results

kmeans results as initial parameters in Mclust

I am doing Gaussian mixture models. I have done kmeans on the dataset and I want to use the means, variances and the size for the initial parameters for the em algorithm in R. I found that the parameters is a list of 3 and I tried to do the same thing but it gives me the following error :
Error in array(x, c(length(x), 1L), if (!is.null(names(x))) list(names(x), :
'data' must be of a vector type, was 'NULL'
My code
l <- kmeans(iris[,-5],centers=3)
pi <- l$size/length(iris[,1])
my <- t(l$centers)
sig <- vector("list", 3)
new <- as.data.frame(cbind(iris[,-5],l$cluster))
for (i in 1:3) {
subdata<-subset(new[,1:4],new[,5]==i);
sig[[i]]<-cov(subdata)
}
par <- vector("list",3)
par[[1]] <- pi; par[[2]] <- my; par[[3]] <- sig
kk <- em(modelName = msEst$modelName, data = iris[,-5],parameters = par)
Can someone please tell how should I assign the kmeans results as initial parameters?
Following is a quick example of what you seem to be after. The main thing you have to do is the get the parameters argument in the correct form. The tickly bit is with the variance list. There is a bit of help with this if you use the mclustVariance function.
library(mclust)
g <- 3
dat <- iris[, -5]
p <- ncol(dat)
n <- nrow(dat)
k_fit <- kmeans(dat, centers=g)
par <- vector("list", g)
par$pro <- k_fit$size/n
par$mean <- t(k_fit$centers)
sigma <- array(NA, c(p, p, g))
new <- as.data.frame(cbind(dat, k_fit$cluster))
for (i in 1 : g) {
subdata <- subset(new[, 1 : p], new[, (p+1)]==i)
sigma[,, i] <- cov(subdata)
}
variance <- mclustVariance("EEE", d = p, G = g)
par$variance <- variance
par$variance$sigma <- sigma
kk <- em(modelName = "EEE", data = dat, parameters = par)

Rolling Window Regression - Error Calculation

I have a data set and want to essentially fit a linear model with a rolling time window, find the fitted values and calculate the errors in the estimate. I have functions which calculate the error and I have the start of the algorithm, but I keep getting null time series with the algorithm below. Can anybody spot a fix for it?
rollerOLS <- function(data, measure, predict, predictor){
error <- c()
m <- dim(data)[1]
for(i in 1:(floor(m/142)-10)){
data.new <- as.data.frame(data[c((1+(142*(i-1))):((i+9)*142)),])
data.pred <- as.data.frame(data[c((1+(142*(i+9))):((i+10)*142)-1),])
n <- dim(data.new)[1]
k <- dim(data.pred)[1]
x <- data.new[-1,predictor]
y <- data.new[-n, predict]
mod <- lm(y ~ x)
ts <- predict.lm(mod, newdata = data.frame(data.pred[, predictor]), interval="none")
actual <- data.pred[-k,predict]
error[i] <- measure(ts, actual)
}
return(mod)
}
Note that 142 is specific to my data set.
The problem was in the ts line and here is the fix.
rollerOLS <- function(data, measure, predict, predictor){
error <- c()
m <- dim(data)[1]
for(i in 1:(floor(m/142)-10)){
data.new <- as.data.frame(data[c((1+(142*(i-1))):((i+9)*142)),])
data.pred <- as.data.frame(data[c((1+(142*(i+9))):((i+10)*142)-1),])
n <- dim(data.new)[1]
k <- dim(data.pred)[1]
x <- data.new[-1,predictor]
y <- data.new[-n, predict]
mod <- lm(y ~ x)
ts <- mod$coefficients[1] + mod$coefficients[2]*data.pred[-1,predictor]
actual <- data.pred[-k,predict]
error[i] <- measure(ts, actual)
}
return(error)
}

Viewing AIC for many variables with proper header

I'm pretty new in R and i'm stuck with one problem.
I've already found how to create many linear models at once, i made a function that counts AIC for each lm, but I cannot display this function with header that will show the name of the lm. I mean i want to get a data frame with header e.g. lm(a~b+c, data=data), and the AIC result for this lm.
Here's what i already wrote (with big help from stackoverflow, of course)
vars <- c("azot_stand", "przeplyw", "pH", "twardosc", "fosf_stand", "jon_stand", "tlen_stand", "BZO_stand", "spadek_stand")
N <- list(1,2,3,4,5,6,7,8)
COMB <- sapply(N, function(m) combn(x=vars[1:8], m))
COMB2 <- list()
k=0
for(i in seq(COMB)){
tmp <- COMB[[i]]
for(j in seq(ncol(tmp))){
k <- k + 1
COMB2[[k]] <- formula(paste("azot_stand", "~", paste(tmp[,j], collapse=" + ")))
}
}
res <- vector(mode="list", length(COMB2))
for(i in seq(COMB2)){
res[[i]] <- lm(COMB2[[i]], data=s)
}
aic <- vector(mode="list", length(COMB2))
d=0
for(i in seq(res)){
aic[[i]] <- AIC(res[[i]])
}
View(aic)
show(COMB2)
I guess that i miss something in the aic, but don't know what...
With formula you can obtain the formula of a regression model. Since you want to store the formula with the AIC, I would create a data.frame containing both:
aic <- data.frame(model = character(length(res)), aic = numeric(length(res)),
stringsAsFactors = FALSE)
for(i in seq(res)){
aic$model[i] <- deparse(formula(res[[i]]), width.cutoff = 500)
aic$aic[i] <- AIC(res[[i]])
}
Normally you would use format to convert a formula to a character. However, for long formulas this results in multiple lines. Therefore, I use deparse (which is also used by format) and passed it the width.cutoff argument.
You cannot use res[[i]]$call as this is always equal to lm(formula = COMB2[[i]], data = s).
Other suggestions
The first part of your code can be simplified. I would write something like:
s <- attitude
vars <- names(attitude)[-1]
yvar <- names(attitude)[1]
models <- character(0)
for (i in seq_along(vars)) {
comb <- combn(vars, i)
models <- c(models,
paste(yvar, " ~ ", apply(comb, 2, paste, collapse=" + ")))
}
res <- lapply(models, function(m) lm(as.formula(m), data = s))
It is shorter and also has the advantage that magical constants such as the 8 and azot_stand are defined outside the main code and can easily be modified.
I also noticed that you use azot_stand both as target variable and predictor (it is also part of vars). I don't think you will want to do that.

Resources