Related
I am staring work with snowfall package in that way:
library(snowfall)
sfInit(parallel=TRUE, cpus=6, type="SOCK")
#loading packages
sfLibrary(package = lars)
sfLibrary(package=covTest)
Function that I want to compute multiple times using sfLapply:
funkcja <- function(i,k=5)
{
beta <- c(k,k,0,k,k,rep(0,35))
X <- matrix(rnorm(100*40),100,40)
Y <- X%*%beta+rnorm(100)
lasso.lars <- lars(X,Y,intercept=FALSE,use.Gram=FALSE)
test <- covTest(lasso.lars,X,Y,sigma.est=1)
test
}
But when I try this
sfLapply(1:100,funkcja)
I get error:
"Error in checkForRemoteErrors(val): 6 nodes produced errors; first error: object 'Y' not found". But when I hide the last but one line and change test for lasso.lars then there is no longer trobule about vector Y:
funkcja <- function(i,k=5)
{
beta <- c(k,k,0,k,k,rep(0,35))
X <- matrix(rnorm(100*40),100,40)
Y <- X%*%beta+rnorm(100)
lasso.lars <- lars(X,Y,intercept=FALSE,use.Gram=FALSE)
#test <- covTest(lasso.lars,X,Y,sigma.est=1)
lasso.lars
}
I dont understand this because the line
test <- covTest(lasso.lars,X,Y,sigma.est=1)
should work since
lars(X,Y,intercept=FALSE,use.Gram=FALSE)
can work. I will be grateful for your help.
My guess is that Y is hiding and internal variable. The following function works (changed the case of "Y" to "y")
funkcja <- function(i,k=5)
{
beta <- c(k,k,0,k,k,rep(0,35))
X <- matrix(rnorm(100*40),100,40)
y <- X %*% beta + rnorm(100)
lasso.lars <- lars(X,y,intercept=FALSE,use.Gram=FALSE)
test <- covTest(lasso.lars,X,y,sigma.est=1)
test
}
I have a dataset of climate data in a data.frame (columns are measuring stations, and rows indicate time of measurement), and I'm trying to find the proper lambda values in a Yeo-Johnson transform to limit skewness impact on a principal component analysis.
Obviously, the first step is to get log likelihoods to find the best lambda : I use the following, where i is the index of a column :
getYeoJohsnonLambda <- function(myClimateData,cols,lambda_min, lambda_max,eps)
...
lambda <- seq(lambda_min,lambda_max,eps)
for(i in cols)
{
formula <- as.formula(paste("myClimateData$",colnames(myClimateData)[i],"~1"))
currentModel <- lm(formula,myClimateData)
print(currentModel)
myboxCox <- boxCox(currentModel, lambda = lambda ,family="yjPower", plotit = FALSE)
...
}
When I am trying to call it for a climateData time series which could be, for example :
`climateData <-data.frame(c(8.2,6.83,5.46,4.1,3.73,3.36,3,3,3,3,3.7),c(0,0.66,1.33,2,2,2,2,2,2,2,1.6))`
I get this error : Error in is.data.frame(data) : object 'myClimateData' not found
This is weird, as lm seems to find it and return a correct fit, and myClimateData should be found as it is one of the arguments of the function, right ?
Sadly, it seems that the problem comes from the function boxCox rather than your getYeoJohsnonLambda function. As BrodieG pointed out in a related question, this function uses parent.frame as an argument to eval which is considered as bad practice in the doc.
One way to solve this is to build the models before the call, as suggested in Adam Quek's answer:
library(car)
climateData <- data.frame(c(8.2,6.83,5.46,4.1,3.73,3.36,3,3,3,3,3.7),c(0,0.66,1.33,2,2,2,2,2,2,2,1.6))
names(climateData) <- c("a","b")
modelList <- list()
for(k in 1:ncol(climateData)) {
modelList[[k]] <- lm(as.formula(paste0(names(climateData)[k],"~1")),data=climateData)
}
getYeoJohnsonLambda <- function(myClimateData, cols, lambda_min, lambda_max, eps)
{
#Recommended values for lambda_min = -0.5 and lambda_max = 2.0, eps = 0.1
myboxCox <- list()
lmd <- seq(lambda_min,lambda_max,eps)
for(i in cols)
{
cat("Creating model for column # ",i,"\n")
currentModel <- modelList[[i]]
myboxCox[[i]] <- boxCox(currentModel, lambda = lmd ,family="yjPower", plotit = FALSE)
}
return(myboxCox)
}
test <- getYeoJohnsonLambda(climateData,c(1,2) ,-0.5,2,0.1)
Other solution (arguably cleaner): use yeo.johnson in VGAM
library(VGAM)
getYeoJohnsonLambda_VGAM <- function(myClimateData, cols, lambda_min, lambda_max, eps)
{
#Recommended values for lambda_min = -0.5 and lambda_max = 2.0, eps = 0.1
myboxCox <- list()
lmd <- seq(lambda_min,lambda_max,eps)
return(apply(climateData,2,yeo.johnson,lambda=lmd))
}
test2 <- getYeoJohnsonLambda_VGAM(climateData,c(1,2) ,-0.5,2,0.1)
Here's a solution without troubleshooting the function getYeoJohsnonLambda:
iris.dat <- iris[-5]
vars <- names(iris.dat)
lmd <- seq(.1, 1, .1) #lambda_min, lambda_max, eps
all.form <- lapply(vars, function(x) as.formula(paste0(x, "~ 1")))
all.lm <- lapply(all.form, lm, data=iris.dat)
library(MASS)
all.bcox <- lapply(all.form, boxcox, data=iris.dat,
lambda=lmd, family="yjPower", plotit=FALSE)
I am trying to cross-validate a panel data analysis, but I am having trouble with the prediction function. The code below is a section of the CV function where the problem occurs. When I try to run this part of the function I get the error message tagged on to the end of the code description.
compare <- data.frame()
train.model <- list()
for (f in 1:numoffolds)
{
train.model[[f]] <- plm(logit(II1)~.,data=select(filter(train, cv != f),-Incomegroup, -cv),
model="within", effect="twoways", index=c("Year", "Country") )
II1.p <- predict(train.model[[f]], newdata=select(filter(train, cv == f),-Country, -Year, -cv, -Incomegroup), type="response")
II1 <- filter(train, cv == f)$II1
compare <- rbind(compare, data.frame(II1.p = II1.p, II1 = II1))
}
Error in crossprod(beta, t(X)) : non-conformable arguments
Called from: crossprod(beta, t(X))
I've had a look at the prediction function and the X and beta terms are of conflicting dimensions. Does anyone have a suggesting to what I can do to overcome this problem?
function (object, newdata = NULL, ...)
{
tt <- terms(object)
if (is.null(newdata)) {
result <- fitted(object, ...)
}
else {
Terms <- delete.response(tt)
m <- model.frame(Terms, newdata)
X <- model.matrix(Terms, m)
beta <- coef(object)
result <- as.numeric(crossprod(beta, t(X)))
}
result
I've been thinking about resorting to other methods such as tree based regression, but I wanted to use plm to make a specific point. Any helping comments and/or codes would be greatly appreciated.
I have set up a Metropolis-Hastings algorithm, and now I am trying to run the algorithm using parallel computing. I have set up a single-chain function
library(parallel)
library(foreach)
library(mvtnorm)
library(doParallel)
n<-100
mX <- 1:n
vY <- rnorm(n)
chains <- 4
iter <- n
p <- 2
#Loglikelihood
post <- function(y, theta) dmvnorm(t(y), rep(0,length(y)), theta[1]*exp(- abs(matrix(rep(mX,n),n) - matrix(rep(mX,each=n),n))/theta[2]),log=TRUE)
geninits <- function() list(theta = runif(p, 0, 1))
dist <- 0.01
jump <- function(x, dist) exp(log(x) + rmvnorm(1,rep(0,p),diag(rep(dist,p))))
MCsingle <- function(){ # This is part of a larger function, so no input are needed
inits <- geninits()
theta.post <- matrix(NA,nrow=p,ncol=iter)
for (i in 1:p) theta.post[i,1] <- inits$theta[i]
for (t in 2:iter){
theta_star <- c(jump(theta.post[, t-1],dist))
pstar <- post(vY, theta = theta_star) # post is the loglikelihood using dmvnorm.
pprev <- post(vY, theta = theta.post[,t-1])
r <- min(exp(pstar - pprev) , 1)
accept <- rbinom(1, 1, prob = r)
if (accept == 1){
theta.post[, t] <- theta_star
} else {
theta.post[, t] <- theta.post[, t-1]
}
}
return(theta.post)
}
, which returns an p x iter matrix, with p parameters and iter iterations.
cl<-makeCluster(4)
registerDoParallel(cl)
posterior <- foreach(c = 1:chains) %dopar% {
MCsingle() }
UPDATE: When I tried to simplify the problem the code suddenly seemed to work. Even though I purposely tried to make errors, the code ran perfectly and the results were as wanted. So for others with similar problems unfortunately I cannot give an answer.
A follow-up question:
My initial purpose was to built up an entire function, such that
MCmulti <- function(mX,vY,iter,chains){
posterior <- foreach(c = 1:chains) %dopar% {
MCsingle() }
return(posterior)
}
but the foreach-loop does not seem to read all the required functions like:
Error in FUN() : task 1 failed - "could not find function "geninits""
Can anybody answer how to implement custom functions inside a foreach loop? Am I to input it as MCmulti <- function(FUN,...) FUN() and call MCmulti(MCsingle,...) ?
I have used the svm function in the e1071 package of R software to model my data using variables selected by my feature selection method. I have obtained predictions from this model using the predict.svm function in the same package. I want to compute the value of the objective function of the svm model using the R software. How can I do this?
Below is my code for my first feature selection technique-Information Gain
P1<-Fold1T$Class_NASQ
InfGainF1 <- information.gain(P1~., Fold1T[,-20])
subset <- cutoff.k(InfGainF1, 8)
f <- as.simple.formula(subset, "P1")
ModelInGF1<-svm(as.factor(P1)~ NSDQ.COMP+S.P.100+S.P.500+NYSE.COMP+NYSE.A.M.MKT +
RSEL.2000+ALL.ORD+HG.SENG ,data=Fold1T[,-20], kernel="radial",gamma=0.5,cost=16)
PredictInGF1<-predict(ModelInGF1,NewData=Fold1V[,-20])
######### Accuracy ########
confusionMatrix(PredictInGF1, P1)
Thanks
While learning about SVR back in 2010 I explored how predicted values are computed. To do this, I went over the file "svminternals.pdf" located in the e1071/doc subfolder and play my custom code (shown after the toy data) using the following data set
ToyData <- data.frame(X1=c(12.4,14.6,13.4,12.9,15.2,13.6,9.2), X2=c(2.1,9.2,1.9,0.8,1.1,8.6,1.1),Y=c(14.2,16.9,15.5,14.7,17.3,16,10.9))
You may explore the following code to see if is somehow helpful to you.
#LINEAR KERNEL
ToyData <- read.csv("ToyData.csv", header=T)
X <- as.matrix(ToyData[,1:2])
Y <- as.vector(ToyData[,3])
SVRLinear <- svm (X, Y, kernel="linear", epsilon=0.1, cost=1, scale=FALSE)
V <- as.matrix(SVRLinear$SV)
Vt <- t(V)
A <- as.matrix(SVRLinear$coefs)
(r <- SVRLinear$rho)
write.csv(V, file="SVLinear.csv")
write.csv(A, file="CoefsLinear.csv")
F <- (X %*% Vt) %*% A - r
write.csv(F, file="FittedLinear.csv")
#RBF KERNEL: Exp[(-gamma||x-z||^2)/2]
ToyData <- read.csv("ToyData.csv", header=T)
X <- as.matrix(ToyData[,1:2])
Y <- as.vector(ToyData[,3])
SVRRadial <- svm (X, Y, kernel="radial", epsilon=0.1, gamma=0.1, cost=5, scale=FALSE)
V <- as.matrix(SVRRadial$SV)
A <- as.matrix(SVRRadial$coefs)
(g <- SVRRadial$gamma)
(r <- SVRRadial$rho)
write.csv(V, file="SVRadial.csv")
write.csv(A, file="CoefsRadial.csv")
Kernel <- matrix(0, nrow(X), nrow(V))
for (i in 1:nrow(X)) {
for (j in 1:nrow(V)) {
Xi <- X[i,]
Vj <- V[j,]
XiMinusVj <- Xi - Vj
SumSqXiMinusVj <- XiMinusVj %*% XiMinusVj
Kernel[i,j] <- exp(-g*SumSqXiMinusVj)
}
}
F <- Kernel %*% A - r
write.csv(F, file="FittedRadial.csv")
I want to add the answer how to reproduce the predict value with the model parameter when scale option is open.In e1071,data are default scaled internally (both x and y variables) to zero mean and unit variance. The center and scale values are returned and used for later predictions.(http://www.inside-r.org/node/57517). According to the above code,I write the following code which may help to you.
ToyData <- data.frame(X1=c(12.4,14.6,13.4,12.9,15.2,13.6,9.2), X2=c(2.1,9.2,1.9,0.8,1.1,8.6,1.1),Y=c(14.2,16.9,15.5,14.7,17.3,16,10.9))
X <- as.matrix(ToyData[,1:2])
Y <- as.vector(ToyData[,3])
SVRRadial <- svm (X, Y, kernel="radial", epsilon=0.1, gamma=0.1, cost=5)
pred<-predict(SVRRadial,X)
toys<-ToyData
#scale the feature
sc_x<-data.frame(SVRRadial$x.scale)
for(col in row.names(sc_x)){
toys[[col]]<-(ToyData[[col]]-sc_x[[col,1]])/sc_x[[col,2]]
}
#compute the predict value, the method is same to the above code
X<-as.matrix(toys[,1:2])
V <- as.matrix(SVRRadial$SV)
A <- as.matrix(SVRRadial$coefs)
g <- SVRRadial$gamma
r <- SVRRadial$rho
Kernel <- matrix(0, nrow(X), nrow(V))
for (i in 1:nrow(X)) {
for (j in 1:nrow(V)) {
Xi <- X[i,]
Vj <- V[j,]
XiMinusVj <- Xi - Vj
SumSqXiMinusVj <- XiMinusVj %*% XiMinusVj
Kernel[i,j] <- exp(-g*SumSqXiMinusVj)
}
}
F <- Kernel %*% A - r
#restore the predict value from standard format to original format
my_pred<-F
sc_y<-data.frame(SVRRadial$y.scale)
my_pred<-my_pred*sc_y[[2]]+sc_y[[1]]
summary(my_pred-pred)
reference link:How to reproduce predict.svm in R?