I am applying spectral clustering to a dataset with 4200 rows and 2 columns.
spec <- specClust(df1, centers=7, nn = 7, method = "symmetric")
I get the error below.
Error in .Call("R_igraph_arpack", func, extra, options, env, sym, PACKAGE = "igraph") :
  At arpack.c:944 : ARPACK error, Maximum number of iterations reached
In addition: Warning message:
In .Call("R_igraph_arpack", func, extra, options, env, sym, PACKAGE = "igraph") :
  At arpack.c:776 : ARPACK solver failed to converge (1001 iterations, 0/7 eigenvectors converged)
How do I increase the number of ARPACK iterations? This does not work:
spec <- specClust(df1, centers = 7, nn = 7, method = "symmetric", iter.max = 301000)
Digging into specClust, the ... argument does not pass anything on to the arpack call. The simplest fix, I think, is to copy the specClust code, add a max.iter argument that is handed to arpack, and source the modified function in your script.
specClust2 <- function (data, centers = NULL, nn = 7, method = "symmetric",
                        gmax = NULL, max.iter = 10000, ...)
{
    call = match.call()
    if (is.data.frame(data))
        data = as.matrix(data)
    # Note: mydist, Laplacian, AUC and getClosest are helpers from the kknn package;
    # if they are not found when this is sourced outside the package, use kknn:::mydist etc.
    da = apply(data, 1, paste, collapse = "#")
    indUnique = which(!duplicated(da))
    indAll = match(da, da[indUnique])
    data2 = data
    data = data[indUnique, ]
    n <- nrow(data)
    data = scale(data, FALSE, TRUE)
    if (is.null(gmax)) {
        if (!is.null(centers))
            gmax = centers - 1L
        else gmax = 1L
    }
    test = TRUE
    while (test) {
        DC = mydist(data, nn)
        sif <- rbind(1:n, as.vector(DC[[2]]))
        g <- graph(sif, directed = FALSE)
        g <- decompose(g, min.vertices = 4)
        if (length(g) > 1) {
            if (length(g) >= gmax)
                nn = nn + 2
            else test = FALSE
        }
        else test = FALSE
    }
    W <- DC[[1]]
    n <- nrow(data)
    wi <- W[, nn]
    SC <- matrix(1, nrow(W), nn)
    SC[] <- wi[DC[[2]]] * wi
    W = W^2/SC
    alpha = 1/(2 * (nn + 1))
    qua = abs(qnorm(alpha))
    W = W * qua
    W = dnorm(W, sd = 1)
    DC[[1]] = W
    L = Laplacian(DC, nn, method)
    f <- function(x, extra) as.vector(extra %*% x)
    if (is.null(centers))
        kmax = 25
    else kmax = max(centers)
    ###
    # add the maxiter parameter to the arpack call, below
    ###
    U <- arpack(f, extra = L, options = list(n = n, which = "SM",
        nev = kmax, ncv = 2 * kmax, mode = 1, maxiter = max.iter),
        sym = TRUE)
    ind <- order(U[[1]])
    U[[2]] = U[[2]][indAll, ind]
    U[[1]] = U[[1]][ind]
    if (is.null(centers)) {
        tmp = which.max(diff(U[[1]])) + 1
        centers = which.min(AUC(U[[1]][1:tmp]))
    }
    if (method == "symmetric") {
        rs = sqrt(rowSums(U[[2]]^2))
        U[[2]] = U[[2]]/rs
    }
    result = kmeans(U[[2]], centers = centers, nstart = 20, ...)
    archeType = getClosest(U[[2]][indAll, ], result$centers)
    result$eigenvalue = U[[1]]
    result$eigenvector = U[[2]]
    result$data = data2
    result$indAll = indAll
    result$indUnique = indUnique
    result$L = L
    result$archetype = archeType
    result$call = call
    class(result) = c("specClust", "kmeans")
    result
}
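A minimal usage sketch, assuming the data frame is still called df1 and that igraph and kknn are attached (the helpers above live in kknn's namespace):

library(igraph)
library(kknn)
# hypothetical call with a much larger ARPACK iteration budget
spec <- specClust2(df1, centers = 7, nn = 7, method = "symmetric", max.iter = 30000)
table(spec$cluster)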
Would anyone know how to calculate significance values for clusters in the R package pvclust using a divisive hierarchical clustering method (e.g. diana from the cluster package)? The pvclust package supports only agglomerative hierarchical clustering methods (implemented by the hclust function), but I have been hoping that forcing pvclust to use diana instead of hclust might be possible. I tried modifying some of the internal pvclust functions as follows, but the only result was an error:
library(pvclust)
library(cluster)
pvclust.nonparallel <- function (data, method.hclust, method.dist, use.cor, nboot, r,
                                 store, weight, iseed, quiet)
{
    if (!is.null(iseed))
        set.seed(seed = iseed)
    n <- nrow(data)
    p <- ncol(data)
    if (is.function(method.dist)) {
        distance <- method.dist(data)
    }
    else {
        distance <- pvclust:::dist.pvclust(data, method = method.dist,
            use.cor = use.cor)
    }
    ####### replace hclust with diana
    # data.hclust <- hclust(distance, method = method.hclust) # original version
    data.hclust <- diana(distance, diss = T)
    if (method.hclust == "ward" && getRversion() >= "3.1.0") {
        method.hclust <- "ward.D"
    }
    size <- floor(n * r)
    rl <- length(size)
    if (rl == 1) {
        if (r != 1)
            warning("Relative sample size r is set to 1.0. AU p-values are not calculated\n")
        r <- list(1)
    }
    else r <- as.list(size/n)
    mboot <- lapply(r, boot.hclust, data = data, object.hclust = data.hclust,
        nboot = nboot, method.dist = method.dist, use.cor = use.cor,
        method.hclust = method.hclust, store = store, weight = weight,
        quiet = quiet)
    result <- pvclust:::pvclust.merge(data = data, object.hclust = data.hclust,
        mboot = mboot)
    return(result)
}
boot.hclust <- function (r, data, object.hclust, method.dist, use.cor, method.hclust,
                         nboot, store, weight = FALSE, quiet = FALSE)
{
    n <- nrow(data)
    size <- round(n * r, digits = 0)
    if (size == 0)
        stop("invalid scale parameter(r)")
    r <- size/n
    pattern <- pvclust:::hc2split(object.hclust)$pattern
    edges.cnt <- table(factor(pattern)) - table(factor(pattern))
    st <- list()
    rp <- as.character(round(r, digits = 2))
    if (r == 1)
        rp <- paste(rp, ".0", sep = "")
    if (!quiet)
        cat(paste("Bootstrap (r = ", rp, ")... ", sep = ""))
    w0 <- rep(1, n)
    na.flag <- 0
    for (i in 1:nboot) {
        if (weight && r > 10) {
            w1 <- as.vector(rmultinom(1, size, w0))
            suppressWarnings(distance <- distw.pvclust(data,
                w1, method = method.dist, use.cor = use.cor))
        }
        else {
            smpl <- sample(1:n, size, replace = TRUE)
            if (is.function(method.dist)) {
                suppressWarnings(distance <- method.dist(data[smpl, ]))
            }
            else {
                suppressWarnings(distance <- pvclust:::dist.pvclust(data[smpl, ],
                    method = method.dist, use.cor = use.cor))
            }
        }
        if (all(is.finite(distance))) {
            ####### replace hclust with diana
            # x.hclust <- hclust(distance, method = method.hclust) # original version
            x.hclust <- diana(distance, diss = T)
            pattern.i <- pvclust:::hc2split(x.hclust)$pattern
            edges.cnt <- edges.cnt + table(factor(pattern.i,
                levels = pattern))
        }
        else {
            x.hclust <- NULL
            na.flag <- 1
        }
        if (store)
            st[[i]] <- x.hclust
    }
    if (!quiet)
        cat("Done.\n")
    if (na.flag == 1)
        warning(paste("inappropriate distance matrices are omitted in computation: r = ",
            r), call. = FALSE)
    boot <- list(edges.cnt = edges.cnt, method.dist = method.dist,
        use.cor = use.cor, method.hclust = method.hclust, nboot = nboot,
        size = size, r = r, store = st)
    class(boot) <- "boot.hclust"
    return(boot)
}
assignInNamespace("pvclust.nonparallel",pvclust.nonparallel,ns="pvclust")
assignInNamespace("boot.hclust",boot.hclust,ns="pvclust")
data(lung)
res.pv <- pvclust(t(lung), method.dist = "euclidean")
plot(res.pv)
# Error in barplot.default(w, xlab = xlab, horiz = TRUE, space = 0, axes = FALSE, :
# argument 9 matches multiple formal arguments
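One thing worth checking (a guess rather than a verified fix): diana() returns a "diana"/"twins" object, not an "hclust" object, while pvclust's internals (hc2split, pvclust.merge and the plot method) expect the latter. Coercing the divisive tree with as.hclust(), both in pvclust.nonparallel and in boot.hclust, might avoid the downstream error:

library(cluster)
# hypothetical replacement for the diana() lines above
data.hclust <- as.hclust(diana(distance, diss = TRUE))
x.hclust    <- as.hclust(diana(distance, diss = TRUE))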
I need help. I need to make a Markowitz portfolio selection, and my script is the following:
The following is the matrix of stock returns:
a <- c(0.00444, -0.00553, -0.01007, -0.00012, 0.04133, -0.02472, -0.01771,
-0.00994, -0.06933, 0.00476)
b <- c(-0.01926, 0.06008, 0.02839, 0.00930, 0.02072, 0.02072, 0.03670, -0.02049,
-0.01644, 0.02375)
c <- c(-0.00719, 0.02296, -0.06438, 0.008805, -0.00603, -0.00663, -0.01160,
-0.00298, 0.00713, 0.00790)
d <- c(-0.01753, 0.00809, 0.02301, -0.00631, -0.026114,0.03157, -0.02488,
-0.01013, -0.03231, -0.00763)
e <- c(-0.02153, 0.00863, -0.02929, -0.01424, -0.01940, -0.02313, -0.04146,
-0.02610, 0.00050, -0.04700)
stocks <- cbind(a,b,c,d,e)
And the following is the market index:
rmkt <- c(-0.01159, -0.02787, -0.05936, -0.09417, -0.03027,-0.03161, -0.03166,
-0.04092, -0.02841, -0.009781)
For example, the time length is 10, so:
time <- 1:10
I used the following code for clustering:
Nsim = 10
opt_num_centers = rep(0, Nsim)
est_centers_coefs = est_centers_fits = rep(list(0), Nsim)
for (kk in 1:Nsim) {
    rmkt_list <- list()
    for (i in 1:5) {
        rmkt_list[[i]] = rmkt
    }
    stock_list_mat = do.call("cbind", stocks_list)   # note: stocks_list and societies_stocks are not defined in the snippet above
    class(stock_list_mat)
    stock_mat <- stock_list_mat[, c(1:societies_stocks)]
    class(stock_mat)
    nseg = 30
    B = basis_array(time, min(time), max(time), ndt = nseg,
        deg = 3, max_derivs = 0, sparse = FALSE)[[1]]
    M <- ncol(B)
    P <- crossprod(diff(diag(ncol(B)), diff = 3))
    dim(stock_mat)
    class(stock_mat)
    est_coef = matrix(0, nrow = M, ncol(stock_mat))
    est_bet = matrix(0, nrow = nrow(stock_mat), ncol(stock_mat))
    dim(est_coef)
    dim(est_bet)
    smooth_par = NULL
    for (i in 1:ncol(stock_mat)) {
        bb = expectreg.ls(stock_mat[, i] ~ rb(time, "special", by = rmkt, B = B, P = P),
            smooth = "schall", expectiles = 0.5)
        smooth_par = c(smooth_par, bb$lambda$time)
        est_coef[, i] = bb$coefficients$time[, 1]
        est_bet[, i] = B %*% bb$coefficients$time[, 1]
    }
    sc_est_coef = scale(est_coef)
    aa = clusGap(t(sc_est_coef), kmeans, 10, B = 100, nstart = 50, iter.max = 50)
    n_clust = maxSE(aa$Tab[, "gap"], aa$Tab[, "SE.sim"], "Tibs2001SEmax")
    opt_num_centers[kk] = n_clust
    km = kmeans(t(sc_est_coef), n_clust, nstart = 100, iter.max = 100)
    est_centers_coefs[[kk]] = t(km$centers)
    est_centers_fits[[kk]] = B %*% est_centers_coefs[[kk]]
    cat("\t", "simulation =", kk, "\n", "\t", "# Clust =", opt_num_centers[kk], "\n")
}
Considering all of the above, how can I proceed to make the portfolio selection? Thank you all.
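One possible starting point, sketched under assumptions that go beyond the question (the quadprog package, a long-only minimum-variance allocation computed directly on the five return series, and the clustering step ignored):

library(quadprog)

Sigma    <- cov(stocks)                  # sample covariance of the five return series
n_assets <- ncol(stocks)
# minimise w' Sigma w  subject to  sum(w) = 1  and  w >= 0
Dmat <- 2 * Sigma
dvec <- rep(0, n_assets)
Amat <- cbind(rep(1, n_assets), diag(n_assets))
bvec <- c(1, rep(0, n_assets))
sol  <- solve.QP(Dmat, dvec, Amat, bvec, meq = 1)
weights <- setNames(sol$solution, colnames(stocks))
weights

A target-return constraint (the usual Markowitz frontier) can be added as an extra column of Amat holding colMeans(stocks), with the desired return appended to bvec and meq = 2.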
I have written this model, but rjags gives a dimension mismatch error. What's happening?
Error in jags.model(textConnection(model1), data = jags_data, n.chains = n_chains, :
RUNTIME ERROR:
Compilation error on line 8.
Dimension mismatch taking subset of y
library(rjags)
model1 <- "model {
C <- 10000
for (j in 1:nobs){
zeros[j] ~ dpois(phi[j])
phi[j] <- -log(L[j]) + C
L[j] <- add[j]*(lambda[j]^y[j])*(1-lambda[j])^(1-y[j])
add[j] = ifelse(lambda[j] == 0.5, 2, aux[j])
aux[j] = 2*arctanh(1 - 2*lambda[j] + 10^(-323))/(1 - 2*lambda[j] + 10^(-323))
logit(lambda[j]) <- inprod(X[j, ], beta)
}
beta[1] ~ dnorm(0,1)
beta[2] ~ dgamma(1,1)
}"
n_chains = 1
n_adapt = 5000
n_iter = 10000
n_thin = 1
n_burnin = 5000
# generate data
n = 100
Ffun = plogis
design_mat = cbind(1, matrix(seq(0,1,by = 0.2), ncol=1))
gen_data = function(n, beta) {
    X = design_mat[sample(nrow(design_mat), size = n, replace = T), ]
    lambda = Ffun(X %*% beta)
    y = rcbern(n, lambda)
    idx = is.nan(y)
    y[idx] = runif(length(idx))
    list(X = X, y = y)
}
rcbern = function(n, lam){
    x = runif(n)
    y = log((x*(2*lam-1) - (lam-1))/(1-lam))/log(lam/(1-lam))
    return(y)
}
beta = as.matrix(c(-3, 5))
jags_data = gen_data(n, beta)
jags_data$nobs = n
jg_model <- jags.model(textConnection(model1),
                       data = jags_data,
                       n.chains = n_chains,
                       n.adapt = n_adapt)
update(jg_model, n.iter = n_burnin)
result <- coda.samples(jg_model,
                       variable.names = c("beta"),
                       n.iter = n_iter,
                       thin = n_thin,
                       n.chains = n_chains)
beta_est = list(apply(result[[1]],2,median))
As suggested by user20650, the issue is that the model indexes y as a vector, while your data-generating function returns it as a matrix. Try this code, with a slight change in gen_data():
library(rjags)
model1 <- "model {
C <- 10000
for (j in 1:nobs){
zeros[j] ~ dpois(phi[j])
phi[j] <- -log(L[j]) + C
L[j] <- add[j]*(lambda[j]^y[j])*(1-lambda[j])^(1-y[j])
add[j] = ifelse(lambda[j] == 0.5, 2, aux[j])
aux[j] = 2*arctanh(1 - 2*lambda[j] + 10^(-323))/(1 - 2*lambda[j] + 10^(-323))
logit(lambda[j]) <- inprod(X[j, ], beta)
}
beta[1] ~ dnorm(0,1)
beta[2] ~ dgamma(1,1)
}"
n_chains = 1
n_adapt = 5000
n_iter = 10000
n_thin = 1
n_burnin = 5000
# generate data
n = 100
Ffun = plogis
design_mat = cbind(1, matrix(seq(0,1,by = 0.2), ncol=1))
gen_data = function(n, beta) {
    X = design_mat[sample(nrow(design_mat), size = n, replace = T), ]
    lambda = Ffun(X %*% beta)
    y = rcbern(n, lambda)
    y <- as.vector(y)
    idx = is.nan(y)
    y[idx] = runif(length(idx))
    list(X = X, y = y)
}
rcbern = function(n, lam){
    x = runif(n)
    y = log((x*(2*lam-1) - (lam-1))/(1-lam))/log(lam/(1-lam))
    return(y)
}
beta = as.matrix(c(-3, 5))
jags_data = gen_data(n, beta)
jags_data$nobs = n
jg_model <- jags.model(textConnection(model1),
                       data = jags_data,
                       n.chains = n_chains,
                       n.adapt = n_adapt)
update(jg_model, n.iter = n_burnin)
result <- coda.samples(jg_model,
                       variable.names = c("beta"),
                       n.iter = n_iter,
                       thin = n_thin,
                       n.chains = n_chains)
beta_est = list(apply(result[[1]],2,median))
Output:
beta_est
[[1]]
beta[1] beta[2]
-0.006031984 0.692007301
You can also use y <- y[, 1, drop = TRUE] in the same function instead of as.vector().
I am trying to develop a method for variable selection using a genetic algorithm and partial least squares in R. For this purpose I am using the packages GA and plsdepot. However, there seems to be some error in my code which I am unable to identify. I am using the GA package for the first time and am very new to the ideas of genetic algorithms and partial least squares regression. Any help will be greatly appreciated.
library(plsdepot)
library(GA)
mod <- plsreg1(Data[, 3:137], Data[, 2, drop = FALSE], comps = 100,crosval=TRUE)
x <- Data[,3:137]
y <- Data[,2]
fitness <- function(string) {
    inc <- which(string == 1)
    X <- cbind(1, x[, inc])
    model <- plsreg1(y, X, comps = 2, crosval = TRUE)
    q2list <- model$Q2[3]
    best_comp <- which(q2list == max(q2list))
    model_final <- plsreg1(y, X, comps = best_comp, crosval = TRUE)
    q2list_final <- model_final$Q2[3]
    max(q2list_final)
}
GA <- ga("binary", fitness = fitness, nBits = ncol(x),names = colnames(x), monitor = plot)
When the code is run, I get the following error in the console:
> GA <- ga("binary", fitness = fitness, nBits = ncol(x),names = colnames(x), monitor = plot)
Error in plsreg1(y, X, comps = 2, crosval = TRUE) :
predictors must contain more than one column
Called from: fitness(Pop[i, ], ...)
Browse[1]> Q
Checking whether x is empty or not:
> ncol(x)
[1] 135
In terms of errors, I think the main problem is that the X matrix stays blank, as it is somehow unable to read the x matrix.
When I run the GA code, the source viewer opens up, and this is what it shows:
function (predictors, response, comps = 2, crosval = TRUE)
{
X = as.matrix(predictors)
n = nrow(X)
p = ncol(X)
if (p < 2)
stop("\npredictors must contain more than one column")   # <- this is where the error is raised, since p < 2
if (is.null(colnames(X)))
colnames(X) = paste(rep("X", p), 1:p, sep = "")
if (is.null(rownames(X)))
rownames(X) = 1:n
Y = as.matrix(response)
if (ncol(Y) != 1)
stop("\nresponse must be a single variable")
if (any(is.na(response)))
stop("\nresponse must not contain missing values")
if (nrow(X) != nrow(Y))
stop("\npredictors and response have different number of rows")
if (is.null(colnames(Y)))
colnames(Y) = "Y"
if (is.null(rownames(Y)))
rownames(Y) = 1:n
if (any(is.na(X)))
na.miss = TRUE
else na.miss = FALSE
if (!is.null(comps)) {
nc = comps
if (mode(nc) != "numeric" || length(nc) != 1 || nc <=
1 || (nc%%1) != 0 || nc > min(n, p))
nc = min(n, p)
if (nc == n)
nc = n - 1
}
else {
if (na.miss) {
crosval = FALSE
nc = 2
}
else {
if (n >= 10)
crosval = TRUE
else crosval = FALSE
nc = min(n, p)
}
}
if (!is.logical(crosval))
crosval = FALSE
Xx = scale(X)
Yy = scale(Y)
X.old = Xx
Y.old = Yy
Th = matrix(NA, n, nc)
Ph = matrix(NA, p, nc)
Wh = matrix(NA, p, nc)
Uh = matrix(NA, n, nc)
ch = rep(NA, nc)
Hot = matrix(NA, n, nc)
hlim = rep(NA, nc)
if (crosval) {
RSS = c(n - 1, rep(NA, nc))
PRESS = rep(NA, nc)
Q2 = rep(NA, nc)
sets_size = c(rep(n%/%10, 9), n - 9 * (n%/%10))
obs = sample(1:n, size = n)
segments = vector("list", length = 10)
ini = cumsum(sets_size) - sets_size + 1
fin = cumsum(sets_size)
for (k in 1:10) segments[[k]] = obs[ini[k]:fin[k]]
}
w.old = rep(1, p)
t.new = rep(1, n)
p.new = rep(NA, p)
h = 1
repeat {
if (na.miss) {
for (j in 1:p) {
i.exist = which(complete.cases(X[, j]))
w.old[j] = sum(X.old[i.exist, j] * Y.old[i.exist])
}
w.new = w.old/sqrt(sum(w.old^2))
for (i in 1:n) {
j.exist = which(complete.cases(X[i, ]))
t.new[i] = sum(X.old[i, j.exist] * w.new[j.exist])
}
for (j in 1:p) {
i.exist = intersect(which(complete.cases(X[,
j])), which(complete.cases(t.new)))
p.new[j] = sum(X.old[i.exist, j] * t.new[i.exist])/sum(t.new[i.exist]^2)
}
c.new = t(Y.old) %*% t.new/sum(t.new^2)
u.new = Y.old/as.vector(c.new)
}
if (!na.miss) {
w.old = t(X.old) %*% Y.old/sum(Y.old^2)
w.new = w.old/sqrt(sum(w.old^2))
t.new = X.old %*% w.new
p.new = t(X.old) %*% t.new/sum(t.new^2)
c.new = t(Y.old) %*% t.new/sum(t.new^2)
u.new = Y.old/as.vector(c.new)
if (crosval) {
RSS[h + 1] = sum((Y.old - t.new %*% c.new)^2)
press = rep(0, 10)
for (i in 1:10) {
aux = segments[[i]]
Xy.aux = t(X.old[-aux, ]) %*% Y.old[-aux]
wh.si = Xy.aux %*% sqrt(solve(t(Xy.aux) %*%
Xy.aux))
th.si = X.old[-aux, ] %*% wh.si
ch.si = t(Y.old[-aux]) %*% th.si %*% solve(t(th.si) %*%
th.si)
ch.si = as.vector(ch.si)
Yhat.si = ch.si * X.old[aux, ] %*% wh.si
press[i] = sum((Y.old[aux] - Yhat.si)^2)
}
PRESS[h] = sum(press)
Q2[h] = 1 - PRESS[h]/RSS[h]
}
}
Y.old = Y.old - (t.new %*% c.new)
X.old = X.old - (t.new %*% t(p.new))
Th[, h] = t.new
Ph[, h] = p.new
Wh[, h] = w.new
Uh[, h] = u.new
ch[h] = c.new
Hot[, h] = (n/(n - 1)) * t.new^2/(sum(t.new^2)/(n - 1))
hlim[h] = qf(0.95, h, n - h) * (h * (n^2 - 1))/(n * (n -
h))
if (is.null(comps) && crosval) {
if (Q2[h] < 0.0975 || h == nc)
break
}
else {
if (h == nc)
break
}
h = h + 1
}
if (crosval) {
q2cum = rep(NA, h)
for (k in 1:h) q2cum[k] = prod(PRESS[1:k])/prod(RSS[1:k])
Q2cum = 1 - q2cum
Q2cv = cbind(PRESS[1:h], RSS[1:h], Q2[1:h], rep(0.0975,
h), Q2cum)
dimnames(Q2cv) = list(1:h, c("PRESS", "RSS", "Q2", "LimQ2",
"Q2cum"))
if (is.null(comps))
h = h - 1
}
if (!crosval)
Q2cv = NULL
Th = Th[, 1:h]
Ph = Ph[, 1:h]
Wh = Wh[, 1:h]
Uh = Uh[, 1:h]
ch = ch[1:h]
Ws = Wh %*% solve(t(Ph) %*% Wh)
Bs = as.vector(Ws %*% ch)
if (!na.miss) {
Br = Bs * (rep(apply(Y, 2, sd), p)/apply(X, 2, sd))
cte = as.vector(colMeans(Y) - Br %*% apply(X, 2, mean))
y.hat = as.vector(X %*% Br + cte)
cor.xyt = cor(cbind(Xx, y = Yy), Th)
}
else {
mu.x <- attributes(Xx)$"scaled:center"
sd.x <- attributes(Xx)$"scaled:scale"
X.hat = Th %*% t(Ph) %*% diag(sd.x, p, p) + matrix(rep(mu.x,
each = n), n, p)
Br = Bs * (rep(apply(Y, 2, sd), p)/sd.x)
cte = as.vector(colMeans(response) - Br %*% mu.x)
y.hat = as.vector(X.hat %*% Br + cte)
cor.xyt = matrix(NA, p + 1, h)
for (j in 1:p) {
i.exist <- which(complete.cases(X[, j]))
cor.xyt[j, ] = cor(Xx[i.exist, j], Th[i.exist, ])
}
cor.xyt[p + 1, ] = cor(Yy, Th)
}
resid = as.vector(Y - y.hat)
R2 = as.vector(cor(Th, Yy))^2
R2Xy = t(apply(cor.xyt^2, 1, cumsum))
T2hot = rbind(hlim[1:h], t(apply(Hot[, 1:h], 1, cumsum)))
dimnames(Wh) = list(colnames(X), paste(rep("w", h), 1:h,
sep = ""))
dimnames(Ws) = list(colnames(X), paste(rep("w*", h), 1:h,
sep = ""))
dimnames(Th) = list(rownames(X), paste(rep("t", h), 1:h,
sep = ""))
dimnames(Ph) = list(colnames(X), paste(rep("p", h), 1:h,
sep = ""))
dimnames(Uh) = list(rownames(Y), paste(rep("u", h), 1:h,
sep = ""))
names(ch) = paste(rep("c", h), 1:h, sep = "")
dimnames(T2hot) = list(c("T2", rownames(X)), paste(rep("H",
h), 1:h, sep = ""))
names(Bs) = colnames(X)
names(Br) = colnames(X)
names(resid) = rownames(Y)
names(y.hat) = rownames(Y)
names(R2) = paste(rep("t", h), 1:h, sep = "")
colnames(R2Xy) = paste(rep("t", h), 1:h, sep = "")
dimnames(cor.xyt) = list(c(colnames(X), colnames(Y)), colnames(Th))
res = list(x.scores = Th, x.loads = Ph, y.scores = Uh, y.loads = ch,
cor.xyt = cor.xyt, raw.wgs = Wh, mod.wgs = Ws, std.coefs = Bs,
reg.coefs = c(Intercept = cte, Br), R2 = R2, R2Xy = R2Xy,
y.pred = y.hat, resid = resid, T2 = T2hot, Q2 = Q2cv,
y = response)
class(res) = "plsreg1"
return(res)
}
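For what it's worth, the signature shown above is plsreg1(predictors, response, ...), and the p < 2 check applies to the predictor matrix, whereas the fitness function above calls plsreg1(y, X, ...). A rough, untested sketch of a fitness function that passes the arguments in predictor-first order and guards against chromosomes that select fewer than two variables (it also drops the two-stage component search for brevity) could look like this:

fitness <- function(string) {
    inc <- which(string == 1)
    # plsreg1() stops when the predictor matrix has fewer than two columns
    if (length(inc) < 2) return(-1e6)
    X <- as.matrix(x[, inc])
    model <- plsreg1(X, y, comps = 2, crosval = TRUE)   # predictors first, response second
    max(model$Q2[, "Q2"])                               # "Q2" column of the cross-validation table
}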
I used Rglpk to solve a linear programming problem, but its results seem weird. I switched to lpSolve, and the two solvers give different results. Please comment out the Rglpk statements and uncomment the lpSolve ones to change the solver to lpSolve.
# Lo, S.-F., & Lu, W.-M. (2009). An integrated performance evaluation of financial holding companies in Taiwan.
# European Journal of Operational Research, 198(1), 341–350. doi:10.1016/j.ejor.2008.09.006
sbm = function(X, Y)
{
    # Here X is an N x m matrix, Y is an N x s matrix.
    library(Rglpk)
    # require(lpSolve)
    N = nrow(X)
    m = ncol(X)
    s = ncol(Y)
    # variables are
    # t
    # gamma_j, j = 1..N
    # s_i^(-), i = 1..m
    # s_r^(+), r = 1..s
    efficiency = numeric(N)
    max_positive_y = apply(Y[, 1:s], MARGIN = 2, function(x) max(x[x > 0]))
    min_positive_y = apply(Y[, 1:s], MARGIN = 2, function(x) min(x[x > 0]))
    dir = rep("==", 1 + m + s + 1)
    rhs = c(1, rep(0, m), rep(0, s), 0)
    for (i in 1:N)
    {
        x = X[i, ]
        y = Y[i, ]
        # variables
        coef_t = 1
        coef_gamma = rep(0, N)
        coef_s_i = -1/(m * x)
        coef_s_r = rep(0, s)
        obj = c(coef_t, coef_gamma, coef_s_i, coef_s_r)
        coef_constraint1_s = y
        for (r in 1:s)
        {
            if (y[r] < 0) {
                coef_constraint1_s[r] =
                    min_positive_y[r] * (max_positive_y[r] - min_positive_y[r]) /
                    (max_positive_y[r] - y[r])
            }
        }
        constraint1 = c(1, rep(0, N), rep(0, m), 1/(s * coef_constraint1_s))
        constraint2 = cbind(-x, t(X), diag(m), matrix(0, m, s))
        constraint3 = cbind(-y, t(Y), matrix(0, s, m), -diag(s))
        constraint4 = c(-1, rep(1, N), rep(0, m), rep(0, s))
        mat = rbind(constraint1, constraint2, constraint3, constraint4)
        results = Rglpk_solve_LP(obj = obj, mat = mat, dir = dir, rhs = rhs, max = FALSE)
        efficiency[i] = results$optimum
        # results <- lp("min", obj, mat, dir, rhs)
        # efficiency[i] = results$objval
    }
    efficiency
}
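A small self-contained check with made-up data can help compare the two solvers; X and Y below are invented purely for illustration:

set.seed(42)
X <- matrix(runif(5 * 3, min = 1, max = 10), nrow = 5)   # 5 DMUs, 3 inputs
Y <- matrix(runif(5 * 2, min = 1, max = 10), nrow = 5)   # 5 DMUs, 2 outputs
sbm(X, Y)    # run once with Rglpk, then with the lpSolve lines swapped in, and compare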