How to remove extra points on graph - r

I have written the following R function:
#initialprob <- c(0.4,0.5,0.1)
f1 <- function(n,m,priceinitial,delta,mean, sd, ninterval){
initialprob <- c(1/3, 1/3, 1/3)
traders <- vector(mode="character", length=n)
traderscurrent <- vector(mode="character", length=n)
price <- vector(mode="numeric")
pricecurrent <- vector(mode="numeric")
for(nint in 1:ninterval)
{
print(initialprob)
L = floor(rnorm(1,mean,sd))
print(L)
for(i in 1:n)
{
traders[i] = sample(c("B", "S", "N"), size=1, prob=initialprob)
}
print(table(traders))
for(step in 1:L)
{
for(i in 1:n)
{
b <- sample(traders[-i], m)
#print(b)
#table(b)
traderscurrent[i] <- sample(b,1)
}
print(table(traderscurrent))
buy = length(which(traderscurrent == "B"))
sell = length(which(traderscurrent == "S"))
neutral = length(which(traderscurrent == "N"))
total = buy+neutral+sell
buyprop = buy/total
sellprop = sell/total
neutralprop = neutral/total
pricecurrent[step] = priceinitial+buy*delta-sell*delta
priceinitial = pricecurrent[step]
traders <- traderscurrent
#print(nint)
#print
initialprob <- c(sellprop,buyprop,neutralprop)
print(initialprob)
}
a <- runif(1,0,1)
b <- runif(1,0,1)
c <- runif(1,0,1)
total = a+b+c
initialprob <- c(a/total, b/total, c/total)
print(initialprob)
price <- append(price,pricecurrent, after=length(price))
#price <- price[-step]
plot(price)
}
print(price)
#plot(price)
}
When I call f1(1000,100,100,10,10,1,100) I get a graph that looks like this:
How would I fix this? It seems that there is a problem with appending the prices with previous prices. Maybe the last price of the previous iteration is being added twice?

The problem seems to be that the pricecurrent vector is not being reset after each iteration. Try adding pricecurrent <- vector(mode="numeric") just after the first for loop declaration:
for(nint in 1:ninterval)
{
pricecurrent <- vector(mode="numeric") ## <- added
print(initialprob)
L = floor(rnorm(1,mean,sd))
print(L)
for(i in 1:n)
....

Related

Why do I get the error "number of items to replace is not a multiple of replacement length" when running the sppba function of the WRS2 package?

I would be super grateful for some help. I don't have a coding background and I am confused by the error message I am getting when running the sppb functions of the WRS2 package. These functions perform a robust mixed ANOVA using bootstrapping.
sppba(formula = score ~ my_between_variable * my_within_variable, id = participant_code, data = df_long_T2)
Error in xmat[, k] <- x[[kv]] :
number of items to replace is not a multiple of replacement length
I get the same error for all three sppb functions. The functions look the same except that instead of sppba the others say sppbb and sppbi. I don't even know what the functions are trying to replace. The functions work for me with other data.
The classes of all the things involved seem fine: score is numeric, order_supplement and time are factors, participant_code is character, df_long_T2 is a dataframe. I have 120 participants, 61 in one group and 59 in the other, with two observations per participant. There are no NAs in the columns involved.
Traceback() just gives me the one line of code above and the error message.
Debug() gives me this and I don't know what to make of it:
"Debug location is approximate because location is not available"
function (formula, id, data, est = "mom", avg = TRUE, nboot = 500,
MDIS = FALSE, ...)
{
if (missing(data)) {
mf <- model.frame(formula)
}
else {
mf <- model.frame(formula, data)
}
cl <- match.call()
est <- match.arg(est, c("mom", "onestep", "median"), several.ok = FALSE)
mf1 <- match.call()
m <- match(c("formula", "data", "id"), names(mf1), 0L)
mf1 <- mf1[c(1L, m)]
mf1$drop.unused.levels <- TRUE
mf1[[1L]] <- quote(stats::model.frame)
mf1 <- eval(mf1, parent.frame())
random1 <- mf1[, "(id)"]
depvar <- colnames(mf)[1]
if (all(length(table(random1)) == table(mf[, 3]))) {
ranvar <- colnames(mf)[3]
fixvar <- colnames(mf)[2]
}
else {
ranvar <- colnames(mf)[2]
fixvar <- colnames(mf)[3]
}
MC <- FALSE
K <- length(table(mf[, ranvar]))
J <- length(table(mf[, fixvar]))
p <- J * K
grp <- 1:p
est <- get(est)
fixsplit <- split(mf[, depvar], mf[, fixvar])
indsplit <- split(mf[, ranvar], mf[, fixvar])
dattemp <- mapply(split, fixsplit, indsplit, SIMPLIFY = FALSE)
data <- do.call(c, dattemp)
x <- data
jp <- 1 - K
kv <- 0
kv2 <- 0
for (j in 1:J) {
jp <- jp + K
xmat <- matrix(NA, ncol = K, nrow = length(x[[jp]]))
for (k in 1:K) {
kv <- kv + 1
xmat[, k] <- x[[kv]]
}
xmat <- elimna(xmat)
for (k in 1:K) {
kv2 <- kv2 + 1
x[[kv2]] <- xmat[, k]
}
}
xx <- x
nvec <- NA
jp <- 1 - K
for (j in 1:J) {
jp <- jp + K
nvec[j] <- length(x[[jp]])
}
bloc <- matrix(NA, nrow = J, ncol = nboot)
mvec <- NA
ik <- 0
for (j in 1:J) {
x <- matrix(NA, nrow = nvec[j], ncol = K)
for (k in 1:K) {
ik <- ik + 1
x[, k] <- xx[[ik]]
if (!avg)
mvec[ik] <- est(xx[[ik]])
}
tempv <- apply(x, 2, est)
data <- matrix(sample(nvec[j], size = nvec[j] * nboot,
replace = TRUE), nrow = nboot)
bvec <- matrix(NA, ncol = K, nrow = nboot)
for (k in 1:K) {
temp <- x[, k]
bvec[, k] <- apply(data, 1, rmanogsub, temp, est)
}
if (avg) {
mvec[j] <- mean(tempv)
bloc[j, ] <- apply(bvec, 1, mean)
}
if (!avg) {
if (j == 1)
bloc <- bvec
if (j > 1)
bloc <- cbind(bloc, bvec)
}
}
if (avg) {
d <- (J^2 - J)/2
con <- matrix(0, J, d)
id <- 0
Jm <- J - 1
for (j in 1:Jm) {
jp <- j + 1
for (k in jp:J) {
id <- id + 1
con[j, id] <- 1
con[k, id] <- 0 - 1
}
}
}
if (!avg) {
MJK <- K * (J^2 - J)/2
JK <- J * K
MJ <- (J^2 - J)/2
cont <- matrix(0, nrow = J, ncol = MJ)
ic <- 0
for (j in 1:J) {
for (jj in 1:J) {
if (j < jj) {
ic <- ic + 1
cont[j, ic] <- 1
cont[jj, ic] <- 0 - 1
}
}
}
tempv <- matrix(0, nrow = K - 1, ncol = MJ)
con1 <- rbind(cont[1, ], tempv)
for (j in 2:J) {
con2 <- rbind(cont[j, ], tempv)
con1 <- rbind(con1, con2)
}
con <- con1
if (K > 1) {
for (k in 2:K) {
con1 <- push(con1)
con <- cbind(con, con1)
}
}
}
if (!avg)
bcon <- t(con) %*% t(bloc)
if (avg)
bcon <- t(con) %*% (bloc)
tvec <- t(con) %*% mvec
tvec <- tvec[, 1]
tempcen <- apply(bcon, 1, mean)
vecz <- rep(0, ncol(con))
bcon <- t(bcon)
temp = bcon
for (ib in 1:nrow(temp)) temp[ib, ] = temp[ib, ] - tempcen +
tvec
bcon <- rbind(bcon, vecz)
if (!MDIS) {
if (!MC)
dv = pdis(bcon, center = tvec)
}
if (MDIS) {
smat <- var(temp)
bcon <- rbind(bcon, vecz)
chkrank <- qr(smat)$rank
if (chkrank == ncol(smat))
dv <- mahalanobis(bcon, tvec, smat)
if (chkrank < ncol(smat)) {
smat <- ginv(smat)
dv <- mahalanobis(bcon, tvec, smat, inverted = T)
}
}
bplus <- nboot + 1
sig.level <- 1 - sum(dv[bplus] >= dv[1:nboot])/nboot
tvec1 <- data.frame(Estimate = tvec)
if (avg) {
tnames <- apply(combn(levels(mf[, fixvar]), 2), 2, paste0,
collapse = "-")
rownames(tvec1) <- tnames
}
else {
fixcomb <- apply(combn(levels(mf[, fixvar]), 2), 2,
paste0, collapse = "-")
rnames <- levels(mf[, ranvar])
tnames <- as.vector(t(outer(rnames, fixcomb, paste)))
rownames(tvec1) <- tnames
}
result <- list(test = tvec1, p.value = sig.level, contrasts = con,
call = cl)
class(result) <- c("spp")
result
}
I expected to get an output like this:
## Test statistics:
## Estimate
## time1-time2 0.3000
##
## Test whether the corrresponding population parameters are the same:
## p-value: 0.37

Regarding parallelisation of ensemblegamma0() function of ensembleBMA package

I was using the ensmebleBMA package in R I and fitting gamma0() model for precipitation forecast in R
But it is taking a long time if I do it for a season and so many grids.
I am using the ensemblegamma0() function to fit the model.
Hope that it will be done fast if I can parallelize the function. I tried parallelizing a source code but was not working properly. I seek help from someone having any idea about it.
There is a for loop inside this function() (ensemblegamma0()) which will fit the model for multiple days as each day forecasting is independent which could be parallelized
I am sharing the method I tried here
sample method
library(future.apply)
plan(multiprocess) ## => parallelize on your local computer
X <- 1:5
y <- future_lapply(X, function(x) {
tmp <- sqrt(x)
tmp
})
source code used
ensembleBMAgamma0 <-
function(ensembleData, trainingDays, dates = NULL,
control = controlBMAgamma0(), exchangeable = NULL)
{
#
# copyright 2006-present, University of Washington. All rights reserved.
# for terms of use, see the LICENSE file
#
if (!inherits(ensembleData,"ensembleData")) stop("not an ensembleData object")
if (missing(trainingDays)) stop("trainingDays must be specified")
call <- match.call()
warmStart <- FALSE
if (missing(trainingDays)) stop("trainingDays must be specified")
ensMemNames <- ensembleMembers(ensembleData)
nForecasts <- length(ensMemNames)
exchangeable <- getExchangeable( exchangeable, ensembleGroups(ensembleData),
nForecasts)
# remove instances missing all forecasts, obs or dates
M <- !dataNA(ensembleData)
if (!all(M)) ensembleData <- ensembleData[M,]
nObs <- nrow(ensembleData)
if (!nObs) stop("no data")
Dates <- as.character(ensembleValidDates(ensembleData))
DATES <- sort(unique(Dates))
julianDATES <- ymdhTOjul(DATES)
incr <- min(1,min(diff(julianDATES))) ## incr may be fractional for hours
forecastHour <- ensembleFhour(ensembleData)
lag <- ceiling( forecastHour / 24 )
## dates that can be modeled by the training data (ignoring gaps)
dates <- getDates( DATES, julianDATES, dates, trainingDays, lag, incr)
juliandates <- ymdhTOjul(dates)
nDates <- length(dates)
if (is.null(control$prior)) {
# accomodates saved mean as an additional parameter
prob0coefs <- array( NA, c(3, nForecasts, nDates),
dimnames = list(NULL, ensMemNames, dates))
}
else {
prob0coefs <- array( NA, c(4, nForecasts, nDates),
dimnames = list(NULL, ensMemNames, dates))
}
biasCoefs <- array( NA, c(2, nForecasts, nDates),
dimnames = list(NULL, ensMemNames, dates))
varCoefs <- array( NA, c(2, nDates), dimnames = list(NULL, dates))
weights <- array( NA, c(nForecasts, nDates),
dimnames = list(ensMemNames, dates))
trainTable <- rep(0, nDates)
names(trainTable) <- dates
nIter <- loglikelihood <- rep(0, nDates)
names(nIter) <- names(loglikelihood) <- dates
obs <- dataVerifObs(ensembleData)
K <- 1:nForecasts
L <- length(juliandates)
twin <- 1:trainingDays
cat("\n")
l <- 0
for(i in seq(along = juliandates)) {
I <- (juliandates[i]-lag*incr) >= julianDATES
if (!any(I)) stop("insufficient training data")
j <- which(I)[sum(I)]
if (j != l) {
D <- as.logical(match(Dates, DATES[j:1], nomatch=0))
nonz <- sum(obs[D] != 0)
if (is.null(control$prior) && nonz < control$rainobs) {
cat("insufficient nonzero training obs for date", dates[i], "...\n")
next
}
twin <- (j+1) - (1:trainingDays)
if (is.null(control$prior)) {
# attempt to extend the training period
while (TRUE) {
D <- as.logical(match(Dates, DATES[twin], nomatch=0))
if (!any(D)) stop("this should not happen")
d <- ensembleValidDates(ensembleData[D,])
# if (length(unique(d)) != trainingDays) stop("wrong # of training days")
nonz <- sum(obs[D] != 0)
if (nonz >= control$rainobs) break
if (min(twin) == 1) break
twin <- max(twin):(min(twin)-1)
}
if (nonz < control$rainobs) {
cat("insufficient nonzero training obs for date", dates[i], "...\n")
next
}
}
cat("modeling for date", dates[i], "...")
kNA <- apply(ensembleForecasts(ensembleData[D,]), 2,
function(x) all(is.na(x)))
if (any(kNA)) {
if (!is.null(x <- exchangeable)) x <- exchangeable[-K[kNA]]
fit <- fitBMAgamma0(ensembleData[D,-K[kNA]], control = control,
exchangeable = x)
}
else {
fit <- fitBMAgamma0(ensembleData[D,], control = control,
exchangeable = exchangeable)
}
l <- j ## last model fit
trainTable[i] <- length(unique(Dates[D]))
nIter[i] <- fit$nIter
loglikelihood[i] <- fit$loglikelihood
if (warmStart) control$start$weights <- weights[,i]
cat("\n")
}
else {
trainTable[i] <- -abs(trainTable[i-1])
nIter[i] <- -abs(nIter[i-1])
loglikelihood[i] <- loglikelihood[i-1]
}
prob0coefs[,K[!kNA],i] <- fit$prob0coefs
biasCoefs[,K[!kNA],i] <- fit$biasCoefs
varCoefs[,i] <- fit$varCoefs
weights[K[!kNA],i] <- fit$weights
}
structure(list(training = list(days=trainingDays,lag=lag,table=trainTable),
prob0coefs = prob0coefs, biasCoefs = biasCoefs,
varCoefs = varCoefs, weights = weights, nIter = nIter,
exchangeable = exchangeable, power = fit$power,
call = match.call()),
forecastHour = forecastHour,
initializationTime = ensembleItime(ensembleData),
class = c("ensembleBMAgamma0","ensembleBMA"))
}
edited source code for loop
y=future_lapply(juliandates, function (i) {
I <- (juliandates[i]-lag*incr) >= julianDATES
if (!any(I)) stop("insufficient training data")
j <- which(I)[sum(I)]
if (j != l) {
D <- as.logical(match(Dates, DATES[j:1], nomatch=0))
nonz <- sum(obs[D] != 0)
if (is.null(control$prior) && nonz < control$rainobs) {
cat("insufficient nonzero training obs for date", dates[i], "...\n")
next
}
twin <- (j+1) - (1:trainingDays)
if (is.null(control$prior)) {
# attempt to extend the training period
while (TRUE) {
D <- as.logical(match(Dates, DATES[twin], nomatch=0))
if (!any(D)) stop("this should not happen")
d <- ensembleValidDates(ensembleData[D,])
# if (length(unique(d)) != trainingDays) stop("wrong # of training days")
nonz <- sum(obs[D] != 0)
if (nonz >= control$rainobs) break
if (min(twin) == 1) break
twin <- max(twin):(min(twin)-1)
}
if (nonz < control$rainobs) {
cat("insufficient nonzero training obs for date", dates[i], "...\n")
next
}
}
cat("modeling for date", dates[i], "...")
kNA <- apply(ensembleForecasts(ensembleData[D,]), 2,
function(x) all(is.na(x)))
if (any(kNA)) {
if (!is.null(x <- exchangeable)) x <- exchangeable[-K[kNA]]
fit <- fitBMAgamma0(ensembleData[D,-K[kNA]], control = control,
exchangeable = x)
}
else {
fit <- fitBMAgamma0(ensembleData[D,], control = control,
exchangeable = exchangeable)
}
l <- j ## last model fit
trainTable[i] <- length(unique(Dates[D]))
nIter[i] <- fit$nIter
loglikelihood[i] <- fit$loglikelihood
if (warmStart) control$start$weights <- weights[,i]
cat("\n")
}
else {
trainTable[i] <- -abs(trainTable[i-1])
nIter[i] <- -abs(nIter[i-1])
loglikelihood[i] <- loglikelihood[i-1]
}
prob0coefs[,K[!kNA],i] <- fit$prob0coefs
biasCoefs[,K[!kNA],i] <- fit$biasCoefs
varCoefs[,i] <- fit$varCoefs
weights[K[!kNA],i] <- fit$weights
}
structure(list(training = list(days=trainingDays,lag=lag,table=trainTable),
prob0coefs = prob0coefs, biasCoefs = biasCoefs,
varCoefs = varCoefs, weights = weights, nIter = nIter,
exchangeable = exchangeable, power = fit$power,
call = match.call()),
forecastHour = forecastHour,
initializationTime = ensembleItime(ensembleData),
class = c("ensembleBMAgamma0","ensembleBMA"))
})
i am getting some error after running this

multiple data frames with similar names

I needed to generate array or many data frames from other data frames which only varied in names. This required me to do a lot of copy-paste works. Is it possible that I can make it cleaner but not keep copying and pasting? Follows are two examples from many similar cases of the analysis I am doing now (I will provide codes for reproduction at the end of the question), which I think may be able to make them cleaner with the same approach.
case 1, create an array with data from per_d1,per_d1,per_d3,per_d4,per_d5
perd <- array(dim=c(7,15,5))
perd [,,1] <- as.matrix(per_d$per_d1)
perd [,,2] <- as.matrix(per_d$per_d2)
perd [,,3] <- as.matrix(per_d$per_d3)
perd [,,4] <- as.matrix(per_d$per_d4)
perd [,,5] <- as.matrix(per_d$per_d5)
case 2, create multiple data frames from data with similar names.
dataplot <- dfmak (per_d$per_d1,ge$per_d1$g1,ge$per_d1$g2,ge$per_d1$g3,ge$per_d1$g4,ge$per_d1$g5)
dataplot2 <- dfmak (per_d$per_d2,ge$per_d2$g1,ge$per_d2$g2,ge$per_d2$g3,ge$per_d2$g4,ge$per_d2$g5)
dataplot3 <- dfmak (per_d$per_d3,ge$per_d3$g1,ge$per_d3$g2,ge$per_d3$g3,ge$per_d3$g4,ge$per_d3$g5)
dataplot4 <- dfmak (per_d$per_d4,ge$per_d4$g1,ge$per_d4$g2,ge$per_d4$g3,ge$per_d4$g4,ge$per_d4$g5)
dataplot5 <- dfmak (per_d$per_d5,ge$per_d5$g1,ge$per_d5$g2,ge$per_d5$g3,ge$per_d5$g4,ge$per_d5$g5)
codes for reproduction
N <- 1
CS <- 10.141
S <- seq (7.72,13,0.807)
t <- 15
l <- length (S)
m0 <- 100
exps <- c(0.2, 0.5, 0.9, 1.5, 2)
sd <- c(0.2, 0.5, 0.8, 1.3, 1.8)
names(sd) <- paste("per", seq_along(sd), sep = "")
per <- lapply(sd, function(x){
per <- matrix(nrow = length(S)*N, ncol = t+1)
for (i in 1:dim(per)[1]) {
for (j in 1:t+1){
per [,1] <- replicate (n = N, S)
per [i,j] <- round (abs (rnorm (1, mean = per[i,1], sd =x)),digits=3)
colnames(per) <- c('physical',paste('t', 1:15, sep = ""))
per <- as.data.frame (per)
}
}
per <- per [,-1]
return(per)
}
)
per_d <- lapply(per, function(x){
per_d <- abs (x - 10.141)
}
)
names(per_d) <- paste("per_d", seq_along(sd), sep = "")
gefun <- function (i){
res <- lapply(exps, function(x){
g <- as.matrix (m0 * exp (-x * i))
for (i in 1:l) {
for (j in 1:t){
g [i,j] <- abs((round (rnorm(1,mean = g[i,j],sd=3), digits = 3)))
colnames(g) <- paste('t', 1:ncol(g), sep = "")
g <- as.data.frame(g)
}}
return(g)
}
)
}
ge <- lapply(per_d, gefun)
for (i in 1:length(ge)){
names(ge[[i]]) <- paste("g", seq_along(ge), sep = "")
}
dfmak <- function(df1,df2,df3,df4,df5,df6){
data.frame(stimulus = c (paste0('S',1:3),'CS+',paste0('S',5:7)),
phy_dis = S,
per_dis = c(df1$t1,df1$t2,df1$t3,df1$t4,df1$t5,df1$t6,df1$t7,df1$t8,df1$t9,df1$t10,df1$t11,df1$t12,df1$t13,df1$t14,df1$t15),
trials = rep(1:15, each = 7),
response_0.2 = c (df2$t1,df2$t2,df2$t3,df2$t4,df2$t5,df2$t6,df2$t7,df2$t8,df2$t9,df2$t10,df2$t11,df2$t12,df2$t13,df2$t14,df2$t15),
response_0.5 = c (df3$t1,df3$t2,df3$t3,df3$t4,df3$t5,df3$t6,df3$t7,df3$t8,df3$t9,df3$t10,df3$t11,df3$t12,df3$t13,df3$t14,df3$t15),
response_0.9 = c (df4$t1,df4$t2,df4$t3,df4$t4,df4$t5,df4$t6,df4$t7,df4$t8,df4$t9,df4$t10,df4$t11,df4$t12,df4$t13,df4$t14,df4$t15),
response_1.5 = c (df5$t1,df5$t2,df5$t3,df5$t4,df5$t5,df5$t6,df5$t7,df5$t8,df5$t9,df5$t10,df5$t11,df5$t12,df5$t13,df5$t14,df5$t15),
response_2 = c (df6$t1,df6$t2,df6$t3,df6$t4,df6$t5,df6$t6,df6$t7,df6$t8,df6$t9,df6$t10,df6$t11,df6$t12,df6$t13,df6$t14,df6$t15)
)
}
You can try the followings. But the codes, unfortunately, are not short.
Case 1
a <- lapply(per_d, as.matrix)
b <- c(a, recursive = TRUE)
pred <- array(b, dim = c(7,15,5))
Case 2
The data frames will be stored in a list. You still have to extract them using $ or [[]].
# create empty lists to store the outputs
out <- list()
name <- list()
for(i in 1:5) {
a <- per_d[[i]]
b <- ge[[i]][[1]]
c <- ge[[i]][[2]]
d <- ge[[i]][[3]]
e <- ge[[i]][[4]]
f <- ge[[i]][[5]]
arg <- list(a, b, c, d, e, f)
name[[i]] <- paste0("df_", i)
out[[i]] <- do.call(dfmak, arg)
}
out <- setNames(out, name)

Error in x[i, j] : incorrect number of dimensions :Transportation problem using Genetic Algorithm in R (GA package)

I am getting the following error :
Error in x[i, j] : incorrect number of dimensions
while executing the following code :
library(GA)
library(readxl)
path <- "GAMS data & solution.xlsx"
c <- read_excel(path,range = "C3:G7",col_names = F)
f <- read_excel(path,range = "C10:G10",col_names = F)
d <- read_excel(path,range = "C13:G13",col_names = F)
cap <- read_excel(path,range = "C16:G16",col_names = F)
rows <- nrow(c)
cols <- ncol(c)[enter image description here][1]
val2 <-0
val1 <-0
fitness <- function(m){
x<-m[1]
y<-m[2]
# define fitness function
for(i in 1:rows){
for(j in 1:cols){
val <- c[i,j]*x[i,j]
val1 <- val1 + val
}
}
for(i in 1:rows){
val0 <- f[i]*y[i]
val2 <- val2+val0
}
fitness_value <- val1 + val2
#define constraint
g1 <- x
for(j in 1:cols){
for(i in 1:rows){
sum1 <- x[i,j]
sum2 <- sum2+sum1
}
gtemp <- sum2-d[j]
g2 <- append(g2,gtemp)
}
for(i in 1:rows){
for(j in 1:cols){
sum0 <- x[i,j]
sum3 <- sum3+sum0
}
gtemp1 <- sum3-cap[i]*y[i]
g3 <- append(g3,gtemp)
}
#penalized constraint violation
fitness_value <- ifelse( g1 >= 0 & g2 >= 0 & g3 <= 0 , fitness_value, fitness_value + 1e5 )
return(-fitness_value)
}
ga(type = "real-valued", fitness,lower = c(0,0),upper = c(10000, 1),maxiter = 1000, popSize = 100, monitor = F)
I am trying to solve the following problem using GA package of R:
Here is what my data looks like.

prediction.strength in Package fpc

I am using the function prediction.strength in the r Package fpc with k-medoids algorithms.
here is my code
prediction.strength(data,2,6,M=10,clustermethod=pamkCBI,DIST,krange=2:6,diss=TRUE,usepam=TRUE)
somehow I get the error message
Error in switch(method, kmeans = kmeans(xdata[indvec[[l]][[i]], ], k, :
EXPR must be a length 1 vector
Does anybody have experience with this r command? There are simple examples like
iriss <- iris[sample(150,20),-5]
prediction.strength(iriss,2,3,M=3,method="pam")
but my problem is that I am using dissimilarity matrix instead of the data itself for the k-medoids algorithms. I don't know how should I correct my code in this case.
Please note that in the package help the following is stated for the prediction.strength:
xdats - data (something that can be coerced into a matrix). Note that this can currently
not be a dissimilarity matrix.
I'm afraid you'll have to hack the function to get it to handle a distance matrix. I'm using the following:
pred <- function (distance, Gmin = 2, Gmax = 10, M = 50,
classification = "centroid", cutoff = 0.8, nnk = 1, ...)
{
require(cluster)
require(class)
xdata <- as.matrix(distance)
n <- nrow(xdata)
nf <- c(floor(n/2), n - floor(n/2))
indvec <- clcenters <- clusterings <- jclusterings <- classifications <- list()
prederr <- list()
dist <- as.matrix(distance)
for (k in Gmin:Gmax) {
prederr[[k]] <- numeric(0)
for (l in 1:M) {
nperm <- sample(n, n)
indvec[[l]] <- list()
indvec[[l]][[1]] <- nperm[1:nf[1]]
indvec[[l]][[2]] <- nperm[(nf[1] + 1):n]
for (i in 1:2) {
clusterings[[i]] <- as.vector(pam(as.dist(dist[indvec[[l]][[i]],indvec[[l]][[i]]]), k, diss=TRUE))
jclusterings[[i]] <- rep(-1, n)
jclusterings[[i]][indvec[[l]][[i]]] <- clusterings[[i]]$clustering
centroids <- clusterings[[i]]$medoids
j <- 3 - i
classifications[[j]] <- classifdist(as.dist(dist), jclusterings[[i]],
method = classification, centroids = centroids,
nnk = nnk)[indvec[[l]][[j]]]
}
ps <- matrix(0, nrow = 2, ncol = k)
for (i in 1:2) {
for (kk in 1:k) {
nik <- sum(clusterings[[i]]$clustering == kk)
if (nik > 1) {
for (j1 in (1:(nf[i] - 1))[clusterings[[i]]$clustering[1:(nf[i] -
1)] == kk]) {
for (j2 in (j1 + 1):nf[i]) if (clusterings[[i]]$clustering[j2] ==
kk)
ps[i, kk] <- ps[i, kk] + (classifications[[i]][j1] ==
classifications[[i]][j2])
}
ps[i, kk] <- 2 * ps[i, kk]/(nik * (nik -
1))
}
}
}
prederr[[k]][l] <- mean(c(min(ps[1, ]), min(ps[2,
])))
}
}
mean.pred <- numeric(0)
if (Gmin > 1)
mean.pred <- c(1)
if (Gmin > 2)
mean.pred <- c(mean.pred, rep(NA, Gmin - 2))
for (k in Gmin:Gmax) mean.pred <- c(mean.pred, mean(prederr[[k]]))
optimalk <- max(which(mean.pred > cutoff))
out <- list(predcorr = prederr, mean.pred = mean.pred, optimalk = optimalk,
cutoff = cutoff, method = clusterings[[1]]$clustermethod,
Gmax = Gmax, M = M)
class(out) <- "predstr"
out
}

Resources