Im trying to apply a function onto my list but it returns this error
"non numeric matrix extent error"
here's my code
the error occurs in the last few lines
the code works fine up till the end, and because of this, im unable to plot my graphs
I've searched online but couldnt find anything that helps, and I cant see what's wrong with the code
#Question 1
set.seed(10000)
v <- c(0.1,0.5,1,2,5,10,100)
lyst <- list()
for(i in v)
{
for(j in v)
{
elementname <- paste0(as.character(i),"-",as.character(j))
print(elementname)
lyst[[elementname]] <- rgamma(10000,i,j)
}
}
#Question 2
pdf("Question2.pdf",width = 20, height = 10)
par(mfcol=c(7,7))
for(x in names(lyst))
{
hist(lyst[[x]],
xlab = "Value",
main = paste("Alpha-Lambda:",x))
}
dev.off()
#Question 3
theoretical_mean <- matrix(ncol=7,nrow=7,dimnames=list(as.character(v), as.character(v)))
theoretical_var <- matrix(ncol=7,nrow=7,dimnames=list(as.character(v), as.character(v)))
for (i in 1:7)
{
for (j in 1:7)
{
theoretical_mean[j,i] <- as.character(v[i]/v[j])
theoretical_var[j,i] <- as.character(v[i]/(v[j]^2))
}
}
sample_mean <-lapply(lyst, mean)
sample_mean <- as.data.frame(matrix(unlist(sample_mean),nrow = 7, ncol = 7, byrow = T))
sample_mean <- round(sample_mean,digits = 3)
sample_mean <- data.matrix(sample_mean, rownames.force = NA)
sample_var <-lapply(lyst, var)
sample_var <- as.data.frame(matrix(unlist(sample_var),nrow = 7, ncol = 7, byrow = T))
sample_var <- round(sample_var,digits = 3)
sample_var <- data.matrix(sample_var, rownames.force = NA)
theor_sample_mean <- matrix(paste(theoretical_mean, sample_mean, sep=" - "),nrow=7,dimnames = dimnames(theoretical_var))
theor_sample_var <- matrix(paste(theoretical_var, sample_var, sep=" - "),nrow=7,dimnames= dimnames(theoretical_var))
sink("Q3.txt")
cat("Theoretical Mean vs. Sample Mean:\n")
print(as.table(theor_sample_mean))
cat("\n")
cat("Theoretical Variance vs. Sample Variance:\n")
print(as.table(theor_sample_var))
sink()
#Question 4
nmean <- function(x)
{
m <- matrix(nrow=nrow(x))
for (j in 1:ncol(x))
{
v <- c()
for(i in 1:nrow(x))
{
v <- c(v,mean(x[1:i,j]))
}
m <- cbind(m,v)
}
m <- m[,-1]
colnames(m) <- colnames(x)
rownames(m) <- NULL
return(m)
}
sequentialMeans <- lapply(lyst,nmean)
pdf("Question4.pdf",width=15,height=10)
for (i in 1:7)
{
for (j in 1:7)
{
plot(y=sequentialMeans[[i]][,j],x=1:10000,xlab="n value",ylab="Values", main=paste("Alpha-Lambda:",colnames(lyst[[i]])[j]),type="l")
}
}
dev.off()
The problem with your code is that the data format of the input for the nmean function according to the lines
nmean <- function(x)
{
m <- matrix(nrow=nrow(x))
for (j in 1:ncol(x))
{
v <- c()
for(i in 1:nrow(x))
{
v <- c(v,mean(x[1:i,j]))
}
m <- cbind(m,v)
}
m <- m[,-1]
colnames(m) <- colnames(x)
rownames(m) <- NULL
return(m)
}
is a matrix and you want feed it vectors of gamma-distributed values as specified in the following lines
lyst <- list()
for(i in v)
{
for(j in v)
{
elementname <- paste0(as.character(i),"-",as.character(j))
print(elementname)
lyst[[elementname]] <- rgamma(10000,i,j)
}
}
For x that have type vector, the function ncol(x)and nrow(x)return NULL. Besides, there is also no application of ncol(x) possible.
If you want to save your approach you need to either think about transforming your data into matrix format or alternatively, use the vector format but use the vector-compatible functions length(x) for the length of the vector and names(lyst) for the names.
Update:
The code in the comments works but you got to change the lapply-statement as you now have a matrix that you can use as input for the nmean function directly. The following code works for generating sampleMeans and avoids the original error message of your question. In order to cut down runtime it only takes 100 samples.
#Question 1
set.seed(10000)
v <- c(0.1,0.5,1,2,5,10,100)
lyst <- list()
for(i in v)
{
for(j in v)
{
elementname <- paste0(as.character(i),"-",as.character(j))
print(elementname)
lyst[[elementname]] <- rgamma(100,i,j)
}
}
#Question 2
pdf("Question2.pdf",width = 20, height = 10)
par(mfcol=c(7,7))
for(x in names(lyst))
{
hist(lyst[[x]],
xlab = "Value",
main = paste("Alpha-Lambda:",x))
}
dev.off()
#Question 3
theoretical_mean <- matrix(ncol=7,nrow=7,dimnames=list(as.character(v), as.character(v)))
theoretical_var <- matrix(ncol=7,nrow=7,dimnames=list(as.character(v), as.character(v)))
for (i in 1:7)
{
for (j in 1:7)
{
theoretical_mean[j,i] <- as.character(v[i]/v[j])
theoretical_var[j,i] <- as.character(v[i]/(v[j]^2))
}
}
sample_mean <-lapply(lyst, mean)
sample_mean <- as.data.frame(matrix(unlist(sample_mean),nrow = 7, ncol = 7, byrow = T))
sample_mean <- round(sample_mean,digits = 3)
sample_mean <- data.matrix(sample_mean, rownames.force = NA)
sample_var <-lapply(lyst, var)
sample_var <- as.data.frame(matrix(unlist(sample_var),nrow = 7, ncol = 7, byrow = T))
sample_var <- round(sample_var,digits = 3)
sample_var <- data.matrix(sample_var, rownames.force = NA)
theor_sample_mean <- matrix(paste(theoretical_mean, sample_mean, sep=" - "),nrow=7,dimnames = dimnames(theoretical_var))
theor_sample_var <- matrix(paste(theoretical_var, sample_var, sep=" - "),nrow=7,dimnames= dimnames(theoretical_var))
sink("Q3.txt")
cat("Theoretical Mean vs. Sample Mean:\n")
print(as.table(theor_sample_mean))
cat("\n")
cat("Theoretical Variance vs. Sample Variance:\n")
print(as.table(theor_sample_var))
sink()
lyst = matrix(unlist(lyst), ncol = 7, byrow = TRUE)
colnames(lyst) = c("100-0.1","100-0.5","100-1","100-2","100-5","100-10","100-100")
#Question 4
nmean <- function(x)
{
m <- matrix(nrow=nrow(x))
for (j in 1:ncol(x))
{
v <- c()
for(i in 1:nrow(x))
{
v <- c(v,mean(x[1:i,j]))
}
m <- cbind(m,v)
}
m <- m[,-1]
colnames(m) <- colnames(x)
rownames(m) <- NULL
return(m)
}
sequentialMeans <- nmean(lyst)
Note also that you need to adjust the code for Q4, that is, the plot generation. The following code works.
pdf("Question4.pdf",width=15,height=10)
for (i in 1:7)
{
for (j in 1:7)
{
plot(y=sequentialMeans[,j],x=1:700,xlab="n value",ylab="Values", main=paste("Alpha-Lambda:",colnames(lyst[,j]),type="l"))
}
}
dev.off()
Let me know if this helps.
Related
I would be super grateful for some help. I don't have a coding background and I am confused by the error message I am getting when running the sppb functions of the WRS2 package. These functions perform a robust mixed ANOVA using bootstrapping.
sppba(formula = score ~ my_between_variable * my_within_variable, id = participant_code, data = df_long_T2)
Error in xmat[, k] <- x[[kv]] :
number of items to replace is not a multiple of replacement length
I get the same error for all three sppb functions. The functions look the same except that instead of sppba the others say sppbb and sppbi. I don't even know what the functions are trying to replace. The functions work for me with other data.
The classes of all the things involved seem fine: score is numeric, order_supplement and time are factors, participant_code is character, df_long_T2 is a dataframe. I have 120 participants, 61 in one group and 59 in the other, with two observations per participant. There are no NAs in the columns involved.
Traceback() just gives me the one line of code above and the error message.
Debug() gives me this and I don't know what to make of it:
"Debug location is approximate because location is not available"
function (formula, id, data, est = "mom", avg = TRUE, nboot = 500,
MDIS = FALSE, ...)
{
if (missing(data)) {
mf <- model.frame(formula)
}
else {
mf <- model.frame(formula, data)
}
cl <- match.call()
est <- match.arg(est, c("mom", "onestep", "median"), several.ok = FALSE)
mf1 <- match.call()
m <- match(c("formula", "data", "id"), names(mf1), 0L)
mf1 <- mf1[c(1L, m)]
mf1$drop.unused.levels <- TRUE
mf1[[1L]] <- quote(stats::model.frame)
mf1 <- eval(mf1, parent.frame())
random1 <- mf1[, "(id)"]
depvar <- colnames(mf)[1]
if (all(length(table(random1)) == table(mf[, 3]))) {
ranvar <- colnames(mf)[3]
fixvar <- colnames(mf)[2]
}
else {
ranvar <- colnames(mf)[2]
fixvar <- colnames(mf)[3]
}
MC <- FALSE
K <- length(table(mf[, ranvar]))
J <- length(table(mf[, fixvar]))
p <- J * K
grp <- 1:p
est <- get(est)
fixsplit <- split(mf[, depvar], mf[, fixvar])
indsplit <- split(mf[, ranvar], mf[, fixvar])
dattemp <- mapply(split, fixsplit, indsplit, SIMPLIFY = FALSE)
data <- do.call(c, dattemp)
x <- data
jp <- 1 - K
kv <- 0
kv2 <- 0
for (j in 1:J) {
jp <- jp + K
xmat <- matrix(NA, ncol = K, nrow = length(x[[jp]]))
for (k in 1:K) {
kv <- kv + 1
xmat[, k] <- x[[kv]]
}
xmat <- elimna(xmat)
for (k in 1:K) {
kv2 <- kv2 + 1
x[[kv2]] <- xmat[, k]
}
}
xx <- x
nvec <- NA
jp <- 1 - K
for (j in 1:J) {
jp <- jp + K
nvec[j] <- length(x[[jp]])
}
bloc <- matrix(NA, nrow = J, ncol = nboot)
mvec <- NA
ik <- 0
for (j in 1:J) {
x <- matrix(NA, nrow = nvec[j], ncol = K)
for (k in 1:K) {
ik <- ik + 1
x[, k] <- xx[[ik]]
if (!avg)
mvec[ik] <- est(xx[[ik]])
}
tempv <- apply(x, 2, est)
data <- matrix(sample(nvec[j], size = nvec[j] * nboot,
replace = TRUE), nrow = nboot)
bvec <- matrix(NA, ncol = K, nrow = nboot)
for (k in 1:K) {
temp <- x[, k]
bvec[, k] <- apply(data, 1, rmanogsub, temp, est)
}
if (avg) {
mvec[j] <- mean(tempv)
bloc[j, ] <- apply(bvec, 1, mean)
}
if (!avg) {
if (j == 1)
bloc <- bvec
if (j > 1)
bloc <- cbind(bloc, bvec)
}
}
if (avg) {
d <- (J^2 - J)/2
con <- matrix(0, J, d)
id <- 0
Jm <- J - 1
for (j in 1:Jm) {
jp <- j + 1
for (k in jp:J) {
id <- id + 1
con[j, id] <- 1
con[k, id] <- 0 - 1
}
}
}
if (!avg) {
MJK <- K * (J^2 - J)/2
JK <- J * K
MJ <- (J^2 - J)/2
cont <- matrix(0, nrow = J, ncol = MJ)
ic <- 0
for (j in 1:J) {
for (jj in 1:J) {
if (j < jj) {
ic <- ic + 1
cont[j, ic] <- 1
cont[jj, ic] <- 0 - 1
}
}
}
tempv <- matrix(0, nrow = K - 1, ncol = MJ)
con1 <- rbind(cont[1, ], tempv)
for (j in 2:J) {
con2 <- rbind(cont[j, ], tempv)
con1 <- rbind(con1, con2)
}
con <- con1
if (K > 1) {
for (k in 2:K) {
con1 <- push(con1)
con <- cbind(con, con1)
}
}
}
if (!avg)
bcon <- t(con) %*% t(bloc)
if (avg)
bcon <- t(con) %*% (bloc)
tvec <- t(con) %*% mvec
tvec <- tvec[, 1]
tempcen <- apply(bcon, 1, mean)
vecz <- rep(0, ncol(con))
bcon <- t(bcon)
temp = bcon
for (ib in 1:nrow(temp)) temp[ib, ] = temp[ib, ] - tempcen +
tvec
bcon <- rbind(bcon, vecz)
if (!MDIS) {
if (!MC)
dv = pdis(bcon, center = tvec)
}
if (MDIS) {
smat <- var(temp)
bcon <- rbind(bcon, vecz)
chkrank <- qr(smat)$rank
if (chkrank == ncol(smat))
dv <- mahalanobis(bcon, tvec, smat)
if (chkrank < ncol(smat)) {
smat <- ginv(smat)
dv <- mahalanobis(bcon, tvec, smat, inverted = T)
}
}
bplus <- nboot + 1
sig.level <- 1 - sum(dv[bplus] >= dv[1:nboot])/nboot
tvec1 <- data.frame(Estimate = tvec)
if (avg) {
tnames <- apply(combn(levels(mf[, fixvar]), 2), 2, paste0,
collapse = "-")
rownames(tvec1) <- tnames
}
else {
fixcomb <- apply(combn(levels(mf[, fixvar]), 2), 2,
paste0, collapse = "-")
rnames <- levels(mf[, ranvar])
tnames <- as.vector(t(outer(rnames, fixcomb, paste)))
rownames(tvec1) <- tnames
}
result <- list(test = tvec1, p.value = sig.level, contrasts = con,
call = cl)
class(result) <- c("spp")
result
}
I expected to get an output like this:
## Test statistics:
## Estimate
## time1-time2 0.3000
##
## Test whether the corrresponding population parameters are the same:
## p-value: 0.37
I have calculated dendrograms of my dataset with the divisive and agglomerative method
library(cluster)
library(fpc)
gower.dist <- daisy(data.cluster, metric=c("gower"))
divisive.clust <- diana(as.matrix(gower.dist),
diss = TRUE, keep.diss = TRUE)
plot(divisive.clust, main = "Divisive")
aggl.clust.c <- hclust(gower.dist, method = "complete")
plot(aggl.clust.c,
main = "Agglomerative, complete linkages")
I also have the results in a table with the amounts of cases in the clusters, etc.
cstats.table <- function(dist, tree, k) {
clust.assess <- c("cluster.number","n","within.cluster.ss","average.within","average.between",
"wb.ratio","dunn2","avg.silwidth")
clust.size <- c("cluster.size")
stats.names <- c()
row.clust <- c()
output.stats <- matrix(ncol = k, nrow = length(clust.assess))
cluster.sizes <- matrix(ncol = k, nrow = k)
for(i in c(1:k)){
row.clust[i] <- paste("Cluster-", i, " size")
}
for(i in c(2:k)){
stats.names[i] <- paste("Test", i-1)
for(j in seq_along(clust.assess)){
output.stats[j, i] <- unlist(cluster.stats(d = dist, clustering = cutree(tree, k = i))[clust.assess])[j]
}
for(d in 1:k) {
cluster.sizes[d, i] <- unlist(cluster.stats(d = dist, clustering = cutree(tree, k = i))[clust.size])[d]
dim(cluster.sizes[d, i]) <- c(length(cluster.sizes[i]), 1)
cluster.sizes[d, i]
}
}
output.stats.df <- data.frame(output.stats)
cluster.sizes <- data.frame(cluster.sizes)
cluster.sizes[is.na(cluster.sizes)] <- 0
rows.all <- c(clust.assess, row.clust)
# rownames(output.stats.df) <- clust.assess
output <- rbind(output.stats.df, cluster.sizes)[ ,-1]
colnames(output) <- stats.names[2:k]
rownames(output) <- rows.all
is.num <- sapply(output, is.numeric)
output[is.num] <- lapply(output[is.num], round, 2)
output
}
stats.df.divisive <- cstats.table(gower.dist, divisive.clust, 7)
stats.df.divisive
stats.df.aggl <-cstats.table(gower.dist, aggl.clust.c, 7)
#complete linkages looks like the most balanced approach
stats.df.aggl
can somebody help me please with my cluster analysis?
I try to put values in an empty vector inside a while loop and are not able to manage it.
I am searching for the minimum value of my distance matrix and then put these values in every step of the while loop into a vector.
You can find the part of the code at variable "meltLevel" in the middle of the code.
Thanks in advance.
standardMatrix <- matrix (runif(25, max = 50, min = 1), nrow = 5, ncol = 5)
print(standardMatrix)
dist <- function(x1, x2) sqrt(sum((x1 - x2)^2))
distanceMatrix <- matrix(, nrow=5, ncol=5)
for (x in 1:4) {
for (y in (x+1):5) {
vector1 <- standardMatrix[x,];
vector2 <- standardMatrix[y,];
distanceMatrix[x,y] <- dist(vector1, vector2);
}
}
distanceMatrix <- t(distanceMatrix)
print(distanceMatrix)
while(dim(distanceMatrix)[1] > 2) {
distanceMatrix <- rbind(distanceMatrix, NA)
distanceMatrix <- cbind(distanceMatrix, NA)
newInd <- dim(distanceMatrix)[1]
endInd <- dim(distanceMatrix)[1] - 1
meltLevel <- numeric(0)
meltLevel <- c(meltLevel, min(distanceMatrix, na.rm = TRUE))
print(meltLevel)
minVal <- which(distanceMatrix == min(distanceMatrix, na.rm=TRUE), arr.ind=TRUE)
print(minVal)
endInd = dim(distanceMatrix)[1]-1
for(col in 1:endInd){
if(!(col == minVal[1] || col == minVal[2])) {
v <- c(distanceMatrix[minVal[1],col],distanceMatrix[col,minVal[1]],distanceMatrix[col,minVal[2]],distanceMatrix[minVal[2],col])
minV <- min(v, na.rm=TRUE)
distanceMatrix[newInd,col] = minV
}
}
maxMinVal <- max(minVal)
minMinVal <- min(minVal)
distanceMatrix <- distanceMatrix[-maxMinVal,]
distanceMatrix <- distanceMatrix[-minMinVal,]
distanceMatrix <- distanceMatrix[, -maxMinVal]
distanceMatrix <- distanceMatrix[, -minMinVal]
print(distanceMatrix)
}
print(meltLevel)
Heres my code
so the problem right now the code works but only if in the line
lyst[[elementname]] <- rgamma(10000,i,j) 10000 is set to 100
and in the line
plot(y=sample_mean_q4[,j],x=1:7000,xlab="n value",ylab="Values", main=paste("Alpha-Lambda:",colnames(lyst[,j]),type="l")) the value of x=1:1000 is set to x=1:700
otherwise I get the error "Error in xy.coords(x, y, xlabel, ylabel, log) :
'x' and 'y' lengths differ"
I need the values to be 10000 and x=1=1:1000
#Question 1
set.seed(10000)
v <- c(0.1,0.5,1,2,5,10,100)
lyst <- list()
for(i in v)
{
for(j in v)
{
elementname <- paste0(as.character(i),"-",as.character(j))
print(elementname)
lyst[[elementname]] <- rgamma(10000,i,j)
}
}
#Question 2
pdf("Question2.pdf",width = 20, height = 10)
par(mfcol=c(7,7))
for(x in names(lyst))
{
hist(lyst[[x]],
xlab = "Value",
main = paste("Alpha-Lambda:",x))
}
dev.off()
#Question 3
theoretical_mean <- matrix(ncol=7,nrow=7,dimnames=list(as.character(v), as.character(v)))
theoretical_var <- matrix(ncol=7,nrow=7,dimnames=list(as.character(v), as.character(v)))
for (i in 1:7)
{
for (j in 1:7)
{
theoretical_mean[j,i] <- as.character(v[i]/v[j])
theoretical_var[j,i] <- as.character(v[i]/(v[j]^2))
}
}
sample_mean <-lapply(lyst, mean)
sample_mean <- as.data.frame(matrix(unlist(sample_mean),nrow = 7, ncol = 7, byrow = T))
sample_mean <- round(sample_mean,digits = 3)
sample_mean <- data.matrix(sample_mean, rownames.force = NA)
sample_var <-lapply(lyst, var)
sample_var <- as.data.frame(matrix(unlist(sample_var),nrow = 7, ncol = 7, byrow = T))
sample_var <- round(sample_var,digits = 3)
sample_var <- data.matrix(sample_var, rownames.force = NA)
theor_sample_mean <- matrix(paste(theoretical_mean, sample_mean, sep=" - "),nrow=7,dimnames = dimnames(theoretical_var))
theor_sample_var <- matrix(paste(theoretical_var, sample_var, sep=" - "),nrow=7,dimnames= dimnames(theoretical_var))
sink("Q3.txt")
cat("Theoretical Mean vs. Sample Mean:\n")
print(as.table(theor_sample_mean))
cat("\n")
cat("Theoretical Variance vs. Sample Variance:\n")
print(as.table(theor_sample_var))
sink()
lyst = matrix(unlist(lyst), ncol = 7, byrow = F)
colnames(lyst) = c("100-0.1","100-0.5","100-1","100-2","100-5","100-10","100-100")
#Question 4
q4mean <- function(x)
{
m <- matrix(nrow=nrow(x))
for (j in 1:ncol(x))
{
v <- c()
for(i in 1:nrow(x))
{
v <- c(v,mean(x[1:i,j]))
}
m <- cbind(m,v)
}
m <- m[,-1]
colnames(m) <- colnames(x)
rownames(m) <- NULL
return(m)
}
sample_mean_q4 <- q4mean(lyst)
pdf("Question4.pdf",width=15,height=10)
for (i in 1:7)
{
for (j in 1:7)
{
plot(y=sample_mean_q4[,j],x=1:1000,xlab="n value",ylab="Values", main=paste("Alpha-Lambda:",colnames(lyst[,j]),type="l"))
}
}
dev.off()
Please do not add many questions in one post. Identify the problem and add one per post. The error in your code is related to Question-4.
# Select names of the column to be included in the plot
selected_cols <- c("100-0.1","100-0.5","100-1","100-2","100-5","100-10","100-100")
# this is equivalent to: `lyst = matrix(unlist(lyst), ncol = 7, byrow = F) `
# I used abc as name instead of lyst
abc <- sapply(selected_cols, function(x) lyst[[x]])
sample_mean_q4 <- q4mean(abc)
# draw plot
for(x in selected_cols){
pdf(paste0("Question4", "_", x, ".pdf"),width=15,height=10)
plot( x=1:1000, y = sample_mean_q4[1:1000, x],
xlab="n value", ylab="Values", main=bquote(alpha - lambda == .(x) ), type="l")
dev.off()
}
Data:
q4mean <- function(x)
{
m <- matrix(nrow=nrow(x))
for (j in 1:ncol(x)) {
v <- c()
for(i in 1:nrow(x)){
v <- c(v,mean(x[1:i,j]))
}
m <- cbind(m,v)
}
m <- m[,-1]
colnames(m) <- colnames(x)
rownames(m) <- NULL
return(m)
}
set.seed(10000)
v <- c(0.1,0.5,1,2,5,10,100)
lyst <- list()
for(i in v){
for(j in v){
elementname <- paste0(as.character(i),"-",as.character(j))
print(elementname)
lyst[[elementname]] <- rgamma(10000,i,j)
}
}
are you aware of R's recycling functionality? Have you tried breaking the problem into smaller chunks?
I am using the function prediction.strength in the r Package fpc with k-medoids algorithms.
here is my code
prediction.strength(data,2,6,M=10,clustermethod=pamkCBI,DIST,krange=2:6,diss=TRUE,usepam=TRUE)
somehow I get the error message
Error in switch(method, kmeans = kmeans(xdata[indvec[[l]][[i]], ], k, :
EXPR must be a length 1 vector
Does anybody have experience with this r command? There are simple examples like
iriss <- iris[sample(150,20),-5]
prediction.strength(iriss,2,3,M=3,method="pam")
but my problem is that I am using dissimilarity matrix instead of the data itself for the k-medoids algorithms. I don't know how should I correct my code in this case.
Please note that in the package help the following is stated for the prediction.strength:
xdats - data (something that can be coerced into a matrix). Note that this can currently
not be a dissimilarity matrix.
I'm afraid you'll have to hack the function to get it to handle a distance matrix. I'm using the following:
pred <- function (distance, Gmin = 2, Gmax = 10, M = 50,
classification = "centroid", cutoff = 0.8, nnk = 1, ...)
{
require(cluster)
require(class)
xdata <- as.matrix(distance)
n <- nrow(xdata)
nf <- c(floor(n/2), n - floor(n/2))
indvec <- clcenters <- clusterings <- jclusterings <- classifications <- list()
prederr <- list()
dist <- as.matrix(distance)
for (k in Gmin:Gmax) {
prederr[[k]] <- numeric(0)
for (l in 1:M) {
nperm <- sample(n, n)
indvec[[l]] <- list()
indvec[[l]][[1]] <- nperm[1:nf[1]]
indvec[[l]][[2]] <- nperm[(nf[1] + 1):n]
for (i in 1:2) {
clusterings[[i]] <- as.vector(pam(as.dist(dist[indvec[[l]][[i]],indvec[[l]][[i]]]), k, diss=TRUE))
jclusterings[[i]] <- rep(-1, n)
jclusterings[[i]][indvec[[l]][[i]]] <- clusterings[[i]]$clustering
centroids <- clusterings[[i]]$medoids
j <- 3 - i
classifications[[j]] <- classifdist(as.dist(dist), jclusterings[[i]],
method = classification, centroids = centroids,
nnk = nnk)[indvec[[l]][[j]]]
}
ps <- matrix(0, nrow = 2, ncol = k)
for (i in 1:2) {
for (kk in 1:k) {
nik <- sum(clusterings[[i]]$clustering == kk)
if (nik > 1) {
for (j1 in (1:(nf[i] - 1))[clusterings[[i]]$clustering[1:(nf[i] -
1)] == kk]) {
for (j2 in (j1 + 1):nf[i]) if (clusterings[[i]]$clustering[j2] ==
kk)
ps[i, kk] <- ps[i, kk] + (classifications[[i]][j1] ==
classifications[[i]][j2])
}
ps[i, kk] <- 2 * ps[i, kk]/(nik * (nik -
1))
}
}
}
prederr[[k]][l] <- mean(c(min(ps[1, ]), min(ps[2,
])))
}
}
mean.pred <- numeric(0)
if (Gmin > 1)
mean.pred <- c(1)
if (Gmin > 2)
mean.pred <- c(mean.pred, rep(NA, Gmin - 2))
for (k in Gmin:Gmax) mean.pred <- c(mean.pred, mean(prederr[[k]]))
optimalk <- max(which(mean.pred > cutoff))
out <- list(predcorr = prederr, mean.pred = mean.pred, optimalk = optimalk,
cutoff = cutoff, method = clusterings[[1]]$clustermethod,
Gmax = Gmax, M = M)
class(out) <- "predstr"
out
}