how can I change my x and y values to suit properly - r

Heres my code
so the problem right now the code works but only if in the line
lyst[[elementname]] <- rgamma(10000,i,j) 10000 is set to 100
and in the line
plot(y=sample_mean_q4[,j],x=1:7000,xlab="n value",ylab="Values", main=paste("Alpha-Lambda:",colnames(lyst[,j]),type="l")) the value of x=1:1000 is set to x=1:700
otherwise I get the error "Error in xy.coords(x, y, xlabel, ylabel, log) :
'x' and 'y' lengths differ"
I need the values to be 10000 and x=1=1:1000
#Question 1
set.seed(10000)
v <- c(0.1,0.5,1,2,5,10,100)
lyst <- list()
for(i in v)
{
for(j in v)
{
elementname <- paste0(as.character(i),"-",as.character(j))
print(elementname)
lyst[[elementname]] <- rgamma(10000,i,j)
}
}
#Question 2
pdf("Question2.pdf",width = 20, height = 10)
par(mfcol=c(7,7))
for(x in names(lyst))
{
hist(lyst[[x]],
xlab = "Value",
main = paste("Alpha-Lambda:",x))
}
dev.off()
#Question 3
theoretical_mean <- matrix(ncol=7,nrow=7,dimnames=list(as.character(v), as.character(v)))
theoretical_var <- matrix(ncol=7,nrow=7,dimnames=list(as.character(v), as.character(v)))
for (i in 1:7)
{
for (j in 1:7)
{
theoretical_mean[j,i] <- as.character(v[i]/v[j])
theoretical_var[j,i] <- as.character(v[i]/(v[j]^2))
}
}
sample_mean <-lapply(lyst, mean)
sample_mean <- as.data.frame(matrix(unlist(sample_mean),nrow = 7, ncol = 7, byrow = T))
sample_mean <- round(sample_mean,digits = 3)
sample_mean <- data.matrix(sample_mean, rownames.force = NA)
sample_var <-lapply(lyst, var)
sample_var <- as.data.frame(matrix(unlist(sample_var),nrow = 7, ncol = 7, byrow = T))
sample_var <- round(sample_var,digits = 3)
sample_var <- data.matrix(sample_var, rownames.force = NA)
theor_sample_mean <- matrix(paste(theoretical_mean, sample_mean, sep=" - "),nrow=7,dimnames = dimnames(theoretical_var))
theor_sample_var <- matrix(paste(theoretical_var, sample_var, sep=" - "),nrow=7,dimnames= dimnames(theoretical_var))
sink("Q3.txt")
cat("Theoretical Mean vs. Sample Mean:\n")
print(as.table(theor_sample_mean))
cat("\n")
cat("Theoretical Variance vs. Sample Variance:\n")
print(as.table(theor_sample_var))
sink()
lyst = matrix(unlist(lyst), ncol = 7, byrow = F)
colnames(lyst) = c("100-0.1","100-0.5","100-1","100-2","100-5","100-10","100-100")
#Question 4
q4mean <- function(x)
{
m <- matrix(nrow=nrow(x))
for (j in 1:ncol(x))
{
v <- c()
for(i in 1:nrow(x))
{
v <- c(v,mean(x[1:i,j]))
}
m <- cbind(m,v)
}
m <- m[,-1]
colnames(m) <- colnames(x)
rownames(m) <- NULL
return(m)
}
sample_mean_q4 <- q4mean(lyst)
pdf("Question4.pdf",width=15,height=10)
for (i in 1:7)
{
for (j in 1:7)
{
plot(y=sample_mean_q4[,j],x=1:1000,xlab="n value",ylab="Values", main=paste("Alpha-Lambda:",colnames(lyst[,j]),type="l"))
}
}
dev.off()

Please do not add many questions in one post. Identify the problem and add one per post. The error in your code is related to Question-4.
# Select names of the column to be included in the plot
selected_cols <- c("100-0.1","100-0.5","100-1","100-2","100-5","100-10","100-100")
# this is equivalent to: `lyst = matrix(unlist(lyst), ncol = 7, byrow = F) `
# I used abc as name instead of lyst
abc <- sapply(selected_cols, function(x) lyst[[x]])
sample_mean_q4 <- q4mean(abc)
# draw plot
for(x in selected_cols){
pdf(paste0("Question4", "_", x, ".pdf"),width=15,height=10)
plot( x=1:1000, y = sample_mean_q4[1:1000, x],
xlab="n value", ylab="Values", main=bquote(alpha - lambda == .(x) ), type="l")
dev.off()
}
Data:
q4mean <- function(x)
{
m <- matrix(nrow=nrow(x))
for (j in 1:ncol(x)) {
v <- c()
for(i in 1:nrow(x)){
v <- c(v,mean(x[1:i,j]))
}
m <- cbind(m,v)
}
m <- m[,-1]
colnames(m) <- colnames(x)
rownames(m) <- NULL
return(m)
}
set.seed(10000)
v <- c(0.1,0.5,1,2,5,10,100)
lyst <- list()
for(i in v){
for(j in v){
elementname <- paste0(as.character(i),"-",as.character(j))
print(elementname)
lyst[[elementname]] <- rgamma(10000,i,j)
}
}

are you aware of R's recycling functionality? Have you tried breaking the problem into smaller chunks?

Related

Why do I get the error "number of items to replace is not a multiple of replacement length" when running the sppba function of the WRS2 package?

I would be super grateful for some help. I don't have a coding background and I am confused by the error message I am getting when running the sppb functions of the WRS2 package. These functions perform a robust mixed ANOVA using bootstrapping.
sppba(formula = score ~ my_between_variable * my_within_variable, id = participant_code, data = df_long_T2)
Error in xmat[, k] <- x[[kv]] :
number of items to replace is not a multiple of replacement length
I get the same error for all three sppb functions. The functions look the same except that instead of sppba the others say sppbb and sppbi. I don't even know what the functions are trying to replace. The functions work for me with other data.
The classes of all the things involved seem fine: score is numeric, order_supplement and time are factors, participant_code is character, df_long_T2 is a dataframe. I have 120 participants, 61 in one group and 59 in the other, with two observations per participant. There are no NAs in the columns involved.
Traceback() just gives me the one line of code above and the error message.
Debug() gives me this and I don't know what to make of it:
"Debug location is approximate because location is not available"
function (formula, id, data, est = "mom", avg = TRUE, nboot = 500,
MDIS = FALSE, ...)
{
if (missing(data)) {
mf <- model.frame(formula)
}
else {
mf <- model.frame(formula, data)
}
cl <- match.call()
est <- match.arg(est, c("mom", "onestep", "median"), several.ok = FALSE)
mf1 <- match.call()
m <- match(c("formula", "data", "id"), names(mf1), 0L)
mf1 <- mf1[c(1L, m)]
mf1$drop.unused.levels <- TRUE
mf1[[1L]] <- quote(stats::model.frame)
mf1 <- eval(mf1, parent.frame())
random1 <- mf1[, "(id)"]
depvar <- colnames(mf)[1]
if (all(length(table(random1)) == table(mf[, 3]))) {
ranvar <- colnames(mf)[3]
fixvar <- colnames(mf)[2]
}
else {
ranvar <- colnames(mf)[2]
fixvar <- colnames(mf)[3]
}
MC <- FALSE
K <- length(table(mf[, ranvar]))
J <- length(table(mf[, fixvar]))
p <- J * K
grp <- 1:p
est <- get(est)
fixsplit <- split(mf[, depvar], mf[, fixvar])
indsplit <- split(mf[, ranvar], mf[, fixvar])
dattemp <- mapply(split, fixsplit, indsplit, SIMPLIFY = FALSE)
data <- do.call(c, dattemp)
x <- data
jp <- 1 - K
kv <- 0
kv2 <- 0
for (j in 1:J) {
jp <- jp + K
xmat <- matrix(NA, ncol = K, nrow = length(x[[jp]]))
for (k in 1:K) {
kv <- kv + 1
xmat[, k] <- x[[kv]]
}
xmat <- elimna(xmat)
for (k in 1:K) {
kv2 <- kv2 + 1
x[[kv2]] <- xmat[, k]
}
}
xx <- x
nvec <- NA
jp <- 1 - K
for (j in 1:J) {
jp <- jp + K
nvec[j] <- length(x[[jp]])
}
bloc <- matrix(NA, nrow = J, ncol = nboot)
mvec <- NA
ik <- 0
for (j in 1:J) {
x <- matrix(NA, nrow = nvec[j], ncol = K)
for (k in 1:K) {
ik <- ik + 1
x[, k] <- xx[[ik]]
if (!avg)
mvec[ik] <- est(xx[[ik]])
}
tempv <- apply(x, 2, est)
data <- matrix(sample(nvec[j], size = nvec[j] * nboot,
replace = TRUE), nrow = nboot)
bvec <- matrix(NA, ncol = K, nrow = nboot)
for (k in 1:K) {
temp <- x[, k]
bvec[, k] <- apply(data, 1, rmanogsub, temp, est)
}
if (avg) {
mvec[j] <- mean(tempv)
bloc[j, ] <- apply(bvec, 1, mean)
}
if (!avg) {
if (j == 1)
bloc <- bvec
if (j > 1)
bloc <- cbind(bloc, bvec)
}
}
if (avg) {
d <- (J^2 - J)/2
con <- matrix(0, J, d)
id <- 0
Jm <- J - 1
for (j in 1:Jm) {
jp <- j + 1
for (k in jp:J) {
id <- id + 1
con[j, id] <- 1
con[k, id] <- 0 - 1
}
}
}
if (!avg) {
MJK <- K * (J^2 - J)/2
JK <- J * K
MJ <- (J^2 - J)/2
cont <- matrix(0, nrow = J, ncol = MJ)
ic <- 0
for (j in 1:J) {
for (jj in 1:J) {
if (j < jj) {
ic <- ic + 1
cont[j, ic] <- 1
cont[jj, ic] <- 0 - 1
}
}
}
tempv <- matrix(0, nrow = K - 1, ncol = MJ)
con1 <- rbind(cont[1, ], tempv)
for (j in 2:J) {
con2 <- rbind(cont[j, ], tempv)
con1 <- rbind(con1, con2)
}
con <- con1
if (K > 1) {
for (k in 2:K) {
con1 <- push(con1)
con <- cbind(con, con1)
}
}
}
if (!avg)
bcon <- t(con) %*% t(bloc)
if (avg)
bcon <- t(con) %*% (bloc)
tvec <- t(con) %*% mvec
tvec <- tvec[, 1]
tempcen <- apply(bcon, 1, mean)
vecz <- rep(0, ncol(con))
bcon <- t(bcon)
temp = bcon
for (ib in 1:nrow(temp)) temp[ib, ] = temp[ib, ] - tempcen +
tvec
bcon <- rbind(bcon, vecz)
if (!MDIS) {
if (!MC)
dv = pdis(bcon, center = tvec)
}
if (MDIS) {
smat <- var(temp)
bcon <- rbind(bcon, vecz)
chkrank <- qr(smat)$rank
if (chkrank == ncol(smat))
dv <- mahalanobis(bcon, tvec, smat)
if (chkrank < ncol(smat)) {
smat <- ginv(smat)
dv <- mahalanobis(bcon, tvec, smat, inverted = T)
}
}
bplus <- nboot + 1
sig.level <- 1 - sum(dv[bplus] >= dv[1:nboot])/nboot
tvec1 <- data.frame(Estimate = tvec)
if (avg) {
tnames <- apply(combn(levels(mf[, fixvar]), 2), 2, paste0,
collapse = "-")
rownames(tvec1) <- tnames
}
else {
fixcomb <- apply(combn(levels(mf[, fixvar]), 2), 2,
paste0, collapse = "-")
rnames <- levels(mf[, ranvar])
tnames <- as.vector(t(outer(rnames, fixcomb, paste)))
rownames(tvec1) <- tnames
}
result <- list(test = tvec1, p.value = sig.level, contrasts = con,
call = cl)
class(result) <- c("spp")
result
}
I expected to get an output like this:
## Test statistics:
## Estimate
## time1-time2 0.3000
##
## Test whether the corrresponding population parameters are the same:
## p-value: 0.37

How to see the distribution of variables after clustering with gower distance in R. How can i see the distribution of variables accross the clusters?

I have calculated dendrograms of my dataset with the divisive and agglomerative method
library(cluster)
library(fpc)
gower.dist <- daisy(data.cluster, metric=c("gower"))
divisive.clust <- diana(as.matrix(gower.dist),
diss = TRUE, keep.diss = TRUE)
plot(divisive.clust, main = "Divisive")
aggl.clust.c <- hclust(gower.dist, method = "complete")
plot(aggl.clust.c,
main = "Agglomerative, complete linkages")
I also have the results in a table with the amounts of cases in the clusters, etc.
cstats.table <- function(dist, tree, k) {
clust.assess <- c("cluster.number","n","within.cluster.ss","average.within","average.between",
"wb.ratio","dunn2","avg.silwidth")
clust.size <- c("cluster.size")
stats.names <- c()
row.clust <- c()
output.stats <- matrix(ncol = k, nrow = length(clust.assess))
cluster.sizes <- matrix(ncol = k, nrow = k)
for(i in c(1:k)){
row.clust[i] <- paste("Cluster-", i, " size")
}
for(i in c(2:k)){
stats.names[i] <- paste("Test", i-1)
for(j in seq_along(clust.assess)){
output.stats[j, i] <- unlist(cluster.stats(d = dist, clustering = cutree(tree, k = i))[clust.assess])[j]
}
for(d in 1:k) {
cluster.sizes[d, i] <- unlist(cluster.stats(d = dist, clustering = cutree(tree, k = i))[clust.size])[d]
dim(cluster.sizes[d, i]) <- c(length(cluster.sizes[i]), 1)
cluster.sizes[d, i]
}
}
output.stats.df <- data.frame(output.stats)
cluster.sizes <- data.frame(cluster.sizes)
cluster.sizes[is.na(cluster.sizes)] <- 0
rows.all <- c(clust.assess, row.clust)
# rownames(output.stats.df) <- clust.assess
output <- rbind(output.stats.df, cluster.sizes)[ ,-1]
colnames(output) <- stats.names[2:k]
rownames(output) <- rows.all
is.num <- sapply(output, is.numeric)
output[is.num] <- lapply(output[is.num], round, 2)
output
}
stats.df.divisive <- cstats.table(gower.dist, divisive.clust, 7)
stats.df.divisive
stats.df.aggl <-cstats.table(gower.dist, aggl.clust.c, 7)
#complete linkages looks like the most balanced approach
stats.df.aggl

What is this non numeric matrix extent error in R?

Im trying to apply a function onto my list but it returns this error
"non numeric matrix extent error"
here's my code
the error occurs in the last few lines
the code works fine up till the end, and because of this, im unable to plot my graphs
I've searched online but couldnt find anything that helps, and I cant see what's wrong with the code
#Question 1
set.seed(10000)
v <- c(0.1,0.5,1,2,5,10,100)
lyst <- list()
for(i in v)
{
for(j in v)
{
elementname <- paste0(as.character(i),"-",as.character(j))
print(elementname)
lyst[[elementname]] <- rgamma(10000,i,j)
}
}
#Question 2
pdf("Question2.pdf",width = 20, height = 10)
par(mfcol=c(7,7))
for(x in names(lyst))
{
hist(lyst[[x]],
xlab = "Value",
main = paste("Alpha-Lambda:",x))
}
dev.off()
#Question 3
theoretical_mean <- matrix(ncol=7,nrow=7,dimnames=list(as.character(v), as.character(v)))
theoretical_var <- matrix(ncol=7,nrow=7,dimnames=list(as.character(v), as.character(v)))
for (i in 1:7)
{
for (j in 1:7)
{
theoretical_mean[j,i] <- as.character(v[i]/v[j])
theoretical_var[j,i] <- as.character(v[i]/(v[j]^2))
}
}
sample_mean <-lapply(lyst, mean)
sample_mean <- as.data.frame(matrix(unlist(sample_mean),nrow = 7, ncol = 7, byrow = T))
sample_mean <- round(sample_mean,digits = 3)
sample_mean <- data.matrix(sample_mean, rownames.force = NA)
sample_var <-lapply(lyst, var)
sample_var <- as.data.frame(matrix(unlist(sample_var),nrow = 7, ncol = 7, byrow = T))
sample_var <- round(sample_var,digits = 3)
sample_var <- data.matrix(sample_var, rownames.force = NA)
theor_sample_mean <- matrix(paste(theoretical_mean, sample_mean, sep=" - "),nrow=7,dimnames = dimnames(theoretical_var))
theor_sample_var <- matrix(paste(theoretical_var, sample_var, sep=" - "),nrow=7,dimnames= dimnames(theoretical_var))
sink("Q3.txt")
cat("Theoretical Mean vs. Sample Mean:\n")
print(as.table(theor_sample_mean))
cat("\n")
cat("Theoretical Variance vs. Sample Variance:\n")
print(as.table(theor_sample_var))
sink()
#Question 4
nmean <- function(x)
{
m <- matrix(nrow=nrow(x))
for (j in 1:ncol(x))
{
v <- c()
for(i in 1:nrow(x))
{
v <- c(v,mean(x[1:i,j]))
}
m <- cbind(m,v)
}
m <- m[,-1]
colnames(m) <- colnames(x)
rownames(m) <- NULL
return(m)
}
sequentialMeans <- lapply(lyst,nmean)
pdf("Question4.pdf",width=15,height=10)
for (i in 1:7)
{
for (j in 1:7)
{
plot(y=sequentialMeans[[i]][,j],x=1:10000,xlab="n value",ylab="Values", main=paste("Alpha-Lambda:",colnames(lyst[[i]])[j]),type="l")
}
}
dev.off()
The problem with your code is that the data format of the input for the nmean function according to the lines
nmean <- function(x)
{
m <- matrix(nrow=nrow(x))
for (j in 1:ncol(x))
{
v <- c()
for(i in 1:nrow(x))
{
v <- c(v,mean(x[1:i,j]))
}
m <- cbind(m,v)
}
m <- m[,-1]
colnames(m) <- colnames(x)
rownames(m) <- NULL
return(m)
}
is a matrix and you want feed it vectors of gamma-distributed values as specified in the following lines
lyst <- list()
for(i in v)
{
for(j in v)
{
elementname <- paste0(as.character(i),"-",as.character(j))
print(elementname)
lyst[[elementname]] <- rgamma(10000,i,j)
}
}
For x that have type vector, the function ncol(x)and nrow(x)return NULL. Besides, there is also no application of ncol(x) possible.
If you want to save your approach you need to either think about transforming your data into matrix format or alternatively, use the vector format but use the vector-compatible functions length(x) for the length of the vector and names(lyst) for the names.
Update:
The code in the comments works but you got to change the lapply-statement as you now have a matrix that you can use as input for the nmean function directly. The following code works for generating sampleMeans and avoids the original error message of your question. In order to cut down runtime it only takes 100 samples.
#Question 1
set.seed(10000)
v <- c(0.1,0.5,1,2,5,10,100)
lyst <- list()
for(i in v)
{
for(j in v)
{
elementname <- paste0(as.character(i),"-",as.character(j))
print(elementname)
lyst[[elementname]] <- rgamma(100,i,j)
}
}
#Question 2
pdf("Question2.pdf",width = 20, height = 10)
par(mfcol=c(7,7))
for(x in names(lyst))
{
hist(lyst[[x]],
xlab = "Value",
main = paste("Alpha-Lambda:",x))
}
dev.off()
#Question 3
theoretical_mean <- matrix(ncol=7,nrow=7,dimnames=list(as.character(v), as.character(v)))
theoretical_var <- matrix(ncol=7,nrow=7,dimnames=list(as.character(v), as.character(v)))
for (i in 1:7)
{
for (j in 1:7)
{
theoretical_mean[j,i] <- as.character(v[i]/v[j])
theoretical_var[j,i] <- as.character(v[i]/(v[j]^2))
}
}
sample_mean <-lapply(lyst, mean)
sample_mean <- as.data.frame(matrix(unlist(sample_mean),nrow = 7, ncol = 7, byrow = T))
sample_mean <- round(sample_mean,digits = 3)
sample_mean <- data.matrix(sample_mean, rownames.force = NA)
sample_var <-lapply(lyst, var)
sample_var <- as.data.frame(matrix(unlist(sample_var),nrow = 7, ncol = 7, byrow = T))
sample_var <- round(sample_var,digits = 3)
sample_var <- data.matrix(sample_var, rownames.force = NA)
theor_sample_mean <- matrix(paste(theoretical_mean, sample_mean, sep=" - "),nrow=7,dimnames = dimnames(theoretical_var))
theor_sample_var <- matrix(paste(theoretical_var, sample_var, sep=" - "),nrow=7,dimnames= dimnames(theoretical_var))
sink("Q3.txt")
cat("Theoretical Mean vs. Sample Mean:\n")
print(as.table(theor_sample_mean))
cat("\n")
cat("Theoretical Variance vs. Sample Variance:\n")
print(as.table(theor_sample_var))
sink()
lyst = matrix(unlist(lyst), ncol = 7, byrow = TRUE)
colnames(lyst) = c("100-0.1","100-0.5","100-1","100-2","100-5","100-10","100-100")
#Question 4
nmean <- function(x)
{
m <- matrix(nrow=nrow(x))
for (j in 1:ncol(x))
{
v <- c()
for(i in 1:nrow(x))
{
v <- c(v,mean(x[1:i,j]))
}
m <- cbind(m,v)
}
m <- m[,-1]
colnames(m) <- colnames(x)
rownames(m) <- NULL
return(m)
}
sequentialMeans <- nmean(lyst)
Note also that you need to adjust the code for Q4, that is, the plot generation. The following code works.
pdf("Question4.pdf",width=15,height=10)
for (i in 1:7)
{
for (j in 1:7)
{
plot(y=sequentialMeans[,j],x=1:700,xlab="n value",ylab="Values", main=paste("Alpha-Lambda:",colnames(lyst[,j]),type="l"))
}
}
dev.off()
Let me know if this helps.

R error - Check for Remote errors returning multiple node issues

I am currently trying to run goodness of fit tests for data in the unmarked package. To do this I am using code written in the associated google group. This relies on parboot to assess the goodness of fit of the model. It then produces a Chi squared P value and C-hat value.
Strangely when I only perform >90 simulations of the model do I get the following error:
Error in checkForRemoteErrors(val) : 3 nodes produced errors;
first error: could not find function "mb.chisq.RN"
Below this number of simulations, the error is not encountered and the statistic can be computed.
I first run; mb.chisq.RN
mb.chisq.RN <- function (mod, print.table = TRUE, maxK=50,
...){
y.raw <- mod#data#y
N.raw <- nrow(y.raw)
na.raw <- apply(X = y.raw, MARGIN = 1, FUN = function(i) all(is.na(i)))
y.data <- y.raw[!na.raw, ]
N <- N.raw - sum(na.raw)
T <- ncol(y.data)
K <- 0:maxK
det.hist <- apply(X = y.data, MARGIN = 1, FUN = function(i) paste(i,
collapse = ""))
preds.lam <- predict(mod, type = "state")$Predicted
preds.p <- matrix(data = predict(mod, type = "det")$Predicted,
ncol = T, byrow = TRUE)
out.hist <- data.frame(det.hist, preds.lam)
un.hist <- unique(det.hist)
n.un.hist <- length(un.hist)
na.vals <- length(grep(pattern = "NA", x = un.hist)) > 0
if (na.vals) {
id.na <- grep(pattern = "NA", x = un.hist)
id.det.hist.na <- grep(pattern = "NA", x = det.hist)
cohort.na <- sort(un.hist[id.na])
n.cohort.na <- length(cohort.na)
unique.na <- gsub(pattern = "NA", replacement = "N",
x = cohort.na)
na.visits <- sapply(strsplit(x = unique.na, split = ""),
FUN = function(i) paste(ifelse(i == "N", 1, 0), collapse = ""))
names(cohort.na) <- na.visits
n.hist.missing.cohorts <- table(na.visits)
n.missing.cohorts <- length(n.hist.missing.cohorts)
out.hist.na <- out.hist[id.det.hist.na, ]
out.hist.na$det.hist <- droplevels(out.hist.na$det.hist)
just.na <- sapply(X = out.hist.na$det.hist, FUN = function(i) gsub(pattern = "1",
replacement = "0", x = i))
out.hist.na$coh <- sapply(X = just.na, FUN = function(i) gsub(pattern = "NA",
replacement = "1", x = i))
freqs.missing.cohorts <- table(out.hist.na$coh)
na.freqs <- table(det.hist[id.det.hist.na])
preds.p.na <- preds.p[id.det.hist.na, ]
cohort.not.na <- sort(un.hist[-id.na])
out.hist.not.na <- out.hist[-id.det.hist.na, ]
out.hist.not.na$det.hist <- droplevels(out.hist.not.na$det.hist)
n.cohort.not.na <- length(cohort.not.na)
n.sites.not.na <- length(det.hist) - length(id.det.hist.na)
preds.p.not.na <- preds.p[-id.det.hist.na, ]
}
else {
cohort.not.na <- sort(un.hist)
out.hist.not.na <- out.hist
preds.p.not.na <- preds.p
n.cohort.not.na <- length(cohort.not.na)
n.sites.not.na <- length(det.hist)
}
if (n.cohort.not.na > 0) {
exp.freqs <- rep(NA, n.cohort.not.na)
names(exp.freqs) <- cohort.not.na
for (i in 1:n.cohort.not.na) {
eq.solved <- rep(NA, n.sites.not.na)
select.hist <- cohort.not.na[i]
strip.hist <- unlist(strsplit(select.hist, split = ""))
hist.mat <- new.hist.mat <- new.hist.mat1 <- new.hist.mat0 <- matrix(NA, nrow = n.sites.not.na, ncol = T)
for (j in 1:n.sites.not.na) {
if (n.sites.not.na == 1) {
hist.mat[j,] <- preds.p.not.na
} else {hist.mat[j,] <- preds.p.not.na[j,]}
#Pr(y.ij=1|K)
p.k.mat <- sapply(hist.mat[j,],function(r){1-(1-r)^K})
new.hist.mat1[j,] <- dpois(K,out.hist.not.na[j, "preds.lam"]) %*% p.k.mat
new.hist.mat0[j,] <- dpois(K,out.hist.not.na[j, "preds.lam"]) %*% (1-p.k.mat)
new.hist.mat[j,] <- ifelse(strip.hist == "1",
new.hist.mat1[j,], ifelse(strip.hist == "0",
new.hist.mat0[j,], 0))
combo.lam.p <- paste(new.hist.mat[j, ], collapse = "*")
eq.solved[j] <- eval(parse(text = as.expression(combo.lam.p)))
}
exp.freqs[i] <- sum(eq.solved, na.rm = TRUE)
}
freqs <- table(out.hist.not.na$det.hist)
out.freqs <- matrix(NA, nrow = n.cohort.not.na, ncol = 4)
colnames(out.freqs) <- c("Cohort", "Observed", "Expected",
"Chi-square")
rownames(out.freqs) <- names(freqs)
out.freqs[, 1] <- 0
out.freqs[, 2] <- freqs
out.freqs[, 3] <- exp.freqs
out.freqs[, 4] <- ((out.freqs[, "Observed"] - out.freqs[,
"Expected"])^2)/out.freqs[, "Expected"]
}
if (na.vals) {
missing.cohorts <- list()
if (!is.matrix(preds.p.na)) {
preds.p.na <- matrix(data = preds.p.na, nrow = 1)
}
for (m in 1:n.missing.cohorts) {
select.cohort <- out.hist.na[which(out.hist.na$coh ==
names(freqs.missing.cohorts)[m]), ]
select.preds.p.na <- preds.p.na[which(out.hist.na$coh ==
names(freqs.missing.cohorts)[m]), ]
if (!is.matrix(select.preds.p.na)) {
select.preds.p.na <- matrix(data = select.preds.p.na,
nrow = 1)
}
select.preds.p.na[, gregexpr(pattern = "N", text = gsub(pattern = "NA",
replacement = "N", x = select.cohort$det.hist[1]))[[1]]] <- 1
n.total.sites <- nrow(select.cohort)
freqs.na <- table(droplevels(select.cohort$det.hist))
cohort.na.un <- sort(unique(select.cohort$det.hist))
n.hist.na <- length(freqs.na)
exp.na <- rep(NA, n.hist.na)
names(exp.na) <- cohort.na.un
for (i in 1:n.hist.na) {
n.sites.hist <- freqs.na[i]
eq.solved <- rep(NA, n.total.sites)
select.hist <- gsub(pattern = "NA", replacement = "N",
x = cohort.na.un[i])
strip.hist <- unlist(strsplit(select.hist, split = ""))
hist.mat <- new.hist.mat <- new.hist.mat1 <-new.hist.mat0 <- matrix(NA, nrow = n.total.sites, ncol = T)
for (j in 1:n.total.sites) {
hist.mat[j, ] <- select.preds.p.na[j, ]
#Pr(y.ij=1|K)
p.k.mat <- sapply(hist.mat[j,],function(r){1-(1-r)^K})
new.hist.mat1[j,] <- dpois(K,select.cohort[j, "preds.lam"]) %*% p.k.mat
new.hist.mat0[j,] <- dpois(K,select.cohort[j, "preds.lam"]) %*% (1-p.k.mat)
new.hist.mat[j,] <- ifelse(strip.hist == "1",
new.hist.mat1[j,], ifelse(strip.hist == "0",
new.hist.mat0[j,], 1))
combo.lam.p <- paste(new.hist.mat[j, ], collapse = "*")
eq.solved[j] <- eval(parse(text = as.expression(combo.lam.p)))
}
exp.na[i] <- sum(eq.solved, na.rm = TRUE)
}
out.freqs.na <- matrix(NA, nrow = n.hist.na, ncol = 4)
colnames(out.freqs.na) <- c("Cohort", "Observed",
"Expected", "Chi-square")
rownames(out.freqs.na) <- cohort.na.un
out.freqs.na[, 1] <- m
out.freqs.na[, 2] <- freqs.na
out.freqs.na[, 3] <- exp.na
out.freqs.na[, 4] <- ((out.freqs.na[, "Observed"] -
out.freqs.na[, "Expected"])^2)/out.freqs.na[,
"Expected"]
missing.cohorts[[m]] <- list(out.freqs.na = out.freqs.na)
}
}
if (na.vals) {
chisq.missing <- do.call("rbind", lapply(missing.cohorts,
FUN = function(i) i$out.freqs.na))
if (n.cohort.not.na > 0) {
chisq.unobs.det <- N - sum(out.freqs[, "Expected"]) -
sum(chisq.missing[, "Expected"])
chisq.table <- rbind(out.freqs, chisq.missing)
}
else {
chisq.unobs.det <- N - sum(chisq.missing[, "Expected"])
chisq.table <- chisq.missing
}
}
else {
chisq.unobs.det <- N - sum(out.freqs[, "Expected"])
chisq.na <- 0
chisq.table <- out.freqs
}
chisq <- sum(chisq.table[, "Chi-square"]) + chisq.unobs.det
if (print.table) {
out <- list(chisq.table = chisq.table, chi.square = chisq,
model.type = "single-season")
}
else {
out <- list(chi.square = chisq, model.type = "single-season")
}
class(out) <- "mb.chisq"
return(out)
}
Which will successfuly compute a Chi squared value.
I then run the test.
mb.gof.test.RN <- function (mod, nsim = 100, plot.hist = TRUE, ...){
mod.table <- mb.chisq.RN(mod)
out <- parboot(mod, statistic = function(i) mb.chisq.RN(i)$chi.square,
nsim = nsim)
p.value <- sum(out#t.star >= out#t0)/nsim
if (p.value == 0) {
p.display <- paste("<", 1/nsim)
}
else {
p.display = paste("=", round(p.value, digits = 4))
}
if (plot.hist) {
hist(out#t.star, main = paste("Bootstrapped MacKenzie and Bailey fit statistic (",
nsim, " samples)", sep = ""), xlim = range(c(out#t.star,
out#t0)), xlab = paste("Simulated statistic ", "(observed = ",
round(out#t0, digits = 2), ")", sep = ""))
title(main = bquote(paste(italic(P), " ", .(p.display))),
line = 0.5)
abline(v = out#t0, lty = "dashed", col = "red")
}
c.hat.est <- out#t0/mean(out#t.star)
gof.out <- list(model.type = mod.table$model.type, chisq.table = mod.table$chisq.table,
chi.square = mod.table$chi.square, t.star = out#t.star,
p.value = p.value, c.hat.est = c.hat.est, nsim = nsim)
class(gof.out) <- "mb.chisq"
return(gof.out)
}
>mb.gof.test.RN(fm9)
which produces the following error:
Error in checkForRemoteErrors(val) : 3 nodes produced errors;
first error: could not find function "mb.chisq.RN"
I'm not entirely sure why this error only occurs above a certain number of simulations so any pointers would be greatly received.

prediction.strength in Package fpc

I am using the function prediction.strength in the r Package fpc with k-medoids algorithms.
here is my code
prediction.strength(data,2,6,M=10,clustermethod=pamkCBI,DIST,krange=2:6,diss=TRUE,usepam=TRUE)
somehow I get the error message
Error in switch(method, kmeans = kmeans(xdata[indvec[[l]][[i]], ], k, :
EXPR must be a length 1 vector
Does anybody have experience with this r command? There are simple examples like
iriss <- iris[sample(150,20),-5]
prediction.strength(iriss,2,3,M=3,method="pam")
but my problem is that I am using dissimilarity matrix instead of the data itself for the k-medoids algorithms. I don't know how should I correct my code in this case.
Please note that in the package help the following is stated for the prediction.strength:
xdats - data (something that can be coerced into a matrix). Note that this can currently
not be a dissimilarity matrix.
I'm afraid you'll have to hack the function to get it to handle a distance matrix. I'm using the following:
pred <- function (distance, Gmin = 2, Gmax = 10, M = 50,
classification = "centroid", cutoff = 0.8, nnk = 1, ...)
{
require(cluster)
require(class)
xdata <- as.matrix(distance)
n <- nrow(xdata)
nf <- c(floor(n/2), n - floor(n/2))
indvec <- clcenters <- clusterings <- jclusterings <- classifications <- list()
prederr <- list()
dist <- as.matrix(distance)
for (k in Gmin:Gmax) {
prederr[[k]] <- numeric(0)
for (l in 1:M) {
nperm <- sample(n, n)
indvec[[l]] <- list()
indvec[[l]][[1]] <- nperm[1:nf[1]]
indvec[[l]][[2]] <- nperm[(nf[1] + 1):n]
for (i in 1:2) {
clusterings[[i]] <- as.vector(pam(as.dist(dist[indvec[[l]][[i]],indvec[[l]][[i]]]), k, diss=TRUE))
jclusterings[[i]] <- rep(-1, n)
jclusterings[[i]][indvec[[l]][[i]]] <- clusterings[[i]]$clustering
centroids <- clusterings[[i]]$medoids
j <- 3 - i
classifications[[j]] <- classifdist(as.dist(dist), jclusterings[[i]],
method = classification, centroids = centroids,
nnk = nnk)[indvec[[l]][[j]]]
}
ps <- matrix(0, nrow = 2, ncol = k)
for (i in 1:2) {
for (kk in 1:k) {
nik <- sum(clusterings[[i]]$clustering == kk)
if (nik > 1) {
for (j1 in (1:(nf[i] - 1))[clusterings[[i]]$clustering[1:(nf[i] -
1)] == kk]) {
for (j2 in (j1 + 1):nf[i]) if (clusterings[[i]]$clustering[j2] ==
kk)
ps[i, kk] <- ps[i, kk] + (classifications[[i]][j1] ==
classifications[[i]][j2])
}
ps[i, kk] <- 2 * ps[i, kk]/(nik * (nik -
1))
}
}
}
prederr[[k]][l] <- mean(c(min(ps[1, ]), min(ps[2,
])))
}
}
mean.pred <- numeric(0)
if (Gmin > 1)
mean.pred <- c(1)
if (Gmin > 2)
mean.pred <- c(mean.pred, rep(NA, Gmin - 2))
for (k in Gmin:Gmax) mean.pred <- c(mean.pred, mean(prederr[[k]]))
optimalk <- max(which(mean.pred > cutoff))
out <- list(predcorr = prederr, mean.pred = mean.pred, optimalk = optimalk,
cutoff = cutoff, method = clusterings[[1]]$clustermethod,
Gmax = Gmax, M = M)
class(out) <- "predstr"
out
}

Resources