can somebody help me please with my cluster analysis?
I try to put values in an empty vector inside a while loop and are not able to manage it.
I am searching for the minimum value of my distance matrix and then put these values in every step of the while loop into a vector.
You can find the part of the code at variable "meltLevel" in the middle of the code.
Thanks in advance.
standardMatrix <- matrix (runif(25, max = 50, min = 1), nrow = 5, ncol = 5)
print(standardMatrix)
dist <- function(x1, x2) sqrt(sum((x1 - x2)^2))
distanceMatrix <- matrix(, nrow=5, ncol=5)
for (x in 1:4) {
for (y in (x+1):5) {
vector1 <- standardMatrix[x,];
vector2 <- standardMatrix[y,];
distanceMatrix[x,y] <- dist(vector1, vector2);
}
}
distanceMatrix <- t(distanceMatrix)
print(distanceMatrix)
while(dim(distanceMatrix)[1] > 2) {
distanceMatrix <- rbind(distanceMatrix, NA)
distanceMatrix <- cbind(distanceMatrix, NA)
newInd <- dim(distanceMatrix)[1]
endInd <- dim(distanceMatrix)[1] - 1
meltLevel <- numeric(0)
meltLevel <- c(meltLevel, min(distanceMatrix, na.rm = TRUE))
print(meltLevel)
minVal <- which(distanceMatrix == min(distanceMatrix, na.rm=TRUE), arr.ind=TRUE)
print(minVal)
endInd = dim(distanceMatrix)[1]-1
for(col in 1:endInd){
if(!(col == minVal[1] || col == minVal[2])) {
v <- c(distanceMatrix[minVal[1],col],distanceMatrix[col,minVal[1]],distanceMatrix[col,minVal[2]],distanceMatrix[minVal[2],col])
minV <- min(v, na.rm=TRUE)
distanceMatrix[newInd,col] = minV
}
}
maxMinVal <- max(minVal)
minMinVal <- min(minVal)
distanceMatrix <- distanceMatrix[-maxMinVal,]
distanceMatrix <- distanceMatrix[-minMinVal,]
distanceMatrix <- distanceMatrix[, -maxMinVal]
distanceMatrix <- distanceMatrix[, -minMinVal]
print(distanceMatrix)
}
print(meltLevel)
Related
I am trying to apply the following for-loop to every matrices in the list per_d and create a new list called per_hole. I am not sure how to do this, should I use lapply?
Thank you very much in advance for your helps!
per_hole <- per_d
for (i in 1:S) {
for (j in 1:t){
if (per_hole [i,j] > CS) {
per_hole [i,j] <- per_hole [i,j] - rnorm (1, mean = 1, sd = 0.5)
} else {
per_hole [i,j] <- per_hole [i,j] + rnorm (1, mean = 1, sd = 0.5)
}}}
codes for reproduction
N <- 1
CS <- 10.141
S <- seq (7.72,13,0.807)
t <- 15
l <- length (S)
m0 <- 100
exps <- c(0.2, 0.5, 0.9, 1.5, 2)
sd_per <- c(0.2, 0.5, 0.8, 1.3, 1.8)
sd_noise <- 3
per <- lapply(sd_per, function(x){
per <- matrix(nrow = length(S)*N, ncol = t+1)
for (i in 1:dim(per)[1]) {
for (j in 1:t+1){
per [,1] <- replicate (n = N, S)
per [i,j] <- round (abs (rnorm (1, mean = per[i,1], sd =x)),digits=3)
colnames(per) <- c('physical',paste('t', 1:15, sep = ""))
per <- as.data.frame (per)
}
}
per <- per [,-1]
return(per)
}
)
names(per) <- paste("per", seq_along(sd_per), sep = "")
per_d <- lapply(per, function(x){
per_d <- abs (x - 10.141)
}
)
names(per_d) <- paste("per_d", seq_along(sd_per), sep = "")
You can try
per_hole <- lapply(per_d,function(x) x + ifelse(x>CS,-1,1)*rnorm(prod(dim(x)),1,0.5))
or
per_hole <- lapply(per_d, function(x) x + rnorm(prod(dim(x)), 1-2*(x > CS), 0.5))
Im trying to apply a function onto my list but it returns this error
"non numeric matrix extent error"
here's my code
the error occurs in the last few lines
the code works fine up till the end, and because of this, im unable to plot my graphs
I've searched online but couldnt find anything that helps, and I cant see what's wrong with the code
#Question 1
set.seed(10000)
v <- c(0.1,0.5,1,2,5,10,100)
lyst <- list()
for(i in v)
{
for(j in v)
{
elementname <- paste0(as.character(i),"-",as.character(j))
print(elementname)
lyst[[elementname]] <- rgamma(10000,i,j)
}
}
#Question 2
pdf("Question2.pdf",width = 20, height = 10)
par(mfcol=c(7,7))
for(x in names(lyst))
{
hist(lyst[[x]],
xlab = "Value",
main = paste("Alpha-Lambda:",x))
}
dev.off()
#Question 3
theoretical_mean <- matrix(ncol=7,nrow=7,dimnames=list(as.character(v), as.character(v)))
theoretical_var <- matrix(ncol=7,nrow=7,dimnames=list(as.character(v), as.character(v)))
for (i in 1:7)
{
for (j in 1:7)
{
theoretical_mean[j,i] <- as.character(v[i]/v[j])
theoretical_var[j,i] <- as.character(v[i]/(v[j]^2))
}
}
sample_mean <-lapply(lyst, mean)
sample_mean <- as.data.frame(matrix(unlist(sample_mean),nrow = 7, ncol = 7, byrow = T))
sample_mean <- round(sample_mean,digits = 3)
sample_mean <- data.matrix(sample_mean, rownames.force = NA)
sample_var <-lapply(lyst, var)
sample_var <- as.data.frame(matrix(unlist(sample_var),nrow = 7, ncol = 7, byrow = T))
sample_var <- round(sample_var,digits = 3)
sample_var <- data.matrix(sample_var, rownames.force = NA)
theor_sample_mean <- matrix(paste(theoretical_mean, sample_mean, sep=" - "),nrow=7,dimnames = dimnames(theoretical_var))
theor_sample_var <- matrix(paste(theoretical_var, sample_var, sep=" - "),nrow=7,dimnames= dimnames(theoretical_var))
sink("Q3.txt")
cat("Theoretical Mean vs. Sample Mean:\n")
print(as.table(theor_sample_mean))
cat("\n")
cat("Theoretical Variance vs. Sample Variance:\n")
print(as.table(theor_sample_var))
sink()
#Question 4
nmean <- function(x)
{
m <- matrix(nrow=nrow(x))
for (j in 1:ncol(x))
{
v <- c()
for(i in 1:nrow(x))
{
v <- c(v,mean(x[1:i,j]))
}
m <- cbind(m,v)
}
m <- m[,-1]
colnames(m) <- colnames(x)
rownames(m) <- NULL
return(m)
}
sequentialMeans <- lapply(lyst,nmean)
pdf("Question4.pdf",width=15,height=10)
for (i in 1:7)
{
for (j in 1:7)
{
plot(y=sequentialMeans[[i]][,j],x=1:10000,xlab="n value",ylab="Values", main=paste("Alpha-Lambda:",colnames(lyst[[i]])[j]),type="l")
}
}
dev.off()
The problem with your code is that the data format of the input for the nmean function according to the lines
nmean <- function(x)
{
m <- matrix(nrow=nrow(x))
for (j in 1:ncol(x))
{
v <- c()
for(i in 1:nrow(x))
{
v <- c(v,mean(x[1:i,j]))
}
m <- cbind(m,v)
}
m <- m[,-1]
colnames(m) <- colnames(x)
rownames(m) <- NULL
return(m)
}
is a matrix and you want feed it vectors of gamma-distributed values as specified in the following lines
lyst <- list()
for(i in v)
{
for(j in v)
{
elementname <- paste0(as.character(i),"-",as.character(j))
print(elementname)
lyst[[elementname]] <- rgamma(10000,i,j)
}
}
For x that have type vector, the function ncol(x)and nrow(x)return NULL. Besides, there is also no application of ncol(x) possible.
If you want to save your approach you need to either think about transforming your data into matrix format or alternatively, use the vector format but use the vector-compatible functions length(x) for the length of the vector and names(lyst) for the names.
Update:
The code in the comments works but you got to change the lapply-statement as you now have a matrix that you can use as input for the nmean function directly. The following code works for generating sampleMeans and avoids the original error message of your question. In order to cut down runtime it only takes 100 samples.
#Question 1
set.seed(10000)
v <- c(0.1,0.5,1,2,5,10,100)
lyst <- list()
for(i in v)
{
for(j in v)
{
elementname <- paste0(as.character(i),"-",as.character(j))
print(elementname)
lyst[[elementname]] <- rgamma(100,i,j)
}
}
#Question 2
pdf("Question2.pdf",width = 20, height = 10)
par(mfcol=c(7,7))
for(x in names(lyst))
{
hist(lyst[[x]],
xlab = "Value",
main = paste("Alpha-Lambda:",x))
}
dev.off()
#Question 3
theoretical_mean <- matrix(ncol=7,nrow=7,dimnames=list(as.character(v), as.character(v)))
theoretical_var <- matrix(ncol=7,nrow=7,dimnames=list(as.character(v), as.character(v)))
for (i in 1:7)
{
for (j in 1:7)
{
theoretical_mean[j,i] <- as.character(v[i]/v[j])
theoretical_var[j,i] <- as.character(v[i]/(v[j]^2))
}
}
sample_mean <-lapply(lyst, mean)
sample_mean <- as.data.frame(matrix(unlist(sample_mean),nrow = 7, ncol = 7, byrow = T))
sample_mean <- round(sample_mean,digits = 3)
sample_mean <- data.matrix(sample_mean, rownames.force = NA)
sample_var <-lapply(lyst, var)
sample_var <- as.data.frame(matrix(unlist(sample_var),nrow = 7, ncol = 7, byrow = T))
sample_var <- round(sample_var,digits = 3)
sample_var <- data.matrix(sample_var, rownames.force = NA)
theor_sample_mean <- matrix(paste(theoretical_mean, sample_mean, sep=" - "),nrow=7,dimnames = dimnames(theoretical_var))
theor_sample_var <- matrix(paste(theoretical_var, sample_var, sep=" - "),nrow=7,dimnames= dimnames(theoretical_var))
sink("Q3.txt")
cat("Theoretical Mean vs. Sample Mean:\n")
print(as.table(theor_sample_mean))
cat("\n")
cat("Theoretical Variance vs. Sample Variance:\n")
print(as.table(theor_sample_var))
sink()
lyst = matrix(unlist(lyst), ncol = 7, byrow = TRUE)
colnames(lyst) = c("100-0.1","100-0.5","100-1","100-2","100-5","100-10","100-100")
#Question 4
nmean <- function(x)
{
m <- matrix(nrow=nrow(x))
for (j in 1:ncol(x))
{
v <- c()
for(i in 1:nrow(x))
{
v <- c(v,mean(x[1:i,j]))
}
m <- cbind(m,v)
}
m <- m[,-1]
colnames(m) <- colnames(x)
rownames(m) <- NULL
return(m)
}
sequentialMeans <- nmean(lyst)
Note also that you need to adjust the code for Q4, that is, the plot generation. The following code works.
pdf("Question4.pdf",width=15,height=10)
for (i in 1:7)
{
for (j in 1:7)
{
plot(y=sequentialMeans[,j],x=1:700,xlab="n value",ylab="Values", main=paste("Alpha-Lambda:",colnames(lyst[,j]),type="l"))
}
}
dev.off()
Let me know if this helps.
my code is as follows:
x <- data.frame(matrix(rnorm(20), nrow=10))
colnames(x) <- c("z", "m")
n_boot<-4
bs <- list()
for (i in 1:n_boot) {
bs[[i]] <- x[sample(nrow(x), 10, replace = TRUE), ]
}
bt<-matrix(unlist(bs), ncol = 2*n_boot, byrow = FALSE)
colnames(bt) <- rep(c("z","m"),times=n_boot)
M_to_boot <- bt[,seq(2,8,by=2)]
funct<-function(M_boot_max) {
od<-(1/((10*((10^((16-M_boot_max-25)/5))^3)/3)*((max(M_boot_max)-min(M_boot_max))/50)))
}
V_boot<-apply(M_to_boot,2,funct)
rows.combined <- nrow(M_to_boot)
cols.combined <- ncol(M_to_boot) + ncol(V_boot)
matrix.combined <- matrix(NA, nrow=rows.combined, ncol=cols.combined)
matrix.combined[, seq(1, cols.combined, 2)] <- M_to_boot
matrix.combined[, seq(2, cols.combined, 2)] <- V_boot
colnames(matrix.combined) <- rep(c("M_boot","V_boot"),times=n_boot)
df<-as.data.frame(matrix.combined)
start0 <- seq(1, by = 2, length = ncol(df) / 2)
start <- lapply(start0, function(i, df) df[i:(i+1)], df = df)
tests<-lapply(start, function(xy) split(xy, cut(xy$M_boot,breaks=5)))
Now I want to prepare some calculations on values V_boot from a sublists. To be specific I want to for each subsample calculate the sum of V_boot. So, for example I want for a bin M_boot "[[4]]$(0.811,1.25]" to have a value of sum(V_boot) for that bin. But I cannot figure out how to get to that each V_boot values.
Please help me.
for(i in 1:100)
{
for(j in 1:100)
{
hdist <- rgeos::gDistance(xySpatialLines[[i]], xySpatialLines[[j]], byid=FALSE, hausdorff=TRUE)
distances[i,j] <- dist
}
}
Is there any way to simplify j loop to get something like this:
for(i in 1:100)
{
distances[i,j] <- lapply(???) # or sapply?
}
UPDATE:
The data stored in xySpatialLines[[i]] - these are SpatialLines objects:
library(sp)
xySpatialLines <- vector(mode = "list", length = 2)
x1 <- c(1,4,5,3,2)
y1 <- c(2,5,3,6,7)
x2 <- c(4,4,6,3,2)
y2 <- c(8,5,2,6,1)
xy1 <- cbind(x1,y1)
xy2 <- cbind(x2,y2)
xy1.sp = sp::SpatialPoints(xy1)
xy2.sp = sp::SpatialPoints(xy2)
spl1 <- sp::SpatialLines(list(Lines(Line(xy1.sp), ID="a")))
spl2 <- sp::SpatialLines(list(Lines(Line(xy2.sp), ID="b")))
xySpatialLines[[1]] = spl1
xySpatialLines[[2]] = spl2
You can use outer:
distances = outer(xySpatialLines, xySpatialLines, FUN = gDistance, byid=FALSE, hausdorff = TRUE)
I am using the function prediction.strength in the r Package fpc with k-medoids algorithms.
here is my code
prediction.strength(data,2,6,M=10,clustermethod=pamkCBI,DIST,krange=2:6,diss=TRUE,usepam=TRUE)
somehow I get the error message
Error in switch(method, kmeans = kmeans(xdata[indvec[[l]][[i]], ], k, :
EXPR must be a length 1 vector
Does anybody have experience with this r command? There are simple examples like
iriss <- iris[sample(150,20),-5]
prediction.strength(iriss,2,3,M=3,method="pam")
but my problem is that I am using dissimilarity matrix instead of the data itself for the k-medoids algorithms. I don't know how should I correct my code in this case.
Please note that in the package help the following is stated for the prediction.strength:
xdats - data (something that can be coerced into a matrix). Note that this can currently
not be a dissimilarity matrix.
I'm afraid you'll have to hack the function to get it to handle a distance matrix. I'm using the following:
pred <- function (distance, Gmin = 2, Gmax = 10, M = 50,
classification = "centroid", cutoff = 0.8, nnk = 1, ...)
{
require(cluster)
require(class)
xdata <- as.matrix(distance)
n <- nrow(xdata)
nf <- c(floor(n/2), n - floor(n/2))
indvec <- clcenters <- clusterings <- jclusterings <- classifications <- list()
prederr <- list()
dist <- as.matrix(distance)
for (k in Gmin:Gmax) {
prederr[[k]] <- numeric(0)
for (l in 1:M) {
nperm <- sample(n, n)
indvec[[l]] <- list()
indvec[[l]][[1]] <- nperm[1:nf[1]]
indvec[[l]][[2]] <- nperm[(nf[1] + 1):n]
for (i in 1:2) {
clusterings[[i]] <- as.vector(pam(as.dist(dist[indvec[[l]][[i]],indvec[[l]][[i]]]), k, diss=TRUE))
jclusterings[[i]] <- rep(-1, n)
jclusterings[[i]][indvec[[l]][[i]]] <- clusterings[[i]]$clustering
centroids <- clusterings[[i]]$medoids
j <- 3 - i
classifications[[j]] <- classifdist(as.dist(dist), jclusterings[[i]],
method = classification, centroids = centroids,
nnk = nnk)[indvec[[l]][[j]]]
}
ps <- matrix(0, nrow = 2, ncol = k)
for (i in 1:2) {
for (kk in 1:k) {
nik <- sum(clusterings[[i]]$clustering == kk)
if (nik > 1) {
for (j1 in (1:(nf[i] - 1))[clusterings[[i]]$clustering[1:(nf[i] -
1)] == kk]) {
for (j2 in (j1 + 1):nf[i]) if (clusterings[[i]]$clustering[j2] ==
kk)
ps[i, kk] <- ps[i, kk] + (classifications[[i]][j1] ==
classifications[[i]][j2])
}
ps[i, kk] <- 2 * ps[i, kk]/(nik * (nik -
1))
}
}
}
prederr[[k]][l] <- mean(c(min(ps[1, ]), min(ps[2,
])))
}
}
mean.pred <- numeric(0)
if (Gmin > 1)
mean.pred <- c(1)
if (Gmin > 2)
mean.pred <- c(mean.pred, rep(NA, Gmin - 2))
for (k in Gmin:Gmax) mean.pred <- c(mean.pred, mean(prederr[[k]]))
optimalk <- max(which(mean.pred > cutoff))
out <- list(predcorr = prederr, mean.pred = mean.pred, optimalk = optimalk,
cutoff = cutoff, method = clusterings[[1]]$clustermethod,
Gmax = Gmax, M = M)
class(out) <- "predstr"
out
}