how to organize the output of MLE using R - r

I wrote down this function for MLE estimation and then I apply it for different settings of parameters.
Finally, I bind all results for an output.
But is not working i have problem with the output and also I need to organize the output like the attached image using R program.
enter image description here
could some one help me please?
What should I fix and how can I print the results like the picture attached.
thank you in advance
rbssn<- function(n,alpha,beta)
{
if(!is.numeric(n)||!is.numeric(alpha)||!is.numeric(beta))
{stop("non-numeric argument to mathematical function")}
if(alpha<=0){ stop("alpha must be positive")}
if(beta<=0) { stop("beta must be positive") }
z <- rnorm(n,0,1)
r <- beta*((alpha*z*0.5)+sqrt((alpha*z*0.5)^2+1))^2
return(r)
}
#Function
mymle <- function(n,alpha,beta,rep)
{
theta=c(alpha,beta) # store starting values
#Tables
LHE=array(0, c(2,rep));
rownames(LHE)= c("MLE_alpha", "MLE_beta")
#Bias
bias= array(0, c(2,rep));
rownames(bias)= c("bias_alpha", "bias_beta")
#Simulation
set.seed(1)
#Loop
for(i in 1:rep){
myx <- exp(-rbssn(n, alpha, beta))
Score <- function(x) {
y <- numeric(2)
y[1] <- (-n/x[1])*(1+2/(x[1]^2)) - (1/(x[2]*x[1]^3))*sum(log(myx)) - (x[2]/(x[1]^3))*sum(1/log(myx))
y[2] <- -(n/(2*x[2])) + sum((1/(x[2]-log(myx)))) - (1/(2*(x[1]^2)*(x[2]^2)))*sum(log(myx)) + (1/(2*x[1]^2))*sum(1/(log(myx)))
y
}
Sin <- c(alpha,beta)
mle<- nleqslv(Sin, Score, control=list(btol=.01))[1]
LHE[i,]= mle
bias[i,]= c(mle[1]-theta[1], mle[2]-theta[2])
}
# end for i
#Format results
L <-round(apply(LHE, 1, mean), 3) # MLE of all the applied iterations
bs <-round(apply(bias,1, mean),3) # bias of all the applied iterations
row<- c(L, bs)
#Format a label
lab <- paste0('n= ',n,';',' alpha= ',alpha,';',' beta= ',beta)
row2 <- c(lab,row)
row2 <- as.data.frame(t(row2))
return(row2)
}
#Bind all
#Example 1
ex1 <- mymle(n = 20,alpha = 1,beta = 0.5,rep = 100)
ex2 <- mymle(n = 50,alpha = 2,beta = 0.5,rep = 100)
ex3 <- mymle(n = 100,alpha = 3,beta = 0.5,rep = 100)
#Example 2
ex4 <- mymle(n = 20,alpha = 0.5,beta = 0.5,rep = 100)
ex5 <- mymle(n = 50,alpha = 0.5,beta = 1,rep = 100)
ex6 <- mymle(n = 100,alpha = 0.5,beta = 1,rep = 100)
df <- rbind(ex1,ex2,ex3,ex4,ex5,ex6)
Any help will be appreciated.

Related

How can I use try catch for nls function in R

I am doing a regression for a Quadric Linear function. I got two option is to use either nlsLM and nls2. However, for some dataset, the use of nlsLM casing some problem such as: singular gradient matrix at initial parameter estimates or they ran in to an infinitie loop. I want to use the try catch to deal with this issue. Can anyone help me out? Thanks everyone in advance.
Here is the full code:
# Packages needed for estimaton of Ideal trajectory - nonlinear regression
#-------------------------------------------------------------------------------
library("minpack.lm")
library("nlstools")
library("nlsMicrobio")
library("stats")
library("tseries") #runs test for auto correlation
#Use NLS2
library(proto)
library(nls2)
################################################################
# Set working directory
setwd("C:/Users/Kevin Le/PycharmProjects/Pig Data Black Box - Copy")
#load dataset
load("Data/JRPData_TTC.Rdata") #load dataset created in MissingData.step
ID <- 5470
#Create a new dataframe which will store Data after ITC estimation
#Dataframe contains ITC parameters
ITC.param.pos2 <- data.frame(ANIMAL_ID=factor(),
X0=double(),
Y1=double(),
Y2=double(),
Ylast=double(),
a=double(),
b=double(),
c=double(),
d=double(),
stringsAsFactors=FALSE)
#Dataframe contains data points on the ITC
Data.remain <- data.frame(ANIMAL_ID=character(),
Age=double(),
obs.CFI=double(),
tt=double(),
ttt=double(),
stringsAsFactors=FALSE)
#===============================================================
# For loop for automatically estimating ITC of all pigs
#===============================================================
IDC <- seq_along(ID) # 17, 23, 52, 57, 116
for (idc in IDC){
# idc = 1
i <- ID[idc]
Data <- No.NA.Data.1[No.NA.Data.1$ANIMAL_ID == i,]
idc1 <- unique(as.numeric(Data$idc.1))
####### Create data frame of x (Age) and y (CFI) ########
x <- as.numeric(Data$Age.plot)
Y <- as.numeric(Data$CFI.plot)
Z <- as.numeric(Data$DFI.plot)
Data.xy <- as.data.frame(cbind(x,Y))
#Initial parameteres for parameter estimation
X0.0 <- x[1]
Xlast <- x[length(x)]
##################################################################
# 1. reparametrization CFI at X0 = 0
#function used for reparametrization in MAPLE
# solve({
# 0=a+b*X_0+c*X_0**2,
# DFIs=b+2*c*Xs,CFIs=a+b*Xs+c*Xs**2},
# {a,b,c});
# a = -X0*(2*CFIs*Xs-CFIs*X0-Xs^2*DFIs+Xs*DFIs*X0)/(Xs^2-2*X0*Xs+X0^2)
# b = (-Xs^2*DFIs+DFIs*X0^2+2*CFIs*Xs)/(Xs^2-2*X0*Xs+X0^2)
# c = -(CFIs-Xs*DFIs+X0*DFIs)/(Xs^2-2*X0*Xs+X0^2)
# 2. with the source of the function abcd and pred
##################################################################
#Provide set of initial parameters
Xs.1 <- round(seq(X0.0 + 1, Xlast - 1, len = 30), digits = 0)
X0.1 <- rep(X0.0, length(Xs.1))
DFIs.1 <- NULL
CFIs.1 <- NULL
for(A in seq_along(Xs.1)){
DFIs2 <- Data[Data$Age.plot == Xs.1[A],]$DFI.plot
CFIs2 <- Data[Data$Age.plot == Xs.1[A],]$CFI.plot
DFIs.1 <- c(DFIs.1, DFIs2)
CFIs.1 <- c(CFIs.1, CFIs2)
}
st1 <- data.frame(cbind(X0.1, Xs.1, DFIs.1, CFIs.1))
names(st1) <- c("X0","Xs", "DFIs","CFIs")
#RUN NLS2 to find optimal initial parameters
st2 <- nls2(Y ~ nls.func.2(X0, Xs, DFIs, CFIs),
Data.xy,
start = st1,
# weights = weight,
# trace = T,
algorithm = "brute-force")
par_init <- coef(st2); par_init
#--------------------------------------------
# Create empty lists to store data after loop
#--------------------------------------------
par <- list()
AC.res <- list()
AC.pvalue <- NULL
data2 <- list()
data3 <- list()
param <- data.frame(rbind(par_init))
par.abcd <- data.frame(rbind(abcd.2(as.vector(par_init))))
param.2 <- data.frame(X0=double(),
Xs=double(),
DFIs=double(),
CFIs=double(),
a=double(),
b=double(),
c=double(),
stringsAsFactors=FALSE)
j <- 2
AC_pvalue <- 0
AC.pvalue[1] <- AC_pvalue
datapointsleft <- as.numeric(dim(Data)[1])
dpl <- datapointsleft #vector of all dataponitsleft at each step
#-------------------------------------------------------------------------------
# Start the procedure of Non Linear Regression
#-------------------------------------------------------------------------------
while ((AC_pvalue<=0.05) && datapointsleft >= 20){
weight <- 1/Y^2
# ---------------- NON linear reg applied to log(Y) ---------------------------------
st2 <- nls2(Y ~ nls.func.2(X0, Xs, DFIs, CFIs),
Data.xy,
start = st1,
weights = weight,
trace = F,
algorithm = "brute-force")
par_init <- coef(st2)
par_init
# st1 <- st1[!(st1$Xs == par_init[2]),]
nls.CFI <- nlsLM(Y ~ nls.func.2(X0, Xs, DFIs, CFIs),
Data.xy,
control = list(tol = 1e-2, printEval = TRUE, maxiter = 1024),
start = list(X0 = par_init[1], Xs = par_init[2],
DFIs = par_init[3], CFIs = par_init[4]),
weights = weight,
algorithm = "port",
lower = c(-10000,X0.0+1, -10000, -10000),
upper = c(10000, Xlast-1, 10000, 10000),
trace = F)
# nls.CFI <- nls2(Y ~ nls.func.2(X0, Xs, DFIs, CFIs),
# Data.xy,
# start = list(X0 = par_init[1], Xs = par_init[2],
# DFIs = par_init[3], CFIs = par_init[4]),
# weights = weight,
# control = nls.control(warnOnly = TRUE),
# trace = T,
# algorithm = "port",
# lower = c(-100000000,X0.0+1, -1000000000, -1000000000),
# upper = c(1000000000, Xlast-1, 1000000000, 1000000000))
# nls.CFI <- nlsLM(Y ~ nls.func.2(X0, Xs, DFIs, CFIs),
# Data.xy,
# control = nls.control(warnOnly = TRUE),
# start = list(X0 = par_init[1], Xs = par_init[2],
# DFIs = par_init[3], CFIs = par_init[4]),
# weights = weight,
# algorithm = "port",
# lower = c(-1000000000,X0.0+1, -1000000000, -1000000000),
# upper = c(1000000000, Xlast-1, 1000000000, 1000000000),
# trace = F)
#--------RESULTS analysis GOODNESS of fit
#estimate params
par[[j]] <- coef(nls.CFI)
par.abcd[j,] <- abcd.2(as.vector(coef(nls.CFI) )) #calculation of a, b, c and d
param[j,] <- par[[j]]
param.2[j-1,] <- cbind(param[j,], par.abcd[j,])
#summary
# summ = overview((nls.CFI)) #summary
#residuals
res1 <- nlsResiduals(nls.CFI) #residuals
res2 <- nlsResiduals(nls.CFI)$resi1
res <- res2[, 2]
AC.res <- test.nlsResiduals(res1)
AC.pvalue[j] <- AC.res$p.value
#---------Check for negative residuals----------
#Add filtration step order to data
Step <- rep(j - 1, length(x))
#create a new dataset with predicted CFI included
Data.new <- data.frame(cbind(x, Z, Y, pred.func.2(par[[j]],x)[[1]], res, Step))
names(Data.new) <- c("Age", "Observed_DFI","Observed_CFI", "Predicted_CFI", "Residual", "Step")
# plot(Data.new$Age, Data.new$Predicted_CFI, type = "l", col = "black",lwd = 2,
# ylim = c(0, max(Data.new$Predicted_CFI, Data.new$Observed_CFI)))
# lines(Data.new$Age, Data.new$Observed_CFI, type = "p", cex = 1.5)
#
#remove negative res
Data.pos <- Data.new[!Data.new$Residual<0,]
# lines(Data.pos$Age, Data.pos$Predicted_CFI, type = "l", col = j-1, lwd = 2)
# lines(Data.pos$Age, Data.pos$Observed_CFI, type = "p", col = j, cex = 1.5)
#restart
#Criteria to stop the loop when the estimated parameters are equal to initial parameters
# Crite <- sum(param.2[dim(param.2)[1],c(1:4)] == par_init)
datapointsleft <- as.numeric(dim(Data.pos)[1])
par_init <- par[[j]]
AC_pvalue <- AC.pvalue[j]
j <- j+1
x <- Data.pos$Age
Y <- Data.pos$Observed_CFI
Z <- Data.pos$Observed_DFI
Data.xy <- as.data.frame(cbind(x,Y))
dpl <- c(dpl, datapointsleft)
dpl
#Create again the grid
X0.0 <- x[1]
Xlast <- x[length(x)]
#Xs
if(par_init[2] -15 <= X0.0){
Xs.1 <- round(seq(X0.0 + 5, Xlast - 5, len = 30), digits = 0)
} else if(par_init[2] + 5 >= Xlast){
Xs.1 <- round(seq(par_init[2]-10, par_init[2]-1, len = 6), digits = 0)
} else{
Xs.1 <- round(seq(par_init[2]-5, par_init[2] + 5, len = 6), digits = 0)
}
#
X0.1 <- rep(X0.0, length(Xs.1))
DFIs.1 <- NULL
CFIs.1 <- NULL
for(A in seq_along(Xs.1)){
DFIs2 <- Data[Data$Age.plot == Xs.1[A],]$DFI.plot
CFIs2 <- Data[Data$Age.plot == Xs.1[A],]$CFI.plot
DFIs.1 <- c(DFIs.1, DFIs2)
CFIs.1 <- c(CFIs.1, CFIs2)
}
st1 <- data.frame(cbind(X0.1, Xs.1, DFIs.1, CFIs.1))
if(X0.0 <= par_init[2] && Xlast >=par_init[2]){
st1 <- rbind(st1, par_init)
}
names(st1) <- c("X0","Xs", "DFIs","CFIs")
}
} # end FOR loop
Here is the data file. I have exported my data into the .Rdata for an easier import.: https://drive.google.com/file/d/1GVMarNKWMEyz-noSp1dhzKQNtu2uPS3R/view?usp=sharing
In this file, the set id: 5470 will have this error: singular gradient matrix at initial parameter estimates in this part:
nls.CFI <- nlsLM(Y ~ nls.func.2(X0, Xs, DFIs, CFIs),
Data.xy,
control = list(tol = 1e-2, printEval = TRUE, maxiter = 1024),
start = list(X0 = par_init[1], Xs = par_init[2],
DFIs = par_init[3], CFIs = par_init[4]),
weights = weight,
algorithm = "port",
lower = c(-10000,X0.0+1, -10000, -10000),
upper = c(10000, Xlast-1, 10000, 10000),
trace = F)
The complementary functions (file Function.R):
abcd.2 <- function(P){
X0 <- P[1]
Xs <- P[2]
DFIs <- P[3]
CFIs <- P[4]
a <- -X0*(2*CFIs*Xs-CFIs*X0-Xs^2*DFIs+Xs*DFIs*X0)/(Xs^2-2*X0*Xs+X0^2)
b <- (-Xs^2*DFIs+DFIs*X0^2+2*CFIs*Xs)/(Xs^2-2*X0*Xs+X0^2)
c <- -(CFIs-Xs*DFIs+X0*DFIs)/(Xs^2-2*X0*Xs+X0^2)
pp <- as.vector(c(a, b, c))
return(pp)
}
#--------------------------------------------------------------
# NLS function
#--------------------------------------------------------------
nls.func.2 <- function(X0, Xs, DFIs, CFIs){
pp <- c(X0, Xs, DFIs, CFIs)
#calculation of a, b and c using these new parameters
c <- abcd.2(pp)[3]
b <- abcd.2(pp)[2]
a <- abcd.2(pp)[1]
ind1 <- as.numeric(x < Xs)
return (ind1*(a+b*x+c*x^2)+(1-ind1)*((a+b*(Xs)+c*(Xs)^2)+(b+2*c*(Xs))*(x-(Xs))))
}
#--------------------------------------------------------------
# Fit new parameters to a quadratic-linear function of CFI
#--------------------------------------------------------------
pred.func.2 <- function(pr,age){
#
X0 <- pr[1]
Xs <- pr[2]
DFIs <- pr[3]
CFIs <- pr[4]
#
x <- age
#calculation of a, b and c using these new parameters
c <- abcd.2(pr)[3]
b <- abcd.2(pr)[2]
a <- abcd.2(pr)[1]
#
ind1 <- as.numeric(x < Xs)
#
results <- list()
cfi <- ind1*(a+b*x+c*x^2)+(1-ind1)*((a+b*(Xs)+c*(Xs)^2)+(b+2*c*(Xs))*(x-(Xs))) #CFI
dfi <- ind1*(b+2*c*x) + (1 - ind1)*(b+2*c*(Xs)) #DFI
results[[1]] <- cfi
results[[2]] <- dfi
return (results)
}
#---------------------------------------------------------------------------------------------------------------
# Quadratic-linear function of CFI curve and its 1st derivative (DFI) with original parameters (only a, b and c)
#---------------------------------------------------------------------------------------------------------------
pred.abcd.2 <- function(pr,age){
#
a <- pr[1]
b <- pr[2]
c <- pr[3]
x <- age
#calculation of a, b and c using these new parameters
#
ind1 <- as.numeric(x < Xs)
#
results <- list()
cfi <- ind1*(a+b*x+c*x^2)+(1-ind1)*((a+b*(Xs)+c*(Xs)^2)+(b+2*c*(Xs))*(x-(Xs))) #CFI
dfi <- ind1*(b+2*c*x) + (1 - ind1)*(b+2*c*(Xs)) #DFI
results[[1]] <- cfi
results[[2]] <- dfi
return (results)
}
Updated: I did review my logic from the previous step and found that my data is a bit messed up because of it. I have fixed it. The case where a set f data ran into an infinite loop has no longer exists, but this error is still there however: singular gradient matrix at initial parameter estimates.

Looping a function in R

I have written a cross validation/grid search style code in R that tries to find an optimal threshold value for a given value of mtry (using the random forest algorithm). I have posted my code below using the Sonar data from the library mlbench However, there seems to be some problems with this code.
library(caret)
library(mlbench)
library(randomForest)
res <- matrix(0, nrow = 10, ncol = 6)
colnames(res) <- c("mtry","Threshhold","Accuracy", "PositivePred", "NegativePred", "F-value")
out <- matrix(0, nrow = 17, ncol = 6)
colnames(out) <- c("mtry","Threshhold","Avg.Accuracy", "Avg.PosPred", "Avg.NegPred", "Avg.F_Value")
rep <- matrix(0, nrow = 10, ncol = 6)
colnames(out) <- c("mtry","Threshhold","Avg_Accuracy", "Avg_PosPred", "Avg_NegPred", "Avg_F_Value")
data(Sonar)
N=Sonar
### creating 10 folds
folds <- cut(seq(1,nrow(N)),breaks=10,labels=FALSE)
for (mtry in 5:14) {
K=mtry-4
for(thresh in seq(1,9,0.5)) {
J = 2*thresh-1
dataset<-N[sample(nrow(N)),] #### mix up the dataset N
for(I in 1:10){
#Segement your data by fold using the which() function
testIndexes <- which(folds==I,arr.ind=TRUE)
N_test <- dataset[testIndexes, ] ### select each fold for test
N_train <- dataset[-testIndexes, ] ### select rest for training
rf = randomForest(Class~., data = N_train, mtry=mtry, ntree=500)
pred = predict(rf, N_test, type="prob")
label = as.factor(ifelse(pred[,2]>=thresh,"M","R"))
confusion = confusionMatrix(N_test$Class, label)
res[I,1]=mtry
res[I,2]=thresh
res[I,3]=confusion$overall[1]
res[I,4]=confusion$byClass[3]
res[I,5]=confusion$byClass[4]
res[I,6]=confusion$byClass[7]
}
print(res)
out[J,1] = mtry
out[J,2] = thresh
out[J,3] = mean(res[,2])
out[J,4] = mean(res[,3])
out[J,5] = mean(res[,4])
out[J,6] = mean(res[,5])
}
print(out)
rep[K,1] = mtry
rep[K,2] = thresh
rep[K,3] = mean(out[,2])
rep[K,4] = mean(out[,3])
rep[K,5] = mean(out[,4])
rep[K,6] = mean(out[,5])
}
print(rep)
Earlier, I wrote a similar code with the "iris" dataset, and I did not seem to have any problems:
library(caret)
library(randomForest)
data(iris)
N <- iris
N$Species = ifelse(N$Species == "setosa", "a", "b")
N$Species = as.factor(N$Species)
res <- matrix(0, nrow = 10, ncol = 5)
colnames(res) <- c("Threshhold","Accuracy", "PositivePred", "NegativePred", "F-value")
out <- matrix(0, nrow = 9, ncol = 5)
colnames(out) <- c("Threshhold","Avg.Accuracy", "Avg.PosPred", "Avg.NegPred", "Avg.F_Value")
### creating 10 folds
folds <- cut(seq(1,nrow(N)),breaks=10,labels=FALSE)
for(J in 1:9) {
thresh = J/10
dataset<-N[sample(nrow(N)),] #### mix up the dataset N
for(I in 1:10){
#Segement your data by fold using the which() function
testIndexes <- which(folds==I,arr.ind=TRUE)
N_test <- dataset[testIndexes, ] ### select each fold for test
N_train <- dataset[-testIndexes, ] ### select rest for training
rf = randomForest(Species~., data = N_train, mtry=3, ntree=10)
pred = predict(rf, N_test, type="prob")
label = as.factor(ifelse(pred[,1]>=thresh,"a","b"))
confusion = confusionMatrix(N_test$Species, label)
res[I,1]=thresh
res[I,2]=confusion$overall[1]
res[I,3]=confusion$byClass[3]
res[I,4]=confusion$byClass[4]
res[I,5]=confusion$byClass[7]
}
print(res)
out[J,1] = thresh
out[J,2] = mean(res[,2])
out[J,3] = mean(res[,3])
out[J,4] = mean(res[,4])
out[J,5] = mean(res[,5])
}
print(out)
Could someone please assist me in debugging the first code?
Thanks
You need to close parenthesis ) in your for loop.
Replace this
for(thresh in seq(1,9,0.5) {
with
for(thresh in seq(1,9,0.5)) {
Update:
Also, it appears that your thresh is always above 1 giving a single value R in the label, as it is never above thresh.
label = as.factor(ifelse(pred[,2]>=thresh,"M","R"))
and that creates a problem in the next statement
confusion = confusionMatrix(N_test$Class, label)
I tested with 0.5, and I get no error.
label = as.factor(ifelse(pred[,2]>=0.5,"M","R"))
If you can define a better thresh - to stay between 0 and 1, you should be fine.

Avoiding Loops to Generate a Complex Dataframe with Nested Lists

Here is a kind of DF, I have to generate to store simulations data).
nbSimul <- 100
nbSampleSizes <- 4
nbCensoredRates <- 4
sampleSize <- c(100, 50, 30, 10)
censoredRate <- c(0.1, 0.3, 0.5, 0.8)
df.sampled <- data.frame(cas = numeric() ,
distribution = character(),
simul = numeric() ,
sampleSize = numeric() ,
censoredRate = numeric() ,
dta = I(list()) ,
quantileLD = I(list()) ,
stringsAsFactors = FALSE)
v <- 0 # Scenario indicator
for(k in 1:nbCensoredRates){
for(j in 1:nbSampleSizes){
for(i in 1:nbSimul){
# Scenario Id + Other info
v <- v + 1
df.sampled[v,"cas"] <- v
df.sampled[v,"distribution"] <- "logNormal"
df.sampled[v,"simul"] <- i
df.sampled[v,"sampleSize"] <- sampleSize[j]
df.sampled[v,"censoredRate"] <- censoredRate[k]
X <- rlnorm(sampleSize[j], meanlog = 0, sdlog = 1)
estimatedLD <- array(9)
for(w in 1:9){
estimatedLD[w] <- quantile(X, probs=censoredRate[k], type=w)[[1]]
}
df.sampled$dta[v] <- list(X)
df.sampled$quantileLD[v] <- list(estimatedLD[1:9])
}
}
}
Which is quite difficult to read.
I would like to find a way to avoid loops, and to reference easily scenarios (v) and attached variables.
Any idea?

Function inputs from a list

How can I run a function (in R) where some of the inputs are pulled from a list (or data frame)? Am I right in thinking that this would be more efficient than running a for-loop?
I am running simulations and want to change the variable values, but as they take a long time to run I want them to run overnight and to just tick through the different values automatically.
Here's the code for the function:
n = 10000
mu = 0
sd = 1
n.sub = 100
iboot = 100
isim = 1000 ### REDUCED FOR THIS EXAMPLE ###
var.values <- NULL
var.values.pop <- NULL
hist.fn <- function(n,mu,sd,n.sub,iboot)
{
Pop <- rnorm(n,mu,sd)
var.pop <- var(Pop)
Samp <- sample(Pop, n.sub, replace = FALSE)
var.samp <- var(Samp)
for(i in 1:isim) {
for(j in 1:iboot) {
Boot <- sample(Samp, n.sub, replace = TRUE)
var.values[j] <- var(Boot)
}
Samp <- sample(Pop, n.sub, replace = FALSE)
var.values.pop[i] <- var(Samp)
}
hist.pop <- hist(var.values.pop,plot=F)
hist.boot <- hist(var.values,plot=F)
#mypath = file.path("C:", "Output", paste("hist.boot_n.", n.sub, "_var.", sd^2, "_isim.", isim, "_iboot.", iboot, ".wmf", sep=""))
#win.metafile(file=mypath)
plot.new() #### ADDED FOR THIS EXAMPLE INSTEAD OF OUTPUTTING TO FILE ####
plot(hist.pop, freq=FALSE, xlim=range(var.values.pop, var.values), ylim=range(hist.pop$density, hist.boot$density), main = paste("Histogram of variances \n n=",n.sub," mu=",mu,"var=",sd^2,"\n n.sim=",isim,"n.boot=",iboot,"\n"), cex.main=0.8, xlab="Variance", col="red")
plot(hist.boot, freq=FALSE, col="blue", border="blue", add=T, density=20, angle=45)
abline(v=var.pop, lty=2, col="black", lwd=2)
legend("topright", legend=c("sample","bootstrap"),col=c("red","blue"),lty=1,lwd=2,bty="n",cex=0.7)
#dev.off()
}
hist.fn(n,mu,sd,n.sub,iboot)
Then I want sd, n.sub, and iboot to change by running through the following values:
sd <- c(1,10,100,1000)
n.sub <- c(4,10,100,1000)
iboot <- c(100,1000,10000)
Perhaps something like this?
n = 10000
mu = 0
sd = 1
n.sub = 100
iboot = 100
isim = 1000
sd <- c(1,10,100,1000)
n.sub <- c(4,10,100,1000)
iboot <- c(100,1000,10000)
# hist.fn parameters: n,mu,sd,n.sub,iboot
params <- expand.grid(n = n, mu = mu, sd = sd,
n.sub = n.sub, iboot = iboot)
apply(params, 1, FUN = function(x) do.call(hist.fn, as.list(x) ) )
You probably want to put these:
var.values <- NULL
var.values.pop <- NULL
Inside hist.fn, because assigning values to variables outside a function doesn't work like you seem to think.
You should use do.call, which will apply the function using arguments in a list. I have simplified your example to run less loops for the example. You can modify the printline of the script in order to monitor your progress for a larger job:
# The function
hist.fn <- function(n,mu,isim,sd,n.sub,iboot)
{
Pop <- rnorm(n,mu,sd)
var.pop <- var(Pop)
Samp <- sample(Pop, n.sub, replace = FALSE)
var.samp <- var(Samp)
var.values <- NaN*seq(isim) # sets up an empty vector for results
var.values.pop <- NaN*seq(isim) # sets up an empty vector for results
for(i in seq(isim)) {
for(j in seq(iboot)) {
Boot <- sample(Samp, n.sub, replace = TRUE)
var.values[j] <- var(Boot)
print(paste("i =", i, "; j =", j))
}
Samp <- sample(Pop, n.sub, replace = FALSE)
var.values.pop[i] <- var(Samp)
}
list(var.values=var.values, var.values.pop=var.values.pop) #returns results in the form of a list
}
# Global variables
n = 100
mu = 0
isim = 10
# Changing variables
sd <- c(1,10,20,30)
n.sub <- c(4,10,20,30)
iboot <- c(100,200,300,400)
df <- data.frame(sd=sd, n.sub=n.sub, iboot=iboot)
res <- vector(mode="list", nrow(df)) # sets up an empty list for results
for(i in seq(nrow(df))){
res[[i]] <- do.call(hist.fn, c(n=n, mu=mu, isim=isim, df[i,]) )
}
res # show results
sd <- 1:3
n.sub <- 4:6
iboot <- 7:9
funct1<-function(x,y,z) print(x+y+z)
for (i in 1:length(sd)){
funct1(sd[i],n.sub[i],iboot[i])
}
just an example. Doing it with loop.

prediction.strength in Package fpc

I am using the function prediction.strength in the r Package fpc with k-medoids algorithms.
here is my code
prediction.strength(data,2,6,M=10,clustermethod=pamkCBI,DIST,krange=2:6,diss=TRUE,usepam=TRUE)
somehow I get the error message
Error in switch(method, kmeans = kmeans(xdata[indvec[[l]][[i]], ], k, :
EXPR must be a length 1 vector
Does anybody have experience with this r command? There are simple examples like
iriss <- iris[sample(150,20),-5]
prediction.strength(iriss,2,3,M=3,method="pam")
but my problem is that I am using dissimilarity matrix instead of the data itself for the k-medoids algorithms. I don't know how should I correct my code in this case.
Please note that in the package help the following is stated for the prediction.strength:
xdats - data (something that can be coerced into a matrix). Note that this can currently
not be a dissimilarity matrix.
I'm afraid you'll have to hack the function to get it to handle a distance matrix. I'm using the following:
pred <- function (distance, Gmin = 2, Gmax = 10, M = 50,
classification = "centroid", cutoff = 0.8, nnk = 1, ...)
{
require(cluster)
require(class)
xdata <- as.matrix(distance)
n <- nrow(xdata)
nf <- c(floor(n/2), n - floor(n/2))
indvec <- clcenters <- clusterings <- jclusterings <- classifications <- list()
prederr <- list()
dist <- as.matrix(distance)
for (k in Gmin:Gmax) {
prederr[[k]] <- numeric(0)
for (l in 1:M) {
nperm <- sample(n, n)
indvec[[l]] <- list()
indvec[[l]][[1]] <- nperm[1:nf[1]]
indvec[[l]][[2]] <- nperm[(nf[1] + 1):n]
for (i in 1:2) {
clusterings[[i]] <- as.vector(pam(as.dist(dist[indvec[[l]][[i]],indvec[[l]][[i]]]), k, diss=TRUE))
jclusterings[[i]] <- rep(-1, n)
jclusterings[[i]][indvec[[l]][[i]]] <- clusterings[[i]]$clustering
centroids <- clusterings[[i]]$medoids
j <- 3 - i
classifications[[j]] <- classifdist(as.dist(dist), jclusterings[[i]],
method = classification, centroids = centroids,
nnk = nnk)[indvec[[l]][[j]]]
}
ps <- matrix(0, nrow = 2, ncol = k)
for (i in 1:2) {
for (kk in 1:k) {
nik <- sum(clusterings[[i]]$clustering == kk)
if (nik > 1) {
for (j1 in (1:(nf[i] - 1))[clusterings[[i]]$clustering[1:(nf[i] -
1)] == kk]) {
for (j2 in (j1 + 1):nf[i]) if (clusterings[[i]]$clustering[j2] ==
kk)
ps[i, kk] <- ps[i, kk] + (classifications[[i]][j1] ==
classifications[[i]][j2])
}
ps[i, kk] <- 2 * ps[i, kk]/(nik * (nik -
1))
}
}
}
prederr[[k]][l] <- mean(c(min(ps[1, ]), min(ps[2,
])))
}
}
mean.pred <- numeric(0)
if (Gmin > 1)
mean.pred <- c(1)
if (Gmin > 2)
mean.pred <- c(mean.pred, rep(NA, Gmin - 2))
for (k in Gmin:Gmax) mean.pred <- c(mean.pred, mean(prederr[[k]]))
optimalk <- max(which(mean.pred > cutoff))
out <- list(predcorr = prederr, mean.pred = mean.pred, optimalk = optimalk,
cutoff = cutoff, method = clusterings[[1]]$clustermethod,
Gmax = Gmax, M = M)
class(out) <- "predstr"
out
}

Resources