I can't see model results in R2jags - what is wrong? - r

I'm trying to run this script in R2jags following the instructions provided in "Lahoz-Monfort JJ, Guillera-Arroita G, Tingley R (2015) Statistical approaches to account for false positive errors in environmental DNA
samples. Molecular Ecology Resources, 16, 673–685." and it seems that it worked ok, but I can't figure out the command to see the results... could anyone please help?
cat("model {
# Priors
psi ~ dunif(0,1)
p11 ~ dunif(0,1)
p10 ~ dunif(0,p10_max)
# Likelihood
for (i in 1:S){
z[i] ~ dbern(psi)
p[i] <- z[i]*p11 + (1-z[i])*p10
for (j in 1:K){
Y[i,j] ~ dbern(p[i])
}
}
} ",fill=TRUE)
sink()
Bayesian <- function(psi,p11,p10,S,K,nsims=100,doprint=TRUE,p10_max=0.05,
ni=100000,nt=2,nc=1,nb=50000,myparallel=TRUE) {
psihat<-p11hat<-p10hat<-rep(nsims)
modelSummaries<-list()
for(ii in 1:nsims){
if (doprint) cat("\r", ii, "of", nsims," ")
hh<-genSimData(psi,r11=0,p11,p10,S,K1=0,K2=K)
# fit the model
jags.inits <-function()(list(psi=runif(1,0.05,0.95),p11=runif(1,p10_max,1),p10=runif(1,0,p10_max)))
jags.data <-list(Y=hh,S=S,K=K,p10_max=p10_max)
jags.params<-c("psi","p11","p10")
Thoropa_model<-jags(data = jags.data, inits = jags.inits, parameters.to.save= jags.params,
model.file= "Thoropa.txt", n.thin= nt, n.chains= nc,
n.iter= ni, n.burnin = nb, parallel=myparallel) #, working.directory= getwd()
# extract results (medians of the marginal posteriors)
psihat[ii] <- model$summary["psi","50%"]
p11hat[ii] <- model$summary["p11","50%"]
p10hat[ii] <- model$summary["p10","50%"]
modelSummaries[[ii]]<-model$summary
}
if (doprint){
printsummres(psihat,thename="estimated psi")
printsummres(p11hat,thename="estimated p11")
printsummres(p10hat,thename="estimated p10")
}
return(list(psihat=psihat,p11hat=p11hat,p10hat=p10hat,modelSummaries=modelSummaries))
}
The file "Thoropa.txt" is a presence/absence matrix as follows:
PCR1 PCR2 PCR3 PCR4 PCR5 PCR6 PCR7 PCR8 PCR9 PCR10 PCR11 PCR12
1 0 0 0 0 0 0 0 0 0 0 0
0 0 0 1 0 0 0 1 0 0 0 0
0 0 0 0 0 0 1 0 0 0 0 0
1 1 1 1 1 1 1 1 1 1 1 1
0 0 0 0 0 0 0 0 0 0 0 0
1 0 1 0 1 1 1 1 1 1 1 1
0 0 1 0 0 0 1 0 0 0 0 0
1 0 1 0 0 0 0 0 1 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0
1 0 0 0 1 1 0 0 0 1 0 0
1 1 0 1 0 1 0 1 0 0 1 0
1 1 0 0 0 0 0 1 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0
1 1 1 1 0 1 1 1 1 0 1 1
1 1 1 1 1 1 1 1 1 1 1 1
0 0 1 0 1 0 0 0 0 0 1 1
1 1 1 1 1 1 1 1 1 1 1 1
Following the comment from Limey (thank you!) I changed the script to:
sink("Thoropa2.txt")
cat("model {
# Priors
psi ~ dunif(0,1)
p11 ~ dunif(0,1)
p10 ~ dunif(0,p10_max)
# Likelihood
for (i in 1:S){
z[i] ~ dbern(psi)
p[i] <- z[i]*p11 + (1-z[i])*p10
for (j in 1:K){
Y[i,j] ~ dbern(p[i])
}
}
} ",fill=TRUE)
sink()
y=Thoropa# the detection/non detection table
S=nrow(y)
K=ncol(y)
psi ~ dunif(0, 1)
p11 ~ dunif(0, 1)
p10 ~ dunif(0, p10_max)
p10_max=0.05
jags.data<-list(y=y, S=S, K=K, p10_max=p10_max)
jags.inits <-function()(list(psi=runif(0,1),p11=runif(0,1),p10=runif(0,p10_max)))
jags.params<-c("psi","p11","p10")
Thoropa_model<-jags.parallel(data = jags.data, inits = jags.inits, parameters.to.save= jags.params, model.file= "Thoropa2.txt", n.chains= 4, n.thin= 10, n.iter = 100000, n.burnin=50000, jags.seed = 333)
and the data file is as before.
Now I am getting the error message:
"Error in checkForRemoteErrors(val) :
4 nodes produced errors; first error: Indexing outside the bounds"
Could anyone help identify the error in my script? I'm no expert and I'm learning by myself, so sorry if it is a stupid question... (maybe there is something wrong with the format of the data...?)
Thank you all!

Your model is not working because some syntax errors in your R script. Note that the R syntax is different of the jags syntax, even if you are running the jags inside de R.
These are the errors:
The symbol "~" is not used for sampling in R. Delete the lines:
psi ~ dunif(0, 1)
p11 ~ dunif(0, 1)
p10 ~ dunif(0, p10_max)
The Y variable in the jags model is capitalized, so you must correct the syntax in jags.data.
jags.data<-list(Y=y, S=S, K=K, p10_max=p10_max)
In jags.inits, a) the body of the function must be inside of curly braces, and not parentheses, and b) the function runif takes 3 arguments: n (number of values you want to sample), min and max. The correct syntax is the following:
jags.inits <-function(){list(psi=runif(1,0,1),p11=runif(1,0,1),p10=runif(1,0,p10_max))}
Fixing those errors, your model should run without errors. After run the model, you can extract the median of the parameters "psi" using one of these two options:
Thoropa_model$BUGSoutput$median$psi
Thoropa_model$BUGSoutput$summary["psi","50%"]

Related

Predictions using neuralnet in R

I am using 'neuralnet' package in R to train a model for 'wine' dataset.
Below is the code that I have come up with so far-
library(neuralnet)
library(rattle)
library(rattle.data)
# load 'wine' dataset-
data(wine)
D <- as.data.frame(wine, stringsAsFactors=FALSE)
# replace 'Type' response variable (with values- 1, 2, 3) by 3 dummy variables-
D$wine1 <- 0
D$wine1[D$Type == 1] <- 1
D$wine2 <- 0
D$wine2[D$Type == 2] <- 1
D$wine3 <- 0
D$wine3[D$Type == 3] <- 1
# create formula to be used-
wine_formula <- as.formula(wine1 + wine2 + wine3 ~ Alcohol + Malic + Ash + Alcalinity + Magnesium + Phenols + Flavanoids + Nonflavanoids + Proanthocyanins + Color + Hue + Dilution + Proline)
# split dataset into training and testing datasets-
train_indices <- sample(1:nrow(wine), floor(0.7 * nrow(wine)), replace = F)
training <- D[train_indices, ]
testing <- D[-train_indices, ]
# train neural network model-
wine_nn <- neuralnet(wine_formula, data = training, hidden = c(5, 3), linear.output = FALSE, stepmax = 1e+07)
# make predictions using 'compute()'-
preds <- compute(wine_nn, testing[, 2:14])
# create a final data frame 'results' containing predicted & actual values-
results <- as.data.frame(preds$net.result)
results <- cbind(results, testing$wine1, testing$wine2, testing$wine3)
# rename the data frame-
names(results) <- c("Pred_Wine1", "Pred_Wine2", "Pred_Wine3", "Actual_Wine1", "Actual_Wine2", "Actual_Wine3")
The task that I have now is to convert the values in attributes "Pred_Wine1", "Pred_Wine2" and "Pred_Wine3" to 1s and 0s so that I can create a confusion matrix and test for model accuracy.
How should I go about it because "Pred_Wine1", "Pred_Wine2", "Pred_Wine3" contain calculated values which are in between 0 and 1.
Any suggestions?
Thanks!
I think you need label encoding here.
Let's say your data frame is called df. This will convert the values in your features into numeric. So, if Pred_Wine1 contains a,b it will convert it to 0,1 or vice-versa.
Try this:
features <- c("Pred_Wine1", "Pred_Wine2","Pred_Wine3")
for(f in features)
{
levels <- unique(df[[f]])
df[[i]] <- as.integer(factor(df[[i]], levels=levels))
}
Something like:
> head(results)
Pred_Wine1
1 1.00000000000000000
14 1.00000000000000000
17 1.00000000000000000
21 0.00000001901851182
26 0.21287781596598065
27 1.00000000000000000
Pred_Wine2
1 0.00000000000000000000000000000000000000000000000000015327712484
14 0.00000000000000000000000000000000000000000000000000009310376079
17 0.00000000000000000000000000000000000000000000000000009435487922
21 0.99999999363562386278658777882810682058334350585937500000000000
26 0.78964805454441211463034733242238871753215789794921875000000000
27 0.00000000000000000000000000000000000000000000000000009310386461
Pred_Wine3 Actual_Wine1 Actual_Wine2 Actual_Wine3
1 5.291055036e-10 1 0 0
14 1.336129635e-09 1 0 0
17 1.303396935e-09 1 0 0
21 8.968513318e-122 1 0 0
26 1.623066411e-111 1 0 0
27 1.336126866e-09 1 0 0
> class <- apply(results[1:3], 1, which.max)
> results[1:3] <- 0
> head(results)
Pred_Wine1 Pred_Wine2 Pred_Wine3 Actual_Wine1 Actual_Wine2 Actual_Wine3
1 0 0 0 1 0 0
14 0 0 0 1 0 0
17 0 0 0 1 0 0
21 0 0 0 1 0 0
26 0 0 0 1 0 0
27 0 0 0 1 0 0
> for (r in names(class)) {results[r,class[r]] <- 1}
> head(results)
Pred_Wine1 Pred_Wine2 Pred_Wine3 Actual_Wine1 Actual_Wine2 Actual_Wine3
1 1 0 0 1 0 0
14 1 0 0 1 0 0
17 1 0 0 1 0 0
21 0 1 0 1 0 0
26 0 1 0 1 0 0
27 1 0 0 1 0 0

How to use predict.stepplr() and confusionmatrix() correctly for step.plr method?

#Here is my code:
library(MASS, caret, stepPlr, janitor)
#stepPlr: L2 penalized logistic regression with a stepwise variable selection
#MASS: Support Functions and Datasets for Venables and Ripley's MASS
#caret: Classification and Regression Training
#janitor: Simple Tools for Examining and Cleaning Dirty Data
#Howells is a main dataframe, we will segregate it.
HNORSE <- Howells[which(Pop=='NORSE'),]
#Let's remove NA cols
#We will use janitor package here to remove NA cols
HNORSE <- remove_empty_cols(HNORSE)
#Assigning 0's and 1's to females and males resp.
HNORSE$PopSex[HNORSE$PopSex=="NORSEF"] <- '0'
HNORSE$PopSex[HNORSE$PopSex=="NORSEM"] <- '1'
HNORSE$PopSex <- as.numeric(HNORSE$PopSex)
HNORSE$PopSex
#Resultant column looks like this
[1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1 1 1 1 1
[41] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0
[81] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
I want to use Stepplr from caret package
a <- step.plr(HNORSE[,c(6:76)], HNORSE$PopSex, lambda = 1e-4, cp="bic", max.terms = 1, trace = TRUE, type = "forward")
#Where HNORSE[,c(6:76)] --> features
#HNORSE$PopSex ---> Binary response
#lambda ----> Default value
#max.terms ---> I tried more than 1 value for max.terms, but then R goes into infinite loop of 'Convergence Error'.
#That's why using max.terms=1
Then I ran summary command on "a"
summary(a)
Call: plr(x = ix0, y = y, weights = weights, offset.subset = offset.subset,
offset.coefficients = offset.coefficients, lambda = lambda,
cp = cp)
Coefficients:Estimate Std.Error z value Pr(>|z|)
Intercept -71.93470 13.3521 -5.388 0
ZYB 0.55594 0.1033 5.382 0
Null deviance: 152.49 on 109 degrees of freedom
Residual deviance: 57.29 on 108 degrees of freedom
Score: deviance + 4.7 * df = 66.69
I used step.plr so, I should then use predict.stepplr right? and not predict.plr?
By this logic I wish to use predict.stepplr. The default function argument example goes like this:
n <- 100
p <- 5
x0 <- matrix(sample(seq(3),n*p,replace=TRUE),nrow=n)
x0 <- cbind(rnorm(n),x0)
y <- sample(c(0,1),n,replace=TRUE)
level <- vector("list",length=6)
for (i in 2:6) level[[i]] <- seq(3)
fit <- step.plr(x0,y,level=level)
x1 <- matrix(sample(seq(3),n*p,replace=TRUE),nrow=n)
x1 <- cbind(rnorm(n),x1)
pred1 <- predict(fit,x0,x1,type="link")
pred2 <- predict(fit,x0,x1,type="response")
pred3 <- predict(fit,x0,x1,type="class")
object: stepplr object
x: matrix of features used for fitting object.
If newx is provided, x must be provided as well.
newx: matrix of features at which the predictions are made.
If newx=NULL, predictions for the training data are returned.
type: If type=link, the linear predictors are returned;
if type=response, the probability estimates are returned; and
if type=class, the class labels are returned. Default is type=link.
...
other options for prediction..
So First of all, I did not do any sampling like shown in here.
I want to predict HNORSE$PopSex which is binary variable.
My dataset which does not include the binary variable column is HNORSE[,c(6:76)].
I want to know what x0 and x1 function arguments should I put in
predict.stepplr()?
If not, HOW do I correctly implement
predict.stepplr?
I want to use overall accuracy to plot(Density(overall_accuracy))

For loop storage of output data

I am trying to store the output data from the forloop in the n.I matrix at the end of the code, but I am certain that something is wrong with my output matrix. It is giving me all the same values, either 0 or 1. I know that print(SS) is outputting the correct values and can see that the forloop is working properly.
Does anyone have any advice on how to fix the matrix, or any way that I am able to store the data from the forloop? Thanks in advance!
c=0.2
As=1
d=1
d0=0.5
s=0.5
e=0.1
ERs=e/As
C2 = c*As*exp(-d*s/d0)
#Island States (Initial Probability)
SS=0
for(i in 1:5) {
if (SS > 0) {
if (runif(1, min = 0, max = 1) < ERs){
SS = 0
}
}
else {
if (runif(1, min = 0, max = 1) < C2) {
SS = 1
}
}
print(SS)
}
n.I=matrix(c(SS), nrow=i, ncol=1, byrow=TRUE)
The efficient solution here is not to use a loop. It's unnecessary since the whole task can be easily vectorized.
Z =runif(100,0,1)
as.integer(x <= Z)
#[1] 0 0 0 0 1 1 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 1 0 0 1 0 0 1 1 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
#[70] 0 0 0 0 0 0 0 1 0 0 0 1 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0
you can save them in a list. Not very efficient but gets the job done.
list[[1]] indicates the first element saved in a list if you want to retrieve it.
list_pos <- list() # create the list out of the for loop
for(i in 1:100) {
c=0.10 #colonization rate
A=10 #Area of all islands(km^2)
d=250 #Distance from host to target (A-T)
s=0.1 #magnitude of distance
d0=100 #Specific "half distance" for dispersal(km)
C1 = c*A*exp(-d/d0) #Mainland to Target colonization
Z =runif(1,0,1)
x <- C1*A
if(x <= Z) {
list_pos[[i]] <- print("1") # Here you can store the 1 results.print is actually not necessary.
}
if(x >= Z){
list_pos[[i]] <- print("0") # Here you can store the 0 results.print is actually not necessary.
}
}

R formula() dummy coding model matrix factor interactions

I'm attempting to use formula to generate a model.matrix object to be used in a custom optimizer function.
It works great for the most part, but when it comes to factor-factor interactions, I'd like to specify the interaction as dummy coded rather than effects coded.
Take for example the following data set:
set.seed(1987)
myDF <- data.frame(Y = rnorm(100),
X1 = factor(LETTERS[sample(1:3, 100, replace = TRUE)]),
X2 = factor(LETTERS[sample(1:3, 100, replace = TRUE)]))
head(myDF)
Both the : and / operators create an effects coded model matrix (the latter being an additive effects structure, I think).
head(model.matrix(formula(Y ~ X1 : X2), data = myDF))
head(model.matrix(formula(Y ~ X1 / X2), data = myDF))
But I am looking to generate a dummy coded model matrix, which would have the first level of X1 omitted for each level of X2. Resulting in these terms (columns):
X1B:X2A
X1C:X2A
X1B:X2B
X1C:X2B
X1B:X2C
X1C:X2C
Is there a way to achieve this?
Is ~X1:X2-1 what you're looking for?
Make test data (as above):
set.seed(1987)
myDF <- data.frame(Y = rnorm(100),
X1 = factor(LETTERS[sample(1:3, 100, replace = TRUE)]),
X2 = factor(LETTERS[sample(1:3, 100, replace = TRUE)]))
Generate model matrix:
mm1 <- model.matrix(formula(Y ~ X1 : X2 - 1), data = myDF)
head(mm1)
## X1A:X2A X1B:X2A X1C:X2A X1A:X2B X1B:X2B X1C:X2B X1A:X2C X1B:X2C X1C:X2C
## 1 0 0 0 0 1 0 0 0 0
## 2 1 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 1 0
## 4 0 0 0 0 0 1 0 0 0
## 5 0 0 0 1 0 0 0 0 0
## 6 0 0 0 0 0 0 1 0 0
Or perhaps you really do just want some columns excluded:
mm0 <- model.matrix(formula(Y ~ X1 : X2), data = myDF)
mm0B <- mm0[,!grepl("(Intercept|^X1A:)",colnames(mm0))]
## X1B:X2A X1C:X2A X1B:X2B X1C:X2B X1B:X2C X1C:X2C
## 1 0 0 1 0 0 0
## 2 0 0 0 0 0 0
## 3 0 0 0 0 1 0
## 4 0 0 0 1 0 0
## 5 0 0 0 0 0 0
## 6 0 0 0 0 0 0
I thought you also might be interested in sum-to-zero contrasts:
mm2 <- model.matrix(formula(Y ~ X1 : X2 - 1), data = myDF,
contrasts.arg=list(X1=contr.sum,X2=contr.sum))
Below is another trial.
set.seed(1987)
myDF <- data.frame(Y = rnorm(100),
X1 = factor(LETTERS[sample(1:3, 100, replace = TRUE)]),
X2 = factor(LETTERS[sample(1:3, 100, replace = TRUE)]))
# row subsetting to exclude A
modelMat <- model.matrix(formula(Y ~ X1 : X2), data = myDF[myDF$X1 != 'A',])
# column subsetting to eliminate all columns including X1A
modelMat <- modelMat[,substring(colnames(modelMat), 1, 3) != "X1A"]
head(modelMat)
(Intercept) X1B:X2A X1C:X2A X1B:X2B X1C:X2B X1B:X2C X1C:X2C
1 1 0 0 1 0 0 0
3 1 0 0 0 0 1 0
4 1 0 0 0 1 0 0
8 1 0 0 0 0 1 0
10 1 0 0 0 0 0 1
11 1 0 0 0 0 0 1

R: combine rows of a matrix by group

I am attempting to reformat the data set my.data to obtain the output shown below the my.data2 statement. Specifically, I want to put the last 4 columns of my.data on one line per record.id, where the last four
columns of my.data will occupy columns 2-5 of the new data matrix if group=1 and columns 6-9 if group=2.
I wrote the cumbersome code below, but the double for-loop is causing an error that I simply cannot locate.
Even if the double for-loop worked, I suspect there is a much more efficient way of accomplishing the
same thing - (maybe reshape?)
Thank you for any help correcting the double for-loop or with more efficient code.
my.data <- "record.id group s1 s2 s3 s4
1 1 2 0 1 3
1 2 0 0 0 12
2 1 0 0 0 0
3 1 10 0 0 0
4 1 1 0 0 0
4 2 0 0 0 0
8 2 0 2 2 0
9 1 0 0 0 0
9 2 0 0 0 0"
my.data2 <- read.table(textConnection(my.data), header=T)
# desired output
#
# 1 2 0 1 3 0 0 0 12
# 2 0 0 0 0 0 0 0 0
# 3 10 0 0 0 0 0 0 0
# 4 1 0 0 0 0 0 0 0
# 8 0 0 0 0 0 2 2 0
# 9 0 0 0 0 0 0 0 0
Code:
dat_sorted <- sort(unique(my.data2[,1]))
my.seq <- match(my.data2[,1],dat_sorted)
my.data3 <- cbind(my.seq, my.data2)
group.min <- tapply(my.data3$group, my.data3$my.seq, min)
group.max <- tapply(my.data3$group, my.data3$my.seq, max)
# my.min <- group.min[my.data3[,1]]
# my.max <- group.max[my.data3[,1]]
my.records <- matrix(0, nrow=length(unique(my.data3$record.id)), ncol=9)
x <- 1
for(i in 1:max(my.data3$my.seq)) {
for(j in group.min[i]:group.max[i]) {
if(my.data3[x,1] == i) my.records[i,1] = i
# the two lines below seem to be causing an error
if((my.data3[x,1] == i) & (my.data3[x,3] == 1)) (my.records[i,2:5] = my.data3[x,4:7])
if((my.data3[x,1] == i) & (my.data3[x,3] == 2)) (my.records[i,6:9] = my.data3[x,4:7])
x <- x + 1
}
}
You are right, reshape helps here.
library(reshape2)
m <- melt(my.data2, id.var = c("record.id", "group"))
dcast(m, record.id ~ group + variable, fill = 0)
record.id 1_s1 1_s2 1_s3 1_s4 2_s1 2_s2 2_s3 2_s4
1 1 2 0 1 3 0 0 0 12
2 2 0 0 0 0 0 0 0 0
3 3 10 0 0 0 0 0 0 0
4 4 1 0 0 0 0 0 0 0
5 8 0 0 0 0 0 2 2 0
6 9 0 0 0 0 0 0 0 0
Comparison:
dfTest <- data.frame(record.id = rep(1:10e5, each = 2), group = 1:2,
s1 = sample(1:10, 10e5 * 2, replace = TRUE),
s2 = sample(1:10, 10e5 * 2, replace = TRUE),
s3 = sample(1:10, 10e5 * 2, replace = TRUE),
s4 = sample(1:10, 10e5 * 2, replace = TRUE))
system.time({
...# Your code
})
Error in my.records[i, 1] = i : incorrect number of subscripts on matrix
Timing stopped at: 41.61 0.36 42.56
system.time({m <- melt(dfTest, id.var = c("record.id", "group"))
dcast(m, record.id ~ group + variable, fill = 0)})
user system elapsed
25.04 2.78 28.72
Julius' answer is better, but for completeness, I think I managed to get the following for-loop to work:
dat_x <- (unique(my.data2[,1]))
my.seq <- match(my.data2[,1],dat_x)
my.data3 <- as.data.frame(cbind(my.seq, my.data2))
my.records <- matrix(0, nrow=length(unique(my.data3$record.id)), ncol=9)
my.records <- as.data.frame(my.records)
my.records[,1] = unique(my.data3[,2])
for(i in 1:9) {
if(my.data3[i,3] == 1) (my.records[my.data3[i,1],c(2:5)] = my.data3[i,c(4:7)])
if(my.data3[i,3] == 2) (my.records[my.data3[i,1],c(6:9)] = my.data3[i,c(4:7)])
}

Resources