My data file looks something like this:
list(y=structure(.Data=c(26, 228, 31, ...)), .Dim=c(413,9))
Let's say this file is saved as "data.txt".
If I'm working in 'R2OpenBUGS', it allows me to pass the data as a file with no problem:
mcmc <- bugs(data = "data.txt", inits=...)
But in JAGS, if I pass data as "data.txt", it says: "data must be a list or environment". What's the problem here? Also, if there is no way around it, is there a way I can read the data as list in R?
My model is:
model {
for (i in 1:413) {
for (j in 1:9) {
logy[i,j] <- log(y[i,j])
logy[i,j] ~ dnorm(m[i], s)
}
}
# priors
for (i in 1:413) {
m[i] ~ dgamma(0.001, 0.001)
}
s ~ dgamma(0.001, 0.001)
}
From the JAGS user manual
7.0.4 Data transformations
JAGS allows data transformations, but the syntax is different from BUGS.
BUGS allows you to put a stochastic node twice on the left hand side of a relation, as in this example taken from the manual
for (i in 1:N) {
z[i] <- sqrt(y[i])
z[i] ~ dnorm(mu, tau)
}
This is forbidden in JAGS. You must put data transformations in a separate block of relations preceded by the keyword data:
data {
for (i in 1:N) {
z[i] <- sqrt(y[i])
}
}
model {
for (i in 1:N) {
z[i] ~ dnorm(mu, tau)
}
...
}
Related
I create this function to summarize the results of a glm:
outcome_forest<-function(mod,var,sd){
x<-summary(mod)
y<-x$coefficients
x_df<-as.data.frame(y)
x_df$Estimate<-x_df[var,1]/sd
x_df$ci_min<-x_df[var,1]-x_df[var,2]/sd
x_df$ci_max<-x_df[var,1]+x_df[var,2]/sd
return(x_df[var,c(1,5,6,4)])
}
Now I have different glm models:
mod_1<-glm(y_1~x_1+c_1+c_2,data=data_1, family = binomial)
mod_2<-glm(y_1~x_2+c_1+c_2,data=data_1, family = binomial)
I want to create a loop in order to pass my function to these two models:
thelist<-c("mod_1","mod_2")
sd<-c(0.58,0.98)
results<-list()
for(i in thelist){
for(j in sd){
results[[i]]<-outcome_forest(i,2,j)
}
}
I obtained the followin error
Error: $ operator is invalid for atomic vectors
I’m guessing this is happening because of the quota marks in model_1 and model_2. But these quotes are needed in order to create a thelist vector which just those names and no the results of both regression models.
How can I fix this issue?
You are correct, outcome_forest takes mod as an object, and you have it as a string c("mod_1","mod_2"). To make R evaluate that string as an object you need to use eval(parse(text=...)):
thelist<-c("mod_1","mod_2")
sd<-c(0.58,0.98)
results<-list()
for(i in thelist){
for(j in sd){
results[[i]]<-outcome_forest(eval(parse(text=i)),2,j)
}
}
But this approach isn't too god, its better to place the "mod"'s in a list and looping trough that:
l = list(mod_1, mod_2)
thelist<-c("mod_1","mod_2")
sd<-c(0.58,0.98)
results<-list()
for(i in 1:2){
for(j in sd){
results[[thelist[i]]]<-outcome_forest(l[[i]],2,j)
}
}
Or avoid creating thelist by naming results later:
sd<-c(0.58,0.98)
results<-list()
for(i in 1:2){
for(j in sd){
results[[i]]<-outcome_forest(l[[i]],2,j)
}
}
names(results) = c("mod_1","mod_2")
I am trying to fit an ARIMA model to my time series.
I am trying to get the best model for my time series as follows,
best.aic<-Inf
for(p in 0:6){
for(d in 0:6){
for(q in 0:6){
fit<-arima(nasdaq_ts,order=c(p,d,q))
fit.aic<- fit$aic
if (fit.aic < best.aic) {
best.aic<-fit.aic
best.fit<-fit
best.order<-c(p,d,q)
}
}
}
}
But I am getting an error which says as follows
Error in optim(init[mask], armafn, method = optim.method, hessian = TRUE, :
initial value in 'vmmin' is not finite
I cannot understand the above error or what is causing it?
Can someone please help me here
My time series looks as follows,
Are you familiar with the auto.arima function? It contains parameters that allow you to specify the model space to search, as well as the type of search to perform.
Try this code:-
nasdaqfinal.aic <- Inf
nasdaqfinal.order <- c(0,0,0)
for (p in 0:6) for (d in 0:6) for (q in 0:6) {
nasdaqcurrent.aic <- AIC(arima(data, order=c(p, d, q)))
if (nasdaqcurrent.aic < nasdaqfinal.aic) {
nasdaqfinal.aic <- nasdaqcurrent.aic
nasdaqfinal.order <- c(p, d, q)
nasdaqfinal.arima <- arima(data, order=nasdaqfinal.order)
}
}
So I am brand new to R. I started learning it yesterday, because there's some data that is being very resistant to automatically importing into Mathematica and Python. I'm building a few machine learning techniques to do analysis on the data that I can now import with R. This is a genetic programming implementation that when finished should do symbolic regression on some data. (I have yet to create the mutation or crossover operators, build a legit function list, etc). I get two errors when I run the script:
> Error: attempt to apply non-function
> print(bestDude)
> Error in print(bestDude) : object 'bestDude' not found
This is my code:
library("datasets")
#Allows me to map a name to each element in a numerical list.
makeStrName<-function(listOfItems)
{
for(i in 1:length(listOfItems))
{
names(listOfItems)[i]=paste("x",i,sep="")
}
return(listOfItems)
}
#Allows me to replace each random number in a vector with the corresponding
#function in a list of functions.
mapFuncList<-function(funcList,rndNumVector)
{
for(i in 1:length(funcList))
{
replace(rndNumVector, rndNumVector==i,funcList[[i]])
}
return(rndNumVector)
}
#Will generate a random function from the list of functions and a random sample.
generateOrganism<-function(inputLen,inputSeed, functions)
{
set.seed(inputSeed)
rnd<-sample(1:length(functions),inputLen,replace=T)
Org<-mapFuncList(functions,rnd)
return(Org)
}
#Will generate a series of "Organisms"
genPopulation<-function(popSize,initialSeed,initialSize,functions)
{
population<-list("null")
for(i in 2:popSize)
{
population <- c(population,generateOrganism(initialSize,initialSeed, functions))
initialSeed <- initialSeed+1
}
populationWithNames<-makeStrName(population)
return(populationWithNames)
}
#Turns the population of functions (which are actually strings in "") into
#actual functions. (i.e. changes the mode of the list from string to function).
populationFuncList<-function(Population)
{
Population[[1]]<-"x"
funCreator<-function(snippet)
txt=snippet
function(x)
{
exprs <- parse(text = txt)
eval(exprs)
}
listOfFunctions <- lapply(setNames(Population,names(Population)),function(x){funCreator(x)})
return(listOfFunctions)
}
#Applies a fitness function to the population. Puts the best organism in
#the hallOfFame.
evalPopulation<-function(populationFuncList, inputData,outputData)
{
#rmse <- sqrt( mean( (sim - obs)^2))
hallOfFame<-list(1000000000)
for(i in 1:length(populationFuncList))
{
total<-list()
for(z in 1:length(inputData))
{
total<-c(total,(abs(populationFuncList[[i]](inputData[[z]])-outputData[[z]])))
}
rmse<-sqrt(mean(total*total))
if(rmse<hallOfFame[[1]]) {hallOfFame[[1]]<-rmse}
}
return(hallOfFame)
}
#Function list, input data, output data (data to fit to)
funcs<-list("x","log(x)","sin(x)","cos(x)","tan(x)")
desiredFuncOutput<-list(1,2,3,4,5)
dataForInput<-list(1,2,3,4,5)
#Function calls
POpulation<-genPopulation(4,1,1,funcs)
POpulationFuncList<-populationFuncList(POpulation)
bestDude<-evalPopulation(POpulationFuncList,dataForInput,desiredFuncOutput)
print(bestDude)
The code is now working thanks to Hack-R's suggestions. So here's the finalized code in case someone else runs into a similar trouble.
library("datasets")
#Allows me to map a name to each element in a numerical list.
makeStrName<-function(listOfItems)
{
for(i in 1:length(listOfItems))
{
names(listOfItems)[i]=paste("x",i,sep="")
}
return(listOfItems)
}
#Allows me to replace each random number in a vector with the corresponding
#function in a list of functions.
mapFuncList<-function(funcList,rndNumVector)
{
for(i in 1:length(funcList))
{
rndNumVector[rndNumVector==i]<-funcList[i]
}
return(rndNumVector)
}
#Will generate a random function from the list of functions and a random sample.
generateOrganism<-function(inputLen,inputSeed, functions)
{
set.seed(inputSeed)
rnd<-sample(1:length(functions),inputLen,replace=T)
Org<-mapFuncList(functions,rnd)
return(Org)
}
#Will generate a series of "Organisms"
genPopulation<-function(popSize,initialSeed,initialSize,functions)
{
population<-list()
for(i in 1:popSize)
{
population <- c(population,generateOrganism(initialSize,initialSeed,functions))
initialSeed <- initialSeed+1
}
populationWithNames<-makeStrName(population)
return(populationWithNames)
}
#Turns the population of functions (which are actually strings in "") into
#actual functions. (i.e. changes the mode of the list from string to function).
funCreator<-function(snippet)
{
txt=snippet
function(x)
{
exprs <- parse(text = txt)
eval(exprs)
}
}
#Applies a fitness function to the population. Puts the best organism in
#the hallOfFame.
evalPopulation<-function(populationFuncList, inputData,outputData)
{
#rmse <- sqrt( mean( (sim - obs)^2))
hallOfFame<-list(1000000000)
for(i in 1:length(populationFuncList))
{
total<-vector(mode="numeric",length=length(inputData))
for(z in 1:length(inputData))
{
total<-c(total,(abs(populationFuncList[[i]](inputData[[z]])-outputData[[z]])))
}
rmse<-sqrt(mean(total*total))
if(rmse<hallOfFame[[1]]) {hallOfFame[[1]]<-rmse}
}
return(hallOfFame)
}
#Function list, input data, output data (data to fit to)
funcs<-list("x","log(x)","sin(x)","cos(x)","tan(x)")
desiredFuncOutput<-list(1,2,3,4,5)
dataForInput<-list(1,2,3,4,5)
#Function calls
POpulation<-genPopulation(4,1,1,funcs)
POpulationFuncList <- lapply(setNames(POpulation,names(POpulation)),function(x){funCreator(x)})
bestDude<-evalPopulation(POpulationFuncList,dataForInput,desiredFuncOutput)
print(bestDude)
In your function evalPopulation you're attempting to apply populationFuncList[[i]] as if it were a function, but when you pass in the argument POpulationFuncList to replace the variable populationFuncList it's not a function, it's a list.
I'm not sure what you were trying to do, so I'm not sure which way you want to fix this. If you meant to use a function you should change the name of the object you're referencing to the function and remove it as an argument or at least pass a function in as an argument instead of the list.
OTOH if you meant to use the list POpulationFuncList then you just shouldn't be applying it as if it were a function instead of a list.
On a side note, this probably would be more apparent if you didn't give them such similar names.
Another potential problem is that you seem have non-numeric results in one of your lists:
> populationFuncList(POpulation)
$x1
[1] "x"
$x2
[1] 2
$x3
[1] 1
$x4
[1] 1
You can't take the absolute value of the character "x", so I just wanted to make sure you're aware of this.
A third problem is that you're doing math on a non-numeric data typed object called total. You need to either change the type to numeric or index it appropriately.
Now it's impossible for me to know exactly which of an infinite number of possibilities you should choose to fix this, because I don't know the details of your use case. However, here is one possible solution which you should be able to adapt to the specifics of the use case:
library("datasets")
#Allows me to map a name to each element in a numerical list.
makeStrName<-function(listOfItems)
{
for(i in 1:length(listOfItems))
{
names(listOfItems)[i]=paste("x",i,sep="")
}
return(listOfItems)
}
#Allows me to replace each random number in a vector with the corresponding
#function in a list of functions.
mapFuncList<-function(funcList,rndNumVector)
{
for(i in 1:length(funcList))
{
replace(rndNumVector, rndNumVector==i,funcList[[i]])
}
return(rndNumVector)
}
#Will generate a random function from the list of functions and a random sample.
generateOrganism<-function(inputLen,inputSeed, functions)
{
set.seed(inputSeed)
rnd<-sample(1:length(functions),inputLen,replace=T)
Org<-mapFuncList(functions,rnd)
return(Org)
}
#Will generate a series of "Organisms"
genPopulation<-function(popSize,initialSeed,initialSize,functions)
{
population<-list("null")
for(i in 2:popSize)
{
population <- c(population,generateOrganism(initialSize,initialSeed, functions))
initialSeed <- initialSeed+1
}
populationWithNames<-makeStrName(population)
return(populationWithNames)
}
#Turns the population of functions (which are actually strings in "") into
#actual functions. (i.e. changes the mode of the list from string to function).
populationFuncList<-function(Population)
{
Population[[1]]<-"x"
funCreator<-function(snippet)
txt=snippet
function(x)
{
exprs <- parse(text = txt)
eval(exprs)
}
listOfFunctions <- lapply(setNames(Population,names(Population)),function(x){funCreator(x)})
return(listOfFunctions)
}
#Applies a fitness function to the population. Puts the best organism in
#the hallOfFame.
evalPopulation<-function(myList=myList, dataForInput,desiredFuncOutput)
{
#rmse <- sqrt( mean( (sim - obs)^2))
hallOfFame<-list(1000000000)
for(i in 1:length(populationFuncList))
{
total<-0
for(z in 1:length(dataForInput))
{
total<-c(total,(abs(myList[[i]]+(dataForInput[[z]])-desiredFuncOutput[[z]])))
}
rmse<-sqrt(mean(total*total))
if(rmse<hallOfFame[[1]]) {hallOfFame[[1]]<-rmse}
}
return(hallOfFame)
}
#Function list, input data, output data (data to fit to)
funcs<-list("x","log(x)","sin(x)","cos(x)","tan(x)")
desiredFuncOutput<-list(1,2,3,4,5)
dataForInput<-list(1,2,3,4,5)
#Function calls
POpulation<-genPopulation(4,1,1,funcs)
myList <-populationFuncList(POpulation)[2:4]
bestDude<-evalPopulation(myList,dataForInput,desiredFuncOutput)
print(bestDude)
[[1]]
[1] 1.825742
Please bear in mind that this is only my second day writing R code instead of using it, and I'm taking on a project almost surely above my level. A lot of my code is probably inefficient.
I'm trying to write R code which will automate the majority of my multiple regression analysis, while still allowing manual fine tuning in terms of the # of predictors, data transformations and model assumptions. I always get the error:
Error: could not find function "Dat.assumptn"
General advice on getting this nested function design to work is appreciated. Also, could someone please post a few well written links on functions in R which cover a range of difficulty?
As for my other issues, such as implementing pass by reference behavior via a package like R.oo from CRAN or a source from R: Pass by reference, I think I can figure it out. Here is a part of my code (incomplete and needs rewriting):
Dat.assumptn <- function(f, final, caperDS, picanDS) {
print(f)
crunchMod <- crunch(f, data = contrasts)
print(caic.table(crunchMod))
print(caic.diagnostics(crunchMod))
print(summary(crunchMod))
#If independent contrasts assumptions fail, return me to the second for loop
#within Dat.analysis() [Not Yet Implemented]
#Implement code to reduce and check the model (whether final = true/false)
if (final == TRUE) {
retry <- Dat.Msucess(crunchMod)
#The above function will recommend additional transformations if the final
#reduced model significantly violated model assumptions.
}
}
Dat.analysis <- function() {
treList <- dir(pattern="*.tre") //All of my phylogenetic tree files
caperDS <- read.table("dataSet.txt", header = TRUE)
picanDS <- read.table("dataSet.txt", row.names = 1, header = TRUE)
#Dat.assumptn() requires a different format from Dat.analysis()
#The loop below changes the names from my data set to be proper variable names
for (i in 1:length(names(picanDS))) {
varName <- gsub("_|[0-9]|\\.", "", names(picanDS)[i])
names(caperDS)[i+1] <- varName
names(picanDS)[i] <- varName
caperDS[,paste(varName,"2",sep="")] <- caperDS[i+1]*caperDS[i+1]
}
#Implement a for loop to transform the data based upon specifications from both
#Dat.assumptn() [called from Dat.analysis] and Dat.Msuccess [called from Dat.assumptn].
#Likely using pass by reference.
for (i in 1:length(treList)) {
myTrees = read.nexus(treList[i])
for (j in 1:length(myTrees)) {
cat(paste("\n\n", treList[i]))
print(multiPhylosignal(picanDS, myTrees[[j]]))
contrasts <- comparative.data(myTrees[[j]], caperDS, Species)
if (names(caperDS)[3] == "MedF" || names(caperDS)[3] == "MaxF") {
final <- FALSE
f <- as.formula(paste(paste(names(caperDS)[2],"~"),
paste(paste(paste("(",paste(names(caperDS)[4:(ncol(picanDS)+1)], collapse="+"))),")^2"),
paste("+", paste(names(caperDS)[(ncol(picanDS)+4):ncol(caperDS)], collapse = "+"))))
while (final == FALSE) {
f <- Dat.assumptn(f, final, caperDS, picanDS)
#Pass final by reference, and set to true if the final reduced model
#is achieved. Otherwise, iterate to reduce the model.
}
final <- FALSE
f <- as.formula(paste(paste(names(caperDS)[3],"~"),
paste(paste(paste("(",paste(names(caperDS)[4:(ncol(picanDS)+1)], collapse="+"))),")^2"),
paste("+", paste(names(caperDS)[(ncol(picanDS)+4):ncol(caperDS)], collapse = "+"))))
while (final == FALSE) {
f <- Dat.assumptn(f, final, caperDS, picanDS)
#Pass final by reference, and set to true if the final reduced model
#is achieved. Otherwise, iterate to reduce the model.
}
} else {
final <- FALSE
f <- as.formula(paste(paste(names(caperDS)[2],"~"),
paste(paste(paste("(",paste(names(caperDS)[3:(ncol(picanDS)+1)], collapse="+"))),")^2"),
paste("+", paste(names(caperDS)[(ncol(picanDS)+3):ncol(caperDS)], collapse = "+"))))
while (final == FALSE) {
f <- Dat.assumptn(f, final, caperDS, picanDS)
#Pass final by reference, and set to true if the final reduced model
#is achieved. Otherwise, iterate to reduce the model.
}
}
}
}
}
I often run JAGS models on simulated data with known parameters. I like the default plot method for mcmc objects. However, I would like to add an abline(v=TRUE_VALUE) for each parameter that is modelled. This would give me a quick check for whether the posterior is reasonable.
Of course I could do this manually, or presumably reinvent the wheel and write my own function. But I was wondering if there is an elegant way that builds on the existing plot method.
Here's a worked example:
require(rjags)
require(coda)
# simulatee data
set.seed(4444)
N <- 100
Mu <- 100
Sigma <- 15
y <- rnorm(n=N, mean=Mu, sd=Sigma)
jagsdata <- list(y=y)
jags.script <- "
model {
for (i in 1:length(y)) {
y[i] ~ dnorm(mu, tau)
}
mu ~ dnorm(0, 0.001)
sigma ~ dunif(0, 1000)
tau <- 1/sigma^2
}"
mod1 <- jags.model(textConnection(jags.script), data=jagsdata, n.chains=4,
n.adapt=1000)
update(mod1, 200) # burn in
mod1.samples <- coda.samples(model=mod1,
variable.names=c('mu', 'sigma'),
n.iter=1000)
plot(mod1.samples)
I just want to run something like abline(v=100) for mu and abline(v=15) for sigma. Of course in many other examples, I would have 5, 10, 20 or more parameters of interest. Thus, I'm interested in being able to supply a vector of true values for named parameters.
I've had a look at getAnywhere(plot.mcmc). Would modifying that be a good way to go?
Okay. So I modified plot.mcmc to look like this:
my.plot.mcmc <- function (x, trace = TRUE, density = TRUE, smooth = FALSE, bwf,
auto.layout = TRUE, ask = FALSE, parameters, ...)
{
oldpar <- NULL
on.exit(par(oldpar))
if (auto.layout) {
mfrow <- coda:::set.mfrow(Nchains = nchain(x), Nparms = nvar(x),
nplots = trace + density)
oldpar <- par(mfrow = mfrow)
}
for (i in 1:nvar(x)) {
y <- mcmc(as.matrix(x)[, i, drop = FALSE], start(x),
end(x), thin(x))
if (trace)
traceplot(y, smooth = smooth, ...)
if (density) {
if (missing(bwf)) {
densplot(y, ...); abline(v=parameters[i])
} else densplot(y, bwf = bwf, ...)
}
if (i == 1)
oldpar <- c(oldpar, par(ask = ask))
}
}
Then running the command
my.plot.mcmc(mod1.samples, parameters=c(Mu, Sigma))
produces this
Note that parameters must be a vector of values in the same sort order as JAGS sorts variables, which seems to be alphabetically and then numerically for vectors.
Lessons learnt
Simply writing a new plot.mcmc didn't work by default presumably because of namespaces. So I just created a new function
I had to change set.mfrow to coda:::set.mfrow presumably also because of namespaces.
I changed ask to ask=FALSE, because RStudio permits browsing through figures.
I'd be happy to hear any suggestions about better ways of overriding or adapting existing S3 methods.