Related
I am trying to figure out how to sample from a custom density in rJAGS but am running into issues. having searched the site, I saw that there is a zeroes (or ones) trick that can be employed based on BUGS code but am having a hard time with its implementation in rJAGS. I think I am doing it correctly but keep getting the following error:
Error in jags.model(model1.spec, data = list(x = x, N = N), n.chains = 4, :
Error in node dpois(lambda)
Length mismatch in Node::setValue
Here is my rJAGS code for reproducibility:
library(rjags)
set.seed(4)
N = 100
x = rexp(N, 3)
L = quantile(x, prob = 1) # Censoring point
censor = ifelse(x <= L, 1, 0) # Censoring indicator
x[censor == 1] <- L
model1.string <-"
model {
for (i in 1:N){
x[i] ~ dpois(lambda)
lambda <- -N*log(1-exp(-(1/mu)))
}
mu ~ dlnorm(mup, taup)
mup <- log(.0001)
taup <- 1/49
R <- 1 - exp(-(1/mu) * .0001)
}
"
model1.spec<-textConnection(model1.string)
jags <- jags.model(model1.spec,
data = list('x' = x,
'N' = N),
n.chains=4,
n.adapt=100)
Here, my negative log likelihood of the density I am interested in is -N*log(1-exp(-(1/mu))). Is there an obvious mistake in the code?
Using the zeros trick, the variable on the left-hand side of the dpois() relationship has to be an N-length vector of zeros. The variable x should show up in the likelihood somewhere. Here is an example using the normal distribution.
set.seed(519)
N <- 100
x <- rnorm(100, mean=3)
z <- rep(0, N)
C <- 10
pi <- pi
model1.string <-"
model {
for (i in 1:N){
lambda[i] <- pow(2*pi*sig2, -0.5) * exp(-.5*pow(x[i]-mu, 2)/sig2)
loglam[i] <- log(lambda[i]) + C
z[i] ~ dpois(loglam[i])
}
mu ~ dnorm(0,.1)
tau ~ dgamma(1,.1)
sig2 <- pow(tau, -1)
sumLL <- sum(log(lambda[]))
}
"
model1.spec<-textConnection(model1.string)
set.seed(519)
jags <- jags.model(model1.spec,
data = list('x' = x,
'z' = z,
'N' = N,
'C' = C,
'pi' = pi),
inits = function()list(tau = 1, mu = 3),
n.chains=4,
n.adapt=100)
samps1 <- coda.samples(jags, c("mu", "sig2"), n.iter=1000)
summary(samps1)
Iterations = 101:1100
Thinning interval = 1
Number of chains = 4
Sample size per chain = 1000
1. Empirical mean and standard deviation for each variable,
plus standard error of the mean:
Mean SD Naive SE Time-series SE
mu 4.493 2.1566 0.034100 0.1821
sig2 1.490 0.5635 0.008909 0.1144
2. Quantiles for each variable:
2.5% 25% 50% 75% 97.5%
mu 0.6709 3.541 5.218 5.993 7.197
sig2 0.7909 0.999 1.357 1.850 2.779
I am an R user and I am currently trying to use a Gradient Descent algorithm for which to compare against a multiple linear regression. I have seen some codes online but they do not work on all data sets. I use the
UCI bike sharing data set (hour) as an example
Data set can be found here:
https://archive.ics.uci.edu/ml/machine-learning-databases/00275/
Split the data into training/test sets and create matrices:
data1 <- data[, c("season", "mnth", "hr", "holiday", "weekday", "workingday", "weathersit", "temp", "atemp", "hum", "windspeed", "cnt")]
# Split the data
trainingObs<-sample(nrow(data1),0.70*nrow(data1),replace=FALSE)
# Create the training dataset
trainingDS<-data1[trainingObs,]
# Create the test dataset
testDS<-data1[-trainingObs,]
x0 <- rep(1, nrow(trainingDS)) # column of 1's
x1 <- trainingDS[, c("season", "mnth", "hr", "holiday", "weekday", "workingday", "weathersit", "temp", "atemp", "hum", "windspeed")]
# create the x- matrix of explanatory variables
x <- as.matrix(cbind(x0,x1))
# create the y-matrix of dependent variables
y <- as.matrix(trainingDS$cnt)
m <- nrow(y)
solve(t(x)%*%x)%*%t(x)%*%y
Followed by the gradient function
gradientDesc <- function(x, y, learn_rate, conv_threshold, max_iter) {
n <- nrow(x)
m <- runif(ncol(x), 0, 1) # m is a vector of dimension ncol(x), 1
yhat <- x %*% m # since x already contains a constant, no need to add another one
MSE <- sum((y - yhat) ^ 2) / n
converged = F
iterations = 0
while(converged == F) {
m <- m - learn_rate * ( 1/n * t(x) %*% (yhat - y))
yhat <- x %*% m
MSE_new <- sum((y - yhat) ^ 2) / n
if( abs(MSE - MSE_new) <= conv_threshold) {
converged = TRUE
}
iterations = iterations + 1
MSE <- MSE_new
if(iterations >= max_iter) break
}
return(list(converged = converged,
num_iterations = iterations,
MSE = MSE_new,
coefs = m) )
}
ols <- solve(t(x)%*%x)%*%t(x)%*%y
out <- gradientDesc(x,y, 0.005, 1e-7, 200000)
data.frame(ols, out$coefs)
It works fine and produces the following comparison between multiple regression and the gradient solution:
ols out.coefs
x0 30.8003341 33.4473667
season 19.7839676 19.8020073
mnth -0.1249776 -0.1290033
hr 7.4554424 7.4619508
holiday -15.6022846 -15.8630012
weekday 1.8238997 1.7930636
workingday 5.0487553 5.0088699
weathersit -2.2088254 -2.3389047
temp 85.6214524 141.1351024
atemp 235.5992391 173.1234342
hum -226.7253991 -226.1559532
windspeed 33.5144866 30.1245570
It also works for the iris data set following the exact same commands as before:
iris
head(iris)
data2 <-iris[,c("Sepal.Width", "Petal.Length","Petal.Width","Sepal.Length")]
# Split the data
trainingObs1<-sample(nrow(data2),0.70*nrow(data2),replace=FALSE)
# Create the training dataset
trainingDS1<-data2[trainingObs1,]
# Create the test dataset
testDS2<-data2[-trainingObs1,]
x0a <- rep(1, nrow(trainingDS1)) # column of 1's
x1a<-trainingDS1[, c("Sepal.Width", "Petal.Length","Petal.Width")]
z <- as.matrix(cbind(x0a,x1a))
y<-as.matrix(trainingDS1$Sepal.Length)
m<-nrow(y)
solve(t(z)%*%z)%*%t(z)%*%y
ols <- solve(t(z)%*%z)%*%t(z)%*%y
out <- gradientDesc(z,y, 0.005, 1e-7, 200000)
data.frame(ols, out$coefs)
Producing the following output:
ols out.coefs
x0a 1.7082712 1.3933410
Sepal.Width 0.6764848 0.7578847
Petal.Length 0.7225420 0.7571403
Petal.Width -0.5436298 -0.6001406
However when using it with the mtcars data set:
mtcars<-mtcars
head(mtcars)
data3<-mtcars[,c("hp","wt","gear","cyl","mpg")]
trainingObs2<-sample(nrow(data3),0.70*nrow(data3),replace=FALSE)
trainingDS2<-data3[trainingObs2,]
testDS3<-data3[-trainingObs2,]
x0b <- rep(1, nrow(trainingDS2)) # column of 1's
x1b<-trainingDS2[, c("hp", "wt","gear","cyl")]
w <- as.matrix(cbind(x0b,x1b))
y<-as.matrix(trainingDS2$mpg)
m<-nrow(y)
solve(t(w)%*%w)%*%t(w)%*%y
ols <- solve(t(w)%*%w)%*%t(w)%*%y
out <- gradientDesc(w,y, 0.005, 1e-7, 200000)
data.frame(ols, out$coefs)
It fails to produce a comparison, creating the following error:
> ols <- solve(t(w)%*%w)%*%t(w)%*%y
> out <- gradientDesc(w,y, 0.005, 1e-7, 200000)
Error in if (abs(MSE - MSE_new) <= conv_threshold) { :
missing value where TRUE/FALSE needed
> data.frame(ols, out$coefs)
Error in data.frame(ols, out$coefs) :
arguments imply differing number of rows: 5, 4
I'd appreciate any help and pointers. Thank you very much for your time.
I am trying to implement a linear regression in R from scratch without using any packages or libraries using the following data:
UCI Machine Learning Repository, Bike-Sharing-Dataset
The linear regression was easy enough, here is the code:
data <- read.csv("Bike-Sharing-Dataset/hour.csv")
# Select the useable features
data1 <- data[, c("season", "mnth", "hr", "holiday", "weekday", "workingday", "weathersit", "temp", "atemp", "hum", "windspeed", "cnt")]
# Split the data
trainingObs<-sample(nrow(data1),0.70*nrow(data1),replace=FALSE)
# Create the training dataset
trainingDS<-data1[trainingObs,]
# Create the test dataset
testDS<-data1[-trainingObs,]
x0 <- rep(1, nrow(trainingDS)) # column of 1's
x1 <- trainingDS[, c("season", "mnth", "hr", "holiday", "weekday", "workingday", "weathersit", "temp", "atemp", "hum", "windspeed")]
# create the x- matrix of explanatory variables
x <- as.matrix(cbind(x0,x1))
# create the y-matrix of dependent variables
y <- as.matrix(trainingDS$cnt)
m <- nrow(y)
solve(t(x)%*%x)%*%t(x)%*%y
The next step is to implement the batch update gradient descent and here is where I am running into problems. I dont know where the errors are coming from or how to fix them, but the code works. The problem is that the values being produced are radically different from the results of the regression and I am unsure of why.
The two versions of the batch update gradient descent that I have implemented are as follows (the results of both algorithms differ from one another and from the results of the regression):
# Gradient descent 1
gradientDesc <- function(x, y, learn_rate, conv_threshold, n, max_iter) {
plot(x, y, col = "blue", pch = 20)
m <- runif(1, 0, 1)
c <- runif(1, 0, 1)
yhat <- m * x + c
MSE <- sum((y - yhat) ^ 2) / n
converged = F
iterations = 0
while(converged == F) {
## Implement the gradient descent algorithm
m_new <- m - learn_rate * ((1 / n) * (sum((yhat - y) * x)))
c_new <- c - learn_rate * ((1 / n) * (sum(yhat - y)))
m <- m_new
c <- c_new
yhat <- m * x + c
MSE_new <- sum((y - yhat) ^ 2) / n
if(MSE - MSE_new <= conv_threshold) {
abline(c, m)
converged = T
return(paste("Optimal intercept:", c, "Optimal slope:", m))
}
iterations = iterations + 1
if(iterations > max_iter) {
abline(c, m)
converged = T
return(paste("Optimal intercept:", c, "Optimal slope:", m))
}
}
return(paste("MSE=", MSE))
}
AND:
grad <- function(x, y, theta) { # note that for readability, I redefined theta as a column vector
gradient <- 1/m* t(x) %*% (x %*% theta - y)
return(gradient)
}
grad.descent <- function(x, maxit, alpha){
theta <- matrix(rep(0, length=ncol(x)), ncol = 1)
for (i in 1:maxit) {
theta <- theta - alpha * grad(x, y, theta)
}
return(theta)
}
If someone could explain why these two functions are producing different results I would greatly appreciate it. I also want to make sure that I am in fact implementing the gradient descent correctly.
Lastly, how can I plot the results of the descent with varying learning rates and superimpose this data over the results of the regression itself?
EDIT
Here are the results of running the two algorithms with alpha = .005 and 10,000 iterations:
1)
> gradientDesc(trainingDS, y, 0.005, 0.001, 32, 10000)
TEXT_SHOW_BACKTRACE environmental variable.
[1] "Optimal intercept: 2183458.95872599 Optimal slope: 62417773.0184353"
2)
> print(grad.descent(x, 10000, .005))
[,1]
x0 8.3681113
season 19.8399837
mnth -0.3515479
hr 8.0269388
holiday -16.2429750
weekday 1.9615369
workingday 7.6063719
weathersit -12.0611266
temp 157.5315413
atemp 138.8019732
hum -162.7948299
windspeed 31.5442471
To give you an example of how to write functions like this in a slightly better way, consider the following:
gradientDesc <- function(x, y, learn_rate, conv_threshold, max_iter) {
n <- nrow(x)
m <- runif(ncol(x), 0, 1) # m is a vector of dimension ncol(x), 1
yhat <- x %*% m # since x already contains a constant, no need to add another one
MSE <- sum((y - yhat) ^ 2) / n
converged = F
iterations = 0
while(converged == F) {
m <- m - learn_rate * ( 1/n * t(x) %*% (yhat - y))
yhat <- x %*% m
MSE_new <- sum((y - yhat) ^ 2) / n
if( abs(MSE - MSE_new) <= conv_threshold) {
converged = T
}
iterations = iterations + 1
MSE <- MSE_new
if(iterations >= max_iter) break
}
return(list(converged = converged,
num_iterations = iterations,
MSE = MSE_new,
coefs = m) )
}
For comparison:
ols <- solve(t(x)%*%x)%*%t(x)%*%y
Now,
out <- gradientDesc(x,y, 0.005, 1e-7, 200000)
data.frame(ols, out$coefs)
ols out.coefs
x0 33.0663095 35.2995589
season 18.5603565 18.5779534
mnth -0.1441603 -0.1458521
hr 7.4374031 7.4420685
holiday -21.0608520 -21.3284449
weekday 1.5115838 1.4813259
workingday 5.9953383 5.9643950
weathersit -0.2990723 -0.4073493
temp 100.0719903 147.1157262
atemp 226.9828394 174.0260534
hum -225.7411524 -225.2686640
windspeed 12.3671942 9.5792498
Here, x refers to your x as defined in your first code chunk. Note the similarity between the coefficients. However, also note that
out$converged
[1] FALSE
so that you could increase the accuracy by increasing the number of iterations or by playing around with the step size. It might also help to scale your variables first.
I am trying to estimate the parameters of the following nonlinear model in implicit form:
its expression is not given by y = f(x;theta) + u
but by u = g(x,y;theta)
with u the random term, x and y are the explanatory and explained variable. It is not possible to obtain f from g (as g cannot be inverted in y and/or g is not additive in u).
Both NLS and GMM below do not run.
I could not find anything about this issue in the NLS and GMM documentation, so I wonder how to do it. I would appreciate any help on this topic.
More specifically, I considered the following specification for g (but NLS and GMM do not run):
#
# The DGP
#
set.seed(34567)
N <- 1000 # sample size
x <- rnorm(N) # explanatory variable
u <- rnorm(N) # u is the random term or the model
y <- (81 - 3*x*x)*(1+exp(u)) # y = h(x,u;theta) is the explained variable
summary(y)
#
# The NLS regression: we know that the true functional form is u = g(x,y;theta) and E[u]=0
# but we do not know that the DGP is obtained for theta_0=81 and theta_1=3
#
x2 <- x*x
NLS_reg <- function(theta_0, theta_1) {
log( y / (theta_0 - theta_1*x2) - 1 )
}
nls_out <- nls(0 ~ NLS_reg(theta_0, theta_1), start = list(theta_0 = 84.3, theta_1 = 3.25), trace = T)
summary(nls_out)
#
# The GMM regression
#
require("gmm")
iota <- rep(1,N)
data <- data.matrix(cbind(y, iota, x, x2))
GMM_reg <- function(data, theta_0, theta_1) {
y <- as.numeric(data[,1])
x2 <- as.numeric(data[,4])
return( log( y / (theta_0*iota-theta_1*x2) - 1 ) )
}
moments <- function(data, theta_0, theta_1) {
z <- data.matrix(data[, 2:4])
m <- z * as.vector(GMM_reg(data, theta_0, theta_1))
return(cbind(m))
}
GMM_out <- gmm(g=moments, x = data, t0 = c(78,2.6), type = "iterative", crit = 1e-3, wmatrix = "optimal", method = "Nelder-Mead", control = list(reltol = 1e-3, maxit = 100))
summary(GMM_out)
I'm working on a binomial mixture model using OpenBUGS and R package R2OpenBUGS. I've successfully built simpler models, but once I add another level for imperfect detection, I consistently receive the error variable X is not defined in model or in data set. I've tried a number of different things, including changing the structure of my data and entering my data directly into OpenBUGS. I'm posting this in the hope that someone else has experience with this error, and perhaps knows why OpenBUGS is not recognizing variable X even though it is clearly defined as far as I can tell.
I've also gotten the error expected the collection operator c error pos 8 - this is not an error I've been getting previously, but I am similarly stumped.
Both the model and the data-simulation function come from Kery's Introduction to WinBUGS for Ecologists (2010). I will note that the data set here is in lieu of my own data, which is similar.
I am including the function to build the dataset as well as the model. Apologies for the length.
# Simulate data: 200 sites, 3 sampling rounds, 3 factors of the level 'trt',
# and continuous covariate 'X'
data.fn <- function(nsite = 180, nrep = 3, xmin = -1, xmax = 1, alpha.vec = c(0.01,0.2,0.4,1.1,0.01,0.2), beta0 = 1, beta1 = -1, ntrt = 3){
y <- array(dim = c(nsite, nrep)) # Array for counts
X <- sort(runif(n = nsite, min = xmin, max = xmax)) # covariate values, sorted
# Relationship expected abundance - covariate
x2 <- rep(1:ntrt, rep(60, ntrt)) # Indicator for population
trt <- factor(x2, labels = c("CT", "CM", "CC"))
Xmat <- model.matrix(~ trt*X)
lin.pred <- Xmat[,] %*% alpha.vec # Value of lin.predictor
lam <- exp(lin.pred)
# Add Poisson noise: draw N from Poisson(lambda)
N <- rpois(n = nsite, lambda = lam)
table(N) # Distribution of abundances across sites
sum(N > 0) / nsite # Empirical occupancy
totalN <- sum(N) ; totalN
# Observation process
# Relationship detection prob - covariate
p <- plogis(beta0 + beta1 * X)
# Make a 'census' (i.e., go out and count things)
for (i in 1:nrep){
y[,i] <- rbinom(n = nsite, size = N, prob = p)
}
# Return stuff
return(list(nsite = nsite, nrep = nrep, ntrt = ntrt, X = X, alpha.vec = alpha.vec, beta0 = beta0, beta1 = beta1, lam = lam, N = N, totalN = totalN, p = p, y = y, trt = trt))
}
data <- data.fn()
And here is the model:
sink("nmix1.txt")
cat("
model {
# Priors
for (i in 1:3){ # 3 treatment levels (factor)
alpha0[i] ~ dnorm(0, 0.01)
alpha1[i] ~ dnorm(0, 0.01)
}
beta0 ~ dnorm(0, 0.01)
beta1 ~ dnorm(0, 0.01)
# Likelihood
for (i in 1:180) { # 180 sites
C[i] ~ dpois(lambda[i])
log(lambda[i]) <- log.lambda[i]
log.lambda[i] <- alpha0[trt[i]] + alpha1[trt[i]]*X[i]
for (j in 1:3){ # each site sampled 3 times
y[i,j] ~ dbin(p[i,j], C[i])
lp[i,j] <- beta0 + beta1*X[i]
p[i,j] <- exp(lp[i,j])/(1+exp(lp[i,j]))
}
}
# Derived quantities
}
",fill=TRUE)
sink()
# Bundle data
trt <- data$trt
y <- data$y
X <- data$X
ntrt <- 3
# Standardise covariates
s.X <- (X - mean(X))/sd(X)
win.data <- list(C = y, trt = as.numeric(trt), X = s.X)
# Inits function
inits <- function(){ list(alpha0 = rnorm(ntrt, 0, 2),
alpha1 = rnorm(ntrt, 0, 2),
beta0 = rnorm(1,0,2), beta1 = rnorm(1,0,2))}
# Parameters to estimate
parameters <- c("alpha0", "alpha1", "beta0", "beta1")
# MCMC settings
ni <- 1200
nb <- 200
nt <- 2
nc <- 3
# Start Markov chains
out <- bugs(data = win.data, inits, parameters, "nmix1.txt", n.thin=nt,
n.chains=nc, n.burnin=nb, n.iter=ni, debug = TRUE)
Note: This answer has gone through a major revision, after I noticed another problem with the code.
If I understand your model correctly, you are mixing up the y and N from the simulated data, and what is passed as C to Bugs. You are passing the y variable (a matrix) to the C variable in the Bugs model, but this is accessed as a vector. From what I can see C is representing the number of "trials" in your binomial draw (actual abundances), i.e. N in your data set. The variable y (a matrix) is called the same thing in both the simulated data and in the Bugs model.
This is a reformulation of your model, as I understand it, and this runs ok:
sink("nmix1.txt")
cat("
model {
# Priors
for (i in 1:3){ # 3 treatment levels (factor)
alpha0[i] ~ dnorm(0, 0.01)
alpha1[i] ~ dnorm(0, 0.01)
}
beta0 ~ dnorm(0, 0.01)
beta1 ~ dnorm(0, 0.01)
# Likelihood
for (i in 1:180) { # 180 sites
C[i] ~ dpois(lambda[i])
log(lambda[i]) <- log.lambda[i]
log.lambda[i] <- alpha0[trt[i]] + alpha1[trt[i]]*X[i]
for (j in 1:3){ # each site sampled 3 times
y[i,j] ~ dbin(p[i,j], C[i])
lp[i,j] <- beta0 + beta1*X[i]
p[i,j] <- exp(lp[i,j])/(1+exp(lp[i,j]))
}
}
# Derived quantities
}
",fill=TRUE)
sink()
# Bundle data
trt <- data$trt
y <- data$y
X <- data$X
N<- data$N
ntrt <- 3
# Standardise covariates
s.X <- (X - mean(X))/sd(X)
win.data <- list(y = y, trt = as.numeric(trt), X = s.X, C= N)
# Inits function
inits <- function(){ list(alpha0 = rnorm(ntrt, 0, 2),
alpha1 = rnorm(ntrt, 0, 2),
beta0 = rnorm(1,0,2), beta1 = rnorm(1,0,2))}
# Parameters to estimate
parameters <- c("alpha0", "alpha1", "beta0", "beta1")
# MCMC settings
ni <- 1200
nb <- 200
nt <- 2
nc <- 3
# Start Markov chains
out <- bugs(data = win.data, inits, parameters, "nmix1.txt", n.thin=nt,
n.chains=nc, n.burnin=nb, n.iter=ni, debug = TRUE)
Overall, the results from this model looks ok, but there are long autocorrelation lags for beta0 and beta1. The estimate of beta1 also seems a bit off(~= -0.4), so you might want to recheck the Bugs model specification, so that it is matching the simulation model (i.e. that you are fitting the correct statistical model). At the moment, I'm not sure that it does, but I don't have the time to check further right now.
I got the same message trying to pass a factor to OpenBUGS. Like so,
Ndata <- list(yrs=N$yrs, site=N$site), ... )
The variable "site" was not passed by the "bugs" function. It simply was not in list passed
to OpenBUGS
I solved the problem by passing site as numeric,
Ndata <- list(yrs=N$yrs, site=as.numeric(N$site)), ... )