I am new to R. I want to do some parameter estimation using maximum likelihood estimation (MLE).
Here is my attempt:
The data are
my_data = c(0.1,0.2,1,1,1,1,1,2,3,6,7,11,12,18,18,18,18,18,21,32,36,40,
45,45,47,50,55,60,63,63,67,67,67,67,72,75,79,82,82,83,
84,84,84,85,85,85,85,85,86,86)
and
lx <- function(p,x){
l <- p[1]
b <- p[2]
a <- p[3]
n <- length(x)
lnL <- n*log(l)+n*log(b)+n*log(a)+(b-1)*sum(log(x))+(a-1)*sum(log(1+l*x^b))+n-sum(1+l*x^b)
return(-lnL)
}
Note: l is λ, b is β, and a is α.
And here is the optim function
optim(p=c(1,1,1),fn = lx, method = "L-BFGS-B",
lower = c(0.0001, 0.0001, 0.0001),
control = list(), hessian = FALSE, x = my_data)
After I run this code, I get an error message:
Error in optim(p = c(1, 1, 1), fn = lx, method = "L-BFGS-B", lower = c(1e-04, :
objective function in optim evaluates to length 50 not 1
What's wrong with my code? Can you help me to fix it? Thanks in advance!
Instead of hand-coding the log-likelihood, you can fit the density directly with MASS::fitdistr.
#
# Power Generalized Weibull distribution
#
# x > 0, alpha, beta, lambda > 0
#
dpowergweibull <- function(x, alpha, beta, lambda){
f1 <- lambda * beta * alpha
f2 <- x^(beta - 1)
f3 <- (1 + lambda * x^beta)^(alpha - 1)
f4 <- exp(1 - (1 + lambda * x^beta)^alpha)
f1 * f2 * f3 * f4
}
ppowergweibull <- function(q, alpha, beta, lambda){
1 - exp(1 - (1 + lambda * q^beta)^alpha)
}
my_data <- c(0.1,0.2,1,1,1,1,1,2,3,6,7,11,12,18,18,18,18,18,21,32,36,40,
45,45,47,50,55,60,63,63,67,67,67,67,72,75,79,82,82,83,
84,84,84,85,85,85,85,85,86,86)
start_par <- list(alpha = 0.1, beta = 0.1, lambda = 0.1)
y1 <- MASS::fitdistr(my_data, dpowergweibull, start = start_par)
start_par2 <- list(shape = 1, rate = 1)
y2 <- MASS::fitdistr(my_data, "gamma", start = start_par2)
hist(my_data, freq = FALSE)
curve(dpowergweibull(x, y1$estimate[1], y1$estimate[2], y1$estimate[3]),
from = 0.1, to = 90, col = "red", add = TRUE)
curve(dgamma(x, y2$estimate[1], y2$estimate[2]),
from = 0.1, to = 90, col = "blue", add = TRUE)
This page shows an SIR model in R, https://rstudio-pubs-static.s3.amazonaws.com/382648_93783f69a2fd4df98ade8751c21abbad.html, together with its solution and the optimization of the $\beta$ and $\gamma$ parameters (reproduced below).
In this code both $\beta$ and $\gamma$ are assumed to be constant over the whole time period.
What I want is a time-varying beta. It does not need to change each day; we have fourteen days of data, so it would suffice if it changed after seven days, i.e. $\beta_1$ for days[0:6] and $\beta_2$ for days[7:13]. I would then run the optimization algorithm below for both, so that in the end I receive a vector of optimal values $(\beta_1, \beta_2, \gamma)$, where $\gamma$ stays constant over the whole time. Would this be possible with a modification of the code given? If so, could someone help me modify it to obtain the desired output?
day cases
0 1
1 6
2 26
3 73
4 222
5 293
6 258
7 236
8 191
9 124
10 69
11 26
12 11
13 4
#here beta is assumed to be constant
sir_equations <- function(time, variables, parameters) {
with(as.list(c(variables, parameters)), {
dS <- -beta * I * S
dI <- beta * I * S - gamma * I
dR <- gamma * I
return(list(c(dS, dI, dR)))
})
}
parameters_values <- c(
beta = 0.004, # infectious contact rate (/person/day)
gamma = 0.5 # recovery rate (/day)
)
initial_values <- c(
S = 999, # number of susceptibles at time = 0
I = 1, # number of infectious at time = 0
R = 0 # number of recovered (and immune) at time = 0
)
time_values <- seq(0, 10) # days
sir_values_1 <- ode(
y = initial_values,
times = time_values,
func = sir_equations,
parms = parameters_values
)
sir_values_1
sir_values_1 <- as.data.frame(sir_values_1)
sir_values_1
sir_1 <- function(beta, gamma, S0, I0, R0, times) {
require(deSolve) # for the "ode" function
# the differential equations:
sir_equations <- function(time, variables, parameters) {
with(as.list(c(variables, parameters)), {
dS <- -beta * I * S
dI <- beta * I * S - gamma * I
dR <- gamma * I
return(list(c(dS, dI, dR)))
})
}
# the parameters values:
parameters_values <- c(beta = beta, gamma = gamma)
# the initial values of variables:
initial_values <- c(S = S0, I = I0, R = R0)
# solving
out <- ode(initial_values, times, sir_equations, parameters_values)
# returning the output:
as.data.frame(out)
}
sir_1(beta = 0.004, gamma = 0.5, S0 = 999, I0 = 1, R0 = 0, times = seq(0, 10))
flu <- read.table("https://uc8f29367cc06ca2f989ead2cd8e.dl.dropboxusercontent.com/cd/0/inline/BNzBF_deK5fmfGXWCB9a5YO95JkiLNFRc2Jq1w-qGNqQMXxnpn-yL-cAVoE1JQG7D4Od_SkG8YVKesqBr7wMoQHHSTNbHU_hhyahK7up0EDEft-u7Vf4xZJvu4cTNuUjXFb-QaHlOfBPnFhKspeb7RbO/file", header = TRUE)
predictions <- sir_1(beta = 0.004, gamma = 0.5, S0 = 999, I0 = 1, R0 = 0, times = flu$day)
predictions
model_fit <- function(beta, gamma, data, N = 763, ...) {
I0 <- data$cases[1] # initial number of infected (from data)
times <- data$day # time points (from data)
# model's predictions:
predictions <- sir_1(beta = beta, gamma = gamma, # parameters
S0 = N - I0, I0 = I0, R0 = 0, # variables' initial values
times = times) # time points
# plotting the observed prevalences:
with(data, plot(day, cases, ...))
# adding the model-predicted prevalence:
with(predictions, lines(time, I, col = "red"))
}
ss <- function(beta, gamma, data = flu, N = 763) {
I0 <- data$cases[1]
times <- data$day
predictions <- sir_1(beta = beta, gamma = gamma, # parameters
S0 = N - I0, I0 = I0, R0 = 0, # variables' initial values
times = times) # time points
sum((predictions$I[-1] - data$cases[-1])^2)
}
ss(beta = 0.004, gamma = 0.5)
beta_val <- seq(from = 0.0016, to = 0.004, length.out = 100)
ss_val <- sapply(beta_val, ss, gamma = 0.5)
min_ss_val <- min(ss_val)
min_ss_val
beta_hat <- beta_val[ss_val == min_ss_val]
beta_hat
plot(beta_val, ss_val, type = "l", lwd = 2,
xlab = expression(paste("infectious contact rate ", beta)),
ylab = "sum of squares")
# adding the minimal value of the sum of squares:
abline(h = min_ss_val, lty = 2, col = "grey")
# adding the estimate of beta:
abline(v = beta_hat, lty = 2, col = "grey")
ss(beta = 0.004, gamma = 0.5)
ss2 <- function(x) {
ss(beta = x[1], gamma = x[2])
}
ss2(c(0.004, 0.5))
starting_param_val <- c(0.004, 0.5)
ss_optim <- optim(starting_param_val, ss2)
This is certainly possible. All you need is an if statement in your gradient function:
beta <- if (time < 7) beta1 else beta2
or
beta <- ifelse(time < 7, beta1, beta2)
(so that days 0-6 use beta1 and days 7-13 use beta2), and make sure your parameter vector includes both beta1 and beta2.
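A minimal sketch of that modification, reusing sir_1 and ss from the question (the names sir_2 and ss_2 are mine; the 7-day switch point follows the question's days[0:6] / days[7:13] split):
library(deSolve)

sir_2 <- function(beta1, beta2, gamma, S0, I0, R0, times) {
  sir_equations <- function(time, variables, parameters) {
    with(as.list(c(variables, parameters)), {
      beta <- ifelse(time < 7, beta1, beta2)  # beta1 for days 0-6, beta2 for days 7-13
      dS <- -beta * I * S
      dI <-  beta * I * S - gamma * I
      dR <-  gamma * I
      return(list(c(dS, dI, dR)))
    })
  }
  parameters_values <- c(beta1 = beta1, beta2 = beta2, gamma = gamma)
  initial_values <- c(S = S0, I = I0, R = R0)
  as.data.frame(ode(initial_values, times, sir_equations, parameters_values))
}

# sum of squares over all three parameters, x = c(beta1, beta2, gamma)
ss_2 <- function(x, data = flu, N = 763) {
  I0 <- data$cases[1]
  predictions <- sir_2(beta1 = x[1], beta2 = x[2], gamma = x[3],
                       S0 = N - I0, I0 = I0, R0 = 0, times = data$day)
  sum((predictions$I[-1] - data$cases[-1])^2)
}

optim(c(0.004, 0.004, 0.5), ss_2)$par  # optimal values of (beta1, beta2, gamma)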
I'm trying to run a Bayesian pooled model in JAGS through R and I am getting an error message.
I found from people who have encountered similar problems that it can be triggered by the values of the priors, negative values, taking the log of a negative number, syntax errors, etc. I have eliminated all of these, but the error persists.
## just for the prediction
pred.jac <- seq(min(test.bayes$Latitude), max(test.bayes$Latitude), 10)
data = list(
jac = test.bayes$Jaccard,
lat = test.bayes$Latitude,
pred.jac = pred.jac)
inits = list(
list(alpha = 1, beta = 2.5, sigma = 50),
list(alpha = 2, beta = 1.5, sigma = 20),
list(alpha = 3, beta = 0.75, sigma = 10))
{
sink("BetaPooledJAGS.R")
cat("
model{
# priors
alpha ~ dnorm(0, 0.0001)
beta ~ dnorm(0, 0.0001)
sigma ~ dunif(0, 10)
# likelihood
for (i in 1:length(jac)) {
mu[i] <- alpha + beta * lat[i]
a[i] <- ((1 - mu[i]) / (sigma^2) - 1 / mu[i]) * mu[i]^2
b[i] <- alpha * (1 / mu[i] - 1)
jac[i] ~ dbeta(a[i], b[i])
}
# predicted jaccard as derived quantities
for (i in 1:length(pred.jac)) {
mu_pred[i] <- alpha + beta * lat[i]
mu_pred1[i] <- exp(mu_pred[i])
}
}
",fill = TRUE)
sink()
}
n.adapt = 3000
n.update = 5000
n.iter = 5000
jm.pooled = jags.model(file="BetaPooledJAGS.R", data = data, n.adapt = n.adapt, inits = inits, n.chains = length(inits))
When I run the code, I get the error below:
Error in jags.model(file = "BetaPooledJAGS.R", data = data, n.adapt = n.adapt, : Error in node jac[1] Invalid parent values
Here's the link to a subset of my data.
https://fil.email/IuwgYhKs
You're getting negative values for b with those inits whenever lat is positive, and b must be > 0 for a beta distribution, in JAGS and more generally.
E.g. using the initials from inits[[1]]:
mu = 1 + 2.5*lat
Assuming lat is positive, mu > 1, so 1/mu < 1 and 1/mu - 1 < 0. Therefore
b = alpha * (1/mu - 1) = 1 * (a negative number) < 0,
which makes dbeta(a[1], b[1]) invalid, hence the "Invalid parent values" error at node jac[1].
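You can confirm this in R before calling jags.model (a quick sketch, assuming the test.bayes data frame from the linked data is loaded):
# evaluate b at the initial values from inits[[1]]
alpha0 <- 1; beta0 <- 2.5
mu <- alpha0 + beta0 * test.bayes$Latitude
b  <- alpha0 * (1 / mu - 1)
range(b)  # all negative when Latitude > 0, which dbeta() rejects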
pval.dist.sim = function(n, sigma_x, rho, reps = 2500){
p = 5; sigma = sqrt(2)
beta = c(0.5, 0.5, 0, 0.25, 0)
mu = 10
# generate vector for pvals
pval.list = numeric(reps)
for(r in 1:reps){
# generate design matrix
X = gen_X(n = n, p = 5, rho = rho, sigma_x = sigma_x, mu = mu)
# generate the XtXinv portion of equation
XtXinv = qr.solve(crossprod(X))
sqrtXtXinv55 = sqrt(XtXinv[5,5])
y = X %*% beta + rnorm(n = n)
beta.hat = XtXinv %*% crossprod(X, y)
sE = sqrt(sum((y - X %*% beta.hat)^2)/(n-p))
t.val = beta.hat[3]/(sE * sqrtXtXinv55)
pval.list[r] = 2 * pt(-abs(t.val), df = n - p)
}
return(pval.list)
}
Above is the pval.dist.sim function. I need to run it to generate the p-values used to build my power curve.
set.seed(3701)
# givens
p = 5; d = 2; mu = 10; sigmasqrd = 2; reps = 2500
n.list = seq(from=10, to=150, by=10)
# create a vector for the estimates of the power
est.power = numeric(length(n.list))
# create a vector for the left endpoints of the 95% CI
LB.list = numeric(length(n.list))
# create a vector for the right endpoints of the 95% CI
UB.list = numeric(length(n.list))
for(j in 1:length(n.list)){
# perform the test reps times
pvals = pval.dist.sim(n = n.list[j], sigma_x = 1.5, rho = 0.2, reps = reps )
# record the simulated estimate of the power
est.power[j] = mean(pvals<0.05)
# compute the 95% conf int
bounds = binom.test(x=sum(pvals < 0.05), n = reps, conf.level = 0.95)$conf.int[1:2]
LB.list[j] = bounds[1]
UB.list[j] = bounds[2]
}
## plot the power curve estimation
plot(n.list, est.power, type = "l", xlab = "n", ylab = "Power")
The issue I am having is that, when I plug in the p-values, the estimated power is drastically low: I am getting values in the single-digit percentages. What am I doing wrong?
I am looking to optimize the fit of a model that describes the amount of litter collected in a network of 0.5 m^2 litter traps in a plot of mapped trees of known diameter and species. The model of choice has two components: allometric scaling of litter production, and exponential decay in litter travel distance.
tree1.litter = alpha*gamma^2 * DBH^Beta/(2*pi) * exp(-gamma*z-delta*DBH)
However, our trap data contain input from multiple trees (this is the "missing level" referred to in the title):
Obs.Litter = tree1.litter + tree2.litter + ... + treej.litter + error
So far I have had very mixed results, even on simulated data. It seems that, with enough combinations of diameters and distances, the functions should be reasonably well constrained. This analysis has been performed in an article I'm copy-catting. I've also tried the analysis on log(Obs.Litter), which I think is the way to go, but I am not sure that the way I've coded the log version would perform any better.
At this point I'm just looking for any sort of advice (code-based or conceptual) from someone more experienced with fitting nonlinear regressions or with model-fitting problems involving this type of "hidden process". Code for the data simulation and the various likelihoods is included below. I've had a bit more success estimating these parameters with a Bayesian hierarchical model in OpenBUGS, but only with informative priors.
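For reference, taking logs of the single-tree model above gives

$$\log(\text{tree}_j\text{.litter}) = \log\alpha + 2\log\gamma + \beta\log D_j - \gamma z_j - \delta D_j - \log(2\pi),$$

but because each trap sums the contributions of several trees, the trap-level expectation on the log scale is a log-sum-exp rather than a sum of logs:

$$\log E[\text{Obs.Litter}_k] = \log \sum_{j \in \text{trap } k} \exp\big(\log(\text{tree}_j\text{.litter})\big).$$

This is the form coded in log.Litter.Func below.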
library(plyr)
########################
##Generate Data#########
########################
alpha = 5
Beta = 2
gamma = .2
delta = .02
n = 600 #Number of trees
N.trap = 45 #Number of litter traps
D = rlnorm(n, 2)+5 #generate diameters
Z = runif(n, 0, 25) #generate distances
trap.id = sort(sample(1:N.trap, size = n, replace = T)) #assign trees to traps
tree.lit = (2*pi)^-1*alpha*gamma^2*D^Beta * exp(-gamma*Z-delta*D) #generate litter
log.tree.lit = -log(2*pi) + log(alpha) + 2*log(gamma) + Beta*log(D) - gamma*Z - delta*D #log-scale version of tree.lit (not used below)
litter = data.frame(D=D, Z = Z, trap.id = trap.id, tree.lit = tree.lit)
data = ddply(litter, .(trap.id), summarize, trap.lit = sum(tree.lit), n.trees=length(trap.id) )
trap.lit = data[,2]
#####################################
##### Maximum Likelihood Optimization
#####################################
library(bbmle)
log.Litter.Func <- function(alpha, Beta, gamma, delta, sigma, D, Z, N.trap, trap.id, Obs.Litter){
  log.Expected.Litter.tree = -log(2*pi) + log(alpha) + 2*log(gamma) + Beta*log(D) - gamma*Z - delta*D
  log.Expected.Litter.trap = rep(0, N.trap)
  for(i in 1:N.trap){
    # traps sum tree contributions on the natural scale: log-sum-exp, not a sum of logs
    log.Expected.Litter.trap[i] <- log(sum(exp(log.Expected.Litter.tree[trap.id==i])))
  }
  # dlnorm takes the data on the natural scale and the mean/sd on the log scale (sdlog, not sd)
  -sum(dlnorm(Obs.Litter, meanlog = log.Expected.Litter.trap, sdlog = sigma, log = TRUE))
}
Litter.Func<-function(alpha, Beta, gamma, delta, sigma, D, Z, N.trap, trap.id, Obs.Litter){
Expected.Litter.tree = 1/(2*pi) * alpha * gamma^2 * D^Beta *exp(-gamma*Z - delta*D)
Expected.Litter.trap = rep(0, N.trap)
for(i in 1:N.trap){
Expected.Litter.trap[i] <- sum(Expected.Litter.tree[trap.id==i])
}
-sum(dnorm(Obs.Litter, Expected.Litter.trap, sd=sigma, log=T))
}
log.fit <- mle2(log.Litter.Func,
  start = list(alpha = 5, gamma = .2, Beta = 2, delta = .02, sigma = 1),
  #upper = list(alpha = 20, gamma = 1, Beta = 4, delta = .2, sigma = 20),
  #lower = list(alpha = .002, gamma = .002, Beta = .0002, delta = .000000002, sigma = .020),
  #method = "L-BFGS-B",
  data = list(D = D, Z = Z, N.trap = N.trap, trap.id = litter$trap.id, Obs.Litter = trap.lit)
)
fit <- mle2(Litter.Func,
  start = list(alpha = 5, gamma = .2, Beta = 2, delta = .02, sigma = 1),
  #upper = list(alpha = 20, gamma = 1, Beta = 4, delta = .2, sigma = 20),
  #lower = list(alpha = .002, gamma = .002, Beta = .0002, delta = .000000002, sigma = .020),
  #method = "L-BFGS-B",
  data = list(D = D, Z = Z, N.trap = N.trap, trap.id = litter$trap.id, Obs.Litter = trap.lit)
)
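Once the optimizations converge, the fits can be inspected and compared against the values used to simulate the data (a usage sketch; summary() and coef() are the standard bbmle accessors for mle2 objects):
summary(log.fit)  # estimates, standard errors, z-values
summary(fit)
# side-by-side comparison with the simulated truth
rbind(truth     = c(alpha = 5, Beta = 2, gamma = 0.2, delta = 0.02),
      lognormal = coef(log.fit)[c("alpha", "Beta", "gamma", "delta")],
      normal    = coef(fit)[c("alpha", "Beta", "gamma", "delta")])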