I need to numerically integrate the following:
I tried to use cubature and pracma but they don't seem to support functional integration limits. I found a attempt to use cubature by:
library(cubature)
integrand <- function(arg) {
x <- arg[1]
y <- arg[2]
z <- arg[3]
w <- arg[4]
v<- arg[5]
ff <- dnorm(x, 10,2)*dnorm(y, 10,2)*dnorm(z, 10,2)*dnorm(w, 10,2)* dnorm(v, 10,2)* (x+y+z+w+v<=52)
return(ff)
}
R <- cuhre(f = integrand,
lowerLimit=c(0,0,0,0,0),
upperLimit=c(20,20,20,20,20),
relTol = 1e-5, absTol= 1e-5)
But the author doesn't guarantee that it's correct to do it.
Is there a way to numerically integrate multiple integrals with functional limits in R?
The domain of integration is the canonical simplex scaled by the factor 42. To evaluate an integral on a simplex, use the SimplicialCubature package:
integrand <- function(arg) {
x <- arg[1]
y <- arg[2]
z <- arg[3]
w <- arg[4]
v <- arg[5]
dnorm(x, 10, 2) *
dnorm(y, 10, 2) *
dnorm(z, 10, 2) *
dnorm(w, 10, 2) *
dnorm(v, 10, 2)
}
library(SimplicialCubature)
Simplex <- 42 * CanonicalSimplex(5)
Here is the command to run:
adaptIntegrateSimplex(integrand, S = Simplex)
# $integral
# [1] 0.03252553
#
# $estAbsError
# [1] 0.3248119
#
# $functionEvaluations
# [1] 9792
#
# $returnCode
# [1] 1
#
# $message
# [1] "error: maxEvals exceeded - too many function evaluations"
The algorithm has reached the maximal number of function evaluations and the estimated absolute error is 0.3248119, while the estimated value of the integral is 0.03252553. This is a big error.
We can increase the maximum number of function evaluations allowed. Taking 1e6, the computation is a bit slow and we get:
adaptIntegrateSimplex(integrand, S = Simplex, maxEvals = 1e6)
# $integral
# [1] 0.03682535
#
# $estAbsError
# [1] 0.001004083
#
# $functionEvaluations
# [1] 999811
#
# $returnCode
# [1] 1
#
# $message
# [1] "error: maxEvals exceeded - too many function evaluations"
The estimated error has decreased to 0.001004083, quite better.
Note that we can approximate this integral by using simulations, because this integral is the measure of the simplex under a multivariate normal distribution:
library(mvtnorm)
Sigma <- 2^2 * diag(5)
Mean <- rep(10, 5)
set.seed(666)
sims <- rmvnorm(1e6, mean = Mean, sigma = Sigma)
f <- function(X){ # test whether 0 < x < 42, 0 < x + y < 42, 0 < x + y + z < 42, ...
all(X > 0 & cumsum(X) < 42)
}
mean(apply(sims, 1, f))
# 0.037083
I am using R optim() function to estimate set of parameters which optimize user defined function shown below. But optim() out put is:
Error in optim(pstart, llAgedepfn, method = "L-BFGS-B", upper = up, lower = lo) :
L-BFGS-B needs finite values of 'fn'
Please help. The complete script is shown below:
dataM<-cbind(c(1.91,0.29,0.08,0.02,0.01,0.28,0.45,0.36,0.42,0.17,0.16,0.06,0.17,0.17,0.12),
c(0.27,4.54,0.59,0.05,0.04,0.13,0.48,0.68,0.66,0.18,0.11,0.06,0.08,0.08,0.08),
c(0.07,0.57,4.48,0.48,0.02,0.05,0.09,0.43,0.78,0.52,0.17,0.10,0.05,0.05,0.14),
c(0.02,0.04,0.44,4.34,0.36,0.09,0.07,0.11,0.41,0.77,0.43,0.10,0.03,0.04,0.14),
c(0.01,0.04,0.01,0.36,2.20,0.46,0.19,0.15,0.19,0.34,0.62,0.30,0.09,0.03,0.22),
c(0.22,0.11,0.05,0.09,0.45,0.91,0.61,0.43,0.37,0.26,0.41,0.63,0.29,0.16,0.15),
c(0.31,0.35,0.07,0.05,0.16,0.54,0.81,0.59,0.48,0.36,0.33,0.43,0.47,0.26,0.20),
c(0.22,0.45,0.29,0.08,0.11,0.34,0.53,0.85,0.71,0.39,0.27,0.26,0.26,0.28,0.38),
c(0.22,0.36,0.44,0.26,0.12,0.24,0.36,0.59,0.91,0.61,0.35,0.28,0.20,0.22,0.29),
c(0.09,0.10,0.30,0.49,0.22,0.17,0.28,0.33,0.62,0.80,0.52,0.29,0.20,0.11,0.46),
c(0.10,0.07,0.12,0.32,0.48,0.32,0.30,0.27,0.42,0.61,0.78,0.47,0.33,0.23,0.49),
c(0.04,0.04,0.06,0.08,0.24,0.53,0.41,0.28,0.36,0.36,0.50,0.67,0.51,0.19,0.47),
c(0.10,0.05,0.04,0.02,0.07,0.23,0.43,0.26,0.23,0.23,0.33,0.48,0.75,0.51,0.49),
c(0.05,0.04,0.03,0.05,0.02,0.10,0.19,0.22,0.21,0.10,0.18,0.14,0.40,0.79,0.82),
c(0.03,0.02,0.03,0.03,0.06,0.04,0.06,0.12,0.11,0.18,0.16,0.14,0.16,0.34,1.26)
)
NormCM <- dataM/eigen(CMWkday)$values[1] #Normalizing the contact mtrix - divide by the largest eigen value
w <- c(495,528,548,603,617,634,720,801,957,937,798,755,795,1016,2469)
g2 <- c(770,622,726,559,410,547,564,472,399,397,340,308,337,91,84)
h2 <- c(269,426,556,430,271,284,303,207,194,181,126,106,74,24,23)
z2 <- h2/g2
g1 <- c(774,527,665,508,459,539,543,492,402,412,365,342,213,146,152)
h1 <- c(56,31,84,173,103,85,123,70,71,80,55,25,18,12,26)
z1 <- h1/g1
#### Normal loglikelihood #########
llnormfn <- function(q) {
tol <- 1e-9
final.size.start <- 0.8
zeta <- rep(final.size.start, nrow(NormCM))
last.zeta <- rep(0, nrow(NormCM))
first.run <- T
current.diff <- tol+1
loglik <- 0
while (current.diff > tol) {
zeta <- 1-exp(-(q*(zeta%*%NormCM)))
current.diff <- sum(abs(last.zeta-zeta))
last.zeta <-zeta
}
mu <- c(zeta)
zigma <- z1*(1-z1)/g1 + (z1+mu)*(1-(z1+mu))/g2
logliknorm <- -sum((((z2-z1)-mu)**2)/2*zigma + 0.5*log(2*pi*zigma))
return(logliknorm)
}
pstart <- c(1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)
up <- c(5,5,5,5,5,5,5,5,5,5,5,5,5,5,5)
lo <- c(0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1)
estm <- optim(pstart, llnormfn, method = "L-BFGS-B", upper = up, lower = lo )
Your llnormfn doesn't return a finite value for all values of its parameters within the range. For example at the upper limit:
> llnormfn(up)
[1] NaN
Warning message:
In log(2 * pi * zigma) : NaNs produced
Because zigma must be less than zero here.
If you restrict the range a bit you can eventually find a spot where it does work...
> llnormfn(up-2)
[1] NaN
Warning message:
In log(2 * pi * zigma) : NaNs produced
> llnormfn(up-3)
[1] 42.96818
Let's check it works at the lower range:
> llnormfn(lo)
[1] 41.92578
that looks fine. So either you've set that upper limit outside the computationally valid range of your function, or you've got a bug in your llnormfn function, or both, or something else.
If you do run the optimisation with a reduced upper bound you do get convergence:
> estm <- optim(pstart, llnormfn, method = "L-BFGS-B", upper = up-3, lower = lo )
> estm
$par
[1] 1.9042672 1.0891264 0.9916916 0.6208685 1.2413983 1.4822433 1.1243878
[8] 1.5224263 1.3686933 1.4876350 1.6231518 2.0000000 2.0000000 2.0000000
[15] 2.0000000
$value
[1] 38.32182
$counts
function gradient
23 23
$convergence
[1] 0
$message
[1] "CONVERGENCE: REL_REDUCTION_OF_F <= FACTR*EPSMCH"
Although you might notice some of those parameters are at the upper value (2.0) which is an alarm bell.
Check your function behaves sensibly for its input values - try fixing all-but-one and plotting how llnormfn behaves while varying one. I just had a quick look and the function does not look smooth at all, with lots of discontinuities, so I doubt BFGS is a good method for optimising.
e.g varying the fifth parameter between 0.1 and 2:
> s = seq(0.1,2,len=300)
> ss = sapply(1:length(s),function(i){ll=lo;ll[5]=s[i];llnormfn(ll)})
> plot(s,ss)
gives:
I'm having some trouble using optim() in R to solve for a likelihood involving an integral. I get an error that says "Error in optim(par = c(0.1, 0.1), LLL, method = "L-BFGS-B", lower = c(0, : L-BFGS-B needs finite values of 'fn'". Below is my code:
s1=c(1384,1,1219,1597,2106,145,87,1535,290,1752,265,588,1188,160,745,237,479,39,99,56,1503,158,916,651,1064,166,635,19,553,51,79,155,85,1196,142,108,325
,135,28,422,1032,1018,128,787,1704,307,854,6,896,902)
LLL=function (par) {
integrand1 <- function(x){ (x-s1[i]+1)*dgamma(x, shape=par[1], rate=par[2]) }
integrand2 <- function(x){ (-x+s1[i]+1)*dgamma(x, shape=par[1],rate=par[2]) }
likelihood = vector()
for(i in 1:length(s1)) {likelihood[i] =
log( integrate(integrand1,lower=s1[i]-1,upper=s1[i])$value+ integrate(integrand2,lower=s1[i],upper=s1[i]+1)$value )
}
like= -sum(likelihood)
return(like)
}
optim(par=c(0.1,0.1),LLL,method="L-BFGS-B", lower=c(0,0))
Thanks for your help.
Best,
YM
The objective function evaluated at the lower bounds of the parameters you provided is infinity.
LLL(c(0,0))
# [1] Inf
That's why L-BFGS-B fails. Try a different lower bound, e.g., c(0.001,0.001) and you will get a solution.
optim(par=c(0.1,0.1),LLL,method="L-BFGS-B", lower=c(0.001,0.001))
$par
[1] 0.6865841 0.0010000
$value
[1] 369.5532
$counts
function gradient
14 14
$convergence
[1] 0
$message
[1] "CONVERGENCE: REL_REDUCTION_OF_F <= FACTR*EPSMCH"
To get the 95% confidence intervals for the parameters try this:
res <- optim(par=c(0.1,0.1),LLL,method="L-BFGS-B", lower=c(0.005,0.005), hessian=TRUE)
n <- length(s1)
res$par # solution
# [1] 1.900928 0.005000
res$par - 1.96*sqrt(diag(solve(res$hessian)))/n # lower limit for 95% confint
# [1] 1.888152372 0.004963286
res$par + 1.96*sqrt(diag(solve(res$hessian)))/n # upper limit for 95% confint
# [1] 1.913703040 0.005036714
refer to this article: http://www.ms.uky.edu/~mai/sta321/MLEexample.pdf
I am trying to use http://rss.acs.unt.edu/Rdoc/library/stats/html/constrOptim.html in R to do optimization in R with some given linear constraints but not able to figure out how to set up the problem.
For example, I need to maximize $f(x,y) = log(x) + \frac{x^2}{y^2}$ subject to constraints $g_1(x,y) = x+y < 1$, $g_2(x,y) = x > 0$ and $g_3(x,y) = y > 0$. How do I do this in R? This is just a hypothetical example. Do not worry about its structure, instead I am interested to know how to set this up in R.
thanks!
Setting up the function was trivial:
fr <- function(x) { x1 <- x[1]
x2 <- x[2]
-(log(x1) + x1^2/x2^2) # need negative since constrOptim is a minimization routine
}
Setting up the constraint matrix was problematic due to a lack of much documentation, and I resorted to experimentation. The help page says "The feasible region is defined by ui %*% theta - ci >= 0". So I tested and this seemed to "work":
> rbind(c(-1,-1),c(1,0), c(0,1) ) %*% c(0.99,0.001) -c(-1,0, 0)
[,1]
[1,] 0.009
[2,] 0.990
[3,] 0.001
So I put in a row for each constraint/boundary:
constrOptim(c(0.99,0.001), fr, NULL, ui=rbind(c(-1,-1), # the -x-y > -1
c(1,0), # the x > 0
c(0,1) ), # the y > 0
ci=c(-1,0, 0)) # the thresholds
For this problem there is a potential difficulty in that for all values of x the function goes to Inf as y -> 0. I do get a max around x=.95 and y=0 even when I push the starting values out to the "corner", but I'm somewhat suspicious that this is not the true maximum which I would have guessed was in the "corner".
EDIT:
Pursuing this I reasoned that the gradient might provide additional "direction" and added a gradient function:
grr <- function(x) { ## Gradient of 'fr'
x1 <- x[1]
x2 <- x[2]
c(-(1/x[1] + 2 * x[1]/x[2]^2),
2 * x[1]^2 /x[2]^3 )
}
This did "steer" the optimization a bit closer to the c(.999..., 0) corner, instead of moving away from it, as it did for some starting values. I remain somewhat disappointed that the process seems to "head for the cliff" when the starting values are close to the center of the feasible region:
constrOptim(c(0.99,0.001), fr, grr, ui=rbind(c(-1,-1), # the -x-y > -1
c(1,0), # the x > 0
c(0,1) ), # the y > 0
ci=c(-1,0, 0) )
$par
[1] 9.900007e-01 -3.542673e-16
$value
[1] -7.80924e+30
$counts
function gradient
2001 37
$convergence
[1] 11
$message
[1] "Objective function increased at outer iteration 2"
$outer.iterations
[1] 2
$barrier.value
[1] NaN
Note: Hans Werner Borchers posted a better example on R-Help that succeeded in getting the corner values by setting the constraint slightly away from the edge:
> constrOptim(c(0.25,0.25), fr, NULL,
ui=rbind( c(-1,-1), c(1,0), c(0,1) ),
ci=c(-1, 0.0001, 0.0001))
$par
[1] 0.9999 0.0001