How to double integrate with +inf? (R)

I have this code in R that tries to double-integrate a function f(x,y) using integral2, where one domain is (0.5, +Inf), but integral2 doesn't support that (because of the infinite bound). My question is: do you know a way to make it work?
library(pracma)   # integral2 comes from pracma

fprob8 <- function(a, b)
{
  f <- function(x, y)
  {
    densGamma <- function(x)
    {
      numitor   <- b ^ a * fgam(a)                  # denominator ("numitor")
      numarator <- x ^ (a - 1) * exp(-x / b)        # numerator ("numarator")
      return(numarator / numitor)
    }
    densBeta <- function(x)
    {
      numitor   <- fbet(a, b)
      numarator <- (1 - x) ^ (b - 1) * x ^ (a - 1)
      return(numarator / numitor)
    }
    return(densGamma(x) * densBeta(y))
  }
  ymin <- function(x) # upper limit for the second integral
  {
    return(min(x - 0.5, 1))
  }
  I <- integral2(f, 0.5, Inf, ymin, 1)
  return(I)
}

Use the cubature package. It evaluates multiple integrals, allowing infinite bounds.
Here is an example:
library(cubature)
f <- function(x) exp(-x[1]-x[2])
pcubature(f, c(0,0), c(Inf,Inf))$integral
# 1
library(pracma)
f2 <- function(x,y) f(c(x,y))
integral2(f2, 0, 1000, 0, 1000, vectorized = FALSE)$Q
# 1
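Applied to the original problem, a minimal sketch (my addition, not from the original answer) could look like the following. It assumes densGamma and densBeta are the standard gamma and beta densities (so dgamma and dbeta stand in for them), handles the variable lower limit with an indicator inside the integrand, and uses arbitrary a and b for the example call; the helper name fprob8.cub is hypothetical:
library(cubature)
fprob8.cub <- function(a, b) {
  g <- function(z) {
    x <- z[1]; y <- z[2]
    # enforce the non-rectangular region: y runs from min(x - 0.5, 1) up to 1
    if (y < min(x - 0.5, 1)) return(0)
    dgamma(x, shape = a, scale = b) * dbeta(y, shape1 = a, shape2 = b)
  }
  # integrate over the bounding box (0.5, Inf) x (0, 1); the indicator trims it
  hcubature(g, c(0.5, 0), c(Inf, 1))$integral
}
fprob8.cub(2, 3)   # example call; a and b chosen arbitrarily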

Related

Iterative optimization of alternative glm family

I'm setting up an alternative response function to the commonly used exponential function in Poisson GLMs, called softplus and defined as $\frac{1}{c} \log(1+\exp(c \eta))$, where $\eta$ corresponds to the linear predictor $X\beta$.
I already managed the optimization by setting the parameter $c$ to arbitrary fixed values and only searching for $\hat{\beta}$.
But now, for the next step, I have to optimize this parameter $c$ as well (iteratively alternating between an updated $\beta$ and the current $c$).
I tried to write a log-likelihood function and a score function and then to set up a Newton-Raphson optimization (using a while loop),
but I don't know how to separate the updating of $c$ in an outer step from the updating of $\beta$ in an inner step.
Are there any suggestions?
# Response function:
sp <- function(eta, c = 1) {
  return(log(1 + exp(abs(c * eta))) / c)
}
# Log likelihood
l.lpois <- function(par, y, X){
  beta <- par[1:(length(par)-1)]
  c <- par[length(par)]
  l <- rep(NA, times = length(y))
  for (i in 1:length(l)){
    l[i] <- y[i] * log(sp(X[i,]%*%beta, c)) - sp(X[i,]%*%beta, c)
  }
  l <- sum(l)
  return(l)
}
# Score function
score <- function(y, X, par){
  beta <- par[1:(length(par)-1)]
  c <- par[length(par)]
  s <- matrix(rep(NA, times = length(y)*length(par)), ncol = length(y))
  for (i in 1:length(y)){
    s[,i] <- c(X[i,], 1) * (y[i] * plogis(c * X[i,]%*%beta) / sp(X[i,]%*%beta, c) - plogis(c * X[i,]%*%beta))
  }
  score <- rep(NA, times = nrow(s))
  for (j in 1:length(score)){
    score[j] <- sum(s[j,])
  }
  return(score)
}
# Optimization function
opt <- function(y, X, b.start, eps=0.0001, maxiter = 1e5){
  beta <- b.start[1:(length(b.start)-1)]
  c <- b.start[length(b.start)]
  b.old <- b.start
  i <- 0
  conv <- FALSE
  while(conv == FALSE){
    eta <- X %*% b.old[1:(length(b.old)-1)]
    s <- score(y, X, b.old)
    h <- numDeriv::hessian(l.lpois, b.old, y = y, X = X)
    invh <- solve(h)
    # update
    b.new <- b.old + invh %*% s
    i <- i + 1
    # Test
    if(any(is.nan(b.new))){
      b.new <- b.old
      warning("convergence failed")
      break
    }
    # convergence reached?
    if(sqrt(sum((b.new - b.old)^2))/sqrt(sum(b.old^2)) < eps | i >= maxiter){
      conv <- TRUE
    }
    b.old <- b.new
  }
  eta <- X %*% b.new[1:(length(b.new)-1)]
  # covariance
  invh <- solve(numDeriv::hessian(l.lpois, b.new, y = y, X = X))
  fitted <- sp(eta, b.new[length(b.new)])
  result <- list("coefficients" = c(beta = b.new),
                 "fitted.values" = fitted,
                 "covariance" = invh)
}
# Running fails ..
n <- 100
x <- runif(n, 0, 1)
Xdes <- cbind(1, x)
eta <- 1 + 2 * x
y <- rpois(n, sp(eta, c = 1))
opt(y,Xdes,c(0,1,1))
You have two bugs.
In the score function, the expression
(y[i] * plogis(c * X[i,]%*%beta) / sp(X[i,]%*%beta, c) - plogis(c * X[i,]%*%beta))
returns a 1x1 matrix, so you must convert it to numeric:
as.numeric(y[i] * plogis(c * X[i,]%*%beta) / sp(X[i,]%*%beta, c) - plogis(c * X[i,]%*%beta))
And when building the matrix s, a closing parenthesis is misplaced; you had:
s <- matrix(rep(NA, times = length(y)*length(par), ncol = length(y))
while it should be:
s <- matrix(rep(NA, times = length(y)*length(par)), ncol = length(y))
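As for separating the updates of $c$ and $\beta$, here is a minimal sketch of an alternating scheme (my addition, not from the original answer) that reuses l.lpois and the example data y and Xdes defined above; the helper name alt.opt, the starting values, and the search interval for c are assumptions:
# Alternating (profile-style) optimization: the inner step updates beta with c
# fixed, the outer step updates c with beta fixed, until neither changes much.
alt.opt <- function(y, X, beta.start, c.start = 1, eps = 1e-6, maxiter = 100){
  beta <- beta.start
  c <- c.start
  for (it in 1:maxiter){
    old <- c(beta, c)
    # inner step: maximize the log-likelihood over beta, holding c fixed
    beta <- optim(beta, function(b) l.lpois(c(b, c), y, X),
                  control = list(fnscale = -1))$par
    # outer step: maximize over c, holding beta fixed (interval is an assumption)
    c <- optimize(function(cc) l.lpois(c(beta, cc), y, X),
                  interval = c(1e-3, 10), maximum = TRUE)$maximum
    if (sqrt(sum((c(beta, c) - old)^2)) < eps) break
  }
  list(beta = beta, c = c)
}
alt.opt(y, Xdes, beta.start = c(0, 1))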

Sampling a log-concave distribution using the adaptive rejection sampling method (R)

I am not very familiar with R. I have been trying to use the implementation of the adaptive rejection sampling method in R in order to sample from a particular distribution (its log-density and derivative are implemented as f1 and f1prima below). Here is my R code:
library(ars)

g1 <- function(x, r){ (1./r) * ((1 - x)^r) }

f1 <- function(x, a, k) {
  add <- 0
  for(i in 1:k) {
    add <- add + g1(x, i)
  }
  res <- (a * add) + (a - 1) * log(x) + k * log(1 - x)
  return(res)
}

g2 <- function(x, r){ (1 - x)^(r - 1) }

f1prima <- function(x, a, k) {
  add <- 0
  for(i in 1:k) {
    add <- add - g2(x, i)
  }
  res <- (a * add) + (a - 1)/x - k/(1 - x)
  return(res)
}

mysample1 <- ars(20, f1, f1prima, x = c(0.001, 0.09), m = 2, emax = 128,
                 lb = TRUE, xlb = 0.0, ub = TRUE, xub = 1, a = 0.5, k = 100)
The function is log-concave, but I get different error messages when I run ars, and fiddling around with the input parameters doesn't help. Any suggestion would be appreciated.
The first thing, which you already noticed, is that your log-concave function is not well defined at x=0 and x=1.0, so a useful interval would be something like 0.01...0.99, not 0.0...1.0.
Second, I don't like the idea of computing hundreds of terms in your summation.
So a good idea might be to express it in the following way, starting with the derivative:
$\sum_{i=0}^{N-1} q^i$ is obviously a geometric series and can be replaced with $(1-q^N)/(1-q)$, where $q = 1-x$.
This is the derivative, so to get the corresponding term in the function itself, just integrate it.
http://www.wolframalpha.com/input/?i=integrate+(1-q%5EN)%2F(1-q)+dq will return the Gauss hypergeometric function ${}_2F_1$ plus a logarithm:
$-q^{N+1}\,{}_2F_1(1, N+1; N+2; q)/(N+1) - \log(1-q)$
NB: it is the same integral as the Beta one before, but dealing with it was a bit more cumbersome.
So, here is the code to compute those terms:
library(gsl)
library(ars)
library(ggplot2)

Gauss2F1 <- function(a, b, c, x) {
  ifelse(x >= 0.0 & x < 1.0,
         hyperg_2F1(a, b, c, x),
         hyperg_2F1(c - a, b, c, 1.0 - 1.0/(1.0 - x))/(1.0 - x)^b)
}

# closed form of sum_{k=1}^{N} q^k / k via the hypergeometric 2F1
f1sum <- function(x, N) {
  q <- 1.0 - x
  - q^(N+1) * Gauss2F1(1, N+1, N+2, q)/(N+1) - log(1.0 - q)
}

# direct summation of the same series, sum_{k=1}^{N} q^k / k
f1sum.1 <- function(x, N) {
  q <- 1.0 - x
  res <- rep(0.0, length.out = length(x))
  s <- rep(1.0, length.out = length(x))
  for(k in 1:N) {
    s <- s * q
    res <- res + s / as.numeric(k)
  }
  res
}

f1 <- function(x, a, N) {
  a * f1sum(x, N) + (a - 1.0)*log(x) + N*log(1.0 - x)
}
f1.1 <- function(x, a, N) {
  a * f1sum.1(x, N) + (a - 1.0)*log(x) + N*log(1.0 - x)
}

# closed form of the geometric series 1 + q + ... + q^(N-1)
f1primesum <- function(x, N) {
  q <- 1.0 - x
  (1.0 - q^N)/(1.0 - q)
}

# direct summation of the geometric series, negated as in the original f1prima
f1primesum.1 <- function(x, N) {
  q <- 1.0 - x
  res <- rep(0.0, length.out = length(x))
  s <- rep(1.0, length.out = length(x))
  for(k in 1:N) {
    res <- res + s
    s <- s * q
  }
  -res
}

f1prime <- function(x, a, N) {
  a * f1primesum(x, N) + (a - 1.0)/x - N/(1.0 - x)
}
f1prime.1 <- function(x, a, N) {
  a * f1primesum.1(x, N) + (a - 1.0)/x - N/(1.0 - x)
}

p <- ggplot(data.frame(x = c(0, 1)), aes(x = x)) +
  stat_function(fun = f1, args = list(0.5, 100), colour = "#4271AE") +
  stat_function(fun = f1.1, args = list(0.5, 100), colour = "#1F3552") +
  scale_x_continuous(name = "X", breaks = seq(0, 1, 0.2), limits = c(0.001, 0.5)) +
  scale_y_continuous(name = "F") +
  ggtitle("Log-concave function")
p
As you can see, I've implemented both versions: one using direct summation and another using the analytical form of the sums. The data are computed for a=0.5, N=100.
First, there is a bit of a difference between the direct sum and the 2F1 form, which I attribute to precision loss in the summation.
Second, and more importantly: the function is NOT log-concave, so it is no wonder that ars() is failing left and right. See the graph produced by the plotting code above.
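One quick numerical way to see this (my addition, not part of the original answer) is to check the sign of a finite-difference second derivative of f1 over the plotted range; any positive value rules out concavity:
# finite-difference second derivative of f1 on a grid over the plotted range
xs <- seq(0.01, 0.5, by = 0.005)
h  <- 1e-4
d2 <- (f1(xs + h, 0.5, 100) - 2 * f1(xs, 0.5, 100) + f1(xs - h, 0.5, 100)) / h^2
any(d2 > 0)
# expected TRUE: the (a - 1)*log(x) term with a = 0.5 is convex near 0, so f1 cannot be concave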

How to find the second derivative in R and while using newton's method with numerical derivation

The log-likelihood of the gamma distribution with scale parameter 1 can be written as:
$(\alpha - 1)s - n\log\Gamma(\alpha)$
where $\alpha$ is the shape parameter and $s = \sum \log X_i$ is the sufficient statistic.
Randomly draw a sample of n = 30 with a shape parameter of alpha = 4.5. Using newton_search and make_derivative, find the maximum likelihood estimate of alpha. Use the moment estimator of alpha, i.e., the mean of x, as the initial guess. The log-likelihood function in R is:
x <- rgamma(n=30, shape=4.5)
gllik <- function() {
  s <- sum(log(x))
  n <- length(x)
  function(a) {
    (a - 1) * s - n * lgamma(a)
  }
}
I have created the make_derivative function as follows:
make_derivative <- function(f, h) {
  (f(x + h) - f(x - h)) / (2*h)
}
I have also created a newton_search function that incorporates the make_derivative function; however, I need to use newton_search on the second derivative of the log-likelihood function, and I'm not sure how to fix the following code to do that:
newton_search2 <- function(f, h, guess, conv=0.001) {
  set.seed(2)
  y0 <- guess
  N = 1000
  i <- 1; y1 <- y0
  p <- numeric(N)
  while (i <= N) {
    make_derivative <- function(f, h) {
      (f(y0 + h) - f(y0 - h)) / (2*h)
    }
    y1 <- (y0 - (f(y0)/make_derivative(f, h)))
    p[i] <- y1
    i <- i + 1
    if (abs(y1 - y0) < conv) break
    y0 <- y1
  }
  return (p[(i-1)])
}
Hint: You must apply newton_search to the first and second derivatives (derived numerically using make_derivative) of the log-likelihood. Your answer should be near 4.5.
When I run newton_search2(gllik(), 0.0001, mean(x), conv = 0.001), I get double what the answer should be.
I re-wrote the code and it works perfectly now (even better than what I had originally written). Thanks to all who helped. :-)
newton_search <- function(f, df, guess, conv=0.001) {
  set.seed(1)
  y0 <- guess
  N = 100
  i <- 1; y1 <- y0
  p <- numeric(N)
  while (i <= N) {
    y1 <- (y0 - (f(y0)/df(y0)))
    p[i] <- y1
    i <- i + 1
    if (abs(y1 - y0) < conv) break
    y0 <- y1
  }
  return (p[(i-1)])
}

make_derivative <- function(f, h) {
  function(x){ (f(x + h) - f(x - h)) / (2*h) }
}

df1 <- make_derivative(gllik(), 0.0001)
df2 <- make_derivative(df1, 0.0001)
newton_search(df1, df2, mean(x), conv = 0.001)
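As a quick sanity check (my addition, not part of the original answer), maximizing the returned log-likelihood directly should land in the same place; the search interval here is an arbitrary assumption:
optimize(gllik(), interval = c(0.1, 20), maximum = TRUE)$maximum
# should agree with the newton_search result above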

Unused arguments within a function in R

Below is the code I have. It works for primitive functions such as sin. However, when using a function called gllik, it fails with an "unused argument (y0)" error in f(y0). I'm not sure how to correct this.
newton_search2 <- function(f, h, guess, conv=0.001) {
  y0 <- guess
  N = 100
  i <- 1; y1 <- y0
  p <- numeric(N)
  while (i <= N) {
    make_derivative <- function(f, h) {
      (f(y0 + h) - f(y0 - h)) / (2*h)
    }
    y1 <- (y0 - (f(y0)/make_derivative(f, h)))
    p[i] <- y1
    i <- i + 1
    if (abs(y1 - y0) < conv) break
    y0 <- y1
  }
  return (p[(i-1)])
}
The gllik function is as follows:
x <- rgamma(n=30, shape=4.5)
gllik <- function() {
  s <- sum(log(x))
  n <- length(x)
  function(a) {
    (a - 1) * s - n * lgamma(a)
  }
}
The code I used was:
newton_search2(gllik, 0.001, mean(x), conv = 0.001)
I'm not sure how to fix the error or get the correct answer, which is supposed to be 4.5 (the maximum likelihood estimate of a).
The problem is that gllik does not take any arguments; furthermore, it returns a function, not a value.
Perhaps what you want to do is the following?
gllik <- function(a) {
  s <- sum(log(x))
  n <- length(x)
  return((a - 1) * s - n * lgamma(a))
}
EDIT: An alternative solution is to just use the returned function. While this type of construction is often elegant, it does seem like overkill in this case:
newton_search2(gllik(), 0.001, mean(x), conv = 0.001)
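For completeness, a tiny illustration (my addition) of why the returned-function form works: gllik() evaluates s and n once and hands back a one-argument function of a, which is exactly what newton_search2 expects for f:
ll <- gllik()   # ll is now a function of a single argument a
ll(4.5)         # log-likelihood evaluated at a = 4.5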

Approximation of a root using Halley's method in R

Alright, so I'm working on a small R program that does approximation using Halley's method. Basically, I need to approximate the value of 59^(1/7) to 9 decimal places.
What I have for the first order recurrence relation of Halley's method is this:
$x_{n+1} = x_n - \dfrac{f(x_n)}{f'(x_n) - \dfrac{f(x_n)\,f''(x_n)}{2 f'(x_n)}}$
So far this is the code I have.
halleysMethodApprox = function(ftnH, x0, m0, k0, tol = 1e-8, max.iter=2) {
  x <- x0
  m <- m0
  k <- k0
  fx <- ftnH(x, m, k)
  iter <- 0
  b <- fx[1]/(fx[2] - (fx[1]*fx[3])/(2*fx[2]) )
  while( (abs(fx[1] - x) > tol) && (iter < max.iter) ) {
    # calculate X(n+1)
    b <- ( fx[1]/(fx[2] - ( (fx[1]*fx[3])/(2*fx[2]) ) ))
    x <- x - b
    fx <- ftnH(x, m-1, 0)
    iter <- iter + 1
    cat("At iteration", iter, "value of x is: ", x, "\n")
  }
  if( abs(x) > tol ) {
    cat("Algorithm failed to converge\n")
    return(NULL)
  } else {
    cat("Algorithm converged\n")
    return(x)
  }
}
and this function for generating a vector containing the value of the function at x together with its first and second derivatives:
ftnH = function (x, m, k) {
  fx <- x^m - k
  dfx <- (m*x^(m-1))
  ddfx <- ((m-1)*m)*x^(m-2)
  return (c(fx, dfx, ddfx))
}
print(halleysMethodApprox(ftnH, 59, (1/7), 0))
I'm not quite sure how I am supposed to numerically approximate 59^(1/7) using the above definition for Halley's method.
If you want to calculate x = 59^(1/7), the function to apply Halley's method to is f(x) = x^7 - 59 = 0. There were also a few typos and minor errors in your code; here's a corrected version:
halleysMethodApprox = function(ftnH, x0, m0, k0, tol = 1e-8, max.iter=100) {
  # x0: starting estimate, eqn: x = k0^(1/m0)
  x <- x0
  m <- m0
  k <- k0
  fx <- ftnH(x0, m0, k0)
  iter <- 0
  b <- fx[1]/(fx[2] - (fx[1]*fx[3])/(2*fx[2]) )
  while( (abs(fx[1]) > tol) && (iter < max.iter) ) {
    # calculate X(n+1)
    b <- ( fx[1]/(fx[2] - ( (fx[1]*fx[3])/(2*fx[2]) ) ))
    x <- x - b
    fx <- ftnH(x, m0, k0)
    iter <- iter + 1
    cat("At iteration", iter, "value of x is: ", x, "\n")
  }
  if(abs(fx[1]) > tol) {
    cat("Algorithm failed to converge\n")
    return(NULL)
  } else {
    cat("Algorithm converged\n")
    return(x)
  }
}

ftnH = function (x, m, k) {
  fx <- x^m - k
  dfx <- (m*x^(m-1))
  ddfx <- ((m-1)*m)*x^(m-2)
  return (c(fx, dfx, ddfx))
}
halleysMethodApprox(ftnH, 1, 7, 59)
# At iteration 1 value of x is: 1.320442
# At iteration 2 value of x is: 1.655396
# At iteration 3 value of x is: 1.78716
# At iteration 4 value of x is: 1.790519
# At iteration 5 value of x is: 1.790519
# Algorithm converged
# [1] 1.790519
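As a quick check (my addition), R's built-in exponentiation agrees to the printed precision:
59^(1/7)
# [1] 1.790519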
