r deSolve - plotting time evolution pde - r

suppose that we have a pde that describes the evolution of a variable y(t,x) over time t and space x, and I would like to plot its evolution on a three dimensional diagram (t,x,y). With deSolve I can solve the pde, but I have no idea about how to obtain this kind of diagram.
The example in the deSolve package instruction is the following, where y is aphids, t=0,...,200 and x=1,...,60:
library(deSolve)
Aphid <- function(t, APHIDS, parameters) {
deltax <- c (0.5, rep(1, numboxes - 1), 0.5)
Flux <- -D * diff(c(0, APHIDS, 0)) / deltax
dAPHIDS <- -diff(Flux) / delx + APHIDS * r
list(dAPHIDS )
}
D <- 0.3 # m2/day diffusion rate
r <- 0.01 # /day net growth rate
delx <- 1 # m thickness of boxes
numboxes <- 60
Distance <- seq(from = 0.5, by = delx, length.out = numboxes)
APHIDS <- rep(0, times = numboxes)
APHIDS[30:31] <- 1
state <- c(APHIDS = APHIDS) # initialise state variables
times <-seq(0, 200, by = 1)
out <- ode.1D(state, times, Aphid, parms = 0, nspec = 1, names = "Aphid")
"out" produces a matrix containing all the data that we need, t, y(x1), y(x2), ... y(x60). How can I produce a surface plot to show the evolution and variability of y in (t,x)?

The ways change a bit depending on using package. But you can do it with little cost because out[,-1] is an ideal matrix form to draw surface. I showed two examples using rgl and plot3D package.
out2 <- out[,-1]
AphID <- 1:ncol(out2)
library(rgl)
persp3d(times, AphID, out2, col="gray50", zlab="y")
# If you want to change color with value of Z-axis
# persp3d(times, AphID, out2, zlab="y", col=topo.colors(256)[cut(c(out2), 256)])
library(plot3D)
mat <- mesh(times, AphID)
surf3D(mat$x, mat$y, out2, bty="f", ticktype="detailed", xlab="times", ylab="AphID", zlab="y")

Related

Some issues Related to simulation from rLGCP and Point Process Models fitting

I trying two generate points from rLGCP function. I assumed that the presence of these points in the Window is governed by two covaiates ras1 and ras2. Hence I need to comptute log-lambda.
rm(list= ls(all=T))
#Libraries
library(spatstat)
library(raster)
library(maptools)
library(fields)
Creating the domaine D and two rasters
D <- c(300, 300) # Square Domaine D of side 300
Win <- owin(xrange =c(0, D[1]), yrange =c(0,D[2]))
spatstat.options(npixel=c(D[1],D[2]))
ext <- extent(Win$xrange, Win$yrange) # Extent of the rasters
# First raster ras1
par(mfrow=c(1,1))
ras1 <- raster()
extent(ras1) <- ext
res(ras1) <- 10
names(ras1) <- 'Radiation sim'
crs(ras1) <- "+proj=lcc +lat_1=48 +lat_2=33 +lon_0=-100 +datum=WGS84"
values(ras1) <- matrix(c(seq(from =0, to =50, length.out=200), seq(from=50, to=100, length.out = 100), seq(from=100, to=150, length.out = 200), seq(from=150, to=200, length.out = 200), seq(from=200, to=290, length.out = 200)), nrow = 30, ncol = 30)
ras1
plot(ras1, asp=1)
# Second Raster ras2
ras2 <- raster()
extent(ras2) <- ext
res(ras2) <- 10
names(ras2) <- 'Precipitation sim'
crs(ras2) <- "+proj=lcc +lat_1=48 +lat_2=33 +lon_0=-100 +datum=WGS84"
values(ras2) <- matrix(c(seq(from =-0, to =200, length.out=500), seq(from=400, to=893, length.out = 20), seq(from=200, to=300, length.out = 300),seq(from=300, to = 400, length.out=80)))
ras2
plot(ras2, asp=1)
Rasters.group <- stack(ras1, ras2)
plot(Rasters.group)
graphics.off()
From Rasters to im. objects
im.ras1 <- as.im.RasterLayer(ras1); summary(im.ras1)
im.ras2 <- as.im.RasterLayer(ras2); summary(im.ras2)
covar.list <- list(Radiation.sim=im.ras1, Precipitation.sim=im.ras2)
# plot .im object
par(mfrow=c(1,2))
image.plot(list(x=im.ras1$xcol, y=im.ras1$yrow, z=t(im.ras1$v)), main= "Radiation sim", asp=1)
image.plot(list(x=im.ras2$xcol, y=im.ras2$yrow, z=t(im.ras2$v)), main= "Precipitation sim", asp=1)
Now I can compute log-Lambda
#normalization
norm.im.ras1 <- (im.ras1- summary(im.ras1)$mean)/sd(im.ras1) ; summary(norm.im.ras1)
norm.im.ras2 <- (im.ras2- summary(im.ras2)$mean)/sd(im.ras2) ; summary(norm.im.ras2)
#Compute log-lambda
log.lambda <- norm.im.ras1 + 2*norm.im.ras2
summary(log.lambda)
resut dispays very weak values
Pixel values
range = [-4.657923, 10.94624]
integral = -9.678445e-12
mean = -1.075383e-16
When I try to simulate from rLGCP
gen.lgcp <- rLGCP("matern", mu=log.lambda, var=0.5, scale=0.05, nu=1)
Error: could not allocate a vector of size 181.9 MB
I tried to get around that with
log.lambda0 <- as.im(solutionset(log.lambda>0))
gen.lgcp <- rLGCP("matern", mu=log.lambda0, var=0.5, scale=0.05, nu=1)
summary(gen.lgcp)
I can move forward. But further, I did not get relevent results
#Thinning
image.plot(list(x=log.lambda$xcol, y=log.lambda$yrow, z=t(log.lambda$v)), main= "log.lambda", asp=1)
samp.lgcp <- rthin(gen.lgcp, P=seq(from=0.02, to=0.2, length.out = gen.lgcp$n)); points(samp.lgcp$x, samp.lgcp$y, type = 'p', cex=0.2, lwd=1, col='white')
#point pattern
pts.locations <- as.data.frame(cbind(longitude=samp.lgcp$x, latitude=samp.lgcp$y))
ppp.lgcp <- ppp(pts.locations$longitude, pts.locations$latitude, window = owin(xrange=c(min(pts.locations [,1]),max(pts.locations [,1])), yrange = c(min(pts.locations[,2]),max(pts.locations[,2]))))
plot(ppp.lgcp)
#Extract value of each sampled point covariate
cov.value <- extract(Rasters.group, pts.locations)
cov.value <- as.data.frame(cov.value )
presence.data <- data.frame(pts.locations, cov.value, presence=rep(1, nrow(cov.value)))
### Choosing absence point pattern
abs.region <- crop(Virtual.species.domaine, extent(25.28486 , 162.2897 ,181.7417 , 280.7651 ))
im.abs.region <- as.im.RasterLayer(abs.region)
abs.points <- rasterToPoints(abs.region)
ppp.abs.points <- ppp(abs.points[,1], abs.points[,2], window = owin(xrange = c(min(abs.points[,1]), max(abs.points[,1])), yrange =c(min(abs.points[,2]), max(abs.points[,2]))))
plot(ppp.abs.points)
cov.value.abs <- extract(Rasters.group, abs.points[,1:2])
absence.data <- data.frame(abs.points[,1:2], cov.value.abs, presence=rep(0, nrow(abs.points)))
colnames(absence.data)[1:2] <- c("longitude", "latitude")
head(absence.data)
# Get database for LGCP
LGCP.Data.Set <- rbind(presence.data, absence.data)
#' Model
#' we will use non-stationary formula
covar.formula <- as.formula(paste("~", paste(names(LGCP.Data.Set[,3:4]), collapse = "+")))
#Quadrature scheme
Q.lgcp <- quadscheme(ppp.lgcp, ppp.abs.points, method = 'grid')
plot(Q.lgcp)
Warning message:
In countingweights(id, areas) :
some tiles with positive area do not contain any quadrature points: relative error = 94.2%
# Inhomogenous poisson process Model
fit.ipp <- ppm(Q.lgcp, trend = covar.formula, covariates = LGCP.Data.Set[,3:4])
summary(fit.ipp)
Warning message:
glm.fit: algorithm did not converge
What is going wrong?
My goal is to evaluate de model and the predict with
prediction.ipp <- predict.ppm(fit.ipp, log.lambda, type = 'intensity')
This is a very long and un-focused question but I will try to help.
After constructing the image log.lambda, you say "result shows very weak values". What do you mean? The image values were assigned as a sequence of values ranging from 0 to 200, and then standardised to have mean zero and standard deviation 1. How is this "weak"?
You then call rLGCP using this image as the mean log intensity. The values of log.lambda range from about -4 to +10. This means that the desired intensity will range from exp(-4) to exp(+10), that is, about 0.01 to 20 000 points per square unit. The image dimensions are 30 by 30 units. Thus, a very large number of random points must be generated, and this fails because of memory limits. (The expected number of points is integral(exp(log.lambda)).
You then change log.lambda to another image which takes only the values 0 and 1.
The next body of code appears to take a raster image (of "absence" pixels) and try to construct a quadrature scheme using the "absence" pixels as dummy points. This is not the purpose for which quadscheme is designed (for quadscheme the dummy points should be sparse).
You don't need to construct a quadrature scheme to use ppm. You could just do something like
D <- solist(A=im.ras.1, B=im.ras.2)
ppm(ppp.logi ~ A+B , data=D)
If you really want to construct a quadrature scheme, I suggest you use the function pixelquad instead. Just do pixelquad(ppp.lgcp, im.abs.region) or similar. Then use ppm.
Since the data were generated by a Cox process, it would be more appropriate to use kppm rather than ppm.
See the spatstat book for further information.

Arc length of piecewise spline using R

I know there are many ways to calculate the arc length of curve, but I am looking for an efficient way to calculate the arc length of a piecewise spline through irregularly spaced points.
The actual curve I'm trying to find the length of is quite complex (contour line) so here is a quick example using a circle where the actual arclength is known to be 2*pi:
# Generate "random" data
set.seed(50)
theta = seq(0, 2*pi, length.out = 50) + runif(50, -0.05, 0.05)
theta = c(0, theta[theta >=0 & theta <= 2*pi], 2*pi)
data = data.frame(x = cos(theta), y = sin(theta))
# Bezier Curve fit
library("bezier")
bezierArcLength(data, t1=0, t2=1)$arc.length
# Calculate arc length using euclidean distance
library("dplyr")
data$eucdist = sqrt((data$x - lag(data$x))^2 + (data$y - lag(data$y))^2)
print(paste("Euclidean distance:", sum(data$eucdist[-1])))
print(paste("Actual distance:", 2*pi))
# Output
Bezier distance: 5.864282
Euclidean distance: 6.2779
Actual distance: 6.2831
The closest thing I have found is https://www.rdocumentation.org/packages/pracma/versions/1.9.9/topics/arclength but I would have to parameterise my data to be some function(t) ...spline(data, t)... to use arclength. I tried this, but the fitted spline ran along the middle of the circle rather than along the circumference.
Another alternative I have been (unsuccessfully) trying is fit piecewise splines and determine the length of each spline.
Any help would be much appreciated!
EDIT: Added alternate method using the Bezier package, but the arc length found is even worse than just using the Euclidean method.
In lieu of community answers, I've cobbled together a solution which seems to work for what I was after! I'll leave my code here in case anyone has the same question and comes across this.
# Libraries
library("bezier")
library("pracma")
library("dplyr")
# Very slow for loops, sorry! Didn't write it as an apply function
output = data.frame()
for (i in 1:100) {
# Generate "random" data
# set.seed(50)
theta = seq(0, 2*pi, length.out = 50) + runif(50, -0.1, 0.1)
theta = sort(theta)
theta = c(0, theta[theta >=0 & theta <= 2*pi], 2*pi)
data = data.frame(x = cos(theta), y = sin(theta))
# Bezier Curve fit
b = bezierArcLength(data, t1=0, t2=1)$arc.length
# Pracma Piecewise cubic
t = atan2(data$y, data$x)
t = t + ifelse(t < 0, 2*pi, 0)
csx <- cubicspline(t, data$x)
csy <- cubicspline(t, data$y)
dcsx = csx; dcsx$coefs = t(apply(csx$coefs, 1, polyder))
dcsy = csy; dcsy$coefs = t(apply(csy$coefs, 1, polyder))
ds <- function(t) sqrt(ppval(dcsx, t)^2 + ppval(dcsy, t)^2)
s = integral(ds, t[1], t[length(t)])
# Calculate arc length using euclidean distance
data$eucdist = sqrt((data$x - lag(data$x))^2 + (data$y - lag(data$y))^2)
e = sum(data$eucdist[-1])
# Use path distance as parametric variable
data$d = c(0, cumsum(data$eucdist[-1]))
csx <- cubicspline(data$d, data$x)
csy <- cubicspline(data$d, data$y)
dcsx = csx; dcsx$coefs = t(apply(csx$coefs, 1, polyder))
dcsy = csy; dcsy$coefs = t(apply(csy$coefs, 1, polyder))
ds <- function(t) sqrt(ppval(dcsx, t)^2 + ppval(dcsy, t)^2)
d = integral(ds, data$d[1], data$d[nrow(data)])
# Actual value
a = 2*pi
# Append to result
output = rbind(
output,
data.frame(bezier=b, cubic.spline=s, cubic.spline.error=(s-a)/a*100,
euclidean.dist=e, euclidean.dist.error=(e-a)/a*100,
dist.spline=d, dist.spline.error=(d-a)/a*100))
}
# Summary
apply(output, 2, mean)
# Summary output
bezier cubic.spline cubic.spline.error euclidean.dist euclidean.dist.error dist.spline dist.spline.error
5.857931e+00 6.283180e+00 -7.742975e-05 6.274913e+00 -1.316564e-01 6.283085683 -0.001585570
I still don't quite understand what bezierArcLength does, but I'm very happy with my solution using cubicspline from the pracma package as it is a lot more accurate.
Other solutions are still more than welcome!

How can I improve the Integration and Parameterization of Convolved Distributions?

I am trying to solve for the parameters of a gamma distribution that is convolved with both normal and lognormal distributions. I can experimentally derive parameters for both the normal and lognormal components, hence, I just want to solve for the gamma params.
I have attempted 3 approaches to this problem:
1) generating convolved random datasets (i.e. rnorm()+rlnorm()+rgamma()) and using least-squares regression on the linear- or log-binned histograms of the data (not shown, but was very biased by RNG and didn't optimize well at all.)
2) "brute-force" numerical integration of the convolving functions (example code #1)
3) numerical integration approaches w/ the distr package. (example code #2)
I have had limited success with all three approaches. Importantly, these approaches seem to work well for "nominal" values for the gamma parameters, but they all begin to fail when k(shape) is low and theta(scale) is high—which is where my experimental data resides. please find the examples below.
Straight-up numerical Integration
# make the functions
f.N <- function(n) dnorm(n, N[1], N[2])
f.L <- function(l) dlnorm(l, L[1], L[2])
f.G <- function(g) dgamma(g, G[1], scale=G[2])
# make convolved functions
f.Z <- function(z) integrate(function(x,z) f.L(z-x)*f.N(x), -Inf, Inf, z)$value # L+N
f.Z <- Vectorize(f.Z)
f.Z1 <- function(z) integrate(function(x,z) f.G(z-x)*f.Z(x), -Inf, Inf, z)$value # G+(L+N)
f.Z1 <- Vectorize(f.Z1)
# params of Norm, Lnorm, and Gamma
N <- c(0,5)
L <- c(2.5,.5)
G <- c(2,7) # this distribution is the one we ultimately want to solve for.
# G <- c(.5,10) # 0<k<1
# G <- c(.25,5e4) # ballpark params of experimental data
# generate some data
set.seed(1)
rN <- rnorm(1e4, N[1], N[2])
rL <- rlnorm(1e4, L[1], L[2])
rG <- rgamma(1e4, G[1], scale=G[2])
Z <- rN + rL
Z1 <- rN + rL + rG
# check the fit
hist(Z,freq=F,breaks=100, xlim=c(-10,50), col=rgb(0,0,1,.25))
hist(Z1,freq=F,breaks=100, xlim=c(-10,50), col=rgb(1,0,0,.25), add=T)
z <- seq(-10,50,1)
lines(z,f.Z(z),lty=2,col="blue", lwd=2) # looks great... convolution performs as expected.
lines(z,f.Z1(z),lty=2,col="red", lwd=2) # this works perfectly so long as k(shape)>=1
# I'm guessing the failure to compute when shape 0 < k < 1 is due to
# numerical integration problems, but I don't know how to fix it.
integrate(dgamma, -Inf, Inf, shape=1, scale=1) # ==1
integrate(dgamma, 0, Inf, shape=1, scale=1) # ==1
integrate(dgamma, -Inf, Inf, shape=.5, scale=1) # !=1
integrate(dgamma, 0, Inf, shape=.5, scale=1) # != 1
# Let's try to estimate gamma anyway, supposing k>=1
optimFUN <- function(par, N, L) {
print(par)
-sum(log(f.Z1(Z1[1:4e2])))
}
f.G <- function(g) dgamma(g, par[1], scale=par[2])
fitresult <- optim(c(1.6,5), optimFUN, N=N, L=L)
par <- fitresult$par
lines(z,f.Z1(z),lty=2,col="green3", lwd=2) # not so great... likely better w/ more data,
# but it is SUPER slow and I observe large step sizes.
Attempting convolving via distr package
# params of Norm, Lnorm, and Gamma
N <- c(0,5)
L <- c(2.5,.5)
G <- c(2,7) # this distribution is the one we ultimately want to solve for.
# G <- c(.5,10) # 0<k<1
# G <- c(.25,5e4) # ballpark params of experimental data
# make the distributions and "convolvings'
dN <- Norm(N[1], N[2])
dL <- Lnorm(L[1], L[2])
dG <- Gammad(G[1], G[2])
d.NL <- d(convpow(dN+dL,1))
d.NLG <- d(convpow(dN+dL+dG,1)) # for large values of theta, no matter how I change
# getdistrOption("DefaultNrFFTGridPointsExponent"), grid size is always wrong.
# Generate some data
set.seed(1)
rN <- r(dN)(1e4)
rL <- r(dL)(1e4)
rG <- r(dG)(1e4)
r.NL <- rN + rL
r.NLG <- rN + rL + rG
# check the fit
hist(r.NL, freq=F, breaks=100, xlim=c(-10,50), col=rgb(0,0,1,.25))
hist(r.NLG, freq=F, breaks=100, xlim=c(-10,50), col=rgb(1,0,0,.25), add=T)
z <- seq(-10,50,1)
lines(z,d.NL(z), lty=2, col="blue", lwd=2) # looks great... convolution performs as expected.
lines(z,d.NLG(z), lty=2, col="red", lwd=2) # this appears to work perfectly
# for most values of K and low values of theta
# this is looking a lot more promising... how about estimating gamma params?
optimFUN <- function(par, dN, dL) {
tG <- Gammad(par[1],par[2])
d.NLG <- d(convpow(dN+dL+tG,1))
p <- d.NLG(r.NLG)
p[p==0] <- 1e-15 # because sometimes very low probabilities evaluate to 0...
# ...and logs don't like that.
-sum(log(p))
}
fitresult <- optim(c(1,1e4), optimFUN, dN=dN, dL=dL)
fdG <- Gammad(fitresult$par[1], fitresult$par[2])
fd.NLG <- d(convpow(dN+dL+fdG,1))
lines(z,fd.NLG(z), lty=2, col="green3", lwd=2) ## this works perfectly when ~k>1 & ~theta<100... but throws
## "Error in validityMethod(object) : shape has to be positive" when k decreases and/or theta increases
## (boundary subject to RNG).
Can i speed up the integration in example 1? can I increase the grid size in example 2 (distr package)? how can I address the k<1 problem? can I rescale the data in a way that will better facilitate evaluation at high theta values?
Is there a better way all-together?
Help!
Well, convolution of function with gaussian kernel calls for use of Gauss–Hermite quadrature. In R it is implemented in special package: https://cran.r-project.org/web/packages/gaussquad/gaussquad.pdf
UPDATE
For convolution with Gamma distribution this package might be useful as well via Gauss-Laguerre quadrature
UPDATE II
Here is quick code to convolute gaussian with lognormal,
hopefully not a lot of bugs and and prints some reasonable looking graph
library(gaussquad)
n.quad <- 170 # integration order
# get the particular weights/abscissas as data frame with 2 observables and n.quad observations
rule <- ghermite.h.quadrature.rules(n.quad, mu = 0.0)[[n.quad]]
# test function - integrate 1 over exp(-x^2) from -Inf to Inf
# should get sqrt(pi) as an answer
f <- function(x) {
1.0
}
q <- ghermite.h.quadrature(f, rule)
print(q - sqrt(pi))
# convolution of lognormal with gaussian
# because of the G-H rules, we have to make our own function
# for simplicity, sigmas are one and mus are zero
sqrt2 <- sqrt(2.0)
c.LG <- function(z) {
#print(z)
f.LG <- function(x) {
t <- (z - x*sqrt2)
q <- 0.0
if (t > 0.0) {
l <- log(t)
q <- exp( - 0.5*l*l ) / t
}
q
}
ghermite.h.quadrature(Vectorize(f.LG), rule) / (pi*sqrt2)
}
library(ggplot2)
p <- ggplot(data = data.frame(x = 0), mapping = aes(x = x))
p <- p + stat_function(fun = Vectorize(c.LG))
p <- p + xlim(-1.0, 5.0)
print(p)

Is it possible to sample from a conditional density in R given some conditional data?

In R, using the np package, I have created the bandwidths for a conditional density. What I would like to do is, given some new conditional vector, sample from the resulting distribution.
Current code:
library('np')
# Generate some test data.
somedata = data.frame(replicate(10,runif(100, 0, 1)))
# Conditional variables.
X <- data.frame(somedata[, c('X1', 'X2', 'X3')])
# Dependent variables.
Y <- data.frame(somedata[, c('X4', 'X5', 'X6')])
# Warning, this can be slow (but shouldn't be too bad).
bwsome = npcdensbw(xdat=X, ydat=Y)
# TODO: Given some vector t of conditional data, how can I sample from the resulting distribution?
I am quite new to R, so while I did read the package documentation, I haven't been able to figure out if what I vision makes sense or is possible. If necessary, I would happily use a different package.
Here is the Example 2.49 from: https://cran.r-project.org/web/packages/np/vignettes/np_faq.pdf , it gives the following
solution for for 2 variables:
###
library(np)
data(faithful)
n <- nrow(faithful)
x1 <- faithful$eruptions
x2 <- faithful$waiting
## First compute the bandwidth vector
bw <- npudensbw(~x1 + x2, ckertype = "gaussian")
plot(bw, view = "fixed", ylim = c(0, 3))
## Next generate draws from the kernel density (Gaussian)
n.boot <- 1000
i.boot <- sample(1:n, n.boot, replace = TRUE)
x1.boot <- rnorm(n.boot,x1[i.boot],bw$bw[1])
x2.boot <- rnorm(n.boot,x2[i.boot],bw$bw[2])
## Plot the density for the bootstrap sample using the original
## bandwidths
plot(npudens(~x1.boot+x2.boot,bws=bw$bw), view = "fixed")
Following this hint from #coffeejunky, the following is a possible
solution to your problem with 6 variables:
## Generate some test data.
somedata = data.frame(replicate(10, runif(100, 0, 1)))
## Conditional variables.
X <- data.frame(somedata[, c('X1', 'X2', 'X3')])
## Dependent variables.
Y <- data.frame(somedata[, c('X4', 'X5', 'X6')])
## First compute the bandwidth vector
n <- nrow(somedata)
bw <- npudensbw(~X$X1 + X$X2 + X$X3 + Y$X4 + Y$X5 + Y$X6, ckertype = "gaussian")
plot(bw, view = "fixed", ylim = c(0, 3))
## Next generate draws from the kernel density (Gaussian)
n.boot <- 1000
i.boot <- sample(1:n, n.boot, replace=TRUE)
x1.boot <- rnorm(n.boot, X$X1[i.boot], bw$bw[1])
x2.boot <- rnorm(n.boot, X$X2[i.boot], bw$bw[2])
x3.boot <- rnorm(n.boot, X$X3[i.boot], bw$bw[3])
x4.boot <- rnorm(n.boot, Y$X4[i.boot], bw$bw[4])
x5.boot <- rnorm(n.boot, Y$X5[i.boot], bw$bw[5])
x6.boot <- rnorm(n.boot, Y$X6[i.boot], bw$bw[6])
## Plot the density for the bootstrap sample using the original
## bandwidths
ob1 <- npudens(~x1.boot + x2.boot + x3.boot + x4.boot + x5.boot + x6.boot, bws = bw$bw)
plot(ob1, view = "fixed", ylim = c(0, 3))

How to plot the probabilistic density function of a function?

Assume A follows Exponential distribution; B follows Gamma distribution
How to plot the PDF of 0.5*(A+B)
This is fairly straight forward using the "distr" package:
library(distr)
A <- Exp(rate=3)
B <- Gammad(shape=2, scale=3)
conv <- 0.5*(A+B)
plot(conv)
plot(conv, to.draw.arg=1)
Edit by JD Long
Resulting plot looks like this:
If you're just looking for fast graph I usually do the quick and dirty simulation approach. I do some draws, slam a Gaussian density on the draws and plot that bad boy:
numDraws <- 1e6
gammaDraws <- rgamma(numDraws, 2)
expDraws <- rexp(numDraws)
combined <- .5 * (gammaDraws + expDraws)
plot(density(combined))
output should look a little like this:
Here is an attempt at doing the convolution (which #Jim Lewis refers to) in R. Note that there are probably much more efficient ways of doing this.
lower <- 0
upper <- 20
t <- seq(lower,upper,0.01)
fA <- dexp(t, rate = 0.4)
fB <- dgamma(t,shape = 8, rate = 2)
## C has the same distribution as (A + B)/2
dC <- function(x, lower, upper, exp.rate, gamma.rate, gamma.shape){
integrand <- function(Y, X, exp.rate, gamma.rate, gamma.shape){
dexp(Y, rate = exp.rate)*dgamma(2*X-Y, rate = gamma.rate, shape = gamma.shape)*2
}
out <- NULL
for(ix in seq_along(x)){
out[ix] <-
integrate(integrand, lower = lower, upper = upper,
X = x[ix], exp.rate = exp.rate,
gamma.rate = gamma.rate, gamma.shape = gamma.shape)$value
}
return(out)
}
fC <- dC(t, lower=lower, upper=upper, exp.rate=0.4, gamma.rate=2, gamma.shape=8)
## plot the resulting distribution
plot(t,fA,
ylim = range(fA,fB,na.rm=TRUE,finite = TRUE),
xlab = 'x',ylab = 'f(x)',type = 'l')
lines(t,fB,lty = 2)
lines(t,fC,lty = 3)
legend('topright', c('A ~ exp(0.4)','B ~ gamma(8,2)', 'C ~ (A+B)/2'),lty = 1:3)
I'm not an R programmer, but it might be helpful to know that for independent random variables with PDFs f1(x) and f2(x), the PDF
of the sum of the two variables is given by the convolution f1 * f2 (x) of the two input PDFs.

Resources