I am trying to generate points from the rLGCP function. I assume that the presence of these points in the window is governed by two covariates, ras1 and ras2. Hence I need to compute log-lambda.
rm(list= ls(all=T))
#Libraries
library(spatstat)
library(raster)
library(maptools)
library(fields)
Creating the domain D and two rasters
D <- c(300, 300) # Square domain D of side 300
Win <- owin(xrange =c(0, D[1]), yrange =c(0,D[2]))
spatstat.options(npixel=c(D[1],D[2]))
ext <- extent(Win$xrange, Win$yrange) # Extent of the rasters
# First raster ras1
par(mfrow=c(1,1))
ras1 <- raster()
extent(ras1) <- ext
res(ras1) <- 10
names(ras1) <- 'Radiation sim'
crs(ras1) <- "+proj=lcc +lat_1=48 +lat_2=33 +lon_0=-100 +datum=WGS84"
values(ras1) <- matrix(c(seq(from =0, to =50, length.out=200), seq(from=50, to=100, length.out = 100), seq(from=100, to=150, length.out = 200), seq(from=150, to=200, length.out = 200), seq(from=200, to=290, length.out = 200)), nrow = 30, ncol = 30)
ras1
plot(ras1, asp=1)
# Second Raster ras2
ras2 <- raster()
extent(ras2) <- ext
res(ras2) <- 10
names(ras2) <- 'Precipitation sim'
crs(ras2) <- "+proj=lcc +lat_1=48 +lat_2=33 +lon_0=-100 +datum=WGS84"
values(ras2) <- matrix(c(seq(from =-0, to =200, length.out=500), seq(from=400, to=893, length.out = 20), seq(from=200, to=300, length.out = 300),seq(from=300, to = 400, length.out=80)))
ras2
plot(ras2, asp=1)
Rasters.group <- stack(ras1, ras2)
plot(Rasters.group)
graphics.off()
From rasters to im objects
im.ras1 <- as.im.RasterLayer(ras1); summary(im.ras1)
im.ras2 <- as.im.RasterLayer(ras2); summary(im.ras2)
covar.list <- list(Radiation.sim=im.ras1, Precipitation.sim=im.ras2)
# plot .im object
par(mfrow=c(1,2))
image.plot(list(x=im.ras1$xcol, y=im.ras1$yrow, z=t(im.ras1$v)), main= "Radiation sim", asp=1)
image.plot(list(x=im.ras2$xcol, y=im.ras2$yrow, z=t(im.ras2$v)), main= "Precipitation sim", asp=1)
Now I can compute log-lambda:
#normalization
norm.im.ras1 <- (im.ras1- summary(im.ras1)$mean)/sd(im.ras1) ; summary(norm.im.ras1)
norm.im.ras2 <- (im.ras2- summary(im.ras2)$mean)/sd(im.ras2) ; summary(norm.im.ras2)
#Compute log-lambda
log.lambda <- norm.im.ras1 + 2*norm.im.ras2
summary(log.lambda)
The result displays very weak values:
Pixel values
range = [-4.657923, 10.94624]
integral = -9.678445e-12
mean = -1.075383e-16
When I try to simulate from rLGCP:
gen.lgcp <- rLGCP("matern", mu=log.lambda, var=0.5, scale=0.05, nu=1)
Error: could not allocate a vector of size 181.9 MB
I tried to get around that with
log.lambda0 <- as.im(solutionset(log.lambda>0))
gen.lgcp <- rLGCP("matern", mu=log.lambda0, var=0.5, scale=0.05, nu=1)
summary(gen.lgcp)
I can move forward, but further on I did not get relevant results.
#Thinning
image.plot(list(x=log.lambda$xcol, y=log.lambda$yrow, z=t(log.lambda$v)), main= "log.lambda", asp=1)
samp.lgcp <- rthin(gen.lgcp, P=seq(from=0.02, to=0.2, length.out = gen.lgcp$n)); points(samp.lgcp$x, samp.lgcp$y, type = 'p', cex=0.2, lwd=1, col='white')
#point pattern
pts.locations <- as.data.frame(cbind(longitude=samp.lgcp$x, latitude=samp.lgcp$y))
ppp.lgcp <- ppp(pts.locations$longitude, pts.locations$latitude, window = owin(xrange=c(min(pts.locations [,1]),max(pts.locations [,1])), yrange = c(min(pts.locations[,2]),max(pts.locations[,2]))))
plot(ppp.lgcp)
#Extract value of each sampled point covariate
cov.value <- extract(Rasters.group, pts.locations)
cov.value <- as.data.frame(cov.value )
presence.data <- data.frame(pts.locations, cov.value, presence=rep(1, nrow(cov.value)))
### Choosing absence point pattern
abs.region <- crop(Virtual.species.domaine, extent(25.28486 , 162.2897 ,181.7417 , 280.7651 ))
im.abs.region <- as.im.RasterLayer(abs.region)
abs.points <- rasterToPoints(abs.region)
ppp.abs.points <- ppp(abs.points[,1], abs.points[,2], window = owin(xrange = c(min(abs.points[,1]), max(abs.points[,1])), yrange =c(min(abs.points[,2]), max(abs.points[,2]))))
plot(ppp.abs.points)
cov.value.abs <- extract(Rasters.group, abs.points[,1:2])
absence.data <- data.frame(abs.points[,1:2], cov.value.abs, presence=rep(0, nrow(abs.points)))
colnames(absence.data)[1:2] <- c("longitude", "latitude")
head(absence.data)
# Get database for LGCP
LGCP.Data.Set <- rbind(presence.data, absence.data)
#' Model
#' we will use non-stationary formula
covar.formula <- as.formula(paste("~", paste(names(LGCP.Data.Set[,3:4]), collapse = "+")))
#Quadrature scheme
Q.lgcp <- quadscheme(ppp.lgcp, ppp.abs.points, method = 'grid')
plot(Q.lgcp)
Warning message:
In countingweights(id, areas) :
some tiles with positive area do not contain any quadrature points: relative error = 94.2%
# Inhomogeneous Poisson process model
fit.ipp <- ppm(Q.lgcp, trend = covar.formula, covariates = LGCP.Data.Set[,3:4])
summary(fit.ipp)
Warning message:
glm.fit: algorithm did not converge
What is going wrong?
My goal is to evaluate the model and then predict with
prediction.ipp <- predict.ppm(fit.ipp, log.lambda, type = 'intensity')
This is a very long and unfocused question, but I will try to help.
After constructing the image log.lambda, you say "the result displays very weak values". What do you mean? The image values were assigned as a sequence of values ranging from 0 to 200, and then standardised to have mean zero and standard deviation 1. How is this "weak"?
You then call rLGCP using this image as the mean log intensity. The values of log.lambda range from about -4 to +10. This means that the desired intensity will range from exp(-4) to exp(+10), that is, from about 0.01 to about 20 000 points per square unit. The window is 300 by 300 units. Thus, a very large number of random points must be generated, and this fails because of memory limits. (The expected number of points is integral(exp(log.lambda)).)
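A quick way to check this before simulating (using spatstat's image arithmetic and integral method for im objects):
# expected number of points implied by using log.lambda as the log intensity
integral(exp(log.lambda))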
You then change log.lambda to another image which takes only the values 0 and 1.
The next body of code appears to take a raster image (of "absence" pixels) and construct a quadrature scheme using the "absence" pixels as dummy points. This is not what quadscheme is designed for (the dummy points in quadscheme should be sparse).
You don't need to construct a quadrature scheme to use ppm. You could just do something like
covlist <- solist(A = im.ras1, B = im.ras2)
ppm(ppp.lgcp ~ A + B, data = covlist)
If you really want to construct a quadrature scheme, I suggest you use the function pixelquad instead. Just do pixelquad(ppp.lgcp, im.abs.region) or similar. Then use ppm.
Since the data were generated by a Cox process, it would be more appropriate to use kppm rather than ppm.
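A minimal sketch of that fit (reusing the covariate list covlist defined above; kppm's clusters = "LGCP" option fits a log-Gaussian Cox process):
# fit a log-Gaussian Cox process with the two image covariates as the trend
fit.lgcp <- kppm(ppp.lgcp ~ A + B, data = covlist, clusters = "LGCP")
summary(fit.lgcp)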
See the spatstat book for further information.
library(rgdal)
library(maptools)
library(gstat)
library(sp)
data <- read.table("meuse.txt", sep="", header=TRUE) # read txt file
# transform the data frame into a spatial data frame
coordinates(data) <- ~ x + y
## Set the coordinate system
proj4string(data) <- CRS("+init=epsg:4326")
## the epsg numbers can be found here: http://spatialreference.org/ref/
# import the border shp file
border <- readOGR("meuse_area.shp", "meuse_area")
proj4string(border) <- CRS("+init=epsg:4326")
# import a raster from a ArcInfo ASCII format
zinc <- read.asciigrid("zinc.asc")
proj4string(zinc) <- CRS("+init=epsg:4326")
# Let's first create a prediction grid for the interpolation, starting from
# the shape file
vals <- border@bbox
deltaLong <- as.integer((vals[1, 2] - vals[1, 1]) + 1.5)
deltaLat <- as.integer((vals[2, 2] - vals[2, 1]) + 1.5)
gridRes <- 0.5 # change this value to change the grid size (in metres)
gridSizeX <- deltaLong / gridRes
gridSizeY <- deltaLat / gridRes
grd <- GridTopology(vals[, 1], c(gridRes, gridRes), c(gridSizeX, gridSizeY))
pts <- SpatialPoints(coordinates(grd))
pts1 <- SpatialPointsDataFrame(as.data.frame(pts),
data=as.data.frame(rep(1, nrow(as.data.frame(pts)))))
Overlay <- overlay(pts1, border)
pts1$border <- Overlay
nona <- na.exclude(as.data.frame(pts1))
coordinates(nona) <- ~ x + y
gridded(nona) <- TRUE
proj4string(nona) <- CRS("+init=epsg:4326") # remember to set the coordinate
# system also for the prediction grid
writeAsciiGrid(nona, "prediction_grid.asc")
# For the Co-kriging we need to obtain the value of the covariate for each
# observation
over <- overlay(zinc, data)
data$zinc <- over$zinc.asc
str(as.data.frame(data))
# also the prediction grid need to be overlayed with the covariate
over <- overlay(zinc, nona)
nona$zinc <- over$zinc.asc
# for the cokriging, the first thing to do is create an object with the
# function gstat() that contains both the variable and the covariate
str(data)
complete.cases("data")
str(zinc)
complete.cases("zinc")
g <- gstat(id="lead", formula=lead ~ 1, data=data)
g <- gstat(g, id="zinc", formula=zinc ~ 1, data=data)
# Fitting the variogram
# first, plot the residual variogram
vario <- variogram(g)
Error in na.fail.default(list(zinc = c(NA, NA, NA, NA, NA, NA, NA, NA, :
  missing values in object
I know there are no missing values in zinc when I inspect the object in Notepad. What did I miss? There is no NA in zinc.asc. These are my data.
I want to perform cokriging and I am stuck at the variogram.
Also, you can change the two lines:
Overlay <- overlay(pts1, border)
pts1$border <- Overlay
to
pts1$border <- over(pts1, border)
One cause of the confusion here may be that sp used to include its own overlay function:
https://www.rdocumentation.org/packages/sp/versions/0.9-7/topics/overlay
However, the most recent version of the sp package replaced the overlay function with over. I do not believe raster::overlay behaves the same as the overlay function from sp 0.9-7.
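For example, the overlays above could be rewritten with over() along these lines (a sketch; the zinc.asc column name is taken from the original code):
# point-in-polygon lookup for the prediction grid points
pts1$border <- over(pts1, border)
# grid value of the covariate at the observation and prediction locations
data$zinc <- over(data, zinc)$zinc.asc
nona$zinc <- over(nona, zinc)$zinc.asc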
I need help in preparing something similar to Solver (from Excel) in R.
I am trying to develop a tool which will take some points and find the parameters of a curve that fits them. The curve has the shape of a lognormal distribution function. I need 4 parameters that I can use in an Excel formula:
y = b*loglindist(x*a, c, d), where b scales the result, a scales the input value, c is the mean, and d is the standard deviation.
I have to minimize the SSE between the actual points and the points estimated from the curve.
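In R terms, the curve and the objective I have in mind look like this (using R's lognormal CDF plnorm in place of loglindist):
# value of the curve for a parameter vector par = c(a, b, c, d)
curve.value <- function(par, x) par[2] * plnorm(par[1] * x, meanlog = par[3], sdlog = par[4])
# sum of squared errors between the actual y values and the curve
sse.value <- function(par, x, y) sum((curve.value(par, x) - y)^2)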
My code is as follows:
input <- read.csv2("C:/Users/justyna.andrulewicz/Desktop/R estimator/data.csv", sep=",")
data <- as.matrix(input)
x <- nrow(data)
max_reach <- 90 ### max y
# solver
# constraints
a_min <- 0.000000001
b_min <- 0.5*max_reach
c_min <- 0.000000001
d_min <- 0.000000001
a_max <- 1000
b_max <- max_reach
c_max <- 1000
d_max <- 1000
constrains <- round(matrix(c(a_min,b_min,c_min,d_min,a_max,b_max,c_max,d_max), nrow=2, ncol=4, byrow=TRUE, dimnames=list(c("mins", "maxes"), c("a","b","c","d"))),1)
constrains
ui <- matrix(c(1,0,0,0, -1,0,0,0, 0,1,0,0, 0,-1,0,0, 0,0,1,0, 0,0,-1,0, 0,0,0,1, 0,0,0,-1), ncol=4, byrow=TRUE)
ci <- round(c(a_min, -a_max, b_min, -b_max, c_min, -c_max, d_min, -d_max), 1)
a <- 100
b <- 0.4*max_reach
c <- 1
d <- 1
par <-as.numeric(c(a,b,c,d))
par
spends <- as.numeric(data[,1])
estimated <- b*plnorm(a*spends, meanlog = c, sdlog = d, log = FALSE)
actual <- as.numeric(data[,2])
se <- estimated-actual
sse <- function(se) sum(se^2)
sse(se)
optimization <- constrOptim(par, sse, NULL, ui, ci, method="SANN")
results<-round(as.numeric(optimization$par,nrow=4,ncol=1),6)
results
but it doesn't work: the results make no sense, as you can see in the plot.
step <- 10^3
y <- 1:100
spends<-y*step
a_est<-optimization$par[1]
b_est<-optimization$par[2]
c_est<-optimization$par[3]
d_est<-optimization$par[4]
curve<-b_est*plnorm(a_est*spends, meanlog = c_est, sdlog = d_est, log = FALSE)
est <-plot(spends, curve, type="l", col="blue")
act <-plot(data, type="p", col="red")
Please help: can I replace constrOptim with another function that would better address my problem? Or is there another way to solve it?
I am trying to solve for the parameters of a gamma distribution that is convolved with both normal and lognormal distributions. I can experimentally derive parameters for both the normal and lognormal components, hence, I just want to solve for the gamma params.
I have attempted 3 approaches to this problem:
1) generating convolved random datasets (i.e. rnorm()+rlnorm()+rgamma()) and using least-squares regression on the linear- or log-binned histograms of the data (not shown, but was very biased by RNG and didn't optimize well at all.)
2) "brute-force" numerical integration of the convolving functions (example code #1)
3) numerical integration approaches w/ the distr package. (example code #2)
I have had limited success with all three approaches. Importantly, these approaches seem to work well for "nominal" values of the gamma parameters, but they all begin to fail when k (shape) is low and theta (scale) is high, which is where my experimental data resides. Please find the examples below.
Straight-up numerical Integration
# make the functions
f.N <- function(n) dnorm(n, N[1], N[2])
f.L <- function(l) dlnorm(l, L[1], L[2])
f.G <- function(g) dgamma(g, G[1], scale=G[2])
# make convolved functions
f.Z <- function(z) integrate(function(x,z) f.L(z-x)*f.N(x), -Inf, Inf, z)$value # L+N
f.Z <- Vectorize(f.Z)
f.Z1 <- function(z) integrate(function(x,z) f.G(z-x)*f.Z(x), -Inf, Inf, z)$value # G+(L+N)
f.Z1 <- Vectorize(f.Z1)
# params of Norm, Lnorm, and Gamma
N <- c(0,5)
L <- c(2.5,.5)
G <- c(2,7) # this distribution is the one we ultimately want to solve for.
# G <- c(.5,10) # 0<k<1
# G <- c(.25,5e4) # ballpark params of experimental data
# generate some data
set.seed(1)
rN <- rnorm(1e4, N[1], N[2])
rL <- rlnorm(1e4, L[1], L[2])
rG <- rgamma(1e4, G[1], scale=G[2])
Z <- rN + rL
Z1 <- rN + rL + rG
# check the fit
hist(Z,freq=F,breaks=100, xlim=c(-10,50), col=rgb(0,0,1,.25))
hist(Z1,freq=F,breaks=100, xlim=c(-10,50), col=rgb(1,0,0,.25), add=T)
z <- seq(-10,50,1)
lines(z,f.Z(z),lty=2,col="blue", lwd=2) # looks great... convolution performs as expected.
lines(z,f.Z1(z),lty=2,col="red", lwd=2) # this works perfectly so long as k(shape)>=1
# I'm guessing the failure to compute when shape 0 < k < 1 is due to
# numerical integration problems, but I don't know how to fix it.
integrate(dgamma, -Inf, Inf, shape=1, scale=1) # ==1
integrate(dgamma, 0, Inf, shape=1, scale=1) # ==1
integrate(dgamma, -Inf, Inf, shape=.5, scale=1) # !=1
integrate(dgamma, 0, Inf, shape=.5, scale=1) # != 1
# Let's try to estimate gamma anyway, supposing k>=1
optimFUN <- function(par, N, L) {
print(par)
-sum(log(f.Z1(Z1[1:4e2])))
}
f.G <- function(g) dgamma(g, par[1], scale=par[2])
fitresult <- optim(c(1.6,5), optimFUN, N=N, L=L)
par <- fitresult$par
lines(z,f.Z1(z),lty=2,col="green3", lwd=2) # not so great... likely better w/ more data,
# but it is SUPER slow and I observe large step sizes.
Attempting convolution via the distr package
# params of Norm, Lnorm, and Gamma
N <- c(0,5)
L <- c(2.5,.5)
G <- c(2,7) # this distribution is the one we ultimately want to solve for.
# G <- c(.5,10) # 0<k<1
# G <- c(.25,5e4) # ballpark params of experimental data
# make the distributions and their convolutions
dN <- Norm(N[1], N[2])
dL <- Lnorm(L[1], L[2])
dG <- Gammad(G[1], G[2])
d.NL <- d(convpow(dN+dL,1))
d.NLG <- d(convpow(dN+dL+dG,1)) # for large values of theta, no matter how I change
# getdistrOption("DefaultNrFFTGridPointsExponent"), grid size is always wrong.
# Generate some data
set.seed(1)
rN <- r(dN)(1e4)
rL <- r(dL)(1e4)
rG <- r(dG)(1e4)
r.NL <- rN + rL
r.NLG <- rN + rL + rG
# check the fit
hist(r.NL, freq=F, breaks=100, xlim=c(-10,50), col=rgb(0,0,1,.25))
hist(r.NLG, freq=F, breaks=100, xlim=c(-10,50), col=rgb(1,0,0,.25), add=T)
z <- seq(-10,50,1)
lines(z,d.NL(z), lty=2, col="blue", lwd=2) # looks great... convolution performs as expected.
lines(z,d.NLG(z), lty=2, col="red", lwd=2) # this appears to work perfectly
# for most values of K and low values of theta
# this is looking a lot more promising... how about estimating gamma params?
optimFUN <- function(par, dN, dL) {
tG <- Gammad(par[1],par[2])
d.NLG <- d(convpow(dN+dL+tG,1))
p <- d.NLG(r.NLG)
p[p==0] <- 1e-15 # because sometimes very low probabilities evaluate to 0...
# ...and logs don't like that.
-sum(log(p))
}
fitresult <- optim(c(1,1e4), optimFUN, dN=dN, dL=dL)
fdG <- Gammad(fitresult$par[1], fitresult$par[2])
fd.NLG <- d(convpow(dN+dL+fdG,1))
lines(z,fd.NLG(z), lty=2, col="green3", lwd=2) ## this works perfectly when ~k>1 & ~theta<100... but throws
## "Error in validityMethod(object) : shape has to be positive" when k decreases and/or theta increases
## (boundary subject to RNG).
Can I speed up the integration in example 1? Can I increase the grid size in example 2 (the distr package)? How can I address the k < 1 problem? Can I rescale the data in a way that better facilitates evaluation at high theta values?
Is there a better way altogether?
Help!
Well, convolution of a function with a Gaussian kernel calls for the use of Gauss–Hermite quadrature. In R it is implemented in a dedicated package: https://cran.r-project.org/web/packages/gaussquad/gaussquad.pdf
UPDATE
For convolution with a Gamma distribution this package might be useful as well, via Gauss–Laguerre quadrature.
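A rough sketch of the Gamma case follows (an assumption on my part: gaussquad exposes generalized Gauss–Laguerre rules via glaguerre.quadrature.rules(n, alpha), with abscissas in $x and weights in $w, analogous to the Hermite rules below; the substitution t = theta*u turns the Gamma density into the Laguerre weight u^(k-1)*exp(-u), which also covers 0 < k < 1):
library(gaussquad)
k <- 0.5; theta <- 10                     # example Gamma(shape, scale), including k < 1
n.quad <- 128
# generalized Gauss-Laguerre rule with weight u^(k-1) * exp(-u) (assumed API)
rule.gl <- glaguerre.quadrature.rules(n.quad, alpha = k - 1)[[n.quad]]
# density of X + Y at z, where X ~ Gamma(k, theta) and Y has density g
conv.G <- function(z, g = dnorm) {
  sum(rule.gl$w * g(z - theta * rule.gl$x)) / gamma(k)
}
conv.G(1.0)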
UPDATE II
Here is quick code to convolve a Gaussian with a lognormal; hopefully it does not have too many bugs, and it prints a reasonable-looking graph.
library(gaussquad)
n.quad <- 170 # integration order
# get the particular weights/abscissas as data frame with 2 observables and n.quad observations
rule <- ghermite.h.quadrature.rules(n.quad, mu = 0.0)[[n.quad]]
# test function: integrate f(x) = 1 against the weight exp(-x^2) from -Inf to Inf;
# we should get sqrt(pi) as the answer
f <- function(x) {
1.0
}
q <- ghermite.h.quadrature(f, rule)
print(q - sqrt(pi))
# convolution of lognormal with gaussian
# because of the G-H rules, we have to make our own function
# for simplicity, sigmas are one and mus are zero
sqrt2 <- sqrt(2.0)
c.LG <- function(z) {
#print(z)
f.LG <- function(x) {
t <- (z - x*sqrt2)
q <- 0.0
if (t > 0.0) {
l <- log(t)
q <- exp( - 0.5*l*l ) / t
}
q
}
ghermite.h.quadrature(Vectorize(f.LG), rule) / (pi*sqrt2)
}
library(ggplot2)
p <- ggplot(data = data.frame(x = 0), mapping = aes(x = x))
p <- p + stat_function(fun = Vectorize(c.LG))
p <- p + xlim(-1.0, 5.0)
print(p)
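As a quick sanity check, the quadrature result can be compared against base R's adaptive integrate() for the same standard normal and standard lognormal:
# brute-force convolution of dlnorm and dnorm at a single point
f.check <- function(z) integrate(function(x) dlnorm(z - x) * dnorm(x), -Inf, Inf)$value
print(c.LG(2.0) - f.check(2.0))   # should be close to zero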
In R, using the np package, I have created the bandwidths for a conditional density. What I would like to do is, given some new conditional vector, sample from the resulting distribution.
Current code:
library('np')
# Generate some test data.
somedata = data.frame(replicate(10,runif(100, 0, 1)))
# Conditional variables.
X <- data.frame(somedata[, c('X1', 'X2', 'X3')])
# Dependent variables.
Y <- data.frame(somedata[, c('X4', 'X5', 'X6')])
# Warning, this can be slow (but shouldn't be too bad).
bwsome = npcdensbw(xdat=X, ydat=Y)
# TODO: Given some vector t of conditional data, how can I sample from the resulting distribution?
I am quite new to R, so while I did read the package documentation, I haven't been able to figure out whether what I envision makes sense or is possible. If necessary, I would happily use a different package.
Here is Example 2.49 from https://cran.r-project.org/web/packages/np/vignettes/np_faq.pdf ; it gives the following solution for 2 variables:
###
library(np)
data(faithful)
n <- nrow(faithful)
x1 <- faithful$eruptions
x2 <- faithful$waiting
## First compute the bandwidth vector
bw <- npudensbw(~x1 + x2, ckertype = "gaussian")
plot(bw, view = "fixed", ylim = c(0, 3))
## Next generate draws from the kernel density (Gaussian)
n.boot <- 1000
i.boot <- sample(1:n, n.boot, replace = TRUE)
x1.boot <- rnorm(n.boot,x1[i.boot],bw$bw[1])
x2.boot <- rnorm(n.boot,x2[i.boot],bw$bw[2])
## Plot the density for the bootstrap sample using the original
## bandwidths
plot(npudens(~x1.boot+x2.boot,bws=bw$bw), view = "fixed")
Following this hint from @coffeejunky, the following is a possible solution to your problem with 6 variables:
## Generate some test data.
somedata = data.frame(replicate(10, runif(100, 0, 1)))
## Conditional variables.
X <- data.frame(somedata[, c('X1', 'X2', 'X3')])
## Dependent variables.
Y <- data.frame(somedata[, c('X4', 'X5', 'X6')])
## First compute the bandwidth vector
n <- nrow(somedata)
bw <- npudensbw(~X$X1 + X$X2 + X$X3 + Y$X4 + Y$X5 + Y$X6, ckertype = "gaussian")
plot(bw, view = "fixed", ylim = c(0, 3))
## Next generate draws from the kernel density (Gaussian)
n.boot <- 1000
i.boot <- sample(1:n, n.boot, replace=TRUE)
x1.boot <- rnorm(n.boot, X$X1[i.boot], bw$bw[1])
x2.boot <- rnorm(n.boot, X$X2[i.boot], bw$bw[2])
x3.boot <- rnorm(n.boot, X$X3[i.boot], bw$bw[3])
x4.boot <- rnorm(n.boot, Y$X4[i.boot], bw$bw[4])
x5.boot <- rnorm(n.boot, Y$X5[i.boot], bw$bw[5])
x6.boot <- rnorm(n.boot, Y$X6[i.boot], bw$bw[6])
## Plot the density for the bootstrap sample using the original
## bandwidths
ob1 <- npudens(~x1.boot + x2.boot + x3.boot + x4.boot + x5.boot + x6.boot, bws = bw$bw)
plot(ob1, view = "fixed", ylim = c(0, 3))
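If you specifically need draws conditional on a given vector t of the X variables (as in your original question), one option is to resample the rows with product-kernel weights in X and then add kernel noise only in the Y directions. A sketch, reusing the bandwidths computed above (the conditioning values in t are made up for illustration):
## Sample (approximately) from the kernel estimate of Y given X = t
t <- c(0.5, 0.5, 0.5)                          # hypothetical conditioning values for X1, X2, X3
w <- dnorm(t[1], X$X1, bw$bw[1]) *
     dnorm(t[2], X$X2, bw$bw[2]) *
     dnorm(t[3], X$X3, bw$bw[3])               # kernel weight of each observation in X-space
i.boot <- sample(1:n, n.boot, replace = TRUE, prob = w)
y4.boot <- rnorm(n.boot, Y$X4[i.boot], bw$bw[4])
y5.boot <- rnorm(n.boot, Y$X5[i.boot], bw$bw[5])
y6.boot <- rnorm(n.boot, Y$X6[i.boot], bw$bw[6])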