If statement R missing value - r

I was trying to execute this function, but I kept getting an error from my if statement: Error in if (value[1][i] < 0) { : missing value where TRUE/FALSE needed:
Monte_Carlo <- function(trial)
{
  S_T <- S_o*exp((r - q - (1/2)*sigma^2)*period + sigma*rnorm(trial, mean = 0, sd = 1))
  K <- matrix(100, nrow = 1, ncol = 20)
  value <- K - S_T
  for(i in 1:trial)
  {
    if(value[1][i] < 0)
    {
      value[1][i] = 0;
    }
  }
  return(mean(value)*exp(-r))
}

You are indexing your matrix incorrectly. value[1] will return a single value, which you are then trying to take the ith element of, for i up to trial.
If you instead assign to the ith element in the 1st row (which it looks as if you are trying to do), then it will work:
Monte_Carlo <- function(trial)
{
  S_T <- S_o*exp((r - q - (1/2)*sigma^2)*period + sigma*rnorm(trial, mean = 0, sd = 1))
  K <- matrix(100, nrow = 1, ncol = 20)
  value <- K - S_T
  for(i in 1:trial)
  {
    if(value[1, i] < 0)
    {
      value[1, i] = 0;
    }
  }
  return(mean(value)*exp(-r))
}
You can remove the for loop and if statement altogether with pmax, which is vectorized and returns, elementwise, the maximum of value and 0:
Monte_Carlo <- function(trial)
{
  S_T <- S_o*exp((r - q - (1/2)*sigma^2)*period + sigma*rnorm(trial, mean = 0, sd = 1))
  K <- matrix(100, nrow = 1, ncol = trial)
  value <- K - S_T
  .value <- pmax(value, 0)
  return(mean(.value)*exp(-r))
}
As a matter of good programming practice, I would add r, S_o, q, sigma, and period as arguments to your function, so that it doesn't depend on global variables.
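For illustration, here is a minimal sketch of that parameterized version (the strike K is also passed in, and the values in the example call are hypothetical, not from the question):

Monte_Carlo <- function(trial, S_o, K, r, q, sigma, period)
{
  S_T <- S_o*exp((r - q - (1/2)*sigma^2)*period + sigma*rnorm(trial, mean = 0, sd = 1))
  value <- pmax(K - S_T, 0)  # payoff floored at zero, as with pmax above
  return(mean(value)*exp(-r))
}

# hypothetical example call
Monte_Carlo(10000, S_o = 100, K = 100, r = 0.05, q = 0.02, sigma = 0.2, period = 1)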

Related

MCMC for estimating negative binomial distribution

I want to estimate the parameters of a negative binomial distribution using the MCMC Metropolis-Hastings algorithm. In other words, I have a sample:
set.seed(42)
y <- rnbinom(20, size = 3, prob = 0.2)
and I want to write an algorithm that will estimate the size parameter and the prob parameter.
My work so far
I defined the prior distribution of size as Poisson:
prior_r <- function(r) {
  return(dpois(r, lambda = 2, log = TRUE))
}
And the prior distribution of prob as uniform on [0, 1]:
prior_prob <- function(prob) {
  return(dunif(prob, min = 0, max = 1, log = TRUE))
}
Moreover, for simplicity, I defined log-likelihood and joint probability functions:
loglikelihood <- function(data, r, prob) {
  loglikelihoodValue <- sum(dnorm(data, mean = r, sd = prob, log = TRUE))
  return(loglikelihoodValue)
}

joint <- function(r, prob) {
  data <- y
  return(loglikelihood(data, r, prob) + prior_r(r) + prior_prob(prob))
}
Finally, the whole algorithm:
run_mcmc <- function(startvalue, iterations) {
  chain <- array(dim = c(iterations + 1, 2))
  chain[1, ] <- startvalue
  for (i in 1:iterations) {
    proposal_r <- rpois(1, lambda = chain[i, 1])
    proposal_prob <- chain[i, 2] + runif(1, min = -0.2, max = 0.2)
    quotient <- joint(proposal_r, proposal_prob) - joint(chain[i, 1], chain[i, 2])
    if (runif(1, 0, 1) < min(1, exp(quotient))) chain[i + 1, ] <- c(proposal_r, proposal_prob)
    else chain[i + 1, ] <- chain[i, ]
  }
  return(chain)
}
The problem
The problem I'm having is that even when I run it with starting values very close to the correct ones:
iterations <- 2000
startvalue <- c(4, 0.25)
res <- run_mcmc(startvalue, iterations)
I obtain a posterior distribution which is obviously wrong. For example:
> colMeans(res)
[1] 11.963018 0.994533
As you can see, size ends up very close to 12, and prob is located at 1.
Do you know what causes this?
Change dnorm in loglikelihood to dnbinom and fix the proposal for prob so it doesn't go outside (0,1):
set.seed(42)
y <- rnbinom(20, size = 3, prob = 0.2)

prior_r <- function(r) {
  return(dpois(r, lambda = 2, log = TRUE))
}

prior_prob <- function(prob) {
  return(dunif(prob, min = 0, max = 1, log = TRUE))
}

loglikelihood <- function(data, r, prob) {
  loglikelihoodValue <- sum(dnbinom(data, size = r, prob = prob, log = TRUE))
  return(loglikelihoodValue)
}

joint <- function(r, prob) {
  return(loglikelihood(y, r, prob) + prior_r(r) + prior_prob(prob))
}

run_mcmc <- function(startvalue, iterations) {
  chain <- array(dim = c(iterations + 1, 2))
  chain[1, ] <- startvalue
  for (i in 1:iterations) {
    proposal_r <- rpois(1, lambda = chain[i, 1])
    proposal_prob <- chain[i, 2] + runif(1, min = max(-0.2, -chain[i, 2]), max = min(0.2, 1 - chain[i, 2]))
    quotient <- joint(proposal_r, proposal_prob) - joint(chain[i, 1], chain[i, 2])
    if (runif(1, 0, 1) < min(1, exp(quotient))) {
      chain[i + 1, ] <- c(proposal_r, proposal_prob)
    } else {
      chain[i + 1, ] <- chain[i, ]
    }
  }
  return(chain)
}
iterations <- 2000
startvalue <- c(4, 0.25)
res <- run_mcmc(startvalue, iterations)
colMeans(res)
#> [1] 3.1009495 0.1988177
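As a routine sanity check (my addition, not part of the fix above), you can also discard a burn-in period and look at trace plots of the chain:

burn_in <- 500
kept <- res[(burn_in + 1):nrow(res), ]
colMeans(kept)
par(mfrow = c(2, 1))
plot(kept[, 1], type = "l", main = "trace of size")
plot(kept[, 2], type = "l", main = "trace of prob")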

My variogram code result differs from the variog() result

I am writing code to produce a variogram. To validate my result, I checked against geoR::variog(), but the two variograms are different.
I tried to read the code of variog() to see what happens under the hood, but there is so much going on that I can't seem to understand it. In my code, I am using the parameters X-coordinate, Y-coordinate, data value, number of lags, minimum lag value, lag interval, azimuth (angle in degrees; 90 corresponds to the vertical direction), angle tolerance (in degrees), and maximum bandwidth.
variogram = function(xcor, ycor, data, nlag, minlag, laginv, azm, atol, maxbandw){
  dl <- length(data)
  lowangle <- azm - atol
  upangle <- azm + atol
  gamlag <- integer(nlag)
  n <- integer(nlag)
  dist <- pairdist(xcor, ycor)  # pairdist(), presumably from the spatstat package
  maxd <- max(dist)
  llag <- seq(minlag, minlag + (nlag-1) * laginv, by = laginv)
  hlag <- llag + laginv
  for(i in 1:dl){
    for(j in i:dl){
      if(i != j){
        if(xcor[j] - xcor[i] == 0)
          theta <- 90
        else
          theta <- 180/pi * atan((ycor[j] - ycor[i])/(xcor[j] - xcor[i]))
        for(k in 1:nlag){
          d <- dist[j, i]
          b <- abs(d * sin(theta - azm))
          if((llag[k] <= d & d < hlag[k]) & (lowangle <= theta & theta < upangle) & (b <= maxbandw)){
            gamlag[k] <- gamlag[k] + (data[i] - data[j])^2;
            n[k] <- n[k] + 1
          }
        }
      }
    }
  }
  gamlag <- ifelse(n == 0, NA, gamlag/(2*n))
  tmp <- data.frame("lag" = llag, "gamma" = gamlag)
  return(tmp)
}
The function call for the above code:
ideal_variogram_2 <- variogram(data3[,1], data3[,2], data3[,3], 18, 0, 0.025, 90, 45, 1000000)
ideal_variogram_2 <- na.omit(ideal_variogram_2)
plot(ideal_variogram_2$lag, ideal_variogram_2$gamma, main = "Using my code")
The function call for variog():
geodata1 <- as.geodata(data3, coords.col = 1:2, data.col = 3)
ideal_variogram_1 <- variog(geodata1, coords = geodata1$coords, data = geodata1$data, option = "bin", uvec = seq(0, 0.45, by = 0.025), direction = pi/2, tolerance = pi/4)
df <- data.frame(u = ideal_variogram_1$u, v = ideal_variogram_1$v)
plot(df$u, df$v, main = "Using variog()")
The two variograms that I got are at the following link: Variogram

Generating random numbers in two vectors in R given a specified condition

I want to create two vectors in R containing values randomly drawn from a uniform distribution, subject to a condition: for example, if a number in vector A is < 50, then the corresponding number in vector B should be greater than 50.
I use this code, but the condition is applied only to the first element of the vectors:
nrows = 20
A = NaN*matrix(1, nrows, 1)
B = NaN*matrix(1, nrows, 1)
repeat {
  A[] = round(runif(nrows, 10, 100), digits = 2)
  B[] = round(runif(nrows, 10, 100), digits = 2)
  if(A > 50 & B > 50) {
    break
  }
}
This should work for you, if I understood the problem correctly:
nrows = 20
A = NaN * matrix(1, nrows, 1)
B = NaN * matrix(1, nrows, 1)
for (i in 1:nrows) {
  A[i] <- round(runif(1, 10, 100), digits = 2)
  if (A[i] < 50) {
    B[i] <- round(runif(1, 50, 100), digits = 2)
  } else {
    B[i] <- round(runif(1, 10, 100), digits = 2)
  }
}
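If you prefer to avoid the loop entirely, here is a vectorized sketch of the same idea (my variant, not part of the answer above), drawing both candidate vectors and selecting elementwise with ifelse:

nrows <- 20
A <- round(runif(nrows, 10, 100), digits = 2)
# where A < 50, take a draw from (50, 100); elsewhere a draw from (10, 100)
B <- round(ifelse(A < 50, runif(nrows, 50, 100), runif(nrows, 10, 100)), digits = 2)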

Efficient code to map genotype matrix in R

Hi, I want to convert a matrix of genotypes, encoded as triples, to a matrix encoded as 0, 1, 2, i.e.
c(1,0,0) <-> 0; c(0,1,0) <-> 1; c(0,0,1) <-> 2
First, here is some code to generate the matrix that needs to be reduced.
# generate genotypes
expand.G = function(n, p){
  probs = runif(n = p)
  G012.rows = matrix(rbinom(2, prob = probs, n = n*p), nrow = p)
  colnames(G012.rows) = paste('s', 1:n, sep = '')
  rownames(G012.rows) = paste('g', 1:p, sep = '')
  G012.cols = t(G012.rows)
  expand.geno = function(g){
    if(g == 0){return(c(1,0,0))}
    if(g == 1){return(c(0,1,0))}
    if(g == 2){return(c(0,0,1))}
  }
  gtype = c()
  for(i in 1:length(c(G012.cols))){
    gtype = c(
      gtype,
      expand.geno(c(G012.cols)[i])
    )
  }
  length(gtype)
  G = matrix(gtype, byrow = T, nrow = p)
  colnames(G) = paste('s', rep(1:n, each = 3), c('1','2','3'), sep = '')
  rownames(G) = paste('g', 1:p, sep = '')
  print(G[1:10, 1:15])
  print(G012.rows[1:10, 1:5])
  return(G)
}
The output has 3n columns and p rows, where n is the sample size and p is the number of genotypes. Now we can reduce the matrix back to 0,1,2 coding with the following functions:
reduce012 = function(x){
  if(identical(x, c(1,0,0))){
    return(0)
  } else if(identical(x, c(0,1,0))){
    return(1)
  } else if(identical(x, c(0,0,1))){
    return(2)
  } else {
    return(NA)
  }
}

reduce.G = function(G.gen){
  G.vec =
    mapply(function(i, j) reduce012(as.numeric(G.gen[i, (3*j-2):(3*j)])),
           i = expand.grid(1:(ncol(G.gen)/3), 1:nrow(G.gen))[,2],
           j = expand.grid(1:(ncol(G.gen)/3), 1:nrow(G.gen))[,1]
    )
  G = matrix(G.vec, nrow = ncol(G.gen)/3, ncol = nrow(G.gen))
  colnames(G) = rownames(G.gen)
  return(G)
}

reduce.G.loop = function(G.gen){
  G = matrix(NA, nrow = ncol(G.gen)/3, ncol = nrow(G.gen))
  for(i in 1:nrow(G.gen)){
    for(j in 1:(ncol(G.gen)/3)){
      G[j,i] = reduce012(as.numeric(G.gen[i, (3*j-2):(3*j)]))
    }
  }
  colnames(G) = rownames(G.gen)
  return(G)
}
The output is n rows by p columns. It is incidental, but intentional, that the matrix encoded as 0,1,2 is the transpose of the matrix encoded as triples.
The code is not particularly fast. What is bothering me is that the timing grows with n^2. Can you explain this, or supply more efficient code?
G = expand.G(1000,20)
system.time(reduce.G(G))
system.time(reduce.G.loop(G))
G = expand.G(2000,20)
system.time(reduce.G(G))
system.time(reduce.G.loop(G))
G = expand.G(4000,20)
system.time(reduce.G(G))
system.time(reduce.G.loop(G))
You can simply make an accessor lookup table:
decode <- array(dim = c(3, 3, 3))
decode[cbind(1, 0, 0) + 1] <- 0
decode[cbind(0, 1, 0) + 1] <- 1
decode[cbind(0, 0, 1) + 1] <- 2
And then, just do:
matrix(decode[matrix(t(G + 1), ncol = 3, byrow = TRUE)], ncol = nrow(G))
This fully vectorized R version will give you the same matrix (without dimnames), and it is super fast.
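For convenience, you could wrap the one-liner in a small function and restore the column names the same way reduce.G does (the name reduce.G.lut is mine, not from the answer):

reduce.G.lut <- function(G.gen) {
  out <- matrix(decode[matrix(t(G.gen + 1), ncol = 3, byrow = TRUE)],
                ncol = nrow(G.gen))
  colnames(out) <- rownames(G.gen)  # same naming convention as reduce.G
  out
}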
Yet, if you have much larger matrices, you should really use Rcpp for both memory and timing issues.
This seems to be about three times faster than your version (renamed reduce.G.orig):
reduce.G <- function(G) {
  varmap = c("100" = 0, "010" = 1, "001" = 2)
  result <- do.call(rbind, lapply(1:(ncol(G)/3) - 1, function(val)
    varmap[paste(G[, 3*val+1], G[, 3*val+2], G[, 3*val+3], sep = "")]))
  colnames(result) <- rownames(G)
  result
}
system.time(reduce.G(G))
# user system elapsed
# 0.156 0.000 0.155
system.time(reduce.G.orig(G))
# user system elapsed
# 0.444 0.000 0.441
identical(reduce.G(G), reduce.G.orig(G))
# [1] TRUE

Loop inside another loop in R

I have a problem with the results of a loop inside another loop. It runs the inner loop only once, chooses the best solution for the first row, and then stops.
I would like to remember the best solution for every row of the matrix zmienne. What am I doing wrong?
schaffer <- function(xx)
{
  x1 <- xx[1]
  x2 <- xx[2]
  fact1 <- (sin(x1^2 - x2^2))^2 - 0.5
  fact2 <- (1 + 0.001*(x1^2 + x2^2))^2
  y <- 0.5 + fact1/fact2
  return(y)
}
gradient_descent <- function(func, step, niter) {
  N <- 3  # N - number of random points
  zmienne <- matrix(runif(N*2, min = -100, max = 100), N, 2)
  print(zmienne)
  h = 0.001;
  iter_count = 0;
  for (i in 1:N) {
    x_0 <- zmienne[i, ]
    x_n = x_0;
    for (j in 1:niter) {
      func_grad = (func(x_n + h) - func(x_n))/h;
      if (abs(func_grad) < 0.0001) { break; }
      x_n = x_n - step * func_grad;
      print(x_n)
      iter_count = iter_count + 1
    }
  }
  return(list(iterations = niter, best_value = func_grad, best_state = x_n, x0 = x_0))
}
solution_m1 <- gradient_descent(schaffer, 0.1, 20)
solution_m1
I think this is what you want:
gradient_descent <- function(func, step, niter) {
  N <- 3  # N - number of random points
  zmienne <- matrix(runif(N*2, min = -100, max = 100), N, 2)
  print(zmienne)
  h = 0.001;
  iter_count = 0;
  best.vals <- NULL
  for (i in 1:N) {
    x_0 <- zmienne[i, ]
    x_n = x_0;
    for (j in 1:niter) {
      func_grad = (func(x_n + h) - func(x_n))/h;
      if (abs(func_grad) < 0.0001) { break; }
      x_n = x_n - step * func_grad;
      print(x_n)
      iter_count = iter_count + 1
    }
    best.vals <- c(best.vals, func_grad)
  }
  return(list(iterations = iter_count, best_value = best.vals, best_state = x_n, x0 = x_0))
}
solution_m1 <- gradient_descent(schaffer, 0.1, 20)
solution_m1
The return should not be inside the inner loop but at the end of the function.
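If you also want the best state for each starting row, not just the gradient value, you could collect those the same way as best.vals; a sketch of the extra lines (my addition, following the same pattern):

best.states <- matrix(NA, nrow = N, ncol = 2)  # before the outer loop, next to best.vals
# ... after the inner loop finishes for row i:
best.states[i, ] <- x_n
# and return it along with the rest:
return(list(iterations = iter_count, best_value = best.vals,
            best_state = best.states, x0 = zmienne))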
