While loop using a probability density function - r

I want to make a while loop that, until n is not equal to 105, it takes a random value x, and if the value x respects some restrictions, it store that value.
The x has to be between 15 and 33
It also simulates a variable num that has to be a random variable from a uniform distribution betwenn 0 and 1.
library(ggplot2)
dev.new()
n = 0
set.seed(1684)
x = seq(15, 33, by = 0.1)
f <- function(x) {
out <- ifelse(
x < 15 | 33 < x,
0,
ifelse(
15 <= x & x <= 24,
(2*(x-15))/((33-15)*(24-15)),
ifelse(
24 < x & x <= 33,
(2*(33-x))/((33-15)*(33-24)),
NA_real_
)))
if (any((is.na(out) | is.nan(out)) & (!is.na(x) & !is.nan(x)))) {
warning("f(x) undefined for some input values")
}
out
}
while (n != 105) {
n = n + 1
x1 = runif(1, min = 15 , max = 33)
num = runif(1, min = 0 , max = 1)
if ((num < (f(x1)/2/(33-15))) && (num == (18*(f(x1)/2)))) {
x_accept = c(x_accept, x1)
}
}
I've made this but it doesn't work, the x_accept is just an empty list

Related

C stack usage 15927808 is too close to the limit

library(ggplot2)
dev.new()
n = 0
x_accept <- list()
set.seed(1684)
x = seq(15, 33, by = 0.1)
f <- function(x) {
out <- ifelse(
x < 15 | 33 < x,
0,
ifelse(
15 <= x & x <= 24,
(2*(x-15))/((33-15)*(24-15)),
ifelse(
24 < x & x <= 33,
(2*(33-x))/((33-15)*(33-24)),
NA_real_
)))
if (any((is.na(out) | is.nan(out)) & (!is.na(x) & !is.nan(x)))) {
warning("f(x) undefined for some input values")
out
}
while (n != 105) {
n = n + 1
x1 = runif(1, min = 15 , max = 33)
num = runif(1, min = 0 , max = 1)
if (num < (f(x1)/2/(33-15)) && num <- (18*(f(x1)/2)))
x_accept = list(x_accept, x1)
}
}
histo <- data.frame(x_histo = x1, y_histo = x1)
dat <- data.frame(x = x, y = f(x))
ggplot(dat, aes(x, y)) +
geom_line()
ggplot(histo, aes(x_histo, y_histo) +
geom_histogram()
It gives me this error, it's the first time I've seen it and I don't really how what it means or how to solve it. C stack usage 15927808 is too close to the limit.

stat_bin() can only have an x or y aesthetic

library(ggplot2)
dev.new()
n = 0
set.seed(1684)
x = seq(15, 33, by = 0.1)
f <- function(x) {
out <- ifelse(
x < 15 | 33 < x,
0,
ifelse(
15 <= x & x <= 24,
(2*(x-15))/((33-15)*(24-15)),
ifelse(
24 < x & x <= 33,
(2*(33-x))/((33-15)*(33-24)),
NA_real_
)))
if (any((is.na(out) | is.nan(out)) & (!is.na(x) & !is.nan(x)))) {
warning("f(x) undefined for some input values")
}
out
}
while (n != 105) {
n = n + 1
x1 = runif(1, min = 15 , max = 33)
num = runif(1, min = 0 , max = 1)
if ((num < (f(x1)/2/(33-15))) && (num == (18*(f(x1)/2)))) {
x_accept = c(x_accept, x1)
}
}
histo <- data.frame(x = x_accept)
dat <- data.frame(x = x, y = f(x))
ggplot(dat, aes(x, y)) +
geom_line(aes(x, y)) +
geom_histogram(aes(x))
From the previous code it gives me this error:
Error in f():
! stat_bin() can only have an x or y aesthetic.
Data from the dput(head(dat)):
structure(list(x = c(15, 15.1, 15.2, 15.3, 15.4, 15.5), y = c(0,
0.00123456790123456, 0.00246913580246913, 0.00370370370370371,
0.00493827160493828, 0.00617283950617284)), row.names = c(NA,
6L), class = "data.frame")
I would appreciate some insight into why it's doing this

Maximum likelihood estimation using a step function

I would like to fit a step function (two parameters) to some data. The code below is not doing the job. I wonder if the round() argument is the problem. However, I also tried to divide the parameters to make small (e.g. 0.001) changes in the parameters to cause significant changes. But that did not change the fit. Any idea how to properly fit this function to the data?
dat <- c(rbinom(100, 100, 0.95), rbinom(50, 100, 0.01), rbinom(100, 100, 0.95))
plot(dat/100)
stepFnc <- function(parms, t) {
par <- as.list(parms)
(c(rep(1-(1e-5), par$t1), rep(1e-5, par$t2), rep(1-(1e-5), t)))[1:t]
}
lines(stepFnc(c(t1 = 50, t2 = 50), length(dat)))
loglik <- function(t1 = 50, t2 = 50) {
fit <- snowStepCurve(parms = list(t1=round(t1,0), t2=round(t2,0)), t = length(dat))
lines(fit)
-sum(dbinom(x = dat, size = 100, prob = fit, log = T), na.rm = T)
}
mle <- bbmle::mle2(loglik)
mle#coef
lines(snowStepCurve(mle#coef, length(dat)), lwd = 2, lty = 2, col = "orange")
With discrete x data I'd do a brute-force approach:
x <- seq_along(dat)
foo <- function(x, lwr, upr) {
y <- x
y[x <= lwr | x > upr] <- mean(dat[x <= lwr | x > upr])
y[x > lwr & x <= upr] <- mean(dat[x > lwr & x <= upr])
y
}
SSE <- function(lwr, upr) {
sum((dat - foo(x, lwr, upr))^ 2)
}
limits <- expand.grid(lwr = x, upr = x)
limits <- limits[limits$lwr <= limits$upr,]
nrow(limits)
SSEvals <- mapply(SSE, limits$lwr, limits$upr)
id <- which(SSEvals == min(SSEvals))
optlims <- limits[id,]
meanouter <- mean(dat[x <= optlims$lwr | x > optlims$upr])
meaninner <- mean(dat[x > optlims$lwr & x <= optlims$upr])
bar <- function(x) {
y <- x
y[x <= optlims$lwr | x > optlims$upr] <- meanouter
y[x > optlims$lwr & x <= optlims$upr] <- meaninner
y
}
plot(dat/100)
curve(bar(x) / 100, add = TRUE)

My variogram code result different from variog() result

I am writing code for producing a variogram. For validating my result, I checked with geoR::variog() but both variograms are different.
I tried to understand the code of variog() to see what happens under the hood but there are so many things happening that I can't seem to understand it. I, in my code, am using the parameters X-coordinate, Y-coordiante, data value, number of lags, minimum lag value, lag interval, azimuth (angle in degrees; 90 corresponds to vertical direction), angle tolerance (in degrees) and maximum bandwidth.
variogram = function(xcor, ycor, data, nlag, minlag, laginv, azm, atol, maxbandw){
dl <- length(data)
lowangle <- azm - atol
upangle <- azm + atol
gamlag <- integer(nlag)
n <- integer(nlag)
dist <- pairdist(xcor, ycor)
maxd <- max(dist)
llag <- seq(minlag, minlag + (nlag-1) * laginv, by = laginv)
hlag <- llag + laginv
for(i in 1:dl){
for(j in i:dl){
if(i != j){
if(xcor[j]- xcor[i] == 0)
theta <- 90
else
theta <- 180/pi * atan((ycor[j] - ycor[i])/(xcor[j] - xcor[i]))
for(k in 1:nlag){
d <- dist[j, i]
b <- abs(d * sin(theta - azm))
if((llag[k] <= d & d < hlag[k]) & (lowangle <= theta & theta < upangle) & (b <= maxbandw)){
gamlag[k] <- gamlag[k] + (data[i] - data[j])^2;
n[k] <- n[k] + 1
}
}
}
}
}
gamlag <- ifelse(n == 0, NA, gamlag/(2*n))
tmp <- data.frame("lag" = llag, "gamma" = gamlag)
return(tmp)
}
function call for the above code
ideal_variogram_2 <- variogram(data3[,1], data3[,2], data3[,3], 18, 0, 0.025, 90, 45, 1000000)
ideal_variogram_2 <- na.omit(ideal_variogram_2)
plot(ideal_variogram_2$lag, ideal_variogram_2$gamma, main = "Using my code")
function call for variog()
geodata1 <- as.geodata(data3, coords.col = 1:2, data.col = 3)
ideal_variogram_1 <- variog(geodata1, coords = geodata1$coords, data = geodata1$data, option = "bin", uvec = seq(0, 0.45, by = 0.025), direction = pi/2, tolerance = pi/4)
df <- data.frame(u = ideal_variogram_1$u, v = ideal_variogram_1$v)
plot(df$u, df$v, main = "Using variog()")
The 2 variograms that I got are at the following link:
Variogram

Increment inside for loop? in r

I am trying to make 1000 simulations to see how many of my f values are in the reject region which is above 1.48 and below .67.
I have this but the variables don't increment as they should:
for (k in 1:1000){
Adata = rnorm(100, mean = 30, sd = 10)
Bdata = rnorm(100, mean = 45, sd = 10)
f = (sd(Bdata)^2)/(sd(Adata)^2)
if (f > 1.48){
a = 0
a <- a + 1}
if (f < .67){
b = 0
b <- b + 1}
}
a
[1] 1
b
[1] 1
The end goal is find the sum of a and b
I have also tried:
for (k in 1000){
Adata = rnorm(100, mean = 30, sd = 10)
Bdata = rnorm(100, mean = 45, sd = 10)
f = (sd(Bdata)^2)/(sd(Adata)^2)
a = f > 1.48
b = f < .67
}
y = sum(a)+sum(b)
y
[1] 0
What other way would I increment to get the total amount of f's that are in the reject region?
In your first example, you are reset-ing a and b to zero every time that the if statement is true. Therefore, the max value will always be 1.
To fix, rearrange those lines:
a = 0 #initialize outside of the loop
b = 0 #initialize outside of the loop
set.seed(1) # added for SO as you are using rnorm, remove this when you run your simulations
for (k in 1:1000){
Adata = rnorm(100, mean = 30, sd = 10)
Bdata = rnorm(100, mean = 45, sd = 10)
f = (sd(Bdata)^2)/(sd(Adata)^2)
if (f > 1.48){
a <- a + 1}
if (f < .67){
b <- b + 1}
}
I now get a = 13 and b = 29
That said, don't increment variables like this in R. You can take advantage of matrices and vectorized operations.
First Create simulation matrices
set.seed(1)
Adata = matrix(data = rnorm(100*1000, mean = 30, sd = 10), nrow = 1000, ncol = 100)
Bdata = matrix(data = rnorm(100*1000, mean = 30, sd = 10), nrow = 1000, ncol = 100)
Then calculate your f score for each line:
f <- apply(Bdata,1,function(x){sd(x)^2})/apply(Adata,1,function(x){sd(x)^2})
now you can simply use:
sum(f > 1.48)
[1] 15
and:
sum(f < .67)
[1] 25
In the first block of code you are resetting a and b to 0 every iteration, then possibly adding 1 (so the most they will ever be is 1 because next iteration they will be set to 0 again).
In the second block you are setting a and b to either TRUE or FALSE, but you are overwriting the value, so you only see the value from the final iteration (actually that loop only runs once with k equal to 1000, but if you had 1:1000 there then you would only see the last iteration).
The simple solution is to move the a=0 and b=0 (or better a <- 0 and b <- 0) outside of the loop.
The better approach is to use something in the apply family of functions.
I would suggest something like:
out <- replicate(1000, {
Adata = rnorm(100, mean = 30, sd = 10)
Bdata = rnorm(100, mean = 45, sd = 10)
(sd(Bdata)^2)/(sd(Adata)^2)
})
sum( out > 1.48 )
sum( out < 0.67 )
sum( out > 1.48 | out < 0.67 )

Resources