Trying to call a function 1000 times with replicate - r

simulate <- function(bankroll, bet, max_iterations) { #gambler's ruin function
iteration = 1
while (bankroll > 0 & iteration < max_iterations) {
win <- sample(0:1, size = 1) #either loos or win
if (win) {
bankroll <- (bankroll + bet)
} else {
bankroll <- (bankroll - bet)
}
iteration <- iteration + 1
}
return(list(c(iteration, bankroll)))
}
simulate(1000, 100, 100)
I am trying to call the above function 1000 times to get the vector. Then I need to count how many of the iterations were less than 100. Any suggestion? Thanks

This can simply be reduced to:
simulate <- function(bankroll, bet, max_iterations){
bankroll + bet * sum(sample(c(-1, 1), max_iterations, TRUE))
}
Then you could do:
sum(replicate(1000, simulate(1000, 100, 100))<100)

This is a simple version using base
simulate <- function(bankroll, bet, max_iterations) { #gambler's ruin function
iteration = 1
while (bankroll > 0 & iteration < max_iterations) {
win <- sample(0:1, size = 1) #either loos or win
if (win) {
bankroll <- (bankroll + bet)
} else {
bankroll <- (bankroll - bet)
}
iteration <- iteration + 1
}
return(bankroll)
}
calls <- replicate(n = 1000,expr = simulate(1000, 100, 100))
calls_less_100 <- calls < 100
result <- length(calls[calls_less_100])
result
probability <- result/length(calls)
probability

Related

How to restart iteration in R loop?

I made a loop to generate a Markov Chain. If the proposal does not satisfy a condition, I want to restart the iteration with a new proposal? Is there a way to do this? My current code is shown below for reference. Currently, it sets the current chain's value to the previous one. But I don't want that. I want it to just restart the "i". So if i=2, and the condition in line 4 is not satisfied, I then want it to stay at i=2 until it is satisfied. Thanks in advance.
ABC_MCMC<-function(n){
for (i in 2:n){
prop<-rnorm(1,mean=chain[i-1],sd=1)
if (ABC(prop)==T & prop>=0){
h_ratio<-(dgamma(prop,shape=prior_alpha,rate=prior_beta)/dgamma(chain[i-1],shape=prior_alpha,rate=prior_beta))*
(dnorm(x=chain[i-1],mean=prop,sd=1)/dnorm(x=prop,mean=chain[i-1],sd=1))
u<-runif(1)
if (min(1,h_ratio)>u) {chain[i]=prop} else {chain[i]=chain[i-1]}
}
else{chain[i]=chain[i-1]}
}
return(chain<<-chain)
}
This is more of a comment than of an answer but to keep the code formatting I'm posting as an answer.
Replace the code inside the for loop for the code below.
while(TRUE) {
prop <- rnorm(1, mean = chain[i - 1L], sd = 1)
if (ABC(prop) && prop >= 0) {
h_ratio<-(dgamma(prop,shape=prior_alpha,rate=prior_beta)/dgamma(chain[i-1],shape=prior_alpha,rate=prior_beta))*
(dnorm(x=chain[i-1],mean=prop,sd=1)/dnorm(x=prop,mean=chain[i-1],sd=1))
u<-runif(1)
if (min(1,h_ratio)>u) {chain[i]=prop} else {chain[i]=chain[i-1]}
break
} else {chain[i] <- chain[i-1]}
}
Edit
The function below seems to be what is asked for.
ABC_MCMC <- function(n){
for (i in 2:n){
# loops until condition (ABC(prop) & prop >= 0) is met
while(TRUE) {
prop <- rnorm(1, mean = chain[i-1], sd = 1)
if (ABC(prop) & prop >= 0) {
h_ratio <- (dgamma(prop, shape = prior_alpha, rate = prior_beta)/dgamma(chain[i - 1L], shape = prior_alpha, rate = prior_beta)) *
(dnorm(chain[i - 1L], prop, 1)/dnorm(prop, chain[i - 1L], 1))
u <- runif(1)
if (min(1, h_ratio) > u) {
chain[i] <- prop
} else {
chain[i] <- chain[i - 1L]
}
break
}
}
}
# function return value
chain
}

Double sampling method in R

My initial code for double sampling is the following. I did only one sample.
# Data
samples<-matrix(NA,nrow = 12, ncol = 2000)
for (i in 1:12) {
samples[i,]<- rbinom(2000,1,prob = 0.05)
}
# Double Sampling Plan
accept<-rep(0,12)
for (i in 1:12) {
if (sum(samples[i,1:80])<=5){
accept[i]<-1
} else if (sum(samples[i,1:80]<=8) & sum(samples[i,1:80]>5) ) {
if (sum(samples[i,1:160])<=12) {
accept[i]<-1
}
}
}
sum(accept)
Since I generated randomly from Bernoulli, every time you run the code, the results will not be the same.
I want 100 repetitions of this double sample.
My solution:
nm=double(100)
for (j in 1:100){
# Data
samples<-matrix(NA,nrow = 12, ncol = 2000)
for (i in 1:12) {
samples[i,]<- rbinom(2000,1,prob = 0.05)
}
# Double Sampling Plan
accept<-rep(0,12)
for (i in 1:12) {
if (sum(samples[i,1:80])<=5){
accept[i]<-1
} else if (sum(samples[i,1:80]<=8) & sum(samples[i,1:80]>5) ) {
if (sum(samples[i,1:160])<=12) {
accept[i]<-1
}
}
}
nm[j]=sum(accept)
}
mean(nm)
What do you think?
If we follow the proposition of #Onyambu, we can embeded one simulation inside a function and call it in a loop like this :
one_double_sampling <- function(){
# Data
samples<-matrix(NA,nrow = 12, ncol = 2000)
for (i in 1:12) {
samples[i,]<- rbinom(2000,1,prob = 0.05)
}
# Double Sampling Plan
accept<-rep(0,12)
for (i in 1:12) {
if (sum(samples[i, 1:80])<=5){
accept[i]<-1
} else if (sum(samples[i,1:80]<=8) & sum(samples[i,1:80]>5) ) {
if (sum(samples[i,1:160])<=12) {
accept[i]<-1
}
}
}
return(sum(accept))
}
set.seed(123)
# number of sample
n <- 100
# stock the result
res <- rep(0, n)
for(i in 1:n){
res[i] <- one_double_sampling()
}
# mean
mean(res)
Definitly your code is correct. For people interresting by the double sampling method I advise you to see this.
Edit 1
In one line code based on Onyambu advise :
mean(replicate(n, one_double_sampling()))

How to speed up R loop operations over 3 Dimensional Array of variable size?

I have an R script that I wrote which simulates how long particles need to equilibrate in a room. The room is represented with a 3 dimensional array of size max_size. Currently the script works as it is intended, but the real run time of the script is so slow that it is nearly unusable. I have tried adjusting my algorithm to reduce the number of times that the array needs to be looped over, but it is still many magnitudes slower than when I wrote this code in FORTRAN previously. How can I adjust this script in order to reduce the runtime?
Below is my code.
#!/usr/bin/env Rscript
options(warn=1)
max_size <- 10
use_partition = 1
cube <- array(dim=c(max_size, max_size, max_size))
cube[,,]=0.0
diffusion_coefficient <- 0.175
room_dimension <- 5
mol_speed <- 250 # speed of molecules based on 100 g/mol at RT
timestep <- (room_dimension/mol_speed)/max_size # h in seconds
dist_between <- room_dimension/max_size
DTerm <- diffusion_coefficient*timestep/(dist_between*dist_between)
cube[1,1,1] <- 1.0e21 # initial mass of particles
time <- 0 # keep up with accumulated system time
ratio <- 0
part_height <- floor(max_size*0.75)
center <- floor(max_size/2)
# the partition is defined as:
# 1 space (index) "wide" on the X axis
# spans 75% (rounded) of the "height" (Y axis)
# spans 100% of the "depth" (z axis)
# partition spaces are indicated with the value -69.0
# partition spaces cannot be diffused from nor into
if(use_partition == 1) {
for (j in 1:part_height) {
cube[center,j,] <- -69.0 # flag partition spaces with -69.0
}
}
repeat { #R replacement for Do While
for (i in 1:nrow(cube)) {
for (j in 1:ncol(cube)) {
for (k in 1:max_size) {
if(cube[i,j,k] != -69.0) {
for (off in seq(from=-1, to=1, by=2)) {
if (i+off >= 1 && i+off <= max_size) {
change <- (cube[i,j,k] - cube[i+off,j,k])*DTerm
cube[i,j,k] = cube[i,j,k] - change
cube[i+off,j,k] = cube[i+off,j,k] + change
}
if (j+off >= 1 && j+off <= max_size) {
change <- (cube[i,j,k] - cube[i,j+off,k])*DTerm
cube[i,j,k] = cube[i,j,k] - change
cube[i,j+off,k] = cube[i,j+off,k] + change
}
if (k+off >= 1 && k+off <= max_size) {
change <- (cube[i,j,k] - cube[i,j,k+off])*DTerm
cube[i,j,k] = cube[i,j,k] - change
cube[i,j,k+off] = cube[i,j,k+off] + change
}
}
}
}
}
}
time = time+timestep
#check mass for consistency
sumval <- 0.0
maxval <- cube[1,1,1]
minval <- cube[1,1,1]
for(i in 1:nrow(cube)) {
for(j in 1:ncol(cube)) {
for(k in 1:max_size) {
if(cube[i,j,k] != -69.0) {
if(cube[i,j,k] > maxval) {
maxval = cube[i,j,k]
}
if(cube[i,j,k] < minval) {
minval = cube[i,j,k]
}
sumval = sumval + cube[i,j,k]
}
}
}
}
ratio <- minval / maxval
cat(time, " ", ratio, " ", sumval, "\n")
if(ratio >= 0.99) {
break
}
}
cat("Box equilibrated in ", time, " seconds of simulated time.\n")

For statement nested in while statement not working

I am learning how to use while and for loops, and am having difficulty executing a for loop within a while loop. I am trying to recurse a simple procedure over a vector, and have the while loop set the conditionals for which parts of the vector are operated upon. This is really just meant as an exercise to understand how to use for and while loops in conjunction.
data <- c(1:200)
n=0
while(n < 100){
for(x in data){
n=x+1
if(x >= 10 & x < 100){
print("double digit")
} else {
print("single digit")
}}}
I want to stop the while loop when the procedure hits x=100 of the vector that runs from 1:200. But instead, the for loop runs through every element within the vector, from 1:200, failing to stop executing when n hits 100.
Would appreciate any advice to help out a new coder, thanks.
I have also tried
for(x in data){
n=x+1
while(n < 100){
if(x >= 10 & x < 100){
print("double digit")
} else {
print("single digit")
}}}
But the code does not stop executing.
First let's try a for loop. Each time through it n will be set to the loop counter plus 1. If this result is between 10 and 100, print a message, if not print something else. Note that no loop depends on n .
data <- c(1:200)
n = 0
for (x in data) {
n = x + 1
if (n < 100) {
if (x >= 10 && x < 100) {
print("double digit")
} else {
print("single digit")
}
}
}
x
#[1] 200
n
#[1] 201
Now for a while loop. I believe it is much simpler, it only envolves one variable, n.
n <- 0
while (n < 100) {
n = n + 1
if (n < 100) {
if (n >= 10) {
print("double digit")
} else {
print("single digit")
}
}
}
n
#[1] 100

Improving run time for R with nested for loops

My reproducible R example:
f = runif(1500,10,50)
p = matrix(0, nrow=1250, ncol=250)
count = rep(0, 1250)
for(i in 1:1250) {
ref=f[i]
for(j in 1:250) {
p[i,j] = f[i + j - 1] / ref-1
if(p[i,j] == "NaN") {
count[i] = count[i]
}
else if(p[i,j] > (0.026)) {
count[i] = (count[i] + 1)
ref = f[i + j - 1]
}
}
}
To be more precise, I have a set of 600 f-series and this code runs 200 times for each f-series. Currently I am doing the iterations in loops and most of the operations are element-wise. My random variables are f, the condition if(p[i,j] > (0.026)), and the number 0.026 in itself.
One can drastically reduce the run-time by vectorizing my code and using functions, specifically the apply family, but I am rusty with apply and looking for some advice to proceed in the right direction.
It is quite easy to put for loops in Rcpp. I just copy-pasted your code to Rcpp and haven't checked the validity. In case of discrepancy, let me know. fCpp returns the list of p and c.
cppFunction('List fCpp(NumericVector f) {
const int n=1250;
const int k=250;
NumericMatrix p(n, k);
NumericVector c(n);
for(int i = 0; i < n; i++) {
double ref=f[i];
for(int j = 0; j < k; j++) {
p(i,j) = f[i+j+1]/ref-1;
if(p(i,j) == NAN){
c[i]=c[i];
}
else if(p(i,j) > 0.026){
c[i] = c[i]+1;
ref = f[i+j+1];
}
}
}
return List::create(p, c);
}')
Benchmark
set.seed(1)
f = runif(1500,10,50)
f1 <- function(f){
p = matrix(0, nrow=1250, ncol=250)
count = rep(0, 1250)
for(i in 1:1250) {
ref=f[i]
for(j in 1:250) {
p[i,j] = f[i + j - 1] / ref-1
if(p[i,j] == "NaN") {
count[i] = count[i]
}
else if(p[i,j] > (0.026)) {
count[i] = (count[i] + 1)
ref = f[i + j - 1]
}
}
}
list(p, count)
}
microbenchmark::microbenchmark(fCpp(f), f1(f), times=10L, unit="relative")
Unit: relative
expr min lq mean median uq max neval
fCpp(f) 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 10
f1(f) 785.8484 753.7044 734.4243 764.5883 718.0868 644.9022 10
Values returned by fCpp(f) and f1(f) are essentially identical, apart from column 1 of p matrix returned by f1 is filled with 0s.
system.time(a <- f1(f))[3]
#elapsed
# 2.8
system.time(a1 <- fCpp(f))[3]
#elapsed
# 0
all.equal( a[[1]], a1[[1]])
#[1] "Mean relative difference: 0.7019406"
all.equal( a[[2]], a1[[2]])
#[1] TRUE
Here is an implementation using while, although it is taking much longer than nested for loops which is a bit counter intuitive.
f1 <- function() {
n <- 1500
d <- 250
f = runif(n,1,5)
f = embed(f, d)
f = f[-(n-d+1),]
count = rep(0, n-d)
for(i in 1:(n-d)) {
tem <- f[i,]/f[i,1] - 1
ti <- which(t[-d] > 0.026)[1]
while(ti < d & !is.na(ti)) {
ti.plus = ti+1
tem[ti.plus:d] = f[i, ti.plus:d] / tem[ti]
count[i] = count[i] + 1
ti <- ti + which(tem[ti.plus:d-1] > 0.026)[1]
}
f[i] = tem
}
list(f, count)
}
system.time(f1())
#elapsed
#6.365
#ajmartin, your logic was better and reduced the number of iterations I was attempting. Here is the improved version of your code in R:
f1 <- function() {
n <- 1500
d <- 250
f = runif(n,1,5)
count = rep(0, n-d)
for(i in 1:(n-d)) {
tem <- f[i:(i+d-1)] / f[i] - 1
ind = which(tem>0.026)[1]
while(length(which(tem>0.026))){
count[i] = count[i] + 1
tem[ind:d] = f[ind:d] / tem[ind] - 1
ind = ind - 1 + (which(tem[ind:d] > 0.026)[1])
}
}
list(f, count)
}
system.time(f1())[3]
# elapsed
# 0.09
Implementing this in Rcpp will further reduce system-time but I can't install Rtools as my current machine does not have admin rights. Meanwhile this helps.

Resources