Step change in input parameter with time in R - r

If anyone can help me how to incorporate step in input parameter with respect to time. Please see the code below:
library(ReacTran)
N <- 10 # No of grids
L = 0.10 # thickness, m
l = L/2 # Half of thickness, m
k= 0.412 # thermal conductivity, W/m-K
cp = 3530 # thermal conductivity, J/kg-K
rho = 1100 # density, kg/m3
T_int = 57.2 # Initial temperature , degC
T_air = 19 # air temperature, degC
h_air = 20 # Convective heat transfer coeff of air, W/m2-K
xgrid <- setup.grid.1D(x.up = 0, x.down = l, N = N)
x <- xgrid$x.mid
alpha.coeff <- (k*3600)/(rho*cp)
Diffusion <- function (t, Y, parms){
tran <- tran.1D(C=Y, flux.down = 0, C.up = T_air, a.bl.up = h_air,
D = alpha.coeff, dx = xgrid)
list(dY = tran$dC, flux.up = tran$flux.up,
flux.down = tran$flux.down)
}
# Initial condition
Yini <- rep(T_int, N)
times <- seq(from = 0, to = 2, by = 0.2)
print(system.time(
out <- ode.1D(y = Yini, times = times, func = Diffusion,
parms = NULL, dimens = N)))
plot(times, out[,(N+1)], type = "l", lwd = 2, xlab = "time, hr", ylab = "Temperature")
I want the T_air to be constant for the 1st hour and it changes to another value for remaining 1 hr. This would be a step changein the parameter. How can I do it?
Any help would be appreciated.
Thanks,

Related

How to select appropriate sin() terms to fit a time series using R

I want to fit a time series with sin() function because it has a form of some periods (crests and troughs). However, for now I only guessed it, e.g., 1 month, two months, ..., 1 year, 2 year. Is there some function in R to estimate the multiple periods in a data series?
Below is an example which I want to fit it using the combination of sin() functions. The expression in lm() is a try after several guesses (red line in the Figure below). How can I find the sin() terms with appropriate periods?
t <- 1:365
y <- c(-1,-1.3,-1.6,-1.8,-2.1,-2.3,-2.5,-2.7,-2.9,-3,-2,-1.1,-0.3,0.5,1.1,1.6,2.1,2.5,2.8,3.1,3.4,3.7,4.2,4.6,5,5.3,5.7,5.9,6.2,5.8,5.4,5,4.6,4.2,3.9,3.6,3.4,3.1,2.9,2.8,2.6,2.5,2.3,1.9,1.5,1.1,0.8,0.5,0.2,0,-0.1,-0.3,-0.4,-0.5,-0.5,-0.6,-0.7,-0.8,-0.9,-0.8,-0.6,-0.3,-0.1,0.1,0.4,0.6,0.9,1.1,1.3,1.5,1.7,2.1,2.4,2.7,3,3.3,3.5,3.8,4.3,4.7,5.1,5.5,5.9,6.2,6.4,6.6,6.7,6.8,6.8,6.9,7,6.9,6.8,6.7,
6.5,6.4,6.4,6.3,6.2,6,5.9,5.7,5.6,5.5,5.4,5.4,5.1,4.9,4.8,4.6,4.5,4.4,4.3,3.9,3.6,3.3,3,2.8,2.6,2.4,2.6,2.5,2.4,2.3,2.3,2.2,2.2,2.3,2.4,2.4,2.5,2.5,2.6,2.6,2.4,2.1,1.9,1.8,1.6,1.4,1.3,1,0.7,0.5,0.2,0,-0.2,-0.4,-0.2,-0.1,0.1,0.1,0.1,0.1,0.1,0.1,0,0,-0.1,-0.1,-0.2,-0.2,-0.3,-0.3,-0.4,-0.5,-0.5,-0.6,-0.7,-0.7,-0.8,-0.8,-0.8,-0.9,-0.9,-0.9,-1.3,-1.6,-1.9,-2.1,-2.3,-2.6,-2.9,-2.9,-2.9,-2.9,
-2.9,-3,-3,-3,-2.8,-2.7,-2.5,-2.4,-2.3,-2.2,-2.1,-2,-2,-1.9,-1.9,-1.8,-1.8,-1.8,-1.9,-1.9,-2,-2.1,-2.2,-2.2,-2.3,-2.4,-2.5,-2.6,-2.7,-2.8,-2.9,-2.9,-2.9,-2.9,-2.9,-2.9,-2.9,-2.9,-2.9,-2.9,-2.8,-2.8,-2.7,-2.7,-2.6,-2.6,-2.8,-3,-3.1,-3.3,-3.4,-3.5,-3.6,-3.5,-3.4,-3.3,-3.3,-3.2,-3,-2.9,-2.8,-2.8,-2.7,-2.6,-2.6,-2.6,-2.5,-2.6,-2.7,-2.8,-2.8,-2.9,-3,-3,-3,-3,-2.9,-2.9,-2.9,-2.9,-2.9,-2.8,
-2.7,-2.6,-2.5,-2.4,-2.3,-2.3,-2.1,-1.9,-1.8,-1.7,-1.5,-1.4,-1.3,-1.5,-1.7,-1.8,-1.9,-2,-2.1,-2.2,-2.4,-2.5,-2.6,-2.7,-2.8,-2.8,-2.9,-3.1,-3.2,-3.3,-3.4,-3.5,-3.5,-3.6,-3.6,-3.5,-3.4,-3.3,-3.2,-3.1,-3,-2.7,-2.3,-2,-1.8,-1.5,-1.3,-1.1,-0.9,-0.7,-0.6,-0.5,-0.3,-0.2,-0.1,-0.3,-0.5,-0.6,-0.7,-0.8,-0.9,-1,-1.1,-1.1,-1.2,-1.2,-1.2,-1.2,-1.2,-0.8,-0.4,-0.1,0.2,0.5,0.8,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0.6,0.3,0,-0.2,-0.5,-0.7,-0.8)
dt <- data.frame(t = t, y = y)
plot(x = dt$t, y = dt$y)
lm <- lm(y ~ sin(2*3.1416/365*t)+cos(2*3.1416/365*t)+
sin(2*2*3.1416/365*t)+cos(2*2*3.1416/365*t)+
sin(2*4*3.1416/365*t)+cos(2*4*3.1416/365*t)+
sin(2*5*3.1416/365*t)+cos(2*5*3.1416/365*t)+
sin(2*6*3.1416/365*t)+cos(2*6*3.1416/365*t)+
sin(2*0.5*3.1416/365*t)+cos(2*0.5*3.1416/365*t),
data = dt)
summary(lm)$adj.r.squared
plot(dt$y); lines(predict(lm), type = "l", col = "red")
Package forecast has the fourier function (see here), which allows you to model fourier series terms based on time series objects.
For example:
library(forecast)
dt$y <- ts(dt$y, frequency = 365)
lm<- lm(y ~ fourier(y, K=6), dt)
plot(dt$t, dt$y); lines(predict(lm), type = "l", col = "red")
Following my comment to the question,
In catastrophic-failure's answer replace Mod by Re as in SleuthEye's answer. Then call nff(y, 20, col = "red").
I realized that there is another correction to function nff to be made:
substitute length(x) or xlen for the magical number 73.
Here is the function corrected.
nff = function(x = NULL, n = NULL, up = 10L, plot = TRUE, add = FALSE, main = NULL, ...){
#The direct transformation
#The first frequency is DC, the rest are duplicated
dff = fft(x)
#The time
xlen <- length(x)
t = seq_along(x)
#Upsampled time
nt = seq(from = 1L, to = xlen + 1L - 1/up, by = 1/up)
#New spectrum
ndff = array(data = 0, dim = c(length(nt), 1L))
ndff[1] = dff[1] #Always, it's the DC component
if(n != 0){
ndff[2:(n+1)] <- dff[2:(n+1)] #The positive frequencies always come first
#The negative ones are trickier
ndff[(length(ndff) - n + 1):length(ndff)] <- dff[(xlen - n + 1L):xlen]
}
#The inverses
indff = fft(ndff/xlen, inverse = TRUE)
idff = fft(dff/xlen, inverse = TRUE)
if(plot){
if(!add){
plot(x = t, y = x, pch = 16L, xlab = "Time", ylab = "Measurement",
main = ifelse(is.null(main), paste(n, "harmonics"), main))
lines(y = Re(idff), x = t, col = adjustcolor(1L, alpha = 0.5))
}
lines(y = Re(indff), x = nt, ...)
}
ret = data.frame(time = nt, y = Mod(indff))
return(ret)
}
y <- c(-1,-1.3,-1.6,-1.8,-2.1,-2.3,-2.5,-2.7,-2.9,-3,-2,-1.1,-0.3,0.5,1.1,1.6,2.1,2.5,2.8,3.1,3.4,3.7,4.2,4.6,5,5.3,5.7,5.9,6.2,5.8,5.4,5,4.6,4.2,3.9,3.6,3.4,3.1,2.9,2.8,2.6,2.5,2.3,1.9,1.5,1.1,0.8,0.5,0.2,0,-0.1,-0.3,-0.4,-0.5,-0.5,-0.6,-0.7,-0.8,-0.9,-0.8,-0.6,-0.3,-0.1,0.1,0.4,0.6,0.9,1.1,1.3,1.5,1.7,2.1,2.4,2.7,3,3.3,3.5,3.8,4.3,4.7,5.1,5.5,5.9,6.2,6.4,6.6,6.7,6.8,6.8,6.9,7,6.9,6.8,6.7,
6.5,6.4,6.4,6.3,6.2,6,5.9,5.7,5.6,5.5,5.4,5.4,5.1,4.9,4.8,4.6,4.5,4.4,4.3,3.9,3.6,3.3,3,2.8,2.6,2.4,2.6,2.5,2.4,2.3,2.3,2.2,2.2,2.3,2.4,2.4,2.5,2.5,2.6,2.6,2.4,2.1,1.9,1.8,1.6,1.4,1.3,1,0.7,0.5,0.2,0,-0.2,-0.4,-0.2,-0.1,0.1,0.1,0.1,0.1,0.1,0.1,0,0,-0.1,-0.1,-0.2,-0.2,-0.3,-0.3,-0.4,-0.5,-0.5,-0.6,-0.7,-0.7,-0.8,-0.8,-0.8,-0.9,-0.9,-0.9,-1.3,-1.6,-1.9,-2.1,-2.3,-2.6,-2.9,-2.9,-2.9,-2.9,
-2.9,-3,-3,-3,-2.8,-2.7,-2.5,-2.4,-2.3,-2.2,-2.1,-2,-2,-1.9,-1.9,-1.8,-1.8,-1.8,-1.9,-1.9,-2,-2.1,-2.2,-2.2,-2.3,-2.4,-2.5,-2.6,-2.7,-2.8,-2.9,-2.9,-2.9,-2.9,-2.9,-2.9,-2.9,-2.9,-2.9,-2.9,-2.8,-2.8,-2.7,-2.7,-2.6,-2.6,-2.8,-3,-3.1,-3.3,-3.4,-3.5,-3.6,-3.5,-3.4,-3.3,-3.3,-3.2,-3,-2.9,-2.8,-2.8,-2.7,-2.6,-2.6,-2.6,-2.5,-2.6,-2.7,-2.8,-2.8,-2.9,-3,-3,-3,-3,-2.9,-2.9,-2.9,-2.9,-2.9,-2.8,
-2.7,-2.6,-2.5,-2.4,-2.3,-2.3,-2.1,-1.9,-1.8,-1.7,-1.5,-1.4,-1.3,-1.5,-1.7,-1.8,-1.9,-2,-2.1,-2.2,-2.4,-2.5,-2.6,-2.7,-2.8,-2.8,-2.9,-3.1,-3.2,-3.3,-3.4,-3.5,-3.5,-3.6,-3.6,-3.5,-3.4,-3.3,-3.2,-3.1,-3,-2.7,-2.3,-2,-1.8,-1.5,-1.3,-1.1,-0.9,-0.7,-0.6,-0.5,-0.3,-0.2,-0.1,-0.3,-0.5,-0.6,-0.7,-0.8,-0.9,-1,-1.1,-1.1,-1.2,-1.2,-1.2,-1.2,-1.2,-0.8,-0.4,-0.1,0.2,0.5,0.8,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0.6,0.3,0,-0.2,-0.5,-0.7,-0.8)
res <- nff(y, 20, col = "red")
str(res)
#> 'data.frame': 3650 obs. of 2 variables:
#> $ time: num 1 1.1 1.2 1.3 1.4 1.5 1.6 1.7 1.8 1.9 ...
#> $ y : num 1.27 1.31 1.34 1.37 1.4 ...
Created on 2022-10-17 with reprex v2.0.2
The functions sinusoid and mvrm from package BNSP allow one to specify the number of harmonics and if that number is too high, the algorithm can remove some of the unnecessary terms and avoid overfitting.
# Specify the model
model <- y ~ sinusoid(t, harmonics = 20, amplitude = 1, period = 365)
# Fit the model
m1 <- mvrm(formula = model, data = dt, sweeps = 5000, burn = 3000, thin = 2, seed = 1, StorageDir = getwd())
# ggplot
plotOptionsM <- list(geom_point(data = dt, aes(x = t, y = y)))
plot(x = m1, term = 1, plotOptions = plotOptionsM, intercept = TRUE, quantiles = c(0.005, 0.995), grid = 100)
In this particular example, among the 20 harmonics, the 19 appear to be important.

ReacTran 2D Diffusion model with non-conformable arrays error in R, matrix and setup.grid.1D and setup.grid.2D

I am trying to model diffusion in 2D in R with the diffusion rate being dependent on the density, y. I have completed this model in 1D, but trying to change it 2D it keep getting the error code:
Error in -VF.grid$x.int * D.grid$x.int * diff(rbind(C.x.up, C, C.x.down, non-conformable arrays
I have no data, as it is a simulation. My code is as follows;
library(ReacTran)
N <- 50 # number of grid cells
Nx <-50
Ny <-50
XX <- 10 # total size
dy <- dx <- XX/N # grid size
Dy <- Dx <- 0.1 # diffusion coeff, X- and Y-direction
r <- 0.005 # growth rate
ini <- 10 # initial value at x=0
N2 <- ceiling(N/2)
K <- 100 #Carrying Capacity
A0<- 2 #pop ini size
x.grid <- setup.grid.1D(x.up = 0, x.down = 1, N = N)
y.grid <- setup.grid.1D(x.up = 0, x.down = 1, N = N)
grid2D <- setup.grid.2D(x.grid, y.grid)
D.grid <- setup.prop.2D(value = Dx, y.value = Dy, grid = grid2D) #diffusion coefficient on cell interfaces
v.grid <- setup.prop.2D(value = 0, y.value=0, grid = grid2D) #advection velocity
A.grid <- setup.prop.2D(value = 1, y.value=1, grid = grid2D) #interface area
AFDW.grid <- setup.prop.2D(value = 0, y.value=0, grid = grid2D) #advction weight difference
VF.grid <- setup.prop.2D(value = 0, y.value=1, grid = grid2D) #volume fraction
# The model equations - using the grids
Diff2Db <- function (t, y, parms) {
U <- matrix(nrow = N, ncol = N, data = y)
dCONC <- tran.2D(C = y, C.x.up=0, C.x.down=0,
C.y.up=0, C.y.down=0,
grid = grid2D, D.grid = D.grid,
D.x=(y-1)^2 + 1, D.y=(y-1)^2 + 1, dx=dx, dy=dy,
A.grid = A.grid,
VF.grid = VF.grid, AFDW.grid = AFDW.grid, v.grid = v.grid
)$dC
return (list(dCONC))
}
# initial condition: 0 everywhere, except in central point
y <- matrix(nrow = N, ncol = N, data = 0)
y[N2,N2] <- ini # initial concentration in the central point...
times <- 0:8
outb <- ode.2D (y = y, func = Diff2Db, t = times, parms = NULL,
dim = c(49, N), lrw = 160000)
I am out of ideas to try to fix it. Any help would be greatly appreciated.
Thank you in advance

solving for steady state PDE using steady.1D (rootSolve R)

I am trying to obtain a steady state for a spatially-explicit Lotka-Volterra competition model of two competing species (with spatial diffusion). Here is the model (without diffusion term):
http://en.wikipedia.org/wiki/Competitive_Lotka%E2%80%93Volterra_equations
where I let r1 = r2 = rG & alpha12 = alpha 21 = a. The carrying capacity of species 1 is assumed to vary linearly across space x i.e. K1 = x (while K2 = 0.5). And we assume Neumann BC. The spatial domain x is from 0 to 1.
Here is the example of coding in R for this model:
LVcomp1D <- function (time, state, parms, N, Da, x, dx) {
with (as.list(parms), {
S1 <- state[1:N]
S2 <- state[(N+1):(2*N)]
## Dispersive fluxes; zero-gradient boundaries
FluxS1 <- -Da * diff(c(S1[1], S1, S1[N]))/dx
FluxS2 <- -Da * diff(c(S2[1], S2, S2[N]))/dx
## LV Competition
InteractS1 <- rG * S1 * (1- (S1/x)- ((a*S2)/x))
InteractS2 <- rG * S2 * (1- (S2/(K2))- ((a*S1)/(K2)))
## Rate of change = -Flux gradient + Interaction
dS1 <- -diff(FluxS1)/dx + InteractS1
dS2 <- -diff(FluxS2)/dx + InteractS2
return (list(c(dS1, dS2)))
})
}
pars <- c(rG = 1.0, a = 0.8, K2 = 0.5)
dx <- 0.001
x <- seq(0, 1, by = dx)
N <- length(x)
Da <- 0.001
state <- c(rep(0.5, N), rep(0.5, N))
print(system.time(
out <- steady.1D (y = state, func = LVcomp1D, parms = pars,
nspec = 2, N = N, x = x, dx = dx, Da = Da, pos = TRUE)
))
mf <- par(mfrow = c(2, 2))
plot(out, grid = x, xlab = "x", mfrow = NULL,
ylab = "N(x)", main = c("Species 1", "Species 2"), type = "l")
par(mfrow = mf)
The problem is I cannot get the steady state solutions of the model. I keep getting a horizontal line passing through x-axis. Can you please help me since I do not know what is wrong with this code.
Thank you

Wright-Fisher Simulation of Genetic Drift using R

I'm trying to run a simulation of the wright-fisher model of genetic drift in R.
# Wright-Fisher simulation
# n = number of individuals
# f = number of focal alleles at base population
n=10
f=1
pop = as.matrix( c( rep(0,n-f), rep(1,f) ) )
pop = as.matrix( sample(pop, n, replace=T) )
This works, effectively this is one replicate, and each time I run the final line of script is a new generation. What I would like to do, but can't, is have a loop which automatically loops it for X generations and repeat for Y number of replicates.
It should store the results for each generation in a dataframe and then allow me to plot them in a graph which looks something like this (where f/n is allele frequency, each replicate is represented by one line, and the number of generations determines the length of the X axis)...
Here is a function I wrote a few years ago. You can set the pop size, generations to simulate for, and replicates.
Since you haven't shown any code of your own, I'll leave it up to you to figure out how to store output. At any rate, this should get you going:
Drift_graph = function(t,R){
N<-250
p<-0.5
freq<-as.numeric();
for( i in 1:t ){
A1=rbinom(1,2*N,p)
p=A1/(N*2);
freq[length(freq)+1]<-p;
}
plot(freq,type="l",ylim=c(0,1),col=3,xlab="t",ylab=expression(p(A[1])))
for(u in 1:R){
freq1<-as.numeric();
p<-0.5
for( j in 1:t ){
A1=rbinom(1,2*N,p)
p=A1/(N*2);
freq1[length(freq1)+1]<-p;
}
random<-sample(1:1000,1,replace=F)
randomcolor<-colors()[random]
lines(freq1,type="l",col=(randomcolor))
}
}
Drift_graph(2000,50)
# Pop = Replicate populations
# Gen = Generations
# NM = Male population size
# NF = Female population size
# P = Frequency of focal allele
GenDriftSim = function(Pop = Pop, Gen = Gen, NM, NF, P, graph = "y", histo = "y"){
P = (2*(NM+NF))*P
NE = round((4*NM*NF)/(NM+NF),0)
SR = round(NM/NF,2)
Na = NM+NF
if(graph=="y"){
plot(c(0,0),type = "n", main = bquote('N'[M]~'/ N'[F]~'='~.(SR)*', N'[A]~'='~.(Na)*', N'[E]~'='~.(NE)), cex.main = 1,
xlim = c(1,Gen), ylim=c(0,1), xlab = "Generations", ylab = "Fequency of focal allele")
}else{}
for (i in 1:Pop){
N = NM+NF
startA = as.vector(c(rep(1, times = P),rep(0, times = (2*N)-P)))
Population = matrix(c(
c(sample(startA, size = 2*N, replace = FALSE)),
c(rep("M", times = NM), rep("F", times = NF))),
ncol = 3)
SimResults[(Gen*i)+1-Gen, 3] <<- sum(as.numeric(Population[,1:2]))/(N*2)
for(j in 1:(Gen-1)){
Population = matrix(c(
c(sample(sample(Population[(1:NM),1:2], replace = TRUE),N, replace = TRUE)),
c(sample(sample(Population[(1+NM):N,1:2], replace = TRUE),N, replace = TRUE)),
c(rep("M", times = NM), rep("F", times = NF))), ncol = 3)
SimResults[(Gen*i)+1+j-Gen, 3] <<- sum(as.numeric(Population[,1:2]))/(N*2)
}
s = (i*Gen)-Gen+1; e = i*Gen
r = as.vector(SimResults[s:e, 3])
if(graph=="y"){
points(r~c(1:Gen), type = "l")
}else{}
}
if(histo == "y"){SimResults[,1] = rep(1:Pop, each = Gen)
SimResults[,2] = rep(1:Gen, times = Pop)
hist(SimResults[,3][SimResults[,2]==Gen], breaks = 100, cex.lab = 0.7, cex.axis = 0.7, xlim = c(0,1), cex.main = 1, main = bquote('N'[M]~'/ N'[F]~'='~.(SR)*', N'[A]~'='~.(Na)*', N'[E]~'='~.(NE)), xlab = paste0("Frequency of focal allele after ",Gen," Generations"))
}else{}
}
Pop = 10
Gen = 25
P = 0.5
SimResults = matrix(data = NA, ncol = 3, nrow = Gen*Pop)
GenDriftSim(Pop = Pop, Gen = Gen, NM = 100, NF = 900, P = P, graph = "y", histo = "n")
GenDriftSim(Pop = Pop, Gen = Gen, NM = 180, NF = 180, P = P, graph = "y", histo = "n")
dev.off()

n-armed bandit simulation in R

I'm using Sutton & Barto's ebook Reinforcement Learning: An Introduction to study reinforcement learning. I'm having some issues trying to emulate the results (plots) on the action-value page.
More specifically, how can I simulate the greedy value for each task? The book says:
...we can plot the performance and behavior of various methods as
they improve with experience over 1000 plays...
So I guess I have to keep track of the exploratory values as better ones are found. The issue is how to do this using the greedy approach - since there are no exploratory moves, how do I know what is a greedy behavior?
Thanks for all the comments and answers!
UPDATE: See code on my answer.
I finally got this right. The eps player should beat the greedy player because of the exploratory moves, as pointed out int the book.
The code is slow and need some optimizations, but here it is:
get.testbed = function(arms = 10, plays = 500, u = 0, sdev.arm = 1, sdev.rewards = 1){
optimal = rnorm(arms, u, sdev.arm)
rewards = sapply(optimal, function(x)rnorm(plays, x, sdev.rewards))
list(optimal = optimal, rewards = rewards)
}
play.slots = function(arms = 10, plays = 500, u = 0, sdev.arm = 1, sdev.rewards = 1, eps = 0.1){
testbed = get.testbed(arms, plays, u, sdev.arm, sdev.rewards)
optimal = testbed$optimal
rewards = testbed$rewards
optim.index = which.max(optimal)
slot.rewards = rep(0, arms)
reward.hist = rep(0, plays)
optimal.hist = rep(0, plays)
pulls = rep(0, arms)
probs = runif(plays)
# vetorizar
for (i in 1:plays){
## dont use ifelse() in this case
## idx = ifelse(probs[i] < eps, sample(arms, 1), which.max(slot.rewards))
idx = if (probs[i] < eps) sample(arms, 1) else which.max(slot.rewards)
reward.hist[i] = rewards[i, idx]
if (idx == optim.index)
optimal.hist[i] = 1
slot.rewards[idx] = slot.rewards[idx] + (rewards[i, idx] - slot.rewards[idx])/(pulls[idx] + 1)
pulls[idx] = pulls[idx] + 1
}
list(slot.rewards = slot.rewards, reward.hist = reward.hist, optimal.hist = optimal.hist, pulls = pulls)
}
do.simulation = function(N = 100, arms = 10, plays = 500, u = 0, sdev.arm = 1, sdev.rewards = 1, eps = c(0.0, 0.01, 0.1)){
n.players = length(eps)
col.names = paste('eps', eps)
rewards.hist = matrix(0, nrow = plays, ncol = n.players)
optim.hist = matrix(0, nrow = plays, ncol = n.players)
colnames(rewards.hist) = col.names
colnames(optim.hist) = col.names
for (p in 1:n.players){
for (i in 1:N){
play.results = play.slots(arms, plays, u, sdev.arm, sdev.rewards, eps[p])
rewards.hist[, p] = rewards.hist[, p] + play.results$reward.hist
optim.hist[, p] = optim.hist[, p] + play.results$optimal.hist
}
}
rewards.hist = rewards.hist/N
optim.hist = optim.hist/N
optim.hist = apply(optim.hist, 2, function(x)cumsum(x)/(1:plays))
### Plot helper ###
plot.result = function(x, n.series, colors, leg.names, ...){
for (i in 1:n.series){
if (i == 1)
plot.ts(x[, i], ylim = 2*range(x), col = colors[i], ...)
else
lines(x[, i], col = colors[i], ...)
grid(col = 'lightgray')
}
legend('topleft', leg.names, col = colors, lwd = 2, cex = 0.6, box.lwd = NA)
}
### Plot helper ###
#### Plots ####
require(RColorBrewer)
colors = brewer.pal(n.players + 3, 'Set2')
op <-par(mfrow = c(2, 1), no.readonly = TRUE)
plot.result(rewards.hist, n.players, colors, col.names, xlab = 'Plays', ylab = 'Average reward', lwd = 2)
plot.result(optim.hist, n.players, colors, col.names, xlab = 'Plays', ylab = 'Optimal move %', lwd = 2)
#### Plots ####
par(op)
}
To run it just call
do.simulation(N = 100, arms = 10, eps = c(0, 0.01, 0.1))
You could also choose to make use of the R package "contextual", which aims to ease the implementation and evaluation of both context-free (as described in Sutton & Barto) and contextual (such as for example LinUCB) Multi-Armed Bandit policies.
The package actually offers a vignette on how to replicate all Sutton & Barto bandit plots. For example, to generate the ε-greedy plots, just simulate EpsilonGreedy policies against a Gaussian bandit :
library(contextual)
set.seed(2)
mus <- rnorm(10, 0, 1)
sigmas <- rep(1, 10)
bandit <- BasicGaussianBandit$new(mu_per_arm = mus, sigma_per_arm = sigmas)
agents <- list(Agent$new(EpsilonGreedyPolicy$new(0), bandit, "e = 0, greedy"),
Agent$new(EpsilonGreedyPolicy$new(0.1), bandit, "e = 0.1"),
Agent$new(EpsilonGreedyPolicy$new(0.01), bandit, "e = 0.01"))
simulator <- Simulator$new(agents = agents, horizon = 1000, simulations = 2000)
history <- simulator$run()
plot(history, type = "average", regret = FALSE, lwd = 1, legend_position = "bottomright")
plot(history, type = "optimal", lwd = 1, legend_position = "bottomright")
Full disclosure: I am one of the developers of the package.
this is what I have so far based on our chat:
set.seed(1)
getRewardsGaussian <- function(arms, plays) {
## assuming each action has a normal distribution
# first generate new means
QStar <- rnorm(arms, 0, 1)
# then for each mean, generate `play`-many samples
sapply(QStar, function(u)
rnorm(plays, u, 1))
}
CalculateRewardsPerMethod <- function(arms=7, epsi1=0.01, epsi2=0.1
, plays=1000, methods=c("greedy", "epsi1", "epsi2")) {
# names for easy handling
names(methods) <- methods
arm.names <- paste0("Arm", ifelse((1:arms)<10, 0, ""), 1:arms)
# this could be different if not all actions' rewards have a gaussian dist.
rewards.source <- getRewardsGaussian(arms, plays)
# Three dimensional array to track running averages of each method
running.avgs <-
array(0, dim=c(plays, arms, length(methods))
, dimnames=list(PlayNo.=NULL, Arm=arm.names, Method=methods))
# Three dimensional array to track the outcome of each play, according to each method
rewards.received <-
array(NA_real_, dim=c(plays, 2, length(methods))
, dimnames=list(PlayNo.=seq(plays), Outcome=c("Arm", "Reward"), Method=methods))
# define the function internally to not have to pass running.avgs
chooseAnArm <- function(p) {
# Note that in a tie, which.max returns the lowest value, which is what we want
maxes <- apply(running.avgs[p, ,methods, drop=FALSE], 3, which.max)
# Note: deliberately drawing two separate random numbers and keeping this as
# two lines of code to accent that the two draws should not be related
if(runif(1) < epsi1)
maxes["epsi1"] <- sample(arms, 1)
if(runif(1) < epsi2)
maxes["epsi2"] <- sample(arms, 1)
return(maxes)
}
## TODO: Perform each action at least once, then select according to algorithm
## Starting points. Everyone starts at machine 3
choice <- c(3, 3, 3)
reward <- rewards.source[1, choice]
## First run, slightly different
rewards.received[1,,] <- rbind(choice, reward)
running.avgs[1, choice, ] <- reward # if different starting points, this needs to change like below
## HERE IS WHERE WE START PULLING THE LEVERS ##
## ----------------------------------------- ##
for (p in 2:plays) {
choice <- chooseAnArm(p)
reward <- rewards.source[p, choice]
# Note: When dropping a dim, the methods will be the columns
# and the Outcome info will be the rows. Use `rbind` instead of `cbind`.
rewards.received[p,,names(choice)] <- rbind(choice, reward)
## Update the running averages.
## For each method, the current running averages are the same as the
## previous for all arms, except for the one chosen this round.
## Thus start with last round's averages, then update the one arm.
running.avgs[p,,] <- running.avgs[p-1,,]
# The updating is only involved part (due to lots of array-indexing)
running.avgs[p,,][cbind(choice, 1:3)] <-
sapply(names(choice), function(m)
# Update the running average for the selected arm (for the current play & method)
mean( rewards.received[ 1:p,,,drop=FALSE][ rewards.received[1:p,"Arm",m] == choice[m],"Reward",m])
)
} # end for-loop
## DIFFERENT RETURN OPTIONS ##
## ------------------------ ##
## All rewards received, in simplifed matrix (dropping information on arm chosen)
# return(rewards.received[, "Reward", ])
## All rewards received, along with which arm chosen:
# return(rewards.received)
## Running averages of the rewards received by method
return( apply(rewards.received[, "Reward", ], 2, cumsum) / (1:plays) )
}
### EXECUTION (AND SIMULATION)
## PARAMETERS
arms <- 10
plays <- 1000
epsi1 <- 0.01
epsi2 <- 0.1
simuls <- 50 # 2000
methods=c("greedy", "epsi1", "epsi2")
## Single Iteration:
### we can run system time to get an idea for how long one will take
tme <- system.time( CalculateRewardsPerMethod(arms=arms, epsi1=epsi1, epsi2=epsi2, plays=plays) )
cat("Expected run time is approx: ", round((simuls * tme[["elapsed"]]) / 60, 1), " minutes")
## Multiple iterations (simulations)
rewards.received.list <- replicate(simuls, CalculateRewardsPerMethod(arms=arms, epsi1=epsi1, epsi2=epsi2, plays=plays), simplify="array")
## Compute average across simulations
rewards.received <- apply(rewards.received.list, 1:2, mean)
## RESULTS
head(rewards.received, 17)
MeanRewards <- rewards.received
## If using an alternate return method in `Calculate..` use the two lines below to calculate running avg
# CumulRewards <- apply(rewards.received, 2, cumsum)
# MeanRewards <- CumulRewards / (1:plays)
## PLOT
plot.ts(MeanRewards[, "greedy"], col = 'red', lwd = 2, ylim = range(MeanRewards), ylab = 'Average reward', xlab="Plays")
lines(MeanRewards[, "epsi1"], col = 'orange', lwd = 2)
lines(MeanRewards[, "epsi2"], col = 'navy', lwd = 2)
grid(col = 'darkgray')
legend('bottomright', c('greedy', paste("epsi1 =", epsi1), paste("epsi2 =", epsi2)), col = c('red', 'orange', 'navy'), lwd = 2, cex = 0.8)
You may also want to check this link
https://www.datahubbs.com/multi_armed_bandits_reinforcement_learning_1/
Copy of the relevant code from the above source
It does not use R but simply np.random.rand() from numpy
class eps_bandit:
'''
epsilon-greedy k-bandit problem
Inputs
=====================================================
k: number of arms (int)
eps: probability of random action 0 < eps < 1 (float)
iters: number of steps (int)
mu: set the average rewards for each of the k-arms.
Set to "random" for the rewards to be selected from
a normal distribution with mean = 0.
Set to "sequence" for the means to be ordered from
0 to k-1.
Pass a list or array of length = k for user-defined
values.
'''
def __init__(self, k, eps, iters, mu='random'):
# Number of arms
self.k = k
# Search probability
self.eps = eps
# Number of iterations
self.iters = iters
# Step count
self.n = 0
# Step count for each arm
self.k_n = np.zeros(k)
# Total mean reward
self.mean_reward = 0
self.reward = np.zeros(iters)
# Mean reward for each arm
self.k_reward = np.zeros(k)
if type(mu) == list or type(mu).__module__ == np.__name__:
# User-defined averages
self.mu = np.array(mu)
elif mu == 'random':
# Draw means from probability distribution
self.mu = np.random.normal(0, 1, k)
elif mu == 'sequence':
# Increase the mean for each arm by one
self.mu = np.linspace(0, k-1, k)
def pull(self):
# Generate random number
p = np.random.rand()
if self.eps == 0 and self.n == 0:
a = np.random.choice(self.k)
elif p < self.eps:
# Randomly select an action
a = np.random.choice(self.k)
else:
# Take greedy action
a = np.argmax(self.k_reward)
reward = np.random.normal(self.mu[a], 1)
# Update counts
self.n += 1
self.k_n[a] += 1
# Update total
self.mean_reward = self.mean_reward + (
reward - self.mean_reward) / self.n
# Update results for a_k
self.k_reward[a] = self.k_reward[a] + (
reward - self.k_reward[a]) / self.k_n[a]
def run(self):
for i in range(self.iters):
self.pull()
self.reward[i] = self.mean_reward
def reset(self):
# Resets results while keeping settings
self.n = 0
self.k_n = np.zeros(k)
self.mean_reward = 0
self.reward = np.zeros(iters)
self.k_reward = np.zeros(k)

Resources