n-armed bandit simulation in R

I'm using Sutton & Barto's ebook Reinforcement Learning: An Introduction to study reinforcement learning. I'm having some issues trying to reproduce the results (plots) on the action-value page.
More specifically, how can I simulate the greedy value for each task? The book says:
...we can plot the performance and behavior of various methods as
they improve with experience over 1000 plays...
So I guess I have to keep track of the exploratory values as better ones are found. The issue is how to do this with the purely greedy approach: since there are no exploratory moves, how do I know what counts as greedy behavior?
Thanks for all the comments and answers!
UPDATE: See the code in my answer.

I finally got this right. The eps player should beat the greedy player because of the exploratory moves, as pointed out in the book.
The code is slow and needs some optimization, but here it is:
get.testbed = function(arms = 10, plays = 500, u = 0, sdev.arm = 1, sdev.rewards = 1){
  optimal = rnorm(arms, u, sdev.arm)
  rewards = sapply(optimal, function(x) rnorm(plays, x, sdev.rewards))
  list(optimal = optimal, rewards = rewards)
}

play.slots = function(arms = 10, plays = 500, u = 0, sdev.arm = 1, sdev.rewards = 1, eps = 0.1){
  testbed = get.testbed(arms, plays, u, sdev.arm, sdev.rewards)
  optimal = testbed$optimal
  rewards = testbed$rewards
  optim.index = which.max(optimal)
  slot.rewards = rep(0, arms)
  reward.hist = rep(0, plays)
  optimal.hist = rep(0, plays)
  pulls = rep(0, arms)
  probs = runif(plays)
  # TODO: vectorise this loop
  for (i in 1:plays){
    ## don't use ifelse() here: it always evaluates both alternatives and is
    ## meant for vectorised tests, so the scalar if/else is clearer and cheaper
    ## idx = ifelse(probs[i] < eps, sample(arms, 1), which.max(slot.rewards))
    idx = if (probs[i] < eps) sample(arms, 1) else which.max(slot.rewards)
    reward.hist[i] = rewards[i, idx]
    if (idx == optim.index)
      optimal.hist[i] = 1
    # incremental sample-average update: Q <- Q + (R - Q)/(n + 1)
    slot.rewards[idx] = slot.rewards[idx] + (rewards[i, idx] - slot.rewards[idx])/(pulls[idx] + 1)
    pulls[idx] = pulls[idx] + 1
  }
  list(slot.rewards = slot.rewards, reward.hist = reward.hist, optimal.hist = optimal.hist, pulls = pulls)
}
do.simulation = function(N = 100, arms = 10, plays = 500, u = 0, sdev.arm = 1, sdev.rewards = 1, eps = c(0.0, 0.01, 0.1)){
  n.players = length(eps)
  col.names = paste('eps', eps)
  rewards.hist = matrix(0, nrow = plays, ncol = n.players)
  optim.hist = matrix(0, nrow = plays, ncol = n.players)
  colnames(rewards.hist) = col.names
  colnames(optim.hist) = col.names
  for (p in 1:n.players){
    for (i in 1:N){
      play.results = play.slots(arms, plays, u, sdev.arm, sdev.rewards, eps[p])
      rewards.hist[, p] = rewards.hist[, p] + play.results$reward.hist
      optim.hist[, p] = optim.hist[, p] + play.results$optimal.hist
    }
  }
  rewards.hist = rewards.hist/N
  optim.hist = optim.hist/N
  optim.hist = apply(optim.hist, 2, function(x) cumsum(x)/(1:plays))
  ### Plot helper ###
  plot.result = function(x, n.series, colors, leg.names, ...){
    for (i in 1:n.series){
      if (i == 1)
        plot.ts(x[, i], ylim = 2*range(x), col = colors[i], ...)
      else
        lines(x[, i], col = colors[i], ...)
      grid(col = 'lightgray')
    }
    legend('topleft', leg.names, col = colors, lwd = 2, cex = 0.6, box.lwd = NA)
  }
  ### Plot helper ###
  #### Plots ####
  require(RColorBrewer)
  colors = brewer.pal(n.players + 3, 'Set2')
  op <- par(mfrow = c(2, 1), no.readonly = TRUE)
  plot.result(rewards.hist, n.players, colors, col.names, xlab = 'Plays', ylab = 'Average reward', lwd = 2)
  plot.result(optim.hist, n.players, colors, col.names, xlab = 'Plays', ylab = 'Optimal move %', lwd = 2)
  #### Plots ####
  par(op)
}
To run it, just call:
do.simulation(N = 100, arms = 10, eps = c(0, 0.01, 0.1))
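Note that "greedy" is simply the eps = 0 special case of the code above: with eps = 0 the condition probs[i] < eps is never true, so every play takes which.max(slot.rewards), and no separate greedy implementation is needed. For example:
## greedy player = an eps-greedy player that never explores
greedy.results <- play.slots(arms = 10, plays = 500, eps = 0)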

You could also choose to make use of the R package "contextual", which aims to ease the implementation and evaluation of both context-free (as described in Sutton & Barto) and contextual (for example, LinUCB) multi-armed bandit policies.
The package offers a vignette on how to replicate all of the Sutton & Barto bandit plots. For example, to generate the ε-greedy plots, just simulate EpsilonGreedy policies against a Gaussian bandit:
library(contextual)
set.seed(2)
mus <- rnorm(10, 0, 1)
sigmas <- rep(1, 10)
bandit <- BasicGaussianBandit$new(mu_per_arm = mus, sigma_per_arm = sigmas)
agents <- list(Agent$new(EpsilonGreedyPolicy$new(0), bandit, "e = 0, greedy"),
               Agent$new(EpsilonGreedyPolicy$new(0.1), bandit, "e = 0.1"),
               Agent$new(EpsilonGreedyPolicy$new(0.01), bandit, "e = 0.01"))
simulator <- Simulator$new(agents = agents, horizon = 1000, simulations = 2000)
history <- simulator$run()
plot(history, type = "average", regret = FALSE, lwd = 1, legend_position = "bottomright")
plot(history, type = "optimal", lwd = 1, legend_position = "bottomright")
Full disclosure: I am one of the developers of the package.

This is what I have so far, based on our chat:
set.seed(1)
getRewardsGaussian <- function(arms, plays) {
  ## assuming each action has a normal distribution
  # first generate new means
  QStar <- rnorm(arms, 0, 1)
  # then for each mean, generate `plays`-many samples
  sapply(QStar, function(u)
    rnorm(plays, u, 1))
}

CalculateRewardsPerMethod <- function(arms = 7, epsi1 = 0.01, epsi2 = 0.1
                                      , plays = 1000, methods = c("greedy", "epsi1", "epsi2")) {
  # names for easy handling
  names(methods) <- methods
  arm.names <- paste0("Arm", ifelse((1:arms) < 10, 0, ""), 1:arms)
  # this could be different if not all actions' rewards have a gaussian dist.
  rewards.source <- getRewardsGaussian(arms, plays)
  # three-dimensional array to track running averages of each method
  running.avgs <-
    array(0, dim = c(plays, arms, length(methods))
          , dimnames = list(PlayNo. = NULL, Arm = arm.names, Method = methods))
  # three-dimensional array to track the outcome of each play, according to each method
  rewards.received <-
    array(NA_real_, dim = c(plays, 2, length(methods))
          , dimnames = list(PlayNo. = seq(plays), Outcome = c("Arm", "Reward"), Method = methods))
  # define the function internally to not have to pass running.avgs
  chooseAnArm <- function(p) {
    # Note that in a tie, which.max returns the lowest value, which is what we want
    maxes <- apply(running.avgs[p, , methods, drop = FALSE], 3, which.max)
    # Note: deliberately drawing two separate random numbers and keeping this as
    # two lines of code to accent that the two draws should not be related
    if (runif(1) < epsi1)
      maxes["epsi1"] <- sample(arms, 1)
    if (runif(1) < epsi2)
      maxes["epsi2"] <- sample(arms, 1)
    return(maxes)
  }
  ## TODO: Perform each action at least once, then select according to algorithm
  ## Starting points. Everyone starts at machine 3
  choice <- c(3, 3, 3)
  reward <- rewards.source[1, choice]
  ## First run, slightly different
  rewards.received[1, , ] <- rbind(choice, reward)
  running.avgs[1, choice, ] <- reward # if different starting points, this needs to change like below
  ## HERE IS WHERE WE START PULLING THE LEVERS ##
  ## ----------------------------------------- ##
  for (p in 2:plays) {
    # choose from the averages known after play p-1 (row p has not been filled in yet)
    choice <- chooseAnArm(p - 1)
    reward <- rewards.source[p, choice]
    # Note: When dropping a dim, the methods will be the columns
    # and the Outcome info will be the rows. Use `rbind` instead of `cbind`.
    rewards.received[p, , names(choice)] <- rbind(choice, reward)
    ## Update the running averages.
    ## For each method, the current running averages are the same as the
    ## previous for all arms, except for the one chosen this round.
    ## Thus start with last round's averages, then update the one arm.
    running.avgs[p, , ] <- running.avgs[p-1, , ]
    # the updating is the only involved part (due to lots of array-indexing)
    running.avgs[p, , ][cbind(choice, 1:3)] <-
      sapply(names(choice), function(m)
        # update the running average for the selected arm (for the current play & method)
        mean(rewards.received[1:p, , , drop = FALSE][rewards.received[1:p, "Arm", m] == choice[m], "Reward", m])
      )
  } # end for-loop
  ## DIFFERENT RETURN OPTIONS ##
  ## ------------------------ ##
  ## All rewards received, in simplified matrix (dropping information on arm chosen)
  # return(rewards.received[, "Reward", ])
  ## All rewards received, along with which arm was chosen:
  # return(rewards.received)
  ## Running averages of the rewards received, by method
  return(apply(rewards.received[, "Reward", ], 2, cumsum) / (1:plays))
}
### EXECUTION (AND SIMULATION)
## PARAMETERS
arms <- 10
plays <- 1000
epsi1 <- 0.01
epsi2 <- 0.1
simuls <- 50 # 2000
methods <- c("greedy", "epsi1", "epsi2")
## Single iteration:
### we can run system.time() to get an idea of how long one run will take
tme <- system.time( CalculateRewardsPerMethod(arms=arms, epsi1=epsi1, epsi2=epsi2, plays=plays) )
cat("Expected run time is approx: ", round((simuls * tme[["elapsed"]]) / 60, 1), " minutes")
## Multiple iterations (simulations)
rewards.received.list <- replicate(simuls, CalculateRewardsPerMethod(arms=arms, epsi1=epsi1, epsi2=epsi2, plays=plays), simplify="array")
## Compute average across simulations
rewards.received <- apply(rewards.received.list, 1:2, mean)
## RESULTS
head(rewards.received, 17)
MeanRewards <- rewards.received
## If using an alternate return method in `Calculate..` use the two lines below to calculate running avg
# CumulRewards <- apply(rewards.received, 2, cumsum)
# MeanRewards <- CumulRewards / (1:plays)
## PLOT
plot.ts(MeanRewards[, "greedy"], col = 'red', lwd = 2, ylim = range(MeanRewards), ylab = 'Average reward', xlab="Plays")
lines(MeanRewards[, "epsi1"], col = 'orange', lwd = 2)
lines(MeanRewards[, "epsi2"], col = 'navy', lwd = 2)
grid(col = 'darkgray')
legend('bottomright', c('greedy', paste("epsi1 =", epsi1), paste("epsi2 =", epsi2)), col = c('red', 'orange', 'navy'), lwd = 2, cex = 0.8)

You may also want to check this link:
https://www.datahubbs.com/multi_armed_bandits_reinforcement_learning_1/
The relevant code from that source is copied below. It is not R; it is Python built on numpy (note the np.random.rand() calls).
import numpy as np

class eps_bandit:
    '''
    epsilon-greedy k-bandit problem

    Inputs
    =====================================================
    k: number of arms (int)
    eps: probability of random action 0 < eps < 1 (float)
    iters: number of steps (int)
    mu: set the average rewards for each of the k-arms.
        Set to "random" for the rewards to be selected from
        a normal distribution with mean = 0.
        Set to "sequence" for the means to be ordered from
        0 to k-1.
        Pass a list or array of length = k for user-defined
        values.
    '''
    def __init__(self, k, eps, iters, mu='random'):
        # Number of arms
        self.k = k
        # Search probability
        self.eps = eps
        # Number of iterations
        self.iters = iters
        # Step count
        self.n = 0
        # Step count for each arm
        self.k_n = np.zeros(k)
        # Total mean reward
        self.mean_reward = 0
        self.reward = np.zeros(iters)
        # Mean reward for each arm
        self.k_reward = np.zeros(k)
        if type(mu) == list or type(mu).__module__ == np.__name__:
            # User-defined averages
            self.mu = np.array(mu)
        elif mu == 'random':
            # Draw means from probability distribution
            self.mu = np.random.normal(0, 1, k)
        elif mu == 'sequence':
            # Increase the mean for each arm by one
            self.mu = np.linspace(0, k-1, k)

    def pull(self):
        # Generate random number
        p = np.random.rand()
        if self.eps == 0 and self.n == 0:
            a = np.random.choice(self.k)
        elif p < self.eps:
            # Randomly select an action
            a = np.random.choice(self.k)
        else:
            # Take greedy action
            a = np.argmax(self.k_reward)
        reward = np.random.normal(self.mu[a], 1)
        # Update counts
        self.n += 1
        self.k_n[a] += 1
        # Update total
        self.mean_reward = self.mean_reward + (
            reward - self.mean_reward) / self.n
        # Update results for a_k
        self.k_reward[a] = self.k_reward[a] + (
            reward - self.k_reward[a]) / self.k_n[a]

    def run(self):
        for i in range(self.iters):
            self.pull()
            self.reward[i] = self.mean_reward

    def reset(self):
        # Resets results while keeping settings
        self.n = 0
        self.k_n = np.zeros(self.k)
        self.mean_reward = 0
        self.reward = np.zeros(self.iters)
        self.k_reward = np.zeros(self.k)

Related

How to rotate nodes of a time-calibrated phylogenetic tree to match a particular order in R?

I have a time-calibrated phylogenetic tree from BEAST and I would like to make a figure in which its nodes are rotated to match an arbitrary ordering. The following code works perfectly to plot the tree with the nodes in the order they are in the input file.
library("phytools")
library("phyloch")
library("strap")
library("coda")
t <- read.beast("mcctree.tre") # I couldn't upload the file here
t$root.time <- t$height[1]
num_taxa <- length(t$tip.label)
display_all_node_bars <- TRUE
names_list <-vector()
for (name in t$tip){
v <- strsplit(name, "_")[[1]]
if(display_all_node_bars){
names_list = c(names_list, name)
}
else if(v[length(v)]=="0"){
names_list = c(names_list, name)
}
}
nids <- vector()
pos <- 1
len_nl <- length(names_list)
for(n in names_list){
for(nn in names_list[pos:len_nl]){
if(n != nn){
m <- getMRCA(t,c(n, nn))
if(m %in% nids == FALSE){
nids <- c(nids, m)
}
}
}
pos <- pos+1
}
pdf("tree.pdf", width = 20, height = 20)
geoscalePhylo(tree = t,
x.lim = c(-2,21),
units = c("Epoch"),
tick.scale = "myr",
boxes = FALSE,
width = 1,
cex.tip = 2,
cex.age = 3,
cex.ts = 2,
erotate = 0,
label.offset = 0.1)
lastPP <- get("last_plot.phylo", envir = .PlotPhyloEnv)
for(nv in nids){
bar_xx_a <- c(lastPP$xx[nv]+t$height[nv-num_taxa]-t$"height_95%_HPD_MIN"[nv-num_taxa],
lastPP$xx[nv]-(t$"height_95%_HPD_MAX"[nv-num_taxa]-t$height[nv-num_taxa]))
lines(bar_xx_a, c(lastPP$yy[nv], lastPP$yy[nv]), col = rgb(0, 0, 1, alpha = 0.3), lwd = 12)
}
t$node.label <- t$posterior
p <- character(length(t$node.label))
p[t$node.label >= 0.95] <- "black"
p[t$node.label < 0.95 & t$node.label >= 0.75] <- "gray"
p[t$node.label < 0.75] <- "white"
nodelabels(pch = 21, cex = 1.5, bg = p)
dev.off()
The following code is my attempt to rotate the nodes the way I want (following this tutorial: http://blog.phytools.org/2015/04/finding-closest-set-of-node-rotations.html), and it does rotate the nodes. However, the blue bars indicating the confidence intervals of the divergence time estimates end up out of their correct places; this is what I would like help correcting. This will be used on much larger files with hundreds of branches, so the example here is simplified.
new.order <- c("Sp8","Sp9","Sp10","Sp7","Sp6","Sp5","Sp4","Sp2","Sp3","Ou1","Ou2","Sp1")
t2 <- setNames(1:Ntip(t), new.order)
new.order.tree <- minRotate(t, t2)
new.order.tree$root.time <- t$root.time
new.order.tree$height <- t$height
new.order.tree$"height_95%_HPD_MIN" <- t$"height_95%_HPD_MIN"
new.order.tree$"height_95%_HPD_MAX" <- t$"height_95%_HPD_MAX"
pdf("reordered_tree.pdf", width = 20, height = 20)
geoscalePhylo(tree = new.order.tree,
x.lim = c(-2,21),
units = c("Epoch"),
tick.scale = "myr",
boxes = FALSE,
width = 1,
cex.tip = 2,
cex.age = 3,
cex.ts = 2,
erotate = 0,
label.offset = 0.1)
lastPP <- get("last_plot.phylo", envir = .PlotPhyloEnv)
for(nv in nids){
bar_xx_a <- c(lastPP$xx[nv]+new.order.tree$height[nv-num_taxa]-new.order.tree$"height_95%_HPD_MIN"[nv-num_taxa],
lastPP$xx[nv]-(new.order.tree$"height_95%_HPD_MAX"[nv-num_taxa]-new.order.tree$height[nv-num_taxa]))
lines(bar_xx_a, c(lastPP$yy[nv], lastPP$yy[nv]), col = rgb(0, 0, 1, alpha = 0.3), lwd = 12)
}
new.order.tree$node.label <- t$posterior
p <- character(length(new.order.tree$node.label))
p[new.order.tree$node.label >= 0.95] <- "black"
p[new.order.tree$node.label < 0.95 & new.order.tree$node.label >= 0.75] <- "gray"
p[new.order.tree$node.label < 0.75] <- "white"
nodelabels(pch = 21, cex = 1.5, bg = p)
dev.off()
I've found several similar questions here and in other forums, but none dealing specifically with time-calibrated trees - which is the core of the problem described above.
The short answer is that phytools::minRotate() doesn't recognize the confidence intervals as associated with nodes. If you contact the phytools maintainers, they may well be able to add this functionality quite easily.
Meanwhile, you can correct this yourself.
I don't know how read.beast() saves confidence intervals – let's say they're saved in t$conf.int. (Type unclass(t) at the R command line to see the full structure; you should be able to identify the appropriate property.)
If the tree's node labels are unique, then you can infer the new sequence of nodes using match():
library("phytools")
new.order <- c("Sp8","Sp9","Sp10","Sp7","Sp6","Sp5","Sp4","Sp2","Sp3","Ou1","Ou2","Sp1")
# Set up a fake initial tree -- you would load the tree from a file
tree <- rtree(length(new.order))
tree$tip.label <- sort(new.order)
tree$node.label <- seq_len(tree$Nnode)
tree$conf.int <- seq_len(tree$Nnode) * 10
# Plot tree
par(mfrow = c(1, 2), mar = rep(0, 4), cex = 0.9) # Create space
plot(tree, show.node.label = TRUE)
nodelabels(tree$conf.int, adj = 1) # Annotate "correct" intervals
# Re-order nodes with minRotate
noTree <- minRotate(tree, setNames(seq_along(new.order), new.order))
plot(noTree, show.node.label = TRUE)
# Move confidence intervals to correct node
tree$conf.int <- tree$conf.int[match(noTree$node.label, tree$node.label)]
nodelabels(tree$conf.int, adj = 1)
If you can't guarantee that the node labels are unique, you can always overwrite them in a temporary object:
# Find node order
treeCopy <- tree
treeCopy$node.label <- seq_len(tree$Nnode)
nodeOrder <- match(minRotate(treeCopy, setNames(seq_along(new.order), new.order))$node.label,
                   treeCopy$node.label)
# Apply node order
tree$conf.int <- tree$conf.int[nodeOrder]

Avoiding duplication in R

I am trying to fit a variety of (truncated) probability distributions to the same very thin set of quantiles. I can do it but it seems to require lots of duplication of the same code. Is there a neater way?
I am using this code by Nadarajah and Kotz to generate the pdf of the truncated distributions:
qtrunc <- function(p, spec, a = -Inf, b = Inf, ...)
{
  tt <- p
  G <- get(paste("p", spec, sep = ""), mode = "function")
  Gin <- get(paste("q", spec, sep = ""), mode = "function")
  tt <- Gin(G(a, ...) + p*(G(b, ...) - G(a, ...)), ...)
  return(tt)
}
where spec can be the name of any untruncated distribution for which code in R exists, and the ... argument is used to provide the names of the parameters of that untruncated distribution.
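For instance, the median of a normal distribution truncated to [0, 5] would be obtained with a call like this (an illustrative example of mine; mean and sd are forwarded through ... to pnorm() and qnorm()):
# median of a normal(2, 1) truncated to [0, 5]
qtrunc(p = 0.5, spec = "norm", a = 0, b = 5, mean = 2, sd = 1)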
To achieve the best fit I need to measure the distance between the given quantiles and those calculated using arbitrary values of the parameters of the distribution. In the case of the gamma distribution, for example, the code is as follows:
spec <- "gamma"
fit_gamma <- function(x, l = 0, h = 20, t1 = 5, t2 = 13){
  ct1 <- qtrunc(p = 1/3, spec, a = l, b = h, shape = x[1], rate = x[2])
  ct2 <- qtrunc(p = 2/3, spec, a = l, b = h, shape = x[1], rate = x[2])
  dist <- vector(mode = "numeric", length = 2)
  dist[1] <- (t1 - ct1)^2
  dist[2] <- (t2 - ct2)^2
  return(sqrt(sum(dist)))
}
where l is the lower truncation, h is the higher and I am given the two tertiles t1 and t2.
Finally, I seek the best fit using optim, thus:
gamma_fit <- optim(par = c(2, 4),
                   fn = fit_gamma,
                   l = l,
                   h = h,
                   t1 = t1,
                   t2 = t2,
                   method = "L-BFGS-B",
                   lower = c(1.01, 1.4))
Now suppose I want to do the same thing but fitting a normal distribution instead. The names of the parameters of the normal distribution that I am using in R are mean and sd.
I can achieve what I want but only by writing a whole new function fit_normal that is extremely similar to my fit_gamma function but with the new parameter names used in the definition of ct1 and ct2.
The problem of duplication of code becomes very severe because I wish to try fitting a large number of different distributions to my data.
What I want to know is whether there is a way of writing a generic fit_spec as it were so that the parameter names do not have to be written out by me.
Use x as a named list to create a list of arguments to pass into qtrunc() using do.call().
fit_distro <- function(x, spec, l = 0, h = 20, t1 = 5, t2 = 13){
  args <- c(x, list(spec = spec, a = l, b = h))
  ct1 <- do.call(qtrunc, args = c(list(p = 1/3), args))
  ct2 <- do.call(qtrunc, args = c(list(p = 2/3), args))
  dist <- vector(mode = "numeric", length = 2)
  dist[1] <- (t1 - ct1)^2
  dist[2] <- (t2 - ct2)^2
  return(sqrt(sum(dist)))
}
This is called as follows, which is the same as your original function.
fit_distro(list(shape = 2, rate = 3), "gamma")
# [1] 13.07425
fit_gamma(c(2, 3))
# [1] 13.07425
This will work with other distributions, for however many parameters they have.
fit_distro(list(mean = 10, sd = 3), "norm")
# [1] 4.08379
fit_distro(list(shape1 = 2, shape2 = 3, ncp = 10), "beta")
# [1] 12.98371

How to calculate fuzzy performance index and normalized classification entropy in R

I am running fuzzy c-means clustering using the e1071 package. I want to decide the optimum number of clusters based on the fuzzy performance index (FPI, the extent of fuzziness) and the normalized classification entropy (NCE, the degree of disorganization of a specific class), given by the following formulas:
FPI = 1 - (c/(c - 1)) * (1 - sum_ik(mu_ik^2)/n)
NCE = (n/(n - c)) * (-sum_ik(mu_ik * log(mu_ik))/n)
where c is the number of clusters, n is the number of observations, mu_ik is the fuzzy membership, and log is the natural logarithm.
I am using the following code
library(e1071)
x <- rbind(matrix(rnorm(100, sd = 0.3), ncol = 2),
           matrix(rnorm(100, mean = 1, sd = 0.3), ncol = 2))
cl <- cmeans(x, 2, 20, verbose = TRUE, method = "cmeans")
cl$membership
I have been able to extract the mu_ik, i.e. the fuzzy memberships. Now cmeans has to be run for different numbers of clusters, e.g. 2 to 6, and the FPI and NCE have to be calculated for each, to produce a plot of both indices against the number of clusters. How can this be achieved in R?
Edit
I have tried the code provided by @nya on the iris dataset, using the following code
df <- scale(iris[-5])
FPI <- function(cmem){
  c <- ncol(cmem)
  n <- nrow(cmem)
  1 - (c / (c - 1)) * (1 - sum(cmem^2) / n)
}
NCE <- function(cmem){
  c <- ncol(cmem)
  n <- nrow(cmem)
  (n / (n - c)) * (- sum(cmem * log(cmem)) / n)
}
# prepare variables
cl <- list()
fpi <- nce <- NULL
# cycle through the desired number of clusters
for(i in 2:6){
  cl[[i]] <- cmeans(df, i, 20, method = "cmeans")
  fpi <- c(fpi, FPI(cl[[i]]$membership))
  nce <- c(nce, NCE(cl[[i]]$membership))
}
# add space for the second axis label
par(mar = c(5,4,1,4) + .1)
# plot FPI
plot(2:6, fpi, lty = 2, pch = 18, type = "b", xlab = "Number of clusters", ylab = "FPI")
# plot NCE, manually adding the second axis
par(new = TRUE)
plot(2:6, nce, lty = 1, pch = 15, type = "b", xlab = "", ylab = "", axes = FALSE)
axis(4, at = pretty(range(nce)))
mtext("NCE", side = 4, line = 3)
# add legend
legend("top", legend = c("FPI", "NCE"), pch = c(18,15), lty = c(2,1), horiz = TRUE)
The minimum values of the fuzzy performance index (FPI) and normalized classification entropy (NCE) are supposed to indicate the optimum number of clusters, but in my result NCE is always increasing and FPI is always decreasing. Ideally both should dip to a clear minimum at the optimal number of clusters.
With available equations, we can program our own functions. Here, the two functions use equations present in the paper you suggested and one of the references the authors cite.
FPI <- function(cmem, method = c("FuzME", "McBrathney", "Rahul")){
  method <- match.arg(method)
  C <- ncol(cmem)
  N <- nrow(cmem)
  # Rahul et al. 2019. https://doi.org/10.1080/03650340.2019.1578345
  if(method == "Rahul"){
    res <- 1 - (C / (C - 1)) * (1 - sum(cmem^2) / N)
  }
  # McBrathney & Moore 1985 https://doi.org/10.1016/0168-1923(85)90082-6
  if(method == "McBrathney"){
    F <- sum(cmem^2) / N
    res <- 1 - (C * F - 1) / (F - 1)
  }
  # FuzME https://precision-agriculture.sydney.edu.au/resources/software/
  # MATLAB code file fvalidity.m, downloaded on 11 Nov, 2021
  if(method == "FuzME"){
    F <- sum(cmem^2) / N
    res <- 1 - (C * F - 1) / (C - 1)
  }
  return(res)
}

NCE <- function(cmem, method = c("FuzME", "McBrathney", "Rahul")){
  method <- match.arg(method)
  C <- ncol(cmem)
  N <- nrow(cmem)
  if(method == "Rahul"){
    res <- (N / (N - C)) * (- sum(cmem * log(cmem)) / N)
  }
  if(method %in% c("FuzME", "McBrathney")){
    H <- -1 / N * sum(cmem * log(cmem))
    res <- H / log(C)
  }
  return(res)
}
Then use those to calculate the indices from the degrees of membership returned by the cmeans function, using the iris dataset.
# prepare variables
cl <- list()
fpi <- nce <- NULL
# cycle through the desired number of clusters
for(i in 2:6){
  cl[[i]] <- e1071::cmeans(iris[, -5], i, 20, method = "cmeans")
  # "M" partially matches "McBrathney" via match.arg()
  fpi <- c(fpi, FPI(cl[[i]]$membership, method = "M"))
  nce <- c(nce, NCE(cl[[i]]$membership, method = "M"))
}
Last, plot with two different axes in one plot.
# add space for the second axis label
par(mar = c(5,4,1,4) + .1)
# plot FPI
plot(2:6, fpi, lty = 2, pch = 18, type = "b", xlab = "Number of clusters", ylab = "FPI")
# plot NCE, manually adding the second axis
par(new = TRUE)
plot(2:6, nce, lty = 1, pch = 15, type = "b", xlab = "", ylab = "", axes = FALSE)
axis(4, at = pretty(range(nce)))
mtext("NCE", side = 4, line = 3)
# add legend
legend("top", legend = c("FPI", "NCE"), pch = c(18,15), lty = c(2,1), horiz = TRUE)
EDIT1: Updated the functions according to optional equations from two different publications and calculated the example on the iris dataset.
EDIT2: Added code for the FPI and NCE calculations specified in the FuzME MATLAB code available here.
Hope this helps:
library(dplyr)
library(tidyr)   # pivot_longer() lives in tidyr, not dplyr
library(ggplot2)
library(e1071)

f <- function(cl) {
  C <- length(cl$size)
  N <- sum(cl$size)
  mu <- cl$membership
  fpi <- 1 - C / (C - 1) * (1 - sum((mu)^2) / N)
  nce <- N / (N - C) * (-sum(log(mu) * mu) / N)
  c(FPI = fpi, NCE = nce)
}

K <- 2:6  # define K before using it below; x is the data matrix from the question
data.frame(t(rbind(
  K = K,
  sapply(
    K,
    function(k) f(cmeans(x, k, 20, verbose = TRUE, method = "cmeans"))
  )
))) %>%
  pivot_longer(cols = FPI:NCE, names_to = "Index") %>%
  ggplot(aes(x = K, y = value, group = Index)) +
  geom_line(aes(linetype = Index, color = Index)) +
  geom_point() +
  scale_y_continuous(
    name = "FPI",
    sec.axis = sec_axis(~., name = "NCE")
  ) +
  theme(legend.position = "top")

Minimax theory including alpha beta pruning for R code

I need help developing an alpha-beta-pruned minimax algorithm in R. Currently I have implemented plain minimax, but it is only usable for a 3x3 board; a 4x4 board does not finish because the run time is too long.
I have adapted the code from the 3x3 board, but I realize I cannot provide a depth, so I assume it searches every possible continuation of the 4x4 board. What do I need to change in the minimax section to implement alpha-beta pruning? Since I am fairly new to this field, I am trying to modify existing code to understand what each part is doing.
# draw the board for tic tac toe
draw_board <- function(board) {
  xo <- c("X", " ", "O") # symbols
  par(mar = rep(0, 4))
  plot.new()
  plot.window(xlim = c(0, 40), ylim = c(0, 40))
  abline(h = c(10, 20, 30), col = "darkgrey", lwd = 4)
  abline(v = c(10, 20, 30), col = "darkgrey", lwd = 4)
  pieces <- xo[board + 2]
  text(rep(c(5, 15, 25, 35), 4), c(rep(35, 4), rep(25, 4), rep(15, 4), rep(5, 4)), pieces, cex = 6)
  # identify location of any four in a row
  square <- t(matrix(board, nrow = 4))
  hor <- abs(rowSums(square))
  if (any(hor == 4))
    hor <- (5 - which(hor == 4)) * 10 - 5
  else
    hor <- 0
  ver <- abs(colSums(square))
  if (any(ver == 4))
    ver <- which(ver == 4) * 10 - 5
  else
    ver <- 0
  diag1 <- sum(diag(square))
  diag2 <- sum(diag(t(apply(square, 2, rev))))
  # draw winning lines
  if (hor > 0) lines(c(0, 40), rep(hor, 2), lwd = 10, col = "red")
  if (ver > 0) lines(rep(ver, 2), c(0, 40), lwd = 10, col = "red")
  if (abs(diag1) == 4) lines(c(2, 38), c(38, 2), lwd = 10, col = "red")
  if (abs(diag2) == 4) lines(c(2, 38), c(2, 38), lwd = 10, col = "red")
}
# Human player enters a move
move_human <- function(game) {
  text(4, 0, "Click on screen to move", col = "grey", cex = .7)
  empty <- which(game == 0)
  move <- 0
  while (!move %in% empty) {
    coords <- locator(n = 1) # add lines
    coords$x <- floor(abs(coords$x) / 10) + 1
    coords$y <- floor(abs(coords$y) / 10) + 1
    move <- coords$x + 4 * (4 - coords$y) # 4 is the number of rows/columns --> needs to be a square
  }
  return(move)
}
# Evaluate winner function
eval_winner <- function(game_1, player) {
  game <- matrix(game_1, nrow = 3, byrow = T)
  hor <- rowSums(game)
  ver <- colSums(game)
  diag <- c(sum(diag(game)), sum(diag(apply(game, 1, rev))))
  if (-4 %in% c(hor, ver, diag))
    return(-10)
  if (4 %in% c(hor, ver, diag))
    return(10)
  else
    return(0)
}
# Minimax AI function
minimax <- function(game_1, player) {
  free <- which(game_1 == 0)
  if (length(free) == 1) {
    game_1[free] <- player
    return(list(move = free, U = eval_winner(game_1, player)))
  }
  poss.results <- rep(0, 16)
  for (i in free) {
    game <- game_1
    game[i] <- player
    poss.results[i] <- eval_winner(game, player)
  }
  mm <- ifelse(player == -1, "which.min", "which.max")
  if (any(poss.results == (player * 10))) {
    move <- do.call(mm, list(poss.results))
    return(list(move = move, U = poss.results[move]))
  }
  for (i in free) {
    game <- game_1
    game[i] <- player
    poss.results[i] <- minimax(game, -player)$U
  }
  random <- runif(16, 0, 0.1)
  poss.results[-free] <- 100 * -player
  poss.results <- poss.results + (player * random)
  move <- do.call(mm, list(poss.results))
  return(list(move = move, U = poss.results[move]))
}
# Main game engine human versus randomly choosing computer!
tic_tac_toe <- function(player1 = "human", player2 = "computer") {
  game <- rep(0, 16) # Empty board
  winner <- FALSE # Define winner
  player <- 1 # First player
  #players <- c(player1, player2)
  players <- c("human", "computer")
  draw_board(game)
  while (0 %in% game & winner == 0) { # Keep playing until win or full board
    if (players[(player + 3) %% 3] == "human") # Human player
      move <- move_human(game)
    else { # Computer player
      move <- minimax(game, player)
      move <- move$move
    }
    game[move] <- player # Change board
    draw_board(game)
    winner <- max(eval_winner(game, 1), abs(eval_winner(game, -1))) == 6 # Winner, winner, chicken dinner?
    player <- -player # Change player
  }
  if (winner == 1)
    print("Human has won")
  else if (winner == 2)
    print("Computer has won")
  else
    print("Play ended in a draw")
}
Going to reiterate what TheWhiteRabbit said: you should have a look at https://stackoverflow.com/help/how-to-ask
If you are more specific and include your code in the future, we can be way more helpful, but I'll give you some general suggestions based on what you have provided.
I theorize your issue might be some of the following:
- You are not limiting your depth: you are trying to search all the way to the endgame. Minimax should only search as many turns ahead as your hardware can handle.
- Your scoring function is too inefficient. The score function usually dominates the computation time of a minimax search; if it is inefficient, you will pay for it.
- Similarly, the code that generates the list of valid moves might be inefficient.
- You might be considering invalid moves, causing your tree to branch far more than it should.
- You are not generalizing your code enough. It doesn't work for 4x4 because you have hardcoded something to rely on a 3x3 board without realizing it.
- Your alpha-beta pruning is incorrect and prunes nothing (see the sketch after this list for the general shape).
From my experience implementing MiniMax + variants, these tend to be some of the failure points.
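For reference, here is a minimal, generic alpha-beta sketch (negamax form) in R. This is my own illustration of the technique rather than a drop-in replacement for the code above; score(), valid_moves() and apply_move() are hypothetical helpers you would supply for your own board representation.
# Negamax with alpha-beta pruning (sketch; the three helpers are hypothetical)
alphabeta <- function(game, player, depth, alpha = -Inf, beta = Inf) {
  moves <- valid_moves(game)                 # hypothetical: legal moves for this position
  if (depth == 0 || length(moves) == 0)
    return(list(move = NA, U = player * score(game)))  # hypothetical static evaluation
  best <- list(move = NA, U = -Inf)
  for (m in moves) {
    child <- apply_move(game, m, player)     # hypothetical: board after playing m
    u <- -alphabeta(child, -player, depth - 1, -beta, -alpha)$U
    if (u > best$U) best <- list(move = m, U = u)
    alpha <- max(alpha, u)
    if (alpha >= beta) break                 # prune: the opponent will never allow this line
  }
  best
}
The two essentials are the depth argument, which caps how far ahead the search looks, and the break when alpha >= beta, which is what actually prunes branches; without both, a 4x4 game tree is infeasible to search exhaustively.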

Separate Bayesian parameter estimates for multiple groups in JAGS/rjags

I am trying to perform a hierarchical analysis in JAGS, extrapolating from Kruschke's Doing Bayesian Data Analysis, chapter 9. I wish to obtain posterior parameter estimates for the proportion of heads for four coins (thetas 1 to 4) coming from two mints, and also estimates of the average bias of the coins from each mint (mint bias: omega). I have kept the variability of each mint's bias, kappa, as a constant. The trouble is that I cannot get a posterior estimate for the second mint; it seems to just sample the prior. Does anyone know how to fix the model string text (see step 3 below) so as to generate the posterior estimate for the second mint?
The entire script for the analysis is below.
library(rjags)
library(runjags)
library(coda)
############### 1. Generate the data
flips <- c(sample(c(rep(1,3), rep(0,9))),   # coin 1, mint 1, 12 flips total
           sample(c(rep(1,1), rep(0,4))),   # coin 2, mint 1, 5 flips total
           sample(c(rep(1,10), rep(0,5))),  # coin 1, mint 2, 15 flips
           sample(c(rep(1,17), rep(0,6))))  # coin 2, mint 2, 23 flips
coins <- factor(c(rep(1,12), rep(2,5), rep(3, 15), rep(4, 23)))
mints <- factor(c(rep(1,17), rep(2,38)))
nFlips <- length(flips)
nCoins <- length(unique(coins))
nMints <- length(unique(mints))
#################### 2. Pass data into a list
dataList <- list(
  flips = flips,
  coins = coins,
  mints = mints,
  nFlips = nFlips,
  nCoins = nCoins,
  nMints = nMints)
################### 3. specify and save the model
modelString <- "
model{
# start with nested likelihood function
for (i in 1:nFlips) {
flips[i] ~ dbern(theta[coins[i]])
}
# next the prior on theta
for (coins in 1:nCoins) {
theta[coins] ~ dbeta(omega[mints[coins]]*(kappa - 2) + 1, (1 - omega[mints[coins]])*(kappa - 2) + 1)
}
# next we specify the prior for the higher-level parameters on the mint, omega and kappa
for (mints in 1:nMints) {
omega[mints] ~ dbeta(2,2)
}
kappa <- 5
}
"
writeLines(modelString, "tempModelHier4CoinTwoMint.txt")
############################### Step 4: Initialise Chains
initsList <- list(theta1 = mean(flips[coins==1]),
                  theta2 = mean(flips[coins==2]),
                  theta3 = mean(flips[coins==3]),
                  theta4 = mean(flips[coins==4]),
                  omega1 = mean(c(mean(flips[coins==1]),
                                  mean(flips[coins==2]))),
                  omega2 = mean(c(mean(flips[coins==3]),
                                  mean(flips[coins==4]))))
initsList
############################### Step 5: Generate Chains
runJagsOut <- run.jags(method = "simple",
                       model = "tempModelHier4CoinTwoMint.txt",
                       monitor = c("theta[1]", "theta[2]", "theta[3]", "theta[4]", "omega[1]", "omega[2]"),
                       data = dataList,
                       inits = initsList,
                       n.chains = 1,
                       adapt = 500,
                       burnin = 1000,
                       sample = 50000,
                       thin = 1,
                       summarise = FALSE,
                       plots = FALSE)
############################### Step 6: Convert to Coda Object
codaSamples <- as.mcmc.list(runJagsOut)
head(codaSamples)
############################### Step 7: Make Graphs
df <- data.frame(as.matrix(codaSamples))
library(ggplot2) # needed for the density plots below
theta1 <- ggplot(df, aes(x = df$theta.1.)) + geom_density()
theta2 <- ggplot(df, aes(x = df$theta.2.)) + geom_density()
theta3 <- ggplot(df, aes(x = df$theta.3.)) + geom_density()
theta4 <- ggplot(df, aes(x = df$theta.4.)) + geom_density()
omega1 <- ggplot(df, aes(x = df$omega.1.)) + geom_density()
omega2 <- ggplot(df, aes(x = df$omega.2.)) + geom_density()
require(gridExtra)
ggsave("coinsAndMintsHier/hierPropFourCoinsTwoMints.pdf",
       grid.arrange(theta1, theta2, theta3, theta4, omega1, omega2, ncol = 2),
       device = "pdf", height = 30, width = 10, units = "cm")
The problem was the way you were attempting to index the mints of the coins when setting the prior on theta. There are only 4 thetas in this case, not nFlips. Your nested indexing mints[coins] was accessing the mints data vector, not a vector of which mint each coin belongs to. I've created a corrected version below. Notice the explicit construction of a vector that indexes which mint each coin belongs to. Notice also that in the model specification each for-loop index has its own name, distinct from the data names.
graphics.off() # This closes all of R's graphics windows.
rm(list=ls())  # Careful! This clears all of R's memory!
library(runjags)
library(coda)
#library(rjags)
############### 1. Generate the data
flips <- c(sample(c(rep(1,3), rep(0,9))),   # coin 1, mint 1, 12 flips total
           sample(c(rep(1,1), rep(0,4))),   # coin 2, mint 1, 5 flips total
           sample(c(rep(1,10), rep(0,5))),  # coin 1, mint 2, 15 flips
           sample(c(rep(1,17), rep(0,6))))  # coin 2, mint 2, 23 flips
# NOTE: I got rid of `factor` because it was unneeded and got in the way
coins <- c(rep(1,12), rep(2,5), rep(3, 15), rep(4, 23))
# NOTE: I got rid of `factor` because it was unneeded and got in the way
mints <- c(rep(1,17), rep(2,38))
nFlips <- length(flips)
nCoins <- length(unique(coins))
nMints <- length(unique(mints))
# NEW: Create vector that specifies the mint of each coin. There must be a more
# elegant way to do this, but here is a logical brute-force approach. This
# assumes that coins are consecutively numbered from 1 to nCoins.
mintOfCoin = NULL
for ( cIdx in 1:nCoins ) {
  mintOfCoin = c( mintOfCoin , unique(mints[coins==cIdx]) )
}
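# (A more compact alternative to the loop above, my own sketch under the same
#  assumption that coins are numbered consecutively from 1 to nCoins:
#  match() finds the first flip of each coin, and mints[] looks up that flip's mint.)
# mintOfCoin <- mints[match(1:nCoins, coins)]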
#################### 2. Pass data into a list
dataList <- list(
  flips = flips,
  coins = coins,
  mints = mints,
  nFlips = nFlips,
  nCoins = nCoins,
  nMints = nMints,
  mintOfCoin = mintOfCoin # NOTE
)
################### 3. specify and save the model
modelString <- "
model{
# start with nested likelihood function
for (fIdx in 1:nFlips) {
flips[fIdx] ~ dbern( theta[coins[fIdx]] )
}
# next the prior on theta
# NOTE: Here we use the mintOfCoin index.
for (cIdx in 1:nCoins) {
theta[cIdx] ~ dbeta( omega[mintOfCoin[cIdx]]*(kappa - 2) + 1 ,
( 1 - omega[mintOfCoin[cIdx]])*(kappa - 2) + 1 )
}
# next we specify the prior for the higher-level parameters on the mint,
# omega and kappa
# NOTE: I changed the name of the mint index so it doesn't conflict with
# mints data vector.
for (mIdx in 1:nMints) {
omega[mIdx] ~ dbeta(2,2)
}
kappa <- 5
}
"
writeLines(modelString, "tempModelHier4CoinTwoMint.txt")
############################### Step 4: Initialise Chains
initsList <- list(theta1 = mean(flips[coins==1]),
                  theta2 = mean(flips[coins==2]),
                  theta3 = mean(flips[coins==3]),
                  theta4 = mean(flips[coins==4]),
                  omega1 = mean(c(mean(flips[coins==1]),
                                  mean(flips[coins==2]))),
                  omega2 = mean(c(mean(flips[coins==3]),
                                  mean(flips[coins==4]))))
initsList
############################### Step 5: Generate Chains
runJagsOut <- run.jags(method = "parallel",
                       model = "tempModelHier4CoinTwoMint.txt",
                       # NOTE: theta and omega are vectors:
                       monitor = c( "theta", "omega" , "kappa" ),
                       data = dataList,
                       #inits = initsList, # NOTE: Let JAGS initialize.
                       n.chains = 4, # NOTE: Not only 1 chain.
                       adapt = 500,
                       burnin = 1000,
                       sample = 10000,
                       thin = 1,
                       summarise = FALSE,
                       plots = FALSE)
############################### Step 6: Convert to Coda Object
codaSamples <- as.mcmc.list(runJagsOut)
head(codaSamples)
########################################
## NOTE: Important step --- Check MCMC diagnostics
# Display diagnostics of chain, for specified parameters:
source("DBDA2E-utilities.R") # For function diagMCMC()
parameterNames = varnames(codaSamples) # from coda package
for ( parName in parameterNames ) {
  diagMCMC( codaObject=codaSamples , parName=parName )
}
############################### Step 7: Make Graphs
# ...
