Q-learning R has length zero

Q-learning R has length zero - r

I am trying to inplement a simulation of a simplified blackjack game that will return the best policy at each state s.
The blackjack simulation seems to work properly, but i somehow get an error when trying to apply the Q learning algorithm to reach the optimal policy.
Here's my code, i believe it's well documented, error is in the Q-learning block, starting at ~line 170, it is also reproducible :
#Application reinforcement learning for black jack. We will suppose here that the croupier only has 1 pack of cards
#Initial tabs
packinit = c(rep(1,4), rep(2,4),rep(3,4),rep(4,4),rep(5,4),rep(6,4),rep(7,4),rep(8,4),
rep(9,4),rep(10,16))
#In our game and for simplicifaction of the problem, aces will always count as 1. Other figures are worth 10.
#If both player and croupier have same score, then player looses.
#Croupier will draw cards until he has 17 or more.
handPinit = NULL # will contain hand of player
handCinit = NULL # will contain hand of the croupier
list = list(handPinit, handCinit, packinit)
# Methods ####################################################################################
##############################################################################################
#Random integer, returns an integer to choose card
randInt = function(pack){
int = runif(1) * length(pack)
int = int+1
int = round(int, 0)
return(int)
}
#Picks a card, asimResults it to the desired hand and deletes it from the package.
pickC = function(hand, pack){
int = randInt(pack)
hand = c(hand, pack[int])
pack = pack[-int]
return(list(hand, pack))
}
score = function(handC){
return(sum(handC, na.rm = T))
}
printWinner = function(resultList){
res = resultList[[4]]
p = res[1]
c = res[2]
if((p > c && p <= 21) || (p <= 21 && c > 21)){
cat("Player has won with ", p, ", croupier has ", c, ".\n", sep = "")
}else{
cat("Player has lost with ", p, ", croupier has ", c, ".\n", sep = "")
}
}
#Black jack sim :
simulation = function(handP, handC, pack){
#Matrix to stock choice and next state, 1st is state, 2nd is choice, 3rd is reward, 4th is start state
cs = NULL
#pick first card
temp = NULL
temp = pickC(handP, pack)
handP = temp[[1]]
pack = temp[[2]]
temp = pickC(handC, pack)
handC = temp[[1]]
pack = temp[[2]]
#stock result
cs = rbind(cs, c(score(handP), 1, 0.1, 0))
#pick second card
temp = pickC(handP, pack)
handP = temp[[1]]
pack = temp[[2]]
temp = pickC(handC, pack)
handC = temp[[1]]
pack = temp[[2]]
#stock result
cs = rbind(cs, c(score(handP), 1, 0.1, cs[length(cs[,1]), 1]))
#reward stock final
reward = NULL
#to change with algo decision
while(score(handC) < 17){
#rand number to choose action, 1 = draw
rand = round(2*runif(1),0)
#if a = 1, draw a card
if(rand == 1 && score(handP) < 21){
temp = pickC(handP, pack)
handP = temp[[1]]
pack = temp[[2]]
cs = rbind(cs, c(score(handP), 1, 0.1, cs[length(cs[,1]), 1] ))
}else{
cs = rbind(cs, c(score(handP), 0, 0.1, cs[length(cs[,1]), 1]))
}
#if croupier < 17, he draws a card
if(score(handC) < 17){
temp = pickC(handC, pack)
handC = temp[[1]]
pack = temp[[2]]
}
}
#get scores
scores = c(score(handP), score(handC))
resultList = list(handP, handC, pack, scores)
#get reward
res = resultList[[4]]
p = res[1]
c = res[2]
if((p > c && p <= 21) || (p <= 21 && c > 21)){
reward = 100
}else{
reward = -50
}
#AsimResults reward as the reward of the last line of cs
cs[length(cs[,1]), 3] = reward
#return full list
resultList = list(handP, handC, pack, scores, cs)
return(resultList)
}
#Function for simulation, outputs tab containins states, actions and choices
simRand = function(k){
resultsRand = NULL
for(i in 1:k){
#init pack and hands
pack = c(rep(1,4), rep(2,4),rep(3,4),rep(4,4),rep(5,4),rep(6,4),rep(7,4),rep(8,4),
rep(9,4),rep(10,16))
handC = NULL
handP = NULL
#simulation k
res = simulation(handP, handC, pack)
resultsRand = rbind(resultsRand, res[[5]])
#resets for next iteration
pack = c(rep(1,4), rep(2,4),rep(3,4),rep(4,4),rep(5,4),rep(6,4),rep(7,4),rep(8,4),
rep(9,4),rep(10,16))
handC = NULL
handP = NULL
}
return(resultsRand)
}
#test
for(i in 1:10){
results = simulation(handPinit, handCinit, packinit)
printWinner(results)
}
#used to max the Qvalue decision
getRowMax = function(tab){
temp = tab[1]
for(i in 2:length(tab)){
if(tab[i] > temp){
temp = tab[i]
}
}
}
#####################################################################
#Q-learning
#####################################################################
#Represent sets of Q(s, a)
Qvalues = matrix(1, nrow = 30, ncol = 2)
simResults = simRand(1000)
#Hyperparameters
alpha = 0.9
discount = 0.1
#for all rows simulated, update qvalues.
for(i in 1:length(simResults[,1])){
st = simResults[i, 4] #st
a = simResults[i, 2] #a
stPlusOne = simResults[i, 1] #st+1
Qvalues[st, a] = Qvalues[st, a] + alpha * ( simResults[i,3] * discount * getRowMax(Qvalues[stPlusOne, ]) - Qvalues[st, a] )
}

As LucyMLi points out:
First you need to add return(temp) object to the getRowMax function.
But there is another issue with your simulation, because some of the
values in simResults[, 1] are 0, which means Qvalues[stPlusOne, ] will
be empty and thus you can't compute getRowMax().

Related

How to store list values in to matrix

set.seed(650)
library(maxLik)
y = c(rnorm(15,1,1), rnorm(15,3,1))
dat = data.frame(y)
B = 3 # number bootstrap sample
n = length(dat$y)
n1 = 15
boot.samples = matrix(sample(dat$y, size = B * n, replace = TRUE), n, B)
ml = list()
boot.l = 0
va.l = NULL
for (j in 1:B) {
boot.l = boot.samples[, j]
for (i in 1:n) {
LLl <- function(param) {
mul <- param[1]
sigmal <- param[2]
sum(log(dnorm(dat[1:i, ], mul, sigmal)))
}
ml[[i]] = coef(maxLik(logLik = LLl, start = c(mul = 1, sigmal = 1)))
}
va.l = matrix(unlist(ml), n-1, B*2, byrow = TRUE)
}
va.l
The following are my output
However, when I print the list I have the following output.
My question is how can I have mul estimates for j=1 in the 1st column, sigmal estimates for j = 1 in the second column and mul estimates for j=2 in the 3rd column, sigmal estimates for j = 2 in the 4th column and so on?
Are there any other way do this? Thank you for your help.

Window function / For loop in R

I have a database of matches with players and players'scores for each game. I am trying to create a rating variable for my prediction model. I am using formula from a blogpost.
Here is the dummy dataset:
df = data.frame(
matchid = c(1,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4),
playerid = c(2,3,4,5,6,7,8,9,10,11,5,2,3,4,6,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,17,19,21,18,20,22,26,24,25,23),
point = c(52,38,34,33,16,19,16,8,10,2,38,37,31,34,21,18,18,13,9,-2,45,34,37,39,12,9,7,-3,-1,-8,47,38,31,17,26,32,28,17,16,9))
Here is my attempt using for loop. The for loop run extremely slow for 30000 games database. Please give me some pointers on how to improve this process / loop. I really have no idea.
## Initialize initial rating for each player
players_ratings = data.frame(playerid = unique(df$playerid),rating = 1000, stringsAsFactors = FALSE)
## Initialize unique matches
unique_matches = df$matchid %>% unique
## Matches with rating
relative_rating_matches = list(length(df))
### GENERATE RATING
for(index in 1:length(unique_matches)){
match = df %>% filter(matchid == unique_matches[[index]])
position = index
## UPDATE RATING
match = match %>% left_join(players_ratings,by = 'playerid')
relative_rating_matches[[position]] = match
print(match)
## BUILD ACTUAL RESULTS MATRIX
S = matrix(nrow = 10, ncol = 10)
rownames(S) = match$playerid
colnames(S) = match$playerid
for(i in 1:nrow(S)) {
for(j in 1:ncol(S)) {
player_row_point = as.numeric(match %>% filter(playerid == rownames(S)[i]) %>% select(point))
player_col_point = as.numeric(match %>% filter(playerid == colnames(S)[j]) %>% select(point))
S[i,j] = ifelse(player_col_point == player_row_point,0.5,
ifelse(player_col_point > player_row_point,1,0))
}
}
diag(S)= 0
print(S)
## BUILD EXPECTED WIN/LOSS MATRIX
E = matrix(nrow = 10, ncol = 10)
rownames(E) = match$playerid
colnames(E) = match$playerid
for(i in 1:nrow(E)) {
for(j in 1:ncol(E)) {
player_row_rating = as.numeric(match %>% filter(playerid == rownames(E)[i]) %>% select(rating))
player_col_rating = as.numeric(match %>% filter(playerid == colnames(E)[j]) %>% select(rating))
r = 1 + 10^((player_row_rating - player_col_rating)/400)
expected_result = 1/r
E[i,j] = expected_result
}
}
diag(E) = 0
print(E)
## GENERATE INCREMENTAL RATING
R = 20 * (S-E)
R = as.data.frame(colSums(R)) %>% rownames_to_column()
print(R)
## UPDATE EXISTING RATING DATABASE
for(i in 1:nrow(R)){
player_id = R[i,1]
incre_rating = ifelse(is.na(R[i,2]),0,R[i,2])
cur_rating = players_ratings[players_ratings$playerid == player_id,2]
players_ratings[players_ratings$playerid == player_id,2] = cur_rating + incre_rating
}
}

R : Changing values of variables after certain time

the question I am trying to ask is how to I change one of the values of my variables (noted as LO$M in my list) after I pass a certain time.
The thing I am trying to achieve is that after 20,000 seconds passing I would like to change my value of Lac to the value of Lac at time 20,0000 +10,000
So at t = 20,000, Lac = Lac + 10,000
The issue I am having with my code is that within my if command I have if tt>= 20000, but this leads to the issue that every value of Lac after 20,000 being increased by 10,000 when what i want is that the FIRST value after 20,000 be increased by 10,000.
Basically, after 20,000 of my experiment passing I am trying to inject 10,000 more Lac into the experiment.
My code is given below:
LO = list()
LO$M = c(i = 1, ri = 0, I = 50, Lac = 20, ILac = 0, o = 1, Io = 0, RNAP = 100, RNAPo = 0, r = 0, z = 0)
LO$Pre = matrix(c(1,0,0,0,0,0,0,0,0,0,0,
0,1,0,0,0,0,0,0,0,0,0,
0,0,1,1,0,0,0,0,0,0,0,
0,0,0,0,1,0,0,0,0,0,0,
0,0,1,0,0,1,0,0,0,0,0,
0,0,0,0,0,0,1,0,0,0,0,
0,0,0,0,0,1,0,1,0,0,0,
0,0,0,0,0,0,0,0,1,0,0,
0,0,0,0,0,0,0,0,1,0,0,
0,0,0,0,0,0,0,0,0,1,0,
0,0,0,1,0,0,0,0,0,0,1,
0,1,0,0,0,0,0,0,0,0,0,
0,0,1,0,0,0,0,0,0,0,0,
0,0,0,0,1,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,1,0,
0,0,0,0,0,0,0,0,0,0,1), ncol=11, byrow=TRUE)
LO$Post = matrix(c(1,1,0,0,0,0,0,0,0,0,0,
0,1,1,0,0,0,0,0,0,0,0,
0,0,0,0,1,0,0,0,0,0,0,
0,0,1,1,0,0,0,0,0,0,0,
0,0,0,0,0,0,1,0,0,0,0,
0,0,1,0,0,1,0,0,0,0,0,
0,0,0,0,0,0,0,0,1,0,0,
0,0,0,0,0,1,0,1,0,0,0,
0,0,0,0,0,1,0,1,0,1,0,
0,0,0,0,0,0,0,0,0,1,1,
0,0,0,0,0,0,0,0,0,0,1,
0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,
0,0,0,1,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0), ncol=11, byrow=TRUE)
LO$h = function(x,t,th=c(0.02,0.1,0.005,0.1,1,0.01,0.1,0.01,0.03,0.1,1e-05,0.01,0.002,0.01,0.001))
{
with(as.list(c(x, th)), {
return(c(th[1]*i, th[2]*ri, th[3]*I*Lac, th[4]*ILac, th[5]*I*o, th[6]*Io, th[7]*o*RNAP,
th[8]*RNAPo, th[9]*RNAPo, th[10]*r, th[11]*Lac*z, th[12]*ri, th[13]*I,
th[13]*ILac, th[14]*r, th[15]*z))
})
}
gillespie1 = function (N, n, ...)
{
tt = 0
x = N$M
S = t(N$Post - N$Pre)
u = nrow(S)
v = ncol(S)
tvec = vector("numeric", n)
xmat = matrix(ncol = u, nrow = n + 1)
xmat[1, ] = x
for (i in 1:n) {
h = N$h(x, tt, ...)
tt = tt + rexp(1, sum(h))
j = sample(v, 1, prob = h)
x = x + S[, j]
tvec[i] = tt
xmat[i + 1, ] = x
if( tt >=20000){
x[4] = x[4] +10000
}
}
return(list(t = tvec, x = xmat))
}
newout = gillespie1(LO,200000)
matplot(newout$x[,4], type="l", lwd=0.25, col="grey")
I don't have a high enough reputation to attach images, but it should look something like this:
https://gyazo.com/0ffd940a22df23b2ccfdf4a17e85dca8
Sorry if this isn't clear. Thanks

In this example, you have the function myTask(). When you call execMyTask(), you will execute myTask()once, and after that, you will execute it at random intervals between 1 to max_wait milliseconds. When you get tired, you can kill the task with tclTaskDelete().
library(tcltk2)
myTask <- function() cat("some task!\n")
id = "execMyTask"
execMyTask <- function(max_wait = 3000) {
id <- toString(match.call()[[1]])
myTask()
wait = sample(1:max_wait, 1)
cat("Waiting", wait, "miliseconds\n") # replace with your function
if (is.null(tclTaskGet(id))) {
tclTaskSchedule(wait=wait, execMyTask(), id=id, redo = TRUE)
} else {
tclTaskChange(wait=wait, execMyTask(), id=id, redo = TRUE)
}
}
execMyTask()
tclTaskDelete(id)
So far, there is a little problem with this approach, because we can not supply arguments to the function fun in tclTaskChange().

How to manually build predictions from xgboost model

I am trying to figure out how to generate probabilities from the xgboost model tree so they match what I would get from the predict function.
First I build the model
library(xgboost)
#install.packages("ModelMetrics")
library(ModelMetrics)
set.seed(100)
# - Extreme gbm
y = as.integer(testDF$y)
x = testDF[,-which(names(testDF) %in% c('y'))]
var.names <- names(x)
x = as.matrix(x)
x = matrix(as.numeric(x),nrow(x),ncol(x))
nround = 10
XX <- xgboost(param=param, data = x, label = y, nrounds=nround, missing = NA)
Then I wrote some code to build all of the rules that would result in a particular leaf
baseTree <- xgb.model.dt.tree(model = XX)
Leafs <- filter(baseTree, Feature == 'Leaf')
Branches <- filter(baseTree, Feature != 'Leaf')
Branches$Feature = var.names[as.numeric(Branches$Feature) + 1]
FullRules = rep(NA, nrow(Leafs))
AllRules <- foreach(i = 1:nrow(Leafs), .combine = 'rbind') %do% {
theLeaf = Leafs[i,]
theNode = theLeaf$Node
theID = theLeaf$ID
count = 1
RuleText = ''
while(theNode != 0){
FF <- filter(Branches, Yes == theID | No == theID | Missing == theID)
isYes = FF$Yes == theID
isNo = FF$No == theID
isMissing = FF$Missing == theID
FullRules[i] = ifelse(isYes & isMissing
, paste0("(", FF$Feature, " < ", FF$Split, " | is.na(", FF$Feature, "))")
, NA)
FullRules[i] = ifelse(isNo & isMissing
, paste0("(", FF$Feature, " >= ", FF$Split, " | is.na(", FF$Feature, "))")
, FullRules[i])
FullRules[i] = ifelse(isYes & !isMissing
, paste0(FF$Feature, " < ", FF$Split)
, FullRules[i])
FullRules[i] = ifelse(isNo & !isMissing
, paste0(FF$Feature, " >= ", FF$Split)
, FullRules[i])
FullRules[i] = ifelse(isMissing & !isYes & !isNo
, paste0("is.na(", FF$Feature, ")")
, FullRules[i])
if(count == 1){
RuleText = FullRules[i]
} else{
RuleText = paste0(RuleText, " & ", FullRules[i])
}
theNode = FF$Node
theID = FF$ID
count = count + 1
}
data.frame(
Leafs[i,]
,RuleText
)
}
Now I pick out 1 row and attempted to match the probabilities. In this case it matches. The loop will go through and indicate TRUE for all of the rules that are met for this particular customer. Then I can filter down to those rows and sum those up to get the logodds estimates. Then I convert those to probabilities.
TT <- testDF[25,]
ff <- foreach(i = 1:nrow(AllRules), .combine = 'rbind') %do% {
TT %>% transmute_(
Tree = as.character(AllRules$RuleText[i])
, Quality = AllRules$Quality[i])
}
predict(XX, as.matrix(TT[,var.names]))
#[1] 0.05571342
filter(ff, Tree) %>%
summarise(
Q1 = sum(sqrt(Quality^2))
# ,Q2 = sum(sqrt(Quality^2))
, Prob1 = exp(Q1)/(1+exp(Q1))
, Prob2 = 1-Prob1
)
# Q1 Prob1 Prob2
#1 2.830209 0.9442866 0.0557134
But in this case it does not match the predict function...
TT <- testDF[17,]
ff <- foreach(i = 1:nrow(AllRules), .combine = 'rbind') %do% {
TT %>% transmute_(
Tree = as.character(AllRules$RuleText[i])
, Quality = AllRules$Quality[i])
}
predict(XX, as.matrix(TT[,var.names]))
#[1] 0.1386877
filter(ff, Tree) %>%
summarise(
Q1 = sum(sqrt(Quality^2))
# ,Q2 = sum(sqrt(Quality^2))
, Prob1 = exp(Q1)/(1+exp(Q1))
, Prob2 = 1-Prob1
)
# Q1 Prob1 Prob2
#1 1.967608 0.877354 0.122646

To generate the prediction you just need to sum up the values of the individual leafs that the person falls within for each booster
filter(ff, Tree) %>%
summarise(
Q1 = sum(Quality)
, Prob1 = exp(Q1)/(1+exp(Q1))
, Prob2 = 1-Prob1
)

Replacement has length zero: can't find the issue with my loop

I'm trying to modify some code from a chapter of Quantitative Trading with R to work with returns instead of raw prices. Everythings to be going okay with the exception of the "PROFIT AND LOSS" section of my code. It keeps returning "Error in qty_x[i] = (vec[i] + prev_x_qty) : replacement has length zero" When looking at my variables I can't seem to find any problems. I've included the code for reproduction.
# LOAD LIBRARIES
library(quantmod)
library(xts)
# FUNCTIONS
# ROLLING BETA
pcbeta = function(dF){
r = prcomp( ~ dF$x[-1] + dF$y[-1])
return(r$rotation[2, 1] / r$rotation[1,1])
}
rolling_beta = function(z, width){
rollapply(z, width = width, FUN = pcbeta,
by.column = FALSE, align = 'right')
}
# GET TICKER DATA
SPY = getSymbols('SPY', adjust=T, auto.assign=FALSE)
AAPL = getSymbols('AAPL', adjust=T, auto.assign=FALSE)
# IN-SAMPLE DATE RANGE
in_start_date = '2011-01-01'
in_end_date = '2011-12-31'
in_range = paste(in_start_date, '::', in_end_date, sep='')
# RETRIEVE IN-SAMPLE DATA
x_in = SPY[in_range, 6]
y_in = AAPL[in_range, 6]
dF_in = cbind(x_in, y_in)
names(dF_in) = c('x','y')
# OUT-OF-SAMPLE DATE RANGE
out_start_date= '2012-01-01'
out_end_date = '2012-12-31'
out_range = paste(out_start_date, '::', out_end_date, sep='')
# RETRIEVE OUT-OF-SAMPLE DATA
x_out = SPY[out_range, 6]
y_out = AAPL[out_range, 6]
dF_out = cbind(x_out, y_out)
names(dF_out) = c('x', 'y')
# CALCULATE RETURNS (IN AND OUT OF SAMPLE)
returns_in = diff(dF_in) / dF_in
returns_out = diff(dF_out) / dF_out
# DEFINE ROLLING WINDOW LENGTH
window_length = 10
# FIND BETAS
betas_in = rolling_beta(returns_in, window_length)
betas_out = rolling_beta(returns_out, window_length)
# FIND SPREADS
spreadR_in = returns_in$y - betas_in * returns_in$x
spreadR_out = returns_out$y - betas_out * returns_out$x
names(spreadR_in) = c('spread')
names(spreadR_out) = c('spread')
# FIND THRESHOLD
threshold = sd(spreadR_in, na.rm=TRUE)
# FORM DATA SETS
data_in = merge(returns_in, betas_in, spreadR_in)
data_out = merge(x_out, y_out, returns_out, betas_out, spreadR_out)
names(data_out) = c('xp', 'yp', 'x', 'y', 'betas_out', 'spread')
data_in = data_in[-1]
data_out = data_out[-1]
# GENERATE BUY AND SELL SIGNALS FOR OUT OF SAMPLE
buys = ifelse(data_out$spread > threshold, 1, 0)
sells = ifelse(data_out$spread < -threshold, -1, 0)
data_out$signal = buys+sells
# PROFIT AND LOSS
prev_x_qty = 0
position = 0
trade_size = 100
signal = as.numeric(data_out$signal)
signal[is.na(signal)] = 0
beta = as.numeric(data_out$betas_out)
ratio = (data_out$yp/data_out$xp)
vec = round(beta*trade_size*ratio)
qty_x = rep(0, length(signal))
qty_y = rep(0, length(signal))
for(i in 1:length(signal)){
if(signal[i] == 1 && position == 0){
#buy the spread
prev_x_qty = vec[i]
qty_x[i] = -prev_x_qty
qty_y[i] = trade_size
position = 1
}
if(signal[i] == -1 && position == 0){
#buy the spread
prev_x_qty = vec[i]
qty_x[i] = prev_x_qty
qty_y[i] = -trade_size
position = -1
}
if(signal[i] == 1 && position == -1){
# we are short the spread and need to buy
qty_x[i] = -(vec[i] + prev_x_qty)
prev_x_qty = vec[i]
qty_y[i] = 2 * trade_size
position = 1
}
if(signal[i] == -1 && position == 1){
# we are short the spread and need to buy
qty_x[i] = (vec[i] + prev_x_qty)
prev_x_qty = vec[i]
qty_y[i] = -2 * trade_size
position = -1
}
}

Develop Reference

r css asp.net wordpress firebase qt symfony nginx http apache-flex

Q-learning R has length zero - r

As LucyMLi points out: First you need to add return(temp) object to the getRowMax function. But there is another issue with your simulation, because some of the values in simResults[, 1] are 0, which means Qvalues[stPlusOne, ] will be empty and thus you can't compute getRowMax().

Related

How to store list values in to matrix

Window function / For loop in R

R : Changing values of variables after certain time

How to manually build predictions from xgboost model

Replacement has length zero: can't find the issue with my loop

Categories

Resources