Q-learning R has length zero - r
I am trying to inplement a simulation of a simplified blackjack game that will return the best policy at each state s.
The blackjack simulation seems to work properly, but i somehow get an error when trying to apply the Q learning algorithm to reach the optimal policy.
Here's my code, i believe it's well documented, error is in the Q-learning block, starting at ~line 170, it is also reproducible :
#Application reinforcement learning for black jack. We will suppose here that the croupier only has 1 pack of cards
#Initial tabs
packinit = c(rep(1,4), rep(2,4),rep(3,4),rep(4,4),rep(5,4),rep(6,4),rep(7,4),rep(8,4),
rep(9,4),rep(10,16))
#In our game and for simplicifaction of the problem, aces will always count as 1. Other figures are worth 10.
#If both player and croupier have same score, then player looses.
#Croupier will draw cards until he has 17 or more.
handPinit = NULL # will contain hand of player
handCinit = NULL # will contain hand of the croupier
list = list(handPinit, handCinit, packinit)
# Methods ####################################################################################
##############################################################################################
#Random integer, returns an integer to choose card
randInt = function(pack){
int = runif(1) * length(pack)
int = int+1
int = round(int, 0)
return(int)
}
#Picks a card, asimResults it to the desired hand and deletes it from the package.
pickC = function(hand, pack){
int = randInt(pack)
hand = c(hand, pack[int])
pack = pack[-int]
return(list(hand, pack))
}
score = function(handC){
return(sum(handC, na.rm = T))
}
printWinner = function(resultList){
res = resultList[[4]]
p = res[1]
c = res[2]
if((p > c && p <= 21) || (p <= 21 && c > 21)){
cat("Player has won with ", p, ", croupier has ", c, ".\n", sep = "")
}else{
cat("Player has lost with ", p, ", croupier has ", c, ".\n", sep = "")
}
}
#Black jack sim :
simulation = function(handP, handC, pack){
#Matrix to stock choice and next state, 1st is state, 2nd is choice, 3rd is reward, 4th is start state
cs = NULL
#pick first card
temp = NULL
temp = pickC(handP, pack)
handP = temp[[1]]
pack = temp[[2]]
temp = pickC(handC, pack)
handC = temp[[1]]
pack = temp[[2]]
#stock result
cs = rbind(cs, c(score(handP), 1, 0.1, 0))
#pick second card
temp = pickC(handP, pack)
handP = temp[[1]]
pack = temp[[2]]
temp = pickC(handC, pack)
handC = temp[[1]]
pack = temp[[2]]
#stock result
cs = rbind(cs, c(score(handP), 1, 0.1, cs[length(cs[,1]), 1]))
#reward stock final
reward = NULL
#to change with algo decision
while(score(handC) < 17){
#rand number to choose action, 1 = draw
rand = round(2*runif(1),0)
#if a = 1, draw a card
if(rand == 1 && score(handP) < 21){
temp = pickC(handP, pack)
handP = temp[[1]]
pack = temp[[2]]
cs = rbind(cs, c(score(handP), 1, 0.1, cs[length(cs[,1]), 1] ))
}else{
cs = rbind(cs, c(score(handP), 0, 0.1, cs[length(cs[,1]), 1]))
}
#if croupier < 17, he draws a card
if(score(handC) < 17){
temp = pickC(handC, pack)
handC = temp[[1]]
pack = temp[[2]]
}
}
#get scores
scores = c(score(handP), score(handC))
resultList = list(handP, handC, pack, scores)
#get reward
res = resultList[[4]]
p = res[1]
c = res[2]
if((p > c && p <= 21) || (p <= 21 && c > 21)){
reward = 100
}else{
reward = -50
}
#AsimResults reward as the reward of the last line of cs
cs[length(cs[,1]), 3] = reward
#return full list
resultList = list(handP, handC, pack, scores, cs)
return(resultList)
}
#Function for simulation, outputs tab containins states, actions and choices
simRand = function(k){
resultsRand = NULL
for(i in 1:k){
#init pack and hands
pack = c(rep(1,4), rep(2,4),rep(3,4),rep(4,4),rep(5,4),rep(6,4),rep(7,4),rep(8,4),
rep(9,4),rep(10,16))
handC = NULL
handP = NULL
#simulation k
res = simulation(handP, handC, pack)
resultsRand = rbind(resultsRand, res[[5]])
#resets for next iteration
pack = c(rep(1,4), rep(2,4),rep(3,4),rep(4,4),rep(5,4),rep(6,4),rep(7,4),rep(8,4),
rep(9,4),rep(10,16))
handC = NULL
handP = NULL
}
return(resultsRand)
}
#test
for(i in 1:10){
results = simulation(handPinit, handCinit, packinit)
printWinner(results)
}
#used to max the Qvalue decision
getRowMax = function(tab){
temp = tab[1]
for(i in 2:length(tab)){
if(tab[i] > temp){
temp = tab[i]
}
}
}
#####################################################################
#Q-learning
#####################################################################
#Represent sets of Q(s, a)
Qvalues = matrix(1, nrow = 30, ncol = 2)
simResults = simRand(1000)
#Hyperparameters
alpha = 0.9
discount = 0.1
#for all rows simulated, update qvalues.
for(i in 1:length(simResults[,1])){
st = simResults[i, 4] #st
a = simResults[i, 2] #a
stPlusOne = simResults[i, 1] #st+1
Qvalues[st, a] = Qvalues[st, a] + alpha * ( simResults[i,3] * discount * getRowMax(Qvalues[stPlusOne, ]) - Qvalues[st, a] )
}
As LucyMLi points out:
First you need to add return(temp) object to the getRowMax function.
But there is another issue with your simulation, because some of the
values in simResults[, 1] are 0, which means Qvalues[stPlusOne, ] will
be empty and thus you can't compute getRowMax().
Related
How to store list values in to matrix
set.seed(650) library(maxLik) y = c(rnorm(15,1,1), rnorm(15,3,1)) dat = data.frame(y) B = 3 # number bootstrap sample n = length(dat$y) n1 = 15 boot.samples = matrix(sample(dat$y, size = B * n, replace = TRUE), n, B) ml = list() boot.l = 0 va.l = NULL for (j in 1:B) { boot.l = boot.samples[, j] for (i in 1:n) { LLl <- function(param) { mul <- param[1] sigmal <- param[2] sum(log(dnorm(dat[1:i, ], mul, sigmal))) } ml[[i]] = coef(maxLik(logLik = LLl, start = c(mul = 1, sigmal = 1))) } va.l = matrix(unlist(ml), n-1, B*2, byrow = TRUE) } va.l The following are my output However, when I print the list I have the following output. My question is how can I have mul estimates for j=1 in the 1st column, sigmal estimates for j = 1 in the second column and mul estimates for j=2 in the 3rd column, sigmal estimates for j = 2 in the 4th column and so on? Are there any other way do this? Thank you for your help.
Window function / For loop in R
I have a database of matches with players and players'scores for each game. I am trying to create a rating variable for my prediction model. I am using formula from a blogpost. Here is the dummy dataset: df = data.frame( matchid = c(1,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4), playerid = c(2,3,4,5,6,7,8,9,10,11,5,2,3,4,6,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,17,19,21,18,20,22,26,24,25,23), point = c(52,38,34,33,16,19,16,8,10,2,38,37,31,34,21,18,18,13,9,-2,45,34,37,39,12,9,7,-3,-1,-8,47,38,31,17,26,32,28,17,16,9)) Here is my attempt using for loop. The for loop run extremely slow for 30000 games database. Please give me some pointers on how to improve this process / loop. I really have no idea. ## Initialize initial rating for each player players_ratings = data.frame(playerid = unique(df$playerid),rating = 1000, stringsAsFactors = FALSE) ## Initialize unique matches unique_matches = df$matchid %>% unique ## Matches with rating relative_rating_matches = list(length(df)) ### GENERATE RATING for(index in 1:length(unique_matches)){ match = df %>% filter(matchid == unique_matches[[index]]) position = index ## UPDATE RATING match = match %>% left_join(players_ratings,by = 'playerid') relative_rating_matches[[position]] = match print(match) ## BUILD ACTUAL RESULTS MATRIX S = matrix(nrow = 10, ncol = 10) rownames(S) = match$playerid colnames(S) = match$playerid for(i in 1:nrow(S)) { for(j in 1:ncol(S)) { player_row_point = as.numeric(match %>% filter(playerid == rownames(S)[i]) %>% select(point)) player_col_point = as.numeric(match %>% filter(playerid == colnames(S)[j]) %>% select(point)) S[i,j] = ifelse(player_col_point == player_row_point,0.5, ifelse(player_col_point > player_row_point,1,0)) } } diag(S)= 0 print(S) ## BUILD EXPECTED WIN/LOSS MATRIX E = matrix(nrow = 10, ncol = 10) rownames(E) = match$playerid colnames(E) = match$playerid for(i in 1:nrow(E)) { for(j in 1:ncol(E)) { player_row_rating = as.numeric(match %>% filter(playerid == rownames(E)[i]) %>% select(rating)) player_col_rating = as.numeric(match %>% filter(playerid == colnames(E)[j]) %>% select(rating)) r = 1 + 10^((player_row_rating - player_col_rating)/400) expected_result = 1/r E[i,j] = expected_result } } diag(E) = 0 print(E) ## GENERATE INCREMENTAL RATING R = 20 * (S-E) R = as.data.frame(colSums(R)) %>% rownames_to_column() print(R) ## UPDATE EXISTING RATING DATABASE for(i in 1:nrow(R)){ player_id = R[i,1] incre_rating = ifelse(is.na(R[i,2]),0,R[i,2]) cur_rating = players_ratings[players_ratings$playerid == player_id,2] players_ratings[players_ratings$playerid == player_id,2] = cur_rating + incre_rating } }
R : Changing values of variables after certain time
the question I am trying to ask is how to I change one of the values of my variables (noted as LO$M in my list) after I pass a certain time. The thing I am trying to achieve is that after 20,000 seconds passing I would like to change my value of Lac to the value of Lac at time 20,0000 +10,000 So at t = 20,000, Lac = Lac + 10,000 The issue I am having with my code is that within my if command I have if tt>= 20000, but this leads to the issue that every value of Lac after 20,000 being increased by 10,000 when what i want is that the FIRST value after 20,000 be increased by 10,000. Basically, after 20,000 of my experiment passing I am trying to inject 10,000 more Lac into the experiment. My code is given below: LO = list() LO$M = c(i = 1, ri = 0, I = 50, Lac = 20, ILac = 0, o = 1, Io = 0, RNAP = 100, RNAPo = 0, r = 0, z = 0) LO$Pre = matrix(c(1,0,0,0,0,0,0,0,0,0,0, 0,1,0,0,0,0,0,0,0,0,0, 0,0,1,1,0,0,0,0,0,0,0, 0,0,0,0,1,0,0,0,0,0,0, 0,0,1,0,0,1,0,0,0,0,0, 0,0,0,0,0,0,1,0,0,0,0, 0,0,0,0,0,1,0,1,0,0,0, 0,0,0,0,0,0,0,0,1,0,0, 0,0,0,0,0,0,0,0,1,0,0, 0,0,0,0,0,0,0,0,0,1,0, 0,0,0,1,0,0,0,0,0,0,1, 0,1,0,0,0,0,0,0,0,0,0, 0,0,1,0,0,0,0,0,0,0,0, 0,0,0,0,1,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,1,0, 0,0,0,0,0,0,0,0,0,0,1), ncol=11, byrow=TRUE) LO$Post = matrix(c(1,1,0,0,0,0,0,0,0,0,0, 0,1,1,0,0,0,0,0,0,0,0, 0,0,0,0,1,0,0,0,0,0,0, 0,0,1,1,0,0,0,0,0,0,0, 0,0,0,0,0,0,1,0,0,0,0, 0,0,1,0,0,1,0,0,0,0,0, 0,0,0,0,0,0,0,0,1,0,0, 0,0,0,0,0,1,0,1,0,0,0, 0,0,0,0,0,1,0,1,0,1,0, 0,0,0,0,0,0,0,0,0,1,1, 0,0,0,0,0,0,0,0,0,0,1, 0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0, 0,0,0,1,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0), ncol=11, byrow=TRUE) LO$h = function(x,t,th=c(0.02,0.1,0.005,0.1,1,0.01,0.1,0.01,0.03,0.1,1e-05,0.01,0.002,0.01,0.001)) { with(as.list(c(x, th)), { return(c(th[1]*i, th[2]*ri, th[3]*I*Lac, th[4]*ILac, th[5]*I*o, th[6]*Io, th[7]*o*RNAP, th[8]*RNAPo, th[9]*RNAPo, th[10]*r, th[11]*Lac*z, th[12]*ri, th[13]*I, th[13]*ILac, th[14]*r, th[15]*z)) }) } gillespie1 = function (N, n, ...) { tt = 0 x = N$M S = t(N$Post - N$Pre) u = nrow(S) v = ncol(S) tvec = vector("numeric", n) xmat = matrix(ncol = u, nrow = n + 1) xmat[1, ] = x for (i in 1:n) { h = N$h(x, tt, ...) tt = tt + rexp(1, sum(h)) j = sample(v, 1, prob = h) x = x + S[, j] tvec[i] = tt xmat[i + 1, ] = x if( tt >=20000){ x[4] = x[4] +10000 } } return(list(t = tvec, x = xmat)) } newout = gillespie1(LO,200000) matplot(newout$x[,4], type="l", lwd=0.25, col="grey") I don't have a high enough reputation to attach images, but it should look something like this: https://gyazo.com/0ffd940a22df23b2ccfdf4a17e85dca8 Sorry if this isn't clear. Thanks
In this example, you have the function myTask(). When you call execMyTask(), you will execute myTask()once, and after that, you will execute it at random intervals between 1 to max_wait milliseconds. When you get tired, you can kill the task with tclTaskDelete(). library(tcltk2) myTask <- function() cat("some task!\n") id = "execMyTask" execMyTask <- function(max_wait = 3000) { id <- toString(match.call()[[1]]) myTask() wait = sample(1:max_wait, 1) cat("Waiting", wait, "miliseconds\n") # replace with your function if (is.null(tclTaskGet(id))) { tclTaskSchedule(wait=wait, execMyTask(), id=id, redo = TRUE) } else { tclTaskChange(wait=wait, execMyTask(), id=id, redo = TRUE) } } execMyTask() tclTaskDelete(id) So far, there is a little problem with this approach, because we can not supply arguments to the function fun in tclTaskChange().
How to manually build predictions from xgboost model
I am trying to figure out how to generate probabilities from the xgboost model tree so they match what I would get from the predict function. First I build the model library(xgboost) #install.packages("ModelMetrics") library(ModelMetrics) set.seed(100) # - Extreme gbm y = as.integer(testDF$y) x = testDF[,-which(names(testDF) %in% c('y'))] var.names <- names(x) x = as.matrix(x) x = matrix(as.numeric(x),nrow(x),ncol(x)) nround = 10 XX <- xgboost(param=param, data = x, label = y, nrounds=nround, missing = NA) Then I wrote some code to build all of the rules that would result in a particular leaf baseTree <- xgb.model.dt.tree(model = XX) Leafs <- filter(baseTree, Feature == 'Leaf') Branches <- filter(baseTree, Feature != 'Leaf') Branches$Feature = var.names[as.numeric(Branches$Feature) + 1] FullRules = rep(NA, nrow(Leafs)) AllRules <- foreach(i = 1:nrow(Leafs), .combine = 'rbind') %do% { theLeaf = Leafs[i,] theNode = theLeaf$Node theID = theLeaf$ID count = 1 RuleText = '' while(theNode != 0){ FF <- filter(Branches, Yes == theID | No == theID | Missing == theID) isYes = FF$Yes == theID isNo = FF$No == theID isMissing = FF$Missing == theID FullRules[i] = ifelse(isYes & isMissing , paste0("(", FF$Feature, " < ", FF$Split, " | is.na(", FF$Feature, "))") , NA) FullRules[i] = ifelse(isNo & isMissing , paste0("(", FF$Feature, " >= ", FF$Split, " | is.na(", FF$Feature, "))") , FullRules[i]) FullRules[i] = ifelse(isYes & !isMissing , paste0(FF$Feature, " < ", FF$Split) , FullRules[i]) FullRules[i] = ifelse(isNo & !isMissing , paste0(FF$Feature, " >= ", FF$Split) , FullRules[i]) FullRules[i] = ifelse(isMissing & !isYes & !isNo , paste0("is.na(", FF$Feature, ")") , FullRules[i]) if(count == 1){ RuleText = FullRules[i] } else{ RuleText = paste0(RuleText, " & ", FullRules[i]) } theNode = FF$Node theID = FF$ID count = count + 1 } data.frame( Leafs[i,] ,RuleText ) } Now I pick out 1 row and attempted to match the probabilities. In this case it matches. The loop will go through and indicate TRUE for all of the rules that are met for this particular customer. Then I can filter down to those rows and sum those up to get the logodds estimates. Then I convert those to probabilities. TT <- testDF[25,] ff <- foreach(i = 1:nrow(AllRules), .combine = 'rbind') %do% { TT %>% transmute_( Tree = as.character(AllRules$RuleText[i]) , Quality = AllRules$Quality[i]) } predict(XX, as.matrix(TT[,var.names])) #[1] 0.05571342 filter(ff, Tree) %>% summarise( Q1 = sum(sqrt(Quality^2)) # ,Q2 = sum(sqrt(Quality^2)) , Prob1 = exp(Q1)/(1+exp(Q1)) , Prob2 = 1-Prob1 ) # Q1 Prob1 Prob2 #1 2.830209 0.9442866 0.0557134 But in this case it does not match the predict function... TT <- testDF[17,] ff <- foreach(i = 1:nrow(AllRules), .combine = 'rbind') %do% { TT %>% transmute_( Tree = as.character(AllRules$RuleText[i]) , Quality = AllRules$Quality[i]) } predict(XX, as.matrix(TT[,var.names])) #[1] 0.1386877 filter(ff, Tree) %>% summarise( Q1 = sum(sqrt(Quality^2)) # ,Q2 = sum(sqrt(Quality^2)) , Prob1 = exp(Q1)/(1+exp(Q1)) , Prob2 = 1-Prob1 ) # Q1 Prob1 Prob2 #1 1.967608 0.877354 0.122646
To generate the prediction you just need to sum up the values of the individual leafs that the person falls within for each booster filter(ff, Tree) %>% summarise( Q1 = sum(Quality) , Prob1 = exp(Q1)/(1+exp(Q1)) , Prob2 = 1-Prob1 )
Replacement has length zero: can't find the issue with my loop
I'm trying to modify some code from a chapter of Quantitative Trading with R to work with returns instead of raw prices. Everythings to be going okay with the exception of the "PROFIT AND LOSS" section of my code. It keeps returning "Error in qty_x[i] = (vec[i] + prev_x_qty) : replacement has length zero" When looking at my variables I can't seem to find any problems. I've included the code for reproduction. # LOAD LIBRARIES library(quantmod) library(xts) # FUNCTIONS # ROLLING BETA pcbeta = function(dF){ r = prcomp( ~ dF$x[-1] + dF$y[-1]) return(r$rotation[2, 1] / r$rotation[1,1]) } rolling_beta = function(z, width){ rollapply(z, width = width, FUN = pcbeta, by.column = FALSE, align = 'right') } # GET TICKER DATA SPY = getSymbols('SPY', adjust=T, auto.assign=FALSE) AAPL = getSymbols('AAPL', adjust=T, auto.assign=FALSE) # IN-SAMPLE DATE RANGE in_start_date = '2011-01-01' in_end_date = '2011-12-31' in_range = paste(in_start_date, '::', in_end_date, sep='') # RETRIEVE IN-SAMPLE DATA x_in = SPY[in_range, 6] y_in = AAPL[in_range, 6] dF_in = cbind(x_in, y_in) names(dF_in) = c('x','y') # OUT-OF-SAMPLE DATE RANGE out_start_date= '2012-01-01' out_end_date = '2012-12-31' out_range = paste(out_start_date, '::', out_end_date, sep='') # RETRIEVE OUT-OF-SAMPLE DATA x_out = SPY[out_range, 6] y_out = AAPL[out_range, 6] dF_out = cbind(x_out, y_out) names(dF_out) = c('x', 'y') # CALCULATE RETURNS (IN AND OUT OF SAMPLE) returns_in = diff(dF_in) / dF_in returns_out = diff(dF_out) / dF_out # DEFINE ROLLING WINDOW LENGTH window_length = 10 # FIND BETAS betas_in = rolling_beta(returns_in, window_length) betas_out = rolling_beta(returns_out, window_length) # FIND SPREADS spreadR_in = returns_in$y - betas_in * returns_in$x spreadR_out = returns_out$y - betas_out * returns_out$x names(spreadR_in) = c('spread') names(spreadR_out) = c('spread') # FIND THRESHOLD threshold = sd(spreadR_in, na.rm=TRUE) # FORM DATA SETS data_in = merge(returns_in, betas_in, spreadR_in) data_out = merge(x_out, y_out, returns_out, betas_out, spreadR_out) names(data_out) = c('xp', 'yp', 'x', 'y', 'betas_out', 'spread') data_in = data_in[-1] data_out = data_out[-1] # GENERATE BUY AND SELL SIGNALS FOR OUT OF SAMPLE buys = ifelse(data_out$spread > threshold, 1, 0) sells = ifelse(data_out$spread < -threshold, -1, 0) data_out$signal = buys+sells # PROFIT AND LOSS prev_x_qty = 0 position = 0 trade_size = 100 signal = as.numeric(data_out$signal) signal[is.na(signal)] = 0 beta = as.numeric(data_out$betas_out) ratio = (data_out$yp/data_out$xp) vec = round(beta*trade_size*ratio) qty_x = rep(0, length(signal)) qty_y = rep(0, length(signal)) for(i in 1:length(signal)){ if(signal[i] == 1 && position == 0){ #buy the spread prev_x_qty = vec[i] qty_x[i] = -prev_x_qty qty_y[i] = trade_size position = 1 } if(signal[i] == -1 && position == 0){ #buy the spread prev_x_qty = vec[i] qty_x[i] = prev_x_qty qty_y[i] = -trade_size position = -1 } if(signal[i] == 1 && position == -1){ # we are short the spread and need to buy qty_x[i] = -(vec[i] + prev_x_qty) prev_x_qty = vec[i] qty_y[i] = 2 * trade_size position = 1 } if(signal[i] == -1 && position == 1){ # we are short the spread and need to buy qty_x[i] = (vec[i] + prev_x_qty) prev_x_qty = vec[i] qty_y[i] = -2 * trade_size position = -1 } }