I am receiving values in my R process and want to process them asynchronously. I am using the promises and future packages.
This is what my current code looks like:
arr = list()
i = 0
while (i < 10) {
  a = read messages from the Kafka topic  # pseudocode
  arr[[i]] = future(DoSomething(a))
  i = i + 1
}
Now arr contains a list of futures.
How do I get the value() of whichever one resolves first (and so on)?
Something like Promise.race in JavaScript.
Edit: I just re-read your question and saw that you were asking about getting the first result, not all results. Below is the code for that: a while loop that spins until at least one future has resolved, then collects whatever has finished.
There is also a promise_race() function in the promises package, but the issue with the promises package is that it can only produce output as a side effect. You can't get the winning value back into a variable for further computation in the main thread.
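For reference, promise_race() usage looks roughly like this. This is a minimal sketch, assuming an interactive session where the later event loop gets a chance to run; note the winning value is only usable inside the promise pipeline:
library(promises)
library(future)
plan(multisession)

p1 <- as.promise(future({ Sys.sleep(2); "slow" }))
p2 <- as.promise(future({ Sys.sleep(1); "fast" }))

# The winner's value can only be consumed inside the pipeline,
# not assigned back to a variable in the main thread
promise_race(p1, p2) %...>% print()
The future-based approach below avoids that limitation: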
require(future)
plan(multisession)  # "multiprocess" is deprecated in current versions of future

longRunningFunction <- function(value) {
  random1 <- runif(n = 1, min = 5, max = 30)
  Sys.sleep(random1)  # simulate a long-running computation
  return(value)
}
arr <- list()
# Changed the starting index to 1 since R lists start at 1, not 0
i <- 1
# If more futures are created than there are cores available, the main thread
# will block until the first future completes and frees a worker for the next one
while (i < 6) {
  arr[[i]] <- future(longRunningFunction(i))
  i <- i + 1
}
# Spin until at least one future has resolved
while (all(!resolved(arr))) { }
# Collect the values of every future that has resolved so far
raceresults_from_future <- lapply(arr[resolved(arr)], value)
print(paste("raceresults_from_future:", raceresults_from_future))
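If you want the remaining values as they complete (a repeated race rather than a single one), one option is to poll resolved() and pull each value as its future finishes. This is a minimal sketch built on the same polling idea, not a built-in future API; collect_in_completion_order is a made-up name:
collect_in_completion_order <- function(futures) {
  results <- list()
  pending <- futures
  while (length(pending) > 0) {
    done <- resolved(pending)  # logical vector: which pending futures have finished
    for (f in pending[done]) {
      results[[length(results) + 1]] <- value(f)
    }
    pending <- pending[!done]
    Sys.sleep(0.05)  # brief pause so the loop doesn't spin hot
  }
  results
}

race_order <- collect_in_completion_order(arr)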
We have a basic tryCatch that writes a dataframe to Google Sheets and tries again if the first write fails for any reason:
result = tryCatch({
  print('TRYING')
  googlesheets4::sheet_write(data = our_df, ss = our_spreadsheet, sheet = 'our_sheetname')
}, error = function(e) {
  print('ERROR, TRYING AGAIN')
  googlesheets4::sheet_write(data = our_df, ss = our_spreadsheet, sheet = 'our_sheetname')
})
Is it possible to generalize this code to retry the googlesheets4::sheet_write() call up to N times? Is something built into base R for this, or is there a good R library that handles an arbitrary number of retries of a function?
You can put it in a for loop like this.
First, I am going to define a function that often fails (as I don't have access to your Google sheet).
russian_roulette <- function(n = 6) {
  revolver <- sample(1:n, 1)
  if (revolver == 1) {
    return("You lived")
  } else {
    stop("Better luck next time...")
  }
}
Then you can try it as many times as you consider reasonable. You can replace my call to russian_roulette() with your call to googlesheets4::sheet_write().
NUM_TRIES <- 10
for (i in 1:NUM_TRIES) {
  message(i)
  result <- try({
    russian_roulette()
  })
  # inherits() is safer than class(result) != "try-error", since class() can return multiple values
  if (!inherits(result, "try-error")) {
    print("Success!")
    break
  }
}
Output:
1
Error in russian_roulette() : Better luck next time...
2
Error in russian_roulette() : Better luck next time...
3
Error in russian_roulette() : Better luck next time...
4
Error in russian_roulette() : Better luck next time...
5
Error in russian_roulette() : Better luck next time...
6
[1] "Success!"
result
# [1] "You lived"
I don't know why you expect writing to a file to fail; depending on the reason, you may want to add a Sys.sleep() call for a certain number of seconds after every failure.
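If you want a reusable version, here is a minimal sketch of a generic helper (retry is a made-up name, not from any package; the purrr package also offers insistently() for a similar purpose):
retry <- function(f, max_tries = 10, wait_seconds = 5) {
  for (i in seq_len(max_tries)) {
    result <- try(f(), silent = TRUE)
    if (!inherits(result, "try-error")) {
      return(result)
    }
    message(sprintf("Attempt %d of %d failed, waiting %d seconds...", i, max_tries, wait_seconds))
    Sys.sleep(wait_seconds)
  }
  stop("All ", max_tries, " attempts failed")
}

result <- retry(function() {
  googlesheets4::sheet_write(data = our_df, ss = our_spreadsheet, sheet = 'our_sheetname')
})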
I'd like to run heavy computations in Julia for a fixed duration, for example 10 seconds. I tried this:
timer = Timer(10.0)
while isopen(timer)
    computation()
end
But this does not work, since the computations never let Julia's task scheduler take control. So I added yield() in the loop:
timer = Timer(10.0)
while isopen(timer)
    yield()
    computation()
end
But now there is significant overhead from calling yield(), especially when one call to computation() is short. I guess I could call yield() and isopen() only every 1000 iterations or so, but I would prefer a solution where I would not have to tweak the number of iterations every time I change the computations. Any ideas?
The pattern below uses threads; on my laptop it has an overhead of around 35 ms per 1,000,000 timeout checks, which should be more than acceptable for any job.
Tested on a Julia 1.5 release candidate:
function should_stop(timeout=10)
    handle = Threads.Atomic{Bool}(false)
    mytask = Threads.@spawn begin
        sleep(timeout)
        Threads.atomic_or!(handle, true)
    end
    handle
end
function do_some_job_with_timeout()
    handle = should_stop(5)
    res = BigInt()  # save results to some object
    mytask = Threads.@spawn begin
        for i in 1:10_000_000
            # TODO: some complex computations here
            res += 1  # mutate the result object
            handle.value && break
        end
    end
    wait(mytask)  # wait for the job to complete
    res
end
You can also use Distributed instead. The code below seems to have much better latency: only about 1 ms per 1,000,000 timeout checks.
using Distributed
using SharedArrays
addprocs(1)

function get_termination_handle(timeout=5, workerid::Int=workers()[end])::SharedArray{Bool}
    handle = SharedArray{Bool}([false])
    proc = @spawnat workerid begin
        sleep(timeout)
        handle[1] = true
    end
    handle
end
function fun_within_timeout()
    res = 0
    h = get_termination_handle(0.1)
    for i = 1:100_000_000
        res += i % 2 == 0 ? 1 : 0
        h[1] && break
    end
    res
end
Is it possible to write a function in R that will hold its execution, giving the user control over the console (while in interactive mode, of course), meanwhile recording their inputs, and continuing execution either:
after a certain input has been made,
or after a certain output has been produced,
or after a certain duration of time has passed.
Example: ask the user a question (without using readline() for the answer):
question <- function() {
  message("How much is 2 + 2?")
  # let users take control of the console;
  # continue to the next statement only if they input "2 + 2" or "4", or a minute has passed;
  # meanwhile record their last input, similar to .Last.value, e.g.:
  startTime <- Sys.time()
  timeout <- FALSE
  lastInput <- lastInput()
  while (eval(parse(text = lastInput)) != 4 & !timeout) {
    if (difftime(Sys.time(), startTime, units = "mins") > 1) {
      timeout <- TRUE
    }
    lastInput <- lastInput()
  }
  if (timeout) {
    stop("Sorry, timeout.")
  } else {
    message("Correct! Let's continue with this function:")
  }
}
Where lastInput() is a function that "listens" to user input as it changes.
Obviously the above structure is tentative and won't give me what I want; I need some way to "listen" or "observe" and only react when the user inputs something at the console.
The final user experience should be:
> question()
How much is 2+2?
> #I'm the user, I can do whatever
> head(mtcars)
> plot(1:10)
> 3
> 2 + 2
[1] 4
Correct! Let's continue with this function:
Am I too optimistic or is there some R magic for this?
Thanks to @parth I have looked at swirl's source code and got acquainted with the addTaskCallback function. From the help file:
addTaskCallback registers an R function that is to be called each time a top-level task is completed.
And so we can make R check the users input ("top-level task") with a specific function, responding accordingly.
But since the swirl code is very "heavy", I think I need to supply a minimal example:
swirllike <- function(...) {
  removeTaskCallback("swirllike")
  e <- new.env(parent = globalenv())
  e$prompt <- TRUE
  e$startTime <- Sys.time()
  cb <- function(expr, val, ok, vis, data = e) {
    e$expr <- expr
    e$val <- val
    e$ok <- ok
    e$vis <- vis
    # The result of f() will determine whether the callback remains active
    return(f(e, ...))
  }
  addTaskCallback(cb, name = "swirllike")
  message("How much is 2+2?")
}
OK, so the swirllike function asks the 2+2 question, but it also creates a new environment e with some objects the user need not know about. It then adds the swirllike task callback to the task callback list (or rather vector). This task callback holds the cb function, which calls the f function; f will run after every top-level input.
If you run this, make sure you see the swirllike task callback with:
> getTaskCallbackNames()
[1] "swirllike"
Now the f function is similar to my sketch in the question:
f <- function(e, ...) {
  if (e$prompt) {
    if (difftime(Sys.time(), e$startTime, units = "mins") > 1) {
      stop("Sorry, timeout.")
    }
    if (!is.null(.Last.value) && .Last.value == 4) {
      message("Correct! Let's continue with this function:")
      e$prompt <- FALSE
      while (!e$prompt) {
        # continue asking questions or something, but for this example:
        break
      }
    }
  }
  return(TRUE)
}
And don't forget to remove the swirllike task callback with:
removeTaskCallback("swirllike")
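For a minimal standalone illustration of the mechanism, here is a sketch of a callback that just echoes each top-level value and removes itself once the user enters 4 (echo_cb is a made-up name):
# Echo each top-level result; returning FALSE removes the callback
echo_cb <- function(expr, value, ok, visible) {
  message("You entered: ", paste(deparse(value), collapse = " "))
  !identical(value, 4)  # keep listening until the user enters 4
}
addTaskCallback(echo_cb, name = "echo")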
I am asking how to write a text or graphical progress tracker while rforcecom's batch update function loads batches of up to 10,000 records.
To set up and complete a batch update, a few objects must be created; there is no avoiding it. I really do not like having to re-run code just to check the status from rforcecom.checkBatchStatus(). This should be automated, with a progress bar giving a visual of actual progress, since manually checking the object in the global environment gives only a static "status" snapshot until the code is run again.
Here's how the code is set up:
require(RForcecom)
## Login to Salesforce using your username and password token.
## Once ready to update records, use the following:
job <- rforcecom.createBulkJob(session, operation = 'update',
                               object = 'custom_object__c')
info <- rforcecom.createBulkBatch(session, jobId = job$id, data = entry,
                                  batchSize = 10000)
### Re-run this line for an updated status while the status
### (in the global environment) is "In Progress"
status <- lapply(info, FUN = function(x) {
  rforcecom.checkBatchStatus(session, jobId = x$jobId, batchId = x$id)
})
### Once complete, check details
details <- lapply(status, FUN = function(x) {
  rforcecom.getBatchDetails(session, jobId = x$jobId, batchId = x$id)
})
close <- rforcecom.closeBulkJob(session, jobId = job$id)
To automate re-running the status code, use the repeat loop:
repeat {
  statements...
  if (condition) {
    break
  }
}
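As a self-contained illustration of the repeat-plus-progress-bar idea (simulated work, no Salesforce connection; the counters are made up):
total <- 100
done <- 0
pb <- txtProgressBar(min = 0, max = total, style = 3)
repeat {
  done <- min(done + sample(1:10, 1), total)  # simulate records being processed
  setTxtProgressBar(pb, done)
  Sys.sleep(0.1)
  if (done == total) {
    close(pb)
    break
  }
}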
Then, to get a visual progress update, use txtProgressBar() from base R. For this particular task I made my own progress bar function with two simple companion functions. As a note about progressValue(): rforcecom.checkBatchStatus() returns a list of length one containing a sublist, and the sublist element holding the number of records processed is named "numberRecordsProcessed".
progressBar <- function(x, start = 0, finish) {
  # x is the object whose processing you are tracking over a varying time length
  # finish is the number of rows of data your function is processing
  pb <- txtProgressBar(min = start, max = finish, style = 3)
  for (i in 1:finish) {
    i <- progressValue(x)
    setTxtProgressBar(pb, i)
    if (progressValue(x) / finish == 1) {
      close(pb)
    }
  }
}
finish <- function(x) {
  return(as.numeric(nrow(x)))
}

progressValue <- function(x) {
  x <- x[[1]][["numberRecordsProcessed"]]
  return(as.numeric(x))
}
Now, bring it all together! A repeat loop can be made to end as long as you know your exit conditions: here, "Completed" or "Failed". Repeatedly re-computing "status" updates the number of records processed, and by doing so updates your progress bar. When the number of records processed equals the number of rows in your data, the progress bar will quit, and so will your repeat loop.
repeat {
  status <- lapply(info, FUN = function(x) {
    rforcecom.checkBatchStatus(session, jobId = x$jobId, batchId = x$id)
  })
  progressBar(status, finish = finish(entry))
  if (status[[1]][["state"]] == "Completed") {
    break
  }
  if (status[[1]][["state"]] == "Failed") {
    break
  }
}
Let's say I've created a cluster using: cl <- parallel::makeCluster(2) and I send a call to the first node using parallel:::sendCall(cl[[1]], f, arg).
I want to get the results of a specific node (in this case the first node). I can do that using parallel:::recvResult(cl[[1]]); however, this call blocks until a result is received. Is there any way to check the status of a specific node, i.e. a status like "is processing" or "is finished"?
I'd recommend using the standard socketSelect function. For example:
library(parallel)
cl <- makePSOCKcluster(3, outfile = "")

# Send a task to worker 1
x <- 2
parallel:::sendCall(cl[[1]], sqrt, list(x), tag = 1)

# Wait up to 5 seconds for worker 1 to send the result back
ready <- socketSelect(list(cl[[1]]$con), timeout = 5)
if (ready > 0) {
  result <- parallel:::recvData(cl[[1]])
  cat(sprintf("sqrt(%f) = %f\n", x, result$value))
} else {
  cat("result not ready after five seconds\n")
}
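To poll several workers at once, here is a sketch building on the same idea (keep in mind that sendCall and recvData are unexported internals of parallel and may change between R versions):
# Send one task to each worker
for (i in seq_along(cl)) {
  parallel:::sendCall(cl[[i]], function(s) { Sys.sleep(s); s }, list(i), tag = i)
}
# Poll until every worker has returned its result
remaining <- seq_along(cl)
while (length(remaining) > 0) {
  # Logical vector: which of the remaining sockets have data ready
  ready <- socketSelect(lapply(cl[remaining], function(node) node$con), timeout = 1)
  for (j in remaining[ready]) {
    res <- parallel:::recvData(cl[[j]])
    cat(sprintf("worker %d returned %s\n", j, format(res$value)))
  }
  remaining <- remaining[!ready]
}
stopCluster(cl)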
See the source for the recvOneData.SOCKcluster function in the file snowSOCK.R for a more complete example.