So I am doing a some batch computation very cpu intensive on books. And I built a tracker to track the computation of tasks. I close on a mailboxprocesser which all runs fine without parallelizaton but when I put a array.parallel.map or and async workflow the mailboxprocesser fails. I want to know why?
type timerMessage =
| Start of int
| Tick of bool
let timer = MailboxProcessor.Start(fun mbox ->
let inputloop() = async {
let progress = ref 0
let amount = ref 0
let start = ref System.DateTime.UtcNow
while true do
let! msg = mbox.Receive()
match msg with
| Start(i) -> amount := i
progress := 0
start := System.DateTime.UtcNow
| Tick(b) -> if !amount = 0 then ()
else
progress := !progress + 1
let el = System.DateTime.UtcNow - !start
let eta = int ((el.TotalSeconds/float !progress)*(float (!amount - !progress)))
let etas = (int (eta / 3600)).ToString() + ":" + (int ((eta % 3600) / 60)).ToString() + ":" + (eta % 60).ToString()
System.Console.Clear()
System.Console.Write((!progress).ToString() + "/" + (!amount).ToString() + " Completed [] Estimated Time Remaining:" + etas)
} inputloop() )
let computeBook (author :string) path =
let rs = ReadToStrings(path)
let bk = StringsToBook rs
let mt = createMatrix bk 100 10 //size 100 //span 10
let res = GetResults mt
//do stuff
timer.Post(Tick(true))
(author,path,res)
let partAopA = //clip head clip foot no word mods
let lss = seq {for x in processtree do
for y in (snd x) do
yield ((fst x),y) }
let ls = Seq.toArray lss //task list
timer.Post(Start(ls.Length)) //start counter
let compls = Array.map (fun l -> computeBook (fst l) (snd l) ) ls //Array.Parallel.map fails here the same as below async if I put async blcoks around the computbook call
//let res = compls |> Async.Parallel |> Async.RunSynchronously
writeResults compls outputfolder |> ignore
compls
Related
I need to run a process that runs something from a list - it doesn't really matter what order it runs in - but I want it to update a global "counter" when it has completed each task so that I can see the progress somewhere else ( maybe using something like signal R )
I used to do this stuff in an object-oriented way - but trying to be a little more "functional".
let doSomethingElse(value: int) = async{
// Update a global counter incrementing it by 1
return true
}
let doSomething() = async{
let values = [2; 4; 6; 8]
let! newList = values |> List.map(fun value -> doSomethingElse(value)) |> Async.Parallel
return true
}
Following what #JL0PD mentioned you could do something like the following.
[<RequireQualifiedAccess>]
module Async =
let tee (f: _ -> unit) a =
async {
let! r = a
f r
return r
}
module Counter =
let create () =
let mutable counter = 0
let increment =
fun _ ->
counter <- counter + 1
()
(increment, fun () -> counter)
let doSomethingElse(value: int) = async {
return value % 2
}
let doSomething() = async {
let values = [1; 2; 4; 6; 8; 9]
let increment, getCount = Counter.create()
let doSomethingElseWithProgress =
doSomethingElse
>> (Async.tee increment)
let! _ =
values
|> List.map doSomethingElseWithProgress
|> Async.Parallel
return getCount() = (List.length values)
}
doSomething ()
|> Async.RunSynchronously
I do recommend doing something better than a mutable counter, specially since you are dealing with parallel tasks.
In this case I'm composing doSomethingElse and increment using tee, this way doSomethingElse don't have to know (and call) an external function.
I have the following:
let getCandlesFrom (exchange: IExchange) (instrument: Instrument) (interval: TimeSpan) (fromTime: DateTime) =
let rec get (c: CandleData array) (f: DateTime) =
let now = DateTime.UtcNow
//info $"requesting {f} - {now}"
let candles = exchange.GetCandles(instrument, interval, f, now)
if candles.IsError then
failwith candles.GetError.Describe
else
//info $"received data {candles.Get.[0].Timestamp} - {candles.Get.[^0].Timestamp}"
let c = Array.append c candles.Get
if c.[^0].Timestamp < now - interval then
get c (c.[^0].Timestamp + interval)
else
c
get [||] fromTime
I would like to move the line:
let candles = exchange.GetCandles(instrument, interval, f, now)
to an async call:
let! candles = exchange.GetCandlesAsync(instrument, interval, f, now)
but if I wrap the whole function in an async block, the recursive function doesn't compile and I get this error:
DataSource.fs(14, 13): [FS0588] The block following this 'let' is unfinished. Every code block is an expression and must have a result. 'let' cannot be the final code element in a block. Consider giving this block an explicit result.
I don't see your code producing the error but you must have forgotten something (after the let) - this should work:
let getCandlesFrom (exchange: IExchange) (instrument: Instrument) (interval: TimeSpan) (fromTime: DateTime) =
let rec get (c: CandleData array) (f: DateTime) =
asnyc {
let now = DateTime.UtcNow
//info $"requesting {f} - {now}"
let! candles = exchange.GetCandlesAsync(instrument, interval, f, now)
if candles.IsError then
return (failwith candles.GetError.Describe)
else
let c = Array.append c candles.Get
if c.[^0].Timestamp < now - interval then
return! get c (c.[^0].Timestamp + interval)
else
return c
}
get [||] fromTime
notice the return and the return! for the recursive call - you need those there
I'm trying to write a function (OnceAsync f) that ensures that an async function is run only once on a server (i.e. a multi-threaded environment). I thought it would be easy, but it became complicated quickly (locks, busy waits!!)
This is my solution, but I think it's over-engineered; there must be a better way. This should work in FSI:
let locked_counter init =
let c = ref init
fun x -> lock c <| fun () ->
c := !c + x
!c
let wait_until finished = async {
while not(finished()) do
do! Async.Sleep(1000)
}
let OnceAsync f =
// - ensure that the async function, f, is only called once
// - this function always returns the value, f()
let mutable res = None
let lock_inc = locked_counter 0
async {
let count = lock_inc 1
match res, count with
| None, 1 -> // 1st run
let! r = f
res <- Some r
| None, _ -> // nth run, wait for 1st run to finish
do! wait_until (fun() -> res.IsSome)
| _ -> () // 1st run done, return result
return res.Value
}
You can use this code to test if OnceAsync is correct:
let test() =
let mutable count = 0
let initUser id = async {
do! Async.Sleep 1000 // simulate work
count <- count + 1
return count
}
//let fmem1 = (initUser "1234")
let fmem1 = OnceAsync (initUser "1234")
async {
let ps = Seq.init 20 (fun i -> fmem1)
let! rs = ps |> Async.Parallel
printfn "rs = %A" rs // outputs: [|1; 1; 1; 1; 1; ....; 1|]
}
test() |> Async.Start
If it fits, the simplest approach overall would be to use Async.StartChild. Unlike your solution it causes the function to run even if the result is never actually used though, e.g. in the Seq.init 0 case.
//let fmem1 = OnceAsync (initUser "1234")
async {
let! fmem1 = Async.StartChild (initUser "1234")
let ps = Seq.init 20 (fun i -> fmem1)
let! rs = ps |> Async.Parallel
printfn "rs = %A" rs // outputs: [|1; 1; 1; 1; 1; ....; 1|]
} |> Async.RunSynchronously
The simplest approach most similar to yours would be to use a TaskCompletionSource as follows:
let OnceAsync f =
let count = ref 0
let tcs = TaskCompletionSource<_>()
async {
if Interlocked.Increment(count) = 1 then
let! r = f
tcs.SetResult r
return! Async.AwaitTask tcs.Task
}
A more functional approach would use a MailboxProcessor and have it cache the result after the first run, and respond with it to all subsequent requests.
let OnceAsync f =
let handler (agent: MailboxProcessor<AsyncReplyChannel<_>>) =
let rec run resultOpt =
async {
let! chan = agent.Receive()
let! result =
match resultOpt with
| None -> f
| Some result -> async.Return result
chan.Reply result
return! run (Some result)
}
run None
let mbp = MailboxProcessor.Start handler
async { return! mbp.PostAndAsyncReply id }
I have a question concerning data structures that contain async operations.
It may sound weird.
TestActor contains a MailBoxProcessor and has three functions:
Receive prepares the mailbox processor to receive messages
Post and PostAndAsyncReply are used to send messages to the actor.
type TestActor (init, timeout) =
let mutable counter = init
let rcvFun = fun (msg) -> async {
match msg with
| Add i ->
counter <- counter + i
| GetCounter reply ->
reply.Reply counter}
do printfn "Initializing actors: "
do mailbox.Receive (rcvFun, timeout) ////// RECEIVE IS CALLED AT CONSTRUCTION
let mailbox = OnlyLatestMBP<TestMessage> ()
member x.Receive (timeout) =
mailbox.Receive (rcvFun, timeout)
member x.Post (msg: TestMessage, timeout) =
mailbox.Post(msg, timeout)
member x.PostAndAsyncReply (replyChannel, timeout) =
mailbox.PostAndAsyncReply(replyChannel, timeout)
I'd like to use this example to understand an issue that affected my code.
In the usual example for stacking agents in a data structure, Receive is executed at construction. In my example, the agent could be tested test with the code below:
let actorsWorkforce =
seq { 1 .. 5}
|> Seq.map (fun idx -> TestActor(idx, 60000))
let test =
actorsWorkforce
|> Seq.map ( fun idx -> idx.PostAndAsyncReply ( (fun reply -> GetCounter reply), 10000) )
|> Async.Parallel
|> Async.RunSynchronously
let result =
test
|> Array.iteri (fun idx element ->
match element with
| Some x -> printfn "Actor %i: OK with result %A" idx x
| None -> printfn "Actor %i: Failed" idx )
And this works as planned.
However, let's say I'd like to postpone the call to Receive to a later stage.
type TestActor (init) =
let mutable counter = init
let rcvFun = fun (msg) -> async {
match msg with
| Add i ->
counter <- counter + i
| GetCounter reply ->
reply.Reply counter}
let mailbox = OnlyLatestMBP<TestMessage> ()
member x.Receive (timeout) =
mailbox.Receive (rcvFun, timeout)
member x.Post (msg: TestMessage, timeout) =
mailbox.Post(msg, timeout)
member x.PostAndAsyncReply (replyChannel, timeout) =
mailbox.PostAndAsyncReply(replyChannel, timeout)
let actorsWorkforce =
seq { 1 .. 5}
|> Seq.map (fun idx -> TestActor(idx))
actorsWorkforce |> Seq.iter (fun idx -> idx.Receive (60000))
let test =
actorsWorkforce
|> Seq.map ( fun idx -> idx.PostAndAsyncReply ( (fun reply -> GetCounter reply), 10000) )
|> Async.Parallel
|> Async.RunSynchronously
let result =
test
|> Array.iteri (fun idx element ->
match element with
| Some x -> printfn "Actor %i: OK with result %A" idx x
| None -> printfn "Actor %i: Failed" idx )
This piece of code compiles but does not work.
mailbox.Receive has the type signature member Receive : callback:('a -> Async<unit>) * ?timeout:int -> unit so it would make sense to execute Receive with Seq.iter.
I suspect that the code does not work because actorsWorkforce |> Seq.iter (fun idx -> idx.Receive (60000)) duplicates actorsWorkforce when executed.
Is this correct? How can I fix this?
Thanks!
EDIT
The entire code:
open System
open System.Diagnostics
open Microsoft.FSharp.Control
open System.Threading
open System.Threading.Tasks
open System.Collections.Concurrent
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// OnlyLatest
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
type Envelope<'a> = Option<DateTime * 'a>
[<Sealed>]
type AsyncReplyChannel<'Reply>(replyf : 'Reply -> unit) =
member x.Reply(reply) = replyf(reply)
[<Sealed>]
type OnlyLatestMBP<'a> () =
let mutable currentEnvelope: Envelope<'a> = Envelope<'a>.None
let mutable timestampLastPrcsd: DateTime = DateTime.Now
let mutable react = Unchecked.defaultof<_>
// Msg Box status
let mutable isActive = false
let mutable defaultTimeout = Timeout.Infinite
// Event Messages
let awaitMsg = new AutoResetEvent(false)
let isActiveMsg = new AutoResetEvent(false)
let rec await timeout = async {
let thr = Thread.CurrentThread.ManagedThreadId
printfn "await on thread %i" thr
match currentEnvelope with
| Some (timestamp, x) ->
if timestamp > timestampLastPrcsd then
do! react x
timestampLastPrcsd <- timestamp
printfn "processed message"
currentEnvelope <- Envelope.None
awaitMsg.Reset() |> ignore
return! await timeout
| None ->
let! recd = Async.AwaitWaitHandle(awaitMsg, timeout)
if recd
then return! await timeout
else
isActive <- false
isActiveMsg.Reset() |> ignore
printfn ".. no message within timeout, shutting down" }
member x.DefaultTimeout
with get() = defaultTimeout
and set(value) = defaultTimeout <- value
member x.Receive (callback, ?timeout) =
if not isActive then
isActive <- true
isActiveMsg.Set() |> ignore
let timeout = defaultArg timeout defaultTimeout
react <- callback
let todo = await timeout
Async.Start todo
member x.Post (msg, ?timeout) = async {
let thr = Thread.CurrentThread.ManagedThreadId
printfn "posting on thread %i" thr
let timeout = defaultArg timeout defaultTimeout
if not isActive then
let! recd = Async.AwaitWaitHandle(isActiveMsg, timeout)
if recd then
currentEnvelope <- Envelope.Some(DateTime.Now, msg)
awaitMsg.Set() |> ignore
return true
else return false
else
currentEnvelope <- Envelope.Some(DateTime.Now, msg)
awaitMsg.Set() |> ignore
return true }
member x.PostAndAsyncReply (replyChannelMsg, ?timeout) = async {
let timeout = defaultArg timeout defaultTimeout
let tcs = new TaskCompletionSource<_>()
let msg = replyChannelMsg ( new AsyncReplyChannel<_> (fun reply -> tcs.SetResult(reply)) )
let! posted = x.Post (msg,timeout)
if posted then
match timeout with
| Timeout.Infinite ->
let! result = Async.FromContinuations ( fun (cont, _, _) ->
let apply = fun (task: Task<_>) -> cont (task.Result)
tcs.Task.ContinueWith(apply) |> ignore )
return Some result
| _ ->
let waithandle = tcs.Task.Wait(timeout)
match waithandle with
| false -> return None
| true -> return Some tcs.Task.Result
else return None }
type TestMessage =
| Add of int
| GetCounter of AsyncReplyChannel<int>
type TestActor (init) =
let mutable counter = init
let rcvFun = fun (msg) -> async {
match msg with
| Add i ->
counter <- counter + i
| GetCounter reply ->
reply.Reply counter}
let mailbox = OnlyLatestMBP<TestMessage> ()
// do printfn "Initializing actors: "
// do mailbox.Receive (rcvFun, timeout)
member x.Receive (timeout) =
mailbox.Receive (rcvFun, timeout)
member x.Post (msg: TestMessage, timeout) =
mailbox.Post(msg, timeout)
member x.PostAndAsyncReply (replyChannel, timeout) =
mailbox.PostAndAsyncReply(replyChannel, timeout)
let actorsWorkforce =
seq { 1 .. 5}
|> Seq.map (fun idx -> TestActor(idx))
actorsWorkforce |> Seq.iter (fun actor -> actor.Receive (60000))
let test =
actorsWorkforce
|> Seq.map ( fun idx -> idx.PostAndAsyncReply ( (fun reply -> GetCounter reply), 10000) )
|> Async.Parallel
|> Async.RunSynchronously
let result =
test
|> Array.iteri (fun idx element ->
match element with
| Some x -> printfn "Actor %i: OK with result %A" idx x
| None -> printfn "Actor %i: Failed" idx )
As initially suspected, the issue was indeed with: actorsWorkforce |> Seq.iter (fun idx -> idx.Receive (60000))
The problem was due to the lazy nature of seq
I have produced a narrowed down minimal code example.
open System
open System.Diagnostics
open Microsoft.FSharp.Control
open System.Threading
open System.Threading.Tasks
open System.Collections.Concurrent
type TestActress (name, timerlength) =
let mutable isActive = false
let rec myTask () = async {
Thread.Sleep (timerlength * 1000)
printfn "%s waited : %i" name timerlength
return! myTask () }
member this.Start () =
isActive <- true
Async.Start (myTask ())
member this.GetStatus () = async {
Thread.Sleep (2000)
return isActive }
// One single element, this is easy
let cameronDiaz = TestActress ("Cameron", 10)
cameronDiaz.Start ()
let status = cameronDiaz.GetStatus () |> Async.RunSynchronously
// Async.Parallel receives a seq<Async<'T>> as an input
// This is why I started off with a seq
// This piece of code does not work
let hollywood =
[ "Cameron"; "Pamela"; "Natalie"; "Diane" ]
|> List.toSeq
|> Seq.mapi ( fun idx el -> TestActress (el, idx + 10) )
hollywood |> Seq.iter ( fun el -> el.Start () )
let areTheyWorking =
hollywood
|> Seq.map (fun el -> el.GetStatus ())
|> Async.Parallel
|> Async.RunSynchronously
// Allright, with a list I get the function executed when I expect them to
let hollywood2 =
[ "Cameron"; "Pamela"; "Natalie"; "Diane" ]
|> List.mapi ( fun idx el -> TestActress (el, idx + 10) )
hollywood2 |> List.iter ( fun el -> el.Start () )
let areTheyWorking2 =
hollywood2
|> List.map (fun el -> el.GetStatus ())
|> Async.Parallel
|> Async.RunSynchronously
I am implementing in-place graph BFS in OCaml.
here is my code:
type graph = {s : int list array;num_v:int;mutable num_e : int};;
let bfs u g p =
let marker = Array.make g.num_v false
in
let q = Queue.create()
in
Queue.push u q;
let rec add_to_queue v = function
| [] -> ()
| hd::tl ->
if not marker.(hd) then begin
marker.(hd) <- true;
Queue.push hd q;
p.(hd) <- v
end;
add_to_queue v tl
in
**let rec bfs_go =**
if Queue.length q > 0 then begin
let v = Queue.pop q
in
print_int v;
add_to_queue v g.s.(v);
bfs_go
end
in
bfs_go;;
I thought the code is fine, but compiler gives me this error:
File "", line 20, characters 4-177: Error: This kind of expression is not allowed as right-hand side of 'let rec'
It seems my implementation of bfs_go has issues ( i have marked with ** **), but why? I can't see any errors.
Edit:
DFS in functional style
let dfs_better u g p =
let marker = Array.make g.num_v false in
let rec dfs_go current next =
match current, next with
| [], _ -> ()
| parent::[], [] -> ()
| parent::next_parent::rest, [] -> dfs_go (next_parent::rest) g.s.(next_parent)
| parent::rest, node::tl ->
if not marker.(node) then begin
print_int node;
marker.(node) <- true;
p.(node) <- parent;
dfs_go next g.s.(node)
end;
dfs_go current tl in
marker.(u) <- true;
dfs_go [u] g.s.(u);;
You probably mean
let rec bfs_go () =
...;
bfs_go ()
instead of
let rec bfs_go =
...;
bfs_go
Edit: I couldn't resist some improvements.
With your imperative style:
let bfs start graph from =
let marker = Array.make graph.num_v false in
let q = Queue.create() in
let add_to_queue parent node =
if not marker.(node) then begin
marker.(node) <- true;
Queue.push node q;
from.(node) <- parent;
end in
Queue.push start q;
while not (Queue.is_empty q) do
let node = Queue.pop q in
print_int node;
List.iter (add_to_queue node) graph.s.(node)
done
If you want something more functional:
let bfs start graph from =
let marker = Array.make graph.num_v false in
let rec bfs current next = match current, next with
| [], [] -> ()
| [], (_::_ as next) -> bfs next []
| node::current, next ->
let add parent node next =
if marker.(node) then next
else begin
marker.(node) <- true;
from.(node) <- parent;
node :: next
end in
print_int node;
bfs current (List.fold_right (add node) graph.s.(node) next)
in bfs [start] []
Edit 2:
Attempts at the DFS problem, completely untested (just compiled):
With a data-structure to handle nodes left to visit:
let dfs u g p =
let marker = Array.make g.num_v false in
let rec dfs_go = function
| [] -> ()
| node::next ->
print_int node;
let children =
List.filter (fun child -> not marker.(child)) g.s.(node) in
List.iter (fun child -> marker.(child) <- true) children;
dfs_go (children # next)
in
marker.(u) <- true;
dfs_go [u];;
Using only (non-tail) recursion:
let dfs u g p =
let marker = Array.make g.num_v false in
let rec dfs_go node =
print_int node;
let go_child child =
if not marker.(child) then begin
marker.(child) <- true;
dfs_go child;
end in
List.iter go_child g.s.(node)
in
marker.(u) <- true;
dfs_go u;;