Erlang gen_tcp missing packets?

I'm developing a mail client adapter in Erlang. I'm having issues when I try to perform a FETCH command: Erlang isn't able to get the body's content.
This is the output from my terminal when I issue the same command through netcat:
4 FETCH 2 BODY[2]
* 2 FETCH (BODY[2] {1135}
<div>
test content
</div>
)
4 OK FETCH completed.
The only data the gen_server receives from gen_tcp is this binary:
<<"* 2 FETCH (BODY[2] {1135}\r\n">>
The source code is here:
-module(mailconnector).

-behaviour(gen_server).

-export([start_link/2, stop/0]).
-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]).

start_link(Host, imap) ->
    gen_server:start_link({local, ?MODULE}, ?MODULE, [Host, 143], []).

stop() ->
    gen_server:call(?MODULE, stop).

init([Host, Port]) ->
    {ok, Sock} = gen_tcp:connect(Host, Port, [binary, {packet, 0}, {active, true}]),
    {ok, {Sock, 0}}.

handle_call(stop, _From, State) ->
    {stop, normal, ok, State};
handle_call({login, Username, Password}, _From, State) ->
    {NewState, Output} = action(State, string:join(["LOGIN", Username, Password], " ")),
    case Output of
        {ok, Response, Data} -> Result = {Response, Data};
        _ -> Result = false
    end,
    {reply, Result, NewState};
handle_call(list, _From, State) ->
    {NewState, Resp} = action(State, "LIST \"\" \"*\""),
    {reply, Resp, NewState};
handle_call({select, MailBox}, _From, State) ->
    {NewState, Output} = action(State, string:join(["SELECT", MailBox], " ")),
    case Output of
        {ok, Response, Data} -> Result = {Response, Data};
        _ -> Result = false
    end,
    {reply, Result, NewState};
handle_call({fetch, Num}, _From, State) ->
    {NewState, Output} = action(State, string:join(["FETCH", Num, "BODY[1]"], " ")),
    case Output of
        {ok, Response, Data} -> Result = {Response, Data};
        _ -> Result = false
    end,
    {reply, Result, NewState};
handle_call(_Command, _From, _State) ->
    {reply, not_valid, _State}.

handle_cast(_Command, State) ->
    {noreply, State}.

handle_info(_Info, State) ->
    {noreply, State}.

terminate(_Reason, _State) ->
    ok.

code_change(_OldVsn, State, _Extra) ->
    {ok, State}.

action(_State = {Socket, Counter}, Command) ->
    NewCount = Counter+1,
    CounterAsList = lists:flatten(io_lib:format("~p ", [NewCount])),
    Message = list_to_binary(lists:concat([CounterAsList, Command, "\r\n"])),
    io:format("~p~n", [Message]),
    gen_tcp:send(Socket, Message),
    {{Socket, NewCount}, listener(Socket, NewCount)}.

listener(_Sock, Count) ->
    receive
        {_, _, Reply} ->
            io:format("RECEIVED: ~p~n", [Reply]),
            Messages = string:tokens(binary_to_list(Reply), "\r\n"),
            io:format("~p~n", [Messages]),
            find_message(Messages, Count)
    after 5000 ->
        timeout
    end.

process_message(Message, Count) ->
    StringCount = lists:flatten(io_lib:format("~p", [Count])),
    case [MCount|PureMessage] = string:tokens(Message, " ") of
        _M when StringCount == MCount ->
            {ok, string:join(PureMessage, " ")};
        _ -> [_Command|Output] = PureMessage, {data, string:join(Output, " ")}
    end.

find_message(Messages, Count) ->
    find_message(Messages, Count, []).

find_message([], _, _) ->
    false;
find_message([H|T], Count, Data) ->
    case process_message(H, Count) of
        {ok, Message} -> {ok, Message, lists:reverse(Data)};
        {data, Output} -> find_message(T, Count, [Output|Data])
    end.
Thanks a lot for your help.

The following is just a comment, not an answer to your question, which I believe rvirding provided above.
Since you are using one of the standard behaviours (gen_server), one would assume that you intend to write an OTP-compliant application. If so, you should never use a receive expression directly unless you are prepared to handle all possible system messages as well as your application's own. In a gen_server or gen_fsm, non-system messages are delivered to your handle_info/2 callback function. You could use the State variable to record which command you are currently processing (e.g. login) and have a separate clause for each:
handle_info({tcp,Socket,Reply}, #state{pending = login} = State) ->
...;
handle_info({tcp,Socket,Reply}, #state{pending = list} = State) ->
...;
However, that would become a poor man's finite state machine, so you'd be better off porting it to a gen_fsm behaviour callback module where you have a separate state for each command (e.g. wait_for_login). Then you can use handle_info/3:
handle_info({tcp,Socket,Reply}, wait_for_login, State) ->
...;
handle_info({tcp,Socket,Reply}, wait_for_list, State) ->
...;

If you are in active mode, the best approach is to receive the stream in your handle_info (or in your custom receiving function) with the pattern {tcp, Socket, Msg} and accumulate it in a buffer until it satisfies a specific pattern match or reaches a specific length; then you flush the buffer however you wish.
As @rvirding said, you can't be sure that your whole message will arrive in a single packet, so you have to handle data that may be split across several packets. Alternatively, you can use passive mode and the function gen_tcp:recv/2, but keep in mind that this call is blocking.
I suggest you read this: http://learnyousomeerlang.com/buckets-of-sockets
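For instance, here is a hedged sketch of that buffering idea applied to the question's gen_server. The three-element state {Sock, Counter, Buffer} and the process_response/2 helper are assumptions for illustration, and a robust IMAP client would also track the {N} literal lengths rather than just scanning for the tagged OK line:

%% Sketch only: accumulate TCP data until the line tagged with our counter
%% (e.g. <<"4 OK FETCH completed.">>) shows up, then process the whole response.
handle_info({tcp, Sock, Data}, {Sock, Counter, Buffer}) ->
    NewBuffer = <<Buffer/binary, Data/binary>>,
    Tag = integer_to_binary(Counter),
    case binary:match(NewBuffer, <<Tag/binary, " OK">>) of
        nomatch ->
            %% still waiting for more packets: keep buffering
            {noreply, {Sock, Counter, NewBuffer}};
        _ ->
            process_response(NewBuffer, Counter),
            {noreply, {Sock, Counter, <<>>}}
    end;
handle_info({tcp_closed, Sock}, {Sock, _Counter, _Buffer} = State) ->
    {stop, normal, State}.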

Related

MailboxProcessor first loop can't run if program immediately fails

I have a command that runs an SFTP check periodically and logs the result to a file.
let logPath = Path.Combine(config.["SharedFolder"],timestamp)
let sw = new StreamWriter(logPath,true)
//...
[<EntryPoint>]
let main argv =
    try
        sftpExample config.["SharedFolder"] config.["SFTPFolder"] 22 "usr" "pswd" |> ignore
    with
    | ex ->
        ex.Message |> printerAgent.Post
        printfn "%s" ex.Message // <- NOTICE THIS LINE
    sw.Close()
    sw.Dispose()
    0
It loops over a MailboxProcessor
let printerAgent = MailboxProcessor.Start(fun inbox->
    // the message processing function
    let rec messageLoop() = async{
        // read a message
        let! msg = inbox.Receive()
        // process a message
        sw.WriteLine("{0}: {1}", DateTime.UtcNow.ToShortTimeString(), msg)
        printfn "%s" msg
        // loop to top
        return! messageLoop()
    }
    // start the loop
    messageLoop()
)
which is called to write the messages to the log
let sftpExample local host port username (password:string) =
    async {
        use client = new SftpClient(host, port, username, password)
        client.Connect()
        sprintf "Connected to %s\nroot dir list" host |> printerAgent.Post
        do! downloadDir local client ""
        sprintf "Done, disconnecting now" |> printerAgent.Post
        client.Disconnect()
    } |> Async.RunSynchronously
The file downloads are asynchronous, as are the corresponding messages, and everything appears to work well.
The problem is that if, for some reason, the SFTP connection fails immediately, the MailboxProcessor has no time to log the exception message.
What I've tried, and which does work, was adding a printfn "%s" ex.Message before the end; I just wanted to know if someone envisions a better solution.
FYI, the full code is in this gist.
In fact, what you want is for the program to wait until the MailboxProcessor has finished handling all of its message queue before the program exits. Your printfn "%s" ex.Message seems to be working, but it's not guaranteed to work: if the MailboxProcessor had multiple items in its queue, the thread running the printfn function might finish before the MailboxProcessor's thread had had time to get through all of its messages.
The design I would recommend is to change the input of your printerAgent to be a DU like the following:
type printerAgentMsg =
    | Message of string
    | Shutdown
Then when you want the printer agent to finish sending its messages, use MailboxProcessor.PostAndReply (note the usage example in the docs) in the main function and send it the Shutdown message. Remember that MailboxProcessor messages are queued: by the time it receives the Shutdown message, it will already have gone through the rest of the messages in its queue. So all it needs to do to handle Shutdown is return a unit reply and simply not call its loop again. And because you used PostAndReply rather than PostAndAsyncReply, the main function will block until the MailboxProcessor has finished doing all its work. (To avoid any chance of blocking forever, I'd recommend setting a timeout like 10 seconds in your PostAndReply call; the default timeout is -1, meaning wait forever.)
EDIT: Here's an example (NOT tested, use at own risk) of what I mean:
type printerAgentMsg =
    | Message of string
    | Shutdown of AsyncReplyChannel<unit>

let printerAgent = MailboxProcessor.Start(fun inbox->
    // the message processing function
    let rec messageLoop() = async{
        // read a message
        let! msg = inbox.Receive()
        // process a message
        match msg with
        | Message text ->
            sw.WriteLine("{0}: {1}", DateTime.UtcNow.ToShortTimeString(), text)
            printfn "%s" text
            // loop to top
            return! messageLoop()
        | Shutdown replyChannel ->
            replyChannel.Reply()
            // We do NOT do return! messageLoop() here
    }
    // start the loop
    messageLoop()
)
let logPath = Path.Combine(config.["SharedFolder"],timestamp)
let sw = new StreamWriter(logPath,true)
//...
[<EntryPoint>]
let main argv =
    try
        sftpExample config.["SharedFolder"] config.["SFTPFolder"] 22 "usr" "pswd" |> ignore
    with
    | ex ->
        ex.Message |> Message |> printerAgent.Post
        printfn "%s" ex.Message // <- NOTICE THIS LINE
    printerAgent.PostAndReply( (fun replyChannel -> Shutdown replyChannel), 10000) // Timeout = 10000 ms = 10 seconds
    sw.Close()
    sw.Dispose()
    0
The easiest solution would be to use a normal (synchronous) function for logging instead of a MailboxProcessor, or to use some logging framework and flush the loggers at the end of the main function. If you want to keep using printerAgent, you can implement a "synchronous" mode like this:
type Msg =
    | Log of string
    | LogAndWait of string * AsyncReplyChannel<unit>

let printerAgent = MailboxProcessor.Start(fun inbox ->
    let processLogMessage logMessage =
        sw.WriteLine("{0}: {1}", DateTime.UtcNow.ToShortTimeString(), logMessage)
        printfn "%s" logMessage
    let rec messageLoop() = async{
        let! msg = inbox.Receive()
        match msg with
        | Log logMessage ->
            processLogMessage logMessage
        | LogAndWait (logMessage, replyChannel) ->
            processLogMessage logMessage
            replyChannel.Reply()
        return! messageLoop()
    }
    messageLoop()
)
Which you would then use either asynchronously
printerAgent.Post(Log "Message")
or synchronously
printerAgent.PostAndReply(fun channel -> LogAndWait("Message", channel))
You should use the synchronous alternative when you log the exception in the main function.
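For instance (just a sketch, reusing config, sftpExample, sw and printerAgent from the code above), the main function could log the exception synchronously before closing the writer:

[<EntryPoint>]
let main argv =
    try
        sftpExample config.["SharedFolder"] config.["SFTPFolder"] 22 "usr" "pswd" |> ignore
    with
    | ex ->
        // blocks until the agent has written this message (and everything queued before it)
        printerAgent.PostAndReply(fun channel -> LogAndWait(ex.Message, channel))
    sw.Close()
    sw.Dispose()
    0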

Writing the function "once" in Elixir

I'm coming to Elixir from primarily a JavaScript background. In JS, it's possible to write a higher-order function "once" which returns a function that will invoke the passed-in function only once, and return the previous result on subsequent calls. The trick is manipulating variables that were captured via closure:
var once = (func) => {
  var wasCalled = false, prevResult;
  return (...args) => {
    if (wasCalled) return prevResult;
    wasCalled = true;
    return prevResult = func(...args);
  }
}
It seems to me that it's not possible to create this function in Elixir, due to its different variable rebinding behavior. Is there some other clever way to do it via pattern matching or recursion, or is it just not possible? Without macros, that is; I'd imagine those might enable it. Thanks
Using the current process dictionary:
defmodule A do
  def once(f) do
    key = make_ref()
    fn ->
      case Process.get(key) do
        {^key, val} -> val
        nil ->
          val = f.()
          Process.put(key, {key, val})
          val
      end
    end
  end
end
Or, if the function will be passed across processes, an ETS table can be used:
# ... during application initialization
:ets.new(:cache, [:set, :public, :named_table])

defmodule A do
  def once(f) do
    key = make_ref()
    fn ->
      case :ets.lookup(:cache, key) do
        [{^key, val}] -> val
        [] ->
          val = f.()
          :ets.insert(:cache, {key, val})
          val
      end
    end
  end
end
Application.put_env / Application.get_env can also be used to hold global state, though they are usually reserved for configuration settings.
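A rough sketch of that approach follows; the :my_app application name and the fixed :once_result key are assumptions for illustration. Because application env keys are conventionally atoms, each wrapped function needs its own key, which is one reason this mechanism is usually left to configuration:

defmodule AppEnvOnce do
  def once(f) do
    fn ->
      # the application env is global, so this cache is shared across processes
      case Application.get_env(:my_app, :once_result) do
        nil ->
          val = f.()
          Application.put_env(:my_app, :once_result, {:done, val})
          val

        {:done, val} ->
          val
      end
    end
  end
end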
It's not considered idiomatic in most cases, but you can do this with Agent:
defmodule A do
  def once(fun) do
    {:ok, agent} = Agent.start_link(fn -> nil end)
    fn args ->
      case Agent.get(agent, & &1) do
        nil ->
          result = apply(fun, args)
          :ok = Agent.update(agent, fn _ -> {:ok, result} end)
          result
        {:ok, result} ->
          result
      end
    end
  end
end
Now if you run this:
once = A.once(fn sleep ->
  :timer.sleep(sleep)
  1 + 1
end)

IO.inspect once.([1000])
IO.inspect once.([1000])
IO.inspect once.([1000])
IO.inspect once.([1000])
You'll see that the first line is printed after 1 second, but the next 3 are printed instantly, because the result is fetched from the agent.
While both answers already given are perfectly valid, the most literal translation of your JavaScript is shown below:
defmodule M do
  use GenServer

  def start_link(_opts \\ []) do
    GenServer.start_link(__MODULE__, nil, name: __MODULE__)
  end

  def init(_args) do
    Process.sleep(1_000)
    {:ok, 42}
  end

  def value() do
    start_link()
    GenServer.call(__MODULE__, :value)
  end

  def handle_call(:value, _from, state) do
    {:reply, state, state}
  end
end
(1..5) |> Enum.each(&IO.inspect(M.value(), label: to_string(&1)))
Using the same test as in @Dogbert's answer: the first value is printed with a delay, and all subsequent values are printed immediately.
This is an exact analogue of your memoized function, implemented with a GenServer that keeps the value in its state. GenServer.start_link/3 returns one of the following:
{:ok, #PID<0.80.0>}
{:error, {:already_started, #PID<0.80.0>}}
That is, it is not restarted if it's already started. I do not bother to check the returned value since we are all set in either case: if it's the initial start, we call the heavy function; if it was already started, the value is already at hand in the state.

Akka.net F# stateful actor that awaits multiple FileSystemWatcher Observable events

I'm new to F# as well as Akka.Net and trying to achieve the following with them:
I want to create an actor (Tail) that receives a file location and then listens for events at that location using FileSystemWatcher and some Observables, forwarding them on as messages to some other actor for processing.
The problem I'm having is that the code that listens for the events only picks up one event at a time and ignores all the others. For example, if I copy 20 files into the directory being watched, it only seems to send out the event for one of them.
Here's my Actor code:
module Tail
open Akka
open Akka.FSharp
open Akka.Actor
open System
open Model
open ObserveFiles
open ConsoleWriteActor
let handleTailMessages tm =
    match tm with
    | StartTail (f,r) ->
        observeFile f consoleWriteActor |!> consoleWriteActor
        |> ignore

let spawnTail =
    fun (a : Actor<IMessage> ) ->
        let rec l (count : int) = actor{
            let! m = a.Receive()
            handleTailMessages m
            return! l (count + 1)
        }
        l(0)
and here's the code that listens for events:
module ObserveFiles
open System
open System.IO
open System.Threading
open Model
open Utils
open Akka
open Akka.FSharp
open Akka.Actor
let rec observeFile (absolutePath : string) (a : IActorRef ) = async{
    let fsw = new FileSystemWatcher(
                Path = Path.GetDirectoryName(absolutePath),
                Filter = "*.*",
                EnableRaisingEvents = true,
                NotifyFilter = (NotifyFilters.FileName ||| NotifyFilters.LastWrite ||| NotifyFilters.LastAccess ||| NotifyFilters.CreationTime ||| NotifyFilters.DirectoryName)
              )
    let prepareMessage (args: EventArgs) =
        let text =
            match box args with
            | :? FileSystemEventArgs as fsa ->
                match fsa.ChangeType with
                | WatcherChangeTypes.Changed -> "Changed " + fsa.Name
                | WatcherChangeTypes.Created -> "Created " + fsa.Name
                | WatcherChangeTypes.Deleted -> "Deleted " + fsa.Name
                | WatcherChangeTypes.Renamed -> "Renamed " + fsa.Name
                | _ -> "Some other change " + fsa.ChangeType.ToString()
            | :? ErrorEventArgs as ea -> "Error: " + ea.GetException().Message
            | o -> "some other unexpected event occurd" + o.GetType().ToString()
        WriteMessage text
    let sendMessage x = async{ async.Return(prepareMessage x) |!> a
                               return! observeFile absolutePath a }
    let! occurance =
        [
            fsw.Changed |> Observable.map(fun x -> sendMessage (x :> EventArgs));
            fsw.Created |> Observable.map(fun x -> sendMessage (x :> EventArgs));
            fsw.Deleted |> Observable.map(fun x -> sendMessage (x :> EventArgs));
            fsw.Renamed |> Observable.map(fun x -> sendMessage (x :> EventArgs));
            fsw.Error |> Observable.map(fun x -> sendMessage (x :> EventArgs));
        ]
        |> List.reduce Observable.merge
        |> Async.AwaitObservable
    return! occurance
}
It took quite a few hacks to get it to this point; any advice on how I could change it so that it picks up and processes all the events while the actor is running would be greatly appreciated.
When designing a task like that, we could split it into the following components:
A manager responsible for receiving all messages. Its main role is to respond to incoming directory-listening requests: once a request comes in, it creates a child actor responsible for listening on that specific directory.
A child actor responsible for managing the FileSystemWatcher for a specific path. It should subscribe to the incoming events and redirect them as messages to the actor responsible for receiving change events. It should also free its disposable resources when it is closed.
An actor responsible for receiving change events, in our case by displaying them on the console.
Example code:
open Akka.FSharp
open System
open System.IO

let system = System.create "observer-system" <| Configuration.defaultConfig()

let observer filePath consoleWriter (mailbox: Actor<_>) =
    let fsw = new FileSystemWatcher(
                Path = filePath,
                Filter = "*.*",
                EnableRaisingEvents = true,
                NotifyFilter = (NotifyFilters.FileName ||| NotifyFilters.LastWrite ||| NotifyFilters.LastAccess ||| NotifyFilters.CreationTime ||| NotifyFilters.DirectoryName)
              )
    // subscribe to all incoming events - send them to consoleWriter
    let subscription =
        [fsw.Changed |> Observable.map(fun x -> x.Name + " " + x.ChangeType.ToString());
         fsw.Created |> Observable.map(fun x -> x.Name + " " + x.ChangeType.ToString());
         fsw.Deleted |> Observable.map(fun x -> x.Name + " " + x.ChangeType.ToString());
         fsw.Renamed |> Observable.map(fun x -> x.Name + " " + x.ChangeType.ToString());]
        |> List.reduce Observable.merge
        |> Observable.subscribe(fun x -> consoleWriter <! x)
    // don't forget to free resources at the end
    mailbox.Defer <| fun () ->
        subscription.Dispose()
        fsw.Dispose()
    let rec loop () = actor {
        let! msg = mailbox.Receive()
        return! loop()
    }
    loop ()

// create actor responsible for printing messages
let writer = spawn system "console-writer" <| actorOf (printfn "%A")

// create manager responsible for serving listeners for provided paths
let manager = spawn system "manager" <| actorOf2 (fun mailbox filePath ->
    spawn mailbox ("observer-" + Uri.EscapeDataString(filePath)) (observer filePath writer) |> ignore)

manager <! "testDir"

F# handling Task cancellation

I am struggling to understand why some code is never executed.
Consider this extension method:
type WebSocketListener with
    member x.AsyncAcceptWebSocket = async {
        try
            let! client = Async.AwaitTask <| x.AcceptWebSocketAsync Async.DefaultCancellationToken
            if not (isNull client) then
                return Some client
            else
                return None
        with
        | :? System.Threading.Tasks.TaskCanceledException
        | :? AggregateException ->
            return None
    }
I know that AcceptWebSocketAsync throws a TaskCanceledException when the cancellation token is cancelled; I have checked this in a C# application. The idea is to return None.
However, that never happens. If I put a breakpoint on the last return None, or even in the if expression, execution never stops there when the cancellation token has been cancelled. And I know it is awaiting in Async.AwaitTask, because if another client connects before the cancellation, it works and stops at the breakpoints.
I am a little bit lost: why is the exception lost?
Cancellation uses a special path in F# asyncs: Async.AwaitTask will re-route execution of a cancelled task to the cancellation continuation. If you want different behavior, you can always wire it up manually:
type WebSocketListener with
    member x.AsyncAcceptWebSocket = async {
        let! ct = Async.CancellationToken
        return! Async.FromContinuations(fun (s, e, c) ->
            x.AcceptWebSocketAsync(ct).ContinueWith(fun (t: System.Threading.Tasks.Task<_>) ->
                if t.IsFaulted then e t.Exception
                elif t.IsCanceled then s None // take success path in case of cancellation
                else
                    match t.Result with
                    | null -> s None
                    | x -> s (Some x)
            )
            |> ignore
        )
    }
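On top of that extension, a hypothetical accept loop could then treat None as the signal to stop. The acceptLoop name and the handleClient parameter below are illustrations only, not part of the answer above:

// Sketch only: keep accepting clients until AsyncAcceptWebSocket yields None,
// which happens when the accept was cancelled (or returned a null client).
let rec acceptLoop (listener: WebSocketListener) handleClient = async {
    let! client = listener.AsyncAcceptWebSocket
    match client with
    | Some ws ->
        do! handleClient ws // handle this client (sequentially here, for simplicity)
        return! acceptLoop listener handleClient
    | None ->
        return () // cancelled: stop the loop
}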

Does Async.StartChild have a memory leak?

When I run the following test (built with F# 2.0) I get an OutOfMemoryException. It takes about 5 minutes to reach the exception on my system (i7-920, 6 GB RAM, running as an x86 process), but in any case we can watch the memory grow in Task Manager.
module start_child_test

open System
open System.Diagnostics
open System.Threading
open System.Threading.Tasks

let cnt = ref 0
let sw = Stopwatch.StartNew()

Async.RunSynchronously(async{
    while true do
        let! x = Async.StartChild(async{
            if (Interlocked.Increment(cnt) % 100000) = 0 then
                if sw.ElapsedMilliseconds > 0L then
                    printfn "ops per sec = %d" (100000L*1000L / sw.ElapsedMilliseconds)
                else
                    printfn "ops per sec = INF"
                sw.Restart()
                GC.Collect()
        })
        do! x
})

printfn "done...."
I don't see anything wrong with this code, and I don't see any reason for the memory growth. I made an alternative implementation to make sure my arguments are valid:
module start_child_fix

open System
open System.Collections
open System.Collections.Generic
open System.Threading
open System.Threading.Tasks

type IAsyncCallbacks<'T> = interface
    abstract member OnSuccess: result:'T -> unit
    abstract member OnError: error:Exception -> unit
    abstract member OnCancel: error:OperationCanceledException -> unit
end

type internal AsyncResult<'T> =
    | Succeeded of 'T
    | Failed of Exception
    | Canceled of OperationCanceledException

type internal AsyncGate<'T> =
    | Completed of AsyncResult<'T>
    | Subscribed of IAsyncCallbacks<'T>
    | Started
    | Notified

type Async with
    static member StartChildEx (comp:Async<'TRes>) = async{
        let! ct = Async.CancellationToken
        let gate = ref AsyncGate.Started
        let CompleteWith(result:AsyncResult<'T>, callbacks:IAsyncCallbacks<'T>) =
            if Interlocked.Exchange(gate, Notified) <> Notified then
                match result with
                | Succeeded v -> callbacks.OnSuccess(v)
                | Failed e -> callbacks.OnError(e)
                | Canceled e -> callbacks.OnCancel(e)
        let ProcessResults (result:AsyncResult<'TRes>) =
            let t = Interlocked.CompareExchange<AsyncGate<'TRes>>(gate, AsyncGate.Completed(result), AsyncGate.Started)
            match t with
            | Subscribed callbacks ->
                CompleteWith(result, callbacks)
            | _ -> ()
        let Subscribe (success, error, cancel) =
            let callbacks = {
                new IAsyncCallbacks<'TRes> with
                    member this.OnSuccess v = success v
                    member this.OnError e = error e
                    member this.OnCancel e = cancel e
            }
            let t = Interlocked.CompareExchange<AsyncGate<'TRes>>(gate, AsyncGate.Subscribed(callbacks), AsyncGate.Started)
            match t with
            | AsyncGate.Completed result ->
                CompleteWith(result, callbacks)
            | _ -> ()
        Async.StartWithContinuations(
            computation = comp,
            continuation = (fun v -> ProcessResults(AsyncResult.Succeeded(v))),
            exceptionContinuation = (fun e -> ProcessResults(AsyncResult.Failed(e))),
            cancellationContinuation = (fun e -> ProcessResults(AsyncResult.Canceled(e))),
            cancellationToken = ct
        )
        return Async.FromContinuations( fun (success, error, cancel) ->
            Subscribe(success, error, cancel)
        )
    }
For this test it works well without any considerable memory consumption. Unfortunately I'm not very experienced in F# and have doubts whether I'm missing something. In case it is a bug, how can I report it to the F# team?
I think you're correct - there seems to be a memory leak in the implementation of StartChild.
I did a bit of profiling (following a fantastic tutorial by Dave Thomas) against the open-source F# release, and I think I even know how to fix it. If you look at the implementation of StartChild, it registers a handler with the current cancellation token of the workflow:
let _reg = ct.Register(
    (fun _ ->
        match !ctsRef with
        | null -> ()
        | otherwise -> otherwise.Cancel()), null)
The objects that stay alive in the heap are instances of this registered function. They could be unregistered by calling _reg.Dispose(), but that never happens in the F# source code. I tried adding _reg.Dispose() to the functions that get called when the async completes:
(fun res -> _reg.Dispose(); ctsRef := null; resultCell.RegisterResult (Ok res, reuseThread=true))
(fun err -> _reg.Dispose(); ctsRef := null; resultCell.RegisterResult (Error err,reuseThread=true))
(fun err -> _reg.Dispose(); ctsRef := null; resultCell.RegisterResult (Canceled err,reuseThread=true))
... and based on my experiments, this fixes the problem. So, if you want a workaround, you can probably copy all the required code from control.fs and add this as a fix.
I'll send a bug report to the F# team with a link to your question. If you find something else, you can contact them by sending bug reports to fsbugs at microsoft dot com.
