f# generate the cross join two list [duplicate] - collections

I have to do projection of a list of lists which returns all combinations with each element from each list. For example:
projection([[1]; [2; 3]]) = [[1; 2]; [1; 3]].
projection([[1]; [2; 3]; [4; 5]]) = [[1; 2; 4]; [1; 2; 5]; [1; 3; 4]; [1; 3; 5]].
I come up with a function:
let projection lss0 =
let rec projectionUtil lss accs =
match lss with
| [] -> accs
| ls::lss' -> projectionUtil lss' (List.fold (fun accs' l ->
accs' # List.map (fun acc -> acc # [l]) accs)
[] ls)
match lss0 with
| [] -> []
| ls::lss' ->
projectionUtil lss' (List.map (fun l -> [l]) ls)
and a testcase:
#time "on";;
let N = 10
let fss0 = List.init N (fun i -> List.init (i+1) (fun j -> j+i*i+i));;
let fss1 = projection fss0;;
The function is quite slow now, with N = 10 it takes more than 10 seconds to complete. Moreover, I think the solution is unnatural because I have to breakdown the same list in two different ways. Any suggestion how I can improve performance and readability of the function?

First of all, try to avoid list concatenation (#) whenever possible, since it's O(N) instead of O(1) prepend.
I'd start with a (relatively) easy to follow plan of how to compute the cartesian outer product of lists.
Prepend each element of the first list to each sublist in the cartesian product of the remaining lists.
Take care of the base case.
First version:
let rec cartesian = function
| [] -> [[]]
| L::Ls -> [for C in cartesian Ls do yield! [for x in L do yield x::C]]
This is the direct translation of the sentences above to code.
Now speed this up: instead of list comprehensions, use list concatenations and maps:
let rec cartesian2 = function
| [] -> [[]]
| L::Ls -> cartesian2 Ls |> List.collect (fun C -> L |> List.map (fun x->x::C))
This can be made faster still by computing the lists on demand via a sequence:
let rec cartesian3 = function
| [] -> Seq.singleton []
| L::Ls -> cartesian3 Ls |> Seq.collect (fun C -> L |> Seq.map (fun x->x::C))
This last form is what I use myself, since I most often just need to iterate over the results instead of having them all at once.
Some benchmarks on my machine:
Test code:
let test f N =
let fss0 = List.init N (fun i -> List.init (i+1) (fun j -> j+i*i+i))
f fss0 |> Seq.length
Results in FSI:
> test projection 10;;
Real: 00:00:18.066, CPU: 00:00:18.062, GC gen0: 168, gen1: 157, gen2: 7
val it : int = 3628800
> test cartesian 10;;
Real: 00:00:19.822, CPU: 00:00:19.828, GC gen0: 244, gen1: 121, gen2: 3
val it : int = 3628800
> test cartesian2 10;;
Real: 00:00:09.247, CPU: 00:00:09.250, GC gen0: 94, gen1: 52, gen2: 2
val it : int = 3628800
> test cartesian3 10;;
Real: 00:00:04.254, CPU: 00:00:04.250, GC gen0: 359, gen1: 1, gen2: 0
val it : int = 3628800

This function is Haskell's sequence (although sequence is more generic). Translating to F#:
let sequence lss =
let k l ls = [ for x in l do for xs in ls -> x::xs ]
List.foldBack k lss [[]]
in interactive:
> test projection 10;;
Real: 00:00:12.240, CPU: 00:00:12.807, GC gen0: 163, gen1: 155, gen2: 4
val it : int = 3628800
> test sequence 10;;
Real: 00:00:06.038, CPU: 00:00:06.021, GC gen0: 75, gen1: 74, gen2: 0
val it : int = 3628800
General idea: avoid explicit recursion in favor to standard combinators (fold, map etc.)

Here's a tail-recursive version. It's not as fast as some of the other solutions (only 25% faster than your original function), but memory usage is constant, so it works for extremely large result sets.
let cartesian l =
let rec aux f = function
| [] -> f (Seq.singleton [])
| h::t -> aux (fun acc -> f (Seq.collect (fun x -> (Seq.map (fun y -> y::x) h)) acc)) t
aux id l

You implementation is slow because of the # (i.e List concat) operation, which is a slow operation and it is being done many a times in recursive way. The reason for # being slow is that List are Linked list in functional programming and to concat 2 list you have to first go till the end of the list (one by one traversing through elements) and then append another list .
Please look at the suggested references in comments. I hope those will help you out.

The following version is even faster than cartesian3, and uses basic features of functional programming (no fancy List.collect, Seq.collect...)
let cartesian xss =
let rec add x yss s =
match yss with
| [] -> s
| ys :: yss' -> add x yss' ((x :: ys) :: s)
let rec mul xs yss p =
match xs with
| [] -> p
| x :: xs' -> mul xs' yss (add x yss p)
let rec cartesian xss c =
match xss with
| [] -> c
| xs :: xss' -> cartesian xss' (mul xs c [])
cartesian xss [ [] ]
Results
> test cartesian3 10;;
Real: 00:00:04.132, CPU: 00:00:04.109, GC Gen0: 482, Gen1: 2, Gen2: 1
val it: int = 3628800
> test cartesian 10;;
Real: 00:00:01.414, CPU: 00:00:01.406, GC Gen0: 27, Gen1: 16, Gen2: 2
val it: int = 3628800
> test cartesian3 11;;
Real: 00:00:45.652, CPU: 00:00:45.281, GC Gen0: 5299, Gen1: 5, Gen2: 1
val it: int = 39916800
> test cartesian 11;;
Real: 00:00:17.242, CPU: 00:00:16.812, GC Gen0: 260, Gen1: 174, Gen2: 6
val it: int = 39916800
The partition strategy used here is naive: the input list xss is separated into head and tail, I believe that a smarter strategy can give much better performance.
Edit: Another solution is of Christopher Strachey, which is explained in [1] (the observation is that the recursion on list can be expressed by folding):
let cartesianf xss =
let f xs yss =
let h x ys uss = (x :: ys) :: uss
let g yss x zss = List.foldBack (h x) yss zss
List.foldBack (g yss) xs []
List.foldBack f xss [ [] ]
[1] Mike Spivey. Strachey's function pearl, forty years on.

let crossProduct listA listB listC listD listE =
listA |> Seq.collect (fun a ->
listB |> Seq.collect (fun b ->
listC |> Seq.collect (fun c ->
listD |> Seq.collect (fun d ->
listE |> Seq.map (fun e -> a,b,c,d,e))

Related

F# Continuation-based tail recursion on list

I have this quite simple function which takes an int and adds it to the head of the list and is recursively called with i multiplied with itself:
let rec f i = function
| [] -> []
| x::xs -> (x+i)::f (i*i) xs
f 2 [1;2;3]
val it : int list = [3; 6; 19]
Now, I'm attempting to rewrite it using a continuation, but I'm a little stuck. Here's what I've come up with so far:
let fC i l =
let rec loop cont = function
| [] -> []
| x::xs -> cont(x+i)::loop (fun acc -> (acc*acc)) xs
loop id l
fC 2 [1;2;3] //Expected [3;6;19]
val it : int list = [3; 16; 25]
Any hints to what I'm doing wrong?
Looking at this questions and the comments it seems to me that there is some confusion.
Tail recursive does not necessary mean continuation passing style (CPS).
Here's the function in CPS:
let f' i p =
let rec loop i p k =
match p with
| [] -> k []
| x::xs -> loop (i*i) xs (fun a -> k ((x+i)::a))
loop i p id
And of course, it's tail recursive. But you can also write it tail recursive by using an accumulator instead of a continuation:
let f'' i p =
let rec loop i p acc =
match p with
| [] -> acc
| x::xs -> loop (i*i) xs ((x+i)::acc)
loop i p [] |> List.rev
See also the answer to this question to understand better CPS.

Trying to replicate the elements in a list n times in OCaml

I'm trying to write a function that would take an input like :
repeat 3 [1;2] ;;
and display something like:
[1;2;1;2;1;2]
Now the code I have is:
let repeat ls n =
let rec helper acc n l =
if n = 0 then acc else helper (l :: acc) (n-1) l in
let rec helper2 acc = function
| [] -> acc
| h :: t -> helper2 (helper acc n h) t in helper2 [] (List.rev ls);;
which gives me an output of:
[1;1;1;2;2;2]
for the same input. What can I do to fix this?
You are almost at the end ;)
Just modify the first helper :
let rec helper acc n l =
if n = 0 then acc else helper (l # acc) (n-1) l ;;
And you will be close to the solution.
(you just want to replicate the input list so # is ok to concatenate this list to the acc, you do not want to parse each and every element of the list, so :: is not what you need)
I think this solution may be a little bit faster in term of complexity (and simplicity):
let repeat ls n =
let rec f l = function
| 0 -> l
| n -> f (List.rev_append ls l) (n-1) in
List.rev (f [] n)
Also I always forget if List.rev is a tail-recursive or not, so this may be even better:
let repeat ls n =
let rec rev l = function
| [] -> l
| a::t -> rev (a::l) t in
let rec f l = function
| 0 -> l
| n -> f (List.rev_append ls l) (n-1) in
rev [] (f [] n)
Note: in my opinion Pierre's answer is good enough, my post is more like remark.

The parameter order of function for ocaml

Suppose I have a map function like below:
let rec map f xs = match xs with
[] -> []
| hd :: tl -> f hd :: (map f tl)
I got some errors when I change the signature of map to let rec map xs f,
Could anyone pinpoint which knowledge I am lacking and explains why?
There's no problem if you change the definition and the recursive call.
# let rec map xs f = match xs with
[] -> []
| hd :: tl -> f hd :: (map tl f);;
val map : 'a list -> ('a -> 'b) -> 'b list = <fun>
# map [3; 5; 7] ((+) 1);;
- : int list = [4; 6; 8]
You should be able to switch the xs with the f
the only thing you have to make sure to change is the the order on line 3 where you have (map f tl) to (map tl f)

Any simpler way to implement non-in-place selection sort in OCaml?

I implemented a non-in-place version of selection sort in OCaml.
let sort compare_fun l =
let rec find_min l' min_l origin_l =
match l' with
| [] ->
if min_l = [] then (min_l, l')
else
let min = List.hd min_l
in
(min_l, List.filter (fun x -> if x != min then true else false) origin_l)
| x::tl ->
if min_l = [] then
find_min tl [x] origin_l
else
let c = compare_fun (List.hd min_l) x
in
if c = 1 then
find_min tl [x] origin_l
else if c = 0 then
find_min tl (min_l # [x]) origin_l
else
find_min tl min_l origin_l
in
let rec insert_min l' new_l =
match l' with
| [] -> new_l
| _ ->
let (min_l, rest) = find_min l' [] l'
in
insert_min rest (new_l # min_l)
in
insert_min l [];;
My idea is that in a list, every time I find the list of minimum items (in case of duplicate values) and add this min list to the result list, then redo the finding_min in the rest of the list.
I use List.filter to filter out the min_list, so the resulting list will be the list for next find_min.
I find my implementation is quite complicated, and far more complicated than the Java in-place version of selection sort.
Any suggestions to improve it?
Edit: Here's a much better implementation: http://rosettacode.org/wiki/Sorting_algorithms/Selection_sort#OCaml
here's my own crappier implementation
(* partial function - bad habit, don't do this. *)
let smallest (x::xs) = List.fold_right (fun e acc -> min e acc) xs x
let remove l y =
let rec loop acc = function
| [] -> raise Not_found
| x::xs -> if y = x then (List.rev acc) # xs else loop (x::acc) xs
in loop [] l
let selection_sort =
let rec loop acc = function
| [] -> List.rev acc
| xs ->
let small = smallest xs in
let rest = remove xs small in
loop (small::acc) rest
in loop []

F# System.OutOfMemoryException with recursive call

This is actually a solution to Project Euler Problem 14 in F#. However, I'm running into a System.OutOfMemory exception when attempting to calculate an iterative sequence for larger numbers. As you can see, I'm writing my recursive function with tail calls.
I was running into a problem with StackOverFlowException because I was debugging in visual studio (which disables the tail calls). I've documented that in another question. Here, I'm running in release mode--but I'm getting out of memory exceptions when I run this as a console app (on windows xp with 4gb ram).
I'm really at a loss to understand how I coded myself into this memory overflow & hoping someone can show my the error in my ways.
let E14_interativeSequence x =
let rec calc acc startNum =
match startNum with
| d when d = 1 -> List.rev (d::acc)
| e when e%2 = 0 -> calc (e::acc) (e/2)
| _ -> calc (startNum::acc) (startNum * 3 + 1)
let maxNum pl=
let rec maxPairInternal acc pairList =
match pairList with
| [] -> acc
| x::xs -> if (snd x) > (snd acc) then maxPairInternal x xs
else maxPairInternal acc xs
maxPairInternal (0,0) pl
|> fst
// if I lower this to like [2..99999] it will work.
[2..99999]
|> List.map (fun n -> (n,(calc [] n)))
|> List.map (fun pair -> ((fst pair), (List.length (snd pair))))
|> maxNum
|> (fun x-> Console.WriteLine(x))
EDIT
Given the suggestions via the answers, I rewrote to use a lazy list and also to use Int64's.
#r "FSharp.PowerPack.dll"
let E14_interativeSequence =
let rec calc acc startNum =
match startNum with
| d when d = 1L -> List.rev (d::acc) |> List.toSeq
| e when e%2L = 0L -> calc (e::acc) (e/2L)
| _ -> calc (startNum::acc) (startNum * 3L + 1L)
let maxNum (lazyPairs:LazyList<System.Int64*System.Int64>) =
let rec maxPairInternal acc (pairs:seq<System.Int64*System.Int64>) =
match pairs with
| :? LazyList<System.Int64*System.Int64> as p ->
match p with
| LazyList.Cons(x,xs)-> if (snd x) > (snd acc) then maxPairInternal x xs
else maxPairInternal acc xs
| _ -> acc
| _ -> failwith("not a lazylist of pairs")
maxPairInternal (0L,0L) lazyPairs
|> fst
{2L..999999L}
|> Seq.map (fun n -> (n,(calc [] n)))
|> Seq.map (fun pair -> ((fst pair), (Convert.ToInt64(Seq.length (snd pair)))))
|> LazyList.ofSeq
|> maxNum
which solves the problem. I'd also look at Yin Zhu's solution which is better, though.
As mentioned by Brian, List.* operations are not appropriate here. They cost too much memory.
The stackoverflow problem comes from another place. There are two possible for you to have stackoverflow: calc and maxPairInternal. It must be the first as the second has the same depth as the first. Then the problem comes to the numbers, the number in 3n+1 problem could easily go to very large. So you first get a int32 overflow, then you get a stackoverflow. That's the reason. After changing the numbers to 64bit, the program works.
Here is my solution page, where you can see a memoization trick.
open System
let E14_interativeSequence x =
let rec calc acc startNum =
match startNum with
| d when d = 1L -> List.rev (d::acc)
| e when e%2L = 0L -> calc (e::acc) (e/2L)
| _ -> calc (startNum::acc) (startNum * 3L + 1L)
let maxNum pl=
let rec maxPairInternal acc pairList =
match pairList with
| [] -> acc
| x::xs -> if (snd x) > (snd acc) then maxPairInternal x xs
else maxPairInternal acc xs
maxPairInternal (0L,0) pl
|> fst
// if I lower this to like [2..99999] it will work.
[2L..1000000L]
|> Seq.map (fun n -> (n,(calc [] n)))
|> Seq.maxBy (fun (n, lst) -> List.length lst)
|> (fun x-> Console.WriteLine(x))
If you change List.map to Seq.map (and re-work maxPairInternal to iterate over a seq) that will probably help tons. Right now, you're manifesting all the data at once in a giant structure before processing the whole structure to get a single number result. It is much better to do this lazily via Seq, and just create one row, and compare it with the next row, and create a single row at a time and then discard it.
I don't have time to code my suggestion now, but let me know if you are still having trouble and I'll revisit this.
Stop trying to use lists everywhere, this isn't Haskell! And stop writing fst pair and snd pair everywhere, this isn't Lisp!
If you want a simple solution in F# you can do it directly like this without creating any intermediate data structures:
let rec f = function
| 1L -> 0
| n when n % 2L = 0L -> 1 + f(n / 2L)
| n -> 1 + f(3L * n + 1L)
let rec g (li, i) = function
| 1L -> i
| n -> g (max (li, i) (f n, n)) (n - 1L)
let euler14 n = g (0, 1L) n
That takes around 15s on my netbook. If you want something more time efficient, reuse previous results via an array:
let rec inside (a : _ array) n =
if n <= 1L || a.[int n] > 0s then a.[int n] else
let p =
if n &&& 1L = 0L then inside a (n >>> 1) else
let n = 3L*n + 1L
if n < int64 a.Length then inside a n else outside a n
a.[int n] <- 1s + p
1s + p
and outside (a : _ array) n =
let n = if n &&& 1L = 0L then n >>> 1 else 3L*n + 1L
1s + if n < int64 a.Length then inside a n else outside a n
let euler14 n =
let a = Array.create (n+1) 0s
let a = Array.Parallel.init (n+1) (fun n -> inside a (int64 n))
let i = Array.findIndex (Array.reduce max a |> (=)) a
i, a.[i]
That takes around 0.2s on my netbook.
Found this looking for Microsoft.FSharp.Core.Operators.Checked.
I'm just learning F#, so I thought I'd take the Project Euler 14 Challenge.
This uses recursion but not tail-recursion.
Takes about 3.1 sec for me, but has the advantage that I can almost understand it.
let Collatz (n:int64) = if n % 2L = 0L then n / 2L else n * 3L + 1L
let rec CollatzLength (current:int64) (acc:int) =
match current with
| 1L -> acc
| _ -> CollatzLength (Collatz current) (acc + 1)
let collatzSeq (max:int64) =
seq{
for i in 1L..max do
yield i, CollatzLength i 0
}
let collatz = Seq.toList(collatzSeq 1000000L)
let result, steps = List.maxBy snd collatz

Resources