Performance of F# code terrible

Performance of F# code terrible - performance

This is my very first F# programme. I thought I would implement Conway's Game of Life as a first exercise.
Please help me understand why the following code has such terrible performance.
let GetNeighbours (p : int, w : int, h : int) : seq<int> =
let (f1, f2, f3, f4) = (p > w, p % w <> 1, p % w <> 0, p < w * (h - 1))
[
(p - w - 1, f1 && f2);
(p - w, f1);
(p - w + 1, f1 && f3);
(p - 1, f2);
(p + 1, f3);
(p + w - 1, f4 && f2);
(p + w, f4);
(p + w + 1, f4 && f3)
]
|> List.filter (fun (s, t) -> t)
|> List.map (fun (s, t) -> s)
|> Seq.cast
let rec Evolve (B : seq<int>, S : seq<int>, CC : seq<int>, g : int) : unit =
let w = 10
let h = 10
let OutputStr = (sprintf "Generation %d: %A" g CC) // LINE_MARKER_1
printfn "%s" OutputStr
let CCN = CC |> Seq.map (fun s -> (s, GetNeighbours (s, w, h)))
let Survivors =
CCN
|> Seq.map (fun (s, t) -> (s, t |> Seq.map (fun u -> (CC |> Seq.exists (fun v -> u = v)))))
|> Seq.map (fun (s, t) -> (s, t |> Seq.filter (fun u -> u)))
|> Seq.map (fun (s, t) -> (s, Seq.length t))
|> Seq.filter (fun (s, t) -> (S |> Seq.exists (fun u -> t = u)))
|> Seq.map (fun (s, t) -> s)
let NewBorns =
CCN
|> Seq.map (fun (s, t) -> t)
|> Seq.concat
|> Seq.filter (fun s -> not (CC |> Seq.exists (fun t -> t = s)))
|> Seq.groupBy (fun s -> s)
|> Seq.map (fun (s, t) -> (s, Seq.length t))
|> Seq.filter (fun (s, t) -> B |> Seq.exists (fun u -> u = t))
|> Seq.map (fun (s, t) -> s)
let NC = Seq.append Survivors NewBorns
let SWt = new System.Threading.SpinWait ()
SWt.SpinOnce ()
if System.Console.KeyAvailable then
match (System.Console.ReadKey ()).Key with
| System.ConsoleKey.Q -> ()
| _ -> Evolve (B, S, NC, (g + 1))
else
Evolve (B, S, NC, (g + 1))
let B = [3]
let S = [2; 3]
let IC = [4; 13; 14]
let g = 0
Evolve (B, S, IC, g)
The first five iterations, i.e. generations 0, 1, 2, 3, 4, happen without a problem. Then, after a brief pause of about 100 milliseconds, generation 5 is completed. But after that, the programme hangs at the line marked "LINE_MARKER_1," as revealed by breakpoints Visual Studio. It never reaches the printfn line.
The strange thing is, already by generation 2, the CC sequence in the function Evolve has already stabilised to the sequence [4; 13; 14; 3] so I see no reason why generation 6 should fail to evolve.
I understand that it is generally considered opprobrious to paste large segments of code and ask for help in debugging, but I don't know how to reduce this to a minimum working example. Any pointers that would help me debug would be gratefully acknowledged.
Thanks in advance for your help.
EDIT
I really believe that anyone wishing to help me may pretty much ignore the GetNeighbours function. I included it only for the sake of completeness.

The simplest way to fix your performance is by using Seq.cache:
let GetNeighbours (p : int, w : int, h : int) : seq<int> =
let (f1, f2, f3, f4) = (p > w, p % w <> 1, p % w <> 0, p < w * (h - 1))
[
(p - w - 1, f1 && f2);
(p - w, f1);
(p - w + 1, f1 && f3);
(p - 1, f2);
(p + 1, f3);
(p + w - 1, f4 && f2);
(p + w, f4);
(p + w + 1, f4 && f3)
]
|> List.filter (fun (s, t) -> t)
|> List.map (fun (s, t) -> s)
:> seq<_> // <<<<<<<<<<<<<<<<<<<<<<<< MINOR EDIT, avoid boxing
let rec Evolve (B : seq<int>, S : seq<int>, CC : seq<int>, g : int) : unit =
let w = 10
let h = 10
let OutputStr = (sprintf "Generation %d: %A" g CC) // LINE_MARKER_1
printfn "%s" OutputStr
let CCN =
CC
|> Seq.map (fun s -> (s, GetNeighbours (s, w, h)))
|> Seq.cache // <<<<<<<<<<<<<<<<<< EDIT
let Survivors =
CCN
|> Seq.map (fun (s, t) -> (s, t |> Seq.map (fun u -> (CC |> Seq.exists (fun v -> u = v)))))
|> Seq.map (fun (s, t) -> (s, t |> Seq.filter (fun u -> u)))
|> Seq.map (fun (s, t) -> (s, Seq.length t))
|> Seq.filter (fun (s, t) -> (S |> Seq.exists (fun u -> t = u)))
|> Seq.map (fun (s, t) -> s)
let NewBorns =
CCN
|> Seq.map (fun (s, t) -> t)
|> Seq.concat
|> Seq.filter (fun s -> not (CC |> Seq.exists (fun t -> t = s)))
|> Seq.groupBy (fun s -> s)
|> Seq.map (fun (s, t) -> (s, Seq.length t))
|> Seq.filter (fun (s, t) -> B |> Seq.exists (fun u -> u = t))
|> Seq.map (fun (s, t) -> s)
let NC =
Seq.append Survivors NewBorns
|> Seq.cache // <<<<<<<<<<<<<<<<<< EDIT
let SWt = new System.Threading.SpinWait ()
SWt.SpinOnce ()
if System.Console.KeyAvailable then
match (System.Console.ReadKey ()).Key with
| System.ConsoleKey.Q -> ()
| _ -> Evolve (B, S, NC, (g + 1))
else
Evolve (B, S, NC, (g + 1))
let B = [3]
let S = [2; 3]
let IC = [4; 13; 14]
let g = 0
Evolve (B, S, IC, g)
The big problem is not using Seq per se, the problem is using it correctly. By default sequences are not lazy, instead they define computations that are re-evaluated on every traversal. This means that unless you do something about it (such as Seq.cache), re-evaluating the sequence may screw up the algorithmic complexity of your program.
Your original program has exponential complexity. To see that, note that it doubles the number of traversed elements with each iteration.
Also note that with your style of programming using Seq operators followed by Seq.cache has a small advantage over using List or Array operators: this avoids allocating intermediate data structures, which reduces GC pressure and may speed things up a bit.

See comments and all, but this code runs like hell - with both List.* and some other smaller optimisations:
let GetNeighbours p w h =
let (f1, f2, f3, f4) = p > w, p % w <> 1, p % w <> 0, p < w * (h - 1)
[
p - w - 1, f1 && f2
p - w, f1
p - w + 1, f1 && f3
p - 1, f2
p + 1, f3
p + w - 1, f4 && f2
p + w, f4
p + w + 1, f4 && f3
]
|> List.choose (fun (s, t) -> if t then Some s else None)
let rec Evolve B S CC g =
let w = 10
let h = 10
let OutputStr = sprintf "Generation %d: %A" g CC // LINE_MARKER_1
printfn "%s" OutputStr
let CCN = CC |> List.map (fun s -> s, GetNeighbours s w h)
let Survivors =
CCN
|> List.choose (fun (s, t) ->
let t =
t
|> List.filter (fun u -> CC |> List.exists ((=) u))
|> List.length
if S |> List.exists ((=) t) then
Some s
else None)
let NewBorns =
CCN
|> List.collect snd
|> List.filter (not << fun s -> CC |> List.exists ((=) s))
|> Seq.countBy id
|> List.ofSeq
|> List.choose (fun (s, t) ->
if B |> List.exists ((=) t) then
Some s
else None)
let NC = List.append Survivors NewBorns
let SWt = new System.Threading.SpinWait()
SWt.SpinOnce()
if System.Console.KeyAvailable then
match (System.Console.ReadKey()).Key with
| System.ConsoleKey.Q -> ()
| _ -> Evolve B S NC (g + 1)
else
Evolve B S NC (g + 1)
let B = [3]
let S = [2; 3]
let IC = [4; 13; 14]
let g = 0
Evolve B S IC g

Just thought I would add a simple answer, in case other beginners like me run into the same problem.
As advised by Ramon Snir, ildjarn and pad above, I changed the Seq.X calls to List.X. I had to add a simple extra casting step to account for the fact that List does not have groupBy, but having done that, the code now runs like a charm!
Thanks a lot.

One of the most amazing characteristics of the ML family of languages is that short code is often fast code and this applies to F# too.
Compare your implementation with the much faster one I blogged here:
let count (a: _ [,]) x y =
let m, n = a.GetLength 0, a.GetLength 1
let mutable c = 0
for x=x-1 to x+1 do
for y=y-1 to y+1 do
if x>=0 && x<m && y>=0 && y<n && a.[x, y] then
c <- c + 1
if a.[x, y] then c-1 else c
let rule (a: _ [,]) x y =
match a.[x, y], count a x y with
| true, (2 | 3) | false, 3 -> true
| _ -> false

Related

Implementing LLL algorithm in Haskell

I'm implementing the LLL basis reduction algorithm in Haskell. I'm basing my code on the pseudocode on Wikipedia. Here is what I have so far. Apologies for the code dump; I strongly suspect the issue lies in lll but I'm giving everything just in case.
import Linear as L
f v x = v `L.dot` x
gram_schmidt b =
let aux vs us =
case vs of
v:t -> let vus = map (\u -> project u v) us
s = foldr (^+^) zero vus
u = v ^-^ s in
aux t (us++[u])
[] -> us
in aux b []
swap :: Int -> Int -> [a] -> [a]
swap i j xs =
let elemI = xs !! i
elemJ = xs !! j
left = take i xs
middle = take (j - i - 1) (drop (i + 1) xs)
right = drop (j + 1) xs
in left ++ [elemJ] ++ middle ++ [elemI] ++ right
update i xs new =
let left = take (i-1) xs
right = drop (i) xs
in left ++ [new] ++ right
sort_vecs vs = map snd (sort (zip (map norm vs) vs))
lll :: Int -> [[Double]] -> Double -> [[Double]]
lll d b delta =
let b' = gram_schmidt b
aux :: [[Double]] -> [[Double]] -> Int -> [[Double]]
aux b b' k =
if k >= d then
b
else
let aux2 :: [[Double]] -> [[Double]] -> Int -> [[Double]]
aux2 b b' j =
if j < 0 then
let mu = (f (b!!k) (b'!!(k-1))) / (f (b'!!(k-1)) (b'!!(k-1))) in
if f (b'!!k) (b'!!k) >= (delta-mu^2) * f (b'!!(k-1)) (b'!!(k-1)) then
aux b b' (k+1)
else
let bb = swap k (k-1) b
bb' = gram_schmidt bb in
aux bb bb' (max (k-1) 1)
else
let mu = (f (b!!k) (b'!!j)) / (f (b'!!j) (b'!!j)) in
if abs mu > 0.5 then
let bk = b!!k
bj = b!!j
bb = update k b (bk ^-^ (fromIntegral (round mu)) *^ bj)
bb' = gram_schmidt bb in
aux2 bb bb' (j-1)
else
aux2 b b' (j-1)
in aux2 b b' (k-1)
in sort_vecs (aux b b' 1)
My issue is that it seems to find a basis of a sublattice. In particular, lll d [[-0.8526334764831849,-3.125000000000004e-2],[-1.2941941738241598,4.419417382415916e-2]] 0.75 returns [[0.41107277914220997,0.10669417382415924],[-1.2941941738241598,4.419417382415916e-2]], a basis for a index-2 sublattice, and with basis which are almost-parallel. I've been staring at this code for ages to no avail (I thought there was an issue with update where (i-1) should be (i) and (i) should be (i+1) but this caused an infinite loop). Any help is greatly appreciated.

F# - Algorithm and strings

Let's says I have a string of a length N that contains only 0 or 1. I want to split that string in multiples strings and each string should contains only one digit.
Example:
00011010111
Should be split into:
000
11
0
1
0
111
The only solution I can think of if using a for loop with a string builder (Written in pseudo code below, more c# like sorry):
result = new list<string>
tmpChar = ""
tmpString = ""
for each character c in MyString
if tmpchar != c
if tmpsString != ""
result.add tmpString
tmpString = ""
endIf
tmpchar = c
endIf
tmpString += tmpChar
endFor
Do you have any other solution and maybe a clever solution that use a more functional approach?

I think Seq.scan would be a good fit for this, this is a very procedural problem in nature, preserving the order like that. But here is code that I believe does what you are asking.
"00011010111"
|> Seq.scan (fun (s, i) x ->
match s with
| Some p when p = x -> Some x, i
| _ -> Some x, i + 1 ) (None, 0)
|> Seq.countBy id
|> Seq.choose (function
| (Some t, _), n -> Some(t, n)
| _ -> None )
|> Seq.toList

Perhaps something along the lines of:
let result =
let rec groupWhileSame xs result =
match xs with
| a when a |> Seq.isEmpty -> result
| _ ->
let head = xs |> Seq.head
let str = xs |> Seq.takeWhile ((=) head)
let rest = xs |> Seq.skipWhile ((=) head)
groupWhileSame rest (Seq.append result [str])
groupWhileSame (myStr) []

Seq.fold (fun (acc:(string list)) x ->
match acc with
| y::rst when y.StartsWith(string x) -> (string x) + y::rst
| _ -> (string x)::acc)
[]
"00011010111"

Consider this function (which is generic):
let chunk s =
if Seq.isEmpty s then []
else
let rec chunk items chunks =
if Seq.isEmpty items then chunks
else
let chunks' =
match chunks with
| [] -> [(Seq.head items, 1)]
| x::xs ->
let c,n = x in let c' = Seq.head items in
if c = c' then (c, n + 1) :: xs else (c', 1) :: x :: xs
chunk (Seq.tail items) chunks'
chunk s [] |> List.rev
It returns a list of tuples, where each tuple represents an item and its repetitions.
So
"00011010111" |> Seq.toList |> chunk
actually returns
[('0', 3); ('1', 2); ('0', 1); ('1', 1); ('0', 1); ('1', 3)]
Basically, we're doing run length encoding (which is admittedly a bit wasteful in the case of your example string).
To get the list of strings that you want, we use code like following:
"00011010111"
|> Seq.toList
|> chunk
|> List.map (fun x -> let c,n = x in new string(c, n))

Here's a working version of OP's proposal with light syntax:
let chunk (s: string) =
let result = System.Collections.Generic.List<string>()
let mutable tmpChar = ""
let mutable tmpString = ""
for c in s do
if tmpChar <> string c then
if tmpString <> "" then
result.Add tmpString
tmpString <- ""
tmpChar <- string c
tmpString <- tmpString + tmpChar
result.Add tmpString
result
No attempt was made to follow a functional style.

Boggle - count all possible paths on a N*N grid. Performance

While reading up this question, I wondered why no one would "simply" iterate all the possible paths on the boggle grid and have the word-tries follow and then cancel the path if there is no match in the word-trie. Cannot be that many paths on a tiny 4 by 4 grid, right? How many paths are there? So I set out to code a path-counter function in F#. The results yield what no one stated on that other page: Way more paths on the grid than I would have guessed (more paths than words in the word-set, actually).
While all that is pretty much the back story to my question, the code I ended up with was running rather slow and I found that I could not give good answers to a few aspects of the code. So here, the code first, then below it, you will find points which I think deserve explanations...
let moves n state square =
let allSquares = [0..n*n-1] |> Set.ofList
let right = Set.difference allSquares (Set.ofList [n-1..n..n*n])
let left = Set.difference allSquares (Set.ofList [0..n..n*n-1])
let up = Set.difference allSquares (Set.ofList [0..n-1])
let down = Set.difference allSquares (Set.ofList [n*n-n..n*n-1])
let downRight = Set.intersect right down
let downLeft = Set.intersect left down
let upRight = Set.intersect right up
let upLeft = Set.intersect left up
let appendIfInSet se v res =
if Set.contains square se then res # v else res
[]
|> appendIfInSet right [square + 1]
|> appendIfInSet left [square - 1]
|> appendIfInSet up [square - n]
|> appendIfInSet down [square + n]
|> appendIfInSet downRight [square + n + 1]
|> appendIfInSet downLeft [square + n - 1]
|> appendIfInSet upRight [square - n + 1]
|> appendIfInSet upLeft [square - n - 1]
|> List.choose (fun s -> if ((uint64 1 <<< s) &&& state) = 0UL then Some s else None )
let block state square =
state ||| (uint64 1 <<< square)
let countAllPaths n lmin lmax =
let mov = moves n // line 30
let rec count l state sq c =
let state' = block state sq
let m = mov state' sq
match l with
| x when x <= lmax && x >= lmin ->
List.fold (fun acc s -> count (l+1) state' s acc) (c+1) m
| x when x < lmin ->
List.fold (fun acc s -> count (l+1) state' s acc) (c) m
| _ ->
c
List.fold (fun acc s -> count 0 (block 0UL s) s acc) 0 [0..n*n-1]
[<EntryPoint>]
let main args =
printfn "%d: %A" (Array.length args) args
if 3 = Array.length args then
let n = int args.[0]
let lmin = int args.[1]
let lmax = int args.[2]
printfn "%d %d %d -> %d" n lmin lmax (countAllPaths n lmin lmax)
else
printfn "usage: wordgames.exe n lmin lmax"
0
In line 30, I curried the moves function with the first argument, hoping that maybe code optimization would benefit from it. Maybe optimizing the 9 sets I create in move which are only a function of n. After all, they need not be generated over and over again, right? On the other hand, I would not really bet on it actually happening.
So, question #1 is: How could I enforce this optimization in an as little code bloating way as possible? (I could of course create a type with 9 members and then an array of that type for each possible n and then do a look up table like usage of the pre-computed sets but that would be code bloat in my opinion).
Many sources hint that parallel folds are considered critical. How could I create a parallel version of the counting function (which runs on multiple cores)?
Does anyone have clever ideas how to speed this up? Maybe some pruning or memoization etc?
At first, when I ran the function for n=4 lmin=3 lmax=8 I thought it takes so long because I ran it in fsi. But then I compiled the code with -O and it still took about the same time...
UPDATE
While waiting for input from you guys, I did the code bloated manual optimization version (runs much faster) and then found a way to make it run on multiple cores.
All in all those 2 changes yielded about a speed up by a factor of 30. Here the (bloated) version I came up with (still looking for a way to avoid the bloat):
let squareSet n =
let allSquares = [0..n*n-1] |> Set.ofList
let right = Set.difference allSquares (Set.ofList [n-1..n..n*n])
let left = Set.difference allSquares (Set.ofList [0..n..n*n-1])
let up = Set.difference allSquares (Set.ofList [0..n-1])
let down = Set.difference allSquares (Set.ofList [n*n-n..n*n-1])
let downRight = Set.intersect right down
let downLeft = Set.intersect left down
let upRight = Set.intersect right up
let upLeft = Set.intersect left up
[|right;left;up;down;upRight;upLeft;downRight;downLeft|]
let RIGHT,LEFT,UP,DOWN = 0,1,2,3
let UPRIGHT,UPLEFT,DOWNRIGHT,DOWNLEFT = 4,5,6,7
let squareSets =
[|Set.empty;Set.empty;Set.empty;Set.empty;Set.empty;Set.empty;Set.empty;Set.empty;|]
::
[ for i in 1..8 do
yield squareSet i
]
|> Array.ofList
let moves n state square =
let appendIfInSet se v res =
if Set.contains square se then res # v else res
[]
|> appendIfInSet squareSets.[n].[RIGHT] [square + 1]
|> appendIfInSet squareSets.[n].[LEFT] [square - 1]
|> appendIfInSet squareSets.[n].[UP] [square - n]
|> appendIfInSet squareSets.[n].[DOWN] [square + n]
|> appendIfInSet squareSets.[n].[DOWNRIGHT] [square + n + 1]
|> appendIfInSet squareSets.[n].[DOWNLEFT] [square + n - 1]
|> appendIfInSet squareSets.[n].[UPRIGHT] [square - n + 1]
|> appendIfInSet squareSets.[n].[UPLEFT] [square - n - 1]
|> List.choose (fun s -> if ((uint64 1 <<< s) &&& state) = 0UL then Some s else None )
let block state square =
state ||| (uint64 1 <<< square)
let countAllPaths n lmin lmax =
let mov = moves n
let rec count l state sq c =
let state' = block state sq
let m = mov state' sq
match l with
| x when x <= lmax && x >= lmin ->
List.fold (fun acc s -> count (l+1) state' s acc) (c+1) m
| x when x < lmin ->
List.fold (fun acc s -> count (l+1) state' s acc) (c) m
| _ ->
c
//List.fold (fun acc s -> count 0 (block 0UL s) s acc) 0 [0..n*n-1]
[0..n*n-1]
|> Array.ofList
|> Array.Parallel.map (fun start -> count 0 (block 0UL start) start 0)
|> Array.sum
[<EntryPoint>]
let main args =
printfn "%d: %A" (Array.length args) args
if 3 = Array.length args then
let n = int args.[0]
let lmin = int args.[1]
let lmax = int args.[2]
printfn "%d %d %d -> %d" n lmin lmax (countAllPaths n lmin lmax)
else
printfn "usage: wordgames.exe n lmin lmax"
0

As for the non-optimization of the generation of sets.
The second version posted in the update to the question showed, that this is actually the case (not optimized by compiler) and it yielded a significant speed up. The final version (posted below in this answer) carries that approach even further and speeds up the path counting (and the solving of a boggle puzzle) even further.
Combined with parallel execution on multiple cores, the initially really slow (maybe 30s) version could be sped up to only around 100ms for the n=4 lmin=3 lmax=8 case.
For n=6 class of problems, the parallel and hand tuned implementation solves a puzzle in around 60ms on my machine. It makes sense, that this is faster than the path counting, as the word list probing (used a dictionary with around 80000 words) along with the dynamic programming approach pointed out by #GuyCoder renders the solution of the puzzle a less complex problem than the (brute force) path counting.
Lesson learned
The f# compiler does not seem to be all too mystical and magical if it comes to code optimizations. Hand tuning is worth the effort if performance is really required.
Turning a single threaded recursive search function into a parallel (concurrent) function was not really hard in this case.
The final version of the code
Compiled with:
fsc --optimize+ --tailcalls+ wordgames.fs
(Microsoft (R) F# Compiler version 14.0.23413.0)
let wordListPath = #"E:\temp\12dicts-6.0.2\International\3of6all.txt"
let acceptableWord (s : string) : bool =
let s' = s.Trim()
if s'.Length > 2
then
if System.Char.IsLower(s'.[0]) && System.Char.IsLetter(s'.[0]) then true
else false
else
false
let words =
System.IO.File.ReadAllLines(wordListPath)
|> Array.filter acceptableWord
let squareSet n =
let allSquares = [0..n*n-1] |> Set.ofList
let right = Set.difference allSquares (Set.ofList [n-1..n..n*n])
let left = Set.difference allSquares (Set.ofList [0..n..n*n-1])
let up = Set.difference allSquares (Set.ofList [0..n-1])
let down = Set.difference allSquares (Set.ofList [n*n-n..n*n-1])
let downRight = Set.intersect right down
let downLeft = Set.intersect left down
let upRight = Set.intersect right up
let upLeft = Set.intersect left up
[|right;left;up;down;upRight;upLeft;downRight;downLeft|]
let RIGHT,LEFT,UP,DOWN = 0,1,2,3
let UPRIGHT,UPLEFT,DOWNRIGHT,DOWNLEFT = 4,5,6,7
let squareSets =
[|Set.empty;Set.empty;Set.empty;Set.empty;Set.empty;Set.empty;Set.empty;Set.empty;|]
::
[ for i in 1..8 do
yield squareSet i
]
|> Array.ofList
let movesFromSquare n square =
let appendIfInSet se v res =
if Set.contains square se then v :: res else res
[]
|> appendIfInSet squareSets.[n].[RIGHT] (square + 1)
|> appendIfInSet squareSets.[n].[LEFT] (square - 1)
|> appendIfInSet squareSets.[n].[UP] (square - n)
|> appendIfInSet squareSets.[n].[DOWN] (square + n)
|> appendIfInSet squareSets.[n].[DOWNRIGHT] (square + n + 1)
|> appendIfInSet squareSets.[n].[DOWNLEFT] (square + n - 1)
|> appendIfInSet squareSets.[n].[UPRIGHT] (square - n + 1)
|> appendIfInSet squareSets.[n].[UPLEFT] (square - n - 1)
let lutMovesN n =
Array.init n (fun i -> if i > 0 then Array.init (n*n-1) (fun j -> movesFromSquare i j) else Array.empty)
let lutMoves =
lutMovesN 8
let moves n state square =
let appendIfInSet se v res =
if Set.contains square se then v :: res else res
lutMoves.[n].[square]
|> List.filter (fun s -> ((uint64 1 <<< s) &&& state) = 0UL)
let block state square =
state ||| (uint64 1 <<< square)
let countAllPaths n lmin lmax =
let mov = moves n
let rec count l state sq c =
let state' = block state sq
let m = mov state' sq
match l with
| x when x <= lmax && x >= lmin ->
List.fold (fun acc s -> count (l+1) state' s acc) (c+1) m
| x when x < lmin ->
List.fold (fun acc s -> count (l+1) state' s acc) (c) m
| _ ->
c
//List.fold (fun acc s -> count 0 (block 0UL s) s acc) 0 [0..n*n-1]
[|0..n*n-1|]
|> Array.Parallel.map (fun start -> count 0 (block 0UL start) start 0)
|> Array.sum
//printfn "%d " (words |> Array.distinct |> Array.length)
let usage() =
printfn "usage: wordgames.exe [--gen n count problemPath | --count n lmin lmax | --solve problemPath ]"
let rng = System.Random()
let genProblem n (sb : System.Text.StringBuilder) =
let a = Array.init (n*n) (fun _ -> char (rng.Next(26) + int 'a'))
sb.Append(a) |> ignore
sb.AppendLine()
let genProblems nproblems n (sb : System.Text.StringBuilder) : System.Text.StringBuilder =
for i in 1..nproblems do
genProblem n sb |> ignore
sb
let solve n (board : System.String) =
let ba = board.ToCharArray()
let testWord (w : string) : bool =
let testChar k sq = (ba.[sq] = w.[k])
let rec testSquare state k sq =
match k with
| 0 -> testChar 0 sq
| x ->
if testChar x sq
then
let state' = block state x
moves n state' x
|> List.exists (testSquare state' (x-1))
else
false
[0..n*n-1]
|> List.exists (testSquare 0UL (String.length w - 1))
words
|> Array.splitInto 32
|> Array.Parallel.map (Array.filter testWord)
|> Array.concat
[<EntryPoint>]
let main args =
printfn "%d: %A" (Array.length args) args
let nargs = Array.length args
let sw = System.Diagnostics.Stopwatch()
match nargs with
| x when x >= 2 ->
match args.[0] with
| "--gen" ->
if nargs = 4
then
let n = int args.[1]
let nproblems = int args.[2]
let outpath = args.[3]
let problems = genProblems nproblems n (System.Text.StringBuilder())
System.IO.File.WriteAllText (outpath,problems.ToString())
0
else
usage()
0
| "--count" ->
if nargs = 4
then
let n = int args.[1]
let lmin = int args.[2]
let lmax = int args.[3]
sw.Start()
let count = countAllPaths n lmin lmax
sw.Stop()
printfn "%d %d %d -> %d (took: %d)" n lmin lmax count (sw.ElapsedMilliseconds)
0
else
usage ()
0
| "--solve" ->
if nargs = 2
then
let problems = System.IO.File.ReadAllLines(args.[1])
problems
|> Array.iter
(fun (p : string) ->
let n = int (sqrt (float (String.length p)))
sw.Reset()
sw.Start()
let found = solve n p
sw.Stop()
printfn "%s\n%A\n%dms" p found (sw.ElapsedMilliseconds)
)
0
else
usage ()
0
| _ ->
usage ()
0
| _ ->
usage ()
0

Solving knapsack prob in F#: performance

I found an article:
Solving the 0-1 knapsack problem using continuation-passing style with memoization in F#
about knapsack problem implemented in F#. As I'm learning this language, I found this really interesting and tried to investigate this a bit. Here's the code I crafted:
open System
open System.IO
open System.Collections.Generic
let parseToTuple (line : string) =
let parsedLine = line.Split(' ') |> Array.filter(not << String.IsNullOrWhiteSpace) |> Array.map Int32.Parse
(parsedLine.[0], parsedLine.[1])
let memoize f =
let cache = Dictionary<_, _>()
fun x ->
if cache.ContainsKey(x)
then cache.[x]
else
let res = f x
cache.[x] <- res
res
type Item =
{
Value : int
Size : int
}
type ContinuationBuilder() =
member b.Bind(x, f) = fun k -> x (fun x -> f x k)
member b.Return x = fun k -> k x
member b.ReturnFrom x = x
let cont = ContinuationBuilder()
let set1 =
[
(4, 11)
(8, 4)
(10, 5)
(15, 8)
(4, 3)
]
let set2 =
[
(50, 341045); (1906, 4912); (41516, 99732); (23527, 56554); (559, 1818); (45136, 108372); (2625, 6750); (492, 1484)
(1086, 3072); (5516, 13532); (4875, 12050); (7570, 18440); (4436, 10972); (620, 1940); (50897, 122094); (2129, 5558)
(4265, 10630); (706, 2112); (2721, 6942); (16494, 39888); (29688, 71276); (3383, 8466); (2181, 5662); (96601, 231302)
(1795, 4690); (7512, 18324); (1242, 3384); (2889, 7278); (2133, 5566); (103, 706); (4446, 10992); (11326, 27552)
(3024, 7548); (217, 934); (13269, 32038); (281, 1062); (77174, 184848); (952, 2604); (15572, 37644); (566, 1832)
(4103, 10306); (313, 1126); (14393, 34886); (1313, 3526); (348, 1196); (419, 1338); (246, 992); (445, 1390)
(23552, 56804); (23552, 56804); (67, 634)
]
[<EntryPoint>]
let main args =
// prepare list of items from a file args.[0]
let header, items = set1
|> function
| h::t -> h, t
| _ -> raise (Exception("Wrong data format"))
let N, K = header
printfn "N = %d, K = %d" N K
let items = List.map (fun x -> {Value = fst x ; Size = snd x}) items |> Array.ofList
let rec combinations =
let innerSolver key =
cont
{
match key with
| (i, k) when i = 0 || k = 0 -> return 0
| (i, k) when items.[i-1].Size > k -> return! combinations (i-1, k)
| (i, k) -> let item = items.[i-1]
let! v1 = combinations (i-1, k)
let! beforeItem = combinations (i-1, k-item.Size)
let v2 = beforeItem + item.Value
return max v1 v2
}
memoize innerSolver
let res = combinations (N, K) id
printfn "%d" res
0
However, the problem with this implementation is that it's veeeery slow (in practice I'm unable to solve problem with 50 items and capacity of ~300000, which gets solved by my naive implementation in C# in less than 1s).
Could you tell me if I made a mistake somewhere? Or maybe the implementation is correct and this is simply the inefficient way of solving this problem.

When you naively apply a generic memoizer like this, and use continuation passing, the values in your memoization cache are continuations, not regular "final" results. Thus, when you get a cache hit, you aren't getting back a finalized result, you are getting back some function which promises to compute a result when you invoke it. This invocation might be expensive, might invoke various other continuations, might ultimately hit the memoization cache again itself, etc.
Effectively memoizing continuation-passing functions such that a) the caching works to full effect and b) the function remains tail-recursive is quite difficult. Read this discussion and come back when you fully understand it all. ;-)
The author of the blog post you linked is using a more sophisticated, less generic memoizer which is specially fitted to the problem. Admittedly, I don't fully grok it yet (code on the blog is incomplete/broken, so hard to try it out), but I think the gist of it is that it "forces" the chain of continuations before caching the final integer result.
To illustrate the point, here's a quick refactor of your code which is fully self-contained and traces out relevant info:
open System
open System.Collections.Generic
let mutable cacheHits = 0
let mutable cacheMisses = 0
let memoize f =
let cache = Dictionary<_, _>()
fun x ->
match cache.TryGetValue(x) with
| (true, v) ->
cacheHits <- cacheHits + 1
printfn "Hit for %A - Result is %A" x v
v
| _ ->
cacheMisses <- cacheMisses + 1
printfn "Miss for %A" x
let res = f x
cache.[x] <- res
res
type Item = { Value : int; Size : int }
type ContinuationBuilder() =
member b.Bind(x, f) = fun k -> x (fun x -> f x k)
member b.Return x = fun k -> k x
member b.ReturnFrom x = x
let cont = ContinuationBuilder()
let genItems n =
[| for i = 1 to n do
let size = i % 5
let value = (size * i)
yield { Value = value; Size = size }
|]
let N, K = (5, 100)
printfn "N = %d, K = %d" N K
let items = genItems N
let rec combinations_cont =
memoize (
fun key ->
cont {
match key with
| (0, _) | (_, 0) -> return 0
| (i, k) when items.[i-1].Size > k -> return! combinations_cont (i - 1, k)
| (i, k) -> let item = items.[i-1]
let! v1 = combinations_cont (i-1, k)
let! beforeItem = combinations_cont (i-1, k - item.Size)
let v2 = beforeItem + item.Value
return max v1 v2
}
)
let res = combinations_cont (N, K) id
printfn "Answer: %d" res
printfn "Memo hits: %d" cacheHits
printfn "Memo misses: %d" cacheMisses
printfn ""
let rec combinations_plain =
memoize (
fun key ->
match key with
| (i, k) when i = 0 || k = 0 -> 0
| (i, k) when items.[i-1].Size > k -> combinations_plain (i-1, k)
| (i, k) -> let item = items.[i-1]
let v1 = combinations_plain (i-1, k)
let beforeItem = combinations_plain (i-1, k-item.Size)
let v2 = beforeItem + item.Value
max v1 v2
)
cacheHits <- 0
cacheMisses <- 0
let res2 = combinations_plain (N, K)
printfn "Answer: %d" res2
printfn "Memo hits: %d" cacheHits
printfn "Memo misses: %d" cacheMisses
As you can see, the CPS version is caching continuations (not integers), and there are is a lot of extra activity going on toward the end as the continuations are invoked.
If you boost the problem size to let (N, K) = (20, 100) (and remove the printfn statements in the memoizer), you will see that the CPS version ends up doing over 1 million cache lookups, compared to plain version doing only a few hundred.

From running this code in FSI:
open System
open System.Diagnostics
open System.Collections.Generic
let time f =
System.GC.Collect()
let sw = Stopwatch.StartNew()
let r = f()
sw.Stop()
printfn "Took: %f" sw.Elapsed.TotalMilliseconds
r
let mutable cacheHits = 0
let mutable cacheMisses = 0
let memoize f =
let cache = Dictionary<_, _>()
fun x ->
match cache.TryGetValue(x) with
| (true, v) ->
cacheHits <- cacheHits + 1
//printfn "Hit for %A - Result is %A" x v
v
| _ ->
cacheMisses <- cacheMisses + 1
//printfn "Miss for %A" x
let res = f x
cache.[x] <- res
res
type Item = { Value : int; Size : int }
type ContinuationBuilder() =
member b.Bind(x, f) = fun k -> x (fun x -> f x k)
member b.Return x = fun k -> k x
member b.ReturnFrom x = x
let cont = ContinuationBuilder()
let genItems n =
[| for i = 1 to n do
let size = i % 5
let value = (size * i)
yield { Value = value; Size = size }
|]
let N, K = (80, 400)
printfn "N = %d, K = %d" N K
let items = genItems N
//let rec combinations_cont =
// memoize (
// fun key ->
// cont {
// match key with
// | (0, _) | (_, 0) -> return 0
// | (i, k) when items.[i-1].Size > k -> return! combinations_cont (i - 1, k)
// | (i, k) -> let item = items.[i-1]
// let! v1 = combinations_cont (i-1, k)
// let! beforeItem = combinations_cont (i-1, k - item.Size)
// let v2 = beforeItem + item.Value
// return max v1 v2
// }
// )
//
//
//cacheHits <- 0
//cacheMisses <- 0
//let res = time(fun () -> combinations_cont (N, K) id)
//printfn "Answer: %d" res
//printfn "Memo hits: %d" cacheHits
//printfn "Memo misses: %d" cacheMisses
//printfn ""
let rec combinations_plain =
memoize (
fun key ->
match key with
| (i, k) when i = 0 || k = 0 -> 0
| (i, k) when items.[i-1].Size > k -> combinations_plain (i-1, k)
| (i, k) -> let item = items.[i-1]
let v1 = combinations_plain (i-1, k)
let beforeItem = combinations_plain (i-1, k-item.Size)
let v2 = beforeItem + item.Value
max v1 v2
)
cacheHits <- 0
cacheMisses <- 0
printfn "combinations_plain"
let res2 = time (fun () -> combinations_plain (N, K))
printfn "Answer: %d" res2
printfn "Memo hits: %d" cacheHits
printfn "Memo misses: %d" cacheMisses
printfn ""
let recursivelyMemoize f =
let cache = Dictionary<_, _>()
let rec memoizeAux x =
match cache.TryGetValue(x) with
| (true, v) ->
cacheHits <- cacheHits + 1
//printfn "Hit for %A - Result is %A" x v
v
| _ ->
cacheMisses <- cacheMisses + 1
//printfn "Miss for %A" x
let res = f memoizeAux x
cache.[x] <- res
res
memoizeAux
let combinations_plain2 =
let combinations_plain2Aux combinations_plain2Aux key =
match key with
| (i, k) when i = 0 || k = 0 -> 0
| (i, k) when items.[i-1].Size > k -> combinations_plain2Aux (i-1, k)
| (i, k) -> let item = items.[i-1]
let v1 = combinations_plain2Aux (i-1, k)
let beforeItem = combinations_plain2Aux (i-1, k-item.Size)
let v2 = beforeItem + item.Value
max v1 v2
let memoized = recursivelyMemoize combinations_plain2Aux
fun x -> memoized x
cacheHits <- 0
cacheMisses <- 0
printfn "combinations_plain2"
let res3 = time (fun () -> combinations_plain2 (N, K))
printfn "Answer: %d" res3
printfn "Memo hits: %d" cacheHits
printfn "Memo misses: %d" cacheMisses
printfn ""
let recursivelyMemoizeCont f =
let cache = Dictionary HashIdentity.Structural
let rec memoizeAux x k =
match cache.TryGetValue(x) with
| (true, v) ->
cacheHits <- cacheHits + 1
//printfn "Hit for %A - Result is %A" x v
k v
| _ ->
cacheMisses <- cacheMisses + 1
//printfn "Miss for %A" x
f memoizeAux x (fun y ->
cache.[x] <- y
k y)
memoizeAux
let combinations_cont2 =
let combinations_cont2Aux combinations_cont2Aux key =
cont {
match key with
| (0, _) | (_, 0) -> return 0
| (i, k) when items.[i-1].Size > k -> return! combinations_cont2Aux (i - 1, k)
| (i, k) -> let item = items.[i-1]
let! v1 = combinations_cont2Aux (i-1, k)
let! beforeItem = combinations_cont2Aux (i-1, k - item.Size)
let v2 = beforeItem + item.Value
return max v1 v2
}
let memoized = recursivelyMemoizeCont combinations_cont2Aux
fun x -> memoized x id
cacheHits <- 0
cacheMisses <- 0
printfn "combinations_cont2"
let res4 = time (fun () -> combinations_cont2 (N, K))
printfn "Answer: %d" res4
printfn "Memo hits: %d" cacheHits
printfn "Memo misses: %d" cacheMisses
printfn ""
I get these results:
N = 80, K = 400
combinations_plain
Took: 7.191000
Answer: 6480
Memo hits: 6231
Memo misses: 6552
combinations_plain2
Took: 6.310800
Answer: 6480
Memo hits: 6231
Memo misses: 6552
combinations_cont2
Took: 17.021200
Answer: 6480
Memo hits: 6231
Memo misses: 6552
combinations_plain is from latkin's answer.
combinations_plain2 exposes the recursive memoization step explicitly.
combinations_cont2 adapts the recursive memoization function into one that memoizes the continuation results.
combinations_cont2 works by intercepting the result in the continuation before passing it on to the actual continuation. Subsequent calls on the same key provide a continuation and this continuation is fed the answer we intercepted originally.
This demonstrates that we are able to:
Memoize using continuation passing style.
Achieve similar (ish) performance characteristics to the vanilla memoized version.
I hope this clears things up a little. Sorry, my blog code snippet was incomplete (I think I might have lost it when reformatting recently).

Filter an array or list by consecutive pairs based on a matching rule

This is probably trivial, and I do have a solution but I'm not happy with it. Somehow, (much) simpler forms don't seem to work and it gets messy around the corner cases (either first, or last matching pairs in a row).
To keep it simple, let's define the matching rule as any two or more numbers that have a difference of two. Example:
> filterTwins [1; 2; 4; 6; 8; 10; 15; 17]
val it : int list = [2; 4; 6; 8; 10; 15; 17]
The code I currently use is this, which just feels sloppy and overweight:
let filterTwins list =
let func item acc =
let prevItem, resultList = acc
match prevItem, resultList with
| 0, []
-> item, []
| var, [] when var - 2 = item
-> item, item::var::resultList
| var, hd::tl when var - 2 = item && hd <> var
-> item, item::var::resultList
| var, _ when var - 2 = item
-> item, item::resultList
| _
-> item, resultList
List.foldBack func list (0, [])
|> snd
I intended my own original exercise to experiment with List.foldBack, large lists and parallel programming (which went well) but ended up messing with the "easy" part...
Guide through the answers
Daniel's last, 113 characters*, easy to follow, slow
Kvb's 2nd, 106 characters* (if I include the function), easy, but return value requires work
Stephen's 2nd, 397 characters*, long winded and comparably complex, but fastest
Abel's, 155 characters*, based on Daniel's, allows duplicates (this wasn't a necessity, btw) and is relatively fast.
There were more answers, but the above were the most distinct, I believe. Hope I didn't hurt anybody's feelings by accepting Daniel's answer as solution: each and every one solution deserves to be the selected answer(!).
* counting done with function names as one character

Would this do what you want?
let filterTwins l =
let rec filter l acc flag =
match l with
| [] -> List.rev acc
| a :: b :: rest when b - 2 = a ->
filter (b::rest) (if flag then b::acc else b::a::acc) true
| _ :: t -> filter t acc false
filter l [] false
This is terribly inefficient, but here's another approach using more built-in functions:
let filterTwinsSimple l =
l
|> Seq.pairwise
|> Seq.filter (fun (a, b) -> b - 2 = a)
|> Seq.collect (fun (a, b) -> [a; b])
|> Seq.distinct
|> Seq.toList
Maybe slightly better:
let filterTwinsSimple l =
seq {
for (a, b) in Seq.pairwise l do
if b - 2 = a then
yield a
yield b
}
|> Seq.distinct
|> Seq.toList

How about this?
let filterPairs f =
let rec filter keepHead = function
| x::(y::_ as xs) when f x y -> x::(filter true xs)
| x::xs ->
let rest = filter false xs
if keepHead then x::rest else rest
| _ -> []
filter false
let test = filterPairs (fun x y -> y - x = 2) [1; 2; 4; 6; 8; 10; 15; 17]
Or if all of your list's items are unique, you could do this:
let rec filterPairs f s =
s
|> Seq.windowed 2
|> Seq.filter (fun [|a;b|] -> f a b)
|> Seq.concat
|> Seq.distinct
let test = filterPairs (fun x y -> y - x = 2) [1; 2; 4; 6; 8; 10; 15; 17]
EDIT
Or here's another alternative which I find elegant. First define a function for breaking a list into a list of groups of consecutive items satisfying a predicate:
let rec groupConsec f = function
| [] -> []
| x::(y::_ as xs) when f x y ->
let (gp::gps) = groupConsec f xs
(x::gp)::gps
| x::xs -> [x]::(groupConsec f xs)
Then, build your function by collecting all results back together, discarding any singletons:
let filterPairs f =
groupConsec f
>> List.collect (function | [_] -> [] | l -> l)
let test = filterPairs (fun x y -> y - x = 2) [1; 2; 4; 6; 8; 10; 15; 17]

The following solution is in the spirit of your own, but I use a discriminate union to encapsulate aspects of the algorithm and reign in the madness a bit:
type status =
| Keep of int
| Skip of int
| Tail
let filterTwins xl =
(Tail, [])
|> List.foldBack
(fun cur (prev, acc) ->
match prev with
| Skip(prev) when prev - cur = 2 -> (Keep(cur), cur::prev::acc)
| Keep(prev) when prev - cur = 2 -> (Keep(cur), cur::acc)
| _ -> (Skip(cur), acc))
xl
|> snd

Here's another solution which uses a similar discriminate union strategy as my other answer but it works on sequences lazily so you can watch those twin (primes?) roll in as they come:
type status =
| KeepTwo of int * int
| KeepOne of int
| SkipOne of int
| Head
let filterTwins xl =
let xl' =
Seq.scan
(fun prev cur ->
match prev with
| KeepTwo(_,prev) | KeepOne prev when cur - prev = 2 ->
KeepOne cur
| SkipOne prev when cur - prev = 2 ->
KeepTwo(prev,cur)
| _ ->
SkipOne cur)
Head
xl
seq {
for x in xl' do
match x with
| KeepTwo(a,b) -> yield a; yield b
| KeepOne b -> yield b
| _ -> ()
}

for completeness sake, I'll answer this with what I eventually came up with, based on the friendly suggestions in this thread.
The benefits of this approach are that it doesn't need Seq.distinct, which I believe is an improvement as it allows for duplicates. However, it still needs List.rev which doesn't make it the fastest. Nor is it the most succinct code (see comparison of solution in question itself).
let filterTwins l =
l
|> Seq.pairwise
|> Seq.fold (fun a (x, y) ->
if y - x = 2 then (if List.head a = x then y::a else y::x::a)
else a) [0]
|> List.rev
|> List.tail

Develop Reference

ruby bash windows laravel spring algorithm oracle macos go visual-studio

Performance of F# code terrible - performance

Related

Implementing LLL algorithm in Haskell

F# - Algorithm and strings

Boggle - count all possible paths on a N*N grid. Performance

Solving knapsack prob in F#: performance

Filter an array or list by consecutive pairs based on a matching rule

Categories

Resources