Erlang binary to lower case performance

Erlang binary to lower case performance - performance

My goal is to speed up performance of ASCII-only binary converting to lower case. I do not need any languages other than English. I've wrote and compare some variants:
Binary comprehension:
binary_comprehension(Binary) ->
<< <<if
C >= $A andalso C =< $Z -> C - $A + $a;
true -> C
end >>
|| <<C>> <= Binary >>.
List comprehension:
list_comprehension(Binary) ->
L = binary_to_list(Binary),
Lower =
[if
C >= $A andalso C =< $Z -> C - $A + $a;
true -> C
end || C <- L],
list_to_binary(Lower).
And regular string:lowercase.
And surprisingly list comprehension beat all others:
1> timer:tc(fun() -> lists:foreach(fun(_) -> tolower:list_comprehension(<<"QWEQWEIQEKQHWKEHKQWHEKQHWKEQWEKHQWLKL">>) end, L100000) end).
{267603,ok}
2> timer:tc(fun() -> lists:foreach(fun(_) -> tolower:binary_comprehension(<<"QWEQWEIQEKQHWKEHKQWHEKQHWKEQWEKHQWLKL">>) end, L100000) end).
{324383,ok}
3> timer:tc(fun() -> lists:foreach(fun(_) -> string:lowercase(<<"QWEQWEIQEKQHWKEHKQWHEKQHWKEQWEKHQWLKL">>) end, L100000) end).
{319819,ok}
Any ideas why double list conversion + comprehension is much faster than just binary transformation?
Maybe you know more powerful optimisation?
Update:
I also found that list-of-char version of string is also fast:
string_lowercase(Binary) ->
L = binary_to_list(Binary),
Lower = string:lowercase(L),
list_to_binary(Lower).
Run:
39> timer:tc(fun() -> lists:foreach(fun(_) -> tolower:string_to_lower(<<"QWEQWEIQEKQHWKEHKQWHEKQHWKEQWEKHQWLKL">>) end, L100000) end).
{277766,ok}

I made some modification of the code and changed test case. Test changes is not mandatory but I personally like more this way:
-module(tolower).
-compile(export_all).
u2l(C) when C >= $A andalso C =< $Z -> C + 32;
u2l(C) -> C.
binary_comprehension(Binary) ->
<< << (u2l(C)) >> || <<C>> <= Binary >>.
list_comprehension(Binary) ->
list_to_binary([u2l(C) || C <- binary_to_list(Binary)]).
list_recur(Binary) -> list_recur(binary_to_list(Binary), []).
list_recur([], Result) -> lists:reverse(Result);
list_recur([C | Tail], Result) when C >= $A andalso C =< $Z ->
list_recur(Tail, [(C + 32) | Result]);
list_recur([C | Tail], Result) ->
list_recur(Tail, [C | Result]).
string_to_lower(Binary) ->
list_to_binary(string:lowercase(binary_to_list(Binary))).
test() ->
L100000 = lists:seq(1, 100000),
TL0 = <<"QWEQWEIQEKQHWKEHKQWHEKQHWKEQWEKHQWLKL">>,
TL = binary:copy(TL0, 100000),
{R0, _} = timer:tc(fun() -> lists:foreach(fun(_) -> tolower:binary_comprehension(TL0) end, L100000) end),
{R1, _} = timer:tc(tolower, binary_comprehension, [TL]),
{R2, _} = timer:tc(tolower, list_comprehension, [TL]),
{R3, _} = timer:tc(tolower, list_recur, [TL]),
{R4, _} = timer:tc(string, lowercase, [TL]),
{R5, _} = timer:tc(tolower, string_to_lower, [TL]),
io:format("~n1.binary_comprehension = ~10w~n2.binary_comprehension = ~10w~n3. list_comprehension = ~10w~n4. list_recur = ~10w~n5. lowercase = ~10w~n6. string_to_lower = ~10w~n",
[R0,R1,R2,R3,R4,R5]).
Erlang shell shows that elapsed time not consistent due to concurrent nature of system. But the best time is for binary_comprehension as expected.
62> c(tolower).
tolower.erl:2: Warning: export_all flag enabled - all functions will be exported
{ok,tolower}
63> l(tolower).
{module,tolower}
64> tolower:test().
1.binary_comprehension = 109000
2.binary_comprehension = 94000
3. list_comprehension = 312001
4. list_recur = 344001
5. lowercase = 469002
6. string_to_lower = 218000
ok
65> tolower:test().
1.binary_comprehension = 140998
2.binary_comprehension = 93999
3. list_comprehension = 327994
4. list_recur = 296996
5. lowercase = 155997
6. string_to_lower = 280996
ok
66> tolower:test().
1.binary_comprehension = 124998
2.binary_comprehension = 93998
3. list_comprehension = 327995
4. list_recur = 296995
5. lowercase = 452993
6. string_to_lower = 202997
ok
67> tolower:test().
1.binary_comprehension = 125000
2.binary_comprehension = 94000
3. list_comprehension = 312000
4. list_recur = 282000
5. lowercase = 171000
6. string_to_lower = 266000
ok
Time on line 5 is different of time on line 6 because when you invoke string:lowercase/1 with binary argument it processed as utf8 sequence. When you invoke string:lowercase/1 with string argument utf8 processing is avoided. See code of string.erl from OTP for details.

Related

Implementing LLL algorithm in Haskell

I'm implementing the LLL basis reduction algorithm in Haskell. I'm basing my code on the pseudocode on Wikipedia. Here is what I have so far. Apologies for the code dump; I strongly suspect the issue lies in lll but I'm giving everything just in case.
import Linear as L
f v x = v `L.dot` x
gram_schmidt b =
let aux vs us =
case vs of
v:t -> let vus = map (\u -> project u v) us
s = foldr (^+^) zero vus
u = v ^-^ s in
aux t (us++[u])
[] -> us
in aux b []
swap :: Int -> Int -> [a] -> [a]
swap i j xs =
let elemI = xs !! i
elemJ = xs !! j
left = take i xs
middle = take (j - i - 1) (drop (i + 1) xs)
right = drop (j + 1) xs
in left ++ [elemJ] ++ middle ++ [elemI] ++ right
update i xs new =
let left = take (i-1) xs
right = drop (i) xs
in left ++ [new] ++ right
sort_vecs vs = map snd (sort (zip (map norm vs) vs))
lll :: Int -> [[Double]] -> Double -> [[Double]]
lll d b delta =
let b' = gram_schmidt b
aux :: [[Double]] -> [[Double]] -> Int -> [[Double]]
aux b b' k =
if k >= d then
b
else
let aux2 :: [[Double]] -> [[Double]] -> Int -> [[Double]]
aux2 b b' j =
if j < 0 then
let mu = (f (b!!k) (b'!!(k-1))) / (f (b'!!(k-1)) (b'!!(k-1))) in
if f (b'!!k) (b'!!k) >= (delta-mu^2) * f (b'!!(k-1)) (b'!!(k-1)) then
aux b b' (k+1)
else
let bb = swap k (k-1) b
bb' = gram_schmidt bb in
aux bb bb' (max (k-1) 1)
else
let mu = (f (b!!k) (b'!!j)) / (f (b'!!j) (b'!!j)) in
if abs mu > 0.5 then
let bk = b!!k
bj = b!!j
bb = update k b (bk ^-^ (fromIntegral (round mu)) *^ bj)
bb' = gram_schmidt bb in
aux2 bb bb' (j-1)
else
aux2 b b' (j-1)
in aux2 b b' (k-1)
in sort_vecs (aux b b' 1)
My issue is that it seems to find a basis of a sublattice. In particular, lll d [[-0.8526334764831849,-3.125000000000004e-2],[-1.2941941738241598,4.419417382415916e-2]] 0.75 returns [[0.41107277914220997,0.10669417382415924],[-1.2941941738241598,4.419417382415916e-2]], a basis for a index-2 sublattice, and with basis which are almost-parallel. I've been staring at this code for ages to no avail (I thought there was an issue with update where (i-1) should be (i) and (i) should be (i+1) but this caused an infinite loop). Any help is greatly appreciated.

F# - Algorithm and strings

Let's says I have a string of a length N that contains only 0 or 1. I want to split that string in multiples strings and each string should contains only one digit.
Example:
00011010111
Should be split into:
000
11
0
1
0
111
The only solution I can think of if using a for loop with a string builder (Written in pseudo code below, more c# like sorry):
result = new list<string>
tmpChar = ""
tmpString = ""
for each character c in MyString
if tmpchar != c
if tmpsString != ""
result.add tmpString
tmpString = ""
endIf
tmpchar = c
endIf
tmpString += tmpChar
endFor
Do you have any other solution and maybe a clever solution that use a more functional approach?

I think Seq.scan would be a good fit for this, this is a very procedural problem in nature, preserving the order like that. But here is code that I believe does what you are asking.
"00011010111"
|> Seq.scan (fun (s, i) x ->
match s with
| Some p when p = x -> Some x, i
| _ -> Some x, i + 1 ) (None, 0)
|> Seq.countBy id
|> Seq.choose (function
| (Some t, _), n -> Some(t, n)
| _ -> None )
|> Seq.toList

Perhaps something along the lines of:
let result =
let rec groupWhileSame xs result =
match xs with
| a when a |> Seq.isEmpty -> result
| _ ->
let head = xs |> Seq.head
let str = xs |> Seq.takeWhile ((=) head)
let rest = xs |> Seq.skipWhile ((=) head)
groupWhileSame rest (Seq.append result [str])
groupWhileSame (myStr) []

Seq.fold (fun (acc:(string list)) x ->
match acc with
| y::rst when y.StartsWith(string x) -> (string x) + y::rst
| _ -> (string x)::acc)
[]
"00011010111"

Consider this function (which is generic):
let chunk s =
if Seq.isEmpty s then []
else
let rec chunk items chunks =
if Seq.isEmpty items then chunks
else
let chunks' =
match chunks with
| [] -> [(Seq.head items, 1)]
| x::xs ->
let c,n = x in let c' = Seq.head items in
if c = c' then (c, n + 1) :: xs else (c', 1) :: x :: xs
chunk (Seq.tail items) chunks'
chunk s [] |> List.rev
It returns a list of tuples, where each tuple represents an item and its repetitions.
So
"00011010111" |> Seq.toList |> chunk
actually returns
[('0', 3); ('1', 2); ('0', 1); ('1', 1); ('0', 1); ('1', 3)]
Basically, we're doing run length encoding (which is admittedly a bit wasteful in the case of your example string).
To get the list of strings that you want, we use code like following:
"00011010111"
|> Seq.toList
|> chunk
|> List.map (fun x -> let c,n = x in new string(c, n))

Here's a working version of OP's proposal with light syntax:
let chunk (s: string) =
let result = System.Collections.Generic.List<string>()
let mutable tmpChar = ""
let mutable tmpString = ""
for c in s do
if tmpChar <> string c then
if tmpString <> "" then
result.Add tmpString
tmpString <- ""
tmpChar <- string c
tmpString <- tmpString + tmpChar
result.Add tmpString
result
No attempt was made to follow a functional style.

Knuth-Morris-Pratt implementation in Haskell -- Index out of bounds

I've used the pseudocode from Wikipedia in an attempt to write a KMP algorithm in Haskell.
It's giving "index out of bounds" when I try to search beyond the length of the pattern and I can't seem to find the issue; my "fixes" have only ruined the result.
import Control.Monad
import Control.Lens
import qualified Data.ByteString.Char8 as C
import qualified Data.Vector.Unboxed as V
(!) :: C.ByteString -> Int -> Char
(!) = C.index
-- Make the table for the KMP. Directly from Wikipedia. Works as expected for inputs from Wikipedia article.
mkTable :: C.ByteString -> V.Vector Int
mkTable pat = make 2 0 (ix 0 .~ (negate 1) $ V.replicate l 0)
where
l = C.length pat
make :: Int -> Int -> V.Vector Int -> V.Vector Int
make p c t
| p >= l = t
| otherwise = proc
where
proc | pat ! (p-1) == pat ! c
= make (p+1) (c+1) (ix p .~ (c+1) $ t)
| c > 0 = make p (t V.! c) t
| otherwise = make (p+1) c (ix p .~ 0 $ t)
kmp :: C.ByteString -> C.ByteString -> V.Vector Int -> Int
kmp text pat tbl = search 0 0
where
l = C.length text
search m i
| m + i >= l = l
| otherwise = cond
where
-- The conditions for the loop, given in the wiki article
cond | pat ! i == text ! (m+i)
= if i == C.length pat - 1
then m
else search m (i+1)
| tbl V.! i > (-1)
= search (m + i - (tbl V.! i)) (tbl V.! i)
| otherwise
= search 0 (m+1)
main :: IO()
main = do
t <- readLn
replicateM_ t $ do
text <- C.getLine
pat <- C.getLine
putStrLn $ kmp text pat (mkTable pat)

Simple solution: I mixed up m and i in the last condition of kmp.
| otherwise = search 0 (m+1)
Becomes
| otherwise = search (m+1) 0
And the issue is resolved.
Aside from that, it's necessary to use unboxed arrays in the ST monad or the table generation takes an absurd amount of time.

Why is the "better" digit-listing function slower?

I was playing around with Project Euler #34, and I wrote these functions:
import Data.Time.Clock.POSIX
import Data.Char
digits :: (Integral a) => a -> [Int]
digits x
| x < 10 = [fromIntegral x]
| otherwise = let (q, r) = x `quotRem` 10 in (fromIntegral r) : (digits q)
digitsByShow :: (Integral a, Show a) => a -> [Int]
digitsByShow = map (\x -> ord x - ord '0') . show
I thought that for sure digits has to be the faster one, as we don't convert to a String. I could not have been more wrong. I ran the two versions via pe034:
pe034 digitFunc = sum $ filter sumFactDigit [3..2540160]
where
sumFactDigit :: Int -> Bool
sumFactDigit n = n == (sum $ map sFact $ digitFunc n)
sFact :: Int -> Int
sFact n
| n == 0 = 1
| n == 1 = 1
| n == 2 = 2
| n == 3 = 6
| n == 4 = 24
| n == 5 = 120
| n == 6 = 720
| n == 7 = 5040
| n == 8 = 40320
| n == 9 = 362880
main = do
begin <- getPOSIXTime
print $ pe034 digitsByShow -- or digits
end <- getPOSIXTime
print $ end - begin
After compiling with ghc -O, digits consistently takes .5 seconds, while digitsByShow consistently takes .3 seconds. Why is this so? Why is the function which stays within Integer arithmetic slower, whereas the function which goes into string comparison is faster?
I ask this because I come from programming in Java and similar languages, where the % 10 trick of generating digits is way faster than the "convert to String" method. I haven't been able to wrap my head around the fact that converting to a string could be faster.

This is the best I can come up with.
digitsV2 :: (Integral a) => a -> [Int]
digitsV2 n = go n []
where
go x xs
| x < 10 = fromIntegral x : xs
| otherwise = case quotRem x 10 of
(q,r) -> go q (fromIntegral r : xs)
when compiled with -O2 and tested with Criterion
digits runs in 470.4 ms
digitsByShow runs in 421.8 ms
digitsV2 runs in 258.0 ms
results may vary
edit:
I am not sure why building the list like this helps so much.
But you can improve your codes speed by strictly evaluating quotRem x 10
You can do this with BangPatterns
| otherwise = let !(q, r) = x `quotRem` 10 in (fromIntegral r) : (digits q)
or with case
| otherwise = case quotRem x 10 of
(q,r) -> fromIntegral r : digits q
Doing this drops digits down to 323.5 ms
edit: time without using Criterion
digits = 464.3 ms
digitsStrict = 328.2 ms
digitsByShow = 259.2 ms
digitV2 = 252.5 ms
note: The criterion package measures software performance.

Let's investigate why #No_signal's solution is faster.
I made three runs of ghc:
ghc -O2 -ddump-simpl digits.hs >digits.txt
ghc -O2 -ddump-simpl digitsV2.hs >digitsV2.txt
ghc -O2 -ddump-simpl show.hs >show.txt
digits.hs
digits :: (Integral a) => a -> [Int]
digits x
| x < 10 = [fromIntegral x]
| otherwise = let (q, r) = x `quotRem` 10 in (fromIntegral r) : (digits q)
main = return $ digits 1
digitsV2.hs
digitsV2 :: (Integral a) => a -> [Int]
digitsV2 n = go n []
where
go x xs
| x < 10 = fromIntegral x : xs
| otherwise = let (q, r) = x `quotRem` 10 in go q (fromIntegral r : xs)
main = return $ digits 1
show.hs
import Data.Char
digitsByShow :: (Integral a, Show a) => a -> [Int]
digitsByShow = map (\x -> ord x - ord '0') . show
main = return $ digitsByShow 1
If you'd like to view the complete txt files, I placed them on ideone (rather than paste a 10000 char dump here):
digits.txt
digitsV2.txt
show.txt
If we carefully look through digits.txt, it appears that this is the relevant section:
lvl_r1qU = __integer 10
Rec {
Main.$w$sdigits [InlPrag=[0], Occ=LoopBreaker]
:: Integer -> (# Int, [Int] #)
[GblId, Arity=1, Str=DmdType <S,U>]
Main.$w$sdigits =
\ (w_s1pI :: Integer) ->
case integer-gmp-1.0.0.0:GHC.Integer.Type.ltInteger#
w_s1pI lvl_r1qU
of wild_a17q { __DEFAULT ->
case GHC.Prim.tagToEnum# # Bool wild_a17q of _ [Occ=Dead] {
False ->
let {
ds_s16Q [Dmd=<L,U(U,U)>] :: (Integer, Integer)
[LclId, Str=DmdType]
ds_s16Q =
case integer-gmp-1.0.0.0:GHC.Integer.Type.quotRemInteger
w_s1pI lvl_r1qU
of _ [Occ=Dead] { (# ipv_a17D, ipv1_a17E #) ->
(ipv_a17D, ipv1_a17E)
} } in
(# case ds_s16Q of _ [Occ=Dead] { (q_a11V, r_X12h) ->
case integer-gmp-1.0.0.0:GHC.Integer.Type.integerToInt r_X12h
of wild3_a17c { __DEFAULT ->
GHC.Types.I# wild3_a17c
}
},
case ds_s16Q of _ [Occ=Dead] { (q_X12h, r_X129) ->
case Main.$w$sdigits q_X12h
of _ [Occ=Dead] { (# ww1_s1pO, ww2_s1pP #) ->
GHC.Types.: # Int ww1_s1pO ww2_s1pP
}
} #);
True ->
(# GHC.Num.$fNumInt_$cfromInteger w_s1pI, GHC.Types.[] # Int #)
}
}
end Rec }
digitsV2.txt:
lvl_r1xl = __integer 10
Rec {
Main.$wgo [InlPrag=[0], Occ=LoopBreaker]
:: Integer -> [Int] -> (# Int, [Int] #)
[GblId, Arity=2, Str=DmdType <S,U><L,U>]
Main.$wgo =
\ (w_s1wh :: Integer) (w1_s1wi :: [Int]) ->
case integer-gmp-1.0.0.0:GHC.Integer.Type.ltInteger#
w_s1wh lvl_r1xl
of wild_a1dp { __DEFAULT ->
case GHC.Prim.tagToEnum# # Bool wild_a1dp of _ [Occ=Dead] {
False ->
case integer-gmp-1.0.0.0:GHC.Integer.Type.quotRemInteger
w_s1wh lvl_r1xl
of _ [Occ=Dead] { (# ipv_a1dB, ipv1_a1dC #) ->
Main.$wgo
ipv_a1dB
(GHC.Types.:
# Int
(case integer-gmp-1.0.0.0:GHC.Integer.Type.integerToInt ipv1_a1dC
of wild2_a1ea { __DEFAULT ->
GHC.Types.I# wild2_a1ea
})
w1_s1wi)
};
True -> (# GHC.Num.$fNumInt_$cfromInteger w_s1wh, w1_s1wi #)
}
}
end Rec }
I actually couldn't find the relevant section for show.txt. I'll work on that later.
Right off the bat, digitsV2.hs produces shorter code. That's probably a good sign for it.
digits.hs seems to be following this psuedocode:
def digits(w_s1pI):
if w_s1pI < 10: return [fromInteger(w_s1pI)]
else:
ds_s16Q = quotRem(w_s1pI, 10)
q_X12h = ds_s16Q[0]
r_X12h = ds_s16Q[1]
wild3_a17c = integerToInt(r_X12h)
ww1_s1pO = r_X12h
ww2_s1pP = digits(q_X12h)
ww2_s1pP.pushFront(ww1_s1pO)
return ww2_s1pP
digitsV2.hs seems to be following this psuedocode:
def digitsV2(w_s1wh, w1_s1wi=[]): # actually disguised as go(), as #No_signal wrote
if w_s1wh < 10:
w1_s1wi.pushFront(fromInteger(w_s1wh))
return w1_s1wi
else:
ipv_a1dB, ipv1_a1dC = quotRem(w_s1wh, 10)
w1_s1wi.pushFront(integerToIn(ipv1a1dC))
return digitsV2(ipv1_a1dC, w1_s1wi)
It might not be that these functions mutate lists like my psuedocode suggests, but this immediately suggests something: it looks as if digitsV2 is fully tail-recursive, whereas digits is actually not (may have to use some Haskell trampoline or something). It appears as if Haskell needs to store all the remainders in digits before pushing them all to the front of the list, whereas it can just push them and forget about them in digitsV2. This is purely speculation, but it is well-founded speculation.

Filter an array or list by consecutive pairs based on a matching rule

This is probably trivial, and I do have a solution but I'm not happy with it. Somehow, (much) simpler forms don't seem to work and it gets messy around the corner cases (either first, or last matching pairs in a row).
To keep it simple, let's define the matching rule as any two or more numbers that have a difference of two. Example:
> filterTwins [1; 2; 4; 6; 8; 10; 15; 17]
val it : int list = [2; 4; 6; 8; 10; 15; 17]
The code I currently use is this, which just feels sloppy and overweight:
let filterTwins list =
let func item acc =
let prevItem, resultList = acc
match prevItem, resultList with
| 0, []
-> item, []
| var, [] when var - 2 = item
-> item, item::var::resultList
| var, hd::tl when var - 2 = item && hd <> var
-> item, item::var::resultList
| var, _ when var - 2 = item
-> item, item::resultList
| _
-> item, resultList
List.foldBack func list (0, [])
|> snd
I intended my own original exercise to experiment with List.foldBack, large lists and parallel programming (which went well) but ended up messing with the "easy" part...
Guide through the answers
Daniel's last, 113 characters*, easy to follow, slow
Kvb's 2nd, 106 characters* (if I include the function), easy, but return value requires work
Stephen's 2nd, 397 characters*, long winded and comparably complex, but fastest
Abel's, 155 characters*, based on Daniel's, allows duplicates (this wasn't a necessity, btw) and is relatively fast.
There were more answers, but the above were the most distinct, I believe. Hope I didn't hurt anybody's feelings by accepting Daniel's answer as solution: each and every one solution deserves to be the selected answer(!).
* counting done with function names as one character

Would this do what you want?
let filterTwins l =
let rec filter l acc flag =
match l with
| [] -> List.rev acc
| a :: b :: rest when b - 2 = a ->
filter (b::rest) (if flag then b::acc else b::a::acc) true
| _ :: t -> filter t acc false
filter l [] false
This is terribly inefficient, but here's another approach using more built-in functions:
let filterTwinsSimple l =
l
|> Seq.pairwise
|> Seq.filter (fun (a, b) -> b - 2 = a)
|> Seq.collect (fun (a, b) -> [a; b])
|> Seq.distinct
|> Seq.toList
Maybe slightly better:
let filterTwinsSimple l =
seq {
for (a, b) in Seq.pairwise l do
if b - 2 = a then
yield a
yield b
}
|> Seq.distinct
|> Seq.toList

How about this?
let filterPairs f =
let rec filter keepHead = function
| x::(y::_ as xs) when f x y -> x::(filter true xs)
| x::xs ->
let rest = filter false xs
if keepHead then x::rest else rest
| _ -> []
filter false
let test = filterPairs (fun x y -> y - x = 2) [1; 2; 4; 6; 8; 10; 15; 17]
Or if all of your list's items are unique, you could do this:
let rec filterPairs f s =
s
|> Seq.windowed 2
|> Seq.filter (fun [|a;b|] -> f a b)
|> Seq.concat
|> Seq.distinct
let test = filterPairs (fun x y -> y - x = 2) [1; 2; 4; 6; 8; 10; 15; 17]
EDIT
Or here's another alternative which I find elegant. First define a function for breaking a list into a list of groups of consecutive items satisfying a predicate:
let rec groupConsec f = function
| [] -> []
| x::(y::_ as xs) when f x y ->
let (gp::gps) = groupConsec f xs
(x::gp)::gps
| x::xs -> [x]::(groupConsec f xs)
Then, build your function by collecting all results back together, discarding any singletons:
let filterPairs f =
groupConsec f
>> List.collect (function | [_] -> [] | l -> l)
let test = filterPairs (fun x y -> y - x = 2) [1; 2; 4; 6; 8; 10; 15; 17]

The following solution is in the spirit of your own, but I use a discriminate union to encapsulate aspects of the algorithm and reign in the madness a bit:
type status =
| Keep of int
| Skip of int
| Tail
let filterTwins xl =
(Tail, [])
|> List.foldBack
(fun cur (prev, acc) ->
match prev with
| Skip(prev) when prev - cur = 2 -> (Keep(cur), cur::prev::acc)
| Keep(prev) when prev - cur = 2 -> (Keep(cur), cur::acc)
| _ -> (Skip(cur), acc))
xl
|> snd

Here's another solution which uses a similar discriminate union strategy as my other answer but it works on sequences lazily so you can watch those twin (primes?) roll in as they come:
type status =
| KeepTwo of int * int
| KeepOne of int
| SkipOne of int
| Head
let filterTwins xl =
let xl' =
Seq.scan
(fun prev cur ->
match prev with
| KeepTwo(_,prev) | KeepOne prev when cur - prev = 2 ->
KeepOne cur
| SkipOne prev when cur - prev = 2 ->
KeepTwo(prev,cur)
| _ ->
SkipOne cur)
Head
xl
seq {
for x in xl' do
match x with
| KeepTwo(a,b) -> yield a; yield b
| KeepOne b -> yield b
| _ -> ()
}

for completeness sake, I'll answer this with what I eventually came up with, based on the friendly suggestions in this thread.
The benefits of this approach are that it doesn't need Seq.distinct, which I believe is an improvement as it allows for duplicates. However, it still needs List.rev which doesn't make it the fastest. Nor is it the most succinct code (see comparison of solution in question itself).
let filterTwins l =
l
|> Seq.pairwise
|> Seq.fold (fun a (x, y) ->
if y - x = 2 then (if List.head a = x then y::a else y::x::a)
else a) [0]
|> List.rev
|> List.tail

Develop Reference

ruby bash windows laravel spring algorithm oracle macos go visual-studio

Erlang binary to lower case performance - performance

Related

Implementing LLL algorithm in Haskell

F# - Algorithm and strings

Knuth-Morris-Pratt implementation in Haskell -- Index out of bounds

Why is the "better" digit-listing function slower?

Filter an array or list by consecutive pairs based on a matching rule

Categories

Resources