Ocaml- partial derivative of a regular expression - algorithm

I got this code:
type regexp =
| V (* void *)
| E (* epsilon *)
| C of char (* char *)
| U of regexp * regexp (* a + b *)
| P of regexp * regexp (* a.b *)
| S of regexp (* a* *)
;;
...
module ReS = Set.Make (struct
type t = regexp
let compare = compare
end)
(* module/type for pairs of sets of regular expressions *)
module RePS = Set.Make (struct
type t = ReS.t * ReS.t
let compare = compare
end)
(*module/type for set of chars *)
module CS = Set.Make(Char)
let ewps = ReS.exists ewp;;
let atmost_epsilons = ReS.for_all atmost_epsilon;;
let infinitys = ReS.exists infinity;;
let rigth_concat s = function
| V -> ReS.empty
| E -> s
| r -> ReS.map (fun e -> P (e,r)) s
;;
let ( *.* ) = rigth_concat;;
(* partial derivative of a regular expression *)
let rec pd a re = function
| V | E -> ReS.empty
| C b when b=a -> ReS.singleton E
| C b -> ReS.empty
| U (r, s) -> ReS.union (pd a r) (pd a s)
| P (r, s) when ewp a -> ReS.union ((pd a r) *.* s) (pd a s)
| P (r, s) -> (pd a r) *.* s
| S r as re -> (pd a r) *.* re
;;
let rec unions f s =
ReS.fold (fun re acc -> ReS.union (f re) acc ) s ReS.empty
;;
let rec pds a s = unions (pd a) s;;
let rec pdw (sr: ReS.t) = function
| [] -> sr
| a::tl -> pds a (pdw sr tl)
;;
I checked the types of return values and i think they are correct, but it returns the following error and I am not sure why.
This expression has type regexp -> ReS.t but an expression was
expected of type ReS.t
In function "pd" in line that has error
| U (r, s) -> ReS.union (pd a r) (pd a s)

I believe your problem is caused by the fact that function supplies an implicit parameter. This expression:
function None -> 0 | Some x -> x
is a function with one parameter. So in your case you have defined pd to have three parameters. It looks to me like you're expecting it to have two parameters.
You can probably change your function ... to match re with instead. Or you can remove the explicit re parameter, and use the parameter that's implicit in function.

Related

F# function to sort Excel "variants" in Excel-Dna

When trying to sort a 1d array of variants (here by "variant" I mean all the Excel types, eg bool, double (and date), string, various errors...) with the following function :
[<ExcelFunction(Category="test", Description="sort variants.")>]
let sort_variant ([<ExcelArgument(Description= "Array to sort.")>] arr : obj[]): obj[] =
arr
|> Array.sort
I get the following error : Error FS0001 The type 'obj' does not support the 'comparison' constraint. For example, it does not support the 'System.IComparable' interface, probably meaning that there is no generic ordering function available on all obj types.
But Excel has a natural ordering function, which I'd like to emulate (at least ballpark). Eg double (and dates) < string < bool < error...
My question : What is the idiomatic way to sort an array of "variants" in F# / Excel-Dna? (I am after a function which takes an obj[] and return an obj[], nothing else, not a macro...)
My (temporary?) solution :
I created a “discriminated union” type
type XLVariant = D of double | S of string | B of bool | NIL of string
(not really sure whether NIL is necessary but it did not hurt. Also in my real life code I added a DT of DateTime instance as I need to distinguish dates from doubles).
let toXLVariant (x : obj) : XLVariant =
match x with
| :? double as d -> D d
| :? string as s -> S s
| :? bool as b -> B b
| _ -> NIL "unknown match"
let ofXLVariant (x : XLVariant) : obj =
match x with
| D d -> box d
| S s -> box s
| B b -> box b
| NIL _ -> box ExcelError.ExcelErrorRef
[<ExcelFunction(Category="test", Description="sort variants.")>]
let sort_variant ([<ExcelArgument(Description= "Array to sort.")>] arr : obj[]): obj[] =
arr
|> Array.map toXLVariant
|> Array.sort
|> Array.map ofXLVariant
(for the sake of simplicity, I missed the Errors types, but the idea is the same)
This seems a bit more explicit to me, since it just sticks to the CLR type system:
// Compare objects in the way Excel would
let xlCompare (v1 : obj) (v2 : obj) =
match (v1, v2) with
| (:? double as d1), (:? double as d2) -> d1.CompareTo(d2)
| (:? double), _ -> -1
| _, (:? double) -> 1
| (:? string as s1), (:? string as s2) -> s1.CompareTo(s2)
| (:? string), _ -> -1
| _, (:? string) -> 1
| (:? bool as b1), (:? bool as b2) -> b1.CompareTo(b2)
| (:? bool), _ -> -1
| _, (:? bool) -> 1
| _ -> 2
[<ExcelFunction(Category="test", Description="sort variants.")>]
let sort_variant ([<ExcelArgument(Description= "Array to sort.")>] arr : obj[]): obj[] =
Array.sortWith xlCompare arr

OCaml Syntax Error in let..in

I am new to Ocaml and I am trying to print out an AST tree, however I am getting a syntax error at the in of the mon_to_string function.
Note: All the functions used are defined.
let rec print exp =
let rec the_exp tree out = match tree with
[] -> out
and exp_to_string tree = match tree with
| Ast.Expression.Identifier(x) -> print_identifier x
| Ast.Expression.Literal(x) -> print_literal x
| Ast.Expression.BinaryExp(x) -> print_bin_exp x
| Ast.Expression.UnaryExp(x) -> print_unary_exp x
in "<Expression>\n" ^ exp_to_string exp ^ "</Expression>\n"
and mon_to_string tree = match tree with
| Ast.Monitor.ExpressionGuard(x) -> print_exp_guard x
| Ast.Monitor.QuantifiedGuard(x) -> print_quant_guard x
| Ast.Monitor.Conditional(x) -> print_conditional x
| Ast.Monitor.Evaluate(x) -> print_eval x
| Ast.Monitor.Choice(x) -> print_choice x
in "<Monitor>" ^ "\n" ^ mon_to_string exp ^ "</Monitor>\n"
There is no let corresponding to the last in.
The correct syntax goes like this:
let rec v1 = exp1
and v2 = exp2
and v3 = exp3
in exp4
You have:
let rec v1 = exp1
and v2 = exp2
in exp3
and v3 = exp4
in exp5

How to abstract non-linear list iteration schemes into a re-usable algorithm?

On the odd chance, that someone has a brilliant idea...
I am not sure if there is a good way to generalize that.
EDIT: I think it might be nice to explain exactly what the inputs and outputs are. The code below is only how I approached the solution.
Inputs: data, recipe
data: set of string, string list here also called "set of named lists"
recipe: list of commands
Command Print (literal|list reference)
Adds the literal to the output or if it is a list reference, it adds the head of the referenced list to the output.
Command While (list reference)
when referenced list not empty --> next command
when referenced list empty --> skip entries in recipe list past the matching Wend.
Command Wend (list reference)
replace referenced list with tail (reference list)
when referenced list is empty, next command
when referenced list is not empty, next command is the matching while above.
Outputs: string list
The best answer is the implementation of that which is shortest and which allows to re-use that algorithm in new contexts.
This is not just a programming problem for the fun of it, btw. It is basically what happens if you try to implement data driven text templating.
The code below is my attempt to solve this problem.
The first code snippet is a non-generalized solution.
The second code snippet is an attempt to isolate the algorithm.
If you play with the code, simply paste the second snippet below the first snippet and both versions are working.
The whole topic is about understanding better how to separate the iteration algorithm from the rest of the code and then to simply apply it, in contrast of having all the other code within.
Would it not be great, if there was a way to abstract the way the statements are being processed and the looping of the while/wend, such,
that it can be reused in my main code, just as I keep re-using other "iteration schemes", such as List.map?
The commonalities between my main code and this study are:
An evolving "environment" which is threaded through all steps of the computation.
Collections, which need to be iterated in a well-formed nested manner. (Malformed would be: while x while y wend x wend y)
A series of "execution steps" form the body of each of those "while wend" loops.
Done in a "pure" manner. As you will note, nothing is mutable in the study. Want to keep it like that.
Each "While" introduces a new scope (as for binding values), which is discarded again, once the while loop is done.
So, I am looking for something like:
run: CommandClassifier -> commandExecutor -> Command list -> EnvType -> EnvType
where
CommandClassifier could be a function of the form Command -> NORMAL|LOOP_START|LOOP_END
and commandexecutor: Command -> EnvType -> EnvType
Of course, nesting of those while-blocks would not be limited to 2 (just tried to keep the testProgram() small).
SideNote: the "commands list" is an AST from a preceding parser run, but that should not really matter.
type MiniLanguage =
| Print of string
| While of string
| Wend of string
let testProgram =
[ Print("Hello, I am your Mini Language program")
While("names")
Print("<names>")
While("pets")
Print("<pets>")
Wend("pets")
Print("Done with pets.")
Wend("names")
Print("Done with names.")
]
type MiniEnvironment = { Bindings : Map<string,string>; Collections : Map<string, string list> }
let init collections =
{ Bindings = Map.empty; Collections = Map.ofList collections}
let bind n v env =
let newBindings =
env.Bindings
|> Map.remove n
|> Map.add n v
{ env with Bindings = newBindings; }
let unbind n env =
{ env with Bindings = Map.remove n env.Bindings; }
let bindingValue n env =
if env.Bindings.ContainsKey n then
Some(env.Bindings.Item n)
else
None
let consumeFirstFromCollection n env =
if env.Collections.ContainsKey n then
let coll = env.Collections.Item n
match coll with
| [] -> env |> unbind n
| _ ->
let first = coll.Head
let newCollections =
env.Collections
|> Map.remove n
|> Map.add n coll.Tail
{ env with Collections = newCollections }
|> bind n first
else failwith ("Unknown collection: " + n)
// All do functions take env - the execution environment - as last argument.
// All do functions return (a new) env as single return parameter.
let rec doPrint (s : string) env =
if s.StartsWith("<") && s.EndsWith(">") then
match bindingValue (s.Substring (1, s.Length - 2 )) env with
| Some(v) -> v
| _ -> s
else s
|> printfn "%s"
env
let rec skipPastWend name code =
match code with
| (Wend(cl) :: rest) when cl = name -> rest
| [] -> failwith "No Wend found!"
| (_ :: rest) -> skipPastWend name rest
let rec doWhileX name code env =
match code with
| (Print(s) :: rest) -> env |> (doPrint s) |> doWhileX name rest
| (While(cn) :: rest) -> env |> doWhile cn rest |> ignore; env |> doWhileX name (skipPastWend cn rest)
| (Wend(cn) :: rest) when cn = name -> env
| [] -> failwith ("While without Wend for: " + name)
| _ -> failwith ("nested while refering to same collection!")
and doWhile name code env =
let e0 = env |> consumeFirstFromCollection name
match bindingValue name e0 with
| Some(s) ->
e0 |> doWhileX name code |> doWhile name code
| None -> env
let rec run (program : MiniLanguage list) env =
match program with
| (Print(s) :: rest) -> env |> (doPrint s) |> run rest
| (While(cn) :: rest) ->
env
|> doWhile cn rest |> ignore
env |> run (skipPastWend cn program)
| (Wend(cn) :: rest) -> failwith "wend found in run()"
| [] -> env
let test() =
init [ "names", ["name1"; "name2"; "name3"; ]; "pets", ["pet1"; "pet2"] ]
|> run testProgram
|> printfn "%A"
(*
Running test() yields:
Hello, I am your Mini Language program
name1
pet1
pet2
Done with pets.
name2
pet1
pet2
Done with pets.
name3
pet1
pet2
Done with pets.
Done with names.
{Bindings = map [];
Collections =
map [("names", ["name1"; "name2"; "name3"]); ("pets", ["pet1"; "pet2"])];}
*)
Here my first version of isolating the algorithm. The number of callbacks is not entirely pretty. Can anyone come up with something simpler?
// The only function I had to "modify" to work with new "generalized" algorithm.
let consumeFirstFromCollection1 n env =
if env.Collections.ContainsKey n then
let coll = env.Collections.Item n
match coll with
| [] -> (env |> unbind n , false)
| _ ->
let first = coll.Head
let newCollections =
env.Collections
|> Map.remove n
|> Map.add n coll.Tail
({ env with Collections = newCollections }
|> bind n first , true)
else failwith ("Unknown collection: " + n)
type NamedList<'n,'t when 'n : comparison> = 'n * List<'t>
type Action<'a,'c> = 'c -> 'a -> 'a
type LoopPreparer<'a,'c> = 'c -> 'a -> 'a * bool
type CommandType = | RUN | BEGIN | END
type CommandClassifier<'c> = 'c -> CommandType
type Skipper<'c> = 'c -> List<'c> -> List<'c>
type InterpreterContext<'a,'c> =
{ classifier : CommandClassifier<'c>
executor : Action<'a,'c>
skipper : Skipper<'c>
prepareLoop : LoopPreparer<'a,'c>
isMatchingEnd : 'c -> 'c -> bool
}
let interpret (context : InterpreterContext<'a,'c>) (program : 'c list) (env : 'a) : 'a =
let rec loop front (code : 'c list) e =
let e0,hasData = e |> context.prepareLoop front
if hasData
then
e0
|> loop1 front (code)
|> loop front (code)
else e
and loop1 front code e =
match code with
| x :: more when (context.classifier x) = RUN ->
//printfn "RUN %A" x
e |> context.executor x |> loop1 front more
| x :: more when (context.classifier x) = BEGIN ->
//printfn "BEGIN %A" x
e |> loop x more |> ignore
e |> loop1 front (context.skipper x more)
| x :: more when (((context.classifier x) = END) && (context.isMatchingEnd front x)) -> /// && (context.isMatchingEnd front x)
//printfn "END %A" x
e
| [] -> failwith "No END."
| _ -> failwith "TODO: Not sure which case this is. But it is not a legal one!"
let rec interpr code e =
match code with
| [] -> e
| (first :: rest) ->
match context.classifier first with
| RUN -> env |> context.executor first |> interpr rest
| BEGIN ->
e |> loop first rest |> ignore
e |> interpr (context.skipper first rest)
| END -> failwith "END without BEGIN."
interpr program env
let test1() =
let context : InterpreterContext<MiniEnvironment,MiniLanguage> =
{ classifier = fun c-> match c with | MiniLanguage.Print(_) -> RUN | MiniLanguage.While(_) -> BEGIN | MiniLanguage.Wend(_) -> END;
executor = fun c env -> match c with | Print(s) -> doPrint s env | _ -> failwith "Not a known command.";
skipper = fun c cl -> match c with | While(n) -> skipPastWend n cl | _ -> failwith "first arg of skipper SHALL be While!"
prepareLoop = fun c env -> match c with | While(n) -> (consumeFirstFromCollection1 n env) | _ -> failwith "first arg of skipper SHALL be While!"
isMatchingEnd = fun cwhile cx -> match cwhile,cx with | (While(n),Wend(p)) when n = p -> true | _ -> false
}
init [ "names", ["name1"; "name2"; "name3"; ]; "pets", ["pet1"; "pet2"] ]
|> interpret context testProgram
|> printfn "%A"

Parsing a list of tokens into an expression tree

I want to parse expressions like those in typical Haskell source. I get an input stream, which is already tokenized and annotated with fixity and precedence. The set of operators is not known at compile time and may be arbitrary. The output should be a tree representing the expression. Here's a bit of what I tried:
-- A single token of the input stream
data Token a
= Prefix a
| Infix a Int Fixity -- The Int parameter represents the precedence
| LBrace
| RBrace
deriving (Show,Eq)
data Fixity
= InfixL
| InfixR
| InfixC
deriving (Show,Eq)
data Expression a
= Atom a
| Apply a a
deriving Show
-- Wrapped into either, if expression is malformed.
exprToTree :: [Token a] -> Either String (Expression a)
exprToTree = undefined
For the sake of simpleness, I don't treat lambdas special, they are just atoms.
But now, I am completely lost. How can I convert the stream of atoms into a tree? Can somebody please point me to an algorithm or help me with finding one.
In a nutshell then, even though you have a token list you still need a parser.
Parsec can handle alternative token streams, but you'll probably have to refer to the manual - a PDF available at Daan Leijen's "legacy" home page - http://legacy.cs.uu.nl/daan/download/parsec/parsec.pdf. You can roll your own parser without using a combinator library but you will be re-implementing some fraction of Parsec. As far as I remember UU_parsing expects to work with a separate scanner so its another option.
Although it doesn't handle parsing you might find Lennart Augustsson's "Lambda calculus cooked four ways" helpful for other things - http://www.augustsson.net/Darcs/Lambda/top.pdf
Edit - here is a partly worked out plan of how you can go about it with Parsec, for details you'll have to consult section 2.11 of the manual.
Suppose you have this data type for concrete "internal" tokens:
data InternalTok = Ident String
| BinOpPlus
| BinOpMinus
| UnaryOpNegate
| IntLiteral Int
deriving (Show)
Then you end get these types for the Parsec token and parse:
type MyToken = Token InternalTok
type MyParser a = GenParser MyToken () a
Define a helper function as per the Parsec manual - this handles show and pos so individual definitions are shorter cf. the mytoken function on page 19.
mytoken :: (MyToken -> Maybe a) -> MyParser a
mytoken test = token showToken posToken testToken
where
showToken tok = show tok
posToken tok = no_pos
testToken tok = test tok
For the moment your token type does not track source position, so:
no_pos :: SourcePos
no_pos = newPos "" 0 0 0
For each terminal you have to define a token function:
identifier :: MyParser MyToken
identifier = mytoken (\tok -> case tok of
a#(Prefix (Ident _)) -> Just a
_ -> Nothing)
intLiteral :: MyParser MyToken
intLiteral = mytoken (\tok -> case tok of
a#(Prefix (IntLiteral _)) -> Just a
_ -> Nothing)
binPlus :: MyParser MyToken
binPlus = mytoken (\tok -> case tok of
a#(Infix BinOpPlus _ _) -> Just a
_ -> Nothing)
binMinus :: MyParser MyToken
binMinus = mytoken (\tok -> case tok of
a#(Infix BinOpMinus _ _) -> Just a
_ -> Nothing)
unaryNegate :: MyParser MyToken
unaryNegate = mytoken (\tok -> case tok of
a#(Prefix UnaryNegate _ _) -> Just a
_ -> Nothing)
Edit - to handle custom infix operators you'll need these token parsers:
tokInfixL :: Int -> MyParser MyToken
tokInfixL n = mytoken $ \tok -> case tok of
a#(Infix _ i InfixL) | i == n -> Just a
_ -> Nothing)
tokInfixR :: Int -> MyParser MyToken
tokInfixR n = mytoken $ \tok -> case tok of
a#(Infix _ i InfixR) | i == n -> Just a
_ -> Nothing)
tokInfixC :: Int -> MyParser MyToken
tokInfixC n = mytoken $ \tok -> case tok of
a#(Infix _ i InfixC) | i == n -> Just a
_ -> Nothing)
tokPrefix :: MyParser MyToken
tokPrefix = mytoken (\tok -> case tok of
a#(Prefix _) -> Just a
_ -> Nothing)
Now you can define the parser - you need to fix the number of levels of precedence beforehand, there is no way around that fact as you need to code a parser for each level.
The top-level expression parse is simply calls the highest precedence parser
pExpression :: Parser Expersion
pExpression = expression10
For each precendence level you need a parser roughly like this, you'll have to work out non-assoc for yourself. Also you might need to do some work on chainl / chainr - I've only written a parser in this style with UU_Parsing it might be slightly different for Parsec. Note Apply is usually at the precedence highest level.
expression10 :: Parser Expression
expression10 =
Apply <$> identifier <*> pExpression
<|> Prefix <$> tokPrefix <*> pExpression
<|> chainl (Infix <$> tokInfixL 10) expression9
<|> chainr (Infix <$> tokInfixR 10) expression9
expression9 :: Parser Expression
expression9 =
Prefix <$> tokPrefix <*> pExpression
<|> chainl (Infix <$> tokInfixL 9) expression8
<|> chainr (Infix <$> tokInfixR 9) expression8
...
You'll have to extend your syntax to handle IntLiterals and Identifiers which are at level 0 in precedence:
expression0 :: Parser Expression
expression0 =
IntLit <$> intLiteral
<|> Ident <$> identifier
<|> ...
Edit - for unlimited precedence - maybe if you only have application and Atom maybe something like this would work. Note you'll have to change the tokInfixL and tokInfixR parsers to no longer match assoc-level and you may have to experiment with the order of alternatives.
expression :: Parser Expression
expression =
Apply <$> identifier <*> expression
<|> Prefix <$> tokPrefix <*> expression
<|> chainl (Infix <$> tokInfixL) expression
<|> chainr (Infix <$> tokInfixR) expression
<|> intLiteral
<|> identifier
intLiteral :: Parser Expression
intLiteral = Atom . convert <$> intLiteral
where
convert = ??
identifier :: Parser Expression
identifier = Atom . convert <$> intLiteral
where
convert = ??
After searching the web for another topic, I found this nice piece of code to do exactly what I want. Have a look:
data Op = Op String Prec Fixity deriving Eq
data Fixity = Leftfix | Rightfix | Nonfix deriving Eq
data Exp = Var Var | OpApp Exp Op Exp deriving Eq
type Prec = Int
type Var = String
data Tok = TVar Var | TOp Op
parse :: [Tok] -> Exp
parse (TVar x : rest) = fst (parse1 (Var x) (-1) Nonfix rest)
parse1 :: Exp -> Int -> Fixity -> [Tok] -> (Exp, [Tok])
parse1 e p f [] = (e, [])
parse1 e p f inp#(TOp op#(Op _ p' f') : TVar x : rest)
| p' == p && (f /= f' || f == Nonfix)
= error "ambiguous infix expression"
| p' < p || p' == p && (f == Leftfix || f' == Nonfix)
= (e, inp)
| otherwise
= let (r,rest') = parse1 (Var x) p' f' rest in
parse1 (OpApp e op r) p f rest'
-- Printing
instance Show Exp where
showsPrec _ (Var x) = showString x
showsPrec p e#(OpApp l (Op op _ _) r) =
showParen (p > 0) $ showsPrec 9 l . showString op . showsPrec 9 r
-- Testing
plus = TOp (Op "+" 6 Leftfix)
times = TOp (Op "*" 7 Leftfix)
divide = TOp (Op "/" 7 Leftfix)
gt = TOp (Op ">" 4 Nonfix)
ex = TOp (Op "^" 8 Rightfix)
lookupop '+' = plus
lookupop '*' = times
lookupop '/' = divide
lookupop '>' = gt
lookupop '^' = ex
fromstr [x] = [TVar [x]]
fromstr (x:y:z) = TVar [x] : lookupop y : fromstr z
test1 = fromstr "a+b+c"
test2 = fromstr "a+b+c*d"
test3 = fromstr "a/b/c"
test4 = fromstr "a/b+c"
test5 = fromstr "a/b*c"
test6 = fromstr "1^2^3+4"
test7 = fromstr "a/1^2^3"
test8 = fromstr "a*b/c"
(I took it from this page: http://hackage.haskell.org/trac/haskell-prime/attachment/wiki/FixityResolution/resolve.hs)

Filter an array or list by consecutive pairs based on a matching rule

This is probably trivial, and I do have a solution but I'm not happy with it. Somehow, (much) simpler forms don't seem to work and it gets messy around the corner cases (either first, or last matching pairs in a row).
To keep it simple, let's define the matching rule as any two or more numbers that have a difference of two. Example:
> filterTwins [1; 2; 4; 6; 8; 10; 15; 17]
val it : int list = [2; 4; 6; 8; 10; 15; 17]
The code I currently use is this, which just feels sloppy and overweight:
let filterTwins list =
let func item acc =
let prevItem, resultList = acc
match prevItem, resultList with
| 0, []
-> item, []
| var, [] when var - 2 = item
-> item, item::var::resultList
| var, hd::tl when var - 2 = item && hd <> var
-> item, item::var::resultList
| var, _ when var - 2 = item
-> item, item::resultList
| _
-> item, resultList
List.foldBack func list (0, [])
|> snd
I intended my own original exercise to experiment with List.foldBack, large lists and parallel programming (which went well) but ended up messing with the "easy" part...
Guide through the answers
Daniel's last, 113 characters*, easy to follow, slow
Kvb's 2nd, 106 characters* (if I include the function), easy, but return value requires work
Stephen's 2nd, 397 characters*, long winded and comparably complex, but fastest
Abel's, 155 characters*, based on Daniel's, allows duplicates (this wasn't a necessity, btw) and is relatively fast.
There were more answers, but the above were the most distinct, I believe. Hope I didn't hurt anybody's feelings by accepting Daniel's answer as solution: each and every one solution deserves to be the selected answer(!).
* counting done with function names as one character
Would this do what you want?
let filterTwins l =
let rec filter l acc flag =
match l with
| [] -> List.rev acc
| a :: b :: rest when b - 2 = a ->
filter (b::rest) (if flag then b::acc else b::a::acc) true
| _ :: t -> filter t acc false
filter l [] false
This is terribly inefficient, but here's another approach using more built-in functions:
let filterTwinsSimple l =
l
|> Seq.pairwise
|> Seq.filter (fun (a, b) -> b - 2 = a)
|> Seq.collect (fun (a, b) -> [a; b])
|> Seq.distinct
|> Seq.toList
Maybe slightly better:
let filterTwinsSimple l =
seq {
for (a, b) in Seq.pairwise l do
if b - 2 = a then
yield a
yield b
}
|> Seq.distinct
|> Seq.toList
How about this?
let filterPairs f =
let rec filter keepHead = function
| x::(y::_ as xs) when f x y -> x::(filter true xs)
| x::xs ->
let rest = filter false xs
if keepHead then x::rest else rest
| _ -> []
filter false
let test = filterPairs (fun x y -> y - x = 2) [1; 2; 4; 6; 8; 10; 15; 17]
Or if all of your list's items are unique, you could do this:
let rec filterPairs f s =
s
|> Seq.windowed 2
|> Seq.filter (fun [|a;b|] -> f a b)
|> Seq.concat
|> Seq.distinct
let test = filterPairs (fun x y -> y - x = 2) [1; 2; 4; 6; 8; 10; 15; 17]
EDIT
Or here's another alternative which I find elegant. First define a function for breaking a list into a list of groups of consecutive items satisfying a predicate:
let rec groupConsec f = function
| [] -> []
| x::(y::_ as xs) when f x y ->
let (gp::gps) = groupConsec f xs
(x::gp)::gps
| x::xs -> [x]::(groupConsec f xs)
Then, build your function by collecting all results back together, discarding any singletons:
let filterPairs f =
groupConsec f
>> List.collect (function | [_] -> [] | l -> l)
let test = filterPairs (fun x y -> y - x = 2) [1; 2; 4; 6; 8; 10; 15; 17]
The following solution is in the spirit of your own, but I use a discriminate union to encapsulate aspects of the algorithm and reign in the madness a bit:
type status =
| Keep of int
| Skip of int
| Tail
let filterTwins xl =
(Tail, [])
|> List.foldBack
(fun cur (prev, acc) ->
match prev with
| Skip(prev) when prev - cur = 2 -> (Keep(cur), cur::prev::acc)
| Keep(prev) when prev - cur = 2 -> (Keep(cur), cur::acc)
| _ -> (Skip(cur), acc))
xl
|> snd
Here's another solution which uses a similar discriminate union strategy as my other answer but it works on sequences lazily so you can watch those twin (primes?) roll in as they come:
type status =
| KeepTwo of int * int
| KeepOne of int
| SkipOne of int
| Head
let filterTwins xl =
let xl' =
Seq.scan
(fun prev cur ->
match prev with
| KeepTwo(_,prev) | KeepOne prev when cur - prev = 2 ->
KeepOne cur
| SkipOne prev when cur - prev = 2 ->
KeepTwo(prev,cur)
| _ ->
SkipOne cur)
Head
xl
seq {
for x in xl' do
match x with
| KeepTwo(a,b) -> yield a; yield b
| KeepOne b -> yield b
| _ -> ()
}
for completeness sake, I'll answer this with what I eventually came up with, based on the friendly suggestions in this thread.
The benefits of this approach are that it doesn't need Seq.distinct, which I believe is an improvement as it allows for duplicates. However, it still needs List.rev which doesn't make it the fastest. Nor is it the most succinct code (see comparison of solution in question itself).
let filterTwins l =
l
|> Seq.pairwise
|> Seq.fold (fun a (x, y) ->
if y - x = 2 then (if List.head a = x then y::a else y::x::a)
else a) [0]
|> List.rev
|> List.tail

Resources