Hashtable indexed on several fields - data-structures

I'm currently programming an OCaml module defining a type corresponding to a CPU register. The interface of this module is the following :
(*
* Defines a type which represents a R3000 register.
*)
type t =
| R0 (* Always 0 *)
| AT (* Assembler temporary *)
| V0 | V1 (* Subroutine return values *)
| A0 | A1 | A2 | A3 (* Subroutine arguments *)
| T0 | T1 | T2 | T3 | T4 | T5 | T6 | T7 (* Temporary registers *)
| S0 | S1 | S2 | S3 | S4 | S5 | S6 | S7 (* Register variables *)
| T8 | T9 (* Temporary registers *)
| K0 | K1 (* Reserved for kernels *)
| GP | SP | FP (* Global/Stack/Frame pointer *)
| RA (* Return address *)
(*
* Conversion from/to [|0, 31|].
*)
val of_int : int -> t
val to_int : t -> int
(*
* Conversion to string for display.
*)
val of_string : string -> t
val to_string : t -> string
However, I would like the implementation to be fast and not too repetitive. For example, I could code the of_int function like this :
let of_int = function
| 0 -> R0
| 1 -> AT
(* ... *)
But it would be awful and unmaintainable. I do not want to do this as it conflicts with my programming religion. Moreover, I would need to do this kind of dirty code not only one time, but for the four functions.
The first solution I found would be to use a preprocessor (either Camlp4 or cpp) to generate the code I want. I find this to be overkill but would use this method if you can't help me with my second idea.
After a bit of thought, I thought I could do something like this :
type regdescr = {
reg : t ;
name : string ;
index : int
}
let regs =
let htbl = Hashtbl.create 32 in
let li = [ (* regdescr defs here *) ] in
List.iter (Hashtbl.add htbl) li ;
htbl
However, in this case, I must choose what field I want to hash. Is there another solution than using three different hashtables in this case ? Maybe a data-structure I do not know about is able to hash over three fields and perform searches on the three of them.
Sorry for the long question for which the answer may be trivial :) .

Looks like a perfect fit for deriving.
(*
* Defines a type which represents a R3000 register.
*)
type t =
| R0 (* Always 0 *)
| AT (* Assembler temporary *)
| V0 | V1 (* Subroutine return values *)
| A0 | A1 | A2 | A3 (* Subroutine arguments *)
| T0 | T1 | T2 | T3 | T4 | T5 | T6 | T7 (* Temporary registers *)
| S0 | S1 | S2 | S3 | S4 | S5 | S6 | S7 (* Register variables *)
| T8 | T9 (* Temporary registers *)
| K0 | K1 (* Reserved for kernels *)
| GP | SP | FP (* Global/Stack/Frame pointer *)
| RA (* Return address *)
deriving (Enum,Show)
let of_int x = Enum.to_enum<t>(x)
let to_int x = Enum.from_enum<t>(x)
let to_string x = Show.show<t>(x)
let pr = Printf.printf
let () =
pr "%i %i %i\n" (to_int R0) (to_int RA) (to_int T8);
pr "%s %s %s\n"
(to_string (of_int 0)) (to_string (of_int 31)) (to_string (of_int 24));
pr "%s %s %s\n"
(to_string (Enum.pred<t>(A1))) (to_string A1) (to_string (Enum.succ<t>(A1)));
()
Output :
0 31 24
R0 RA T8
A0 A1 A2
Compile with :
ocamlc -pp deriving -I ~/work/contrib/deriving/0.1.1-3.11.1-orig/lib deriving.cma q.ml -o q

Just have three separate hash tables?

Instead of using a hashtable for going from one partial representation of a register to another, have you thought of forcing yourself to always manipulate only pointers to complete descriptions, so that you can access any aspect you like (index, string representation, ...) with just a pointer dereference?
You can use the representation (your type regdescr) as the register.
How often do you need to pattern-match a value of type register?
If you never do, you can even do away with the reg field completely.
module Register :
sig
type t = private { name : string ; index : int }
val r0 : t
val at : t
val equal : t -> t -> bool
val hash : t -> int
val compare : t -> t -> int
end =
struct
type t = { name : string ; index : int }
let r0 = { name = "R0" ; index = 0 }
let at = { name = "AT" ; index = 1 }
let equal r1 r2 = r1.index = r2.index
let hash r1 = Hashtbl.hash (r1.index)
let compare r1 r2 = Pervasives.compare r1.index r2.index
end
Note: you can make the whole thing more readable by using files register.ml and register.mli to define the Register module.
If you sometimes need pattern-matching, you can keep the constructor field so that it is possible to write nice pattern-matchings:
match r.reg with
R0 -> ...
| AT -> ...
But force yourself to write only functions that accept (and pass their callees) the full Register.t.
EDIT: For indexing, first write the generic function below:
let all_registers = [ r0 ; at ]
let index projection =
let htbl = Hashtbl.create 32 in
let f r =
let key = projection r in
Hashtbl.add htbl key r
in
List.iter f all_registers ;
Hashtbl.find htbl
Then pass it all the projections you need:
let of_int = index (fun r -> r.index)
let of_name = index (fun r -> r.name)

Related

Ocaml- partial derivative of a regular expression

I got this code:
type regexp =
| V (* void *)
| E (* epsilon *)
| C of char (* char *)
| U of regexp * regexp (* a + b *)
| P of regexp * regexp (* a.b *)
| S of regexp (* a* *)
;;
...
module ReS = Set.Make (struct
type t = regexp
let compare = compare
end)
(* module/type for pairs of sets of regular expressions *)
module RePS = Set.Make (struct
type t = ReS.t * ReS.t
let compare = compare
end)
(*module/type for set of chars *)
module CS = Set.Make(Char)
let ewps = ReS.exists ewp;;
let atmost_epsilons = ReS.for_all atmost_epsilon;;
let infinitys = ReS.exists infinity;;
let rigth_concat s = function
| V -> ReS.empty
| E -> s
| r -> ReS.map (fun e -> P (e,r)) s
;;
let ( *.* ) = rigth_concat;;
(* partial derivative of a regular expression *)
let rec pd a re = function
| V | E -> ReS.empty
| C b when b=a -> ReS.singleton E
| C b -> ReS.empty
| U (r, s) -> ReS.union (pd a r) (pd a s)
| P (r, s) when ewp a -> ReS.union ((pd a r) *.* s) (pd a s)
| P (r, s) -> (pd a r) *.* s
| S r as re -> (pd a r) *.* re
;;
let rec unions f s =
ReS.fold (fun re acc -> ReS.union (f re) acc ) s ReS.empty
;;
let rec pds a s = unions (pd a) s;;
let rec pdw (sr: ReS.t) = function
| [] -> sr
| a::tl -> pds a (pdw sr tl)
;;
I checked the types of return values and i think they are correct, but it returns the following error and I am not sure why.
This expression has type regexp -> ReS.t but an expression was
expected of type ReS.t
In function "pd" in line that has error
| U (r, s) -> ReS.union (pd a r) (pd a s)
I believe your problem is caused by the fact that function supplies an implicit parameter. This expression:
function None -> 0 | Some x -> x
is a function with one parameter. So in your case you have defined pd to have three parameters. It looks to me like you're expecting it to have two parameters.
You can probably change your function ... to match re with instead. Or you can remove the explicit re parameter, and use the parameter that's implicit in function.

F# function to sort Excel "variants" in Excel-Dna

When trying to sort a 1d array of variants (here by "variant" I mean all the Excel types, eg bool, double (and date), string, various errors...) with the following function :
[<ExcelFunction(Category="test", Description="sort variants.")>]
let sort_variant ([<ExcelArgument(Description= "Array to sort.")>] arr : obj[]): obj[] =
arr
|> Array.sort
I get the following error : Error FS0001 The type 'obj' does not support the 'comparison' constraint. For example, it does not support the 'System.IComparable' interface, probably meaning that there is no generic ordering function available on all obj types.
But Excel has a natural ordering function, which I'd like to emulate (at least ballpark). Eg double (and dates) < string < bool < error...
My question : What is the idiomatic way to sort an array of "variants" in F# / Excel-Dna? (I am after a function which takes an obj[] and return an obj[], nothing else, not a macro...)
My (temporary?) solution :
I created a “discriminated union” type
type XLVariant = D of double | S of string | B of bool | NIL of string
(not really sure whether NIL is necessary but it did not hurt. Also in my real life code I added a DT of DateTime instance as I need to distinguish dates from doubles).
let toXLVariant (x : obj) : XLVariant =
match x with
| :? double as d -> D d
| :? string as s -> S s
| :? bool as b -> B b
| _ -> NIL "unknown match"
let ofXLVariant (x : XLVariant) : obj =
match x with
| D d -> box d
| S s -> box s
| B b -> box b
| NIL _ -> box ExcelError.ExcelErrorRef
[<ExcelFunction(Category="test", Description="sort variants.")>]
let sort_variant ([<ExcelArgument(Description= "Array to sort.")>] arr : obj[]): obj[] =
arr
|> Array.map toXLVariant
|> Array.sort
|> Array.map ofXLVariant
(for the sake of simplicity, I missed the Errors types, but the idea is the same)
This seems a bit more explicit to me, since it just sticks to the CLR type system:
// Compare objects in the way Excel would
let xlCompare (v1 : obj) (v2 : obj) =
match (v1, v2) with
| (:? double as d1), (:? double as d2) -> d1.CompareTo(d2)
| (:? double), _ -> -1
| _, (:? double) -> 1
| (:? string as s1), (:? string as s2) -> s1.CompareTo(s2)
| (:? string), _ -> -1
| _, (:? string) -> 1
| (:? bool as b1), (:? bool as b2) -> b1.CompareTo(b2)
| (:? bool), _ -> -1
| _, (:? bool) -> 1
| _ -> 2
[<ExcelFunction(Category="test", Description="sort variants.")>]
let sort_variant ([<ExcelArgument(Description= "Array to sort.")>] arr : obj[]): obj[] =
Array.sortWith xlCompare arr

How to consume (or iterate over) an array in ATS

Suppose we declare an array as the following:
dataview myarray
( a:t#ype (* element types *)
, addr (* location *)
, int (* size *)
) =
| {l:addr}
myarray_nil(a, l, 0)
| {l:addr}{n:int}
myarray_cons(a, l, n + 1) of (a#l, myarray(a, l + sizeof(a), n))
I would like to iterate over such an array. I have tried the following way:
fun
{a:t#ype}
myarray_map
{l: addr}{n: nat}
(pf: !myarray(a, l, n) | p0: ptr(l), f:a-<cloref1>a): void = let
prval myarray_cons(pf1, pf2) = pf
val elm = ptr_get<a>(pf1 | p0)
val () = ptr_set<a>(pf1 | p0, f(elm))
val p1 = ptr_succ<a>(p0)
in
(pf:= myarray_cons(pf1, pf2); myarray_map(pf | p1, f))
end
The issue is when I hit the myarray_nil case, the prval becomes unmatched.
Since pf is a linear resource, I cannot do
case+ pf of
| myarray_nil() =>
| myarray_cons(pf1, pf2) =>
Because pf is consumed here but it must be retained according to the function definition. How can I iterate through myarray in this way and ensure that pf is matched exhaustively while not being consumed?
Thank you!
Following the advice given in the comments, I wrote the following which typechecks:
fun
{a:t#ype}
myarray_map
{l: addr}{n: nat | n > 0}
(pf: !myarray(a, l, n) | p0: ptr(l), n: int(n), f:a-<cloref1>a): void = let
prval myarray_cons(pf1, pf2) = pf
val elm = ptr_get<a>(pf1 | p0)
val () = ptr_set<a>(pf1 | p0, f(elm))
val p1 = ptr_succ<a>(p0)
in
if n = 1
then ()
else myarray_map(pf2 | p1, n - 1, f); pf := myarray_cons(pf1, pf2)
end

Insertion sort using fold_left with bool function passing as argument

I want to write simple insertion sort function using fold_left but I also want to pass function that will specify order in my sort fun.
What I don't know, is how to pass it to fold_left..
let rec insert f l e =
match l with
| [] -> [e]
| h :: t -> if f e h then h :: insert f t e else e :: l;;
let insertion_sort f l = List.fold_left insert f [] l;;
let less x y = x < y;;
let result = insertion_sort less [2 ; 5 ; 1 ; 9 ; 7 ; -2 ; 0 ; 124];;
This what I am talking about but fold_left doesn't accept that solution.
When I make specialization of sort function then it works just fine.
let insertLess = insert less;;
let insertion_sortLess l = List.fold_left insertLess [] l;;
let result = insertion_sortLess [2 ; 5 ; 1 ; 9 ; 7 ; -2 ; 0 ; 124];;
# val result : int list = [124; 9; 7; 5; 2; 1; 0; -2]
List.fold_left insert f ... will apply insert and f as separate arguments to List.fold_left. What you want is List.fold (insert f) ..., which will apply f to insert, and then the result of that to List.fold_left.
Edit: In addition, you don't need to define less. You can pass > as a function directly by surrounding it in parentheses: insertion_sort (<) ...

How to abstract non-linear list iteration schemes into a re-usable algorithm?

On the odd chance, that someone has a brilliant idea...
I am not sure if there is a good way to generalize that.
EDIT: I think it might be nice to explain exactly what the inputs and outputs are. The code below is only how I approached the solution.
Inputs: data, recipe
data: set of string, string list here also called "set of named lists"
recipe: list of commands
Command Print (literal|list reference)
Adds the literal to the output or if it is a list reference, it adds the head of the referenced list to the output.
Command While (list reference)
when referenced list not empty --> next command
when referenced list empty --> skip entries in recipe list past the matching Wend.
Command Wend (list reference)
replace referenced list with tail (reference list)
when referenced list is empty, next command
when referenced list is not empty, next command is the matching while above.
Outputs: string list
The best answer is the implementation of that which is shortest and which allows to re-use that algorithm in new contexts.
This is not just a programming problem for the fun of it, btw. It is basically what happens if you try to implement data driven text templating.
The code below is my attempt to solve this problem.
The first code snippet is a non-generalized solution.
The second code snippet is an attempt to isolate the algorithm.
If you play with the code, simply paste the second snippet below the first snippet and both versions are working.
The whole topic is about understanding better how to separate the iteration algorithm from the rest of the code and then to simply apply it, in contrast of having all the other code within.
Would it not be great, if there was a way to abstract the way the statements are being processed and the looping of the while/wend, such,
that it can be reused in my main code, just as I keep re-using other "iteration schemes", such as List.map?
The commonalities between my main code and this study are:
An evolving "environment" which is threaded through all steps of the computation.
Collections, which need to be iterated in a well-formed nested manner. (Malformed would be: while x while y wend x wend y)
A series of "execution steps" form the body of each of those "while wend" loops.
Done in a "pure" manner. As you will note, nothing is mutable in the study. Want to keep it like that.
Each "While" introduces a new scope (as for binding values), which is discarded again, once the while loop is done.
So, I am looking for something like:
run: CommandClassifier -> commandExecutor -> Command list -> EnvType -> EnvType
where
CommandClassifier could be a function of the form Command -> NORMAL|LOOP_START|LOOP_END
and commandexecutor: Command -> EnvType -> EnvType
Of course, nesting of those while-blocks would not be limited to 2 (just tried to keep the testProgram() small).
SideNote: the "commands list" is an AST from a preceding parser run, but that should not really matter.
type MiniLanguage =
| Print of string
| While of string
| Wend of string
let testProgram =
[ Print("Hello, I am your Mini Language program")
While("names")
Print("<names>")
While("pets")
Print("<pets>")
Wend("pets")
Print("Done with pets.")
Wend("names")
Print("Done with names.")
]
type MiniEnvironment = { Bindings : Map<string,string>; Collections : Map<string, string list> }
let init collections =
{ Bindings = Map.empty; Collections = Map.ofList collections}
let bind n v env =
let newBindings =
env.Bindings
|> Map.remove n
|> Map.add n v
{ env with Bindings = newBindings; }
let unbind n env =
{ env with Bindings = Map.remove n env.Bindings; }
let bindingValue n env =
if env.Bindings.ContainsKey n then
Some(env.Bindings.Item n)
else
None
let consumeFirstFromCollection n env =
if env.Collections.ContainsKey n then
let coll = env.Collections.Item n
match coll with
| [] -> env |> unbind n
| _ ->
let first = coll.Head
let newCollections =
env.Collections
|> Map.remove n
|> Map.add n coll.Tail
{ env with Collections = newCollections }
|> bind n first
else failwith ("Unknown collection: " + n)
// All do functions take env - the execution environment - as last argument.
// All do functions return (a new) env as single return parameter.
let rec doPrint (s : string) env =
if s.StartsWith("<") && s.EndsWith(">") then
match bindingValue (s.Substring (1, s.Length - 2 )) env with
| Some(v) -> v
| _ -> s
else s
|> printfn "%s"
env
let rec skipPastWend name code =
match code with
| (Wend(cl) :: rest) when cl = name -> rest
| [] -> failwith "No Wend found!"
| (_ :: rest) -> skipPastWend name rest
let rec doWhileX name code env =
match code with
| (Print(s) :: rest) -> env |> (doPrint s) |> doWhileX name rest
| (While(cn) :: rest) -> env |> doWhile cn rest |> ignore; env |> doWhileX name (skipPastWend cn rest)
| (Wend(cn) :: rest) when cn = name -> env
| [] -> failwith ("While without Wend for: " + name)
| _ -> failwith ("nested while refering to same collection!")
and doWhile name code env =
let e0 = env |> consumeFirstFromCollection name
match bindingValue name e0 with
| Some(s) ->
e0 |> doWhileX name code |> doWhile name code
| None -> env
let rec run (program : MiniLanguage list) env =
match program with
| (Print(s) :: rest) -> env |> (doPrint s) |> run rest
| (While(cn) :: rest) ->
env
|> doWhile cn rest |> ignore
env |> run (skipPastWend cn program)
| (Wend(cn) :: rest) -> failwith "wend found in run()"
| [] -> env
let test() =
init [ "names", ["name1"; "name2"; "name3"; ]; "pets", ["pet1"; "pet2"] ]
|> run testProgram
|> printfn "%A"
(*
Running test() yields:
Hello, I am your Mini Language program
name1
pet1
pet2
Done with pets.
name2
pet1
pet2
Done with pets.
name3
pet1
pet2
Done with pets.
Done with names.
{Bindings = map [];
Collections =
map [("names", ["name1"; "name2"; "name3"]); ("pets", ["pet1"; "pet2"])];}
*)
Here my first version of isolating the algorithm. The number of callbacks is not entirely pretty. Can anyone come up with something simpler?
// The only function I had to "modify" to work with new "generalized" algorithm.
let consumeFirstFromCollection1 n env =
if env.Collections.ContainsKey n then
let coll = env.Collections.Item n
match coll with
| [] -> (env |> unbind n , false)
| _ ->
let first = coll.Head
let newCollections =
env.Collections
|> Map.remove n
|> Map.add n coll.Tail
({ env with Collections = newCollections }
|> bind n first , true)
else failwith ("Unknown collection: " + n)
type NamedList<'n,'t when 'n : comparison> = 'n * List<'t>
type Action<'a,'c> = 'c -> 'a -> 'a
type LoopPreparer<'a,'c> = 'c -> 'a -> 'a * bool
type CommandType = | RUN | BEGIN | END
type CommandClassifier<'c> = 'c -> CommandType
type Skipper<'c> = 'c -> List<'c> -> List<'c>
type InterpreterContext<'a,'c> =
{ classifier : CommandClassifier<'c>
executor : Action<'a,'c>
skipper : Skipper<'c>
prepareLoop : LoopPreparer<'a,'c>
isMatchingEnd : 'c -> 'c -> bool
}
let interpret (context : InterpreterContext<'a,'c>) (program : 'c list) (env : 'a) : 'a =
let rec loop front (code : 'c list) e =
let e0,hasData = e |> context.prepareLoop front
if hasData
then
e0
|> loop1 front (code)
|> loop front (code)
else e
and loop1 front code e =
match code with
| x :: more when (context.classifier x) = RUN ->
//printfn "RUN %A" x
e |> context.executor x |> loop1 front more
| x :: more when (context.classifier x) = BEGIN ->
//printfn "BEGIN %A" x
e |> loop x more |> ignore
e |> loop1 front (context.skipper x more)
| x :: more when (((context.classifier x) = END) && (context.isMatchingEnd front x)) -> /// && (context.isMatchingEnd front x)
//printfn "END %A" x
e
| [] -> failwith "No END."
| _ -> failwith "TODO: Not sure which case this is. But it is not a legal one!"
let rec interpr code e =
match code with
| [] -> e
| (first :: rest) ->
match context.classifier first with
| RUN -> env |> context.executor first |> interpr rest
| BEGIN ->
e |> loop first rest |> ignore
e |> interpr (context.skipper first rest)
| END -> failwith "END without BEGIN."
interpr program env
let test1() =
let context : InterpreterContext<MiniEnvironment,MiniLanguage> =
{ classifier = fun c-> match c with | MiniLanguage.Print(_) -> RUN | MiniLanguage.While(_) -> BEGIN | MiniLanguage.Wend(_) -> END;
executor = fun c env -> match c with | Print(s) -> doPrint s env | _ -> failwith "Not a known command.";
skipper = fun c cl -> match c with | While(n) -> skipPastWend n cl | _ -> failwith "first arg of skipper SHALL be While!"
prepareLoop = fun c env -> match c with | While(n) -> (consumeFirstFromCollection1 n env) | _ -> failwith "first arg of skipper SHALL be While!"
isMatchingEnd = fun cwhile cx -> match cwhile,cx with | (While(n),Wend(p)) when n = p -> true | _ -> false
}
init [ "names", ["name1"; "name2"; "name3"; ]; "pets", ["pet1"; "pet2"] ]
|> interpret context testProgram
|> printfn "%A"

Resources