I have the following smaller tokenizer for simple arithmetic expressions. I am new to fsharp and I don't know why this function doesn't return anything when being called. Can someone please help?
// Turns the string `s` into a list of tokens.
// `scan`, `toInt` and `makeInt` are defined elsewhere (not shown in this snippet).
let tokenizer s =
let chars1 = scan s
// `repeat` walks the character list and emits one token per step.
let rec repeat list =
match list with
| []->[]
| char::chars ->
match char with
| ')' -> RP::repeat chars
| '(' -> LP::repeat chars
| '+' -> Plus::repeat chars
| '*' -> Times::repeat chars
| '^' -> Pow::repeat chars
| _ ->
// NOTE(review): `y` (second component of makeInt's result) is never used;
// the recursion below continues from `chars` instead - confirm this is intended.
let (x,y) = makeInt (toInt char) chars
Int x::repeat chars
repeat chars1
The implementation of scan, toInt, makeInt and the union type for the expression was not presented, but might be inferred as:
/// Splits a string into the list of its characters.
let scan (s:string) = s.ToCharArray() |> Array.toList

/// Numeric value of a character, measured as its distance from '0'.
let toInt c = int c - int '0'

/// Inferred stand-in for the asker's helper: returns the integer built so far
/// together with the characters that remain to be tokenized.
let makeInt n chars = (n,chars)

/// Tokens produced by `tokenizer`.
type expr = RP | LP | Plus | Times | Pow | Int of int

/// Converts `s` into a list of `expr` tokens.
/// Any character that is not one of ) ( + * ^ is handed to `makeInt`
/// as the first digit of an integer literal.
let tokenizer s =
    let chars1 = scan s
    let rec repeat list =
        match list with
        | [] -> []
        | c :: chars ->
            match c with
            | ')' -> RP :: repeat chars
            | '(' -> LP :: repeat chars
            | '+' -> Plus :: repeat chars
            | '*' -> Times :: repeat chars
            | '^' -> Pow :: repeat chars
            | _ ->
                // Continue from the characters makeInt left over, not from
                // `chars`; otherwise anything makeInt consumed would be
                // tokenized a second time.
                let (x, rest) = makeInt (toInt c) chars
                Int x :: repeat rest
    repeat chars1
in which case:
tokenizer "1+1"
gives:
val it : expr list = [Int 1; Plus; Int 1]
It's possible the issue is in the implementation of your scan function.
Related
I have a handwritten predictive parser.
Each nonterminal has a corresponding parse method. Each parse method is of type `tokenlist -> tokenlist * Ast`.
Inside each method, I use the convention "tokenlist_symbol" to connote the tokenlist after consuming a specific symbol. In this line: let typ tokenlist_typ = parseTyp tokenlist in match tokenlist.head with, typ is an AST and tokenlist_typ is the remainder of the tokenlist after parseTyp has consumed the typ prefix.
However, I am getting This expression has type 'a -> token_list * Ast.typ
but an expression was expected of type Ast.typ error for line, (Ast.Declaration(typ, identifier, decls_prime), tokenlist_decls_prime)
type token_list =
{head : Lexer.token; (** head token. *)
lexbuf : Lexer.token list} (** lexer buffer. *)
(** Represents a parser buffer used during parsing of various productions:
    a current [head] token plus the list of tokens still to be read. *)
let default_tokenlist s = {head = Lexer.EOF; lexbuf = Lexer.tokenize s}
(** Creates the initial [token_list] for the string [s]: [head] is set to
    [Lexer.EOF] and [lexbuf] holds the result of [Lexer.tokenize s]. *)
let next {lexbuf; _} =
  {head = List.hd lexbuf; lexbuf = List.tl lexbuf}
(** [next tokenlist] returns a new buffer whose [head] is the first token of
    the old [lexbuf] and whose [lexbuf] is the rest of it; the old [head] is
    dropped. Raises [Failure] (from [List.hd]/[List.tl]) when the buffer is
    empty. *)
(** [parseTyp tokenlist] looks at the head token. For [Int], [Bool] and
    [Void] it returns the advanced buffer paired with the matching [Ast]
    constructor; for [EOF] it returns the buffer unchanged with
    [Ast.Epsilon]; any other token raises [Syntax_error "Syntax Error"]. *)
let parseTyp tokenlist =
  (* Result for a recognised type keyword: step past it. *)
  let consume ast = (next tokenlist, ast) in
  match tokenlist.head with
  | Lexer.Void -> consume Ast.Void
  | Lexer.Bool -> consume Ast.Bool
  | Lexer.Int -> consume Ast.Int
  | Lexer.EOF -> (tokenlist, Ast.Epsilon)
  | _ -> raise (Syntax_error "Syntax Error")
(*decls = typ “id” decls_prime | epsilon *)
(* Each parse function takes a token buffer and returns the pair
   (remaining buffer, AST fragment). *)
let rec parseDecls tokenlist =
  (* Fixed: the tuple pattern had a stray trailing comma. *)
  let (tokenlist_typ, typ) = parseTyp tokenlist in
  (* NOTE(review): the match and [next] below use the original [tokenlist],
     not [tokenlist_typ] returned by [parseTyp] - confirm which buffer is
     meant to be inspected after the type has been consumed. *)
  match tokenlist.head with
  | Lexer.ID identifier ->
    let (tokenlist_decls_prime, decls_prime) =
      next tokenlist |> parseDeclsPrime in
    (tokenlist_decls_prime, Ast.Declaration (typ, identifier, decls_prime))
  | Lexer.EOF -> (tokenlist, [])
  | _ ->
    let err_msg = Printf.sprintf "Syntax Error" in
    raise (Syntax_error err_msg)
(* decls_prime = vdecl decls | fdecl decls *)
and parseDeclsPrime tokenlist =
  match tokenlist.head with
  | Lexer.Semicolon ->
    (* Fixed: the binding had an unbalanced closing parenthesis. *)
    let tokenlist_vdecl = next tokenlist in
    let (tokenlist_decls, decls) = parseDecls tokenlist_vdecl in
    (* NOTE(review): [vdecl] is not bound anywhere in this snippet - it
       presumably comes from a variable-declaration parser; verify. *)
    (tokenlist_decls, Ast.DeclsPrime (Lexer.Semicolon, vdecl, decls))
  | Lexer.LeftParens ->
    let (tokenlist_fdecl, fdecl) = next tokenlist |> parseFdecl in
    let (tokenlist_decls, decls) = parseDecls tokenlist_fdecl in
    (* NOTE(review): this branch also tags the node with [Lexer.Semicolon]
       although it was entered on [Lexer.LeftParens] - confirm. *)
    (tokenlist_decls, Ast.DeclsPrime (Lexer.Semicolon, fdecl, decls))
  | _ ->
    let err_msg = Printf.sprintf "Syntax Error" in
    raise (Syntax_error err_msg)
You have this:
let (decls_prime, tokenlist_decls_prime) =
next tokenlist |> parseDeclsPrime
Judging by the names, this looks like parseDeclsPrime returns the type Ast * tokenlist. But it seems to me the parse functions are supposed to return tokenlist * Ast.
Most likely the two names in the pair are reversed.
So I am trying to print a list of lists that would look like this:
[0;0;0;0;0];
[0;0;0;0;0];
[0;0;1;0;0];
[0;0;0;0;0];
I can use as many functions as necessary, but only one function may use a print function. Here is what I have so far:
(* Converts a row of ints into a list of strings in which every number is
   followed by a ";" element (including the last one). *)
let rec rowToString = function
  | [] -> []
  | cell :: rest -> string_of_int cell :: ";" :: rowToString rest;;
(* Attempt at printing every row of the image [s].
   NOTE(review): [print_list] is not defined in this snippet, and there is
   no case for the empty list, so [List.hd s] / [List.tl s] will raise once
   [s] is exhausted. *)
let rec pp_my_image s =
print_list(rowToString(List.hd s)) :: pp_my_image(List.tl s);;
I know this is wrong, but I can't figure out a way to do it.
Here is one way to do it:
(* Renders a row of ints as "a;b;c". The one-element case is handled
   separately so that no ";" is emitted after the last item. *)
let rec rowToString = function
  | [] -> ""
  | [last] -> string_of_int last
  | first :: rest -> string_of_int first ^ ";" ^ rowToString rest
(* Renders every row with [rowToString], wraps it as "[...];" followed by a
   newline, and concatenates the results in order. *)
let imageToString i =
  i
  |> List.map (fun row -> "[" ^ rowToString row ^ "];\n")
  |> String.concat ""
(* The only function that prints: converts the image to text, then writes
   it to standard output. *)
let pp_my_image s =
  s |> imageToString |> print_string
The rowToString function will create a string with the items in each inner list. Notice that case h :: [] is separated so that a semicolon is not added after the last item.
The imageToString function will create a string for each inner list with a call to rowToString. It will surround the result of each string with brackets and add a semicolon and newline to the end.
pp_my_image will simply convert the image to a string and print the result.
I am just wondering how to shorten this code, as I suspect it is too redundant:
(* Looks up [ename] in the map [doc]; [None] when the key is absent. *)
let get ename doc =
  match StringMap.find ename doc with
  | found -> Some found
  | exception Not_found -> None;;
(* Fetches [ename] from [doc] as a double: [None] if absent, [Some v] if the
   element is a [Double v], otherwise raises [Wrong_bson_type]. *)
let get_double ename doc =
  match get ename doc with
  | Some (Double v) -> Some v
  | Some _ -> raise Wrong_bson_type
  | None -> None;;
(* Fetches [ename] from [doc] as a string: [None] if absent, [Some v] if the
   element is a [String v], otherwise raises [Wrong_bson_type]. *)
let get_string ename doc =
  match get ename doc with
  | Some (String v) -> Some v
  | Some _ -> raise Wrong_bson_type
  | None -> None;;
(* Fetches [ename] from [doc] as a nested document: [None] if absent,
   [Some v] if the element is a [Document v], otherwise raises
   [Wrong_bson_type]. *)
let get_doc ename doc =
  match get ename doc with
  | Some (Document v) -> Some v
  | Some _ -> raise Wrong_bson_type
  | None -> None;;
So, basically, I have different types of values, and I put all those kinds of values into a map.
The code above is for getting according type of values out of the map. What I do is that for each type, I have a get. To get one type of value, I have to see a). whether it is there or not; b). whether it is that type indeed, if not, raise an exception.
But the code above seems too redundant, as you can see. The only difference between each type's get is just the type itself.
How can I shorten this code?
You can do this:
(* Looks up [ename] in [doc] and, when present, passes the element through
   [extract]; whatever [extract] raises propagates to the caller. *)
let get_generic extract ename doc =
  match get ename doc with
  | Some element -> Some (extract element)
  | None -> None
(* Typed getters: each one supplies [get_generic] with an extractor that
   accepts a single constructor and raises [Wrong_bson_type] otherwise. *)
let get_double =
  let as_double = function Double v -> v | _ -> raise Wrong_bson_type in
  get_generic as_double
let get_string =
  let as_string = function String v -> v | _ -> raise Wrong_bson_type in
  get_generic as_string
let get_doc =
  let as_doc = function Document v -> v | _ -> raise Wrong_bson_type in
  get_generic as_doc
EDIT:
To remove the redundant raise Wrong_bson_type (But it is ugly):
(* Variant of [get_generic] that turns a failed pattern match inside
   [extract] into [Wrong_bson_type]. The [try] covers the lookup and the
   call to [extract], so any [Match_failure] raised there is converted. *)
let get_generic extract ename doc = try
let element = get ename doc in
match element with
| None -> None
| Some v -> Some (extract v)
with Match_failure _ -> raise Wrong_bson_type
(* The extractors below are deliberately partial: a [fun] with a single
   constructor pattern raises [Match_failure] on any other constructor.
   NOTE(review): the compiler is expected to report these patterns as
   non-exhaustive - confirm that warning is acceptable in the build. *)
let get_double = get_generic (fun (Double v) -> v)
let get_string = get_generic (fun (String v) -> v)
let get_doc = get_generic (fun (Document v)-> v)
You can use GADT to do that:
If you define a type expr like this:
(* GADT: the type parameter of each constructor's result is the type of the
   payload it carries ([document], [string] or [float]). *)
type _ expr =
| Document: document -> document expr
| String: string -> string expr
| Double: float -> float expr
You can write a function get like this:
(* Unwraps the payload of an [expr]; the locally abstract type [v] lets each
   branch return the payload at its own type. *)
let get : type v. v expr -> v = fun e ->
  match e with
  | Double d -> d
  | String s -> s
  | Document doc -> doc
With GADTs:
(* Witness of the requested result type: the index of each constructor is
   the OCaml type the caller wants back ([float], [string] or [document]). *)
type _ asked =
| TDouble : float asked
| TString : string asked
| TDocument : document asked
(* [get asked ename doc] looks up [ename] in [doc] and returns the payload at
   the type selected by [asked].
   Returns [None] when [ename] is not bound ([Not_found] from
   [StringMap.find]).
   Raises [Wrong_bson_type] when the stored element does not have the
   requested constructor; without the final case the match would be
   non-exhaustive and such a mismatch would surface as [Match_failure]. *)
let get : type v. v asked -> string -> doc StringMap.t -> v option =
  fun asked ename doc ->
    try
      Some (match asked, StringMap.find ename doc with
          | TDouble, Double f -> f
          | TString, String s -> s
          | TDocument, Document d -> d
          | _ -> raise Wrong_bson_type)
    with Not_found -> None
(* Typed getters: [get] specialised to one witness each. *)
let get_double ename doc = get TDouble ename doc
let get_string ename doc = get TString ename doc
let get_document ename doc = get TDocument ename doc
If you can live with these extractor functions:
(* Extractors: return the payload of the expected constructor and raise
   [Wrong_bson_type] for any other element. *)
let extract_double element =
  match element with
  | Double v -> v
  | _ -> raise Wrong_bson_type
let extract_string element =
  match element with
  | String v -> v
  | _ -> raise Wrong_bson_type
let extract_doc element =
  match element with
  | Document v -> v
  | _ -> raise Wrong_bson_type
Then you can use monadic style for the higher-order function, which allows you to keep your original definition of get:
(* Option monad: [return] wraps a value; [>>=] feeds the wrapped value to
   [f], or short-circuits to [None] when there is nothing to feed. *)
let return value = Some value
let ( >>= ) opt f =
  match opt with
  | None -> None
  | Some value -> f value
(* [get_with exf ename doc] looks up [ename] with [get] and, if an element
   is found, applies the extractor [exf] to it. *)
let get_with exf ename doc =
  let lift element = return (exf element) in
  get ename doc >>= lift
(* Typed getters built from the extractor functions. *)
let get_double ename doc = get_with extract_double ename doc
let get_string ename doc = get_with extract_string ename doc
let get_doc ename doc = get_with extract_doc ename doc
Less redundant and abstracts the side effect to generic bind and return operations.
I am learning Jason Hickey's Introduction to Objective Caml.
There is an exercise like this:
Exercise 4.3 Suppose we have a crypto-system based on the following substitution cipher, where each plain letter is encrypted according to the following table.
Plain | A B C D
--------------------
Encrypted | C A D B
For example, the string BAD would be encrypted as ACB.
Write a function check that, given a plaintext string s1 and a ciphertext string s2, returns true if, and only if, s2 is the ciphertext for s1. Your function should raise an exception if s1 is not a plaintext string. You may wish to refer to the string operations on page 8. How does your code scale as the alphabet gets larger? [emphasis added]
Basically, I wrote two functions with might-be-stupid-naive ways for this exercise.
I would like to ask for advice on my solutions first.
Then I would like to ask for hints for the scaled solution as highlighted in the exercise.
Using if else
(* [check_cipher_1 s1 s2] is [true] when [s1] and [s2] have the same length
   and every position pairs a plain letter with its cipher letter
   (A->C, B->A, C->D, D->B). It returns [false] for different lengths, for a
   mismatching pair, and for any character of [s1] outside A-D (no exception
   is raised in that case). *)
let check_cipher_1 s1 s2 =
let len1 = String.length s1 in
let len2 = String.length s2 in
if len1 = len2 then
(* [check pos] walks from index [pos] down to 0; reaching -1 means every
   position matched. *)
let rec check pos =
if pos = -1 then
true
else
let sub1 = s1.[pos] in
let sub2 = s2.[pos] in
(* The inner matches are parenthesised so their arms do not capture the
   arms of the outer match. *)
match sub1 with
| 'A' -> (match sub2 with
|'C' -> check (pos-1)
| _ -> false)
| 'B' -> (match sub2 with
|'A' -> check (pos-1)
| _ -> false)
| 'C' -> (match sub2 with
|'D' -> check (pos-1)
| _ -> false)
| 'D' -> (match sub2 with
|'B' -> check (pos-1)
| _ -> false)
| _ -> false;
in
check (len1-1)
else
false
Using pure match everywhere
(* Same check as [check_cipher_1], written with [match] only: [true] when
   both strings have the same length and every position pairs A->C, B->A,
   C->D, D->B; [false] otherwise, including when [s1] contains a character
   outside A-D. *)
let check_cipher_2 s1 s2 =
let len1 = String.length s1 in
let len2 = String.length s2 in
(* Matching on unit with a guard stands in for [if len1 = len2]. *)
match () with
| () when len1 = len2 ->
(* [check pos] walks from index [pos] down to 0; -1 means all matched. *)
let rec check pos =
match pos with
| -1 -> true
| _ ->
let sub1 = s1.[pos] in
let sub2 = s2.[pos] in
(*http://stackoverflow.com/questions/257605/ocaml-match-expression-inside-another-one*)
match sub1 with
| 'A' -> (match sub2 with
|'C' -> check (pos-1)
| _ -> false)
| 'B' -> (match sub2 with
|'A' -> check (pos-1)
| _ -> false)
| 'C' -> (match sub2 with
|'D' -> check (pos-1)
| _ -> false)
| 'D' -> (match sub2 with
|'B' -> check (pos-1)
| _ -> false)
| _ -> false
in
check (len1-1)
| () -> false
Ok. The above two solutions are similar.
I produced these two because here, http://www.quora.com/OCaml/What-is-the-syntax-for-nested-IF-statements-in-OCaml, some people say that if/else is not preferred.
This is essentially the first time I ever wrote a not-that-simple function in my whole life. So I am really hungry for suggestions here.
For example,
how can I improve these solutions?
should I prefer match over if else?
Am I designing the rec or use the rec correctly?
if that in check (len1-1) correct?
Scale it
The exercise asks How does your code scale as the alphabet gets larger?. I really don't have a clue for now. In Java, I would say I will have a map, then for each char in s1, I am looking s2 for the according char and to see whether it is the value in the map.
Any suggestions on this?
Here's a simple solution:
(* Substitution table: maps a plain letter to its cipher letter.
   Raises [Failure "not a plaintext"] for any character outside A-D. *)
let tr plain =
  match plain with
  | 'A' -> 'C'
  | 'B' -> 'A'
  | 'C' -> 'D'
  | 'D' -> 'B'
  | _ -> failwith "not a plaintext"
(* [check ~tr s1 s2] encrypts [s1] character by character with [tr] and
   compares the result with [s2]; exceptions raised by [tr] propagate. *)
let check ~tr s1 s2 =
  let encrypted = String.map tr s1 in
  encrypted = s2
check ~tr "BAD" "ACD"
you can add more letters by composing with tr. I.e.
(* [comp c1 c2 x] tries [c1 x] and falls back to [c2 x] when [c1] raises.
   Note that every exception from [c1] triggers the fallback, not only
   "unknown letter" failures. *)
let comp c1 c2 x =
  match c1 x with
  | translated -> translated
  | exception _ -> c2 x
(* Extends [tr] with the mapping X -> Y. The second function is deliberately
   partial: it is only consulted when [tr] raises, and itself raises
   [Match_failure] for any character other than 'X'. *)
let tr2 = comp tr (function | 'X' -> 'Y')
how can I improve these solutions?
You misuse indentation which makes the program much harder to read. Eliminating unnecessary tabs and move check to outer scope for readability:
(* [check_cipher_1 s1 s2] is [true] when both strings have the same length
   and every position pairs A->C, B->A, C->D, D->B. Different lengths, a
   mismatching pair, or a character of [s1] outside A-D all give [false]. *)
let check_cipher_1 s1 s2 =
  let n = String.length s1 in
  (* [walk i] checks positions i, i-1, ..., 0; -1 means nothing is left. *)
  let rec walk i =
    if i = -1 then true
    else
      let pair_ok =
        match s1.[i] with
        | 'A' -> (match s2.[i] with 'C' -> true | _ -> false)
        | 'B' -> (match s2.[i] with 'A' -> true | _ -> false)
        | 'C' -> (match s2.[i] with 'D' -> true | _ -> false)
        | 'D' -> (match s2.[i] with 'B' -> true | _ -> false)
        | _ -> false
      in
      if pair_ok then walk (i - 1) else false
  in
  if String.length s2 <> n then false else walk (n - 1)
should I prefer match over if else?
It depends on situations. If pattern matching is superficial as you demonstrate in the 2nd function (match () with | () when len1 = len2) then it brings no value compared to a simple if/else construct. If you pattern match on values, it is better than if/else and potentially shorter when you make use of advanced constructs. For example, you can shorten the function by matching on tuples:
(* Tuple-matching version: [true] when both strings have the same length
   and every (plain, cipher) pair is one of (A,C), (B,A), (C,D), (D,B);
   [false] otherwise. *)
let check_cipher_1 s1 s2 =
  let rec check pos =
    pos = -1
    || (match s1.[pos], s2.[pos] with
        | ('A', 'C') | ('B', 'A') | ('C', 'D') | ('D', 'B') -> check (pos - 1)
        | _ -> false)
  in
  String.length s1 = String.length s2 && check (String.length s1 - 1)
Here we also use Or pattern to group patterns having the same output actions and replace an unnecessary if/else block by &&.
Am I designing the rec or use the rec correctly?
if that in check (len1-1) correct?
Your function looks nice. There's no better way than testing with a few inputs on OCaml top-level.
Scale it
The number of patterns grows linearly with the size of the alphabet. It's pretty nice IMO.
The simplest solution seems to be to just cipher the text and compare the result:
(* Encrypts one plain letter; raises [Failure "cipher_char"] for any
   character outside A-D. *)
let cipher_char plain =
  match plain with
  | 'A' -> 'C'
  | 'B' -> 'A'
  | 'C' -> 'D'
  | 'D' -> 'B'
  | _ -> failwith "cipher_char"
(* Encrypts a whole string character by character. *)
let cipher text = String.map cipher_char text
(* [true] when [s2] is exactly the encryption of [s1]. *)
let check_cipher s1 s2 =
  let encrypted = cipher s1 in
  encrypted = s2
The cipher_char function scales linearly with the size of the alphabet. To make it a bit more compact and generic you could use a lookup table of some form, e.g.
(* Assume that only letters are needed *)
(* Position i holds the cipher letter for the i-th plain letter counted
   from 'A'. *)
let cipher_mapping = "CADB"
(* Encrypts one plain letter via the lookup table.
   Raises [Failure "cipher_char"] when [c] has no entry in the table.
   The index is range-checked explicitly instead of catching the
   [Invalid_argument] raised by an out-of-bounds string access, so the
   function does not depend on bounds checking being enabled. *)
let cipher_char c =
  let idx = Char.code c - Char.code 'A' in
  if idx >= 0 && idx < String.length cipher_mapping then cipher_mapping.[idx]
  else failwith "cipher_char"
Why do i get errors when I write this kind of pattern matching :
type t = A of int | B of float
(* Intentionally ill-formed example: the two alternatives of the or-pattern
   bind different variables ([i] and [f]), which the compiler rejects. *)
let f = function
| (A i | B f) -> true
| _ -> false
or
(* Same ill-formed or-pattern without parentheses: [i] is bound on one side
   and [f] on the other, so the compiler rejects it. *)
let f = function
| A i | B f -> true
| _ -> false
Error: Variable f must occur on both sides of this | pattern
(* Intentionally ill-typed example: [i] would be an [int] under [A] and a
   [float] under [B], so the or-pattern is rejected. *)
let f = function
| (A i | B i) -> true
| _ -> false
or
(* Same ill-typed or-pattern without parentheses: [i] cannot be both [int]
   (from [A]) and [float] (from [B]). *)
let f = function
| A i | B i -> true
| _ -> false
Error: This pattern matches values of type float
but a pattern was expected which matches values of type int
If you provide a single right-hand side for multiple patterns (as you do), OCaml requires that the patterns consistently bind to pattern variables.
In the first situation,
match ... with
| A i | B f -> ...
...
the patterns don't agree on the variables they bind to: the first pattern binds to i, while the second binds to f.
In the second situation,
match ... with
| A i | B i -> ...
...
the patterns don't agree on the type of values to bind to their variables: the first pattern binds a value of type int to i, while the second binds a value of type float to i.
The only way in which these two pattern can consistently bind to variables is not to bind to any variables at all:
match ... with
| A _ | B _ -> ...
...
The complete example then becomes
type t = A of int | B of float
(* The or-pattern binds no variables, so both alternatives agree.
   [A] and [B] are the only constructors of [t], so the final wildcard arm
   can never be reached. *)
let f = function
| A _ | B _ -> true
| _ -> false
(But note that the last arm of the pattern match is superfluous as the first two pattern already exhaustively match all values of your type t. Hence, we get:
(* Exhaustive over [t]: both constructors map to [true]. *)
let f (A _ | B _) = true
This of course is equivalent to writing let f _ = true.)
In Or pattern (| pattern), you lose track of which constructors you are in. Therefore, you need to bind the same set of variables to work without referring to constructors.
And OCaml is strongly-typed; a value i cannot have both type int and type float.
If type t has more than two cases, you should write:
(* For a type with constructors besides [A] and [B]: those two give [true],
   everything else gives [false]. *)
let f v =
  match v with
  | A _ | B _ -> true
  | _ -> false
otherwise:
(* No wildcard arm: the or-pattern alone covers the cases of interest. *)
let f v =
  match v with
  | A _ | B _ -> true
is enough since pattern matching is already exhaustive.
I agree that Or pattern is quite restrictive, but sometimes it is helpful when you have symmetric cases in your function:
(* A number that is either an integer or a float. *)
type num = Int of int | Float of float

(* Adds two [num]s. Int + Int stays an [Int]; as soon as a float is
   involved the result is a [Float]. The two mixed cases are symmetric and
   share one arm through an or-pattern that binds [i] and [f] on both
   sides. *)
let add s1 s2 =
  match (s1, s2) with
  | Float f1, Float f2 -> Float (f1 +. f2)
  | Int i1, Int i2 -> Int (i1 + i2)
  | (Int i, Float f) | (Float f, Int i) -> Float (float_of_int i +. f)