finishing up parser
parent
bbbd19eac7
commit
de30d605f6
@ -1,3 +1,3 @@
|
||||
(library
|
||||
(public_name hcfg)
|
||||
(libraries fmt))
|
||||
(libraries base fmt))
|
||||
|
@ -1,2 +1,3 @@
|
||||
module Token = Token
|
||||
module Lexer = Lexer
|
||||
module Lexer = Lexer
|
||||
module Parser = Parser
|
@ -0,0 +1,143 @@
|
||||
include Token
|
||||
|
||||
let rec split_at1 l n =
|
||||
if n = 0 then ([], l) else
|
||||
match l with
|
||||
| [] -> ([], [])
|
||||
| h :: t -> let (l1, l2) = split_at1 t (n-1) in (h :: l1, l2);;
|
||||
|
||||
|
||||
let safe_list xs =
|
||||
match List.length xs with
|
||||
| 0 -> (None, None)
|
||||
| 1 -> ((Some (List.hd xs)), None)
|
||||
| _ -> ((Some (List.hd xs)), Some(List.tl xs))
|
||||
|
||||
let list_skip xs n =
|
||||
match split_at1 xs n with
|
||||
| (_, tl) -> tl
|
||||
|
||||
let if_list_peek xs n v =
|
||||
match List.nth_opt xs n with
|
||||
| Some x -> x = v
|
||||
| _ -> false
|
||||
|
||||
module Parser = struct
|
||||
include Token
|
||||
open Base
|
||||
|
||||
let split_with (l:Token.token_type list) left right =
|
||||
let f (length, level) (i, tk) =
|
||||
if level < 1 && i < length then
|
||||
(i, level)
|
||||
else
|
||||
match tk with
|
||||
| a when Token.equal a left -> (length, level + 1)
|
||||
| a when Token.equal a right -> (length, level - 1)
|
||||
| _ -> (length, level)
|
||||
in
|
||||
match List.fold_left ~f:f ~init:(List.length l, 1) (List.mapi ~f:(fun i e -> (i,e)) l) with
|
||||
| (i, _) -> Some (split_at1 l i)
|
||||
|
||||
type hvalue =
|
||||
| INTEGER of int
|
||||
| FLOAT of float
|
||||
| STRING of string
|
||||
| BINARY of Bytes.t
|
||||
| NULL
|
||||
| ARRAY of hvalue List.t
|
||||
| OBJECT of (string, hvalue, String.comparator_witness) Map.t
|
||||
|
||||
let rec equal a b =
|
||||
match (a,b) with
|
||||
| (INTEGER a, INTEGER b) -> Int.equal a b
|
||||
| (FLOAT a, FLOAT b) -> Float.equal a b
|
||||
| (STRING a, STRING b) -> String.equal a b
|
||||
| (BINARY a, BINARY b) -> Bytes.equal a b
|
||||
| (NULL, NULL) -> true
|
||||
| (ARRAY a, ARRAY b) -> List.equal equal a b
|
||||
| (OBJECT a, OBJECT b) -> Map.equal equal a b
|
||||
| _ -> false
|
||||
|
||||
|
||||
let rec hvalue_to_string = function
|
||||
| INTEGER a -> "INTEGER: " ^ Int.to_string a
|
||||
| FLOAT a -> "INTEGER: " ^ Float.to_string a
|
||||
| STRING a -> "INTEGER: " ^ a
|
||||
| BINARY a -> "BINARY " ^ Bytes.fold a ~init:"" ~f:(fun acc b -> acc ^ Printf.sprintf "\\x%2x" (Char.to_int b))
|
||||
| NULL -> "NULL"
|
||||
| ARRAY a -> "ARRAY [" ^ (String.concat ~sep:", " (List.map a ~f:hvalue_to_string)) ^ "]"
|
||||
| OBJECT a -> "OBJECT {" ^ (String.concat ~sep:", " (List.map (Map.to_alist a) ~f:(fun (k,v) -> k ^ ": (" ^ (hvalue_to_string v) ^ ")"))) ^ "}"
|
||||
|
||||
let pretty_print ppf tok = Fmt.pf ppf "HValue %s" (hvalue_to_string tok)
|
||||
|
||||
let parse_primitive token =
|
||||
match token with
|
||||
| Token.INTEGER a -> INTEGER a
|
||||
| Token.FLOAT a -> FLOAT a
|
||||
| Token.STRING a -> STRING a
|
||||
| Token.BINARY a -> BINARY a
|
||||
| Token.NULL -> NULL
|
||||
| _ -> failwith "Not a primitive token"
|
||||
|
||||
let print_token_array tokens =
|
||||
List.map tokens ~f:Token.token_to_string |> String.concat ~sep:"; "
|
||||
|
||||
|
||||
let rec parse tokens =
|
||||
match List.hd tokens with
|
||||
| Some(Token.INTEGER _ | Token.FLOAT _ | Token.STRING _ | Token.BINARY _ | Token.NULL as t) -> parse_primitive t
|
||||
| Some(Token.LBRACE) -> (match List.tl tokens with
|
||||
| Some(tl) -> parse_object tl
|
||||
| _ -> failwith "Not a valid hcfg file")
|
||||
|
||||
| Some(Token.LBRACKET) -> (match List.tl tokens with
|
||||
| Some(tl) -> parse_arr tl
|
||||
| _ -> failwith "Not a valid hcfg file")
|
||||
| Some(Token.EOF) -> failwith "Empty tree"
|
||||
| _ -> failwith "Not a valid hcfg file"
|
||||
and parse_object tokens =
|
||||
let rec parse_object_i tkns acc =
|
||||
match safe_list tkns with
|
||||
| (Some hd, Some tl) -> (match hd with
|
||||
| Token.IDENT i when if_list_peek tl 0 Token.COLON -> (match List.nth tl 1 with
|
||||
| Some(Token.INTEGER _ | Token.FLOAT _ | Token.STRING _ | Token.BINARY _ | Token.NULL as t) when if_list_peek tl 2 Token.COMMA -> let (_arr, rest) = split_at1 tl 3 in parse_object_i rest (Map.set acc ~key:i ~data:(parse_primitive t))
|
||||
| Some(Token.INTEGER _ | Token.FLOAT _ | Token.STRING _ | Token.BINARY _ | Token.NULL as t) when if_list_peek tl 2 Token.RBRACE -> let (_arr, rest) = split_at1 tl 2 in parse_object_i rest (Map.set acc ~key:i ~data:(parse_primitive t))
|
||||
| Some Token.LBRACKET -> (match split_with (list_skip tl 2) Token.LBRACKET Token.RBRACKET with
|
||||
| Some (arr, rest) ->
|
||||
parse_object_i (rest) (Map.set acc ~key:i ~data:(parse_arr arr))
|
||||
| None -> failwith "uhhh arr in obj")
|
||||
| Some Token.LBRACE -> (match split_with (list_skip tl 2) Token.LBRACE Token.RBRACE with
|
||||
| Some (arr, rest) -> parse_object_i (rest) (Map.set acc ~key:i ~data:(parse_object arr))
|
||||
| None -> failwith "uhhh arr in obj")
|
||||
| _ -> "TODO: Unhandled token for object value" ^ token_to_string hd ^ "(" ^ print_token_array tl ^ ")" |> failwith
|
||||
)
|
||||
| Token.COMMA when not (if_list_peek tl 0 Token.RBRACE) -> parse_object_i tl acc
|
||||
| Token.COMMA when if_list_peek tl 0 Token.RBRACE -> acc
|
||||
| Token.RBRACE -> acc
|
||||
| _ -> "TODO: Unhandled token in object: " ^ (Token.token_to_string hd) ^ " (" ^ (print_token_array tl) ^ ")"|> failwith)
|
||||
| (Some Token.RBRACE, _) -> acc
|
||||
| _ -> (String.concat ~sep:", " (List.map (Map.to_alist acc) ~f:(fun (k,v) -> k ^ ": (" ^ (hvalue_to_string v) ^ ")"))) |> failwith
|
||||
in OBJECT (parse_object_i tokens (Map.empty (module String)))
|
||||
and parse_arr tokens =
|
||||
let rec parse_arr_i tkns acc =
|
||||
match safe_list tkns with
|
||||
| (Some hd, Some tl) -> (match hd with
|
||||
| Token.INTEGER _ | Token.FLOAT _ | Token.STRING _ | Token.BINARY _ | Token.NULL as t -> (match List.hd tl with
|
||||
| Some Token.COMMA when List.length tl >= 2 -> parse_arr_i (List.tl_exn tl) ((parse_primitive t) :: acc)
|
||||
| Some Token.RBRACKET -> ((parse_primitive t) :: acc)
|
||||
| _ -> failwith "Invalid array"
|
||||
)
|
||||
| Token.LBRACKET -> (match split_with tl Token.LBRACKET Token.RBRACKET with
|
||||
| Some (arr, rest) -> parse_arr_i (rest) (parse_arr arr :: acc)
|
||||
| None -> failwith "uhhh arr in arr")
|
||||
| Token.LBRACE -> (match split_with tl Token.LBRACE Token.RBRACE with
|
||||
| Some (arr, rest) -> parse_arr_i (rest) (parse_object arr :: acc)
|
||||
| None -> failwith "uhhh obj in arr")
|
||||
| Token.RBRACKET -> acc
|
||||
| t -> "TODO: Unhandled token in array: " ^ (Token.token_to_string t) ^ " (" ^ (print_token_array tl) ^ ")"|> failwith
|
||||
)
|
||||
| (Some Token.EOF, None) -> acc
|
||||
| _ -> "TODO: parse_arr (" ^ (print_token_array tkns) ^ ")"|> failwith
|
||||
in ARRAY (List.rev (parse_arr_i tokens []))
|
||||
end
|
@ -1,31 +1,42 @@
|
||||
module Token = struct
|
||||
open Base
|
||||
type token_type =
|
||||
| ILLEGAL
|
||||
| EOF
|
||||
| SPACE
|
||||
(* Identifiers and literals *)
|
||||
| IDENT of string
|
||||
| INTEGER of int
|
||||
| FLOAT of float
|
||||
| BOOL of bool
|
||||
| STRING of string
|
||||
| BINARY of Bytes.t
|
||||
| NULL
|
||||
(* -- Delimiters *)
|
||||
| COLON
|
||||
| COMMA
|
||||
| LBRACE
|
||||
| RBRACE
|
||||
| LBRACKET
|
||||
| RBRACKET
|
||||
|
||||
let equal (a:token_type) b = Poly.equal a b
|
||||
|
||||
let token_to_string = function
|
||||
| ILLEGAL -> "ILLEGAL"
|
||||
| EOF -> "EOF"
|
||||
| SPACE -> "SPACE"
|
||||
| IDENT a -> "IDENT " ^ a
|
||||
| INTEGER a -> "INTEGER " ^ string_of_int a
|
||||
| FLOAT a -> "FLOAT " ^ string_of_float a
|
||||
| INTEGER a -> "INTEGER " ^ Int.to_string a
|
||||
| FLOAT a -> "FLOAT " ^ Float.to_string a
|
||||
| BOOL a -> "BOOL " ^ Bool.to_string a
|
||||
| STRING a -> "STRING " ^ a
|
||||
| BINARY a -> "BINARY " ^ Bytes.fold a ~init:"" ~f:(fun acc b -> acc ^ Printf.sprintf "\\x%2x" (Char.to_int b))
|
||||
| NULL -> "NULL"
|
||||
| COMMA -> "COMMA"
|
||||
| COLON -> "COLON"
|
||||
| LBRACE -> "LBRACE"
|
||||
| RBRACE -> "RBRACE"
|
||||
| LBRACKET -> "LBRACKET"
|
||||
| RBRACKET -> "RBRACKET"
|
||||
|
||||
let pretty_print ppf tok = Fmt.pf ppf "Token %s" (token_to_string tok)
|
||||
end
|
||||
|
@ -0,0 +1,11 @@
|
||||
{
|
||||
// test
|
||||
key: "value",
|
||||
/* this is a comment */
|
||||
other_key: 100,
|
||||
floating: 123f,
|
||||
other_float: 123.3,
|
||||
bin: b"test",
|
||||
arr: [1,2],
|
||||
nested: { key: "trailing", },
|
||||
}
|
@ -1,29 +1,46 @@
|
||||
open Hcfg
|
||||
include Lexer
|
||||
include Token
|
||||
include Lexer
|
||||
include Parser
|
||||
|
||||
open Base
|
||||
|
||||
let token_testable = Alcotest.testable Token.pretty_print (=)
|
||||
let token_testable = Alcotest.testable Token.pretty_print Token.equal
|
||||
let hvalue_testable = Alcotest.testable Parser.pretty_print Parser.equal
|
||||
|
||||
let test_lexer_delimiters () =
|
||||
let read_test_file =
|
||||
(* open_in_bin works correctly on Unix and Windows *)
|
||||
let ch = Stdlib.open_in_bin "test/test.hcfg" in
|
||||
let s = Stdlib.really_input_string ch (Stdlib.in_channel_length ch) in
|
||||
Stdlib.close_in ch;
|
||||
s
|
||||
|
||||
let test_lexer() =
|
||||
Alcotest.(check (list token_testable))
|
||||
"same token types" [
|
||||
Token.LBRACE
|
||||
; Token.IDENT "key"
|
||||
; Token.COLON
|
||||
; Token.STRING "value"
|
||||
; Token.COMMA
|
||||
; Token.IDENT "other_key"
|
||||
; Token.COLON
|
||||
; Token.INTEGER 100
|
||||
; Token.IDENT "key" ; Token.COLON ; Token.STRING "value" ; Token.COMMA
|
||||
; Token.IDENT "other_key" ; Token.COLON ; Token.INTEGER 100 ; Token.COMMA
|
||||
; Token.IDENT "floating" ; Token.COLON ; Token.FLOAT 123.0 ; Token.COMMA
|
||||
; Token.IDENT "other_float" ; Token.COLON ; Token.FLOAT 123.3 ; Token.COMMA
|
||||
; Token.IDENT "bin" ; Token.COLON ; Token.BINARY (Bytes.of_string "test") ; Token.COMMA
|
||||
; Token.IDENT "arr" ; Token.COLON ; Token.LBRACKET ; Token.INTEGER 1 ; Token.COMMA ; Token.INTEGER 2; Token.RBRACKET ; Token.COMMA
|
||||
; Token.IDENT "nested" ; Token.COLON ; Token.LBRACE ; Token.IDENT "key" ; Token.COLON ; Token.STRING "trailing" ; Token.COMMA ; Token.RBRACE ; Token.COMMA
|
||||
; Token.RBRACE
|
||||
; Token.EOF
|
||||
]
|
||||
(Lexer.generate_tokens {|{key: "value", other_key: 100}|})
|
||||
(Lexer.generate_tokens read_test_file)
|
||||
|
||||
let test_ast() =
|
||||
Alcotest.(check (hvalue_testable))
|
||||
"same token types" (Parser.OBJECT (Base.Map.of_alist_exn (module String) [("a", Parser.FLOAT 1.); ("b", Parser.ARRAY [Parser.INTEGER 1; Parser.INTEGER 2]); ("c", Parser.OBJECT (Base.Map.of_alist_exn (module String) [("a", Parser.INTEGER 2)]))]))
|
||||
(Lexer.generate_tokens {|{a: 1f,b: [1,2], c: { a: 2}}|} |> Parser.parse)
|
||||
|
||||
let () =
|
||||
(* Lexer.generate_tokens {|{key: "value",}|} |> List.map Token.token_to_string |> String.concat ", " |> print_endline *)
|
||||
Alcotest.run "Lexer"
|
||||
[
|
||||
( "list-delimiters",
|
||||
[ Alcotest.test_case "first case" `Slow test_lexer_delimiters ] );
|
||||
( "lex test file",
|
||||
[ Alcotest.test_case "" `Slow test_lexer] );
|
||||
( "parse test file",
|
||||
[ Alcotest.test_case "" `Slow test_ast]);
|
||||
]
|
Loading…
Reference in New Issue