From 5a31d63c3fb20c605c939ff62ca9b82c9b350d10 Mon Sep 17 00:00:00 2001 From: Carsten Kragelund Date: Wed, 13 Mar 2024 16:29:38 +0100 Subject: [PATCH] Big rewrite JS support --- .gitignore | 3 +- README.md | 23 ------------ dune-project | 21 ++++++++++- hcfg.opam | 30 +++++++++++++-- jslib/dune | 3 ++ jslib/main.ml | 10 +++++ lib/dune | 7 ++++ lib/hcfg.ml | 3 ++ {src => lib}/lexer.ml | 0 {src => lib}/parser.ml | 67 ++++++++++++++++++++-------------- {src => lib}/token.ml | 7 +--- src/dune | 3 -- src/hcfg.ml | 3 -- test/dune | 5 ++- test/prettyprint.ml | 7 ++++ test/{test.ml => test_hcfg.ml} | 13 ++++--- 16 files changed, 131 insertions(+), 74 deletions(-) delete mode 100644 README.md create mode 100644 jslib/dune create mode 100644 jslib/main.ml create mode 100644 lib/dune create mode 100644 lib/hcfg.ml rename {src => lib}/lexer.ml (100%) rename {src => lib}/parser.ml (68%) rename {src => lib}/token.ml (76%) delete mode 100644 src/dune delete mode 100644 src/hcfg.ml create mode 100644 test/prettyprint.ml rename test/{test.ml => test_hcfg.ml} (76%) diff --git a/.gitignore b/.gitignore index 9c5f578..6bf8a98 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ -_build \ No newline at end of file +_build/ +.DS_Store diff --git a/README.md b/README.md deleted file mode 100644 index da1adbe..0000000 --- a/README.md +++ /dev/null @@ -1,23 +0,0 @@ -# HCFG - -HCFG is a configuration format meant to be usable without sacrificing familiarity, it does this by expanding on the syntax of JSON, allowing for extra features such as -* Trailing commas -* Comments -* Binary data -* Unquoted keys - -## Example - -```hcfg -{ - // test - key: "value", - /* this is a comment */ - other_key: 100, - floating: 123f, - other_float: 123.3, - bin: b"test", - arr: [1,2], - nested: { key: "not trailing" }, -} -``` \ No newline at end of file diff --git a/dune-project b/dune-project index 3d0d0bb..a582e73 100644 --- a/dune-project +++ b/dune-project @@ -1,3 +1,22 @@ -(lang dune 3.8) +(lang dune 3.14) + +(using melange 0.1) (name hcfg) + +(generate_opam_files true) + +(source + (github nyxkrage/hcfg)) + +(authors "Carsten Kragelund") + +(maintainers "Carsten Kragelund") + +(license "MIT") + +(package + (name hcfg) + (depends ocaml base melange dune)) + +; See the complete stanza docs at https://dune.readthedocs.io/en/stable/dune-files.html#dune-project diff --git a/hcfg.opam b/hcfg.opam index bd36fcd..2151500 100644 --- a/hcfg.opam +++ b/hcfg.opam @@ -1,7 +1,29 @@ -opam-version: "2.0.0" -maintainer: "carsten@kragelund.me" +# This file is generated by dune, edit dune-project instead +opam-version: "2.0" +maintainer: ["Carsten Kragelund"] authors: ["Carsten Kragelund"] license: "MIT" +homepage: "https://github.com/nyxkrage/hcfg" +bug-reports: "https://github.com/nyxkrage/hcfg/issues" +depends: [ + "ocaml" + "base" + "melange" + "dune" {>= "3.14"} + "odoc" {with-doc} +] build: [ - ["dune" "build" "--only" "hcfg" "--root" "." "-j" jobs "@install"] -] \ No newline at end of file + ["dune" "subst"] {dev} + [ + "dune" + "build" + "-p" + name + "-j" + jobs + "@install" + "@runtest" {with-test} + "@doc" {with-doc} + ] +] +dev-repo: "git+https://github.com/nyxkrage/hcfg.git" diff --git a/jslib/dune b/jslib/dune new file mode 100644 index 0000000..e30e334 --- /dev/null +++ b/jslib/dune @@ -0,0 +1,3 @@ +(melange.emit + (target main) + (libraries hcfg)) diff --git a/jslib/main.ml b/jslib/main.ml new file mode 100644 index 0000000..85579bd --- /dev/null +++ b/jslib/main.ml @@ -0,0 +1,10 @@ +let parse str = + let rec hvalue_to_js x = match x with + | Hcfg.Parser.STRING s -> Js.Json.string s + | Hcfg.Parser.INTEGER i -> Js.Json.number (float_of_int i) + | Hcfg.Parser.FLOAT f -> Js.Json.number f + | Hcfg.Parser.BINARY _ -> failwith "Binary not supported" + | Hcfg.Parser.NULL -> Js.Json.null + | Hcfg.Parser.ARRAY a -> Js.Json.array (Array.of_list (List.map hvalue_to_js a)) + | Hcfg.Parser.OBJECT o -> Js.Json.object_ (Js.Dict.fromList (List.map (fun (k, v) -> (k, hvalue_to_js v)) (Hcfg.Parser.Obj.to_list o))) in + Hcfg.Lexer.generate_tokens str |> Hcfg.Parser.parse |> hvalue_to_js \ No newline at end of file diff --git a/lib/dune b/lib/dune new file mode 100644 index 0000000..254efde --- /dev/null +++ b/lib/dune @@ -0,0 +1,7 @@ +(library + (public_name hcfg) + (name hcfg) + (modes melange best) + (preprocess + (pps melange.ppx)) + (modules hcfg lexer parser token)) diff --git a/lib/hcfg.ml b/lib/hcfg.ml new file mode 100644 index 0000000..0398fcd --- /dev/null +++ b/lib/hcfg.ml @@ -0,0 +1,3 @@ +module Token = Token.Token +module Lexer = Lexer.Lexer +module Parser = Parser.Parser \ No newline at end of file diff --git a/src/lexer.ml b/lib/lexer.ml similarity index 100% rename from src/lexer.ml rename to lib/lexer.ml diff --git a/src/parser.ml b/lib/parser.ml similarity index 68% rename from src/parser.ml rename to lib/parser.ml index 22e1c36..339483e 100644 --- a/src/parser.ml +++ b/lib/parser.ml @@ -1,11 +1,19 @@ -include Token - let rec split_at1 l n = if n = 0 then ([], l) else match l with | [] -> ([], []) | h :: t -> let (l1, l2) = split_at1 t (n-1) in (h :: l1, l2);; +let safe_hd xs = + match xs with + | [] -> None + | x :: _ -> Some x + +let safe_tl xs = + match xs with + | [] -> None + | _ :: xs -> Some xs + let safe_list xs = match List.length xs with @@ -24,7 +32,6 @@ let if_list_peek xs n v = module Parser = struct include Token - open Base let split_with (l:Token.token_type list) left right = let f (length, level) (i, tk) = @@ -36,9 +43,11 @@ module Parser = struct | a when Token.equal a right -> (length, level - 1) | _ -> (length, level) in - match List.fold_left ~f:f ~init:(List.length l, 1) (List.mapi ~f:(fun i e -> (i,e)) l) with + match List.fold_left f (List.length l, 1) (List.mapi (fun i e -> (i,e)) l) with | (i, _) -> Some (split_at1 l i) + module Obj = Map.Make(String);; + type hvalue = | INTEGER of int | FLOAT of float @@ -46,7 +55,7 @@ module Parser = struct | BINARY of Bytes.t | NULL | ARRAY of hvalue List.t - | OBJECT of (string, hvalue, String.comparator_witness) Map.t + | OBJECT of hvalue Obj.t let rec equal a b = match (a,b) with @@ -56,7 +65,7 @@ module Parser = struct | (BINARY a, BINARY b) -> Bytes.equal a b | (NULL, NULL) -> true | (ARRAY a, ARRAY b) -> List.equal equal a b - | (OBJECT a, OBJECT b) -> Map.equal equal a b + | (OBJECT a, OBJECT b) -> Obj.equal equal a b | _ -> false @@ -64,13 +73,11 @@ module Parser = struct | INTEGER a -> "INTEGER: " ^ Int.to_string a | FLOAT a -> "INTEGER: " ^ Float.to_string a | STRING a -> "INTEGER: " ^ a - | BINARY a -> "BINARY " ^ Bytes.fold a ~init:"" ~f:(fun acc b -> acc ^ Printf.sprintf "\\x%2x" (Char.to_int b)) + | BINARY a -> "BINARY " ^ Bytes.fold_left (fun acc b -> acc ^ Printf.sprintf "\\x%2x" (Char.code b)) "" a | NULL -> "NULL" - | ARRAY a -> "ARRAY [" ^ (String.concat ~sep:", " (List.map a ~f:hvalue_to_string)) ^ "]" - | OBJECT a -> "OBJECT {" ^ (String.concat ~sep:", " (List.map (Map.to_alist a) ~f:(fun (k,v) -> k ^ ": (" ^ (hvalue_to_string v) ^ ")"))) ^ "}" + | ARRAY a -> "ARRAY [" ^ (String.concat ", " (List.map hvalue_to_string a)) ^ "]" + | OBJECT a -> "OBJECT {" ^ (String.concat ", " (List.map (fun (k,v) -> k ^ ": (" ^ (hvalue_to_string v) ^ ")") (Obj.to_list a) )) ^ "}" - let pretty_print ppf tok = Fmt.pf ppf "HValue %s" (hvalue_to_string tok) - let parse_primitive token = match token with | Token.INTEGER a -> INTEGER a @@ -81,17 +88,17 @@ module Parser = struct | _ -> failwith "Not a primitive token" let print_token_array tokens = - List.map tokens ~f:Token.token_to_string |> String.concat ~sep:"; " + List.map Token.token_to_string tokens |> String.concat "; " let rec parse tokens = - match List.hd tokens with + match safe_hd tokens with | Some(Token.INTEGER _ | Token.FLOAT _ | Token.STRING _ | Token.BINARY _ | Token.NULL as t) -> parse_primitive t - | Some(Token.LBRACE) -> (match List.tl tokens with + | Some(Token.LBRACE) -> (match safe_tl tokens with | Some(tl) -> parse_object tl | _ -> failwith "Not a valid hcfg file") - | Some(Token.LBRACKET) -> (match List.tl tokens with + | Some(Token.LBRACKET) -> (match safe_tl tokens with | Some(tl) -> parse_arr tl | _ -> failwith "Not a valid hcfg file") | Some(Token.EOF) -> failwith "Empty tree" @@ -100,33 +107,38 @@ module Parser = struct let rec parse_object_i tkns acc = match safe_list tkns with | (Some hd, Some tl) -> (match hd with - | Token.IDENT i when if_list_peek tl 0 Token.COLON -> (match List.nth tl 1 with - | Some(Token.INTEGER _ | Token.FLOAT _ | Token.STRING _ | Token.BINARY _ | Token.NULL as t) when if_list_peek tl 2 Token.COMMA -> let (_arr, rest) = split_at1 tl 3 in parse_object_i rest (Map.set acc ~key:i ~data:(parse_primitive t)) - | Some(Token.INTEGER _ | Token.FLOAT _ | Token.STRING _ | Token.BINARY _ | Token.NULL as t) when if_list_peek tl 2 Token.RBRACE -> let (_arr, rest) = split_at1 tl 2 in parse_object_i rest (Map.set acc ~key:i ~data:(parse_primitive t)) + | Token.IDENT i when if_list_peek tl 0 Token.COLON -> (match List.nth_opt tl 1 with + | Some(Token.INTEGER _ | Token.FLOAT _ | Token.STRING _ | Token.BINARY _ | Token.NULL as t) when if_list_peek tl 2 Token.COMMA -> let (_arr, rest) = split_at1 tl 3 in parse_object_i rest (Obj.add i (parse_primitive t) acc) + | Some(Token.INTEGER _ | Token.FLOAT _ | Token.STRING _ | Token.BINARY _ | Token.NULL as t) when if_list_peek tl 2 Token.RBRACE -> let (_arr, rest) = split_at1 tl 2 in parse_object_i rest (Obj.add i (parse_primitive t) acc) | Some Token.LBRACKET -> (match split_with (list_skip tl 2) Token.LBRACKET Token.RBRACKET with | Some (arr, rest) -> - parse_object_i (rest) (Map.set acc ~key:i ~data:(parse_arr arr)) + parse_object_i (rest) (Obj.add i (parse_arr arr) acc) | None -> failwith "uhhh arr in obj") | Some Token.LBRACE -> (match split_with (list_skip tl 2) Token.LBRACE Token.RBRACE with - | Some (arr, rest) -> parse_object_i (rest) (Map.set acc ~key:i ~data:(parse_object arr)) + | Some (arr, rest) -> parse_object_i (rest) (Obj.add i (parse_object arr) acc) | None -> failwith "uhhh arr in obj") - | _ -> "TODO: Unhandled token for object value" ^ token_to_string hd ^ "(" ^ print_token_array tl ^ ")" |> failwith + | _ -> "TODO: Unhandled token for object value" ^ Token.token_to_string hd ^ "(" ^ print_token_array tl ^ ")" |> failwith ) | Token.COMMA when not (if_list_peek tl 0 Token.RBRACE) -> parse_object_i tl acc | Token.COMMA when if_list_peek tl 0 Token.RBRACE -> acc | Token.RBRACE -> acc | _ -> "TODO: Unhandled token in object: " ^ (Token.token_to_string hd) ^ " (" ^ (print_token_array tl) ^ ")"|> failwith) | (Some Token.RBRACE, _) -> acc - | _ -> (String.concat ~sep:", " (List.map (Map.to_alist acc) ~f:(fun (k,v) -> k ^ ": (" ^ (hvalue_to_string v) ^ ")"))) |> failwith - in OBJECT (parse_object_i tokens (Map.empty (module String))) + | _ -> (String.concat ", " (List.map (fun (k,v) -> k ^ ": (" ^ (hvalue_to_string v) ^ ")") (Obj.to_list acc))) |> failwith + in OBJECT (parse_object_i tokens (Obj.empty)) and parse_arr tokens = let rec parse_arr_i tkns acc = match safe_list tkns with | (Some hd, Some tl) -> (match hd with - | Token.INTEGER _ | Token.FLOAT _ | Token.STRING _ | Token.BINARY _ | Token.NULL as t -> (match List.hd tl with - | Some Token.COMMA when List.length tl >= 2 -> parse_arr_i (List.tl_exn tl) ((parse_primitive t) :: acc) - | Some Token.RBRACKET -> ((parse_primitive t) :: acc) - | _ -> failwith "Invalid array" + | Token.INTEGER _ | Token.FLOAT _ | Token.STRING _ | Token.BINARY _ | Token.NULL as t -> (match safe_list tl with + | (Some Token.COMMA, Some tl) -> (match safe_hd tl with + | Some Token.EOF -> failwith "Unterminated Array" + | Some Token.RBRACKET -> ((parse_primitive t) :: acc) + | _ -> parse_arr_i tl ((parse_primitive t) :: acc) + ) + | (Some Token.RBRACKET, _) -> ((parse_primitive t) :: acc) + | (Some Token.EOF, _) -> failwith "Unterminated Array" + | _ -> "Invalid Array (" ^ print_token_array tl ^ ")" |> failwith ) | Token.LBRACKET -> (match split_with tl Token.LBRACKET Token.RBRACKET with | Some (arr, rest) -> parse_arr_i (rest) (parse_arr arr :: acc) @@ -138,6 +150,7 @@ module Parser = struct | t -> "TODO: Unhandled token in array: " ^ (Token.token_to_string t) ^ " (" ^ (print_token_array tl) ^ ")"|> failwith ) | (Some Token.EOF, None) -> acc + | (Some Token.RBRACKET, None) -> acc | _ -> "TODO: parse_arr (" ^ (print_token_array tkns) ^ ")"|> failwith in ARRAY (List.rev (parse_arr_i tokens [])) end \ No newline at end of file diff --git a/src/token.ml b/lib/token.ml similarity index 76% rename from src/token.ml rename to lib/token.ml index b3ca04e..7533054 100644 --- a/src/token.ml +++ b/lib/token.ml @@ -1,5 +1,4 @@ module Token = struct - open Base type token_type = | ILLEGAL | EOF @@ -19,7 +18,7 @@ module Token = struct | LBRACKET | RBRACKET - let equal (a:token_type) b = Poly.equal a b + let equal (a:token_type) (b:token_type) = (=) a b let token_to_string = function | ILLEGAL -> "ILLEGAL" @@ -29,7 +28,7 @@ module Token = struct | FLOAT a -> "FLOAT " ^ Float.to_string a | BOOL a -> "BOOL " ^ Bool.to_string a | STRING a -> "STRING " ^ a - | BINARY a -> "BINARY " ^ Bytes.fold a ~init:"" ~f:(fun acc b -> acc ^ Printf.sprintf "\\x%2x" (Char.to_int b)) + | BINARY a -> "BINARY " ^ Bytes.fold_left (fun acc b -> acc ^ Printf.sprintf "\\x%2x" (Char.code b)) "" a | NULL -> "NULL" | COMMA -> "COMMA" | COLON -> "COLON" @@ -37,6 +36,4 @@ module Token = struct | RBRACE -> "RBRACE" | LBRACKET -> "LBRACKET" | RBRACKET -> "RBRACKET" - - let pretty_print ppf tok = Fmt.pf ppf "Token %s" (token_to_string tok) end diff --git a/src/dune b/src/dune deleted file mode 100644 index d8899f7..0000000 --- a/src/dune +++ /dev/null @@ -1,3 +0,0 @@ -(library - (public_name hcfg) - (libraries base fmt)) diff --git a/src/hcfg.ml b/src/hcfg.ml deleted file mode 100644 index fb0e66b..0000000 --- a/src/hcfg.ml +++ /dev/null @@ -1,3 +0,0 @@ -module Token = Token -module Lexer = Lexer -module Parser = Parser \ No newline at end of file diff --git a/test/dune b/test/dune index f2cf0da..8cc22d2 100644 --- a/test/dune +++ b/test/dune @@ -1,3 +1,4 @@ (test - (name test) - (libraries alcotest hcfg fmt)) + (name test_hcfg) + (deps test.hcfg) + (libraries hcfg fmt alcotest)) diff --git a/test/prettyprint.ml b/test/prettyprint.ml new file mode 100644 index 0000000..1d4fc9a --- /dev/null +++ b/test/prettyprint.ml @@ -0,0 +1,7 @@ +module Token = struct + let pretty_print ppf tok = Fmt.pf ppf "Token %s" (Hcfg.Token.token_to_string tok) +end + +module Parser = struct + let pretty_print ppf tok = Fmt.pf ppf "HValue %s" (Hcfg.Parser.hvalue_to_string tok) +end \ No newline at end of file diff --git a/test/test.ml b/test/test_hcfg.ml similarity index 76% rename from test/test.ml rename to test/test_hcfg.ml index 8459e47..a015158 100644 --- a/test/test.ml +++ b/test/test_hcfg.ml @@ -1,16 +1,17 @@ + + open Hcfg include Token include Lexer include Parser -open Base -let token_testable = Alcotest.testable Token.pretty_print Token.equal -let hvalue_testable = Alcotest.testable Parser.pretty_print Parser.equal +let token_testable = Alcotest.testable Prettyprint.Token.pretty_print Token.equal +let hvalue_testable = Alcotest.testable Prettyprint.Parser.pretty_print Parser.equal let read_test_file = (* open_in_bin works correctly on Unix and Windows *) - let ch = Stdlib.open_in_bin "test/test.hcfg" in + let ch = Stdlib.open_in_bin "test.hcfg" in let s = Stdlib.really_input_string ch (Stdlib.in_channel_length ch) in Stdlib.close_in ch; s @@ -31,9 +32,11 @@ let test_lexer() = ] (Lexer.generate_tokens read_test_file) +module Obj = Map.Make(String) + let test_ast() = Alcotest.(check (hvalue_testable)) - "same token types" (Parser.OBJECT (Base.Map.of_alist_exn (module String) [("a", Parser.FLOAT 1.); ("b", Parser.ARRAY [Parser.INTEGER 1; Parser.INTEGER 2]); ("c", Parser.OBJECT (Base.Map.of_alist_exn (module String) [("a", Parser.INTEGER 2)]))])) + "same token types" (Parser.OBJECT (Obj.of_list [("a", Parser.FLOAT 1.); ("b", Parser.ARRAY [Parser.INTEGER 1; Parser.INTEGER 2]); ("c", Parser.OBJECT (Obj.of_list [("a", Parser.INTEGER 2)]))])) (Lexer.generate_tokens {|{a: 1f,b: [1,2], c: { a: 2}}|} |> Parser.parse) let () =