Big rewrite JS support

main
Carsten Kragelund 2024-03-13 16:29:38 +01:00
parent f39fede52a
commit 5a31d63c3f
16 changed files with 131 additions and 74 deletions

3
.gitignore vendored

@ -1 +1,2 @@
_build
_build/
.DS_Store

@ -1,23 +0,0 @@
# HCFG
HCFG is a configuration format meant to be usable without sacrificing familiarity, it does this by expanding on the syntax of JSON, allowing for extra features such as
* Trailing commas
* Comments
* Binary data
* Unquoted keys
## Example
```hcfg
{
// test
key: "value",
/* this is a comment */
other_key: 100,
floating: 123f,
other_float: 123.3,
bin: b"test",
arr: [1,2],
nested: { key: "not trailing" },
}
```

@ -1,3 +1,22 @@
(lang dune 3.8)
(lang dune 3.14)
(using melange 0.1)
(name hcfg)
(generate_opam_files true)
(source
(github nyxkrage/hcfg))
(authors "Carsten Kragelund")
(maintainers "Carsten Kragelund")
(license "MIT")
(package
(name hcfg)
(depends ocaml base melange dune))
; See the complete stanza docs at https://dune.readthedocs.io/en/stable/dune-files.html#dune-project

@ -1,7 +1,29 @@
opam-version: "2.0.0"
maintainer: "carsten@kragelund.me"
# This file is generated by dune, edit dune-project instead
opam-version: "2.0"
maintainer: ["Carsten Kragelund"]
authors: ["Carsten Kragelund"]
license: "MIT"
homepage: "https://github.com/nyxkrage/hcfg"
bug-reports: "https://github.com/nyxkrage/hcfg/issues"
depends: [
"ocaml"
"base"
"melange"
"dune" {>= "3.14"}
"odoc" {with-doc}
]
build: [
["dune" "build" "--only" "hcfg" "--root" "." "-j" jobs "@install"]
]
["dune" "subst"] {dev}
[
"dune"
"build"
"-p"
name
"-j"
jobs
"@install"
"@runtest" {with-test}
"@doc" {with-doc}
]
]
dev-repo: "git+https://github.com/nyxkrage/hcfg.git"

@ -0,0 +1,3 @@
(melange.emit
(target main)
(libraries hcfg))

@ -0,0 +1,10 @@
let parse str =
let rec hvalue_to_js x = match x with
| Hcfg.Parser.STRING s -> Js.Json.string s
| Hcfg.Parser.INTEGER i -> Js.Json.number (float_of_int i)
| Hcfg.Parser.FLOAT f -> Js.Json.number f
| Hcfg.Parser.BINARY _ -> failwith "Binary not supported"
| Hcfg.Parser.NULL -> Js.Json.null
| Hcfg.Parser.ARRAY a -> Js.Json.array (Array.of_list (List.map hvalue_to_js a))
| Hcfg.Parser.OBJECT o -> Js.Json.object_ (Js.Dict.fromList (List.map (fun (k, v) -> (k, hvalue_to_js v)) (Hcfg.Parser.Obj.to_list o))) in
Hcfg.Lexer.generate_tokens str |> Hcfg.Parser.parse |> hvalue_to_js

@ -0,0 +1,7 @@
(library
(public_name hcfg)
(name hcfg)
(modes melange best)
(preprocess
(pps melange.ppx))
(modules hcfg lexer parser token))

@ -0,0 +1,3 @@
module Token = Token.Token
module Lexer = Lexer.Lexer
module Parser = Parser.Parser

@ -1,11 +1,19 @@
include Token
let rec split_at1 l n =
if n = 0 then ([], l) else
match l with
| [] -> ([], [])
| h :: t -> let (l1, l2) = split_at1 t (n-1) in (h :: l1, l2);;
let safe_hd xs =
match xs with
| [] -> None
| x :: _ -> Some x
let safe_tl xs =
match xs with
| [] -> None
| _ :: xs -> Some xs
let safe_list xs =
match List.length xs with
@ -24,7 +32,6 @@ let if_list_peek xs n v =
module Parser = struct
include Token
open Base
let split_with (l:Token.token_type list) left right =
let f (length, level) (i, tk) =
@ -36,9 +43,11 @@ module Parser = struct
| a when Token.equal a right -> (length, level - 1)
| _ -> (length, level)
in
match List.fold_left ~f:f ~init:(List.length l, 1) (List.mapi ~f:(fun i e -> (i,e)) l) with
match List.fold_left f (List.length l, 1) (List.mapi (fun i e -> (i,e)) l) with
| (i, _) -> Some (split_at1 l i)
module Obj = Map.Make(String);;
type hvalue =
| INTEGER of int
| FLOAT of float
@ -46,7 +55,7 @@ module Parser = struct
| BINARY of Bytes.t
| NULL
| ARRAY of hvalue List.t
| OBJECT of (string, hvalue, String.comparator_witness) Map.t
| OBJECT of hvalue Obj.t
let rec equal a b =
match (a,b) with
@ -56,7 +65,7 @@ module Parser = struct
| (BINARY a, BINARY b) -> Bytes.equal a b
| (NULL, NULL) -> true
| (ARRAY a, ARRAY b) -> List.equal equal a b
| (OBJECT a, OBJECT b) -> Map.equal equal a b
| (OBJECT a, OBJECT b) -> Obj.equal equal a b
| _ -> false
@ -64,13 +73,11 @@ module Parser = struct
| INTEGER a -> "INTEGER: " ^ Int.to_string a
| FLOAT a -> "INTEGER: " ^ Float.to_string a
| STRING a -> "INTEGER: " ^ a
| BINARY a -> "BINARY " ^ Bytes.fold a ~init:"" ~f:(fun acc b -> acc ^ Printf.sprintf "\\x%2x" (Char.to_int b))
| BINARY a -> "BINARY " ^ Bytes.fold_left (fun acc b -> acc ^ Printf.sprintf "\\x%2x" (Char.code b)) "" a
| NULL -> "NULL"
| ARRAY a -> "ARRAY [" ^ (String.concat ~sep:", " (List.map a ~f:hvalue_to_string)) ^ "]"
| OBJECT a -> "OBJECT {" ^ (String.concat ~sep:", " (List.map (Map.to_alist a) ~f:(fun (k,v) -> k ^ ": (" ^ (hvalue_to_string v) ^ ")"))) ^ "}"
| ARRAY a -> "ARRAY [" ^ (String.concat ", " (List.map hvalue_to_string a)) ^ "]"
| OBJECT a -> "OBJECT {" ^ (String.concat ", " (List.map (fun (k,v) -> k ^ ": (" ^ (hvalue_to_string v) ^ ")") (Obj.to_list a) )) ^ "}"
let pretty_print ppf tok = Fmt.pf ppf "HValue %s" (hvalue_to_string tok)
let parse_primitive token =
match token with
| Token.INTEGER a -> INTEGER a
@ -81,17 +88,17 @@ module Parser = struct
| _ -> failwith "Not a primitive token"
let print_token_array tokens =
List.map tokens ~f:Token.token_to_string |> String.concat ~sep:"; "
List.map Token.token_to_string tokens |> String.concat "; "
let rec parse tokens =
match List.hd tokens with
match safe_hd tokens with
| Some(Token.INTEGER _ | Token.FLOAT _ | Token.STRING _ | Token.BINARY _ | Token.NULL as t) -> parse_primitive t
| Some(Token.LBRACE) -> (match List.tl tokens with
| Some(Token.LBRACE) -> (match safe_tl tokens with
| Some(tl) -> parse_object tl
| _ -> failwith "Not a valid hcfg file")
| Some(Token.LBRACKET) -> (match List.tl tokens with
| Some(Token.LBRACKET) -> (match safe_tl tokens with
| Some(tl) -> parse_arr tl
| _ -> failwith "Not a valid hcfg file")
| Some(Token.EOF) -> failwith "Empty tree"
@ -100,33 +107,38 @@ module Parser = struct
let rec parse_object_i tkns acc =
match safe_list tkns with
| (Some hd, Some tl) -> (match hd with
| Token.IDENT i when if_list_peek tl 0 Token.COLON -> (match List.nth tl 1 with
| Some(Token.INTEGER _ | Token.FLOAT _ | Token.STRING _ | Token.BINARY _ | Token.NULL as t) when if_list_peek tl 2 Token.COMMA -> let (_arr, rest) = split_at1 tl 3 in parse_object_i rest (Map.set acc ~key:i ~data:(parse_primitive t))
| Some(Token.INTEGER _ | Token.FLOAT _ | Token.STRING _ | Token.BINARY _ | Token.NULL as t) when if_list_peek tl 2 Token.RBRACE -> let (_arr, rest) = split_at1 tl 2 in parse_object_i rest (Map.set acc ~key:i ~data:(parse_primitive t))
| Token.IDENT i when if_list_peek tl 0 Token.COLON -> (match List.nth_opt tl 1 with
| Some(Token.INTEGER _ | Token.FLOAT _ | Token.STRING _ | Token.BINARY _ | Token.NULL as t) when if_list_peek tl 2 Token.COMMA -> let (_arr, rest) = split_at1 tl 3 in parse_object_i rest (Obj.add i (parse_primitive t) acc)
| Some(Token.INTEGER _ | Token.FLOAT _ | Token.STRING _ | Token.BINARY _ | Token.NULL as t) when if_list_peek tl 2 Token.RBRACE -> let (_arr, rest) = split_at1 tl 2 in parse_object_i rest (Obj.add i (parse_primitive t) acc)
| Some Token.LBRACKET -> (match split_with (list_skip tl 2) Token.LBRACKET Token.RBRACKET with
| Some (arr, rest) ->
parse_object_i (rest) (Map.set acc ~key:i ~data:(parse_arr arr))
parse_object_i (rest) (Obj.add i (parse_arr arr) acc)
| None -> failwith "uhhh arr in obj")
| Some Token.LBRACE -> (match split_with (list_skip tl 2) Token.LBRACE Token.RBRACE with
| Some (arr, rest) -> parse_object_i (rest) (Map.set acc ~key:i ~data:(parse_object arr))
| Some (arr, rest) -> parse_object_i (rest) (Obj.add i (parse_object arr) acc)
| None -> failwith "uhhh arr in obj")
| _ -> "TODO: Unhandled token for object value" ^ token_to_string hd ^ "(" ^ print_token_array tl ^ ")" |> failwith
| _ -> "TODO: Unhandled token for object value" ^ Token.token_to_string hd ^ "(" ^ print_token_array tl ^ ")" |> failwith
)
| Token.COMMA when not (if_list_peek tl 0 Token.RBRACE) -> parse_object_i tl acc
| Token.COMMA when if_list_peek tl 0 Token.RBRACE -> acc
| Token.RBRACE -> acc
| _ -> "TODO: Unhandled token in object: " ^ (Token.token_to_string hd) ^ " (" ^ (print_token_array tl) ^ ")"|> failwith)
| (Some Token.RBRACE, _) -> acc
| _ -> (String.concat ~sep:", " (List.map (Map.to_alist acc) ~f:(fun (k,v) -> k ^ ": (" ^ (hvalue_to_string v) ^ ")"))) |> failwith
in OBJECT (parse_object_i tokens (Map.empty (module String)))
| _ -> (String.concat ", " (List.map (fun (k,v) -> k ^ ": (" ^ (hvalue_to_string v) ^ ")") (Obj.to_list acc))) |> failwith
in OBJECT (parse_object_i tokens (Obj.empty))
and parse_arr tokens =
let rec parse_arr_i tkns acc =
match safe_list tkns with
| (Some hd, Some tl) -> (match hd with
| Token.INTEGER _ | Token.FLOAT _ | Token.STRING _ | Token.BINARY _ | Token.NULL as t -> (match List.hd tl with
| Some Token.COMMA when List.length tl >= 2 -> parse_arr_i (List.tl_exn tl) ((parse_primitive t) :: acc)
| Some Token.RBRACKET -> ((parse_primitive t) :: acc)
| _ -> failwith "Invalid array"
| Token.INTEGER _ | Token.FLOAT _ | Token.STRING _ | Token.BINARY _ | Token.NULL as t -> (match safe_list tl with
| (Some Token.COMMA, Some tl) -> (match safe_hd tl with
| Some Token.EOF -> failwith "Unterminated Array"
| Some Token.RBRACKET -> ((parse_primitive t) :: acc)
| _ -> parse_arr_i tl ((parse_primitive t) :: acc)
)
| (Some Token.RBRACKET, _) -> ((parse_primitive t) :: acc)
| (Some Token.EOF, _) -> failwith "Unterminated Array"
| _ -> "Invalid Array (" ^ print_token_array tl ^ ")" |> failwith
)
| Token.LBRACKET -> (match split_with tl Token.LBRACKET Token.RBRACKET with
| Some (arr, rest) -> parse_arr_i (rest) (parse_arr arr :: acc)
@ -138,6 +150,7 @@ module Parser = struct
| t -> "TODO: Unhandled token in array: " ^ (Token.token_to_string t) ^ " (" ^ (print_token_array tl) ^ ")"|> failwith
)
| (Some Token.EOF, None) -> acc
| (Some Token.RBRACKET, None) -> acc
| _ -> "TODO: parse_arr (" ^ (print_token_array tkns) ^ ")"|> failwith
in ARRAY (List.rev (parse_arr_i tokens []))
end

@ -1,5 +1,4 @@
module Token = struct
open Base
type token_type =
| ILLEGAL
| EOF
@ -19,7 +18,7 @@ module Token = struct
| LBRACKET
| RBRACKET
let equal (a:token_type) b = Poly.equal a b
let equal (a:token_type) (b:token_type) = (=) a b
let token_to_string = function
| ILLEGAL -> "ILLEGAL"
@ -29,7 +28,7 @@ module Token = struct
| FLOAT a -> "FLOAT " ^ Float.to_string a
| BOOL a -> "BOOL " ^ Bool.to_string a
| STRING a -> "STRING " ^ a
| BINARY a -> "BINARY " ^ Bytes.fold a ~init:"" ~f:(fun acc b -> acc ^ Printf.sprintf "\\x%2x" (Char.to_int b))
| BINARY a -> "BINARY " ^ Bytes.fold_left (fun acc b -> acc ^ Printf.sprintf "\\x%2x" (Char.code b)) "" a
| NULL -> "NULL"
| COMMA -> "COMMA"
| COLON -> "COLON"
@ -37,6 +36,4 @@ module Token = struct
| RBRACE -> "RBRACE"
| LBRACKET -> "LBRACKET"
| RBRACKET -> "RBRACKET"
let pretty_print ppf tok = Fmt.pf ppf "Token %s" (token_to_string tok)
end

@ -1,3 +0,0 @@
(library
(public_name hcfg)
(libraries base fmt))

@ -1,3 +0,0 @@
module Token = Token
module Lexer = Lexer
module Parser = Parser

@ -1,3 +1,4 @@
(test
(name test)
(libraries alcotest hcfg fmt))
(name test_hcfg)
(deps test.hcfg)
(libraries hcfg fmt alcotest))

@ -0,0 +1,7 @@
module Token = struct
let pretty_print ppf tok = Fmt.pf ppf "Token %s" (Hcfg.Token.token_to_string tok)
end
module Parser = struct
let pretty_print ppf tok = Fmt.pf ppf "HValue %s" (Hcfg.Parser.hvalue_to_string tok)
end

@ -1,16 +1,17 @@
open Hcfg
include Token
include Lexer
include Parser
open Base
let token_testable = Alcotest.testable Token.pretty_print Token.equal
let hvalue_testable = Alcotest.testable Parser.pretty_print Parser.equal
let token_testable = Alcotest.testable Prettyprint.Token.pretty_print Token.equal
let hvalue_testable = Alcotest.testable Prettyprint.Parser.pretty_print Parser.equal
let read_test_file =
(* open_in_bin works correctly on Unix and Windows *)
let ch = Stdlib.open_in_bin "test/test.hcfg" in
let ch = Stdlib.open_in_bin "test.hcfg" in
let s = Stdlib.really_input_string ch (Stdlib.in_channel_length ch) in
Stdlib.close_in ch;
s
@ -31,9 +32,11 @@ let test_lexer() =
]
(Lexer.generate_tokens read_test_file)
module Obj = Map.Make(String)
let test_ast() =
Alcotest.(check (hvalue_testable))
"same token types" (Parser.OBJECT (Base.Map.of_alist_exn (module String) [("a", Parser.FLOAT 1.); ("b", Parser.ARRAY [Parser.INTEGER 1; Parser.INTEGER 2]); ("c", Parser.OBJECT (Base.Map.of_alist_exn (module String) [("a", Parser.INTEGER 2)]))]))
"same token types" (Parser.OBJECT (Obj.of_list [("a", Parser.FLOAT 1.); ("b", Parser.ARRAY [Parser.INTEGER 1; Parser.INTEGER 2]); ("c", Parser.OBJECT (Obj.of_list [("a", Parser.INTEGER 2)]))]))
(Lexer.generate_tokens {|{a: 1f,b: [1,2], c: { a: 2}}|} |> Parser.parse)
let () =