From b4c7634ac679dbc23888de865eeb1269f7be5880 Mon Sep 17 00:00:00 2001 From: Marc Nickert Date: Sun, 22 Sep 2024 21:50:59 +0200 Subject: [PATCH 1/3] WIP add json-schema grammar test from llama.cpp This commit adds tests for all grammar from https://github.com/ggerganov/llama.cpp/blob/master/tests/test-json-schema-to-grammar.cpp Many fail because the regex operator to repeat {from,to} many times seems not implemented. And some other problems. --- test/ebnf_more_test.exs | 783 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 783 insertions(+) create mode 100644 test/ebnf_more_test.exs diff --git a/test/ebnf_more_test.exs b/test/ebnf_more_test.exs new file mode 100644 index 0000000..7552a4d --- /dev/null +++ b/test/ebnf_more_test.exs @@ -0,0 +1,783 @@ +# +# MIT License +# +# Copyright (c) 2023-2024 The ggml authors +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# +# +# https://github.com/ggerganov/llama.cpp/blob/master/tests/test-json-schema-to-grammar.cpp +# +# Code used from Regex Copy paste from llama.cpp +# +defmodule EBNFMoreTest do + use ExUnit.Case + + alias EBNF.ParseState + +# Try all to parse all grammars from +# https://github.com/ggerganov/llama.cpp/blob/master/tests/test-json-schema-to-grammar.cpp + describe "parse/1" do + + test "min 0" do + + grammar = """ +root ::= ([0] | [1-9] [0-9]{0,15}) space +space ::= | " " | "\n" [ \t]{0,20} +""" + assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + end + + test "min 1" do + + grammar = """ +root ::= ([1-9] [0-9]{0,15}) space +space ::= | " " | "\n" [ \t]{0,20} +""" + assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + end + + test "min 3" do + + grammar = """ +root ::= ([1-2] [0-9]{1,15} | [3-9] [0-9]{0,15}) space +space ::= | " " | "\n" [ \t]{0,20} +""" + assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + end + + test "min 9" do + + grammar = """ +root ::= ([1-8] [0-9]{1,15} | [9] [0-9]{0,15}) space +space ::= | " " | "\n" [ \t]{0,20} +""" + assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + end + + test "min 10" do + + grammar = """ +root ::= ([1] ([0-9]{1,15}) | [2-9] [0-9]{1,15}) space +space ::= | " " | "\n" [ \t]{0,20} +""" + assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + end + + test "min 25" do + + grammar = """ +root ::= ([1] [0-9]{2,15} | [2] ([0-4] [0-9]{1,14} | [5-9] [0-9]{0,14}) | [3-9] [0-9]{1,15}) space +space ::= | " " | "\n" [ \t]{0,20} +""" + assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + end + + test "max 30" do + + grammar = """ +root ::= ("-" [1-9] [0-9]{0,15} | [0-9] | ([1-2] [0-9] | [3] "0")) space +space ::= | " " | "\n" [ \t]{0,20} +""" + assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + end + + test "min -5" do + + grammar = """ +root ::= ("-" ([0-5]) | [0] | [1-9] [0-9]{0,15}) space +space ::= | " " | "\n" [ \t]{0,20} +""" + assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + end + + test "min -123" do + + grammar = """ +root ::= ("-" ([0-9] | ([1-8] [0-9] | [9] [0-9]) | "1" ([0-1] [0-9] | [2] [0-3])) | [0] | [1-9] [0-9]{0,15}) space +space ::= | " " | "\n" [ \t]{0,20} +""" + assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + end + + test "max -5" do + + grammar = """ +root ::= ("-" ([0-4] [0-9]{1,15} | [5-9] [0-9]{0,15})) space +space ::= | " " | "\n" [ \t]{0,20} +""" + assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + end + + test "max 1" do + + grammar = """ +root ::= ("-" [1-9] [0-9]{0,15} | [0-1]) space +space ::= | " " | "\n" [ \t]{0,20} +""" + assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + end + + test "max 100" do + + grammar = """ +root ::= ("-" [1-9] [0-9]{0,15} | [0-9] | ([1-8] [0-9] | [9] [0-9]) | "100") space +space ::= | " " | "\n" [ \t]{0,20} +""" + assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + end + + test "min 0 max 23" do + + grammar = """ +root ::= ([0-9] | ([1] [0-9] | [2] [0-3])) space +space ::= | " " | "\n" [ \t]{0,20} +""" + assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + end + + test "min 15 max 300" do + + grammar = """ +root ::= (([1] ([5-9]) | [2-9] [0-9]) | ([1-2] [0-9]{2} | [3] "00")) space +space ::= | " " | "\n" [ \t]{0,20} +""" + assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + end + + test "min 5 max 30" do + + grammar = """ +root ::= ([5-9] | ([1-2] [0-9] | [3] "0")) space +space ::= | " " | "\n" [ \t]{0,20} +""" + assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + end + + test "min -123 max 42" do + + grammar = """ +root ::= ("-" ([0-9] | ([1-8] [0-9] | [9] [0-9]) | "1" ([0-1] [0-9] | [2] [0-3])) | [0-9] | ([1-3] [0-9] | [4] [0-2])) space +space ::= | " " | "\n" [ \t]{0,20} +""" + assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + end + + test "min -10 max 10" do + + grammar = """ +root ::= ("-" ([0-9] | "10") | [0-9] | "10") space +space ::= | " " | "\n" [ \t]{0,20} +""" + assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + end + + + test "empty schema (object)" do + + grammar = """ +array ::= "[" space ( value ("," space value)* )? "]" space +boolean ::= ("true" | "false") space +char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4}) +decimal-part ::= [0-9]{1,16} +integral-part ::= [0] | [1-9] [0-9]{0,15} +null ::= "null" space +number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space +object ::= "{" space ( string ":" space value ("," space string ":" space value)* )? "}" space +root ::= object +space ::= | " " | "\n" [ \t]{0,20} +string ::= "\"" char* "\"" space +value ::= object | array | string | number | boolean | null +""" + assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + end + + test "exotic formats" do + + grammar = """ +date ::= [0-9]{4} "-" ( "0" [1-9] | "1" [0-2] ) "-" ( "0" [1-9] | [1-2] [0-9] | "3" [0-1] ) +date-string ::= "\"" date "\"" space +date-time ::= date "T" time +date-time-string ::= "\"" date-time "\"" space +root ::= "[" space tuple-0 "," space uuid "," space tuple-2 "," space tuple-3 "]" space +space ::= | " " | "\n" [ \t]{0,20} +time ::= ([01] [0-9] | "2" [0-3]) ":" [0-5] [0-9] ":" [0-5] [0-9] ( "." [0-9]{3} )? ( "Z" | ( "+" | "-" ) ( [01] [0-9] | "2" [0-3] ) ":" [0-5] [0-9] ) +time-string ::= "\"" time "\"" space +tuple-0 ::= date-string +tuple-2 ::= time-string +tuple-3 ::= date-time-string +uuid ::= "\"" [0-9a-fA-F]{8} "-" [0-9a-fA-F]{4} "-" [0-9a-fA-F]{4} "-" [0-9a-fA-F]{4} "-" [0-9a-fA-F]{12} "\"" space +""" + assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + end + + test "string" do + + grammar = """ +char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4}) +root ::= "\"" char* "\"" space +space ::= | " " | "\n" [ \t]{0,20} +""" + assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + end + + test "string w/ min length 1" do + + grammar = """ +char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4}) +root ::= "\"" char+ "\"" space +space ::= | " " | "\n" [ \t]{0,20} +""" + assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + end + + test "string w/ min length 3" do + + grammar = """ +char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4}) +root ::= "\"" char{3,} "\"" space +space ::= | " " | "\n" [ \t]{0,20} +""" + assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + end + + test "string w/ max length" do + + grammar = """ +char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4}) +root ::= "\"" char{0,3} "\"" space +space ::= | " " | "\n" [ \t]{0,20} +""" + assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + end + + test "string w/ min & max length" do + + grammar = """ +char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4}) +root ::= "\"" char{1,4} "\"" space +space ::= | " " | "\n" [ \t]{0,20} +""" + assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + end + + test "boolean" do + + grammar = """ +root ::= ("true" | "false") space +space ::= | " " | "\n" [ \t]{0,20} +""" + assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + end + + test "integer" do + + grammar = """ +integral-part ::= [0] | [1-9] [0-9]{0,15} +root ::= ("-"? integral-part) space +space ::= | " " | "\n" [ \t]{0,20} +""" + assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + end + + test "string const" do + + grammar = """ +root ::= "\"foo\"" space +space ::= | " " | "\n" [ \t]{0,20} +""" + assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + end + + test "non-string const" do + + grammar = """ +root ::= "123" space +space ::= | " " | "\n" [ \t]{0,20} +""" + assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + end + + test "non-string enum" do + + grammar = """ +root ::= ("\"red\"" | "\"amber\"" | "\"green\"" | "null" | "42" | "[\"foo\"]") space +space ::= | " " | "\n" [ \t]{0,20} +""" + assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + end + + test "string array" do + + grammar = """ +char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4}) +root ::= "[" space (string ("," space string)*)? "]" space +space ::= | " " | "\n" [ \t]{0,20} +string ::= "\"" char* "\"" space +""" + assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + end + + test "nullable string array" do + + grammar = """ +alternative-0 ::= "[" space (string ("," space string)*)? "]" space +char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4}) +null ::= "null" space +root ::= alternative-0 | null +space ::= | " " | "\n" [ \t]{0,20} +string ::= "\"" char* "\"" space +""" + assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + end + + test "tuple1" do + + grammar = """ +char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4}) +root ::= "[" space string "]" space +space ::= | " " | "\n" [ \t]{0,20} +string ::= "\"" char* "\"" space +""" + assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + end + + test "tuple2" do + + grammar = """ +char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4}) +decimal-part ::= [0-9]{1,16} +integral-part ::= [0] | [1-9] [0-9]{0,15} +number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space +root ::= "[" space string "," space number "]" space +space ::= | " " | "\n" [ \t]{0,20} +string ::= "\"" char* "\"" space +""" + assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + end + + test "number" do + + grammar = """ +decimal-part ::= [0-9]{1,16} +integral-part ::= [0] | [1-9] [0-9]{0,15} +root ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space +space ::= | " " | "\n" [ \t]{0,20} +""" + assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + end + + test "minItems" do + + grammar = """ +boolean ::= ("true" | "false") space +root ::= "[" space boolean ("," space boolean)+ "]" space +space ::= | " " | "\n" [ \t]{0,20} +""" + assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + end + + test "maxItems 1" do + + grammar = """ +boolean ::= ("true" | "false") space +root ::= "[" space boolean? "]" space +space ::= | " " | "\n" [ \t]{0,20} +""" + assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + end + + test "maxItems 2" do + + grammar = """ +boolean ::= ("true" | "false") space +root ::= "[" space (boolean ("," space boolean)?)? "]" space +space ::= | " " | "\n" [ \t]{0,20} +""" + assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + end + + test "min + maxItems" do + + grammar = """ +decimal-part ::= [0-9]{1,16} +integer ::= ("-"? integral-part) space +integral-part ::= [0] | [1-9] [0-9]{0,15} +item ::= number | integer +number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space +root ::= "[" space item ("," space item){2,4} "]" space +space ::= | " " | "\n" [ \t]{0,20} +""" + assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + end + + test "min + max items with min + max values across zero" do + + grammar = """ +item ::= ("-" ([0-9] | "1" [0-2]) | [0-9] | ([1-8] [0-9] | [9] [0-9]) | ([1] [0-9]{2} | [2] "0" [0-7])) space +root ::= "[" space item ("," space item){2,4} "]" space +space ::= | " " | "\n" [ \t]{0,20} +""" + assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + end + + test "min + max items with min + max values" do + + grammar = """ +item ::= (([1] ([2-9]) | [2-9] [0-9]) | ([1] [0-9]{2} | [2] "0" [0-7])) space +root ::= "[" space item ("," space item){2,4} "]" space +space ::= | " " | "\n" [ \t]{0,20} +""" + assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + end + + test "simple regexp" do + + grammar = """ +root ::= "\"" "ab" "c"? "d"* "ef" "g"+ ("hij")? "kl" "\"" space +space ::= | " " | "\n" [ \t]{0,20} +""" + assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + end + + test "regexp escapes" do + + grammar = """ +root ::= "\"" "[]{}()|+*?" "\"" space +space ::= | " " | "\n" [ \t]{0,20} +""" + assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + end + + test "regexp quote" do + + grammar = """ +root ::= "\"" "\"" "\"" space +space ::= | " " | "\n" [ \t]{0,20} +""" + assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + end + + test "regexp" do + + grammar = """ +dot ::= [^\x0A\x0D] +root ::= "\"" ("(" root-1{1,3} ")")? root-1{3,3} "-" root-1{4,4} " " "a"{3,5} "nd" dot dot dot "\"" space +root-1 ::= [0-9] +space ::= | " " | "\n" [ \t]{0,20} +""" + assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + end + + test "required props in original order" do + + grammar = """ +a-kv ::= "\"a\"" space ":" space string +b-kv ::= "\"b\"" space ":" space string +c-kv ::= "\"c\"" space ":" space string +char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4}) +root ::= "{" space b-kv "," space c-kv "," space a-kv "}" space +space ::= | " " | "\n" [ \t]{0,20} +string ::= "\"" char* "\"" space +""" + assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + end + + test "1 optional prop" do + + grammar = """ +a-kv ::= "\"a\"" space ":" space string +char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4}) +root ::= "{" space (a-kv )? "}" space +space ::= | " " | "\n" [ \t]{0,20} +string ::= "\"" char* "\"" space +""" + assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + end + + test "N optional props" do + + grammar = """ +a-kv ::= "\"a\"" space ":" space string +a-rest ::= ( "," space b-kv )? b-rest +b-kv ::= "\"b\"" space ":" space string +b-rest ::= ( "," space c-kv )? +c-kv ::= "\"c\"" space ":" space string +char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4}) +root ::= "{" space (a-kv a-rest | b-kv b-rest | c-kv )? "}" space +space ::= | " " | "\n" [ \t]{0,20} +string ::= "\"" char* "\"" space +""" + assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + end + + test "required + optional props each in original order" do + + grammar = """ +a-kv ::= "\"a\"" space ":" space string +b-kv ::= "\"b\"" space ":" space string +c-kv ::= "\"c\"" space ":" space string +char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4}) +d-kv ::= "\"d\"" space ":" space string +d-rest ::= ( "," space c-kv )? +root ::= "{" space b-kv "," space a-kv ( "," space ( d-kv d-rest | c-kv ) )? "}" space +space ::= | " " | "\n" [ \t]{0,20} +string ::= "\"" char* "\"" space +""" + assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + end + + test "additional props" do + + grammar = """ +additional-kv ::= string ":" space additional-value +additional-value ::= "[" space (number ("," space number)*)? "]" space +char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4}) +decimal-part ::= [0-9]{1,16} +integral-part ::= [0] | [1-9] [0-9]{0,15} +number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space +root ::= "{" space (additional-kv ( "," space additional-kv )* )? "}" space +space ::= | " " | "\n" [ \t]{0,20} +string ::= "\"" char* "\"" space +""" + assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + end + + test "additional props (true)" do + + grammar = """ +array ::= "[" space ( value ("," space value)* )? "]" space +boolean ::= ("true" | "false") space +char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4}) +decimal-part ::= [0-9]{1,16} +integral-part ::= [0] | [1-9] [0-9]{0,15} +null ::= "null" space +number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space +object ::= "{" space ( string ":" space value ("," space string ":" space value)* )? "}" space +root ::= object +space ::= | " " | "\n" [ \t]{0,20} +string ::= "\"" char* "\"" space +value ::= object | array | string | number | boolean | null +""" + assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + end + + test "additional props (implicit)" do + + grammar = """ +array ::= "[" space ( value ("," space value)* )? "]" space +boolean ::= ("true" | "false") space +char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4}) +decimal-part ::= [0-9]{1,16} +integral-part ::= [0] | [1-9] [0-9]{0,15} +null ::= "null" space +number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space +object ::= "{" space ( string ":" space value ("," space string ":" space value)* )? "}" space +root ::= object +space ::= | " " | "\n" [ \t]{0,20} +string ::= "\"" char* "\"" space +value ::= object | array | string | number | boolean | null +""" + assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + end + + test "empty w/o additional props" do + + grammar = """ +root ::= "{" space "}" space +space ::= | " " | "\n" [ \t]{0,20} +""" + assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + end + + test "required + additional props" do + + grammar = """ +a-kv ::= "\"a\"" space ":" space number +additional-k ::= ["] ( [a] char+ | [^"a] char* )? ["] space +additional-kv ::= additional-k ":" space string +char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4}) +decimal-part ::= [0-9]{1,16} +integral-part ::= [0] | [1-9] [0-9]{0,15} +number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space +root ::= "{" space a-kv ( "," space ( additional-kv ( "," space additional-kv )* ) )? "}" space +space ::= | " " | "\n" [ \t]{0,20} +string ::= "\"" char* "\"" space +""" + assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + end + + test "optional + additional props" do + + grammar = """ +a-kv ::= "\"a\"" space ":" space number +a-rest ::= ( "," space additional-kv )* +additional-k ::= ["] ( [a] char+ | [^"a] char* )? ["] space +additional-kv ::= additional-k ":" space number +char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4}) +decimal-part ::= [0-9]{1,16} +integral-part ::= [0] | [1-9] [0-9]{0,15} +number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space +root ::= "{" space (a-kv a-rest | additional-kv ( "," space additional-kv )* )? "}" space +space ::= | " " | "\n" [ \t]{0,20} +""" + assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + end + + test "required + optional + additional props" do + + grammar = """ +additional-k ::= ["] ( [a] ([l] ([s] ([o] char+ | [^"o] char*) | [^"s] char*) | [n] ([d] char+ | [^"d] char*) | [^"ln] char*) | [^"a] char* )? ["] space +additional-kv ::= additional-k ":" space number +also-kv ::= "\"also\"" space ":" space number +also-rest ::= ( "," space additional-kv )* +and-kv ::= "\"and\"" space ":" space number +char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4}) +decimal-part ::= [0-9]{1,16} +integral-part ::= [0] | [1-9] [0-9]{0,15} +number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space +root ::= "{" space and-kv ( "," space ( also-kv also-rest | additional-kv ( "," space additional-kv )* ) )? "}" space +space ::= | " " | "\n" [ \t]{0,20} +""" + assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + end + + test "optional props with empty name" do + + grammar = """ +-kv ::= "\"\"" space ":" space root +-rest ::= ( "," space a-kv )? a-rest +a-kv ::= "\"a\"" space ":" space integer +a-rest ::= ( "," space additional-kv )* +additional-k ::= ["] ( [a] char+ | [^"a] char* ) ["] space +additional-kv ::= additional-k ":" space integer +char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4}) +integer ::= ("-"? integral-part) space +integral-part ::= [0] | [1-9] [0-9]{0,15} +root ::= ("-"? integral-part) space +root0 ::= "{" space (-kv -rest | a-kv a-rest | additional-kv ( "," space additional-kv )* )? "}" space +space ::= | " " | "\n" [ \t]{0,20} +""" + assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + end + + test "optional props with nested names" do + + grammar = """ +a-kv ::= "\"a\"" space ":" space integer +a-rest ::= ( "," space aa-kv )? aa-rest +aa-kv ::= "\"aa\"" space ":" space integer +aa-rest ::= ( "," space additional-kv )* +additional-k ::= ["] ( [a] ([a] char+ | [^"a] char*) | [^"a] char* )? ["] space +additional-kv ::= additional-k ":" space integer +char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4}) +integer ::= ("-"? integral-part) space +integral-part ::= [0] | [1-9] [0-9]{0,15} +root ::= "{" space (a-kv a-rest | aa-kv aa-rest | additional-kv ( "," space additional-kv )* )? "}" space +space ::= | " " | "\n" [ \t]{0,20} +""" + assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + end + + test "optional props with common prefix" do + + grammar = """ +ab-kv ::= "\"ab\"" space ":" space integer +ab-rest ::= ( "," space ac-kv )? ac-rest +ac-kv ::= "\"ac\"" space ":" space integer +ac-rest ::= ( "," space additional-kv )* +additional-k ::= ["] ( [a] ([b] char+ | [c] char+ | [^"bc] char*) | [^"a] char* )? ["] space +additional-kv ::= additional-k ":" space integer +char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4}) +integer ::= ("-"? integral-part) space +integral-part ::= [0] | [1-9] [0-9]{0,15} +root ::= "{" space (ab-kv ab-rest | ac-kv ac-rest | additional-kv ( "," space additional-kv )* )? "}" space +space ::= | " " | "\n" [ \t]{0,20} +""" + assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + end + + test "top-level $ref" do + + grammar = """ +char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4}) +foo ::= "{" space foo-a-kv "}" space +foo-a-kv ::= "\"a\"" space ":" space string +root ::= foo +space ::= | " " | "\n" [ \t]{0,20} +string ::= "\"" char* "\"" space +""" + assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + end + + test "anyOf" do + + grammar = """ +alternative-0 ::= foo +alternative-1 ::= bar +bar ::= "{" space (bar-b-kv )? "}" space +bar-b-kv ::= "\"b\"" space ":" space number +decimal-part ::= [0-9]{1,16} +foo ::= "{" space (foo-a-kv )? "}" space +foo-a-kv ::= "\"a\"" space ":" space number +integral-part ::= [0] | [1-9] [0-9]{0,15} +number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space +root ::= alternative-0 | alternative-1 +space ::= | " " | "\n" [ \t]{0,20} +""" + assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + end + + test "mix of allOf, anyOf and $ref (similar to https://json.schemastore.org/tsconfig.json)" do + + grammar = """ +a-kv ::= "\"a\"" space ":" space number +b-kv ::= "\"b\"" space ":" space number +c-kv ::= "\"c\"" space ":" space number +d-kv ::= "\"d\"" space ":" space number +d-rest ::= ( "," space c-kv )? +decimal-part ::= [0-9]{1,16} +integral-part ::= [0] | [1-9] [0-9]{0,15} +number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space +root ::= "{" space a-kv "," space b-kv ( "," space ( d-kv d-rest | c-kv ) )? "}" space +space ::= | " " | "\n" [ \t]{0,20} +""" + assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + end + + test "conflicting names" do + + grammar = """ +decimal-part ::= [0-9]{1,16} +integral-part ::= [0] | [1-9] [0-9]{0,15} +number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space +number- ::= "{" space number-number-kv "}" space +number-kv ::= "\"number\"" space ":" space number- +number-number ::= "{" space number-number-root-kv "}" space +number-number-kv ::= "\"number\"" space ":" space number-number +number-number-root-kv ::= "\"root\"" space ":" space number +root ::= "{" space number-kv "}" space +space ::= | " " | "\n" [ \t]{0,20} +""" + assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + end + + end + +end From c63942372624d7efabdfe80cc01399bc2f88627e Mon Sep 17 00:00:00 2001 From: Marc Nickert Date: Mon, 7 Oct 2024 18:53:10 +0200 Subject: [PATCH 2/3] add parser feature grammar expansion WIP --- lib/ebnf/parser.ex | 43 ++++++- lib/ebnf/parser/helpers.ex | 10 -- test/ebnf_more_test.exs | 236 +++++++++++++++++++++++++------------ 3 files changed, 196 insertions(+), 93 deletions(-) delete mode 100644 lib/ebnf/parser/helpers.ex diff --git a/lib/ebnf/parser.ex b/lib/ebnf/parser.ex index aa5bd6c..6370d01 100644 --- a/lib/ebnf/parser.ex +++ b/lib/ebnf/parser.ex @@ -2,7 +2,20 @@ defmodule EBNF.Parser do @moduledoc false import NimbleParsec - import EBNF.Parser.Helpers + + from_to = + ignore(string("{")) + |> integer(min: 1) + |> ignore(string(",")) + |> integer(min: 1) + |> ignore(string("}")) + |> tag(:from_to) + + times = + ignore(string("{")) + |> integer(min: 1) + |> ignore(string("}")) + |> tag(:times) whitespace = repeat(choice([string(" "), string("\n"), string("\t")])) @@ -25,8 +38,16 @@ defmodule EBNF.Parser do choice([ string(~S( )) |> replace(?\s), string(~S(\t)) |> replace(?\t), + string("\t") |> replace(?\t), string(~S(\n)) |> replace(?\n), + string("\n") |> replace(?\n), + string("\r") |> replace(?\r), string(~S(\\)) |> replace(?\\), + string("\\") |> replace(?\\), + string("\"") |> replace(?\"), + string("\d") |> replace(?\d), + string("\0") |> replace(?\0), + string("\x1F") |> replace(~s(\x1F)), ascii_char([?a..?z, ?A..?Z, ?0..?9, ?_, ?-, ?+, ?*, ?/]) ]) |> tag(:char) @@ -37,11 +58,20 @@ defmodule EBNF.Parser do |> concat(char) |> tag(:range) + + range_or_char = choice([range, char]) + negation = + ignore(string("^")) + |> concat(range_or_char) + |> tag(:negation) + + negation_range_or_char = choice([negation, range, char]) + character_set = ignore(string("[")) - |> times(range_or_char, min: 1) + |> times(negation_range_or_char, min: 1) |> ignore(string("]")) |> tag(:character_set) @@ -50,7 +80,7 @@ defmodule EBNF.Parser do |> tag(:terminal) identifier = - ascii_string([?a..?z, ?A..?Z, ?0..?9, ?_], min: 1) + ascii_string([?a..?z, ?A..?Z, ?0..?9, ?_, ?-], min: 1) |> tag(:identifier) grouping = @@ -63,10 +93,13 @@ defmodule EBNF.Parser do factor = choice([identifier, terminal, grouping]) - |> repeatable() + |> then(&choice([ + tag(concat(&1,choice([string("*"), string("+"), string("?"), from_to, times])),:repetition), + &1 + ])) |> ignore(repeat(choice([string(" "), string("\t")]))) - term = times(factor, min: 1) + term = times(factor, min: 0) choice_separator = ignore(repeat(string(" "))) diff --git a/lib/ebnf/parser/helpers.ex b/lib/ebnf/parser/helpers.ex deleted file mode 100644 index eaca069..0000000 --- a/lib/ebnf/parser/helpers.ex +++ /dev/null @@ -1,10 +0,0 @@ -defmodule EBNF.Parser.Helpers do - import NimbleParsec - - def repeatable(combinator) do - choice([ - tag(concat(combinator, choice([string("*"), string("+"), string("?")])), :repetition), - combinator - ]) - end -end diff --git a/test/ebnf_more_test.exs b/test/ebnf_more_test.exs index 7552a4d..83eb06c 100644 --- a/test/ebnf_more_test.exs +++ b/test/ebnf_more_test.exs @@ -41,7 +41,8 @@ defmodule EBNFMoreTest do root ::= ([0] | [1-9] [0-9]{0,15}) space space ::= | " " | "\n" [ \t]{0,20} """ - assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + assert {:ok, parsed, "", _, _, _} = EBNF.parse(grammar) + assert [_ | _] = parsed end test "min 1" do @@ -50,16 +51,18 @@ space ::= | " " | "\n" [ \t]{0,20} root ::= ([1-9] [0-9]{0,15}) space space ::= | " " | "\n" [ \t]{0,20} """ - assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) +assert {:ok, parsed, "", _, _, _} = EBNF.parse(grammar) +assert [_ | _] = parsed end test "min 3" do grammar = """ root ::= ([1-2] [0-9]{1,15} | [3-9] [0-9]{0,15}) space -space ::= | " " | "\n" [ \t]{0,20} +space ::= | " " """ - assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) +assert {:ok, parsed, "", _, _, _} = EBNF.parse(grammar) +assert [_ | _] = parsed end test "min 9" do @@ -68,7 +71,8 @@ space ::= | " " | "\n" [ \t]{0,20} root ::= ([1-8] [0-9]{1,15} | [9] [0-9]{0,15}) space space ::= | " " | "\n" [ \t]{0,20} """ - assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + assert {:ok, parsed, "", _, _, _} = EBNF.parse(grammar) +assert [_ | _] = parsed end test "min 10" do @@ -77,7 +81,8 @@ space ::= | " " | "\n" [ \t]{0,20} root ::= ([1] ([0-9]{1,15}) | [2-9] [0-9]{1,15}) space space ::= | " " | "\n" [ \t]{0,20} """ - assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + assert {:ok, parsed, "", _, _, _} = EBNF.parse(grammar) +assert [_ | _] = parsed end test "min 25" do @@ -86,7 +91,8 @@ space ::= | " " | "\n" [ \t]{0,20} root ::= ([1] [0-9]{2,15} | [2] ([0-4] [0-9]{1,14} | [5-9] [0-9]{0,14}) | [3-9] [0-9]{1,15}) space space ::= | " " | "\n" [ \t]{0,20} """ - assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + assert {:ok, parsed, "", _, _, _} = EBNF.parse(grammar) +assert [_ | _] = parsed end test "max 30" do @@ -95,7 +101,8 @@ space ::= | " " | "\n" [ \t]{0,20} root ::= ("-" [1-9] [0-9]{0,15} | [0-9] | ([1-2] [0-9] | [3] "0")) space space ::= | " " | "\n" [ \t]{0,20} """ - assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + assert {:ok, parsed, "", _, _, _} = EBNF.parse(grammar) +assert [_ | _] = parsed end test "min -5" do @@ -104,7 +111,8 @@ space ::= | " " | "\n" [ \t]{0,20} root ::= ("-" ([0-5]) | [0] | [1-9] [0-9]{0,15}) space space ::= | " " | "\n" [ \t]{0,20} """ - assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + assert {:ok, parsed, "", _, _, _} = EBNF.parse(grammar) +assert [_ | _] = parsed end test "min -123" do @@ -113,7 +121,8 @@ space ::= | " " | "\n" [ \t]{0,20} root ::= ("-" ([0-9] | ([1-8] [0-9] | [9] [0-9]) | "1" ([0-1] [0-9] | [2] [0-3])) | [0] | [1-9] [0-9]{0,15}) space space ::= | " " | "\n" [ \t]{0,20} """ - assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + assert {:ok, parsed, "", _, _, _} = EBNF.parse(grammar) +assert [_ | _] = parsed end test "max -5" do @@ -122,7 +131,8 @@ space ::= | " " | "\n" [ \t]{0,20} root ::= ("-" ([0-4] [0-9]{1,15} | [5-9] [0-9]{0,15})) space space ::= | " " | "\n" [ \t]{0,20} """ - assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + assert {:ok, parsed, "", _, _, _} = EBNF.parse(grammar) +assert [_ | _] = parsed end test "max 1" do @@ -131,7 +141,8 @@ space ::= | " " | "\n" [ \t]{0,20} root ::= ("-" [1-9] [0-9]{0,15} | [0-1]) space space ::= | " " | "\n" [ \t]{0,20} """ - assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + assert {:ok, parsed, "", _, _, _} = EBNF.parse(grammar) +assert [_ | _] = parsed end test "max 100" do @@ -140,7 +151,8 @@ space ::= | " " | "\n" [ \t]{0,20} root ::= ("-" [1-9] [0-9]{0,15} | [0-9] | ([1-8] [0-9] | [9] [0-9]) | "100") space space ::= | " " | "\n" [ \t]{0,20} """ - assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + assert {:ok, parsed, "", _, _, _} = EBNF.parse(grammar) +assert [_ | _] = parsed end test "min 0 max 23" do @@ -149,7 +161,8 @@ space ::= | " " | "\n" [ \t]{0,20} root ::= ([0-9] | ([1] [0-9] | [2] [0-3])) space space ::= | " " | "\n" [ \t]{0,20} """ - assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + assert {:ok, parsed, "", _, _, _} = EBNF.parse(grammar) +assert [_ | _] = parsed end test "min 15 max 300" do @@ -158,7 +171,8 @@ space ::= | " " | "\n" [ \t]{0,20} root ::= (([1] ([5-9]) | [2-9] [0-9]) | ([1-2] [0-9]{2} | [3] "00")) space space ::= | " " | "\n" [ \t]{0,20} """ - assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + assert {:ok, parsed, "", _, _, _} = EBNF.parse(grammar) +assert [_ | _] = parsed end test "min 5 max 30" do @@ -167,7 +181,8 @@ space ::= | " " | "\n" [ \t]{0,20} root ::= ([5-9] | ([1-2] [0-9] | [3] "0")) space space ::= | " " | "\n" [ \t]{0,20} """ - assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + assert {:ok, parsed, "", _, _, _} = EBNF.parse(grammar) +assert [_ | _] = parsed end test "min -123 max 42" do @@ -176,7 +191,8 @@ space ::= | " " | "\n" [ \t]{0,20} root ::= ("-" ([0-9] | ([1-8] [0-9] | [9] [0-9]) | "1" ([0-1] [0-9] | [2] [0-3])) | [0-9] | ([1-3] [0-9] | [4] [0-2])) space space ::= | " " | "\n" [ \t]{0,20} """ - assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + assert {:ok, parsed, "", _, _, _} = EBNF.parse(grammar) +assert [_ | _] = parsed end test "min -10 max 10" do @@ -185,7 +201,8 @@ space ::= | " " | "\n" [ \t]{0,20} root ::= ("-" ([0-9] | "10") | [0-9] | "10") space space ::= | " " | "\n" [ \t]{0,20} """ - assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + assert {:ok, parsed, "", _, _, _} = EBNF.parse(grammar) +assert [_ | _] = parsed end @@ -205,7 +222,8 @@ space ::= | " " | "\n" [ \t]{0,20} string ::= "\"" char* "\"" space value ::= object | array | string | number | boolean | null """ - assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + assert {:ok, parsed, "", _, _, _} = EBNF.parse(grammar) +assert [_ | _] = parsed end test "exotic formats" do @@ -224,7 +242,8 @@ tuple-2 ::= time-string tuple-3 ::= date-time-string uuid ::= "\"" [0-9a-fA-F]{8} "-" [0-9a-fA-F]{4} "-" [0-9a-fA-F]{4} "-" [0-9a-fA-F]{4} "-" [0-9a-fA-F]{12} "\"" space """ - assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + assert {:ok, parsed, "", _, _, _} = EBNF.parse(grammar) +assert [_ | _] = parsed end test "string" do @@ -234,7 +253,8 @@ char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4}) root ::= "\"" char* "\"" space space ::= | " " | "\n" [ \t]{0,20} """ - assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + assert {:ok, parsed, "", _, _, _} = EBNF.parse(grammar) +assert [_ | _] = parsed end test "string w/ min length 1" do @@ -244,7 +264,8 @@ char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4}) root ::= "\"" char+ "\"" space space ::= | " " | "\n" [ \t]{0,20} """ - assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + assert {:ok, parsed, "", _, _, _} = EBNF.parse(grammar) +assert [_ | _] = parsed end test "string w/ min length 3" do @@ -254,7 +275,8 @@ char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4}) root ::= "\"" char{3,} "\"" space space ::= | " " | "\n" [ \t]{0,20} """ - assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + assert {:ok, parsed, "", _, _, _} = EBNF.parse(grammar) +assert [_ | _] = parsed end test "string w/ max length" do @@ -264,7 +286,8 @@ char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4}) root ::= "\"" char{0,3} "\"" space space ::= | " " | "\n" [ \t]{0,20} """ - assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + assert {:ok, parsed, "", _, _, _} = EBNF.parse(grammar) +assert [_ | _] = parsed end test "string w/ min & max length" do @@ -274,7 +297,8 @@ char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4}) root ::= "\"" char{1,4} "\"" space space ::= | " " | "\n" [ \t]{0,20} """ - assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + assert {:ok, parsed, "", _, _, _} = EBNF.parse(grammar) +assert [_ | _] = parsed end test "boolean" do @@ -283,7 +307,8 @@ space ::= | " " | "\n" [ \t]{0,20} root ::= ("true" | "false") space space ::= | " " | "\n" [ \t]{0,20} """ - assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) +assert {:ok, parsed, "", _, _, _} = EBNF.parse(grammar) +assert [_ | _] = parsed end test "integer" do @@ -293,7 +318,8 @@ integral-part ::= [0] | [1-9] [0-9]{0,15} root ::= ("-"? integral-part) space space ::= | " " | "\n" [ \t]{0,20} """ - assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + assert {:ok, parsed, "", _, _, _} = EBNF.parse(grammar) +assert [_ | _] = parsed end test "string const" do @@ -302,7 +328,8 @@ space ::= | " " | "\n" [ \t]{0,20} root ::= "\"foo\"" space space ::= | " " | "\n" [ \t]{0,20} """ - assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + assert {:ok, parsed, "", _, _, _} = EBNF.parse(grammar) +assert [_ | _] = parsed end test "non-string const" do @@ -311,7 +338,8 @@ space ::= | " " | "\n" [ \t]{0,20} root ::= "123" space space ::= | " " | "\n" [ \t]{0,20} """ - assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + assert {:ok, parsed, "", _, _, _} = EBNF.parse(grammar) +assert [_ | _] = parsed end test "non-string enum" do @@ -320,7 +348,8 @@ space ::= | " " | "\n" [ \t]{0,20} root ::= ("\"red\"" | "\"amber\"" | "\"green\"" | "null" | "42" | "[\"foo\"]") space space ::= | " " | "\n" [ \t]{0,20} """ - assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + assert {:ok, parsed, "", _, _, _} = EBNF.parse(grammar) +assert [_ | _] = parsed end test "string array" do @@ -331,7 +360,8 @@ root ::= "[" space (string ("," space string)*)? "]" space space ::= | " " | "\n" [ \t]{0,20} string ::= "\"" char* "\"" space """ - assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + assert {:ok, parsed, "", _, _, _} = EBNF.parse(grammar) +assert [_ | _] = parsed end test "nullable string array" do @@ -344,7 +374,8 @@ root ::= alternative-0 | null space ::= | " " | "\n" [ \t]{0,20} string ::= "\"" char* "\"" space """ - assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + assert {:ok, parsed, "", _, _, _} = EBNF.parse(grammar) +assert [_ | _] = parsed end test "tuple1" do @@ -355,7 +386,8 @@ root ::= "[" space string "]" space space ::= | " " | "\n" [ \t]{0,20} string ::= "\"" char* "\"" space """ - assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + assert {:ok, parsed, "", _, _, _} = EBNF.parse(grammar) +assert [_ | _] = parsed end test "tuple2" do @@ -369,7 +401,8 @@ root ::= "[" space string "," space number "]" space space ::= | " " | "\n" [ \t]{0,20} string ::= "\"" char* "\"" space """ - assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + assert {:ok, parsed, "", _, _, _} = EBNF.parse(grammar) +assert [_ | _] = parsed end test "number" do @@ -380,7 +413,8 @@ integral-part ::= [0] | [1-9] [0-9]{0,15} root ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space space ::= | " " | "\n" [ \t]{0,20} """ - assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + assert {:ok, parsed, "", _, _, _} = EBNF.parse(grammar) +assert [_ | _] = parsed end test "minItems" do @@ -390,7 +424,8 @@ boolean ::= ("true" | "false") space root ::= "[" space boolean ("," space boolean)+ "]" space space ::= | " " | "\n" [ \t]{0,20} """ - assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + assert {:ok, parsed, "", _, _, _} = EBNF.parse(grammar) +assert [_ | _] = parsed end test "maxItems 1" do @@ -400,7 +435,8 @@ boolean ::= ("true" | "false") space root ::= "[" space boolean? "]" space space ::= | " " | "\n" [ \t]{0,20} """ - assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + assert {:ok, parsed, "", _, _, _} = EBNF.parse(grammar) +assert [_ | _] = parsed end test "maxItems 2" do @@ -410,7 +446,8 @@ boolean ::= ("true" | "false") space root ::= "[" space (boolean ("," space boolean)?)? "]" space space ::= | " " | "\n" [ \t]{0,20} """ - assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + assert {:ok, parsed, "", _, _, _} = EBNF.parse(grammar) +assert [_ | _] = parsed end test "min + maxItems" do @@ -424,7 +461,8 @@ number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? root ::= "[" space item ("," space item){2,4} "]" space space ::= | " " | "\n" [ \t]{0,20} """ - assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + assert {:ok, parsed, "", _, _, _} = EBNF.parse(grammar) +assert [_ | _] = parsed end test "min + max items with min + max values across zero" do @@ -434,7 +472,8 @@ item ::= ("-" ([0-9] | "1" [0-2]) | [0-9] | ([1-8] [0-9] | [9] [0-9]) | ([1] [0- root ::= "[" space item ("," space item){2,4} "]" space space ::= | " " | "\n" [ \t]{0,20} """ - assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + assert {:ok, parsed, "", _, _, _} = EBNF.parse(grammar) +assert [_ | _] = parsed end test "min + max items with min + max values" do @@ -444,7 +483,8 @@ item ::= (([1] ([2-9]) | [2-9] [0-9]) | ([1] [0-9]{2} | [2] "0" [0-7])) space root ::= "[" space item ("," space item){2,4} "]" space space ::= | " " | "\n" [ \t]{0,20} """ - assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + assert {:ok, parsed, "", _, _, _} = EBNF.parse(grammar) +assert [_ | _] = parsed end test "simple regexp" do @@ -453,26 +493,29 @@ space ::= | " " | "\n" [ \t]{0,20} root ::= "\"" "ab" "c"? "d"* "ef" "g"+ ("hij")? "kl" "\"" space space ::= | " " | "\n" [ \t]{0,20} """ - assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) - end - - test "regexp escapes" do - - grammar = """ -root ::= "\"" "[]{}()|+*?" "\"" space -space ::= | " " | "\n" [ \t]{0,20} -""" - assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + assert {:ok, parsed, "", _, _, _} = EBNF.parse(grammar) +assert [_ | _] = parsed end - test "regexp quote" do - - grammar = """ -root ::= "\"" "\"" "\"" space -space ::= | " " | "\n" [ \t]{0,20} -""" - assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) - end +# test "regexp escapes" do +# +# grammar = """ +#root ::= "\"" "[]{}()|+*?" "\"" space +#space ::= | " " | "\n" [ \t]{0,20} +#""" +# assert {:ok, parsed, "", _, _, _} = EBNF.parse(grammar) +#assert [_ | _] = parsed +# end + +# test "regexp quote" do +# +# grammar = """ +#root ::= "\"" "\"" "\"" space +#space ::= | " " | "\n" [ \t]{0,20} +#""" +# assert {:ok, parsed, "", _, _, _} = EBNF.parse(grammar) +#assert [_ | _] = parsed +# end test "regexp" do @@ -482,7 +525,8 @@ root ::= "\"" ("(" root-1{1,3} ")")? root-1{3,3} "-" root-1{4,4} " " "a"{3,5} "n root-1 ::= [0-9] space ::= | " " | "\n" [ \t]{0,20} """ - assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + assert {:ok, parsed, "", _, _, _} = EBNF.parse(grammar) +assert [_ | _] = parsed end test "required props in original order" do @@ -496,7 +540,8 @@ root ::= "{" space b-kv "," space c-kv "," space a-kv "}" space space ::= | " " | "\n" [ \t]{0,20} string ::= "\"" char* "\"" space """ - assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + assert {:ok, parsed, "", _, _, _} = EBNF.parse(grammar) +assert [_ | _] = parsed end test "1 optional prop" do @@ -508,7 +553,8 @@ root ::= "{" space (a-kv )? "}" space space ::= | " " | "\n" [ \t]{0,20} string ::= "\"" char* "\"" space """ - assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + assert {:ok, parsed, "", _, _, _} = EBNF.parse(grammar) +assert [_ | _] = parsed end test "N optional props" do @@ -524,7 +570,8 @@ root ::= "{" space (a-kv a-rest | b-kv b-rest | c-kv )? "}" space space ::= | " " | "\n" [ \t]{0,20} string ::= "\"" char* "\"" space """ - assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + assert {:ok, parsed, "", _, _, _} = EBNF.parse(grammar) +assert [_ | _] = parsed end test "required + optional props each in original order" do @@ -540,7 +587,8 @@ root ::= "{" space b-kv "," space a-kv ( "," space ( d-kv d-rest | c-kv ) )? "}" space ::= | " " | "\n" [ \t]{0,20} string ::= "\"" char* "\"" space """ - assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + assert {:ok, parsed, "", _, _, _} = EBNF.parse(grammar) +assert [_ | _] = parsed end test "additional props" do @@ -556,7 +604,8 @@ root ::= "{" space (additional-kv ( "," space additional-kv )* )? "}" space space ::= | " " | "\n" [ \t]{0,20} string ::= "\"" char* "\"" space """ - assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + assert {:ok, parsed, "", _, _, _} = EBNF.parse(grammar) +assert [_ | _] = parsed end test "additional props (true)" do @@ -575,7 +624,8 @@ space ::= | " " | "\n" [ \t]{0,20} string ::= "\"" char* "\"" space value ::= object | array | string | number | boolean | null """ - assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + assert {:ok, parsed, "", _, _, _} = EBNF.parse(grammar) +assert [_ | _] = parsed end test "additional props (implicit)" do @@ -594,7 +644,8 @@ space ::= | " " | "\n" [ \t]{0,20} string ::= "\"" char* "\"" space value ::= object | array | string | number | boolean | null """ - assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + assert {:ok, parsed, "", _, _, _} = EBNF.parse(grammar) +assert [_ | _] = parsed end test "empty w/o additional props" do @@ -603,7 +654,8 @@ value ::= object | array | string | number | boolean | null root ::= "{" space "}" space space ::= | " " | "\n" [ \t]{0,20} """ - assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + assert {:ok, parsed, "", _, _, _} = EBNF.parse(grammar) +assert [_ | _] = parsed end test "required + additional props" do @@ -620,7 +672,8 @@ root ::= "{" space a-kv ( "," space ( additional-kv ( "," space additional-kv )* space ::= | " " | "\n" [ \t]{0,20} string ::= "\"" char* "\"" space """ - assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + assert {:ok, parsed, "", _, _, _} = EBNF.parse(grammar) +assert [_ | _] = parsed end test "optional + additional props" do @@ -637,7 +690,8 @@ number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? root ::= "{" space (a-kv a-rest | additional-kv ( "," space additional-kv )* )? "}" space space ::= | " " | "\n" [ \t]{0,20} """ - assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + assert {:ok, parsed, "", _, _, _} = EBNF.parse(grammar) +assert [_ | _] = parsed end test "required + optional + additional props" do @@ -655,7 +709,8 @@ number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? root ::= "{" space and-kv ( "," space ( also-kv also-rest | additional-kv ( "," space additional-kv )* ) )? "}" space space ::= | " " | "\n" [ \t]{0,20} """ - assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + assert {:ok, parsed, "", _, _, _} = EBNF.parse(grammar) +assert [_ | _] = parsed end test "optional props with empty name" do @@ -674,7 +729,8 @@ root ::= ("-"? integral-part) space root0 ::= "{" space (-kv -rest | a-kv a-rest | additional-kv ( "," space additional-kv )* )? "}" space space ::= | " " | "\n" [ \t]{0,20} """ - assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + assert {:ok, parsed, "", _, _, _} = EBNF.parse(grammar) +assert [_ | _] = parsed end test "optional props with nested names" do @@ -692,7 +748,8 @@ integral-part ::= [0] | [1-9] [0-9]{0,15} root ::= "{" space (a-kv a-rest | aa-kv aa-rest | additional-kv ( "," space additional-kv )* )? "}" space space ::= | " " | "\n" [ \t]{0,20} """ - assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + assert {:ok, parsed, "", _, _, _} = EBNF.parse(grammar) +assert [_ | _] = parsed end test "optional props with common prefix" do @@ -710,7 +767,8 @@ integral-part ::= [0] | [1-9] [0-9]{0,15} root ::= "{" space (ab-kv ab-rest | ac-kv ac-rest | additional-kv ( "," space additional-kv )* )? "}" space space ::= | " " | "\n" [ \t]{0,20} """ - assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + assert {:ok, parsed, "", _, _, _} = EBNF.parse(grammar) +assert [_ | _] = parsed end test "top-level $ref" do @@ -723,7 +781,8 @@ root ::= foo space ::= | " " | "\n" [ \t]{0,20} string ::= "\"" char* "\"" space """ - assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + assert {:ok, parsed, "", _, _, _} = EBNF.parse(grammar) +assert [_ | _] = parsed end test "anyOf" do @@ -741,7 +800,8 @@ number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? root ::= alternative-0 | alternative-1 space ::= | " " | "\n" [ \t]{0,20} """ - assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + assert {:ok, parsed, "", _, _, _} = EBNF.parse(grammar) +assert [_ | _] = parsed end test "mix of allOf, anyOf and $ref (similar to https://json.schemastore.org/tsconfig.json)" do @@ -758,7 +818,8 @@ number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? root ::= "{" space a-kv "," space b-kv ( "," space ( d-kv d-rest | c-kv ) )? "}" space space ::= | " " | "\n" [ \t]{0,20} """ - assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + assert {:ok, parsed, "", _, _, _} = EBNF.parse(grammar) +assert [_ | _] = parsed end test "conflicting names" do @@ -775,7 +836,26 @@ number-number-root-kv ::= "\"root\"" space ":" space number root ::= "{" space number-kv "}" space space ::= | " " | "\n" [ \t]{0,20} """ - assert {:ok, _parsed, _, _, _, _} = EBNF.parse(grammar) + assert {:ok, parsed, "", _, _, _} = EBNF.parse(grammar) +assert [_ | _] = parsed + end + + test "space" do + + grammar = """ +root ::= [\t]{0,20} +""" + assert {:ok, parsed, "", _, _, _} = EBNF.parse(grammar) +assert [_ | _] = parsed + end + + test "char regex" do + + grammar = """ +root ::= [^\"\\\d\0-\x1F] +""" + assert {:ok, parsed, "", _, _, _} = EBNF.parse(grammar) +assert [_ | _] = parsed end end From 37fd08ff01b5f38b3b3141b306bb2feb82c005b3 Mon Sep 17 00:00:00 2001 From: Marc Nickert Date: Wed, 9 Oct 2024 20:57:14 +0200 Subject: [PATCH 3/3] add from to expansion --- lib/ebnf.ex | 39 ++++++++++++++++++++++++++++++ test/ebnf_more_test.exs | 53 ++++++++++++++++++++++++++++++++++++++++- 2 files changed, 91 insertions(+), 1 deletion(-) diff --git a/lib/ebnf.ex b/lib/ebnf.ex index ae5ccfe..541560b 100644 --- a/lib/ebnf.ex +++ b/lib/ebnf.ex @@ -136,6 +136,45 @@ defmodule EBNF do group_alternates(rest, alternates, [factor | current_group]) end + defp times(factor, times) do + 1..times//1 + |> Enum.map(fn _i -> factor end) + end + + defp expand_repetition(factor, {:times, [t]}, ids, name) do + + rule = if t > 0 do + times(factor, t) + else + {:empty, []} + end + + + next_symbol_id = map_size(ids) + next_rule_name = "#{name}_#{next_symbol_id}" + ids = Map.put(ids, next_rule_name, next_symbol_id) + + {next_rule_name, traverse_rule({:rule, [{:identifier, [next_rule_name]} | rule ]}, ids, name)} + end + + defp expand_repetition(factor, {:from_to, [from, to]}, ids, name) when from <= to do + + from_rules = if from > 0 do + times(factor, from) + else + [{:empty, []}] + end + to_rules = from+1..to//1 + |> Enum.reduce([], fn e,acc -> times(factor,e) ++ ["|" | acc] end) + next_symbol_id = map_size(ids) + next_rule_name = "#{name}_#{next_symbol_id}" + ids = Map.put(ids, next_rule_name, next_symbol_id) + + rule = [alternate: to_rules ++ from_rules] + + {next_rule_name, traverse_rule({:rule, [{:identifier, [next_rule_name]} | rule ]}, ids, name)} + end + defp expand_repetition(factor, rep, ids, name) do next_symbol_id = map_size(ids) next_rule_name = "#{name}_#{next_symbol_id}" diff --git a/test/ebnf_more_test.exs b/test/ebnf_more_test.exs index 83eb06c..5b4cbef 100644 --- a/test/ebnf_more_test.exs +++ b/test/ebnf_more_test.exs @@ -840,7 +840,7 @@ space ::= | " " | "\n" [ \t]{0,20} assert [_ | _] = parsed end - test "space" do + test "tab" do grammar = """ root ::= [\t]{0,20} @@ -849,6 +849,15 @@ root ::= [\t]{0,20} assert [_ | _] = parsed end + test "two tab" do + + grammar = """ +root ::= [\t]{2} +""" + assert {:ok, parsed, "", _, _, _} = EBNF.parse(grammar) +assert [_ | _] = parsed + end + test "char regex" do grammar = """ @@ -860,4 +869,46 @@ assert [_ | _] = parsed end + describe "expand/1" do + + test "tab{1,3}" do + + grammar = """ +root ::= [\t]{1,3} +""" +assert %ParseState{ + symbol_ids: symbol, + grammar_encoding: encoded + } = EBNF.encode(grammar) +assert [1, 10, 2, 9, 9, 2, 9, 9, 2, 9, 9, 0, 7, 2, 9, 9, 2, 9, 9, 0, 4, 2, 9, 9, 0, 0, 0, 3, 1, 1, 0, 0, 65535] = encoded +assert %{"root" => 0, "root_1" => 1} = symbol + end + + test "tab{0,3}" do + + grammar = """ +root ::= [\t]{0,3} +""" +assert %ParseState{ + symbol_ids: symbol, + grammar_encoding: encoded + } = EBNF.encode(grammar) +assert [1, 10, 2, 9, 9, 2, 9, 9, 2, 9, 9, 0, 7, 2, 9, 9, 2, 9, 9, 0, 4, 2, 9, 9, 0, 1, 0, 0, 0, 3, 1, 1, 0, 0, 65535] = encoded +assert %{"root" => 0, "root_1" => 1} = symbol + end + + test "tab{2}" do + + grammar = """ +root ::= [\t]{2} +""" +assert %ParseState{ + symbol_ids: symbol, + grammar_encoding: encoded + } = EBNF.encode(grammar) +assert [1, 7, 2, 9, 9, 2, 9, 9, 0, 0, 0, 3, 1, 1, 0, 0, 65535] = encoded +assert %{"root" => 0, "root_1" => 1} = symbol + end + end + end