diff --git a/.gitignore b/.gitignore index 5e3115d..eadab64 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,7 @@ src/parsy.egg-info docs/_build .cache +__pycache__ +.python-version +.venv +.vscode diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 87725aa..e3b0dfd 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -4,8 +4,8 @@ repos: hooks: - id: trailing-whitespace - id: end-of-file-fixer - - repo: https://gitlab.com/pycqa/flake8 - rev: 3.8.4 + - repo: https://github.com/pycqa/flake8.git + rev: 3.9.2 hooks: - id: flake8 language_version: python3.9 diff --git a/README.rst b/README.rst index 6d90112..3b68947 100644 --- a/README.rst +++ b/README.rst @@ -13,10 +13,12 @@ incompatible** version of parsy that has strong type guarantees, with no This means removing anything that cannot be typed fully, and providing alternatives. Main changes: -* Removed ``seq``, and replaced it with ``&`` operator support which returns a 2-tuple +* Removed ``seq``, and replaced it with ``join`` which creates a 2-tuple result, and + ``append`` which takes an ``n``-tuple result and adds the result of another parser to + the end, producing an ``n+1``-tuple result. * Removed ``alt`` - you can use only ``|`` operator. -* Removed ``.combine`` and ``.combine_dict`` - you have to use ``.map`` instead, - which is type-safe but much trickier, especially once you have nested tuples. +* Removed ``.combine_dict`` - you have to use ``.map`` or ``.combine`` instead, + which is type-safe but loses the benefit of keyword sequence parsers. The docs have not been updated, you’ll need to look at the source code if you are interested. diff --git a/conftest.py b/conftest.py index dce42a2..b521adf 100644 --- a/conftest.py +++ b/conftest.py @@ -1,6 +1,7 @@ import sys +from typing import List -collect_ignore: list[str] = [] +collect_ignore: List[str] = [] if sys.version_info < (3, 7): # Python 3.6 and below don't have `dataclasses` diff --git a/docs/ref/methods_and_combinators.rst b/docs/ref/methods_and_combinators.rst index 3e37df5..69b7ab4 100644 --- a/docs/ref/methods_and_combinators.rst +++ b/docs/ref/methods_and_combinators.rst @@ -111,13 +111,12 @@ can be used and manipulated as below. Returns a parser that expects the initial parser at least ``n`` times, and produces a list of the results. - .. method:: until(other_parser, [min=0, max=inf, consume_other=False]) + .. method:: until(other_parser, [min=0, max=inf]) Returns a parser that expects the initial parser followed by ``other_parser``. The initial parser is expected at least ``min`` times and at most ``max`` times. By default, it does not consume ``other_parser`` and it produces a list of the - results excluding ``other_parser``. If ``consume_other`` is ``True`` then - ``other_parser`` is consumed and its result is included in the list of results. + results excluding ``other_parser``. .. code:: python @@ -125,8 +124,6 @@ can be used and manipulated as below. [['A','A','A'], 'BC'] >>> string('A').until(string('B')).then(string('BC')).parse('AAABC') 'BC' - >>> string('A').until(string('BC'), consume_other=True).parse('AAABC') - ['A', 'A', 'A', 'BC'] .. 
versionadded:: 2.0 diff --git a/examples/dataclass_parser_demo.py b/examples/dataclass_parser_demo.py new file mode 100644 index 0000000..dc436d4 --- /dev/null +++ b/examples/dataclass_parser_demo.py @@ -0,0 +1,114 @@ +from dataclasses import dataclass +from typing import List + +from parsy import dataclass_parser, parser_field, regex, string + +text = """Sample text + +A selection of students from Riverdale High and Hogwarts took part in a quiz. This is a record of their scores. + +School = Riverdale High +Grade = 1 +Student number, Name +0, Phoebe +1, Rachel + +Student number, Score +0, 3 +1, 7 + +Grade = 2 +Student number, Name +0, Angela +1, Tristan +2, Aurora + +Student number, Score +0, 6 +1, 3 +2, 9 + +School = Hogwarts +Grade = 1 +Student number, Name +0, Ginny +1, Luna + +Student number, Score +0, 8 +1, 7 + +Grade = 2 +Student number, Name +0, Harry +1, Hermione + +Student number, Score +0, 5 +1, 10 + +Grade = 3 +Student number, Name +0, Fred +1, George + +Student number, Score +0, 0 +1, 0 +""" + + +integer = regex(r"\d+").map(int) +any_text = regex(r"[^\n]+") + + +@dataclass +class Student: + number: int = parser_field(integer << string(", ")) + name: str = parser_field(any_text << string("\n")) + + +@dataclass +class Score: + number: int = parser_field(integer << string(", ")) + score: int = parser_field(integer << string("\n")) + + +@dataclass +class StudentWithScore: + name: str + number: int + score: int + + +@dataclass +class Grade: + grade: int = parser_field(string("Grade = ") >> integer << string("\n")) + students: List[Student] = parser_field( + string("Student number, Name\n") >> dataclass_parser(Student).many() << regex(r"\n*") + ) + scores: List[Score] = parser_field( + string("Student number, Score\n") >> dataclass_parser(Score).many() << regex(r"\n*") + ) + + @property + def students_with_scores(self) -> List[StudentWithScore]: + names = {st.number: st.name for st in self.students} + return [StudentWithScore(names[score.number], score.number, score.score) for score in self.scores] + + +@dataclass +class School: + name: str = parser_field(string("School = ") >> any_text << string("\n")) + grades: List[Grade] = parser_field(dataclass_parser(Grade).many()) + + +@dataclass +class File: + header: str = parser_field(regex(r"[\s\S]*?(?=School =)")) + schools: List[School] = parser_field(dataclass_parser(School).many()) + + +if __name__ == "__main__": + file = dataclass_parser(File).parse(text) + print(file.schools) diff --git a/examples/dataclass_parsing.py b/examples/dataclass_parsing.py new file mode 100644 index 0000000..00193b1 --- /dev/null +++ b/examples/dataclass_parsing.py @@ -0,0 +1,82 @@ +from dataclasses import dataclass +from typing import Optional + +from parsy import dataclass_parser, parser_field, regex, string, whitespace + + +@dataclass +class Person: + name: str = parser_field(regex(r"\w+") << whitespace) + age: int = parser_field(regex(r"\d+").map(int) << whitespace) + note: str = parser_field(regex(".+")) + + +person_parser = dataclass_parser(Person) +person = person_parser.parse("Rob 2000 how time flies") +print(person) +assert person == Person(name="Rob", age=2000, note="how time flies") + + +# Nesting dataclass parsers + + +@dataclass +class Id: + id: str = parser_field(regex(r"[^\s]+") << whitespace.optional()) + from_year: Optional[int] = parser_field( + regex("[0-9]+").map(int).desc("Numeric").optional() << whitespace.optional() + ) + + +@dataclass +class Name: + name: str = parser_field(regex(r"[a-zA-Z]+") << whitespace.optional()) + abbreviated: 
Optional[bool] = parser_field(
+        (string("T") | string("F")).map(lambda x: x == "T").optional() << whitespace.optional()
+    )
+
+
+@dataclass
+class PersonDetail:
+    id: Id = parser_field(dataclass_parser(Id))
+    forename: Name = parser_field(dataclass_parser(Name))
+    surname: Optional[Name] = parser_field(dataclass_parser(Name).optional())
+
+
+out_parser = dataclass_parser(PersonDetail).many()
+
+new_person = out_parser.parse("007 2023 Rob T John 123 2004 Bob")
+print(new_person)
+
+res = [
+    PersonDetail(
+        id=Id(id="007", from_year=2023),
+        forename=Name(name="Rob", abbreviated=True),
+        surname=Name(name="John", abbreviated=None),
+    ),
+    PersonDetail(id=Id(id="123", from_year=2004), forename=Name(name="Bob", abbreviated=None), surname=None),
+]
+assert new_person == res
+
+# Dataclass parsing where not all fields have a parsy parser
+
+
+@dataclass
+class PersonWithRarity:
+    name: str = parser_field(regex(r"\w+") << whitespace)
+    age: int = parser_field(regex(r"\d+").map(int) << whitespace)
+    note: str = parser_field(regex(".+"))
+    rare: bool = False
+
+    def __post_init__(self):
+        if self.age > 70:
+            self.rare = True
+
+
+person_parser = dataclass_parser(PersonWithRarity)
+person = person_parser.parse("Rob 20 whippersnapper")
+print(person)
+assert person == PersonWithRarity(name="Rob", age=20, note="whippersnapper", rare=False)
+
+person = person_parser.parse("Rob 2000 how time flies")
+print(person)
+assert person == PersonWithRarity(name="Rob", age=2000, note="how time flies", rare=True)
diff --git a/examples/generator_typed.py b/examples/generator_typed.py
new file mode 100644
index 0000000..110ed21
--- /dev/null
+++ b/examples/generator_typed.py
@@ -0,0 +1,39 @@
+from dataclasses import dataclass
+from typing import Generator
+
+from parsy import Parser, generate, regex, success, whitespace
+
+
+@dataclass
+class Person:
+    name: str
+    age: int
+    note: str
+
+
+def person_parser() -> Parser[Person]:
+    @generate
+    def person_parser() -> Generator[Parser[str], str, Person]:
+        # By yielding parsers of a single type, the type system works.
+        # Heterogeneous generator types don't exist.
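+        # For example, every `yield` below produces a `str`, which is why the
+        # `Generator[Parser[str], str, Person]` annotation type-checks.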
+ name = yield regex(r"\w+") << whitespace + + # But every parser starts by matching a string anyway: other types only come + # from further function logic, which doesn't need to be part of the parser when + # using a generator: + age_text = yield regex(r"\d+") << whitespace + age = int(age_text) + if age > 20: + # Parsing depends on previously parsed values + note = yield regex(".+") >> success("Older than a score") + else: + note = yield regex(".+") + + return Person(name, age, note) + + return person_parser + + +person = person_parser().parse("Rob 21 once upon a time") + +print(person) diff --git a/examples/json.py b/examples/json.py index 7eab341..dd159a7 100644 --- a/examples/json.py +++ b/examples/json.py @@ -1,5 +1,6 @@ -from typing import TypeVar -from parsy import Parser, forward_declaration, regex, string +from typing import Dict, List, TypeVar, Union + +from parsy import Parser, ParserReference, generate, regex, string # Utilities whitespace = regex(r"\s*") @@ -39,20 +40,27 @@ def lexeme(p: Parser[T]) -> Parser[T]: quoted = lexeme(string('"') >> (string_part | string_esc).many().concat() << string('"')) # Data structures -json_value = forward_declaration() -object_pair = (quoted << colon) & json_value -json_object = lbrace >> object_pair.sep_by(comma).map(dict) << rbrace -array = lbrack >> json_value.sep_by(comma) << rbrack +JSON = Union[Dict[str, "JSON"], List["JSON"], str, int, float, bool, None] + + +@generate +def _json_parser() -> ParserReference[JSON]: + return (yield json_parser) + + +object_pair = (quoted << colon) & _json_parser +json_object = lbrace >> object_pair.sep_by(comma).map(lambda a: {g[0]: g[1] for g in a}) << rbrace +array = lbrack >> _json_parser.sep_by(comma) << rbrack # Everything -json_value.become(quoted | number | json_object | array | true | false | null) -json_doc = whitespace >> json_value +json_parser = quoted | number | json_object | array | true | false | null + +json_doc = whitespace >> json_parser def test(): - assert ( - json_doc.parse( - r""" + result = json_doc.parse( + r""" { "int": 1, "string": "hello", @@ -62,19 +70,18 @@ def test(): "other": [true, false, null] } """ - ) - == { - "int": 1, - "string": "hello", - "a list": [1, 2, 3], - "escapes": "\n ⓒ", - "nested": {"x": "y"}, - "other": [True, False, None], - } ) + print(result) + assert result == { + "int": 1, + "string": "hello", + "a list": [1, 2, 3], + "escapes": "\n ⓒ", + "nested": {"x": "y"}, + "other": [True, False, None], + } if __name__ == "__main__": - from sys import stdin - - print(repr(json_doc.parse(stdin.read()))) + test() + # print(repr(json_doc.parse(stdin.read()))) diff --git a/examples/sequence.py b/examples/sequence.py new file mode 100644 index 0000000..6dcca28 --- /dev/null +++ b/examples/sequence.py @@ -0,0 +1,24 @@ +from dataclasses import dataclass + +from parsy import regex, seq, whitespace + + +@dataclass +class Person: + name: str + age: int + note: str + + +person_arg_sequence = seq( + regex(r"\w+"), + whitespace >> regex(r"\d+").map(int), + whitespace.then(regex(r".+")), +) +person_parser = person_arg_sequence.combine(Person) + +person = person_parser.parse("Rob 1000 pretty old") + +print(person) + +assert person == Person(name="Rob", age=1000, note="pretty old") diff --git a/examples/simple_eval.py b/examples/simple_eval.py index 1debc7c..0f77bc5 100644 --- a/examples/simple_eval.py +++ b/examples/simple_eval.py @@ -1,70 +1,70 @@ -from parsy import digit, generate, match_item, regex, string, success, test_item +# from parsy import digit, generate, 
match_char, regex, string, success -def lexer(code): - whitespace = regex(r"\s*") - integer = digit.at_least(1).concat().map(int) - float_ = (digit.many() + string(".").result(["."]) + digit.many()).concat().map(float) - parser = whitespace >> ((float_ | integer | regex(r"[()*/+-]")) << whitespace).many() - return parser.parse(code) +# def lexer(code): +# whitespace = regex(r"\s*") +# integer = digit.at_least(1).concat().map(int) +# float_ = (digit.many() + string(".").result(["."]) + digit.many()).concat().map(float) +# parser = whitespace >> ((float_ | integer | regex(r"[()*/+-]")) << whitespace).many() +# return parser.parse(code) -def eval_tokens(tokens): - # This function parses and evaluates at the same time. +# def eval_tokens(tokens): +# # This function parses and evaluates at the same time. - lparen = match_item("(") - rparen = match_item(")") +# lparen = match_char("(") +# rparen = match_char(")") - @generate - def additive(): - res = yield multiplicative - sign = match_item("+") | match_item("-") - while True: - operation = yield sign | success("") - if not operation: - break - operand = yield multiplicative - if operation == "+": - res += operand - elif operation == "-": - res -= operand - return res +# @generate +# def additive(): +# res = yield multiplicative +# sign = match_char("+") | match_char("-") +# while True: +# operation = yield sign | success("") +# if not operation: +# break +# operand = yield multiplicative +# if operation == "+": +# res += operand +# elif operation == "-": +# res -= operand +# return res - @generate - def multiplicative(): - res = yield simple - op = match_item("*") | match_item("/") - while True: - operation = yield op | success("") - if not operation: - break - operand = yield simple - if operation == "*": - res *= operand - elif operation == "/": - res /= operand - return res +# @generate +# def multiplicative(): +# res = yield simple +# op = match_char("*") | match_char("/") +# while True: +# operation = yield op | success("") +# if not operation: +# break +# operand = yield simple +# if operation == "*": +# res *= operand +# elif operation == "/": +# res /= operand +# return res - @generate - def number(): - sign = yield match_item("+") | match_item("-") | success("+") - value = yield test_item(lambda x: isinstance(x, (int, float)), "number") - return value if sign == "+" else -value +# @generate +# def number(): +# sign = yield match_char("+") | match_char("-") | success("+") +# value = yield test_item(lambda x: isinstance(x, (int, float)), "number") +# return value if sign == "+" else -value - expr = additive - simple = (lparen >> expr << rparen) | number +# expr = additive +# simple = (lparen >> expr << rparen) | number - return expr.parse(tokens) +# return expr.parse(tokens) -def simple_eval(expr): - return eval_tokens(lexer(expr)) +# def simple_eval(expr): +# return eval_tokens(lexer(expr)) -import pytest # noqa isort:skip +# import pytest # noqa isort:skip -test_item = pytest.mark.skip(test_item) # This is not a test +# test_item = pytest.mark.skip(test_item) # This is not a test -if __name__ == "__main__": - print(simple_eval(input())) +# if __name__ == "__main__": +# print(simple_eval(input())) diff --git a/examples/simple_logo_lexer.py b/examples/simple_logo_lexer.py index 4f5ffb8..a768fdc 100644 --- a/examples/simple_logo_lexer.py +++ b/examples/simple_logo_lexer.py @@ -8,14 +8,16 @@ etc. 
""" -from parsy import eof, regex, string, string_from, whitespace, Parser +from dataclasses import dataclass + +from parsy import dataclass_parser, eof, parser_field, regex, string, string_from, whitespace command = string_from("fd", "bk", "rt", "lt") number = regex(r"[0-9]+").map(int) optional_whitespace = regex(r"\s*") eol = string("\n") -line = (optional_whitespace >> command) & (whitespace >> number) & (eof | eol | (whitespace >> eol)).result("\n") -lexer: Parser[list[object]] = line.many().map(lambda lines: sum(([t0, t1, t2] for ((t0, t1), t2) in lines), [])) +line = (optional_whitespace >> command).join(whitespace >> number) << (eof | eol | (whitespace >> eol)) +lexer = line.many() def test_lexer() -> None: @@ -25,5 +27,28 @@ def test_lexer() -> None: bk 2 """ ) - == ["fd", 1, "\n", "bk", 2, "\n"] + == [("fd", 1), ("bk", 2)] + ) + + +""" +Alternative which creates a more structured output +""" + + +@dataclass +class Instruction: + command: str = parser_field(optional_whitespace >> command) + distance: int = parser_field(whitespace >> number << (eof | eol | (whitespace >> eol))) + + +instruction_parser = dataclass_parser(Instruction).many() + +assert ( + instruction_parser.parse( + """fd 1 +bk 2 +""" ) + == [Instruction("fd", 1), Instruction("bk", 2)] +) diff --git a/examples/simple_logo_parser.py b/examples/simple_logo_parser.py deleted file mode 100644 index 37e9570..0000000 --- a/examples/simple_logo_parser.py +++ /dev/null @@ -1,49 +0,0 @@ -from parsy import generate, match_item, test_item - - -class Command: - def __init__(self, parameter): - self.parameter = parameter - - def __repr__(self): - return f"{self.__class__.__name__}({self.parameter})" - - -class Forward(Command): - pass - - -class Backward(Command): - pass - - -class Right(Command): - pass - - -class Left(Command): - pass - - -commands = { - "fd": Forward, - "bk": Backward, - "rt": Right, - "lt": Left, -} - - -@generate -def statement(): - cmd_name = yield test_item(lambda i: i in commands.keys(), "command") - parameter = yield test_item(lambda i: isinstance(i, int), "number") - yield match_item("\n") - return commands[cmd_name](int(parameter)) - - -program = statement.many() - - -import pytest # noqa isort:skip - -test_item = pytest.mark.skip(test_item) # This is not a test diff --git a/parsy/__init__.py b/parsy/__init__.py new file mode 100644 index 0000000..b1d70e2 --- /dev/null +++ b/parsy/__init__.py @@ -0,0 +1,784 @@ +# End-user documentation is in ../../doc/ and so is for the most part not +# duplicated here in the form of doc strings. Code comments and docstrings +# are mainly for internal use. 
+from __future__ import annotations + +import enum +import operator +import re +from dataclasses import Field, dataclass, field, fields +from functools import reduce, wraps +from typing import ( + Any, + Callable, + ClassVar, + Dict, + FrozenSet, + Generator, + Generic, + List, + Mapping, + Optional, + Pattern, + Tuple, + Type, + TypeVar, + Union, + cast, + overload, +) + +from typing_extensions import Literal, ParamSpec, Protocol, TypeVarTuple, Unpack + +OUT = TypeVar("OUT") +OUT1 = TypeVar("OUT1") +OUT2 = TypeVar("OUT2") +OUT3 = TypeVar("OUT3") +OUT4 = TypeVar("OUT4") +OUT5 = TypeVar("OUT5") +OUT6 = TypeVar("OUT6") +OUT_T = TypeVarTuple("OUT_T") +OUT_T2 = TypeVarTuple("OUT_T2") +OUT_co = TypeVar("OUT_co", covariant=True) +OUT2_co = TypeVar("OUT2_co", covariant=True) + +P = ParamSpec("P") + +T = TypeVar("T") +T_co = TypeVar("T_co", covariant=True) + +_T_contra = TypeVar("_T_contra", contravariant=True) + +_T_co = TypeVar("_T_co", covariant=True) + + +class SupportsAdd(Protocol[_T_contra, _T_co]): + def __add__(self, __x: _T_contra) -> _T_co: + ... + + +def noop(val: T) -> T: + return val + + +def line_info_at(stream: str, index: int) -> Tuple[int, int]: + if index > len(stream): + raise ValueError("invalid index") + line = stream.count("\n", 0, index) + last_nl = stream.rfind("\n", 0, index) + col = index - (last_nl + 1) + return (line, col) + + +# @dataclass +# class Stream: +# stream: str + +# def at_index(self, index: int): +# return memoryview(self.stream) + + +class ParseError(RuntimeError): + def __init__(self, expected: FrozenSet[str], stream: str, index: int): + self.expected: FrozenSet[str] = expected + self.stream: str = stream + self.index: int = index + + def line_info(self) -> str: + try: + return "{}:{}".format(*line_info_at(self.stream, self.index)) + except (TypeError, AttributeError): # not a str + return str(self.index) + + def __str__(self) -> str: + expected_list = sorted(repr(e) for e in self.expected) + + if len(expected_list) == 1: + return f"expected {expected_list[0]} at {self.line_info()}" + else: + return f"expected one of {', '.join(expected_list)} at {self.line_info()}" + + +@dataclass +class Result(Generic[OUT_co]): + status: bool + index: int + value: OUT_co + furthest: int + expected: FrozenSet[str] + + @staticmethod + def success(index: int, value: OUT) -> Result[OUT]: + return Result(True, index, value, -1, frozenset()) + + # We don't handle types of failures yet, and always + # either: + # - don't return these values (e.g. choose another parser) + # - raise an exception. + + # Therefore, I think it is safe here to use `Any` as type to keep type checker happy + # The same issue crops up in various branches that return parse failure results + @staticmethod + def failure(index: int, expected: str) -> Result[Any]: + return Result(False, -1, None, index, frozenset([expected])) + + # collect the furthest failure from self and other + def aggregate(self: Result[OUT], other: Optional[Result[Any]]) -> Result[OUT]: + if not other: + return self + + if self.furthest > other.furthest: + return self + elif self.furthest == other.furthest: + # if we both have the same failure index, we combine the expected messages. 
+ return Result(self.status, self.index, self.value, self.furthest, self.expected | other.expected) + else: + return Result(self.status, self.index, self.value, other.furthest, other.expected) + + +class Parser(Generic[OUT_co]): + """ + A Parser is an object that wraps a function whose arguments are + a string to be parsed and the index on which to begin parsing. + The function should return either Result.success(next_index, value), + where the next index is where to continue the parse and the value is + the yielded value, or Result.failure(index, expected), where expected + is a string indicating what was expected, and the index is the index + of the failure. + """ + + def __init__(self, wrapped_fn: Callable[[str, int], Result[OUT_co]]): + self.wrapped_fn: Callable[[str, int], Result[OUT_co]] = wrapped_fn + + def __call__(self, stream: str, index: int) -> Result[OUT_co]: + return self.wrapped_fn(stream, index) + + def parse(self, stream: str) -> OUT_co: + """Parse a string and return the result or raise a ParseError.""" + (result, _) = (self << eof).parse_partial(stream) + return result + + def parse_partial(self, stream: str) -> Tuple[OUT_co, str]: + """ + Parse the longest possible prefix of a given string. + Return a tuple of the result and the rest of the string, + or raise a ParseError. + """ + result = self(stream, 0) + + if result.status: + return (result.value, stream[result.index :]) + else: + raise ParseError(result.expected, stream, result.furthest) + + def bind(self: Parser[OUT1], bind_fn: Callable[[OUT1], Parser[OUT2]]) -> Parser[OUT2]: + @Parser + def bound_parser(stream: str, index: int) -> Result[OUT2]: + result: Result[OUT1] = self(stream, index) + + if result.status: + next_parser = bind_fn(result.value) + return next_parser(stream, result.index).aggregate(result) + else: + return result # type: ignore + + return bound_parser + + def map(self: Parser[OUT1], map_fn: Callable[[OUT1], OUT2]) -> Parser[OUT2]: + return self.bind(lambda res: success(map_fn(res))) + + def concat(self: Parser[List[str]]) -> Parser[str]: + return self.map("".join) + + def then(self: Parser[Any], other: Parser[OUT2]) -> Parser[OUT2]: + return (self & other).map(lambda t: t[1]) + + def skip(self: Parser[OUT1], other: Parser[Any]) -> Parser[OUT1]: + return (self & other).map(lambda t: t[0]) + + def result(self: Parser[Any], res: OUT2) -> Parser[OUT2]: + return self >> success(res) + + def many(self: Parser[OUT_co]) -> Parser[List[OUT_co]]: + return self.times(0, float("inf")) + + def times(self: Parser[OUT_co], min: int, max: int | float | None = None) -> Parser[List[OUT_co]]: + the_max: int | float + if max is None: + the_max = min + else: + the_max = max + + # TODO - must execute at least once + @Parser + def times_parser(stream: str, index: int) -> Result[List[OUT_co]]: + values: List[OUT_co] = [] + times = 0 + result = None + + while times < the_max: + result = self(stream, index).aggregate(result) + if result.status: + values.append(result.value) + index = result.index + times += 1 + elif times >= min: + break + else: + return result # type: ignore + + return Result.success(index, values).aggregate(result) + + return times_parser + + def at_most(self: Parser[OUT_co], n: int) -> Parser[List[OUT_co]]: + return self.times(0, n) + + def at_least(self: Parser[OUT_co], n: int) -> Parser[List[OUT_co]]: + return self.times(min=n, max=float("inf")) + + def optional(self: Parser[OUT1], default: OUT2 = None) -> Parser[OUT1 | OUT2]: + return self.times(0, 1).map(lambda v: v[0] if v else default) + + def 
until(
+        self: Parser[OUT_co],
+        other: Parser[Any],
+        min: int = 0,
+        max: int | float = float("inf"),
+    ) -> Parser[List[OUT_co]]:
+        @Parser
+        def until_parser(stream: str, index: int) -> Result[List[OUT_co]]:
+            values: List[OUT_co] = []
+            times = 0
+            while True:
+                # try the other (terminating) parser first
+                res = other(stream, index)
+                if res.status and times >= min:
+                    return Result.success(index, values)
+
+                # exceeded max?
+                if times >= max:
+                    # return failure, it matched parser more than max times
+                    return Result.failure(index, f"at most {max} items")
+
+                # other parser failed; try this parser
+                result = self(stream, index)
+                if result.status:
+                    # consume
+                    values.append(result.value)
+                    index = result.index
+                    times += 1
+                elif times >= min:
+                    # return failure, parser is not followed by other
+                    return Result.failure(index, "did not find other parser")
+                else:
+                    # return failure, it did not match parser at least min times
+                    return Result.failure(index, f"at least {min} items; got {times} item(s)")
+
+        return until_parser
+
+    def sep_by(
+        self: Parser[OUT], sep: Parser[Any], *, min: int = 0, max: int | float = float("inf")
+    ) -> Parser[List[OUT]]:
+        zero_times = success(cast(List[OUT], []))
+        if max == 0:
+            return zero_times
+
+        res = (self & (sep >> self).times(min - 1, max - 1)).combine(lambda first, repeats: [first, *repeats])
+        if min == 0:
+            res = res | zero_times
+        return res
+
+    def desc(self, description: str) -> Parser[OUT_co]:
+        @Parser
+        def desc_parser(stream: str, index: int) -> Result[OUT_co]:
+            result = self(stream, index)
+            if result.status:
+                return result
+            else:
+                return Result.failure(index, description)
+
+        return desc_parser
+
+    def mark(self: Parser[OUT_co]) -> Parser[Tuple[Tuple[int, int], OUT_co, Tuple[int, int]]]:
+        return seq(line_info, self, line_info)
+
+    def tag(self: Parser[OUT], name: str) -> Parser[Tuple[str, OUT]]:
+        return self.map(lambda v: (name, v))
+
+    def should_fail(self: Parser[OUT], description: str) -> Parser[Result[OUT]]:
+        @Parser
+        def fail_parser(stream: str, index: int) -> Result[Result[OUT]]:
+            res = self(stream, index)
+            if res.status:
+                return Result.failure(index, description)
+            return Result.success(index, res)
+
+        return fail_parser
+
+    # Special cases for adding tuples
+    # We have to unroll each number of tuple elements for `other` because PEP-646
+    # only allows one "Unpack" in a Tuple (if we could have two, the return
+    # type could use two Unpacks)
+    @overload
+    def __add__(self: Parser[Tuple[Unpack[OUT_T]]], other: Parser[Tuple[OUT1]]) -> Parser[Tuple[Unpack[OUT_T], OUT1]]:
+        ...
+
+    @overload
+    def __add__(
+        self: Parser[Tuple[Unpack[OUT_T]]], other: Parser[Tuple[OUT1, OUT2]]
+    ) -> Parser[Tuple[Unpack[OUT_T], OUT1, OUT2]]:
+        ...
+
+    @overload
+    def __add__(
+        self: Parser[Tuple[Unpack[OUT_T]]], other: Parser[Tuple[OUT1, OUT2, OUT3]]
+    ) -> Parser[Tuple[Unpack[OUT_T], OUT1, OUT2, OUT3]]:
+        ...
+
+    @overload
+    def __add__(
+        self: Parser[Tuple[Unpack[OUT_T]]], other: Parser[Tuple[OUT1, OUT2, OUT3, OUT4]]
+    ) -> Parser[Tuple[Unpack[OUT_T], OUT1, OUT2, OUT3, OUT4]]:
+        ...
+
+    @overload
+    def __add__(
+        self: Parser[Tuple[Unpack[OUT_T]]], other: Parser[Tuple[OUT1, OUT2, OUT3, OUT4, OUT5]]
+    ) -> Parser[Tuple[Unpack[OUT_T], OUT1, OUT2, OUT3, OUT4, OUT5]]:
+        ...
+
+    # This covers tuples where `other` has more elements than the above overloads
+    # and the `self` and `other` tuples have the same homogeneous type
+    @overload
+    def __add__(self: Parser[Tuple[OUT, ...]], other: Parser[Tuple[OUT, ...]]) -> Parser[Tuple[OUT, ...]]:
+        ...
+
+    # Cover the rest of cases which can't return a homogeneous tuple
+    @overload
+    def __add__(self: Parser[Tuple[Any, ...]], other: Parser[Tuple[Any, ...]]) -> Parser[Tuple[Any, ...]]:
+        ...
+
+    # Addable parsers which return the same type
+    @overload
+    def __add__(self: Parser[SupportsAdd[Any, _T_co]], other: Parser[SupportsAdd[Any, _T_co]]) -> Parser[_T_co]:
+        ...
+
+    def __add__(self: Parser[Any], other: Parser[Any]) -> Parser[Any]:
+        return (self & other).combine(operator.add)
+
+    def __mul__(self: Parser[OUT], other: range | int) -> Parser[List[OUT]]:
+        if isinstance(other, range):
+            return self.times(other.start, other.stop - 1)
+        return self.times(other)
+
+    def __or__(self: Parser[OUT1], other: Parser[OUT2]) -> Parser[Union[OUT1, OUT2]]:
+        @Parser
+        def alt_parser(stream: str, index: int) -> Result[Union[OUT1, OUT2]]:
+            result0 = None
+
+            result1 = self(stream, index).aggregate(result0)
+            if result1.status:
+                return result1
+
+            result2 = other(stream, index).aggregate(result1)
+            return result2
+
+        return alt_parser
+
+    def __and__(self: Parser[OUT1], other: Parser[OUT2]) -> Parser[tuple[OUT1, OUT2]]:
+        @Parser
+        def and_parser(stream: str, index: int) -> Result[tuple[OUT1, OUT2]]:
+            self_result = self(stream, index)
+            if not self_result.status:
+                return self_result  # type: ignore
+            other_result = other(stream, self_result.index).aggregate(self_result)
+            if not other_result.status:
+                return other_result  # type: ignore
+
+            return Result.success(other_result.index, (self_result.value, other_result.value)).aggregate(other_result)
+
+        return and_parser
+
+    def join(self: Parser[OUT1], other: Parser[OUT2]) -> Parser[tuple[OUT1, OUT2]]:
+        """TODO alternative name for `&`, decide on naming"""
+        return self & other
+
+    def as_tuple(self: Parser[OUT]) -> Parser[Tuple[OUT]]:
+        return self.map(lambda value: (value,))
+
+    def append(self: Parser[Tuple[Unpack[OUT_T]]], other: Parser[OUT2]) -> Parser[Tuple[Unpack[OUT_T], OUT2]]:
+        """
+        Take a parser which produces a tuple of values, and add another parser's result
+        to the end of that tuple
+        """
+        return self.bind(lambda self_value: other.bind(lambda other_value: success((*self_value, other_value))))
+
+    def combine(self: Parser[Tuple[Unpack[OUT_T]]], combine_fn: Callable[[Unpack[OUT_T]], OUT2]) -> Parser[OUT2]:
+        """
+        Apply ``combine_fn`` to the parser result, which must be a tuple. The result
+        is passed as `*args` to ``combine_fn``.
+        """
+        return self.bind(lambda value: success(combine_fn(*value)))
+
+    # haskelley operators, for fun #
+
+    # >>
+
+    def __rshift__(self, other: Parser[OUT]) -> Parser[OUT]:
+        return self.then(other)
+
+    # <<
+    def __lshift__(self, other: Parser[Any]) -> Parser[OUT_co]:
+        return self.skip(other)
+
+
+# TODO:
+# I think @generate is unfixable. It's not surprising, because
+# we are doing something genuinely unusual with generator functions.
+
+# The return value of a `@generate` parser is now OK.
+
+# But we have no type checking within a user's @generate function.
+
+# The big issue is that each `val = yield parser` inside a @generate parser has
+# a different type, and we'd like those to be type checked. But the
+# `Generator[...]` expects a homogeneous stream of yield and send types,
+# whereas we have pairs of yield/send types which need to match within the
+# pair, but each pair can be completely different from the next in the stream.
+
+
+def generate(fn: Callable[[], Generator[Parser[Any], Any, OUT]]) -> Parser[OUT]:
+    @Parser
+    @wraps(fn)
+    def generated(stream: str, index: int) -> Result[OUT]:
+        # start up the generator
+        iterator = fn()
+
+        result = None
+        value = None
+        try:
+            while True:
+                next_parser = iterator.send(value)
+                result = next_parser(stream, index).aggregate(result)
+                if not result.status:
+                    return result
+                value = result.value
+                index = result.index
+        except StopIteration as stop:
+            return_value = stop.value
+            return Result.success(index, return_value).aggregate(result)
+
+    return generated
+
+
+# A convenience type for defining forward references to parsers using a generator
+ParserReference = Generator[Parser[T], T, T]
+
+
+index = Parser(lambda _, index: Result.success(index, index))
+line_info = Parser(lambda stream, index: Result.success(index, line_info_at(stream, index)))
+
+
+def success(val: OUT) -> Parser[OUT]:
+    return Parser(lambda _, index: Result.success(index, val))
+
+
+def fail(expected: str) -> Parser[None]:
+    return Parser(lambda _, index: Result.failure(index, expected))
+
+
+def string(s: str, transform: Callable[[str], str] = noop) -> Parser[str]:
+    slen = len(s)
+    transformed_s = transform(s)
+
+    @Parser
+    def string_parser(stream: str, index: int) -> Result[str]:
+        if transform(stream[index : index + slen]) == transformed_s:
+            return Result.success(index + slen, s)
+        else:
+            return Result.failure(index, s)
+
+    return string_parser
+
+
+PatternType = Union[str, Pattern[str]]
+
+
+@overload
+def regex(pattern: PatternType, *, flags: re.RegexFlag = re.RegexFlag(0), group: Literal[0] = 0) -> Parser[str]:
+    ...
+
+
+@overload
+def regex(pattern: PatternType, *, flags: re.RegexFlag = re.RegexFlag(0), group: str | int) -> Parser[str]:
+    ...
+
+
+@overload
+def regex(
+    pattern: PatternType, *, flags: re.RegexFlag = re.RegexFlag(0), group: Tuple[str | int]
+) -> Parser[Tuple[str]]:
+    ...
+
+
+@overload
+def regex(
+    pattern: PatternType, *, flags: re.RegexFlag = re.RegexFlag(0), group: Tuple[str | int, str | int]
+) -> Parser[Tuple[str, str]]:
+    ...
+
+
+@overload
+def regex(
+    pattern: PatternType, *, flags: re.RegexFlag = re.RegexFlag(0), group: Tuple[str | int, str | int, str | int]
+) -> Parser[Tuple[str, str, str]]:
+    ...
+
+
+@overload
+def regex(
+    pattern: PatternType,
+    *,
+    flags: re.RegexFlag = re.RegexFlag(0),
+    group: Tuple[str | int, str | int, str | int, str | int],
+) -> Parser[Tuple[str, str, str, str]]:
+    ...
+
+
+@overload
+def regex(
+    pattern: PatternType,
+    *,
+    flags: re.RegexFlag = re.RegexFlag(0),
+    group: Tuple[str | int, str | int, str | int, str | int, str | int],
+) -> Parser[Tuple[str, str, str, str, str]]:
+    ...
+
+
+def regex(
+    pattern: PatternType,
+    *,
+    flags: re.RegexFlag = re.RegexFlag(0),
+    group: str | int | Tuple[str | int, ...] = 0,
+) -> Parser[str | Tuple[str, ...]]:
+    if isinstance(pattern, str):
+        exp = re.compile(pattern, flags)
+    else:
+        exp = pattern
+
+    if isinstance(group, tuple) and len(group) >= 2:
+        first_group, second_group, *groups = group
+
+        @Parser
+        def regex_parser_tuple(stream: str, index: int) -> Result[Tuple[str, ...]]:
+            match = exp.match(stream, index)
+            if match:
+                return Result.success(match.end(), match.group(first_group, second_group, *groups))
+            else:
+                return Result.failure(index, exp.pattern)
+
+        return regex_parser_tuple
+
+    if isinstance(group, tuple) and len(group) == 1:
+        target_group = group[0]
+    elif isinstance(group, tuple):
+        target_group = 0
+    else:
+        target_group = group
+
+    @Parser
+    def regex_parser(stream: str, index: int) -> Result[str]:
+        match = exp.match(stream, index)
+        if match:
+            return Result.success(match.end(), match.group(target_group))
+        else:
+            return Result.failure(index, exp.pattern)
+
+    return regex_parser
+
+
+# Each number of args needs to be typed separately
+@overload
+def seq(
+    __parser_1: Parser[OUT1],
+    __parser_2: Parser[OUT2],
+    __parser_3: Parser[OUT3],
+    __parser_4: Parser[OUT4],
+    __parser_5: Parser[OUT5],
+    __parser_6: Parser[OUT6],
+) -> Parser[Tuple[OUT1, OUT2, OUT3, OUT4, OUT5, OUT6]]:
+    ...
+
+
+@overload
+def seq(
+    __parser_1: Parser[OUT1],
+    __parser_2: Parser[OUT2],
+    __parser_3: Parser[OUT3],
+    __parser_4: Parser[OUT4],
+    __parser_5: Parser[OUT5],
+) -> Parser[Tuple[OUT1, OUT2, OUT3, OUT4, OUT5]]:
+    ...
+
+
+@overload
+def seq(
+    __parser_1: Parser[OUT1], __parser_2: Parser[OUT2], __parser_3: Parser[OUT3], __parser_4: Parser[OUT4]
+) -> Parser[Tuple[OUT1, OUT2, OUT3, OUT4]]:
+    ...
+
+
+@overload
+def seq(
+    __parser_1: Parser[OUT1], __parser_2: Parser[OUT2], __parser_3: Parser[OUT3]
+) -> Parser[Tuple[OUT1, OUT2, OUT3]]:
+    ...
+
+
+@overload
+def seq(__parser_1: Parser[OUT1], __parser_2: Parser[OUT2]) -> Parser[Tuple[OUT1, OUT2]]:
+    ...
+
+
+@overload
+def seq(__parser_1: Parser[OUT1]) -> Parser[Tuple[OUT1]]:
+    ...
+
+
+@overload
+def seq(*parsers: Parser[Any]) -> Parser[Tuple[Any, ...]]:
+    ...
+
+
+def seq(*parsers: Parser[Any]) -> Parser[Tuple[Any, ...]]:
+    if not parsers:
+        raise ValueError("seq() requires at least one parser")
+    first, *remainder = parsers
+    parser = first.as_tuple()
+    for p in remainder:
+        parser = parser.append(p)  # type: ignore
+    return parser
+
+
+# TODO the rest of the functions here need type annotations.
+
+# One problem is that `test_item` and `match_item` are assuming that the input
+# type might not be str, but arbitrary types, including heterogeneous
+# lists. We have no generic parameter for the input stream type
+# yet, for simplicity.
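+#
+# Illustrative examples of the `str`-specialised replacements defined below:
+#
+#   >>> test_char(lambda c: c.isdigit(), "a digit").parse("5")
+#   '5'
+#   >>> match_char("a").parse("a")
+#   'a'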
+ + +def test_char(func: Callable[[str], bool], description: str) -> Parser[str]: + @Parser + def test_char_parser(stream: str, index: int) -> Result[str]: + if index < len(stream): + if func(stream[index]): + return Result.success(index + 1, stream[index]) + return Result.failure(index, description) + + return test_char_parser + + +def match_char(char: str, description: Optional[str] = None) -> Parser[str]: + if description is None: + description = char + return test_char(lambda i: char == i, description) + + +def string_from(*strings: str, transform: Callable[[str], str] = noop) -> Parser[str]: + # Sort longest first, so that overlapping options work correctly + return reduce(operator.or_, [string(s, transform) for s in sorted(strings, key=len, reverse=True)]) + + +# TODO drop bytes support here +def char_from(string: str) -> Parser[str]: + return test_char(lambda c: c in string, "[" + string + "]") + + +def peek(parser: Parser[OUT]) -> Parser[OUT]: + @Parser + def peek_parser(stream: str, index: int) -> Result[OUT]: + result = parser(stream, index) + if result.status: + return Result.success(index, result.value) + else: + return result + + return peek_parser + + +any_char = test_char(lambda c: True, "any character") + +whitespace = regex(r"\s+") + +letter = test_char(lambda c: c.isalpha(), "a letter") + +digit = test_char(lambda c: c.isdigit(), "a digit") + +decimal_digit = char_from("0123456789") + + +@Parser +def eof(stream: str, index: int) -> Result[None]: + if index >= len(stream): + return Result.success(index, None) + else: + return Result.failure(index, "EOF") + + +E = TypeVar("E", bound=enum.Enum) + + +def from_enum(enum_cls: type[E], transform: Callable[[str], str] = noop) -> Parser[E]: + items = sorted( + ((str(enum_item.value), enum_item) for enum_item in enum_cls), key=lambda t: len(t[0]), reverse=True + ) + return reduce(operator.or_, [string(value, transform=transform).result(enum_item) for value, enum_item in items]) + + +# Dataclass parsers + + +def parser_field( + parser: Parser[OUT], + *, + default: OUT = ..., + init: bool = ..., + repr: bool = ..., + hash: Union[bool, None] = ..., + compare: bool = ..., + metadata: Mapping[Any, Any] = ..., +) -> OUT: + if metadata is Ellipsis: + metadata = {} + return field( + default=default, init=init, repr=repr, hash=hash, compare=compare, metadata={**metadata, "parser": parser} + ) + + +class DataClassProtocol(Protocol): + __dataclass_fields__: ClassVar[Dict[str, Field[Any]]] + __init__: Callable[..., None] + + +OUT_D = TypeVar("OUT_D", bound=DataClassProtocol) + + +def dataclass_parser(datatype: Type[OUT_D]) -> Parser[OUT_D]: + @Parser + def data_parser(stream: str, index: int) -> Result[OUT_D]: + parsed_fields: Dict[str, Any] = {} + for dataclass_field in fields(datatype): + if "parser" not in dataclass_field.metadata: + continue + parser: Parser[Any] = dataclass_field.metadata["parser"] + result = parser(stream, index) + if not result.status: + return result # type: ignore + index = result.index + parsed_fields[dataclass_field.name] = result.value + + return Result.success(index, datatype(**parsed_fields)) + + return data_parser diff --git a/src/parsy/py.typed b/parsy/py.typed similarity index 100% rename from src/parsy/py.typed rename to parsy/py.typed diff --git a/poetry.lock b/poetry.lock new file mode 100644 index 0000000..67d12e9 --- /dev/null +++ b/poetry.lock @@ -0,0 +1,353 @@ +[[package]] +name = "black" +version = "23.3.0" +description = "The uncompromising code formatter." 
+category = "dev" +optional = false +python-versions = ">=3.7" + +[package.dependencies] +click = ">=8.0.0" +mypy-extensions = ">=0.4.3" +packaging = ">=22.0" +pathspec = ">=0.9.0" +platformdirs = ">=2" +tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} +typed-ast = {version = ">=1.4.2", markers = "python_version < \"3.8\" and implementation_name == \"cpython\""} +typing-extensions = {version = ">=3.10.0.0", markers = "python_version < \"3.10\""} + +[package.extras] +colorama = ["colorama (>=0.4.3)"] +d = ["aiohttp (>=3.7.4)"] +jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"] +uvloop = ["uvloop (>=0.15.2)"] + +[[package]] +name = "click" +version = "8.1.3" +description = "Composable command line interface toolkit" +category = "dev" +optional = false +python-versions = ">=3.7" + +[package.dependencies] +colorama = {version = "*", markers = "platform_system == \"Windows\""} +importlib-metadata = {version = "*", markers = "python_version < \"3.8\""} + +[[package]] +name = "colorama" +version = "0.4.6" +description = "Cross-platform colored terminal text." +category = "dev" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" + +[[package]] +name = "exceptiongroup" +version = "1.1.1" +description = "Backport of PEP 654 (exception groups)" +category = "dev" +optional = false +python-versions = ">=3.7" + +[package.extras] +test = ["pytest (>=6)"] + +[[package]] +name = "importlib-metadata" +version = "6.6.0" +description = "Read metadata from Python packages" +category = "dev" +optional = false +python-versions = ">=3.7" + +[package.dependencies] +typing-extensions = {version = ">=3.6.4", markers = "python_version < \"3.8\""} +zipp = ">=0.5" + +[package.extras] +docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] +perf = ["ipython"] +testing = ["flake8 (<5)", "flufl.flake8", "importlib-resources (>=1.3)", "packaging", "pyfakefs", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)", "pytest-perf (>=0.9.2)"] + +[[package]] +name = "iniconfig" +version = "2.0.0" +description = "brain-dead simple config-ini parsing" +category = "dev" +optional = false +python-versions = ">=3.7" + +[[package]] +name = "mypy" +version = "1.3.0" +description = "Optional static typing for Python" +category = "dev" +optional = false +python-versions = ">=3.7" + +[package.dependencies] +mypy-extensions = ">=1.0.0" +tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} +typed-ast = {version = ">=1.4.0,<2", markers = "python_version < \"3.8\""} +typing-extensions = ">=3.10" + +[package.extras] +dmypy = ["psutil (>=4.0)"] +install-types = ["pip"] +python2 = ["typed-ast (>=1.4.0,<2)"] +reports = ["lxml"] + +[[package]] +name = "mypy-extensions" +version = "1.0.0" +description = "Type system extensions for programs checked with the mypy type checker." +category = "dev" +optional = false +python-versions = ">=3.5" + +[[package]] +name = "packaging" +version = "23.1" +description = "Core utilities for Python packages" +category = "dev" +optional = false +python-versions = ">=3.7" + +[[package]] +name = "pathspec" +version = "0.11.1" +description = "Utility library for gitignore style pattern matching of file paths." 
+category = "dev" +optional = false +python-versions = ">=3.7" + +[[package]] +name = "platformdirs" +version = "3.5.1" +description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." +category = "dev" +optional = false +python-versions = ">=3.7" + +[package.dependencies] +typing-extensions = {version = ">=4.5", markers = "python_version < \"3.8\""} + +[package.extras] +docs = ["furo (>=2023.3.27)", "proselint (>=0.13)", "sphinx (>=6.2.1)", "sphinx-autodoc-typehints (>=1.23,!=1.23.4)"] +test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.3.1)", "pytest-cov (>=4)", "pytest-mock (>=3.10)"] + +[[package]] +name = "pluggy" +version = "1.0.0" +description = "plugin and hook calling mechanisms for python" +category = "dev" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +importlib-metadata = {version = ">=0.12", markers = "python_version < \"3.8\""} + +[package.extras] +dev = ["pre-commit", "tox"] +testing = ["pytest", "pytest-benchmark"] + +[[package]] +name = "pytest" +version = "7.3.1" +description = "pytest: simple powerful testing with Python" +category = "dev" +optional = false +python-versions = ">=3.7" + +[package.dependencies] +colorama = {version = "*", markers = "sys_platform == \"win32\""} +exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""} +importlib-metadata = {version = ">=0.12", markers = "python_version < \"3.8\""} +iniconfig = "*" +packaging = "*" +pluggy = ">=0.12,<2.0" +tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""} + +[package.extras] +testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "xmlschema"] + +[[package]] +name = "tomli" +version = "2.0.1" +description = "A lil' TOML parser" +category = "dev" +optional = false +python-versions = ">=3.7" + +[[package]] +name = "typed-ast" +version = "1.5.4" +description = "a fork of Python 2 and 3 ast modules with type comment support" +category = "dev" +optional = false +python-versions = ">=3.6" + +[[package]] +name = "typing-extensions" +version = "4.5.0" +description = "Backported and Experimental Type Hints for Python 3.7+" +category = "main" +optional = false +python-versions = ">=3.7" + +[[package]] +name = "zipp" +version = "3.15.0" +description = "Backport of pathlib-compatible object wrapper for zip files" +category = "dev" +optional = false +python-versions = ">=3.7" + +[package.extras] +docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] +testing = ["big-O", "flake8 (<5)", "jaraco.functools", "jaraco.itertools", "more-itertools", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)"] + +[metadata] +lock-version = "1.1" +python-versions = "^3.7" +content-hash = "3305bba6acd3af12565b1a45daa6ba063b24fc0ac09cfdbf79602a0201497f21" + +[metadata.files] +black = [ + {file = "black-23.3.0-cp310-cp310-macosx_10_16_arm64.whl", hash = "sha256:0945e13506be58bf7db93ee5853243eb368ace1c08a24c65ce108986eac65915"}, + {file = "black-23.3.0-cp310-cp310-macosx_10_16_universal2.whl", hash = "sha256:67de8d0c209eb5b330cce2469503de11bca4085880d62f1628bd9972cc3366b9"}, + {file = "black-23.3.0-cp310-cp310-macosx_10_16_x86_64.whl", hash = "sha256:7c3eb7cea23904399866c55826b31c1f55bbcd3890ce22ff70466b907b6775c2"}, + {file = 
"black-23.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:32daa9783106c28815d05b724238e30718f34155653d4d6e125dc7daec8e260c"}, + {file = "black-23.3.0-cp310-cp310-win_amd64.whl", hash = "sha256:35d1381d7a22cc5b2be2f72c7dfdae4072a3336060635718cc7e1ede24221d6c"}, + {file = "black-23.3.0-cp311-cp311-macosx_10_16_arm64.whl", hash = "sha256:a8a968125d0a6a404842fa1bf0b349a568634f856aa08ffaff40ae0dfa52e7c6"}, + {file = "black-23.3.0-cp311-cp311-macosx_10_16_universal2.whl", hash = "sha256:c7ab5790333c448903c4b721b59c0d80b11fe5e9803d8703e84dcb8da56fec1b"}, + {file = "black-23.3.0-cp311-cp311-macosx_10_16_x86_64.whl", hash = "sha256:a6f6886c9869d4daae2d1715ce34a19bbc4b95006d20ed785ca00fa03cba312d"}, + {file = "black-23.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6f3c333ea1dd6771b2d3777482429864f8e258899f6ff05826c3a4fcc5ce3f70"}, + {file = "black-23.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:11c410f71b876f961d1de77b9699ad19f939094c3a677323f43d7a29855fe326"}, + {file = "black-23.3.0-cp37-cp37m-macosx_10_16_x86_64.whl", hash = "sha256:1d06691f1eb8de91cd1b322f21e3bfc9efe0c7ca1f0e1eb1db44ea367dff656b"}, + {file = "black-23.3.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:50cb33cac881766a5cd9913e10ff75b1e8eb71babf4c7104f2e9c52da1fb7de2"}, + {file = "black-23.3.0-cp37-cp37m-win_amd64.whl", hash = "sha256:e114420bf26b90d4b9daa597351337762b63039752bdf72bf361364c1aa05925"}, + {file = "black-23.3.0-cp38-cp38-macosx_10_16_arm64.whl", hash = "sha256:48f9d345675bb7fbc3dd85821b12487e1b9a75242028adad0333ce36ed2a6d27"}, + {file = "black-23.3.0-cp38-cp38-macosx_10_16_universal2.whl", hash = "sha256:714290490c18fb0126baa0fca0a54ee795f7502b44177e1ce7624ba1c00f2331"}, + {file = "black-23.3.0-cp38-cp38-macosx_10_16_x86_64.whl", hash = "sha256:064101748afa12ad2291c2b91c960be28b817c0c7eaa35bec09cc63aa56493c5"}, + {file = "black-23.3.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:562bd3a70495facf56814293149e51aa1be9931567474993c7942ff7d3533961"}, + {file = "black-23.3.0-cp38-cp38-win_amd64.whl", hash = "sha256:e198cf27888ad6f4ff331ca1c48ffc038848ea9f031a3b40ba36aced7e22f2c8"}, + {file = "black-23.3.0-cp39-cp39-macosx_10_16_arm64.whl", hash = "sha256:3238f2aacf827d18d26db07524e44741233ae09a584273aa059066d644ca7b30"}, + {file = "black-23.3.0-cp39-cp39-macosx_10_16_universal2.whl", hash = "sha256:f0bd2f4a58d6666500542b26354978218a9babcdc972722f4bf90779524515f3"}, + {file = "black-23.3.0-cp39-cp39-macosx_10_16_x86_64.whl", hash = "sha256:92c543f6854c28a3c7f39f4d9b7694f9a6eb9d3c5e2ece488c327b6e7ea9b266"}, + {file = "black-23.3.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a150542a204124ed00683f0db1f5cf1c2aaaa9cc3495b7a3b5976fb136090ab"}, + {file = "black-23.3.0-cp39-cp39-win_amd64.whl", hash = "sha256:6b39abdfb402002b8a7d030ccc85cf5afff64ee90fa4c5aebc531e3ad0175ddb"}, + {file = "black-23.3.0-py3-none-any.whl", hash = "sha256:ec751418022185b0c1bb7d7736e6933d40bbb14c14a0abcf9123d1b159f98dd4"}, + {file = "black-23.3.0.tar.gz", hash = "sha256:1c7b8d606e728a41ea1ccbd7264677e494e87cf630e399262ced92d4a8dac940"}, +] +click = [ + {file = "click-8.1.3-py3-none-any.whl", hash = "sha256:bb4d8133cb15a609f44e8213d9b391b0809795062913b383c62be0ee95b1db48"}, + {file = "click-8.1.3.tar.gz", hash = "sha256:7682dc8afb30297001674575ea00d1814d808d6a36af415a82bd481d37ba7b8e"}, +] +colorama = [ + {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = 
"sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, + {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, +] +exceptiongroup = [ + {file = "exceptiongroup-1.1.1-py3-none-any.whl", hash = "sha256:232c37c63e4f682982c8b6459f33a8981039e5fb8756b2074364e5055c498c9e"}, + {file = "exceptiongroup-1.1.1.tar.gz", hash = "sha256:d484c3090ba2889ae2928419117447a14daf3c1231d5e30d0aae34f354f01785"}, +] +importlib-metadata = [ + {file = "importlib_metadata-6.6.0-py3-none-any.whl", hash = "sha256:43dd286a2cd8995d5eaef7fee2066340423b818ed3fd70adf0bad5f1fac53fed"}, + {file = "importlib_metadata-6.6.0.tar.gz", hash = "sha256:92501cdf9cc66ebd3e612f1b4f0c0765dfa42f0fa38ffb319b6bd84dd675d705"}, +] +iniconfig = [ + {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, + {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, +] +mypy = [ + {file = "mypy-1.3.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c1eb485cea53f4f5284e5baf92902cd0088b24984f4209e25981cc359d64448d"}, + {file = "mypy-1.3.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:4c99c3ecf223cf2952638da9cd82793d8f3c0c5fa8b6ae2b2d9ed1e1ff51ba85"}, + {file = "mypy-1.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:550a8b3a19bb6589679a7c3c31f64312e7ff482a816c96e0cecec9ad3a7564dd"}, + {file = "mypy-1.3.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:cbc07246253b9e3d7d74c9ff948cd0fd7a71afcc2b77c7f0a59c26e9395cb152"}, + {file = "mypy-1.3.0-cp310-cp310-win_amd64.whl", hash = "sha256:a22435632710a4fcf8acf86cbd0d69f68ac389a3892cb23fbad176d1cddaf228"}, + {file = "mypy-1.3.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6e33bb8b2613614a33dff70565f4c803f889ebd2f859466e42b46e1df76018dd"}, + {file = "mypy-1.3.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7d23370d2a6b7a71dc65d1266f9a34e4cde9e8e21511322415db4b26f46f6b8c"}, + {file = "mypy-1.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:658fe7b674769a0770d4b26cb4d6f005e88a442fe82446f020be8e5f5efb2fae"}, + {file = "mypy-1.3.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:6e42d29e324cdda61daaec2336c42512e59c7c375340bd202efa1fe0f7b8f8ca"}, + {file = "mypy-1.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:d0b6c62206e04061e27009481cb0ec966f7d6172b5b936f3ead3d74f29fe3dcf"}, + {file = "mypy-1.3.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:76ec771e2342f1b558c36d49900dfe81d140361dd0d2df6cd71b3db1be155409"}, + {file = "mypy-1.3.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ebc95f8386314272bbc817026f8ce8f4f0d2ef7ae44f947c4664efac9adec929"}, + {file = "mypy-1.3.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:faff86aa10c1aa4a10e1a301de160f3d8fc8703b88c7e98de46b531ff1276a9a"}, + {file = "mypy-1.3.0-cp37-cp37m-win_amd64.whl", hash = "sha256:8c5979d0deb27e0f4479bee18ea0f83732a893e81b78e62e2dda3e7e518c92ee"}, + {file = "mypy-1.3.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:c5d2cc54175bab47011b09688b418db71403aefad07cbcd62d44010543fc143f"}, + {file = "mypy-1.3.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:87df44954c31d86df96c8bd6e80dfcd773473e877ac6176a8e29898bfb3501cb"}, + {file = "mypy-1.3.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:473117e310febe632ddf10e745a355714e771ffe534f06db40702775056614c4"}, + {file = 
"mypy-1.3.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:74bc9b6e0e79808bf8678d7678b2ae3736ea72d56eede3820bd3849823e7f305"}, + {file = "mypy-1.3.0-cp38-cp38-win_amd64.whl", hash = "sha256:44797d031a41516fcf5cbfa652265bb994e53e51994c1bd649ffcd0c3a7eccbf"}, + {file = "mypy-1.3.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:ddae0f39ca146972ff6bb4399f3b2943884a774b8771ea0a8f50e971f5ea5ba8"}, + {file = "mypy-1.3.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:1c4c42c60a8103ead4c1c060ac3cdd3ff01e18fddce6f1016e08939647a0e703"}, + {file = "mypy-1.3.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e86c2c6852f62f8f2b24cb7a613ebe8e0c7dc1402c61d36a609174f63e0ff017"}, + {file = "mypy-1.3.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:f9dca1e257d4cc129517779226753dbefb4f2266c4eaad610fc15c6a7e14283e"}, + {file = "mypy-1.3.0-cp39-cp39-win_amd64.whl", hash = "sha256:95d8d31a7713510685b05fbb18d6ac287a56c8f6554d88c19e73f724a445448a"}, + {file = "mypy-1.3.0-py3-none-any.whl", hash = "sha256:a8763e72d5d9574d45ce5881962bc8e9046bf7b375b0abf031f3e6811732a897"}, + {file = "mypy-1.3.0.tar.gz", hash = "sha256:e1f4d16e296f5135624b34e8fb741eb0eadedca90862405b1f1fde2040b9bd11"}, +] +mypy-extensions = [ + {file = "mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d"}, + {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, +] +packaging = [ + {file = "packaging-23.1-py3-none-any.whl", hash = "sha256:994793af429502c4ea2ebf6bf664629d07c1a9fe974af92966e4b8d2df7edc61"}, + {file = "packaging-23.1.tar.gz", hash = "sha256:a392980d2b6cffa644431898be54b0045151319d1e7ec34f0cfed48767dd334f"}, +] +pathspec = [ + {file = "pathspec-0.11.1-py3-none-any.whl", hash = "sha256:d8af70af76652554bd134c22b3e8a1cc46ed7d91edcdd721ef1a0c51a84a5293"}, + {file = "pathspec-0.11.1.tar.gz", hash = "sha256:2798de800fa92780e33acca925945e9a19a133b715067cf165b8866c15a31687"}, +] +platformdirs = [ + {file = "platformdirs-3.5.1-py3-none-any.whl", hash = "sha256:e2378146f1964972c03c085bb5662ae80b2b8c06226c54b2ff4aa9483e8a13a5"}, + {file = "platformdirs-3.5.1.tar.gz", hash = "sha256:412dae91f52a6f84830f39a8078cecd0e866cb72294a5c66808e74d5e88d251f"}, +] +pluggy = [ + {file = "pluggy-1.0.0-py2.py3-none-any.whl", hash = "sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3"}, + {file = "pluggy-1.0.0.tar.gz", hash = "sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159"}, +] +pytest = [ + {file = "pytest-7.3.1-py3-none-any.whl", hash = "sha256:3799fa815351fea3a5e96ac7e503a96fa51cc9942c3753cda7651b93c1cfa362"}, + {file = "pytest-7.3.1.tar.gz", hash = "sha256:434afafd78b1d78ed0addf160ad2b77a30d35d4bdf8af234fe621919d9ed15e3"}, +] +tomli = [ + {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, + {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, +] +typed-ast = [ + {file = "typed_ast-1.5.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:669dd0c4167f6f2cd9f57041e03c3c2ebf9063d0757dc89f79ba1daa2bfca9d4"}, + {file = "typed_ast-1.5.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:211260621ab1cd7324e0798d6be953d00b74e0428382991adfddb352252f1d62"}, + {file = "typed_ast-1.5.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:267e3f78697a6c00c689c03db4876dd1efdfea2f251a5ad6555e82a26847b4ac"}, + {file = "typed_ast-1.5.4-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:c542eeda69212fa10a7ada75e668876fdec5f856cd3d06829e6aa64ad17c8dfe"}, + {file = "typed_ast-1.5.4-cp310-cp310-win_amd64.whl", hash = "sha256:a9916d2bb8865f973824fb47436fa45e1ebf2efd920f2b9f99342cb7fab93f72"}, + {file = "typed_ast-1.5.4-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:79b1e0869db7c830ba6a981d58711c88b6677506e648496b1f64ac7d15633aec"}, + {file = "typed_ast-1.5.4-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a94d55d142c9265f4ea46fab70977a1944ecae359ae867397757d836ea5a3f47"}, + {file = "typed_ast-1.5.4-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:183afdf0ec5b1b211724dfef3d2cad2d767cbefac291f24d69b00546c1837fb6"}, + {file = "typed_ast-1.5.4-cp36-cp36m-win_amd64.whl", hash = "sha256:639c5f0b21776605dd6c9dbe592d5228f021404dafd377e2b7ac046b0349b1a1"}, + {file = "typed_ast-1.5.4-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:cf4afcfac006ece570e32d6fa90ab74a17245b83dfd6655a6f68568098345ff6"}, + {file = "typed_ast-1.5.4-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ed855bbe3eb3715fca349c80174cfcfd699c2f9de574d40527b8429acae23a66"}, + {file = "typed_ast-1.5.4-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:6778e1b2f81dfc7bc58e4b259363b83d2e509a65198e85d5700dfae4c6c8ff1c"}, + {file = "typed_ast-1.5.4-cp37-cp37m-win_amd64.whl", hash = "sha256:0261195c2062caf107831e92a76764c81227dae162c4f75192c0d489faf751a2"}, + {file = "typed_ast-1.5.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2efae9db7a8c05ad5547d522e7dbe62c83d838d3906a3716d1478b6c1d61388d"}, + {file = "typed_ast-1.5.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:7d5d014b7daa8b0bf2eaef684295acae12b036d79f54178b92a2b6a56f92278f"}, + {file = "typed_ast-1.5.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:370788a63915e82fd6f212865a596a0fefcbb7d408bbbb13dea723d971ed8bdc"}, + {file = "typed_ast-1.5.4-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:4e964b4ff86550a7a7d56345c7864b18f403f5bd7380edf44a3c1fb4ee7ac6c6"}, + {file = "typed_ast-1.5.4-cp38-cp38-win_amd64.whl", hash = "sha256:683407d92dc953c8a7347119596f0b0e6c55eb98ebebd9b23437501b28dcbb8e"}, + {file = "typed_ast-1.5.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:4879da6c9b73443f97e731b617184a596ac1235fe91f98d279a7af36c796da35"}, + {file = "typed_ast-1.5.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3e123d878ba170397916557d31c8f589951e353cc95fb7f24f6bb69adc1a8a97"}, + {file = "typed_ast-1.5.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ebd9d7f80ccf7a82ac5f88c521115cc55d84e35bf8b446fcd7836eb6b98929a3"}, + {file = "typed_ast-1.5.4-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:98f80dee3c03455e92796b58b98ff6ca0b2a6f652120c263efdba4d6c5e58f72"}, + {file = "typed_ast-1.5.4-cp39-cp39-win_amd64.whl", hash = "sha256:0fdbcf2fef0ca421a3f5912555804296f0b0960f0418c440f5d6d3abb549f3e1"}, + {file = "typed_ast-1.5.4.tar.gz", hash = "sha256:39e21ceb7388e4bb37f4c679d72707ed46c2fbf2a5609b8b8ebc4b067d977df2"}, +] +typing-extensions = [ + {file = 
"typing_extensions-4.5.0-py3-none-any.whl", hash = "sha256:fb33085c39dd998ac16d1431ebc293a8b3eedd00fd4a32de0ff79002c19511b4"}, + {file = "typing_extensions-4.5.0.tar.gz", hash = "sha256:5cb5f4a79139d699607b3ef622a1dedafa84e115ab0024e0d9c044a9479ca7cb"}, +] +zipp = [ + {file = "zipp-3.15.0-py3-none-any.whl", hash = "sha256:48904fc76a60e542af151aded95726c1a5c34ed43ab4134b597665c86d7ad556"}, + {file = "zipp-3.15.0.tar.gz", hash = "sha256:112929ad649da941c23de50f356a2b5570c954b65150642bccdd66bf194d224b"}, +] diff --git a/pyproject.toml b/pyproject.toml index 366edb3..44343a0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,3 +10,24 @@ default_section = "THIRDPARTY" skip = [".tox", ".git", "docs", "dist", "build" , "todo"] known_first_party = "parsy" +[tool.poetry] +name = "parsy" +version = "0.1.0" +description = "" +authors = ["Your Name "] +readme = "README.rst" +packages = [{include = "parsy"}] + +[tool.poetry.dependencies] +python = "^3.7" +typing-extensions = "^4.5.0" + + +[tool.poetry.group.dev.dependencies] +pytest = "^7.3.1" +mypy = "^1.3.0" +black = "^23.3.0" + +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api" diff --git a/setup.py b/setup.py index bff016b..a5281d1 100755 --- a/setup.py +++ b/setup.py @@ -42,4 +42,5 @@ keywords="parser parsers parsing monad combinators", packages=find_packages("src"), package_dir={"": "src"}, + install_requires=["typing-extensions"], ) diff --git a/src/parsy/__init__.py b/src/parsy/__init__.py deleted file mode 100644 index 30865dc..0000000 --- a/src/parsy/__init__.py +++ /dev/null @@ -1,541 +0,0 @@ -# End-user documentation is in ../../doc/ and so is for the most part not -# duplicated here in the form of doc strings. Code comments and docstrings -# are mainly for internal use. -from __future__ import annotations - -import operator -import enum - -import re -from dataclasses import dataclass -from functools import reduce, wraps -from typing import Any, Callable, FrozenSet, Generator, Generic, Optional, TypeVar, Union - - -from .version import __version__ # noqa: F401 - - -OUT = TypeVar("OUT") -OUT1 = TypeVar("OUT1") -OUT2 = TypeVar("OUT2") -OUT_co = TypeVar("OUT_co", covariant=True) - - -T = TypeVar("T") - - -def noop(val: T) -> T: - return val - - -def line_info_at(stream: str, index: int) -> tuple[int, int]: - if index > len(stream): - raise ValueError("invalid index") - line = stream.count("\n", 0, index) - last_nl = stream.rfind("\n", 0, index) - col = index - (last_nl + 1) - return (line, col) - - -class ParseError(RuntimeError): - def __init__(self, expected: FrozenSet[str], stream: str, index: int): - self.expected: FrozenSet[str] = expected - self.stream: str = stream - self.index: int = index - - def line_info(self) -> str: - try: - return "{}:{}".format(*line_info_at(self.stream, self.index)) - except (TypeError, AttributeError): # not a str - return str(self.index) - - def __str__(self) -> str: - expected_list = sorted(repr(e) for e in self.expected) - - if len(expected_list) == 1: - return f"expected {expected_list[0]} at {self.line_info()}" - else: - return f"expected one of {', '.join(expected_list)} at {self.line_info()}" - - -@dataclass -class Result(Generic[OUT_co]): - status: bool - index: int - value: OUT_co - furthest: int - expected: FrozenSet[str] - - @staticmethod - def success(index: int, value: OUT) -> Result[OUT]: - return Result(True, index, value, -1, frozenset()) - - # We don't handle types of failures yet, and always - # either: - # - don't return these values (e.g. 
-    #  - don't return these values (e.g. choose another parser)
-    #  - raise an exception.
-
-    # Therefore, I think it is safe here to use `Any` as type to keep type checker happy
-    # The same issue crops up in various branches that return parse failure results
-    @staticmethod
-    def failure(index: int, expected: str) -> Result[Any]:
-        return Result(False, -1, None, index, frozenset([expected]))
-
-    # collect the furthest failure from self and other
-    def aggregate(self: Result[OUT], other: Optional[Result[Any]]) -> Result[OUT]:
-        if not other:
-            return self
-
-        if self.furthest > other.furthest:
-            return self
-        elif self.furthest == other.furthest:
-            # if we both have the same failure index, we combine the expected messages.
-            return Result(self.status, self.index, self.value, self.furthest, self.expected | other.expected)
-        else:
-            return Result(self.status, self.index, self.value, other.furthest, other.expected)
-
-
-class Parser(Generic[OUT]):
-    """
-    A Parser is an object that wraps a function whose arguments are
-    a string to be parsed and the index on which to begin parsing.
-    The function should return either Result.success(next_index, value),
-    where the next index is where to continue the parse and the value is
-    the yielded value, or Result.failure(index, expected), where expected
-    is a string indicating what was expected, and the index is the index
-    of the failure.
-    """
-
-    def __init__(self, wrapped_fn: Callable[[str, int], Result[OUT]]):
-        self.wrapped_fn: Callable[[str, int], Result[OUT]] = wrapped_fn
-
-    def __call__(self, stream: str, index: int) -> Result[OUT]:
-        return self.wrapped_fn(stream, index)
-
-    def parse(self, stream: str) -> OUT:
-        """Parse a string and return the result or raise a ParseError."""
-        (result, _) = (self << eof).parse_partial(stream)
-        return result
-
-    def parse_partial(self, stream: str) -> tuple[OUT, str]:
-        """
-        Parse the longest possible prefix of a given string.
-        Return a tuple of the result and the rest of the string,
-        or raise a ParseError.
-        """
-        result = self(stream, 0)
-
-        if result.status:
-            return (result.value, stream[result.index :])
-        else:
-            raise ParseError(result.expected, stream, result.furthest)
-
-    def bind(self: Parser[OUT1], bind_fn: Callable[[OUT1], Parser[OUT2]]) -> Parser[OUT2]:
-        @Parser
-        def bound_parser(stream: str, index: int) -> Result[OUT2]:
-            result: Result[OUT1] = self(stream, index)
-
-            if result.status:
-                next_parser = bind_fn(result.value)
-                return next_parser(stream, result.index).aggregate(result)
-            else:
-                return result  # type: ignore
-
-        return bound_parser
-
-    def map(self: Parser[OUT1], map_fn: Callable[[OUT1], OUT2]) -> Parser[OUT2]:
-        return self.bind(lambda res: success(map_fn(res)))
-
-    def concat(self: Parser[list[str]]) -> Parser[str]:
-        return self.map("".join)
-
-    def then(self: Parser, other: Parser[OUT2]) -> Parser[OUT2]:
-        return (self & other).map(lambda t: t[1])
-
-    def skip(self: Parser[OUT1], other: Parser) -> Parser[OUT1]:
-        return (self & other).map(lambda t: t[0])
-
-    def result(self: Parser, res: OUT2) -> Parser[OUT2]:
-        return self >> success(res)
-
-    def many(self: Parser[OUT]) -> Parser[list[OUT]]:
-        return self.times(0, float("inf"))
-
-    def times(self: Parser[OUT], min: int, max: int | float | None = None) -> Parser[list[OUT]]:
-        the_max: int | float
-        if max is None:
-            the_max = min
-        else:
-            the_max = max
-
-        # TODO - must execute at least once
-        @Parser
-        def times_parser(stream: str, index: int) -> Result[list[OUT]]:
-            values: list[OUT] = []
-            times = 0
-            result = None
-
-            while times < the_max:
-                result = self(stream, index).aggregate(result)
-                if result.status:
-                    values.append(result.value)
-                    index = result.index
-                    times += 1
-                elif times >= min:
-                    break
-                else:
-                    return result  # type: ignore
-
-            return Result.success(index, values).aggregate(result)
-
-        return times_parser
-
-    def at_most(self: Parser[OUT], n: int) -> Parser[list[OUT]]:
-        return self.times(0, n)
-
-    def at_least(self: Parser[OUT], n: int) -> Parser[list[OUT]]:
-        # TODO: I cannot for the life of me work out why mypy rejects the following.
-        # Pyright does not reject it.
-        return (self.times(n) & self.many()).map(lambda t: t[0] + t[1])
-
-    # TODO overloads to distinguish calling with and without default
-    def optional(self: Parser[OUT1], default: OUT2 | None = None) -> Parser[OUT1 | OUT2 | None]:
-        return self.times(0, 1).map(lambda v: v[0] if v else default)
-
-    def until(
-        self: Parser[OUT],
-        other: Parser[OUT],
-        min: int = 0,
-        max: int | float = float("inf"),
-        consume_other: bool = False,
-    ) -> Parser[list[OUT]]:
-        @Parser
-        def until_parser(stream: str, index: int) -> Result[list[OUT]]:
-            values = []
-            times = 0
-            while True:
-
-                # try parser first
-                res = other(stream, index)
-                if res.status and times >= min:
-                    if consume_other:
-                        # consume other
-                        values.append(res.value)
-                        index = res.index
-                    return Result.success(index, values)
-
-                # exceeded max?
-                if times >= max:
-                    # return failure, it matched parser more than max times
-                    return Result.failure(index, f"at most {max} items")
-
-                # failed, try parser
-                result = self(stream, index)
-                if result.status:
-                    # consume
-                    values.append(result.value)
-                    index = result.index
-                    times += 1
-                elif times >= min:
-                    # return failure, parser is not followed by other
-                    return Result.failure(index, "did not find other parser")
-                else:
-                    # return failure, it did not match parser at least min times
-                    return Result.failure(index, f"at least {min} items; got {times} item(s)")
-
-        return until_parser
-
-    def sep_by(self: Parser[OUT], sep: Parser, *, min: int = 0, max: int | float = float("inf")) -> Parser[list[OUT]]:
-        zero_times: Parser[list[OUT]] = success([])
-        if max == 0:
-            return zero_times
-        res = (self.times(1) & (sep >> self).times(min - 1, max - 1)).map(lambda t: t[0] + t[1])
-        if min == 0:
-            res |= zero_times
-        return res
-
-    def desc(self, description: str) -> Parser[OUT]:
-        @Parser
-        def desc_parser(stream: str, index: int) -> Result[OUT]:
-            result = self(stream, index)
-            if result.status:
-                return result
-            else:
-                return Result.failure(index, description)
-
-        return desc_parser
-
-    def mark(self):
-        @generate
-        def marked():
-            start = yield line_info
-            body = yield self
-            end = yield line_info
-            return (start, body, end)
-
-        return marked
-
-    def tag(self, name):
-        return self.map(lambda v: (name, v))
-
-    def should_fail(self, description):
-        @Parser
-        def fail_parser(stream, index):
-            res = self(stream, index)
-            if res.status:
-                return Result.failure(index, description)
-            return Result.success(index, res)
-
-        return fail_parser
-
-    def __add__(self: Parser[str], other: Parser[str]) -> Parser[str]:
-        # TODO it would be nice to get more generic type checks here.
-        # I want some way of saying "the input value can be any
-        # type that has an ``__add__`` method that returns the same type
-        # as the two inputs". This would allow us to use it for both
-        # `str` and `list`, which satisfy that.
-        return (self & other).map(lambda t: t[0] + t[1])
-
-    def __mul__(self, other):
-        if isinstance(other, range):
-            return self.times(other.start, other.stop - 1)
-        return self.times(other)
-
-    def __or__(self: Parser[OUT1], other: Parser[OUT2]) -> Parser[Union[OUT1, OUT2]]:
-        @Parser
-        def alt_parser(stream: str, index: int) -> Result[Union[OUT1, OUT2]]:
-            result0 = None
-
-            result1 = self(stream, index).aggregate(result0)
-            if result1.status:
-                return result1
-
-            result2 = other(stream, index).aggregate(result1)
-            return result2
-
-        return alt_parser
-
-    def __and__(self: Parser[OUT1], other: Parser[OUT2]) -> Parser[tuple[OUT1, OUT2]]:
-        @Parser
-        def seq_parser(stream: str, index: int) -> Result[tuple[OUT1, OUT2]]:
-            result0 = None
-            result1 = self(stream, index).aggregate(result0)
-            if not result1.status:
-                return result1  # type: ignore
-            result2 = other(stream, result1.index).aggregate(result1)
-            if not result2.status:
-                return result2  # type: ignore
-
-            return Result.success(result2.index, (result1.value, result2.value)).aggregate(result2)
-
-        return seq_parser
-
-    # haskelley operators, for fun #
-
-    # >>
-    def __rshift__(self, other: Parser[OUT2]) -> Parser[OUT2]:
-        return self.then(other)
-
-    # <<
-    def __lshift__(self, other: Parser) -> Parser[OUT]:
-        return self.skip(other)
-
-
-# TODO:
-# I think @generate is unfixable. It's not surprising, because
-# we are doing something genuninely unusual with generator functions.
-
-# The return value of a `@generate` parser is now OK.
-
-# But we have no type checking within a user's @generate function.
-
-# The big issue is that each `val = yield parser` inside a @generate parser has
-# a different type, and we'd like those to be typed checked. But the
-# `Generator[...]` expects a homogeneous stream of yield and send types,
-# whereas we have pairs of yield/send types which need to match within the
-# pair, but each pair can be completely different from the next in the stream
-
-
-def generate(fn: Callable[[], Generator[Parser[Any], Any, OUT]]) -> Parser[OUT]:
-    @Parser
-    @wraps(fn)
-    def generated(stream: str, index: int) -> Result[OUT]:
-        # start up the generator
-        iterator = fn()
-
-        result = None
-        value = None
-        try:
-            while True:
-                next_parser = iterator.send(value)
-                result = next_parser(stream, index).aggregate(result)
-                if not result.status:
-                    return result
-                value = result.value
-                index = result.index
-        except StopIteration as stop:
-            returnVal = stop.value
-            return Result.success(index, returnVal).aggregate(result)
-
-    return generated
-
-
-index = Parser(lambda _, index: Result.success(index, index))
-line_info = Parser(lambda stream, index: Result.success(index, line_info_at(stream, index)))
-
-
-def success(val: OUT) -> Parser[OUT]:
-    return Parser(lambda _, index: Result.success(index, val))
-
-
-def fail(expected: str) -> Parser[None]:
-    return Parser(lambda _, index: Result.failure(index, expected))
-
-
-def string(s: str, transform: Callable[[str], str] = noop) -> Parser[str]:
-    slen = len(s)
-    transformed_s = transform(s)
-
-    @Parser
-    def string_parser(stream, index):
-        if transform(stream[index : index + slen]) == transformed_s:
-            return Result.success(index + slen, s)
-        else:
-            return Result.failure(index, s)
-
-    return string_parser
-
-
-def regex(exp, flags=0, group=0) -> Parser[str]:
-    if isinstance(exp, (str, bytes)):
-        exp = re.compile(exp, flags)
-    if isinstance(group, (str, int)):
-        group = (group,)
-
-    @Parser
-    def regex_parser(stream, index):
-        match = exp.match(stream, index)
-        if match:
-            return Result.success(match.end(), match.group(*group))
-        else:
-            return Result.failure(index, exp.pattern)
-
-    return regex_parser
-
-
-# TODO the rest of the functions here need type annotations.
-
-# One problem is that `test_item` and `match_item` are assumning that the input
-# type might not be str, but arbitrary types, including heterogeneous
-# lists. We have no generic parameter for the input stream type
-# yet, for simplicity.
-
-
-def test_item(func, description):
-    @Parser
-    def test_item_parser(stream, index):
-        if index < len(stream):
-            if isinstance(stream, bytes):
-                # Subscripting bytes with `[index]` instead of
-                # `[index:index + 1]` returns an int
-                item = stream[index : index + 1]
-            else:
-                item = stream[index]
-            if func(item):
-                return Result.success(index + 1, item)
-        return Result.failure(index, description)
-
-    return test_item_parser
-
-
-def test_char(func: Callable[[str], bool], description: str) -> Parser[str]:
-    # Implementation is identical to test_item
-    return test_item(func, description)
-
-
-def match_item(item, description=None):
-    if description is None:
-        description = str(item)
-    return test_item(lambda i: item == i, description)
-
-
-def string_from(*strings: str, transform: Callable[[str], str] = noop) -> Parser[str]:
-    # Sort longest first, so that overlapping options work correctly
-    return reduce(operator.or_, [string(s, transform) for s in sorted(strings, key=len, reverse=True)])
-
-
-# TODO drop bytes support here
-def char_from(string):
-    if isinstance(string, bytes):
-        return test_char(lambda c: c in string, b"[" + string + b"]")
-    else:
-        return test_char(lambda c: c in string, "[" + string + "]")
-
-
-def peek(parser):
-    @Parser
-    def peek_parser(stream, index):
-        result = parser(stream, index)
-        if result.status:
-            return Result.success(index, result.value)
-        else:
-            return result
-
-    return peek_parser
-
-
-any_char = test_char(lambda c: True, "any character")
-
-whitespace = regex(r"\s+")
-
-letter = test_char(lambda c: c.isalpha(), "a letter")
-
-digit = test_char(lambda c: c.isdigit(), "a digit")
-
-decimal_digit = char_from("0123456789")
-
-
-@Parser
-def eof(stream: str, index: int) -> Result[None]:
-    if index >= len(stream):
-        return Result.success(index, None)
-    else:
-        return Result.failure(index, "EOF")
-
-
-E = TypeVar("E", bound=enum.Enum)
-
-
-def from_enum(enum_cls: type[E], transform: Callable[[str], str] = noop) -> Parser[E]:
-    items = sorted(
-        ((str(enum_item.value), enum_item) for enum_item in enum_cls), key=lambda t: len(t[0]), reverse=True
-    )
-    return reduce(operator.or_, [string(value, transform=transform).result(enum_item) for value, enum_item in items])
-
-
-# TODO how do we type a forward_declaration instance? For a typical usage, see
-# examples/json.py. I think this is probably a recursive type issue which is probably
-# mirroring the recursive definition issues that forward_declaration is designed to solve.
-# Cutting the recursive knot might be harder at the type level?
-
-
-class forward_declaration(Parser):
-    """
-    An empty parser that can be used as a forward declaration,
-    especially for parsers that need to be defined recursively.
-
-    You must use `.become(parser)` before using.
- """ - - def __init__(self) -> None: - pass - - def _raise_error(self, *args, **kwargs): - raise ValueError("You must use 'become' before attempting to call `parse` or `parse_partial`") - - parse = _raise_error - parse_partial = _raise_error - - def become(self, other: Parser) -> None: - self.__dict__ = other.__dict__ - self.__class__ = other.__class__ diff --git a/src/parsy/version.py b/src/parsy/version.py deleted file mode 100644 index f2dc0e4..0000000 --- a/src/parsy/version.py +++ /dev/null @@ -1 +0,0 @@ -__version__ = "2.0" diff --git a/tests/test_parsy.py b/tests/test_parsy.py index 02f6841..c19ab15 100644 --- a/tests/test_parsy.py +++ b/tests/test_parsy.py @@ -1,36 +1,30 @@ # -*- code: utf8 -*- -try: - import enum -except ImportError: - enum = None +import enum import re -from typing import Generator import unittest - -from typing import Any +from typing import Any, Generator, List, Tuple, Union from parsy import ( - Parser, ParseError, + Parser, + ParserReference, + Result, any_char, char_from, decimal_digit, digit, - forward_declaration, from_enum, generate, - index, letter, line_info, line_info_at, - match_item, peek, regex, + seq, string, string_from, ) from parsy import test_char as parsy_test_char # to stop pytest thinking this function is a test -from parsy import test_item as parsy_test_item # to stop pytest thinking this function is a test from parsy import whitespace @@ -64,13 +58,13 @@ def test_regex_str(self): self.assertRaises(ParseError, parser.parse, "x") - def test_regex_bytes(self): - parser = regex(rb"[0-9]") + # def test_regex_bytes(self): + # parser = regex(rb"[0-9]") - self.assertEqual(parser.parse(b"1"), b"1") - self.assertEqual(parser.parse(b"4"), b"4") + # self.assertEqual(parser.parse(b"1"), b"1") + # self.assertEqual(parser.parse(b"4"), b"4") - self.assertRaises(ParseError, parser.parse, b"x") + # self.assertRaises(ParseError, parser.parse, b"x") def test_regex_compiled(self): parser = regex(re.compile(r"[0-9]")) @@ -102,7 +96,7 @@ def test_then(self): def test_bind(self): piped = None - def binder(x): + def binder(x: str): nonlocal piped piped = x return string("y") @@ -122,10 +116,20 @@ def test_and(self): parser = digit & letter self.assertEqual(parser.parse("1A"), ("1", "A")) - def test_or(self): - self.assertEqual((letter | digit).parse("a"), "a") - self.assertEqual((letter | digit).parse("1"), "1") - self.assertRaises(ParseError, (letter | digit).parse, ".") + def test_append(self): + parser = digit.join(letter).append(letter) + self.assertEqual(parser.parse("1AB"), ("1", "A", "B")) + + def test_combine(self): + parser = digit.join(letter).append(letter).combine(lambda a, b, c: (c + b + a)) + self.assertEqual(parser.parse("1AB"), "BA1") + + # def test_combine_mixed_types(self): + # def demo(a: int, b: str, c: bool) -> Tuple[int, str, bool]: + # return (a, b, c) + + # parser = digit.map(int).join(letter).append(digit.map(bool)).combine(demo) + # self.assertEqual(parser.parse("1A1"), (1, "A", True)) def test_concat(self): parser = letter.many().concat() @@ -277,7 +281,6 @@ def test_at_least(self): self.assertEqual(ab.at_least(2).parse_partial("abababc"), (["ab", "ab", "ab"], "c")) def test_until(self): - until = string("s").until(string("x")) s = "ssssx" @@ -296,20 +299,7 @@ def test_until(self): until = regex(".").until(string("x")) self.assertEqual(until.parse_partial("xxxx"), ([], "xxxx")) - def test_until_with_consume_other(self): - - until = string("s").until(string("x"), consume_other=True) - - self.assertEqual(until.parse("ssssx"), 4 * ["s"] + 
["x"]) - self.assertEqual(until.parse_partial("ssssxy"), (4 * ["s"] + ["x"], "y")) - - self.assertEqual(until.parse_partial("xxx"), (["x"], "xx")) - - self.assertRaises(ParseError, until.parse, "ssssy") - self.assertRaises(ParseError, until.parse, "xssssxy") - def test_until_with_min(self): - until = string("s").until(string("x"), min=3) self.assertEqual(until.parse_partial("sssx"), (3 * ["s"], "x")) @@ -318,7 +308,6 @@ def test_until_with_min(self): self.assertRaises(ParseError, until.parse_partial, "ssx") def test_until_with_max(self): - # until with max until = string("s").until(string("x"), max=3) @@ -328,7 +317,6 @@ def test_until_with_max(self): self.assertRaises(ParseError, until.parse_partial, "ssssx") def test_until_with_min_max(self): - until = string("s").until(string("x"), min=3, max=5) self.assertEqual(until.parse_partial("sssx"), (3 * ["s"], "x")) @@ -378,8 +366,97 @@ def test_sep_by_with_min_and_max(self): self.assertRaises(ParseError, digit_list.parse, "7.6") self.assertEqual(digit.sep_by(string(","), max=0).parse(""), []) - def test_add(self): - self.assertEqual((letter + digit).parse("a1"), "a1") + def test_add_tuple(self): + """This test code is for checking that pylance gives no type errors""" + letter_tuple = letter.as_tuple() + int_parser = regex(r"\d").map(int) + two_int_parser = int_parser & int_parser + barcode = letter_tuple + two_int_parser + + def my_foo(first: str, second: int, third: int) -> str: + return first + str(third + second) + + foo_parser = barcode.combine(my_foo) + + self.assertEqual(foo_parser.parse("a13"), "a4") + + def test_add_too_long_tuple_uniform_types(self): + """This test code is for checking that pylance gives no type errors""" + letter_tuple = letter.as_tuple() + int_parser = regex(r"\d") + six_int_parser = ( + (int_parser & int_parser).append(int_parser).append(int_parser).append(int_parser).append(int_parser) + ) + barcode = letter_tuple + six_int_parser + + def my_bar(first: str, *second: str) -> str: + return first + "-".join(second) + + foo_parser = barcode.combine(my_bar) + + self.assertEqual(foo_parser.parse("a123456"), "a1-2-3-4-5-6") + + def test_add_too_long_tuple_different_types(self): + """This test code is for checking that pylance gives no type errors""" + letter_tuple = letter.as_tuple() + int_parser = regex(r"\d").map(int) + six_int_parser = ( + (int_parser & int_parser).append(int_parser).append(int_parser).append(int_parser).append(int_parser) + ) + barcode = letter_tuple + six_int_parser + + def my_bar(first: str, *second: int) -> str: + return first + str(sum(second)) + + foo_parser = barcode.combine(my_bar) + + self.assertEqual(foo_parser.parse("a111111"), "a6") + + def test_add_list(self): + """This test code is for checking that pylance gives no type errors""" + letters = letter.many() + number_chars = regex(r"\d").many() + letters_numbers = letters + number_chars + + self.assertEqual(letters_numbers.parse("ab12"), ["a", "b", "1", "2"]) + + def test_add_unaddable_types(self): + """ + The type system warns us this isn't possible: + + `Operator "+" not supported for types "Parser[str]" and "Parser[int]"` + """ + bad_parser = letter + regex(r"\d").map(int) + + self.assertRaises(TypeError, bad_parser.parse, "a1") + + def test_add_numerics(self): + digit = regex(r"\d") + numeric_parser = digit.map(float) + digit.map(int) + + self.assertEqual(numeric_parser.parse("12"), 3.0) + + def test_seq(self): + + a = regex("a") + b = regex("b") + num = regex(r"[\d]").map(int) + + parser = seq(a, num, b, num, a | num) + + 
self.assertEqual(parser.parse("a1b2a"), ("a", 1, "b", 2, "a")) + self.assertEqual(parser.parse("a1b23"), ("a", 1, "b", 2, 3)) + + def test_add_tuples_like_seq(self): + """A possible alternative to `seq`""" + a = regex("a").as_tuple() + b = regex("b").as_tuple() + num = regex(r"[\d]").map(int).as_tuple() + + parser = a + num + b + num + (a | num) + + self.assertEqual(parser.parse("a1b2a"), ("a", 1, "b", 2, "a")) + self.assertEqual(parser.parse("a1b23"), ("a", 1, "b", 2, 3)) def test_multiply(self): self.assertEqual((letter * 3).parse("abc"), ["a", "b", "c"]) @@ -414,16 +491,16 @@ def test_char_from_str(self): ex = err.exception self.assertEqual(str(ex), """expected '[ab]' at 0:0""") - def test_char_from_bytes(self): - ab = char_from(b"ab") - self.assertEqual(ab.parse(b"a"), b"a") - self.assertEqual(ab.parse(b"b"), b"b") + # def test_char_from_bytes(self): + # ab = char_from(b"ab") + # self.assertEqual(ab.parse(b"a"), b"a") + # self.assertEqual(ab.parse(b"b"), b"b") - with self.assertRaises(ParseError) as err: - ab.parse(b"x") + # with self.assertRaises(ParseError) as err: + # ab.parse(b"x") - ex = err.exception - self.assertEqual(str(ex), """expected b'[ab]' at 0""") + # ex = err.exception + # self.assertEqual(str(ex), """expected b'[ab]' at 0""") def test_string_from(self): titles = string_from("Mr", "Mr.", "Mrs", "Mrs.") @@ -474,7 +551,7 @@ def test_decimal_digit(self): def test_line_info(self): @generate - def foo() -> Generator[Any, Any, tuple[str, tuple[int, int]]]: + def foo() -> Generator[Any, Any, Tuple[str, Tuple[int, int]]]: i = yield line_info l = yield any_char return (l, i) @@ -504,84 +581,43 @@ def test_should_fail(self): self.assertRaises(ParseError, not_a_digit.parse, "8ab") - if enum is not None: - - def test_from_enum_string(self): - class Pet(enum.Enum): - CAT = "cat" - DOG = "dog" - - pet = from_enum(Pet) - self.assertEqual(pet.parse("cat"), Pet.CAT) - self.assertEqual(pet.parse("dog"), Pet.DOG) - self.assertRaises(ParseError, pet.parse, "foo") - - def test_from_enum_int(self): - class Position(enum.Enum): - FIRST = 1 - SECOND = 2 - - position = from_enum(Position) - self.assertEqual(position.parse("1"), Position.FIRST) - self.assertEqual(position.parse("2"), Position.SECOND) - self.assertRaises(ParseError, position.parse, "foo") - - def test_from_enum_transform(self): - class Pet(enum.Enum): - CAT = "cat" - DOG = "dog" - - pet = from_enum(Pet, transform=lambda s: s.lower()) - self.assertEqual(pet.parse("cat"), Pet.CAT) - self.assertEqual(pet.parse("CAT"), Pet.CAT) + def test_should_fail_isolated(self): + not_a_digit = digit.should_fail("not a digit") + self.assertEqual( + not_a_digit.parse_partial("a"), + (Result(status=False, index=-1, value=None, furthest=0, expected=frozenset({"a digit"})), "a"), + ) + self.assertRaises(ParseError, not_a_digit.parse_partial, "1") -class TestParserTokens(unittest.TestCase): - """ - Tests that ensure that `.parse` can handle an arbitrary list of tokens, - rather than a string. 
- """ - - # Some opaque objects we will use in our stream: - START = object() - STOP = object() - - def test_test_item(self): - start_stop = parsy_test_item(lambda i: i in [self.START, self.STOP], "START/STOP") - self.assertEqual(start_stop.parse([self.START]), self.START) - self.assertEqual(start_stop.parse([self.STOP]), self.STOP) - with self.assertRaises(ParseError) as err: - start_stop.many().parse([self.START, "hello"]) - - ex = err.exception - self.assertEqual(str(ex), "expected one of 'EOF', 'START/STOP' at 1") - self.assertEqual(ex.expected, {"EOF", "START/STOP"}) - self.assertEqual(ex.index, 1) - - def test_match_item(self): - self.assertEqual(match_item(self.START).parse([self.START]), self.START) - with self.assertRaises(ParseError) as err: - match_item(self.START, "START").parse([]) + def test_from_enum_string(self): + class Pet(enum.Enum): + CAT = "cat" + DOG = "dog" - ex = err.exception - self.assertEqual(str(ex), "expected 'START' at 0") + pet = from_enum(Pet) + self.assertEqual(pet.parse("cat"), Pet.CAT) + self.assertEqual(pet.parse("dog"), Pet.DOG) + self.assertRaises(ParseError, pet.parse, "foo") - def test_parse_tokens(self): - other_vals = parsy_test_item(lambda i: i not in [self.START, self.STOP], "not START/STOP") + def test_from_enum_int(self): + class Position(enum.Enum): + FIRST = 1 + SECOND = 2 - bracketed = match_item(self.START) >> other_vals.many() << match_item(self.STOP) - stream = [self.START, "hello", 1, 2, "goodbye", self.STOP] - result = bracketed.parse(stream) - self.assertEqual(result, ["hello", 1, 2, "goodbye"]) + position = from_enum(Position) + self.assertEqual(position.parse("1"), Position.FIRST) + self.assertEqual(position.parse("2"), Position.SECOND) + self.assertRaises(ParseError, position.parse, "foo") - def test_index(self): - @generate - def foo(): - i = yield index - l = yield letter - return (l, i) + def test_from_enum_transform(self): + class Pet(enum.Enum): + CAT = "cat" + DOG = "dog" - self.assertEqual(foo.many().parse(["A", "B"]), [("A", 0), ("B", 1)]) + pet = from_enum(Pet, transform=lambda s: s.lower()) + self.assertEqual(pet.parse("cat"), Pet.CAT) + self.assertEqual(pet.parse("CAT"), Pet.CAT) class TestUtils(unittest.TestCase): @@ -595,46 +631,33 @@ def test_line_info_at(self): self.assertRaises(ValueError, lambda: line_info_at(text, 8)) -class TestForwardDeclaration(unittest.TestCase): - def test_forward_declaration_1(self): - # This is the example from the docs - expr = forward_declaration() - with self.assertRaises(ValueError): - expr.parse("()") +# Type alias used in test_recursive_parser, has to be defined at module or class level +RT = Union[int, List["RT"]] - with self.assertRaises(ValueError): - expr.parse_partial("()") - simple = regex("[0-9]+").map(int) - group = string("(") >> expr.sep_by(string(" ")) << string(")") - expr.become(simple | group) - - self.assertEqual(expr.parse("(0 1 (2 3))"), [0, 1, [2, 3]]) - - def test_forward_declaration_2(self): - # Simplest example I could think of - expr = forward_declaration() - expr.become(string("A") + expr | string("Z")) +def test_recursive_parser(): + """ + A recursive parser can be defined by using generators. - self.assertEqual(expr.parse("Z"), "Z") - self.assertEqual(expr.parse("AZ"), "AZ") - self.assertEqual(expr.parse("AAAAAZ"), "AAAAAZ") + The type of the parser has to be explicitly declared with a type alias which + is also recursively defined using a forward-declaration. 
- with self.assertRaises(ParseError): - expr.parse("A") + This works because the generator can refer the target parser before the target + parser is defined. Then, when defining the parser, it can use `_parser` to + indirectly refer to itself, creating a recursive parser. + """ + digits = regex("[0-9]+").map(int) - with self.assertRaises(ParseError): - expr.parse("B") + @generate + def _parser() -> ParserReference[RT]: + return (yield parser) - self.assertEqual(expr.parse_partial("AAZXX"), ("AAZ", "XX")) + # The explicit type annotation of `Parser[RT]` could be omitted + parser: Parser[RT] = digits | string("(") >> _parser.sep_by(string(" ")) << string(")") - def test_forward_declaration_cant_become_twice(self): - dec = forward_declaration() - other = string("X") - dec.become(other) + result = parser.parse("(0 1 (2 3 (4 5)))") - with self.assertRaises((AttributeError, TypeError)): - dec.become(other) + assert result == [0, 1, [2, 3, [4, 5]]] if __name__ == "__main__": diff --git a/tests/test_sexpr.py b/tests/test_sexpr.py index ec0fca6..227b7f6 100644 --- a/tests/test_sexpr.py +++ b/tests/test_sexpr.py @@ -1,13 +1,18 @@ -import re import unittest +from typing import List, TypeVar, Union -from parsy import generate, regex, string +from parsy import Parser, ParserReference, generate, regex, string -whitespace = regex(r"\s+", re.MULTILINE) +whitespace = regex(r"\s+") comment = regex(r";.*") ignore = (whitespace | comment).many() -lexeme = lambda p: p << ignore +T = TypeVar("T") + + +def lexeme(parser: Parser[T]) -> Parser[T]: + return parser << ignore + lparen = lexeme(string("(")) rparen = lexeme(string(")")) @@ -18,22 +23,21 @@ atom = true | false | number | symbol - -@generate -def form(): - yield lparen - els = yield expr.many() - yield rparen - return els +PT = Union[str, bool, int, List["PT"]] @generate -def quote(): - yield string("'") - e = yield expr - return ["quote", e] +def _expr() -> ParserReference[PT]: + # expr is referred to before it's defined + return (yield expr) + +# expr is indirectly used via _expr +form = lparen >> _expr.many() << rparen +quote = string("'") >> _expr.map(lambda e: ["quote", e]) +# Here, expr is finally defined, combining parsers which already refer to it via +# _expr, which creates a recursive parser expr = form | quote | atom program = ignore >> expr.many()
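A short usage sketch of the tuple-based combinator style that the new tests exercise. The names below (`date_tuple`, `iso_date`) are illustrative only, and this assumes `join`, `append` and `combine` behave exactly as shown in tests/test_parsy.py above:

    from parsy import regex, string

    integer = regex(r"\d+").map(int)
    dash = string("-")

    # `join` pairs two parsers into a 2-tuple result; each `append` adds the
    # result of one more parser, growing the tuple by one element.
    date_tuple = integer.join(dash >> integer).append(dash >> integer)

    # `combine` unpacks the accumulated tuple into a function's arguments.
    iso_date = date_tuple.combine(lambda y, m, d: f"{y:04d}-{m:02d}-{d:02d}")

    assert iso_date.parse("2023-5-7") == "2023-05-07"

Because every `append` is reflected in the result tuple's type, a type checker can verify the `combine` callback's signature against the parsed values, which is what the `test_append`, `test_combine` and `test_add_*` tests above rely on.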