From ad1f90fb0ae3860ff7a21060682a9781f352cd7f Mon Sep 17 00:00:00 2001 From: Jacob Errington Date: Sat, 18 Jan 2025 21:46:21 -0500 Subject: [PATCH 01/11] add Stream to represent input data This primarily wraps the str|bytes|list that is the data to parse, but also adds the metadata `source` to hold a filename, URL, etc. where the data is from. Introducing this class also paves the way for eventually supporting streaming input data. --- src/parsy/__init__.py | 57 ++++++++++++++++++++++++++++--------------- 1 file changed, 38 insertions(+), 19 deletions(-) diff --git a/src/parsy/__init__.py b/src/parsy/__init__.py index cf96e5c..07f32bf 100644 --- a/src/parsy/__init__.py +++ b/src/parsy/__init__.py @@ -12,13 +12,33 @@ noop = lambda x: x -def line_info_at(stream, index): +@dataclass +class Stream: + """Data to parse, possibly equipped with a name for the source it's from, + e.g. a file path.""" + + data: str | bytes | list + source: str | None = None + + def __len__(self): + return len(self.data) + + def __getitem__(self, i): + # Subscripting bytes with `[index]` instead of `[index:index + 1]` + # returns an int + if isinstance(self.data, bytes) and not isinstance(i, slice): + return self.data[i : i + 1] + else: + return self.data[i] + + +def line_info_at(stream: Stream, index): if index > len(stream): raise ValueError("invalid index") - line = stream.count("\n", 0, index) - last_nl = stream.rfind("\n", 0, index) + line = stream.data.count("\n", 0, index) + last_nl = stream.data.rfind("\n", 0, index) col = index - (last_nl + 1) - return (line, col) + return (stream.source, line, col) class ParseError(RuntimeError): @@ -29,7 +49,11 @@ def __init__(self, expected, stream, index): def line_info(self): try: - return "{}:{}".format(*line_info_at(self.stream, self.index)) + source, row, col = line_info_at(self.stream, self.index) + if source is None: + return "{}:{}".format(row, col) + else: + return "{}:{}:{}".format(source, row, col) except (TypeError, AttributeError): # 
not a str return str(self.index) @@ -83,22 +107,22 @@ class Parser: of the failure. """ - def __init__(self, wrapped_fn: Callable[[str | bytes | list, int], Result]): + def __init__(self, wrapped_fn: Callable[[Stream, int], Result]): """ Creates a new Parser from a function that takes a stream and returns a Result. """ self.wrapped_fn = wrapped_fn - def __call__(self, stream: str | bytes | list, index: int): + def __call__(self, stream: Stream, index: int): return self.wrapped_fn(stream, index) - def parse(self, stream: str | bytes | list) -> Any: + def parse(self, stream: Stream) -> Any: """Parses a string or list of tokens and returns the result or raise a ParseError.""" (result, _) = (self << eof).parse_partial(stream) return result - def parse_partial(self, stream: str | bytes | list) -> tuple[Any, str | bytes | list]: + def parse_partial(self, stream: Stream) -> tuple[Any, Stream]: """ Parses the longest possible prefix of a given string. Returns a tuple of the result and the unparsed remainder, @@ -343,10 +367,10 @@ def mark(self) -> Parser: @generate def marked(): - start = yield line_info + _, *start = yield line_info body = yield self - end = yield line_info - return (start, body, end) + _, *end = yield line_info + return (tuple(start), body, tuple(end)) return marked @@ -557,7 +581,7 @@ def regex(exp: str, flags=0, group: int | str | tuple = 0) -> Parser: @Parser def regex_parser(stream, index): - match = exp.match(stream, index) + match = exp.match(stream.data, index) if match: return Result.success(match.end(), match.group(*group)) else: @@ -577,12 +601,7 @@ def test_item(func: Callable[..., bool], description: str) -> Parser: @Parser def test_item_parser(stream, index): if index < len(stream): - if isinstance(stream, bytes): - # Subscripting bytes with `[index]` instead of - # `[index:index + 1]` returns an int - item = stream[index : index + 1] - else: - item = stream[index] + item = stream[index] if func(item): return Result.success(index + 1, item) 
return Result.failure(index, description) From 114e5ef86cd6932b82bb984df4000be860b9b0ba Mon Sep 17 00:00:00 2001 From: Jacob Errington Date: Sun, 19 Jan 2025 08:08:32 -0500 Subject: [PATCH 02/11] fix all existing tests to support Stream Wrap the string, bytes, list into a Stream before calling parse. --- examples/json.py | 6 +- examples/sql_select.py | 6 +- tests/test_parsy.py | 399 +++++++++++++++++++++-------------------- tests/test_sexpr.py | 14 +- 4 files changed, 215 insertions(+), 210 deletions(-) diff --git a/examples/json.py b/examples/json.py index b3164dc..66b214f 100644 --- a/examples/json.py +++ b/examples/json.py @@ -1,4 +1,4 @@ -from parsy import forward_declaration, regex, seq, string +from parsy import Stream, forward_declaration, regex, seq, string # Utilities whitespace = regex(r"\s*") @@ -45,7 +45,8 @@ def test(): assert ( json_doc.parse( - r""" + Stream( + r""" { "int": 1, "string": "hello", @@ -55,6 +56,7 @@ def test(): "other": [true, false, null] } """ + ) ) == { "int": 1, diff --git a/examples/sql_select.py b/examples/sql_select.py index cd6fec9..097e9a7 100644 --- a/examples/sql_select.py +++ b/examples/sql_select.py @@ -13,7 +13,7 @@ from dataclasses import dataclass from typing import List, Optional, Union -from parsy import from_enum, regex, seq, string +from parsy import Stream, from_enum, regex, seq, string # -- AST nodes: @@ -109,7 +109,7 @@ class Select: def test_select(): - assert select.parse("SELECT thing, stuff, 123, 'hello' FROM my_table WHERE id = 1;") == Select( + assert select.parse(Stream("SELECT thing, stuff, 123, 'hello' FROM my_table WHERE id = 1;")) == Select( columns=[ Field("thing"), Field("stuff"), @@ -126,7 +126,7 @@ def test_select(): def test_optional_where(): - assert select.parse("SELECT 1 FROM x;") == Select( + assert select.parse(Stream("SELECT 1 FROM x;")) == Select( columns=[Number(1)], table=Table("x"), where=None, diff --git a/tests/test_parsy.py b/tests/test_parsy.py index f699508..35e3cae 100644 --- 
a/tests/test_parsy.py +++ b/tests/test_parsy.py @@ -7,6 +7,7 @@ from parsy import ( ParseError, + Stream, alt, any_char, char_from, @@ -34,67 +35,67 @@ class TestParser(unittest.TestCase): def test_string(self): parser = string("x") - self.assertEqual(parser.parse("x"), "x") + self.assertEqual(parser.parse(Stream("x")), "x") - self.assertRaises(ParseError, parser.parse, "y") + self.assertRaises(ParseError, parser.parse, Stream("y")) def test_string_transform(self): parser = string("x", transform=lambda s: s.lower()) - self.assertEqual(parser.parse("x"), "x") - self.assertEqual(parser.parse("X"), "x") + self.assertEqual(parser.parse(Stream("x")), "x") + self.assertEqual(parser.parse(Stream("X")), "x") - self.assertRaises(ParseError, parser.parse, "y") + self.assertRaises(ParseError, parser.parse, Stream("y")) def test_string_transform_2(self): parser = string("Cat", transform=lambda s: s.lower()) - self.assertEqual(parser.parse("cat"), "Cat") - self.assertEqual(parser.parse("CAT"), "Cat") - self.assertEqual(parser.parse("CaT"), "Cat") + self.assertEqual(parser.parse(Stream("cat")), "Cat") + self.assertEqual(parser.parse(Stream("CAT")), "Cat") + self.assertEqual(parser.parse(Stream("CaT")), "Cat") - self.assertRaises(ParseError, parser.parse, "dog") + self.assertRaises(ParseError, parser.parse, Stream("dog")) def test_regex_str(self): parser = regex(r"[0-9]") - self.assertEqual(parser.parse("1"), "1") - self.assertEqual(parser.parse("4"), "4") + self.assertEqual(parser.parse(Stream("1")), "1") + self.assertEqual(parser.parse(Stream("4")), "4") - self.assertRaises(ParseError, parser.parse, "x") + self.assertRaises(ParseError, parser.parse, Stream("x")) def test_regex_bytes(self): parser = regex(rb"[0-9]") - self.assertEqual(parser.parse(b"1"), b"1") - self.assertEqual(parser.parse(b"4"), b"4") + self.assertEqual(parser.parse(Stream(b"1")), b"1") + self.assertEqual(parser.parse(Stream(b"4")), b"4") - self.assertRaises(ParseError, parser.parse, b"x") + 
self.assertRaises(ParseError, parser.parse, Stream(b"x")) def test_regex_compiled(self): parser = regex(re.compile(r"[0-9]")) - self.assertEqual(parser.parse("1"), "1") - self.assertRaises(ParseError, parser.parse, "x") + self.assertEqual(parser.parse(Stream("1")), "1") + self.assertRaises(ParseError, parser.parse, Stream("x")) def test_regex_group_number(self): parser = regex(re.compile(r"a([0-9])b"), group=1) - self.assertEqual(parser.parse("a1b"), "1") - self.assertRaises(ParseError, parser.parse, "x") + self.assertEqual(parser.parse(Stream("a1b")), "1") + self.assertRaises(ParseError, parser.parse, Stream("x")) def test_regex_group_name(self): parser = regex(re.compile(r"a(?P<name>[0-9])b"), group="name") - self.assertEqual(parser.parse("a1b"), "1") - self.assertRaises(ParseError, parser.parse, "x") + self.assertEqual(parser.parse(Stream("a1b")), "1") + self.assertRaises(ParseError, parser.parse, Stream("x")) def test_regex_group_tuple(self): parser = regex(re.compile(r"a([0-9])b([0-9])c"), group=(1, 2)) - self.assertEqual(parser.parse("a1b2c"), ("1", "2")) - self.assertRaises(ParseError, parser.parse, "x") + self.assertEqual(parser.parse(Stream("a1b2c")), ("1", "2")) + self.assertRaises(ParseError, parser.parse, Stream("x")) def test_then(self): xy_parser = string("x") >> string("y") - self.assertEqual(xy_parser.parse("xy"), "y") + self.assertEqual(xy_parser.parse(Stream("xy")), "y") - self.assertRaises(ParseError, xy_parser.parse, "y") - self.assertRaises(ParseError, xy_parser.parse, "z") + self.assertRaises(ParseError, xy_parser.parse, Stream("y")) + self.assertRaises(ParseError, xy_parser.parse, Stream("z")) def test_bind(self): piped = None @@ -106,18 +107,18 @@ def binder(x): parser = string("x").bind(binder) - self.assertEqual(parser.parse("xy"), "y") + self.assertEqual(parser.parse(Stream("xy")), "y") self.assertEqual(piped, "x") - self.assertRaises(ParseError, parser.parse, "x") + self.assertRaises(ParseError, parser.parse, Stream("x")) def test_map(self): 
parser = digit.map(int) - self.assertEqual(parser.parse("7"), 7) + self.assertEqual(parser.parse(Stream("7")), 7) def test_combine(self): parser = seq(digit, letter).combine(lambda d, l: (d, l)) - self.assertEqual(parser.parse("1A"), ("1", "A")) + self.assertEqual(parser.parse(Stream("1A")), ("1", "A")) def test_combine_dict(self): ddmmyyyy = ( @@ -129,7 +130,7 @@ def test_combine_dict(self): .map(dict) .combine_dict(date) ) - self.assertEqual(ddmmyyyy.parse("05042003"), date(2003, 4, 5)) + self.assertEqual(ddmmyyyy.parse(Stream("05042003")), date(2003, 4, 5)) def test_combine_dict_list(self): Pair = namedtuple("Pair", ["word", "number"]) @@ -137,7 +138,7 @@ def test_combine_dict_list(self): regex(r"[A-Z]+").tag("word"), regex(r"[0-9]+").map(int).tag("number"), ).combine_dict(Pair) - self.assertEqual(parser.parse("ABC123"), Pair(word="ABC", number=123)) + self.assertEqual(parser.parse(Stream("ABC123")), Pair(word="ABC", number=123)) def test_combine_dict_skip_None(self): Pair = namedtuple("Pair", ["word", "number"]) @@ -146,7 +147,7 @@ def test_combine_dict_skip_None(self): whitespace.tag(None), regex(r"[0-9]+").map(int).tag("number"), ).combine_dict(Pair) - self.assertEqual(parser.parse("ABC 123"), Pair(word="ABC", number=123)) + self.assertEqual(parser.parse(Stream("ABC 123")), Pair(word="ABC", number=123)) def test_combine_dict_skip_underscores(self): Pair = namedtuple("Pair", ["word", "number"]) @@ -155,18 +156,18 @@ def test_combine_dict_skip_underscores(self): whitespace.tag("_whitespace"), regex(r"[0-9]+").map(int).tag("number"), ).combine_dict(Pair) - self.assertEqual(parser.parse("ABC 123"), Pair(word="ABC", number=123)) + self.assertEqual(parser.parse(Stream("ABC 123")), Pair(word="ABC", number=123)) def test_concat(self): parser = letter.many().concat() - self.assertEqual(parser.parse(""), "") - self.assertEqual(parser.parse("abc"), "abc") + self.assertEqual(parser.parse(Stream("")), "") + self.assertEqual(parser.parse(Stream("abc")), "abc") def 
test_concat_from_byte_stream(self): any_byte = parsy_test_item(lambda c: True, "any byte") parser = any_byte.map(lambda b: b.decode("ascii")).many().concat() - self.assertEqual(parser.parse(b""), "") - self.assertEqual(parser.parse(b"abc"), "abc") + self.assertEqual(parser.parse(Stream(b"")), "") + self.assertEqual(parser.parse(Stream(b"abc")), "abc") def test_generate(self): x = y = None @@ -179,7 +180,7 @@ def xy(): y = yield string("y") return 3 - self.assertEqual(xy.parse("xy"), 3) + self.assertEqual(xy.parse(Stream("xy")), 3) self.assertEqual(x, "x") self.assertEqual(y, "y") @@ -189,12 +190,12 @@ def example(): yield string("x") return string("y") - self.assertEqual(example.parse("xy"), "y") + self.assertEqual(example.parse(Stream("xy")), "y") def test_mark(self): parser = (letter.many().mark() << string("\n")).many() - lines = parser.parse("asdf\nqwer\n") + lines = parser.parse(Stream("asdf\nqwer\n")) self.assertEqual(len(lines), 2) @@ -211,13 +212,13 @@ def test_mark(self): def test_tag(self): parser = letter.many().concat().tag("word") self.assertEqual( - parser.sep_by(string(",")).parse("this,is,a,list"), + parser.sep_by(string(",")).parse(Stream("this,is,a,list")), [("word", "this"), ("word", "is"), ("word", "a"), ("word", "list")], ) def test_tag_map_dict(self): parser = seq(letter.tag("first_letter"), letter.many().concat().tag("remainder")).map(dict) - self.assertEqual(parser.parse("Hello"), {"first_letter": "H", "remainder": "ello"}) + self.assertEqual(parser.parse(Stream("Hello")), {"first_letter": "H", "remainder": "ello"}) def test_generate_desc(self): @generate("a thing") @@ -225,12 +226,12 @@ def thing(): yield string("t") with self.assertRaises(ParseError) as err: - thing.parse("x") + thing.parse(Stream("x")) ex = err.exception self.assertEqual(ex.expected, frozenset(["a thing"])) - self.assertEqual(ex.stream, "x") + self.assertEqual(ex.stream, Stream("x")) self.assertEqual(ex.index, 0) def test_generate_default_desc(self): @@ -242,12 +243,12 @@ 
def thing(): yield string("b") with self.assertRaises(ParseError) as err: - thing.parse("ax") + thing.parse(Stream("ax")) ex = err.exception self.assertEqual(ex.expected, frozenset(["b"])) - self.assertEqual(ex.stream, "ax") + self.assertEqual(ex.stream, Stream("ax")) self.assertEqual(ex.index, 1) self.assertIn("expected 'b' at 0:1", str(ex)) @@ -256,7 +257,7 @@ def test_multiple_failures(self): abc = string("a") | string("b") | string("c") with self.assertRaises(ParseError) as err: - abc.parse("d") + abc.parse(Stream("d")) ex = err.exception self.assertEqual(ex.expected, frozenset(["a", "b", "c"])) @@ -271,84 +272,84 @@ def xy(): parser = xy | string("z") # should not finish executing xy() - self.assertEqual(parser.parse("z"), "z") + self.assertEqual(parser.parse(Stream("z")), "z") def test_or(self): x_or_y = string("x") | string("y") - self.assertEqual(x_or_y.parse("x"), "x") - self.assertEqual(x_or_y.parse("y"), "y") + self.assertEqual(x_or_y.parse(Stream("x")), "x") + self.assertEqual(x_or_y.parse(Stream("y")), "y") def test_or_with_then(self): parser = (string("\\") >> string("y")) | string("z") - self.assertEqual(parser.parse("\\y"), "y") - self.assertEqual(parser.parse("z"), "z") + self.assertEqual(parser.parse(Stream("\\y")), "y") + self.assertEqual(parser.parse(Stream("z")), "z") - self.assertRaises(ParseError, parser.parse, "\\z") + self.assertRaises(ParseError, parser.parse, Stream("\\z")) def test_many(self): letters = letter.many() - self.assertEqual(letters.parse("x"), ["x"]) - self.assertEqual(letters.parse("xyz"), ["x", "y", "z"]) - self.assertEqual(letters.parse(""), []) + self.assertEqual(letters.parse(Stream("x")), ["x"]) + self.assertEqual(letters.parse(Stream("xyz")), ["x", "y", "z"]) + self.assertEqual(letters.parse(Stream("")), []) - self.assertRaises(ParseError, letters.parse, "1") + self.assertRaises(ParseError, letters.parse, Stream("1")) def test_many_with_then(self): parser = string("x").many() >> string("y") - 
self.assertEqual(parser.parse("y"), "y") - self.assertEqual(parser.parse("xy"), "y") - self.assertEqual(parser.parse("xxxxxy"), "y") + self.assertEqual(parser.parse(Stream("y")), "y") + self.assertEqual(parser.parse(Stream("xy")), "y") + self.assertEqual(parser.parse(Stream("xxxxxy")), "y") def test_times_zero(self): zero_letters = letter.times(0) - self.assertEqual(zero_letters.parse(""), []) + self.assertEqual(zero_letters.parse(Stream("")), []) - self.assertRaises(ParseError, zero_letters.parse, "x") + self.assertRaises(ParseError, zero_letters.parse, Stream("x")) def test_times(self): three_letters = letter.times(3) - self.assertEqual(three_letters.parse("xyz"), ["x", "y", "z"]) + self.assertEqual(three_letters.parse(Stream("xyz")), ["x", "y", "z"]) - self.assertRaises(ParseError, three_letters.parse, "xy") - self.assertRaises(ParseError, three_letters.parse, "xyzw") + self.assertRaises(ParseError, three_letters.parse, Stream("xy")) + self.assertRaises(ParseError, three_letters.parse, Stream("xyzw")) def test_times_with_then(self): then_digit = letter.times(3) >> digit - self.assertEqual(then_digit.parse("xyz1"), "1") + self.assertEqual(then_digit.parse(Stream("xyz1")), "1") - self.assertRaises(ParseError, then_digit.parse, "xy1") - self.assertRaises(ParseError, then_digit.parse, "xyz") - self.assertRaises(ParseError, then_digit.parse, "xyzw") + self.assertRaises(ParseError, then_digit.parse, Stream("xy1")) + self.assertRaises(ParseError, then_digit.parse, Stream("xyz")) + self.assertRaises(ParseError, then_digit.parse, Stream("xyzw")) def test_times_with_min_and_max(self): some_letters = letter.times(2, 4) - self.assertEqual(some_letters.parse("xy"), ["x", "y"]) - self.assertEqual(some_letters.parse("xyz"), ["x", "y", "z"]) - self.assertEqual(some_letters.parse("xyzw"), ["x", "y", "z", "w"]) + self.assertEqual(some_letters.parse(Stream("xy")), ["x", "y"]) + self.assertEqual(some_letters.parse(Stream("xyz")), ["x", "y", "z"]) + 
self.assertEqual(some_letters.parse(Stream("xyzw")), ["x", "y", "z", "w"]) - self.assertRaises(ParseError, some_letters.parse, "x") - self.assertRaises(ParseError, some_letters.parse, "xyzwv") + self.assertRaises(ParseError, some_letters.parse, Stream("x")) + self.assertRaises(ParseError, some_letters.parse, Stream("xyzwv")) def test_times_with_min_and_max_and_then(self): then_digit = letter.times(2, 4) >> digit - self.assertEqual(then_digit.parse("xy1"), "1") - self.assertEqual(then_digit.parse("xyz1"), "1") - self.assertEqual(then_digit.parse("xyzw1"), "1") + self.assertEqual(then_digit.parse(Stream("xy1")), "1") + self.assertEqual(then_digit.parse(Stream("xyz1")), "1") + self.assertEqual(then_digit.parse(Stream("xyzw1")), "1") - self.assertRaises(ParseError, then_digit.parse, "xy") - self.assertRaises(ParseError, then_digit.parse, "xyzw") - self.assertRaises(ParseError, then_digit.parse, "xyzwv1") - self.assertRaises(ParseError, then_digit.parse, "x1") + self.assertRaises(ParseError, then_digit.parse, Stream("xy")) + self.assertRaises(ParseError, then_digit.parse, Stream("xyzw")) + self.assertRaises(ParseError, then_digit.parse, Stream("xyzwv1")) + self.assertRaises(ParseError, then_digit.parse, Stream("x1")) def test_at_most(self): ab = string("ab") - self.assertEqual(ab.at_most(2).parse(""), []) - self.assertEqual(ab.at_most(2).parse("ab"), ["ab"]) - self.assertEqual(ab.at_most(2).parse("abab"), ["ab", "ab"]) - self.assertRaises(ParseError, ab.at_most(2).parse, "ababab") + self.assertEqual(ab.at_most(2).parse(Stream("")), []) + self.assertEqual(ab.at_most(2).parse(Stream("ab")), ["ab"]) + self.assertEqual(ab.at_most(2).parse(Stream("abab")), ["ab", "ab"]) + self.assertRaises(ParseError, ab.at_most(2).parse, Stream("ababab")) def test_until(self): @@ -364,8 +365,8 @@ def test_until(self): self.assertEqual(seq(until, string("x")).parse_partial(s), ([4 * ["s"], "x"], "y")) self.assertEqual(until.then(string("x")).parse_partial(s), ("x", "y")) - 
self.assertRaises(ParseError, until.parse, "ssssy") - self.assertRaises(ParseError, until.parse, "xssssxy") + self.assertRaises(ParseError, until.parse, Stream("ssssy")) + self.assertRaises(ParseError, until.parse, Stream("xssssxy")) self.assertEqual(until.parse_partial("xxx"), ([], "xxx")) @@ -376,13 +377,13 @@ def test_until_with_consume_other(self): until = string("s").until(string("x"), consume_other=True) - self.assertEqual(until.parse("ssssx"), 4 * ["s"] + ["x"]) + self.assertEqual(until.parse(Stream("ssssx")), 4 * ["s"] + ["x"]) self.assertEqual(until.parse_partial("ssssxy"), (4 * ["s"] + ["x"], "y")) self.assertEqual(until.parse_partial("xxx"), (["x"], "xx")) - self.assertRaises(ParseError, until.parse, "ssssy") - self.assertRaises(ParseError, until.parse, "xssssxy") + self.assertRaises(ParseError, until.parse, Stream("ssssy")) + self.assertRaises(ParseError, until.parse, Stream("xssssxy")) def test_until_with_min(self): @@ -419,157 +420,157 @@ def test_until_with_min_max(self): def test_optional(self): p = string("a").optional() - self.assertEqual(p.parse("a"), "a") - self.assertEqual(p.parse(""), None) + self.assertEqual(p.parse(Stream("a")), "a") + self.assertEqual(p.parse(Stream("")), None) p = string("a").optional("b") - self.assertEqual(p.parse("a"), "a") - self.assertEqual(p.parse(""), "b") + self.assertEqual(p.parse(Stream("a")), "a") + self.assertEqual(p.parse(Stream("")), "b") def test_sep_by(self): digit_list = digit.map(int).sep_by(string(",")) - self.assertEqual(digit_list.parse("1,2,3,4"), [1, 2, 3, 4]) - self.assertEqual(digit_list.parse("9,0,4,7"), [9, 0, 4, 7]) - self.assertEqual(digit_list.parse("3,7"), [3, 7]) - self.assertEqual(digit_list.parse("8"), [8]) - self.assertEqual(digit_list.parse(""), []) + self.assertEqual(digit_list.parse(Stream("1,2,3,4")), [1, 2, 3, 4]) + self.assertEqual(digit_list.parse(Stream("9,0,4,7")), [9, 0, 4, 7]) + self.assertEqual(digit_list.parse(Stream("3,7")), [3, 7]) + 
self.assertEqual(digit_list.parse(Stream("8")), [8]) + self.assertEqual(digit_list.parse(Stream("")), []) - self.assertRaises(ParseError, digit_list.parse, "8,") - self.assertRaises(ParseError, digit_list.parse, ",9") - self.assertRaises(ParseError, digit_list.parse, "82") - self.assertRaises(ParseError, digit_list.parse, "7.6") + self.assertRaises(ParseError, digit_list.parse, Stream("8,")) + self.assertRaises(ParseError, digit_list.parse, Stream(",9")) + self.assertRaises(ParseError, digit_list.parse, Stream("82")) + self.assertRaises(ParseError, digit_list.parse, Stream("7.6")) def test_sep_by_with_min_and_max(self): digit_list = digit.map(int).sep_by(string(","), min=2, max=4) - self.assertEqual(digit_list.parse("1,2,3,4"), [1, 2, 3, 4]) - self.assertEqual(digit_list.parse("9,0,4,7"), [9, 0, 4, 7]) - self.assertEqual(digit_list.parse("3,7"), [3, 7]) + self.assertEqual(digit_list.parse(Stream("1,2,3,4")), [1, 2, 3, 4]) + self.assertEqual(digit_list.parse(Stream("9,0,4,7")), [9, 0, 4, 7]) + self.assertEqual(digit_list.parse(Stream("3,7")), [3, 7]) - self.assertRaises(ParseError, digit_list.parse, "8") - self.assertRaises(ParseError, digit_list.parse, "") - self.assertRaises(ParseError, digit_list.parse, "8,") - self.assertRaises(ParseError, digit_list.parse, ",9") - self.assertRaises(ParseError, digit_list.parse, "82") - self.assertRaises(ParseError, digit_list.parse, "7.6") - self.assertEqual(digit.sep_by(string(","), max=0).parse(""), []) + self.assertRaises(ParseError, digit_list.parse, Stream("8")) + self.assertRaises(ParseError, digit_list.parse, Stream("")) + self.assertRaises(ParseError, digit_list.parse, Stream("8,")) + self.assertRaises(ParseError, digit_list.parse, Stream(",9")) + self.assertRaises(ParseError, digit_list.parse, Stream("82")) + self.assertRaises(ParseError, digit_list.parse, Stream("7.6")) + self.assertEqual(digit.sep_by(string(","), max=0).parse(Stream("")), []) def test_add(self): - self.assertEqual((letter + digit).parse("a1"), "a1") 
+ self.assertEqual((letter + digit).parse(Stream("a1")), "a1") def test_multiply(self): - self.assertEqual((letter * 3).parse("abc"), ["a", "b", "c"]) + self.assertEqual((letter * 3).parse(Stream("abc")), ["a", "b", "c"]) def test_multiply_range(self): - self.assertEqual((letter * range(1, 2)).parse("a"), ["a"]) - self.assertRaises(ParseError, (letter * range(1, 2)).parse, "aa") + self.assertEqual((letter * range(1, 2)).parse(Stream("a")), ["a"]) + self.assertRaises(ParseError, (letter * range(1, 2)).parse, Stream("aa")) # Primitives def test_alt(self): - self.assertRaises(ParseError, alt().parse, "") - self.assertEqual(alt(letter, digit).parse("a"), "a") - self.assertEqual(alt(letter, digit).parse("1"), "1") - self.assertRaises(ParseError, alt(letter, digit).parse, ".") + self.assertRaises(ParseError, alt().parse, Stream("")) + self.assertEqual(alt(letter, digit).parse(Stream("a")), "a") + self.assertEqual(alt(letter, digit).parse(Stream("1")), "1") + self.assertRaises(ParseError, alt(letter, digit).parse, Stream(".")) def test_seq(self): - self.assertEqual(seq().parse(""), []) - self.assertEqual(seq(letter).parse("a"), ["a"]) - self.assertEqual(seq(letter, digit).parse("a1"), ["a", "1"]) - self.assertRaises(ParseError, seq(letter, digit).parse, "1a") + self.assertEqual(seq().parse(Stream("")), []) + self.assertEqual(seq(letter).parse(Stream("a")), ["a"]) + self.assertEqual(seq(letter, digit).parse(Stream("a1")), ["a", "1"]) + self.assertRaises(ParseError, seq(letter, digit).parse, Stream("1a")) def test_seq_kwargs(self): self.assertEqual( - seq(first_name=regex(r"\S+") << whitespace, last_name=regex(r"\S+")).parse("Jane Smith"), + seq(first_name=regex(r"\S+") << whitespace, last_name=regex(r"\S+")).parse(Stream("Jane Smith")), {"first_name": "Jane", "last_name": "Smith"}, ) def test_seq_kwargs_fail(self): - self.assertRaises(ParseError, seq(a=string("a")).parse, "b") + self.assertRaises(ParseError, seq(a=string("a")).parse, Stream("b")) def 
test_seq_kwargs_error(self): self.assertRaises(ValueError, lambda: seq(string("a"), b=string("b"))) def test_test_char(self): ascii = parsy_test_char(lambda c: ord(c) < 128, "ascii character") - self.assertEqual(ascii.parse("a"), "a") + self.assertEqual(ascii.parse(Stream("a")), "a") with self.assertRaises(ParseError) as err: - ascii.parse("☺") + ascii.parse(Stream("☺")) ex = err.exception self.assertEqual(str(ex), """expected 'ascii character' at 0:0""") with self.assertRaises(ParseError) as err: - ascii.parse("") + ascii.parse(Stream("")) ex = err.exception self.assertEqual(str(ex), """expected 'ascii character' at 0:0""") def test_char_from_str(self): ab = char_from("ab") - self.assertEqual(ab.parse("a"), "a") - self.assertEqual(ab.parse("b"), "b") + self.assertEqual(ab.parse(Stream("a")), "a") + self.assertEqual(ab.parse(Stream("b")), "b") with self.assertRaises(ParseError) as err: - ab.parse("x") + ab.parse(Stream("x")) ex = err.exception self.assertEqual(str(ex), """expected '[ab]' at 0:0""") def test_char_from_bytes(self): ab = char_from(b"ab") - self.assertEqual(ab.parse(b"a"), b"a") - self.assertEqual(ab.parse(b"b"), b"b") + self.assertEqual(ab.parse(Stream(b"a")), b"a") + self.assertEqual(ab.parse(Stream(b"b")), b"b") with self.assertRaises(ParseError) as err: - ab.parse(b"x") + ab.parse(Stream(b"x")) ex = err.exception self.assertEqual(str(ex), """expected b'[ab]' at 0""") def test_string_from(self): titles = string_from("Mr", "Mr.", "Mrs", "Mrs.") - self.assertEqual(titles.parse("Mr"), "Mr") - self.assertEqual(titles.parse("Mr."), "Mr.") - self.assertEqual((titles + string(" Hyde")).parse("Mr. Hyde"), "Mr. Hyde") + self.assertEqual(titles.parse(Stream("Mr")), "Mr") + self.assertEqual(titles.parse(Stream("Mr.")), "Mr.") + self.assertEqual((titles + string(" Hyde")).parse(Stream("Mr. Hyde")), "Mr. 
Hyde") with self.assertRaises(ParseError) as err: - titles.parse("foo") + titles.parse(Stream("foo")) ex = err.exception self.assertEqual(str(ex), """expected one of 'Mr', 'Mr.', 'Mrs', 'Mrs.' at 0:0""") def test_string_from_transform(self): titles = string_from("Mr", "Mr.", "Mrs", "Mrs.", transform=lambda s: s.lower()) - self.assertEqual(titles.parse("mr"), "Mr") - self.assertEqual(titles.parse("mr."), "Mr.") - self.assertEqual(titles.parse("MR"), "Mr") - self.assertEqual(titles.parse("MR."), "Mr.") + self.assertEqual(titles.parse(Stream("mr")), "Mr") + self.assertEqual(titles.parse(Stream("mr.")), "Mr.") + self.assertEqual(titles.parse(Stream("MR")), "Mr") + self.assertEqual(titles.parse(Stream("MR.")), "Mr.") def test_peek(self): - self.assertEqual(peek(any_char).parse_partial("abc"), ("a", "abc")) + self.assertEqual(peek(any_char).parse_partial(Stream("abc")), ("a", "abc")) with self.assertRaises(ParseError) as err: - peek(digit).parse("a") + peek(digit).parse(Stream("a")) self.assertEqual(str(err.exception), "expected 'a digit' at 0:0") def test_any_char(self): - self.assertEqual(any_char.parse("x"), "x") - self.assertEqual(any_char.parse("\n"), "\n") - self.assertRaises(ParseError, any_char.parse, "") + self.assertEqual(any_char.parse(Stream("x")), "x") + self.assertEqual(any_char.parse(Stream("\n")), "\n") + self.assertRaises(ParseError, any_char.parse, Stream("")) def test_whitespace(self): - self.assertEqual(whitespace.parse("\n"), "\n") - self.assertEqual(whitespace.parse(" "), " ") - self.assertRaises(ParseError, whitespace.parse, "x") + self.assertEqual(whitespace.parse(Stream("\n")), "\n") + self.assertEqual(whitespace.parse(Stream(" ")), " ") + self.assertRaises(ParseError, whitespace.parse, Stream("x")) def test_letter(self): - self.assertEqual(letter.parse("a"), "a") - self.assertRaises(ParseError, letter.parse, "1") + self.assertEqual(letter.parse(Stream("a")), "a") + self.assertRaises(ParseError, letter.parse, Stream("1")) def test_digit(self): - 
self.assertEqual(digit.parse("¹"), "¹") - self.assertEqual(digit.parse("2"), "2") - self.assertRaises(ParseError, digit.parse, "x") + self.assertEqual(digit.parse(Stream("¹")), "¹") + self.assertEqual(digit.parse(Stream("2")), "2") + self.assertRaises(ParseError, digit.parse, Stream("x")) def test_decimal_digit(self): - self.assertEqual(decimal_digit.at_least(1).concat().parse("9876543210"), "9876543210") - self.assertRaises(ParseError, decimal_digit.parse, "¹") + self.assertEqual(decimal_digit.at_least(1).concat().parse(Stream("9876543210")), "9876543210") + self.assertRaises(ParseError, decimal_digit.parse, Stream("¹")) def test_line_info(self): @generate @@ -579,29 +580,29 @@ def foo(): return (l, i) self.assertEqual( - foo.many().parse("AB\nCD"), + foo.many().parse(Stream("AB\nCD")), [ - ("A", (0, 0)), - ("B", (0, 1)), - ("\n", (0, 2)), - ("C", (1, 0)), - ("D", (1, 1)), + ("A", (None, 0, 0)), + ("B", (None, 0, 1)), + ("\n", (None, 0, 2)), + ("C", (None, 1, 0)), + ("D", (None, 1, 1)), ], ) def test_should_fail(self): not_a_digit = digit.should_fail("not a digit") >> regex(r".*") - self.assertEqual(not_a_digit.parse("a"), "a") - self.assertEqual(not_a_digit.parse("abc"), "abc") - self.assertEqual(not_a_digit.parse("a10"), "a10") - self.assertEqual(not_a_digit.parse(""), "") + self.assertEqual(not_a_digit.parse(Stream("a")), "a") + self.assertEqual(not_a_digit.parse(Stream("abc")), "abc") + self.assertEqual(not_a_digit.parse(Stream("a10")), "a10") + self.assertEqual(not_a_digit.parse(Stream("")), "") with self.assertRaises(ParseError) as err: - not_a_digit.parse("8") + not_a_digit.parse(Stream("8")) self.assertEqual(str(err.exception), "expected 'not a digit' at 0:0") - self.assertRaises(ParseError, not_a_digit.parse, "8ab") + self.assertRaises(ParseError, not_a_digit.parse, Stream("8ab")) def test_from_enum_string(self): class Pet(enum.Enum): @@ -609,9 +610,9 @@ class Pet(enum.Enum): DOG = "dog" pet = from_enum(Pet) - self.assertEqual(pet.parse("cat"), Pet.CAT) - 
self.assertEqual(pet.parse("dog"), Pet.DOG) - self.assertRaises(ParseError, pet.parse, "foo") + self.assertEqual(pet.parse(Stream("cat")), Pet.CAT) + self.assertEqual(pet.parse(Stream("dog")), Pet.DOG) + self.assertRaises(ParseError, pet.parse, Stream("foo")) def test_from_enum_int(self): class Position(enum.Enum): @@ -619,9 +620,9 @@ class Position(enum.Enum): SECOND = 2 position = from_enum(Position) - self.assertEqual(position.parse("1"), Position.FIRST) - self.assertEqual(position.parse("2"), Position.SECOND) - self.assertRaises(ParseError, position.parse, "foo") + self.assertEqual(position.parse(Stream("1")), Position.FIRST) + self.assertEqual(position.parse(Stream("2")), Position.SECOND) + self.assertRaises(ParseError, position.parse, Stream("foo")) def test_from_enum_transform(self): class Pet(enum.Enum): @@ -629,8 +630,8 @@ class Pet(enum.Enum): DOG = "dog" pet = from_enum(Pet, transform=lambda s: s.lower()) - self.assertEqual(pet.parse("cat"), Pet.CAT) - self.assertEqual(pet.parse("CAT"), Pet.CAT) + self.assertEqual(pet.parse(Stream("cat")), Pet.CAT) + self.assertEqual(pet.parse(Stream("CAT")), Pet.CAT) class TestParserTokens(unittest.TestCase): @@ -645,10 +646,10 @@ class TestParserTokens(unittest.TestCase): def test_test_item(self): start_stop = parsy_test_item(lambda i: i in [self.START, self.STOP], "START/STOP") - self.assertEqual(start_stop.parse([self.START]), self.START) - self.assertEqual(start_stop.parse([self.STOP]), self.STOP) + self.assertEqual(start_stop.parse(Stream([self.START])), self.START) + self.assertEqual(start_stop.parse(Stream([self.STOP])), self.STOP) with self.assertRaises(ParseError) as err: - start_stop.many().parse([self.START, "hello"]) + start_stop.many().parse(Stream([self.START, "hello"])) ex = err.exception self.assertEqual(str(ex), "expected one of 'EOF', 'START/STOP' at 1") @@ -656,7 +657,7 @@ def test_test_item(self): self.assertEqual(ex.index, 1) def test_match_item(self): - 
self.assertEqual(match_item(self.START).parse([self.START]), self.START) + self.assertEqual(match_item(self.START).parse(Stream([self.START])), self.START) with self.assertRaises(ParseError) as err: match_item(self.START, "START").parse([]) @@ -667,7 +668,7 @@ def test_parse_tokens(self): other_vals = parsy_test_item(lambda i: i not in [self.START, self.STOP], "not START/STOP") bracketed = match_item(self.START) >> other_vals.many() << match_item(self.STOP) - stream = [self.START, "hello", 1, 2, "goodbye", self.STOP] + stream = Stream([self.START, "hello", 1, 2, "goodbye", self.STOP]) result = bracketed.parse(stream) self.assertEqual(result, ["hello", 1, 2, "goodbye"]) @@ -678,17 +679,17 @@ def foo(): l = yield letter return (l, i) - self.assertEqual(foo.many().parse(["A", "B"]), [("A", 0), ("B", 1)]) + self.assertEqual(foo.many().parse(Stream(["A", "B"])), [("A", 0), ("B", 1)]) class TestUtils(unittest.TestCase): def test_line_info_at(self): - text = "abc\ndef" - self.assertEqual(line_info_at(text, 0), (0, 0)) - self.assertEqual(line_info_at(text, 2), (0, 2)) - self.assertEqual(line_info_at(text, 3), (0, 3)) - self.assertEqual(line_info_at(text, 4), (1, 0)) - self.assertEqual(line_info_at(text, 7), (1, 3)) + text = Stream("abc\ndef") + self.assertEqual(line_info_at(text, 0), (None, 0, 0)) + self.assertEqual(line_info_at(text, 2), (None, 0, 2)) + self.assertEqual(line_info_at(text, 3), (None, 0, 3)) + self.assertEqual(line_info_at(text, 4), (None, 1, 0)) + self.assertEqual(line_info_at(text, 7), (None, 1, 3)) self.assertRaises(ValueError, lambda: line_info_at(text, 8)) @@ -697,7 +698,7 @@ def test_forward_declaration_1(self): # This is the example from the docs expr = forward_declaration() with self.assertRaises(ValueError): - expr.parse("()") + expr.parse(Stream("()")) with self.assertRaises(ValueError): expr.parse_partial("()") @@ -706,22 +707,22 @@ def test_forward_declaration_1(self): group = string("(") >> expr.sep_by(string(" ")) << string(")") 
expr.become(simple | group) - self.assertEqual(expr.parse("(0 1 (2 3))"), [0, 1, [2, 3]]) + self.assertEqual(expr.parse(Stream("(0 1 (2 3))")), [0, 1, [2, 3]]) def test_forward_declaration_2(self): # Simplest example I could think of expr = forward_declaration() expr.become(string("A") + expr | string("Z")) - self.assertEqual(expr.parse("Z"), "Z") - self.assertEqual(expr.parse("AZ"), "AZ") - self.assertEqual(expr.parse("AAAAAZ"), "AAAAAZ") + self.assertEqual(expr.parse(Stream("Z")), "Z") + self.assertEqual(expr.parse(Stream("AZ")), "AZ") + self.assertEqual(expr.parse(Stream("AAAAAZ")), "AAAAAZ") with self.assertRaises(ParseError): - expr.parse("A") + expr.parse(Stream("A")) with self.assertRaises(ParseError): - expr.parse("B") + expr.parse(Stream("B")) self.assertEqual(expr.parse_partial("AAZXX"), ("AAZ", "XX")) diff --git a/tests/test_sexpr.py b/tests/test_sexpr.py index e88d0d5..0c9bc9f 100644 --- a/tests/test_sexpr.py +++ b/tests/test_sexpr.py @@ -1,7 +1,7 @@ import re import unittest -from parsy import generate, regex, string +from parsy import Stream, generate, regex, string whitespace = regex(r"\s+", re.MULTILINE) comment = regex(r";.*") @@ -40,29 +40,31 @@ def quote(): class TestSexpr(unittest.TestCase): def test_form(self): - result = program.parse("(1 2 3)") + result = program.parse(Stream("(1 2 3)")) self.assertEqual(result, [[1, 2, 3]]) def test_quote(self): - result = program.parse("'foo '(bar baz)") + result = program.parse(Stream("'foo '(bar baz)")) self.assertEqual(result, [["quote", "foo"], ["quote", ["bar", "baz"]]]) def test_double_quote(self): - result = program.parse("''foo") + result = program.parse(Stream("''foo")) self.assertEqual(result, [["quote", ["quote", "foo"]]]) def test_boolean(self): - result = program.parse("#t #f") + result = program.parse(Stream("#t #f")) self.assertEqual(result, [True, False]) def test_comments(self): result = program.parse( - """ + Stream( + """ ; a program with a comment ( foo ; that's a foo bar ) ; some 
comments at the end """ + ) ) self.assertEqual(result, [["foo", "bar"]]) From b81c83bf5206ec3050a54d6d3044f75b1389cf0c Mon Sep 17 00:00:00 2001 From: Jacob Errington Date: Sun, 19 Jan 2025 08:18:32 -0500 Subject: [PATCH 03/11] document Stream --- docs/ref/methods_and_combinators.rst | 29 ++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/docs/ref/methods_and_combinators.rst b/docs/ref/methods_and_combinators.rst index 5ef0a2e..1095a1e 100644 --- a/docs/ref/methods_and_combinators.rst +++ b/docs/ref/methods_and_combinators.rst @@ -23,25 +23,25 @@ can be used and manipulated as below. The following methods are for actually **using** the parsers that you have created: - .. method:: parse(string_or_list) + .. method:: parse(stream) - Attempts to parse the given string (or list). If the parse is successful - and consumes the entire string, the result is returned - otherwise, a + Attempts to parse the given :class:`Stream` of data. If the parse is successful + and consumes the entire stream, the result is returned - otherwise, a ``ParseError`` is raised. - Instead of passing a string, you can in fact pass a list of tokens. Almost - all the examples assume strings for simplicity. Some of the primitives are + Most commonly, a stream simply wraps a string, but you could use a list of tokens instead. + Almost all the examples assume strings for simplicity. Some of the primitives are also clearly string specific, and a few of the combinators (such as :meth:`Parser.concat`) are string specific, but most of the rest of the library will work with tokens just as well. See :doc:`/howto/lexing` for more information. - .. method:: parse_partial(string_or_list) + .. method:: parse_partial(stream) Similar to ``parse``, except that it does not require the entire - string (or list) to be consumed. Returns a tuple of + stream to be consumed. 
Returns a tuple of ``(result, remainder)``, where ``remainder`` is the part of - the string (or list) that was left over. + the stream that was left over. The following methods are essentially **combinators** that produce new parsers from the existing one. They are provided as methods on ``Parser`` for @@ -594,3 +594,16 @@ Parsy does not try to include every possible combinator - there is no reason why you cannot create your own for your needs using the built-in combinators and primitives. If you find something that is very generic and would be very useful to have as a built-in, please :doc:`submit ` as a PR! + +Auxiliary data structures +========================= + +.. class:: Stream + + Wraps a string, byte sequence, or list, possibly equipping it with a source. + If the data is loaded from a file or URL, the source should be that file path or URL. + The source name is used in generated parse error messages. + + .. method:: __init__(data, [source=None]) + + Wraps the data into a stream, possibly equipping it with a source. 
From a47532172328ec99a7b3b4725ce2b1a970c8cfdf Mon Sep 17 00:00:00 2001 From: Jacob Errington Date: Sun, 19 Jan 2025 09:20:59 -0500 Subject: [PATCH 04/11] allow calling .parse() with legacy streams --- src/parsy/__init__.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/parsy/__init__.py b/src/parsy/__init__.py index 07f32bf..2182e89 100644 --- a/src/parsy/__init__.py +++ b/src/parsy/__init__.py @@ -117,20 +117,25 @@ def __init__(self, wrapped_fn: Callable[[Stream, int], Result]): def __call__(self, stream: Stream, index: int): return self.wrapped_fn(stream, index) - def parse(self, stream: Stream) -> Any: + def parse(self, stream: Stream | str | bytes | list) -> Any: """Parses a string or list of tokens and returns the result or raise a ParseError.""" (result, _) = (self << eof).parse_partial(stream) return result - def parse_partial(self, stream: Stream) -> tuple[Any, Stream]: + def parse_partial(self, stream: Stream | str | bytes | list) -> tuple[Any, Stream]: """ Parses the longest possible prefix of a given string. Returns a tuple of the result and the unparsed remainder, or raises ParseError """ - result = self(stream, 0) + result = self( + stream if isinstance(stream, Stream) else Stream(stream), + 0, + ) if result.status: + # The type of the returned remaining stream matches the type of the + # input stream. 
return (result.value, stream[result.index :]) else: raise ParseError(result.expected, stream, result.furthest) From 7dd0a91e4a46359dd6343e12d46cb2a97101d235 Mon Sep 17 00:00:00 2001 From: Jacob Errington Date: Sat, 18 Jan 2025 21:49:27 -0500 Subject: [PATCH 05/11] add .span() method --- src/parsy/__init__.py | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/src/parsy/__init__.py b/src/parsy/__init__.py index 2182e89..cf4471b 100644 --- a/src/parsy/__init__.py +++ b/src/parsy/__init__.py @@ -32,6 +32,21 @@ def __getitem__(self, i): return self.data[i] +@dataclass +class SourceSpan: + """Identifies a span of material from the data to parse. + + Attributes: + source (str | None): the source of the data, e.g. a file path. + start ([int, int]): the start row and column of the span. + end ([int, int]): the end row and column of the span. + """ + + source: str | None + start: [int, int] + end: [int, int] + + def line_info_at(stream: Stream, index): if index > len(stream): raise ValueError("invalid index") @@ -368,6 +383,9 @@ def mark(self) -> Parser: ((start_row, start_column), original_value, (end_row, end_column)) + + ``.span()`` is a more powerful version of this combinator, returning a + SourceSpan. """ @generate @@ -379,6 +397,24 @@ def marked(): return marked + def span(self) -> Parser: + """ + Returns a parser that augments the initial parser's result with a + SourceSpan capturing where that parser started and stopped. 
+ The new value is a tuple: + + (source_span, original_value) + """ + + @generate + def marked(): + source, *start = yield line_info + body = yield self + _, *end = yield line_info + return (SourceSpan(source, tuple(start), tuple(end)), body) + + return marked + def tag(self, name: str) -> Parser: """ Returns a parser that wraps the produced value of the initial parser in a From 1c4856f414a3de4bebee2768dc218e850adf46a8 Mon Sep 17 00:00:00 2001 From: Jacob Errington Date: Sun, 19 Jan 2025 08:18:44 -0500 Subject: [PATCH 06/11] document .span() and SourceSpan --- docs/ref/methods_and_combinators.rst | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/docs/ref/methods_and_combinators.rst b/docs/ref/methods_and_combinators.rst index 1095a1e..17c1404 100644 --- a/docs/ref/methods_and_combinators.rst +++ b/docs/ref/methods_and_combinators.rst @@ -401,6 +401,20 @@ can be used and manipulated as below. ` and want subsequent parsing of the token stream to be able to report original positions in error messages etc. + .. method:: span() + + Returns a parser that augments the initial parser's result with a :class:`SourceSpan` + containing information about where that parser started and stopped within the + source data. The new value is a tuple: + + .. code:: python + + (source_span, original_value) + + This enables reporting of custom errors involving source locations, such as when + using parsy as a :doc:`lexer` or when building a syntax tree that will be + further analyzed. + .. _operators: Parser operators @@ -607,3 +621,9 @@ Auxiliary data structures .. method:: __init__(data, [source=None]) Wraps the data into a stream, possibly equipping it with a source. + +.. class:: SourceSpan + + Identifies a span of material from the data being parsed by its start row and column and its end + row and column. If the data stream was equipped with a source, that value is also available in + this object. 
From d4c2586586785f9007bfef15d5436aa7dab93e04 Mon Sep 17 00:00:00 2001 From: Jacob Errington Date: Sun, 19 Jan 2025 08:26:01 -0500 Subject: [PATCH 07/11] add .span() test --- tests/test_parsy.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tests/test_parsy.py b/tests/test_parsy.py index 35e3cae..77e6bc1 100644 --- a/tests/test_parsy.py +++ b/tests/test_parsy.py @@ -7,6 +7,7 @@ from parsy import ( ParseError, + SourceSpan, Stream, alt, any_char, @@ -209,6 +210,21 @@ def test_mark(self): self.assertEqual(letters, ["q", "w", "e", "r"]) self.assertEqual(end, (1, 4)) + def test_span(self): + parser = (letter.many().span() << string("\n")).many() + source = "sample" + + lines = parser.parse(Stream("asdf\nqwer\n", source=source)) + + self.assertEqual(len(lines), 2) + + (span, letters) = lines[0] + self.assertEqual(span, SourceSpan(source, (0, 0), (0, 4))) + self.assertEqual(letters, ["a", "s", "d", "f"]) + + (span, letters) = lines[1] + self.assertEqual(span, SourceSpan(source, (1, 0), (1, 4))) + def test_tag(self): parser = letter.many().concat().tag("word") self.assertEqual( From d384337a112b636ebba1791342a46050adb54657 Mon Sep 17 00:00:00 2001 From: Jacob Errington Date: Sun, 19 Jan 2025 08:26:13 -0500 Subject: [PATCH 08/11] adjust line_info tests to include sources --- tests/test_parsy.py | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/tests/test_parsy.py b/tests/test_parsy.py index 77e6bc1..7c490bd 100644 --- a/tests/test_parsy.py +++ b/tests/test_parsy.py @@ -606,6 +606,18 @@ def foo(): ], ) + source = "aaaaa" + self.assertEqual( + foo.many().parse(Stream("AB\nCD", source=source)), + [ + ("A", (source, 0, 0)), + ("B", (source, 0, 1)), + ("\n", (source, 0, 2)), + ("C", (source, 1, 0)), + ("D", (source, 1, 1)), + ], + ) + def test_should_fail(self): not_a_digit = digit.should_fail("not a digit") >> regex(r".*") @@ -700,12 +712,12 @@ def foo(): class TestUtils(unittest.TestCase): def 
test_line_info_at(self): - text = Stream("abc\ndef") - self.assertEqual(line_info_at(text, 0), (None, 0, 0)) - self.assertEqual(line_info_at(text, 2), (None, 0, 2)) - self.assertEqual(line_info_at(text, 3), (None, 0, 3)) - self.assertEqual(line_info_at(text, 4), (None, 1, 0)) - self.assertEqual(line_info_at(text, 7), (None, 1, 3)) + text = Stream("abc\ndef", source="aaaa") + self.assertEqual(line_info_at(text, 0), ("aaaa", 0, 0)) + self.assertEqual(line_info_at(text, 2), ("aaaa", 0, 2)) + self.assertEqual(line_info_at(text, 3), ("aaaa", 0, 3)) + self.assertEqual(line_info_at(text, 4), ("aaaa", 1, 0)) + self.assertEqual(line_info_at(text, 7), ("aaaa", 1, 3)) self.assertRaises(ValueError, lambda: line_info_at(text, 8)) From 52ac95604d73d91ea723f274230fa21206a6896b Mon Sep 17 00:00:00 2001 From: Jacob Errington Date: Sun, 19 Jan 2025 09:44:19 -0500 Subject: [PATCH 09/11] actions: drop unsupported py37 runner --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 4ea7760..422cbc2 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -15,7 +15,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: [3.7, 3.8, 3.9, "3.10", "3.11", "pypy-3.7"] + python-version: ["3.8", "3.9", "3.10", "3.11", "pypy-3.7"] env: PYTHON: ${{ matrix.python-version }} From f0493770722c15b1a2b0e5acd054edcf453a1b6b Mon Sep 17 00:00:00 2001 From: Jacob Errington Date: Sun, 19 Jan 2025 09:57:42 -0500 Subject: [PATCH 10/11] actions: add runners for 3.12, 3.13 --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 422cbc2..d0ff09a 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -15,7 +15,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.8", "3.9", "3.10", "3.11", "pypy-3.7"] + python-version: ["3.8", "3.9", 
"3.10", "3.11", "3.12", "3.13", "pypy-3.7"] env: PYTHON: ${{ matrix.python-version }} From ef9b4254a36f6748fa62691de39b1912f355c2ae Mon Sep 17 00:00:00 2001 From: Jacob Errington Date: Sun, 19 Jan 2025 10:00:17 -0500 Subject: [PATCH 11/11] make setup.py a no-op on python>=3.12 --- setup.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/setup.py b/setup.py index 6068493..0ec4474 100755 --- a/setup.py +++ b/setup.py @@ -1,3 +1,8 @@ +import sys + +if sys.version_info[1] > 11: + sys.exit(0) + from setuptools import setup setup()