import ast
import difflib
import io
import textwrap
import unittest
from tokenize import NAME, NEWLINE, NUMBER, OP, TokenInfo
from typing import Any, Dict, List, Type

from test import test_tools
| |
| test_tools.skip_if_missing("peg_generator") |
| with test_tools.imports_under_tool("peg_generator"): |
| from pegen.grammar_parser import GeneratedParser as GrammarParser |
| from pegen.testutil import parse_string, generate_parser, make_parser |
| from pegen.grammar import GrammarVisitor, GrammarError, Grammar |
| from pegen.grammar_visualizer import ASTGrammarPrinter |
| from pegen.parser import Parser |
| from pegen.parser_generator import compute_nullables, compute_left_recursives |
| from pegen.python_generator import PythonParserGenerator |
| |
| |
| class TestPegen(unittest.TestCase): |
    def test_parse_grammar(self) -> None:
        """str() of a grammar drops actions and item names; repr() is structural."""
        grammar_source = """
        start: sum NEWLINE
        sum: t1=term '+' t2=term { action } | term
        term: NUMBER
        """
        expected = """
        start: sum NEWLINE
        sum: term '+' term | term
        term: NUMBER
        """
        grammar: Grammar = parse_string(grammar_source, GrammarParser)
        rules = grammar.rules
        self.assertEqual(str(grammar), textwrap.dedent(expected).strip())
        # Check the str() and repr() of a few rules; AST nodes don't support ==.
        self.assertEqual(str(rules["start"]), "start: sum NEWLINE")
        self.assertEqual(str(rules["sum"]), "sum: term '+' term | term")
        expected_repr = (
            "Rule('term', None, Rhs([Alt([NamedItem(None, NameLeaf('NUMBER'))])]))"
        )
        self.assertEqual(repr(rules["term"]), expected_repr)
| |
    def test_long_rule_str(self) -> None:
        """A rule too long for one line is rendered with one alternative per line."""
        grammar_source = """
        start: zero | one | one zero | one one | one zero zero | one zero one | one one zero | one one one
        """
        expected = """
        start:
            | zero
            | one
            | one zero
            | one one
            | one zero zero
            | one zero one
            | one one zero
            | one one one
        """
        grammar: Grammar = parse_string(grammar_source, GrammarParser)
        self.assertEqual(str(grammar.rules["start"]), textwrap.dedent(expected).strip())
| |
    def test_typed_rules(self) -> None:
        """Rule return-type annotations ([int]) appear in repr() but not str()."""
        grammar = """
        start[int]: sum NEWLINE
        sum[int]: t1=term '+' t2=term { action } | term
        term[int]: NUMBER
        """
        rules = parse_string(grammar, GrammarParser).rules
        # Check the str() and repr() of a few rules; AST nodes don't support ==.
        self.assertEqual(str(rules["start"]), "start: sum NEWLINE")
        self.assertEqual(str(rules["sum"]), "sum: term '+' term | term")
        self.assertEqual(
            repr(rules["term"]),
            "Rule('term', 'int', Rhs([Alt([NamedItem(None, NameLeaf('NUMBER'))])]))",
        )
| |
    def test_gather(self) -> None:
        """The gather operator ','.thing+ collects items and drops the separators."""
        grammar = """
        start: ','.thing+ NEWLINE
        thing: NUMBER
        """
        rules = parse_string(grammar, GrammarParser).rules
        self.assertEqual(str(rules["start"]), "start: ','.thing+ NEWLINE")
        self.assertTrue(
            repr(rules["start"]).startswith(
                "Rule('start', None, Rhs([Alt([NamedItem(None, Gather(StringLeaf(\"','\"), NameLeaf('thing'"
            )
        )
        self.assertEqual(str(rules["thing"]), "thing: NUMBER")
        parser_class = make_parser(grammar)
        node = parse_string("42\n", parser_class)  # single item: just check it parses
        node = parse_string("1, 2\n", parser_class)
        self.assertEqual(
            node,
            [
                [
                    TokenInfo(
                        NUMBER, string="1", start=(1, 0), end=(1, 1), line="1, 2\n"
                    ),
                    TokenInfo(
                        NUMBER, string="2", start=(1, 3), end=(1, 4), line="1, 2\n"
                    ),
                ],
                TokenInfo(
                    NEWLINE, string="\n", start=(1, 4), end=(1, 5), line="1, 2\n"
                ),
            ],
        )
| |
| def test_expr_grammar(self) -> None: |
| grammar = """ |
| start: sum NEWLINE |
| sum: term '+' term | term |
| term: NUMBER |
| """ |
| parser_class = make_parser(grammar) |
| node = parse_string("42\n", parser_class) |
| self.assertEqual( |
| node, |
| [ |
| TokenInfo(NUMBER, string="42", start=(1, 0), end=(1, 2), line="42\n"), |
| TokenInfo(NEWLINE, string="\n", start=(1, 2), end=(1, 3), line="42\n"), |
| ], |
| ) |
| |
    def test_optional_operator(self) -> None:
        """An optional group ('+' term)? yields its match, or None when absent."""
        grammar = """
        start: sum NEWLINE
        sum: term ('+' term)?
        term: NUMBER
        """
        parser_class = make_parser(grammar)
        node = parse_string("1 + 2\n", parser_class)
        self.assertEqual(
            node,
            [
                [
                    TokenInfo(
                        NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2\n"
                    ),
                    [
                        TokenInfo(
                            OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2\n"
                        ),
                        TokenInfo(
                            NUMBER, string="2", start=(1, 4), end=(1, 5), line="1 + 2\n"
                        ),
                    ],
                ],
                TokenInfo(
                    NEWLINE, string="\n", start=(1, 5), end=(1, 6), line="1 + 2\n"
                ),
            ],
        )
        node = parse_string("1\n", parser_class)
        self.assertEqual(
            node,
            [
                [
                    TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n"),
                    None,  # the optional group did not match
                ],
                TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"),
            ],
        )
| |
    def test_optional_literal(self) -> None:
        """An optional literal '+' ? yields the token, or None when absent."""
        grammar = """
        start: sum NEWLINE
        sum: term '+' ?
        term: NUMBER
        """
        parser_class = make_parser(grammar)
        node = parse_string("1+\n", parser_class)
        self.assertEqual(
            node,
            [
                [
                    TokenInfo(
                        NUMBER, string="1", start=(1, 0), end=(1, 1), line="1+\n"
                    ),
                    TokenInfo(OP, string="+", start=(1, 1), end=(1, 2), line="1+\n"),
                ],
                TokenInfo(NEWLINE, string="\n", start=(1, 2), end=(1, 3), line="1+\n"),
            ],
        )
        node = parse_string("1\n", parser_class)
        self.assertEqual(
            node,
            [
                [
                    TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n"),
                    None,  # the optional '+' did not match
                ],
                TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"),
            ],
        )
| |
    def test_alt_optional_operator(self) -> None:
        """Bracket syntax ['+' term] behaves like the ()? optional group."""
        grammar = """
        start: sum NEWLINE
        sum: term ['+' term]
        term: NUMBER
        """
        parser_class = make_parser(grammar)
        node = parse_string("1 + 2\n", parser_class)
        self.assertEqual(
            node,
            [
                [
                    TokenInfo(
                        NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2\n"
                    ),
                    [
                        TokenInfo(
                            OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2\n"
                        ),
                        TokenInfo(
                            NUMBER, string="2", start=(1, 4), end=(1, 5), line="1 + 2\n"
                        ),
                    ],
                ],
                TokenInfo(
                    NEWLINE, string="\n", start=(1, 5), end=(1, 6), line="1 + 2\n"
                ),
            ],
        )
        node = parse_string("1\n", parser_class)
        self.assertEqual(
            node,
            [
                [
                    TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n"),
                    None,  # the optional part did not match
                ],
                TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"),
            ],
        )
| |
    def test_repeat_0_simple(self) -> None:
        """thing* matches zero or more items; zero matches yield an empty list."""
        grammar = """
        start: thing thing* NEWLINE
        thing: NUMBER
        """
        parser_class = make_parser(grammar)
        node = parse_string("1 2 3\n", parser_class)
        self.assertEqual(
            node,
            [
                TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 2 3\n"),
                [
                    TokenInfo(
                        NUMBER, string="2", start=(1, 2), end=(1, 3), line="1 2 3\n"
                    ),
                    TokenInfo(
                        NUMBER, string="3", start=(1, 4), end=(1, 5), line="1 2 3\n"
                    ),
                ],
                TokenInfo(
                    NEWLINE, string="\n", start=(1, 5), end=(1, 6), line="1 2 3\n"
                ),
            ],
        )
        node = parse_string("1\n", parser_class)
        self.assertEqual(
            node,
            [
                TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n"),
                [],  # zero repetitions
                TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"),
            ],
        )
| |
    def test_repeat_0_complex(self) -> None:
        """A repeated group ('+' term)* yields a list of [op, term] pairs."""
        grammar = """
        start: term ('+' term)* NEWLINE
        term: NUMBER
        """
        parser_class = make_parser(grammar)
        node = parse_string("1 + 2 + 3\n", parser_class)
        self.assertEqual(
            node,
            [
                TokenInfo(
                    NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2 + 3\n"
                ),
                [
                    [
                        TokenInfo(
                            OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2 + 3\n"
                        ),
                        TokenInfo(
                            NUMBER,
                            string="2",
                            start=(1, 4),
                            end=(1, 5),
                            line="1 + 2 + 3\n",
                        ),
                    ],
                    [
                        TokenInfo(
                            OP, string="+", start=(1, 6), end=(1, 7), line="1 + 2 + 3\n"
                        ),
                        TokenInfo(
                            NUMBER,
                            string="3",
                            start=(1, 8),
                            end=(1, 9),
                            line="1 + 2 + 3\n",
                        ),
                    ],
                ],
                TokenInfo(
                    NEWLINE, string="\n", start=(1, 9), end=(1, 10), line="1 + 2 + 3\n"
                ),
            ],
        )
| |
    def test_repeat_1_simple(self) -> None:
        """thing+ needs at least one item; a lone NUMBER fails to parse."""
        grammar = """
        start: thing thing+ NEWLINE
        thing: NUMBER
        """
        parser_class = make_parser(grammar)
        node = parse_string("1 2 3\n", parser_class)
        self.assertEqual(
            node,
            [
                TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 2 3\n"),
                [
                    TokenInfo(
                        NUMBER, string="2", start=(1, 2), end=(1, 3), line="1 2 3\n"
                    ),
                    TokenInfo(
                        NUMBER, string="3", start=(1, 4), end=(1, 5), line="1 2 3\n"
                    ),
                ],
                TokenInfo(
                    NEWLINE, string="\n", start=(1, 5), end=(1, 6), line="1 2 3\n"
                ),
            ],
        )
        with self.assertRaises(SyntaxError):
            parse_string("1\n", parser_class)
| |
    def test_repeat_1_complex(self) -> None:
        """A repeated group ('+' term)+ needs one match; a lone NUMBER fails."""
        grammar = """
        start: term ('+' term)+ NEWLINE
        term: NUMBER
        """
        parser_class = make_parser(grammar)
        node = parse_string("1 + 2 + 3\n", parser_class)
        self.assertEqual(
            node,
            [
                TokenInfo(
                    NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2 + 3\n"
                ),
                [
                    [
                        TokenInfo(
                            OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2 + 3\n"
                        ),
                        TokenInfo(
                            NUMBER,
                            string="2",
                            start=(1, 4),
                            end=(1, 5),
                            line="1 + 2 + 3\n",
                        ),
                    ],
                    [
                        TokenInfo(
                            OP, string="+", start=(1, 6), end=(1, 7), line="1 + 2 + 3\n"
                        ),
                        TokenInfo(
                            NUMBER,
                            string="3",
                            start=(1, 8),
                            end=(1, 9),
                            line="1 + 2 + 3\n",
                        ),
                    ],
                ],
                TokenInfo(
                    NEWLINE, string="\n", start=(1, 9), end=(1, 10), line="1 + 2 + 3\n"
                ),
            ],
        )
        with self.assertRaises(SyntaxError):
            parse_string("1\n", parser_class)
| |
    def test_repeat_with_sep_simple(self) -> None:
        """Gather ','.thing+ returns only the things; the commas are dropped."""
        grammar = """
        start: ','.thing+ NEWLINE
        thing: NUMBER
        """
        parser_class = make_parser(grammar)
        node = parse_string("1, 2, 3\n", parser_class)
        self.assertEqual(
            node,
            [
                [
                    TokenInfo(
                        NUMBER, string="1", start=(1, 0), end=(1, 1), line="1, 2, 3\n"
                    ),
                    TokenInfo(
                        NUMBER, string="2", start=(1, 3), end=(1, 4), line="1, 2, 3\n"
                    ),
                    TokenInfo(
                        NUMBER, string="3", start=(1, 6), end=(1, 7), line="1, 2, 3\n"
                    ),
                ],
                TokenInfo(
                    NEWLINE, string="\n", start=(1, 7), end=(1, 8), line="1, 2, 3\n"
                ),
            ],
        )
| |
    def test_left_recursive(self) -> None:
        """Only directly left-recursive rules get flagged, and '+' associates left."""
        grammar_source = """
        start: expr NEWLINE
        expr: ('-' term | expr '+' term | term)
        term: NUMBER
        foo: NAME+
        bar: NAME*
        baz: NAME?
        """
        grammar: Grammar = parse_string(grammar_source, GrammarParser)
        parser_class = generate_parser(grammar)
        rules = grammar.rules
        self.assertFalse(rules["start"].left_recursive)
        self.assertTrue(rules["expr"].left_recursive)
        self.assertFalse(rules["term"].left_recursive)
        self.assertFalse(rules["foo"].left_recursive)
        self.assertFalse(rules["bar"].left_recursive)
        self.assertFalse(rules["baz"].left_recursive)
        node = parse_string("1 + 2 + 3\n", parser_class)
        # Left associativity: ((1 + 2) + 3), i.e. the nesting is on the left.
        self.assertEqual(
            node,
            [
                [
                    [
                        TokenInfo(
                            NUMBER,
                            string="1",
                            start=(1, 0),
                            end=(1, 1),
                            line="1 + 2 + 3\n",
                        ),
                        TokenInfo(
                            OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2 + 3\n"
                        ),
                        TokenInfo(
                            NUMBER,
                            string="2",
                            start=(1, 4),
                            end=(1, 5),
                            line="1 + 2 + 3\n",
                        ),
                    ],
                    TokenInfo(
                        OP, string="+", start=(1, 6), end=(1, 7), line="1 + 2 + 3\n"
                    ),
                    TokenInfo(
                        NUMBER, string="3", start=(1, 8), end=(1, 9), line="1 + 2 + 3\n"
                    ),
                ],
                TokenInfo(
                    NEWLINE, string="\n", start=(1, 9), end=(1, 10), line="1 + 2 + 3\n"
                ),
            ],
        )
| |
    def test_python_expr(self) -> None:
        """Grammar actions build a real Python AST that compiles and evaluates."""
        grammar = """
        start: expr NEWLINE? $ { ast.Expression(expr, lineno=1, col_offset=0) }
        expr: ( expr '+' term { ast.BinOp(expr, ast.Add(), term, lineno=expr.lineno, col_offset=expr.col_offset, end_lineno=term.end_lineno, end_col_offset=term.end_col_offset) }
            | expr '-' term { ast.BinOp(expr, ast.Sub(), term, lineno=expr.lineno, col_offset=expr.col_offset, end_lineno=term.end_lineno, end_col_offset=term.end_col_offset) }
            | term { term }
            )
        term: ( l=term '*' r=factor { ast.BinOp(l, ast.Mult(), r, lineno=l.lineno, col_offset=l.col_offset, end_lineno=r.end_lineno, end_col_offset=r.end_col_offset) }
            | l=term '/' r=factor { ast.BinOp(l, ast.Div(), r, lineno=l.lineno, col_offset=l.col_offset, end_lineno=r.end_lineno, end_col_offset=r.end_col_offset) }
            | factor { factor }
            )
        factor: ( '(' expr ')' { expr }
            | atom { atom }
            )
        atom: ( n=NAME { ast.Name(id=n.string, ctx=ast.Load(), lineno=n.start[0], col_offset=n.start[1], end_lineno=n.end[0], end_col_offset=n.end[1]) }
            | n=NUMBER { ast.Constant(value=ast.literal_eval(n.string), lineno=n.start[0], col_offset=n.start[1], end_lineno=n.end[0], end_col_offset=n.end[1]) }
            )
        """
        parser_class = make_parser(grammar)
        node = parse_string("(1 + 2*3 + 5)/(6 - 2)\n", parser_class)
        # The action result is an ast.Expression, so it can be compiled directly.
        code = compile(node, "", "eval")
        val = eval(code)
        self.assertEqual(val, 3.0)
| |
| def test_nullable(self) -> None: |
| grammar_source = """ |
| start: sign NUMBER |
| sign: ['-' | '+'] |
| """ |
| grammar: Grammar = parse_string(grammar_source, GrammarParser) |
| rules = grammar.rules |
| nullables = compute_nullables(rules) |
| self.assertNotIn(rules["start"], nullables) # Not None! |
| self.assertIn(rules["sign"], nullables) |
| |
| def test_advanced_left_recursive(self) -> None: |
| grammar_source = """ |
| start: NUMBER | sign start |
| sign: ['-'] |
| """ |
| grammar: Grammar = parse_string(grammar_source, GrammarParser) |
| rules = grammar.rules |
| nullables = compute_nullables(rules) |
| compute_left_recursives(rules) |
| self.assertNotIn(rules["start"], nullables) # Not None! |
| self.assertIn(rules["sign"], nullables) |
| self.assertTrue(rules["start"].left_recursive) |
| self.assertFalse(rules["sign"].left_recursive) |
| |
    def test_mutually_left_recursive(self) -> None:
        """Mutually left-recursive rules (foo <-> bar) generate a working parser."""
        grammar_source = """
        start: foo 'E'
        foo: bar 'A' | 'B'
        bar: foo 'C' | 'D'
        """
        grammar: Grammar = parse_string(grammar_source, GrammarParser)
        out = io.StringIO()
        genr = PythonParserGenerator(grammar, out)
        rules = grammar.rules
        # Both members of the recursion cycle are flagged, but not 'start'.
        self.assertFalse(rules["start"].left_recursive)
        self.assertTrue(rules["foo"].left_recursive)
        self.assertTrue(rules["bar"].left_recursive)
        genr.generate("<string>")
        ns: Dict[str, Any] = {}
        exec(out.getvalue(), ns)  # run the generated parser module source
        parser_class: Type[Parser] = ns["GeneratedParser"]
        node = parse_string("D A C A E", parser_class)

        self.assertEqual(
            node,
            [
                [
                    [
                        [
                            TokenInfo(
                                type=NAME,
                                string="D",
                                start=(1, 0),
                                end=(1, 1),
                                line="D A C A E",
                            ),
                            TokenInfo(
                                type=NAME,
                                string="A",
                                start=(1, 2),
                                end=(1, 3),
                                line="D A C A E",
                            ),
                        ],
                        TokenInfo(
                            type=NAME,
                            string="C",
                            start=(1, 4),
                            end=(1, 5),
                            line="D A C A E",
                        ),
                    ],
                    TokenInfo(
                        type=NAME,
                        string="A",
                        start=(1, 6),
                        end=(1, 7),
                        line="D A C A E",
                    ),
                ],
                TokenInfo(
                    type=NAME, string="E", start=(1, 8), end=(1, 9), line="D A C A E"
                ),
            ],
        )
        node = parse_string("B C A E", parser_class)
        self.assertEqual(
            node,
            [
                [
                    [
                        TokenInfo(
                            type=NAME,
                            string="B",
                            start=(1, 0),
                            end=(1, 1),
                            line="B C A E",
                        ),
                        TokenInfo(
                            type=NAME,
                            string="C",
                            start=(1, 2),
                            end=(1, 3),
                            line="B C A E",
                        ),
                    ],
                    TokenInfo(
                        type=NAME, string="A", start=(1, 4), end=(1, 5), line="B C A E"
                    ),
                ],
                TokenInfo(
                    type=NAME, string="E", start=(1, 6), end=(1, 7), line="B C A E"
                ),
            ],
        )
| |
    def test_nasty_mutually_left_recursive(self) -> None:
        """PEG ordered choice makes 'x - + =' unparseable with this grammar."""
        # This grammar does not recognize 'x - + =', much to my chagrin.
        # But that's the way PEG works.
        # [Breathlessly]
        # The problem is that the toplevel target call
        # recurses into maybe, which recognizes 'x - +',
        # and then the toplevel target looks for another '+',
        # which fails, so it retreats to NAME,
        # which succeeds, so we end up just recognizing 'x',
        # and then start fails because there's no '=' after that.
        grammar_source = """
        start: target '='
        target: maybe '+' | NAME
        maybe: maybe '-' | target
        """
        grammar: Grammar = parse_string(grammar_source, GrammarParser)
        out = io.StringIO()
        genr = PythonParserGenerator(grammar, out)
        genr.generate("<string>")
        ns: Dict[str, Any] = {}
        exec(out.getvalue(), ns)  # run the generated parser module source
        parser_class = ns["GeneratedParser"]
        with self.assertRaises(SyntaxError):
            parse_string("x - + =", parser_class)
| |
    def test_lookahead(self) -> None:
        """Positive (&) and negative (!) lookaheads consume no input."""
        grammar = """
        start: (expr_stmt | assign_stmt) &'.'
        expr_stmt: !(target '=') expr
        assign_stmt: target '=' expr
        expr: term ('+' term)*
        target: NAME
        term: NUMBER
        """
        parser_class = make_parser(grammar)
        node = parse_string("foo = 12 + 12 .", parser_class)
        # The trailing '.' is required by &'.' but not part of the result.
        self.assertEqual(
            node,
            [
                TokenInfo(
                    NAME, string="foo", start=(1, 0), end=(1, 3), line="foo = 12 + 12 ."
                ),
                TokenInfo(
                    OP, string="=", start=(1, 4), end=(1, 5), line="foo = 12 + 12 ."
                ),
                [
                    TokenInfo(
                        NUMBER,
                        string="12",
                        start=(1, 6),
                        end=(1, 8),
                        line="foo = 12 + 12 .",
                    ),
                    [
                        [
                            TokenInfo(
                                OP,
                                string="+",
                                start=(1, 9),
                                end=(1, 10),
                                line="foo = 12 + 12 .",
                            ),
                            TokenInfo(
                                NUMBER,
                                string="12",
                                start=(1, 11),
                                end=(1, 13),
                                line="foo = 12 + 12 .",
                            ),
                        ]
                    ],
                ],
            ],
        )
| |
    def test_named_lookahead_error(self) -> None:
        """Binding a name to a negative lookahead is rejected at build time."""
        grammar = """
        start: foo=!'x' NAME
        """
        with self.assertRaises(SyntaxError):
            make_parser(grammar)
| |
    def test_start_leader(self) -> None:
        """A start rule that leads its own recursion cycle must still build."""
        grammar = """
        start: attr | NAME
        attr: start '.' NAME
        """
        # Would assert False without a special case in compute_left_recursives().
        make_parser(grammar)
| |
    def test_opt_sequence(self) -> None:
        """An optional repeated item [NAME*] must generate valid parser source."""
        grammar = """
        start: [NAME*]
        """
        # This case was failing because of a double trailing comma at the end
        # of a line in the generated source. See bpo-41044
        make_parser(grammar)
| |
| def test_left_recursion_too_complex(self) -> None: |
| grammar = """ |
| start: foo |
| foo: bar '+' | baz '+' | '+' |
| bar: baz '-' | foo '-' | '-' |
| baz: foo '*' | bar '*' | '*' |
| """ |
| with self.assertRaises(ValueError) as errinfo: |
| make_parser(grammar) |
| self.assertTrue("no leader" in str(errinfo.exception.value)) |
| |
| def test_cut(self) -> None: |
| grammar = """ |
| start: '(' ~ expr ')' |
| expr: NUMBER |
| """ |
| parser_class = make_parser(grammar) |
| node = parse_string("(1)", parser_class) |
| self.assertEqual( |
| node, |
| [ |
| TokenInfo(OP, string="(", start=(1, 0), end=(1, 1), line="(1)"), |
| TokenInfo(NUMBER, string="1", start=(1, 1), end=(1, 2), line="(1)"), |
| TokenInfo(OP, string=")", start=(1, 2), end=(1, 3), line="(1)"), |
| ], |
| ) |
| |
| def test_dangling_reference(self) -> None: |
| grammar = """ |
| start: foo ENDMARKER |
| foo: bar NAME |
| """ |
| with self.assertRaises(GrammarError): |
| parser_class = make_parser(grammar) |
| |
| def test_bad_token_reference(self) -> None: |
| grammar = """ |
| start: foo |
| foo: NAMEE |
| """ |
| with self.assertRaises(GrammarError): |
| parser_class = make_parser(grammar) |
| |
| def test_missing_start(self) -> None: |
| grammar = """ |
| foo: NAME |
| """ |
| with self.assertRaises(GrammarError): |
| parser_class = make_parser(grammar) |
| |
| def test_invalid_rule_name(self) -> None: |
| grammar = """ |
| start: _a b |
| _a: 'a' |
| b: 'b' |
| """ |
| with self.assertRaisesRegex(GrammarError, "cannot start with underscore: '_a'"): |
| parser_class = make_parser(grammar) |
| |
| def test_invalid_variable_name(self) -> None: |
| grammar = """ |
| start: a b |
| a: _x='a' |
| b: 'b' |
| """ |
| with self.assertRaisesRegex(GrammarError, "cannot start with underscore: '_x'"): |
| parser_class = make_parser(grammar) |
| |
| def test_invalid_variable_name_in_temporal_rule(self) -> None: |
| grammar = """ |
| start: a b |
| a: (_x='a' | 'b') | 'c' |
| b: 'b' |
| """ |
| with self.assertRaisesRegex(GrammarError, "cannot start with underscore: '_x'"): |
| parser_class = make_parser(grammar) |
| |
| def test_soft_keyword(self) -> None: |
| grammar = """ |
| start: |
| | "number" n=NUMBER { eval(n.string) } |
| | "string" n=STRING { n.string } |
| | SOFT_KEYWORD l=NAME n=(NUMBER | NAME | STRING) { f"{l.string} = {n.string}"} |
| """ |
| parser_class = make_parser(grammar) |
| self.assertEqual(parse_string("number 1", parser_class, verbose=True), 1) |
| self.assertEqual(parse_string("string 'b'", parser_class, verbose=True), "'b'") |
| self.assertEqual( |
| parse_string("number test 1", parser_class, verbose=True), "test = 1" |
| ) |
| assert ( |
| parse_string("string test 'b'", parser_class, verbose=True) == "test = 'b'" |
| ) |
| with self.assertRaises(SyntaxError): |
| parse_string("test 1", parser_class, verbose=True) |
| |
| def test_forced(self) -> None: |
| grammar = """ |
| start: NAME &&':' | NAME |
| """ |
| parser_class = make_parser(grammar) |
| self.assertTrue(parse_string("number :", parser_class, verbose=True)) |
| with self.assertRaises(SyntaxError) as e: |
| parse_string("a", parser_class, verbose=True) |
| |
| self.assertIn("expected ':'", str(e.exception)) |
| |
| def test_forced_with_group(self) -> None: |
| grammar = """ |
| start: NAME &&(':' | ';') | NAME |
| """ |
| parser_class = make_parser(grammar) |
| self.assertTrue(parse_string("number :", parser_class, verbose=True)) |
| self.assertTrue(parse_string("number ;", parser_class, verbose=True)) |
| with self.assertRaises(SyntaxError) as e: |
| parse_string("a", parser_class, verbose=True) |
| self.assertIn("expected (':' | ';')", e.exception.args[0]) |
| |
| def test_unreachable_explicit(self) -> None: |
| source = """ |
| start: NAME { UNREACHABLE } |
| """ |
| grammar = parse_string(source, GrammarParser) |
| out = io.StringIO() |
| genr = PythonParserGenerator( |
| grammar, out, unreachable_formatting="This is a test" |
| ) |
| genr.generate("<string>") |
| self.assertIn("This is a test", out.getvalue()) |
| |
| def test_unreachable_implicit1(self) -> None: |
| source = """ |
| start: NAME | invalid_input |
| invalid_input: NUMBER { None } |
| """ |
| grammar = parse_string(source, GrammarParser) |
| out = io.StringIO() |
| genr = PythonParserGenerator( |
| grammar, out, unreachable_formatting="This is a test" |
| ) |
| genr.generate("<string>") |
| self.assertIn("This is a test", out.getvalue()) |
| |
| def test_unreachable_implicit2(self) -> None: |
| source = """ |
| start: NAME | '(' invalid_input ')' |
| invalid_input: NUMBER { None } |
| """ |
| grammar = parse_string(source, GrammarParser) |
| out = io.StringIO() |
| genr = PythonParserGenerator( |
| grammar, out, unreachable_formatting="This is a test" |
| ) |
| genr.generate("<string>") |
| self.assertIn("This is a test", out.getvalue()) |
| |
| def test_unreachable_implicit3(self) -> None: |
| source = """ |
| start: NAME | invalid_input { None } |
| invalid_input: NUMBER |
| """ |
| grammar = parse_string(source, GrammarParser) |
| out = io.StringIO() |
| genr = PythonParserGenerator( |
| grammar, out, unreachable_formatting="This is a test" |
| ) |
| genr.generate("<string>") |
| self.assertNotIn("This is a test", out.getvalue()) |
| |
    def test_locations_in_alt_action_and_group(self) -> None:
        """LOCATIONS in actions must produce the same AST attributes as ast.parse."""
        grammar = """
        start: t=term NEWLINE? $ { ast.Expression(t, LOCATIONS) }
        term:
            | l=term '*' r=factor { ast.BinOp(l, ast.Mult(), r, LOCATIONS) }
            | l=term '/' r=factor { ast.BinOp(l, ast.Div(), r, LOCATIONS) }
            | factor
        factor:
            | (
                n=NAME { ast.Name(id=n.string, ctx=ast.Load(), LOCATIONS) } |
                n=NUMBER { ast.Constant(value=ast.literal_eval(n.string), LOCATIONS) }
            )
        """
        parser_class = make_parser(grammar)
        source = "2*3\n"
        o = ast.dump(parse_string(source, parser_class).body, include_attributes=True)
        # ast.parse adds kind=None on Constant nodes; strip it for comparison.
        p = ast.dump(ast.parse(source).body[0].value, include_attributes=True).replace(
            " kind=None,", ""
        )
        diff = "\n".join(
            difflib.unified_diff(
                o.split("\n"), p.split("\n"), "cpython", "python-pegen"
            )
        )
        self.assertFalse(diff)
| |
| |
class TestGrammarVisitor(unittest.TestCase):
    """Count-visiting tests for GrammarVisitor.

    Must subclass unittest.TestCase: otherwise the test_* methods are never
    collected by the unittest runner, and the self.assertEqual calls below
    would raise AttributeError if invoked.
    """

    class Visitor(GrammarVisitor):
        """A visitor that simply counts every node it is dispatched to."""

        def __init__(self) -> None:
            self.n_nodes = 0

        def visit(self, node: Any, *args: Any, **kwargs: Any) -> None:
            self.n_nodes += 1
            super().visit(node, *args, **kwargs)

    def test_parse_trivial_grammar(self) -> None:
        """A single-literal grammar has exactly 6 nodes."""
        grammar = """
        start: 'a'
        """
        rules = parse_string(grammar, GrammarParser)
        visitor = self.Visitor()

        visitor.visit(rules)

        self.assertEqual(visitor.n_nodes, 6)

    def test_parse_or_grammar(self) -> None:
        """Two rules, one with two alternatives."""
        grammar = """
        start: rule
        rule: 'a' | 'b'
        """
        rules = parse_string(grammar, GrammarParser)
        visitor = self.Visitor()

        visitor.visit(rules)

        # Grammar/Rule/Rhs/Alt/NamedItem/NameLeaf -> 6
        # Rule/Rhs/ -> 2
        # Alt/NamedItem/StringLeaf -> 3
        # Alt/NamedItem/StringLeaf -> 3

        self.assertEqual(visitor.n_nodes, 14)

    def test_parse_repeat1_grammar(self) -> None:
        """A one-or-more repetition adds a Repeat1 node."""
        grammar = """
        start: 'a'+
        """
        rules = parse_string(grammar, GrammarParser)
        visitor = self.Visitor()

        visitor.visit(rules)

        # Grammar/Rule/Rhs/Alt/NamedItem/Repeat1/StringLeaf -> 7
        self.assertEqual(visitor.n_nodes, 7)

    def test_parse_repeat0_grammar(self) -> None:
        """A zero-or-more repetition adds a Repeat0 node."""
        grammar = """
        start: 'a'*
        """
        rules = parse_string(grammar, GrammarParser)
        visitor = self.Visitor()

        visitor.visit(rules)

        # Grammar/Rule/Rhs/Alt/NamedItem/Repeat0/StringLeaf -> 7

        self.assertEqual(visitor.n_nodes, 7)

    def test_parse_optional_grammar(self) -> None:
        """An optional group expands to a nested Opt/Rhs/Alt subtree."""
        grammar = """
        start: 'a' ['b']
        """
        rules = parse_string(grammar, GrammarParser)
        visitor = self.Visitor()

        visitor.visit(rules)

        # Grammar/Rule/Rhs/Alt/NamedItem/StringLeaf -> 6
        # NamedItem/Opt/Rhs/Alt/NamedItem/Stringleaf -> 6

        self.assertEqual(visitor.n_nodes, 12)
| |
| |
class TestGrammarVisualizer(unittest.TestCase):
    """Tests for ASTGrammarPrinter's box-drawing tree rendering."""

    def test_simple_rule(self) -> None:
        """A two-item alternative renders as sibling NamedItem branches."""
        grammar = """
        start: 'a' 'b'
        """
        rules = parse_string(grammar, GrammarParser)

        printer = ASTGrammarPrinter()
        lines: List[str] = []
        printer.print_grammar_ast(rules, printer=lines.append)

        output = "\n".join(lines)
        expected_output = textwrap.dedent(
            """\
        └──Rule
           └──Rhs
              └──Alt
                 ├──NamedItem
                 │  └──StringLeaf("'a'")
                 └──NamedItem
                    └──StringLeaf("'b'")
        """
        )

        self.assertEqual(output, expected_output)

    def test_multiple_rules(self) -> None:
        """Each rule prints as its own tree, separated by a blank line."""
        grammar = """
        start: a b
        a: 'a'
        b: 'b'
        """
        rules = parse_string(grammar, GrammarParser)

        printer = ASTGrammarPrinter()
        lines: List[str] = []
        printer.print_grammar_ast(rules, printer=lines.append)

        output = "\n".join(lines)
        expected_output = textwrap.dedent(
            """\
        └──Rule
           └──Rhs
              └──Alt
                 ├──NamedItem
                 │  └──NameLeaf('a')
                 └──NamedItem
                    └──NameLeaf('b')

        └──Rule
           └──Rhs
              └──Alt
                 └──NamedItem
                    └──StringLeaf("'a'")

        └──Rule
           └──Rhs
              └──Alt
                 └──NamedItem
                    └──StringLeaf("'b'")
        """
        )

        self.assertEqual(output, expected_output)

    def test_deep_nested_rule(self) -> None:
        """Nested optional groups produce correspondingly nested Opt subtrees."""
        grammar = """
        start: 'a' ['b'['c'['d']]]
        """
        rules = parse_string(grammar, GrammarParser)

        printer = ASTGrammarPrinter()
        lines: List[str] = []
        printer.print_grammar_ast(rules, printer=lines.append)

        output = "\n".join(lines)
        expected_output = textwrap.dedent(
            """\
        └──Rule
           └──Rhs
              └──Alt
                 ├──NamedItem
                 │  └──StringLeaf("'a'")
                 └──NamedItem
                    └──Opt
                       └──Rhs
                          └──Alt
                             ├──NamedItem
                             │  └──StringLeaf("'b'")
                             └──NamedItem
                                └──Opt
                                   └──Rhs
                                      └──Alt
                                         ├──NamedItem
                                         │  └──StringLeaf("'c'")
                                         └──NamedItem
                                            └──Opt
                                               └──Rhs
                                                  └──Alt
                                                     └──NamedItem
                                                        └──StringLeaf("'d'")
        """
        )

        self.assertEqual(output, expected_output)