| import contextlib |
| from abc import abstractmethod |
| |
| from typing import AbstractSet, Dict, IO, Iterator, List, Optional, Set, Text, Tuple |
| |
| from pegen import sccutils |
| from pegen.grammar import ( |
| Grammar, |
| Rule, |
| Rhs, |
| Alt, |
| NamedItem, |
| Plain, |
| NameLeaf, |
| Gather, |
| ) |
| from pegen.grammar import GrammarError, GrammarVisitor |
| |
| |
| class RuleCheckingVisitor(GrammarVisitor): |
| def __init__(self, rules: Dict[str, Rule], tokens: Dict[int, str]): |
| self.rules = rules |
| self.tokens = tokens |
| |
| def visit_NameLeaf(self, node: NameLeaf) -> None: |
| if node.value not in self.rules and node.value not in self.tokens.values(): |
| # TODO: Add line/col info to (leaf) nodes |
| raise GrammarError(f"Dangling reference to rule {node.value!r}") |
| |
| def visit_NamedItem(self, node: NamedItem) -> None: |
| if node.name and node.name.startswith("_"): |
| raise GrammarError(f"Variable names cannot start with underscore: '{node.name}'") |
| self.visit(node.item) |
| |
| |
| class ParserGenerator: |
| |
| callmakervisitor: GrammarVisitor |
| |
| def __init__(self, grammar: Grammar, tokens: Dict[int, str], file: Optional[IO[Text]]): |
| self.grammar = grammar |
| self.tokens = tokens |
| self.rules = grammar.rules |
| self.validate_rule_names() |
| if "trailer" not in grammar.metas and "start" not in self.rules: |
| raise GrammarError("Grammar without a trailer must have a 'start' rule") |
| checker = RuleCheckingVisitor(self.rules, self.tokens) |
| for rule in self.rules.values(): |
| checker.visit(rule) |
| self.file = file |
| self.level = 0 |
| compute_nullables(self.rules) |
| self.first_graph, self.first_sccs = compute_left_recursives(self.rules) |
| self.todo = self.rules.copy() # Rules to generate |
| self.counter = 0 # For name_rule()/name_loop() |
| self.keyword_counter = 499 # For keyword_type() |
| self.all_rules: Dict[str, Rule] = {} # Rules + temporal rules |
| self._local_variable_stack: List[List[str]] = [] |
| |
| def validate_rule_names(self) -> None: |
| for rule in self.rules: |
| if rule.startswith("_"): |
| raise GrammarError(f"Rule names cannot start with underscore: '{rule}'") |
| |
| @contextlib.contextmanager |
| def local_variable_context(self) -> Iterator[None]: |
| self._local_variable_stack.append([]) |
| yield |
| self._local_variable_stack.pop() |
| |
| @property |
| def local_variable_names(self) -> List[str]: |
| return self._local_variable_stack[-1] |
| |
| @abstractmethod |
| def generate(self, filename: str) -> None: |
| raise NotImplementedError |
| |
| @contextlib.contextmanager |
| def indent(self) -> Iterator[None]: |
| self.level += 1 |
| try: |
| yield |
| finally: |
| self.level -= 1 |
| |
| def print(self, *args: object) -> None: |
| if not args: |
| print(file=self.file) |
| else: |
| print(" " * self.level, end="", file=self.file) |
| print(*args, file=self.file) |
| |
| def printblock(self, lines: str) -> None: |
| for line in lines.splitlines(): |
| self.print(line) |
| |
| def collect_todo(self) -> None: |
| done: Set[str] = set() |
| while True: |
| alltodo = list(self.todo) |
| self.all_rules.update(self.todo) |
| todo = [i for i in alltodo if i not in done] |
| if not todo: |
| break |
| for rulename in todo: |
| self.todo[rulename].collect_todo(self) |
| done = set(alltodo) |
| |
| def keyword_type(self) -> int: |
| self.keyword_counter += 1 |
| return self.keyword_counter |
| |
| def name_node(self, rhs: Rhs) -> str: |
| self.counter += 1 |
| name = f"_tmp_{self.counter}" # TODO: Pick a nicer name. |
| self.todo[name] = Rule(name, None, rhs) |
| return name |
| |
| def name_loop(self, node: Plain, is_repeat1: bool) -> str: |
| self.counter += 1 |
| if is_repeat1: |
| prefix = "_loop1_" |
| else: |
| prefix = "_loop0_" |
| name = f"{prefix}{self.counter}" # TODO: It's ugly to signal via the name. |
| self.todo[name] = Rule(name, None, Rhs([Alt([NamedItem(None, node)])])) |
| return name |
| |
| def name_gather(self, node: Gather) -> str: |
| self.counter += 1 |
| name = f"_gather_{self.counter}" |
| self.counter += 1 |
| extra_function_name = f"_loop0_{self.counter}" |
| extra_function_alt = Alt( |
| [NamedItem(None, node.separator), NamedItem("elem", node.node)], action="elem", |
| ) |
| self.todo[extra_function_name] = Rule( |
| extra_function_name, None, Rhs([extra_function_alt]), |
| ) |
| alt = Alt([NamedItem("elem", node.node), NamedItem("seq", NameLeaf(extra_function_name))],) |
| self.todo[name] = Rule(name, None, Rhs([alt]),) |
| return name |
| |
| def dedupe(self, name: str) -> str: |
| origname = name |
| counter = 0 |
| while name in self.local_variable_names: |
| counter += 1 |
| name = f"{origname}_{counter}" |
| self.local_variable_names.append(name) |
| return name |
| |
| |
| def compute_nullables(rules: Dict[str, Rule]) -> None: |
| """Compute which rules in a grammar are nullable. |
| |
| Thanks to TatSu (tatsu/leftrec.py) for inspiration. |
| """ |
| for rule in rules.values(): |
| rule.nullable_visit(rules) |
| |
| |
| def compute_left_recursives( |
| rules: Dict[str, Rule] |
| ) -> Tuple[Dict[str, AbstractSet[str]], List[AbstractSet[str]]]: |
| graph = make_first_graph(rules) |
| sccs = list(sccutils.strongly_connected_components(graph.keys(), graph)) |
| for scc in sccs: |
| if len(scc) > 1: |
| for name in scc: |
| rules[name].left_recursive = True |
| # Try to find a leader such that all cycles go through it. |
| leaders = set(scc) |
| for start in scc: |
| for cycle in sccutils.find_cycles_in_scc(graph, scc, start): |
| # print("Cycle:", " -> ".join(cycle)) |
| leaders -= scc - set(cycle) |
| if not leaders: |
| raise ValueError( |
| f"SCC {scc} has no leadership candidate (no element is included in all cycles)" |
| ) |
| # print("Leaders:", leaders) |
| leader = min(leaders) # Pick an arbitrary leader from the candidates. |
| rules[leader].leader = True |
| else: |
| name = min(scc) # The only element. |
| if name in graph[name]: |
| rules[name].left_recursive = True |
| rules[name].leader = True |
| return graph, sccs |
| |
| |
| def make_first_graph(rules: Dict[str, Rule]) -> Dict[str, AbstractSet[str]]: |
| """Compute the graph of left-invocations. |
| |
| There's an edge from A to B if A may invoke B at its initial |
| position. |
| |
| Note that this requires the nullable flags to have been computed. |
| """ |
| graph = {} |
| vertices: Set[str] = set() |
| for rulename, rhs in rules.items(): |
| graph[rulename] = names = rhs.initial_names() |
| vertices |= names |
| for vertex in vertices: |
| graph.setdefault(vertex, set()) |
| return graph |