Tools/peg_generator/pegen/parser_generator.py - platform/external/python/cpython3 - Git at Google

 import contextlib
 from abc import abstractmethod

 from typing import AbstractSet, Dict, IO, Iterator, List, Optional, Set, Text, Tuple

 from pegen import sccutils
 from pegen.grammar import (
     Grammar,
     Rule,
     Rhs,
     Alt,
     NamedItem,
     Plain,
     NameLeaf,
     Gather,
 )
 from pegen.grammar import GrammarError, GrammarVisitor


 class RuleCheckingVisitor(GrammarVisitor):
     """Visitor that validates name references and variable names.

     A GrammarError is raised when a name leaf matches neither a known rule
     nor a token name, or when a named item binds a variable whose name
     begins with an underscore.
     """

     def __init__(self, rules: Dict[str, Rule], tokens: Dict[int, str]):
         # Both mappings are kept as given and consulted at visit time.
         self.tokens = tokens
         self.rules = rules

     def visit_NameLeaf(self, node: NameLeaf) -> None:
         """Raise GrammarError unless ``node.value`` is a rule or token name."""
         target = node.value
         if target in self.rules:
             return
         if target in self.tokens.values():
             return
         # TODO: Add line/col info to (leaf) nodes
         raise GrammarError(f"Dangling reference to rule {node.value!r}")

     def visit_NamedItem(self, node: NamedItem) -> None:
         """Reject underscore-prefixed variable names, then visit the item."""
         variable = node.name
         if variable and variable.startswith("_"):
             raise GrammarError(f"Variable names cannot start with underscore: '{node.name}'")
         self.visit(node.item)


 class ParserGenerator:
     """Common base for parser generators driven by a pegen Grammar.

     Construction validates the grammar (rule names, presence of a start
     rule, dangling references), runs the nullable and left-recursion
     analyses, and sets up the bookkeeping used while emitting code.
     Subclasses supply generate(); the helpers here cover indented output,
     names for generated rules and per-scope local variable names.
     """

     # Declared here but never assigned in this class.
     callmakervisitor: GrammarVisitor

     def __init__(self, grammar: Grammar, tokens: Dict[int, str], file: Optional[IO[Text]]):
         """Validate *grammar* and prepare the generator state.

         Args:
             grammar: Grammar whose ``rules`` and ``metas`` are consulted.
             tokens: Token mapping; its values are accepted as valid names
                 when references are checked.
             file: Stream handed to the builtin print() by self.print().

         Raises:
             GrammarError: a rule name starts with an underscore, the grammar
                 has neither a "trailer" meta nor a "start" rule, or the
                 reference check rejects a rule.
         """
         self.grammar = grammar
         self.tokens = tokens
         self.rules = grammar.rules
         self.validate_rule_names()
         if "trailer" not in grammar.metas and "start" not in self.rules:
             raise GrammarError("Grammar without a trailer must have a 'start' rule")
         checker = RuleCheckingVisitor(self.rules, self.tokens)
         for rule in self.rules.values():
             checker.visit(rule)
         self.file = file
         self.level = 0  # Current indentation depth used by print()
         compute_nullables(self.rules)
         self.first_graph, self.first_sccs = compute_left_recursives(self.rules)
         self.todo = self.rules.copy()  # Rules to generate
         self.counter = 0  # For name_node()/name_loop()/name_gather()
         self.keyword_counter = 499  # For keyword_type(); first value handed out is 500
         self.all_rules: Dict[str, Rule] = {}  # Rules + temporary (generated) rules
         self._local_variable_stack: List[List[str]] = []

     def validate_rule_names(self) -> None:
         """Raise GrammarError if any rule name starts with an underscore."""
         for rule in self.rules:
             if rule.startswith("_"):
                 raise GrammarError(f"Rule names cannot start with underscore: '{rule}'")

     @contextlib.contextmanager
     def local_variable_context(self) -> Iterator[None]:
         """Open a fresh scope of local variable names for the with-block."""
         self._local_variable_stack.append([])
         try:
             yield
         finally:
             # Pop even when the body raises so the scope stack stays
             # balanced, mirroring what indent() does for the level.
             self._local_variable_stack.pop()

     @property
     def local_variable_names(self) -> List[str]:
         """Names recorded in the innermost scope.

         Raises IndexError when no local_variable_context() is active.
         """
         return self._local_variable_stack[-1]

     @abstractmethod
     def generate(self, filename: str) -> None:
         """Emit the parser; subclasses must override this."""
         # The class has no ABCMeta metaclass, so @abstractmethod alone does
         # not block instantiation; this raise is the effective guard.
         raise NotImplementedError

     @contextlib.contextmanager
     def indent(self) -> Iterator[None]:
         """Increase the output indentation by one level inside the with-block."""
         self.level += 1
         try:
             yield
         finally:
             self.level -= 1

     def print(self, *args: object) -> None:
         """Write *args* to self.file, prefixed by the current indentation.

         With no arguments only a newline is written, without indentation.
         """
         if not args:
             print(file=self.file)
             return
         print("    " * self.level, end="", file=self.file)
         print(*args, file=self.file)

     def printblock(self, lines: str) -> None:
         """Print each line of *lines* through self.print()."""
         for line in lines.splitlines():
             self.print(line)

     def collect_todo(self) -> None:
         """Call collect_todo(self) on every rule in self.todo until none are new.

         Each sweep also copies self.todo into self.all_rules.  Sweeps repeat
         because processing a rule may add entries to self.todo; the loop
         stops once a sweep finds no name that was not already processed.
         """
         done: Set[str] = set()
         while True:
             alltodo = list(self.todo)
             self.all_rules.update(self.todo)
             todo = [i for i in alltodo if i not in done]
             if not todo:
                 break
             for rulename in todo:
                 self.todo[rulename].collect_todo(self)
             done = set(alltodo)

     def keyword_type(self) -> int:
         """Return the next keyword number (previous value plus one)."""
         self.keyword_counter += 1
         return self.keyword_counter

     def name_node(self, rhs: Rhs) -> str:
         """Register *rhs* as a new ``_tmp_N`` rule in self.todo and return its name."""
         self.counter += 1
         name = f"_tmp_{self.counter}"  # TODO: Pick a nicer name.
         self.todo[name] = Rule(name, None, rhs)
         return name

     def name_loop(self, node: Plain, is_repeat1: bool) -> str:
         """Register a loop rule wrapping *node* and return its name.

         The name is ``_loop1_N`` when *is_repeat1* is true, else ``_loop0_N``.
         """
         self.counter += 1
         prefix = "_loop1_" if is_repeat1 else "_loop0_"
         name = f"{prefix}{self.counter}"  # TODO: It's ugly to signal via the name.
         self.todo[name] = Rule(name, None, Rhs([Alt([NamedItem(None, node)])]))
         return name

     def name_gather(self, node: Gather) -> str:
         """Register the two rules that implement a gather and return the outer name.

         The outer ``_gather_N`` rule is ``elem`` followed by ``seq``, where
         ``seq`` refers to a helper ``_loop0_M`` rule whose single alternative
         is the separator followed by ``elem`` with action ``"elem"``.
         """
         self.counter += 1
         name = f"_gather_{self.counter}"
         self.counter += 1
         extra_function_name = f"_loop0_{self.counter}"
         extra_function_alt = Alt(
             [NamedItem(None, node.separator), NamedItem("elem", node.node)], action="elem",
         )
         self.todo[extra_function_name] = Rule(
             extra_function_name, None, Rhs([extra_function_alt]),
         )
         alt = Alt([NamedItem("elem", node.node), NamedItem("seq", NameLeaf(extra_function_name))],)
         self.todo[name] = Rule(name, None, Rhs([alt]),)
         return name

     def dedupe(self, name: str) -> str:
         """Return a name unused in the current scope and record it there.

         *name* is returned unchanged when free; otherwise ``name_1``,
         ``name_2``, ... are tried in order until one is free.
         """
         origname = name
         counter = 0
         while name in self.local_variable_names:
             counter += 1
             name = f"{origname}_{counter}"
         self.local_variable_names.append(name)
         return name


 def compute_nullables(rules: Dict[str, Rule]) -> None:
     """Run the nullable analysis over every rule of a grammar.

     Each rule's nullable_visit() is called once with the complete rule
     table.  Nothing is returned.

     Thanks to TatSu (tatsu/leftrec.py) for inspiration.
     """
     for definition in rules.values():
         definition.nullable_visit(rules)


 def compute_left_recursives(
     rules: Dict[str, Rule]
 ) -> Tuple[Dict[str, AbstractSet[str]], List[AbstractSet[str]]]:
     """Flag left-recursive rules and pick a leader for each recursive group.

     The first graph from make_first_graph() is split into strongly
     connected components.  For a component with several rules, every
     member gets ``left_recursive = True`` and one rule that lies on every
     cycle reported by sccutils.find_cycles_in_scc() gets ``leader = True``.
     For a single-rule component both flags are set only when the rule has
     an edge to itself.

     Returns:
         The pair ``(graph, sccs)``.

     Raises:
         ValueError: no rule of a multi-rule component is on all its cycles.
     """
     graph = make_first_graph(rules)
     sccs = list(sccutils.strongly_connected_components(graph.keys(), graph))
     for scc in sccs:
         if len(scc) <= 1:
             only = min(scc)  # The only element.
             if only in graph[only]:
                 rules[only].left_recursive = True
                 rules[only].leader = True
             continue

         for member in scc:
             rules[member].left_recursive = True
         # Keep only the members that appear on every cycle seen so far.
         candidates = set(scc)
         for origin in scc:
             for cycle in sccutils.find_cycles_in_scc(graph, scc, origin):
                 # print("Cycle:", " -> ".join(cycle))
                 candidates.intersection_update(set(cycle))
                 if not candidates:
                     raise ValueError(
                         f"SCC {scc} has no leadership candidate (no element is included in all cycles)"
                     )
         # print("Leaders:", candidates)
         chosen = min(candidates)  # Pick an arbitrary leader from the candidates.
         rules[chosen].leader = True
     return graph, sccs


 def make_first_graph(rules: Dict[str, Rule]) -> Dict[str, AbstractSet[str]]:
     """Build the graph of left-invocations.

     Every rule name maps to the set returned by that rule's
     initial_names(): there is an edge from A to B if A may invoke B at its
     initial position.  Names that are only referenced get an empty
     successor set, so every vertex is also a key.

     The nullable flags must already have been computed.
     """
     graph: Dict[str, AbstractSet[str]] = {}
     referenced: Set[str] = set()
     for rule_name, rule in rules.items():
         successors = rule.initial_names()
         graph[rule_name] = successors
         referenced |= successors
     for target in referenced:
         if target not in graph:
             graph[target] = set()
     return graph
	import contextlib
	from abc import abstractmethod

	from typing import AbstractSet, Dict, IO, Iterator, List, Optional, Set, Text, Tuple

	from pegen import sccutils
	from pegen.grammar import (
	Grammar,
	Rule,
	Rhs,
	Alt,
	NamedItem,
	Plain,
	NameLeaf,
	Gather,
	)
	from pegen.grammar import GrammarError, GrammarVisitor


	class RuleCheckingVisitor(GrammarVisitor):
	    """Visitor that validates name references and variable names.

	    A GrammarError is raised when a name leaf matches neither a known rule
	    nor a token name, or when a named item binds a variable whose name
	    begins with an underscore.
	    """

	    def __init__(self, rules: Dict[str, Rule], tokens: Dict[int, str]):
	        # Both mappings are kept as given and consulted at visit time.
	        self.rules = rules
	        self.tokens = tokens

	    def visit_NameLeaf(self, node: NameLeaf) -> None:
	        """Raise GrammarError unless ``node.value`` is a rule or token name."""
	        if node.value not in self.rules and node.value not in self.tokens.values():
	            # TODO: Add line/col info to (leaf) nodes
	            raise GrammarError(f"Dangling reference to rule {node.value!r}")

	    def visit_NamedItem(self, node: NamedItem) -> None:
	        """Reject underscore-prefixed variable names, then visit the item."""
	        if node.name and node.name.startswith("_"):
	            raise GrammarError(f"Variable names cannot start with underscore: '{node.name}'")
	        self.visit(node.item)


	class ParserGenerator:
	    """Common base for parser generators driven by a pegen Grammar.

	    Construction validates the grammar (rule names, presence of a start
	    rule, dangling references), runs the nullable and left-recursion
	    analyses, and sets up the bookkeeping used while emitting code.
	    Subclasses supply generate(); the helpers here cover indented output,
	    names for generated rules and per-scope local variable names.
	    """

	    # Declared here but never assigned in this class.
	    callmakervisitor: GrammarVisitor

	    def __init__(self, grammar: Grammar, tokens: Dict[int, str], file: Optional[IO[Text]]):
	        """Validate *grammar* and prepare the generator state.

	        Args:
	            grammar: Grammar whose ``rules`` and ``metas`` are consulted.
	            tokens: Token mapping; its values are accepted as valid names
	                when references are checked.
	            file: Stream handed to the builtin print() by self.print().

	        Raises:
	            GrammarError: a rule name starts with an underscore, the grammar
	                has neither a "trailer" meta nor a "start" rule, or the
	                reference check rejects a rule.
	        """
	        self.grammar = grammar
	        self.tokens = tokens
	        self.rules = grammar.rules
	        self.validate_rule_names()
	        if "trailer" not in grammar.metas and "start" not in self.rules:
	            raise GrammarError("Grammar without a trailer must have a 'start' rule")
	        checker = RuleCheckingVisitor(self.rules, self.tokens)
	        for rule in self.rules.values():
	            checker.visit(rule)
	        self.file = file
	        self.level = 0  # Current indentation depth used by print()
	        compute_nullables(self.rules)
	        self.first_graph, self.first_sccs = compute_left_recursives(self.rules)
	        self.todo = self.rules.copy()  # Rules to generate
	        self.counter = 0  # For name_node()/name_loop()/name_gather()
	        self.keyword_counter = 499  # For keyword_type(); first value handed out is 500
	        self.all_rules: Dict[str, Rule] = {}  # Rules + temporary (generated) rules
	        self._local_variable_stack: List[List[str]] = []

	    def validate_rule_names(self) -> None:
	        """Raise GrammarError if any rule name starts with an underscore."""
	        for rule in self.rules:
	            if rule.startswith("_"):
	                raise GrammarError(f"Rule names cannot start with underscore: '{rule}'")

	    @contextlib.contextmanager
	    def local_variable_context(self) -> Iterator[None]:
	        """Open a fresh scope of local variable names for the with-block."""
	        self._local_variable_stack.append([])
	        try:
	            yield
	        finally:
	            # Pop even when the body raises so the scope stack stays
	            # balanced, mirroring what indent() does for the level.
	            self._local_variable_stack.pop()

	    @property
	    def local_variable_names(self) -> List[str]:
	        """Names recorded in the innermost scope.

	        Raises IndexError when no local_variable_context() is active.
	        """
	        return self._local_variable_stack[-1]

	    @abstractmethod
	    def generate(self, filename: str) -> None:
	        """Emit the parser; subclasses must override this."""
	        # The class has no ABCMeta metaclass, so @abstractmethod alone does
	        # not block instantiation; this raise is the effective guard.
	        raise NotImplementedError

	    @contextlib.contextmanager
	    def indent(self) -> Iterator[None]:
	        """Increase the output indentation by one level inside the with-block."""
	        self.level += 1
	        try:
	            yield
	        finally:
	            self.level -= 1

	    def print(self, *args: object) -> None:
	        """Write *args* to self.file, prefixed by the current indentation.

	        With no arguments only a newline is written, without indentation.
	        """
	        if not args:
	            print(file=self.file)
	        else:
	            # Four spaces per level; the single-space literal in this copy
	            # was a whitespace-collapse artefact.
	            print("    " * self.level, end="", file=self.file)
	            print(*args, file=self.file)

	    def printblock(self, lines: str) -> None:
	        """Print each line of *lines* through self.print()."""
	        for line in lines.splitlines():
	            self.print(line)

	    def collect_todo(self) -> None:
	        """Call collect_todo(self) on every rule in self.todo until none are new.

	        Each sweep also copies self.todo into self.all_rules.  Sweeps repeat
	        because processing a rule may add entries to self.todo; the loop
	        stops once a sweep finds no name that was not already processed.
	        """
	        done: Set[str] = set()
	        while True:
	            alltodo = list(self.todo)
	            self.all_rules.update(self.todo)
	            todo = [i for i in alltodo if i not in done]
	            if not todo:
	                break
	            for rulename in todo:
	                self.todo[rulename].collect_todo(self)
	            done = set(alltodo)

	    def keyword_type(self) -> int:
	        """Return the next keyword number (previous value plus one)."""
	        self.keyword_counter += 1
	        return self.keyword_counter

	    def name_node(self, rhs: Rhs) -> str:
	        """Register *rhs* as a new ``_tmp_N`` rule in self.todo and return its name."""
	        self.counter += 1
	        name = f"_tmp_{self.counter}"  # TODO: Pick a nicer name.
	        self.todo[name] = Rule(name, None, rhs)
	        return name

	    def name_loop(self, node: Plain, is_repeat1: bool) -> str:
	        """Register a loop rule wrapping *node* and return its name.

	        The name is ``_loop1_N`` when *is_repeat1* is true, else ``_loop0_N``.
	        """
	        self.counter += 1
	        if is_repeat1:
	            prefix = "_loop1_"
	        else:
	            prefix = "_loop0_"
	        name = f"{prefix}{self.counter}"  # TODO: It's ugly to signal via the name.
	        self.todo[name] = Rule(name, None, Rhs([Alt([NamedItem(None, node)])]))
	        return name

	    def name_gather(self, node: Gather) -> str:
	        """Register the two rules that implement a gather and return the outer name.

	        The outer ``_gather_N`` rule is ``elem`` followed by ``seq``, where
	        ``seq`` refers to a helper ``_loop0_M`` rule whose single alternative
	        is the separator followed by ``elem`` with action ``"elem"``.
	        """
	        self.counter += 1
	        name = f"_gather_{self.counter}"
	        self.counter += 1
	        extra_function_name = f"_loop0_{self.counter}"
	        extra_function_alt = Alt(
	            [NamedItem(None, node.separator), NamedItem("elem", node.node)], action="elem",
	        )
	        self.todo[extra_function_name] = Rule(
	            extra_function_name, None, Rhs([extra_function_alt]),
	        )
	        alt = Alt([NamedItem("elem", node.node), NamedItem("seq", NameLeaf(extra_function_name))],)
	        self.todo[name] = Rule(name, None, Rhs([alt]),)
	        return name

	    def dedupe(self, name: str) -> str:
	        """Return a name unused in the current scope and record it there.

	        *name* is returned unchanged when free; otherwise ``name_1``,
	        ``name_2``, ... are tried in order until one is free.
	        """
	        origname = name
	        counter = 0
	        while name in self.local_variable_names:
	            counter += 1
	            name = f"{origname}_{counter}"
	        self.local_variable_names.append(name)
	        return name


	def compute_nullables(rules: Dict[str, Rule]) -> None:
	    """Run the nullable analysis over every rule of a grammar.

	    Each rule's nullable_visit() is called once with the complete rule
	    table.  Nothing is returned.

	    Thanks to TatSu (tatsu/leftrec.py) for inspiration.
	    """
	    for rule in rules.values():
	        rule.nullable_visit(rules)


	def compute_left_recursives(
	    rules: Dict[str, Rule]
	) -> Tuple[Dict[str, AbstractSet[str]], List[AbstractSet[str]]]:
	    """Flag left-recursive rules and pick a leader for each recursive group.

	    The first graph from make_first_graph() is split into strongly
	    connected components.  For a component with several rules, every
	    member gets ``left_recursive = True`` and one rule that lies on every
	    cycle reported by sccutils.find_cycles_in_scc() gets ``leader = True``.
	    For a single-rule component both flags are set only when the rule has
	    an edge to itself.

	    Returns:
	        The pair ``(graph, sccs)``.

	    Raises:
	        ValueError: no rule of a multi-rule component is on all its cycles.
	    """
	    graph = make_first_graph(rules)
	    sccs = list(sccutils.strongly_connected_components(graph.keys(), graph))
	    for scc in sccs:
	        if len(scc) > 1:
	            for name in scc:
	                rules[name].left_recursive = True
	            # Try to find a leader such that all cycles go through it.
	            leaders = set(scc)
	            for start in scc:
	                for cycle in sccutils.find_cycles_in_scc(graph, scc, start):
	                    # print("Cycle:", " -> ".join(cycle))
	                    leaders -= scc - set(cycle)
	                    if not leaders:
	                        raise ValueError(
	                            f"SCC {scc} has no leadership candidate (no element is included in all cycles)"
	                        )
	            # print("Leaders:", leaders)
	            leader = min(leaders)  # Pick an arbitrary leader from the candidates.
	            rules[leader].leader = True
	        else:
	            name = min(scc)  # The only element.
	            if name in graph[name]:
	                rules[name].left_recursive = True
	                rules[name].leader = True
	    return graph, sccs


	def make_first_graph(rules: Dict[str, Rule]) -> Dict[str, AbstractSet[str]]:
	    """Build the graph of left-invocations.

	    Every rule name maps to the set returned by that rule's
	    initial_names(): there is an edge from A to B if A may invoke B at its
	    initial position.  Names that are only referenced get an empty
	    successor set, so every vertex is also a key.

	    The nullable flags must already have been computed.
	    """
	    graph: Dict[str, AbstractSet[str]] = {}
	    vertices: Set[str] = set()
	    for rulename, rule in rules.items():
	        graph[rulename] = names = rule.initial_names()
	        vertices |= names
	    for vertex in vertices:
	        graph.setdefault(vertex, set())
	    return graph