| """Parser for the Python metagrammar""" |
| |
| import io |
| import tokenize # from stdlib |
| |
| from .automata import NFA, NFAState |
| |
| |
| class GrammarParser: |
| """Parser for Python grammar files.""" |
| |
| _translation_table = { |
| tokenize.NAME: "NAME", |
| tokenize.STRING: "STRING", |
| tokenize.NEWLINE: "NEWLINE", |
| tokenize.NL: "NL", |
| tokenize.OP: "OP", |
| tokenize.ENDMARKER: "ENDMARKER", |
| tokenize.COMMENT: "COMMENT", |
| } |
| |
| def __init__(self, grammar): |
| self.grammar = grammar |
| grammar_adaptor = io.StringIO(grammar) |
| self.generator = tokenize.generate_tokens(grammar_adaptor.readline) |
| self._gettoken() # Initialize lookahead |
| self._current_rule_name = None |
| |
| def parse(self): |
| """Turn the grammar into a collection of NFAs""" |
        # grammar: (NEWLINE | rule)* ENDMARKER
        while self.type != tokenize.ENDMARKER:
            while self.type == tokenize.NEWLINE:
                self._gettoken()
            # rule: NAME ':' rhs NEWLINE
            self._current_rule_name = self._expect(tokenize.NAME)
            self._expect(tokenize.OP, ":")
            a, z = self._parse_rhs()
            self._expect(tokenize.NEWLINE)

            yield NFA(a, z)

    def _parse_rhs(self):
        # rhs: items ('|' items)*
        a, z = self._parse_items()
        if self.value != "|":
            return a, z
        else:
            aa = NFAState(self._current_rule_name)
            zz = NFAState(self._current_rule_name)
            while True:
                # Add an epsilon arc from the shared start state to the start
                # of this alternative, and from the end of this alternative to
                # the shared end state, so the automaton can choose any one of
                # the alternatives.
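                # The fragment built for `A | B` (epsilon arcs unlabelled):
                #
                #          +--> [A.start] ... [A.end] --+
                #   (aa) --+                            +--> (zz)
                #          +--> [B.start] ... [B.end] --+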
                aa.add_arc(a)
                z.add_arc(zz)
                if self.value != "|":
                    break

                self._gettoken()
                a, z = self._parse_items()
            return aa, zz

    def _parse_items(self):
        # items: item+
        a, b = self._parse_item()
        while self.type in (tokenize.NAME, tokenize.STRING) or self.value in ("(", "["):
            c, d = self._parse_item()
            # Add an epsilon arc from the end of the previous item to the
            # beginning of the new one, chaining the items together. This way
            # the end is reachable only after visiting every item.
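            # For `A B` the two fragments end up chained like this:
            #
            #   (a) ... (b) --> (c) ... (d)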
            b.add_arc(c)
            b = d
        return a, b

    def _parse_item(self):
        # item: '[' rhs ']' | atom ['+' | '*']
        if self.value == "[":
            self._gettoken()
            a, z = self._parse_rhs()
            self._expect(tokenize.OP, "]")
            # Add an epsilon arc from the beginning straight to the end, so it
            # is possible to skip the whole optional rhs without consuming
            # anything from it.
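            # For `[A]` the fragment is:
            #
            #   (a) ... (z)  plus a direct epsilon arc (a) --> (z)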
            a.add_arc(z)
            return a, z
        else:
            a, z = self._parse_atom()
            value = self.value
            if value not in ("+", "*"):
                return a, z
            self._gettoken()
            z.add_arc(a)
            if value == "+":
                # Keep the distinct end state: the back arc lets us cycle to
                # the beginning and repeat, but the atom must be consumed at
                # least once before the end is reachable.
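                # For `A+`:
                #
                #   (a) ... (z)  with an epsilon back arc (z) --> (a)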
                return a, z
            else:
                # Make the end state the same as the beginning, so we can
                # cycle arbitrarily many times and stop at the beginning,
                # i.e. consume the atom zero or more times.
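                # For `A*`:
                #
                #   (a) ... (z) --> (a)  with (a) doubling as the end state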
                return a, a

    def _parse_atom(self):
        # atom: '(' rhs ')' | NAME | STRING
        if self.value == "(":
            self._gettoken()
            a, z = self._parse_rhs()
            self._expect(tokenize.OP, ")")
            return a, z
        elif self.type in (tokenize.NAME, tokenize.STRING):
            a = NFAState(self._current_rule_name)
            z = NFAState(self._current_rule_name)
            # The arc is labelled: we can move to the next state only by
            # consuming the value.
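            #
            #   (a) --value--> (z)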
            a.add_arc(z, self.value)
            self._gettoken()
            return a, z
        else:
            self._raise_error(
                "expected (...) or NAME or STRING, got {} ({})",
                self._translation_table.get(self.type, self.type),
                self.value,
            )

    def _expect(self, type_, value=None):
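        # Check that the lookahead matches, consume it, and return its string
        # value; e.g. `self._expect(tokenize.OP, ":")` consumes a ":" token.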
        if self.type != type_:
            self._raise_error(
                "expected {}, got {} ({})",
                self._translation_table.get(type_, type_),
                self._translation_table.get(self.type, self.type),
                self.value,
            )
        if value is not None and self.value != value:
            self._raise_error("expected {}, got {}", value, self.value)
        value = self.value
        self._gettoken()
        return value

    def _gettoken(self):
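        # Advance the lookahead by one token, transparently skipping comments
        # and non-logical newlines (NL) so the rule parsers never see them.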
        tup = next(self.generator)
        while tup[0] in (tokenize.COMMENT, tokenize.NL):
            tup = next(self.generator)
        self.type, self.value, self.begin, self.end, self.line = tup

    def _raise_error(self, msg, *args):
        if args:
            try:
                msg = msg.format(*args)
            except Exception:
                msg = " ".join([msg] + list(map(str, args)))
        # Point the SyntaxError at the offending line of the grammar source
        # (self.begin is a (row, column) pair with a 1-indexed row).
        line = self.grammar.splitlines()[self.begin[0] - 1]
        raise SyntaxError(msg, ("<grammar>", self.begin[0], self.begin[1], line))
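

# A minimal smoke-test sketch, left commented out because it is not part of
# the module proper; it assumes `automata.NFA` records the rule name on a
# `name` attribute and that this file is run from within its package:
#
#     if __name__ == "__main__":
#         parser = GrammarParser("start: NAME NEWLINE\n")
#         for nfa in parser.parse():
#             print(nfa.name)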