Parser/pgen/grammar.py - platform/external/python/cpython3 - Git at Google

 import collections


 class Grammar:
     """Container for the parsing tables produced by pgen.

     Attributes:

     symbol2number -- dict from symbol name to symbol number.  Symbol
                      numbers start at 256 so that they never collide
                      with token numbers, which lie in 0..255
                      (inclusive).

     number2symbol -- dict from symbol number back to symbol name; the
                      inverse of symbol2number.

     states        -- list of DFAs; the index in this list is the DFA
                      number.  A DFA is a list of states, a state is a
                      list of arcs, and an arc is an (i, j) pair in
                      which i is a label and j is a state number.  A
                      final state carries a special arc (0, j) whose j
                      is the state's own number.

     dfas          -- dict from symbol number to a (DFA, first) pair.
                      DFA is one of the items of the states list and
                      first is the set of tokens that may begin the
                      grammar rule.

     labels        -- list of (x, y) pairs.  x is a token number or a
                      symbol number; y is None or a keyword string.
                      The index of a pair is its label number, which
                      is what arcs use to mark state transitions.

     start         -- number of the grammar's start symbol.

     keywords      -- dict from keyword string to arc label.

     tokens        -- dict from token number to arc label.

     symbol2label  -- ordered dict created empty by __init__; no method
                      of this class fills or reads it (presumably it
                      maps symbol names to arc labels -- confirm
                      against the code that populates it).

     """

     def __init__(self):
         """Create an empty grammar whose only label is (0, "EMPTY")."""
         new_map = collections.OrderedDict
         self.symbol2number = new_map()
         self.number2symbol = new_map()
         self.states = []
         self.dfas = new_map()
         # Index 0 of the label list is always the EMPTY marker.
         self.labels = [(0, "EMPTY")]
         self.keywords = new_map()
         self.tokens = new_map()
         self.symbol2label = new_map()
         # 256 is the lowest possible symbol number.
         self.start = 256

     def produce_graminit_h(self, writer):
         """Emit the header text: a banner and one #define per symbol.

         writer is a callable that is handed one string per call.
         """
         writer("/* Generated by Parser/pgen */\n\n")
         define_line = "#define {name} {value}\n"
         for value, name in self.number2symbol.items():
             writer(define_line.format(name=name, value=value))

     def produce_graminit_c(self, writer):
         """Emit the C source text that defines _PyParser_Grammar.

         The output is, in order: the banner and include, a forward
         declaration, the arc/state/dfa tables, the label table and
         finally the grammar struct initialiser.  writer is a callable
         that is handed one string per call.
         """
         for preamble in (
             "/* Generated by Parser/pgen */\n\n",
             '#include "grammar.h"\n',
             "grammar _PyParser_Grammar;\n",
         ):
             writer(preamble)

         self.print_dfas(writer)
         self.print_labels(writer)

         writer("grammar _PyParser_Grammar = {\n")
         writer("    {},\n".format(len(self.dfas)))
         writer("    dfas,\n")
         writer("    {{{}, labels}},\n".format(len(self.labels)))
         writer("    {}\n".format(self.start))
         writer("};\n")

     def print_labels(self, writer):
         """Emit the C array ``labels`` with one entry per label pair."""
         total = len(self.labels)
         writer("static const label labels[{}] = {{\n".format(total))
         for number, keyword in self.labels:
             # A missing name becomes a literal 0 in the C initialiser;
             # any other name is emitted as a double-quoted C string.
             if keyword is None:
                 c_name = 0
             else:
                 c_name = '"{}"'.format(keyword)
             writer("    {{{}, {}}},\n".format(number, c_name))
         writer("};\n")

     def print_dfas(self, writer):
         """Emit the state tables followed by the C array ``dfas``.

         Each dfas entry refers to ``states_<n>`` where n is the entry's
         position in self.dfas, and ends with the first set encoded as
         an octal-escaped C string bitset.
         """
         self.print_states(writer)
         writer("static const dfa dfas[{}] = {{\n".format(len(self.dfas)))
         entry_head = (
             '    {{{symbol}, "{name}", {count}, states_{index},\n     "'
         )
         for index, (symbol, (dfa, first_set)) in enumerate(self.dfas.items()):
             writer(
                 entry_head.format(
                     symbol=symbol,
                     name=self.number2symbol[symbol],
                     count=len(dfa),
                     index=index,
                 )
             )

             # One bit per label, eight labels per byte, plus a spare byte.
             first_bits = bytearray(len(self.labels) // 8 + 1)
             for token in first_set:
                 byte_pos, bit_pos = divmod(token, 8)
                 first_bits[byte_pos] |= 1 << bit_pos
             for value in first_bits:
                 writer("\\{:03o}".format(value & 0xFF))
             writer('"},\n')
         writer("};\n")

     def print_states(self, write):
         """Emit, for every DFA, its arc arrays and then its state array."""
         header = "static state states_{dfa}[{count}] = {{\n"
         row = "    {{{arcs}, arcs_{dfa}_{state}}},\n"
         for dfa_no, dfa_states in enumerate(self.states):
             # The arc arrays must precede the state array that names them.
             self.print_arcs(write, dfa_no, dfa_states)
             write(header.format(dfa=dfa_no, count=len(dfa_states)))
             for state_no, arcs in enumerate(dfa_states):
                 write(row.format(arcs=len(arcs), dfa=dfa_no, state=state_no))
             write("};\n")

     def print_arcs(self, write, dfaindex, states):
         """Emit one C array ``arcs_<dfaindex>_<state>`` per state.

         states is the list of states of a single DFA; every state is an
         iterable of (label, next state) pairs.
         """
         for state_no, arcs in enumerate(states):
             arc_count = len(arcs)
             write(
                 "static const arc arcs_{}_{}[{}] = {{\n".format(
                     dfaindex, state_no, arc_count
                 )
             )
             for label, target in arcs:
                 write("    {{{}, {}}},\n".format(label, target))
             write("};\n")
	import collections


	class Grammar:
	    """Pgen parsing tables class.

	    The instance variables are as follows:

	    symbol2number -- a dict mapping symbol names to numbers.  Symbol
	                     numbers are always 256 or higher, to distinguish
	                     them from token numbers, which are between 0 and
	                     255 (inclusive).

	    number2symbol -- a dict mapping numbers to symbol names;
	                     these two are each other's inverse.

	    states        -- a list of DFAs, where each DFA is a list of
	                     states, each state is a list of arcs, and each
	                     arc is a (i, j) pair where i is a label and j is
	                     a state number.  The DFA number is the index into
	                     this list.  (This name is slightly confusing.)
	                     Final states are represented by a special arc of
	                     the form (0, j) where j is its own state number.

	    dfas          -- a dict mapping symbol numbers to (DFA, first)
	                     pairs, where DFA is an item from the states list
	                     above, and first is a set of tokens that can
	                     begin this grammar rule.

	    labels        -- a list of (x, y) pairs where x is either a token
	                     number or a symbol number, and y is either None
	                     or a string; the strings are keywords.  The label
	                     number is the index in this list; label numbers
	                     are used to mark state transitions (arcs) in the
	                     DFAs.

	    start         -- the number of the grammar's start symbol.

	    keywords      -- a dict mapping keyword strings to arc labels.

	    tokens        -- a dict mapping token numbers to arc labels.

	    symbol2label  -- an ordered dict that starts out empty; it is not
	                     filled or read by any method of this class
	                     (presumably symbol names to arc labels -- confirm
	                     against the code that populates it).

	    """

	    def __init__(self):
	        """Create an empty grammar whose only label is (0, "EMPTY")."""
	        self.symbol2number = collections.OrderedDict()
	        self.number2symbol = collections.OrderedDict()
	        self.states = []
	        self.dfas = collections.OrderedDict()
	        self.labels = [(0, "EMPTY")]
	        self.keywords = collections.OrderedDict()
	        self.tokens = collections.OrderedDict()
	        self.symbol2label = collections.OrderedDict()
	        self.start = 256

	    def produce_graminit_h(self, writer):
	        """Write the header text: a banner and one #define per symbol.

	        writer is a callable that receives one string per call.
	        """
	        writer("/* Generated by Parser/pgen */\n\n")
	        for number, symbol in self.number2symbol.items():
	            writer("#define {} {}\n".format(symbol, number))

	    def produce_graminit_c(self, writer):
	        """Write the C source text that defines _PyParser_Grammar.

	        Emits the banner and include, a forward declaration, the DFA
	        tables, the label table and the grammar struct initialiser, in
	        that order.  writer is a callable that receives one string per
	        call.
	        """
	        writer("/* Generated by Parser/pgen */\n\n")

	        writer('#include "grammar.h"\n')
	        writer("grammar _PyParser_Grammar;\n")

	        self.print_dfas(writer)
	        self.print_labels(writer)

	        writer("grammar _PyParser_Grammar = {\n")
	        writer("    {n_dfas},\n".format(n_dfas=len(self.dfas)))
	        writer("    dfas,\n")
	        writer("    {{{n_labels}, labels}},\n".format(n_labels=len(self.labels)))
	        writer("    {start_number}\n".format(start_number=self.start))
	        writer("};\n")

	    def print_labels(self, writer):
	        """Write the C array ``labels``, one entry per label pair."""
	        writer(
	            "static const label labels[{n_labels}] = {{\n".format(
	                n_labels=len(self.labels)
	            )
	        )
	        for label, name in self.labels:
	            # A label without a name is written as a literal 0 in C.
	            label_name = '"{}"'.format(name) if name is not None else 0
	            writer(
	                "    {{{label}, {label_name}}},\n".format(
	                    label=label, label_name=label_name
	                )
	            )
	        writer("};\n")

	    def print_dfas(self, writer):
	        """Write the state tables followed by the C array ``dfas``.

	        Each entry names ``states_<n>`` where n is the entry's position
	        in self.dfas, and ends with the first set encoded as an
	        octal-escaped C string bitset.
	        """
	        self.print_states(writer)
	        writer("static const dfa dfas[{}] = {{\n".format(len(self.dfas)))
	        for dfaindex, dfa_elem in enumerate(self.dfas.items()):
	            symbol, (dfa, first_sets) = dfa_elem
	            writer(
	                '    {{{dfa_symbol}, "{symbol_name}", '.format(
	                    dfa_symbol=symbol, symbol_name=self.number2symbol[symbol]
	                )
	                + "{n_states}, states_{dfa_index},\n".format(
	                    n_states=len(dfa), dfa_index=dfaindex
	                )
	                + '     "'
	            )

	            # One bit per label, eight labels per byte, plus a spare byte.
	            bitset = bytearray((len(self.labels) >> 3) + 1)
	            for token in first_sets:
	                bitset[token >> 3] |= 1 << (token & 7)
	            for byte in bitset:
	                writer("\\%03o" % (byte & 0xFF))
	            writer('"},\n')
	        writer("};\n")

	    def print_states(self, write):
	        """Write, for every DFA, its arc arrays and then its state array."""
	        for dfaindex, dfa in enumerate(self.states):
	            # The arc arrays must precede the state array that names them.
	            self.print_arcs(write, dfaindex, dfa)
	            write(
	                "static state states_{dfa_index}[{n_states}] = {{\n".format(
	                    dfa_index=dfaindex, n_states=len(dfa)
	                )
	            )
	            for stateindex, state in enumerate(dfa):
	                narcs = len(state)
	                write(
	                    "    {{{n_arcs}, arcs_{dfa_index}_{state_index}}},\n".format(
	                        n_arcs=narcs, dfa_index=dfaindex, state_index=stateindex
	                    )
	                )
	            write("};\n")

	    def print_arcs(self, write, dfaindex, states):
	        """Write one C array ``arcs_<dfaindex>_<state>`` per state.

	        states is the list of states of a single DFA; every state is an
	        iterable of (label, next state) pairs.
	        """
	        for stateindex, state in enumerate(states):
	            narcs = len(state)
	            write(
	                "static const arc arcs_{dfa_index}_{state_index}[{n_arcs}] = {{\n".format(
	                    dfa_index=dfaindex, state_index=stateindex, n_arcs=narcs
	                )
	            )
	            for a, b in state:
	                write(
	                    "    {{{from_label}, {to_state}}},\n".format(
	                        from_label=a, to_state=b
	                    )
	                )
	            write("};\n")