Parser/pgen/automata.py - platform/external/python/cpython3 - Git at Google

 """Classes representing state-machine concepts"""

 class NFA:
     """A non deterministic finite automata

     A non deterministic automata is a form of a finite state
     machine. An NFA's rules are less restrictive than a DFA.
     The NFA rules are:

       * A transition can be non-deterministic and can result in
         nothing, one, or two or more states.

       * An epsilon transition consuming empty input is valid.
         Transitions consuming labeled symbols are also permitted.

     This class assumes that there is only one starting state and one
     accepting (ending) state.

     Attributes:
         name (str): The name of the rule the NFA is representing.
         start (NFAState): The starting state.
         end (NFAState): The ending state
     """

     def __init__(self, start, end):
         self.name = start.rule_name
         self.start = start
         self.end = end

     def __repr__(self):
         return "NFA(start={}, end={})".format(self.start, self.end)

     def dump(self, writer=print):
         """Dump a graphical representation of the NFA"""
         todo = [self.start]
         for i, state in enumerate(todo):
             writer("  State", i, state is self.end and "(final)" or "")
             for arc in state.arcs:
                 label = arc.label
                 next = arc.target
                 if next in todo:
                     j = todo.index(next)
                 else:
                     j = len(todo)
                     todo.append(next)
                 if label is None:
                     writer("    -> %d" % j)
                 else:
                     writer("    %s -> %d" % (label, j))

     def dump_graph(self, writer):
         """Dump a DOT representation of the NFA"""
         writer('digraph %s_nfa {\n' % self.name)
         todo = [self.start]
         for i, state in enumerate(todo):
             writer(' %d [label="State %d %s"];\n' % (i, i, state is self.end and "(final)" or ""))
             for arc in state.arcs:
                 label = arc.label
                 next = arc.target
                 if next in todo:
                     j = todo.index(next)
                 else:
                     j = len(todo)
                     todo.append(next)
                 if label is None:
                     writer(" %d -> %d [style=dotted label=ε];\n" % (i, j))
                 else:
                     writer(" %d -> %d [label=%s];\n" % (i, j, label.replace("'", '"')))
         writer('}\n')


 class NFAArc:
     """An arc representing a transition between two NFA states.

     NFA states can be connected via two ways:

         * A label transition: An input equal to the label must
           be consumed to perform the transition.
         * An epsilon transition: The transition can be taken without
           consuming any input symbol.

         Attributes:
             target (NFAState): The ending state of the transition arc.
             label (Optional[str]): The label that must be consumed to make
                 the transition. An epsilon transition is represented
                 using `None`.
     """

     def __init__(self, target, label):
         self.target = target
         self.label = label

     def __repr__(self):
         return "<%s: %s>" % (self.__class__.__name__, self.label)


 class NFAState:
     """A state of a NFA, non deterministic finite automata.

     Attributes:
         target (rule_name): The name of the rule used to represent the NFA's
             ending state after a transition.
         arcs (Dict[Optional[str], NFAState]): A mapping representing transitions
             between the current NFA state and another NFA state via following
             a label.
     """

     def __init__(self, rule_name):
         self.rule_name = rule_name
         self.arcs = []

     def add_arc(self, target, label=None):
         """Add a new arc to connect the state to a target state within the NFA

         The method adds a new arc to the list of arcs available as transitions
         from the present state. An optional label indicates a named transition
         that consumes an input while the absence of a label represents an epsilon
         transition.

         Attributes:
             target (NFAState): The end of the transition that the arc represents.
             label (Optional[str]): The label that must be consumed for making
                 the transition. If the label is not provided the transition is assumed
                 to be an epsilon-transition.
         """
         assert label is None or isinstance(label, str)
         assert isinstance(target, NFAState)
         self.arcs.append(NFAArc(target, label))

     def __repr__(self):
         return "<%s: from %s>" % (self.__class__.__name__, self.rule_name)


 class DFA:
     """A deterministic finite automata

     A deterministic finite automata is a form of a finite state machine
     that obeys the following rules:

        * Each of the transitions is uniquely determined by
          the source state and input symbol
        * Reading an input symbol is required for each state
          transition (no epsilon transitions).

     The finite-state machine will accept or reject a string of symbols
     and only produces a unique computation of the automaton for each input
     string. The DFA must have a unique starting state (represented as the first
     element in the list of states) but can have multiple final states.

     Attributes:
         name (str): The name of the rule the DFA is representing.
         states (List[DFAState]): A collection of DFA states.
     """

     def __init__(self, name, states):
         self.name = name
         self.states = states

     @classmethod
     def from_nfa(cls, nfa):
         """Constructs a DFA from a NFA using the Rabin–Scott construction algorithm.

         To simulate the operation of a DFA on a given input string, it's
         necessary to keep track of a single state at any time, or more precisely,
         the state that the automaton will reach after seeing a prefix of the
         input. In contrast, to simulate an NFA, it's necessary to keep track of
         a set of states: all of the states that the automaton could reach after
         seeing the same prefix of the input, according to the nondeterministic
         choices made by the automaton. There are two possible sources of
         non-determinism:

         1) Multiple (one or more) transitions with the same label

                          'A'     +-------+
                     +----------->+ State +----------->+
                     |            |   2   |
             +-------+            +-------+
             | State |
             |   1   |            +-------+
             +-------+            | State |
                     +----------->+   3   +----------->+
                          'A'     +-------+

         2) Epsilon transitions (transitions that can be taken without consuming any input)

             +-------+            +-------+
             | State |     ε      | State |
             |   1   +----------->+   2   +----------->+
             +-------+            +-------+

         Looking at the first case above, we can't determine which transition should be
         followed when given an input A. We could choose whether or not to follow the
         transition while in the second case the problem is that we can choose both to
         follow the transition or not doing it. To solve this problem we can imagine that
         we follow all possibilities at the same time and we construct new states from the
         set of all possible reachable states. For every case in the previous example:


         1) For multiple transitions with the same label we colapse all of the
            final states under the same one

             +-------+            +-------+
             | State |     'A'    | State |
             |   1   +----------->+  2-3  +----------->+
             +-------+            +-------+

         2) For epsilon transitions we collapse all epsilon-reachable states
            into the same one

             +-------+
             | State |
             |  1-2  +----------->
             +-------+

         Because the DFA states consist of sets of NFA states, an n-state NFA
         may be converted to a DFA with at most 2**n states. Notice that the
         constructed DFA is not minimal and can be simplified or reduced
         afterwards.

         Parameters:
             name (NFA): The NFA to transform to DFA.
         """
         assert isinstance(nfa, NFA)

         def add_closure(nfa_state, base_nfa_set):
             """Calculate the epsilon-closure of a given state

             Add to the *base_nfa_set* all the states that are
             reachable from *nfa_state* via epsilon-transitions.
             """
             assert isinstance(nfa_state, NFAState)
             if nfa_state in base_nfa_set:
                 return
             base_nfa_set.add(nfa_state)
             for nfa_arc in nfa_state.arcs:
                 if nfa_arc.label is None:
                     add_closure(nfa_arc.target, base_nfa_set)

         # Calculate the epsilon-closure of the starting state
         base_nfa_set = set()
         add_closure(nfa.start, base_nfa_set)

         # Start by visiting the NFA starting state (there is only one).
         states = [DFAState(nfa.name, base_nfa_set, nfa.end)]

         for state in states:  # NB states grow while we're iterating

             # Find transitions from the current state to other reachable states
             # and store them in mapping that correlates the label to all the
             # possible reachable states that can be obtained by consuming a
             # token equal to the label. Each set of all the states that can
             # be reached after following a label will be the a DFA state.
             arcs = {}
             for nfa_state in state.nfa_set:
                 for nfa_arc in nfa_state.arcs:
                     if nfa_arc.label is not None:
                         nfa_set = arcs.setdefault(nfa_arc.label, set())
                         # All states that can be reached by epsilon-transitions
                         # are also included in the set of reachable states.
                         add_closure(nfa_arc.target, nfa_set)

             # Now create new DFAs by visiting all posible transitions between
             # the current DFA state and the new power-set states (each nfa_set)
             # via the different labels. As the nodes are appended to *states* this
             # is performing a breadth-first search traversal over the power-set of
             # the states of the original NFA.
             for label, nfa_set in sorted(arcs.items()):
                 for exisisting_state in states:
                     if exisisting_state.nfa_set == nfa_set:
                         # The DFA state already exists for this rule.
                         next_state = exisisting_state
                         break
                 else:
                     next_state = DFAState(nfa.name, nfa_set, nfa.end)
                     states.append(next_state)

                 # Add a transition between the current DFA state and the new
                 # DFA state (the power-set state) via the current label.
                 state.add_arc(next_state, label)

         return cls(nfa.name, states)

     def __iter__(self):
         return iter(self.states)

     def simplify(self):
         """Attempt to reduce the number of states of the DFA

         Transform the DFA into an equivalent DFA that has fewer states. Two
         classes of states can be removed or merged from the original DFA without
         affecting the language it accepts to minimize it:

             * Unreachable states can not be reached from the initial
               state of the DFA, for any input string.
             * Nondistinguishable states are those that cannot be distinguished
               from one another for any input string.

         This algorithm does not achieve the optimal fully-reduced solution, but it
         works well enough for the particularities of the Python grammar. The
         algorithm repeatedly looks for two states that have the same set of
         arcs (same labels pointing to the same nodes) and unifies them, until
         things stop changing.
         """
         changes = True
         while changes:
             changes = False
             for i, state_i in enumerate(self.states):
                 for j in range(i + 1, len(self.states)):
                     state_j = self.states[j]
                     if state_i == state_j:
                         del self.states[j]
                         for state in self.states:
                             state.unifystate(state_j, state_i)
                         changes = True
                         break

     def dump(self, writer=print):
         """Dump a graphical representation of the DFA"""
         for i, state in enumerate(self.states):
             writer("  State", i, state.is_final and "(final)" or "")
             for label, next in sorted(state.arcs.items()):
                 writer("    %s -> %d" % (label, self.states.index(next)))

     def dump_graph(self, writer):
         """Dump a DOT representation of the DFA"""
         writer('digraph %s_dfa {\n' % self.name)
         for i, state in enumerate(self.states):
             writer(' %d [label="State %d %s"];\n' % (i, i, state.is_final and "(final)" or ""))
             for label, next in sorted(state.arcs.items()):
                 writer(" %d -> %d [label=%s];\n" % (i, self.states.index(next), label.replace("'", '"')))
         writer('}\n')


 class DFAState(object):
     """A state of a DFA

     Attributes:
         rule_name (rule_name): The name of the DFA rule containing the represented state.
         nfa_set (Set[NFAState]): The set of NFA states used to create this state.
         final (bool): True if the state represents an accepting state of the DFA
             containing this state.
         arcs (Dict[label, DFAState]): A mapping representing transitions between
             the current DFA state and another DFA state via following a label.
     """

     def __init__(self, rule_name, nfa_set, final):
         assert isinstance(nfa_set, set)
         assert isinstance(next(iter(nfa_set)), NFAState)
         assert isinstance(final, NFAState)
         self.rule_name = rule_name
         self.nfa_set = nfa_set
         self.arcs = {}  # map from terminals/nonterminals to DFAState
         self.is_final = final in nfa_set

     def add_arc(self, target, label):
         """Add a new arc to the current state.

         Parameters:
             target (DFAState): The DFA state at the end of the arc.
             label (str): The label respresenting the token that must be consumed
                 to perform this transition.
         """
         assert isinstance(label, str)
         assert label not in self.arcs
         assert isinstance(target, DFAState)
         self.arcs[label] = target

     def unifystate(self, old, new):
         """Replace all arcs from the current node to *old* with *new*.

         Parameters:
             old (DFAState): The  DFA state to remove from all existing arcs.
             new (DFAState): The DFA state to replace in all existing arcs.
         """
         for label, next_ in self.arcs.items():
             if next_ is old:
                 self.arcs[label] = new

     def __eq__(self, other):
         # The nfa_set does not matter for  equality
         assert isinstance(other, DFAState)
         if self.is_final != other.is_final:
             return False
         # We cannot just return self.arcs == other.arcs because that
         # would invoke this method recursively if there are any cycles.
         if len(self.arcs) != len(other.arcs):
             return False
         for label, next_ in self.arcs.items():
             if next_ is not other.arcs.get(label):
                 return False
         return True

     __hash__ = None  # For Py3 compatibility.

     def __repr__(self):
         return "<%s: %s is_final=%s>" % (
             self.__class__.__name__,
             self.rule_name,
             self.is_final,
         )
	"""Classes representing state-machine concepts"""

	class NFA:
	"""A non deterministic finite automata

	A non deterministic automata is a form of a finite state
	machine. An NFA's rules are less restrictive than a DFA.
	The NFA rules are:

	* A transition can be non-deterministic and can result in
	nothing, one, or two or more states.

	* An epsilon transition consuming empty input is valid.
	Transitions consuming labeled symbols are also permitted.

	This class assumes that there is only one starting state and one
	accepting (ending) state.

	Attributes:
	name (str): The name of the rule the NFA is representing.
	start (NFAState): The starting state.
	end (NFAState): The ending state
	"""

	def __init__(self, start, end):
	self.name = start.rule_name
	self.start = start
	self.end = end

	def __repr__(self):
	return "NFA(start={}, end={})".format(self.start, self.end)

	def dump(self, writer=print):
	"""Dump a graphical representation of the NFA"""
	todo = [self.start]
	for i, state in enumerate(todo):
	writer(" State", i, state is self.end and "(final)" or "")
	for arc in state.arcs:
	label = arc.label
	next = arc.target
	if next in todo:
	j = todo.index(next)
	else:
	j = len(todo)
	todo.append(next)
	if label is None:
	writer(" -> %d" % j)
	else:
	writer(" %s -> %d" % (label, j))

	def dump_graph(self, writer):
	"""Dump a DOT representation of the NFA"""
	writer('digraph %s_nfa {\n' % self.name)
	todo = [self.start]
	for i, state in enumerate(todo):
	writer(' %d [label="State %d %s"];\n' % (i, i, state is self.end and "(final)" or ""))
	for arc in state.arcs:
	label = arc.label
	next = arc.target
	if next in todo:
	j = todo.index(next)
	else:
	j = len(todo)
	todo.append(next)
	if label is None:
	writer(" %d -> %d [style=dotted label=ε];\n" % (i, j))
	else:
	writer(" %d -> %d [label=%s];\n" % (i, j, label.replace("'", '"')))
	writer('}\n')


	class NFAArc:
	"""An arc representing a transition between two NFA states.

	NFA states can be connected via two ways:

	* A label transition: An input equal to the label must
	be consumed to perform the transition.
	* An epsilon transition: The transition can be taken without
	consuming any input symbol.

	Attributes:
	target (NFAState): The ending state of the transition arc.
	label (Optional[str]): The label that must be consumed to make
	the transition. An epsilon transition is represented
	using `None`.
	"""

	def __init__(self, target, label):
	self.target = target
	self.label = label

	def __repr__(self):
	return "<%s: %s>" % (self.__class__.__name__, self.label)


	class NFAState:
	"""A state of a NFA, non deterministic finite automata.

	Attributes:
	target (rule_name): The name of the rule used to represent the NFA's
	ending state after a transition.
	arcs (Dict[Optional[str], NFAState]): A mapping representing transitions
	between the current NFA state and another NFA state via following
	a label.
	"""

	def __init__(self, rule_name):
	self.rule_name = rule_name
	self.arcs = []

	def add_arc(self, target, label=None):
	"""Add a new arc to connect the state to a target state within the NFA

	The method adds a new arc to the list of arcs available as transitions
	from the present state. An optional label indicates a named transition
	that consumes an input while the absence of a label represents an epsilon
	transition.

	Attributes:
	target (NFAState): The end of the transition that the arc represents.
	label (Optional[str]): The label that must be consumed for making
	the transition. If the label is not provided the transition is assumed
	to be an epsilon-transition.
	"""
	assert label is None or isinstance(label, str)
	assert isinstance(target, NFAState)
	self.arcs.append(NFAArc(target, label))

	def __repr__(self):
	return "<%s: from %s>" % (self.__class__.__name__, self.rule_name)


	class DFA:
	"""A deterministic finite automata

	A deterministic finite automata is a form of a finite state machine
	that obeys the following rules:

	* Each of the transitions is uniquely determined by
	the source state and input symbol
	* Reading an input symbol is required for each state
	transition (no epsilon transitions).

	The finite-state machine will accept or reject a string of symbols
	and only produces a unique computation of the automaton for each input
	string. The DFA must have a unique starting state (represented as the first
	element in the list of states) but can have multiple final states.

	Attributes:
	name (str): The name of the rule the DFA is representing.
	states (List[DFAState]): A collection of DFA states.
	"""

	def __init__(self, name, states):
	self.name = name
	self.states = states

	@classmethod
	def from_nfa(cls, nfa):
	"""Constructs a DFA from a NFA using the Rabin–Scott construction algorithm.

	To simulate the operation of a DFA on a given input string, it's
	necessary to keep track of a single state at any time, or more precisely,
	the state that the automaton will reach after seeing a prefix of the
	input. In contrast, to simulate an NFA, it's necessary to keep track of
	a set of states: all of the states that the automaton could reach after
	seeing the same prefix of the input, according to the nondeterministic
	choices made by the automaton. There are two possible sources of
	non-determinism:

	1) Multiple (one or more) transitions with the same label

	'A' +-------+
	+----------->+ State +----------->+
	\| \| 2 \|
	+-------+ +-------+
	\| State \|
	\| 1 \| +-------+
	+-------+ \| State \|
	+----------->+ 3 +----------->+
	'A' +-------+

	2) Epsilon transitions (transitions that can be taken without consuming any input)

	+-------+ +-------+
	\| State \| ε \| State \|
	\| 1 +----------->+ 2 +----------->+
	+-------+ +-------+

	Looking at the first case above, we can't determine which transition should be
	followed when given an input A. We could choose whether or not to follow the
	transition while in the second case the problem is that we can choose both to
	follow the transition or not doing it. To solve this problem we can imagine that
	we follow all possibilities at the same time and we construct new states from the
	set of all possible reachable states. For every case in the previous example:


	1) For multiple transitions with the same label we colapse all of the
	final states under the same one

	+-------+ +-------+
	\| State \| 'A' \| State \|
	\| 1 +----------->+ 2-3 +----------->+
	+-------+ +-------+

	2) For epsilon transitions we collapse all epsilon-reachable states
	into the same one

	+-------+
	\| State \|
	\| 1-2 +----------->
	+-------+

	Because the DFA states consist of sets of NFA states, an n-state NFA
	may be converted to a DFA with at most 2**n states. Notice that the
	constructed DFA is not minimal and can be simplified or reduced
	afterwards.

	Parameters:
	name (NFA): The NFA to transform to DFA.
	"""
	assert isinstance(nfa, NFA)

	def add_closure(nfa_state, base_nfa_set):
	"""Calculate the epsilon-closure of a given state

	Add to the base_nfa_set all the states that are
	reachable from nfa_state via epsilon-transitions.
	"""
	assert isinstance(nfa_state, NFAState)
	if nfa_state in base_nfa_set:
	return
	base_nfa_set.add(nfa_state)
	for nfa_arc in nfa_state.arcs:
	if nfa_arc.label is None:
	add_closure(nfa_arc.target, base_nfa_set)

	# Calculate the epsilon-closure of the starting state
	base_nfa_set = set()
	add_closure(nfa.start, base_nfa_set)

	# Start by visiting the NFA starting state (there is only one).
	states = [DFAState(nfa.name, base_nfa_set, nfa.end)]

	for state in states: # NB states grow while we're iterating

	# Find transitions from the current state to other reachable states
	# and store them in mapping that correlates the label to all the
	# possible reachable states that can be obtained by consuming a
	# token equal to the label. Each set of all the states that can
	# be reached after following a label will be the a DFA state.
	arcs = {}
	for nfa_state in state.nfa_set:
	for nfa_arc in nfa_state.arcs:
	if nfa_arc.label is not None:
	nfa_set = arcs.setdefault(nfa_arc.label, set())
	# All states that can be reached by epsilon-transitions
	# are also included in the set of reachable states.
	add_closure(nfa_arc.target, nfa_set)

	# Now create new DFAs by visiting all posible transitions between
	# the current DFA state and the new power-set states (each nfa_set)
	# via the different labels. As the nodes are appended to states this
	# is performing a breadth-first search traversal over the power-set of
	# the states of the original NFA.
	for label, nfa_set in sorted(arcs.items()):
	for exisisting_state in states:
	if exisisting_state.nfa_set == nfa_set:
	# The DFA state already exists for this rule.
	next_state = exisisting_state
	break
	else:
	next_state = DFAState(nfa.name, nfa_set, nfa.end)
	states.append(next_state)

	# Add a transition between the current DFA state and the new
	# DFA state (the power-set state) via the current label.
	state.add_arc(next_state, label)

	return cls(nfa.name, states)

	def __iter__(self):
	return iter(self.states)

	def simplify(self):
	"""Attempt to reduce the number of states of the DFA

	Transform the DFA into an equivalent DFA that has fewer states. Two
	classes of states can be removed or merged from the original DFA without
	affecting the language it accepts to minimize it:

	* Unreachable states can not be reached from the initial
	state of the DFA, for any input string.
	* Nondistinguishable states are those that cannot be distinguished
	from one another for any input string.

	This algorithm does not achieve the optimal fully-reduced solution, but it
	works well enough for the particularities of the Python grammar. The
	algorithm repeatedly looks for two states that have the same set of
	arcs (same labels pointing to the same nodes) and unifies them, until
	things stop changing.
	"""
	changes = True
	while changes:
	changes = False
	for i, state_i in enumerate(self.states):
	for j in range(i + 1, len(self.states)):
	state_j = self.states[j]
	if state_i == state_j:
	del self.states[j]
	for state in self.states:
	state.unifystate(state_j, state_i)
	changes = True
	break

	def dump(self, writer=print):
	"""Dump a graphical representation of the DFA"""
	for i, state in enumerate(self.states):
	writer(" State", i, state.is_final and "(final)" or "")
	for label, next in sorted(state.arcs.items()):
	writer(" %s -> %d" % (label, self.states.index(next)))

	def dump_graph(self, writer):
	"""Dump a DOT representation of the DFA"""
	writer('digraph %s_dfa {\n' % self.name)
	for i, state in enumerate(self.states):
	writer(' %d [label="State %d %s"];\n' % (i, i, state.is_final and "(final)" or ""))
	for label, next in sorted(state.arcs.items()):
	writer(" %d -> %d [label=%s];\n" % (i, self.states.index(next), label.replace("'", '"')))
	writer('}\n')


	class DFAState(object):
	"""A state of a DFA

	Attributes:
	rule_name (rule_name): The name of the DFA rule containing the represented state.
	nfa_set (Set[NFAState]): The set of NFA states used to create this state.
	final (bool): True if the state represents an accepting state of the DFA
	containing this state.
	arcs (Dict[label, DFAState]): A mapping representing transitions between
	the current DFA state and another DFA state via following a label.
	"""

	def __init__(self, rule_name, nfa_set, final):
	assert isinstance(nfa_set, set)
	assert isinstance(next(iter(nfa_set)), NFAState)
	assert isinstance(final, NFAState)
	self.rule_name = rule_name
	self.nfa_set = nfa_set
	self.arcs = {} # map from terminals/nonterminals to DFAState
	self.is_final = final in nfa_set

	def add_arc(self, target, label):
	"""Add a new arc to the current state.

	Parameters:
	target (DFAState): The DFA state at the end of the arc.
	label (str): The label respresenting the token that must be consumed
	to perform this transition.
	"""
	assert isinstance(label, str)
	assert label not in self.arcs
	assert isinstance(target, DFAState)
	self.arcs[label] = target

	def unifystate(self, old, new):
	"""Replace all arcs from the current node to old with new.

	Parameters:
	old (DFAState): The DFA state to remove from all existing arcs.
	new (DFAState): The DFA state to replace in all existing arcs.
	"""
	for label, next_ in self.arcs.items():
	if next_ is old:
	self.arcs[label] = new

	def __eq__(self, other):
	# The nfa_set does not matter for equality
	assert isinstance(other, DFAState)
	if self.is_final != other.is_final:
	return False
	# We cannot just return self.arcs == other.arcs because that
	# would invoke this method recursively if there are any cycles.
	if len(self.arcs) != len(other.arcs):
	return False
	for label, next_ in self.arcs.items():
	if next_ is not other.arcs.get(label):
	return False
	return True

	__hash__ = None # For Py3 compatibility.

	def __repr__(self):
	return "<%s: %s is_final=%s>" % (
	self.__class__.__name__,
	self.rule_name,
	self.is_final,
	)