tool/src/main/java/org/antlr/analysis/DFAOptimizer.java - platform/external/antlr - Git at Google

 /*
  * [The "BSD license"]
  *  Copyright (c) 2010 Terence Parr
  *  All rights reserved.
  *
  *  Redistribution and use in source and binary forms, with or without
  *  modification, are permitted provided that the following conditions
  *  are met:
  *  1. Redistributions of source code must retain the above copyright
  *      notice, this list of conditions and the following disclaimer.
  *  2. Redistributions in binary form must reproduce the above copyright
  *      notice, this list of conditions and the following disclaimer in the
  *      documentation and/or other materials provided with the distribution.
  *  3. The name of the author may not be used to endorse or promote products
  *      derived from this software without specific prior written permission.
  *
  *  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  *  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  *  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  *  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  *  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 package org.antlr.analysis;

 import org.antlr.misc.Utils;
 import org.antlr.tool.Grammar;

 import java.util.HashSet;
 import java.util.Set;

 /** A module to perform optimizations on DFAs.
  *
  *  I could more easily (and more quickly) do some optimizations (such as
  *  PRUNE_EBNF_EXIT_BRANCHES) during DFA construction, but then it
  *  messes up the determinism checking.  For example, it looks like
  *  loop exit branches are unreachable if you prune exit branches
  *  during DFA construction and before determinism checks.
  *
  *  In general, ANTLR's NFA&rarr;DFA&rarr;codegen pipeline seems very robust
  *  to me which I attribute to a uniform and consistent set of data
  *  structures.  Regardless of what I want to "say"/implement, I do so
  *  within the confines of, for example, a DFA.  The code generator
  *  can then just generate code--it doesn't have to do much thinking.
  *  Putting optimizations in the code gen code really starts to make
  *  it a spagetti factory (uh oh, now I'm hungry!).  The pipeline is
  *  very testable; each stage has well defined input/output pairs.
  *
  *  ### Optimization: PRUNE_EBNF_EXIT_BRANCHES
  *
  *  There is no need to test EBNF block exit branches.  Not only is it
  *  an unneeded computation, but counter-intuitively, you actually get
  *  better errors. You can report an error at the missing or extra
  *  token rather than as soon as you've figured out you will fail.
  *
  *  Imagine optional block "( DOT CLASS )? SEMI".  ANTLR generates:
  *
  *  int alt=0;
  *  if ( input.LA(1)==DOT ) {
  *      alt=1;
  *  }
  *  else if ( input.LA(1)==SEMI ) {
  *      alt=2;
  *  }
  *
  *  Clearly, since Parser.match() will ultimately find the error, we
  *  do not want to report an error nor do we want to bother testing
  *  lookahead against what follows the (...)?  We want to generate
  *  simply "should I enter the subrule?":
  *
  *  int alt=2;
  *  if ( input.LA(1)==DOT ) {
  *      alt=1;
  *  }
  *
  *  NOTE 1. Greedy loops cannot be optimized in this way.  For example,
  *  "(greedy=false:'x'|.)* '\n'".  You specifically need the exit branch
  *  to tell you when to terminate the loop as the same input actually
  *  predicts one of the alts (i.e., staying in the loop).
  *
  *  NOTE 2.  I do not optimize cyclic DFAs at the moment as it doesn't
  *  seem to work. ;)  I'll have to investigate later to see what work I
  *  can do on cyclic DFAs to make them have fewer edges.  Might have
  *  something to do with the EOT token.
  *
  *  ### PRUNE_SUPERFLUOUS_EOT_EDGES
  *
  *  When a token is a subset of another such as the following rules, ANTLR
  *  quietly assumes the first token to resolve the ambiguity.
  *
  *  EQ			: '=' ;
  *  ASSIGNOP	: '=' | '+=' ;
  *
  *  It can yield states that have only a single edge on EOT to an accept
  *  state.  This is a waste and messes up my code generation. ;)  If
  *  Tokens rule DFA goes
  *
  * 		s0 -'='-&gt; s3 -EOT-&gt; s5 (accept)
  *
  *  then s5 should be pruned and s3 should be made an accept.  Do NOT do this
  *  for keyword versus ID as the state with EOT edge emanating from it will
  *  also have another edge.
  *
  *  ### Optimization: COLLAPSE_ALL_INCIDENT_EDGES
  *
  *  Done during DFA construction.  See method addTransition() in
  *  NFAToDFAConverter.
  *
  *  ### Optimization: MERGE_STOP_STATES
  *
  *  Done during DFA construction.  See addDFAState() in NFAToDFAConverter.
  */
 public class DFAOptimizer {
 	public static boolean PRUNE_EBNF_EXIT_BRANCHES = true;
 	public static boolean PRUNE_TOKENS_RULE_SUPERFLUOUS_EOT_EDGES = true;
 	public static boolean COLLAPSE_ALL_PARALLEL_EDGES = true;
 	public static boolean MERGE_STOP_STATES = true;

 	/** Used by DFA state machine generator to avoid infinite recursion
 	 *  resulting from cycles int the DFA.  This is a set of int state #s.
 	 *  This is a side-effect of calling optimize; can't clear after use
 	 *  because code gen needs it.
 	 */
 	protected Set<Integer> visited = new HashSet<Integer>();

     protected Grammar grammar;

     public DFAOptimizer(Grammar grammar) {
 		this.grammar = grammar;
     }

 	public void optimize() {
 		// optimize each DFA in this grammar
 		for (int decisionNumber=1;
 			 decisionNumber<=grammar.getNumberOfDecisions();
 			 decisionNumber++)
 		{
 			DFA dfa = grammar.getLookaheadDFA(decisionNumber);
 			optimize(dfa);
 		}
 	}

 	protected void optimize(DFA dfa) {
 		if ( dfa==null ) {
 			return; // nothing to do
 		}
 		/*
 		System.out.println("Optimize DFA "+dfa.decisionNFAStartState.decisionNumber+
 						   " num states="+dfa.getNumberOfStates());
 		*/
 		//long start = System.currentTimeMillis();
 		if ( PRUNE_EBNF_EXIT_BRANCHES && dfa.canInlineDecision() ) {
 			visited.clear();
 			int decisionType =
 				dfa.getNFADecisionStartState().decisionStateType;
 			if ( dfa.isGreedy() &&
 				 (decisionType==NFAState.OPTIONAL_BLOCK_START ||
 				 decisionType==NFAState.LOOPBACK) )
 			{
 				optimizeExitBranches(dfa.startState);
 			}
 		}
 		// If the Tokens rule has syntactically ambiguous rules, try to prune
 		if ( PRUNE_TOKENS_RULE_SUPERFLUOUS_EOT_EDGES &&
 			 dfa.isTokensRuleDecision() &&
 			 dfa.probe.stateToSyntacticallyAmbiguousTokensRuleAltsMap.size()>0 )
 		{
 			visited.clear();
 			optimizeEOTBranches(dfa.startState);
 		}

 		/* ack...code gen needs this, cannot optimize
 		visited.clear();
 		unlinkUnneededStateData(dfa.startState);
 		*/
 		//long stop = System.currentTimeMillis();
 		//System.out.println("minimized in "+(int)(stop-start)+" ms");
     }

 	protected void optimizeExitBranches(DFAState d) {
 		Integer sI = Utils.integer(d.stateNumber);
 		if ( visited.contains(sI) ) {
 			return; // already visited
 		}
 		visited.add(sI);
 		int nAlts = d.dfa.getNumberOfAlts();
 		for (int i = 0; i < d.getNumberOfTransitions(); i++) {
 			Transition edge = d.transition(i);
 			DFAState edgeTarget = ((DFAState)edge.target);
 			/*
 			System.out.println(d.stateNumber+"-"+
 							   edge.label.toString(d.dfa.nfa.grammar)+"->"+
 							   edgeTarget.stateNumber);
 			*/
 			// if target is an accept state and that alt is the exit alt
 			if ( edgeTarget.isAcceptState() &&
 				edgeTarget.getUniquelyPredictedAlt()==nAlts)
 			{
 				/*
 				System.out.println("ignoring transition "+i+" to max alt "+
 					d.dfa.getNumberOfAlts());
 				*/
 				d.removeTransition(i);
 				i--; // back up one so that i++ of loop iteration stays within bounds
 			}
 			optimizeExitBranches(edgeTarget);
 		}
 	}

 	protected void optimizeEOTBranches(DFAState d) {
 		Integer sI = Utils.integer(d.stateNumber);
 		if ( visited.contains(sI) ) {
 			return; // already visited
 		}
 		visited.add(sI);
 		for (int i = 0; i < d.getNumberOfTransitions(); i++) {
 			Transition edge = d.transition(i);
 			DFAState edgeTarget = ((DFAState)edge.target);
 			/*
 			System.out.println(d.stateNumber+"-"+
 							   edge.label.toString(d.dfa.nfa.grammar)+"->"+
 							   edgeTarget.stateNumber);
 			*/
 			// if only one edge coming out, it is EOT, and target is accept prune
 			if ( PRUNE_TOKENS_RULE_SUPERFLUOUS_EOT_EDGES &&
 				edgeTarget.isAcceptState() &&
 				d.getNumberOfTransitions()==1 &&
 				edge.label.isAtom() &&
 				edge.label.getAtom()==Label.EOT )
 			{
 				//System.out.println("state "+d+" can be pruned");
 				// remove the superfluous EOT edge
 				d.removeTransition(i);
 				d.setAcceptState(true); // make it an accept state
 				// force it to uniquely predict the originally predicted state
 				d.cachedUniquelyPredicatedAlt =
 					edgeTarget.getUniquelyPredictedAlt();
 				i--; // back up one so that i++ of loop iteration stays within bounds
 			}
 			optimizeEOTBranches(edgeTarget);
 		}
 	}

 	/** Walk DFA states, unlinking the nfa configs and whatever else I
 	 *  can to reduce memory footprint.
 	protected void unlinkUnneededStateData(DFAState d) {
 		Integer sI = Utils.integer(d.stateNumber);
 		if ( visited.contains(sI) ) {
 			return; // already visited
 		}
 		visited.add(sI);
 		d.nfaConfigurations = null;
 		for (int i = 0; i < d.getNumberOfTransitions(); i++) {
 			Transition edge = (Transition) d.transition(i);
 			DFAState edgeTarget = ((DFAState)edge.target);
 			unlinkUnneededStateData(edgeTarget);
 		}
 	}
 	 */

 }
	/*
	* [The "BSD license"]
	* Copyright (c) 2010 Terence Parr
	* All rights reserved.
	*
	* Redistribution and use in source and binary forms, with or without
	* modification, are permitted provided that the following conditions
	* are met:
	* 1. Redistributions of source code must retain the above copyright
	* notice, this list of conditions and the following disclaimer.
	* 2. Redistributions in binary form must reproduce the above copyright
	* notice, this list of conditions and the following disclaimer in the
	* documentation and/or other materials provided with the distribution.
	* 3. The name of the author may not be used to endorse or promote products
	* derived from this software without specific prior written permission.
	*
	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
	* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
	* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
	* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
	* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
	*/
	package org.antlr.analysis;

	import org.antlr.misc.Utils;
	import org.antlr.tool.Grammar;

	import java.util.HashSet;
	import java.util.Set;

	/** A module to perform optimizations on DFAs.
	*
	* I could more easily (and more quickly) do some optimizations (such as
	* PRUNE_EBNF_EXIT_BRANCHES) during DFA construction, but then it
	* messes up the determinism checking. For example, it looks like
	* loop exit branches are unreachable if you prune exit branches
	* during DFA construction and before determinism checks.
	*
	* In general, ANTLR's NFA→DFA→codegen pipeline seems very robust
	* to me which I attribute to a uniform and consistent set of data
	* structures. Regardless of what I want to "say"/implement, I do so
	* within the confines of, for example, a DFA. The code generator
	* can then just generate code--it doesn't have to do much thinking.
	* Putting optimizations in the code gen code really starts to make
	* it a spagetti factory (uh oh, now I'm hungry!). The pipeline is
	* very testable; each stage has well defined input/output pairs.
	*
	* ### Optimization: PRUNE_EBNF_EXIT_BRANCHES
	*
	* There is no need to test EBNF block exit branches. Not only is it
	* an unneeded computation, but counter-intuitively, you actually get
	* better errors. You can report an error at the missing or extra
	* token rather than as soon as you've figured out you will fail.
	*
	* Imagine optional block "( DOT CLASS )? SEMI". ANTLR generates:
	*
	* int alt=0;
	* if ( input.LA(1)==DOT ) {
	* alt=1;
	* }
	* else if ( input.LA(1)==SEMI ) {
	* alt=2;
	* }
	*
	* Clearly, since Parser.match() will ultimately find the error, we
	* do not want to report an error nor do we want to bother testing
	* lookahead against what follows the (...)? We want to generate
	* simply "should I enter the subrule?":
	*
	* int alt=2;
	* if ( input.LA(1)==DOT ) {
	* alt=1;
	* }
	*
	* NOTE 1. Greedy loops cannot be optimized in this way. For example,
	* "(greedy=false:'x'\|.)* '\n'". You specifically need the exit branch
	* to tell you when to terminate the loop as the same input actually
	* predicts one of the alts (i.e., staying in the loop).
	*
	* NOTE 2. I do not optimize cyclic DFAs at the moment as it doesn't
	* seem to work. ;) I'll have to investigate later to see what work I
	* can do on cyclic DFAs to make them have fewer edges. Might have
	* something to do with the EOT token.
	*
	* ### PRUNE_SUPERFLUOUS_EOT_EDGES
	*
	* When a token is a subset of another such as the following rules, ANTLR
	* quietly assumes the first token to resolve the ambiguity.
	*
	* EQ : '=' ;
	* ASSIGNOP : '=' \| '+=' ;
	*
	* It can yield states that have only a single edge on EOT to an accept
	* state. This is a waste and messes up my code generation. ;) If
	* Tokens rule DFA goes
	*
	* s0 -'='-> s3 -EOT-> s5 (accept)
	*
	* then s5 should be pruned and s3 should be made an accept. Do NOT do this
	* for keyword versus ID as the state with EOT edge emanating from it will
	* also have another edge.
	*
	* ### Optimization: COLLAPSE_ALL_INCIDENT_EDGES
	*
	* Done during DFA construction. See method addTransition() in
	* NFAToDFAConverter.
	*
	* ### Optimization: MERGE_STOP_STATES
	*
	* Done during DFA construction. See addDFAState() in NFAToDFAConverter.
	*/
	public class DFAOptimizer {
	public static boolean PRUNE_EBNF_EXIT_BRANCHES = true;
	public static boolean PRUNE_TOKENS_RULE_SUPERFLUOUS_EOT_EDGES = true;
	public static boolean COLLAPSE_ALL_PARALLEL_EDGES = true;
	public static boolean MERGE_STOP_STATES = true;

	/** Used by DFA state machine generator to avoid infinite recursion
	* resulting from cycles int the DFA. This is a set of int state #s.
	* This is a side-effect of calling optimize; can't clear after use
	* because code gen needs it.
	*/
	protected Set<Integer> visited = new HashSet<Integer>();

	protected Grammar grammar;

	public DFAOptimizer(Grammar grammar) {
	this.grammar = grammar;
	}

	public void optimize() {
	// optimize each DFA in this grammar
	for (int decisionNumber=1;
	decisionNumber<=grammar.getNumberOfDecisions();
	decisionNumber++)
	{
	DFA dfa = grammar.getLookaheadDFA(decisionNumber);
	optimize(dfa);
	}
	}

	protected void optimize(DFA dfa) {
	if ( dfa==null ) {
	return; // nothing to do
	}
	/*
	System.out.println("Optimize DFA "+dfa.decisionNFAStartState.decisionNumber+
	" num states="+dfa.getNumberOfStates());
	*/
	//long start = System.currentTimeMillis();
	if ( PRUNE_EBNF_EXIT_BRANCHES && dfa.canInlineDecision() ) {
	visited.clear();
	int decisionType =
	dfa.getNFADecisionStartState().decisionStateType;
	if ( dfa.isGreedy() &&
	(decisionType==NFAState.OPTIONAL_BLOCK_START \|\|
	decisionType==NFAState.LOOPBACK) )
	{
	optimizeExitBranches(dfa.startState);
	}
	}
	// If the Tokens rule has syntactically ambiguous rules, try to prune
	if ( PRUNE_TOKENS_RULE_SUPERFLUOUS_EOT_EDGES &&
	dfa.isTokensRuleDecision() &&
	dfa.probe.stateToSyntacticallyAmbiguousTokensRuleAltsMap.size()>0 )
	{
	visited.clear();
	optimizeEOTBranches(dfa.startState);
	}

	/* ack...code gen needs this, cannot optimize
	visited.clear();
	unlinkUnneededStateData(dfa.startState);
	*/
	//long stop = System.currentTimeMillis();
	//System.out.println("minimized in "+(int)(stop-start)+" ms");
	}

	protected void optimizeExitBranches(DFAState d) {
	Integer sI = Utils.integer(d.stateNumber);
	if ( visited.contains(sI) ) {
	return; // already visited
	}
	visited.add(sI);
	int nAlts = d.dfa.getNumberOfAlts();
	for (int i = 0; i < d.getNumberOfTransitions(); i++) {
	Transition edge = d.transition(i);
	DFAState edgeTarget = ((DFAState)edge.target);
	/*
	System.out.println(d.stateNumber+"-"+
	edge.label.toString(d.dfa.nfa.grammar)+"->"+
	edgeTarget.stateNumber);
	*/
	// if target is an accept state and that alt is the exit alt
	if ( edgeTarget.isAcceptState() &&
	edgeTarget.getUniquelyPredictedAlt()==nAlts)
	{
	/*
	System.out.println("ignoring transition "+i+" to max alt "+
	d.dfa.getNumberOfAlts());
	*/
	d.removeTransition(i);
	i--; // back up one so that i++ of loop iteration stays within bounds
	}
	optimizeExitBranches(edgeTarget);
	}
	}

	protected void optimizeEOTBranches(DFAState d) {
	Integer sI = Utils.integer(d.stateNumber);
	if ( visited.contains(sI) ) {
	return; // already visited
	}
	visited.add(sI);
	for (int i = 0; i < d.getNumberOfTransitions(); i++) {
	Transition edge = d.transition(i);
	DFAState edgeTarget = ((DFAState)edge.target);
	/*
	System.out.println(d.stateNumber+"-"+
	edge.label.toString(d.dfa.nfa.grammar)+"->"+
	edgeTarget.stateNumber);
	*/
	// if only one edge coming out, it is EOT, and target is accept prune
	if ( PRUNE_TOKENS_RULE_SUPERFLUOUS_EOT_EDGES &&
	edgeTarget.isAcceptState() &&
	d.getNumberOfTransitions()==1 &&
	edge.label.isAtom() &&
	edge.label.getAtom()==Label.EOT )
	{
	//System.out.println("state "+d+" can be pruned");
	// remove the superfluous EOT edge
	d.removeTransition(i);
	d.setAcceptState(true); // make it an accept state
	// force it to uniquely predict the originally predicted state
	d.cachedUniquelyPredicatedAlt =
	edgeTarget.getUniquelyPredictedAlt();
	i--; // back up one so that i++ of loop iteration stays within bounds
	}
	optimizeEOTBranches(edgeTarget);
	}
	}

	/** Walk DFA states, unlinking the nfa configs and whatever else I
	* can to reduce memory footprint.
	protected void unlinkUnneededStateData(DFAState d) {
	Integer sI = Utils.integer(d.stateNumber);
	if ( visited.contains(sI) ) {
	return; // already visited
	}
	visited.add(sI);
	d.nfaConfigurations = null;
	for (int i = 0; i < d.getNumberOfTransitions(); i++) {
	Transition edge = (Transition) d.transition(i);
	DFAState edgeTarget = ((DFAState)edge.target);
	unlinkUnneededStateData(edgeTarget);
	}
	}
	*/

	}