| /* |
| * [The "BSD license"] |
| * Copyright (c) 2010 Terence Parr |
| * All rights reserved. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions |
| * are met: |
| * 1. Redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer. |
| * 2. Redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in the |
| * documentation and/or other materials provided with the distribution. |
| * 3. The name of the author may not be used to endorse or promote products |
| * derived from this software without specific prior written permission. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR |
| * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
| * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. |
| * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, |
| * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
| * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF |
| * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| */ |
| package org.antlr.analysis; |
| |
| import org.antlr.codegen.CodeGenerator; |
| import org.antlr.misc.IntSet; |
| import org.antlr.misc.IntervalSet; |
| import org.antlr.misc.Utils; |
| import org.antlr.runtime.IntStream; |
| import org.stringtemplate.v4.ST; |
| import org.antlr.tool.*; |
| |
| import java.util.*; |
| |
| /** A DFA (converted from a grammar's NFA). |
| * DFAs are used as prediction machine for alternative blocks in all kinds |
| * of recognizers (lexers, parsers, tree walkers). |
| */ |
| public class DFA { |
| public static final int REACHABLE_UNKNOWN = -2; |
| public static final int REACHABLE_BUSY = -1; // in process of computing |
| public static final int REACHABLE_NO = 0; |
| public static final int REACHABLE_YES = 1; |
| |
| public static final int CYCLIC_UNKNOWN = -2; |
| public static final int CYCLIC_BUSY = -1; // in process of computing |
| public static final int CYCLIC_DONE = 0; |
| |
| /** Prevent explosion of DFA states during conversion. The max number |
| * of states per alt in a single decision's DFA. |
| public static final int MAX_STATES_PER_ALT_IN_DFA = 450; |
| */ |
| |
| /** Set to 0 to not terminate early (time in ms) */ |
| public static int MAX_TIME_PER_DFA_CREATION = 1*1000; |
| |
| /** How many edges can each DFA state have before a "special" state |
| * is created that uses IF expressions instead of a table? |
| */ |
| public static int MAX_STATE_TRANSITIONS_FOR_TABLE = 65534; |
| |
| /** What's the start state for this DFA? */ |
| public DFAState startState; |
| |
| /** This DFA is being built for which decision? */ |
| public int decisionNumber = 0; |
| |
| /** From what NFAState did we create the DFA? */ |
| public NFAState decisionNFAStartState; |
| |
| /** The printable grammar fragment associated with this DFA */ |
| public String description; |
| |
| /** A set of all uniquely-numbered DFA states. Maps hash of DFAState |
| * to the actual DFAState object. We use this to detect |
| * existing DFA states. Map<DFAState,DFAState>. Use Map so |
| * we can get old state back (Set only allows you to see if it's there). |
| * Not used during fixed k lookahead as it's a waste to fill it with |
| * a dup of states array. |
| */ |
| protected Map<DFAState, DFAState> uniqueStates = new HashMap<DFAState, DFAState>(); |
| |
| /** Maps the state number to the actual DFAState. Use a Vector as it |
| * grows automatically when I set the ith element. This contains all |
| * states, but the states are not unique. s3 might be same as s1 so |
| * s3 -> s1 in this table. This is how cycles occur. If fixed k, |
| * then these states will all be unique as states[i] always points |
| * at state i when no cycles exist. |
| * |
| * This is managed in parallel with uniqueStates and simply provides |
| * a way to go from state number to DFAState rather than via a |
| * hash lookup. |
| */ |
| protected Vector<DFAState> states = new Vector<DFAState>(); |
| |
| /** Unique state numbers per DFA */ |
| protected int stateCounter = 0; |
| |
| /** count only new states not states that were rejected as already present */ |
| protected int numberOfStates = 0; |
| |
| /** User specified max fixed lookahead. If 0, nothing specified. -1 |
| * implies we have not looked at the options table yet to set k. |
| */ |
| protected int user_k = -1; |
| |
| /** While building the DFA, track max lookahead depth if not cyclic */ |
| protected int max_k = -1; |
| |
| /** Is this DFA reduced? I.e., can all states lead to an accept state? */ |
| protected boolean reduced = true; |
| |
| /** Are there any loops in this DFA? |
| * Computed by doesStateReachAcceptState() |
| */ |
| protected boolean cyclic = false; |
| |
| /** Track whether this DFA has at least one sem/syn pred encountered |
| * during a closure operation. This is useful for deciding whether |
| * to retry a non-LL(*) with k=1. If no pred, it will not work w/o |
| * a pred so don't bother. It would just give another error message. |
| */ |
| public boolean predicateVisible = false; |
| |
| public boolean hasPredicateBlockedByAction = false; |
| |
| /** Each alt in an NFA derived from a grammar must have a DFA state that |
| * predicts it lest the parser not know what to do. Nondeterminisms can |
| * lead to this situation (assuming no semantic predicates can resolve |
| * the problem) and when for some reason, I cannot compute the lookahead |
| * (which might arise from an error in the algorithm or from |
| * left-recursion etc...). This list starts out with all alts contained |
| * and then in method doesStateReachAcceptState() I remove the alts I |
| * know to be uniquely predicted. |
| */ |
| protected List<Integer> unreachableAlts; |
| |
| protected int nAlts = 0; |
| |
| /** We only want one accept state per predicted alt; track here */ |
| protected DFAState[] altToAcceptState; |
| |
| /** Track whether an alt discovers recursion for each alt during |
| * NFA to DFA conversion; >1 alt with recursion implies nonregular. |
| */ |
| public IntSet recursiveAltSet = new IntervalSet(); |
| |
| /** Which NFA are we converting (well, which piece of the NFA)? */ |
| public NFA nfa; |
| |
| protected NFAToDFAConverter nfaConverter; |
| |
| /** This probe tells you a lot about a decision and is useful even |
| * when there is no error such as when a syntactic nondeterminism |
| * is solved via semantic predicates. Perhaps a GUI would want |
| * the ability to show that. |
| */ |
| public DecisionProbe probe = new DecisionProbe(this); |
| |
| /** Track absolute time of the conversion so we can have a failsafe: |
| * if it takes too long, then terminate. Assume bugs are in the |
| * analysis engine. |
| */ |
| //protected long conversionStartTime; |
| |
| /** Map an edge transition table to a unique set number; ordered so |
| * we can push into the output template as an ordered list of sets |
| * and then ref them from within the transition[][] table. Like this |
| * for C# target: |
| * public static readonly DFA30_transition0 = |
| * new short[] { 46, 46, -1, 46, 46, -1, -1, -1, -1, -1, -1, -1,...}; |
| * public static readonly DFA30_transition1 = |
| * new short[] { 21 }; |
| * public static readonly short[][] DFA30_transition = { |
| * DFA30_transition0, |
| * DFA30_transition0, |
| * DFA30_transition1, |
| * ... |
| * }; |
| */ |
| public Map edgeTransitionClassMap = new LinkedHashMap(); |
| |
| /** The unique edge transition class number; every time we see a new |
| * set of edges emanating from a state, we number it so we can reuse |
 *  if it's ever seen again for another state.  For Java grammar,
| * some of the big edge transition tables are seen about 57 times. |
| */ |
| protected int edgeTransitionClass =0; |
| |
| /* This DFA can be converted to a transition[state][char] table and |
| * the following tables are filled by createStateTables upon request. |
| * These are injected into the templates for code generation. |
| * See March 25, 2006 entry for description: |
| * http://www.antlr.org/blog/antlr3/codegen.tml |
| * Often using Vector as can't set ith position in a List and have |
| * it extend list size; bizarre. |
| */ |
| |
| /** List of special DFAState objects */ |
| public List specialStates; |
| /** List of ST for special states. */ |
| public List specialStateSTs; |
| public Vector accept; |
| public Vector eot; |
| public Vector eof; |
| public Vector min; |
| public Vector max; |
| public Vector special; |
| public Vector transition; |
| /** just the Vector<Integer> indicating which unique edge table is at |
| * position i. |
| */ |
| public Vector transitionEdgeTables; // not used by java yet |
| protected int uniqueCompressedSpecialStateNum = 0; |
| |
| /** Which generator to use if we're building state tables */ |
| protected CodeGenerator generator = null; |
| |
| protected DFA() {;} |
| |
	/** Build the DFA predictor for one decision: run NFA-to-DFA conversion,
	 *  verify the result (reachability/reducedness), report warnings for
	 *  nondeterminism or overflow, and renumber states contiguously for
	 *  table-driven code generation.
	 *
	 *  @param decisionNumber     which decision of the grammar this DFA predicts
	 *  @param decisionStartState root NFA state of the decision being converted
	 */
	public DFA(int decisionNumber, NFAState decisionStartState) {
		this.decisionNumber = decisionNumber;
		this.decisionNFAStartState = decisionStartState;
		nfa = decisionStartState.nfa;
		nAlts = nfa.grammar.getNumberOfAltsForDecisionNFA(decisionStartState);
		//setOptions( nfa.grammar.getDecisionOptions(getDecisionNumber()) );
		initAltRelatedInfo();

		//long start = System.currentTimeMillis();
		nfaConverter = new NFAToDFAConverter(this);
		try {
			nfaConverter.convert();

			// figure out if there are problems with decision
			verify();

			if ( !probe.isDeterministic() || probe.analysisOverflowed() ) {
				probe.issueWarnings();
			}

			// must be after verify as it computes cyclic, needed by this routine
			// should be after warnings because early termination or something
			// will not allow the reset to operate properly in some cases.
			resetStateNumbersToBeContiguous();

			//long stop = System.currentTimeMillis();
			//System.out.println("verify cost: "+(int)(stop-start)+" ms");
		}
//		catch (AnalysisTimeoutException at) {
//			probe.reportAnalysisTimeout();
//			if ( !okToRetryDFAWithK1() ) {
//				probe.issueWarnings();
//			}
//		}
		catch (NonLLStarDecisionException nonLL) {
			// decision needs more than LL(*); record it and, unless a k=1
			// retry is possible, emit the warnings now.
			probe.reportNonLLStarDecision(this);
			// >1 alt recurses, k=* and no auto backtrack nor manual sem/syn
			if ( !okToRetryDFAWithK1() ) {
				probe.issueWarnings();
			}
		}
	}
| |
	/** Walk all states and reset their numbers to be a contiguous sequence
	 *  of integers starting from 0. Only cyclic DFA can have unused positions
	 *  in states list. State i might be identical to a previous state j and
	 *  will result in states[i] == states[j]. We don't want to waste a state
	 *  number on this. Useful mostly for code generation in tables.
	 *
	 *  At the start of this routine, states[i].stateNumber &lt;= i by definition.
	 *  If states[50].stateNumber is 50 then a cycle during conversion may
	 *  try to add state 103, but we find that an identical DFA state, named
	 *  50, already exists, hence, states[103]==states[50] and both have
	 *  stateNumber 50 as they point at same object. Afterwards, the set
	 *  of state numbers from all states should represent a contiguous range
	 *  from 0..n-1 where n is the number of unique states.
	 */
	public void resetStateNumbersToBeContiguous() {
		if ( getUserMaxLookahead()>0 ) {
			// all numbers are unique already; no states are thrown out.
			return;
		}

		// walk list of DFAState objects by state number,
		// setting state numbers to 0..n-1
		int snum=0;
		for (int i = 0; i <= getMaxStateNumber(); i++) {
			DFAState s = getState(i);
			// some states are unused after creation most commonly due to cycles
			// or conflict resolution.
			if ( s==null ) {
				continue;
			}
			// state i is mapped to DFAState with state number set to i originally
			// so if it's less than i, then we renumbered it already; that
			// happens when states have been merged or cycles occurred I think.
			// states[50] will point to DFAState with s50 in it but
			// states[103] might also point at this same DFAState. Since
			// 50 < 103 then it's already been renumbered as it points downwards.
			boolean alreadyRenumbered = s.stateNumber<i;
			if ( !alreadyRenumbered ) {
				// state i is a valid state, reset it's state number
				s.stateNumber = snum; // rewrite state numbers to be 0..n-1
				snum++;
			}
		}
		// sanity check: the contiguous renumbering must account for exactly
		// the unique states tracked during conversion.
		if ( snum!=getNumberOfStates() ) {
			ErrorManager.internalError("DFA "+decisionNumber+": "+
				decisionNFAStartState.getDescription()+" num unique states "+getNumberOfStates()+
				"!= num renumbered states "+snum);
		}
	}
| |
	// JAVA-SPECIFIC Accessors!!!!! It is so impossible to get arrays
	// or even consistently formatted strings acceptable to java that
	// I am forced to build the individual char elements here

	/** Run-length-encoded views of the DFA tables for the Java target
	 *  templates; each returns a List of char-escape strings
	 *  (see getRunLengthEncoding).
	 */
	public List getJavaCompressedAccept() { return getRunLengthEncoding(accept); }
	public List getJavaCompressedEOT() { return getRunLengthEncoding(eot); }
	public List getJavaCompressedEOF() { return getRunLengthEncoding(eof); }
	public List getJavaCompressedMin() { return getRunLengthEncoding(min); }
	public List getJavaCompressedMax() { return getRunLengthEncoding(max); }
	public List getJavaCompressedSpecial() { return getRunLengthEncoding(special); }
| public List getJavaCompressedTransition() { |
| if ( transition==null || transition.size()==0 ) { |
| return null; |
| } |
| List encoded = new ArrayList(transition.size()); |
| // walk Vector<Vector<FormattedInteger>> which is the transition[][] table |
| for (int i = 0; i < transition.size(); i++) { |
| Vector transitionsForState = (Vector) transition.elementAt(i); |
| encoded.add(getRunLengthEncoding(transitionsForState)); |
| } |
| return encoded; |
| } |
| |
	/** Compress the incoming data list so that runs of same number are
	 *  encoded as number,value pair sequences. 3 -1 -1 -1 28 is encoded
	 *  as 1 3 3 -1 1 28. I am pretty sure this is the lossless compression
	 *  that GIF files use. Transition tables are heavily compressed by
	 *  this technique. I got the idea from JFlex http://jflex.de/
	 *
	 *  Return List&lt;String&gt; where each string is either \xyz for 8bit char
	 *  and \uFFFF for 16bit. Hideous and specific to Java, but it is the
	 *  only target bad enough to need it.
	 *
	 *  @param data list of Integer values; null entries are treated as -1
	 *  @return alternating (run length, value) target char escapes, or a
	 *          single "" placeholder when data is null/empty
	 */
	public List getRunLengthEncoding(List data) {
		if ( data==null || data.size()==0 ) {
			// for states with no transitions we want an empty string ""
			// to hold its place in the transitions array.
			List empty = new ArrayList();
			empty.add("");
			return empty;
		}
		int size = Math.max(2,data.size()/2);
		List encoded = new ArrayList(size); // guess at size
		// scan values looking for runs
		int i = 0;
		Integer emptyValue = Utils.integer(-1); // null slots encode as -1
		while ( i < data.size() ) {
			Integer I = (Integer)data.get(i);
			if ( I==null ) {
				I = emptyValue;
			}
			// count how many v there are?
			int n = 0;
			for (int j = i; j < data.size(); j++) {
				Integer v = (Integer)data.get(j);
				if ( v==null ) {
					v = emptyValue;
				}
				if ( I.equals(v) ) {
					n++;
				}
				else {
					break;
				}
			}
			// emit the (count, value) pair as target-specific char escapes
			encoded.add(generator.target.encodeIntAsCharEscape((char)n));
			encoded.add(generator.target.encodeIntAsCharEscape((char)I.intValue()));
			i+=n;
		}
		return encoded;
	}
| |
	/** Build all code-generation tables (accept, eot, eof, min, max, special,
	 *  transition) for this DFA and then generate templates for any "special"
	 *  states discovered along the way.
	 *
	 *  @param generator target-specific code generator used to compute string
	 *                   escapes and special-state templates
	 */
	public void createStateTables(CodeGenerator generator) {
		//System.out.println("createTables:\n"+this);
		this.generator = generator;
		description = getNFADecisionStartState().getDescription();
		description =
			generator.target.getTargetStringLiteralFromString(description);

		// create all the tables
		special = new Vector(this.getNumberOfStates()); // Vector<short>
		special.setSize(this.getNumberOfStates());
		specialStates = new ArrayList();				// List<DFAState>
		specialStateSTs = new ArrayList();				// List<ST>
		accept = new Vector(this.getNumberOfStates()); // Vector<int>
		accept.setSize(this.getNumberOfStates());
		eot = new Vector(this.getNumberOfStates()); // Vector<int>
		eot.setSize(this.getNumberOfStates());
		eof = new Vector(this.getNumberOfStates()); // Vector<int>
		eof.setSize(this.getNumberOfStates());
		min = new Vector(this.getNumberOfStates()); // Vector<int>
		min.setSize(this.getNumberOfStates());
		max = new Vector(this.getNumberOfStates()); // Vector<int>
		max.setSize(this.getNumberOfStates());
		transition = new Vector(this.getNumberOfStates()); // Vector<Vector<int>>
		transition.setSize(this.getNumberOfStates());
		transitionEdgeTables = new Vector(this.getNumberOfStates()); // Vector<Vector<int>>
		transitionEdgeTables.setSize(this.getNumberOfStates());

		// for each state in the DFA, fill relevant tables.
		Iterator it = null;
		if ( getUserMaxLookahead()>0 ) {
			// fixed k: every state lives (uniquely) in the states list
			it = states.iterator();
		}
		else {
			it = getUniqueStates().values().iterator();
		}
		while ( it.hasNext() ) {
			DFAState s = (DFAState)it.next();
			if ( s==null ) {
				// ignore null states; some acyclic DFA see this condition
				// when inlining DFA (due to lacking of exit branch pruning?)
				continue;
			}
			if ( s.isAcceptState() ) {
				// can't compute min,max,special,transition on accepts
				accept.set(s.stateNumber,
						   Utils.integer(s.getUniquelyPredictedAlt()));
			}
			else {
				createMinMaxTables(s);
				createTransitionTableEntryForState(s);
				createSpecialTable(s);
				createEOTAndEOFTables(s);
			}
		}

		// now that we have computed list of specialStates, gen code for 'em
		for (int i = 0; i < specialStates.size(); i++) {
			DFAState ss = (DFAState) specialStates.get(i);
			ST stateST =
				generator.generateSpecialState(ss);
			specialStateSTs.add(stateST);
		}

		// check that the tables are not messed up by encode/decode
		/*
		testEncodeDecode(min);
		testEncodeDecode(max);
		testEncodeDecode(accept);
		testEncodeDecode(special);
		System.out.println("min="+min);
		System.out.println("max="+max);
		System.out.println("eot="+eot);
		System.out.println("eof="+eof);
		System.out.println("accept="+accept);
		System.out.println("special="+special);
		System.out.println("transition="+transition);
		*/
	}
| |
| /* |
| private void testEncodeDecode(List data) { |
| System.out.println("data="+data); |
| List encoded = getRunLengthEncoding(data); |
| StringBuffer buf = new StringBuffer(); |
| for (int i = 0; i < encoded.size(); i++) { |
| String I = (String)encoded.get(i); |
| int v = 0; |
| if ( I.startsWith("\\u") ) { |
| v = Integer.parseInt(I.substring(2,I.length()), 16); |
| } |
| else { |
| v = Integer.parseInt(I.substring(1,I.length()), 8); |
| } |
| buf.append((char)v); |
| } |
| String encodedS = buf.toString(); |
| short[] decoded = org.antlr.runtime.DFA.unpackEncodedString(encodedS); |
| //System.out.println("decoded:"); |
| for (int i = 0; i < decoded.length; i++) { |
| short x = decoded[i]; |
| if ( x!=((Integer)data.get(i)).intValue() ) { |
| System.err.println("problem with encoding"); |
| } |
| //System.out.print(", "+x); |
| } |
| //System.out.println(); |
| } |
| */ |
| |
	/** Compute min[s] and max[s]: the smallest and largest valid input label
	 *  among s's atom and set edges. Labels below Label.MIN_CHAR_VALUE
	 *  (e.g., EOF/EOT sentinels) are excluded from the range.
	 */
	protected void createMinMaxTables(DFAState s) {
		int smin = Label.MAX_CHAR_VALUE + 1;  // start above any valid label
		int smax = Label.MIN_ATOM_VALUE - 1;  // start below any valid label
		for (int j = 0; j < s.getNumberOfTransitions(); j++) {
			Transition edge = (Transition) s.transition(j);
			Label label = edge.label;
			if ( label.isAtom() ) {
				if ( label.getAtom()>=Label.MIN_CHAR_VALUE ) {
					if ( label.getAtom()<smin ) {
						smin = label.getAtom();
					}
					if ( label.getAtom()>smax ) {
						smax = label.getAtom();
					}
				}
			}
			else if ( label.isSet() ) {
				IntervalSet labels = (IntervalSet)label.getSet();
				int lmin = labels.getMinElement();
				// if valid char (don't do EOF) and less than current min
				if ( lmin<smin && lmin>=Label.MIN_CHAR_VALUE ) {
					smin = labels.getMinElement();
				}
				if ( labels.getMaxElement()>smax ) {
					smax = labels.getMaxElement();
				}
			}
		}

		if ( smax<0 ) {
			// must be predicates or pure EOT transition; just zero out min, max
			smin = Label.MIN_CHAR_VALUE;
			smax = Label.MIN_CHAR_VALUE;
		}

		min.set(s.stateNumber, Utils.integer((char)smin));
		max.set(s.stateNumber, Utils.integer((char)smax));

		// NOTE(review): after the clamp above, smax<0 appears unreachable;
		// check kept unchanged to preserve behavior.
		if ( smax<0 || smin>Label.MAX_CHAR_VALUE || smin<0 ) {
			ErrorManager.internalError("messed up: min="+min+", max="+max);
		}
	}
| |
	/** Fill transition[s] with a row mapping (label - min[s]) offsets to
	 *  target state numbers for each atom/set edge of s, then intern the
	 *  row in edgeTransitionClassMap so identical rows share one table
	 *  entry (transitionEdgeTables records which shared row s uses).
	 */
	protected void createTransitionTableEntryForState(DFAState s) {
		/*
		System.out.println("createTransitionTableEntryForState s"+s.stateNumber+
			" dec "+s.dfa.decisionNumber+" cyclic="+s.dfa.isCyclic());
		*/
		// min/max were computed by createMinMaxTables for this state
		int smax = ((Integer)max.get(s.stateNumber)).intValue();
		int smin = ((Integer)min.get(s.stateNumber)).intValue();

		Vector stateTransitions = new Vector(smax-smin+1);
		stateTransitions.setSize(smax-smin+1);
		transition.set(s.stateNumber, stateTransitions);
		for (int j = 0; j < s.getNumberOfTransitions(); j++) {
			Transition edge = (Transition) s.transition(j);
			Label label = edge.label;
			if ( label.isAtom() && label.getAtom()>=Label.MIN_CHAR_VALUE ) {
				int labelIndex = label.getAtom()-smin; // offset from 0
				stateTransitions.set(labelIndex,
									 Utils.integer(edge.target.stateNumber));
			}
			else if ( label.isSet() ) {
				IntervalSet labels = (IntervalSet)label.getSet();
				int[] atoms = labels.toArray();
				for (int a = 0; a < atoms.length; a++) {
					// set the transition if the label is valid (don't do EOF)
					if ( atoms[a]>=Label.MIN_CHAR_VALUE ) {
						int labelIndex = atoms[a]-smin; // offset from 0
						stateTransitions.set(labelIndex,
											 Utils.integer(edge.target.stateNumber));
					}
				}
			}
		}
		// track unique state transition tables so we can reuse
		Integer edgeClass = (Integer)edgeTransitionClassMap.get(stateTransitions);
		if ( edgeClass!=null ) {
			//System.out.println("we've seen this array before; size="+stateTransitions.size());
			transitionEdgeTables.set(s.stateNumber, edgeClass);
		}
		else {
			// first time this exact row is seen; assign it a new class number
			edgeClass = Utils.integer(edgeTransitionClass);
			transitionEdgeTables.set(s.stateNumber, edgeClass);
			edgeTransitionClassMap.put(stateTransitions, edgeClass);
			edgeTransitionClass++;
		}
	}
| |
	/** Set up the EOT and EOF tables; we cannot put -1 min/max values so
	 *  we need another way to test that in the DFA transition function.
	 *  eot[s]/eof[s] record the target state for EOT/EOF edges out of s.
	 */
	protected void createEOTAndEOFTables(DFAState s) {
		for (int j = 0; j < s.getNumberOfTransitions(); j++) {
			Transition edge = (Transition) s.transition(j);
			Label label = edge.label;
			if ( label.isAtom() ) {
				if ( label.getAtom()==Label.EOT ) {
					// eot[s] points to accept state
					eot.set(s.stateNumber, Utils.integer(edge.target.stateNumber));
				}
				else if ( label.getAtom()==Label.EOF ) {
					// eof[s] points to accept state
					eof.set(s.stateNumber, Utils.integer(edge.target.stateNumber));
				}
			}
			else if ( label.isSet() ) {
				// EOT/EOF may also be buried in a set label; scan its atoms
				IntervalSet labels = (IntervalSet)label.getSet();
				int[] atoms = labels.toArray();
				for (int a = 0; a < atoms.length; a++) {
					if ( atoms[a]==Label.EOT ) {
						// eot[s] points to accept state
						eot.set(s.stateNumber, Utils.integer(edge.target.stateNumber));
					}
					else if ( atoms[a]==Label.EOF ) {
						eof.set(s.stateNumber, Utils.integer(edge.target.stateNumber));
					}
				}
			}
		}
	}
| |
	/** Decide whether state s must be a "special" state: one whose edges
	 *  carry predicates (or gated predicates) or whose label range is too
	 *  wide for a plain table. Special states get sequential numbers in
	 *  special[] and code is generated for them separately; all other
	 *  states get -1.
	 */
	protected void createSpecialTable(DFAState s) {
		// number all special states from 0...n-1 instead of their usual numbers
		boolean hasSemPred = false;

		// TODO this code is very similar to canGenerateSwitch. Refactor to share
		for (int j = 0; j < s.getNumberOfTransitions(); j++) {
			Transition edge = (Transition) s.transition(j);
			Label label = edge.label;
			// can't do a switch if the edges have preds or are going to
			// require gated predicates
			if ( label.isSemanticPredicate() ||
				 ((DFAState)edge.target).getGatedPredicatesInNFAConfigurations()!=null)
			{
				hasSemPred = true;
				break;
			}
		}
		// if has pred or too big for table, make it special
		int smax = ((Integer)max.get(s.stateNumber)).intValue();
		int smin = ((Integer)min.get(s.stateNumber)).intValue();
		if ( hasSemPred || smax-smin>MAX_STATE_TRANSITIONS_FOR_TABLE ) {
			special.set(s.stateNumber,
						Utils.integer(uniqueCompressedSpecialStateNum));
			uniqueCompressedSpecialStateNum++;
			specialStates.add(s);
		}
		else {
			special.set(s.stateNumber, Utils.integer(-1)); // not special
		}
	}
| |
| public int predict(IntStream input) { |
| Interpreter interp = new Interpreter(nfa.grammar, input); |
| return interp.predict(this); |
| } |
| |
	/** Add a new DFA state to this DFA if not already present.
	 *  To force an acyclic, fixed maximum depth DFA, just always
	 *  return the incoming state. By not reusing old states,
	 *  no cycles can be created. If we're doing fixed k lookahead
	 *  don't update uniqueStates, just return incoming state, which
	 *  indicates it's a new state.
	 *
	 *  @return the canonical (pre-existing) state when an equal state
	 *          already exists; otherwise d itself
	 */
	protected DFAState addState(DFAState d) {
		if ( getUserMaxLookahead()>0 ) {
			return d;
		}
		// does a DFA state exist already with everything the same
		// except its state number?
		DFAState existing = (DFAState)uniqueStates.get(d);
		if ( existing != null ) {
			/*
			System.out.println("state "+d.stateNumber+" exists as state "+
				existing.stateNumber);
			*/
			// already there...get the existing DFA state
			return existing;
		}

		// if not there, then add new state.
		uniqueStates.put(d,d);
		numberOfStates++;
		return d;
	}
| |
| public void removeState(DFAState d) { |
| DFAState it = (DFAState)uniqueStates.remove(d); |
| if ( it!=null ) { |
| numberOfStates--; |
| } |
| } |
| |
	/** Canonical DFAState-to-DFAState map used during conversion to detect
	 *  duplicate states; not populated when a fixed user k is in force
	 *  (see addState).
	 */
	public Map<DFAState, DFAState> getUniqueStates() {
		return uniqueStates;
	}

	/** What is the max state number ever created? This may be beyond
	 *  getNumberOfStates() because the states list can hold duplicate
	 *  references and unused slots after merges/cycles.
	 */
	public int getMaxStateNumber() {
		return states.size()-1;
	}

	/** Look up a DFAState by state number in the (possibly sparse) states list. */
	public DFAState getState(int stateNumber) {
		return (DFAState)states.get(stateNumber);
	}

	/** Install d at position stateNumber in the states list. */
	public void setState(int stateNumber, DFAState d) {
		states.set(stateNumber, d);
	}
| |
	/** Is the DFA reduced? I.e., does every state have a path to an accept
	 *  state? If not, don't delete as we need to generate an error indicating
	 *  which paths are "dead ends". Also tracks list of alts with no accept
	 *  state in the DFA. Must call verify() first before this makes sense.
	 */
	public boolean isReduced() {
		return reduced;
	}

	/** Is this DFA cyclic? That is, are there any loops? If not, then
	 *  the DFA is essentially an LL(k) predictor for some fixed, max k value.
	 *  We can build a series of nested IF statements to match this. In the
	 *  presence of cycles, we need to build a general DFA and interpret it
	 *  to distinguish between alternatives.
	 *  A user-specified fixed k forces the acyclic answer regardless of the
	 *  computed cyclic flag.
	 */
	public boolean isCyclic() {
		return cyclic && getUserMaxLookahead()==0;
	}

	/** A "classic" DFA is acyclic and its decision uses neither semantic
	 *  nor syntactic predicates, per the grammar's tracking sets.
	 */
	public boolean isClassicDFA() {
		return !isCyclic() &&
			!nfa.grammar.decisionsWhoseDFAsUsesSemPreds.contains(this) &&
			!nfa.grammar.decisionsWhoseDFAsUsesSynPreds.contains(this);
	}

	/** Can code for this decision be generated inline (nested IFs) rather
	 *  than as a table-driven DFA? Requires acyclic, LL(*)-analyzable,
	 *  and fewer states than the generator's inline limit.
	 */
	public boolean canInlineDecision() {
		return !isCyclic() &&
			!probe.isNonLLStarDecision() &&
			getNumberOfStates() < CodeGenerator.MAX_ACYCLIC_DFA_STATES_INLINE;
	}
| |
| /** Is this DFA derived from the NFA for the Tokens rule? */ |
| public boolean isTokensRuleDecision() { |
| if ( nfa.grammar.type!=Grammar.LEXER ) { |
| return false; |
| } |
| NFAState nfaStart = getNFADecisionStartState(); |
| Rule r = nfa.grammar.getLocallyDefinedRule(Grammar.ARTIFICIAL_TOKENS_RULENAME); |
| NFAState TokensRuleStart = r.startState; |
| NFAState TokensDecisionStart = |
| (NFAState)TokensRuleStart.transition[0].target; |
| return nfaStart == TokensDecisionStart; |
| } |
| |
| /** The user may specify a max, acyclic lookahead for any decision. No |
| * DFA cycles are created when this value, k, is greater than 0. |
| * If this decision has no k lookahead specified, then try the grammar. |
| */ |
| public int getUserMaxLookahead() { |
| if ( user_k>=0 ) { // cache for speed |
| return user_k; |
| } |
| user_k = nfa.grammar.getUserMaxLookahead(decisionNumber); |
| return user_k; |
| } |
| |
	/** Is automatic backtracking enabled for this decision (per grammar/option)? */
	public boolean getAutoBacktrackMode() {
		return nfa.grammar.getAutoBacktrackMode(decisionNumber);
	}

	/** Override the cached user-specified lookahead k for this decision. */
	public void setUserMaxLookahead(int k) {
		this.user_k = k;
	}
| |
	/** Return k if decision is LL(k) for some k else return max int
	 */
	public int getMaxLookaheadDepth() {
		if ( hasCycle() ) return Integer.MAX_VALUE;
		// compute to be sure
		return _getMaxLookaheadDepth(startState, 0);
	}

	/** DFS from d, returning the deepest non-predicate path length.
	 *  Termination relies on the caller having checked hasCycle() first.
	 */
	int _getMaxLookaheadDepth(DFAState d, int depth) {
		// not cyclic; don't worry about termination
		// fail if pred edge.
		int max = depth;
		for (int i=0; i<d.getNumberOfTransitions(); i++) {
			Transition t = d.transition(i);
//			if ( t.isSemanticPredicate() ) return Integer.MAX_VALUE;
			if ( !t.isSemanticPredicate() ) {
				// if pure pred not gated, it must target stop state; don't count
				DFAState edgeTarget = (DFAState)t.target;
				int m = _getMaxLookaheadDepth(edgeTarget, depth+1);
				max = Math.max(max, m);
			}
		}
		return max;
	}
| |
| /** Count all disambiguating syn preds (ignore synpred tests |
| * for gated edges, which occur for nonambig input sequences). |
| * E.g., |
| * x : (X)=> (X|Y)\n" + |
| * | X\n" + |
| * ; |
| * |
| * gives |
| * |
| * .s0-X->.s1 |
| * .s0-Y&&{synpred1_t}?->:s2=>1 |
| * .s1-{synpred1_t}?->:s2=>1 |
| * .s1-{true}?->:s3=>2 |
| */ |
| public boolean hasSynPred() { |
| boolean has = _hasSynPred(startState, new HashSet<DFAState>()); |
| // if ( !has ) { |
| // System.out.println("no synpred in dec "+decisionNumber); |
| // FASerializer serializer = new FASerializer(nfa.grammar); |
| // String result = serializer.serialize(startState); |
| // System.out.println(result); |
| // } |
| return has; |
| } |
| |
| public boolean getHasSynPred() { return hasSynPred(); } // for ST |
| |
| boolean _hasSynPred(DFAState d, Set<DFAState> busy) { |
| busy.add(d); |
| for (int i=0; i<d.getNumberOfTransitions(); i++) { |
| Transition t = d.transition(i); |
| if ( t.isSemanticPredicate() ) { |
| SemanticContext ctx = t.label.getSemanticContext(); |
| // if ( ctx.toString().indexOf("synpred")>=0 ) { |
| // System.out.println("has pred "+ctx.toString()+" "+ctx.isSyntacticPredicate()); |
| // System.out.println(((SemanticContext.Predicate)ctx).predicateAST.token); |
| // } |
| if ( ctx.isSyntacticPredicate() ) return true; |
| } |
| DFAState edgeTarget = (DFAState)t.target; |
| if ( !busy.contains(edgeTarget) && _hasSynPred(edgeTarget, busy) ) return true; |
| } |
| |
| return false; |
| } |
| |
| public boolean hasSemPred() { // has user-defined sempred |
| boolean has = _hasSemPred(startState, new HashSet<DFAState>()); |
| return has; |
| } |
| |
| boolean _hasSemPred(DFAState d, Set<DFAState> busy) { |
| busy.add(d); |
| for (int i=0; i<d.getNumberOfTransitions(); i++) { |
| Transition t = d.transition(i); |
| if ( t.isSemanticPredicate() ) { |
| SemanticContext ctx = t.label.getSemanticContext(); |
| if ( ctx.hasUserSemanticPredicate() ) return true; |
| } |
| DFAState edgeTarget = (DFAState)t.target; |
| if ( !busy.contains(edgeTarget) && _hasSemPred(edgeTarget, busy) ) return true; |
| } |
| |
| return false; |
| } |
| |
| /** Compute cyclic w/o relying on state computed during analysis. just check. */ |
| public boolean hasCycle() { |
| boolean cyclic = _hasCycle(startState, new HashMap<DFAState, Integer>()); |
| return cyclic; |
| } |
| |
| boolean _hasCycle(DFAState d, Map<DFAState, Integer> busy) { |
| busy.put(d, CYCLIC_BUSY); |
| for (int i=0; i<d.getNumberOfTransitions(); i++) { |
| Transition t = d.transition(i); |
| DFAState target = (DFAState)t.target; |
| int cond = CYCLIC_UNKNOWN; |
| if ( busy.get(target)!=null ) cond = busy.get(target); |
| if ( cond==CYCLIC_BUSY ) return true; |
| if ( cond!=CYCLIC_DONE && _hasCycle(target, busy) ) return true; |
| } |
| busy.put(d, CYCLIC_DONE); |
| return false; |
| } |
| |
| |
	/** Return a list of Integer alt numbers for which no lookahead could
	 *  be computed or for which no single DFA accept state predicts those
	 *  alts.  Must call verify() first before this makes sense; verify()
	 *  removes each alt from this list as its accept state is reached.
	 */
	public List<Integer> getUnreachableAlts() {
		return unreachableAlts;
	}
| |
	/** Once this DFA has been built, need to verify that:
	 *
	 *  1. it's reduced
	 *  2. all alts have an accept state
	 *
	 *  Elsewhere, in the NFA converter, we need to verify that:
	 *
	 *  3. alts i and j have disjoint lookahead if no sem preds
	 *  4. if sem preds, nondeterministic alts must be sufficiently covered
	 *
	 *  This is avoided if analysis bails out for any reason.
	 */
	public void verify() {
		// single DFS marks reachability on every state and updates
		// 'reduced', 'cyclic', and 'unreachableAlts' as side effects
		doesStateReachAcceptState(startState);
	}
| |
| /** figure out if this state eventually reaches an accept state and |
| * modify the instance variable 'reduced' to indicate if we find |
| * at least one state that cannot reach an accept state. This implies |
| * that the overall DFA is not reduced. This algorithm should be |
| * linear in the number of DFA states. |
| * |
| * The algorithm also tracks which alternatives have no accept state, |
| * indicating a nondeterminism. |
| * |
| * Also computes whether the DFA is cyclic. |
| * |
| * TODO: I call getUniquelyPredicatedAlt too much; cache predicted alt |
| */ |
| protected boolean doesStateReachAcceptState(DFAState d) { |
| if ( d.isAcceptState() ) { |
| // accept states have no edges emanating from them so we can return |
| d.setAcceptStateReachable(REACHABLE_YES); |
| // this alt is uniquely predicted, remove from nondeterministic list |
| int predicts = d.getUniquelyPredictedAlt(); |
| unreachableAlts.remove(Utils.integer(predicts)); |
| return true; |
| } |
| |
| // avoid infinite loops |
| d.setAcceptStateReachable(REACHABLE_BUSY); |
| |
| boolean anEdgeReachesAcceptState = false; |
| // Visit every transition, track if at least one edge reaches stop state |
| // Cannot terminate when we know this state reaches stop state since |
| // all transitions must be traversed to set status of each DFA state. |
| for (int i=0; i<d.getNumberOfTransitions(); i++) { |
| Transition t = d.transition(i); |
| DFAState edgeTarget = (DFAState)t.target; |
| int targetStatus = edgeTarget.getAcceptStateReachable(); |
| if ( targetStatus==REACHABLE_BUSY ) { // avoid cycles; they say nothing |
| cyclic = true; |
| continue; |
| } |
| if ( targetStatus==REACHABLE_YES ) { // avoid unnecessary work |
| anEdgeReachesAcceptState = true; |
| continue; |
| } |
| if ( targetStatus==REACHABLE_NO ) { // avoid unnecessary work |
| continue; |
| } |
| // target must be REACHABLE_UNKNOWN (i.e., unvisited) |
| if ( doesStateReachAcceptState(edgeTarget) ) { |
| anEdgeReachesAcceptState = true; |
| // have to keep looking so don't break loop |
| // must cover all states even if we find a path for this state |
| } |
| } |
| if ( anEdgeReachesAcceptState ) { |
| d.setAcceptStateReachable(REACHABLE_YES); |
| } |
| else { |
| d.setAcceptStateReachable(REACHABLE_NO); |
| reduced = false; |
| } |
| return anEdgeReachesAcceptState; |
| } |
| |
| /** Walk all accept states and find the manually-specified synpreds. |
| * Gated preds are not always hoisted |
| * I used to do this in the code generator, but that is too late. |
| * This converter tries to avoid computing DFA for decisions in |
| * syntactic predicates that are not ever used such as those |
| * created by autobacktrack mode. |
| */ |
| public void findAllGatedSynPredsUsedInDFAAcceptStates() { |
| int nAlts = getNumberOfAlts(); |
| for (int i=1; i<=nAlts; i++) { |
| DFAState a = getAcceptState(i); |
| //System.out.println("alt "+i+": "+a); |
| if ( a!=null ) { |
| Set synpreds = a.getGatedSyntacticPredicatesInNFAConfigurations(); |
| if ( synpreds!=null ) { |
| // add all the predicates we find (should be just one, right?) |
| for (Iterator it = synpreds.iterator(); it.hasNext();) { |
| SemanticContext semctx = (SemanticContext) it.next(); |
| // System.out.println("synpreds: "+semctx); |
| nfa.grammar.synPredUsedInDFA(this, semctx); |
| } |
| } |
| } |
| } |
| } |
| |
	/** The NFA decision start state from which this DFA was converted. */
	public NFAState getNFADecisionStartState() {
		return decisionNFAStartState;
	}
| |
	/** Accept state predicting alt; may be null when no accept state exists
	 *  for that alt (callers such as findAllGatedSynPredsUsedInDFAAcceptStates
	 *  check for null).
	 */
	public DFAState getAcceptState(int alt) {
		return altToAcceptState[alt];
	}
| |
	/** Record the accept state that predicts alt (1..nAlts indexing). */
	public void setAcceptState(int alt, DFAState acceptState) {
		altToAcceptState[alt] = acceptState;
	}
| |
	/** Text description of this DFA's decision; value is assigned outside
	 *  this view (presumably at construction) — content not verified here.
	 */
	public String getDescription() {
		return description;
	}
| |
	/** Decision number, delegated to the associated NFA decision start state. */
	public int getDecisionNumber() {
		return decisionNFAStartState.getDecisionNumber();
	}
| |
| /** If this DFA failed to finish during construction, we might be |
| * able to retry with k=1 but we need to know whether it will |
| * potentially succeed. Can only succeed if there is a predicate |
| * to resolve the issue. Don't try if k=1 already as it would |
| * cycle forever. Timeout can retry with k=1 even if no predicate |
| * if k!=1. |
| */ |
| public boolean okToRetryDFAWithK1() { |
| boolean nonLLStarOrOverflowAndPredicateVisible = |
| (probe.isNonLLStarDecision()||probe.analysisOverflowed()) && |
| predicateVisible; // auto backtrack or manual sem/syn |
| return getUserMaxLookahead()!=1 && |
| nonLLStarOrOverflowAndPredicateVisible; |
| } |
| |
| public String getReasonForFailure() { |
| StringBuffer buf = new StringBuffer(); |
| if ( probe.isNonLLStarDecision() ) { |
| buf.append("non-LL(*)"); |
| if ( predicateVisible ) { |
| buf.append(" && predicate visible"); |
| } |
| } |
| if ( probe.analysisOverflowed() ) { |
| buf.append("recursion overflow"); |
| if ( predicateVisible ) { |
| buf.append(" && predicate visible"); |
| } |
| } |
| buf.append("\n"); |
| return buf.toString(); |
| } |
| |
	/** What GrammarAST node (derived from the grammar) is this DFA
	 *  associated with?  It will point to the start of a block or
	 *  the loop back of a (...)+ block etc...
	 */
	public GrammarAST getDecisionASTNode() {
		return decisionNFAStartState.associatedASTNode;
	}
| |
| public boolean isGreedy() { |
| GrammarAST blockAST = nfa.grammar.getDecisionBlockAST(decisionNumber); |
| Object v = nfa.grammar.getBlockOption(blockAST,"greedy"); |
| if ( v!=null && v.equals("false") ) { |
| return false; |
| } |
| return true; |
| |
| } |
| |
| public DFAState newState() { |
| DFAState n = new DFAState(this); |
| n.stateNumber = stateCounter; |
| stateCounter++; |
| states.setSize(n.stateNumber+1); |
| states.set(n.stateNumber, n); // track state num to state |
| return n; |
| } |
| |
| public int getNumberOfStates() { |
| if ( getUserMaxLookahead()>0 ) { |
| // if using fixed lookahead then uniqueSets not set |
| return states.size(); |
| } |
| return numberOfStates; |
| } |
| |
	/** Number of alternatives in the decision this DFA predicts. */
	public int getNumberOfAlts() {
		return nAlts;
	}
| |
| // public boolean analysisTimedOut() { |
| // return probe.analysisTimedOut(); |
| // } |
| |
| protected void initAltRelatedInfo() { |
| unreachableAlts = new LinkedList(); |
| for (int i = 1; i <= nAlts; i++) { |
| unreachableAlts.add(Utils.integer(i)); |
| } |
| altToAcceptState = new DFAState[nAlts+1]; |
| } |
| |
| public String toString() { |
| FASerializer serializer = new FASerializer(nfa.grammar); |
| if ( startState==null ) { |
| return ""; |
| } |
| return serializer.serialize(startState, false); |
| } |
| |
| /** EOT (end of token) is a label that indicates when the DFA conversion |
| * algorithm would "fall off the end of a lexer rule". It normally |
| * means the default clause. So for ('a'..'z')+ you would see a DFA |
| * with a state that has a..z and EOT emanating from it. a..z would |
| * jump to a state predicting alt 1 and EOT would jump to a state |
| * predicting alt 2 (the exit loop branch). EOT implies anything other |
| * than a..z. If for some reason, the set is "all char" such as with |
| * the wildcard '.', then EOT cannot match anything. For example, |
| * |
| * BLOCK : '{' (.)* '}' |
| * |
| * consumes all char until EOF when greedy=true. When all edges are |
| * combined for the DFA state after matching '}', you will find that |
| * it is all char. The EOT transition has nothing to match and is |
| * unreachable. The findNewDFAStatesAndAddDFATransitions() method |
| * must know to ignore the EOT, so we simply remove it from the |
| * reachable labels. Later analysis will find that the exit branch |
| * is not predicted by anything. For greedy=false, we leave only |
| * the EOT label indicating that the DFA should stop immediately |
| * and predict the exit branch. The reachable labels are often a |
| * set of disjoint values like: [<EOT>, 42, {0..41, 43..65534}] |
| * due to DFA conversion so must construct a pure set to see if |
| * it is same as Label.ALLCHAR. |
| * |
| * Only do this for Lexers. |
| * |
| * If EOT coexists with ALLCHAR: |
| * 1. If not greedy, modify the labels parameter to be EOT |
| * 2. If greedy, remove EOT from the labels set |
| protected boolean reachableLabelsEOTCoexistsWithAllChar(OrderedHashSet labels) |
| { |
| Label eot = new Label(Label.EOT); |
| if ( !labels.containsKey(eot) ) { |
| return false; |
| } |
| System.out.println("### contains EOT"); |
| boolean containsAllChar = false; |
| IntervalSet completeVocab = new IntervalSet(); |
| int n = labels.size(); |
| for (int i=0; i<n; i++) { |
| Label rl = (Label)labels.get(i); |
| if ( !rl.equals(eot) ) { |
| completeVocab.addAll(rl.getSet()); |
| } |
| } |
| System.out.println("completeVocab="+completeVocab); |
| if ( completeVocab.equals(Label.ALLCHAR) ) { |
| System.out.println("all char"); |
| containsAllChar = true; |
| } |
| return containsAllChar; |
| } |
| */ |
| } |
| |