| /* |
| * [The "BSD license"] |
| * Copyright (c) 2010 Terence Parr |
| * All rights reserved. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions |
| * are met: |
| * 1. Redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer. |
| * 2. Redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in the |
| * documentation and/or other materials provided with the distribution. |
| * 3. The name of the author may not be used to endorse or promote products |
| * derived from this software without specific prior written permission. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR |
| * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
| * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. |
| * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, |
| * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
| * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF |
| * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| */ |
| package org.antlr.analysis; |
| |
| import org.antlr.codegen.CodeGenerator; |
| import org.antlr.misc.IntSet; |
| import org.antlr.misc.IntervalSet; |
| import org.antlr.misc.Utils; |
| import org.antlr.runtime.IntStream; |
| import org.stringtemplate.v4.ST; |
| import org.antlr.tool.*; |
| |
| import java.util.*; |
| |
| /** A DFA (converted from a grammar's NFA). |
| * DFAs are used as prediction machine for alternative blocks in all kinds |
| * of recognizers (lexers, parsers, tree walkers). |
| */ |
| public class DFA { |
| public static final int REACHABLE_UNKNOWN = -2; |
| public static final int REACHABLE_BUSY = -1; // in process of computing |
| public static final int REACHABLE_NO = 0; |
| public static final int REACHABLE_YES = 1; |
| |
| public static final int CYCLIC_UNKNOWN = -2; |
| public static final int CYCLIC_BUSY = -1; // in process of computing |
| public static final int CYCLIC_DONE = 0; |
| |
| /** Prevent explosion of DFA states during conversion. The max number |
| * of states per alt in a single decision's DFA. |
| public static final int MAX_STATES_PER_ALT_IN_DFA = 450; |
| */ |
| |
| /** Set to 0 to not terminate early (time in ms) */ |
| public static int MAX_TIME_PER_DFA_CREATION = 1*1000; |
| |
| /** How many edges can each DFA state have before a "special" state |
| * is created that uses IF expressions instead of a table? |
| */ |
| public static int MAX_STATE_TRANSITIONS_FOR_TABLE = 65534; |
| |
| /** What's the start state for this DFA? */ |
| public DFAState startState; |
| |
| /** This DFA is being built for which decision? */ |
| public int decisionNumber = 0; |
| |
| /** From what NFAState did we create the DFA? */ |
| public NFAState decisionNFAStartState; |
| |
| /** The printable grammar fragment associated with this DFA */ |
| public String description; |
| |
| /** A set of all uniquely-numbered DFA states. Maps hash of DFAState |
| * to the actual DFAState object. We use this to detect |
| * existing DFA states. Map<DFAState,DFAState>. Use Map so |
| * we can get old state back (Set only allows you to see if it's there). |
| * Not used during fixed k lookahead as it's a waste to fill it with |
| * a dup of states array. |
| */ |
| protected Map<DFAState, DFAState> uniqueStates = new HashMap<DFAState, DFAState>(); |
| |
| /** Maps the state number to the actual DFAState. Use a Vector as it |
| * grows automatically when I set the ith element. This contains all |
| * states, but the states are not unique. s3 might be same as s1 so |
| * s3 -> s1 in this table. This is how cycles occur. If fixed k, |
| * then these states will all be unique as states[i] always points |
| * at state i when no cycles exist. |
| * |
| * This is managed in parallel with uniqueStates and simply provides |
| * a way to go from state number to DFAState rather than via a |
| * hash lookup. |
| */ |
| protected Vector<DFAState> states = new Vector<DFAState>(); |
| |
| /** Unique state numbers per DFA */ |
| protected int stateCounter = 0; |
| |
| /** count only new states not states that were rejected as already present */ |
| protected int numberOfStates = 0; |
| |
| /** User specified max fixed lookahead. If 0, nothing specified. -1 |
| * implies we have not looked at the options table yet to set k. |
| */ |
| protected int user_k = -1; |
| |
| /** While building the DFA, track max lookahead depth if not cyclic */ |
| protected int max_k = -1; |
| |
| /** Is this DFA reduced? I.e., can all states lead to an accept state? */ |
| protected boolean reduced = true; |
| |
| /** Are there any loops in this DFA? |
| * Computed by doesStateReachAcceptState() |
| */ |
| protected boolean cyclic = false; |
| |
| /** Track whether this DFA has at least one sem/syn pred encountered |
| * during a closure operation. This is useful for deciding whether |
| * to retry a non-LL(*) with k=1. If no pred, it will not work w/o |
| * a pred so don't bother. It would just give another error message. |
| */ |
| public boolean predicateVisible = false; |
| |
| public boolean hasPredicateBlockedByAction = false; |
| |
| /** Each alt in an NFA derived from a grammar must have a DFA state that |
| * predicts it lest the parser not know what to do. Nondeterminisms can |
| * lead to this situation (assuming no semantic predicates can resolve |
| * the problem) and when for some reason, I cannot compute the lookahead |
| * (which might arise from an error in the algorithm or from |
| * left-recursion etc...). This list starts out with all alts contained |
| * and then in method doesStateReachAcceptState() I remove the alts I |
| * know to be uniquely predicted. |
| */ |
| protected List<Integer> unreachableAlts; |
| |
| protected int nAlts = 0; |
| |
| /** We only want one accept state per predicted alt; track here */ |
| protected DFAState[] altToAcceptState; |
| |
| /** Track whether an alt discovers recursion for each alt during |
| * NFA to DFA conversion; >1 alt with recursion implies nonregular. |
| */ |
| public IntSet recursiveAltSet = new IntervalSet(); |
| |
| /** Which NFA are we converting (well, which piece of the NFA)? */ |
| public NFA nfa; |
| |
| protected NFAToDFAConverter nfaConverter; |
| |
| /** This probe tells you a lot about a decision and is useful even |
| * when there is no error such as when a syntactic nondeterminism |
| * is solved via semantic predicates. Perhaps a GUI would want |
| * the ability to show that. |
| */ |
| public DecisionProbe probe = new DecisionProbe(this); |
| |
| /** Track absolute time of the conversion so we can have a failsafe: |
| * if it takes too long, then terminate. Assume bugs are in the |
| * analysis engine. |
| */ |
| //protected long conversionStartTime; |
| |
| /** Map an edge transition table to a unique set number; ordered so |
| * we can push into the output template as an ordered list of sets |
| * and then ref them from within the transition[][] table. Like this |
| * for C# target: |
| * public static readonly DFA30_transition0 = |
| * new short[] { 46, 46, -1, 46, 46, -1, -1, -1, -1, -1, -1, -1,...}; |
| * public static readonly DFA30_transition1 = |
| * new short[] { 21 }; |
| * public static readonly short[][] DFA30_transition = { |
| * DFA30_transition0, |
| * DFA30_transition0, |
| * DFA30_transition1, |
| * ... |
| * }; |
| */ |
| public Map edgeTransitionClassMap = new LinkedHashMap(); |
| |
| /** The unique edge transition class number; every time we see a new |
| * set of edges emanating from a state, we number it so we can reuse |
 *  if it's ever seen again for another state.  For Java grammar,
| * some of the big edge transition tables are seen about 57 times. |
| */ |
| protected int edgeTransitionClass =0; |
| |
| /* This DFA can be converted to a transition[state][char] table and |
| * the following tables are filled by createStateTables upon request. |
| * These are injected into the templates for code generation. |
| * See March 25, 2006 entry for description: |
| * http://www.antlr.org/blog/antlr3/codegen.tml |
| * Often using Vector as can't set ith position in a List and have |
| * it extend list size; bizarre. |
| */ |
| |
| /** List of special DFAState objects */ |
| public List specialStates; |
| /** List of ST for special states. */ |
| public List specialStateSTs; |
| public Vector accept; |
| public Vector eot; |
| public Vector eof; |
| public Vector min; |
| public Vector max; |
| public Vector special; |
| public Vector transition; |
| /** just the Vector<Integer> indicating which unique edge table is at |
| * position i. |
| */ |
| public Vector transitionEdgeTables; // not used by java yet |
| protected int uniqueCompressedSpecialStateNum = 0; |
| |
| /** Which generator to use if we're building state tables */ |
| protected CodeGenerator generator = null; |
| |
| protected DFA() {;} |
| |
	/** Build the DFA predictor for one decision: run NFA-to-DFA conversion,
	 *  verify the result (reachability/reducedness), report warnings for
	 *  nondeterminism or overflow, and renumber states contiguously for
	 *  table-driven code generation.
	 *
	 *  @param decisionNumber     which decision of the grammar this DFA predicts
	 *  @param decisionStartState root NFA state of the decision being converted
	 */
	public DFA(int decisionNumber, NFAState decisionStartState) {
		this.decisionNumber = decisionNumber;
		this.decisionNFAStartState = decisionStartState;
		nfa = decisionStartState.nfa;
		nAlts = nfa.grammar.getNumberOfAltsForDecisionNFA(decisionStartState);
		//setOptions( nfa.grammar.getDecisionOptions(getDecisionNumber()) );
		initAltRelatedInfo();

		//long start = System.currentTimeMillis();
		nfaConverter = new NFAToDFAConverter(this);
		try {
			nfaConverter.convert();

			// figure out if there are problems with decision
			verify();

			if ( !probe.isDeterministic() || probe.analysisOverflowed() ) {
				probe.issueWarnings();
			}

			// must be after verify as it computes cyclic, needed by this routine
			// should be after warnings because early termination or something
			// will not allow the reset to operate properly in some cases.
			resetStateNumbersToBeContiguous();

			//long stop = System.currentTimeMillis();
			//System.out.println("verify cost: "+(int)(stop-start)+" ms");
		}
//		catch (AnalysisTimeoutException at) {
//			probe.reportAnalysisTimeout();
//			if ( !okToRetryDFAWithK1() ) {
//				probe.issueWarnings();
//			}
//		}
		catch (NonLLStarDecisionException nonLL) {
			// decision needs more than LL(*); record it and, unless a k=1
			// retry is possible, emit the warnings now.
			probe.reportNonLLStarDecision(this);
			// >1 alt recurses, k=* and no auto backtrack nor manual sem/syn
			if ( !okToRetryDFAWithK1() ) {
				probe.issueWarnings();
			}
		}
	}
| |
	/** Walk all states and reset their numbers to be a contiguous sequence
	 *  of integers starting from 0. Only cyclic DFA can have unused positions
	 *  in states list. State i might be identical to a previous state j and
	 *  will result in states[i] == states[j]. We don't want to waste a state
	 *  number on this. Useful mostly for code generation in tables.
	 *
	 *  At the start of this routine, states[i].stateNumber &lt;= i by definition.
	 *  If states[50].stateNumber is 50 then a cycle during conversion may
	 *  try to add state 103, but we find that an identical DFA state, named
	 *  50, already exists, hence, states[103]==states[50] and both have
	 *  stateNumber 50 as they point at same object. Afterwards, the set
	 *  of state numbers from all states should represent a contiguous range
	 *  from 0..n-1 where n is the number of unique states.
	 */
	public void resetStateNumbersToBeContiguous() {
		if ( getUserMaxLookahead()>0 ) {
			// all numbers are unique already; no states are thrown out.
			return;
		}

		// walk list of DFAState objects by state number,
		// setting state numbers to 0..n-1
		int snum=0;
		for (int i = 0; i <= getMaxStateNumber(); i++) {
			DFAState s = getState(i);
			// some states are unused after creation most commonly due to cycles
			// or conflict resolution.
			if ( s==null ) {
				continue;
			}
			// state i is mapped to DFAState with state number set to i originally
			// so if it's less than i, then we renumbered it already; that
			// happens when states have been merged or cycles occurred I think.
			// states[50] will point to DFAState with s50 in it but
			// states[103] might also point at this same DFAState. Since
			// 50 < 103 then it's already been renumbered as it points downwards.
			boolean alreadyRenumbered = s.stateNumber<i;
			if ( !alreadyRenumbered ) {
				// state i is a valid state, reset it's state number
				s.stateNumber = snum; // rewrite state numbers to be 0..n-1
				snum++;
			}
		}
		// sanity check: the contiguous renumbering must account for exactly
		// the unique states tracked during conversion.
		if ( snum!=getNumberOfStates() ) {
			ErrorManager.internalError("DFA "+decisionNumber+": "+
				decisionNFAStartState.getDescription()+" num unique states "+getNumberOfStates()+
				"!= num renumbered states "+snum);
		}
	}
| |
	// JAVA-SPECIFIC Accessors!!!!! It is so impossible to get arrays
	// or even consistently formatted strings acceptable to java that
	// I am forced to build the individual char elements here

	/** Run-length-encoded views of the DFA tables for the Java target
	 *  templates; each returns a List of char-escape strings
	 *  (see getRunLengthEncoding).
	 */
	public List getJavaCompressedAccept() { return getRunLengthEncoding(accept); }
	public List getJavaCompressedEOT() { return getRunLengthEncoding(eot); }
	public List getJavaCompressedEOF() { return getRunLengthEncoding(eof); }
	public List getJavaCompressedMin() { return getRunLengthEncoding(min); }
	public List getJavaCompressedMax() { return getRunLengthEncoding(max); }
	public List getJavaCompressedSpecial() { return getRunLengthEncoding(special); }
| public List getJavaCompressedTransition() { |
| if ( transition==null || transition.size()==0 ) { |
| return null; |
| } |
| List encoded = new ArrayList(transition.size()); |
| // walk Vector<Vector<FormattedInteger>> which is the transition[][] table |
| for (int i = 0; i < transition.size(); i++) { |
| Vector transitionsForState = (Vector) transition.elementAt(i); |
| encoded.add(getRunLengthEncoding(transitionsForState)); |
| } |
| return encoded; |
| } |
| |
	/** Compress the incoming data list so that runs of same number are
	 *  encoded as number,value pair sequences. 3 -1 -1 -1 28 is encoded
	 *  as 1 3 3 -1 1 28. I am pretty sure this is the lossless compression
	 *  that GIF files use. Transition tables are heavily compressed by
	 *  this technique. I got the idea from JFlex http://jflex.de/
	 *
	 *  Return List&lt;String&gt; where each string is either \xyz for 8bit char
	 *  and \uFFFF for 16bit. Hideous and specific to Java, but it is the
	 *  only target bad enough to need it.
	 *
	 *  @param data list of Integer values; null entries are treated as -1
	 *  @return alternating (run length, value) target char escapes, or a
	 *          single "" placeholder when data is null/empty
	 */
	public List getRunLengthEncoding(List data) {
		if ( data==null || data.size()==0 ) {
			// for states with no transitions we want an empty string ""
			// to hold its place in the transitions array.
			List empty = new ArrayList();
			empty.add("");
			return empty;
		}
		int size = Math.max(2,data.size()/2);
		List encoded = new ArrayList(size); // guess at size
		// scan values looking for runs
		int i = 0;
		Integer emptyValue = Utils.integer(-1); // null slots encode as -1
		while ( i < data.size() ) {
			Integer I = (Integer)data.get(i);
			if ( I==null ) {
				I = emptyValue;
			}
			// count how many v there are?
			int n = 0;
			for (int j = i; j < data.size(); j++) {
				Integer v = (Integer)data.get(j);
				if ( v==null ) {
					v = emptyValue;
				}
				if ( I.equals(v) ) {
					n++;
				}
				else {
					break;
				}
			}
			// emit the (count, value) pair as target-specific char escapes
			encoded.add(generator.target.encodeIntAsCharEscape((char)n));
			encoded.add(generator.target.encodeIntAsCharEscape((char)I.intValue()));
			i+=n;
		}
		return encoded;
	}
| |
	/** Build all code-generation tables (accept, eot, eof, min, max, special,
	 *  transition) for this DFA and then generate templates for any "special"
	 *  states discovered along the way.
	 *
	 *  @param generator target-specific code generator used to compute string
	 *                   escapes and special-state templates
	 */
	public void createStateTables(CodeGenerator generator) {
		//System.out.println("createTables:\n"+this);
		this.generator = generator;
		description = getNFADecisionStartState().getDescription();
		description =
			generator.target.getTargetStringLiteralFromString(description);

		// create all the tables
		special = new Vector(this.getNumberOfStates()); // Vector<short>
		special.setSize(this.getNumberOfStates());
		specialStates = new ArrayList();				// List<DFAState>
		specialStateSTs = new ArrayList();				// List<ST>
		accept = new Vector(this.getNumberOfStates()); // Vector<int>
		accept.setSize(this.getNumberOfStates());
		eot = new Vector(this.getNumberOfStates()); // Vector<int>
		eot.setSize(this.getNumberOfStates());
		eof = new Vector(this.getNumberOfStates()); // Vector<int>
		eof.setSize(this.getNumberOfStates());
		min = new Vector(this.getNumberOfStates()); // Vector<int>
		min.setSize(this.getNumberOfStates());
		max = new Vector(this.getNumberOfStates()); // Vector<int>
		max.setSize(this.getNumberOfStates());
		transition = new Vector(this.getNumberOfStates()); // Vector<Vector<int>>
		transition.setSize(this.getNumberOfStates());
		transitionEdgeTables = new Vector(this.getNumberOfStates()); // Vector<Vector<int>>
		transitionEdgeTables.setSize(this.getNumberOfStates());

		// for each state in the DFA, fill relevant tables.
		Iterator it = null;
		if ( getUserMaxLookahead()>0 ) {
			// fixed k: every state lives (uniquely) in the states list
			it = states.iterator();
		}
		else {
			it = getUniqueStates().values().iterator();
		}
		while ( it.hasNext() ) {
			DFAState s = (DFAState)it.next();
			if ( s==null ) {
				// ignore null states; some acyclic DFA see this condition
				// when inlining DFA (due to lacking of exit branch pruning?)
				continue;
			}
			if ( s.isAcceptState() ) {
				// can't compute min,max,special,transition on accepts
				accept.set(s.stateNumber,
						   Utils.integer(s.getUniquelyPredictedAlt()));
			}
			else {
				createMinMaxTables(s);
				createTransitionTableEntryForState(s);
				createSpecialTable(s);
				createEOTAndEOFTables(s);
			}
		}

		// now that we have computed list of specialStates, gen code for 'em
		for (int i = 0; i < specialStates.size(); i++) {
			DFAState ss = (DFAState) specialStates.get(i);
			ST stateST =
				generator.generateSpecialState(ss);
			specialStateSTs.add(stateST);
		}

		// check that the tables are not messed up by encode/decode
		/*
		testEncodeDecode(min);
		testEncodeDecode(max);
		testEncodeDecode(accept);
		testEncodeDecode(special);
		System.out.println("min="+min);
		System.out.println("max="+max);
		System.out.println("eot="+eot);
		System.out.println("eof="+eof);
		System.out.println("accept="+accept);
		System.out.println("special="+special);
		System.out.println("transition="+transition);
		*/
	}
| |
| /* |
| private void testEncodeDecode(List data) { |
| System.out.println("data="+data); |
| List encoded = getRunLengthEncoding(data); |
| StringBuffer buf = new StringBuffer(); |
| for (int i = 0; i < encoded.size(); i++) { |
| String I = (String)encoded.get(i); |
| int v = 0; |
| if ( I.startsWith("\\u") ) { |
| v = Integer.parseInt(I.substring(2,I.length()), 16); |
| } |
| else { |
| v = Integer.parseInt(I.substring(1,I.length()), 8); |
| } |
| buf.append((char)v); |
| } |
| String encodedS = buf.toString(); |
| short[] decoded = org.antlr.runtime.DFA.unpackEncodedString(encodedS); |
| //System.out.println("decoded:"); |
| for (int i = 0; i < decoded.length; i++) { |
| short x = decoded[i]; |
| if ( x!=((Integer)data.get(i)).intValue() ) { |
| System.err.println("problem with encoding"); |
| } |
| //System.out.print(", "+x); |
| } |
| //System.out.println(); |
| } |
| */ |
| |
	/** Compute min[s] and max[s]: the smallest and largest valid input label
	 *  among s's atom and set edges. Labels below Label.MIN_CHAR_VALUE
	 *  (e.g., EOF/EOT sentinels) are excluded from the range.
	 */
	protected void createMinMaxTables(DFAState s) {
		int smin = Label.MAX_CHAR_VALUE + 1;  // start above any valid label
		int smax = Label.MIN_ATOM_VALUE - 1;  // start below any valid label
		for (int j = 0; j < s.getNumberOfTransitions(); j++) {
			Transition edge = (Transition) s.transition(j);
			Label label = edge.label;
			if ( label.isAtom() ) {
				if ( label.getAtom()>=Label.MIN_CHAR_VALUE ) {
					if ( label.getAtom()<smin ) {
						smin = label.getAtom();
					}
					if ( label.getAtom()>smax ) {
						smax = label.getAtom();
					}
				}
			}
			else if ( label.isSet() ) {
				IntervalSet labels = (IntervalSet)label.getSet();
				int lmin = labels.getMinElement();
				// if valid char (don't do EOF) and less than current min
				if ( lmin<smin && lmin>=Label.MIN_CHAR_VALUE ) {
					smin = labels.getMinElement();
				}
				if ( labels.getMaxElement()>smax ) {
					smax = labels.getMaxElement();
				}
			}
		}

		if ( smax<0 ) {
			// must be predicates or pure EOT transition; just zero out min, max
			smin = Label.MIN_CHAR_VALUE;
			smax = Label.MIN_CHAR_VALUE;
		}

		min.set(s.stateNumber, Utils.integer((char)smin));
		max.set(s.stateNumber, Utils.integer((char)smax));

		// NOTE(review): after the clamp above, smax<0 appears unreachable;
		// check kept unchanged to preserve behavior.
		if ( smax<0 || smin>Label.MAX_CHAR_VALUE || smin<0 ) {
			ErrorManager.internalError("messed up: min="+min+", max="+max);
		}
	}
| |
	/** Fill transition[s] with a row mapping (label - min[s]) offsets to
	 *  target state numbers for each atom/set edge of s, then intern the
	 *  row in edgeTransitionClassMap so identical rows share one table
	 *  entry (transitionEdgeTables records which shared row s uses).
	 */
	protected void createTransitionTableEntryForState(DFAState s) {
		/*
		System.out.println("createTransitionTableEntryForState s"+s.stateNumber+
			" dec "+s.dfa.decisionNumber+" cyclic="+s.dfa.isCyclic());
		*/
		// min/max were computed by createMinMaxTables for this state
		int smax = ((Integer)max.get(s.stateNumber)).intValue();
		int smin = ((Integer)min.get(s.stateNumber)).intValue();

		Vector stateTransitions = new Vector(smax-smin+1);
		stateTransitions.setSize(smax-smin+1);
		transition.set(s.stateNumber, stateTransitions);
		for (int j = 0; j < s.getNumberOfTransitions(); j++) {
			Transition edge = (Transition) s.transition(j);
			Label label = edge.label;
			if ( label.isAtom() && label.getAtom()>=Label.MIN_CHAR_VALUE ) {
				int labelIndex = label.getAtom()-smin; // offset from 0
				stateTransitions.set(labelIndex,
									 Utils.integer(edge.target.stateNumber));
			}
			else if ( label.isSet() ) {
				IntervalSet labels = (IntervalSet)label.getSet();
				int[] atoms = labels.toArray();
				for (int a = 0; a < atoms.length; a++) {
					// set the transition if the label is valid (don't do EOF)
					if ( atoms[a]>=Label.MIN_CHAR_VALUE ) {
						int labelIndex = atoms[a]-smin; // offset from 0
						stateTransitions.set(labelIndex,
											 Utils.integer(edge.target.stateNumber));
					}
				}
			}
		}
		// track unique state transition tables so we can reuse
		Integer edgeClass = (Integer)edgeTransitionClassMap.get(stateTransitions);
		if ( edgeClass!=null ) {
			//System.out.println("we've seen this array before; size="+stateTransitions.size());
			transitionEdgeTables.set(s.stateNumber, edgeClass);
		}
		else {
			// first time this exact row is seen; assign it a new class number
			edgeClass = Utils.integer(edgeTransitionClass);
			transitionEdgeTables.set(s.stateNumber, edgeClass);
			edgeTransitionClassMap.put(stateTransitions, edgeClass);
			edgeTransitionClass++;
		}
	}
| |
	/** Set up the EOT and EOF tables; we cannot put -1 min/max values so
	 *  we need another way to test that in the DFA transition function.
	 *  eot[s]/eof[s] record the target state for EOT/EOF edges out of s.
	 */
	protected void createEOTAndEOFTables(DFAState s) {
		for (int j = 0; j < s.getNumberOfTransitions(); j++) {
			Transition edge = (Transition) s.transition(j);
			Label label = edge.label;
			if ( label.isAtom() ) {
				if ( label.getAtom()==Label.EOT ) {
					// eot[s] points to accept state
					eot.set(s.stateNumber, Utils.integer(edge.target.stateNumber));
				}
				else if ( label.getAtom()==Label.EOF ) {
					// eof[s] points to accept state
					eof.set(s.stateNumber, Utils.integer(edge.target.stateNumber));
				}
			}
			else if ( label.isSet() ) {
				// EOT/EOF may also be buried in a set label; scan its atoms
				IntervalSet labels = (IntervalSet)label.getSet();
				int[] atoms = labels.toArray();
				for (int a = 0; a < atoms.length; a++) {
					if ( atoms[a]==Label.EOT ) {
						// eot[s] points to accept state
						eot.set(s.stateNumber, Utils.integer(edge.target.stateNumber));
					}
					else if ( atoms[a]==Label.EOF ) {
						eof.set(s.stateNumber, Utils.integer(edge.target.stateNumber));
					}
				}
			}
		}
	}
| |
	/** Decide whether state s must be a "special" state: one whose edges
	 *  carry predicates (or gated predicates) or whose label range is too
	 *  wide for a plain table. Special states get sequential numbers in
	 *  special[] and code is generated for them separately; all other
	 *  states get -1.
	 */
	protected void createSpecialTable(DFAState s) {
		// number all special states from 0...n-1 instead of their usual numbers
		boolean hasSemPred = false;

		// TODO this code is very similar to canGenerateSwitch. Refactor to share
		for (int j = 0; j < s.getNumberOfTransitions(); j++) {
			Transition edge = (Transition) s.transition(j);
			Label label = edge.label;
			// can't do a switch if the edges have preds or are going to
			// require gated predicates
			if ( label.isSemanticPredicate() ||
				 ((DFAState)edge.target).getGatedPredicatesInNFAConfigurations()!=null)
			{
				hasSemPred = true;
				break;
			}
		}
		// if has pred or too big for table, make it special
		int smax = ((Integer)max.get(s.stateNumber)).intValue();
		int smin = ((Integer)min.get(s.stateNumber)).intValue();
		if ( hasSemPred || smax-smin>MAX_STATE_TRANSITIONS_FOR_TABLE ) {
			special.set(s.stateNumber,
						Utils.integer(uniqueCompressedSpecialStateNum));
			uniqueCompressedSpecialStateNum++;
			specialStates.add(s);
		}
		else {
			special.set(s.stateNumber, Utils.integer(-1)); // not special
		}
	}
| |
| public int predict(IntStream input) { |
| Interpreter interp = new Interpreter(nfa.grammar, input); |
| return interp.predict(this); |
| } |
| |
	/** Add a new DFA state to this DFA if not already present.
	 *  To force an acyclic, fixed maximum depth DFA, just always
	 *  return the incoming state. By not reusing old states,
	 *  no cycles can be created. If we're doing fixed k lookahead
	 *  don't update uniqueStates, just return incoming state, which
	 *  indicates it's a new state.
	 *
	 *  @return the canonical (pre-existing) state when an equal state
	 *          already exists; otherwise d itself
	 */
	protected DFAState addState(DFAState d) {
		if ( getUserMaxLookahead()>0 ) {
			return d;
		}
		// does a DFA state exist already with everything the same
		// except its state number?
		DFAState existing = (DFAState)uniqueStates.get(d);
		if ( existing != null ) {
			/*
			System.out.println("state "+d.stateNumber+" exists as state "+
				existing.stateNumber);
			*/
			// already there...get the existing DFA state
			return existing;
		}

		// if not there, then add new state.
		uniqueStates.put(d,d);
		numberOfStates++;
		return d;
	}
| |
| public void removeState(DFAState d) { |
| DFAState it = (DFAState)uniqueStates.remove(d); |
| if ( it!=null ) { |
| numberOfStates--; |
| } |
| } |
| |
	/** Canonical DFAState-to-DFAState map used during conversion to detect
	 *  duplicate states; not populated when a fixed user k is in force
	 *  (see addState).
	 */
	public Map<DFAState, DFAState> getUniqueStates() {
		return uniqueStates;
	}

	/** What is the max state number ever created? This may be beyond
	 *  getNumberOfStates() because the states list can hold duplicate
	 *  references and unused slots after merges/cycles.
	 */
	public int getMaxStateNumber() {
		return states.size()-1;
	}

	/** Look up a DFAState by state number in the (possibly sparse) states list. */
	public DFAState getState(int stateNumber) {
		return (DFAState)states.get(stateNumber);
	}

	/** Install d at position stateNumber in the states list. */
	public void setState(int stateNumber, DFAState d) {
		states.set(stateNumber, d);
	}
| |
	/** Is the DFA reduced? I.e., does every state have a path to an accept
	 *  state? If not, don't delete as we need to generate an error indicating
	 *  which paths are "dead ends". Also tracks list of alts with no accept
	 *  state in the DFA. Must call verify() first before this makes sense.
	 */
	public boolean isReduced() {
		return reduced;
	}

	/** Is this DFA cyclic? That is, are there any loops? If not, then
	 *  the DFA is essentially an LL(k) predictor for some fixed, max k value.
	 *  We can build a series of nested IF statements to match this. In the
	 *  presence of cycles, we need to build a general DFA and interpret it
	 *  to distinguish between alternatives.
	 *  A user-specified fixed k forces the acyclic answer regardless of the
	 *  computed cyclic flag.
	 */
	public boolean isCyclic() {
		return cyclic && getUserMaxLookahead()==0;
	}

	/** A "classic" DFA is acyclic and its decision uses neither semantic
	 *  nor syntactic predicates, per the grammar's tracking sets.
	 */
	public boolean isClassicDFA() {
		return !isCyclic() &&
			!nfa.grammar.decisionsWhoseDFAsUsesSemPreds.contains(this) &&
			!nfa.grammar.decisionsWhoseDFAsUsesSynPreds.contains(this);
	}

	/** Can code for this decision be generated inline (nested IFs) rather
	 *  than as a table-driven DFA? Requires acyclic, LL(*)-analyzable,
	 *  and fewer states than the generator's inline limit.
	 */
	public boolean canInlineDecision() {
		return !isCyclic() &&
			!probe.isNonLLStarDecision() &&
			getNumberOfStates() < CodeGenerator.MAX_ACYCLIC_DFA_STATES_INLINE;
	}
| |
| /** Is this DFA derived from the NFA for the Tokens rule? */ |
| public boolean isTokensRuleDecision() { |
| if ( nfa.grammar.type!=Grammar.LEXER ) { |
| return false; |
| } |
| NFAState nfaStart = getNFADecisionStartState(); |
| Rule r = nfa.grammar.getLocallyDefinedRule(Grammar.ARTIFICIAL_TOKENS_RULENAME); |
| NFAState TokensRuleStart = r.startState; |
| NFAState TokensDecisionStart = |
| (NFAState)TokensRuleStart.transition[0].target; |
| return nfaStart == TokensDecisionStart; |
| } |
| |
| /** The user may specify a max, acyclic lookahead for any decision. No |
| * DFA cycles are created when this value, k, is greater than 0. |
| * If this decision has no k lookahead specified, then try the grammar. |
| */ |
| public int getUserMaxLookahead() { |
| if ( user_k>=0 ) { // cache for speed |
| return user_k; |
| } |
| user_k = nfa.grammar.getUserMaxLookahead(decisionNumber); |
| return user_k; |
| } |
| |
	/** Is automatic backtracking enabled for this decision (per grammar/option)? */
	public boolean getAutoBacktrackMode() {
		return nfa.grammar.getAutoBacktrackMode(decisionNumber);
	}

	/** Override the cached user-specified lookahead k for this decision. */
	public void setUserMaxLookahead(int k) {
		this.user_k = k;
	}
| |
	/** Return k if decision is LL(k) for some k else return max int
	 */
	public int getMaxLookaheadDepth() {
		if ( hasCycle() ) return Integer.MAX_VALUE;
		// compute to be sure
		return _getMaxLookaheadDepth(startState, 0);
	}

	/** DFS from d, returning the deepest non-predicate path length.
	 *  Termination relies on the caller having checked hasCycle() first.
	 */
	int _getMaxLookaheadDepth(DFAState d, int depth) {
		// not cyclic; don't worry about termination
		// fail if pred edge.
		int max = depth;
		for (int i=0; i<d.getNumberOfTransitions(); i++) {
			Transition t = d.transition(i);
//			if ( t.isSemanticPredicate() ) return Integer.MAX_VALUE;
			if ( !t.isSemanticPredicate() ) {
				// if pure pred not gated, it must target stop state; don't count
				DFAState edgeTarget = (DFAState)t.target;
				int m = _getMaxLookaheadDepth(edgeTarget, depth+1);
				max = Math.max(max, m);
			}
		}
		return max;
	}
| |
| /** Count all disambiguating syn preds (ignore synpred tests |
| * for gated edges, which occur for nonambig input sequences). |
| * E.g., |
| * x : (X)=> (X|Y)\n" + |
| * | X\n" + |
| * ; |
| * |
| * gives |
| * |
| * .s0-X->.s1 |
| * .s0-Y&&{synpred1_t}?->:s2=>1 |
| * .s1-{synpred1_t}?->:s2=>1 |
| * .s1-{true}?->:s3=>2 |
| */ |
| public boolean hasSynPred() { |
| boolean has = _hasSynPred(startState, new HashSet<DFAState>()); |
| // if ( !has ) { |
| // System.out.println("no synpred in dec "+decisionNumber); |
| // FASerializer serializer = new FASerializer(nfa.grammar); |
| // String result = serializer.serialize(startState); |
| // System.out.println(result); |
| // } |
| return has; |
| } |
| |
| public boolean getHasSynPred() { return hasSynPred(); } // for ST |
| |
| boolean _hasSynPred(DFAState d, Set<DFAState> busy) { |
| busy.add(d); |
| for (int i=0; i<d.getNumberOfTransitions(); i++) { |
| Transition t = d.transition(i); |
| if ( t.isSemanticPredicate() ) { |
| SemanticContext ctx = t.label.getSemanticContext(); |
| // if ( ctx.toString().indexOf("synpred")>=0 ) { |
| // System.out.println("has pred "+ctx.toString()+" "+ctx.isSyntacticPredicate()); |
| // System.out.println(((SemanticContext.Predicate)ctx).predicateAST.token); |
| // } |
| if ( ctx.isSyntacticPredicate() ) return true; |
| } |
| DFAState edgeTarget = (DFAState)t.target; |
| if ( !busy.contains(edgeTarget) && _hasSynPred(edgeTarget, busy) ) return true; |
| } |
| |
| return false; |
| } |
| |
| public boolean hasSemPred() { // has user-defined sempred |
| boolean has = _hasSemPred(startState, new HashSet<DFAState>()); |
| return has; |
| } |
| |
| boolean _hasSemPred(DFAState d, Set<DFAState> busy) { |
| busy.add(d); |
| for (int i=0; i<d.getNumberOfTransitions(); i++) { |
| Transition t = d.transition(i); |
| if ( t.isSemanticPredicate() ) { |
| SemanticContext ctx = t.label.getSemanticContext(); |
| if ( ctx.hasUserSemanticPredicate() ) return true; |
| } |
| DFAState edgeTarget = (DFAState)t.target; |
| if ( !busy.contains(edgeTarget) && _hasSemPred(edgeTarget, busy) ) return true; |
| } |
| |
| return false; |
| } |
| |
| /** Compute cyclic w/o relying on state computed during analysis. just check. */ |
| public boolean hasCycle() { |
| boolean cyclic = _hasCycle(startState, new HashMap<DFAState, Integer>()); |
| return cyclic; |
| } |
| |
| boolean _hasCycle(DFAState d, Map<DFAState, Integer> busy) { |
| busy.put(d, CYCLIC_BUSY); |
| for (int i=0; i<d.getNumberOfTransitions(); i++) { |
| Transition t = d.transition(i); |
| DFAState target = (DFAState)t.target; |
| int cond = CYCLIC_UNKNOWN; |
| if ( busy.get(target)!=null ) cond = busy.get(target); |
| if ( cond==CYCLIC_BUSY ) return true; |
| if ( cond!=CYCLIC_DONE && _hasCycle(target, busy) ) return true; |
| } |
| busy.put(d, CYCLIC_DONE); |
| return false; |
| } |
| |
| |
	/** Return a list of Integer alt numbers for which no lookahead could
	 *  be computed or for which no single DFA accept state predicts those
	 *  alts.  Must call verify() first before this makes sense; verify()
	 *  removes each alt from this list as its accept state is reached.
	 */
	public List<Integer> getUnreachableAlts() {
		return unreachableAlts;
	}
| |
	/** Once this DFA has been built, need to verify that:
	 *
	 *  1. it's reduced
	 *  2. all alts have an accept state
	 *
	 *  Elsewhere, in the NFA converter, we need to verify that:
	 *
	 *  3. alts i and j have disjoint lookahead if no sem preds
	 *  4. if sem preds, nondeterministic alts must be sufficiently covered
	 *
	 *  This is avoided if analysis bails out for any reason.
	 */
	public void verify() {
		// single DFS marks reachability on every state and updates
		// 'reduced', 'cyclic', and 'unreachableAlts' as side effects
		doesStateReachAcceptState(startState);
	}
| |
| /** figure out if this state eventually reaches an accept state and |
| * modify the instance variable 'reduced' to indicate if we find |
| * at least one state that cannot reach an accept state. This implies |
| * that the overall DFA is not reduced. This algorithm should be |
| * linear in the number of DFA states. |
| * |
| * The algorithm also tracks which alternatives have no accept state, |
| * indicating a nondeterminism. |
| * |
| * Also computes whether the DFA is cyclic. |
| * |
| * TODO: I call getUniquelyPredicatedAlt too much; cache predicted alt |
| */ |
| protected boolean doesStateReachAcceptState(DFAState d) { |
| if ( d.isAcceptState() ) { |
| // accept states have no edges emanating from them so we can return |
| d.setAcceptStateReachable(REACHABLE_YES); |
| // this alt is uniquely predicted, remove from nondeterministic list |
| int predicts = d.getUniquelyPredictedAlt(); |
| unreachableAlts.remove(Utils.integer(predicts)); |
| return true; |
| } |
| |
| // avoid infinite loops |
| d.setAcceptStateReachable(REACHABLE_BUSY); |
| |
| boolean anEdgeReachesAcceptState = false; |
| // Visit every transition, track if at least one edge reaches stop state |
| // Cannot terminate when we know this state reaches stop state since |
| // all transitions must be traversed to set status of each DFA state. |
| for (int i=0; i<d.getNumberOfTransitions(); i++) { |
| Transition t = d.transition(i); |
| DFAState edgeTarget = (DFAState)t.target; |
| int targetStatus = edgeTarget.getAcceptStateReachable(); |
| if ( targetStatus==REACHABLE_BUSY ) { // avoid cycles; they say nothing |
| cyclic = true; |
| continue; |
| } |
| if ( targetStatus==REACHABLE_YES ) { // avoid unnecessary work |
| anEdgeReachesAcceptState = true; |
| continue; |
| } |
| if ( targetStatus==REACHABLE_NO ) { // avoid unnecessary work |
| continue; |
| } |
| // target must be REACHABLE_UNKNOWN (i.e., unvisited) |
| if ( doesStateReachAcceptState(edgeTarget) ) { |
| anEdgeReachesAcceptState = true; |
| // have to keep looking so don't break loop |
| // must cover all states even if we find a path for this state |
| } |
| } |
| if ( anEdgeReachesAcceptState ) { |
| d.setAcceptStateReachable(REACHABLE_YES); |
| } |
| else { |
| d.setAcceptStateReachable(REACHABLE_NO); |
| reduced = false; |
| } |
| return anEdgeReachesAcceptState; |
| } |
| |
| /** Walk all accept states and find the manually-specified synpreds. |
| * Gated preds are not always hoisted |
| * I used to do this in the code generator, but that is too late. |
| * This converter tries to avoid computing DFA for decisions in |
| * syntactic predicates that are not ever used such as those |
| * created by autobacktrack mode. |
| */ |
| public void findAllGatedSynPredsUsedInDFAAcceptStates() { |
| int nAlts = getNumberOfAlts(); |
| for (int i=1; i<=nAlts; i++) { |
| DFAState a = getAcceptState(i); |
| //System.out.println("alt "+i+": "+a); |
| if ( a!=null ) { |
| Set synpreds = a.getGatedSyntacticPredicatesInNFAConfigurations(); |
| if ( synpreds!=null ) { |
| // add all the predicates we find (should be just one, right?) |
| for (Iterator it = synpreds.iterator(); it.hasNext();) { |
| SemanticContext semctx = (SemanticContext) it.next(); |
| // System.out.println("synpreds: "+semctx); |
| nfa.grammar.synPredUsedInDFA(this, semctx); |
| } |
| } |
| } |
| } |
| } |
| |
	/** The NFA decision start state from which this DFA was converted. */
	public NFAState getNFADecisionStartState() {
		return decisionNFAStartState;
	}
| |
	/** Accept state predicting alt; may be null when no accept state exists
	 *  for that alt (callers such as findAllGatedSynPredsUsedInDFAAcceptStates
	 *  check for null).
	 */
	public DFAState getAcceptState(int alt) {
		return altToAcceptState[alt];
	}
| |
	/** Record the accept state that predicts alt (1..nAlts indexing). */
	public void setAcceptState(int alt, DFAState acceptState) {
		altToAcceptState[alt] = acceptState;
	}
| |
	/** Text description of this DFA's decision; value is assigned outside
	 *  this view (presumably at construction) — content not verified here.
	 */
	public String getDescription() {
		return description;
	}
| |
	/** Decision number, delegated to the associated NFA decision start state. */
	public int getDecisionNumber() {
		return decisionNFAStartState.getDecisionNumber();
	}
| |
| /** If this DFA failed to finish during construction, we might be |
| * able to retry with k=1 but we need to know whether it will |
| * potentially succeed. Can only succeed if there is a predicate |
| * to resolve the issue. Don't try if k=1 already as it would |
| * cycle forever. Timeout can retry with k=1 even if no predicate |
| * if k!=1. |
| */ |
| public boolean okToRetryDFAWithK1() { |
| boolean nonLLStarOrOverflowAndPredicateVisible = |
| (probe.isNonLLStarDecision()||probe.analysisOverflowed()) && |
| predicateVisible; // auto backtrack or manual sem/syn |
| return getUserMaxLookahead()!=1 && |
| nonLLStarOrOverflowAndPredicateVisible; |
| } |
| |
| public String getReasonForFailure() { |
| StringBuffer buf = new StringBuffer(); |
| if ( probe.isNonLLStarDecision() ) { |
| buf.append("non-LL(*)"); |
| if ( predicateVisible ) { |
| buf.append(" && predicate visible"); |
| } |
| } |
| if ( probe.analysisOverflowed() ) { |
| buf.append("recursion overflow"); |
| if ( predicateVisible ) { |
| buf.append(" && predicate visible"); |
| } |
| } |
| buf.append("\n"); |
| return buf.toString(); |
| } |
| |
	/** What GrammarAST node (derived from the grammar) is this DFA
	 *  associated with?  It will point to the start of a block or
	 *  the loop back of a (...)+ block etc...
	 */
	public GrammarAST getDecisionASTNode() {
		return decisionNFAStartState.associatedASTNode;
	}
| |
| public boolean isGreedy() { |
| GrammarAST blockAST = nfa.grammar.getDecisionBlockAST(decisionNumber); |
| Object v = nfa.grammar.getBlockOption(blockAST,"greedy"); |
| if ( v!=null && v.equals("false") ) { |
| return false; |
| } |
| return true; |
| |
| } |
| |
| public DFAState newState() { |
| DFAState n = new DFAState(this); |
| n.stateNumber = stateCounter; |
| stateCounter++; |
| states.setSize(n.stateNumber+1); |
| states.set(n.stateNumber, n); // track state num to state |
| return n; |
| } |
| |
| public int getNumberOfStates() { |
| if ( getUserMaxLookahead()>0 ) { |
| // if using fixed lookahead then uniqueSets not set |
| return states.size(); |
| } |
| return numberOfStates; |
| } |
| |
	/** Number of alternatives in the decision this DFA predicts. */
	public int getNumberOfAlts() {
		return nAlts;
	}
| |
| // public boolean analysisTimedOut() { |
| // return probe.analysisTimedOut(); |
| // } |
| |
| protected void initAltRelatedInfo() { |
| unreachableAlts = new LinkedList(); |
| for (int i = 1; i <= nAlts; i++) { |
| unreachableAlts.add(Utils.integer(i)); |
| } |
| altToAcceptState = new DFAState[nAlts+1]; |
| } |
| |
| public String toString() { |
| FASerializer serializer = new FASerializer(nfa.grammar); |
| if ( startState==null ) { |
| return ""; |
| } |
| return serializer.serialize(startState, false); |
| } |
| |
| /** EOT (end of token) is a label that indicates when the DFA conversion |
| * algorithm would "fall off the end of a lexer rule". It normally |
| * means the default clause. So for ('a'..'z')+ you would see a DFA |
| * with a state that has a..z and EOT emanating from it. a..z would |
| * jump to a state predicting alt 1 and EOT would jump to a state |
| * predicting alt 2 (the exit loop branch). EOT implies anything other |
| * than a..z. If for some reason, the set is "all char" such as with |
| * the wildcard '.', then EOT cannot match anything. For example, |
| * |
| * BLOCK : '{' (.)* '}' |
| * |
| * consumes all char until EOF when greedy=true. When all edges are |
| * combined for the DFA state after matching '}', you will find that |
| * it is all char. The EOT transition has nothing to match and is |
| * unreachable. The findNewDFAStatesAndAddDFATransitions() method |
| * must know to ignore the EOT, so we simply remove it from the |
| * reachable labels. Later analysis will find that the exit branch |
| * is not predicted by anything. For greedy=false, we leave only |
| * the EOT label indicating that the DFA should stop immediately |
| * and predict the exit branch. The reachable labels are often a |
| * set of disjoint values like: [<EOT>, 42, {0..41, 43..65534}] |
| * due to DFA conversion so must construct a pure set to see if |
| * it is same as Label.ALLCHAR. |
| * |
| * Only do this for Lexers. |
| * |
| * If EOT coexists with ALLCHAR: |
| * 1. If not greedy, modify the labels parameter to be EOT |
| * 2. If greedy, remove EOT from the labels set |
| protected boolean reachableLabelsEOTCoexistsWithAllChar(OrderedHashSet labels) |
| { |
| Label eot = new Label(Label.EOT); |
| if ( !labels.containsKey(eot) ) { |
| return false; |
| } |
| System.out.println("### contains EOT"); |
| boolean containsAllChar = false; |
| IntervalSet completeVocab = new IntervalSet(); |
| int n = labels.size(); |
| for (int i=0; i<n; i++) { |
| Label rl = (Label)labels.get(i); |
| if ( !rl.equals(eot) ) { |
| completeVocab.addAll(rl.getSet()); |
| } |
| } |
| System.out.println("completeVocab="+completeVocab); |
| if ( completeVocab.equals(Label.ALLCHAR) ) { |
| System.out.println("all char"); |
| containsAllChar = true; |
| } |
| return containsAllChar; |
| } |
| */ |
| } |
| |