/*
* [The "BSD license"]
* Copyright (c) 2010 Terence Parr
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.antlr.analysis;
import org.antlr.codegen.CodeGenerator;
import org.antlr.misc.IntSet;
import org.antlr.misc.IntervalSet;
import org.antlr.misc.Utils;
import org.antlr.runtime.IntStream;
import org.stringtemplate.v4.ST;
import org.antlr.tool.*;
import java.util.*;
/** A DFA (converted from a grammar's NFA).
* DFAs are used as prediction machines for alternative blocks in all kinds
* of recognizers (lexers, parsers, tree walkers).
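*
* A usage sketch (assumed; it just strings together this class's own public
* API -- the constructor runs the NFA-to-DFA conversion and verify()):
*
*   DFA dfa = new DFA(decisionNumber, decisionNFAStartState);
*   if ( dfa.isCyclic() ) { ... }  // cyclic DFAs must be interpreted, not inlined as IFs
*   int alt = dfa.predict(input);  // input is an org.antlr.runtime.IntStream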
*/
public class DFA {
public static final int REACHABLE_UNKNOWN = -2;
public static final int REACHABLE_BUSY = -1; // in process of computing
public static final int REACHABLE_NO = 0;
public static final int REACHABLE_YES = 1;
public static final int CYCLIC_UNKNOWN = -2;
public static final int CYCLIC_BUSY = -1; // in process of computing
public static final int CYCLIC_DONE = 0;
/** Prevent explosion of DFA states during conversion. The max number
* of states per alt in a single decision's DFA.
public static final int MAX_STATES_PER_ALT_IN_DFA = 450;
*/
/** Set to 0 to not terminate early (time in ms) */
public static int MAX_TIME_PER_DFA_CREATION = 1*1000;
/** How many edges can each DFA state have before a "special" state
* is created that uses IF expressions instead of a table?
*/
public static int MAX_STATE_TRANSITIONS_FOR_TABLE = 65534;
/** What's the start state for this DFA? */
public DFAState startState;
/** This DFA is being built for which decision? */
public int decisionNumber = 0;
/** From what NFAState did we create the DFA? */
public NFAState decisionNFAStartState;
/** The printable grammar fragment associated with this DFA */
public String description;
/** A set of all uniquely-numbered DFA states. Maps hash of DFAState
* to the actual DFAState object. We use this to detect
* existing DFA states. Map<DFAState,DFAState>. Use Map so
* we can get old state back (Set only allows you to see if it's there).
* Not used during fixed-k lookahead as it's a waste to fill it with
* a duplicate of the states array.
*/
protected Map<DFAState, DFAState> uniqueStates = new HashMap<DFAState, DFAState>();
/** Maps the state number to the actual DFAState. Use a Vector as it
* grows automatically when I set the ith element. This contains all
* states, but the states are not unique. s3 might be same as s1 so
* s3 -> s1 in this table. This is how cycles occur. If fixed k,
* then these states will all be unique as states[i] always points
* at state i when no cycles exist.
*
* This is managed in parallel with uniqueStates and simply provides
* a way to go from state number to DFAState rather than via a
* hash lookup.
*/
protected Vector<DFAState> states = new Vector<DFAState>();
/** Unique state numbers per DFA */
protected int stateCounter = 0;
/** count only new states not states that were rejected as already present */
protected int numberOfStates = 0;
/** User specified max fixed lookahead. If 0, nothing specified. -1
* implies we have not looked at the options table yet to set k.
*/
protected int user_k = -1;
/** While building the DFA, track max lookahead depth if not cyclic */
protected int max_k = -1;
/** Is this DFA reduced? I.e., can all states lead to an accept state? */
protected boolean reduced = true;
/** Are there any loops in this DFA?
* Computed by doesStateReachAcceptState()
*/
protected boolean cyclic = false;
/** Track whether this DFA has at least one sem/syn pred encountered
* during a closure operation. This is useful for deciding whether
* to retry a non-LL(*) decision with k=1. If there is no pred, it will not
* work without one, so don't bother; it would just give another error message.
*/
public boolean predicateVisible = false;
public boolean hasPredicateBlockedByAction = false;
/** Each alt in an NFA derived from a grammar must have a DFA state that
* predicts it, or the parser will not know what to do. Nondeterminisms can
* lead to this situation (assuming no semantic predicates can resolve
* the problem), as can cases where, for some reason, I cannot compute the lookahead
* (which might arise from an error in the algorithm or from
* left-recursion etc...). This list starts out with all alts contained
* and then in method doesStateReachAcceptState() I remove the alts I
* know to be uniquely predicted.
*/
protected List<Integer> unreachableAlts;
protected int nAlts = 0;
/** We only want one accept state per predicted alt; track here */
protected DFAState[] altToAcceptState;
/** Track, for each alt, whether it discovers recursion during
* NFA to DFA conversion; >1 alt with recursion implies nonregular.
*/
public IntSet recursiveAltSet = new IntervalSet();
/** Which NFA are we converting (well, which piece of the NFA)? */
public NFA nfa;
protected NFAToDFAConverter nfaConverter;
/** This probe tells you a lot about a decision and is useful even
* when there is no error such as when a syntactic nondeterminism
* is solved via semantic predicates. Perhaps a GUI would want
* the ability to show that.
*/
public DecisionProbe probe = new DecisionProbe(this);
/** Track absolute time of the conversion so we can have a failsafe:
* if it takes too long, then terminate. Assume bugs are in the
* analysis engine.
*/
//protected long conversionStartTime;
/** Map an edge transition table to a unique set number; ordered so
* we can push into the output template as an ordered list of sets
* and then ref them from within the transition[][] table. Like this
* for C# target:
* public static readonly DFA30_transition0 =
* new short[] { 46, 46, -1, 46, 46, -1, -1, -1, -1, -1, -1, -1,...};
* public static readonly DFA30_transition1 =
* new short[] { 21 };
* public static readonly short[][] DFA30_transition = {
* DFA30_transition0,
* DFA30_transition0,
* DFA30_transition1,
* ...
* };
*/
public Map edgeTransitionClassMap = new LinkedHashMap();
/** The unique edge transition class number; every time we see a new
* set of edges emanating from a state, we number it so we can reuse
* if it's ever seen again for another state. For the Java grammar,
* some of the big edge transition tables are seen about 57 times.
*/
protected int edgeTransitionClass = 0;
/* This DFA can be converted to a transition[state][char] table and
* the following tables are filled by createStateTables upon request.
* These are injected into the templates for code generation.
* See March 25, 2006 entry for description:
* http://www.antlr.org/blog/antlr3/codegen.tml
* We often use Vector because we can't set the ith position in a List and
* have it extend the list size; bizarre.
*/
/** List of special DFAState objects */
public List specialStates;
/** List of ST for special states. */
public List specialStateSTs;
public Vector accept;
public Vector eot;
public Vector eof;
public Vector min;
public Vector max;
public Vector special;
public Vector transition;
/** just the Vector<Integer> indicating which unique edge table is at
* position i.
*/
public Vector transitionEdgeTables; // not used by java yet
protected int uniqueCompressedSpecialStateNum = 0;
/** Which generator to use if we're building state tables */
protected CodeGenerator generator = null;
protected DFA() {}
public DFA(int decisionNumber, NFAState decisionStartState) {
this.decisionNumber = decisionNumber;
this.decisionNFAStartState = decisionStartState;
nfa = decisionStartState.nfa;
nAlts = nfa.grammar.getNumberOfAltsForDecisionNFA(decisionStartState);
//setOptions( nfa.grammar.getDecisionOptions(getDecisionNumber()) );
initAltRelatedInfo();
//long start = System.currentTimeMillis();
nfaConverter = new NFAToDFAConverter(this);
try {
nfaConverter.convert();
// figure out if there are problems with decision
verify();
if ( !probe.isDeterministic() || probe.analysisOverflowed() ) {
probe.issueWarnings();
}
// must be after verify as it computes cyclic, needed by this routine
// should be after warnings because early termination or something
// will not allow the reset to operate properly in some cases.
resetStateNumbersToBeContiguous();
//long stop = System.currentTimeMillis();
//System.out.println("verify cost: "+(int)(stop-start)+" ms");
}
// catch (AnalysisTimeoutException at) {
// probe.reportAnalysisTimeout();
// if ( !okToRetryDFAWithK1() ) {
// probe.issueWarnings();
// }
// }
catch (NonLLStarDecisionException nonLL) {
probe.reportNonLLStarDecision(this);
// >1 alt recurses, k=* and no auto backtrack nor manual sem/syn
if ( !okToRetryDFAWithK1() ) {
probe.issueWarnings();
}
}
}
/** Walk all states and reset their numbers to be a contiguous sequence
* of integers starting from 0. Only cyclic DFAs can have unused positions
* in the states list. State i might be identical to a previous state j and
* will result in states[i] == states[j]. We don't want to waste a state
* number on this. Useful mostly for code generation in tables.
*
* At the start of this routine, states[i].stateNumber <= i by definition.
* If states[50].stateNumber is 50 then a cycle during conversion may
* try to add state 103, but we find that an identical DFA state, named
* 50, already exists, hence, states[103]==states[50] and both have
* stateNumber 50 as they point at same object. Afterwards, the set
* of state numbers from all states should represent a contiguous range
* from 0..n-1 where n is the number of unique states.
*/
public void resetStateNumbersToBeContiguous() {
if ( getUserMaxLookahead()>0 ) {
// all numbers are unique already; no states are thrown out.
return;
}
// walk list of DFAState objects by state number,
// setting state numbers to 0..n-1
int snum=0;
for (int i = 0; i <= getMaxStateNumber(); i++) {
DFAState s = getState(i);
// some states are unused after creation most commonly due to cycles
// or conflict resolution.
if ( s==null ) {
continue;
}
// state i is mapped to DFAState with state number set to i originally
// so if it's less than i, then we renumbered it already; that
// happens when states have been merged or cycles occurred I think.
// states[50] will point to DFAState with s50 in it but
// states[103] might also point at this same DFAState. Since
// 50 < 103 then it's already been renumbered as it points downwards.
boolean alreadyRenumbered = s.stateNumber<i;
if ( !alreadyRenumbered ) {
// state i is a valid state; reset its state number
s.stateNumber = snum; // rewrite state numbers to be 0..n-1
snum++;
}
}
if ( snum!=getNumberOfStates() ) {
ErrorManager.internalError("DFA "+decisionNumber+": "+
decisionNFAStartState.getDescription()+" num unique states "+getNumberOfStates()+
"!= num renumbered states "+snum);
}
}
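/* Illustration of the renumbering above (hypothetical numbers, following the
 * javadoc example): if states[50] and states[103] point at the same DFAState
 * (stateNumber 50), the loop skips index 103 as already renumbered and the
 * surviving unique states end up numbered 0..n-1, so the code-generation
 * tables can be indexed densely.
 */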
// JAVA-SPECIFIC Accessors!!!!! It is so impossible to get arrays
// or even consistently formatted strings acceptable to java that
// I am forced to build the individual char elements here
public List getJavaCompressedAccept() { return getRunLengthEncoding(accept); }
public List getJavaCompressedEOT() { return getRunLengthEncoding(eot); }
public List getJavaCompressedEOF() { return getRunLengthEncoding(eof); }
public List getJavaCompressedMin() { return getRunLengthEncoding(min); }
public List getJavaCompressedMax() { return getRunLengthEncoding(max); }
public List getJavaCompressedSpecial() { return getRunLengthEncoding(special); }
public List getJavaCompressedTransition() {
if ( transition==null || transition.size()==0 ) {
return null;
}
List encoded = new ArrayList(transition.size());
// walk Vector<Vector<FormattedInteger>> which is the transition[][] table
for (int i = 0; i < transition.size(); i++) {
Vector transitionsForState = (Vector) transition.elementAt(i);
encoded.add(getRunLengthEncoding(transitionsForState));
}
return encoded;
}
/** Compress the incoming data list so that runs of same number are
* encoded as number,value pair sequences. 3 -1 -1 -1 28 is encoded
* as 1 3 3 -1 1 28. I am pretty sure this is the lossless compression
* that GIF files use. Transition tables are heavily compressed by
* this technique. I got the idea from JFlex http://jflex.de/
*
* Return List<String> where each string is either \xyz for 8bit char
* and \uFFFF for 16bit. Hideous and specific to Java, but it is the
* only target bad enough to need it.
*/
public List getRunLengthEncoding(List data) {
if ( data==null || data.size()==0 ) {
// for states with no transitions we want an empty string ""
// to hold its place in the transitions array.
List empty = new ArrayList();
empty.add("");
return empty;
}
int size = Math.max(2,data.size()/2);
List encoded = new ArrayList(size); // guess at size
// scan values looking for runs
int i = 0;
Integer emptyValue = Utils.integer(-1);
while ( i < data.size() ) {
Integer I = (Integer)data.get(i);
if ( I==null ) {
I = emptyValue;
}
// count how many consecutive values equal to I there are (the run length)
int n = 0;
for (int j = i; j < data.size(); j++) {
Integer v = (Integer)data.get(j);
if ( v==null ) {
v = emptyValue;
}
if ( I.equals(v) ) {
n++;
}
else {
break;
}
}
encoded.add(generator.target.encodeIntAsCharEscape((char)n));
encoded.add(generator.target.encodeIntAsCharEscape((char)I.intValue()));
i+=n;
}
return encoded;
}
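/* Worked example of the encoding above (the exact output strings are a
 * sketch; they come from the target's encodeIntAsCharEscape):
 *   data    = [3, -1, -1, -1, 28]
 *   runs    = one 3, three -1, one 28
 *   encoded = esc(1) esc(3), esc(3) esc(-1), esc(1) esc(28)
 * Note the (char) cast above: -1 is emitted as the 16-bit value \uFFFF.
 */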
public void createStateTables(CodeGenerator generator) {
//System.out.println("createTables:\n"+this);
this.generator = generator;
description = getNFADecisionStartState().getDescription();
description =
generator.target.getTargetStringLiteralFromString(description);
// create all the tables
special = new Vector(this.getNumberOfStates()); // Vector<short>
special.setSize(this.getNumberOfStates());
specialStates = new ArrayList(); // List<DFAState>
specialStateSTs = new ArrayList(); // List<ST>
accept = new Vector(this.getNumberOfStates()); // Vector<int>
accept.setSize(this.getNumberOfStates());
eot = new Vector(this.getNumberOfStates()); // Vector<int>
eot.setSize(this.getNumberOfStates());
eof = new Vector(this.getNumberOfStates()); // Vector<int>
eof.setSize(this.getNumberOfStates());
min = new Vector(this.getNumberOfStates()); // Vector<int>
min.setSize(this.getNumberOfStates());
max = new Vector(this.getNumberOfStates()); // Vector<int>
max.setSize(this.getNumberOfStates());
transition = new Vector(this.getNumberOfStates()); // Vector<Vector<int>>
transition.setSize(this.getNumberOfStates());
transitionEdgeTables = new Vector(this.getNumberOfStates()); // Vector<Vector<int>>
transitionEdgeTables.setSize(this.getNumberOfStates());
// for each state in the DFA, fill relevant tables.
Iterator it = null;
if ( getUserMaxLookahead()>0 ) {
it = states.iterator();
}
else {
it = getUniqueStates().values().iterator();
}
while ( it.hasNext() ) {
DFAState s = (DFAState)it.next();
if ( s==null ) {
// ignore null states; some acyclic DFAs see this condition
// when inlining DFA (due to lack of exit branch pruning?)
continue;
}
if ( s.isAcceptState() ) {
// can't compute min,max,special,transition on accepts
accept.set(s.stateNumber,
Utils.integer(s.getUniquelyPredictedAlt()));
}
else {
createMinMaxTables(s);
createTransitionTableEntryForState(s);
createSpecialTable(s);
createEOTAndEOFTables(s);
}
}
// now that we have computed list of specialStates, gen code for 'em
for (int i = 0; i < specialStates.size(); i++) {
DFAState ss = (DFAState) specialStates.get(i);
ST stateST =
generator.generateSpecialState(ss);
specialStateSTs.add(stateST);
}
// check that the tables are not messed up by encode/decode
/*
testEncodeDecode(min);
testEncodeDecode(max);
testEncodeDecode(accept);
testEncodeDecode(special);
System.out.println("min="+min);
System.out.println("max="+max);
System.out.println("eot="+eot);
System.out.println("eof="+eof);
System.out.println("accept="+accept);
System.out.println("special="+special);
System.out.println("transition="+transition);
*/
}
/*
private void testEncodeDecode(List data) {
System.out.println("data="+data);
List encoded = getRunLengthEncoding(data);
StringBuffer buf = new StringBuffer();
for (int i = 0; i < encoded.size(); i++) {
String I = (String)encoded.get(i);
int v = 0;
if ( I.startsWith("\\u") ) {
v = Integer.parseInt(I.substring(2,I.length()), 16);
}
else {
v = Integer.parseInt(I.substring(1,I.length()), 8);
}
buf.append((char)v);
}
String encodedS = buf.toString();
short[] decoded = org.antlr.runtime.DFA.unpackEncodedString(encodedS);
//System.out.println("decoded:");
for (int i = 0; i < decoded.length; i++) {
short x = decoded[i];
if ( x!=((Integer)data.get(i)).intValue() ) {
System.err.println("problem with encoding");
}
//System.out.print(", "+x);
}
//System.out.println();
}
*/
protected void createMinMaxTables(DFAState s) {
int smin = Label.MAX_CHAR_VALUE + 1;
int smax = Label.MIN_ATOM_VALUE - 1;
for (int j = 0; j < s.getNumberOfTransitions(); j++) {
Transition edge = (Transition) s.transition(j);
Label label = edge.label;
if ( label.isAtom() ) {
if ( label.getAtom()>=Label.MIN_CHAR_VALUE ) {
if ( label.getAtom()<smin ) {
smin = label.getAtom();
}
if ( label.getAtom()>smax ) {
smax = label.getAtom();
}
}
}
else if ( label.isSet() ) {
IntervalSet labels = (IntervalSet)label.getSet();
int lmin = labels.getMinElement();
// if valid char (don't do EOF) and less than current min
if ( lmin<smin && lmin>=Label.MIN_CHAR_VALUE ) {
smin = labels.getMinElement();
}
if ( labels.getMaxElement()>smax ) {
smax = labels.getMaxElement();
}
}
}
if ( smax<0 ) {
// must be predicates or pure EOT transition; just zero out min, max
smin = Label.MIN_CHAR_VALUE;
smax = Label.MIN_CHAR_VALUE;
}
min.set(s.stateNumber, Utils.integer((char)smin));
max.set(s.stateNumber, Utils.integer((char)smax));
if ( smax<0 || smin>Label.MAX_CHAR_VALUE || smin<0 ) {
ErrorManager.internalError("messed up: min="+min+", max="+max);
}
}
protected void createTransitionTableEntryForState(DFAState s) {
/*
System.out.println("createTransitionTableEntryForState s"+s.stateNumber+
" dec "+s.dfa.decisionNumber+" cyclic="+s.dfa.isCyclic());
*/
int smax = ((Integer)max.get(s.stateNumber)).intValue();
int smin = ((Integer)min.get(s.stateNumber)).intValue();
Vector stateTransitions = new Vector(smax-smin+1);
stateTransitions.setSize(smax-smin+1);
transition.set(s.stateNumber, stateTransitions);
for (int j = 0; j < s.getNumberOfTransitions(); j++) {
Transition edge = (Transition) s.transition(j);
Label label = edge.label;
if ( label.isAtom() && label.getAtom()>=Label.MIN_CHAR_VALUE ) {
int labelIndex = label.getAtom()-smin; // offset from 0
stateTransitions.set(labelIndex,
Utils.integer(edge.target.stateNumber));
}
else if ( label.isSet() ) {
IntervalSet labels = (IntervalSet)label.getSet();
int[] atoms = labels.toArray();
for (int a = 0; a < atoms.length; a++) {
// set the transition if the label is valid (don't do EOF)
if ( atoms[a]>=Label.MIN_CHAR_VALUE ) {
int labelIndex = atoms[a]-smin; // offset from 0
stateTransitions.set(labelIndex,
Utils.integer(edge.target.stateNumber));
}
}
}
}
// track unique state transition tables so we can reuse
Integer edgeClass = (Integer)edgeTransitionClassMap.get(stateTransitions);
if ( edgeClass!=null ) {
//System.out.println("we've seen this array before; size="+stateTransitions.size());
transitionEdgeTables.set(s.stateNumber, edgeClass);
}
else {
edgeClass = Utils.integer(edgeTransitionClass);
transitionEdgeTables.set(s.stateNumber, edgeClass);
edgeTransitionClassMap.put(stateTransitions, edgeClass);
edgeTransitionClass++;
}
}
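/* Example of the smin-offset indexing used above (hypothetical values):
 * if a state's min/max char labels are '0' (48) and '9' (57), stateTransitions
 * gets 57-48+1 = 10 slots and an edge on '3' (atom 51) is stored at index
 * 51-48 = 3. Slots with no edge stay null and are later treated as -1
 * (no transition) by getRunLengthEncoding.
 */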
/** Set up the EOT and EOF tables; we cannot put -1 into the min/max tables,
* so we need another way to test for those labels in the DFA transition function.
*/
protected void createEOTAndEOFTables(DFAState s) {
for (int j = 0; j < s.getNumberOfTransitions(); j++) {
Transition edge = (Transition) s.transition(j);
Label label = edge.label;
if ( label.isAtom() ) {
if ( label.getAtom()==Label.EOT ) {
// eot[s] points to accept state
eot.set(s.stateNumber, Utils.integer(edge.target.stateNumber));
}
else if ( label.getAtom()==Label.EOF ) {
// eof[s] points to accept state
eof.set(s.stateNumber, Utils.integer(edge.target.stateNumber));
}
}
else if ( label.isSet() ) {
IntervalSet labels = (IntervalSet)label.getSet();
int[] atoms = labels.toArray();
for (int a = 0; a < atoms.length; a++) {
if ( atoms[a]==Label.EOT ) {
// eot[s] points to accept state
eot.set(s.stateNumber, Utils.integer(edge.target.stateNumber));
}
else if ( atoms[a]==Label.EOF ) {
eof.set(s.stateNumber, Utils.integer(edge.target.stateNumber));
}
}
}
}
}
protected void createSpecialTable(DFAState s) {
// number all special states from 0...n-1 instead of their usual numbers
boolean hasSemPred = false;
// TODO this code is very similar to canGenerateSwitch. Refactor to share
for (int j = 0; j < s.getNumberOfTransitions(); j++) {
Transition edge = (Transition) s.transition(j);
Label label = edge.label;
// can't do a switch if the edges have preds or are going to
// require gated predicates
if ( label.isSemanticPredicate() ||
((DFAState)edge.target).getGatedPredicatesInNFAConfigurations()!=null)
{
hasSemPred = true;
break;
}
}
// if has pred or too big for table, make it special
int smax = ((Integer)max.get(s.stateNumber)).intValue();
int smin = ((Integer)min.get(s.stateNumber)).intValue();
if ( hasSemPred || smax-smin>MAX_STATE_TRANSITIONS_FOR_TABLE ) {
special.set(s.stateNumber,
Utils.integer(uniqueCompressedSpecialStateNum));
uniqueCompressedSpecialStateNum++;
specialStates.add(s);
}
else {
special.set(s.stateNumber, Utils.integer(-1)); // not special
}
}
public int predict(IntStream input) {
Interpreter interp = new Interpreter(nfa.grammar, input);
return interp.predict(this);
}
/** Add a new DFA state to this DFA if not already present.
* To force an acyclic, fixed maximum depth DFA, just always
* return the incoming state. By not reusing old states,
* no cycles can be created. If we're doing fixed-k lookahead,
* don't update uniqueStates; just return the incoming state, which
* indicates it's a new state.
*/
protected DFAState addState(DFAState d) {
if ( getUserMaxLookahead()>0 ) {
return d;
}
// does a DFA state exist already with everything the same
// except its state number?
DFAState existing = (DFAState)uniqueStates.get(d);
if ( existing != null ) {
/*
System.out.println("state "+d.stateNumber+" exists as state "+
existing.stateNumber);
*/
// already there...get the existing DFA state
return existing;
}
// if not there, then add new state.
uniqueStates.put(d,d);
numberOfStates++;
return d;
}
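/* Sketch of the intended calling pattern (assumed; the actual caller is the
 * NFAToDFAConverter created in the constructor):
 *   DFAState d = dfa.newState();       // candidate state, gets next state number
 *   DFAState kept = dfa.addState(d);   // returns an existing equal state if known
 *   // when kept != d the caller points its edge at kept instead of d,
 *   // which is how cycles and duplicate entries in the states table arise
 */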
public void removeState(DFAState d) {
DFAState it = (DFAState)uniqueStates.remove(d);
if ( it!=null ) {
numberOfStates--;
}
}
public Map<DFAState, DFAState> getUniqueStates() {
return uniqueStates;
}
/** What is the max state number ever created? This may be beyond
* getNumberOfStates().
*/
public int getMaxStateNumber() {
return states.size()-1;
}
public DFAState getState(int stateNumber) {
return (DFAState)states.get(stateNumber);
}
public void setState(int stateNumber, DFAState d) {
states.set(stateNumber, d);
}
/** Is the DFA reduced? I.e., does every state have a path to an accept
* state? If not, don't delete as we need to generate an error indicating
* which paths are "dead ends". Also tracks list of alts with no accept
* state in the DFA. Must call verify() first before this makes sense.
*/
public boolean isReduced() {
return reduced;
}
/** Is this DFA cyclic? That is, are there any loops? If not, then
* the DFA is essentially an LL(k) predictor for some fixed, max k value.
* We can build a series of nested IF statements to match this. In the
* presence of cycles, we need to build a general DFA and interpret it
* to distinguish between alternatives.
*/
public boolean isCyclic() {
return cyclic && getUserMaxLookahead()==0;
}
public boolean isClassicDFA() {
return !isCyclic() &&
!nfa.grammar.decisionsWhoseDFAsUsesSemPreds.contains(this) &&
!nfa.grammar.decisionsWhoseDFAsUsesSynPreds.contains(this);
}
public boolean canInlineDecision() {
return !isCyclic() &&
!probe.isNonLLStarDecision() &&
getNumberOfStates() < CodeGenerator.MAX_ACYCLIC_DFA_STATES_INLINE;
}
/** Is this DFA derived from the NFA for the Tokens rule? */
public boolean isTokensRuleDecision() {
if ( nfa.grammar.type!=Grammar.LEXER ) {
return false;
}
NFAState nfaStart = getNFADecisionStartState();
Rule r = nfa.grammar.getLocallyDefinedRule(Grammar.ARTIFICIAL_TOKENS_RULENAME);
NFAState TokensRuleStart = r.startState;
NFAState TokensDecisionStart =
(NFAState)TokensRuleStart.transition[0].target;
return nfaStart == TokensDecisionStart;
}
/** The user may specify a max, acyclic lookahead for any decision. No
* DFA cycles are created when this value, k, is greater than 0.
* If this decision has no k lookahead specified, then try the grammar.
*/
public int getUserMaxLookahead() {
if ( user_k>=0 ) { // cache for speed
return user_k;
}
user_k = nfa.grammar.getUserMaxLookahead(decisionNumber);
return user_k;
}
public boolean getAutoBacktrackMode() {
return nfa.grammar.getAutoBacktrackMode(decisionNumber);
}
public void setUserMaxLookahead(int k) {
this.user_k = k;
}
/** Return k if decision is LL(k) for some k else return max int
*/
public int getMaxLookaheadDepth() {
if ( hasCycle() ) return Integer.MAX_VALUE;
// compute to be sure
return _getMaxLookaheadDepth(startState, 0);
}
int _getMaxLookaheadDepth(DFAState d, int depth) {
// not cyclic; don't worry about termination
// fail if pred edge.
int max = depth;
for (int i=0; i<d.getNumberOfTransitions(); i++) {
Transition t = d.transition(i);
// if ( t.isSemanticPredicate() ) return Integer.MAX_VALUE;
if ( !t.isSemanticPredicate() ) {
// if pure pred not gated, it must target stop state; don't count
DFAState edgeTarget = (DFAState)t.target;
int m = _getMaxLookaheadDepth(edgeTarget, depth+1);
max = Math.max(max, m);
}
}
return max;
}
/** Count all disambiguating syn preds (ignore synpred tests
* for gated edges, which occur for nonambig input sequences).
* E.g.,
* x : (X)=> (X|Y)
* | X
* ;
*
* gives
*
* .s0-X->.s1
* .s0-Y&&{synpred1_t}?->:s2=>1
* .s1-{synpred1_t}?->:s2=>1
* .s1-{true}?->:s3=>2
*/
public boolean hasSynPred() {
boolean has = _hasSynPred(startState, new HashSet<DFAState>());
// if ( !has ) {
// System.out.println("no synpred in dec "+decisionNumber);
// FASerializer serializer = new FASerializer(nfa.grammar);
// String result = serializer.serialize(startState);
// System.out.println(result);
// }
return has;
}
public boolean getHasSynPred() { return hasSynPred(); } // for ST
boolean _hasSynPred(DFAState d, Set<DFAState> busy) {
busy.add(d);
for (int i=0; i<d.getNumberOfTransitions(); i++) {
Transition t = d.transition(i);
if ( t.isSemanticPredicate() ) {
SemanticContext ctx = t.label.getSemanticContext();
// if ( ctx.toString().indexOf("synpred")>=0 ) {
// System.out.println("has pred "+ctx.toString()+" "+ctx.isSyntacticPredicate());
// System.out.println(((SemanticContext.Predicate)ctx).predicateAST.token);
// }
if ( ctx.isSyntacticPredicate() ) return true;
}
DFAState edgeTarget = (DFAState)t.target;
if ( !busy.contains(edgeTarget) && _hasSynPred(edgeTarget, busy) ) return true;
}
return false;
}
public boolean hasSemPred() { // has user-defined sempred
boolean has = _hasSemPred(startState, new HashSet<DFAState>());
return has;
}
boolean _hasSemPred(DFAState d, Set<DFAState> busy) {
busy.add(d);
for (int i=0; i<d.getNumberOfTransitions(); i++) {
Transition t = d.transition(i);
if ( t.isSemanticPredicate() ) {
SemanticContext ctx = t.label.getSemanticContext();
if ( ctx.hasUserSemanticPredicate() ) return true;
}
DFAState edgeTarget = (DFAState)t.target;
if ( !busy.contains(edgeTarget) && _hasSemPred(edgeTarget, busy) ) return true;
}
return false;
}
/** Compute cyclic without relying on the state computed during analysis; just check. */
public boolean hasCycle() {
boolean cyclic = _hasCycle(startState, new HashMap<DFAState, Integer>());
return cyclic;
}
boolean _hasCycle(DFAState d, Map<DFAState, Integer> busy) {
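// standard DFS coloring: CYCLIC_BUSY marks states on the current path,
// CYCLIC_DONE marks fully explored states; reaching a BUSY target means
// we followed a back edge, i.e., this DFA has a cycle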
busy.put(d, CYCLIC_BUSY);
for (int i=0; i<d.getNumberOfTransitions(); i++) {
Transition t = d.transition(i);
DFAState target = (DFAState)t.target;
int cond = CYCLIC_UNKNOWN;
if ( busy.get(target)!=null ) cond = busy.get(target);
if ( cond==CYCLIC_BUSY ) return true;
if ( cond!=CYCLIC_DONE && _hasCycle(target, busy) ) return true;
}
busy.put(d, CYCLIC_DONE);
return false;
}
/** Return a list of Integer alt numbers for which no lookahead could
* be computed or for which no single DFA accept state predicts those
* alts. Must call verify() first before this makes sense.
*/
public List<Integer> getUnreachableAlts() {
return unreachableAlts;
}
/** Once this DFA has been built, need to verify that:
*
* 1. it's reduced
* 2. all alts have an accept state
*
* Elsewhere, in the NFA converter, we need to verify that:
*
* 3. alts i and j have disjoint lookahead if no sem preds
* 4. if sem preds, nondeterministic alts must be sufficiently covered
*
* This is avoided if analysis bails out for any reason.
*/
public void verify() {
doesStateReachAcceptState(startState);
}
/** figure out if this state eventually reaches an accept state and
* modify the instance variable 'reduced' to indicate if we find
* at least one state that cannot reach an accept state. This implies
* that the overall DFA is not reduced. This algorithm should be
* linear in the number of DFA states.
*
* The algorithm also tracks which alternatives have no accept state,
* indicating a nondeterminism.
*
* Also computes whether the DFA is cyclic.
*
* TODO: I call getUniquelyPredicatedAlt too much; cache predicted alt
*/
protected boolean doesStateReachAcceptState(DFAState d) {
if ( d.isAcceptState() ) {
// accept states have no edges emanating from them so we can return
d.setAcceptStateReachable(REACHABLE_YES);
// this alt is uniquely predicted, remove from nondeterministic list
int predicts = d.getUniquelyPredictedAlt();
unreachableAlts.remove(Utils.integer(predicts));
return true;
}
// avoid infinite loops
d.setAcceptStateReachable(REACHABLE_BUSY);
boolean anEdgeReachesAcceptState = false;
// Visit every transition, track if at least one edge reaches stop state
// Cannot terminate when we know this state reaches stop state since
// all transitions must be traversed to set status of each DFA state.
for (int i=0; i<d.getNumberOfTransitions(); i++) {
Transition t = d.transition(i);
DFAState edgeTarget = (DFAState)t.target;
int targetStatus = edgeTarget.getAcceptStateReachable();
if ( targetStatus==REACHABLE_BUSY ) { // avoid cycles; they say nothing
cyclic = true;
continue;
}
if ( targetStatus==REACHABLE_YES ) { // avoid unnecessary work
anEdgeReachesAcceptState = true;
continue;
}
if ( targetStatus==REACHABLE_NO ) { // avoid unnecessary work
continue;
}
// target must be REACHABLE_UNKNOWN (i.e., unvisited)
if ( doesStateReachAcceptState(edgeTarget) ) {
anEdgeReachesAcceptState = true;
// have to keep looking so don't break loop
// must cover all states even if we find a path for this state
}
}
if ( anEdgeReachesAcceptState ) {
d.setAcceptStateReachable(REACHABLE_YES);
}
else {
d.setAcceptStateReachable(REACHABLE_NO);
reduced = false;
}
return anEdgeReachesAcceptState;
}
/** Walk all accept states and find the manually-specified synpreds.
* Gated preds are not always hoisted
* I used to do this in the code generator, but that is too late.
* This converter tries to avoid computing DFA for decisions in
* syntactic predicates that are not ever used such as those
* created by autobacktrack mode.
*/
public void findAllGatedSynPredsUsedInDFAAcceptStates() {
int nAlts = getNumberOfAlts();
for (int i=1; i<=nAlts; i++) {
DFAState a = getAcceptState(i);
//System.out.println("alt "+i+": "+a);
if ( a!=null ) {
Set synpreds = a.getGatedSyntacticPredicatesInNFAConfigurations();
if ( synpreds!=null ) {
// add all the predicates we find (should be just one, right?)
for (Iterator it = synpreds.iterator(); it.hasNext();) {
SemanticContext semctx = (SemanticContext) it.next();
// System.out.println("synpreds: "+semctx);
nfa.grammar.synPredUsedInDFA(this, semctx);
}
}
}
}
}
public NFAState getNFADecisionStartState() {
return decisionNFAStartState;
}
public DFAState getAcceptState(int alt) {
return altToAcceptState[alt];
}
public void setAcceptState(int alt, DFAState acceptState) {
altToAcceptState[alt] = acceptState;
}
public String getDescription() {
return description;
}
public int getDecisionNumber() {
return decisionNFAStartState.getDecisionNumber();
}
/** If this DFA failed to finish during construction, we might be
* able to retry with k=1 but we need to know whether it will
* potentially succeed. Can only succeed if there is a predicate
* to resolve the issue. Don't try if k=1 already as it would
* cycle forever. A timeout can retry with k=1 even without a predicate,
* as long as k!=1.
*/
public boolean okToRetryDFAWithK1() {
boolean nonLLStarOrOverflowAndPredicateVisible =
(probe.isNonLLStarDecision()||probe.analysisOverflowed()) &&
predicateVisible; // auto backtrack or manual sem/syn
return getUserMaxLookahead()!=1 &&
nonLLStarOrOverflowAndPredicateVisible;
}
public String getReasonForFailure() {
StringBuffer buf = new StringBuffer();
if ( probe.isNonLLStarDecision() ) {
buf.append("non-LL(*)");
if ( predicateVisible ) {
buf.append(" && predicate visible");
}
}
if ( probe.analysisOverflowed() ) {
buf.append("recursion overflow");
if ( predicateVisible ) {
buf.append(" && predicate visible");
}
}
buf.append("\n");
return buf.toString();
}
/** What GrammarAST node (derived from the grammar) is this DFA
* associated with? It will point to the start of a block or
* the loop back of a (...)+ block etc...
*/
public GrammarAST getDecisionASTNode() {
return decisionNFAStartState.associatedASTNode;
}
public boolean isGreedy() {
GrammarAST blockAST = nfa.grammar.getDecisionBlockAST(decisionNumber);
Object v = nfa.grammar.getBlockOption(blockAST,"greedy");
if ( v!=null && v.equals("false") ) {
return false;
}
return true;
}
public DFAState newState() {
DFAState n = new DFAState(this);
n.stateNumber = stateCounter;
stateCounter++;
states.setSize(n.stateNumber+1);
states.set(n.stateNumber, n); // track state num to state
return n;
}
public int getNumberOfStates() {
if ( getUserMaxLookahead()>0 ) {
// if using fixed lookahead then uniqueStates is not filled in
return states.size();
}
return numberOfStates;
}
public int getNumberOfAlts() {
return nAlts;
}
// public boolean analysisTimedOut() {
// return probe.analysisTimedOut();
// }
protected void initAltRelatedInfo() {
unreachableAlts = new LinkedList();
for (int i = 1; i <= nAlts; i++) {
unreachableAlts.add(Utils.integer(i));
}
altToAcceptState = new DFAState[nAlts+1];
}
public String toString() {
FASerializer serializer = new FASerializer(nfa.grammar);
if ( startState==null ) {
return "";
}
return serializer.serialize(startState, false);
}
/** EOT (end of token) is a label that indicates when the DFA conversion
* algorithm would "fall off the end of a lexer rule". It normally
* means the default clause. So for ('a'..'z')+ you would see a DFA
* with a state that has a..z and EOT emanating from it. a..z would
* jump to a state predicting alt 1 and EOT would jump to a state
* predicting alt 2 (the exit loop branch). EOT implies anything other
* than a..z. If for some reason, the set is "all char" such as with
* the wildcard '.', then EOT cannot match anything. For example,
*
* BLOCK : '{' (.)* '}'
*
* consumes all char until EOF when greedy=true. When all edges are
* combined for the DFA state after matching '}', you will find that
* it is all char. The EOT transition has nothing to match and is
* unreachable. The findNewDFAStatesAndAddDFATransitions() method
* must know to ignore the EOT, so we simply remove it from the
* reachable labels. Later analysis will find that the exit branch
* is not predicted by anything. For greedy=false, we leave only
* the EOT label indicating that the DFA should stop immediately
* and predict the exit branch. The reachable labels are often a
* set of disjoint values like: [<EOT>, 42, {0..41, 43..65534}]
* due to DFA conversion so must construct a pure set to see if
* it is same as Label.ALLCHAR.
*
* Only do this for Lexers.
*
* If EOT coexists with ALLCHAR:
* 1. If not greedy, modify the labels parameter to be EOT
* 2. If greedy, remove EOT from the labels set
protected boolean reachableLabelsEOTCoexistsWithAllChar(OrderedHashSet labels)
{
Label eot = new Label(Label.EOT);
if ( !labels.containsKey(eot) ) {
return false;
}
System.out.println("### contains EOT");
boolean containsAllChar = false;
IntervalSet completeVocab = new IntervalSet();
int n = labels.size();
for (int i=0; i<n; i++) {
Label rl = (Label)labels.get(i);
if ( !rl.equals(eot) ) {
completeVocab.addAll(rl.getSet());
}
}
System.out.println("completeVocab="+completeVocab);
if ( completeVocab.equals(Label.ALLCHAR) ) {
System.out.println("all char");
containsAllChar = true;
}
return containsAllChar;
}
*/
}