/*
* [The "BSD license"]
* Copyright (c) 2010 Terence Parr
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.antlr.analysis;
import org.antlr.misc.OrderedHashSet;
import org.antlr.misc.Utils;
import org.antlr.runtime.Token;
import org.antlr.tool.ErrorManager;
import java.util.*;
/** Code that embodies the NFA conversion to DFA. A new object is needed
* per DFA (also required for thread safety if multiple conversions
* launched).
*/
public class NFAToDFAConverter {
/** A list of DFA states we still need to process during NFA conversion */
protected List work = new LinkedList();
/** While converting NFA, we must track states that
* reference other rules' NFAs so we know what to do
* at the end of a rule. We need to know what context invoked
* this rule so we can know where to continue looking for NFA
* states. I'm tracking a context tree (record of rule invocation
* stack trace) for each alternative that could be predicted.
*/
protected NFAContext[] contextTrees;
/** We are converting which DFA? */
protected DFA dfa;
public static boolean debug = false;
/** Should ANTLR launch multiple threads to convert NFAs to DFAs?
* With a 2-CPU box, I note that it's about the same single or
* multithreaded. Both CPU meters are going even when single-threaded
* so I assume the GC is killing us. Could be the compiler. When I
* run java -Xint mode, I get about 15% speed improvement with multiple
* threads.
*/
public static boolean SINGLE_THREADED_NFA_CONVERSION = true;
protected boolean computingStartState = false;
public NFAToDFAConverter(DFA dfa) {
this.dfa = dfa;
int nAlts = dfa.getNumberOfAlts();
initContextTrees(nAlts);
}
public void convert() {
//dfa.conversionStartTime = System.currentTimeMillis();
// create the DFA start state
dfa.startState = computeStartState();
// while more DFA states to check, process them
while ( work.size()>0 &&
!dfa.nfa.grammar.NFAToDFAConversionExternallyAborted() )
{
DFAState d = (DFAState) work.get(0);
if ( dfa.nfa.grammar.composite.watchNFAConversion ) {
System.out.println("convert DFA state "+d.stateNumber+
" ("+d.nfaConfigurations.size()+" nfa states)");
}
int k = dfa.getUserMaxLookahead();
if ( k>0 && k==d.getLookaheadDepth() ) {
// we've hit max lookahead, make this a stop state
//System.out.println("stop state @k="+k+" (terminated early)");
/*
List<Label> sampleInputLabels = d.dfa.probe.getSampleNonDeterministicInputSequence(d);
String input = d.dfa.probe.getInputSequenceDisplay(sampleInputLabels);
System.out.println("sample input: "+input);
*/
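// at fixed k we cannot look any deeper, so any remaining ambiguity must
// be resolved right here: with predicates if possible, otherwise
// syntactically (see resolveNonDeterminisms)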
resolveNonDeterminisms(d);
// Check to see if we need to add any semantic predicate transitions
if ( d.isResolvedWithPredicates() ) {
addPredicateTransitions(d);
}
else {
d.setAcceptState(true); // must convert to accept state at k
}
}
else {
findNewDFAStatesAndAddDFATransitions(d);
}
work.remove(0); // done with it; remove from work list
}
// Find all manual syn preds (gated). These are not discovered
// in tryToResolveWithSemanticPredicates because they are implicitly
// added to every edge by code gen, DOT generation etc...
dfa.findAllGatedSynPredsUsedInDFAAcceptStates();
}
/** From this first NFA state of a decision, create a DFA.
* Walk each alt in decision and compute closure from the start of that
* rule, making sure that the closure does not include other alts within
* that same decision. The idea is to associate a specific alt number
* with the starting closure so we can trace the alt number for all states
* derived from this. At a stop state in the DFA, we can return this alt
* number, indicating which alt is predicted.
*
* If this DFA is derived from a loopback NFA state, then the first
* transition is actually the exit branch of the loop. Rather than make
* this alternative one, let's make this alt n+1 where n is the number of
* alts in this block. This is nice to keep the alts of the block 1..n;
* helps with error messages.
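*
* For example, if this decision is the loopback of ('a'|'b')*, the loop
* alts keep numbers 1..2 and the exit branch becomes alt 3.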
*
* I handle nongreedy decisions in findNewDFAStatesAndAddDFATransitions:
* when the decision is nongreedy and an EOT transition appears, I make the
* state with EOT emanating from it the accept state.
*/
protected DFAState computeStartState() {
NFAState alt = dfa.decisionNFAStartState;
DFAState startState = dfa.newState();
computingStartState = true;
int i = 0;
int altNum = 1;
while ( alt!=null ) {
// find the set of NFA states reachable without consuming
// any input symbols for each alt. Keep adding to same
// overall closure that will represent the DFA start state,
// but track the alt number
NFAContext initialContext = contextTrees[i];
// if first alt is derived from loopback/exit branch of loop,
// make alt=n+1 for n alts instead of 1
if ( i==0 &&
dfa.getNFADecisionStartState().decisionStateType==NFAState.LOOPBACK )
{
int numAltsIncludingExitBranch = dfa.nfa.grammar
.getNumberOfAltsForDecisionNFA(dfa.decisionNFAStartState);
altNum = numAltsIncludingExitBranch;
closure((NFAState)alt.transition[0].target,
altNum,
initialContext,
SemanticContext.EMPTY_SEMANTIC_CONTEXT,
startState,
true
);
altNum = 1; // make next alt the first
}
else {
closure((NFAState)alt.transition[0].target,
altNum,
initialContext,
SemanticContext.EMPTY_SEMANTIC_CONTEXT,
startState,
true
);
altNum++;
}
i++;
// move to next alternative
if ( alt.transition[1] ==null ) {
break;
}
alt = (NFAState)alt.transition[1].target;
}
// now DFA start state has the complete closure for the decision
// but we have tracked which alt is associated with which
// NFA states.
dfa.addState(startState); // make sure dfa knows about this state
work.add(startState);
computingStartState = false;
return startState;
}
/** From this node, add a d--a-->t transition for all
* labels 'a' where t is a DFA node created
* from the set of NFA states reachable from any NFA
* state in DFA state d.
*/
protected void findNewDFAStatesAndAddDFATransitions(DFAState d) {
//System.out.println("work on DFA state "+d);
OrderedHashSet labels = d.getReachableLabels();
//System.out.println("reachable labels="+labels);
/*
System.out.println("|reachable|/|nfaconfigs|="+
labels.size()+"/"+d.getNFAConfigurations().size()+"="+
labels.size()/(float)d.getNFAConfigurations().size());
*/
// normally EOT is the "default" clause and decisions just
// choose that last clause when nothing else matches. DFA conversion
// continues searching for a unique sequence that predicts the
// various alts or until it finds EOT. So this rule
//
// DUH : ('x'|'y')* "xy!";
//
// does not need a greedy indicator. The following rule works fine too
//
// A : ('x')+ ;
//
// When the follow branch could match what is in the loop, by default,
// the nondeterminism is resolved in favor of the loop. You don't
// get a warning because the only way to get this condition is if
// the DFA conversion hits the end of the token. In that case,
// we're not *sure* what will happen next, but it could be anything.
// Anyway, EOT is the default case which means it will never be matched
// as resolution goes to the lowest alt number. Exit branches are
// always alt n+1 for n alts in a block.
//
// When a loop is nongreedy and we find an EOT transition, the DFA
// state should become an accept state, predicting exit of loop. It's
// just reversing the resolution of ambiguity.
// TODO: should this be done in the resolveAmbig method?
Label EOTLabel = new Label(Label.EOT);
boolean containsEOT = labels!=null && labels.contains(EOTLabel);
if ( !dfa.isGreedy() && containsEOT ) {
convertToEOTAcceptState(d);
return; // no more work to do on this accept state
}
// if in filter mode for lexer, want to match shortest not longest
// string so if we see an EOT edge emanating from this state, then
// convert this state to an accept state. This only counts for
// The Tokens rule as all other decisions must continue to look for
// longest match.
// [Taking back out a few days later on Jan 17, 2006. This could
// be an option for the future, but this was the wrong solution for
// filtering.]
/*
if ( dfa.nfa.grammar.type==Grammar.LEXER && containsEOT ) {
String filterOption = (String)dfa.nfa.grammar.getOption("filter");
boolean filterMode = filterOption!=null && filterOption.equals("true");
if ( filterMode && d.dfa.isTokensRuleDecision() ) {
DFAState t = reach(d, EOTLabel);
if ( t.getNFAConfigurations().size()>0 ) {
convertToEOTAcceptState(d);
//System.out.println("state "+d+" has EOT target "+t.stateNumber);
return;
}
}
}
*/
int numberOfEdgesEmanating = 0;
Map targetToLabelMap = new HashMap();
// for each label that could possibly emanate from NFAStates of d
int numLabels = 0;
if ( labels!=null ) {
numLabels = labels.size();
}
for (int i=0; i<numLabels; i++) {
Label label = (Label)labels.get(i);
DFAState t = reach(d, label);
if ( debug ) {
System.out.println("DFA state after reach "+label+" "+d+"-" +
label.toString(dfa.nfa.grammar)+"->"+t);
}
if ( t==null ) {
// nothing was reached by label due to conflict resolution
// EOT also seems to be in here occasionally probably due
// to an end-of-rule state seeing it even though we'll pop
// an invoking state off the stack; don't bother with the conflict,
// as this labels set is only a covering approximation.
continue;
}
//System.out.println("dfa.k="+dfa.getUserMaxLookahead());
if ( t.getUniqueAlt()==NFA.INVALID_ALT_NUMBER ) {
// Only compute closure if a unique alt number is not known.
// If a unique alternative is mentioned among all NFA
// configurations then there is no possibility of needing to look
// beyond this state; also no possibility of a nondeterminism.
// This optimization (May 22, 2006) just dropped -Xint time
// for analysis of Java grammar from 11.5s to 2s! Wow.
closure(t); // add any NFA states reachable via epsilon
}
/*
System.out.println("DFA state after closure "+d+"-"+
label.toString(dfa.nfa.grammar)+
"->"+t);
*/
// add if not in DFA yet and then make d-label->t
DFAState targetState = addDFAStateToWorkList(t);
numberOfEdgesEmanating +=
addTransition(d, label, targetState, targetToLabelMap);
// lookahead of target must be one larger than d's k
// We are possibly setting the depth of a pre-existing state
// that is equal to one we just computed...not sure if that's
// ok.
targetState.setLookaheadDepth(d.getLookaheadDepth() + 1);
}
//System.out.println("DFA after reach / closures:\n"+dfa);
if ( !d.isResolvedWithPredicates() && numberOfEdgesEmanating==0 ) {
//System.out.println("dangling DFA state "+d+"\nAfter reach / closures:\n"+dfa);
// TODO: can fixed lookahead hit a dangling state case?
// TODO: yes, with left recursion
//System.err.println("dangling state alts: "+d.getAltSet());
dfa.probe.reportDanglingState(d);
// turn off all configurations except for those associated with
// min alt number; somebody has to win else some input will not
// predict any alt.
int minAlt = resolveByPickingMinAlt(d, null);
// force it to be an accept state
// don't call convertToAcceptState() which merges stop states.
// other states point at us; don't want them pointing to dead states
d.setAcceptState(true); // might be adding new accept state for alt
dfa.setAcceptState(minAlt, d);
//convertToAcceptState(d, minAlt); // force it to be an accept state
}
// Check to see if we need to add any semantic predicate transitions
// might have both token and predicated edges from d
if ( d.isResolvedWithPredicates() ) {
addPredicateTransitions(d);
}
}
/** In the normal case, add a transition from state d to targetState with label.
* If COLLAPSE_ALL_PARALLEL_EDGES, however, try to merge all edges from
* d to targetState; this means merging their labels. Another optimization
* is to reduce to a single EOT edge any set of edges from d to targetState
* where there exists an EOT state. EOT is like the wildcard so don't
* bother to test any other edges. Example:
*
* NUM_INT
* : '1'..'9' ('0'..'9')* ('l'|'L')?
* | '0' ('x'|'X') ('0'..'9'|'a'..'f'|'A'..'F')+ ('l'|'L')?
* | '0' ('0'..'7')* ('l'|'L')?
* ;
*
* The normal decision to predict alts 1, 2, 3 is:
*
* if ( (input.LA(1)>='1' && input.LA(1)<='9') ) {
* alt7=1;
* }
* else if ( input.LA(1)=='0' ) {
* if ( input.LA(2)=='X'||input.LA(2)=='x' ) {
* alt7=2;
* }
* else if ( (input.LA(2)>='0' && input.LA(2)<='7') ) {
* alt7=3;
* }
* else if ( input.LA(2)=='L'||input.LA(2)=='l' ) {
* alt7=3;
* }
* else {
* alt7=3;
* }
* }
* else error
*
* Clearly, alt 3 is predicted with extra work since it tests 0..7
* and [lL] before finally realizing that any character is actually
* ok at k=2.
*
* A better decision is as follows:
*
* if ( (input.LA(1)>='1' && input.LA(1)<='9') ) {
* alt7=1;
* }
* else if ( input.LA(1)=='0' ) {
* if ( input.LA(2)=='X'||input.LA(2)=='x' ) {
* alt7=2;
* }
* else {
* alt7=3;
* }
* }
*
* The DFA originally has 3 edges going to the state that predicts alt 3,
* but upon seeing the EOT edge (the "else"-clause), this method
* replaces the old merged label (which would have (0..7|l|L)) with EOT.
* The code generator then leaves alt 3 predicted with a simple else-
* clause. :)
*
* The only time the EOT optimization makes no sense is in the Tokens
* rule. We want EOT to truly mean you have matched an entire token
* so don't bother actually rewinding to execute that rule unless there
* are actions in that rule. For now, since I am not preventing
* backtracking from Tokens rule, I will simply allow the optimization.
*/
protected static int addTransition(DFAState d,
Label label,
DFAState targetState,
Map targetToLabelMap)
{
//System.out.println(d.stateNumber+"-"+label.toString(dfa.nfa.grammar)+"->"+targetState.stateNumber);
int n = 0;
if ( DFAOptimizer.COLLAPSE_ALL_PARALLEL_EDGES ) {
// track which targets we've hit
Integer tI = Utils.integer(targetState.stateNumber);
Transition oldTransition = (Transition)targetToLabelMap.get(tI);
if ( oldTransition!=null ) {
//System.out.println("extra transition to "+tI+" upon "+label.toString(dfa.nfa.grammar));
// already seen state d to target transition, just add label
// to old label unless EOT
if ( label.getAtom()==Label.EOT ) {
// merge with EOT means old edge can go away
oldTransition.label = new Label(Label.EOT);
}
else {
// don't add anything to EOT, it's essentially the wildcard
if ( oldTransition.label.getAtom()!=Label.EOT ) {
// ok, not EOT, add in this label to old label
oldTransition.label.add(label);
}
//System.out.println("label updated to be "+oldTransition.label.toString(dfa.nfa.grammar));
}
}
else {
// make a transition from d to t upon 'a'
n = 1;
label = (Label)label.clone(); // clone in case we alter later
int transitionIndex = d.addTransition(targetState, label);
Transition trans = d.getTransition(transitionIndex);
// track target/transition pairs
targetToLabelMap.put(tI, trans);
}
}
else {
n = 1;
d.addTransition(targetState, label);
}
return n;
}
/** For all NFA states (configurations) merged in d,
* compute the epsilon closure; that is, find all NFA states reachable
* from the NFA states in d via purely epsilon transitions.
*/
public void closure(DFAState d) {
if ( debug ) {
System.out.println("closure("+d+")");
}
List<NFAConfiguration> configs = new ArrayList<NFAConfiguration>();
// Because we are adding to the configurations during closure, we
// must clone the initial list so we know when to stop doing closure
configs.addAll(d.nfaConfigurations);
// for each NFA configuration in d (abort if we detect non-LL(*) state)
int numConfigs = configs.size();
for (int i = 0; i < numConfigs; i++) {
NFAConfiguration c = (NFAConfiguration)configs.get(i);
if ( c.singleAtomTransitionEmanating ) {
continue; // ignore NFA states w/o epsilon transitions
}
//System.out.println("go do reach for NFA state "+c.state);
// figure out reachable NFA states from each of d's nfa states
// via epsilon transitions.
// Fill configsInClosure rather than altering d configs inline
closure(dfa.nfa.getState(c.state),
c.alt,
c.context,
c.semanticContext,
d,
false);
}
//System.out.println("after closure d="+d);
d.closureBusy = null; // whack all that memory used during closure
}
/** Where can we get from NFA state p traversing only epsilon transitions?
* Add new NFA states + context to DFA state d. Also add semantic
* predicates to semantic context if collectPredicates is set. We only
* collect predicates at hoisting depth 0, meaning before any token/char
* has been recognized. This corresponds, during analysis, to the
* initial DFA start state construction closure() invocation.
*
* There are four cases of interest (the last being the usual transition):
*
* 1. Traverse an edge that takes us to the start state of another
* rule, r. We must push this state so that if the DFA
* conversion hits the end of rule r, then it knows to continue
* the conversion at the state following the state that "invoked" r. By
* construction, there is a single transition emanating from a rule
* ref node.
*
* 2. Reach an NFA state associated with the end of a rule, r, in the
* grammar from which it was built. We must add an implicit (i.e.,
* don't actually add an epsilon transition) epsilon transition
* from r's end state to the NFA state following the NFA state
* that transitioned to rule r's start state. Because there are
* many states that could reach r, the context for a rule invocation
* is part of a call tree, not a simple stack. When we fall off the end
* of a rule, we "pop" a state off the call tree and add that state's
* "following" node to d's NFA configuration list. The context
* for this new addition will be the new "stack top" in the call tree.
*
* 3. Like case 2, we reach an NFA state associated with the end of a
* rule, r, in the grammar from which the NFA was built. In this case,
* however, we realize that during this NFA->DFA conversion, no state
* invoked the current rule's NFA. There is no choice but to add
* all NFA states that follow references to r's start state. This is
* analogous to computing the FOLLOW(r) in the LL(k) world. By
* construction, every rule stop state has a chain of nodes emanating
* from it that points to every possible following node. This case
* is then conveniently handled by case 4.
*
* 4. Normal case. If p can reach another NFA state q, then add
* q to d's configuration list, copying p's context for q's context.
* If there is a semantic predicate on the transition, then AND it
* with any existing semantic context.
*
* Current state p is always added to d's configuration list as it's part
* of the closure as well.
*
* When is a closure operation in a cycle condition? While it is
* very possible to have the same NFA state mentioned twice
* within the same DFA state, there are two situations that
* would lead to nontermination of closure operation:
*
* o Whenever closure reaches a configuration where the same state
* with the same or a suffix context already exists. This catches
* the IF-THEN-ELSE tail recursion cycle and things like
*
* a : A a | B ;
*
* the context will be $ (empty stack).
*
* We have to check
* larger context stacks because of (...)+ loops. For
* example, the context of a (...)+ can be nonempty if the
* surrounding rule is invoked by another rule:
*
* a : b A | X ;
* b : (B|)+ ; // nondeterministic by the way
*
* The context of the (B|)+ loop is "invoked from item
* a : . b A ;" and then the empty alt of the loop can reach back
* to itself. The context stack will have one "return
* address" element and so we must check for same state, same
* context for arbitrary context stacks.
*
* Idea: If we've seen this configuration before during closure, stop.
* We also need to avoid reaching same state with conflicting context.
* Ultimately analysis would stop and we'd find the conflict, but we
* should stop the computation. Previously I only checked for
* exact config. Need to check for same state, suffix context
* not just exact context.
*
* o Whenever closure reaches a configuration where state p
* is present in its own context stack. This means that
* p is a rule invocation state and the target rule has
* been called before. NFAContext.MAX_RECURSIVE_INVOCATIONS
* (See the comment there also) determines how many times
* it's possible to recurse; clearly we cannot recurse forever.
* Some grammars such as the following actually require at
* least one recursive call to correctly compute the lookahead:
*
* a : L ID R
* | b
* ;
* b : ID
* | L a R
* ;
*
* Input L ID R is ambiguous but to figure this out, ANTLR
* needs to go a->b->a->b to find the L ID sequence.
*
* Do not allow closure to add a configuration that would
* allow too much recursion.
*
* This case also catches infinite left recursion.
*/
public void closure(NFAState p,
int alt,
NFAContext context,
SemanticContext semanticContext,
DFAState d,
boolean collectPredicates)
{
if ( debug ){
System.out.println("closure at "+p.enclosingRule.name+" state "+p.stateNumber+"|"+
alt+" filling DFA state "+d.stateNumber+" with context "+context
);
}
// if ( DFA.MAX_TIME_PER_DFA_CREATION>0 &&
// System.currentTimeMillis() - d.dfa.conversionStartTime >=
// DFA.MAX_TIME_PER_DFA_CREATION )
// {
// // bail way out; we've blown up somehow
// throw new AnalysisTimeoutException(d.dfa);
// }
NFAConfiguration proposedNFAConfiguration =
new NFAConfiguration(p.stateNumber,
alt,
context,
semanticContext);
// Avoid infinite recursion
if ( closureIsBusy(d, proposedNFAConfiguration) ) {
if ( debug ) {
System.out.println("avoid visiting exact closure computation NFA config: "+
proposedNFAConfiguration+" in "+p.enclosingRule.name);
System.out.println("state is "+d.dfa.decisionNumber+"."+d.stateNumber);
}
return;
}
// set closure to be busy for this NFA configuration
d.closureBusy.add(proposedNFAConfiguration);
// p itself is always in closure
d.addNFAConfiguration(p, proposedNFAConfiguration);
// Case 1: are we a reference to another rule?
Transition transition0 = p.transition[0];
if ( transition0 instanceof RuleClosureTransition ) {
int depth = context.recursionDepthEmanatingFromState(p.stateNumber);
// Detect recursion by more than a single alt, which indicates
// that the decision's lookahead language is potentially non-regular; terminate
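// (a depth of 1 means invoking state p already appears once on this
// configuration's context stack, i.e., this rule ref is being re-entered)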
if ( depth == 1 && d.dfa.getUserMaxLookahead()==0 ) { // k=* only
d.dfa.recursiveAltSet.add(alt); // indicate that this alt is recursive
if ( d.dfa.recursiveAltSet.size()>1 ) {
//System.out.println("recursive alts: "+d.dfa.recursiveAltSet.toString());
d.abortedDueToMultipleRecursiveAlts = true;
throw new NonLLStarDecisionException(d.dfa);
}
/*
System.out.println("alt "+alt+" in rule "+p.enclosingRule+" dec "+d.dfa.decisionNumber+
" ctx: "+context);
System.out.println("d="+d);
*/
}
// Detect an attempt to recurse too high
// if this context has hit the max recursions for p.stateNumber,
// don't allow it to enter p.stateNumber again
if ( depth >= NFAContext.MAX_SAME_RULE_INVOCATIONS_PER_NFA_CONFIG_STACK ) {
/*
System.out.println("OVF state "+d);
System.out.println("proposed "+proposedNFAConfiguration);
*/
d.abortedDueToRecursionOverflow = true;
d.dfa.probe.reportRecursionOverflow(d, proposedNFAConfiguration);
if ( debug ) {
System.out.println("analysis overflow in closure("+d.stateNumber+")");
}
return;
}
// otherwise, it's cool to (re)enter target of this rule ref
RuleClosureTransition ref = (RuleClosureTransition)transition0;
// first create a new context and push onto call tree,
// recording the fact that we are invoking a rule and
// from which state (case 2 below will get the following state
// via the RuleClosureTransition emanating from the invoking state
// pushed on the stack).
// Reset the context to reflect the fact we invoked rule
NFAContext newContext = new NFAContext(context, p);
//System.out.println("invoking rule "+ref.rule.name);
// System.out.println(" context="+context);
// traverse epsilon edge to new rule
NFAState ruleTarget = (NFAState)ref.target;
closure(ruleTarget, alt, newContext, semanticContext, d, collectPredicates);
}
// Case 2: end of rule state, context (i.e., an invoker) exists
else if ( p.isAcceptState() && context.parent!=null ) {
NFAState whichStateInvokedRule = context.invokingState;
RuleClosureTransition edgeToRule =
(RuleClosureTransition)whichStateInvokedRule.transition[0];
NFAState continueState = edgeToRule.followState;
NFAContext newContext = context.parent; // "pop" invoking state
closure(continueState, alt, newContext, semanticContext, d, collectPredicates);
}
// Case 3: end of rule state, nobody invoked this rule (no context)
// Fall thru to be handled by case 4 automagically.
// Case 4: ordinary NFA->DFA conversion case: simple epsilon transition
else {
// recurse down any epsilon transitions
if ( transition0!=null && transition0.isEpsilon() ) {
boolean collectPredicatesAfterAction = collectPredicates;
if ( transition0.isAction() && collectPredicates ) {
collectPredicatesAfterAction = false;
/*
if ( computingStartState ) {
System.out.println("found action during prediction closure "+((ActionLabel)transition0.label).actionAST.token);
}
*/
}
closure((NFAState)transition0.target,
alt,
context,
semanticContext,
d,
collectPredicatesAfterAction
);
}
else if ( transition0!=null && transition0.isSemanticPredicate() ) {
SemanticContext labelContext = transition0.label.getSemanticContext();
if ( computingStartState ) {
if ( collectPredicates ) {
// only indicate we can see a predicate if we're collecting preds
// Could be computing start state & seen an action before this.
dfa.predicateVisible = true;
}
else {
// this state has a pred, but we can't see it.
dfa.hasPredicateBlockedByAction = true;
// System.out.println("found pred during prediction but blocked by action found previously");
}
}
// continue closure here too, but add the sem pred to ctx
SemanticContext newSemanticContext = semanticContext;
if ( collectPredicates ) {
// AND the previous semantic context with new pred
// do not hoist syn preds from other rules; only get if in
// starting state's rule (i.e., context is empty)
int walkAlt =
dfa.decisionNFAStartState.translateDisplayAltToWalkAlt(alt);
NFAState altLeftEdge =
dfa.nfa.grammar.getNFAStateForAltOfDecision(dfa.decisionNFAStartState,walkAlt);
/*
System.out.println("state "+p.stateNumber+" alt "+alt+" walkAlt "+walkAlt+" trans to "+transition0.target);
System.out.println("DFA start state "+dfa.decisionNFAStartState.stateNumber);
System.out.println("alt left edge "+altLeftEdge.stateNumber+
", epsilon target "+
altLeftEdge.transition(0).target.stateNumber);
*/
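// Semantic predicates are always ANDed in while collecting; a syntactic
// predicate is only ANDed in when p sits at this alt's left edge in the
// decision's own rule (synpreds from other rules are not hoisted).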
if ( !labelContext.isSyntacticPredicate() ||
p==altLeftEdge.transition[0].target )
{
//System.out.println("&"+labelContext+" enclosingRule="+p.enclosingRule);
newSemanticContext =
SemanticContext.and(semanticContext, labelContext);
}
}
closure((NFAState)transition0.target,
alt,
context,
newSemanticContext,
d,
collectPredicates);
}
Transition transition1 = p.transition[1];
if ( transition1!=null && transition1.isEpsilon() ) {
closure((NFAState)transition1.target,
alt,
context,
semanticContext,
d,
collectPredicates);
}
}
// don't remove "busy" flag as we want to prevent all
// references to same config of state|alt|ctx|semCtx even
// if resulting from another NFA state
}
/** A closure operation should abort if that computation has already
* been done or a computation with a conflicting context has already
* been done. If proposed NFA config's state and alt are the same
* there is potentially a problem. If the stack context is identical
* then clearly the exact same computation is proposed. If a context
* is a suffix of the other, then again the computation is in an
* identical context; a stack and a longer stack that ends with it (i.e.,
* one is a suffix of the other) are considered the same stack.
* We could walk configurations linearly doing the comparison instead
* of using a set for exact matches, but it's much slower because you can't
* do a Set lookup. I use exact match as ANTLR
* always detects the conflict later when checking for context suffixes...
* I check for left-recursive stuff and terminate before analysis to
* avoid the need to do this more expensive computation.
*
* 12-31-2007: I had to use the loop again rather than simple
* closureBusy.contains(proposedNFAConfiguration) lookup. The
* semantic context should not be considered when determining if
* a closure operation is busy. I saw a FOLLOW closure operation
* spin until time out because the predicate context kept increasing
* in size even though it's the same boolean value. This seems faster also
* because I'm not doing String.equals on the preds all the time.
*
* 05-05-2008: Hmm...well, I think it was a mistake to remove the sem
* ctx check below...adding back in. Coincides with report of ANTLR
* getting super slow: http://www.antlr.org:8888/browse/ANTLR-235
* This could be because it doesn't properly compute then resolve
* a predicate expression. Seems to fix unit test:
* TestSemanticPredicates.testSemanticContextPreventsEarlyTerminationOfClosure()
* Changing back to Set from List. Changed a large grammar from 8 minutes
* to 11 seconds. Cool. Closing ANTLR-235.
*/
public static boolean closureIsBusy(DFAState d,
NFAConfiguration proposedNFAConfiguration)
{
return d.closureBusy.contains(proposedNFAConfiguration);
/*
int numConfigs = d.closureBusy.size();
// Check epsilon cycle (same state, same alt, same context)
for (int i = 0; i < numConfigs; i++) {
NFAConfiguration c = (NFAConfiguration) d.closureBusy.get(i);
if ( proposedNFAConfiguration.state==c.state &&
proposedNFAConfiguration.alt==c.alt &&
proposedNFAConfiguration.semanticContext.equals(c.semanticContext) &&
proposedNFAConfiguration.context.suffix(c.context) )
{
return true;
}
}
return false;
*/
}
/** Given the set of NFA states in DFA state d, find all NFA states
* reachable traversing label arcs. By definition, there can be
* only one DFA state reachable by an atom from DFA state d so we must
* find and merge all NFA states reachable via label. Return a new
* DFAState that has all of those NFA states with their context (i.e.,
* which alt do they predict and where to return to if they fall off the
* end of a rule).
*
* Because we cannot jump to another rule nor fall off the end of a rule
* via a non-epsilon transition, NFA states reachable from d have the
* same configuration as the NFA state in d. So if NFA state 7 in d's
* configurations can reach NFA state 13 then 13 will be added to the
* new DFAState (labelDFATarget) with the same configuration as state
* 7 had.
*
* This method does not see EOT transitions off the end of token rule
* accept states if the rule was invoked by somebody.
*/
public DFAState reach(DFAState d, Label label) {
//System.out.println("reach "+label.toString(dfa.nfa.grammar)+" from "+d.stateNumber);
DFAState labelDFATarget = dfa.newState();
// for each NFA state in d with a labeled edge,
// add in target states for label
//System.out.println("size(d.state="+d.stateNumber+")="+d.nfaConfigurations.size());
//System.out.println("size(labeled edge states)="+d.configurationsWithLabeledEdges.size());
List<NFAConfiguration> configs = d.configurationsWithLabeledEdges;
int numConfigs = configs.size();
for (int i = 0; i < numConfigs; i++) {
NFAConfiguration c = configs.get(i);
if ( c.resolved || c.resolveWithPredicate ) {
continue; // the conflict resolver indicates we must leave alone
}
NFAState p = dfa.nfa.getState(c.state);
// by design of the grammar->NFA conversion, only transition 0
// may have a non-epsilon edge.
Transition edge = p.transition[0];
if ( edge==null || !c.singleAtomTransitionEmanating ) {
continue;
}
Label edgeLabel = edge.label;
// SPECIAL CASE
// if it's an EOT transition on end of lexer rule, but context
// stack is not empty, then don't see the EOT; the closure
// will have added in the proper states following the reference
// to this rule in the invoking rule. In other words, if
// somebody called this rule, don't see the EOT emanating from
// this accept state.
if ( c.context.parent!=null && edgeLabel.label==Label.EOT ) {
continue;
}
// Labels not unique at this point (not until addReachableLabels)
// so try simple int label match before general set intersection
//System.out.println("comparing "+edgeLabel+" with "+label);
if ( Label.intersect(label, edgeLabel) ) {
// found a transition with label;
// add NFA target to (potentially) new DFA state
NFAConfiguration newC = labelDFATarget.addNFAConfiguration(
(NFAState)edge.target,
c.alt,
c.context,
c.semanticContext);
}
}
if ( labelDFATarget.nfaConfigurations.size()==0 ) {
// kill; it's empty
dfa.setState(labelDFATarget.stateNumber, null);
labelDFATarget = null;
}
return labelDFATarget;
}
/** Walk the configurations of this DFA state d looking for the
* configuration, c, that has a transition on EOT. State d should
* be converted to an accept state predicting the c.alt. Blast
* d's current configuration set and make it just have config c.
*
* TODO: can there be more than one config with EOT transition?
* That would mean that two NFA configurations could reach the
* end of the token with possibly different predicted alts.
* Seems like that would be rare or impossible. Perhaps convert
* this routine to find all such configs and give error if >1.
*/
protected void convertToEOTAcceptState(DFAState d) {
Label eot = new Label(Label.EOT);
int numConfigs = d.nfaConfigurations.size();
for (int i = 0; i < numConfigs; i++) {
NFAConfiguration c = (NFAConfiguration)d.nfaConfigurations.get(i);
if ( c.resolved || c.resolveWithPredicate ) {
continue; // the conflict resolver indicates we must leave alone
}
NFAState p = dfa.nfa.getState(c.state);
Transition edge = p.transition[0];
Label edgeLabel = edge.label;
if ( edgeLabel.equals(eot) ) {
//System.out.println("config with EOT: "+c);
d.setAcceptState(true);
//System.out.println("d goes from "+d);
d.nfaConfigurations.clear();
d.addNFAConfiguration(p,c.alt,c.context,c.semanticContext);
//System.out.println("to "+d);
return; // assume only one EOT transition
}
}
}
/** Add a new DFA state to the DFA if not already present.
* If the DFA state uniquely predicts a single alternative, it
* becomes a stop state; don't add to work list. Further, if
* there exists an NFA state predicted by > 1 different alternatives
* and with the same syn and sem context, the DFA is nondeterministic for
* at least one input sequence reaching that NFA state.
*/
protected DFAState addDFAStateToWorkList(DFAState d) {
DFAState existingState = dfa.addState(d);
if ( d != existingState ) {
// already there...use/return the existing DFA state.
// But also set the states[d.stateNumber] to the existing
// DFA state because the closureIsBusy must report
// infinite recursion on a state before it knows
// whether or not the state will already be
// found after closure on it finishes. It could be
// referring to a state that will ultimately not make it
// into the reachable state space and the error
// reporting must be able to compute the path from
// start to the error state with infinite recursion
dfa.setState(d.stateNumber, existingState);
return existingState;
}
// if not there, then examine new state.
// resolve syntactic conflicts by choosing a single alt or
// by using semantic predicates if present.
resolveNonDeterminisms(d);
// If deterministic, don't add this state; it's an accept state
// Just return as a valid DFA state
int alt = d.getUniquelyPredictedAlt();
if ( alt!=NFA.INVALID_ALT_NUMBER ) { // uniquely predicts an alt?
d = convertToAcceptState(d, alt);
/*
System.out.println("convert to accept; DFA "+d.dfa.decisionNumber+" state "+d.stateNumber+" uniquely predicts alt "+
d.getUniquelyPredictedAlt());
*/
}
else {
// unresolved, add to work list to continue NFA conversion
work.add(d);
}
return d;
}
protected DFAState convertToAcceptState(DFAState d, int alt) {
// only merge stop states if they are deterministic and no
// recursion problems and only if they have the same gated pred
// context!
// Later, the error reporting may want to trace the path from
// the start state to the nondet state
if ( DFAOptimizer.MERGE_STOP_STATES &&
d.getNonDeterministicAlts()==null &&
!d.abortedDueToRecursionOverflow &&
!d.abortedDueToMultipleRecursiveAlts )
{
// check to see if we already have an accept state for this alt
// [must do this after we resolve nondeterminisms in general]
DFAState acceptStateForAlt = dfa.getAcceptState(alt);
if ( acceptStateForAlt!=null ) {
// we already have an accept state for alt;
// Are their gated sem pred contexts the same?
// For now we assume a braindead version: both must not
// have gated preds or share exactly same single gated pred.
// The equals() method is only defined on Predicate contexts not
// OR etc...
SemanticContext gatedPreds = d.getGatedPredicatesInNFAConfigurations();
SemanticContext existingStateGatedPreds =
acceptStateForAlt.getGatedPredicatesInNFAConfigurations();
if ( (gatedPreds==null && existingStateGatedPreds==null) ||
((gatedPreds!=null && existingStateGatedPreds!=null) &&
gatedPreds.equals(existingStateGatedPreds)) )
{
// make this d.statenumber point at old DFA state
dfa.setState(d.stateNumber, acceptStateForAlt);
dfa.removeState(d); // remove this state from unique DFA state set
d = acceptStateForAlt; // use old accept state; throw this one out
return d;
}
// else consider it a new accept state; fall through.
}
}
d.setAcceptState(true); // new accept state for alt
dfa.setAcceptState(alt, d);
return d;
}
/** If > 1 NFA configurations within this DFA state have identical
* NFA state and context, but differ in their predicted
* alternative, then a single input sequence predicts multiple alts.
* (TODO: update for new context suffix stuff 3-9-2005)
* The NFA decision is therefore syntactically indistinguishable
* from the left edge upon at least one input sequence. We may
* terminate the NFA to DFA conversion for these paths since no
* paths emanating from those NFA states can possibly separate
* these conjoined twins once intertwined to make things
* deterministic (unless there are semantic predicates; see below).
*
* Upon a nondeterministic set of NFA configurations, we should
* report a problem to the grammar designer and resolve the issue
* by arbitrarily picking the first alternative (this usually
* ends up producing the most natural behavior). Pick the lowest
* alt number and just turn off all NFA configurations
* associated with the other alts. Rather than remove conflicting
* NFA configurations, I set the "resolved" bit so that future
* computations will ignore them. In this way, we maintain the
* complete DFA state with all its configurations, but prevent
* future DFA conversion operations from pursuing undesirable
* paths. Remember that we want to terminate DFA conversion as
* soon as we know the decision is deterministic *or*
* nondeterministic.
*
* [BTW, I have convinced myself that there can be at most one
* set of nondeterministic configurations in a DFA state. Only NFA
* configurations arising from the same input sequence can appear
* in a DFA state. There is no way to have another complete set
* of nondeterministic NFA configurations without another input
* sequence, which would reach a different DFA state. Therefore,
* the two nondeterministic NFA configuration sets cannot collide
* in the same DFA state.]
*
* Consider DFA state {(s|1),(s|2),(s|3),(t|3),(v|4)} where (s|a)
* is state 's' and alternative 'a'. Here, configuration set
* {(s|1),(s|2),(s|3)} predicts 3 different alts. Configurations
* (s|2) and (s|3) are "resolved", leaving {(s|1),(t|3),(v|4)} as
* items that must still be considered by the DFA conversion
* algorithm in DFA.findNewDFAStatesAndAddDFATransitions().
*
* Consider the following grammar where alts 1 and 2 are no
* problem because of the 2nd lookahead symbol. Alts 3 and 4 are
* identical and will therefore reach the rule end NFA state but
* predicting 2 different alts (no amount of future lookahead
* will render them deterministic/separable):
*
* a : A B
* | A C
* | A
* | A
* ;
*
* Here is a (slightly reduced) NFA of this grammar:
*
*  (1)-A->(2)-B->(end)-EOF->(8)
*   |              ^
*  (2)-A->(3)-C----|
*   |              ^
*  (4)-A->(5)------|
*   |              ^
*  (6)-A->(7)------|
*
* where (n) is NFA state n. To begin DFA conversion, the start
* state is created:
*
* {(1|1),(2|2),(4|3),(6|4)}
*
* Upon A, all NFA configurations lead to new NFA states yielding
* new DFA state:
*
* {(2|1),(3|2),(5|3),(7|4),(end|3),(end|4)}
*
* where the configurations with state end in them are added
* during the epsilon closure operation. State end predicts both
* alts 3 and 4. An error is reported, the latter configuration is
* flagged as resolved leaving the DFA state as:
*
* {(2|1),(3|2),(5|3),(7|4|resolved),(end|3),(end|4|resolved)}
*
* As NFA configurations are added to a DFA state during its
* construction, the reachable set of labels is computed. Here
* reachable is {B,C,EOF} because there is at least one NFA state
* in the DFA state that can transition upon those symbols.
*
* The final DFA looks like:
*
*   {(1|1),(2|2),(4|3),(6|4)}
*               |
*               v
*   {(2|1),(3|2),(5|3),(7|4),(end|3),(end|4)} -B-> (end|1)
*     |                                        |
*     C                                        ----EOF-> (8,3)
*     |
*     v
*   (end|2)
*
* Upon AB, alt 1 is predicted. Upon AC, alt 2 is predicted.
* Upon A EOF, alt 3 is predicted. Alt 4 is not a viable
* alternative.
*
* The algorithm is essentially to walk all the configurations
* looking for a conflict of the form (s|i) and (s|j) for i!=j.
* Use a hash table to track state+context pairs for collisions
* so that we have O(n) to walk the n configurations looking for
* a conflict. Upon every conflict, track the alt number so
* we have a list of all nondeterministically predicted alts. Also
* track the minimum alt. Next go back over the configurations, setting
* the "resolved" bit for any that have an alt that is a member of
* the nondeterministic set. This will effectively remove any alts
* but the one we want from future consideration.
*
* See resolveWithSemanticPredicates()
*
* AMBIGUOUS TOKENS
*
* With keywords and ID tokens, there is an inherent ambiguity in that
* "int" can be matched by ID also. Each lexer rule has an EOT
* transition emanating from it which is used whenever the end of
* a rule is reached and another token rule did not invoke it. EOT
* is the only thing that can be seen next. If two rules are identical
* like "int" and "int" then the 2nd def is unreachable and you'll get
* a warning. We prevent a warning though for the keyword/ID issue as
* ID is still reachable. This can be a bit weird: a '+' rule followed by a
* '+'|'+=' rule means the 2nd rule will never match just '+'.
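*
* For example, with INT : 'int' ; and ID : ('a'..'z')+ ; the input "int"
* reaches an EOT state predicting both rules; the earlier rule (INT) wins.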
*
* If all NFA states in this DFA state are targets of EOT transitions,
* (and there is more than one state plus no unique alt is predicted)
* then DFA conversion will leave this state as a dead state as nothing
* can be reached from this state. To resolve the ambiguity, just do
* what flex and friends do: pick the first rule (alt in this case) to
* win. This means you should put keywords before the ID rule.
* If the DFA state has only one NFA state then there is no issue:
* it uniquely predicts one alt. :) Problem
* states will look like this during conversion:
*
* DFA 1:{9|1, 19|2, 14|3, 20|2, 23|2, 24|2, ...}-<EOT>->5:{41|3, 42|2}
*
* Worse, when you have two identical literal rules, you will see 3 alts
* in the EOT state (one for ID and one each for the identical rules).
*/
public void resolveNonDeterminisms(DFAState d) {
if ( debug ) {
System.out.println("resolveNonDeterminisms "+d.toString());
}
boolean conflictingLexerRules = false;
Set nondeterministicAlts = d.getNonDeterministicAlts();
if ( debug && nondeterministicAlts!=null ) {
System.out.println("nondet alts="+nondeterministicAlts);
}
// CHECK FOR AMBIGUOUS EOT (if |allAlts|>1 and EOT state, resolve)
// grab any config to see if EOT state; any other configs must
// transition on EOT to get to this DFA state as well so all
// states in d must be targets of EOT. These are the end states
// created in NFAFactory.build_EOFState
NFAConfiguration anyConfig = d.nfaConfigurations.get(0);
NFAState anyState = dfa.nfa.getState(anyConfig.state);
// if d is target of EOT and more than one predicted alt
// indicate that d is nondeterministic on all alts otherwise
// it looks like state has no problem
if ( anyState.isEOTTargetState() ) {
Set allAlts = d.getAltSet();
// is more than 1 alt predicted?
if ( allAlts!=null && allAlts.size()>1 ) {
nondeterministicAlts = allAlts;
// track Tokens rule issues differently than other decisions
if ( d.dfa.isTokensRuleDecision() ) {
dfa.probe.reportLexerRuleNondeterminism(d,allAlts);
//System.out.println("Tokens rule DFA state "+d+" nondeterministic");
conflictingLexerRules = true;
}
}
}
// if no problems return unless we aborted work on d to avoid inf recursion
if ( !d.abortedDueToRecursionOverflow && nondeterministicAlts==null ) {
return; // no problems, return
}
// if we're not a conflicting lexer rule and we didn't abort, report ambig
// We should get a report for abort so don't give another
if ( !d.abortedDueToRecursionOverflow && !conflictingLexerRules ) {
// TODO: with k=x option set, this is called twice for same state
dfa.probe.reportNondeterminism(d, nondeterministicAlts);
// TODO: how to turn off when it's only the FOLLOW that is
// conflicting. This used to shut off even alts i,j < n
// conflict warnings. :(
}
// ATTEMPT TO RESOLVE WITH SEMANTIC PREDICATES
boolean resolved =
tryToResolveWithSemanticPredicates(d, nondeterministicAlts);
if ( resolved ) {
if ( debug ) {
System.out.println("resolved DFA state "+d.stateNumber+" with pred");
}
d.resolvedWithPredicates = true;
dfa.probe.reportNondeterminismResolvedWithSemanticPredicate(d);
return;
}
// RESOLVE SYNTACTIC CONFLICT BY REMOVING ALL BUT ONE ALT
resolveByChoosingFirstAlt(d, nondeterministicAlts);
//System.out.println("state "+d.stateNumber+" resolved to alt "+winningAlt);
}
protected int resolveByChoosingFirstAlt(DFAState d, Set nondeterministicAlts) {
int winningAlt = 0;
if ( dfa.isGreedy() ) {
winningAlt = resolveByPickingMinAlt(d,nondeterministicAlts);
}
else {
// If nongreedy, the exit alt should win, but only if it's
// involved in the nondeterminism!
/*
System.out.println("resolving exit alt for decision="+
dfa.decisionNumber+" state="+d);
System.out.println("nondet="+nondeterministicAlts);
System.out.println("exit alt "+exitAlt);
*/
int exitAlt = dfa.getNumberOfAlts();
if ( nondeterministicAlts.contains(Utils.integer(exitAlt)) ) {
// if nongreedy and exit alt is one of those nondeterministic alts
// predicted, resolve in favor of what follows block
winningAlt = resolveByPickingExitAlt(d,nondeterministicAlts);
}
else {
winningAlt = resolveByPickingMinAlt(d,nondeterministicAlts);
}
}
return winningAlt;
}
/** Turn off all configurations associated with the
* set of incoming nondeterministic alts except the min alt number.
* There may be many alts among the configurations but only turn off
* the ones with problems (other than the min alt of course).
*
* If nondeterministicAlts is null then turn off all configs 'cept those
* associated with the minimum alt.
*
* Return the min alt found.
*/
protected int resolveByPickingMinAlt(DFAState d, Set nondeterministicAlts) {
int min = Integer.MAX_VALUE;
if ( nondeterministicAlts!=null ) {
min = getMinAlt(nondeterministicAlts);
}
else {
min = d.minAltInConfigurations;
}
turnOffOtherAlts(d, min, nondeterministicAlts);
return min;
}
/** Resolve state d by choosing the exit alt, which is the same value as the
* number of alternatives. Return that exit alt.
*/
protected int resolveByPickingExitAlt(DFAState d, Set nondeterministicAlts) {
int exitAlt = dfa.getNumberOfAlts();
turnOffOtherAlts(d, exitAlt, nondeterministicAlts);
return exitAlt;
}
/** turn off all states associated with alts other than the good one
* (as long as they are one of the nondeterministic ones)
*/
protected static void turnOffOtherAlts(DFAState d, int min, Set<Integer> nondeterministicAlts) {
int numConfigs = d.nfaConfigurations.size();
for (int i = 0; i < numConfigs; i++) {
NFAConfiguration configuration = (NFAConfiguration)d.nfaConfigurations.get(i);
if ( configuration.alt!=min ) {
if ( nondeterministicAlts==null ||
nondeterministicAlts.contains(Utils.integer(configuration.alt)) )
{
configuration.resolved = true;
}
}
}
}
protected static int getMinAlt(Set<Integer> nondeterministicAlts) {
int min = Integer.MAX_VALUE;
for (Integer altI : nondeterministicAlts) {
int alt = altI.intValue();
if ( alt < min ) {
min = alt;
}
}
return min;
}
/** See if a set of nondeterministic alternatives can be disambiguated
* with the semantic predicate contexts of the alternatives.
*
* Without semantic predicates, syntactic conflicts are resolved
* by simply choosing the first viable alternative. In the
* presence of semantic predicates, you can resolve the issue by
* evaluating boolean expressions at run time. During analysis,
* this amounts to suppressing grammar error messages to the
* developer. NFA configurations are always marked as "to be
* resolved with predicates" so that
* DFA.findNewDFAStatesAndAddDFATransitions() will know to ignore
* these configurations and add predicate transitions to the DFA
* after adding token/char labels.
*
* During analysis, we can simply make sure that for n
* ambiguously predicted alternatives there are at least n-1
* unique predicate sets. The nth alternative can be predicted
* with "not" the "or" of all other predicates. NFA configurations without
* predicates are assumed to have the default predicate of
* "true" from a user point of view. When true is combined via || with
* another predicate, the predicate is a tautology and must be removed
* from consideration for disambiguation:
*
* a : b | B ; // hoisting p1||true out of rule b, yields no predicate
* b : {p1}? B | B ;
*
* This is done down in getPredicatesPerNonDeterministicAlt().
*/
protected boolean tryToResolveWithSemanticPredicates(DFAState d,
Set nondeterministicAlts)
{
Map<Integer, SemanticContext> altToPredMap =
getPredicatesPerNonDeterministicAlt(d, nondeterministicAlts);
if ( altToPredMap.size()==0 ) {
return false;
}
//System.out.println("nondeterministic alts with predicates: "+altToPredMap);
dfa.probe.reportAltPredicateContext(d, altToPredMap);
if ( nondeterministicAlts.size()-altToPredMap.size()>1 ) {
// too few predicates to resolve; just return
return false;
}
// Handle case where 1 predicate is missing
// Case 1. Semantic predicates
// If the missing pred is on the nth alt, !(union of other preds)==true
// so we can avoid that computation. If the naked alt is the ith, then we must
// test it with !(union) since semantically predicated alts are order
// independent
// Case 2: Syntactic predicates
// The naked alt is always assumed to be true as the order of
// alts is the order of precedence. The naked alt will be a tautology
// anyway as it's !(union of other preds). This implies
// that there is no such thing as noviable alt for synpred edges
// emanating from a DFA state.
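// E.g., with nondeterministic alts {1,2,3} and predicates on alts 1 and 2
// only: naked alt 3 is the max alt and becomes the default clause ({true}?);
// if the naked alt were alt 1 instead, it would be covered with !(p2||p3).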
if ( altToPredMap.size()==nondeterministicAlts.size()-1 ) {
// if there are n-1 predicates for n nondeterministic alts, can fix
org.antlr.misc.BitSet ndSet = org.antlr.misc.BitSet.of(nondeterministicAlts);
org.antlr.misc.BitSet predSet = org.antlr.misc.BitSet.of(altToPredMap);
int nakedAlt = ndSet.subtract(predSet).getSingleElement();
SemanticContext nakedAltPred = null;
if ( nakedAlt == max(nondeterministicAlts) ) {
// the naked alt is the last nondet alt and will be the default clause
nakedAltPred = new SemanticContext.TruePredicate();
}
else {
// pretend naked alternative is covered with !(union other preds)
// unless one of the preds from the other alts is a manually specified synpred,
// since those have the same precedence as alt order. A missing synpred
// is true so that alt wins (or is at least attempted).
// Note: can't miss any preds on alts (can't be here) if auto backtrack
// since it prefixes all.
// In the LL(*) paper, I'll just have the algorithm emit a warning about the
// uncovered pred
SemanticContext unionOfPredicatesFromAllAlts =
getUnionOfPredicates(altToPredMap);
//System.out.println("all predicates "+unionOfPredicatesFromAllAlts);
if ( unionOfPredicatesFromAllAlts.isSyntacticPredicate() ) {
nakedAltPred = new SemanticContext.TruePredicate();
}
else {
nakedAltPred =
SemanticContext.not(unionOfPredicatesFromAllAlts);
}
}
//System.out.println("covering naked alt="+nakedAlt+" with "+nakedAltPred);
altToPredMap.put(Utils.integer(nakedAlt), nakedAltPred);
// set all config with alt=nakedAlt to have the computed predicate
int numConfigs = d.nfaConfigurations.size();
for (int i = 0; i < numConfigs; i++) { // TODO: I don't think we need to do this; altToPredMap has it
//7/27/10 theok, I removed it and it still seems to work with everything; leave in anyway just in case
NFAConfiguration configuration = (NFAConfiguration)d.nfaConfigurations.get(i);
if ( configuration.alt == nakedAlt ) {
configuration.semanticContext = nakedAltPred;
}
}
}
if ( altToPredMap.size()==nondeterministicAlts.size() ) {
// RESOLVE CONFLICT by picking one NFA configuration for each alt
// and setting its resolvedWithPredicate flag
// First, prevent a recursion warning on this state due to
// pred resolution
if ( d.abortedDueToRecursionOverflow ) {
d.dfa.probe.removeRecursiveOverflowState(d);
}
int numConfigs = d.nfaConfigurations.size();
//System.out.println("pred map="+altToPredMap);
for (int i = 0; i < numConfigs; i++) {
NFAConfiguration configuration = (NFAConfiguration)d.nfaConfigurations.get(i);
SemanticContext semCtx = (SemanticContext)
altToPredMap.get(Utils.integer(configuration.alt));
if ( semCtx!=null ) {
// resolve (first found) with pred
// and remove alt from problem list
//System.out.println("c="+configuration);
configuration.resolveWithPredicate = true;
// altToPredMap has preds from all alts; store into "anointed" config
configuration.semanticContext = semCtx; // reset to combined
altToPredMap.remove(Utils.integer(configuration.alt));
// notify grammar that we've used the preds contained in semCtx
if ( semCtx.isSyntacticPredicate() ) {
dfa.nfa.grammar.synPredUsedInDFA(dfa, semCtx);
}
}
else if ( nondeterministicAlts.contains(Utils.integer(configuration.alt)) ) {
// resolve all configurations for nondeterministic alts
// for which there is no predicate context by turning it off
configuration.resolved = true;
}
}
return true;
}
return false; // couldn't fix the problem with predicates
}
/** Return a mapping from nondeterministic alt to combined list of predicates.
* If both (s|i|semCtx1) and (t|i|semCtx2) exist, then the proper predicate
* for alt i is semCtx1||semCtx2 because you have arrived at this single
* DFA state via two NFA paths, both of which have semantic predicates.
* We ignore deterministic alts because syntax alone is sufficient
* to predict those. Do not include their predicates.
*
* Alts with no predicate are assumed to have {true}? pred.
*
* When combining via || with "true", all predicates are removed from
* consideration since the expression will always be true and hence
* not tell us how to resolve anything. So, if any NFA configuration
* in this DFA state does not have a semantic context, the alt cannot
* be resolved with a predicate.
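*
* For example, if alt 2 appears in configurations (s|2|{p1}?) and (t|2)
* where the second has no predicate, then alt 2 is tracked as uncovered and
* gets no entry in the returned map; p1 alone cannot select alt 2 because
* the (t|2) path is viable regardless of p1.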
*
* If nonnull, incidentEdgeLabel tells us what NFA transition label
* we did a reach on to compute state d. d may have insufficient
* preds, so we really want this for the error message.
*/
protected Map<Integer, SemanticContext> getPredicatesPerNonDeterministicAlt(DFAState d,
Set nondeterministicAlts)
{
// map alt to combined SemanticContext
Map<Integer, SemanticContext> altToPredicateContextMap =
new HashMap<Integer, SemanticContext>();
// init the alt to predicate set map
Map<Integer, OrderedHashSet<SemanticContext>> altToSetOfContextsMap =
new HashMap<Integer, OrderedHashSet<SemanticContext>>();
for (Iterator it = nondeterministicAlts.iterator(); it.hasNext();) {
Integer altI = (Integer) it.next();
altToSetOfContextsMap.put(altI, new OrderedHashSet<SemanticContext>());
}
/*
List<Label> sampleInputLabels = d.dfa.probe.getSampleNonDeterministicInputSequence(d);
String input = d.dfa.probe.getInputSequenceDisplay(sampleInputLabels);
System.out.println("sample input: "+input);
*/
// for each configuration, create a unique set of predicates
// Also, track the alts with at least one uncovered configuration
// (one w/o a predicate); tracks tautologies like p1||true
Map<Integer, Set<Token>> altToLocationsReachableWithoutPredicate = new HashMap<Integer, Set<Token>>();
Set<Integer> nondetAltsWithUncoveredConfiguration = new HashSet<Integer>();
//System.out.println("configs="+d.nfaConfigurations);
//System.out.println("configs with preds?"+d.atLeastOneConfigurationHasAPredicate);
//System.out.println("configs with preds="+d.configurationsWithPredicateEdges);
int numConfigs = d.nfaConfigurations.size();
for (int i = 0; i < numConfigs; i++) {
NFAConfiguration configuration = (NFAConfiguration)d.nfaConfigurations.get(i);
Integer altI = Utils.integer(configuration.alt);
// if alt is nondeterministic, combine its predicates
if ( nondeterministicAlts.contains(altI) ) {
// if there is a predicate for this NFA configuration, OR in
if ( configuration.semanticContext !=
SemanticContext.EMPTY_SEMANTIC_CONTEXT )
{
Set<SemanticContext> predSet = altToSetOfContextsMap.get(altI);
predSet.add(configuration.semanticContext);
}
else {
// If there is no predicate but the configuration is part of a
// nondeterministic alt, then at least one path into that alt is
// not covered by a predicate. Drop the predicate for this alt;
// track these incompletely covered alts.
nondetAltsWithUncoveredConfiguration.add(altI);
/*
NFAState s = dfa.nfa.getState(configuration.state);
System.out.println("###\ndec "+dfa.decisionNumber+" alt "+configuration.alt+
" enclosing rule for nfa state not covered "+
s.enclosingRule);
if ( s.associatedASTNode!=null ) {
System.out.println("token="+s.associatedASTNode.token);
}
System.out.println("nfa state="+s);
if ( s.incidentEdgeLabel!=null && Label.intersect(incidentEdgeLabel, s.incidentEdgeLabel) ) {
Set<Token> locations = altToLocationsReachableWithoutPredicate.get(altI);
if ( locations==null ) {
locations = new HashSet<Token>();
altToLocationsReachableWithoutPredicate.put(altI, locations);
}
locations.add(s.associatedASTNode.token);
}
*/
}
}
}
// For each alt, OR together all unique predicates associated with
// all configurations
// Also, track the list of incompletely covered alts: those alts
// with at least 1 predicate and at least one configuration w/o a
// predicate. We want this in order to report to the decision probe.
List<Integer> incompletelyCoveredAlts = new ArrayList<Integer>();
for (Iterator it = nondeterministicAlts.iterator(); it.hasNext();) {
Integer altI = (Integer) it.next();
Set<SemanticContext> contextsForThisAlt = altToSetOfContextsMap.get(altI);
if ( nondetAltsWithUncoveredConfiguration.contains(altI) ) { // >= 1 config has no ctx
if ( contextsForThisAlt.size()>0 ) { // && at least one pred
incompletelyCoveredAlts.add(altI); // this alt is incompletely covered
}
continue; // don't include this alt; at least 1 config has no ctx
}
SemanticContext combinedContext = null;
for (Iterator itrSet = contextsForThisAlt.iterator(); itrSet.hasNext();) {
SemanticContext ctx = (SemanticContext) itrSet.next();
combinedContext =
SemanticContext.or(combinedContext,ctx);
}
altToPredicateContextMap.put(altI, combinedContext);
}
if ( incompletelyCoveredAlts.size()>0 ) {
/*
System.out.println("prob in dec "+dfa.decisionNumber+" state="+d);
FASerializer serializer = new FASerializer(dfa.nfa.grammar);
String result = serializer.serialize(dfa.startState);
System.out.println("dfa: "+result);
System.out.println("incomplete alts: "+incompletelyCoveredAlts);
System.out.println("nondet="+nondeterministicAlts);
System.out.println("nondetAltsWithUncoveredConfiguration="+ nondetAltsWithUncoveredConfiguration);
System.out.println("altToCtxMap="+altToSetOfContextsMap);
System.out.println("altToPredicateContextMap="+altToPredicateContextMap);
*/
for (int i = 0; i < numConfigs; i++) {
NFAConfiguration configuration = (NFAConfiguration)d.nfaConfigurations.get(i);
Integer altI = Utils.integer(configuration.alt);
if ( incompletelyCoveredAlts.contains(altI) &&
configuration.semanticContext == SemanticContext.EMPTY_SEMANTIC_CONTEXT )
{
NFAState s = dfa.nfa.getState(configuration.state);
/*
System.out.print("nondet config w/o context "+configuration+
" incident "+(s.incidentEdgeLabel!=null?s.incidentEdgeLabel.toString(dfa.nfa.grammar):null));
if ( s.associatedASTNode!=null ) {
System.out.print(" token="+s.associatedASTNode.token);
}
else System.out.println();
*/
// We want to report reaching an NFA state via a non-EOF
// incoming label without a predicate.
if ( s.incidentEdgeLabel!=null && s.incidentEdgeLabel.label != Label.EOF ) {
if ( s.associatedASTNode==null || s.associatedASTNode.token==null ) {
ErrorManager.internalError("no AST/token for nonepsilon target w/o predicate");
}
else {
Set<Token> locations = altToLocationsReachableWithoutPredicate.get(altI);
if ( locations==null ) {
locations = new HashSet<Token>();
altToLocationsReachableWithoutPredicate.put(altI, locations);
}
locations.add(s.associatedASTNode.token);
}
}
}
}
dfa.probe.reportIncompletelyCoveredAlts(d,
altToLocationsReachableWithoutPredicate);
}
return altToPredicateContextMap;
}
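/* Example (hypothetical predicates, not from the original source): if alt 1 is
 * reached in this DFA state via two NFA paths carrying {p}? and {q}?, the map
 * entry becomes 1 -> (p||q). If a third path into alt 1 carries no predicate,
 * alt 1 is incompletely covered: (p||q)||true is always true, so no entry is
 * made for alt 1 and the decision probe is notified instead.
 */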
/** OR together all predicates from the alts. Note that the predicate
* for an alt could itself be a combination of predicates.
*/
protected static SemanticContext getUnionOfPredicates(Map altToPredMap) {
Iterator iter;
SemanticContext unionOfPredicatesFromAllAlts = null;
iter = altToPredMap.values().iterator();
while ( iter.hasNext() ) {
SemanticContext semCtx = (SemanticContext)iter.next();
if ( unionOfPredicatesFromAllAlts==null ) {
unionOfPredicatesFromAllAlts = semCtx;
}
else {
unionOfPredicatesFromAllAlts =
SemanticContext.or(unionOfPredicatesFromAllAlts,semCtx);
}
}
return unionOfPredicatesFromAllAlts;
}
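/* Minimal usage sketch (hypothetical values): given
 *   altToPredMap = {1 -> {p}?, 3 -> {q}?}
 * getUnionOfPredicates(altToPredMap) yields SemanticContext.or({p}?, {q}?),
 * i.e. the combined predicate (p||q)?.
 */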
/** For each NFA config in d, look for the "predicate required" flag
* (resolveWithPredicate) set during nondeterminism resolution.
*
* Add the predicate edges sorted by the alternative number; I'm fairly
* sure that I could walk the configs backwards so they are added to
* the predDFATarget in the right order, but it's best to make sure.
* Predicates succeed in the order they are specified. Alt i wins
* over alt i+1 if both predicates are true.
*/
protected void addPredicateTransitions(DFAState d) {
List configsWithPreds = new ArrayList();
// get a list of all configs with predicates
int numConfigs = d.nfaConfigurations.size();
for (int i = 0; i < numConfigs; i++) {
NFAConfiguration c = (NFAConfiguration)d.nfaConfigurations.get(i);
if ( c.resolveWithPredicate ) {
configsWithPreds.add(c);
}
}
// Sort ascending according to alt; alt i has higher precedence than i+1
Collections.sort(configsWithPreds,
new Comparator() {
public int compare(Object a, Object b) {
NFAConfiguration ca = (NFAConfiguration)a;
NFAConfiguration cb = (NFAConfiguration)b;
if ( ca.alt < cb.alt ) return -1;
else if ( ca.alt > cb.alt ) return 1;
return 0;
}
});
List predConfigsSortedByAlt = configsWithPreds;
// Now, we can add edges emanating from d for these preds in right order
for (int i = 0; i < predConfigsSortedByAlt.size(); i++) {
NFAConfiguration c = (NFAConfiguration)predConfigsSortedByAlt.get(i);
DFAState predDFATarget = d.dfa.getAcceptState(c.alt);
if ( predDFATarget==null ) {
predDFATarget = dfa.newState(); // create if not there.
// create a new DFA state that is a target of the predicate from d
predDFATarget.addNFAConfiguration(dfa.nfa.getState(c.state),
c.alt,
c.context,
c.semanticContext);
predDFATarget.setAcceptState(true);
dfa.setAcceptState(c.alt, predDFATarget);
DFAState existingState = dfa.addState(predDFATarget);
if ( predDFATarget != existingState ) {
// already there...use/return the existing DFA state that
// is a target of this predicate. Make this state number
// point at the existing state
dfa.setState(predDFATarget.stateNumber, existingState);
predDFATarget = existingState;
}
}
// add a transition to pred target from d
d.addTransition(predDFATarget, new PredicateLabel(c.semanticContext));
}
}
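/* Sketch of the resulting DFA fragment (illustrative only): if configs for
 * alts 2 and 1 were resolved with predicates {q}? and {p}?, the sort yields
 * [alt 1, alt 2] and the emitted edges are
 *
 *   d --{p}?--> acceptState(alt 1)
 *   d --{q}?--> acceptState(alt 2)
 *
 * so the generated decision tests {p}? before {q}?, giving alt 1 precedence
 * when both predicates are true.
 */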
protected void initContextTrees(int numberOfAlts) {
contextTrees = new NFAContext[numberOfAlts];
for (int i = 0; i < contextTrees.length; i++) {
// add a dummy root node so that an NFA configuration can
// always point at an NFAContext. If a context refers to this
// node then it implies there is no call stack for
// that configuration
contextTrees[i] = new NFAContext(null, null);
}
}
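/** Return the largest Integer in s, e.g. max({3, 7, 5}) == 7.
* Returns Integer.MIN_VALUE if s is null and 0 if s is empty.
*/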
public static int max(Set s) {
if ( s==null ) {
return Integer.MIN_VALUE;
}
int i = 0;
int m = 0;
for (Iterator it = s.iterator(); it.hasNext();) {
i++;
Integer I = (Integer) it.next();
if ( i==1 ) { // init m with first value
m = I.intValue();
continue;
}
if ( I.intValue()>m ) {
m = I.intValue();
}
}
return m;
}
}