blob: f56ec42dc348824dd499f4cf8390dbe1051e7bb5 [file] [log] [blame]
/*
* [The "BSD license"]
* Copyright (c) 2010 Terence Parr
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.antlr.analysis;
/** A tree node for tracking the call chains for NFAs that invoke
* other NFAs. These trees only have to point upwards to their parents
* so we can walk back up the tree (i.e., pop stuff off the stack). We
* never walk from stack down down through the children.
*
* Each alt predicted in a decision has its own context tree,
* representing all possible return nodes. The initial stack has
* EOF ("$") in it. So, for m alternative productions, the lookahead
* DFA will have m NFAContext trees.
*
* To "push" a new context, just do "new NFAContext(context-parent, state)"
* which will add itself to the parent. The root is NFAContext(null, null).
*
* The complete context for an NFA configuration is the set of invoking states
* on the path from this node thru the parent pointers to the root.
*/
public class NFAContext {
/** This is similar to Bermudez's m constant in his LAR(m) where
* you bound the stack so your states don't explode. The main difference
* is that I bound only recursion on the stack, not the simple stack size.
* This looser constraint will let the conversion roam further to find
* lookahead to resolve a decision.
*
* Bermudez's m operates differently as it is his LR stack depth
* I'm pretty sure it therefore includes all stack symbols. Here I
* restrict the size of an NFA configuration to be finite because a
* stack component may mention the same NFA invocation state at
* most m times. Hence, the number of DFA states will not grow forever.
* With recursive rules like
*
* e : '(' e ')' | INT ;
*
* you could chase your tail forever if somebody said "s : e '.' | e ';' ;"
* This constant prevents new states from being created after a stack gets
* "too big". Actually (12/14/2007) I realize that this example is
* trapped by the non-LL(*) detector for recursion in > 1 alt. Here is
* an example that trips stack overflow:
*
* s : a Y | A A A A A X ; // force recursion past m=4
* a : A a | Q;
*
* If that were:
*
* s : a Y | A+ X ;
*
* it could loop forever.
*
* Imagine doing a depth-first search on the e DFA...as you chase an input
* sequence you can recurse to same rule such as e above. You'd have a
* chain of ((((. When you get do some point, you have to give up. The
* states in the chain will have longer and longer NFA config stacks.
* Must limit size.
*
* max=0 implies you cannot ever jump to another rule during closure.
* max=1 implies you can make as many calls as you want--you just
* can't ever visit a state that is on your rule invocation stack.
* I.e., you cannot ever recurse.
* max=2 implies you are able to recurse once (i.e., call a rule twice
* from the same place).
*
* This tracks recursion to a rule specific to an invocation site!
* It does not detect multiple calls to a rule from different rule
* invocation states. We are guaranteed to terminate because the
* stack can only grow as big as the number of NFA states * max.
*
* I noticed that the Java grammar didn't work with max=1, but did with
* max=4. Let's set to 4. Recursion is sometimes needed to resolve some
* fixed lookahead decisions.
*/
public static int MAX_SAME_RULE_INVOCATIONS_PER_NFA_CONFIG_STACK = 4;
public NFAContext parent;
/** The NFA state that invoked another rule's start state is recorded
* on the rule invocation context stack.
*/
public NFAState invokingState;
/** Computing the hashCode is very expensive and closureBusy()
* uses it to track when it's seen a state|ctx before to avoid
* infinite loops. As we add new contexts, record the hash code
* as this.invokingState + parent.cachedHashCode. Avoids walking
* up the tree for every hashCode(). Note that this caching works
* because a context is a monotonically growing tree of context nodes
* and nothing on the stack is ever modified...ctx just grows
* or shrinks.
*/
protected int cachedHashCode;
public NFAContext(NFAContext parent, NFAState invokingState) {
this.parent = parent;
this.invokingState = invokingState;
if ( invokingState!=null ) {
this.cachedHashCode = invokingState.stateNumber;
}
if ( parent!=null ) {
this.cachedHashCode += parent.cachedHashCode;
}
}
/** Two contexts are equals() if both have
* same call stack; walk upwards to the root.
* Recall that the root sentinel node has no invokingStates and no parent.
* Note that you may be comparing contexts in different alt trees.
*
* The hashCode is now cheap as it's computed once upon each context
* push on the stack. Use it to make equals() more efficient.
*/
public boolean equals(Object o) {
NFAContext other = ((NFAContext)o);
if ( this.cachedHashCode != other.cachedHashCode ) {
return false; // can't be same if hash is different
}
if ( this==other ) {
return true;
}
// System.out.println("comparing "+this+" with "+other);
NFAContext sp = this;
while ( sp.parent!=null && other.parent!=null ) {
if ( sp.invokingState != other.invokingState ) {
return false;
}
sp = sp.parent;
other = other.parent;
}
if ( !(sp.parent==null && other.parent==null) ) {
return false; // both pointers must be at their roots after walk
}
return true;
}
/** Two contexts conflict() if they are equals() or one is a stack suffix
* of the other. For example, contexts [21 12 $] and [21 9 $] do not
* conflict, but [21 $] and [21 12 $] do conflict. Note that I should
* probably not show the $ in this case. There is a dummy node for each
* stack that just means empty; $ is a marker that's all.
*
* This is used in relation to checking conflicts associated with a
* single NFA state's configurations within a single DFA state.
* If there are configurations s and t within a DFA state such that
* s.state=t.state && s.alt != t.alt && s.ctx conflicts t.ctx then
* the DFA state predicts more than a single alt--it's nondeterministic.
* Two contexts conflict if they are the same or if one is a suffix
* of the other.
*
* When comparing contexts, if one context has a stack and the other
* does not then they should be considered the same context. The only
* way for an NFA state p to have an empty context and a nonempty context
* is the case when closure falls off end of rule without a call stack
* and re-enters the rule with a context. This resolves the issue I
* discussed with Sriram Srinivasan Feb 28, 2005 about not terminating
* fast enough upon nondeterminism.
*/
public boolean conflictsWith(NFAContext other) {
return this.suffix(other); // || this.equals(other);
}
/** [$] suffix any context
* [21 $] suffix [21 12 $]
* [21 12 $] suffix [21 $]
* [21 18 $] suffix [21 18 12 9 $]
* [21 18 12 9 $] suffix [21 18 $]
* [21 12 $] not suffix [21 9 $]
*
* Example "[21 $] suffix [21 12 $]" means: rule r invoked current rule
* from state 21. Rule s invoked rule r from state 12 which then invoked
* current rule also via state 21. While the context prior to state 21
* is different, the fact that both contexts emanate from state 21 implies
* that they are now going to track perfectly together. Once they
* converged on state 21, there is no way they can separate. In other
* words, the prior stack state is not consulted when computing where to
* go in the closure operation. ?$ and ??$ are considered the same stack.
* If ? is popped off then $ and ?$ remain; they are now an empty and
* nonempty context comparison. So, if one stack is a suffix of
* another, then it will still degenerate to the simple empty stack
* comparison case.
*/
protected boolean suffix(NFAContext other) {
NFAContext sp = this;
// if one of the contexts is empty, it never enters loop and returns true
while ( sp.parent!=null && other.parent!=null ) {
if ( sp.invokingState != other.invokingState ) {
return false;
}
sp = sp.parent;
other = other.parent;
}
//System.out.println("suffix");
return true;
}
/** Walk upwards to the root of the call stack context looking
* for a particular invoking state.
public boolean contains(int state) {
NFAContext sp = this;
int n = 0; // track recursive invocations of state
System.out.println("this.context is "+sp);
while ( sp.parent!=null ) {
if ( sp.invokingState.stateNumber == state ) {
return true;
}
sp = sp.parent;
}
return false;
}
*/
/** Given an NFA state number, how many times has the NFA-to-DFA
* conversion pushed that state on the stack? In other words,
* the NFA state must be a rule invocation state and this method
* tells you how many times you've been to this state. If none,
* then you have not called the target rule from this state before
* (though another NFA state could have called that target rule).
* If n=1, then you've been to this state before during this
* DFA construction and are going to invoke that rule again.
*
* Note that many NFA states can invoke rule r, but we ignore recursion
* unless you hit the same rule invocation state again.
*/
public int recursionDepthEmanatingFromState(int state) {
NFAContext sp = this;
int n = 0; // track recursive invocations of target from this state
//System.out.println("this.context is "+sp);
while ( sp.parent!=null ) {
if ( sp.invokingState.stateNumber == state ) {
n++;
}
sp = sp.parent;
}
return n;
}
public int hashCode() {
return cachedHashCode;
/*
int h = 0;
NFAContext sp = this;
while ( sp.parent!=null ) {
h += sp.invokingState.getStateNumber();
sp = sp.parent;
}
return h;
*/
}
/** A context is empty if there is no parent; meaning nobody pushed
* anything on the call stack.
*/
public boolean isEmpty() {
return parent==null;
}
public String toString() {
StringBuffer buf = new StringBuffer();
NFAContext sp = this;
buf.append("[");
while ( sp.parent!=null ) {
buf.append(sp.invokingState.stateNumber);
buf.append(" ");
sp = sp.parent;
}
buf.append("$]");
return buf.toString();
}
}