blob: c3f443293f7f4ba5700b458d29705f2e64a44afc [file] [log] [blame]
/*
* [The "BSD license"]
* Copyright (c) 2010 Terence Parr
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.antlr.analysis;
import org.antlr.grammar.v3.ANTLRParser;
import org.antlr.misc.IntSet;
import org.antlr.misc.IntervalSet;
import org.antlr.tool.Grammar;
import org.antlr.tool.Rule;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
/**
* Created by IntelliJ IDEA.
* User: parrt
* Date: Dec 31, 2007
* Time: 1:31:16 PM
* To change this template use File | Settings | File Templates.
*/
public class LL1Analyzer {
/** 0 if we hit end of rule and invoker should keep going (epsilon) */
public static final int DETECT_PRED_EOR = 0;
/** 1 if we found a nonautobacktracking pred */
public static final int DETECT_PRED_FOUND = 1;
/** 2 if we didn't find such a pred */
public static final int DETECT_PRED_NOT_FOUND = 2;
public Grammar grammar;
/** Used during LOOK to detect computation cycles */
protected Set<NFAState> lookBusy = new HashSet<NFAState>();
public Map<NFAState, LookaheadSet> FIRSTCache = new HashMap<NFAState, LookaheadSet>();
public Map<Rule, LookaheadSet> FOLLOWCache = new HashMap<Rule, LookaheadSet>();
public LL1Analyzer(Grammar grammar) {
this.grammar = grammar;
}
/*
public void computeRuleFIRSTSets() {
if ( getNumberOfDecisions()==0 ) {
createNFAs();
}
for (Iterator it = getRules().iterator(); it.hasNext();) {
Rule r = (Rule)it.next();
if ( r.isSynPred ) {
continue;
}
LookaheadSet s = FIRST(r);
System.out.println("FIRST("+r.name+")="+s);
}
}
*/
/*
public Set<String> getOverriddenRulesWithDifferentFIRST() {
// walk every rule in this grammar and compare FIRST set with
// those in imported grammars.
Set<String> rules = new HashSet();
for (Iterator it = getRules().iterator(); it.hasNext();) {
Rule r = (Rule)it.next();
//System.out.println(r.name+" FIRST="+r.FIRST);
for (int i = 0; i < delegates.size(); i++) {
Grammar g = delegates.get(i);
Rule importedRule = g.getRule(r.name);
if ( importedRule != null ) { // exists in imported grammar
// System.out.println(r.name+" exists in imported grammar: FIRST="+importedRule.FIRST);
if ( !r.FIRST.equals(importedRule.FIRST) ) {
rules.add(r.name);
}
}
}
}
return rules;
}
public Set<Rule> getImportedRulesSensitiveToOverriddenRulesDueToLOOK() {
Set<String> diffFIRSTs = getOverriddenRulesWithDifferentFIRST();
Set<Rule> rules = new HashSet();
for (Iterator it = diffFIRSTs.iterator(); it.hasNext();) {
String r = (String) it.next();
for (int i = 0; i < delegates.size(); i++) {
Grammar g = delegates.get(i);
Set<Rule> callers = g.ruleSensitivity.get(r);
// somebody invokes rule whose FIRST changed in subgrammar?
if ( callers!=null ) {
rules.addAll(callers);
//System.out.println(g.name+" rules "+callers+" sensitive to "+r+"; dup 'em");
}
}
}
return rules;
}
*/
/*
public LookaheadSet LOOK(Rule r) {
if ( r.FIRST==null ) {
r.FIRST = FIRST(r.startState);
}
return r.FIRST;
}
*/
/** From an NFA state, s, find the set of all labels reachable from s.
* Used to compute follow sets for error recovery. Never computes
* a FOLLOW operation. FIRST stops at end of rules, returning EOR, unless
* invoked from another rule. I.e., routine properly handles
*
* a : b A ;
*
* where b is nullable.
*
* We record with EOR_TOKEN_TYPE if we hit the end of a rule so we can
* know at runtime (when these sets are used) to start walking up the
* follow chain to compute the real, correct follow set (as opposed to
* the FOLLOW, which is a superset).
*
* This routine will only be used on parser and tree parser grammars.
*/
public LookaheadSet FIRST(NFAState s) {
//System.out.println("> FIRST("+s.enclosingRule.name+") in rule "+s.enclosingRule);
lookBusy.clear();
LookaheadSet look = _FIRST(s, false);
//System.out.println("< FIRST("+s.enclosingRule.name+") in rule "+s.enclosingRule+"="+look.toString(this.grammar));
return look;
}
public LookaheadSet FOLLOW(Rule r) {
//System.out.println("> FOLLOW("+r.name+") in rule "+r.startState.enclosingRule);
LookaheadSet f = FOLLOWCache.get(r);
if ( f!=null ) {
return f;
}
f = _FIRST(r.stopState, true);
FOLLOWCache.put(r, f);
//System.out.println("< FOLLOW("+r+") in rule "+r.startState.enclosingRule+"="+f.toString(this.grammar));
return f;
}
public LookaheadSet LOOK(NFAState s) {
if ( NFAToDFAConverter.debug ) {
System.out.println("> LOOK("+s+")");
}
lookBusy.clear();
LookaheadSet look = _FIRST(s, true);
// FOLLOW makes no sense (at the moment!) for lexical rules.
if ( grammar.type!=Grammar.LEXER && look.member(Label.EOR_TOKEN_TYPE) ) {
// avoid altering FIRST reset as it is cached
LookaheadSet f = FOLLOW(s.enclosingRule);
f.orInPlace(look);
f.remove(Label.EOR_TOKEN_TYPE);
look = f;
//look.orInPlace(FOLLOW(s.enclosingRule));
}
else if ( grammar.type==Grammar.LEXER && look.member(Label.EOT) ) {
// if this has EOT, lookahead is all char (all char can follow rule)
//look = new LookaheadSet(Label.EOT);
look = new LookaheadSet(IntervalSet.COMPLETE_SET);
}
if ( NFAToDFAConverter.debug ) {
System.out.println("< LOOK("+s+")="+look.toString(grammar));
}
return look;
}
protected LookaheadSet _FIRST(NFAState s, boolean chaseFollowTransitions) {
/*
System.out.println("_LOOK("+s+") in rule "+s.enclosingRule);
if ( s.transition[0] instanceof RuleClosureTransition ) {
System.out.println("go to rule "+((NFAState)s.transition[0].target).enclosingRule);
}
*/
if ( !chaseFollowTransitions && s.isAcceptState() ) {
if ( grammar.type==Grammar.LEXER ) {
// FOLLOW makes no sense (at the moment!) for lexical rules.
// assume all char can follow
return new LookaheadSet(IntervalSet.COMPLETE_SET);
}
return new LookaheadSet(Label.EOR_TOKEN_TYPE);
}
if ( lookBusy.contains(s) ) {
// return a copy of an empty set; we may modify set inline
return new LookaheadSet();
}
lookBusy.add(s);
Transition transition0 = s.transition[0];
if ( transition0==null ) {
return null;
}
if ( transition0.label.isAtom() ) {
int atom = transition0.label.getAtom();
return new LookaheadSet(atom);
}
if ( transition0.label.isSet() ) {
IntSet sl = transition0.label.getSet();
return new LookaheadSet(sl);
}
// compute FIRST of transition 0
LookaheadSet tset = null;
// if transition 0 is a rule call and we don't want FOLLOW, check cache
if ( !chaseFollowTransitions && transition0 instanceof RuleClosureTransition ) {
tset = FIRSTCache.get((NFAState)transition0.target);
}
// if not in cache, must compute
if ( tset==null ) {
tset = _FIRST((NFAState)transition0.target, chaseFollowTransitions);
// save FIRST cache for transition 0 if rule call
if ( !chaseFollowTransitions && transition0 instanceof RuleClosureTransition ) {
FIRSTCache.put((NFAState)transition0.target, tset);
}
}
LookaheadSet tsetCached = tset; // tset is stored in cache. We can't return the same instance
// did we fall off the end?
if ( grammar.type!=Grammar.LEXER && tset.member(Label.EOR_TOKEN_TYPE) ) {
if ( transition0 instanceof RuleClosureTransition ) {
// we called a rule that found the end of the rule.
// That means the rule is nullable and we need to
// keep looking at what follows the rule ref. E.g.,
// a : b A ; where b is nullable means that LOOK(a)
// should include A.
RuleClosureTransition ruleInvocationTrans =
(RuleClosureTransition)transition0;
// remove the EOR and get what follows
//tset.remove(Label.EOR_TOKEN_TYPE);
NFAState following = (NFAState) ruleInvocationTrans.followState;
LookaheadSet fset = _FIRST(following, chaseFollowTransitions);
fset.orInPlace(tset); // tset cached; or into new set
fset.remove(Label.EOR_TOKEN_TYPE);
tset = fset;
}
}
Transition transition1 = s.transition[1];
if ( transition1!=null ) {
LookaheadSet tset1 =
_FIRST((NFAState)transition1.target, chaseFollowTransitions);
tset1.orInPlace(tset);
tset = tset1;
}
// never return a cached set; clone
return tset==tsetCached ? new LookaheadSet(tset) : tset;
}
/** Is there a non-syn-pred predicate visible from s that is not in
* the rule enclosing s? This accounts for most predicate situations
* and lets ANTLR do a simple LL(1)+pred computation.
*
* TODO: what about gated vs regular preds?
*/
public boolean detectConfoundingPredicates(NFAState s) {
lookBusy.clear();
Rule r = s.enclosingRule;
return _detectConfoundingPredicates(s, r, false) == DETECT_PRED_FOUND;
}
protected int _detectConfoundingPredicates(NFAState s,
Rule enclosingRule,
boolean chaseFollowTransitions)
{
//System.out.println("_detectNonAutobacktrackPredicates("+s+")");
if ( !chaseFollowTransitions && s.isAcceptState() ) {
if ( grammar.type==Grammar.LEXER ) {
// FOLLOW makes no sense (at the moment!) for lexical rules.
// assume all char can follow
return DETECT_PRED_NOT_FOUND;
}
return DETECT_PRED_EOR;
}
if ( lookBusy.contains(s) ) {
// return a copy of an empty set; we may modify set inline
return DETECT_PRED_NOT_FOUND;
}
lookBusy.add(s);
Transition transition0 = s.transition[0];
if ( transition0==null ) {
return DETECT_PRED_NOT_FOUND;
}
if ( !(transition0.label.isSemanticPredicate()||
transition0.label.isEpsilon()) ) {
return DETECT_PRED_NOT_FOUND;
}
if ( transition0.label.isSemanticPredicate() ) {
//System.out.println("pred "+transition0.label);
SemanticContext ctx = transition0.label.getSemanticContext();
SemanticContext.Predicate p = (SemanticContext.Predicate)ctx;
if ( p.predicateAST.getType() != ANTLRParser.BACKTRACK_SEMPRED ) {
return DETECT_PRED_FOUND;
}
}
/*
if ( transition0.label.isSemanticPredicate() ) {
System.out.println("pred "+transition0.label);
SemanticContext ctx = transition0.label.getSemanticContext();
SemanticContext.Predicate p = (SemanticContext.Predicate)ctx;
// if a non-syn-pred found not in enclosingRule, say we found one
if ( p.predicateAST.getType() != ANTLRParser.BACKTRACK_SEMPRED &&
!p.predicateAST.enclosingRuleName.equals(enclosingRule.name) )
{
System.out.println("found pred "+p+" not in "+enclosingRule.name);
return DETECT_PRED_FOUND;
}
}
*/
int result = _detectConfoundingPredicates((NFAState)transition0.target,
enclosingRule,
chaseFollowTransitions);
if ( result == DETECT_PRED_FOUND ) {
return DETECT_PRED_FOUND;
}
if ( result == DETECT_PRED_EOR ) {
if ( transition0 instanceof RuleClosureTransition ) {
// we called a rule that found the end of the rule.
// That means the rule is nullable and we need to
// keep looking at what follows the rule ref. E.g.,
// a : b A ; where b is nullable means that LOOK(a)
// should include A.
RuleClosureTransition ruleInvocationTrans =
(RuleClosureTransition)transition0;
NFAState following = (NFAState) ruleInvocationTrans.followState;
int afterRuleResult =
_detectConfoundingPredicates(following,
enclosingRule,
chaseFollowTransitions);
if ( afterRuleResult == DETECT_PRED_FOUND ) {
return DETECT_PRED_FOUND;
}
}
}
Transition transition1 = s.transition[1];
if ( transition1!=null ) {
int t1Result =
_detectConfoundingPredicates((NFAState)transition1.target,
enclosingRule,
chaseFollowTransitions);
if ( t1Result == DETECT_PRED_FOUND ) {
return DETECT_PRED_FOUND;
}
}
return DETECT_PRED_NOT_FOUND;
}
/** Return predicate expression found via epsilon edges from s. Do
* not look into other rules for now. Do something simple. Include
* backtracking synpreds.
*/
public SemanticContext getPredicates(NFAState altStartState) {
lookBusy.clear();
return _getPredicates(altStartState, altStartState);
}
protected SemanticContext _getPredicates(NFAState s, NFAState altStartState) {
//System.out.println("_getPredicates("+s+")");
if ( s.isAcceptState() ) {
return null;
}
// avoid infinite loops from (..)* etc...
if ( lookBusy.contains(s) ) {
return null;
}
lookBusy.add(s);
Transition transition0 = s.transition[0];
// no transitions
if ( transition0==null ) {
return null;
}
// not a predicate and not even an epsilon
if ( !(transition0.label.isSemanticPredicate()||
transition0.label.isEpsilon()) ) {
return null;
}
SemanticContext p = null;
SemanticContext p0 = null;
SemanticContext p1 = null;
if ( transition0.label.isSemanticPredicate() ) {
//System.out.println("pred "+transition0.label);
p = transition0.label.getSemanticContext();
// ignore backtracking preds not on left edge for this decision
if ( ((SemanticContext.Predicate)p).predicateAST.getType() ==
ANTLRParser.BACKTRACK_SEMPRED &&
s == altStartState.transition[0].target )
{
p = null; // don't count
}
}
// get preds from beyond this state
p0 = _getPredicates((NFAState)transition0.target, altStartState);
// get preds from other transition
Transition transition1 = s.transition[1];
if ( transition1!=null ) {
p1 = _getPredicates((NFAState)transition1.target, altStartState);
}
// join this&following-right|following-down
return SemanticContext.and(p,SemanticContext.or(p0,p1));
}
}