| /* |
| * [The "BSD license"] |
| * Copyright (c) 2010 Terence Parr |
| * All rights reserved. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions |
| * are met: |
| * 1. Redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer. |
| * 2. Redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in the |
| * documentation and/or other materials provided with the distribution. |
| * 3. The name of the author may not be used to endorse or promote products |
| * derived from this software without specific prior written permission. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR |
| * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
| * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. |
| * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, |
| * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
| * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF |
| * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| */ |
| package org.antlr.analysis; |
| |
| import org.antlr.grammar.v3.ANTLRParser; |
| import org.antlr.misc.IntSet; |
| import org.antlr.misc.IntervalSet; |
| import org.antlr.tool.Grammar; |
| import org.antlr.tool.Rule; |
| |
| import java.util.HashMap; |
| import java.util.HashSet; |
| import java.util.Map; |
| import java.util.Set; |
| |
| /** |
| * Created by IntelliJ IDEA. |
| * User: parrt |
| * Date: Dec 31, 2007 |
| * Time: 1:31:16 PM |
| * To change this template use File | Settings | File Templates. |
| */ |
| public class LL1Analyzer { |
| /** 0 if we hit end of rule and invoker should keep going (epsilon) */ |
| public static final int DETECT_PRED_EOR = 0; |
| /** 1 if we found a nonautobacktracking pred */ |
| public static final int DETECT_PRED_FOUND = 1; |
| /** 2 if we didn't find such a pred */ |
| public static final int DETECT_PRED_NOT_FOUND = 2; |
| |
| public Grammar grammar; |
| |
| /** Used during LOOK to detect computation cycles */ |
| protected Set<NFAState> lookBusy = new HashSet<NFAState>(); |
| |
| public Map<NFAState, LookaheadSet> FIRSTCache = new HashMap<NFAState, LookaheadSet>(); |
| public Map<Rule, LookaheadSet> FOLLOWCache = new HashMap<Rule, LookaheadSet>(); |
| |
| public LL1Analyzer(Grammar grammar) { |
| this.grammar = grammar; |
| } |
| |
| /* |
| public void computeRuleFIRSTSets() { |
| if ( getNumberOfDecisions()==0 ) { |
| createNFAs(); |
| } |
| for (Iterator it = getRules().iterator(); it.hasNext();) { |
| Rule r = (Rule)it.next(); |
| if ( r.isSynPred ) { |
| continue; |
| } |
| LookaheadSet s = FIRST(r); |
| System.out.println("FIRST("+r.name+")="+s); |
| } |
| } |
| */ |
| |
| /* |
| public Set<String> getOverriddenRulesWithDifferentFIRST() { |
| // walk every rule in this grammar and compare FIRST set with |
| // those in imported grammars. |
| Set<String> rules = new HashSet(); |
| for (Iterator it = getRules().iterator(); it.hasNext();) { |
| Rule r = (Rule)it.next(); |
| //System.out.println(r.name+" FIRST="+r.FIRST); |
| for (int i = 0; i < delegates.size(); i++) { |
| Grammar g = delegates.get(i); |
| Rule importedRule = g.getRule(r.name); |
| if ( importedRule != null ) { // exists in imported grammar |
| // System.out.println(r.name+" exists in imported grammar: FIRST="+importedRule.FIRST); |
| if ( !r.FIRST.equals(importedRule.FIRST) ) { |
| rules.add(r.name); |
| } |
| } |
| } |
| } |
| return rules; |
| } |
| |
| public Set<Rule> getImportedRulesSensitiveToOverriddenRulesDueToLOOK() { |
| Set<String> diffFIRSTs = getOverriddenRulesWithDifferentFIRST(); |
| Set<Rule> rules = new HashSet(); |
| for (Iterator it = diffFIRSTs.iterator(); it.hasNext();) { |
| String r = (String) it.next(); |
| for (int i = 0; i < delegates.size(); i++) { |
| Grammar g = delegates.get(i); |
| Set<Rule> callers = g.ruleSensitivity.get(r); |
| // somebody invokes rule whose FIRST changed in subgrammar? |
| if ( callers!=null ) { |
| rules.addAll(callers); |
| //System.out.println(g.name+" rules "+callers+" sensitive to "+r+"; dup 'em"); |
| } |
| } |
| } |
| return rules; |
| } |
| */ |
| |
| /* |
| public LookaheadSet LOOK(Rule r) { |
| if ( r.FIRST==null ) { |
| r.FIRST = FIRST(r.startState); |
| } |
| return r.FIRST; |
| } |
| */ |
| |
| /** From an NFA state, s, find the set of all labels reachable from s. |
| * Used to compute follow sets for error recovery. Never computes |
| * a FOLLOW operation. FIRST stops at end of rules, returning EOR, unless |
| * invoked from another rule. I.e., routine properly handles |
| * |
| * a : b A ; |
| * |
| * where b is nullable. |
| * |
| * We record with EOR_TOKEN_TYPE if we hit the end of a rule so we can |
| * know at runtime (when these sets are used) to start walking up the |
| * follow chain to compute the real, correct follow set (as opposed to |
| * the FOLLOW, which is a superset). |
| * |
| * This routine will only be used on parser and tree parser grammars. |
| */ |
| public LookaheadSet FIRST(NFAState s) { |
| //System.out.println("> FIRST("+s.enclosingRule.name+") in rule "+s.enclosingRule); |
| lookBusy.clear(); |
| LookaheadSet look = _FIRST(s, false); |
| //System.out.println("< FIRST("+s.enclosingRule.name+") in rule "+s.enclosingRule+"="+look.toString(this.grammar)); |
| return look; |
| } |
| |
| public LookaheadSet FOLLOW(Rule r) { |
| //System.out.println("> FOLLOW("+r.name+") in rule "+r.startState.enclosingRule); |
| LookaheadSet f = FOLLOWCache.get(r); |
| if ( f!=null ) { |
| return f; |
| } |
| f = _FIRST(r.stopState, true); |
| FOLLOWCache.put(r, f); |
| //System.out.println("< FOLLOW("+r+") in rule "+r.startState.enclosingRule+"="+f.toString(this.grammar)); |
| return f; |
| } |
| |
| public LookaheadSet LOOK(NFAState s) { |
| if ( NFAToDFAConverter.debug ) { |
| System.out.println("> LOOK("+s+")"); |
| } |
| lookBusy.clear(); |
| LookaheadSet look = _FIRST(s, true); |
| // FOLLOW makes no sense (at the moment!) for lexical rules. |
| if ( grammar.type!=Grammar.LEXER && look.member(Label.EOR_TOKEN_TYPE) ) { |
| // avoid altering FIRST reset as it is cached |
| LookaheadSet f = FOLLOW(s.enclosingRule); |
| f.orInPlace(look); |
| f.remove(Label.EOR_TOKEN_TYPE); |
| look = f; |
| //look.orInPlace(FOLLOW(s.enclosingRule)); |
| } |
| else if ( grammar.type==Grammar.LEXER && look.member(Label.EOT) ) { |
| // if this has EOT, lookahead is all char (all char can follow rule) |
| //look = new LookaheadSet(Label.EOT); |
| look = new LookaheadSet(IntervalSet.COMPLETE_SET); |
| } |
| if ( NFAToDFAConverter.debug ) { |
| System.out.println("< LOOK("+s+")="+look.toString(grammar)); |
| } |
| return look; |
| } |
| |
| protected LookaheadSet _FIRST(NFAState s, boolean chaseFollowTransitions) { |
| /* |
| System.out.println("_LOOK("+s+") in rule "+s.enclosingRule); |
| if ( s.transition[0] instanceof RuleClosureTransition ) { |
| System.out.println("go to rule "+((NFAState)s.transition[0].target).enclosingRule); |
| } |
| */ |
| if ( !chaseFollowTransitions && s.isAcceptState() ) { |
| if ( grammar.type==Grammar.LEXER ) { |
| // FOLLOW makes no sense (at the moment!) for lexical rules. |
| // assume all char can follow |
| return new LookaheadSet(IntervalSet.COMPLETE_SET); |
| } |
| return new LookaheadSet(Label.EOR_TOKEN_TYPE); |
| } |
| |
| if ( lookBusy.contains(s) ) { |
| // return a copy of an empty set; we may modify set inline |
| return new LookaheadSet(); |
| } |
| lookBusy.add(s); |
| |
| Transition transition0 = s.transition[0]; |
| if ( transition0==null ) { |
| return null; |
| } |
| |
| if ( transition0.label.isAtom() ) { |
| int atom = transition0.label.getAtom(); |
| return new LookaheadSet(atom); |
| } |
| if ( transition0.label.isSet() ) { |
| IntSet sl = transition0.label.getSet(); |
| return new LookaheadSet(sl); |
| } |
| |
| // compute FIRST of transition 0 |
| LookaheadSet tset = null; |
| // if transition 0 is a rule call and we don't want FOLLOW, check cache |
| if ( !chaseFollowTransitions && transition0 instanceof RuleClosureTransition ) { |
| tset = FIRSTCache.get((NFAState)transition0.target); |
| } |
| |
| // if not in cache, must compute |
| if ( tset==null ) { |
| tset = _FIRST((NFAState)transition0.target, chaseFollowTransitions); |
| // save FIRST cache for transition 0 if rule call |
| if ( !chaseFollowTransitions && transition0 instanceof RuleClosureTransition ) { |
| FIRSTCache.put((NFAState)transition0.target, tset); |
| } |
| } |
| |
| LookaheadSet tsetCached = tset; // tset is stored in cache. We can't return the same instance |
| |
| // did we fall off the end? |
| if ( grammar.type!=Grammar.LEXER && tset.member(Label.EOR_TOKEN_TYPE) ) { |
| if ( transition0 instanceof RuleClosureTransition ) { |
| // we called a rule that found the end of the rule. |
| // That means the rule is nullable and we need to |
| // keep looking at what follows the rule ref. E.g., |
| // a : b A ; where b is nullable means that LOOK(a) |
| // should include A. |
| RuleClosureTransition ruleInvocationTrans = |
| (RuleClosureTransition)transition0; |
| // remove the EOR and get what follows |
| //tset.remove(Label.EOR_TOKEN_TYPE); |
| NFAState following = (NFAState) ruleInvocationTrans.followState; |
| LookaheadSet fset = _FIRST(following, chaseFollowTransitions); |
| fset.orInPlace(tset); // tset cached; or into new set |
| fset.remove(Label.EOR_TOKEN_TYPE); |
| tset = fset; |
| } |
| } |
| |
| Transition transition1 = s.transition[1]; |
| if ( transition1!=null ) { |
| LookaheadSet tset1 = |
| _FIRST((NFAState)transition1.target, chaseFollowTransitions); |
| tset1.orInPlace(tset); |
| tset = tset1; |
| } |
| |
| // never return a cached set; clone |
| return tset==tsetCached ? new LookaheadSet(tset) : tset; |
| } |
| |
| /** Is there a non-syn-pred predicate visible from s that is not in |
| * the rule enclosing s? This accounts for most predicate situations |
| * and lets ANTLR do a simple LL(1)+pred computation. |
| * |
| * TODO: what about gated vs regular preds? |
| */ |
| public boolean detectConfoundingPredicates(NFAState s) { |
| lookBusy.clear(); |
| Rule r = s.enclosingRule; |
| return _detectConfoundingPredicates(s, r, false) == DETECT_PRED_FOUND; |
| } |
| |
| protected int _detectConfoundingPredicates(NFAState s, |
| Rule enclosingRule, |
| boolean chaseFollowTransitions) |
| { |
| //System.out.println("_detectNonAutobacktrackPredicates("+s+")"); |
| if ( !chaseFollowTransitions && s.isAcceptState() ) { |
| if ( grammar.type==Grammar.LEXER ) { |
| // FOLLOW makes no sense (at the moment!) for lexical rules. |
| // assume all char can follow |
| return DETECT_PRED_NOT_FOUND; |
| } |
| return DETECT_PRED_EOR; |
| } |
| |
| if ( lookBusy.contains(s) ) { |
| // return a copy of an empty set; we may modify set inline |
| return DETECT_PRED_NOT_FOUND; |
| } |
| lookBusy.add(s); |
| |
| Transition transition0 = s.transition[0]; |
| if ( transition0==null ) { |
| return DETECT_PRED_NOT_FOUND; |
| } |
| |
| if ( !(transition0.label.isSemanticPredicate()|| |
| transition0.label.isEpsilon()) ) { |
| return DETECT_PRED_NOT_FOUND; |
| } |
| |
| if ( transition0.label.isSemanticPredicate() ) { |
| //System.out.println("pred "+transition0.label); |
| SemanticContext ctx = transition0.label.getSemanticContext(); |
| SemanticContext.Predicate p = (SemanticContext.Predicate)ctx; |
| if ( p.predicateAST.getType() != ANTLRParser.BACKTRACK_SEMPRED ) { |
| return DETECT_PRED_FOUND; |
| } |
| } |
| |
| /* |
| if ( transition0.label.isSemanticPredicate() ) { |
| System.out.println("pred "+transition0.label); |
| SemanticContext ctx = transition0.label.getSemanticContext(); |
| SemanticContext.Predicate p = (SemanticContext.Predicate)ctx; |
| // if a non-syn-pred found not in enclosingRule, say we found one |
| if ( p.predicateAST.getType() != ANTLRParser.BACKTRACK_SEMPRED && |
| !p.predicateAST.enclosingRuleName.equals(enclosingRule.name) ) |
| { |
| System.out.println("found pred "+p+" not in "+enclosingRule.name); |
| return DETECT_PRED_FOUND; |
| } |
| } |
| */ |
| |
| int result = _detectConfoundingPredicates((NFAState)transition0.target, |
| enclosingRule, |
| chaseFollowTransitions); |
| if ( result == DETECT_PRED_FOUND ) { |
| return DETECT_PRED_FOUND; |
| } |
| |
| if ( result == DETECT_PRED_EOR ) { |
| if ( transition0 instanceof RuleClosureTransition ) { |
| // we called a rule that found the end of the rule. |
| // That means the rule is nullable and we need to |
| // keep looking at what follows the rule ref. E.g., |
| // a : b A ; where b is nullable means that LOOK(a) |
| // should include A. |
| RuleClosureTransition ruleInvocationTrans = |
| (RuleClosureTransition)transition0; |
| NFAState following = (NFAState) ruleInvocationTrans.followState; |
| int afterRuleResult = |
| _detectConfoundingPredicates(following, |
| enclosingRule, |
| chaseFollowTransitions); |
| if ( afterRuleResult == DETECT_PRED_FOUND ) { |
| return DETECT_PRED_FOUND; |
| } |
| } |
| } |
| |
| Transition transition1 = s.transition[1]; |
| if ( transition1!=null ) { |
| int t1Result = |
| _detectConfoundingPredicates((NFAState)transition1.target, |
| enclosingRule, |
| chaseFollowTransitions); |
| if ( t1Result == DETECT_PRED_FOUND ) { |
| return DETECT_PRED_FOUND; |
| } |
| } |
| |
| return DETECT_PRED_NOT_FOUND; |
| } |
| |
| /** Return predicate expression found via epsilon edges from s. Do |
| * not look into other rules for now. Do something simple. Include |
| * backtracking synpreds. |
| */ |
| public SemanticContext getPredicates(NFAState altStartState) { |
| lookBusy.clear(); |
| return _getPredicates(altStartState, altStartState); |
| } |
| |
| protected SemanticContext _getPredicates(NFAState s, NFAState altStartState) { |
| //System.out.println("_getPredicates("+s+")"); |
| if ( s.isAcceptState() ) { |
| return null; |
| } |
| |
| // avoid infinite loops from (..)* etc... |
| if ( lookBusy.contains(s) ) { |
| return null; |
| } |
| lookBusy.add(s); |
| |
| Transition transition0 = s.transition[0]; |
| // no transitions |
| if ( transition0==null ) { |
| return null; |
| } |
| |
| // not a predicate and not even an epsilon |
| if ( !(transition0.label.isSemanticPredicate()|| |
| transition0.label.isEpsilon()) ) { |
| return null; |
| } |
| |
| SemanticContext p = null; |
| SemanticContext p0 = null; |
| SemanticContext p1 = null; |
| if ( transition0.label.isSemanticPredicate() ) { |
| //System.out.println("pred "+transition0.label); |
| p = transition0.label.getSemanticContext(); |
| // ignore backtracking preds not on left edge for this decision |
| if ( ((SemanticContext.Predicate)p).predicateAST.getType() == |
| ANTLRParser.BACKTRACK_SEMPRED && |
| s == altStartState.transition[0].target ) |
| { |
| p = null; // don't count |
| } |
| } |
| |
| // get preds from beyond this state |
| p0 = _getPredicates((NFAState)transition0.target, altStartState); |
| |
| // get preds from other transition |
| Transition transition1 = s.transition[1]; |
| if ( transition1!=null ) { |
| p1 = _getPredicates((NFAState)transition1.target, altStartState); |
| } |
| |
| // join this&following-right|following-down |
| return SemanticContext.and(p,SemanticContext.or(p0,p1)); |
| } |
| } |