| /* |
| * [The "BSD license"] |
| * Copyright (c) 2010 Terence Parr |
| * All rights reserved. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions |
| * are met: |
| * 1. Redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer. |
| * 2. Redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in the |
| * documentation and/or other materials provided with the distribution. |
| * 3. The name of the author may not be used to endorse or promote products |
| * derived from this software without specific prior written permission. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR |
| * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
| * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. |
| * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, |
| * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
| * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF |
| * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| */package org.antlr.tool; |
| |
| import org.antlr.analysis.Label; |
| import org.antlr.analysis.NFAState; |
| import org.antlr.grammar.v3.ANTLRParser; |
| import org.antlr.grammar.v3.AssignTokenTypesWalker; |
| import org.antlr.misc.Utils; |
| import org.antlr.runtime.RecognitionException; |
| import org.antlr.runtime.tree.CommonTreeNodeStream; |
| |
| import java.util.*; |
| |
| /** A tree of component (delegate) grammars. |
| * |
| * Rules defined in delegates are "inherited" like multi-inheritance |
| * so you can override them. All token types must be consistent across |
| * rules from all delegate grammars, so they must be stored here in one |
| * central place. |
| * |
| * We have to start out assuming a composite grammar situation as we can't |
| * look into the grammar files a priori to see if there is a delegate |
| * statement. Because of this, and to avoid duplicating token type tracking |
| * in each grammar, even single noncomposite grammars use one of these objects |
| * to track token types. |
| */ |
| public class CompositeGrammar { |
| public static final int MIN_RULE_INDEX = 1; |
| |
| public CompositeGrammarTree delegateGrammarTreeRoot; |
| |
| /** Used during getRuleReferenceClosure to detect computation cycles */ |
| protected Set<NFAState> refClosureBusy = new HashSet<NFAState>(); |
| |
| /** Used to assign state numbers; all grammars in composite share common |
| * NFA space. This NFA tracks state numbers number to state mapping. |
| */ |
| public int stateCounter = 0; |
| |
| /** The NFA states in the NFA built from rules across grammars in composite. |
| * Maps state number to NFAState object. |
| * This is a Vector instead of a List because I need to be able to grow |
| * this properly. After talking to Josh Bloch, Collections guy at Sun, |
| * I decided this was easiest solution. |
| */ |
| protected Vector<NFAState> numberToStateList = new Vector<NFAState>(1000); |
| |
| /** Token names and literal tokens like "void" are uniquely indexed. |
| * with -1 implying EOF. Characters are different; they go from |
| * -1 (EOF) to \uFFFE. For example, 0 could be a binary byte you |
| * want to lexer. Labels of DFA/NFA transitions can be both tokens |
| * and characters. I use negative numbers for bookkeeping labels |
| * like EPSILON. Char/String literals and token types overlap in the same |
| * space, however. |
| */ |
| protected int maxTokenType = Label.MIN_TOKEN_TYPE-1; |
| |
| /** Map token like ID (but not literals like "while") to its token type */ |
| public Map tokenIDToTypeMap = new LinkedHashMap(); |
| |
| /** Map token literals like "while" to its token type. It may be that |
| * WHILE="while"=35, in which case both tokenIDToTypeMap and this |
| * field will have entries both mapped to 35. |
| */ |
| public Map<String, Integer> stringLiteralToTypeMap = new LinkedHashMap<String, Integer>(); |
| /** Reverse index for stringLiteralToTypeMap */ |
| public Vector<String> typeToStringLiteralList = new Vector<String>(); |
| |
| /** Map a token type to its token name. |
| * Must subtract MIN_TOKEN_TYPE from index. |
| */ |
| public Vector<String> typeToTokenList = new Vector<String>(); |
| |
| /** If combined or lexer grammar, track the rules. |
| * Track lexer rules so we can warn about undefined tokens. |
| * This is combined set of lexer rules from all lexer grammars |
| * seen in all imports. |
| */ |
| protected Set<String> lexerRules = new HashSet<String>(); |
| |
| /** Rules are uniquely labeled from 1..n among all grammars */ |
| protected int ruleIndex = MIN_RULE_INDEX; |
| |
| /** Map a rule index to its name; use a Vector on purpose as new |
| * collections stuff won't let me setSize and make it grow. :( |
| * I need a specific guaranteed index, which the Collections stuff |
| * won't let me have. |
| */ |
| protected Vector<Rule> ruleIndexToRuleList = new Vector<Rule>(); |
| |
| public boolean watchNFAConversion = false; |
| |
| protected void initTokenSymbolTables() { |
| // the faux token types take first NUM_FAUX_LABELS positions |
| // then we must have room for the predefined runtime token types |
| // like DOWN/UP used for tree parsing. |
| typeToTokenList.setSize(Label.NUM_FAUX_LABELS+Label.MIN_TOKEN_TYPE-1); |
| typeToTokenList.set(Label.NUM_FAUX_LABELS+Label.INVALID, "<INVALID>"); |
| typeToTokenList.set(Label.NUM_FAUX_LABELS+Label.EOT, "<EOT>"); |
| typeToTokenList.set(Label.NUM_FAUX_LABELS+Label.SEMPRED, "<SEMPRED>"); |
| typeToTokenList.set(Label.NUM_FAUX_LABELS+Label.SET, "<SET>"); |
| typeToTokenList.set(Label.NUM_FAUX_LABELS+Label.EPSILON, Label.EPSILON_STR); |
| typeToTokenList.set(Label.NUM_FAUX_LABELS+Label.EOF, "EOF"); |
| typeToTokenList.set(Label.NUM_FAUX_LABELS+Label.EOR_TOKEN_TYPE-1, "<EOR>"); |
| typeToTokenList.set(Label.NUM_FAUX_LABELS+Label.DOWN-1, "DOWN"); |
| typeToTokenList.set(Label.NUM_FAUX_LABELS+Label.UP-1, "UP"); |
| tokenIDToTypeMap.put("<INVALID>", Utils.integer(Label.INVALID)); |
| tokenIDToTypeMap.put("<EOT>", Utils.integer(Label.EOT)); |
| tokenIDToTypeMap.put("<SEMPRED>", Utils.integer(Label.SEMPRED)); |
| tokenIDToTypeMap.put("<SET>", Utils.integer(Label.SET)); |
| tokenIDToTypeMap.put("<EPSILON>", Utils.integer(Label.EPSILON)); |
| tokenIDToTypeMap.put("EOF", Utils.integer(Label.EOF)); |
| tokenIDToTypeMap.put("<EOR>", Utils.integer(Label.EOR_TOKEN_TYPE)); |
| tokenIDToTypeMap.put("DOWN", Utils.integer(Label.DOWN)); |
| tokenIDToTypeMap.put("UP", Utils.integer(Label.UP)); |
| } |
| |
| public CompositeGrammar() { |
| initTokenSymbolTables(); |
| } |
| |
| public CompositeGrammar(Grammar g) { |
| this(); |
| setDelegationRoot(g); |
| } |
| |
| public void setDelegationRoot(Grammar root) { |
| delegateGrammarTreeRoot = new CompositeGrammarTree(root); |
| root.compositeTreeNode = delegateGrammarTreeRoot; |
| } |
| |
| public Rule getRule(String ruleName) { |
| return delegateGrammarTreeRoot.getRule(ruleName); |
| } |
| |
| public Object getOption(String key) { |
| return delegateGrammarTreeRoot.getOption(key); |
| } |
| |
| /** Add delegate grammar as child of delegator */ |
| public void addGrammar(Grammar delegator, Grammar delegate) { |
| if ( delegator.compositeTreeNode==null ) { |
| delegator.compositeTreeNode = new CompositeGrammarTree(delegator); |
| } |
| delegator.compositeTreeNode.addChild(new CompositeGrammarTree(delegate)); |
| |
| /*// find delegator in tree so we can add a child to it |
| CompositeGrammarTree t = delegateGrammarTreeRoot.findNode(delegator); |
| t.addChild(); |
| */ |
| // make sure new grammar shares this composite |
| delegate.composite = this; |
| } |
| |
| /** Get parent of this grammar */ |
| public Grammar getDelegator(Grammar g) { |
| CompositeGrammarTree me = delegateGrammarTreeRoot.findNode(g); |
| if ( me==null ) { |
| return null; // not found |
| } |
| if ( me.parent!=null ) { |
| return me.parent.grammar; |
| } |
| return null; |
| } |
| |
| /** Get list of all delegates from all grammars in the delegate subtree of g. |
| * The grammars are in delegation tree preorder. Don't include g itself |
| * in list as it is not a delegate of itself. |
| */ |
| public List<Grammar> getDelegates(Grammar g) { |
| CompositeGrammarTree t = delegateGrammarTreeRoot.findNode(g); |
| if ( t==null ) { |
| return null; // no delegates |
| } |
| List<Grammar> grammars = t.getPostOrderedGrammarList(); |
| grammars.remove(grammars.size()-1); // remove g (last one) |
| return grammars; |
| } |
| |
| public List<Grammar> getDirectDelegates(Grammar g) { |
| CompositeGrammarTree t = delegateGrammarTreeRoot.findNode(g); |
| List<CompositeGrammarTree> children = t.children; |
| if ( children==null ) { |
| return null; |
| } |
| List<Grammar> grammars = new ArrayList(); |
| for (int i = 0; children!=null && i < children.size(); i++) { |
| CompositeGrammarTree child = (CompositeGrammarTree) children.get(i); |
| grammars.add(child.grammar); |
| } |
| return grammars; |
| } |
| |
| /** Get delegates below direct delegates of g */ |
| public List<Grammar> getIndirectDelegates(Grammar g) { |
| List<Grammar> direct = getDirectDelegates(g); |
| List<Grammar> delegates = getDelegates(g); |
| delegates.removeAll(direct); |
| return delegates; |
| } |
| |
| /** Return list of delegate grammars from root down to g. |
| * Order is root, ..., g.parent. (g not included). |
| */ |
| public List<Grammar> getDelegators(Grammar g) { |
| if ( g==delegateGrammarTreeRoot.grammar ) { |
| return null; |
| } |
| List<Grammar> grammars = new ArrayList(); |
| CompositeGrammarTree t = delegateGrammarTreeRoot.findNode(g); |
| // walk backwards to root, collecting grammars |
| CompositeGrammarTree p = t.parent; |
| while ( p!=null ) { |
| grammars.add(0, p.grammar); // add to head so in order later |
| p = p.parent; |
| } |
| return grammars; |
| } |
| |
| /** Get set of rules for grammar g that need to have manual delegation |
| * methods. This is the list of rules collected from all direct/indirect |
| * delegates minus rules overridden in grammar g. |
| * |
| * This returns null except for the delegate root because it is the only |
| * one that has to have a complete grammar rule interface. The delegates |
| * should not be instantiated directly for use as parsers (you can create |
| * them to pass to the root parser's ctor as arguments). |
| */ |
| public Set<Rule> getDelegatedRules(Grammar g) { |
| if ( g!=delegateGrammarTreeRoot.grammar ) { |
| return null; |
| } |
| Set<Rule> rules = getAllImportedRules(g); |
| for (Iterator it = rules.iterator(); it.hasNext();) { |
| Rule r = (Rule) it.next(); |
| Rule localRule = g.getLocallyDefinedRule(r.name); |
| // if locally defined or it's not local but synpred, don't make |
| // a delegation method |
| if ( localRule!=null || r.isSynPred ) { |
| it.remove(); // kill overridden rules |
| } |
| } |
| return rules; |
| } |
| |
| /** Get all rule definitions from all direct/indirect delegate grammars |
| * of g. |
| */ |
| public Set<Rule> getAllImportedRules(Grammar g) { |
| Set<String> ruleNames = new HashSet(); |
| Set<Rule> rules = new HashSet(); |
| CompositeGrammarTree subtreeRoot = delegateGrammarTreeRoot.findNode(g); |
| |
| List<Grammar> grammars = subtreeRoot.getPreOrderedGrammarList(); |
| // walk all grammars preorder, priority given to grammar listed first. |
| for (int i = 0; i < grammars.size(); i++) { |
| Grammar delegate = (org.antlr.tool.Grammar) grammars.get(i); |
| // for each rule in delegate, add to rules if no rule with that |
| // name as been seen. (can't use removeAll; wrong hashcode/equals on Rule) |
| for (Iterator it = delegate.getRules().iterator(); it.hasNext();) { |
| Rule r = (Rule)it.next(); |
| if ( !ruleNames.contains(r.name) ) { |
| ruleNames.add(r.name); // track that we've seen this |
| rules.add(r); |
| } |
| } |
| } |
| return rules; |
| } |
| |
| public Grammar getRootGrammar() { |
| if ( delegateGrammarTreeRoot==null ) { |
| return null; |
| } |
| return delegateGrammarTreeRoot.grammar; |
| } |
| |
| public Grammar getGrammar(String grammarName) { |
| CompositeGrammarTree t = delegateGrammarTreeRoot.findNode(grammarName); |
| if ( t!=null ) { |
| return t.grammar; |
| } |
| return null; |
| } |
| |
| // NFA spans multiple grammars, must handle here |
| |
| public int getNewNFAStateNumber() { |
| return stateCounter++; |
| } |
| |
| public void addState(NFAState state) { |
| numberToStateList.setSize(state.stateNumber+1); // make sure we have room |
| numberToStateList.set(state.stateNumber, state); |
| } |
| |
| public NFAState getState(int s) { |
| return (NFAState)numberToStateList.get(s); |
| } |
| |
| public void assignTokenTypes() throws RecognitionException { |
| // ASSIGN TOKEN TYPES for all delegates (same walker) |
| //System.out.println("### assign types"); |
| AssignTokenTypesWalker ttypesWalker = new AssignTokenTypesBehavior(); |
| List<Grammar> grammars = delegateGrammarTreeRoot.getPostOrderedGrammarList(); |
| for (int i = 0; grammars!=null && i < grammars.size(); i++) { |
| Grammar g = (Grammar)grammars.get(i); |
| ttypesWalker.setTreeNodeStream(new CommonTreeNodeStream(g.getGrammarTree())); |
| try { |
| //System.out.println(" walking "+g.name); |
| ttypesWalker.grammar_(g); |
| } |
| catch (RecognitionException re) { |
| ErrorManager.error(ErrorManager.MSG_BAD_AST_STRUCTURE, |
| re); |
| } |
| } |
| // the walker has filled literals, tokens, and alias tables. |
| // now tell it to define them in the root grammar |
| ttypesWalker.defineTokens(delegateGrammarTreeRoot.grammar); |
| } |
| |
| public void translateLeftRecursiveRules() { |
| List<Grammar> grammars = delegateGrammarTreeRoot.getPostOrderedGrammarList(); |
| for (int i = 0; grammars!=null && i < grammars.size(); i++) { |
| Grammar g = grammars.get(i); |
| if ( !(g.type==Grammar.PARSER || g.type==Grammar.COMBINED) ) continue; |
| for (GrammarAST r : g.grammarTree.findAllType(ANTLRParser.RULE)) { |
| if ( !Character.isUpperCase(r.getChild(0).getText().charAt(0)) ) { |
| if ( LeftRecursiveRuleAnalyzer.hasImmediateRecursiveRuleRefs(r, r.enclosingRuleName) ) { |
| g.translateLeftRecursiveRule(r); |
| } |
| } |
| } |
| } |
| } |
| |
| public void defineGrammarSymbols() { |
| delegateGrammarTreeRoot.trimLexerImportsIntoCombined(); |
| List<Grammar> grammars = delegateGrammarTreeRoot.getPostOrderedGrammarList(); |
| for (int i = 0; grammars!=null && i < grammars.size(); i++) { |
| Grammar g = grammars.get(i); |
| g.defineGrammarSymbols(); |
| } |
| for (int i = 0; grammars!=null && i < grammars.size(); i++) { |
| Grammar g = grammars.get(i); |
| g.checkNameSpaceAndActions(); |
| } |
| minimizeRuleSet(); |
| } |
| |
| public void createNFAs() { |
| if ( ErrorManager.doNotAttemptAnalysis() ) { |
| return; |
| } |
| List<Grammar> grammars = delegateGrammarTreeRoot.getPostOrderedGrammarList(); |
| //System.out.println("### createNFAs for composite; grammars: "+names); |
| for (int i = 0; grammars!=null && i < grammars.size(); i++) { |
| Grammar g = (Grammar)grammars.get(i); |
| g.createRuleStartAndStopNFAStates(); |
| } |
| for (int i = 0; grammars!=null && i < grammars.size(); i++) { |
| Grammar g = (Grammar)grammars.get(i); |
| g.buildNFA(); |
| } |
| } |
| |
| public void minimizeRuleSet() { |
| Set<String> ruleDefs = new HashSet<String>(); |
| _minimizeRuleSet(ruleDefs, delegateGrammarTreeRoot); |
| } |
| |
| public void _minimizeRuleSet(Set<String> ruleDefs, |
| CompositeGrammarTree p) { |
| Set<String> localRuleDefs = new HashSet<String>(); |
| Set<String> overrides = new HashSet<String>(); |
| // compute set of non-overridden rules for this delegate |
| for (Rule r : p.grammar.getRules()) { |
| if ( !ruleDefs.contains(r.name) ) { |
| localRuleDefs.add(r.name); |
| } |
| else if ( !r.name.equals(Grammar.ARTIFICIAL_TOKENS_RULENAME) ) { |
| // record any overridden rule 'cept tokens rule |
| overrides.add(r.name); |
| } |
| } |
| //System.out.println("rule defs for "+p.grammar.name+": "+localRuleDefs); |
| //System.out.println("overridden rule for "+p.grammar.name+": "+overrides); |
| p.grammar.overriddenRules = overrides; |
| |
| // make set of all rules defined thus far walking delegation tree. |
| // the same rule in two delegates resolves in favor of first found |
| // in tree therefore second must not be included |
| ruleDefs.addAll(localRuleDefs); |
| |
| // pass larger set of defined rules to delegates |
| if ( p.children!=null ) { |
| for (CompositeGrammarTree delegate : p.children) { |
| _minimizeRuleSet(ruleDefs, delegate); |
| } |
| } |
| } |
| |
| /* |
| public void minimizeRuleSet() { |
| Set<Rule> refs = _minimizeRuleSet(delegateGrammarTreeRoot); |
| System.out.println("all rule refs: "+refs); |
| } |
| |
| public Set<Rule> _minimizeRuleSet(CompositeGrammarTree p) { |
| Set<Rule> refs = new HashSet<Rule>(); |
| for (GrammarAST refAST : p.grammar.ruleRefs) { |
| System.out.println("ref "+refAST.getText()+": "+refAST.NFAStartState+ |
| " enclosing rule: "+refAST.NFAStartState.enclosingRule+ |
| " invoking rule: "+((NFAState)refAST.NFAStartState.transition[0].target).enclosingRule); |
| refs.add(((NFAState)refAST.NFAStartState.transition[0].target).enclosingRule); |
| } |
| |
| if ( p.children!=null ) { |
| for (CompositeGrammarTree delegate : p.children) { |
| Set<Rule> delegateRuleRefs = _minimizeRuleSet(delegate); |
| refs.addAll(delegateRuleRefs); |
| } |
| } |
| |
| return refs; |
| } |
| */ |
| |
| /* |
| public void oldminimizeRuleSet() { |
| // first walk to remove all overridden rules |
| Set<String> ruleDefs = new HashSet<String>(); |
| Set<String> ruleRefs = new HashSet<String>(); |
| for (GrammarAST refAST : delegateGrammarTreeRoot.grammar.ruleRefs) { |
| String rname = refAST.getText(); |
| ruleRefs.add(rname); |
| } |
| _minimizeRuleSet(ruleDefs, |
| ruleRefs, |
| delegateGrammarTreeRoot); |
| System.out.println("overall rule defs: "+ruleDefs); |
| } |
| |
| public void _minimizeRuleSet(Set<String> ruleDefs, |
| Set<String> ruleRefs, |
| CompositeGrammarTree p) { |
| Set<String> localRuleDefs = new HashSet<String>(); |
| for (Rule r : p.grammar.getRules()) { |
| if ( !ruleDefs.contains(r.name) ) { |
| localRuleDefs.add(r.name); |
| ruleDefs.add(r.name); |
| } |
| } |
| System.out.println("rule defs for "+p.grammar.name+": "+localRuleDefs); |
| |
| // remove locally-defined rules not in ref set |
| // find intersection of local rules and references from delegator |
| // that is set of rules needed by delegator |
| Set<String> localRuleDefsSatisfyingRefsFromBelow = new HashSet<String>(); |
| for (String r : ruleRefs) { |
| if ( localRuleDefs.contains(r) ) { |
| localRuleDefsSatisfyingRefsFromBelow.add(r); |
| } |
| } |
| |
| // now get list of refs from localRuleDefsSatisfyingRefsFromBelow. |
| // Those rules are also allowed in this delegate |
| for (GrammarAST refAST : p.grammar.ruleRefs) { |
| if ( localRuleDefsSatisfyingRefsFromBelow.contains(refAST.enclosingRuleName) ) { |
| // found rule ref within needed rule |
| } |
| } |
| |
| // remove rule refs not in the new rule def set |
| |
| // walk all children, adding rules not already defined |
| if ( p.children!=null ) { |
| for (CompositeGrammarTree delegate : p.children) { |
| _minimizeRuleSet(ruleDefs, ruleRefs, delegate); |
| } |
| } |
| } |
| */ |
| |
| /* |
| public void trackNFAStatesThatHaveLabeledEdge(Label label, |
| NFAState stateWithLabeledEdge) |
| { |
| Set<NFAState> states = typeToNFAStatesWithEdgeOfTypeMap.get(label); |
| if ( states==null ) { |
| states = new HashSet<NFAState>(); |
| typeToNFAStatesWithEdgeOfTypeMap.put(label, states); |
| } |
| states.add(stateWithLabeledEdge); |
| } |
| |
| public Map<Label, Set<NFAState>> getTypeToNFAStatesWithEdgeOfTypeMap() { |
| return typeToNFAStatesWithEdgeOfTypeMap; |
| } |
| |
| public Set<NFAState> getStatesWithEdge(Label label) { |
| return typeToNFAStatesWithEdgeOfTypeMap.get(label); |
| } |
| */ |
| } |