blob: b9ef82fc2f485f733847fa6efe27c4af140bd692 [file] [log] [blame]
/*
* [The "BSD license"]
* Copyright (c) 2010 Terence Parr
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/package org.antlr.tool;
import org.antlr.analysis.Label;
import org.antlr.analysis.NFAState;
import org.antlr.grammar.v3.ANTLRParser;
import org.antlr.grammar.v3.AssignTokenTypesWalker;
import org.antlr.misc.Utils;
import org.antlr.runtime.RecognitionException;
import org.antlr.runtime.tree.CommonTreeNodeStream;
import java.util.*;
/** A tree of component (delegate) grammars.
*
* Rules defined in delegates are "inherited" like multi-inheritance
* so you can override them. All token types must be consistent across
* rules from all delegate grammars, so they must be stored here in one
* central place.
*
* We have to start out assuming a composite grammar situation as we can't
* look into the grammar files a priori to see if there is a delegate
* statement. Because of this, and to avoid duplicating token type tracking
* in each grammar, even single noncomposite grammars use one of these objects
* to track token types.
*/
public class CompositeGrammar {
/** First index handed out to a rule; ruleIndex starts at this value. */
public static final int MIN_RULE_INDEX = 1;
/** Root node of the delegation tree. Not initialized here; it is assigned
 * by setDelegationRoot() and is null until then.
 */
public CompositeGrammarTree delegateGrammarTreeRoot;
/** Used during getRuleReferenceClosure to detect computation cycles */
protected Set<NFAState> refClosureBusy = new HashSet<NFAState>();
/** Used to assign state numbers; all grammars in composite share common
 * NFA space. This is the next state number returned by
 * getNewNFAStateNumber(); numberToStateList holds the number-to-state mapping.
 */
public int stateCounter = 0;
/** The NFA states in the NFA built from rules across grammars in composite.
 * Maps state number to NFAState object.
 * This is a Vector instead of a List because I need to be able to grow
 * this properly. After talking to Josh Bloch, Collections guy at Sun,
 * I decided this was easiest solution.
 */
protected Vector<NFAState> numberToStateList = new Vector<NFAState>(1000);
/** Token names and literal tokens like "void" are uniquely indexed,
 * with -1 implying EOF. Characters are different; they go from
 * -1 (EOF) to \uFFFE. For example, 0 could be a binary byte you
 * want to lex. Labels of DFA/NFA transitions can be both tokens
 * and characters. I use negative numbers for bookkeeping labels
 * like EPSILON. Char/String literals and token types overlap in the same
 * space, however.
 */
protected int maxTokenType = Label.MIN_TOKEN_TYPE-1;
/** Map token like ID (but not literals like "while") to its token type.
 * Raw Map; initTokenSymbolTables() stores String keys and the values
 * produced by Utils.integer().
 */
public Map tokenIDToTypeMap = new LinkedHashMap();
/** Map token literals like "while" to its token type. It may be that
 * WHILE="while"=35, in which case both tokenIDToTypeMap and this
 * field will have entries both mapped to 35.
 */
public Map<String, Integer> stringLiteralToTypeMap = new LinkedHashMap<String, Integer>();
/** Reverse index for stringLiteralToTypeMap */
public Vector<String> typeToStringLiteralList = new Vector<String>();
/** Map a token type to its token name.
 * Must subtract MIN_TOKEN_TYPE from index.
 * NOTE(review): initTokenSymbolTables() stores the faux labels at
 * NUM_FAUX_LABELS+type and EOR/DOWN/UP at NUM_FAUX_LABELS+type-1, so the
 * offset rule stated above looks incomplete -- confirm against the readers
 * of this list.
 */
public Vector<String> typeToTokenList = new Vector<String>();
/** If combined or lexer grammar, track the rules.
 * Track lexer rules so we can warn about undefined tokens.
 * This is combined set of lexer rules from all lexer grammars
 * seen in all imports.
 */
protected Set<String> lexerRules = new HashSet<String>();
/** Rules are uniquely labeled from 1..n among all grammars */
protected int ruleIndex = MIN_RULE_INDEX;
/** Map a rule index to its Rule; use a Vector on purpose as new
 * collections stuff won't let me setSize and make it grow. :(
 * I need a specific guaranteed index, which the Collections stuff
 * won't let me have.
 */
protected Vector<Rule> ruleIndexToRuleList = new Vector<Rule>();
/** Not read or written anywhere in this class; presumably a tracing switch
 * consulted by the NFA conversion code elsewhere -- TODO confirm.
 */
public boolean watchNFAConversion = false;
/** Seed the token tables with the predefined label types.
 *
 *  typeToTokenList is sized to NUM_FAUX_LABELS+MIN_TOKEN_TYPE-1 slots.
 *  The faux labels (INVALID, EOT, SEMPRED, SET, EPSILON, EOF) are stored
 *  at index NUM_FAUX_LABELS+type; the predefined runtime types (EOR,
 *  DOWN, UP) are stored at index NUM_FAUX_LABELS+type-1. Every name is
 *  also registered in tokenIDToTypeMap under its token type, in the same
 *  order as before (the map is insertion-ordered).
 */
protected void initTokenSymbolTables() {
    // the faux token types take first NUM_FAUX_LABELS positions; the
    // remaining slots hold the predefined runtime token types such as
    // DOWN/UP used for tree parsing.
    final int fauxOffset = Label.NUM_FAUX_LABELS;
    typeToTokenList.setSize(fauxOffset + Label.MIN_TOKEN_TYPE - 1);

    typeToTokenList.set(fauxOffset + Label.INVALID, "<INVALID>");
    tokenIDToTypeMap.put("<INVALID>", Utils.integer(Label.INVALID));

    typeToTokenList.set(fauxOffset + Label.EOT, "<EOT>");
    tokenIDToTypeMap.put("<EOT>", Utils.integer(Label.EOT));

    typeToTokenList.set(fauxOffset + Label.SEMPRED, "<SEMPRED>");
    tokenIDToTypeMap.put("<SEMPRED>", Utils.integer(Label.SEMPRED));

    typeToTokenList.set(fauxOffset + Label.SET, "<SET>");
    tokenIDToTypeMap.put("<SET>", Utils.integer(Label.SET));

    // the list uses Label.EPSILON_STR while the map key is the literal
    // below; assumes both spell "<EPSILON>" -- verify against Label.
    typeToTokenList.set(fauxOffset + Label.EPSILON, Label.EPSILON_STR);
    tokenIDToTypeMap.put("<EPSILON>", Utils.integer(Label.EPSILON));

    typeToTokenList.set(fauxOffset + Label.EOF, "EOF");
    tokenIDToTypeMap.put("EOF", Utils.integer(Label.EOF));

    // the remaining three types are shifted down by one slot
    typeToTokenList.set(fauxOffset + Label.EOR_TOKEN_TYPE - 1, "<EOR>");
    tokenIDToTypeMap.put("<EOR>", Utils.integer(Label.EOR_TOKEN_TYPE));

    typeToTokenList.set(fauxOffset + Label.DOWN - 1, "DOWN");
    tokenIDToTypeMap.put("DOWN", Utils.integer(Label.DOWN));

    typeToTokenList.set(fauxOffset + Label.UP - 1, "UP");
    tokenIDToTypeMap.put("UP", Utils.integer(Label.UP));
}
/** Create a composite with no delegation root; only the predefined token
 *  symbol tables are initialized. delegateGrammarTreeRoot stays null until
 *  setDelegationRoot() is called.
 */
public CompositeGrammar() {
// NOTE(review): initTokenSymbolTables() is protected and therefore
// overridable, yet it is invoked from the constructor.
initTokenSymbolTables();
}
/** Create a composite whose delegation tree is rooted at grammar g.
 *  Initializes the token symbol tables (via the no-arg constructor) and
 *  then installs g as the delegation root.
 */
public CompositeGrammar(Grammar g) {
this();
setDelegationRoot(g);
}
/** Make root the top of the delegation tree.
 *
 *  A new tree node wrapping root replaces any previous
 *  delegateGrammarTreeRoot, and root.compositeTreeNode is pointed at
 *  that same node.
 */
public void setDelegationRoot(Grammar root) {
    CompositeGrammarTree rootNode = new CompositeGrammarTree(root);
    delegateGrammarTreeRoot = rootNode;
    root.compositeTreeNode = rootNode;
}
/** Look up a rule by name by asking the root node of the delegation tree.
 *  Throws NullPointerException if no delegation root has been set.
 */
public Rule getRule(String ruleName) {
return delegateGrammarTreeRoot.getRule(ruleName);
}
/** Look up an option by key by asking the root node of the delegation tree.
 *  Throws NullPointerException if no delegation root has been set.
 */
public Object getOption(String key) {
return delegateGrammarTreeRoot.getOption(key);
}
/** Add delegate grammar as child of delegator.
 *
 *  If delegator has no tree node yet, one is created for it first. A new
 *  tree node wrapping delegate is then appended to the delegator's node,
 *  and delegate is made to share this composite.
 *
 *  NOTE(review): a node created here for the delegator is not attached
 *  to delegateGrammarTreeRoot, and delegate.compositeTreeNode is not
 *  set by this method -- confirm that callers arrange both.
 */
public void addGrammar(Grammar delegator, Grammar delegate) {
    CompositeGrammarTree delegatorNode = delegator.compositeTreeNode;
    if (delegatorNode == null) {
        delegatorNode = new CompositeGrammarTree(delegator);
        delegator.compositeTreeNode = delegatorNode;
    }
    delegatorNode.addChild(new CompositeGrammarTree(delegate));

    // make sure new grammar shares this composite
    delegate.composite = this;
}
/** Get parent of this grammar: the grammar held by the parent of g's
 *  node in the delegation tree. Returns null when g is not found in the
 *  tree or when its node has no parent.
 */
public Grammar getDelegator(Grammar g) {
    CompositeGrammarTree node = delegateGrammarTreeRoot.findNode(g);
    boolean hasParent = node != null && node.parent != null;
    return hasParent ? node.parent.grammar : null;
}
/** Get list of all delegates from all grammars in the delegate subtree of g.
 *  The grammars are in the order produced by getPostOrderedGrammarList()
 *  on g's tree node. g itself is not included as it is not a delegate
 *  of itself; this relies on that list placing g last.
 *
 *  Returns null when g is not found in the delegation tree.
 */
public List<Grammar> getDelegates(Grammar g) {
    CompositeGrammarTree subtree = delegateGrammarTreeRoot.findNode(g);
    if (subtree == null) {
        return null; // g not in tree, so no delegates
    }
    List<Grammar> postOrdered = subtree.getPostOrderedGrammarList();
    int indexOfG = postOrdered.size() - 1; // g is the last entry
    postOrdered.remove(indexOfG);
    return postOrdered;
}
/** Get the grammars held by the immediate children of g's node in the
 *  delegation tree, in child order.
 *
 *  Returns null when g has no children. Also returns null, rather than
 *  throwing NullPointerException, when g is not found in the tree; this
 *  matches getDelegates().
 */
public List<Grammar> getDirectDelegates(Grammar g) {
    CompositeGrammarTree t = delegateGrammarTreeRoot.findNode(g);
    if (t == null) {
        return null; // g is not part of this composite
    }
    List<CompositeGrammarTree> children = t.children;
    if (children == null) {
        return null; // no direct delegates
    }
    List<Grammar> grammars = new ArrayList<Grammar>(children.size());
    for (CompositeGrammarTree child : children) {
        grammars.add(child.grammar);
    }
    return grammars;
}
/** Get delegates below direct delegates of g: everything returned by
 *  getDelegates(g) minus everything returned by getDirectDelegates(g).
 *
 *  Returns null when getDelegates(g) returns null (g not in the tree).
 *  When g has no direct delegates, getDirectDelegates(g) returns null;
 *  in that case nothing is removed, because passing null to removeAll
 *  would throw NullPointerException.
 */
public List<Grammar> getIndirectDelegates(Grammar g) {
    List<Grammar> delegates = getDelegates(g);
    if (delegates == null) {
        return null; // g is not part of this composite
    }
    List<Grammar> direct = getDirectDelegates(g);
    if (direct != null) {
        delegates.removeAll(direct);
    }
    return delegates;
}
/** Return list of delegator grammars from root down to g.
 *  Order is root, ..., g.parent. (g not included).
 *
 *  Returns null when g is the root grammar. Also returns null, rather
 *  than throwing NullPointerException, when g is not found in the
 *  delegation tree.
 */
public List<Grammar> getDelegators(Grammar g) {
    if (g == delegateGrammarTreeRoot.grammar) {
        return null; // the root has no delegators
    }
    CompositeGrammarTree t = delegateGrammarTreeRoot.findNode(g);
    if (t == null) {
        return null; // g is not part of this composite
    }
    List<Grammar> grammars = new ArrayList<Grammar>();
    // walk backwards to root, collecting grammars
    for (CompositeGrammarTree p = t.parent; p != null; p = p.parent) {
        grammars.add(0, p.grammar); // add to head so in order later
    }
    return grammars;
}
/** Get set of rules for grammar g that need to have manual delegation
 *  methods. Starts from getAllImportedRules(g) and drops every rule
 *  that g defines locally (an override) and every rule flagged isSynPred.
 *
 *  This returns null except for the delegate root because it is the only
 *  one that has to have a complete grammar rule interface. The delegates
 *  should not be instantiated directly for use as parsers (you can create
 *  them to pass to the root parser's ctor as arguments).
 */
public Set<Rule> getDelegatedRules(Grammar g) {
    if (g != delegateGrammarTreeRoot.grammar) {
        return null;
    }
    Set<Rule> rules = getAllImportedRules(g);
    Iterator<Rule> it = rules.iterator();
    while (it.hasNext()) {
        Rule r = it.next();
        boolean definedLocally = g.getLocallyDefinedRule(r.name) != null;
        // locally defined rules and synpreds get no delegation method
        if (definedLocally || r.isSynPred) {
            it.remove(); // kill overridden rules
        }
    }
    return rules;
}
/** Get rule definitions from the grammars in the delegation subtree of g,
 *  one Rule per distinct rule name.
 *
 *  The grammars are visited in the order given by
 *  getPreOrderedGrammarList() on g's tree node; when several grammars
 *  define a rule with the same name, the one visited first wins.
 *  NOTE(review): whether that list includes g itself depends on
 *  CompositeGrammarTree -- confirm; getDelegatedRules() filters out g's
 *  local rules afterwards either way.
 *
 *  Returns a new mutable set. It is empty, rather than a
 *  NullPointerException being thrown, when g is not found in the tree.
 */
public Set<Rule> getAllImportedRules(Grammar g) {
    Set<Rule> rules = new HashSet<Rule>();
    CompositeGrammarTree subtreeRoot = delegateGrammarTreeRoot.findNode(g);
    if (subtreeRoot == null) {
        return rules; // g is not part of this composite
    }
    Set<String> ruleNames = new HashSet<String>();
    List<Grammar> grammars = subtreeRoot.getPreOrderedGrammarList();
    // walk all grammars preorder, priority given to grammar listed first.
    for (Grammar delegate : grammars) {
        // for each rule in delegate, add to rules if no rule with that
        // name has been seen. (can't use removeAll; wrong hashcode/equals on Rule)
        for (Rule r : delegate.getRules()) {
            // Set.add returns true only for a name not seen before
            if (ruleNames.add(r.name)) {
                rules.add(r);
            }
        }
    }
    return rules;
}
/** Return the grammar at the root of the delegation tree, or null when
 *  no delegation root has been set.
 */
public Grammar getRootGrammar() {
    return delegateGrammarTreeRoot == null ? null : delegateGrammarTreeRoot.grammar;
}
/** Find a grammar in the delegation tree by name. Returns null when the
 *  tree has no node for grammarName.
 */
public Grammar getGrammar(String grammarName) {
    CompositeGrammarTree node = delegateGrammarTreeRoot.findNode(grammarName);
    return node == null ? null : node.grammar;
}
// NFA spans multiple grammars, must handle here
/** Return the current value of stateCounter as a fresh NFA state number
 *  and advance the counter by one.
 */
public int getNewNFAStateNumber() {
return stateCounter++;
}
/** Record state in numberToStateList at index state.stateNumber.
 *
 *  The list is grown when the state number lies past its current end.
 *  It is never shrunk: Vector.setSize() with a smaller size discards
 *  every element at or beyond the new size, so calling it
 *  unconditionally would drop higher-numbered states whenever a
 *  lower-numbered state is added after them.
 */
public void addState(NFAState state) {
    int stateNumber = state.stateNumber;
    if (stateNumber >= numberToStateList.size()) {
        numberToStateList.setSize(stateNumber + 1); // make sure we have room
    }
    numberToStateList.set(stateNumber, state);
}
/** Return the NFA state stored at index s of numberToStateList; the
 *  element may be null if nothing was stored at that index. An index
 *  outside the list raises the Vector's usual index exception.
 */
public NFAState getState(int s) {
    NFAState state = numberToStateList.get(s);
    return state;
}
/** Assign token types for all grammars in the composite.
 *
 *  A single walker visits the grammar tree of every grammar in the
 *  post-ordered list of the delegation tree. A RecognitionException
 *  raised while walking one grammar is reported through ErrorManager as
 *  MSG_BAD_AST_STRUCTURE and the walk continues with the next grammar.
 *  Afterwards the walker defines the collected tokens in the root grammar.
 */
public void assignTokenTypes() throws RecognitionException {
    // same walker instance for all delegates
    AssignTokenTypesWalker walker = new AssignTokenTypesBehavior();
    List<Grammar> grammars = delegateGrammarTreeRoot.getPostOrderedGrammarList();
    if (grammars != null) {
        for (Grammar g : grammars) {
            CommonTreeNodeStream nodes = new CommonTreeNodeStream(g.getGrammarTree());
            walker.setTreeNodeStream(nodes);
            try {
                walker.grammar_(g);
            }
            catch (RecognitionException re) {
                ErrorManager.error(ErrorManager.MSG_BAD_AST_STRUCTURE, re);
            }
        }
    }
    // the walker has filled literals, tokens, and alias tables.
    // now tell it to define them in the root grammar
    walker.defineTokens(delegateGrammarTreeRoot.grammar);
}
/** Translate immediately left-recursive rules in every PARSER or
 *  COMBINED grammar of the composite.
 *
 *  For each RULE subtree whose name (text of child 0) does not start
 *  with an upper-case letter, the grammar's translateLeftRecursiveRule()
 *  is called when LeftRecursiveRuleAnalyzer reports immediate recursive
 *  references to the rule itself.
 */
public void translateLeftRecursiveRules() {
    List<Grammar> grammars = delegateGrammarTreeRoot.getPostOrderedGrammarList();
    if (grammars == null) {
        return;
    }
    for (Grammar g : grammars) {
        boolean parserOrCombined = g.type == Grammar.PARSER || g.type == Grammar.COMBINED;
        if (!parserOrCombined) {
            continue;
        }
        for (GrammarAST ruleAST : g.grammarTree.findAllType(ANTLRParser.RULE)) {
            String ruleName = ruleAST.getChild(0).getText();
            if (Character.isUpperCase(ruleName.charAt(0))) {
                continue; // upper-case-initial names are skipped
            }
            if (LeftRecursiveRuleAnalyzer.hasImmediateRecursiveRuleRefs(ruleAST, ruleAST.enclosingRuleName)) {
                g.translateLeftRecursiveRule(ruleAST);
            }
        }
    }
}
/** Define symbols for every grammar in the composite.
 *
 *  Steps, in order: trim lexer imports into combined grammars on the
 *  delegation tree; call defineGrammarSymbols() on each grammar of the
 *  post-ordered list; then, in a second pass over the same list, call
 *  checkNameSpaceAndActions() on each; finally minimize the rule set.
 */
public void defineGrammarSymbols() {
    delegateGrammarTreeRoot.trimLexerImportsIntoCombined();
    List<Grammar> grammars = delegateGrammarTreeRoot.getPostOrderedGrammarList();
    if (grammars != null) {
        // pass 1: every grammar defines its symbols
        for (Grammar g : grammars) {
            g.defineGrammarSymbols();
        }
        // pass 2: checks run only after all symbols are defined
        for (Grammar g : grammars) {
            g.checkNameSpaceAndActions();
        }
    }
    minimizeRuleSet();
}
/** Build NFAs for every grammar in the composite.
 *
 *  Does nothing when ErrorManager.doNotAttemptAnalysis() is true.
 *  Otherwise all grammars in the post-ordered list first create their
 *  rule start/stop NFA states, and only then, in a second pass, does
 *  each grammar build its NFA.
 */
public void createNFAs() {
    if (ErrorManager.doNotAttemptAnalysis()) {
        return;
    }
    List<Grammar> grammars = delegateGrammarTreeRoot.getPostOrderedGrammarList();
    if (grammars == null) {
        return;
    }
    // pass 1: rule start/stop states for all grammars
    for (Grammar g : grammars) {
        g.createRuleStartAndStopNFAStates();
    }
    // pass 2: the NFAs themselves
    for (Grammar g : grammars) {
        g.buildNFA();
    }
}
/** Compute the overridden rules of every grammar in the delegation tree,
 *  starting at the root with an empty set of already-defined rule names.
 */
public void minimizeRuleSet() {
    _minimizeRuleSet(new HashSet<String>(), delegateGrammarTreeRoot);
}
/** Recursive worker for minimizeRuleSet().
 *
 *  For the grammar at node p, every rule whose name is already in
 *  ruleDefs is recorded as overridden (except the artificial tokens
 *  rule); the result is stored in p.grammar.overriddenRules. Rule names
 *  not yet in ruleDefs are added to it afterwards, and the grown set is
 *  passed on to each child of p in turn, so the same rule in two
 *  delegates resolves in favor of the first one found in the tree.
 *
 *  @param ruleDefs names of rules defined so far while walking the tree;
 *                  modified in place
 *  @param p        tree node whose grammar and children are processed
 */
public void _minimizeRuleSet(Set<String> ruleDefs,
                             CompositeGrammarTree p) {
    Set<String> newlyDefined = new HashSet<String>();
    Set<String> overridden = new HashSet<String>();
    for (Rule r : p.grammar.getRules()) {
        String name = r.name;
        if (ruleDefs.contains(name)) {
            // record any overridden rule 'cept tokens rule
            if (!name.equals(Grammar.ARTIFICIAL_TOKENS_RULENAME)) {
                overridden.add(name);
            }
        }
        else {
            newlyDefined.add(name);
        }
    }
    p.grammar.overriddenRules = overridden;

    // names are merged only after the whole grammar has been scanned
    ruleDefs.addAll(newlyDefined);

    if (p.children == null) {
        return;
    }
    // pass larger set of defined rules to delegates
    for (CompositeGrammarTree delegate : p.children) {
        _minimizeRuleSet(ruleDefs, delegate);
    }
}
/*
public void minimizeRuleSet() {
Set<Rule> refs = _minimizeRuleSet(delegateGrammarTreeRoot);
System.out.println("all rule refs: "+refs);
}
public Set<Rule> _minimizeRuleSet(CompositeGrammarTree p) {
Set<Rule> refs = new HashSet<Rule>();
for (GrammarAST refAST : p.grammar.ruleRefs) {
System.out.println("ref "+refAST.getText()+": "+refAST.NFAStartState+
" enclosing rule: "+refAST.NFAStartState.enclosingRule+
" invoking rule: "+((NFAState)refAST.NFAStartState.transition[0].target).enclosingRule);
refs.add(((NFAState)refAST.NFAStartState.transition[0].target).enclosingRule);
}
if ( p.children!=null ) {
for (CompositeGrammarTree delegate : p.children) {
Set<Rule> delegateRuleRefs = _minimizeRuleSet(delegate);
refs.addAll(delegateRuleRefs);
}
}
return refs;
}
*/
/*
public void oldminimizeRuleSet() {
// first walk to remove all overridden rules
Set<String> ruleDefs = new HashSet<String>();
Set<String> ruleRefs = new HashSet<String>();
for (GrammarAST refAST : delegateGrammarTreeRoot.grammar.ruleRefs) {
String rname = refAST.getText();
ruleRefs.add(rname);
}
_minimizeRuleSet(ruleDefs,
ruleRefs,
delegateGrammarTreeRoot);
System.out.println("overall rule defs: "+ruleDefs);
}
public void _minimizeRuleSet(Set<String> ruleDefs,
Set<String> ruleRefs,
CompositeGrammarTree p) {
Set<String> localRuleDefs = new HashSet<String>();
for (Rule r : p.grammar.getRules()) {
if ( !ruleDefs.contains(r.name) ) {
localRuleDefs.add(r.name);
ruleDefs.add(r.name);
}
}
System.out.println("rule defs for "+p.grammar.name+": "+localRuleDefs);
// remove locally-defined rules not in ref set
// find intersection of local rules and references from delegator
// that is set of rules needed by delegator
Set<String> localRuleDefsSatisfyingRefsFromBelow = new HashSet<String>();
for (String r : ruleRefs) {
if ( localRuleDefs.contains(r) ) {
localRuleDefsSatisfyingRefsFromBelow.add(r);
}
}
// now get list of refs from localRuleDefsSatisfyingRefsFromBelow.
// Those rules are also allowed in this delegate
for (GrammarAST refAST : p.grammar.ruleRefs) {
if ( localRuleDefsSatisfyingRefsFromBelow.contains(refAST.enclosingRuleName) ) {
// found rule ref within needed rule
}
}
// remove rule refs not in the new rule def set
// walk all children, adding rules not already defined
if ( p.children!=null ) {
for (CompositeGrammarTree delegate : p.children) {
_minimizeRuleSet(ruleDefs, ruleRefs, delegate);
}
}
}
*/
/*
public void trackNFAStatesThatHaveLabeledEdge(Label label,
NFAState stateWithLabeledEdge)
{
Set<NFAState> states = typeToNFAStatesWithEdgeOfTypeMap.get(label);
if ( states==null ) {
states = new HashSet<NFAState>();
typeToNFAStatesWithEdgeOfTypeMap.put(label, states);
}
states.add(stateWithLabeledEdge);
}
public Map<Label, Set<NFAState>> getTypeToNFAStatesWithEdgeOfTypeMap() {
return typeToNFAStatesWithEdgeOfTypeMap;
}
public Set<NFAState> getStatesWithEdge(Label label) {
return typeToNFAStatesWithEdgeOfTypeMap.get(label);
}
*/
}