| /* |
| * [The "BSD license"] |
| * Copyright (c) 2010 Terence Parr |
| * All rights reserved. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions |
| * are met: |
| * 1. Redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer. |
| * 2. Redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in the |
| * documentation and/or other materials provided with the distribution. |
| * 3. The name of the author may not be used to endorse or promote products |
| * derived from this software without specific prior written permission. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR |
| * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
| * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. |
| * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, |
| * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
| * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF |
| * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| */ |
| package org.antlr.tool; |
| |
| import org.antlr.Tool; |
| import org.antlr.analysis.DFA; |
| import org.antlr.analysis.DFAState; |
| import org.antlr.analysis.LL1Analyzer; |
| import org.antlr.analysis.LL1DFA; |
| import org.antlr.analysis.Label; |
| import org.antlr.analysis.LookaheadSet; |
| import org.antlr.analysis.NFA; |
| import org.antlr.analysis.NFAConversionThread; |
| import org.antlr.analysis.NFAState; |
| import org.antlr.analysis.NFAToDFAConverter; |
| import org.antlr.analysis.SemanticContext; |
| import org.antlr.analysis.Transition; |
| import org.antlr.codegen.CodeGenerator; |
| import org.antlr.codegen.Target; |
| import org.antlr.grammar.v3.ANTLRLexer; |
| import org.antlr.grammar.v3.ANTLRParser; |
| import org.antlr.grammar.v3.ANTLRTreePrinter; |
| import org.antlr.grammar.v3.ActionAnalysis; |
| import org.antlr.grammar.v3.DefineGrammarItemsWalker; |
| import org.antlr.grammar.v3.TreeToNFAConverter; |
| import org.antlr.misc.Barrier; |
| import org.antlr.misc.IntSet; |
| import org.antlr.misc.IntervalSet; |
| import org.antlr.misc.MultiMap; |
| import org.antlr.misc.OrderedHashSet; |
| import org.antlr.misc.Utils; |
| import org.antlr.runtime.ANTLRReaderStream; |
| import org.antlr.runtime.ANTLRStringStream; |
| import org.antlr.runtime.CommonToken; |
| import org.antlr.runtime.CommonTokenStream; |
| import org.antlr.runtime.RecognitionException; |
| import org.antlr.runtime.Token; |
| import org.antlr.runtime.tree.CommonTreeNodeStream; |
| import org.stringtemplate.v4.ST; |
| import org.stringtemplate.v4.STGroup; |
| import org.stringtemplate.v4.STGroupString; |
| |
| import java.io.BufferedReader; |
| import java.io.File; |
| import java.io.FileNotFoundException; |
| import java.io.FileReader; |
| import java.io.IOException; |
| import java.io.PrintStream; |
| import java.io.Reader; |
| import java.io.StreamTokenizer; |
| import java.io.StringReader; |
| import java.util.ArrayList; |
| import java.util.Arrays; |
| import java.util.BitSet; |
| import java.util.Collection; |
| import java.util.HashMap; |
| import java.util.HashSet; |
| import java.util.Iterator; |
| import java.util.LinkedHashMap; |
| import java.util.List; |
| import java.util.Map; |
| import java.util.Set; |
| import java.util.Vector; |
| |
| /** Represents a grammar in memory. */ |
| public class Grammar { |
| public static final String SYNPRED_RULE_PREFIX = "synpred"; |
| |
| public static final String GRAMMAR_FILE_EXTENSION = ".g"; |
| |
| /** used for generating lexer temp files */ |
| public static final String LEXER_GRAMMAR_FILE_EXTENSION = ".g"; |
| |
| public static final int INITIAL_DECISION_LIST_SIZE = 300; |
| public static final int INVALID_RULE_INDEX = -1; |
| |
| // the various kinds of labels. t=type, id=ID, types+=type ids+=ID |
| public static final int RULE_LABEL = 1; |
| public static final int TOKEN_LABEL = 2; |
| public static final int RULE_LIST_LABEL = 3; |
| public static final int TOKEN_LIST_LABEL = 4; |
| public static final int CHAR_LABEL = 5; // used in lexer for x='a' |
| public static final int WILDCARD_TREE_LABEL = 6; // Used in tree grammar x=. |
| public static final int WILDCARD_TREE_LIST_LABEL = 7; // Used in tree grammar x+=. |
| |
| |
| public static String[] LabelTypeToString = |
| {"<invalid>", "rule", "token", "rule-list", "token-list", "wildcard-tree", "wildcard-tree-list"}; |
| |
| public static final String ARTIFICIAL_TOKENS_RULENAME = "Tokens"; |
| public static final String FRAGMENT_RULE_MODIFIER = "fragment"; |
| |
| public static final String SYNPREDGATE_ACTION_NAME = "synpredgate"; |
| |
| /** When converting ANTLR char and string literals, here is the |
| * value set of escape chars. |
| */ |
| public static int ANTLRLiteralEscapedCharValue[] = new int[255]; |
| |
| /** Given a char, we need to be able to show as an ANTLR literal. |
| */ |
| public static String ANTLRLiteralCharValueEscape[] = new String[255]; |
| |
| static { |
| ANTLRLiteralEscapedCharValue['n'] = '\n'; |
| ANTLRLiteralEscapedCharValue['r'] = '\r'; |
| ANTLRLiteralEscapedCharValue['t'] = '\t'; |
| ANTLRLiteralEscapedCharValue['b'] = '\b'; |
| ANTLRLiteralEscapedCharValue['f'] = '\f'; |
| ANTLRLiteralEscapedCharValue['\\'] = '\\'; |
| ANTLRLiteralEscapedCharValue['\''] = '\''; |
| ANTLRLiteralEscapedCharValue['"'] = '"'; |
| ANTLRLiteralCharValueEscape['\n'] = "\\n"; |
| ANTLRLiteralCharValueEscape['\r'] = "\\r"; |
| ANTLRLiteralCharValueEscape['\t'] = "\\t"; |
| ANTLRLiteralCharValueEscape['\b'] = "\\b"; |
| ANTLRLiteralCharValueEscape['\f'] = "\\f"; |
| ANTLRLiteralCharValueEscape['\\'] = "\\\\"; |
| ANTLRLiteralCharValueEscape['\''] = "\\'"; |
| } |
| |
| public static final int LEXER = 1; |
| public static final int PARSER = 2; |
| public static final int TREE_PARSER = 3; |
| public static final int COMBINED = 4; |
| public static final String[] grammarTypeToString = new String[] { |
| "<invalid>", |
| "lexer", |
| "parser", |
| "tree", |
| "combined" |
| }; |
| |
| public static final String[] grammarTypeToFileNameSuffix = new String[] { |
| "<invalid>", |
| "Lexer", |
| "Parser", |
| "", // no suffix for tree grammars |
| "Parser" // if combined grammar, gen Parser and Lexer will be done later |
| }; |
| |
| /** Set of valid imports. E.g., can only import a tree parser into |
| * another tree parser. Maps delegate to set of delegator grammar types. |
| * validDelegations.get(LEXER) gives list of the kinds of delegators |
| * that can import lexers. |
| */ |
| public static MultiMap<Integer,Integer> validDelegations = |
| new MultiMap<Integer,Integer>() { |
| { |
| map(LEXER, LEXER); |
| map(LEXER, PARSER); |
| map(LEXER, COMBINED); |
| |
| map(PARSER, PARSER); |
| map(PARSER, COMBINED); |
| |
| map(TREE_PARSER, TREE_PARSER); |
| |
| // TODO: allow COMBINED |
| // map(COMBINED, COMBINED); |
| } |
| }; |
| |
| /** This is the buffer of *all* tokens found in the grammar file |
| * including whitespace tokens etc... I use this to extract |
| * lexer rules from combined grammars. |
| */ |
| public CommonTokenStream tokenBuffer; |
| public static final String IGNORE_STRING_IN_GRAMMAR_FILE_NAME = "__"; |
| public static final String AUTO_GENERATED_TOKEN_NAME_PREFIX = "T__"; |
| |
| public static class Decision { |
| public Grammar grammar; |
| public int decision; |
| public NFAState startState; |
| public GrammarAST blockAST; |
| public DFA dfa; |
| } |
| |
| public class LabelElementPair { |
| public Token label; |
| public GrammarAST elementRef; |
| public String referencedRuleName; |
| /** Has an action referenced the label? Set by ActionAnalysis.g |
| * Currently only set for rule labels. |
| */ |
| public boolean actionReferencesLabel; |
| public int type; // in {RULE_LABEL,TOKEN_LABEL,RULE_LIST_LABEL,TOKEN_LIST_LABEL} |
| public LabelElementPair(Token label, GrammarAST elementRef) { |
| this.label = label; |
| this.elementRef = elementRef; |
| this.referencedRuleName = elementRef.getText(); |
| } |
| public Rule getReferencedRule() { |
| return getRule(referencedRuleName); |
| } |
| @Override |
| public String toString() { |
| return elementRef.toString(); |
| } |
| } |
| |
| /** What name did the user provide for this grammar? */ |
| public String name; |
| |
| /** What type of grammar is this: lexer, parser, tree walker */ |
| public int type; |
| |
| /** A list of options specified at the grammar level such as language=Java. |
| * The value can be an AST for complicated values such as character sets. |
| * There may be code generator specific options in here. I do no |
| * interpretation of the key/value pairs...they are simply available for |
| * who wants them. |
| */ |
| protected Map<String, Object> options; |
| |
| public static final Set<String> legalLexerOptions = |
| new HashSet<String>() { |
| { |
| add("language"); add("tokenVocab"); |
| add("TokenLabelType"); |
| add("superClass"); |
| add("filter"); |
| add("k"); |
| add("backtrack"); |
| add("memoize"); |
| } |
| }; |
| |
| public static final Set<String> legalParserOptions = |
| new HashSet<String>() { |
| { |
| add("language"); add("tokenVocab"); |
| add("output"); add("rewrite"); add("ASTLabelType"); |
| add("TokenLabelType"); |
| add("superClass"); |
| add("k"); |
| add("backtrack"); |
| add("memoize"); |
| } |
| }; |
| |
| public static final Set<String> legalTreeParserOptions = |
| new HashSet<String>() { |
| { |
| add("language"); add("tokenVocab"); |
| add("output"); add("rewrite"); add("ASTLabelType"); |
| add("TokenLabelType"); |
| add("superClass"); |
| add("k"); |
| add("backtrack"); |
| add("memoize"); |
| add("filter"); |
| } |
| }; |
| |
| public static final Set<String> doNotCopyOptionsToLexer = |
| new HashSet<String>() { |
| { |
| add("output"); add("ASTLabelType"); add("superClass"); |
| add("k"); add("backtrack"); add("memoize"); add("rewrite"); |
| } |
| }; |
| |
| public static final Map<String, String> defaultOptions = |
| new HashMap<String, String>() { |
| { |
| put("language","Java"); |
| } |
| }; |
| |
| public static final Set<String> legalBlockOptions = |
| new HashSet<String>() {{add("k"); add("greedy"); add("backtrack"); add("memoize");}}; |
| |
| /** What are the default options for a subrule? */ |
| public static final Map<String, String> defaultBlockOptions = |
| new HashMap<String, String>() {{put("greedy","true");}}; |
| |
| public static final Map<String, String> defaultLexerBlockOptions = |
| new HashMap<String, String>() {{put("greedy","true");}}; |
| |
| // Token options are here to avoid contaminating Token object in runtime |
| |
| /** Legal options for terminal refs like ID<node=MyVarNode> */ |
| public static final Set<String> legalTokenOptions = |
| new HashSet<String>() { |
| { |
| add(defaultTokenOption); |
| add("type"); |
| add("text"); |
| add("assoc"); |
| } |
| }; |
| |
| public static final String defaultTokenOption = "node"; |
| |
| /** Is there a global fixed lookahead set for this grammar? |
| * If 0, nothing specified. -1 implies we have not looked at |
| * the options table yet to set k. |
| */ |
| protected int global_k = -1; |
| |
| /** Map a scope to a map of name:action pairs. |
| * Map<String, Map<String,GrammarAST>> |
| * The code generator will use this to fill holes in the output files. |
| * I track the AST node for the action in case I need the line number |
| * for errors. |
| */ |
| private Map<String, Map<String, Object>> actions = |
| new HashMap<String, Map<String, Object>>(); |
| |
| /** The NFA that represents the grammar with edges labelled with tokens |
| * or epsilon. It is more suitable to analysis than an AST representation. |
| */ |
| public NFA nfa; |
| |
| protected NFAFactory factory; |
| |
| /** If this grammar is part of a larger composite grammar via delegate |
| * statement, then this points at the composite. The composite holds |
| * a global list of rules, token types, decision numbers, etc... |
| */ |
| public CompositeGrammar composite; |
| |
| /** A pointer back into grammar tree. Needed so we can add delegates. */ |
| public CompositeGrammarTree compositeTreeNode; |
| |
| /** If this is a delegate of another grammar, this is the label used |
| * as an instance var by that grammar to point at this grammar. null |
| * if no label was specified in the delegate statement. |
| */ |
| public String label; |
| |
| /** TODO: hook this to the charVocabulary option */ |
| protected IntSet charVocabulary = null; |
| |
| /** For ANTLRWorks, we want to be able to map a line:col to a specific |
| * decision DFA so it can display DFA. |
| */ |
| Map<String, DFA> lineColumnToLookaheadDFAMap = new HashMap<String, DFA>(); |
| |
| public Tool tool; |
| |
| /** The unique set of all rule references in any rule; set of tree node |
| * objects so two refs to same rule can exist but at different line/position. |
| */ |
| protected Set<GrammarAST> ruleRefs = new HashSet<GrammarAST>(); |
| |
| protected Set<GrammarAST> scopedRuleRefs = new HashSet<GrammarAST>(); |
| |
| /** The unique set of all token ID references in any rule */ |
| protected Set<Token> tokenIDRefs = new HashSet<Token>(); |
| |
| /** Be able to assign a number to every decision in grammar; |
| * decisions in 1..n |
| */ |
| protected int decisionCount = 0; |
| |
| /** A list of all rules that are in any left-recursive cycle. There |
| * could be multiple cycles, but this is a flat list of all problematic |
| * rules. This is stuff we couldn't refactor to precedence rule. |
| */ |
| protected Set<Rule> leftRecursiveRules; |
| |
| /** An external tool requests that DFA analysis abort prematurely. Stops |
| * at DFA granularity, which are limited to a DFA size and time computation |
| * as failsafe. |
| */ |
| protected boolean externalAnalysisAbort; |
| |
| public int numNonLLStar = 0; // hack to track for -report |
| |
| /** When we read in a grammar, we track the list of syntactic predicates |
| * and build faux rules for them later. See my blog entry Dec 2, 2005: |
| * http://www.antlr.org/blog/antlr3/lookahead.tml |
| * This maps the name (we make up) for a pred to the AST grammar fragment. |
| */ |
| protected LinkedHashMap<String, GrammarAST> nameToSynpredASTMap; |
| |
| /** Each left-recursive precedence rule must define precedence array |
| * for binary operators like: |
| * |
| * static int[] e_prec = new int[tokenNames.length]; |
| * static { |
| * e_prec[75] = 1; |
| * } |
| * Track and we push into parser later; this is computed |
| * early when we look for prec rules. |
| */ |
| public List<String> precRuleInitCodeBlocks = new ArrayList<String>(); |
| |
| /** At least one rule has memoize=true */ |
| public boolean atLeastOneRuleMemoizes; |
| |
| /** At least one backtrack=true in rule or decision or grammar. */ |
| public boolean atLeastOneBacktrackOption; |
| |
| /** Was this created from a COMBINED grammar? */ |
| public boolean implicitLexer; |
| |
| /** Map a rule to it's Rule object */ |
| protected LinkedHashMap<String,Rule> nameToRuleMap = new LinkedHashMap<String,Rule>(); |
| |
| /** If this rule is a delegate, some rules might be overridden; don't |
| * want to gen code for them. |
| */ |
| public Set<String> overriddenRules = new HashSet<String>(); |
| |
| /** The list of all rules referenced in this grammar, not defined here, |
| * and defined in a delegate grammar. Not all of these will be generated |
| * in the recognizer for this file; only those that are affected by rule |
| * definitions in this grammar. I am not sure the Java target will need |
| * this but I'm leaving in case other targets need it. |
| * see NameSpaceChecker.lookForReferencesToUndefinedSymbols() |
| */ |
| protected Set<Rule> delegatedRuleReferences = new HashSet<Rule>(); |
| |
| /** The ANTLRParser tracks lexer rules when reading combined grammars |
| * so we can build the Tokens rule. |
| */ |
| public List<String> lexerRuleNamesInCombined = new ArrayList<String>(); |
| |
| /** Track the scopes defined outside of rules and the scopes associated |
| * with all rules (even if empty). |
| */ |
| protected Map<String, AttributeScope> scopes = new HashMap<String, AttributeScope>(); |
| |
| /** An AST that records entire input grammar with all rules. A simple |
| * grammar with one rule, "grammar t; a : A | B ;", looks like: |
| * ( grammar t ( rule a ( BLOCK ( ALT A ) ( ALT B ) ) <end-of-rule> ) ) |
| */ |
| protected GrammarAST grammarTree = null; |
| |
| /** Each subrule/rule is a decision point and we must track them so we |
| * can go back later and build DFA predictors for them. This includes |
| * all the rules, subrules, optional blocks, ()+, ()* etc... |
| */ |
| protected Vector<Decision> indexToDecision = |
| new Vector<Decision>(INITIAL_DECISION_LIST_SIZE); |
| |
| /** If non-null, this is the code generator we will use to generate |
| * recognizers in the target language. |
| */ |
| protected CodeGenerator generator; |
| |
| public NameSpaceChecker nameSpaceChecker = new NameSpaceChecker(this); |
| |
| public LL1Analyzer ll1Analyzer = new LL1Analyzer(this); |
| |
| /** For merged lexer/parsers, we must construct a separate lexer spec. |
| * This is the template for lexer; put the literals first then the |
| * regular rules. We don't need to specify a token vocab import as |
| * I make the new grammar import from the old all in memory; don't want |
| * to force it to read from the disk. Lexer grammar will have same |
| * name as original grammar but will be in different filename. Foo.g |
| * with combined grammar will have FooParser.java generated and |
| * Foo__.g with again Foo inside. It will however generate FooLexer.java |
| * as it's a lexer grammar. A bit odd, but autogenerated. Can tweak |
| * later if we want. |
| */ |
| protected String lexerGrammarTemplate = |
| "grammar(name, options, imports, actionNames, actions, literals, rules) ::= <<\n" + |
| "lexer grammar <name>;\n" + |
| "<if(options)>" + |
| "options {\n" + |
| " <options:{it | <it.name>=<it.value>;<\\n>}>\n" + |
| "}<\\n>\n" + |
| "<endif>\n" + |
| "<if(imports)>import <imports; separator=\", \">;<endif>\n" + |
| "<actionNames,actions:{n,a|@<n> {<a>\\}\n}>\n" + |
| "<literals:{it | <it.ruleName> : <it.literal> ;\n}>\n" + |
| "<rules>\n" + |
| ">>\n"; |
| protected ST lexerGrammarST; |
| |
| /** What file name holds this grammar? */ |
| protected String fileName; |
| |
| /** How long in ms did it take to build DFAs for this grammar? |
| * If this grammar is a combined grammar, it only records time for |
| * the parser grammar component. This only records the time to |
| * do the LL(*) work; NFA→DFA conversion. |
| */ |
| public long DFACreationWallClockTimeInMS; |
| |
| public int numberOfSemanticPredicates = 0; |
| public int numberOfManualLookaheadOptions = 0; |
| public Set<Integer> setOfNondeterministicDecisionNumbers = new HashSet<Integer>(); |
| public Set<Integer> setOfNondeterministicDecisionNumbersResolvedWithPredicates = |
| new HashSet<Integer>(); |
| |
| /** Track decisions with syn preds specified for reporting. |
| * This is the a set of BLOCK type AST nodes. |
| */ |
| public Set<GrammarAST> blocksWithSynPreds = new HashSet<GrammarAST>(); |
| |
| /** Track decisions that actually use the syn preds in the DFA. |
| * Computed during NFA to DFA conversion. |
| */ |
| public Set<DFA> decisionsWhoseDFAsUsesSynPreds = new HashSet<DFA>(); |
| |
| /** Track names of preds so we can avoid generating preds that aren't used |
| * Computed during NFA to DFA conversion. Just walk accept states |
| * and look for synpreds because that is the only state target whose |
| * incident edges can have synpreds. Same is try for |
| * decisionsWhoseDFAsUsesSynPreds. |
| */ |
| public Set<String> synPredNamesUsedInDFA = new HashSet<String>(); |
| |
| /** Track decisions with syn preds specified for reporting. |
| * This is the a set of BLOCK type AST nodes. |
| */ |
| public Set<GrammarAST> blocksWithSemPreds = new HashSet<GrammarAST>(); |
| |
| /** Track decisions that actually use the syn preds in the DFA. */ |
| public Set<DFA> decisionsWhoseDFAsUsesSemPreds = new HashSet<DFA>(); |
| |
| protected boolean allDecisionDFACreated = false; |
| |
| /** We need a way to detect when a lexer grammar is autogenerated from |
| * another grammar or we are just sending in a string representing a |
| * grammar. We don't want to generate a .tokens file, for example, |
| * in such cases. |
| */ |
| protected boolean builtFromString = false; |
| |
| /** Factored out the sanity checking code; delegate to it. */ |
| GrammarSanity sanity = new GrammarSanity(this); |
| |
| /** Useful for asking questions about target during analysis */ |
| Target target; |
| |
| /** Create a grammar from file name. */ |
| public Grammar(Tool tool, String fileName, CompositeGrammar composite) { |
| this.composite = composite; |
| setTool(tool); |
| setFileName(fileName); |
| // ensure we have the composite set to something |
| if ( composite.delegateGrammarTreeRoot==null ) { |
| composite.setDelegationRoot(this); |
| } |
| STGroup lexerGrammarSTG = new STGroupString(lexerGrammarTemplate); |
| lexerGrammarST = lexerGrammarSTG.getInstanceOf("grammar"); |
| target = CodeGenerator.loadLanguageTarget((String) getOption("language")); |
| } |
| |
| /** Useful for when you are sure that you are not part of a composite |
| * already. Used in Interp/RandomPhrase and testing. |
| */ |
| public Grammar() { this((Tool)null); } |
| |
| public Grammar(Tool tool) { |
| setTool(tool); |
| builtFromString = true; |
| composite = new CompositeGrammar(this); |
| STGroup lexerGrammarSTG = new STGroupString(lexerGrammarTemplate); |
| lexerGrammarST = lexerGrammarSTG.getInstanceOf("grammar"); |
| target = CodeGenerator.loadLanguageTarget((String)getOption("language")); |
| } |
| |
| /** Used for testing; only useful on noncomposite grammars.*/ |
| public Grammar(String grammarString) |
| throws RecognitionException |
| { |
| this(null, grammarString); |
| } |
| |
| /** Used for testing and Interp/RandomPhrase. Only useful on |
| * noncomposite grammars. |
| */ |
| public Grammar(Tool tool, String grammarString) |
| throws RecognitionException |
| { |
| this(tool); |
| setFileName("<string>"); |
| StringReader r = new StringReader(grammarString); |
| parseAndBuildAST(r); |
| composite.assignTokenTypes(); |
| //composite.translateLeftRecursiveRules(); |
| addRulesForSyntacticPredicates(); |
| composite.defineGrammarSymbols(); |
| //composite.createNFAs(); |
| checkNameSpaceAndActions(); |
| } |
| |
| public void setFileName(String fileName) { |
| this.fileName = fileName; |
| } |
| |
| public String getFileName() { |
| return fileName; |
| } |
| |
| public void setName(String name) { |
| if ( name==null ) { |
| return; |
| } |
| // don't error check autogenerated files (those with '__' in them) |
| String saneFile = fileName.replace('\\', '/'); |
| int lastSlash = saneFile.lastIndexOf('/'); |
| String onlyFileName = saneFile.substring(lastSlash+1, fileName.length()); |
| if ( !builtFromString ) { |
| int lastDot = onlyFileName.lastIndexOf('.'); |
| String onlyFileNameNoSuffix; |
| if ( lastDot < 0 ) { |
| ErrorManager.error(ErrorManager.MSG_FILENAME_EXTENSION_ERROR, fileName); |
| onlyFileNameNoSuffix = onlyFileName+GRAMMAR_FILE_EXTENSION; |
| } |
| else { |
| onlyFileNameNoSuffix = onlyFileName.substring(0,lastDot); |
| } |
| if ( !name.equals(onlyFileNameNoSuffix) ) { |
| ErrorManager.error(ErrorManager.MSG_FILE_AND_GRAMMAR_NAME_DIFFER, |
| name, |
| fileName); |
| } |
| } |
| this.name = name; |
| } |
| |
| public void setGrammarContent(String grammarString) throws RecognitionException { |
| StringReader r = new StringReader(grammarString); |
| parseAndBuildAST(r); |
| composite.assignTokenTypes(); |
| composite.defineGrammarSymbols(); |
| } |
| |
| public void parseAndBuildAST() |
| throws IOException |
| { |
| FileReader fr; |
| BufferedReader br = null; |
| try { |
| fr = new FileReader(fileName); |
| br = new BufferedReader(fr); |
| parseAndBuildAST(br); |
| br.close(); |
| br = null; |
| } |
| finally { |
| if ( br!=null ) { |
| br.close(); |
| } |
| } |
| } |
| |
| public void parseAndBuildAST(Reader r) { |
| // BUILD AST FROM GRAMMAR |
| ANTLRLexer lexer; |
| try { |
| lexer = new ANTLRLexer(new ANTLRReaderStream(r)); |
| } catch (IOException e) { |
| ErrorManager.internalError("unexpected stream error from parsing "+fileName, e); |
| return; |
| } |
| |
| lexer.setFileName(this.getFileName()); |
| tokenBuffer = new CommonTokenStream(lexer); |
| ANTLRParser parser = ANTLRParser.createParser(tokenBuffer); |
| parser.setFileName(this.getFileName()); |
| ANTLRParser.grammar__return result = null; |
| try { |
| result = parser.grammar_(this); |
| } |
| catch (RecognitionException re) { |
| ErrorManager.internalError("unexpected parser recognition error from "+fileName, re); |
| } |
| |
| dealWithTreeFilterMode(); // tree grammar and filter=true? |
| |
| if ( lexer.hasASTOperator && !buildAST() ) { |
| Object value = getOption("output"); |
| if ( value == null ) { |
| ErrorManager.grammarWarning(ErrorManager.MSG_REWRITE_OR_OP_WITH_NO_OUTPUT_OPTION, |
| this, null); |
| setOption("output", "AST", null); |
| } |
| else { |
| ErrorManager.grammarError(ErrorManager.MSG_AST_OP_WITH_NON_AST_OUTPUT_OPTION, |
| this, null, value); |
| } |
| } |
| |
| setGrammarTree(result.getTree()); |
| |
| //if ( grammarTree!=null ) System.out.println("grammar tree: "+grammarTree.toStringTree()); |
| |
| grammarTree.setUnknownTokenBoundaries(); |
| |
| setFileName(lexer.getFileName()); // the lexer #src might change name |
| if ( grammarTree.findFirstType(ANTLRParser.RULE)==null ) { |
| ErrorManager.error(ErrorManager.MSG_NO_RULES, getFileName()); |
| } |
| } |
| |
| protected void dealWithTreeFilterMode() { |
| Object filterMode = (String)getOption("filter"); |
| if ( type==TREE_PARSER && filterMode!=null && filterMode.toString().equals("true") ) { |
| // check for conflicting options |
| // filter => backtrack=true |
| // filter&&output=AST => rewrite=true |
| // filter&&output!=AST => error |
| // any deviation from valid option set is an error |
| Object backtrack = (String)getOption("backtrack"); |
| Object output = getOption("output"); |
| Object rewrite = getOption("rewrite"); |
| if ( backtrack!=null && !backtrack.toString().equals("true") ) { |
| ErrorManager.error(ErrorManager.MSG_CONFLICTING_OPTION_IN_TREE_FILTER, |
| "backtrack", backtrack); |
| } |
| if ( output!=null && !output.toString().equals("AST") ) { |
| ErrorManager.error(ErrorManager.MSG_CONFLICTING_OPTION_IN_TREE_FILTER, |
| "output", output); |
| setOption("output", "", null); |
| } |
| if ( rewrite!=null && !rewrite.toString().equals("true") ) { |
| ErrorManager.error(ErrorManager.MSG_CONFLICTING_OPTION_IN_TREE_FILTER, |
| "rewrite", rewrite); |
| } |
| // set options properly |
| setOption("backtrack", "true", null); |
| if ( output!=null && output.toString().equals("AST") ) { |
| setOption("rewrite", "true", null); |
| } |
| // @synpredgate set to state.backtracking==1 by code gen when filter=true |
| // superClass set in template target::treeParser |
| } |
| } |
| |
| public void translateLeftRecursiveRule(GrammarAST ruleAST) { |
| //System.out.println(ruleAST.toStringTree()); |
| CommonTreeNodeStream input = new CommonTreeNodeStream(ruleAST); |
| LeftRecursiveRuleAnalyzer leftRecursiveRuleWalker = |
| new LeftRecursiveRuleAnalyzer(input, this, ruleAST.enclosingRuleName); |
| boolean isLeftRec = false; |
| try { |
| //System.out.println("TESTING "+ruleAST.enclosingRuleName); |
| isLeftRec = leftRecursiveRuleWalker.rec_rule(this); |
| } |
| catch (RecognitionException re) { |
| ErrorManager.error(ErrorManager.MSG_BAD_AST_STRUCTURE, re); |
| } |
| if ( !isLeftRec ) return; |
| List<String> rules = new ArrayList<String>(); |
| rules.add( leftRecursiveRuleWalker.getArtificialPrecStartRule() ) ; |
| rules.add( leftRecursiveRuleWalker.getArtificialOpPrecRule() ); |
| rules.add( leftRecursiveRuleWalker.getArtificialPrimaryRule() ); |
| for (String r : rules) { |
| GrammarAST t = parseArtificialRule(r); |
| addRule(grammarTree, t); |
| //System.out.println(t.toStringTree()); |
| } |
| |
| //precRuleInitCodeBlocks.add( precRuleWalker.getOpPrecJavaCode() ); |
| } |
| |
| public void defineGrammarSymbols() { |
| if ( Tool.internalOption_PrintGrammarTree ) { |
| System.out.println(grammarTree.toStringList()); |
| } |
| |
| // DEFINE RULES |
| //System.out.println("### define "+name+" rules"); |
| DefineGrammarItemsWalker defineItemsWalker = new DefineGrammarItemsWalker(new CommonTreeNodeStream(getGrammarTree())); |
| try { |
| defineItemsWalker.grammar_(this); |
| } |
| catch (RecognitionException re) { |
| ErrorManager.error(ErrorManager.MSG_BAD_AST_STRUCTURE, |
| re); |
| } |
| } |
| |
| /** ANALYZE ACTIONS, LOOKING FOR LABEL AND ATTR REFS, sanity check */ |
| public void checkNameSpaceAndActions() { |
| examineAllExecutableActions(); |
| checkAllRulesForUselessLabels(); |
| |
| nameSpaceChecker.checkConflicts(); |
| } |
| |
| /** Many imports are illegal such as lexer into a tree grammar */ |
| public boolean validImport(Grammar delegate) { |
| List<Integer> validDelegators = validDelegations.get(delegate.type); |
| return validDelegators!=null && validDelegators.contains(this.type); |
| } |
| |
| /** If the grammar is a combined grammar, return the text of the implicit |
| * lexer grammar. |
| */ |
| public String getLexerGrammar() { |
| if ( lexerGrammarST.getAttribute("literals")==null && |
| lexerGrammarST.getAttribute("rules")==null ) |
| { |
| // if no rules, return nothing |
| return null; |
| } |
| lexerGrammarST.add("name", name); |
| // if there are any actions set for lexer, pass them in |
| if ( getActions().get("lexer")!=null ) { |
| lexerGrammarST.add("actionNames", |
| getActions().get("lexer").keySet()); |
| lexerGrammarST.add("actions", |
| getActions().get("lexer").values()); |
| } |
| // make sure generated grammar has the same options |
| if ( options!=null ) { |
| for (String optionName : options.keySet()) { |
| if ( !doNotCopyOptionsToLexer.contains(optionName) ) { |
| Object value = options.get(optionName); |
| lexerGrammarST.addAggr("options.{name,value}", optionName, value); |
| } |
| } |
| } |
| return lexerGrammarST.render(); |
| } |
| |
| public String getImplicitlyGeneratedLexerFileName() { |
| return name+ |
| IGNORE_STRING_IN_GRAMMAR_FILE_NAME + |
| LEXER_GRAMMAR_FILE_EXTENSION; |
| } |
| |
| /** Get the name of the generated recognizer; may or may not be same |
| * as grammar name. |
| * Recognizer is TParser and TLexer from T if combined, else |
| * just use T regardless of grammar type. |
| */ |
| public String getRecognizerName() { |
| String suffix = ""; |
| List<Grammar> grammarsFromRootToMe = composite.getDelegators(this); |
| //System.out.println("grammarsFromRootToMe="+grammarsFromRootToMe); |
| String qualifiedName = name; |
| if ( grammarsFromRootToMe!=null ) { |
| StringBuilder buf = new StringBuilder(); |
| for (Grammar g : grammarsFromRootToMe) { |
| buf.append(g.name); |
| buf.append('_'); |
| } |
| buf.append(name); |
| qualifiedName = buf.toString(); |
| } |
| if ( type==Grammar.COMBINED || |
| (type==Grammar.LEXER && implicitLexer) ) |
| { |
| suffix = Grammar.grammarTypeToFileNameSuffix[type]; |
| } |
| return qualifiedName+suffix; |
| } |
| |
| /** Parse a rule we add artificially that is a list of the other lexer |
| * rules like this: "Tokens : ID | INT | SEMI ;" nextToken() will invoke |
| * this to set the current token. Add char literals before |
| * the rule references. |
| * |
| * If in filter mode, we want every alt to backtrack and we need to |
| * do k=1 to force the "first token def wins" rule. Otherwise, the |
| * longest-match rule comes into play with LL(*). |
| * |
| * The ANTLRParser antlr.g file now invokes this when parsing a lexer |
| * grammar, which I think is proper even though it peeks at the info |
| * that later phases will (re)compute. It gets a list of lexer rules |
| * and builds a string representing the rule; then it creates a parser |
| * and adds the resulting tree to the grammar's tree. |
| */ |
| public GrammarAST addArtificialMatchTokensRule(GrammarAST grammarAST, |
| List<String> ruleNames, |
| List<String> delegateNames, |
| boolean filterMode) { |
| ST matchTokenRuleST; |
| if ( filterMode ) { |
| matchTokenRuleST = new ST( |
| ARTIFICIAL_TOKENS_RULENAME+ |
| " options {k=1; backtrack=true;} : <rules; separator=\"|\">;"); |
| } |
| else { |
| matchTokenRuleST = new ST( |
| ARTIFICIAL_TOKENS_RULENAME+" : <rules; separator=\"|\">;"); |
| } |
| |
| // Now add token rule references |
| for (int i = 0; i < ruleNames.size(); i++) { |
| String rname = ruleNames.get(i); |
| matchTokenRuleST.add("rules", rname); |
| } |
| for (int i = 0; i < delegateNames.size(); i++) { |
| String dname = delegateNames.get(i); |
| matchTokenRuleST.add("rules", dname+".Tokens"); |
| } |
| //System.out.println("tokens rule: "+matchTokenRuleST.toString()); |
| GrammarAST r = parseArtificialRule(matchTokenRuleST.render()); |
| addRule(grammarAST, r); |
| //addRule((GrammarAST)parser.getAST()); |
| //return (GrammarAST)parser.getAST(); |
| return r; |
| } |
| |
| public GrammarAST parseArtificialRule(String ruleText) { |
| ANTLRLexer lexer = new ANTLRLexer(new ANTLRStringStream(ruleText)); |
| ANTLRParser parser = ANTLRParser.createParser(new CommonTokenStream(lexer)); |
| parser.setGrammar(this); |
| parser.setGrammarType(this.type); |
| try { |
| ANTLRParser.rule_return result = parser.rule(); |
| return result.getTree(); |
| } |
| catch (Exception e) { |
| ErrorManager.error(ErrorManager.MSG_ERROR_CREATING_ARTIFICIAL_RULE, |
| e); |
| return null; |
| } |
| } |
| |
| public void addRule(GrammarAST grammarTree, GrammarAST t) { |
| GrammarAST p = null; |
| for (int i = 0; i < grammarTree.getChildCount(); i++ ) { |
| p = (GrammarAST)grammarTree.getChild(i); |
| if (p == null || p.getType() == ANTLRParser.RULE || p.getType() == ANTLRParser.PREC_RULE) { |
| break; |
| } |
| } |
| |
| if (p != null) { |
| grammarTree.addChild(t); |
| } |
| } |
| |
| /** for any syntactic predicates, we need to define rules for them; they will get |
| * defined automatically like any other rule. :) |
| */ |
| protected List<? extends GrammarAST> getArtificialRulesForSyntacticPredicates(LinkedHashMap<String,GrammarAST> nameToSynpredASTMap) |
| { |
| List<GrammarAST> rules = new ArrayList<GrammarAST>(); |
| if ( nameToSynpredASTMap==null ) { |
| return rules; |
| } |
| boolean isLexer = grammarTree.getType()==ANTLRParser.LEXER_GRAMMAR; |
| for (Map.Entry<String, GrammarAST> entry : nameToSynpredASTMap.entrySet()) { |
| String synpredName = entry.getKey(); |
| GrammarAST fragmentAST = entry.getValue(); |
| GrammarAST ruleAST = |
| ANTLRParser.createSimpleRuleAST(synpredName, |
| fragmentAST, |
| isLexer); |
| rules.add(ruleAST); |
| } |
| return rules; |
| } |
| |
| public void addRulesForSyntacticPredicates() { |
| // Get syn pred rules and add to existing tree |
| List<? extends GrammarAST> synpredRules = |
| getArtificialRulesForSyntacticPredicates(nameToSynpredASTMap); |
| for (int i = 0; i < synpredRules.size(); i++) { |
| GrammarAST rAST = (GrammarAST) synpredRules.get(i); |
| grammarTree.addChild(rAST); |
| } |
| } |
| |
| /** Walk the list of options, altering this Grammar object according |
| * to any I recognize. |
| protected void processOptions() { |
| Iterator optionNames = options.keySet().iterator(); |
| while (optionNames.hasNext()) { |
| String optionName = (String) optionNames.next(); |
| Object value = options.get(optionName); |
| if ( optionName.equals("tokenVocab") ) { |
| |
| } |
| } |
| } |
| */ |
| |
| /** Define all the rule begin/end NFAStates to solve forward reference |
| * issues. Critical for composite grammars too. |
| * This is normally called on all root/delegates manually and then |
| * buildNFA() is called afterwards because the NFA construction needs |
| * to see rule start/stop states from potentially every grammar. Has |
| * to be have these created a priori. Testing routines will often |
| * just call buildNFA(), which forces a call to this method if not |
| * done already. Works ONLY for single noncomposite grammars. |
| */ |
| public void createRuleStartAndStopNFAStates() { |
| //System.out.println("### createRuleStartAndStopNFAStates "+getGrammarTypeString()+" grammar "+name+" NFAs"); |
| if ( nfa!=null ) { |
| return; |
| } |
| nfa = new NFA(this); |
| factory = new NFAFactory(nfa); |
| |
| Collection<Rule> rules = getRules(); |
| for (Rule r : rules) { |
| String ruleName = r.name; |
| NFAState ruleBeginState = factory.newState(); |
| ruleBeginState.setDescription("rule "+ruleName+" start"); |
| ruleBeginState.enclosingRule = r; |
| r.startState = ruleBeginState; |
| NFAState ruleEndState = factory.newState(); |
| ruleEndState.setDescription("rule "+ruleName+" end"); |
| ruleEndState.setAcceptState(true); |
| ruleEndState.enclosingRule = r; |
| r.stopState = ruleEndState; |
| } |
| } |
| |
| public void buildNFA() { |
| if ( nfa==null ) { |
| createRuleStartAndStopNFAStates(); |
| } |
| if ( nfa.complete ) { |
| // don't let it create more than once; has side-effects |
| return; |
| } |
| //System.out.println("### build "+getGrammarTypeString()+" grammar "+name+" NFAs"); |
| if ( getRules().isEmpty() ) { |
| return; |
| } |
| |
| CommonTreeNodeStream input = new CommonTreeNodeStream(getGrammarTree()); |
| TreeToNFAConverter nfaBuilder = new TreeToNFAConverter(input, this, nfa, factory); |
| try { |
| nfaBuilder.grammar_(); |
| } |
| catch (RecognitionException re) { |
| ErrorManager.error(ErrorManager.MSG_BAD_AST_STRUCTURE, |
| name, |
| re); |
| } |
| nfa.complete = true; |
| } |
| |
| /** For each decision in this grammar, compute a single DFA using the |
| * NFA states associated with the decision. The DFA construction |
| * determines whether or not the alternatives in the decision are |
| * separable using a regular lookahead language. |
| * |
| * Store the lookahead DFAs in the AST created from the user's grammar |
| * so the code generator or whoever can easily access it. |
| * |
| * This is a separate method because you might want to create a |
| * Grammar without doing the expensive analysis. |
| */ |
| public void createLookaheadDFAs() { |
| createLookaheadDFAs(true); |
| } |
| |
| public void createLookaheadDFAs(boolean wackTempStructures) { |
| if ( nfa==null ) { |
| buildNFA(); |
| } |
| |
| // CHECK FOR LEFT RECURSION; Make sure we can actually do analysis |
| checkAllRulesForLeftRecursion(); |
| |
| /* |
| // was there a severe problem while sniffing the grammar? |
| if ( ErrorManager.doNotAttemptAnalysis() ) { |
| return; |
| } |
| */ |
| |
| long start = System.currentTimeMillis(); |
| |
| //System.out.println("### create DFAs"); |
| int numDecisions = getNumberOfDecisions(); |
| if ( NFAToDFAConverter.SINGLE_THREADED_NFA_CONVERSION ) { |
| for (int decision=1; decision<=numDecisions; decision++) { |
| NFAState decisionStartState = getDecisionNFAStartState(decision); |
| if ( leftRecursiveRules.contains(decisionStartState.enclosingRule) ) { |
| // don't bother to process decisions within left recursive rules. |
| if ( composite.watchNFAConversion ) { |
| System.out.println("ignoring decision "+decision+ |
| " within left-recursive rule "+decisionStartState.enclosingRule.name); |
| } |
| continue; |
| } |
| if ( !externalAnalysisAbort && decisionStartState.getNumberOfTransitions()>1 ) { |
| Rule r = decisionStartState.enclosingRule; |
| if ( r.isSynPred && !synPredNamesUsedInDFA.contains(r.name) ) { |
| continue; |
| } |
| DFA dfa = null; |
| // if k=* or k=1, try LL(1) |
| if ( getUserMaxLookahead(decision)==0 || |
| getUserMaxLookahead(decision)==1 ) |
| { |
| dfa = createLL_1_LookaheadDFA(decision); |
| } |
| if ( dfa==null ) { |
| if ( composite.watchNFAConversion ) { |
| System.out.println("decision "+decision+ |
| " not suitable for LL(1)-optimized DFA analysis"); |
| } |
| dfa = createLookaheadDFA(decision, wackTempStructures); |
| } |
| if ( dfa.startState==null ) { |
| // something went wrong; wipe out DFA |
| setLookaheadDFA(decision, null); |
| } |
| if ( Tool.internalOption_PrintDFA ) { |
| System.out.println("DFA d="+decision); |
| FASerializer serializer = new FASerializer(nfa.grammar); |
| String result = serializer.serialize(dfa.startState); |
| System.out.println(result); |
| } |
| } |
| } |
| } |
| else { |
| ErrorManager.info("two-threaded DFA conversion"); |
| // create a barrier expecting n DFA and this main creation thread |
| Barrier barrier = new Barrier(3); |
| // assume 2 CPU for now |
| int midpoint = numDecisions/2; |
| NFAConversionThread t1 = |
| new NFAConversionThread(this, barrier, 1, midpoint); |
| new Thread(t1).start(); |
| if ( midpoint == (numDecisions/2) ) { |
| midpoint++; |
| } |
| NFAConversionThread t2 = |
| new NFAConversionThread(this, barrier, midpoint, numDecisions); |
| new Thread(t2).start(); |
| // wait for these two threads to finish |
| try { |
| barrier.waitForRelease(); |
| } |
| catch(InterruptedException e) { |
| ErrorManager.internalError("what the hell? DFA interruptus", e); |
| } |
| } |
| |
| long stop = System.currentTimeMillis(); |
| DFACreationWallClockTimeInMS = stop - start; |
| |
| // indicate that we've finished building DFA (even if #decisions==0) |
| allDecisionDFACreated = true; |
| } |
| |
| public DFA createLL_1_LookaheadDFA(int decision) { |
| Decision d = getDecision(decision); |
| String enclosingRule = d.startState.enclosingRule.name; |
| Rule r = d.startState.enclosingRule; |
| NFAState decisionStartState = getDecisionNFAStartState(decision); |
| |
| if ( composite.watchNFAConversion ) { |
| System.out.println("--------------------\nattempting LL(1) DFA (d=" |
| +decisionStartState.getDecisionNumber()+") for "+ |
| decisionStartState.getDescription()); |
| } |
| |
| if ( r.isSynPred && !synPredNamesUsedInDFA.contains(enclosingRule) ) { |
| return null; |
| } |
| |
| // compute lookahead for each alt |
| int numAlts = getNumberOfAltsForDecisionNFA(decisionStartState); |
| LookaheadSet[] altLook = new LookaheadSet[numAlts+1]; |
| for (int alt = 1; alt <= numAlts; alt++) { |
| int walkAlt = |
| decisionStartState.translateDisplayAltToWalkAlt(alt); |
| NFAState altLeftEdge = getNFAStateForAltOfDecision(decisionStartState, walkAlt); |
| NFAState altStartState = (NFAState)altLeftEdge.transition[0].target; |
| //System.out.println("alt "+alt+" start state = "+altStartState.stateNumber); |
| altLook[alt] = ll1Analyzer.LOOK(altStartState); |
| //System.out.println("alt "+alt+": "+altLook[alt].toString(this)); |
| } |
| |
| // compare alt i with alt j for disjointness |
| boolean decisionIsLL_1 = true; |
| outer: |
| for (int i = 1; i <= numAlts; i++) { |
| for (int j = i+1; j <= numAlts; j++) { |
| /* |
| System.out.println("compare "+i+", "+j+": "+ |
| altLook[i].toString(this)+" with "+ |
| altLook[j].toString(this)); |
| */ |
| LookaheadSet collision = altLook[i].intersection(altLook[j]); |
| if ( !collision.isNil() ) { |
| //System.out.println("collision (non-LL(1)): "+collision.toString(this)); |
| decisionIsLL_1 = false; |
| break outer; |
| } |
| } |
| } |
| |
| boolean foundConfoundingPredicate = |
| ll1Analyzer.detectConfoundingPredicates(decisionStartState); |
| if ( decisionIsLL_1 && !foundConfoundingPredicate ) { |
| // build an LL(1) optimized DFA with edge for each altLook[i] |
| if ( NFAToDFAConverter.debug ) { |
| System.out.println("decision "+decision+" is simple LL(1)"); |
| } |
| DFA lookaheadDFA = new LL1DFA(decision, decisionStartState, altLook); |
| setLookaheadDFA(decision, lookaheadDFA); |
| updateLineColumnToLookaheadDFAMap(lookaheadDFA); |
| return lookaheadDFA; |
| } |
| |
| // not LL(1) but perhaps we can solve with simplified predicate search |
| // even if k=1 set manually, only resolve here if we have preds; i.e., |
| // don't resolve etc... |
| |
| /* |
| SemanticContext visiblePredicates = |
| ll1Analyzer.getPredicates(decisionStartState); |
| boolean foundConfoundingPredicate = |
| ll1Analyzer.detectConfoundingPredicates(decisionStartState); |
| */ |
| |
| // exit if not forced k=1 or we found a predicate situation we |
| // can't handle: predicates in rules invoked from this decision. |
| if ( getUserMaxLookahead(decision)!=1 || // not manually set to k=1 |
| !getAutoBacktrackMode(decision) || |
| foundConfoundingPredicate ) |
| { |
| //System.out.println("trying LL(*)"); |
| return null; |
| } |
| |
| List<IntervalSet> edges = new ArrayList<IntervalSet>(); |
| for (int i = 1; i < altLook.length; i++) { |
| LookaheadSet s = altLook[i]; |
| edges.add(s.tokenTypeSet); |
| } |
| List<IntervalSet> disjoint = makeEdgeSetsDisjoint(edges); |
| //System.out.println("disjoint="+disjoint); |
| |
| MultiMap<IntervalSet, Integer> edgeMap = new MultiMap<IntervalSet, Integer>(); |
| for (int i = 0; i < disjoint.size(); i++) { |
| IntervalSet ds = disjoint.get(i); |
| for (int alt = 1; alt < altLook.length; alt++) { |
| LookaheadSet look = altLook[alt]; |
| if ( !ds.and(look.tokenTypeSet).isNil() ) { |
| edgeMap.map(ds, alt); |
| } |
| } |
| } |
| //System.out.println("edge map: "+edgeMap); |
| |
| // TODO: how do we know we covered stuff? |
| |
| // build an LL(1) optimized DFA with edge for each altLook[i] |
| DFA lookaheadDFA = new LL1DFA(decision, decisionStartState, edgeMap); |
| setLookaheadDFA(decision, lookaheadDFA); |
| |
| // create map from line:col to decision DFA (for ANTLRWorks) |
| updateLineColumnToLookaheadDFAMap(lookaheadDFA); |
| |
| return lookaheadDFA; |
| } |
| |
| private void updateLineColumnToLookaheadDFAMap(DFA lookaheadDFA) { |
| GrammarAST decisionAST = nfa.grammar.getDecisionBlockAST(lookaheadDFA.decisionNumber); |
| int line = decisionAST.getLine(); |
| int col = decisionAST.getCharPositionInLine(); |
| lineColumnToLookaheadDFAMap.put(new StringBuffer().append(line).append(":") |
| .append(col).toString(), lookaheadDFA); |
| } |
| |
| protected List<IntervalSet> makeEdgeSetsDisjoint(List<IntervalSet> edges) { |
| OrderedHashSet<IntervalSet> disjointSets = new OrderedHashSet<IntervalSet>(); |
| // walk each incoming edge label/set and add to disjoint set |
| int numEdges = edges.size(); |
| for (int e = 0; e < numEdges; e++) { |
| IntervalSet t = edges.get(e); |
| if ( disjointSets.contains(t) ) { // exact set present |
| continue; |
| } |
| |
| // compare t with set i for disjointness |
| IntervalSet remainder = t; // remainder starts out as whole set to add |
| int numDisjointElements = disjointSets.size(); |
| for (int i = 0; i < numDisjointElements; i++) { |
| IntervalSet s_i = disjointSets.get(i); |
| |
| if ( t.and(s_i).isNil() ) { // nothing in common |
| continue; |
| } |
| //System.out.println(label+" collides with "+rl); |
| |
| // For any (s_i, t) with s_i&t!=nil replace with (s_i-t, s_i&t) |
| // (ignoring s_i-t if nil; don't put in list) |
| |
| // Replace existing s_i with intersection since we |
| // know that will always be a non nil character class |
| IntervalSet intersection = s_i.and(t); |
| disjointSets.set(i, intersection); |
| |
| // Compute s_i-t to see what is in current set and not in incoming |
| IntervalSet existingMinusNewElements = s_i.subtract(t); |
| //System.out.println(s_i+"-"+t+"="+existingMinusNewElements); |
| if ( !existingMinusNewElements.isNil() ) { |
| // found a new character class, add to the end (doesn't affect |
| // outer loop duration due to n computation a priori. |
| disjointSets.add(existingMinusNewElements); |
| } |
| |
| // anything left to add to the reachableLabels? |
| remainder = t.subtract(s_i); |
| if ( remainder.isNil() ) { |
| break; // nothing left to add to set. done! |
| } |
| |
| t = remainder; |
| } |
| if ( !remainder.isNil() ) { |
| disjointSets.add(remainder); |
| } |
| } |
| return disjointSets.elements(); |
| } |
| |
| public DFA createLookaheadDFA(int decision, boolean wackTempStructures) { |
| Decision d = getDecision(decision); |
| String enclosingRule = d.startState.enclosingRule.name; |
| Rule r = d.startState.enclosingRule; |
| |
| //System.out.println("createLookaheadDFA(): "+enclosingRule+" dec "+decision+"; synprednames prev used "+synPredNamesUsedInDFA); |
| NFAState decisionStartState = getDecisionNFAStartState(decision); |
| long startDFA=0,stopDFA; |
| if ( composite.watchNFAConversion ) { |
| System.out.println("--------------------\nbuilding lookahead DFA (d=" |
| +decisionStartState.getDecisionNumber()+") for "+ |
| decisionStartState.getDescription()); |
| startDFA = System.currentTimeMillis(); |
| } |
| |
| DFA lookaheadDFA = new DFA(decision, decisionStartState); |
| // Retry to create a simpler DFA if analysis failed (non-LL(*), |
| // recursion overflow, or time out). |
| boolean failed = |
| lookaheadDFA.probe.isNonLLStarDecision() || |
| lookaheadDFA.probe.analysisOverflowed(); |
| if ( failed && lookaheadDFA.okToRetryDFAWithK1() ) { |
| // set k=1 option and try again. |
| // First, clean up tracking stuff |
| decisionsWhoseDFAsUsesSynPreds.remove(lookaheadDFA); |
| // TODO: clean up synPredNamesUsedInDFA also (harder) |
| d.blockAST.setBlockOption(this, "k", Utils.integer(1)); |
| if ( composite.watchNFAConversion ) { |
| System.out.print("trying decision "+decision+ |
| " again with k=1; reason: "+ |
| lookaheadDFA.getReasonForFailure()); |
| } |
| lookaheadDFA = null; // make sure other memory is "free" before redoing |
| lookaheadDFA = new DFA(decision, decisionStartState); |
| } |
| |
| setLookaheadDFA(decision, lookaheadDFA); |
| |
| if ( wackTempStructures ) { |
| for (DFAState s : lookaheadDFA.getUniqueStates().values()) { |
| s.reset(); |
| } |
| } |
| |
| // create map from line:col to decision DFA (for ANTLRWorks) |
| updateLineColumnToLookaheadDFAMap(lookaheadDFA); |
| |
| if ( composite.watchNFAConversion ) { |
| stopDFA = System.currentTimeMillis(); |
| System.out.println("cost: "+lookaheadDFA.getNumberOfStates()+ |
| " states, "+(int)(stopDFA-startDFA)+" ms"); |
| } |
| //System.out.println("after create DFA; synPredNamesUsedInDFA="+synPredNamesUsedInDFA); |
| return lookaheadDFA; |
| } |
| |
| /** Terminate DFA creation (grammar analysis). |
| */ |
| public void externallyAbortNFAToDFAConversion() { |
| externalAnalysisAbort = true; |
| } |
| |
| public boolean NFAToDFAConversionExternallyAborted() { |
| return externalAnalysisAbort; |
| } |
| |
| /** Return a new unique integer in the token type space */ |
| public int getNewTokenType() { |
| composite.maxTokenType++; |
| return composite.maxTokenType; |
| } |
| |
| /** Define a token at a particular token type value. Blast an |
| * old value with a new one. This is called normal grammar processsing |
| * and during import vocab operations to set tokens with specific values. |
| */ |
| public void defineToken(String text, int tokenType) { |
| //System.out.println("defineToken("+text+", "+tokenType+")"); |
| if ( composite.tokenIDToTypeMap.get(text)!=null ) { |
| // already defined? Must be predefined one like EOF; |
| // do nothing |
| return; |
| } |
| // the index in the typeToTokenList table is actually shifted to |
| // hold faux labels as you cannot have negative indices. |
| if ( text.charAt(0)=='\'' ) { |
| composite.stringLiteralToTypeMap.put(text, Utils.integer(tokenType)); |
| // track in reverse index too |
| if ( tokenType>=composite.typeToStringLiteralList.size() ) { |
| composite.typeToStringLiteralList.setSize(tokenType+1); |
| } |
| composite.typeToStringLiteralList.set(tokenType, text); |
| } |
| else { // must be a label like ID |
| composite.tokenIDToTypeMap.put(text, Utils.integer(tokenType)); |
| } |
| int index = Label.NUM_FAUX_LABELS+tokenType-1; |
| //System.out.println("defining "+name+" token "+text+" at type="+tokenType+", index="+index); |
| composite.maxTokenType = Math.max(composite.maxTokenType, tokenType); |
| if ( index>=composite.typeToTokenList.size() ) { |
| composite.typeToTokenList.setSize(index+1); |
| } |
| String prevToken = composite.typeToTokenList.get(index); |
| if ( prevToken==null || prevToken.charAt(0)=='\'' ) { |
| // only record if nothing there before or if thing before was a literal |
| composite.typeToTokenList.set(index, text); |
| } |
| } |
| |
| /** Define a new rule. A new rule index is created by incrementing |
| * ruleIndex. |
| */ |
| public void defineRule(Token ruleToken, |
| String modifier, |
| Map<String, Object> options, |
| GrammarAST tree, |
| GrammarAST argActionAST, |
| int numAlts) |
| { |
| String ruleName = ruleToken.getText(); |
| if ( getLocallyDefinedRule(ruleName)!=null ) { |
| ErrorManager.grammarError(ErrorManager.MSG_RULE_REDEFINITION, |
| this, ruleToken, ruleName); |
| return; |
| } |
| |
| if ( (type==Grammar.PARSER||type==Grammar.TREE_PARSER) && |
| Character.isUpperCase(ruleName.charAt(0)) ) |
| { |
| ErrorManager.grammarError(ErrorManager.MSG_LEXER_RULES_NOT_ALLOWED, |
| this, ruleToken, ruleName); |
| return; |
| } |
| |
| Rule r = new Rule(this, ruleName, composite.ruleIndex, numAlts); |
| /* |
| System.out.println("defineRule("+ruleName+",modifier="+modifier+ |
| "): index="+r.index+", nalts="+numAlts); |
| */ |
| r.modifier = modifier; |
| nameToRuleMap.put(ruleName, r); |
| setRuleAST(ruleName, tree); |
| r.setOptions(options, ruleToken); |
| r.argActionAST = argActionAST; |
| composite.ruleIndexToRuleList.setSize(composite.ruleIndex+1); |
| composite.ruleIndexToRuleList.set(composite.ruleIndex, r); |
| composite.ruleIndex++; |
| if ( ruleName.startsWith(SYNPRED_RULE_PREFIX) ) { |
| r.isSynPred = true; |
| } |
| } |
| |
| /** Define a new predicate and get back its name for use in building |
| * a semantic predicate reference to the syn pred. |
| */ |
| public String defineSyntacticPredicate(GrammarAST blockAST, |
| String currentRuleName) |
| { |
| if ( nameToSynpredASTMap==null ) { |
| nameToSynpredASTMap = new LinkedHashMap<String, GrammarAST>(); |
| } |
| String predName = |
| SYNPRED_RULE_PREFIX+(nameToSynpredASTMap.size() + 1)+"_"+name; |
| blockAST.setTreeEnclosingRuleNameDeeply(predName); |
| nameToSynpredASTMap.put(predName, blockAST); |
| return predName; |
| } |
| |
| public LinkedHashMap<String, GrammarAST> getSyntacticPredicates() { |
| return nameToSynpredASTMap; |
| } |
| |
| public GrammarAST getSyntacticPredicate(String name) { |
| if ( nameToSynpredASTMap==null ) { |
| return null; |
| } |
| return nameToSynpredASTMap.get(name); |
| } |
| |
| public void synPredUsedInDFA(DFA dfa, SemanticContext semCtx) { |
| decisionsWhoseDFAsUsesSynPreds.add(dfa); |
| semCtx.trackUseOfSyntacticPredicates(this); // walk ctx looking for preds |
| } |
| |
| /* |
| public Set<Rule> getRuleNamesVisitedDuringLOOK() { |
| return rulesSensitiveToOtherRules; |
| } |
| */ |
| |
| /** Given @scope::name {action} define it for this grammar. Later, |
| * the code generator will ask for the actions table. For composite |
| * grammars, make sure header action propogates down to all delegates. |
| */ |
| public void defineNamedAction(GrammarAST ampersandAST, |
| String scope, |
| GrammarAST nameAST, |
| GrammarAST actionAST) |
| { |
| if ( scope==null ) { |
| scope = getDefaultActionScope(type); |
| } |
| //System.out.println("Grammar "+name+" define @"+scope+"::"+nameAST.getText()+"{"+actionAST.getText()+"}"); |
| String actionName = nameAST.getText(); |
| Map<String, Object> scopeActions = getActions().get(scope); |
| if ( scopeActions==null ) { |
| scopeActions = new HashMap<String, Object>(); |
| getActions().put(scope, scopeActions); |
| } |
| Object a = scopeActions.get(actionName); |
| if ( a!=null ) { |
| ErrorManager.grammarError( |
| ErrorManager.MSG_ACTION_REDEFINITION,this, |
| nameAST.getToken(),nameAST.getText()); |
| } |
| else { |
| scopeActions.put(actionName,actionAST); |
| } |
| // propogate header (regardless of scope (lexer, parser, ...) ? |
| if ( this==composite.getRootGrammar() && actionName.equals("header") ) { |
| List<Grammar> allgrammars = composite.getRootGrammar().getDelegates(); |
| for (Grammar delegate : allgrammars) { |
| if ( target.isValidActionScope(delegate.type, scope) ) { |
| //System.out.println("propogate to "+delegate.name); |
| delegate.defineNamedAction(ampersandAST, scope, nameAST, actionAST); |
| } |
| } |
| } |
| } |
| |
| public void setSynPredGateIfNotAlready(ST gateST) { |
| String scope = getDefaultActionScope(type); |
| Map<String, Object> actionsForGrammarScope = getActions().get(scope); |
| // if no synpredgate action set by user then set |
| if ( (actionsForGrammarScope==null || |
| !actionsForGrammarScope.containsKey(Grammar.SYNPREDGATE_ACTION_NAME)) ) |
| { |
| if ( actionsForGrammarScope==null ) { |
| actionsForGrammarScope=new HashMap<String, Object>(); |
| getActions().put(scope, actionsForGrammarScope); |
| } |
| actionsForGrammarScope.put(Grammar.SYNPREDGATE_ACTION_NAME, |
| gateST); |
| } |
| } |
| |
| public Map<String, Map<String, Object>> getActions() { |
| return actions; |
| } |
| |
| /** Given a grammar type, what should be the default action scope? |
| * If I say @members in a COMBINED grammar, for example, the |
| * default scope should be "parser". |
| */ |
| public String getDefaultActionScope(int grammarType) { |
| switch (grammarType) { |
| case Grammar.LEXER : |
| return "lexer"; |
| case Grammar.PARSER : |
| case Grammar.COMBINED : |
| return "parser"; |
| case Grammar.TREE_PARSER : |
| return "treeparser"; |
| } |
| return null; |
| } |
| |
| public void defineLexerRuleFoundInParser(Token ruleToken, |
| GrammarAST ruleAST) |
| { |
| // System.out.println("rule tree is:\n"+ruleAST.toStringTree()); |
| /* |
| String ruleText = tokenBuffer.toOriginalString(ruleAST.ruleStartTokenIndex, |
| ruleAST.ruleStopTokenIndex); |
| */ |
| // first, create the text of the rule |
| StringBuilder buf = new StringBuilder(); |
| buf.append("// $ANTLR src \""); |
| buf.append(getFileName()); |
| buf.append("\" "); |
| buf.append(ruleAST.getLine()); |
| buf.append("\n"); |
| for (int i=ruleAST.getTokenStartIndex(); |
| i<=ruleAST.getTokenStopIndex() && i<tokenBuffer.size(); |
| i++) |
| { |
| CommonToken t = (CommonToken)tokenBuffer.get(i); |
| // undo the text deletions done by the lexer (ugh) |
| if ( t.getType()==ANTLRParser.BLOCK ) { |
| buf.append("("); |
| } |
| else if ( t.getType()==ANTLRParser.ACTION ) { |
| buf.append("{"); |
| buf.append(t.getText()); |
| buf.append("}"); |
| } |
| else if ( t.getType()==ANTLRParser.SEMPRED || |
| t.getType()==ANTLRParser.SYN_SEMPRED || |
| t.getType()==ANTLRParser.GATED_SEMPRED || |
| t.getType()==ANTLRParser.BACKTRACK_SEMPRED ) |
| { |
| buf.append("{"); |
| buf.append(t.getText()); |
| buf.append("}?"); |
| } |
| else if ( t.getType()==ANTLRParser.ARG_ACTION ) { |
| buf.append("["); |
| buf.append(t.getText()); |
| buf.append("]"); |
| } |
| else { |
| buf.append(t.getText()); |
| } |
| } |
| String ruleText = buf.toString(); |
| //System.out.println("[["+ruleText+"]]"); |
| // now put the rule into the lexer grammar template |
| if ( getGrammarIsRoot() ) { // don't build lexers for delegates |
| lexerGrammarST.add("rules", ruleText); |
| } |
| // track this lexer rule's name |
| composite.lexerRules.add(ruleToken.getText()); |
| } |
| |
| /** If someone does PLUS='+' in the parser, must make sure we get |
| * "PLUS : '+' ;" in lexer not "T73 : '+';" |
| */ |
| public void defineLexerRuleForAliasedStringLiteral(String tokenID, |
| String literal, |
| int tokenType) |
| { |
| if ( getGrammarIsRoot() ) { // don't build lexers for delegates |
| //System.out.println("defineLexerRuleForAliasedStringLiteral: "+literal+" "+tokenType); |
| lexerGrammarST.addAggr("literals.{ruleName,type,literal}", |
| tokenID, |
| Utils.integer(tokenType), |
| literal); |
| } |
| // track this lexer rule's name |
| composite.lexerRules.add(tokenID); |
| } |
| |
| public void defineLexerRuleForStringLiteral(String literal, int tokenType) { |
| //System.out.println("defineLexerRuleForStringLiteral: "+literal+" "+tokenType); |
| // compute new token name like T237 and define it as having tokenType |
| String tokenID = computeTokenNameFromLiteral(tokenType,literal); |
| defineToken(tokenID, tokenType); |
| // tell implicit lexer to define a rule to match the literal |
| if ( getGrammarIsRoot() ) { // don't build lexers for delegates |
| lexerGrammarST.addAggr("literals.{ruleName,type,literal}", |
| tokenID, |
| Utils.integer(tokenType), |
| literal); |
| } |
| } |
| |
| public Rule getLocallyDefinedRule(String ruleName) { |
| Rule r = nameToRuleMap.get(ruleName); |
| return r; |
| } |
| |
| public Rule getRule(String ruleName) { |
| Rule r = composite.getRule(ruleName); |
| /* |
| if ( r!=null && r.grammar != this ) { |
| System.out.println(name+".getRule("+ruleName+")="+r); |
| } |
| */ |
| return r; |
| } |
| |
| public Rule getRule(String scopeName, String ruleName) { |
| if ( scopeName!=null ) { // scope override |
| Grammar scope = composite.getGrammar(scopeName); |
| if ( scope==null ) { |
| return null; |
| } |
| return scope.getLocallyDefinedRule(ruleName); |
| } |
| return getRule(ruleName); |
| } |
| |
| public int getRuleIndex(String scopeName, String ruleName) { |
| Rule r = getRule(scopeName, ruleName); |
| if ( r!=null ) { |
| return r.index; |
| } |
| return INVALID_RULE_INDEX; |
| } |
| |
| public int getRuleIndex(String ruleName) { |
| return getRuleIndex(null, ruleName); |
| } |
| |
| public String getRuleName(int ruleIndex) { |
| Rule r = composite.ruleIndexToRuleList.get(ruleIndex); |
| if ( r!=null ) { |
| return r.name; |
| } |
| return null; |
| } |
| |
| /** Should codegen.g gen rule for ruleName? |
| * If synpred, only gen if used in a DFA. |
| * If regular rule, only gen if not overridden in delegator |
| * Always gen Tokens rule though. |
| */ |
| public boolean generateMethodForRule(String ruleName) { |
| if ( ruleName.equals(ARTIFICIAL_TOKENS_RULENAME) ) { |
| // always generate Tokens rule to satisfy lexer interface |
| // but it may have no alternatives. |
| return true; |
| } |
| if ( overriddenRules.contains(ruleName) ) { |
| // don't generate any overridden rules |
| return false; |
| } |
| // generate if non-synpred or synpred used in a DFA |
| Rule r = getLocallyDefinedRule(ruleName); |
| return !r.isSynPred || |
| (r.isSynPred&&synPredNamesUsedInDFA.contains(ruleName)); |
| } |
| |
| public AttributeScope defineGlobalScope(String name, Token scopeAction) { |
| AttributeScope scope = new AttributeScope(this, name, scopeAction); |
| scopes.put(name,scope); |
| return scope; |
| } |
| |
| public AttributeScope createReturnScope(String ruleName, Token retAction) { |
| AttributeScope scope = new AttributeScope(this, ruleName, retAction); |
| scope.isReturnScope = true; |
| return scope; |
| } |
| |
| public AttributeScope createRuleScope(String ruleName, Token scopeAction) { |
| AttributeScope scope = new AttributeScope(this, ruleName, scopeAction); |
| scope.isDynamicRuleScope = true; |
| return scope; |
| } |
| |
| public AttributeScope createParameterScope(String ruleName, Token argAction) { |
| AttributeScope scope = new AttributeScope(this, ruleName, argAction); |
| scope.isParameterScope = true; |
| return scope; |
| } |
| |
| /** Get a global scope */ |
| public AttributeScope getGlobalScope(String name) { |
| return scopes.get(name); |
| } |
| |
| public Map<String, AttributeScope> getGlobalScopes() { |
| return scopes; |
| } |
| |
| /** Define a label defined in a rule r; check the validity then ask the |
| * Rule object to actually define it. |
| */ |
| protected void defineLabel(Rule r, Token label, GrammarAST element, int type) { |
| boolean err = nameSpaceChecker.checkForLabelTypeMismatch(r, label, type); |
| if ( err ) { |
| return; |
| } |
| r.defineLabel(label, element, type); |
| } |
| |
| public void defineTokenRefLabel(String ruleName, |
| Token label, |
| GrammarAST tokenRef) |
| { |
| Rule r = getLocallyDefinedRule(ruleName); |
| if ( r!=null ) { |
| if ( type==LEXER && |
| (tokenRef.getType()==ANTLRParser.CHAR_LITERAL|| |
| tokenRef.getType()==ANTLRParser.BLOCK|| |
| tokenRef.getType()==ANTLRParser.NOT|| |
| tokenRef.getType()==ANTLRParser.CHAR_RANGE|| |
| tokenRef.getType()==ANTLRParser.WILDCARD)) |
| { |
| defineLabel(r, label, tokenRef, CHAR_LABEL); |
| } |
| else { |
| defineLabel(r, label, tokenRef, TOKEN_LABEL); |
| } |
| } |
| } |
| |
| public void defineWildcardTreeLabel(String ruleName, |
| Token label, |
| GrammarAST tokenRef) |
| { |
| Rule r = getLocallyDefinedRule(ruleName); |
| if ( r!=null ) { |
| defineLabel(r, label, tokenRef, WILDCARD_TREE_LABEL); |
| } |
| } |
| |
| public void defineWildcardTreeListLabel(String ruleName, |
| Token label, |
| GrammarAST tokenRef) |
| { |
| Rule r = getLocallyDefinedRule(ruleName); |
| if ( r!=null ) { |
| defineLabel(r, label, tokenRef, WILDCARD_TREE_LIST_LABEL); |
| } |
| } |
| |
| public void defineRuleRefLabel(String ruleName, |
| Token label, |
| GrammarAST ruleRef) |
| { |
| Rule r = getLocallyDefinedRule(ruleName); |
| if ( r!=null ) { |
| defineLabel(r, label, ruleRef, RULE_LABEL); |
| } |
| } |
| |
| public void defineTokenListLabel(String ruleName, |
| Token label, |
| GrammarAST element) |
| { |
| Rule r = getLocallyDefinedRule(ruleName); |
| if ( r!=null ) { |
| defineLabel(r, label, element, TOKEN_LIST_LABEL); |
| } |
| } |
| |
| public void defineRuleListLabel(String ruleName, |
| Token label, |
| GrammarAST element) |
| { |
| Rule r = getLocallyDefinedRule(ruleName); |
| if ( r!=null ) { |
| if ( !r.getHasMultipleReturnValues() ) { |
| ErrorManager.grammarError( |
| ErrorManager.MSG_LIST_LABEL_INVALID_UNLESS_RETVAL_STRUCT,this, |
| label,label.getText()); |
| } |
| defineLabel(r, label, element, RULE_LIST_LABEL); |
| } |
| } |
| |
| /** Given a set of all rewrite elements on right of ->, filter for |
| * label types such as Grammar.TOKEN_LABEL, Grammar.TOKEN_LIST_LABEL, ... |
| * Return a displayable token type name computed from the GrammarAST. |
| */ |
| public Set<String> getLabels(Set<GrammarAST> rewriteElements, int labelType) { |
| Set<String> labels = new HashSet<String>(); |
| for (GrammarAST el : rewriteElements) { |
| if ( el.getType()==ANTLRParser.LABEL ) { |
| String labelName = el.getText(); |
| Rule enclosingRule = getLocallyDefinedRule(el.enclosingRuleName); |
| if ( enclosingRule==null ) continue; |
| LabelElementPair pair = enclosingRule.getLabel(labelName); |
| /* |
| // if tree grammar and we have a wildcard, only notice it |
| // when looking for rule labels not token label. x=. should |
| // look like a rule ref since could be subtree. |
| if ( type==TREE_PARSER && pair!=null && |
| pair.elementRef.getType()==ANTLRParser.WILDCARD ) |
| { |
| if ( labelType==WILDCARD_TREE_LABEL ) { |
| labels.add(labelName); |
| continue; |
| } |
| else continue; |
| } |
| */ |
| // if valid label and type is what we're looking for |
| // and not ref to old value val $rule, add to list |
| if ( pair!=null && pair.type==labelType && |
| !labelName.equals(el.enclosingRuleName) ) |
| { |
| labels.add(labelName); |
| } |
| } |
| } |
| return labels; |
| } |
| |
| /** Before generating code, we examine all actions that can have |
| * $x.y and $y stuff in them because some code generation depends on |
| * Rule.referencedPredefinedRuleAttributes. I need to remove unused |
| * rule labels for example. |
| */ |
| protected void examineAllExecutableActions() { |
| Collection<Rule> rules = getRules(); |
| for (Rule r : rules) { |
| // walk all actions within the rule elements, args, and exceptions |
| List<GrammarAST> actions = r.getInlineActions(); |
| for (int i = 0; i < actions.size(); i++) { |
| GrammarAST actionAST = actions.get(i); |
| ActionAnalysis sniffer = |
| new ActionAnalysis(this, r.name, actionAST); |
| sniffer.analyze(); |
| } |
| // walk any named actions like @init, @after |
| Collection<? extends Object> namedActions = r.getActions().values(); |
| for (Object namedAction : namedActions) { |
| GrammarAST actionAST = (GrammarAST)namedAction; |
| ActionAnalysis sniffer = |
| new ActionAnalysis(this, r.name, actionAST); |
| sniffer.analyze(); |
| } |
| } |
| } |
| |
| /** Remove all labels on rule refs whose target rules have no return value. |
| * Do this for all rules in grammar. |
| */ |
| public void checkAllRulesForUselessLabels() { |
| if ( type==LEXER ) { |
| return; |
| } |
| Set<String> rules = nameToRuleMap.keySet(); |
| for (String ruleName : rules) { |
| Rule r = getRule(ruleName); |
| removeUselessLabels(r.getRuleLabels()); |
| removeUselessLabels(r.getRuleListLabels()); |
| } |
| } |
| |
| /** A label on a rule is useless if the rule has no return value, no |
| * tree or template output, and it is not referenced in an action. |
| */ |
| protected void removeUselessLabels(Map<String, LabelElementPair> ruleToElementLabelPairMap) { |
| if ( ruleToElementLabelPairMap==null ) { |
| return; |
| } |
| Collection<LabelElementPair> labels = ruleToElementLabelPairMap.values(); |
| List<String> kill = new ArrayList<String>(); |
| for (LabelElementPair pair : labels) { |
| Rule refdRule = getRule(pair.elementRef.getText()); |
| if ( refdRule!=null && !refdRule.getHasReturnValue() && !pair.actionReferencesLabel ) { |
| //System.out.println(pair.label.getText()+" is useless"); |
| kill.add(pair.label.getText()); |
| } |
| } |
| for (int i = 0; i < kill.size(); i++) { |
| String labelToKill = kill.get(i); |
| // System.out.println("kill "+labelToKill); |
| ruleToElementLabelPairMap.remove(labelToKill); |
| } |
| } |
| |
| /** Track a rule reference within an outermost alt of a rule. Used |
| * at the moment to decide if $ruleref refers to a unique rule ref in |
| * the alt. Rewrite rules force tracking of all rule AST results. |
| * |
| * This data is also used to verify that all rules have been defined. |
| */ |
| public void altReferencesRule(String enclosingRuleName, |
| GrammarAST refScopeAST, |
| GrammarAST refAST, |
| int outerAltNum) |
| { |
| /* Do nothing for now; not sure need; track S.x as x |
| String scope = null; |
| Grammar scopeG = null; |
| if ( refScopeAST!=null ) { |
| if ( !scopedRuleRefs.contains(refScopeAST) ) { |
| scopedRuleRefs.add(refScopeAST); |
| } |
| scope = refScopeAST.getText(); |
| } |
| */ |
| Rule r = getRule(enclosingRuleName); |
| if ( r==null ) { |
| return; // no error here; see NameSpaceChecker |
| } |
| r.trackRuleReferenceInAlt(refAST, outerAltNum); |
| Token refToken = refAST.getToken(); |
| if ( !ruleRefs.contains(refAST) ) { |
| ruleRefs.add(refAST); |
| } |
| } |
| |
| /** Track a token reference within an outermost alt of a rule. Used |
| * to decide if $tokenref refers to a unique token ref in |
| * the alt. Does not track literals! |
| * |
| * Rewrite rules force tracking of all tokens. |
| */ |
| public void altReferencesTokenID(String ruleName, GrammarAST refAST, int outerAltNum) { |
| Rule r = getLocallyDefinedRule(ruleName); |
| if ( r==null ) { |
| return; |
| } |
| r.trackTokenReferenceInAlt(refAST, outerAltNum); |
| if ( !tokenIDRefs.contains(refAST.getToken()) ) { |
| tokenIDRefs.add(refAST.getToken()); |
| } |
| } |
| |
| /** To yield smaller, more readable code, track which rules have their |
| * predefined attributes accessed. If the rule has no user-defined |
| * return values, then don't generate the return value scope classes |
| * etc... Make the rule have void return value. Don't track for lexer |
| * rules. |
| */ |
| public void referenceRuleLabelPredefinedAttribute(String ruleName) { |
| Rule r = getRule(ruleName); |
| if ( r!=null && type!=LEXER ) { |
| // indicate that an action ref'd an attr unless it's in a lexer |
| // so that $ID.text refs don't force lexer rules to define |
| // return values...Token objects are created by the caller instead. |
| r.referencedPredefinedRuleAttributes = true; |
| } |
| } |
| |
| public List<? extends Collection<? extends Rule>> checkAllRulesForLeftRecursion() { |
| return sanity.checkAllRulesForLeftRecursion(); |
| } |
| |
| /** Return a list of left-recursive rules; no analysis can be done |
| * successfully on these. Useful to skip these rules then and also |
| * for ANTLRWorks to highlight them. |
| */ |
| public Set<Rule> getLeftRecursiveRules() { |
| if ( nfa==null ) { |
| buildNFA(); |
| } |
| if ( leftRecursiveRules!=null ) { |
| return leftRecursiveRules; |
| } |
| sanity.checkAllRulesForLeftRecursion(); |
| return leftRecursiveRules; |
| } |
| |
| public void checkRuleReference(GrammarAST scopeAST, |
| GrammarAST refAST, |
| GrammarAST argsAST, |
| String currentRuleName) |
| { |
| sanity.checkRuleReference(scopeAST, refAST, argsAST, currentRuleName); |
| } |
| |
| /** Rules like "a : ;" and "a : {...} ;" should not generate |
| * try/catch blocks for RecognitionException. To detect this |
| * it's probably ok to just look for any reference to an atom |
| * that can match some input. W/o that, the rule is unlikey to have |
| * any else. |
| */ |
| public boolean isEmptyRule(GrammarAST block) { |
| BitSet nonEmptyTerminals = new BitSet(); |
| nonEmptyTerminals.set(ANTLRParser.TOKEN_REF); |
| nonEmptyTerminals.set(ANTLRParser.STRING_LITERAL); |
| nonEmptyTerminals.set(ANTLRParser.CHAR_LITERAL); |
| nonEmptyTerminals.set(ANTLRParser.WILDCARD); |
| nonEmptyTerminals.set(ANTLRParser.RULE_REF); |
| return findFirstTypeOutsideRewrite(block, nonEmptyTerminals) == null; |
| } |
| |
| protected GrammarAST findFirstTypeOutsideRewrite(GrammarAST block, BitSet types) { |
| ArrayList<GrammarAST> worklist = new ArrayList<GrammarAST>(); |
| worklist.add(block); |
| while (!worklist.isEmpty()) { |
| GrammarAST current = worklist.remove(worklist.size() - 1); |
| if (current.getType() == ANTLRParser.REWRITE) { |
| continue; |
| } |
| |
| if (current.getType() >= 0 && types.get(current.getType())) { |
| return current; |
| } |
| |
| worklist.addAll(Arrays.asList(current.getChildrenAsArray())); |
| } |
| |
| return null; |
| } |
| |
| public boolean isAtomTokenType(int ttype) { |
| return ttype == ANTLRParser.WILDCARD|| |
| ttype == ANTLRParser.CHAR_LITERAL|| |
| ttype == ANTLRParser.CHAR_RANGE|| |
| ttype == ANTLRParser.STRING_LITERAL|| |
| ttype == ANTLRParser.NOT|| |
| (type != LEXER && ttype == ANTLRParser.TOKEN_REF); |
| } |
| |
| public int getTokenType(String tokenName) { |
| Integer I; |
| if ( tokenName.charAt(0)=='\'') { |
| I = composite.stringLiteralToTypeMap.get(tokenName); |
| } |
| else { // must be a label like ID |
| I = composite.tokenIDToTypeMap.get(tokenName); |
| } |
| int i = (I!=null)? I :Label.INVALID; |
| //System.out.println("grammar type "+type+" "+tokenName+"->"+i); |
| return i; |
| } |
| |
| /** Get the list of tokens that are IDs like BLOCK and LPAREN */ |
| public Set<String> getTokenIDs() { |
| return composite.tokenIDToTypeMap.keySet(); |
| } |
| |
| /** Return an ordered integer list of token types that have no |
| * corresponding token ID like INT or KEYWORD_BEGIN; for stuff |
| * like 'begin'. |
| */ |
| public Collection<Integer> getTokenTypesWithoutID() { |
| List<Integer> types = new ArrayList<Integer>(); |
| for (int t =Label.MIN_TOKEN_TYPE; t<=getMaxTokenType(); t++) { |
| String name = getTokenDisplayName(t); |
| if ( name.charAt(0)=='\'' ) { |
| types.add(Utils.integer(t)); |
| } |
| } |
| return types; |
| } |
| |
| /** Get a list of all token IDs and literals that have an associated |
| * token type. |
| */ |
| public Set<String> getTokenDisplayNames() { |
| Set<String> names = new HashSet<String>(); |
| for (int t =Label.MIN_TOKEN_TYPE; t <=getMaxTokenType(); t++) { |
| names.add(getTokenDisplayName(t)); |
| } |
| return names; |
| } |
| |
| /** Given a literal like (the 3 char sequence with single quotes) 'a', |
| * return the int value of 'a'. Convert escape sequences here also. |
| * ANTLR's antlr.g parser does not convert escape sequences. |
| * |
| * 11/26/2005: I changed literals to always be '...' even for strings. |
| * This routine still works though. |
| */ |
| public static int getCharValueFromGrammarCharLiteral(String literal) { |
| switch ( literal.length() ) { |
| case 3 : |
| // 'x' |
| return literal.charAt(1); // no escape char |
| case 4 : |
| // '\x' (antlr lexer will catch invalid char) |
| if ( Character.isDigit(literal.charAt(2)) ) { |
| ErrorManager.error(ErrorManager.MSG_SYNTAX_ERROR, |
| "invalid char literal: "+literal); |
| return -1; |
| } |
| int escChar = literal.charAt(2); |
| int charVal = ANTLRLiteralEscapedCharValue[escChar]; |
| if ( charVal==0 ) { |
| // Unnecessary escapes like '\{' should just yield { |
| return escChar; |
| } |
| return charVal; |
| case 8 : |
| // '\u1234' |
| String unicodeChars = literal.substring(3,literal.length()-1); |
| return Integer.parseInt(unicodeChars, 16); |
| default : |
| ErrorManager.error(ErrorManager.MSG_SYNTAX_ERROR, |
| "invalid char literal: "+literal); |
| return -1; |
| } |
| } |
| |
| /** ANTLR does not convert escape sequences during the parse phase because |
| * it could not know how to print String/char literals back out when |
| * printing grammars etc... Someone in China might use the real unicode |
| * char in a literal as it will display on their screen; when printing |
| * back out, I could not know whether to display or use a unicode escape. |
| * |
| * This routine converts a string literal with possible escape sequences |
| * into a pure string of 16-bit char values. Escapes and unicode \u0000 |
| * specs are converted to pure chars. return in a buffer; people may |
| * want to walk/manipulate further. |
| * |
| * The NFA construction routine must know the actual char values. |
| */ |
| public static StringBuffer getUnescapedStringFromGrammarStringLiteral(String literal) { |
| //System.out.println("escape: ["+literal+"]"); |
| StringBuffer buf = new StringBuffer(); |
| int last = literal.length()-1; // skip quotes on outside |
| for (int i=1; i<last; i++) { |
| char c = literal.charAt(i); |
| if ( c=='\\' ) { |
| i++; |
| c = literal.charAt(i); |
| if ( Character.toUpperCase(c)=='U' ) { |
| // \u0000 |
| i++; |
| String unicodeChars = literal.substring(i,i+4); |
| // parse the unicode 16 bit hex value |
| int val = Integer.parseInt(unicodeChars, 16); |
| i+=4-1; // loop will inc by 1; only jump 3 then |
| buf.append((char)val); |
| } |
| else if ( Character.isDigit(c) ) { |
| ErrorManager.error(ErrorManager.MSG_SYNTAX_ERROR, |
| "invalid char literal: "+literal); |
| buf.append("\\").append(c); |
| } |
| else { |
| buf.append((char)ANTLRLiteralEscapedCharValue[c]); // normal \x escape |
| } |
| } |
| else { |
| buf.append(c); // simple char x |
| } |
| } |
| //System.out.println("string: ["+buf.toString()+"]"); |
| return buf; |
| } |
| |
| /** Pull your token definitions from an existing grammar in memory. |
| * You must use Grammar() ctor then this method then setGrammarContent() |
| * to make this work. This was useful primarily for testing and |
| * interpreting grammars until I added import grammar functionality. |
| * When you import a grammar you implicitly import its vocabulary as well |
| * and keep the same token type values. |
| * |
| * Returns the max token type found. |
| */ |
| public int importTokenVocabulary(Grammar importFromGr) { |
| Set<String> importedTokenIDs = importFromGr.getTokenIDs(); |
| for (String tokenID : importedTokenIDs) { |
| int tokenType = importFromGr.getTokenType(tokenID); |
| composite.maxTokenType = Math.max(composite.maxTokenType,tokenType); |
| if ( tokenType>=Label.MIN_TOKEN_TYPE ) { |
| //System.out.println("import token from grammar "+tokenID+"="+tokenType); |
| defineToken(tokenID, tokenType); |
| } |
| } |
| return composite.maxTokenType; // return max found |
| } |
| |
| /** Import the rules/tokens of a delegate grammar. All delegate grammars are |
| * read during the ctor of first Grammar created. |
| * |
| * Do not create NFA here because NFA construction needs to hook up with |
| * overridden rules in delegation root grammar. |
| */ |
| public void importGrammar(GrammarAST grammarNameAST, String label) { |
| String grammarName = grammarNameAST.getText(); |
| //System.out.println("import "+gfile.getName()); |
| String gname = grammarName + GRAMMAR_FILE_EXTENSION; |
| BufferedReader br = null; |
| try { |
| String fullName = tool.getLibraryFile(gname); |
| FileReader fr = new FileReader(fullName); |
| br = new BufferedReader(fr); |
| Grammar delegateGrammar; |
| delegateGrammar = new Grammar(tool, gname, composite); |
| delegateGrammar.label = label; |
| |
| addDelegateGrammar(delegateGrammar); |
| |
| delegateGrammar.parseAndBuildAST(br); |
| delegateGrammar.addRulesForSyntacticPredicates(); |
| if ( !validImport(delegateGrammar) ) { |
| ErrorManager.grammarError(ErrorManager.MSG_INVALID_IMPORT, |
| this, |
| grammarNameAST.token, |
| this, |
| delegateGrammar); |
| return; |
| } |
| if ( this.type==COMBINED && |
| (delegateGrammar.name.equals(this.name+grammarTypeToFileNameSuffix[LEXER])|| |
| delegateGrammar.name.equals(this.name+grammarTypeToFileNameSuffix[PARSER])) ) |
| { |
| ErrorManager.grammarError(ErrorManager.MSG_IMPORT_NAME_CLASH, |
| this, |
| grammarNameAST.token, |
| this, |
| delegateGrammar); |
| return; |
| } |
| if ( delegateGrammar.grammarTree!=null ) { |
| // we have a valid grammar |
| // deal with combined grammars |
| if ( delegateGrammar.type == LEXER && this.type == COMBINED ) { |
| // ooops, we wasted some effort; tell lexer to read it in |
| // later |
| lexerGrammarST.add("imports", grammarName); |
| // but, this parser grammar will need the vocab |
| // so add to composite anyway so we suck in the tokens later |
| } |
| } |
| //System.out.println("Got grammar:\n"+delegateGrammar); |
| } |
| catch (IOException ioe) { |
| ErrorManager.error(ErrorManager.MSG_CANNOT_OPEN_FILE, |
| gname, |
| ioe); |
| } |
| finally { |
| if ( br!=null ) { |
| try { |
| br.close(); |
| } |
| catch (IOException ioe) { |
| ErrorManager.error(ErrorManager.MSG_CANNOT_CLOSE_FILE, |
| gname, |
| ioe); |
| } |
| } |
| } |
| } |
| |
| /** add new delegate to composite tree */ |
| protected void addDelegateGrammar(Grammar delegateGrammar) { |
| CompositeGrammarTree t = composite.delegateGrammarTreeRoot.findNode(this); |
| t.addChild(new CompositeGrammarTree(delegateGrammar)); |
| // make sure new grammar shares this composite |
| delegateGrammar.composite = this.composite; |
| } |
| |
| /** Load a vocab file <vocabName>.tokens and return max token type found. */ |
| public int importTokenVocabulary(GrammarAST tokenVocabOptionAST, |
| String vocabName) |
| { |
| if ( !getGrammarIsRoot() ) { |
| ErrorManager.grammarWarning(ErrorManager.MSG_TOKEN_VOCAB_IN_DELEGATE, |
| this, |
| tokenVocabOptionAST.token, |
| name); |
| return composite.maxTokenType; |
| } |
| |
| File fullFile = tool.getImportedVocabFile(vocabName); |
| try { |
| FileReader fr = new FileReader(fullFile); |
| BufferedReader br = new BufferedReader(fr); |
| StreamTokenizer tokenizer = new StreamTokenizer(br); |
| tokenizer.parseNumbers(); |
| tokenizer.wordChars('_', '_'); |
| tokenizer.eolIsSignificant(true); |
| tokenizer.slashSlashComments(true); |
| tokenizer.slashStarComments(true); |
| tokenizer.ordinaryChar('='); |
| tokenizer.quoteChar('\''); |
| tokenizer.whitespaceChars(' ',' '); |
| tokenizer.whitespaceChars('\t','\t'); |
| int lineNum = 1; |
| int token = tokenizer.nextToken(); |
| while (token != StreamTokenizer.TT_EOF) { |
| String tokenID; |
| if ( token == StreamTokenizer.TT_WORD ) { |
| tokenID = tokenizer.sval; |
| } |
| else if ( token == '\'' ) { |
| tokenID = "'"+tokenizer.sval+"'"; |
| } |
| else { |
| ErrorManager.error(ErrorManager.MSG_TOKENS_FILE_SYNTAX_ERROR, |
| vocabName+CodeGenerator.VOCAB_FILE_EXTENSION, |
| Utils.integer(lineNum)); |
| while ( tokenizer.nextToken() != StreamTokenizer.TT_EOL ) {} |
| token = tokenizer.nextToken(); |
| continue; |
| } |
| token = tokenizer.nextToken(); |
| if ( token != '=' ) { |
| ErrorManager.error(ErrorManager.MSG_TOKENS_FILE_SYNTAX_ERROR, |
| vocabName+CodeGenerator.VOCAB_FILE_EXTENSION, |
| Utils.integer(lineNum)); |
| while ( tokenizer.nextToken() != StreamTokenizer.TT_EOL ) {} |
| token = tokenizer.nextToken(); |
| continue; |
| } |
| token = tokenizer.nextToken(); // skip '=' |
| if ( token != StreamTokenizer.TT_NUMBER ) { |
| ErrorManager.error(ErrorManager.MSG_TOKENS_FILE_SYNTAX_ERROR, |
| vocabName+CodeGenerator.VOCAB_FILE_EXTENSION, |
| Utils.integer(lineNum)); |
| while ( tokenizer.nextToken() != StreamTokenizer.TT_EOL ) {} |
| token = tokenizer.nextToken(); |
| continue; |
| } |
| int tokenType = (int)tokenizer.nval; |
| token = tokenizer.nextToken(); |
| //System.out.println("import "+tokenID+"="+tokenType); |
| composite.maxTokenType = Math.max(composite.maxTokenType,tokenType); |
| defineToken(tokenID, tokenType); |
| lineNum++; |
| if ( token != StreamTokenizer.TT_EOL ) { |
| ErrorManager.error(ErrorManager.MSG_TOKENS_FILE_SYNTAX_ERROR, |
| vocabName+CodeGenerator.VOCAB_FILE_EXTENSION, |
| Utils.integer(lineNum)); |
| while ( tokenizer.nextToken() != StreamTokenizer.TT_EOL ) {} |
| token = tokenizer.nextToken(); |
| continue; |
| } |
| token = tokenizer.nextToken(); // skip newline |
| } |
| br.close(); |
| } |
| catch (FileNotFoundException fnfe) { |
| ErrorManager.error(ErrorManager.MSG_CANNOT_FIND_TOKENS_FILE, |
| fullFile); |
| } |
| catch (IOException ioe) { |
| ErrorManager.error(ErrorManager.MSG_ERROR_READING_TOKENS_FILE, |
| fullFile, |
| ioe); |
| } |
| catch (Exception e) { |
| ErrorManager.error(ErrorManager.MSG_ERROR_READING_TOKENS_FILE, |
| fullFile, |
| e); |
| } |
| return composite.maxTokenType; |
| } |
| |
| /** Given a token type, get a meaningful name for it such as the ID |
| * or string literal. If this is a lexer and the ttype is in the |
| * char vocabulary, compute an ANTLR-valid (possibly escaped) char literal. |
| */ |
| public String getTokenDisplayName(int ttype) { |
| String tokenName; |
| int index; |
| // inside any target's char range and is lexer grammar? |
| if ( this.type==LEXER && |
| ttype >= Label.MIN_CHAR_VALUE && ttype <= Label.MAX_CHAR_VALUE ) |
| { |
| return getANTLRCharLiteralForChar(ttype); |
| } |
| // faux label? |
| else if ( ttype<0 ) { |
| tokenName = composite.typeToTokenList.get(Label.NUM_FAUX_LABELS+ttype); |
| } |
| else { |
| // compute index in typeToTokenList for ttype |
| index = ttype-1; // normalize to 0..n-1 |
| index += Label.NUM_FAUX_LABELS; // jump over faux tokens |
| |
| if ( index<composite.typeToTokenList.size() ) { |
| tokenName = composite.typeToTokenList.get(index); |
| if ( tokenName!=null && |
| tokenName.startsWith(AUTO_GENERATED_TOKEN_NAME_PREFIX) ) |
| { |
| tokenName = composite.typeToStringLiteralList.get(ttype); |
| } |
| } |
| else { |
| tokenName = String.valueOf(ttype); |
| } |
| } |
| //System.out.println("getTokenDisplayName ttype="+ttype+", index="+index+", name="+tokenName); |
| return tokenName; |
| } |
| |
| /** Get the list of ANTLR String literals */ |
| public Set<String> getStringLiterals() { |
| return composite.stringLiteralToTypeMap.keySet(); |
| } |
| |
| public String getGrammarTypeString() { |
| return grammarTypeToString[type]; |
| } |
| |
| public int getGrammarMaxLookahead() { |
| if ( global_k>=0 ) { |
| return global_k; |
| } |
| Object k = getOption("k"); |
| if ( k==null ) { |
| global_k = 0; |
| } |
| else if (k instanceof Integer) { |
| Integer kI = (Integer)k; |
| global_k = kI; |
| } |
| else { |
| // must be String "*" |
| if ( k.equals("*") ) { // this the default anyway |
| global_k = 0; |
| } |
| } |
| return global_k; |
| } |
| |
| /** Save the option key/value pair and process it; return the key |
| * or null if invalid option. |
| */ |
| public String setOption(String key, Object value, Token optionsStartToken) { |
| if ( legalOption(key) ) { |
| ErrorManager.grammarError(ErrorManager.MSG_ILLEGAL_OPTION, |
| this, |
| optionsStartToken, |
| key); |
| return null; |
| } |
| if ( !optionIsValid(key, value) ) { |
| return null; |
| } |
| if ( key.equals("backtrack") && value.toString().equals("true") ) { |
| composite.getRootGrammar().atLeastOneBacktrackOption = true; |
| } |
| if ( options==null ) { |
| options = new HashMap<String, Object>(); |
| } |
| options.put(key, value); |
| return key; |
| } |
| |
| public boolean legalOption(String key) { |
| switch ( type ) { |
| case LEXER : |
| return !legalLexerOptions.contains(key); |
| case PARSER : |
| return !legalParserOptions.contains(key); |
| case TREE_PARSER : |
| return !legalTreeParserOptions.contains(key); |
| default : |
| return !legalParserOptions.contains(key); |
| } |
| } |
| |
| public void setOptions(Map<String, Object> options, Token optionsStartToken) { |
| if ( options==null ) { |
| this.options = null; |
| return; |
| } |
| Set<String> keys = options.keySet(); |
| for (Iterator<String> it = keys.iterator(); it.hasNext();) { |
| String optionName = it.next(); |
| Object optionValue = options.get(optionName); |
| String stored=setOption(optionName, optionValue, optionsStartToken); |
| if ( stored==null ) { |
| it.remove(); |
| } |
| } |
| } |
| |
| public Object getOption(String key) { |
| return composite.getOption(key); |
| } |
| |
| public Object getLocallyDefinedOption(String key) { |
| Object value = null; |
| if ( options!=null ) { |
| value = options.get(key); |
| } |
| if ( value==null ) { |
| value = defaultOptions.get(key); |
| } |
| return value; |
| } |
| |
| public Object getBlockOption(GrammarAST blockAST, String key) { |
| String v = (String)blockAST.getBlockOption(key); |
| if ( v!=null ) { |
| return v; |
| } |
| if ( type==Grammar.LEXER ) { |
| return defaultLexerBlockOptions.get(key); |
| } |
| return defaultBlockOptions.get(key); |
| } |
| |
| public int getUserMaxLookahead(int decision) { |
| int user_k = 0; |
| GrammarAST blockAST = nfa.grammar.getDecisionBlockAST(decision); |
| Object k = blockAST.getBlockOption("k"); |
| if ( k==null ) { |
| user_k = nfa.grammar.getGrammarMaxLookahead(); |
| return user_k; |
| } |
| if (k instanceof Integer) { |
| Integer kI = (Integer)k; |
| user_k = kI; |
| } |
| else { |
| // must be String "*" |
| if ( k.equals("*") ) { |
| user_k = 0; |
| } |
| } |
| return user_k; |
| } |
| |
| public boolean getAutoBacktrackMode(int decision) { |
| NFAState decisionNFAStartState = getDecisionNFAStartState(decision); |
| String autoBacktrack = |
| (String)getBlockOption(decisionNFAStartState.associatedASTNode, "backtrack"); |
| |
| if ( autoBacktrack==null ) { |
| autoBacktrack = (String)nfa.grammar.getOption("backtrack"); |
| } |
| return autoBacktrack!=null&&autoBacktrack.equals("true"); |
| } |
| |
| public boolean optionIsValid(String key, Object value) { |
| return true; |
| } |
| |
| public boolean buildAST() { |
| String outputType = (String)getOption("output"); |
| if ( outputType!=null ) { |
| return outputType.toString().equals("AST"); |
| } |
| return false; |
| } |
| |
| public boolean rewriteMode() { |
| Object outputType = getOption("rewrite"); |
| if ( outputType!=null ) { |
| return outputType.toString().equals("true"); |
| } |
| return false; |
| } |
| |
| public boolean isBuiltFromString() { |
| return builtFromString; |
| } |
| |
| public boolean buildTemplate() { |
| String outputType = (String)getOption("output"); |
| if ( outputType!=null ) { |
| return outputType.toString().equals("template"); |
| } |
| return false; |
| } |
| |
| public Collection<Rule> getRules() { |
| return nameToRuleMap.values(); |
| } |
| |
| /** Get the set of Rules that need to have manual delegations |
| * like "void rule() { importedGrammar.rule(); }" |
| * |
| * If this grammar is master, get list of all rule definitions from all |
| * delegate grammars. Only master has complete interface from combined |
| * grammars...we will generated delegates as helper objects. |
| * |
| * Composite grammars that are not the root/master do not have complete |
| * interfaces. It is not my intention that people use subcomposites. |
| * Only the outermost grammar should be used from outside code. The |
| * other grammar components are specifically generated to work only |
| * with the master/root. |
| * |
| * delegatedRules = imported - overridden |
| */ |
| public Set<? extends Rule> getDelegatedRules() { |
| return composite.getDelegatedRules(this); |
| } |
| |
| /** Get set of all rules imported from all delegate grammars even if |
| * indirectly delegated. |
| */ |
| public Set<? extends Rule> getAllImportedRules() { |
| return composite.getAllImportedRules(this); |
| } |
| |
| /** Get list of all delegates from all grammars directly or indirectly |
| * imported into this grammar. |
| */ |
| public List<Grammar> getDelegates() { |
| return composite.getDelegates(this); |
| } |
| |
| public boolean getHasDelegates() { |
| return !getDelegates().isEmpty(); |
| } |
| |
| public List<String> getDelegateNames() { |
| // compute delegates:{Grammar g | return g.name;} |
| List<String> names = new ArrayList<String>(); |
| List<Grammar> delegates = composite.getDelegates(this); |
| if ( delegates!=null ) { |
| for (Grammar g : delegates) { |
| names.add(g.name); |
| } |
| } |
| return names; |
| } |
| |
| public List<Grammar> getDirectDelegates() { |
| return composite.getDirectDelegates(this); |
| } |
| |
| /** Get delegates below direct delegates */ |
| public List<Grammar> getIndirectDelegates() { |
| return composite.getIndirectDelegates(this); |
| } |
| |
| /** Get list of all delegators. This amounts to the grammars on the path |
| * to the root of the delegation tree. |
| */ |
| public List<Grammar> getDelegators() { |
| return composite.getDelegators(this); |
| } |
| |
| /** Who's my direct parent grammar? */ |
| public Grammar getDelegator() { |
| return composite.getDelegator(this); |
| } |
| |
| public Set<Rule> getDelegatedRuleReferences() { |
| return delegatedRuleReferences; |
| } |
| |
| public boolean getGrammarIsRoot() { |
| return composite.delegateGrammarTreeRoot.grammar == this; |
| } |
| |
| public void setRuleAST(String ruleName, GrammarAST t) { |
| Rule r = getLocallyDefinedRule(ruleName); |
| if ( r!=null ) { |
| r.tree = t; |
| r.EORNode = t.getLastChild(); |
| } |
| } |
| |
| public NFAState getRuleStartState(String ruleName) { |
| return getRuleStartState(null, ruleName); |
| } |
| |
| public NFAState getRuleStartState(String scopeName, String ruleName) { |
| Rule r = getRule(scopeName, ruleName); |
| if ( r!=null ) { |
| //System.out.println("getRuleStartState("+scopeName+", "+ruleName+")="+r.startState); |
| return r.startState; |
| } |
| //System.out.println("getRuleStartState("+scopeName+", "+ruleName+")=null"); |
| return null; |
| } |
| |
| public String getRuleModifier(String ruleName) { |
| Rule r = getRule(ruleName); |
| if ( r!=null ) { |
| return r.modifier; |
| } |
| return null; |
| } |
| |
| public NFAState getRuleStopState(String ruleName) { |
| Rule r = getRule(ruleName); |
| if ( r!=null ) { |
| return r.stopState; |
| } |
| return null; |
| } |
| |
| public int assignDecisionNumber(NFAState state) { |
| decisionCount++; |
| state.setDecisionNumber(decisionCount); |
| return decisionCount; |
| } |
| |
| protected Decision getDecision(int decision) { |
| int index = decision-1; |
| if ( index >= indexToDecision.size() ) { |
| return null; |
| } |
| Decision d = indexToDecision.get(index); |
| return d; |
| } |
| |
| public List<Decision> getDecisions() { |
| return indexToDecision; |
| } |
| |
| protected Decision createDecision(int decision) { |
| int index = decision-1; |
| if ( index < indexToDecision.size() ) { |
| return getDecision(decision); // don't recreate |
| } |
| Decision d = new Decision(); |
| d.decision = decision; |
| d.grammar = this; |
| indexToDecision.setSize(getNumberOfDecisions()); |
| indexToDecision.set(index, d); |
| return d; |
| } |
| |
| public List<NFAState> getDecisionNFAStartStateList() { |
| List<NFAState> states = new ArrayList<NFAState>(100); |
| for (int d = 0; d < indexToDecision.size(); d++) { |
| Decision dec = indexToDecision.get(d); |
| states.add(dec.startState); |
| } |
| return states; |
| } |
| |
| public NFAState getDecisionNFAStartState(int decision) { |
| Decision d = getDecision(decision); |
| if ( d==null ) { |
| return null; |
| } |
| return d.startState; |
| } |
| |
| public DFA getLookaheadDFA(int decision) { |
| Decision d = getDecision(decision); |
| if ( d==null ) { |
| return null; |
| } |
| return d.dfa; |
| } |
| |
| public GrammarAST getDecisionBlockAST(int decision) { |
| Decision d = getDecision(decision); |
| if ( d==null ) { |
| return null; |
| } |
| return d.blockAST; |
| } |
| |
| /** returns a list of column numbers for all decisions |
| * on a particular line so ANTLRWorks choose the decision |
| * depending on the location of the cursor (otherwise, |
| * ANTLRWorks has to give the *exact* location which |
| * is not easy from the user point of view). |
| * |
| * This is not particularly fast as it walks entire line:col→DFA map |
| * looking for a prefix of "line:". |
| */ |
| public List<Integer> getLookaheadDFAColumnsForLineInFile(int line) { |
| String prefix = line+":"; |
| List<Integer> columns = new ArrayList<Integer>(); |
| for (String key : lineColumnToLookaheadDFAMap.keySet()) { |
| if(key.startsWith(prefix)) { |
| columns.add(Integer.valueOf(key.substring(prefix.length()))); |
| } |
| } |
| return columns; |
| } |
| |
| /** Useful for ANTLRWorks to map position in file to the DFA for display */ |
| public DFA getLookaheadDFAFromPositionInFile(int line, int col) { |
| return lineColumnToLookaheadDFAMap.get( |
| new StringBuffer().append(line).append(":").append(col).toString()); |
| } |
| |
| public Map<String, DFA> getLineColumnToLookaheadDFAMap() { |
| return lineColumnToLookaheadDFAMap; |
| } |
| |
| /* |
| public void setDecisionOptions(int decision, Map options) { |
| Decision d = createDecision(decision); |
| d.options = options; |
| } |
| |
| public void setDecisionOption(int decision, String name, Object value) { |
| Decision d = getDecision(decision); |
| if ( d!=null ) { |
| if ( d.options==null ) { |
| d.options = new HashMap(); |
| } |
| d.options.put(name,value); |
| } |
| } |
| |
| public Map getDecisionOptions(int decision) { |
| Decision d = getDecision(decision); |
| if ( d==null ) { |
| return null; |
| } |
| return d.options; |
| } |
| */ |
| |
| public int getNumberOfDecisions() { |
| return decisionCount; |
| } |
| |
| public int getNumberOfCyclicDecisions() { |
| int n = 0; |
| for (int i=1; i<=getNumberOfDecisions(); i++) { |
| Decision d = getDecision(i); |
| if ( d.dfa!=null && d.dfa.isCyclic() ) { |
| n++; |
| } |
| } |
| return n; |
| } |
| |
| /** Set the lookahead DFA for a particular decision. This means |
| * that the appropriate AST node must updated to have the new lookahead |
| * DFA. This method could be used to properly set the DFAs without |
| * using the createLookaheadDFAs() method. You could do this |
| * |
| * Grammar g = new Grammar("..."); |
| * g.setLookahead(1, dfa1); |
| * g.setLookahead(2, dfa2); |
| * ... |
| */ |
| public void setLookaheadDFA(int decision, DFA lookaheadDFA) { |
| Decision d = createDecision(decision); |
| d.dfa = lookaheadDFA; |
| GrammarAST ast = d.startState.associatedASTNode; |
| ast.setLookaheadDFA(lookaheadDFA); |
| } |
| |
| public void setDecisionNFA(int decision, NFAState state) { |
| Decision d = createDecision(decision); |
| d.startState = state; |
| } |
| |
| public void setDecisionBlockAST(int decision, GrammarAST blockAST) { |
| //System.out.println("setDecisionBlockAST("+decision+", "+blockAST.token); |
| Decision d = createDecision(decision); |
| d.blockAST = blockAST; |
| } |
| |
| public boolean allDecisionDFAHaveBeenCreated() { |
| return allDecisionDFACreated; |
| } |
| |
| /** How many token types have been allocated so far? */ |
| public int getMaxTokenType() { |
| return composite.maxTokenType; |
| } |
| |
| /** What is the max char value possible for this grammar's target? Use |
| * unicode max if no target defined. |
| */ |
| public int getMaxCharValue() { |
| if ( generator!=null ) { |
| return generator.target.getMaxCharValue(generator); |
| } |
| else { |
| return Label.MAX_CHAR_VALUE; |
| } |
| } |
| |
| /** Return a set of all possible token or char types for this grammar */ |
| public IntSet getTokenTypes() { |
| if ( type==LEXER ) { |
| return getAllCharValues(); |
| } |
| return IntervalSet.of(Label.MIN_TOKEN_TYPE, getMaxTokenType()); |
| } |
| |
| /** If there is a char vocabulary, use it; else return min to max char |
| * as defined by the target. If no target, use max unicode char value. |
| */ |
| public IntSet getAllCharValues() { |
| if ( charVocabulary!=null ) { |
| return charVocabulary; |
| } |
| IntSet allChar = IntervalSet.of(Label.MIN_CHAR_VALUE, getMaxCharValue()); |
| return allChar; |
| } |
| |
| /** Return a string representing the escaped char for code c. E.g., If c |
| * has value 0x100, you will get "\u0100". ASCII gets the usual |
| * char (non-hex) representation. Control characters are spit out |
| * as unicode. While this is specially set up for returning Java strings, |
| * it can be used by any language target that has the same syntax. :) |
| * |
| * 11/26/2005: I changed this to use double quotes, consistent with antlr.g |
| * 12/09/2005: I changed so everything is single quotes |
| */ |
| public static String getANTLRCharLiteralForChar(int c) { |
| if ( c<Label.MIN_CHAR_VALUE ) { |
| ErrorManager.internalError("invalid char value "+c); |
| return "'<INVALID>'"; |
| } |
| if ( c<ANTLRLiteralCharValueEscape.length && ANTLRLiteralCharValueEscape[c]!=null ) { |
| return '\''+ANTLRLiteralCharValueEscape[c]+'\''; |
| } |
| if ( Character.UnicodeBlock.of((char)c)==Character.UnicodeBlock.BASIC_LATIN && |
| !Character.isISOControl((char)c) ) { |
| if ( c=='\\' ) { |
| return "'\\\\'"; |
| } |
| if ( c=='\'') { |
| return "'\\''"; |
| } |
| return '\''+Character.toString((char)c)+'\''; |
| } |
| // turn on the bit above max "\uFFFF" value so that we pad with zeros |
| // then only take last 4 digits |
| String hex = Integer.toHexString(c|0x10000).toUpperCase().substring(1,5); |
| String unicodeStr = "'\\u"+hex+"'"; |
| return unicodeStr; |
| } |
| |
| /** For lexer grammars, return everything in unicode not in set. |
| * For parser and tree grammars, return everything in token space |
| * from MIN_TOKEN_TYPE to last valid token type or char value. |
| */ |
| public IntSet complement(IntSet set) { |
| //System.out.println("complement "+set.toString(this)); |
| //System.out.println("vocabulary "+getTokenTypes().toString(this)); |
| IntSet c = set.complement(getTokenTypes()); |
| //System.out.println("result="+c.toString(this)); |
| return c; |
| } |
| |
| public IntSet complement(int atom) { |
| return complement(IntervalSet.of(atom)); |
| } |
| |
| /** Given set tree like ( SET A B ), check that A and B |
| * are both valid sets themselves, else we must tree like a BLOCK |
| */ |
| public boolean isValidSet(TreeToNFAConverter nfabuilder, GrammarAST t) { |
| boolean valid; |
| try { |
| //System.out.println("parse BLOCK as set tree: "+t.toStringTree()); |
| int alts = nfabuilder.testBlockAsSet(t); |
| valid = alts > 1; |
| } |
| catch (RecognitionException re) { |
| // The rule did not parse as a set, return null; ignore exception |
| valid = false; |
| } |
| //System.out.println("valid? "+valid); |
| return valid; |
| } |
| |
| /** Get the set equivalent (if any) of the indicated rule from this |
| * grammar. Mostly used in the lexer to do ~T for some fragment rule |
| * T. If the rule AST has a SET use that. If the rule is a single char |
| * convert it to a set and return. If rule is not a simple set (w/o actions) |
| * then return null. |
| * Rules have AST form: |
| * |
| * ^( RULE ID modifier ARG RET SCOPE block EOR ) |
| */ |
| public IntSet getSetFromRule(TreeToNFAConverter nfabuilder, String ruleName) |
| throws RecognitionException |
| { |
| Rule r = getRule(ruleName); |
| if ( r==null ) { |
| return null; |
| } |
| IntSet elements; |
| //System.out.println("parsed tree: "+r.tree.toStringTree()); |
| elements = nfabuilder.setRule(r.tree); |
| //System.out.println("elements="+elements); |
| return elements; |
| } |
| |
| /** Decisions are linked together with transition(1). Count how |
| * many there are. This is here rather than in NFAState because |
| * a grammar decides how NFAs are put together to form a decision. |
| */ |
| public int getNumberOfAltsForDecisionNFA(NFAState decisionState) { |
| if ( decisionState==null ) { |
| return 0; |
| } |
| int n = 1; |
| NFAState p = decisionState; |
| while ( p.transition[1] !=null ) { |
| n++; |
| p = (NFAState)p.transition[1].target; |
| } |
| return n; |
| } |
| |
| /** Get the ith alternative (1..n) from a decision; return null when |
| * an invalid alt is requested. I must count in to find the right |
| * alternative number. For (A|B), you get NFA structure (roughly): |
| * |
| * o->o-A->o |
| * | |
| * o->o-B->o |
| * |
| * This routine returns the leftmost state for each alt. So alt=1, returns |
| * the upperleft most state in this structure. |
| */ |
| public NFAState getNFAStateForAltOfDecision(NFAState decisionState, int alt) { |
| if ( decisionState==null || alt<=0 ) { |
| return null; |
| } |
| int n = 1; |
| NFAState p = decisionState; |
| while ( p!=null ) { |
| if ( n==alt ) { |
| return p; |
| } |
| n++; |
| Transition next = p.transition[1]; |
| p = null; |
| if ( next!=null ) { |
| p = (NFAState)next.target; |
| } |
| } |
| return null; |
| } |
| |
| /* |
| public void computeRuleFOLLOWSets() { |
| if ( getNumberOfDecisions()==0 ) { |
| createNFAs(); |
| } |
| for (Iterator it = getRules().iterator(); it.hasNext();) { |
| Rule r = (Rule)it.next(); |
| if ( r.isSynPred ) { |
| continue; |
| } |
| LookaheadSet s = ll1Analyzer.FOLLOW(r); |
| System.out.println("FOLLOW("+r.name+")="+s); |
| } |
| } |
| */ |
| |
| public LookaheadSet FIRST(NFAState s) { |
| return ll1Analyzer.FIRST(s); |
| } |
| |
| public LookaheadSet LOOK(NFAState s) { |
| return ll1Analyzer.LOOK(s); |
| } |
| |
| public void setCodeGenerator(CodeGenerator generator) { |
| this.generator = generator; |
| } |
| |
| public CodeGenerator getCodeGenerator() { |
| return generator; |
| } |
| |
| public GrammarAST getGrammarTree() { |
| return grammarTree; |
| } |
| |
| public void setGrammarTree(GrammarAST value) { |
| grammarTree = value; |
| } |
| |
| public Tool getTool() { |
| return tool; |
| } |
| |
| public void setTool(Tool tool) { |
| this.tool = tool; |
| } |
| |
| /** given a token type and the text of the literal, come up with a |
| * decent token type label. For now it's just T<type>. Actually, |
| * if there is an aliased name from tokens like PLUS='+', use it. |
| */ |
| public String computeTokenNameFromLiteral(int tokenType, String literal) { |
| return AUTO_GENERATED_TOKEN_NAME_PREFIX +tokenType; |
| } |
| |
| @Override |
| public String toString() { |
| // return "FFFFFFFFFFFFFF"; |
| return grammarTreeToString(grammarTree); |
| } |
| |
| public String grammarTreeToString(GrammarAST t) { |
| return grammarTreeToString(t, true); |
| } |
| |
| public String grammarTreeToString(GrammarAST t, boolean showActions) { |
| String s; |
| try { |
| s = t.getLine()+":"+(t.getCharPositionInLine()+1)+": "; |
| s += new ANTLRTreePrinter(new CommonTreeNodeStream(t)).toString(this, showActions); |
| } |
| catch (Exception e) { |
| s = "<invalid or missing tree structure>"; |
| } |
| return s; |
| } |
| |
| public void printGrammar(PrintStream output) { |
| ANTLRTreePrinter printer = new ANTLRTreePrinter(new CommonTreeNodeStream(getGrammarTree())); |
| try { |
| String g = printer.toString(this, false); |
| output.println(g); |
| } |
| catch (RecognitionException re) { |
| ErrorManager.error(ErrorManager.MSG_SYNTAX_ERROR,re); |
| } |
| } |
| |
| } |