| /* |
| * [The "BSD license"] |
| * Copyright (c) 2010 Terence Parr |
| * All rights reserved. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions |
| * are met: |
| * 1. Redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer. |
| * 2. Redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in the |
| * documentation and/or other materials provided with the distribution. |
| * 3. The name of the author may not be used to endorse or promote products |
| * derived from this software without specific prior written permission. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR |
| * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
| * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. |
| * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, |
| * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
| * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF |
| * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| */ |
| package org.antlr.codegen; |
| |
| import org.antlr.Tool; |
| import org.antlr.analysis.Label; |
| import org.antlr.runtime.Token; |
| import org.stringtemplate.v4.ST; |
| import org.antlr.tool.Grammar; |
| |
| import java.io.IOException; |
| import java.util.List; |
| |
| /** The code generator for ANTLR can usually be retargeted just by providing |
| * a new X.stg file for language X, however, sometimes the files that must |
| * be generated vary enough that some X-specific functionality is required. |
| * For example, in C, you must generate header files whereas in Java you do not. |
| * Other languages may want to keep DFA separate from the main |
| * generated recognizer file. |
| * |
| * The notion of a Code Generator target abstracts out the creation |
| * of the various files. As new language targets get added to the ANTLR |
| * system, this target class may have to be altered to handle more |
| * functionality. Eventually, just about all language generation issues |
| * will be expressible in terms of these methods. |
| * |
| * If org.antlr.codegen.XTarget class exists, it is used else |
| * Target base class is used. I am using a superclass rather than an |
| * interface for this target concept because I can add functionality |
| * later without breaking previously written targets (extra interface |
| * methods would force adding dummy functions to all code generator |
| * target classes). |
| * |
| */ |
| public class Target { |
| |
| /** For pure strings of Java 16-bit unicode char, how can we display |
| * it in the target language as a literal. Useful for dumping |
| * predicates and such that may refer to chars that need to be escaped |
| * when represented as strings. Also, templates need to be escaped so |
| * that the target language can hold them as a string. |
| * |
| * I have defined (via the constructor) the set of typical escapes, |
| * but your Target subclass is free to alter the translated chars or |
| * add more definitions. This is nonstatic so each target can have |
| * a different set in memory at same time. |
| */ |
| protected String[] targetCharValueEscape = new String[255]; |
| |
| public Target() { |
| targetCharValueEscape['\n'] = "\\n"; |
| targetCharValueEscape['\r'] = "\\r"; |
| targetCharValueEscape['\t'] = "\\t"; |
| targetCharValueEscape['\b'] = "\\b"; |
| targetCharValueEscape['\f'] = "\\f"; |
| targetCharValueEscape['\\'] = "\\\\"; |
| targetCharValueEscape['\''] = "\\'"; |
| targetCharValueEscape['"'] = "\\\""; |
| } |
| |
| protected void genRecognizerFile(Tool tool, |
| CodeGenerator generator, |
| Grammar grammar, |
| ST outputFileST) |
| throws IOException |
| { |
| String fileName = |
| generator.getRecognizerFileName(grammar.name, grammar.type); |
| generator.write(outputFileST, fileName); |
| } |
| |
| protected void genRecognizerHeaderFile(Tool tool, |
| CodeGenerator generator, |
| Grammar grammar, |
| ST headerFileST, |
| String extName) // e.g., ".h" |
| throws IOException |
| { |
| // no header file by default |
| } |
| |
| protected void performGrammarAnalysis(CodeGenerator generator, |
| Grammar grammar) |
| { |
| // Build NFAs from the grammar AST |
| grammar.buildNFA(); |
| |
| // Create the DFA predictors for each decision |
| grammar.createLookaheadDFAs(); |
| } |
| |
| /** Is scope in @scope::name {action} valid for this kind of grammar? |
| * Targets like C++ may want to allow new scopes like headerfile or |
| * some such. The action names themselves are not policed at the |
| * moment so targets can add template actions w/o having to recompile |
| * ANTLR. |
| */ |
| public boolean isValidActionScope(int grammarType, String scope) { |
| switch (grammarType) { |
| case Grammar.LEXER : |
| if ( scope.equals("lexer") ) {return true;} |
| break; |
| case Grammar.PARSER : |
| if ( scope.equals("parser") ) {return true;} |
| break; |
| case Grammar.COMBINED : |
| if ( scope.equals("parser") ) {return true;} |
| if ( scope.equals("lexer") ) {return true;} |
| break; |
| case Grammar.TREE_PARSER : |
| if ( scope.equals("treeparser") ) {return true;} |
| break; |
| } |
| return false; |
| } |
| |
| /** Target must be able to override the labels used for token types */ |
| public String getTokenTypeAsTargetLabel(CodeGenerator generator, int ttype) { |
| String name = generator.grammar.getTokenDisplayName(ttype); |
| // If name is a literal, return the token type instead |
| if ( name.charAt(0)=='\'' ) { |
| return String.valueOf(ttype); |
| } |
| return name; |
| } |
| |
| /** Convert from an ANTLR char literal found in a grammar file to |
| * an equivalent char literal in the target language. For most |
| * languages, this means leaving 'x' as 'x'. Actually, we need |
| * to escape '\u000A' so that it doesn't get converted to \n by |
| * the compiler. Convert the literal to the char value and then |
| * to an appropriate target char literal. |
| * |
| * Expect single quotes around the incoming literal. |
| */ |
| public String getTargetCharLiteralFromANTLRCharLiteral( |
| CodeGenerator generator, |
| String literal) |
| { |
| StringBuffer buf = new StringBuffer(); |
| buf.append('\''); |
| int c = Grammar.getCharValueFromGrammarCharLiteral(literal); |
| if ( c<Label.MIN_CHAR_VALUE ) { |
| return "'\u0000'"; |
| } |
| if ( c<targetCharValueEscape.length && |
| targetCharValueEscape[c]!=null ) |
| { |
| buf.append(targetCharValueEscape[c]); |
| } |
| else if ( Character.UnicodeBlock.of((char)c)== |
| Character.UnicodeBlock.BASIC_LATIN && |
| !Character.isISOControl((char)c) ) |
| { |
| // normal char |
| buf.append((char)c); |
| } |
| else { |
| // must be something unprintable...use \\uXXXX |
| // turn on the bit above max "\\uFFFF" value so that we pad with zeros |
| // then only take last 4 digits |
| String hex = Integer.toHexString(c|0x10000).toUpperCase().substring(1,5); |
| buf.append("\\u"); |
| buf.append(hex); |
| } |
| |
| buf.append('\''); |
| return buf.toString(); |
| } |
| |
| /** Convert from an ANTLR string literal found in a grammar file to |
| * an equivalent string literal in the target language. For Java, this |
| * is the translation 'a\n"' -> "a\n\"". Expect single quotes |
| * around the incoming literal. Just flip the quotes and replace |
| * double quotes with \" |
| * |
| * Note that we have decided to allow poeple to use '\"' without |
| * penalty, so we must build the target string in a loop as Utils.replae |
| * cannot handle both \" and " without a lot of messing around. |
| * |
| */ |
| public String getTargetStringLiteralFromANTLRStringLiteral( |
| CodeGenerator generator, |
| String literal) |
| { |
| StringBuilder sb = new StringBuilder(); |
| StringBuffer is = new StringBuffer(literal); |
| |
| // Opening quote |
| // |
| sb.append('"'); |
| |
| for (int i = 1; i < is.length() -1; i++) { |
| if (is.charAt(i) == '\\') { |
| // Anything escaped is what it is! We assume that |
| // people know how to escape characters correctly. However |
| // we catch anything that does not need an escape in Java (which |
| // is what the default implementation is dealing with and remove |
| // the escape. The C target does this for instance. |
| // |
| switch (is.charAt(i+1)) { |
| // Pass through any escapes that Java also needs |
| // |
| case '"': |
| case 'n': |
| case 'r': |
| case 't': |
| case 'b': |
| case 'f': |
| case '\\': |
| case 'u': // Assume unnnn |
| sb.append('\\'); // Pass the escape through |
| break; |
| default: |
| // Remove the escape by virtue of not adding it here |
| // Thus \' becomes ' and so on |
| // |
| break; |
| } |
| |
| // Go past the \ character |
| // |
| i++; |
| } else { |
| // Chracters that don't need \ in ANTLR 'strings' but do in Java |
| // |
| if (is.charAt(i) == '"') { |
| // We need to escape " in Java |
| // |
| sb.append('\\'); |
| } |
| } |
| // Add in the next character, which may have been escaped |
| // |
| sb.append(is.charAt(i)); |
| } |
| |
| // Append closing " and return |
| // |
| sb.append('"'); |
| |
| return sb.toString(); |
| } |
| |
| /** Given a random string of Java unicode chars, return a new string with |
| * optionally appropriate quote characters for target language and possibly |
| * with some escaped characters. For example, if the incoming string has |
| * actual newline characters, the output of this method would convert them |
| * to the two char sequence \n for Java, C, C++, ... The new string has |
| * double-quotes around it as well. Example String in memory: |
| * |
| * a"[newlinechar]b'c[carriagereturnchar]d[tab]e\f |
| * |
| * would be converted to the valid Java s: |
| * |
| * "a\"\nb'c\rd\te\\f" |
| * |
| * or |
| * |
| * a\"\nb'c\rd\te\\f |
| * |
| * depending on the quoted arg. |
| */ |
| public String getTargetStringLiteralFromString(String s, boolean quoted) { |
| if ( s==null ) { |
| return null; |
| } |
| |
| StringBuffer buf = new StringBuffer(); |
| if ( quoted ) { |
| buf.append('"'); |
| } |
| for (int i=0; i<s.length(); i++) { |
| int c = s.charAt(i); |
| if ( c!='\'' && // don't escape single quotes in strings for java |
| c<targetCharValueEscape.length && |
| targetCharValueEscape[c]!=null ) |
| { |
| buf.append(targetCharValueEscape[c]); |
| } |
| else { |
| buf.append((char)c); |
| } |
| } |
| if ( quoted ) { |
| buf.append('"'); |
| } |
| return buf.toString(); |
| } |
| |
| public String getTargetStringLiteralFromString(String s) { |
| return getTargetStringLiteralFromString(s, false); |
| } |
| |
| /** Convert long to 0xNNNNNNNNNNNNNNNN by default for spitting out |
| * with bitsets. I.e., convert bytes to hex string. |
| */ |
| public String getTarget64BitStringFromValue(long word) { |
| int numHexDigits = 8*2; |
| StringBuffer buf = new StringBuffer(numHexDigits+2); |
| buf.append("0x"); |
| String digits = Long.toHexString(word); |
| digits = digits.toUpperCase(); |
| int padding = numHexDigits - digits.length(); |
| // pad left with zeros |
| for (int i=1; i<=padding; i++) { |
| buf.append('0'); |
| } |
| buf.append(digits); |
| return buf.toString(); |
| } |
| |
| public String encodeIntAsCharEscape(int v) { |
| if ( v<=127 ) { |
| return "\\"+Integer.toOctalString(v); |
| } |
| String hex = Integer.toHexString(v|0x10000).substring(1,5); |
| return "\\u"+hex; |
| } |
| |
| /** Some targets only support ASCII or 8-bit chars/strings. For example, |
| * C++ will probably want to return 0xFF here. |
| */ |
| public int getMaxCharValue(CodeGenerator generator) { |
| return Label.MAX_CHAR_VALUE; |
| } |
| |
| /** Give target a chance to do some postprocessing on actions. |
| * Python for example will have to fix the indention. |
| */ |
| public List postProcessAction(List chunks, Token actionToken) { |
| return chunks; |
| } |
| |
| } |