| /* |
| * [The "BSD license"] |
| * Copyright (c) 2010 Terence Parr |
| * All rights reserved. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions |
| * are met: |
| * 1. Redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer. |
| * 2. Redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in the |
| * documentation and/or other materials provided with the distribution. |
| * 3. The name of the author may not be used to endorse or promote products |
| * derived from this software without specific prior written permission. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR |
| * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
| * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. |
| * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, |
| * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
| * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF |
| * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| */ |
| package org.antlr.codegen; |
| |
| import org.antlr.Tool; |
| import org.stringtemplate.v4.ST; |
| import org.antlr.tool.Grammar; |
| |
| import java.io.IOException; |
| import java.util.ArrayList; |
| |
| public class CTarget extends Target { |
| |
| ArrayList strings = new ArrayList(); |
| |
| @Override |
| protected void genRecognizerFile(Tool tool, |
| CodeGenerator generator, |
| Grammar grammar, |
| ST outputFileST) |
| throws IOException { |
| |
| // Before we write this, and cause it to generate its string, |
| // we need to add all the string literals that we are going to match |
| // |
| outputFileST.add("literals", strings); |
| String fileName = generator.getRecognizerFileName(grammar.name, grammar.type); |
| generator.write(outputFileST, fileName); |
| } |
| |
| @Override |
| protected void genRecognizerHeaderFile(Tool tool, |
| CodeGenerator generator, |
| Grammar grammar, |
| ST headerFileST, |
| String extName) |
| throws IOException { |
| // Pick up the file name we are generating. This method will return a |
| // a file suffixed with .c, so we must substring and add the extName |
| // to it as we cannot assign into strings in Java. |
| /// |
| String fileName = generator.getRecognizerFileName(grammar.name, grammar.type); |
| fileName = fileName.substring(0, fileName.length() - 2) + extName; |
| |
| generator.write(headerFileST, fileName); |
| } |
| |
| protected ST chooseWhereCyclicDFAsGo(Tool tool, |
| CodeGenerator generator, |
| Grammar grammar, |
| ST recognizerST, |
| ST cyclicDFAST) { |
| return recognizerST; |
| } |
| |
| /** Is scope in @scope::name {action} valid for this kind of grammar? |
| * Targets like C++ may want to allow new scopes like headerfile or |
| * some such. The action names themselves are not policed at the |
| * moment so targets can add template actions w/o having to recompile |
| * ANTLR. |
| */ |
| @Override |
| public boolean isValidActionScope(int grammarType, String scope) { |
| switch (grammarType) { |
| case Grammar.LEXER: |
| if (scope.equals("lexer")) { |
| return true; |
| } |
| if (scope.equals("header")) { |
| return true; |
| } |
| if (scope.equals("includes")) { |
| return true; |
| } |
| if (scope.equals("preincludes")) { |
| return true; |
| } |
| if (scope.equals("overrides")) { |
| return true; |
| } |
| break; |
| case Grammar.PARSER: |
| if (scope.equals("parser")) { |
| return true; |
| } |
| if (scope.equals("header")) { |
| return true; |
| } |
| if (scope.equals("includes")) { |
| return true; |
| } |
| if (scope.equals("preincludes")) { |
| return true; |
| } |
| if (scope.equals("overrides")) { |
| return true; |
| } |
| break; |
| case Grammar.COMBINED: |
| if (scope.equals("parser")) { |
| return true; |
| } |
| if (scope.equals("lexer")) { |
| return true; |
| } |
| if (scope.equals("header")) { |
| return true; |
| } |
| if (scope.equals("includes")) { |
| return true; |
| } |
| if (scope.equals("preincludes")) { |
| return true; |
| } |
| if (scope.equals("overrides")) { |
| return true; |
| } |
| break; |
| case Grammar.TREE_PARSER: |
| if (scope.equals("treeparser")) { |
| return true; |
| } |
| if (scope.equals("header")) { |
| return true; |
| } |
| if (scope.equals("includes")) { |
| return true; |
| } |
| if (scope.equals("preincludes")) { |
| return true; |
| } |
| if (scope.equals("overrides")) { |
| return true; |
| } |
| break; |
| } |
| return false; |
| } |
| |
| @Override |
| public String getTargetCharLiteralFromANTLRCharLiteral( |
| CodeGenerator generator, |
| String literal) { |
| |
| if (literal.startsWith("'\\u")) { |
| literal = "0x" + literal.substring(3, 7); |
| } else { |
| int c = literal.charAt(1); |
| |
| if (c < 32 || c > 127) { |
| literal = "0x" + Integer.toHexString(c); |
| } |
| } |
| |
| return literal; |
| } |
| |
| /** Convert from an ANTLR string literal found in a grammar file to |
| * an equivalent string literal in the C target. |
| * Because we must support Unicode character sets and have chosen |
| * to have the lexer match UTF32 characters, then we must encode |
| * string matches to use 32 bit character arrays. Here then we |
| * must produce the C array and cater for the case where the |
| * lexer has been encoded with a string such as 'xyz\n', |
| */ |
| @Override |
| public String getTargetStringLiteralFromANTLRStringLiteral( |
| CodeGenerator generator, |
| String literal) { |
| int index; |
| String bytes; |
| StringBuffer buf = new StringBuffer(); |
| |
| buf.append("{ "); |
| |
| // We need ot lose any escaped characters of the form \x and just |
| // replace them with their actual values as well as lose the surrounding |
| // quote marks. |
| // |
| for (int i = 1; i < literal.length() - 1; i++) { |
| buf.append("0x"); |
| |
| if (literal.charAt(i) == '\\') { |
| i++; // Assume that there is a next character, this will just yield |
| // invalid strings if not, which is what the input would be of course - invalid |
| switch (literal.charAt(i)) { |
| case 'u': |
| case 'U': |
| buf.append(literal.substring(i + 1, i + 5)); // Already a hex string |
| i = i + 5; // Move to next string/char/escape |
| break; |
| |
| case 'n': |
| case 'N': |
| |
| buf.append("0A"); |
| break; |
| |
| case 'r': |
| case 'R': |
| |
| buf.append("0D"); |
| break; |
| |
| case 't': |
| case 'T': |
| |
| buf.append("09"); |
| break; |
| |
| case 'b': |
| case 'B': |
| |
| buf.append("08"); |
| break; |
| |
| case 'f': |
| case 'F': |
| |
| buf.append("0C"); |
| break; |
| |
| default: |
| |
| // Anything else is what it is! |
| // |
| buf.append(Integer.toHexString((int) literal.charAt(i)).toUpperCase()); |
| break; |
| } |
| } else { |
| buf.append(Integer.toHexString((int) literal.charAt(i)).toUpperCase()); |
| } |
| buf.append(", "); |
| } |
| buf.append(" ANTLR3_STRING_TERMINATOR}"); |
| |
| bytes = buf.toString(); |
| index = strings.indexOf(bytes); |
| |
| if (index == -1) { |
| strings.add(bytes); |
| index = strings.indexOf(bytes); |
| } |
| |
| String strref = "lit_" + String.valueOf(index + 1); |
| |
| return strref; |
| } |
| |
| /** |
| * Overrides the standard grammar analysis so we can prepare the analyser |
| * a little differently from the other targets. |
| * |
| * In particular we want to influence the way the code generator makes assumptions about |
| * switchs vs ifs, vs table driven DFAs. In general, C code should be generated that |
| * has the minimum use of tables, and tha meximum use of large switch statements. This |
| * allows the optimizers to generate very efficient code, it can reduce object code size |
| * by about 30% and give about a 20% performance improvement over not doing this. Hence, |
| * for the C target only, we change the defaults here, but only if they are still set to the |
| * defaults. |
| * |
| * @param generator An instance of the generic code generator class. |
| * @param grammar The grammar that we are currently analyzing |
| */ |
| @Override |
| protected void performGrammarAnalysis(CodeGenerator generator, Grammar grammar) { |
| |
| // Check to see if the maximum inline DFA states is still set to |
| // the default size. If it is then whack it all the way up to the maximum that |
| // we can sensibly get away with. |
| // |
| if (CodeGenerator.MAX_ACYCLIC_DFA_STATES_INLINE == CodeGenerator.MAX_ACYCLIC_DFA_STATES_INLINE ) { |
| |
| CodeGenerator.MAX_ACYCLIC_DFA_STATES_INLINE = 65535; |
| } |
| |
| // Check to see if the maximum switch size is still set to the default |
| // and bring it up much higher if it is. Modern C compilers can handle |
| // much bigger switch statements than say Java can and if anyone finds a compiler |
| // that cannot deal with such big switches, all the need do is generate the |
| // code with a reduced -Xmaxswitchcaselabels nnn |
| // |
| if (CodeGenerator.MAX_SWITCH_CASE_LABELS == CodeGenerator.MSCL_DEFAULT) { |
| |
| CodeGenerator.MAX_SWITCH_CASE_LABELS = 3000; |
| } |
| |
| // Check to see if the number of transitions considered a miminum for using |
| // a switch is still at the default. Because a switch is still generally faster than |
| // an if even with small sets, and given that the optimizer will do the best thing with it |
| // anyway, then we simply want to generate a switch for any number of states. |
| // |
| if (CodeGenerator.MIN_SWITCH_ALTS == CodeGenerator.MSA_DEFAULT) { |
| |
| CodeGenerator.MIN_SWITCH_ALTS = 1; |
| } |
| |
| // Now we allow the superclass implementation to do whatever it feels it |
| // must do. |
| // |
| super.performGrammarAnalysis(generator, grammar); |
| } |
| } |
| |