blob: 51911c3aa627906b202e5db9af72587ff59c95d9 [file] [log] [blame]
/*
* [The "BSD license"]
* Copyright (c) 2010 Terence Parr
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.antlr.codegen;
import org.antlr.Tool;
import org.stringtemplate.v4.ST;
import org.antlr.tool.Grammar;
import java.io.IOException;
import java.util.ArrayList;
/**
 * Code generation target for C.
 *
 * Compared with the generic {@link Target} this class:
 * <ul>
 *   <li>collects every string literal it is asked to convert and hands the
 *       collection to the recognizer template as the {@code literals}
 *       attribute;</li>
 *   <li>writes the header file next to the recognizer file, using the
 *       extension supplied by the caller;</li>
 *   <li>restricts the set of valid action scopes per grammar type;</li>
 *   <li>raises the code generator's switch/inline-DFA limits when they are
 *       still at their defaults.</li>
 * </ul>
 */
public class CTarget extends Target {

    /**
     * Encoded array initialisers, one per distinct string literal converted by
     * {@link #getTargetStringLiteralFromANTLRStringLiteral}. The position of an
     * entry (1-based) is the numeric suffix of the {@code lit_N} reference
     * returned for it.
     */
    ArrayList<String> strings = new ArrayList<String>();

    /**
     * Writes the recognizer file.
     *
     * @param tool         the running tool instance (unused here)
     * @param generator    generator that names and writes the output file
     * @param grammar      grammar whose name and type select the file name
     * @param outputFileST template for the recognizer file
     * @throws IOException if the generator fails to write the file
     */
    @Override
    protected void genRecognizerFile(Tool tool,
                                     CodeGenerator generator,
                                     Grammar grammar,
                                     ST outputFileST)
            throws IOException {
        // The collected string literals must be attached before the template
        // is written, because writing is what renders it.
        outputFileST.add("literals", strings);
        String fileName = generator.getRecognizerFileName(grammar.name, grammar.type);
        generator.write(outputFileST, fileName);
    }

    /**
     * Writes the recognizer header file.
     *
     * @param tool         the running tool instance (unused here)
     * @param generator    generator that names and writes the output file
     * @param grammar      grammar whose name and type select the file name
     * @param headerFileST template for the header file
     * @param extName      extension (including the dot) to give the header file
     * @throws IOException if the generator fails to write the file
     */
    @Override
    protected void genRecognizerHeaderFile(Tool tool,
                                           CodeGenerator generator,
                                           Grammar grammar,
                                           ST headerFileST,
                                           String extName)
            throws IOException {
        // The generator hands back the recognizer file name, which is expected
        // to end in a two character suffix (".c"). Strip those two characters
        // and append the requested extension instead.
        String fileName = generator.getRecognizerFileName(grammar.name, grammar.type);
        fileName = fileName.substring(0, fileName.length() - 2) + extName;
        generator.write(headerFileST, fileName);
    }

    /**
     * Chooses the template that receives cyclic DFAs. This target always
     * selects the recognizer template.
     *
     * @param tool         the running tool instance (unused here)
     * @param generator    the code generator (unused here)
     * @param grammar      the grammar being generated (unused here)
     * @param recognizerST template for the recognizer
     * @param cyclicDFAST  template for a cyclic DFA (unused here)
     * @return {@code recognizerST}
     */
    protected ST chooseWhereCyclicDFAsGo(Tool tool,
                                         CodeGenerator generator,
                                         Grammar grammar,
                                         ST recognizerST,
                                         ST cyclicDFAST) {
        return recognizerST;
    }

    /** Is scope in @scope::name {action} valid for this kind of grammar?
     *  Targets like C++ may want to allow new scopes like headerfile or
     *  some such. The action names themselves are not policed at the
     *  moment so targets can add template actions w/o having to recompile
     *  ANTLR.
     *
     *  @param grammarType one of the {@code Grammar} type constants
     *  @param scope       the scope name to check
     *  @return true if the scope is accepted for that grammar type; false for
     *          any other scope, any other grammar type, or a null scope
     */
    @Override
    public boolean isValidActionScope(int grammarType, String scope) {
        switch (grammarType) {
            case Grammar.LEXER:
                return "lexer".equals(scope) || isSharedActionScope(scope);
            case Grammar.PARSER:
                return "parser".equals(scope) || isSharedActionScope(scope);
            case Grammar.COMBINED:
                return "parser".equals(scope)
                        || "lexer".equals(scope)
                        || isSharedActionScope(scope);
            case Grammar.TREE_PARSER:
                return "treeparser".equals(scope) || isSharedActionScope(scope);
            default:
                return false;
        }
    }

    /** Scopes accepted for every grammar type this target recognises. */
    private static boolean isSharedActionScope(String scope) {
        return "header".equals(scope)
                || "includes".equals(scope)
                || "preincludes".equals(scope)
                || "overrides".equals(scope);
    }

    /**
     * Converts an ANTLR character literal into its C form.
     *
     * A literal of the form {@code '\}{@code uXXXX'} becomes {@code 0xXXXX}.
     * Otherwise, if the character after the opening quote is below 32 or
     * above 127 it becomes a hexadecimal constant; anything else is returned
     * unchanged.
     *
     * @param generator the code generator (unused here)
     * @param literal   the literal as written in the grammar, quotes included
     * @return the literal to emit in the generated C code
     */
    @Override
    public String getTargetCharLiteralFromANTLRCharLiteral(
            CodeGenerator generator,
            String literal) {
        if (literal.startsWith("'\\u")) {
            // Skip the quote, backslash and 'u'; the next four characters
            // are already hexadecimal digits.
            return "0x" + literal.substring(3, 7);
        }
        int c = literal.charAt(1);
        if (c < 32 || c > 127) {
            return "0x" + Integer.toHexString(c);
        }
        return literal;
    }

    /** Convert from an ANTLR string literal found in a grammar file to
     *  an equivalent string literal in the C target.
     *  Because we must support Unicode character sets and have chosen
     *  to have the lexer match UTF32 characters, then we must encode
     *  string matches to use 32 bit character arrays. Here then we
     *  must produce the C array and cater for the case where the
     *  lexer has been encoded with a string such as 'xyz\n'.
     *
     *  The encoded array is stored in {@link #strings} (once per distinct
     *  value) and a reference of the form {@code lit_N} is returned, where
     *  N is the 1-based position of the array in that list.
     *
     *  @param generator the code generator (unused here)
     *  @param literal   the literal as written in the grammar, quotes included
     *  @return the {@code lit_N} name that refers to the encoded array
     */
    @Override
    public String getTargetStringLiteralFromANTLRStringLiteral(
            CodeGenerator generator,
            String literal) {
        StringBuilder buf = new StringBuilder("{ ");

        // We need to lose any escaped characters of the form \x and just
        // replace them with their actual values, as well as lose the
        // surrounding quote marks (hence the 1 .. length-2 range).
        final int end = literal.length() - 1;
        for (int i = 1; i < end; i++) {
            buf.append("0x");
            char c = literal.charAt(i);
            if (c == '\\') {
                // Assume that there is a next character; if there is not, the
                // input was invalid and so is the output.
                i++;
                c = literal.charAt(i);
                switch (c) {
                    case 'u':
                    case 'U':
                        // The four characters after the 'u' are already hex.
                        buf.append(literal.substring(i + 1, i + 5));
                        // Leave i on the last hex digit: the loop increment
                        // then moves to the character that follows the
                        // escape. Advancing by 5 here would skip that
                        // character.
                        i += 4;
                        break;
                    case 'n':
                    case 'N':
                        buf.append("0A");
                        break;
                    case 'r':
                    case 'R':
                        buf.append("0D");
                        break;
                    case 't':
                    case 'T':
                        buf.append("09");
                        break;
                    case 'b':
                    case 'B':
                        buf.append("08");
                        break;
                    case 'f':
                    case 'F':
                        buf.append("0C");
                        break;
                    default:
                        // Anything else is what it is!
                        buf.append(Integer.toHexString(c).toUpperCase());
                        break;
                }
            } else {
                buf.append(Integer.toHexString(c).toUpperCase());
            }
            buf.append(", ");
        }
        buf.append(" ANTLR3_STRING_TERMINATOR}");

        // Reuse the slot of an identical literal if we have seen one before,
        // otherwise append and use the new last position.
        String bytes = buf.toString();
        int index = strings.indexOf(bytes);
        if (index == -1) {
            strings.add(bytes);
            index = strings.size() - 1;
        }
        return "lit_" + (index + 1);
    }

    /**
     * Overrides the standard grammar analysis so we can prepare the analyser
     * a little differently from the other targets.
     *
     * In particular we want to influence the way the code generator makes assumptions about
     * switches vs ifs, vs table driven DFAs. In general, C code should be generated that
     * has the minimum use of tables, and the maximum use of large switch statements. This
     * allows the optimizers to generate very efficient code, it can reduce object code size
     * by about 30% and give about a 20% performance improvement over not doing this. Hence,
     * for the C target only, we change the defaults here, but only if they are still set to the
     * defaults.
     *
     * @param generator An instance of the generic code generator class.
     * @param grammar The grammar that we are currently analyzing
     */
    @Override
    protected void performGrammarAnalysis(CodeGenerator generator, Grammar grammar) {
        // Check to see if the maximum inline DFA states is still set to
        // the default size. If it is then whack it all the way up to the maximum that
        // we can sensibly get away with. The comparison must be against the
        // default constant; comparing the setting with itself is always true
        // and would silently discard a value the user chose explicitly.
        if (CodeGenerator.MAX_ACYCLIC_DFA_STATES_INLINE == CodeGenerator.MADSI_DEFAULT) {
            CodeGenerator.MAX_ACYCLIC_DFA_STATES_INLINE = 65535;
        }

        // Check to see if the maximum switch size is still set to the default
        // and bring it up much higher if it is. Modern C compilers can handle
        // much bigger switch statements than say Java can and if anyone finds a compiler
        // that cannot deal with such big switches, all they need do is generate the
        // code with a reduced -Xmaxswitchcaselabels nnn
        if (CodeGenerator.MAX_SWITCH_CASE_LABELS == CodeGenerator.MSCL_DEFAULT) {
            CodeGenerator.MAX_SWITCH_CASE_LABELS = 3000;
        }

        // Check to see if the number of transitions considered a minimum for using
        // a switch is still at the default. Because a switch is still generally faster than
        // an if even with small sets, and given that the optimizer will do the best thing with it
        // anyway, then we simply want to generate a switch for any number of states.
        if (CodeGenerator.MIN_SWITCH_ALTS == CodeGenerator.MSA_DEFAULT) {
            CodeGenerator.MIN_SWITCH_ALTS = 1;
        }

        // Now we allow the superclass implementation to do whatever it feels it
        // must do.
        super.performGrammarAnalysis(generator, grammar);
    }
}