antlr-3.4/tool/src/main/java/org/antlr/codegen/CTarget.java - platform/external/antlr - Git at Google

 /*
  * [The "BSD license"]
  *  Copyright (c) 2010 Terence Parr
  *  All rights reserved.
  *
  *  Redistribution and use in source and binary forms, with or without
  *  modification, are permitted provided that the following conditions
  *  are met:
  *  1. Redistributions of source code must retain the above copyright
  *      notice, this list of conditions and the following disclaimer.
  *  2. Redistributions in binary form must reproduce the above copyright
  *      notice, this list of conditions and the following disclaimer in the
  *      documentation and/or other materials provided with the distribution.
  *  3. The name of the author may not be used to endorse or promote products
  *      derived from this software without specific prior written permission.
  *
  *  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  *  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  *  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  *  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  *  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 package org.antlr.codegen;

 import org.antlr.Tool;
 import org.stringtemplate.v4.ST;
 import org.antlr.tool.Grammar;

 import java.io.IOException;
 import java.util.ArrayList;

 /**
  * Code generation target that emits C.
  *
  * <p>String literals found in the grammar are converted into brace-enclosed
  * arrays of hexadecimal character codes, collected in {@link #strings}, and
  * passed to the recognizer template as the {@code literals} attribute. The
  * rest of the generated code refers to them as {@code lit_1}, {@code lit_2},
  * and so on.
  */
 public class CTarget extends Target {

     /**
      * Array initializers for every distinct string literal converted so far.
      * The 1-based position of an entry is the numeric suffix of the
      * {@code lit_N} name returned for it.
      */
     ArrayList<String> strings = new ArrayList<String>();

     /**
      * Writes the recognizer source file.
      *
      * @param tool the ANTLR tool instance (not used here)
      * @param generator supplies the output file name and performs the write
      * @param grammar the grammar whose name and type select the file name
      * @param outputFileST the recognizer template; receives the collected
      *        literals under the attribute name {@code literals}
      * @throws IOException declared for the write performed by {@code generator}
      */
     @Override
     protected void genRecognizerFile(Tool tool,
             CodeGenerator generator,
             Grammar grammar,
             ST outputFileST)
             throws IOException {

         // The literal table has to be attached before the template is
         // written, because writing is what renders it to text.
         outputFileST.add("literals", strings);
         String fileName = generator.getRecognizerFileName(grammar.name, grammar.type);
         generator.write(outputFileST, fileName);
     }

     /**
      * Writes the recognizer header file.
      *
      * @param tool the ANTLR tool instance (not used here)
      * @param generator supplies the recognizer file name and performs the write
      * @param grammar the grammar whose name and type select the file name
      * @param headerFileST the header template to write
      * @param extName extension appended in place of the recognizer file's
      *        last two characters
      * @throws IOException declared for the write performed by {@code generator}
      */
     @Override
     protected void genRecognizerHeaderFile(Tool tool,
             CodeGenerator generator,
             Grammar grammar,
             ST headerFileST,
             String extName)
             throws IOException {
         // The recognizer file name is expected to end in ".c"; drop those
         // two characters and put extName in their place.
         String recognizerName = generator.getRecognizerFileName(grammar.name, grammar.type);
         String fileName = recognizerName.substring(0, recognizerName.length() - 2) + extName;

         generator.write(headerFileST, fileName);
     }

     /**
      * Selects the template that receives cyclic DFAs.
      *
      * @return always {@code recognizerST}; the other arguments are ignored
      */
     protected ST chooseWhereCyclicDFAsGo(Tool tool,
             CodeGenerator generator,
             Grammar grammar,
             ST recognizerST,
             ST cyclicDFAST) {
         return recognizerST;
     }

     /**
      * Is scope in {@code @scope::name {action}} valid for this kind of grammar?
      *
      * <p>Every recognized grammar type accepts {@code header},
      * {@code includes}, {@code preincludes} and {@code overrides}. In addition
      * a lexer accepts {@code lexer}, a parser accepts {@code parser}, a
      * combined grammar accepts both, and a tree parser accepts
      * {@code treeparser}. Action names themselves are not checked here.
      *
      * @param grammarType one of the {@code Grammar} type constants
      * @param scope the scope name to check
      * @return {@code true} if the scope is allowed; {@code false} otherwise,
      *         including for any grammar type not listed above
      */
     @Override
     public boolean isValidActionScope(int grammarType, String scope) {
         switch (grammarType) {
             case Grammar.LEXER:
                 return scope.equals("lexer") || isCommonActionScope(scope);
             case Grammar.PARSER:
                 return scope.equals("parser") || isCommonActionScope(scope);
             case Grammar.COMBINED:
                 return scope.equals("parser")
                         || scope.equals("lexer")
                         || isCommonActionScope(scope);
             case Grammar.TREE_PARSER:
                 return scope.equals("treeparser") || isCommonActionScope(scope);
             default:
                 return false;
         }
     }

     /** Returns true for the scopes that every recognized grammar type accepts. */
     private static boolean isCommonActionScope(String scope) {
         return scope.equals("header")
                 || scope.equals("includes")
                 || scope.equals("preincludes")
                 || scope.equals("overrides");
     }

     /**
      * Converts an ANTLR character literal to its C form.
      *
      * <p>A literal that starts with {@code '\\u} becomes {@code 0x} followed by
      * the four characters after the {@code u}. Otherwise, if the character
      * right after the opening quote is below 32 or above 127, the literal
      * becomes {@code 0x} followed by that character's code in lowercase hex.
      * Anything else is returned unchanged.
      *
      * @param generator the code generator (not used here)
      * @param literal the literal text including its surrounding quotes
      * @return the C representation of the literal
      */
     @Override
     public String getTargetCharLiteralFromANTLRCharLiteral(
             CodeGenerator generator,
             String literal) {

         if (literal.startsWith("'\\u")) {
             return "0x" + literal.substring(3, 7);
         }

         int c = literal.charAt(1);
         if (c < 32 || c > 127) {
             return "0x" + Integer.toHexString(c);
         }
         return literal;
     }

     /**
      * Converts an ANTLR string literal found in a grammar file to a reference
      * to an equivalent literal in the C target.
      *
      * <p>The text between the surrounding quotes is turned into a
      * brace-enclosed list of hex character codes ending in
      * {@code ANTLR3_STRING_TERMINATOR}, with backslash escapes such as
      * {@code \n} or {@code \\uXXXX} replaced by the code they stand for. The
      * list is stored in {@link #strings} unless an identical one is already
      * there.
      *
      * @param generator the code generator (not used here)
      * @param literal the literal text including its surrounding quotes
      * @return {@code lit_N}, where N is the 1-based position of the list in
      *         {@link #strings}
      */
     @Override
     public String getTargetStringLiteralFromANTLRStringLiteral(
             CodeGenerator generator,
             String literal) {
         StringBuilder buf = new StringBuilder();

         buf.append("{ ");

         // Skip the surrounding quote marks and emit one hex constant per
         // character, replacing escapes of the form \x with their values.
         for (int i = 1; i < literal.length() - 1; i++) {
             buf.append("0x");

             char ch = literal.charAt(i);
             if (ch != '\\') {
                 buf.append(Integer.toHexString((int) ch).toUpperCase());
             } else {
                 // Assume a character follows the backslash; if not, the input
                 // was invalid and so is the output.
                 i++;
                 switch (literal.charAt(i)) {
                     case 'u':
                     case 'U':
                         // The four digits after the 'u' are already hex.
                         buf.append(literal.substring(i + 1, i + 5));
                         // Stop on the last digit: the loop's own i++ then
                         // lands on the character after the escape. Advancing
                         // by 5 would silently drop that character.
                         i += 4;
                         break;

                     case 'n':
                     case 'N':
                         buf.append("0A");
                         break;

                     case 'r':
                     case 'R':
                         buf.append("0D");
                         break;

                     case 't':
                     case 'T':
                         buf.append("09");
                         break;

                     case 'b':
                     case 'B':
                         buf.append("08");
                         break;

                     case 'f':
                     case 'F':
                         buf.append("0C");
                         break;

                     default:
                         // Any other escaped character stands for itself.
                         buf.append(Integer.toHexString((int) literal.charAt(i)).toUpperCase());
                         break;
                 }
             }
             buf.append(", ");
         }
         buf.append(" ANTLR3_STRING_TERMINATOR}");

         String bytes = buf.toString();
         int index = strings.indexOf(bytes);

         if (index == -1) {
             // Not seen before: it goes on the end, so its index is the
             // size of the list before the add.
             index = strings.size();
             strings.add(bytes);
         }

         return "lit_" + String.valueOf(index + 1);
     }

     /**
      * Overrides the standard grammar analysis so the analyser can be prepared
      * a little differently from the other targets.
      *
      * <p>The aim is to influence the choice between switches, ifs and table
      * driven DFAs. C code should make minimal use of tables and maximal use
      * of large switch statements, which lets C optimizers generate efficient
      * code; the original author's figures are about 30% smaller object code
      * and about 20% better performance. Each limit is raised only if it still
      * holds its default, so that a value chosen by the user is respected.
      *
      * @param generator An instance of the generic code generator class.
      * @param grammar The grammar that we are currently analyzing
      */
     @Override
     protected void performGrammarAnalysis(CodeGenerator generator, Grammar grammar) {

         // If the maximum number of inline DFA states is still at its default,
         // raise it as far as we can sensibly get away with. The comparison
         // must be against the default constant: comparing the setting with
         // itself is always true and would overwrite a user-supplied value.
         if (CodeGenerator.MAX_ACYCLIC_DFA_STATES_INLINE == CodeGenerator.MADSI_DEFAULT) {
             CodeGenerator.MAX_ACYCLIC_DFA_STATES_INLINE = 65535;
         }

         // If the maximum switch size is still at its default, raise it.
         // Modern C compilers can handle much bigger switch statements than,
         // say, Java can. Anyone whose compiler cannot cope need only generate
         // the code with a reduced -Xmaxswitchcaselabels nnn.
         if (CodeGenerator.MAX_SWITCH_CASE_LABELS == CodeGenerator.MSCL_DEFAULT) {
             CodeGenerator.MAX_SWITCH_CASE_LABELS = 3000;
         }

         // If the minimum number of transitions for using a switch is still at
         // its default, lower it to 1. A switch is generally faster than an if
         // even for small sets and the optimizer will do the best thing with
         // it anyway, so generate a switch for any number of states.
         if (CodeGenerator.MIN_SWITCH_ALTS == CodeGenerator.MSA_DEFAULT) {
             CodeGenerator.MIN_SWITCH_ALTS = 1;
         }

         // Now let the superclass implementation do whatever it must do.
         super.performGrammarAnalysis(generator, grammar);
     }
 }
	/*
	* [The "BSD license"]
	* Copyright (c) 2010 Terence Parr
	* All rights reserved.
	*
	* Redistribution and use in source and binary forms, with or without
	* modification, are permitted provided that the following conditions
	* are met:
	* 1. Redistributions of source code must retain the above copyright
	* notice, this list of conditions and the following disclaimer.
	* 2. Redistributions in binary form must reproduce the above copyright
	* notice, this list of conditions and the following disclaimer in the
	* documentation and/or other materials provided with the distribution.
	* 3. The name of the author may not be used to endorse or promote products
	* derived from this software without specific prior written permission.
	*
	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
	* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
	* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
	* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
	* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
	*/
	package org.antlr.codegen;

	import org.antlr.Tool;
	import org.stringtemplate.v4.ST;
	import org.antlr.tool.Grammar;

	import java.io.IOException;
	import java.util.ArrayList;

	public class CTarget extends Target {

	ArrayList strings = new ArrayList();

	@Override
	protected void genRecognizerFile(Tool tool,
	CodeGenerator generator,
	Grammar grammar,
	ST outputFileST)
	throws IOException {

	// Before we write this, and cause it to generate its string,
	// we need to add all the string literals that we are going to match
	//
	outputFileST.add("literals", strings);
	String fileName = generator.getRecognizerFileName(grammar.name, grammar.type);
	generator.write(outputFileST, fileName);
	}

	@Override
	protected void genRecognizerHeaderFile(Tool tool,
	CodeGenerator generator,
	Grammar grammar,
	ST headerFileST,
	String extName)
	throws IOException {
	// Pick up the file name we are generating. This method will return a
	// a file suffixed with .c, so we must substring and add the extName
	// to it as we cannot assign into strings in Java.
	///
	String fileName = generator.getRecognizerFileName(grammar.name, grammar.type);
	fileName = fileName.substring(0, fileName.length() - 2) + extName;

	generator.write(headerFileST, fileName);
	}

	protected ST chooseWhereCyclicDFAsGo(Tool tool,
	CodeGenerator generator,
	Grammar grammar,
	ST recognizerST,
	ST cyclicDFAST) {
	return recognizerST;
	}

	/** Is scope in @scope::name {action} valid for this kind of grammar?
	* Targets like C++ may want to allow new scopes like headerfile or
	* some such. The action names themselves are not policed at the
	* moment so targets can add template actions w/o having to recompile
	* ANTLR.
	*/
	@Override
	public boolean isValidActionScope(int grammarType, String scope) {
	switch (grammarType) {
	case Grammar.LEXER:
	if (scope.equals("lexer")) {
	return true;
	}
	if (scope.equals("header")) {
	return true;
	}
	if (scope.equals("includes")) {
	return true;
	}
	if (scope.equals("preincludes")) {
	return true;
	}
	if (scope.equals("overrides")) {
	return true;
	}
	break;
	case Grammar.PARSER:
	if (scope.equals("parser")) {
	return true;
	}
	if (scope.equals("header")) {
	return true;
	}
	if (scope.equals("includes")) {
	return true;
	}
	if (scope.equals("preincludes")) {
	return true;
	}
	if (scope.equals("overrides")) {
	return true;
	}
	break;
	case Grammar.COMBINED:
	if (scope.equals("parser")) {
	return true;
	}
	if (scope.equals("lexer")) {
	return true;
	}
	if (scope.equals("header")) {
	return true;
	}
	if (scope.equals("includes")) {
	return true;
	}
	if (scope.equals("preincludes")) {
	return true;
	}
	if (scope.equals("overrides")) {
	return true;
	}
	break;
	case Grammar.TREE_PARSER:
	if (scope.equals("treeparser")) {
	return true;
	}
	if (scope.equals("header")) {
	return true;
	}
	if (scope.equals("includes")) {
	return true;
	}
	if (scope.equals("preincludes")) {
	return true;
	}
	if (scope.equals("overrides")) {
	return true;
	}
	break;
	}
	return false;
	}

	@Override
	public String getTargetCharLiteralFromANTLRCharLiteral(
	CodeGenerator generator,
	String literal) {

	if (literal.startsWith("'\\u")) {
	literal = "0x" + literal.substring(3, 7);
	} else {
	int c = literal.charAt(1);

	if (c < 32 \|\| c > 127) {
	literal = "0x" + Integer.toHexString(c);
	}
	}

	return literal;
	}

	/** Convert from an ANTLR string literal found in a grammar file to
	* an equivalent string literal in the C target.
	* Because we must support Unicode character sets and have chosen
	* to have the lexer match UTF32 characters, then we must encode
	* string matches to use 32 bit character arrays. Here then we
	* must produce the C array and cater for the case where the
	* lexer has been encoded with a string such as 'xyz\n',
	*/
	@Override
	public String getTargetStringLiteralFromANTLRStringLiteral(
	CodeGenerator generator,
	String literal) {
	int index;
	String bytes;
	StringBuffer buf = new StringBuffer();

	buf.append("{ ");

	// We need ot lose any escaped characters of the form \x and just
	// replace them with their actual values as well as lose the surrounding
	// quote marks.
	//
	for (int i = 1; i < literal.length() - 1; i++) {
	buf.append("0x");

	if (literal.charAt(i) == '\\') {
	i++; // Assume that there is a next character, this will just yield
	// invalid strings if not, which is what the input would be of course - invalid
	switch (literal.charAt(i)) {
	case 'u':
	case 'U':
	buf.append(literal.substring(i + 1, i + 5)); // Already a hex string
	i = i + 5; // Move to next string/char/escape
	break;

	case 'n':
	case 'N':

	buf.append("0A");
	break;

	case 'r':
	case 'R':

	buf.append("0D");
	break;

	case 't':
	case 'T':

	buf.append("09");
	break;

	case 'b':
	case 'B':

	buf.append("08");
	break;

	case 'f':
	case 'F':

	buf.append("0C");
	break;

	default:

	// Anything else is what it is!
	//
	buf.append(Integer.toHexString((int) literal.charAt(i)).toUpperCase());
	break;
	}
	} else {
	buf.append(Integer.toHexString((int) literal.charAt(i)).toUpperCase());
	}
	buf.append(", ");
	}
	buf.append(" ANTLR3_STRING_TERMINATOR}");

	bytes = buf.toString();
	index = strings.indexOf(bytes);

	if (index == -1) {
	strings.add(bytes);
	index = strings.indexOf(bytes);
	}

	String strref = "lit_" + String.valueOf(index + 1);

	return strref;
	}

	/**
	* Overrides the standard grammar analysis so we can prepare the analyser
	* a little differently from the other targets.
	*
	* In particular we want to influence the way the code generator makes assumptions about
	* switchs vs ifs, vs table driven DFAs. In general, C code should be generated that
	* has the minimum use of tables, and tha meximum use of large switch statements. This
	* allows the optimizers to generate very efficient code, it can reduce object code size
	* by about 30% and give about a 20% performance improvement over not doing this. Hence,
	* for the C target only, we change the defaults here, but only if they are still set to the
	* defaults.
	*
	* @param generator An instance of the generic code generator class.
	* @param grammar The grammar that we are currently analyzing
	*/
	@Override
	protected void performGrammarAnalysis(CodeGenerator generator, Grammar grammar) {

	// Check to see if the maximum inline DFA states is still set to
	// the default size. If it is then whack it all the way up to the maximum that
	// we can sensibly get away with.
	//
	if (CodeGenerator.MAX_ACYCLIC_DFA_STATES_INLINE == CodeGenerator.MAX_ACYCLIC_DFA_STATES_INLINE ) {

	CodeGenerator.MAX_ACYCLIC_DFA_STATES_INLINE = 65535;
	}

	// Check to see if the maximum switch size is still set to the default
	// and bring it up much higher if it is. Modern C compilers can handle
	// much bigger switch statements than say Java can and if anyone finds a compiler
	// that cannot deal with such big switches, all the need do is generate the
	// code with a reduced -Xmaxswitchcaselabels nnn
	//
	if (CodeGenerator.MAX_SWITCH_CASE_LABELS == CodeGenerator.MSCL_DEFAULT) {

	CodeGenerator.MAX_SWITCH_CASE_LABELS = 3000;
	}

	// Check to see if the number of transitions considered a miminum for using
	// a switch is still at the default. Because a switch is still generally faster than
	// an if even with small sets, and given that the optimizer will do the best thing with it
	// anyway, then we simply want to generate a switch for any number of states.
	//
	if (CodeGenerator.MIN_SWITCH_ALTS == CodeGenerator.MSA_DEFAULT) {

	CodeGenerator.MIN_SWITCH_ALTS = 1;
	}

	// Now we allow the superclass implementation to do whatever it feels it
	// must do.
	//
	super.performGrammarAnalysis(generator, grammar);
	}
	}