tool/src/main/java/org/antlr/tool/GrammarSpelunker.java - platform/external/antlr - Git at Google

 /*
  * [The "BSD license"]
  *  Copyright (c) 2010 Terence Parr
  *  All rights reserved.
  *
  *  Redistribution and use in source and binary forms, with or without
  *  modification, are permitted provided that the following conditions
  *  are met:
  *  1. Redistributions of source code must retain the above copyright
  *      notice, this list of conditions and the following disclaimer.
  *  2. Redistributions in binary form must reproduce the above copyright
  *      notice, this list of conditions and the following disclaimer in the
  *      documentation and/or other materials provided with the distribution.
  *  3. The name of the author may not be used to endorse or promote products
  *      derived from this software without specific prior written permission.
  *
  *  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  *  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  *  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  *  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  *  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 package org.antlr.tool;

 import java.io.*;
 import java.util.ArrayList;
 import java.util.List;

 /** Load a grammar file and scan it just until we learn a few items
  *  of interest.  Currently: name, type, imports, tokenVocab, language option.
  *
  *  The nested {@link Scanner} (at bottom of this class) drops comments and
  *  all punctuation it does not care about, so a grammar is seen as a token
  *  stream like:
  *
  *   grammar Java ; options { backtrack true ; memoize true ; }
  *   import JavaDecl JavaAnnotations JavaExpr ;
  *   ... : ...
  *
  *  First ':' or '@' indicates we can stop looking for imports/options.
  *
  *  Then we just grab interesting grammar properties.
  */
 public class GrammarSpelunker {
     protected String grammarFileName;
     /** Current lookahead token; null once the scanner has hit end of input. */
     protected String token;
     protected Scanner scanner;

     // grammar info / properties
     protected String grammarModifier;
     protected String grammarName;
     protected String tokenVocab;
     protected String language = "Java"; // default
     protected String inputDirectory;
     protected List<String> importedGrammars;

     /**
      * @param inputDirectory directory prepended to the file name; may be null,
      *                       in which case grammarFileName is used as given
      * @param grammarFileName name of the grammar file to scan
      */
     public GrammarSpelunker(String inputDirectory, String grammarFileName) {
         this.inputDirectory = inputDirectory;
         this.grammarFileName = grammarFileName;
     }

     /** Advance to the next token; token becomes null at end of input. */
     void consume() throws IOException { token = scanner.nextToken(); }

     /** Consume the current token if it equals expecting.
      *
      *  @throws Error if the current token differs or input is exhausted
      */
     protected void match(String expecting) throws IOException {
         //System.out.println("match "+expecting+"; is "+token);
         if ( expecting.equals(token) ) {
             consume();
             return;
         }
         // token is null at end of input; report that instead of hitting an NPE
         String found = token!=null ? token : "<EOF>";
         throw new Error("Error parsing "+grammarFileName+": '"+found+
                         "' not expected '"+expecting+"'");
     }

     /** Open the grammar file and pull out header, options and imports.
      *  Scanning stops at the first '@' or ':' or at end of input; the rest
      *  of the file is never read.
      *
      *  @throws IOException if the file cannot be opened or read
      *  @throws Error if the header, options or import section is malformed
      */
     public void parse() throws IOException {
         String path =
             (inputDirectory != null ? inputDirectory + File.separator : "") + grammarFileName;
         BufferedReader br = new BufferedReader(new FileReader(path));
         try {
             scanner = new Scanner(br);
             consume();
             grammarHeader();
             // scan until imports or options
             while ( token!=null && !token.equals("@") && !token.equals(":") &&
                     !token.equals("import") && !token.equals("options") )
             {
                 consume();
             }
             // constant-first equals: token is null when the file ended early
             if ( "options".equals(token) ) options();
             // scan until imports or first rule
             while ( token!=null && !token.equals("@") && !token.equals(":") &&
                     !token.equals("import") )
             {
                 consume();
             }
             if ( "import".equals(token) ) imports();
             // ignore rest of input; close up shop
         }
         finally {
             br.close();
         }
     }

     /** Parse "[tree|parser|lexer] grammar Name"; does nothing on empty input. */
     protected void grammarHeader() throws IOException {
         if ( token==null ) return;
         if ( token.equals("tree") || token.equals("parser") || token.equals("lexer") ) {
             grammarModifier=token;
             consume();
         }
         match("grammar");
         grammarName = token;
         consume(); // move beyond name
     }

     /** Parse the options section and remember tokenVocab and language.
      *  '=' is dropped by the scanner, so the section looks like
      *  "options { backtrack true ; tokenVocab MyTokens ; }"
      */
     protected void options() throws IOException {
         match("options");
         match("{");
         while ( token!=null && !token.equals("}") ) {
             String name = token;
             consume();
             String value = null;
             // A value the scanner does not tokenize (e.g. the '*' in "k=*")
             // leaves ';' right after the name; don't mistake it for the value.
             if ( token!=null && !token.equals(";") ) {
                 value = token;
                 consume();
             }
             match(";");
             if ( value==null ) continue;
             if ( name.equals("tokenVocab") ) tokenVocab = value;
             if ( name.equals("language") ) language = value;
         }
         match("}");
     }

     /** Parse the import statement; ',' is dropped by the scanner, so it
      *  looks like "import JavaDecl JavaAnnotations JavaExpr ;".
      *  importedGrammars is left null when no names are listed.
      */
     protected void imports() throws IOException {
         match("import");
         importedGrammars = new ArrayList<String>();
         while ( token!=null && !token.equals(";") ) {
             importedGrammars.add(token);
             consume();
         }
         match(";");
         if ( importedGrammars.size()==0 ) importedGrammars = null;
     }

     /** @return "tree", "parser", "lexer", or null for a combined grammar */
     public String getGrammarModifier() { return grammarModifier; }

     public String getGrammarName() { return grammarName; }

     /** @return value of the tokenVocab option, or null if not set */
     public String getTokenVocab() { return tokenVocab; }

     /** @return value of the language option; "Java" if not set */
     public String getLanguage() { return language; }

     /** @return imported grammar names, or null if there is no import statement */
     public List<String> getImportedGrammars() { return importedGrammars; }

     /** Strip comments and then return stream of words, integers, the
      *  contents of single-quoted strings, and the
      *  tokens {';', ':', '{', '}', '@'}.  Everything else is skipped.
      */
     public static class Scanner {
         public static final int EOF = -1;
         Reader input;
         /** Current lookahead character, or EOF. */
         int c;

         public Scanner(Reader input) throws IOException {
             this.input = input;
             consume();
         }

         boolean isDIGIT() { return c>='0'&&c<='9'; }
         boolean isID_START() { return c>='a'&&c<='z' || c>='A'&&c<='Z'; }
         boolean isID_LETTER() { return isID_START() || c>='0'&&c<='9' || c=='_'; }

         void consume() throws IOException { c = input.read(); }

         /** @return the next token, or null at end of input */
         public String nextToken() throws IOException {
             while ( c!=EOF ) {
                 //System.out.println("check "+(char)c);
                 switch ( c ) {
                     case ';' : consume(); return ";";
                     case '{' : consume(); return "{";
                     case '}' : consume(); return "}";
                     case ':' : consume(); return ":";
                     case '@' : consume(); return "@";
                     case '/' : COMMENT(); break;
                     case '\'': return STRING();
                     default:
                         if ( isID_START() ) return ID();
                         else if ( isDIGIT() ) return INT();
                         consume(); // ignore anything else
                 }
             }
             return null;
         }

         /** ID : LETTER (LETTER|DIGIT|'_')* ; */
         String ID() throws IOException {
             StringBuilder buf = new StringBuilder();
             while ( c!=EOF && isID_LETTER() ) { buf.append((char)c); consume(); }
             return buf.toString();
         }

         /** INT : DIGIT+ ; */
         String INT() throws IOException {
             StringBuilder buf = new StringBuilder();
             while ( c!=EOF && isDIGIT() ) { buf.append((char)c); consume(); }
             return buf.toString();
         }

         /** Return the text between single quotes, without the quotes.
          *  Backslash escapes are kept verbatim (backslash included) so an
          *  escaped quote does not end the string.
          */
         String STRING() throws IOException {
             StringBuilder buf = new StringBuilder();
             consume(); // skip opening '
             while ( c!=EOF && c!='\'' ) {
                 if ( c=='\\' ) {
                     buf.append((char)c);
                     consume();
                     if ( c==EOF ) break; // input ended mid-escape; don't append EOF
                 }
                 buf.append((char)c);
                 consume();
             }
             consume(); // scan past closing '
             return buf.toString();
         }

         /** Skip a block or line comment.  A lone '/' is simply dropped.
          *  An unterminated block comment ends at end of input.
          */
         void COMMENT() throws IOException {
             if ( c!='/' ) return;
             consume();
             if ( c=='*' ) {
                 consume();
                 // stop at EOF as well as at the closing star-slash, so an
                 // unterminated comment cannot spin forever
                 while ( c!=EOF ) {
                     if ( c=='*' ) {
                         consume();
                         if ( c=='/' ) { consume(); return; }
                     }
                     else {
                         consume();
                     }
                 }
             }
             else if ( c=='/' ) {
                 while ( c!=EOF && c!='\n' ) consume();
             }
         }
     }

     /** Tester; Give grammar filename as arg */
     public static void main(String[] args) throws IOException {
         if ( args.length==0 ) {
             System.err.println("usage: java org.antlr.tool.GrammarSpelunker grammar-file");
             return;
         }
         GrammarSpelunker g = new GrammarSpelunker(".", args[0]);
         g.parse();
         System.out.println(g.grammarModifier+" grammar "+g.grammarName);
         System.out.println("language="+g.language);
         System.out.println("tokenVocab="+g.tokenVocab);
         System.out.println("imports="+g.importedGrammars);
     }
 }
	/*
	* [The "BSD license"]
	* Copyright (c) 2010 Terence Parr
	* All rights reserved.
	*
	* Redistribution and use in source and binary forms, with or without
	* modification, are permitted provided that the following conditions
	* are met:
	* 1. Redistributions of source code must retain the above copyright
	* notice, this list of conditions and the following disclaimer.
	* 2. Redistributions in binary form must reproduce the above copyright
	* notice, this list of conditions and the following disclaimer in the
	* documentation and/or other materials provided with the distribution.
	* 3. The name of the author may not be used to endorse or promote products
	* derived from this software without specific prior written permission.
	*
	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
	* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
	* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
	* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
	* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
	*/
	package org.antlr.tool;

	import java.io.*;
	import java.util.ArrayList;
	import java.util.List;

	/** Load a grammar file and scan it just until we learn a few items
	 *  of interest.  Currently: name, type, imports, tokenVocab, language option.
	 *
	 *  The nested {@link Scanner} (at bottom of this class) drops comments and
	 *  all punctuation it does not care about, so a grammar is seen as a token
	 *  stream like:
	 *
	 *   grammar Java ; options { backtrack true ; memoize true ; }
	 *   import JavaDecl JavaAnnotations JavaExpr ;
	 *   ... : ...
	 *
	 *  First ':' or '@' indicates we can stop looking for imports/options.
	 *
	 *  Then we just grab interesting grammar properties.
	 */
	public class GrammarSpelunker {
		protected String grammarFileName;
		/** Current lookahead token; null once the scanner has hit end of input. */
		protected String token;
		protected Scanner scanner;

		// grammar info / properties
		protected String grammarModifier;
		protected String grammarName;
		protected String tokenVocab;
		protected String language = "Java"; // default
		protected String inputDirectory;
		protected List<String> importedGrammars;

		/**
		 * @param inputDirectory directory prepended to the file name; may be null,
		 *                       in which case grammarFileName is used as given
		 * @param grammarFileName name of the grammar file to scan
		 */
		public GrammarSpelunker(String inputDirectory, String grammarFileName) {
			this.inputDirectory = inputDirectory;
			this.grammarFileName = grammarFileName;
		}

		/** Advance to the next token; token becomes null at end of input. */
		void consume() throws IOException { token = scanner.nextToken(); }

		/** Consume the current token if it equals expecting.
		 *
		 *  @throws Error if the current token differs or input is exhausted
		 */
		protected void match(String expecting) throws IOException {
			//System.out.println("match "+expecting+"; is "+token);
			if ( expecting.equals(token) ) {
				consume();
				return;
			}
			// token is null at end of input; report that instead of hitting an NPE
			String found = token!=null ? token : "<EOF>";
			throw new Error("Error parsing "+grammarFileName+": '"+found+
							"' not expected '"+expecting+"'");
		}

		/** Open the grammar file and pull out header, options and imports.
		 *  Scanning stops at the first '@' or ':' or at end of input; the rest
		 *  of the file is never read.
		 *
		 *  @throws IOException if the file cannot be opened or read
		 *  @throws Error if the header, options or import section is malformed
		 */
		public void parse() throws IOException {
			String path =
				(inputDirectory != null ? inputDirectory + File.separator : "") + grammarFileName;
			BufferedReader br = new BufferedReader(new FileReader(path));
			try {
				scanner = new Scanner(br);
				consume();
				grammarHeader();
				// scan until imports or options
				while ( token!=null && !token.equals("@") && !token.equals(":") &&
						!token.equals("import") && !token.equals("options") )
				{
					consume();
				}
				// constant-first equals: token is null when the file ended early
				if ( "options".equals(token) ) options();
				// scan until imports or first rule
				while ( token!=null && !token.equals("@") && !token.equals(":") &&
						!token.equals("import") )
				{
					consume();
				}
				if ( "import".equals(token) ) imports();
				// ignore rest of input; close up shop
			}
			finally {
				br.close();
			}
		}

		/** Parse "[tree|parser|lexer] grammar Name"; does nothing on empty input. */
		protected void grammarHeader() throws IOException {
			if ( token==null ) return;
			if ( token.equals("tree") || token.equals("parser") || token.equals("lexer") ) {
				grammarModifier=token;
				consume();
			}
			match("grammar");
			grammarName = token;
			consume(); // move beyond name
		}

		/** Parse the options section and remember tokenVocab and language.
		 *  '=' is dropped by the scanner, so the section looks like
		 *  "options { backtrack true ; tokenVocab MyTokens ; }"
		 */
		protected void options() throws IOException {
			match("options");
			match("{");
			while ( token!=null && !token.equals("}") ) {
				String name = token;
				consume();
				String value = null;
				// A value the scanner does not tokenize (e.g. the '*' in "k=*")
				// leaves ';' right after the name; don't mistake it for the value.
				if ( token!=null && !token.equals(";") ) {
					value = token;
					consume();
				}
				match(";");
				if ( value==null ) continue;
				if ( name.equals("tokenVocab") ) tokenVocab = value;
				if ( name.equals("language") ) language = value;
			}
			match("}");
		}

		/** Parse the import statement; ',' is dropped by the scanner, so it
		 *  looks like "import JavaDecl JavaAnnotations JavaExpr ;".
		 *  importedGrammars is left null when no names are listed.
		 */
		protected void imports() throws IOException {
			match("import");
			importedGrammars = new ArrayList<String>();
			while ( token!=null && !token.equals(";") ) {
				importedGrammars.add(token);
				consume();
			}
			match(";");
			if ( importedGrammars.size()==0 ) importedGrammars = null;
		}

		/** @return "tree", "parser", "lexer", or null for a combined grammar */
		public String getGrammarModifier() { return grammarModifier; }

		public String getGrammarName() { return grammarName; }

		/** @return value of the tokenVocab option, or null if not set */
		public String getTokenVocab() { return tokenVocab; }

		/** @return value of the language option; "Java" if not set */
		public String getLanguage() { return language; }

		/** @return imported grammar names, or null if there is no import statement */
		public List<String> getImportedGrammars() { return importedGrammars; }

		/** Strip comments and then return stream of words, integers, the
		 *  contents of single-quoted strings, and the
		 *  tokens {';', ':', '{', '}', '@'}.  Everything else is skipped.
		 */
		public static class Scanner {
			public static final int EOF = -1;
			Reader input;
			/** Current lookahead character, or EOF. */
			int c;

			public Scanner(Reader input) throws IOException {
				this.input = input;
				consume();
			}

			boolean isDIGIT() { return c>='0'&&c<='9'; }
			boolean isID_START() { return c>='a'&&c<='z' || c>='A'&&c<='Z'; }
			boolean isID_LETTER() { return isID_START() || c>='0'&&c<='9' || c=='_'; }

			void consume() throws IOException { c = input.read(); }

			/** @return the next token, or null at end of input */
			public String nextToken() throws IOException {
				while ( c!=EOF ) {
					//System.out.println("check "+(char)c);
					switch ( c ) {
						case ';' : consume(); return ";";
						case '{' : consume(); return "{";
						case '}' : consume(); return "}";
						case ':' : consume(); return ":";
						case '@' : consume(); return "@";
						case '/' : COMMENT(); break;
						case '\'': return STRING();
						default:
							if ( isID_START() ) return ID();
							else if ( isDIGIT() ) return INT();
							consume(); // ignore anything else
					}
				}
				return null;
			}

			/** ID : LETTER (LETTER|DIGIT|'_')* ; */
			String ID() throws IOException {
				StringBuilder buf = new StringBuilder();
				while ( c!=EOF && isID_LETTER() ) { buf.append((char)c); consume(); }
				return buf.toString();
			}

			/** INT : DIGIT+ ; */
			String INT() throws IOException {
				StringBuilder buf = new StringBuilder();
				while ( c!=EOF && isDIGIT() ) { buf.append((char)c); consume(); }
				return buf.toString();
			}

			/** Return the text between single quotes, without the quotes.
			 *  Backslash escapes are kept verbatim (backslash included) so an
			 *  escaped quote does not end the string.
			 */
			String STRING() throws IOException {
				StringBuilder buf = new StringBuilder();
				consume(); // skip opening '
				while ( c!=EOF && c!='\'' ) {
					if ( c=='\\' ) {
						buf.append((char)c);
						consume();
						if ( c==EOF ) break; // input ended mid-escape; don't append EOF
					}
					buf.append((char)c);
					consume();
				}
				consume(); // scan past closing '
				return buf.toString();
			}

			/** Skip a block or line comment.  A lone '/' is simply dropped.
			 *  An unterminated block comment ends at end of input.
			 */
			void COMMENT() throws IOException {
				if ( c!='/' ) return;
				consume();
				if ( c=='*' ) {
					consume();
					// stop at EOF as well as at the closing star-slash, so an
					// unterminated comment cannot spin forever
					while ( c!=EOF ) {
						if ( c=='*' ) {
							consume();
							if ( c=='/' ) { consume(); return; }
						}
						else {
							consume();
						}
					}
				}
				else if ( c=='/' ) {
					while ( c!=EOF && c!='\n' ) consume();
				}
			}
		}

		/** Tester; Give grammar filename as arg */
		public static void main(String[] args) throws IOException {
			if ( args.length==0 ) {
				System.err.println("usage: java org.antlr.tool.GrammarSpelunker grammar-file");
				return;
			}
			GrammarSpelunker g = new GrammarSpelunker(".", args[0]);
			g.parse();
			System.out.println(g.grammarModifier+" grammar "+g.grammarName);
			System.out.println("language="+g.language);
			System.out.println("tokenVocab="+g.tokenVocab);
			System.out.println("imports="+g.importedGrammars);
		}
	}