// runtime/CSharp2/Sources/Antlr3.Runtime/Antlr.Runtime/Lexer.cs - platform/external/antlr - Git at Google

 /*
  * [The "BSD licence"]
  * Copyright (c) 2005-2008 Terence Parr
  * All rights reserved.
  *
  * Conversion to C#:
  * Copyright (c) 2008-2009 Sam Harwell, Pixel Mine, Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. The name of the author may not be used to endorse or promote products
  *    derived from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */

 namespace Antlr.Runtime {
     using ConditionalAttribute = System.Diagnostics.ConditionalAttribute;

     /** <summary>
      *  A lexer is a recognizer that draws input symbols from a character stream.
      *  Lexer grammars result in a subclass of this object. A Lexer object
      *  uses simplified match() and error recovery mechanisms in the interest
      *  of speed.
      *  </summary>
      */
     public abstract class Lexer : BaseRecognizer, ITokenSource {
         /** <summary>Where is the lexer drawing characters from?</summary> */
         protected ICharStream input;

         /** <summary>
          *  Creates a lexer without assigning an input stream; the input field is left at its default.
          *  A stream can be attached later through the CharStream property.
          *  </summary>
          */
         public Lexer() {
         }

         /** <summary>Creates a lexer that draws characters from the given stream.</summary>
          *  <param name="input">The character stream stored in the input field.</param>
          */
         public Lexer(ICharStream input) {
             this.input = input;
         }

         /** <summary>
          *  Creates a lexer that draws characters from the given stream and hands the supplied
          *  shared state object to the base recognizer constructor.
          *  </summary>
          *  <param name="input">The character stream stored in the input field.</param>
          *  <param name="state">The recognizer state passed to the base constructor.</param>
          */
         public Lexer(ICharStream input, RecognizerSharedState state)
             : base(state) {
             this.input = input;
         }

         #region Properties
         /** <summary>
          *  Gets the text matched so far for the current token, or the text override when one has
          *  been set. Setting the property stores a complete override for the token text, replacing
          *  any previous override.
          *  </summary>
          */
         public string Text {
             get {
                 string overrideText = state.text;
                 if (overrideText == null) {
                     // No override: ask the input for the characters consumed since the token started,
                     // passing the token start index and the number of characters consumed since then.
                     int start = state.tokenStartCharIndex;
                     return input.Substring(start, CharIndex - start);
                 }
                 return overrideText;
             }
             set {
                 state.text = value;
             }
         }
         /** <summary>Gets or sets the Line value of the input character stream; both accessors forward to input.Line.</summary> */
         public int Line {
             get {
                 return input.Line;
             }
             set {
                 input.Line = value;
             }
         }
         /** <summary>
          *  Gets or sets the CharPositionInLine value of the input character stream; both accessors
          *  forward to input.CharPositionInLine.
          *  </summary>
          */
         public int CharPositionInLine {
             get {
                 return input.CharPositionInLine;
             }
             set {
                 input.CharPositionInLine = value;
             }
         }
         #endregion

         /** <summary>
          *  Resets the recognizer through the base class, rewinds the input stream to index 0 when a
          *  stream is attached, and clears the lexer-specific fields of the shared state when a state
          *  object exists.
          *  </summary>
          */
         public override void Reset() {
             // The base class goes first and resets the general recognizer state variables.
             base.Reset();

             // Rewind the character stream, if there is one.
             if (input != null) {
                 input.Seek(0);
             }

             // Clear the per-token lexer bookkeeping; without shared state there is nothing to clear.
             if (state != null) {
                 state.token = null;
                 state.type = TokenTypes.Invalid;
                 state.channel = TokenChannels.Default;
                 state.tokenStartCharIndex = -1;
                 state.tokenStartCharPositionInLine = -1;
                 state.tokenStartLine = -1;
                 state.text = null;
             }
         }

         /** <summary>
          *  Returns the next token from this source by matching a token on the character stream.
          *  </summary>
          *  <remarks>
          *  Each attempt resets the per-token fields of the shared state, then calls mTokens().
          *  If no token was set, Emit() creates one; if the token is Tokens.Skip, matching starts
          *  over. When the lookahead is the end-of-file marker, an end-of-file token is returned
          *  instead. Recognition errors are reported and the loop tries again.
          *  </remarks>
          *  <returns>The matched token, or an end-of-file token when the input is exhausted.</returns>
          */
         public virtual IToken NextToken() {
             while (true) {
                 // Begin every attempt with clean per-token state anchored at the current input position.
                 state.token = null;
                 state.channel = TokenChannels.Default;
                 state.tokenStartCharIndex = input.Index;
                 state.tokenStartCharPositionInLine = input.CharPositionInLine;
                 state.tokenStartLine = input.Line;
                 state.text = null;

                 if (input.LA(1) == CharStreamConstants.EndOfFile) {
                     // Nothing left to match: return an end-of-file token whose start and stop are the current index.
                     IToken eofToken = new CommonToken(input, CharStreamConstants.EndOfFile, TokenChannels.Default, input.Index, input.Index);
                     eofToken.Line = Line;
                     eofToken.CharPositionInLine = CharPositionInLine;
                     return eofToken;
                 }

                 try {
                     mTokens();
                     if (state.token == null) {
                         // The rule did not emit anything itself, so build the default token.
                         Emit();
                     } else if (state.token == Tokens.Skip) {
                         // The rule asked to be skipped; look for another token.
                         continue;
                     }
                     return state.token;
                 } catch (NoViableAltException noViableAlt) {
                     ReportError(noViableAlt);
                     // Throw out the current character and try again.
                     Recover(noViableAlt);
                 } catch (RecognitionException recognitionError) {
                     // No Recover() call here: the match routines in this class call Recover() before throwing.
                     ReportError(recognitionError);
                 }
             }
         }

         /** <summary>
          *  Instructs the lexer to skip creating a token for the current lexer rule
          *  and to look for another token. It does so by setting state.token to Tokens.Skip;
          *  NextToken() keeps looking when a lexer rule finishes with that value. Recall that
          *  if state.token is null at the end of a token rule, NextToken() calls Emit() to
          *  create and emit one for you.
          *  </summary>
          */
         public virtual void Skip() {
             state.token = Tokens.Skip;
         }

         /** <summary>
          *  The lexer entry point, called by NextToken() once per matching attempt. After it returns,
          *  NextToken() inspects state.token: null means Emit() is called to build the token, and
          *  Tokens.Skip means matching starts over.
          *  </summary>
          */
         public abstract void mTokens();

         /** <summary>
          *  Gets the character stream this lexer reads from. Setting it resets the lexer
          *  and then attaches the new stream.
          *  </summary>
          */
         public virtual ICharStream CharStream {
             get {
                 return input;
             }
             set {
                 // Detach the old stream before calling Reset(): this class's Reset() only rewinds
                 // when input is non-null, so the old stream is left untouched. The new stream is
                 // attached afterwards and is not rewound here.
                 input = null;
                 Reset();
                 input = value;
             }
         }

         /** <summary>
          *  Gets the SourceName of the input character stream. The input field is dereferenced
          *  without a null check, so a stream must be attached before this is read.
          *  </summary>
          */
         public override string SourceName {
             get {
                 return input.SourceName;
             }
         }

         /** <summary>
          *  Records the given token as the current token by storing it in state.token.
          *  Multiple emits per NextToken() invocation are currently not supported, for
          *  efficiency reasons. To support them, subclass and override this method and
          *  NextToken() so that tokens are pushed into a list and pulled from that list
          *  rather than from a single variable as this implementation does.
          *  </summary>
          *  <param name="token">The token to store in state.token.</param>
          */
         public virtual void Emit(IToken token) {
             state.token = token;
         }

         /** <summary>
          *  The standard method called to automatically emit a token at the
          *  outermost lexical rule. It builds a CommonToken over the input from
          *  state.tokenStartCharIndex to the character just before the current
          *  lookahead, copies the start line, start column and any text override
          *  from the shared state, and passes the token to Emit(IToken).
          *  Override this method to emit custom token objects.
          *  </summary>
          *
          *  <remarks>
          *  If you are building trees, then you should also override
          *  Parser or TreeParser.getMissingSymbol().
          *  </remarks>
          *  <returns>The token that was created and emitted.</returns>
          */
         public virtual IToken Emit() {
             // CharIndex is the lookahead position, so the last consumed character sits one before it.
             int stopIndex = CharIndex - 1;
             IToken token = new CommonToken(input, state.type, state.channel, state.tokenStartCharIndex, stopIndex);
             token.Line = state.tokenStartLine;
             token.Text = state.text;
             token.CharPositionInLine = state.tokenStartCharPositionInLine;
             Emit(token);
             return token;
         }

         /** <summary>
          *  Matches the characters of <paramref name="s"/> one by one against the input, consuming
          *  each character that matches and clearing state.failed after it.
          *  </summary>
          *  <remarks>
          *  On the first mismatch: while backtracking (state.backtracking &gt; 0) state.failed is set
          *  and the method returns; otherwise Recover() is called and a MismatchedTokenException for
          *  the expected character is thrown. Characters matched before the mismatch stay consumed.
          *  </remarks>
          *  <param name="s">The sequence of characters expected next in the input.</param>
          */
         public virtual void Match(string s) {
             foreach (char expected in s) {
                 if (input.LA(1) != expected) {
                     if (state.backtracking > 0) {
                         // Speculative match: just flag the failure and let the caller rewind.
                         state.failed = true;
                         return;
                     }
                     MismatchedTokenException mismatch = new MismatchedTokenException(expected, input, TokenNames);
                     Recover(mismatch);
                     throw mismatch;
                 }
                 input.Consume();
                 state.failed = false;
             }
         }

         /** <summary>
          *  Consumes one character from the input unconditionally. The lookahead is not inspected
          *  and state.failed is not modified.
          *  </summary>
          */
         public virtual void MatchAny() {
             input.Consume();
         }

         /** <summary>
          *  Matches a single character: if the next input character equals <paramref name="c"/>
          *  it is consumed and state.failed is cleared.
          *  </summary>
          *  <remarks>
          *  On a mismatch while backtracking (state.backtracking &gt; 0) state.failed is set and the
          *  method returns; otherwise Recover() is called and a MismatchedTokenException is thrown.
          *  </remarks>
          *  <param name="c">The character value expected next in the input.</param>
          */
         public virtual void Match(int c) {
             if (input.LA(1) == c) {
                 input.Consume();
                 state.failed = false;
                 return;
             }

             if (state.backtracking > 0) {
                 // Speculative match: just flag the failure and let the caller rewind.
                 state.failed = true;
                 return;
             }

             MismatchedTokenException mismatch = new MismatchedTokenException(c, input, TokenNames);
             // Not a real recovery: Recover() in this class simply consumes one character.
             Recover(mismatch);
             throw mismatch;
         }

         /** <summary>
          *  Matches a single character in the inclusive range <paramref name="a"/>..<paramref name="b"/>:
          *  if the next input character lies in the range it is consumed and state.failed is cleared.
          *  </summary>
          *  <remarks>
          *  On a mismatch while backtracking (state.backtracking &gt; 0) state.failed is set and the
          *  method returns; otherwise Recover() is called and a MismatchedRangeException is thrown.
          *  </remarks>
          *  <param name="a">Lowest accepted character value.</param>
          *  <param name="b">Highest accepted character value.</param>
          */
         public virtual void MatchRange(int a, int b) {
             if (input.LA(1) >= a && input.LA(1) <= b) {
                 input.Consume();
                 state.failed = false;
                 return;
             }

             if (state.backtracking > 0) {
                 // Speculative match: just flag the failure and let the caller rewind.
                 state.failed = true;
                 return;
             }

             MismatchedRangeException outOfRange = new MismatchedRangeException(a, b, input);
             Recover(outOfRange);
             throw outOfRange;
         }

         /** <summary>
          *  Gets the index of the current character of lookahead, as reported by input.Index.
          *  </summary>
          */
         public virtual int CharIndex {
             get {
                 return input.Index;
             }
         }

         /** <summary>
          *  Reports a recognition error by passing this recognizer's TokenNames and the exception
          *  to DisplayRecognitionError. Every error is reported; nothing is suppressed.
          *  </summary>
          *  <param name="e">The recognition exception to report.</param>
          */
         public override void ReportError(RecognitionException e) {
             // TODO: recovery in the lexer has not been thought through yet. The disabled sketch
             // below would suppress follow-on errors until a token is matched successfully again:
             //
             //     if ( errorRecovery ) {
             //         //System.err.print("[SPURIOUS] ");
             //         return;
             //     }
             //     errorRecovery = true;

             DisplayRecognitionError(this.TokenNames, e);
         }

         /** <summary>
          *  Builds the character-oriented error message for a recognition exception raised while lexing.
          *  </summary>
          *  <remarks>
          *  The exception types are tested in a fixed order (mismatched token, no viable alternative,
          *  early exit, mismatched not-set, mismatched set, mismatched range); any other exception is
          *  delegated to the base implementation.
          *  </remarks>
          *  <param name="e">The recognition exception to describe.</param>
          *  <param name="tokenNames">Token names; only forwarded to the base implementation.</param>
          *  <returns>The message text for <paramref name="e"/>.</returns>
          */
         public override string GetErrorMessage(RecognitionException e, string[] tokenNames) {
             MismatchedTokenException mismatchedToken = e as MismatchedTokenException;
             if (mismatchedToken != null) {
                 return "mismatched character " + GetCharErrorDisplay(e.Character) + " expecting " + GetCharErrorDisplay(mismatchedToken.Expecting);
             }

             if (e is NoViableAltException) {
                 // For development, the original notes suggest appending the exception's decision
                 // description, decision number and state number (cast to NoViableAltException first).
                 return "no viable alternative at character " + GetCharErrorDisplay(e.Character);
             }

             if (e is EarlyExitException) {
                 // For development, the original notes suggest appending the exception's decision number.
                 return "required (...)+ loop did not match anything at character " + GetCharErrorDisplay(e.Character);
             }

             // The not-set test stays ahead of the set test, as in the original ordering.
             MismatchedNotSetException mismatchedNotSet = e as MismatchedNotSetException;
             if (mismatchedNotSet != null) {
                 return "mismatched character " + GetCharErrorDisplay(e.Character) + " expecting set " + mismatchedNotSet.Expecting;
             }

             MismatchedSetException mismatchedSet = e as MismatchedSetException;
             if (mismatchedSet != null) {
                 return "mismatched character " + GetCharErrorDisplay(e.Character) + " expecting set " + mismatchedSet.Expecting;
             }

             MismatchedRangeException mismatchedRange = e as MismatchedRangeException;
             if (mismatchedRange != null) {
                 return "mismatched character " + GetCharErrorDisplay(e.Character) + " expecting set " +
                       GetCharErrorDisplay(mismatchedRange.A) + ".." + GetCharErrorDisplay(mismatchedRange.B);
             }

             return base.GetErrorMessage(e, tokenNames);
         }

         /** <summary>
          *  Returns a single-quoted, printable rendering of a character value for use in error messages.
          *  </summary>
          *  <remarks>
          *  TokenTypes.EndOfFile is shown as &lt;EOF&gt;, and newline, tab and carriage return are shown
          *  as their two-character escapes; any other value is shown as the character itself.
          *  </remarks>
          *  <param name="c">The character value to display.</param>
          *  <returns>The display text wrapped in single quotes.</returns>
          */
         public virtual string GetCharErrorDisplay(int c) {
             string s;
             switch (c) {
                 case TokenTypes.EndOfFile:
                     s = "<EOF>";
                     break;
                 case '\n':
                     s = "\\n";
                     break;
                 case '\t':
                     s = "\\t";
                     break;
                 case '\r':
                     s = "\\r";
                     break;
                 default:
                     // Narrow to char only for ordinary characters. The conversion is explicitly
                     // unchecked so an out-of-range value wraps, as in a default build, instead of
                     // throwing OverflowException when the assembly is compiled with /checked.
                     s = unchecked((char)c).ToString();
                     break;
             }
             return "'" + s + "'";
         }

         /** <summary>
          *  Lexers can normally match any char in their vocabulary after matching
          *  a token, so do the easy thing and just kill a character and hope
          *  it all works out: this implementation consumes one character from the
          *  input and does not use the exception. You can instead use the rule
          *  invocation stack to do sophisticated error recovery if you are in a
          *  fragment rule.
          *  </summary>
          *  <param name="re">The recognition exception being recovered from; not used here.</param>
          */
         public virtual void Recover(RecognitionException re) {
             //System.out.println("consuming char "+(char)input.LA(1)+" during recovery");
             //re.printStackTrace();
             input.Consume();
         }

         /** <summary>
          *  Traces entry into a rule, describing the current input as the lookahead character
          *  followed by " line=" and the current line and column. Calls are compiled only when
          *  the ANTLR_TRACE symbol is defined.
          *  </summary>
          *  <param name="ruleName">Name of the rule being entered.</param>
          *  <param name="ruleIndex">Index of the rule being entered.</param>
          */
         [Conditional("ANTLR_TRACE")]
         public virtual void TraceIn(string ruleName, int ruleIndex) {
             char lookahead = (char)input.LT(1);
             string position = " line=" + Line + ":" + CharPositionInLine;
             base.TraceIn(ruleName, ruleIndex, lookahead + position);
         }

         /** <summary>
          *  Traces exit from a rule, describing the current input as the lookahead character
          *  followed by " line=" and the current line and column. Calls are compiled only when
          *  the ANTLR_TRACE symbol is defined.
          *  </summary>
          *  <param name="ruleName">Name of the rule being left.</param>
          *  <param name="ruleIndex">Index of the rule being left.</param>
          */
         [Conditional("ANTLR_TRACE")]
         public virtual void TraceOut(string ruleName, int ruleIndex) {
             char lookahead = (char)input.LT(1);
             string position = " line=" + Line + ":" + CharPositionInLine;
             base.TraceOut(ruleName, ruleIndex, lookahead + position);
         }
     }
 }
	/*
	* [The "BSD licence"]
	* Copyright (c) 2005-2008 Terence Parr
	* All rights reserved.
	*
	* Conversion to C#:
	* Copyright (c) 2008-2009 Sam Harwell, Pixel Mine, Inc.
	* All rights reserved.
	*
	* Redistribution and use in source and binary forms, with or without
	* modification, are permitted provided that the following conditions
	* are met:
	* 1. Redistributions of source code must retain the above copyright
	* notice, this list of conditions and the following disclaimer.
	* 2. Redistributions in binary form must reproduce the above copyright
	* notice, this list of conditions and the following disclaimer in the
	* documentation and/or other materials provided with the distribution.
	* 3. The name of the author may not be used to endorse or promote products
	* derived from this software without specific prior written permission.
	*
	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
	* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
	* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
	* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
	* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
	*/

	// NOTE(review): this block repeats the Antlr.Runtime.Lexer definition that appears earlier in
	// this file; only one of the two copies can be compiled into an assembly.
	namespace Antlr.Runtime {
		using ConditionalAttribute = System.Diagnostics.ConditionalAttribute;

		/** <summary>
		 *  A lexer is a recognizer that draws input symbols from a character stream.
		 *  Lexer grammars result in a subclass of this object. A Lexer object
		 *  uses simplified match() and error recovery mechanisms in the interest
		 *  of speed.
		 *  </summary>
		 */
		public abstract class Lexer : BaseRecognizer, ITokenSource {
			/** <summary>Where is the lexer drawing characters from?</summary> */
			protected ICharStream input;

			/** <summary>Creates a lexer without assigning an input stream.</summary> */
			public Lexer() {
			}

			/** <summary>Creates a lexer that draws characters from the given stream.</summary> */
			public Lexer(ICharStream input) {
				this.input = input;
			}

			/** <summary>
			 *  Creates a lexer that draws characters from the given stream and hands the supplied
			 *  shared state object to the base recognizer constructor.
			 *  </summary>
			 */
			public Lexer(ICharStream input, RecognizerSharedState state)
				: base(state) {
				this.input = input;
			}

			#region Properties
			/** <summary>
			 *  Gets the text matched so far for the current token, or the text override when one
			 *  has been set. Setting the property stores a complete override for the token text.
			 *  </summary>
			 */
			public string Text {
				get {
					if (state.text != null) {
						return state.text;
					}
					return input.Substring(state.tokenStartCharIndex, CharIndex - state.tokenStartCharIndex);
				}
				set {
					state.text = value;
				}
			}

			/** <summary>Gets or sets the Line value of the input character stream.</summary> */
			public int Line {
				get {
					return input.Line;
				}
				set {
					input.Line = value;
				}
			}

			/** <summary>Gets or sets the CharPositionInLine value of the input character stream.</summary> */
			public int CharPositionInLine {
				get {
					return input.CharPositionInLine;
				}
				set {
					input.CharPositionInLine = value;
				}
			}
			#endregion

			/** <summary>
			 *  Resets the recognizer through the base class, rewinds the input stream when one is
			 *  attached, and clears the lexer-specific fields of the shared state when it exists.
			 *  </summary>
			 */
			public override void Reset() {
				base.Reset(); // reset all recognizer state variables
				// reset the Lexer state variables
				if (input != null) {
					input.Seek(0); // rewind the input
				}
				if (state == null) {
					return; // no shared state work to do
				}
				state.token = null;
				state.type = TokenTypes.Invalid;
				state.channel = TokenChannels.Default;
				state.tokenStartCharIndex = -1;
				state.tokenStartCharPositionInLine = -1;
				state.tokenStartLine = -1;
				state.text = null;
			}

			/** <summary>Return a token from this source; i.e., match a token on the char stream.</summary> */
			public virtual IToken NextToken() {
				for (; ; ) {
					// Begin every attempt with clean per-token state anchored at the current input position.
					state.token = null;
					state.channel = TokenChannels.Default;
					state.tokenStartCharIndex = input.Index;
					state.tokenStartCharPositionInLine = input.CharPositionInLine;
					state.tokenStartLine = input.Line;
					state.text = null;
					if (input.LA(1) == CharStreamConstants.EndOfFile) {
						IToken eof = new CommonToken((ICharStream)input, CharStreamConstants.EndOfFile, TokenChannels.Default, input.Index, input.Index);
						eof.Line = Line;
						eof.CharPositionInLine = CharPositionInLine;
						return eof;
					}
					try {
						mTokens();
						if (state.token == null) {
							Emit();
						} else if (state.token == Tokens.Skip) {
							continue;
						}
						return state.token;
					} catch (NoViableAltException nva) {
						ReportError(nva);
						Recover(nva); // throw out current char and try again
					} catch (RecognitionException re) {
						ReportError(re);
						// the match routines in this class call Recover() before throwing
					}
				}
			}

			/** <summary>
			 *  Instructs the lexer to skip creating a token for the current lexer rule
			 *  and to look for another token. NextToken() keeps looking when a lexer rule
			 *  finishes with state.token set to Tokens.Skip. Recall that if state.token is
			 *  null at the end of a token rule, NextToken() creates one for you and emits it.
			 *  </summary>
			 */
			public virtual void Skip() {
				state.token = Tokens.Skip;
			}

			/** <summary>This is the lexer entry point, called by NextToken(), that sets state.token.</summary> */
			public abstract void mTokens();

			/** <summary>
			 *  Gets the character stream this lexer reads from. Setting it resets the lexer
			 *  and then attaches the new stream.
			 *  </summary>
			 */
			public virtual ICharStream CharStream {
				get {
					return input;
				}
				set {
					// Detach the old stream first so this class's Reset() does not rewind it.
					input = null;
					Reset();
					input = value;
				}
			}

			/** <summary>Gets the SourceName of the input character stream.</summary> */
			public override string SourceName {
				get {
					return input.SourceName;
				}
			}

			/** <summary>
			 *  Stores the given token in state.token. Multiple emits per NextToken() invocation
			 *  are currently not supported, for efficiency reasons. To support them, subclass and
			 *  override this method and NextToken() so that tokens are pushed into a list and
			 *  pulled from that list rather than from a single variable.
			 *  </summary>
			 */
			public virtual void Emit(IToken token) {
				state.token = token;
			}

			/** <summary>
			 *  The standard method called to automatically emit a token at the
			 *  outermost lexical rule. The token object points into the
			 *  char buffer start..stop. If there is a text override in state.text,
			 *  it is used to set the token's text. Override this method to emit
			 *  custom token objects.
			 *  </summary>
			 *
			 *  <remarks>
			 *  If you are building trees, then you should also override
			 *  Parser or TreeParser.getMissingSymbol().
			 *  </remarks>
			 */
			public virtual IToken Emit() {
				IToken t = new CommonToken(input, state.type, state.channel, state.tokenStartCharIndex, CharIndex - 1);
				t.Line = state.tokenStartLine;
				t.Text = state.text;
				t.CharPositionInLine = state.tokenStartCharPositionInLine;
				Emit(t);
				return t;
			}

			/** <summary>
			 *  Matches the characters of the string one by one against the input. On a mismatch
			 *  while backtracking, state.failed is set and the method returns; otherwise Recover()
			 *  is called and a MismatchedTokenException is thrown.
			 *  </summary>
			 */
			public virtual void Match(string s) {
				int i = 0;
				while (i < s.Length) {
					if (input.LA(1) != s[i]) {
						if (state.backtracking > 0) {
							state.failed = true;
							return;
						}
						MismatchedTokenException mte = new MismatchedTokenException(s[i], input, TokenNames);
						Recover(mte);
						throw mte;
					}
					i++;
					input.Consume();
					state.failed = false;
				}
			}

			/** <summary>Consumes one character from the input unconditionally.</summary> */
			public virtual void MatchAny() {
				input.Consume();
			}

			/** <summary>
			 *  Matches a single character. On a mismatch while backtracking, state.failed is set
			 *  and the method returns; otherwise Recover() is called and a MismatchedTokenException
			 *  is thrown.
			 *  </summary>
			 */
			public virtual void Match(int c) {
				if (input.LA(1) != c) {
					if (state.backtracking > 0) {
						state.failed = true;
						return;
					}
					MismatchedTokenException mte = new MismatchedTokenException(c, input, TokenNames);
					Recover(mte); // don't really recover; just consume in lexer
					throw mte;
				}
				input.Consume();
				state.failed = false;
			}

			/** <summary>
			 *  Matches a single character in the inclusive range a..b. On a mismatch while
			 *  backtracking, state.failed is set and the method returns; otherwise Recover() is
			 *  called and a MismatchedRangeException is thrown.
			 *  </summary>
			 */
			public virtual void MatchRange(int a, int b) {
				if (input.LA(1) < a || input.LA(1) > b) {
					if (state.backtracking > 0) {
						state.failed = true;
						return;
					}
					MismatchedRangeException mre = new MismatchedRangeException(a, b, input);
					Recover(mre);
					throw mre;
				}
				input.Consume();
				state.failed = false;
			}

			/** <summary>What is the index of the current character of lookahead?</summary> */
			public virtual int CharIndex {
				get {
					return input.Index;
				}
			}

			/** <summary>
			 *  Reports a recognition error by passing this recognizer's TokenNames and the
			 *  exception to DisplayRecognitionError. Every error is reported.
			 *  </summary>
			 */
			public override void ReportError(RecognitionException e) {
				// TODO: recovery in the lexer has not been thought through yet. The disabled sketch
				// below would suppress follow-on errors until a token is matched successfully again:
				//
				//     if ( errorRecovery ) {
				//         //System.err.print("[SPURIOUS] ");
				//         return;
				//     }
				//     errorRecovery = true;

				DisplayRecognitionError(this.TokenNames, e);
			}

			/** <summary>
			 *  Builds the character-oriented error message for a recognition exception raised while
			 *  lexing; exception types not handled here are delegated to the base implementation.
			 *  </summary>
			 */
			public override string GetErrorMessage(RecognitionException e, string[] tokenNames) {
				MismatchedTokenException mismatchedToken = e as MismatchedTokenException;
				if (mismatchedToken != null) {
					return "mismatched character " + GetCharErrorDisplay(e.Character) + " expecting " + GetCharErrorDisplay(mismatchedToken.Expecting);
				}

				if (e is NoViableAltException) {
					return "no viable alternative at character " + GetCharErrorDisplay(e.Character);
				}

				if (e is EarlyExitException) {
					return "required (...)+ loop did not match anything at character " + GetCharErrorDisplay(e.Character);
				}

				// The not-set test stays ahead of the set test, as in the original ordering.
				MismatchedNotSetException mismatchedNotSet = e as MismatchedNotSetException;
				if (mismatchedNotSet != null) {
					return "mismatched character " + GetCharErrorDisplay(e.Character) + " expecting set " + mismatchedNotSet.Expecting;
				}

				MismatchedSetException mismatchedSet = e as MismatchedSetException;
				if (mismatchedSet != null) {
					return "mismatched character " + GetCharErrorDisplay(e.Character) + " expecting set " + mismatchedSet.Expecting;
				}

				MismatchedRangeException mismatchedRange = e as MismatchedRangeException;
				if (mismatchedRange != null) {
					return "mismatched character " + GetCharErrorDisplay(e.Character) + " expecting set " +
						GetCharErrorDisplay(mismatchedRange.A) + ".." + GetCharErrorDisplay(mismatchedRange.B);
				}

				return base.GetErrorMessage(e, tokenNames);
			}

			/** <summary>
			 *  Returns a single-quoted, printable rendering of a character value for error messages:
			 *  TokenTypes.EndOfFile becomes &lt;EOF&gt;, and newline, tab and carriage return become
			 *  their two-character escapes.
			 *  </summary>
			 */
			public virtual string GetCharErrorDisplay(int c) {
				string s;
				switch (c) {
					case TokenTypes.EndOfFile:
						s = "<EOF>";
						break;
					case '\n':
						s = "\\n";
						break;
					case '\t':
						s = "\\t";
						break;
					case '\r':
						s = "\\r";
						break;
					default:
						// Narrow to char only for ordinary characters, explicitly unchecked so an
						// out-of-range value wraps instead of throwing in a /checked build.
						s = unchecked((char)c).ToString();
						break;
				}
				return "'" + s + "'";
			}

			/** <summary>
			 *  Lexers can normally match any char in their vocabulary after matching
			 *  a token, so do the easy thing and just kill a character and hope
			 *  it all works out. You can instead use the rule invocation stack
			 *  to do sophisticated error recovery if you are in a fragment rule.
			 *  </summary>
			 */
			public virtual void Recover(RecognitionException re) {
				//System.out.println("consuming char "+(char)input.LA(1)+" during recovery");
				//re.printStackTrace();
				input.Consume();
			}

			/** <summary>
			 *  Traces entry into a rule, describing the current input as the lookahead character
			 *  plus the current line and column. Calls are compiled only when ANTLR_TRACE is defined.
			 *  </summary>
			 */
			[Conditional("ANTLR_TRACE")]
			public virtual void TraceIn(string ruleName, int ruleIndex) {
				string inputSymbol = ((char)input.LT(1)) + " line=" + Line + ":" + CharPositionInLine;
				base.TraceIn(ruleName, ruleIndex, inputSymbol);
			}

			/** <summary>
			 *  Traces exit from a rule, describing the current input as the lookahead character
			 *  plus the current line and column. Calls are compiled only when ANTLR_TRACE is defined.
			 *  </summary>
			 */
			[Conditional("ANTLR_TRACE")]
			public virtual void TraceOut(string ruleName, int ruleIndex) {
				string inputSymbol = ((char)input.LT(1)) + " line=" + Line + ":" + CharPositionInLine;
				base.TraceOut(ruleName, ruleIndex, inputSymbol);
			}
		}
	}