antlr-3.4/runtime/CSharp2/Sources/Antlr3.Runtime/Antlr.Runtime/BaseRecognizer.cs - platform/external/antlr - Git at Google

 /*
  * [The "BSD licence"]
  * Copyright (c) 2005-2008 Terence Parr
  * All rights reserved.
  *
  * Conversion to C#:
  * Copyright (c) 2008-2009 Sam Harwell, Pixel Mine, Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. The name of the author may not be used to endorse or promote products
  *    derived from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */

 namespace Antlr.Runtime {
     using System.Collections.Generic;

     using ArgumentNullException = System.ArgumentNullException;
     using Array = System.Array;
     using Conditional = System.Diagnostics.ConditionalAttribute;
     using Exception = System.Exception;
     using IDebugEventListener = Antlr.Runtime.Debug.IDebugEventListener;
     using MethodBase = System.Reflection.MethodBase;
     using NotSupportedException = System.NotSupportedException;
     using Regex = System.Text.RegularExpressions.Regex;
     using StackFrame = System.Diagnostics.StackFrame;
     using StackTrace = System.Diagnostics.StackTrace;
     using TextWriter = System.IO.TextWriter;
     using Type = System.Type;

     /** <summary>
      *  A generic recognizer that can handle recognizers generated from
      *  lexer, parser, and tree grammars.  This is all the parsing
      *  support code essentially; most of it is error recovery stuff and
      *  backtracking.
      *  </summary>
      */
     public abstract class BaseRecognizer {
         public const int MemoRuleFailed = -2;
         public const int MemoRuleUnknown = -1;
         public const int InitialFollowStackSize = 100;

         // copies from Token object for convenience in actions
         public const int DefaultTokenChannel = TokenChannels.Default;
         public const int Hidden = TokenChannels.Hidden;

         public const string NextTokenRuleName = "nextToken";

         /** <summary>
          *  State of a lexer, parser, or tree parser are collected into a state
          *  object so the state can be shared.  This sharing is needed to
          *  have one grammar import others and share same error variables
          *  and other state variables.  It's a kind of explicit multiple
          *  inheritance via delegation of methods and shared state.
          *  </summary>
          */
         protected internal RecognizerSharedState state;

         public BaseRecognizer()
             : this(new RecognizerSharedState()) {
         }

         public BaseRecognizer(RecognizerSharedState state) {
             if (state == null) {
                 state = new RecognizerSharedState();
             }
             this.state = state;
             InitDFAs();
         }

         public TextWriter TraceDestination {
             get;
             set;
         }

         protected virtual void InitDFAs() {
         }

         /** <summary>reset the parser's state; subclasses must rewinds the input stream</summary> */
         public virtual void Reset() {
             // wack everything related to error recovery
             if (state == null) {
                 return; // no shared state work to do
             }
             state._fsp = -1;
             state.errorRecovery = false;
             state.lastErrorIndex = -1;
             state.failed = false;
             state.syntaxErrors = 0;
             // wack everything related to backtracking and memoization
             state.backtracking = 0;
             for (int i = 0; state.ruleMemo != null && i < state.ruleMemo.Length; i++) { // wipe cache
                 state.ruleMemo[i] = null;
             }
         }


         /** <summary>
          *  Match current input symbol against ttype.  Attempt
          *  single token insertion or deletion error recovery.  If
          *  that fails, throw MismatchedTokenException.
          *  </summary>
          *
          *  <remarks>
          *  To turn off single token insertion or deletion error
          *  recovery, override recoverFromMismatchedToken() and have it
          *  throw an exception. See TreeParser.recoverFromMismatchedToken().
          *  This way any error in a rule will cause an exception and
          *  immediate exit from rule.  Rule would recover by resynchronizing
          *  to the set of symbols that can follow rule ref.
          *  </remarks>
          */
         public virtual object Match(IIntStream input, int ttype, BitSet follow) {
             //System.out.println("match "+((TokenStream)input).LT(1));
             object matchedSymbol = GetCurrentInputSymbol(input);
             if (input.LA(1) == ttype) {
                 input.Consume();
                 state.errorRecovery = false;
                 state.failed = false;
                 return matchedSymbol;
             }
             if (state.backtracking > 0) {
                 state.failed = true;
                 return matchedSymbol;
             }
             matchedSymbol = RecoverFromMismatchedToken(input, ttype, follow);
             return matchedSymbol;
         }

         /** <summary>Match the wildcard: in a symbol</summary> */
         public virtual void MatchAny(IIntStream input) {
             state.errorRecovery = false;
             state.failed = false;
             input.Consume();
         }

         public virtual bool MismatchIsUnwantedToken(IIntStream input, int ttype) {
             return input.LA(2) == ttype;
         }

         public virtual bool MismatchIsMissingToken(IIntStream input, BitSet follow) {
             if (follow == null) {
                 // we have no information about the follow; we can only consume
                 // a single token and hope for the best
                 return false;
             }
             // compute what can follow this grammar element reference
             if (follow.Member(TokenTypes.EndOfRule)) {
                 BitSet viableTokensFollowingThisRule = ComputeContextSensitiveRuleFOLLOW();
                 follow = follow.Or(viableTokensFollowingThisRule);
                 if (state._fsp >= 0) { // remove EOR if we're not the start symbol
                     follow.Remove(TokenTypes.EndOfRule);
                 }
             }
             // if current token is consistent with what could come after set
             // then we know we're missing a token; error recovery is free to
             // "insert" the missing token

             //System.out.println("viable tokens="+follow.toString(getTokenNames()));
             //System.out.println("LT(1)="+((TokenStream)input).LT(1));

             // BitSet cannot handle negative numbers like -1 (EOF) so I leave EOR
             // in follow set to indicate that the fall of the start symbol is
             // in the set (EOF can follow).
             if (follow.Member(input.LA(1)) || follow.Member(TokenTypes.EndOfRule)) {
                 //System.out.println("LT(1)=="+((TokenStream)input).LT(1)+" is consistent with what follows; inserting...");
                 return true;
             }
             return false;
         }

         /** <summary>Report a recognition problem.</summary>
          *
          *  <remarks>
          *  This method sets errorRecovery to indicate the parser is recovering
          *  not parsing.  Once in recovery mode, no errors are generated.
          *  To get out of recovery mode, the parser must successfully match
          *  a token (after a resync).  So it will go:
          *
          * 		1. error occurs
          * 		2. enter recovery mode, report error
          * 		3. consume until token found in resynch set
          * 		4. try to resume parsing
          * 		5. next match() will reset errorRecovery mode
          *
          *  If you override, make sure to update syntaxErrors if you care about that.
          *  </remarks>
          */
         public virtual void ReportError(RecognitionException e) {
             // if we've already reported an error and have not matched a token
             // yet successfully, don't report any errors.
             if (state.errorRecovery) {
                 //System.err.print("[SPURIOUS] ");
                 return;
             }
             state.syntaxErrors++; // don't count spurious
             state.errorRecovery = true;

             DisplayRecognitionError(this.TokenNames, e);
         }

         public virtual void DisplayRecognitionError(string[] tokenNames,
                                             RecognitionException e) {
             string hdr = GetErrorHeader(e);
             string msg = GetErrorMessage(e, tokenNames);
             EmitErrorMessage(hdr + " " + msg);
         }

         /** <summary>What error message should be generated for the various exception types?</summary>
          *
          *  <remarks>
          *  Not very object-oriented code, but I like having all error message
          *  generation within one method rather than spread among all of the
          *  exception classes. This also makes it much easier for the exception
          *  handling because the exception classes do not have to have pointers back
          *  to this object to access utility routines and so on. Also, changing
          *  the message for an exception type would be difficult because you
          *  would have to subclassing exception, but then somehow get ANTLR
          *  to make those kinds of exception objects instead of the default.
          *  This looks weird, but trust me--it makes the most sense in terms
          *  of flexibility.
          *
          *  For grammar debugging, you will want to override this to add
          *  more information such as the stack frame with
          *  getRuleInvocationStack(e, this.getClass().getName()) and,
          *  for no viable alts, the decision description and state etc...
          *
          *  Override this to change the message generated for one or more
          *  exception types.
          *  </remarks>
          */
         public virtual string GetErrorMessage(RecognitionException e, string[] tokenNames) {
             string msg = e.Message;
             if (e is UnwantedTokenException) {
                 UnwantedTokenException ute = (UnwantedTokenException)e;
                 string tokenName = "<unknown>";
                 if (ute.Expecting == TokenTypes.EndOfFile) {
                     tokenName = "EndOfFile";
                 } else {
                     tokenName = tokenNames[ute.Expecting];
                 }
                 msg = "extraneous input " + GetTokenErrorDisplay(ute.UnexpectedToken) +
                     " expecting " + tokenName;
             } else if (e is MissingTokenException) {
                 MissingTokenException mte = (MissingTokenException)e;
                 string tokenName = "<unknown>";
                 if (mte.Expecting == TokenTypes.EndOfFile) {
                     tokenName = "EndOfFile";
                 } else {
                     tokenName = tokenNames[mte.Expecting];
                 }
                 msg = "missing " + tokenName + " at " + GetTokenErrorDisplay(e.Token);
             } else if (e is MismatchedTokenException) {
                 MismatchedTokenException mte = (MismatchedTokenException)e;
                 string tokenName = "<unknown>";
                 if (mte.Expecting == TokenTypes.EndOfFile) {
                     tokenName = "EndOfFile";
                 } else {
                     tokenName = tokenNames[mte.Expecting];
                 }
                 msg = "mismatched input " + GetTokenErrorDisplay(e.Token) +
                     " expecting " + tokenName;
             } else if (e is MismatchedTreeNodeException) {
                 MismatchedTreeNodeException mtne = (MismatchedTreeNodeException)e;
                 string tokenName = "<unknown>";
                 if (mtne.Expecting == TokenTypes.EndOfFile) {
                     tokenName = "EndOfFile";
                 } else {
                     tokenName = tokenNames[mtne.Expecting];
                 }
                 // workaround for a .NET framework bug (NullReferenceException)
                 string nodeText = (mtne.Node != null) ? mtne.Node.ToString() ?? string.Empty : string.Empty;
                 msg = "mismatched tree node: " + nodeText + " expecting " + tokenName;
             } else if (e is NoViableAltException) {
                 //NoViableAltException nvae = (NoViableAltException)e;
                 // for development, can add "decision=<<"+nvae.grammarDecisionDescription+">>"
                 // and "(decision="+nvae.decisionNumber+") and
                 // "state "+nvae.stateNumber
                 msg = "no viable alternative at input " + GetTokenErrorDisplay(e.Token);
             } else if (e is EarlyExitException) {
                 //EarlyExitException eee = (EarlyExitException)e;
                 // for development, can add "(decision="+eee.decisionNumber+")"
                 msg = "required (...)+ loop did not match anything at input " +
                     GetTokenErrorDisplay(e.Token);
             } else if (e is MismatchedSetException) {
                 MismatchedSetException mse = (MismatchedSetException)e;
                 msg = "mismatched input " + GetTokenErrorDisplay(e.Token) +
                     " expecting set " + mse.Expecting;
             } else if (e is MismatchedNotSetException) {
                 MismatchedNotSetException mse = (MismatchedNotSetException)e;
                 msg = "mismatched input " + GetTokenErrorDisplay(e.Token) +
                     " expecting set " + mse.Expecting;
             } else if (e is FailedPredicateException) {
                 FailedPredicateException fpe = (FailedPredicateException)e;
                 msg = "rule " + fpe.RuleName + " failed predicate: {" +
                     fpe.PredicateText + "}?";
             }
             return msg;
         }

         /** <summary>
          *  Get number of recognition errors (lexer, parser, tree parser).  Each
          *  recognizer tracks its own number.  So parser and lexer each have
          *  separate count.  Does not count the spurious errors found between
          *  an error and next valid token match
          *  </summary>
          *
          *  <seealso cref="reportError()"/>
          */
         public virtual int NumberOfSyntaxErrors {
             get {
                 return state.syntaxErrors;
             }
         }

         /** <summary>What is the error header, normally line/character position information?</summary> */
         public virtual string GetErrorHeader(RecognitionException e) {
             return "line " + e.Line + ":" + (e.CharPositionInLine + 1);
         }

         /** <summary>
          *  How should a token be displayed in an error message? The default
          *  is to display just the text, but during development you might
          *  want to have a lot of information spit out.  Override in that case
          *  to use t.ToString() (which, for CommonToken, dumps everything about
          *  the token). This is better than forcing you to override a method in
          *  your token objects because you don't have to go modify your lexer
          *  so that it creates a new Java type.
          *  </summary>
          */
         public virtual string GetTokenErrorDisplay(IToken t) {
             string s = t.Text;
             if (s == null) {
                 if (t.Type == TokenTypes.EndOfFile) {
                     s = "<EOF>";
                 } else {
                     s = "<" + t.Type + ">";
                 }
             }
             s = Regex.Replace(s, "\n", "\\\\n");
             s = Regex.Replace(s, "\r", "\\\\r");
             s = Regex.Replace(s, "\t", "\\\\t");
             return "'" + s + "'";
         }

         /** <summary>Override this method to change where error messages go</summary> */
         public virtual void EmitErrorMessage(string msg) {
             if (TraceDestination != null)
                 TraceDestination.WriteLine(msg);
         }

         /** <summary>
          *  Recover from an error found on the input stream.  This is
          *  for NoViableAlt and mismatched symbol exceptions.  If you enable
          *  single token insertion and deletion, this will usually not
          *  handle mismatched symbol exceptions but there could be a mismatched
          *  token that the match() routine could not recover from.
          *  </summary>
          */
         public virtual void Recover(IIntStream input, RecognitionException re) {
             if (state.lastErrorIndex == input.Index) {
                 // uh oh, another error at same token index; must be a case
                 // where LT(1) is in the recovery token set so nothing is
                 // consumed; consume a single token so at least to prevent
                 // an infinite loop; this is a failsafe.
                 input.Consume();
             }
             state.lastErrorIndex = input.Index;
             BitSet followSet = ComputeErrorRecoverySet();
             BeginResync();
             ConsumeUntil(input, followSet);
             EndResync();
         }

         /** <summary>
          *  A hook to listen in on the token consumption during error recovery.
          *  The DebugParser subclasses this to fire events to the listenter.
          *  </summary>
          */
         public virtual void BeginResync() {
         }

         public virtual void EndResync() {
         }

         /*  Compute the error recovery set for the current rule.  During
          *  rule invocation, the parser pushes the set of tokens that can
          *  follow that rule reference on the stack; this amounts to
          *  computing FIRST of what follows the rule reference in the
          *  enclosing rule. This local follow set only includes tokens
          *  from within the rule; i.e., the FIRST computation done by
          *  ANTLR stops at the end of a rule.
          *
          *  EXAMPLE
          *
          *  When you find a "no viable alt exception", the input is not
          *  consistent with any of the alternatives for rule r.  The best
          *  thing to do is to consume tokens until you see something that
          *  can legally follow a call to r *or* any rule that called r.
          *  You don't want the exact set of viable next tokens because the
          *  input might just be missing a token--you might consume the
          *  rest of the input looking for one of the missing tokens.
          *
          *  Consider grammar:
          *
          *  a : '[' b ']'
          *    | '(' b ')'
          *    ;
          *  b : c '^' INT ;
          *  c : ID
          *    | INT
          *    ;
          *
          *  At each rule invocation, the set of tokens that could follow
          *  that rule is pushed on a stack.  Here are the various "local"
          *  follow sets:
          *
          *  FOLLOW(b1_in_a) = FIRST(']') = ']'
          *  FOLLOW(b2_in_a) = FIRST(')') = ')'
          *  FOLLOW(c_in_b) = FIRST('^') = '^'
          *
          *  Upon erroneous input "[]", the call chain is
          *
          *  a -> b -> c
          *
          *  and, hence, the follow context stack is:
          *
          *  depth  local follow set     after call to rule
          *    0         <EOF>                    a (from main())
          *    1          ']'                     b
          *    3          '^'                     c
          *
          *  Notice that ')' is not included, because b would have to have
          *  been called from a different context in rule a for ')' to be
          *  included.
          *
          *  For error recovery, we cannot consider FOLLOW(c)
          *  (context-sensitive or otherwise).  We need the combined set of
          *  all context-sensitive FOLLOW sets--the set of all tokens that
          *  could follow any reference in the call chain.  We need to
          *  resync to one of those tokens.  Note that FOLLOW(c)='^' and if
          *  we resync'd to that token, we'd consume until EOF.  We need to
          *  sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}.
          *  In this case, for input "[]", LA(1) is in this set so we would
          *  not consume anything and after printing an error rule c would
          *  return normally.  It would not find the required '^' though.
          *  At this point, it gets a mismatched token error and throws an
          *  exception (since LA(1) is not in the viable following token
          *  set).  The rule exception handler tries to recover, but finds
          *  the same recovery set and doesn't consume anything.  Rule b
          *  exits normally returning to rule a.  Now it finds the ']' (and
          *  with the successful match exits errorRecovery mode).
          *
          *  So, you cna see that the parser walks up call chain looking
          *  for the token that was a member of the recovery set.
          *
          *  Errors are not generated in errorRecovery mode.
          *
          *  ANTLR's error recovery mechanism is based upon original ideas:
          *
          *  "Algorithms + Data Structures = Programs" by Niklaus Wirth
          *
          *  and
          *
          *  "A note on error recovery in recursive descent parsers":
          *  http://portal.acm.org/citation.cfm?id=947902.947905
          *
          *  Later, Josef Grosch had some good ideas:
          *
          *  "Efficient and Comfortable Error Recovery in Recursive Descent
          *  Parsers":
          *  ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip
          *
          *  Like Grosch I implemented local FOLLOW sets that are combined
          *  at run-time upon error to avoid overhead during parsing.
          */
         protected virtual BitSet ComputeErrorRecoverySet() {
             return CombineFollows(false);
         }

         /** <summary>
          *  Compute the context-sensitive FOLLOW set for current rule.
          *  This is set of token types that can follow a specific rule
          *  reference given a specific call chain.  You get the set of
          *  viable tokens that can possibly come next (lookahead depth 1)
          *  given the current call chain.  Contrast this with the
          *  definition of plain FOLLOW for rule r:
          *  </summary>
          *
          *   FOLLOW(r)={x | S=>*alpha r beta in G and x in FIRST(beta)}
          *
          *  where x in T* and alpha, beta in V*; T is set of terminals and
          *  V is the set of terminals and nonterminals.  In other words,
          *  FOLLOW(r) is the set of all tokens that can possibly follow
          *  references to r in *any* sentential form (context).  At
          *  runtime, however, we know precisely which context applies as
          *  we have the call chain.  We may compute the exact (rather
          *  than covering superset) set of following tokens.
          *
          *  For example, consider grammar:
          *
          *  stat : ID '=' expr ';'      // FOLLOW(stat)=={EOF}
          *       | "return" expr '.'
          *       ;
          *  expr : atom ('+' atom)* ;   // FOLLOW(expr)=={';','.',')'}
          *  atom : INT                  // FOLLOW(atom)=={'+',')',';','.'}
          *       | '(' expr ')'
          *       ;
          *
          *  The FOLLOW sets are all inclusive whereas context-sensitive
          *  FOLLOW sets are precisely what could follow a rule reference.
          *  For input input "i=(3);", here is the derivation:
          *
          *  stat => ID '=' expr ';'
          *       => ID '=' atom ('+' atom)* ';'
          *       => ID '=' '(' expr ')' ('+' atom)* ';'
          *       => ID '=' '(' atom ')' ('+' atom)* ';'
          *       => ID '=' '(' INT ')' ('+' atom)* ';'
          *       => ID '=' '(' INT ')' ';'
          *
          *  At the "3" token, you'd have a call chain of
          *
          *    stat -> expr -> atom -> expr -> atom
          *
          *  What can follow that specific nested ref to atom?  Exactly ')'
          *  as you can see by looking at the derivation of this specific
          *  input.  Contrast this with the FOLLOW(atom)={'+',')',';','.'}.
          *
          *  You want the exact viable token set when recovering from a
          *  token mismatch.  Upon token mismatch, if LA(1) is member of
          *  the viable next token set, then you know there is most likely
          *  a missing token in the input stream.  "Insert" one by just not
          *  throwing an exception.
          */
         protected virtual BitSet ComputeContextSensitiveRuleFOLLOW() {
             return CombineFollows(true);
         }

         // what is exact? it seems to only add sets from above on stack
         // if EOR is in set i.  When it sees a set w/o EOR, it stops adding.
         // Why would we ever want them all?  Maybe no viable alt instead of
         // mismatched token?
         protected virtual BitSet CombineFollows(bool exact) {
             int top = state._fsp;
             BitSet followSet = new BitSet();
             for (int i = top; i >= 0; i--) {
                 BitSet localFollowSet = (BitSet)state.following[i];
                 /*
                 System.out.println("local follow depth "+i+"="+
                                    localFollowSet.toString(getTokenNames())+")");
                  */
                 followSet.OrInPlace(localFollowSet);
                 if (exact) {
                     // can we see end of rule?
                     if (localFollowSet.Member(TokenTypes.EndOfRule)) {
                         // Only leave EOR in set if at top (start rule); this lets
                         // us know if have to include follow(start rule); i.e., EOF
                         if (i > 0) {
                             followSet.Remove(TokenTypes.EndOfRule);
                         }
                     } else { // can't see end of rule, quit
                         break;
                     }
                 }
             }
             return followSet;
         }

         /** <summary>Attempt to recover from a single missing or extra token.</summary>
          *
          *  EXTRA TOKEN
          *
          *  LA(1) is not what we are looking for.  If LA(2) has the right token,
          *  however, then assume LA(1) is some extra spurious token.  Delete it
          *  and LA(2) as if we were doing a normal match(), which advances the
          *  input.
          *
          *  MISSING TOKEN
          *
          *  If current token is consistent with what could come after
          *  ttype then it is ok to "insert" the missing token, else throw
          *  exception For example, Input "i=(3;" is clearly missing the
          *  ')'.  When the parser returns from the nested call to expr, it
          *  will have call chain:
          *
          *    stat -> expr -> atom
          *
          *  and it will be trying to match the ')' at this point in the
          *  derivation:
          *
          *       => ID '=' '(' INT ')' ('+' atom)* ';'
          *                          ^
          *  match() will see that ';' doesn't match ')' and report a
          *  mismatched token error.  To recover, it sees that LA(1)==';'
          *  is in the set of tokens that can follow the ')' token
          *  reference in rule atom.  It can assume that you forgot the ')'.
          */
         protected virtual object RecoverFromMismatchedToken(IIntStream input, int ttype, BitSet follow) {
             RecognitionException e = null;
             // if next token is what we are looking for then "delete" this token
             if (MismatchIsUnwantedToken(input, ttype)) {
                 e = new UnwantedTokenException(ttype, input, TokenNames);
                 /*
                 System.err.println("recoverFromMismatchedToken deleting "+
                                    ((TokenStream)input).LT(1)+
                                    " since "+((TokenStream)input).LT(2)+" is what we want");
                  */
                 BeginResync();
                 input.Consume(); // simply delete extra token
                 EndResync();
                 ReportError(e);  // report after consuming so AW sees the token in the exception
                 // we want to return the token we're actually matching
                 object matchedSymbol = GetCurrentInputSymbol(input);
                 input.Consume(); // move past ttype token as if all were ok
                 return matchedSymbol;
             }
             // can't recover with single token deletion, try insertion
             if (MismatchIsMissingToken(input, follow)) {
                 object inserted = GetMissingSymbol(input, e, ttype, follow);
                 e = new MissingTokenException(ttype, input, inserted);
                 ReportError(e);  // report after inserting so AW sees the token in the exception
                 return inserted;
             }
             // even that didn't work; must throw the exception
             e = new MismatchedTokenException(ttype, input, TokenNames);
             throw e;
         }

         /** Not currently used */
         public virtual object RecoverFromMismatchedSet(IIntStream input,
                                                RecognitionException e,
                                                BitSet follow) {
             if (MismatchIsMissingToken(input, follow)) {
                 // System.out.println("missing token");
                 ReportError(e);
                 // we don't know how to conjure up a token for sets yet
                 return GetMissingSymbol(input, e, TokenTypes.Invalid, follow);
             }
             // TODO do single token deletion like above for Token mismatch
             throw e;
         }

         /** <summary>
          *  Match needs to return the current input symbol, which gets put
          *  into the label for the associated token ref; e.g., x=ID.  Token
          *  and tree parsers need to return different objects. Rather than test
          *  for input stream type or change the IntStream interface, I use
          *  a simple method to ask the recognizer to tell me what the current
          *  input symbol is.
          *  </summary>
          *
          *  <remarks>This is ignored for lexers.</remarks>
          */
         protected virtual object GetCurrentInputSymbol(IIntStream input) {
             return null;
         }

         /** <summary>Conjure up a missing token during error recovery.</summary>
          *
          *  <remarks>
          *  The recognizer attempts to recover from single missing
          *  symbols. But, actions might refer to that missing symbol.
          *  For example, x=ID {f($x);}. The action clearly assumes
          *  that there has been an identifier matched previously and that
          *  $x points at that token. If that token is missing, but
          *  the next token in the stream is what we want we assume that
          *  this token is missing and we keep going. Because we
          *  have to return some token to replace the missing token,
          *  we have to conjure one up. This method gives the user control
          *  over the tokens returned for missing tokens. Mostly,
          *  you will want to create something special for identifier
          *  tokens. For literals such as '{' and ',', the default
          *  action in the parser or tree parser works. It simply creates
          *  a CommonToken of the appropriate type. The text will be the token.
          *  If you change what tokens must be created by the lexer,
          *  override this method to create the appropriate tokens.
          *  </remarks>
          */
         protected virtual object GetMissingSymbol(IIntStream input,
                                           RecognitionException e,
                                           int expectedTokenType,
                                           BitSet follow) {
             return null;
         }

         public virtual void ConsumeUntil(IIntStream input, int tokenType) {
             //System.out.println("consumeUntil "+tokenType);
             int ttype = input.LA(1);
             while (ttype != TokenTypes.EndOfFile && ttype != tokenType) {
                 input.Consume();
                 ttype = input.LA(1);
             }
         }

         /** <summary>Consume tokens until one matches the given token set</summary> */
         public virtual void ConsumeUntil(IIntStream input, BitSet set) {
             //System.out.println("consumeUntil("+set.toString(getTokenNames())+")");
             int ttype = input.LA(1);
             while (ttype != TokenTypes.EndOfFile && !set.Member(ttype)) {
                 //System.out.println("consume during recover LA(1)="+getTokenNames()[input.LA(1)]);
                 input.Consume();
                 ttype = input.LA(1);
             }
         }

         /** <summary>Push a rule's follow set using our own hardcoded stack</summary> */
         protected void PushFollow(BitSet fset) {
             if ((state._fsp + 1) >= state.following.Length) {
                 Array.Resize(ref state.following, state.following.Length * 2);
             }
             state.following[++state._fsp] = fset;
         }

         protected void PopFollow() {
             state._fsp--;
         }

         /** <summary>
          *  Return List<String> of the rules in your parser instance
          *  leading up to a call to this method.  You could override if
          *  you want more details such as the file/line info of where
          *  in the parser java code a rule is invoked.
          *  </summary>
          *
          *  <remarks>
          *  This is very useful for error messages and for context-sensitive
          *  error recovery.
          *  </remarks>
          */
         public virtual IList<string> GetRuleInvocationStack() {
             return GetRuleInvocationStack(new StackTrace(true));
         }

         /** <summary>
          *  A more general version of GetRuleInvocationStack where you can
          *  pass in the StackTrace of, for example, a RecognitionException
          *  to get it's rule stack trace.
          *  </summary>
          */
         public static IList<string> GetRuleInvocationStack(StackTrace trace) {
             if (trace == null)
                 throw new ArgumentNullException("trace");

             List<string> rules = new List<string>();
             StackFrame[] stack = trace.GetFrames() ?? new StackFrame[0];

             for (int i = stack.Length - 1; i >= 0; i--) {
                 StackFrame frame = stack[i];
                 MethodBase method = frame.GetMethod();
                 GrammarRuleAttribute[] attributes = (GrammarRuleAttribute[])method.GetCustomAttributes(typeof(GrammarRuleAttribute), true);
                 if (attributes != null && attributes.Length > 0)
                     rules.Add(attributes[0].Name);
             }

             return rules;
         }

         public virtual int BacktrackingLevel {
             get {
                 return state.backtracking;
             }
             set {
                 state.backtracking = value;
             }
         }

         /** <summary>Return whether or not a backtracking attempt failed.</summary> */
         public virtual bool Failed {
             get {
                 return state.failed;
             }
         }

         /** <summary>
          *  Used to print out token names like ID during debugging and
          *  error reporting.  The generated parsers implement a method
          *  that overrides this to point to their String[] tokenNames.
          *  </summary>
          */
         public virtual string[] TokenNames {
             get {
                 return null;
             }
         }

         /** <summary>
          *  For debugging and other purposes, might want the grammar name.
          *  Have ANTLR generate an implementation for this method.
          *  </summary>
          */
         public virtual string GrammarFileName {
             get {
                 return null;
             }
         }

         public abstract string SourceName {
             get;
         }

         /** <summary>
          *  A convenience method for use most often with template rewrites.
          *  Convert a List<Token> to List<String>
          *  </summary>
          */
         public virtual List<string> ToStrings(ICollection<IToken> tokens) {
             if (tokens == null)
                 return null;

             List<string> strings = new List<string>(tokens.Count);
             foreach (IToken token in tokens) {
                 strings.Add(token.Text);
             }

             return strings;
         }

         /** <summary>
          *  Given a rule number and a start token index number, return
          *  MEMO_RULE_UNKNOWN if the rule has not parsed input starting from
          *  start index.  If this rule has parsed input starting from the
          *  start index before, then return where the rule stopped parsing.
          *  It returns the index of the last token matched by the rule.
          *  </summary>
          *
          *  <remarks>
          *  For now we use a hashtable and just the slow Object-based one.
          *  Later, we can make a special one for ints and also one that
          *  tosses out data after we commit past input position i.
          *  </remarks>
          */
         public virtual int GetRuleMemoization(int ruleIndex, int ruleStartIndex) {
             if (state.ruleMemo[ruleIndex] == null) {
                 state.ruleMemo[ruleIndex] = new Dictionary<int, int>();
             }

             int stopIndex;
             if (!state.ruleMemo[ruleIndex].TryGetValue(ruleStartIndex, out stopIndex))
                 return MemoRuleUnknown;

             return stopIndex;
         }

         /** <summary>
          *  Has this rule already parsed input at the current index in the
          *  input stream?  Return the stop token index or MEMO_RULE_UNKNOWN.
          *  If we attempted but failed to parse properly before, return
          *  MEMO_RULE_FAILED.
          *  </summary>
          *
          *  <remarks>
          *  This method has a side-effect: if we have seen this input for
          *  this rule and successfully parsed before, then seek ahead to
          *  1 past the stop token matched for this rule last time.
          *  </remarks>
          */
         public virtual bool AlreadyParsedRule(IIntStream input, int ruleIndex) {
             int stopIndex = GetRuleMemoization(ruleIndex, input.Index);
             if (stopIndex == MemoRuleUnknown) {
                 return false;
             }
             if (stopIndex == MemoRuleFailed) {
                 //System.out.println("rule "+ruleIndex+" will never succeed");
                 state.failed = true;
             } else {
                 //System.out.println("seen rule "+ruleIndex+" before; skipping ahead to @"+(stopIndex+1)+" failed="+state.failed);
                 input.Seek(stopIndex + 1); // jump to one past stop token
             }
             return true;
         }

         /** <summary>
          *  Record whether or not this rule parsed the input at this position
          *  successfully.  Use a standard java hashtable for now.
          *  </summary>
          */
         public virtual void Memoize(IIntStream input,
                             int ruleIndex,
                             int ruleStartIndex) {
             int stopTokenIndex = state.failed ? MemoRuleFailed : input.Index - 1;
             if (state.ruleMemo == null) {
                 if (TraceDestination != null)
                     TraceDestination.WriteLine("!!!!!!!!! memo array is null for " + GrammarFileName);
             }
             if (ruleIndex >= state.ruleMemo.Length) {
                 if (TraceDestination != null)
                     TraceDestination.WriteLine("!!!!!!!!! memo size is " + state.ruleMemo.Length + ", but rule index is " + ruleIndex);
             }
             if (state.ruleMemo[ruleIndex] != null) {
                 state.ruleMemo[ruleIndex][ruleStartIndex] = stopTokenIndex;
             }
         }

         /** <summary>return how many rule/input-index pairs there are in total.</summary>
          *  TODO: this includes synpreds. :(
          */
         public virtual int GetRuleMemoizationCacheSize() {
             int n = 0;
             for (int i = 0; state.ruleMemo != null && i < state.ruleMemo.Length; i++) {
                 var ruleMap = state.ruleMemo[i];
                 if (ruleMap != null) {
                     n += ruleMap.Count; // how many input indexes are recorded?
                 }
             }
             return n;
         }

         [Conditional("ANTLR_TRACE")]
         public virtual void TraceIn(string ruleName, int ruleIndex, object inputSymbol) {
             if (TraceDestination == null)
                 return;

             TraceDestination.Write("enter " + ruleName + " " + inputSymbol);
             if (state.backtracking > 0) {
                 TraceDestination.Write(" backtracking=" + state.backtracking);
             }
             TraceDestination.WriteLine();
         }

         [Conditional("ANTLR_TRACE")]
         public virtual void TraceOut(string ruleName, int ruleIndex, object inputSymbol) {
             if (TraceDestination == null)
                 return;

             TraceDestination.Write("exit " + ruleName + " " + inputSymbol);
             if (state.backtracking > 0) {
                 TraceDestination.Write(" backtracking=" + state.backtracking);
                 if (state.failed)
                     TraceDestination.Write(" failed");
                 else
                     TraceDestination.Write(" succeeded");
             }
             TraceDestination.WriteLine();
         }

         #region Debugging support
         public virtual IDebugEventListener DebugListener {
             get {
                 return null;
             }
         }

         [Conditional("ANTLR_DEBUG")]
         protected virtual void DebugEnterRule(string grammarFileName, string ruleName) {
             IDebugEventListener dbg = DebugListener;
             if (dbg != null)
                 dbg.EnterRule(grammarFileName, ruleName);
         }

         [Conditional("ANTLR_DEBUG")]
         protected virtual void DebugExitRule(string grammarFileName, string ruleName) {
             IDebugEventListener dbg = DebugListener;
             if (dbg != null)
                 dbg.ExitRule(grammarFileName, ruleName);
         }

         [Conditional("ANTLR_DEBUG")]
         protected virtual void DebugEnterSubRule(int decisionNumber) {
             IDebugEventListener dbg = DebugListener;
             if (dbg != null)
                 dbg.EnterSubRule(decisionNumber);
         }

         [Conditional("ANTLR_DEBUG")]
         protected virtual void DebugExitSubRule(int decisionNumber) {
             IDebugEventListener dbg = DebugListener;
             if (dbg != null)
                 dbg.ExitSubRule(decisionNumber);
         }

         [Conditional("ANTLR_DEBUG")]
         protected virtual void DebugEnterAlt(int alt) {
             IDebugEventListener dbg = DebugListener;
             if (dbg != null)
                 dbg.EnterAlt(alt);
         }

         [Conditional("ANTLR_DEBUG")]
         protected virtual void DebugEnterDecision(int decisionNumber, bool couldBacktrack) {
             IDebugEventListener dbg = DebugListener;
             if (dbg != null)
                 dbg.EnterDecision(decisionNumber, couldBacktrack);
         }

         [Conditional("ANTLR_DEBUG")]
         protected virtual void DebugExitDecision(int decisionNumber) {
             IDebugEventListener dbg = DebugListener;
             if (dbg != null)
                 dbg.ExitDecision(decisionNumber);
         }

         [Conditional("ANTLR_DEBUG")]
         protected virtual void DebugLocation(int line, int charPositionInLine) {
             IDebugEventListener dbg = DebugListener;
             if (dbg != null)
                 dbg.Location(line, charPositionInLine);
         }

         [Conditional("ANTLR_DEBUG")]
         protected virtual void DebugSemanticPredicate(bool result, string predicate) {
             IDebugEventListener dbg = DebugListener;
             if (dbg != null)
                 dbg.SemanticPredicate(result, predicate);
         }

         [Conditional("ANTLR_DEBUG")]
         protected virtual void DebugBeginBacktrack(int level) {
             IDebugEventListener dbg = DebugListener;
             if (dbg != null)
                 dbg.BeginBacktrack(level);
         }

         [Conditional("ANTLR_DEBUG")]
         protected virtual void DebugEndBacktrack(int level, bool successful) {
             IDebugEventListener dbg = DebugListener;
             if (dbg != null)
                 dbg.EndBacktrack(level, successful);
         }

         [Conditional("ANTLR_DEBUG")]
         protected virtual void DebugRecognitionException(RecognitionException ex) {
             IDebugEventListener dbg = DebugListener;
             if (dbg != null)
                 dbg.RecognitionException(ex);
         }
         #endregion
     }
 }