/*
* [The "BSD licence"]
* Copyright (c) 2005-2008 Terence Parr
* All rights reserved.
*
* Conversion to C#:
* Copyright (c) 2008-2009 Sam Harwell, Pixel Mine, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
namespace Antlr.Runtime {
using System.Collections.Generic;
using ArgumentNullException = System.ArgumentNullException;
using Array = System.Array;
using Conditional = System.Diagnostics.ConditionalAttribute;
using Exception = System.Exception;
using IDebugEventListener = Antlr.Runtime.Debug.IDebugEventListener;
using MethodBase = System.Reflection.MethodBase;
using NotSupportedException = System.NotSupportedException;
using Regex = System.Text.RegularExpressions.Regex;
using StackFrame = System.Diagnostics.StackFrame;
using StackTrace = System.Diagnostics.StackTrace;
using TextWriter = System.IO.TextWriter;
using Type = System.Type;
/** <summary>
* A generic recognizer that can handle recognizers generated from
* lexer, parser, and tree grammars. This is all the parsing
* support code essentially; most of it is error recovery stuff and
* backtracking.
* </summary>
*/
public abstract class BaseRecognizer {
public const int MemoRuleFailed = -2;
public const int MemoRuleUnknown = -1;
public const int InitialFollowStackSize = 100;
// copies from Token object for convenience in actions
public const int DefaultTokenChannel = TokenChannels.Default;
public const int Hidden = TokenChannels.Hidden;
public const string NextTokenRuleName = "nextToken";
/** <summary>
* The state of a lexer, parser, or tree parser is collected into a state
* object so that it can be shared. This sharing is needed so that one
* grammar can import others and share the same error variables and other
* state variables. It's a kind of explicit multiple
* inheritance via delegation of methods and shared state.
* </summary>
*/
protected internal RecognizerSharedState state;
public BaseRecognizer()
: this(new RecognizerSharedState()) {
}
public BaseRecognizer(RecognizerSharedState state) {
if (state == null) {
state = new RecognizerSharedState();
}
this.state = state;
InitDFAs();
}
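// When one grammar imports another, the generated recognizers are typically
// constructed with the same RecognizerSharedState instance so that error
// flags, the follow stack, and the backtracking depth are shared. A minimal
// sketch (MyParser and MyImportedParser are hypothetical generated types,
// not part of this runtime):
//
//   RecognizerSharedState shared = new RecognizerSharedState();
//   MyParser root = new MyParser(tokens, shared);
//   MyImportedParser imported = new MyImportedParser(tokens, shared); // shares state with root
//
/** <summary>
* Where error messages and trace output are written. Used by
* EmitErrorMessage(), TraceIn()/TraceOut(), and the Memoize() diagnostics;
* when null, those messages are suppressed.
* </summary>
*/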
public TextWriter TraceDestination {
get;
set;
}
protected virtual void InitDFAs() {
}
/** <summary>Reset the parser's state; subclasses must also rewind the input stream.</summary> */
public virtual void Reset() {
// whack everything related to error recovery
if (state == null) {
return; // no shared state work to do
}
state._fsp = -1;
state.errorRecovery = false;
state.lastErrorIndex = -1;
state.failed = false;
state.syntaxErrors = 0;
// whack everything related to backtracking and memoization
state.backtracking = 0;
for (int i = 0; state.ruleMemo != null && i < state.ruleMemo.Length; i++) { // wipe cache
state.ruleMemo[i] = null;
}
}
/** <summary>
* Match current input symbol against ttype. Attempt
* single token insertion or deletion error recovery. If
* that fails, throw MismatchedTokenException.
* </summary>
*
* <remarks>
* To turn off single token insertion or deletion error
* recovery, override recoverFromMismatchedToken() and have it
* throw an exception. See TreeParser.recoverFromMismatchedToken().
* This way any error in a rule will cause an exception and
* immediate exit from rule. Rule would recover by resynchronizing
* to the set of symbols that can follow rule ref.
* </remarks>
*/
public virtual object Match(IIntStream input, int ttype, BitSet follow) {
//System.out.println("match "+((TokenStream)input).LT(1));
object matchedSymbol = GetCurrentInputSymbol(input);
if (input.LA(1) == ttype) {
input.Consume();
state.errorRecovery = false;
state.failed = false;
return matchedSymbol;
}
if (state.backtracking > 0) {
state.failed = true;
return matchedSymbol;
}
matchedSymbol = RecoverFromMismatchedToken(input, ttype, follow);
return matchedSymbol;
}
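// In generated parser code, a token reference such as x=ID typically turns
// into a call like the one sketched below; ID and FOLLOW_ID_in_decl are
// hypothetical generated names shown only to illustrate how Match() is driven.
//
//   IToken x = (IToken)Match(input, ID, FOLLOW_ID_in_decl);
//   if (state.failed) return retval; // while backtracking, bail out quietly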
/** <summary>Match the wildcard: consume the current input symbol, whatever it is.</summary> */
public virtual void MatchAny(IIntStream input) {
state.errorRecovery = false;
state.failed = false;
input.Consume();
}
public virtual bool MismatchIsUnwantedToken(IIntStream input, int ttype) {
return input.LA(2) == ttype;
}
public virtual bool MismatchIsMissingToken(IIntStream input, BitSet follow) {
if (follow == null) {
// we have no information about the follow; we can only consume
// a single token and hope for the best
return false;
}
// compute what can follow this grammar element reference
if (follow.Member(TokenTypes.EndOfRule)) {
BitSet viableTokensFollowingThisRule = ComputeContextSensitiveRuleFOLLOW();
follow = follow.Or(viableTokensFollowingThisRule);
if (state._fsp >= 0) { // remove EOR if we're not the start symbol
follow.Remove(TokenTypes.EndOfRule);
}
}
// if current token is consistent with what could come after set
// then we know we're missing a token; error recovery is free to
// "insert" the missing token
//System.out.println("viable tokens="+follow.toString(getTokenNames()));
//System.out.println("LT(1)="+((TokenStream)input).LT(1));
// BitSet cannot handle negative numbers like -1 (EOF) so I leave EOR
// in the follow set to indicate that the FOLLOW of the start symbol is
// in the set (EOF can follow).
if (follow.Member(input.LA(1)) || follow.Member(TokenTypes.EndOfRule)) {
//System.out.println("LT(1)=="+((TokenStream)input).LT(1)+" is consistent with what follows; inserting...");
return true;
}
return false;
}
/** <summary>Report a recognition problem.</summary>
*
* <remarks>
* This method sets errorRecovery to indicate the parser is recovering,
* not parsing. Once in recovery mode, no errors are generated.
* To get out of recovery mode, the parser must successfully match
* a token (after a resync). So it will go:
*
* 1. error occurs
* 2. enter recovery mode, report error
* 3. consume until token found in resynch set
* 4. try to resume parsing
* 5. next match() will reset errorRecovery mode
*
* If you override, make sure to update syntaxErrors if you care about that.
* </remarks>
*/
public virtual void ReportError(RecognitionException e) {
// if we've already reported an error and have not matched a token
// yet successfully, don't report any errors.
if (state.errorRecovery) {
//System.err.print("[SPURIOUS] ");
return;
}
state.syntaxErrors++; // don't count spurious
state.errorRecovery = true;
DisplayRecognitionError(this.TokenNames, e);
}
public virtual void DisplayRecognitionError(string[] tokenNames,
RecognitionException e) {
string hdr = GetErrorHeader(e);
string msg = GetErrorMessage(e, tokenNames);
EmitErrorMessage(hdr + " " + msg);
}
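// DisplayRecognitionError() is the usual override point for applications that
// want to collect diagnostics rather than write them to TraceDestination.
// A minimal sketch (the errors list is an assumption, not part of this class):
//
//   public override void DisplayRecognitionError(string[] tokenNames, RecognitionException e) {
//       string hdr = GetErrorHeader(e);
//       string msg = GetErrorMessage(e, tokenNames);
//       this.errors.Add(hdr + " " + msg); // hypothetical List<string> declared on the subclass
//   }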
/** <summary>What error message should be generated for the various exception types?</summary>
*
* <remarks>
* Not very object-oriented code, but I like having all error message
* generation within one method rather than spread among all of the
* exception classes. This also makes it much easier for the exception
* handling because the exception classes do not have to have pointers back
* to this object to access utility routines and so on. Also, changing
* the message for an exception type would be difficult because you
* would have to subclass the exception, and then somehow get ANTLR
* to create those kinds of exception objects instead of the default.
* This looks weird, but trust me--it makes the most sense in terms
* of flexibility.
*
* For grammar debugging, you will want to override this to add
* more information, such as the rule invocation stack from
* GetRuleInvocationStack() and, for no-viable-alternative errors,
* the decision description, state number, etc.
*
* Override this to change the message generated for one or more
* exception types.
* </remarks>
*/
public virtual string GetErrorMessage(RecognitionException e, string[] tokenNames) {
string msg = e.Message;
if (e is UnwantedTokenException) {
UnwantedTokenException ute = (UnwantedTokenException)e;
string tokenName = "<unknown>";
if (ute.Expecting == TokenTypes.EndOfFile) {
tokenName = "EndOfFile";
} else {
tokenName = tokenNames[ute.Expecting];
}
msg = "extraneous input " + GetTokenErrorDisplay(ute.UnexpectedToken) +
" expecting " + tokenName;
} else if (e is MissingTokenException) {
MissingTokenException mte = (MissingTokenException)e;
string tokenName = "<unknown>";
if (mte.Expecting == TokenTypes.EndOfFile) {
tokenName = "EndOfFile";
} else {
tokenName = tokenNames[mte.Expecting];
}
msg = "missing " + tokenName + " at " + GetTokenErrorDisplay(e.Token);
} else if (e is MismatchedTokenException) {
MismatchedTokenException mte = (MismatchedTokenException)e;
string tokenName = "<unknown>";
if (mte.Expecting == TokenTypes.EndOfFile) {
tokenName = "EndOfFile";
} else {
tokenName = tokenNames[mte.Expecting];
}
msg = "mismatched input " + GetTokenErrorDisplay(e.Token) +
" expecting " + tokenName;
} else if (e is MismatchedTreeNodeException) {
MismatchedTreeNodeException mtne = (MismatchedTreeNodeException)e;
string tokenName = "<unknown>";
if (mtne.Expecting == TokenTypes.EndOfFile) {
tokenName = "EndOfFile";
} else {
tokenName = tokenNames[mtne.Expecting];
}
// workaround for a .NET framework bug (NullReferenceException)
string nodeText = (mtne.Node != null) ? mtne.Node.ToString() ?? string.Empty : string.Empty;
msg = "mismatched tree node: " + nodeText + " expecting " + tokenName;
} else if (e is NoViableAltException) {
//NoViableAltException nvae = (NoViableAltException)e;
// for development, can add "decision=<<"+nvae.grammarDecisionDescription+">>"
// and "(decision="+nvae.decisionNumber+") and
// "state "+nvae.stateNumber
msg = "no viable alternative at input " + GetTokenErrorDisplay(e.Token);
} else if (e is EarlyExitException) {
//EarlyExitException eee = (EarlyExitException)e;
// for development, can add "(decision="+eee.decisionNumber+")"
msg = "required (...)+ loop did not match anything at input " +
GetTokenErrorDisplay(e.Token);
} else if (e is MismatchedSetException) {
MismatchedSetException mse = (MismatchedSetException)e;
msg = "mismatched input " + GetTokenErrorDisplay(e.Token) +
" expecting set " + mse.Expecting;
} else if (e is MismatchedNotSetException) {
MismatchedNotSetException mse = (MismatchedNotSetException)e;
msg = "mismatched input " + GetTokenErrorDisplay(e.Token) +
" expecting set " + mse.Expecting;
} else if (e is FailedPredicateException) {
FailedPredicateException fpe = (FailedPredicateException)e;
msg = "rule " + fpe.RuleName + " failed predicate: {" +
fpe.PredicateText + "}?";
}
return msg;
}
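// As the remarks above suggest, a debugging-oriented override can append the
// rule invocation stack to every message. A hedged sketch:
//
//   public override string GetErrorMessage(RecognitionException e, string[] tokenNames) {
//       string msg = base.GetErrorMessage(e, tokenNames);
//       List<string> stack = new List<string>(GetRuleInvocationStack());
//       return msg + " (rule stack: " + string.Join(" ", stack.ToArray()) + ")";
//   }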
/** <summary>
* Get the number of recognition errors (lexer, parser, tree parser). Each
* recognizer tracks its own count, so a parser and lexer each have a
* separate count. Spurious errors found between an error and the next
* valid token match are not counted.
* </summary>
*
* <seealso cref="ReportError(RecognitionException)"/>
*/
public virtual int NumberOfSyntaxErrors {
get {
return state.syntaxErrors;
}
}
/** <summary>What is the error header, normally line/character position information?</summary> */
public virtual string GetErrorHeader(RecognitionException e) {
return "line " + e.Line + ":" + (e.CharPositionInLine + 1);
}
/** <summary>
* How should a token be displayed in an error message? The default
* is to display just the text, but during development you might
* want to have a lot of information spit out. Override in that case
* to use t.ToString() (which, for CommonToken, dumps everything about
* the token). This is better than forcing you to override a method in
* your token objects because you don't have to go modify your lexer
* so that it creates a new token type.
* </summary>
*/
public virtual string GetTokenErrorDisplay(IToken t) {
string s = t.Text;
if (s == null) {
if (t.Type == TokenTypes.EndOfFile) {
s = "<EOF>";
} else {
s = "<" + t.Type + ">";
}
}
s = Regex.Replace(s, "\n", "\\n");
s = Regex.Replace(s, "\r", "\\r");
s = Regex.Replace(s, "\t", "\\t");
return "'" + s + "'";
}
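// For a more verbose display during development, the summary above suggests
// overriding this to dump the whole token; a minimal sketch:
//
//   public override string GetTokenErrorDisplay(IToken t) {
//       return t.ToString(); // for CommonToken this includes type, channel, position, and text
//   }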
/** <summary>Override this method to change where error messages go</summary> */
public virtual void EmitErrorMessage(string msg) {
if (TraceDestination != null)
TraceDestination.WriteLine(msg);
}
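// The simplest redirection is to assign TraceDestination, but the method can
// also be overridden outright; a sketch that sends messages to standard error:
//
//   public override void EmitErrorMessage(string msg) {
//       System.Console.Error.WriteLine(msg);
//   }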
/** <summary>
* Recover from an error found on the input stream. This is
* for NoViableAlt and mismatched symbol exceptions. If you enable
* single token insertion and deletion, this will usually not
* handle mismatched symbol exceptions but there could be a mismatched
* token that the match() routine could not recover from.
* </summary>
*/
public virtual void Recover(IIntStream input, RecognitionException re) {
if (state.lastErrorIndex == input.Index) {
// uh oh, another error at the same token index; must be a case
// where LT(1) is in the recovery token set so nothing is
// consumed; consume a single token, at least, to prevent
// an infinite loop; this is a failsafe.
input.Consume();
}
state.lastErrorIndex = input.Index;
BitSet followSet = ComputeErrorRecoverySet();
BeginResync();
ConsumeUntil(input, followSet);
EndResync();
}
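// Generated rule methods normally wire this into a catch block so that any
// RecognitionException is reported once and then triggers a resync; roughly
// (the surrounding rule body is implied, not shown):
//
//   catch (RecognitionException re) {
//       ReportError(re);
//       Recover(input, re);
//   }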
/** <summary>
* A hook to listen in on the token consumption during error recovery.
* The DebugParser subclasses this to fire events to the listener.
* </summary>
*/
public virtual void BeginResync() {
}
public virtual void EndResync() {
}
/* Compute the error recovery set for the current rule. During
* rule invocation, the parser pushes the set of tokens that can
* follow that rule reference on the stack; this amounts to
* computing FIRST of what follows the rule reference in the
* enclosing rule. This local follow set only includes tokens
* from within the rule; i.e., the FIRST computation done by
* ANTLR stops at the end of a rule.
*
* EXAMPLE
*
* When you find a "no viable alt exception", the input is not
* consistent with any of the alternatives for rule r. The best
* thing to do is to consume tokens until you see something that
* can legally follow a call to r *or* any rule that called r.
* You don't want the exact set of viable next tokens because the
* input might just be missing a token--you might consume the
* rest of the input looking for one of the missing tokens.
*
* Consider grammar:
*
* a : '[' b ']'
* | '(' b ')'
* ;
* b : c '^' INT ;
* c : ID
* | INT
* ;
*
* At each rule invocation, the set of tokens that could follow
* that rule is pushed on a stack. Here are the various "local"
* follow sets:
*
* FOLLOW(b1_in_a) = FIRST(']') = ']'
* FOLLOW(b2_in_a) = FIRST(')') = ')'
* FOLLOW(c_in_b) = FIRST('^') = '^'
*
* Upon erroneous input "[]", the call chain is
*
* a -> b -> c
*
* and, hence, the follow context stack is:
*
* depth local follow set after call to rule
* 0 <EOF> a (from main())
* 1 ']' b
* 2 '^' c
*
* Notice that ')' is not included, because b would have to have
* been called from a different context in rule a for ')' to be
* included.
*
* For error recovery, we cannot consider FOLLOW(c)
* (context-sensitive or otherwise). We need the combined set of
* all context-sensitive FOLLOW sets--the set of all tokens that
* could follow any reference in the call chain. We need to
* resync to one of those tokens. Note that FOLLOW(c)='^' and if
* we resync'd to that token, we'd consume until EOF. We need to
* sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}.
* In this case, for input "[]", LA(1) is in this set, so we would
* not consume anything; after printing an error, rule c would
* return normally. It would not find the required '^' though.
* At this point, it gets a mismatched token error and throws an
* exception (since LA(1) is not in the viable following token
* set). The rule exception handler tries to recover, but finds
* the same recovery set and doesn't consume anything. Rule b
* exits normally returning to rule a. Now it finds the ']' (and
* with the successful match exits errorRecovery mode).
*
* So, you can see that the parser walks up the call chain looking
* for the token that was a member of the recovery set.
*
* Errors are not generated in errorRecovery mode.
*
* ANTLR's error recovery mechanism is based upon original ideas:
*
* "Algorithms + Data Structures = Programs" by Niklaus Wirth
*
* and
*
* "A note on error recovery in recursive descent parsers":
* http://portal.acm.org/citation.cfm?id=947902.947905
*
* Later, Josef Grosch had some good ideas:
*
* "Efficient and Comfortable Error Recovery in Recursive Descent
* Parsers":
* ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip
*
* Like Grosch I implemented local FOLLOW sets that are combined
* at run-time upon error to avoid overhead during parsing.
*/
protected virtual BitSet ComputeErrorRecoverySet() {
return CombineFollows(false);
}
/** <summary>
* Compute the context-sensitive FOLLOW set for current rule.
* This is set of token types that can follow a specific rule
* reference given a specific call chain. You get the set of
* viable tokens that can possibly come next (lookahead depth 1)
* given the current call chain. Contrast this with the
* definition of plain FOLLOW for rule r:
* </summary>
*
* FOLLOW(r)={x | S=>*alpha r beta in G and x in FIRST(beta)}
*
* where x is in T and alpha, beta are in V*; T is the set of terminals and
* V is the set of terminals and nonterminals. In other words,
* FOLLOW(r) is the set of all tokens that can possibly follow
* references to r in *any* sentential form (context). At
* runtime, however, we know precisely which context applies as
* we have the call chain. We may compute the exact (rather
* than covering superset) set of following tokens.
*
* For example, consider grammar:
*
* stat : ID '=' expr ';' // FOLLOW(stat)=={EOF}
* | "return" expr '.'
* ;
* expr : atom ('+' atom)* ; // FOLLOW(expr)=={';','.',')'}
* atom : INT // FOLLOW(atom)=={'+',')',';','.'}
* | '(' expr ')'
* ;
*
* The FOLLOW sets are all inclusive whereas context-sensitive
* FOLLOW sets are precisely what could follow a rule reference.
* For input "i=(3);", here is the derivation:
*
* stat => ID '=' expr ';'
* => ID '=' atom ('+' atom)* ';'
* => ID '=' '(' expr ')' ('+' atom)* ';'
* => ID '=' '(' atom ')' ('+' atom)* ';'
* => ID '=' '(' INT ')' ('+' atom)* ';'
* => ID '=' '(' INT ')' ';'
*
* At the "3" token, you'd have a call chain of
*
* stat -> expr -> atom -> expr -> atom
*
* What can follow that specific nested ref to atom? Exactly ')'
* as you can see by looking at the derivation of this specific
* input. Contrast this with the FOLLOW(atom)={'+',')',';','.'}.
*
* You want the exact viable token set when recovering from a
* token mismatch. Upon token mismatch, if LA(1) is member of
* the viable next token set, then you know there is most likely
* a missing token in the input stream. "Insert" one by just not
* throwing an exception.
*/
protected virtual BitSet ComputeContextSensitiveRuleFOLLOW() {
return CombineFollows(true);
}
// What does "exact" mean? It seems to only add sets from higher on the stack
// if EOR is in set i. When it sees a set without EOR, it stops adding.
// Why would we ever want them all? Maybe for no-viable-alt instead of
// mismatched token?
protected virtual BitSet CombineFollows(bool exact) {
int top = state._fsp;
BitSet followSet = new BitSet();
for (int i = top; i >= 0; i--) {
BitSet localFollowSet = (BitSet)state.following[i];
/*
System.out.println("local follow depth "+i+"="+
localFollowSet.toString(getTokenNames())+")");
*/
followSet.OrInPlace(localFollowSet);
if (exact) {
// can we see end of rule?
if (localFollowSet.Member(TokenTypes.EndOfRule)) {
// Only leave EOR in the set if at top (start rule); this lets
// us know if we have to include FOLLOW(start rule); i.e., EOF
if (i > 0) {
followSet.Remove(TokenTypes.EndOfRule);
}
} else { // can't see end of rule, quit
break;
}
}
}
return followSet;
}
/** <summary>Attempt to recover from a single missing or extra token.</summary>
*
* EXTRA TOKEN
*
* LA(1) is not what we are looking for. If LA(2) has the right token,
* however, then assume LA(1) is some extra spurious token. Delete it
* and use LA(2) as if we were doing a normal match(), which advances
* the input.
*
* MISSING TOKEN
*
* If the current token is consistent with what could come after
* ttype, then it is OK to "insert" the missing token, else throw an
* exception. For example, input "i=(3;" is clearly missing the
* ')'. When the parser returns from the nested call to expr, it
* will have call chain:
*
* stat -> expr -> atom
*
* and it will be trying to match the ')' at this point in the
* derivation:
*
* => ID '=' '(' INT ')' ('+' atom)* ';'
* ^
* match() will see that ';' doesn't match ')' and report a
* mismatched token error. To recover, it sees that LA(1)==';'
* is in the set of tokens that can follow the ')' token
* reference in rule atom. It can assume that you forgot the ')'.
*/
protected virtual object RecoverFromMismatchedToken(IIntStream input, int ttype, BitSet follow) {
RecognitionException e = null;
// if next token is what we are looking for then "delete" this token
if (MismatchIsUnwantedToken(input, ttype)) {
e = new UnwantedTokenException(ttype, input, TokenNames);
/*
System.err.println("recoverFromMismatchedToken deleting "+
((TokenStream)input).LT(1)+
" since "+((TokenStream)input).LT(2)+" is what we want");
*/
BeginResync();
input.Consume(); // simply delete extra token
EndResync();
ReportError(e); // report after consuming so AW sees the token in the exception
// we want to return the token we're actually matching
object matchedSymbol = GetCurrentInputSymbol(input);
input.Consume(); // move past ttype token as if all were ok
return matchedSymbol;
}
// can't recover with single token deletion, try insertion
if (MismatchIsMissingToken(input, follow)) {
object inserted = GetMissingSymbol(input, e, ttype, follow);
e = new MissingTokenException(ttype, input, inserted);
ReportError(e); // report after inserting so AW sees the token in the exception
return inserted;
}
// even that didn't work; must throw the exception
e = new MismatchedTokenException(ttype, input, TokenNames);
throw e;
}
/** Not currently used */
public virtual object RecoverFromMismatchedSet(IIntStream input,
RecognitionException e,
BitSet follow) {
if (MismatchIsMissingToken(input, follow)) {
// System.out.println("missing token");
ReportError(e);
// we don't know how to conjure up a token for sets yet
return GetMissingSymbol(input, e, TokenTypes.Invalid, follow);
}
// TODO do single token deletion like above for Token mismatch
throw e;
}
/** <summary>
* Match needs to return the current input symbol, which gets put
* into the label for the associated token ref; e.g., x=ID. Token
* and tree parsers need to return different objects. Rather than test
* for input stream type or change the IntStream interface, I use
* a simple method to ask the recognizer to tell me what the current
* input symbol is.
* </summary>
*
* <remarks>This is ignored for lexers.</remarks>
*/
protected virtual object GetCurrentInputSymbol(IIntStream input) {
return null;
}
/** <summary>Conjure up a missing token during error recovery.</summary>
*
* <remarks>
* The recognizer attempts to recover from single missing
* symbols. But, actions might refer to that missing symbol.
* For example, x=ID {f($x);}. The action clearly assumes
* that there has been an identifier matched previously and that
* $x points at that token. If that token is missing, but
* the next token in the stream is what we want, we assume that
* this token is missing and we keep going. Because we
* have to return some token to replace the missing token,
* we have to conjure one up. This method gives the user control
* over the tokens returned for missing tokens. Mostly,
* you will want to create something special for identifier
* tokens. For literals such as '{' and ',', the default
* action in the parser or tree parser works. It simply creates
* a CommonToken of the appropriate type. The text will be the token.
* If you change what tokens must be created by the lexer,
* override this method to create the appropriate tokens.
* </remarks>
*/
protected virtual object GetMissingSymbol(IIntStream input,
RecognitionException e,
int expectedTokenType,
BitSet follow) {
return null;
}
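// As the remarks note, the token-parser default conjures a CommonToken of the
// expected type. A subclass wanting richer placeholder tokens could do
// something along these lines (a sketch, not the runtime's implementation):
//
//   protected override object GetMissingSymbol(IIntStream input, RecognitionException e,
//                                              int expectedTokenType, BitSet follow) {
//       string name = (expectedTokenType == TokenTypes.EndOfFile) ? "<EOF>" : TokenNames[expectedTokenType];
//       return new CommonToken(expectedTokenType, "<missing " + name + ">");
//   }
//
/** <summary>Consume tokens until one matches the given token type</summary> */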
public virtual void ConsumeUntil(IIntStream input, int tokenType) {
//System.out.println("consumeUntil "+tokenType);
int ttype = input.LA(1);
while (ttype != TokenTypes.EndOfFile && ttype != tokenType) {
input.Consume();
ttype = input.LA(1);
}
}
/** <summary>Consume tokens until one matches the given token set</summary> */
public virtual void ConsumeUntil(IIntStream input, BitSet set) {
//System.out.println("consumeUntil("+set.toString(getTokenNames())+")");
int ttype = input.LA(1);
while (ttype != TokenTypes.EndOfFile && !set.Member(ttype)) {
//System.out.println("consume during recover LA(1)="+getTokenNames()[input.LA(1)]);
input.Consume();
ttype = input.LA(1);
}
}
/** <summary>Push a rule's follow set using our own hardcoded stack</summary> */
protected void PushFollow(BitSet fset) {
if ((state._fsp + 1) >= state.following.Length) {
Array.Resize(ref state.following, state.following.Length * 2);
}
state.following[++state._fsp] = fset;
}
protected void PopFollow() {
state._fsp--;
}
/** <summary>
* Return a List<string> of the rules in your parser instance
* leading up to a call to this method. You could override this if
* you want more details, such as the file/line info of where
* in the generated parser code a rule is invoked.
* </summary>
*
* <remarks>
* This is very useful for error messages and for context-sensitive
* error recovery.
* </remarks>
*/
public virtual IList<string> GetRuleInvocationStack() {
return GetRuleInvocationStack(new StackTrace(true));
}
/** <summary>
* A more general version of GetRuleInvocationStack where you can
* pass in the StackTrace of, for example, a RecognitionException
* to get its rule stack trace.
* </summary>
*/
public static IList<string> GetRuleInvocationStack(StackTrace trace) {
if (trace == null)
throw new ArgumentNullException("trace");
List<string> rules = new List<string>();
StackFrame[] stack = trace.GetFrames() ?? new StackFrame[0];
for (int i = stack.Length - 1; i >= 0; i--) {
StackFrame frame = stack[i];
MethodBase method = frame.GetMethod();
GrammarRuleAttribute[] attributes = (GrammarRuleAttribute[])method.GetCustomAttributes(typeof(GrammarRuleAttribute), true);
if (attributes != null && attributes.Length > 0)
rules.Add(attributes[0].Name);
}
return rules;
}
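// Per the summary above, the static overload can inspect a caught exception's
// frames; a sketch (it assumes the exception was thrown from generated rule
// methods marked with [GrammarRule]):
//
//   catch (RecognitionException re) {
//       IList<string> rules = GetRuleInvocationStack(new StackTrace(re, true));
//       ReportError(re);
//   }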
public virtual int BacktrackingLevel {
get {
return state.backtracking;
}
set {
state.backtracking = value;
}
}
/** <summary>Return whether or not a backtracking attempt failed.</summary> */
public virtual bool Failed {
get {
return state.failed;
}
}
/** <summary>
* Used to print out token names like ID during debugging and
* error reporting. The generated parsers override this property
* to point to their string[] of token names.
* </summary>
*/
public virtual string[] TokenNames {
get {
return null;
}
}
/** <summary>
* For debugging and other purposes, you might want the grammar name.
* ANTLR generates an implementation of this property.
* </summary>
*/
public virtual string GrammarFileName {
get {
return null;
}
}
public abstract string SourceName {
get;
}
/** <summary>
* A convenience method for use most often with template rewrites.
* Convert a List<Token> to List<String>
* </summary>
*/
public virtual List<string> ToStrings(ICollection<IToken> tokens) {
if (tokens == null)
return null;
List<string> strings = new List<string>(tokens.Count);
foreach (IToken token in tokens) {
strings.Add(token.Text);
}
return strings;
}
/** <summary>
* Given a rule number and a start token index number, return
* MemoRuleUnknown if the rule has not parsed input starting from the
* start index. If this rule has parsed input starting from the
* start index before, then return where the rule stopped parsing.
* It returns the index of the last token matched by the rule.
* </summary>
*
* <remarks>
* For now we use a plain dictionary keyed by start index.
* Later, we could make a specialized structure for ints and also one that
* tosses out data after we commit past input position i.
* </remarks>
*/
public virtual int GetRuleMemoization(int ruleIndex, int ruleStartIndex) {
if (state.ruleMemo[ruleIndex] == null) {
state.ruleMemo[ruleIndex] = new Dictionary<int, int>();
}
int stopIndex;
if (!state.ruleMemo[ruleIndex].TryGetValue(ruleStartIndex, out stopIndex))
return MemoRuleUnknown;
return stopIndex;
}
/** <summary>
* Has this rule already parsed input at the current index in the
* input stream? Return the stop token index or MemoRuleUnknown.
* If we attempted but failed to parse properly before, return
* MemoRuleFailed.
* </summary>
*
* <remarks>
* This method has a side-effect: if we have seen this input for
* this rule and successfully parsed before, then seek ahead to
* 1 past the stop token matched for this rule last time.
* </remarks>
*/
public virtual bool AlreadyParsedRule(IIntStream input, int ruleIndex) {
int stopIndex = GetRuleMemoization(ruleIndex, input.Index);
if (stopIndex == MemoRuleUnknown) {
return false;
}
if (stopIndex == MemoRuleFailed) {
//System.out.println("rule "+ruleIndex+" will never succeed");
state.failed = true;
} else {
//System.out.println("seen rule "+ruleIndex+" before; skipping ahead to @"+(stopIndex+1)+" failed="+state.failed);
input.Seek(stopIndex + 1); // jump to one past stop token
}
return true;
}
/** <summary>
* Record whether or not this rule parsed the input at this position
* successfully. Uses a standard dictionary for now.
* </summary>
*/
public virtual void Memoize(IIntStream input,
int ruleIndex,
int ruleStartIndex) {
int stopTokenIndex = state.failed ? MemoRuleFailed : input.Index - 1;
if (state.ruleMemo == null) {
if (TraceDestination != null)
TraceDestination.WriteLine("!!!!!!!!! memo array is null for " + GrammarFileName);
return; // nothing to record into; avoid a NullReferenceException below
}
if (ruleIndex >= state.ruleMemo.Length) {
if (TraceDestination != null)
TraceDestination.WriteLine("!!!!!!!!! memo size is " + state.ruleMemo.Length + ", but rule index is " + ruleIndex);
return; // out-of-range rule index; avoid indexing past the memo array
}
if (state.ruleMemo[ruleIndex] != null) {
state.ruleMemo[ruleIndex][ruleStartIndex] = stopTokenIndex;
}
}
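// When memoization is enabled, generated rules use these members together:
// the rule first asks AlreadyParsedRule() and, on exit, records its result
// with Memoize(). A hedged sketch of that pattern (ruleIndex 42 is arbitrary):
//
//   int startIndex = input.Index;
//   if (state.backtracking > 0 && AlreadyParsedRule(input, 42)) return;
//   try {
//       // ... match the rule body ...
//   } finally {
//       if (state.backtracking > 0) Memoize(input, 42, startIndex);
//   }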
/** <summary>Return how many rule/input-index pairs there are in total.</summary>
* TODO: this includes synpreds. :(
*/
public virtual int GetRuleMemoizationCacheSize() {
int n = 0;
for (int i = 0; state.ruleMemo != null && i < state.ruleMemo.Length; i++) {
var ruleMap = state.ruleMemo[i];
if (ruleMap != null) {
n += ruleMap.Count; // how many input indexes are recorded?
}
}
return n;
}
[Conditional("ANTLR_TRACE")]
public virtual void TraceIn(string ruleName, int ruleIndex, object inputSymbol) {
if (TraceDestination == null)
return;
TraceDestination.Write("enter " + ruleName + " " + inputSymbol);
if (state.backtracking > 0) {
TraceDestination.Write(" backtracking=" + state.backtracking);
}
TraceDestination.WriteLine();
}
[Conditional("ANTLR_TRACE")]
public virtual void TraceOut(string ruleName, int ruleIndex, object inputSymbol) {
if (TraceDestination == null)
return;
TraceDestination.Write("exit " + ruleName + " " + inputSymbol);
if (state.backtracking > 0) {
TraceDestination.Write(" backtracking=" + state.backtracking);
if (state.failed)
TraceDestination.Write(" failed");
else
TraceDestination.Write(" succeeded");
}
TraceDestination.WriteLine();
}
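// Because TraceIn()/TraceOut() are marked [Conditional("ANTLR_TRACE")], calls
// to them compile away unless the ANTLR_TRACE symbol is defined in the calling
// assembly; with it defined, directing the output is just an assignment
// ('parser' being any BaseRecognizer subclass):
//
//   parser.TraceDestination = System.Console.Out;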
#region Debugging support
public virtual IDebugEventListener DebugListener {
get {
return null;
}
}
[Conditional("ANTLR_DEBUG")]
protected virtual void DebugEnterRule(string grammarFileName, string ruleName) {
IDebugEventListener dbg = DebugListener;
if (dbg != null)
dbg.EnterRule(grammarFileName, ruleName);
}
[Conditional("ANTLR_DEBUG")]
protected virtual void DebugExitRule(string grammarFileName, string ruleName) {
IDebugEventListener dbg = DebugListener;
if (dbg != null)
dbg.ExitRule(grammarFileName, ruleName);
}
[Conditional("ANTLR_DEBUG")]
protected virtual void DebugEnterSubRule(int decisionNumber) {
IDebugEventListener dbg = DebugListener;
if (dbg != null)
dbg.EnterSubRule(decisionNumber);
}
[Conditional("ANTLR_DEBUG")]
protected virtual void DebugExitSubRule(int decisionNumber) {
IDebugEventListener dbg = DebugListener;
if (dbg != null)
dbg.ExitSubRule(decisionNumber);
}
[Conditional("ANTLR_DEBUG")]
protected virtual void DebugEnterAlt(int alt) {
IDebugEventListener dbg = DebugListener;
if (dbg != null)
dbg.EnterAlt(alt);
}
[Conditional("ANTLR_DEBUG")]
protected virtual void DebugEnterDecision(int decisionNumber, bool couldBacktrack) {
IDebugEventListener dbg = DebugListener;
if (dbg != null)
dbg.EnterDecision(decisionNumber, couldBacktrack);
}
[Conditional("ANTLR_DEBUG")]
protected virtual void DebugExitDecision(int decisionNumber) {
IDebugEventListener dbg = DebugListener;
if (dbg != null)
dbg.ExitDecision(decisionNumber);
}
[Conditional("ANTLR_DEBUG")]
protected virtual void DebugLocation(int line, int charPositionInLine) {
IDebugEventListener dbg = DebugListener;
if (dbg != null)
dbg.Location(line, charPositionInLine);
}
[Conditional("ANTLR_DEBUG")]
protected virtual void DebugSemanticPredicate(bool result, string predicate) {
IDebugEventListener dbg = DebugListener;
if (dbg != null)
dbg.SemanticPredicate(result, predicate);
}
[Conditional("ANTLR_DEBUG")]
protected virtual void DebugBeginBacktrack(int level) {
IDebugEventListener dbg = DebugListener;
if (dbg != null)
dbg.BeginBacktrack(level);
}
[Conditional("ANTLR_DEBUG")]
protected virtual void DebugEndBacktrack(int level, bool successful) {
IDebugEventListener dbg = DebugListener;
if (dbg != null)
dbg.EndBacktrack(level, successful);
}
[Conditional("ANTLR_DEBUG")]
protected virtual void DebugRecognitionException(RecognitionException ex) {
IDebugEventListener dbg = DebugListener;
if (dbg != null)
dbg.RecognitionException(ex);
}
#endregion
}
}