blob: 3b36cbfb9e061c670e7e2fa5bfa304bb414ba538 [file] [log] [blame]
/** A generic recognizer that can handle recognizers generated from
* lexer, parser, and tree grammars. This is all the parsing
* support code essentially; most of it is error recovery stuff and
* backtracking.
*
* <p>This class should not be instantiated directly. Instead, use one of its
* subclasses.</p>
*
* @class
* @param {org.antlr.runtime.RecognizerSharedState} [state] state object with
* which to initialize this recognizer.
*/
org.antlr.runtime.BaseRecognizer = function(state) {
/** State of a lexer, parser, or tree parser are collected into a state
* object so the state can be shared. This sharing is needed to
* have one grammar import others and share same error variables
* and other state variables. It's a kind of explicit multiple
* inheritance via delegation of methods and shared state.
* @type org.antlr.runtime.RecognizerSharedState
*/
this.state = state || new org.antlr.runtime.RecognizerSharedState();
};
org.antlr.lang.augmentObject(org.antlr.runtime.BaseRecognizer, {
/**
* @memberOf org.antlr.runtime.BaseRecognizer
* @type Number
*/
MEMO_RULE_FAILED: -2,
/**
* @memberOf org.antlr.runtime.BaseRecognizer
* @type Number
*/
MEMO_RULE_UNKNOWN: -1,
/**
* @memberOf org.antlr.runtime.BaseRecognizer
* @type Number
*/
INITIAL_FOLLOW_STACK_SIZE: 100,
/**
* @memberOf org.antlr.runtime.BaseRecognizer
* @type Number
*/
MEMO_RULE_FAILED_I: -2,
/**
* @memberOf org.antlr.runtime.BaseRecognizer
* @type Number
*/
DEFAULT_TOKEN_CHANNEL: org.antlr.runtime.Token.DEFAULT_CHANNEL,
/**
* @memberOf org.antlr.runtime.BaseRecognizer
* @type Number
*/
HIDDEN: org.antlr.runtime.Token.HIDDEN_CHANNEL,
/**
* @memberOf org.antlr.runtime.BaseRecognizer
* @type String
*/
NEXT_TOKEN_RULE_NAME: "nextToken"
});
org.antlr.runtime.BaseRecognizer.prototype = {
/** Reset the parser's state. Subclasses must rewinds the input stream */
reset: function() {
var i, len;
// wack everything related to error recovery
if (!this.state) {
return; // no shared state work to do
}
this.state._fsp = -1;
this.state.errorRecovery = false;
this.state.lastErrorIndex = -1;
this.state.failed = false;
this.state.syntaxErrors = 0;
// wack everything related to backtracking and memoization
this.state.backtracking = 0;
// wipe cache
if (this.state.ruleMemo) {
for (i=0, len=this.state.ruleMemo.length; i<len; i++) {
this.state.ruleMemo[i] = null;
}
}
},
/** Match current input symbol against ttype. Attempt
* single token insertion or deletion error recovery. If
* that fails, throw {@link org.antlr.runtime.MismatchedTokenException}.
*
* <p>To turn off single token insertion or deletion error
* recovery, override {@link #mismatchRecover} and have it call
* plain {@link #mismatch}, which does not recover. Then any error
* in a rule will cause an exception and immediate exit from
* rule. Rule would recover by resynchronizing to the set of
* symbols that can follow rule ref.</p>
*
* @param {org.antlr.runtime.IntStream} input input stream to match against.
* @param {Number} ttype input type to match.
* @param {org.antlr.runtime.BitSet} [follow] set of tokens that can follow the
* matched token.
* @returns {Object} the matched symbol
*/
match: function(input, ttype, follow) {
var matchedSymbol = this.getCurrentInputSymbol(input);
if ( input.LA(1)===ttype ) {
input.consume();
this.state.errorRecovery = false;
this.state.failed = false;
return matchedSymbol;
}
if ( this.state.backtracking>0 ) {
this.state.failed = true;
return matchedSymbol;
}
matchedSymbol = this.recoverFromMismatchedToken(input, ttype, follow);
return matchedSymbol;
},
/**
* Match any token.
* @param {org.antlr.runtime.IntStream} input input stream to match against.
*/
matchAny: function(input) {
this.state.errorRecovery = false;
this.state.failed = false;
input.consume();
},
/**
* Is the following token (LA(2)) the unwanted type (ttype)?
* @param {org.antlr.runtime.IntStream} input input stream to match against.
* @param {Number} ttype the undesired token type.
* @returns {Boolean} true if and only if the following token is the
* unwanted type.
*/
mismatchIsUnwantedToken: function(input, ttype) {
return input.LA(2)===ttype;
},
/**
* Does the stream appear to be missing a single token?
* @param {org.antlr.runtime.IntStream} input input stream to match against.
* @param {org.antlr.runtime.BitSet} [follow] set of tokens that can follow the
* matched token.
* @returns {Boolean} true if and only if it appears that the stream is
* missing a single token.
*/
mismatchIsMissingToken: function(input, follow) {
if ( !follow ) {
// we have no information about the follow; we can only consume
// a single token and hope for the best
return false;
}
// compute what can follow this grammar element reference
if ( follow.member(org.antlr.runtime.Token.EOR_TOKEN_TYPE) ) {
var viableTokensFollowingThisRule = this.computeContextSensitiveRuleFOLLOW();
follow = follow.or(this.viableTokensFollowingThisRule);
if ( this.state._fsp>=0 ) { // remove EOR if we're not the start symbol
follow.remove(org.antlr.runtime.Token.EOR_TOKEN_TYPE);
}
}
// if current token is consistent with what could come after set
// then we know we're missing a token; error recovery is free to
// "insert" the missing token
// BitSet cannot handle negative numbers like -1 (EOF) so I leave EOR
// in follow set to indicate that the fall of the start symbol is
// in the set (EOF can follow).
if ( follow.member(input.LA(1)) ||
follow.member(org.antlr.runtime.Token.EOR_TOKEN_TYPE) )
{
return true;
}
return false;
},
/** Factor out what to do upon token mismatch so tree parsers can behave
* differently. Override and call {@link #mismatchRecover}
* to get single token insertion and deletion.
*
* @param {org.antlr.runtime.IntStream} input input stream to match against.
* @param {Number} ttype input type to match.
* @param {org.antlr.runtime.BitSet} [follow] set of tokens that can follow the
* matched token.
*/
mismatch: function(input, ttype, follow) {
if ( this.mismatchIsUnwantedToken(input, ttype) ) {
throw new org.antlr.runtime.UnwantedTokenException(ttype, input);
} else if ( this.mismatchIsMissingToken(input, follow) ) {
throw new org.antlr.runtime.MissingTokenException(ttype, input, null);
}
throw new org.antlr.runtime.MismatchedTokenException(ttype, input);
},
/** Report a recognition problem.
*
* <p>This method sets errorRecovery to indicate the parser is recovering
* not parsing. Once in recovery mode, no errors are generated.
* To get out of recovery mode, the parser must successfully match
* a token (after a resync). So it will go:</p>
* <ol>
* <li>error occurs</li>
* <li>enter recovery mode, report error</li>
* <li>consume until token found in resynch set</li>
* <li>try to resume parsing</li>
* <li>next match() will reset errorRecovery mode</li>
* </ol>
*
* <p>If you override, make sure to update this.state.syntaxErrors if you
* care about that.</p>
* @param {org.antlr.runtime.RecognitionException} e the error to be reported.
*/
reportError: function(e) {
// if we've already reported an error and have not matched a token
// yet successfully, don't report any errors.
if ( this.state.errorRecovery ) {
return;
}
this.state.syntaxErrors++;
this.state.errorRecovery = true;
this.displayRecognitionError(this.getTokenNames(), e);
},
/**
* Assemble recognition error message.
* @param {Array} tokenNames array of token names (strings).
* @param {org.antlr.runtime.RecognitionException} e the error to be reported.
*/
displayRecognitionError: function(tokenNames, e) {
var hdr = this.getErrorHeader(e),
msg = this.getErrorMessage(e, tokenNames);
this.emitErrorMessage(hdr+" "+msg);
},
/**
* Create error header message. Format is <q>line
* lineNumber:positionInLine</q>.
* @param {org.antlr.runtime.RecognitionException} e the error to be reported.
* @returns {String} The error header.
*/
getErrorHeader: function(e) {
/* handle null input */
if (!org.antlr.lang.isNumber(e.line)) {
e.line = 0;
}
return "line "+e.line+":"+e.charPositionInLine;
},
/**
* Override this method to change where error messages go.
* Defaults to "alert"-ing the error in browsers and "print"-ing the error
* in other environments (e.g. Rhino, SpiderMonkey).
* @param {String} msg the error message to be displayed.
*/
emitErrorMessage: function(msg) {
if (typeof(window) != 'undefined' && window.alert) {
alert(msg);
} else {
print(msg);
}
},
/** What error message should be generated for the various
* exception types?
*
* <p>Not very object-oriented code, but I like having all error message
* generation within one method rather than spread among all of the
* exception classes. This also makes it much easier for the exception
* handling because the exception classes do not have to have pointers back
* to this object to access utility routines and so on. Also, changing
* the message for an exception type would be difficult because you
* would have to be subclassing exceptions, but then somehow get ANTLR
* to make those kinds of exception objects instead of the default.</p>
*
* <p>For grammar debugging, you will want to override this to add
* more information such as the stack frame and no viable alts.</p>
*
* <p>Override this to change the message generated for one or more
* exception types.</p>
*
* @param {Array} tokenNames array of token names (strings).
* @param {org.antlr.runtime.RecognitionException} e the error to be reported.
* @returns {String} the error message to be emitted.
*/
getErrorMessage: function(e, tokenNames) {
var msg = (e && e.getMessage) ? e.getMessage() : null,
mte,
tokenName;
if ( e instanceof org.antlr.runtime.UnwantedTokenException ) {
var ute = e;
tokenName="<unknown>";
if ( ute.expecting== org.antlr.runtime.Token.EOF ) {
tokenName = "EOF";
} else {
tokenName = tokenNames[ute.expecting];
}
msg = "extraneous input "+this.getTokenErrorDisplay(ute.getUnexpectedToken())+
" expecting "+tokenName;
}
else if ( e instanceof org.antlr.runtime.MissingTokenException ) {
mte = e;
tokenName="<unknown>";
if ( mte.expecting== org.antlr.runtime.Token.EOF ) {
tokenName = "EOF";
} else {
tokenName = tokenNames[mte.expecting];
}
msg = "missing "+tokenName+" at "+this.getTokenErrorDisplay(e.token);
}
else if ( e instanceof org.antlr.runtime.MismatchedTokenException ) {
mte = e;
tokenName="<unknown>";
if ( mte.expecting== org.antlr.runtime.Token.EOF ) {
tokenName = "EOF";
}
else {
tokenName = tokenNames[mte.expecting];
}
msg = "mismatched input "+this.getTokenErrorDisplay(e.token)+
" expecting "+tokenName;
}
else if ( e instanceof org.antlr.runtime.NoViableAltException ) {
msg = "no viable alternative at input "+this.getTokenErrorDisplay(e.token);
}
else if ( e instanceof org.antlr.runtime.EarlyExitException ) {
msg = "required (...)+ loop did not match anything at input "+
this.getTokenErrorDisplay(e.token);
}
else if ( e instanceof org.antlr.runtime.MismatchedSetException ) {
msg = "mismatched input "+this.getTokenErrorDisplay(e.token)+
" expecting set "+e.expecting;
}
else if ( e instanceof org.antlr.runtime.MismatchedNotSetException ) {
msg = "mismatched input "+this.getTokenErrorDisplay(e.token)+
" expecting set "+e.expecting;
}
else if ( e instanceof org.antlr.runtime.FailedPredicateException ) {
msg = "rule "+e.ruleName+" failed predicate: {"+
e.predicateText+"}?";
}
return msg;
},
/** <p>Get number of recognition errors (lexer, parser, tree parser). Each
* recognizer tracks its own number. So parser and lexer each have
* separate count. Does not count the spurious errors found between
* an error and next valid token match.</p>
*
* <p>See also {@link #reportError}()
* @returns {Number} number of syntax errors encountered
*/
getNumberOfSyntaxErrors: function() {
return this.state.syntaxErrors;
},
/** How should a token be displayed in an error message? The default
* is to display just the text, but during development you might
* want to have a lot of information spit out. Override in that case
* to use t.toString() (which, for CommonToken, dumps everything about
* the token).
* @param {org.antlr.runtime.Token} t token that will be displayed in an error message
* @return {String} the string representation of the token
*/
getTokenErrorDisplay: function(t) {
var s = t.getText();
if ( !org.antlr.lang.isValue(s) ) {
if ( t.getType()==org.antlr.runtime.Token.EOF ) {
s = "<EOF>";
}
else {
s = "<"+t.getType()+">";
}
}
s = s.replace(/\n/g,"\\n");
s = s.replace(/\r/g,"\\r");
s = s.replace(/\t/g,"\\t");
return "'"+s+"'";
},
/** Recover from an error found on the input stream. This is
* for NoViableAlt and mismatched symbol exceptions. If you enable
* single token insertion and deletion, this will usually not
* handle mismatched symbol exceptions but there could be a mismatched
* token that the match() routine could not recover from.
* @param {org.antlr.runtime.IntStream} input the intput stream
* @param {org.antlr.runtime.RecogntionException} the error found on the input stream
*/
recover: function(input, re) {
if ( this.state.lastErrorIndex==input.index() ) {
// uh oh, another error at same token index; must be a case
// where LT(1) is in the recovery token set so nothing is
// consumed; consume a single token so at least to prevent
// an infinite loop; this is a failsafe.
input.consume();
}
this.state.lastErrorIndex = input.index();
var followSet = this.computeErrorRecoverySet();
this.beginResync();
this.consumeUntil(input, followSet);
this.endResync();
},
/** A hook to listen in on the token consumption during error recovery.
*/
beginResync: function() {
},
/** A hook to listen in on the token consumption during error recovery.
*/
endResync: function() {
},
/** Compute the error recovery set for the current rule.
* <p>During rule invocation, the parser pushes the set of tokens that can
* follow that rule reference on the stack; this amounts to
* computing FIRST of what follows the rule reference in the
* enclosing rule. This local follow set only includes tokens
* from within the rule; i.e., the FIRST computation done by
* ANTLR stops at the end of a rule.</p>
*
* <p>EXAMPLE</p>
*
* <p>When you find a "no viable alt exception", the input is not
* consistent with any of the alternatives for rule r. The best
* thing to do is to consume tokens until you see something that
* can legally follow a call to r *or* any rule that called r.
* You don't want the exact set of viable next tokens because the
* input might just be missing a token--you might consume the
* rest of the input looking for one of the missing tokens.</p>
*
* <p>Consider grammar:</p>
* <code><pre>
* a : '[' b ']'
* | '(' b ')'
* ;
* b : c '^' INT ;
* c : ID
* | INT
* ;
* </pre></code>
*
* <p>At each rule invocation, the set of tokens that could follow
* that rule is pushed on a stack. Here are the various "local"
* follow sets:</p>
*
* <code><pre>
* FOLLOW(b1_in_a) = FIRST(']') = ']'
* FOLLOW(b2_in_a) = FIRST(')') = ')'
* FOLLOW(c_in_b) = FIRST('^') = '^'
* </pre></code>
*
* <p>Upon erroneous input "[]", the call chain is</p>
*
* <code>a -> b -> c</code>
*
* <p>and, hence, the follow context stack is:</p>
*
* <code><pre>
* depth local follow set after call to rule
* 0 <EOF> a (from main())
* 1 ']' b
* 3 '^' c
* </pre></code>
*
* <p>Notice that ')' is not included, because b would have to have
* been called from a different context in rule a for ')' to be
* included.</p>
*
* <p>For error recovery, we cannot consider FOLLOW(c)
* (context-sensitive or otherwise). We need the combined set of
* all context-sensitive FOLLOW sets--the set of all tokens that
* could follow any reference in the call chain. We need to
* resync to one of those tokens. Note that FOLLOW(c)='^' and if
* we resync'd to that token, we'd consume until EOF. We need to
* sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}.
* In this case, for input "[]", LA(1) is in this set so we would
* not consume anything and after printing an error rule c would
* return normally. It would not find the required '^' though.
* At this point, it gets a mismatched token error and throws an
* exception (since LA(1) is not in the viable following token
* set). The rule exception handler tries to recover, but finds
* the same recovery set and doesn't consume anything. Rule b
* exits normally returning to rule a. Now it finds the ']' (and
* with the successful match exits errorRecovery mode).</p>
*
* <p>So, you cna see that the parser walks up call chain looking
* for the token that was a member of the recovery set.</p>
*
* <p>Errors are not generated in errorRecovery mode.</p>
*
* <p>ANTLR's error recovery mechanism is based upon original ideas:</p>
*
* <p>"Algorithms + Data Structures = Programs" by Niklaus Wirth</p>
*
* <p>and</p>
*
* <p>"A note on error recovery in recursive descent parsers":
* http://portal.acm.org/citation.cfm?id=947902.947905</p>
*
* <p>Later, Josef Grosch had some good ideas:</p>
*
* <p>"Efficient and Comfortable Error Recovery in Recursive Descent
* Parsers":
* ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip</p>
*
* <p>Like Grosch I implemented local FOLLOW sets that are combined
* at run-time upon error to avoid overhead during parsing.</p>
* @returns {org.antlr.runtime.BitSet}
*/
computeErrorRecoverySet: function() {
return this.combineFollows(false);
},
/** Compute the context-sensitive FOLLOW set for current rule.
* <p>This is set of token types that can follow a specific rule
* reference given a specific call chain. You get the set of
* viable tokens that can possibly come next (lookahead depth 1)
* given the current call chain. Contrast this with the
* definition of plain FOLLOW for rule r:</p>
*
* <code>FOLLOW(r)={x | S=>*alpha r beta in G and x in FIRST(beta)}</code>
*
* <p>where x in T* and alpha, beta in V*; T is set of terminals and
* V is the set of terminals and nonterminals. In other words,
* FOLLOW(r) is the set of all tokens that can possibly follow
* references to r in *any* sentential form (context). At
* runtime, however, we know precisely which context applies as
* we have the call chain. We may compute the exact (rather
* than covering superset) set of following tokens.</p>
*
* <p>For example, consider grammar:</p>
*
* <code><pre>
* stat : ID '=' expr ';' // FOLLOW(stat)=={EOF}
* | "return" expr '.'
* ;
* expr : atom ('+' atom)* ; // FOLLOW(expr)=={';','.',')'}
* atom : INT // FOLLOW(atom)=={'+',')',';','.'}
* | '(' expr ')'
* ;
* </pre></code>
*
* <p>The FOLLOW sets are all inclusive whereas context-sensitive
* FOLLOW sets are precisely what could follow a rule reference.
* For input input "i=(3);", here is the derivation:</p>
*
* <code><pre>
* stat => ID '=' expr ';'
* => ID '=' atom ('+' atom)* ';'
* => ID '=' '(' expr ')' ('+' atom)* ';'
* => ID '=' '(' atom ')' ('+' atom)* ';'
* => ID '=' '(' INT ')' ('+' atom)* ';'
* => ID '=' '(' INT ')' ';'
* </pre></code>
*
* <p>At the "3" token, you'd have a call chain of</p>
*
* <code> stat -> expr -> atom -> expr -> atom</code>
*
* <p>What can follow that specific nested ref to atom? Exactly ')'
* as you can see by looking at the derivation of this specific
* input. Contrast this with the FOLLOW(atom)={'+',')',';','.'}.</p>
*
* <p>You want the exact viable token set when recovering from a
* token mismatch. Upon token mismatch, if LA(1) is member of
* the viable next token set, then you know there is most likely
* a missing token in the input stream. "Insert" one by just not
* throwing an exception.</p>
* @returns {org.antlr.runtime.BitSet}
*/
computeContextSensitiveRuleFOLLOW: function() {
return this.combineFollows(true);
},
/**
* Helper method for {@link #computeErrorRecoverySet} and
* {@link computeContextSensitiveRuleFOLLO}.
* @param {Boolean} exact
* @returns {org.antlr.runtime.BitSet}
*/
combineFollows: function(exact) {
var top = this.state._fsp,
i,
localFollowSet,
followSet = new org.antlr.runtime.BitSet();
for (i=top; i>=0; i--) {
localFollowSet = this.state.following[i];
followSet.orInPlace(localFollowSet);
if ( exact ) {
// can we see end of rule?
if ( localFollowSet.member(org.antlr.runtime.Token.EOR_TOKEN_TYPE) )
{
// Only leave EOR in set if at top (start rule); this lets
// us know if have to include follow(start rule); i.e., EOF
if ( i>0 ) {
followSet.remove(org.antlr.runtime.Token.EOR_TOKEN_TYPE);
}
}
else { // can't see end of rule, quit
break;
}
}
}
return followSet;
},
/** Attempt to recover from a single missing or extra token.
*
* <p>EXTRA TOKEN</p>
*
* <p>LA(1) is not what we are looking for. If LA(2) has the right token,
* however, then assume LA(1) is some extra spurious token. Delete it
* and LA(2) as if we were doing a normal match(), which advances the
* input.</p>
*
* <p>MISSING TOKEN</p>
*
* <p>If current token is consistent with what could come after
* ttype then it is ok to "insert" the missing token, else throw
* exception For example, Input "i=(3;" is clearly missing the
* ')'. When the parser returns from the nested call to expr, it
* will have call chain:</p>
*
* <pre><code> stat -> expr -> atom</code></pre>
*
* <p>and it will be trying to match the ')' at this point in the
* derivation:</p>
*
* <pre><code> => ID '=' '(' INT ')' ('+' atom)* ';'</code></pre>
* ^
* <p>match() will see that ';' doesn't match ')' and report a
* mismatched token error. To recover, it sees that LA(1)==';'
* is in the set of tokens that can follow the ')' token
* reference in rule atom. It can assume that you forgot the ')'.</p>
*
* @param {org.antlr.runtime.IntStream} input
* @param {Number} ttype
* @param {org.antlr.runtime.BitSet} follow
* @returns {Object}
*/
recoverFromMismatchedToken: function(input,
ttype,
follow)
{
var e = null;
// if next token is what we are looking for then "delete" this token
if ( this.mismatchIsUnwantedToken(input, ttype) ) {
e = new org.antlr.runtime.UnwantedTokenException(ttype, input);
this.beginResync();
input.consume(); // simply delete extra token
this.endResync();
this.reportError(e); // report after consuming so AW sees the token in the exception
// we want to return the token we're actually matching
var matchedSymbol = this.getCurrentInputSymbol(input);
input.consume(); // move past ttype token as if all were ok
return matchedSymbol;
}
// can't recover with single token deletion, try insertion
if ( this.mismatchIsMissingToken(input, follow) ) {
var inserted = this.getMissingSymbol(input, e, ttype, follow);
e = new org.antlr.runtime.MissingTokenException(ttype, input, inserted);
this.reportError(e); // report after inserting so AW sees the token in the exception
return inserted;
}
// even that didn't work; must throw the exception
e = new org.antlr.runtime.MismatchedTokenException(ttype, input);
throw e;
},
/**
* Recover from a mismatched set exception.
* @param {org.antlr.runtime.IntStream} input
* @param {org.antlr.runtime.RecognitionException} e
* @param {org.antlr.runtime.BitSet} follow
* @returns {Object}
*/
recoverFromMismatchedSet: function(input,
e,
follow)
{
if ( this.mismatchIsMissingToken(input, follow) ) {
// System.out.println("missing token");
this.reportError(e);
// we don't know how to conjure up a token for sets yet
return this.getMissingSymbol(input, e, org.antlr.runtime.Token.INVALID_TOKEN_TYPE, follow);
}
throw e;
},
/** Match needs to return the current input symbol, which gets put
* into the label for the associated token ref; e.g., x=ID. Token
* and tree parsers need to return different objects. Rather than test
* for input stream type or change the IntStream interface, I use
* a simple method to ask the recognizer to tell me what the current
* input symbol is.
*
* <p>This is ignored for lexers.</p>
* @param {org.antlr.runtime.IntStream} input
* @returns {Object}
*/
getCurrentInputSymbol: function(input) { return null; },
/** Conjure up a missing token during error recovery.
*
* <p>The recognizer attempts to recover from single missing
* symbols. But, actions might refer to that missing symbol.
* For example, x=ID {f($x);}. The action clearly assumes
* that there has been an identifier matched previously and that
* $x points at that token. If that token is missing, but
* the next token in the stream is what we want we assume that
* this token is missing and we keep going. Because we
* have to return some token to replace the missing token,
* we have to conjure one up. This method gives the user control
* over the tokens returned for missing tokens. Mostly,
* you will want to create something special for identifier
* tokens. For literals such as '{' and ',', the default
* action in the parser or tree parser works. It simply creates
* a CommonToken of the appropriate type. The text will be the token.
* If you change what tokens must be created by the lexer,
* override this method to create the appropriate tokens.</p>
*
* @param {org.antlr.runtime.IntStream} input
* @param {org.antlr.runtime.RecognitionException} e
* @param {Number} expectedTokenType
* @param {org.antlr.runtime.BitSet} follow
* @returns {Object}
*/
getMissingSymbol: function(input,
e,
expectedTokenType,
follow)
{
return null;
},
/**
* Consume tokens until one matches the given token or token set
* @param {org.antlr.runtime.IntStream} input
* @param {Number|org.antlr.runtime.BitSet} set
*/
consumeUntil: function(input, set) {
var ttype = input.LA(1);
while (ttype != org.antlr.runtime.Token.EOF && !set.member(ttype) ) {
input.consume();
ttype = input.LA(1);
}
},
/**
* Push a rule's follow set using our own hardcoded stack.
* @param {org.antlr.runtime.BitSet} fset
*/
pushFollow: function(fset) {
if ( (this.state._fsp +1)>=this.state.following.length ) {
var f = [];
var i;
for (i=this.state.following.length-1; i>=0; i--) {
f[i] = this.state.following[i];
}
this.state.following = f;
}
this.state._fsp++;
this.state.following[this.state._fsp] = fset;
},
/**
* Sadly JavaScript doesn't provide a robust mechanism for runtime stack reflection.
* This makes implementing this function impossible without maintaining an auxillary
* stack data structure, which would be crazy expensive, especially in Lexers. As such,
* this method remains unimplemented.
* @deprecated
*/
getRuleInvocationStack: function(e, recognizerClassName)
{
throw new Error("Not implemented.");
},
/**
* Get this recognizer's backtracking level.
* @returns {Number} backtracking level
*/
getBacktrackingLevel: function() {
return this.state.backtracking;
},
/** Used to print out token names like ID during debugging and
* error reporting. The generated parsers implement a method
* that overrides this to point to their String[] tokenNames.
* @returns {Array} String array of token names.
*/
getTokenNames: function() {
return null;
},
/** For debugging and other purposes, might want the grammar name.
* Have ANTLR generate an implementation for this method.
* @returns {String} the grammar name.
*/
getGrammarFileName: function() {
return null;
},
/** A convenience method for use most often with template rewrites.
* Convert an array of Tokens to an array of Strings.
* @param {Array} array of org.antlr.runtime.Token objects.
* @returns {Array} array of string representations of the argument.
*/
toStrings: function(tokens) {
if ( !tokens ) {
return null;
}
var strings = [];
var i;
for (i=0; i<tokens.length; i++) {
strings.push(tokens[i].getText());
}
return strings;
},
/** Given a rule number and a start token index number, return
* MEMO_RULE_UNKNOWN if the rule has not parsed input starting from
* start index. If this rule has parsed input starting from the
* start index before, then return where the rule stopped parsing.
* It returns the index of the last token matched by the rule.
*
* <p>For now we use a hashtable and just the slow Object-based one.
* Later, we can make a special one for ints and also one that
* tosses out data after we commit past input position i.</p>
* @param {Number} ruleIndex
* @param {Number} ruleStartIndex
* @returns {Number}
*/
getRuleMemoization: function(ruleIndex, ruleStartIndex) {
if ( !this.state.ruleMemo[ruleIndex] ) {
this.state.ruleMemo[ruleIndex] = {};
}
var stopIndexI =
this.state.ruleMemo[ruleIndex][ruleStartIndex];
if ( !org.antlr.lang.isNumber(stopIndexI) ) {
return org.antlr.runtime.BaseRecognizer.MEMO_RULE_UNKNOWN;
}
return stopIndexI;
},
/** Has this rule already parsed input at the current index in the
* input stream? Return the stop token index or MEMO_RULE_UNKNOWN.
* If we attempted but failed to parse properly before, return
* MEMO_RULE_FAILED.
*
* <p>This method has a side-effect: if we have seen this input for
* this rule and successfully parsed before, then seek ahead to
* 1 past the stop token matched for this rule last time.</p>
* @param {org.antlr.runtime.IntStream} input
* @param {Number} ruleIndex
* @returns {Boolean}
*/
alreadyParsedRule: function(input, ruleIndex) {
var stopIndex = this.getRuleMemoization(ruleIndex, input.index());
if ( stopIndex==org.antlr.runtime.BaseRecognizer.MEMO_RULE_UNKNOWN ) {
return false;
}
if ( stopIndex==org.antlr.runtime.BaseRecognizer.MEMO_RULE_FAILED ) {
//System.out.println("rule "+ruleIndex+" will never succeed");
this.state.failed=true;
}
else {
input.seek(stopIndex+1); // jump to one past stop token
}
return true;
},
/** Record whether or not this rule parsed the input at this position
* successfully. Use a standard java hashtable for now.
* @param {org.antlr.runtime.IntStream} input
* @param {Number} ruleIndex
* @param {Number} ruleStartIndex
*/
memoize: function(input,
ruleIndex,
ruleStartIndex)
{
var stopTokenIndex = this.state.failed ?
org.antlr.runtime.BaseRecognizer.MEMO_RULE_FAILED : input.index()-1;
if ( !org.antlr.lang.isValue(this.state.ruleMemo) ) {
throw new Error("!!!!!!!!! memo array is null for "+ this.getGrammarFileName());
}
if ( ruleIndex >= this.state.ruleMemo.length ) {
throw new Error("!!!!!!!!! memo size is "+this.state.ruleMemo.length+", but rule index is "+ruleIndex);
}
if ( org.antlr.lang.isValue(this.state.ruleMemo[ruleIndex]) ) {
this.state.ruleMemo[ruleIndex][ruleStartIndex] = stopTokenIndex;
}
},
/** return how many rule/input-index pairs there are in total.
* TODO: this includes synpreds.
* @returns {Number}
*/
getRuleMemoizationCacheSize: function() {
var n = 0, i;
for (i = 0; this.state.ruleMemo && i < this.state.ruleMemo.length; i++) {
var ruleMap = this.state.ruleMemo[i];
if ( ruleMap ) {
// @todo need to get size of rulemap?
n += ruleMap.length; // how many input indexes are recorded?
}
}
return n;
},
/**
* When a grammar is compiled with the tracing flag enabled, this method is invoked
* at the start of each rule.
* @param {String} ruleName the current ruleName
* @param {Number} ruleIndex
* @param {Object} inputSymbol
*/
traceIn: function(ruleName, ruleIndex, inputSymbol) {
this.emitErrorMessage("enter "+ruleName+" "+inputSymbol);
if ( this.state.failed ) {
this.emitErrorMessage(" failed="+this.failed);
}
if ( this.state.backtracking>0 ) {
this.emitErrorMessage(" backtracking="+this.state.backtracking);
}
// System.out.println();
},
/**
* When a grammar is compiled with the tracing flag enabled, this method is invoked
* at the end of each rule.
* @param {String} ruleName the current ruleName
* @param {Number} ruleIndex
* @param {Object} inputSymbol
*/
traceOut: function(ruleName, ruleIndex, inputSymbol) {
this.emitErrorMessage("exit "+ruleName+" "+inputSymbol);
if ( this.state.failed ) {
this.emitErrorMessage(" failed="+this.state.failed);
}
if ( this.state.backtracking>0 ) {
this.emitErrorMessage(" backtracking="+this.state.backtracking);
}
}
};