// antlr-3.4/runtime/JavaScript/src/org/antlr/runtime/Lexer.js - platform/external/antlr - Git at Google

 /** A lexer is a recognizer that draws its input symbols from a character
  *  stream.  Lexer grammars result in a subclass of this object.  A Lexer
  *  uses simplified match() and error recovery mechanisms in the interest
  *  of speed.
  *
  *  @param input  optional char stream; stored as this.input when truthy.
  *  @param state  optional shared recognizer state; when truthy it is
  *                handed to the superclass constructor.  When it is
  *                omitted the superclass constructor is not run at all.
  */
 org.antlr.runtime.Lexer = function(input, state) {
     if (state) {
         // Look the superclass up at call time, exactly when it is needed.
         var base = org.antlr.runtime.Lexer.superclass;
         base.constructor.call(this, state);
     }
     if (input) {
         this.input = input;
     }
 };

 org.antlr.lang.extend(org.antlr.runtime.Lexer, org.antlr.runtime.BaseRecognizer, {
     /** Return the lexer to its initial condition.  Runs the superclass
      *  reset, rewinds the char stream to index 0 when one is attached,
      *  and clears the per-token fields of the shared state when present.
      */
     reset: function() {
         org.antlr.runtime.Lexer.superclass.reset.call(this);

         if ( org.antlr.lang.isValue(this.input) ) {
             this.input.seek(0); // back to the first character
         }

         // Without a shared state object there is nothing more to clear.
         if ( org.antlr.lang.isValue(this.state) ) {
             var shared = this.state;
             shared.token = null;
             shared.type = org.antlr.runtime.Token.INVALID_TOKEN_TYPE;
             shared.channel = org.antlr.runtime.Token.DEFAULT_CHANNEL;
             shared.tokenStartCharIndex = -1;
             shared.tokenStartCharPositionInLine = -1;
             shared.tokenStartLine = -1;
             shared.text = null;
         }
     },

     /** Return a token from this source; i.e., match a token on the char
      *  stream.
      *
      *  Every pass clears the per-token state, records where the token
      *  starts and calls mTokens() (not defined in this object; presumably
      *  supplied by the generated lexer subclass).  Token.EOF_TOKEN is
      *  returned when the next character is CharStream.EOF.  If the rule
      *  left state.token unset, emit() builds the token; if it left
      *  SKIP_TOKEN, the loop starts over.  Recognition errors are reported
      *  and the loop tries again; any other exception is rethrown.
      */
     nextToken: function() {
         var shared, stream, noViableAlt;

         for (;;) {
             shared = this.state;
             stream = this.input;

             shared.token = null;
             shared.channel = org.antlr.runtime.Token.DEFAULT_CHANNEL;
             shared.tokenStartCharIndex = stream.index();
             shared.tokenStartCharPositionInLine = stream.getCharPositionInLine();
             shared.tokenStartLine = stream.getLine();
             shared.text = null;

             if ( stream.LA(1)===org.antlr.runtime.CharStream.EOF ) {
                 return org.antlr.runtime.Token.EOF_TOKEN;
             }

             try {
                 this.mTokens();
                 if ( org.antlr.lang.isValue(this.state.token) ) {
                     if ( this.state.token==org.antlr.runtime.Token.SKIP_TOKEN ) {
                         continue; // rule asked to be skipped; look for another token
                     }
                 } else {
                     this.emit(); // rule produced no token itself
                 }
                 return this.state.token;
             }
             catch (re) {
                 noViableAlt = re instanceof org.antlr.runtime.NoViableAltException;
                 if ( !noViableAlt && !(re instanceof org.antlr.runtime.RecognitionException) ) {
                     throw re; // not a recognition problem; let it propagate
                 }
                 this.reportError(re);
                 // Only the no-viable-alternative case recovers here; match()
                 // and matchRange() below call recover() before they throw.
                 if ( noViableAlt ) {
                     this.recover(re);
                 }
             }
         }
     },

     /** Instruct the lexer to skip creating a token for the current lexer
      *  rule and look for another token.  nextToken() keeps looking when a
      *  lexer rule finishes with the token set to SKIP_TOKEN.  Recall that
      *  if the token is still unset at the end of a token rule, nextToken()
      *  creates one through emit().
      */
     skip: function() {
         this.state.token = org.antlr.runtime.Token.SKIP_TOKEN;
     },

     /** Set the char stream and reset the lexer.  The old stream is
      *  detached before reset() runs, so reset() does not seek on it
      *  (assuming org.antlr.lang.isValue(null) is false).
      */
     setCharStream: function(input) {
         this.input = null;
         this.reset();
         this.input = input;
     },

     /** Return the char stream this lexer currently reads from. */
     getCharStream: function() {
         return this.input;
     },

     /** Return whatever the char stream reports as its source name. */
     getSourceName: function() {
         return this.input.getSourceName();
     },

     /** Currently does not support multiple emits per nextToken invocation
      *  for efficiency reasons.  Subclass and override this method and
      *  nextToken (to push tokens into a list and pull from that list rather
      *  than a single variable as this implementation does).
      *
      *  Called with no arguments, this is the standard way a token is
      *  emitted automatically at the outermost lexical rule: a CommonToken
      *  is built from the recorded start index up to getCharIndex()-1,
      *  given the recorded line, column and text override, stored in
      *  state.token and returned.  Called with an argument, that argument
      *  is stored in state.token and nothing is returned.  Override this
      *  method to emit custom Token objects.
      *
      *  If you are building trees, then you should also override
      *  Parser or TreeParser.getMissingSymbol().
      */
     emit: function() {
         if ( arguments.length!==0 ) {
             this.state.token = arguments[0];
             return;
         }

         var shared = this.state;
         var tok = new org.antlr.runtime.CommonToken(
             this.input,
             shared.type,
             shared.channel,
             shared.tokenStartCharIndex,
             this.getCharIndex()-1);
         tok.setLine(shared.tokenStartLine);
         tok.setText(shared.text);
         tok.setCharPositionInLine(shared.tokenStartCharPositionInLine);
         this.state.token = tok;
         return tok;
     },

     /** Match the literal s against the upcoming input.
      *
      *  A string s is matched one character at a time; a number s is
      *  compared with a single LA(1) value.  Each matched character is
      *  consumed and state.failed is cleared.  On a mismatch while
      *  state.backtracking>0 the method sets state.failed and returns;
      *  otherwise it calls recover() and throws a MismatchedTokenException.
      *  A value that is neither a string nor a number is ignored.
      */
     match: function(s) {
         var pos, mte;

         if ( org.antlr.lang.isString(s) ) {
             for (pos = 0; pos<s.length; pos++) {
                 if ( this.input.LA(1)!=s.charAt(pos) ) {
                     if ( this.state.backtracking>0 ) {
                         this.state.failed = true;
                         return;
                     }
                     mte = new org.antlr.runtime.MismatchedTokenException(s.charAt(pos), this.input);
                     this.recover(mte);
                     throw mte;
                 }
                 this.input.consume();
                 this.state.failed = false;
             }
             return;
         }

         if ( !org.antlr.lang.isNumber(s) ) {
             return;
         }

         if ( this.input.LA(1)==s ) {
             this.input.consume();
             this.state.failed = false;
             return;
         }

         if ( this.state.backtracking>0 ) {
             this.state.failed = true;
             return;
         }
         mte = new org.antlr.runtime.MismatchedTokenException(s, this.input);
         this.recover(mte);
         throw mte;
     },

     /** Accept the next character unconditionally by consuming it. */
     matchAny: function() {
         this.input.consume();
     },

     /** Match one character that is neither below a nor above b.
      *
      *  On success the character is consumed and state.failed is cleared.
      *  On failure while state.backtracking>0 the method sets state.failed
      *  and returns; otherwise it calls recover() and throws a
      *  MismatchedRangeException.
      *
      *  NOTE(review): if LA(1) can be a numeric EOF marker while a and b
      *  are strings, both relational tests may evaluate to false and EOF
      *  would be treated as in range -- confirm against CharStream.EOF.
      */
     matchRange: function(a, b) {
         // Written as a negation of the two "outside" tests on purpose:
         // this keeps the result identical for values that do not order.
         var inside = !( this.input.LA(1)<a || this.input.LA(1)>b );

         if ( inside ) {
             this.input.consume();
             this.state.failed = false;
             return;
         }

         if ( this.state.backtracking>0 ) {
             this.state.failed = true;
             return;
         }
         var mre = new org.antlr.runtime.MismatchedRangeException(a,b,this.input);
         this.recover(mre);
         throw mre;
     },

     /** Line reported by the char stream for its current position. */
     getLine: function() {
         return this.input.getLine();
     },

     /** Column within the line reported by the char stream. */
     getCharPositionInLine: function() {
         return this.input.getCharPositionInLine();
     },

     /** What is the index of the current character of lookahead? */
     getCharIndex: function() {
         return this.input.index();
     },

     /** Return the text matched so far for the current token or any
      *  text override.  Without a string override the text is taken from
      *  the char stream via substring(start, getCharIndex()-1); the stop
      *  argument is presumably inclusive -- confirm against the stream.
      */
     getText: function() {
         return org.antlr.lang.isString(this.state.text)
             ? this.state.text
             : this.input.substring(this.state.tokenStartCharIndex, this.getCharIndex()-1);
     },

     /** Set the complete text of this token; it wipes any previous
      *  changes to the text.
      */
     setText: function(text) {
         this.state.text = text;
     },

     /** Report a recognition error by handing it, together with the token
      *  names, to displayRecognitionError().
      *
      *  TODO: error recovery in the lexer has not been thought through yet.
      *  A guard that would suppress further reports until a token has been
      *  matched successfully (an "errorRecovery" flag) was sketched here
      *  but is not active.
      */
     reportError: function(e) {
         this.displayRecognitionError(this.getTokenNames(), e);
     },

     /** Build the message for a recognition error in terms of characters.
      *  Exception types not handled here are delegated to the superclass.
      *  The order of the tests is significant and is kept as-is.
      */
     getErrorMessage: function(e, tokenNames) {
         var rt = org.antlr.runtime;

         if ( e instanceof rt.MismatchedTokenException ) {
             return "mismatched character "+this.getCharErrorDisplay(e.c)+
                 " expecting "+this.getCharErrorDisplay(e.expecting);
         }
         if ( e instanceof rt.NoViableAltException ) {
             return "no viable alternative at character "+this.getCharErrorDisplay(e.c);
         }
         if ( e instanceof rt.EarlyExitException ) {
             return "required (...)+ loop did not match anything at character "+this.getCharErrorDisplay(e.c);
         }
         if ( e instanceof rt.MismatchedNotSetException ||
              e instanceof rt.MismatchedSetException ) {
             return "mismatched character "+this.getCharErrorDisplay(e.c)+" expecting set "+e.expecting;
         }
         if ( e instanceof rt.MismatchedRangeException ) {
             return "mismatched character "+this.getCharErrorDisplay(e.c)+" expecting set "+
                 this.getCharErrorDisplay(e.a)+".."+this.getCharErrorDisplay(e.b);
         }
         return rt.Lexer.superclass.getErrorMessage.call(this, e, tokenNames);
     },

     /** Render a character for an error message, wrapped in single quotes.
      *  Token.EOF becomes <EOF>; newline, tab and carriage return are shown
      *  as escape sequences.  The value is used as given; it is not run
      *  through String.fromCharCode().
      */
     getCharErrorDisplay: function(c) {
         var shown = c;

         if ( c===org.antlr.runtime.Token.EOF ) {
             shown = "<EOF>";
         } else if ( c==="\n" ) {
             shown = "\\n";
         } else if ( c==="\t" ) {
             shown = "\\t";
         } else if ( c==="\r" ) {
             shown = "\\r";
         }
         return "'"+shown+"'";
     },

     /** Lexers can normally match any char in their vocabulary after
      *  matching a token, so do the easy thing and just kill a character
      *  and hope it all works out.  You can instead use the rule invocation
      *  stack to do sophisticated error recovery if you are in a fragment
      *  rule.
      */
     recover: function(re) {
         this.input.consume();
     },

     /** Forward a rule-entry trace to the superclass, describing the input
      *  as the lookahead symbol followed by " line=<line>:<column>".
      *
      *  NOTE(review): match() compares LA(1) with s.charAt(i), which
      *  suggests the stream yields one-character strings; if LT(1) does
      *  too, String.fromCharCode() will not render it -- confirm.
      */
     traceIn: function(ruleName, ruleIndex)  {
         var ch = String.fromCharCode(this.input.LT(1));
         var inputSymbol = ch+" line="+this.getLine()+":"+this.getCharPositionInLine();
         org.antlr.runtime.Lexer.superclass.traceIn.call(this, ruleName, ruleIndex, inputSymbol);
     },

     /** Forward a rule-exit trace to the superclass; the input description
      *  is built the same way as in traceIn().
      */
     traceOut: function(ruleName, ruleIndex)  {
         var ch = String.fromCharCode(this.input.LT(1));
         var inputSymbol = ch+" line="+this.getLine()+":"+this.getCharPositionInLine();
         org.antlr.runtime.Lexer.superclass.traceOut.call(this, ruleName, ruleIndex, inputSymbol);
     }
 });
	/** A lexer is a recognizer that draws its input symbols from a character
	 *  stream.  Lexer grammars result in a subclass of this object.  A Lexer
	 *  uses simplified match() and error recovery mechanisms in the interest
	 *  of speed.
	 *
	 *  @param input  optional char stream; stored as this.input when truthy.
	 *  @param state  optional shared recognizer state; when truthy it is
	 *                handed to the superclass constructor.  When it is
	 *                omitted the superclass constructor is not run at all.
	 */
	org.antlr.runtime.Lexer = function(input, state) {
		if (state) {
			// Look the superclass up at call time, exactly when it is needed.
			var base = org.antlr.runtime.Lexer.superclass;
			base.constructor.call(this, state);
		}
		if (input) {
			this.input = input;
		}
	};

	org.antlr.lang.extend(org.antlr.runtime.Lexer, org.antlr.runtime.BaseRecognizer, {
		/** Return the lexer to its initial condition.  Runs the superclass
		 *  reset, rewinds the char stream to index 0 when one is attached,
		 *  and clears the per-token fields of the shared state when present.
		 */
		reset: function() {
			org.antlr.runtime.Lexer.superclass.reset.call(this);

			if ( org.antlr.lang.isValue(this.input) ) {
				this.input.seek(0); // back to the first character
			}

			// Without a shared state object there is nothing more to clear.
			if ( org.antlr.lang.isValue(this.state) ) {
				var shared = this.state;
				shared.token = null;
				shared.type = org.antlr.runtime.Token.INVALID_TOKEN_TYPE;
				shared.channel = org.antlr.runtime.Token.DEFAULT_CHANNEL;
				shared.tokenStartCharIndex = -1;
				shared.tokenStartCharPositionInLine = -1;
				shared.tokenStartLine = -1;
				shared.text = null;
			}
		},

		/** Return a token from this source; i.e., match a token on the char
		 *  stream.
		 *
		 *  Every pass clears the per-token state, records where the token
		 *  starts and calls mTokens() (not defined in this object; presumably
		 *  supplied by the generated lexer subclass).  Token.EOF_TOKEN is
		 *  returned when the next character is CharStream.EOF.  If the rule
		 *  left state.token unset, emit() builds the token; if it left
		 *  SKIP_TOKEN, the loop starts over.  Recognition errors are reported
		 *  and the loop tries again; any other exception is rethrown.
		 */
		nextToken: function() {
			var shared, stream, noViableAlt;

			for (;;) {
				shared = this.state;
				stream = this.input;

				shared.token = null;
				shared.channel = org.antlr.runtime.Token.DEFAULT_CHANNEL;
				shared.tokenStartCharIndex = stream.index();
				shared.tokenStartCharPositionInLine = stream.getCharPositionInLine();
				shared.tokenStartLine = stream.getLine();
				shared.text = null;

				if ( stream.LA(1)===org.antlr.runtime.CharStream.EOF ) {
					return org.antlr.runtime.Token.EOF_TOKEN;
				}

				try {
					this.mTokens();
					if ( org.antlr.lang.isValue(this.state.token) ) {
						if ( this.state.token==org.antlr.runtime.Token.SKIP_TOKEN ) {
							continue; // rule asked to be skipped; look for another token
						}
					} else {
						this.emit(); // rule produced no token itself
					}
					return this.state.token;
				}
				catch (re) {
					noViableAlt = re instanceof org.antlr.runtime.NoViableAltException;
					if ( !noViableAlt && !(re instanceof org.antlr.runtime.RecognitionException) ) {
						throw re; // not a recognition problem; let it propagate
					}
					this.reportError(re);
					// Only the no-viable-alternative case recovers here; match()
					// and matchRange() below call recover() before they throw.
					if ( noViableAlt ) {
						this.recover(re);
					}
				}
			}
		},

		/** Instruct the lexer to skip creating a token for the current lexer
		 *  rule and look for another token.  nextToken() keeps looking when a
		 *  lexer rule finishes with the token set to SKIP_TOKEN.  Recall that
		 *  if the token is still unset at the end of a token rule, nextToken()
		 *  creates one through emit().
		 */
		skip: function() {
			this.state.token = org.antlr.runtime.Token.SKIP_TOKEN;
		},

		/** Set the char stream and reset the lexer.  The old stream is
		 *  detached before reset() runs, so reset() does not seek on it
		 *  (assuming org.antlr.lang.isValue(null) is false).
		 */
		setCharStream: function(input) {
			this.input = null;
			this.reset();
			this.input = input;
		},

		/** Return the char stream this lexer currently reads from. */
		getCharStream: function() {
			return this.input;
		},

		/** Return whatever the char stream reports as its source name. */
		getSourceName: function() {
			return this.input.getSourceName();
		},

		/** Currently does not support multiple emits per nextToken invocation
		 *  for efficiency reasons.  Subclass and override this method and
		 *  nextToken (to push tokens into a list and pull from that list rather
		 *  than a single variable as this implementation does).
		 *
		 *  Called with no arguments, this is the standard way a token is
		 *  emitted automatically at the outermost lexical rule: a CommonToken
		 *  is built from the recorded start index up to getCharIndex()-1,
		 *  given the recorded line, column and text override, stored in
		 *  state.token and returned.  Called with an argument, that argument
		 *  is stored in state.token and nothing is returned.  Override this
		 *  method to emit custom Token objects.
		 *
		 *  If you are building trees, then you should also override
		 *  Parser or TreeParser.getMissingSymbol().
		 */
		emit: function() {
			if ( arguments.length!==0 ) {
				this.state.token = arguments[0];
				return;
			}

			var shared = this.state;
			var tok = new org.antlr.runtime.CommonToken(
				this.input,
				shared.type,
				shared.channel,
				shared.tokenStartCharIndex,
				this.getCharIndex()-1);
			tok.setLine(shared.tokenStartLine);
			tok.setText(shared.text);
			tok.setCharPositionInLine(shared.tokenStartCharPositionInLine);
			this.state.token = tok;
			return tok;
		},

		/** Match the literal s against the upcoming input.
		 *
		 *  A string s is matched one character at a time; a number s is
		 *  compared with a single LA(1) value.  Each matched character is
		 *  consumed and state.failed is cleared.  On a mismatch while
		 *  state.backtracking>0 the method sets state.failed and returns;
		 *  otherwise it calls recover() and throws a MismatchedTokenException.
		 *  A value that is neither a string nor a number is ignored.
		 */
		match: function(s) {
			var pos, mte;

			if ( org.antlr.lang.isString(s) ) {
				for (pos = 0; pos<s.length; pos++) {
					if ( this.input.LA(1)!=s.charAt(pos) ) {
						if ( this.state.backtracking>0 ) {
							this.state.failed = true;
							return;
						}
						mte = new org.antlr.runtime.MismatchedTokenException(s.charAt(pos), this.input);
						this.recover(mte);
						throw mte;
					}
					this.input.consume();
					this.state.failed = false;
				}
				return;
			}

			if ( !org.antlr.lang.isNumber(s) ) {
				return;
			}

			if ( this.input.LA(1)==s ) {
				this.input.consume();
				this.state.failed = false;
				return;
			}

			if ( this.state.backtracking>0 ) {
				this.state.failed = true;
				return;
			}
			mte = new org.antlr.runtime.MismatchedTokenException(s, this.input);
			this.recover(mte);
			throw mte;
		},

		/** Accept the next character unconditionally by consuming it. */
		matchAny: function() {
			this.input.consume();
		},

		/** Match one character that is neither below a nor above b.
		 *
		 *  On success the character is consumed and state.failed is cleared.
		 *  On failure while state.backtracking>0 the method sets state.failed
		 *  and returns; otherwise it calls recover() and throws a
		 *  MismatchedRangeException.
		 *
		 *  NOTE(review): if LA(1) can be a numeric EOF marker while a and b
		 *  are strings, both relational tests may evaluate to false and EOF
		 *  would be treated as in range -- confirm against CharStream.EOF.
		 */
		matchRange: function(a, b) {
			// The two "outside" tests are joined with a plain logical OR (the
			// operator must not carry backslash escapes) and then negated, which
			// keeps the result identical for values that do not order.
			var inside = !( this.input.LA(1)<a || this.input.LA(1)>b );

			if ( inside ) {
				this.input.consume();
				this.state.failed = false;
				return;
			}

			if ( this.state.backtracking>0 ) {
				this.state.failed = true;
				return;
			}
			var mre = new org.antlr.runtime.MismatchedRangeException(a,b,this.input);
			this.recover(mre);
			throw mre;
		},

		/** Line reported by the char stream for its current position. */
		getLine: function() {
			return this.input.getLine();
		},

		/** Column within the line reported by the char stream. */
		getCharPositionInLine: function() {
			return this.input.getCharPositionInLine();
		},

		/** What is the index of the current character of lookahead? */
		getCharIndex: function() {
			return this.input.index();
		},

		/** Return the text matched so far for the current token or any
		 *  text override.  Without a string override the text is taken from
		 *  the char stream via substring(start, getCharIndex()-1); the stop
		 *  argument is presumably inclusive -- confirm against the stream.
		 */
		getText: function() {
			return org.antlr.lang.isString(this.state.text)
				? this.state.text
				: this.input.substring(this.state.tokenStartCharIndex, this.getCharIndex()-1);
		},

		/** Set the complete text of this token; it wipes any previous
		 *  changes to the text.
		 */
		setText: function(text) {
			this.state.text = text;
		},

		/** Report a recognition error by handing it, together with the token
		 *  names, to displayRecognitionError().
		 *
		 *  TODO: error recovery in the lexer has not been thought through yet.
		 *  A guard that would suppress further reports until a token has been
		 *  matched successfully (an "errorRecovery" flag) was sketched here
		 *  but is not active.
		 */
		reportError: function(e) {
			this.displayRecognitionError(this.getTokenNames(), e);
		},

		/** Build the message for a recognition error in terms of characters.
		 *  Exception types not handled here are delegated to the superclass.
		 *  The order of the tests is significant and is kept as-is.
		 */
		getErrorMessage: function(e, tokenNames) {
			var rt = org.antlr.runtime;

			if ( e instanceof rt.MismatchedTokenException ) {
				return "mismatched character "+this.getCharErrorDisplay(e.c)+
					" expecting "+this.getCharErrorDisplay(e.expecting);
			}
			if ( e instanceof rt.NoViableAltException ) {
				return "no viable alternative at character "+this.getCharErrorDisplay(e.c);
			}
			if ( e instanceof rt.EarlyExitException ) {
				return "required (...)+ loop did not match anything at character "+this.getCharErrorDisplay(e.c);
			}
			if ( e instanceof rt.MismatchedNotSetException ||
				 e instanceof rt.MismatchedSetException ) {
				return "mismatched character "+this.getCharErrorDisplay(e.c)+" expecting set "+e.expecting;
			}
			if ( e instanceof rt.MismatchedRangeException ) {
				return "mismatched character "+this.getCharErrorDisplay(e.c)+" expecting set "+
					this.getCharErrorDisplay(e.a)+".."+this.getCharErrorDisplay(e.b);
			}
			return rt.Lexer.superclass.getErrorMessage.call(this, e, tokenNames);
		},

		/** Render a character for an error message, wrapped in single quotes.
		 *  Token.EOF becomes <EOF>; newline, tab and carriage return are shown
		 *  as escape sequences.  The value is used as given; it is not run
		 *  through String.fromCharCode().
		 */
		getCharErrorDisplay: function(c) {
			var shown = c;

			if ( c===org.antlr.runtime.Token.EOF ) {
				shown = "<EOF>";
			} else if ( c==="\n" ) {
				shown = "\\n";
			} else if ( c==="\t" ) {
				shown = "\\t";
			} else if ( c==="\r" ) {
				shown = "\\r";
			}
			return "'"+shown+"'";
		},

		/** Lexers can normally match any char in their vocabulary after
		 *  matching a token, so do the easy thing and just kill a character
		 *  and hope it all works out.  You can instead use the rule invocation
		 *  stack to do sophisticated error recovery if you are in a fragment
		 *  rule.
		 */
		recover: function(re) {
			this.input.consume();
		},

		/** Forward a rule-entry trace to the superclass, describing the input
		 *  as the lookahead symbol followed by " line=<line>:<column>".
		 *
		 *  NOTE(review): match() compares LA(1) with s.charAt(i), which
		 *  suggests the stream yields one-character strings; if LT(1) does
		 *  too, String.fromCharCode() will not render it -- confirm.
		 */
		traceIn: function(ruleName, ruleIndex) {
			var ch = String.fromCharCode(this.input.LT(1));
			var inputSymbol = ch+" line="+this.getLine()+":"+this.getCharPositionInLine();
			org.antlr.runtime.Lexer.superclass.traceIn.call(this, ruleName, ruleIndex, inputSymbol);
		},

		/** Forward a rule-exit trace to the superclass; the input description
		 *  is built the same way as in traceIn().
		 */
		traceOut: function(ruleName, ruleIndex) {
			var ch = String.fromCharCode(this.input.LT(1));
			var inputSymbol = ch+" line="+this.getLine()+":"+this.getCharPositionInLine();
			org.antlr.runtime.Lexer.superclass.traceOut.call(this, ruleName, ruleIndex, inputSymbol);
		}
	});