runtime/C/src/antlr3lexer.c - platform/external/antlr - Git at Google

 /** \file
  *
  * Base implementation of an antlr 3 lexer.
  *
  * An ANTLR3 lexer implements a base recognizer, a token source and
  * a lexer interface. It constructs a base recognizer with default
  * functions, then overrides any of these that are parser specific (as the
  * usual default implementations in the base recognizer are).
  */

 // [The "BSD licence"]
 // Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC
 // http://www.temporal-wave.com
 // http://www.linkedin.com/in/jimidle
 //
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions
 // are met:
 // 1. Redistributions of source code must retain the above copyright
 //    notice, this list of conditions and the following disclaimer.
 // 2. Redistributions in binary form must reproduce the above copyright
 //    notice, this list of conditions and the following disclaimer in the
 //    documentation and/or other materials provided with the distribution.
 // 3. The name of the author may not be used to endorse or promote products
 //    derived from this software without specific prior written permission.
 //
 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 #include    <antlr3lexer.h>

 /* Forward declarations of the file-local default implementations.
  * antlr3LexerNew() installs each of these into a function pointer slot of
  * the lexer, of its base recognizer, or of its token source.
  */
 static void					mTokens						(pANTLR3_LEXER lexer);
 static void					setCharStream				(pANTLR3_LEXER lexer,  pANTLR3_INPUT_STREAM input);
 static void					pushCharStream				(pANTLR3_LEXER lexer,  pANTLR3_INPUT_STREAM input);
 static void					popCharStream				(pANTLR3_LEXER lexer);

 static void					emitNew						(pANTLR3_LEXER lexer,  pANTLR3_COMMON_TOKEN token);
 static pANTLR3_COMMON_TOKEN emit						(pANTLR3_LEXER lexer);
 static ANTLR3_BOOLEAN	    matchs						(pANTLR3_LEXER lexer, ANTLR3_UCHAR * string);
 static ANTLR3_BOOLEAN	    matchc						(pANTLR3_LEXER lexer, ANTLR3_UCHAR c);
 static ANTLR3_BOOLEAN	    matchRange					(pANTLR3_LEXER lexer, ANTLR3_UCHAR low, ANTLR3_UCHAR high);
 static void					matchAny					(pANTLR3_LEXER lexer);
 static void					recover						(pANTLR3_LEXER lexer);
 static ANTLR3_UINT32	    getLine						(pANTLR3_LEXER lexer);
 static ANTLR3_MARKER	    getCharIndex				(pANTLR3_LEXER lexer);
 static ANTLR3_UINT32	    getCharPositionInLine		(pANTLR3_LEXER lexer);
 static pANTLR3_STRING	    getText						(pANTLR3_LEXER lexer);
 static pANTLR3_COMMON_TOKEN nextToken					(pANTLR3_TOKEN_SOURCE toksource);

 /* Overrides installed on the base recognizer. */
 static void					displayRecognitionError	    (pANTLR3_BASE_RECOGNIZER rec, pANTLR3_UINT8 * tokenNames);
 static void					reportError					(pANTLR3_BASE_RECOGNIZER rec);
 static void *				getCurrentInputSymbol		(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream);
 static void *				getMissingSymbol			(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM	istream, pANTLR3_EXCEPTION	e,
 															ANTLR3_UINT32 expectedTokenType, pANTLR3_BITSET_LIST follow);

 static void					reset						(pANTLR3_BASE_RECOGNIZER rec);

 /* Installed as lexer->free. */
 static void					freeLexer					(pANTLR3_LEXER lexer);


 /**
  * \brief
  * Allocates a lexer and wires up its default implementation.
  *
  * Creates the underlying base recognizer, overrides the recognizer entry
  * points that have lexer specific implementations in this file, creates the
  * token source (unless the recognizer state already carries one), installs
  * the lexer API and initialises the pre-made EOF and skip tokens.
  *
  * \param sizeHint
  * Passed through unchanged to antlr3BaseRecognizerNew().
  *
  * \param state
  * Recognizer shared state, passed through to antlr3BaseRecognizerNew().
  *
  * \returns
  * The new lexer, or NULL if any allocation failed.
  */
 ANTLR3_API pANTLR3_LEXER
 antlr3LexerNew(ANTLR3_UINT32 sizeHint, pANTLR3_RECOGNIZER_SHARED_STATE state)
 {
     pANTLR3_LEXER           lexer;
     pANTLR3_COMMON_TOKEN    specialT;

     /* Allocate memory
      */
     lexer   = (pANTLR3_LEXER) ANTLR3_MALLOC(sizeof(ANTLR3_LEXER));

     if  (lexer == NULL)
     {
         return  NULL;
     }

     /* Now we need to create the base recognizer
      */
     lexer->rec  = antlr3BaseRecognizerNew(ANTLR3_TYPE_LEXER, sizeHint, state);

     if  (lexer->rec == NULL)
     {
         /* The structure came straight from malloc, so lexer->free is not
          * installed yet and must not be called; release the block directly.
          */
         ANTLR3_FREE(lexer);
         return  NULL;
     }
     lexer->rec->super  =  lexer;

     lexer->rec->displayRecognitionError = displayRecognitionError;
     lexer->rec->reportError             = reportError;
     lexer->rec->reset                   = reset;
     lexer->rec->getCurrentInputSymbol   = getCurrentInputSymbol;
     lexer->rec->getMissingSymbol        = getMissingSymbol;

     /* Now install the token source interface
      */
     if  (lexer->rec->state->tokSource == NULL)
     {
         lexer->rec->state->tokSource    = (pANTLR3_TOKEN_SOURCE)ANTLR3_CALLOC(1, sizeof(ANTLR3_TOKEN_SOURCE));

         if  (lexer->rec->state->tokSource == NULL)
         {
             /* As above, lexer->free is still uninitialised at this point,
              * so tear down the recognizer and the lexer block by hand.
              */
             lexer->rec->free(lexer->rec);
             ANTLR3_FREE(lexer);

             return  NULL;
         }
         lexer->rec->state->tokSource->super    =  lexer;

         /* Install the default nextToken() method, which may be overridden
          * by generated code, or by anything else in fact.
          */
         lexer->rec->state->tokSource->nextToken     = nextToken;
         lexer->rec->state->tokSource->strFactory    = NULL;

         lexer->rec->state->tokFactory               = NULL;
     }

     /* Install the lexer API
      */
     lexer->mTokens                  = (void (*)(void *))(mTokens);
     lexer->setCharStream            =  setCharStream;
     lexer->pushCharStream           =  pushCharStream;
     lexer->popCharStream            =  popCharStream;
     lexer->emit                     =  emit;
     lexer->emitNew                  =  emitNew;
     lexer->matchs                   =  matchs;
     lexer->matchc                   =  matchc;
     lexer->matchRange               =  matchRange;
     lexer->matchAny                 =  matchAny;
     lexer->recover                  =  recover;
     lexer->getLine                  =  getLine;
     lexer->getCharIndex             =  getCharIndex;
     lexer->getCharPositionInLine    =  getCharPositionInLine;
     lexer->getText                  =  getText;
     lexer->free                     =  freeLexer;

     /* Initialise the eof token
      */
     specialT                    = &(lexer->rec->state->tokSource->eofToken);
     antlr3SetTokenAPI     (specialT);
     specialT->setType     (specialT, ANTLR3_TOKEN_EOF);
     specialT->factoryMade       = ANTLR3_TRUE;      // Prevent things trying to free() it
     specialT->strFactory        = NULL;
     specialT->textState         = ANTLR3_TEXT_NONE;
     specialT->custom            = NULL;
     specialT->user1             = 0;
     specialT->user2             = 0;
     specialT->user3             = 0;

     // Initialize the skip token, field for field like the EOF token.
     //
     specialT                    = &(lexer->rec->state->tokSource->skipToken);
     antlr3SetTokenAPI     (specialT);
     specialT->setType     (specialT, ANTLR3_TOKEN_INVALID);
     specialT->factoryMade       = ANTLR3_TRUE;      // Prevent things trying to free() it
     specialT->strFactory        = NULL;
     specialT->textState         = ANTLR3_TEXT_NONE;
     specialT->custom            = NULL;
     specialT->user1             = 0;
     specialT->user2             = 0;
     specialT->user3             = 0;

     return  lexer;
 }

 /**
  * \brief
  * Lexer implementation of the base recognizer reset() entry point.
  *
  * Clears the current token and text, restores the default type and channel,
  * sets the token start index, line and position to -1 and, when a token
  * factory exists, resets that as well.
  *
  * \param rec
  * Base recognizer whose super pointer addresses the lexer to reset.
  */
 static void
 reset	(pANTLR3_BASE_RECOGNIZER rec)
 {
     pANTLR3_LEXER               lexer = rec->super;
     pANTLR3_BASE_RECOGNIZER     base  = lexer->rec;

     // Forget whatever token was in flight
     //
     base->state->token     = NULL;
     base->state->type      = ANTLR3_TOKEN_INVALID;
     base->state->channel   = ANTLR3_TOKEN_DEFAULT_CHANNEL;

     // No token start has been recorded yet
     //
     base->state->tokenStartCharIndex            = -1;
     base->state->tokenStartCharPositionInLine   = -1;
     base->state->tokenStartLine                 = -1;

     base->state->text      = NULL;

     // Without a token factory there is nothing more to do
     //
     if  (base->state->tokFactory == NULL)
     {
         return;
     }

     base->state->tokFactory->reset(base->state->tokFactory);
 }

 ///
 /// \brief
 /// Returns the next available token from the current input stream.
 ///
 /// \param toksource
 /// Points to the implementation of a token source. The lexer is
 /// addressed by the super structure pointer.
 ///
 /// \returns
 /// The next non-skipped token in the current input stream, or the token
 /// source's EOF token when the stream has no more characters.
 ///
 /// \remarks
 /// Only the lexer's current input stream is examined; the stack of pushed
 /// streams is handled by nextToken(). Recognition errors are reported and
 /// recovered from, after which matching simply starts again.
 ///
 /// \see nextToken
 ///
 ANTLR3_INLINE static pANTLR3_COMMON_TOKEN
 nextTokenStr	    (pANTLR3_TOKEN_SOURCE toksource)
 {
     pANTLR3_LEXER                   lexer   = (pANTLR3_LEXER)(toksource->super);
     pANTLR3_RECOGNIZER_SHARED_STATE state   = lexer->rec->state;
     pANTLR3_INPUT_STREAM            input   = lexer->input;
     pANTLR3_INT_STREAM              istream = input->istream;

     // Drop any previous token and start out with no error outstanding.
     //
     state->token    = NULL;
     state->error    = ANTLR3_FALSE;
     state->failed   = ANTLR3_FALSE;

     // Keep matching until something other than a skipped token turns up.
     //
     for (;;)
     {
         // Note where the token we are about to match begins.
         //
         state->channel                      = ANTLR3_TOKEN_DEFAULT_CHANNEL;
         state->tokenStartCharIndex          = (ANTLR3_MARKER)(((pANTLR3_UINT8)input->nextChar));
         state->tokenStartCharPositionInLine = input->charPositionInLine;
         state->tokenStartLine               = input->line;
         state->text                         = NULL;
         state->custom                       = NULL;
         state->user1                        = 0;
         state->user2                        = 0;
         state->user3                        = 0;

         if  (istream->_LA(istream, 1) == ANTLR3_CHARSTREAM_EOF)
         {
             // The current stream is exhausted: hand back the pre-made
             // EOF token, stamped with the current position.
             //
             pANTLR3_COMMON_TOKEN    eofTok = &(toksource->eofToken);

             eofTok->setStartIndex (eofTok, lexer->getCharIndex(lexer));
             eofTok->setStopIndex  (eofTok, lexer->getCharIndex(lexer));
             eofTok->setLine       (eofTok, lexer->getLine(lexer));
             eofTok->factoryMade   = ANTLR3_TRUE;    // Stops anything trying to free it
             return  eofTok;
         }

         state->token    = NULL;
         state->error    = ANTLR3_FALSE;
         state->failed   = ANTLR3_FALSE;

         // Let the (generated) lexer rules have a go at the input.
         //
         lexer->mTokens(lexer->ctx);

         if  (state->error == ANTLR3_TRUE)
         {
             // Recognition exception: report, recover, then try again.
             //
             state->failed   = ANTLR3_TRUE;
             lexer->rec->reportError(lexer->rec);
             lexer->recover(lexer);
             continue;
         }

         if  (state->token == NULL)
         {
             // The rules did not emit anything themselves, so build the
             // token from the recorded state.
             //
             emit(lexer);
         }
         else if (state->token == &(toksource->skipToken))
         {
             // The rules asked for this match to be skipped altogether.
             //
             continue;
         }

         // A real token: not skipped and not EOF.
         //
         return  state->token;
     }
 }

 /**
  * \brief
  * Default implementation of the nextToken() call for a lexer.
  *
  * \param toksource
  * Points to the implementation of a token source. The lexer is
  * addressed by the super structure pointer.
  *
  * \returns
  * The next token in the current input stream or the EOF token
  * if there are no more tokens in any input stream in the stack.
  *
  * \remarks
  * Whenever the current stream yields EOF and the stream stack is not
  * empty, the previous stream is popped back into use and asked for its
  * next token. That stream may itself have been sitting at EOF when the
  * finished stream was pushed, so this repeats until a non-EOF token is
  * found or the stack is empty.
  *
  * \see nextTokenStr
  */
 static pANTLR3_COMMON_TOKEN
 nextToken	    (pANTLR3_TOKEN_SOURCE toksource)
 {
     pANTLR3_COMMON_TOKEN    tok = nextTokenStr(toksource);

     while   (tok->type == ANTLR3_TOKEN_EOF)
     {
         pANTLR3_LEXER   lexer = (pANTLR3_LEXER)(toksource->super);

         // With nothing left on the stream stack this EOF is the real,
         // logical end of input, so it is returned as is.
         //
         if  (   lexer->rec->state->streams == NULL
             || !(lexer->rec->state->streams->size(lexer->rec->state->streams) > 0))
         {
             return  tok;
         }

         // Revert to the stacked stream and see what it has to offer.
         //
         lexer->popCharStream(lexer);
         tok = nextTokenStr(toksource);
     }

     return  tok;
 }

 /**
  * \brief
  * Creates a lexer with antlr3LexerNew() and installs an input stream in it.
  *
  * \param sizeHint
  * Passed through to antlr3LexerNew().
  *
  * \param input
  * Character stream installed via this file's setCharStream().
  *
  * \param state
  * Passed through to antlr3LexerNew().
  *
  * \returns
  * The new lexer, or NULL if antlr3LexerNew() returned NULL.
  */
 ANTLR3_API pANTLR3_LEXER
 antlr3LexerNewStream(ANTLR3_UINT32 sizeHint, pANTLR3_INPUT_STREAM input, pANTLR3_RECOGNIZER_SHARED_STATE state)
 {
     pANTLR3_LEXER   lexer = antlr3LexerNew(sizeHint, state);

     if  (lexer == NULL)
     {
         return  NULL;
     }

     setCharStream(lexer, input);

     return  lexer;
 }

 /**
  * Placeholder mTokens() installed by antlr3LexerNew(). It only prints a
  * diagnostic to stderr saying that no lexer rules have been added.
  *
  * \param lexer
  * Tested only so that the parameter counts as used; nothing is printed
  * when it is NULL.
  */
 static void mTokens	    (pANTLR3_LEXER lexer)
 {
     if  (lexer == NULL)
     {
         return;
     }

     ANTLR3_FPRINTF(stderr, "lexer->mTokens(): Error: No lexer rules were added to the lexer yet!\n");
 }

 /**
  * Lexer implementation of reportError(): counts the error in the shared
  * state and then displays it through the recognizer's
  * displayRecognitionError() entry point.
  *
  * \param rec
  * Base recognizer that encountered the error.
  */
 static void
 reportError		    (pANTLR3_BASE_RECOGNIZER rec)
 {
     // One more error seen by this recognizer
     //
     rec->state->errorCount += 1;

     rec->displayRecognitionError(rec, rec->state->tokenNames);
 }

 #ifdef	ANTLR3_WINDOWS
 #pragma warning( disable : 4100 )
 #endif

 /** Default lexer error handler (works for 8 bit streams only!!!)
  *
  *  Prints the current exception of the lexer to stderr: the stream name
  *  (when one is recorded), the line, the exception type and message, the
  *  offset in the line and up to 20 bytes of the input at the error position.
  *  When the error position is at or beyond the end of the input, the start
  *  of the token being matched is shown instead.
  *
  *  \param recognizer
  *  Base recognizer whose super pointer addresses the lexer.
  *
  *  \param tokenNames
  *  Not used by the lexer implementation.
  */
 static void
 displayRecognitionError	    (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 * tokenNames)
 {
     pANTLR3_LEXER       lexer;
     pANTLR3_EXCEPTION   ex;
     pANTLR3_STRING      ftext;
     ANTLR3_INT32        width;

     lexer   = (pANTLR3_LEXER)(recognizer->super);
     ex      = lexer->rec->state->exception;

     // See if there is a 'filename' we can use. The test must be made on
     // streamName because that is the pointer dereferenced below.
     //
     if  (ex->streamName == NULL)
     {
         ANTLR3_FPRINTF(stderr, "-unknown source-(");
     }
     else
     {
         ftext = ex->streamName->to8(ex->streamName);
         ANTLR3_FPRINTF(stderr, "%s(", ftext->chars);
     }

     ANTLR3_FPRINTF(stderr, "%d) ", recognizer->state->exception->line);
     ANTLR3_FPRINTF(stderr, ": lexer error %d :\n\t%s at offset %d, ",
                         ex->type,
                         (pANTLR3_UINT8)    (ex->message),
                         ex->charPositionInLine+1
             );

     // Number of bytes between the error position and the end of the input
     //
     width   = ANTLR3_UINT32_CAST(( (pANTLR3_UINT8)(lexer->input->data) + (lexer->input->size(lexer->input) )) - (pANTLR3_UINT8)(ex->index));

     if  (width >= 1)
     {
         // isprint() is only defined for values representable as unsigned
         // char (or EOF), so anything wider is shown in hex.
         // NOTE(review): assumes ex->c can hold values above 0xFF - confirm.
         //
         if  (ex->c <= 0xFF && isprint((int)(ex->c)))
         {
             ANTLR3_FPRINTF(stderr, "near '%c' :\n", ex->c);
         }
         else
         {
             ANTLR3_FPRINTF(stderr, "near char(%#02X) :\n", (ANTLR3_UINT8)(ex->c));
         }
         ANTLR3_FPRINTF(stderr, "\t%.*s\n", width > 20 ? 20 : width ,((pANTLR3_UINT8)ex->index));
     }
     else
     {
         ANTLR3_FPRINTF(stderr, "(end of input).\n\t This indicates a poorly specified lexer RULE\n\t or unterminated input element such as: \"STRING[\"]\n");
         ANTLR3_FPRINTF(stderr, "\t The lexer was matching from line %d, offset %d, which\n\t ",
                             (ANTLR3_UINT32)(lexer->rec->state->tokenStartLine),
                             (ANTLR3_UINT32)(lexer->rec->state->tokenStartCharPositionInLine)
                             );

         // Same measurement, taken from the start of the token being matched
         //
         width = ANTLR3_UINT32_CAST(((pANTLR3_UINT8)(lexer->input->data)+(lexer->input->size(lexer->input))) - (pANTLR3_UINT8)(lexer->rec->state->tokenStartCharIndex));

         if  (width >= 1)
         {
             ANTLR3_FPRINTF(stderr, "looks like this:\n\t\t%.*s\n", width > 20 ? 20 : width ,(pANTLR3_UINT8)(lexer->rec->state->tokenStartCharIndex));
         }
         else
         {
             ANTLR3_FPRINTF(stderr, "is also the end of the line, so you must check your lexer rules\n");
         }
     }
 }

 /**
  * \brief
  * Installs a character input stream as the lexer's current input.
  *
  * Creates the token factory on first use or re-targets the existing one at
  * the new stream, propagates the stream's string factory to the token
  * source and EOF token where those have none yet, installs the recognition
  * exception constructor and clears the current token state.
  *
  * \param lexer
  * Lexer to install the stream on.
  *
  * \param input
  * Stream to make current.
  */
 static void setCharStream   (pANTLR3_LEXER lexer,  pANTLR3_INPUT_STREAM input)
 {
     /* Install the input interface
      */
     lexer->input    = input;

     /* An existing token factory is never destroyed here, as tokens it
      * produced may still be in use; it is only told which input stream
      * to attach to the tokens it makes from now on. A factory is created
      * only if there is none yet.
      */
     if  (lexer->rec->state->tokFactory != NULL)
     {
         lexer->rec->state->tokFactory->setInputStream(lexer->rec->state->tokFactory, input);
     }
     else
     {
         lexer->rec->state->tokFactory   = antlr3TokenFactoryNew(input);
     }

     /* Propagate the string factory so that we preserve the encoding form
      * from the input stream. Only done while the token source has none.
      */
     if  (lexer->rec->state->tokSource->strFactory == NULL)
     {
         lexer->rec->state->tokSource->strFactory    = input->strFactory;

         /* The pre-made EOF token gets the same factory if it lacks one
          */
         if  (lexer->rec->state->tokSource->eofToken.strFactory == NULL)
         {
             lexer->rec->state->tokSource->eofToken.strFactory = input->strFactory;
         }
     }

     /* Copy the name of the char stream to the token source
      */
     lexer->rec->state->tokSource->fileName  = input->fileName;

     /* This is a lexer, install the appropriate exception creator
      */
     lexer->rec->exConstruct = antlr3RecognitionExceptionNew;

     /* No token is in progress on the newly installed stream
      */
     lexer->rec->state->tokenStartCharIndex  = -1;
     lexer->rec->state->text                 = NULL;
     lexer->rec->state->token                = NULL;
 }

 /*!
  * \brief
  * Change to a new input stream, remembering the old one.
  *
  * \param lexer
  * Pointer to the lexer instance to switch input streams for.
  *
  * \param input
  * New input stream to install as the current one.
  *
  * The current stream is marked and saved on the stream stack, which is
  * created on first use, and the new stream is then installed through
  * lexer->setCharStream().
  *
  * \remark
  * If the stack cannot be created the call returns without switching
  * streams.
  */
 static void
 pushCharStream  (pANTLR3_LEXER lexer,  pANTLR3_INPUT_STREAM input)
 {
     // The first push has to create the stack itself.
     //
     if  (lexer->rec->state->streams == NULL)
     {
         lexer->rec->state->streams = antlr3StackNew(0);
     }

     // Still no stack means the creation failed; the push is abandoned.
     // Any programmer can override this method to do something else.
     //
     if  (lexer->rec->state->streams == NULL)
     {
         return;
     }

     // Mark the current stream and park it on the stack.
     //
     lexer->input->istream->mark(lexer->input->istream);
     lexer->rec->state->streams->push(lexer->rec->state->streams, lexer->input, NULL);

     // The new stream now becomes the current one.
     //
     lexer->setCharStream(lexer, input);
 }

 /*!
  * \brief
  * Stops using the current input stream and reverts to any prior
  * input stream on the stack.
  *
  * \param lexer
  * Pointer to the lexer instance whose input stream is to be reverted.
  *
  * The current stream is abandoned, whether it is exhausted or not, and is
  * neither closed nor freed here. The stream on top of the stack is popped,
  * installed through lexer->setCharStream() and rewound with rewindLast().
  *
  * \remark
  * The function fails silently if there are no prior input streams.
  */
 static void
 popCharStream   (pANTLR3_LEXER lexer)
 {
     pANTLR3_INPUT_STREAM    previous;

     // No stack at all, or nothing on it: nothing to revert to.
     //
     if  (   lexer->rec->state->streams == NULL
         || !(lexer->rec->state->streams->size(lexer->rec->state->streams) > 0))
     {
         return;
     }

     // Pick up the saved stream, then take it off the stack.
     //
     previous    = (pANTLR3_INPUT_STREAM)(lexer->rec->state->streams->top);
     lexer->rec->state->streams->pop(lexer->rec->state->streams);

     // Make it current again and rewind it.
     //
     lexer->setCharStream(lexer, previous);
     lexer->input->istream->rewindLast(lexer->input->istream);
 }

 /**
  * Makes a caller supplied token the lexer's current token.
  *
  * \param lexer
  * Lexer whose shared state receives the token.
  *
  * \param token
  * Token to store; it is stored as given, without being copied or altered.
  */
 static void
 emitNew     (pANTLR3_LEXER lexer,  pANTLR3_COMMON_TOKEN token)
 {
     lexer->rec->state->token = token;
 }

 /**
  * \brief
  * Builds a token from the lexer's recorded state and makes it current.
  *
  * A token is obtained from the token factory and filled in from the shared
  * state: type, channel, start index, line, position and user fields. The
  * stop index is the current character index minus one.
  *
  * \param lexer
  * Lexer to emit for. An input stream must already be installed, because
  * no pointers are checked here.
  *
  * \returns
  * The new token, which is also stored as the current token.
  */
 static pANTLR3_COMMON_TOKEN
 emit	    (pANTLR3_LEXER lexer)
 {
     pANTLR3_COMMON_TOKEN    tok;

     /* This code is meant to run as fast as possible, so the factory
      * pointer is deliberately not validated.
      */
     tok = lexer->rec->state->tokFactory->newToken(lexer->rec->state->tokFactory);

     /* What the token is and where it sits in the input
      */
     tok->type           = lexer->rec->state->type;
     tok->channel        = lexer->rec->state->channel;
     tok->start          = lexer->rec->state->tokenStartCharIndex;
     tok->stop           = lexer->getCharIndex(lexer) - 1;
     tok->line           = lexer->rec->state->tokenStartLine;
     tok->charPosition   = lexer->rec->state->tokenStartCharPositionInLine;

     /* Text explicitly set in the state is attached as a string
      */
     if  (lexer->rec->state->text == NULL)
     {
         tok->textState      = ANTLR3_TEXT_NONE;
     }
     else
     {
         tok->textState      = ANTLR3_TEXT_STRING;
         tok->tokText.text   = lexer->rec->state->text;
     }

     tok->lineStart  = lexer->input->currentLine;

     /* User fields are copied across as they stand
      */
     tok->user1      = lexer->rec->state->user1;
     tok->user2      = lexer->rec->state->user2;
     tok->user3      = lexer->rec->state->user3;
     tok->custom     = lexer->rec->state->custom;

     lexer->rec->state->token    = tok;

     return  tok;
 }

 /**
  * Free the resources allocated by a lexer
  *
  * Releases the stream stack, the token factory and the token source held
  * in the shared state (when the state is still present), then the base
  * recognizer and finally the lexer structure itself.
  *
  * \param lexer
  * Lexer to free; NULL is accepted and ignored.
  */
 static void
 freeLexer    (pANTLR3_LEXER lexer)
 {
     if  (lexer == NULL)
     {
         return;
     }

     // The recognizer pointer must be checked before its state is touched.
     //
     if  (lexer->rec != NULL)
     {
         // This may have been a delegate or delegator lexer, in which case
         // the state may already have been freed (and set to NULL
         // therefore), so we ignore the state if we don't have it.
         //
         if  (lexer->rec->state != NULL)
         {
             if  (lexer->rec->state->streams != NULL)
             {
                 lexer->rec->state->streams->free(lexer->rec->state->streams);
                 lexer->rec->state->streams = NULL;     // No dangling pointer, as for the members below
             }
             if  (lexer->rec->state->tokFactory != NULL)
             {
                 lexer->rec->state->tokFactory->close(lexer->rec->state->tokFactory);
                 lexer->rec->state->tokFactory = NULL;
             }
             if  (lexer->rec->state->tokSource != NULL)
             {
                 ANTLR3_FREE(lexer->rec->state->tokSource);
                 lexer->rec->state->tokSource = NULL;
             }
         }

         lexer->rec->free(lexer->rec);
         lexer->rec = NULL;
     }

     ANTLR3_FREE(lexer);
 }

 /** Implementation of matchs for the lexer, overrides any
  *  base implementation in the base recognizer.
  *
  *  Matches the input, one character at a time, against a string that ends
  *  with ANTLR3_STRING_TERMINATOR, consuming each character that matches.
  *
  *  \param lexer	Lexer whose current input stream is matched.
  *  \param string	Terminated array of characters to match.
  *
  *  \returns
  *  ANTLR3_TRUE if the whole string matched, ANTLR3_FALSE at the first
  *  mismatch. Characters matched before the mismatch stay consumed.
  *
  *  \remark
  *  Note that the generated code lays down arrays of ints for constant
  *  strings so that they are in UTF32 form!
  */
 static ANTLR3_BOOLEAN
 matchs(pANTLR3_LEXER lexer, ANTLR3_UCHAR * string)
 {
     for (; *string != ANTLR3_STRING_TERMINATOR; string++)
     {
         if  (lexer->input->istream->_LA(lexer->input->istream, 1) == (*string))
         {
             /* Matched: consume it and clear any failed indicator
              */
             lexer->input->istream->consume(lexer->input->istream);
             lexer->rec->state->failed = ANTLR3_FALSE;
             continue;
         }

         /* Mismatch. While backtracking we only flag the failure.
          */
         if  (lexer->rec->state->backtracking > 0)
         {
             lexer->rec->state->failed = ANTLR3_TRUE;
             return  ANTLR3_FALSE;
         }

         /* Otherwise raise the exception and attempt recovery.
          * TODO: Implement exception creation more fully perhaps
          */
         lexer->rec->exConstruct(lexer->rec);
         lexer->rec->state->failed = ANTLR3_TRUE;
         lexer->recover(lexer);

         return  ANTLR3_FALSE;
     }

     return  ANTLR3_TRUE;
 }

 /** Implementation of matchc for the lexer, overrides any
  *  base implementation in the base recognizer.
  *
  *  Matches the next input character against a single character and
  *  consumes it on success.
  *
  *  \param lexer	Lexer whose current input stream is matched.
  *  \param c		Character that the next input character must equal.
  *
  *  \returns
  *  ANTLR3_TRUE if the character matched, ANTLR3_FALSE otherwise. On a
  *  mismatch the failed flag is set; outside of backtracking an exception
  *  is also constructed and recover() is called.
  */
 static ANTLR3_BOOLEAN
 matchc(pANTLR3_LEXER lexer, ANTLR3_UCHAR c)
 {
     if  (lexer->input->istream->_LA(lexer->input->istream, 1) == c)
     {
         /* Matched correctly, do consume it
          */
         lexer->input->istream->consume(lexer->input->istream);

         /* Reset any failed indicator
          */
         lexer->rec->state->failed = ANTLR3_FALSE;

         return  ANTLR3_TRUE;
     }

     /* Failed to match. While backtracking we only flag the failure.
      */
     if  (lexer->rec->state->backtracking > 0)
     {
         lexer->rec->state->failed  = ANTLR3_TRUE;
         return  ANTLR3_FALSE;
     }

     /* Exception and recovery time. The failed flag is raised here too,
      * as matchs() does on its equivalent path.
      * TODO: Implement exception creation more fully perhaps
      */
     lexer->rec->exConstruct(lexer->rec);
     lexer->rec->state->failed  = ANTLR3_TRUE;
     lexer->recover(lexer);

     return  ANTLR3_FALSE;
 }

 /** Implementation of match range for the lexer, overrides any
  *  base implementation in the base recognizer.
  *
  *  Matches the next input character against an inclusive range and
  *  consumes it on success.
  *
  *  \param lexer	Lexer whose current input stream is matched.
  *  \param low	Lowest character accepted.
  *  \param high	Highest character accepted.
  *
  *  \returns
  *  ANTLR3_TRUE if the character lies within [low, high], ANTLR3_FALSE
  *  otherwise. On a mismatch the failed flag is set; outside of
  *  backtracking an exception is also constructed and recover() is called.
  */
 static ANTLR3_BOOLEAN
 matchRange(pANTLR3_LEXER lexer, ANTLR3_UCHAR low, ANTLR3_UCHAR high)
 {
     ANTLR3_UCHAR    c;

     /* What is in the stream at the moment?
      */
     c   = lexer->input->istream->_LA(lexer->input->istream, 1);
     if  ( c >= low && c <= high)
     {
         /* Matched correctly, consume it
          */
         lexer->input->istream->consume(lexer->input->istream);

         /* Reset any failed indicator
          */
         lexer->rec->state->failed = ANTLR3_FALSE;

         return  ANTLR3_TRUE;
     }

     /* Failed to match. While backtracking we only flag the failure.
      */
     if  (lexer->rec->state->backtracking > 0)
     {
         lexer->rec->state->failed  = ANTLR3_TRUE;
         return  ANTLR3_FALSE;
     }

     /* Exception and recovery time. The failed flag is raised here too,
      * as matchs() does on its equivalent path.
      * TODO: Implement exception creation more fully
      */
     lexer->rec->exConstruct(lexer->rec);
     lexer->rec->state->failed  = ANTLR3_TRUE;
     lexer->recover(lexer);

     return  ANTLR3_FALSE;
 }

 /**
  * Matches any character by consuming one element from the lexer's current
  * input stream. Nothing is checked.
  *
  * \param lexer
  * Lexer whose input stream is advanced.
  */
 static void
 matchAny	    (pANTLR3_LEXER lexer)
 {
     // Whatever is next in the stream is accepted
     //
     lexer->input->istream->consume(
             lexer->input->istream
         );
 }

 /**
  * Default lexer error recovery: consumes one element from the current
  * input stream.
  *
  * \param lexer
  * Lexer whose input stream is advanced.
  */
 static void
 recover	    (pANTLR3_LEXER lexer)
 {
     // Step over one element and carry on
     //
     lexer->input->istream->consume(
             lexer->input->istream
         );
 }

 /**
  * Returns the line number reported by the getLine() method of the lexer's
  * current input stream.
  *
  * \param lexer
  * Lexer whose input stream is queried.
  */
 static ANTLR3_UINT32
 getLine	    (pANTLR3_LEXER lexer)
 {
     ANTLR3_UINT32   line;

     line = lexer->input->getLine(lexer->input);

     return  line;
 }

 /**
  * Returns the charPositionInLine field of the lexer's current input stream.
  *
  * \param lexer
  * Lexer whose input stream is read.
  */
 static ANTLR3_UINT32
 getCharPositionInLine	(pANTLR3_LEXER lexer)
 {
     ANTLR3_UINT32   position;

     position = lexer->input->charPositionInLine;

     return  position;
 }

 /**
  * Returns the current index of the lexer's input, as reported by the
  * index() method of the input's int stream.
  *
  * \param lexer
  * Lexer whose input stream is queried.
  */
 static ANTLR3_MARKER
 getCharIndex	    (pANTLR3_LEXER lexer)
 {
     ANTLR3_MARKER   where;

     where = lexer->input->istream->index(lexer->input->istream);

     return  where;
 }

 /**
  * \brief
  * Returns the text of the token currently being matched.
  *
  * \param lexer
  * Lexer to query.
  *
  * \returns
  * The text stored in the shared state if there is any. Otherwise the
  * input stream's substr() from the recorded token start index up to the
  * current character index less one charByteSize.
  */
 static pANTLR3_STRING
 getText	    (pANTLR3_LEXER lexer)
 {
     // Text set explicitly in the state takes precedence
     //
     if  (lexer->rec->state->text != NULL)
     {
         return  lexer->rec->state->text;
     }

     // Otherwise take it from the input stream itself
     //
     return  lexer->input->substr(lexer->input,
                                  lexer->rec->state->tokenStartCharIndex,
                                  lexer->getCharIndex(lexer) - lexer->input->charByteSize);
 }

 /**
  * Lexer implementation of getCurrentInputSymbol(), installed on the base
  * recognizer by antlr3LexerNew(). It ignores its arguments.
  *
  * \returns
  * Always NULL.
  */
 static void *
 getCurrentInputSymbol		(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream)
 {
     (void)recognizer;
     (void)istream;

     return  NULL;
 }

 /**
  * Lexer implementation of getMissingSymbol(), installed on the base
  * recognizer by antlr3LexerNew(). It ignores its arguments.
  *
  * \returns
  * Always NULL.
  */
 static void *
 getMissingSymbol			(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM	istream, pANTLR3_EXCEPTION	e,
 									ANTLR3_UINT32 expectedTokenType, pANTLR3_BITSET_LIST follow)
 {
     (void)recognizer;
     (void)istream;
     (void)e;
     (void)expectedTokenType;
     (void)follow;

     return  NULL;
 }