| /** \file |
| * Contains the base functions that all recognizers require. |
| * Any function can be overridden by a lexer/parser/tree parser or by the |
| * ANTLR3 programmer. |
| * |
| * \addtogroup pANTLR3_BASE_RECOGNIZER |
| * @{ |
| */ |
| #include <antlr3baserecognizer.h> |
| |
| // [The "BSD licence"] |
| // Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC |
| // http://www.temporal-wave.com |
| // http://www.linkedin.com/in/jimidle |
| // |
| // All rights reserved. |
| // |
| // Redistribution and use in source and binary forms, with or without |
| // modification, are permitted provided that the following conditions |
| // are met: |
| // 1. Redistributions of source code must retain the above copyright |
| // notice, this list of conditions and the following disclaimer. |
| // 2. Redistributions in binary form must reproduce the above copyright |
| // notice, this list of conditions and the following disclaimer in the |
| // documentation and/or other materials provided with the distribution. |
| // 3. The name of the author may not be used to endorse or promote products |
| // derived from this software without specific prior written permission. |
| // |
| // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR |
| // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
| // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. |
| // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, |
| // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
| // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF |
| // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| |
| #ifdef ANTLR3_WINDOWS |
| #pragma warning( disable : 4100 ) |
| #endif |
| |
| /* Interface functions -standard implementations cover parser and treeparser |
| * almost completely but are overridden by the parser or tree parser as needed. Lexer overrides |
| * most of these functions. |
| */ |
| static void beginResync (pANTLR3_BASE_RECOGNIZER recognizer); |
| static pANTLR3_BITSET computeErrorRecoverySet (pANTLR3_BASE_RECOGNIZER recognizer); |
| static void endResync (pANTLR3_BASE_RECOGNIZER recognizer); |
| static void beginBacktrack (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 level); |
| static void endBacktrack (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 level, ANTLR3_BOOLEAN successful); |
| |
| static void * match (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow); |
| static void matchAny (pANTLR3_BASE_RECOGNIZER recognizer); |
| static void mismatch (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow); |
| static ANTLR3_BOOLEAN mismatchIsUnwantedToken (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM is, ANTLR3_UINT32 ttype); |
| static ANTLR3_BOOLEAN mismatchIsMissingToken (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM is, pANTLR3_BITSET_LIST follow); |
| static void reportError (pANTLR3_BASE_RECOGNIZER recognizer); |
| static pANTLR3_BITSET computeCSRuleFollow (pANTLR3_BASE_RECOGNIZER recognizer); |
| static pANTLR3_BITSET combineFollows (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_BOOLEAN exact); |
| static void displayRecognitionError (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 * tokenNames); |
| static void recover (pANTLR3_BASE_RECOGNIZER recognizer); |
| static void * recoverFromMismatchedToken (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow); |
| static void * recoverFromMismatchedSet (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET_LIST follow); |
| static ANTLR3_BOOLEAN recoverFromMismatchedElement(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET_LIST follow); |
| static void consumeUntil (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 tokenType); |
| static void consumeUntilSet (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET set); |
| static pANTLR3_STACK getRuleInvocationStack (pANTLR3_BASE_RECOGNIZER recognizer); |
| static pANTLR3_STACK getRuleInvocationStackNamed (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 name); |
| static pANTLR3_HASH_TABLE toStrings (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_HASH_TABLE); |
| static ANTLR3_MARKER getRuleMemoization (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_INTKEY ruleIndex, ANTLR3_MARKER ruleParseStart); |
| static ANTLR3_BOOLEAN alreadyParsedRule (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_MARKER ruleIndex); |
| static void memoize (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_MARKER ruleIndex, ANTLR3_MARKER ruleParseStart); |
| static ANTLR3_BOOLEAN synpred (pANTLR3_BASE_RECOGNIZER recognizer, void * ctx, void (*predicate)(void * ctx)); |
| static void reset (pANTLR3_BASE_RECOGNIZER recognizer); |
| static void freeBR (pANTLR3_BASE_RECOGNIZER recognizer); |
| static void * getCurrentInputSymbol (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream); |
| static void * getMissingSymbol (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream, pANTLR3_EXCEPTION e, |
| ANTLR3_UINT32 expectedTokenType, pANTLR3_BITSET_LIST follow); |
| static ANTLR3_UINT32 getNumberOfSyntaxErrors (pANTLR3_BASE_RECOGNIZER recognizer); |
| |
| ANTLR3_API pANTLR3_BASE_RECOGNIZER |
| antlr3BaseRecognizerNew(ANTLR3_UINT32 type, ANTLR3_UINT32 sizeHint, pANTLR3_RECOGNIZER_SHARED_STATE state) |
| { |
| pANTLR3_BASE_RECOGNIZER recognizer; |
| |
| // Allocate memory for the structure |
| // |
| recognizer = (pANTLR3_BASE_RECOGNIZER) ANTLR3_MALLOC((size_t)sizeof(ANTLR3_BASE_RECOGNIZER)); |
| |
| if (recognizer == NULL) |
| { |
| // Allocation failed |
| // |
| return NULL; |
| } |
| |
| |
| // If we have been supplied with a pre-existing recognizer state |
| // then we just install it, otherwise we must create one from scratch |
| // |
| if (state == NULL) |
| { |
| recognizer->state = (pANTLR3_RECOGNIZER_SHARED_STATE) ANTLR3_CALLOC(1, (size_t)sizeof(ANTLR3_RECOGNIZER_SHARED_STATE)); |
| |
| if (recognizer->state == NULL) |
| { |
| ANTLR3_FREE(recognizer); |
| return NULL; |
| } |
| |
| // Initialize any new recognizer state |
| // |
| recognizer->state->errorRecovery = ANTLR3_FALSE; |
| recognizer->state->lastErrorIndex = -1; |
| recognizer->state->failed = ANTLR3_FALSE; |
| recognizer->state->errorCount = 0; |
| recognizer->state->backtracking = 0; |
| recognizer->state->following = NULL; |
| recognizer->state->ruleMemo = NULL; |
| recognizer->state->tokenNames = NULL; |
| recognizer->state->sizeHint = sizeHint; |
| recognizer->state->tokSource = NULL; |
| recognizer->state->tokFactory = NULL; |
| |
| // Rather than check to see if we must initialize |
| // the stack every time we are asked for an new rewrite stream |
| // we just always create an empty stack and then just |
| // free it when the base recognizer is freed. |
| // |
| recognizer->state->rStreams = antlr3VectorNew(0); // We don't know the size. |
| |
| if (recognizer->state->rStreams == NULL) |
| { |
| // Out of memory |
| // |
| ANTLR3_FREE(recognizer->state); |
| ANTLR3_FREE(recognizer); |
| return NULL; |
| } |
| } |
| else |
| { |
| // Install the one we were given, and do not reset it here |
| // as it will either already have been initialized or will |
| // be in a state that needs to be preserved. |
| // |
| recognizer->state = state; |
| } |
| |
| // Install the BR API |
| // |
| recognizer->alreadyParsedRule = alreadyParsedRule; |
| recognizer->beginResync = beginResync; |
| recognizer->combineFollows = combineFollows; |
| recognizer->beginBacktrack = beginBacktrack; |
| recognizer->endBacktrack = endBacktrack; |
| recognizer->computeCSRuleFollow = computeCSRuleFollow; |
| recognizer->computeErrorRecoverySet = computeErrorRecoverySet; |
| recognizer->consumeUntil = consumeUntil; |
| recognizer->consumeUntilSet = consumeUntilSet; |
| recognizer->displayRecognitionError = displayRecognitionError; |
| recognizer->endResync = endResync; |
| recognizer->exConstruct = antlr3MTExceptionNew; |
| recognizer->getRuleInvocationStack = getRuleInvocationStack; |
| recognizer->getRuleInvocationStackNamed = getRuleInvocationStackNamed; |
| recognizer->getRuleMemoization = getRuleMemoization; |
| recognizer->match = match; |
| recognizer->matchAny = matchAny; |
| recognizer->memoize = memoize; |
| recognizer->mismatch = mismatch; |
| recognizer->mismatchIsUnwantedToken = mismatchIsUnwantedToken; |
| recognizer->mismatchIsMissingToken = mismatchIsMissingToken; |
| recognizer->recover = recover; |
| recognizer->recoverFromMismatchedElement= recoverFromMismatchedElement; |
| recognizer->recoverFromMismatchedSet = recoverFromMismatchedSet; |
| recognizer->recoverFromMismatchedToken = recoverFromMismatchedToken; |
| recognizer->getNumberOfSyntaxErrors = getNumberOfSyntaxErrors; |
| recognizer->reportError = reportError; |
| recognizer->reset = reset; |
| recognizer->synpred = synpred; |
| recognizer->toStrings = toStrings; |
| recognizer->getCurrentInputSymbol = getCurrentInputSymbol; |
| recognizer->getMissingSymbol = getMissingSymbol; |
| recognizer->debugger = NULL; |
| |
| recognizer->free = freeBR; |
| |
| /* Initialize variables |
| */ |
| recognizer->type = type; |
| |
| |
| return recognizer; |
| } |
| static void |
| freeBR (pANTLR3_BASE_RECOGNIZER recognizer) |
| { |
| pANTLR3_EXCEPTION thisE; |
| |
| // Did we have a state allocated? |
| // |
| if (recognizer->state != NULL) |
| { |
| // Free any rule memoization we set up |
| // |
| if (recognizer->state->ruleMemo != NULL) |
| { |
| recognizer->state->ruleMemo->free(recognizer->state->ruleMemo); |
| recognizer->state->ruleMemo = NULL; |
| } |
| |
| // Free any exception space we have left around |
| // |
| thisE = recognizer->state->exception; |
| if (thisE != NULL) |
| { |
| thisE->freeEx(thisE); |
| } |
| |
| // Free any rewrite streams we have allocated |
| // |
| if (recognizer->state->rStreams != NULL) |
| { |
| recognizer->state->rStreams->free(recognizer->state->rStreams); |
| } |
| |
| // Free up any token factory we created (error recovery for instance) |
| // |
| if (recognizer->state->tokFactory != NULL) |
| { |
| recognizer->state->tokFactory->close(recognizer->state->tokFactory); |
| } |
| // Free the shared state memory |
| // |
| ANTLR3_FREE(recognizer->state); |
| } |
| |
| // Free the actual recognizer space |
| // |
| ANTLR3_FREE(recognizer); |
| } |
| |
| /** |
| * Creates a new Mismatched Token Exception and inserts in the recognizer |
| * exception stack. |
| * |
| * \param recognizer |
| * Context pointer for this recognizer |
| * |
| */ |
| ANTLR3_API void |
| antlr3MTExceptionNew(pANTLR3_BASE_RECOGNIZER recognizer) |
| { |
| /* Create a basic recognition exception structure |
| */ |
| antlr3RecognitionExceptionNew(recognizer); |
| |
| /* Now update it to indicate this is a Mismatched token exception |
| */ |
| recognizer->state->exception->name = ANTLR3_MISMATCHED_EX_NAME; |
| recognizer->state->exception->type = ANTLR3_MISMATCHED_TOKEN_EXCEPTION; |
| |
| return; |
| } |
| |
| ANTLR3_API void |
| antlr3RecognitionExceptionNew(pANTLR3_BASE_RECOGNIZER recognizer) |
| { |
| pANTLR3_EXCEPTION ex; |
| pANTLR3_LEXER lexer; |
| pANTLR3_PARSER parser; |
| pANTLR3_TREE_PARSER tparser; |
| |
| pANTLR3_INPUT_STREAM ins; |
| pANTLR3_INT_STREAM is; |
| pANTLR3_COMMON_TOKEN_STREAM cts; |
| pANTLR3_TREE_NODE_STREAM tns; |
| |
| ins = NULL; |
| cts = NULL; |
| tns = NULL; |
| is = NULL; |
| lexer = NULL; |
| parser = NULL; |
| tparser = NULL; |
| |
| switch (recognizer->type) |
| { |
| case ANTLR3_TYPE_LEXER: |
| |
| lexer = (pANTLR3_LEXER) (recognizer->super); |
| ins = lexer->input; |
| is = ins->istream; |
| |
| break; |
| |
| case ANTLR3_TYPE_PARSER: |
| |
| parser = (pANTLR3_PARSER) (recognizer->super); |
| cts = (pANTLR3_COMMON_TOKEN_STREAM)(parser->tstream->super); |
| is = parser->tstream->istream; |
| |
| break; |
| |
| case ANTLR3_TYPE_TREE_PARSER: |
| |
| tparser = (pANTLR3_TREE_PARSER) (recognizer->super); |
| tns = tparser->ctnstream->tnstream; |
| is = tns->istream; |
| |
| break; |
| |
| default: |
| |
| ANTLR3_FPRINTF(stderr, "Base recognizer function antlr3RecognitionExceptionNew called by unknown parser type - provide override for this function\n"); |
| return; |
| |
| break; |
| } |
| |
| /* Create a basic exception structure |
| */ |
| ex = antlr3ExceptionNew(ANTLR3_RECOGNITION_EXCEPTION, |
| (void *)ANTLR3_RECOGNITION_EX_NAME, |
| NULL, |
| ANTLR3_FALSE); |
| |
| /* Rest of information depends on the base type of the |
| * input stream. |
| */ |
| switch (is->type & ANTLR3_INPUT_MASK) |
| { |
| case ANTLR3_CHARSTREAM: |
| |
| ex->c = is->_LA (is, 1); /* Current input character */ |
| ex->line = ins->getLine (ins); /* Line number comes from stream */ |
| ex->charPositionInLine = ins->getCharPositionInLine (ins); /* Line offset also comes from the stream */ |
| ex->index = is->index (is); |
| ex->streamName = ins->fileName; |
| ex->message = "Unexpected character"; |
| break; |
| |
| case ANTLR3_TOKENSTREAM: |
| |
| ex->token = cts->tstream->_LT (cts->tstream, 1); /* Current input token */ |
| ex->line = ((pANTLR3_COMMON_TOKEN)(ex->token))->getLine ((pANTLR3_COMMON_TOKEN)(ex->token)); |
| ex->charPositionInLine = ((pANTLR3_COMMON_TOKEN)(ex->token))->getCharPositionInLine ((pANTLR3_COMMON_TOKEN)(ex->token)); |
| ex->index = cts->tstream->istream->index (cts->tstream->istream); |
| if (((pANTLR3_COMMON_TOKEN)(ex->token))->type == ANTLR3_TOKEN_EOF) |
| { |
| ex->streamName = NULL; |
| } |
| else |
| { |
| ex->streamName = ((pANTLR3_COMMON_TOKEN)(ex->token))->input->fileName; |
| } |
| ex->message = "Unexpected token"; |
| break; |
| |
| case ANTLR3_COMMONTREENODE: |
| |
| ex->token = tns->_LT (tns, 1); /* Current input tree node */ |
| ex->line = ((pANTLR3_BASE_TREE)(ex->token))->getLine ((pANTLR3_BASE_TREE)(ex->token)); |
| ex->charPositionInLine = ((pANTLR3_BASE_TREE)(ex->token))->getCharPositionInLine ((pANTLR3_BASE_TREE)(ex->token)); |
| ex->index = tns->istream->index (tns->istream); |
| |
| // Are you ready for this? Deep breath now... |
| // |
| { |
| pANTLR3_COMMON_TREE tnode; |
| |
| tnode = ((pANTLR3_COMMON_TREE)(((pANTLR3_BASE_TREE)(ex->token))->super)); |
| |
| if (tnode->token == NULL) |
| { |
| ex->streamName = ((pANTLR3_BASE_TREE)(ex->token))->strFactory->newStr(((pANTLR3_BASE_TREE)(ex->token))->strFactory, (pANTLR3_UINT8)"-unknown source-"); |
| } |
| else |
| { |
| if (tnode->token->input == NULL) |
| { |
| ex->streamName = NULL; |
| } |
| else |
| { |
| ex->streamName = tnode->token->input->fileName; |
| } |
| } |
| ex->message = "Unexpected node"; |
| } |
| break; |
| } |
| |
| ex->input = is; |
| ex->nextException = recognizer->state->exception; /* So we don't leak the memory */ |
| recognizer->state->exception = ex; |
| recognizer->state->error = ANTLR3_TRUE; /* Exception is outstanding */ |
| |
| return; |
| } |
| |
| |
| /// Match current input symbol against ttype. Upon error, do one token |
| /// insertion or deletion if possible. |
| /// To turn off single token insertion or deletion error |
| /// recovery, override mismatchRecover() and have it call |
| /// plain mismatch(), which does not recover. Then any error |
| /// in a rule will cause an exception and immediate exit from |
| /// rule. Rule would recover by resynchronizing to the set of |
| /// symbols that can follow rule ref. |
| /// |
| static void * |
| match( pANTLR3_BASE_RECOGNIZER recognizer, |
| ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow) |
| { |
| pANTLR3_PARSER parser; |
| pANTLR3_TREE_PARSER tparser; |
| pANTLR3_INT_STREAM is; |
| void * matchedSymbol; |
| |
| switch (recognizer->type) |
| { |
| case ANTLR3_TYPE_PARSER: |
| |
| parser = (pANTLR3_PARSER) (recognizer->super); |
| tparser = NULL; |
| is = parser->tstream->istream; |
| |
| break; |
| |
| case ANTLR3_TYPE_TREE_PARSER: |
| |
| tparser = (pANTLR3_TREE_PARSER) (recognizer->super); |
| parser = NULL; |
| is = tparser->ctnstream->tnstream->istream; |
| |
| break; |
| |
| default: |
| |
| ANTLR3_FPRINTF(stderr, "Base recognizer function 'match' called by unknown parser type - provide override for this function\n"); |
| return ANTLR3_FALSE; |
| |
| break; |
| } |
| |
| // Pick up the current input token/node for assignment to labels |
| // |
| matchedSymbol = recognizer->getCurrentInputSymbol(recognizer, is); |
| |
| if (is->_LA(is, 1) == ttype) |
| { |
| // The token was the one we were told to expect |
| // |
| is->consume(is); // Consume that token from the stream |
| recognizer->state->errorRecovery = ANTLR3_FALSE; // Not in error recovery now (if we were) |
| recognizer->state->failed = ANTLR3_FALSE; // The match was a success |
| return matchedSymbol; // We are done |
| } |
| |
| // We did not find the expected token type, if we are backtracking then |
| // we just set the failed flag and return. |
| // |
| if (recognizer->state->backtracking > 0) |
| { |
| // Backtracking is going on |
| // |
| recognizer->state->failed = ANTLR3_TRUE; |
| return matchedSymbol; |
| } |
| |
| // We did not find the expected token and there is no backtracking |
| // going on, so we mismatch, which creates an exception in the recognizer exception |
| // stack. |
| // |
| matchedSymbol = recognizer->recoverFromMismatchedToken(recognizer, ttype, follow); |
| return matchedSymbol; |
| } |
| |
| /// Consumes the next token, whatever it is, and resets the recognizer state |
| /// so that it is not in error. |
| /// |
| /// \param recognizer |
| /// Recognizer context pointer |
| /// |
| static void |
| matchAny(pANTLR3_BASE_RECOGNIZER recognizer) |
| { |
| pANTLR3_PARSER parser; |
| pANTLR3_TREE_PARSER tparser; |
| pANTLR3_INT_STREAM is; |
| |
| switch (recognizer->type) |
| { |
| case ANTLR3_TYPE_PARSER: |
| |
| parser = (pANTLR3_PARSER) (recognizer->super); |
| tparser = NULL; |
| is = parser->tstream->istream; |
| |
| break; |
| |
| case ANTLR3_TYPE_TREE_PARSER: |
| |
| tparser = (pANTLR3_TREE_PARSER) (recognizer->super); |
| parser = NULL; |
| is = tparser->ctnstream->tnstream->istream; |
| |
| break; |
| |
| default: |
| |
| ANTLR3_FPRINTF(stderr, "Base recognizer function 'matchAny' called by unknown parser type - provide override for this function\n"); |
| return; |
| |
| break; |
| } |
| recognizer->state->errorRecovery = ANTLR3_FALSE; |
| recognizer->state->failed = ANTLR3_FALSE; |
| is->consume(is); |
| |
| return; |
| } |
| /// |
| /// |
| static ANTLR3_BOOLEAN |
| mismatchIsUnwantedToken(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM is, ANTLR3_UINT32 ttype) |
| { |
| ANTLR3_UINT32 nextt; |
| |
| nextt = is->_LA(is, 2); |
| |
| if (nextt == ttype) |
| { |
| if (recognizer->state->exception != NULL) |
| { |
| recognizer->state->exception->expecting = nextt; |
| } |
| return ANTLR3_TRUE; // This token is unknown, but the next one is the one we wanted |
| } |
| else |
| { |
| return ANTLR3_FALSE; // Neither this token, nor the one following is the one we wanted |
| } |
| } |
| |
| /// |
| /// |
| static ANTLR3_BOOLEAN |
| mismatchIsMissingToken(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM is, pANTLR3_BITSET_LIST follow) |
| { |
| ANTLR3_BOOLEAN retcode; |
| pANTLR3_BITSET followClone; |
| pANTLR3_BITSET viableTokensFollowingThisRule; |
| |
| if (follow == NULL) |
| { |
| // There is no information about the tokens that can follow the last one |
| // hence we must say that the current one we found is not a member of the |
| // follow set and does not indicate a missing token. We will just consume this |
| // single token and see if the parser works it out from there. |
| // |
| return ANTLR3_FALSE; |
| } |
| |
| followClone = NULL; |
| viableTokensFollowingThisRule = NULL; |
| |
| // The C bitset maps are laid down at compile time by the |
| // C code generation. Hence we cannot remove things from them |
| // and so on. So, in order to remove EOR (if we need to) then |
| // we clone the static bitset. |
| // |
| followClone = antlr3BitsetLoad(follow); |
| if (followClone == NULL) |
| { |
| return ANTLR3_FALSE; |
| } |
| |
| // Compute what can follow this grammar reference |
| // |
| if (followClone->isMember(followClone, ANTLR3_EOR_TOKEN_TYPE)) |
| { |
| // EOR can follow, but if we are not the start symbol, we |
| // need to remove it. |
| // |
| //if (recognizer->state->following->vector->count >= 0) ml: always true |
| { |
| followClone->remove(followClone, ANTLR3_EOR_TOKEN_TYPE); |
| } |
| |
| // Now compute the visiable tokens that can follow this rule, according to context |
| // and make them part of the follow set. |
| // |
| viableTokensFollowingThisRule = recognizer->computeCSRuleFollow(recognizer); |
| followClone->borInPlace(followClone, viableTokensFollowingThisRule); |
| } |
| |
| /// if current token is consistent with what could come after set |
| /// then we know we're missing a token; error recovery is free to |
| /// "insert" the missing token |
| /// |
| /// BitSet cannot handle negative numbers like -1 (EOF) so I leave EOR |
| /// in follow set to indicate that the fall of the start symbol is |
| /// in the set (EOF can follow). |
| /// |
| if ( followClone->isMember(followClone, is->_LA(is, 1)) |
| || followClone->isMember(followClone, ANTLR3_EOR_TOKEN_TYPE) |
| ) |
| { |
| retcode = ANTLR3_TRUE; |
| } |
| else |
| { |
| retcode = ANTLR3_FALSE; |
| } |
| |
| if (viableTokensFollowingThisRule != NULL) |
| { |
| viableTokensFollowingThisRule->free(viableTokensFollowingThisRule); |
| } |
| if (followClone != NULL) |
| { |
| followClone->free(followClone); |
| } |
| |
| return retcode; |
| |
| } |
| |
| /// Factor out what to do upon token mismatch so tree parsers can behave |
| /// differently. Override and call mismatchRecover(input, ttype, follow) |
| /// to get single token insertion and deletion. Use this to turn off |
| /// single token insertion and deletion. Override mismatchRecover |
| /// to call this instead. |
| /// |
| /// \remark mismatch only works for parsers and must be overridden for anything else. |
| /// |
| static void |
| mismatch(pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow) |
| { |
| pANTLR3_PARSER parser; |
| pANTLR3_TREE_PARSER tparser; |
| pANTLR3_INT_STREAM is; |
| |
| // Install a mismatched token exception in the exception stack |
| // |
| antlr3MTExceptionNew(recognizer); |
| recognizer->state->exception->expecting = ttype; |
| |
| switch (recognizer->type) |
| { |
| case ANTLR3_TYPE_PARSER: |
| |
| parser = (pANTLR3_PARSER) (recognizer->super); |
| tparser = NULL; |
| is = parser->tstream->istream; |
| |
| break; |
| |
| default: |
| |
| ANTLR3_FPRINTF(stderr, "Base recognizer function 'mismatch' called by unknown parser type - provide override for this function\n"); |
| return; |
| |
| break; |
| } |
| |
| if (mismatchIsUnwantedToken(recognizer, is, ttype)) |
| { |
| // Create a basic recognition exception structure |
| // |
| antlr3RecognitionExceptionNew(recognizer); |
| |
| // Now update it to indicate this is an unwanted token exception |
| // |
| recognizer->state->exception->name = ANTLR3_UNWANTED_TOKEN_EXCEPTION_NAME; |
| recognizer->state->exception->type = ANTLR3_UNWANTED_TOKEN_EXCEPTION; |
| |
| return; |
| } |
| |
| if (mismatchIsMissingToken(recognizer, is, follow)) |
| { |
| // Create a basic recognition exception structure |
| // |
| antlr3RecognitionExceptionNew(recognizer); |
| |
| // Now update it to indicate this is an unwanted token exception |
| // |
| recognizer->state->exception->name = ANTLR3_MISSING_TOKEN_EXCEPTION_NAME; |
| recognizer->state->exception->type = ANTLR3_MISSING_TOKEN_EXCEPTION; |
| |
| return; |
| } |
| |
| // Just a mismatched token is all we can dtermine |
| // |
| antlr3MTExceptionNew(recognizer); |
| |
| return; |
| } |
| /// Report a recognition problem. |
| /// |
| /// This method sets errorRecovery to indicate the parser is recovering |
| /// not parsing. Once in recovery mode, no errors are generated. |
| /// To get out of recovery mode, the parser must successfully match |
| /// a token (after a resync). So it will go: |
| /// |
| /// 1. error occurs |
| /// 2. enter recovery mode, report error |
| /// 3. consume until token found in resynch set |
| /// 4. try to resume parsing |
| /// 5. next match() will reset errorRecovery mode |
| /// |
| /// If you override, make sure to update errorCount if you care about that. |
| /// |
| static void |
| reportError (pANTLR3_BASE_RECOGNIZER recognizer) |
| { |
| // Invoke the debugger event if there is a debugger listening to us |
| // |
| if (recognizer->debugger != NULL) |
| { |
| recognizer->debugger->recognitionException(recognizer->debugger, recognizer->state->exception); |
| } |
| |
| if (recognizer->state->errorRecovery == ANTLR3_TRUE) |
| { |
| // Already in error recovery so don't display another error while doing so |
| // |
| return; |
| } |
| |
| // Signal we are in error recovery now |
| // |
| recognizer->state->errorRecovery = ANTLR3_TRUE; |
| |
| // Indicate this recognizer had an error while processing. |
| // |
| recognizer->state->errorCount++; |
| |
| // Call the error display routine |
| // |
| recognizer->displayRecognitionError(recognizer, recognizer->state->tokenNames); |
| } |
| |
| static void |
| beginBacktrack (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 level) |
| { |
| if (recognizer->debugger != NULL) |
| { |
| recognizer->debugger->beginBacktrack(recognizer->debugger, level); |
| } |
| } |
| |
| static void |
| endBacktrack (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 level, ANTLR3_BOOLEAN successful) |
| { |
| if (recognizer->debugger != NULL) |
| { |
| recognizer->debugger->endBacktrack(recognizer->debugger, level, successful); |
| } |
| } |
| static void |
| beginResync (pANTLR3_BASE_RECOGNIZER recognizer) |
| { |
| if (recognizer->debugger != NULL) |
| { |
| recognizer->debugger->beginResync(recognizer->debugger); |
| } |
| } |
| |
| static void |
| endResync (pANTLR3_BASE_RECOGNIZER recognizer) |
| { |
| if (recognizer->debugger != NULL) |
| { |
| recognizer->debugger->endResync(recognizer->debugger); |
| } |
| } |
| |
| /// Compute the error recovery set for the current rule. |
| /// Documentation below is from the Java implementation. |
| /// |
| /// During rule invocation, the parser pushes the set of tokens that can |
| /// follow that rule reference on the stack; this amounts to |
| /// computing FIRST of what follows the rule reference in the |
| /// enclosing rule. This local follow set only includes tokens |
| /// from within the rule; i.e., the FIRST computation done by |
| /// ANTLR stops at the end of a rule. |
| // |
| /// EXAMPLE |
| // |
| /// When you find a "no viable alt exception", the input is not |
| /// consistent with any of the alternatives for rule r. The best |
| /// thing to do is to consume tokens until you see something that |
| /// can legally follow a call to r *or* any rule that called r. |
| /// You don't want the exact set of viable next tokens because the |
| /// input might just be missing a token--you might consume the |
| /// rest of the input looking for one of the missing tokens. |
| /// |
| /// Consider grammar: |
| /// |
| /// a : '[' b ']' |
| /// | '(' b ')' |
| /// ; |
| /// b : c '^' INT ; |
| /// c : ID |
| /// | INT |
| /// ; |
| /// |
| /// At each rule invocation, the set of tokens that could follow |
| /// that rule is pushed on a stack. Here are the various "local" |
| /// follow sets: |
| /// |
| /// FOLLOW(b1_in_a) = FIRST(']') = ']' |
| /// FOLLOW(b2_in_a) = FIRST(')') = ')' |
| /// FOLLOW(c_in_b) = FIRST('^') = '^' |
| /// |
| /// Upon erroneous input "[]", the call chain is |
| /// |
| /// a -> b -> c |
| /// |
| /// and, hence, the follow context stack is: |
| /// |
| /// depth local follow set after call to rule |
| /// 0 <EOF> a (from main()) |
| /// 1 ']' b |
/// 2 '^' c
| /// |
| /// Notice that ')' is not included, because b would have to have |
| /// been called from a different context in rule a for ')' to be |
| /// included. |
| /// |
| /// For error recovery, we cannot consider FOLLOW(c) |
| /// (context-sensitive or otherwise). We need the combined set of |
| /// all context-sensitive FOLLOW sets--the set of all tokens that |
| /// could follow any reference in the call chain. We need to |
| /// resync to one of those tokens. Note that FOLLOW(c)='^' and if |
| /// we resync'd to that token, we'd consume until EOF. We need to |
| /// sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}. |
| /// In this case, for input "[]", LA(1) is in this set so we would |
| /// not consume anything and after printing an error rule c would |
| /// return normally. It would not find the required '^' though. |
| /// At this point, it gets a mismatched token error and throws an |
| /// exception (since LA(1) is not in the viable following token |
| /// set). The rule exception handler tries to recover, but finds |
| /// the same recovery set and doesn't consume anything. Rule b |
| /// exits normally returning to rule a. Now it finds the ']' (and |
| /// with the successful match exits errorRecovery mode). |
| /// |
| /// So, you can see that the parser walks up call chain looking |
| /// for the token that was a member of the recovery set. |
| /// |
| /// Errors are not generated in errorRecovery mode. |
| /// |
| /// ANTLR's error recovery mechanism is based upon original ideas: |
| /// |
| /// "Algorithms + Data Structures = Programs" by Niklaus Wirth |
| /// |
| /// and |
| /// |
| /// "A note on error recovery in recursive descent parsers": |
| /// http://portal.acm.org/citation.cfm?id=947902.947905 |
| /// |
| /// Later, Josef Grosch had some good ideas: |
| /// |
| /// "Efficient and Comfortable Error Recovery in Recursive Descent |
| /// Parsers": |
| /// ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip |
| /// |
| /// Like Grosch I implemented local FOLLOW sets that are combined |
| /// at run-time upon error to avoid overhead during parsing. |
| /// |
| static pANTLR3_BITSET |
| computeErrorRecoverySet (pANTLR3_BASE_RECOGNIZER recognizer) |
| { |
| return recognizer->combineFollows(recognizer, ANTLR3_FALSE); |
| } |
| |
| /// Compute the context-sensitive FOLLOW set for current rule. |
| /// Documentation below is from the Java runtime. |
| /// |
| /// This is the set of token types that can follow a specific rule |
| /// reference given a specific call chain. You get the set of |
| /// viable tokens that can possibly come next (look ahead depth 1) |
| /// given the current call chain. Contrast this with the |
| /// definition of plain FOLLOW for rule r: |
| /// |
| /// FOLLOW(r)={x | S=>*alpha r beta in G and x in FIRST(beta)} |
| /// |
| /// where x in T* and alpha, beta in V*; T is set of terminals and |
| /// V is the set of terminals and non terminals. In other words, |
| /// FOLLOW(r) is the set of all tokens that can possibly follow |
/// references to r in *any* sentential form (context). At
| /// runtime, however, we know precisely which context applies as |
| /// we have the call chain. We may compute the exact (rather |
| /// than covering superset) set of following tokens. |
| /// |
| /// For example, consider grammar: |
| /// |
| /// stat : ID '=' expr ';' // FOLLOW(stat)=={EOF} |
| /// | "return" expr '.' |
| /// ; |
| /// expr : atom ('+' atom)* ; // FOLLOW(expr)=={';','.',')'} |
| /// atom : INT // FOLLOW(atom)=={'+',')',';','.'} |
| /// | '(' expr ')' |
| /// ; |
| /// |
| /// The FOLLOW sets are all inclusive whereas context-sensitive |
| /// FOLLOW sets are precisely what could follow a rule reference. |
/// For input "i=(3);", here is the derivation:
| /// |
| /// stat => ID '=' expr ';' |
| /// => ID '=' atom ('+' atom)* ';' |
| /// => ID '=' '(' expr ')' ('+' atom)* ';' |
| /// => ID '=' '(' atom ')' ('+' atom)* ';' |
| /// => ID '=' '(' INT ')' ('+' atom)* ';' |
| /// => ID '=' '(' INT ')' ';' |
| /// |
| /// At the "3" token, you'd have a call chain of |
| /// |
| /// stat -> expr -> atom -> expr -> atom |
| /// |
| /// What can follow that specific nested ref to atom? Exactly ')' |
| /// as you can see by looking at the derivation of this specific |
| /// input. Contrast this with the FOLLOW(atom)={'+',')',';','.'}. |
| /// |
| /// You want the exact viable token set when recovering from a |
| /// token mismatch. Upon token mismatch, if LA(1) is member of |
| /// the viable next token set, then you know there is most likely |
| /// a missing token in the input stream. "Insert" one by just not |
| /// throwing an exception. |
| /// |
| static pANTLR3_BITSET |
| computeCSRuleFollow (pANTLR3_BASE_RECOGNIZER recognizer) |
| { |
| return recognizer->combineFollows(recognizer, ANTLR3_FALSE); |
| } |
| |
| /// Compute the current followset for the input stream. |
| /// |
| static pANTLR3_BITSET |
| combineFollows (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_BOOLEAN exact) |
| { |
| pANTLR3_BITSET followSet; |
| pANTLR3_BITSET localFollowSet; |
| ANTLR3_UINT32 top; |
| ANTLR3_UINT32 i; |
| |
| top = recognizer->state->following->size(recognizer->state->following); |
| |
| followSet = antlr3BitsetNew(0); |
| localFollowSet = NULL; |
| |
| for (i = top; i>0; i--) |
| { |
| localFollowSet = antlr3BitsetLoad((pANTLR3_BITSET_LIST) recognizer->state->following->get(recognizer->state->following, i-1)); |
| |
| if (localFollowSet != NULL) |
| { |
| followSet->borInPlace(followSet, localFollowSet); |
| |
| if (exact == ANTLR3_TRUE) |
| { |
| if (localFollowSet->isMember(localFollowSet, ANTLR3_EOR_TOKEN_TYPE) == ANTLR3_FALSE) |
| { |
| // Only leave EOR in the set if at top (start rule); this lets us know |
| // if we have to include the follow(start rule); I.E., EOF |
| // |
| if (i>1) |
| { |
| followSet->remove(followSet, ANTLR3_EOR_TOKEN_TYPE); |
| } |
| } |
| else |
| { |
| break; // Cannot see End Of Rule from here, just drop out |
| } |
| } |
| localFollowSet->free(localFollowSet); |
| localFollowSet = NULL; |
| } |
| } |
| |
| if (localFollowSet != NULL) |
| { |
| localFollowSet->free(localFollowSet); |
| } |
| return followSet; |
| } |
| |
| /// Standard/Example error display method. |
| /// No generic error message display funciton coudl possibly do everything correctly |
| /// for all possible parsers. Hence you are provided with this example routine, which |
| /// you should override in your parser/tree parser to do as you will. |
| /// |
| /// Here we depart somewhat from the Java runtime as that has now split up a lot |
| /// of the error display routines into spearate units. However, ther is little advantage |
| /// to this in the C version as you will probably implement all such routines as a |
| /// separate translation unit, rather than install them all as pointers to functions |
| /// in the base recognizer. |
| /// |
| static void |
| displayRecognitionError (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 * tokenNames) |
| { |
| pANTLR3_PARSER parser; |
| pANTLR3_TREE_PARSER tparser; |
| pANTLR3_INT_STREAM is; |
| pANTLR3_STRING ttext; |
| pANTLR3_STRING ftext; |
| pANTLR3_EXCEPTION ex; |
| pANTLR3_COMMON_TOKEN theToken; |
| pANTLR3_BASE_TREE theBaseTree; |
| pANTLR3_COMMON_TREE theCommonTree; |
| |
| // Retrieve some info for easy reading. |
| // |
| ex = recognizer->state->exception; |
| ttext = NULL; |
| |
| // See if there is a 'filename' we can use |
| // |
| if (ex->streamName == NULL) |
| { |
| if (((pANTLR3_COMMON_TOKEN)(ex->token))->type == ANTLR3_TOKEN_EOF) |
| { |
| ANTLR3_FPRINTF(stderr, "-end of input-("); |
| } |
| else |
| { |
| ANTLR3_FPRINTF(stderr, "-unknown source-("); |
| } |
| } |
| else |
| { |
| ftext = ex->streamName->to8(ex->streamName); |
| ANTLR3_FPRINTF(stderr, "%s(", ftext->chars); |
| } |
| |
| // Next comes the line number |
| // |
| |
| ANTLR3_FPRINTF(stderr, "%d) ", recognizer->state->exception->line); |
| ANTLR3_FPRINTF(stderr, " : error %d : %s", |
| recognizer->state->exception->type, |
| (pANTLR3_UINT8) (recognizer->state->exception->message)); |
| |
| |
| // How we determine the next piece is dependent on which thing raised the |
| // error. |
| // |
| switch (recognizer->type) |
| { |
| case ANTLR3_TYPE_PARSER: |
| |
| // Prepare the knowledge we know we have |
| // |
| parser = (pANTLR3_PARSER) (recognizer->super); |
| tparser = NULL; |
| is = parser->tstream->istream; |
| theToken = (pANTLR3_COMMON_TOKEN)(recognizer->state->exception->token); |
| ttext = theToken->toString(theToken); |
| |
| ANTLR3_FPRINTF(stderr, ", at offset %d", recognizer->state->exception->charPositionInLine); |
| if (theToken != NULL) |
| { |
| if (theToken->type == ANTLR3_TOKEN_EOF) |
| { |
| ANTLR3_FPRINTF(stderr, ", at <EOF>"); |
| } |
| else |
| { |
| // Guard against null text in a token |
| // |
| ANTLR3_FPRINTF(stderr, "\n near %s\n ", ttext == NULL ? (pANTLR3_UINT8)"<no text for the token>" : ttext->chars); |
| } |
| } |
| break; |
| |
| case ANTLR3_TYPE_TREE_PARSER: |
| |
| tparser = (pANTLR3_TREE_PARSER) (recognizer->super); |
| parser = NULL; |
| is = tparser->ctnstream->tnstream->istream; |
| theBaseTree = (pANTLR3_BASE_TREE)(recognizer->state->exception->token); |
| ttext = theBaseTree->toStringTree(theBaseTree); |
| |
| if (theBaseTree != NULL) |
| { |
| theCommonTree = (pANTLR3_COMMON_TREE) theBaseTree->super; |
| |
| if (theCommonTree != NULL) |
| { |
| theToken = (pANTLR3_COMMON_TOKEN) theBaseTree->getToken(theBaseTree); |
| } |
| ANTLR3_FPRINTF(stderr, ", at offset %d", theBaseTree->getCharPositionInLine(theBaseTree)); |
| ANTLR3_FPRINTF(stderr, ", near %s", ttext->chars); |
| } |
| break; |
| |
| default: |
| |
| ANTLR3_FPRINTF(stderr, "Base recognizer function displayRecognitionError called by unknown parser type - provide override for this function\n"); |
| return; |
| break; |
| } |
| |
| // Although this function should generally be provided by the implementation, this one |
| // should be as helpful as possible for grammar developers and serve as an example |
| // of what you can do with each exception type. In general, when you make up your |
| // 'real' handler, you should debug the routine with all possible errors you expect |
| // which will then let you be as specific as possible about all circumstances. |
| // |
| // Note that in the general case, errors thrown by tree parsers indicate a problem |
| // with the output of the parser or with the tree grammar itself. The job of the parser |
| // is to produce a perfect (in traversal terms) syntactically correct tree, so errors |
| // at that stage should really be semantic errors that your own code determines and handles |
| // in whatever way is appropriate. |
| // |
| switch (ex->type) |
| { |
| case ANTLR3_UNWANTED_TOKEN_EXCEPTION: |
| |
| // Indicates that the recognizer was fed a token which seesm to be |
| // spurious input. We can detect this when the token that follows |
| // this unwanted token would normally be part of the syntactically |
| // correct stream. Then we can see that the token we are looking at |
| // is just something that should not be there and throw this exception. |
| // |
| if (tokenNames == NULL) |
| { |
| ANTLR3_FPRINTF(stderr, " : Extraneous input..."); |
| } |
| else |
| { |
| if (ex->expecting == ANTLR3_TOKEN_EOF) |
| { |
| ANTLR3_FPRINTF(stderr, " : Extraneous input - expected <EOF>\n"); |
| } |
| else |
| { |
| ANTLR3_FPRINTF(stderr, " : Extraneous input - expected %s ...\n", tokenNames[ex->expecting]); |
| } |
| } |
| break; |
| |
| case ANTLR3_MISSING_TOKEN_EXCEPTION: |
| |
| // Indicates that the recognizer detected that the token we just |
| // hit would be valid syntactically if preceeded by a particular |
| // token. Perhaps a missing ';' at line end or a missing ',' in an |
| // expression list, and such like. |
| // |
| if (tokenNames == NULL) |
| { |
| ANTLR3_FPRINTF(stderr, " : Missing token (%d)...\n", ex->expecting); |
| } |
| else |
| { |
| if (ex->expecting == ANTLR3_TOKEN_EOF) |
| { |
| ANTLR3_FPRINTF(stderr, " : Missing <EOF>\n"); |
| } |
| else |
| { |
| ANTLR3_FPRINTF(stderr, " : Missing %s \n", tokenNames[ex->expecting]); |
| } |
| } |
| break; |
| |
| case ANTLR3_RECOGNITION_EXCEPTION: |
| |
| // Indicates that the recognizer received a token |
| // in the input that was not predicted. This is the basic exception type |
| // from which all others are derived. So we assume it was a syntax error. |
| // You may get this if there are not more tokens and more are needed |
| // to complete a parse for instance. |
| // |
| ANTLR3_FPRINTF(stderr, " : syntax error...\n"); |
| break; |
| |
| case ANTLR3_MISMATCHED_TOKEN_EXCEPTION: |
| |
| // We were expecting to see one thing and got another. This is the |
| // most common error if we coudl not detect a missing or unwanted token. |
| // Here you can spend your efforts to |
| // derive more useful error messages based on the expected |
| // token set and the last token and so on. The error following |
| // bitmaps do a good job of reducing the set that we were looking |
| // for down to something small. Knowing what you are parsing may be |
| // able to allow you to be even more specific about an error. |
| // |
| if (tokenNames == NULL) |
| { |
| ANTLR3_FPRINTF(stderr, " : syntax error...\n"); |
| } |
| else |
| { |
| if (ex->expecting == ANTLR3_TOKEN_EOF) |
| { |
| ANTLR3_FPRINTF(stderr, " : expected <EOF>\n"); |
| } |
| else |
| { |
| ANTLR3_FPRINTF(stderr, " : expected %s ...\n", tokenNames[ex->expecting]); |
| } |
| } |
| break; |
| |
| case ANTLR3_NO_VIABLE_ALT_EXCEPTION: |
| |
| // We could not pick any alt decision from the input given |
| // so god knows what happened - however when you examine your grammar, |
| // you should. It means that at the point where the current token occurred |
| // that the DFA indicates nowhere to go from here. |
| // |
| ANTLR3_FPRINTF(stderr, " : cannot match to any predicted input...\n"); |
| |
| break; |
| |
| case ANTLR3_MISMATCHED_SET_EXCEPTION: |
| |
| { |
| ANTLR3_UINT32 count; |
| ANTLR3_UINT32 bit; |
| ANTLR3_UINT32 size; |
| ANTLR3_UINT32 numbits; |
| pANTLR3_BITSET errBits; |
| |
| // This means we were able to deal with one of a set of |
| // possible tokens at this point, but we did not see any |
| // member of that set. |
| // |
| ANTLR3_FPRINTF(stderr, " : unexpected input...\n expected one of : "); |
| |
| // What tokens could we have accepted at this point in the |
| // parse? |
| // |
| count = 0; |
| errBits = antlr3BitsetLoad (ex->expectingSet); |
| numbits = errBits->numBits (errBits); |
| size = errBits->size (errBits); |
| |
| if (size > 0) |
| { |
| // However many tokens we could have dealt with here, it is usually |
| // not useful to print ALL of the set here. I arbitrarily chose 8 |
| // here, but you should do whatever makes sense for you of course. |
| // No token number 0, so look for bit 1 and on. |
| // |
| for (bit = 1; bit < numbits && count < 8 && count < size; bit++) |
| { |
| // TODO: This doesn;t look right - should be asking if the bit is set!! |
| // |
| if (tokenNames[bit]) |
| { |
| ANTLR3_FPRINTF(stderr, "%s%s", count > 0 ? ", " : "", tokenNames[bit]); |
| count++; |
| } |
| } |
| ANTLR3_FPRINTF(stderr, "\n"); |
| } |
| else |
| { |
| ANTLR3_FPRINTF(stderr, "Actually dude, we didn't seem to be expecting anything here, or at least\n"); |
| ANTLR3_FPRINTF(stderr, "I could not work out what I was expecting, like so many of us these days!\n"); |
| } |
| } |
| break; |
| |
| case ANTLR3_EARLY_EXIT_EXCEPTION: |
| |
| // We entered a loop requiring a number of token sequences |
| // but found a token that ended that sequence earlier than |
| // we should have done. |
| // |
| ANTLR3_FPRINTF(stderr, " : missing elements...\n"); |
| break; |
| |
| default: |
| |
| // We don't handle any other exceptions here, but you can |
| // if you wish. If we get an exception that hits this point |
| // then we are just going to report what we know about the |
| // token. |
| // |
| ANTLR3_FPRINTF(stderr, " : syntax not recognized...\n"); |
| break; |
| } |
| |
| // Here you have the token that was in error which if this is |
| // the standard implementation will tell you the line and offset |
| // and also record the address of the start of the line in the |
| // input stream. You could therefore print the source line and so on. |
| // Generally though, I would expect that your lexer/parser will keep |
| // its own map of lines and source pointers or whatever as there |
| // are a lot of specific things you need to know about the input |
| // to do something like that. |
| // Here is where you do it though :-). |
| // |
| } |
| |
| /// Return how many syntax errors were detected by this recognizer |
| /// |
| static ANTLR3_UINT32 |
| getNumberOfSyntaxErrors(pANTLR3_BASE_RECOGNIZER recognizer) |
| { |
| return recognizer->state->errorCount; |
| } |
| |
| /// Recover from an error found on the input stream. Mostly this is |
| /// NoViableAlt exceptions, but could be a mismatched token that |
| /// the match() routine could not recover from. |
| /// |
| static void |
| recover (pANTLR3_BASE_RECOGNIZER recognizer) |
| { |
| // Used to compute the follow set of tokens |
| // |
| pANTLR3_BITSET followSet; |
| pANTLR3_PARSER parser; |
| pANTLR3_TREE_PARSER tparser; |
| pANTLR3_INT_STREAM is; |
| |
| switch (recognizer->type) |
| { |
| case ANTLR3_TYPE_PARSER: |
| |
| parser = (pANTLR3_PARSER) (recognizer->super); |
| tparser = NULL; |
| is = parser->tstream->istream; |
| |
| break; |
| |
| case ANTLR3_TYPE_TREE_PARSER: |
| |
| tparser = (pANTLR3_TREE_PARSER) (recognizer->super); |
| parser = NULL; |
| is = tparser->ctnstream->tnstream->istream; |
| |
| break; |
| |
| default: |
| |
| ANTLR3_FPRINTF(stderr, "Base recognizer function recover called by unknown parser type - provide override for this function\n"); |
| return; |
| |
| break; |
| } |
| |
| // Are we about to repeat the same error? |
| // |
| if (recognizer->state->lastErrorIndex == is->index(is)) |
| { |
| // The last error was at the same token index point. This must be a case |
| // where LT(1) is in the recovery token set so nothing is |
| // consumed. Consume a single token so at least to prevent |
| // an infinite loop; this is a failsafe. |
| // |
| is->consume(is); |
| } |
| |
| // Record error index position |
| // |
| recognizer->state->lastErrorIndex = is->index(is); |
| |
| // Work out the follows set for error recovery |
| // |
| followSet = recognizer->computeErrorRecoverySet(recognizer); |
| |
| // Call resync hook (for debuggers and so on) |
| // |
| recognizer->beginResync(recognizer); |
| |
| // Consume tokens until we have resynced to something in the follows set |
| // |
| recognizer->consumeUntilSet(recognizer, followSet); |
| |
| // End resync hook |
| // |
| recognizer->endResync(recognizer); |
| |
| // Destroy the temporary bitset we produced. |
| // |
| followSet->free(followSet); |
| |
| // Reset the inError flag so we don't re-report the exception |
| // |
| recognizer->state->error = ANTLR3_FALSE; |
| recognizer->state->failed = ANTLR3_FALSE; |
| } |
| |
| |
| /// Attempt to recover from a single missing or extra token. |
| /// |
| /// EXTRA TOKEN |
| /// |
| /// LA(1) is not what we are looking for. If LA(2) has the right token, |
| /// however, then assume LA(1) is some extra spurious token. Delete it |
| /// and LA(2) as if we were doing a normal match(), which advances the |
| /// input. |
| /// |
| /// MISSING TOKEN |
| /// |
| /// If current token is consistent with what could come after |
| /// ttype then it is ok to "insert" the missing token, else throw |
| /// exception For example, Input "i=(3;" is clearly missing the |
| /// ')'. When the parser returns from the nested call to expr, it |
| /// will have call chain: |
| /// |
| /// stat -> expr -> atom |
| /// |
| /// and it will be trying to match the ')' at this point in the |
| /// derivation: |
| /// |
| /// => ID '=' '(' INT ')' ('+' atom)* ';' |
| /// ^ |
| /// match() will see that ';' doesn't match ')' and report a |
| /// mismatched token error. To recover, it sees that LA(1)==';' |
| /// is in the set of tokens that can follow the ')' token |
| /// reference in rule atom. It can assume that you forgot the ')'. |
| /// |
/// The exception that was passed in, in the java implementation, is
/// stored in the recognizer exception stack in the C version. To 'throw' it we set the
/// error flag and rules cascade back when this is set.
| /// |
| static void * |
| recoverFromMismatchedToken (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow) |
| { |
| pANTLR3_PARSER parser; |
| pANTLR3_TREE_PARSER tparser; |
| pANTLR3_INT_STREAM is; |
| void * matchedSymbol; |
| |
| |
| |
| switch (recognizer->type) |
| { |
| case ANTLR3_TYPE_PARSER: |
| |
| parser = (pANTLR3_PARSER) (recognizer->super); |
| tparser = NULL; |
| is = parser->tstream->istream; |
| |
| break; |
| |
| case ANTLR3_TYPE_TREE_PARSER: |
| |
| tparser = (pANTLR3_TREE_PARSER) (recognizer->super); |
| parser = NULL; |
| is = tparser->ctnstream->tnstream->istream; |
| |
| break; |
| |
| default: |
| |
| ANTLR3_FPRINTF(stderr, "Base recognizer function recoverFromMismatchedToken called by unknown parser type - provide override for this function\n"); |
| return NULL; |
| |
| break; |
| } |
| |
| // Create an exception if we need one |
| // |
| if (recognizer->state->exception == NULL) |
| { |
| antlr3RecognitionExceptionNew(recognizer); |
| } |
| |
| // If the next token after the one we are looking at in the input stream |
| // is what we are looking for then we remove the one we have discovered |
| // from the stream by consuming it, then consume this next one along too as |
| // if nothing had happened. |
| // |
| if ( recognizer->mismatchIsUnwantedToken(recognizer, is, ttype) == ANTLR3_TRUE) |
| { |
| recognizer->state->exception->type = ANTLR3_UNWANTED_TOKEN_EXCEPTION; |
| recognizer->state->exception->message = ANTLR3_UNWANTED_TOKEN_EXCEPTION_NAME; |
| |
| // Call resync hook (for debuggers and so on) |
| // |
| if (recognizer->debugger != NULL) |
| { |
| recognizer->debugger->beginResync(recognizer->debugger); |
| } |
| |
| // "delete" the extra token |
| // |
| recognizer->beginResync(recognizer); |
| is->consume(is); |
| recognizer->endResync(recognizer); |
| // End resync hook |
| // |
| if (recognizer->debugger != NULL) |
| { |
| recognizer->debugger->endResync(recognizer->debugger); |
| } |
| |
| // Print out the error after we consume so that ANTLRWorks sees the |
| // token in the exception. |
| // |
| recognizer->reportError(recognizer); |
| |
| // Return the token we are actually matching |
| // |
| matchedSymbol = recognizer->getCurrentInputSymbol(recognizer, is); |
| |
| // Consume the token that the rule actually expected to get as if everything |
| // was hunky dory. |
| // |
| is->consume(is); |
| |
| recognizer->state->error = ANTLR3_FALSE; // Exception is not outstanding any more |
| |
| return matchedSymbol; |
| } |
| |
| // Single token deletion (Unwanted above) did not work |
| // so we see if we can insert a token instead by calculating which |
| // token would be missing |
| // |
| if (mismatchIsMissingToken(recognizer, is, follow)) |
| { |
| // We can fake the missing token and proceed |
| // |
| matchedSymbol = recognizer->getMissingSymbol(recognizer, is, recognizer->state->exception, ttype, follow); |
| recognizer->state->exception->type = ANTLR3_MISSING_TOKEN_EXCEPTION; |
| recognizer->state->exception->message = ANTLR3_MISSING_TOKEN_EXCEPTION_NAME; |
| recognizer->state->exception->token = matchedSymbol; |
| recognizer->state->exception->expecting = ttype; |
| |
| // Print out the error after we insert so that ANTLRWorks sees the |
| // token in the exception. |
| // |
| recognizer->reportError(recognizer); |
| |
| recognizer->state->error = ANTLR3_FALSE; // Exception is not outstanding any more |
| |
| return matchedSymbol; |
| } |
| |
| |
| // Neither deleting nor inserting tokens allows recovery |
| // must just report the exception. |
| // |
| recognizer->state->error = ANTLR3_TRUE; |
| return NULL; |
| } |
| |
| static void * |
| recoverFromMismatchedSet (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET_LIST follow) |
| { |
| pANTLR3_PARSER parser; |
| pANTLR3_TREE_PARSER tparser; |
| pANTLR3_INT_STREAM is; |
| pANTLR3_COMMON_TOKEN matchedSymbol; |
| |
| switch (recognizer->type) |
| { |
| case ANTLR3_TYPE_PARSER: |
| |
| parser = (pANTLR3_PARSER) (recognizer->super); |
| tparser = NULL; |
| is = parser->tstream->istream; |
| |
| break; |
| |
| case ANTLR3_TYPE_TREE_PARSER: |
| |
| tparser = (pANTLR3_TREE_PARSER) (recognizer->super); |
| parser = NULL; |
| is = tparser->ctnstream->tnstream->istream; |
| |
| break; |
| |
| default: |
| |
| ANTLR3_FPRINTF(stderr, "Base recognizer function recoverFromMismatchedSet called by unknown parser type - provide override for this function\n"); |
| return NULL; |
| |
| break; |
| } |
| |
| if (recognizer->mismatchIsMissingToken(recognizer, is, follow) == ANTLR3_TRUE) |
| { |
| // We can fake the missing token and proceed |
| // |
| matchedSymbol = (pANTLR3_COMMON_TOKEN)recognizer->getMissingSymbol(recognizer, is, recognizer->state->exception, ANTLR3_TOKEN_INVALID, follow); |
| recognizer->state->exception->type = ANTLR3_MISSING_TOKEN_EXCEPTION; |
| recognizer->state->exception->token = matchedSymbol; |
| |
| // Print out the error after we insert so that ANTLRWorks sees the |
| // token in the exception. |
| // |
| recognizer->reportError(recognizer); |
| |
| recognizer->state->error = ANTLR3_FALSE; // Exception is not outstanding any more |
| |
| return matchedSymbol; |
| } |
| |
| // TODO - Single token deletion like in recoverFromMismatchedToken() |
| // |
| recognizer->state->error = ANTLR3_TRUE; |
| recognizer->state->failed = ANTLR3_TRUE; |
| return NULL; |
| } |
| |
| /// This code is factored out from mismatched token and mismatched set |
| /// recovery. It handles "single token insertion" error recovery for |
| /// both. No tokens are consumed to recover from insertions. Return |
| /// true if recovery was possible else return false. |
| /// |
| static ANTLR3_BOOLEAN |
| recoverFromMismatchedElement (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET_LIST followBits) |
| { |
| pANTLR3_BITSET viableToksFollowingRule; |
| pANTLR3_BITSET follow; |
| pANTLR3_PARSER parser; |
| pANTLR3_TREE_PARSER tparser; |
| pANTLR3_INT_STREAM is; |
| |
| switch (recognizer->type) |
| { |
| case ANTLR3_TYPE_PARSER: |
| |
| parser = (pANTLR3_PARSER) (recognizer->super); |
| tparser = NULL; |
| is = parser->tstream->istream; |
| |
| break; |
| |
| case ANTLR3_TYPE_TREE_PARSER: |
| |
| tparser = (pANTLR3_TREE_PARSER) (recognizer->super); |
| parser = NULL; |
| is = tparser->ctnstream->tnstream->istream; |
| |
| break; |
| |
| default: |
| |
| ANTLR3_FPRINTF(stderr, "Base recognizer function recover called by unknown parser type - provide override for this function\n"); |
| return ANTLR3_FALSE; |
| |
| break; |
| } |
| |
| follow = antlr3BitsetLoad(followBits); |
| |
| if (follow == NULL) |
| { |
| /* The follow set is NULL, which means we don't know what can come |
| * next, so we "hit and hope" by just signifying that we cannot |
| * recover, which will just cause the next token to be consumed, |
| * which might dig us out. |
| */ |
| return ANTLR3_FALSE; |
| } |
| |
| /* We have a bitmap for the follow set, hence we can compute |
| * what can follow this grammar element reference. |
| */ |
| if (follow->isMember(follow, ANTLR3_EOR_TOKEN_TYPE) == ANTLR3_TRUE) |
| { |
| /* First we need to know which of the available tokens are viable |
| * to follow this reference. |
| */ |
| viableToksFollowingRule = recognizer->computeCSRuleFollow(recognizer); |
| |
| /* Remove the EOR token, which we do not wish to compute with |
| */ |
| follow->remove(follow, ANTLR3_EOR_TOKEN_TYPE); |
| viableToksFollowingRule->free(viableToksFollowingRule); |
| /* We now have the computed set of what can follow the current token |
| */ |
| } |
| |
| /* We can now see if the current token works with the set of tokens |
| * that could follow the current grammar reference. If it looks like it |
| * is consistent, then we can "insert" that token by not throwing |
| * an exception and assuming that we saw it. |
| */ |
| if ( follow->isMember(follow, is->_LA(is, 1)) == ANTLR3_TRUE) |
| { |
| /* report the error, but don't cause any rules to abort and stuff |
| */ |
| recognizer->reportError(recognizer); |
| if (follow != NULL) |
| { |
| follow->free(follow); |
| } |
| recognizer->state->error = ANTLR3_FALSE; |
| recognizer->state->failed = ANTLR3_FALSE; |
| return ANTLR3_TRUE; /* Success in recovery */ |
| } |
| |
| if (follow != NULL) |
| { |
| follow->free(follow); |
| } |
| |
| /* We could not find anything viable to do, so this is going to |
| * cause an exception. |
| */ |
| return ANTLR3_FALSE; |
| } |
| |
| /// Eat tokens from the input stream until we get one of JUST the right type |
| /// |
| static void |
| consumeUntil (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 tokenType) |
| { |
| ANTLR3_UINT32 ttype; |
| pANTLR3_PARSER parser; |
| pANTLR3_TREE_PARSER tparser; |
| pANTLR3_INT_STREAM is; |
| |
| switch (recognizer->type) |
| { |
| case ANTLR3_TYPE_PARSER: |
| |
| parser = (pANTLR3_PARSER) (recognizer->super); |
| tparser = NULL; |
| is = parser->tstream->istream; |
| |
| break; |
| |
| case ANTLR3_TYPE_TREE_PARSER: |
| |
| tparser = (pANTLR3_TREE_PARSER) (recognizer->super); |
| parser = NULL; |
| is = tparser->ctnstream->tnstream->istream; |
| |
| break; |
| |
| default: |
| |
| ANTLR3_FPRINTF(stderr, "Base recognizer function 'consumeUntil' called by unknown parser type - provide override for this function\n"); |
| return; |
| |
| break; |
| } |
| |
| // What do have at the moment? |
| // |
| ttype = is->_LA(is, 1); |
| |
| // Start eating tokens until we get to the one we want. |
| // |
| while (ttype != ANTLR3_TOKEN_EOF && ttype != tokenType) |
| { |
| is->consume(is); |
| ttype = is->_LA(is, 1); |
| } |
| } |
| |
| /// Eat tokens from the input stream until we find one that |
| /// belongs to the supplied set. |
| /// |
| static void |
| consumeUntilSet (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET set) |
| { |
| ANTLR3_UINT32 ttype; |
| pANTLR3_PARSER parser; |
| pANTLR3_TREE_PARSER tparser; |
| pANTLR3_INT_STREAM is; |
| |
| switch (recognizer->type) |
| { |
| case ANTLR3_TYPE_PARSER: |
| |
| parser = (pANTLR3_PARSER) (recognizer->super); |
| tparser = NULL; |
| is = parser->tstream->istream; |
| |
| break; |
| |
| case ANTLR3_TYPE_TREE_PARSER: |
| |
| tparser = (pANTLR3_TREE_PARSER) (recognizer->super); |
| parser = NULL; |
| is = tparser->ctnstream->tnstream->istream; |
| |
| break; |
| |
| default: |
| |
| ANTLR3_FPRINTF(stderr, "Base recognizer function 'consumeUntilSet' called by unknown parser type - provide override for this function\n"); |
| return; |
| |
| break; |
| } |
| |
| // What do have at the moment? |
| // |
| ttype = is->_LA(is, 1); |
| |
| // Start eating tokens until we get to one we want. |
| // |
| while (ttype != ANTLR3_TOKEN_EOF && set->isMember(set, ttype) == ANTLR3_FALSE) |
| { |
| is->consume(is); |
| ttype = is->_LA(is, 1); |
| } |
| } |
| |
| /** Return the rule invocation stack (how we got here in the parse. |
| * In the java version Ter just asks the JVM for all the information |
| * but in C we don't get this information, so I am going to do nothing |
| * right now. |
| */ |
| static pANTLR3_STACK |
| getRuleInvocationStack (pANTLR3_BASE_RECOGNIZER recognizer) |
| { |
| return NULL; |
| } |
| |
| static pANTLR3_STACK |
| getRuleInvocationStackNamed (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 name) |
| { |
| return NULL; |
| } |
| |
| /** Convenience method for template rewrites - NYI. |
| */ |
| static pANTLR3_HASH_TABLE |
| toStrings (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_HASH_TABLE tokens) |
| { |
| return NULL; |
| } |
| |
| static void ANTLR3_CDECL |
| freeIntTrie (void * trie) |
| { |
| ((pANTLR3_INT_TRIE)trie)->free((pANTLR3_INT_TRIE)trie); |
| } |
| |
| |
| /** Pointer to a function to return whether the rule has parsed input starting at the supplied |
| * start index before. If the rule has not parsed input starting from the supplied start index, |
| * then it will return ANTLR3_MEMO_RULE_UNKNOWN. If it has parsed from the suppled start point |
| * then it will return the point where it last stopped parsing after that start point. |
| * |
| * \remark |
| * The rule memos are an ANTLR3_LIST of ANTLR3_LISTS, however if this becomes any kind of performance |
| * issue (it probably won't, the hash tables are pretty quick) then we could make a special int only |
| * version of the table. |
| */ |
| static ANTLR3_MARKER |
| getRuleMemoization (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_INTKEY ruleIndex, ANTLR3_MARKER ruleParseStart) |
| { |
| /* The rule memos are an ANTLR3_LIST of ANTLR3_LIST. |
| */ |
| pANTLR3_INT_TRIE ruleList; |
| ANTLR3_MARKER stopIndex; |
| pANTLR3_TRIE_ENTRY entry; |
| |
| /* See if we have a list in the ruleMemos for this rule, and if not, then create one |
| * as we will need it eventually if we are being asked for the memo here. |
| */ |
| entry = recognizer->state->ruleMemo->get(recognizer->state->ruleMemo, (ANTLR3_INTKEY)ruleIndex); |
| |
| if (entry == NULL) |
| { |
| /* Did not find it, so create a new one for it, with a bit depth based on the |
| * size of the input stream. We need the bit depth to incorporate the number if |
| * bits required to represent the largest possible stop index in the input, which is the |
| * last character. An int stream is free to return the largest 64 bit offset if it has |
| * no idea of the size, but you should remember that this will cause the leftmost |
| * bit match algorithm to run to 63 bits, which will be the whole time spent in the trie ;-) |
| */ |
| ruleList = antlr3IntTrieNew(63); /* Depth is theoretically 64 bits, but probably not ;-) */ |
| |
| if (ruleList != NULL) |
| { |
| recognizer->state->ruleMemo->add(recognizer->state->ruleMemo, (ANTLR3_INTKEY)ruleIndex, ANTLR3_HASH_TYPE_STR, 0, ANTLR3_FUNC_PTR(ruleList), freeIntTrie); |
| } |
| |
| /* We cannot have a stopIndex in a trie we have just created of course |
| */ |
| return MEMO_RULE_UNKNOWN; |
| } |
| |
| ruleList = (pANTLR3_INT_TRIE) (entry->data.ptr); |
| |
| /* See if there is a stop index associated with the supplied start index. |
| */ |
| stopIndex = 0; |
| |
| entry = ruleList->get(ruleList, ruleParseStart); |
| if (entry != NULL) |
| { |
| stopIndex = (ANTLR3_MARKER)(entry->data.intVal); |
| } |
| |
| if (stopIndex == 0) |
| { |
| return MEMO_RULE_UNKNOWN; |
| } |
| |
| return stopIndex; |
| } |
| |
| /** Has this rule already parsed input at the current index in the |
| * input stream? Return ANTLR3_TRUE if we have and ANTLR3_FALSE |
| * if we have not. |
| * |
| * This method has a side-effect: if we have seen this input for |
| * this rule and successfully parsed before, then seek ahead to |
| * 1 past the stop token matched for this rule last time. |
| */ |
| static ANTLR3_BOOLEAN |
| alreadyParsedRule (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_MARKER ruleIndex) |
| { |
| ANTLR3_MARKER stopIndex; |
| pANTLR3_LEXER lexer; |
| pANTLR3_PARSER parser; |
| pANTLR3_TREE_PARSER tparser; |
| pANTLR3_INT_STREAM is; |
| |
| switch (recognizer->type) |
| { |
| case ANTLR3_TYPE_PARSER: |
| |
| parser = (pANTLR3_PARSER) (recognizer->super); |
| tparser = NULL; |
| lexer = NULL; |
| is = parser->tstream->istream; |
| |
| break; |
| |
| case ANTLR3_TYPE_TREE_PARSER: |
| |
| tparser = (pANTLR3_TREE_PARSER) (recognizer->super); |
| parser = NULL; |
| lexer = NULL; |
| is = tparser->ctnstream->tnstream->istream; |
| |
| break; |
| |
| case ANTLR3_TYPE_LEXER: |
| |
| lexer = (pANTLR3_LEXER) (recognizer->super); |
| parser = NULL; |
| tparser = NULL; |
| is = lexer->input->istream; |
| break; |
| |
| default: |
| |
| ANTLR3_FPRINTF(stderr, "Base recognizer function 'alreadyParsedRule' called by unknown parser type - provide override for this function\n"); |
| return ANTLR3_FALSE; |
| |
| break; |
| } |
| |
| /* See if we have a memo marker for this. |
| */ |
| stopIndex = recognizer->getRuleMemoization(recognizer, ruleIndex, is->index(is)); |
| |
| if (stopIndex == MEMO_RULE_UNKNOWN) |
| { |
| return ANTLR3_FALSE; |
| } |
| |
| if (stopIndex == MEMO_RULE_FAILED) |
| { |
| recognizer->state->failed = ANTLR3_TRUE; |
| } |
| else |
| { |
| is->seek(is, stopIndex+1); |
| } |
| |
| /* If here then the rule was executed for this input already |
| */ |
| return ANTLR3_TRUE; |
| } |
| |
| /** Record whether or not this rule parsed the input at this position |
| * successfully. |
| */ |
| static void |
| memoize (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_MARKER ruleIndex, ANTLR3_MARKER ruleParseStart) |
| { |
| /* The rule memos are an ANTLR3_LIST of ANTLR3_LIST. |
| */ |
| pANTLR3_INT_TRIE ruleList; |
| pANTLR3_TRIE_ENTRY entry; |
| ANTLR3_MARKER stopIndex; |
| pANTLR3_LEXER lexer; |
| pANTLR3_PARSER parser; |
| pANTLR3_TREE_PARSER tparser; |
| pANTLR3_INT_STREAM is; |
| |
| switch (recognizer->type) |
| { |
| case ANTLR3_TYPE_PARSER: |
| |
| parser = (pANTLR3_PARSER) (recognizer->super); |
| tparser = NULL; |
| is = parser->tstream->istream; |
| |
| break; |
| |
| case ANTLR3_TYPE_TREE_PARSER: |
| |
| tparser = (pANTLR3_TREE_PARSER) (recognizer->super); |
| parser = NULL; |
| is = tparser->ctnstream->tnstream->istream; |
| |
| break; |
| |
| case ANTLR3_TYPE_LEXER: |
| |
| lexer = (pANTLR3_LEXER) (recognizer->super); |
| parser = NULL; |
| tparser = NULL; |
| is = lexer->input->istream; |
| break; |
| |
| default: |
| |
| ANTLR3_FPRINTF(stderr, "Base recognizer function consumeUntilSet called by unknown parser type - provide override for this function\n"); |
| return; |
| |
| break; |
| } |
| |
| stopIndex = recognizer->state->failed == ANTLR3_TRUE ? MEMO_RULE_FAILED : is->index(is) - 1; |
| |
| entry = recognizer->state->ruleMemo->get(recognizer->state->ruleMemo, (ANTLR3_INTKEY)ruleIndex); |
| |
| if (entry != NULL) |
| { |
| ruleList = (pANTLR3_INT_TRIE)(entry->data.ptr); |
| |
| /* If we don't already have this entry, append it. The memoize trie does not |
| * accept duplicates so it won't add it if already there and we just ignore the |
| * return code as we don't care if it is there already. |
| */ |
| ruleList->add(ruleList, ruleParseStart, ANTLR3_HASH_TYPE_INT, stopIndex, NULL, NULL); |
| } |
| } |
| /** A syntactic predicate. Returns true/false depending on whether |
| * the specified grammar fragment matches the current input stream. |
| * This resets the failed instance var afterwards. |
| */ |
| static ANTLR3_BOOLEAN |
| synpred (pANTLR3_BASE_RECOGNIZER recognizer, void * ctx, void (*predicate)(void * ctx)) |
| { |
| ANTLR3_MARKER start; |
| pANTLR3_PARSER parser; |
| pANTLR3_TREE_PARSER tparser; |
| pANTLR3_INT_STREAM is; |
| |
| switch (recognizer->type) |
| { |
| case ANTLR3_TYPE_PARSER: |
| |
| parser = (pANTLR3_PARSER) (recognizer->super); |
| tparser = NULL; |
| is = parser->tstream->istream; |
| |
| break; |
| |
| case ANTLR3_TYPE_TREE_PARSER: |
| |
| tparser = (pANTLR3_TREE_PARSER) (recognizer->super); |
| parser = NULL; |
| is = tparser->ctnstream->tnstream->istream; |
| |
| break; |
| |
| default: |
| |
| ANTLR3_FPRINTF(stderr, "Base recognizer function 'synPred' called by unknown parser type - provide override for this function\n"); |
| return ANTLR3_FALSE; |
| |
| break; |
| } |
| |
| /* Begin backtracking so we can get back to where we started after trying out |
| * the syntactic predicate. |
| */ |
| start = is->mark(is); |
| recognizer->state->backtracking++; |
| |
| /* Try the syntactical predicate |
| */ |
| predicate(ctx); |
| |
| /* Reset |
| */ |
| is->rewind(is, start); |
| recognizer->state->backtracking--; |
| |
| if (recognizer->state->failed == ANTLR3_TRUE) |
| { |
| /* Predicate failed |
| */ |
| recognizer->state->failed = ANTLR3_FALSE; |
| return ANTLR3_FALSE; |
| } |
| else |
| { |
| /* Predicate was successful |
| */ |
| recognizer->state->failed = ANTLR3_FALSE; |
| return ANTLR3_TRUE; |
| } |
| } |
| |
| static void |
| reset(pANTLR3_BASE_RECOGNIZER recognizer) |
| { |
| if (recognizer->state->following != NULL) |
| { |
| recognizer->state->following->free(recognizer->state->following); |
| } |
| |
| // Reset the state flags |
| // |
| recognizer->state->errorRecovery = ANTLR3_FALSE; |
| recognizer->state->lastErrorIndex = -1; |
| recognizer->state->failed = ANTLR3_FALSE; |
| recognizer->state->errorCount = 0; |
| recognizer->state->backtracking = 0; |
| recognizer->state->following = NULL; |
| |
| if (recognizer->state != NULL) |
| { |
| if (recognizer->state->ruleMemo != NULL) |
| { |
| recognizer->state->ruleMemo->free(recognizer->state->ruleMemo); |
| recognizer->state->ruleMemo = antlr3IntTrieNew(15); /* 16 bit depth is enough for 32768 rules! */ |
| } |
| } |
| |
| // ml: 2013-11-05, added reset of old exceptions. |
| pANTLR3_EXCEPTION thisE = recognizer->state->exception; |
| if (thisE != NULL) |
| { |
| thisE->freeEx(thisE); |
| recognizer->state->exception = NULL; |
| } |
| |
| // Install a new following set |
| // |
| recognizer->state->following = antlr3StackNew(8); |
| |
| } |
| |
| // Default implementation is for parser and assumes a token stream as supplied by the runtime. |
| // You MAY need override this function if the standard TOKEN_STREAM is not what you are using. |
| // |
| static void * |
| getCurrentInputSymbol (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream) |
| { |
| return ((pANTLR3_TOKEN_STREAM)istream->super)->_LT((pANTLR3_TOKEN_STREAM)istream->super, 1); |
| } |
| |
| // Default implementation is for parser and assumes a token stream as supplied by the runtime. |
| // You MAY need override this function if the standard COMMON_TOKEN_STREAM is not what you are using. |
| // |
| static void * |
| getMissingSymbol (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream, pANTLR3_EXCEPTION e, |
| ANTLR3_UINT32 expectedTokenType, pANTLR3_BITSET_LIST follow) |
| { |
| pANTLR3_TOKEN_STREAM ts; |
| pANTLR3_COMMON_TOKEN_STREAM cts; |
| pANTLR3_COMMON_TOKEN token; |
| pANTLR3_COMMON_TOKEN current; |
| pANTLR3_STRING text; |
| |
| // Dereference the standard pointers |
| // |
| ts = (pANTLR3_TOKEN_STREAM)istream->super; |
| cts = (pANTLR3_COMMON_TOKEN_STREAM)ts->super; |
| |
| // Work out what to use as the current symbol to make a line and offset etc |
| // If we are at EOF, we use the token before EOF |
| // |
| current = ts->_LT(ts, 1); |
| if (current->getType(current) == ANTLR3_TOKEN_EOF) |
| { |
| current = ts->_LT(ts, -1); |
| } |
| |
| // Create a new empty token |
| // |
| if (recognizer->state->tokFactory == NULL) |
| { |
| // We don't yet have a token factory for making tokens |
| // we just need a fake one using the input stream of the current |
| // token. |
| // |
| recognizer->state->tokFactory = antlr3TokenFactoryNew(current->input); |
| } |
| token = recognizer->state->tokFactory->newToken(recognizer->state->tokFactory); |
| if (token == NULL) { return NULL; } |
| |
| // Set some of the token properties based on the current token |
| // |
| token->setLine (token, current->getLine(current)); |
| token->setCharPositionInLine (token, current->getCharPositionInLine(current)); |
| token->setChannel (token, ANTLR3_TOKEN_DEFAULT_CHANNEL); |
| token->setType (token, expectedTokenType); |
| token->user1 = current->user1; |
| token->user2 = current->user2; |
| token->user3 = current->user3; |
| token->custom = current->custom; |
| token->lineStart = current->lineStart; |
| |
| // Create the token text that shows it has been inserted |
| // |
| token->setText8(token, (pANTLR3_UINT8)"<missing "); |
| text = token->getText(token); |
| |
| if (text != NULL) |
| { |
| text->append8(text, (const char *)recognizer->state->tokenNames[expectedTokenType]); |
| text->append8(text, (const char *)">"); |
| } |
| |
| // Finally return the pointer to our new token |
| // |
| return token; |
| } |
| |
| |
| #ifdef ANTLR3_WINDOWS |
| #pragma warning( default : 4100 ) |
| #endif |
| |
| /// @} |
| /// |
| |