| /** \file |
| * While the C runtime does not need to model the state of |
| * multiple lexers and parsers in the same way as the Java runtime does |
| * it is no overhead to reflect that model. In fact the |
| * C runtime has always been able to share recognizer state. |
| * |
| * This 'class' therefore defines all the elements of a recognizer |
| * (either lexer, parser or tree parser) that are need to |
| * track the current recognition state. Multiple recognizers |
| * may then share this state, for instance when one grammar |
| * imports another. |
| */ |
| |
| #ifndef _ANTLR3_RECOGNIZER_SHARED_STATE_H |
| #define _ANTLR3_RECOGNIZER_SHARED_STATE_H |
| |
| // [The "BSD licence"] |
| // Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC |
| // http://www.temporal-wave.com |
| // http://www.linkedin.com/in/jimidle |
| // |
| // All rights reserved. |
| // |
| // Redistribution and use in source and binary forms, with or without |
| // modification, are permitted provided that the following conditions |
| // are met: |
| // 1. Redistributions of source code must retain the above copyright |
| // notice, this list of conditions and the following disclaimer. |
| // 2. Redistributions in binary form must reproduce the above copyright |
| // notice, this list of conditions and the following disclaimer in the |
| // documentation and/or other materials provided with the distribution. |
| // 3. The name of the author may not be used to endorse or promote products |
| // derived from this software without specific prior written permission. |
| // |
| // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR |
| // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
| // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. |
| // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, |
| // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
| // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF |
| // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| |
| #include <antlr3defs.h> |
| |
| #ifdef __cplusplus |
| extern "C" { |
| #endif |
| |
| /** All the data elements required to track the current state |
| * of any recognizer (lexer, parser, tree parser). |
| * May be share between multiple recognizers such that |
| * grammar inheritance is easily supported. |
| */ |
| typedef struct ANTLR3_RECOGNIZER_SHARED_STATE_struct |
| { |
| /** If set to ANTLR3_TRUE then the recognizer has an exception |
| * condition (this is tested by the generated code for the rules of |
| * the grammar). |
| */ |
| ANTLR3_BOOLEAN error; |
| |
| /** Points to the first in a possible chain of exceptions that the |
| * recognizer has discovered. |
| */ |
| pANTLR3_EXCEPTION exception; |
| |
| /** Track around a hint from the creator of the recognizer as to how big this |
| * thing is going to get, as the actress said to the bishop. This allows us |
| * to tune hash tables accordingly. This might not be the best place for this |
| * in the end but we will see. |
| */ |
| ANTLR3_UINT32 sizeHint; |
| |
| /** Track the set of token types that can follow any rule invocation. |
| * Stack structure, to support: List<BitSet>. |
| */ |
| pANTLR3_STACK following; |
| |
| |
| /** This is true when we see an error and before having successfully |
| * matched a token. Prevents generation of more than one error message |
| * per error. |
| */ |
| ANTLR3_BOOLEAN errorRecovery; |
| |
| /** The index into the input stream where the last error occurred. |
| * This is used to prevent infinite loops where an error is found |
| * but no token is consumed during recovery...another error is found, |
| * ad nauseam. This is a failsafe mechanism to guarantee that at least |
| * one token/tree node is consumed for two errors. |
| */ |
| ANTLR3_MARKER lastErrorIndex; |
| |
| /** In lieu of a return value, this indicates that a rule or token |
| * has failed to match. Reset to false upon valid token match. |
| */ |
| ANTLR3_BOOLEAN failed; |
| |
| /** When the recognizer terminates, the error handling functions |
| * will have incremented this value if any error occurred (that was displayed). It can then be |
| * used by the grammar programmer without having to use static globals. |
| */ |
| ANTLR3_UINT32 errorCount; |
| |
| /** If 0, no backtracking is going on. Safe to exec actions etc... |
| * If >0 then it's the level of backtracking. |
| */ |
| ANTLR3_INT32 backtracking; |
| |
| /** ANTLR3_VECTOR of ANTLR3_LIST for rule memoizing. |
| * Tracks the stop token index for each rule. ruleMemo[ruleIndex] is |
| * the memoization table for ruleIndex. For key ruleStartIndex, you |
| * get back the stop token for associated rule or MEMO_RULE_FAILED. |
| * |
| * This is only used if rule memoization is on. |
| */ |
| pANTLR3_INT_TRIE ruleMemo; |
| |
| /** Pointer to an array of token names |
| * that are generally useful in error reporting. The generated parsers install |
| * this pointer. The table it points to is statically allocated as 8 bit ascii |
| * at parser compile time - grammar token names are thus restricted in character |
| * sets, which does not seem to terrible. |
| */ |
| pANTLR3_UINT8 * tokenNames; |
| |
| /** User programmable pointer that can be used for instance as a place to |
| * store some tracking structure specific to the grammar that would not normally |
| * be available to the error handling functions. |
| */ |
| void * userp; |
| |
| /** The goal of all lexer rules/methods is to create a token object. |
| * This is an instance variable as multiple rules may collaborate to |
| * create a single token. For example, NUM : INT | FLOAT ; |
| * In this case, you want the INT or FLOAT rule to set token and not |
| * have it reset to a NUM token in rule NUM. |
| */ |
| pANTLR3_COMMON_TOKEN token; |
| |
| /** The goal of all lexer rules being to create a token, then a lexer |
| * needs to build a token factory to create them. |
| */ |
| pANTLR3_TOKEN_FACTORY tokFactory; |
| |
| /** A lexer is a source of tokens, produced by all the generated (or |
| * hand crafted if you like) matching rules. As such it needs to provide |
| * a token source interface implementation. |
| */ |
| pANTLR3_TOKEN_SOURCE tokSource; |
| |
| /** The channel number for the current token |
| */ |
| ANTLR3_UINT32 channel; |
| |
| /** The token type for the current token |
| */ |
| ANTLR3_UINT32 type; |
| |
| /** The input line (where it makes sense) on which the first character of the current |
| * token resides. |
| */ |
| ANTLR3_INT32 tokenStartLine; |
| |
| /** The character position of the first character of the current token |
| * within the line specified by tokenStartLine |
| */ |
| ANTLR3_INT32 tokenStartCharPositionInLine; |
| |
| /** What character index in the stream did the current token start at? |
| * Needed, for example, to get the text for current token. Set at |
| * the start of nextToken. |
| */ |
| ANTLR3_MARKER tokenStartCharIndex; |
| |
| /** Text for the current token. This can be overridden by setting this |
| * variable directly or by using the SETTEXT() macro (preferred) in your |
| * lexer rules. |
| */ |
| pANTLR3_STRING text; |
| |
| /** User controlled variables that will be installed in a newly created |
| * token. |
| */ |
| ANTLR3_UINT32 user1, user2, user3; |
| void * custom; |
| |
| /** Input stream stack, which allows the C programmer to switch input streams |
| * easily and allow the standard nextToken() implementation to deal with it |
| * as this is a common requirement. |
| */ |
| pANTLR3_STACK streams; |
| |
| /// A stack of token/tree rewrite streams that are available for use |
| /// by a parser or tree parser that is using rewrites to generate |
| /// an AST. This saves each rule in the recongizer from having to |
| /// allocate and deallocate rewtire streams on entry and exit. As |
| /// the parser recurses throgh the rules it will reach a steady state |
| /// of the maximum number of allocated streams, which instead of |
| /// deallocating them at rule exit, it will place on this stack for |
| /// reuse. The streams are then all finally freed when this stack |
| /// is freed. |
| /// |
| pANTLR3_VECTOR rStreams; |
| |
| } |
| ANTLR3_RECOGNIZER_SHARED_STATE; |
| |
| #ifdef __cplusplus |
| } |
| #endif |
| |
| #endif |
| |
| |