| /** \file |
| * \brief Defines the interface for a common token. |
| * |
| * All token streams should provide their tokens using an instance |
| * of this common token. A custom pointer is provided, wher you may attach |
| * a further structure to enhance the common token if you feel the need |
| * to do so. The C runtime will assume that a token provides implementations |
| * of the interface functions, but all of them may be rplaced by your own |
| * implementation if you require it. |
| */ |
| #ifndef _ANTLR3_COMMON_TOKEN_H |
| #define _ANTLR3_COMMON_TOKEN_H |
| |
| // [The "BSD licence"] |
| // Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC |
| // http://www.temporal-wave.com |
| // http://www.linkedin.com/in/jimidle |
| // |
| // All rights reserved. |
| // |
| // Redistribution and use in source and binary forms, with or without |
| // modification, are permitted provided that the following conditions |
| // are met: |
| // 1. Redistributions of source code must retain the above copyright |
| // notice, this list of conditions and the following disclaimer. |
| // 2. Redistributions in binary form must reproduce the above copyright |
| // notice, this list of conditions and the following disclaimer in the |
| // documentation and/or other materials provided with the distribution. |
| // 3. The name of the author may not be used to endorse or promote products |
| // derived from this software without specific prior written permission. |
| // |
| // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR |
| // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
| // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. |
| // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, |
| // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
| // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF |
| // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| |
| #include <antlr3defs.h> |
| |
| /** How many tokens to allocate at once in the token factory |
| */ |
| #define ANTLR3_FACTORY_POOL_SIZE 1024 |
| |
| /* Base token types, which all lexer/parser tokens come after in sequence. |
| */ |
| |
| /** Indicator of an invalid token |
| */ |
| #define ANTLR3_TOKEN_INVALID 0 |
| |
| #define ANTLR3_EOR_TOKEN_TYPE 1 |
| |
| /** Imaginary token type to cause a traversal of child nodes in a tree parser |
| */ |
| #define ANTLR3_TOKEN_DOWN 2 |
| |
| /** Imaginary token type to signal the end of a stream of child nodes. |
| */ |
| #define ANTLR3_TOKEN_UP 3 |
| |
| /** First token that can be used by users/generated code |
| */ |
| |
| #define ANTLR3_MIN_TOKEN_TYPE ANTLR3_TOKEN_UP + 1 |
| |
| /** End of file token |
| */ |
| #define ANTLR3_TOKEN_EOF (ANTLR3_CHARSTREAM_EOF & 0xFFFFFFFF) |
| |
| /** Default channel for a token |
| */ |
| #define ANTLR3_TOKEN_DEFAULT_CHANNEL 0 |
| |
| /** Reserved channel number for a HIDDEN token - a token that |
| * is hidden from the parser. |
| */ |
| #define HIDDEN 99 |
| |
| #ifdef __cplusplus |
| extern "C" { |
| #endif |
| |
| // Indicates whether this token is carrying: |
| // |
| // State | Meaning |
| // ------+-------------------------------------- |
| // 0 | Nothing (neither rewrite text, nor setText) |
| // 1 | char * to user supplied rewrite text |
| // 2 | pANTLR3_STRING because of setText or similar action |
| // |
| #define ANTLR3_TEXT_NONE 0 |
| #define ANTLR3_TEXT_CHARP 1 |
| #define ANTLR3_TEXT_STRING 2 |
| |
| /** The definition of an ANTLR3 common token structure, which all implementations |
| * of a token stream should provide, installing any further structures in the |
| * custom pointer element of this structure. |
| * |
| * \remark |
| * Token streams are in essence provided by lexers or other programs that serve |
| * as lexers. |
| */ |
| typedef struct ANTLR3_COMMON_TOKEN_struct |
| { |
| /** The actual type of this token |
| */ |
| ANTLR3_UINT32 type; |
| |
| /** Indicates that a token was produced from the token factory and therefore |
| * the the freeToken() method should not do anything itself because |
| * token factory is responsible for deleting it. |
| */ |
| ANTLR3_BOOLEAN factoryMade; |
| |
| /// A string factory that we can use if we ever need the text of a token |
| /// and need to manufacture a pANTLR3_STRING |
| /// |
| pANTLR3_STRING_FACTORY strFactory; |
| |
| /** The line number in the input stream where this token was derived from |
| */ |
| ANTLR3_UINT32 line; |
| |
| /** The offset into the input stream that the line in which this |
| * token resides starts. |
| */ |
| void * lineStart; |
| |
| /** The character position in the line that this token was derived from |
| */ |
| ANTLR3_INT32 charPosition; |
| |
| /** The virtual channel that this token exists in. |
| */ |
| ANTLR3_UINT32 channel; |
| |
| /** Pointer to the input stream that this token originated in. |
| */ |
| pANTLR3_INPUT_STREAM input; |
| |
| /** What the index of this token is, 0, 1, .., n-2, n-1 tokens |
| */ |
| ANTLR3_MARKER index; |
| |
| /** The character offset in the input stream where the text for this token |
| * starts. |
| */ |
| ANTLR3_MARKER start; |
| |
| /** The character offset in the input stream where the text for this token |
| * stops. |
| */ |
| ANTLR3_MARKER stop; |
| |
| /// Indicates whether this token is carrying: |
| /// |
| /// State | Meaning |
| /// ------+-------------------------------------- |
| /// 0 | Nothing (neither rewrite text, nor setText) |
| /// 1 | char * to user supplied rewrite text |
| /// 2 | pANTLR3_STRING because of setText or similar action |
| /// |
| /// Affects the union structure tokText below |
| /// (uses 32 bit so alignment is always good) |
| /// |
| ANTLR3_UINT32 textState; |
| |
| union |
| { |
| /// Pointer that is used when the token just has a pointer to |
| /// a char *, such as when a rewrite of an imaginary token supplies |
| /// a string in the grammar. No sense in constructing a pANTLR3_STRING just |
| /// for that, as mostly the text will not be accessed - if it is, then |
| /// we will build a pANTLR3_STRING for it a that point. |
| /// |
| pANTLR3_UCHAR chars; |
| |
| /// Some token types actually do carry around their associated text, hence |
| /// (*getText)() will return this pointer if it is not NULL |
| /// |
| pANTLR3_STRING text; |
| } |
| tokText; |
| |
| /** Because it is a bit more of a hassle to override an ANTLR3_COMMON_TOKEN |
| * as the standard structure for a token, a number of user programmable |
| * elements are allowed in a token. This is one of them. |
| */ |
| ANTLR3_UINT32 user1; |
| |
| /** Because it is a bit more of a hassle to override an ANTLR3_COMMON_TOKEN |
| * as the standard structure for a token, a number of user programmable |
| * elements are allowed in a token. This is one of them. |
| */ |
| ANTLR3_UINT32 user2; |
| |
| /** Because it is a bit more of a hassle to override an ANTLR3_COMMON_TOKEN |
| * as the standard structure for a token, a number of user programmable |
| * elements are allowed in a token. This is one of them. |
| */ |
| ANTLR3_UINT32 user3; |
| |
| /** Pointer to a custom element that the ANTLR3 programmer may define and install |
| */ |
| void * custom; |
| |
| /** Pointer to a function that knows how to free the custom structure when the |
| * token is destroyed. |
| */ |
| void (*freeCustom)(void * custom); |
| |
| /* ============================== |
| * API |
| */ |
| |
| /** Pointer to function that returns the text pointer of a token, use |
| * toString() if you want a pANTLR3_STRING version of the token. |
| */ |
| pANTLR3_STRING (*getText)(struct ANTLR3_COMMON_TOKEN_struct * token); |
| |
| /** Pointer to a function that 'might' be able to set the text associated |
| * with a token. Imaginary tokens such as an ANTLR3_CLASSIC_TOKEN may actually |
| * do this, however many tokens such as ANTLR3_COMMON_TOKEN do not actaully have |
| * strings associated with them but just point into the current input stream. These |
| * tokens will implement this function with a function that errors out (probably |
| * drastically. |
| */ |
| void (*setText)(struct ANTLR3_COMMON_TOKEN_struct * token, pANTLR3_STRING text); |
| |
| /** Pointer to a function that 'might' be able to set the text associated |
| * with a token. Imaginary tokens such as an ANTLR3_CLASSIC_TOKEN may actually |
| * do this, however many tokens such as ANTLR3_COMMON_TOKEN do not actully have |
| * strings associated with them but just point into the current input stream. These |
| * tokens will implement this function with a function that errors out (probably |
| * drastically. |
| */ |
| void (*setText8)(struct ANTLR3_COMMON_TOKEN_struct * token, pANTLR3_UINT8 text); |
| |
| /** Pointer to a function that returns the token type of this token |
| */ |
| ANTLR3_UINT32 (*getType)(struct ANTLR3_COMMON_TOKEN_struct * token); |
| |
| /** Pointer to a function that sets the type of this token |
| */ |
| void (*setType)(struct ANTLR3_COMMON_TOKEN_struct * token, ANTLR3_UINT32 ttype); |
| |
| /** Pointer to a function that gets the 'line' number where this token resides |
| */ |
| ANTLR3_UINT32 (*getLine)(struct ANTLR3_COMMON_TOKEN_struct * token); |
| |
| /** Pointer to a function that sets the 'line' number where this token reside |
| */ |
| void (*setLine)(struct ANTLR3_COMMON_TOKEN_struct * token, ANTLR3_UINT32 line); |
| |
| /** Pointer to a function that gets the offset in the line where this token exists |
| */ |
| ANTLR3_INT32 (*getCharPositionInLine) (struct ANTLR3_COMMON_TOKEN_struct * token); |
| |
| /** Pointer to a function that sets the offset in the line where this token exists |
| */ |
| void (*setCharPositionInLine) (struct ANTLR3_COMMON_TOKEN_struct * token, ANTLR3_INT32 pos); |
| |
| /** Pointer to a function that gets the channel that this token was placed in (parsers |
| * can 'tune' to these channels. |
| */ |
| ANTLR3_UINT32 (*getChannel) (struct ANTLR3_COMMON_TOKEN_struct * token); |
| |
| /** Pointer to a function that sets the channel that this token should belong to |
| */ |
| void (*setChannel) (struct ANTLR3_COMMON_TOKEN_struct * token, ANTLR3_UINT32 channel); |
| |
| /** Pointer to a function that returns an index 0...n-1 of the token in the token |
| * input stream. |
| */ |
| ANTLR3_MARKER (*getTokenIndex) (struct ANTLR3_COMMON_TOKEN_struct * token); |
| |
| /** Pointer to a function that can set the token index of this token in the token |
| * input stream. |
| */ |
| void (*setTokenIndex) (struct ANTLR3_COMMON_TOKEN_struct * token, ANTLR3_MARKER); |
| |
| /** Pointer to a function that gets the start index in the input stream for this token. |
| */ |
| ANTLR3_MARKER (*getStartIndex) (struct ANTLR3_COMMON_TOKEN_struct * token); |
| |
| /** Pointer to a function that sets the start index in the input stream for this token. |
| */ |
| void (*setStartIndex) (struct ANTLR3_COMMON_TOKEN_struct * token, ANTLR3_MARKER index); |
| |
| /** Pointer to a function that gets the stop index in the input stream for this token. |
| */ |
| ANTLR3_MARKER (*getStopIndex) (struct ANTLR3_COMMON_TOKEN_struct * token); |
| |
| /** Pointer to a function that sets the stop index in the input stream for this token. |
| */ |
| void (*setStopIndex) (struct ANTLR3_COMMON_TOKEN_struct * token, ANTLR3_MARKER index); |
| |
| /** Pointer to a function that returns this token as a text representation that can be |
| * printed with embedded control codes such as \n replaced with the printable sequence "\\n" |
| * This also yields a string structure that can be used more easily than the pointer to |
| * the input stream in certain situations. |
| */ |
| pANTLR3_STRING (*toString) (struct ANTLR3_COMMON_TOKEN_struct * token); |
| } |
| ANTLR3_COMMON_TOKEN; |
| |
| /** \brief ANTLR3 Token factory interface to create lots of tokens efficiently |
| * rather than creating and freeing lots of little bits of memory. |
| */ |
| typedef struct ANTLR3_TOKEN_FACTORY_struct |
| { |
| /** Pointers to the array of tokens that this factory has produced so far |
| */ |
| pANTLR3_COMMON_TOKEN *pools; |
| |
| /** Current pool tokens we are allocating from |
| */ |
| ANTLR3_INT32 thisPool; |
| |
| /** Maximum pool count we have available |
| */ |
| ANTLR3_INT32 maxPool; |
| |
| /** The next token to throw out from the pool, will cause a new pool allocation |
| * if this exceeds the available tokenCount |
| */ |
| ANTLR3_UINT32 nextToken; |
| |
| /** Trick to initialize tokens and their API quickly, we set up this token when the |
| * factory is created, then just copy the memory it uses into the new token. |
| */ |
| ANTLR3_COMMON_TOKEN unTruc; |
| |
| /** Pointer to an input stream that is using this token factory (may be NULL) |
| * which will be assigned to the tokens automatically. |
| */ |
| pANTLR3_INPUT_STREAM input; |
| |
| /** Pointer to a function that returns a new token |
| */ |
| pANTLR3_COMMON_TOKEN (*newToken) (struct ANTLR3_TOKEN_FACTORY_struct * factory); |
| |
| /** Pointer to a function that resets the factory so you can reuse the pools it |
| * has laready allocated |
| */ |
| void (*reset) (struct ANTLR3_TOKEN_FACTORY_struct * factory); |
| |
| /** Pointer to a function that changes teh curent inptu stream so that |
| * new tokens are created with reference to their originating text. |
| */ |
| void (*setInputStream) (struct ANTLR3_TOKEN_FACTORY_struct * factory, pANTLR3_INPUT_STREAM input); |
| /** Pointer to a function the destroys the factory |
| */ |
| void (*close) (struct ANTLR3_TOKEN_FACTORY_struct * factory); |
| } |
| ANTLR3_TOKEN_FACTORY; |
| |
| #ifdef __cplusplus |
| } |
| #endif |
| |
| #endif |