| /** \file |
| * Defines the interface for an ANTLR3 common token stream. Custom token streams should create |
| * one of these and then override any functions by installing their own pointers |
| * to implement the various functions. |
| */ |
| #ifndef _ANTLR3_TOKENSTREAM_HPP |
| #define _ANTLR3_TOKENSTREAM_HPP |
| |
| // [The "BSD licence"] |
| // Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB |
| |
| // |
| // All rights reserved. |
| // |
| // Redistribution and use in source and binary forms, with or without |
| // modification, are permitted provided that the following conditions |
| // are met: |
| // 1. Redistributions of source code must retain the above copyright |
| // notice, this list of conditions and the following disclaimer. |
| // 2. Redistributions in binary form must reproduce the above copyright |
| // notice, this list of conditions and the following disclaimer in the |
| // documentation and/or other materials provided with the distribution. |
| // 3. The name of the author may not be used to endorse or promote products |
| // derived from this software without specific prior written permission. |
| // |
| // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR |
| // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
| // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. |
| // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, |
| // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
| // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF |
| // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| |
| #include "antlr3defs.hpp" |
| |
| /** Definition of a token source, which has a pointer to a function that |
| * returns the next token (using a token factory if it is going to be |
| * efficient) and a pointer to an ANTLR3_INPUT_STREAM. This is slightly |
| * different to the Java interface because we have no way to implement |
| * multiple interfaces without defining them in the interface structure |
| * or casting (void *), which is too convoluted. |
| */ |
| ANTLR_BEGIN_NAMESPACE() |
| |
| //We are not making it subclass AllocPolicy, as this will always be a base class |
| template<class ImplTraits> |
| class TokenSource |
| { |
| public: |
| typedef typename ImplTraits::CommonTokenType TokenType; |
| typedef TokenType CommonTokenType; |
| typedef typename ImplTraits::StringType StringType; |
| typedef typename ImplTraits::LexerType LexerType; |
| |
| private: |
| /** A special pre-allocated token, which signifies End Of Tokens. Because this must |
| * be set up with the current input index and so on, we embed the structure and |
| * return the address of it. It is marked as factoryMade, so that it is never |
| * attempted to be freed. |
| */ |
| TokenType m_eofToken; |
| |
| /// A special pre-allocated token, which is returned by mTokens() if the |
| /// lexer rule said to just skip the generated token altogether. |
| /// Having this single token stops us wasting memory by have the token factory |
| /// actually create something that we are going to SKIP(); anyway. |
| /// |
| TokenType m_skipToken; |
| |
| /** When the token source is constructed, it is populated with the file |
| * name from whence the tokens were produced by the lexer. This pointer is a |
| * copy of the one supplied by the CharStream (and may be NULL) so should |
| * not be manipulated other than to copy or print it. |
| */ |
| StringType m_fileName; |
| |
| public: |
| TokenType& get_eofToken(); |
| const TokenType& get_eofToken() const; |
| TokenType& get_skipToken(); |
| StringType& get_fileName(); |
| LexerType* get_super(); |
| |
| void set_fileName( const StringType& fileName ); |
| |
| /** |
| * \brief |
| * Default implementation of the nextToken() call for a lexer. |
| * |
| * \param toksource |
| * Points to the implementation of a token source. The lexer is |
| * addressed by the super structure pointer. |
| * |
| * \returns |
| * The next token in the current input stream or the EOF token |
| * if there are no more tokens in any input stream in the stack. |
| * |
| * Write detailed description for nextToken here. |
| * |
| * \remarks |
| * Write remarks for nextToken here. |
| * |
| * \see nextTokenStr |
| */ |
| TokenType* nextToken(); |
| CommonTokenType* nextToken( BoolForwarder<true> /*isFiltered*/ ); |
| CommonTokenType* nextToken( BoolForwarder<false> /*isFiltered*/ ); |
| |
| /// |
| /// \brief |
| /// Returns the next available token from the current input stream. |
| /// |
| /// \param toksource |
| /// Points to the implementation of a token source. The lexer is |
| /// addressed by the super structure pointer. |
| /// |
| /// \returns |
| /// The next token in the current input stream or the EOF token |
| /// if there are no more tokens. |
| /// |
| /// \remarks |
| /// Write remarks for nextToken here. |
| /// |
| /// \see nextToken |
| /// |
| TokenType* nextTokenStr(); |
| |
| protected: |
| TokenSource(); |
| }; |
| |
| /** Definition of the ANTLR3 common token stream interface. |
| * \remark |
| * Much of the documentation for this interface is stolen from Ter's Java implementation. |
| */ |
| template<class ImplTraits> |
| class TokenStream : public ImplTraits::TokenIntStreamType |
| { |
| public: |
| typedef typename ImplTraits::TokenSourceType TokenSourceType; |
| typedef typename ImplTraits::TokenIntStreamType IntStreamType; |
| typedef typename ImplTraits::CommonTokenType TokenType; |
| typedef TokenType UnitType; |
| typedef typename ImplTraits::StringType StringType; |
| typedef typename ImplTraits::DebugEventListenerType DebugEventListenerType; |
| typedef typename ImplTraits::TokenStreamType TokenStreamType; |
| typedef typename ImplTraits::ParserType ComponentType; |
| |
| protected: |
| /** Pointer to the token source for this stream |
| */ |
| TokenSourceType* m_tokenSource; |
| |
| /// Debugger interface, is this is a debugging token stream |
| /// |
| DebugEventListenerType* m_debugger; |
| |
| /// Indicates the initial stream state for dbgConsume() |
| /// |
| bool m_initialStreamState; |
| |
| public: |
| TokenStream(TokenSourceType* source, DebugEventListenerType* debugger); |
| IntStreamType* get_istream(); |
| TokenSourceType* get_tokenSource() const; |
| void set_tokenSource( TokenSourceType* tokenSource ); |
| |
| /** Get Token at current input pointer + i ahead where i=1 is next Token. |
| * i<0 indicates tokens in the past. So -1 is previous token and -2 is |
| * two tokens ago. LT(0) is undefined. For i>=n, return Token.EOFToken. |
| * Return null for LT(0) and any index that results in an absolute address |
| * that is negative. |
| */ |
| const TokenType* _LT(ANTLR_INT32 k); |
| |
| /** Where is this stream pulling tokens from? This is not the name, but |
| * a pointer into an interface that contains a ANTLR3_TOKEN_SOURCE interface. |
| * The Token Source interface contains a pointer to the input stream and a pointer |
| * to a function that returns the next token. |
| */ |
| TokenSourceType* getTokenSource(); |
| |
| /** Function that installs a token source for teh stream |
| */ |
| void setTokenSource(TokenSourceType* tokenSource); |
| |
| /** Return the text of all the tokens in the stream, as the old tramp in |
| * Leeds market used to say; "Get the lot!" |
| */ |
| StringType toString(); |
| |
| /** Return the text of all tokens from start to stop, inclusive. |
| * If the stream does not buffer all the tokens then it can just |
| * return an empty ANTLR3_STRING or NULL; Grammars should not access $ruleLabel.text in |
| * an action in that case. |
| */ |
| StringType toStringSS(ANTLR_MARKER start, ANTLR_MARKER stop); |
| |
| /** Because the user is not required to use a token with an index stored |
| * in it, we must provide a means for two token objects themselves to |
| * indicate the start/end location. Most often this will just delegate |
| * to the other toString(int,int). This is also parallel with |
| * the pTREENODE_STREAM->toString(Object,Object). |
| */ |
| StringType toStringTT(const TokenType* start, const TokenType* stop); |
| |
| |
| /** Function that sets the token stream into debugging mode |
| */ |
| void setDebugListener(DebugEventListenerType* debugger); |
| |
| TokenStream(); |
| |
| }; |
| |
| /** Common token stream is an implementation of ANTLR_TOKEN_STREAM for the default |
| * parsers and recognizers. You may of course build your own implementation if |
| * you are so inclined. |
| */ |
| template<bool TOKENS_ACCESSED_FROM_OWNING_RULE, class ListType, class MapType> |
| class TokenStoreSelector |
| { |
| public: |
| typedef ListType TokensType; |
| }; |
| |
| template<class ListType, class MapType> |
| class TokenStoreSelector<true, ListType, MapType> |
| { |
| public: |
| typedef MapType TokensType; |
| }; |
| |
| template<class ImplTraits> |
| class CommonTokenStream : public TokenStream<ImplTraits> |
| { |
| public: |
| typedef typename ImplTraits::AllocPolicyType AllocPolicyType; |
| typedef typename ImplTraits::BitsetType BitsetType; |
| typedef typename ImplTraits::CommonTokenType TokenType; |
| typedef typename ImplTraits::TokenSourceType TokenSourceType; |
| typedef typename ImplTraits::DebugEventListenerType DebugEventListenerType; |
| typedef typename AllocPolicyType::template ListType<TokenType> TokensListType; |
| typedef typename AllocPolicyType::template OrderedMapType<ANTLR_MARKER, TokenType> TokensMapType; |
| typedef typename TokenStoreSelector< ImplTraits::TOKENS_ACCESSED_FROM_OWNING_RULE, |
| TokensListType, TokensMapType >::TokensType TokensType; |
| |
| typedef typename AllocPolicyType::template UnOrderedMapType<ANTLR_UINT32, ANTLR_UINT32> ChannelOverridesType; |
| typedef typename AllocPolicyType::template OrderedSetType<ANTLR_UINT32> DiscardSetType; |
| typedef typename AllocPolicyType::template ListType<ANTLR_UINT32> IntListType; |
| typedef TokenStream<ImplTraits> BaseType; |
| |
| private: |
| /** Records every single token pulled from the source indexed by the token index. |
| * There might be more efficient ways to do this, such as referencing directly in to |
| * the token factory pools, but for now this is convenient and the ANTLR3_LIST is not |
| * a huge overhead as it only stores pointers anyway, but allows for iterations and |
| * so on. |
| */ |
| TokensType m_tokens; |
| |
| /** Override map of tokens. If a token type has an entry in here, then |
| * the pointer in the table points to an int, being the override channel number |
| * that should always be used for this token type. |
| */ |
| ChannelOverridesType m_channelOverrides; |
| |
| /** Discared set. If a token has an entry in this table, then it is thrown |
| * away (data pointer is always NULL). |
| */ |
| DiscardSetType m_discardSet; |
| |
| /* The channel number that this token stream is tuned to. For instance, whitespace |
| * is usually tuned to channel 99, which no token stream would normally tune to and |
| * so it is thrown away. |
| */ |
| ANTLR_UINT32 m_channel; |
| |
| /** The index into the tokens list of the current token (the next one that will be |
| * consumed. p = -1 indicates that the token list is empty. |
| */ |
| ANTLR_INT32 m_p; |
| |
| /* The total number of tokens issued till now. For streams that delete tokens, |
| this helps in issuing the index |
| */ |
| ANTLR_UINT32 m_nissued; |
| |
| /** If this flag is set to true, then tokens that the stream sees that are not |
| * in the channel that this stream is tuned to, are not tracked in the |
| * tokens table. When set to false, ALL tokens are added to the tracking. |
| */ |
| bool m_discardOffChannel; |
| |
| public: |
| CommonTokenStream(ANTLR_UINT32 hint, TokenSourceType* source = NULL, |
| DebugEventListenerType* debugger = NULL); |
| ~CommonTokenStream(); |
| TokensType& get_tokens(); |
| const TokensType& get_tokens() const; |
| DiscardSetType& get_discardSet(); |
| const DiscardSetType& get_discardSet() const; |
| ANTLR_INT32 get_p() const; |
| void set_p( ANTLR_INT32 p ); |
| void inc_p(); |
| void dec_p(); |
| |
| /** A simple filter mechanism whereby you can tell this token stream |
| * to force all tokens of type ttype to be on channel. For example, |
| * when interpreting, we cannot exec actions so we need to tell |
| * the stream to force all WS and NEWLINE to be a different, ignored |
| * channel. |
| */ |
| void setTokenTypeChannel(ANTLR_UINT32 ttype, ANTLR_UINT32 channel); |
| |
| /** Add a particular token type to the discard set. If a token is found to belong |
| * to this set, then it is skipped/thrown away |
| */ |
| void discardTokenType(ANTLR_INT32 ttype); |
| |
| //This will discard tokens of a particular rule after the rule execution completion |
| void discardTokens( ANTLR_MARKER start, ANTLR_MARKER stop ); |
| void discardTokens( ANTLR_MARKER start, ANTLR_MARKER stop, |
| BoolForwarder<true> tokens_accessed_from_owning_rule ); |
| void discardTokens( ANTLR_MARKER start, ANTLR_MARKER stop, |
| BoolForwarder<false> tokens_accessed_from_owning_rule ); |
| |
| void insertToken( const TokenType& tok ); |
| void insertToken( const TokenType& tok, BoolForwarder<true> tokens_accessed_from_owning_rule ); |
| void insertToken( const TokenType& tok, BoolForwarder<false> tokens_accessed_from_owning_rule ); |
| |
| /** Get a token at an absolute index i; 0..n-1. This is really only |
| * needed for profiling and debugging and token stream rewriting. |
| * If you don't want to buffer up tokens, then this method makes no |
| * sense for you. Naturally you can't use the rewrite stream feature. |
| * I believe DebugTokenStream can easily be altered to not use |
| * this method, removing the dependency. |
| */ |
| const TokenType* get(ANTLR_MARKER i); |
| const TokenType* getToken(ANTLR_MARKER i); |
| const TokenType* getToken( ANTLR_MARKER tok_idx, BoolForwarder<true> tokens_accessed_from_owning_rule ); |
| const TokenType* getToken( ANTLR_MARKER tok_idx, BoolForwarder<false> tokens_accessed_from_owning_rule ); |
| |
| /** Signal to discard off channel tokens from here on in. |
| */ |
| void discardOffChannelToks(bool discard); |
| |
| /** Function that returns a pointer to the ANTLR3_LIST of all tokens |
| * in the stream (this causes the buffer to fill if we have not get any yet) |
| */ |
| TokensType* getTokens(); |
| |
| /** Function that returns all the tokens between a start and a stop index. |
| */ |
| void getTokenRange(ANTLR_UINT32 start, ANTLR_UINT32 stop, TokensListType& tokenRange); |
| |
| /** Function that returns all the tokens indicated by the specified bitset, within a range of tokens |
| */ |
| void getTokensSet(ANTLR_UINT32 start, ANTLR_UINT32 stop, BitsetType* types, TokensListType& tokenSet); |
| |
| /** Function that returns all the tokens indicated by being a member of the supplied List |
| */ |
| void getTokensList(ANTLR_UINT32 start, ANTLR_UINT32 stop, |
| const IntListType& list, TokensListType& tokenList); |
| |
| /** Function that returns all tokens of a certain type within a range. |
| */ |
| void getTokensType(ANTLR_UINT32 start, ANTLR_UINT32 stop, ANTLR_UINT32 type, TokensListType& tokens); |
| |
| /** Function that resets the token stream so that it can be reused, but |
| * but that does not free up any resources, such as the token factory |
| * the factory pool and so on. This prevents the need to keep freeing |
| * and reallocating the token pools if the thing you are building is |
| * a multi-shot dameon or somethign like that. It is much faster to |
| * just reuse all the vectors. |
| */ |
| void reset(); |
| |
| const TokenType* LB(ANTLR_INT32 k); |
| |
| |
| void fillBufferExt(); |
| void fillBuffer(); |
| |
| bool hasReachedFillbufferTarget( ANTLR_UINT32 cnt, BoolForwarder<true> tokens_accessed_from_owning_rule ); |
| bool hasReachedFillbufferTarget( ANTLR_UINT32 cnt, BoolForwarder<false> tokens_accessed_from_owning_rule ); |
| |
| ANTLR_UINT32 skipOffTokenChannels(ANTLR_INT32 i); |
| ANTLR_UINT32 skipOffTokenChannelsReverse(ANTLR_INT32 x); |
| ANTLR_MARKER index_impl(); |
| }; |
| |
| class TokenAccessException : public std::exception |
| { |
| virtual const char* what() const throw() |
| { |
| return " Attempted access on Deleted Token"; |
| } |
| }; |
| |
| ANTLR_END_NAMESPACE() |
| |
| #include "antlr3tokenstream.inl" |
| |
| #endif |