| /** \file |
| * While the C runtime does not need to model the state of |
| * multiple lexers and parsers in the same way as the Java runtime does, |
| * it is no overhead to reflect that model. In fact, the |
| * C runtime has always been able to share recognizer state. |
| * |
| * This 'class' therefore defines all the elements of a recognizer |
| * (either lexer, parser or tree parser) that are needed to |
| * track the current recognition state. Multiple recognizers |
| * may then share this state, for instance when one grammar |
| * imports another. |
| */ |
| |
| #ifndef _ANTLR3_RECOGNIZER_SHARED_STATE_HPP |
| #define _ANTLR3_RECOGNIZER_SHARED_STATE_HPP |
| |
| // [The "BSD licence"] |
| // Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB |
| |
| // |
| // All rights reserved. |
| // |
| // Redistribution and use in source and binary forms, with or without |
| // modification, are permitted provided that the following conditions |
| // are met: |
| // 1. Redistributions of source code must retain the above copyright |
| // notice, this list of conditions and the following disclaimer. |
| // 2. Redistributions in binary form must reproduce the above copyright |
| // notice, this list of conditions and the following disclaimer in the |
| // documentation and/or other materials provided with the distribution. |
| // 3. The name of the author may not be used to endorse or promote products |
| // derived from this software without specific prior written permission. |
| // |
| // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR |
| // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
| // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. |
| // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, |
| // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
| // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF |
| // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| |
| #include "antlr3defs.hpp" |
| |
| ANTLR_BEGIN_NAMESPACE() |
| |
| /** All the data elements required to track the current state |
| * of any recognizer (lexer, parser, tree parser). |
| * May be shared between multiple recognizers such that |
| * grammar inheritance is easily supported. |
| * |
| * \tparam ImplTraits Traits bundle that supplies the concrete types used here: |
| * AllocPolicyType (also the base class), CommonTokenType, StringType, |
| * TokenSourceType, BitsetType, BitsetListType, InputStreamType, and the |
| * member templates RewriteStreamType and ExceptionBaseType. |
| * \tparam StreamType The stream type being recognized; its UnitType is exposed |
| * as TokenType, and it parameterizes ComponentTypeFinder and ExceptionBaseType. |
| * |
| * Only declarations appear in this class body. The member function definitions |
| * are presumably provided by antlr3recognizersharedstate.inl, which this header |
| * includes after the class - confirm there before relying on the exact |
| * behaviour of any accessor. |
| * |
| * Interface overview (as far as the declarations show): |
| * - get_xxx() / set_xxx() functions are named after the m_xxx data members. |
| * - get_following(), get_channel(), get_text() and get_streams() return |
| * non-const references and get_token() returns a non-const pointer, so |
| * callers can modify state through the returned handle. |
| * - m_exception, m_ruleMemo, m_tokenNames and m_tokSource are raw pointers; |
| * nothing in this declaration says who owns them. |
| * NOTE(review): confirm ownership and lifetime against the callers. |
| */ |
| template<class ImplTraits, class StreamType> |
| class RecognizerSharedState : public ImplTraits::AllocPolicyType |
| { |
| public: |
| typedef typename ImplTraits::AllocPolicyType AllocPolicyType; |
| typedef typename StreamType::UnitType TokenType; |
| typedef typename ImplTraits::CommonTokenType CommonTokenType; |
| /* Component, rewrite-stream, string, token-source, exception and bitset types taken from the traits. */ |
| typedef typename ComponentTypeFinder<ImplTraits, StreamType>::ComponentType ComponentType; |
| typedef typename ImplTraits::template RewriteStreamType< ComponentType > RewriteStreamType; |
| typedef typename ImplTraits::StringType StringType; |
| typedef typename ImplTraits::TokenSourceType TokenSourceType; |
| typedef typename ImplTraits::template ExceptionBaseType<StreamType> ExceptionBaseType; |
| typedef typename ImplTraits::BitsetType BitsetType; |
| typedef typename ImplTraits::BitsetListType BitsetListType; |
| /* Containers built from the allocation policy's StackType / VectorType templates. */ |
| typedef typename AllocPolicyType::template StackType< BitsetListType > FollowingType; |
| typedef typename AllocPolicyType::template StackType< typename ImplTraits::InputStreamType* > InputStreamsType; |
| typedef InputStreamsType StreamsType; |
| typedef typename AllocPolicyType::template VectorType<RewriteStreamType> RewriteStreamsType; |
| /* IntTrie instantiations used for rule memoization (see m_ruleMemo). */ |
| typedef IntTrie<ImplTraits, ANTLR_MARKER> RuleListType; |
| typedef IntTrie<ImplTraits, RuleListType*> RuleMemoType; |
| |
| private: |
| /** Points to the first in a possible chain of exceptions that the |
| * recognizer has discovered. |
| */ |
| ExceptionBaseType* m_exception; |
| |
| |
| /** Track the set of token types that can follow any rule invocation. |
| * Stack structure, to support: List<BitSet>. |
| * Declared as the allocation policy's StackType of BitsetListType. |
| */ |
| FollowingType m_following; |
| |
| /** Carries a hint from the creator of the recognizer as to how big this |
| * thing is going to get. This allows us to tune hash tables accordingly. |
| * This might not be the best place for this in the end but we will see. |
| */ |
| ANTLR_UINT32 m_sizeHint; |
| |
| |
| /** If set to true then the recognizer has an exception |
| * condition (this is tested by the generated code for the rules of |
| * the grammar). |
| */ |
| bool m_error; |
| |
| |
| /** This is true when we see an error and before having successfully |
| * matched a token. Prevents generation of more than one error message |
| * per error. |
| */ |
| bool m_errorRecovery; |
| |
| /** In lieu of a return value, this indicates that a rule or token |
| * has failed to match. Reset to false upon valid token match. |
| */ |
| bool m_failed; |
| |
| /* |
| Instead of allocating a CommonTokenType separately, m_token is stored by value |
| inside this object, so there is no null pointer to mean "no token". |
| This flag serves as that null indicator. |
| */ |
| bool m_token_present; |
| |
| /** The index into the input stream where the last error occurred. |
| * This is used to prevent infinite loops where an error is found |
| * but no token is consumed during recovery...another error is found, |
| * ad nauseam. This is a failsafe mechanism to guarantee that at least |
| * one token/tree node is consumed for two errors. |
| */ |
| ANTLR_MARKER m_lastErrorIndex; |
| |
| /** When the recognizer terminates, the error handling functions |
| * will have incremented this value if any error occurred (that was displayed). It can then be |
| * used by the grammar programmer without having to use static globals. |
| */ |
| ANTLR_UINT32 m_errorCount; |
| |
| /** If 0, no backtracking is going on. Safe to exec actions etc... |
| * If >0 then it's the level of backtracking. |
| */ |
| ANTLR_INT32 m_backtracking; |
| |
| /** Rule memoization table. The declared type is IntTrie instantiated with |
| * RuleListType*, where RuleListType is IntTrie<ImplTraits, ANTLR_MARKER> |
| * (see the RuleMemoType / RuleListType typedefs in this class). |
| * Tracks the stop token index for each rule. ruleMemo[ruleIndex] is |
| * the memoization table for ruleIndex. For key ruleStartIndex, you |
| * get back the stop token for associated rule or MEMO_RULE_FAILED. |
| * |
| * This is only used if rule memoization is on. |
| */ |
| RuleMemoType* m_ruleMemo; |
| |
| /** Pointer to an array of token names |
| * that are generally useful in error reporting. The generated parsers install |
| * this pointer. The table it points to is statically allocated as 8 bit ascii |
| * at parser compile time - grammar token names are thus restricted in character |
| * sets, which does not seem too terrible. |
| */ |
| ANTLR_UINT8** m_tokenNames; |
| |
| /** The goal of all lexer rules/methods is to create a token object. |
| * This is an instance variable as multiple rules may collaborate to |
| * create a single token. For example, NUM : INT | FLOAT ; |
| * In this case, you want the INT or FLOAT rule to set token and not |
| * have it reset to a NUM token in rule NUM. |
| * Held by value; m_token_present records whether it currently holds a token. |
| */ |
| CommonTokenType m_token; |
| |
| /** A lexer is a source of tokens, produced by all the generated (or |
| * hand crafted if you like) matching rules. As such it needs to provide |
| * a token source interface implementation. For others, this will become an empty class |
| */ |
| TokenSourceType* m_tokSource; |
| |
| /** The channel number for the current token |
| */ |
| ANTLR_UINT32 m_channel; |
| |
| /** The token type for the current token |
| */ |
| ANTLR_UINT32 m_type; |
| |
| /** The input line (where it makes sense) on which the first character of the current |
| * token resides. |
| */ |
| ANTLR_INT32 m_tokenStartLine; |
| |
| /** The character position of the first character of the current token |
| * within the line specified by tokenStartLine |
| */ |
| ANTLR_INT32 m_tokenStartCharPositionInLine; |
| |
| /** What character index in the stream did the current token start at? |
| * Needed, for example, to get the text for current token. Set at |
| * the start of nextToken. |
| */ |
| ANTLR_MARKER m_tokenStartCharIndex; |
| |
| /** Text for the current token. This can be overridden by setting this |
| * variable directly or by using the SETTEXT() macro (preferred) in your |
| * lexer rules. |
| */ |
| StringType m_text; |
| |
| /** Input stream stack, which allows the C programmer to switch input streams |
| * easily and allow the standard nextToken() implementation to deal with it |
| * as this is a common requirement. |
| * Declared as a StackType of ImplTraits::InputStreamType pointers. |
| */ |
| InputStreamsType m_streams; |
| /* Constructor and getters; only the const-qualified getters are callable on a const object. */ |
| public: |
| RecognizerSharedState(); |
| ExceptionBaseType* get_exception() const; |
| FollowingType& get_following(); |
| ANTLR_UINT32 get_sizeHint() const; |
| bool get_error() const; |
| bool get_errorRecovery() const; |
| bool get_failed() const; |
| bool get_token_present() const; |
| ANTLR_MARKER get_lastErrorIndex() const; |
| ANTLR_UINT32 get_errorCount() const; |
| ANTLR_INT32 get_backtracking() const; |
| RuleMemoType* get_ruleMemo() const; |
| ANTLR_UINT8** get_tokenNames() const; |
| ANTLR_UINT8* get_tokenName( ANTLR_UINT32 i ) const; |
| CommonTokenType* get_token(); |
| TokenSourceType* get_tokSource() const; |
| ANTLR_UINT32& get_channel(); |
| ANTLR_UINT32 get_type() const; |
| ANTLR_INT32 get_tokenStartLine() const; |
| ANTLR_INT32 get_tokenStartCharPositionInLine() const; |
| ANTLR_MARKER get_tokenStartCharIndex() const; |
| StringType& get_text(); |
| InputStreamsType& get_streams(); |
| /* Setters, named after the m_xxx data members; their definitions are not part of this class body. */ |
| void set_following( const FollowingType& following ); |
| void set_sizeHint( ANTLR_UINT32 sizeHint ); |
| void set_error( bool error ); |
| void set_errorRecovery( bool errorRecovery ); |
| void set_failed( bool failed ); |
| void set_token_present(bool token_present); |
| void set_lastErrorIndex( ANTLR_MARKER lastErrorIndex ); |
| void set_errorCount( ANTLR_UINT32 errorCount ); |
| void set_backtracking( ANTLR_INT32 backtracking ); |
| void set_ruleMemo( RuleMemoType* ruleMemo ); |
| void set_tokenNames( ANTLR_UINT8** tokenNames ); |
| void set_tokSource( TokenSourceType* tokSource ); |
| void set_channel( ANTLR_UINT32 channel ); |
| void set_exception( ExceptionBaseType* exception ); |
| void set_type( ANTLR_UINT32 type ); |
| void set_token( const CommonTokenType* tok); |
| void set_tokenStartLine( ANTLR_INT32 tokenStartLine ); |
| void set_tokenStartCharPositionInLine( ANTLR_INT32 tokenStartCharPositionInLine ); |
| void set_tokenStartCharIndex( ANTLR_MARKER tokenStartCharIndex ); |
| void set_text( const StringType& text ); |
| void set_streams( const InputStreamsType& streams ); |
| /* Counter helpers; by their names they presumably step m_errorCount and m_backtracking - confirm in the .inl. */ |
| void inc_errorCount(); |
| void inc_backtracking(); |
| void dec_backtracking(); |
| }; |
| |
| ANTLR_END_NAMESPACE() |
| |
| #include "antlr3recognizersharedstate.inl" |
| |
| #endif |
| |
| |