// runtime/Cpp/include/antlr3recognizersharedstate.hpp - platform/external/antlr - Git at Google

 /** \file
  * While the C runtime does not need to model the state of
  * multiple lexers and parsers in the same way as the Java runtime does
  * it is no overhead to reflect that model. In fact the
  * C runtime has always been able to share recognizer state.
  *
  * This 'class' therefore defines all the elements of a recognizer
  * (either lexer, parser or tree parser) that are needed to
  * track the current recognition state. Multiple recognizers
  * may then share this state, for instance when one grammar
  * imports another.
  */

 #ifndef	_ANTLR3_RECOGNIZER_SHARED_STATE_HPP
 #define	_ANTLR3_RECOGNIZER_SHARED_STATE_HPP

 // [The "BSD licence"]
 // Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB

 //
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions
 // are met:
 // 1. Redistributions of source code must retain the above copyright
 //    notice, this list of conditions and the following disclaimer.
 // 2. Redistributions in binary form must reproduce the above copyright
 //    notice, this list of conditions and the following disclaimer in the
 //    documentation and/or other materials provided with the distribution.
 // 3. The name of the author may not be used to endorse or promote products
 //    derived from this software without specific prior written permission.
 //
 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 #include "antlr3defs.hpp"

 ANTLR_BEGIN_NAMESPACE()

 /** All the data elements required to track the current state
  *  of any recognizer (lexer, parser, tree parser).
  *  May be shared between multiple recognizers such that
  *  grammar inheritance is easily supported.
  *
  *  \tparam ImplTraits  Traits bundle that supplies the concrete types used
  *                      below (AllocPolicyType, CommonTokenType, StringType,
  *                      TokenSourceType, BitsetType, BitsetListType,
  *                      InputStreamType, and the RewriteStreamType /
  *                      ExceptionBaseType templates).
  *  \tparam StreamType  Stream type the recognizer works on; it must expose
  *                      a nested UnitType.
  */
 template<class ImplTraits, class StreamType>
 class RecognizerSharedState  : public ImplTraits::AllocPolicyType
 {
 public:
 	// Basic types pulled out of the traits bundle and the stream type.
 	typedef typename ImplTraits::AllocPolicyType AllocPolicyType;
 	typedef typename StreamType::UnitType TokenType;
 	typedef typename ImplTraits::CommonTokenType CommonTokenType;

 	// ComponentType is resolved by ComponentTypeFinder from the traits and
 	// the stream type; it parameterizes the rewrite stream type.
 	typedef typename ComponentTypeFinder<ImplTraits, StreamType>::ComponentType  ComponentType;
 	typedef typename ImplTraits::template RewriteStreamType< ComponentType > RewriteStreamType;
 	typedef typename ImplTraits::StringType StringType;
 	typedef typename ImplTraits::TokenSourceType TokenSourceType;
 	typedef typename ImplTraits::template ExceptionBaseType<StreamType> ExceptionBaseType;
 	typedef typename ImplTraits::BitsetType BitsetType;
 	typedef typename ImplTraits::BitsetListType BitsetListType;

 	// Container types, all built from the allocation policy's templates:
 	// a stack of bitset lists, a stack of input stream pointers (StreamsType
 	// is an alias for it), and a vector of rewrite streams.
 	typedef typename AllocPolicyType::template StackType< BitsetListType > FollowingType;
 	typedef typename AllocPolicyType::template StackType< typename ImplTraits::InputStreamType* > InputStreamsType;
 	typedef InputStreamsType StreamsType;
 	typedef typename AllocPolicyType::template VectorType<RewriteStreamType> RewriteStreamsType;

 	// Memoization tables: RuleListType is an IntTrie instantiated with
 	// ANTLR_MARKER, RuleMemoType is an IntTrie instantiated with pointers
 	// to RuleListType.
 	typedef IntTrie<ImplTraits, ANTLR_MARKER> RuleListType;
 	typedef IntTrie<ImplTraits, RuleListType*> RuleMemoType;

 private:
 	/** Points to the first in a possible chain of exceptions that the
      *  recognizer has discovered.
      */
     ExceptionBaseType*			m_exception;


     /** Track the set of token types that can follow any rule invocation.
      *  Stack structure, to support: List<BitSet>.
      */
     FollowingType		m_following;

     /** Keeps a hint from the creator of the recognizer as to how big this
      *  thing is going to get, as the actress said to the bishop. This allows us
      *  to tune hash tables accordingly. This might not be the best place for this
      *  in the end but we will see.
      */
     ANTLR_UINT32		m_sizeHint;


     /** If set to true then the recognizer has an exception
      * condition (this is tested by the generated code for the rules of
      * the grammar).
      */
     bool				m_error;


     /** This is true when we see an error and before having successfully
      *  matched a token.  Prevents generation of more than one error message
      *  per error.
      */
     bool				m_errorRecovery;

 	/** In lieu of a return value, this indicates that a rule or token
      *  has failed to match.  Reset to false upon valid token match.
      */
     bool				m_failed;

 	/*
 	m_token is held by value (not through a pointer), so there is no null
 	pointer to signal "no token"; this flag is the null indicator instead.
 	*/
 	bool				m_token_present;

     /** The index into the input stream where the last error occurred.
      * 	This is used to prevent infinite loops where an error is found
      *  but no token is consumed during recovery...another error is found,
      *  ad nauseam.  This is a failsafe mechanism to guarantee that at least
      *  one token/tree node is consumed for two errors.
      */
     ANTLR_MARKER		m_lastErrorIndex;

     /** When the recognizer terminates, the error handling functions
      *  will have incremented this value if any error occurred (that was displayed). It can then be
      *  used by the grammar programmer without having to use static globals.
      */
     ANTLR_UINT32		m_errorCount;

     /** If 0, no backtracking is going on.  Safe to exec actions etc...
      *  If >0 then it's the level of backtracking.
      */
     ANTLR_INT32			m_backtracking;

     /** Rule memoization table. The declared type is RuleMemoType, an IntTrie
      *  whose entries are RuleListType pointers (earlier wording described
      *  this as an ANTLR3_VECTOR of ANTLR3_LIST).
      *  Tracks  the stop token index for each rule.  ruleMemo[ruleIndex] is
      *  the memoization table for ruleIndex.  For key ruleStartIndex, you
      *  get back the stop token for associated rule or MEMO_RULE_FAILED.
      *
      *  This is only used if rule memoization is on.
      */
     RuleMemoType*		m_ruleMemo;

     /** Pointer to an array of token names
      *  that are generally useful in error reporting. The generated parsers install
      *  this pointer. The table it points to is statically allocated as 8 bit ascii
      *  at parser compile time - grammar token names are thus restricted in character
      *  sets, which does not seem too terrible.
      */
     ANTLR_UINT8**		m_tokenNames;

     /** The goal of all lexer rules/methods is to create a token object.
      *  This is an instance variable as multiple rules may collaborate to
      *  create a single token.  For example, NUM : INT | FLOAT ;
      *  In this case, you want the INT or FLOAT rule to set token and not
      *  have it reset to a NUM token in rule NUM.
      *  Stored by value; see m_token_present for the "no token" state.
      */
     CommonTokenType		m_token;

     /** A lexer is a source of tokens, produced by all the generated (or
      *  hand crafted if you like) matching rules. As such it needs to provide
      *  a token source interface implementation. For others, this will become an empty class
      */
     TokenSourceType*	m_tokSource;

     /** The channel number for the current token
      */
     ANTLR_UINT32			m_channel;

     /** The token type for the current token
      */
     ANTLR_UINT32			m_type;

     /** The input line (where it makes sense) on which the first character of the current
      *  token resides.
      */
     ANTLR_INT32			m_tokenStartLine;

     /** The character position of the first character of the current token
      *  within the line specified by tokenStartLine
      */
     ANTLR_INT32		m_tokenStartCharPositionInLine;

     /** What character index in the stream did the current token start at?
      *  Needed, for example, to get the text for current token.  Set at
      *  the start of nextToken.
      */
     ANTLR_MARKER		m_tokenStartCharIndex;

     /** Text for the current token. This can be overridden by setting this
      *  variable directly or by using the SETTEXT() macro (preferred) in your
      *  lexer rules.
      */
     StringType			m_text;

     /** Input stream stack, which allows the C programmer to switch input streams
      *  easily and allow the standard nextToken() implementation to deal with it
      *  as this is a common requirement.
      */
     InputStreamsType	m_streams;

 public:
 	// Only declarations appear in this class body; the definitions are
 	// presumably supplied by antlr3recognizersharedstate.inl, which this
 	// header includes after the class -- confirm there for exact behavior.
 	RecognizerSharedState();

 	// Getters. By naming convention each get_X presumably exposes member
 	// m_X. Those declared const return by value (or a raw pointer); those
 	// returning a non-const reference or pointer (get_following, get_token,
 	// get_channel, get_text, get_streams) let the caller modify state in place.
 	ExceptionBaseType* get_exception() const;
 	FollowingType& get_following();
 	ANTLR_UINT32 get_sizeHint() const;
 	bool get_error() const;
 	bool get_errorRecovery() const;
 	bool get_failed() const;
 	bool get_token_present() const;
 	ANTLR_MARKER get_lastErrorIndex() const;
 	ANTLR_UINT32 get_errorCount() const;
 	ANTLR_INT32 get_backtracking() const;
 	RuleMemoType* get_ruleMemo() const;
 	ANTLR_UINT8** get_tokenNames() const;
 	// Presumably the i-th entry of the token name table; no bounds
 	// information is visible in this header -- TODO confirm in the .inl.
 	ANTLR_UINT8* get_tokenName( ANTLR_UINT32 i ) const;
 	CommonTokenType* get_token();
 	TokenSourceType* get_tokSource() const;
 	ANTLR_UINT32& get_channel();
 	ANTLR_UINT32 get_type() const;
 	ANTLR_INT32 get_tokenStartLine() const;
 	ANTLR_INT32 get_tokenStartCharPositionInLine() const;
 	ANTLR_MARKER get_tokenStartCharIndex() const;
 	StringType& get_text();
 	InputStreamsType& get_streams();

 	// Setters, one per piece of state. Note that set_token takes a pointer
 	// to a const token although m_token is stored by value; how a null
 	// argument is treated is not visible here -- TODO confirm in the .inl.
 	void  set_following( const FollowingType& following );
 	void  set_sizeHint( ANTLR_UINT32 sizeHint );
 	void  set_error( bool error );
 	void  set_errorRecovery( bool errorRecovery );
 	void  set_failed( bool failed );
 	void  set_token_present(bool token_present);
 	void  set_lastErrorIndex( ANTLR_MARKER lastErrorIndex );
 	void  set_errorCount( ANTLR_UINT32 errorCount );
 	void  set_backtracking( ANTLR_INT32 backtracking );
 	void  set_ruleMemo( RuleMemoType* ruleMemo );
 	void  set_tokenNames( ANTLR_UINT8** tokenNames );
 	void  set_tokSource( TokenSourceType* tokSource );
 	void  set_channel( ANTLR_UINT32 channel );
 	void  set_exception( ExceptionBaseType* exception );
 	void  set_type( ANTLR_UINT32 type );
 	void  set_token( const CommonTokenType* tok);
 	void  set_tokenStartLine( ANTLR_INT32 tokenStartLine );
 	void  set_tokenStartCharPositionInLine( ANTLR_INT32 tokenStartCharPositionInLine );
 	void  set_tokenStartCharIndex( ANTLR_MARKER tokenStartCharIndex );
 	void  set_text( const StringType& text );
 	void  set_streams( const InputStreamsType& streams );

 	// Counter helpers; presumably step m_errorCount / m_backtracking by one.
 	void inc_errorCount();
 	void inc_backtracking();
 	void dec_backtracking();
 };

 ANTLR_END_NAMESPACE()

 #include "antlr3recognizersharedstate.inl"

 #endif
	/** \file
	* While the C runtime does not need to model the state of
	* multiple lexers and parsers in the same way as the Java runtime does
	* it is no overhead to reflect that model. In fact the
	* C runtime has always been able to share recognizer state.
	*
	* This 'class' therefore defines all the elements of a recognizer
	* (either lexer, parser or tree parser) that are needed to
	* track the current recognition state. Multiple recognizers
	* may then share this state, for instance when one grammar
	* imports another.
	*/

	#ifndef _ANTLR3_RECOGNIZER_SHARED_STATE_HPP
	#define _ANTLR3_RECOGNIZER_SHARED_STATE_HPP

	// [The "BSD licence"]
	// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB

	//
	// All rights reserved.
	//
	// Redistribution and use in source and binary forms, with or without
	// modification, are permitted provided that the following conditions
	// are met:
	// 1. Redistributions of source code must retain the above copyright
	// notice, this list of conditions and the following disclaimer.
	// 2. Redistributions in binary form must reproduce the above copyright
	// notice, this list of conditions and the following disclaimer in the
	// documentation and/or other materials provided with the distribution.
	// 3. The name of the author may not be used to endorse or promote products
	// derived from this software without specific prior written permission.
	//
	// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
	// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
	// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
	// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
	// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
	// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
	// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
	// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
	// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

	#include "antlr3defs.hpp"

	ANTLR_BEGIN_NAMESPACE()

	/** All the data elements required to track the current state
	* of any recognizer (lexer, parser, tree parser).
	* May be shared between multiple recognizers such that
	* grammar inheritance is easily supported.
	*
	* \tparam ImplTraits Traits bundle that supplies the concrete types used
	*                    below (AllocPolicyType, CommonTokenType, StringType,
	*                    TokenSourceType, BitsetType, BitsetListType,
	*                    InputStreamType, and the RewriteStreamType /
	*                    ExceptionBaseType templates).
	* \tparam StreamType Stream type the recognizer works on; it must expose
	*                    a nested UnitType.
	*/
	template<class ImplTraits, class StreamType>
	class RecognizerSharedState : public ImplTraits::AllocPolicyType
	{
	public:
	// Basic types pulled out of the traits bundle and the stream type.
	typedef typename ImplTraits::AllocPolicyType AllocPolicyType;
	typedef typename StreamType::UnitType TokenType;
	typedef typename ImplTraits::CommonTokenType CommonTokenType;

	// ComponentType is resolved by ComponentTypeFinder from the traits and
	// the stream type; it parameterizes the rewrite stream type.
	typedef typename ComponentTypeFinder<ImplTraits, StreamType>::ComponentType ComponentType;
	typedef typename ImplTraits::template RewriteStreamType< ComponentType > RewriteStreamType;
	typedef typename ImplTraits::StringType StringType;
	typedef typename ImplTraits::TokenSourceType TokenSourceType;
	typedef typename ImplTraits::template ExceptionBaseType<StreamType> ExceptionBaseType;
	typedef typename ImplTraits::BitsetType BitsetType;
	typedef typename ImplTraits::BitsetListType BitsetListType;

	// Container types, all built from the allocation policy's templates:
	// a stack of bitset lists, a stack of input stream pointers (StreamsType
	// is an alias for it), and a vector of rewrite streams.
	typedef typename AllocPolicyType::template StackType< BitsetListType > FollowingType;
	typedef typename AllocPolicyType::template StackType< typename ImplTraits::InputStreamType* > InputStreamsType;
	typedef InputStreamsType StreamsType;
	typedef typename AllocPolicyType::template VectorType<RewriteStreamType> RewriteStreamsType;

	// Memoization tables: RuleListType is an IntTrie instantiated with
	// ANTLR_MARKER, RuleMemoType is an IntTrie instantiated with pointers
	// to RuleListType.
	typedef IntTrie<ImplTraits, ANTLR_MARKER> RuleListType;
	typedef IntTrie<ImplTraits, RuleListType*> RuleMemoType;

	private:
	/** Points to the first in a possible chain of exceptions that the
	* recognizer has discovered.
	*/
	ExceptionBaseType* m_exception;


	/** Track the set of token types that can follow any rule invocation.
	* Stack structure, to support: List<BitSet>.
	*/
	FollowingType m_following;

	/** Keeps a hint from the creator of the recognizer as to how big this
	* thing is going to get, as the actress said to the bishop. This allows us
	* to tune hash tables accordingly. This might not be the best place for this
	* in the end but we will see.
	*/
	ANTLR_UINT32 m_sizeHint;


	/** If set to true then the recognizer has an exception
	* condition (this is tested by the generated code for the rules of
	* the grammar).
	*/
	bool m_error;


	/** This is true when we see an error and before having successfully
	* matched a token. Prevents generation of more than one error message
	* per error.
	*/
	bool m_errorRecovery;

	/** In lieu of a return value, this indicates that a rule or token
	* has failed to match. Reset to false upon valid token match.
	*/
	bool m_failed;

	/*
	m_token is held by value (not through a pointer), so there is no null
	pointer to signal "no token"; this flag is the null indicator instead.
	*/
	bool m_token_present;

	/** The index into the input stream where the last error occurred.
	* This is used to prevent infinite loops where an error is found
	* but no token is consumed during recovery...another error is found,
	* ad nauseam. This is a failsafe mechanism to guarantee that at least
	* one token/tree node is consumed for two errors.
	*/
	ANTLR_MARKER m_lastErrorIndex;

	/** When the recognizer terminates, the error handling functions
	* will have incremented this value if any error occurred (that was displayed). It can then be
	* used by the grammar programmer without having to use static globals.
	*/
	ANTLR_UINT32 m_errorCount;

	/** If 0, no backtracking is going on. Safe to exec actions etc...
	* If >0 then it's the level of backtracking.
	*/
	ANTLR_INT32 m_backtracking;

	/** Rule memoization table. The declared type is RuleMemoType, an IntTrie
	* whose entries are RuleListType pointers (earlier wording described
	* this as an ANTLR3_VECTOR of ANTLR3_LIST).
	* Tracks the stop token index for each rule. ruleMemo[ruleIndex] is
	* the memoization table for ruleIndex. For key ruleStartIndex, you
	* get back the stop token for associated rule or MEMO_RULE_FAILED.
	*
	* This is only used if rule memoization is on.
	*/
	RuleMemoType* m_ruleMemo;

	/** Pointer to an array of token names
	* that are generally useful in error reporting. The generated parsers install
	* this pointer. The table it points to is statically allocated as 8 bit ascii
	* at parser compile time - grammar token names are thus restricted in character
	* sets, which does not seem too terrible.
	*/
	ANTLR_UINT8** m_tokenNames;

	/** The goal of all lexer rules/methods is to create a token object.
	* This is an instance variable as multiple rules may collaborate to
	* create a single token. For example, NUM : INT | FLOAT ;
	* In this case, you want the INT or FLOAT rule to set token and not
	* have it reset to a NUM token in rule NUM.
	* Stored by value; see m_token_present for the "no token" state.
	*/
	CommonTokenType m_token;

	/** A lexer is a source of tokens, produced by all the generated (or
	* hand crafted if you like) matching rules. As such it needs to provide
	* a token source interface implementation. For others, this will become an empty class
	*/
	TokenSourceType* m_tokSource;

	/** The channel number for the current token
	*/
	ANTLR_UINT32 m_channel;

	/** The token type for the current token
	*/
	ANTLR_UINT32 m_type;

	/** The input line (where it makes sense) on which the first character of the current
	* token resides.
	*/
	ANTLR_INT32 m_tokenStartLine;

	/** The character position of the first character of the current token
	* within the line specified by tokenStartLine
	*/
	ANTLR_INT32 m_tokenStartCharPositionInLine;

	/** What character index in the stream did the current token start at?
	* Needed, for example, to get the text for current token. Set at
	* the start of nextToken.
	*/
	ANTLR_MARKER m_tokenStartCharIndex;

	/** Text for the current token. This can be overridden by setting this
	* variable directly or by using the SETTEXT() macro (preferred) in your
	* lexer rules.
	*/
	StringType m_text;

	/** Input stream stack, which allows the C programmer to switch input streams
	* easily and allow the standard nextToken() implementation to deal with it
	* as this is a common requirement.
	*/
	InputStreamsType m_streams;

	public:
	// Only declarations appear in this class body; the definitions are
	// presumably supplied by antlr3recognizersharedstate.inl, which this
	// header includes after the class -- confirm there for exact behavior.
	RecognizerSharedState();

	// Getters. By naming convention each get_X presumably exposes member
	// m_X. Those declared const return by value (or a raw pointer); those
	// returning a non-const reference or pointer (get_following, get_token,
	// get_channel, get_text, get_streams) let the caller modify state in place.
	ExceptionBaseType* get_exception() const;
	FollowingType& get_following();
	ANTLR_UINT32 get_sizeHint() const;
	bool get_error() const;
	bool get_errorRecovery() const;
	bool get_failed() const;
	bool get_token_present() const;
	ANTLR_MARKER get_lastErrorIndex() const;
	ANTLR_UINT32 get_errorCount() const;
	ANTLR_INT32 get_backtracking() const;
	RuleMemoType* get_ruleMemo() const;
	ANTLR_UINT8** get_tokenNames() const;
	// Presumably the i-th entry of the token name table; no bounds
	// information is visible in this header -- TODO confirm in the .inl.
	ANTLR_UINT8* get_tokenName( ANTLR_UINT32 i ) const;
	CommonTokenType* get_token();
	TokenSourceType* get_tokSource() const;
	ANTLR_UINT32& get_channel();
	ANTLR_UINT32 get_type() const;
	ANTLR_INT32 get_tokenStartLine() const;
	ANTLR_INT32 get_tokenStartCharPositionInLine() const;
	ANTLR_MARKER get_tokenStartCharIndex() const;
	StringType& get_text();
	InputStreamsType& get_streams();

	// Setters, one per piece of state. Note that set_token takes a pointer
	// to a const token although m_token is stored by value; how a null
	// argument is treated is not visible here -- TODO confirm in the .inl.
	void set_following( const FollowingType& following );
	void set_sizeHint( ANTLR_UINT32 sizeHint );
	void set_error( bool error );
	void set_errorRecovery( bool errorRecovery );
	void set_failed( bool failed );
	void set_token_present(bool token_present);
	void set_lastErrorIndex( ANTLR_MARKER lastErrorIndex );
	void set_errorCount( ANTLR_UINT32 errorCount );
	void set_backtracking( ANTLR_INT32 backtracking );
	void set_ruleMemo( RuleMemoType* ruleMemo );
	void set_tokenNames( ANTLR_UINT8** tokenNames );
	void set_tokSource( TokenSourceType* tokSource );
	void set_channel( ANTLR_UINT32 channel );
	void set_exception( ExceptionBaseType* exception );
	void set_type( ANTLR_UINT32 type );
	void set_token( const CommonTokenType* tok);
	void set_tokenStartLine( ANTLR_INT32 tokenStartLine );
	void set_tokenStartCharPositionInLine( ANTLR_INT32 tokenStartCharPositionInLine );
	void set_tokenStartCharIndex( ANTLR_MARKER tokenStartCharIndex );
	void set_text( const StringType& text );
	void set_streams( const InputStreamsType& streams );

	// Counter helpers; presumably step m_errorCount / m_backtracking by one.
	void inc_errorCount();
	void inc_backtracking();
	void dec_backtracking();
	};

	ANTLR_END_NAMESPACE()

	#include "antlr3recognizersharedstate.inl"

	#endif