antlr-3.4/runtime/Ruby/lib/antlr3/token.rb - platform/external/antlr - Git at Google

 #!/usr/bin/ruby
 # encoding: utf-8

 =begin LICENSE

 [The "BSD licence"]
 Copyright (c) 2009-2010 Kyle Yetter
 All rights reserved.

 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions
 are met:

  1. Redistributions of source code must retain the above copyright
     notice, this list of conditions and the following disclaimer.
  2. Redistributions in binary form must reproduce the above copyright
     notice, this list of conditions and the following disclaimer in the
     documentation and/or other materials provided with the distribution.
  3. The name of the author may not be used to endorse or promote products
     derived from this software without specific prior written permission.

 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 =end

 module ANTLR3

 =begin rdoc ANTLR3::Token

 At a minimum, tokens are data structures that bind together a chunk of text and
 a corresponding type symbol, which categorizes/characterizes the content of the
 text. Tokens also usually carry information about their location in the input,
 such as absolute character index, line number, and position within the line (or
 column).

 Furthermore, ANTLR tokens are assigned a "channel" number, an extra degree of
 categorization that groups things on a larger scale. Parsers will usually ignore
 tokens that have channel value 99 (the HIDDEN_CHANNEL), so you can keep things
 like comment and white space huddled together with neighboring tokens,
 effectively ignoring them without discarding them.

 ANTLR tokens also keep a reference to the source stream from which they
 originated. Token streams will also provide an index value for the token, which
 indicates the position of the token relative to other tokens in the stream,
 starting at zero. For example, the 22nd token pulled from a lexer by
 CommonTokenStream will have index value 21.

 == Token as an Interface

 This library provides a token implementation (see CommonToken). Additionally,
 you may write your own token class as long as you provide methods that give
 access to the attributes expected by a token. Even though most of the ANTLR
 library tries to use duck-typing techniques instead of pure object-oriented type
 checking, it's a good idea to include this ANTLR3::Token into your customized
 token class.

 =end

 module Token
   include ANTLR3::Constants
   include Comparable

   # the token's associated chunk of text
   attr_accessor :text

   # the integer value associated with the token's type
   attr_accessor :type

   # the text's starting line number within the source (indexed starting at 1)
   attr_accessor :line

   # the text's starting position in the line within the source (indexed starting at 0)
   attr_accessor :column

   # the integer value of the channel to which the token is assigned
   attr_accessor :channel

   # the index of the token with respect to the other tokens produced during lexing
   attr_accessor :index

   # a reference to the input stream from which the token was extracted
   attr_accessor :input

   # the absolute character index in the input at which the text starts
   attr_accessor :start

   # the absolute character index in the input at which the text ends
   attr_accessor :stop

   alias :input_stream :input
   alias :input_stream= :input=
   alias :token_index :index
   alias :token_index= :index=

   #
   # The match operator has been implemented to match against several different
   # attributes of a token for convenience in quick scripts
   #
   # @example Match against an integer token type constant
   #   token =~ VARIABLE_NAME   => true/false
   # @example Match against a token type name as a Symbol
   #   token =~ :FLOAT          => true/false
   # @example Match the token text against a Regular Expression
   #   token =~ /^@[a-z_]\w*$/i
   # @example Compare the token's text to a string
   #   token =~ "class"
   #
   # Any other kind of operand is handed to the inherited =~ when one exists;
   # otherwise the result is nil.
   #
   def =~ obj
     case obj
     when Integer then type == obj
     when Symbol then name == obj.to_s
     when Regexp then obj =~ text
     when String then text == obj
     else
       # Object#=~ is gone on newer rubies (removed in 3.2), so a bare
       # `super` would raise NoMethodError there
       defined?( super ) ? super : nil
     end
   end

   #
   # Tokens are comparable by their stream index values. Returns nil (the
   # conventional "not comparable" result) when +tk2+ cannot supply an index.
   #
   def <=> tk2
     index <=> tk2.index
   rescue NoMethodError, ArgumentError
     # Comparable#== stopped rescuing errors raised by <=> in ruby 2.3, so
     # something like `token == nil` would otherwise blow up here
     nil
   end

   #
   # Copies every attribute of +orig+ except the stream index, which is reset
   # to -1. The text is cloned so the copy does not share a string with +orig+.
   #
   def initialize_copy( orig )
     self.index   = -1
     self.type    = orig.type
     self.channel = orig.channel
     self.text    = orig.text.clone if orig.text
     self.start   = orig.start
     self.stop    = orig.stop
     self.line    = orig.line
     self.column  = orig.column
     self.input   = orig.input
   end

   # true when the token has an input stream plus start and stop positions
   def concrete?
     input && start && stop ? true : false
   end

   # the inverse of #concrete?
   def imaginary?
     input && start && stop ? false : true
   end

   # the name registered for the token's type, looked up via #token_name
   def name
     token_name( type )
   end

   # the source name reported by the input stream, if the token has an input
   def source_name
     i = input and i.source_name
   end

   # true when the token is assigned to HIDDEN_CHANNEL
   def hidden?
     channel == HIDDEN_CHANNEL
   end

   # the start..stop slice of the input for concrete tokens, #text otherwise
   def source_text
     concrete? ? input.substring( start, stop ) : text
   end

   #
   # Sets the token's channel value to HIDDEN_CHANNEL
   #
   def hide!
     self.channel = HIDDEN_CHANNEL
   end

   #
   # Builds a compact one-line description of the token: stream index (when
   # non-negative), type name, text, line/column (when line is positive),
   # character range and, for tokens off the default channel, the channel.
   #
   # Attributes that are still nil are simply left out, so tokens that have
   # not been fully populated can be inspected without raising.
   #
   def inspect
     front           = ( index && index >= 0 ) ? "#{ index } " : ''
     text_inspect    = text ? "[#{ text.inspect }] " : ' '
     text_position   = ( line && line > 0 ) ? "@ line #{ line } col #{ column } " : ''
     stream_position = start ? "(#{ range.inspect })" : ''

     rep = "#{ front }#{ name }#{ text_inspect }#{ text_position }#{ stream_position }"
     rep.strip!
     rep << " (#{ channel })" unless channel == DEFAULT_CHANNEL
     return( rep )
   end

   # pretty-printing support: emits the #inspect text through +printer+
   def pretty_print( printer )
     printer.text( inspect )
   end

   # start..stop as a Range, or nil when the two values cannot form a range
   def range
     start..stop rescue nil
   end

   # the token's stream index as an integer
   def to_i
     index.to_i
   end

   # the token's text (an empty string when text is nil)
   def to_s
     text.to_s
   end

 private

   # default type-to-name lookup; only the built-in token types are known here
   def token_name( type )
     BUILT_IN_TOKEN_NAMES[ type ]
   end
 end

 # the member order below is also the positional argument order of the
 # generated constructor
 CommonToken = Struct.new(
   :type,  :channel, :text,
   :input, :start,   :stop,
   :index, :line,    :column
 )

 =begin rdoc ANTLR3::CommonToken

 The base class for the standard implementation of Token. It is implemented as a
 simple Struct as tokens are basically simple data structures binding together a
 bunch of different information and Structs are slightly faster than a standard
 Object with accessor methods implementation.

 By default, ANTLR generated ruby code will provide a customized subclass of
 CommonToken to track token-type names efficiently for debugging, inspection, and
 general utility. Thus code generated for a standard combo lexer-parser grammar
 named XYZ will have a base module named XYZ and a customized CommonToken
 subclass named XYZ::Token.

 Here is the token structure attribute list in order:

 * <tt>type</tt>
 * <tt>channel</tt>
 * <tt>text</tt>
 * <tt>input</tt>
 * <tt>start</tt>
 * <tt>stop</tt>
 * <tt>index</tt>
 * <tt>line</tt>
 * <tt>column</tt>

 =end

 class CommonToken
   include Token

   # values CommonToken.create falls back on for attributes the caller omits
   DEFAULT_VALUES = {
     :channel => DEFAULT_CHANNEL,
     :index   => -1,
     :line    =>  0,
     :column  => -1
   }.freeze

   # type-to-name lookup; this base class only knows the built-in token types
   def self.token_name( type )
     BUILT_IN_TOKEN_NAMES[ type ]
   end

   #
   # Builds a token from a hash of attribute values keyed by member name
   # (symbols). Members missing from +fields+ take their DEFAULT_VALUES entry,
   # or nil when there is none.
   #
   def self.create( fields = {} )
     fields = DEFAULT_VALUES.merge( fields )
     args = members.map { |name| fields[ name.to_sym ] }
     new( *args )
   end

   # allows you to make a copy of a token with a different class
   # (the text is cloned and the copy's index is reset to -1)
   def self.from_token( token )
     new(
       token.type,  token.channel, token.text ? token.text.clone : nil,
       token.input, token.start,   token.stop, -1, token.line, token.column
     )
   end

   #
   # Populates the struct members, yields the new token to an optional block,
   # and finally derives +text+ from the input stream when no text was given
   # but the token knows its input and its start/stop positions.
   #
   def initialize( type = nil, channel = DEFAULT_CHANNEL, text = nil,
                   input = nil, start = nil, stop = nil, index = -1,
                   line = 0, column = -1 )
     super
     block_given? and yield( self )
     # input is checked as well so a token with positions but no stream keeps
     # a nil text instead of raising NoMethodError on nil
     if self.text.nil? && self.input && self.start && self.stop
       self.text = self.input.substring( self.start, self.stop )
     end
   end

   # the aliases declared in Token are bound to Token's own accessors, so they
   # are declared again here to point at the struct member accessors
   alias :input_stream :input
   alias :input_stream= :input=
   alias :token_index :index
   alias :token_index= :index=
 end

 module Constants

   # frozen token carrying the End of File / End of Input type
   EOF_TOKEN     = CommonToken.new( EOF ).freeze

   # two distinct frozen tokens that both carry INVALID_TOKEN_TYPE
   INVALID_TOKEN = CommonToken.new( INVALID_TOKEN_TYPE ).freeze
   SKIP_TOKEN    = CommonToken.new( INVALID_TOKEN_TYPE ).freeze
 end


 =begin rdoc ANTLR3::TokenSource

 TokenSource is a simple mixin module that demands an
 implementation of the method #next_token. In return, it
 defines methods #next and #each, which provide basic
 iterator methods for token generators. Furthermore, it
 includes Enumerable to provide the standard Ruby iteration
 methods to token generators, like lexers.

 =end

 module TokenSource
   include Constants
   include Enumerable
   extend ClassMacros

   abstract :next_token

   #
   # Fetches one token via #next_token and returns it; raises StopIteration
   # when the source answers nil or a token of type EOF.
   #
   def next
     fetched = next_token()
     return fetched unless fetched.nil? || fetched.type == EOF
     raise StopIteration
   end

   #
   # Yields tokens from #next_token until it answers a false value or an EOF
   # token, then returns self. Without a block, returns an enumerator.
   #
   def each
     return enum_for( :each ) unless block_given?

     current = next_token
     while current and current.type != EOF
       yield( current )
       current = next_token
     end
     return self
   end

   #
   # Wraps this source in a CommonTokenStream, passing +options+ through. A
   # block given here is called with whatever pair the stream yields.
   #
   def to_stream( options = {} )
     return CommonTokenStream.new( self, options ) unless block_given?

     CommonTokenStream.new( self, options ) do | t, stream |
       yield( t, stream )
     end
   end
 end


 =begin rdoc ANTLR3::TokenFactory

 There are a variety of different entities throughout the ANTLR runtime library
 that need to create token objects. This module serves as a mixin that provides
 methods for constructing tokens.

 Including this module provides a +token_class+ attribute. Instances of the
 including class can create tokens using the token class (which defaults to
 ANTLR3::CommonToken). Token classes are presumed to have an #initialize method
 that can be called without any parameters and the token objects are expected to
 have the standard token attributes (see ANTLR3::Token).

 =end

 module TokenFactory
   attr_writer :token_class

   #
   # The class used to build tokens. Unless one was assigned explicitly, the
   # first of these that can be evaluated without raising is cached and used:
   #
   # 1. <tt>self.class.token_class</tt>
   # 2. <tt>self::Token</tt>
   # 3. ANTLR3::CommonToken
   #
   def token_class
     @token_class ||= begin
       self.class.token_class
     rescue StandardError
       begin
         self::Token
       rescue StandardError
         ANTLR3::CommonToken
       end
     end
   end

   #
   # Instantiates #token_class with +args+. When a block is given, anything the
   # token class yields during construction is passed along to that block.
   #
   def create_token( *args )
     return token_class.new( *args ) unless block_given?

     token_class.new( *args ) { |*yielded| yield( *yielded ) }
   end
 end


 =begin rdoc ANTLR3::TokenScheme

 TokenSchemes exist to handle the problem of defining token types as integer
 values while maintaining meaningful text names for the types. They are
 dynamically defined modules that map integer values to constants with token-type
 names.

 ---

 Fundamentally, tokens exist to take a chunk of text and identify it as belonging
 to some category, like "VARIABLE" or "INTEGER". In code, the category is
 represented by an integer -- some arbitrary value that ANTLR will decide to use
 as it is creating the recognizer. The purpose of using an integer (instead of
 say, a ruby symbol) is that ANTLR's decision logic often needs to test whether a
 token's type falls within a range, which is not possible with symbols.

 The downside of token types being represented as integers is that a developer
 needs to be able to reference the unknown type value by name in action code.
 Furthermore, code that references the type by name and tokens that can be
 inspected with names in place of type values are more meaningful to a developer.

 Since ANTLR requires token type names to follow capital-letter naming
 conventions, defining types as named constants of the recognizer class resolves
 the problem of referencing type values by name. Thus, a token type like
 ``VARIABLE'' can be represented by a number like 5 and referenced within code by
 +VARIABLE+. However, when a recognizer creates tokens, the name of the token's
 type cannot be seen without using the data defined in the recognizer.

 Of course, tokens could be defined with a name attribute that could be specified
 when tokens are created. However, doing so would make tokens take up more space
 than necessary, as well as making it difficult to change the type of a token
 while maintaining a correct name value.

 TokenSchemes exist as a technique to manage token type referencing and name
 extraction. They:

 1. keep token type references clear and understandable in recognizer code
 2. permit access to a token's type-name independently of recognizer objects
 3. allow multiple classes to share the same token information

 == Building Token Schemes

 TokenScheme is a subclass of Module. Thus, it has the method
 <tt>TokenScheme.new(tk_class = nil) { ... module-level code ...}</tt>, which
 will evaluate the block in the context of the scheme (module), similarly to
 Module#module_eval. Before evaluating the block, <tt>.new</tt> will setup the
 module with the following actions:

 1. define a customized token class (more on that below)
 2. add a new constant, TOKEN_NAMES, which is a hash that maps types to names
 3. dynamically populate the new scheme module with a couple instance methods
 4. include ANTLR3::Constants in the new scheme module

 As TokenScheme the class functions as a metaclass, figuring out some of the
 scoping behavior can be mildly confusing if you're trying to get a handle of the
 entity for your own purposes. Remember that all of the instance methods of
 TokenScheme function as module-level methods of TokenScheme instances, ala
 +attr_accessor+ and friends.

 <tt>TokenScheme#define_token(name_symbol, int_value)</tt> adds a constant
 definition <tt>name_symbol</tt> with the value <tt>int_value</tt>. It is
 essentially like <tt>Module#const_set</tt>, except it forbids constant
 overwriting (which would mess up recognizer code fairly badly) and adds an
 inverse type-to-name map to its own <tt>TOKEN_NAMES</tt> table.
 <tt>TokenScheme#define_tokens</tt> is a convenience method for defining many
 types with a hash pairing names to values.

 <tt>TokenScheme#register_name(value, name_string)</tt> specifies a custom
 type-to-name definition. This is particularly useful for the anonymous tokens
 that ANTLR generates for literal strings in the grammar specification. For
 example, if you refer to the literal <tt>'='</tt> in some parser rule in your
 grammar, ANTLR will add a lexer rule for the literal and give the token a name
 like <tt>T__<i>x</i></tt>, where <tt><i>x</i></tt> is the type's integer value.
 Since this is pretty meaningless to a developer, generated code should add a
 special name definition for type value <tt><i>x</i></tt> with the string
 <tt>"'='"</tt>.

 === Sample TokenScheme Construction

   TokenData = ANTLR3::TokenScheme.new do
     define_tokens(
       :INT  => 4,
       :ID   => 6,
       :T__5 => 5,
       :WS   => 7
     )

     # note the self:: scoping below is due to the fact that
     # ruby lexically-scopes constant names instead of
     # looking up in the current scope
     register_name(self::T__5, "'='")
   end

   TokenData::ID           # => 6
   TokenData::T__5         # => 5
   TokenData.token_name(4) # => 'INT'
   TokenData.token_name(5) # => "'='"

   class ARecognizerOrSuch < ANTLR3::Parser
     include TokenData
     ID   # => 6
   end

 == Custom Token Classes and Relationship with Tokens

 When a TokenScheme is created, it will define a subclass of ANTLR3::CommonToken
 and assign it to the constant name +Token+. This token class will both include
 and extend the scheme module. Since token schemes define the private instance
 method <tt>token_name(type)</tt>, instances of the token class are now able to
 provide their type names. The Token method <tt>name</tt> uses the
 <tt>token_name</tt> method to provide the type name as if it were a simple
 attribute without storing the name itself.

 When a TokenScheme is included in a recognizer class, the class will now have
 the token types as named constants, a type-to-name map constant +TOKEN_NAMES+,
 and a grammar-specific subclass of ANTLR3::CommonToken assigned to the constant
 Token. Thus, when recognizers need to manufacture tokens, instead of using the
 generic CommonToken class, they can create tokens using the customized Token
 class provided by the token scheme.

 If you need to use a token class other than CommonToken, you can pass the class
 as a parameter to TokenScheme.new, which will be used in place of the
 dynamically-created CommonToken subclass.

 =end

 class TokenScheme < ::Module
   include TokenFactory

   #
   # Creates a new scheme module. +tk_class+ becomes the scheme's Token class
   # (a fresh CommonToken subclass when omitted); +body+ is evaluated in the
   # context of the new module after it has been set up.
   #
   def self.new( tk_class = nil, &body )
     super() do
       tk_class ||= Class.new( ::ANTLR3::CommonToken )
       self.token_class = tk_class

       const_set( :TOKEN_NAMES, ::ANTLR3::Constants::BUILT_IN_TOKEN_NAMES.clone )

       @types  = ::ANTLR3::Constants::BUILT_IN_TOKEN_NAMES.invert
       @unused = ::ANTLR3::Constants::MIN_TOKEN_TYPE

       scheme = self
       define_method( :token_scheme ) { scheme }
       define_method( :token_names )  { scheme::TOKEN_NAMES }
       define_method( :token_name ) do |type|
         begin
           # arguments must be spelled out: implicit-argument super is not
           # supported inside define_method and raises a RuntimeError, which
           # the rescue below would not catch
           token_names[ type ] or super( type )
         rescue NoMethodError
           ::ANTLR3::CommonToken.token_name( type )
         end
       end
       module_function :token_name, :token_names

       include ANTLR3::Constants

       body and module_eval( &body )
     end
   end

   #
   # Convenience constructor. Arguments are flattened, then nils and duplicates
   # are dropped; a leading Class is used as the token class, hashes are fed to
   # #define_tokens and everything else to #define_token.
   #
   def self.build( *token_names )
     token_names = token_names.flatten.compact.uniq
     tk_class = Class === token_names.first ? token_names.shift : nil
     value_maps, names = token_names.partition { |i| Hash === i }
     new( tk_class ) do
       value_maps.each { |value_map| define_tokens( value_map ) }
       names.each { |name| define_token( name ) }
     end
   end


   # including a scheme in a module also extends that module with the scheme
   def included( mod )
     super
     mod.extend( self )
   end
   private :included

   attr_reader :unused, :types

   # defines one token type per name => value pair; returns the scheme
   def define_tokens( token_map = {} )
     token_map.each do |type_name, type_value|
       define_token( type_name, type_value )
     end
     return self
   end

   #
   # Defines the token type +name+ with integer +value+ (the next unused value
   # when omitted). Names that look like constants become constants of the
   # scheme; any other name is stored under the constant <tt>T__<value></tt>.
   #
   # Raises NameError when +name+ is already defined with a different value.
   # Returns the scheme.
   #
   def define_token( name, value = nil )
     name = name.to_s

     if current_value = @types[ name ]
       # token type has already been defined
       # raise an error unless value is the same as the current value
       value ||= current_value
       unless current_value == value
         raise NameError.new(
           "new token type definition ``#{ name } = #{ value }'' conflicts " <<
           "with existing type definition ``#{ name } = #{ current_value }''", name
         )
       end
     else
       value ||= @unused
       if name =~ /^[A-Z]\w*$/
         const_set( name, @types[ name ] = value )
       else
         constant = "T__#{ value }"
         const_set( constant, @types[ constant ] = value )
         @types[ name ] = value
       end
       register_name( value, name ) unless built_in_type?( value )
     end

     # keep the next automatically assigned value above every defined value
     value >= @unused and @unused = value + 1
     return self
   end

   #
   # Registers several type names at once: either a single hash of
   # value => name pairs, or a list of names that are assigned consecutive
   # values starting at MIN_TOKEN_TYPE.
   #
   def register_names( *names )
     if names.length == 1 and Hash === names.first
       names.first.each do |value, name|
         register_name( value, name )
       end
     else
       names.each_with_index do |name, i|
         type_value = Constants::MIN_TOKEN_TYPE + i
         register_name( type_value, name )
       end
     end
   end

   #
   # Records +name+ as the display name of +type_value+. An existing anonymous
   # <tt>T__x</tt> name may be replaced by a real name, but never the reverse;
   # any other clash raises NameError.
   #
   def register_name( type_value, name )
     name = name.to_s.freeze
     if token_names.has_key?( type_value )
       current_name = token_names[ type_value ]
       current_name == name and return name

       if current_name == "T__#{ type_value }"
         # only an anonymous name is registered -- upgrade the name to the full literal name
         token_names[ type_value ] = name
       elsif name == "T__#{ type_value }"
         # ignore name downgrade from literal to anonymous constant
         return current_name
       else
         error = NameError.new(
           "attempted assignment of token type #{ type_value }" <<
           " to name #{ name } conflicts with existing name #{ current_name }", name
         )
         raise error
       end
     else
       token_names[ type_value ] = name
     end
   end

   # true when +type_value+ has an entry in the built-in token name table
   def built_in_type?( type_value )
     Constants::BUILT_IN_TOKEN_NAMES.fetch( type_value, false ) and true
   end

   #
   # Whether a token type is known to the scheme: integers are looked up in
   # the type-to-name table, anything else is treated as a constant name.
   #
   def token_defined?( name_or_value )
     case name_or_value
     when Integer then token_names.has_key?( name_or_value )
     else const_defined?( name_or_value.to_s )
     end
   end

   #
   # Two-way lookup: an integer type value gives its name (or nil); anything
   # else is resolved as a constant name and, failing that, as a registered
   # display name, giving the type value (or nil).
   #
   def []( name_or_value )
     case name_or_value
     when Integer then token_names.fetch( name_or_value, nil )
     else
       begin
         const_get( name_or_value.to_s )
       rescue StandardError
         # Hash#index was removed in ruby 3.0; Hash#key is the same lookup
         token_names.key( name_or_value )
       end
     end
   end

   # the token class attached to this scheme
   def token_class
     self::Token
   end

   #
   # Installs +klass+ as the scheme's Token constant, including the scheme in
   # it first when it does not already descend from the scheme. Raises
   # TypeError when +klass+ is not a Class.
   #
   def token_class=( klass )
     Class === klass or raise( TypeError, "token_class must be a Class" )
     Util.silence_warnings do
       klass < self or klass.send( :include, self )
       const_set( :Token, klass )
     end
   end

 end

 end
	#!/usr/bin/ruby
	# encoding: utf-8

	=begin LICENSE

	[The "BSD licence"]
	Copyright (c) 2009-2010 Kyle Yetter
	All rights reserved.

	Redistribution and use in source and binary forms, with or without
	modification, are permitted provided that the following conditions
	are met:

	1. Redistributions of source code must retain the above copyright
	notice, this list of conditions and the following disclaimer.
	2. Redistributions in binary form must reproduce the above copyright
	notice, this list of conditions and the following disclaimer in the
	documentation and/or other materials provided with the distribution.
	3. The name of the author may not be used to endorse or promote products
	derived from this software without specific prior written permission.

	THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
	IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
	OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
	IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
	INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
	NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
	DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
	THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
	THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

	=end

	module ANTLR3

	=begin rdoc ANTLR3::Token

	At a minimum, tokens are data structures that bind together a chunk of text and
	a corresponding type symbol, which categorizes/characterizes the content of the
	text. Tokens also usually carry information about their location in the input,
	such as absolute character index, line number, and position within the line (or
	column).

	Furthermore, ANTLR tokens are assigned a "channel" number, an extra degree of
	categorization that groups things on a larger scale. Parsers will usually ignore
	tokens that have channel value 99 (the HIDDEN_CHANNEL), so you can keep things
	like comment and white space huddled together with neighboring tokens,
	effectively ignoring them without discarding them.

	ANTLR tokens also keep a reference to the source stream from which they
	originated. Token streams will also provide an index value for the token, which
	indicates the position of the token relative to other tokens in the stream,
	starting at zero. For example, the 22nd token pulled from a lexer by
	CommonTokenStream will have index value 21.

	== Token as an Interface

	This library provides a token implementation (see CommonToken). Additionally,
	you may write your own token class as long as you provide methods that give
	access to the attributes expected by a token. Even though most of the ANTLR
	library tries to use duck-typing techniques instead of pure object-oriented type
	checking, it's a good idea to include this ANTLR3::Token into your customized
	token class.

	=end

	module Token
	  include ANTLR3::Constants
	  include Comparable

	  # the token's associated chunk of text
	  attr_accessor :text

	  # the integer value associated with the token's type
	  attr_accessor :type

	  # the text's starting line number within the source (indexed starting at 1)
	  attr_accessor :line

	  # the text's starting position in the line within the source (indexed starting at 0)
	  attr_accessor :column

	  # the integer value of the channel to which the token is assigned
	  attr_accessor :channel

	  # the index of the token with respect to the other tokens produced during lexing
	  attr_accessor :index

	  # a reference to the input stream from which the token was extracted
	  attr_accessor :input

	  # the absolute character index in the input at which the text starts
	  attr_accessor :start

	  # the absolute character index in the input at which the text ends
	  attr_accessor :stop

	  alias :input_stream :input
	  alias :input_stream= :input=
	  alias :token_index :index
	  alias :token_index= :index=

	  #
	  # The match operator has been implemented to match against several different
	  # attributes of a token for convenience in quick scripts
	  #
	  # @example Match against an integer token type constant
	  #   token =~ VARIABLE_NAME   => true/false
	  # @example Match against a token type name as a Symbol
	  #   token =~ :FLOAT          => true/false
	  # @example Match the token text against a Regular Expression
	  #   token =~ /^@[a-z_]\w*$/i
	  # @example Compare the token's text to a string
	  #   token =~ "class"
	  #
	  # Any other kind of operand is handed to the inherited =~ when one exists;
	  # otherwise the result is nil.
	  #
	  def =~ obj
	    case obj
	    when Integer then type == obj
	    when Symbol then name == obj.to_s
	    when Regexp then obj =~ text
	    when String then text == obj
	    else
	      # Object#=~ is gone on newer rubies (removed in 3.2), so a bare
	      # `super` would raise NoMethodError there
	      defined?( super ) ? super : nil
	    end
	  end

	  #
	  # Tokens are comparable by their stream index values. Returns nil (the
	  # conventional "not comparable" result) when +tk2+ cannot supply an index.
	  #
	  def <=> tk2
	    index <=> tk2.index
	  rescue NoMethodError, ArgumentError
	    # Comparable#== stopped rescuing errors raised by <=> in ruby 2.3, so
	    # something like `token == nil` would otherwise blow up here
	    nil
	  end

	  #
	  # Copies every attribute of +orig+ except the stream index, which is reset
	  # to -1. The text is cloned so the copy does not share a string with +orig+.
	  #
	  def initialize_copy( orig )
	    self.index   = -1
	    self.type    = orig.type
	    self.channel = orig.channel
	    self.text    = orig.text.clone if orig.text
	    self.start   = orig.start
	    self.stop    = orig.stop
	    self.line    = orig.line
	    self.column  = orig.column
	    self.input   = orig.input
	  end

	  # true when the token has an input stream plus start and stop positions
	  def concrete?
	    input && start && stop ? true : false
	  end

	  # the inverse of #concrete?
	  def imaginary?
	    input && start && stop ? false : true
	  end

	  # the name registered for the token's type, looked up via #token_name
	  def name
	    token_name( type )
	  end

	  # the source name reported by the input stream, if the token has an input
	  def source_name
	    i = input and i.source_name
	  end

	  # true when the token is assigned to HIDDEN_CHANNEL
	  def hidden?
	    channel == HIDDEN_CHANNEL
	  end

	  # the start..stop slice of the input for concrete tokens, #text otherwise
	  def source_text
	    concrete? ? input.substring( start, stop ) : text
	  end

	  #
	  # Sets the token's channel value to HIDDEN_CHANNEL
	  #
	  def hide!
	    self.channel = HIDDEN_CHANNEL
	  end

	  #
	  # Builds a compact one-line description of the token: stream index (when
	  # non-negative), type name, text, line/column (when line is positive),
	  # character range and, for tokens off the default channel, the channel.
	  #
	  # Attributes that are still nil are simply left out, so tokens that have
	  # not been fully populated can be inspected without raising.
	  #
	  def inspect
	    front           = ( index && index >= 0 ) ? "#{ index } " : ''
	    text_inspect    = text ? "[#{ text.inspect }] " : ' '
	    text_position   = ( line && line > 0 ) ? "@ line #{ line } col #{ column } " : ''
	    stream_position = start ? "(#{ range.inspect })" : ''

	    rep = "#{ front }#{ name }#{ text_inspect }#{ text_position }#{ stream_position }"
	    rep.strip!
	    rep << " (#{ channel })" unless channel == DEFAULT_CHANNEL
	    return( rep )
	  end

	  # pretty-printing support: emits the #inspect text through +printer+
	  def pretty_print( printer )
	    printer.text( inspect )
	  end

	  # start..stop as a Range, or nil when the two values cannot form a range
	  def range
	    start..stop rescue nil
	  end

	  # the token's stream index as an integer
	  def to_i
	    index.to_i
	  end

	  # the token's text (an empty string when text is nil)
	  def to_s
	    text.to_s
	  end

	private

	  # default type-to-name lookup; only the built-in token types are known here
	  def token_name( type )
	    BUILT_IN_TOKEN_NAMES[ type ]
	  end
	end

	# the member order below is also the positional argument order of the
	# generated constructor
	CommonToken = Struct.new(
	  :type,  :channel, :text,
	  :input, :start,   :stop,
	  :index, :line,    :column
	)

	=begin rdoc ANTLR3::CommonToken

	The base class for the standard implementation of Token. It is implemented as a
	simple Struct as tokens are basically simple data structures binding together a
	bunch of different information and Structs are slightly faster than a standard
	Object with accessor methods implementation.

	By default, ANTLR generated ruby code will provide a customized subclass of
	CommonToken to track token-type names efficiently for debugging, inspection, and
	general utility. Thus code generated for a standard combo lexer-parser grammar
	named XYZ will have a base module named XYZ and a customized CommonToken
	subclass named XYZ::Token.

	Here is the token structure attribute list in order:

	* <tt>type</tt>
	* <tt>channel</tt>
	* <tt>text</tt>
	* <tt>input</tt>
	* <tt>start</tt>
	* <tt>stop</tt>
	* <tt>index</tt>
	* <tt>line</tt>
	* <tt>column</tt>

	=end

	class CommonToken
	  include Token

	  # values CommonToken.create falls back on for attributes the caller omits
	  DEFAULT_VALUES = {
	    :channel => DEFAULT_CHANNEL,
	    :index   => -1,
	    :line    =>  0,
	    :column  => -1
	  }.freeze

	  # type-to-name lookup; this base class only knows the built-in token types
	  def self.token_name( type )
	    BUILT_IN_TOKEN_NAMES[ type ]
	  end

	  #
	  # Builds a token from a hash of attribute values keyed by member name
	  # (symbols). Members missing from +fields+ take their DEFAULT_VALUES entry,
	  # or nil when there is none.
	  #
	  def self.create( fields = {} )
	    fields = DEFAULT_VALUES.merge( fields )
	    args = members.map { |name| fields[ name.to_sym ] }
	    new( *args )
	  end

	  # allows you to make a copy of a token with a different class
	  # (the text is cloned and the copy's index is reset to -1)
	  def self.from_token( token )
	    new(
	      token.type,  token.channel, token.text ? token.text.clone : nil,
	      token.input, token.start,   token.stop, -1, token.line, token.column
	    )
	  end

	  #
	  # Populates the struct members, yields the new token to an optional block,
	  # and finally derives +text+ from the input stream when no text was given
	  # but the token knows its input and its start/stop positions.
	  #
	  def initialize( type = nil, channel = DEFAULT_CHANNEL, text = nil,
	                  input = nil, start = nil, stop = nil, index = -1,
	                  line = 0, column = -1 )
	    super
	    block_given? and yield( self )
	    # input is checked as well so a token with positions but no stream keeps
	    # a nil text instead of raising NoMethodError on nil
	    if self.text.nil? && self.input && self.start && self.stop
	      self.text = self.input.substring( self.start, self.stop )
	    end
	  end

	  # the aliases declared in Token are bound to Token's own accessors, so they
	  # are declared again here to point at the struct member accessors
	  alias :input_stream :input
	  alias :input_stream= :input=
	  alias :token_index :index
	  alias :token_index= :index=
	end

	module Constants

	  # frozen token carrying the End of File / End of Input type
	  EOF_TOKEN     = CommonToken.new( EOF ).freeze

	  # two distinct frozen tokens that both carry INVALID_TOKEN_TYPE
	  INVALID_TOKEN = CommonToken.new( INVALID_TOKEN_TYPE ).freeze
	  SKIP_TOKEN    = CommonToken.new( INVALID_TOKEN_TYPE ).freeze
	end



	=begin rdoc ANTLR3::TokenSource

	TokenSource is a simple mixin module that demands an
	implementation of the method #next_token. In return, it
	defines methods #next and #each, which provide basic
	iterator methods for token generators. Furthermore, it
	includes Enumerable to provide the standard Ruby iteration
	methods to token generators, like lexers.

	=end

	module TokenSource
	  include Constants
	  include Enumerable
	  extend ClassMacros

	  abstract :next_token

	  #
	  # Fetches one token via #next_token and returns it; raises StopIteration
	  # when the source answers nil or a token of type EOF.
	  #
	  def next
	    token = next_token()
	    raise StopIteration if token.nil? || token.type == EOF
	    return token
	  end

	  #
	  # Yields tokens from #next_token until it answers a false value or an EOF
	  # token, then returns self. Without a block, returns an enumerator.
	  #
	  def each
	    block_given? or return enum_for( :each )
	    while token = next_token and token.type != EOF
	      yield( token )
	    end
	    return self
	  end

	  #
	  # Wraps this source in a CommonTokenStream, passing +options+ through. A
	  # block given here is called with whatever pair the stream yields.
	  #
	  def to_stream( options = {} )
	    if block_given?
	      CommonTokenStream.new( self, options ) { | t, stream | yield( t, stream ) }
	    else
	      CommonTokenStream.new( self, options )
	    end
	  end
	end


	=begin rdoc ANTLR3::TokenFactory

	There are a variety of different entities throughout the ANTLR runtime library
	that need to create token objects. This module serves as a mixin that provides
	methods for constructing tokens.

	Including this module provides a +token_class+ attribute. Instances of the
	including class can create tokens using the token class (which defaults to
	ANTLR3::CommonToken). Token classes are presumed to have an #initialize method
	that can be called without any parameters and the token objects are expected to
	have the standard token attributes (see ANTLR3::Token).

	=end

	module TokenFactory
	  attr_writer :token_class

	  #
	  # The class used to build tokens. Unless one was assigned explicitly, the
	  # first of these that can be evaluated without raising is cached and used:
	  #
	  # 1. <tt>self.class.token_class</tt>
	  # 2. <tt>self::Token</tt>
	  # 3. ANTLR3::CommonToken
	  #
	  def token_class
	    @token_class ||= begin
	      self.class.token_class rescue
	      self::Token rescue
	      ANTLR3::CommonToken
	    end
	  end

	  #
	  # Instantiates #token_class with +args+ splatted into the constructor. When
	  # a block is given, anything the token class yields during construction is
	  # passed along to that block.
	  #
	  def create_token( *args )
	    if block_given?
	      token_class.new( *args ) do |*targs|
	        yield( *targs )
	      end
	    else
	      token_class.new( *args )
	    end
	  end
	end


	=begin rdoc ANTLR3::TokenScheme

	TokenSchemes exist to handle the problem of defining token types as integer
	values while maintaining meaningful text names for the types. They are
	dynamically defined modules that map integer values to constants with token-type
	names.

	---

	Fundamentally, tokens exist to take a chunk of text and identify it as belonging
	to some category, like "VARIABLE" or "INTEGER". In code, the category is
	represented by an integer -- some arbitrary value that ANTLR will decide to use
	as it is creating the recognizer. The purpose of using an integer (instead of
	say, a ruby symbol) is that ANTLR's decision logic often needs to test whether a
	token's type falls within a range, which is not possible with symbols.

	The downside of token types being represented as integers is that a developer
	needs to be able to reference the unknown type value by name in action code.
	Furthermore, code that references the type by name and tokens that can be
	inspected with names in place of type values are more meaningful to a developer.

	Since ANTLR requires token type names to follow capital-letter naming
	conventions, defining types as named constants of the recognizer class resolves
	the problem of referencing type values by name. Thus, a token type like
	``VARIABLE'' can be represented by a number like 5 and referenced within code by
	+VARIABLE+. However, when a recognizer creates tokens, the name of the token's
	type cannot be seen without using the data defined in the recognizer.

	Of course, tokens could be defined with a name attribute that could be specified
	when tokens are created. However, doing so would make tokens take up more space
	than necessary, as well as making it difficult to change the type of a token
	while maintaining a correct name value.

	TokenSchemes exist as a technique to manage token type referencing and name
	extraction. They:

	1. keep token type references clear and understandable in recognizer code
	2. permit access to a token's type-name independently of recognizer objects
	3. allow multiple classes to share the same token information

	== Building Token Schemes

	TokenScheme is a subclass of Module. Thus, it has the method
	<tt>TokenScheme.new(tk_class = nil) { ... module-level code ...}</tt>, which
	will evaluate the block in the context of the scheme (module), similarly to
	Module#module_eval. Before evaluating the block, <tt>.new</tt> will setup the
	module with the following actions:

	1. define a customized token class (more on that below)
	2. add a new constant, TOKEN_NAMES, which is a hash that maps types to names
	3. dynamically populate the new scheme module with a couple instance methods
	4. include ANTLR3::Constants in the new scheme module

	As TokenScheme the class functions as a metaclass, figuring out some of the
	scoping behavior can be mildly confusing if you're trying to get a handle on the
	entity for your own purposes. Remember that all of the instance methods of
	TokenScheme function as module-level methods of TokenScheme instances, a la
	+attr_accessor+ and friends.

	<tt>TokenScheme#define_token(name_symbol, int_value)</tt> adds a constant
	definition <tt>name_symbol</tt> with the value <tt>int_value</tt>. It is
	essentially like <tt>Module#const_set</tt>, except it forbids constant
	overwriting (which would mess up recognizer code fairly badly) and adds an
	inverse type-to-name map to its own <tt>TOKEN_NAMES</tt> table.
	<tt>TokenScheme#define_tokens</tt> is a convenience method for defining many
	types with a hash pairing names to values.

	<tt>TokenScheme#register_name(value, name_string)</tt> specifies a custom
	type-to-name definition. This is particularly useful for the anonymous tokens
	that ANTLR generates for literal strings in the grammar specification. For
	example, if you refer to the literal <tt>'='</tt> in some parser rule in your
	grammar, ANTLR will add a lexer rule for the literal and give the token a name
	like <tt>T__<i>x</i></tt>, where <tt><i>x</i></tt> is the type's integer value.
	Since this is pretty meaningless to a developer, generated code should add a
	special name definition for type value <tt><i>x</i></tt> with the string
	<tt>"'='"</tt>.

	=== Sample TokenScheme Construction

	TokenData = ANTLR3::TokenScheme.new do
	define_tokens(
	:INT => 4,
	:ID => 6,
	:T__5 => 5,
	:WS => 7
	)

	# note the self:: scoping below is due to the fact that
	# ruby lexically-scopes constant names instead of
	# looking up in the current scope
	register_name(self::T__5, "'='")
	end

	TokenData::ID # => 6
	TokenData::T__5 # => 5
	TokenData.token_name(4) # => 'INT'
	TokenData.token_name(5) # => "'='"

	class ARecognizerOrSuch < ANTLR3::Parser
	include TokenData
	ID # => 6
	end

	== Custom Token Classes and Relationship with Tokens

	When a TokenScheme is created, it will define a subclass of ANTLR3::CommonToken
	and assign it to the constant name +Token+. This token class will both include
	and extend the scheme module. Since token schemes define the private instance
	method <tt>token_name(type)</tt>, instances of the token class are now able to
	provide their type names. The Token method <tt>name</tt> uses the
	<tt>token_name</tt> method to provide the type name as if it were a simple
	attribute without storing the name itself.

	When a TokenScheme is included in a recognizer class, the class will now have
	the token types as named constants, a type-to-name map constant +TOKEN_NAMES+,
	and a grammar-specific subclass of ANTLR3::CommonToken assigned to the constant
	Token. Thus, when recognizers need to manufacture tokens, instead of using the
	generic CommonToken class, they can create tokens using the customized Token
	class provided by the token scheme.

	If you need to use a token class other than CommonToken, you can pass the class
	as a parameter to TokenScheme.new, which will be used in place of the
	dynamically-created CommonToken subclass.

	=end

	# Module subclass whose instances ("schemes") carry token type
	# constants, a TOKEN_NAMES type-to-name table and a +Token+ class that
	# is bound to the scheme.
	class TokenScheme < ::Module
	  include TokenFactory

	  # Creates a new scheme module.
	  #
	  # tk_class:: token class to bind to the scheme; when nil, an anonymous
	  #            subclass of ANTLR3::CommonToken is created
	  # body::     optional block, module_eval'd in the scheme once the
	  #            setup below has run
	  def self.new( tk_class = nil, &body )
	    super() do
	      tk_class ||= Class.new( ::ANTLR3::CommonToken )
	      self.token_class = tk_class

	      const_set( :TOKEN_NAMES, ::ANTLR3::Constants::BUILT_IN_TOKEN_NAMES.clone )

	      # @types maps names to values; @unused is the next value handed
	      # out by #define_token when no explicit value is supplied
	      @types  = ::ANTLR3::Constants::BUILT_IN_TOKEN_NAMES.invert
	      @unused = ::ANTLR3::Constants::MIN_TOKEN_TYPE

	      scheme = self
	      define_method( :token_scheme ) { scheme }
	      define_method( :token_names ) { scheme::TOKEN_NAMES }
	      define_method( :token_name ) do | type |
	        begin
	          # super needs explicit arguments here: a bare `super` inside a
	          # define_method body raises RuntimeError, which the
	          # NoMethodError rescue below would not catch
	          token_names[ type ] or super( type )
	        rescue NoMethodError
	          ::ANTLR3::CommonToken.token_name( type )
	        end
	      end
	      module_function :token_name, :token_names

	      include ANTLR3::Constants

	      body and module_eval( &body )
	    end
	  end

	  # Convenience constructor. Accepts any mix of (possibly nested) token
	  # names and name => value hashes; nil entries and duplicates are
	  # dropped. If the first remaining argument is a Class, it is used as
	  # the token class. Hashes go through #define_tokens, everything else
	  # through #define_token.
	  def self.build( *token_names )
	    token_names = token_names.flatten
	    token_names.compact!
	    token_names.uniq!
	    tk_class = Class === token_names.first ? token_names.shift : nil
	    value_maps, names = token_names.partition { | entry | Hash === entry }
	    new( tk_class ) do
	      value_maps.each { | value_map | define_tokens( value_map ) }
	      names.each { | name | define_token( name ) }
	    end
	  end


	  # Hook run when the scheme is included somewhere: the including
	  # module is additionally extended with the scheme.
	  def included( mod )
	    super
	    mod.extend( self )
	  end
	  private :included

	  attr_reader :unused, :types

	  # Defines every name => value pair of +token_map+ through
	  # #define_token. Returns self.
	  def define_tokens( token_map = {} )
	    token_map.each { | name, value | define_token( name, value ) }
	    return self
	  end

	  # Defines the token type +name+ with integer +value+ (defaulting to
	  # the next unused value). Returns self.
	  #
	  # Raises NameError if +name+ is already defined with a different value.
	  def define_token( name, value = nil )
	    name = name.to_s

	    if current_value = @types[ name ]
	      # token type has already been defined
	      # raise an error unless value is the same as the current value
	      value ||= current_value
	      unless current_value == value
	        raise NameError.new(
	          "new token type definition ``#{ name } = #{ value }'' conflicts " <<
	          "with existing type definition ``#{ name } = #{ current_value }''", name
	        )
	      end
	    else
	      value ||= @unused
	      if name =~ /^[A-Z]\w*$/
	        const_set( name, @types[ name ] = value )
	      else
	        # names that cannot serve as constants get a T__<value> constant;
	        # the given name is still recorded in @types
	        constant = "T__#{ value }"
	        const_set( constant, @types[ constant ] = value )
	        @types[ name ] = value
	      end
	      register_name( value, name ) unless built_in_type?( value )
	    end

	    # keep @unused above the highest value defined so far
	    value >= @unused and @unused = value + 1
	    return self
	  end

	  # Registers several type names at once. A single Hash argument is read
	  # as value => name pairs; otherwise the names are assigned consecutive
	  # values starting at Constants::MIN_TOKEN_TYPE.
	  def register_names( *names )
	    if names.length == 1 and Hash === names.first
	      names.first.each do | value, name |
	        register_name( value, name )
	      end
	    else
	      names.each_with_index do | name, i |
	        type_value = Constants::MIN_TOKEN_TYPE + i
	        register_name( type_value, name )
	      end
	    end
	  end

	  # Records +name+ as the display name of +type_value+ in the
	  # token_names table.
	  #
	  # Raises NameError if the type already has a different name, unless
	  # one of the two names is the anonymous form T__<type_value>.
	  def register_name( type_value, name )
	    name = name.to_s.freeze
	    if token_names.has_key?( type_value )
	      current_name = token_names[ type_value ]
	      current_name == name and return name

	      if current_name == "T__#{ type_value }"
	        # only an anonymous name is registered -- upgrade the name to the full literal name
	        token_names[ type_value ] = name
	      elsif name == "T__#{ type_value }"
	        # ignore name downgrade from literal to anonymous constant
	        return current_name
	      else
	        error = NameError.new(
	          "attempted assignment of token type #{ type_value }" <<
	          " to name #{ name } conflicts with existing name #{ current_name }", name
	        )
	        raise error
	      end
	    else
	      token_names[ type_value ] = name
	    end
	  end

	  # True when +type_value+ has an entry in
	  # Constants::BUILT_IN_TOKEN_NAMES.
	  def built_in_type?( type_value )
	    Constants::BUILT_IN_TOKEN_NAMES.fetch( type_value, false ) and true
	  end

	  # Reports whether a token is known to the scheme. Integers are looked
	  # up as keys of the token_names table; anything else is checked as a
	  # constant name.
	  def token_defined?( name_or_value )
	    case name_or_value
	    when Integer then token_names.has_key?( name_or_value )
	    else const_defined?( name_or_value.to_s )
	    end
	  end

	  # Two-way lookup. An Integer returns its registered name, or nil when
	  # there is none. Any other argument is resolved as a constant of the
	  # scheme; if that fails, the type value registered under that name in
	  # the token_names table is returned (nil when there is none).
	  def []( name_or_value )
	    case name_or_value
	    when Integer then token_names.fetch( name_or_value, nil )
	    else
	      begin
	        const_get( name_or_value.to_s )
	      rescue StandardError
	        # best-effort fallback, e.g. for names that are not valid
	        # constant names
	        token_names.key( name_or_value )
	      end
	    end
	  end

	  # The token class bound to this scheme (its +Token+ constant).
	  def token_class
	    self::Token
	  end

	  # Binds +klass+ to the scheme as its +Token+ constant, including the
	  # scheme in +klass+ first unless it already descends from it.
	  #
	  # Raises TypeError if +klass+ is not a Class.
	  def token_class=( klass )
	    Class === klass or raise( TypeError, "token_class must be a Class" )
	    Util.silence_warnings do
	      klass < self or klass.send( :include, self )
	      const_set( :Token, klass )
	    end
	  end

	end

	end