blob: 31a2e93f0261a294a1a2d1ab39f8ba68eb1644a8 [file] [log] [blame]
"""ANTLR3 runtime package"""
# begin[licence]
#
# [The "BSD licence"]
# Copyright (c) 2005-2012 Terence Parr
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# end[licence]
from .constants import DEFAULT_CHANNEL, EOF, INVALID_TOKEN_TYPE
############################################################################
#
# basic token interface
#
############################################################################
class Token(object):
    """@brief Abstract token baseclass.

    Stores the attributes common to all token implementations (type,
    channel, text, index, line, column and originating input) and exposes
    them as properties backed by underscore-prefixed attributes.
    """

    # Class-wide mapping from token type to token name. Stays None until
    # registerTokenNamesMap() has been called.
    TOKEN_NAMES_MAP = None

    @classmethod
    def registerTokenNamesMap(cls, tokenNamesMap):
        """@brief Store a mapping from token type to token name.

        This enables token.typeName to give something more meaningful
        than, e.g., '6'.

        The mapping is stored as given (it is not copied) and an entry
        mapping EOF to "EOF" is added to it.
        """
        cls.TOKEN_NAMES_MAP = tokenNamesMap
        cls.TOKEN_NAMES_MAP[EOF] = "EOF"

    def __init__(self, type=None, channel=DEFAULT_CHANNEL, text=None,
                 index=-1, line=0, charPositionInLine=-1, input=None):
        """@brief Initialise the common token attributes.

        @param type                token type
        @param channel             channel the token is emitted on
        @param text                token text, may be None
        @param index               index of the token in the token stream
        @param line                line the token was found on
        @param charPositionInLine  column of the token's first character
        @param input               stream the token was created from
        """
        # We use -1 for index and charPositionInLine as an invalid index
        self._type = type
        self._channel = channel
        self._text = text
        self._index = index
        # Keep the caller-supplied line; it used to be reset to 0 here,
        # which lost the line whenever a token was built from explicit
        # values or copied from another token.
        self._line = line
        self._charPositionInLine = charPositionInLine
        self.input = input

    # To override a property, you'll need to override both the getter and
    # setter.
    @property
    def text(self):
        """The text of the token."""
        return self._text

    @text.setter
    def text(self, value):
        self._text = value

    @property
    def type(self):
        """The type of the token."""
        return self._type

    @type.setter
    def type(self, value):
        self._type = value

    # For compatibility
    def getType(self):
        """Return the token type; method-style equivalent of `type`."""
        return self._type

    @property
    def typeName(self):
        """Name of the token type.

        Looked up in TOKEN_NAMES_MAP when a non-empty map is registered
        ("INVALID_TOKEN_TYPE" if the type is not in the map); otherwise
        the type converted with str().
        """
        if self.TOKEN_NAMES_MAP:
            return self.TOKEN_NAMES_MAP.get(self._type, "INVALID_TOKEN_TYPE")
        else:
            return str(self._type)

    @property
    def line(self):
        """Lines are numbered 1..n."""
        return self._line

    @line.setter
    def line(self, value):
        self._line = value

    @property
    def charPositionInLine(self):
        """Columns are numbered 0..n-1."""
        return self._charPositionInLine

    @charPositionInLine.setter
    def charPositionInLine(self, pos):
        self._charPositionInLine = pos

    @property
    def channel(self):
        """The channel of the token."""
        return self._channel

    @channel.setter
    def channel(self, value):
        self._channel = value

    @property
    def index(self):
        """
        An index from 0..n-1 of the token object in the input stream.
        This must be valid in order to use the ANTLRWorks debugger.
        """
        return self._index

    @index.setter
    def index(self, value):
        self._index = value

    def getInputStream(self):
        """@brief From what character stream was this token created.

        You don't have to implement but it's nice to know where a Token
        comes from if you have include files etc... on the input.

        This base implementation always raises NotImplementedError.
        """
        raise NotImplementedError

    def setInputStream(self, input):
        """@brief Set the character stream this token was created from.

        You don't have to implement but it's nice to know where a Token
        comes from if you have include files etc... on the input.

        This base implementation always raises NotImplementedError.
        """
        raise NotImplementedError
############################################################################
#
# token implementations
#
# Token
# +- CommonToken
# \- ClassicToken
#
############################################################################
class CommonToken(Token):
    """@brief Basic token implementation.

    The text is not copied out of the input stream when the token is
    created. Instead the token remembers start/stop positions into the
    stream and fetches the text on demand, which avoids unnecessary copy
    operations.
    """

    def __init__(self, type=None, channel=DEFAULT_CHANNEL, text=None,
                 input=None, start=None, stop=None, oldToken=None):
        """@brief Build a token from explicit values or from another token.

        When oldToken is truthy its attributes are copied and the type,
        channel, text and input arguments are ignored; start/stop are taken
        from oldToken if it is a CommonToken and from the arguments
        otherwise.
        """
        if not oldToken:
            super().__init__(type=type, channel=channel, input=input)
            # Explicit text override. When this is not None the text
            # property returns it instead of reading the input; start/stop
            # are unaffected by it.
            self._text = text
            # Char position in the input buffer where this token starts.
            self.start = start
            # Char position in the input buffer where this token stops.
            # This is the index of the last char, *not* the one after it!
            self.stop = stop
            return

        super().__init__(oldToken.type, oldToken.channel, oldToken.text,
                         oldToken.index, oldToken.line,
                         oldToken.charPositionInLine, oldToken.input)
        if isinstance(oldToken, CommonToken):
            self.start, self.stop = oldToken.start, oldToken.stop
        else:
            self.start, self.stop = start, stop

    @property
    def text(self):
        """Token text: the override if set, else a slice of the input.

        Returns None when there is neither an override nor a (truthy)
        input, and '<EOF>' when start or stop is not below the input size.
        """
        override = self._text
        # An empty string is a valid override, so compare against None.
        if override is not None:
            return override

        stream = self.input
        if not stream:
            return None

        inside = self.start < stream.size() and self.stop < stream.size()
        return stream.substring(self.start, self.stop) if inside else '<EOF>'

    @text.setter
    def text(self, value):
        """
        Override the text for this token. The text property will return
        this value rather than pulling from the buffer. Note that this does
        not mean that start/stop indexes are not valid. It means that the
        input was converted to a new string in the token object.
        """
        self._text = value

    def getInputStream(self):
        """Return the stream stored in the `input` attribute."""
        return self.input

    def setInputStream(self, input):
        """Store the given stream in the `input` attribute."""
        self.input = input

    def __str__(self):
        """Return a one-line description of the token ("<EOF>" for EOF)."""
        if self.type == EOF:
            return "<EOF>"

        # Only channels above 0 are mentioned in the output.
        channel_part = (",channel=" + str(self.channel)
                        if self.channel > 0 else "")

        shown = self.text
        if shown:
            # Replace newline, carriage return and tab by a visible escape
            # made of two backslashes followed by the letter.
            for raw, escaped in (("\n", r"\\n"),
                                 ("\r", r"\\r"),
                                 ("\t", r"\\t")):
                shown = shown.replace(raw, escaped)
        else:
            shown = "<no text>"

        return ("[@{0.index},{0.start}:{0.stop}={txt!r},"
                "<{0.typeName}>{channelStr},"
                "{0.line}:{0.charPositionInLine}]"
                .format(self, txt=shown, channelStr=channel_part))
class ClassicToken(Token):
    """@brief Alternative token implementation.

    A Token object like we'd use in ANTLR 2.x; it carries an actual string
    of its own. Such objects are needed for imaginary tree nodes that have
    payload objects: the tree node points at a token, and that token must
    hold a string. CommonToken has indexes into a char stream and hence
    cannot be used to introduce new strings.
    """

    def __init__(self, type=None, text=None, channel=DEFAULT_CHANNEL,
                 oldToken=None):
        """@brief Build a token from explicit values or from another token.

        When oldToken is truthy, its type, channel, text, line and
        charPositionInLine are copied and the other arguments are ignored.
        Otherwise the token is built from type, text and channel, passing
        None for index, line and charPositionInLine.
        """
        if not oldToken:
            super().__init__(type=type, channel=channel, text=text,
                             index=None, line=None, charPositionInLine=None)
            return

        super().__init__(type=oldToken.type, channel=oldToken.channel,
                         text=oldToken.text, line=oldToken.line,
                         charPositionInLine=oldToken.charPositionInLine)

    def getInputStream(self):
        """Always return None."""
        return None

    def setInputStream(self, input):
        """Accept and ignore the given stream."""
        pass

    def toString(self):
        """Return a one-line description of the token.

        Also used as __str__ and __repr__.
        """
        # Only channels above 0 are mentioned in the output.
        channel_part = (",channel=" + str(self.channel)
                        if self.channel > 0 else "")
        shown = self.text or "<no text>"
        return ("[@{0.index!r},{txt!r},<{0.type!r}>{channelStr},"
                "{0.line!r}:{0.charPositionInLine!r}]"
                .format(self, txt=shown, channelStr=channel_part))

    __str__ = toString
    __repr__ = toString
# Module-level CommonToken instance whose type is INVALID_TOKEN_TYPE.
INVALID_TOKEN = CommonToken(type=INVALID_TOKEN_TYPE)
# In an action, a lexer rule can set token to this SKIP_TOKEN and ANTLR
# will avoid creating a token for this symbol and try to fetch another.
# It is built with the same arguments as INVALID_TOKEN but is a separate
# object (presumably so it can be recognised by identity -- verify against
# the lexer code).
SKIP_TOKEN = CommonToken(type=INVALID_TOKEN_TYPE)