| """ANTLR3 runtime package""" |
| |
| # begin[licence] |
| # |
| # [The "BSD licence"] |
| # Copyright (c) 2005-2008 Terence Parr |
| # All rights reserved. |
| # |
| # Redistribution and use in source and binary forms, with or without |
| # modification, are permitted provided that the following conditions |
| # are met: |
| # 1. Redistributions of source code must retain the above copyright |
| # notice, this list of conditions and the following disclaimer. |
| # 2. Redistributions in binary form must reproduce the above copyright |
| # notice, this list of conditions and the following disclaimer in the |
| # documentation and/or other materials provided with the distribution. |
| # 3. The name of the author may not be used to endorse or promote products |
| # derived from this software without specific prior written permission. |
| # |
| # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR |
| # IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
| # OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. |
| # IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, |
| # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
| # NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF |
| # THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| # |
| # end[licence] |
| |
| from antlr3.constants import EOF, DEFAULT_CHANNEL, INVALID_TOKEN_TYPE |
| |
| ############################################################################ |
| # |
| # basic token interface |
| # |
| ############################################################################ |
| |
class Token(object):
    """@brief Abstract token baseclass.

    Defines the accessor interface every token implementation must
    provide. All methods here are abstract and raise NotImplementedError.
    """

    def getText(self):
        """@brief Return the text matched for this token.

        Deprecated: prefer the o.text attribute over setter/getter calls.
        """
        raise NotImplementedError

    def setText(self, text):
        """@brief Replace the text of this token.

        Deprecated: prefer the o.text attribute over setter/getter calls.
        """
        raise NotImplementedError


    def getType(self):
        """@brief Return the token type.

        Deprecated: prefer the o.type attribute over setter/getter calls.
        """
        raise NotImplementedError

    def setType(self, ttype):
        """@brief Replace the token type.

        Deprecated: prefer the o.type attribute over setter/getter calls.
        """
        raise NotImplementedError


    def getLine(self):
        """@brief Return the line on which this token was matched.

        Lines are numbered 1..n.

        Deprecated: prefer the o.line attribute over setter/getter calls.
        """
        raise NotImplementedError

    def setLine(self, line):
        """@brief Replace the line number of this token.

        Deprecated: prefer the o.line attribute over setter/getter calls.
        """
        raise NotImplementedError


    def getCharPositionInLine(self):
        """@brief Return the column of the token's first character.

        Columns are numbered 0..n-1.

        Deprecated: prefer the o.charPositionInLine attribute over
        setter/getter calls.
        """
        raise NotImplementedError

    def setCharPositionInLine(self, pos):
        """@brief Replace the column of the token's first character.

        Deprecated: prefer the o.charPositionInLine attribute over
        setter/getter calls.
        """
        raise NotImplementedError


    def getChannel(self):
        """@brief Return the channel this token was emitted on.

        Deprecated: prefer the o.channel attribute over setter/getter calls.
        """
        raise NotImplementedError

    def setChannel(self, channel):
        """@brief Replace the channel of this token.

        Deprecated: prefer the o.channel attribute over setter/getter calls.
        """
        raise NotImplementedError


    def getTokenIndex(self):
        """@brief Return this token's index in the input stream.

        An index from 0..n-1 of the token object in the input stream.
        This must be valid in order to use the ANTLRWorks debugger.

        Deprecated: prefer the o.index attribute over setter/getter calls.
        """
        raise NotImplementedError

    def setTokenIndex(self, index):
        """@brief Replace this token's index in the input stream.

        Deprecated: prefer the o.index attribute over setter/getter calls.
        """
        raise NotImplementedError


    def getInputStream(self):
        """@brief Return the character stream this token was created from.

        Implementations are not required to support this; it is only a
        convenience to know where a token comes from when there are
        include files etc. on the input.
        """
        raise NotImplementedError

    def setInputStream(self, input):
        """@brief Attach the character stream this token was created from.

        Implementations are not required to support this; it is only a
        convenience to know where a token comes from when there are
        include files etc. on the input.
        """
        raise NotImplementedError
| |
| ############################################################################ |
| # |
| # token implementations |
| # |
| # Token |
| # +- CommonToken |
| # \- ClassicToken |
| # |
| ############################################################################ |
| |
class CommonToken(Token):
    """@brief Basic token implementation.

    This implementation does not copy the text from the input stream upon
    creation, but keeps start/stop pointers into the stream to avoid
    unnecessary copy operations.

    """

    def __init__(self, type=None, channel=DEFAULT_CHANNEL, text=None,
                 input=None, start=None, stop=None, oldToken=None):
        """Create a token, either from scratch or by copying oldToken.

        @param type     token type
        @param channel  channel the token is emitted on
        @param text     explicit override text; when None, getText() reads
                        the text lazily from the input stream
        @param input    character stream the token was matched in
        @param start    index of the token's first char in the input buffer
        @param stop     index of the token's last char (inclusive!)
        @param oldToken when given, the other arguments are ignored and all
                        attributes are copied from this token instead
        """
        Token.__init__(self)

        if oldToken is not None:
            self.type = oldToken.type
            self.line = oldToken.line
            self.charPositionInLine = oldToken.charPositionInLine
            self.channel = oldToken.channel
            self.index = oldToken.getTokenIndex()

            if isinstance(oldToken, CommonToken):
                # Copy lazily: keep the override text and the stream
                # pointers as they are, so no text is extracted from the
                # input buffer during the copy.
                self._text = oldToken._text
                self.input = oldToken.input
                self.start = oldToken.start
                self.stop = oldToken.stop
            else:
                # Bugfix: other Token implementations (e.g. ClassicToken)
                # have no _text/input/start/stop attributes. The old code
                # read oldToken._text and oldToken.input directly, which
                # raised AttributeError for such tokens, and it left
                # start/stop unset. Go through the generic accessors and
                # materialize the text instead.
                self._text = oldToken.getText()
                self.input = oldToken.getInputStream()
                self.start = None
                self.stop = None

        else:
            self.type = type
            self.input = input
            self.charPositionInLine = -1  # set to invalid position
            self.line = 0
            self.channel = channel

            # What token number is this from 0..n-1 tokens;
            # < 0 implies invalid index.
            self.index = -1

            # We need to be able to change the text once in a while. If
            # this is non-null, then getText should return this. Note that
            # start/stop are not affected by changing this.
            self._text = text

            # The char position into the input buffer where this token starts.
            self.start = start

            # The char position into the input buffer where this token stops.
            # This is the index of the last char, *not* the index after it!
            self.stop = stop


    def getText(self):
        """Return the token text.

        Returns the override text when one is set; otherwise extracts the
        text from the input stream via the start/stop pointers.
        """
        if self._text is not None:
            return self._text

        if self.input is None:
            return None

        # NOTE(review): start/stop are assumed to be integers here; a token
        # with unset pointers and a non-None input would fail below.
        if self.start < self.input.size() and self.stop < self.input.size():
            return self.input.substring(self.start, self.stop)

        # The pointers reach beyond the buffer.
        return '<EOF>'


    def setText(self, text):
        """
        Override the text for this token. getText() will return this text
        rather than pulling from the buffer. Note that this does not mean
        that start/stop indexes are not valid. It means that that input
        was converted to a new string in the token object.
        """
        self._text = text

    text = property(getText, setText)


    def getType(self):
        """Return the token type."""
        return self.type

    def setType(self, ttype):
        """Replace the token type."""
        self.type = ttype

    def getTypeName(self):
        """Return the token type as a string (overridable for debugging)."""
        return str(self.type)

    typeName = property(lambda s: s.getTypeName())

    def getLine(self):
        """Return the line (1..n) on which this token was matched."""
        return self.line

    def setLine(self, line):
        """Replace the line number of this token."""
        self.line = line


    def getCharPositionInLine(self):
        """Return the column (0..n-1) of the token's first character."""
        return self.charPositionInLine

    def setCharPositionInLine(self, pos):
        """Replace the column of the token's first character."""
        self.charPositionInLine = pos


    def getChannel(self):
        """Return the channel this token was emitted on."""
        return self.channel

    def setChannel(self, channel):
        """Replace the channel of this token."""
        self.channel = channel


    def getTokenIndex(self):
        """Return this token's index (0..n-1) in the token stream."""
        return self.index

    def setTokenIndex(self, index):
        """Replace this token's index in the token stream."""
        self.index = index


    def getInputStream(self):
        """Return the character stream this token was created from."""
        return self.input

    def setInputStream(self, input):
        """Attach the character stream this token was created from."""
        self.input = input


    def __str__(self):
        """Return a debug representation of this token."""
        if self.type == EOF:
            return "<EOF>"

        # Only mention the channel when it is not the default (0).
        channelStr = ""
        if self.channel > 0:
            channelStr = ",channel=" + str(self.channel)

        txt = self.text
        if txt is not None:
            # Make control characters visible in the debug output.
            txt = txt.replace("\n","\\\\n")
            txt = txt.replace("\r","\\\\r")
            txt = txt.replace("\t","\\\\t")
        else:
            txt = "<no text>"

        return "[@%d,%d:%d=%r,<%s>%s,%d:%d]" % (
            self.index,
            self.start, self.stop,
            txt,
            self.typeName, channelStr,
            self.line, self.charPositionInLine
            )
| |
| |
class ClassicToken(Token):
    """@brief Alternative token implementation.

    A Token object like we'd use in ANTLR 2.x; has an actual string created
    and associated with this object. These objects are needed for imaginary
    tree nodes that have payload objects. We need to create a Token object
    that has a string; the tree node will point at this token. CommonToken
    has indexes into a char stream and hence cannot be used to introduce
    new strings.
    """

    def __init__(self, type=None, text=None, channel=DEFAULT_CHANNEL,
                 oldToken=None
                 ):
        """Create a token, either from scratch or by copying oldToken.

        @param type     token type
        @param text     token text (an actual string, not stream pointers)
        @param channel  channel the token is emitted on
        @param oldToken when given, the other arguments are ignored and all
                        attributes are copied from this token instead
        """
        Token.__init__(self)

        if oldToken is not None:
            # Copy constructor. (Bugfix: these attributes used to be
            # unconditionally overwritten by the defaults below, making the
            # copy a no-op; additionally, index was never copied, leaving
            # it unset.)
            self.text = oldToken.text
            self.type = oldToken.type
            self.line = oldToken.line
            self.charPositionInLine = oldToken.charPositionInLine
            self.channel = oldToken.channel
            self.index = oldToken.getTokenIndex()

        else:
            self.text = text
            self.type = type
            self.line = None
            self.charPositionInLine = None
            self.channel = channel
            self.index = None


    def getText(self):
        """Return the token text."""
        return self.text

    def setText(self, text):
        """Replace the token text."""
        self.text = text


    def getType(self):
        """Return the token type."""
        return self.type

    def setType(self, ttype):
        """Replace the token type."""
        self.type = ttype


    def getLine(self):
        """Return the line on which this token was matched."""
        return self.line

    def setLine(self, line):
        """Replace the line number of this token."""
        self.line = line


    def getCharPositionInLine(self):
        """Return the column of the token's first character."""
        return self.charPositionInLine

    def setCharPositionInLine(self, pos):
        """Replace the column of the token's first character."""
        self.charPositionInLine = pos


    def getChannel(self):
        """Return the channel this token was emitted on."""
        return self.channel

    def setChannel(self, channel):
        """Replace the channel of this token."""
        self.channel = channel


    def getTokenIndex(self):
        """Return this token's index in the token stream."""
        return self.index

    def setTokenIndex(self, index):
        """Replace this token's index in the token stream."""
        self.index = index


    def getInputStream(self):
        """ClassicToken keeps no reference to its source stream."""
        return None

    def setInputStream(self, input):
        """Ignored; ClassicToken keeps no reference to its source stream."""
        pass


    def toString(self):
        """Return a debug representation of this token."""
        # Only mention the channel when it is not the default (0).
        channelStr = ""
        if self.channel > 0:
            channelStr = ",channel=" + str(self.channel)

        txt = self.text
        if txt is None:
            txt = "<no text>"

        return "[@%r,%r,<%r>%s,%r:%r]" % (self.index,
                                          txt,
                                          self.type,
                                          channelStr,
                                          self.line,
                                          self.charPositionInLine
                                          )


    __str__ = toString
    __repr__ = toString
| |
| |
# Shared token instance carrying the invalid token type.
INVALID_TOKEN = CommonToken(type=INVALID_TOKEN_TYPE)

# In an action, a lexer rule can set token to this SKIP_TOKEN and ANTLR
# will avoid creating a token for this symbol and try to fetch another.
SKIP_TOKEN = CommonToken(type=INVALID_TOKEN_TYPE)