blob: 7bc8446739c4d7663fc84cceaccece2abd52d1a7 [file] [log] [blame]
""" @package antlr3.tree
@brief ANTLR3 runtime package, tree module
This module contains all support classes for AST construction and tree parsers.
"""
# begin[licence]
#
# [The "BSD licence"]
# Copyright (c) 2005-2008 Terence Parr
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# end[licence]
# lot's of docstrings are missing, don't complain for now...
# pylint: disable-msg=C0111
import re
from antlr3.constants import UP, DOWN, EOF, INVALID_TOKEN_TYPE
from antlr3.recognizers import BaseRecognizer, RuleReturnScope
from antlr3.streams import IntStream
from antlr3.tokens import CommonToken, Token, INVALID_TOKEN
from antlr3.exceptions import MismatchedTreeNodeException, \
MissingTokenException, UnwantedTokenException, MismatchedTokenException, \
NoViableAltException
############################################################################
#
# tree related exceptions
#
############################################################################
class RewriteCardinalityException(RuntimeError):
"""
@brief Base class for all exceptions thrown during AST rewrite construction.
This signifies a case where the cardinality of two or more elements
in a subrule are different: (ID INT)+ where |ID|!=|INT|
"""
def __init__(self, elementDescription):
RuntimeError.__init__(self, elementDescription)
self.elementDescription = elementDescription
def getMessage(self):
return self.elementDescription
class RewriteEarlyExitException(RewriteCardinalityException):
"""@brief No elements within a (...)+ in a rewrite rule"""
def __init__(self, elementDescription=None):
RewriteCardinalityException.__init__(self, elementDescription)
class RewriteEmptyStreamException(RewriteCardinalityException):
"""
@brief Ref to ID or expr but no tokens in ID stream or subtrees in expr stream
"""
pass
############################################################################
#
# basic Tree and TreeAdaptor interfaces
#
############################################################################
class Tree(object):
"""
@brief Abstract baseclass for tree nodes.
What does a tree look like? ANTLR has a number of support classes
such as CommonTreeNodeStream that work on these kinds of trees. You
don't have to make your trees implement this interface, but if you do,
you'll be able to use more support code.
NOTE: When constructing trees, ANTLR can build any kind of tree; it can
even use Token objects as trees if you add a child list to your tokens.
This is a tree node without any payload; just navigation and factory stuff.
"""
def getChild(self, i):
raise NotImplementedError
def getChildCount(self):
raise NotImplementedError
def getParent(self):
"""Tree tracks parent and child index now > 3.0"""
raise NotImplementedError
def setParent(self, t):
"""Tree tracks parent and child index now > 3.0"""
raise NotImplementedError
def hasAncestor(self, ttype):
"""Walk upwards looking for ancestor with this token type."""
raise NotImplementedError
def getAncestor(self, ttype):
"""Walk upwards and get first ancestor with this token type."""
raise NotImplementedError
def getAncestors(self):
"""Return a list of all ancestors of this node.
The first node of list is the root and the last is the parent of
this node.
"""
raise NotImplementedError
def getChildIndex(self):
"""This node is what child index? 0..n-1"""
raise NotImplementedError
def setChildIndex(self, index):
"""This node is what child index? 0..n-1"""
raise NotImplementedError
def freshenParentAndChildIndexes(self):
"""Set the parent and child index values for all children"""
raise NotImplementedError
def addChild(self, t):
"""
Add t as a child to this node. If t is null, do nothing. If t
is nil, add all children of t to this' children.
"""
raise NotImplementedError
def setChild(self, i, t):
"""Set ith child (0..n-1) to t; t must be non-null and non-nil node"""
raise NotImplementedError
def deleteChild(self, i):
raise NotImplementedError
def replaceChildren(self, startChildIndex, stopChildIndex, t):
"""
Delete children from start to stop and replace with t even if t is
a list (nil-root tree). num of children can increase or decrease.
For huge child lists, inserting children can force walking rest of
children to set their childindex; could be slow.
"""
raise NotImplementedError
def isNil(self):
"""
Indicates the node is a nil node but may still have children, meaning
the tree is a flat list.
"""
raise NotImplementedError
def getTokenStartIndex(self):
"""
What is the smallest token index (indexing from 0) for this node
and its children?
"""
raise NotImplementedError
def setTokenStartIndex(self, index):
raise NotImplementedError
def getTokenStopIndex(self):
"""
What is the largest token index (indexing from 0) for this node
and its children?
"""
raise NotImplementedError
def setTokenStopIndex(self, index):
raise NotImplementedError
def dupNode(self):
raise NotImplementedError
def getType(self):
"""Return a token type; needed for tree parsing."""
raise NotImplementedError
def getText(self):
raise NotImplementedError
def getLine(self):
"""
In case we don't have a token payload, what is the line for errors?
"""
raise NotImplementedError
def getCharPositionInLine(self):
raise NotImplementedError
def toStringTree(self):
raise NotImplementedError
def toString(self):
raise NotImplementedError
class TreeAdaptor(object):
"""
@brief Abstract baseclass for tree adaptors.
How to create and navigate trees. Rather than have a separate factory
and adaptor, I've merged them. Makes sense to encapsulate.
This takes the place of the tree construction code generated in the
generated code in 2.x and the ASTFactory.
I do not need to know the type of a tree at all so they are all
generic Objects. This may increase the amount of typecasting needed. :(
"""
# C o n s t r u c t i o n
def createWithPayload(self, payload):
"""
Create a tree node from Token object; for CommonTree type trees,
then the token just becomes the payload. This is the most
common create call.
Override if you want another kind of node to be built.
"""
raise NotImplementedError
def dupNode(self, treeNode):
"""Duplicate a single tree node.
Override if you want another kind of node to be built."""
raise NotImplementedError
def dupTree(self, tree):
"""Duplicate tree recursively, using dupNode() for each node"""
raise NotImplementedError
def nil(self):
"""
Return a nil node (an empty but non-null node) that can hold
a list of element as the children. If you want a flat tree (a list)
use "t=adaptor.nil(); t.addChild(x); t.addChild(y);"
"""
raise NotImplementedError
def errorNode(self, input, start, stop, exc):
"""
Return a tree node representing an error. This node records the
tokens consumed during error recovery. The start token indicates the
input symbol at which the error was detected. The stop token indicates
the last symbol consumed during recovery.
You must specify the input stream so that the erroneous text can
be packaged up in the error node. The exception could be useful
to some applications; default implementation stores ptr to it in
the CommonErrorNode.
This only makes sense during token parsing, not tree parsing.
Tree parsing should happen only when parsing and tree construction
succeed.
"""
raise NotImplementedError
def isNil(self, tree):
"""Is tree considered a nil node used to make lists of child nodes?"""
raise NotImplementedError
def addChild(self, t, child):
"""
Add a child to the tree t. If child is a flat tree (a list), make all
in list children of t. Warning: if t has no children, but child does
and child isNil then you can decide it is ok to move children to t via
t.children = child.children; i.e., without copying the array. Just
make sure that this is consistent with have the user will build
ASTs. Do nothing if t or child is null.
"""
raise NotImplementedError
def becomeRoot(self, newRoot, oldRoot):
"""
If oldRoot is a nil root, just copy or move the children to newRoot.
If not a nil root, make oldRoot a child of newRoot.
old=^(nil a b c), new=r yields ^(r a b c)
old=^(a b c), new=r yields ^(r ^(a b c))
If newRoot is a nil-rooted single child tree, use the single
child as the new root node.
old=^(nil a b c), new=^(nil r) yields ^(r a b c)
old=^(a b c), new=^(nil r) yields ^(r ^(a b c))
If oldRoot was null, it's ok, just return newRoot (even if isNil).
old=null, new=r yields r
old=null, new=^(nil r) yields ^(nil r)
Return newRoot. Throw an exception if newRoot is not a
simple node or nil root with a single child node--it must be a root
node. If newRoot is ^(nil x) return x as newRoot.
Be advised that it's ok for newRoot to point at oldRoot's
children; i.e., you don't have to copy the list. We are
constructing these nodes so we should have this control for
efficiency.
"""
raise NotImplementedError
def rulePostProcessing(self, root):
"""
Given the root of the subtree created for this rule, post process
it to do any simplifications or whatever you want. A required
behavior is to convert ^(nil singleSubtree) to singleSubtree
as the setting of start/stop indexes relies on a single non-nil root
for non-flat trees.
Flat trees such as for lists like "idlist : ID+ ;" are left alone
unless there is only one ID. For a list, the start/stop indexes
are set in the nil node.
This method is executed after all rule tree construction and right
before setTokenBoundaries().
"""
raise NotImplementedError
def getUniqueID(self, node):
"""For identifying trees.
How to identify nodes so we can say "add node to a prior node"?
Even becomeRoot is an issue. Use System.identityHashCode(node)
usually.
"""
raise NotImplementedError
# R e w r i t e R u l e s
def createFromToken(self, tokenType, fromToken, text=None):
"""
Create a new node derived from a token, with a new token type and
(optionally) new text.
This is invoked from an imaginary node ref on right side of a
rewrite rule as IMAG[$tokenLabel] or IMAG[$tokenLabel "IMAG"].
This should invoke createToken(Token).
"""
raise NotImplementedError
def createFromType(self, tokenType, text):
"""Create a new node derived from a token, with a new token type.
This is invoked from an imaginary node ref on right side of a
rewrite rule as IMAG["IMAG"].
This should invoke createToken(int,String).
"""
raise NotImplementedError
# C o n t e n t
def getType(self, t):
"""For tree parsing, I need to know the token type of a node"""
raise NotImplementedError
def setType(self, t, type):
"""Node constructors can set the type of a node"""
raise NotImplementedError
def getText(self, t):
raise NotImplementedError
def setText(self, t, text):
"""Node constructors can set the text of a node"""
raise NotImplementedError
def getToken(self, t):
"""Return the token object from which this node was created.
Currently used only for printing an error message.
The error display routine in BaseRecognizer needs to
display where the input the error occurred. If your
tree of limitation does not store information that can
lead you to the token, you can create a token filled with
the appropriate information and pass that back. See
BaseRecognizer.getErrorMessage().
"""
raise NotImplementedError
def setTokenBoundaries(self, t, startToken, stopToken):
"""
Where are the bounds in the input token stream for this node and
all children? Each rule that creates AST nodes will call this
method right before returning. Flat trees (i.e., lists) will
still usually have a nil root node just to hold the children list.
That node would contain the start/stop indexes then.
"""
raise NotImplementedError
def getTokenStartIndex(self, t):
"""
Get the token start index for this subtree; return -1 if no such index
"""
raise NotImplementedError
def getTokenStopIndex(self, t):
"""
Get the token stop index for this subtree; return -1 if no such index
"""
raise NotImplementedError
# N a v i g a t i o n / T r e e P a r s i n g
def getChild(self, t, i):
"""Get a child 0..n-1 node"""
raise NotImplementedError
def setChild(self, t, i, child):
"""Set ith child (0..n-1) to t; t must be non-null and non-nil node"""
raise NotImplementedError
def deleteChild(self, t, i):
"""Remove ith child and shift children down from right."""
raise NotImplementedError
def getChildCount(self, t):
"""How many children? If 0, then this is a leaf node"""
raise NotImplementedError
def getParent(self, t):
"""
Who is the parent node of this node; if null, implies node is root.
If your node type doesn't handle this, it's ok but the tree rewrites
in tree parsers need this functionality.
"""
raise NotImplementedError
def setParent(self, t, parent):
"""
Who is the parent node of this node; if null, implies node is root.
If your node type doesn't handle this, it's ok but the tree rewrites
in tree parsers need this functionality.
"""
raise NotImplementedError
def getChildIndex(self, t):
"""
What index is this node in the child list? Range: 0..n-1
If your node type doesn't handle this, it's ok but the tree rewrites
in tree parsers need this functionality.
"""
raise NotImplementedError
def setChildIndex(self, t, index):
"""
What index is this node in the child list? Range: 0..n-1
If your node type doesn't handle this, it's ok but the tree rewrites
in tree parsers need this functionality.
"""
raise NotImplementedError
def replaceChildren(self, parent, startChildIndex, stopChildIndex, t):
"""
Replace from start to stop child index of parent with t, which might
be a list. Number of children may be different
after this call.
If parent is null, don't do anything; must be at root of overall tree.
Can't replace whatever points to the parent externally. Do nothing.
"""
raise NotImplementedError
# Misc
def create(self, *args):
"""
Deprecated, use createWithPayload, createFromToken or createFromType.
This method only exists to mimic the Java interface of TreeAdaptor.
"""
if len(args) == 1 and isinstance(args[0], Token):
# Object create(Token payload);
## warnings.warn(
## "Using create() is deprecated, use createWithPayload()",
## DeprecationWarning,
## stacklevel=2
## )
return self.createWithPayload(args[0])
if (len(args) == 2
and isinstance(args[0], (int, long))
and isinstance(args[1], Token)
):
# Object create(int tokenType, Token fromToken);
## warnings.warn(
## "Using create() is deprecated, use createFromToken()",
## DeprecationWarning,
## stacklevel=2
## )
return self.createFromToken(args[0], args[1])
if (len(args) == 3
and isinstance(args[0], (int, long))
and isinstance(args[1], Token)
and isinstance(args[2], basestring)
):
# Object create(int tokenType, Token fromToken, String text);
## warnings.warn(
## "Using create() is deprecated, use createFromToken()",
## DeprecationWarning,
## stacklevel=2
## )
return self.createFromToken(args[0], args[1], args[2])
if (len(args) == 2
and isinstance(args[0], (int, long))
and isinstance(args[1], basestring)
):
# Object create(int tokenType, String text);
## warnings.warn(
## "Using create() is deprecated, use createFromType()",
## DeprecationWarning,
## stacklevel=2
## )
return self.createFromType(args[0], args[1])
raise TypeError(
"No create method with this signature found: %s"
% (', '.join(type(v).__name__ for v in args))
)
############################################################################
#
# base implementation of Tree and TreeAdaptor
#
# Tree
# \- BaseTree
#
# TreeAdaptor
# \- BaseTreeAdaptor
#
############################################################################
class BaseTree(Tree):
"""
@brief A generic tree implementation with no payload.
You must subclass to
actually have any user data. ANTLR v3 uses a list of children approach
instead of the child-sibling approach in v2. A flat tree (a list) is
an empty node whose children represent the list. An empty, but
non-null node is called "nil".
"""
# BaseTree is abstract, no need to complain about not implemented abstract
# methods
# pylint: disable-msg=W0223
def __init__(self, node=None):
"""
Create a new node from an existing node does nothing for BaseTree
as there are no fields other than the children list, which cannot
be copied as the children are not considered part of this node.
"""
Tree.__init__(self)
self.children = []
self.parent = None
self.childIndex = 0
def getChild(self, i):
try:
return self.children[i]
except IndexError:
return None
def getChildren(self):
"""@brief Get the children internal List
Note that if you directly mess with
the list, do so at your own risk.
"""
# FIXME: mark as deprecated
return self.children
def getFirstChildWithType(self, treeType):
for child in self.children:
if child.getType() == treeType:
return child
return None
def getChildCount(self):
return len(self.children)
def addChild(self, childTree):
"""Add t as child of this node.
Warning: if t has no children, but child does
and child isNil then this routine moves children to t via
t.children = child.children; i.e., without copying the array.
"""
# this implementation is much simpler and probably less efficient
# than the mumbo-jumbo that Ter did for the Java runtime.
if childTree is None:
return
if childTree.isNil():
# t is an empty node possibly with children
if self.children is childTree.children:
raise ValueError("attempt to add child list to itself")
# fix parent pointer and childIndex for new children
for idx, child in enumerate(childTree.children):
child.parent = self
child.childIndex = len(self.children) + idx
self.children += childTree.children
else:
# child is not nil (don't care about children)
self.children.append(childTree)
childTree.parent = self
childTree.childIndex = len(self.children) - 1
def addChildren(self, children):
"""Add all elements of kids list as children of this node"""
self.children += children
def setChild(self, i, t):
if t is None:
return
if t.isNil():
raise ValueError("Can't set single child to a list")
self.children[i] = t
t.parent = self
t.childIndex = i
def deleteChild(self, i):
killed = self.children[i]
del self.children[i]
# walk rest and decrement their child indexes
for idx, child in enumerate(self.children[i:]):
child.childIndex = i + idx
return killed
def replaceChildren(self, startChildIndex, stopChildIndex, newTree):
"""
Delete children from start to stop and replace with t even if t is
a list (nil-root tree). num of children can increase or decrease.
For huge child lists, inserting children can force walking rest of
children to set their childindex; could be slow.
"""
if (startChildIndex >= len(self.children)
or stopChildIndex >= len(self.children)
):
raise IndexError("indexes invalid")
replacingHowMany = stopChildIndex - startChildIndex + 1
# normalize to a list of children to add: newChildren
if newTree.isNil():
newChildren = newTree.children
else:
newChildren = [newTree]
replacingWithHowMany = len(newChildren)
delta = replacingHowMany - replacingWithHowMany
if delta == 0:
# if same number of nodes, do direct replace
for idx, child in enumerate(newChildren):
self.children[idx + startChildIndex] = child
child.parent = self
child.childIndex = idx + startChildIndex
else:
# length of children changes...
# ...delete replaced segment...
del self.children[startChildIndex:stopChildIndex+1]
# ...insert new segment...
self.children[startChildIndex:startChildIndex] = newChildren
# ...and fix indeces
self.freshenParentAndChildIndexes(startChildIndex)
def isNil(self):
return False
def freshenParentAndChildIndexes(self, offset=0):
for idx, child in enumerate(self.children[offset:]):
child.childIndex = idx + offset
child.parent = self
def sanityCheckParentAndChildIndexes(self, parent=None, i=-1):
if parent != self.parent:
raise ValueError(
"parents don't match; expected %r found %r"
% (parent, self.parent)
)
if i != self.childIndex:
raise ValueError(
"child indexes don't match; expected %d found %d"
% (i, self.childIndex)
)
for idx, child in enumerate(self.children):
child.sanityCheckParentAndChildIndexes(self, idx)
def getChildIndex(self):
"""BaseTree doesn't track child indexes."""
return 0
def setChildIndex(self, index):
"""BaseTree doesn't track child indexes."""
pass
def getParent(self):
"""BaseTree doesn't track parent pointers."""
return None
def setParent(self, t):
"""BaseTree doesn't track parent pointers."""
pass
def hasAncestor(self, ttype):
"""Walk upwards looking for ancestor with this token type."""
return self.getAncestor(ttype) is not None
def getAncestor(self, ttype):
"""Walk upwards and get first ancestor with this token type."""
t = self.getParent()
while t is not None:
if t.getType() == ttype:
return t
t = t.getParent()
return None
def getAncestors(self):
"""Return a list of all ancestors of this node.
The first node of list is the root and the last is the parent of
this node.
"""
if selfgetParent() is None:
return None
ancestors = []
t = self.getParent()
while t is not None:
ancestors.insert(0, t) # insert at start
t = t.getParent()
return ancestors
def toStringTree(self):
"""Print out a whole tree not just a node"""
if len(self.children) == 0:
return self.toString()
buf = []
if not self.isNil():
buf.append('(')
buf.append(self.toString())
buf.append(' ')
for i, child in enumerate(self.children):
if i > 0:
buf.append(' ')
buf.append(child.toStringTree())
if not self.isNil():
buf.append(')')
return ''.join(buf)
def getLine(self):
return 0
def getCharPositionInLine(self):
return 0
def toString(self):
"""Override to say how a node (not a tree) should look as text"""
raise NotImplementedError
class BaseTreeAdaptor(TreeAdaptor):
"""
@brief A TreeAdaptor that works with any Tree implementation.
"""
# BaseTreeAdaptor is abstract, no need to complain about not implemented
# abstract methods
# pylint: disable-msg=W0223
def nil(self):
return self.createWithPayload(None)
def errorNode(self, input, start, stop, exc):
"""
create tree node that holds the start and stop tokens associated
with an error.
If you specify your own kind of tree nodes, you will likely have to
override this method. CommonTree returns Token.INVALID_TOKEN_TYPE
if no token payload but you might have to set token type for diff
node type.
You don't have to subclass CommonErrorNode; you will likely need to
subclass your own tree node class to avoid class cast exception.
"""
return CommonErrorNode(input, start, stop, exc)
def isNil(self, tree):
return tree.isNil()
def dupTree(self, t, parent=None):
"""
This is generic in the sense that it will work with any kind of
tree (not just Tree interface). It invokes the adaptor routines
not the tree node routines to do the construction.
"""
if t is None:
return None
newTree = self.dupNode(t)
# ensure new subtree root has parent/child index set
# same index in new tree
self.setChildIndex(newTree, self.getChildIndex(t))
self.setParent(newTree, parent)
for i in range(self.getChildCount(t)):
child = self.getChild(t, i)
newSubTree = self.dupTree(child, t)
self.addChild(newTree, newSubTree)
return newTree
def addChild(self, tree, child):
"""
Add a child to the tree t. If child is a flat tree (a list), make all
in list children of t. Warning: if t has no children, but child does
and child isNil then you can decide it is ok to move children to t via
t.children = child.children; i.e., without copying the array. Just
make sure that this is consistent with have the user will build
ASTs.
"""
#if isinstance(child, Token):
# child = self.createWithPayload(child)
if tree is not None and child is not None:
tree.addChild(child)
def becomeRoot(self, newRoot, oldRoot):
"""
If oldRoot is a nil root, just copy or move the children to newRoot.
If not a nil root, make oldRoot a child of newRoot.
old=^(nil a b c), new=r yields ^(r a b c)
old=^(a b c), new=r yields ^(r ^(a b c))
If newRoot is a nil-rooted single child tree, use the single
child as the new root node.
old=^(nil a b c), new=^(nil r) yields ^(r a b c)
old=^(a b c), new=^(nil r) yields ^(r ^(a b c))
If oldRoot was null, it's ok, just return newRoot (even if isNil).
old=null, new=r yields r
old=null, new=^(nil r) yields ^(nil r)
Return newRoot. Throw an exception if newRoot is not a
simple node or nil root with a single child node--it must be a root
node. If newRoot is ^(nil x) return x as newRoot.
Be advised that it's ok for newRoot to point at oldRoot's
children; i.e., you don't have to copy the list. We are
constructing these nodes so we should have this control for
efficiency.
"""
if isinstance(newRoot, Token):
newRoot = self.create(newRoot)
if oldRoot is None:
return newRoot
if not isinstance(newRoot, CommonTree):
newRoot = self.createWithPayload(newRoot)
# handle ^(nil real-node)
if newRoot.isNil():
nc = newRoot.getChildCount()
if nc == 1:
newRoot = newRoot.getChild(0)
elif nc > 1:
# TODO: make tree run time exceptions hierarchy
raise RuntimeError("more than one node as root")
# add oldRoot to newRoot; addChild takes care of case where oldRoot
# is a flat list (i.e., nil-rooted tree). All children of oldRoot
# are added to newRoot.
newRoot.addChild(oldRoot)
return newRoot
def rulePostProcessing(self, root):
"""Transform ^(nil x) to x and nil to null"""
if root is not None and root.isNil():
if root.getChildCount() == 0:
root = None
elif root.getChildCount() == 1:
root = root.getChild(0)
# whoever invokes rule will set parent and child index
root.setParent(None)
root.setChildIndex(-1)
return root
def createFromToken(self, tokenType, fromToken, text=None):
if fromToken is None:
return self.createFromType(tokenType, text)
assert isinstance(tokenType, (int, long)), type(tokenType).__name__
assert isinstance(fromToken, Token), type(fromToken).__name__
assert text is None or isinstance(text, basestring), type(text).__name__
fromToken = self.createToken(fromToken)
fromToken.type = tokenType
if text is not None:
fromToken.text = text
t = self.createWithPayload(fromToken)
return t
def createFromType(self, tokenType, text):
assert isinstance(tokenType, (int, long)), type(tokenType).__name__
assert isinstance(text, basestring) or text is None, type(text).__name__
fromToken = self.createToken(tokenType=tokenType, text=text)
t = self.createWithPayload(fromToken)
return t
def getType(self, t):
return t.getType()
def setType(self, t, type):
raise RuntimeError("don't know enough about Tree node")
def getText(self, t):
return t.getText()
def setText(self, t, text):
raise RuntimeError("don't know enough about Tree node")
def getChild(self, t, i):
return t.getChild(i)
def setChild(self, t, i, child):
t.setChild(i, child)
def deleteChild(self, t, i):
return t.deleteChild(i)
def getChildCount(self, t):
return t.getChildCount()
def getUniqueID(self, node):
return hash(node)
def createToken(self, fromToken=None, tokenType=None, text=None):
"""
Tell me how to create a token for use with imaginary token nodes.
For example, there is probably no input symbol associated with imaginary
token DECL, but you need to create it as a payload or whatever for
the DECL node as in ^(DECL type ID).
If you care what the token payload objects' type is, you should
override this method and any other createToken variant.
"""
raise NotImplementedError
############################################################################
#
# common tree implementation
#
# Tree
# \- BaseTree
# \- CommonTree
# \- CommonErrorNode
#
# TreeAdaptor
# \- BaseTreeAdaptor
# \- CommonTreeAdaptor
#
############################################################################
class CommonTree(BaseTree):
"""@brief A tree node that is wrapper for a Token object.
After 3.0 release
while building tree rewrite stuff, it became clear that computing
parent and child index is very difficult and cumbersome. Better to
spend the space in every tree node. If you don't want these extra
fields, it's easy to cut them out in your own BaseTree subclass.
"""
def __init__(self, payload):
BaseTree.__init__(self)
# What token indexes bracket all tokens associated with this node
# and below?
self.startIndex = -1
self.stopIndex = -1
# Who is the parent node of this node; if null, implies node is root
self.parent = None
# What index is this node in the child list? Range: 0..n-1
self.childIndex = -1
# A single token is the payload
if payload is None:
self.token = None
elif isinstance(payload, CommonTree):
self.token = payload.token
self.startIndex = payload.startIndex
self.stopIndex = payload.stopIndex
elif payload is None or isinstance(payload, Token):
self.token = payload
else:
raise TypeError(type(payload).__name__)
def getToken(self):
return self.token
def dupNode(self):
return CommonTree(self)
def isNil(self):
return self.token is None
def getType(self):
if self.token is None:
return INVALID_TOKEN_TYPE
return self.token.getType()
type = property(getType)
def getText(self):
if self.token is None:
return None
return self.token.text
text = property(getText)
def getLine(self):
if self.token is None or self.token.getLine() == 0:
if self.getChildCount():
return self.getChild(0).getLine()
else:
return 0
return self.token.getLine()
line = property(getLine)
def getCharPositionInLine(self):
if self.token is None or self.token.getCharPositionInLine() == -1:
if self.getChildCount():
return self.getChild(0).getCharPositionInLine()
else:
return 0
else:
return self.token.getCharPositionInLine()
charPositionInLine = property(getCharPositionInLine)
def getTokenStartIndex(self):
if self.startIndex == -1 and self.token is not None:
return self.token.getTokenIndex()
return self.startIndex
def setTokenStartIndex(self, index):
self.startIndex = index
tokenStartIndex = property(getTokenStartIndex, setTokenStartIndex)
def getTokenStopIndex(self):
if self.stopIndex == -1 and self.token is not None:
return self.token.getTokenIndex()
return self.stopIndex
def setTokenStopIndex(self, index):
self.stopIndex = index
tokenStopIndex = property(getTokenStopIndex, setTokenStopIndex)
def setUnknownTokenBoundaries(self):
"""For every node in this subtree, make sure it's start/stop token's
are set. Walk depth first, visit bottom up. Only updates nodes
with at least one token index < 0.
"""
if self.children is None:
if self.startIndex < 0 or self.stopIndex < 0:
self.startIndex = self.stopIndex = self.token.getTokenIndex()
return
for child in self.children:
child.setUnknownTokenBoundaries()
if self.startIndex >= 0 and self.stopIndex >= 0:
# already set
return
if self.children:
firstChild = self.children[0]
lastChild = self.children[-1]
self.startIndex = firstChild.getTokenStartIndex()
self.stopIndex = lastChild.getTokenStopIndex()
def getChildIndex(self):
#FIXME: mark as deprecated
return self.childIndex
def setChildIndex(self, idx):
#FIXME: mark as deprecated
self.childIndex = idx
def getParent(self):
#FIXME: mark as deprecated
return self.parent
def setParent(self, t):
#FIXME: mark as deprecated
self.parent = t
def toString(self):
if self.isNil():
return "nil"
if self.getType() == INVALID_TOKEN_TYPE:
return "<errornode>"
return self.token.text
__str__ = toString
def toStringTree(self):
if not self.children:
return self.toString()
ret = ''
if not self.isNil():
ret += '(%s ' % (self.toString())
ret += ' '.join([child.toStringTree() for child in self.children])
if not self.isNil():
ret += ')'
return ret
INVALID_NODE = CommonTree(INVALID_TOKEN)
class CommonErrorNode(CommonTree):
"""A node representing erroneous token range in token stream"""
def __init__(self, input, start, stop, exc):
CommonTree.__init__(self, None)
if (stop is None or
(stop.getTokenIndex() < start.getTokenIndex() and
stop.getType() != EOF
)
):
# sometimes resync does not consume a token (when LT(1) is
# in follow set. So, stop will be 1 to left to start. adjust.
# Also handle case where start is the first token and no token
# is consumed during recovery; LT(-1) will return null.
stop = start
self.input = input
self.start = start
self.stop = stop
self.trappedException = exc
def isNil(self):
return False
def getType(self):
return INVALID_TOKEN_TYPE
def getText(self):
if isinstance(self.start, Token):
i = self.start.getTokenIndex()
j = self.stop.getTokenIndex()
if self.stop.getType() == EOF:
j = self.input.size()
badText = self.input.toString(i, j)
elif isinstance(self.start, Tree):
badText = self.input.toString(self.start, self.stop)
else:
# people should subclass if they alter the tree type so this
# next one is for sure correct.
badText = "<unknown>"
return badText
def toString(self):
if isinstance(self.trappedException, MissingTokenException):
return ("<missing type: "
+ str(self.trappedException.getMissingType())
+ ">")
elif isinstance(self.trappedException, UnwantedTokenException):
return ("<extraneous: "
+ str(self.trappedException.getUnexpectedToken())
+ ", resync=" + self.getText() + ">")
elif isinstance(self.trappedException, MismatchedTokenException):
return ("<mismatched token: "
+ str(self.trappedException.token)
+ ", resync=" + self.getText() + ">")
elif isinstance(self.trappedException, NoViableAltException):
return ("<unexpected: "
+ str(self.trappedException.token)
+ ", resync=" + self.getText() + ">")
return "<error: "+self.getText()+">"
class CommonTreeAdaptor(BaseTreeAdaptor):
"""
@brief A TreeAdaptor that works with any Tree implementation.
It provides
really just factory methods; all the work is done by BaseTreeAdaptor.
If you would like to have different tokens created than ClassicToken
objects, you need to override this and then set the parser tree adaptor to
use your subclass.
To get your parser to build nodes of a different type, override
create(Token), errorNode(), and to be safe, YourTreeClass.dupNode().
dupNode is called to duplicate nodes during rewrite operations.
"""
def dupNode(self, treeNode):
"""
Duplicate a node. This is part of the factory;
override if you want another kind of node to be built.
I could use reflection to prevent having to override this
but reflection is slow.
"""
if treeNode is None:
return None
return treeNode.dupNode()
def createWithPayload(self, payload):
return CommonTree(payload)
def createToken(self, fromToken=None, tokenType=None, text=None):
"""
Tell me how to create a token for use with imaginary token nodes.
For example, there is probably no input symbol associated with imaginary
token DECL, but you need to create it as a payload or whatever for
the DECL node as in ^(DECL type ID).
If you care what the token payload objects' type is, you should
override this method and any other createToken variant.
"""
if fromToken is not None:
return CommonToken(oldToken=fromToken)
return CommonToken(type=tokenType, text=text)
def setTokenBoundaries(self, t, startToken, stopToken):
"""
Track start/stop token for subtree root created for a rule.
Only works with Tree nodes. For rules that match nothing,
seems like this will yield start=i and stop=i-1 in a nil node.
Might be useful info so I'll not force to be i..i.
"""
if t is None:
return
start = 0
stop = 0
if startToken is not None:
start = startToken.index
if stopToken is not None:
stop = stopToken.index
t.setTokenStartIndex(start)
t.setTokenStopIndex(stop)
def getTokenStartIndex(self, t):
if t is None:
return -1
return t.getTokenStartIndex()
def getTokenStopIndex(self, t):
if t is None:
return -1
return t.getTokenStopIndex()
def getText(self, t):
if t is None:
return None
return t.getText()
def getType(self, t):
if t is None:
return INVALID_TOKEN_TYPE
return t.getType()
def getToken(self, t):
"""
What is the Token associated with this node? If
you are not using CommonTree, then you must
override this in your own adaptor.
"""
if isinstance(t, CommonTree):
return t.getToken()
return None # no idea what to do
def getChild(self, t, i):
if t is None:
return None
return t.getChild(i)
def getChildCount(self, t):
if t is None:
return 0
return t.getChildCount()
def getParent(self, t):
return t.getParent()
def setParent(self, t, parent):
t.setParent(parent)
def getChildIndex(self, t):
if t is None:
return 0
return t.getChildIndex()
def setChildIndex(self, t, index):
t.setChildIndex(index)
def replaceChildren(self, parent, startChildIndex, stopChildIndex, t):
if parent is not None:
parent.replaceChildren(startChildIndex, stopChildIndex, t)
############################################################################
#
# streams
#
# TreeNodeStream
# \- BaseTree
# \- CommonTree
#
# TreeAdaptor
# \- BaseTreeAdaptor
# \- CommonTreeAdaptor
#
############################################################################
class TreeNodeStream(IntStream):
"""@brief A stream of tree nodes
It accessing nodes from a tree of some kind.
"""
# TreeNodeStream is abstract, no need to complain about not implemented
# abstract methods
# pylint: disable-msg=W0223
def get(self, i):
"""Get a tree node at an absolute index i; 0..n-1.
If you don't want to buffer up nodes, then this method makes no
sense for you.
"""
raise NotImplementedError
def LT(self, k):
"""
Get tree node at current input pointer + i ahead where i=1 is next node.
i<0 indicates nodes in the past. So LT(-1) is previous node, but
implementations are not required to provide results for k < -1.
LT(0) is undefined. For i>=n, return null.
Return null for LT(0) and any index that results in an absolute address
that is negative.
This is analogus to the LT() method of the TokenStream, but this
returns a tree node instead of a token. Makes code gen identical
for both parser and tree grammars. :)
"""
raise NotImplementedError
def getTreeSource(self):
"""
Where is this stream pulling nodes from? This is not the name, but
the object that provides node objects.
"""
raise NotImplementedError
def getTokenStream(self):
"""
If the tree associated with this stream was created from a TokenStream,
you can specify it here. Used to do rule $text attribute in tree
parser. Optional unless you use tree parser rule text attribute
or output=template and rewrite=true options.
"""
raise NotImplementedError
def getTreeAdaptor(self):
"""
What adaptor can tell me how to interpret/navigate nodes and
trees. E.g., get text of a node.
"""
raise NotImplementedError
def setUniqueNavigationNodes(self, uniqueNavigationNodes):
"""
As we flatten the tree, we use UP, DOWN nodes to represent
the tree structure. When debugging we need unique nodes
so we have to instantiate new ones. When doing normal tree
parsing, it's slow and a waste of memory to create unique
navigation nodes. Default should be false;
"""
raise NotImplementedError
def reset(self):
"""
Reset the tree node stream in such a way that it acts like
a freshly constructed stream.
"""
raise NotImplementedError
def toString(self, start, stop):
"""
Return the text of all nodes from start to stop, inclusive.
If the stream does not buffer all the nodes then it can still
walk recursively from start until stop. You can always return
null or "" too, but users should not access $ruleLabel.text in
an action of course in that case.
"""
raise NotImplementedError
# REWRITING TREES (used by tree parser)
def replaceChildren(self, parent, startChildIndex, stopChildIndex, t):
"""
Replace from start to stop child index of parent with t, which might
be a list. Number of children may be different
after this call. The stream is notified because it is walking the
tree and might need to know you are monkeying with the underlying
tree. Also, it might be able to modify the node stream to avoid
restreaming for future phases.
If parent is null, don't do anything; must be at root of overall tree.
Can't replace whatever points to the parent externally. Do nothing.
"""
raise NotImplementedError
class CommonTreeNodeStream(TreeNodeStream):
"""@brief A buffered stream of tree nodes.
Nodes can be from a tree of ANY kind.
This node stream sucks all nodes out of the tree specified in
the constructor during construction and makes pointers into
the tree using an array of Object pointers. The stream necessarily
includes pointers to DOWN and UP and EOF nodes.
This stream knows how to mark/release for backtracking.
This stream is most suitable for tree interpreters that need to
jump around a lot or for tree parsers requiring speed (at cost of memory).
There is some duplicated functionality here with UnBufferedTreeNodeStream
but just in bookkeeping, not tree walking etc...
@see UnBufferedTreeNodeStream
"""
def __init__(self, *args):
TreeNodeStream.__init__(self)
if len(args) == 1:
adaptor = CommonTreeAdaptor()
tree = args[0]
nodes = None
down = None
up = None
eof = None
elif len(args) == 2:
adaptor = args[0]
tree = args[1]
nodes = None
down = None
up = None
eof = None
elif len(args) == 3:
parent = args[0]
start = args[1]
stop = args[2]
adaptor = parent.adaptor
tree = parent.root
nodes = parent.nodes[start:stop]
down = parent.down
up = parent.up
eof = parent.eof
else:
raise TypeError("Invalid arguments")
# all these navigation nodes are shared and hence they
# cannot contain any line/column info
if down is not None:
self.down = down
else:
self.down = adaptor.createFromType(DOWN, "DOWN")
if up is not None:
self.up = up
else:
self.up = adaptor.createFromType(UP, "UP")
if eof is not None:
self.eof = eof
else:
self.eof = adaptor.createFromType(EOF, "EOF")
# The complete mapping from stream index to tree node.
# This buffer includes pointers to DOWN, UP, and EOF nodes.
# It is built upon ctor invocation. The elements are type
# Object as we don't what the trees look like.
# Load upon first need of the buffer so we can set token types
# of interest for reverseIndexing. Slows us down a wee bit to
# do all of the if p==-1 testing everywhere though.
if nodes is not None:
self.nodes = nodes
else:
self.nodes = []
# Pull nodes from which tree?
self.root = tree
# IF this tree (root) was created from a token stream, track it.
self.tokens = None
# What tree adaptor was used to build these trees
self.adaptor = adaptor
# Reuse same DOWN, UP navigation nodes unless this is true
self.uniqueNavigationNodes = False
# The index into the nodes list of the current node (next node
# to consume). If -1, nodes array not filled yet.
self.p = -1
# Track the last mark() call result value for use in rewind().
self.lastMarker = None
# Stack of indexes used for push/pop calls
self.calls = []
def __iter__(self):
return TreeIterator(self.root, self.adaptor)
def fillBuffer(self):
"""Walk tree with depth-first-search and fill nodes buffer.
Don't do DOWN, UP nodes if its a list (t is isNil).
"""
self._fillBuffer(self.root)
self.p = 0 # buffer of nodes intialized now
def _fillBuffer(self, t):
nil = self.adaptor.isNil(t)
if not nil:
self.nodes.append(t) # add this node
# add DOWN node if t has children
n = self.adaptor.getChildCount(t)
if not nil and n > 0:
self.addNavigationNode(DOWN)
# and now add all its children
for c in range(n):
self._fillBuffer(self.adaptor.getChild(t, c))
# add UP node if t has children
if not nil and n > 0:
self.addNavigationNode(UP)
def getNodeIndex(self, node):
"""What is the stream index for node? 0..n-1
Return -1 if node not found.
"""
if self.p == -1:
self.fillBuffer()
for i, t in enumerate(self.nodes):
if t == node:
return i
return -1
def addNavigationNode(self, ttype):
"""
As we flatten the tree, we use UP, DOWN nodes to represent
the tree structure. When debugging we need unique nodes
so instantiate new ones when uniqueNavigationNodes is true.
"""
navNode = None
if ttype == DOWN:
if self.hasUniqueNavigationNodes():
navNode = self.adaptor.createFromType(DOWN, "DOWN")
else:
navNode = self.down
else:
if self.hasUniqueNavigationNodes():
navNode = self.adaptor.createFromType(UP, "UP")
else:
navNode = self.up
self.nodes.append(navNode)
def get(self, i):
if self.p == -1:
self.fillBuffer()
return self.nodes[i]
def LT(self, k):
if self.p == -1:
self.fillBuffer()
if k == 0:
return None
if k < 0:
return self.LB(-k)
if self.p + k - 1 >= len(self.nodes):
return self.eof
return self.nodes[self.p + k - 1]
def getCurrentSymbol(self):
return self.LT(1)
def LB(self, k):
"""Look backwards k nodes"""
if k == 0:
return None
if self.p - k < 0:
return None
return self.nodes[self.p - k]
def isEOF(self, obj):
return self.adaptor.getType(obj) == EOF
def getTreeSource(self):
return self.root
def getSourceName(self):
return self.getTokenStream().getSourceName()
def getTokenStream(self):
return self.tokens
def setTokenStream(self, tokens):
self.tokens = tokens
def getTreeAdaptor(self):
return self.adaptor
def hasUniqueNavigationNodes(self):
return self.uniqueNavigationNodes
def setUniqueNavigationNodes(self, uniqueNavigationNodes):
self.uniqueNavigationNodes = uniqueNavigationNodes
def consume(self):
if self.p == -1:
self.fillBuffer()
self.p += 1
def LA(self, i):
return self.adaptor.getType(self.LT(i))
def mark(self):
if self.p == -1:
self.fillBuffer()
self.lastMarker = self.index()
return self.lastMarker
def release(self, marker=None):
# no resources to release
pass
def index(self):
return self.p
def rewind(self, marker=None):
if marker is None:
marker = self.lastMarker
self.seek(marker)
def seek(self, index):
if self.p == -1:
self.fillBuffer()
self.p = index
def push(self, index):
"""
Make stream jump to a new location, saving old location.
Switch back with pop().
"""
self.calls.append(self.p) # save current index
self.seek(index)
def pop(self):
"""
Seek back to previous index saved during last push() call.
Return top of stack (return index).
"""
ret = self.calls.pop(-1)
self.seek(ret)
return ret
def reset(self):
self.p = 0
self.lastMarker = 0
self.calls = []
def size(self):
if self.p == -1:
self.fillBuffer()
return len(self.nodes)
# TREE REWRITE INTERFACE
def replaceChildren(self, parent, startChildIndex, stopChildIndex, t):
if parent is not None:
self.adaptor.replaceChildren(
parent, startChildIndex, stopChildIndex, t
)
def __str__(self):
"""Used for testing, just return the token type stream"""
if self.p == -1:
self.fillBuffer()
return ' '.join([str(self.adaptor.getType(node))
for node in self.nodes
])
def toString(self, start, stop):
if start is None or stop is None:
return None
if self.p == -1:
self.fillBuffer()
#System.out.println("stop: "+stop);
#if ( start instanceof CommonTree )
# System.out.print("toString: "+((CommonTree)start).getToken()+", ");
#else
# System.out.println(start);
#if ( stop instanceof CommonTree )
# System.out.println(((CommonTree)stop).getToken());
#else
# System.out.println(stop);
# if we have the token stream, use that to dump text in order
if self.tokens is not None:
beginTokenIndex = self.adaptor.getTokenStartIndex(start)
endTokenIndex = self.adaptor.getTokenStopIndex(stop)
# if it's a tree, use start/stop index from start node
# else use token range from start/stop nodes
if self.adaptor.getType(stop) == UP:
endTokenIndex = self.adaptor.getTokenStopIndex(start)
elif self.adaptor.getType(stop) == EOF:
endTokenIndex = self.size() -2 # don't use EOF
return self.tokens.toString(beginTokenIndex, endTokenIndex)
# walk nodes looking for start
i, t = 0, None
for i, t in enumerate(self.nodes):
if t == start:
break
# now walk until we see stop, filling string buffer with text
buf = []
t = self.nodes[i]
while t != stop:
text = self.adaptor.getText(t)
if text is None:
text = " " + self.adaptor.getType(t)
buf.append(text)
i += 1
t = self.nodes[i]
# include stop node too
text = self.adaptor.getText(stop)
if text is None:
text = " " +self.adaptor.getType(stop)
buf.append(text)
return ''.join(buf)
## iterator interface
def __iter__(self):
if self.p == -1:
self.fillBuffer()
for node in self.nodes:
yield node
#############################################################################
#
# tree parser
#
#############################################################################
class TreeParser(BaseRecognizer):
"""@brief Baseclass for generated tree parsers.
A parser for a stream of tree nodes. "tree grammars" result in a subclass
of this. All the error reporting and recovery is shared with Parser via
the BaseRecognizer superclass.
"""
def __init__(self, input, state=None):
BaseRecognizer.__init__(self, state)
self.input = None
self.setTreeNodeStream(input)
def reset(self):
BaseRecognizer.reset(self) # reset all recognizer state variables
if self.input is not None:
self.input.seek(0) # rewind the input
def setTreeNodeStream(self, input):
"""Set the input stream"""
self.input = input
def getTreeNodeStream(self):
return self.input
def getSourceName(self):
return self.input.getSourceName()
def getCurrentInputSymbol(self, input):
return input.LT(1)
def getMissingSymbol(self, input, e, expectedTokenType, follow):
tokenText = "<missing " + self.tokenNames[expectedTokenType] + ">"
adaptor = input.adaptor
return adaptor.createToken(
CommonToken(type=expectedTokenType, text=tokenText))
# precompiled regex used by inContext
dotdot = ".*[^.]\\.\\.[^.].*"
doubleEtc = ".*\\.\\.\\.\\s+\\.\\.\\..*"
dotdotPattern = re.compile(dotdot)
doubleEtcPattern = re.compile(doubleEtc)
def inContext(self, context, adaptor=None, tokenName=None, t=None):
"""Check if current node in input has a context.
Context means sequence of nodes towards root of tree. For example,
you might say context is "MULT" which means my parent must be MULT.
"CLASS VARDEF" says current node must be child of a VARDEF and whose
parent is a CLASS node. You can use "..." to mean zero-or-more nodes.
"METHOD ... VARDEF" means my parent is VARDEF and somewhere above
that is a METHOD node. The first node in the context is not
necessarily the root. The context matcher stops matching and returns
true when it runs out of context. There is no way to force the first
node to be the root.
"""
return _inContext(
self.input.getTreeAdaptor(), self.getTokenNames(),
self.input.LT(1), context)
@classmethod
def _inContext(cls, adaptor, tokenNames, t, context):
"""The worker for inContext.
It's static and full of parameters for testing purposes.
"""
if cls.dotdotPattern.match(context):
# don't allow "..", must be "..."
raise ValueError("invalid syntax: ..")
if cls.doubleEtcPattern.match(context):
# don't allow double "..."
raise ValueError("invalid syntax: ... ...")
# ensure spaces around ...
context = context.replace("...", " ... ")
context = context.strip()
nodes = context.split()
ni = len(nodes) - 1
t = adaptor.getParent(t)
while ni >= 0 and t is not None:
if nodes[ni] == "...":
# walk upwards until we see nodes[ni-1] then continue walking
if ni == 0:
# ... at start is no-op
return True
goal = nodes[ni-1]
ancestor = cls._getAncestor(adaptor, tokenNames, t, goal)
if ancestor is None:
return False
t = ancestor
ni -= 1
name = tokenNames[adaptor.getType(t)]
if name != nodes[ni]:
return False
# advance to parent and to previous element in context node list
ni -= 1
t = adaptor.getParent(t)
# at root but more nodes to match
if t is None and ni >= 0:
return False
return True
@staticmethod
def _getAncestor(adaptor, tokenNames, t, goal):
"""Helper for static inContext."""
while t is not None:
name = tokenNames[adaptor.getType(t)]
if name == goal:
return t
t = adaptor.getParent(t)
return None
def matchAny(self, ignore): # ignore stream, copy of this.input
"""
Match '.' in tree parser has special meaning. Skip node or
entire tree if node has children. If children, scan until
corresponding UP node.
"""
self._state.errorRecovery = False
look = self.input.LT(1)
if self.input.getTreeAdaptor().getChildCount(look) == 0:
self.input.consume() # not subtree, consume 1 node and return
return
# current node is a subtree, skip to corresponding UP.
# must count nesting level to get right UP
level = 0
tokenType = self.input.getTreeAdaptor().getType(look)
while tokenType != EOF and not (tokenType == UP and level==0):
self.input.consume()
look = self.input.LT(1)
tokenType = self.input.getTreeAdaptor().getType(look)
if tokenType == DOWN:
level += 1
elif tokenType == UP:
level -= 1
self.input.consume() # consume UP
def mismatch(self, input, ttype, follow):
"""
We have DOWN/UP nodes in the stream that have no line info; override.
plus we want to alter the exception type. Don't try to recover
from tree parser errors inline...
"""
raise MismatchedTreeNodeException(ttype, input)
def getErrorHeader(self, e):
"""
Prefix error message with the grammar name because message is
always intended for the programmer because the parser built
the input tree not the user.
"""
return (self.getGrammarFileName() +
": node from %sline %s:%s"
% (['', "after "][e.approximateLineInfo],
e.line,
e.charPositionInLine
)
)
def getErrorMessage(self, e, tokenNames):
"""
Tree parsers parse nodes they usually have a token object as
payload. Set the exception token and do the default behavior.
"""
if isinstance(self, TreeParser):
adaptor = e.input.getTreeAdaptor()
e.token = adaptor.getToken(e.node)
if e.token is not None: # could be an UP/DOWN node
e.token = CommonToken(
type=adaptor.getType(e.node),
text=adaptor.getText(e.node)
)
return BaseRecognizer.getErrorMessage(self, e, tokenNames)
def traceIn(self, ruleName, ruleIndex):
BaseRecognizer.traceIn(self, ruleName, ruleIndex, self.input.LT(1))
def traceOut(self, ruleName, ruleIndex):
BaseRecognizer.traceOut(self, ruleName, ruleIndex, self.input.LT(1))
#############################################################################
#
# tree visitor
#
#############################################################################
class TreeVisitor(object):
"""Do a depth first walk of a tree, applying pre() and post() actions
we go.
"""
def __init__(self, adaptor=None):
if adaptor is not None:
self.adaptor = adaptor
else:
self.adaptor = CommonTreeAdaptor()
def visit(self, t, pre_action=None, post_action=None):
"""Visit every node in tree t and trigger an action for each node
before/after having visited all of its children. Bottom up walk.
Execute both actions even if t has no children. Ignore return
results from transforming children since they will have altered
the child list of this node (their parent). Return result of
applying post action to this node.
The Python version differs from the Java version by taking two
callables 'pre_action' and 'post_action' instead of a class instance
that wraps those methods. Those callables must accept a TreeNode as
their single argument and return the (potentially transformed or
replaced) TreeNode.
"""
isNil = self.adaptor.isNil(t)
if pre_action is not None and not isNil:
# if rewritten, walk children of new t
t = pre_action(t)
idx = 0
while idx < self.adaptor.getChildCount(t):
child = self.adaptor.getChild(t, idx)
self.visit(child, pre_action, post_action)
idx += 1
if post_action is not None and not isNil:
t = post_action(t)
return t
#############################################################################
#
# tree iterator
#
#############################################################################
class TreeIterator(object):
"""
Return a node stream from a doubly-linked tree whose nodes
know what child index they are.
Emit navigation nodes (DOWN, UP, and EOF) to let show tree structure.
"""
def __init__(self, tree, adaptor=None):
if adaptor is None:
adaptor = CommonTreeAdaptor()
self.root = tree
self.adaptor = adaptor
self.first_time = True
self.tree = tree
# If we emit UP/DOWN nodes, we need to spit out multiple nodes per
# next() call.
self.nodes = []
# navigation nodes to return during walk and at end
self.down = adaptor.createFromType(DOWN, "DOWN")
self.up = adaptor.createFromType(UP, "UP")
self.eof = adaptor.createFromType(EOF, "EOF")
def reset(self):
self.first_time = True
self.tree = self.root
self.nodes = []
def __iter__(self):
return self
def has_next(self):
if self.first_time:
return self.root is not None
if len(self.nodes) > 0:
return True
if self.tree is None:
return False
if self.adaptor.getChildCount(self.tree) > 0:
return True
# back at root?
return self.adaptor.getParent(self.tree) is not None
def next(self):
if not self.has_next():
raise StopIteration
if self.first_time:
# initial condition
self.first_time = False
if self.adaptor.getChildCount(self.tree) == 0:
# single node tree (special)
self.nodes.append(self.eof)
return self.tree
return self.tree
# if any queued up, use those first
if len(self.nodes) > 0:
return self.nodes.pop(0)
# no nodes left?
if self.tree is None:
return self.eof
# next node will be child 0 if any children
if self.adaptor.getChildCount(self.tree) > 0:
self.tree = self.adaptor.getChild(self.tree, 0)
# real node is next after DOWN
self.nodes.append(self.tree)
return self.down
# if no children, look for next sibling of tree or ancestor
parent = self.adaptor.getParent(self.tree)
# while we're out of siblings, keep popping back up towards root
while (parent is not None
and self.adaptor.getChildIndex(self.tree)+1 >= self.adaptor.getChildCount(parent)):
# we're moving back up
self.nodes.append(self.up)
self.tree = parent
parent = self.adaptor.getParent(self.tree)
# no nodes left?
if parent is None:
self.tree = None # back at root? nothing left then
self.nodes.append(self.eof) # add to queue, might have UP nodes in there
return self.nodes.pop(0)
# must have found a node with an unvisited sibling
# move to it and return it
nextSiblingIndex = self.adaptor.getChildIndex(self.tree) + 1
self.tree = self.adaptor.getChild(parent, nextSiblingIndex)
self.nodes.append(self.tree) # add to queue, might have UP nodes in there
return self.nodes.pop(0)
#############################################################################
#
# streams for rule rewriting
#
#############################################################################
class RewriteRuleElementStream(object):
"""@brief Internal helper class.
A generic list of elements tracked in an alternative to be used in
a -> rewrite rule. We need to subclass to fill in the next() method,
which returns either an AST node wrapped around a token payload or
an existing subtree.
Once you start next()ing, do not try to add more elements. It will
break the cursor tracking I believe.
@see org.antlr.runtime.tree.RewriteRuleSubtreeStream
@see org.antlr.runtime.tree.RewriteRuleTokenStream
TODO: add mechanism to detect/puke on modification after reading from
stream
"""
def __init__(self, adaptor, elementDescription, elements=None):
# Cursor 0..n-1. If singleElement!=null, cursor is 0 until you next(),
# which bumps it to 1 meaning no more elements.
self.cursor = 0
# Track single elements w/o creating a list. Upon 2nd add, alloc list
self.singleElement = None
# The list of tokens or subtrees we are tracking
self.elements = None
# Once a node / subtree has been used in a stream, it must be dup'd
# from then on. Streams are reset after subrules so that the streams
# can be reused in future subrules. So, reset must set a dirty bit.
# If dirty, then next() always returns a dup.
self.dirty = False
# The element or stream description; usually has name of the token or
# rule reference that this list tracks. Can include rulename too, but
# the exception would track that info.
self.elementDescription = elementDescription
self.adaptor = adaptor
if isinstance(elements, (list, tuple)):
# Create a stream, but feed off an existing list
self.singleElement = None
self.elements = elements
else:
# Create a stream with one element
self.add(elements)
def reset(self):
"""
Reset the condition of this stream so that it appears we have
not consumed any of its elements. Elements themselves are untouched.
Once we reset the stream, any future use will need duplicates. Set
the dirty bit.
"""
self.cursor = 0
self.dirty = True
def add(self, el):
if el is None:
return
if self.elements is not None: # if in list, just add
self.elements.append(el)
return
if self.singleElement is None: # no elements yet, track w/o list
self.singleElement = el
return
# adding 2nd element, move to list
self.elements = []
self.elements.append(self.singleElement)
self.singleElement = None
self.elements.append(el)
def nextTree(self):
"""
Return the next element in the stream. If out of elements, throw
an exception unless size()==1. If size is 1, then return elements[0].
Return a duplicate node/subtree if stream is out of elements and
size==1. If we've already used the element, dup (dirty bit set).
"""
if (self.dirty
or (self.cursor >= len(self) and len(self) == 1)
):
# if out of elements and size is 1, dup
el = self._next()
return self.dup(el)
# test size above then fetch
el = self._next()
return el
def _next(self):
"""
do the work of getting the next element, making sure that it's
a tree node or subtree. Deal with the optimization of single-
element list versus list of size > 1. Throw an exception
if the stream is empty or we're out of elements and size>1.
protected so you can override in a subclass if necessary.
"""
if len(self) == 0:
raise RewriteEmptyStreamException(self.elementDescription)
if self.cursor >= len(self): # out of elements?
if len(self) == 1: # if size is 1, it's ok; return and we'll dup
return self.toTree(self.singleElement)
# out of elements and size was not 1, so we can't dup
raise RewriteCardinalityException(self.elementDescription)
# we have elements
if self.singleElement is not None:
self.cursor += 1 # move cursor even for single element list
return self.toTree(self.singleElement)
# must have more than one in list, pull from elements
o = self.toTree(self.elements[self.cursor])
self.cursor += 1
return o
def dup(self, el):
"""
When constructing trees, sometimes we need to dup a token or AST
subtree. Dup'ing a token means just creating another AST node
around it. For trees, you must call the adaptor.dupTree() unless
the element is for a tree root; then it must be a node dup.
"""
raise NotImplementedError
def toTree(self, el):
"""
Ensure stream emits trees; tokens must be converted to AST nodes.
AST nodes can be passed through unmolested.
"""
return el
def hasNext(self):
return ( (self.singleElement is not None and self.cursor < 1)
or (self.elements is not None
and self.cursor < len(self.elements)
)
)
def size(self):
if self.singleElement is not None:
return 1
if self.elements is not None:
return len(self.elements)
return 0
__len__ = size
def getDescription(self):
"""Deprecated. Directly access elementDescription attribute"""
return self.elementDescription
class RewriteRuleTokenStream(RewriteRuleElementStream):
"""@brief Internal helper class."""
def toTree(self, el):
# Don't convert to a tree unless they explicitly call nextTree.
# This way we can do hetero tree nodes in rewrite.
return el
def nextNode(self):
t = self._next()
return self.adaptor.createWithPayload(t)
def nextToken(self):
return self._next()
def dup(self, el):
raise TypeError("dup can't be called for a token stream.")
class RewriteRuleSubtreeStream(RewriteRuleElementStream):
"""@brief Internal helper class."""
def nextNode(self):
"""
Treat next element as a single node even if it's a subtree.
This is used instead of next() when the result has to be a
tree root node. Also prevents us from duplicating recently-added
children; e.g., ^(type ID)+ adds ID to type and then 2nd iteration
must dup the type node, but ID has been added.
Referencing a rule result twice is ok; dup entire tree as
we can't be adding trees as root; e.g., expr expr.
Hideous code duplication here with super.next(). Can't think of
a proper way to refactor. This needs to always call dup node
and super.next() doesn't know which to call: dup node or dup tree.
"""
if (self.dirty
or (self.cursor >= len(self) and len(self) == 1)
):
# if out of elements and size is 1, dup (at most a single node
# since this is for making root nodes).
el = self._next()
return self.adaptor.dupNode(el)
# test size above then fetch
el = self._next()
while self.adaptor.isNil(el) and self.adaptor.getChildCount(el) == 1:
el = self.adaptor.getChild(el, 0)
# dup just the root (want node here)
return self.adaptor.dupNode(el)
def dup(self, el):
return self.adaptor.dupTree(el)
class RewriteRuleNodeStream(RewriteRuleElementStream):
"""
Queues up nodes matched on left side of -> in a tree parser. This is
the analog of RewriteRuleTokenStream for normal parsers.
"""
def nextNode(self):
return self._next()
def toTree(self, el):
return self.adaptor.dupNode(el)
def dup(self, el):
# we dup every node, so don't have to worry about calling dup; short-
#circuited next() so it doesn't call.
raise TypeError("dup can't be called for a node stream.")
class TreeRuleReturnScope(RuleReturnScope):
"""
This is identical to the ParserRuleReturnScope except that
the start property is a tree nodes not Token object
when you are parsing trees. To be generic the tree node types
have to be Object.
"""
def __init__(self):
self.start = None
self.tree = None
def getStart(self):
return self.start
def getTree(self):
return self.tree