catapult/common/py_utils/py_utils/refactor/snippet.py - platform/external/chromium-trace - Git at Google

 # Copyright 2015 The Chromium Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.

 import parser
 import symbol
 import sys
 import token
 import tokenize

 from py_utils.refactor import offset_token


 class Snippet(object):
   """A node in the Python parse tree.

   The Python grammar is defined at:
   https://docs.python.org/2/reference/grammar.html

   There are two types of Snippets:
     TokenSnippets are leaf nodes containing actual text.
     Symbols are internal nodes representing higher-level groupings, and are
         defined by the left-hand sides of the BNFs in the above link.

   This base class only declares the interface: type, type_name, children,
   tokens and PrintTree all raise NotImplementedError here and must be
   provided by subclasses. The Find* helpers are built on top of them.
   """
   @property
   def type(self):
     """Return the node type that the Find* helpers compare against."""
     raise NotImplementedError()

   @property
   def type_name(self):
     """Return a printable name for this node's type."""
     raise NotImplementedError()

   @property
   def children(self):
     """Return a list of this node's children."""
     raise NotImplementedError()

   @property
   def tokens(self):
     """Return a tuple of the tokens this Snippet contains."""
     raise NotImplementedError()

   def PrintTree(self, indent=0, stream=sys.stdout):
     """Spew a pretty-printed parse tree. Mostly useful for debugging."""
     raise NotImplementedError()

   def __str__(self):
     """Return the text obtained by untokenizing this node's tokens."""
     return offset_token.Untokenize(self.tokens)

   def FindAll(self, snippet_type):
     """Yield this node and every descendant matching snippet_type.

     Nodes are visited depth-first, a node before its children.

     Args:
       snippet_type: Either an integer, compared against each node's type, or
           a class, tested with isinstance().
     """
     if isinstance(snippet_type, int):
       if self.type == snippet_type:
         yield self
     elif isinstance(self, snippet_type):
       yield self

     for child in self.children:
       for snippet in child.FindAll(snippet_type):
         yield snippet

   def FindChild(self, snippet_type, **kwargs):
     """Return the first direct child matching a type and attribute filters.

     Args:
       snippet_type: Either an integer, compared against each child's type,
           or a class, tested with isinstance().
       **kwargs: Attribute name/value pairs. A child only matches if
           getattr(child, name) equals value for every pair.

     Returns:
       The first matching child, in the order given by self.children.

     Raises:
       ValueError: If no direct child matches.
     """
     for child in self.children:
       if isinstance(snippet_type, int):
         if child.type != snippet_type:
           continue
       elif not isinstance(child, snippet_type):
         continue

       # Iterate over items(): looping over the dict itself yields only the
       # keys, so unpacking each one into (attribute, value) would fail.
       for attribute, value in kwargs.items():
         if getattr(child, attribute) != value:
           break
       else:
         return child
     raise ValueError('%s is not in %s. Children are: %s' %
                      (snippet_type, self, self.children))

   def FindChildren(self, snippet_type):
     """Yield every direct child matching snippet_type.

     Args:
       snippet_type: Either an integer, compared against each child's type,
           or a class, tested with isinstance().
     """
     if isinstance(snippet_type, int):
       for child in self.children:
         if child.type == snippet_type:
           yield child
     else:
       for child in self.children:
         if isinstance(child, snippet_type):
           yield child


 class TokenSnippet(Snippet):
   """A Snippet containing a list of tokens.

   A list of tokens may start with any number of comments and non-terminating
   newlines, but must end with a syntactically meaningful token.
   """

   def __init__(self, token_type, tokens):
     """Store the node type and its tokens.

     Args:
       token_type: Node type reported by the type property.
       tokens: Sequence of token objects exposing a `string` attribute. It is
           stored by reference, not copied.
     """
     # For operators and delimiters, the TokenSnippet's type may be more specific
     # than the type of the constituent token. E.g. the TokenSnippet type is
     # token.DOT, but the token type is token.OP. This is because the parser
     # has more context than the tokenizer.
     self._type = token_type
     self._tokens = tokens
     self._modified = False

   @classmethod
   def Create(cls, token_type, string, offset=(0, 0)):
     """Build a TokenSnippet holding a single new OffsetToken.

     Args:
       token_type: Used both as the snippet type and as the token's type.
       string: Text of the token.
       offset: Passed through unchanged to offset_token.OffsetToken.
     """
     return cls(token_type,
                [offset_token.OffsetToken(token_type, string, offset)])

   @property
   def type(self):
     """Return the type given at construction."""
     return self._type

   @property
   def type_name(self):
     """Return the name of this node's type from token.tok_name."""
     return token.tok_name[self.type]

   @property
   def value(self):
     """Return the string of the last token."""
     return self._tokens[-1].string

   @value.setter
   def value(self, value):
     """Replace the string of the last token and mark the snippet modified."""
     self._tokens[-1].string = value
     self._modified = True

   @property
   def children(self):
     """Return an empty list; a TokenSnippet is a leaf."""
     return []

   @property
   def tokens(self):
     """Return this snippet's tokens as a tuple."""
     return tuple(self._tokens)

   @property
   def modified(self):
     """Return True once the value setter has been used."""
     return self._modified

   def PrintTree(self, indent=0, stream=sys.stdout):
     """Write this leaf to stream, one line per token.

     The first line shows the type name followed by the repr of the first
     token's string. Every further token goes on its own line, aligned under
     the first one. A snippet without tokens prints only its type name.

     Args:
       indent: Number of spaces written in front of every line.
       stream: Object with a write() method that receives the output.
     """
     # stream.write() is used instead of the `print >> stream` statement so
     # the method produces the same text on Python 2 and Python 3.
     stream.write(' ' * indent)
     if not self.tokens:
       stream.write('%s\n' % self.type_name)
       return

     stream.write(
         '%-4s %s\n' % (self.type_name, repr(self.tokens[0].string)))
     # Continuation lines are padded to the width used by the type name above.
     padding = ' ' * max(len(self.type_name), 4)
     for tok in self.tokens[1:]:
       stream.write(' ' * indent)
       stream.write('%s %s\n' % (padding, repr(tok.string)))


 class Symbol(Snippet):
   """A Snippet containing sub-Snippets.

   The possible types and type_names are defined in Python's symbol module.
   """

   def __init__(self, symbol_type, children):
     """Store the node type and its child snippets.

     Args:
       symbol_type: Node type reported by the type property.
       children: Sequence of child Snippets, stored by reference.
     """
     self._type = symbol_type
     self._children = children

   @property
   def type(self):
     """Return the type given at construction."""
     return self._type

   @property
   def type_name(self):
     """Return the name of this node's type from symbol.sym_name."""
     return symbol.sym_name[self.type]

   @property
   def children(self):
     """Return this node's child snippets."""
     return self._children

   @children.setter
   def children(self, value):  # pylint: disable=arguments-differ
     """Replace this node's child snippets."""
     self._children = value

   @property
   def tokens(self):
     """Return a tuple of the tokens of all children, in child order."""
     tokens = []
     for child in self.children:
       tokens.extend(child.tokens)
     return tuple(tokens)

   @property
   def modified(self):
     """Return True if any child reports itself as modified."""
     return any(child.modified for child in self.children)

   def PrintTree(self, indent=0, stream=sys.stdout):
     """Write this subtree to stream, children indented two more spaces.

     Args:
       indent: Number of spaces written in front of this node's line.
       stream: Object with a write() method that receives the output.
     """
     stream.write(' ' * indent)

     # If there's only one child, collapse it onto the same line.
     names = []
     node = self
     while len(node.children) == 1 and len(node.children[0].children) == 1:
       names.append(node.type_name)
       node = node.children[0]
     names.append(node.type_name)

     # A single write of the space-joined names replaces the chain of
     # `print >> stream, name,` statements, which only work on Python 2.
     stream.write(' '.join(names) + '\n')
     for child in node.children:
       child.PrintTree(indent + 2, stream)


 def Snippetize(f):
   """Return the syntax tree of the given file.

   Args:
     f: Seekable file-like object containing Python source. It is rewound to
         the start before being read.

   Returns:
     The root Snippet built by _SnippetizeNode.
   """
   f.seek(0)
   source = f.read()
   parse_tree = parser.st2list(parser.suite(source))
   remaining = offset_token.Tokenize(f)

   root = _SnippetizeNode(parse_tree, remaining)
   # Every token must have been attached to some node of the tree.
   assert not remaining
   return root


 def _SnippetizeNode(node, tokens):
   """Convert one parse-tree node, and everything below it, into Snippets.

   The parser module gives a syntax tree that discards comments,
   non-terminating newlines, and whitespace information. The tokens given by
   the tokenize module are used to annotate the syntax tree with the
   information needed to exactly reproduce the original source code.

   Args:
     node: Parse-tree node in list form. node[0] is its type and, for symbols,
         node[1:] are its child nodes.
     tokens: Queue of tokens not yet attached to the tree. Tokens are removed
         from its left end as they are attached.

   Returns:
     A Symbol when the node type is at least token.NT_OFFSET, otherwise a
     TokenSnippet.
   """
   kind = node[0]

   if kind >= token.NT_OFFSET:
     # Symbol. Children are converted in order so that each one takes its own
     # tokens off the front of the queue.
     kids = []
     for sub_node in node[1:]:
       kids.append(_SnippetizeNode(sub_node, tokens))
     return Symbol(kind, tuple(kids))

   # Token. Leading comments and non-terminating newlines are attached to it.
   leading = []
   while tokens and tokens[0].type in (tokenize.COMMENT, tokenize.NL):
     leading.append(tokens.popleft())

   # parser has 2 NEWLINEs right before the end.
   # tokenize has 0 or 1 depending on if the file has one.
   # Create extra nodes without consuming tokens to account for this.
   if kind == token.NEWLINE:
     # Look past any DEDENTs: if the next thing is the end marker, this
     # NEWLINE has no token of its own.
     upcoming = next((tok for tok in tokens if tok.type != token.DEDENT), None)
     if upcoming is not None and upcoming.type == token.ENDMARKER:
       return TokenSnippet(kind, leading)

   assert tokens[0].type == token.OP or kind == tokens[0].type

   leading.append(tokens.popleft())
   return TokenSnippet(kind, leading)
	# Copyright 2015 The Chromium Authors. All rights reserved.
	# Use of this source code is governed by a BSD-style license that can be
	# found in the LICENSE file.

	import parser
	import symbol
	import sys
	import token
	import tokenize

	from py_utils.refactor import offset_token


	class Snippet(object):
	  """A node in the Python parse tree.

	  The Python grammar is defined at:
	  https://docs.python.org/2/reference/grammar.html

	  There are two types of Snippets:
	    TokenSnippets are leaf nodes containing actual text.
	    Symbols are internal nodes representing higher-level groupings, and are
	        defined by the left-hand sides of the BNFs in the above link.

	  This base class only declares the interface: type, type_name, children,
	  tokens and PrintTree all raise NotImplementedError here and must be
	  provided by subclasses. The Find* helpers are built on top of them.
	  """
	  @property
	  def type(self):
	    """Return the node type that the Find* helpers compare against."""
	    raise NotImplementedError()

	  @property
	  def type_name(self):
	    """Return a printable name for this node's type."""
	    raise NotImplementedError()

	  @property
	  def children(self):
	    """Return a list of this node's children."""
	    raise NotImplementedError()

	  @property
	  def tokens(self):
	    """Return a tuple of the tokens this Snippet contains."""
	    raise NotImplementedError()

	  def PrintTree(self, indent=0, stream=sys.stdout):
	    """Spew a pretty-printed parse tree. Mostly useful for debugging."""
	    raise NotImplementedError()

	  def __str__(self):
	    """Return the text obtained by untokenizing this node's tokens."""
	    return offset_token.Untokenize(self.tokens)

	  def FindAll(self, snippet_type):
	    """Yield this node and every descendant matching snippet_type.

	    Nodes are visited depth-first, a node before its children.

	    Args:
	      snippet_type: Either an integer, compared against each node's type, or
	          a class, tested with isinstance().
	    """
	    if isinstance(snippet_type, int):
	      if self.type == snippet_type:
	        yield self
	    elif isinstance(self, snippet_type):
	      yield self

	    for child in self.children:
	      for snippet in child.FindAll(snippet_type):
	        yield snippet

	  def FindChild(self, snippet_type, **kwargs):
	    """Return the first direct child matching a type and attribute filters.

	    Args:
	      snippet_type: Either an integer, compared against each child's type,
	          or a class, tested with isinstance().
	      **kwargs: Attribute name/value pairs. A child only matches if
	          getattr(child, name) equals value for every pair.

	    Returns:
	      The first matching child, in the order given by self.children.

	    Raises:
	      ValueError: If no direct child matches.
	    """
	    for child in self.children:
	      if isinstance(snippet_type, int):
	        if child.type != snippet_type:
	          continue
	      elif not isinstance(child, snippet_type):
	        continue

	      # Iterate over items(): looping over the dict itself yields only the
	      # keys, so unpacking each one into (attribute, value) would fail.
	      for attribute, value in kwargs.items():
	        if getattr(child, attribute) != value:
	          break
	      else:
	        return child
	    raise ValueError('%s is not in %s. Children are: %s' %
	                     (snippet_type, self, self.children))

	  def FindChildren(self, snippet_type):
	    """Yield every direct child matching snippet_type.

	    Args:
	      snippet_type: Either an integer, compared against each child's type,
	          or a class, tested with isinstance().
	    """
	    if isinstance(snippet_type, int):
	      for child in self.children:
	        if child.type == snippet_type:
	          yield child
	    else:
	      for child in self.children:
	        if isinstance(child, snippet_type):
	          yield child


	class TokenSnippet(Snippet):
	  """A Snippet containing a list of tokens.

	  A list of tokens may start with any number of comments and non-terminating
	  newlines, but must end with a syntactically meaningful token.
	  """

	  def __init__(self, token_type, tokens):
	    """Store the node type and its tokens.

	    Args:
	      token_type: Node type reported by the type property.
	      tokens: Sequence of token objects exposing a `string` attribute. It is
	          stored by reference, not copied.
	    """
	    # For operators and delimiters, the TokenSnippet's type may be more
	    # specific than the type of the constituent token. E.g. the TokenSnippet
	    # type is token.DOT, but the token type is token.OP. This is because the
	    # parser has more context than the tokenizer.
	    self._type = token_type
	    self._tokens = tokens
	    self._modified = False

	  @classmethod
	  def Create(cls, token_type, string, offset=(0, 0)):
	    """Build a TokenSnippet holding a single new OffsetToken.

	    Args:
	      token_type: Used both as the snippet type and as the token's type.
	      string: Text of the token.
	      offset: Passed through unchanged to offset_token.OffsetToken.
	    """
	    return cls(token_type,
	               [offset_token.OffsetToken(token_type, string, offset)])

	  @property
	  def type(self):
	    """Return the type given at construction."""
	    return self._type

	  @property
	  def type_name(self):
	    """Return the name of this node's type from token.tok_name."""
	    return token.tok_name[self.type]

	  @property
	  def value(self):
	    """Return the string of the last token."""
	    return self._tokens[-1].string

	  @value.setter
	  def value(self, value):
	    """Replace the string of the last token and mark the snippet modified."""
	    self._tokens[-1].string = value
	    self._modified = True

	  @property
	  def children(self):
	    """Return an empty list; a TokenSnippet is a leaf."""
	    return []

	  @property
	  def tokens(self):
	    """Return this snippet's tokens as a tuple."""
	    return tuple(self._tokens)

	  @property
	  def modified(self):
	    """Return True once the value setter has been used."""
	    return self._modified

	  def PrintTree(self, indent=0, stream=sys.stdout):
	    """Write this leaf to stream, one line per token.

	    The first line shows the type name followed by the repr of the first
	    token's string. Every further token goes on its own line, aligned under
	    the first one. A snippet without tokens prints only its type name.

	    Args:
	      indent: Number of spaces written in front of every line.
	      stream: Object with a write() method that receives the output.
	    """
	    # stream.write() is used instead of the `print >> stream` statement so
	    # the method produces the same text on Python 2 and Python 3.
	    stream.write(' ' * indent)
	    if not self.tokens:
	      stream.write('%s\n' % self.type_name)
	      return

	    stream.write(
	        '%-4s %s\n' % (self.type_name, repr(self.tokens[0].string)))
	    # Continuation lines are padded to the width used by the type name above.
	    padding = ' ' * max(len(self.type_name), 4)
	    for tok in self.tokens[1:]:
	      stream.write(' ' * indent)
	      stream.write('%s %s\n' % (padding, repr(tok.string)))


	class Symbol(Snippet):
	  """A Snippet containing sub-Snippets.

	  The possible types and type_names are defined in Python's symbol module.
	  """

	  def __init__(self, symbol_type, children):
	    """Store the node type and its child snippets.

	    Args:
	      symbol_type: Node type reported by the type property.
	      children: Sequence of child Snippets, stored by reference.
	    """
	    self._type = symbol_type
	    self._children = children

	  @property
	  def type(self):
	    """Return the type given at construction."""
	    return self._type

	  @property
	  def type_name(self):
	    """Return the name of this node's type from symbol.sym_name."""
	    return symbol.sym_name[self.type]

	  @property
	  def children(self):
	    """Return this node's child snippets."""
	    return self._children

	  @children.setter
	  def children(self, value):  # pylint: disable=arguments-differ
	    """Replace this node's child snippets."""
	    self._children = value

	  @property
	  def tokens(self):
	    """Return a tuple of the tokens of all children, in child order."""
	    tokens = []
	    for child in self.children:
	      tokens.extend(child.tokens)
	    return tuple(tokens)

	  @property
	  def modified(self):
	    """Return True if any child reports itself as modified."""
	    return any(child.modified for child in self.children)

	  def PrintTree(self, indent=0, stream=sys.stdout):
	    """Write this subtree to stream, children indented two more spaces.

	    Args:
	      indent: Number of spaces written in front of this node's line.
	      stream: Object with a write() method that receives the output.
	    """
	    stream.write(' ' * indent)

	    # If there's only one child, collapse it onto the same line.
	    names = []
	    node = self
	    while len(node.children) == 1 and len(node.children[0].children) == 1:
	      names.append(node.type_name)
	      node = node.children[0]
	    names.append(node.type_name)

	    # A single write of the space-joined names replaces the chain of
	    # `print >> stream, name,` statements, which only work on Python 2.
	    stream.write(' '.join(names) + '\n')
	    for child in node.children:
	      child.PrintTree(indent + 2, stream)


	def Snippetize(f):
	  """Return the syntax tree of the given file.

	  Args:
	    f: Seekable file-like object containing Python source. It is rewound to
	        the start before being read.

	  Returns:
	    The root Snippet built by _SnippetizeNode.
	  """
	  f.seek(0)
	  syntax_tree = parser.st2list(parser.suite(f.read()))
	  tokens = offset_token.Tokenize(f)

	  snippet = _SnippetizeNode(syntax_tree, tokens)
	  # Every token must have been attached to some node of the tree.
	  assert not tokens
	  return snippet


	def _SnippetizeNode(node, tokens):
	  """Convert one parse-tree node, and everything below it, into Snippets.

	  Args:
	    node: Parse-tree node in list form. node[0] is its type and, for symbols,
	        node[1:] are its child nodes.
	    tokens: Queue of tokens not yet attached to the tree. Tokens are removed
	        from its left end as they are attached.

	  Returns:
	    A Symbol when the node type is at least token.NT_OFFSET, otherwise a
	    TokenSnippet.
	  """
	  # The parser module gives a syntax tree that discards comments,
	  # non-terminating newlines, and whitespace information. Use the tokens given
	  # by the tokenize module to annotate the syntax tree with the information
	  # needed to exactly reproduce the original source code.
	  node_type = node[0]

	  if node_type >= token.NT_OFFSET:
	    # Symbol.
	    children = tuple(_SnippetizeNode(child, tokens) for child in node[1:])
	    return Symbol(node_type, children)
	  else:
	    # Token.
	    grabbed_tokens = []
	    while tokens and (
	        tokens[0].type == tokenize.COMMENT or tokens[0].type == tokenize.NL):
	      grabbed_tokens.append(tokens.popleft())

	    # parser has 2 NEWLINEs right before the end.
	    # tokenize has 0 or 1 depending on if the file has one.
	    # Create extra nodes without consuming tokens to account for this.
	    if node_type == token.NEWLINE:
	      for tok in tokens:
	        if tok.type == token.ENDMARKER:
	          return TokenSnippet(node_type, grabbed_tokens)
	        if tok.type != token.DEDENT:
	          break

	    assert tokens[0].type == token.OP or node_type == tokens[0].type

	    grabbed_tokens.append(tokens.popleft())
	    return TokenSnippet(node_type, grabbed_tokens)