# Copyright 2015 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import collections
import itertools
import token
import tokenize


def _Pairwise(iterable):
  """s -> (None, s0), (s0, s1), (s1, s2), (s2, s3), ..."""
  a, b = itertools.tee(iterable)
  a = itertools.chain((None,), a)
  return itertools.izip(a, b)


class OffsetToken(object):
  """A Python token with a relative position.

  A token is represented by a type defined in Python's token module, a string
  representing the content, and an offset. Using relative positions makes it
  easy to insert and remove tokens.
  """

  def __init__(self, token_type, string, offset):
    self._type = token_type
    self._string = string
    self._offset = offset

  @property
  def type(self):
    return self._type

  @property
  def type_name(self):
    return token.tok_name[self._type]

  @property
  def string(self):
    return self._string

  @string.setter
  def string(self, value):
    self._string = value

  @property
  def offset(self):
    return self._offset

  def __str__(self):
    return str((self.type_name, self.string, self.offset))
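
# Illustrative note (not part of the original file): printing an OffsetToken
# shows its (type name, string, offset) triple. An offset of (0, n) means the
# token starts n columns after the end of the previous token on the same row;
# an offset of (m, c) with m > 0 means it starts m rows further down, at
# absolute column c. For example, str(OffsetToken(token.NAME, 'y', (1, 0)))
# gives "('NAME', 'y', (1, 0))".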


def Tokenize(f):
  """Read tokens from a file-like object.

  Args:
    f: Any object that has a readline method.

  Returns:
    A collections.deque containing OffsetTokens. Deques are cheaper and easier
    to manipulate sequentially than lists.
  """
  f.seek(0)
  tokenize_tokens = tokenize.generate_tokens(f.readline)

  offset_tokens = collections.deque()
  for prev_token, next_token in _Pairwise(tokenize_tokens):
    token_type, string, (srow, scol), _, _ = next_token
    if not prev_token:
      offset_tokens.append(OffsetToken(token_type, string, (0, 0)))
    else:
      erow, ecol = prev_token[3]
      if erow == srow:
        offset_tokens.append(OffsetToken(token_type, string, (0, scol - ecol)))
      else:
        offset_tokens.append(OffsetToken(
            token_type, string, (srow - erow, scol)))
  return offset_tokens
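
# Illustration (not part of the original file): for a file-like object
# containing the two lines
#   x = 1
#   y = 2
# Tokenize returns a deque whose leading OffsetTokens print as
#   ('NAME', 'x', (0, 0)), ('OP', '=', (0, 1)), ('NUMBER', '1', (0, 1)),
#   ('NEWLINE', '\n', (0, 0)), ('NAME', 'y', (1, 0)), ...
# Because every position is relative, editing one token's string leaves all
# of the other offsets valid.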


def Untokenize(offset_tokens):
  """Return the string representation of an iterable of OffsetTokens."""
  # Make a copy. Don't modify the original.
  offset_tokens = collections.deque(offset_tokens)

  # Strip leading NL tokens.
  while offset_tokens[0].type == tokenize.NL:
    offset_tokens.popleft()

  # Strip leading vertical whitespace.
  first_token = offset_tokens.popleft()
  # Take care not to modify the existing token. Create a new one in its place.
  first_token = OffsetToken(first_token.type, first_token.string,
                            (0, first_token.offset[1]))
  offset_tokens.appendleft(first_token)

  # Convert OffsetTokens to tokenize tokens.
  tokenize_tokens = []
  row = 1
  col = 0
  for t in offset_tokens:
    offset_row, offset_col = t.offset
    if offset_row == 0:
      col += offset_col
    else:
      row += offset_row
      col = offset_col
    tokenize_tokens.append((t.type, t.string, (row, col), (row, col), None))

  # tokenize can't handle whitespace before line continuations.
  # So add a space.
  return tokenize.untokenize(tokenize_tokens).replace('\\\n', ' \\\n')
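

# Minimal round-trip sketch (not part of the original module; it assumes
# Python 2, matching the itertools.izip call above). Tokenize a small
# snippet, rename one identifier by mutating that token's string, and
# Untokenize the result. Because every offset is relative to the previous
# token, the rename does not require adjusting any other token, and the
# original spacing is reproduced around the longer name.
if __name__ == '__main__':
  import StringIO

  source = 'x = 1\ny = x + 2\n'
  tokens = Tokenize(StringIO.StringIO(source))
  for t in tokens:
    if t.type == token.NAME and t.string == 'x':
      t.string = 'total'  # Only this token changes; no other offsets move.
  print Untokenize(tokens)  # Prints 'total = 1\ny = total + 2\n'.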