Tools/peg_generator/pegen/tokenizer.py - platform/external/python/cpython3 - Git at Google

 import token
 import tokenize
 from typing import Dict, Iterator, List

 Mark = int  # NewType('Mark', int)

 exact_token_types = token.EXACT_TOKEN_TYPES


 def shorttok(tok: tokenize.TokenInfo) -> str:
     return "%-25.25s" % f"{tok.start[0]}.{tok.start[1]}: {token.tok_name[tok.type]}:{tok.string!r}"


 class Tokenizer:
     """Caching wrapper for the tokenize module.

     This is pretty tied to Python's syntax.
     """

     _tokens: List[tokenize.TokenInfo]

     def __init__(
         self, tokengen: Iterator[tokenize.TokenInfo], *, path: str = "", verbose: bool = False
     ):
         self._tokengen = tokengen
         self._tokens = []
         self._index = 0
         self._verbose = verbose
         self._lines: Dict[int, str] = {}
         self._path = path
         if verbose:
             self.report(False, False)

     def getnext(self) -> tokenize.TokenInfo:
         """Return the next token and updates the index."""
         cached = not self._index == len(self._tokens)
         tok = self.peek()
         self._index += 1
         if self._verbose:
             self.report(cached, False)
         return tok

     def peek(self) -> tokenize.TokenInfo:
         """Return the next token *without* updating the index."""
         while self._index == len(self._tokens):
             tok = next(self._tokengen)
             if tok.type in (tokenize.NL, tokenize.COMMENT):
                 continue
             if tok.type == token.ERRORTOKEN and tok.string.isspace():
                 continue
             if (
                 tok.type == token.NEWLINE
                 and self._tokens
                 and self._tokens[-1].type == token.NEWLINE
             ):
                 continue
             self._tokens.append(tok)
             if not self._path:
                 self._lines[tok.start[0]] = tok.line
         return self._tokens[self._index]

     def diagnose(self) -> tokenize.TokenInfo:
         if not self._tokens:
             self.getnext()
         return self._tokens[-1]

     def get_last_non_whitespace_token(self) -> tokenize.TokenInfo:
         for tok in reversed(self._tokens[: self._index]):
             if tok.type != tokenize.ENDMARKER and (
                 tok.type < tokenize.NEWLINE or tok.type > tokenize.DEDENT
             ):
                 break
         return tok

     def get_lines(self, line_numbers: List[int]) -> List[str]:
         """Retrieve source lines corresponding to line numbers."""
         if self._lines:
             lines = self._lines
         else:
             n = len(line_numbers)
             lines = {}
             count = 0
             seen = 0
             with open(self._path) as f:
                 for l in f:
                     count += 1
                     if count in line_numbers:
                         seen += 1
                         lines[count] = l
                         if seen == n:
                             break

         return [lines[n] for n in line_numbers]

     def mark(self) -> Mark:
         return self._index

     def reset(self, index: Mark) -> None:
         if index == self._index:
             return
         assert 0 <= index <= len(self._tokens), (index, len(self._tokens))
         old_index = self._index
         self._index = index
         if self._verbose:
             self.report(True, index < old_index)

     def report(self, cached: bool, back: bool) -> None:
         if back:
             fill = "-" * self._index + "-"
         elif cached:
             fill = "-" * self._index + ">"
         else:
             fill = "-" * self._index + "*"
         if self._index == 0:
             print(f"{fill} (Bof)")
         else:
             tok = self._tokens[self._index - 1]
             print(f"{fill} {shorttok(tok)}")
	import token
	import tokenize
	from typing import Dict, Iterator, List

	Mark = int # NewType('Mark', int)

	exact_token_types = token.EXACT_TOKEN_TYPES


	def shorttok(tok: tokenize.TokenInfo) -> str:
	return "%-25.25s" % f"{tok.start[0]}.{tok.start[1]}: {token.tok_name[tok.type]}:{tok.string!r}"


	class Tokenizer:
	"""Caching wrapper for the tokenize module.

	This is pretty tied to Python's syntax.
	"""

	_tokens: List[tokenize.TokenInfo]

	def __init__(
	self, tokengen: Iterator[tokenize.TokenInfo], *, path: str = "", verbose: bool = False
	):
	self._tokengen = tokengen
	self._tokens = []
	self._index = 0
	self._verbose = verbose
	self._lines: Dict[int, str] = {}
	self._path = path
	if verbose:
	self.report(False, False)

	def getnext(self) -> tokenize.TokenInfo:
	"""Return the next token and updates the index."""
	cached = not self._index == len(self._tokens)
	tok = self.peek()
	self._index += 1
	if self._verbose:
	self.report(cached, False)
	return tok

	def peek(self) -> tokenize.TokenInfo:
	"""Return the next token without updating the index."""
	while self._index == len(self._tokens):
	tok = next(self._tokengen)
	if tok.type in (tokenize.NL, tokenize.COMMENT):
	continue
	if tok.type == token.ERRORTOKEN and tok.string.isspace():
	continue
	if (
	tok.type == token.NEWLINE
	and self._tokens
	and self._tokens[-1].type == token.NEWLINE
	):
	continue
	self._tokens.append(tok)
	if not self._path:
	self._lines[tok.start[0]] = tok.line
	return self._tokens[self._index]

	def diagnose(self) -> tokenize.TokenInfo:
	if not self._tokens:
	self.getnext()
	return self._tokens[-1]

	def get_last_non_whitespace_token(self) -> tokenize.TokenInfo:
	for tok in reversed(self._tokens[: self._index]):
	if tok.type != tokenize.ENDMARKER and (
	tok.type < tokenize.NEWLINE or tok.type > tokenize.DEDENT
	):
	break
	return tok

	def get_lines(self, line_numbers: List[int]) -> List[str]:
	"""Retrieve source lines corresponding to line numbers."""
	if self._lines:
	lines = self._lines
	else:
	n = len(line_numbers)
	lines = {}
	count = 0
	seen = 0
	with open(self._path) as f:
	for l in f:
	count += 1
	if count in line_numbers:
	seen += 1
	lines[count] = l
	if seen == n:
	break

	return [lines[n] for n in line_numbers]

	def mark(self) -> Mark:
	return self._index

	def reset(self, index: Mark) -> None:
	if index == self._index:
	return
	assert 0 <= index <= len(self._tokens), (index, len(self._tokens))
	old_index = self._index
	self._index = index
	if self._verbose:
	self.report(True, index < old_index)

	def report(self, cached: bool, back: bool) -> None:
	if back:
	fill = "-" * self._index + "-"
	elif cached:
	fill = "-" * self._index + ">"
	else:
	fill = "-" * self._index + "*"
	if self._index == 0:
	print(f"{fill} (Bof)")
	else:
	tok = self._tokens[self._index - 1]
	print(f"{fill} {shorttok(tok)}")