import token
import tokenize
from typing import List, Iterator

Mark = int  # NewType('Mark', int)

exact_token_types = token.EXACT_TOKEN_TYPES  # type: ignore


def shorttok(tok: tokenize.TokenInfo) -> str:
    return "%-25.25s" % f"{tok.start[0]}.{tok.start[1]}: {token.tok_name[tok.type]}:{tok.string!r}"


class Tokenizer:
    """Caching wrapper for the tokenize module.

    This is pretty tied to Python's syntax.
    """

    _tokens: List[tokenize.TokenInfo]

    def __init__(self, tokengen: Iterator[tokenize.TokenInfo], *, verbose: bool = False):
        self._tokengen = tokengen
        self._tokens = []
        self._index = 0
        self._verbose = verbose
        if verbose:
            self.report(False, False)
    def getnext(self) -> tokenize.TokenInfo:
        """Return the next token and update the index."""
        cached = True
        while self._index == len(self._tokens):
            tok = next(self._tokengen)
            # Skip tokens that carry no grammatical meaning: non-logical
            # newlines, comments, and whitespace-only error tokens.
            if tok.type in (tokenize.NL, tokenize.COMMENT):
                continue
            if tok.type == token.ERRORTOKEN and tok.string.isspace():
                continue
            self._tokens.append(tok)
            cached = False
        tok = self._tokens[self._index]
        self._index += 1
        if self._verbose:
            self.report(cached, False)
        return tok

    def peek(self) -> tokenize.TokenInfo:
        """Return the next token *without* updating the index."""
        while self._index == len(self._tokens):
            tok = next(self._tokengen)
            if tok.type in (tokenize.NL, tokenize.COMMENT):
                continue
            if tok.type == token.ERRORTOKEN and tok.string.isspace():
                continue
            self._tokens.append(tok)
        return self._tokens[self._index]

    def diagnose(self) -> tokenize.TokenInfo:
        if not self._tokens:
            self.getnext()
        return self._tokens[-1]

    def mark(self) -> Mark:
        return self._index

    def reset(self, index: Mark) -> None:
        if index == self._index:
            return
        assert 0 <= index <= len(self._tokens), (index, len(self._tokens))
        old_index = self._index
        self._index = index
        if self._verbose:
            self.report(True, index < old_index)

    def report(self, cached: bool, back: bool) -> None:
        if back:
            fill = "-" * self._index + "-"
        elif cached:
            fill = "-" * self._index + ">"
        else:
            fill = "-" * self._index + "*"
        if self._index == 0:
            print(f"{fill} (Bof)")
        else:
            tok = self._tokens[self._index - 1]
            print(f"{fill} {shorttok(tok)}")
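

if __name__ == "__main__":
    # Illustrative usage sketch (an assumption, not part of the original
    # module): drive the Tokenizer over a small source string and show how
    # mark()/reset() support backtracking over the cached token stream.
    # The sample source text and variable names here are made up.
    import io

    source = "x = 1 + 2\n"
    tokengen = tokenize.generate_tokens(io.StringIO(source).readline)
    tokenizer = Tokenizer(tokengen, verbose=True)

    start = tokenizer.mark()     # remember the current position
    first = tokenizer.getnext()  # consume the NAME token 'x'
    assert tokenizer.peek().string == "="  # look ahead without consuming
    tokenizer.reset(start)       # backtrack to the remembered position
    assert tokenizer.getnext() == first    # the cached 'x' token again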