# Tools/clinic/libclinic/block_parser.py - platform/external/python/cpython3 - Git at Google

 from __future__ import annotations
 import collections
 import dataclasses as dc
 import re
 import shlex
 from typing import Any

 import libclinic
 from libclinic import fail, ClinicError
 from libclinic.language import Language
 from libclinic.function import (
     Module, Class, Function)


 @dc.dataclass(slots=True, repr=False)
 class Block:
     r"""
     Represents a single block of text embedded in
     another file.  If dsl_name is None, the block represents
     verbatim text, raw original text from the file, in
     which case "input" will be the only non-false member.
     If dsl_name is not None, the block represents a Clinic
     block.

     input is always str, with embedded \n characters.
     input represents the original text from the file;
     if it's a Clinic block, it is the original text with
     the body_prefix and redundant leading whitespace removed.

     dsl_name is either str or None.  If str, it's the text
     found on the start line of the block between the square
     brackets.

     signatures is a list.
     It may only contain clinic.Module, clinic.Class, and
     clinic.Function objects.  At the moment it should
     contain at most one of each.

     output is either str or None.  If str, it's the output
     from this block, with embedded '\n' characters.

     indent is a str.  It's the leading whitespace
     that was found on every line of input.  (If body_prefix is
     not empty, this is the indent *after* removing the
     body_prefix.)

     "indent" is different from the concept of "preindent"
     (which is not stored as state on Block objects).
     "preindent" is the whitespace that
     was found in front of every line of input *before* the
     "body_prefix" (see the Language object).  If body_prefix
     is empty, preindent must always be empty too.

     To illustrate the difference between "indent" and "preindent":

     Assume that '_' represents whitespace.
     If the block processed was in a Python file, and looked like this:
       ____#/*[python]
       ____#__for a in range(20):
       ____#____print(a)
       ____#[python]*/
     "preindent" would be "____" and "indent" would be "__".

     """
     input: str
     dsl_name: str | None = None
     signatures: list[Module | Class | Function] = dc.field(default_factory=list)
     output: Any = None  # TODO: Very dynamic; probably untypeable in its current form?
     indent: str = ''

     def __repr__(self) -> str:
         dsl_name = self.dsl_name or "text"
         def summarize(s: object) -> str:
             s = repr(s)
             if len(s) > 30:
                 return s[:26] + "..." + s[0]
             return s
         parts = (
             repr(dsl_name),
             f"input={summarize(self.input)}",
             f"output={summarize(self.output)}"
         )
         return f"<clinic.Block {' '.join(parts)}>"


 class BlockParser:
     """
     Block-oriented parser for Argument Clinic.
     Iterator, yields Block objects.

     Text outside Clinic markers is yielded as verbatim blocks (their
     dsl_name is None).  Text that follows a start line is yielded as a
     Clinic block, together with any following output that is terminated
     by a checksum line.
     """

     def __init__(
             self,
             input: str,
             language: Language,
             *,
             verify: bool = True
     ) -> None:
         r"""
         "input" should be a str object
         with embedded \n characters.

         "language" should be a Language object.

         "verify" selects whether the checksum stored on a checksum line
         is compared against the one computed from the preceding output.
         """
         language.validate()

         # Lines are kept in reverse order so that the next line to read
         # sits at the right end: pop() reads it, extend() puts lines back.
         self.input = collections.deque(reversed(input.splitlines(keepends=True)))
         self.block_start_line_number = self.line_number = 0

         self.language = language
         before, sep, after = language.start_line.partition('{dsl_name}')
         assert sep == '{dsl_name}'
         self.find_start_re = libclinic.create_regex(before, after,
                                                     whole_line=False)
         self.start_re = libclinic.create_regex(before, after)
         self.verify = verify
         # One-entry cache: the checksum regex built for the most recently
         # parsed dsl_name.
         self.last_checksum_re: re.Pattern[str] | None = None
         self.last_dsl_name: str | None = None
         # Set when a start line has been consumed and its Clinic block
         # still has to be parsed.
         self.dsl_name: str | None = None
         # True until the first block has been returned.
         self.first_block = True

     def __iter__(self) -> BlockParser:
         return self

     def __next__(self) -> Block:
         """
         Return the next Block.

         Raises StopIteration once no input lines remain.  A ClinicError
         raised while parsing a Clinic block is re-raised after its
         filename and lineno attributes have been filled in.
         """
         while True:
             if not self.input:
                 raise StopIteration

             if self.dsl_name:
                 try:
                     return_value = self.parse_clinic_block(self.dsl_name)
                 except ClinicError as exc:
                     exc.filename = self.language.filename
                     exc.lineno = self.line_number
                     raise
                 self.dsl_name = None
                 self.first_block = False
                 return return_value
             block = self.parse_verbatim_block()
             # An empty verbatim block before anything has been returned
             # (the input opened with a start line) is not yielded.
             if self.first_block and not block.input:
                 continue
             self.first_block = False
             return block


     def is_start_line(self, line: str) -> str | None:
         """
         Return the dsl_name captured from "line" if it is a start line
         (leading whitespace is ignored), otherwise None.
         """
         match = self.start_re.match(line.lstrip())
         return match.group(1) if match else None

     def _line(self, lookahead: bool = False) -> str:
         """
         Consume and return the next input line, advancing line_number.

         Unless "lookahead" is true, the line is also passed to
         language.parse_line().
         """
         self.line_number += 1
         line = self.input.pop()
         if not lookahead:
             self.language.parse_line(line)
         return line

     def parse_verbatim_block(self) -> Block:
         """
         Collect lines up to the next start line (or the end of input)
         and return them as a verbatim Block.

         The start line itself is consumed but not included; the name it
         carries is stored in self.dsl_name.
         """
         lines = []
         self.block_start_line_number = self.line_number

         while self.input:
             line = self._line()
             dsl_name = self.is_start_line(line)
             if dsl_name:
                 self.dsl_name = dsl_name
                 break
             lines.append(line)

         return Block("".join(lines))

     def parse_clinic_block(self, dsl_name: str) -> Block:
         """
         Parse the Clinic block named "dsl_name"; its start line has
         already been consumed.

         The body is read up to the stop line (or a start line, or the
         end of input).  The parser then scans forward for a checksum
         line.  If one is found, the lines before it become the block's
         output and, when self.verify is true, the stored checksum is
         compared against the computed one.  If none is found, the
         scanned lines are pushed back onto the input and output is None.

         Malformed stop lines, malformed marker lines and checksum
         mismatches are reported through fail().
         """
         in_lines = []
         self.block_start_line_number = self.line_number + 1
         stop_line = self.language.stop_line.format(dsl_name=dsl_name)
         body_prefix = self.language.body_prefix.format(dsl_name=dsl_name)

         def is_stop_line(line: str) -> bool:
             # make sure to recognize stop line even if it
             # doesn't end with EOL (it could be the very end of the file)
             if line.startswith(stop_line):
                 remainder = line.removeprefix(stop_line)
                 if remainder and not remainder.isspace():
                     fail(f"Garbage after stop line: {remainder!r}")
                 return True
             else:
                 # gh-92256: don't allow incorrectly formatted stop lines
                 if line.lstrip().startswith(stop_line):
                     fail(f"Whitespace is not allowed before the stop line: {line!r}")
                 return False

         # consume body of program
         while self.input:
             line = self._line()
             if is_stop_line(line) or self.is_start_line(line):
                 break
             if body_prefix:
                 line = line.lstrip()
                 # This validates file content, so report it as a Clinic
                 # error rather than with an assert (asserts vanish under -O).
                 if not line.startswith(body_prefix):
                     fail(f"Expected line to start with body prefix "
                          f"{body_prefix!r}: {line!r}")
                 line = line.removeprefix(body_prefix)
             in_lines.append(line)

         # consume output and checksum line, if present.
         if self.last_dsl_name == dsl_name:
             checksum_re = self.last_checksum_re
         else:
             before, sep, after = self.language.checksum_line.format(dsl_name=dsl_name, arguments='{arguments}').partition('{arguments}')
             assert sep == '{arguments}'
             checksum_re = libclinic.create_regex(before, after, word=False)
             self.last_dsl_name = dsl_name
             self.last_checksum_re = checksum_re
         assert checksum_re is not None

         # scan forward for checksum line
         out_lines = []
         arguments = None
         while self.input:
             line = self._line(lookahead=True)
             match = checksum_re.match(line.lstrip())
             arguments = match.group(1) if match else None
             if arguments:
                 break
             out_lines.append(line)
             if self.is_start_line(line):
                 break

         output: str | None
         output = "".join(out_lines)
         if arguments:
             # shlex.split() raises ValueError on e.g. an unclosed quote.
             try:
                 fields = shlex.split(arguments)
             except ValueError:
                 fail(f"Mangled Argument Clinic marker line: {line!r}")

             d = {}
             for field in fields:
                 name, equals, value = field.partition('=')
                 if not equals:
                     fail(f"Mangled Argument Clinic marker line: {line!r}")
                 d[name.strip()] = value.strip()

             if self.verify:
                 # Marker lines carrying an "input" field keep the output
                 # checksum under "output"; others keep it under "checksum".
                 key = 'output' if 'input' in d else 'checksum'
                 if key not in d:
                     fail(f"Mangled Argument Clinic marker line: {line!r}")
                 checksum = d[key]

                 computed = libclinic.compute_checksum(output, len(checksum))
                 if checksum != computed:
                     fail("Checksum mismatch! "
                          f"Expected {checksum!r}, computed {computed!r}. "
                          "Suggested fix: remove all generated code including "
                          "the end marker, or use the '-f' option.")
         else:
             # put back output
             output_lines = output.splitlines(keepends=True)
             self.line_number -= len(output_lines)
             self.input.extend(reversed(output_lines))
             output = None

         return Block("".join(in_lines), dsl_name, output=output)
	from __future__ import annotations
	import collections
	import dataclasses as dc
	import re
	import shlex
	from typing import Any

	import libclinic
	from libclinic import fail, ClinicError
	from libclinic.language import Language
	from libclinic.function import (
	Module, Class, Function)


	@dc.dataclass(slots=True, repr=False)
	class Block:
	r"""
	Represents a single block of text embedded in
	another file. If dsl_name is None, the block represents
	verbatim text, raw original text from the file, in
	which case "input" will be the only non-false member.
	If dsl_name is not None, the block represents a Clinic
	block.

	input is always str, with embedded \n characters.
	input represents the original text from the file;
	if it's a Clinic block, it is the original text with
	the body_prefix and redundant leading whitespace removed.

	dsl_name is either str or None. If str, it's the text
	found on the start line of the block between the square
	brackets.

	signatures is a list.
	It may only contain clinic.Module, clinic.Class, and
	clinic.Function objects. At the moment it should
	contain at most one of each.

	output is either str or None. If str, it's the output
	from this block, with embedded '\n' characters.

	indent is a str. It's the leading whitespace
	that was found on every line of input. (If body_prefix is
	not empty, this is the indent after removing the
	body_prefix.)

	"indent" is different from the concept of "preindent"
	(which is not stored as state on Block objects).
	"preindent" is the whitespace that
	was found in front of every line of input before the
	"body_prefix" (see the Language object). If body_prefix
	is empty, preindent must always be empty too.

	To illustrate the difference between "indent" and "preindent":

	Assume that '_' represents whitespace.
	If the block processed was in a Python file, and looked like this:
	____#/*[python]
	____#__for a in range(20):
	____#____print(a)
	____#[python]*/
	"preindent" would be "____" and "indent" would be "__".

	"""
	input: str
	dsl_name: str \| None = None
	signatures: list[Module \| Class \| Function] = dc.field(default_factory=list)
	output: Any = None # TODO: Very dynamic; probably untypeable in its current form?
	indent: str = ''

	def __repr__(self) -> str:
	dsl_name = self.dsl_name or "text"
	def summarize(s: object) -> str:
	s = repr(s)
	if len(s) > 30:
	return s[:26] + "..." + s[0]
	return s
	parts = (
	repr(dsl_name),
	f"input={summarize(self.input)}",
	f"output={summarize(self.output)}"
	)
	return f"<clinic.Block {' '.join(parts)}>"


	class BlockParser:
	    """
	    Block-oriented parser for Argument Clinic.
	    Iterator, yields Block objects.

	    Text outside Clinic markers is yielded as verbatim blocks (their
	    dsl_name is None).  Text that follows a start line is yielded as a
	    Clinic block, together with any following output that is terminated
	    by a checksum line.
	    """

	    def __init__(
	            self,
	            input: str,
	            language: Language,
	            *,
	            verify: bool = True
	    ) -> None:
	        r"""
	        "input" should be a str object
	        with embedded \n characters.

	        "language" should be a Language object.

	        "verify" selects whether the checksum stored on a checksum line
	        is compared against the one computed from the preceding output.
	        """
	        language.validate()

	        # Lines are kept in reverse order so that the next line to read
	        # sits at the right end: pop() reads it, extend() puts lines back.
	        self.input = collections.deque(reversed(input.splitlines(keepends=True)))
	        self.block_start_line_number = self.line_number = 0

	        self.language = language
	        before, sep, after = language.start_line.partition('{dsl_name}')
	        assert sep == '{dsl_name}'
	        self.find_start_re = libclinic.create_regex(before, after,
	                                                    whole_line=False)
	        self.start_re = libclinic.create_regex(before, after)
	        self.verify = verify
	        # One-entry cache: the checksum regex built for the most recently
	        # parsed dsl_name.
	        self.last_checksum_re: re.Pattern[str] | None = None
	        self.last_dsl_name: str | None = None
	        # Set when a start line has been consumed and its Clinic block
	        # still has to be parsed.
	        self.dsl_name: str | None = None
	        # True until the first block has been returned.
	        self.first_block = True

	    def __iter__(self) -> BlockParser:
	        return self

	    def __next__(self) -> Block:
	        """
	        Return the next Block.

	        Raises StopIteration once no input lines remain.  A ClinicError
	        raised while parsing a Clinic block is re-raised after its
	        filename and lineno attributes have been filled in.
	        """
	        while True:
	            if not self.input:
	                raise StopIteration

	            if self.dsl_name:
	                try:
	                    return_value = self.parse_clinic_block(self.dsl_name)
	                except ClinicError as exc:
	                    exc.filename = self.language.filename
	                    exc.lineno = self.line_number
	                    raise
	                self.dsl_name = None
	                self.first_block = False
	                return return_value
	            block = self.parse_verbatim_block()
	            # An empty verbatim block before anything has been returned
	            # (the input opened with a start line) is not yielded.
	            if self.first_block and not block.input:
	                continue
	            self.first_block = False
	            return block

	    def is_start_line(self, line: str) -> str | None:
	        """
	        Return the dsl_name captured from "line" if it is a start line
	        (leading whitespace is ignored), otherwise None.
	        """
	        match = self.start_re.match(line.lstrip())
	        return match.group(1) if match else None

	    def _line(self, lookahead: bool = False) -> str:
	        """
	        Consume and return the next input line, advancing line_number.

	        Unless "lookahead" is true, the line is also passed to
	        language.parse_line().
	        """
	        self.line_number += 1
	        line = self.input.pop()
	        if not lookahead:
	            self.language.parse_line(line)
	        return line

	    def parse_verbatim_block(self) -> Block:
	        """
	        Collect lines up to the next start line (or the end of input)
	        and return them as a verbatim Block.

	        The start line itself is consumed but not included; the name it
	        carries is stored in self.dsl_name.
	        """
	        lines = []
	        self.block_start_line_number = self.line_number

	        while self.input:
	            line = self._line()
	            dsl_name = self.is_start_line(line)
	            if dsl_name:
	                self.dsl_name = dsl_name
	                break
	            lines.append(line)

	        return Block("".join(lines))

	    def parse_clinic_block(self, dsl_name: str) -> Block:
	        """
	        Parse the Clinic block named "dsl_name"; its start line has
	        already been consumed.

	        The body is read up to the stop line (or a start line, or the
	        end of input).  The parser then scans forward for a checksum
	        line.  If one is found, the lines before it become the block's
	        output and, when self.verify is true, the stored checksum is
	        compared against the computed one.  If none is found, the
	        scanned lines are pushed back onto the input and output is None.

	        Malformed stop lines, malformed marker lines and checksum
	        mismatches are reported through fail().
	        """
	        in_lines = []
	        self.block_start_line_number = self.line_number + 1
	        stop_line = self.language.stop_line.format(dsl_name=dsl_name)
	        body_prefix = self.language.body_prefix.format(dsl_name=dsl_name)

	        def is_stop_line(line: str) -> bool:
	            # make sure to recognize stop line even if it
	            # doesn't end with EOL (it could be the very end of the file)
	            if line.startswith(stop_line):
	                remainder = line.removeprefix(stop_line)
	                if remainder and not remainder.isspace():
	                    fail(f"Garbage after stop line: {remainder!r}")
	                return True
	            else:
	                # gh-92256: don't allow incorrectly formatted stop lines
	                if line.lstrip().startswith(stop_line):
	                    fail(f"Whitespace is not allowed before the stop line: {line!r}")
	                return False

	        # consume body of program
	        while self.input:
	            line = self._line()
	            if is_stop_line(line) or self.is_start_line(line):
	                break
	            if body_prefix:
	                line = line.lstrip()
	                # This validates file content, so report it as a Clinic
	                # error rather than with an assert (asserts vanish under -O).
	                if not line.startswith(body_prefix):
	                    fail(f"Expected line to start with body prefix "
	                         f"{body_prefix!r}: {line!r}")
	                line = line.removeprefix(body_prefix)
	            in_lines.append(line)

	        # consume output and checksum line, if present.
	        if self.last_dsl_name == dsl_name:
	            checksum_re = self.last_checksum_re
	        else:
	            before, sep, after = self.language.checksum_line.format(dsl_name=dsl_name, arguments='{arguments}').partition('{arguments}')
	            assert sep == '{arguments}'
	            checksum_re = libclinic.create_regex(before, after, word=False)
	            self.last_dsl_name = dsl_name
	            self.last_checksum_re = checksum_re
	        assert checksum_re is not None

	        # scan forward for checksum line
	        out_lines = []
	        arguments = None
	        while self.input:
	            line = self._line(lookahead=True)
	            match = checksum_re.match(line.lstrip())
	            arguments = match.group(1) if match else None
	            if arguments:
	                break
	            out_lines.append(line)
	            if self.is_start_line(line):
	                break

	        output: str | None
	        output = "".join(out_lines)
	        if arguments:
	            # shlex.split() raises ValueError on e.g. an unclosed quote.
	            try:
	                fields = shlex.split(arguments)
	            except ValueError:
	                fail(f"Mangled Argument Clinic marker line: {line!r}")

	            d = {}
	            for field in fields:
	                name, equals, value = field.partition('=')
	                if not equals:
	                    fail(f"Mangled Argument Clinic marker line: {line!r}")
	                d[name.strip()] = value.strip()

	            if self.verify:
	                # Marker lines carrying an "input" field keep the output
	                # checksum under "output"; others keep it under "checksum".
	                key = 'output' if 'input' in d else 'checksum'
	                if key not in d:
	                    fail(f"Mangled Argument Clinic marker line: {line!r}")
	                checksum = d[key]

	                computed = libclinic.compute_checksum(output, len(checksum))
	                if checksum != computed:
	                    fail("Checksum mismatch! "
	                         f"Expected {checksum!r}, computed {computed!r}. "
	                         "Suggested fix: remove all generated code including "
	                         "the end marker, or use the '-f' option.")
	        else:
	            # put back output
	            output_lines = output.splitlines(keepends=True)
	            self.line_number -= len(output_lines)
	            self.input.extend(reversed(output_lines))
	            output = None

	        return Block("".join(in_lines), dsl_name, output=output)