Tools/cases_generator/instructions.py - platform/external/python/cpython3 - Git at Google

 import dataclasses
 import re
 import typing

 from flags import InstructionFlags, variable_used, variable_used_unspecialized
 from formatting import (
     Formatter,
     UNUSED,
     list_effect_size,
 )
 import lexer as lx
 import parsing
 from parsing import StackEffect
 import stacking

 BITS_PER_CODE_UNIT = 16


 @dataclasses.dataclass
 class ActiveCacheEffect:
     """Wraps a CacheEffect that is actually used, in context."""

     effect: parsing.CacheEffect
     offset: int


 FORBIDDEN_NAMES_IN_UOPS = (
     "resume_with_error",
     "kwnames",
     "next_instr",
     "oparg1",  # Proxy for super-instructions like LOAD_FAST_LOAD_FAST
     "JUMPBY",
     "DISPATCH",
     "INSTRUMENTED_JUMP",
     "throwflag",
     "exception_unwind",
     "import_from",
     "import_name",
     "_PyObject_CallNoArgs",  # Proxy for BEFORE_WITH
 )


 # Interpreter tiers
 TIER_ONE: typing.Final = 1  # Specializing adaptive interpreter (PEP 659)
 TIER_TWO: typing.Final = 2  # Experimental tracing interpreter
 Tiers: typing.TypeAlias = typing.Literal[1, 2]


 @dataclasses.dataclass
 class Instruction:
     """An instruction with additional data and code."""

     # Parts of the underlying instruction definition
     inst: parsing.InstDef
     kind: typing.Literal["inst", "op"]
     name: str
     block: parsing.Block
     block_text: list[str]  # Block.text, less curlies, less PREDICT() calls
     block_line: int  # First line of block in original code

     # Computed by constructor
     always_exits: str  # If the block always exits, its last line; else ""
     has_deopt: bool
     cache_offset: int
     cache_effects: list[parsing.CacheEffect]
     input_effects: list[StackEffect]
     output_effects: list[StackEffect]
     unmoved_names: frozenset[str]
     instr_fmt: str
     instr_flags: InstructionFlags
     active_caches: list[ActiveCacheEffect]

     # Set later
     family: parsing.Family | None = None
     predicted: bool = False

     def __init__(self, inst: parsing.InstDef):
         self.inst = inst
         self.kind = inst.kind
         self.name = inst.name
         self.block = inst.block
         self.block_text, self.check_eval_breaker, self.block_line = extract_block_text(
             self.block
         )
         self.always_exits = always_exits(self.block_text)
         self.has_deopt = variable_used(self.inst, "DEOPT_IF")
         self.cache_effects = [
             effect for effect in inst.inputs if isinstance(effect, parsing.CacheEffect)
         ]
         self.cache_offset = sum(c.size for c in self.cache_effects)
         self.input_effects = [
             effect for effect in inst.inputs if isinstance(effect, StackEffect)
         ]
         self.output_effects = inst.outputs  # For consistency/completeness
         unmoved_names: set[str] = set()
         for ieffect, oeffect in zip(self.input_effects, self.output_effects):
             if ieffect == oeffect and ieffect.name == oeffect.name:
                 unmoved_names.add(ieffect.name)
             else:
                 break
         self.unmoved_names = frozenset(unmoved_names)

         self.instr_flags = InstructionFlags.fromInstruction(inst)

         self.active_caches = []
         offset = 0
         for effect in self.cache_effects:
             if effect.name != UNUSED:
                 self.active_caches.append(ActiveCacheEffect(effect, offset))
             offset += effect.size

         if self.instr_flags.HAS_ARG_FLAG:
             fmt = "IB"
         else:
             fmt = "IX"
         if offset:
             fmt += "C" + "0" * (offset - 1)
         self.instr_fmt = fmt

     def is_viable_uop(self) -> bool:
         """Whether this instruction is viable as a uop."""
         dprint: typing.Callable[..., None] = lambda *args, **kwargs: None
         if "FRAME" in self.name:
             dprint = print

         if self.name == "EXIT_TRACE":
             return True  # This has 'return frame' but it's okay
         if self.always_exits:
             dprint(f"Skipping {self.name} because it always exits: {self.always_exits}")
             return False
         if len(self.active_caches) > 1:
             # print(f"Skipping {self.name} because it has >1 cache entries")
             return False
         res = True
         for forbidden in FORBIDDEN_NAMES_IN_UOPS:
             # NOTE: To disallow unspecialized uops, use
             # if variable_used(self.inst, forbidden):
             if variable_used_unspecialized(self.inst, forbidden):
                 dprint(f"Skipping {self.name} because it uses {forbidden}")
                 res = False
         return res

     def write_body(
         self,
         out: Formatter,
         dedent: int,
         active_caches: list[ActiveCacheEffect],
         tier: Tiers = TIER_ONE,
     ) -> None:
         """Write the instruction body."""
         # Write cache effect variable declarations and initializations
         for active in active_caches:
             ceffect = active.effect
             bits = ceffect.size * BITS_PER_CODE_UNIT
             if bits == 64:
                 # NOTE: We assume that 64-bit data in the cache
                 # is always an object pointer.
                 # If this becomes false, we need a way to specify
                 # syntactically what type the cache data is.
                 typ = "PyObject *"
                 func = "read_obj"
             else:
                 typ = f"uint{bits}_t "
                 func = f"read_u{bits}"
             if tier == TIER_ONE:
                 out.emit(
                     f"{typ}{ceffect.name} = {func}(&next_instr[{active.offset}].cache);"
                 )
             else:
                 out.emit(f"{typ}{ceffect.name} = ({typ.strip()})operand;")

         # Write the body, substituting a goto for ERROR_IF() and other stuff
         assert dedent <= 0
         extra = " " * -dedent
         names_to_skip = self.unmoved_names | frozenset({UNUSED, "null"})
         offset = 0
         context = self.block.context
         assert context is not None and context.owner is not None
         filename = context.owner.filename
         for line in self.block_text:
             out.set_lineno(self.block_line + offset, filename)
             offset += 1
             if m := re.match(r"(\s*)ERROR_IF\((.+), (\w+)\);\s*(?://.*)?$", line):
                 space, cond, label = m.groups()
                 space = extra + space
                 # ERROR_IF() must pop the inputs from the stack.
                 # The code block is responsible for DECREF()ing them.
                 # NOTE: If the label doesn't exist, just add it to ceval.c.

                 # Don't pop common input/output effects at the bottom!
                 # These aren't DECREF'ed so they can stay.
                 ieffs = list(self.input_effects)
                 oeffs = list(self.output_effects)
                 while (
                     ieffs
                     and oeffs
                     and ieffs[0] == oeffs[0]
                     and ieffs[0].name == oeffs[0].name
                 ):
                     ieffs.pop(0)
                     oeffs.pop(0)
                 ninputs, symbolic = list_effect_size(ieffs)
                 if ninputs:
                     label = f"pop_{ninputs}_{label}"
                 if symbolic:
                     out.write_raw(
                         f"{space}if ({cond}) {{ STACK_SHRINK({symbolic}); goto {label}; }}\n"
                     )
                 else:
                     out.write_raw(f"{space}if ({cond}) goto {label};\n")
             elif m := re.match(r"(\s*)DECREF_INPUTS\(\);\s*(?://.*)?$", line):
                 out.reset_lineno()
                 space = extra + m.group(1)
                 for ieff in self.input_effects:
                     if ieff.name in names_to_skip:
                         continue
                     if ieff.size:
                         out.write_raw(
                             f"{space}for (int _i = {ieff.size}; --_i >= 0;) {{\n"
                         )
                         out.write_raw(f"{space}    Py_DECREF({ieff.name}[_i]);\n")
                         out.write_raw(f"{space}}}\n")
                     else:
                         decref = "XDECREF" if ieff.cond else "DECREF"
                         out.write_raw(f"{space}Py_{decref}({ieff.name});\n")
             else:
                 out.write_raw(extra + line)
         out.reset_lineno()


 InstructionOrCacheEffect = Instruction | parsing.CacheEffect


 # Instruction used for abstract interpretation.
 class AbstractInstruction(Instruction):
     def __init__(self, inst: parsing.InstDef):
         super().__init__(inst)

     def write(self, out: Formatter, tier: Tiers = TIER_ONE) -> None:
         """Write one abstract instruction, sans prologue and epilogue."""
         stacking.write_single_instr_for_abstract_interp(self, out)

     def write_body(
         self,
         out: Formatter,
         dedent: int,
         active_caches: list[ActiveCacheEffect],
         tier: Tiers = TIER_ONE,
     ) -> None:
         pass


 @dataclasses.dataclass
 class Component:
     instr: Instruction
     active_caches: list[ActiveCacheEffect]


 MacroParts = list[Component | parsing.CacheEffect]


 @dataclasses.dataclass
 class MacroInstruction:
     """A macro instruction."""

     name: str
     instr_fmt: str
     instr_flags: InstructionFlags
     macro: parsing.Macro
     parts: MacroParts
     cache_offset: int
     predicted: bool = False


 @dataclasses.dataclass
 class PseudoInstruction:
     """A pseudo instruction."""

     name: str
     targets: list[Instruction]
     instr_fmt: str
     instr_flags: InstructionFlags


 @dataclasses.dataclass
 class OverriddenInstructionPlaceHolder:
     name: str


 AnyInstruction = Instruction | MacroInstruction | PseudoInstruction


 def extract_block_text(block: parsing.Block) -> tuple[list[str], bool, int]:
     # Get lines of text with proper dedent
     blocklines = block.text.splitlines(True)
     first_token: lx.Token = block.tokens[0]  # IndexError means the context is broken
     block_line = first_token.begin[0]

     # Remove blank lines from both ends
     while blocklines and not blocklines[0].strip():
         blocklines.pop(0)
         block_line += 1
     while blocklines and not blocklines[-1].strip():
         blocklines.pop()

     # Remove leading and trailing braces
     assert blocklines and blocklines[0].strip() == "{"
     assert blocklines and blocklines[-1].strip() == "}"
     blocklines.pop()
     blocklines.pop(0)
     block_line += 1

     # Remove trailing blank lines
     while blocklines and not blocklines[-1].strip():
         blocklines.pop()

     # Separate CHECK_EVAL_BREAKER() macro from end
     check_eval_breaker = (
         blocklines != [] and blocklines[-1].strip() == "CHECK_EVAL_BREAKER();"
     )
     if check_eval_breaker:
         del blocklines[-1]

     return blocklines, check_eval_breaker, block_line


 def always_exits(lines: list[str]) -> str:
     """Determine whether a block always ends in a return/goto/etc."""
     if not lines:
         return ""
     line = lines[-1].rstrip()
     # Indent must match exactly (TODO: Do something better)
     if line[:12] != " " * 12:
         return ""
     line = line[12:]
     if line.startswith(
         (
             "goto ",
             "return ",
             "DISPATCH",
             "GO_TO_",
             "Py_UNREACHABLE()",
             "ERROR_IF(true, ",
         )
     ):
         return line
     return ""
	import dataclasses
	import re
	import typing

	from flags import InstructionFlags, variable_used, variable_used_unspecialized
	from formatting import (
	Formatter,
	UNUSED,
	list_effect_size,
	)
	import lexer as lx
	import parsing
	from parsing import StackEffect
	import stacking

	BITS_PER_CODE_UNIT = 16


	@dataclasses.dataclass
	class ActiveCacheEffect:
	"""Wraps a CacheEffect that is actually used, in context."""

	effect: parsing.CacheEffect
	offset: int


	FORBIDDEN_NAMES_IN_UOPS = (
	"resume_with_error",
	"kwnames",
	"next_instr",
	"oparg1", # Proxy for super-instructions like LOAD_FAST_LOAD_FAST
	"JUMPBY",
	"DISPATCH",
	"INSTRUMENTED_JUMP",
	"throwflag",
	"exception_unwind",
	"import_from",
	"import_name",
	"_PyObject_CallNoArgs", # Proxy for BEFORE_WITH
	)


	# Interpreter tiers
	TIER_ONE: typing.Final = 1 # Specializing adaptive interpreter (PEP 659)
	TIER_TWO: typing.Final = 2 # Experimental tracing interpreter
	Tiers: typing.TypeAlias = typing.Literal[1, 2]


	@dataclasses.dataclass
	class Instruction:
	"""An instruction with additional data and code."""

	# Parts of the underlying instruction definition
	inst: parsing.InstDef
	kind: typing.Literal["inst", "op"]
	name: str
	block: parsing.Block
	block_text: list[str] # Block.text, less curlies, less PREDICT() calls
	block_line: int # First line of block in original code

	# Computed by constructor
	always_exits: str # If the block always exits, its last line; else ""
	has_deopt: bool
	cache_offset: int
	cache_effects: list[parsing.CacheEffect]
	input_effects: list[StackEffect]
	output_effects: list[StackEffect]
	unmoved_names: frozenset[str]
	instr_fmt: str
	instr_flags: InstructionFlags
	active_caches: list[ActiveCacheEffect]

	# Set later
	family: parsing.Family \| None = None
	predicted: bool = False

	def __init__(self, inst: parsing.InstDef):
	self.inst = inst
	self.kind = inst.kind
	self.name = inst.name
	self.block = inst.block
	self.block_text, self.check_eval_breaker, self.block_line = extract_block_text(
	self.block
	)
	self.always_exits = always_exits(self.block_text)
	self.has_deopt = variable_used(self.inst, "DEOPT_IF")
	self.cache_effects = [
	effect for effect in inst.inputs if isinstance(effect, parsing.CacheEffect)
	]
	self.cache_offset = sum(c.size for c in self.cache_effects)
	self.input_effects = [
	effect for effect in inst.inputs if isinstance(effect, StackEffect)
	]
	self.output_effects = inst.outputs # For consistency/completeness
	unmoved_names: set[str] = set()
	for ieffect, oeffect in zip(self.input_effects, self.output_effects):
	if ieffect == oeffect and ieffect.name == oeffect.name:
	unmoved_names.add(ieffect.name)
	else:
	break
	self.unmoved_names = frozenset(unmoved_names)

	self.instr_flags = InstructionFlags.fromInstruction(inst)

	self.active_caches = []
	offset = 0
	for effect in self.cache_effects:
	if effect.name != UNUSED:
	self.active_caches.append(ActiveCacheEffect(effect, offset))
	offset += effect.size

	if self.instr_flags.HAS_ARG_FLAG:
	fmt = "IB"
	else:
	fmt = "IX"
	if offset:
	fmt += "C" + "0" * (offset - 1)
	self.instr_fmt = fmt

	def is_viable_uop(self) -> bool:
	"""Whether this instruction is viable as a uop."""
	dprint: typing.Callable[..., None] = lambda args, *kwargs: None
	if "FRAME" in self.name:
	dprint = print

	if self.name == "EXIT_TRACE":
	return True # This has 'return frame' but it's okay
	if self.always_exits:
	dprint(f"Skipping {self.name} because it always exits: {self.always_exits}")
	return False
	if len(self.active_caches) > 1:
	# print(f"Skipping {self.name} because it has >1 cache entries")
	return False
	res = True
	for forbidden in FORBIDDEN_NAMES_IN_UOPS:
	# NOTE: To disallow unspecialized uops, use
	# if variable_used(self.inst, forbidden):
	if variable_used_unspecialized(self.inst, forbidden):
	dprint(f"Skipping {self.name} because it uses {forbidden}")
	res = False
	return res

	def write_body(
	self,
	out: Formatter,
	dedent: int,
	active_caches: list[ActiveCacheEffect],
	tier: Tiers = TIER_ONE,
	) -> None:
	"""Write the instruction body."""
	# Write cache effect variable declarations and initializations
	for active in active_caches:
	ceffect = active.effect
	bits = ceffect.size * BITS_PER_CODE_UNIT
	if bits == 64:
	# NOTE: We assume that 64-bit data in the cache
	# is always an object pointer.
	# If this becomes false, we need a way to specify
	# syntactically what type the cache data is.
	typ = "PyObject *"
	func = "read_obj"
	else:
	typ = f"uint{bits}_t "
	func = f"read_u{bits}"
	if tier == TIER_ONE:
	out.emit(
	f"{typ}{ceffect.name} = {func}(&next_instr[{active.offset}].cache);"
	)
	else:
	out.emit(f"{typ}{ceffect.name} = ({typ.strip()})operand;")

	# Write the body, substituting a goto for ERROR_IF() and other stuff
	assert dedent <= 0
	extra = " " * -dedent
	names_to_skip = self.unmoved_names \| frozenset({UNUSED, "null"})
	offset = 0
	context = self.block.context
	assert context is not None and context.owner is not None
	filename = context.owner.filename
	for line in self.block_text:
	out.set_lineno(self.block_line + offset, filename)
	offset += 1
	if m := re.match(r"(\s)ERROR_IF\((.+), (\w+)\);\s(?://.*)?$", line):
	space, cond, label = m.groups()
	space = extra + space
	# ERROR_IF() must pop the inputs from the stack.
	# The code block is responsible for DECREF()ing them.
	# NOTE: If the label doesn't exist, just add it to ceval.c.

	# Don't pop common input/output effects at the bottom!
	# These aren't DECREF'ed so they can stay.
	ieffs = list(self.input_effects)
	oeffs = list(self.output_effects)
	while (
	ieffs
	and oeffs
	and ieffs[0] == oeffs[0]
	and ieffs[0].name == oeffs[0].name
	):
	ieffs.pop(0)
	oeffs.pop(0)
	ninputs, symbolic = list_effect_size(ieffs)
	if ninputs:
	label = f"pop_{ninputs}_{label}"
	if symbolic:
	out.write_raw(
	f"{space}if ({cond}) {{ STACK_SHRINK({symbolic}); goto {label}; }}\n"
	)
	else:
	out.write_raw(f"{space}if ({cond}) goto {label};\n")
	elif m := re.match(r"(\s)DECREF_INPUTS\(\);\s(?://.*)?$", line):
	out.reset_lineno()
	space = extra + m.group(1)
	for ieff in self.input_effects:
	if ieff.name in names_to_skip:
	continue
	if ieff.size:
	out.write_raw(
	f"{space}for (int _i = {ieff.size}; --_i >= 0;) {{\n"
	)
	out.write_raw(f"{space} Py_DECREF({ieff.name}[_i]);\n")
	out.write_raw(f"{space}}}\n")
	else:
	decref = "XDECREF" if ieff.cond else "DECREF"
	out.write_raw(f"{space}Py_{decref}({ieff.name});\n")
	else:
	out.write_raw(extra + line)
	out.reset_lineno()


	InstructionOrCacheEffect = Instruction \| parsing.CacheEffect


	# Instruction used for abstract interpretation.
	class AbstractInstruction(Instruction):
	def __init__(self, inst: parsing.InstDef):
	super().__init__(inst)

	def write(self, out: Formatter, tier: Tiers = TIER_ONE) -> None:
	"""Write one abstract instruction, sans prologue and epilogue."""
	stacking.write_single_instr_for_abstract_interp(self, out)

	def write_body(
	self,
	out: Formatter,
	dedent: int,
	active_caches: list[ActiveCacheEffect],
	tier: Tiers = TIER_ONE,
	) -> None:
	pass


	@dataclasses.dataclass
	class Component:
	instr: Instruction
	active_caches: list[ActiveCacheEffect]


	MacroParts = list[Component \| parsing.CacheEffect]


	@dataclasses.dataclass
	class MacroInstruction:
	"""A macro instruction."""

	name: str
	instr_fmt: str
	instr_flags: InstructionFlags
	macro: parsing.Macro
	parts: MacroParts
	cache_offset: int
	predicted: bool = False


	@dataclasses.dataclass
	class PseudoInstruction:
	"""A pseudo instruction."""

	name: str
	targets: list[Instruction]
	instr_fmt: str
	instr_flags: InstructionFlags


	@dataclasses.dataclass
	class OverriddenInstructionPlaceHolder:
	name: str


	AnyInstruction = Instruction \| MacroInstruction \| PseudoInstruction


	def extract_block_text(block: parsing.Block) -> tuple[list[str], bool, int]:
	# Get lines of text with proper dedent
	blocklines = block.text.splitlines(True)
	first_token: lx.Token = block.tokens[0] # IndexError means the context is broken
	block_line = first_token.begin[0]

	# Remove blank lines from both ends
	while blocklines and not blocklines[0].strip():
	blocklines.pop(0)
	block_line += 1
	while blocklines and not blocklines[-1].strip():
	blocklines.pop()

	# Remove leading and trailing braces
	assert blocklines and blocklines[0].strip() == "{"
	assert blocklines and blocklines[-1].strip() == "}"
	blocklines.pop()
	blocklines.pop(0)
	block_line += 1

	# Remove trailing blank lines
	while blocklines and not blocklines[-1].strip():
	blocklines.pop()

	# Separate CHECK_EVAL_BREAKER() macro from end
	check_eval_breaker = (
	blocklines != [] and blocklines[-1].strip() == "CHECK_EVAL_BREAKER();"
	)
	if check_eval_breaker:
	del blocklines[-1]

	return blocklines, check_eval_breaker, block_line


	def always_exits(lines: list[str]) -> str:
	"""Determine whether a block always ends in a return/goto/etc."""
	if not lines:
	return ""
	line = lines[-1].rstrip()
	# Indent must match exactly (TODO: Do something better)
	if line[:12] != " " * 12:
	return ""
	line = line[12:]
	if line.startswith(
	(
	"goto ",
	"return ",
	"DISPATCH",
	"GO_TO_",
	"Py_UNREACHABLE()",
	"ERROR_IF(true, ",
	)
	):
	return line
	return ""