# blob: e9d5613668a9d994afc860001012208b363d5711
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
# pyre-strict
import copy
from functools import partial
from typing import Any, Callable, Dict, final, List
import executorch.backends.vulkan.utils as utils
from executorch.backends.transforms.addmm_mm_to_linear import AddmmToLinearTransform
from executorch.backends.transforms.fuse_conv_with_clamp import FuseClampPass
from executorch.backends.transforms.fuse_view_copy import FuseViewCopyTransform
from executorch.backends.transforms.view_copy_to_squeeze_unsqueeze import (
ViewCopyToSqueezeUnsqueezePass,
)
from executorch.backends.vulkan._passes import (
FoldQDQPass,
FuseQuantizedOpsTransform,
insert_prepack_nodes,
InsertDtypePromotionPass,
RemoveRedundantOpsTransform,
SqueezeUnsqueezeInputs,
TagMemoryMetaPass,
)
from executorch.backends.vulkan._passes.fuse_patterns import FusePatternsPass
from executorch.backends.vulkan._passes.remove_asserts import RemoveAssertsTransform
from executorch.backends.vulkan.serialization.vulkan_graph_builder import VkGraphBuilder
from executorch.backends.vulkan.serialization.vulkan_graph_schema import (
VkMemoryLayout,
VkStorageType,
)
from executorch.backends.vulkan.serialization.vulkan_graph_serialize import (
serialize_vulkan_graph,
)
from executorch.backends.xnnpack._passes import FuseBatchNormPass
from executorch.exir.backend.backend_details import (
BackendDetails,
CompileSpec,
ExportedProgram,
PreprocessResult,
)
from executorch.exir.backend.utils import DelegateMappingBuilder
from executorch.exir.memory_planning import greedy, MemoryPlanningAlgorithmSuite
from executorch.exir.pass_base import ExportPass, PassBase
from executorch.exir.passes import MemoryPlanningPass, SpecPropPass
from executorch.exir.passes.sym_shape_eval_pass import ConstraintBasedSymShapeEvalPass
from executorch.exir.program._program import _transform
from torch._export.verifier import Verifier
from torch.export._remove_auto_functionalized_pass import (
unsafe_remove_auto_functionalized_pass,
)
# Sentinel debug-handle value (0xFFFF, the max uint16).
# NOTE(review): not referenced elsewhere in this chunk — presumably consumed by
# delegate-mapping consumers; confirm against callers.
DEFAULT_DEBUG_HANDLE = 65535
class _any_op(Verifier):
    """Permissive verifier that accepts any callable as a graph operator.

    Passed to ``_transform`` (see ``apply_passes``) via ``override_verifiers``
    so that intermediate, non-ATen-compliant graph states produced by the
    Vulkan passes do not fail verification.
    """

    # Set training dialect to skip functional check in base verifier
    dialect = "TRAINING"

    def allowed_op_types(self):
        # Accept every callable — no restriction on which ops may appear.
        return (Callable,)
# pyre-ignore
def apply_passes(program: ExportedProgram, passes) -> ExportedProgram:
    """Apply a sequence of graph passes to ``program`` and return the result.

    Each entry in ``passes`` is either an ``ExportPass``/``PassBase`` instance
    (applied through ``_transform`` with the permissive ``_any_op`` verifier)
    or a plain callable that takes and returns an ``ExportedProgram``.

    Fix: the previous version special-cased ``MemoryPlanningPass`` by calling
    ``p.run(program.graph_module)`` directly, which discarded the pass result
    and bypassed ``_transform``, so the ExportedProgram was never updated with
    the pass's effect. ``MemoryPlanningPass`` is a ``PassBase`` and must go
    through the standard ``_transform`` path like every other pass.
    """
    for p in passes:
        if issubclass(type(p), ExportPass) or issubclass(type(p), PassBase):
            # Some passes require the ep to be provided. However, since the ep may be
            # updated with each pass applied, the ep must be set right before calling
            # the pass. _exported_program is the attribute used by XNNPACK and Vulkan
            # passes to store the exported program.
            if hasattr(p, "_exported_program"):
                p._exported_program = program
            program = _transform(program, p, override_verifiers=[_any_op])
            # See the application of this function in exir/program/_program.py for more
            # details on why this step is necessary.
            if isinstance(p, SpecPropPass):
                p.update_placeholder_tensor_specs(program, program.graph_module)
        else:
            # Plain callable pass (e.g. insert_prepack_nodes).
            program = p(program)
    return program
def parse_compile_spec(compile_specs: List[CompileSpec]) -> Dict[str, Any]:
    """Decode a list of CompileSpecs into a plain options dict.

    All values are little-endian encoded bytes. Keys not recognized here are
    silently ignored (i.e. they never appear in the returned dict).

    Fix: ``VulkanBackend.preprocess`` reads ``small_texture_limits`` and
    ``skip_memory_planning`` from the returned dict, but this parser never
    produced those keys, so both options always fell back to their defaults
    and were unreachable. They are now decoded as booleans like the other
    boolean options.
    """
    # Keys decoded as little-endian integers.
    int_keys = {"texture_limits_x", "texture_limits_y", "texture_limits_z"}
    # Keys decoded as little-endian booleans.
    bool_keys = {
        "skip_tag_memory_metadata",
        "downcast_64_bit",
        "force_fp16",
        "small_texture_limits",
        "skip_memory_planning",
    }

    options: Dict[str, Any] = {}
    for spec in compile_specs:
        if spec.key == "storage_type_override":
            options[spec.key] = VkStorageType(
                int.from_bytes(spec.value, byteorder="little")
            )
        elif spec.key == "memory_layout_override":
            options[spec.key] = VkMemoryLayout(
                int.from_bytes(spec.value, byteorder="little")
            )
        elif spec.key in int_keys:
            options[spec.key] = int.from_bytes(spec.value, byteorder="little")
        elif spec.key in bool_keys:
            options[spec.key] = bool.from_bytes(spec.value, byteorder="little")
        # Unhandled options are ignored
    return options
@final
class VulkanBackend(BackendDetails):
    """BackendDetails implementation for the Vulkan delegate.

    Lowers an ``ExportedProgram`` to a serialized Vulkan compute graph through
    a sequence of graph transformation passes, then serializes the result into
    the ``processed_bytes`` of a ``PreprocessResult``.
    """

    @classmethod
    # pyre-ignore
    def preprocess(  # noqa: C901
        cls,
        program: ExportedProgram,
        module_compile_spec: List[CompileSpec],
    ) -> PreprocessResult:
        """Lower ``program`` for execution on the Vulkan delegate.

        Args:
            program: The exported program to lower.
            module_compile_spec: Compile specs controlling storage type,
                memory layout, texture limits, and dtype handling (see
                ``parse_compile_spec`` for the recognized keys).

        Returns:
            A ``PreprocessResult`` whose ``processed_bytes`` contain the
            serialized Vulkan graph, along with the delegate debug-handle map
            and named data store output.

        Fix: the first pass list previously contained a duplicate
        ``AddmmToLinearTransform()`` entry (before ``FuseBatchNormPass``);
        the redundant first occurrence has been removed.
        """
        compile_options = parse_compile_spec(module_compile_spec)

        default_texture_limits = copy.deepcopy(utils.DEFAULT_TEXTURE_LIMITS)
        # 2048 is the typical limit value for 3D textures, but mobile GPUs often support
        # 16384. Since the Vulkan delegate primarily targets mobile GPUs at the moment,
        # 16384 is the default texture limit used. This option is provided as a
        # convenient way to switch to using a limit of 2048 for image textures which
        # will be compatible with most GPUs.
        if compile_options.get("small_texture_limits", False):
            default_texture_limits[0] = 2048
            default_texture_limits[1] = 2048
            default_texture_limits[2] = 2048

        limits_x = compile_options.get("texture_limits_x", default_texture_limits[0])
        limits_y = compile_options.get("texture_limits_y", default_texture_limits[1])
        limits_z = compile_options.get("texture_limits_z", default_texture_limits[2])
        texture_limits = (limits_x, limits_y, limits_z)

        default_storage_type = compile_options.get(
            "storage_type_override", VkStorageType.TEXTURE_3D
        )
        default_memory_layout = compile_options.get(
            "memory_layout_override", VkMemoryLayout.TENSOR_WIDTH_PACKED
        )
        downcast_64_bit = compile_options.get("downcast_64_bit", True)
        force_fp16 = compile_options.get("force_fp16", False)

        program = unsafe_remove_auto_functionalized_pass(program)

        # First, apply passes that fuse/remove operators to consolidate the graph
        # structure but still preserve an "ATen-compliant" graph structure (i.e. all
        # arguments to ATen operators must match the ATen function schema).
        program = apply_passes(
            program,
            [
                FuseBatchNormPass(program),
                AddmmToLinearTransform(),
                InsertDtypePromotionPass(),
                FusePatternsPass(),
                FuseClampPass(),
                RemoveRedundantOpsTransform(),
                FuseQuantizedOpsTransform(),
                FoldQDQPass(),
                SqueezeUnsqueezeInputs(),
                FuseViewCopyTransform(),
                ViewCopyToSqueezeUnsqueezePass(),
            ],
        )

        # Next annotate tensor nodes with TensorSpec structs which is needed for dynamic
        # shapes and memory planning. Until this point, the graph must be ATen compliant
        # because SpecPropPass will be calling the underlying ATen operators during its
        # execution.
        program = apply_passes(program, [SpecPropPass()])

        # Apply graph transforms which either require `TensorSpec`s to have been created
        # or would create a non ATen compliant graph structure.
        program = apply_passes(
            program,
            [
                RemoveAssertsTransform(),
                insert_prepack_nodes,
            ],
        )

        # Optionally apply the memory metadata tagging pass, which will insert storage
        # type and memory layout transition nodes to ensure that all tensor arguments
        # to an operator is in a supported or optimal configuration. If this pass is not
        # applied, there will be a risk that some operators receive arguments with
        # memory settings that are not supported by the implementation.
        if not compile_options.get("skip_tag_memory_metadata", False):
            program = apply_passes(
                program,
                [
                    TagMemoryMetaPass(
                        texture_limits,
                        default_storage_type=default_storage_type,
                        default_memory_layout=default_memory_layout,
                        force_fp16=force_fp16,
                    ),
                ],
            )

        # Finally, apply dynamic shape passes and memory planning pass. These passes
        # must be applied only when the graph structure is finalized.
        final_passes = [
            ConstraintBasedSymShapeEvalPass(),
        ]
        if not compile_options.get("skip_memory_planning", False):
            greedy_memory_planning = partial(
                greedy, allow_overlapping_allocations=False
            )
            mem_planning_suite = MemoryPlanningAlgorithmSuite(
                algo_list=[greedy_memory_planning]
            )
            # This is a workaround to allow the memory planning pass to work without having
            # to first apply ToOutVarPass(). See the `greedy()` function in
            # `exir.memory_planning`; if this attribute isn't set, assertions in
            # `collect_spec_from_nodes()` will fail.
            program.graph_module.encounter_to_out_var_failure = True
            final_passes.append(
                MemoryPlanningPass(memory_planning_algo=mem_planning_suite)
            )
        program = apply_passes(program, final_passes)

        graph_builder = VkGraphBuilder(
            program,
            DelegateMappingBuilder(generated_identifiers=True),
            downcast_64_bit=downcast_64_bit,
            force_fp16=force_fp16,
        )
        vk_graph = graph_builder.build_graph()

        return PreprocessResult(
            processed_bytes=serialize_vulkan_graph(
                vk_graph, graph_builder.const_tensors, []
            ),
            debug_handle_map=graph_builder.delegate_mapping_builder.get_delegate_mapping(),
            data_store_output=graph_builder.named_data_store.get_named_data_store_output(),
        )