/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <executorch/backends/vulkan/runtime/ResolveLayouts.h>
#include <executorch/backends/vulkan/runtime/VulkanDelegateHeader.h>
#include <executorch/backends/vulkan/serialization/schema_generated.h>
#include <executorch/backends/vulkan/runtime/graph/ComputeGraph.h>
#include <executorch/backends/vulkan/runtime/graph/ops/OperatorRegistry.h>
#include <executorch/backends/vulkan/runtime/vk_api/Runtime.h>
#include <executorch/runtime/backend/interface.h>
#include <executorch/runtime/core/error.h>
#include <executorch/runtime/core/evalue.h>
#ifdef ET_EVENT_TRACER_ENABLED
#include <executorch/backends/vulkan/runtime/graph/Logging.h>
#include <executorch/runtime/core/event_tracer_hooks_delegate.h>
#endif // ET_EVENT_TRACER_ENABLED
#include <executorch/runtime/core/exec_aten/util/tensor_util.h>
#include <executorch/runtime/core/named_data_map.h>
#include <executorch/runtime/platform/compiler.h>
#include <executorch/runtime/platform/profiler.h>
#include <cstdio>
#include <cstdlib> /* strtol */
#include <cstring>
#include <memory>
#include <type_traits>
#include <unordered_map>
#include <vector>
namespace executorch {
namespace backends {
namespace vulkan {
namespace {
using executorch::runtime::ArrayRef;
using executorch::runtime::Backend;
using executorch::runtime::BackendExecutionContext;
using executorch::runtime::BackendInitContext;
using executorch::runtime::CompileSpec;
using executorch::runtime::DelegateHandle;
using executorch::runtime::Error;
using executorch::runtime::EValue;
using executorch::runtime::FreeableBuffer;
using executorch::runtime::kTensorDimensionLimit;
using executorch::runtime::NamedDataMap;
using executorch::runtime::Result;
using executorch::runtime::Span;
using namespace vkcompute;
// Flatbuffer types
using VkGraphPtr = const vkgraph::VkGraph*;
using OpCallPtr = const vkgraph::OperatorCall*;
using VkValuePtr = const vkgraph::VkValue*;
using VkTensorPtr = const vkgraph::VkTensor*;
using VkBytesPtr = const vkgraph::VkBytes*;
// Flatbuffer vector types
using VkValuesVector =
const flatbuffers::Vector<flatbuffers::Offset<vkgraph::VkValue>>*;
using BytesVector =
const flatbuffers::Vector<flatbuffers::Offset<vkgraph::VkBytes>>*;
using UIntVector = const flatbuffers::Vector<uint32_t>*;
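// Maps a serialized vkgraph::VkDataType to the corresponding vkapi::ScalarType
// used by the Vulkan compute graph runtime.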
vkapi::ScalarType get_scalar_type(const vkgraph::VkDataType& vk_datatype) {
switch (vk_datatype) {
case vkgraph::VkDataType::BOOL:
return vkapi::kBool;
case vkgraph::VkDataType::UINT8:
return vkapi::kByte;
case vkgraph::VkDataType::INT8:
return vkapi::kChar;
case vkgraph::VkDataType::INT32:
return vkapi::kInt;
case vkgraph::VkDataType::INT64:
return vkapi::kLong;
case vkgraph::VkDataType::FLOAT16:
return vkapi::kHalf;
case vkgraph::VkDataType::FLOAT32:
return vkapi::kFloat;
case vkgraph::VkDataType::FLOAT64:
return vkapi::kDouble;
default:
VK_THROW("Invalid VkDataType type encountered!");
}
}
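// Maps an ExecuTorch etensor ScalarType to the equivalent vkapi::ScalarType.
// Used to interpret the dtype of tensors passed in at execution time.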
vkapi::ScalarType equivalent_scalar_type(
const executorch::runtime::etensor::ScalarType& et_datatype) {
switch (et_datatype) {
case executorch::runtime::etensor::ScalarType::Byte:
return vkapi::kByte;
case executorch::runtime::etensor::ScalarType::Char:
return vkapi::kChar;
case executorch::runtime::etensor::ScalarType::Int:
return vkapi::kInt;
case executorch::runtime::etensor::ScalarType::Long:
return vkapi::kLong;
case executorch::runtime::etensor::ScalarType::Half:
return vkapi::kHalf;
case executorch::runtime::etensor::ScalarType::Float:
return vkapi::kFloat;
case executorch::runtime::etensor::ScalarType::Double:
return vkapi::kDouble;
case executorch::runtime::etensor::ScalarType::Bool:
return vkapi::kBool;
default:
VK_THROW("Invalid etensor::ScalarType encountered!");
}
}
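// Maps a serialized vkgraph::VkStorageType to the runtime utils::StorageType
// (buffer vs. 2D/3D texture storage).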
utils::StorageType get_storage_type(
const vkgraph::VkStorageType& vk_storage_type) {
switch (vk_storage_type) {
case vkgraph::VkStorageType::BUFFER:
return utils::kBuffer;
case vkgraph::VkStorageType::TEXTURE_3D:
return utils::kTexture3D;
case vkgraph::VkStorageType::TEXTURE_2D:
return utils::kTexture2D;
default:
break;
}
VK_THROW("Invalid storage type encountered!");
}
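// Maps a serialized vkgraph::VkMemoryLayout to the runtime
// utils::GPUMemoryLayout.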
utils::GPUMemoryLayout get_memory_layout(
const vkgraph::VkMemoryLayout& vk_memory_layout) {
switch (vk_memory_layout) {
case vkgraph::VkMemoryLayout::TENSOR_WIDTH_PACKED:
return utils::kWidthPacked;
case vkgraph::VkMemoryLayout::TENSOR_HEIGHT_PACKED:
return utils::kHeightPacked;
case vkgraph::VkMemoryLayout::TENSOR_CHANNELS_PACKED:
return utils::kChannelsPacked;
case vkgraph::VkMemoryLayout::PACKED_INT8_4W4C:
return utils::kPackedInt8_4W4C;
case vkgraph::VkMemoryLayout::PACKED_INT8_4H4W:
return utils::kPackedInt8_4H4W;
case vkgraph::VkMemoryLayout::PACKED_INT8_4W:
return utils::kPackedInt8_4W;
case vkgraph::VkMemoryLayout::PACKED_INT8_4C:
return utils::kPackedInt8_4C;
case vkgraph::VkMemoryLayout::PACKED_INT8_4C1W:
return utils::kPackedInt8_4C1W;
case vkgraph::VkMemoryLayout::PACKED_INT8_CONV2D:
// Fallback for unresolved dynamic layout
return utils::kPackedInt8_4C1W;
default:
break;
}
VK_THROW("Invalid memory layout encountered!");
}
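// Constructs a GraphConfig from the compile specs attached to the delegate.
// Recognized keys: storage_type_override, memory_layout_override,
// require_dynamic_shapes, and warmup_execute_after_compile. The querypool is
// enabled automatically when event tracing is compiled in.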
GraphConfig get_graph_config(ArrayRef<CompileSpec>& compile_specs) {
GraphConfig config = GraphConfig();
for (const CompileSpec& spec : compile_specs) {
const uint8_t* value_data = (const uint8_t*)spec.value.buffer;
const size_t value_size = spec.value.nbytes;
if (strcmp(spec.key, "storage_type_override") == 0) {
ET_CHECK_MSG(value_size == sizeof(int32_t), "Unexpected value size!");
int value_as_int = static_cast<int>(getUInt32LE(value_data));
utils::StorageType storage_type =
static_cast<utils::StorageType>(value_as_int);
config.set_storage_type_override(storage_type);
}
if (strcmp(spec.key, "memory_layout_override") == 0) {
ET_CHECK_MSG(value_size == sizeof(uint32_t), "Unexpected value size!");
uint32_t value_as_int = getUInt32LE(value_data);
utils::GPUMemoryLayout memory_layout =
static_cast<utils::GPUMemoryLayout>(value_as_int);
config.set_memory_layout_override(memory_layout);
}
if (strcmp(spec.key, "require_dynamic_shapes") == 0) {
ET_CHECK_MSG(value_size == sizeof(uint8_t), "Unexpected value size!");
bool value = getBool(value_data);
if (value) {
config.expect_dynamic_shapes = true;
}
}
if (strcmp(spec.key, "warmup_execute_after_compile") == 0) {
ET_CHECK_MSG(value_size == sizeof(uint8_t), "Unexpected value size!");
bool value = getBool(value_data);
config.warmup_execute_after_compile = value;
}
}
#ifdef ET_EVENT_TRACER_ENABLED
config.enable_querypool = true;
#endif // ET_EVENT_TRACER_ENABLED
return config;
}
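// GraphBuilder translates the serialized flatbuffer representation of a model
// into a vkcompute::ComputeGraph. It maintains a mapping from flatbuffer value
// ids to ValueRefs in the ComputeGraph so that operator arguments can be
// resolved while parsing the operator call chain.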
class GraphBuilder {
ComputeGraph* compute_graph_;
VkGraphPtr flatbuffer_;
const uint8_t* constant_data_;
const NamedDataMap* named_data_map_;
std::vector<FreeableBuffer> loaded_buffers_from_map_;
std::vector<ValueRef> ref_mapping_;
std::unordered_map<uint32_t, vkgraph::VkMemoryLayout>
memory_layout_overrides_;
public:
explicit GraphBuilder(
ComputeGraph* compute_graph,
VkGraphPtr flatbuffer,
const uint8_t* constant_data,
const NamedDataMap* named_data_map)
: compute_graph_(compute_graph),
flatbuffer_(flatbuffer),
constant_data_(constant_data),
named_data_map_(named_data_map),
loaded_buffers_from_map_(),
ref_mapping_(),
memory_layout_overrides_() {}
void resolve_layouts() {
resolve_memory_layouts(
flatbuffer_, compute_graph_, memory_layout_overrides_);
}
void resize(uint32_t size) {
ref_mapping_.resize(size, INT32_MAX);
}
bool fb_id_exists(const uint32_t fb_id) {
return fb_id < ref_mapping_.size() && ref_mapping_[fb_id] != INT32_MAX;
}
ValueRef get_fb_id_valueref(const uint32_t fb_id) {
ET_CHECK_MSG(
fb_id_exists(fb_id),
"Trying to extract a value that hasn't yet been added to the graph.");
return ref_mapping_[fb_id];
}
utils::GPUMemoryLayout get_resolved_memory_layout(
const uint32_t fb_id,
VkTensorPtr tensor_fb,
const std::vector<int64_t>& dims_vector) {
auto it = memory_layout_overrides_.find(fb_id);
if (it != memory_layout_overrides_.end()) {
return get_memory_layout(it->second);
}
if (tensor_fb->memory_layout() == vkgraph::VkMemoryLayout::DEFAULT_LAYOUT) {
return compute_graph_->suggested_memory_layout(dims_vector);
}
return get_memory_layout(tensor_fb->memory_layout());
}
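// Adds a tensor value to the compute graph. Constant tensors (constant_id >= 0)
// are added as tensorrefs backed either by an entry in the NamedDataMap or by
// data embedded in the delegate payload; all other tensors are added as regular
// graph tensors with the resolved storage type and memory layout.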
void add_tensor_to_graph(const uint32_t fb_id, VkTensorPtr tensor_fb) {
const vkapi::ScalarType& dtype = get_scalar_type(tensor_fb->datatype());
utils::StorageType storage_type =
tensor_fb->storage_type() == vkgraph::VkStorageType::DEFAULT_STORAGE
? compute_graph_->suggested_storage_type()
: get_storage_type(tensor_fb->storage_type());
UIntVector dims_fb = tensor_fb->dims();
const std::vector<int64_t> dims_vector(dims_fb->cbegin(), dims_fb->cend());
utils::GPUMemoryLayout memory_layout =
get_resolved_memory_layout(fb_id, tensor_fb, dims_vector);
ValueRef ref;
if (tensor_fb->constant_id() >= 0) {
VkBytesPtr constant_bytes =
flatbuffer_->constants()->Get(tensor_fb->constant_id());
if (constant_bytes->named_key() != nullptr &&
constant_bytes->offset() == UINT64_MAX &&
named_data_map_ != nullptr) {
const std::string& data_name = constant_bytes->named_key()->str();
Result<FreeableBuffer> buffer =
named_data_map_->get_data(data_name.c_str());
VK_CHECK_COND(
buffer.ok(),
"Failed to get constant data for key %s from named_data_map. Error code: %u",
data_name.c_str(),
static_cast<uint32_t>(buffer.error()));
ref = compute_graph_->add_tensorref(
dims_vector, dtype, std::move(buffer.get()));
} else {
const uint8_t* tensor_data = constant_data_ + constant_bytes->offset();
ref = compute_graph_->add_tensorref(dims_vector, dtype, tensor_data);
}
} else {
ref = compute_graph_->add_tensor(
dims_vector,
dtype,
storage_type,
memory_layout,
tensor_fb->mem_obj_id());
}
ref_mapping_[fb_id] = ref;
}
void add_none_to_graph(const uint32_t fb_id) {
ValueRef ref = compute_graph_->add_none();
ref_mapping_[fb_id] = ref;
}
template <typename T>
typename std::enable_if<is_valid_scalar_type<T>::value, void>::type
add_scalar_to_graph(const uint32_t fb_id, T value) {
ValueRef ref = compute_graph_->add_scalar(value);
ref_mapping_[fb_id] = ref;
}
template <typename T>
typename std::enable_if<is_valid_scalar_type<T>::value, void>::type
add_scalar_list_to_graph(const uint32_t fb_id, std::vector<T>&& value) {
ValueRef ref = compute_graph_->add_scalar_list(std::move(value));
ref_mapping_[fb_id] = ref;
}
void add_value_list_to_graph(
const uint32_t fb_id,
std::vector<ValueRef>&& value) {
ValueRef ref = compute_graph_->add_value_list(std::move(value));
ref_mapping_[fb_id] = ref;
}
void add_string_to_graph(const uint32_t fb_id, VkValuePtr value) {
const auto fb_str = value->value_as_String()->string_val();
std::string string(fb_str->cbegin(), fb_str->cend());
ValueRef ref = compute_graph_->add_string(std::move(string));
ref_mapping_[fb_id] = ref;
}
void add_symint_to_graph(const uint32_t fb_id, VkValuePtr value) {
const int32_t fb_symint = value->value_as_SymInt()->value();
ValueRef ref = compute_graph_->add_symint(fb_symint);
ref_mapping_[fb_id] = ref;
}
void add_value_to_graph(const uint32_t fb_id, VkValuePtr value) {
ET_CHECK_MSG(
!fb_id_exists(fb_id),
"Trying to add a value that has already been added to the graph.");
switch (value->value_type()) {
case vkgraph::GraphTypes::Null:
add_none_to_graph(fb_id);
break;
case vkgraph::GraphTypes::Int:
add_scalar_to_graph(fb_id, value->value_as_Int()->int_val());
break;
case vkgraph::GraphTypes::Double:
add_scalar_to_graph(fb_id, value->value_as_Double()->double_val());
break;
case vkgraph::GraphTypes::Bool:
add_scalar_to_graph(fb_id, value->value_as_Bool()->bool_val());
break;
case vkgraph::GraphTypes::VkTensor:
add_tensor_to_graph(fb_id, value->value_as_VkTensor());
break;
case vkgraph::GraphTypes::IntList:
add_scalar_list_to_graph(
fb_id,
std::vector<int64_t>(
value->value_as_IntList()->items()->cbegin(),
value->value_as_IntList()->items()->cend()));
break;
case vkgraph::GraphTypes::DoubleList:
add_scalar_list_to_graph(
fb_id,
std::vector<double>(
value->value_as_DoubleList()->items()->cbegin(),
value->value_as_DoubleList()->items()->cend()));
break;
case vkgraph::GraphTypes::BoolList:
add_scalar_list_to_graph(
fb_id,
std::vector<bool>(
value->value_as_BoolList()->items()->cbegin(),
value->value_as_BoolList()->items()->cend()));
break;
case vkgraph::GraphTypes::ValueList:
add_value_list_to_graph(
fb_id,
std::vector<ValueRef>(
value->value_as_ValueList()->items()->cbegin(),
value->value_as_ValueList()->items()->cend()));
break;
case vkgraph::GraphTypes::String:
add_string_to_graph(fb_id, value);
break;
case vkgraph::GraphTypes::SymInt:
add_symint_to_graph(fb_id, value);
break;
default:
ET_CHECK_MSG(false, "Unsupported value type.");
}
}
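// Returns the scalar type to use for the staging buffer of an input/output
// tensor, falling back to the tensor's datatype if no staging datatype was
// serialized.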
vkapi::ScalarType get_staging_scalar_type_of(const uint32_t fb_id) {
VkTensorPtr tensor_fb =
flatbuffer_->values()->Get(fb_id)->value_as_VkTensor();
if (tensor_fb->staging_datatype() == vkgraph::VkDataType::UNSET) {
return get_scalar_type(tensor_fb->datatype());
}
return get_scalar_type(tensor_fb->staging_datatype());
}
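// Builds the compute graph from the flatbuffer: adds all values, marks the
// graph inputs, appends each operator in the call chain, then marks the graph
// outputs. When the querypool is enabled, node ids are assigned for profiling.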
void build_graph() {
// Resize the mapping to the number of values in the flatbuffer
resize(flatbuffer_->values()->size());
// First, add all values to the graph
for (uint32_t fb_id = 0; fb_id < flatbuffer_->values()->size(); ++fb_id) {
VkValuePtr value = flatbuffer_->values()->Get(fb_id);
add_value_to_graph(fb_id, value);
}
// Parse the inputs, which will be tensors most of the time but can also be
// symints and tensorrefs (which will be the case if the original graph had
// mutable buffers).
for (const uint32_t fb_id : *flatbuffer_->input_ids()) {
const ValueRef ref = get_fb_id_valueref(fb_id);
if (compute_graph_->val_is_tensor(ref)) {
compute_graph_->set_input_tensor(
ref, get_staging_scalar_type_of(fb_id));
} else {
compute_graph_->set_val_as_input(ref);
}
}
// Parse the operators
for (OpCallPtr op_call : *(flatbuffer_->chain())) {
std::string op_name = op_call->name()->str();
ET_CHECK_MSG(VK_HAS_OP(op_name), "Missing operator: %s", op_name.c_str());
std::vector<ValueRef> args;
args.reserve(op_call->args()->size());
for (const auto arg_fb_id : *op_call->args()) {
args.push_back(get_fb_id_valueref(static_cast<uint32_t>(arg_fb_id)));
}
#ifdef ET_EVENT_TRACER_ENABLED
std::string operator_json =
make_operator_json(compute_graph_, op_name, args);
set_and_get_current_operator_json(operator_json);
get_current_operator_count(true);
#endif // ET_EVENT_TRACER_ENABLED
auto vkFn = VK_GET_OP_FN(op_name);
vkFn(*compute_graph_, args);
}
// Parse the outputs, which will be mostly tensors but may contain tensorref
// values as well if the source graph returns parameter nodes.
for (const uint32_t fb_id : *flatbuffer_->output_ids()) {
const ValueRef ref = get_fb_id_valueref(fb_id);
if (compute_graph_->val_is_tensor(ref)) {
#ifdef ET_EVENT_TRACER_ENABLED
get_current_operator_count(true);
#endif // ET_EVENT_TRACER_ENABLED
compute_graph_->set_output_tensor(
ref, get_staging_scalar_type_of(fb_id));
} else {
compute_graph_->set_output_value(ref);
}
}
if (compute_graph_->graphconfig().enable_querypool) {
for (uint32_t i = 0; i < compute_graph_->prepack_nodes().size(); ++i) {
compute_graph_->prepack_nodes()[i]->set_node_id(i);
}
for (uint32_t i = 0; i < compute_graph_->execute_nodes().size(); ++i) {
compute_graph_->execute_nodes()[i]->set_node_id(i);
}
}
}
};
//
// Execution tools
//
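// Compares the sizes of the Vulkan tensor at input_i against the incoming
// ExecuTorch tensor and resizes the Vulkan tensor if they differ. Returns true
// if a resize occurred, which signals that shape propagation is needed.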
bool maybe_resize_input(
ComputeGraph* graph,
const size_t input_i,
executorch::aten::Tensor& et_tensor) {
ValueRef in_tensor_ref = graph->inputs()[input_i].value;
const std::vector<int64_t> in_tensor_vk_sizes =
graph->sizes_of(in_tensor_ref);
ET_CHECK_MSG(
et_tensor.dim() == in_tensor_vk_sizes.size(),
"Cannot resize input tensor: old ndim %zu does not match new ndim %zu",
static_cast<size_t>(in_tensor_vk_sizes.size()),
static_cast<size_t>(et_tensor.dim()));
bool should_resize = false;
std::vector<int64_t> new_sizes(et_tensor.dim());
for (size_t i = 0; i < et_tensor.dim(); i++) {
if (in_tensor_vk_sizes[i] != et_tensor.sizes()[i]) {
should_resize = true;
}
new_sizes.at(i) = et_tensor.sizes()[i];
}
if (should_resize) {
graph->resize_input(input_i, new_sizes);
}
const size_t in_tensor_vk_numel = graph->numel_of(in_tensor_ref);
ET_CHECK_MSG(
in_tensor_vk_numel == et_tensor.numel(),
"Vulkan tensor numel %zu does not match ET tensor numel %zu",
static_cast<size_t>(in_tensor_vk_numel),
static_cast<size_t>(et_tensor.numel()));
return should_resize;
}
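// Updates the symint backing a scalar tensor input if the incoming value
// differs from the value currently stored in the graph. Returns true if the
// value was updated.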
bool maybe_update_scalar_tensor(
ComputeGraph* graph,
const ValueRef ref,
executorch::aten::Tensor& scalar_tensor_src) {
const int32_t cur_val = graph->read_symint(ref);
int32_t scalar_tensor_val = 0;
executorch::aten::ScalarType dtype = scalar_tensor_src.scalar_type();
if (dtype == executorch::aten::ScalarType::Int) {
scalar_tensor_val = *scalar_tensor_src.const_data_ptr<int32_t>();
} else if (dtype == executorch::aten::ScalarType::Long) {
scalar_tensor_val = int32_t(*scalar_tensor_src.const_data_ptr<int64_t>());
}
bool was_updated = false;
if (scalar_tensor_val != cur_val) {
graph->set_symint(ref, scalar_tensor_val);
was_updated = true;
}
return was_updated;
}
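// Resizes the ExecuTorch output tensor to match the sizes of the corresponding
// Vulkan tensor after graph execution.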
void maybe_resize_output(
ComputeGraph* graph,
const size_t output_i,
executorch::aten::Tensor& et_tensor) {
ValueRef out_tensor_ref = graph->outputs()[output_i].value;
const std::vector<int64_t> out_tensor_vk_sizes =
graph->sizes_of(out_tensor_ref);
executorch::aten::SizesType new_output_size[kTensorDimensionLimit];
size_t ndim = out_tensor_vk_sizes.size();
for (int i = 0; i < ndim; ++i) {
new_output_size[i] = out_tensor_vk_sizes[i];
}
executorch::aten::ArrayRef<executorch::aten::SizesType> output_size{
new_output_size, ndim};
Error err = resize_tensor(et_tensor, output_size);
ET_CHECK_MSG(err == Error::Ok, "Failed to resize output tensor.");
}
//
// VulkanBackend class
//
class VulkanBackend final : public ::executorch::runtime::BackendInterface {
public:
~VulkanBackend() override = default;
bool is_available() const override {
// TODO(ssjia): replace with an actual Vulkan runtime availability check
return true;
}
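// Parses the delegate payload (header, flatbuffer graph, and constant data),
// builds the ComputeGraph, and runs the preparation steps: memory planning,
// pipeline creation, constant prepacking, and an optional warmup execution.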
ET_NODISCARD Error compileModel(
const void* buffer_pointer,
ComputeGraph* compute_graph,
const NamedDataMap* named_data_map) const {
Result<VulkanDelegateHeader> header =
VulkanDelegateHeader::parse(buffer_pointer);
const uint8_t* flatbuffer_data = nullptr;
const uint8_t* constant_data = nullptr;
if (header.ok()) {
const uint8_t* buffer_start =
reinterpret_cast<const uint8_t*>(buffer_pointer);
flatbuffer_data = buffer_start + header->flatbuffer_offset;
constant_data = buffer_start + header->bytes_offset;
} else {
ET_LOG(Error, "VulkanDelegateHeader may be corrupt");
return header.error();
}
ET_CHECK_OR_RETURN_ERROR(
vkgraph::VkGraphBufferHasIdentifier(flatbuffer_data),
DelegateInvalidCompatibility,
"Vulkan Delegate Serialization Format version identifier '%.4s' != expected '%.4s'",
flatbuffers::GetBufferIdentifier(flatbuffer_data),
vkgraph::VkGraphIdentifier());
VkGraphPtr flatbuffer_graph = vkgraph::GetVkGraph(flatbuffer_data);
GraphBuilder builder(
compute_graph, flatbuffer_graph, constant_data, named_data_map);
builder.resolve_layouts();
builder.build_graph();
compute_graph->prepare();
compute_graph->prepare_pipelines();
compute_graph->prepack();
compute_graph->optional_warmup_execute();
return Error::Ok;
}
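// Allocates a ComputeGraph from the runtime allocator, configures it from the
// compile specs, compiles the serialized model into it, and returns it as the
// delegate handle. The processed buffer is freed once compilation is done.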
Result<DelegateHandle*> init(
BackendInitContext& context,
FreeableBuffer* processed,
ArrayRef<CompileSpec> compile_specs) const override {
ComputeGraph* compute_graph =
context.get_runtime_allocator()->allocateInstance<ComputeGraph>();
if (compute_graph == nullptr) {
return Error::MemoryAllocationFailed;
}
GraphConfig graph_config = get_graph_config(compile_specs);
graph_config.external_adapter = vkapi::set_and_get_external_adapter();
new (compute_graph) ComputeGraph(graph_config);
const NamedDataMap* named_data_map = context.get_named_data_map();
Error err = compileModel(processed->data(), compute_graph, named_data_map);
// This backend does not need its processed data after compiling the
// model.
processed->Free();
if (err != Error::Ok) {
return err;
}
return compute_graph;
}
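// Copies input tensors and symints into the graph, propagates resizes if any
// input shapes changed, executes the compute graph, and copies the outputs
// back into the caller's tensors. Profiling events are emitted around each
// phase when event tracing is compiled in.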
Error execute(
ET_UNUSED BackendExecutionContext& context,
DelegateHandle* handle,
Span<EValue*> args) const override {
EXECUTORCH_SCOPE_PROF("VulkanBackend::execute");
ComputeGraph* compute_graph = static_cast<ComputeGraph*>(handle);
const size_t num_inputs = compute_graph->inputs().size();
const size_t num_outputs = compute_graph->outputs().size();
bool should_propagate_resize = false;
#ifdef ET_EVENT_TRACER_ENABLED
runtime::EventTracer* event_tracer = context.event_tracer();
runtime::EventTracerEntry overall_event_tracer_entry =
event_tracer_start_profiling_delegate(
event_tracer,
"ETVK_EXECUTE",
/* delegate_debug_id = */ -1);
#endif // ET_EVENT_TRACER_ENABLED
#ifdef ET_EVENT_TRACER_ENABLED
runtime::EventTracerEntry copy_inputs_event_tracer_entry =
event_tracer_start_profiling_delegate(
event_tracer,
"ETVK_COPY_INPUTS",
/* delegate_debug_id = */ -1);
#endif // ET_EVENT_TRACER_ENABLED
for (size_t i = 0; i < num_inputs; i++) {
const ValueRef iref = compute_graph->inputs()[i].value;
if (compute_graph->val_is_tensor(iref)) {
VK_CHECK_COND(args[i]->isTensor());
bool was_resized =
maybe_resize_input(compute_graph, i, args[i]->toTensor());
should_propagate_resize = should_propagate_resize || was_resized;
compute_graph->maybe_cast_and_copy_into_staging(
compute_graph->inputs()[i].staging,
args[i]->toTensor().const_data_ptr(),
args[i]->toTensor().numel(),
equivalent_scalar_type(args[i]->toTensor().scalar_type()));
} else if (compute_graph->val_is_symint(iref)) {
VK_CHECK_COND(
args[i]->isTensor(),
"Cannot handle symint arg to graph that is not derived from a "
"scalar tensor at the moment.");
bool was_updated = maybe_update_scalar_tensor(
compute_graph, iref, args[i]->toTensor());
// Since symint inputs may impact tensor's sizes, trigger a resize if
// any symbolic integer shapes are updated.
should_propagate_resize = should_propagate_resize || was_updated;
} else {
VK_THROW(
"Could not handle input with type ",
compute_graph->get_val_type(iref));
}
}
#ifdef ET_EVENT_TRACER_ENABLED
event_tracer_end_profiling_delegate(
event_tracer, copy_inputs_event_tracer_entry);
#endif // ET_EVENT_TRACER_ENABLED
if (should_propagate_resize || compute_graph->has_data_dependent_shapes()) {
#ifdef ET_EVENT_TRACER_ENABLED
runtime::EventTracerEntry resize_event_tracer_entry =
event_tracer_start_profiling_delegate(
event_tracer,
"ETVK_RESIZE",
/* delegate_debug_id = */ -1);
#endif // ET_EVENT_TRACER_ENABLED
compute_graph->propagate_resize();
#ifdef ET_EVENT_TRACER_ENABLED
event_tracer_end_profiling_delegate(
event_tracer, resize_event_tracer_entry);
#endif // ET_EVENT_TRACER_ENABLED
}
#ifdef ET_EVENT_TRACER_ENABLED
runtime::EventTracerEntry execute_event_tracer_entry =
event_tracer_start_profiling_delegate(
event_tracer,
"ETVK_COMPUTE_GRAPH_EXECUTE",
/* delegate_debug_id = */ -1);
#endif // ET_EVENT_TRACER_ENABLED
compute_graph->execute();
#ifdef ET_EVENT_TRACER_ENABLED
event_tracer_end_profiling_delegate(
event_tracer, execute_event_tracer_entry);
#endif // ET_EVENT_TRACER_ENABLED
#ifdef ET_EVENT_TRACER_ENABLED
compute_graph->context()->querypool().extract_results();
for (const auto& r :
compute_graph->context()->querypool().get_shader_timestamp_data()) {
std::string event_name = "{" + r.kernel_name +
", \"dispatch_id\": " + std::to_string(r.dispatch_id) + "}";
event_tracer_log_profiling_delegate(
event_tracer,
event_name.c_str(),
/* delegate_debug_id = */ -1,
r.start_time_ns,
r.end_time_ns);
}
#endif // ET_EVENT_TRACER_ENABLED
#ifdef ET_EVENT_TRACER_ENABLED
runtime::EventTracerEntry copy_outputs_event_tracer_entry =
event_tracer_start_profiling_delegate(
event_tracer,
"ETVK_COPY_OUTPUTS",
/* delegate_debug_id = */ -1);
#endif // ET_EVENT_TRACER_ENABLED
const size_t output_offset = args.size() - num_outputs;
for (size_t i = 0; i < num_outputs; i++) {
const size_t o = output_offset + i;
const ValueRef oref = compute_graph->outputs()[i].value;
if (compute_graph->val_is_tensor(oref)) {
VK_CHECK_COND(args[o]->isTensor());
maybe_resize_output(compute_graph, i, args[o]->toTensor());
compute_graph->maybe_cast_and_copy_from_staging(
compute_graph->outputs()[i].staging,
args[o]->toTensor().mutable_data_ptr(),
args[o]->toTensor().numel(),
equivalent_scalar_type(args[o]->toTensor().scalar_type()));
}
// TensorRef values represent constant tensors which will not have been
// modified by the graph execution. Therefore, if a constant tensor is
// returned as an output, no action is required.
else if (compute_graph->val_is_tref(oref)) {
continue;
} else {
VK_THROW(
"Could not handle output with type ",
compute_graph->get_val_type(oref));
}
}
#ifdef ET_EVENT_TRACER_ENABLED
event_tracer_end_profiling_delegate(
event_tracer, copy_outputs_event_tracer_entry);
#endif // ET_EVENT_TRACER_ENABLED
#ifdef ET_EVENT_TRACER_ENABLED
event_tracer_end_profiling_delegate(
event_tracer, overall_event_tracer_entry);
#endif // ET_EVENT_TRACER_ENABLED
return Error::Ok;
}
void destroy(DelegateHandle* handle) const override {
if (handle != nullptr) {
ComputeGraph* compute_graph = static_cast<ComputeGraph*>(handle);
compute_graph->context()
->adapter_ptr()
->compute_pipeline_cache()
.save_cache();
// ComputeGraph is not trivially destructible. Since
// this was constructed manually in init(), we must destroy it manually
// here.
compute_graph->~ComputeGraph();
}
}
};
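// Register the Vulkan backend with the ExecuTorch runtime under the name
// "VulkanBackend" at static initialization time.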
auto cls = VulkanBackend();
Backend backend{"VulkanBackend", &cls};
static auto success_with_compiler = register_backend(backend);
} // namespace
} // namespace vulkan
} // namespace backends
} // namespace executorch