/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

// @lint-ignore-every CLANGTIDY
// facebook-security-vulnerable-integer-sign-conversion

#include <executorch/backends/vulkan/runtime/graph/ComputeGraph.h>

#include <executorch/backends/vulkan/runtime/graph/ops/impl/Staging.h>

#include <executorch/backends/vulkan/runtime/graph/ops/utils/StagingUtils.h>

namespace vkcompute {

//
// VTensorPtr
//

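// Generates the constructor, destructor, and smart-pointer operators for each
// of the *Ptr wrapper classes below. The constructor stores a raw pointer to
// the underlying object held in the graph's values_ vector and increments
// values_in_use_; the destructor decrements it. This counter is what
// check_no_active_value_ptrs() uses to detect pointers that could be
// invalidated when values_ is resized.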
#define VALUE_PTR_CLASS_IMPL(classname, ctype, type_name) \
  classname::classname(ComputeGraph* const graph, const ValueRef idx) \
      : graph_(graph), ptr_(&(graph_->values_.at(idx).to##type_name())) { \
    graph_->values_in_use_++; \
  } \
  ctype* classname::operator->() const { \
    return ptr_; \
  } \
  ctype& classname::operator*() const { \
    return *ptr_; \
  } \
  classname::~classname() { \
    graph_->values_in_use_--; \
  }

VALUE_PTR_CLASS_IMPL(vTensorPtr, api::vTensor, Tensor)
VALUE_PTR_CLASS_IMPL(TensorRefPtr, TensorRef, TensorRef)
VALUE_PTR_CLASS_IMPL(StagingPtr, api::StorageBuffer, Staging)
VALUE_PTR_CLASS_IMPL(IntListPtr, std::vector<int64_t>, IntList)
VALUE_PTR_CLASS_IMPL(DoubleListPtr, std::vector<double>, DoubleList)
VALUE_PTR_CLASS_IMPL(BoolListPtr, std::vector<bool>, BoolList)
VALUE_PTR_CLASS_IMPL(ValueListPtr, std::vector<ValueRef>, ValueList)

#undef VALUE_PTR_CLASS_IMPL

//
// ComputeGraph
//
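
// A rough usage sketch of the graph lifecycle, using only methods defined in
// this file. It assumes that GraphConfig is default-constructible, that
// vkapi::kFloat names the float dtype, and that operator nodes are added via
// helpers defined elsewhere; `out`, `input_data`, `output_data`, and `numel`
// are placeholders:
//
//   GraphConfig config;
//   ComputeGraph graph(config);
//   std::vector<int64_t> sizes = {1, 64, 64};
//   ValueRef in = graph.add_tensor(sizes, vkapi::kFloat, /*shared_object_idx = */ -1);
//   ValueRef staging_in = graph.set_input_tensor(in, /*use_staging = */ true);
//   // ... add prepack/execute nodes; these produce the output tensor `out` ...
//   ValueRef staging_out = graph.set_output_tensor(out, /*use_staging = */ true);
//   graph.prepare();
//   graph.encode_prepack();
//   graph.prepack();
//   graph.encode_execute();
//   graph.copy_into_staging(staging_in, input_data, numel);
//   graph.execute();
//   graph.copy_from_staging(staging_out, output_data, numel);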

ComputeGraph::ComputeGraph(GraphConfig config)
    : config_{config},
      prepack_descriptor_counts_{},
      execute_descriptor_counts_{},
      context_{new api::Context(
          vkapi::runtime()->default_adapter_i(),
          config_.context_config)},
      shared_objects_{},
      values_{},
      param_ubos_{},
      prepack_nodes_{},
      execute_nodes_{},
      inputs_{},
      outputs_{} {
  // Ensure that descriptor counts are initialized to 0
  prepack_descriptor_counts_.descriptor_pool_max_sets = 0;
  prepack_descriptor_counts_.descriptor_uniform_buffer_count = 0;
  prepack_descriptor_counts_.descriptor_storage_buffer_count = 0;
  prepack_descriptor_counts_.descriptor_combined_sampler_count = 0;
  prepack_descriptor_counts_.descriptor_storage_image_count = 0;

  execute_descriptor_counts_.descriptor_pool_max_sets = 0;
  execute_descriptor_counts_.descriptor_uniform_buffer_count = 0;
  execute_descriptor_counts_.descriptor_storage_buffer_count = 0;
  execute_descriptor_counts_.descriptor_combined_sampler_count = 0;
  execute_descriptor_counts_.descriptor_storage_image_count = 0;

  context_->set_cmd(/*reusable = */ true);
}

ComputeGraph::~ComputeGraph() {
  values_.clear();

  prepack_nodes_.clear();
  execute_nodes_.clear();

  context_->flush();
}

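// Returns the storage type to use for a new tensor when the caller does not
// specify one: the configured override if enabled, otherwise 3D textures.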
utils::StorageType ComputeGraph::suggested_storage_type() {
  if (config_.enable_storage_type_override) {
    return config_.storage_type_override;
  }
  return utils::kTexture3D;
}

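// Returns the memory layout to use for a new tensor when the caller does not
// specify one, based on its sizes: width-packed for tensors with fewer than 3
// dims or a channels dim of 1, otherwise channels-packed.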
utils::GPUMemoryLayout ComputeGraph::suggested_memory_layout(
    const std::vector<int64_t>& sizes) {
  if (config_.enable_memory_layout_override) {
    return config_.memory_layout_override;
  }
  if (sizes.size() < 3) {
    return utils::kWidthPacked;
  }
  // For 3-dimensional tensors whose channels dimension is 1, still prefer
  // width-packed.
  if (utils::val_at(-3, sizes) == 1) {
    return utils::kWidthPacked;
  }
  return utils::kChannelsPacked;
}

void ComputeGraph::check_no_active_value_ptrs() {
  VK_CHECK_COND(
      values_in_use_ == 0,
      "Make sure that there are no pointers stored from the return values of "
      "`ComputeGraph::get_*()` functions in scope before adding Values to the "
      "graph. Modifying the graph's values may cause existing pointers to be "
      "invalidated.");
}

std::vector<int64_t> ComputeGraph::sizes_of(const ValueRef idx) const {
  const Value& val = values_.at(idx);
  if (val.isTensor()) {
    return val.toConstTensor().sizes();
  } else if (val.isTensorRef()) {
    return val.toConstTensorRef().sizes;
  }
  VK_THROW("Could not get sizes of value with type ", val.type());
}

vkapi::ScalarType ComputeGraph::dtype_of(const ValueRef idx) const {
  const Value& val = values_.at(idx);
  if (val.isTensor()) {
    return val.toConstTensor().dtype();
  } else if (val.isTensorRef()) {
    return val.toConstTensorRef().dtype;
  }
  VK_THROW("Could not get dtype of value with type ", val.type());
}

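// Adds a new vTensor to the graph. If shared_object_idx is negative, the
// tensor allocates its own memory; otherwise allocation is deferred and the
// tensor is registered as a user of the corresponding SharedObject, whose
// memory is allocated and bound later (see encode_execute()).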
ValueRef ComputeGraph::add_tensor(
    const std::vector<int64_t>& sizes,
    const vkapi::ScalarType dtype,
    const utils::StorageType storage_type,
    const utils::GPUMemoryLayout memory_layout,
    const int64_t shared_object_idx) {
  bool allocate_memory = shared_object_idx < 0;

  ValueRef idx(static_cast<int>(values_.size()));
  check_no_active_value_ptrs();
  values_.emplace_back(api::vTensor(
      context(), sizes, dtype, storage_type, memory_layout, allocate_memory));

  if (!allocate_memory) {
    get_shared_object(shared_object_idx).add_user(this, idx);
  }
  return idx;
}

ValueRef ComputeGraph::add_tensor(
    const std::vector<int64_t>& sizes,
    const vkapi::ScalarType dtype,
    const utils::StorageType storage_type,
    const int64_t shared_object_idx) {
  return add_tensor(
      sizes,
      dtype,
      storage_type,
      suggested_memory_layout(sizes),
      shared_object_idx);
}

ValueRef ComputeGraph::add_tensor(
    const std::vector<int64_t>& sizes,
    const vkapi::ScalarType dtype,
    const utils::GPUMemoryLayout memory_layout,
    const int64_t shared_object_idx) {
  return add_tensor(
      sizes, dtype, suggested_storage_type(), memory_layout, shared_object_idx);
}

ValueRef ComputeGraph::add_tensor_like(
    const ValueRef idx,
    const utils::StorageType storage_type,
    const utils::GPUMemoryLayout memory_layout) {
  return add_tensor(sizes_of(idx), dtype_of(idx), storage_type, memory_layout);
}

ValueRef ComputeGraph::add_tensor_like(
    const ValueRef idx,
    const utils::GPUMemoryLayout memory_layout) {
  return add_tensor(sizes_of(idx), dtype_of(idx), memory_layout);
}

ValueRef ComputeGraph::add_tensor(
    const std::vector<int64_t>& sizes,
    const vkapi::ScalarType dtype,
    const int64_t shared_object_idx) {
  return add_tensor(
      sizes, dtype, suggested_memory_layout(sizes), shared_object_idx);
}

ValueRef ComputeGraph::add_tensorref(
    const std::vector<int64_t>& sizes,
    const vkapi::ScalarType dtype,
    const void* const data) {
  ValueRef idx(static_cast<int>(values_.size()));
  check_no_active_value_ptrs();
  values_.emplace_back(TensorRef(sizes, dtype, data));
  return idx;
}

ValueRef ComputeGraph::add_staging(
    const vkapi::ScalarType dtype,
    const size_t numel) {
  ValueRef idx(static_cast<int>(values_.size()));
  check_no_active_value_ptrs();
  values_.emplace_back(api::StorageBuffer(context(), dtype, numel));
  return idx;
}

ValueRef ComputeGraph::add_none() {
  ValueRef idx(static_cast<int>(values_.size()));
  check_no_active_value_ptrs();
  values_.emplace_back();
  return idx;
}

ValueRef ComputeGraph::add_value_list(std::vector<ValueRef>&& value) {
  ValueRef idx(static_cast<int>(values_.size()));
  check_no_active_value_ptrs();
  values_.emplace_back(std::move(value));
  return idx;
}

ValueRef ComputeGraph::add_string(std::string&& str) {
  ValueRef idx(static_cast<int>(values_.size()));
  check_no_active_value_ptrs();
  values_.emplace_back(std::move(str));
  return idx;
}

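// Marks the tensor at idx as a graph input. When use_staging is true, a
// staging buffer is created along with a transfer node that copies its
// contents into the tensor, and the staging buffer's ValueRef is returned so
// that the caller can fill it via copy_into_staging().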
ValueRef ComputeGraph::set_input_tensor(
    const ValueRef idx,
    const bool use_staging) {
  if (use_staging) {
    vkapi::ScalarType dtype = get_tensor(idx)->dtype();
    size_t gpu_numel = get_tensor(idx)->gpu_numel();
    ValueRef staging_idx = add_staging(dtype, gpu_numel);
    add_staging_to_tensor_node(*this, staging_idx, idx);
    inputs_.push_back({idx, staging_idx});
    return staging_idx;
  }
  inputs_.push_back({idx, kDummyValueRef});
  return idx;
}

ValueRef ComputeGraph::set_output_tensor(
    const ValueRef idx,
    const bool use_staging) {
  if (use_staging) {
    vkapi::ScalarType dtype = get_tensor(idx)->dtype();
    size_t gpu_numel = get_tensor(idx)->gpu_numel();
    ValueRef staging_idx = add_staging(dtype, gpu_numel);
    // Only add the transfer node when the tensor is non-empty. When the
    // underlying tensor is empty (i.e. gpu_numel == 0), no VkImage is
    // allocated for it, so the node would not be able to bind it for
    // execution.
    if (gpu_numel > 0) {
      add_tensor_to_staging_node(*this, idx, staging_idx);
    }
    outputs_.push_back({idx, staging_idx});
    return staging_idx;
  }
  outputs_.push_back({idx, kDummyValueRef});
  return idx;
}

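// Returns the SharedObject at the given index, growing shared_objects_ as
// needed so that the index is always valid.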
SharedObject& ComputeGraph::get_shared_object(const int64_t idx) {
  if (idx >= shared_objects_.size()) {
    shared_objects_.resize(static_cast<size_t>(idx + 1));
  }
  return shared_objects_.at(idx);
}

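// Tallies the descriptor requirements of a shader dispatch into either the
// prepack or the execute descriptor counts. The accumulated totals are later
// used by prepare() to size the context's descriptor pool.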
void ComputeGraph::update_descriptor_counts(
    const vkapi::ShaderInfo& shader_info,
    bool execute) {
  vkapi::DescriptorPoolConfig* config =
      execute ? &execute_descriptor_counts_ : &prepack_descriptor_counts_;

  config->descriptor_pool_max_sets += 1;
  for (const VkDescriptorType arg_type : shader_info.kernel_layout) {
    switch (arg_type) {
      case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
        config->descriptor_uniform_buffer_count += 1;
        break;
      case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
        config->descriptor_storage_buffer_count += 1;
        break;
      case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
        config->descriptor_combined_sampler_count += 1;
        break;
      case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
        config->descriptor_storage_image_count += 1;
        break;
      default:
        VK_THROW("Unsupported descriptor type!");
    }
  }
}

utils::uvec3 ComputeGraph::create_global_wg_size(const ValueRef idx) {
  if (is_buffer_storage(idx)) {
    return {uint32_t(texel_numel_of(idx)), 1u, 1u};
  }
  return image_extents_of(idx);
}

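// Heuristically selects a 64-invocation local work group size for the tensor
// at idx, unless the config specifies an override: {64, 1, 1} for buffer
// storage, and for texture storage a shape adapted to the image extents
// (flatter groups when the depth and/or height extents are small).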
utils::uvec3 ComputeGraph::create_local_wg_size(const ValueRef idx) {
  if (config_.enable_local_wg_size_override) {
    return config_.local_wg_size_override;
  }

  if (is_buffer_storage(idx)) {
    return {64u, 1u, 1u};
  }

  const utils::uvec3 image_extents = image_extents_of(idx);
  utils::uvec3 local_group_size = {4, 4, 4};

  if (image_extents.data[2u] == 1) {
    if (image_extents.data[1u] == 1) {
      local_group_size.data[0u] = 64;
      local_group_size.data[1u] = 1;
      local_group_size.data[2u] = 1;
    } else if (image_extents.data[1u] < 8) {
      local_group_size.data[0u] = 16;
      local_group_size.data[1u] = 4;
      local_group_size.data[2u] = 1;
    } else {
      local_group_size.data[0u] = 8;
      local_group_size.data[1u] = 8;
      local_group_size.data[2u] = 1;
    }
  }
  return local_group_size;
}

void ComputeGraph::copy_into_staging(
    const ValueRef idx,
    const void* data,
    const size_t numel) {
  StagingPtr staging = get_staging(idx);
  size_t nbytes = numel * vkapi::element_size(staging->dtype());
  copy_ptr_to_staging(data, *staging, nbytes);
}

void ComputeGraph::copy_from_staging(
    const ValueRef idx,
    void* data,
    const size_t numel) {
  StagingPtr staging = get_staging(idx);
  size_t nbytes = numel * vkapi::element_size(staging->dtype());
  copy_staging_to_ptr(*staging, data, nbytes);
}

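// Initializes the context's descriptor pool (if it has not been created yet)
// using the descriptor counts accumulated during graph building, taking the
// maximum of the prepack and execute counts scaled by the configured safety
// factor, and sets up the querypool when profiling is enabled.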
void ComputeGraph::prepare() {
#define MERGE_FIELD(field) \
  static_cast<uint32_t>(std::ceil( \
      std::max( \
          execute_descriptor_counts_.field, \
          prepack_descriptor_counts_.field) * \
      config_.descriptor_pool_safety_factor))

  uint32_t max_sets = MERGE_FIELD(descriptor_pool_max_sets);
  vkapi::DescriptorPoolConfig config{
      max_sets,
      std::max(MERGE_FIELD(descriptor_uniform_buffer_count), max_sets),
      std::max(MERGE_FIELD(descriptor_storage_buffer_count), max_sets),
      std::max(MERGE_FIELD(descriptor_combined_sampler_count), max_sets),
      std::max(MERGE_FIELD(descriptor_storage_image_count), max_sets),
      1u,
  };

  if (!context_->descriptor_pool()) {
    context_->descriptor_pool().init(config);
  }
#undef MERGE_FIELD

  if (config_.enable_querypool) {
    context_->initialize_querypool();
  }
}

void ComputeGraph::encode_prepack() {
  for (std::unique_ptr<PrepackNode>& node : prepack_nodes_) {
    node->encode(this);
  }
}

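// Submits the command buffer recorded by encode_prepack() to the GPU, waits
// on a fence for it to complete, and then flushes the context.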
void ComputeGraph::prepack() const {
  // Submit and execute the command buffer
  vkapi::VulkanFence fence = context_->fences().get_fence();
  context_->submit_cmd_to_gpu(fence.get_submit_handle(), /*final_use = */ true);
  fence.wait();

  context_->flush();
}

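// Records the execution command buffer: flushes any previously recorded
// state, allocates and binds memory for shared objects, then encodes every
// execute node into a new reusable command buffer.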
void ComputeGraph::encode_execute() {
  context_->flush();
  context_->set_cmd(/*reusable = */ true);

  context_->cmd_reset_querypool();

  for (SharedObject& shared_object : shared_objects_) {
    shared_object.allocate(this);
    shared_object.bind_users(this);
  }

  for (std::unique_ptr<ExecuteNode>& node : execute_nodes_) {
    node->encode(this);
  }
}

void ComputeGraph::execute() const {
  vkapi::VulkanFence fence = context_->fences().get_fence();
  context_->submit_cmd_to_gpu(fence.get_submit_handle());
  fence.wait();
}

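// Updates the sizes of the input tensor at the given input index without
// reallocating memory; call propagate_resize() afterwards so that each
// execute node can propagate the new sizes through the graph.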
void ComputeGraph::resize_input(
    const int64_t idx,
    const std::vector<int64_t>& new_sizes) {
  IOValueRef io_val = inputs_.at(idx);
  get_tensor(io_val.value)->virtual_resize(new_sizes);
}

void ComputeGraph::propagate_resize() {
  for (std::unique_ptr<ExecuteNode>& node : execute_nodes_) {
    node->trigger_resize(this);
  }
}

} // namespace vkcompute