blob: 0deea81a7f366e40c017470ac688b9048823e7d6 [file]
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/
#pragma once
// @lint-ignore-every CLANGTIDY facebook-hte-BadMemberName
#include <executorch/backends/vulkan/runtime/vk_api/vk_api.h>
#include <executorch/backends/vulkan/runtime/vk_api/Device.h>
#include <executorch/backends/vulkan/runtime/vk_api/Pipeline.h>
#include <executorch/backends/vulkan/runtime/vk_api/memory/Allocator.h>
#include <array>
namespace vkcompute {
namespace vkapi {
//
// A Vulkan Adapter represents a logical device and all its properties. It
// manages all relevant properties of the underlying physical device, a
// handle to the logical device, and a number of compute queues available to
// the device. It is primarily responsible for managing the VkDevice handle
// which points to the logical device object on the GPU.
//
// This class is primarily used by the Runtime class, which holds one Adapter
// instance for each physical device visible to the VkInstance. Upon
// construction, this class will populate the physical device properties, but
// will not create the logical device until specifically requested via the
// init_device() function.
//
// init_device() will create the logical device and obtain the VkDevice handle
// for it. It will also create a number of compute queues up to the amount
// requested when the Adapter instance was constructed.
//
// Contexts (which represent one thread of execution) will request a compute
// queue from an Adapter. The Adapter will then select a compute queue to
// assign to the Context, attempting to balance load between all available
// queues. This will allow different Contexts (which typically execute on
// separate threads) to run concurrently.
//
#define NUM_QUEUE_MUTEXES 4
class Adapter final {
public:
explicit Adapter(
VkInstance instance,
PhysicalDevice physical_device,
const uint32_t num_queues,
const std::string& cache_data_path);
Adapter(const Adapter&) = delete;
Adapter& operator=(const Adapter&) = delete;
Adapter(Adapter&&) = delete;
Adapter& operator=(Adapter&&) = delete;
~Adapter() = default;
struct Queue {
uint32_t family_index;
uint32_t queue_index;
VkQueueFlags capabilities;
VkQueue handle;
};
private:
// Use a mutex to manage queue usage info since
// it can be accessed from multiple threads
std::mutex queue_usage_mutex_;
// Physical Device Info
PhysicalDevice physical_device_;
// Queue Management
std::vector<Queue> queues_;
std::vector<uint32_t> queue_usage_;
std::array<std::mutex, NUM_QUEUE_MUTEXES> queue_mutexes_;
// Handles
VkInstance instance_;
DeviceHandle device_;
// Device-level resource caches
ShaderLayoutCache shader_layout_cache_;
ShaderCache shader_cache_;
PipelineLayoutCache pipeline_layout_cache_;
ComputePipelineCache compute_pipeline_cache_;
// Memory Management
SamplerCache sampler_cache_;
Allocator vma_;
// Miscellaneous
bool linear_tiling_3d_enabled_;
public:
// Physical Device metadata
inline VkPhysicalDevice physical_handle() const {
return physical_device_.handle;
}
inline VkDevice device_handle() const {
return device_.handle;
}
inline bool has_unified_memory() const {
return physical_device_.has_unified_memory;
}
inline uint32_t num_compute_queues() const {
return physical_device_.num_compute_queues;
}
inline bool timestamp_compute_and_graphics() const {
return physical_device_.has_timestamps;
}
inline float timestamp_period() const {
return physical_device_.timestamp_period;
}
// Queue Management
Queue request_queue();
void return_queue(Queue&);
// Caches
inline ShaderLayoutCache& shader_layout_cache() {
return shader_layout_cache_;
}
inline ShaderCache& shader_cache() {
return shader_cache_;
}
inline PipelineLayoutCache& pipeline_layout_cache() {
return pipeline_layout_cache_;
}
inline ComputePipelineCache& compute_pipeline_cache() {
return compute_pipeline_cache_;
}
// Memory Allocation
inline SamplerCache& sampler_cache() {
return sampler_cache_;
}
inline Allocator& vma() {
return vma_;
}
inline bool linear_tiling_3d_enabled() const {
return linear_tiling_3d_enabled_;
}
// Physical Device Features
inline bool supports_16bit_storage_buffers() {
#ifdef VK_KHR_16bit_storage
return physical_device_.shader_16bit_storage.storageBuffer16BitAccess ==
VK_TRUE;
#else
return false;
#endif /* VK_KHR_16bit_storage */
}
inline bool supports_8bit_storage_buffers() {
#ifdef VK_KHR_8bit_storage
return physical_device_.shader_8bit_storage.storageBuffer8BitAccess ==
VK_TRUE;
#else
return false;
#endif /* VK_KHR_8bit_storage */
}
inline bool supports_float16_shader_types() {
#ifdef VK_KHR_shader_float16_int8
return physical_device_.shader_float16_int8_types.shaderFloat16 == VK_TRUE;
#else
return false;
#endif /* VK_KHR_shader_float16_int8 */
}
inline bool supports_int8_shader_types() {
#ifdef VK_KHR_shader_float16_int8
return physical_device_.shader_float16_int8_types.shaderInt8 == VK_TRUE;
#else
return false;
#endif /* VK_KHR_shader_float16_int8 */
}
inline bool supports_int16_shader_types() {
return physical_device_.supports_int16_shader_types;
}
inline bool has_full_float16_buffers_support() {
return supports_16bit_storage_buffers() && supports_float16_shader_types();
}
inline bool has_full_int8_buffers_support() {
return supports_8bit_storage_buffers() && supports_int8_shader_types();
}
// Command Buffer Submission
void
submit_cmd(const Queue&, VkCommandBuffer, VkFence fence = VK_NULL_HANDLE);
std::string stringize() const;
friend std::ostream& operator<<(std::ostream&, const Adapter&);
};
} // namespace vkapi
} // namespace vkcompute