blob: 12968fa28c92ecbbf4d666da9dc3f01890e9a94b [file] [log] [blame] [edit]
/*
* Copyright (c) Qualcomm Innovation Center, Inc.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <dlfcn.h>
#include <executorch/backends/qualcomm/runtime/Logging.h>
#include <executorch/backends/qualcomm/runtime/SharedBuffer.h>
// Refer to the QNN HTP Shared Buffer Tutorial
// in Qualcomm® AI Engine Direct document
constexpr uint8_t RPCMEM_HEAP_ID_SYSTEM = 25;
constexpr uint8_t RPCMEM_DEFAULT_FLAGS = 1;
std::size_t std::hash<CustomMemTensorInfo>::operator()(
const CustomMemTensorInfo& info) const noexcept {
size_t hash_val = 0;
hash_val ^= std::hash<void*>()(info.tensor_addr);
hash_val ^= std::hash<void*>()(info.custom_mem);
hash_val ^= std::hash<size_t>()(info.pos);
hash_val ^= std::hash<size_t>()(info.tensor_bytes);
for (int i = 0; i < info.rank; ++i) {
hash_val ^= info.shape[i];
}
hash_val ^= std::hash<uint32_t>()(info.rank);
hash_val ^= std::hash<executorch::aten::ScalarType>()(info.dtype);
return hash_val;
}
bool operator==(
const CustomMemTensorInfo& lhs,
const CustomMemTensorInfo& rhs) {
bool is_same =
(lhs.tensor_addr == rhs.tensor_addr && lhs.custom_mem == rhs.custom_mem &&
lhs.pos == rhs.pos && lhs.tensor_bytes == rhs.tensor_bytes &&
lhs.rank == rhs.rank && lhs.dtype == rhs.dtype);
for (int i = 0; i < lhs.rank; ++i) {
is_same &= lhs.shape[i] == rhs.shape[i];
}
return is_same;
}
namespace executorch {
namespace backends {
namespace qnn {
using executorch::runtime::Error;
namespace {
intptr_t alignTo(size_t alignment, intptr_t offset) {
return offset % alignment == 0 ? offset
: offset +
(static_cast<intptr_t>(alignment) -
offset % static_cast<intptr_t>(alignment));
}
} // namespace
std::mutex SharedBuffer::init_mutex_;
void* SharedBuffer::GetCustomMemBase(void* buf) {
auto it = tensor_addr_to_custom_mem_.find(buf);
if (it == tensor_addr_to_custom_mem_.end()) {
return nullptr;
}
return it->second;
}
void* SharedBuffer::GetUnAlignedAddr(void* buf) {
auto it = restore_map_.find(buf);
if (it == restore_map_.end()) {
return nullptr;
}
return it->second;
}
size_t SharedBuffer::GetAllocatedSize(void* buf) {
auto it = allocated_size_map_.find(buf);
if (it == allocated_size_map_.end()) {
return 0;
}
return it->second;
}
SharedBuffer& SharedBuffer::GetSharedBufferManager() {
std::lock_guard<std::mutex> lk(init_mutex_);
static SharedBuffer shared_buffer_manager;
if (!shared_buffer_manager.GetInitialize()) {
#if defined(__aarch64__)
Error status = shared_buffer_manager.Load();
#else
// For x86_64 platform
Error status = Error::Ok;
#endif
if (status == Error::Ok) {
shared_buffer_manager.SetInitialize(true);
}
}
return shared_buffer_manager;
}
SharedBuffer::~SharedBuffer() {
#if defined(__aarch64__)
if (initialize_) {
SharedBuffer::GetSharedBufferManager().UnLoad();
}
#endif
};
void* SharedBuffer::AllocMem(size_t bytes, size_t alignment) {
if (!initialize_) {
QNN_EXECUTORCH_LOG_ERROR("Shared memory not initialized.");
return nullptr;
}
// do alignment:
auto allocate_bytes = static_cast<int32_t>(bytes + alignment);
void* buf = rpc_mem_alloc_(
RPCMEM_HEAP_ID_SYSTEM, RPCMEM_DEFAULT_FLAGS, allocate_bytes);
if (buf == nullptr) {
QNN_EXECUTORCH_LOG_WARN("Failed to allocate the tensor by RPC memory.");
return nullptr;
}
allocated_size_map_.insert({buf, allocate_bytes});
auto aligned_buf = reinterpret_cast<void*>(
alignTo(alignment, reinterpret_cast<intptr_t>(buf)));
bool status = restore_map_.insert({aligned_buf, buf}).second;
if (!status) {
QNN_EXECUTORCH_LOG_ERROR("Failed to allocate the tensor by RPC memory.");
rpc_mem_free_(buf);
}
return aligned_buf;
}
int32_t SharedBuffer::MemToFd(void* buf) {
int32_t memFd = -1;
if (!initialize_) {
QNN_EXECUTORCH_LOG_ERROR("Shared memory not initialized.");
} else {
memFd = rpc_mem_to_fd_(buf);
}
return memFd;
}
void SharedBuffer::FreeMem(void* buf) {
if (!initialize_) {
QNN_EXECUTORCH_LOG_ERROR("Shared memory not initialized.");
} else if (restore_map_.count(buf) == 0) {
QNN_EXECUTORCH_LOG_WARN("Don't free an unallocated tensor.");
} else {
rpc_mem_free_(restore_map_[buf]);
restore_map_.erase(buf);
}
}
bool SharedBuffer::IsAllocated(void* buf) {
return restore_map_.count(buf) != 0U;
}
Error SharedBuffer::Load() {
// On Android, 32-bit and 64-bit libcdsprpc.so can be found at /vendor/lib/
// and /vendor/lib64/ respectively.
lib_cdsp_rpc_ = dlopen("libcdsprpc.so", RTLD_NOW | RTLD_LOCAL);
if (lib_cdsp_rpc_ == nullptr) {
QNN_EXECUTORCH_LOG_ERROR(
"Unable to load shared buffer. dlerror(): %s", dlerror());
return Error::Internal;
}
rpc_mem_alloc_ = reinterpret_cast<RpcMemAllocFn_t>( // NOLINT
dlsym(lib_cdsp_rpc_, "rpcmem_alloc"));
rpc_mem_free_ = reinterpret_cast<RpcMemFreeFn_t>( // NOLINT
dlsym(lib_cdsp_rpc_, "rpcmem_free"));
rpc_mem_to_fd_ = reinterpret_cast<RpcMemToFdFn_t>( // NOLINT
dlsym(lib_cdsp_rpc_, "rpcmem_to_fd"));
if (nullptr == rpc_mem_alloc_ || nullptr == rpc_mem_free_ ||
nullptr == rpc_mem_to_fd_) {
QNN_EXECUTORCH_LOG_ERROR(
"Unable to access symbols in shared buffer. dlerror(): %s", dlerror());
dlclose(lib_cdsp_rpc_);
return Error::Internal;
}
return Error::Ok;
}
void SharedBuffer::AddCusomMemTensorAddr(void* tensor_addr, void* custom_mem) {
tensor_addr_to_custom_mem_.insert({tensor_addr, custom_mem});
};
void SharedBuffer::AddCusomMemTensorInfo(const CustomMemTensorInfo& info) {
custom_mem_tensor_info_set_.insert(info);
tensor_addr_to_custom_mem_.insert({info.tensor_addr, info.custom_mem});
}
Error SharedBuffer::UnLoad() {
if (dlclose(lib_cdsp_rpc_) != 0) {
QNN_EXECUTORCH_LOG_ERROR(
"Unable to close shared buffer. dlerror(): %s", dlerror());
return Error::Internal;
};
return Error::Ok;
}
} // namespace qnn
} // namespace backends
} // namespace executorch