blob: 60208afeec5ee802ec15bfca430070dcbbc17c89 [file]
/*
* Copyright (c) Qualcomm Innovation Center, Inc.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <executorch/backends/qualcomm/aot/wrappers/TensorWrapper.h>
#include <executorch/backends/qualcomm/qc_compiler_spec_generated.h>
#include <executorch/backends/qualcomm/runtime/QnnExecuTorchBackend.h>
#include <executorch/backends/qualcomm/runtime/QnnManager.h>
namespace executorch {
namespace backends {
namespace qnn {
using namespace qnn_delegate;
using executorch::runtime::ArrayRef;
using executorch::runtime::BackendExecutionContext;
using executorch::runtime::BackendInitContext;
using executorch::runtime::CompileSpec;
using executorch::runtime::DelegateHandle;
using executorch::runtime::EValue;
using executorch::runtime::FreeableBuffer;
using executorch::runtime::MemoryAllocator;
using executorch::runtime::Result;
// ========== Public method implementations =========================
// Key of the compile spec entry whose value buffer carries the serialized
// QNN ExecuTorch options.
constexpr const char* QNN_COMPILE_SPEC{"qnn_compile_spec"};
// Creates the QnnManager that serves as the delegate handle for one method,
// or returns the handle already cached for an identical context binary.
//
// context       - supplies the runtime allocator and the method name.
// processed     - serialized payload handed to the QnnManager; it is released
//                 with Free() once this backend no longer reads it.
// compile_specs - key/value options; only "qnn_compile_spec" is recognized,
//                 every other key is logged as a warning and ignored.
//
// Returns the QnnManager pointer on success, or Error::Internal when manager
// initialization, qcir compilation or tensor allocation fails.
Result<DelegateHandle*> QnnExecuTorchBackend::init(
    BackendInitContext& context,
    FreeableBuffer* processed,
    ArrayRef<CompileSpec> compile_specs) const {
  // convert the processed buffer to a qnn ExecuTorch context binary
  QnnExecuTorchContextBinary qnn_context_blob;
  const qnn_delegate::QnnExecuTorchOptions* qnn_executorch_options = nullptr;

  qnn_context_blob.buffer = const_cast<void*>(processed->data());
  qnn_context_blob.nbytes = processed->size();

  // convert CompileSpec to qnn ExecuTorch option
  for (auto& compile_spec : compile_specs) {
    if (std::strcmp(compile_spec.key, QNN_COMPILE_SPEC) == 0) {
      qnn_executorch_options =
          GetQnnExecuTorchOptions(compile_spec.value.buffer);
    } else {
      QNN_EXECUTORCH_LOG_WARN("unknown argument: %s", compile_spec.key);
    }
  }

  // Create QnnManager
  MemoryAllocator* runtime_allocator = context.get_runtime_allocator();
  QnnManager* qnn_manager =
      ET_ALLOCATE_INSTANCE_OR_RETURN_ERROR(runtime_allocator, QnnManager);

  // NOTE: Since we use placement new and since this type is not trivially
  // destructible, we must call the destructor manually in destroy().
  new (qnn_manager) QnnManager(qnn_executorch_options, qnn_context_blob);

  // TODO: this is a temporary solution for multi-graph support, will be
  //       removed once framework starts to accept runtime configuration
  // ---
  // check if current context binary has already been initialized
  // return cached one for reducing memory footprint
  // NOTE(review): delegate_map_ is read here without holding mutex_, while
  // add_cached_delegate() / erase_cached_delegate() lock it — confirm init()
  // is never invoked concurrently with them.
  std::string signature = qnn_manager->GetBinarySignature();
  auto iter = delegate_map_.find(signature);
  if (iter != delegate_map_.end()) {
    QNN_EXECUTORCH_LOG_INFO(
        "Use cached delegate handle for current method: %s",
        context.get_method_name());
    // The manager constructed above is not used past this point and the
    // cached handle is returned instead, so the payload is released here just
    // like on the regular path below.
    // NOTE(review): the freshly constructed QnnManager is abandoned without
    // running its destructor — confirm that is intended.
    processed->Free();
    return iter->second;
  }

  ET_CHECK_OR_RETURN_ERROR(
      qnn_manager->Init() == Error::Ok,
      Internal,
      "Fail to initialize Qnn Manager");

  if (qnn_manager->IsOnlinePrepare()) {
    ET_CHECK_OR_RETURN_ERROR(
        qnn_manager->CompileQcir() == Error::Ok,
        Internal,
        "Fail to compile binary in qcir format");
  } else {
    for (const std::string& graph_name : qnn_manager->GetGraphNames()) {
      ET_CHECK_OR_RETURN_ERROR(
          qnn_manager->AllocateTensor(graph_name) == Error::Ok,
          Internal,
          "Fail to allocate tensor");
    }
  }

  // Remember the handle so later init() calls with the same signature reuse
  // it, and so execute()/destroy() can recognize it as live.
  add_cached_delegate(signature, qnn_manager);

  // This backend does not need its processed data after Init.
  processed->Free();
  return qnn_manager;
}
// Executes the graph named after the current method on the QnnManager behind
// `handle`.
//
// args holds the method's tensors: the first input_tensors.size() entries are
// bound, in order, to the graph inputs; the entries after them are bound, in
// order, to the graph outputs whose name starts with "output_".
//
// Returns Error::Ok on success, or Error::Internal when the handle is not in
// the delegate cache, graph execution fails, or profiling fails.
Error QnnExecuTorchBackend::execute(
    BackendExecutionContext& context,
    DelegateHandle* handle,
    EValue** args) const {
  // Reject handles that were never cached or were already destroyed.
  ET_CHECK_OR_RETURN_ERROR(
      delegate_map_rev_.count(handle) != 0,
      Internal,
      "DelegateHandle has been deleted");
  QnnManager* qnn_manager = static_cast<QnnManager*>(handle);

  std::string method_name = context.get_method_name();
  std::vector<std::shared_ptr<TensorWrapper>> input_tensors =
      qnn_manager->GetGraphInputs(method_name);
  std::vector<std::shared_ptr<TensorWrapper>> output_tensors =
      qnn_manager->GetGraphOutputs(method_name);
  std::vector<Qnn_Tensor_t> input_tensor_structs;
  std::vector<Qnn_Tensor_t> output_tensor_structs;

  input_tensor_structs.reserve(input_tensors.size());
  for (std::size_t i = 0; i < input_tensors.size(); ++i) {
    if (qnn_manager->RegisterMem(
            args[i]->toTensor().mutable_data_ptr(), input_tensors[i]) !=
        Error::Ok) {
      // update data ptr only should be fine
      input_tensors[i]->FillDataBuffer(
          args[i]->toTensor().const_data_ptr(), false /* copy_data */);
    }
    input_tensor_structs.push_back(input_tensors[i]->CloneTensorStruct());
  }

  // Every output tensor contributes exactly one struct, so the final size is
  // known up front.
  output_tensor_structs.reserve(output_tensors.size());
  // Output arguments start right after the input arguments in `args`.
  std::size_t output_index = input_tensors.size();
  for (const auto& output_tensor : output_tensors) {
    // pos=0 limits the search to the prefix
    if (output_tensor->GetName().rfind("output_", 0) == 0) {
      void* mutable_data_ptr =
          args[output_index]->toTensor().mutable_data_ptr();
      if (qnn_manager->RegisterMem(mutable_data_ptr, output_tensor) !=
          Error::Ok) {
        // Registration failed: point the tensor at the caller's buffer
        // instead, without copying.
        output_tensor->FillDataBuffer(mutable_data_ptr, false /* copy_data */);
      }
      ++output_index;
    }
    output_tensor_structs.push_back(output_tensor->CloneTensorStruct());
  }

  ET_CHECK_OR_RETURN_ERROR(
      qnn_manager->Execute(
          method_name,
          input_tensor_structs,
          output_tensor_structs,
          context.event_tracer()) == Error::Ok,
      Internal,
      "Fail to execute graph");
  ET_CHECK_OR_RETURN_ERROR(
      qnn_manager->ProfileExecuteData(method_name, context.event_tracer()) ==
          Error::Ok,
      Internal,
      "Fail to profile graph");

  return Error::Ok;
}
// Tears down the QnnManager behind `handle` and drops it from the delegate
// cache. Null handles and handles that are not (or no longer) cached are
// ignored.
void QnnExecuTorchBackend::destroy(DelegateHandle* handle) const {
  const bool is_live_handle =
      handle != nullptr && delegate_map_rev_.count(handle) != 0;
  if (!is_live_handle) {
    return;
  }

  auto* manager = static_cast<QnnManager*>(handle);
  manager->Destroy();
  erase_cached_delegate(handle);
}
// Reports whether this backend can be used; it unconditionally answers yes.
bool QnnExecuTorchBackend::is_available() const {
  constexpr bool kAlwaysAvailable = true;
  return kAlwaysAvailable;
}
// Records `handle` under `signature` in both lookup directions
// (signature -> handle and handle -> signature), overwriting any existing
// entry with the same key. Both maps are updated while holding mutex_.
void QnnExecuTorchBackend::add_cached_delegate(
    const std::string& signature,
    executorch::runtime::DelegateHandle* handle) const {
  std::lock_guard<std::mutex> lock(mutex_);
  delegate_map_.insert_or_assign(signature, handle);
  delegate_map_rev_.insert_or_assign(handle, signature);
}
// Removes `handle` from both lookup maps while holding mutex_. Does nothing
// when the handle has no reverse-map entry.
void QnnExecuTorchBackend::erase_cached_delegate(
    executorch::runtime::DelegateHandle* handle) const {
  std::lock_guard<std::mutex> lock(mutex_);
  auto entry = delegate_map_rev_.find(handle);
  if (entry != delegate_map_rev_.end()) {
    // The reverse entry stores the signature needed to drop the forward entry;
    // erase the forward entry first, while that signature is still alive.
    delegate_map_.erase(entry->second);
    delegate_map_rev_.erase(entry);
  }
}
namespace {
auto cls = QnnExecuTorchBackend();
executorch::runtime::Backend backend{"QnnBackend", &cls};
static auto success_with_compiler = register_backend(backend);
} // namespace
} // namespace qnn
} // namespace backends
} // namespace executorch