// backends/qualcomm/runtime/QnnExecuTorchBackend.cpp - platform/external/executorch - Git at Google

 /*
  * Copyright (c) Qualcomm Innovation Center, Inc.
  * All rights reserved.
  *
  * This source code is licensed under the BSD-style license found in the
  * LICENSE file in the root directory of this source tree.
  */

 #include <executorch/backends/qualcomm/aot/wrappers/TensorWrapper.h>
 #include <executorch/backends/qualcomm/qc_compiler_spec_generated.h>
 #include <executorch/backends/qualcomm/runtime/QnnExecuTorchBackend.h>
 #include <executorch/backends/qualcomm/runtime/QnnManager.h>

 namespace executorch {
 namespace backends {
 namespace qnn {

 using namespace qnn_delegate;
 using executorch::runtime::ArrayRef;
 using executorch::runtime::BackendExecutionContext;
 using executorch::runtime::BackendInitContext;
 using executorch::runtime::CompileSpec;
 using executorch::runtime::DelegateHandle;
 using executorch::runtime::EValue;
 using executorch::runtime::FreeableBuffer;
 using executorch::runtime::MemoryAllocator;
 using executorch::runtime::Result;

 // ========== Public method implementations =========================
 // Compile-spec key whose value buffer is parsed into QnnExecuTorchOptions.
 constexpr const char* QNN_COMPILE_SPEC = "qnn_compile_spec";

 /**
  * Builds the QnnManager backing a delegated payload, or hands back a cached
  * one when a manager with the same binary signature was already registered.
  *
  * @param context Supplies the runtime allocator and the method name.
  * @param processed Delegate payload. Its data/size are wrapped as the QNN
  *     context binary; it is freed after a new manager has been fully set up.
  * @param compile_specs Specs scanned for the "qnn_compile_spec" key. Any
  *     other key only produces a warning. If the key appears more than once,
  *     the last occurrence wins.
  * @return The delegate handle (a QnnManager*), or an Internal error when
  *     manager initialization, qcir compilation or tensor allocation fails.
  */
 Result<DelegateHandle*> QnnExecuTorchBackend::init(
     BackendInitContext& context,
     FreeableBuffer* processed,
     ArrayRef<CompileSpec> compile_specs) const {
   // Wrap the processed payload as the QNN context binary (no copy is made).
   QnnExecuTorchContextBinary qnn_context_blob;
   qnn_context_blob.buffer = const_cast<void*>(processed->data());
   qnn_context_blob.nbytes = processed->size();

   // Convert CompileSpec to qnn ExecuTorch option.
   const qnn_delegate::QnnExecuTorchOptions* qnn_executorch_options = nullptr;
   for (auto& compile_spec : compile_specs) {
     if (std::strcmp(compile_spec.key, QNN_COMPILE_SPEC) == 0) {
       qnn_executorch_options =
           GetQnnExecuTorchOptions(compile_spec.value.buffer);
     } else {
       QNN_EXECUTORCH_LOG_WARN("unknown argument: %s", compile_spec.key);
     }
   }

   // Create QnnManager in storage owned by the runtime allocator.
   MemoryAllocator* runtime_allocator = context.get_runtime_allocator();
   QnnManager* qnn_manager =
       ET_ALLOCATE_INSTANCE_OR_RETURN_ERROR(runtime_allocator, QnnManager);

   // NOTE: The instance is created with placement new, so it is never released
   // through `delete`; its teardown is driven from destroy().
   new (qnn_manager) QnnManager(qnn_executorch_options, qnn_context_blob);

   // TODO: this is a temporary solution for multi-graph support, will be
   //       removed once framework starts to accept runtime configuration
   // ---
   // Check if the current context binary has already been initialized and
   // return the cached handle to reduce memory footprint.
   std::string signature = qnn_manager->GetBinarySignature();
   bool has_cached_handle = false;
   DelegateHandle* cached_handle = nullptr;
   {
     // The cache maps are mutated under mutex_ by add_cached_delegate() and
     // erase_cached_delegate(), so the lookup has to hold the same lock. The
     // handle is copied out so nothing touches the iterator after unlocking.
     std::lock_guard<std::mutex> guard(mutex_);
     auto iter = delegate_map_.find(signature);
     if (iter != delegate_map_.end()) {
       has_cached_handle = true;
       cached_handle = iter->second;
     }
   }
   if (has_cached_handle) {
     QNN_EXECUTORCH_LOG_INFO(
         "Use cached delegate handle for current method: %s",
         context.get_method_name());
     // NOTE(review): the QnnManager constructed above is abandoned here (and
     // on the error returns below) without a destructor call -- confirm this
     // is intended.
     return cached_handle;
   }

   ET_CHECK_OR_RETURN_ERROR(
       qnn_manager->Init() == Error::Ok,
       Internal,
       "Fail to initialize Qnn Manager");

   if (qnn_manager->IsOnlinePrepare()) {
     ET_CHECK_OR_RETURN_ERROR(
         qnn_manager->CompileQcir() == Error::Ok,
         Internal,
         "Fail to compile binary in qcir format");
   } else {
     for (const std::string& graph_name : qnn_manager->GetGraphNames()) {
       ET_CHECK_OR_RETURN_ERROR(
           qnn_manager->AllocateTensor(graph_name) == Error::Ok,
           Internal,
           "Fail to allocate tensor");
     }
   }
   add_cached_delegate(signature, qnn_manager);
   // This backend does not need its processed data after Init.
   processed->Free();
   return qnn_manager;
 }

 /**
  * Runs the graph named after the current method on the given delegate.
  *
  * @param context Supplies the method name and the event tracer.
  * @param handle Handle returned by init(); must still be present in the
  *     reverse cache map, otherwise an Internal error is returned.
  * @param args Method arguments. The first input_tensors.size() entries are
  *     used as graph inputs; the following entries are consumed, in order, by
  *     the output tensors whose name starts with "output_".
  * @return Error::Ok on success, Internal when the handle is unknown or when
  *     graph execution / profiling reports a failure.
  */
 Error QnnExecuTorchBackend::execute(
     BackendExecutionContext& context,
     DelegateHandle* handle,
     EValue** args) const {
   // NOTE(review): this lookup does not take mutex_, unlike the cache writers
   // add_cached_delegate() / erase_cached_delegate() -- confirm that execute()
   // never runs concurrently with init()/destroy().
   ET_CHECK_OR_RETURN_ERROR(
       delegate_map_rev_.count(handle) != 0,
       Internal,
       "DelegateHandle has been deleted");
   QnnManager* qnn_manager = static_cast<QnnManager*>(handle);

   std::string method_name = context.get_method_name();
   std::vector<std::shared_ptr<TensorWrapper>> input_tensors =
       qnn_manager->GetGraphInputs(method_name);
   std::vector<std::shared_ptr<TensorWrapper>> output_tensors =
       qnn_manager->GetGraphOutputs(method_name);
   std::vector<Qnn_Tensor_t> input_tensor_structs;
   std::vector<Qnn_Tensor_t> output_tensor_structs;

   // Both final sizes are known up front, so allocate once.
   input_tensor_structs.reserve(input_tensors.size());
   output_tensor_structs.reserve(output_tensors.size());

   // Unsigned index: avoids comparing a signed int against size().
   for (size_t i = 0; i < input_tensors.size(); ++i) {
     if (qnn_manager->RegisterMem(
             args[i]->toTensor().mutable_data_ptr(), input_tensors[i]) !=
         Error::Ok) {
       // Registration did not succeed: fall back to pointing the wrapper at
       // the caller's buffer (updating the data ptr only should be fine).
       input_tensors[i]->FillDataBuffer(
           args[i]->toTensor().const_data_ptr(), false /* copy_data */);
     }
     input_tensor_structs.push_back(input_tensors[i]->CloneTensorStruct());
   }

   // Output arguments follow the inputs in args.
   size_t output_index = input_tensors.size();
   for (const auto& output_tensor : output_tensors) {
     // pos=0 limits the search to the prefix
     if (output_tensor->GetName().rfind("output_", 0) == 0) {
       void* mutable_data_ptr =
           args[output_index]->toTensor().mutable_data_ptr();
       if (qnn_manager->RegisterMem(mutable_data_ptr, output_tensor) !=
           Error::Ok) {
         output_tensor->FillDataBuffer(mutable_data_ptr, false /* copy_data */);
       }
       output_index++;
     }
     // Every output tensor contributes a struct, including those that did not
     // consume an args slot above.
     output_tensor_structs.push_back(output_tensor->CloneTensorStruct());
   }

   ET_CHECK_OR_RETURN_ERROR(
       qnn_manager->Execute(
           method_name,
           input_tensor_structs,
           output_tensor_structs,
           context.event_tracer()) == Error::Ok,
       Internal,
       "Fail to execute graph");
   ET_CHECK_OR_RETURN_ERROR(
       qnn_manager->ProfileExecuteData(method_name, context.event_tracer()) ==
           Error::Ok,
       Internal,
       "Fail to profile graph");

   return Error::Ok;
 }

 /**
  * Tears down the QnnManager behind `handle` and drops it from the cache.
  * Null handles and handles that are not in the reverse cache map are ignored.
  */
 void QnnExecuTorchBackend::destroy(DelegateHandle* handle) const {
   // Nothing to do for a null or unknown handle.
   if (handle == nullptr || delegate_map_rev_.count(handle) == 0) {
     return;
   }
   static_cast<QnnManager*>(handle)->Destroy();
   erase_cached_delegate(handle);
 }

 // Reports the backend as available unconditionally; nothing is probed here.
 bool QnnExecuTorchBackend::is_available() const {
   return true;
 }

 /**
  * Records `handle` under `signature` in both cache maps (signature -> handle
  * and handle -> signature) while holding mutex_. Existing entries for the
  * same keys are overwritten.
  */
 void QnnExecuTorchBackend::add_cached_delegate(
     const std::string& signature,
     executorch::runtime::DelegateHandle* handle) const {
   std::lock_guard<std::mutex> lock(mutex_);
   auto& forward_slot = delegate_map_[signature];
   forward_slot = handle;
   auto& reverse_slot = delegate_map_rev_[handle];
   reverse_slot = signature;
 }

 /**
  * Removes `handle` from both cache maps while holding mutex_. A handle that
  * is not present in the reverse map leaves both maps untouched.
  */
 void QnnExecuTorchBackend::erase_cached_delegate(
     executorch::runtime::DelegateHandle* handle) const {
   std::lock_guard<std::mutex> lock(mutex_);
   auto reverse_entry = delegate_map_rev_.find(handle);
   if (reverse_entry != delegate_map_rev_.end()) {
     // Drop the forward entry first: it is keyed by the signature stored in
     // the reverse entry, which must still be alive at this point.
     delegate_map_.erase(reverse_entry->second);
     delegate_map_rev_.erase(reverse_entry);
   }
 }

 namespace {
 auto cls = QnnExecuTorchBackend();
 executorch::runtime::Backend backend{"QnnBackend", &cls};
 static auto success_with_compiler = register_backend(backend);
 } // namespace
 } // namespace qnn
 } // namespace backends
 } // namespace executorch
	/*
	* Copyright (c) Qualcomm Innovation Center, Inc.
	* All rights reserved.
	*
	* This source code is licensed under the BSD-style license found in the
	* LICENSE file in the root directory of this source tree.
	*/

	#include <executorch/backends/qualcomm/aot/wrappers/TensorWrapper.h>
	#include <executorch/backends/qualcomm/qc_compiler_spec_generated.h>
	#include <executorch/backends/qualcomm/runtime/QnnExecuTorchBackend.h>
	#include <executorch/backends/qualcomm/runtime/QnnManager.h>

	namespace executorch {
	namespace backends {
	namespace qnn {

	using namespace qnn_delegate;
	using executorch::runtime::ArrayRef;
	using executorch::runtime::BackendExecutionContext;
	using executorch::runtime::BackendInitContext;
	using executorch::runtime::CompileSpec;
	using executorch::runtime::DelegateHandle;
	using executorch::runtime::EValue;
	using executorch::runtime::FreeableBuffer;
	using executorch::runtime::MemoryAllocator;
	using executorch::runtime::Result;

	// ========== Public method implementations =========================
	// Compile-spec key whose value buffer is parsed into QnnExecuTorchOptions.
	constexpr const char* QNN_COMPILE_SPEC = "qnn_compile_spec";

	/**
	 * Builds the QnnManager backing a delegated payload, or hands back a cached
	 * one when a manager with the same binary signature was already registered.
	 *
	 * @param context Supplies the runtime allocator and the method name.
	 * @param processed Delegate payload. Its data/size are wrapped as the QNN
	 *     context binary; it is freed after a new manager has been fully set up.
	 * @param compile_specs Specs scanned for the "qnn_compile_spec" key. Any
	 *     other key only produces a warning. If the key appears more than once,
	 *     the last occurrence wins.
	 * @return The delegate handle (a QnnManager*), or an Internal error when
	 *     manager initialization, qcir compilation or tensor allocation fails.
	 */
	Result<DelegateHandle*> QnnExecuTorchBackend::init(
	    BackendInitContext& context,
	    FreeableBuffer* processed,
	    ArrayRef<CompileSpec> compile_specs) const {
	  // Wrap the processed payload as the QNN context binary (no copy is made).
	  QnnExecuTorchContextBinary qnn_context_blob;
	  qnn_context_blob.buffer = const_cast<void*>(processed->data());
	  qnn_context_blob.nbytes = processed->size();

	  // Convert CompileSpec to qnn ExecuTorch option.
	  const qnn_delegate::QnnExecuTorchOptions* qnn_executorch_options = nullptr;
	  for (auto& compile_spec : compile_specs) {
	    if (std::strcmp(compile_spec.key, QNN_COMPILE_SPEC) == 0) {
	      qnn_executorch_options =
	          GetQnnExecuTorchOptions(compile_spec.value.buffer);
	    } else {
	      QNN_EXECUTORCH_LOG_WARN("unknown argument: %s", compile_spec.key);
	    }
	  }

	  // Create QnnManager in storage owned by the runtime allocator.
	  MemoryAllocator* runtime_allocator = context.get_runtime_allocator();
	  QnnManager* qnn_manager =
	      ET_ALLOCATE_INSTANCE_OR_RETURN_ERROR(runtime_allocator, QnnManager);

	  // NOTE: The instance is created with placement new, so it is never released
	  // through `delete`; its teardown is driven from destroy().
	  new (qnn_manager) QnnManager(qnn_executorch_options, qnn_context_blob);

	  // TODO: this is a temporary solution for multi-graph support, will be
	  //       removed once framework starts to accept runtime configuration
	  // ---
	  // Check if the current context binary has already been initialized and
	  // return the cached handle to reduce memory footprint.
	  std::string signature = qnn_manager->GetBinarySignature();
	  bool has_cached_handle = false;
	  DelegateHandle* cached_handle = nullptr;
	  {
	    // The cache maps are mutated under mutex_ by add_cached_delegate() and
	    // erase_cached_delegate(), so the lookup has to hold the same lock. The
	    // handle is copied out so nothing touches the iterator after unlocking.
	    std::lock_guard<std::mutex> guard(mutex_);
	    auto iter = delegate_map_.find(signature);
	    if (iter != delegate_map_.end()) {
	      has_cached_handle = true;
	      cached_handle = iter->second;
	    }
	  }
	  if (has_cached_handle) {
	    QNN_EXECUTORCH_LOG_INFO(
	        "Use cached delegate handle for current method: %s",
	        context.get_method_name());
	    // NOTE(review): the QnnManager constructed above is abandoned here (and
	    // on the error returns below) without a destructor call -- confirm this
	    // is intended.
	    return cached_handle;
	  }

	  ET_CHECK_OR_RETURN_ERROR(
	      qnn_manager->Init() == Error::Ok,
	      Internal,
	      "Fail to initialize Qnn Manager");

	  if (qnn_manager->IsOnlinePrepare()) {
	    ET_CHECK_OR_RETURN_ERROR(
	        qnn_manager->CompileQcir() == Error::Ok,
	        Internal,
	        "Fail to compile binary in qcir format");
	  } else {
	    for (const std::string& graph_name : qnn_manager->GetGraphNames()) {
	      ET_CHECK_OR_RETURN_ERROR(
	          qnn_manager->AllocateTensor(graph_name) == Error::Ok,
	          Internal,
	          "Fail to allocate tensor");
	    }
	  }
	  add_cached_delegate(signature, qnn_manager);
	  // This backend does not need its processed data after Init.
	  processed->Free();
	  return qnn_manager;
	}

	/**
	 * Runs the graph named after the current method on the given delegate.
	 *
	 * @param context Supplies the method name and the event tracer.
	 * @param handle Handle returned by init(); must still be present in the
	 *     reverse cache map, otherwise an Internal error is returned.
	 * @param args Method arguments. The first input_tensors.size() entries are
	 *     used as graph inputs; the following entries are consumed, in order, by
	 *     the output tensors whose name starts with "output_".
	 * @return Error::Ok on success, Internal when the handle is unknown or when
	 *     graph execution / profiling reports a failure.
	 */
	Error QnnExecuTorchBackend::execute(
	    BackendExecutionContext& context,
	    DelegateHandle* handle,
	    EValue** args) const {
	  // NOTE(review): this lookup does not take mutex_, unlike the cache writers
	  // add_cached_delegate() / erase_cached_delegate() -- confirm that execute()
	  // never runs concurrently with init()/destroy().
	  ET_CHECK_OR_RETURN_ERROR(
	      delegate_map_rev_.count(handle) != 0,
	      Internal,
	      "DelegateHandle has been deleted");
	  QnnManager* qnn_manager = static_cast<QnnManager*>(handle);

	  std::string method_name = context.get_method_name();
	  std::vector<std::shared_ptr<TensorWrapper>> input_tensors =
	      qnn_manager->GetGraphInputs(method_name);
	  std::vector<std::shared_ptr<TensorWrapper>> output_tensors =
	      qnn_manager->GetGraphOutputs(method_name);
	  std::vector<Qnn_Tensor_t> input_tensor_structs;
	  std::vector<Qnn_Tensor_t> output_tensor_structs;

	  // Both final sizes are known up front, so allocate once.
	  input_tensor_structs.reserve(input_tensors.size());
	  output_tensor_structs.reserve(output_tensors.size());

	  // Unsigned index: avoids comparing a signed int against size().
	  for (size_t i = 0; i < input_tensors.size(); ++i) {
	    if (qnn_manager->RegisterMem(
	            args[i]->toTensor().mutable_data_ptr(), input_tensors[i]) !=
	        Error::Ok) {
	      // Registration did not succeed: fall back to pointing the wrapper at
	      // the caller's buffer (updating the data ptr only should be fine).
	      input_tensors[i]->FillDataBuffer(
	          args[i]->toTensor().const_data_ptr(), false /* copy_data */);
	    }
	    input_tensor_structs.push_back(input_tensors[i]->CloneTensorStruct());
	  }

	  // Output arguments follow the inputs in args.
	  size_t output_index = input_tensors.size();
	  for (const auto& output_tensor : output_tensors) {
	    // pos=0 limits the search to the prefix
	    if (output_tensor->GetName().rfind("output_", 0) == 0) {
	      void* mutable_data_ptr =
	          args[output_index]->toTensor().mutable_data_ptr();
	      if (qnn_manager->RegisterMem(mutable_data_ptr, output_tensor) !=
	          Error::Ok) {
	        output_tensor->FillDataBuffer(mutable_data_ptr, false /* copy_data */);
	      }
	      output_index++;
	    }
	    // Every output tensor contributes a struct, including those that did not
	    // consume an args slot above.
	    output_tensor_structs.push_back(output_tensor->CloneTensorStruct());
	  }

	  ET_CHECK_OR_RETURN_ERROR(
	      qnn_manager->Execute(
	          method_name,
	          input_tensor_structs,
	          output_tensor_structs,
	          context.event_tracer()) == Error::Ok,
	      Internal,
	      "Fail to execute graph");
	  ET_CHECK_OR_RETURN_ERROR(
	      qnn_manager->ProfileExecuteData(method_name, context.event_tracer()) ==
	          Error::Ok,
	      Internal,
	      "Fail to profile graph");

	  return Error::Ok;
	}

	/**
	 * Tears down the QnnManager behind `handle` and drops it from the cache.
	 * Null handles and handles that are not in the reverse cache map are ignored.
	 */
	void QnnExecuTorchBackend::destroy(DelegateHandle* handle) const {
	  // Nothing to do for a null or unknown handle.
	  if (handle == nullptr || delegate_map_rev_.count(handle) == 0) {
	    return;
	  }
	  static_cast<QnnManager*>(handle)->Destroy();
	  erase_cached_delegate(handle);
	}

	// Reports the backend as available unconditionally; nothing is probed here.
	bool QnnExecuTorchBackend::is_available() const {
	return true;
	}

	/**
	 * Records `handle` under `signature` in both cache maps (signature -> handle
	 * and handle -> signature) while holding mutex_. Existing entries for the
	 * same keys are overwritten.
	 */
	void QnnExecuTorchBackend::add_cached_delegate(
	    const std::string& signature,
	    executorch::runtime::DelegateHandle* handle) const {
	  std::lock_guard<std::mutex> lock(mutex_);
	  auto& forward_slot = delegate_map_[signature];
	  forward_slot = handle;
	  auto& reverse_slot = delegate_map_rev_[handle];
	  reverse_slot = signature;
	}

	/**
	 * Removes `handle` from both cache maps while holding mutex_. A handle that
	 * is not present in the reverse map leaves both maps untouched.
	 */
	void QnnExecuTorchBackend::erase_cached_delegate(
	    executorch::runtime::DelegateHandle* handle) const {
	  std::lock_guard<std::mutex> lock(mutex_);
	  auto reverse_entry = delegate_map_rev_.find(handle);
	  if (reverse_entry != delegate_map_rev_.end()) {
	    // Drop the forward entry first: it is keyed by the signature stored in
	    // the reverse entry, which must still be alive at this point.
	    delegate_map_.erase(reverse_entry->second);
	    delegate_map_rev_.erase(reverse_entry);
	  }
	}

	namespace {
	auto cls = QnnExecuTorchBackend();
	executorch::runtime::Backend backend{"QnnBackend", &cls};
	static auto success_with_compiler = register_backend(backend);
	} // namespace
	} // namespace qnn
	} // namespace backends
	} // namespace executorch