// backends/qualcomm/runtime/QnnManager.cpp - platform/external/executorch - Git at Google

 /*
  * Copyright (c) Qualcomm Innovation Center, Inc.
  * All rights reserved.
  *
  * This source code is licensed under the BSD-style license found in the
  * LICENSE file in the root directory of this source tree.
  */

 #include <executorch/backends/qualcomm/aot/ir/qcir_utils.h>
 #include <executorch/backends/qualcomm/qc_binary_info_generated.h>
 #include <executorch/backends/qualcomm/runtime/QnnManager.h>
 #include <executorch/backends/qualcomm/runtime/SharedBuffer.h>
 #include <executorch/backends/qualcomm/runtime/Utils.h>
 #include <executorch/backends/qualcomm/runtime/backends/QnnBackendCommon.h>
 #include <executorch/backends/qualcomm/runtime/backends/QnnImplementation.h>
 #include <executorch/extension/tensor/tensor.h>
 #include <algorithm>
 #include <cstdlib>
 #include <cstring>
 #include <fstream>
 #include <string>

 namespace executorch {
 namespace backends {
 namespace qnn {

 using executorch::runtime::Error;

 // Ordering predicate for exported graph inputs.
 //
 // Each tensor name is expected to carry an integer after its first '_'.
 // That integer is parsed with std::stoi and the two values are compared,
 // so `a` sorts before `b` when its number is smaller.
 bool CompareExportedInput(
     const std::shared_ptr<TensorWrapper>& a,
     const std::shared_ptr<TensorWrapper>& b) {
   // AOT uses the node order as the external id; the parsed number is what
   // lets the runtime pick the matching positional arg out of *args.
   const auto external_id = [](const std::shared_ptr<TensorWrapper>& tensor) {
     const std::string& name = tensor->GetName();
     return std::stoi(name.substr(name.find('_') + 1));
   };
   const int lhs_id = external_id(a);
   const int rhs_id = external_id(b);
   return lhs_id < rhs_id;
 }

 // Tear-down: swap the backend parameters for a fresh default-constructed
 // object, drop the logger, then ask the loaded implementation to terminate
 // all backends. The order matches Destroy().
 QnnManager::~QnnManager() {
   backend_params_ptr_ = std::make_unique<BackendConfigParameters>();
   logger_ = nullptr;
   qnn_loaded_backend_.TerminateAllBackends();
 }

 // Constructs a manager from serialized options and a context binary.
 //
 // @param options  Backend options; only the pointer is stored, so the
 //                 caller must keep the object alive for the lifetime of
 //                 this manager.
 // @param qnn_executorch_context_binary  Context blob, stored in
 //                 qnn_context_blob_.
 //
 // When the log level is at least kLogLevelInfo the effective configuration
 // is logged. If the options carry an empty library path, a default library
 // name is chosen from the backend type. The constructor only prepares the
 // implementation object and a placeholder BackendConfigParameters; loading
 // and configuring happen in Init().
 QnnManager::QnnManager(
     const QnnExecuTorchOptions* options,
     const QnnExecuTorchContextBinary& qnn_executorch_context_binary)
     : qnn_context_blob_(qnn_executorch_context_binary),
       qnn_loaded_backend_(""),
       // options' life cycle is decided by compiler specs which is
       // kept by executorch runtime framework
       // please pay attention to any potential seg fault
       options_(options) {
   QnnExecuTorchBackendType backend_type =
       options->backend_options()->backend_type();
   std::string library_path = options->library_path()->str();

   if (options->log_level() >= QnnExecuTorchLogLevel::kLogLevelInfo) {
     QNN_EXECUTORCH_LOG_INFO(
         "soc_model in soc_info: %s",
         EnumNameQcomChipset(options_->soc_info()->soc_model()));
     QNN_EXECUTORCH_LOG_INFO(
         "backend_type: %s", EnumNameQnnExecuTorchBackendType(backend_type));
     QNN_EXECUTORCH_LOG_INFO("graph_name: %s", options_->graph_name()->c_str());
     QNN_EXECUTORCH_LOG_INFO("library_path: %s", library_path.c_str());
     // %s needs a C string; IsTensorDump() is used as a boolean flag, so map
     // it to text instead of passing it straight through the varargs.
     QNN_EXECUTORCH_LOG_INFO(
         "dump intermediate outputs: %s", IsTensorDump() ? "true" : "false");
     QNN_EXECUTORCH_LOG_INFO(
         "log_level: %s", EnumNameQnnExecuTorchLogLevel(options_->log_level()));
     QNN_EXECUTORCH_LOG_INFO(
         "profile_level: %s",
         EnumNameQnnExecuTorchProfileLevel(options_->profile_level()));
     // The byte count is cast to a type that matches the specifier exactly so
     // the output does not depend on the width of the nbytes field.
     QNN_EXECUTORCH_LOG_INFO(
         "the size of qnn context binary: %llu",
         static_cast<unsigned long long>(qnn_executorch_context_binary.nbytes));
     QNN_EXECUTORCH_LOG_INFO(
         "Is on-device graph construction: %d", options->online_prepare());
     QNN_EXECUTORCH_LOG_INFO(
         "Enable shared buffer: %d", options->shared_buffer());
   }

   // No explicit path given: fall back to the default library name of the
   // selected backend.
   if (library_path.empty()) {
     switch (backend_type) {
       case QnnExecuTorchBackendType::kHtpBackend:
         library_path = htp_library_name_;
         break;
       case QnnExecuTorchBackendType::kDspBackend:
         library_path = dsp_library_name_;
         break;
       case QnnExecuTorchBackendType::kGpuBackend:
         library_path = gpu_library_name_;
         break;
       default:
         // The path stays empty here; the failure surfaces later when the
         // library is loaded.
         QNN_EXECUTORCH_LOG_ERROR(
             "Unknown backend type: %d", static_cast<int>(backend_type));
         break;
     }
   }
   qnn_loaded_backend_ = QnnImplementation(library_path);
   backend_params_ptr_ = std::make_unique<BackendConfigParameters>();
 }

 // Loads the QNN library through the implementation object and hands its
 // status back to the caller unchanged.
 Error QnnManager::LoadQnnLibrary() {
   return qnn_loaded_backend_.Load(nullptr);
 }

 // Pre-registers a memory handle for every custom-memory tensor description
 // currently recorded in the shared buffer manager.
 //
 // For each entry the unaligned base of its custom memory block is looked
 // up, the tensor offset relative to that base is computed, and the block is
 // translated into a file descriptor that is handed to the memory manager.
 //
 // @return Error::Ok when every entry was registered; Error::Internal when a
 //         file descriptor cannot be obtained or a registration fails.
 Error QnnManager::PreRegisterMem() {
   SharedBuffer& shared_buffer_manager = SharedBuffer::GetSharedBufferManager();
   // Iterate by reference: the entries are only read, no per-element copy is
   // needed.
   for (const auto& info : shared_buffer_manager.GetCustomMemTensorInfoSet()) {
     void* unaligned_custom_mem_base =
         shared_buffer_manager.GetUnAlignedAddr(info.custom_mem);

     // Offset of the tensor from the unaligned base: alignment padding plus
     // the tensor position inside the aligned block.
     size_t tensor_offset = (static_cast<char*>(info.custom_mem) -
                             static_cast<char*>(unaligned_custom_mem_base)) +
         info.pos;
     size_t total_custom_mem_size =
         shared_buffer_manager.GetAllocatedSize(info.custom_mem);

     int32_t mem_fd = shared_buffer_manager.MemToFd(unaligned_custom_mem_base);
     if (mem_fd == -1) {
       // The format is a single concatenated literal so that every value is
       // actually printed; each argument is cast to the exact type its
       // conversion specifier expects.
       QNN_EXECUTORCH_LOG_WARN(
           "PreRegisterMem failed to get file descriptor. "
           "custom_mem: %p, "
           "tensor_addr: %p, "
           "pos: %zu, "
           "tensor_bytes: %zu, "
           "shape: %p, "
           "rank: %zu, "
           "qnn_dtype: %X",
           static_cast<const void*>(info.custom_mem),
           static_cast<const void*>(info.tensor_addr),
           static_cast<size_t>(info.pos),
           static_cast<size_t>(info.tensor_bytes),
           static_cast<const void*>(info.shape),
           static_cast<size_t>(info.rank),
           static_cast<unsigned int>(info.dtype));
       return Error::Internal;
     }

     ET_CHECK_OR_RETURN_ERROR(
         backend_params_ptr_->qnn_mem_manager_ptr_->PreRegisterCustomMemHandle(
             mem_fd,
             unaligned_custom_mem_base,
             total_custom_mem_size,
             tensor_offset,
             info) == Error::Ok,
         Internal,
         "Fail to register to shared memory.");
   }
   return Error::Ok;
 }

 // Registers `data_ptr` as shared memory for `tensor_wrapper`.
 //
 // Returns Error::Internal when shared buffers are disabled in the options
 // or when the backend has no memory manager. Otherwise the call is routed
 // to RegisterCustomMem() if the shared buffer manager reports a custom
 // memory base for the pointer, and to RegisterIonMem() if it does not.
 Error QnnManager::RegisterMem(
     void* data_ptr,
     const std::shared_ptr<TensorWrapper>& tensor_wrapper) {
   SharedBuffer& buffer_manager = SharedBuffer::GetSharedBufferManager();

   // Shared buffer is not enabled.
   if (!options_->shared_buffer()) {
     return Error::Internal;
   }

   if (!backend_params_ptr_->qnn_mem_manager_ptr_) {
     const auto backend_type = options_->backend_options()->backend_type();
     QNN_EXECUTORCH_LOG_WARN(
         "Backend %s doesn't supported shared buffer.",
         EnumNameQnnExecuTorchBackendType(backend_type));
     return Error::Internal;
   }

   void* const custom_base = buffer_manager.GetCustomMemBase(data_ptr);
   return custom_base == nullptr
       ? RegisterIonMem(data_ptr, tensor_wrapper)
       : RegisterCustomMem(data_ptr, custom_base, tensor_wrapper);
 }

 // Registers a buffer that was allocated by the shared buffer manager.
 //
 // Returns Error::Internal if the pointer is unknown to the shared buffer
 // manager or no file descriptor can be obtained for it, Error::Ok if the
 // tensor is already registered or the registration succeeds.
 Error QnnManager::RegisterIonMem(
     void* data_ptr,
     const std::shared_ptr<TensorWrapper>& tensor_wrapper) {
   SharedBuffer& buffer_manager = SharedBuffer::GetSharedBufferManager();

   // An unknown pointer means one of two things:
   // 1. the tensor is an input/output of a partitioned graph
   // 2. the user did not allocate the buffer with the
   //    QnnExecuTorchAllocCustomMem API
   if (!buffer_manager.IsAllocated(data_ptr)) {
     return Error::Internal;
   }

   const auto& mem_manager = backend_params_ptr_->qnn_mem_manager_ptr_;
   if (mem_manager->IsRegistered(tensor_wrapper->GetMemHandle(), data_ptr)) {
     if (options_->log_level() >= QnnExecuTorchLogLevel::kLogLevelInfo) {
       QNN_EXECUTORCH_LOG_INFO(
           "Tensor name %s has been registered shared memory.",
           tensor_wrapper->GetName().c_str());
     }
     return Error::Ok;
   }

   int32_t fd = buffer_manager.MemToFd(data_ptr);
   if (fd == -1) {
     QNN_EXECUTORCH_LOG_WARN(
         "Tensor name %s is failed to get file descriptor.",
         tensor_wrapper->GetName().c_str());
     return Error::Internal;
   }

   ET_CHECK_OR_RETURN_ERROR(
       mem_manager->RegisterIonMem(tensor_wrapper, fd, data_ptr) == Error::Ok,
       Internal,
       "Fail to register to shared memory.");
   return Error::Ok;
 }

 // Registers a tensor that lives inside a user-provided custom memory block.
 //
 // Steps, in order:
 //   1. return Error::Ok right away if the tensor is already registered;
 //   2. reuse a pre-registered handle when one matches the tensor description;
 //   3. otherwise translate the unaligned base of the block to a file
 //      descriptor and register the tensor at its offset in that block.
 //
 // Returns Error::Internal when no file descriptor is available or the
 // registration fails.
 Error QnnManager::RegisterCustomMem(
     void* data_ptr,
     void* custom_mem_base,
     const std::shared_ptr<TensorWrapper>& tensor_wrapper) {
   const auto& mem_manager = backend_params_ptr_->qnn_mem_manager_ptr_;
   const bool verbose =
       options_->log_level() >= QnnExecuTorchLogLevel::kLogLevelInfo;

   if (mem_manager->IsRegistered(tensor_wrapper->GetMemHandle(), data_ptr)) {
     if (verbose) {
       QNN_EXECUTORCH_LOG_INFO(
           "Tensor name %s has been registered shared memory.",
           tensor_wrapper->GetName().c_str());
     }
     return Error::Ok;
   }

   // Description of the tensor; pos is its byte distance from the aligned
   // custom memory base.
   CustomMemTensorInfo tensor_info{
       custom_mem_base,
       data_ptr,
       static_cast<size_t>(
           static_cast<char*>(data_ptr) - static_cast<char*>(custom_mem_base)),
       tensor_wrapper->GetBytes(),
       tensor_wrapper->GetDims(),
       tensor_wrapper->GetRank(),
       qnn_dtype_to_scalar_type_[tensor_wrapper->GetDataType()]};

   Qnn_MemHandle_t cached_handle =
       mem_manager->GetPreRegisteredHandle(tensor_info);
   if (cached_handle != nullptr) {
     if (verbose) {
       QNN_EXECUTORCH_LOG_INFO(
           "Tensor name %s found a pre-registered memHandle.",
           tensor_wrapper->GetName().c_str());
     }
     return mem_manager->SetMemHandle(tensor_wrapper, data_ptr, cached_handle);
   }

   SharedBuffer& buffer_manager = SharedBuffer::GetSharedBufferManager();
   void* unaligned_base = buffer_manager.GetUnAlignedAddr(custom_mem_base);

   // Alignment padding in front of the block plus the tensor position.
   size_t offset_in_block = static_cast<char*>(custom_mem_base) -
       static_cast<char*>(unaligned_base) + tensor_info.pos;
   size_t block_size = buffer_manager.GetAllocatedSize(custom_mem_base);

   int32_t fd = buffer_manager.MemToFd(unaligned_base);
   if (fd == -1) {
     QNN_EXECUTORCH_LOG_WARN(
         "Tensor name %s failed to get file descriptor.",
         tensor_wrapper->GetName().c_str());
     return Error::Internal;
   }

   ET_CHECK_OR_RETURN_ERROR(
       mem_manager->RegisterCustomMem(
           tensor_wrapper,
           fd,
           data_ptr,
           unaligned_base,
           block_size,
           offset_in_block) == Error::Ok,
       Internal,
       "Fail to register to shared memory.");

   return Error::Ok;
 }

 // Loads the QNN library, creates the logger and, if the backend has not
 // been initialized yet, builds and configures every backend component
 // (backend cache, backend, device, context, then one graph per graph name
 // reported by the context).
 //
 // On aarch64 builds the custom memory descriptions known to the shared
 // buffer manager are pre-registered afterwards.
 //
 // @return Error::Ok on success, Error::Internal as soon as any step fails.
 //
 // NOTE(review): the logger is re-created on every call, while an
 // already-initialized backend was handed the previous logger pointer --
 // confirm that calling Init() more than once is supported.
 Error QnnManager::Init() {
   ET_CHECK_OR_RETURN_ERROR(
       LoadQnnLibrary() == Error::Ok, Internal, "Fail to load Qnn library");
   logger_ = std::make_unique<QnnLogger>(
       qnn_loaded_backend_, LoggingCallback, options_->log_level());
   if (backend_params_ptr_->backend_init_state_ ==
       BackendInitializeState::UNINITIALIZED) {
     QNN_EXECUTORCH_LOG_INFO(
         "Initialize Qnn backend "
         "parameters for Qnn executorch backend type %d",
         static_cast<int>(options_->backend_options()->backend_type()));
     // Build into a local first: if the factory returns null, the existing
     // (uninitialized) parameters object stays in place, so later calls on
     // this manager never dereference a null backend_params_ptr_.
     auto created_params = QnnBackendFactory().Create(
         qnn_loaded_backend_, logger_.get(), qnn_context_blob_, options_);
     ET_CHECK_OR_RETURN_ERROR(
         created_params != nullptr, Internal, "Failed to load Qnn backend.");
     backend_params_ptr_ = std::move(created_params);
     ET_CHECK_OR_RETURN_ERROR(
         backend_params_ptr_->qnn_backend_cache_ptr_->Configure() == Error::Ok,
         Internal,
         "Fail to configure Qnn backend cache");
     ET_CHECK_OR_RETURN_ERROR(
         backend_params_ptr_->qnn_backend_ptr_->Configure() == Error::Ok,
         Internal,
         "Fail to configure Qnn backend");
     ET_CHECK_OR_RETURN_ERROR(
         backend_params_ptr_->qnn_device_ptr_->Configure() == Error::Ok,
         Internal,
         "Fail to configure Qnn device");
     ET_CHECK_OR_RETURN_ERROR(
         backend_params_ptr_->qnn_context_ptr_->Configure() == Error::Ok,
         Internal,
         "Fail to configure Qnn context");
     for (const std::string& graph_name :
          backend_params_ptr_->qnn_context_ptr_->GetGraphNames()) {
       ET_CHECK_OR_RETURN_ERROR(
           backend_params_ptr_->qnn_graph_ptr_->Configure(graph_name) ==
               Error::Ok,
           Internal,
           "Fail to configure Qnn graph");
     }
     // Only flagged as initialized once every component configured cleanly.
     backend_params_ptr_->backend_init_state_ =
         BackendInitializeState::INITIALIZED;
   }

 #if defined(__aarch64__)
   ET_CHECK_OR_RETURN_ERROR(
       PreRegisterMem() == Error::Ok,
       Internal,
       "Fail to pre register custom memory handle");
 #endif
   return Error::Ok;
 }

 // Creates tensor wrappers for the inputs and outputs that the context
 // reports for `graph_name` and stores them in input_tensors_ /
 // output_tensors_ under that name.
 //
 // Inputs are sorted with CompareExportedInput unless the options say the
 // graph comes from a context binary. Always returns Error::Ok.
 Error QnnManager::AllocateTensor(const std::string& graph_name) {
   std::vector<Qnn_Tensor_t> graph_inputs =
       backend_params_ptr_->qnn_context_ptr_->GetGraphInputs(graph_name);
   std::vector<Qnn_Tensor_t> graph_outputs =
       backend_params_ptr_->qnn_context_ptr_->GetGraphOutputs(graph_name);

   for (size_t idx = 0; idx < graph_inputs.size(); ++idx) {
     std::shared_ptr<TensorWrapper> wrapper =
         CreateTensorWrapper(graph_inputs[idx]);
     wrapper->UpdateQnnTensorMeta(graph_inputs[idx]);
     input_tensors_[graph_name].emplace_back(std::move(wrapper));
   }

   if (!options_->is_from_context_binary()) {
     auto& stored_inputs = input_tensors_[graph_name];
     std::sort(
         stored_inputs.begin(), stored_inputs.end(), CompareExportedInput);
   }

   for (auto& qnn_output : graph_outputs) {
     std::shared_ptr<TensorWrapper> wrapper = CreateTensorWrapper(qnn_output);
     wrapper->UpdateQnnTensorMeta(qnn_output);
     const std::string& current_name = wrapper->GetName();
     // The shared buffer mechanism identifies outputs by the "output_" marker
     // in the name; it might be missing if the context binary came from
     // qnn_converter, so add it in that case.
     const bool lacks_marker =
         current_name.find("output_") == std::string::npos;
     if (options_->is_from_context_binary() && lacks_marker) {
       wrapper->SetName("output_" + current_name);
     }
     if (IsTensorDump()) {
       wrapper->AllocateDataBuffer();
     }
     output_tensors_[graph_name].emplace_back(std::move(wrapper));
   }
   return Error::Ok;
 }

 // Takes ownership of already-built tensor wrappers for `graph_name`.
 //
 // `inputs` and `outputs` are moved into input_tensors_ / output_tensors_.
 // Inputs are sorted with CompareExportedInput unless the options say the
 // graph comes from a context binary. Always returns Error::Ok.
 Error QnnManager::AllocateTensor(
     const std::string& graph_name,
     std::vector<std::shared_ptr<TensorWrapper>>& inputs,
     std::vector<std::shared_ptr<TensorWrapper>>& outputs) {
   input_tensors_[graph_name] = std::move(inputs);
   // TODO: support per-tensor dump in online prepare mode
   //       should be achievable with some pre-process
   if (!options_->is_from_context_binary()) {
     auto& stored_inputs = input_tensors_[graph_name];
     std::sort(
         stored_inputs.begin(), stored_inputs.end(), CompareExportedInput);
   }
   output_tensors_[graph_name] = std::move(outputs);
   return Error::Ok;
 }

 // Runs the graph named `graph_name` with the given input/output tensor
 // structs.
 //
 // Returns Error::Internal if graph execution reports anything other than
 // QNN_SUCCESS. When tensor dumping is enabled, every output tensor is
 // wrapped as a tensor over its client buffer and logged through the event
 // tracer.
 Error QnnManager::Execute(
     const std::string& graph_name,
     const std::vector<Qnn_Tensor_t>& input_tensor_structs,
     std::vector<Qnn_Tensor_t>& output_tensor_structs,
     executorch::runtime::EventTracer* event_tracer) {
   const Qnn_ErrorHandle_t status =
       backend_params_ptr_->qnn_graph_ptr_->GraphExecute(
           graph_name, input_tensor_structs, output_tensor_structs);
   if (status != QNN_SUCCESS) {
     QNN_EXECUTORCH_LOG_ERROR(
         "qnn_graph_execute failed. Error %d", QNN_GET_ERROR_CODE(status));
     return Error::Internal;
   }

   if (!IsTensorDump()) {
     return Error::Ok;
   }

   // TODO: Need to handle the graph which is partitioned.
   // Maybe we could use graph name.
   for (const Qnn_Tensor_t& output_tensor : output_tensor_structs) {
     const auto* tensor_fields = QNN_VER_PTR(output_tensor);

     // Copy the dimensions into the size type the tensor factory expects.
     std::vector<executorch::aten::SizesType> sizes(
         tensor_fields->dimensions,
         tensor_fields->dimensions + tensor_fields->rank);

     auto dump_tensor = executorch::extension::from_blob(
         tensor_fields->clientBuf.data,
         sizes,
         qnn_dtype_to_scalar_type_[tensor_fields->dataType]);

     executorch::runtime::event_tracer_log_output_delegate<
         executorch::aten::Tensor>(
         event_tracer,
         tensor_fields->name,
         /*delegate_debug_id=*/
         static_cast<executorch::runtime::DebugHandle>(-1),
         *dump_tensor);
   }

   return Error::Ok;
 }

 // Collects profiling data for `graph_name` into `event_tracer`.
 //
 // Does nothing and returns Error::Ok when profiling is switched off in the
 // options; returns Error::Internal if the graph reports a profiling error.
 Error QnnManager::ProfileExecuteData(
     const std::string& graph_name,
     executorch::runtime::EventTracer* event_tracer) {
   if (options_->profile_level() == QnnExecuTorchProfileLevel::kProfileOff) {
     return Error::Ok;
   }

   const Qnn_ErrorHandle_t status =
       backend_params_ptr_->qnn_graph_ptr_->ProfileExecuteData(
           graph_name, event_tracer);
   if (status != QNN_SUCCESS) {
     QNN_EXECUTORCH_LOG_ERROR(
         " Failed to profile. Error %d", QNN_GET_ERROR_CODE(status));
     return Error::Internal;
   }
   return Error::Ok;
 }

 void QnnManager::Destroy() {
   QNN_EXECUTORCH_LOG_INFO("Destroy Qnn backend parameters");
   backend_params_ptr_.reset(new BackendConfigParameters());
   logger_.reset();

   qnn_loaded_backend_.TerminateAllBackends();
 }

 bool QnnManager::IsNodeSupportedByBackend(
     std::vector<std::shared_ptr<OpWrapper>>& op_wrappers) {
   Qnn_ErrorHandle_t error = QNN_SUCCESS;

   for (std::shared_ptr<OpWrapper>& op_wrapper : op_wrappers) {
     for (const auto& param : op_wrapper->GetParams()) {
       // unused?
       // auto* p_tensor_param = dynamic_cast<TensorParamWrapper*>(param.get());
       if (param->PopulateQnnParam() != Error::Ok) {
         QNN_EXECUTORCH_LOG_WARN(
             "Qnn Backend op validation failed "
             "with PopulateQnnParam: %d",
             QNN_GET_ERROR_CODE(error));
         return false;
       }
     }

     error = backend_params_ptr_->qnn_backend_ptr_->BackendValidateOpConfig(
         op_wrapper->GetOpConfig());
     if (error != QNN_SUCCESS) {
       QNN_EXECUTORCH_LOG_WARN(
           "Qnn Backend op validation failed with error: %d",
           QNN_GET_ERROR_CODE(error));

       return false;
     }
   }
   return true;
 }

 // Asks the context for its binary and stores it in
 // `qnn_executorch_context_binary`. Returns Error::Internal if the context
 // reports a failure, Error::Ok otherwise.
 Error QnnManager::GetContextBinary(
     QnnExecuTorchContextBinary& qnn_executorch_context_binary) {
   const Error status = backend_params_ptr_->qnn_context_ptr_->GetContextBinary(
       qnn_executorch_context_binary);
   ET_CHECK_OR_RETURN_ERROR(
       status == Error::Ok, Internal, "Fail to get context binary.");

   return Error::Ok;
 }

 // Rebuilds and compiles every graph stored in qcir form inside the context
 // blob.
 //
 // The blob is verified as a BinaryInfo flatbuffer, its data payload is
 // verified as a qcir Context, and then for each graph:
 //   - every qcir tensor becomes a TensorWrapper (WRITE tensors are collected
 //     as graph inputs, READ tensors as graph outputs);
 //   - every qcir node becomes an OpWrapper with its input tensors, output
 //     tensors and params attached;
 //   - the op list is compiled and the graph inputs/outputs are handed to
 //     AllocateTensor().
 //
 // Returns Error::Internal if either verification, the compilation or the
 // tensor allocation fails; Error::Ok otherwise.
 Error QnnManager::CompileQcir() {
   // Step 1: the outer container must be a valid BinaryInfo buffer.
   flatbuffers::Verifier verifier_binary_info(
       static_cast<const uint8_t* const>(qnn_context_blob_.buffer),
       qnn_context_blob_.nbytes);
   if (!qnn_delegate::VerifyBinaryInfoBuffer(verifier_binary_info)) {
     QNN_EXECUTORCH_LOG_ERROR("Fail to verify binary info");
     return Error::Internal;
   }

   // Step 2: its data payload must be a valid qcir Context buffer.
   auto binary_info = qnn_delegate::GetBinaryInfo(qnn_context_blob_.buffer);
   flatbuffers::Verifier verifier_qcir(
       binary_info->data()->data(), binary_info->data()->size());
   if (!qcir::VerifyContextBuffer(verifier_qcir)) {
     QNN_EXECUTORCH_LOG_ERROR("Fail to verify qcir format");
     return Error::Internal;
   }

   auto context = qcir::GetContext(binary_info->data()->data());
   for (const auto& graph : *context->graphs()) {
     // qcir tensors to TensorWrapper
     // `tensors` keeps every wrapper in qcir order so that nodes can refer to
     // them by index below.
     std::vector<std::shared_ptr<TensorWrapper>> graph_inputs, graph_outputs,
         tensors;
     for (const auto& tensor : *graph->tensors()) {
       tensors.emplace_back(CreateTensorWrapper(ToTensor(tensor)));
       if (tensor->type() == qcir::TensorType::WRITE) {
         graph_inputs.push_back(tensors.back());
       } else if (tensor->type() == qcir::TensorType::READ) {
         graph_outputs.push_back(tensors.back());
       }
     }
     std::vector<std::shared_ptr<OpWrapper>> op_wrappers;
     // qcir graph node to OpWrapper
     for (const auto& node : *graph->nodes()) {
       std::shared_ptr<OpWrapper> op = std::make_shared<OpWrapper>(
           node->name()->str(),
           node->package_name()->str(),
           node->type_name()->str());

       // qcir input tensors to OpWrapper input tensors
       // NOTE(review): indices are used without a bounds check against
       // `tensors` -- presumably guaranteed by the producer; confirm.
       std::vector<std::shared_ptr<TensorWrapper>> inputs;
       for (uint32_t index : *node->inputs()) {
         inputs.push_back(tensors[index]);
       }
       op->AddInputTensors(inputs);

       // qcir output tensors to OpWrapper output tensors
       std::vector<std::shared_ptr<TensorWrapper>> outputs;
       for (uint32_t index : *node->outputs()) {
         outputs.push_back(tensors[index]);
       }
       op->AddOutputTensors(outputs);

       // qcir operator param to OpWrapper param
       // A param with a non-empty shape is added as a tensor param; an empty
       // shape means a scalar whose value is read from the tensor data.
       for (uint32_t index : *node->params()) {
         const auto& tensor = graph->tensors()->Get(index);
         std::string name = tensor->name()->str();
         Qnn_DataType_t dtype = ToDataType(tensor->dtype());
         if (tensor->shape()->size() != 0) {
           // add tensor param
           op->AddTensorParam(
               name,
               dtype,
               tensor->shape()->size(),
               tensor->shape()->data(),
               tensor->data()->data());
         } else {
           // add scalar param
           // The raw bytes are reinterpreted according to the QNN data type.
           switch (dtype) {
             case Qnn_DataType_t::QNN_DATATYPE_INT_32:
               op->AddScalarParam(
                   name,
                   dtype,
                   *reinterpret_cast<const int32_t*>(tensor->data()->Data()));
               break;
             case Qnn_DataType_t::QNN_DATATYPE_INT_16:
               op->AddScalarParam(
                   name,
                   dtype,
                   *reinterpret_cast<const int16_t*>(tensor->data()->Data()));
               break;
             case Qnn_DataType_t::QNN_DATATYPE_INT_8:
               op->AddScalarParam(
                   name, dtype, static_cast<int8_t>(*tensor->data()->Data()));
               break;
             case Qnn_DataType_t::QNN_DATATYPE_UINT_32:
               op->AddScalarParam(
                   name,
                   dtype,
                   *reinterpret_cast<const uint32_t*>(tensor->data()->Data()));
               break;
             case Qnn_DataType_t::QNN_DATATYPE_UINT_16:
               op->AddScalarParam(
                   name,
                   dtype,
                   *reinterpret_cast<const uint16_t*>(tensor->data()->Data()));
               break;
             case Qnn_DataType_t::QNN_DATATYPE_UINT_8:
               op->AddScalarParam(name, dtype, *tensor->data()->Data());
               break;
             // NOTE(review): FLOAT_16 shares the FLOAT_32 path and reads the
             // payload as a 4-byte float -- confirm the serialized scalar is
             // stored as a float in that case.
             case Qnn_DataType_t::QNN_DATATYPE_FLOAT_32:
             case Qnn_DataType_t::QNN_DATATYPE_FLOAT_16:
               op->AddScalarParam(
                   name,
                   dtype,
                   *reinterpret_cast<const float*>(tensor->data()->Data()));
               break;
             case Qnn_DataType_t::QNN_DATATYPE_BOOL_8:
               op->AddScalarParam(name, dtype, *tensor->data()->Data());
               break;
             default:
               // Unsupported scalar types are only logged; the param is
               // skipped and processing continues.
               QNN_EXECUTORCH_LOG_ERROR(
                   "Invalid scalar type: %s", tensor->name()->c_str());
               break;
           }
         }
       }
       op_wrappers.push_back(std::move(op));
     }

     // Build and finalize the graph from the reconstructed ops.
     ET_CHECK_OR_RETURN_ERROR(
         Compile(graph->name()->str(), op_wrappers) == Error::Ok,
         Internal,
         "Fail to compile graph from qcir with graph_name: %s",
         graph->name()->str().c_str());

     // Hand over the graph-level input/output wrappers collected above.
     ET_CHECK_OR_RETURN_ERROR(
         AllocateTensor(graph->name()->str(), graph_inputs, graph_outputs) ==
             Error::Ok,
         Internal,
         "Fail to allocate tensor for qcir with graph_name: %s",
         graph->name()->str().c_str());
   }

   return Error::Ok;
 }

 // Adds every op in `op_wrappers` to the QNN graph `graph_name` and
 // finalizes the graph.
 //
 // For each op, its input tensors, output tensors and tensor-typed params
 // are ensured to exist in the graph, each param is populated, and the op
 // config is added as a node.
 //
 // @return Error::Ok on success; Error::Internal if a tensor cannot be
 //         added, a param cannot be populated, a node cannot be added or
 //         the graph cannot be finalized.
 Error QnnManager::Compile(
     const std::string& graph_name,
     std::vector<std::shared_ptr<OpWrapper>>& op_wrappers) {
   Qnn_ErrorHandle_t error = QNN_SUCCESS;

   for (std::shared_ptr<OpWrapper>& op_wrapper : op_wrappers) {
     for (const auto& tensor_wrapper : op_wrapper->GetInputTensors()) {
       ET_CHECK_OR_RETURN_ERROR(
           backend_params_ptr_->qnn_graph_ptr_->EnsureTensorInQnnGraph(
               graph_name, tensor_wrapper) == Error::Ok,
           Internal,
           "Tensor name %s isn't added to Qnn Graph",
           tensor_wrapper->GetName().c_str());
     }

     for (const auto& tensor_wrapper : op_wrapper->GetOutputTensors()) {
       ET_CHECK_OR_RETURN_ERROR(
           backend_params_ptr_->qnn_graph_ptr_->EnsureTensorInQnnGraph(
               graph_name, tensor_wrapper) == Error::Ok,
           Internal,
           "Tensor name %s isn't added to Qnn Graph",
           tensor_wrapper->GetName().c_str());
     }

     for (const auto& param : op_wrapper->GetParams()) {
       // Only tensor params own a tensor that has to live in the graph;
       // other params make the cast yield nullptr and skip this step.
       auto* p_tensor_param = dynamic_cast<TensorParamWrapper*>(param.get());
       if (p_tensor_param != nullptr) {
         ET_CHECK_OR_RETURN_ERROR(
             backend_params_ptr_->qnn_graph_ptr_->EnsureTensorInQnnGraph(
                 graph_name, p_tensor_param->GetTensorWrapper()) == Error::Ok,
             Internal,
             "Param tensor name %s isn't added to Qnn Graph",
             p_tensor_param->GetName().c_str());
       }
       // Report the step that actually failed rather than a backend
       // configuration failure.
       ET_CHECK_OR_RETURN_ERROR(
           param->PopulateQnnParam() == Error::Ok,
           Internal,
           "Fail to populate Qnn param");
     }

     error = backend_params_ptr_->qnn_graph_ptr_->GraphAddNode(
         graph_name, op_wrapper->GetOpConfig());
     if (error != QNN_SUCCESS) {
       QNN_EXECUTORCH_LOG_ERROR(
           "Failed to add node to Qnn Graph with error: %d",
           QNN_GET_ERROR_CODE(error));
       return Error::Internal;
     }
   }

   error = backend_params_ptr_->qnn_graph_ptr_->GraphFinalize(graph_name);
   if (error != QNN_SUCCESS) {
     QNN_EXECUTORCH_LOG_ERROR(
         "Failed to finalize Qnn Graph with error: %d",
         QNN_GET_ERROR_CODE(error));
     return Error::Internal;
   }

   return Error::Ok;
 }

 // Returns the signature string stored in the context blob, or an empty
 // string when the blob does not verify as a BinaryInfo buffer.
 std::string QnnManager::GetBinarySignature() {
   const auto* blob_bytes =
       static_cast<const uint8_t*>(qnn_context_blob_.buffer);
   flatbuffers::Verifier verifier(blob_bytes, qnn_context_blob_.nbytes);
   if (!VerifyBinaryInfoBuffer(verifier)) {
     return "";
   }
   return GetBinaryInfo(qnn_context_blob_.buffer)->signature()->str();
 }

 } // namespace qnn
 } // namespace backends
 } // namespace executorch
 void* QnnExecuTorchAllocCustomMem(size_t bytes, size_t alignment) {
   void* buffer_ptr =
       executorch::backends::qnn::SharedBuffer::GetSharedBufferManager()
           .AllocMem(bytes, alignment);
   return buffer_ptr;
 }

 void QnnExecuTorchFreeCustomMem(void* buffer_ptr) {
   executorch::backends::qnn::SharedBuffer::GetSharedBufferManager().FreeMem(
       buffer_ptr);
 }

 void QnnExecuTorchAddCustomMemTensorAddr(void* tensor_addr, void* custom_mem) {
   executorch::backends::qnn::SharedBuffer::GetSharedBufferManager()
       .AddCusomMemTensorAddr(tensor_addr, custom_mem);
 }

 void QnnExecuTorchAddCustomMemTensorInfo(const CustomMemTensorInfo& info) {
   executorch::backends::qnn::SharedBuffer::GetSharedBufferManager()
       .AddCusomMemTensorInfo(info);
 }