/*
 * Copyright (c) Qualcomm Innovation Center, Inc.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

#include <executorch/backends/qualcomm/aot/ir/qcir_utils.h>
#include <executorch/backends/qualcomm/aot/wrappers/TensorWrapper.h>
#include <executorch/backends/qualcomm/runtime/QnnExecuTorchBackend.h>
#include <executorch/backends/qualcomm/runtime/QnnManager.h>
#include <executorch/backends/qualcomm/schema_generated.h>

#include <cstring>
#include <memory>
#include <string>
#include <vector>

namespace torch {
namespace executor {
// ========== Public method implementations =========================
using namespace qnn;
using namespace qnn_delegate;
constexpr const char* QNN_COMPILE_SPEC = "qnn_compile_spec";

Result<DelegateHandle*> QnnExecuTorchBackend::init(
    BackendInitContext& context,
    FreeableBuffer* processed,
    ArrayRef<CompileSpec> compile_specs) const {
  // convert processed data (SizedBuffer) to a QNN context binary
  QnnExecuTorchContextBinary qnn_context_blob;
  const qnn_delegate::QnnExecuTorchOptions* qnn_executorch_options = nullptr;

  qnn_context_blob.buffer = const_cast<void*>(processed->data());
  qnn_context_blob.nbytes = processed->size();

  // convert CompileSpec to QnnExecuTorchOptions
  for (auto& compile_spec : compile_specs) {
    if (std::strcmp(compile_spec.key, QNN_COMPILE_SPEC) == 0) {
      qnn_executorch_options =
          GetQnnExecuTorchOptions(compile_spec.value.buffer);
    } else {
      QNN_EXECUTORCH_LOG_WARN("unknown argument: %s", compile_spec.key);
    }
  }
  ET_CHECK_OR_RETURN_ERROR(
      qnn_executorch_options != nullptr,
      InvalidArgument,
      "Missing qnn_compile_spec in compile specs");

  // Create QnnManager
  MemoryAllocator* runtime_allocator = context.get_runtime_allocator();
  QnnManager* qnn_manager =
      ET_ALLOCATE_INSTANCE_OR_RETURN_ERROR(runtime_allocator, QnnManager);

  // NOTE: Since we use placement new and since this type is not trivially
  // destructible, we must call the destructor manually in destroy().
  new (qnn_manager) QnnManager(qnn_executorch_options, qnn_context_blob);

  ET_CHECK_OR_RETURN_ERROR(
      qnn_manager->Init() == Error::Ok,
      Internal,
      "Failed to initialize QnnManager");

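  // Two paths: with online prepare the processed blob is a serialized qcir
  // graph that is rebuilt into op/tensor wrappers and compiled on device;
  // otherwise it is a prebuilt QNN context binary and only tensor allocation
  // is needed.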
  if (qnn_manager->IsOnlinePrepare()) {
    auto graph = qcir::GetGraph(qnn_context_blob.buffer);
    // qcir tensors to TensorWrapper
    std::vector<std::shared_ptr<TensorWrapper>> tensors, graph_inputs,
        graph_outputs;
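    // WRITE tensors are written by the client (graph inputs); READ tensors
    // are read back by the client (graph outputs).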
    for (const auto& tensor : *graph->tensors()) {
      tensors.emplace_back(CreateTensorWrapper(ToTensor(tensor)));
      if (tensor->type() == qcir::TensorType::WRITE) {
        graph_inputs.push_back(tensors.back());
      } else if (tensor->type() == qcir::TensorType::READ) {
        graph_outputs.push_back(tensors.back());
      }
    }

    std::vector<std::shared_ptr<OpWrapper>> op_wrappers;
    // qcir graph node to OpWrapper
    for (const auto& node : *graph->nodes()) {
      std::shared_ptr<OpWrapper> op = std::make_shared<OpWrapper>(
          node->name()->str(),
          node->package_name()->str(),
          node->type_name()->str());

      // qcir input tensors to OpWrapper input tensors
      std::vector<std::shared_ptr<TensorWrapper>> inputs;
      for (uint32_t index : *node->inputs()) {
        inputs.push_back(tensors[index]);
      }
      op->AddInputTensors(inputs);

      // qcir output tensors to OpWrapper output tensors
      std::vector<std::shared_ptr<TensorWrapper>> outputs;
      for (uint32_t index : *node->outputs()) {
        outputs.push_back(tensors[index]);
      }
      op->AddOutputTensors(outputs);

      // qcir operator param to OpWrapper param
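      // a param with a non-empty shape is added as a tensor param; a rank-0
      // param is added as a scalar keyed on its data type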
      for (uint32_t index : *node->params()) {
        const auto& tensor = graph->tensors()->Get(index);
        std::string name = tensor->name()->str();
        Qnn_DataType_t dtype = ToDataType(tensor->dtype());
        if (tensor->shape()->size() != 0) {
          // add tensor param
          op->AddTensorParam(
              name,
              dtype,
              tensor->shape()->size(),
              tensor->shape()->data(),
              tensor->data()->data());
        } else {
          // add scalar param
          switch (dtype) {
            case Qnn_DataType_t::QNN_DATATYPE_INT_32:
              op->AddScalarParam(
                  name,
                  dtype,
                  *reinterpret_cast<const int32_t*>(tensor->data()->Data()));
              break;
            case Qnn_DataType_t::QNN_DATATYPE_INT_16:
              op->AddScalarParam(
                  name,
                  dtype,
                  *reinterpret_cast<const int16_t*>(tensor->data()->Data()));
              break;
            case Qnn_DataType_t::QNN_DATATYPE_INT_8:
              op->AddScalarParam(
                  name, dtype, static_cast<int8_t>(*tensor->data()->Data()));
              break;
            case Qnn_DataType_t::QNN_DATATYPE_UINT_32:
              op->AddScalarParam(
                  name,
                  dtype,
                  *reinterpret_cast<const uint32_t*>(tensor->data()->Data()));
              break;
            case Qnn_DataType_t::QNN_DATATYPE_UINT_16:
              op->AddScalarParam(
                  name,
                  dtype,
                  *reinterpret_cast<const uint16_t*>(tensor->data()->Data()));
              break;
            case Qnn_DataType_t::QNN_DATATYPE_UINT_8:
              op->AddScalarParam(name, dtype, *tensor->data()->Data());
              break;
            case Qnn_DataType_t::QNN_DATATYPE_FLOAT_32:
            case Qnn_DataType_t::QNN_DATATYPE_FLOAT_16:
              op->AddScalarParam(
                  name,
                  dtype,
                  *reinterpret_cast<const float*>(tensor->data()->Data()));
              break;
            case Qnn_DataType_t::QNN_DATATYPE_BOOL_8:
              op->AddScalarParam(name, dtype, *tensor->data()->Data());
              break;
            default:
              QNN_EXECUTORCH_LOG_ERROR(
                  "Invalid scalar type for param %s", tensor->name()->c_str());
              break;
          }
        }
      }
      op_wrappers.push_back(std::move(op));
    }

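    // compile the rebuilt graph into a QNN context and set up graph I/O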
    QnnExecuTorchContextBinary context_binary;
    ET_CHECK_OR_RETURN_ERROR(
        qnn_manager->Compile(op_wrappers, context_binary) == Error::Ok,
        Internal,
        "Failed to compile graph in online prepare stage");

    ET_CHECK_OR_RETURN_ERROR(
        qnn_manager->AllocateTensor(graph_inputs, graph_outputs) == Error::Ok,
        Internal,
        "Failed to allocate tensors in online prepare stage");
  } else {
    ET_CHECK_OR_RETURN_ERROR(
        qnn_manager->AllocateTensor() == Error::Ok,
        Internal,
        "Failed to allocate tensors");
  }
  return qnn_manager;
}

Error QnnExecuTorchBackend::execute(
    __ET_UNUSED BackendExecutionContext& context,
    DelegateHandle* handle,
    EValue** args) const {
  QnnManager* qnn_manager = static_cast<QnnManager*>(handle);

  std::vector<std::shared_ptr<TensorWrapper>> input_tensors =
      qnn_manager->GetGraphInputs();
  std::vector<std::shared_ptr<TensorWrapper>> output_tensors =
      qnn_manager->GetGraphOutputs();
  std::vector<Qnn_Tensor_t> input_tensor_structs;
  std::vector<Qnn_Tensor_t> output_tensor_structs;

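  // wrap caller-provided inputs into QNN tensor structs; input data is copied
  // into backend-managed buffers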
  for (size_t i = 0; i < input_tensors.size(); ++i) {
    input_tensors[i]->FillDataBuffer(
        args[i]->toTensor().const_data_ptr(), true /* copy_data */);
    input_tensor_structs.push_back(input_tensors[i]->CloneTensorStruct());
  }

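  // only graph outputs named with the "output_" prefix are bound to the
  // caller's output EValues; any other outputs keep their existing buffers
  // but are still passed to Execute()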
  int output_index = input_tensors.size();
  for (const auto& output_tensor : output_tensors) {
    // pos=0 limits the search to the prefix
    if (output_tensor->GetName().rfind("output_", 0) == 0) {
      output_tensor->FillDataBuffer(
          args[output_index]->toTensor().mutable_data_ptr(),
          false /* copy_data */);
      output_index++;
    }
    output_tensor_structs.push_back(output_tensor->CloneTensorStruct());
  }

  ET_CHECK_OR_RETURN_ERROR(
      qnn_manager->Execute(input_tensor_structs, output_tensor_structs) ==
          Error::Ok,
      Internal,
      "Failed to execute graph");
  return Error::Ok;
}

void QnnExecuTorchBackend::destroy(DelegateHandle* handle) const {
  if (handle != nullptr) {
    QnnManager* qnn_manager = static_cast<QnnManager*>(handle);
    qnn_manager->Destroy();
    // QnnManager was constructed with placement new in init(), so its
    // destructor has to be invoked explicitly here (see the NOTE in init()).
    qnn_manager->~QnnManager();
  }
}

bool QnnExecuTorchBackend::is_available() const {
  return true;
}

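// Register this backend with the ExecuTorch runtime at static-initialization
// time so delegated payloads tagged "QnnBackend" are dispatched to it.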
namespace {
auto cls = QnnExecuTorchBackend();
Backend backend{"QnnBackend", &cls};
static auto success_with_compiler = register_backend(backend);
} // namespace
} // namespace executor
} // namespace torch