/*
* Copyright (c) 2024 MediaTek Inc.
*
* Licensed under the BSD License (the "License"); you may not use this file
* except in compliance with the License. See the license file in the root
* directory of this source tree for more details.
*/
#include "NeuronBackend.h"
#include "NeuronBufferAllocator.h"
#include "NeuronLog.h"
#include "NeuronPayloadHeader.h"
#include "api/NeuronAdapter.h"
#include "executorch/runtime/core/error.h"
#include <algorithm>
#include <cstring>
#include <memory>
#include <new>
#include <unordered_set>
namespace executorch {
namespace backends {
namespace neuron {
using executorch::runtime::ArrayRef;
using executorch::runtime::BackendExecutionContext;
using executorch::runtime::BackendInitContext;
using executorch::runtime::CompileSpec;
using executorch::runtime::DelegateHandle;
using executorch::runtime::Error;
using executorch::runtime::EValue;
using executorch::runtime::FreeableBuffer;
using executorch::runtime::MemoryAllocator;
using executorch::runtime::Result;
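
// Compile spec keys recognized in init() below.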
const char kHighAddrKey[] = "HighAddr";
const char kImportForeverKey[] = "ImportForever";
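
// Parses the compile specs, wraps the processed buffer in a NeuronPayload,
// allocates the delegate from the runtime allocator, and loads the compiled
// network into it. Returns nullptr if loading fails.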
Result<DelegateHandle*> NeuronBackend::init(
BackendInitContext& context,
FreeableBuffer* processed,
ArrayRef<CompileSpec> compile_specs) const {
NeuronDelegateSetting setting;
for (auto& compile_spec : compile_specs) {
if (std::strcmp(compile_spec.key, kHighAddrKey) == 0) {
setting.mHighAddr = *static_cast<char*>(compile_spec.value.buffer);
LogInfo("NeuronBackend", "IsHighAddr Enable : %d", setting.mHighAddr);
} else if (std::strcmp(compile_spec.key, kImportForeverKey) == 0) {
setting.mImportForever = *static_cast<char*>(compile_spec.value.buffer);
      LogInfo(
          "NeuronBackend",
          "IsImportForever enabled: %d",
          setting.mImportForever);
} else {
LogWarn("NeuronBackend", "unknown compile spec: %s", compile_spec.key);
}
}
auto Payload = NeuronPayload(processed->data(), processed->size());
LogInfo(
"NeuronBackend",
"version %u, input %u, output %u, length %u, payload size: %zu",
Payload.Header.Version,
Payload.Header.InputCount,
Payload.Header.OutputCount,
Payload.Header.DataLen,
processed->size());
MemoryAllocator* runtime_allocator = context.get_runtime_allocator();
  NeuronExecuTorchDelegate* delegate = ET_ALLOCATE_INSTANCE_OR_RETURN_ERROR(
      runtime_allocator, NeuronExecuTorchDelegate);
  if (delegate == nullptr) {
    return nullptr;
  }
  // Construct the delegate in place in the runtime-allocated storage.
  new (delegate) NeuronExecuTorchDelegate();
auto res = delegate->LoadCompiledNetwork(Payload, setting);
return res == NEURON_NO_ERROR ? delegate : nullptr;
}
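
// Recovers the delegate created in init() from the opaque handle and forwards
// execution to it.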
Error NeuronBackend::execute(
ET_UNUSED BackendExecutionContext& context,
DelegateHandle* handle,
EValue** args) const {
NeuronExecuTorchDelegate* delegate =
reinterpret_cast<NeuronExecuTorchDelegate*>(handle);
return delegate->execute(context, args);
}
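
// The delegate was placement-constructed in runtime-allocator memory, so only
// its destructor is run here; the storage itself is not freed.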
void NeuronBackend::destroy(DelegateHandle* handle) const {
if (handle != nullptr) {
NeuronExecuTorchDelegate* delegate =
reinterpret_cast<NeuronExecuTorchDelegate*>(handle);
delegate->~NeuronExecuTorchDelegate();
}
}
bool NeuronBackend::is_available() const {
return true;
}
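
// Binds every input and output tensor to the Neuron executor before running
// the compiled network. Tensors backed by the Neuron buffer allocator are
// bound as imported NeuronMemory regions (and skipped when already cached);
// all other tensors fall back to plain pointer/size bindings.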
Error NeuronExecuTorchDelegate::execute(
BackendExecutionContext& context,
EValue** args) const {
  if (HintNeuronBackend(args) != NEURON_NO_ERROR) {
    return Error::InvalidState;
  }
auto allocator = dynamic_cast<torch::executor::neuron::BufferAllocator*>(
context.get_temp_allocator());
size_t inputCount = mInputSizes.size(), outputCount = mOutputSizes.size();
  for (size_t i = 0; i < inputCount; i++) {
    auto data_ptr = args[i]->toTensor().data_ptr();
    auto data_size = args[i]->toTensor().nbytes();
    if (IsCached</*isInput=*/true>(i, data_ptr)) {
      continue;
    }
    auto unit = allocator != nullptr ? allocator->Find(data_ptr) : nullptr;
    if (unit) {
      UpdateCache</*isInput=*/true>(i, data_ptr);
      size_t offset = (char*)data_ptr - (char*)unit->GetAddress();
      mExecutor.SetInputOutputFromMemory</*isInput=*/true>(
          i, unit->GetNeuronMemory(), offset, data_size);
    } else {
      mExecutor.SetInputOutput</*isInput=*/true>(i, data_ptr, data_size);
    }
}
  for (size_t o = inputCount; o < inputCount + outputCount; o++) {
    auto data_ptr = args[o]->toTensor().data_ptr();
    auto data_size = args[o]->toTensor().nbytes();
    auto output_index = o - inputCount;
    if (IsCached</*isInput=*/false>(output_index, data_ptr)) {
      continue;
    }
    auto unit = allocator != nullptr ? allocator->Find(data_ptr) : nullptr;
    if (unit) {
      UpdateCache</*isInput=*/false>(output_index, data_ptr);
      size_t offset = (char*)data_ptr - (char*)unit->GetAddress();
      mExecutor.SetInputOutputFromMemory</*isInput=*/false>(
          output_index, unit->GetNeuronMemory(), offset, data_size);
    } else {
      mExecutor.SetInputOutput</*isInput=*/false>(
          output_index, data_ptr, data_size);
    }
}
return mExecutor.Compute() == NEURON_NO_ERROR ? Error::Ok
: Error::InvalidState;
}
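
// When the ImportForever compile spec is set, imports each tensor backed by
// the Neuron buffer allocator into the executor once and records its pointer
// in mHasImported so later executions skip the import.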
int NeuronExecuTorchDelegate::HintNeuronBackend(EValue** args) const {
auto HintImportForever = [this](EValue** args) -> int {
auto& allocator = GET_NEURON_ALLOCATOR;
size_t inputCount = mInputSizes.size(), outputCount = mOutputSizes.size();
    for (size_t i = 0; i < inputCount; i++) {
      auto data_ptr = args[i]->toTensor().data_ptr();
      if (mHasImported.count(data_ptr)) {
        continue;
      }
      auto unit = allocator.Find(data_ptr);
      if (unit) {
        mExecutor.SetInputOutputFromMemory</*isInput=*/true>(
            i, unit->GetNeuronMemory(), 0, unit->GetSize());
        mHasImported.insert(data_ptr);
      }
    }
    for (size_t o = inputCount; o < inputCount + outputCount; o++) {
      auto data_ptr = args[o]->toTensor().data_ptr();
      if (mHasImported.count(data_ptr)) {
        continue;
      }
      auto output_index = o - inputCount;
      auto unit = allocator.Find(data_ptr);
      if (unit) {
        mExecutor.SetInputOutputFromMemory</*isInput=*/false>(
            output_index, unit->GetNeuronMemory(), 0, unit->GetSize());
        mHasImported.insert(data_ptr);
      }
    }
return NEURON_NO_ERROR;
};
if (mSettings.mImportForever) {
CHECK_NO_ERROR(HintImportForever(args));
}
return NEURON_NO_ERROR;
}
} // namespace neuron
} // namespace backends
} // namespace executorch
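
// Registers a statically constructed NeuronBackend with the ExecuTorch runtime
// under the name "NeuropilotBackend".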
namespace {
auto cls = executorch::backends::neuron::NeuronBackend();
executorch::runtime::Backend backend{"NeuropilotBackend", &cls};
static auto success_with_compiler =
executorch::runtime::register_backend(backend);
} // namespace