/*
* Copyright (c) 2024 MediaTek Inc.
*
* Licensed under the BSD License (the "License"); you may not use this file
* except in compliance with the License. See the license file in the root
* directory of this source tree for more details.
*/
#include "NeuronBackend.h"
#include "NeuronBufferAllocator.h"
#include "NeuronLog.h"
#include "NeuronPayloadHeader.h"
#include "api/NeuronAdapter.h"
#include "executorch/runtime/core/error.h"
#include <algorithm>
#include <cstring>
#include <memory>
#include <new>
#include <unordered_set>
namespace executorch {
namespace backends {
namespace neuron {
using executorch::runtime::ArrayRef;
using executorch::runtime::BackendExecutionContext;
using executorch::runtime::BackendInitContext;
using executorch::runtime::CompileSpec;
using executorch::runtime::DelegateHandle;
using executorch::runtime::Error;
using executorch::runtime::EValue;
using executorch::runtime::FreeableBuffer;
using executorch::runtime::MemoryAllocator;
using executorch::runtime::Result;
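
// Compile spec keys recognized in init() below.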
const char kHighAddrKey[] = "HighAddr";
const char kImportForeverKey[] = "ImportForever";
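
// Parses the compile specs, wraps the processed buffer in a NeuronPayload,
// allocates the delegate from the runtime allocator, and loads the compiled
// network into it. Returns nullptr if loading fails.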
Result<DelegateHandle*> NeuronBackend::init(
BackendInitContext& context,
FreeableBuffer* processed,
ArrayRef<CompileSpec> compile_specs) const {
NeuronDelegateSetting setting;
for (auto& compile_spec : compile_specs) {
if (std::strcmp(compile_spec.key, kHighAddrKey) == 0) {
setting.mHighAddr = *static_cast<char*>(compile_spec.value.buffer);
LogInfo("NeuronBackend", "IsHighAddr Enable : %d", setting.mHighAddr);
} else if (std::strcmp(compile_spec.key, kImportForeverKey) == 0) {
setting.mImportForever = *static_cast<char*>(compile_spec.value.buffer);
      LogInfo(
          "NeuronBackend",
          "IsImportForever enabled: %d",
          setting.mImportForever);
} else {
LogWarn("NeuronBackend", "unknown compile spec: %s", compile_spec.key);
}
}
auto Payload = NeuronPayload(processed->data(), processed->size());
LogInfo(
"NeuronBackend",
"version %u, input %u, output %u, length %u, payload size: %zu",
Payload.Header.Version,
Payload.Header.InputCount,
Payload.Header.OutputCount,
Payload.Header.DataLen,
processed->size());
MemoryAllocator* runtime_allocator = context.get_runtime_allocator();
  NeuronExecuTorchDelegate* delegate = ET_ALLOCATE_INSTANCE_OR_RETURN_ERROR(
      runtime_allocator, NeuronExecuTorchDelegate);
  if (delegate == nullptr) {
    return nullptr;
  }
  // Construct the delegate in place in the runtime-allocated storage.
  new (delegate) NeuronExecuTorchDelegate();
auto res = delegate->LoadCompiledNetwork(Payload, setting);
return res == NEURON_NO_ERROR ? delegate : nullptr;
}
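
// Recovers the delegate created in init() from the opaque handle and forwards
// execution to it.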
Error NeuronBackend::execute(
ET_UNUSED BackendExecutionContext& context,
DelegateHandle* handle,
EValue** args) const {
NeuronExecuTorchDelegate* delegate =
reinterpret_cast<NeuronExecuTorchDelegate*>(handle);
return delegate->execute(context, args);
}
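
// The delegate was placement-constructed in runtime-allocator memory, so only
// its destructor is run here; the storage itself is not freed.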
void NeuronBackend::destroy(DelegateHandle* handle) const {
if (handle != nullptr) {
NeuronExecuTorchDelegate* delegate =
reinterpret_cast<NeuronExecuTorchDelegate*>(handle);
delegate->~NeuronExecuTorchDelegate();
}
}
bool NeuronBackend::is_available() const {
return true;
}
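
// Binds every input and output tensor to the Neuron executor before running
// the compiled network. Tensors backed by the Neuron buffer allocator are
// bound as imported NeuronMemory regions (and skipped when already cached);
// all other tensors fall back to plain pointer/size bindings.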
Error NeuronExecuTorchDelegate::execute(
BackendExecutionContext& context,
EValue** args) const {
  if (HintNeuronBackend(args) != NEURON_NO_ERROR) {
    return Error::InvalidState;
  }
auto allocator = dynamic_cast<torch::executor::neuron::BufferAllocator*>(
context.get_temp_allocator());
size_t inputCount = mInputSizes.size(), outputCount = mOutputSizes.size();
  for (size_t i = 0; i < inputCount; i++) {
    auto data_ptr = args[i]->toTensor().data_ptr();
    auto data_size = args[i]->toTensor().nbytes();
    if (IsCached</*isInput=*/true>(i, data_ptr)) {
      continue;
    }
    auto unit = allocator != nullptr ? allocator->Find(data_ptr) : nullptr;
    if (unit) {
      UpdateCache</*isInput=*/true>(i, data_ptr);
      size_t offset = (char*)data_ptr - (char*)unit->GetAddress();
      mExecutor.SetInputOutputFromMemory</*isInput=*/true>(
          i, unit->GetNeuronMemory(), offset, data_size);
    } else {
      mExecutor.SetInputOutput</*isInput=*/true>(i, data_ptr, data_size);
    }
}
  for (size_t o = inputCount; o < inputCount + outputCount; o++) {
    auto data_ptr = args[o]->toTensor().data_ptr();
    auto data_size = args[o]->toTensor().nbytes();
    auto output_index = o - inputCount;
    if (IsCached</*isInput=*/false>(output_index, data_ptr)) {
      continue;
    }
    auto unit = allocator != nullptr ? allocator->Find(data_ptr) : nullptr;
    if (unit) {
      UpdateCache</*isInput=*/false>(output_index, data_ptr);
      size_t offset = (char*)data_ptr - (char*)unit->GetAddress();
      mExecutor.SetInputOutputFromMemory</*isInput=*/false>(
          output_index, unit->GetNeuronMemory(), offset, data_size);
    } else {
      mExecutor.SetInputOutput</*isInput=*/false>(
          output_index, data_ptr, data_size);
    }
}
return mExecutor.Compute() == NEURON_NO_ERROR ? Error::Ok
: Error::InvalidState;
}
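
// When the ImportForever compile spec is set, imports each tensor backed by
// the Neuron buffer allocator into the executor once and records its pointer
// in mHasImported so later executions skip the import.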
int NeuronExecuTorchDelegate::HintNeuronBackend(EValue** args) const {
auto HintImportForever = [this](EValue** args) -> int {
auto& allocator = GET_NEURON_ALLOCATOR;
size_t inputCount = mInputSizes.size(), outputCount = mOutputSizes.size();
    for (size_t i = 0; i < inputCount; i++) {
      auto data_ptr = args[i]->toTensor().data_ptr();
      if (mHasImported.count(data_ptr)) {
        continue;
      }
      auto unit = allocator.Find(data_ptr);
      if (unit) {
        mExecutor.SetInputOutputFromMemory</*isInput=*/true>(
            i, unit->GetNeuronMemory(), 0, unit->GetSize());
        mHasImported.insert(data_ptr);
      }
    }
    for (size_t o = inputCount; o < inputCount + outputCount; o++) {
      auto data_ptr = args[o]->toTensor().data_ptr();
      if (mHasImported.count(data_ptr)) {
        continue;
      }
      auto output_index = o - inputCount;
      auto unit = allocator.Find(data_ptr);
      if (unit) {
        mExecutor.SetInputOutputFromMemory</*isInput=*/false>(
            output_index, unit->GetNeuronMemory(), 0, unit->GetSize());
        mHasImported.insert(data_ptr);
      }
    }
return NEURON_NO_ERROR;
};
if (mSettings.mImportForever) {
CHECK_NO_ERROR(HintImportForever(args));
}
return NEURON_NO_ERROR;
}
} // namespace neuron
} // namespace backends
} // namespace executorch
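
// Registers a statically constructed NeuronBackend with the ExecuTorch runtime
// under the name "NeuropilotBackend".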
namespace {
auto cls = executorch::backends::neuron::NeuronBackend();
executorch::runtime::Backend backend{"NeuropilotBackend", &cls};
static auto success_with_compiler =
executorch::runtime::register_backend(backend);
} // namespace