blob: 570cc5dca595b24c67fc9285aed9a1c6975f2deb [file]
/*
* Copyright (c) 2024 MediaTek Inc.
*
* Licensed under the BSD License (the "License"); you may not use this file
* except in compliance with the License. See the license file in the root
* directory of this source tree for more details.
*/
#pragma once
#include "NeuronBufferAllocator.h"
#include "NeuronExecutor.h"
#include "NeuronLog.h"
#include "NeuronPayloadHeader.h"
#include "api/APUWareUtilsLib.h"
#include "api/NeuronAdapter.h"

#include <executorch/runtime/backend/interface.h>
#include <executorch/runtime/core/error.h>
#include <executorch/runtime/core/evalue.h>

#include <cstddef>
#include <memory>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>
namespace executorch {
namespace backends {
namespace neuron {
/**
 * ExecuTorch backend entry point for the MediaTek Neuron NPU.
 *
 * The class itself holds no state (all methods are const and it has no data
 * members); per-model state lives in the DelegateHandle returned by init().
 */
class NeuronBackend final : public ::executorch::runtime::BackendInterface {
 public:
  // Turns the AOT-processed payload plus compile specs into a delegate
  // handle, which callers later pass to execute() and destroy().
  ::executorch::runtime::Result<::executorch::runtime::DelegateHandle*> init(
      ::executorch::runtime::BackendInitContext& context,
      ::executorch::runtime::FreeableBuffer* processed,
      ::executorch::runtime::ArrayRef<::executorch::runtime::CompileSpec>
          compile_specs) const override;

  // Runs one inference on the handle produced by init(); `args` carries the
  // model inputs and outputs as EValues.
  ::executorch::runtime::Error execute(
      ET_UNUSED ::executorch::runtime::BackendExecutionContext& context,
      ::executorch::runtime::DelegateHandle* handle,
      ::executorch::runtime::EValue** args) const override;

  // Releases everything owned by `handle`.
  void destroy(::executorch::runtime::DelegateHandle* handle) const override;

  // Whether the Neuron runtime is usable on the current device.
  bool is_available() const override;
};
// CompileSpec key names; definitions live in the .cpp.
// NOTE(review): presumably matched against the `compile_specs` passed to
// NeuronBackend::init() to populate NeuronDelegateSetting — confirm in the
// implementation file.
extern const char kHighAddrKey[];
extern const char kImportForeverKey[];
/**
 * Per-delegate options forwarded to the Neuron runtime.
 */
struct NeuronDelegateSetting {
  // Enables the "high_addr" apusys runtime option.
  bool mHighAddr = false;
  // Enables the "import_forever" apusys runtime option.
  bool mImportForever = false;

  /**
   * Serializes the enabled flags into a "--apusys-config" runtime option
   * string, e.g.
   *   --apusys-config "{ \"high_addr\": true, \"import_forever\": true }"
   *
   * @return The option string, or an empty string when no flag is set.
   *
   * Marked const (it only reads the flags); the former version duplicated
   * the full literal once per flag combination.
   */
  std::string ToRuntimeOption() const {
    std::string entries;
    if (mHighAddr) {
      entries += "\\\"high_addr\\\": true";
    }
    if (mImportForever) {
      if (!entries.empty()) {
        entries += ", ";
      }
      entries += "\\\"import_forever\\\": true";
    }
    if (entries.empty()) {
      return "";
    }
    // Outer escaped quotes and "{ ... }" spacing match the original
    // hard-coded literals byte-for-byte.
    return "--apusys-config \"{ " + entries + " }\"";
  }
};
class NeuronExecuTorchDelegate {
public:
class MemoryCache {
public:
template <bool isInput>
bool IsCached(int i, void* ptr) {
const auto& cache = isInput ? mInputCache : mOutputCache;
auto it = cache.find(i);
return (it != cache.end()) && (ptr == it->second);
}
template <bool isInput>
void UpdateCache(int i, void* ptr) {
(isInput ? mInputCache[i] : mOutputCache[i]) = ptr;
return;
}
private:
std::unordered_map<int, void*> mInputCache;
std::unordered_map<int, void*> mOutputCache;
};
NeuronExecuTorchDelegate() {}
~NeuronExecuTorchDelegate() {
mPLock->Stop();
}
int LoadCompiledNetwork(
NeuronPayload payload,
NeuronDelegateSetting options) {
mSettings = options;
auto runtimeOption = mSettings.ToRuntimeOption();
auto res = mExecutor.LoadFromCompiledNetwork(
payload.CompiledNetwork,
payload.Header.DataLen,
payload.Header.InputCount,
payload.Header.OutputCount,
runtimeOption);
CHECK_NO_ERROR(res);
CHECK_TRUE(mExecutor.IsValid());
SummaryIoCounts();
mPLock = std::unique_ptr<ScopePerformancer>(new ScopePerformancer);
return NEURON_NO_ERROR;
}
::executorch::runtime::Error execute(
ET_UNUSED ::executorch::runtime::BackendExecutionContext& context,
::executorch::runtime::EValue** args) const;
private:
template <bool isInput>
bool IsCached(int index, void* ptr) const {
return mCache.IsCached</*isInput=*/isInput>(index, ptr);
}
template <bool isInput>
void UpdateCache(int index, void* ptr) const {
mCache.UpdateCache<isInput>(index, ptr);
}
int SummaryIoCounts() {
for (int i = 0;; i++) {
size_t size = mExecutor.GetInputOutputPaddedSize</*isInput*/ true>(i);
if (size == 0) {
break;
}
LogInfo("NeuronBackend", "Model input:%d size: %lu", i, size);
mInputSizes.push_back(size);
}
for (int o = 0;; o++) {
size_t size = mExecutor.GetInputOutputPaddedSize</*isInput*/ false>(o);
if (size == 0) {
break;
}
LogInfo("NeuronBackend", "Model output:%d size: %lu", o, size);
mOutputSizes.push_back(size);
}
return NEURON_NO_ERROR;
}
int HintNeuronBackend(::executorch::runtime::EValue** args) const;
private:
std::vector<size_t> mInputSizes;
std::vector<size_t> mOutputSizes;
mutable MemoryCache mCache;
std::unique_ptr<ScopePerformancer> mPLock;
neuron::NeuronExecutor mExecutor;
NeuronDelegateSetting mSettings;
mutable std::unordered_set<const void*> mHasImported;
private:
NeuronExecuTorchDelegate(const NeuronExecuTorchDelegate&);
NeuronExecuTorchDelegate operator=(const NeuronExecuTorchDelegate&);
};
} // namespace neuron
} // namespace backends
} // namespace executorch