blob: 570cc5dca595b24c67fc9285aed9a1c6975f2deb [file]
/*
* Copyright (c) 2024 MediaTek Inc.
*
* Licensed under the BSD License (the "License"); you may not use this file
* except in compliance with the License. See the license file in the root
* directory of this source tree for more details.
*/
#pragma once
#include "NeuronBufferAllocator.h"
#include "NeuronExecutor.h"
#include "NeuronLog.h"
#include "NeuronPayloadHeader.h"
#include "api/APUWareUtilsLib.h"
#include "api/NeuronAdapter.h"

#include <executorch/runtime/backend/interface.h>
#include <executorch/runtime/core/error.h>
#include <executorch/runtime/core/evalue.h>

#include <cstddef>
#include <memory>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>
namespace executorch {
namespace backends {
namespace neuron {
/**
 * ExecuTorch backend entry point for the MediaTek Neuron NPU.
 *
 * The class itself holds no state (all methods are const and it has no data
 * members); per-model state lives in the DelegateHandle returned by init().
 */
class NeuronBackend final : public ::executorch::runtime::BackendInterface {
 public:
  // Turns the AOT-processed payload plus compile specs into a delegate
  // handle, which callers later pass to execute() and destroy().
  ::executorch::runtime::Result<::executorch::runtime::DelegateHandle*> init(
      ::executorch::runtime::BackendInitContext& context,
      ::executorch::runtime::FreeableBuffer* processed,
      ::executorch::runtime::ArrayRef<::executorch::runtime::CompileSpec>
          compile_specs) const override;

  // Runs one inference on the handle produced by init(); `args` carries the
  // model inputs and outputs as EValues.
  ::executorch::runtime::Error execute(
      ET_UNUSED ::executorch::runtime::BackendExecutionContext& context,
      ::executorch::runtime::DelegateHandle* handle,
      ::executorch::runtime::EValue** args) const override;

  // Releases everything owned by `handle`.
  void destroy(::executorch::runtime::DelegateHandle* handle) const override;

  // Whether the Neuron runtime is usable on the current device.
  bool is_available() const override;
};
// CompileSpec key names; definitions live in the .cpp.
// NOTE(review): presumably matched against the `compile_specs` passed to
// NeuronBackend::init() to populate NeuronDelegateSetting — confirm in the
// implementation file.
extern const char kHighAddrKey[];
extern const char kImportForeverKey[];
/**
 * Per-delegate options forwarded to the Neuron runtime.
 */
struct NeuronDelegateSetting {
  // Enables the "high_addr" apusys runtime option.
  bool mHighAddr = false;
  // Enables the "import_forever" apusys runtime option.
  bool mImportForever = false;

  /**
   * Serializes the enabled flags into a "--apusys-config" runtime option
   * string, e.g.
   *   --apusys-config "{ \"high_addr\": true, \"import_forever\": true }"
   *
   * @return The option string, or an empty string when no flag is set.
   *
   * Marked const (it only reads the flags); the former version duplicated
   * the full literal once per flag combination.
   */
  std::string ToRuntimeOption() const {
    std::string entries;
    if (mHighAddr) {
      entries += "\\\"high_addr\\\": true";
    }
    if (mImportForever) {
      if (!entries.empty()) {
        entries += ", ";
      }
      entries += "\\\"import_forever\\\": true";
    }
    if (entries.empty()) {
      return "";
    }
    // Outer escaped quotes and "{ ... }" spacing match the original
    // hard-coded literals byte-for-byte.
    return "--apusys-config \"{ " + entries + " }\"";
  }
};
class NeuronExecuTorchDelegate {
public:
class MemoryCache {
public:
template <bool isInput>
bool IsCached(int i, void* ptr) {
const auto& cache = isInput ? mInputCache : mOutputCache;
auto it = cache.find(i);
return (it != cache.end()) && (ptr == it->second);
}
template <bool isInput>
void UpdateCache(int i, void* ptr) {
(isInput ? mInputCache[i] : mOutputCache[i]) = ptr;
return;
}
private:
std::unordered_map<int, void*> mInputCache;
std::unordered_map<int, void*> mOutputCache;
};
NeuronExecuTorchDelegate() {}
~NeuronExecuTorchDelegate() {
mPLock->Stop();
}
int LoadCompiledNetwork(
NeuronPayload payload,
NeuronDelegateSetting options) {
mSettings = options;
auto runtimeOption = mSettings.ToRuntimeOption();
auto res = mExecutor.LoadFromCompiledNetwork(
payload.CompiledNetwork,
payload.Header.DataLen,
payload.Header.InputCount,
payload.Header.OutputCount,
runtimeOption);
CHECK_NO_ERROR(res);
CHECK_TRUE(mExecutor.IsValid());
SummaryIoCounts();
mPLock = std::unique_ptr<ScopePerformancer>(new ScopePerformancer);
return NEURON_NO_ERROR;
}
::executorch::runtime::Error execute(
ET_UNUSED ::executorch::runtime::BackendExecutionContext& context,
::executorch::runtime::EValue** args) const;
private:
template <bool isInput>
bool IsCached(int index, void* ptr) const {
return mCache.IsCached</*isInput=*/isInput>(index, ptr);
}
template <bool isInput>
void UpdateCache(int index, void* ptr) const {
mCache.UpdateCache<isInput>(index, ptr);
}
int SummaryIoCounts() {
for (int i = 0;; i++) {
size_t size = mExecutor.GetInputOutputPaddedSize</*isInput*/ true>(i);
if (size == 0) {
break;
}
LogInfo("NeuronBackend", "Model input:%d size: %lu", i, size);
mInputSizes.push_back(size);
}
for (int o = 0;; o++) {
size_t size = mExecutor.GetInputOutputPaddedSize</*isInput*/ false>(o);
if (size == 0) {
break;
}
LogInfo("NeuronBackend", "Model output:%d size: %lu", o, size);
mOutputSizes.push_back(size);
}
return NEURON_NO_ERROR;
}
int HintNeuronBackend(::executorch::runtime::EValue** args) const;
private:
std::vector<size_t> mInputSizes;
std::vector<size_t> mOutputSizes;
mutable MemoryCache mCache;
std::unique_ptr<ScopePerformancer> mPLock;
neuron::NeuronExecutor mExecutor;
NeuronDelegateSetting mSettings;
mutable std::unordered_set<const void*> mHasImported;
private:
NeuronExecuTorchDelegate(const NeuronExecuTorchDelegate&);
NeuronExecuTorchDelegate operator=(const NeuronExecuTorchDelegate&);
};
} // namespace neuron
} // namespace backends
} // namespace executorch