blob: 84c7b97ee0339fd55257a3c639ce61bfc16384bd [file] [log] [blame]
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_DELEGATES_NNAPI_NNAPI_DELEGATE_H_
#define TENSORFLOW_LITE_DELEGATES_NNAPI_NNAPI_DELEGATE_H_
#include <memory>
#include <optional>
#include <string>
#include <unordered_map>
#include <vector>

#include "tensorflow/lite/c/common.h"
// Forward declaration of the NNAPI shared-memory type so this public header
// does not need to pull in the NNAPI/NDK headers.
typedef struct ANeuralNetworksMemory ANeuralNetworksMemory;

namespace tflite {
namespace delegate {
namespace nnapi {
// Forward declaration; the delegate below only stores pointers to it.
class NNAPIDelegateKernel;
}  // namespace nnapi
}  // namespace delegate

using tflite::delegate::nnapi::NNAPIDelegateKernel;
// TFliteDelegate to interface with NNAPI.
// TfLiteDelegate to interface with NNAPI. "Stateful" because each instance
// carries its own options and bookkeeping (registered memories, last NNAPI
// error) in delegate_data_.
class StatefulNnApiDelegate : public TfLiteDelegate {
 public:
  // Encapsulates all options that are specific to the NNAPI delegate.
  struct Options {
    // Preferred power/perf trade-off. For more details please see the
    // ANeuralNetworksCompilation_setPreference documentation in:
    // https://developer.android.com/ndk/reference/group/neural-networks.html
    enum ExecutionPreference {
      kUndefined = -1,
      kLowPower = 0,
      kFastSingleAnswer = 1,
      kSustainedSpeed = 2,
    };

    // Preferred power/perf trade-off.
    ExecutionPreference execution_preference = kUndefined;

    // Selected NNAPI accelerator with nul-terminated name.
    // Default to nullptr, which implies the NNAPI default behavior: NNAPI
    // runtime is allowed to use all available accelerators. If the selected
    // accelerator cannot be found, NNAPI will not be used.
    // It is the caller's responsibility to ensure the string is valid for the
    // duration of the Options object lifetime.
    const char* accelerator_name = nullptr;

    // The nul-terminated cache dir for the NNAPI model.
    // Default to nullptr, which implies the NNAPI will not try caching the
    // compilation.
    const char* cache_dir = nullptr;

    // The unique nul-terminated token string for the NNAPI model.
    // Default to nullptr, which implies the NNAPI will not try caching the
    // compilation. It is the caller's responsibility to ensure there is no
    // clash of the tokens.
    // NOTE: when using compilation caching, it is not recommended to use the
    // same delegate instance for multiple models.
    const char* model_token = nullptr;

    // Whether to disallow NNAPI CPU usage. Only effective on Android 10 and
    // above. The NNAPI CPU typically performs less well than built-in TfLite
    // kernels, but allowing CPU allows partial acceleration of models. If this
    // is set to true, NNAPI is only used if the whole model is accelerated.
    bool disallow_nnapi_cpu = false;
  };

  // Uses default options.
  StatefulNnApiDelegate();

  // The constructor that accepts options from the user.
  explicit StatefulNnApiDelegate(Options options);

  ~StatefulNnApiDelegate() = default;

  // Returns the delegate options.
  static const Options GetOptions(TfLiteDelegate* delegate);

  // Callback function which copies data from ANeuralNetworksMemory to host
  // tensor CPU buffer. It is the user's responsibility to implement these
  // callbacks for the specific types of shared memory they intend to use.
  // WARNING: This is an experimental interface that is subject to change.
  typedef TfLiteStatus (*CopyToHostTensorFnPtr)(TfLiteTensor* tensor,
                                                ANeuralNetworksMemory* memory,
                                                size_t memory_offset,
                                                size_t byte_size,
                                                void* callback_context);

  // Encapsulates all fields related to memory registration for internal
  // bookkeeping only.
  struct MemoryRegistration {
    // The registered NNAPI shared-memory handle (not owned by the delegate).
    ANeuralNetworksMemory* memory;
    // User-supplied callback used to copy data out of `memory` into a host
    // tensor buffer.
    CopyToHostTensorFnPtr callback;
    // Opaque pointer forwarded to `callback` on every invocation.
    void* callback_context;
  };

  // Register the ANeuralNetworksMemory handle with the delegate. A
  // TfLiteBufferHandle will be returned to be used with
  // Interpreter::SetBufferHandle. The callback_context will be passed to the
  // callback function when invoked.
  // Note: the returned TfLiteBufferHandle can only be used with a single
  // Interpreter instance. However, the caller can register the same memory
  // multiple times to get different handles to use with different Interpreter
  // instances.
  // WARNING: This is an experimental interface that is subject to change.
  TfLiteBufferHandle RegisterNnapiMemory(ANeuralNetworksMemory* memory,
                                         CopyToHostTensorFnPtr callback,
                                         void* callback_context);

  // Returns the vector of known ANeuralNetworksMemory handles.
  // Note: this function is not intended to be called by developers.
  // WARNING: This is an experimental interface that is subject to change.
  static const std::vector<MemoryRegistration>& GetTensorMemoryMap(
      TfLiteDelegate* delegate);

  // Returns the int value of the ResultCode returned by the latest
  // failed call to NNAPI, if any. Zero only in case of NO failed calls since
  // the construction of this instance of StatefulNnApiDelegate.
  // The error code is reset when the delegate is re-initialized
  // (i.e. when calling interpreter.ModifyGraphWithDelegate(delegate)).
  int GetNnApiErrno() const;

 private:
  // Encapsulates all delegate data.
  struct Data {
    // Preferred power/perf trade-off.
    Options::ExecutionPreference execution_preference;
    // Selected NNAPI accelerator name.
    std::string accelerator_name;
    // The cache dir for the NNAPI model.
    std::string cache_dir;
    // The unique token string for the NNAPI model.
    std::string model_token;
    // Whether to disallow NNAPI CPU.
    bool disallow_nnapi_cpu;
    // Tensor to ANeuralNetworksMemory mapping; indexed by TfLiteBufferHandle.
    std::vector<MemoryRegistration> tensor_memory_map;
    // Contains a non-zero value if any NNAPI method call
    // operation returned a non-zero result code.
    int nnapi_errno;
    // Cache of kernels already built in StatefulNnApiDelegate::DoPrepare
    // when trying to understand if all nodes are supported by the target
    // accelerators.
    // The key is the index of the first node in the partition.
    // Couldn't use unique_ptr because of problems building on gcc.
    std::unordered_map<int, NNAPIDelegateKernel*> delegate_state_cache;

    // NOTE(review): delegate_state_cache holds raw owning pointers;
    // presumably this destructor deletes them — confirm in the .cc file.
    ~Data();

    // Caches an initialised NNAPIDelegateKernel, keyed by the first node
    // index in delegate_params.
    void CacheDelegateKernel(const TfLiteDelegateParams* delegate_params,
                             NNAPIDelegateKernel* delegate_state);
    // Returns a cached NNAPIDelegateKernel if available, nullopt otherwise.
    std::optional<NNAPIDelegateKernel*> GetCachedDelegateKernel(
        const TfLiteDelegateParams* delegate_params);
  };

  // Implements TfLiteDelegate::Prepare. Please refer to TfLiteDelegate
  // documentation for more info.
  static TfLiteStatus DoPrepare(TfLiteContext* context,
                                TfLiteDelegate* delegate);

  // Copy the data from the delegate buffer handle into raw memory of the
  // given 'tensor'. The delegate is allowed to allocate the raw
  // bytes as long as it follows the rules for kTfLiteDynamic tensors.
  static TfLiteStatus DoCopyFromBufferHandle(TfLiteContext* context,
                                             TfLiteDelegate* delegate,
                                             TfLiteBufferHandle buffer_handle,
                                             TfLiteTensor* tensor);

  // Copy the data from raw memory of the given 'tensor' to the delegate
  // buffer handle. Currently this function is not supported, and calling the
  // function will result in an error.
  static TfLiteStatus DoCopyToBufferHandle(TfLiteContext* context,
                                           TfLiteDelegate* delegate,
                                           TfLiteBufferHandle buffer_handle,
                                           TfLiteTensor* tensor);

  // Free the delegate buffer handle. Note: This only frees the handle, but
  // this doesn't release the underlying resource (e.g. textures). The
  // resources are either owned by application layer or the delegate.
  static void DoFreeBufferHandle(TfLiteContext* context,
                                 TfLiteDelegate* delegate,
                                 TfLiteBufferHandle* handle);

  // Delegate data presented through TfLiteDelegate::data_.
  Data delegate_data_;
};
// DEPRECATED: Please use the StatefulNnApiDelegate class instead.
//
// Returns a singleton delegate that can be used to use the NN API.
// e.g.
//   TfLiteDelegate* delegate = NnApiDelegate();
//   interpreter->ModifyGraphWithDelegate(delegate);
// (The original example declared `NnApiDelegate* delegate` and passed
// `&delegate`; NnApiDelegate is a function, not a type, and the interpreter
// expects the TfLiteDelegate* itself.)
// NnApiDelegate() returns a singleton, so you should not free this
// pointer or worry about its lifetime.
TfLiteDelegate* NnApiDelegate();
} // namespace tflite
#endif // TENSORFLOW_LITE_DELEGATES_NNAPI_NNAPI_DELEGATE_H_