jni/run_tflite.h - platform/test/mlts/benchmark - Git at Google

 /**
  * Copyright 2017 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  *      http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 #ifndef COM_EXAMPLE_ANDROID_NN_BENCHMARK_RUN_TFLITE_H
 #define COM_EXAMPLE_ANDROID_NN_BENCHMARK_RUN_TFLITE_H

 #include "tensorflow/lite/delegates/gpu/delegate.h"
 #include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"
 #include "tensorflow/lite/interpreter.h"
 #include "tensorflow/lite/model.h"

 #include <memory>
 #include <unistd.h>
 #include <vector>

 struct InferenceOutput {
   uint8_t* ptr;
   size_t size;
 };

 // Inputs and expected outputs for inference
 struct InferenceInOut {
   // Input can either be directly specified as a pointer or indirectly with
   // the createInput callback. This is needed for large datasets where
   // allocating memory for all inputs at once is not feasible.
   uint8_t* input;
   size_t input_size;

   std::vector<InferenceOutput> outputs;
   std::function<bool(uint8_t*, size_t)> createInput;
 };

 // Inputs and expected outputs for an inference sequence.
 using InferenceInOutSequence = std::vector<InferenceInOut>;

 // Result of a single inference
 struct InferenceResult {
   float computeTimeSec;
   // MSE for each output
   std::vector<float> meanSquareErrors;
   // Max single error for each output
   std::vector<float> maxSingleErrors;
   // Outputs
   std::vector<std::vector<uint8_t>> inferenceOutputs;
   int inputOutputSequenceIndex;
   int inputOutputIndex;
 };

 struct CompilationBenchmarkResult {
   std::vector<float> compileWithoutCacheTimeSec;
   // The following optional fields have no value if compilation caching is not supported.
   std::optional<std::vector<float>> saveToCacheTimeSec;
   std::optional<std::vector<float>> prepareFromCacheTimeSec;
   // The total size of cache files. It is zero if compilation caching is not supported.
   int cacheSizeBytes = 0;
 };

 /** Discard inference output in inference results. */
 const int FLAG_DISCARD_INFERENCE_OUTPUT = 1 << 0;
 /** Do not expect golden output for inference inputs. */
 const int FLAG_IGNORE_GOLDEN_OUTPUT = 1 << 1;
 /** Collect only 1 benchmark result every INFERENCE_OUT_SAMPLE_RATE **/
 const int FLAG_SAMPLE_BENCHMARK_RESULTS = 1 << 2;

 const int INFERENCE_OUT_SAMPLE_RATE = 10;

 enum class CompilationBenchmarkType {
   // Benchmark without cache
   WITHOUT_CACHE,
   // Benchmark cache miss
   SAVE_TO_CACHE,
   // Benchmark cache hit
   PREPARE_FROM_CACHE,
 };

 /** TFLite backend. */
 constexpr int TFLITE_CPU = 0;
 constexpr int TFLITE_NNAPI = 1;
 constexpr int TFLITE_GPU = 2;

 class BenchmarkModel {
  public:
   ~BenchmarkModel();

   static BenchmarkModel* create(const char* modelfile, int tfliteBackend,
                                 bool enable_intermediate_tensors_dump,
                                 int* nnapiErrno, const char* nnapi_device_name,
                                 bool mmapModel, const char* nnapi_cache_dir);

   bool resizeInputTensors(std::vector<int> shape);
   bool setInput(const uint8_t* dataPtr, size_t length);
   bool runInference();
   // Resets TFLite states (RNN/LSTM states etc).
   bool resetStates();

   bool benchmark(const std::vector<InferenceInOutSequence>& inOutData,
                  int seqInferencesMaxCount, float timeout, int flags,
                  std::vector<InferenceResult>* result);

   bool benchmarkCompilation(int maxNumIterations, float warmupTimeout, float runTimeout,
                             CompilationBenchmarkResult* result);

   bool dumpAllLayers(const char* path,
                      const std::vector<InferenceInOutSequence>& inOutData);

  private:
   BenchmarkModel() = default;
   bool init(const char* modelfile, int tfliteBackend,
             bool enable_intermediate_tensors_dump,
             int* nnapiErrno, const char* nnapi_device_name,
             /* flag to choose between memory mapping the model and initializing
                 the model from programs memory*/
             bool mmapModel,
             const char* nnapi_cache_dir);

   void getOutputError(const uint8_t* dataPtr, size_t length,
                       InferenceResult* result, int output_index);
   void saveInferenceOutput(InferenceResult* result, int output_index);

   bool runCompilation(const char* cacheDir);
   bool benchmarkSingleTypeOfCompilation(CompilationBenchmarkType type, int maxNumIterations,
                                         float timeout, std::vector<float>* results);
   bool benchmarkSingleTypeOfCompilationWithWarmup(CompilationBenchmarkType type,
                                                   int maxNumIterations, float warmupTimeout,
                                                   float runTimeout, std::vector<float>* results);
   bool getCompilationCacheSize(int* cacheSizeBytes);

   std::string mModelBuffer;
   std::unique_ptr<tflite::FlatBufferModel> mTfliteModel;
   std::unique_ptr<tflite::Interpreter> mTfliteInterpreter;
   std::unique_ptr<tflite::StatefulNnApiDelegate> mTfliteNnapiDelegate;
   // Store indices of output tensors, used to dump intermediate tensors
   std::vector<int> outputs;

   // Parameters for compilation
   std::string mModelFile;
   std::optional<std::string> mCacheDir;
   std::string mNnApiDeviceName;
 #if defined(NN_BENCHMARK_ENABLE_GPU)
   TfLiteDelegate* mGpuDelegate;
 #endif  // defined(NN_BENCHMARK_ENABLE_GPU)
   int mTfliteBackend;
 };

 #endif  // COM_EXAMPLE_ANDROID_NN_BENCHMARK_RUN_TFLITE_H
	/**
	* Copyright 2017 The Android Open Source Project
	*
	* Licensed under the Apache License, Version 2.0 (the "License");
	* you may not use this file except in compliance with the License.
	* You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	#ifndef COM_EXAMPLE_ANDROID_NN_BENCHMARK_RUN_TFLITE_H
	#define COM_EXAMPLE_ANDROID_NN_BENCHMARK_RUN_TFLITE_H

	#include "tensorflow/lite/delegates/gpu/delegate.h"
	#include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"
	#include "tensorflow/lite/interpreter.h"
	#include "tensorflow/lite/model.h"

	#include <memory>
	#include <unistd.h>
	#include <vector>

	struct InferenceOutput {
	uint8_t* ptr;
	size_t size;
	};

	// Inputs and expected outputs for inference
	struct InferenceInOut {
	// Input can either be directly specified as a pointer or indirectly with
	// the createInput callback. This is needed for large datasets where
	// allocating memory for all inputs at once is not feasible.
	uint8_t* input;
	size_t input_size;

	std::vector<InferenceOutput> outputs;
	std::function<bool(uint8_t*, size_t)> createInput;
	};

	// Inputs and expected outputs for an inference sequence.
	using InferenceInOutSequence = std::vector<InferenceInOut>;

	// Result of a single inference
	struct InferenceResult {
	float computeTimeSec;
	// MSE for each output
	std::vector<float> meanSquareErrors;
	// Max single error for each output
	std::vector<float> maxSingleErrors;
	// Outputs
	std::vector<std::vector<uint8_t>> inferenceOutputs;
	int inputOutputSequenceIndex;
	int inputOutputIndex;
	};

	struct CompilationBenchmarkResult {
	std::vector<float> compileWithoutCacheTimeSec;
	// The following optional fields have no value if compilation caching is not supported.
	std::optional<std::vector<float>> saveToCacheTimeSec;
	std::optional<std::vector<float>> prepareFromCacheTimeSec;
	// The total size of cache files. It is zero if compilation caching is not supported.
	int cacheSizeBytes = 0;
	};

	/** Discard inference output in inference results. */
	const int FLAG_DISCARD_INFERENCE_OUTPUT = 1 << 0;
	/** Do not expect golden output for inference inputs. */
	const int FLAG_IGNORE_GOLDEN_OUTPUT = 1 << 1;
	/ Collect only 1 benchmark result every INFERENCE_OUT_SAMPLE_RATE /
	const int FLAG_SAMPLE_BENCHMARK_RESULTS = 1 << 2;

	const int INFERENCE_OUT_SAMPLE_RATE = 10;

	enum class CompilationBenchmarkType {
	// Benchmark without cache
	WITHOUT_CACHE,
	// Benchmark cache miss
	SAVE_TO_CACHE,
	// Benchmark cache hit
	PREPARE_FROM_CACHE,
	};

	/** TFLite backend. */
	constexpr int TFLITE_CPU = 0;
	constexpr int TFLITE_NNAPI = 1;
	constexpr int TFLITE_GPU = 2;

	class BenchmarkModel {
	public:
	~BenchmarkModel();

	static BenchmarkModel* create(const char* modelfile, int tfliteBackend,
	bool enable_intermediate_tensors_dump,
	int* nnapiErrno, const char* nnapi_device_name,
	bool mmapModel, const char* nnapi_cache_dir);

	bool resizeInputTensors(std::vector<int> shape);
	bool setInput(const uint8_t* dataPtr, size_t length);
	bool runInference();
	// Resets TFLite states (RNN/LSTM states etc).
	bool resetStates();

	bool benchmark(const std::vector<InferenceInOutSequence>& inOutData,
	int seqInferencesMaxCount, float timeout, int flags,
	std::vector<InferenceResult>* result);

	bool benchmarkCompilation(int maxNumIterations, float warmupTimeout, float runTimeout,
	CompilationBenchmarkResult* result);

	bool dumpAllLayers(const char* path,
	const std::vector<InferenceInOutSequence>& inOutData);

	private:
	BenchmarkModel() = default;
	bool init(const char* modelfile, int tfliteBackend,
	bool enable_intermediate_tensors_dump,
	int* nnapiErrno, const char* nnapi_device_name,
	/* flag to choose between memory mapping the model and initializing
	the model from programs memory*/
	bool mmapModel,
	const char* nnapi_cache_dir);

	void getOutputError(const uint8_t* dataPtr, size_t length,
	InferenceResult* result, int output_index);
	void saveInferenceOutput(InferenceResult* result, int output_index);

	bool runCompilation(const char* cacheDir);
	bool benchmarkSingleTypeOfCompilation(CompilationBenchmarkType type, int maxNumIterations,
	float timeout, std::vector<float>* results);
	bool benchmarkSingleTypeOfCompilationWithWarmup(CompilationBenchmarkType type,
	int maxNumIterations, float warmupTimeout,
	float runTimeout, std::vector<float>* results);
	bool getCompilationCacheSize(int* cacheSizeBytes);

	std::string mModelBuffer;
	std::unique_ptr<tflite::FlatBufferModel> mTfliteModel;
	std::unique_ptr<tflite::Interpreter> mTfliteInterpreter;
	std::unique_ptr<tflite::StatefulNnApiDelegate> mTfliteNnapiDelegate;
	// Store indices of output tensors, used to dump intermediate tensors
	std::vector<int> outputs;

	// Parameters for compilation
	std::string mModelFile;
	std::optional<std::string> mCacheDir;
	std::string mNnApiDeviceName;
	#if defined(NN_BENCHMARK_ENABLE_GPU)
	TfLiteDelegate* mGpuDelegate;
	#endif // defined(NN_BENCHMARK_ENABLE_GPU)
	int mTfliteBackend;
	};

	#endif // COM_EXAMPLE_ANDROID_NN_BENCHMARK_RUN_TFLITE_H