| /* Copyright 2018 The TensorFlow Authors. All Rights Reserved. |
| |
| Licensed under the Apache License, Version 2.0 (the "License"); |
| you may not use this file except in compliance with the License. |
| You may obtain a copy of the License at |
| |
| http://www.apache.org/licenses/LICENSE-2.0 |
| |
| Unless required by applicable law or agreed to in writing, software |
| distributed under the License is distributed on an "AS IS" BASIS, |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| See the License for the specific language governing permissions and |
| limitations under the License. |
| ==============================================================================*/ |
| |
| #include "tensorflow/lite/tools/benchmark/benchmark_tflite_model.h" |
| |
| #include <cstdarg> |
| #include <cstdlib> |
| #include <iostream> |
| #include <memory> |
| #include <string> |
| #include <unordered_set> |
| #include <vector> |
| |
| #include "tensorflow/lite/kernels/register.h" |
| #include "tensorflow/lite/model.h" |
| #include "tensorflow/lite/op_resolver.h" |
| #include "tensorflow/lite/profiling/buffered_profiler.h" |
| #include "tensorflow/lite/profiling/profile_summarizer.h" |
| #include "tensorflow/lite/string_util.h" |
| #include "tensorflow/lite/tools/benchmark/benchmark_utils.h" |
| #include "tensorflow/lite/tools/benchmark/logging.h" |
| #include "tensorflow/lite/tools/evaluation/utils.h" |
| |
| #ifdef GEMMLOWP_PROFILING |
| #include "profiling/profiler.h" |
| #endif |
| |
| #ifdef TFLITE_CUSTOM_OPS_HEADER |
| void RegisterSelectedOps(::tflite::MutableOpResolver* resolver); |
| #endif |
| |
| namespace tflite { |
| namespace benchmark { |
| namespace { |
| |
// Backward compat with previous approach to enabling op profiling.
// The constant is a boolean (it is passed to BenchmarkParam::Create<bool>),
// so declare it as one rather than as int.
#if defined(TFLITE_PROFILING_ENABLED)
constexpr bool kOpProfilingEnabledDefault = true;
#else
constexpr bool kOpProfilingEnabledDefault = false;
#endif
| |
// Dumps profiling events if profiling is enabled.
//
// Installs a BufferedProfiler on the interpreter at construction time,
// captures op-level events during REGULAR (timed) runs, and prints the
// aggregated summary when the benchmark ends.
class ProfilingListener : public BenchmarkListener {
 public:
  // `interpreter` must be non-null and outlive this listener;
  // `max_num_entries` bounds the profiler's event buffer.
  explicit ProfilingListener(Interpreter* interpreter, uint32_t max_num_entries)
      : interpreter_(interpreter),
        profiler_(max_num_entries),
        has_profiles_(false) {
    TFLITE_BENCHMARK_CHECK(interpreter);
    interpreter_->SetProfiler(&profiler_);
  }

  // Starts event capture, but only for REGULAR (timed) runs.
  void OnSingleRunStart(RunType run_type) override;

  // Stops capture and folds the collected events into the summary.
  void OnSingleRunEnd() override;

  // Prints the accumulated profile summary, if any events were recorded.
  void OnBenchmarkEnd(const BenchmarkResults& results) override;

 private:
  Interpreter* interpreter_;  // Not owned.
  profiling::BufferedProfiler profiler_;
  profiling::ProfileSummarizer summarizer_;
  bool has_profiles_;  // True once at least one run produced events.
};
| |
// Dumps gemmlowp profiling events if gemmlowp profiling is enabled.
//
// Both hooks compile to no-ops unless the binary is built with
// GEMMLOWP_PROFILING defined.
class GemmlowpProfilingListener : public BenchmarkListener {
 public:
  // Registers the current thread with gemmlowp and starts its profiler.
  void OnBenchmarkStart(const BenchmarkParams& params) override;

  // Finishes gemmlowp profiling.
  void OnBenchmarkEnd(const BenchmarkResults& results) override;
};
| |
| void ProfilingListener::OnSingleRunStart(RunType run_type) { |
| if (run_type == REGULAR) { |
| profiler_.Reset(); |
| profiler_.StartProfiling(); |
| } |
| } |
| |
| void ProfilingListener::OnBenchmarkEnd(const BenchmarkResults& results) { |
| if (has_profiles_) { |
| TFLITE_LOG(INFO) << summarizer_.GetOutputString(); |
| } |
| } |
| |
| void ProfilingListener::OnSingleRunEnd() { |
| profiler_.StopProfiling(); |
| auto profile_events = profiler_.GetProfileEvents(); |
| has_profiles_ = !profile_events.empty(); |
| summarizer_.ProcessProfiles(profile_events, *interpreter_); |
| } |
| |
// Starts gemmlowp profiling for the whole benchmark; no-op unless built
// with GEMMLOWP_PROFILING.
void GemmlowpProfilingListener::OnBenchmarkStart(
    const BenchmarkParams& params) {
#ifdef GEMMLOWP_PROFILING
  // gemmlowp only profiles threads that register themselves first.
  gemmlowp::RegisterCurrentThreadForProfiling();
  gemmlowp::StartProfiling();
#endif
}
| |
// Finishes gemmlowp profiling at benchmark end; no-op unless built with
// GEMMLOWP_PROFILING.
void GemmlowpProfilingListener::OnBenchmarkEnd(
    const BenchmarkResults& results) {
#ifdef GEMMLOWP_PROFILING
  gemmlowp::FinishProfiling();
#endif
}
| |
| std::vector<std::string> Split(const std::string& str, const char delim) { |
| std::vector<std::string> results; |
| if (!util::SplitAndParse(str, delim, &results)) { |
| results.clear(); |
| } |
| return results; |
| } |
| |
// Writes `num_elements` values produced by `random_func` into the buffer
// starting at `ptr`.
template <typename T>
void FillRandomValue(T* ptr, int num_elements,
                     const std::function<T()>& random_func) {
  for (int idx = 0; idx < num_elements; ++idx) {
    ptr[idx] = random_func();
  }
}
| |
| void FillRandomString(tflite::DynamicBuffer* buffer, |
| const std::vector<int>& sizes, |
| const std::function<string()>& random_func) { |
| int num_elements = 1; |
| for (int dim : sizes) { |
| num_elements *= dim; |
| } |
| for (int i = 0; i < num_elements; ++i) { |
| auto str = random_func(); |
| buffer->AddString(str.data(), str.length()); |
| } |
| } |
| |
| bool PopulateInputLayerInfo( |
| const string& names_string, const string& shapes_string, |
| std::vector<BenchmarkTfLiteModel::InputLayerInfo>* info) { |
| std::vector<std::string> names = Split(names_string, ','); |
| std::vector<std::string> shapes = Split(shapes_string, ':'); |
| |
| if (names.size() != shapes.size()) { |
| TFLITE_LOG(ERROR) << "The number of items in" |
| << " --input_layer_shape (" << shapes_string << ", with " |
| << shapes.size() << " items)" |
| << " must match the number of items in" |
| << " --input_layer (" << names_string << ", with " |
| << names.size() << " items)." |
| << " For example --input_layer=input1,input2" |
| << " --input_layer_shape=1,224,224,4:1,20"; |
| return false; |
| } |
| |
| for (int i = 0; i < names.size(); ++i) { |
| info->push_back(BenchmarkTfLiteModel::InputLayerInfo()); |
| BenchmarkTfLiteModel::InputLayerInfo& input = info->back(); |
| |
| input.name = names[i]; |
| |
| TFLITE_BENCHMARK_CHECK(util::SplitAndParse(shapes[i], ',', &input.shape)) |
| << "Incorrect size string specified: " << shapes[i]; |
| for (int dim : input.shape) { |
| if (dim == -1) { |
| TFLITE_LOG(ERROR) |
| << "Any unknown sizes in the shapes (-1's) must be replaced" |
| << " with the size you want to benchmark with."; |
| return false; |
| } |
| } |
| } |
| |
| return true; |
| } |
| |
| std::vector<int> TfLiteIntArrayToVector(const TfLiteIntArray* int_array) { |
| std::vector<int> values; |
| values.reserve(int_array->size); |
| for (size_t i = 0; i < int_array->size; i++) { |
| values.push_back(int_array->data[i]); |
| } |
| return values; |
| } |
| |
| } // namespace |
| |
| BenchmarkParams BenchmarkTfLiteModel::DefaultParams() { |
| BenchmarkParams default_params = BenchmarkModel::DefaultParams(); |
| default_params.AddParam("graph", BenchmarkParam::Create<std::string>("")); |
| default_params.AddParam("input_layer", |
| BenchmarkParam::Create<std::string>("")); |
| default_params.AddParam("input_layer_shape", |
| BenchmarkParam::Create<std::string>("")); |
| default_params.AddParam("use_nnapi", BenchmarkParam::Create<bool>(false)); |
| default_params.AddParam("use_legacy_nnapi", |
| BenchmarkParam::Create<bool>(false)); |
| default_params.AddParam("nnapi_accelerator_name", |
| BenchmarkParam::Create<std::string>("")); |
| default_params.AddParam("use_gpu", BenchmarkParam::Create<bool>(false)); |
| default_params.AddParam("allow_fp16", BenchmarkParam::Create<bool>(false)); |
| default_params.AddParam( |
| "enable_op_profiling", |
| BenchmarkParam::Create<bool>(kOpProfilingEnabledDefault)); |
| default_params.AddParam("max_profiling_buffer_entries", |
| BenchmarkParam::Create<int32_t>(1024)); |
| return default_params; |
| } |
| |
// Default constructor: benchmarks with the default parameter set.
BenchmarkTfLiteModel::BenchmarkTfLiteModel()
    : BenchmarkTfLiteModel(DefaultParams()) {}

// Constructs a benchmark with caller-supplied parameters (moved in).
BenchmarkTfLiteModel::BenchmarkTfLiteModel(BenchmarkParams params)
    : BenchmarkModel(std::move(params)) {}
| |
| void BenchmarkTfLiteModel::CleanUp() { |
| if (inputs_data_.empty()) { |
| return; |
| } |
| // Free up any pre-allocated tensor data during PrepareInputData. |
| for (int i = 0; i < inputs_data_.size(); ++i) { |
| delete[] inputs_data_[i].data.raw; |
| } |
| inputs_data_.clear(); |
| } |
| |
// Frees any input buffers still owned by this benchmark instance.
BenchmarkTfLiteModel::~BenchmarkTfLiteModel() { CleanUp(); }
| |
| std::vector<Flag> BenchmarkTfLiteModel::GetFlags() { |
| std::vector<Flag> flags = BenchmarkTfLiteModel::BenchmarkModel::GetFlags(); |
| std::vector<Flag> specific_flags = { |
| CreateFlag<std::string>("graph", ¶ms_, "graph file name"), |
| CreateFlag<std::string>("input_layer", ¶ms_, "input layer names"), |
| CreateFlag<std::string>("input_layer_shape", ¶ms_, |
| "input layer shape"), |
| CreateFlag<bool>("use_nnapi", ¶ms_, "use nnapi delegate api"), |
| CreateFlag<bool>("use_legacy_nnapi", ¶ms_, "use legacy nnapi api"), |
| CreateFlag<std::string>( |
| "nnapi_accelerator_name", ¶ms_, |
| "the name of the nnapi accelerator to use (requires Android Q+)"), |
| CreateFlag<bool>("use_gpu", ¶ms_, "use gpu"), |
| CreateFlag<bool>("allow_fp16", ¶ms_, "allow fp16"), |
| CreateFlag<bool>("enable_op_profiling", ¶ms_, "enable op profiling"), |
| CreateFlag<int32_t>("max_profiling_buffer_entries", ¶ms_, |
| "max profiling buffer entries")}; |
| |
| flags.insert(flags.end(), specific_flags.begin(), specific_flags.end()); |
| return flags; |
| } |
| |
| void BenchmarkTfLiteModel::LogParams() { |
| BenchmarkModel::LogParams(); |
| TFLITE_LOG(INFO) << "Graph: [" << params_.Get<std::string>("graph") << "]"; |
| TFLITE_LOG(INFO) << "Input layers: [" |
| << params_.Get<std::string>("input_layer") << "]"; |
| TFLITE_LOG(INFO) << "Input shapes: [" |
| << params_.Get<std::string>("input_layer_shape") << "]"; |
| TFLITE_LOG(INFO) << "Use nnapi : [" << params_.Get<bool>("use_nnapi") << "]"; |
| TFLITE_LOG(INFO) << "Use legacy nnapi : [" |
| << params_.Get<bool>("use_legacy_nnapi") << "]"; |
| if (params_.HasParam("nnapi_accelerator_name")) { |
| TFLITE_LOG(INFO) << "nnapi accelerator name: [" |
| << params_.Get<string>("nnapi_accelerator_name") << "]"; |
| } |
| TFLITE_LOG(INFO) << "Use gpu : [" << params_.Get<bool>("use_gpu") << "]"; |
| TFLITE_LOG(INFO) << "Allow fp16 : [" << params_.Get<bool>("allow_fp16") |
| << "]"; |
| TFLITE_LOG(INFO) << "Enable op profiling: [" |
| << params_.Get<bool>("enable_op_profiling") << "]"; |
| TFLITE_LOG(INFO) << "Max profiling buffer entries: [" |
| << params_.Get<int32_t>("max_profiling_buffer_entries") |
| << "]"; |
| } |
| |
| bool BenchmarkTfLiteModel::ValidateParams() { |
| if (params_.Get<std::string>("graph").empty()) { |
| TFLITE_LOG(ERROR) |
| << "Please specify the name of your TF Lite input file with --graph"; |
| return false; |
| } |
| return PopulateInputLayerInfo(params_.Get<std::string>("input_layer"), |
| params_.Get<std::string>("input_layer_shape"), |
| &inputs_); |
| } |
| |
| uint64_t BenchmarkTfLiteModel::ComputeInputBytes() { |
| TFLITE_BENCHMARK_CHECK(interpreter_); |
| uint64_t total_input_bytes = 0; |
| for (int input : interpreter_->inputs()) { |
| auto* t = interpreter_->tensor(input); |
| total_input_bytes += t->bytes; |
| } |
| return total_input_bytes; |
| } |
| |
| void BenchmarkTfLiteModel::PrepareInputData() { |
| auto interpreter_inputs = interpreter_->inputs(); |
| const size_t input_size = interpreter_inputs.size(); |
| CleanUp(); |
| |
| for (int j = 0; j < input_size; ++j) { |
| int i = interpreter_inputs[j]; |
| TfLiteTensor* t = interpreter_->tensor(i); |
| std::vector<int> sizes = TfLiteIntArrayToVector(t->dims); |
| int num_elements = 1; |
| for (int i = 0; i < sizes.size(); ++i) { |
| num_elements *= sizes[i]; |
| } |
| InputTensorData t_data; |
| if (t->type == kTfLiteFloat32) { |
| t_data.bytes = sizeof(float) * num_elements; |
| t_data.data.raw = new char[t_data.bytes]; |
| FillRandomValue<float>(t_data.data.f, num_elements, []() { |
| return static_cast<float>(rand()) / RAND_MAX - 0.5f; |
| }); |
| } else if (t->type == kTfLiteInt64) { |
| t_data.bytes = sizeof(int64_t) * num_elements; |
| t_data.data.raw = new char[t_data.bytes]; |
| FillRandomValue<int64_t>(t_data.data.i64, num_elements, []() { |
| return static_cast<int64_t>(rand()) % 100; |
| }); |
| } else if (t->type == kTfLiteInt32) { |
| // TODO(yunluli): This is currently only used for handling embedding input |
| // for speech models. Generalize if necessary. |
| t_data.bytes = sizeof(int32_t) * num_elements; |
| t_data.data.raw = new char[t_data.bytes]; |
| FillRandomValue<int32_t>(t_data.data.i32, num_elements, []() { |
| return static_cast<int32_t>(rand()) % 100; |
| }); |
| } else if (t->type == kTfLiteInt16) { |
| t_data.bytes = sizeof(int16_t) * num_elements; |
| t_data.data.raw = new char[t_data.bytes]; |
| FillRandomValue<int16_t>(t_data.data.i16, num_elements, []() { |
| return static_cast<int16_t>(rand()) % 100; |
| }); |
| } else if (t->type == kTfLiteUInt8) { |
| t_data.bytes = sizeof(uint8_t) * num_elements; |
| t_data.data.raw = new char[t_data.bytes]; |
| FillRandomValue<uint8_t>(t_data.data.uint8, num_elements, []() { |
| return static_cast<uint8_t>(rand()) % 255; |
| }); |
| } else if (t->type == kTfLiteInt8) { |
| t_data.bytes = sizeof(int8_t) * num_elements; |
| t_data.data.raw = new char[t_data.bytes]; |
| FillRandomValue<int8_t>(t_data.data.int8, num_elements, []() { |
| return static_cast<int8_t>(rand()) % 255 - 127; |
| }); |
| } else if (t->type == kTfLiteString) { |
| // TODO(haoliang): No need to cache string tensors right now. |
| } else { |
| TFLITE_LOG(FATAL) << "Don't know how to populate tensor " << t->name |
| << " of type " << t->type; |
| } |
| inputs_data_.push_back(t_data); |
| } |
| } |
| |
| void BenchmarkTfLiteModel::ResetInputsAndOutputs() { |
| auto interpreter_inputs = interpreter_->inputs(); |
| // Set the values of the input tensors from inputs_data_. |
| for (int j = 0; j < interpreter_inputs.size(); ++j) { |
| int i = interpreter_inputs[j]; |
| TfLiteTensor* t = interpreter_->tensor(i); |
| if (t->type == kTfLiteFloat32) { |
| std::memcpy(interpreter_->typed_tensor<float>(i), inputs_data_[j].data.f, |
| inputs_data_[j].bytes); |
| } else if (t->type == kTfLiteInt64) { |
| std::memcpy(interpreter_->typed_tensor<int64_t>(i), |
| inputs_data_[j].data.i64, inputs_data_[j].bytes); |
| } else if (t->type == kTfLiteInt32) { |
| std::memcpy(interpreter_->typed_tensor<int32_t>(i), |
| inputs_data_[j].data.i32, inputs_data_[j].bytes); |
| } else if (t->type == kTfLiteInt64) { |
| std::memcpy(interpreter_->typed_tensor<int64_t>(i), |
| inputs_data_[j].data.i64, inputs_data_[j].bytes); |
| } else if (t->type == kTfLiteInt16) { |
| std::memcpy(interpreter_->typed_tensor<int16_t>(i), |
| inputs_data_[j].data.i16, inputs_data_[j].bytes); |
| } else if (t->type == kTfLiteUInt8) { |
| std::memcpy(interpreter_->typed_tensor<uint8_t>(i), |
| inputs_data_[j].data.uint8, inputs_data_[j].bytes); |
| } else if (t->type == kTfLiteInt8) { |
| std::memcpy(interpreter_->typed_tensor<int8_t>(i), |
| inputs_data_[j].data.int8, inputs_data_[j].bytes); |
| } else if (t->type == kTfLiteString) { |
| tflite::DynamicBuffer buffer; |
| std::vector<int> sizes = TfLiteIntArrayToVector(t->dims); |
| FillRandomString(&buffer, sizes, []() { |
| return "we're have some friends over saturday to hang out in the yard"; |
| }); |
| buffer.WriteToTensor(interpreter_->tensor(i), /*new_shape=*/nullptr); |
| } else { |
| TFLITE_LOG(FATAL) << "Don't know how to populate tensor " << t->name |
| << " of type " << t->type; |
| } |
| } |
| } |
| |
// Loads the model, builds the interpreter, applies any requested delegates,
// resizes the input tensors to the user-supplied shapes, allocates tensor
// memory, and installs profiling listeners. Unrecoverable problems abort
// via TFLITE_LOG(FATAL).
void BenchmarkTfLiteModel::Init() {
  std::string graph = params_.Get<std::string>("graph");
  model_ = tflite::FlatBufferModel::BuildFromFile(graph.c_str());
  if (!model_) {
    TFLITE_LOG(FATAL) << "Failed to mmap model " << graph;
  }
  TFLITE_LOG(INFO) << "Loaded model " << graph;
  // NOTE(review): the return value is discarded; this call appears to serve
  // only as a touch of the model's error reporter — confirm it is needed.
  model_->error_reporter();
  TFLITE_LOG(INFO) << "resolved reporter";

  auto resolver = GetOpResolver();

  const int32_t num_threads = params_.Get<int32_t>("num_threads");
  tflite::InterpreterBuilder(*model_, *resolver)(&interpreter_, num_threads);
  if (!interpreter_) {
    TFLITE_LOG(FATAL) << "Failed to construct interpreter";
  }

  // Legacy NNAPI path: toggled directly on the interpreter, separate from
  // the delegate-based NNAPI path handled in GetDelegates().
  interpreter_->UseNNAPI(params_.Get<bool>("use_legacy_nnapi"));

  // Apply each requested delegate; a failed application is fatal.
  delegates_ = GetDelegates();
  for (const auto& delegate : delegates_) {
    if (interpreter_->ModifyGraphWithDelegate(delegate.second.get()) !=
        kTfLiteOk) {
      TFLITE_LOG(FATAL) << "Failed to apply " << delegate.first << " delegate.";
    } else {
      TFLITE_LOG(INFO) << "Applied " << delegate.first << " delegate.";
    }
  }

  interpreter_->SetAllowFp16PrecisionForFp32(params_.Get<bool>("allow_fp16"));

  auto interpreter_inputs = interpreter_->inputs();

  // When --input_layer was given, its count must match the model's inputs.
  if (!inputs_.empty()) {
    TFLITE_BENCHMARK_CHECK_EQ(inputs_.size(), interpreter_inputs.size())
        << "Inputs mismatch: Model inputs #:" << interpreter_inputs.size()
        << " expected: " << inputs_.size();
  }

  // Check if the tensor names match, and log a warning if it doesn't.
  // TODO(ycling): Consider to make this an error again when the new converter
  // create tensors with consistent naming.
  for (int j = 0; j < inputs_.size(); ++j) {
    const InputLayerInfo& input = inputs_[j];
    int i = interpreter_inputs[j];
    TfLiteTensor* t = interpreter_->tensor(i);
    if (input.name != t->name) {
      TFLITE_LOG(WARN) << "Tensor # " << i << " is named " << t->name
                       << " but flags call it " << input.name;
    }
  }

  // Resize all non-string tensors.
  // NOTE(review): the TfLiteStatus returned by ResizeInputTensor is ignored;
  // a failed resize would only surface later in AllocateTensors().
  for (int j = 0; j < inputs_.size(); ++j) {
    const InputLayerInfo& input = inputs_[j];
    int i = interpreter_inputs[j];
    TfLiteTensor* t = interpreter_->tensor(i);
    if (t->type != kTfLiteString) {
      interpreter_->ResizeInputTensor(i, input.shape);
    }
  }

  if (interpreter_->AllocateTensors() != kTfLiteOk) {
    TFLITE_LOG(FATAL) << "Failed to allocate tensors!";
  }

  // Install profilers if necessary.
  if (params_.Get<bool>("enable_op_profiling")) {
    profiling_listener_.reset(new ProfilingListener(
        interpreter_.get(),
        params_.Get<int32_t>("max_profiling_buffer_entries")));
    AddListener(profiling_listener_.get());
  }
#ifdef GEMMLOWP_PROFILING
  gemmlowp_profiling_listener_.reset(new GemmlowpProfilingListener());
  AddListener(gemmlowp_profiling_listener_.get());
#endif
}
| |
// Builds the delegates requested via flags, keyed by a printable name.
// Unsupported delegates produce a warning instead of a map entry.
BenchmarkTfLiteModel::TfLiteDelegatePtrMap BenchmarkTfLiteModel::GetDelegates()
    const {
  TfLiteDelegatePtrMap delegates;
  if (params_.Get<bool>("use_gpu")) {
    Interpreter::TfLiteDelegatePtr delegate =
        evaluation::CreateGPUDelegate(model_.get());
    if (!delegate) {
      TFLITE_LOG(WARN) << "GPU acceleration is unsupported on this platform.";
    } else {
      delegates.emplace("GPU", std::move(delegate));
    }
  }
  if (params_.Get<bool>("use_nnapi")) {
    StatefulNnApiDelegate::Options options;
    // Declared at this scope so the pointer stored via c_str() below stays
    // valid until CreateNNAPIDelegate(options) has been called.
    std::string accelerator_name;
    // NOTE(review): HasParam only checks that the parameter is registered
    // (DefaultParams always registers it), not that the user supplied a
    // non-empty name — presumably an empty accelerator_name means "no
    // preference" downstream; confirm.
    if (params_.HasParam("nnapi_accelerator_name")) {
      accelerator_name = params_.Get<std::string>("nnapi_accelerator_name");
      options.accelerator_name = accelerator_name.c_str();
    }
    Interpreter::TfLiteDelegatePtr delegate =
        evaluation::CreateNNAPIDelegate(options);
    if (!delegate) {
      TFLITE_LOG(WARN) << "NNAPI acceleration is unsupported on this platform.";
    } else {
      delegates.emplace("NNAPI", std::move(delegate));
    }
  } else if (params_.HasParam("nnapi_accelerator_name")) {
    // An accelerator name without --use_nnapi has no effect; tell the user.
    TFLITE_LOG(WARN)
        << "`--use_nnapi=true` must be set for the provided NNAPI accelerator ("
        << params_.Get<std::string>("nnapi_accelerator_name")
        << ") to be used.";
  }
  return delegates;
}
| |
| std::unique_ptr<tflite::OpResolver> BenchmarkTfLiteModel::GetOpResolver() |
| const { |
| tflite::OpResolver* resolver = nullptr; |
| #ifdef TFLITE_CUSTOM_OPS_HEADER |
| resolver = new tflite::MutableOpResolver(); |
| RegisterSelectedOps(static_cast<tflite::MutableOpResolver*>(resolver)); |
| #else |
| resolver = new tflite::ops::builtin::BuiltinOpResolver(); |
| #endif |
| |
| return std::unique_ptr<tflite::OpResolver>(resolver); |
| } |
| |
// Executes a single inference; a failed invoke is fatal, since timing a
// failed run would be meaningless.
void BenchmarkTfLiteModel::RunImpl() {
  if (interpreter_->Invoke() != kTfLiteOk) {
    TFLITE_LOG(FATAL) << "Failed to invoke!";
  }
}
| |
| } // namespace benchmark |
| } // namespace tflite |