| /* Copyright 2018 The TensorFlow Authors. All Rights Reserved. |
| |
| Licensed under the Apache License, Version 2.0 (the "License"); |
| you may not use this file except in compliance with the License. |
| You may obtain a copy of the License at |
| |
| http://www.apache.org/licenses/LICENSE-2.0 |
| |
| Unless required by applicable law or agreed to in writing, software |
| distributed under the License is distributed on an "AS IS" BASIS, |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| See the License for the specific language governing permissions and |
| limitations under the License. |
| ==============================================================================*/ |
| |
| #include "tensorflow/lite/tools/benchmark/benchmark_tflite_model.h" |
| |
| #include <cstdarg> |
| #include <cstdint> |
| #include <cstdlib> |
| #include <fstream> |
| #include <iostream> |
| #include <memory> |
| #include <random> |
| #include <string> |
| #include <unordered_set> |
| #include <vector> |
| |
| #include "absl/base/attributes.h" |
| #include "absl/strings/numbers.h" |
| #include "ruy/profiler/profiler.h" // from @ruy |
| #include "tensorflow/lite/c/common.h" |
| #include "tensorflow/lite/core/subgraph.h" |
| #include "tensorflow/lite/kernels/cpu_backend_context.h" |
| #include "tensorflow/lite/kernels/register.h" |
| #include "tensorflow/lite/model.h" |
| #include "tensorflow/lite/op_resolver.h" |
| #include "tensorflow/lite/optional_debug_tools.h" |
| #include "tensorflow/lite/profiling/profile_summary_formatter.h" |
| #include "tensorflow/lite/string_util.h" |
| #include "tensorflow/lite/tools/benchmark/benchmark_utils.h" |
| #include "tensorflow/lite/tools/benchmark/profiling_listener.h" |
| #include "tensorflow/lite/tools/delegates/delegate_provider.h" |
| #include "tensorflow/lite/tools/logging.h" |
| |
| void RegisterSelectedOps(::tflite::MutableOpResolver* resolver); |
| |
| // Version with Weak linker attribute doing nothing: if someone links this |
| // library with another definition of this function (presumably to actually |
| // register custom ops), that version will be used instead. |
| void ABSL_ATTRIBUTE_WEAK |
| RegisterSelectedOps(::tflite::MutableOpResolver* resolver) {} |
| |
| namespace tflite { |
| namespace benchmark { |
| namespace { |
| |
// Default value of the "enable_op_profiling" param. Kept for backward
// compatibility with the previous approach to enabling op profiling, i.e.
// compiling with TFLITE_PROFILING_ENABLED defined.
#if defined(TFLITE_PROFILING_ENABLED)
constexpr bool kOpProfilingEnabledDefault = true;
#else
constexpr bool kOpProfilingEnabledDefault = false;
#endif
| |
| // Dumps ruy profiling events if the ruy profiler is enabled. |
| class RuyProfileListener : public BenchmarkListener { |
| public: |
| void OnBenchmarkStart(const BenchmarkParams& params) override; |
| |
| void OnBenchmarkEnd(const BenchmarkResults& results) override; |
| |
| private: |
| std::unique_ptr<ruy::profiler::ScopeProfile> ruy_profile_; |
| }; |
| |
| void RuyProfileListener::OnBenchmarkStart(const BenchmarkParams& params) { |
| ruy_profile_.reset(new ruy::profiler::ScopeProfile); |
| } |
| |
| void RuyProfileListener::OnBenchmarkEnd(const BenchmarkResults& results) { |
| ruy_profile_ = nullptr; |
| } |
| |
| class InterpreterStatePrinter : public BenchmarkListener { |
| public: |
| explicit InterpreterStatePrinter(Interpreter* interpreter) |
| : interpreter_(interpreter) {} |
| |
| void OnBenchmarkStart(const BenchmarkParams& params) override { |
| params_ = ¶ms; |
| if (params_->Get<bool>("print_preinvoke_state")) { |
| TFLITE_LOG(INFO) << "\n====Printing out TfLite interpreter pre-invoke " |
| "state begins===="; |
| tflite::PrintInterpreterState(interpreter_); |
| TFLITE_LOG(INFO) << "====Printing out TfLite interpreter pre-invoke " |
| "state ends====\n"; |
| } |
| } |
| |
| void OnBenchmarkEnd(const BenchmarkResults& results) override { |
| if (params_->Get<bool>("print_postinvoke_state")) { |
| TFLITE_LOG(INFO) << "\n====Printing out TfLite interpreter post-invoke " |
| "state begins===="; |
| tflite::PrintInterpreterState(interpreter_); |
| TFLITE_LOG(INFO) << "====Printing out TfLite interpreter post-invoke " |
| "state ends====\n"; |
| } |
| } |
| |
| private: |
| Interpreter* const interpreter_ = nullptr; // not own the memory. |
| const BenchmarkParams* params_ = nullptr; // not own the memory. |
| }; |
| |
| std::vector<std::string> Split(const std::string& str, const char delim) { |
| std::vector<std::string> results; |
| if (!util::SplitAndParse(str, delim, &results)) { |
| results.clear(); |
| } |
| return results; |
| } |
| |
| int GetNumElements(const TfLiteIntArray* dim_array) { |
| int num_elements = 1; |
| for (size_t i = 0; i < dim_array->size; i++) { |
| num_elements *= dim_array->data[i]; |
| } |
| return num_elements; |
| } |
| |
| void FillRandomString(tflite::DynamicBuffer* buffer, |
| const TfLiteIntArray* dim_array, |
| const std::function<std::string()>& random_func) { |
| int num_elements = GetNumElements(dim_array); |
| for (int i = 0; i < num_elements; ++i) { |
| auto str = random_func(); |
| buffer->AddString(str.data(), str.length()); |
| } |
| } |
| |
| int FindLayerInfoIndex(std::vector<BenchmarkTfLiteModel::InputLayerInfo>* info, |
| const std::string& input_name, |
| const string& names_string) { |
| for (int i = 0; i < info->size(); ++i) { |
| if (info->at(i).name == input_name) { |
| return i; |
| } |
| } |
| TFLITE_LOG(FATAL) << "Cannot find the corresponding input_layer name(" |
| << input_name << ") in --input_layer as " << names_string; |
| return -1; |
| } |
| |
| TfLiteStatus PopulateInputValueRanges( |
| const std::string& names_string, const std::string& value_ranges_string, |
| std::vector<BenchmarkTfLiteModel::InputLayerInfo>* info) { |
| std::vector<std::string> value_ranges = Split(value_ranges_string, ':'); |
| for (const auto& val : value_ranges) { |
| std::vector<std::string> name_range = Split(val, ','); |
| if (name_range.size() != 3) { |
| TFLITE_LOG(ERROR) << "Wrong input value range item specified: " << val; |
| return kTfLiteError; |
| } |
| |
| // Ensure the specific input layer name exists. |
| int layer_info_idx = FindLayerInfoIndex(info, name_range[0], names_string); |
| |
| // Parse the range value. |
| int low, high; |
| bool has_low = absl::SimpleAtoi(name_range[1], &low); |
| bool has_high = absl::SimpleAtoi(name_range[2], &high); |
| if (!has_low || !has_high || low > high) { |
| TFLITE_LOG(ERROR) |
| << "Wrong low and high value of the input value range specified: " |
| << val; |
| return kTfLiteError; |
| } |
| info->at(layer_info_idx).has_value_range = true; |
| info->at(layer_info_idx).low = low; |
| info->at(layer_info_idx).high = high; |
| } |
| return kTfLiteOk; |
| } |
| |
| TfLiteStatus PopulateInputValueFiles( |
| const std::string& names_string, const std::string& value_files_string, |
| std::vector<BenchmarkTfLiteModel::InputLayerInfo>* info) { |
| std::vector<std::string> value_files = Split(value_files_string, ','); |
| for (const auto& val : value_files) { |
| std::vector<std::string> name_file = Split(val, ':'); |
| if (name_file.size() != 2) { |
| TFLITE_LOG(ERROR) << "Wrong input value file item specified: " << val; |
| return kTfLiteError; |
| } |
| |
| // Ensure the specific input layer name exists. |
| int layer_info_idx = FindLayerInfoIndex(info, name_file[0], names_string); |
| if (info->at(layer_info_idx).has_value_range) { |
| TFLITE_LOG(WARN) |
| << "The input_name:" << info->at(layer_info_idx).name |
| << " appears both in input_layer_value_files and " |
| "input_layer_value_range. The input_layer_value_range of the " |
| "input_name will be ignored."; |
| } |
| info->at(layer_info_idx).input_file_path = name_file[1]; |
| } |
| return kTfLiteOk; |
| } |
| |
| TfLiteStatus PopulateInputLayerInfo( |
| const std::string& names_string, const std::string& shapes_string, |
| const std::string& value_ranges_string, |
| const std::string& value_files_string, |
| std::vector<BenchmarkTfLiteModel::InputLayerInfo>* info) { |
| info->clear(); |
| std::vector<std::string> names = Split(names_string, ','); |
| std::vector<std::string> shapes = Split(shapes_string, ':'); |
| |
| if (names.size() != shapes.size()) { |
| TFLITE_LOG(ERROR) << "The number of items in" |
| << " --input_layer_shape (" << shapes_string << ", with " |
| << shapes.size() << " items)" |
| << " must match the number of items in" |
| << " --input_layer (" << names_string << ", with " |
| << names.size() << " items)." |
| << " For example --input_layer=input1,input2" |
| << " --input_layer_shape=1,224,224,4:1,20"; |
| return kTfLiteError; |
| } |
| |
| for (int i = 0; i < names.size(); ++i) { |
| info->push_back(BenchmarkTfLiteModel::InputLayerInfo()); |
| BenchmarkTfLiteModel::InputLayerInfo& input = info->back(); |
| |
| input.name = names[i]; |
| |
| TFLITE_TOOLS_CHECK(util::SplitAndParse(shapes[i], ',', &input.shape)) |
| << "Incorrect size string specified: " << shapes[i]; |
| for (int dim : input.shape) { |
| if (dim == -1) { |
| TFLITE_LOG(ERROR) |
| << "Any unknown sizes in the shapes (-1's) must be replaced" |
| << " with the size you want to benchmark with."; |
| return kTfLiteError; |
| } |
| } |
| } |
| |
| // Populate input value range if it's specified. |
| TF_LITE_ENSURE_STATUS( |
| PopulateInputValueRanges(names_string, value_ranges_string, info)); |
| |
| // Populate input value files if it's specified. |
| TF_LITE_ENSURE_STATUS( |
| PopulateInputValueFiles(names_string, value_files_string, info)); |
| |
| return kTfLiteOk; |
| } |
| |
| std::shared_ptr<profiling::ProfileSummaryFormatter> |
| CreateProfileSummaryFormatter(bool format_as_csv) { |
| return format_as_csv |
| ? std::make_shared<profiling::ProfileSummaryCSVFormatter>() |
| : std::make_shared<profiling::ProfileSummaryDefaultFormatter>(); |
| } |
| |
| } // namespace |
| |
| BenchmarkParams BenchmarkTfLiteModel::DefaultParams() { |
| BenchmarkParams default_params = BenchmarkModel::DefaultParams(); |
| default_params.AddParam("graph", BenchmarkParam::Create<std::string>("")); |
| default_params.AddParam("input_layer", |
| BenchmarkParam::Create<std::string>("")); |
| default_params.AddParam("input_layer_shape", |
| BenchmarkParam::Create<std::string>("")); |
| default_params.AddParam("input_layer_value_range", |
| BenchmarkParam::Create<std::string>("")); |
| default_params.AddParam("input_layer_value_files", |
| BenchmarkParam::Create<std::string>("")); |
| default_params.AddParam("allow_fp16", BenchmarkParam::Create<bool>(false)); |
| default_params.AddParam("require_full_delegation", |
| BenchmarkParam::Create<bool>(false)); |
| default_params.AddParam( |
| "enable_op_profiling", |
| BenchmarkParam::Create<bool>(kOpProfilingEnabledDefault)); |
| default_params.AddParam("max_profiling_buffer_entries", |
| BenchmarkParam::Create<int32_t>(1024)); |
| default_params.AddParam("profiling_output_csv_file", |
| BenchmarkParam::Create<std::string>("")); |
| |
| default_params.AddParam("print_preinvoke_state", |
| BenchmarkParam::Create<bool>(false)); |
| default_params.AddParam("print_postinvoke_state", |
| BenchmarkParam::Create<bool>(false)); |
| |
| for (const auto& delegate_provider : |
| tools::GetRegisteredDelegateProviders()) { |
| default_params.Merge(delegate_provider->DefaultParams()); |
| } |
| |
| return default_params; |
| } |
| |
| BenchmarkTfLiteModel::BenchmarkTfLiteModel(BenchmarkParams params) |
| : BenchmarkModel(std::move(params)), |
| random_engine_(std::random_device()()) { |
| AddListener(&log_output_); |
| } |
| |
| void BenchmarkTfLiteModel::CleanUp() { |
| // Free up any pre-allocated tensor data during PrepareInputData. |
| inputs_data_.clear(); |
| } |
| |
| BenchmarkTfLiteModel::~BenchmarkTfLiteModel() { |
| CleanUp(); |
| |
| // Destory the owned interpreter earlier than other objects (specially |
| // 'owned_delegates_'). |
| interpreter_.reset(); |
| } |
| |
| std::vector<Flag> BenchmarkTfLiteModel::GetFlags() { |
| std::vector<Flag> flags = BenchmarkModel::GetFlags(); |
| std::vector<Flag> specific_flags = { |
| CreateFlag<std::string>("graph", ¶ms_, "graph file name"), |
| CreateFlag<std::string>("input_layer", ¶ms_, "input layer names"), |
| CreateFlag<std::string>("input_layer_shape", ¶ms_, |
| "input layer shape"), |
| CreateFlag<std::string>( |
| "input_layer_value_range", ¶ms_, |
| "A map-like string representing value range for *integer* input " |
| "layers. Each item is separated by ':', and the item value consists " |
| "of input layer name and integer-only range values (both low and " |
| "high are inclusive) separated by ',', e.g. input1,1,2:input2,0,254"), |
| CreateFlag<std::string>( |
| "input_layer_value_files", ¶ms_, |
| "A map-like string representing value file. Each item is separated " |
| "by ',', and the item value consists " |
| "of input layer name and value file path separated by ':', e.g. " |
| "input1:file_path1,input2:file_path2. If the input_name appears both " |
| "in input_layer_value_range and input_layer_value_files, " |
| "input_layer_value_range of the input_name will be ignored. The file " |
| "format is binary and it should be array format or null separated " |
| "strings format."), |
| CreateFlag<bool>("allow_fp16", ¶ms_, "allow fp16"), |
| CreateFlag<bool>("require_full_delegation", ¶ms_, |
| "require delegate to run the entire graph"), |
| CreateFlag<bool>("enable_op_profiling", ¶ms_, "enable op profiling"), |
| CreateFlag<int32_t>("max_profiling_buffer_entries", ¶ms_, |
| "max profiling buffer entries"), |
| CreateFlag<std::string>( |
| "profiling_output_csv_file", ¶ms_, |
| "File path to export profile data as CSV, if not set " |
| "prints to stdout."), |
| CreateFlag<bool>( |
| "print_preinvoke_state", ¶ms_, |
| "print out the interpreter internals just before calling Invoke. The " |
| "internals will include allocated memory size of each tensor etc."), |
| CreateFlag<bool>( |
| "print_postinvoke_state", ¶ms_, |
| "print out the interpreter internals just before benchmark completes " |
| "(i.e. after all repeated Invoke calls complete). The internals will " |
| "include allocated memory size of each tensor etc.")}; |
| |
| flags.insert(flags.end(), specific_flags.begin(), specific_flags.end()); |
| |
| for (const auto& delegate_provider : |
| tools::GetRegisteredDelegateProviders()) { |
| auto delegate_flags = delegate_provider->CreateFlags(¶ms_); |
| flags.insert(flags.end(), delegate_flags.begin(), delegate_flags.end()); |
| } |
| |
| return flags; |
| } |
| |
| void BenchmarkTfLiteModel::LogParams() { |
| BenchmarkModel::LogParams(); |
| const bool verbose = params_.Get<bool>("verbose"); |
| // Always log the value of --graph. |
| LOG_BENCHMARK_PARAM(std::string, "graph", "Graph", /*verbose*/ true); |
| LOG_BENCHMARK_PARAM(std::string, "input_layer", "Input layers", verbose); |
| LOG_BENCHMARK_PARAM(std::string, "input_layer_shape", "Input shapes", |
| verbose); |
| LOG_BENCHMARK_PARAM(std::string, "input_layer_value_range", |
| "Input value ranges", verbose); |
| LOG_BENCHMARK_PARAM(std::string, "input_layer_value_files", |
| "Input value files", verbose); |
| |
| LOG_BENCHMARK_PARAM(bool, "allow_fp16", "Allow fp16", verbose); |
| LOG_BENCHMARK_PARAM(bool, "require_full_delegation", |
| "Require full delegation", verbose); |
| LOG_BENCHMARK_PARAM(bool, "enable_op_profiling", "Enable op profiling", |
| verbose); |
| LOG_BENCHMARK_PARAM(int32_t, "max_profiling_buffer_entries", |
| "Max profiling buffer entries", verbose); |
| LOG_BENCHMARK_PARAM(std::string, "profiling_output_csv_file", |
| "CSV File to export profiling data to", verbose); |
| LOG_BENCHMARK_PARAM(bool, "print_preinvoke_state", |
| "Print pre-invoke interpreter state", verbose); |
| LOG_BENCHMARK_PARAM(bool, "print_postinvoke_state", |
| "Print post-invoke interpreter state", verbose); |
| |
| for (const auto& delegate_provider : |
| tools::GetRegisteredDelegateProviders()) { |
| delegate_provider->LogParams(params_, verbose); |
| } |
| } |
| |
| TfLiteStatus BenchmarkTfLiteModel::ValidateParams() { |
| if (params_.Get<std::string>("graph").empty()) { |
| TFLITE_LOG(ERROR) |
| << "Please specify the name of your TF Lite input file with --graph"; |
| return kTfLiteError; |
| } |
| |
| return PopulateInputLayerInfo( |
| params_.Get<std::string>("input_layer"), |
| params_.Get<std::string>("input_layer_shape"), |
| params_.Get<std::string>("input_layer_value_range"), |
| params_.Get<std::string>("input_layer_value_files"), &inputs_); |
| } |
| |
| uint64_t BenchmarkTfLiteModel::ComputeInputBytes() { |
| TFLITE_TOOLS_CHECK(interpreter_); |
| uint64_t total_input_bytes = 0; |
| for (int input : interpreter_->inputs()) { |
| auto* t = interpreter_->tensor(input); |
| total_input_bytes += t->bytes; |
| } |
| return total_input_bytes; |
| } |
| |
| int64_t BenchmarkTfLiteModel::MayGetModelFileSize() { |
| std::ifstream in_file(params_.Get<std::string>("graph"), |
| std::ios::binary | std::ios::ate); |
| return in_file.tellg(); |
| } |
| |
| BenchmarkTfLiteModel::InputTensorData BenchmarkTfLiteModel::LoadInputTensorData( |
| const TfLiteTensor& t, const std::string& input_file_path) { |
| std::ifstream value_file(input_file_path, std::ios::binary); |
| if (!value_file.good()) { |
| TFLITE_LOG(FATAL) << "Failed to read the input_layer_value_file:" |
| << input_file_path; |
| } |
| InputTensorData t_data; |
| if (t.type == kTfLiteString) { |
| t_data.data = VoidUniquePtr( |
| static_cast<void*>(new tflite::DynamicBuffer()), |
| [](void* ptr) { delete static_cast<DynamicBuffer*>(ptr); }); |
| std::string line; |
| size_t num_line = 0; |
| // Read the line with the delimiter '\0'. |
| while (std::getline(value_file, line, '\0')) { |
| num_line++; |
| static_cast<DynamicBuffer*>(t_data.data.get()) |
| ->AddString(line.data(), line.length()); |
| } |
| int num_elements = GetNumElements(t.dims); |
| if (num_line != num_elements) { |
| TFLITE_LOG(FATAL) << "The number of string in the input_layer_value_file(" |
| << input_file_path << ") is " << num_line |
| << ". It should be " << num_elements << "."; |
| } |
| } else { |
| value_file.seekg(0, std::ios_base::end); |
| if (value_file.tellg() != t.bytes) { |
| TFLITE_LOG(FATAL) << "The size of " << input_file_path << " is " |
| << value_file.tellg() << " bytes. It should be " |
| << t.bytes << " bytes."; |
| } |
| t_data.bytes = t.bytes; |
| t_data.data = |
| VoidUniquePtr(static_cast<void*>(new char[t.bytes]), |
| [](void* ptr) { delete[] static_cast<char*>(ptr); }); |
| value_file.clear(); |
| value_file.seekg(0, std::ios_base::beg); |
| value_file.read(static_cast<char*>(t_data.data.get()), t.bytes); |
| } |
| return t_data; |
| } |
| |
| BenchmarkTfLiteModel::InputTensorData |
| BenchmarkTfLiteModel::CreateRandomTensorData(const TfLiteTensor& t, |
| const InputLayerInfo* layer_info) { |
| bool has_value_range = false; |
| int low_range = 0; |
| int high_range = 0; |
| if (layer_info) { |
| has_value_range = layer_info->has_value_range; |
| low_range = layer_info->low; |
| high_range = layer_info->high; |
| } |
| int num_elements = GetNumElements(t.dims); |
| switch (t.type) { |
| case kTfLiteFloat32: { |
| return CreateInputTensorData<float>( |
| num_elements, std::uniform_real_distribution<float>(-0.5f, 0.5f)); |
| } |
| case kTfLiteFloat16: { |
| // TODO(b/138843274): Remove this preprocessor guard when bug is fixed. |
| #if TFLITE_ENABLE_FP16_CPU_BENCHMARKS |
| #if __GNUC__ && \ |
| (__clang__ || __ARM_FP16_FORMAT_IEEE || __ARM_FP16_FORMAT_ALTERNATIVE) |
| // __fp16 is available on Clang or when __ARM_FP16_FORMAT_* is defined. |
| return CreateInputTensorData<__fp16>( |
| num_elements, std::uniform_real_distribution<float>(-0.5f, 0.5f)); |
| #else |
| TFLITE_LOG(FATAL) << "Don't know how to populate tensor " << t->name |
| << " of type FLOAT16 on this platform."; |
| #endif |
| #else |
| // You need to build with -DTFLITE_ENABLE_FP16_CPU_BENCHMARKS=1 using a |
| // compiler that supports __fp16 type. Note: when using Clang and *not* |
| // linking with compiler-rt, a definition of __gnu_h2f_ieee and |
| // __gnu_f2h_ieee must be supplied. |
| TFLITE_LOG(FATAL) << "Populating the tensor " << t.name |
| << " of type FLOAT16 is disabled."; |
| #endif // TFLITE_ENABLE_FP16_CPU_BENCHMARKS |
| break; |
| } |
| case kTfLiteFloat64: { |
| return CreateInputTensorData<double>( |
| num_elements, std::uniform_real_distribution<double>(-0.5, 0.5)); |
| } |
| case kTfLiteInt64: { |
| int low = has_value_range ? low_range : 0; |
| int high = has_value_range ? high_range : 99; |
| return CreateInputTensorData<int64_t>( |
| num_elements, std::uniform_int_distribution<int64_t>(low, high)); |
| } |
| case kTfLiteInt32: { |
| int low = has_value_range ? low_range : 0; |
| int high = has_value_range ? high_range : 99; |
| return CreateInputTensorData<int32_t>( |
| num_elements, std::uniform_int_distribution<int32_t>(low, high)); |
| } |
| case kTfLiteUInt32: { |
| int low = has_value_range ? low_range : 0; |
| int high = has_value_range ? high_range : 99; |
| return CreateInputTensorData<uint32_t>( |
| num_elements, std::uniform_int_distribution<uint32_t>(low, high)); |
| } |
| case kTfLiteInt16: { |
| int low = has_value_range ? low_range : 0; |
| int high = has_value_range ? high_range : 99; |
| return CreateInputTensorData<int16_t>( |
| num_elements, std::uniform_int_distribution<int16_t>(low, high)); |
| } |
| case kTfLiteUInt8: { |
| int low = has_value_range ? low_range : 0; |
| int high = has_value_range ? high_range : 254; |
| // std::uniform_int_distribution is specified not to support char types. |
| return CreateInputTensorData<uint8_t>( |
| num_elements, std::uniform_int_distribution<uint32_t>(low, high)); |
| } |
| case kTfLiteInt8: { |
| int low = has_value_range ? low_range : -127; |
| int high = has_value_range ? high_range : 127; |
| // std::uniform_int_distribution is specified not to support char types. |
| return CreateInputTensorData<int8_t>( |
| num_elements, std::uniform_int_distribution<int32_t>(low, high)); |
| } |
| case kTfLiteString: { |
| // Don't populate input for string. Instead, return a default-initialized |
| // `InputTensorData` object directly. |
| break; |
| } |
| case kTfLiteBool: { |
| // According to std::uniform_int_distribution specification, non-int type |
| // is not supported. |
| return CreateInputTensorData<bool>( |
| num_elements, std::uniform_int_distribution<uint32_t>(0, 1)); |
| } |
| default: { |
| TFLITE_LOG(FATAL) << "Don't know how to populate tensor " << t.name |
| << " of type " << t.type; |
| } |
| } |
| return InputTensorData(); |
| } |
| |
| TfLiteStatus BenchmarkTfLiteModel::PrepareInputData() { |
| CleanUp(); |
| |
| // Note the corresponding relation between 'interpreter_inputs' and 'inputs_' |
| // (i.e. the specified input layer info) has been checked in |
| // BenchmarkTfLiteModel::Init() before calling this function. So, we simply |
| // use the corresponding input layer info to initializethe input data value |
| // properly. |
| auto interpreter_inputs = interpreter_->inputs(); |
| for (int i = 0; i < interpreter_inputs.size(); ++i) { |
| int tensor_index = interpreter_inputs[i]; |
| const TfLiteTensor& t = *(interpreter_->tensor(tensor_index)); |
| const InputLayerInfo* input_layer_info = nullptr; |
| // Note that when input layer parameters (i.e. --input_layer, |
| // --input_layer_shape) are not specified, inputs_ is empty. |
| if (!inputs_.empty()) input_layer_info = &inputs_[i]; |
| |
| InputTensorData t_data; |
| if (input_layer_info && !input_layer_info->input_file_path.empty()) { |
| t_data = LoadInputTensorData(t, input_layer_info->input_file_path); |
| } else { |
| t_data = CreateRandomTensorData(t, input_layer_info); |
| } |
| inputs_data_.push_back(std::move(t_data)); |
| } |
| return kTfLiteOk; |
| } |
| |
| TfLiteStatus BenchmarkTfLiteModel::ResetInputsAndOutputs() { |
| auto interpreter_inputs = interpreter_->inputs(); |
| // Set the values of the input tensors from inputs_data_. |
| for (int j = 0; j < interpreter_inputs.size(); ++j) { |
| int i = interpreter_inputs[j]; |
| TfLiteTensor* t = interpreter_->tensor(i); |
| if (t->type == kTfLiteString) { |
| if (inputs_data_[j].data) { |
| static_cast<DynamicBuffer*>(inputs_data_[j].data.get()) |
| ->WriteToTensor(t, /*new_shape=*/nullptr); |
| } else { |
| tflite::DynamicBuffer buffer; |
| FillRandomString(&buffer, t->dims, []() { |
| return "we're have some friends over saturday to hang out in the " |
| "yard"; |
| }); |
| buffer.WriteToTensor(t, /*new_shape=*/nullptr); |
| } |
| } else { |
| std::memcpy(t->data.raw, inputs_data_[j].data.get(), |
| inputs_data_[j].bytes); |
| } |
| } |
| |
| return kTfLiteOk; |
| } |
| |
| TfLiteStatus BenchmarkTfLiteModel::InitInterpreter() { |
| auto resolver = GetOpResolver(); |
| const int32_t num_threads = params_.Get<int32_t>("num_threads"); |
| const bool use_caching = params_.Get<bool>("use_caching"); |
| tflite::InterpreterBuilder(*model_, *resolver)(&interpreter_, num_threads); |
| if (!interpreter_) { |
| TFLITE_LOG(ERROR) << "Failed to initialize the interpreter"; |
| return kTfLiteError; |
| } |
| // Manually enable caching behavior in TF Lite interpreter. |
| if (use_caching) { |
| external_context_.reset(new tflite::ExternalCpuBackendContext()); |
| std::unique_ptr<tflite::CpuBackendContext> cpu_backend_context( |
| new tflite::CpuBackendContext()); |
| cpu_backend_context->SetUseCaching(true); |
| cpu_backend_context->SetMaxNumThreads(num_threads); |
| external_context_->set_internal_backend_context( |
| std::move(cpu_backend_context)); |
| interpreter_->SetExternalContext(kTfLiteCpuBackendContext, |
| external_context_.get()); |
| } |
| |
| return kTfLiteOk; |
| } |
| |
| TfLiteStatus BenchmarkTfLiteModel::Init() { |
| TF_LITE_ENSURE_STATUS(LoadModel()); |
| TF_LITE_ENSURE_STATUS(InitInterpreter()); |
| |
| // Install profilers if necessary right after interpreter is created so that |
| // any memory allocations inside the TFLite runtime could be recorded if the |
| // installed profiler profile memory usage information. |
| |
| // Adjust "max_profiling_buffer_entries" according to the loaded model. |
| int total_nodes = 0; |
| for (int i = 0; i < interpreter_->subgraphs_size(); ++i) { |
| // subgraph(...) is non-const member method. |
| total_nodes += static_cast<int>(interpreter_->subgraph(i)->nodes_size()); |
| } |
| if (total_nodes > params_.Get<int32_t>("max_profiling_buffer_entries")) { |
| constexpr int kProfilingBufferHeadrooms = 512; |
| params_.Set<int32_t>("max_profiling_buffer_entries", |
| total_nodes + kProfilingBufferHeadrooms); |
| } |
| profiling_listener_ = MayCreateProfilingListener(); |
| if (profiling_listener_) AddListener(profiling_listener_.get()); |
| |
| interpreter_state_printer_ = std::unique_ptr<BenchmarkListener>( |
| new InterpreterStatePrinter(interpreter_.get())); |
| AddListener(interpreter_state_printer_.get()); |
| |
| interpreter_->SetAllowFp16PrecisionForFp32(params_.Get<bool>("allow_fp16")); |
| |
| owned_delegates_.clear(); |
| |
| // Contains all ids of TfLiteNodes that have been checked to see whether it's |
| // delegated or not. |
| std::unordered_set<int> checked_node_ids; |
| for (const auto& delegate_provider : |
| tools::GetRegisteredDelegateProviders()) { |
| auto delegate = delegate_provider->CreateTfLiteDelegate(params_); |
| // It's possible that a delegate of certain type won't be created as |
| // user-specified benchmark params tells not to. |
| if (delegate == nullptr) continue; |
| if (interpreter_->ModifyGraphWithDelegate(delegate.get()) != kTfLiteOk) { |
| TFLITE_LOG(ERROR) << "Failed to apply " << delegate_provider->GetName() |
| << " delegate."; |
| return kTfLiteError; |
| } else { |
| // Ideally, such delegate info should already be computed when the |
| // delegate is being applied to the model graph. |
| int num_delegated_kernels = 0; |
| for (int i = 0; i < interpreter_->execution_plan().size(); ++i) { |
| int node_id = interpreter_->execution_plan()[i]; |
| if (checked_node_ids.find(node_id) != checked_node_ids.end()) { |
| continue; |
| } |
| const TfLiteNode& node = |
| interpreter_->node_and_registration(node_id)->first; |
| |
| // Note that the 'delegate' here could be an ExternalDelegateWrapper |
| // object that wraps an actual external delegate, in which case, |
| // 'node.delegate' will be different from 'delegate' because |
| // 'node.delegate' refers to the actual external delegate. |
| if (node.delegate != nullptr) { |
| num_delegated_kernels++; |
| checked_node_ids.insert(node_id); |
| } |
| } |
| bool fully_delegated = (num_delegated_kernels == 1 && |
| interpreter_->execution_plan().size() == 1); |
| |
| if (params_.Get<bool>("require_full_delegation") && !fully_delegated) { |
| TFLITE_LOG(ERROR) << "Disallowed CPU fallback detected."; |
| return kTfLiteError; |
| } |
| if (fully_delegated) { |
| TFLITE_LOG(INFO) << "Explicitly applied " |
| << delegate_provider->GetName() |
| << " delegate, and the model graph will be completely" |
| << " executed by the delegate."; |
| } else if (num_delegated_kernels > 0) { |
| TFLITE_LOG(INFO) << "Explicitly applied " |
| << delegate_provider->GetName() |
| << " delegate, and the model graph will be partially" |
| << " executed by the delegate w/ " |
| << num_delegated_kernels << " delegate kernels."; |
| } else { |
| TFLITE_LOG(INFO) |
| << "Though " << delegate_provider->GetName() |
| << " delegate is explicitly applied, the model graph will not be" |
| << " executed by the delegate."; |
| } |
| } |
| owned_delegates_.emplace_back(std::move(delegate)); |
| } |
| |
| auto interpreter_inputs = interpreter_->inputs(); |
| |
| if (!inputs_.empty()) { |
| TFLITE_TOOLS_CHECK_EQ(inputs_.size(), interpreter_inputs.size()) |
| << "Inputs mismatch: Model inputs #:" << inputs_.size() |
| << " expected: " << interpreter_inputs.size(); |
| } |
| |
| // Check if the tensor names match, and log a warning if it doesn't. |
| for (int j = 0; j < inputs_.size(); ++j) { |
| const InputLayerInfo& input = inputs_[j]; |
| int i = interpreter_inputs[j]; |
| TfLiteTensor* t = interpreter_->tensor(i); |
| if (input.name != t->name) { |
| TFLITE_LOG(WARN) << "Tensor # " << i << " is named " << t->name |
| << " but flags call it " << input.name; |
| } |
| |
| if (t->type != kTfLiteString && input.shape.size() != t->dims->size) { |
| TFLITE_LOG(ERROR) << "Input tensor #" << i << " should have " |
| << t->dims->size << " dimensions!"; |
| return kTfLiteError; |
| } |
| } |
| |
| // Resize all non-string tensors. |
| for (int j = 0; j < inputs_.size(); ++j) { |
| const InputLayerInfo& input = inputs_[j]; |
| int i = interpreter_inputs[j]; |
| TfLiteTensor* t = interpreter_->tensor(i); |
| if (t->type != kTfLiteString) { |
| interpreter_->ResizeInputTensor(i, input.shape); |
| } |
| } |
| |
| if (interpreter_->AllocateTensors() != kTfLiteOk) { |
| TFLITE_LOG(ERROR) << "Failed to allocate tensors!"; |
| return kTfLiteError; |
| } |
| |
| ruy_profiling_listener_.reset(new RuyProfileListener()); |
| AddListener(ruy_profiling_listener_.get()); |
| |
| return kTfLiteOk; |
| } |
| |
| TfLiteStatus BenchmarkTfLiteModel::LoadModel() { |
| std::string graph = params_.Get<std::string>("graph"); |
| model_ = tflite::FlatBufferModel::BuildFromFile(graph.c_str()); |
| if (!model_) { |
| TFLITE_LOG(ERROR) << "Failed to mmap model " << graph; |
| return kTfLiteError; |
| } |
| TFLITE_LOG(INFO) << "Loaded model " << graph; |
| return kTfLiteOk; |
| } |
| |
| std::unique_ptr<tflite::OpResolver> BenchmarkTfLiteModel::GetOpResolver() |
| const { |
| tflite::ops::builtin::BuiltinOpResolver* resolver = nullptr; |
| // When --use_xnnpack is explicitly set to false, skip applying the default |
| // XNNPACK delegate in TfLite runtime so that the original execution path |
| // based on the unmodified model graph is still excercised. |
| if (params_.HasParam("use_xnnpack") && |
| params_.HasValueSet<bool>("use_xnnpack") && |
| !params_.Get<bool>("use_xnnpack")) { |
| resolver = |
| new tflite::ops::builtin::BuiltinOpResolverWithoutDefaultDelegates(); |
| } else { |
| resolver = new tflite::ops::builtin::BuiltinOpResolver(); |
| } |
| RegisterSelectedOps(resolver); |
| return std::unique_ptr<tflite::OpResolver>(resolver); |
| } |
| |
| std::unique_ptr<BenchmarkListener> |
| BenchmarkTfLiteModel::MayCreateProfilingListener() const { |
| if (!params_.Get<bool>("enable_op_profiling")) return nullptr; |
| |
| return std::unique_ptr<BenchmarkListener>(new ProfilingListener( |
| interpreter_.get(), params_.Get<int32_t>("max_profiling_buffer_entries"), |
| params_.Get<std::string>("profiling_output_csv_file"), |
| CreateProfileSummaryFormatter( |
| !params_.Get<std::string>("profiling_output_csv_file").empty()))); |
| } |
| |
| TfLiteStatus BenchmarkTfLiteModel::RunImpl() { return interpreter_->Invoke(); } |
| |
| } // namespace benchmark |
| } // namespace tflite |