/**
 * Copyright (c) 2016-present, Facebook, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <chrono>
#include <fstream>
#include <string>
#include <thread>

#include <binaries/benchmark_helper.h>
#include "caffe2/core/blob_serialization.h"
#ifdef __CUDA_ARCH__
#include "caffe2/core/context_gpu.h"
#endif
#include "caffe2/core/init.h"
#include "caffe2/core/logging.h"
#include "caffe2/core/net.h"
#include "caffe2/core/operator.h"
#include "caffe2/core/tensor_int8.h"
#include "caffe2/utils/bench_utils.h"
#include "caffe2/utils/string_utils.h"
#include <observers/net_observer_reporter_print.h>
#include <observers/observer_config.h>
#include <observers/perf_observer.h>

using std::map;
using std::shared_ptr;
using std::string;
using std::unique_ptr;
using std::vector;

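// Install a PerfNetObserver on every net created from here on, and route its
// measurements through the printing reporter so per-run (and, when enabled,
// per-operator) latencies show up in the log.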
void observerConfig() {
  caffe2::ClearGlobalNetObservers();
  caffe2::AddGlobalNetObserverCreator([](caffe2::NetBase* subject) {
    return caffe2::make_unique<caffe2::PerfNetObserver>(subject);
  });
  caffe2::ObserverConfig::setReporter(
      caffe2::make_unique<caffe2::NetObserverReporterPrint>());
}

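// Returns true when the "cuda" backend is requested and a CUDA-capable GPU is
// present; throws if CUDA is requested but unavailable. All other backends
// run on the CPU.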
bool backendCudaSet(const string& backend) {
  bool run_on_gpu = false;
  if (backend == "cuda") {
#ifdef __CUDA_ARCH__
    if (caffe2::HasCudaGPU()) {
      run_on_gpu = true;
    } else {
      CAFFE_THROW("No GPU support on this host machine");
    }
#else
    CAFFE_THROW("No GPU support in this build");
#endif
  }
  return run_on_gpu;
}

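// Stamp the given device type onto the device_option of every operator in
// the net, placing the whole net on a single device.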
void setDeviceType(caffe2::NetDef* net_def, caffe2::DeviceType& run_dev) {
  for (int j = 0; j < net_def->op_size(); j++) {
    caffe2::OperatorDef* op = net_def->mutable_op(j);
    op->mutable_device_option()->set_device_type(caffe2::TypeToProto(run_dev));
  }
}

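// Map the backend name onto the engine string recorded on each operator
// (e.g. "nnpack" -> "NNPACK"). "builtin" leaves the operators untouched and
// "default" clears the engine so the framework picks one.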
void setOperatorEngine(caffe2::NetDef* net_def, const string& backend) {
  if (backend != "builtin") {
    string engine = backend == "nnpack" ? "NNPACK"
        : backend == "eigen"            ? "EIGEN"
        : backend == "mkl"              ? "MKLDNN"
        : backend == "cuda"             ? "CUDA"
        : backend == "dnnlowp"          ? "DNNLOWP"
        : backend == "dnnlowp_acc16"    ? "DNNLOWP_ACC16"
        : backend == "default"          ? ""
                                        : "NONE";
    CAFFE_ENFORCE(engine != "NONE", "Backend is not supported: ", backend);
    for (int i = 0; i < net_def->op_size(); i++) {
      caffe2::OperatorDef* op_def = net_def->mutable_op(i);
      op_def->set_engine(engine);
    }
  }
}

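// Populate the input blobs of the workspace, either from files holding
// serialized TensorProtos or from shapes/types given on the command line,
// and record any file-based inputs in tensor_protos_map so they can be
// re-filled on every iteration. Returns the number of data entries per
// input blob (1 when inputs are created from dims/type alone).
//
// Illustrative flag values (a sketch, not taken from this file):
//   --input "data,label" --input_file "data.pb,label.pb"
// or
//   --input "data" --input_dims "1,3,224,224" --input_type "float"
// Names and files are comma-separated; per-input dims/type lists are
// separated by ';'.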
int loadInput(
    shared_ptr<caffe2::Workspace> workspace,
    const bool run_on_gpu,
    map<string, caffe2::TensorProtos>& tensor_protos_map,
    const string& input,
    const string& input_file,
    const string& input_dims,
    const string& input_type) {
  // Number of data entries per input blob; all inputs must agree.
  int blob_num = 1;
  // Load input.
  if (input.size()) {
    vector<string> input_names = caffe2::split(',', input);
    if (input_file.size()) {
      vector<string> input_files = caffe2::split(',', input_file);
      CAFFE_ENFORCE_EQ(
          input_names.size(),
          input_files.size(),
          "Input names and files should have the same number of items.");
      for (size_t i = 0; i < input_names.size(); ++i) {
        caffe2::TensorProtos tensor_protos;
        CAFFE_ENFORCE(
            caffe2::ReadProtoFromFile(input_files[i], &tensor_protos));
        workspace->CreateBlob(input_names[i]);
        tensor_protos_map.insert(std::make_pair(input_names[i], tensor_protos));
      }
      // Check that all inputs have the same number of entries.
      blob_num = tensor_protos_map[input_names[0]].protos_size();
      for (size_t i = 1; i < input_names.size(); ++i) {
        int bnum = tensor_protos_map[input_names[i]].protos_size();
        CAFFE_ENFORCE_EQ(
            blob_num,
            bnum,
            "The number of entries is not the same for all inputs.");
      }
    } else if (input_dims.size() || input_type.size()) {
      CAFFE_ENFORCE_GT(
          input_dims.size(),
          0,
          "Input dims must be specified when input tensors are used.");
      CAFFE_ENFORCE_GT(
          input_type.size(),
          0,
          "Input type must be specified when input tensors are used.");

      vector<string> input_dims_list = caffe2::split(';', input_dims);
      CAFFE_ENFORCE_EQ(
          input_names.size(),
          input_dims_list.size(),
          "Input names and dims should have the same number of items.");
      vector<string> input_type_list = caffe2::split(';', input_type);
      CAFFE_ENFORCE_EQ(
          input_names.size(),
          input_type_list.size(),
          "Input names and types should have the same number of items.");
      for (size_t i = 0; i < input_names.size(); ++i) {
        vector<string> input_dims_str = caffe2::split(',', input_dims_list[i]);
        vector<int> input_dims;
        for (const string& s : input_dims_str) {
          input_dims.push_back(c10::stoi(s));
        }
        caffe2::Blob* blob = workspace->GetBlob(input_names[i]);
        if (blob == nullptr) {
          blob = workspace->CreateBlob(input_names[i]);
        }
        if (run_on_gpu) {
          LOG(INFO) << "Running on GPU.";
#ifdef __CUDA_ARCH__
          caffe2::TensorCUDA* tensor = blob->GetMutable<caffe2::TensorCUDA>();
          CHECK_NOTNULL(tensor);
          tensor->Resize(input_dims);
          if (input_type_list[i] == "uint8_t") {
            tensor->mutable_data<uint8_t>();
          } else if (input_type_list[i] == "float") {
            tensor->mutable_data<float>();
          } else {
            CAFFE_THROW("Unsupported input type: ", input_type_list[i]);
          }
#else
          CAFFE_THROW("GPU is not supported on mobile.");
#endif
        } else {
          if (input_type_list[i] == "uint8_t") {
            caffe2::int8::Int8TensorCPU* tensor =
                blob->GetMutable<caffe2::int8::Int8TensorCPU>();
            CHECK_NOTNULL(tensor);
            tensor->t.Resize(input_dims);
            tensor->t.mutable_data<uint8_t>();
          } else if (input_type_list[i] == "float") {
            caffe2::TensorCPU* tensor = BlobGetMutableTensor(blob, caffe2::CPU);
            CHECK_NOTNULL(tensor);
            tensor->Resize(input_dims);
            tensor->mutable_data<float>();
          } else if (input_type_list[i] == "int") {
            caffe2::TensorCPU* tensor = BlobGetMutableTensor(blob, caffe2::CPU);
            CHECK_NOTNULL(tensor);
            tensor->Resize(input_dims);
            tensor->mutable_data<int>();
          } else {
            CAFFE_THROW("Unsupported input type: ", input_type_list[i]);
          }
        }
      }
    } else {
      CAFFE_THROW(
          "You requested input tensors, but neither input_file nor "
          "input_dims is set.");
    }
  }
  return blob_num;
}

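// Deserialize entry (iteration % protos_size) of each input's TensorProtos
// into the corresponding workspace blob. Inputs with a single entry are
// written once and then left alone, so later iterations do not wipe warm
// caches; cache clearing is left to wipe_cache.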
void fillInputBlob(
    shared_ptr<caffe2::Workspace> workspace,
    map<string, caffe2::TensorProtos>& tensor_protos_map,
    int iteration) {
  if (tensor_protos_map.empty()) {
    return;
  }
  static caffe2::TensorDeserializer deserializer;
  for (auto& tensor_kv : tensor_protos_map) {
    caffe2::Blob* blob = workspace->GetBlob(tensor_kv.first);
    if (blob == nullptr) {
      blob = workspace->CreateBlob(tensor_kv.first);
    }
    // TODO: support GPU and make this function a template.
    int protos_size = tensor_kv.second.protos_size();
    if (protos_size == 1 && iteration > 0) {
      // Do not overwrite the input data when there is only one entry, since
      // re-filling it would clear all caches; rely on wipe_cache for cache
      // clearing instead.
      continue;
    }
    caffe2::TensorProto* tensor_proto =
        tensor_kv.second.mutable_protos(iteration % protos_size);
    BlobSetTensor(blob, deserializer.Deserialize(*tensor_proto));
    // TODO: support other tensor types.
  }
}

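// Run the benchmark in up to three phases: `warmup` untimed runs, `iter`
// timed whole-net runs (optionally writing outputs, wiping caches, and
// sleeping between iterations), and, when run_individual is set, `iter`
// further runs with per-operator observation enabled.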
void runNetwork(
    shared_ptr<caffe2::Workspace> workspace,
    caffe2::NetDef& net_def,
    map<string, caffe2::TensorProtos>& tensor_protos_map,
    const bool wipe_cache,
    const bool run_individual,
    const bool run_on_gpu,
    const bool text_output,
    const int warmup,
    const int iter,
    const int num_blobs,
    const int sleep_before_run,
    const int sleep_between_iteration,
    const int sleep_between_net_and_operator,
    const std::string& output,
    const std::string& output_folder) {
  if (!net_def.has_name()) {
    net_def.set_name("benchmark");
  }

  caffe2::NetBase* net = workspace->CreateNet(net_def);
  CHECK_NOTNULL(net);

|  | LOG(INFO) << "Starting benchmark."; | 
|  | caffe2::ObserverConfig::initSampleRate(1, 1, 1, run_individual, warmup); | 
|  | LOG(INFO) << "Running warmup runs."; | 
|  | for (int i = 0; i < warmup; ++i) { | 
|  | fillInputBlob(workspace, tensor_protos_map, i); | 
|  | CAFFE_ENFORCE(net->Run(), "Warmup run ", i, " has failed."); | 
|  | } | 
|  |  | 
|  | if (wipe_cache) { | 
|  | caffe2::wipe_cache(); | 
|  | } | 
|  | if (sleep_before_run > 0) { | 
|  | std::this_thread::sleep_for(std::chrono::seconds(sleep_before_run)); | 
|  | } | 
|  | LOG(INFO) << "Main runs."; | 
|  | CAFFE_ENFORCE( | 
|  | iter >= 0, | 
|  | "Number of main runs should be non negative, provided ", | 
|  | iter, | 
|  | "."); | 
|  | LOG(INFO) << "net runs."; | 
|  | for (int i = 0; i < iter; ++i) { | 
|  | caffe2::ObserverConfig::initSampleRate(1, 1, 1, 0, warmup); | 
|  | fillInputBlob(workspace, tensor_protos_map, i); | 
|  | if (wipe_cache) { | 
|  | caffe2::wipe_cache(); | 
|  | } | 
|  | CAFFE_ENFORCE(net->Run(), "Main run ", i, " has failed."); | 
|  | // Write the output for the first num_blobs times | 
|  | writeOutput( | 
|  | workspace, | 
|  | run_on_gpu, | 
|  | output, | 
|  | output_folder, | 
|  | text_output, | 
|  | i, | 
|  | num_blobs); | 
|  | if (wipe_cache) { | 
|  | caffe2::wipe_cache(); | 
|  | } | 
|  | if (sleep_between_iteration > 0) { | 
|  | std::this_thread::sleep_for( | 
|  | std::chrono::seconds(sleep_between_iteration)); | 
|  | } | 
|  | } | 
  if (run_individual) {
    LOG(INFO) << "Individual operator runs.";
    if (sleep_between_net_and_operator > 0) {
      std::this_thread::sleep_for(
          std::chrono::seconds(sleep_between_net_and_operator));
    }
    for (int i = 0; i < iter; ++i) {
      caffe2::ObserverConfig::initSampleRate(1, 1, 1, 1, warmup);
      fillInputBlob(workspace, tensor_protos_map, i);
      CAFFE_ENFORCE(
          net->Run(), "Individual operator run ", i, " has failed.");
      if (wipe_cache) {
        caffe2::wipe_cache();
      }
      if (sleep_between_iteration > 0) {
        std::this_thread::sleep_for(
            std::chrono::seconds(sleep_between_iteration));
      }
    }
  }
}

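// Write the requested output blobs, named in `output` as a comma-separated
// list ("*" selects every blob in the workspace). Text mode appends one
// entry per iteration to a file per blob; binary mode serializes the blob
// and only supports a single iteration.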
void writeOutput(
    shared_ptr<caffe2::Workspace> workspace,
    const bool run_on_gpu,
    const string& output,
    const string& output_folder,
    const bool text_output,
    const int index,
    const int num_blobs) {
  if (output.size() == 0) {
    return;
  }
  string output_prefix = output_folder.size() ? output_folder + "/" : "";
  vector<string> output_names = caffe2::split(',', output);
  if (output == "*") {
    output_names = workspace->Blobs();
  }
  for (const string& name : output_names) {
    CAFFE_ENFORCE(
        workspace->HasBlob(name),
        "You requested a non-existing blob: ",
        name);
    if (text_output) {
      if (run_on_gpu) {
#ifdef __CUDA_ARCH__
        writeTextOutput<caffe2::CUDAContext, caffe2::TensorCUDA>(
            workspace->GetBlob(name)->GetMutable<caffe2::TensorCUDA>(),
            output_prefix,
            name,
            index,
            num_blobs);
#else
        CAFFE_THROW("GPU is not supported in this build.");
#endif
      } else {
        writeTextOutput<caffe2::CPUContext, caffe2::TensorCPU>(
            BlobGetMutableTensor(workspace->GetBlob(name), caffe2::CPU),
            output_prefix,
            name,
            index,
            num_blobs);
      }
    } else {
      // Multiple entries per blob are not supported in binary mode.
      CAFFE_ENFORCE(
          index == 0, "Binary output only supports a single entry per blob.");
      string serialized = SerializeBlob(*workspace->GetBlob(name), name);
      string output_filename = output_prefix + name;
      caffe2::WriteStringToFile(serialized, output_filename.c_str());
    }
  }
}

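// Shared entry point for the benchmark binaries: validate that the model and
// input files exist, run the init net once, load the inputs, and benchmark
// the main net.
//
// A hypothetical invocation of a binary built on this helper (the binary
// name and flag spellings are illustrative; they mirror the parameters
// below):
//   caffe2_benchmark --init_net init.pb --net predict.pb \
//     --input data --input_dims "1,3,224,224" --input_type float \
//     --warmup 10 --iter 50 --run_individual true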
int benchmark(
    int argc,
    char* argv[],
    const string& FLAGS_backend,
    const string& FLAGS_init_net,
    const string& FLAGS_input,
    const string& FLAGS_input_dims,
    const string& FLAGS_input_file,
    const string& FLAGS_input_type,
    int FLAGS_iter,
    const string& FLAGS_net,
    const string& FLAGS_output,
    const string& FLAGS_output_folder,
    bool FLAGS_run_individual,
    int FLAGS_sleep_before_run,
    int FLAGS_sleep_between_iteration,
    int FLAGS_sleep_between_net_and_operator,
    bool FLAGS_text_output,
    int FLAGS_warmup,
    bool FLAGS_wipe_cache) {
  // Validate the arguments.
  {
    // Check that the files exist up front, since the proto reader does not
    // fail when a file is missing.
    std::ifstream net_file(FLAGS_net);
    CAFFE_ENFORCE(net_file.good(), "Net file does not exist: ", FLAGS_net);
    net_file.close();

    std::ifstream init_net_file(FLAGS_init_net);
    CAFFE_ENFORCE(
        init_net_file.good(), "Init net file does not exist: ", FLAGS_init_net);
    init_net_file.close();

    if (FLAGS_input_file.size() > 0) {
      vector<string> input_files = caffe2::split(',', FLAGS_input_file);
      for (const auto& input_file : input_files) {
        std::ifstream ifile(input_file);
        CAFFE_ENFORCE(ifile.good(), "Input file does not exist: ", input_file);
        ifile.close();
      }
    }
  }

  observerConfig();
  caffe2::ShowLogInfoToStderr();

  auto workspace = std::make_shared<caffe2::Workspace>();
  bool run_on_gpu = backendCudaSet(FLAGS_backend);

  // Run the initialization network once to create the parameter blobs.
  caffe2::NetDef init_net_def;
  CAFFE_ENFORCE(ReadProtoFromFile(FLAGS_init_net, &init_net_def));
  setOperatorEngine(&init_net_def, FLAGS_backend);
  CAFFE_ENFORCE(workspace->RunNetOnce(init_net_def));

  // Run the main network.
  caffe2::NetDef net_def;
  CAFFE_ENFORCE(ReadProtoFromFile(FLAGS_net, &net_def));
  setOperatorEngine(&net_def, FLAGS_backend);

  map<string, caffe2::TensorProtos> tensor_protos_map;

  int num_blobs = loadInput(
      workspace,
      run_on_gpu,
      tensor_protos_map,
      FLAGS_input,
      FLAGS_input_file,
      FLAGS_input_dims,
      FLAGS_input_type);

  runNetwork(
      workspace,
      net_def,
      tensor_protos_map,
      FLAGS_wipe_cache,
      FLAGS_run_individual,
      run_on_gpu,
      FLAGS_text_output,
      FLAGS_warmup,
      FLAGS_iter,
      num_blobs,
      FLAGS_sleep_before_run,
      FLAGS_sleep_between_iteration,
      FLAGS_sleep_between_net_and_operator,
      FLAGS_output,
      FLAGS_output_folder);

  return 0;
}