/**
* Copyright (c) 2016-present, Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <chrono>
#include <fstream>
#include <string>
#include <thread>
#include "binaries/benchmark_helper.h"
#include "caffe2/core/blob_serialization.h"
#ifdef __CUDA_ARCH__
#include "caffe2/core/context_gpu.h"
#endif
#include "caffe2/core/init.h"
#include "caffe2/core/logging.h"
#include "caffe2/core/net.h"
#include "caffe2/core/operator.h"
#include "caffe2/core/tensor_int8.h"
#include "caffe2/utils/bench_utils.h"
#include "caffe2/utils/string_utils.h"
#include "observers/net_observer_reporter_print.h"
#include "observers/observer_config.h"
#include "observers/perf_observer.h"
using std::map;
using std::shared_ptr;
using std::string;
using std::unique_ptr;
using std::vector;
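// Install a PerfNetObserver on every net created in this process and
// report the collected timings through NetObserverReporterPrint.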
void observerConfig() {
caffe2::ClearGlobalNetObservers();
caffe2::AddGlobalNetObserverCreator([](caffe2::NetBase* subject) {
return caffe2::make_unique<caffe2::PerfNetObserver>(subject);
});
caffe2::ObserverConfig::setReporter(
caffe2::make_unique<caffe2::NetObserverReporterPrint>());
}
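// Returns true when the "cuda" backend is requested and a CUDA GPU is
// available; throws if "cuda" is requested without GPU support, and
// returns false for every other backend.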
bool backendCudaSet(const string& backend) {
bool run_on_gpu = false;
if (backend == "cuda") {
#ifdef __CUDA_ARCH__
if (caffe2::HasCudaGPU()) {
run_on_gpu = true;
} else {
CAFFE_THROW("NO GPU support on this host machine");
}
#else
CAFFE_THROW("NO GPU support");
#endif
}
return run_on_gpu;
}
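// Assign the given device type to every operator in the net.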
void setDeviceType(caffe2::NetDef* net_def, caffe2::DeviceType& run_dev) {
for (int j = 0; j < net_def->op_size(); j++) {
caffe2::OperatorDef* op = net_def->mutable_op(j);
op->mutable_device_option()->set_device_type(caffe2::TypeToProto(run_dev));
}
}
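// Map the backend name to a Caffe2 engine (e.g. "nnpack" -> "NNPACK") and
// set it on every operator in the net; "builtin" leaves the net unchanged.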
void setOperatorEngine(caffe2::NetDef* net_def, const string& backend) {
if (backend != "builtin") {
    const string engine = backend == "nnpack" ? "NNPACK"
        : backend == "eigen"                  ? "EIGEN"
        : backend == "mkl"                    ? "MKLDNN"
        : backend == "cuda"                   ? "CUDA"
        : backend == "dnnlowp"                ? "DNNLOWP"
        : backend == "dnnlowp_acc16"          ? "DNNLOWP_ACC16"
        : backend == "default"                ? ""
                                              : "NONE";
CAFFE_ENFORCE(engine != "NONE", "Backend is not supported");
for (int i = 0; i < net_def->op_size(); i++) {
caffe2::OperatorDef* op_def = net_def->mutable_op(i);
op_def->set_engine(engine);
}
}
}
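// Prepare the input blobs named in `input`. Tensors are either read from
// serialized TensorProtos files (`input_file`) or allocated uninitialized
// with the given `input_dims` and `input_type`.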
void loadInput(
shared_ptr<caffe2::Workspace> workspace,
const bool run_on_gpu,
map<string, caffe2::TensorProtos>& tensor_protos_map,
const string& input,
const string& input_file,
const string& input_dims,
const string& input_type) {
// Load input.
if (input.size()) {
vector<string> input_names = caffe2::split(',', input);
if (input_file.size()) {
vector<string> input_files = caffe2::split(',', input_file);
CAFFE_ENFORCE_EQ(
input_names.size(),
input_files.size(),
"Input name and file should have the same number.");
for (int i = 0; i < input_names.size(); ++i) {
caffe2::TensorProtos tensor_protos;
CAFFE_ENFORCE(
caffe2::ReadProtoFromFile(input_files[i], &tensor_protos));
workspace->CreateBlob(input_names[i]);
tensor_protos_map.insert(std::make_pair(input_names[i], tensor_protos));
}
} else if (input_dims.size() || input_type.size()) {
      CAFFE_ENFORCE_GT(
          input_dims.size(),
          0,
          "Input dims must be specified when input tensors are used.");
      CAFFE_ENFORCE_GT(
          input_type.size(),
          0,
          "Input type must be specified when input tensors are used.");
vector<string> input_dims_list = caffe2::split(';', input_dims);
CAFFE_ENFORCE_EQ(
input_names.size(),
input_dims_list.size(),
"Input name and dims should have the same number of items.");
vector<string> input_type_list = caffe2::split(';', input_type);
CAFFE_ENFORCE_EQ(
input_names.size(),
input_type_list.size(),
"Input name and type should have the same number of items.");
for (size_t i = 0; i < input_names.size(); ++i) {
vector<string> input_dims_str = caffe2::split(',', input_dims_list[i]);
vector<int> input_dims;
for (const string& s : input_dims_str) {
input_dims.push_back(caffe2::stoi(s));
}
caffe2::Blob* blob = workspace->GetBlob(input_names[i]);
if (blob == nullptr) {
blob = workspace->CreateBlob(input_names[i]);
}
if (run_on_gpu) {
LOG(INFO) << "Running on GPU.";
#ifdef __CUDA_ARCH__
caffe2::TensorCUDA* tensor = blob->GetMutable<caffe2::TensorCUDA>();
CHECK_NOTNULL(tensor);
tensor->Resize(input_dims);
if (input_type_list[i] == "uint8_t") {
tensor->mutable_data<uint8_t>();
} else if (input_type_list[i] == "float") {
tensor->mutable_data<float>();
} else {
CAFFE_THROW("Unsupported input type: ", input_type_list[i]);
}
#else
CAFFE_THROW("Not support GPU on mobile.");
#endif
} else {
if (input_type_list[i] == "uint8_t") {
caffe2::int8::Int8TensorCPU* tensor =
blob->GetMutable<caffe2::int8::Int8TensorCPU>();
CHECK_NOTNULL(tensor);
tensor->t.Resize(input_dims);
tensor->t.mutable_data<uint8_t>();
} else if (input_type_list[i] == "float") {
caffe2::TensorCPU* tensor = BlobGetMutableTensor(blob, caffe2::CPU);
CHECK_NOTNULL(tensor);
tensor->Resize(input_dims);
tensor->mutable_data<float>();
} else {
CAFFE_THROW("Unsupported input type: ", input_type_list[i]);
}
}
}
} else {
CAFFE_THROW(
"You requested input tensors, but neither input_file nor "
"input_dims is set.");
}
}
}
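// Fill each input blob from its TensorProtos, cycling through the protos
// by iteration number so successive runs can use different input data.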
void fillInputBlob(
shared_ptr<caffe2::Workspace> workspace,
map<string, caffe2::TensorProtos>& tensor_protos_map,
int iteration) {
if (tensor_protos_map.empty()) {
return;
}
static caffe2::TensorDeserializer serializer;
for (auto& tensor_kv : tensor_protos_map) {
caffe2::Blob* blob = workspace->GetBlob(tensor_kv.first);
if (blob == nullptr) {
blob = workspace->CreateBlob(tensor_kv.first);
}
    // TODO: support GPU and make this function a template.
int protos_size = tensor_kv.second.protos_size();
caffe2::TensorProto* tensor_proto =
tensor_kv.second.mutable_protos(iteration % protos_size);
    if (tensor_proto->data_type() == caffe2::TensorProto::STRING) {
      caffe2::TensorCPU* tensor = BlobGetMutableTensor(blob, caffe2::CPU);
      // Resize to the proto's shape first; the blob may have been created
      // empty, so mutable_data() would otherwise hold too few elements.
      vector<int64_t> dims(
          tensor_proto->dims().begin(), tensor_proto->dims().end());
      tensor->Resize(dims);
      const int total_size = tensor_proto->string_data_size();
      for (int i = 0; i < total_size; i++) {
        tensor->mutable_data<string>()[i] = tensor_proto->string_data(i);
      }
} else if (tensor_proto->data_type() == caffe2::TensorProto::FLOAT) {
vector<int64_t> dims;
for (const int64_t d : tensor_proto->dims()) {
dims.push_back(d);
}
caffe2::TensorCPU* tensor =
new caffe2::TensorCPU(dims, caffe2::DeviceType::CPU);
serializer.Deserialize(*tensor_proto, tensor);
blob->Reset(tensor);
}
    // TODO: support the remaining data types.
}
}
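// Create the net in the workspace and benchmark it: `warmup` untimed runs
// followed by `iter` measured runs, optionally wiping caches, sleeping
// before the main runs, and timing individual operators.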
void runNetwork(
shared_ptr<caffe2::Workspace> workspace,
caffe2::NetDef& net_def,
map<string, caffe2::TensorProtos>& tensor_protos_map,
const bool wipe_cache,
const bool run_individual,
const int warmup,
const int iter,
const int sleep_before_run) {
if (!net_def.has_name()) {
net_def.set_name("benchmark");
}
caffe2::NetBase* net = workspace->CreateNet(net_def);
CHECK_NOTNULL(net);
LOG(INFO) << "Starting benchmark.";
caffe2::ObserverConfig::initSampleRate(1, 1, 1, run_individual, warmup);
LOG(INFO) << "Running warmup runs.";
for (int i = 0; i < warmup; ++i) {
fillInputBlob(workspace, tensor_protos_map, i);
CAFFE_ENFORCE(net->Run(), "Warmup run ", i, " has failed.");
}
if (wipe_cache) {
caffe2::wipe_cache();
}
if (sleep_before_run > 0) {
std::this_thread::sleep_for(std::chrono::seconds(sleep_before_run));
}
LOG(INFO) << "Main runs.";
CAFFE_ENFORCE(
iter >= 0,
"Number of main runs should be non negative, provided ",
iter,
".");
for (int i = 0; i < iter; ++i) {
caffe2::ObserverConfig::initSampleRate(1, 1, 1, 0, warmup);
fillInputBlob(workspace, tensor_protos_map, i);
if (wipe_cache) {
caffe2::wipe_cache();
}
CAFFE_ENFORCE(net->Run(), "Main run ", i, " has failed.");
if (wipe_cache) {
caffe2::wipe_cache();
}
if (run_individual) {
caffe2::ObserverConfig::initSampleRate(1, 1, 1, 1, warmup);
CAFFE_ENFORCE(net->Run(), "Main run ", i, " with operator has failed.");
}
}
}
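// Write the requested output blobs ("*" selects every blob in the
// workspace) to `output_folder`, either as text or as serialized blobs.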
void writeOutput(
shared_ptr<caffe2::Workspace> workspace,
const bool run_on_gpu,
const string& output,
const string& output_folder,
const bool text_output) {
string output_prefix = output_folder.size() ? output_folder + "/" : "";
if (output.size()) {
vector<string> output_names = caffe2::split(',', output);
if (output == "*") {
output_names = workspace->Blobs();
}
for (const string& name : output_names) {
CAFFE_ENFORCE(
workspace->HasBlob(name),
"You requested a non-existing blob: ",
name);
if (text_output) {
if (run_on_gpu) {
#ifdef __CUDA_ARCH__
writeTextOutput<caffe2::CUDAContext, caffe2::TensorCUDA>(
workspace->GetBlob(name)->GetMutable<caffe2::TensorCUDA>(),
output_prefix,
name);
#else
CAFFE_THROW("Not support GPU.");
#endif
} else {
writeTextOutput<caffe2::CPUContext, caffe2::TensorCPU>(
BlobGetMutableTensor(workspace->GetBlob(name), caffe2::CPU),
output_prefix,
name);
}
} else {
string serialized = SerializeBlob(*workspace->GetBlob(name), name);
string output_filename = output_prefix + name;
caffe2::WriteStringToFile(serialized, output_filename.c_str());
}
}
}
}
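// Driver for the benchmark binaries: validates the flags, loads and
// configures the init and main nets, prepares the inputs, runs the
// benchmark, and writes the requested outputs.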
int benchmark(
int argc,
char* argv[],
const string& FLAGS_backend,
const string& FLAGS_init_net,
const string& FLAGS_input,
const string& FLAGS_input_dims,
const string& FLAGS_input_file,
const string& FLAGS_input_type,
int FLAGS_iter,
const string& FLAGS_net,
const string& FLAGS_output,
const string& FLAGS_output_folder,
bool FLAGS_run_individual,
int FLAGS_sleep_before_run,
bool FLAGS_text_output,
int FLAGS_warmup,
bool FLAGS_wipe_cache) {
  // Validate the arguments.
  {
    // Check that the files exist, since the proto reader does not report an
    // error when a file is missing.
std::ifstream net_file(FLAGS_net);
CAFFE_ENFORCE(net_file.good());
net_file.close();
std::ifstream init_net_file(FLAGS_init_net);
CAFFE_ENFORCE(init_net_file.good());
init_net_file.close();
if (FLAGS_input_file.size() > 0) {
vector<string> input_files = caffe2::split(',', FLAGS_input_file);
      for (const auto& input_file : input_files) {
std::ifstream ifile(input_file);
CAFFE_ENFORCE(ifile.good());
ifile.close();
}
}
}
observerConfig();
caffe2::ShowLogInfoToStderr();
  auto workspace = std::make_shared<caffe2::Workspace>();
bool run_on_gpu = backendCudaSet(FLAGS_backend);
// Run initialization network.
caffe2::NetDef init_net_def;
CAFFE_ENFORCE(ReadProtoFromFile(FLAGS_init_net, &init_net_def));
setOperatorEngine(&init_net_def, FLAGS_backend);
CAFFE_ENFORCE(workspace->RunNetOnce(init_net_def));
// Run main network.
caffe2::NetDef net_def;
CAFFE_ENFORCE(ReadProtoFromFile(FLAGS_net, &net_def));
setOperatorEngine(&net_def, FLAGS_backend);
map<string, caffe2::TensorProtos> tensor_protos_map;
loadInput(
workspace,
run_on_gpu,
tensor_protos_map,
FLAGS_input,
FLAGS_input_file,
FLAGS_input_dims,
FLAGS_input_type);
runNetwork(
workspace,
net_def,
tensor_protos_map,
FLAGS_wipe_cache,
FLAGS_run_individual,
FLAGS_warmup,
FLAGS_iter,
FLAGS_sleep_before_run);
writeOutput(
workspace,
run_on_gpu,
FLAGS_output,
FLAGS_output_folder,
FLAGS_text_output);
return 0;
}