| #include "caffe2/opt/onnxifi_transformer.h" |
| |
| #include <fstream> |
| #include <iostream> |
| #include <unordered_set> |
| |
| #include <google/protobuf/text_format.h> |
| |
| #include "caffe2/core/context.h" |
| #include "caffe2/core/logging.h" |
| #include "caffe2/core/operator.h" |
| #include "caffe2/core/tensor.h" |
| #include "caffe2/onnx/onnx_exporter.h" |
| #include "caffe2/opt/backend_cutting.h" |
| |
| namespace caffe2 { |
| |
| namespace { |
| |
| // TODO(yinghai): Remove the awkward conversion between unordered_map and map |
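| // Run shape/type inference over pred_net, seeded with the shapes of blobs |
| // already present in the workspace plus the ordered hints, and return the |
| // combined result as an unordered_map. |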
| std::unordered_map<std::string, TensorShape> InferShapes( |
| Workspace* ws, |
| NetDef* pred_net, |
| CaffeMap<std::string, TensorShape>* shape_hints_ordered) { |
| // Populate shapes from the workspace |
| const std::vector<std::string>& ws_blobs = ws->Blobs(); |
| for (const auto& s : ws_blobs) { |
| auto shape = GetTensorShapeOfBlob(ws->GetBlob(s)); |
| if (!shape.unknown_shape()) { |
| shape_hints_ordered->emplace(s, std::move(shape)); |
| } |
| } |
| |
| std::vector<NetDef*> nets; |
| nets.emplace_back(pred_net); |
| InferBlobShapesAndTypes(*shape_hints_ordered, nets); |
| std::unordered_map<std::string, TensorShape> shape_hints; |
| for (const auto& kv : *shape_hints_ordered) { |
| shape_hints.emplace(kv.first, kv.second); |
| } |
| |
| return shape_hints; |
| } |
| |
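| // Dump the graph portion of an ONNX model to a file in protobuf text |
| // format, for debugging. |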
| void DumpModel( |
| const ::ONNX_NAMESPACE::ModelProto& model, |
| const std::string& fname) { |
| std::ofstream ff(fname); |
| std::string body; |
| ::google::protobuf::TextFormat::PrintToString(model.graph(), &body); |
| ff << body << std::endl; |
| ff.close(); |
| } |
| |
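| // Convert a list of blob names into ONNX ValueInfoProtos, attaching a float |
| // tensor type and a concrete shape whenever a shape hint is available. |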
| std::vector<::ONNX_NAMESPACE::ValueInfoProto> ConvertToValueInfo( |
| const std::vector<std::string>& names, |
| const std::unordered_map<std::string, TensorShape>& shape_hints) { |
| std::vector<::ONNX_NAMESPACE::ValueInfoProto> r; |
| for (const auto& s : names) { |
| r.emplace_back(); |
| auto& value_info = r.back(); |
| value_info.set_name(s); |
| const auto it = shape_hints.find(s); |
| if (it == shape_hints.end()) { |
| LOG(WARNING) << "Cannot get shape of " << s; |
| } else { |
| auto* tensor_type = value_info.mutable_type()->mutable_tensor_type(); |
| tensor_type->set_elem_type( |
| ::ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT); |
| auto* shape = tensor_type->mutable_shape(); |
| for (int i = 0; i < it->second.dims().size(); ++i) { |
| shape->add_dim()->set_dim_value(it->second.dims(i)); |
| } |
| } |
| } |
| return r; |
| } |
| |
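| // Fill in the model-level metadata: IR version, producer name and the |
| // opset import. |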
| void FillModelInfo(::ONNX_NAMESPACE::ModelProto* model) { |
| model->set_ir_version(::ONNX_NAMESPACE::Version::IR_VERSION); |
| model->set_producer_name("caffe2"); |
| auto* opset_id = model->add_opset_import(); |
| opset_id->set_domain(""); |
| opset_id->set_version(3); |
| } |
| } // namespace |
| |
| OnnxifiTransformer::OnnxifiTransformer(bool debug) : debug_(debug) { |
| lib_ = onnx::initOnnxifiLibrary(); |
| CAFFE_ENFORCE(lib_, "Cannot initialize ONNXIFI library"); |
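| // Query the number of backends first. Per the ONNXIFI spec, passing a null |
| // buffer reports the required count and returns ONNXIFI_STATUS_FALLBACK. |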
| CAFFE_ENFORCE_EQ( |
| lib_->onnxGetBackendIDs(nullptr, &num_backends_), |
| ONNXIFI_STATUS_FALLBACK); |
| CAFFE_ENFORCE_GT( |
| num_backends_, 0, "At least 1 onnxifi backend should be available"); |
| backend_ids_.resize(num_backends_); |
| CAFFE_ENFORCE_EQ( |
| lib_->onnxGetBackendIDs(backend_ids_.data(), &num_backends_), |
| ONNXIFI_STATUS_SUCCESS); |
| } |
| |
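| // Build a single Onnxifi op wrapping the serialized ONNX model. The op |
| // carries the model itself, the (renamed, original) name pairs of the |
| // initializers to fetch from the workspace, the boundary inputs/outputs, |
| // and per-output size hints. |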
| OperatorDef OnnxifiTransformer::BuildOnnxifiOp( |
| const std::string& onnx_model_str, |
| const std::unordered_map<std::string, std::vector<int>>& output_size_hints, |
| const std::unordered_set<std::string>& initialization_list, |
| const caffe2::NetDef& net) { |
| OperatorDef op; |
| op.set_type("Onnxifi"); |
| auto* onnx_model_arg = op.add_arg(); |
| onnx_model_arg->set_name("onnx_model"); |
| onnx_model_arg->set_s(onnx_model_str); |
| |
| // Add the names of the initializer blobs that we want to fetch from the |
| // workspace later. Each initializer contributes a pair of strings: its |
| // SSA-renamed name in the ONNX model and its original workspace name. |
| auto* initializers_arg = op.add_arg(); |
| initializers_arg->set_name("initializers"); |
| for (const auto& s : initialization_list) { |
| initializers_arg->add_strings(s); |
| initializers_arg->add_strings(input_mapping_.at(s)); |
| } |
| |
| // Add the input/output |
| for (const auto& input : net.external_input()) { |
| if (!initialization_list.count(input)) { |
| op.add_input(input); |
| } |
| } |
| for (const auto& output : net.external_output()) { |
| op.add_output(output); |
| } |
| |
| // Add output size hints |
| for (int i = 0; i < op.output_size(); ++i) { |
| const auto& o = op.output(i); |
| const auto it = output_size_hints.find(o); |
| if (it != output_size_hints.end()) { |
| const auto& dims = it->second; |
| auto* output_size_hint_arg = op.add_arg(); |
| output_size_hint_arg->set_name(MakeString("output_size_hint_", i)); |
| for (const auto& d : dims) { |
| output_size_hint_arg->add_ints(d); |
| } |
| |
| VLOG(2) << "Adding output hint: " << o; |
| } |
| } |
| return op; |
| } |
| |
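| // Convert a runnable subgraph into a NetDef containing a single Onnxifi op. |
| // The subgraph's C2 ops are exported to ONNX, extra initializers are fed |
| // into the workspace, and value infos are attached to all inputs/outputs. |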
| NetDef OnnxifiTransformer::SubnetToOnnxifiOp( |
| const caffe2::NetDef& net, |
| Workspace* ws, |
| onnx::OnnxExporter* exporter, |
| std::unordered_map<std::string, TensorShape>* shape_hints) { |
| ::ONNX_NAMESPACE::ModelProto onnx_model; |
| FillModelInfo(&onnx_model); |
| |
| // Convert c2 ops to onnx ops, add const weights if there are any |
| DeviceOption option; |
| CPUContext context(option); |
| context.SwitchToDevice(); |
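| // Caffe2OpToOnnxNodes returns both the ONNX nodes for the op and any |
| // constant tensors the conversion synthesizes; the latter must be fed into |
| // the workspace so that the Onnxifi op can fetch them as initializers. |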
| for (const auto& op : net.op()) { |
| const auto results = exporter->Caffe2OpToOnnxNodes(op, *shape_hints); |
| for (const auto& n : results.first) { |
| onnx_model.mutable_graph()->add_node()->CopyFrom(n); |
| } |
| for (const auto& t : results.second) { |
| VLOG(2) << "Adding extra init tensor: " << t.name(); |
| CAFFE_ENFORCE_EQ( |
| t.data_type(), |
| ::ONNX_NAMESPACE::TensorProto::FLOAT, |
| "Only supports conversion of float type for now"); |
| TensorShape shape; |
| shape.mutable_dims()->CopyFrom(t.dims()); |
| shape_hints->emplace(t.name(), std::move(shape)); |
| |
| // Feed into workspace as CPU Tensors |
| auto* blob = ws->CreateBlob(t.name()); |
| auto* cpu_tensor = blob->GetMutableTensor(CPU); |
| std::vector<TIndex> dims(t.dims().begin(), t.dims().end()); |
| cpu_tensor->Resize(dims); |
| context.CopyBytesSameDevice( |
| cpu_tensor->size() * sizeof(float), |
| static_cast<const void*>(t.raw_data().data()), |
| cpu_tensor->raw_mutable_data(TypeMeta::Make<float>())); |
| context.FinishDeviceComputation(); |
| |
| // Add mappings |
| CAFFE_ENFORCE( |
| input_mapping_.emplace(t.name(), t.name()).second, |
| MakeString("Tensor ", t.name(), " already exists in the workspace")); |
| } |
| } |
| |
| // Convert outputs and compute output shape hints |
| std::vector<std::string> io_names; |
| for (const auto& output : net.external_output()) { |
| io_names.emplace_back(output); |
| } |
| auto io_vec = ConvertToValueInfo(io_names, *shape_hints); |
| std::unordered_map<std::string, std::vector<int>> output_shape_hints; |
| for (const auto& i : io_vec) { |
| onnx_model.mutable_graph()->add_output()->CopyFrom(i); |
| auto ret = output_shape_hints.emplace(i.name(), std::vector<int>()); |
| auto& vec = ret.first->second; |
| const auto it = shape_hints->find(i.name()); |
| CAFFE_ENFORCE( |
| it != shape_hints->end(), |
| "Cannot find shape info for output ", |
| i.name()); |
| const auto& shape = it->second; |
| for (int k = 0; k < shape.dims().size(); ++k) { |
| vec.push_back(shape.dims(k)); |
| } |
| } |
| |
| // Convert inputs and figure out weights. Every blob that already lives in |
| // the (mapped) workspace is treated as a weight. |
| std::unordered_set<std::string> weights; |
| const std::vector<std::string>& ws_blobs = ws->Blobs(); |
| for (const auto& s : ws_blobs) { |
| VLOG(2) << "Adding weight: " << s; |
| weights.emplace(s); |
| } |
| |
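| // Collect the subgraph inputs in a deterministic order, classifying each |
| // as either a weight (to be fetched from the workspace as an initializer) |
| // or a boundary activation coming from the surrounding net. |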
| std::unordered_set<std::string> total_inputs; |
| std::unordered_set<std::string> initialization_list; |
| std::vector<std::string> total_inputs_vec; |
| |
| // Extra intermediate weights created during conversion |
| for (const auto& extra_weight : onnx_model.graph().initializer()) { |
| if (total_inputs.emplace(extra_weight.name()).second) { |
| total_inputs_vec.emplace_back(extra_weight.name()); |
| } |
| } |
| // Boundary inputs, should not be weights |
| std::unordered_set<std::string> boundary_inputs; |
| for (const auto& i : net.external_input()) { |
| boundary_inputs.emplace(i); |
| } |
| |
| for (const auto& op : net.op()) { |
| for (const auto& input : op.input()) { |
| if (total_inputs.emplace(input).second && weights.count(input)) { |
| // We add weights as inputs too |
| total_inputs_vec.emplace_back(input); |
| initialization_list.emplace(input); |
| VLOG(2) << "Add input weights: " << input; |
| } else if (boundary_inputs.count(input)) { |
| VLOG(2) << "Adding boundary input: " << input; |
| total_inputs_vec.emplace_back(input); |
| } |
| } |
| } |
| io_vec = ConvertToValueInfo(total_inputs_vec, *shape_hints); |
| for (const auto& i : io_vec) { |
| onnx_model.mutable_graph()->add_input()->CopyFrom(i); |
| } |
| |
| // Debugging stuff |
| if (debug_) { |
| DumpModel(onnx_model, "debug.onnxtxt"); |
| } |
| |
| // Onnx model is ready. Build ONNXIFI Op |
| std::string model_str; |
| onnx_model.SerializeToString(&model_str); |
| NetDef net_opt; |
| auto* op = net_opt.add_op(); |
| *op = BuildOnnxifiOp(model_str, output_shape_hints, initialization_list, net); |
| for (const auto& i : op->input()) { |
| net_opt.add_external_input(i); |
| } |
| for (const auto& i : op->output()) { |
| net_opt.add_external_output(i); |
| } |
| |
| return net_opt; |
| } |
| |
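| // Rewrite pred_net into SSA form, record the mapping from renamed weight |
| // names to their original workspace names in input_mapping_, and remap the |
| // user-supplied shape hints onto the renamed blobs. |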
| CaffeMap<std::string, TensorShape> OnnxifiTransformer::SsaRewriteAndMapNames( |
| Workspace* ws, |
| NetDef* pred_net, |
| const std::unordered_map<std::string, TensorShape>& input_shape_hints) { |
| input_mapping_ = onnx::SsaRewrite(nullptr, pred_net); |
| std::unordered_map<std::string, std::string> input_reverse_mapping; |
| std::vector<std::string> external_inputs; |
| for (const auto& kv : input_mapping_) { |
| input_reverse_mapping.emplace(kv.second, kv.first); |
| if (!ws->HasBlob(kv.second)) { |
| external_inputs.emplace_back(kv.first); |
| } |
| } |
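| // Drop the mappings for true network inputs: input_mapping_ should only |
| // cover weights that actually live in the workspace. |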
| for (const auto& i : external_inputs) { |
| input_mapping_.erase(i); |
| } |
| CaffeMap<std::string, TensorShape> shape_hints_ordered; |
| for (const auto& kv : input_shape_hints) { |
| const auto it = input_reverse_mapping.find(kv.first); |
| if (it != input_reverse_mapping.end()) { |
| LOG(INFO) << "Adding input hint: " << it->second; |
| shape_hints_ordered.emplace(it->second, kv.second); |
| } else { |
| shape_hints_ordered.emplace(kv.first, kv.second); |
| } |
| } |
| return shape_hints_ordered; |
| } |
| |
| // Cut off the runnable part of the net and replace it with Onnxifi ops. |
| // Assumes the nets were topologically sorted. |
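| // Typical usage (a sketch; exact call sites may differ): |
| //   OnnxifiTransformer transformer(/* debug */ false); |
| //   transformer.Transform(&workspace, &predict_net, shape_hints); |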
| void OnnxifiTransformer::Transform( |
| Workspace* ws, |
| NetDef* pred_net, |
| const std::unordered_map<std::string, TensorShape>& input_shape_hints) { |
| CAFFE_ENFORCE(ws); |
| CAFFE_ENFORCE(pred_net, "Predict net cannot be nullptr"); |
| auto shape_hints_ordered = |
| SsaRewriteAndMapNames(ws, pred_net, input_shape_hints); |
| Workspace mapped_ws(ws, input_mapping_); |
| auto shape_hints = InferShapes(&mapped_ws, pred_net, &shape_hints_ordered); |
| |
| onnx::OnnxExporter exporter(nullptr, true); |
| |
| // Function that tells whether the ONNXIFI backend supports a given C2 op |
| // TODO: choose backend id |
| onnxifi_library* backend = lib_; |
| onnxBackendID backend_id = backend_ids_[0]; |
| auto supports = |
| [&exporter, &shape_hints, backend, backend_id]( |
| const caffe2::OperatorDef& op) { |
| const OpSchema* schema = OpSchemaRegistry::Schema(op.type()); |
| // NB: this might not be a hard constraint as we can just export C2 |
| // domain specific ops to ONNX |
| if (!schema || schema->onnx_schema().empty()) { |
| LOG(INFO) << "Cannot export c2 op " << op.type() |
| << " to onnx as there is no corresponding ONNX schema."; |
| return false; |
| } |
| |
| ::ONNX_NAMESPACE::ModelProto onnx_model; |
| FillModelInfo(&onnx_model); |
| auto results = exporter.Caffe2OpToOnnxNodes(op, shape_hints); |
| for (const auto& n : results.first) { |
| onnx_model.mutable_graph()->add_node()->CopyFrom(n); |
| } |
| std::string onnx_model_str; |
| onnx_model.SerializeToString(&onnx_model_str); |
| auto ret = backend->onnxGetBackendCompatibility( |
| backend_id, onnx_model_str.size(), onnx_model_str.c_str()); |
| if (ret != ONNXIFI_STATUS_SUCCESS) { |
| LOG(INFO) << "Don't support onnx for " << op.type() << " c2 op (" |
| << ret << ")"; |
| return false; |
| } else { |
| return true; |
| } |
| }; |
| |
| // Function to convert a runnable subgraph into an Onnxifi op. Note that to |
| // keep the interface clean, we do the double conversion from C2 ops to ONNX |
| // ops here, but that should be OK as the cost is really small. We also need |
| // to keep the same exporter throughout the process to avoid duplicated |
| // dummy name generation. |
| onnx::OnnxExporter exporter2(nullptr, true); |
| auto onnxifi_converter = [this, &mapped_ws, &shape_hints, &exporter2]( |
| const caffe2::NetDef& net) mutable { |
| return SubnetToOnnxifiOp(net, &mapped_ws, &exporter2, &shape_hints); |
| }; |
| |
| NetDef net_opt = |
| opt::OptimizeForBackend(*pred_net, supports, onnxifi_converter); |
| |
| // TODO: figure out a proper place to handle the device option |
| net_opt.mutable_device_option()->CopyFrom(pred_net->device_option()); |
| pred_net->Swap(&net_opt); |
| } |
| |
| } // namespace caffe2 |