caffe2/opt/backend_cutting.cc - platform/external/pytorch - Git at Google

 #include "caffe2/opt/backend_cutting.h"
 #include "caffe2/core/logging.h"
 #include "caffe2/opt/converter.h"
 #include "nomnigraph/Converters/Dot.h"
 #include "nomnigraph/Representations/NeuralNet.h"

 #include <algorithm>
 #include <fstream>
 #include <queue>

 namespace caffe2 {
 namespace opt {

 namespace {

 using namespace nom::repr;
 using NodeRef = NNGraph::NodeRef;
 using EdgeRef = NNGraph::EdgeRef;

 struct GroupAnnotation {
   GroupAnnotation(int i, int g = -1) : group(g), in_degree(i) {}
   int group;
   int in_degree;
   bool needs_transform{true};
 };

 std::string ShowNode(NodeRef node) {
   if (nn::is<NeuralNetData>(node)) {
     const auto* nn_tensor = nn::get<NeuralNetData>(node);
     return c10::str("Tensor: ", nn_tensor->getName());
   } else if (nn::is<NeuralNetOperator>(node)) {
     const auto* nn_op = nn::get<NeuralNetOperator>(node);
     const auto& op_def =
         dyn_cast<Caffe2Annotation>(nn_op->getAnnotation())->getOperatorDef();
     return c10::str("Op: ", op_def.type());
   } else {
     CAFFE_THROW("Known node");
   }
 }


 struct VisitorContext {
   // NOLINTNEXTLINE(modernize-pass-by-value)
   VisitorContext(std::function<bool(const caffe2::OperatorDef&)> func)
       : predicate(func) {}

   std::unordered_map<NodeRef, GroupAnnotation> infos;
   std::unordered_set<NodeRef> frontier;
   std::vector<NodeRef> current_group;
   std::function<bool(const caffe2::OperatorDef&)> predicate;

   int group{0};
   bool find_supported{true};
 };

 GroupAnnotation& GetInfo(
     std::unordered_map<NodeRef, GroupAnnotation>& infos,
     NodeRef node) {
   auto it = infos.find(node);
   CAFFE_ENFORCE(it != infos.end(), "Node info not found for ", ShowNode(node));
   return it->second;
 }

 const GroupAnnotation& GetInfo(
     const std::unordered_map<NodeRef, GroupAnnotation>& infos,
     NodeRef node) {
   auto it = infos.find(node);
   CAFFE_ENFORCE(
       it != infos.end(), "Const node info not found for ", ShowNode(node));
   return it->second;
 }

 // Explore the graph in topological order until we hit stopping nodes. This is
 // based on Khan's algorithm:
 // https://en.wikipedia.org/wiki/Topological_sorting#Kahn's_algorithm
 // Precondition: nodes in `current_frontier` must have satisfy `in_degree == 0`
 void Explore(
     const std::vector<NodeRef>& current_frontier,
     VisitorContext* context) {
   std::queue<NodeRef> q;
   for (const auto n : current_frontier) {
     q.push(n);
   }

   while (!q.empty()) {
     auto node = q.front();
     q.pop();
     auto& info = GetInfo(context->infos, node);

     // Check if the node is supported, stop exploring further if not supported
     if (nn::is<NeuralNetOperator>(node)) {
       const auto* nn_op = nn::get<NeuralNetOperator>(node);
       const auto& op_def =
           dyn_cast<Caffe2Annotation>(nn_op->getAnnotation())->getOperatorDef();
       bool wanted = context->predicate(op_def);
       wanted = context->find_supported ? wanted : (!wanted);
       if (!wanted) {
         context->frontier.emplace(node);
         continue;
       }
     }

     // Adding to current group
     info.group = context->group;
     info.needs_transform = context->find_supported;
     context->current_group.push_back(node);

     // Continue exploring its fanouts
     for (const auto& out_edge : node->getOutEdges()) {
       auto child_node = out_edge->head();
       auto& child_info = GetInfo(context->infos, child_node);
       if (--child_info.in_degree == 0) {
         q.push(child_node);
       }
     }
   }
 }

 // Note: subgraph always starts with ops and ends with tensors, except for the
 // very first group, which can be all tensors
 struct TransformSubgraph {
   explicit TransformSubgraph(
       std::vector<NodeRef>&& f,
       std::vector<NodeRef>&& n,
       int id,
       bool need)
       : input_nodes(std::move(f)),
         nodes(std::move(n)),
         group_id(id),
         needed(need) {}

   TransformSubgraph(TransformSubgraph&& rhs) noexcept
       : input_nodes(std::move(rhs.input_nodes)),
         nodes(std::move(rhs.nodes)),
         external_input_refs(std::move(rhs.external_input_refs)),
         external_output_refs(std::move(rhs.external_output_refs)),
         group_id(rhs.group_id),
         needed(rhs.needed) {}

   TransformSubgraph& operator=(TransformSubgraph&& rhs) noexcept {
     input_nodes = std::move(rhs.input_nodes);
     nodes = std::move(rhs.nodes);
     external_input_refs = std::move(rhs.external_input_refs);
     external_output_refs = std::move(rhs.external_output_refs);
     group_id = rhs.group_id;
     needed = rhs.needed;
     return *this;
   }

   void Print() const {
     LOG(INFO) << "Group :" << group_id;
     LOG(INFO) << "  Input Nodes: ";
     for (const auto i : input_nodes) {
       LOG(INFO) << "    " << ShowNode(i);
     }
     LOG(INFO) << "  Nodes: ";
     for (const auto i : nodes) {
       LOG(INFO) << "    " << ShowNode(i);
     }
   }

   std::vector<NodeRef> input_nodes;
   std::vector<NodeRef> nodes;
   std::unordered_map<std::string, NodeRef> external_input_refs;
   std::unordered_map<std::string, NodeRef> external_output_refs;
   int group_id{-1};
   bool needed{true};
 };

 caffe2::NetDef ConvertToC2Net(
     const TransformSubgraph& sub,
     const std::unordered_map<NodeRef, GroupAnnotation>& infos) {
   caffe2::NetDef net;
   for (auto node : sub.nodes) {
     if (nn::is<NeuralNetOperator>(node)) {
       const auto* nn_op = nn::get<NeuralNetOperator>(node);
       assert(
           isa<Caffe2Annotation>(nn_op->getAnnotation()) &&
           "Cannot get caffe2 op from NNOp");
       const auto& op_def =
           dyn_cast<Caffe2Annotation>(nn_op->getAnnotation())->getOperatorDef();
       net.add_op()->CopyFrom(op_def);
     }
   }
   for (const auto& kv : sub.external_input_refs) {
     net.add_external_input(kv.first);
     VLOG(2) << "Adding external input: " << kv.first;
   }
   for (const auto& kv : sub.external_output_refs) {
     net.add_external_output(kv.first);
     VLOG(2) << "Adding external output: " << kv.first;
   }

   return net;
 }

 void DetectBoundaryReferences(
     TransformSubgraph* subgraph,
     const std::unordered_map<NodeRef, GroupAnnotation>& infos,
     const std::unordered_set<std::string>& original_external_output) {
   for (auto node : subgraph->nodes) {
     // inputs
     for (auto in_edge : node->getInEdges()) {
       auto parent_node = in_edge->tail();
       const auto& info = GetInfo(infos, parent_node);
       if (info.group != subgraph->group_id &&
           nn::is<NeuralNetData>(parent_node)) {
         const auto* nn_tensor = nn::get<const NeuralNetData>(parent_node);
         subgraph->external_input_refs.emplace(
             nn_tensor->getName(), parent_node);
       }
     }

     // outputs
     if (!nn::is<NeuralNetData>(node)) {
       continue;
     }
     // Note that although matched subgraph won't contain external inputs as we
     // skip the initial input tensor of matching, it is possible to contain
     // external outputs. We will mark these external outputs as boundary outputs
     // too.
     auto name = nn::get<const NeuralNetData>(node)->getName();
     if (original_external_output.count(name)) {
       subgraph->external_output_refs.emplace(name, node);
     } else {
       for (auto child_node : nn::getConsumers(node)) {
         const auto& info = GetInfo(infos, child_node);
         if (info.group != subgraph->group_id) {
           subgraph->external_output_refs.emplace(name, node);
           break;
         }
       }
     }
   }
 }

 void ReplaceSubgraph(
     const TransformSubgraph& subgraph,
     caffe2::NetDef& net_opt,
     NNGraph* g) {
   // Delete the old subgraph starting from the input nodes until we hit boundary
   // tensors
   for (auto node : subgraph.nodes) {
     if (nn::is<NeuralNetData>(node) &&
         subgraph.external_output_refs.count(
             nn::get<const NeuralNetData>(node)->getName())) {
       VLOG(2) << "Keeping " << ShowNode(node);
       continue;
     }
     VLOG(2) << "Deleting " << ShowNode(node);
     g->deleteNode(node);
   }

   // Convert new NetDef back to NNGraph
   std::unordered_map<std::string, NodeRef> tensor_map;
   for (const auto& kv : subgraph.external_input_refs) {
     tensor_map.emplace(kv.first, kv.second);
   }
   for (const auto& kv : subgraph.external_output_refs) {
     tensor_map.emplace(kv.first, kv.second);
   }
   for (auto& op : *net_opt.mutable_op()) {
     auto op_node = g->createNode();
     for (const auto& input : op.input()) {
       if (!tensor_map.count(input)) {
         tensor_map[input] =
             g->createNode(std::make_unique<nom::repr::Tensor>(input));
       }

       auto tensor_node = tensor_map[input];
       g->createEdge(tensor_node, op_node);
     }

     for (const auto& output : op.output()) {
       if (!tensor_map.count(output)) {
         tensor_map[output] =
             g->createNode(std::make_unique<nom::repr::Tensor>(output));
       }
       auto tensor_node = tensor_map[output];
       g->createEdge(op_node, tensor_node);
     }

     op_node->resetData(convertToNeuralNetOperator(op));
   }
 }

 void PruneUnrefereredNodes(NNModule* nn) {
   auto& g = nn->dataFlow;
   std::vector<NodeRef> to_delete;
   for (auto node : g.getMutableNodes()) {
     if (!nn::hasProducer(node) && !nn::hasConsumer(node)) {
       to_delete.push_back(node);
     }
   }
   for (auto i : to_delete) {
     if (nn::is<NeuralNetData>(i)) {
       auto name = nn::get<NeuralNetData>(i)->getName();
       auto it = nn->inputs.find(i);
       if (it != nn->inputs.end()) {
         VLOG(2) << "Removing external input " << name;
         nn->inputs.erase(it);
       }
       it = nn->outputs.find(i);
       if (it != nn->outputs.end()) {
         VLOG(2) << "Removing external output " << name;
         nn->outputs.erase(it);
       }
     }
     g.deleteNode(i);
   }
 }

 } // namespace
 void DumpGraph(NNGraph* g, const std::string& fname) {
   auto nnprinter = [](typename NNGraph::NodeRef node) {
     std::map<std::string, std::string> labelMap;
     assert(node->data() && "Node doesn't have data, can't render it");
     if (isa<NeuralNetOperator>(node->data())) {
       auto* op = dyn_cast<NeuralNetOperator>(node->data().get());
       const auto& op_def =
           dyn_cast<Caffe2Annotation>(op->getAnnotation())->getOperatorDef();
       int pos = -1;
       for (const auto& arg : op_def.arg()) {
         if (arg.name() == "net_pos") {
           if (arg.has_i()) {
             pos = arg.i();
           }
           break;
         }
       }
       labelMap["label"] =
           op->getName() + " (" + c10::to_string((unsigned long long)node) + ")";
       auto* annotation = op->getAnnotation();
       if (annotation && isa<Caffe2Annotation>(annotation)) {
         auto device_annotation = dyn_cast<Caffe2Annotation>(annotation);
         labelMap["label"] += "\\n[" + device_annotation->getDevice() +
             ", pos=" + c10::to_string(pos) + "]";
         auto hash = std::hash<std::string>{}(device_annotation->getDevice());
         std::stringstream hex_stream;
         hex_stream << std::hex << hash;
         labelMap["color"] = "#" + hex_stream.str().substr(0, 6);
         labelMap["fontcolor"] = labelMap["color"];
       }
       labelMap["shape"] = "box";
     } else if (isa<Data>(node->data())) {
       auto tensor = dyn_cast<NeuralNetData>(node->data().get());
       labelMap["label"] = tensor->getName();
       labelMap["label"] += "_" + c10::to_string(tensor->getVersion()) + " " +
           c10::to_string((unsigned long long)node);
     }
     return labelMap;
   };

   std::ofstream out(fname.c_str());
   if (out) {
     out << nom::converters::convertToDotString(g, nnprinter);
   } else {
     LOG(ERROR) << "Cannot create nomnigraph dump file: " << fname;
   }
 }

 CutResult OptimizeForBackend(
     caffe2::NetDef& net,
     std::function<bool(const caffe2::OperatorDef&)> supports,
     std::function<caffe2::NetDef(const caffe2::NetDef&)> transform_func,
     bool debug) {
   auto nn = convertToNNModule(net);
   auto& dfg = nn.dataFlow;

   // Initialize the group info and figure out the external/input output
   VisitorContext context(supports);
   std::vector<NodeRef> external_inputs;
   std::unordered_set<std::string> external_outputs;
   for (auto node : dfg.getMutableNodes()) {
     context.infos.emplace(
         std::piecewise_construct,
         std::forward_as_tuple(node),
         std::forward_as_tuple(node->getInEdges().size(), -1));

     if (!nn::is<NeuralNetOperator>(node)) {
       if (!nn::hasProducer(node)) {
         external_inputs.push_back(node);
       }
       if (!nn::hasConsumer(node)) {
         external_outputs.emplace(nn::get<const NeuralNetData>(node)->getName());
       }
       for (auto i = 0; i < net.external_output_size(); ++i) {
         const auto& n = net.external_output(i);
         if (n == nn::get<const NeuralNetData>(node)->getName()) {
           external_outputs.emplace(n);
         }
       }
     }
   }

   // Find unsupported and supported groups of nodes alternatively
   context.frontier.clear();
   context.current_group.clear();
   context.find_supported = false;
   std::vector<TransformSubgraph> subs;
   for (std::vector<NodeRef> frontier(
            external_inputs.begin(), external_inputs.end());
        !frontier.empty();
        context.find_supported = !context.find_supported) {
     Explore(frontier, &context);
     if (context.find_supported) {
       subs.emplace_back(
           std::move(frontier),
           std::move(context.current_group),
           context.group,
           context.find_supported);
     }

     frontier.assign(context.frontier.begin(), context.frontier.end());
     context.frontier.clear();
     context.current_group.clear();
     context.group++;
   }

   // Transform needed subgraphs one by one
   CutResult cutResult;
   cutResult.numberOfSubnets = 0;
   std::vector<caffe2::NetDef> opt_subnets;
   opt_subnets.reserve(subs.size());
   for (auto& g : subs) {
     // Generate boundary input/output edges
     DetectBoundaryReferences(&g, context.infos, external_outputs);

     caffe2::NetDef subnet = ConvertToC2Net(g, context.infos);
     // Transform the subgraph protobuf def, note that we can have less external
     // inputs/outputs but not more
     opt_subnets.emplace_back(transform_func(subnet));
     if (opt_subnets.back().op_size() > 0 && opt_subnets.back().op(0).type() == "Onnxifi") {
       cutResult.numberOfSubnets++;
     }
     ReplaceSubgraph(g, opt_subnets.back(), &dfg);
   }

   // Prune dangling nodes, because after transformation, some weights might be
   // absorbed
   PruneUnrefereredNodes(&nn);

   if (debug) {
     DumpGraph(&dfg, "dump.dot");
   }

   auto new_net = convertToCaffe2Proto(nn);
   new_net.set_name(net.name() + "_opt");
   cutResult.net = std::move(new_net);
   return cutResult;
 }

 } // namespace opt
 } // namespace caffe2
	#include "caffe2/opt/backend_cutting.h"
	#include "caffe2/core/logging.h"
	#include "caffe2/opt/converter.h"
	#include "nomnigraph/Converters/Dot.h"
	#include "nomnigraph/Representations/NeuralNet.h"

	#include <algorithm>
	#include <fstream>
	#include <queue>

	namespace caffe2 {
	namespace opt {

	namespace {

	using namespace nom::repr;
	using NodeRef = NNGraph::NodeRef;
	using EdgeRef = NNGraph::EdgeRef;

	struct GroupAnnotation {
	GroupAnnotation(int i, int g = -1) : group(g), in_degree(i) {}
	int group;
	int in_degree;
	bool needs_transform{true};
	};

	std::string ShowNode(NodeRef node) {
	if (nn::is<NeuralNetData>(node)) {
	const auto* nn_tensor = nn::get<NeuralNetData>(node);
	return c10::str("Tensor: ", nn_tensor->getName());
	} else if (nn::is<NeuralNetOperator>(node)) {
	const auto* nn_op = nn::get<NeuralNetOperator>(node);
	const auto& op_def =
	dyn_cast<Caffe2Annotation>(nn_op->getAnnotation())->getOperatorDef();
	return c10::str("Op: ", op_def.type());
	} else {
	CAFFE_THROW("Known node");
	}
	}



	struct VisitorContext {
	// NOLINTNEXTLINE(modernize-pass-by-value)
	VisitorContext(std::function<bool(const caffe2::OperatorDef&)> func)
	: predicate(func) {}

	std::unordered_map<NodeRef, GroupAnnotation> infos;
	std::unordered_set<NodeRef> frontier;
	std::vector<NodeRef> current_group;
	std::function<bool(const caffe2::OperatorDef&)> predicate;

	int group{0};
	bool find_supported{true};
	};

	GroupAnnotation& GetInfo(
	std::unordered_map<NodeRef, GroupAnnotation>& infos,
	NodeRef node) {
	auto it = infos.find(node);
	CAFFE_ENFORCE(it != infos.end(), "Node info not found for ", ShowNode(node));
	return it->second;
	}

	const GroupAnnotation& GetInfo(
	const std::unordered_map<NodeRef, GroupAnnotation>& infos,
	NodeRef node) {
	auto it = infos.find(node);
	CAFFE_ENFORCE(
	it != infos.end(), "Const node info not found for ", ShowNode(node));
	return it->second;
	}

	// Explore the graph in topological order until we hit stopping nodes. This is
	// based on Khan's algorithm:
	// https://en.wikipedia.org/wiki/Topological_sorting#Kahn's_algorithm
	// Precondition: nodes in `current_frontier` must have satisfy `in_degree == 0`
	void Explore(
	const std::vector<NodeRef>& current_frontier,
	VisitorContext* context) {
	std::queue<NodeRef> q;
	for (const auto n : current_frontier) {
	q.push(n);
	}

	while (!q.empty()) {
	auto node = q.front();
	q.pop();
	auto& info = GetInfo(context->infos, node);

	// Check if the node is supported, stop exploring further if not supported
	if (nn::is<NeuralNetOperator>(node)) {
	const auto* nn_op = nn::get<NeuralNetOperator>(node);
	const auto& op_def =
	dyn_cast<Caffe2Annotation>(nn_op->getAnnotation())->getOperatorDef();
	bool wanted = context->predicate(op_def);
	wanted = context->find_supported ? wanted : (!wanted);
	if (!wanted) {
	context->frontier.emplace(node);
	continue;
	}
	}

	// Adding to current group
	info.group = context->group;
	info.needs_transform = context->find_supported;
	context->current_group.push_back(node);

	// Continue exploring its fanouts
	for (const auto& out_edge : node->getOutEdges()) {
	auto child_node = out_edge->head();
	auto& child_info = GetInfo(context->infos, child_node);
	if (--child_info.in_degree == 0) {
	q.push(child_node);
	}
	}
	}
	}

	// Note: subgraph always starts with ops and ends with tensors, except for the
	// very first group, which can be all tensors
	struct TransformSubgraph {
	explicit TransformSubgraph(
	std::vector<NodeRef>&& f,
	std::vector<NodeRef>&& n,
	int id,
	bool need)
	: input_nodes(std::move(f)),
	nodes(std::move(n)),
	group_id(id),
	needed(need) {}

	TransformSubgraph(TransformSubgraph&& rhs) noexcept
	: input_nodes(std::move(rhs.input_nodes)),
	nodes(std::move(rhs.nodes)),
	external_input_refs(std::move(rhs.external_input_refs)),
	external_output_refs(std::move(rhs.external_output_refs)),
	group_id(rhs.group_id),
	needed(rhs.needed) {}

	TransformSubgraph& operator=(TransformSubgraph&& rhs) noexcept {
	input_nodes = std::move(rhs.input_nodes);
	nodes = std::move(rhs.nodes);
	external_input_refs = std::move(rhs.external_input_refs);
	external_output_refs = std::move(rhs.external_output_refs);
	group_id = rhs.group_id;
	needed = rhs.needed;
	return *this;
	}

	void Print() const {
	LOG(INFO) << "Group :" << group_id;
	LOG(INFO) << " Input Nodes: ";
	for (const auto i : input_nodes) {
	LOG(INFO) << " " << ShowNode(i);
	}
	LOG(INFO) << " Nodes: ";
	for (const auto i : nodes) {
	LOG(INFO) << " " << ShowNode(i);
	}
	}

	std::vector<NodeRef> input_nodes;
	std::vector<NodeRef> nodes;
	std::unordered_map<std::string, NodeRef> external_input_refs;
	std::unordered_map<std::string, NodeRef> external_output_refs;
	int group_id{-1};
	bool needed{true};
	};

	caffe2::NetDef ConvertToC2Net(
	const TransformSubgraph& sub,
	const std::unordered_map<NodeRef, GroupAnnotation>& infos) {
	caffe2::NetDef net;
	for (auto node : sub.nodes) {
	if (nn::is<NeuralNetOperator>(node)) {
	const auto* nn_op = nn::get<NeuralNetOperator>(node);
	assert(
	isa<Caffe2Annotation>(nn_op->getAnnotation()) &&
	"Cannot get caffe2 op from NNOp");
	const auto& op_def =
	dyn_cast<Caffe2Annotation>(nn_op->getAnnotation())->getOperatorDef();
	net.add_op()->CopyFrom(op_def);
	}
	}
	for (const auto& kv : sub.external_input_refs) {
	net.add_external_input(kv.first);
	VLOG(2) << "Adding external input: " << kv.first;
	}
	for (const auto& kv : sub.external_output_refs) {
	net.add_external_output(kv.first);
	VLOG(2) << "Adding external output: " << kv.first;
	}

	return net;
	}

	void DetectBoundaryReferences(
	TransformSubgraph* subgraph,
	const std::unordered_map<NodeRef, GroupAnnotation>& infos,
	const std::unordered_set<std::string>& original_external_output) {
	for (auto node : subgraph->nodes) {
	// inputs
	for (auto in_edge : node->getInEdges()) {
	auto parent_node = in_edge->tail();
	const auto& info = GetInfo(infos, parent_node);
	if (info.group != subgraph->group_id &&
	nn::is<NeuralNetData>(parent_node)) {
	const auto* nn_tensor = nn::get<const NeuralNetData>(parent_node);
	subgraph->external_input_refs.emplace(
	nn_tensor->getName(), parent_node);
	}
	}

	// outputs
	if (!nn::is<NeuralNetData>(node)) {
	continue;
	}
	// Note that although matched subgraph won't contain external inputs as we
	// skip the initial input tensor of matching, it is possible to contain
	// external outputs. We will mark these external outputs as boundary outputs
	// too.
	auto name = nn::get<const NeuralNetData>(node)->getName();
	if (original_external_output.count(name)) {
	subgraph->external_output_refs.emplace(name, node);
	} else {
	for (auto child_node : nn::getConsumers(node)) {
	const auto& info = GetInfo(infos, child_node);
	if (info.group != subgraph->group_id) {
	subgraph->external_output_refs.emplace(name, node);
	break;
	}
	}
	}
	}
	}

	void ReplaceSubgraph(
	const TransformSubgraph& subgraph,
	caffe2::NetDef& net_opt,
	NNGraph* g) {
	// Delete the old subgraph starting from the input nodes until we hit boundary
	// tensors
	for (auto node : subgraph.nodes) {
	if (nn::is<NeuralNetData>(node) &&
	subgraph.external_output_refs.count(
	nn::get<const NeuralNetData>(node)->getName())) {
	VLOG(2) << "Keeping " << ShowNode(node);
	continue;
	}
	VLOG(2) << "Deleting " << ShowNode(node);
	g->deleteNode(node);
	}

	// Convert new NetDef back to NNGraph
	std::unordered_map<std::string, NodeRef> tensor_map;
	for (const auto& kv : subgraph.external_input_refs) {
	tensor_map.emplace(kv.first, kv.second);
	}
	for (const auto& kv : subgraph.external_output_refs) {
	tensor_map.emplace(kv.first, kv.second);
	}
	for (auto& op : *net_opt.mutable_op()) {
	auto op_node = g->createNode();
	for (const auto& input : op.input()) {
	if (!tensor_map.count(input)) {
	tensor_map[input] =
	g->createNode(std::make_unique<nom::repr::Tensor>(input));
	}

	auto tensor_node = tensor_map[input];
	g->createEdge(tensor_node, op_node);
	}

	for (const auto& output : op.output()) {
	if (!tensor_map.count(output)) {
	tensor_map[output] =
	g->createNode(std::make_unique<nom::repr::Tensor>(output));
	}
	auto tensor_node = tensor_map[output];
	g->createEdge(op_node, tensor_node);
	}

	op_node->resetData(convertToNeuralNetOperator(op));
	}
	}

	void PruneUnrefereredNodes(NNModule* nn) {
	auto& g = nn->dataFlow;
	std::vector<NodeRef> to_delete;
	for (auto node : g.getMutableNodes()) {
	if (!nn::hasProducer(node) && !nn::hasConsumer(node)) {
	to_delete.push_back(node);
	}
	}
	for (auto i : to_delete) {
	if (nn::is<NeuralNetData>(i)) {
	auto name = nn::get<NeuralNetData>(i)->getName();
	auto it = nn->inputs.find(i);
	if (it != nn->inputs.end()) {
	VLOG(2) << "Removing external input " << name;
	nn->inputs.erase(it);
	}
	it = nn->outputs.find(i);
	if (it != nn->outputs.end()) {
	VLOG(2) << "Removing external output " << name;
	nn->outputs.erase(it);
	}
	}
	g.deleteNode(i);
	}
	}

	} // namespace
	void DumpGraph(NNGraph* g, const std::string& fname) {
	auto nnprinter = [](typename NNGraph::NodeRef node) {
	std::map<std::string, std::string> labelMap;
	assert(node->data() && "Node doesn't have data, can't render it");
	if (isa<NeuralNetOperator>(node->data())) {
	auto* op = dyn_cast<NeuralNetOperator>(node->data().get());
	const auto& op_def =
	dyn_cast<Caffe2Annotation>(op->getAnnotation())->getOperatorDef();
	int pos = -1;
	for (const auto& arg : op_def.arg()) {
	if (arg.name() == "net_pos") {
	if (arg.has_i()) {
	pos = arg.i();
	}
	break;
	}
	}
	labelMap["label"] =
	op->getName() + " (" + c10::to_string((unsigned long long)node) + ")";
	auto* annotation = op->getAnnotation();
	if (annotation && isa<Caffe2Annotation>(annotation)) {
	auto device_annotation = dyn_cast<Caffe2Annotation>(annotation);
	labelMap["label"] += "\\n[" + device_annotation->getDevice() +
	", pos=" + c10::to_string(pos) + "]";
	auto hash = std::hash<std::string>{}(device_annotation->getDevice());
	std::stringstream hex_stream;
	hex_stream << std::hex << hash;
	labelMap["color"] = "#" + hex_stream.str().substr(0, 6);
	labelMap["fontcolor"] = labelMap["color"];
	}
	labelMap["shape"] = "box";
	} else if (isa<Data>(node->data())) {
	auto tensor = dyn_cast<NeuralNetData>(node->data().get());
	labelMap["label"] = tensor->getName();
	labelMap["label"] += "_" + c10::to_string(tensor->getVersion()) + " " +
	c10::to_string((unsigned long long)node);
	}
	return labelMap;
	};

	std::ofstream out(fname.c_str());
	if (out) {
	out << nom::converters::convertToDotString(g, nnprinter);
	} else {
	LOG(ERROR) << "Cannot create nomnigraph dump file: " << fname;
	}
	}

	CutResult OptimizeForBackend(
	caffe2::NetDef& net,
	std::function<bool(const caffe2::OperatorDef&)> supports,
	std::function<caffe2::NetDef(const caffe2::NetDef&)> transform_func,
	bool debug) {
	auto nn = convertToNNModule(net);
	auto& dfg = nn.dataFlow;

	// Initialize the group info and figure out the external/input output
	VisitorContext context(supports);
	std::vector<NodeRef> external_inputs;
	std::unordered_set<std::string> external_outputs;
	for (auto node : dfg.getMutableNodes()) {
	context.infos.emplace(
	std::piecewise_construct,
	std::forward_as_tuple(node),
	std::forward_as_tuple(node->getInEdges().size(), -1));

	if (!nn::is<NeuralNetOperator>(node)) {
	if (!nn::hasProducer(node)) {
	external_inputs.push_back(node);
	}
	if (!nn::hasConsumer(node)) {
	external_outputs.emplace(nn::get<const NeuralNetData>(node)->getName());
	}
	for (auto i = 0; i < net.external_output_size(); ++i) {
	const auto& n = net.external_output(i);
	if (n == nn::get<const NeuralNetData>(node)->getName()) {
	external_outputs.emplace(n);
	}
	}
	}
	}

	// Find unsupported and supported groups of nodes alternatively
	context.frontier.clear();
	context.current_group.clear();
	context.find_supported = false;
	std::vector<TransformSubgraph> subs;
	for (std::vector<NodeRef> frontier(
	external_inputs.begin(), external_inputs.end());
	!frontier.empty();
	context.find_supported = !context.find_supported) {
	Explore(frontier, &context);
	if (context.find_supported) {
	subs.emplace_back(
	std::move(frontier),
	std::move(context.current_group),
	context.group,
	context.find_supported);
	}

	frontier.assign(context.frontier.begin(), context.frontier.end());
	context.frontier.clear();
	context.current_group.clear();
	context.group++;
	}

	// Transform needed subgraphs one by one
	CutResult cutResult;
	cutResult.numberOfSubnets = 0;
	std::vector<caffe2::NetDef> opt_subnets;
	opt_subnets.reserve(subs.size());
	for (auto& g : subs) {
	// Generate boundary input/output edges
	DetectBoundaryReferences(&g, context.infos, external_outputs);

	caffe2::NetDef subnet = ConvertToC2Net(g, context.infos);
	// Transform the subgraph protobuf def, note that we can have less external
	// inputs/outputs but not more
	opt_subnets.emplace_back(transform_func(subnet));
	if (opt_subnets.back().op_size() > 0 && opt_subnets.back().op(0).type() == "Onnxifi") {
	cutResult.numberOfSubnets++;
	}
	ReplaceSubgraph(g, opt_subnets.back(), &dfg);
	}

	// Prune dangling nodes, because after transformation, some weights might be
	// absorbed
	PruneUnrefereredNodes(&nn);

	if (debug) {
	DumpGraph(&dfg, "dump.dot");
	}

	auto new_net = convertToCaffe2Proto(nn);
	new_net.set_name(net.name() + "_opt");
	cutResult.net = std::move(new_net);
	return cutResult;
	}

	} // namespace opt
	} // namespace caffe2