caffe2/opt/tvm_transformer.cc - platform/external/pytorch - Git at Google

 #include "caffe2/opt/tvm_transformer.h"
 #include "caffe2/opt/backend_cutting.h"

 namespace caffe2 {

 NetDef TvmTransformer::buildTvmOp(
     const caffe2::NetDef& net,
     const std::unordered_set<std::string>& weights,
     const ShapeInfoMap& shape_hints) {
   // NOLINTNEXTLINE(clang-diagnostic-sign-compare)
   if (opts_.min_ops > net.op_size()) {
     return net;
   }
   caffe2::NetDef net_opt;
   auto* op = net_opt.add_op();
   op->set_type("TVMJit");

   // Remove the second output of Concat/Reshape from external_output.
   // And figure out what primary inputs of the net is sequence look-ups
   std::unordered_set<std::string> split_infos;
   std::unordered_set<std::string> input_set(
       net.external_input().begin(), net.external_input().end());
   std::unordered_set<std::string> seq_input_set;
   for (const auto& op0 : net.op()) {
     if ((op0.type() == "Concat" || op0.type() == "Reshape") &&
         op0.output_size() == 2) {
       split_infos.emplace(op0.output(1));
     } else if (
         op0.type() == "SparseLengthsSum" ||
         op0.type() == "SparseLengthsSumFused8BitRowwise") {
       // The indices input of SparseLengthSum should be of SEQ type
       if (op0.input_size() > 1 && input_set.count(op0.input(1))) {
         seq_input_set.emplace(op0.input(1));
       }
     } else if (
         op0.type() == "SparseLengthsWeightedSum" ||
         op0.type() == "SparseLengthsWeightedSumFused8BitRowwise") {
       // The weight and indices inputs of SparseLengthWeightedSum should be of
       // SEQ type
       if (op0.input_size() > 1 && input_set.count(op0.input(1))) {
         seq_input_set.emplace(op0.input(1));
       }
       if (op0.input_size() > 2 && input_set.count(op0.input(2))) {
         seq_input_set.emplace(op0.input(2));
       }
     }
   }

   // C2 operator bind input/output by position (they can be rewritten by e.g.
   // Memonger) while TVM runtime bind them by name. Therefore, we need to record
   // the input/output names.
   auto* input_arg = op->add_arg();
   input_arg->set_name("inputs");
   auto* output_arg = op->add_arg();
   output_arg->set_name("outputs");

   // We expose both inputs and weights as inputs of TVMJitOp
   for (const auto& i : net.external_input()) {
     net_opt.add_external_input(i);
     op->add_input(i);
     input_arg->add_strings(i);
   }
   for (const auto& i : net.external_output()) {
     if (split_infos.count(i)) {
       continue;
     }
     net_opt.add_external_output(i);
     op->add_output(i);
     output_arg->add_strings(i);
   }

   // Record the referred weights
   auto* w_arg = op->add_arg();
   std::unordered_set<std::string> referred_weights;
   for (const auto& op0 : net.op()) {
     for (const auto& i : op0.input()) {
       if (weights.count(i)) {
         referred_weights.emplace(i);
       }
     }
   }
   w_arg->set_name("weights");
   for (const auto& w : referred_weights) {
     w_arg->add_strings(w);
   }

   // Add input shape info in "input_shape_info" argument of the net
   if (!opts_.profiling_based_jit) {
     auto* shape_arg = op->add_arg();
     shape_arg->set_name("input_shape_info");
     for (const auto& i : net_opt.external_input()) {
       shape_arg->mutable_tensors()->Add()->CopyFrom(
           wrapShapeInfoIntoTensorProto(i, shape_hints.at(i)));
     }
   }

   // Add original net as a fallback
   auto* original_net_arg = op->add_arg();
   original_net_arg->set_name("original_net");
   original_net_arg->mutable_n()->CopyFrom(net);

   // Add model id
   AddArgument("model_id", model_id_, op);

   // Add op id
   AddArgument("tvm_op_id", tvm_op_id_++, op);

   // Add nominal batch size
   AddArgument("nominal_batch_size", opts_.bound_shape_spec.max_batch_size, op);

   // Add nominal sequence size
   AddArgument("nominal_seq_size", opts_.bound_shape_spec.max_seq_size, op);

   // Indices of the input blobs with sequence type
   auto* seq_input_indices_arg = op->add_arg();
   seq_input_indices_arg->set_name("seq_input_indices");
   int64_t input_idx = 0;
   for (const auto& input : net_opt.external_input()) {
     if (seq_input_set.count(input)) {
       seq_input_indices_arg->add_ints(input_idx);
     }
     ++input_idx;
   }

   if (opts_.debug) {
     AddArgument("debug", 1, op);
   }

   if (opts_.profiling_based_jit) {
     AddArgument("profiling_based_jit", 1, op);
   }

   return net_opt;
 }

 // Cutting off the runnable part and replace with TVMJitOPs. Asssume the nets
 // were topologically sorted
 void TvmTransformer::transform(
     Workspace* ws,
     NetDef* pred_net,
     const std::vector<std::string>& weight_names,
     const ShapeInfoMap& input_shape_hints,
     const std::unordered_set<int>& blocklisted_ops) {
   CAFFE_ENFORCE(ws);
   CAFFE_ENFORCE(pred_net, "Predict net cannot be nullptr");

   // Save the args of the net so that we can copy it to opt net later
   std::vector<Argument> args;
   for (const auto& arg : pred_net->arg()) {
     args.emplace_back(arg);
   }

   // Get model id and reset TVM op id to 0
   model_id_ = getModelId(*pred_net);
   tvm_op_id_ = 0;

   std::unordered_set<std::string> weights(
       weight_names.begin(), weight_names.end());

   // input_shape_hints should only contain shapes of inputs and not activations
   ShapeInfoMap shape_hints;
   if (!opts_.profiling_based_jit) {
     shape_hints =
         inferShapes(ws, pred_net, input_shape_hints, opts_.bound_shape_spec);
   }

   if (opts_.debug) {
     dumpNet(*pred_net, shape_hints, "debug_net.pbtxt");
   }

   // We are ready to transform the net
   NetDef net_opt =
       applyTvmTransform(pred_net, weights, blocklisted_ops, shape_hints);

   // Copy the properties
   for (const auto& arg : args) {
     net_opt.add_arg()->CopyFrom(arg);
   }
   net_opt.mutable_device_option()->CopyFrom(pred_net->device_option());
   pred_net->Swap(&net_opt);
   if (opts_.debug) {
     dumpNet(*pred_net, shape_hints, "debug_full_opt_net.pbtxt");
   }
 }

 const std::unordered_set<std::string>& TvmTransformer::getSupportedOps() {
   const static std::unordered_set<std::string> supported_ops{
       "Add",
       "BatchGather",
       "BatchMatMul",
       "Cast",
       "Clip",
       "Concat",
       "Copy",
       "DotProduct",
       "EnsureCPUOutput",
       "ExpandDims",
       "FbFCPacked",
       "FC",
       "FCTransposed",
       "Flatten",
       "Fused8BitRowwiseQuantizedToFloat",
       "Logit",
       "MatMul",
       "Mul",
       "Relu",
       "Reshape",
       "ReplaceNaN",
       "Sigmoid",
       "Slice",
       "Softmax",
       "Split",
       "Sum",
       "Tanh",
       "Transpose",
       "UnPackRecords",
   };
   return supported_ops;
 }

 bool TvmTransformer::canConvertFullGraph(
     const caffe2::NetDef& net,
     const std::unordered_set<int>& blocklisted_ops) {
   const auto& supported_ops = getSupportedOps();
   for (const auto& op : net.op()) {
     int pos =
         ArgumentHelper::GetSingleArgument<OperatorDef, int>(op, kNetPos, -1);
     if (blocklisted_ops.count(pos) || supported_ops.count(op.type()) == 0) {
       return false;
     }
   }
   return true;
 }

 NetDef TvmTransformer::applyTvmTransform(
     NetDef* pred_net,
     const std::unordered_set<std::string>& weights,
     const std::unordered_set<int>& blocklisted_ops,
     const ShapeInfoMap& shape_hints) {
   const auto profiling_based_jit = opts_.profiling_based_jit;
   auto tvm_supports = [&blocklisted_ops, &shape_hints, &profiling_based_jit](
                           const caffe2::OperatorDef& op) {
     const auto& supported_ops = getSupportedOps();
     try {
       // If the op position is block listed, return false
       int pos =
           ArgumentHelper::GetSingleArgument<OperatorDef, int>(op, kNetPos, -1);
       if (blocklisted_ops.count(pos)) {
         LOG(INFO) << "op is being blocklisted, " << op.type() << " at position " << pos;
         return false;
       }

       // If we don't have proper shape info for the op, we cannot compile it
       // properly, return false
       if (!profiling_based_jit) {
         for (const auto& i : op.input()) {
           if (shape_hints.find(i) == shape_hints.end()) {
             LOG(INFO) << "Skipping op " << op.type()
                       << " due to missing shape info for input " << i;
             return false;
           }
         }
       }

       // If TVM c2 frontend doesn't support this op, return false
       // TODO: This should be something like TVMC2Frontend::supports(op);
       return (supported_ops.count(op.type()) != 0);
     } catch (const std::exception& ex) {
       LOG(ERROR) << "Caught exception when querying op " << op.type()
                  << ", what: " << ex.what();
       return false;
     }
   };
   auto tvm_op_converter =
       [this, &weights, &shape_hints](const caffe2::NetDef& net) {
         return buildTvmOp(net, weights, shape_hints);
       };

   return opt::OptimizeForBackend(*pred_net, tvm_supports, tvm_op_converter).net;
 }

 void tvmTransform(
     NetDef* net,
     Workspace* ws,
     const std::vector<std::string>& input_names,
     const std::vector<std::string>& output_names,
     const std::vector<std::string>& weight_names,
     const ShapeInfoMap& shape_hints,
     const std::unordered_set<int>& blocklisted_ops,
     int32_t max_batch_size,
     int32_t max_seq_size,
     int32_t num_embeddings,
     int32_t embedding_size,
     int32_t tvm_min_ops,
     bool tvm_profiling_based_jit,
     bool debug) {
   TvmTransformOptions opts;
   opts.bound_shape_spec.max_batch_size = max_batch_size;
   opts.bound_shape_spec.max_seq_size = max_seq_size;
   opts.bound_shape_spec.num_embeddings = num_embeddings;
   opts.bound_shape_spec.embedding_length = embedding_size;
   opts.min_ops = tvm_min_ops;
   opts.profiling_based_jit = tvm_profiling_based_jit;
   opts.debug = debug;
   TvmTransformer ts(opts);

   // Clean up the external input/output of the net
   cleanUpPredictNet(net, input_names, output_names, weight_names);

   ts.transform(ws, net, weight_names, shape_hints, blocklisted_ops);
 }

 void cleanUpPredictNet(
     NetDef* net,
     const std::vector<std::string>& input_names,
     const std::vector<std::string>& output_names,
     const std::vector<std::string>& weight_names) {
   net->mutable_external_input()->Clear();
   net->mutable_external_output()->Clear();
   for (const auto& i : input_names) {
     net->add_external_input(i);
   }
   for (const auto& w : weight_names) {
     net->add_external_input(w);
   }
   for (const auto& o : output_names) {
     net->add_external_output(o);
   }
 }
 } // namespace caffe2
	#include "caffe2/opt/tvm_transformer.h"
	#include "caffe2/opt/backend_cutting.h"

	namespace caffe2 {

	NetDef TvmTransformer::buildTvmOp(
	const caffe2::NetDef& net,
	const std::unordered_set<std::string>& weights,
	const ShapeInfoMap& shape_hints) {
	// NOLINTNEXTLINE(clang-diagnostic-sign-compare)
	if (opts_.min_ops > net.op_size()) {
	return net;
	}
	caffe2::NetDef net_opt;
	auto* op = net_opt.add_op();
	op->set_type("TVMJit");

	// Remove the second output of Concat/Reshape from external_output.
	// And figure out what primary inputs of the net is sequence look-ups
	std::unordered_set<std::string> split_infos;
	std::unordered_set<std::string> input_set(
	net.external_input().begin(), net.external_input().end());
	std::unordered_set<std::string> seq_input_set;
	for (const auto& op0 : net.op()) {
	if ((op0.type() == "Concat" \|\| op0.type() == "Reshape") &&
	op0.output_size() == 2) {
	split_infos.emplace(op0.output(1));
	} else if (
	op0.type() == "SparseLengthsSum" \|\|
	op0.type() == "SparseLengthsSumFused8BitRowwise") {
	// The indices input of SparseLengthSum should be of SEQ type
	if (op0.input_size() > 1 && input_set.count(op0.input(1))) {
	seq_input_set.emplace(op0.input(1));
	}
	} else if (
	op0.type() == "SparseLengthsWeightedSum" \|\|
	op0.type() == "SparseLengthsWeightedSumFused8BitRowwise") {
	// The weight and indices inputs of SparseLengthWeightedSum should be of
	// SEQ type
	if (op0.input_size() > 1 && input_set.count(op0.input(1))) {
	seq_input_set.emplace(op0.input(1));
	}
	if (op0.input_size() > 2 && input_set.count(op0.input(2))) {
	seq_input_set.emplace(op0.input(2));
	}
	}
	}

	// C2 operator bind input/output by position (they can be rewritten by e.g.
	// Memonger) while TVM runtime bind them by name. Therefore, we need to record
	// the input/output names.
	auto* input_arg = op->add_arg();
	input_arg->set_name("inputs");
	auto* output_arg = op->add_arg();
	output_arg->set_name("outputs");

	// We expose both inputs and weights as inputs of TVMJitOp
	for (const auto& i : net.external_input()) {
	net_opt.add_external_input(i);
	op->add_input(i);
	input_arg->add_strings(i);
	}
	for (const auto& i : net.external_output()) {
	if (split_infos.count(i)) {
	continue;
	}
	net_opt.add_external_output(i);
	op->add_output(i);
	output_arg->add_strings(i);
	}

	// Record the referred weights
	auto* w_arg = op->add_arg();
	std::unordered_set<std::string> referred_weights;
	for (const auto& op0 : net.op()) {
	for (const auto& i : op0.input()) {
	if (weights.count(i)) {
	referred_weights.emplace(i);
	}
	}
	}
	w_arg->set_name("weights");
	for (const auto& w : referred_weights) {
	w_arg->add_strings(w);
	}

	// Add input shape info in "input_shape_info" argument of the net
	if (!opts_.profiling_based_jit) {
	auto* shape_arg = op->add_arg();
	shape_arg->set_name("input_shape_info");
	for (const auto& i : net_opt.external_input()) {
	shape_arg->mutable_tensors()->Add()->CopyFrom(
	wrapShapeInfoIntoTensorProto(i, shape_hints.at(i)));
	}
	}

	// Add original net as a fallback
	auto* original_net_arg = op->add_arg();
	original_net_arg->set_name("original_net");
	original_net_arg->mutable_n()->CopyFrom(net);

	// Add model id
	AddArgument("model_id", model_id_, op);

	// Add op id
	AddArgument("tvm_op_id", tvm_op_id_++, op);

	// Add nominal batch size
	AddArgument("nominal_batch_size", opts_.bound_shape_spec.max_batch_size, op);

	// Add nominal sequence size
	AddArgument("nominal_seq_size", opts_.bound_shape_spec.max_seq_size, op);

	// Indices of the input blobs with sequence type
	auto* seq_input_indices_arg = op->add_arg();
	seq_input_indices_arg->set_name("seq_input_indices");
	int64_t input_idx = 0;
	for (const auto& input : net_opt.external_input()) {
	if (seq_input_set.count(input)) {
	seq_input_indices_arg->add_ints(input_idx);
	}
	++input_idx;
	}

	if (opts_.debug) {
	AddArgument("debug", 1, op);
	}

	if (opts_.profiling_based_jit) {
	AddArgument("profiling_based_jit", 1, op);
	}

	return net_opt;
	}

	// Cutting off the runnable part and replace with TVMJitOPs. Asssume the nets
	// were topologically sorted
	void TvmTransformer::transform(
	Workspace* ws,
	NetDef* pred_net,
	const std::vector<std::string>& weight_names,
	const ShapeInfoMap& input_shape_hints,
	const std::unordered_set<int>& blocklisted_ops) {
	CAFFE_ENFORCE(ws);
	CAFFE_ENFORCE(pred_net, "Predict net cannot be nullptr");

	// Save the args of the net so that we can copy it to opt net later
	std::vector<Argument> args;
	for (const auto& arg : pred_net->arg()) {
	args.emplace_back(arg);
	}

	// Get model id and reset TVM op id to 0
	model_id_ = getModelId(*pred_net);
	tvm_op_id_ = 0;

	std::unordered_set<std::string> weights(
	weight_names.begin(), weight_names.end());

	// input_shape_hints should only contain shapes of inputs and not activations
	ShapeInfoMap shape_hints;
	if (!opts_.profiling_based_jit) {
	shape_hints =
	inferShapes(ws, pred_net, input_shape_hints, opts_.bound_shape_spec);
	}

	if (opts_.debug) {
	dumpNet(*pred_net, shape_hints, "debug_net.pbtxt");
	}

	// We are ready to transform the net
	NetDef net_opt =
	applyTvmTransform(pred_net, weights, blocklisted_ops, shape_hints);

	// Copy the properties
	for (const auto& arg : args) {
	net_opt.add_arg()->CopyFrom(arg);
	}
	net_opt.mutable_device_option()->CopyFrom(pred_net->device_option());
	pred_net->Swap(&net_opt);
	if (opts_.debug) {
	dumpNet(*pred_net, shape_hints, "debug_full_opt_net.pbtxt");
	}
	}

	const std::unordered_set<std::string>& TvmTransformer::getSupportedOps() {
	const static std::unordered_set<std::string> supported_ops{
	"Add",
	"BatchGather",
	"BatchMatMul",
	"Cast",
	"Clip",
	"Concat",
	"Copy",
	"DotProduct",
	"EnsureCPUOutput",
	"ExpandDims",
	"FbFCPacked",
	"FC",
	"FCTransposed",
	"Flatten",
	"Fused8BitRowwiseQuantizedToFloat",
	"Logit",
	"MatMul",
	"Mul",
	"Relu",
	"Reshape",
	"ReplaceNaN",
	"Sigmoid",
	"Slice",
	"Softmax",
	"Split",
	"Sum",
	"Tanh",
	"Transpose",
	"UnPackRecords",
	};
	return supported_ops;
	}

	bool TvmTransformer::canConvertFullGraph(
	const caffe2::NetDef& net,
	const std::unordered_set<int>& blocklisted_ops) {
	const auto& supported_ops = getSupportedOps();
	for (const auto& op : net.op()) {
	int pos =
	ArgumentHelper::GetSingleArgument<OperatorDef, int>(op, kNetPos, -1);
	if (blocklisted_ops.count(pos) \|\| supported_ops.count(op.type()) == 0) {
	return false;
	}
	}
	return true;
	}

	NetDef TvmTransformer::applyTvmTransform(
	NetDef* pred_net,
	const std::unordered_set<std::string>& weights,
	const std::unordered_set<int>& blocklisted_ops,
	const ShapeInfoMap& shape_hints) {
	const auto profiling_based_jit = opts_.profiling_based_jit;
	auto tvm_supports = [&blocklisted_ops, &shape_hints, &profiling_based_jit](
	const caffe2::OperatorDef& op) {
	const auto& supported_ops = getSupportedOps();
	try {
	// If the op position is block listed, return false
	int pos =
	ArgumentHelper::GetSingleArgument<OperatorDef, int>(op, kNetPos, -1);
	if (blocklisted_ops.count(pos)) {
	LOG(INFO) << "op is being blocklisted, " << op.type() << " at position " << pos;
	return false;
	}

	// If we don't have proper shape info for the op, we cannot compile it
	// properly, return false
	if (!profiling_based_jit) {
	for (const auto& i : op.input()) {
	if (shape_hints.find(i) == shape_hints.end()) {
	LOG(INFO) << "Skipping op " << op.type()
	<< " due to missing shape info for input " << i;
	return false;
	}
	}
	}

	// If TVM c2 frontend doesn't support this op, return false
	// TODO: This should be something like TVMC2Frontend::supports(op);
	return (supported_ops.count(op.type()) != 0);
	} catch (const std::exception& ex) {
	LOG(ERROR) << "Caught exception when querying op " << op.type()
	<< ", what: " << ex.what();
	return false;
	}
	};
	auto tvm_op_converter =
	[this, &weights, &shape_hints](const caffe2::NetDef& net) {
	return buildTvmOp(net, weights, shape_hints);
	};

	return opt::OptimizeForBackend(*pred_net, tvm_supports, tvm_op_converter).net;
	}

	void tvmTransform(
	NetDef* net,
	Workspace* ws,
	const std::vector<std::string>& input_names,
	const std::vector<std::string>& output_names,
	const std::vector<std::string>& weight_names,
	const ShapeInfoMap& shape_hints,
	const std::unordered_set<int>& blocklisted_ops,
	int32_t max_batch_size,
	int32_t max_seq_size,
	int32_t num_embeddings,
	int32_t embedding_size,
	int32_t tvm_min_ops,
	bool tvm_profiling_based_jit,
	bool debug) {
	TvmTransformOptions opts;
	opts.bound_shape_spec.max_batch_size = max_batch_size;
	opts.bound_shape_spec.max_seq_size = max_seq_size;
	opts.bound_shape_spec.num_embeddings = num_embeddings;
	opts.bound_shape_spec.embedding_length = embedding_size;
	opts.min_ops = tvm_min_ops;
	opts.profiling_based_jit = tvm_profiling_based_jit;
	opts.debug = debug;
	TvmTransformer ts(opts);

	// Clean up the external input/output of the net
	cleanUpPredictNet(net, input_names, output_names, weight_names);

	ts.transform(ws, net, weight_names, shape_hints, blocklisted_ops);
	}

	void cleanUpPredictNet(
	NetDef* net,
	const std::vector<std::string>& input_names,
	const std::vector<std::string>& output_names,
	const std::vector<std::string>& weight_names) {
	net->mutable_external_input()->Clear();
	net->mutable_external_output()->Clear();
	for (const auto& i : input_names) {
	net->add_external_input(i);
	}
	for (const auto& w : weight_names) {
	net->add_external_input(w);
	}
	for (const auto& o : output_names) {
	net->add_external_output(o);
	}
	}
	} // namespace caffe2