|  | #include "caffe2/opt/tvm_transformer.h" | 
|  | #include "caffe2/opt/backend_cutting.h" | 
|  |  | 
|  | namespace caffe2 { | 
|  |  | 
|  | NetDef TvmTransformer::buildTvmOp( | 
|  | const caffe2::NetDef& net, | 
|  | const std::unordered_set<std::string>& weights, | 
|  | const ShapeInfoMap& shape_hints) { | 
|  | // NOLINTNEXTLINE(clang-diagnostic-sign-compare) | 
|  | if (opts_.min_ops > net.op_size()) { | 
|  | return net; | 
|  | } | 
|  | caffe2::NetDef net_opt; | 
|  | auto* op = net_opt.add_op(); | 
|  | op->set_type("TVMJit"); | 
|  |  | 
|  | // Remove the second output of Concat/Reshape from external_output. | 
|  | // And figure out what primary inputs of the net is sequence look-ups | 
|  | std::unordered_set<std::string> split_infos; | 
|  | std::unordered_set<std::string> input_set( | 
|  | net.external_input().begin(), net.external_input().end()); | 
|  | std::unordered_set<std::string> seq_input_set; | 
|  | for (const auto& op0 : net.op()) { | 
|  | if ((op0.type() == "Concat" || op0.type() == "Reshape") && | 
|  | op0.output_size() == 2) { | 
|  | split_infos.emplace(op0.output(1)); | 
|  | } else if ( | 
|  | op0.type() == "SparseLengthsSum" || | 
|  | op0.type() == "SparseLengthsSumFused8BitRowwise") { | 
|  | // The indices input of SparseLengthSum should be of SEQ type | 
|  | if (op0.input_size() > 1 && input_set.count(op0.input(1))) { | 
|  | seq_input_set.emplace(op0.input(1)); | 
|  | } | 
|  | } else if ( | 
|  | op0.type() == "SparseLengthsWeightedSum" || | 
|  | op0.type() == "SparseLengthsWeightedSumFused8BitRowwise") { | 
|  | // The weight and indices inputs of SparseLengthWeightedSum should be of | 
|  | // SEQ type | 
|  | if (op0.input_size() > 1 && input_set.count(op0.input(1))) { | 
|  | seq_input_set.emplace(op0.input(1)); | 
|  | } | 
|  | if (op0.input_size() > 2 && input_set.count(op0.input(2))) { | 
|  | seq_input_set.emplace(op0.input(2)); | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | // C2 operator bind input/output by position (they can be rewritten by e.g. | 
|  | // Memonger) while TVM runtime bind them by name. Therefore, we need to record | 
|  | // the input/output names. | 
|  | auto* input_arg = op->add_arg(); | 
|  | input_arg->set_name("inputs"); | 
|  | auto* output_arg = op->add_arg(); | 
|  | output_arg->set_name("outputs"); | 
|  |  | 
|  | // We expose both inputs and weights as inputs of TVMJitOp | 
|  | for (const auto& i : net.external_input()) { | 
|  | net_opt.add_external_input(i); | 
|  | op->add_input(i); | 
|  | input_arg->add_strings(i); | 
|  | } | 
|  | for (const auto& i : net.external_output()) { | 
|  | if (split_infos.count(i)) { | 
|  | continue; | 
|  | } | 
|  | net_opt.add_external_output(i); | 
|  | op->add_output(i); | 
|  | output_arg->add_strings(i); | 
|  | } | 
|  |  | 
|  | // Record the referred weights | 
|  | auto* w_arg = op->add_arg(); | 
|  | std::unordered_set<std::string> referred_weights; | 
|  | for (const auto& op0 : net.op()) { | 
|  | for (const auto& i : op0.input()) { | 
|  | if (weights.count(i)) { | 
|  | referred_weights.emplace(i); | 
|  | } | 
|  | } | 
|  | } | 
|  | w_arg->set_name("weights"); | 
|  | for (const auto& w : referred_weights) { | 
|  | w_arg->add_strings(w); | 
|  | } | 
|  |  | 
|  | // Add input shape info in "input_shape_info" argument of the net | 
|  | if (!opts_.profiling_based_jit) { | 
|  | auto* shape_arg = op->add_arg(); | 
|  | shape_arg->set_name("input_shape_info"); | 
|  | for (const auto& i : net_opt.external_input()) { | 
|  | shape_arg->mutable_tensors()->Add()->CopyFrom( | 
|  | wrapShapeInfoIntoTensorProto(i, shape_hints.at(i))); | 
|  | } | 
|  | } | 
|  |  | 
|  | // Add original net as a fallback | 
|  | auto* original_net_arg = op->add_arg(); | 
|  | original_net_arg->set_name("original_net"); | 
|  | original_net_arg->mutable_n()->CopyFrom(net); | 
|  |  | 
|  | // Add model id | 
|  | AddArgument("model_id", model_id_, op); | 
|  |  | 
|  | // Add op id | 
|  | AddArgument("tvm_op_id", tvm_op_id_++, op); | 
|  |  | 
|  | // Add nominal batch size | 
|  | AddArgument("nominal_batch_size", opts_.bound_shape_spec.max_batch_size, op); | 
|  |  | 
|  | // Add nominal sequence size | 
|  | AddArgument("nominal_seq_size", opts_.bound_shape_spec.max_seq_size, op); | 
|  |  | 
|  | // Indices of the input blobs with sequence type | 
|  | auto* seq_input_indices_arg = op->add_arg(); | 
|  | seq_input_indices_arg->set_name("seq_input_indices"); | 
|  | int64_t input_idx = 0; | 
|  | for (const auto& input : net_opt.external_input()) { | 
|  | if (seq_input_set.count(input)) { | 
|  | seq_input_indices_arg->add_ints(input_idx); | 
|  | } | 
|  | ++input_idx; | 
|  | } | 
|  |  | 
|  | if (opts_.debug) { | 
|  | AddArgument("debug", 1, op); | 
|  | } | 
|  |  | 
|  | if (opts_.profiling_based_jit) { | 
|  | AddArgument("profiling_based_jit", 1, op); | 
|  | } | 
|  |  | 
|  | return net_opt; | 
|  | } | 
|  |  | 
|  | // Cutting off the runnable part and replace with TVMJitOPs. Asssume the nets | 
|  | // were topologically sorted | 
|  | void TvmTransformer::transform( | 
|  | Workspace* ws, | 
|  | NetDef* pred_net, | 
|  | const std::vector<std::string>& weight_names, | 
|  | const ShapeInfoMap& input_shape_hints, | 
|  | const std::unordered_set<int>& blocklisted_ops) { | 
|  | CAFFE_ENFORCE(ws); | 
|  | CAFFE_ENFORCE(pred_net, "Predict net cannot be nullptr"); | 
|  |  | 
|  | // Save the args of the net so that we can copy it to opt net later | 
|  | std::vector<Argument> args; | 
|  | for (const auto& arg : pred_net->arg()) { | 
|  | args.emplace_back(arg); | 
|  | } | 
|  |  | 
|  | // Get model id and reset TVM op id to 0 | 
|  | model_id_ = getModelId(*pred_net); | 
|  | tvm_op_id_ = 0; | 
|  |  | 
|  | std::unordered_set<std::string> weights( | 
|  | weight_names.begin(), weight_names.end()); | 
|  |  | 
|  | // input_shape_hints should only contain shapes of inputs and not activations | 
|  | ShapeInfoMap shape_hints; | 
|  | if (!opts_.profiling_based_jit) { | 
|  | shape_hints = | 
|  | inferShapes(ws, pred_net, input_shape_hints, opts_.bound_shape_spec); | 
|  | } | 
|  |  | 
|  | if (opts_.debug) { | 
|  | dumpNet(*pred_net, shape_hints, "debug_net.pbtxt"); | 
|  | } | 
|  |  | 
|  | // We are ready to transform the net | 
|  | NetDef net_opt = | 
|  | applyTvmTransform(pred_net, weights, blocklisted_ops, shape_hints); | 
|  |  | 
|  | // Copy the properties | 
|  | for (const auto& arg : args) { | 
|  | net_opt.add_arg()->CopyFrom(arg); | 
|  | } | 
|  | net_opt.mutable_device_option()->CopyFrom(pred_net->device_option()); | 
|  | pred_net->Swap(&net_opt); | 
|  | if (opts_.debug) { | 
|  | dumpNet(*pred_net, shape_hints, "debug_full_opt_net.pbtxt"); | 
|  | } | 
|  | } | 
|  |  | 
|  | const std::unordered_set<std::string>& TvmTransformer::getSupportedOps() { | 
|  | const static std::unordered_set<std::string> supported_ops{ | 
|  | "Add", | 
|  | "BatchGather", | 
|  | "BatchMatMul", | 
|  | "Cast", | 
|  | "Clip", | 
|  | "Concat", | 
|  | "Copy", | 
|  | "DotProduct", | 
|  | "EnsureCPUOutput", | 
|  | "ExpandDims", | 
|  | "FbFCPacked", | 
|  | "FC", | 
|  | "FCTransposed", | 
|  | "Flatten", | 
|  | "Fused8BitRowwiseQuantizedToFloat", | 
|  | "Logit", | 
|  | "MatMul", | 
|  | "Mul", | 
|  | "Relu", | 
|  | "Reshape", | 
|  | "ReplaceNaN", | 
|  | "Sigmoid", | 
|  | "Slice", | 
|  | "Softmax", | 
|  | "Split", | 
|  | "Sum", | 
|  | "Tanh", | 
|  | "Transpose", | 
|  | "UnPackRecords", | 
|  | }; | 
|  | return supported_ops; | 
|  | } | 
|  |  | 
|  | bool TvmTransformer::canConvertFullGraph( | 
|  | const caffe2::NetDef& net, | 
|  | const std::unordered_set<int>& blocklisted_ops) { | 
|  | const auto& supported_ops = getSupportedOps(); | 
|  | for (const auto& op : net.op()) { | 
|  | int pos = | 
|  | ArgumentHelper::GetSingleArgument<OperatorDef, int>(op, kNetPos, -1); | 
|  | if (blocklisted_ops.count(pos) || supported_ops.count(op.type()) == 0) { | 
|  | return false; | 
|  | } | 
|  | } | 
|  | return true; | 
|  | } | 
|  |  | 
|  | NetDef TvmTransformer::applyTvmTransform( | 
|  | NetDef* pred_net, | 
|  | const std::unordered_set<std::string>& weights, | 
|  | const std::unordered_set<int>& blocklisted_ops, | 
|  | const ShapeInfoMap& shape_hints) { | 
|  | const auto profiling_based_jit = opts_.profiling_based_jit; | 
|  | auto tvm_supports = [&blocklisted_ops, &shape_hints, &profiling_based_jit]( | 
|  | const caffe2::OperatorDef& op) { | 
|  | const auto& supported_ops = getSupportedOps(); | 
|  | try { | 
|  | // If the op position is block listed, return false | 
|  | int pos = | 
|  | ArgumentHelper::GetSingleArgument<OperatorDef, int>(op, kNetPos, -1); | 
|  | if (blocklisted_ops.count(pos)) { | 
|  | LOG(INFO) << "op is being blocklisted, " << op.type() << " at position " << pos; | 
|  | return false; | 
|  | } | 
|  |  | 
|  | // If we don't have proper shape info for the op, we cannot compile it | 
|  | // properly, return false | 
|  | if (!profiling_based_jit) { | 
|  | for (const auto& i : op.input()) { | 
|  | if (shape_hints.find(i) == shape_hints.end()) { | 
|  | LOG(INFO) << "Skipping op " << op.type() | 
|  | << " due to missing shape info for input " << i; | 
|  | return false; | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | // If TVM c2 frontend doesn't support this op, return false | 
|  | // TODO: This should be something like TVMC2Frontend::supports(op); | 
|  | return (supported_ops.count(op.type()) != 0); | 
|  | } catch (const std::exception& ex) { | 
|  | LOG(ERROR) << "Caught exception when querying op " << op.type() | 
|  | << ", what: " << ex.what(); | 
|  | return false; | 
|  | } | 
|  | }; | 
|  | auto tvm_op_converter = | 
|  | [this, &weights, &shape_hints](const caffe2::NetDef& net) { | 
|  | return buildTvmOp(net, weights, shape_hints); | 
|  | }; | 
|  |  | 
|  | return opt::OptimizeForBackend(*pred_net, tvm_supports, tvm_op_converter).net; | 
|  | } | 
|  |  | 
|  | void tvmTransform( | 
|  | NetDef* net, | 
|  | Workspace* ws, | 
|  | const std::vector<std::string>& input_names, | 
|  | const std::vector<std::string>& output_names, | 
|  | const std::vector<std::string>& weight_names, | 
|  | const ShapeInfoMap& shape_hints, | 
|  | const std::unordered_set<int>& blocklisted_ops, | 
|  | int32_t max_batch_size, | 
|  | int32_t max_seq_size, | 
|  | int32_t num_embeddings, | 
|  | int32_t embedding_size, | 
|  | int32_t tvm_min_ops, | 
|  | bool tvm_profiling_based_jit, | 
|  | bool debug) { | 
|  | TvmTransformOptions opts; | 
|  | opts.bound_shape_spec.max_batch_size = max_batch_size; | 
|  | opts.bound_shape_spec.max_seq_size = max_seq_size; | 
|  | opts.bound_shape_spec.num_embeddings = num_embeddings; | 
|  | opts.bound_shape_spec.embedding_length = embedding_size; | 
|  | opts.min_ops = tvm_min_ops; | 
|  | opts.profiling_based_jit = tvm_profiling_based_jit; | 
|  | opts.debug = debug; | 
|  | TvmTransformer ts(opts); | 
|  |  | 
|  | // Clean up the external input/output of the net | 
|  | cleanUpPredictNet(net, input_names, output_names, weight_names); | 
|  |  | 
|  | ts.transform(ws, net, weight_names, shape_hints, blocklisted_ops); | 
|  | } | 
|  |  | 
|  | void cleanUpPredictNet( | 
|  | NetDef* net, | 
|  | const std::vector<std::string>& input_names, | 
|  | const std::vector<std::string>& output_names, | 
|  | const std::vector<std::string>& weight_names) { | 
|  | net->mutable_external_input()->Clear(); | 
|  | net->mutable_external_output()->Clear(); | 
|  | for (const auto& i : input_names) { | 
|  | net->add_external_input(i); | 
|  | } | 
|  | for (const auto& w : weight_names) { | 
|  | net->add_external_input(w); | 
|  | } | 
|  | for (const auto& o : output_names) { | 
|  | net->add_external_output(o); | 
|  | } | 
|  | } | 
|  | } // namespace caffe2 |