// caffe2/opt/onnxifi_transformer.h - platform/external/pytorch - Git at Google

 #pragma once

 #include <cstdint>
 #include <string>
 #include <unordered_map>
 #include <vector>

 #include "caffe2/opt/backend_cutting.h"
 #include "onnx/onnx_pb.h"

 #include "caffe2/core/operator.h"
 #include "caffe2/onnx/onnxifi_init.h"
 #include "caffe2/opt/backend_transformer_base.h"

 namespace caffe2 {
 namespace onnx {
 class OnnxExporter;
 }

 // Split SparseLengthsSumSparse into SparseLengthsSumSparseLookup +
 // SparseLengthsSum.
 // `net` is passed as a mutable pointer and nothing is returned, so the split
 // is presumably applied to `net` in place; `ws` is only taken by const
 // reference. NOTE(review): the definition is not in this header -- confirm
 // there how `ws` is consulted.
 TORCH_API void splitSparseLengthsSumSparse(NetDef* net, const Workspace& ws);

 // Option bundle for OnnxifiTransformer. Adds the Onnxifi-specific switches
 // below on top of whatever BackendTransformOptions already carries.
 struct OnnxifiTransformerOptions final : public BackendTransformOptions {
   explicit OnnxifiTransformerOptions() : BackendTransformOptions() {}

   // true: hand over a serialized ONNX model; false: hand over a serialized
   // C2 model.
   bool use_onnx = false;

   // Adjust the batch at the outputs when set.
   bool adjust_batch = true;

   // Lower the model blob by blob when set.
   bool load_model_by_blob = false;

   // Enforce fp32 inputs into fp16 when set.
   bool enforce_fp32_inputs_into_fp16 = false;

   // Combine the fp32 batched inputs into a single tensor and convert that
   // tensor to fp16 when set.
   bool merge_fp32_inputs_into_fp16 = false;

   // Verify that exactly one subnet was created when set.
   bool verify_only_single_subnet = false;

   // Records whether the net has already been ssaRewritten.
   bool predictor_net_ssa_rewritten = false;

   // Inference timeout. NOTE(review): the unit and the meaning of 0 are not
   // stated in this header -- confirm against the consumer.
   int timeout = 0;

   // Shape infos keyed by batch size.
   std::unordered_map<int, ShapeInfoMap> shape_hints_per_bs;

   // Read the batch size from Onnxifi when set.
   bool use_onnxifi_batch_size = false;
 };

 // Small helper for setting and getting Onnxifi options by name.
 class TORCH_API OnnxifiOptionHelper final {
  public:
   OnnxifiOptionHelper();

   // Sets the Onnxifi option named `option` to `value`.
   // NOTE(review): the bool result presumably signals success -- confirm in
   // the definition.
   bool setOnnxifiOption(const std::string& option, const std::string& value);

   // Gets the value of the Onnxifi option named `option`.
   std::string getOnnxifiOption(const std::string& option);

  private:
   // Loaded onnxifi library; null until assigned.
   onnxifi_library* lib_ = nullptr;
 };

 // BackendTransformerBase implementation for Onnxifi backends.
 // NOTE(review): this class declares a destructor and keeps a raw
 // onnxifi_library pointer plus a list of backend IDs, yet declares no
 // copy/move operations -- confirm that copying an instance is safe.
 class TORCH_API OnnxifiTransformer final : public BackendTransformerBase {
  public:
   explicit OnnxifiTransformer(const OnnxifiTransformerOptions& opts);
   ~OnnxifiTransformer() override;

   // Override of the base-class transform entry point.
   void transform(
       Workspace* ws,
       NetDef* pred_net,
       const std::vector<std::string>& weight_names,
       const ShapeInfoMap& shape_hints,
       const std::unordered_set<int>& blocklisted_ops) override;

   // Ask whether `op` is supported, by passing the C2 protobuf.
   bool supportOpC2(
       const caffe2::OperatorDef& op,
       const ShapeInfoMap& shape_hints,
       const std::unordered_set<std::string>& weights,
       const std::unordered_set<int>& blocklisted_ops,
       onnxBackendID backend_id) const;

   // Determine the backend id(s).
   std::vector<onnxBackendID> getBackendId();

  private:
   // The conversion process creates new tensors, so they have to be injected
   // into the original workspace. The onnx exporter uses a
   // std::unordered_map<std::string, TensorShape> as its lookup table, so an
   // extra copy of the shape info is kept and the two are maintained together.
   caffe2::NetDef SubnetToOnnxifiOpViaOnnx(
       const caffe2::NetDef& net,
       const std::unordered_set<std::string>& weights_in_ws,
       Workspace* ws,
       onnx::OnnxExporter* exporter,
       ShapeInfoMap* shape_hints_max_bs,
       const std::unordered_map<int, ShapeInfoMap>& shape_hints_per_bs);

   // Turn a cut-off subgraph net into an Onnxifi op.
   caffe2::NetDef SubnetToOnnxifiOpViaC2(
       const caffe2::NetDef& net,
       const std::unordered_set<std::string>& weights_in_ws,
       const ShapeInfoMap& shape_hints_max_bs,
       const std::unordered_map<int, ShapeInfoMap>& shape_hints_per_bs);

   // Check that the output shape hints are present, so that they can be
   // passed to OnnxifiOp.
   bool canPassOutputShapeHintsPerBs(
       const OperatorDef& op,
       const std::unordered_map<int, ShapeInfoMap>& shape_hints_per_bs) const;

   // Called once all the ops and the external inputs and outputs are already
   // known.
   OperatorDef buildOnnxifiOp(
       const std::string& onnx_model_str,
       const std::unordered_set<std::string>& initialization_list,
       const std::vector<std::string>& external_inputs,
       const std::vector<std::string>& external_outputs,
       const ShapeInfoMap& shape_hints_max_bs,
       const std::unordered_map<int, ShapeInfoMap>& shape_hints_per_bs);

   // Transform by handing the C2 proto to the backend.
   opt::CutResult TransformViaC2(
       NetDef* pred_net,
       const std::unordered_set<std::string>& weights,
       const std::unordered_set<int>& blocklisted_ops,
       const ShapeInfoMap& shape_hints_max_bs,
       const std::unordered_map<int, ShapeInfoMap>& shape_hints_per_bs);

   // Transform by handing the ONNX proto to the backend.
   opt::CutResult TransformViaOnnx(
       Workspace* ws,
       NetDef* pred_net,
       const std::unordered_set<std::string>& weights,
       const std::unordered_set<int>& blocklisted_ops,
       ShapeInfoMap* shape_hints_max_bs,
       const std::unordered_map<int, ShapeInfoMap>& shape_hints_per_bs);

   // Ask whether `op` is supported, by passing the ONNX protobuf.
   bool supportOpOnnx(
       const caffe2::OperatorDef& op,
       onnx::OnnxExporter* exporter,
       const std::unordered_set<int>& blocklisted_ops,
       onnxBackendID backend_id) const;

   // Tie the output of Gather to the scalar weight input of the
   // SparseLengthsWeighted* ops and of SparseLengthsSumSparseLookup (the op
   // split off from SparseLengthsWeighted*Sparse). When the latter is
   // disabled, the former is disabled as well.
   void tieGatherAndSparseLengthsWeightedSumOps(
       const NetDef& net,
       const ShapeInfoMap& shape_hints,
       const std::unordered_set<std::string>& weights,
       std::unordered_set<int>* blocklisted_ops) const;

   // For a net carrying partitioning info: blocklist the ops meant to run on
   // CPU, i.e. those whose partition info has an empty device_id list.
   void blocklistCpuPartition(
       const NetDef& net,
       std::unordered_set<int>* blocklisted_ops) const;

   // Rule-based filtering.
   void applyFilteringRules(
       const NetDef& net,
       const ShapeInfoMap& shape_hints,
       const std::unordered_set<std::string>& weights,
       std::unordered_set<int>* blocklisted_ops) const;

   // Extract the partition info from the original net.
   void extractPartitionInfo(const NetDef& net);

   // Options this transformer runs with.
   OnnxifiTransformerOptions opts_;

   // Loaded onnxifi library; null until assigned.
   onnxifi_library* lib_ = nullptr;

   // Number of backends.
   size_t num_backends_ = 0;

   // Backend index.
   int idx_ = 0;

   // Count of Onnxifi ops built so far.
   int onnxifi_op_id_ = 0;

   // Model id.
   std::string model_id_;

   // Backend IDs.
   std::vector<onnxBackendID> backend_ids_;

   // Cache of ONNX shape hints.
   std::unordered_map<std::string, TensorShape> shape_hints_onnx_;

   // Partition info.
   std::vector<PartitionInfo> partition_infos_;
 };
 } // namespace caffe2
	#pragma once

	#include <cstdint>
	#include <string>
	#include <unordered_map>
	#include <vector>

	#include "caffe2/opt/backend_cutting.h"
	#include "onnx/onnx_pb.h"

	#include "caffe2/core/operator.h"
	#include "caffe2/onnx/onnxifi_init.h"
	#include "caffe2/opt/backend_transformer_base.h"

	namespace caffe2 {
	namespace onnx {
	class OnnxExporter;
	}

	// Split SparseLengthsSumSparse into SparseLengthsSumSparseLookup +
	// SparseLengthsSum.
	// `net` is passed as a mutable pointer and nothing is returned, so the split
	// is presumably applied to `net` in place; `ws` is only taken by const
	// reference. NOTE(review): the definition is not in this header -- confirm
	// there how `ws` is consulted.
	TORCH_API void splitSparseLengthsSumSparse(NetDef* net, const Workspace& ws);

	// Option bundle for OnnxifiTransformer. Adds the Onnxifi-specific switches
	// below on top of whatever BackendTransformOptions already carries.
	struct OnnxifiTransformerOptions final : public BackendTransformOptions {
	  explicit OnnxifiTransformerOptions() : BackendTransformOptions() {}

	  // true: hand over a serialized ONNX model; false: hand over a serialized
	  // C2 model.
	  bool use_onnx = false;

	  // Adjust the batch at the outputs when set.
	  bool adjust_batch = true;

	  // Lower the model blob by blob when set.
	  bool load_model_by_blob = false;

	  // Enforce fp32 inputs into fp16 when set.
	  bool enforce_fp32_inputs_into_fp16 = false;

	  // Combine the fp32 batched inputs into a single tensor and convert that
	  // tensor to fp16 when set.
	  bool merge_fp32_inputs_into_fp16 = false;

	  // Verify that exactly one subnet was created when set.
	  bool verify_only_single_subnet = false;

	  // Records whether the net has already been ssaRewritten.
	  bool predictor_net_ssa_rewritten = false;

	  // Inference timeout. NOTE(review): the unit and the meaning of 0 are not
	  // stated in this header -- confirm against the consumer.
	  int timeout = 0;

	  // Shape infos keyed by batch size.
	  std::unordered_map<int, ShapeInfoMap> shape_hints_per_bs;

	  // Read the batch size from Onnxifi when set.
	  bool use_onnxifi_batch_size = false;
	};

	// Small helper for setting and getting Onnxifi options by name.
	class TORCH_API OnnxifiOptionHelper final {
	 public:
	  OnnxifiOptionHelper();

	  // Sets the Onnxifi option named `option` to `value`.
	  // NOTE(review): the bool result presumably signals success -- confirm in
	  // the definition.
	  bool setOnnxifiOption(const std::string& option, const std::string& value);

	  // Gets the value of the Onnxifi option named `option`.
	  std::string getOnnxifiOption(const std::string& option);

	 private:
	  // Loaded onnxifi library; null until assigned.
	  onnxifi_library* lib_ = nullptr;
	};

	// BackendTransformerBase implementation for Onnxifi backends.
	// NOTE(review): this class declares a destructor and keeps a raw
	// onnxifi_library pointer plus a list of backend IDs, yet declares no
	// copy/move operations -- confirm that copying an instance is safe.
	class TORCH_API OnnxifiTransformer final : public BackendTransformerBase {
	 public:
	  explicit OnnxifiTransformer(const OnnxifiTransformerOptions& opts);
	  ~OnnxifiTransformer() override;

	  // Override of the base-class transform entry point.
	  void transform(
	      Workspace* ws,
	      NetDef* pred_net,
	      const std::vector<std::string>& weight_names,
	      const ShapeInfoMap& shape_hints,
	      const std::unordered_set<int>& blocklisted_ops) override;

	  // Ask whether `op` is supported, by passing the C2 protobuf.
	  bool supportOpC2(
	      const caffe2::OperatorDef& op,
	      const ShapeInfoMap& shape_hints,
	      const std::unordered_set<std::string>& weights,
	      const std::unordered_set<int>& blocklisted_ops,
	      onnxBackendID backend_id) const;

	  // Determine the backend id(s).
	  std::vector<onnxBackendID> getBackendId();

	 private:
	  // The conversion process creates new tensors, so they have to be injected
	  // into the original workspace. The onnx exporter uses a
	  // std::unordered_map<std::string, TensorShape> as its lookup table, so an
	  // extra copy of the shape info is kept and the two are maintained together.
	  caffe2::NetDef SubnetToOnnxifiOpViaOnnx(
	      const caffe2::NetDef& net,
	      const std::unordered_set<std::string>& weights_in_ws,
	      Workspace* ws,
	      onnx::OnnxExporter* exporter,
	      ShapeInfoMap* shape_hints_max_bs,
	      const std::unordered_map<int, ShapeInfoMap>& shape_hints_per_bs);

	  // Turn a cut-off subgraph net into an Onnxifi op.
	  caffe2::NetDef SubnetToOnnxifiOpViaC2(
	      const caffe2::NetDef& net,
	      const std::unordered_set<std::string>& weights_in_ws,
	      const ShapeInfoMap& shape_hints_max_bs,
	      const std::unordered_map<int, ShapeInfoMap>& shape_hints_per_bs);

	  // Check that the output shape hints are present, so that they can be
	  // passed to OnnxifiOp.
	  bool canPassOutputShapeHintsPerBs(
	      const OperatorDef& op,
	      const std::unordered_map<int, ShapeInfoMap>& shape_hints_per_bs) const;

	  // Called once all the ops and the external inputs and outputs are already
	  // known.
	  OperatorDef buildOnnxifiOp(
	      const std::string& onnx_model_str,
	      const std::unordered_set<std::string>& initialization_list,
	      const std::vector<std::string>& external_inputs,
	      const std::vector<std::string>& external_outputs,
	      const ShapeInfoMap& shape_hints_max_bs,
	      const std::unordered_map<int, ShapeInfoMap>& shape_hints_per_bs);

	  // Transform by handing the C2 proto to the backend.
	  opt::CutResult TransformViaC2(
	      NetDef* pred_net,
	      const std::unordered_set<std::string>& weights,
	      const std::unordered_set<int>& blocklisted_ops,
	      const ShapeInfoMap& shape_hints_max_bs,
	      const std::unordered_map<int, ShapeInfoMap>& shape_hints_per_bs);

	  // Transform by handing the ONNX proto to the backend.
	  opt::CutResult TransformViaOnnx(
	      Workspace* ws,
	      NetDef* pred_net,
	      const std::unordered_set<std::string>& weights,
	      const std::unordered_set<int>& blocklisted_ops,
	      ShapeInfoMap* shape_hints_max_bs,
	      const std::unordered_map<int, ShapeInfoMap>& shape_hints_per_bs);

	  // Ask whether `op` is supported, by passing the ONNX protobuf.
	  bool supportOpOnnx(
	      const caffe2::OperatorDef& op,
	      onnx::OnnxExporter* exporter,
	      const std::unordered_set<int>& blocklisted_ops,
	      onnxBackendID backend_id) const;

	  // Tie the output of Gather to the scalar weight input of the
	  // SparseLengthsWeighted* ops and of SparseLengthsSumSparseLookup (the op
	  // split off from SparseLengthsWeighted*Sparse). When the latter is
	  // disabled, the former is disabled as well.
	  void tieGatherAndSparseLengthsWeightedSumOps(
	      const NetDef& net,
	      const ShapeInfoMap& shape_hints,
	      const std::unordered_set<std::string>& weights,
	      std::unordered_set<int>* blocklisted_ops) const;

	  // For a net carrying partitioning info: blocklist the ops meant to run on
	  // CPU, i.e. those whose partition info has an empty device_id list.
	  void blocklistCpuPartition(
	      const NetDef& net,
	      std::unordered_set<int>* blocklisted_ops) const;

	  // Rule-based filtering.
	  void applyFilteringRules(
	      const NetDef& net,
	      const ShapeInfoMap& shape_hints,
	      const std::unordered_set<std::string>& weights,
	      std::unordered_set<int>* blocklisted_ops) const;

	  // Extract the partition info from the original net.
	  void extractPartitionInfo(const NetDef& net);

	  // Options this transformer runs with.
	  OnnxifiTransformerOptions opts_;

	  // Loaded onnxifi library; null until assigned.
	  onnxifi_library* lib_ = nullptr;

	  // Number of backends.
	  size_t num_backends_ = 0;

	  // Backend index.
	  int idx_ = 0;

	  // Count of Onnxifi ops built so far.
	  int onnxifi_op_id_ = 0;

	  // Model id.
	  std::string model_id_;

	  // Backend IDs.
	  std::vector<onnxBackendID> backend_ids_;

	  // Cache of ONNX shape hints.
	  std::unordered_map<std::string, TensorShape> shape_hints_onnx_;

	  // Partition info.
	  std::vector<PartitionInfo> partition_infos_;
	};
	} // namespace caffe2