| #pragma once |
| |
| #include <unordered_map> |
| |
| #include "onnx/onnx_pb.h" |
| |
| #include "c10/util/SmallVector.h" |
| #include "caffe2/core/context.h" |
| #include "caffe2/core/logging.h" |
| #include "caffe2/core/operator.h" |
| #include "caffe2/onnx/onnxifi_graph_info.h" |
| #include "caffe2/onnx/onnxifi_init.h" |
| #include "caffe2/utils/string_utils.h" |
| |
| namespace caffe2 { |
| |
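// OnnxifiOp offloads a serialized ONNX model (passed via the "onnx_model"
// argument) to an ONNXIFI backend. The backend and graph are built once per
// model id and cached; op inputs/outputs are bound to the ONNX graph
// inputs/outputs by position.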
| template <typename Context> |
| class OnnxifiOp final : public Operator<Context> { |
| struct TensorInfo { |
| TensorInfo() {} |
| TensorInfo(TensorInfo&&) = default; |
| TensorInfo& operator=(TensorInfo&&) = default; |
| std::vector<uint64_t> dims; |
    uint64_t onnxifi_type{ONNXIFI_DATATYPE_UNDEFINED};
| }; |
| |
| public: |
| USE_OPERATOR_CONTEXT_FUNCTIONS; |
| explicit OnnxifiOp(const OperatorDef& operator_def, Workspace* ws) |
| : Operator<Context>(operator_def, ws) { |
| lib_ = onnx::initOnnxifiLibrary(); |
| backend_graph_map_ptr_ = onnx::getOnnxBackendGraphMap(); |
| CAFFE_ENFORCE(lib_, "Cannot initialize ONNXIFI library"); |
| use_onnx_ = this->template GetSingleArgument<int>("use_onnx", 0); |
| auto onnx_model_str = |
| this->template GetSingleArgument<std::string>("onnx_model", ""); |
| CAFFE_ENFORCE(!onnx_model_str.empty(), "onnx_model cannot be empty"); |
| |
    // Set up the input/output descriptor templates
| input_names_ = |
| this->template GetRepeatedArgument<std::string>("input_names"); |
| output_names_ = |
| this->template GetRepeatedArgument<std::string>("output_names"); |
| CAFFE_ENFORCE_EQ(input_names_.size(), operator_def.input_size()); |
| CAFFE_ENFORCE_EQ(output_names_.size(), operator_def.output_size()); |
| for (const auto& input : input_names_) { |
| input_desc_.push_back(onnxTensorDescriptorV1()); |
| input_desc_.back().name = input.c_str(); |
| } |
| int output_idx = 0; |
| for (const auto& output : output_names_) { |
| output_desc_.push_back(onnxTensorDescriptorV1()); |
| output_desc_.back().name = output.c_str(); |
| |
      // For each output, try to fetch its shape hint.
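      // The hint is encoded as {onnxifi_type, dim_0, dim_1, ...}; e.g.
      // {ONNXIFI_DATATYPE_FLOAT32, 32, 1024} would describe a FLOAT32 output
      // of shape [32, 1024].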
| const std::string key = c10::str("output_shape_hint_", output_idx); |
| auto output_shape_hint = this->template GetRepeatedArgument<int>(key); |
| if (!output_shape_hint.empty()) { |
| TensorInfo info; |
| info.onnxifi_type = output_shape_hint.front(); |
| for (size_t i = 1; i < output_shape_hint.size(); ++i) { |
| info.dims.push_back(output_shape_hint[i]); |
| } |
| output_shape_hints_.emplace(output_idx, std::move(info)); |
| } |
| ++output_idx; |
| } |
| input_shapes_.resize(input_names_.size()); |
| output_shapes_.resize(output_names_.size()); |
| |
| // Get output resizing hints |
| adjust_output_batch_ = |
| this->template GetSingleArgument<int>("adjust_output_batch", 0); |
| permit_unknown_output_batch_size_ = this->template GetSingleArgument<int>( |
| "permit_unknown_output_batch_size", 0); |
| auto output_resize_hints = |
| this->template GetRepeatedArgument<int>("output_resize_hints"); |
| CAFFE_ENFORCE_EQ( |
| output_resize_hints.size() % 2, |
| 0, |
| "output_resize_hints must have even size: ", |
| output_resize_hints.size()); |
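    // output_resize_hints is a flattened list of (max_batch_size, input_pos)
    // pairs; see batch_pos_map_ below for how the pairs are consumed.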
    for (size_t i = 0; i + 1 < output_resize_hints.size(); i += 2) {
      batch_pos_map_.emplace(
          output_resize_hints[i], output_resize_hints[i + 1]);
    }
| |
    // Encode arguments starting with "custom_" and pass them to the backend
| std::vector<uint64_t> property_pointers; |
| std::vector<int64_t> int_args; |
| std::vector<float> float_args; |
| buildPropertyList(operator_def, &property_pointers, &int_args, &float_args); |
| |
    // Initialize the backend if it has not already been created. When we
    // initialize the backend, we get the weights (initializers) from the
    // workspace and offload them onto the backend. This should be done only
    // once; subsequent calls of this function with the same model id should
    // find a cached backend, so there is no need to repeat the process.
| buildBackendAndGraph(ws, property_pointers, onnx_model_str); |
| } |
| |
| ~OnnxifiOp() { |
| backend_graph_shared_ptr_.reset(); |
| backend_graph_map_ptr_->remove(op_id_string_); |
| #ifdef ONNXIFI_ENABLE_EXT |
| traces_.reset(); |
| #endif |
| } |
| |
| bool RunOnDevice() override; |
| |
| void setEnableTracing(bool b) { |
| enable_tracing_ = b; |
| } |
| |
| #ifdef ONNXIFI_ENABLE_EXT |
| std::shared_ptr<onnxTraceEventList> traces() const { |
| return traces_; |
| } |
| #endif |
| private: |
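  // Returns the onnxifi data type of output `output_idx` and appends its
  // hinted dims to `dims`; defaults to FLOAT32 when no hint was registered.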
| uint64_t SetOutputShapeAndType(int output_idx, std::vector<size_t>* dims) { |
| uint64_t type = ONNXIFI_DATATYPE_FLOAT32; |
| const auto it = output_shape_hints_.find(output_idx); |
| if (it != output_shape_hints_.end()) { |
| std::copy( |
| it->second.dims.begin(), |
| it->second.dims.end(), |
| std::back_inserter(*dims)); |
| type = it->second.onnxifi_type; |
| } |
| return type; |
| } |
| |
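  // Default property list: no backend properties are encoded and the int/float
  // argument buffers are unused; ONNXIFI_BACKEND_PROPERTY_NONE terminates the
  // (empty) list.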
| void buildPropertyList( |
| const OperatorDef& /* unused */, |
| std::vector<uint64_t>* property_list, |
| std::vector<int64_t>* /* unused */, |
| std::vector<float>* /* unused */) { |
| property_list->push_back(ONNXIFI_BACKEND_PROPERTY_NONE); |
| } |
| |
| void buildBackendAndGraph( |
| Workspace* ws, |
| const std::vector<uint64_t>& property_pointers, |
| const std::string& onnx_model_str) { |
| op_id_string_ = |
| this->template GetSingleArgument<std::string>("model_id", "") + ":" + |
| this->template GetSingleArgument<std::string>("net_pos", ""); |
| |
| auto initializers = |
| this->template GetRepeatedArgument<std::string>("initializers"); |
| // Build the Onnxifi engine |
| auto backend_index = this->template GetSingleArgument<int>("backend_id", 0); |
| auto creator = [this, |
| ws, |
| property_pointers, |
| backend_index, |
| &onnx_model_str, |
| &initializers]() { |
| std::vector<onnxBackendID> backend_ids; |
| size_t num_backends{0}; |
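      // First call with a null buffer: the backend count is written into
      // num_backends and ONNXIFI_STATUS_FALLBACK is returned. The second call
      // below, with a properly sized buffer, fills in the actual IDs.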
| CAFFE_ENFORCE_EQ( |
| lib_->onnxGetBackendIDs(nullptr, &num_backends), |
| ONNXIFI_STATUS_FALLBACK); |
| CAFFE_ENFORCE_GT( |
| num_backends, 0, "At least 1 onnxifi backend should be available"); |
| CAFFE_ENFORCE_LT( |
| backend_index, |
| num_backends, |
| "Backend idx out of bound: ", |
| backend_index, |
| ", #backends: ", |
| num_backends); |
| backend_ids.resize(num_backends); |
| CAFFE_ENFORCE_EQ( |
| lib_->onnxGetBackendIDs(backend_ids.data(), &num_backends), |
| ONNXIFI_STATUS_SUCCESS); |
| |
| onnxBackendID backend_id = backend_ids[backend_index]; |
| onnxBackend backend{nullptr}; |
| |
| CAFFE_ENFORCE_EQ( |
| lib_->onnxInitBackend(backend_id, property_pointers.data(), &backend), |
| ONNXIFI_STATUS_SUCCESS); |
| |
| // Release unused backend ids. |
      for (size_t i = 0; i < num_backends; ++i) {
        if (i == static_cast<size_t>(backend_index)) {
| continue; |
| } |
| lib_->onnxReleaseBackendID(backend_ids[i]); |
| } |
| |
| // Get weights |
| std::vector<std::string> weight_names; |
| std::vector<std::vector<uint64_t>> weight_shapes; |
| auto weight_descs = buildInitializationList( |
| ws, initializers, &weight_names, &weight_shapes); |
| |
| onnxGraph graph{nullptr}; |
| CAFFE_ENFORCE_EQ( |
| lib_->onnxInitGraph( |
| backend, |
| nullptr, |
| onnx_model_str.size(), |
| (const void*)(onnx_model_str.c_str()), |
| weight_descs.size(), |
| weight_descs.data(), |
| &graph), |
| ONNXIFI_STATUS_SUCCESS); |
| |
| return std::make_shared<onnx::BackendGraphInfo>( |
| backend_id, backend, graph, lib_); |
| }; |
| backend_graph_shared_ptr_ = |
| backend_graph_map_ptr_->insert(op_id_string_, creator); |
| |
| backend_id_ = backend_graph_shared_ptr_->backend_id; |
| backend_ = backend_graph_shared_ptr_->backend; |
| graph_ = backend_graph_shared_ptr_->graph; |
| |
| getExtFunctionPointers(); |
| } |
| |
  /// Set up extension function pointers if onnxifi_ext is enabled. The
  /// pointers are left null when the backend does not expose the extension.
| void getExtFunctionPointers() { |
| #ifdef ONNXIFI_ENABLE_EXT |
| onnxExtensionFunctionPointer p; |
| if (lib_->onnxGetExtensionFunctionAddress( |
| backend_id_, "onnxSetIOAndRunGraphFunction", &p) != |
| ONNXIFI_STATUS_SUCCESS) { |
| onnxSetIOAndRunGraphPointer_ = nullptr; |
| } else { |
| onnxSetIOAndRunGraphPointer_ = |
| reinterpret_cast<decltype(onnxSetIOAndRunGraphPointer_)>(p); |
| } |
| if (lib_->onnxGetExtensionFunctionAddress( |
| backend_id_, "onnxReleaseTraceEventsFunction", &p) != |
| ONNXIFI_STATUS_SUCCESS) { |
| onnxReleaseTraceEventsPointer_ = nullptr; |
| } else { |
| onnxReleaseTraceEventsPointer_ = |
| reinterpret_cast<decltype(onnxReleaseTraceEventsPointer_)>(p); |
| } |
| #endif |
| } |
| |
| std::vector<int> extractOutputBatchSizes() const; |
| |
| void maybeAdjustOutputBatchSizes( |
| const std::vector<int>& real_output_batch_sizes); |
| |
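  // Builds tensor descriptors for the workspace blobs named in `initializers`
  // so that the weights can be offloaded to the backend at graph init time.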
| std::vector<onnxTensorDescriptorV1> buildInitializationList( |
| Workspace* ws, |
| const std::vector<std::string>& initializers, |
| std::vector<std::string>* weight_names, |
| std::vector<std::vector<uint64_t>>* weight_shapes); |
| |
| // pointer to loaded onnxifi library |
| onnxifi_library* lib_{nullptr}; |
| onnx::OnnxBackendGraphMap* backend_graph_map_ptr_; |
| std::string op_id_string_; |
| |
| onnxBackendID backend_id_{nullptr}; |
| onnxBackend backend_{nullptr}; |
| onnxGraph graph_{nullptr}; |
| onnx::SharedPtrBackendGraphInfo backend_graph_shared_ptr_; |
| |
| // input/output descriptors |
| std::vector<onnxTensorDescriptorV1> input_desc_; |
| std::vector<onnxTensorDescriptorV1> output_desc_; |
| |
| #ifdef ONNXIFI_ENABLE_EXT |
| // onnxifi extension mode function pointer |
  onnxStatus (*onnxSetIOAndRunGraphPointer_)(
      onnxGraph,
      uint32_t,
      const onnxTensorDescriptorV1*,
      uint32_t,
      const onnxTensorDescriptorV1*,
      onnxMemoryFenceV1*,
      onnxTraceEventList*) = nullptr;
| |
  onnxStatus (*onnxReleaseTraceEventsPointer_)(onnxTraceEventList*) = nullptr;
| |
| std::shared_ptr<onnxTraceEventList> traces_{nullptr}; |
| #endif |
| bool use_onnx_{false}; |
| |
  // We bind the op inputs/outputs by position, while ONNXIFI binds
  // inputs/outputs by name. In addition, op input/output names can be
  // rewritten by, for example, memonger. So we cache the original ONNX
  // input/output names here and bind them by position.
| std::vector<std::string> input_names_; |
| std::vector<std::string> output_names_; |
| |
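  // Per-run input/output shape storage; sized once in the constructor to
  // avoid repeated reallocation.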
| std::vector<c10::SmallVector<uint64_t, 4>> input_shapes_; |
| std::vector<c10::SmallVector<uint64_t, 4>> output_shapes_; |
| |
  // A scratch vector to avoid repeated reallocation. It exists only because we
  // use int64_t for c2::Tensor dims but uint64_t for onnxTensorDescriptor
  // dims. Maybe we should just use int64_t everywhere.
| c10::SmallVector<int64_t, 4> tensor_dims_int64_; |
| |
  // Multi-group quantization info: per-group scales and offsets
| std::vector<std::vector<float>> all_scales_; |
| std::vector<std::vector<float>> all_offsets_; |
| |
| // output shape hints |
| std::unordered_map<int, TensorInfo> output_shape_hints_; |
| |
  // Whether we need to resize outputs
| bool adjust_output_batch_{false}; |
| |
| // Whether we allow unknown output batch size. This is often needed when |
| // we explicitly blacklist operators out of the onnxifi op. |
| bool permit_unknown_output_batch_size_{false}; |
| |
| // Output resizing hint map |
| // key: max batch size |
| // value: position of the input where the real batch size can be extracted |
| // from its first dimension |
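  // e.g. with output_resize_hints = {50, 1}, an output produced at max batch
  // size 50 is resized using the real batch size read from dim 0 of input 1.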
| std::unordered_map<int, int> batch_pos_map_; |
| // Whether we enable tracing in one run of inference |
| bool enable_tracing_{false}; |
| }; |
| |
| } // namespace caffe2 |