| #include "caffe2/operators/onnxifi_op.h" |
| |
| namespace caffe2 { |
| |
| namespace { |
| |
| void BlobToTensorDescriptor( |
| const std::string& name, |
| Workspace* ws, |
| onnxTensorDescriptorV1* desc, |
| std::vector<std::vector<uint64_t>>* shapes) { |
| const Blob* blob = ws->GetBlob(name); |
| CAFFE_ENFORCE(blob, "Blob ", name, " doesn't exist"); |
| |
| // Memory type |
| // We only allow weights to be CPU tensor for now |
| CAFFE_ENFORCE( |
| blob->template IsType<Tensor>(CPU), |
| "Initialization blob ", |
| name, |
| " needs to be TensorCPU"); |
| desc->tag = ONNXIFI_TAG_TENSOR_DESCRIPTOR_V1; |
| desc->memoryType = ONNXIFI_MEMORY_TYPE_CPU; |
| |
| // Data type |
| const auto& cpu_tensor = blob->template Get<TensorCPU>(); |
| if (cpu_tensor.template IsType<float>()) { |
| desc->dataType = ONNXIFI_DATATYPE_FLOAT32; |
| desc->buffer = reinterpret_cast<onnxPointer>(cpu_tensor.data<float>()); |
| } else if (cpu_tensor.template IsType<int64_t>()) { |
| desc->dataType = ONNXIFI_DATATYPE_INT64; |
| desc->buffer = reinterpret_cast<onnxPointer>(cpu_tensor.data<int64_t>()); |
| } else if (cpu_tensor.template IsType<int32_t>()) { |
| desc->dataType = ONNXIFI_DATATYPE_INT32; |
| desc->buffer = reinterpret_cast<onnxPointer>(cpu_tensor.data<int32_t>()); |
| } |
| |
| // Set dims |
| const auto& shape = cpu_tensor.dims(); |
| desc->dimensions = shape.size(); |
| shapes->emplace_back(shape.cbegin(), shape.cend()); |
| desc->shape = shapes->back().data(); |
| } |
| } // namespace |
| |
| template <> |
| std::vector<onnxTensorDescriptorV1> |
| OnnxifiOp<float, CPUContext>::BuildInitializationList( |
| Workspace* ws, |
| std::unordered_set<std::string>* initialization_list, |
| std::vector<std::string>* weight_names, |
| std::vector<std::vector<uint64_t>>* weight_shapes) { |
| const std::vector<string>& ws_blobs = ws->Blobs(); |
| std::vector<onnxTensorDescriptorV1> descs; |
| for (const auto& s : ws_blobs) { |
| auto it = initialization_list->find(s); |
| if (it != initialization_list->end()) { |
| weight_names->emplace_back(s); |
| onnxTensorDescriptorV1 tensor_desc; |
| tensor_desc.name = weight_names->back().c_str(); |
| BlobToTensorDescriptor(s, ws, &tensor_desc, weight_shapes); |
| descs.push_back(tensor_desc); |
| initialization_list->erase(it); |
| } |
| } |
| CAFFE_ENFORCE( |
| initialization_list->empty(), "Unfulfilled initialization list"); |
| return descs; |
| } |
| |
| template <> |
| bool OnnxifiOp<float, CPUContext>::RunOnDevice() { |
| for (unsigned i = 0U; i < InputSize(); ++i) { |
| const auto& input_tensor = Input(i); |
| const auto& tensor_dims = input_tensor.dims(); |
| auto& tensor_descriptor = input_desc_.at(i); |
| tensor_descriptor.dataType = ONNXIFI_DATATYPE_FLOAT32; |
| tensor_descriptor.memoryType = ONNXIFI_MEMORY_TYPE_CPU; |
| tensor_descriptor.dimensions = tensor_dims.size(); |
| input_shapes_.emplace_back(tensor_dims.cbegin(), tensor_dims.cend()); |
| tensor_descriptor.shape = input_shapes_.back().data(); |
| tensor_descriptor.buffer = |
| reinterpret_cast<onnxPointer>(input_tensor.data<float>()); |
| } |
| |
| for (unsigned i = 0U; i < OutputSize(); ++i) { |
| auto* output_tensor = Output(i); |
| std::vector<TIndex> tensor_dims; |
| SetOutputShape(i, &tensor_dims); |
| output_tensor->Resize(tensor_dims); |
| auto& tensor_descriptor = output_desc_.at(i); |
| tensor_descriptor.dataType = ONNXIFI_DATATYPE_FLOAT32; |
| tensor_descriptor.memoryType = ONNXIFI_MEMORY_TYPE_CPU; |
| tensor_descriptor.dimensions = tensor_dims.size(); |
| CAFFE_ENFORCE( |
| tensor_descriptor.dimensions != 0, |
| tensor_descriptor.name, |
| " has 0 dim"); |
| output_shapes_.emplace_back(tensor_dims.cbegin(), tensor_dims.cend()); |
| tensor_descriptor.shape = output_shapes_.back().data(); |
| tensor_descriptor.buffer = |
| reinterpret_cast<onnxPointer>(output_tensor->mutable_data<float>()); |
| } |
| |
| CAFFE_ENFORCE_EQ( |
| lib_->onnxSetGraphIO( |
| graph_, |
| input_desc_.size(), |
| input_desc_.data(), |
| output_desc_.size(), |
| output_desc_.data()), |
| ONNXIFI_STATUS_SUCCESS); |
| |
| onnxMemoryFenceV1 input_fence; |
| input_fence.tag = ONNXIFI_TAG_MEMORY_FENCE_V1; |
| input_fence.type = ONNXIFI_SYNCHRONIZATION_EVENT; |
| CAFFE_ENFORCE_EQ( |
| lib_->onnxInitEvent(backend_, &input_fence.event), |
| ONNXIFI_STATUS_SUCCESS); |
| onnxMemoryFenceV1 output_fence; |
| output_fence.tag = ONNXIFI_TAG_MEMORY_FENCE_V1; |
| output_fence.type = ONNXIFI_SYNCHRONIZATION_EVENT; |
| |
| // Call the asycn run on backend, singal event on input fence and wait for the |
| // event on output fence |
| CAFFE_ENFORCE_EQ( |
| lib_->onnxSignalEvent(input_fence.event), ONNXIFI_STATUS_SUCCESS); |
| CAFFE_ENFORCE_EQ( |
| lib_->onnxRunGraph(graph_, &input_fence, &output_fence), |
| ONNXIFI_STATUS_SUCCESS); |
| CAFFE_ENFORCE_EQ( |
| lib_->onnxWaitEvent(output_fence.event), ONNXIFI_STATUS_SUCCESS); |
| |
| // Destroy the event objects |
| CAFFE_ENFORCE_EQ( |
| lib_->onnxReleaseEvent(input_fence.event), ONNXIFI_STATUS_SUCCESS); |
| CAFFE_ENFORCE_EQ( |
| lib_->onnxReleaseEvent(output_fence.event), ONNXIFI_STATUS_SUCCESS); |
| |
| return true; |
| } |
| |
// Registers the float/CPU specialization of OnnxifiOp as the CPU
// implementation of the "Onnxifi" operator.
| REGISTER_CPU_OPERATOR(Onnxifi, OnnxifiOp<float, CPUContext>); |
// Schema for the "Onnxifi" operator: between 0 and INT_MAX inputs and
// outputs, plus the documented "onnx_model" and "initializers" arguments.
| OPERATOR_SCHEMA(Onnxifi) |
| .NumInputs(0, INT_MAX) |
| .NumOutputs(0, INT_MAX) |
| .SetDoc(R"DOC( |
| The Onnxifi operator is a black-box operator to lower the computation to Onnxifi backend |
| )DOC") |
| .Arg( |
| "onnx_model", |
| "(string default=\"\") Serialized ONNX model to be converted to backend representation") |
| .Arg( |
| "initializers", |
| "Initialization pair indicating the mapping of the name between NetDef and ONNX model"); |
| } // namespace caffe2 |