| #include "caffe2/opt/onnxifi_op.h" |
| #include "caffe2/operators/slice_op.h" |
| #include "caffe2/opt/bound_shape_inferencer.h" |
| |
| #include <c10/util/irange.h> |
| |
| namespace caffe2 { |
| |
| namespace { |
| |
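// Map a CPU tensor's dtype to the matching ONNXIFI data type and point the
// descriptor's buffer at the tensor's storage. Throws on dtypes this path
// does not support.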
void setInputTensorDescriptorTypeAndBuffer(
    const Tensor& cpu_tensor,
    onnxTensorDescriptorV1* desc) {
  if (cpu_tensor.template IsType<int32_t>()) {
    desc->dataType = ONNXIFI_DATATYPE_INT32;
    desc->buffer = reinterpret_cast<onnxPointer>(cpu_tensor.data<int32_t>());
  } else if (cpu_tensor.template IsType<c10::Half>()) {
    desc->dataType = ONNXIFI_DATATYPE_FLOAT16;
    desc->buffer = reinterpret_cast<onnxPointer>(cpu_tensor.data<c10::Half>());
  } else if (cpu_tensor.template IsType<float>()) {
    desc->dataType = ONNXIFI_DATATYPE_FLOAT32;
    desc->buffer = reinterpret_cast<onnxPointer>(cpu_tensor.data<float>());
  } else if (cpu_tensor.template IsType<int8_t>()) {
    desc->dataType = ONNXIFI_DATATYPE_INT8;
    desc->buffer = reinterpret_cast<onnxPointer>(cpu_tensor.data<int8_t>());
  } else if (cpu_tensor.template IsType<uint8_t>()) {
    desc->dataType = ONNXIFI_DATATYPE_UINT8;
    desc->buffer = reinterpret_cast<onnxPointer>(cpu_tensor.data<uint8_t>());
  } else if (cpu_tensor.template IsType<int64_t>()) {
    desc->dataType = ONNXIFI_DATATYPE_INT64;
    desc->buffer = reinterpret_cast<onnxPointer>(cpu_tensor.data<int64_t>());
  } else if (cpu_tensor.template IsType<int16_t>()) {
    desc->dataType = ONNXIFI_DATATYPE_INT16;
    desc->buffer = reinterpret_cast<onnxPointer>(cpu_tensor.data<int16_t>());
  } else if (cpu_tensor.template IsType<uint16_t>()) {
    desc->dataType = ONNXIFI_DATATYPE_UINT16;
    desc->buffer = reinterpret_cast<onnxPointer>(cpu_tensor.data<uint16_t>());
  } else {
    CAFFE_THROW(
        "Unsupported tensor type in ONNXIFI: ", cpu_tensor.dtype().name());
  }
}

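// Overload for quantized tensors: besides dtype and buffer, attach the single
// (scale, zero_point) pair carried by the Int8TensorCPU as the descriptor's
// quantization parameters.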
void setInputTensorDescriptorTypeAndBuffer(
    const int8::Int8TensorCPU& cpu_int8tensor,
    onnxTensorDescriptorV1* desc) {
  const Tensor& cpu_tensor = cpu_int8tensor.t;
  if (cpu_tensor.template IsType<uint8_t>()) {
    desc->dataType = ONNXIFI_DATATYPE_UINT8;
    desc->buffer = reinterpret_cast<onnxPointer>(cpu_tensor.data<uint8_t>());
  } else if (cpu_tensor.template IsType<int8_t>()) {
    desc->dataType = ONNXIFI_DATATYPE_INT8;
    desc->buffer = reinterpret_cast<onnxPointer>(cpu_tensor.data<int8_t>());
  } else if (cpu_tensor.template IsType<int32_t>()) {
    desc->dataType = ONNXIFI_DATATYPE_INT32;
    desc->buffer = reinterpret_cast<onnxPointer>(cpu_tensor.data<int32_t>());
  } else {
    CAFFE_THROW(
        "Unsupported Int8Tensor type in ONNXIFI: ", cpu_tensor.dtype().name());
  }
  desc->quantizationParams = 1;
  desc->quantizationAxis = 1;
  desc->scales = &cpu_int8tensor.scale;
  desc->biases = &cpu_int8tensor.zero_point;
}

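// Subtract a constant offset from every element in place. RunOnDevice uses
// this (together with a matching zero_point shift) to rebase quantized
// outputs onto Caffe2's zero-point convention.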
template <typename T>
void adjustQuantizedOffsetImpl(Tensor* t, uint8_t offset) {
  auto* data = t->mutable_data<T>();
  for (const auto i : c10::irange(t->numel())) {
    data[i] -= offset;
  }
}

void adjustQuantizedOffset(Tensor* t, uint8_t offset) {
  if (t->template IsType<uint8_t>()) {
    adjustQuantizedOffsetImpl<uint8_t>(t, offset);
  }
}

TypeMeta OnnxifiTypeToDataType(uint64_t onnxifi_type) {
  static std::map<uint64_t, TypeMeta> data_type_map{
      {ONNXIFI_DATATYPE_FLOAT32, TypeMeta::Make<float>()},
      {ONNXIFI_DATATYPE_FLOAT16, TypeMeta::Make<c10::Half>()},
      {ONNXIFI_DATATYPE_INT32, TypeMeta::Make<int>()},
      {ONNXIFI_DATATYPE_INT8, TypeMeta::Make<int8_t>()},
      {ONNXIFI_DATATYPE_UINT8, TypeMeta::Make<uint8_t>()},
      {ONNXIFI_DATATYPE_INT64, TypeMeta::Make<int64_t>()},
      {ONNXIFI_DATATYPE_INT16, TypeMeta::Make<int16_t>()},
      {ONNXIFI_DATATYPE_UINT16, TypeMeta::Make<uint16_t>()},
  };
  const auto it = data_type_map.find(onnxifi_type);
  CAFFE_ENFORCE(
      it != data_type_map.end(),
      "Unsupported ONNXIFI data type: ",
      onnxifi_type);
  return it->second;
}

void setOutputTensorDescriptorTypeAndBuffer(
    uint64_t onnxifi_type,
    Tensor* cpu_tensor,
    onnxTensorDescriptorV1* desc) {
  desc->dataType = onnxifi_type;
  desc->buffer = reinterpret_cast<onnxPointer>(
      cpu_tensor->raw_mutable_data(OnnxifiTypeToDataType(onnxifi_type)));
}

#ifndef C10_MOBILE
void copyDescriptor(
    const ExternalTensorDescriptor* from,
    onnxTensorDescriptorV1* to) {
  to->dataType = from->dataType;
  to->buffer = from->buffer;
  to->isOffline = from->isOffline;
  to->quantizationParams = from->quantizationParams;
  to->quantizationAxis = from->quantizationAxis;
  to->scales = from->scales;
  to->biases = from->biases;
  to->dimensions = from->dimensions;
  to->shape = from->shape;
}
#endif

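// Build an ONNXIFI tensor descriptor for a weight blob. The caller-owned
// `shapes`, `all_scales`, and `all_offsets` vectors provide stable storage
// for the raw pointers written into `desc`, so they must outlive it.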
void BlobToTensorDescriptor(
    const std::string& name,
    Workspace* ws,
    onnxTensorDescriptorV1* desc,
    std::vector<std::vector<uint64_t>>* shapes,
    std::vector<std::vector<float>>* all_scales,
    std::vector<std::vector<int32_t>>* all_offsets) {
  const Blob* blob = ws->GetBlob(name);
  CAFFE_ENFORCE(blob, "Blob ", name, " doesn't exist");
  const bool is_int8tensor =
      blob->meta().id() == TypeMeta::Id<int8::Int8TensorCPU>();
  // NOLINTNEXTLINE(cppcoreguidelines-init-variables)
  bool is_external_tensor;
#ifndef C10_MOBILE
  auto function_ptr =
      ExternalTensorFunctionsBaseRegistry()->Create(blob->meta().id());
  is_external_tensor = function_ptr != nullptr;
#else
  is_external_tensor = false;
#endif
  // Memory type
  // We only allow weights to be CPU tensor or int8tensor for now
  CAFFE_ENFORCE(
      (BlobIsTensorType(*blob, CPU) || BlobIsInt8TensorCPUType(*blob) ||
       is_external_tensor),
      "Initialization blob ",
      name,
      " needs to be a TensorCPU, an Int8TensorCPU, or an "
      "Int8FCDNNLowPPackedWeightBlob-based class. Got: ",
      blob->TypeName());
  desc->tag = ONNXIFI_TAG_TENSOR_DESCRIPTOR_V1;
  desc->memoryType = ONNXIFI_MEMORY_TYPE_CPU;
  desc->isOffline = false;

  if (is_int8tensor) {
    // Data type
    const auto& cpu_int8tensor = blob->template Get<int8::Int8TensorCPU>();
    const auto& cpu_tensor = cpu_int8tensor.t;
    setInputTensorDescriptorTypeAndBuffer(cpu_int8tensor, desc);
    // Set dims
    const auto shape = cpu_tensor.sizes();
    desc->dimensions = shape.size();
    shapes->emplace_back(shape.cbegin(), shape.cend());
    desc->shape = shapes->back().data();
  } else if (is_external_tensor) {
#ifndef C10_MOBILE
    ExternalTensorDescriptor ext_desc;
    function_ptr->SetupExternalTensorDescriptor(
        blob, shapes, all_scales, all_offsets, &ext_desc);
    copyDescriptor(&ext_desc, desc);
#endif
  } else {
    // Data type
    const auto& cpu_tensor = blob->template Get<TensorCPU>();
    setInputTensorDescriptorTypeAndBuffer(cpu_tensor, desc);
    // Set dims
    const auto shape = cpu_tensor.sizes();
    desc->dimensions = shape.size();
    shapes->emplace_back(shape.cbegin(), shape.cend());
    desc->shape = shapes->back().data();
    desc->quantizationParams = 0;
  }
}


uint64_t getOnnxifiDataType(caffe2::TensorProto::DataType t) {
#define CAFFE2_TO_ONNXIFI_TYPE(x) \
  case (caffe2::TensorProto::x): \
    return ONNXIFI_DATATYPE_##x
  switch (t) {
    CAFFE2_TO_ONNXIFI_TYPE(INT8);
    CAFFE2_TO_ONNXIFI_TYPE(UINT8);
    CAFFE2_TO_ONNXIFI_TYPE(UINT16);
    CAFFE2_TO_ONNXIFI_TYPE(INT16);
    CAFFE2_TO_ONNXIFI_TYPE(INT32);
    CAFFE2_TO_ONNXIFI_TYPE(INT64);
    CAFFE2_TO_ONNXIFI_TYPE(FLOAT16);
    case (caffe2::TensorProto::FLOAT):
      return ONNXIFI_DATATYPE_FLOAT32;
    default:
      LOG(WARNING) << "Unsupported Caffe2 tensor type: " << t;
      return ONNXIFI_DATATYPE_UNDEFINED;
  }
#undef CAFFE2_TO_ONNXIFI_TYPE
}

} // namespace

namespace details {
TensorInfo::TensorInfo(const TensorProto& t)
    : onnxifi_type(getOnnxifiDataType(t.data_type())),
      quantized(false),
      quantizationAxis(0),
      quantizationParams(0) {
  for (const auto d : t.dims()) {
    dims.push_back(d);
  }
}

TensorInfo::TensorInfo(const QTensorProto& t)
    : onnxifi_type(getOnnxifiDataType(t.data_type())),
      quantized(true),
      quantizationAxis(t.has_axis() ? t.axis() : 0),
      quantizationParams(t.scales_size() ? t.scales_size() : 1) {
  for (const auto d : t.dims()) {
    dims.push_back(d);
  }
  if (t.scales_size()) {
    for (const auto d : t.scales()) {
      scales.push_back(static_cast<float>(d));
    }
    for (const auto d : t.biases()) {
      biases.push_back(static_cast<int32_t>(d));
    }
  } else {
    scales.push_back(static_cast<float>(t.scale()));
    biases.push_back(static_cast<int32_t>(t.bias()));
  }
}
} // namespace details

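// Gather descriptors for all weights listed in `initializers`, in workspace
// iteration order. Every requested name must be found, or we throw.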
template <>
std::vector<onnxTensorDescriptorV1>
OnnxifiOp<CPUContext>::buildInitializationList(
    Workspace* ws,
    const std::vector<std::string>& initializers,
    std::vector<std::string>* weight_names,
    std::vector<std::vector<uint64_t>>* weight_shapes,
    std::vector<std::vector<float>>* all_scales,
    std::vector<std::vector<int32_t>>* all_offsets) const {
  std::unordered_set<std::string> initialization_list(
      initializers.begin(), initializers.end());
  const std::vector<string>& ws_blobs = ws->Blobs();
  // Since onnxTensorDescriptorV1.name will point into the memory in
  // weight_names, we need to prevent weight_names from reallocating by
  // reserving enough memory ahead of time
  weight_names->reserve(ws_blobs.size());
  std::vector<onnxTensorDescriptorV1> descs;
  for (const auto& s : ws_blobs) {
    auto it = initialization_list.find(s);
    if (it != initialization_list.end()) {
      weight_names->emplace_back(s);
      onnxTensorDescriptorV1 tensor_desc;
      tensor_desc.name = weight_names->back().c_str();
      BlobToTensorDescriptor(
          s, ws, &tensor_desc, weight_shapes, all_scales, all_offsets);
      descs.push_back(tensor_desc);
      initialization_list.erase(it);
    }
  }
  CAFFE_ENFORCE(initialization_list.empty(), "Unfulfilled initialization list");
  return descs;
}

template <>
details::OutputReshapeInfo OnnxifiOp<CPUContext>::initOutputReshapeInfo()
    const {
  details::OutputReshapeInfo output_reshape_info;
  output_reshape_info.begins.reserve(output_names_.size());
  output_reshape_info.ends.reserve(output_names_.size());
  output_reshape_info.fast_path.reserve(output_names_.size());
  for (const auto i : c10::irange(output_names_.size())) {
    const auto it = output_shape_hints_.find(i);
    CAFFE_ENFORCE(
        it != output_shape_hints_.end(),
        "Cannot find output shape hints for ",
        output_names_[i]);
    int64_t num_dims = it->second.dims.size();
    // Initialize the tensors used to slice the output
    output_reshape_info.begins.emplace_back();
    ReinitializeTensor(
        &output_reshape_info.begins.back(),
        {num_dims},
        at::dtype<int32_t>().device(CPU));
    output_reshape_info.ends.emplace_back();
    ReinitializeTensor(
        &output_reshape_info.ends.back(),
        {num_dims},
        at::dtype<int32_t>().device(CPU));
  }
  return output_reshape_info;
}

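// Fill per-dimension [begin, end) slice bounds used to trim an output that
// was produced at max_shape down to real_shape.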
template <>
template <typename DimContainer>
void OnnxifiOp<CPUContext>::fillOutputReshapeInfo(
    const DimContainer& real_shape,
    c10::ArrayRef<uint64_t> max_shape,
    details::OutputReshapeInfo& output_reshape_info,
    int currentIndex) {
  CAFFE_ENFORCE_EQ(real_shape.size(), max_shape.size());
  const auto dim_size = real_shape.size();
  auto& begin = output_reshape_info.begins[currentIndex];
  begin.Resize(dim_size);
  int32_t* begin_ptr = begin.template mutable_data<int32_t>();
  auto& end = output_reshape_info.ends[currentIndex];
  end.Resize(dim_size);
  int32_t* end_ptr = end.template mutable_data<int32_t>();
  int32_t mismatch = 0;
  for (const auto j : c10::irange(dim_size)) {
    CAFFE_ENFORCE_GE(
        max_shape[j],
        real_shape[j],
        "It is weird that max shape of ",
        output_names_[currentIndex],
        " is smaller than real shape at dim ",
        j,
        " (",
        max_shape[j],
        " vs ",
        real_shape[j],
        ")");
    begin_ptr[j] = 0;
    if (max_shape[j] > static_cast<uint64_t>(real_shape[j])) {
      end_ptr[j] = real_shape[j];
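      // Note: j == 0 (the batch dim) contributes nothing here, so an output
      // that only shrinks along the batch dim still takes the fast path,
      // which is a plain ShrinkTo on dim 0 in adjustOutputBatchSizes().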
      mismatch += j;
    } else {
      end_ptr[j] = max_shape[j];
    }
  }

  if (dim_size > 0) {
    output_reshape_info.fast_path[currentIndex] = !mismatch;
  } else {
    output_reshape_info.fast_path[currentIndex] = false;
  }
}

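// Compute slice bounds for every output at `current_batch_size`, either from
// shapes passed in by the caller or from a fresh bound shape inference pass
// over the original NetDef.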
template <>
void OnnxifiOp<CPUContext>::extractOutputBatchSizes(int current_batch_size) {
  auto& output_reshape_info =
      output_reshape_info_.emplace(current_batch_size, initOutputReshapeInfo())
          .first->second;

  if (use_passed_output_shapes_) {
    const auto shape_info_it = output_shapes_per_bs_.find(current_batch_size);
    CAFFE_ENFORCE(
        shape_info_it != output_shapes_per_bs_.end(),
        "Unable to find outputs shapes for bs=",
        current_batch_size);
    CAFFE_ENFORCE_EQ(shape_info_it->second.size(), OutputSize());

    for (int i = 0; i < OutputSize(); ++i) {
      fillOutputReshapeInfo(
          shape_info_it->second[i],
          output_shapes_max_bs_[i],
          output_reshape_info,
          i);
    }
  } else {
    BoundShapeSpec spec(current_batch_size, max_seq_size_);
    auto bound_shape_inferencer =
        BoundShapeInferencerRegistry()->Create("C10", spec);
    for (int i = 0; i < InputSize(); ++i) {
      at::IntArrayRef dim0;
      bool quantized = false;
      if (this->template InputIsType<int8::Int8TensorCPU>(i)) {
        const auto& input_tensor_int8 =
            this->template Input<int8::Int8TensorCPU>(i);
        const auto& t0 = input_tensor_int8.t;
        dim0 = t0.sizes();
        quantized = true;
      } else {
        const auto& t0 = Input(i);
        dim0 = t0.sizes();
      }
      TensorShape shape;
      for (const auto d : dim0) {
        shape.add_dims(d);
      }
      std::vector<TensorBoundShape::DimType> dim_type(
          shape.dims_size(), TensorBoundShape_DimType_CONSTANT);
      if (dim_type.size()) {
        dim_type[0] = TensorBoundShape_DimType_BATCH;
      }
      input_shape_info_[input_names_[i]] =
          ShapeInfo(dim_type, std::move(shape), quantized);
    }
    bound_shape_inferencer->InferBoundShapeAndType(
        netdef_, input_shape_info_, nullptr, false);
    const auto& shape_info = bound_shape_inferencer->shape_info();
    for (int i = 0; i < OutputSize(); ++i) {
      const auto find_res = shape_info.find(output_names_[i]);
      CAFFE_ENFORCE(find_res != shape_info.end());
      fillOutputReshapeInfo(
          find_res->second.shape.dims(),
          output_shapes_max_bs_[i],
          output_reshape_info,
          i);
    }
  }
}

template <>
int OnnxifiOp<CPUContext>::extractOutputBatchSizes() {
  if (use_onnx_ || !adjust_output_batch_) {
    return max_batch_size_;
  }

  // Get the real batch size from nominal input. If it's equal to
  // max_batch_size, mark that we don't need to adjust batch size and return.
  // Otherwise, do a pass of shape inference to get the real shapes of the
  // outputs.
  const Tensor* t = nullptr;
  if (this->template InputIsType<int8::Int8TensorCPU>(nominal_batch_idx_)) {
    const auto& input_tensor_int8 =
        this->template Input<int8::Int8TensorCPU>(nominal_batch_idx_);
    t = &input_tensor_int8.t;
  } else {
    t = &Input(nominal_batch_idx_);
  }

  CAFFE_ENFORCE(
      t, "Null input shape tensor ptr. Possibly unsupported tensor type");
  CAFFE_ENFORCE(
      !t->sizes().empty(),
      input_names_[nominal_batch_idx_],
      " cannot be empty");
  const auto dims = t->sizes();
  const int current_batch_size = dims[0];
  if (current_batch_size == max_batch_size_) {
    return max_batch_size_;
  }

  // We still need to adjust output size but we can skip the shape inference as
  // it was done before.
  if (output_reshape_info_.count(current_batch_size)) {
    return current_batch_size;
  }

  extractOutputBatchSizes(current_batch_size);

  return current_batch_size;
}

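// Trim each output, produced at max batch size, down to the real batch size:
// a cheap ShrinkTo on the batch dim when only dim 0 differs (fast path), or a
// generic Slice into a temporary followed by a copy back otherwise.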
template <>
void OnnxifiOp<CPUContext>::adjustOutputBatchSizes(int current_batch_size) {
  auto it = output_reshape_info_.find(current_batch_size);
  CAFFE_ENFORCE(
      it != output_reshape_info_.end(),
      "Cannot find current_batch_size ",
      current_batch_size,
      " in output_reshape_info_");
  const auto& output_reshape_info = it->second;
  CPUContext context;
  Tensor tmp(CPU);
  for (int i = 0; i < OutputSize(); ++i) {
    Tensor* output_tensor = quantized_outputs_[i]
        ? (&this->template Output<int8::Int8TensorCPU>(i)->t)
        : Output(i);
    const auto& end = output_reshape_info.ends[i];
    if (output_reshape_info.fast_path[i]) {
      output_tensor->ShrinkTo(end.data<int32_t>()[0]);
    } else {
      // We need to use generic Slice
      SliceImpl<int32_t, CPUContext>(
          &tmp, *output_tensor, output_reshape_info.begins[i], end, &context);
      output_tensor->CopyFrom(tmp);
    }
  }
}

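// Populate the ONNXIFI output descriptor for `output_idx` from its shape hint
// (always sized for the max batch size) and allocate the backing C2 tensor,
// as a plain Tensor or an Int8TensorCPU depending on the hint.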
template <>
void OnnxifiOp<CPUContext>::setOutputShapeAndType(
    int output_idx,
    c10::SmallVector<int64_t, 4>& tensor_dims_int64) {
  tensor_dims_int64.clear();
  std::vector<size_t> tensor_dims;
  uint64_t type = ONNXIFI_DATATYPE_FLOAT32;
  const auto it = output_shape_hints_.find(output_idx);
  CAFFE_ENFORCE(
      it != output_shape_hints_.end(),
      "Cannot find shape hint for output: ",
      output_names_[output_idx]);
  const auto& info = it->second;
  std::copy(
      info.dims.begin(), info.dims.end(), std::back_inserter(tensor_dims));
  type = it->second.onnxifi_type;
  auto& tensor_descriptor = output_desc_[output_idx];
  tensor_descriptor.tag = ONNXIFI_TAG_TENSOR_DESCRIPTOR_V1;
  tensor_descriptor.memoryType = ONNXIFI_MEMORY_TYPE_CPU;
  tensor_descriptor.dimensions = tensor_dims.size();
  CAFFE_ENFORCE(
      tensor_descriptor.dimensions != 0, tensor_descriptor.name, " has 0 dim");
  auto& output_shape = output_shapes_max_bs_[output_idx];
  output_shape.clear();
  output_shape.insert(
      output_shape.begin(), tensor_dims.cbegin(), tensor_dims.cend());
  tensor_descriptor.shape = output_shape.data();
  std::copy(
      tensor_dims.cbegin(),
      tensor_dims.cend(),
      std::back_inserter(tensor_dims_int64));

  // Setup the output C2 tensor
  if (!info.quantized) {
    // Normal Tensor
    auto* output_tensor = Output(
        output_idx,
        tensor_dims_int64,
        at::dtype(OnnxifiTypeToDataType(type)).device(CPU));
    setOutputTensorDescriptorTypeAndBuffer(
        type, output_tensor, &tensor_descriptor);
  } else if (info.quantizationParams == 1) {
    // single quantizer, output Int8Tensor
    auto* output_tensor =
        this->template Output<int8::Int8TensorCPU>(output_idx);
    output_tensor->t.Resize(tensor_dims_int64);
    setOutputTensorDescriptorTypeAndBuffer(
        type, &output_tensor->t, &tensor_descriptor);
    tensor_descriptor.quantizationParams = 1;
    tensor_descriptor.quantizationAxis = 1;
    tensor_descriptor.scales = &output_tensor->scale;
    tensor_descriptor.biases = &output_tensor->zero_point;
  } else {
    CAFFE_THROW(
        "OnnxifiOp does not support output tensor with multi-quantization params: ",
        output_names_[output_idx]);
  }
}

string mapOnnxStateToString(onnxEventState state) {
  switch (state) {
    case ONNXIFI_EVENT_STATE_NONSIGNALLED:
      return "ONNXIFI_EVENT_STATE_NONSIGNALLED";
    default:
      return "ONNXIFI_EVENT_STATE_STRING_NOT_MAPPED";
  }
}

string mapOnnxStatusToString(onnxStatus status) {
  switch (status) {
    case ONNXIFI_STATUS_SUCCESS:
      return "ONNXIFI_STATUS_SUCCESS";
    case ONNXIFI_STATUS_FALLBACK:
      return "ONNXIFI_STATUS_FALLBACK";
    case ONNXIFI_STATUS_INVALID_ID:
      return "ONNXIFI_STATUS_INVALID_ID";
    case ONNXIFI_STATUS_INVALID_SIZE:
      return "ONNXIFI_STATUS_INVALID_SIZE";
    case ONNXIFI_STATUS_INVALID_POINTER:
      return "ONNXIFI_STATUS_INVALID_POINTER";
    case ONNXIFI_STATUS_INVALID_PROTOBUF:
      return "ONNXIFI_STATUS_INVALID_PROTOBUF";
    case ONNXIFI_STATUS_INVALID_MODEL:
      return "ONNXIFI_STATUS_INVALID_MODEL";
    case ONNXIFI_STATUS_INVALID_BACKEND:
      return "ONNXIFI_STATUS_INVALID_BACKEND";
    case ONNXIFI_STATUS_INVALID_GRAPH:
      return "ONNXIFI_STATUS_INVALID_GRAPH";
    case ONNXIFI_STATUS_INVALID_EVENT:
      return "ONNXIFI_STATUS_INVALID_EVENT";
    case ONNXIFI_STATUS_INVALID_STATE:
      return "ONNXIFI_STATUS_INVALID_STATE";
    case ONNXIFI_STATUS_INVALID_NAME:
      return "ONNXIFI_STATUS_INVALID_NAME";
    case ONNXIFI_STATUS_INVALID_SHAPE:
      return "ONNXIFI_STATUS_INVALID_SHAPE";
    case ONNXIFI_STATUS_INVALID_DATATYPE:
      return "ONNXIFI_STATUS_INVALID_DATATYPE";
    case ONNXIFI_STATUS_INVALID_MEMORY_TYPE:
      return "ONNXIFI_STATUS_INVALID_MEMORY_TYPE";
    case ONNXIFI_STATUS_INVALID_MEMORY_LOCATION:
      return "ONNXIFI_STATUS_INVALID_MEMORY_LOCATION";
    case ONNXIFI_STATUS_INVALID_FENCE_TYPE:
      return "ONNXIFI_STATUS_INVALID_FENCE_TYPE";
    case ONNXIFI_STATUS_INVALID_PROPERTY:
      return "ONNXIFI_STATUS_INVALID_PROPERTY";
    case ONNXIFI_STATUS_UNSUPPORTED_TAG:
      return "ONNXIFI_STATUS_UNSUPPORTED_TAG";
    case ONNXIFI_STATUS_UNSUPPORTED_VERSION:
      return "ONNXIFI_STATUS_UNSUPPORTED_VERSION";
    case ONNXIFI_STATUS_UNSUPPORTED_OPERATOR:
      return "ONNXIFI_STATUS_UNSUPPORTED_OPERATOR";
    case ONNXIFI_STATUS_UNSUPPORTED_ATTRIBUTE:
      return "ONNXIFI_STATUS_UNSUPPORTED_ATTRIBUTE";
    case ONNXIFI_STATUS_UNSUPPORTED_SHAPE:
      return "ONNXIFI_STATUS_UNSUPPORTED_SHAPE";
    case ONNXIFI_STATUS_UNSUPPORTED_DATATYPE:
      return "ONNXIFI_STATUS_UNSUPPORTED_DATATYPE";
    case ONNXIFI_STATUS_UNSUPPORTED_MEMORY_TYPE:
      return "ONNXIFI_STATUS_UNSUPPORTED_MEMORY_TYPE";
    case ONNXIFI_STATUS_UNSUPPORTED_FENCE_TYPE:
      return "ONNXIFI_STATUS_UNSUPPORTED_FENCE_TYPE";
    case ONNXIFI_STATUS_UNSUPPORTED_PROPERTY:
      return "ONNXIFI_STATUS_UNSUPPORTED_PROPERTY";
    case ONNXIFI_STATUS_UNIDENTIFIED_NAME:
      return "ONNXIFI_STATUS_UNIDENTIFIED_NAME";
    case ONNXIFI_STATUS_MISMATCHING_SHAPE:
      return "ONNXIFI_STATUS_MISMATCHING_SHAPE";
    case ONNXIFI_STATUS_MISMATCHING_DATATYPE:
      return "ONNXIFI_STATUS_MISMATCHING_DATATYPE";
    case ONNXIFI_STATUS_NO_SYSTEM_MEMORY:
      return "ONNXIFI_STATUS_NO_SYSTEM_MEMORY";
    case ONNXIFI_STATUS_NO_DEVICE_MEMORY:
      return "ONNXIFI_STATUS_NO_DEVICE_MEMORY";
    case ONNXIFI_STATUS_NO_SYSTEM_RESOURCES:
      return "ONNXIFI_STATUS_NO_SYSTEM_RESOURCES";
    case ONNXIFI_STATUS_NO_DEVICE_RESOURCES:
      return "ONNXIFI_STATUS_NO_DEVICE_RESOURCES";
    case ONNXIFI_STATUS_BACKEND_UNAVAILABLE:
      return "ONNXIFI_STATUS_BACKEND_UNAVAILABLE";
    case ONNXIFI_STATUS_INTERNAL_ERROR:
      return "ONNXIFI_STATUS_INTERNAL_ERROR";
    case ONNXIFI_STATUS_FATAL_ERROR:
      return "ONNXIFI_STATUS_FATAL_ERROR";
    default:
      return "ONNXIFI_STATUS_STRING_NOT_MAPPED";
  }
}

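// Per-run entry point: refresh input/output descriptors, execute the graph
// (through the onnxSetIOAndRunGraph extension when the backend provides it,
// otherwise through plain onnxSetGraphIO + onnxRunGraph with event fences),
// then rebase quantized outputs and trim outputs to the real batch size.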
template <>
bool OnnxifiOp<CPUContext>::RunOnDevice() {
  CAFFE_ENFORCE_EQ(input_desc_.size(), InputSize());
  for (const auto i : c10::irange(InputSize())) {
    auto& tensor_descriptor = input_desc_[i];
    tensor_descriptor.tag = ONNXIFI_TAG_TENSOR_DESCRIPTOR_V1;
    tensor_descriptor.memoryType = ONNXIFI_MEMORY_TYPE_CPU;
    at::IntArrayRef tensor_dims;
    // NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,bugprone-narrowing-conversions)
    if (this->template InputIsType<int8::Int8TensorCPU>(i)) {
      const auto& input_tensor_int8 =
          // NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,bugprone-narrowing-conversions)
          this->template Input<int8::Int8TensorCPU>(i);
      const auto& cpu_tensor = input_tensor_int8.t;
      tensor_dims = cpu_tensor.sizes();
      setInputTensorDescriptorTypeAndBuffer(
          input_tensor_int8, &tensor_descriptor);
    } else {
      // NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,bugprone-narrowing-conversions)
      const auto& input_tensor = Input(i);
      tensor_dims = input_tensor.sizes();
      setInputTensorDescriptorTypeAndBuffer(input_tensor, &tensor_descriptor);
    }
    auto& input_shape = input_shapes_[i];
    input_shape.clear();
    input_shape.insert(
        input_shape.begin(), tensor_dims.cbegin(), tensor_dims.cend());
    tensor_descriptor.dimensions = tensor_dims.size();
    tensor_descriptor.shape = input_shape.data();
  }

  CAFFE_ENFORCE_EQ(output_desc_.size(), OutputSize());
  c10::SmallVector<int64_t, 4> tensor_dims_int64;
  for (const auto i : c10::irange(OutputSize())) {
    setOutputShapeAndType(i, tensor_dims_int64);
  }
  bool ext_supported = false;
  onnxMemoryFenceV1 input_fence;
  onnxMemoryFenceV1 output_fence;
  std::vector<int> output_batch_sizes;
  int current_batch_size = max_batch_size_;
#ifdef ONNXIFI_ENABLE_EXT
  /**
   * If ONNXIFI extension mode is enabled and the backend supports
   * onnxSetIOAndRunGraph, run through that workflow; otherwise fall back
   * to the non-extension workflow below.
   **/
  if (onnxSetIOAndRunGraphPointer_ != nullptr) {
    ext_supported = true;
    output_fence.tag = ONNXIFI_TAG_MEMORY_FENCE_V1;
    output_fence.type = ONNXIFI_SYNCHRONIZATION_EVENT;
    traces_.reset();
    if (enable_tracing_) {
      traces_ = std::shared_ptr<onnxTraceEventList>(
          new onnxTraceEventList(), [this](onnxTraceEventList* p) {
            if (p && onnxReleaseTraceEventsPointer_) {
              CAFFE_ENFORCE_EQ(
                  (*onnxReleaseTraceEventsPointer_)(p), ONNXIFI_STATUS_SUCCESS);
            }
            delete p;
          });
      traces_->numEvents = 0;
    }

    const onnxStatus status = (*onnxSetIOAndRunGraphPointer_)(
        graph_,
        input_desc_.size(),
        input_desc_.data(),
        output_desc_.size(),
        output_desc_.data(),
        &output_fence,
        traces_.get());
    CAFFE_ENFORCE_EQ(
        status,
        ONNXIFI_STATUS_SUCCESS,
        "Reason: onnxSetIOAndRunGraph returned status code ",
        mapOnnxStatusToString(status));

    // Check if we should rely on ONNXIFI to provide the current batch size
    if (use_onnxifi_batch_size_ && onnxGetCurrentBatchSizePointer_ != nullptr) {
      int64_t onnxifiBatchSize;
      if ((*onnxGetCurrentBatchSizePointer_)(&onnxifiBatchSize) ==
          ONNXIFI_STATUS_SUCCESS) {
        current_batch_size = onnxifiBatchSize;

        if (current_batch_size != max_batch_size_ &&
            output_reshape_info_.count(current_batch_size) == 0) {
          extractOutputBatchSizes(current_batch_size);
        }
      } else {
        current_batch_size = extractOutputBatchSizes();
      }
    } else {
      current_batch_size = extractOutputBatchSizes();
    }
    // NOLINTNEXTLINE(cppcoreguidelines-init-variables)
    onnxEventState eventState;
    // NOLINTNEXTLINE(cppcoreguidelines-init-variables)
    onnxStatus eventStatus;
    std::string message;
    size_t messageLength = 512;
    message.resize(messageLength);

    CAFFE_ENFORCE_EQ(
        (*onnxWaitEventForPointer_)(
            output_fence.event,
            timeout_,
            &eventState,
            &eventStatus,
            // NOLINTNEXTLINE(cppcoreguidelines-pro-type-const-cast)
            const_cast<char*>(message.data()),
            &messageLength),
        ONNXIFI_STATUS_SUCCESS);
    CAFFE_ENFORCE_EQ(
        eventState,
        ONNXIFI_EVENT_STATE_SIGNALLED,
        "ONNXIFI run timed out after ",
        timeout_,
        " ms. Reason: run returned event state code ",
        mapOnnxStateToString(eventState));
    if (eventStatus != ONNXIFI_STATUS_SUCCESS) {
      if (messageLength == 0) {
        CAFFE_THROW("onnxifi internal error");
      } else {
        CAFFE_THROW(message);
      }
    }
    CAFFE_ENFORCE_EQ(
        lib_->onnxReleaseEvent(output_fence.event), ONNXIFI_STATUS_SUCCESS);
  }
#endif
  if (!ext_supported) {
    CAFFE_ENFORCE_EQ(
        lib_->onnxSetGraphIO(
            graph_,
            input_desc_.size(),
            input_desc_.data(),
            output_desc_.size(),
            output_desc_.data()),
        ONNXIFI_STATUS_SUCCESS);

    input_fence.tag = ONNXIFI_TAG_MEMORY_FENCE_V1;
    input_fence.type = ONNXIFI_SYNCHRONIZATION_EVENT;
    CAFFE_ENFORCE_EQ(
        lib_->onnxInitEvent(backend_, &input_fence.event),
        ONNXIFI_STATUS_SUCCESS);
    output_fence.tag = ONNXIFI_TAG_MEMORY_FENCE_V1;
    output_fence.type = ONNXIFI_SYNCHRONIZATION_EVENT;

    // Call the async run on backend, signal event on input fence and wait for
    // the event on output fence
    CAFFE_ENFORCE_EQ(
        lib_->onnxRunGraph(graph_, &input_fence, &output_fence),
        ONNXIFI_STATUS_SUCCESS);
    CAFFE_ENFORCE_EQ(
        lib_->onnxSignalEvent(input_fence.event), ONNXIFI_STATUS_SUCCESS);
    current_batch_size = extractOutputBatchSizes();
    CAFFE_ENFORCE_EQ(
        lib_->onnxWaitEvent(output_fence.event), ONNXIFI_STATUS_SUCCESS);

    // Destroy the event objects
    CAFFE_ENFORCE_EQ(
        lib_->onnxReleaseEvent(input_fence.event), ONNXIFI_STATUS_SUCCESS);
    CAFFE_ENFORCE_EQ(
        lib_->onnxReleaseEvent(output_fence.event), ONNXIFI_STATUS_SUCCESS);
  }

  if (adjust_quantized_offset_) {
    for (const auto i : c10::irange(OutputSize())) {
      if (quantized_outputs_[i]) {
        auto* int8_tensor = this->template Output<int8::Int8TensorCPU>(i);
        int8_tensor->zero_point += adjust_quantized_offset_;
        adjustQuantizedOffset(&int8_tensor->t, adjust_quantized_offset_);
      }
    }
  }

  if (adjust_output_batch_ && current_batch_size != max_batch_size_) {
    adjustOutputBatchSizes(current_batch_size);
  }
  enable_tracing_ = false;
  return true;
}

REGISTER_CPU_OPERATOR(Onnxifi, OnnxifiOp<CPUContext>);
OPERATOR_SCHEMA(Onnxifi)
    .NumInputs(0, INT_MAX)
    .NumOutputs(0, INT_MAX)
    .SetDoc(R"DOC(
The Onnxifi operator is a black-box operator that lowers the computation to an
ONNXIFI backend.
)DOC")
    .Arg(
        "onnx_model",
        "(string default=\"\") Serialized ONNX model to be converted to backend representation")
    .Arg(
        "initializers",
        "Initialization pair indicating the mapping of the name between NetDef and ONNX model")
    .Arg(
        "output_resize_hints",
        "A list of key/value pairs indicating which input index to look up for the real batch size of the given max-batch-size output");
} // namespace caffe2