#include "caffe2/operators/onnxifi_op.h"
namespace caffe2 {
namespace {
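// Fill an onnxTensorDescriptorV1 from a CPU tensor blob in the workspace.
// The tensor's shape is copied into *shapes so that the descriptor's shape
// pointer remains valid after this function returns.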
void BlobToTensorDescriptor(
const std::string& name,
Workspace* ws,
onnxTensorDescriptorV1* desc,
std::vector<std::vector<uint64_t>>* shapes) {
const Blob* blob = ws->GetBlob(name);
CAFFE_ENFORCE(blob, "Blob ", name, " doesn't exist");
// Memory type
// We only allow weights to be CPU tensors for now
CAFFE_ENFORCE(
blob->template IsType<Tensor>(CPU),
"Initialization blob ",
name,
" needs to be TensorCPU");
desc->tag = ONNXIFI_TAG_TENSOR_DESCRIPTOR_V1;
desc->memoryType = ONNXIFI_MEMORY_TYPE_CPU;
// Data type
const auto& cpu_tensor = blob->template Get<TensorCPU>();
if (cpu_tensor.template IsType<float>()) {
desc->dataType = ONNXIFI_DATATYPE_FLOAT32;
desc->buffer = reinterpret_cast<onnxPointer>(cpu_tensor.data<float>());
} else if (cpu_tensor.template IsType<int64_t>()) {
desc->dataType = ONNXIFI_DATATYPE_INT64;
desc->buffer = reinterpret_cast<onnxPointer>(cpu_tensor.data<int64_t>());
} else if (cpu_tensor.template IsType<int32_t>()) {
desc->dataType = ONNXIFI_DATATYPE_INT32;
desc->buffer = reinterpret_cast<onnxPointer>(cpu_tensor.data<int32_t>());
} else {
  // Guard against silently leaving the descriptor uninitialized for
  // unsupported element types.
  CAFFE_THROW("Unsupported tensor data type in initialization blob ", name);
}
// Set dims
const auto& shape = cpu_tensor.dims();
desc->dimensions = shape.size();
shapes->emplace_back(shape.cbegin(), shape.cend());
desc->shape = shapes->back().data();
}
} // namespace
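// Build ONNXIFI tensor descriptors for every workspace blob named in
// initialization_list, recording the weight names and shapes so that the
// descriptors' name and shape pointers stay valid for the lifetime of the op.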
template <>
std::vector<onnxTensorDescriptorV1>
OnnxifiOp<float, CPUContext>::BuildInitializationList(
Workspace* ws,
std::unordered_set<std::string>* initialization_list,
std::vector<std::string>* weight_names,
std::vector<std::vector<uint64_t>>* weight_shapes) {
const std::vector<string>& ws_blobs = ws->Blobs();
std::vector<onnxTensorDescriptorV1> descs;
for (const auto& s : ws_blobs) {
auto it = initialization_list->find(s);
if (it != initialization_list->end()) {
weight_names->emplace_back(s);
onnxTensorDescriptorV1 tensor_desc;
tensor_desc.name = weight_names->back().c_str();
BlobToTensorDescriptor(s, ws, &tensor_desc, weight_shapes);
descs.push_back(tensor_desc);
initialization_list->erase(it);
}
}
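// Every initializer requested by the model must have been found in the workspace.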
CAFFE_ENFORCE(
initialization_list->empty(), "Unfulfilled initialization list");
return descs;
}
template <>
bool OnnxifiOp<float, CPUContext>::RunOnDevice() {
// Reset cached shape storage from any previous run so the vectors do not
// grow across calls; the descriptors below point into them.
input_shapes_.clear();
output_shapes_.clear();
// Populate an ONNXIFI tensor descriptor for each CPU input of the operator.
for (unsigned i = 0U; i < InputSize(); ++i) {
const auto& input_tensor = Input(i);
const auto& tensor_dims = input_tensor.dims();
auto& tensor_descriptor = input_desc_.at(i);
tensor_descriptor.dataType = ONNXIFI_DATATYPE_FLOAT32;
tensor_descriptor.memoryType = ONNXIFI_MEMORY_TYPE_CPU;
tensor_descriptor.dimensions = tensor_dims.size();
input_shapes_.emplace_back(tensor_dims.cbegin(), tensor_dims.cend());
tensor_descriptor.shape = input_shapes_.back().data();
tensor_descriptor.buffer =
reinterpret_cast<onnxPointer>(input_tensor.data<float>());
}
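// Resize each output to its inferred shape and fill its ONNXIFI descriptor.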
for (unsigned i = 0U; i < OutputSize(); ++i) {
auto* output_tensor = Output(i);
std::vector<TIndex> tensor_dims;
SetOutputShape(i, &tensor_dims);
output_tensor->Resize(tensor_dims);
auto& tensor_descriptor = output_desc_.at(i);
tensor_descriptor.dataType = ONNXIFI_DATATYPE_FLOAT32;
tensor_descriptor.memoryType = ONNXIFI_MEMORY_TYPE_CPU;
tensor_descriptor.dimensions = tensor_dims.size();
CAFFE_ENFORCE(
tensor_descriptor.dimensions != 0,
tensor_descriptor.name,
" has 0 dim");
output_shapes_.emplace_back(tensor_dims.cbegin(), tensor_dims.cend());
tensor_descriptor.shape = output_shapes_.back().data();
tensor_descriptor.buffer =
reinterpret_cast<onnxPointer>(output_tensor->mutable_data<float>());
}
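// Bind the input and output tensor descriptors to the ONNXIFI graph.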
CAFFE_ENFORCE_EQ(
lib_->onnxSetGraphIO(
graph_,
input_desc_.size(),
input_desc_.data(),
output_desc_.size(),
output_desc_.data()),
ONNXIFI_STATUS_SUCCESS);
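// Create event-backed memory fences so the asynchronous run can be ordered
// against host-side reads and writes of the tensor buffers.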
onnxMemoryFenceV1 input_fence;
input_fence.tag = ONNXIFI_TAG_MEMORY_FENCE_V1;
input_fence.type = ONNXIFI_SYNCHRONIZATION_EVENT;
CAFFE_ENFORCE_EQ(
lib_->onnxInitEvent(backend_, &input_fence.event),
ONNXIFI_STATUS_SUCCESS);
onnxMemoryFenceV1 output_fence;
output_fence.tag = ONNXIFI_TAG_MEMORY_FENCE_V1;
output_fence.type = ONNXIFI_SYNCHRONIZATION_EVENT;
// Call the async run on the backend, signal the event on the input fence,
// and wait for the event on the output fence.
CAFFE_ENFORCE_EQ(
lib_->onnxSignalEvent(input_fence.event), ONNXIFI_STATUS_SUCCESS);
CAFFE_ENFORCE_EQ(
lib_->onnxRunGraph(graph_, &input_fence, &output_fence),
ONNXIFI_STATUS_SUCCESS);
CAFFE_ENFORCE_EQ(
lib_->onnxWaitEvent(output_fence.event), ONNXIFI_STATUS_SUCCESS);
// Destroy the event objects
CAFFE_ENFORCE_EQ(
lib_->onnxReleaseEvent(input_fence.event), ONNXIFI_STATUS_SUCCESS);
CAFFE_ENFORCE_EQ(
lib_->onnxReleaseEvent(output_fence.event), ONNXIFI_STATUS_SUCCESS);
return true;
}
REGISTER_CPU_OPERATOR(Onnxifi, OnnxifiOp<float, CPUContext>);
OPERATOR_SCHEMA(Onnxifi)
.NumInputs(0, INT_MAX)
.NumOutputs(0, INT_MAX)
.SetDoc(R"DOC(
The Onnxifi operator is a black-box operator that lowers the computation to an ONNXIFI backend
)DOC")
.Arg(
"onnx_model",
"(string default=\"\") Serialized ONNX model to be converted to backend representation")
.Arg(
"initializers",
"Initialization pair indicating the mapping of the name between NetDef and ONNX model");
} // namespace caffe2