#include "bound_shape_inferencer.h"
#include "caffe2/core/operator_schema.h"
#include "caffe2/core/tensor_impl.h"
#include "caffe2/core/types.h"
#include "caffe2/utils/proto_utils.h"
#include "caffe2/utils/string_utils.h"
#include <c10/util/irange.h>
namespace caffe2 {
namespace {
std::vector<int64_t> ConvertToVec(
const ::google::protobuf::RepeatedField<::google::protobuf::int64>& in) {
std::vector<int64_t> out;
out.reserve(in.size());
for (const auto d : in) {
out.push_back(d);
}
return out;
}
std::vector<TensorBoundShape::DimType> setDimTypeWithFirst(
TensorBoundShape::DimType firstDimType,
uint32_t n) {
std::vector<TensorBoundShape::DimType> dimTypes(
n, TensorBoundShape_DimType_CONSTANT);
if (dimTypes.size() > 0) {
dimTypes[0] = firstDimType;
}
return dimTypes;
}
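// SizeFromDim/SizeToDim below compute partial products of a shape's dims.
// Illustrative example (values not taken from any model): for dims [4, 3, 5],
// SizeFromDim(shape, 1) == 3 * 5 == 15 and SizeToDim(shape, 2) == 4 * 3 == 12.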
int64_t SizeFromDim(const TensorShape& shape, int axis) {
int64_t r = 1;
for (int i = axis; i < shape.dims_size(); ++i) {
r *= shape.dims(i);
}
return r;
}
int64_t SizeToDim(const TensorShape& shape, int axis) {
CAFFE_ENFORCE_LE(axis, shape.dims_size());
int64_t r = 1;
for (int i = 0; i < axis; ++i) {
r *= shape.dims(i);
}
return r;
}
// Check precedence between two vectors of TensorBoundShape::DimType.
// If it returns 1: right takes precedence over left
// If it returns -1: left takes precedence over right
// If it returns 0: no precedence between left and right
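// Illustrative example (not taken from a real net):
//   left  = {BATCH, FEATURE_MAX_DEFAULT}
//   right = {BATCH, FEATURE_MAX}
// Index 0 matches, and (FEATURE_MAX_DEFAULT, FEATURE_MAX) maps to 1 in the
// precedence table below, so right takes precedence over left.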
int takePrecedenceOver(
const std::vector<TensorBoundShape::DimType>& left,
const std::vector<TensorBoundShape::DimType>& right) {
const static std::vector<
std::tuple<TensorBoundShape::DimType, TensorBoundShape::DimType, int>>
precedence = {
std::tuple<TensorBoundShape::DimType, TensorBoundShape::DimType, int>{
TensorBoundShape_DimType_FEATURE_MAX_DEFAULT,
TensorBoundShape_DimType_FEATURE_MAX,
1},
std::tuple<TensorBoundShape::DimType, TensorBoundShape::DimType, int>{
TensorBoundShape_DimType_FEATURE_MAX,
TensorBoundShape_DimType_FEATURE_MAX_DEFAULT,
-1},
std::tuple<TensorBoundShape::DimType, TensorBoundShape::DimType, int>{
TensorBoundShape_DimType_BATCH_OF_FEATURE_MAX_DEFAULT,
TensorBoundShape_DimType_BATCH_OF_FEATURE_MAX,
1},
std::tuple<TensorBoundShape::DimType, TensorBoundShape::DimType, int>{
TensorBoundShape_DimType_BATCH_OF_FEATURE_MAX,
TensorBoundShape_DimType_BATCH_OF_FEATURE_MAX_DEFAULT,
-1}};
// If left is empty and right is not, right takes precedence
if (left.size() == 0 || right.size() == 0) {
return right.size() > left.size();
}
for (auto i: c10::irange(right.size())) {
// If right.size() > left.size() and left[0:i] == right[0:i],
// right takes precedence
// NOLINTNEXTLINE(clang-diagnostic-sign-compare)
if (i >= left.size()) {
return 1;
}
auto l = left[i];
auto r = right[i];
if (l == TensorBoundShape_DimType_UNKNOWN &&
r != TensorBoundShape_DimType_UNKNOWN) {
return 1;
}
if (r == TensorBoundShape_DimType_UNKNOWN &&
l != TensorBoundShape_DimType_UNKNOWN) {
return -1;
}
for (auto& t : precedence) {
if (l == std::get<0>(t) && r == std::get<1>(t)) {
return std::get<2>(t);
}
}
if (l != r) {
return 0;
}
}
return 0;
}
} // namespace
void BoundShapeInferencer::EnsureShapeNames(
std::unordered_map<std::string, ShapeInfo>* info) const {
for (auto& kv : *info) {
kv.second.shape.set_name(kv.first);
}
}
void BoundShapeInferencer::Initialize(
const ShapeInfoMap& info,
bool extract_feature_len) {
shape_info_ = info;
extract_feature_len_ = extract_feature_len;
}
void BoundShapeInferencer::InferOps(
const OperatorDef& op,
caffe2::Workspace* /* ws */) {
const static std::unordered_set<std::string> kSlsOps = {
"SparseLengthsSum",
"SparseLengthsSumFused8BitRowwise",
"SparseLengthsWeightedSum",
"SparseLengthsWeightedSumFused8BitRowwise",
"SparseLengthsSumFused4BitRowwise",
"SparseLengthsWeightedSumFused4BitRowwise",
"SparseLengthsSum4BitRowwiseSparse",
"SparseLengthsWeightedSum4BitRowwiseSparse",
"SparseLengthsSum8BitRowwiseSparse",
"SparseLengthsWeightedSum8BitRowwiseSparse"};
if (kSlsOps.count(op.type())) {
InferSparseLengthsSum(op);
} else if (op.type() == "Add" || op.type() == "Mul") {
InferElementwiseOp(op);
} else if (
op.type() == "FC" || op.type() == "FCTransposed" ||
op.type() == "FbFCPacked" || op.type() == "Int8FC") {
InferFC(op);
} else if (op.type() == "Concat") {
InferConcat(op);
} else if (op.type() == "Reshape") {
InferReshape(op);
} else if (op.type() == "LengthsRangeFill") {
InferLengthsRangeFill(op);
} else if (
(caffe2::StartsWith(op.type(), "GivenTensor") &&
caffe2::EndsWith(op.type(), "Fill")) ||
op.type() == "ConstantFill" || op.type() == "Int8GivenTensorFill" ||
op.type() == "Int8GivenIntTensorFill") {
InferGivenTensorFill(op);
} else if (op.type() == "Shape") {
InferShape(op);
} else if (
op.type() == "FloatToFused8BitRowwiseQuantized" ||
op.type() == "HalfFloatToFused8BitRowwiseQuantized" ||
op.type() == "FloatToFused4BitRowwiseQuantized" ||
op.type() == "HalfToFused4BitRowwiseQuantized" ||
op.type() == "FloatToHalf" || op.type() == "FbGemmPack") {
InferQuantizationTransformation(op);
} else if (op.type() == "UnPackRecords") {
InferUnPackRecords(op);
} else if (op.type() == "Tile") {
InferTile(op);
} else if (op.type() == "SparseLengthsSumSparseLookup") {
InferSparseLengthsSumSparseLookup(op);
} else if (op.type() == "Softmax") {
InferSoftmax(op);
} else if (op.type() == "LpNorm") {
InferLpNorm(op);
} else if (op.type() == "Transpose") {
InferTranspose(op);
} else if (op.type() == "Bucketize") {
InferBucketize(op);
} else {
InferCommonOp(op);
}
}
void BoundShapeInferencer::InferBoundShapeAndType(
const NetDef& net,
const ShapeInfoMap& info,
caffe2::Workspace* ws,
bool extract_feature_len) {
const static std::unordered_set<std::string> unsupported{};
Initialize(info, extract_feature_len);
bool inferFinished = false;
auto old_shape_num = shape_info_.size();
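// Keep alternating a forward pass and a reverse pass until the number of
// known shapes stops growing (a simple fixed point on the size of shape_info_).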
while (!inferFinished) {
for (const auto& op : net.op()) {
VLOG(1) << op.type();
if (unsupported.count(op.type())) {
continue;
}
InferOps(op, ws);
}
// Doing a reverse pass to infer the input shapes if applicable
for (int i = net.op_size() - 1; i >= 0; --i) {
const auto& op = net.op(i);
if (op.type() == "Concat") {
InferConcatInputs(op);
} else if (op.type() == "Int8Quantize") {
InferInt8QuantizeInput(op);
} else if (op.type() == "Mul" || op.type() == "Add") {
InferElementwiseOpInput(op);
}
}
inferFinished = old_shape_num == shape_info_.size();
VLOG(1) << "old shape info num: " << old_shape_num
<< ", new shape info num: " << shape_info_.size();
old_shape_num = shape_info_.size();
}
// Make sure every shape has a name
EnsureShapeNames(&shape_info_);
}
TensorShape& BoundShapeInferencer::SetTensorBoundShapeIfNotExist(
const std::string& name,
const std::vector<TensorBoundShape::DimType>& t,
std::vector<int64_t> bound_dims,
TensorProto::DataType type,
bool is_quantized) {
return CheckAndSetTensorBoundShape(
name, t, bound_dims, type, is_quantized, true);
}
// If allow_existing_shape is true, we use the existing shape directly
// and do not enforce it to be equal to bound_dims;
// otherwise we enforce them to be equal.
TensorShape& BoundShapeInferencer::CheckAndSetTensorBoundShape(
const std::string& name,
const std::vector<TensorBoundShape::DimType>& t,
std::vector<int64_t> bound_dims,
TensorProto::DataType type,
bool is_quantized,
bool allow_existing_shape,
float scale,
int offset,
bool in_place_op) {
auto rt = shape_info_.emplace(name, ShapeInfo());
ShapeInfo& shape_info = rt.first->second;
TensorShape& shape = shape_info.shape;
if (shape_info.getShapeIsFinal()) {
return shape;
}
if (is_quantized) {
shape_info.is_quantized = true;
shape_info.q_info.scale.clear();
shape_info.q_info.scale.push_back(scale);
shape_info.q_info.offset.clear();
shape_info.q_info.offset.push_back(offset);
shape_info.q_info.axis = 1;
}
// If the shape information already exists in shape_info_ and this is not an
// in-place op, compare the old and new shapes
if (!rt.second && !in_place_op) {
// Check dim size consistency
CAFFE_ENFORCE_EQ(
shape.dims_size(),
bound_dims.size(),
"Dim size inconsistency found in tensor ",
name);
// Get precedence of previous shape vs new shape
int precedence = 0;
if (!shape_info.dimTypeIsSet()) {
precedence = 1;
} else {
precedence = takePrecedenceOver(shape_info.getDimType(), t);
}
// If precedence == 0: check whether previous shape == new shape
// If precedence == 1, override shape with new value
// If precedence == -1, previous shape takes precedence and
// new value is skipped.
if (precedence == 1) {
shape_info.setDimType(t);
for (auto i: c10::irange(bound_dims.size())) {
shape.set_dims(i, bound_dims[i]);
}
} else if (precedence == 0 && !allow_existing_shape) {
// Enforce previous dims and current dims are the same.
for (int i = 0; i < shape.dims_size(); ++i) {
CAFFE_ENFORCE_EQ(
shape.dims(i),
bound_dims[i],
"Shape inconsistency found in tensor ",
name,
" on dim ",
i,
" (",
shape.dims(i),
" vs ",
bound_dims[i],
")");
}
}
return shape;
}
// If shape information does not exist in shape_info_,
// or shape info is not final,
// set shape info according to inputs.
if (!shape_info.getShapeIsFinal()) {
shape_info.setDimType(t);
shape.mutable_dims()->Clear();
for (const auto d : bound_dims) {
shape.add_dims(d);
}
shape.set_data_type(type);
if (in_place_op) {
shape_info.setShapeIsFinal(true);
}
}
return shape;
}
std::vector<TensorShape> InferOutput(
const OperatorDef& op,
const std::vector<TensorShape>& input_shapes) {
const OpSchema* schema = OpSchemaRegistry::Schema(op.type());
CAFFE_ENFORCE(schema);
return schema->InferTensor(op, input_shapes);
}
void BoundShapeInferencer::InferGivenTensorFill(const OperatorDef& op) {
CAFFE_ENFORCE_EQ(op.output_size(), 1, op.type(), " must have 1 output");
InferCommonOp(op);
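// Fill ops produce constants, so mark every output dim as CONSTANT. For
// ConstantFill with an input, copy that input's dim types instead, since the
// output shape follows the input.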
auto it = shape_info_.find(op.output(0));
if (it != shape_info_.end()) {
it->second.setDimType(std::vector<TensorBoundShape::DimType>(
it->second.shape.dims_size(), TensorBoundShape_DimType_CONSTANT));
if (op.type() == "ConstantFill" && op.input_size() >= 1) {
auto it_input = shape_info_.find(op.input(0));
if (it_input != shape_info_.end()) {
it->second.setDimType(it_input->second.getDimType());
}
}
}
}
void BoundShapeInferencer::InferLengthsRangeFill(const OperatorDef& op) {
CAFFE_ENFORCE_EQ(op.input_size(), 1, "LengthsRangeFill must have 1 input");
CAFFE_ENFORCE_EQ(op.output_size(), 1, "LengthsRangeFill must have 1 output");
// Both the input and output of LengthsRangeFill are int32:
// https://fburl.com/fhwb5666
CheckAndSetTensorBoundShape(
op.input(0),
{TensorBoundShape_DimType_BATCH},
{spec_.max_batch_size},
TensorProto_DataType_INT32,
false);
CheckAndSetTensorBoundShape(
op.output(0),
{TensorBoundShape_DimType_BATCH_OF_FEATURE_MAX_DEFAULT},
{spec_.max_batch_size * spec_.max_seq_size},
TensorProto_DataType_INT32,
false);
current_dim_type_ = TensorBoundShape_DimType_BATCH_OF_FEATURE_MAX_DEFAULT;
}
void BoundShapeInferencer::InferSparseLengthsSumSparseLookup(
const OperatorDef& op) {
CAFFE_ENFORCE_GT(
op.input_size(),
2,
"SparseLengthsSumSparseLookup must have more than 2 input");
CAFFE_ENFORCE_GT(
op.output_size(),
1,
"SparseLengthsSumSparseLookup must have more than 1 output");
if (shape_info_.find(op.input(2)) == shape_info_.end()) {
LOG(WARNING)
<< "Shape of COMPRESSED_INDICES_MAPPING input of SparseLengthsSumSparseLookup "
<< op.input(2) << " needs to be presented";
}
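// The shapes of INDICES (input 0) and LENGTHS (input 1) carry over to
// outputs 0 and 1, so copy their shape info when it is known.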
for (int i = 0; i < 2; ++i) {
const auto it = shape_info_.find(op.input(i));
if (it != shape_info_.end()) {
shape_info_[op.output(i)] = it->second;
}
}
// Handle the weights
if (op.input_size() == 4) {
CAFFE_ENFORCE_EQ(op.output_size(), 3);
const auto it = shape_info_.find(op.input(3));
if (it != shape_info_.end()) {
shape_info_[op.output(2)] = it->second;
}
}
}
void BoundShapeInferencer::InferSparseLengthsSum(const OperatorDef& op) {
CAFFE_ENFORCE_GE(
op.input_size(), 3, op.type(), " must have at least 3 inputs");
const auto it = shape_info_.find(op.input(0));
CAFFE_ENFORCE(
it != shape_info_.end(),
"Shape of DATA input of SparseLengthsSum ",
op.input(0),
" needs to be presented");
CAFFE_ENFORCE_EQ(
it->second.shape.dims().size(),
2,
"DATA input ",
op.input(0),
"needs to be 2D");
const int weight =
(op.type() == "SparseLengthsWeightedSum" ||
op.type() == "SparseLengthsWeightedSumFused8BitRowwise" ||
op.type() == "SparseLengthsWeightedSumFused4BitRowwise" ||
op.type() == "SparseLengthsWeightedSum4BitRowwiseSparse" ||
op.type() == "SparseLengthsWeightedSum8BitRowwiseSparse")
? 1
: 0;
const bool is4bit =
(op.type() == "SparseLengthsSumFused4BitRowwise" ||
op.type() == "SparseLengthsWeightedSumFused4BitRowwise" ||
op.type() == "SparseLengthsWeightedSum4BitRowwiseSparse" ||
op.type() == "SparseLengthsSum4BitRowwiseSparse");
// NOLINTNEXTLINE(clang-analyzer-deadcode.DeadStores,clang-diagnostic-unused-variable)
const bool isSparse =
(op.type() == "SparseLengthsSum4BitRowwiseSparse" ||
op.type() == "SparseLengthsWeightedSum4BitRowwiseSparse" ||
op.type() == "SparseLengthsSum8BitRowwiseSparse" ||
op.type() == "SparseLengthsWeightedSum8BitRowwiseSparse");
if (weight) {
CAFFE_ENFORCE_GE(
op.input_size(),
4,
"SparseLengthsWeightedSum(Sparse) must have 4 or 5 inputs");
SetTensorBoundShapeIfNotExist(
op.input(weight),
{TensorBoundShape_DimType_BATCH_OF_FEATURE_MAX_DEFAULT},
{spec_.max_batch_size * spec_.max_seq_size},
TensorProto_DataType_FLOAT,
false);
}
// Bound inputs
SetTensorBoundShapeIfNotExist(
op.input(1 + weight),
{TensorBoundShape_DimType_BATCH_OF_FEATURE_MAX_DEFAULT},
{spec_.max_batch_size * spec_.max_seq_size},
TensorProto_DataType_INT64,
false);
CheckAndSetTensorBoundShape(
op.input(2 + weight),
{TensorBoundShape_DimType_BATCH},
{spec_.max_batch_size},
TensorProto_DataType_INT32,
false);
// Infer output
CAFFE_ENFORCE_EQ(it->second.shape.dims_size(), 2);
current_dim_type_ = TensorBoundShape_DimType_BATCH;
current_max_batch_size_ = spec_.max_batch_size;
auto output_dim1 = it->second.shape.dims(1);
// If the op is one of the 8-bit rowwise variants, each row stores a 4-byte
// fp32 scale and a 4-byte fp32 bias, so we subtract 8 from the stored row
// width (https://fburl.com/t6dp9tsc)
if (op.type() == "SparseLengthsSumFused8BitRowwise" ||
op.type() == "SparseLengthsWeightedSumFused8BitRowwise" ||
op.type() == "SparseLengthsSum8BitRowwiseSparse" ||
op.type() == "SparseLengthsWeightedSum8BitRowwiseSparse") {
output_dim1 -= 8;
}
// If the op is one of the 4-bit rowwise variants, each row stores a 2-byte
// fp16 scale and a 2-byte fp16 bias, so we subtract 4 from the stored row
// width. Then we double it because 2 entries are packed into each uint8
// element of the embedding table. (https://fburl.com/diffusion/stmsyz74)
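// Illustrative example (not from a real table): a fused 4-bit row stored in
// 54 bytes holds (54 - 4) = 50 data bytes, i.e. 50 * 2 = 100 embedding entries.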
else if (is4bit) {
output_dim1 -= 4;
output_dim1 *= 2;
}
CAFFE_ENFORCE_GE(
it->second.getDimType().size(), 2, "input(0): ", op.input(0));
CheckAndSetTensorBoundShape(
op.output(0),
{TensorBoundShape_DimType_BATCH, it->second.getDimType(1)},
{spec_.max_batch_size, output_dim1},
TensorProto_DataType_FLOAT,
false);
}
void BoundShapeInferencer::InferShape(const OperatorDef& op) {
InferCommonOp(op);
// The output of Shape (the input's shape) is a constant
if (op.output_size() > 0 && shape_info_.count(op.output(0))) {
shape_info_[op.output(0)].setDimType(0, TensorBoundShape_DimType_CONSTANT);
}
}
void BoundShapeInferencer::InferReshape(const OperatorDef& op) {
InferCommonOp(op);
// old_shape should be a constant
if (op.output_size() > 1 && shape_info_.count(op.output(1))) {
shape_info_[op.output(1)].setDimType(0, TensorBoundShape_DimType_CONSTANT);
}
}
void BoundShapeInferencer::InferInt8QuantizeInput(const OperatorDef& op) {
if (op.output_size() == 0 || op.input_size() == 0) {
return;
}
if (shape_info_.find(op.input(0)) != shape_info_.end()) {
return;
}
const auto it = shape_info_.find(op.output(0));
if (it == shape_info_.end()) {
return;
}
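// Derive the fp32 input shape of Int8Quantize from the already inferred
// output shape: copy the dims, drop the quantization info, and reset the
// data type to FLOAT.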
auto input_shape_info = it->second;
input_shape_info.is_quantized = false;
input_shape_info.q_info.offset.clear();
input_shape_info.q_info.scale.clear();
input_shape_info.shape.set_data_type(TensorProto_DataType_FLOAT);
shape_info_.emplace(op.input(0), std::move(input_shape_info));
}
void BoundShapeInferencer::InferElementwiseOpInput(const OperatorDef& op) {
if (shape_info_.find(op.input(0)) != shape_info_.end() &&
shape_info_.find(op.input(1)) != shape_info_.end()) {
return;
}
const auto it = shape_info_.find(op.output(0));
if (it == shape_info_.end()) {
return;
}
ArgumentHelper helper(op);
const bool broadcast = helper.GetSingleArgument<bool>("broadcast", false);
if (broadcast) {
auto input_shape_info = it->second;
shape_info_.emplace(op.input(0), input_shape_info);
// From definition of Add/Mul:
// "When broadcasting is specified,
// the second tensor can either be of size 1 (a scalar value),
// or having its shape as a contiguous subset of the first tensor's shape."
// shape info of second input is always subset of first input.
// Set bound shape of second input same as first input.
shape_info_.emplace(op.input(1), std::move(input_shape_info));
}
}
void BoundShapeInferencer::InferConcatInputs(const OperatorDef& op) {
ArgumentHelper helper(op);
const auto add_axis = helper.GetSingleArgument<int32_t>("add_axis", 0);
// NOLINTNEXTLINE(bugprone-branch-clone)
if (add_axis) {
return;
} else if (op.output_size() == 0 || !shape_info_.count(op.output(0))) {
return;
}
const auto axis = helper.HasArgument("axis")
? helper.GetSingleArgument<int32_t>("axis", -1)
: GetDimFromOrderString(
helper.GetSingleArgument<string>("order", "NCHW"));
const auto& shape_info = shape_info_.at(op.output(0));
int output_channel = shape_info.shape.dims(axis);
int missing_shape_infos = 0;
int channel_acc = 0;
std::string input_to_infer;
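// Accumulate the known input sizes along `axis`. If exactly one input is
// missing shape info, its size along `axis` can be recovered afterwards as
// output_channel - channel_acc.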
for (const auto& i : op.input()) {
const auto it = shape_info_.find(i);
if (it != shape_info_.end()) {
const auto& current_input_shape = it->second;
if (axis < current_input_shape.shape.dims_size()) {
channel_acc += current_input_shape.shape.dims(axis);
} else {
LOG(INFO) << "Mismatched input dim along axis " << axis
<< ". We cannot infer missing input shape for Concat";
return;
}
} else if (missing_shape_infos) {
LOG(INFO) << "More than one missing shapes, previous one: "
<< input_to_infer;
// We can only infer one missing input shape info
return;
} else {
++missing_shape_infos;
input_to_infer = i;
}
}
if (missing_shape_infos && !input_to_infer.empty()) {
auto input_shape_info = shape_info;
input_shape_info.shape.set_dims(axis, output_channel - channel_acc);
shape_info_.emplace(input_to_infer, std::move(input_shape_info));
// Infer the shape of the second output of Concat
InferCommonOp(op);
if (op.output_size() > 1 && shape_info_.count(op.output(1))) {
shape_info_[op.output(1)].setDimType(
0, TensorBoundShape_DimType_CONSTANT);
}
}
}
void BoundShapeInferencer::InferElementwiseOp(const OperatorDef& op) {
InferCommonOp(op);
if (shape_info_.find(op.output(0)) != shape_info_.end() &&
shape_info_.find(op.input(1)) != shape_info_.end()) {
return;
}
const auto it = shape_info_.find(op.input(0));
if (it == shape_info_.end()) {
return;
}
ArgumentHelper helper(op);
const bool broadcast = helper.GetSingleArgument<bool>("broadcast", false);
if (broadcast) {
auto input_shape_info = it->second;
shape_info_.emplace(op.input(1), input_shape_info);
shape_info_.emplace(op.output(0), std::move(input_shape_info));
}
}
// For Concat, if some input shapes are missing and the add_axis argument is
// set, all inputs must have the same shape. In this case, we can infer the
// shapes of the missing inputs from any input whose shape is known.
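// Illustrative example (shapes are made up): Concat with add_axis = 1 over 3
// inputs of shape [N, 64] yields [N, 3, 64]; any input with a missing shape
// can simply be assigned the reference shape [N, 64].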
void BoundShapeInferencer::InferConcat(const OperatorDef& op) {
ArgumentHelper helper(op);
auto add_axis = helper.GetSingleArgument<int32_t>("add_axis", 0);
if (add_axis) {
ShapeInfo* ref_input_shape = nullptr;
std::string ref_name;
std::unordered_set<std::string> missing_shape_inputs;
for (const auto& i : op.input()) {
const auto it = shape_info_.find(i);
if (it != shape_info_.end()) {
const auto& current_input_shape = it->second;
if (ref_input_shape) {
CAFFE_ENFORCE_EQ(
ref_input_shape->shape.dims_size(),
current_input_shape.shape.dims_size(),
ref_name,
" vs ",
i);
for (int j = 0; j < ref_input_shape->shape.dims_size(); ++j) {
CAFFE_ENFORCE_EQ(
ref_input_shape->shape.dims(j),
current_input_shape.shape.dims(j),
"Mismatched size on dim ",
j,
" between ",
ref_name,
" and ",
i,
" (",
ref_input_shape->shape.dims(j),
" vs ",
current_input_shape.shape.dims(j),
")");
}
} else {
ref_input_shape = &it->second;
ref_name = i;
}
} else {
missing_shape_inputs.emplace(i);
}
}
if (ref_input_shape) {
current_dim_type_ = ref_input_shape->getDimType(0);
for (const auto& i : missing_shape_inputs) {
shape_info_.emplace(i, *ref_input_shape);
}
}
}
InferCommonOp(op);
// split_info should be a constant
if (op.output_size() > 1 && shape_info_.count(op.output(1))) {
shape_info_[op.output(1)].setDimType(0, TensorBoundShape_DimType_CONSTANT);
}
}
void BoundShapeInferencer::InferFC(const OperatorDef& op) {
CAFFE_ENFORCE(
op.input_size() == 3 || op.input_size() == 4,
"FC has to have 3 or 4 inputs");
const auto w_it = shape_info_.find(op.input(1));
CAFFE_ENFORCE(
w_it != shape_info_.end(),
"Shape of WEIGHT input of FC ",
op.input(1),
" needs to be presented");
const ShapeInfo& w_shape_info = w_it->second;
const auto b_it = shape_info_.find(op.input(2));
CAFFE_ENFORCE(
b_it != shape_info_.end(),
"Shape of BIAS input of FC ",
op.input(2),
" needs to be presented");
const ShapeInfo& b_shape_info = b_it->second;
bool fp16 = (op.type() == "FbFCPacked");
bool int8_fc = (op.type() == "Int8FC" || op.engine() == "DNNLOWP");
float scale = 1;
int offset = 0;
auto x_it = shape_info_.find(op.input(0));
if (x_it == shape_info_.end()) {
// We don't have a hint at the x input, so we try to deduce it from the
// weight shape
ArgumentHelper helper(op);
auto axis = helper.GetSingleArgument<int32_t>("axis", 1);
auto axis_w = helper.GetSingleArgument<int32_t>("axis_w", 1);
const TensorShape w_shape = w_shape_info.shape;
bool transposed = (op.type() == "FCTransposed");
const int canonical_axis_w =
canonical_axis_index_(axis_w, w_shape.dims().size());
const int64_t K = transposed ? SizeToDim(w_shape, canonical_axis_w)
: SizeFromDim(w_shape, canonical_axis_w);
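// Illustrative example (weight shape is hypothetical): an FC weight of shape
// [out_features, K] with axis_w == 1 (and not transposed) gives
// K = SizeFromDim(w_shape, 1); the deduced activation shape below becomes
// [max_batch_size, K], with extra leading 1s when axis > 1.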
std::vector<int64_t> dims;
std::vector<TensorBoundShape::DimType> dimTypes;
for (int i = 0; i < axis - 1; ++i) {
dims.push_back(1);
dimTypes.push_back(TensorBoundShape_DimType_CONSTANT);
}
dims.push_back(spec_.max_batch_size);
dimTypes.push_back(TensorBoundShape_DimType_BATCH);
dims.push_back(K);
dimTypes.push_back(TensorBoundShape_DimType_CONSTANT);
current_dim_type_ = TensorBoundShape_DimType_BATCH;
current_max_batch_size_ = spec_.max_batch_size;
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
TensorProto::DataType w_data_type;
if (fp16) {
w_data_type = TensorProto_DataType_FLOAT;
} else if (int8_fc) {
w_data_type = TensorProto_DataType_UINT8;
} else {
w_data_type = w_shape.data_type();
}
if (int8_fc) {
scale = helper.GetSingleArgument<float>("Y_scale", 1);
offset = helper.GetSingleArgument<int>("Y_zero_point", 0);
}
// Note: for FbFCPacked, weight is fp16 but activations are in fp32
CheckAndSetTensorBoundShape(
op.input(0),
dimTypes,
dims,
w_data_type,
int8_fc,
false,
scale,
offset);
} else {
ShapeInfo& x_shape_info = x_it->second;
if (x_shape_info.getDimType(0) == TensorBoundShape_DimType_UNKNOWN) {
CAFFE_ENFORCE_GE(x_shape_info.shape.dims_size(), 1);
x_shape_info.shape.set_dims(0, spec_.max_batch_size);
x_shape_info.setDimType(0, TensorBoundShape_DimType_BATCH);
}
}
// Standard shape inference for outputs
std::vector<TensorShape> input_shapes{
shape_info_[op.input(0)].shape, w_shape_info.shape, b_shape_info.shape};
if (op.input_size() == 4) {
const auto quant_param_it = shape_info_.find(op.input(3));
CAFFE_ENFORCE(
quant_param_it != shape_info_.end(),
"Shape of quant_param input of FC ",
op.input(3),
" needs to be presented");
const ShapeInfo& quant_param_shape_info = quant_param_it->second;
input_shapes.emplace_back(quant_param_shape_info.shape);
}
std::vector<TensorShape> output_shapes = InferOutput(op, input_shapes);
CAFFE_ENFORCE_EQ(output_shapes.size(), 1);
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
TensorProto::DataType output_data_type;
if (fp16) {
output_data_type = TensorProto_DataType_FLOAT;
} else if (int8_fc) {
output_data_type = TensorProto_DataType_UINT8;
} else {
output_data_type = output_shapes.front().data_type();
}
if (int8_fc) {
ArgumentHelper helper(op);
scale = helper.GetSingleArgument<float>("Y_scale", 1);
offset = helper.GetSingleArgument<int>("Y_zero_point", 0);
}
CheckAndSetTensorBoundShape(
op.output(0),
setDimTypeWithFirst(
TensorBoundShape_DimType_BATCH, output_shapes.front().dims().size()),
ConvertToVec(output_shapes[0].dims()),
output_data_type,
int8_fc,
false,
scale,
offset);
}
// Infers shapes for operators which are used to transform non-quantized
// operators (e.g. SparseLengthsSum) into quantized operators (e.g.
// SparseLengthsSumFused8BitRowwise) at model training time. If we're doing
// quantization for CONSTANTS (eg. embedding tables), current_dim_type_ should
// be set to CONSTANT.
void BoundShapeInferencer::InferQuantizationTransformation(
const OperatorDef& op) {
bool all_constant = true;
for (const auto& input : op.input()) {
const auto it = shape_info_.find(input);
if (it == shape_info_.end() ||
it->second.getDimType(0) != TensorBoundShape_DimType_CONSTANT) {
all_constant = false;
break;
}
}
const auto previous_dim_type = current_dim_type_;
if (all_constant) {
current_dim_type_ = TensorBoundShape_DimType_CONSTANT;
}
InferCommonOp(op);
current_dim_type_ = previous_dim_type;
}
void BoundShapeInferencer::InferUnPackRecords(const OperatorDef& op) {
std::vector<TensorShape> input_shapes;
for (const auto& input : op.input()) {
const auto it = shape_info_.find(input);
if (it == shape_info_.end()) {
LOG(WARNING) << "Cannot find shape info for " << input << ". Skipping "
<< op.type();
return;
}
input_shapes.emplace_back(it->second.shape);
}
std::vector<TensorShape> output_shapes;
ArgumentHelper helper(op);
std::vector<std::string> fields =
helper.GetRepeatedArgument<std::string>("fields");
const int num_tensors = fields.size();
if (spec_.max_batch_size == 1 && num_tensors == 1 &&
input_shapes[0].dims_size() != 1) {
// Special case of single tensor input
output_shapes.push_back(input_shapes[0]);
} else {
// Input is packed
TensorShape oshape;
oshape.add_dims(spec_.max_batch_size);
oshape.add_dims(spec_.num_embeddings);
oshape.add_dims(spec_.embedding_length);
// TODO: how to do this more intelligently
oshape.set_data_type(TensorProto::FLOAT);
for (int i = 0; i < num_tensors; i++) {
output_shapes.push_back(oshape);
}
}
for (auto i: c10::irange(output_shapes.size())) {
const auto& shape = output_shapes[i];
CheckAndSetTensorBoundShape(
op.output(i),
setDimTypeWithFirst(current_dim_type_, shape.dims().size()),
ConvertToVec(shape.dims()),
output_shapes[i].data_type(),
false);
}
}
void BoundShapeInferencer::InferTile(const OperatorDef& op) {
if (op.input_size() > 1) {
LOG(WARNING) << "Cannot infer shape for Tile when axis and tils are inputs";
return;
}
const auto it = shape_info_.find(op.input(0));
if (it == shape_info_.end()) {
LOG(WARNING) << "Cannot find shape info for " << op.input(0)
<< ". Skipping " << op.type();
return;
}
ArgumentHelper helper(op);
const std::int32_t tiles = helper.GetSingleArgument<std::int32_t>("tiles", 1);
std::int32_t axis = helper.GetSingleArgument<std::int32_t>("axis", 0);
bool dynamic = helper.GetSingleArgument<bool>("dynamic", false);
auto ndims = it->second.shape.dims_size();
const auto canonical_axis = canonical_axis_index_(axis, ndims);
auto shape = it->second.shape;
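// Illustrative example (dims are hypothetical): input dims [N, 10] with
// tiles = 4 along axis 1 become [N, 40]; when dynamic is set, the multiplier
// is max_batch_size instead of the tiles argument.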
shape.set_dims(
canonical_axis,
shape.dims(canonical_axis) * (dynamic ? spec_.max_batch_size : tiles));
CheckAndSetTensorBoundShape(
op.output(0),
setDimTypeWithFirst(TensorBoundShape_DimType_BATCH, ndims),
ConvertToVec(shape.dims()),
it->second.shape.data_type(),
false);
}
void BoundShapeInferencer::InferSoftmax(const OperatorDef& op) {
CAFFE_ENFORCE_EQ(op.input_size(), 1, op.type(), " must have 1 input");
CAFFE_ENFORCE_EQ(op.output_size(), 1, op.type(), " must have 1 output");
auto it = shape_info_.find(op.input(0));
if (it == shape_info_.end()) {
LOG(WARNING) << "Didn't find shape info for the input of Softmax, skipping";
return;
}
CheckAndSetTensorBoundShape(
op.output(0),
setDimTypeWithFirst(it->second.getDimType(0), it->second.shape.dims_size()),
ConvertToVec(it->second.shape.dims()),
it->second.shape.data_type(),
false);
}
void BoundShapeInferencer::InferBucketize(const OperatorDef& op) {
CAFFE_ENFORCE_EQ(op.input_size(), 1, op.type(), " must have 1 input");
CAFFE_ENFORCE_EQ(op.output_size(), 1, op.type(), " must have 1 output");
auto it = shape_info_.find(op.input(0));
if (it == shape_info_.end()) {
LOG(WARNING) << "Didn't find shape info for the input of Bucketize, skipping";
return;
}
InferCommonOp(op);
auto it_output = shape_info_.find(op.output(0));
if (it_output != shape_info_.end()) {
it_output->second.setDimType(it->second.getDimType());
}
}
void BoundShapeInferencer::InferLpNorm(const OperatorDef& op) {
CAFFE_ENFORCE_EQ(op.output_size(), 1, op.type(), " must have 1 output");
InferCommonOp(op);
auto it = shape_info_.find(op.output(0));
if (it != shape_info_.end()) {
it->second.setDimType(std::vector<TensorBoundShape::DimType>(
it->second.shape.dims_size(), TensorBoundShape_DimType_CONSTANT));
}
}
void BoundShapeInferencer::InferTranspose(const OperatorDef& op) {
CAFFE_ENFORCE_EQ(op.input_size(), 1, op.type(), " must have 1 input");
CAFFE_ENFORCE_EQ(op.output_size(), 1, op.type(), " must have 1 output");
auto it = shape_info_.find(op.input(0));
if (it == shape_info_.end()) {
LOG(WARNING) << "Didn't find shape info for the input of Transpose";
return;
}
ArgumentHelper helper(op);
std::vector<int> axes = helper.GetRepeatedArgument<int>("axes");
if (axes.empty()) {
// In this case it should be existing dims in reverse order
for (int i = it->second.shape.dims().size() - 1; i >= 0; --i) {
axes.push_back(i);
}
} else {
CAFFE_ENFORCE_EQ(
axes.size(),
it->second.shape.dims().size(),
op.type(),
" must specify all axes in Transpose."
);
auto valid_axes =
std::all_of(axes.begin(), axes.end(), [numDims = it->second.shape.dims().size()](int& axis) {
return axis >= 0 && axis < numDims;
});
CAFFE_ENFORCE(valid_axes, "Invalid axes were provided.");
}
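// Permute dims and dim types according to axes. Illustrative example:
// input dims [N, C, H, W] with axes = {0, 2, 3, 1} give output dims
// [N, H, W, C], with the dim types permuted the same way.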
std::vector<TensorBoundShape::DimType> dimTypes;
std::vector<int64_t> dims;
for (auto axis : axes) {
dimTypes.push_back(it->second.getDimType(axis));
dims.push_back(it->second.shape.dims()[axis]);
}
CheckAndSetTensorBoundShape(
op.output(0),
dimTypes,
dims,
it->second.shape.data_type(),
false);
}
void BoundShapeInferencer::InferCommonOp(
const OperatorDef& op,
const OpSchema* schema,
bool bypass_input_check,
bool in_place_op) {
// First, check that all the input shapes/types are already present
try {
const static std::unordered_set<std::string>
types_with_independent_output_shape = {
"Int8GenQuantParams",
"Int8QuantSchemeBlobFill",
"ComputeEqualizationScale",
"Int8GenQuantParamsMinMax"};
std::vector<TensorShape> input_shapes;
for (const auto& input : op.input()) {
const auto it = shape_info_.find(input);
if (it == shape_info_.end() &&
!types_with_independent_output_shape.count(op.type()) &&
!bypass_input_check) {
LOG(WARNING) << "Cannot find shape info for " << input << ". Skipping "
<< op.type();
return;
}
if (types_with_independent_output_shape.count(op.type()) ||
(bypass_input_check && it == shape_info_.end())) {
TensorShape input_shape;
input_shapes.emplace_back(std::move(input_shape));
} else {
input_shapes.emplace_back(it->second.shape);
}
}
// Schema can be pre-defined.
// If not predefined, get the schema for the op.
if (schema == nullptr) {
schema = OpSchemaRegistry::Schema(op.type());
}
CAFFE_ENFORCE(schema);
std::vector<TensorShape> output_shapes;
output_shapes = schema->InferTensor(op, input_shapes);
bool is_quantized = !(op.type().compare(0, 4, "Int8")) &&
(op.type() != "Int8Dequantize") &&
(op.type() != "Int8QuantSchemeBlobFill") &&
(op.type() != "ComputeEqualizationScale") &&
(op.type() != "Int8GenQuantParams") &&
(op.type() != "Int8GenQuantParamsMinMax");
float scale = 1;
int offset = 0;
TensorProto::DataType infered_data_type = TensorProto::UNDEFINED;
if (is_quantized) {
const static std::map<std::string, int> type_info_from_input = {
{"Int8Quantize", -1}, // Force this op's output to be uint8
{"Int8FCPackWeight", 0},
{"Int8ConvPackWeight", 0},
{"Int8ConvRelu", 1},
{"Int8MaxPool", 0},
{"Int8AveragePool", 0},
{"Int8FC", 1},
{"Int8Conv", 1},
{"Int8SumRelu", 0},
{"Int8Relu", 0}};
CAFFE_ENFORCE(
type_info_from_input.find(op.type()) != type_info_from_input.end(),
"Undefined quantized output data type, add it into type_info_from_input");
int target = type_info_from_input.find(op.type())->second;
if (target == -1) {
infered_data_type = TensorProto::UINT8;
} else {
// NOLINTNEXTLINE(clang-diagnostic-sign-compare)
CAFFE_ENFORCE(target < input_shapes.size());
infered_data_type = input_shapes[target].data_type();
}
// Extract output scale and offset
ArgumentHelper helper(op);
scale = helper.GetSingleArgument<float>("Y_scale", 1);
offset = helper.GetSingleArgument<int>("Y_zero_point", 0);
} else if (op.type() == "Int8Dequantize") {
infered_data_type = TensorProto::FLOAT;
}
for (auto i: c10::irange(output_shapes.size())) {
const auto& shape = output_shapes[i];
if (shape.unknown_shape()) {
continue;
}
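// If no data type was determined by the quantization handling above, fall
// back to the type reported by the schema for this output; tmp_dtype restores
// the original value so each output makes this decision independently.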
auto tmp_dtype = infered_data_type;
if (infered_data_type == TensorProto::UNDEFINED) {
infered_data_type = shape.data_type();
}
CheckAndSetTensorBoundShape(
op.output(i),
setDimTypeWithFirst(current_dim_type_, shape.dims().size()),
ConvertToVec(shape.dims()),
infered_data_type,
is_quantized,
false,
scale,
offset,
in_place_op);
infered_data_type = tmp_dtype;
}
} catch (const caffe2::EnforceNotMet& e) {
LOG(ERROR) << "Enforce not met while inferring shapes for " << op.type()
<< ": " << e.what() << " first output: " << op.output(0);
} catch (const std::exception& e) {
LOG(WARNING) << "Caught exception while inferring shapes for " << op.type()
<< ": " << e.what() << " first output: " << op.output(0);
}
}
std::shared_ptr<BoundShapeInferencerBase> getBoundShapeInferencer(
const BoundShapeSpec& spec) {
return std::make_shared<BoundShapeInferencer>(spec);
}
C10_DEFINE_SHARED_REGISTRY(
BoundShapeInferencerRegistry,
BoundShapeInferencerBase,
const BoundShapeSpec&);
C10_REGISTER_CREATOR(
BoundShapeInferencerRegistry,
C10,
getBoundShapeInferencer);
} // namespace caffe2