#include "bound_shape_inferencer.h"
#include "caffe2/core/operator_schema.h"
#include "caffe2/core/tensor_impl.h"
#include "caffe2/core/types.h"
#include "caffe2/utils/proto_utils.h"
#include "caffe2/utils/string_utils.h"

#include <c10/util/irange.h>

namespace caffe2 {

namespace {
std::vector<int64_t> ConvertToVec(
    const ::google::protobuf::RepeatedField<::google::protobuf::int64>& in) {
  std::vector<int64_t> out;
  out.reserve(in.size());
  for (const auto d : in) {
    out.push_back(d);
  }
  return out;
}

std::vector<TensorBoundShape::DimType> setDimTypeWithFirst(
    TensorBoundShape::DimType firstDimType,
    uint32_t n) {
  std::vector<TensorBoundShape::DimType> dimTypes(
      n, TensorBoundShape_DimType_CONSTANT);
  if (!dimTypes.empty()) {
    dimTypes[0] = firstDimType;
  }
  return dimTypes;
}
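
// e.g. setDimTypeWithFirst(TensorBoundShape_DimType_BATCH, 3) returns
// {BATCH, CONSTANT, CONSTANT}: only the leading dimension carries the
// caller-provided dim type.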

int64_t SizeFromDim(const TensorShape& shape, int axis) {
  int64_t r = 1;
  for (int i = axis; i < shape.dims_size(); ++i) {
    r *= shape.dims(i);
  }
  return r;
}

int64_t SizeToDim(const TensorShape& shape, int axis) {
  CAFFE_ENFORCE_LE(axis, shape.dims_size());
  int64_t r = 1;
  for (int i = 0; i < axis; ++i) {
    r *= shape.dims(i);
  }
  return r;
}
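
// Worked example: for a shape [2, 3, 4], SizeFromDim(shape, 1) == 3 * 4 == 12
// and SizeToDim(shape, 2) == 2 * 3 == 6.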

// Check precedence between two vectors of TensorBoundShape::DimType.
// Returns 1 if right takes precedence over left,
// -1 if left takes precedence over right,
// and 0 if neither takes precedence.
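// e.g. takePrecedenceOver({FEATURE_MAX_DEFAULT}, {FEATURE_MAX}) == 1 (from the
// precedence table below), takePrecedenceOver({BATCH}, {BATCH, CONSTANT}) == 1
// (right strictly extends left), and takePrecedenceOver({BATCH}, {BATCH}) == 0.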
int takePrecedenceOver(
    const std::vector<TensorBoundShape::DimType>& left,
    const std::vector<TensorBoundShape::DimType>& right) {
  const static std::vector<
      std::tuple<TensorBoundShape::DimType, TensorBoundShape::DimType, int>>
      precedence = {
          std::tuple<TensorBoundShape::DimType, TensorBoundShape::DimType, int>{
              TensorBoundShape_DimType_FEATURE_MAX_DEFAULT,
              TensorBoundShape_DimType_FEATURE_MAX,
              1},
          std::tuple<TensorBoundShape::DimType, TensorBoundShape::DimType, int>{
              TensorBoundShape_DimType_FEATURE_MAX,
              TensorBoundShape_DimType_FEATURE_MAX_DEFAULT,
              -1},
          std::tuple<TensorBoundShape::DimType, TensorBoundShape::DimType, int>{
              TensorBoundShape_DimType_BATCH_OF_FEATURE_MAX_DEFAULT,
              TensorBoundShape_DimType_BATCH_OF_FEATURE_MAX,
              1},
          std::tuple<TensorBoundShape::DimType, TensorBoundShape::DimType, int>{
              TensorBoundShape_DimType_BATCH_OF_FEATURE_MAX,
              TensorBoundShape_DimType_BATCH_OF_FEATURE_MAX_DEFAULT,
              -1}};

  // If left is empty and right is not, right takes precedence; if right is
  // empty, neither side takes precedence.
  if (left.size() == 0 || right.size() == 0) {
    return right.size() > left.size();
  }
  for (const auto i : c10::irange(right.size())) {
    // If right.size() > left.size() and left[0:i] == right[0:i],
    // right takes precedence.
    // NOLINTNEXTLINE(clang-diagnostic-sign-compare)
    if (i >= left.size()) {
      return 1;
    }
    auto l = left[i];
    auto r = right[i];
    if (l == TensorBoundShape_DimType_UNKNOWN &&
        r != TensorBoundShape_DimType_UNKNOWN) {
      return 1;
    }
    if (r == TensorBoundShape_DimType_UNKNOWN &&
        l != TensorBoundShape_DimType_UNKNOWN) {
      return -1;
    }
    for (auto& t : precedence) {
      if (l == std::get<0>(t) && r == std::get<1>(t)) {
        return std::get<2>(t);
      }
    }
    if (l != r) {
      return 0;
    }
  }
  return 0;
}
} // namespace

void BoundShapeInferencer::EnsureShapeNames(
    std::unordered_map<std::string, ShapeInfo>* info) const {
  for (auto& kv : *info) {
    kv.second.shape.set_name(kv.first);
  }
}

void BoundShapeInferencer::Initialize(
    const ShapeInfoMap& info,
    bool extract_feature_len) {
  shape_info_ = info;
  extract_feature_len_ = extract_feature_len;
}

void BoundShapeInferencer::InferOps(
    const OperatorDef& op,
    caffe2::Workspace* /* ws */) {
  const static std::unordered_set<std::string> kSlsOps = {
      "SparseLengthsSum",
      "SparseLengthsSumFused8BitRowwise",
      "SparseLengthsWeightedSum",
      "SparseLengthsWeightedSumFused8BitRowwise",
      "SparseLengthsSumFused4BitRowwise",
      "SparseLengthsWeightedSumFused4BitRowwise",
      "SparseLengthsSum4BitRowwiseSparse",
      "SparseLengthsWeightedSum4BitRowwiseSparse",
      "SparseLengthsSum8BitRowwiseSparse",
      "SparseLengthsWeightedSum8BitRowwiseSparse"};
  if (kSlsOps.count(op.type())) {
    InferSparseLengthsSum(op);
  } else if (op.type() == "Add" || op.type() == "Mul") {
    InferElementwiseOp(op);
  } else if (
      op.type() == "FC" || op.type() == "FCTransposed" ||
      op.type() == "FbFCPacked" || op.type() == "Int8FC") {
    InferFC(op);
  } else if (op.type() == "Concat") {
    InferConcat(op);
  } else if (op.type() == "Reshape") {
    InferReshape(op);
  } else if (op.type() == "LengthsRangeFill") {
    InferLengthsRangeFill(op);
  } else if (
      (caffe2::StartsWith(op.type(), "GivenTensor") &&
       caffe2::EndsWith(op.type(), "Fill")) ||
      op.type() == "ConstantFill" || op.type() == "Int8GivenTensorFill" ||
      op.type() == "Int8GivenIntTensorFill") {
    InferGivenTensorFill(op);
  } else if (op.type() == "Shape") {
    InferShape(op);
  } else if (
      op.type() == "FloatToFused8BitRowwiseQuantized" ||
      op.type() == "HalfFloatToFused8BitRowwiseQuantized" ||
      op.type() == "FloatToFused4BitRowwiseQuantized" ||
      op.type() == "HalfToFused4BitRowwiseQuantized" ||
      op.type() == "FloatToHalf" || op.type() == "FbGemmPack") {
    InferQuantizationTransformation(op);
  } else if (op.type() == "UnPackRecords") {
    InferUnPackRecords(op);
  } else if (op.type() == "Tile") {
    InferTile(op);
  } else if (op.type() == "SparseLengthsSumSparseLookup") {
    InferSparseLengthsSumSparseLookup(op);
  } else if (op.type() == "Softmax") {
    InferSoftmax(op);
  } else if (op.type() == "LpNorm") {
    InferLpNorm(op);
  } else if (op.type() == "Transpose") {
    InferTranspose(op);
  } else if (op.type() == "Bucketize") {
    InferBucketize(op);
  } else if (op.type() == "Clip") {
    InferClip(op);
  } else if (op.type() == "Div") {
    InferDiv(op);
  } else if (op.type() == "Mean") {
    InferMean(op);
  } else {
    InferCommonOp(op);
  }
}

void BoundShapeInferencer::InferBoundShapeAndType(
    const NetDef& net,
    const ShapeInfoMap& info,
    caffe2::Workspace* ws,
    bool extract_feature_len) {
  const static std::unordered_set<std::string> unsupported{};
  Initialize(info, extract_feature_len);

  bool inferFinished = false;

  auto old_shape_num = shape_info_.size();
  while (!inferFinished) {
    for (const auto& op : net.op()) {
      VLOG(1) << op.type();
      if (unsupported.count(op.type())) {
        continue;
      }
      InferOps(op, ws);
    }

    // Doing a reverse pass to infer the input shapes if applicable
    for (int i = net.op_size() - 1; i >= 0; --i) {
      const auto& op = net.op(i);
      if (op.type() == "Concat") {
        InferConcatInputs(op);
      } else if (op.type() == "Int8Quantize") {
        InferInt8QuantizeInput(op);
      } else if (op.type() == "Mul" || op.type() == "Add") {
        InferElementwiseOpInput(op);
      }
    }
    inferFinished = old_shape_num == shape_info_.size();
    VLOG(1) << "old shape info num: " << old_shape_num
            << ", new shape info num: " << shape_info_.size();
    old_shape_num = shape_info_.size();
  }

  // Make sure every shape has a name
  EnsureShapeNames(&shape_info_);
}
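
// Example usage (an illustrative sketch, not part of the upstream source;
// assumes the two-argument BoundShapeSpec constructor and the shape_info()
// accessor declared in bound_shape_inferencer.h):
//   BoundShapeSpec spec(/* max_batch_size */ 32, /* max_seq_size */ 64);
//   BoundShapeInferencer inferencer(spec);
//   inferencer.InferBoundShapeAndType(net, input_shape_hints, /* ws */ nullptr);
//   const ShapeInfoMap& bound = inferencer.shape_info();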

TensorShape& BoundShapeInferencer::SetTensorBoundShapeIfNotExist(
    const std::string& name,
    const std::vector<TensorBoundShape::DimType>& t,
    std::vector<int64_t> bound_dims,
    TensorProto::DataType type,
    bool is_quantized) {
  return CheckAndSetTensorBoundShape(
      name, t, bound_dims, type, is_quantized, true);
}

// If allow_existing_shape is true, use the existing shape directly and do not
// enforce that it equals bound_dims; otherwise enforce that the two are equal.
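// For example, if "X" is already bound to dims [32, 16] and a new call passes
// bound_dims [32, 24] with no dim-type precedence either way, the
// CAFFE_ENFORCE_EQ below fires unless allow_existing_shape is true, in which
// case the existing [32, 16] is kept.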
TensorShape& BoundShapeInferencer::CheckAndSetTensorBoundShape(
    const std::string& name,
    const std::vector<TensorBoundShape::DimType>& t,
    std::vector<int64_t> bound_dims,
    TensorProto::DataType type,
    bool is_quantized,
    bool allow_existing_shape,
    float scale,
    int offset,
    bool in_place_op) {
  auto rt = shape_info_.emplace(name, ShapeInfo());
  ShapeInfo& shape_info = rt.first->second;
  TensorShape& shape = shape_info.shape;
  if (shape_info.getShapeIsFinal()) {
    return shape;
  }
  if (is_quantized) {
    shape_info.is_quantized = true;
    shape_info.q_info.scale.clear();
    shape_info.q_info.scale.push_back(scale);
    shape_info.q_info.offset.clear();
    shape_info.q_info.offset.push_back(offset);
    shape_info.q_info.axis = 1;
  }
  // If the shape information exists in shape_info_ already and we want to
  // compare old/new shapes
  if (!rt.second && !in_place_op) {
    // Check dim size consistency
    CAFFE_ENFORCE_EQ(
        shape.dims_size(),
        bound_dims.size(),
        "Dim size inconsistency found in tensor ",
        name);
    // Get precedence of previous shape vs new shape
    int precedence = 0;
    if (!shape_info.dimTypeIsSet()) {
      precedence = 1;
    } else {
      precedence = takePrecedenceOver(shape_info.getDimType(), t);
    }
    // If precedence == 0: check whether previous shape == new shape
    // If precedence == 1: override shape with the new value
    // If precedence == -1: previous shape takes precedence and the
    // new value is skipped.
    if (precedence == 1) {
      shape_info.setDimType(t);
      for (const auto i : c10::irange(bound_dims.size())) {
        shape.set_dims(i, bound_dims[i]);
      }
    } else if (precedence == 0 && !allow_existing_shape) {
      // Enforce previous dims and current dims are the same.
      for (int i = 0; i < shape.dims_size(); ++i) {
        CAFFE_ENFORCE_EQ(
            shape.dims(i),
            bound_dims[i],
            "Shape inconsistency found in tensor ",
            name,
            " on dim ",
            i,
            " (",
            shape.dims(i),
            " vs ",
            bound_dims[i],
            ")");
      }
    }
    return shape;
  }
  // If the shape information does not exist in shape_info_,
  // or the shape info is not final,
  // set the shape info according to the inputs.
  if (!shape_info.getShapeIsFinal()) {
    shape_info.setDimType(t);
    shape.mutable_dims()->Clear();
    for (const auto d : bound_dims) {
      shape.add_dims(d);
    }
    shape.set_data_type(type);
    if (in_place_op) {
      shape_info.setShapeIsFinal(true);
    }
  }
  return shape;
}

std::vector<TensorShape> InferOutput(
    const OperatorDef& op,
    const std::vector<TensorShape>& input_shapes) {
  const OpSchema* schema = OpSchemaRegistry::Schema(op.type());
  CAFFE_ENFORCE(schema);
  return schema->InferTensor(op, input_shapes);
}

void BoundShapeInferencer::InferGivenTensorFill(const OperatorDef& op) {
  CAFFE_ENFORCE_EQ(op.output_size(), 1, op.type(), " must have 1 output");
  InferCommonOp(op);
  auto it = shape_info_.find(op.output(0));
  if (it != shape_info_.end()) {
    it->second.setDimType(std::vector<TensorBoundShape::DimType>(
        it->second.shape.dims_size(), TensorBoundShape_DimType_CONSTANT));
    if (op.type() == "ConstantFill" && op.input_size() >= 1) {
      auto it_input = shape_info_.find(op.input(0));
      if (it_input != shape_info_.end()) {
        it->second.setDimType(it_input->second.getDimType());
      }
    }
  }
}

void BoundShapeInferencer::InferLengthsRangeFill(const OperatorDef& op) {
  CAFFE_ENFORCE_EQ(op.input_size(), 1, "LengthsRangeFill must have 1 input");
  CAFFE_ENFORCE_EQ(op.output_size(), 1, "LengthsRangeFill must have 1 output");
  // Both the input and output of LengthsRangeFill are int32:
  // https://fburl.com/fhwb5666
  CheckAndSetTensorBoundShape(
      op.input(0),
      {TensorBoundShape_DimType_BATCH},
      {spec_.max_batch_size},
      TensorProto_DataType_INT32,
      false);
  CheckAndSetTensorBoundShape(
      op.output(0),
      {TensorBoundShape_DimType_BATCH_OF_FEATURE_MAX_DEFAULT},
      {spec_.max_batch_size * spec_.max_seq_size},
      TensorProto_DataType_INT32,
      false);
  current_dim_type_ = TensorBoundShape_DimType_BATCH_OF_FEATURE_MAX_DEFAULT;
}

void BoundShapeInferencer::InferSparseLengthsSumSparseLookup(
    const OperatorDef& op) {
  CAFFE_ENFORCE_GT(
      op.input_size(),
      2,
      "SparseLengthsSumSparseLookup must have more than 2 inputs");
  CAFFE_ENFORCE_GT(
      op.output_size(),
      1,
      "SparseLengthsSumSparseLookup must have more than 1 output");
  if (shape_info_.find(op.input(2)) == shape_info_.end()) {
    LOG(WARNING)
        << "Shape of COMPRESSED_INDICES_MAPPING input of SparseLengthsSumSparseLookup "
        << op.input(2) << " needs to be present";
  }
  for (int i = 0; i < 2; ++i) {
    const auto it = shape_info_.find(op.input(i));
    if (it != shape_info_.end()) {
      shape_info_[op.output(i)] = it->second;
    }
  }
  // Handle the weights
  if (op.input_size() == 4) {
    CAFFE_ENFORCE_EQ(op.output_size(), 3);
    const auto it = shape_info_.find(op.input(3));
    if (it != shape_info_.end()) {
      shape_info_[op.output(2)] = it->second;
    }
  }
}

void BoundShapeInferencer::InferSparseLengthsSum(const OperatorDef& op) {
  CAFFE_ENFORCE_GE(
      op.input_size(), 3, op.type(), " must have at least 3 inputs");
  const auto it = shape_info_.find(op.input(0));
  CAFFE_ENFORCE(
      it != shape_info_.end(),
      "Shape of DATA input of SparseLengthsSum ",
      op.input(0),
      " needs to be present");
  CAFFE_ENFORCE_EQ(
      it->second.shape.dims().size(),
      2,
      "DATA input ",
      op.input(0),
      " needs to be 2D");

  const int weight =
      (op.type() == "SparseLengthsWeightedSum" ||
       op.type() == "SparseLengthsWeightedSumFused8BitRowwise" ||
       op.type() == "SparseLengthsWeightedSumFused4BitRowwise" ||
       op.type() == "SparseLengthsWeightedSum4BitRowwiseSparse" ||
       op.type() == "SparseLengthsWeightedSum8BitRowwiseSparse")
      ? 1
      : 0;

  const bool is4bit =
      (op.type() == "SparseLengthsSumFused4BitRowwise" ||
       op.type() == "SparseLengthsWeightedSumFused4BitRowwise" ||
       op.type() == "SparseLengthsWeightedSum4BitRowwiseSparse" ||
       op.type() == "SparseLengthsSum4BitRowwiseSparse");

  if (weight) {
    CAFFE_ENFORCE_GE(
        op.input_size(),
        4,
        "SparseLengthsWeightedSum(Sparse) must have 4 or 5 inputs");
    SetTensorBoundShapeIfNotExist(
        op.input(weight),
        {TensorBoundShape_DimType_BATCH_OF_FEATURE_MAX_DEFAULT},
        {spec_.max_batch_size * spec_.max_seq_size},
        TensorProto_DataType_FLOAT,
        false);
  }

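  // The input layout is (DATA, [WEIGHT,] INDICES, LENGTHS, ...), so INDICES
  // is input(1 + weight) and LENGTHS is input(2 + weight).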
  // Bound inputs
  SetTensorBoundShapeIfNotExist(
      op.input(1 + weight),
      {TensorBoundShape_DimType_BATCH_OF_FEATURE_MAX_DEFAULT},
      {spec_.max_batch_size * spec_.max_seq_size},
      TensorProto_DataType_INT64,
      false);
  CheckAndSetTensorBoundShape(
      op.input(2 + weight),
      {TensorBoundShape_DimType_BATCH},
      {spec_.max_batch_size},
      TensorProto_DataType_INT32,
      false);

  // Infer output
  CAFFE_ENFORCE_EQ(it->second.shape.dims_size(), 2);
  current_dim_type_ = TensorBoundShape_DimType_BATCH;
  current_max_batch_size_ = spec_.max_batch_size;
  auto output_dim1 = it->second.shape.dims(1);
  // If the op is SparseLengthsSumFused8BitRowwise, we need to subtract 4
  // bytes for the fp32 scale and 4 bytes for the fp32 bias from the stored
  // row width (https://fburl.com/t6dp9tsc)
  if (op.type() == "SparseLengthsSumFused8BitRowwise" ||
      op.type() == "SparseLengthsWeightedSumFused8BitRowwise" ||
      op.type() == "SparseLengthsSum8BitRowwiseSparse" ||
      op.type() == "SparseLengthsWeightedSum8BitRowwiseSparse") {
    output_dim1 -= 8;
  }
  // If the op is SparseLengthsSumFused4BitRowwise, we need to subtract 2
  // bytes for the fp16 scale and 2 bytes for the fp16 bias. Then we double
  // the result because 2 entries are packed into each uint8 element of the
  // embedding table. (https://fburl.com/diffusion/stmsyz74)
  else if (is4bit) {
    output_dim1 -= 4;
    output_dim1 *= 2;
  }
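  // Worked example: an 8-bit fused table stored as [num_rows, 72] holds 64
  // embedding bytes plus 8 bytes of scale/bias, so output_dim1 == 64; a
  // 4-bit fused table stored as [num_rows, 36] holds (36 - 4) * 2 == 64
  // packed entries per row, so output_dim1 == 64 as well.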
  CAFFE_ENFORCE_GE(
      it->second.getDimType().size(), 2, "input(0): ", op.input(0));
  CheckAndSetTensorBoundShape(
      op.output(0),
      {TensorBoundShape_DimType_BATCH, it->second.getDimType(1)},
      {spec_.max_batch_size, output_dim1},
      TensorProto_DataType_FLOAT,
      false);
}

void BoundShapeInferencer::InferShape(const OperatorDef& op) {
  InferCommonOp(op);
  // The output of Shape is a constant
  if (op.output_size() > 0 && shape_info_.count(op.output(0))) {
    shape_info_[op.output(0)].setDimType(0, TensorBoundShape_DimType_CONSTANT);
  }
}

void BoundShapeInferencer::InferReshape(const OperatorDef& op) {
  InferCommonOp(op);
  // old_shape should be a constant
  if (op.output_size() > 1 && shape_info_.count(op.output(1))) {
    shape_info_[op.output(1)].setDimType(0, TensorBoundShape_DimType_CONSTANT);
  }
}

void BoundShapeInferencer::InferInt8QuantizeInput(const OperatorDef& op) {
  if (op.output_size() == 0 || op.input_size() == 0) {
    return;
  }
  if (shape_info_.find(op.input(0)) != shape_info_.end()) {
    return;
  }
  const auto it = shape_info_.find(op.output(0));
  if (it == shape_info_.end()) {
    return;
  }
  auto input_shape_info = it->second;
  input_shape_info.is_quantized = false;
  input_shape_info.q_info.offset.clear();
  input_shape_info.q_info.scale.clear();
  input_shape_info.shape.set_data_type(TensorProto_DataType_FLOAT);
  shape_info_.emplace(op.input(0), std::move(input_shape_info));
}

void BoundShapeInferencer::InferElementwiseOpInput(const OperatorDef& op) {
  if (shape_info_.find(op.input(0)) != shape_info_.end() &&
      shape_info_.find(op.input(1)) != shape_info_.end()) {
    return;
  }
  const auto it = shape_info_.find(op.output(0));
  if (it == shape_info_.end()) {
    return;
  }
  ArgumentHelper helper(op);
  const bool broadcast = helper.GetSingleArgument<bool>("broadcast", false);
  if (broadcast) {
    auto input_shape_info = it->second;
    shape_info_.emplace(op.input(0), input_shape_info);
    // From the definition of Add/Mul:
    // "When broadcasting is specified,
    // the second tensor can either be of size 1 (a scalar value),
    // or having its shape as a contiguous subset of the first tensor's shape."
    // So the shape of the second input is always a subset of the first
    // input's; set the bound shape of the second input to match the first.
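    // e.g. if the first input is bound to [batch, 4, 5], valid second inputs
    // under broadcast include [4, 5], [5], or a size-1 scalar; here we reuse
    // the first input's bound shape as an upper bound for the second.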
    shape_info_.emplace(op.input(1), std::move(input_shape_info));
  }
}

void BoundShapeInferencer::InferConcatInputs(const OperatorDef& op) {
  ArgumentHelper helper(op);
  const auto add_axis = helper.GetSingleArgument<int32_t>("add_axis", 0);
  // NOLINTNEXTLINE(bugprone-branch-clone)
  if (add_axis) {
    return;
  } else if (op.output_size() == 0 || !shape_info_.count(op.output(0))) {
    return;
  }

  const auto axis = helper.HasArgument("axis")
      ? helper.GetSingleArgument<int32_t>("axis", -1)
      : GetDimFromOrderString(
            helper.GetSingleArgument<string>("order", "NCHW"));

  const auto& shape_info = shape_info_.at(op.output(0));
  int output_channel = shape_info.shape.dims(axis);
  int missing_shape_infos = 0;
  int channel_acc = 0;
  std::string input_to_infer;
  for (const auto& i : op.input()) {
    const auto it = shape_info_.find(i);
    if (it != shape_info_.end()) {
      const auto& current_input_shape = it->second;
      if (axis < current_input_shape.shape.dims_size()) {
        channel_acc += current_input_shape.shape.dims(axis);
      } else {
        LOG(INFO) << "Mismatched input dim along axis " << axis
                  << ". We cannot infer missing input shape for Concat";
        return;
      }
    } else if (missing_shape_infos) {
      LOG(INFO) << "More than one missing shape, previous one: "
                << input_to_infer;
      // We can only infer one missing input shape info
      return;
    } else {
      ++missing_shape_infos;
      input_to_infer = i;
    }
  }

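  // Worked example: if the Concat output has dims(axis) == 10 and the inputs
  // with known shapes contribute 3 + 4 along `axis`, the single missing
  // input below is inferred to have dims(axis) == 10 - 7 == 3.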
  if (missing_shape_infos && !input_to_infer.empty()) {
    auto input_shape_info = shape_info;
    input_shape_info.shape.set_dims(axis, output_channel - channel_acc);
    shape_info_.emplace(input_to_infer, std::move(input_shape_info));

    // Infer the shape of the second output of Concat
    InferCommonOp(op);
    if (op.output_size() > 1 && shape_info_.count(op.output(1))) {
      shape_info_[op.output(1)].setDimType(
          0, TensorBoundShape_DimType_CONSTANT);
    }
  }
}

void BoundShapeInferencer::InferElementwiseOp(const OperatorDef& op) {
  InferCommonOp(op);
  if (shape_info_.find(op.output(0)) != shape_info_.end() &&
      shape_info_.find(op.input(1)) != shape_info_.end()) {
    return;
  }
  const auto it = shape_info_.find(op.input(0));
  if (it == shape_info_.end()) {
    return;
  }
  ArgumentHelper helper(op);
  const bool broadcast = helper.GetSingleArgument<bool>("broadcast", false);
  if (broadcast) {
    auto input_shape_info = it->second;
    shape_info_.emplace(op.input(1), input_shape_info);
    shape_info_.emplace(op.output(0), std::move(input_shape_info));
  }
}

// For Concat, if shape info for some inputs is missing and the add_axis
// argument is set, all inputs must have the same dimensions. In this case,
// we can infer the shape of the missing inputs.
void BoundShapeInferencer::InferConcat(const OperatorDef& op) {
  ArgumentHelper helper(op);
  auto add_axis = helper.GetSingleArgument<int32_t>("add_axis", 0);
  if (add_axis) {
    ShapeInfo* ref_input_shape = nullptr;
    std::string ref_name;
    std::unordered_set<std::string> missing_shape_inputs;
    for (const auto& i : op.input()) {
      const auto it = shape_info_.find(i);
      if (it != shape_info_.end()) {
        const auto& current_input_shape = it->second;
        if (ref_input_shape) {
          CAFFE_ENFORCE_EQ(
              ref_input_shape->shape.dims_size(),
              current_input_shape.shape.dims_size(),
              ref_name,
              " vs ",
              i);
          for (int j = 0; j < ref_input_shape->shape.dims_size(); ++j) {
            CAFFE_ENFORCE_EQ(
                ref_input_shape->shape.dims(j),
                current_input_shape.shape.dims(j),
                "Mismatched size on dim ",
                j,
                " between ",
                ref_name,
                " and ",
                i,
                " (",
                ref_input_shape->shape.dims(j),
                " vs ",
                current_input_shape.shape.dims(j),
                ")");
          }
        } else {
          ref_input_shape = &it->second;
          ref_name = i;
        }
      } else {
        missing_shape_inputs.emplace(i);
      }
    }

    if (ref_input_shape) {
      current_dim_type_ = ref_input_shape->getDimType(0);
      for (const auto& i : missing_shape_inputs) {
        shape_info_.emplace(i, *ref_input_shape);
      }
    }
  }
  InferCommonOp(op);
  // split_info should be a constant
  if (op.output_size() > 1 && shape_info_.count(op.output(1))) {
    shape_info_[op.output(1)].setDimType(0, TensorBoundShape_DimType_CONSTANT);
  }
}

void BoundShapeInferencer::InferFC(const OperatorDef& op) {
  CAFFE_ENFORCE(
      op.input_size() == 3 || op.input_size() == 4,
      "FC has to have 3 or 4 inputs");
  const auto w_it = shape_info_.find(op.input(1));
  CAFFE_ENFORCE(
      w_it != shape_info_.end(),
      "Shape of WEIGHT input of FC ",
      op.input(1),
      " needs to be present");
  const ShapeInfo& w_shape_info = w_it->second;
  const auto b_it = shape_info_.find(op.input(2));
  CAFFE_ENFORCE(
      b_it != shape_info_.end(),
      "Shape of BIAS input of FC ",
      op.input(2),
      " needs to be present");
  const ShapeInfo& b_shape_info = b_it->second;
  bool fp16 = (op.type() == "FbFCPacked");
  bool int8_fc = (op.type() == "Int8FC" || op.engine() == "DNNLOWP");
  float scale = 1;
  int offset = 0;

  auto x_it = shape_info_.find(op.input(0));
  if (x_it == shape_info_.end()) {
    // We don't have a hint for the X input, so we try to deduce it from the
    // weight shape.
    ArgumentHelper helper(op);
    auto axis = helper.GetSingleArgument<int32_t>("axis", 1);
    auto axis_w = helper.GetSingleArgument<int32_t>("axis_w", 1);
    const TensorShape w_shape = w_shape_info.shape;
    bool transposed = (op.type() == "FCTransposed");
    const int canonical_axis_w =
        canonical_axis_index_(axis_w, w_shape.dims().size());
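    // For FC, W is [N, K], so K = SizeFromDim(W, canonical_axis_w); for
    // FCTransposed, W is [K, N], so K = SizeToDim(W, canonical_axis_w).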
    const int64_t K = transposed ? SizeToDim(w_shape, canonical_axis_w)
                                 : SizeFromDim(w_shape, canonical_axis_w);
    std::vector<int64_t> dims;
    std::vector<TensorBoundShape::DimType> dimTypes;
    for (int i = 0; i < axis - 1; ++i) {
      dims.push_back(1);
      dimTypes.push_back(TensorBoundShape_DimType_CONSTANT);
    }
    dims.push_back(spec_.max_batch_size);
    dimTypes.push_back(TensorBoundShape_DimType_BATCH);
    dims.push_back(K);
    dimTypes.push_back(TensorBoundShape_DimType_CONSTANT);
    current_dim_type_ = TensorBoundShape_DimType_BATCH;
    current_max_batch_size_ = spec_.max_batch_size;
    // NOLINTNEXTLINE(cppcoreguidelines-init-variables)
    TensorProto::DataType w_data_type;
    if (fp16) {
      w_data_type = TensorProto_DataType_FLOAT;
    } else if (int8_fc) {
      w_data_type = TensorProto_DataType_UINT8;
    } else {
      w_data_type = w_shape.data_type();
    }

    if (int8_fc) {
      scale = helper.GetSingleArgument<float>("Y_scale", 1);
      offset = helper.GetSingleArgument<int>("Y_zero_point", 0);
    }
    // Note: for FbFCPacked, the weight is fp16 but activations are in fp32
    CheckAndSetTensorBoundShape(
        op.input(0),
        dimTypes,
        dims,
        w_data_type,
        int8_fc,
        false,
        scale,
        offset);
  } else {
    ShapeInfo& x_shape_info = x_it->second;
    if (x_shape_info.getDimType(0) == TensorBoundShape_DimType_UNKNOWN) {
      CAFFE_ENFORCE_GE(x_shape_info.shape.dims_size(), 1);
      x_shape_info.shape.set_dims(0, spec_.max_batch_size);
      x_shape_info.setDimType(0, TensorBoundShape_DimType_BATCH);
    }
  }

  // Standard shape inference for outputs
  std::vector<TensorShape> input_shapes{
      shape_info_[op.input(0)].shape, w_shape_info.shape, b_shape_info.shape};
  if (op.input_size() == 4) {
    const auto quant_param_it = shape_info_.find(op.input(3));
    CAFFE_ENFORCE(
        quant_param_it != shape_info_.end(),
        "Shape of quant_param input of FC ",
        op.input(3),
        " needs to be present");
    const ShapeInfo& quant_param_shape_info = quant_param_it->second;
    input_shapes.emplace_back(quant_param_shape_info.shape);
  }
  std::vector<TensorShape> output_shapes = InferOutput(op, input_shapes);
  CAFFE_ENFORCE_EQ(output_shapes.size(), 1);
  // NOLINTNEXTLINE(cppcoreguidelines-init-variables)
  TensorProto::DataType output_data_type;
  if (fp16) {
    output_data_type = TensorProto_DataType_FLOAT;
  } else if (int8_fc) {
    output_data_type = TensorProto_DataType_UINT8;
  } else {
    output_data_type = output_shapes.front().data_type();
  }

  if (int8_fc) {
    ArgumentHelper helper(op);

    scale = helper.GetSingleArgument<float>("Y_scale", 1);
    offset = helper.GetSingleArgument<int>("Y_zero_point", 0);
  }

  CheckAndSetTensorBoundShape(
      op.output(0),
      setDimTypeWithFirst(
          TensorBoundShape_DimType_BATCH, output_shapes.front().dims().size()),
      ConvertToVec(output_shapes[0].dims()),
      output_data_type,
      int8_fc,
      false,
      scale,
      offset);
}

// Infers shapes for operators which are used to transform non-quantized
// operators (e.g. SparseLengthsSum) into quantized operators (e.g.
// SparseLengthsSumFused8BitRowwise) at model training time. If we're doing
// quantization for CONSTANTs (e.g. embedding tables), current_dim_type_
// should be set to CONSTANT.
void BoundShapeInferencer::InferQuantizationTransformation(
    const OperatorDef& op) {
  bool all_constant = true;
  for (const auto& input : op.input()) {
    const auto it = shape_info_.find(input);
    if (it == shape_info_.end() ||
        it->second.getDimType(0) != TensorBoundShape_DimType_CONSTANT) {
      all_constant = false;
      break;
    }
  }
  const auto previous_dim_type = current_dim_type_;
  if (all_constant) {
    current_dim_type_ = TensorBoundShape_DimType_CONSTANT;
  }
  InferCommonOp(op);
  current_dim_type_ = previous_dim_type;
}

void BoundShapeInferencer::InferUnPackRecords(const OperatorDef& op) {
  std::vector<TensorShape> input_shapes;
  for (const auto& input : op.input()) {
    const auto it = shape_info_.find(input);
    if (it == shape_info_.end()) {
      LOG(WARNING) << "Cannot find shape info for " << input << ". Skipping "
                   << op.type();
      return;
    }
    input_shapes.emplace_back(it->second.shape);
  }

  std::vector<TensorShape> output_shapes;

  ArgumentHelper helper(op);
  std::vector<std::string> fields =
      helper.GetRepeatedArgument<std::string>("fields");

  const int num_tensors = fields.size();
  if (spec_.max_batch_size == 1 && num_tensors == 1 &&
      input_shapes[0].dims_size() != 1) {
    // Special case of single tensor input
    output_shapes.push_back(input_shapes[0]);
  } else {
    // Input is packed
    TensorShape oshape;
    oshape.add_dims(spec_.max_batch_size);
    oshape.add_dims(spec_.num_embeddings);
    oshape.add_dims(spec_.embedding_length);
    // TODO: how to do this more intelligently
    oshape.set_data_type(TensorProto::FLOAT);
    for (int i = 0; i < num_tensors; i++) {
      output_shapes.push_back(oshape);
    }
  }

  for (const auto i : c10::irange(output_shapes.size())) {
    const auto& shape = output_shapes[i];

    CheckAndSetTensorBoundShape(
        op.output(i),
        setDimTypeWithFirst(current_dim_type_, shape.dims().size()),
        ConvertToVec(shape.dims()),
        output_shapes[i].data_type(),
        false);
  }
}

void BoundShapeInferencer::InferTile(const OperatorDef& op) {
  if (op.input_size() > 1) {
    LOG(WARNING) << "Cannot infer shape for Tile when tiles and axis are inputs";
    return;
  }
  const auto it = shape_info_.find(op.input(0));
  if (it == shape_info_.end()) {
    LOG(WARNING) << "Cannot find shape info for " << op.input(0)
                 << ". Skipping " << op.type();
    return;
  }

  ArgumentHelper helper(op);
  const std::int32_t tiles = helper.GetSingleArgument<std::int32_t>("tiles", 1);
  std::int32_t axis = helper.GetSingleArgument<std::int32_t>("axis", 0);
  bool dynamic = helper.GetSingleArgument<bool>("dynamic", false);
  auto ndims = it->second.shape.dims_size();
  const auto canonical_axis = canonical_axis_index_(axis, ndims);
  auto shape = it->second.shape;
  shape.set_dims(
      canonical_axis,
      shape.dims(canonical_axis) * (dynamic ? spec_.max_batch_size : tiles));
  CheckAndSetTensorBoundShape(
      op.output(0),
      setDimTypeWithFirst(TensorBoundShape_DimType_BATCH, ndims),
      ConvertToVec(shape.dims()),
      it->second.shape.data_type(),
      false);
}

void BoundShapeInferencer::InferSoftmax(const OperatorDef& op) {
  CAFFE_ENFORCE_EQ(op.input_size(), 1, op.type(), " must have 1 input");
  CAFFE_ENFORCE_EQ(op.output_size(), 1, op.type(), " must have 1 output");

  auto it = shape_info_.find(op.input(0));
  if (it == shape_info_.end()) {
    LOG(WARNING) << "Didn't find shape info for the input of Softmax, skipping";
    return;
  }

  CheckAndSetTensorBoundShape(
      op.output(0),
      setDimTypeWithFirst(
          it->second.getDimType(0), it->second.shape.dims_size()),
      ConvertToVec(it->second.shape.dims()),
      it->second.shape.data_type(),
      false);
}

void BoundShapeInferencer::InferBucketize(const OperatorDef& op) {
  CAFFE_ENFORCE_EQ(op.input_size(), 1, op.type(), " must have 1 input");
  CAFFE_ENFORCE_EQ(op.output_size(), 1, op.type(), " must have 1 output");

  auto it = shape_info_.find(op.input(0));
  if (it == shape_info_.end()) {
    LOG(WARNING)
        << "Didn't find shape info for the input of Bucketize, skipping";
    return;
  }

  InferCommonOp(op);
  auto it_output = shape_info_.find(op.output(0));
  if (it_output != shape_info_.end()) {
    it_output->second.setDimType(it->second.getDimType());
  }
}

void BoundShapeInferencer::InferLpNorm(const OperatorDef& op) {
  CAFFE_ENFORCE_EQ(op.output_size(), 1, op.type(), " must have 1 output");
  InferCommonOp(op);
  auto it = shape_info_.find(op.output(0));
  if (it != shape_info_.end()) {
    it->second.setDimType(std::vector<TensorBoundShape::DimType>(
        it->second.shape.dims_size(), TensorBoundShape_DimType_CONSTANT));
  }
}

void BoundShapeInferencer::InferClip(const OperatorDef& op) {
  CAFFE_ENFORCE_EQ(op.output_size(), 1, op.type(), " must have 1 output");
  InferCommonOp(op);
  auto it = shape_info_.find(op.output(0));
  if (it != shape_info_.end()) {
    auto it_input = shape_info_.find(op.input(0));
    if (it_input != shape_info_.end()) {
      it->second.setDimType(it_input->second.getDimType());
    }
  }
}

void BoundShapeInferencer::InferMean(const OperatorDef& op) {
  CAFFE_ENFORCE_EQ(op.output_size(), 1, op.type(), " must have 1 output");
  InferCommonOp(op);
  auto it = shape_info_.find(op.output(0));
  if (it != shape_info_.end()) {
    auto it_input = shape_info_.find(op.input(0));
    if (it_input != shape_info_.end()) {
      it->second.setDimType(it_input->second.getDimType());
    }
  }
}

void BoundShapeInferencer::InferDiv(const OperatorDef& op) {
  CAFFE_ENFORCE_EQ(op.output_size(), 1, op.type(), " must have 1 output");
  InferCommonOp(op);
  auto it = shape_info_.find(op.output(0));
  if (it != shape_info_.end()) {
    auto it_input = shape_info_.find(op.input(0));
    if (it_input != shape_info_.end()) {
      it->second.setDimType(it_input->second.getDimType());
    }
  }
}

void BoundShapeInferencer::InferTranspose(const OperatorDef& op) {
  CAFFE_ENFORCE_EQ(op.input_size(), 1, op.type(), " must have 1 input");
  CAFFE_ENFORCE_EQ(op.output_size(), 1, op.type(), " must have 1 output");

  auto it = shape_info_.find(op.input(0));
  if (it == shape_info_.end()) {
    LOG(WARNING) << "Didn't find shape info for the input of Transpose";
    return;
  }

  ArgumentHelper helper(op);
  std::vector<int> axes = helper.GetRepeatedArgument<int>("axes");
  if (axes.empty()) {
    // In this case it should be the existing dims in reverse order
    for (int i = it->second.shape.dims().size() - 1; i >= 0; --i) {
      axes.push_back(i);
    }
  } else {
    CAFFE_ENFORCE_EQ(
        axes.size(),
        it->second.shape.dims().size(),
        op.type(),
        " must specify all axes in Transpose.");
    auto valid_axes = std::all_of(
        axes.begin(),
        axes.end(),
        [numDims = it->second.shape.dims().size()](int& axis) {
          return axis >= 0 && axis < numDims;
        });
    CAFFE_ENFORCE(valid_axes, "Invalid axes were provided.");
  }

  std::vector<TensorBoundShape::DimType> dimTypes;
  std::vector<int64_t> dims;
  for (auto axis : axes) {
    dimTypes.push_back(it->second.getDimType(axis));
    dims.push_back(it->second.shape.dims()[axis]);
  }

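  // e.g. an input bound to [batch, 3, 4] with axes = {0, 2, 1} produces an
  // output bound of [batch, 4, 3], with the dim types permuted the same way.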
  CheckAndSetTensorBoundShape(
      op.output(0),
      dimTypes,
      dims,
      it->second.shape.data_type(),
      false);
}

void BoundShapeInferencer::InferCommonOp(
    const OperatorDef& op,
    const OpSchema* schema,
    bool bypass_input_check,
    bool in_place_op) {
  // First, we need to check that all the input shapes/types are already
  // present.
  try {
    const static std::unordered_set<std::string>
        types_with_independent_output_shape = {
            "Int8GenQuantParams",
            "Int8QuantSchemeBlobFill",
            "ComputeEqualizationScale",
            "Int8GenQuantParamsMinMax"};
    std::vector<TensorShape> input_shapes;
    for (const auto& input : op.input()) {
      const auto it = shape_info_.find(input);
      if (it == shape_info_.end() &&
          !types_with_independent_output_shape.count(op.type()) &&
          !bypass_input_check) {
        LOG(WARNING) << "Cannot find shape info for " << input << ". Skipping "
                     << op.type();
        return;
      }
      if (types_with_independent_output_shape.count(op.type()) ||
          (bypass_input_check && it == shape_info_.end())) {
        TensorShape input_shape;
        input_shapes.emplace_back(std::move(input_shape));
      } else {
        input_shapes.emplace_back(it->second.shape);
      }
    }

    // Schema can be pre-defined.
    // If not predefined, get the schema for the op.
    if (schema == nullptr) {
      schema = OpSchemaRegistry::Schema(op.type());
    }
    CAFFE_ENFORCE(schema);
    std::vector<TensorShape> output_shapes;
    output_shapes = schema->InferTensor(op, input_shapes);
    bool is_quantized = !(op.type().compare(0, 4, "Int8")) &&
        (op.type() != "Int8Dequantize") &&
        (op.type() != "Int8QuantSchemeBlobFill") &&
        (op.type() != "ComputeEqualizationScale") &&
        (op.type() != "Int8GenQuantParams") &&
        (op.type() != "Int8GenQuantParamsMinMax");
    float scale = 1;
    int offset = 0;

    TensorProto::DataType inferred_data_type = TensorProto::UNDEFINED;
    if (is_quantized) {
      const static std::map<std::string, int> type_info_from_input = {
          {"Int8Quantize", -1}, // Force this op's output to be uint8
          {"Int8FCPackWeight", 0},
          {"Int8ConvPackWeight", 0},
          {"Int8ConvRelu", 1},
          {"Int8MaxPool", 0},
          {"Int8AveragePool", 0},
          {"Int8FC", 1},
          {"Int8Conv", 1},
          {"Int8SumRelu", 0},
          {"Int8Relu", 0}};
      CAFFE_ENFORCE(
          type_info_from_input.find(op.type()) != type_info_from_input.end(),
          "Undefined quantized output data type, add it into type_info_from_input");
      int target = type_info_from_input.find(op.type())->second;
      if (target == -1) {
        inferred_data_type = TensorProto::UINT8;
      } else {
        // NOLINTNEXTLINE(clang-diagnostic-sign-compare)
        CAFFE_ENFORCE(target < input_shapes.size());
        inferred_data_type = input_shapes[target].data_type();
      }

      // Extract output scale and offset
      ArgumentHelper helper(op);
      scale = helper.GetSingleArgument<float>("Y_scale", 1);
      offset = helper.GetSingleArgument<int>("Y_zero_point", 0);
    } else if (op.type() == "Int8Dequantize") {
      inferred_data_type = TensorProto::FLOAT;
    }

    for (const auto i : c10::irange(output_shapes.size())) {
      const auto& shape = output_shapes[i];
      if (shape.unknown_shape()) {
        continue;
      }
      auto tmp_dtype = inferred_data_type;
      if (inferred_data_type == TensorProto::UNDEFINED) {
        inferred_data_type = shape.data_type();
      }
      CheckAndSetTensorBoundShape(
          op.output(i),
          setDimTypeWithFirst(current_dim_type_, shape.dims().size()),
          ConvertToVec(shape.dims()),
          inferred_data_type,
          is_quantized,
          false,
          scale,
          offset,
          in_place_op);
      inferred_data_type = tmp_dtype;
    }
  } catch (const caffe2::EnforceNotMet& e) {
    LOG(ERROR) << "Enforce not met while inferring shapes for " << op.type()
               << ": " << e.what() << " first output: " << op.output(0);
  } catch (const std::exception& e) {
    LOG(WARNING) << "Caught exception while inferring shapes for " << op.type()
                 << ": " << e.what() << " first output: " << op.output(0);
  }
}

std::shared_ptr<BoundShapeInferencerBase> getBoundShapeInferencer(
    const BoundShapeSpec& spec) {
  return std::make_shared<BoundShapeInferencer>(spec);
}

C10_DEFINE_SHARED_REGISTRY(
    BoundShapeInferencerRegistry,
    BoundShapeInferencerBase,
    const BoundShapeSpec&);

C10_REGISTER_CREATOR(
    BoundShapeInferencerRegistry,
    C10,
    getBoundShapeInferencer);
} // namespace caffe2