| /* Copyright 2020 The TensorFlow Authors. All Rights Reserved. |
| |
| Licensed under the Apache License, Version 2.0 (the "License"); |
| you may not use this file except in compliance with the License. |
| You may obtain a copy of the License at |
| |
| http://www.apache.org/licenses/LICENSE-2.0 |
| |
| Unless required by applicable law or agreed to in writing, software |
| distributed under the License is distributed on an "AS IS" BASIS, |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| See the License for the specific language governing permissions and |
| limitations under the License. |
| ==============================================================================*/ |
| |
| #include "tensorflow/compiler/mlir/tosa/transforms/legalize_utils.h" |
| |
| #include "mlir/Dialect/Tosa/IR/TosaOps.h" // from @llvm-project |
| #include "mlir/Dialect/Tosa/Utils/QuantUtils.h" // from @llvm-project |
| #include "tensorflow/compiler/mlir/lite/ir/tfl_ops.h" |
| #include "tensorflow/compiler/mlir/tosa/transforms/legalize_common.h" |
| |
| // Implements legalization and post-legalization optimization helper functions |
| |
| namespace mlir { |
| namespace tosa { |
| |
| // Create a TOSA rescale op from TFLite scaling, zero points and rounding mode |
| Value buildRescale(PatternRewriter& rewriter, Operation* op, |
| ShapedType output_type, Value input_val, double scale, |
| int64_t input_zp, int64_t output_zp, bool double_round, |
| bool scale32) { |
| int32_t multiplier; |
| int32_t shift; |
| |
| int32_t scale_width = scale32 ? 32 : 16; |
| |
| computeMultiplierAndShift(scale, multiplier, shift, scale_width); |
| |
| auto rescale_op = CreateOpAndInfer<tosa::RescaleOp>( |
| rewriter, op->getLoc(), output_type, input_val, |
| rewriter.getI32IntegerAttr(static_cast<int32_t>(input_zp)), |
| rewriter.getI32IntegerAttr(static_cast<int32_t>(output_zp)), |
| rewriter.getI32ArrayAttr({multiplier}), rewriter.getI32ArrayAttr({shift}), |
| rewriter.getBoolAttr(scale32), rewriter.getBoolAttr(double_round), |
| rewriter.getBoolAttr(false)); |
| |
| return rescale_op.getResult(); |
| } |
| |
| // Creates TOSA rescale op with int32 output |
| Value buildRescaleToInt32(PatternRewriter& rewriter, Operation* op, |
| Value input_val, double input_scale, |
| int64_t input_zp) { |
| // Output is always int32 type |
| auto input_type = input_val.getType().dyn_cast<mlir::ShapedType>(); |
| assert(input_type); |
| auto output_type = input_type.clone(rewriter.getI32Type()); |
| |
| return buildRescale(rewriter, op, output_type, input_val, input_scale, |
| input_zp, 0, false, true); |
| } |
| |
| // Creates TOSA rescale op with int32 input |
| Value buildRescaleFromInt32(PatternRewriter& rewriter, Operation* op, |
| ShapedType output_type, Value input_val, |
| double output_scale, int64_t output_zp) { |
| // Input should be int32 type |
| auto input_type = input_val.getType().dyn_cast<mlir::ShapedType>(); |
| (void)input_type; |
| assert(input_type && input_type.getElementType().isInteger(32) && |
| "expected rescale input element type to be i32"); |
| |
| // Potentially check input_shape == output_shape here |
| return buildRescale(rewriter, op, output_type, input_val, output_scale, 0, |
| output_zp, true, true); |
| } |
| |
| // Creates a TOSA rescale op based on conv2d parameters. |
| Value buildRescaleOpConvOutput(PatternRewriter& rewriter, Operation* op, |
| Value conv_val, ShapedType input_type, |
| ShapedType weight_type, ShapedType output_type) { |
| auto input_qtype = |
| input_type.getElementType().dyn_cast<mlir::quant::UniformQuantizedType>(); |
| auto output_qtype = output_type.getElementType() |
| .dyn_cast<mlir::quant::UniformQuantizedType>(); |
| |
| double input_scale = input_qtype.getScale(); |
| |
| int64_t output_zp = output_qtype.getZeroPoint(); |
| double output_scale = output_qtype.getScale(); |
| |
| bool scale32 = isScale32(output_qtype); |
| int32_t scale_width = scale32 ? 32 : 16; |
| |
| if (auto weight_per_tensor_qtype = |
| weight_type.getElementType() |
| .dyn_cast<mlir::quant::UniformQuantizedType>()) { |
| // Per-tensor quantization |
| double weight_scale = weight_per_tensor_qtype.getScale(); |
| |
| int32_t multiplier; |
| int32_t shift; |
| |
| double op_tensor_scale = (input_scale * weight_scale) / output_scale; |
| |
| computeMultiplierAndShift(op_tensor_scale, multiplier, shift, scale_width); |
| |
| auto rescale_op = CreateOpAndInfer<tosa::RescaleOp>( |
| rewriter, op->getLoc(), output_type, conv_val, |
| rewriter.getI32IntegerAttr(0), rewriter.getI32IntegerAttr(output_zp), |
| rewriter.getI32ArrayAttr({multiplier}), |
| rewriter.getI32ArrayAttr({shift}), rewriter.getBoolAttr(scale32), |
| rewriter.getBoolAttr(true), rewriter.getBoolAttr(false)); |
| |
| return rescale_op.getResult(); |
| |
| } else if (auto weight_per_channel_qtype = |
| weight_type.getElementType() |
| .dyn_cast<mlir::quant::UniformQuantizedPerAxisType>()) { |
| // Per-channel quantization |
| auto output_last_axis = output_type.getShape().size() - 1; |
| uint32_t output_channels = output_type.getShape()[output_last_axis]; |
| |
| SmallVector<int32_t> multiplier_arr; |
| SmallVector<int32_t> shift_arr; |
| |
| SmallVector<double> weight_scale_arr( |
| weight_per_channel_qtype.getScales().begin(), |
| weight_per_channel_qtype.getScales().end()); |
| |
| int64_t output_zp = output_qtype.getZeroPoint(); |
| double output_scale = output_qtype.getScale(); |
| |
| for (uint32_t oc = 0; oc < output_channels; oc++) { |
| double weight_scale = weight_scale_arr[oc]; |
| |
| int32_t multiplier; |
| int32_t shift; |
| |
| double op_channel_scale = (input_scale * weight_scale) / output_scale; |
| |
| computeMultiplierAndShift(op_channel_scale, multiplier, shift, |
| scale_width); |
| |
| multiplier_arr.push_back(multiplier); |
| shift_arr.push_back(shift); |
| } |
| |
| auto rescale_op = CreateOpAndInfer<tosa::RescaleOp>( |
| rewriter, op->getLoc(), output_type, conv_val, |
| rewriter.getI32IntegerAttr(0), rewriter.getI32IntegerAttr(output_zp), |
| rewriter.getI32ArrayAttr(multiplier_arr), |
| rewriter.getI32ArrayAttr(shift_arr), rewriter.getBoolAttr(scale32), |
| rewriter.getBoolAttr(true), rewriter.getBoolAttr(true)); |
| |
| return rescale_op.getResult(); |
| |
| } else { |
| op->emitOpError("buildConvRescaleOp: unknown weight quantized type"); |
| return nullptr; |
| } |
| } |
| |
| // Create a 8-bit TOSA TABLE constant tensor with int8[256] array. |
| // Follow PopulateLookupTable() tensorflow/lite/kernels/activations.cc |
| Value getTosaConst8bitTable(PatternRewriter& rewriter, Operation* op, |
| double input_scale, int32_t input_zp, |
| double output_scale, int32_t output_zp, |
| std::function<double(double)> func) { |
| SmallVector<int8_t, 256> table; |
| |
| for (int32_t i = -128; i < 128; i++) { |
| double dequantized = input_scale * (i - input_zp); |
| double transformed = func(dequantized); |
| int32_t rescaled = std::llround(transformed / output_scale); |
| int32_t quantized = static_cast<int32_t>(rescaled + output_zp); |
| table.push_back( |
| static_cast<int8_t>(std::min(std::max(quantized, -128), 127))); |
| } |
| |
| auto element_qtype = |
| UniformQuantizedType::get(true, rewriter.getIntegerType(8), |
| rewriter.getF32Type(), 1.0f, 0, -128, 127); |
| auto const_type = RankedTensorType::get({256}, element_qtype); |
| auto storage_type = |
| RankedTensorType::get({256}, element_qtype.getStorageType()); |
| auto const_attr = |
| DenseElementsAttr::get(storage_type, llvm::makeArrayRef(table)); |
| |
| auto const_op = |
| rewriter.create<tosa::ConstOp>(op->getLoc(), const_type, const_attr); |
| return const_op.getResult(); |
| } |
| |
| // Create a 16-bit TOSA TABLE constant tensor with int16[513] array. |
| // Output is restricted to [-1.0, 1.0]. |
| // Follow gen_lut() tensorflow/lite/kernels/internal/common.h |
| Value getTosaConst16bitTable(PatternRewriter& rewriter, Operation* op, |
| std::function<double(double)> func, double min, |
| double max) { |
| SmallVector<int16_t, 513> table; |
| |
| double step = (max - min) / 512.0f; |
| double half_step = step / 2.0f; |
| for (int32_t i = 0; i < 512; i++) { |
| int32_t sample_val = std::llround(func(min + (i * step)) * 32768.0); |
| double midpoint_interp_val = |
| std::round(((func(min + (i + 1) * step) * 32768.0) + |
| std::round(func(min + (i * step)) * 32768.0)) / |
| 2.0); |
| double midpoint_val = |
| std::round(func(min + (i * step) + half_step) * 32768.0); |
| double midpoint_err = midpoint_interp_val - midpoint_val; |
| int32_t bias = std::llround(midpoint_err / 2.0); |
| |
| table.push_back(static_cast<int16_t>( |
| std::min(std::max(sample_val - bias, -32768), 32767))); |
| } |
| |
| int32_t max_val = std::llround(func(max) * 32768.0); |
| table.push_back( |
| static_cast<int16_t>(std::min(std::max(max_val, -32768), 32767))); |
| |
| auto element_qtype = |
| UniformQuantizedType::get(true, rewriter.getIntegerType(16), |
| rewriter.getF32Type(), 1.0f, 0, -32768, 32767); |
| auto const_type = RankedTensorType::get({513}, element_qtype); |
| auto storage_type = |
| RankedTensorType::get({513}, element_qtype.getStorageType()); |
| auto const_attr = |
| DenseElementsAttr::get(storage_type, llvm::makeArrayRef(table)); |
| |
| auto const_op = |
| rewriter.create<tosa::ConstOp>(op->getLoc(), const_type, const_attr); |
| return const_op.getResult(); |
| } |
| |
| // Create a 32-bit TOSA TABLE constant tensor with int16[513] array. |
| // Output is restricted to [-1.0, 1.0] as s0.31 format. |
| void getTosaConst32bitTable(PatternRewriter& rewriter, Operation* op, |
| double input_scale, int32_t input_zp, |
| std::function<double(double)> func, |
| Value& upper_const, Value& lower_const) { |
| SmallVector<int16_t, 513> upper_table, lower_table; |
| |
| double output_inv_scale = static_cast<double>(1L << 31); |
| |
| for (int32_t i = -256; i <= 256; i++) { |
| double dequantized = input_scale * (i - input_zp); |
| double transformed = func(dequantized); |
| double truncated = std::min(std::max(transformed, -1.0), 1.0); |
| int64_t rescaled = |
| static_cast<int64_t>(std::round(truncated * output_inv_scale)); |
| |
| // 2^31 is not representable in int32_t, so store as 2^31 - 1 instead |
| if (rescaled == static_cast<int64_t>(1L << 31)) { |
| rescaled = static_cast<int64_t>(1L << 31) - 1; |
| } |
| |
| int32_t upper = (rescaled >> 16) & 0xFFFF; |
| // TABLE output is signed 16 bits with range [-32768, 32767] |
| // Lower 16 bits are unsigned and ranges [0, 65536] |
| // Need to adjust value with offset 0x8000 in table generation |
| // Legalization should add this back before recovering 32-bit value |
| int32_t lower = (rescaled & 0xFFFF) - 0x8000; |
| |
| upper_table.push_back(upper); |
| lower_table.push_back(lower); |
| } |
| |
| auto element_qtype = |
| UniformQuantizedType::get(true, rewriter.getIntegerType(16), |
| rewriter.getF32Type(), 1.0f, 0, -32768, 32767); |
| auto const_type = RankedTensorType::get({513}, element_qtype); |
| auto storage_type = |
| RankedTensorType::get({513}, element_qtype.getStorageType()); |
| |
| auto upper_const_attr = |
| DenseElementsAttr::get(storage_type, llvm::makeArrayRef(upper_table)); |
| auto lower_const_attr = |
| DenseElementsAttr::get(storage_type, llvm::makeArrayRef(lower_table)); |
| |
| upper_const = |
| rewriter.create<tosa::ConstOp>(op->getLoc(), const_type, upper_const_attr) |
| .getResult(); |
| lower_const = |
| rewriter.create<tosa::ConstOp>(op->getLoc(), const_type, lower_const_attr) |
| .getResult(); |
| } |
| |
| // Create a 32-bit float constant operator from a float |
| Value getTosaConstTensorSingleF32(PatternRewriter& rewriter, Operation* op, |
| float val) { |
| auto const_type = RankedTensorType::get({}, rewriter.getF32Type()); |
| auto const_attr = DenseElementsAttr::get(const_type, val); |
| |
| auto const_op = |
| rewriter.create<tosa::ConstOp>(op->getLoc(), const_type, const_attr); |
| return const_op.getResult(); |
| } |
| |
| // Create a 32-bit integer constant operator from an int |
| Value getTosaConstTensorSingleI32(PatternRewriter& rewriter, Operation* op, |
| int32_t val) { |
| auto const_type = RankedTensorType::get({}, rewriter.getIntegerType(32)); |
| auto const_attr = DenseElementsAttr::get(const_type, val); |
| |
| auto const_op = |
| rewriter.create<tosa::ConstOp>(op->getLoc(), const_type, const_attr); |
| return const_op.getResult(); |
| } |
| |
| // Create a vector from a 32-bit value tensor. Returns the size of |
| // the new vector or -1 on error. |
| LogicalResult getVectorFromValue32(Value val, SmallVectorImpl<int32_t>& vec) { |
| int i = 0; |
| |
| ElementsAttr elems; |
| |
| vec.clear(); |
| |
| if (!matchPattern(val, m_Constant(&elems))) return failure(); |
| |
| for (auto idx : elems.getValues<IntegerAttr>()) { |
| vec.push_back(idx.getInt()); |
| i++; |
| } |
| |
| return success(); |
| } |
| |
| // Calculates the TOSA padding values based on TF operators padded with |
| // SAME/VALID. |
| // |
| // This could pass tensorflow::FilterTensorFormat and do |
| // GetFilterTensorSpatialDimIndex but the current TF core libs do not support |
| // FORMAT_OHWI parsing by that function in core/util/tensor_format.h |
| bool getPaddingValuesFromPadType(tensorflow::Padding tf_pad, |
| tensorflow::TensorFormat data_format_tf, |
| uint32_t first_filter_spatial_dim, |
| ShapedType input_type, ShapedType filter_type, |
| ArrayAttr strides, ArrayAttr dilations, |
| PatternRewriter& rewriter, |
| ArrayAttr& explicit_padding) { |
| assert(tf_pad != tensorflow::Padding::EXPLICIT); |
| if (!input_type.hasRank() || !filter_type.getRank()) return false; |
| |
| // Storing the numeric padding values is useful for TOSA codegen, as opposed |
| // to holding the padding regime mnemonic, i.e. SAME, VALID, FULL, ... |
| SmallVector<int64_t> computed_paddings; |
| |
| int64_t pad_before, pad_after; |
| for (int i = 0; i < 2; i++) { // Two spatial dimensions X&Y |
| int64_t ifm_dim = GetTensorSpatialDimIndex( |
| 4, data_format_tf, i); // 4D tensor, NHWC/NCHW format |
| int64_t filter_dim = first_filter_spatial_dim + i; |
| |
| int64_t dim_dilation = dilations[i].template cast<IntegerAttr>().getInt(); |
| int64_t dim_stride = strides[i].template cast<IntegerAttr>().getInt(); |
| |
| int64_t ip_size = input_type.getDimSize(ifm_dim); |
| int64_t f_size = filter_type.getDimSize(filter_dim); |
| // If we have a dynamic shape we should assume it is wide enough. |
| ip_size = ip_size < 0 ? f_size * dim_dilation : ip_size; |
| int64_t op_size, pad_before_tf, |
| pad_after_tf; // Complains if using int64_T |
| tensorflow::Status status = tensorflow::GetWindowedOutputSizeVerboseV2( |
| ip_size, f_size, dim_dilation, dim_stride, tf_pad, &op_size, |
| &pad_before_tf, &pad_after_tf); |
| if (!status.ok()) return false; |
| |
| pad_before = pad_before_tf; |
| pad_after = pad_after_tf; |
| computed_paddings.push_back(pad_before); |
| computed_paddings.push_back(pad_after); |
| } |
| |
| explicit_padding = rewriter.getI64ArrayAttr(computed_paddings); |
| return true; |
| } |
| |
| // Calculates the TOSA padding values for explicit-padded TF operators. |
| // |
| // This function only handles the TF padding array explicit_padding, which is |
| // only present in certain TF ops. All others encode padding using the string |
| // SAME/VALID, which is interpreted using the getPaddingValuesFromPadString |
| // function below. |
| |
| // The explicit padding array in TF holds 2 pad values for every |
| // dimension, even those that are not the 2 spatial ones. Just extract the |
| // 2x pad values for the XY dims. |
| ArrayAttr getPaddingValuesFromExplicitPadAttr( |
| ArrayAttr explicit_pad, tensorflow::TensorFormat data_format_tf, |
| PatternRewriter& rewriter) { |
| SmallVector<int64_t> computed_paddings; |
| |
| int64_t pad_before, pad_after; |
| for (int i = 0; i < 2; i++) { // Two spatial dimensions X&Y |
| int64_t dim = GetTensorSpatialDimIndex(4, data_format_tf, |
| i); // 4D tensor, NHWC/NCHW format |
| |
| pad_before = explicit_pad[dim * 2].template cast<IntegerAttr>().getInt(); |
| pad_after = explicit_pad[dim * 2 + 1].template cast<IntegerAttr>().getInt(); |
| computed_paddings.push_back(pad_before); |
| computed_paddings.push_back(pad_after); |
| } |
| |
| return rewriter.getI64ArrayAttr(computed_paddings); |
| } |
| |
| // Calculates the TOSA padding values for transposeConv2d |
| bool getTransposeConv2dPaddingValues( |
| tensorflow::Padding tf_pad, tensorflow::TensorFormat data_format_tf, |
| uint32_t first_filter_spatial_dim, ShapedType input_type, |
| ShapedType filter_type, ShapedType output_type, ArrayAttr strides, |
| ArrayAttr dilations, PatternRewriter& rewriter, |
| ArrayAttr& explicit_padding) { |
| assert(tf_pad != tensorflow::Padding::EXPLICIT); |
| if (!input_type.hasRank() || !filter_type.hasRank() || !output_type.hasRank()) |
| return false; |
| |
| // Storing the numeric padding values is useful for TOSA codegen, as opposed |
| // to holding the padding regime mnemonic, i.e. SAME, VALID, FULL, ... |
| |
| SmallVector<int64_t> computed_paddings; |
| |
| int64_t pad_before, pad_after; |
| for (int i = 0; i < 2; i++) { // Two spatial dimensions X&Y |
| int64_t ifm_dim = GetTensorSpatialDimIndex( |
| 4, data_format_tf, i); // 4D tensor, NHWC/NCHW format |
| int64_t ofm_dim = GetTensorSpatialDimIndex( |
| 4, data_format_tf, i); // 4D tensor, NHWC/NCHW format |
| int64_t filter_dim = first_filter_spatial_dim + i; |
| |
| int64_t ifm_size = input_type.getDimSize(ifm_dim); |
| int64_t filter_size = filter_type.getDimSize(filter_dim); |
| int64_t ofm_size = output_type.getDimSize(ofm_dim); |
| int64_t dim_dilation = dilations[i].template cast<IntegerAttr>().getInt(); |
| int64_t dim_stride = strides[i].template cast<IntegerAttr>().getInt(); |
| |
| int effective_filter_size = (filter_size - 1) * dim_dilation + 1; |
| int total_padding = |
| ((ifm_size - 1) * dim_stride + effective_filter_size - ofm_size); |
| total_padding = total_padding > 0 ? total_padding : 0; |
| |
| pad_before = total_padding / 2; |
| pad_after = total_padding - pad_before; |
| |
| computed_paddings.push_back(pad_before); |
| } |
| |
| explicit_padding = rewriter.getI64ArrayAttr(computed_paddings); |
| return true; |
| } |
| |
| // Templated function to create a constant op for given type and shape. |
| // T: storage C type. |
| // Default template creates a constant tensor in T. |
| template <typename T> |
| llvm::Optional<Value> getConstTensor(PatternRewriter& rewriter, Operation* op, |
| ArrayRef<T> vec, ArrayRef<int64_t> shape) { |
| int64_t num_total_elements = 1; |
| for (int64_t a : shape) { |
| num_total_elements *= a; |
| } |
| |
| if (vec.size() != num_total_elements) { |
| op->emitOpError("getConstTensor(): number of elements mismatch."); |
| return llvm::None; |
| } |
| |
| auto const_type = |
| RankedTensorType::get(shape, rewriter.getIntegerType(sizeof(T) * 8)); |
| auto const_attr = DenseElementsAttr::get(const_type, vec); |
| |
| auto const_op = |
| rewriter.create<tosa::ConstOp>(op->getLoc(), const_type, const_attr); |
| return const_op.getResult(); |
| } |
| |
| // Template specialization for APInt |
| template <> |
| llvm::Optional<Value> getConstTensor<APInt>(PatternRewriter& rewriter, |
| Operation* op, ArrayRef<APInt> vec, |
| ArrayRef<int64_t> shape) { |
| int64_t num_total_elements = 1; |
| for (int64_t a : shape) { |
| num_total_elements *= a; |
| } |
| |
| if (vec.size() != num_total_elements) { |
| op->emitOpError("getConstTensor(): number of elements mismatch."); |
| return llvm::None; |
| } |
| |
| auto const_type = RankedTensorType::get( |
| shape, rewriter.getIntegerType(vec[0].getBitWidth())); |
| auto const_attr = DenseElementsAttr::get(const_type, vec); |
| |
| auto const_op = |
| rewriter.create<tosa::ConstOp>(op->getLoc(), const_type, const_attr); |
| return const_op.getResult(); |
| } |
| |
| // Template specialization for float |
| template <> |
| llvm::Optional<Value> getConstTensor<float>(PatternRewriter& rewriter, |
| Operation* op, ArrayRef<float> vec, |
| ArrayRef<int64_t> shape) { |
| int64_t num_total_elements = 1; |
| for (int64_t a : shape) { |
| num_total_elements *= a; |
| } |
| |
| if (vec.size() != num_total_elements) { |
| op->emitOpError("getConstTensor(): number of elements mismatch."); |
| return llvm::None; |
| } |
| |
| auto const_type = RankedTensorType::get(shape, rewriter.getF32Type()); |
| auto const_attr = DenseElementsAttr::get(const_type, vec); |
| |
| auto const_op = |
| rewriter.create<tosa::ConstOp>(op->getLoc(), const_type, const_attr); |
| return const_op.getResult(); |
| } |
| |
| // Template instantiation |
| template llvm::Optional<Value> getConstTensor<int32_t>(PatternRewriter&, |
| Operation*, |
| ArrayRef<int32_t> vec, |
| ArrayRef<int64_t> shape); |
| |
| // Check if scale32 mode is used for given output_element_type |
| bool isScale32(mlir::quant::UniformQuantizedType output_element_type) { |
| return (output_element_type.getStorageTypeIntegralWidth() == 8); |
| } |
| |
| } // namespace tosa |
| } // namespace mlir |