/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

// This header file defines common utils used by TFLite transformation
// passes to work with op attributes.

#ifndef TENSORFLOW_COMPILER_MLIR_LITE_QUANTIZATION_QUANTIZATION_UTILS_H_
#define TENSORFLOW_COMPILER_MLIR_LITE_QUANTIZATION_QUANTIZATION_UTILS_H_

#include <functional>
#include <memory>
#include <unordered_map>
#include <utility>
#include <vector>

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "mlir/Dialect/QuantOps/QuantTypes.h"  // TF:local_config_mlir
#include "mlir/IR/BlockAndValueMapping.h"  // TF:local_config_mlir
#include "mlir/IR/PatternMatch.h"  // TF:local_config_mlir
#include "mlir/IR/StandardTypes.h"  // TF:local_config_mlir
#include "mlir/StandardOps/Ops.h"  // TF:local_config_mlir

namespace mlir {
namespace TFL {

using QuantParams = quant::QuantizedType;
using SignedInteger = std::pair<unsigned, unsigned>;  // bitwidth and sign
using QuantParamsForResults = llvm::SmallVector<QuantParams, 4>;
using AccumulatorScaleFunc =
    std::function<QuantParams(const std::vector<QuantParams>&)>;
// Quantization spec of an op, driving the quantization algorithm.
struct OpQuantSpec {
  // Whether the op has a quantizable result. This flag is set to false if the
  // op has the "TFL::NoQuantizableResult" trait.
  bool is_quantizable = true;

  // Whether the op requires the same scale for all inputs and the result. This
  // flag is set to true if the op has the "TFL::SameOperandsAndResultScale"
  // trait.
  bool requires_same_scale = false;

  // Maps the operand index of a bias input to its quantization specification:
  // the indexes of the non-bias operands and the function deriving the bias
  // quantization parameters from the parameters of those non-bias operands.
  // This map is empty if the op doesn't have a bias operand.
  std::unordered_map<int, std::pair<std::vector<int>, AccumulatorScaleFunc>>
      biases_params;

  // Quantization parameters for value-restricted outputs. These are the
  // "hard-coded" parameters and should be used unconditionally for the
  // quantized op. This map is empty if the op doesn't have value-restricted
  // outputs.
  llvm::DenseMap<SignedInteger, QuantParamsForResults> restricted_output_params;
};

// A function signature for getting the particular OpQuantSpec for the provided
// op.
typedef std::unique_ptr<OpQuantSpec> (*OpQuantSpecGetter)(Operation* op);
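
// Example OpQuantSpecGetter (a sketch with a hypothetical op "MyConvOp";
// real getters are typically generated from the op definitions):
//
//   std::unique_ptr<OpQuantSpec> GetMyConvOpQuantSpec(Operation* op) {
//     auto spec = absl::make_unique<OpQuantSpec>();
//     if (llvm::isa<MyConvOp>(op)) {
//       // The bias is operand 2; its quantization parameters are derived
//       // from those of operands 0 (input) and 1 (filter) by
//       // GetUniformQuantizedTypeForBias (declared below).
//       spec->biases_params[2] = {{0, 1}, GetUniformQuantizedTypeForBias};
//     }
//     return spec;
//   }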

// A generic rewrite pattern which matches any N-in-1-out operation whose
// quantization parameters have been propagated to all of its operand and
// result values. The quantization parameters are annotated by the Q/DQ op
// pairs. Each matched pattern is rewritten by its quantized alternative.
//
// This pattern assumes all the matched ops are quantizable. This assumption is
// always right, except when a "Q" op is used as a requantize op. For non-"Q"
// ops, quantization parameters should be propagated to their results.
//
// This pattern only matches ops which have a single result.
template <typename Q, typename DQ>
struct GenericFullQuantizationPattern : public RewritePattern {
  explicit GenericFullQuantizationPattern(MLIRContext* context)
      : RewritePattern(Q::getOperationName(), 1, context) {}

  PatternMatchResult matchAndRewrite(Operation* op,
                                     PatternRewriter& rewriter) const override {
    if (op->getNumResults() != 1) {
      return matchFailure();
    }
    auto quantize_op = cast<Q>(op);
    Operation* quantized_op = quantize_op.input()->getDefiningOp();
    // If the input is a block argument (i.e. has no defining op) or is
    // produced by another "Q" or "DQ" op, we shouldn't rewrite this op.
    if (!quantized_op || llvm::isa<Q>(quantized_op) ||
        llvm::isa<DQ>(quantized_op)) {
      return matchFailure();
    }

    // Collect all the quantized inputs and "clone" the matched op by these
    // inputs.
    SmallVector<Value*, 4> inputs;
    inputs.reserve(quantized_op->getNumOperands());
    for (auto operand : quantized_op->getOperands()) {
      auto tensor_type = operand->getType().dyn_cast<TensorType>();
      if (!tensor_type) {
        // Non-tensor (e.g. NoneType) operands can't be quantized.
        return matchFailure();
      }
      auto operand_ele_type = tensor_type.getElementType();
      if (auto op_inst = dyn_cast_or_null<DQ>(operand->getDefiningOp())) {
        inputs.push_back(op_inst.input());
      } else if (operand_ele_type.isa<IntegerType>()) {
        // If the operand is an integer tensor, then it doesn't require the
        // DQ op in the pattern.
        inputs.push_back(operand);
      } else {
        return matchFailure();
      }
    }

    // Collect all the quantized outputs and replace them by the results of the
    // new quantized op.
    llvm::SmallDenseMap<Value*, int> outputs_replaced;
    SmallVector<Type, 4> output_types;
    output_types.reserve(quantized_op->getNumResults());
    for (auto result : llvm::enumerate(quantized_op->getResults())) {
      if (!result.value()->hasOneUse()) return matchFailure();
      auto result_ele_type =
          result.value()->getType().cast<TensorType>().getElementType();
      if (auto user = dyn_cast_or_null<Q>(*result.value()->user_begin())) {
        outputs_replaced.insert({user.output(), result.index()});
        output_types.push_back(user.getType());
      } else if (result_ele_type.template isa<IntegerType>()) {
        // If the result is an integer tensor, then it doesn't require the
        // Q op in the pattern; keep its original tensor type.
        outputs_replaced.insert({result.value(), result.index()});
        output_types.push_back(result.value()->getType());
      } else {
        return matchFailure();
      }
    }

    // Use OpBuilder so we can use the op name to create the new op.
    OpBuilder builder(quantized_op);
    OperationState new_state(quantized_op->getLoc(),
                             quantized_op->getName().getStringRef(), inputs,
                             output_types, quantized_op->getAttrs());
    Operation* new_op = builder.createOperation(new_state);
    for (auto output : outputs_replaced) {
      output.getFirst()->replaceAllUsesWith(
          new_op->getResult(output.getSecond()));
    }
    return matchSuccess();
  }
};
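
// A minimal sketch of how this pattern is typically registered in a quantize
// pass, with TFL::QuantizeOp/TFL::DequantizeOp as the Q/DQ pair (the exact
// pattern-driver API depends on the MLIR version):
//
//   OwningRewritePatternList patterns;
//   patterns.insert<
//       GenericFullQuantizationPattern<TFL::QuantizeOp, TFL::DequantizeOp>>(
//       context);
//   applyPatternsGreedily(func, std::move(patterns));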

// Converts the min/max/storage_type/narrow_range information to a
// QuantizedType, and then returns the attribute containing the QuantizedType.
TypeAttr GetQuantizedTypeAttr(Builder builder, Type input_type, FloatAttr min,
                              FloatAttr max, Type storage_type,
                              bool narrow_range = false,
                              bool is_signed = false);
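
// Example use of GetQuantizedTypeAttr above (a sketch): an attribute for an
// f32 input with the range [-1.0, 1.0] and 8-bit unsigned storage:
//
//   TypeAttr attr = GetQuantizedTypeAttr(
//       builder, input_type, builder.getF32FloatAttr(-1.0f),
//       builder.getF32FloatAttr(1.0f), builder.getIntegerType(8));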

// Converts the min/max/num_bits/narrow_range information to a QuantizedType,
// and then returns the attribute containing the QuantizedType. Note that this
// method assumes an unsigned quantization type, which is implicitly defined by
// FakeQuant* ops in TensorFlow.
TypeAttr GetQuantizedTypeAttr(Builder builder, Type input_type, Attribute min,
                              Attribute max, IntegerAttr num_bits,
                              BoolAttr narrow_range);

// Casts the `target` type to a quantized type by using the quantization
// parameters from the type in the `source` type attribute.
// Examples:
//   f32 -> !quant.uniform<i8:f32, 1.0>
//   tensor<4xf32> -> tensor<4x!quant.uniform<i8:f32, 1.0>>
// The result is wrapped in a type attribute. Returns nullptr if the cast
// isn't valid.
TypeAttr CastQuantizedTypeAttrFromExpressedType(Builder builder,
                                                TypeAttr source, Type target);

// Quantizes the elements in the attribute `real_value` by the quantization
// parameters in `tensor_type`. Returns an empty Attribute if `tensor_type`
// is not a QuantizedType or the quantization fails.
ElementsAttr Quantize(Attribute real_value, Type tensor_type);
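
// Example use of Quantize above (a sketch; `weights` is a float
// DenseElementsAttr and `new_type` is a tensor type whose element type is a
// quant.uniform type):
//
//   ElementsAttr quantized = Quantize(weights, new_type);
//   if (!quantized) return matchFailure();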

// Returns the quantized type for an elements attribute. The quantization
// parameters in this type are based on the min and max elements of the
// attribute. When the elements of `attr` are not floating-point values, or
// the value range doesn't straddle zero, an empty type is returned.
Type GetUniformQuantizedTypeForElementsAttr(ElementsAttr attr,
                                            unsigned storage_type_width,
                                            bool is_signed, bool narrow_range);
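
// Example use of GetUniformQuantizedTypeForElementsAttr above (a sketch): an
// 8-bit signed, narrow-range type for per-tensor weights:
//
//   Type qtype = GetUniformQuantizedTypeForElementsAttr(
//       attr, /*storage_type_width=*/8, /*is_signed=*/true,
//       /*narrow_range=*/true);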

// Returns the quantized type of a bias input, given the quantized types of
// other operands which are multiply-accumulated (the bias is added to the
// accumulated value).
quant::QuantizedType GetUniformQuantizedTypeForBias(
    const std::vector<quant::QuantizedType>& op_types);
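
// For example, GetUniformQuantizedTypeForBias follows the usual TFLite
// convention: for a conv whose input has scale s_in and whose filter has
// scale s_w, the bias is quantized with scale s_in * s_w (and a wider storage
// type, typically i32), so the bias and the accumulated products share the
// same scale.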

// Propagates quantization parameters across the ops in this function and
// satisfies the quantization specification of the ops. This method assumes
// the initial quantization parameters are stored as adjacent quantize and
// dequantize ops, and the propagation results are materialized by inserting
// pairs of quantize and dequantize ops into this function.
void ApplyQuantizationParamsPropagation(mlir::FuncOp func, bool is_signed,
                                        OpQuantSpecGetter op_quant_spec_getter);
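
// Example use of ApplyQuantizationParamsPropagation above (a sketch, reusing
// the hypothetical GetMyConvOpQuantSpec getter from the OpQuantSpecGetter
// example earlier in this file):
//
//   ApplyQuantizationParamsPropagation(func, /*is_signed=*/false,
//                                      GetMyConvOpQuantSpec);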

}  // end namespace TFL
}  // end namespace mlir

#endif  // TENSORFLOW_COMPILER_MLIR_LITE_QUANTIZATION_QUANTIZATION_UTILS_H_