| /* Copyright 2020 The TensorFlow Authors. All Rights Reserved. |
| |
| Licensed under the Apache License, Version 2.0 (the "License"); |
| you may not use this file except in compliance with the License. |
| You may obtain a copy of the License at |
| |
| http://www.apache.org/licenses/LICENSE-2.0 |
| |
| Unless required by applicable law or agreed to in writing, software |
| distributed under the License is distributed on an "AS IS" BASIS, |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| See the License for the specific language governing permissions and |
| limitations under the License. |
| ==============================================================================*/ |
| |
// This transformation pass converts dense tensors to sparse format.
| |
| #include "absl/memory/memory.h" |
| #include "third_party/eigen3/Eigen/Core" |
| #include "mlir/Dialect/Func/IR/FuncOps.h" // from @llvm-project |
| #include "mlir/IR/Attributes.h" // from @llvm-project |
| #include "mlir/IR/Builders.h" // from @llvm-project |
| #include "mlir/IR/BuiltinTypes.h" // from @llvm-project |
| #include "mlir/Pass/Pass.h" // from @llvm-project |
| #include "tensorflow/compiler/mlir/lite/ir/tfl_ops.h" |
| #include "tensorflow/lite/kernels/internal/utils/sparsity_format_converter.h" |
| |
| //===----------------------------------------------------------------------===// |
| // The DenseToSparse Pass. |
| // |
| namespace mlir { |
| namespace TFL { |
| |
| namespace { |
| // If sparsity level is below this threshold, keep the tensor in dense format. |
| constexpr float kMinSparsityLevel = 0.3; |
// A block configuration is considered to match a float constant when the
// block sparsity it achieves is above this fraction of the constant's random
// sparsity.
constexpr float kBlockOverRandomSparsityRatio = 0.9;
// Quantization can set some originally non-zero values to 0, so a lower ratio
// is used when identifying the block configuration of quantized constants.
constexpr float kBlockOverRandomSparsityRatioQuant = 0.8;
| |
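// Bit-level conversions between MLIR's APFloat (with IEEE-half semantics) and
// Eigen::half. Both directions preserve the exact 16-bit pattern, so
// EigenHalfToAPFloat(APFloatToEigenHalf(x)) returns x bit-for-bit.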
| Eigen::half APFloatToEigenHalf(const APFloat& val) { |
| uint16_t raw_data = val.bitcastToAPInt().getZExtValue(); |
| return Eigen::numext::bit_cast<Eigen::half>(raw_data); |
| } |
| |
| APFloat EigenHalfToAPFloat(const Eigen::half& val) { |
| uint16_t raw_data = Eigen::numext::bit_cast<uint16_t>(val); |
| return APFloat(APFloat::IEEEhalf(), APInt(16, raw_data)); |
| } |
| |
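// Derives the encoding parameters for a given block configuration: every
// original dimension is traversed in order, the innermost original dimension
// is marked sparse (CSR), and one extra dense dimension is appended per
// blocked axis. Worked example (illustrative): for a rank-2 weight with
// block_size = {1, 4}, this produces
//   traversal_order = {0, 1, 2}  (dim 2 is the appended block dimension)
//   format = {kTfLiteDimDense, kTfLiteDimSparseCSR, kTfLiteDimDense}
//   b_map  = {1}  (the block subdivides original dim 1)
//   b_size = {4}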
| void PopulateEncodingParams(const std::vector<int>& block_size, |
| std::vector<int>* traversal_order, |
| std::vector<TfLiteDimensionType>* format, |
| std::vector<int>* b_map, std::vector<int>* b_size) { |
| const int dims_count = block_size.size(); |
| traversal_order->resize(dims_count); |
| format->resize(dims_count); |
| for (int i = 0; i < dims_count; i++) { |
| (*traversal_order)[i] = i; |
| } |
| for (int i = 0; i < dims_count - 1; i++) { |
| (*format)[i] = kTfLiteDimDense; |
| } |
| (*format)[dims_count - 1] = kTfLiteDimSparseCSR; |
| *b_map = {}; |
| *b_size = {}; |
| int block_rank = 0; |
| for (int i = 0; i < dims_count; i++) { |
| if (block_size[i] != 1) { |
| traversal_order->push_back(block_rank + dims_count); |
| format->push_back(kTfLiteDimDense); |
| block_rank++; |
| b_map->push_back(i); |
| b_size->push_back(block_size[i]); |
| } |
| } |
| } |
| |
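// Returns the fraction of zero elements, e.g. 30 zeros out of 100 elements
// yields a sparsity of 0.3.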
| inline float GetSparsity(const int num_zeros, const int num_elements) { |
| return (1.0 * num_zeros / num_elements); |
| } |
| |
| float CalculateRandomSparsity(const ElementsAttr& attr, |
| const ShapedType& type) { |
| int num_elements = type.getNumElements(); |
| int num_zeros = 0; |
| |
| if (type.getElementType().isa<FloatType>()) { |
| for (const auto val : attr.getValues<APFloat>()) { |
| if (val.isZero()) { |
| num_zeros++; |
| } |
| } |
| } else if (type.getElementType().isa<quant::QuantizedType>()) { |
| for (const auto val : attr.getValues<int8_t>()) { |
| if (val == 0) { |
| num_zeros++; |
| } |
| } |
| } |
| |
| return GetSparsity(num_zeros, num_elements); |
| } |
| |
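// Measures the sparsity obtained when the constant is encoded with the given
// block configuration, by running the TFLite format converter and comparing
// the compressed element count against the dense element count.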
| float CalculateBlockSparsity(const ElementsAttr& attr, const ShapedType& type, |
| const std::vector<int>& block_size) { |
| float sparsity = 0; |
| std::vector<int> shape(2); |
| shape[0] = type.getDimSize(0); |
| shape[1] = type.getDimSize(1); |
| |
| std::vector<int> traversal_order = {}; |
| std::vector<TfLiteDimensionType> format = {}; |
| std::vector<int> b_size = {}; |
| std::vector<int> b_map = {}; |
| PopulateEncodingParams(block_size, &traversal_order, &format, &b_map, |
| &b_size); |
| |
| if (type.getElementType().isF32()) { |
| tflite::internal::sparsity::FormatConverter<float> format_converter( |
| shape, traversal_order, format, b_size, b_map); |
| std::vector<float> data; |
| data.reserve(type.getNumElements()); |
| for (const auto val : attr.getValues<float>()) data.push_back(val); |
| format_converter.DenseToSparse(data.data()); |
| sparsity = |
| GetSparsity(type.getNumElements() - format_converter.GetData().size(), |
| type.getNumElements()); |
| } else if (type.getElementType().isF16()) { |
| tflite::internal::sparsity::FormatConverter<Eigen::half> format_converter( |
| shape, traversal_order, format, b_size, b_map); |
| std::vector<Eigen::half> data; |
| data.reserve(type.getNumElements()); |
| for (const auto& val : attr.getValues<APFloat>()) |
| data.push_back(APFloatToEigenHalf(val)); |
| format_converter.DenseToSparse(data.data()); |
| sparsity = |
| GetSparsity(type.getNumElements() - format_converter.GetData().size(), |
| type.getNumElements()); |
| } else if (type.getElementType().isa<quant::QuantizedType>()) { |
| tflite::internal::sparsity::FormatConverter<int8_t> format_converter( |
| shape, traversal_order, format, b_size, b_map); |
| std::vector<int8_t> data; |
| data.reserve(type.getNumElements()); |
| for (const auto val : attr.getValues<int8_t>()) data.push_back(val); |
| format_converter.DenseToSparse(data.data()); |
| sparsity = |
| GetSparsity(type.getNumElements() - format_converter.GetData().size(), |
| type.getNumElements()); |
| } |
| |
| return sparsity; |
| } |
| |
struct InspectResult {
  // Whether the weight tensor is sparse enough to be compressed.
  bool can_compress;
  // If the weight tensor cannot be encoded in a block configuration that the
  // op supports, a Densify() op will be inserted afterwards to fall back to
  // dense execution.
  bool needs_densify;
  // Among the supported block configurations of the op, the one selected to
  // encode the sparse weight.
  std::vector<int> selected_block_size;
};
| |
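// Inspects a constant-like weight and decides whether it is worth compressing
// and, if so, which of the op's supported block configurations to encode it
// with.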
| InspectResult InspectWeight( |
| Operation* inst, const std::vector<std::vector<int>>& supported_block_size, |
| const float ratio_threshold) { |
| ElementsAttr attr; |
| ShapedType type; |
| InspectResult result = {}; |
| if (auto cst = dyn_cast<ConstOp>(inst)) { |
| attr = cst.value(); |
| type = cst.getType().cast<ShapedType>(); |
| } else if (auto cst = dyn_cast<QConstOp>(inst)) { |
| attr = cst.value(); |
| type = cst.getType().cast<ShapedType>(); |
| } else { |
| result.can_compress = false; |
| return result; |
| } |
| |
  // Currently we only support compressing the weights of these ops: Conv,
  // DepthwiseConv, and TransposeConv, whose filters have rank 4, and
  // FullyConnected, whose filter has rank 2.
| if (type.getRank() != 2 && type.getRank() != 4) { |
| result.can_compress = false; |
| return result; |
| } |
| |
| float random_sparsity = CalculateRandomSparsity(attr, type); |
| if (random_sparsity < kMinSparsityLevel) { |
| result.can_compress = false; |
| return result; |
| } |
| |
| result.can_compress = true; |
| |
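  // Probe each supported block configuration; one is selected when the block
  // sparsity it achieves is close enough to the random sparsity. Worked
  // example (illustrative): with random sparsity 0.5 and the float threshold
  // of 0.9, a 1x4 block configuration is selected only if its block sparsity
  // exceeds 0.45.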
  result.needs_densify = true;
  for (const auto& block_size : supported_block_size) {
    const float block_sparsity =
        CalculateBlockSparsity(attr, type, block_size);
    if (block_sparsity / random_sparsity > ratio_threshold) {
      result.needs_densify = false;
      result.selected_block_size = block_size;
      break;
    }
  }
| |
| return result; |
| } |
| |
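// Runs the dense-to-sparse conversion for one constant-like op, writes the
// resulting encoding to `s_param`, and returns the compressed value buffer.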
| template <typename T> |
| std::vector<T> BuildSparsityParameterAttribute( |
| const std::vector<int>& block_size, const T* dense_buffer, Operation* inst, |
| OpBuilder* builder, SparsityParameterAttr* s_param) { |
| ElementsAttr attr; |
| ShapedType type; |
| if (auto cst = dyn_cast<ConstOp>(inst)) { |
| attr = cst.value(); |
| type = cst.getType().cast<ShapedType>(); |
| } else if (auto cst = dyn_cast<QConstOp>(inst)) { |
| attr = cst.value(); |
| type = cst.getType().cast<ShapedType>(); |
| } else { |
| assert(false && "Expected a constant-like op"); |
| } |
| const int dims_count = type.getRank(); |
| std::vector<int> shape(dims_count); |
| for (int i = 0; i < dims_count; i++) { |
| shape[i] = type.getDimSize(i); |
| } |
| |
| std::vector<int> traversal_order = {}; |
| std::vector<TfLiteDimensionType> format = {}; |
| std::vector<int> b_size = {}; |
| std::vector<int> b_map = {}; |
| PopulateEncodingParams(block_size, &traversal_order, &format, &b_map, |
| &b_size); |
| |
| tflite::internal::sparsity::FormatConverter<T> format_converter( |
| shape, traversal_order, format, b_size, b_map); |
| format_converter.DenseToSparse(dense_buffer); |
| const auto& metadata = format_converter.GetDimMetadata(); |
| const auto& compressed_data = format_converter.GetData(); |
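  // The converter yields two metadata vectors per traversal dimension: for a
  // dense dimension only the dimension size (metadata[2 * i][0]) is
  // meaningful; for a sparse (CSR) dimension the pair holds the segment and
  // index arrays.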
| const int dim_size = metadata.size() / 2; |
| std::vector<Attribute> dim_metadata(traversal_order.size()); |
| for (int i = 0; i < dim_size; i++) { |
| if (format[i] == kTfLiteDimDense) { |
| dim_metadata[i] = DimensionMetadataAttr::get( |
| ::mlir::TFL::DimensionTypeAttr::get( |
| builder->getContext(), ::mlir::TFL::DimensionType::DENSE), |
| builder->getI32IntegerAttr(metadata[2 * i][0]), |
| builder->getArrayAttr({}), builder->getArrayAttr({}), |
| builder->getContext()); |
| } else { |
| dim_metadata[i] = DimensionMetadataAttr::get( |
| ::mlir::TFL::DimensionTypeAttr::get( |
| builder->getContext(), ::mlir::TFL::DimensionType::SPARSE_CSR), |
| builder->getI32IntegerAttr(0), |
| builder->getI32ArrayAttr(metadata[2 * i]), |
| builder->getI32ArrayAttr(metadata[2 * i + 1]), builder->getContext()); |
| } |
| } |
| *s_param = SparsityParameterAttr::get( |
| builder->getI32ArrayAttr(traversal_order), |
| builder->getI32ArrayAttr(b_map), builder->getArrayAttr(dim_metadata), |
| builder->getContext()); |
| |
| return compressed_data; |
| } |
| |
// This pass encodes sparse weights in the model in the proper format, and adds
// a Densify() op if necessary. The general algorithm is:
// 1. Get the list of operands (weights) of an op that can be sparse.
// 2. Get the list of supported block configurations of the op.
// 3. Calculate the random sparsity of the weight.
// 3.1. If the sparsity level is below the encoding threshold, keep the weight
//      dense.
// 3.2. If the sparsity level is above the encoding threshold, go to 4.
// 4. Try to encode the weight with the supported block configurations. If the
//    weight was pruned with the same block configuration, the block sparsity
//    level should match the random sparsity.
// 4.1. Return the matching block configuration if found.
// 4.2. If no matching block configuration is found, encode the weight with
//      random sparsity, and add a Densify() op to fall back to dense
//      execution.
| struct DenseToSparse |
| : public PassWrapper<DenseToSparse, OperationPass<func::FuncOp>> { |
| MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(DenseToSparse) |
| |
| void runOnOperation() override; |
| |
| StringRef getArgument() const final { |
    // This is the argument used to refer to the pass in
    // the textual format (on the command line, for example).
| return "tfl-dense-to-sparse"; |
| } |
| StringRef getDescription() const final { |
| // This is a brief description of the pass. |
| return "Convert dense tensor to sparse format."; |
| } |
| }; |
| |
| void DenseToSparse::runOnOperation() { |
| func::FuncOp func = getOperation(); |
| OpBuilder builder(func); |
| |
| func.walk([&](SparseOpInterface sparse_op) { |
| const auto& sparse_operands = sparse_op.GetSparseOperands(); |
| std::vector<std::vector<int>> supported_block_size; |
| for (int operand : sparse_operands) { |
| auto* op = sparse_op.getOperation(); |
| auto value = op->getOperand(operand); |
| |
| auto* inst = value.getDefiningOp(); |
| if (!inst) { |
| continue; |
| } |
| |
| // There could be a Dequantize op after the weight tensor in cases like |
| // fp16 post-training quantization. We need to get the weight from the |
| // input of the Dequantize op. |
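      // A sketch of the pattern handled here (illustrative IR, shapes made
      // up):
      //   %w  = "tfl.pseudo_const"() {...} : () -> tensor<16x16xf16>
      //   %dq = "tfl.dequantize"(%w)
      //       : (tensor<16x16xf16>) -> tensor<16x16xf32>
      // The sparse encoding must be applied to %w, not to %dq.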
| if (isa<DequantizeOp>(inst)) { |
| op = inst; |
| value = inst->getOperand(0); |
| inst = value.getDefiningOp(); |
| if (!inst) { |
| continue; |
| } |
| operand = 0; |
| } |
| |
| ShapedType type; |
| float ratio_threshold = kBlockOverRandomSparsityRatio; |
| if (isa<ConstOp>(inst)) { |
| supported_block_size = sparse_op.GetFloatBlockSize(); |
| type = dyn_cast<ConstOp>(inst).getType().cast<ShapedType>(); |
| } else if (isa<QConstOp>(inst)) { |
| supported_block_size = sparse_op.GetQuantizedBlockSize(); |
| type = dyn_cast<QConstOp>(inst).getType().cast<ShapedType>(); |
| ratio_threshold = kBlockOverRandomSparsityRatioQuant; |
| } else { |
| continue; |
| } |
| |
| InspectResult result = |
| InspectWeight(inst, supported_block_size, ratio_threshold); |
| if (!result.can_compress) { |
| continue; |
| } |
| |
      // The weight is not block sparse. Encode it with random sparsity, i.e.
      // an all-ones block size.
| if (result.selected_block_size.empty()) { |
| result.selected_block_size = std::vector<int>(type.getRank(), 1); |
| } |
| |
| builder.setInsertionPoint(op); |
| SparsityParameterAttr s_param; |
| if (auto cst = dyn_cast<ConstOp>(inst)) { |
| auto attr = cst.value(); |
| auto type = cst.getType().cast<ShapedType>(); |
| if (type.getElementType().isF32()) { |
| std::vector<float> dense_data; |
| dense_data.reserve(type.getNumElements()); |
| for (const auto val : attr.getValues<float>()) |
| dense_data.push_back(val); |
| std::vector<float> compressed_data = |
| BuildSparsityParameterAttribute<float>(result.selected_block_size, |
| dense_data.data(), inst, |
| &builder, &s_param); |
| auto compressed_data_type = RankedTensorType::get( |
| {static_cast<int64_t>(compressed_data.size())}, |
| builder.getF32Type()); |
| auto new_value = DenseElementsAttr::get<float>(compressed_data_type, |
| compressed_data); |
| auto s_const = builder.create<SparseConstOp>( |
| op->getLoc(), cst.value(), s_param, new_value); |
| value.replaceAllUsesWith(s_const.getResult()); |
| cst.erase(); |
| } else if (type.getElementType().isF16()) { |
| std::vector<Eigen::half> dense_data; |
| dense_data.reserve(type.getNumElements()); |
| for (const auto& val : attr.getValues<APFloat>()) |
| dense_data.push_back(APFloatToEigenHalf(val)); |
| std::vector<Eigen::half> compressed_data = |
| BuildSparsityParameterAttribute<Eigen::half>( |
| result.selected_block_size, dense_data.data(), inst, &builder, |
| &s_param); |
| std::vector<APFloat> apfloat_data; |
| apfloat_data.reserve(type.getNumElements()); |
| for (const auto& val : compressed_data) |
| apfloat_data.push_back(EigenHalfToAPFloat(val)); |
| auto compressed_data_type = RankedTensorType::get( |
| {static_cast<int64_t>(compressed_data.size())}, |
| type.getElementType()); |
| auto new_value = |
| DenseElementsAttr::get(compressed_data_type, apfloat_data); |
| auto s_const = builder.create<SparseConstOp>( |
| op->getLoc(), cst.value(), s_param, new_value); |
| value.replaceAllUsesWith(s_const.getResult()); |
| cst.erase(); |
| } |
| } else if (auto cst = dyn_cast<QConstOp>(inst)) { |
| auto attr = cst.value(); |
| auto type = cst.getType().cast<ShapedType>(); |
| std::vector<int8_t> dense_data; |
| dense_data.reserve(type.getNumElements()); |
| for (const auto& val : attr.getValues<int8_t>()) |
| dense_data.push_back(val); |
| std::vector<int8_t> compressed_data = |
| BuildSparsityParameterAttribute<int8_t>(result.selected_block_size, |
| dense_data.data(), inst, |
| &builder, &s_param); |
| auto compressed_data_type = RankedTensorType::get( |
| {static_cast<int64_t>(compressed_data.size())}, |
| builder.getIntegerType(8, true)); |
| auto new_value = DenseElementsAttr::get<int8_t>(compressed_data_type, |
| compressed_data); |
| auto s_qconst = builder.create<SparseQConstOp>( |
| op->getLoc(), cst.qtypeAttr(), cst.value(), s_param, new_value); |
| value.replaceAllUsesWith(s_qconst.getResult()); |
| cst.erase(); |
| } |
| |
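      // No supported block configuration matched, so the weight was encoded
      // with random sparsity above; insert a Densify op so the consuming
      // kernel still receives dense data. replaceAllUsesWith below also
      // rewires the Densify op's own operand, so it is restored with
      // setOperand afterwards.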
| if (result.needs_densify) { |
| const auto value = op->getOperand(operand); |
| auto densify = |
| builder.create<DensifyOp>(op->getLoc(), value.getType(), value); |
| value.replaceAllUsesWith(densify); |
| densify.setOperand(value); |
| } |
| } |
| }); |
| } |
| |
| } // namespace |
| |
| // Creates an instance of the TensorFlow Lite dialect DenseToSparse pass. |
| std::unique_ptr<OperationPass<func::FuncOp>> CreateDenseToSparsePass() { |
  return std::make_unique<DenseToSparse>();
| } |
| |
| static PassRegistration<DenseToSparse> pass; |
| |
| } // namespace TFL |
| } // namespace mlir |