| /* Copyright 2020 The TensorFlow Authors. All Rights Reserved. |
| |
| Licensed under the Apache License, Version 2.0 (the "License"); |
| you may not use this file except in compliance with the License. |
| You may obtain a copy of the License at |
| |
| http://www.apache.org/licenses/LICENSE-2.0 |
| |
| Unless required by applicable law or agreed to in writing, software |
| distributed under the License is distributed on an "AS IS" BASIS, |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| See the License for the specific language governing permissions and |
| limitations under the License. |
| ==============================================================================*/ |
| #include <stdint.h> |
| |
| #include <algorithm> |
| #include <cmath> |
| #include <limits> |
| #include <vector> |
| |
| #include "tensorflow/lite/c/builtin_op_data.h" |
| #include "tensorflow/lite/c/common.h" |
| #include "tensorflow/lite/delegates/hexagon/builders/conv_2d_builder.h" |
| #include "tensorflow/lite/delegates/hexagon/hexagon_nn/hexagon_nn.h" |
| #include "tensorflow/lite/kernels/internal/optimized/optimized_ops.h" |
| #include "tensorflow/lite/kernels/kernel_util.h" |
| |
| namespace tflite { |
| namespace delegates { |
| namespace hexagon { |
namespace {

// Mask with only the most significant bit of an 8-bit value set. XOR-ing a
// byte with it toggles that bit.
constexpr uint8_t k8BitSignFlipConstant = 1u << 7;

// Lower bound applied to per-channel scales after they are normalized by the
// largest scale. 1/1024 ~ 0.0009766 is a restriction set by Hexagon's kernels.
// TODO(b/151103818): Figure out a way to retrieve this constant reliably.
constexpr float kHexagonMinRelativeScale = 0.0009766f;

}  // namespace
| |
| TfLiteStatus ProcessPerChannelQuantizedWeights( |
| const TfLiteIntArray* inputs, const TfLiteIntArray* outputs, |
| TfLiteContext* context, float* weights_min, float* weights_max, |
| GraphBuilder* graph_builder, PerChannelQuantData* per_channel_quant) { |
| if (!per_channel_quant) return kTfLiteError; |
| const auto& weights_tensor = context->tensors[inputs->data[1]]; |
| TfLiteAffineQuantization* weights_quant_params = |
| reinterpret_cast<TfLiteAffineQuantization*>( |
| weights_tensor.quantization.params); |
| |
| // Retrieve channel scales. |
| per_channel_quant->num_scale_values = weights_quant_params->scale->size; |
| // Normalize the scales as expected by Hexagon. |
| per_channel_quant->scales_data = weights_quant_params->scale->data; |
| std::vector<float> normalized_scales; |
| normalized_scales.reserve(per_channel_quant->num_scale_values); |
| float scale_max = 0.0; |
| for (int i = 0; i < per_channel_quant->num_scale_values; ++i) { |
| normalized_scales.push_back(per_channel_quant->scales_data[i]); |
| if (per_channel_quant->scales_data[i] > scale_max) { |
| scale_max = per_channel_quant->scales_data[i]; |
| } |
| } |
| if (scale_max == 0.0) { |
| TF_LITE_KERNEL_LOG(context, "Scale max is zero for: %s", |
| weights_tensor.name); |
| return kTfLiteError; |
| } |
| for (int i = 0; i < per_channel_quant->num_scale_values; ++i) { |
| normalized_scales[i] = |
| std::max(normalized_scales[i] / scale_max, kHexagonMinRelativeScale); |
| } |
| // Add node for channel scales data. |
| const std::vector<int> scales_shape = {1, 1, 1, |
| per_channel_quant->num_scale_values}; |
| per_channel_quant->channel_scales_node = graph_builder->AddConstNodeWithData( |
| scales_shape.data(), reinterpret_cast<char*>(normalized_scales.data()), |
| normalized_scales.size() * sizeof(normalized_scales[0])); |
| *weights_min = -128 * scale_max; |
| *weights_max = 127 * scale_max; |
| return kTfLiteOk; |
| } |
| |
| TfLiteStatus ProcessPerChannelQuantizedBias( |
| const TfLiteIntArray* inputs, const TfLiteIntArray* outputs, |
| TfLiteContext* context, float* bias_min, float* bias_max, |
| GraphBuilder* graph_builder, PerChannelQuantData* per_channel_quant, |
| OpBuilder** bias_const_node) { |
| const auto& bias_tensor = context->tensors[inputs->data[2]]; |
| |
| const TfLiteAffineQuantization* input_quant_params = |
| static_cast<const TfLiteAffineQuantization*>( |
| context->tensors[inputs->data[0]].quantization.params); |
| const float input_scale = input_quant_params->scale->data[0]; |
| // Now dequantize bias values to float first, to adjust for the |
| // normalization of channel scales. |
| auto* bias_data = bias_tensor.data.i32; |
| const int bias_size = NumElements(&bias_tensor); |
| if (bias_size != per_channel_quant->num_scale_values) { |
| TF_LITE_KERNEL_LOG( |
| context, "Bias/channel scales number mismatch for bias tensor: %s", |
| bias_tensor.name); |
| return kTfLiteError; |
| } |
| std::vector<float> dequantized_bias; |
| dequantized_bias.reserve(bias_size); |
| for (int i = 0; i < bias_size; ++i) { |
| const float dequantized_value = |
| bias_data[i] * input_scale * per_channel_quant->scales_data[i]; |
| const float abs_dequantized_value = std::abs(dequantized_value); |
| if (abs_dequantized_value > *bias_max) { |
| *bias_max = abs_dequantized_value; |
| } |
| dequantized_bias.push_back(dequantized_value); |
| } |
| *bias_max = *bias_max * 8; |
| *bias_min = -1 * *bias_max; |
| // Now requantize the bias values to the new min/max values. |
| std::vector<int> preprocessed_bias_data; |
| preprocessed_bias_data.reserve(per_channel_quant->num_scale_values); |
| for (int i = 0; i < bias_size; ++i) { |
| preprocessed_bias_data.push_back(static_cast<int>( |
| std::round(std::pow(2, 31) * (dequantized_bias[i] / *bias_max)))); |
| } |
| // Add nodes for bias. |
| const std::vector<int> bias_shape = {1, 1, 1, bias_size}; |
| auto* bias_data_node = graph_builder->AddConstNodeWithData( |
| bias_shape.data(), reinterpret_cast<char*>(preprocessed_bias_data.data()), |
| preprocessed_bias_data.size() * sizeof(preprocessed_bias_data[0])); |
| if (bias_const_node) { |
| *bias_const_node = bias_data_node; |
| } |
| graph_builder->AddTensorWithID(inputs->data[2], bias_data_node->GetID(), 0, |
| /*overwrite=*/true); |
| return kTfLiteOk; |
| } |
| |
| TfLiteStatus Conv2dOpBuilder::InitializeWeightsNodes( |
| const TfLiteIntArray* inputs, const TfLiteIntArray* outputs, |
| TfLiteContext* context, const int input_depth) { |
| const std::vector<int> quant_bound_shape = {1, 1, 1, 1}; |
| |
| const auto& weights_tensor = context->tensors[inputs->data[1]]; |
| if (weights_tensor.allocation_type != kTfLiteMmapRo) { |
| TF_LITE_KERNEL_LOG( |
| context, "Weights tensor doesn't have correct allocation type: %s", |
| weights_tensor.name); |
| return kTfLiteError; |
| } |
| int weights_batch_size, weights_height_size, weights_width_size, |
| weights_depth_size; |
| // Hexagon lib expects the weight tensor in HWCN, TFLite uses NHWC. |
| // Transpose NHWC -> HWCN |
| GetDims(&weights_batch_size, &weights_height_size, &weights_width_size, |
| &weights_depth_size, weights_tensor.dims); |
| |
| // Weights tensor could be int8 even for per-tensor quantization. |
| // Therefore, we look at the number of scale values to check if it is |
| // per-channel quantized. |
| TfLiteAffineQuantization* weights_quant_params = |
| reinterpret_cast<TfLiteAffineQuantization*>( |
| weights_tensor.quantization.params); |
| const bool is_per_channel_quant = weights_quant_params->scale->size > 1; |
| |
| // WEIGHTS DATA. |
| OpBuilder* weights_data_node = nullptr; |
| if (op_node_.op_type == OP_Supernode_8x8p32to8) { |
| // Hexagon lib expects the weight tensor in HWCN, TFLite uses NHWC. |
| // Transpose NHWC -> HWCN |
| weight_shape_ = {weights_height_size, weights_width_size, |
| weights_depth_size, weights_batch_size}; |
| RuntimeShape nhwc_shape({weights_batch_size, weights_height_size, |
| weights_width_size, weights_depth_size}); |
| RuntimeShape hwcn_shape({weights_height_size, weights_width_size, |
| weights_depth_size, weights_batch_size}); |
| std::vector<uint8_t> hwcn(NumElements(&weights_tensor)); |
| TransposeParams transpose_params; |
| transpose_params.perm_count = 4; |
| transpose_params.perm[0] = 1; |
| transpose_params.perm[1] = 2; |
| transpose_params.perm[2] = 3; |
| transpose_params.perm[3] = 0; |
| // TODO(b/151103818): Try merging Transpose & bit flip. |
| if (weights_tensor.type == kTfLiteInt8) { |
| optimized_ops::Transpose<int8_t>(transpose_params, nhwc_shape, |
| weights_tensor.data.int8, hwcn_shape, |
| reinterpret_cast<int8_t*>(hwcn.data())); |
| // Flip bits on the weight values so that the int8 values are treated |
| // as uint8. |
| for (int i = 0; i < hwcn.size(); ++i) { |
| hwcn[i] = hwcn[i] ^ k8BitSignFlipConstant; |
| } |
| } else { |
| optimized_ops::Transpose<uint8_t>(transpose_params, nhwc_shape, |
| weights_tensor.data.uint8, hwcn_shape, |
| hwcn.data()); |
| } |
| weights_data_node = graph_builder_->AddConstNodeWithData( |
| weight_shape_.data(), reinterpret_cast<char*>(hwcn.data()), |
| hwcn.size() * sizeof(hwcn[0])); |
| } else if (op_node_.op_type == OP_DepthwiseSupernode_8x8p32to8) { |
| // Hexagon treats depthwise conv like tf.nn.depthwise_conv2d, where the |
| // expected filter shape is [fh,fw,din,dmul]. |
| // The data itself will remain the same, since TFLite's representation is |
| // just a 'flattening' of Hexagon's version. |
| const int channel_multiplier = weights_depth_size / input_depth; |
| weight_shape_ = {weights_height_size, weights_width_size, input_depth, |
| channel_multiplier}; |
| |
| if (weights_tensor.type == kTfLiteInt8) { |
| // Flip bits on the weight values so that the int8 values are treated |
| // as uint8. |
| std::vector<uint8_t> converted_data(NumElements(&weights_tensor)); |
| for (int i = 0; i < converted_data.size(); ++i) { |
| converted_data[i] = weights_tensor.data.int8[i] ^ k8BitSignFlipConstant; |
| } |
| weights_data_node = graph_builder_->AddConstNodeWithData( |
| weight_shape_.data(), reinterpret_cast<char*>(converted_data.data()), |
| converted_data.size() * sizeof(converted_data[0])); |
| } else { |
| weights_data_node = graph_builder_->AddConstNodeWithData( |
| weight_shape_.data(), weights_tensor.data.raw, |
| NumElements(&weights_tensor) * sizeof(weights_tensor.data.uint8[0])); |
| } |
| } |
| graph_builder_->AddTensorWithID(inputs->data[1], weights_data_node->GetID(), |
| 0, /*overwrite=*/true); |
| |
| // WEIGHTS QUANTIZATION. |
| float weights_min = 0; |
| float weights_max = 0; |
| if (is_per_channel_quant) { |
| ProcessPerChannelQuantizedWeights(inputs, outputs, context, &weights_min, |
| &weights_max, graph_builder_, |
| &per_channel_quant_); |
| } else { |
| TF_LITE_ENSURE_STATUS(ComputeMinAndMaxQuantValues( |
| weights_tensor, &weights_min, &weights_max)); |
| } |
| weights_min_node_ = graph_builder_->AddConstNodeWithData( |
| quant_bound_shape.data(), reinterpret_cast<char*>(&weights_min), |
| sizeof(weights_min)); |
| weights_max_node_ = graph_builder_->AddConstNodeWithData( |
| quant_bound_shape.data(), reinterpret_cast<char*>(&weights_max), |
| sizeof(weights_max)); |
| |
| return kTfLiteOk; |
| } |
| |
| TfLiteStatus Conv2dOpBuilder::InitializeBiasNodes(const TfLiteIntArray* inputs, |
| const TfLiteIntArray* outputs, |
| TfLiteContext* context) { |
| const std::vector<int> quant_bound_shape = {1, 1, 1, 1}; |
| |
| const auto& bias_tensor = context->tensors[inputs->data[2]]; |
| |
| float bias_min = 0; |
| float bias_max = 0; |
| if (per_channel_quant_.channel_scales_node != nullptr) { |
| ProcessPerChannelQuantizedBias(inputs, outputs, context, &bias_min, |
| &bias_max, graph_builder_, |
| &per_channel_quant_); |
| } else { |
| auto* bias_data_node = |
| graph_builder_->AddConstNodeWithData(inputs->data[2], bias_tensor); |
| graph_builder_->AddTensorWithID(inputs->data[2], bias_data_node->GetID(), 0, |
| /*overwrite=*/true); |
| TF_LITE_ENSURE_STATUS( |
| ComputeMinAndMaxQuantValues(bias_tensor, &bias_min, &bias_max)); |
| } |
| |
| bias_min_node_ = graph_builder_->AddConstNodeWithData( |
| quant_bound_shape.data(), reinterpret_cast<char*>(&bias_min), |
| sizeof(bias_min)); |
| bias_max_node_ = graph_builder_->AddConstNodeWithData( |
| quant_bound_shape.data(), reinterpret_cast<char*>(&bias_max), |
| sizeof(bias_max)); |
| |
| return kTfLiteOk; |
| } |
| |
| } // namespace hexagon |
| } // namespace delegates |
| } // namespace tflite |