Add a utility method to decompose a double into two integers
PiperOrigin-RevId: 305991934
Change-Id: I049ec90a4730144deb2f2620aad7fc3fe98ea9ba
diff --git a/tensorflow/compiler/mlir/lite/quantization/BUILD b/tensorflow/compiler/mlir/lite/quantization/BUILD
index a63a1e4..22b4d36 100644
--- a/tensorflow/compiler/mlir/lite/quantization/BUILD
+++ b/tensorflow/compiler/mlir/lite/quantization/BUILD
@@ -1,4 +1,4 @@
-load("//tensorflow:tensorflow.bzl", "tf_native_cc_binary")
+load("//tensorflow:tensorflow.bzl", "tf_cc_test", "tf_native_cc_binary")
load(
"//tensorflow/core/platform:build_config.bzl",
"tf_proto_library",
@@ -116,6 +116,12 @@
)
cc_library(
+ name = "numerical_utils",
+ srcs = ["numerical_utils.cc"],
+ hdrs = ["numerical_utils.h"],
+)
+
+cc_library(
name = "device_target",
srcs = ["device_target.cc"],
hdrs = ["device_target.h"],
@@ -142,3 +148,12 @@
"@llvm-project//mlir:Support",
],
)
+
+tf_cc_test(
+ name = "numerical_utils_test",
+ srcs = ["numerical_utils_test.cc"],
+ deps = [
+ ":numerical_utils",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
diff --git a/tensorflow/compiler/mlir/lite/quantization/numerical_utils.cc b/tensorflow/compiler/mlir/lite/quantization/numerical_utils.cc
new file mode 100644
index 0000000..3abd0c6
--- /dev/null
+++ b/tensorflow/compiler/mlir/lite/quantization/numerical_utils.cc
@@ -0,0 +1,59 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/compiler/mlir/lite/quantization/numerical_utils.h"
+
+#include <assert.h>
+
+#include <cmath>
+#include <limits>
+
+namespace mlir {
+namespace quant {
+
+// This method is adapted from TFLite:
+// ["tensorflow/lite/kernels/internal/quantization_util.cc"]
+QuantizedMultiplier QuantizeMultiplier(double double_multiplier) {
+ if (double_multiplier < 1e-6) {
+ return {0, 0};
+ }
+
+ int32_t shift;
+ const double q = std::frexp(double_multiplier, &shift);
+ auto q_fixed = static_cast<int64_t>(std::round(q * (1ll << 31)));
+ assert(q_fixed <= (1ll << 31));
+ if (q_fixed == (1ll << 31)) {
+ q_fixed /= 2;
+ ++shift;
+ }
+ assert(q_fixed <= std::numeric_limits<int32_t>::max());
+ // A shift amount smaller than -31 would cause all bits to be shifted out
+ // and thus all results would be zero. We implement that instead with
+ // q_fixed==0, so as to avoid hitting issues with right-shift
+ // operations with shift amounts greater than 31. Note that this happens
+ // roughly when abs(double_multiplier) < 2^-31 and the present handling means
+ // that we're effectively flushing tiny double_multiplier's to zero.
+ // We could conceivably handle values in the range (roughly) [32, 63]
+ // as 'denormals' i.e. (shift==0, q_fixed < 2^30). In that point of view
+ // the present handling is just doing 'flush denormals to zero'. We could
+ // reconsider and actually generate nonzero denormals if a need arises.
+ if (shift < -31) {
+ shift = 0;
+ q_fixed = 0;
+ }
+ return {static_cast<int32_t>(q_fixed), shift};
+}
+
+} // namespace quant
+} // namespace mlir
diff --git a/tensorflow/compiler/mlir/lite/quantization/numerical_utils.h b/tensorflow/compiler/mlir/lite/quantization/numerical_utils.h
new file mode 100644
index 0000000..3f12f2c
--- /dev/null
+++ b/tensorflow/compiler/mlir/lite/quantization/numerical_utils.h
@@ -0,0 +1,35 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_COMPILER_MLIR_LITE_QUANTIZATION_NUMERICAL_UTILS_H_
+#define TENSORFLOW_COMPILER_MLIR_LITE_QUANTIZATION_NUMERICAL_UTILS_H_
+
+#include <cstdint>
+#include <utility>
+
+namespace mlir {
+namespace quant {
+
+using QuantizedMultiplier = std::pair<int32_t, int32_t>;
+
+// Decomposes a double-precision multiplier into an integer multiplier and an
+// exponent: double_multiplier = int_multiplier * 2 ^ (-31 + exponent).
+// int_multiplier is in the range [2^30, 2^31) (or 0 when flushed to zero).
+QuantizedMultiplier QuantizeMultiplier(double double_multiplier);
+
+} // namespace quant
+} // namespace mlir
+
+#endif // TENSORFLOW_COMPILER_MLIR_LITE_QUANTIZATION_NUMERICAL_UTILS_H_
diff --git a/tensorflow/compiler/mlir/lite/quantization/numerical_utils_test.cc b/tensorflow/compiler/mlir/lite/quantization/numerical_utils_test.cc
new file mode 100644
index 0000000..6f436ad
--- /dev/null
+++ b/tensorflow/compiler/mlir/lite/quantization/numerical_utils_test.cc
@@ -0,0 +1,55 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/mlir/lite/quantization/numerical_utils.h"
+
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+
+namespace mlir {
+namespace quant {
+
+namespace {
+
+double ComposeScale(const QuantizedMultiplier& input) {
+ return input.first * std::exp2(-31 + input.second);
+}
+
+TEST(DecomposeScale, QuantizeMultiplier) {
+ // Decompose multiplier larger than 1.
+ ASSERT_FLOAT_EQ(ComposeScale(QuantizeMultiplier(1.0e6)), 1.0e6);
+ ASSERT_FLOAT_EQ(ComposeScale(QuantizeMultiplier(1.0e3)), 1.0e3);
+ ASSERT_FLOAT_EQ(ComposeScale(QuantizeMultiplier(10.)), 10.);
+ ASSERT_FLOAT_EQ(ComposeScale(QuantizeMultiplier(5.)), 5.);
+ ASSERT_FLOAT_EQ(ComposeScale(QuantizeMultiplier(2.)), 2.);
+
+ // Decompose multiplier between 1.0 and 1e-6.
+ ASSERT_FLOAT_EQ(ComposeScale(QuantizeMultiplier(0.0)), 0.0);
+ ASSERT_FLOAT_EQ(ComposeScale(QuantizeMultiplier(1.0)), 1.0);
+ ASSERT_FLOAT_EQ(ComposeScale(QuantizeMultiplier(1.0e-1)), 1.0e-1);
+ ASSERT_FLOAT_EQ(ComposeScale(QuantizeMultiplier(1.0e-2)), 1.0e-2);
+ ASSERT_FLOAT_EQ(ComposeScale(QuantizeMultiplier(1.0e-3)), 1.0e-3);
+ ASSERT_FLOAT_EQ(ComposeScale(QuantizeMultiplier(1.0e-4)), 1.0e-4);
+ ASSERT_FLOAT_EQ(ComposeScale(QuantizeMultiplier(1.0e-5)), 1.0e-5);
+ ASSERT_FLOAT_EQ(ComposeScale(QuantizeMultiplier(1.0e-6)), 1.0e-6);
+
+ // When scale is smaller than 1.0e-6, it is decomposed to {0, 0}.
+ ASSERT_FLOAT_EQ(ComposeScale(QuantizeMultiplier(1.0e-7)), 0.0);
+ ASSERT_FLOAT_EQ(ComposeScale(QuantizeMultiplier(1.0e-8)), 0.0);
+}
+
+} // namespace
+} // namespace quant
+} // namespace mlir