Makes the device-independent fake quantization functors produce exact zeros for symmetric quantization ranges, and adds CPU and GPU unit tests for symmetric fake quantization.
diff --git a/tensorflow/core/kernels/fake_quant_ops_functor.h b/tensorflow/core/kernels/fake_quant_ops_functor.h
index 045a96a..d3fd25c 100644
--- a/tensorflow/core/kernels/fake_quant_ops_functor.h
+++ b/tensorflow/core/kernels/fake_quant_ops_functor.h
@@ -87,13 +87,15 @@
float nudged_min, nudged_max, nudged_scale;
Nudge(min, max, quant_min, quant_max, &nudged_min, &nudged_max,
&nudged_scale);
+
const float inv_nudged_scale = 1.0f / nudged_scale;
+ const float quant_zero = floor(-nudged_min * inv_nudged_scale + 0.5f);
auto clamped = inputs.cwiseMin(nudged_max).cwiseMax(nudged_min);
auto clamped_shifted = clamped - nudged_min;
outputs.device(d) =
- (clamped_shifted * inv_nudged_scale + 0.5f).floor() * nudged_scale +
- nudged_min;
+ (clamped_shifted * inv_nudged_scale - quant_zero + 0.5f).floor() *
+ nudged_scale;
}
};
@@ -138,13 +140,17 @@
float nudged_min, nudged_max, nudged_scale;
Nudge(min_val, max_val, quant_min, quant_max, &nudged_min, &nudged_max,
&nudged_scale);
+
+ const float inv_nudged_scale = 1.0f / nudged_scale;
+ const float quant_zero = floor(-nudged_min * inv_nudged_scale + 0.5f);
const auto nudged_scale_repl = inputs.constant(nudged_scale);
+ const auto inv_nudged_scale_repl = inputs.constant(inv_nudged_scale);
const auto clamped = inputs.cwiseMin(nudged_max).cwiseMax(nudged_min);
const auto clamped_shifted = clamped - nudged_min;
- outputs.device(d) = (clamped_shifted / nudged_scale_repl + 0.5f).floor() *
- nudged_scale_repl +
- nudged_min;
+ outputs.device(d) =
+ (clamped_shifted * inv_nudged_scale_repl - quant_zero + 0.5f).floor() *
+ nudged_scale_repl;
}
};
@@ -212,13 +218,17 @@
float nudged_min, nudged_max, nudged_scale;
Nudge(min_val, max_val, quant_min, quant_max, &nudged_min, &nudged_max,
&nudged_scale);
+
+ const float inv_nudged_scale = 1.0f / nudged_scale;
+ const float quant_zero = floor(-nudged_min * inv_nudged_scale + 0.5f);
+
const auto clamped =
inputs.chip<1>(i).cwiseMin(nudged_max).cwiseMax(nudged_min);
const auto clamped_shifted = clamped - nudged_min;
outputs.chip<1>(i).device(d) =
- (clamped_shifted / nudged_scale + 0.5f).floor() * nudged_scale +
- nudged_min;
+ (clamped_shifted * inv_nudged_scale - quant_zero + 0.5f).floor() *
+ nudged_scale;
}
}
};
diff --git a/tensorflow/core/kernels/fake_quant_ops_test.cc b/tensorflow/core/kernels/fake_quant_ops_test.cc
index 5f62bc3..e22a383 100644
--- a/tensorflow/core/kernels/fake_quant_ops_test.cc
+++ b/tensorflow/core/kernels/fake_quant_ops_test.cc
@@ -53,8 +53,17 @@
void RunTestFakeQuantWithMinMaxArgs(const int num_bits,
const bool narrow_range, const float min,
const float max, const TensorShape& shape,
- const gtl::ArraySlice<float> data,
- gtl::ArraySlice<float> expected_data) {
+ const gtl::ArraySlice<float>& data,
+ gtl::ArraySlice<float> expected_data,
+ const double atol = -1.0,
+ const double rtol = -1.0,
+ const DeviceType device = DEVICE_CPU) {
+ if (device == DEVICE_GPU) {
+ SetDevice(device,
+ std::unique_ptr<tensorflow::Device>(DeviceFactory::NewDevice(
+ "GPU", {}, "/job:a/replica:0/task:0")));
+ }
+
TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxArgs")
.Input(FakeInput(DT_FLOAT)) // inputs
.Attr("min", min)
@@ -70,16 +79,26 @@
TF_ASSERT_OK(RunOpKernel());
Tensor* output = GetOutput(0);
+ TF_EXPECT_OK(device_->Sync());
Tensor expected(allocator(), DT_FLOAT, shape);
FillValues<float>(&expected, expected_data);
- ExpectClose(expected, *output);
+ ExpectClose(expected, *output, atol, rtol);
}
void RunTestFakeQuantWithMinMaxVars(const int num_bits,
const bool narrow_range, const float min,
const float max, const TensorShape& shape,
- const gtl::ArraySlice<float> data,
- gtl::ArraySlice<float> expected_data) {
+ const gtl::ArraySlice<float>& data,
+ gtl::ArraySlice<float> expected_data,
+ const double atol = -1.0,
+ const double rtol = -1.0,
+ const DeviceType device = DEVICE_CPU) {
+ if (device == DEVICE_GPU) {
+ SetDevice(device,
+ std::unique_ptr<tensorflow::Device>(DeviceFactory::NewDevice(
+ "GPU", {}, "/job:a/replica:0/task:0")));
+ }
+
TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxVars")
.Input(FakeInput(DT_FLOAT)) // inputs
.Input(FakeInput(DT_FLOAT)) // min
@@ -101,14 +120,22 @@
Tensor* output = GetOutput(0);
Tensor expected(allocator(), DT_FLOAT, TensorShape({2, 3}));
FillValues<float>(&expected, expected_data);
- ExpectClose(expected, *output);
+ ExpectClose(expected, *output, atol, rtol);
}
void RunTestFakeQuantWithMinMaxVarsPerChannel(
const int num_bits, const bool narrow_range,
- const TensorShape& minmax_shape, const gtl::ArraySlice<float> min,
- const gtl::ArraySlice<float> max, const TensorShape& shape,
- const gtl::ArraySlice<float> data, gtl::ArraySlice<float> expected_data) {
+ const TensorShape& minmax_shape, const gtl::ArraySlice<float>& min,
+ const gtl::ArraySlice<float>& max, const TensorShape& shape,
+ const gtl::ArraySlice<float>& data, gtl::ArraySlice<float> expected_data,
+ const double atol = -1.0, const double rtol = -1.0,
+ const DeviceType device = DEVICE_CPU) {
+ if (device == DEVICE_GPU) {
+ SetDevice(device,
+ std::unique_ptr<tensorflow::Device>(DeviceFactory::NewDevice(
+ "GPU", {}, "/job:a/replica:0/task:0")));
+ }
+
TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxVarsPerChannel")
.Input(FakeInput(DT_FLOAT)) // inputs
.Input(FakeInput(DT_FLOAT)) // min
@@ -130,10 +157,54 @@
Tensor* output = GetOutput(0);
Tensor expected(allocator(), DT_FLOAT, shape);
FillValues<float>(&expected, expected_data);
- ExpectClose(expected, *output);
+ ExpectClose(expected, *output, atol, rtol);
}
};
+TEST_F(QuantOpsTest, WithArgsSymmetricRangeZeroInput_RegularRange) {
+ // Original quantization range: [-10, 10], scale: 20/255.
+ // Original zero point: 127.5, nudged zero point 128.0.
+ // Expected quantized values: 0.0.
+ RunTestFakeQuantWithMinMaxArgs(8, false, -10.0f, 10.0f, TensorShape({2, 3}),
+ {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f},
+ {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}, 0.0,
+ 0.0);
+}
+
+#if GOOGLE_CUDA
+TEST_F(QuantOpsTest, WithArgsSymmetricRangeZeroInput_RegularRange_Gpu) {
+ // Original quantization range: [-10, 10], scale: 20/255.
+ // Original zero point: 127.5, nudged zero point 128.0.
+ // Expected quantized values: 0.0.
+ RunTestFakeQuantWithMinMaxArgs(8, false, -10.0f, 10.0f, TensorShape({2, 3}),
+ {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f},
+ {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}, 0.0, 0.0,
+ DEVICE_GPU);
+}
+#endif
+
+TEST_F(QuantOpsTest, WithArgsSymmetricRangeZeroInput_NarrowRange) {
+ // Original quantization range: [-10, 10], scale: 20/254.
+  // Original zero point: 128.0, no nudging necessary.
+ // Expected quantized values: 0.0.
+ RunTestFakeQuantWithMinMaxArgs(8, true, -10.0f, 10.0f, TensorShape({2, 3}),
+ {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f},
+ {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}, 0.0,
+ 0.0);
+}
+
+#if GOOGLE_CUDA
+TEST_F(QuantOpsTest, WithArgsSymmetricRangeZeroInput_NarrowRange_Gpu) {
+ // Original quantization range: [-10, 10], scale: 20/254.
+  // Original zero point: 128.0, no nudging necessary.
+ // Expected quantized values: 0.0.
+ RunTestFakeQuantWithMinMaxArgs(8, true, -10.0f, 10.0f, TensorShape({2, 3}),
+ {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f},
+ {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}, 0.0, 0.0,
+ DEVICE_GPU);
+}
+#endif
+
TEST_F(QuantOpsTest, WithArgsNoNudging_RegularRange) {
// Original quantization range: [-10 + 0 / 4, -10 + 255 / 4], scale: 1/4.
// Original zero point: 40, no nudging necessary.
@@ -481,6 +552,50 @@
{0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f});
}
+TEST_F(QuantOpsTest, WithVarsSymmetricRangeZeroInput_RegularRange) {
+ // Original quantization range: [-10, 10], scale: 20/255.
+ // Original zero point: 127.5, nudged zero point 128.
+ // Expected quantized values: 0.
+ RunTestFakeQuantWithMinMaxVars(8, false, -10.0f, 10.0f, TensorShape({2, 3}),
+ {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f},
+ {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}, 0.0,
+ 0.0);
+}
+
+#if GOOGLE_CUDA
+TEST_F(QuantOpsTest, WithVarsSymmetricRangeZeroInput_RegularRange_Gpu) {
+ // Original quantization range: [-10, 10], scale: 20/255.
+ // Original zero point: 127.5, nudged zero point 128.
+ // Expected quantized values: 0.
+ RunTestFakeQuantWithMinMaxVars(8, false, -10.0f, 10.0f, TensorShape({2, 3}),
+ {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f},
+ {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}, 0.0, 0.0,
+ DEVICE_GPU);
+}
+#endif
+
+TEST_F(QuantOpsTest, WithVarsSymmetricRangeZeroInput_NarrowRange) {
+ // Original quantization range: [-10, 10], scale: 20/254.
+  // Original zero point: 128.0, no nudging necessary.
+ // Expected quantized values: 0.
+ RunTestFakeQuantWithMinMaxVars(8, true, -10.0f, 10.0f, TensorShape({2, 3}),
+ {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f},
+ {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}, 0.0,
+ 0.0);
+}
+
+#if GOOGLE_CUDA
+TEST_F(QuantOpsTest, WithVarsSymmetricRangeZeroInput_NarrowRange_Gpu) {
+ // Original quantization range: [-10, 10], scale: 20/254.
+  // Original zero point: 128.0, no nudging necessary.
+ // Expected quantized values: 0.
+ RunTestFakeQuantWithMinMaxVars(8, true, -10.0f, 10.0f, TensorShape({2, 3}),
+ {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f},
+ {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}, 0.0, 0.0,
+ DEVICE_GPU);
+}
+#endif
+
TEST_F(QuantOpsTest, WithVarsNoNudging_RegularRange) {
// Original quantization range: [-10 + 0 / 4, -10 + 255 / 4], scale: 1/4.
// Original zero point: 40, no nudging necessary.
@@ -868,6 +983,52 @@
{0.0f, 0.0f, 0.0f, 0.0f});
}
+TEST_F(QuantOpsTest, WithVarsPerChannelSymmetricRangeZeroInput_RegularRange) {
+ // Original quantization range: [-10, 10], scale: 20/255.
+ // Original zero point: 127.5, nudged zero point 128.0.
+ // Expected quantized values: 0.
+ RunTestFakeQuantWithMinMaxVarsPerChannel(
+ 8, false, TensorShape({4}), {-10.0f, -10.0f, -10.0f, -10.0f},
+ {10.0f, 10.0f, 10.0f, 10.0f}, TensorShape({4}), {0.0f, 0.0f, 0.0f, 0.0f},
+ {0.0f, 0.0f, 0.0f, 0.0f}, 0.0, 0.0);
+}
+
+#if GOOGLE_CUDA
+TEST_F(QuantOpsTest,
+ WithVarsPerChannelSymmetricRangeZeroInput_RegularRange_Gpu) {
+ // Original quantization range: [-10, 10], scale: 20/255.
+ // Original zero point: 127.5, nudged zero point 128.0.
+ // Expected quantized values: 0.
+ RunTestFakeQuantWithMinMaxVarsPerChannel(
+ 8, false, TensorShape({4}), {-10.0f, -10.0f, -10.0f, -10.0f},
+ {10.0f, 10.0f, 10.0f, 10.0f}, TensorShape({4}), {0.0f, 0.0f, 0.0f, 0.0f},
+ {0.0f, 0.0f, 0.0f, 0.0f}, 0.0, 0.0, DEVICE_GPU);
+}
+#endif
+
+TEST_F(QuantOpsTest, WithVarsPerChannelSymmetricRangeZeroInput_NarrowRange) {
+ // Original quantization range: [-10, 10], scale: 20/254.
+ // Original zero point: 128.0, no nudging necessary.
+ // Expected quantized values: 0.
+ RunTestFakeQuantWithMinMaxVarsPerChannel(
+ 8, true, TensorShape({4}), {-10.0f, -10.0f, -10.0f, -10.0f},
+ {10.0f, 10.0f, 10.0f, 10.0f}, TensorShape({4}), {0.0f, 0.0f, 0.0f, 0.0f},
+ {0.0f, 0.0f, 0.0f, 0.0f}, 0.0, 0.0);
+}
+
+#if GOOGLE_CUDA
+TEST_F(QuantOpsTest,
+ WithVarsPerChannelSymmetricRangeZeroInput_NarrowRange_Gpu) {
+ // Original quantization range: [-10, 10], scale: 20/254.
+ // Original zero point: 128.0, no nudging necessary.
+ // Expected quantized values: 0.
+ RunTestFakeQuantWithMinMaxVarsPerChannel(
+ 8, true, TensorShape({4}), {-10.0f, -10.0f, -10.0f, -10.0f},
+ {10.0f, 10.0f, 10.0f, 10.0f}, TensorShape({4}), {0.0f, 0.0f, 0.0f, 0.0f},
+ {0.0f, 0.0f, 0.0f, 0.0f}, 0.0, 0.0, DEVICE_GPU);
+}
+#endif
+
TEST_F(QuantOpsTest, WithVarsPerChannelDim1NudgedDown_RegularRange) {
// Original quantization ranges: [-0.4 / 4 + 0 / 4, -0.4 / 4 + 255 / 4].
// Scale: 1/4, original zero point: 0.4, nudged to 0.