Makes the device-independent fake quantization functors produce exact zeros for symmetric quantization ranges, and adds CPU and GPU unit tests for symmetric fake quantization.
diff --git a/tensorflow/core/kernels/fake_quant_ops_functor.h b/tensorflow/core/kernels/fake_quant_ops_functor.h
index 045a96a..d3fd25c 100644
--- a/tensorflow/core/kernels/fake_quant_ops_functor.h
+++ b/tensorflow/core/kernels/fake_quant_ops_functor.h
@@ -87,13 +87,15 @@
float nudged_min, nudged_max, nudged_scale;
Nudge(min, max, quant_min, quant_max, &nudged_min, &nudged_max,
&nudged_scale);
+
const float inv_nudged_scale = 1.0f / nudged_scale;
+ const float quant_zero = floor(-nudged_min * inv_nudged_scale + 0.5f);
auto clamped = inputs.cwiseMin(nudged_max).cwiseMax(nudged_min);
auto clamped_shifted = clamped - nudged_min;
outputs.device(d) =
- (clamped_shifted * inv_nudged_scale + 0.5f).floor() * nudged_scale +
- nudged_min;
+ (clamped_shifted * inv_nudged_scale - quant_zero + 0.5f).floor() *
+ nudged_scale;
}
};
@@ -138,13 +140,17 @@
float nudged_min, nudged_max, nudged_scale;
Nudge(min_val, max_val, quant_min, quant_max, &nudged_min, &nudged_max,
&nudged_scale);
+
+ const float inv_nudged_scale = 1.0f / nudged_scale;
+ const float quant_zero = floor(-nudged_min * inv_nudged_scale + 0.5f);
const auto nudged_scale_repl = inputs.constant(nudged_scale);
+ const auto inv_nudged_scale_repl = inputs.constant(inv_nudged_scale);
const auto clamped = inputs.cwiseMin(nudged_max).cwiseMax(nudged_min);
const auto clamped_shifted = clamped - nudged_min;
- outputs.device(d) = (clamped_shifted / nudged_scale_repl + 0.5f).floor() *
- nudged_scale_repl +
- nudged_min;
+ outputs.device(d) =
+ (clamped_shifted * inv_nudged_scale_repl - quant_zero + 0.5f).floor() *
+ nudged_scale_repl;
}
};
@@ -212,13 +218,17 @@
float nudged_min, nudged_max, nudged_scale;
Nudge(min_val, max_val, quant_min, quant_max, &nudged_min, &nudged_max,
&nudged_scale);
+
+ const float inv_nudged_scale = 1.0f / nudged_scale;
+ const float quant_zero = floor(-nudged_min * inv_nudged_scale + 0.5f);
+
const auto clamped =
inputs.chip<1>(i).cwiseMin(nudged_max).cwiseMax(nudged_min);
const auto clamped_shifted = clamped - nudged_min;
outputs.chip<1>(i).device(d) =
- (clamped_shifted / nudged_scale + 0.5f).floor() * nudged_scale +
- nudged_min;
+ (clamped_shifted * inv_nudged_scale - quant_zero + 0.5f).floor() *
+ nudged_scale;
}
}
};
diff --git a/tensorflow/core/kernels/fake_quant_ops_test.cc b/tensorflow/core/kernels/fake_quant_ops_test.cc
index 5f62bc3..e22a383 100644
--- a/tensorflow/core/kernels/fake_quant_ops_test.cc
+++ b/tensorflow/core/kernels/fake_quant_ops_test.cc
@@ -53,8 +53,17 @@
void RunTestFakeQuantWithMinMaxArgs(const int num_bits,
const bool narrow_range, const float min,
const float max, const TensorShape& shape,
- const gtl::ArraySlice<float> data,
- gtl::ArraySlice<float> expected_data) {
+ const gtl::ArraySlice<float>& data,
+ gtl::ArraySlice<float> expected_data,
+ const double atol = -1.0,
+ const double rtol = -1.0,
+ const DeviceType device = DEVICE_CPU) {
+ if (device == DEVICE_GPU) {
+ SetDevice(device,
+ std::unique_ptr<tensorflow::Device>(DeviceFactory::NewDevice(
+ "GPU", {}, "/job:a/replica:0/task:0")));
+ }
+
TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxArgs")
.Input(FakeInput(DT_FLOAT)) // inputs
.Attr("min", min)
@@ -70,16 +79,26 @@
TF_ASSERT_OK(RunOpKernel());
Tensor* output = GetOutput(0);
+ TF_EXPECT_OK(device_->Sync());
Tensor expected(allocator(), DT_FLOAT, shape);
FillValues<float>(&expected, expected_data);
- ExpectClose(expected, *output);
+ ExpectClose(expected, *output, atol, rtol);
}
void RunTestFakeQuantWithMinMaxVars(const int num_bits,
const bool narrow_range, const float min,
const float max, const TensorShape& shape,
- const gtl::ArraySlice<float> data,
- gtl::ArraySlice<float> expected_data) {
+ const gtl::ArraySlice<float>& data,
+ gtl::ArraySlice<float> expected_data,
+ const double atol = -1.0,
+ const double rtol = -1.0,
+ const DeviceType device = DEVICE_CPU) {
+ if (device == DEVICE_GPU) {
+ SetDevice(device,
+ std::unique_ptr<tensorflow::Device>(DeviceFactory::NewDevice(
+ "GPU", {}, "/job:a/replica:0/task:0")));
+ }
+
TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxVars")
.Input(FakeInput(DT_FLOAT)) // inputs
.Input(FakeInput(DT_FLOAT)) // min
@@ -101,14 +120,22 @@
Tensor* output = GetOutput(0);
Tensor expected(allocator(), DT_FLOAT, TensorShape({2, 3}));
FillValues<float>(&expected, expected_data);
- ExpectClose(expected, *output);
+ ExpectClose(expected, *output, atol, rtol);
}
void RunTestFakeQuantWithMinMaxVarsPerChannel(
const int num_bits, const bool narrow_range,
- const TensorShape& minmax_shape, const gtl::ArraySlice<float> min,
- const gtl::ArraySlice<float> max, const TensorShape& shape,
- const gtl::ArraySlice<float> data, gtl::ArraySlice<float> expected_data) {
+ const TensorShape& minmax_shape, const gtl::ArraySlice<float>& min,
+ const gtl::ArraySlice<float>& max, const TensorShape& shape,
+ const gtl::ArraySlice<float>& data, gtl::ArraySlice<float> expected_data,
+ const double atol = -1.0, const double rtol = -1.0,
+ const DeviceType device = DEVICE_CPU) {
+ if (device == DEVICE_GPU) {
+ SetDevice(device,
+ std::unique_ptr<tensorflow::Device>(DeviceFactory::NewDevice(
+ "GPU", {}, "/job:a/replica:0/task:0")));
+ }
+
TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxVarsPerChannel")
.Input(FakeInput(DT_FLOAT)) // inputs
.Input(FakeInput(DT_FLOAT)) // min
@@ -130,10 +157,54 @@
Tensor* output = GetOutput(0);
Tensor expected(allocator(), DT_FLOAT, shape);
FillValues<float>(&expected, expected_data);
- ExpectClose(expected, *output);
+ ExpectClose(expected, *output, atol, rtol);
}
};
+TEST_F(QuantOpsTest, WithArgsSymmetricRangeZeroInput_RegularRange) {
+ // Original quantization range: [-10, 10], scale: 20/255.
+ // Original zero point: 127.5, nudged zero point 128.0.
+ // Expected quantized values: 0.0.
+ RunTestFakeQuantWithMinMaxArgs(8, false, -10.0f, 10.0f, TensorShape({2, 3}),
+ {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f},
+ {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}, 0.0,
+ 0.0);
+}
+
+#if GOOGLE_CUDA
+TEST_F(QuantOpsTest, WithArgsSymmetricRangeZeroInput_RegularRange_Gpu) {
+ // Original quantization range: [-10, 10], scale: 20/255.
+ // Original zero point: 127.5, nudged zero point 128.0.
+ // Expected quantized values: 0.0.
+ RunTestFakeQuantWithMinMaxArgs(8, false, -10.0f, 10.0f, TensorShape({2, 3}),
+ {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f},
+ {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}, 0.0, 0.0,
+ DEVICE_GPU);
+}
+#endif
+
+TEST_F(QuantOpsTest, WithArgsSymmetricRangeZeroInput_NarrowRange) {
+ // Original quantization range: [-10, 10], scale: 20/254.
+  // Original zero point: 128.0, no nudging necessary.
+ // Expected quantized values: 0.0.
+ RunTestFakeQuantWithMinMaxArgs(8, true, -10.0f, 10.0f, TensorShape({2, 3}),
+ {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f},
+ {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}, 0.0,
+ 0.0);
+}
+
+#if GOOGLE_CUDA
+TEST_F(QuantOpsTest, WithArgsSymmetricRangeZeroInput_NarrowRange_Gpu) {
+ // Original quantization range: [-10, 10], scale: 20/254.
+  // Original zero point: 128.0, no nudging necessary.
+ // Expected quantized values: 0.0.
+ RunTestFakeQuantWithMinMaxArgs(8, true, -10.0f, 10.0f, TensorShape({2, 3}),
+ {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f},
+ {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}, 0.0, 0.0,
+ DEVICE_GPU);
+}
+#endif
+
TEST_F(QuantOpsTest, WithArgsNoNudging_RegularRange) {
// Original quantization range: [-10 + 0 / 4, -10 + 255 / 4], scale: 1/4.
// Original zero point: 40, no nudging necessary.
@@ -481,6 +552,50 @@
{0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f});
}
+TEST_F(QuantOpsTest, WithVarsSymmetricRangeZeroInput_RegularRange) {
+ // Original quantization range: [-10, 10], scale: 20/255.
+ // Original zero point: 127.5, nudged zero point 128.
+ // Expected quantized values: 0.
+ RunTestFakeQuantWithMinMaxVars(8, false, -10.0f, 10.0f, TensorShape({2, 3}),
+ {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f},
+ {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}, 0.0,
+ 0.0);
+}
+
+#if GOOGLE_CUDA
+TEST_F(QuantOpsTest, WithVarsSymmetricRangeZeroInput_RegularRange_Gpu) {
+ // Original quantization range: [-10, 10], scale: 20/255.
+ // Original zero point: 127.5, nudged zero point 128.
+ // Expected quantized values: 0.
+ RunTestFakeQuantWithMinMaxVars(8, false, -10.0f, 10.0f, TensorShape({2, 3}),
+ {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f},
+ {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}, 0.0, 0.0,
+ DEVICE_GPU);
+}
+#endif
+
+TEST_F(QuantOpsTest, WithVarsSymmetricRangeZeroInput_NarrowRange) {
+ // Original quantization range: [-10, 10], scale: 20/254.
+  // Original zero point: 128.0, no nudging necessary.
+ // Expected quantized values: 0.
+ RunTestFakeQuantWithMinMaxVars(8, true, -10.0f, 10.0f, TensorShape({2, 3}),
+ {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f},
+ {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}, 0.0,
+ 0.0);
+}
+
+#if GOOGLE_CUDA
+TEST_F(QuantOpsTest, WithVarsSymmetricRangeZeroInput_NarrowRange_Gpu) {
+ // Original quantization range: [-10, 10], scale: 20/254.
+  // Original zero point: 128.0, no nudging necessary.
+ // Expected quantized values: 0.
+ RunTestFakeQuantWithMinMaxVars(8, true, -10.0f, 10.0f, TensorShape({2, 3}),
+ {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f},
+ {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}, 0.0, 0.0,
+ DEVICE_GPU);
+}
+#endif
+
TEST_F(QuantOpsTest, WithVarsNoNudging_RegularRange) {
// Original quantization range: [-10 + 0 / 4, -10 + 255 / 4], scale: 1/4.
// Original zero point: 40, no nudging necessary.
@@ -868,6 +983,52 @@
{0.0f, 0.0f, 0.0f, 0.0f});
}
+TEST_F(QuantOpsTest, WithVarsPerChannelSymmetricRangeZeroInput_RegularRange) {
+ // Original quantization range: [-10, 10], scale: 20/255.
+ // Original zero point: 127.5, nudged zero point 128.0.
+ // Expected quantized values: 0.
+ RunTestFakeQuantWithMinMaxVarsPerChannel(
+ 8, false, TensorShape({4}), {-10.0f, -10.0f, -10.0f, -10.0f},
+ {10.0f, 10.0f, 10.0f, 10.0f}, TensorShape({4}), {0.0f, 0.0f, 0.0f, 0.0f},
+ {0.0f, 0.0f, 0.0f, 0.0f}, 0.0, 0.0);
+}
+
+#if GOOGLE_CUDA
+TEST_F(QuantOpsTest,
+ WithVarsPerChannelSymmetricRangeZeroInput_RegularRange_Gpu) {
+ // Original quantization range: [-10, 10], scale: 20/255.
+ // Original zero point: 127.5, nudged zero point 128.0.
+ // Expected quantized values: 0.
+ RunTestFakeQuantWithMinMaxVarsPerChannel(
+ 8, false, TensorShape({4}), {-10.0f, -10.0f, -10.0f, -10.0f},
+ {10.0f, 10.0f, 10.0f, 10.0f}, TensorShape({4}), {0.0f, 0.0f, 0.0f, 0.0f},
+ {0.0f, 0.0f, 0.0f, 0.0f}, 0.0, 0.0, DEVICE_GPU);
+}
+#endif
+
+TEST_F(QuantOpsTest, WithVarsPerChannelSymmetricRangeZeroInput_NarrowRange) {
+ // Original quantization range: [-10, 10], scale: 20/254.
+ // Original zero point: 128.0, no nudging necessary.
+ // Expected quantized values: 0.
+ RunTestFakeQuantWithMinMaxVarsPerChannel(
+ 8, true, TensorShape({4}), {-10.0f, -10.0f, -10.0f, -10.0f},
+ {10.0f, 10.0f, 10.0f, 10.0f}, TensorShape({4}), {0.0f, 0.0f, 0.0f, 0.0f},
+ {0.0f, 0.0f, 0.0f, 0.0f}, 0.0, 0.0);
+}
+
+#if GOOGLE_CUDA
+TEST_F(QuantOpsTest,
+ WithVarsPerChannelSymmetricRangeZeroInput_NarrowRange_Gpu) {
+ // Original quantization range: [-10, 10], scale: 20/254.
+ // Original zero point: 128.0, no nudging necessary.
+ // Expected quantized values: 0.
+ RunTestFakeQuantWithMinMaxVarsPerChannel(
+ 8, true, TensorShape({4}), {-10.0f, -10.0f, -10.0f, -10.0f},
+ {10.0f, 10.0f, 10.0f, 10.0f}, TensorShape({4}), {0.0f, 0.0f, 0.0f, 0.0f},
+ {0.0f, 0.0f, 0.0f, 0.0f}, 0.0, 0.0, DEVICE_GPU);
+}
+#endif
+
TEST_F(QuantOpsTest, WithVarsPerChannelDim1NudgedDown_RegularRange) {
// Original quantization ranges: [-0.4 / 4 + 0 / 4, -0.4 / 4 + 255 / 4].
// Scale: 1/4, original zero point: 0.4, nudged to 0.