Ensure that histogram observers have a zero-point of zero for post-ReLU activations (#37107)

Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/37107

Currently, histogram observers relax both the min and max values of the activations for performance reasons. This causes an issue for Glow, which slows down if the zero-point is not zero for post-ReLU activations.
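
For illustration, here is a minimal sketch of the behavior this change targets. It mirrors the new test added below and assumes the HistogramObserver API from torch/quantization/observer.py: feed the observer two batches of non-negative, post-ReLU-like values so the histogram-combining path runs, then inspect the computed qparams.

    import torch
    from torch.quantization.observer import HistogramObserver

    # Affine quint8 observer, matching the configuration used in the new test.
    obs = HistogramObserver(bins=8, dtype=torch.quint8,
                            qscheme=torch.per_tensor_affine, reduce_range=True)
    obs(torch.tensor([0.0, 0.3, 1.2, 1.7]))  # first batch initializes the histogram
    obs(torch.tensor([0.1, 1.3, 2.0, 2.7]))  # second batch exercises the min/max relaxation
    scale, zero_point = obs.calculate_qparams()
    # With this change, the min is no longer relaxed below 0.0 for one-sided
    # distributions, so the zero-point stays at 0.
    print(zero_point.item())  # expected: 0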
ghstack-source-id: 102768017

Test Plan: buck test caffe2/test:quantization -- 'test_histogram_observer_one_sided \(quantization\.test_quantization\.RecordHistogramObserverTest\)' --print-passing-details

Differential Revision: D21187636

fbshipit-source-id: 8d616b9e9caf2979a26a215e99434f71025e3d8b
diff --git a/test/quantization/test_quantize.py b/test/quantization/test_quantize.py
index f935151..f32ab8b 100644
--- a/test/quantization/test_quantize.py
+++ b/test/quantization/test_quantize.py
@@ -1780,3 +1780,18 @@
         self.assertEqual(myobs.histogram, loaded_obs.histogram)
         self.assertEqual(myobs.bins, loaded_obs.bins)
         self.assertEqual(myobs.calculate_qparams(), loaded_obs.calculate_qparams())
+
+    def test_histogram_observer_one_sided(self):
+        myobs = HistogramObserver(bins=8, dtype=torch.quint8, qscheme=torch.per_tensor_affine, reduce_range=True)
+        x = torch.tensor([0.0, 0.3, 1.2, 1.7])
+        y = torch.tensor([0.1, 1.3, 2.0, 2.7])
+        myobs(x)
+        myobs(y)
+        self.assertEqual(myobs.min_val, 0)
+        qparams = myobs.calculate_qparams()
+        self.assertEqual(qparams[1].item(), 0)
+
+
+
+if __name__ == '__main__':
+    run_tests()
diff --git a/torch/quantization/observer.py b/torch/quantization/observer.py
index 4509c40..c124e07 100644
--- a/torch/quantization/observer.py
+++ b/torch/quantization/observer.py
@@ -843,8 +843,9 @@
         hist_bin_width = (self.max_val - self.min_val) / (self.bins * upsample_rate)
         downsample_rate = torch.ceil((combined_max - combined_min) / (self.bins * hist_bin_width)).to(torch.int).item()
         e = downsample_rate * (self.bins * hist_bin_width) - (combined_max - combined_min)
-        combined_max = combined_max + e / 2
-        combined_min = combined_min - e / 2
+        # Relax only the max, not the min, so that for one-sided distributions the min stays at zero
+        combined_max = combined_max + e
+        combined_min = combined_min
         start_idx = torch.round((self.min_val - combined_min) / hist_bin_width).to(torch.int).item()
         return combined_min, combined_max, downsample_rate, start_idx