Unify the usage of Dequantize (#15685)

Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/15685

The declaration of "Dequantize" lives in "fbsource/fbcode/deeplearning/fbgemm2/QuantUtils.h", so calling it requires the "fbgemm::" namespace qualifier.

The explicit "<T>" is actually optional, since the type can be deduced from the first argument.

In some places we have "Dequantize<T>(...)", while in others we have "Dequantize(...)". We should unify them. For reference, all call sites of "Quantize" already use "fbgemm::Quantize<T>(...)".
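
For illustration, a minimal sketch of the unified call style (assuming the scalar overloads of Quantize/Dequantize in fbgemm's QuantUtils.h, as used by sigmoid_test.cc below; "roundtrip" is a hypothetical helper, not part of this change):

  #include <cstdint>
  #include "fbgemm/QuantUtils.h"

  // Hypothetical helper: quantize then dequantize one value, keeping the
  // explicit <T> and the fbgemm:: qualifier on both calls for uniformity.
  float roundtrip(float x, const fbgemm::TensorQuantizationParams& qparams) {
    uint8_t x_q = fbgemm::Quantize<uint8_t>(x, qparams);
    // <T> could be deduced from x_q here, but we spell it out anyway.
    return fbgemm::Dequantize<uint8_t>(x_q, qparams);
  }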

Reviewed By: jspark1105

Differential Revision: D13570847

fbshipit-source-id: 7fca9f7f9e4e0d9e5eb27ac44b8707adc3c80717
diff --git a/caffe2/quantization/server/conv_pool_dnnlowp_op_base.h b/caffe2/quantization/server/conv_pool_dnnlowp_op_base.h
index 9b553d5..1b511af 100644
--- a/caffe2/quantization/server/conv_pool_dnnlowp_op_base.h
+++ b/caffe2/quantization/server/conv_pool_dnnlowp_op_base.h
@@ -82,7 +82,7 @@
       actual = OutputTensorCPU_(0)->template data<float>();
     } else {
       actual_temp.resize(OutputTensorCPU_(0)->numel());
-      Dequantize(
+      fbgemm::Dequantize<T>(
           OutputTensorCPU_(0)->template data<T>(),
           actual_temp.data(),
           OutputTensorCPU_(0)->numel(),
@@ -105,7 +105,7 @@
 
   void RunOnDeviceEpilogue_() {
     if (dequantize_output_) {
-      Dequantize(
+      fbgemm::Dequantize<T>(
           out_temp_.data(),
           OutputTensorCPU_(0)->template mutable_data<float>(),
           OutputTensorCPU_(0)->size(),
diff --git a/caffe2/quantization/server/dequantize_dnnlowp_op.cc b/caffe2/quantization/server/dequantize_dnnlowp_op.cc
index cd8362e..6d81e27 100644
--- a/caffe2/quantization/server/dequantize_dnnlowp_op.cc
+++ b/caffe2/quantization/server/dequantize_dnnlowp_op.cc
@@ -24,7 +24,7 @@
 
   CAFFE_ENFORCE(input.template IsType<T>());
   Output(0)->ResizeLike(input);
-  Dequantize(
+  fbgemm::Dequantize<T>(
       input.template data<T>(),
       Output(0)->template mutable_data<float>(),
       input.numel(),
diff --git a/caffe2/quantization/server/dnnlowp_op.h b/caffe2/quantization/server/dnnlowp_op.h
index b67fa1a..72d4005 100644
--- a/caffe2/quantization/server/dnnlowp_op.h
+++ b/caffe2/quantization/server/dnnlowp_op.h
@@ -130,7 +130,7 @@
       actual = OutputTensorCPU_(0)->template data<float>();
     } else {
       actual_temp.resize(OutputTensorCPU_(0)->numel());
-      Dequantize(
+      fbgemm::Dequantize<float>(
           OutputTensorCPU_(0)->template data<float>(),
           actual_temp.data(),
           OutputTensorCPU_(0)->numel(),
@@ -151,7 +151,7 @@
 
   void RunOnDeviceEpilogue_() {
     if (dequantize_output_) {
-      Dequantize(
+      fbgemm::Dequantize<T>(
           out_temp_.data(),
           OutputTensorCPU_(0)->template mutable_data<float>(),
           OutputTensorCPU_(0)->numel(),
diff --git a/caffe2/quantization/server/op_wrapper.h b/caffe2/quantization/server/op_wrapper.h
index c2bab60..20ea7b9 100644
--- a/caffe2/quantization/server/op_wrapper.h
+++ b/caffe2/quantization/server/op_wrapper.h
@@ -41,7 +41,7 @@
         // model loading when we're running a shadow operator in fp32 for
         // example for measuring quantization error.
         float_tensor->ResizeLike(qtensor);
-        Dequantize(
+        fbgemm::Dequantize<T>(
             qtensor.data<T>(),
             float_tensor->template mutable_data<float>(),
             qtensor.numel(),
diff --git a/caffe2/quantization/server/relu_dnnlowp_op.cc b/caffe2/quantization/server/relu_dnnlowp_op.cc
index f8bf0a0..0e5b3f6 100644
--- a/caffe2/quantization/server/relu_dnnlowp_op.cc
+++ b/caffe2/quantization/server/relu_dnnlowp_op.cc
@@ -65,7 +65,7 @@
   // If input was not quantized, output should be dequantized because ReLU
   // can be inplace.
   if (!X.template IsType<T>()) {
-    Dequantize(
+    fbgemm::Dequantize<T>(
         Y_data, Y->template mutable_data<float>(), Y->numel(), in_qparams);
   }
 
diff --git a/caffe2/quantization/server/sigmoid_test.cc b/caffe2/quantization/server/sigmoid_test.cc
index 2a9f053..a4b1c95 100644
--- a/caffe2/quantization/server/sigmoid_test.cc
+++ b/caffe2/quantization/server/sigmoid_test.cc
@@ -23,8 +23,8 @@
       uint8_t x_q = fbgemm::Quantize<uint8_t>(
           x, sigmoid_approx.GetInputQuantizationParams());
       uint8_t y_q = sigmoid_approx.Compute(x_q);
-      float y =
-          fbgemm::Dequantize(y_q, sigmoid_approx.GetOutputQuantizationParams());
+      float y = fbgemm::Dequantize<uint8_t>(
+          y_q, sigmoid_approx.GetOutputQuantizationParams());
       float sigmoid = exp(x) / (exp(x) + 1);
       float err = fabs(sigmoid - y);
       sq_err_sum += err * err;
diff --git a/caffe2/quantization/server/tanh_test.cc b/caffe2/quantization/server/tanh_test.cc
index 1a02b9c..15aa929 100644
--- a/caffe2/quantization/server/tanh_test.cc
+++ b/caffe2/quantization/server/tanh_test.cc
@@ -27,8 +27,8 @@
       uint8_t x_q = fbgemm::Quantize<uint8_t>(
           x, tanh_approx.GetInputQuantizationParams());
       uint8_t y_q = tanh_approx.Compute(x_q);
-      float y =
-          fbgemm::Dequantize(y_q, tanh_approx.GetOutputQuantizationParams());
+      float y = fbgemm::Dequantize<uint8_t>(
+          y_q, tanh_approx.GetOutputQuantizationParams());
       float err = fabs(tanh(x) - y);
       sq_err_sum += err * err;
       max_err = std::max(err, max_err);