| #include "quantize_dnnlowp_op.h" | 
 | #include "dnnlowp_op.h" | 
 |  | 
 | #include "caffe2/core/tensor_int8.h" | 
 | #include "caffe2/quantization/server/int8_gen_quant_params.h" | 
 | #include "caffe2_dnnlowp_utils.h" | 
 | #include "dnnlowp_partition.h" | 
 |  | 
 | namespace caffe2 { | 
 |  | 
 | using namespace std; | 
 |  | 
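// The quantization factory is constructed once from this operator's
// arguments; it is consulted later for the activation precision when
// quantization parameters are assembled.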
template <typename T>
QuantizeDNNLowPOp<T>::QuantizeDNNLowPOp(
    const OperatorDef& operator_def,
    Workspace* ws)
    : Operator<CPUContext>(operator_def, ws),
      qfactory_(dnnlowp::GetQuantizationFactoryOf(this)) {}

template <typename T>
bool QuantizeDNNLowPOp<T>::RunOnDevice() {
  using namespace dnnlowp;
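  // Parse the DNNLOWP-specific operator arguments lazily, on the first
  // run only.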
  if (!arguments_parsed_) {
    dnnlowp::ParseDNNLowPOperatorArguments(this);
    arguments_parsed_ = true;
  }
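  // Input 0 is the float tensor to quantize; the optional input 1 carries
  // precomputed quantization parameters.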
  CAFFE_ENFORCE(InputSize() <= 2);
  CAFFE_ENFORCE(Input(0).template IsType<float>());
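  // When a second input is given, read the scale and zero point from its
  // Int8QuantParamsBlob; these take precedence over any parameters chosen
  // below.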
  bool use_input_qparam = false;
  float in_scale = 0;
  int in_zero_point = 0;
  if (InputSize() == 2) {
    use_input_qparam = true;

    const auto* input_qparam_blob =
        Input<caffe2::unique_ptr<Int8QuantParamsBlob>>(1).get();
    CAFFE_ENFORCE(input_qparam_blob);
    in_scale = input_qparam_blob->qparam.scale;
    in_zero_point = input_qparam_blob->qparam.zero_point;
  }

  // NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init)
  TensorQuantizationParams in_qparams;
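  // Choose the input quantization parameters, in order of precedence:
  // the explicit qparam input, statically specified parameters from the
  // operator arguments, or parameters derived dynamically from the input
  // tensor itself.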
  if (use_input_qparam) {
    in_qparams.scale = in_scale;
    in_qparams.zero_point = in_zero_point;
    in_qparams.precision = qfactory_->GetActivationPrecision();
  } else {
    if (HasStaticQuantization(this)) {
      in_qparams = GetStaticQuantizationParamsOf(this, 0);
    } else {
      in_qparams = GetInputTensorQuantizationParamsOf(this, 0, qfactory_.get());
    }
  }
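  // Allocate the quantized output with the same shape as the input.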
  int8::Int8TensorCPU* output =
      Outputs()[0]->template GetMutable<int8::Int8TensorCPU>();
  output->t.ResizeLike(Input(0));

  const float* in_data = Input(0).template data<float>();
  T* out_data = output->t.template mutable_data<T>();
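  // fbgemm performs the elementwise affine quantization, i.e. for an
  // unsigned target type roughly:
  //   x_q = clamp(round(x / scale) + zero_point, 0, 2^precision - 1)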
  fbgemm::Quantize<T>(in_data, out_data, Input(0).numel(), in_qparams);
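  // Attach the chosen quantization parameters to output 0 so that
  // downstream DNNLOWP operators can consume them.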
  PropagateOutputTensorQuantizationParams(this, 0, in_qparams);

  return true;
}
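// Quantize converts a float tensor into a quantized tensor of the same
// shape; the optional second input supplies precomputed quantization
// parameters.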
OPERATOR_SCHEMA(Quantize)
    .NumInputs(1, 2)
    .NumOutputs(1)
    .IdenticalTypeAndShapeOfInput(0);
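// Register the 8-bit and 16-bit DNNLOWP engines; the rowwise engine names
// map to the same tensor-wise implementation.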
REGISTER_CPU_OPERATOR_WITH_ENGINE(
    Quantize,
    DNNLOWP,
    QuantizeDNNLowPOp<uint8_t>);
REGISTER_CPU_OPERATOR_WITH_ENGINE(
    Quantize,
    DNNLOWP_ROWWISE,
    QuantizeDNNLowPOp<uint8_t>);

REGISTER_CPU_OPERATOR_WITH_ENGINE(
    Quantize,
    DNNLOWP_16,
    QuantizeDNNLowPOp<uint16_t>);
REGISTER_CPU_OPERATOR_WITH_ENGINE(
    Quantize,
    DNNLOWP_ROWWISE_16,
    QuantizeDNNLowPOp<uint16_t>);
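// Also register under the Int8Quantize name so that nets using the int8
// operator naming convention pick up the same implementation.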
REGISTER_CPU_OPERATOR_WITH_ENGINE(
    Int8Quantize,
    DNNLOWP,
    QuantizeDNNLowPOp<uint8_t>);
REGISTER_CPU_OPERATOR_WITH_ENGINE(
    Int8Quantize,
    DNNLOWP_ROWWISE,
    QuantizeDNNLowPOp<uint8_t>);

} // namespace caffe2