caffe2/quantization/server/quantize_dnnlowp_op.cc - platform/external/pytorch - Git at Google

 #include "quantize_dnnlowp_op.h"
 #include "dnnlowp_op.h"

 #ifdef _OPENMP
 #include <omp.h>
 #endif

 #include "caffe2/core/tensor_int8.h"
 #include "caffe2_dnnlowp_utils.h"
 #include "dnnlowp_partition.h"

 namespace caffe2 {

 using namespace std;

 template <typename T>
 QuantizeDNNLowPOp<T>::QuantizeDNNLowPOp(
     const OperatorDef& operator_def,
     Workspace* ws)
     : Operator<CPUContext>(operator_def, ws),
       qfactory_(dnnlowp::GetQuantizationFactoryOf(this)) {}

 template <typename T>
 bool QuantizeDNNLowPOp<T>::RunOnDevice() {
   using namespace dnnlowp;

   if (!arguments_parsed_) {
     dnnlowp::ParseDNNLowPOperatorArguments(this);
     arguments_parsed_ = true;
   }

   CAFFE_ENFORCE(Input(0).template IsType<float>());

   TensorQuantizationParams in_qparams;
   if (HasStaticQuantization(this)) {
     in_qparams = GetStaticQuantizationParamsOf(this, 0);
   } else {
     in_qparams = GetInputTensorQuantizationParamsOf(this, 0, qfactory_.get());
   }
   int8::Int8TensorCPU* output =
       Outputs()[0]->template GetMutable<int8::Int8TensorCPU>();
   output->t.ResizeLike(Input(0));

   const float* in_data = Input(0).template data<float>();
   T* out_data = output->t.template mutable_data<T>();

 #ifdef _OPENMP
 #pragma omp parallel
 #endif
   {
     int i_begin, i_end;
     tie(i_begin, i_end) = Get1DPartition(
         Input(0).numel(), dnnlowp_get_num_threads(), dnnlowp_get_thread_num());
     fbgemm::Quantize<T>(
         in_data + i_begin, out_data + i_begin, i_end - i_begin, in_qparams);
   }

   PropagateOutputTensorQuantizationParams(this, 0, in_qparams);

   return true;
 }

 OPERATOR_SCHEMA(Quantize)
     .NumInputs(1)
     .NumOutputs(1)
     .IdenticalTypeAndShapeOfInput(0);

 REGISTER_CPU_OPERATOR_WITH_ENGINE(
     Quantize,
     DNNLOWP,
     QuantizeDNNLowPOp<uint8_t>);
 REGISTER_CPU_OPERATOR_WITH_ENGINE(
     Quantize,
     DNNLOWP_ROWWISE,
     QuantizeDNNLowPOp<uint8_t>);

 REGISTER_CPU_OPERATOR_WITH_ENGINE(
     Quantize,
     DNNLOWP_16,
     QuantizeDNNLowPOp<uint16_t>);
 REGISTER_CPU_OPERATOR_WITH_ENGINE(
     Quantize,
     DNNLOWP_ROWWISE_16,
     QuantizeDNNLowPOp<uint16_t>);

 REGISTER_CPU_OPERATOR_WITH_ENGINE(
     Int8Quantize,
     DNNLOWP,
     QuantizeDNNLowPOp<uint8_t>);
 REGISTER_CPU_OPERATOR_WITH_ENGINE(
     Int8Quantize,
     DNNLOWP_ROWWISE,
     QuantizeDNNLowPOp<uint8_t>);

 } // namespace caffe2
	#include "quantize_dnnlowp_op.h"
	#include "dnnlowp_op.h"

	#ifdef _OPENMP
	#include <omp.h>
	#endif

	#include "caffe2/core/tensor_int8.h"
	#include "caffe2_dnnlowp_utils.h"
	#include "dnnlowp_partition.h"

	namespace caffe2 {

	using namespace std;

	template <typename T>
	QuantizeDNNLowPOp<T>::QuantizeDNNLowPOp(
	const OperatorDef& operator_def,
	Workspace* ws)
	: Operator<CPUContext>(operator_def, ws),
	qfactory_(dnnlowp::GetQuantizationFactoryOf(this)) {}

	template <typename T>
	bool QuantizeDNNLowPOp<T>::RunOnDevice() {
	using namespace dnnlowp;

	if (!arguments_parsed_) {
	dnnlowp::ParseDNNLowPOperatorArguments(this);
	arguments_parsed_ = true;
	}

	CAFFE_ENFORCE(Input(0).template IsType<float>());

	TensorQuantizationParams in_qparams;
	if (HasStaticQuantization(this)) {
	in_qparams = GetStaticQuantizationParamsOf(this, 0);
	} else {
	in_qparams = GetInputTensorQuantizationParamsOf(this, 0, qfactory_.get());
	}
	int8::Int8TensorCPU* output =
	Outputs()[0]->template GetMutable<int8::Int8TensorCPU>();
	output->t.ResizeLike(Input(0));

	const float* in_data = Input(0).template data<float>();
	T* out_data = output->t.template mutable_data<T>();

	#ifdef _OPENMP
	#pragma omp parallel
	#endif
	{
	int i_begin, i_end;
	tie(i_begin, i_end) = Get1DPartition(
	Input(0).numel(), dnnlowp_get_num_threads(), dnnlowp_get_thread_num());
	fbgemm::Quantize<T>(
	in_data + i_begin, out_data + i_begin, i_end - i_begin, in_qparams);
	}

	PropagateOutputTensorQuantizationParams(this, 0, in_qparams);

	return true;
	}

	OPERATOR_SCHEMA(Quantize)
	.NumInputs(1)
	.NumOutputs(1)
	.IdenticalTypeAndShapeOfInput(0);

	REGISTER_CPU_OPERATOR_WITH_ENGINE(
	Quantize,
	DNNLOWP,
	QuantizeDNNLowPOp<uint8_t>);
	REGISTER_CPU_OPERATOR_WITH_ENGINE(
	Quantize,
	DNNLOWP_ROWWISE,
	QuantizeDNNLowPOp<uint8_t>);

	REGISTER_CPU_OPERATOR_WITH_ENGINE(
	Quantize,
	DNNLOWP_16,
	QuantizeDNNLowPOp<uint16_t>);
	REGISTER_CPU_OPERATOR_WITH_ENGINE(
	Quantize,
	DNNLOWP_ROWWISE_16,
	QuantizeDNNLowPOp<uint16_t>);

	REGISTER_CPU_OPERATOR_WITH_ENGINE(
	Int8Quantize,
	DNNLOWP,
	QuantizeDNNLowPOp<uint8_t>);
	REGISTER_CPU_OPERATOR_WITH_ENGINE(
	Int8Quantize,
	DNNLOWP_ROWWISE,
	QuantizeDNNLowPOp<uint8_t>);

	} // namespace caffe2