| #pragma once |
| |
| #include "caffe2/core/tensor_int8.h" |
| #include "caffe2/operators/elementwise_ops.h" |
| #include "caffe2/quantization/server/caffe2_dnnlowp_utils.h" |
| #include "caffe2/quantization/server/dnnlowp_op.h" |
| #include "caffe2/quantization/server/sigmoid.h" |
| |
| namespace caffe2 { |
| |
| template <typename T, class Functor> |
| class UnaryElementwiseWithArgsDNNLowPOp : public Operator<CPUContext> { |
| public: |
| USE_OPERATOR_FUNCTIONS(CPUContext); |
| UnaryElementwiseWithArgsDNNLowPOp( |
| const OperatorDef& operator_def, |
| Workspace* ws) |
| : Operator<CPUContext>(operator_def, ws), functor_() {} |
| |
| bool RunOnDevice() override { |
| if (!arguments_parsed_) { |
| dnnlowp::ParseDNNLowPOperatorArguments(this); |
| dnnlowp::SetStaticQuantizationParams( |
| this, 0, functor_.GetOutputQuantizationParams()); |
| arguments_parsed_ = true; |
| } |
| |
| auto& input = this->template Input<int8::Int8TensorCPU>(0).t; |
| auto& output = Outputs()[0]->template GetMutable<int8::Int8TensorCPU>()->t; |
| output.ResizeLike(input); |
| functor_( |
| input.size(), |
| input.template data<T>(), |
| output.template mutable_data<T>()); |
| |
| dnnlowp::PropagateOutputTensorQuantizationParams( |
| this, 0, functor_.GetOutputQuantizationParams()); |
| return true; |
| } |
| |
| private: |
| Functor functor_; |
| bool arguments_parsed_{false}; |
| }; |
| |
| template <typename T, typename FP32_OP> |
| class BinaryElementwiseDNNLowPOp : public DNNLowPOp<T, FP32_OP> { |
| public: |
| USE_OPERATOR_FUNCTIONS(CPUContext); |
| BinaryElementwiseDNNLowPOp(const OperatorDef& operator_def, Workspace* ws) |
| : DNNLowPOp<T, FP32_OP>(operator_def, ws), |
| OP_SINGLE_ARG(bool, "broadcast", enable_broadcast_, 0), |
| OP_SINGLE_ARG(int, "axis", axis_, -1), |
| OP_SINGLE_ARG(string, "axis_str", axis_str_, ""), |
| OP_SINGLE_ARG(string, "order", order_, "NCHW") { |
| // Figure out the correct axis to use. |
| if (enable_broadcast_) { |
| if (axis_ != -1) { |
| // Get axis from an explicit axis argument. |
| CAFFE_ENFORCE_EQ( |
| axis_str_.size(), |
| 0, |
| "Args axis and axis_str cannot be used simultaneously."); |
| } else if (axis_str_.size()) { |
| // Get the axis index semantically. |
| CAFFE_ENFORCE_EQ( |
| axis_str_.size(), 1, "Unsupported axis string", axis_str_); |
| size_t semantic_axis_ = order_.find(axis_str_); |
| CAFFE_ENFORCE_NE( |
| semantic_axis_, |
| string::npos, |
| "Unrecognizable axis string ", |
| axis_str_, |
| " from order string ", |
| order_); |
| axis_ = semantic_axis_; |
| } |
| } else { |
| CAFFE_ENFORCE( |
| axis_ == -1 && axis_str_.size() == 0, |
| "Do not specify axis or axis_str if broadcast is not enabled."); |
| } |
| } |
| |
| protected: |
| bool enable_broadcast_; |
| int axis_; |
| string axis_str_; |
| string order_; |
| |
| dnnlowp::RequantizationParams requantization_params_; |
| }; // BinaryElementwiseDNNLowPOp |
| |
| // For arithmetic operators, Eigen provides a good way to vectorize even |
| // when broadcasting. |
| #define DECLARE_EIGEN_FUNCTOR(name, eigen_op, input_type, output_type) \ |
| struct Eigen##name##Functor { \ |
| template <int b_is_scalar, typename T, typename R> \ |
| inline void Run(size_t n, const T* a, const T* b, R* out, CPUContext*) { \ |
| if (b_is_scalar) { \ |
| EigenVectorArrayMap<R>(out, n) = \ |
| eigen_op((ConstEigenVectorArrayMap<T>(a, n)), (b[0])); \ |
| } else { \ |
| EigenVectorArrayMap<R>(out, n) = eigen_op( \ |
| (ConstEigenVectorArrayMap<T>(a, n)), \ |
| (ConstEigenVectorArrayMap<T>(b, n))); \ |
| } \ |
| } \ |
| template <typename T, typename R> \ |
| void RunWithBroadcast( \ |
| const T* a, \ |
| const T* b, \ |
| R* out, \ |
| size_t pre, \ |
| size_t n, \ |
| CPUContext*) { \ |
| EigenArrayMap<R>(out, n, pre) = eigen_op( \ |
| (ConstEigenArrayMap<T>(a, n, pre).colwise()), \ |
| (ConstEigenVectorArrayMap<T>(b, n))); \ |
| } \ |
| template <typename T, typename R> \ |
| void RunWithBroadcast2( \ |
| const T* a, \ |
| const T* b, \ |
| R* out, \ |
| size_t pre, \ |
| size_t n, \ |
| size_t post, \ |
| CPUContext*) { \ |
| for (const auto i : c10::irange(pre)) { \ |
| EigenArrayMap<R>(out + i * n * post, post, n) = eigen_op( \ |
| (ConstEigenArrayMap<T>(a + i * n * post, post, n).rowwise()), \ |
| (Eigen::Map<const Eigen::Array<T, 1, Eigen::Dynamic>>(b, n))); \ |
| } \ |
| } \ |
| }; |
| } // namespace caffe2 |