caffe2/operators/elementwise_op.cc - platform/external/pytorch - Git at Google

 #include "caffe2/operators/elementwise_op.h"

 namespace caffe2 {

 // For some comparison and logical operators, Eigen does not have vectorized
 // math, so we improvise with plain element-wise loops.
 //
 // NAIVE_FUNCTOR(name, op, input_type, output_type) defines a struct
 // Naive<name>Functor and registers a CPU operator `name` as
 // BinaryElementwiseOp<input_type, CPUContext, Naive<name>Functor, output_type>.
 // `op` must be usable as op(x, y).
 //
 // The generated functor has three entry points:
 //   Run<b_is_scalar>(n, a, b, out, ctx)
 //       out[i] = op(a[i], b[i]) for i in [0, n); when b_is_scalar is non-zero
 //       only b[0] is read.
 //   RunWithBroadcast(a, b, out, pre, n, ctx)
 //       a/out are indexed as [pre][n] and b as [n]:
 //       out[i][j] = op(a[i][j], b[j]).
 //   RunWithBroadcast2(a, b, out, pre, n, post, ctx)
 //       a/out are indexed as [pre][n][post] and b as [n]:
 //       out[i][j][k] = op(a[i][j][k], b[j]).
 // The CPUContext* argument is unused by all three.
 //
 // Loop counters are size_t to match the size_t extents: an int counter is
 // compared signed-vs-unsigned against them and overflows once an extent
 // exceeds INT_MAX.
 //
 // Keep `//` comments out of the macro body: after line splicing they would
 // swallow the continuation lines that follow.
 #define NAIVE_FUNCTOR(name, op, input_type, output_type) \
   struct Naive##name##Functor { \
     template <int b_is_scalar, typename T, typename R> \
     inline void Run(size_t n, const T* a, const T* b, R* out, CPUContext*) { \
       for (size_t i = 0; i < n; ++i) { \
         out[i] = op(a[i], b[b_is_scalar ? 0 : i]); \
       } \
     } \
     template <typename T, typename R> \
     void RunWithBroadcast( \
         const T* a, \
         const T* b, \
         R* out, \
         size_t pre, \
         size_t n, \
         CPUContext*) { \
       for (size_t i = 0; i < pre; ++i) { \
         const size_t row = i * n; \
         for (size_t j = 0; j < n; ++j) { \
           out[row + j] = op(a[row + j], b[j]); \
         } \
       } \
     } \
     template <typename T, typename R> \
     void RunWithBroadcast2( \
         const T* a, \
         const T* b, \
         R* out, \
         size_t pre, \
         size_t n, \
         size_t post, \
         CPUContext*) { \
       for (size_t i = 0; i < pre; ++i) { \
         for (size_t j = 0; j < n; ++j) { \
           const size_t base = (i * n + j) * post; \
           for (size_t k = 0; k < post; ++k) { \
             out[base + k] = op(a[base + k], b[j]); \
           } \
         } \
       } \
     } \
   }; \
   REGISTER_CPU_OPERATOR( \
       name, \
       BinaryElementwiseOp< \
           input_type, \
           CPUContext, \
           Naive##name##Functor, \
           output_type>)

 // Instantiate the naive functors and register one operator per expansion.
 // Each NAIVE_* helper macro supplies the binary expression for exactly one
 // NAIVE_FUNCTOR expansion and is #undef'd right after it.
 //
 // Comparisons: LT/LE/GT/GE take NumericTypes inputs, EQ takes IntTypes
 // inputs; output_type is FixedType<bool> for all of them.
 #define NAIVE_LT(x, y) ((x) < (y))
 NAIVE_FUNCTOR(LT, NAIVE_LT, NumericTypes, FixedType<bool>);
 #undef NAIVE_LT
 #define NAIVE_LE(x, y) ((x) <= (y))
 NAIVE_FUNCTOR(LE, NAIVE_LE, NumericTypes, FixedType<bool>);
 #undef NAIVE_LE
 #define NAIVE_GT(x, y) ((x) > (y))
 NAIVE_FUNCTOR(GT, NAIVE_GT, NumericTypes, FixedType<bool>);
 #undef NAIVE_GT
 #define NAIVE_GE(x, y) ((x) >= (y))
 NAIVE_FUNCTOR(GE, NAIVE_GE, NumericTypes, FixedType<bool>);
 #undef NAIVE_GE
 #define NAIVE_EQ(x, y) ((x) == (y))
 NAIVE_FUNCTOR(EQ, NAIVE_EQ, IntTypes, FixedType<bool>);
 #undef NAIVE_EQ
 // Logical operators: And/Or/Xor take BoolTypes inputs and are written with
 // the bitwise operators &, | and ^; output_type is FixedType<bool>.
 #define NAIVE_AND(x, y) ((x) & (y))
 NAIVE_FUNCTOR(And, NAIVE_AND, BoolTypes, FixedType<bool>);
 #undef NAIVE_AND
 #define NAIVE_OR(x, y) ((x) | (y))
 NAIVE_FUNCTOR(Or, NAIVE_OR, BoolTypes, FixedType<bool>);
 #undef NAIVE_OR
 #define NAIVE_XOR(x, y) ((x) ^ (y))
 NAIVE_FUNCTOR(Xor, NAIVE_XOR, BoolTypes, FixedType<bool>);
 #undef NAIVE_XOR

 // Element-wise logical NOT over bool buffers.
 struct NotFunctor {
   // Stores !x[idx] into y[idx] for every idx in [0, n), front to back; a
   // non-positive n writes nothing. The CPUContext* argument is unused.
   inline void operator()(const int n, const bool* x, bool* y, CPUContext*) {
     int idx = 0;
     while (idx < n) {
       y[idx] = !x[idx];
       ++idx;
     }
   }
 };
 // Register NotFunctor as the CPU operator `Not` over BoolTypes inputs.
 REGISTER_CPU_OPERATOR(
     Not,
     UnaryElementwiseOp<BoolTypes, CPUContext, NotFunctor>);

 template <typename T>
 void SRLHelper::sum2one(const T* x, T* y, size_t n) {
   *y = ConstEigenArrayMap<T>(x, n, 1).sum();
 }

 template <typename T>
 void SRLHelper::RunWithBroadcastFront(
     const T* x,
     T* y,
     size_t pre,
     size_t n,
     CPUContext*) {
   EigenArrayMap<T>(y, n, 1) = ConstEigenArrayMap<T>(x, n, pre).rowwise().sum();
 }

 template <typename T>
 void SRLHelper::RunWithBroadcastBack(
     const T* x,
     T* y,
     size_t post,
     size_t n,
     CPUContext*) {
   EigenArrayMap<T>(y, 1, n) = ConstEigenArrayMap<T>(x, post, n).colwise().sum();
 }

 // General reduction: `a` is indexed as [pre][n][post] and, for each i in
 // [0, n), y[i] receives the sum of a[j][i][k] over all j in [0, pre) and
 // k in [0, post). The CPUContext* argument is unused.
 //
 // Loop counters are size_t to match the size_t extents: an int counter is
 // compared signed-vs-unsigned against them and overflows once an extent
 // exceeds INT_MAX.
 template <typename T>
 void SRLHelper::RunWithBroadcast2(
     const T* a,
     T* y,
     size_t pre,
     size_t n,
     size_t post,
     CPUContext*) {
   for (size_t i = 0; i < n; ++i) {
     y[i] = 0;
     for (size_t j = 0; j < pre; ++j) {
       // Offset of a[j][i][0]; the post elements to add are contiguous.
       const size_t base = (j * n + i) * post;
       for (size_t k = 0; k < post; ++k) {
         y[i] += a[base + k];
       }
     }
   }
 }

 // CPU implementation of SumReduceLike for element type T.
 //
 // Reads A = Input(0) and B = Input(1), resizes C = Output(0) like B and fills
 // it with sums over A:
 //   * B.size() == 1: C receives the sum of every element of A.
 //   * otherwise: A must have more dimensions than B, and B's dimensions must
 //     equal A's dimensions [axis, axis + B.ndim()). A is then treated as
 //     [pre][n][post] and each of the n outputs is the sum over the pre and
 //     post extents.
 // axis_ == -1 selects axis = A.ndim() - B.ndim().
 // Returns true; violated preconditions are reported through CAFFE_ENFORCE*.
 template <>
 template <typename T>
 bool SumReduceLikeOp<CPUContext>::DoRunWithType() {
   const auto& A = Input(0);
   const auto& B = Input(1);
   auto* C = Output(0);
   CAFFE_ENFORCE(&B != C, "In-place is not allowed.");
   C->ResizeLike(B);
   const T* Adata = A.template data<T>();
   auto* Cdata = C->template mutable_data<T>();
   if (B.size() == 1) {
     auto count = A.size();
     SRLHelper::sum2one<T>(Adata, Cdata, count);
   } else {
     CAFFE_ENFORCE_GT(
         A.ndim(),
         B.ndim(),
         "If you are doing ReduceSumLike, input1 should have "
         "a smaller number of dimensions.");
     const int axis = (axis_ == -1 ? A.ndim() - B.ndim() : axis_);
     CAFFE_ENFORCE(
         axis >= 0 && axis < A.ndim(),
         "ReduceSum axis should be in the range of the number "
         "of dimensions of the first input.");
     // B has to fit inside A starting at `axis`; otherwise the dimension loop
     // below would call A.dim() with an index >= A.ndim(). Whether A.dim()
     // range-checks its argument is not visible here, so enforce it explicitly.
     CAFFE_ENFORCE(
         axis + B.ndim() <= A.ndim(),
         "ReduceSum axis plus the number of dimensions of the second input "
         "should not exceed the number of dimensions of the first input.");
     size_t pre = 1, n = 1, post = 1;
     // pre: product of A's dimensions in front of the matched range.
     for (int i = 0; i < axis; ++i) {
       pre *= A.dim(i);
     }
     // n: product of the matched dimensions, which must agree with B.
     for (int i = 0; i < B.ndim(); ++i) {
       CAFFE_ENFORCE_EQ(
           A.dim(i + axis), B.dim(i), "Broadcast dimension mismatch.");
       n *= B.dim(i);
     }
     // post: product of A's dimensions behind the matched range.
     for (int i = axis + B.ndim(); i < A.ndim(); ++i) {
       post *= A.dim(i);
     }
     // Use the Eigen-based helpers when only one side needs reducing and the
     // generic triple loop otherwise.
     if (post == 1) {
       SRLHelper::RunWithBroadcastFront<T>(Adata, Cdata, pre, n, &context_);
     } else if (pre == 1) {
       SRLHelper::RunWithBroadcastBack<T>(Adata, Cdata, post, n, &context_);
     } else {
       SRLHelper::RunWithBroadcast2<T>(Adata, Cdata, pre, n, post, &context_);
     }
   }
   return true;
 }
 REGISTER_CPU_OPERATOR(SumReduceLike, SumReduceLikeOp<CPUContext>);

 }  // namespace caffe2
	#include "caffe2/operators/elementwise_op.h"

	namespace caffe2 {

	// For some comparison and logical operators, Eigen does not have vectorized
	// math, so we improvise with plain element-wise loops.
	//
	// NAIVE_FUNCTOR(name, op, input_type, output_type) defines a struct
	// Naive<name>Functor and registers a CPU operator `name` as
	// BinaryElementwiseOp<input_type, CPUContext, Naive<name>Functor, output_type>.
	// `op` must be usable as op(x, y).
	//
	// The generated functor has three entry points:
	//   Run<b_is_scalar>(n, a, b, out, ctx)
	//       out[i] = op(a[i], b[i]) for i in [0, n); when b_is_scalar is non-zero
	//       only b[0] is read.
	//   RunWithBroadcast(a, b, out, pre, n, ctx)
	//       a/out are indexed as [pre][n] and b as [n]:
	//       out[i][j] = op(a[i][j], b[j]).
	//   RunWithBroadcast2(a, b, out, pre, n, post, ctx)
	//       a/out are indexed as [pre][n][post] and b as [n]:
	//       out[i][j][k] = op(a[i][j][k], b[j]).
	// The CPUContext* argument is unused by all three.
	//
	// Loop counters are size_t to match the size_t extents: an int counter is
	// compared signed-vs-unsigned against them and overflows once an extent
	// exceeds INT_MAX.
	//
	// Keep `//` comments out of the macro body: after line splicing they would
	// swallow the continuation lines that follow.
	#define NAIVE_FUNCTOR(name, op, input_type, output_type) \
	  struct Naive##name##Functor { \
	    template <int b_is_scalar, typename T, typename R> \
	    inline void Run(size_t n, const T* a, const T* b, R* out, CPUContext*) { \
	      for (size_t i = 0; i < n; ++i) { \
	        out[i] = op(a[i], b[b_is_scalar ? 0 : i]); \
	      } \
	    } \
	    template <typename T, typename R> \
	    void RunWithBroadcast( \
	        const T* a, \
	        const T* b, \
	        R* out, \
	        size_t pre, \
	        size_t n, \
	        CPUContext*) { \
	      for (size_t i = 0; i < pre; ++i) { \
	        const size_t row = i * n; \
	        for (size_t j = 0; j < n; ++j) { \
	          out[row + j] = op(a[row + j], b[j]); \
	        } \
	      } \
	    } \
	    template <typename T, typename R> \
	    void RunWithBroadcast2( \
	        const T* a, \
	        const T* b, \
	        R* out, \
	        size_t pre, \
	        size_t n, \
	        size_t post, \
	        CPUContext*) { \
	      for (size_t i = 0; i < pre; ++i) { \
	        for (size_t j = 0; j < n; ++j) { \
	          const size_t base = (i * n + j) * post; \
	          for (size_t k = 0; k < post; ++k) { \
	            out[base + k] = op(a[base + k], b[j]); \
	          } \
	        } \
	      } \
	    } \
	  }; \
	  REGISTER_CPU_OPERATOR( \
	      name, \
	      BinaryElementwiseOp< \
	          input_type, \
	          CPUContext, \
	          Naive##name##Functor, \
	          output_type>)

	// Instantiate the naive functors and register one operator per expansion.
	// Each NAIVE_* helper macro supplies the binary expression for exactly one
	// NAIVE_FUNCTOR expansion and is #undef'd right after it.
	//
	// Comparisons: LT/LE/GT/GE take NumericTypes inputs, EQ takes IntTypes
	// inputs; output_type is FixedType<bool> for all of them.
	#define NAIVE_LT(x, y) ((x) < (y))
	NAIVE_FUNCTOR(LT, NAIVE_LT, NumericTypes, FixedType<bool>);
	#undef NAIVE_LT
	#define NAIVE_LE(x, y) ((x) <= (y))
	NAIVE_FUNCTOR(LE, NAIVE_LE, NumericTypes, FixedType<bool>);
	#undef NAIVE_LE
	#define NAIVE_GT(x, y) ((x) > (y))
	NAIVE_FUNCTOR(GT, NAIVE_GT, NumericTypes, FixedType<bool>);
	#undef NAIVE_GT
	#define NAIVE_GE(x, y) ((x) >= (y))
	NAIVE_FUNCTOR(GE, NAIVE_GE, NumericTypes, FixedType<bool>);
	#undef NAIVE_GE
	#define NAIVE_EQ(x, y) ((x) == (y))
	NAIVE_FUNCTOR(EQ, NAIVE_EQ, IntTypes, FixedType<bool>);
	#undef NAIVE_EQ
	// Logical operators: And/Or/Xor take BoolTypes inputs and are written with
	// the bitwise operators &, | and ^; output_type is FixedType<bool>.
	#define NAIVE_AND(x, y) ((x) & (y))
	NAIVE_FUNCTOR(And, NAIVE_AND, BoolTypes, FixedType<bool>);
	#undef NAIVE_AND
	#define NAIVE_OR(x, y) ((x) | (y))
	NAIVE_FUNCTOR(Or, NAIVE_OR, BoolTypes, FixedType<bool>);
	#undef NAIVE_OR
	#define NAIVE_XOR(x, y) ((x) ^ (y))
	NAIVE_FUNCTOR(Xor, NAIVE_XOR, BoolTypes, FixedType<bool>);
	#undef NAIVE_XOR

	// Element-wise logical NOT over bool buffers.
	struct NotFunctor {
	  // Stores !x[idx] into y[idx] for every idx in [0, n), front to back; a
	  // non-positive n writes nothing. The CPUContext* argument is unused.
	  inline void operator()(const int n, const bool* x, bool* y, CPUContext*) {
	    int idx = 0;
	    while (idx < n) {
	      y[idx] = !x[idx];
	      ++idx;
	    }
	  }
	};
	// Register NotFunctor as the CPU operator `Not` over BoolTypes inputs.
	REGISTER_CPU_OPERATOR(
	    Not,
	    UnaryElementwiseOp<BoolTypes, CPUContext, NotFunctor>);

	template <typename T>
	void SRLHelper::sum2one(const T* x, T* y, size_t n) {
	*y = ConstEigenArrayMap<T>(x, n, 1).sum();
	}

	template <typename T>
	void SRLHelper::RunWithBroadcastFront(
	const T* x,
	T* y,
	size_t pre,
	size_t n,
	CPUContext*) {
	EigenArrayMap<T>(y, n, 1) = ConstEigenArrayMap<T>(x, n, pre).rowwise().sum();
	}

	template <typename T>
	void SRLHelper::RunWithBroadcastBack(
	const T* x,
	T* y,
	size_t post,
	size_t n,
	CPUContext*) {
	EigenArrayMap<T>(y, 1, n) = ConstEigenArrayMap<T>(x, post, n).colwise().sum();
	}

	// General reduction: `a` is indexed as [pre][n][post] and, for each i in
	// [0, n), y[i] receives the sum of a[j][i][k] over all j in [0, pre) and
	// k in [0, post). The CPUContext* argument is unused.
	//
	// Loop counters are size_t to match the size_t extents: an int counter is
	// compared signed-vs-unsigned against them and overflows once an extent
	// exceeds INT_MAX.
	template <typename T>
	void SRLHelper::RunWithBroadcast2(
	    const T* a,
	    T* y,
	    size_t pre,
	    size_t n,
	    size_t post,
	    CPUContext*) {
	  for (size_t i = 0; i < n; ++i) {
	    y[i] = 0;
	    for (size_t j = 0; j < pre; ++j) {
	      // Offset of a[j][i][0]; the post elements to add are contiguous.
	      const size_t base = (j * n + i) * post;
	      for (size_t k = 0; k < post; ++k) {
	        y[i] += a[base + k];
	      }
	    }
	  }
	}

	// CPU implementation of SumReduceLike for element type T.
	//
	// Reads A = Input(0) and B = Input(1), resizes C = Output(0) like B and fills
	// it with sums over A:
	//   * B.size() == 1: C receives the sum of every element of A.
	//   * otherwise: A must have more dimensions than B, and B's dimensions must
	//     equal A's dimensions [axis, axis + B.ndim()). A is then treated as
	//     [pre][n][post] and each of the n outputs is the sum over the pre and
	//     post extents.
	// axis_ == -1 selects axis = A.ndim() - B.ndim().
	// Returns true; violated preconditions are reported through CAFFE_ENFORCE*.
	template <>
	template <typename T>
	bool SumReduceLikeOp<CPUContext>::DoRunWithType() {
	  const auto& A = Input(0);
	  const auto& B = Input(1);
	  auto* C = Output(0);
	  CAFFE_ENFORCE(&B != C, "In-place is not allowed.");
	  C->ResizeLike(B);
	  const T* Adata = A.template data<T>();
	  auto* Cdata = C->template mutable_data<T>();
	  if (B.size() == 1) {
	    auto count = A.size();
	    SRLHelper::sum2one<T>(Adata, Cdata, count);
	  } else {
	    CAFFE_ENFORCE_GT(
	        A.ndim(),
	        B.ndim(),
	        "If you are doing ReduceSumLike, input1 should have "
	        "a smaller number of dimensions.");
	    const int axis = (axis_ == -1 ? A.ndim() - B.ndim() : axis_);
	    CAFFE_ENFORCE(
	        axis >= 0 && axis < A.ndim(),
	        "ReduceSum axis should be in the range of the number "
	        "of dimensions of the first input.");
	    // B has to fit inside A starting at `axis`; otherwise the dimension loop
	    // below would call A.dim() with an index >= A.ndim(). Whether A.dim()
	    // range-checks its argument is not visible here, so enforce it explicitly.
	    CAFFE_ENFORCE(
	        axis + B.ndim() <= A.ndim(),
	        "ReduceSum axis plus the number of dimensions of the second input "
	        "should not exceed the number of dimensions of the first input.");
	    size_t pre = 1, n = 1, post = 1;
	    // pre: product of A's dimensions in front of the matched range.
	    for (int i = 0; i < axis; ++i) {
	      pre *= A.dim(i);
	    }
	    // n: product of the matched dimensions, which must agree with B.
	    for (int i = 0; i < B.ndim(); ++i) {
	      CAFFE_ENFORCE_EQ(
	          A.dim(i + axis), B.dim(i), "Broadcast dimension mismatch.");
	      n *= B.dim(i);
	    }
	    // post: product of A's dimensions behind the matched range.
	    for (int i = axis + B.ndim(); i < A.ndim(); ++i) {
	      post *= A.dim(i);
	    }
	    // Use the Eigen-based helpers when only one side needs reducing and the
	    // generic triple loop otherwise.
	    if (post == 1) {
	      SRLHelper::RunWithBroadcastFront<T>(Adata, Cdata, pre, n, &context_);
	    } else if (pre == 1) {
	      SRLHelper::RunWithBroadcastBack<T>(Adata, Cdata, post, n, &context_);
	    } else {
	      SRLHelper::RunWithBroadcast2<T>(Adata, Cdata, pre, n, post, &context_);
	    }
	  }
	  return true;
	}
	REGISTER_CPU_OPERATOR(SumReduceLike, SumReduceLikeOp<CPUContext>);

	} // namespace caffe2