// caffe2/operators/integral_image_op.h - platform/external/pytorch - Git at Google

 #ifndef INTEGRAL_IMAGE_OP_H_
 #define INTEGRAL_IMAGE_OP_H_

 #include "caffe2/core/context.h"
 #include "caffe2/core/logging.h"
 #include "caffe2/core/operator.h"
 #include "caffe2/utils/math.h"

 namespace caffe2 {

 namespace {
 // Dense matrix of T whose row and column counts are both chosen at run time
 // and whose coefficients are stored row by row. Shared by the two map
 // aliases below.
 template <typename T>
 using IntegralImageRowMajorMatrix =
     Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>;

 // Writable Eigen::Map over an existing buffer, addressed as a row-major
 // matrix of T.
 template <typename T>
 using EigenMatrixMapRowMajor = Eigen::Map<IntegralImageRowMajorMatrix<T>>;

 // Read-only counterpart of EigenMatrixMapRowMajor.
 template <typename T>
 using ConstEigenMatrixMapRowMajor =
     Eigen::Map<const IntegralImageRowMajorMatrix<T>>;
 } // namespace

 // Computes the integral image of a 4D input.
 //
 // Input(0) is a tensor X with four dimensions; the last two are treated as
 // image rows (H) and columns (W). Output(0) is a tensor Y whose last two
 // dimensions are H + 1 and W + 1. For every leading (dim 0, dim 1) pair:
 //   * row 0 and column 0 of Y are zero;
 //   * Y(r, c) for r, c >= 1 is the sum of X(r', c') over r' < r and c' < c.
 //
 // T is the element type read from X and written to Y.
 template <typename T, class Context>
 class IntegralImageOp final : public Operator<Context> {
  public:
   IntegralImageOp(const OperatorDef& operator_def, Workspace* ws)
       : Operator<Context>(operator_def, ws) {}
   USE_OPERATOR_CONTEXT_FUNCTIONS;

   // Resizes Output(0) to the padded shape and fills it with the integral
   // image of Input(0). Returns true on success; fails the enforce check if
   // the input is not 4D.
   bool RunOnDevice() override {
     const auto& X = Input(0);
     auto* Y = Output(0);
     CAFFE_ENFORCE_EQ(X.ndim(), 4, "Only supports 4D tensors for the moment");

     vector<TIndex> out_shape(X.dims());
     out_shape[2] += 1; // H + 1 output size
     out_shape[3] += 1; // W + 1 output size
     Y->Resize(out_shape);
     const int ind = X.dim32(0);
     const int chans = X.dim32(1);
     const int rows_in = X.dim32(2);
     const int cols_in = X.dim32(3);
     const int rows_out = Y->dim32(2);
     const int cols_out = Y->dim32(3);

     const T* input_data = X.template data<T>();
     T* output_data = Y->template mutable_data<T>();

     // Each (dim 0, dim 1) pair is one independent image plane. Stacking the
     // planes vertically lets both tensors be viewed as 2D row-major matrices.
     const int planes = ind * chans;
     EigenMatrixMapRowMajor<T> Y_arr(output_data, planes * rows_out, cols_out);
     ConstEigenMatrixMapRowMajor<T> X_arr(
         input_data, planes * rows_in, cols_in);

     // Row pass: every output row becomes the running sum of the matching
     // input row, shifted right by one so that column 0 stays zero.
     for (int plane = 0; plane < planes; ++plane) {
       const int out_base = plane * rows_out;
       const int in_base = plane * rows_in;

       // The first output row of a plane is the all-zero border.
       for (int c = 0; c < cols_out; ++c) {
         Y_arr(out_base, c) = T(0);
       }

       // Output row r (r >= 1) is built from input row r - 1.
       for (int r = 1; r < rows_out; ++r) {
         const int out_row = out_base + r;
         const int in_row = in_base + r - 1;
         Y_arr(out_row, 0) = T(0);
         for (int c = 1; c < cols_out; ++c) {
           Y_arr(out_row, c) = Y_arr(out_row, c - 1) + X_arr(in_row, c - 1);
         }
       }
     }

     // Col pass: accumulate each row onto the row above it, plane by plane.
     // Columns are independent of each other, so walking row by row gives the
     // same values as walking column by column while following the row-major
     // storage order.
     for (int plane = 0; plane < planes; ++plane) {
       const int out_base = plane * rows_out;
       for (int r = out_base + 1; r < out_base + rows_out; ++r) {
         for (int c = 0; c < cols_out; ++c) {
           Y_arr(r, c) += Y_arr(r - 1, c);
         }
       }
     }
     return true;
   }
 };

 // Backward operator paired with IntegralImageOp.
 //
 // Input(0) is X, the original 4D input of the forward op (used only for its
 // shape). Input(1) is dY with shape (N, C, H + 1, W + 1). Output(0) is dX,
 // resized to the shape of X. For every (N, C) plane:
 //   dX(r, c) = sum of dY(r', c') over r' <= r and c' <= c,
 // so the last row and the last column of dY are never read.
 //
 // NOTE(review): the analytic gradient of the forward prefix sum would be a
 // sum over r' > r and c' > c (a suffix sum); this operator computes a prefix
 // sum instead. The existing behaviour is kept as-is here - confirm which
 // definition is intended.
 //
 // T is the element type read from dY and written to dX.
 template <typename T, class Context>
 class IntegralImageGradientOp final : public Operator<Context> {
  public:
   IntegralImageGradientOp(const OperatorDef& def, Workspace* ws)
       : Operator<Context>(def, ws) {}
   USE_OPERATOR_CONTEXT_FUNCTIONS;

   // Resizes Output(0) like X and fills it from dY. Returns true on success;
   // fails an enforce check when X or dY is not 4D or when the shape of dY is
   // not (N, C, H + 1, W + 1) for X of shape (N, C, H, W).
   bool RunOnDevice() override {
     const auto& X = Input(0); // Original input to "forward" op
     const auto& dY = Input(1); // Gradient of net w.r.t. output of "forward" op
     // (aka "gradOutput")
     auto* dX = Output(0); // Gradient of net w.r.t. input to "forward" op
     // (aka "gradInput")

     // Reject shapes that would make the loops below read outside dY.
     CAFFE_ENFORCE_EQ(X.ndim(), 4, "Only supports 4D tensors for the moment");
     CAFFE_ENFORCE_EQ(dY.ndim(), 4, "Only supports 4D tensors for the moment");
     CAFFE_ENFORCE_EQ(dY.dim32(0), X.dim32(0), "dY and X disagree on dim 0");
     CAFFE_ENFORCE_EQ(dY.dim32(1), X.dim32(1), "dY and X disagree on dim 1");
     CAFFE_ENFORCE_EQ(
         dY.dim32(2), X.dim32(2) + 1, "dY must have H + 1 rows");
     CAFFE_ENFORCE_EQ(
         dY.dim32(3), X.dim32(3) + 1, "dY must have W + 1 columns");

     dX->ResizeLike(X);
     const int ind = X.dim32(0);
     const int chans = X.dim32(1);
     const int rows_in = dY.dim32(2);
     const int cols_in = dY.dim32(3);
     const int rows_out = dX->dim32(2);
     const int cols_out = dX->dim32(3);

     const T* input_data = dY.template data<T>();
     T* output_data = dX->template mutable_data<T>();

     // Nothing to write for an empty output; returning here also avoids
     // touching element (row, 0) of a matrix that has no rows or no columns.
     const int planes = ind * chans;
     if (planes == 0 || rows_out == 0 || cols_out == 0) {
       return true;
     }

     EigenMatrixMapRowMajor<T> dX_arr(
         output_data, planes * rows_out, cols_out);
     ConstEigenMatrixMapRowMajor<T> dY_arr(
         input_data, planes * rows_in, cols_in);

     // dX is written while dY is still being read, so the two buffers must
     // not share storage (their shapes differ, so in-place use is presumably
     // not intended - confirm against the operator schema).
     for (int plane = 0; plane < planes; ++plane) {
       const int out_base = plane * rows_out;
       const int in_base = plane * rows_in;
       for (int r = 0; r < rows_out; ++r) {
         const int out_row = out_base + r;
         const int in_row = in_base + r;

         // Row pass: running sum of the first W entries of the dY row,
         // written straight into dX so no temporary matrix is needed.
         dX_arr(out_row, 0) = dY_arr(in_row, 0);
         for (int c = 1; c < cols_out; ++c) {
           dX_arr(out_row, c) = dX_arr(out_row, c - 1) + dY_arr(in_row, c);
         }

         // Col pass: add the already finished row above (same plane only).
         if (r > 0) {
           for (int c = 0; c < cols_out; ++c) {
             dX_arr(out_row, c) = dX_arr(out_row - 1, c) + dX_arr(out_row, c);
           }
         }
       }
     }
     return true;
   }

  protected:
   // Not referenced by RunOnDevice(); kept so the class layout is unchanged.
   Tensor<Context> row_pass_buffer_;
 };

 } // namespace caffe2

 #endif // INTEGRAL_IMAGE_OP_H_
	#ifndef INTEGRAL_IMAGE_OP_H_
	#define INTEGRAL_IMAGE_OP_H_

	#include "caffe2/core/context.h"
	#include "caffe2/core/logging.h"
	#include "caffe2/core/operator.h"
	#include "caffe2/utils/math.h"

	namespace caffe2 {

	namespace {
	// Dense matrix of T whose row and column counts are both chosen at run time
	// and whose coefficients are stored row by row. Shared by the two map
	// aliases below.
	template <typename T>
	using IntegralImageRowMajorMatrix =
	    Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>;

	// Writable Eigen::Map over an existing buffer, addressed as a row-major
	// matrix of T.
	template <typename T>
	using EigenMatrixMapRowMajor = Eigen::Map<IntegralImageRowMajorMatrix<T>>;

	// Read-only counterpart of EigenMatrixMapRowMajor.
	template <typename T>
	using ConstEigenMatrixMapRowMajor =
	    Eigen::Map<const IntegralImageRowMajorMatrix<T>>;
	} // namespace

	// Computes the integral image of a 4D input.
	//
	// Input(0) is a tensor X with four dimensions; the last two are treated as
	// image rows (H) and columns (W). Output(0) is a tensor Y whose last two
	// dimensions are H + 1 and W + 1. For every leading (dim 0, dim 1) pair:
	//   * row 0 and column 0 of Y are zero;
	//   * Y(r, c) for r, c >= 1 is the sum of X(r', c') over r' < r and c' < c.
	//
	// T is the element type read from X and written to Y.
	template <typename T, class Context>
	class IntegralImageOp final : public Operator<Context> {
	 public:
	  IntegralImageOp(const OperatorDef& operator_def, Workspace* ws)
	      : Operator<Context>(operator_def, ws) {}
	  USE_OPERATOR_CONTEXT_FUNCTIONS;

	  // Resizes Output(0) to the padded shape and fills it with the integral
	  // image of Input(0). Returns true on success; fails the enforce check if
	  // the input is not 4D.
	  bool RunOnDevice() override {
	    const auto& X = Input(0);
	    auto* Y = Output(0);
	    CAFFE_ENFORCE_EQ(X.ndim(), 4, "Only supports 4D tensors for the moment");

	    vector<TIndex> out_shape(X.dims());
	    out_shape[2] += 1; // H + 1 output size
	    out_shape[3] += 1; // W + 1 output size
	    Y->Resize(out_shape);
	    const int ind = X.dim32(0);
	    const int chans = X.dim32(1);
	    const int rows_in = X.dim32(2);
	    const int cols_in = X.dim32(3);
	    const int rows_out = Y->dim32(2);
	    const int cols_out = Y->dim32(3);

	    const T* input_data = X.template data<T>();
	    T* output_data = Y->template mutable_data<T>();

	    // Each (dim 0, dim 1) pair is one independent image plane. Stacking the
	    // planes vertically lets both tensors be viewed as 2D row-major matrices.
	    const int planes = ind * chans;
	    EigenMatrixMapRowMajor<T> Y_arr(output_data, planes * rows_out, cols_out);
	    ConstEigenMatrixMapRowMajor<T> X_arr(
	        input_data, planes * rows_in, cols_in);

	    // Row pass: every output row becomes the running sum of the matching
	    // input row, shifted right by one so that column 0 stays zero.
	    for (int plane = 0; plane < planes; ++plane) {
	      const int out_base = plane * rows_out;
	      const int in_base = plane * rows_in;

	      // The first output row of a plane is the all-zero border.
	      for (int c = 0; c < cols_out; ++c) {
	        Y_arr(out_base, c) = T(0);
	      }

	      // Output row r (r >= 1) is built from input row r - 1.
	      for (int r = 1; r < rows_out; ++r) {
	        const int out_row = out_base + r;
	        const int in_row = in_base + r - 1;
	        Y_arr(out_row, 0) = T(0);
	        for (int c = 1; c < cols_out; ++c) {
	          Y_arr(out_row, c) = Y_arr(out_row, c - 1) + X_arr(in_row, c - 1);
	        }
	      }
	    }

	    // Col pass: accumulate each row onto the row above it, plane by plane.
	    // Columns are independent of each other, so walking row by row gives the
	    // same values as walking column by column while following the row-major
	    // storage order.
	    for (int plane = 0; plane < planes; ++plane) {
	      const int out_base = plane * rows_out;
	      for (int r = out_base + 1; r < out_base + rows_out; ++r) {
	        for (int c = 0; c < cols_out; ++c) {
	          Y_arr(r, c) += Y_arr(r - 1, c);
	        }
	      }
	    }
	    return true;
	  }
	};

	// Backward operator paired with IntegralImageOp.
	//
	// Input(0) is X, the original 4D input of the forward op (used only for its
	// shape). Input(1) is dY with shape (N, C, H + 1, W + 1). Output(0) is dX,
	// resized to the shape of X. For every (N, C) plane:
	//   dX(r, c) = sum of dY(r', c') over r' <= r and c' <= c,
	// so the last row and the last column of dY are never read.
	//
	// NOTE(review): the analytic gradient of the forward prefix sum would be a
	// sum over r' > r and c' > c (a suffix sum); this operator computes a prefix
	// sum instead. The existing behaviour is kept as-is here - confirm which
	// definition is intended.
	//
	// T is the element type read from dY and written to dX.
	template <typename T, class Context>
	class IntegralImageGradientOp final : public Operator<Context> {
	 public:
	  IntegralImageGradientOp(const OperatorDef& def, Workspace* ws)
	      : Operator<Context>(def, ws) {}
	  USE_OPERATOR_CONTEXT_FUNCTIONS;

	  // Resizes Output(0) like X and fills it from dY. Returns true on success;
	  // fails an enforce check when X or dY is not 4D or when the shape of dY is
	  // not (N, C, H + 1, W + 1) for X of shape (N, C, H, W).
	  bool RunOnDevice() override {
	    const auto& X = Input(0); // Original input to "forward" op
	    const auto& dY = Input(1); // Gradient of net w.r.t. output of "forward" op
	    // (aka "gradOutput")
	    auto* dX = Output(0); // Gradient of net w.r.t. input to "forward" op
	    // (aka "gradInput")

	    // Reject shapes that would make the loops below read outside dY.
	    CAFFE_ENFORCE_EQ(X.ndim(), 4, "Only supports 4D tensors for the moment");
	    CAFFE_ENFORCE_EQ(dY.ndim(), 4, "Only supports 4D tensors for the moment");
	    CAFFE_ENFORCE_EQ(dY.dim32(0), X.dim32(0), "dY and X disagree on dim 0");
	    CAFFE_ENFORCE_EQ(dY.dim32(1), X.dim32(1), "dY and X disagree on dim 1");
	    CAFFE_ENFORCE_EQ(
	        dY.dim32(2), X.dim32(2) + 1, "dY must have H + 1 rows");
	    CAFFE_ENFORCE_EQ(
	        dY.dim32(3), X.dim32(3) + 1, "dY must have W + 1 columns");

	    dX->ResizeLike(X);
	    const int ind = X.dim32(0);
	    const int chans = X.dim32(1);
	    const int rows_in = dY.dim32(2);
	    const int cols_in = dY.dim32(3);
	    const int rows_out = dX->dim32(2);
	    const int cols_out = dX->dim32(3);

	    const T* input_data = dY.template data<T>();
	    T* output_data = dX->template mutable_data<T>();

	    // Nothing to write for an empty output; returning here also avoids
	    // touching element (row, 0) of a matrix that has no rows or no columns.
	    const int planes = ind * chans;
	    if (planes == 0 || rows_out == 0 || cols_out == 0) {
	      return true;
	    }

	    EigenMatrixMapRowMajor<T> dX_arr(
	        output_data, planes * rows_out, cols_out);
	    ConstEigenMatrixMapRowMajor<T> dY_arr(
	        input_data, planes * rows_in, cols_in);

	    // dX is written while dY is still being read, so the two buffers must
	    // not share storage (their shapes differ, so in-place use is presumably
	    // not intended - confirm against the operator schema).
	    for (int plane = 0; plane < planes; ++plane) {
	      const int out_base = plane * rows_out;
	      const int in_base = plane * rows_in;
	      for (int r = 0; r < rows_out; ++r) {
	        const int out_row = out_base + r;
	        const int in_row = in_base + r;

	        // Row pass: running sum of the first W entries of the dY row,
	        // written straight into dX so no temporary matrix is needed.
	        dX_arr(out_row, 0) = dY_arr(in_row, 0);
	        for (int c = 1; c < cols_out; ++c) {
	          dX_arr(out_row, c) = dX_arr(out_row, c - 1) + dY_arr(in_row, c);
	        }

	        // Col pass: add the already finished row above (same plane only).
	        if (r > 0) {
	          for (int c = 0; c < cols_out; ++c) {
	            dX_arr(out_row, c) = dX_arr(out_row - 1, c) + dX_arr(out_row, c);
	          }
	        }
	      }
	    }
	    return true;
	  }

	 protected:
	  // Not referenced by RunOnDevice(); kept so the class layout is unchanged.
	  Tensor<Context> row_pass_buffer_;
	};

	} // namespace caffe2

	#endif // INTEGRAL_IMAGE_OP_H_