|  | #include "integral_image_op.h" | 
|  | #include "caffe2/utils/eigen_utils.h" | 
|  |  | 
|  | namespace caffe2 { | 
|  |  | 
|  | namespace { | 
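// Aliases for Eigen views that reinterpret a contiguous float buffer as a
// row-major matrix without copying.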
template <typename T>
using EigenMatrixMapRowMajor = Eigen::Map<
    Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>;

template <typename T>
using ConstEigenMatrixMapRowMajor = Eigen::Map<
    const Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>;
} // namespace

template <>
bool IntegralImageOp<float, CPUContext>::RunOnDevice() {
  const auto& X = Input(0);

  CAFFE_ENFORCE_EQ(X.dim(), 4, "Only supports 4D tensors for the moment");

  vector<int64_t> out_shape(X.sizes().vec());
  out_shape[2] += 1; // H + 1 output size
  out_shape[3] += 1; // W + 1 output size
  auto* Y = Output(0, out_shape, at::dtype<float>());
  const int ind = X.dim32(0);
  const int chans = X.dim32(1);
  const int rows_in = X.dim32(2);
  const int cols_in = X.dim32(3);
  const int rows_out = Y->dim32(2);
  const int cols_out = Y->dim32(3);

  const float* input_data = X.template data<float>();
  float* output_data = Y->template mutable_data<float>();

  const int row_out_pass_size = ind * chans * rows_out;
  const int row_in_pass_size = ind * chans * rows_in;
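  // View X as an (N * C * H) x W matrix and Y as an (N * C * (H + 1)) x
  // (W + 1) matrix, so each image row becomes one matrix row and both passes
  // below are plain 2D loops.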
  EigenMatrixMapRowMajor<float> Y_arr(output_data, row_out_pass_size, cols_out);
  ConstEigenMatrixMapRowMajor<float> X_arr(
      input_data, row_in_pass_size, cols_in);

  // Row Pass
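  // For each output row, accumulate a running sum over the input row directly
  // above it. `i / rows_out` is the flattened (n, c) block index; subtracting
  // `diff` converts an output row index into the matching input row, which is
  // shifted by one because input blocks have H rows while output blocks have
  // H + 1. The first output row and first output column stay zero.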
  for (int i = 0; i < row_out_pass_size; i++) {
    int row = i % rows_out;
    int diff = i / rows_out + 1;
    Y_arr(i, 0) = 0.;
    if (row == 0) {
      for (int j = 1; j < cols_out; ++j) {
        Y_arr(i, j) = 0.;
      }
    } else {
      for (int j = 1; j < cols_out; ++j) {
        Y_arr(i, j) = Y_arr(i, j - 1) + X_arr(i - diff, j - 1);
      }
    }
  }

  // Col Pass
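  // Accumulate each column downwards within its (n, c) block. After this pass
  // Y(r, c) holds the sum of X over the r x c region above and to the left of
  // (r, c), i.e. the standard integral image with a zero first row and column.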
  const int col_out_pass_size = X.dim32(0) * chans * cols_out;
  for (int i = 0; i < col_out_pass_size; i++) {
    int col = i % cols_out;
    int row = i / cols_out;
    for (int j = row * rows_out + 1; j < (row + 1) * rows_out; ++j) {
      Y_arr(j, col) += Y_arr(j - 1, col);
    }
  }
  return true;
}

template <>
bool IntegralImageGradientOp<float, CPUContext>::RunOnDevice() {
  auto& X = Input(0); // Original input to "forward" op
  auto& dY = Input(1); // Gradient of net w.r.t. output of "forward" op
                       // (aka "gradOutput")
  auto* dX = Output(
      0, X.sizes(), at::dtype<float>()); // Gradient of net w.r.t. input to
                                         // "forward" op (aka "gradInput")

  const int ind = X.dim32(0);
  const int chans = X.dim32(1);
  const int rows_in = dY.dim32(2);
  const int cols_in = dY.dim32(3);
  const int rows_out = dX->dim32(2);
  const int cols_out = dX->dim32(3);

  const float* input_data = dY.template data<float>();
  float* output_data = dX->template mutable_data<float>();

  const int row_out_pass_size = ind * chans * rows_out;
  const int row_in_pass_size = ind * chans * rows_in;
  EigenMatrixMapRowMajor<float> dX_arr(
      output_data, row_out_pass_size, cols_out);
  ConstEigenMatrixMapRowMajor<float> dY_arr(
      input_data, row_in_pass_size, cols_in);
  Eigen::MatrixXf tmp(row_in_pass_size, cols_out);

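  // The gradient reuses the two-pass structure of the forward op: a running
  // sum along each row of dY into tmp, followed by a running sum down each
  // column of tmp into dX. Note that in this function rows_in/cols_in refer
  // to dY (H + 1, W + 1) while rows_out/cols_out refer to dX (H, W).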
  // Row Pass dY(N, C, H+1, W+1) => tmp(N, C, H+1, W)
  for (int i = 0; i < row_in_pass_size; i++) {
    tmp(i, 0) = dY_arr(i, 0);
    for (int j = 1; j < cols_out; ++j) {
      tmp(i, j) = tmp(i, j - 1) + dY_arr(i, j);
    }
  }

  // Col Pass tmp(N, C, H+1, W) => dX(N, C, H, W)
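  // Only the first H rows of each (n, c) block of tmp contribute here, which
  // drops the extra row and yields a gradient with the same (N, C, H, W)
  // shape as X.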
  const int col_out_pass_size = X.dim32(0) * chans * cols_out;
  for (int i = 0; i < col_out_pass_size; i++) {
    int col = i % cols_out;
    int row_out_start = (i / cols_out) * rows_out;
    int row_in_start = (i / cols_out) * rows_in;
    dX_arr(row_out_start, col) = tmp(row_in_start, col);
    for (int j = 1; j < rows_out; ++j) {
      dX_arr(row_out_start + j, col) =
          dX_arr(row_out_start + j - 1, col) + tmp(row_in_start + j, col);
    }
  }
  return true;
}

REGISTER_CPU_OPERATOR(IntegralImage, IntegralImageOp<float, CPUContext>);
REGISTER_CPU_OPERATOR(
    IntegralImageGradient,
    IntegralImageGradientOp<float, CPUContext>);

// Input: X; Output: Y
OPERATOR_SCHEMA(IntegralImage)
    .NumInputs(1)
    .NumOutputs(1)
    .SetDoc(R"DOC(
Computes an integral image, in which each output value is the cumulative sum
of the input pixel values above and to the left of it. The integral image can
then be used by other detection and tracking techniques, e.g. to evaluate
box sums in constant time.
)DOC")
    .Input(0, "X", "Images tensor of the form (N, C, H, W)")
    .Output(0, "Y", "Integrated image of the form (N, C, H+1, W+1)");

// Input: X, dY (aka "gradOutput"); Output: dX (aka "gradInput")
OPERATOR_SCHEMA(IntegralImageGradient).NumInputs(2).NumOutputs(1);

class GetIntegralImageGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  vector<OperatorDef> GetGradientDefs() override {
    return SingleGradientDef(
        "IntegralImageGradient",
        "",
        vector<string>{I(0), GO(0)},
        vector<string>{GI(0)});
  }
};

REGISTER_GRADIENT(IntegralImage, GetIntegralImageGradient);

} // namespace caffe2