// blob: 71d9b076336fa7f13c2e7f05ee590bcd16d1b7de [file] [log] [blame]
#ifndef INTEGRAL_IMAGE_OP_H_
#define INTEGRAL_IMAGE_OP_H_
#include "caffe2/core/context.h"
#include "caffe2/core/logging.h"
#include "caffe2/core/operator.h"
#include "caffe2/utils/math.h"
namespace caffe2 {
namespace {

// Mutable Eigen view over an existing contiguous buffer, interpreted as a
// dynamically sized matrix stored in row-major order.
template <typename Scalar>
using EigenMatrixMapRowMajor = Eigen::Map<
    Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>;

// Read-only counterpart of EigenMatrixMapRowMajor: the mapped matrix type is
// const-qualified, so elements cannot be written through the view.
template <typename Scalar>
using ConstEigenMatrixMapRowMajor = Eigen::Map<const Eigen::Matrix<
    Scalar,
    Eigen::Dynamic,
    Eigen::Dynamic,
    Eigen::RowMajor>>;

} // namespace
// Computes the integral image (summed-area table) of a 4D tensor.
//
// Input(0)  X: 4D tensor, written (N, C, H, W) below. The two leading
//              dimensions are flattened into N * C independent planes.
// Output(0) Y: tensor of shape (N, C, H + 1, W + 1). Within every plane,
//              row 0 and column 0 are zero and
//                Y[h][w] = sum of X[0..h-1][0..w-1].
//
// The element type is taken from the template parameter T. The data is
// accessed directly through Eigen maps over the tensors' raw pointers.
template <typename T, class Context>
class IntegralImageOp final : public Operator<Context> {
 public:
  IntegralImageOp(const OperatorDef& operator_def, Workspace* ws)
      : Operator<Context>(operator_def, ws) {}
  USE_OPERATOR_CONTEXT_FUNCTIONS;

  // Fills Output(0) with the integral image of Input(0).
  // Returns true on success; a non-4D input trips CAFFE_ENFORCE_EQ.
  bool RunOnDevice() override {
    const auto& X = Input(0);
    auto* Y = Output(0);
    CAFFE_ENFORCE_EQ(X.ndim(), 4, "Only supports 4D tensors for the moment");

    // The output carries one extra leading row and column of zeros per plane.
    vector<TIndex> out_shape(X.dims());
    out_shape[2] += 1; // H + 1 output size
    out_shape[3] += 1; // W + 1 output size
    Y->Resize(out_shape);

    const int planes = X.dim32(0) * X.dim32(1);
    const int rows_in = X.dim32(2);
    const int cols_in = X.dim32(3);
    const int rows_out = Y->dim32(2);
    const int cols_out = Y->dim32(3);

    const T* input_data = X.template data<T>();
    T* output_data = Y->template mutable_data<T>();

    // View both tensors as 2D row-major matrices with all planes stacked
    // vertically: (planes * rows) x cols.
    EigenMatrixMapRowMajor<T> Y_arr(output_data, planes * rows_out, cols_out);
    ConstEigenMatrixMapRowMajor<T> X_arr(
        input_data, planes * rows_in, cols_in);

    for (int plane = 0; plane < planes; ++plane) {
      const int out_base = plane * rows_out;
      const int in_base = plane * rows_in;

      // Row pass: the first output row of the plane is all zeros; every other
      // output row r holds the exclusive running sum along input row r - 1.
      for (int c = 0; c < cols_out; ++c) {
        Y_arr(out_base, c) = T(0);
      }
      for (int r = 1; r < rows_out; ++r) {
        const int out_row = out_base + r;
        const int in_row = in_base + r - 1;
        Y_arr(out_row, 0) = T(0);
        for (int c = 1; c < cols_out; ++c) {
          Y_arr(out_row, c) = Y_arr(out_row, c - 1) + X_arr(in_row, c - 1);
        }
      }

      // Column pass: accumulate each row onto the (already final) row above
      // it. Iterating row by row keeps the accesses contiguous in the
      // row-major buffer while performing the same additions per element.
      for (int r = 1; r < rows_out; ++r) {
        const int out_row = out_base + r;
        for (int c = 0; c < cols_out; ++c) {
          Y_arr(out_row, c) += Y_arr(out_row - 1, c);
        }
      }
    }
    return true;
  }
};
// Gradient operator paired with IntegralImageOp.
//
// Input(0)  X:  original input of the forward op, 4D, written (N, C, H, W).
//               Only its shape is used.
// Input(1)  dY: gradient w.r.t. the forward output, shape (N, C, H + 1, W + 1).
// Output(0) dX: gradient w.r.t. X, same shape as X. Within every plane,
//                 dX[h][w] = sum of dY[0..h][0..w]
//               i.e. an inclusive 2D prefix sum over the top-left H x W
//               corner of dY.
//
// NOTE(review): this reproduces the accumulation direction of the existing
// implementation. The analytic adjoint of the forward op would appear to sum
// dY over indices strictly greater than (h, w) instead -- confirm the intended
// semantics before relying on this as an exact gradient.
template <typename T, class Context>
class IntegralImageGradientOp final : public Operator<Context> {
 public:
  IntegralImageGradientOp(const OperatorDef& def, Workspace* ws)
      : Operator<Context>(def, ws) {}
  USE_OPERATOR_CONTEXT_FUNCTIONS;

  // Fills Output(0) from Input(1). Returns true on success; inputs whose
  // shapes are inconsistent with the forward op trip CAFFE_ENFORCE_EQ.
  bool RunOnDevice() override {
    const auto& X = Input(0); // Original input to "forward" op
    const auto& dY = Input(1); // Gradient of net w.r.t. output of "forward" op
                               // (aka "gradOutput")
    auto* dX = Output(0); // Gradient of net w.r.t. input to "forward" op
                          // (aka "gradInput")

    // Validate shapes up front: the loops below index dY assuming it is
    // exactly one row and one column larger than X in every plane.
    CAFFE_ENFORCE_EQ(X.ndim(), 4, "Only supports 4D tensors for the moment");
    CAFFE_ENFORCE_EQ(dY.ndim(), 4, "Output gradient must be a 4D tensor");
    CAFFE_ENFORCE_EQ(dY.dim32(0), X.dim32(0), "Batch size mismatch");
    CAFFE_ENFORCE_EQ(dY.dim32(1), X.dim32(1), "Channel count mismatch");
    CAFFE_ENFORCE_EQ(
        dY.dim32(2), X.dim32(2) + 1, "Output gradient must have H + 1 rows");
    CAFFE_ENFORCE_EQ(
        dY.dim32(3), X.dim32(3) + 1, "Output gradient must have W + 1 cols");

    dX->ResizeLike(X);

    const int planes = X.dim32(0) * X.dim32(1);
    const int rows_in = dY.dim32(2);
    const int cols_in = dY.dim32(3);
    const int rows_out = dX->dim32(2);
    const int cols_out = dX->dim32(3);

    const T* input_data = dY.template data<T>();
    T* output_data = dX->template mutable_data<T>();

    // Nothing to write when a plane has no rows or no columns; bail out
    // before touching element (0, 0) of an empty matrix.
    if (rows_out == 0 || cols_out == 0) {
      return true;
    }

    // View both tensors as 2D row-major matrices with all planes stacked
    // vertically: (planes * rows) x cols.
    EigenMatrixMapRowMajor<T> dX_arr(output_data, planes * rows_out, cols_out);
    ConstEigenMatrixMapRowMajor<T> dY_arr(
        input_data, planes * rows_in, cols_in);

    for (int plane = 0; plane < planes; ++plane) {
      const int in_base = plane * rows_in;
      const int out_base = plane * rows_out;

      // Row pass: inclusive running sum along each of the first H rows of dY,
      // written straight into dX so no temporary matrix is needed. The last
      // row and last column of dY never contribute to dX.
      for (int r = 0; r < rows_out; ++r) {
        const int in_row = in_base + r;
        const int out_row = out_base + r;
        dX_arr(out_row, 0) = dY_arr(in_row, 0);
        for (int c = 1; c < cols_out; ++c) {
          dX_arr(out_row, c) = dX_arr(out_row, c - 1) + dY_arr(in_row, c);
        }
      }

      // Column pass: accumulate each row onto the (already final) row above.
      for (int r = 1; r < rows_out; ++r) {
        const int out_row = out_base + r;
        for (int c = 0; c < cols_out; ++c) {
          dX_arr(out_row, c) = dX_arr(out_row - 1, c) + dX_arr(out_row, c);
        }
      }
    }
    return true;
  }

 protected:
  // Not referenced by the implementation in this header; kept because code
  // outside this view may use it -- TODO confirm before removing.
  Tensor<Context> row_pass_buffer_;
};
} // namespace caffe2
#endif // INTEGRAL_IMAGE_OP_H_