// caffe2/operators/roi_pool_op.cc - platform/external/pytorch - Git at Google

 #include "roi_pool_op.h"

 #include <cfloat>

 namespace caffe2 {

 using std::max;
 using std::min;

 // CPU forward pass of RoI max pooling over float data.
 //
 // Inputs:
 //   Input(0) X: feature map, indexed here as (batch, channels, height, width).
 //   Input(1) R: RoI list with one row [batch_index x1 y1 x2 y2] per RoI.
 // Outputs:
 //   Output(0) Y: (num_rois, channels, pooled_height_, pooled_width_) maxima.
 //   Output(1) A: argmaxes with Y's shape, written only when is_test_ is
 //                false. Each entry is the offset h * width + w of the maximum
 //                inside its channel plane, or -1 if nothing was pooled.
 //
 // Returns true once every RoI has been pooled. Malformed inputs are rejected
 // by the CAFFE_ENFORCE_* checks below.
 template <>
 bool RoIPoolOp<float, CPUContext>::RunOnDevice() {
   const auto& X = Input(0); // Input data to pool
   const auto& R = Input(1); // RoIs
   auto* Y = Output(0); // RoI pooled data
   auto* A = is_test_ ? nullptr : Output(1); // argmaxes

   // The pointer arithmetic below assumes exactly these ranks. Reject anything
   // else up front instead of silently striding over extra dimensions.
   CAFFE_ENFORCE_EQ(X.dim(), 4, "RoIPool expects a 4-D input X");
   CAFFE_ENFORCE_EQ(R.dim(), 2, "RoIPool expects a 2-D RoI tensor");
   // Each ROI is of the form [batch_index x1 y1 x2 y2]
   CAFFE_ENFORCE_EQ(R.dim32(1), 5);

   // TODO: Handle the storage_order properly to get the NCWH.
   const int batch_size = X.dim32(0);
   const int channels = X.dim32(1);
   const int height = X.dim32(2);
   const int width = X.dim32(3);
   const int num_rois = R.dim32(0);

   Y->Resize(num_rois, channels, pooled_height_, pooled_width_);
   if (!is_test_) {
     A->Resize(Y->sizes());
   }

   // Strides never change inside the loops, so look them up once.
   const auto x_image_stride = X.size_from_dim(1); // one batch element of X
   const auto x_channel_stride = X.size_from_dim(2); // one channel plane of X
   // A was resized to Y's sizes, so both outputs advance by the same stride.
   const auto out_channel_stride = Y->size_from_dim(2);
   const auto roi_stride = R.size_from_dim(1); // one RoI row

   const float* Xdata = X.data<float>();
   const float* rois = R.data<float>();
   float* Ydata = Y->template mutable_data<float>();
   int* argmax_data = is_test_ ? nullptr : A->template mutable_data<int>();

   // For each ROI R = [batch_index x1 y1 x2 y2]: max pool over R
   for (int n = 0; n < num_rois; ++n) {
     // RoI fields are stored as floats; convert to integer feature-map
     // coordinates after applying spatial_scale_.
     const int roi_batch_id = static_cast<int>(rois[0]);
     const int roi_start_w = static_cast<int>(round(rois[1] * spatial_scale_));
     const int roi_start_h = static_cast<int>(round(rois[2] * spatial_scale_));
     const int roi_end_w = static_cast<int>(round(rois[3] * spatial_scale_));
     const int roi_end_h = static_cast<int>(round(rois[4] * spatial_scale_));
     CAFFE_ENFORCE_GE(roi_batch_id, 0);
     CAFFE_ENFORCE_LT(roi_batch_id, batch_size);

     // Force malformed ROIs to be 1x1
     const int roi_height = max(roi_end_h - roi_start_h + 1, 1);
     const int roi_width = max(roi_end_w - roi_start_w + 1, 1);

     const float bin_size_h =
         static_cast<float>(roi_height) / static_cast<float>(pooled_height_);
     const float bin_size_w =
         static_cast<float>(roi_width) / static_cast<float>(pooled_width_);

     // Start of channel 0 of the image this RoI refers to.
     const float* batch_data = Xdata + roi_batch_id * x_image_stride;

     for (int c = 0; c < channels; ++c) {
       for (int ph = 0; ph < pooled_height_; ++ph) {
         for (int pw = 0; pw < pooled_width_; ++pw) {
           // Compute pooling region for this output unit:
           //  start (included) = floor(ph * roi_height / pooled_height_)
           //  end (excluded) = ceil((ph + 1) * roi_height / pooled_height_)
           int hstart =
               static_cast<int>(floor(static_cast<float>(ph) * bin_size_h));
           int wstart =
               static_cast<int>(floor(static_cast<float>(pw) * bin_size_w));
           int hend =
               static_cast<int>(ceil(static_cast<float>(ph + 1) * bin_size_h));
           int wend =
               static_cast<int>(ceil(static_cast<float>(pw + 1) * bin_size_w));

           // Add roi offsets and clip to input boundaries
           hstart = min(max(hstart + roi_start_h, 0), height);
           hend = min(max(hend + roi_start_h, 0), height);
           wstart = min(max(wstart + roi_start_w, 0), width);
           wend = min(max(wend + roi_start_w, 0), width);

           const int pool_index = ph * pooled_width_ + pw;

           // Define an empty pooling region to be zero
           const bool is_empty = (hend <= hstart) || (wend <= wstart);
           Ydata[pool_index] = is_empty ? 0 : -FLT_MAX;
           if (!is_test_) {
             // If nothing is pooled, argmax = -1 causes nothing to be backprop'd
             argmax_data[pool_index] = -1;
           }

           // Strict '>' keeps the first maximum encountered in row-major order.
           for (int h = hstart; h < hend; ++h) {
             for (int w = wstart; w < wend; ++w) {
               const int index = h * width + w;
               if (batch_data[index] > Ydata[pool_index]) {
                 Ydata[pool_index] = batch_data[index];
                 if (!is_test_) {
                   argmax_data[pool_index] = index;
                 }
               }
             }
           }
         }
       }
       // Increment all data pointers by one channel
       batch_data += x_channel_stride;
       Ydata += out_channel_stride;
       if (!is_test_) {
         argmax_data += out_channel_stride;
       }
     }
     // Increment ROI data pointer
     rois += roi_stride;
   }

   return true;
 }

 // Register the float CPU implementations of the forward operator (RoIPool)
 // and of its gradient operator (RoIPoolGradient).
 REGISTER_CPU_OPERATOR(RoIPool, RoIPoolOp<float, CPUContext>);
 REGISTER_CPU_OPERATOR(RoIPoolGradient, RoIPoolGradientOp<float, CPUContext>);

 // Schema for RoIPool.
 // Inputs: X, rois
 // Outputs in train mode: Y, argmaxes
 // Outputs in test mode:  Y
 OPERATOR_SCHEMA(RoIPool)
     .NumInputs(2)
     .NumOutputs({1, 2})
     .TensorInferenceFunction(
         [](const OperatorDef& def, const vector<TensorShape>& in) {
           // Shape inference: Y is (num_rois, channels, pooled_h, pooled_w)
           // with X's data type; argmaxes copy Y's shape as INT32.
           ArgumentHelper args(def);
           const TensorShape& x_shape = in[0];
           const TensorShape& rois_shape = in[1];

           // The channel axis of X depends on the declared storage order.
           const StorageOrder layout = StringToStorageOrder(
               args.GetSingleArgument<string>("order", "NCHW"));
           const int channel_count = layout == StorageOrder::NCHW
               ? x_shape.dims(1)
               : x_shape.dims(3);
           const int roi_count = rois_shape.dims(0);
           const int out_h = args.GetSingleArgument<int>("pooled_h", 1);
           const int out_w = args.GetSingleArgument<int>("pooled_w", 1);

           const vector<int> y_dims{roi_count, channel_count, out_h, out_w};
           const TensorShape y_shape =
               CreateTensorShape(y_dims, x_shape.data_type());

           // Y is always produced; argmaxes only outside of test mode.
           vector<TensorShape> inferred{y_shape};
           const bool test_mode =
               args.GetSingleArgument<int>(OpSchema::Arg_IsTest, 0) != 0;
           if (!test_mode) {
             TensorShape argmax_shape = y_shape;
             argmax_shape.set_data_type(TensorProto_DataType_INT32);
             inferred.push_back(argmax_shape);
           }
           return inferred;
         })
     .SetDoc(R"DOC(
 Carries out ROI Pooling for Faster-RCNN.
 Depending on the mode, there are multiple output cases:

   Output case #1: Y, argmaxes (train mode)
   Output case #2: Y           (test mode)
 )DOC")
     // Operator arguments.
     .Arg(
         "is_test",
         "If set, run in test mode and skip computation of argmaxes (used for "
         "gradient computation). Only one output tensor is produced. "
         "(Default: false).")
     .Arg("order", "A StorageOrder string (Default: \"NCHW\").")
     .Arg("pooled_h", "The pooled output height (Default: 1).")
     .Arg("pooled_w", "The pooled output width (Default: 1).")
     .Arg(
         "spatial_scale",
         "Multiplicative spatial scale factor to translate ROI coords from "
         "their input scale to the scale used when pooling (Default: 1.0).")
     // Input tensors.
     .Input(
         0,
         "X",
         "The input 4-D tensor of data. Only NCHW order is currently supported.")
     .Input(
         1,
         "rois",
         "RoIs (Regions of Interest) to pool over. Should be a 2-D tensor of "
         "shape (num_rois, 5) given as [[batch_id, x1, y1, x2, y2], ...].")
     // Output tensors.
     .Output(
         0,
         "Y",
         "RoI pooled output 4-D tensor of shape "
         "(num_rois, channels, pooled_h, pooled_w).")
     .Output(
         1,
         "argmaxes",
         "Argmaxes corresponding to indices in X used for gradient computation. "
         "Only output if arg \"is_test\" is false.");

 // Schema for the gradient operator: exactly four inputs and one output.
 // Input: X, rois, argmaxes, dY (aka "gradOutput")
 // Output: dX (aka "gradInput")
 OPERATOR_SCHEMA(RoIPoolGradient).NumInputs(4).NumOutputs(1);

 // Gradient maker for RoIPool: emits a single RoIPoolGradient operator.
 class GetRoIPoolGradient : public GradientMakerBase {
   using GradientMakerBase::GradientMakerBase;

   // The gradient op reads the forward inputs 0 and 1, the forward output 1
   // and the gradient of forward output 0; it writes the gradient of forward
   // input 0.
   vector<OperatorDef> GetGradientDefs() override {
     const vector<string> grad_inputs{I(0), I(1), O(1), GO(0)};
     const vector<string> grad_outputs{GI(0)};
     return SingleGradientDef("RoIPoolGradient", "", grad_inputs, grad_outputs);
   }
 };

 // Associate RoIPool with GetRoIPoolGradient as its gradient maker.
 REGISTER_GRADIENT(RoIPool, GetRoIPoolGradient);

 } // namespace caffe2
	#include "roi_pool_op.h"

	#include <cfloat>

	namespace caffe2 {

	using std::max;
	using std::min;

	// CPU forward pass of RoI max pooling over float data.
	//
	// Inputs:
	//   Input(0) X: feature map, indexed here as (batch, channels, height, width).
	//   Input(1) R: RoI list with one row [batch_index x1 y1 x2 y2] per RoI.
	// Outputs:
	//   Output(0) Y: (num_rois, channels, pooled_height_, pooled_width_) maxima.
	//   Output(1) A: argmaxes with Y's shape, written only when is_test_ is
	//                false. Each entry is the offset h * width + w of the maximum
	//                inside its channel plane, or -1 if nothing was pooled.
	//
	// Returns true once every RoI has been pooled. Malformed inputs are rejected
	// by the CAFFE_ENFORCE_* checks below.
	template <>
	bool RoIPoolOp<float, CPUContext>::RunOnDevice() {
	  const auto& X = Input(0); // Input data to pool
	  const auto& R = Input(1); // RoIs
	  auto* Y = Output(0); // RoI pooled data
	  auto* A = is_test_ ? nullptr : Output(1); // argmaxes

	  // The pointer arithmetic below assumes exactly these ranks. Reject anything
	  // else up front instead of silently striding over extra dimensions.
	  CAFFE_ENFORCE_EQ(X.dim(), 4, "RoIPool expects a 4-D input X");
	  CAFFE_ENFORCE_EQ(R.dim(), 2, "RoIPool expects a 2-D RoI tensor");
	  // Each ROI is of the form [batch_index x1 y1 x2 y2]
	  CAFFE_ENFORCE_EQ(R.dim32(1), 5);

	  // TODO: Handle the storage_order properly to get the NCWH.
	  const int batch_size = X.dim32(0);
	  const int channels = X.dim32(1);
	  const int height = X.dim32(2);
	  const int width = X.dim32(3);
	  const int num_rois = R.dim32(0);

	  Y->Resize(num_rois, channels, pooled_height_, pooled_width_);
	  if (!is_test_) {
	    A->Resize(Y->sizes());
	  }

	  // Strides never change inside the loops, so look them up once.
	  const auto x_image_stride = X.size_from_dim(1); // one batch element of X
	  const auto x_channel_stride = X.size_from_dim(2); // one channel plane of X
	  // A was resized to Y's sizes, so both outputs advance by the same stride.
	  const auto out_channel_stride = Y->size_from_dim(2);
	  const auto roi_stride = R.size_from_dim(1); // one RoI row

	  const float* Xdata = X.data<float>();
	  const float* rois = R.data<float>();
	  float* Ydata = Y->template mutable_data<float>();
	  int* argmax_data = is_test_ ? nullptr : A->template mutable_data<int>();

	  // For each ROI R = [batch_index x1 y1 x2 y2]: max pool over R
	  for (int n = 0; n < num_rois; ++n) {
	    // RoI fields are stored as floats; convert to integer feature-map
	    // coordinates after applying spatial_scale_.
	    const int roi_batch_id = static_cast<int>(rois[0]);
	    const int roi_start_w = static_cast<int>(round(rois[1] * spatial_scale_));
	    const int roi_start_h = static_cast<int>(round(rois[2] * spatial_scale_));
	    const int roi_end_w = static_cast<int>(round(rois[3] * spatial_scale_));
	    const int roi_end_h = static_cast<int>(round(rois[4] * spatial_scale_));
	    CAFFE_ENFORCE_GE(roi_batch_id, 0);
	    CAFFE_ENFORCE_LT(roi_batch_id, batch_size);

	    // Force malformed ROIs to be 1x1
	    const int roi_height = max(roi_end_h - roi_start_h + 1, 1);
	    const int roi_width = max(roi_end_w - roi_start_w + 1, 1);

	    const float bin_size_h =
	        static_cast<float>(roi_height) / static_cast<float>(pooled_height_);
	    const float bin_size_w =
	        static_cast<float>(roi_width) / static_cast<float>(pooled_width_);

	    // Start of channel 0 of the image this RoI refers to.
	    const float* batch_data = Xdata + roi_batch_id * x_image_stride;

	    for (int c = 0; c < channels; ++c) {
	      for (int ph = 0; ph < pooled_height_; ++ph) {
	        for (int pw = 0; pw < pooled_width_; ++pw) {
	          // Compute pooling region for this output unit:
	          //  start (included) = floor(ph * roi_height / pooled_height_)
	          //  end (excluded) = ceil((ph + 1) * roi_height / pooled_height_)
	          int hstart =
	              static_cast<int>(floor(static_cast<float>(ph) * bin_size_h));
	          int wstart =
	              static_cast<int>(floor(static_cast<float>(pw) * bin_size_w));
	          int hend =
	              static_cast<int>(ceil(static_cast<float>(ph + 1) * bin_size_h));
	          int wend =
	              static_cast<int>(ceil(static_cast<float>(pw + 1) * bin_size_w));

	          // Add roi offsets and clip to input boundaries
	          hstart = min(max(hstart + roi_start_h, 0), height);
	          hend = min(max(hend + roi_start_h, 0), height);
	          wstart = min(max(wstart + roi_start_w, 0), width);
	          wend = min(max(wend + roi_start_w, 0), width);

	          const int pool_index = ph * pooled_width_ + pw;

	          // Define an empty pooling region to be zero
	          const bool is_empty = (hend <= hstart) || (wend <= wstart);
	          Ydata[pool_index] = is_empty ? 0 : -FLT_MAX;
	          if (!is_test_) {
	            // If nothing is pooled, argmax = -1 causes nothing to be backprop'd
	            argmax_data[pool_index] = -1;
	          }

	          // Strict '>' keeps the first maximum encountered in row-major order.
	          for (int h = hstart; h < hend; ++h) {
	            for (int w = wstart; w < wend; ++w) {
	              const int index = h * width + w;
	              if (batch_data[index] > Ydata[pool_index]) {
	                Ydata[pool_index] = batch_data[index];
	                if (!is_test_) {
	                  argmax_data[pool_index] = index;
	                }
	              }
	            }
	          }
	        }
	      }
	      // Increment all data pointers by one channel
	      batch_data += x_channel_stride;
	      Ydata += out_channel_stride;
	      if (!is_test_) {
	        argmax_data += out_channel_stride;
	      }
	    }
	    // Increment ROI data pointer
	    rois += roi_stride;
	  }

	  return true;
	}

	// Register the float CPU implementations of the forward operator (RoIPool)
	// and of its gradient operator (RoIPoolGradient).
	REGISTER_CPU_OPERATOR(RoIPool, RoIPoolOp<float, CPUContext>);
	REGISTER_CPU_OPERATOR(RoIPoolGradient, RoIPoolGradientOp<float, CPUContext>);

	// Schema for RoIPool.
	// Inputs: X, rois
	// Outputs in train mode: Y, argmaxes
	// Outputs in test mode:  Y
	OPERATOR_SCHEMA(RoIPool)
	    .NumInputs(2)
	    .NumOutputs({1, 2})
	    .TensorInferenceFunction(
	        [](const OperatorDef& def, const vector<TensorShape>& in) {
	          // Shape inference: Y is (num_rois, channels, pooled_h, pooled_w)
	          // with X's data type; argmaxes copy Y's shape as INT32.
	          ArgumentHelper args(def);
	          const TensorShape& x_shape = in[0];
	          const TensorShape& rois_shape = in[1];

	          // The channel axis of X depends on the declared storage order.
	          const StorageOrder layout = StringToStorageOrder(
	              args.GetSingleArgument<string>("order", "NCHW"));
	          const int channel_count = layout == StorageOrder::NCHW
	              ? x_shape.dims(1)
	              : x_shape.dims(3);
	          const int roi_count = rois_shape.dims(0);
	          const int out_h = args.GetSingleArgument<int>("pooled_h", 1);
	          const int out_w = args.GetSingleArgument<int>("pooled_w", 1);

	          const vector<int> y_dims{roi_count, channel_count, out_h, out_w};
	          const TensorShape y_shape =
	              CreateTensorShape(y_dims, x_shape.data_type());

	          // Y is always produced; argmaxes only outside of test mode.
	          vector<TensorShape> inferred{y_shape};
	          const bool test_mode =
	              args.GetSingleArgument<int>(OpSchema::Arg_IsTest, 0) != 0;
	          if (!test_mode) {
	            TensorShape argmax_shape = y_shape;
	            argmax_shape.set_data_type(TensorProto_DataType_INT32);
	            inferred.push_back(argmax_shape);
	          }
	          return inferred;
	        })
	    .SetDoc(R"DOC(
	Carries out ROI Pooling for Faster-RCNN.
	Depending on the mode, there are multiple output cases:

	Output case #1: Y, argmaxes (train mode)
	Output case #2: Y (test mode)
	)DOC")
	    // Operator arguments.
	    .Arg(
	        "is_test",
	        "If set, run in test mode and skip computation of argmaxes (used for "
	        "gradient computation). Only one output tensor is produced. "
	        "(Default: false).")
	    .Arg("order", "A StorageOrder string (Default: \"NCHW\").")
	    .Arg("pooled_h", "The pooled output height (Default: 1).")
	    .Arg("pooled_w", "The pooled output width (Default: 1).")
	    .Arg(
	        "spatial_scale",
	        "Multiplicative spatial scale factor to translate ROI coords from "
	        "their input scale to the scale used when pooling (Default: 1.0).")
	    // Input tensors.
	    .Input(
	        0,
	        "X",
	        "The input 4-D tensor of data. Only NCHW order is currently supported.")
	    .Input(
	        1,
	        "rois",
	        "RoIs (Regions of Interest) to pool over. Should be a 2-D tensor of "
	        "shape (num_rois, 5) given as [[batch_id, x1, y1, x2, y2], ...].")
	    // Output tensors.
	    .Output(
	        0,
	        "Y",
	        "RoI pooled output 4-D tensor of shape "
	        "(num_rois, channels, pooled_h, pooled_w).")
	    .Output(
	        1,
	        "argmaxes",
	        "Argmaxes corresponding to indices in X used for gradient computation. "
	        "Only output if arg \"is_test\" is false.");

	// Schema for the gradient operator: exactly four inputs and one output.
	// Input: X, rois, argmaxes, dY (aka "gradOutput")
	// Output: dX (aka "gradInput")
	OPERATOR_SCHEMA(RoIPoolGradient).NumInputs(4).NumOutputs(1);

	// Gradient maker for RoIPool: emits a single RoIPoolGradient operator.
	class GetRoIPoolGradient : public GradientMakerBase {
	  using GradientMakerBase::GradientMakerBase;

	  // The gradient op reads the forward inputs 0 and 1, the forward output 1
	  // and the gradient of forward output 0; it writes the gradient of forward
	  // input 0.
	  vector<OperatorDef> GetGradientDefs() override {
	    const vector<string> grad_inputs{I(0), I(1), O(1), GO(0)};
	    const vector<string> grad_outputs{GI(0)};
	    return SingleGradientDef("RoIPoolGradient", "", grad_inputs, grad_outputs);
	  }
	};

	// Associate RoIPool with GetRoIPoolGradient as its gradient maker.
	REGISTER_GRADIENT(RoIPool, GetRoIPoolGradient);

	} // namespace caffe2