| /** |
| * Copyright (c) 2016-present, Facebook, Inc. |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| // TODO: reduce the apparent redundancy of all the code below. |
| #include "caffe2/operators/pool_op.h" |
| |
| #include <algorithm> |
| #include <cmath> |
| |
| namespace caffe2 { |
| |
| using std::min; |
| using std::max; |
| |
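| // Empty tag type used to select the L-p pooling specializations of PoolOp |
| // and PoolGradientOp below. |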
| class LpPool {}; |
| |
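| // Forward pass, NCHW layout: each output element is the L-p norm of its |
| // pooling window, i.e. Y = (sum over the window of |x|^p)^(1/p). |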
| template <> |
| bool PoolOp<float, CPUContext, LpPool>::RunOnDeviceWithOrderNCHW() { |
| auto& X = Input(0); |
| auto* Y = Output(0); |
| ConvPoolOpBase::SetOutputSize(X, Y, X.dim32(1)); |
| const auto p = OperatorBase::GetSingleArgument<float>("p", 2.0); |
| const auto inv_p = 1.0 / p; |
| |
| const float* Xdata = X.data<float>(); |
| float* Ydata = Y->mutable_data<float>(); |
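| // Zero-initialize Y so each output element can accumulate sum(|x|^p) in |
| // place before the final 1/p power is applied. |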
| math::Set<float, CPUContext>(Y->size(), 0, Ydata, &context_); |
| // The main loop |
| int channels = X.dim32(1); |
| int height = X.dim32(2); |
| int width = X.dim32(3); |
| int pooled_height = Y->dim32(2); |
| int pooled_width = Y->dim32(3); |
| |
| for (int n = 0; n < X.dim32(0); ++n) { |
| for (int c = 0; c < channels; ++c) { |
| for (int ph = 0; ph < pooled_height; ++ph) { |
| for (int pw = 0; pw < pooled_width; ++pw) { |
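| // Clip the (possibly padded) pooling window |
| // [hstart, hend) x [wstart, wend) to the input extent. |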
| int hstart = ph * stride_[0] - pads_[0]; |
| int wstart = pw * stride_[1] - pads_[1]; |
| int hend = min(hstart + kernel_[0], height); |
| int wend = min(wstart + kernel_[1], width); |
| hstart = max(hstart, 0); |
| wstart = max(wstart, 0); |
| const int pool_index = ph * pooled_width + pw; |
| for (int h = hstart; h < hend; ++h) { |
| for (int w = wstart; w < wend; ++w) { |
| const int input_index = h * width + w; |
| Ydata[pool_index] += std::pow(std::abs(Xdata[input_index]), p); |
| } |
| } |
| Ydata[pool_index] = std::pow(Ydata[pool_index], inv_p); |
| } |
| } |
| // Do offset. |
| Xdata += height * width; |
| Ydata += pooled_height * pooled_width; |
| } |
| } |
| return true; |
| } |
| |
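| // Forward pass, NHWC layout: the same L-p norm reduction, with channels as |
| // the innermost dimension. |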
| template <> |
| bool PoolOp<float, CPUContext, LpPool>::RunOnDeviceWithOrderNHWC() { |
| auto& X = Input(0); |
| auto* Y = Output(0); |
| int height = X.dim32(1); |
| int width = X.dim32(2); |
| int channels = X.dim32(3); |
| ConvPoolOpBase::SetOutputSize(X, Y, channels); |
| |
| const auto p = OperatorBase::GetSingleArgument<float>("p", 2.0); |
| const auto inv_p = 1.0 / p; |
| |
| const float* Xdata = X.data<float>(); |
| float* Ydata = Y->mutable_data<float>(); |
| math::Set<float, CPUContext>(Y->size(), 0, Ydata, &context_); |
| // The main loop |
| int pooled_height = Y->dim32(1); |
| int pooled_width = Y->dim32(2); |
| for (int n = 0; n < X.dim32(0); ++n) { |
| for (int ph = 0; ph < pooled_height; ++ph) { |
| for (int pw = 0; pw < pooled_width; ++pw) { |
| int hstart = ph * stride_[0] - pads_[0]; |
| int wstart = pw * stride_[1] - pads_[1]; |
| int hend = min(hstart + kernel_[0], height); |
| int wend = min(wstart + kernel_[1], width); |
| hstart = max(hstart, 0); |
| wstart = max(wstart, 0); |
| const int pool_index = (ph * pooled_width + pw) * channels; |
| for (int h = hstart; h < hend; ++h) { |
| for (int w = wstart; w < wend; ++w) { |
| const int input_index = (h * width + w) * channels; |
| for (int c = 0; c < channels; ++c) { |
| Ydata[pool_index + c] += |
| std::pow(std::abs(Xdata[input_index + c]), p); |
| } |
| } |
| } |
| for (int c = 0; c < channels; ++c) { |
| Ydata[pool_index + c] = std::pow(Ydata[pool_index + c], inv_p); |
| } |
| } |
| } |
| // Do offset. |
| Xdata += X.size() / X.dim32(0); |
| Ydata += Y->size() / Y->dim32(0); |
| } |
| return true; |
| } |
| |
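| // Backward pass, NCHW layout. Since Y = (sum over the window of |
| // |x_i|^p)^(1/p), we have dY/dx_j = x_j * |x_j|^{p-2} / Y^{p-1}; each input |
| // element accumulates this factor times dY over every window containing it. |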
| template <> |
| bool PoolGradientOp<float, CPUContext, LpPool>::RunOnDeviceWithOrderNCHW() { |
| const auto& X = Input(0); |
| const auto& Y = Input(1); |
| const auto& dY = Input(2); |
| CAFFE_ENFORCE_EQ(dY.ndim(), 4); |
| auto* dX = Output(0); |
| const auto p = OperatorBase::GetSingleArgument<float>("p", 2.0); |
| |
| // TODO(Yangqing): Add shape checks. |
| dX->ResizeLike(X); |
| math::Set<float, CPUContext>( |
| X.size(), 0, dX->mutable_data<float>(), &context_); |
| const float* dYdata = dY.data<float>(); |
| const float* Xdata = X.data<float>(); |
| const float* Ydata = Y.data<float>(); |
| float* dXdata = dX->mutable_data<float>(); |
| |
| int channels = X.dim32(1); |
| CAFFE_ENFORCE_EQ(channels, dY.dim32(1)); |
| int height = X.dim32(2); |
| int width = X.dim32(3); |
| ConvPoolOpBase<CPUContext>::ComputePads({height, width}); |
| int pooled_height = dY.dim32(2); |
| int pooled_width = dY.dim32(3); |
| // The main loop |
| for (int n = 0; n < X.dim32(0); ++n) { |
| for (int c = 0; c < channels; ++c) { |
| for (int ph = 0; ph < pooled_height; ++ph) { |
| for (int pw = 0; pw < pooled_width; ++pw) { |
| int hstart = ph * stride_[0] - pads_[0]; |
| int wstart = pw * stride_[1] - pads_[1]; |
| int hend = min(hstart + kernel_[0], height); |
| int wend = min(wstart + kernel_[1], width); |
| hstart = max(hstart, 0); |
| wstart = max(wstart, 0); |
| for (int h = hstart; h < hend; ++h) { |
| for (int w = wstart; w < wend; ++w) { |
| // d/dx_j of Y = (sum_i |x_i|^p)^(1/p) is x_j * |x_j|^{p-2} / Y^{p-1}, |
| // so dX_j accumulates dY * x_j * |x_j|^{p-2} / Y^{p-1}. |
| dXdata[h * width + w] += dYdata[ph * pooled_width + pw] * |
| Xdata[h * width + w] * |
| std::pow(std::abs(Xdata[h * width + w]), p - 2) / |
| std::pow(Ydata[ph * pooled_width + pw], p - 1); |
| } |
| } |
| } |
| } |
| // offset |
| dXdata += height * width; |
| dYdata += pooled_height * pooled_width; |
| Ydata += pooled_height * pooled_width; |
| Xdata += height * width; |
| } |
| } |
| return true; |
| } |
| |
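| // Backward pass, NHWC layout: the same accumulation as the NCHW case, with |
| // channels as the innermost dimension. |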
| template <> |
| bool PoolGradientOp<float, CPUContext, LpPool>::RunOnDeviceWithOrderNHWC() { |
| const auto& X = Input(0); |
| const auto& Y = Input(1); |
| const auto& dY = Input(2); |
| CAFFE_ENFORCE_EQ(dY.ndim(), 4); |
| auto* dX = Output(0); |
| // TODO(Yangqing): Add shape checks. |
| dX->ResizeLike(X); |
| math::Set<float, CPUContext>( |
| X.size(), 0, dX->mutable_data<float>(), &context_); |
| const float* dYdata = dY.data<float>(); |
| float* dXdata = dX->mutable_data<float>(); |
| const float* Xdata = X.data<float>(); |
| const float* Ydata = Y.data<float>(); |
| // The main loop |
| int height = X.dim32(1); |
| int width = X.dim32(2); |
| ConvPoolOpBase<CPUContext>::ComputePads({height, width}); |
| const auto p = OperatorBase::GetSingleArgument<float>("p", 2.0); |
| |
| int pooled_height = dY.dim32(1); |
| int pooled_width = dY.dim32(2); |
| int channels = X.dim32(3); |
| CAFFE_ENFORCE_EQ(channels, dY.dim32(3)); |
| for (int n = 0; n < X.dim32(0); ++n) { |
| for (int ph = 0; ph < pooled_height; ++ph) { |
| for (int pw = 0; pw < pooled_width; ++pw) { |
| int hstart = ph * stride_[0] - pads_[0]; |
| int wstart = pw * stride_[1] - pads_[1]; |
| int hend = min(hstart + kernel_[0], height); |
| int wend = min(wstart + kernel_[1], width); |
| hstart = max(hstart, 0); |
| wstart = max(wstart, 0); |
| for (int h = hstart; h < hend; ++h) { |
| for (int w = wstart; w < wend; ++w) { |
| for (int c = 0; c < channels; ++c) { |
| dXdata[(h * width + w) * channels + c] += |
| dYdata[(ph * pooled_width + pw) * channels + c] * |
| Xdata[(h * width + w) * channels + c] * |
| std::pow( |
| std::abs(Xdata[(h * width + w) * channels + c]), p - 2) / |
| std::pow( |
| Ydata[(ph * pooled_width + pw) * channels + c], p - 1); |
| } |
| } |
| } |
| } |
| } |
| // offset |
| dXdata += X.size() / X.dim32(0); |
| dYdata += dY.size() / dY.dim32(0); |
| Xdata += X.size() / X.dim32(0); |
| Ydata += Y.size() / Y.dim32(0); |
| } |
| return true; |
| } |
| |
| REGISTER_CPU_OPERATOR(LpPool, PoolOp<float, CPUContext, LpPool>); |
| REGISTER_CPU_OPERATOR( |
| LpPoolGradient, |
| PoolGradientOp<float, CPUContext, LpPool>); |
| |
| OPERATOR_SCHEMA(LpPool) |
| .NumInputs(1) |
| .NumOutputs(1) |
| .SetDoc(R"DOC( |
| |
| LpPool consumes an input blob X and applies L-p pooling across the blob |
| according to the kernel size, stride, and pad lengths defined by the |
| ConvPoolOpBase operator. L-p pooling consists of taking the L-p norm of a |
| subset of the input tensor determined by the kernel size and downsampling |
| the data into the output blob Y for further processing. |
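| |
| For example, with a 2 x 2 kernel, stride 2, no padding, and the default |
| p = 2, a 1 x 1 x 4 x 4 NCHW input yields a 1 x 1 x 2 x 2 output (output |
| height = (4 - 2) / 2 + 1 = 2) in which each output element is the L2 norm |
| of its 2 x 2 input window. |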
| |
| )DOC") |
| .Input( |
| 0, |
| "X", |
| "Input data tensor from the previous operator; dimensions " |
| "depend on whether the NCHW or NHWC operators are being used. For example, " |
| "in the former, the input has size (N x C x H x W), where N is the batch " |
| "size, C is the number of channels, and H and W are the height and the width " |
| "of the data. The corresponding permutation of dimensions is used in the " |
| "latter case. ") |
| .Output( |
| 0, |
| "Y", |
| "Output data tensor from L-p pooling across the input " |
| "tensor. Dimensions will vary based on various kernel, stride, and pad " |
| "sizes."); |
| |
| OPERATOR_SCHEMA(LpPoolGradient).NumInputs(3).NumOutputs(1); |
| |
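| // The gradient operator consumes the forward input X, the forward output Y, |
| // and the output gradient dY, and produces the input gradient dX. |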
| class GetPoolGradient : public GradientMakerBase { |
| using GradientMakerBase::GradientMakerBase; |
| vector<OperatorDef> GetGradientDefs() override { |
| return SingleGradientDef( |
| def_.type() + "Gradient", |
| "", |
| vector<string>{I(0), O(0), GO(0)}, |
| vector<string>{GI(0)}); |
| } |
| }; |
| REGISTER_GRADIENT(LpPool, GetPoolGradient); |
| } // namespace caffe2 |