blob: 57ecf6882847a4108d0c651fe217e009a5fed97b [file] [log] [blame]
// Copyright 2004-present Facebook. All Rights Reserved.
#include "../core/GLFilter.h"
#include "../core/GLImage.h"
#include "../core/ImageAllocator.h"
#include "caffe2/core/operator.h"
#include "caffe2/core/timer.h"
#include "caffe2/operators/conv_pool_op_base.h"
class GLPadImage : public GLFilter {
public:
binding* padSize;
binding* inputSize;
binding* outputSize;
binding* inputData;
GLPadImage()
: GLFilter(
"GLPadImage",
vertex_shader,
fragment_shader,
std::vector<binding*>(
{BINDING(padSize), BINDING(inputSize), BINDING(outputSize), BINDING(inputData)}),
{/* no uniform blocks */},
{/* no attributes */},
{/* no replacements */}) {}
template <typename T>
void pad(const GLImageVector<T>& input_images,
const GLImageVector<T>& output_images,
const int pad_l,
const int pad_t);
static const char* fragment_shader;
};
// MARK: GLSL
const char* GLPadImage::fragment_shader = R"GLSL(#version 300 es
precision mediump float;
precision mediump int;
precision mediump sampler2D;
in highp vec2 v_texCoord;
uniform ivec2 padSize;
uniform ivec2 inputSize;
uniform ivec2 outputSize;
uniform sampler2D inputData;
layout(location = 0) out mediump vec4 outputData;
void main() {
ivec2 texelCoord = ivec2(v_texCoord * vec2(outputSize)) - padSize;
texelCoord = max(texelCoord, -texelCoord);
texelCoord = min(texelCoord, ivec2(2) * (inputSize - 1) - texelCoord);
outputData = texelFetch(inputData, texelCoord, 0);
}
)GLSL";
template <typename T>
void GLPadImage::pad(const GLImageVector<T>& input_images,
const GLImageVector<T>& output_images,
const int pad_l,
const int pad_t) {
for (int i = 0; i < input_images.size(); i++) {
auto input_image = input_images[i];
auto output_image = output_images[i];
int input_slices = input_image->slices;
int output_slices = output_image->slices;
for (int is = 0; is < input_slices; is++) {
run(std::vector<texture_attachment>({{input_image->textures[is], inputData}}),
{output_image->textures.begin() + is, output_image->textures.begin() + is + 1},
[&]() {
glUniform2i(inputSize->location, input_image->width, input_image->height);
glUniform2i(outputSize->location, output_image->width, output_image->height);
glUniform2i(padSize->location, pad_l, pad_t);
},
output_image->width,
output_image->height);
}
}
}
namespace caffe2 {
template <typename OPBase>
static void computeOutputHW(OPBase* op, int H, int W, int* OH, int* OW) {
Tensor<CPUContext> input, output;
input.Resize(1, 1, H, W);
op->SetOutputSize(input, &output, 1);
CAFFE_ENFORCE_EQ(output.ndim(), 4);
*OH = output.dim(2);
*OW = output.dim(3);
}
template <class T>
class OpenGLPadImageOp final : public ConvPoolOpBase<CPUContext>, ImageAllocator<T> {
public:
OpenGLPadImageOp(const OperatorDef& operator_def, Workspace* ws)
: ConvPoolOpBase<CPUContext>(operator_def, ws),
mode_(OperatorBase::GetSingleArgument<string>("mode", "")) {
OPERATOR_NEEDS_FEATURE(order_ == StorageOrder::NCHW, "OpenGL only supports NCHW order.");
OPERATOR_NEEDS_FEATURE(mode_ == "reflect", "OpenGL only supports reflection");
CAFFE_ENFORCE(legacy_pad_ == LegacyPadding::NOTSET,
"Padding layer only supports explicit pad values.");
CAFFE_ENFORCE(dilation_h() == 1 && dilation_w() == 1,
"Pooling op does not support dilation right now.");
CAFFE_ENFORCE(stride_h() == 1 && stride_w() == 1,
"Pooling op does not support stride right now.");
// Pad op does not use kernel sizes, so we set it to 1 for computing the
// output size.
kernel_.assign(pads_.size() / 2, 1);
}
bool RunOnDeviceWithOrderNCHW() override {
const GLImageVector<T>& input = Inputs()[0]->template Get<GLImageVector<T>>();
const int num_images = input.size();
const int input_width = input.width();
const int input_height = input.height();
const int input_channels = input.channels();
const int output_channels = input_channels;
int output_height, output_width;
computeOutputHW(this, input_height, input_width, &output_height, &output_width);
int is_last = OperatorBase::GetSingleArgument<int>("is_last", 0);
GLImageVector<T>* output = ImageAllocator<T>::newImage(
num_images, output_width, output_height, output_channels, is_last);
if (!padImage_) {
padImage_.reset(new GLPadImage());
LOG(INFO) << input_channels << ": " << input_height << " X " << input_width << " => "
<< output_channels << ": " << output_height << " X " << output_width;
LOG(INFO) << "Padmode: " << mode_ << "pad_l = " << pad_l() << ", pad_r = " << pad_r()
<< ", pad_t = " << pad_t() << ", pad_b = " << pad_b();
}
padImage_->pad(input, *output, pad_l(), pad_t());
Outputs()[0]->Reset(output);
return true;
}
private:
std::string mode_;
std::unique_ptr<GLPadImage> padImage_;
};
REGISTER_CPU_OPERATOR(OpenGLPadImage, OpenGLPadImageOp<float16_t>);
OPERATOR_SCHEMA(OpenGLPadImage).NumInputs(1).NumOutputs(1);
} // namespace caffe2