blob: 282a1637c767bde316071b3cb0ed5ffc3273e7b6 [file] [log] [blame]
#include "convolution.h"
#include <sstream>
#include "torch/csrc/autograd/variable.h"
#include "torch/csrc/autograd/functions/utils.h"
#include "torch/csrc/autograd/functions/basic_ops.h"
#include "torch/csrc/autograd/functions/tensor.h"
#include "torch/csrc/utils/auto_gpu.h"
#include <ATen/ATen.h>
#ifdef WITH_CUDNN
#include "torch/csrc/cudnn/Conv.h"
#include "torch/csrc/cudnn/Handles.h"
#include "torch/csrc/cudnn/Types.h"
extern THCState* state;
using namespace torch::cudnn;
#endif
#ifdef WITH_NNPACK
#include "torch/csrc/nnpack/NNPACK.h"
#endif
using torch::cudnn::Convolution;
using at::Tensor;
using tensor_pair = std::pair<at::Tensor, at::Tensor>;
namespace torch { namespace autograd {
// Forward function definition and utility functions
static at::Tensor compute_output(
at::Tensor& input, at::Tensor& weight, at::Tensor& bias, at::Tensor& columns, at::Tensor& ones,
const ConvForward& params);
static std::tuple<Tensor, Tensor, Tensor> compute_backward(
at::Tensor& input, at::Tensor& grad_output, at::Tensor& weight, at::Tensor& columns, at::Tensor& ones,
const ConvBackward& params, std::array<bool, 3> output_mask);
auto ConvParams::is_strided() const -> bool {
bool is_strided = false;
for (int s : stride) {
is_strided |= (s != 1);
}
return is_strided;
}
auto ConvParams::is_dilated() const -> bool {
bool is_dilated = false;
for (int d : dilation) {
is_dilated |= (d != 1);
}
return is_dilated;
}
auto ConvParams::is_padded() const -> bool {
bool is_padded = false;
for (int p : padding) {
is_padded |= (p != 0);
}
return is_padded;
}
auto ConvParams::is_output_padding_neg() const -> bool {
bool is_non_neg = false;
for (int p : output_padding) {
is_non_neg |= (p < 0);
}
return is_non_neg;
}
auto ConvParams::is_output_padding_big() const -> bool {
bool is_big = false;
for (size_t i = 0; i < output_padding.size(); i++) {
is_big |= (output_padding[i] >= stride[i] || output_padding[i] >= dilation[i]);
}
return is_big;
}
auto ConvParams::is_padding_neg() const -> bool {
bool is_non_neg = false;
for (int p : padding) {
is_non_neg |= (p < 0);
}
return is_non_neg;
}
auto ConvParams::view1d_as_2d() -> void {
if (stride.size() == 1) {
stride.insert(stride.begin(), 1);
padding.insert(padding.begin(), 0);
dilation.insert(dilation.begin(), 1);
output_padding.insert(output_padding.begin(), 0);
}
}
auto ConvParams::use_cudnn(const at::Tensor& input) const -> bool {
#ifdef WITH_CUDNN
if (!input.type().isCuda() || !cudnn_enabled) {
return false;
}
if (deterministic && is_dilated()) {
// cudnn doesn't support deterministic dilated convolution fully yet
return false;
}
if (is_dilated()) {
cudaDeviceProp* prop = THCState_getCurrentDeviceProperties(state);
// NOTE: extra parenthesis around numbers disable clang warnings about dead code
return ((CUDNN_VERSION >= (6021)) || (CUDNN_VERSION >= (6000) && prop->major >= 5)) && !is_output_padding_big();
}
return !is_output_padding_big();
#endif
return false;
}
auto ConvParams::use_nnpack(const at::Tensor& input) const -> bool {
#ifdef WITH_NNPACK
return input.type().ID() == at::TypeID::CPUFloat && // only on CPU Float Tensors
!is_strided() && // doesn't support strides
!is_dilated() && // or dilation
!transposed && // or transposed tensors
input.ndimension() == 4 && // must be in NCHW format
input.size(0) >= 16; // ensure large enough batch size to ensure perf, tuneable
#endif
return false;
}
// We currently only have depthwise support for the case where groups ==
// nInputPlane and nInputPlane == nOutputPlane (the latter due to the lack of
// a depthwise multiplier)
auto ConvParams::is_depthwise(
const at::Tensor& input, const at::Tensor& weight, int groups) const -> bool {
return input.type().isCuda() &&
!transposed &&
input.ndimension() == 4 &&
input.size(1) == groups &&
groups > 1 && // no point if there is only a single group
weight.size(0) % input.size(1) == 0; // output channels must be a multiple of input channels
}
std::string ConvForward::name() { return "ConvForward"; }
auto ConvForward::output_size(at::Tensor& input, at::Tensor& weight) const -> std::vector<int64_t> {
auto in_size = input.sizes();
auto weight_size = weight.sizes();
auto dim = input.ndimension();
std::vector<int64_t> output_size(dim);
output_size[0] = in_size[0];
output_size[1] = transposed ? weight_size[1] * groups : weight_size[0];
for (int d = 2; d < dim; ++d) {
int kernel = dilation[d - 2] * (weight_size[d] - 1) + 1;
if (transposed) {
output_size[d] = (in_size[d] - 1) * stride[d - 2] - (2 * padding[d - 2]) +
kernel + output_padding[d - 2];
} else {
output_size[d] = (in_size[d] + (2 * padding[d - 2]) - kernel) / stride[d - 2] + 1;
}
}
return output_size;
}
static auto view4d(const at::Tensor& tensor) -> at::Tensor {
if (tensor.ndimension() != 3) throw std::runtime_error("expected 3D tensor");
return tensor.unsqueeze(2);
}
static auto view3d(const at::Tensor& tensor) -> at::Tensor {
if (tensor.ndimension() != 4) throw std::runtime_error("expected 4D tensor");
return tensor.squeeze(2);
}
static void check_input_shape_forward(const at::Tensor& input,
const at::Tensor& weight, const at::Tensor& bias,
int64_t groups, bool transposed) {
int k = input.ndimension();
if (weight.ndimension() != k) {
std::stringstream ss;
ss << "Expected " << k << "-dimensional input for " << k
<< "-dimensional weight " << weight.sizes() << ", but got input of size "
<< input.sizes() << " instead";
throw std::runtime_error(ss.str());
}
if (weight.size(0) < groups) {
std::stringstream ss;
ss << "Given groups=" << groups << ", expected weight to be at least "
<< groups << " at dimension 0, but got weight of size " << weight.sizes()
<< " instead";
throw std::runtime_error(ss.str());
}
if (!transposed) {
if (input.size(1) != (weight.size(1) * groups)) {
std::stringstream ss;
ss << "Given groups=" << groups << ", weight" << weight.sizes()
<< ", so expected input" << input.sizes() << " to have "
<< (weight.size(1) * groups) << " channels, but got " << input.size(1)
<< " channels instead";
throw std::runtime_error(ss.str());
}
if (bias.defined() && (bias.ndimension() != 1 || bias.size(0) != weight.size(0))) {
std::stringstream ss;
ss << "Given weight of size " << weight.sizes()
<< ", expected bias to be 1-dimensional with " << weight.size(0) << " elements"
<< ", but got bias of size " << bias.sizes() << " instead";
throw std::runtime_error(ss.str());
}
} else { // transposed
if (input.size(1) != weight.size(0)) {
std::stringstream ss;
ss << "Given transposed=" << transposed << ", weight" << weight.sizes()
<< ", so expected input" << input.sizes() << " to have "
<< weight.size(0) << " channels, but got " << input.size(1)
<< " channels instead";
throw std::runtime_error(ss.str());
}
if (bias.defined() && (bias.ndimension() != 1 || bias.size(0) != weight.size(1) * groups)) {
std::stringstream ss;
ss << "Given transposed=" << transposed << ", weight of size " << weight.sizes()
<< ", expected bias to be 1-dimensional with " << weight.size(1) * groups << " elements"
<< ", but got bias of size " << bias.sizes() << " instead";
throw std::runtime_error(ss.str());
}
}
}
static at::Tensor subtensor(at::Tensor& tensor, int dim, int groups, int g) {
if (!tensor.defined()) {
return at::Tensor();
}
int64_t n = tensor.sizes()[dim] / groups;
return tensor.narrow(dim, n * g, n).contiguous();
}
static Variable subvariable(const Variable& var, int dim, int groups, int g) {
int64_t n = var.sizes()[dim] / groups;
auto result = apply_fn<Narrow>(dim, n * g, n)(var);
return result;
}
static std::vector<int64_t> vecToInt64(const std::vector<int>& src) {
std::vector<int64_t> res(src.size());
for (size_t i = 0; i < src.size(); i++) {
res[i] = static_cast<int64_t>(src[i]);
}
return res;
}
static at::Tensor cat(const tensor_list& tensors, int dim) {
int num_inputs = tensors.size();
if (num_inputs == 0) {
return at::Tensor();
}
auto output = tensors[0].type().tensor();
at::cat_out(output, tensors, dim);
return output;
}
// ConvForward implementation
auto ConvForward::apply(const variable_list& inputs) -> variable_list {
check_input_variables("ConvNd", inputs, 3, 2);
if (is_padding_neg()) throw std::runtime_error("negative padding is not supported");
if (is_output_padding_neg()) throw std::runtime_error("negative output_padding is not supported");
AutoGPU guard(inputs[0]);
auto input = inputs[0].data().contiguous();
auto weight = inputs[1].data();
auto bias = inputs[2].opt_data();
check_input_shape_forward(input, weight, bias, groups, transposed);
int k = input.ndimension();
if (k == 3) {
view1d_as_2d();
input = view4d(input);
weight = view4d(weight);
}
auto output = input.type().tensor();
tensor_list columns(groups);
tensor_list ones(groups);
std::unique_ptr<Convolution> convolution;
if (is_depthwise(input, weight, groups)) {
/* output.resize_(output_size(input, weight)); */
auto kernel_size = weight.sizes().slice(2);
auto stride = vecToInt64(this->stride);
auto padding = vecToInt64(this->padding);
auto dilation = vecToInt64(this->dilation);
output = at::conv_depthwise2d_forward(input, weight, kernel_size, bias, stride, padding, dilation);
} else if (use_cudnn(input)) {
#ifdef WITH_CUDNN
if (input.type().ID() != weight.type().ID()){
std::stringstream ss;
ss << "Input type (" << input.toString() << ") and weight type (" << weight.toString() << ") should be the same";
throw std::runtime_error(ss.str());
}
if (bias.defined() && input.type().ID() != bias.type().ID()){
std::stringstream ss;
ss << "Input type (" << input.toString() << ") and bias type (" << bias.toString() << ") should be the same";
throw std::runtime_error(ss.str());
}
output = input.type().tensor();
output.resize_(output_size(input, weight));
if (transposed) {
convolution.reset(cudnn_convolution_transpose_full_forward(
state, torch::cudnn::getCudnnHandle(), torch::cudnn::getCudnnDataType(input),
(THVoidTensor*)input.unsafeGetTH(false), (THVoidTensor*)weight.unsafeGetTH(false),
bias.defined() ? (THVoidTensor*)bias.unsafeGetTH(false) : nullptr, (THVoidTensor*)output.unsafeGetTH(false),
padding, stride, dilation, groups, benchmark, deterministic));
} else {
convolution.reset(cudnn_convolution_full_forward(
state, torch::cudnn::getCudnnHandle(), torch::cudnn::getCudnnDataType(input),
(THVoidTensor*)input.unsafeGetTH(false), (THVoidTensor*)weight.unsafeGetTH(false),
bias.defined() ? (THVoidTensor*)bias.unsafeGetTH(false) : nullptr, (THVoidTensor*)output.unsafeGetTH(false),
padding, stride, dilation, groups, benchmark, deterministic));
}
#endif
} else {
for (int g = 0; g < groups; ++g) {
columns[g] = input.type().tensor();
ones[g] = input.type().tensor();
}
if (groups == 1) {
output = compute_output(
input, weight, bias,
columns[0], ones[0], *this);
} else {
tensor_list outputs(groups);
for (int g = 0; g < groups; ++g) {
auto input_g = subtensor(input, 1, groups, g);
auto weight_g = subtensor(weight, 0, groups, g);
auto bias_g = subtensor(bias, 0, groups, g);
outputs[g] = compute_output(
input_g, weight_g, bias_g,
columns[g], ones[g], *this);
}
output = cat(outputs, 1);
}
}
if (k == 3) {
output = view3d(output);
}
auto outputs = as_tensor_list(std::move(output));
return wrap_outputs(inputs, std::move(outputs), [&](FunctionFlags f) {
return std::make_shared<ConvBackward>(
f, *this,
inputs[0], inputs[1], inputs[2],
std::move(columns), std::move(ones), std::move(convolution));
});
};
// For Convolution strategies that don't implicitly handle grad_bias, we add a helper
// function here to perform it using simple Tensor operators
static at::Tensor compute_grad_bias(const at::Tensor& grad_output) {
// grad_output is in N, C, H, W, we re-shape and reduce over spatial dims and batches
return grad_output.contiguous().view({grad_output.size(0), grad_output.size(1), -1}).sum(0).sum(1);
}
// ConvBackward implementation
auto ConvBackward::apply(const variable_list& grad_outputs) -> variable_list {
check_input_variables("ConvNdBackward", grad_outputs, 1);
if (is_padding_neg()) throw std::runtime_error("negative padding is not supported");
if (is_output_padding_neg()) throw std::runtime_error("negative output_padding is not supported");
auto input_var = input_.unpack();
auto weight_var = weight_.unpack();
auto bias_var = bias_.unpack();
auto input = input_var.data();
auto weight = weight_var.data();
AutoGPU guard(input);
auto bias = bias_var.defined() ? bias_var.data() : Tensor();
input = input.contiguous();
auto grad_output = grad_outputs[0].data().contiguous();
int k = input.ndimension();
if (k == 3) {
input = view4d(input);
weight = view4d(weight);
grad_output = view4d(grad_output);
}
bool use_depthwise = this->is_depthwise(input, weight, groups);
bool use_cudnn = this->use_cudnn(input);
at::Tensor grad_input;
at::Tensor grad_weight;
at::Tensor grad_bias;
std::array<bool, 3> output_mask = {
should_compute_output(0),
should_compute_output(1),
should_compute_output(2) && bias.defined(),
};
if (use_depthwise) {
if (output_mask[0] || output_mask[1]) {
auto kernel_size = weight.sizes().slice(2);
auto stride = vecToInt64(this->stride);
auto padding = vecToInt64(this->padding);
auto dilation = vecToInt64(this->dilation);
std::tie(grad_input, grad_weight) = at::conv_depthwise2d_backward(
grad_output, input, weight, kernel_size, stride, padding, dilation,
{output_mask[0], output_mask[1]});
}
// THCUNN implementation does not handle bias, so we do it ourselves
if (output_mask[2]) {
grad_bias = compute_grad_bias(grad_output);
}
} else if (use_cudnn) {
#ifdef WITH_CUDNN
if (output_mask[0]) {
grad_input = input.type().tensor();
grad_input.resize_as_(input);
if (transposed) {
// ConvTranspose uses the same kernels as regular convolution
// but swaps forward and backward calls
cudnn_convolution_forward(
state, torch::cudnn::getCudnnHandle(), torch::cudnn::getCudnnDataType(input),
(THVoidTensor*)grad_output.unsafeGetTH(false), (THVoidTensor*)weight.unsafeGetTH(false), (THVoidTensor*)grad_input.unsafeGetTH(false),
convolution.get(), benchmark, deterministic);
} else {
cudnn_convolution_backward_data(
state, torch::cudnn::getCudnnHandle(), torch::cudnn::getCudnnDataType(input),
(THVoidTensor*)grad_output.unsafeGetTH(false), (THVoidTensor*)grad_input.unsafeGetTH(false), (THVoidTensor*)weight.unsafeGetTH(false),
convolution.get(), benchmark, deterministic);
}
}
if (output_mask[1] || output_mask[2]) {
grad_weight = weight.type().tensor();
grad_weight.resize_as_(weight);
cudnn_convolution_backward_filter(
state, torch::cudnn::getCudnnHandle(), torch::cudnn::getCudnnDataType(input),
(THVoidTensor*)grad_output.unsafeGetTH(false), (THVoidTensor*)input.unsafeGetTH(false), (THVoidTensor*)grad_weight.unsafeGetTH(false),
convolution.get(), benchmark, deterministic);
if (output_mask[2]) {
grad_bias = bias.type().tensor();
grad_bias.resize_as_(bias);
cudnn_convolution_backward_bias(
state, torch::cudnn::getCudnnHandle(), torch::cudnn::getCudnnDataType(input),
(THVoidTensor*)grad_output.unsafeGetTH(false), (THVoidTensor*)grad_bias.unsafeGetTH(false),
convolution.get());
}
}
#endif
} else if (groups == 1) {
std::tie(grad_input, grad_weight, grad_bias) = compute_backward(
input, grad_output, weight, columns[0], ones[0],
*this, output_mask);
} else {
tensor_list grad_inputs(groups);
tensor_list grad_weights(groups);
tensor_list grad_biases(groups);
for (int g = 0; g < groups; ++g) {
auto input_g = subtensor(input, 1, groups, g);
auto grad_output_g = subtensor(grad_output, 1, groups, g);
auto weight_g = subtensor(weight, 0, groups, g);
std::tie(grad_inputs[g], grad_weights[g], grad_biases[g]) = compute_backward(
input_g, grad_output_g, weight_g, columns[g], ones[g],
*this, output_mask);
}
if (output_mask[0]) {
grad_input = cat(grad_inputs, 1);
}
if (output_mask[1]) {
grad_weight = cat(grad_weights, 0);
}
if (output_mask[2]) {
grad_bias = cat(grad_biases, 0);
}
}
if (k == 3) {
if (grad_input.defined()) {
grad_input = view3d(grad_input);
}
if (grad_weight.defined()) {
grad_weight = view3d(grad_weight);
}
}
// Add saved variables used out of the pure autograd to inputs
variable_list all_inputs(grad_outputs);
all_inputs.push_back(input_var);
all_inputs.push_back(weight_var);
auto outputs = as_tensor_list(std::move(grad_input),
std::move(grad_weight),
std::move(grad_bias));
return wrap_outputs(all_inputs, std::move(outputs), [&](FunctionFlags f) {
return std::make_shared<ConvBackwardBackward>(
f, *this,
input_var, weight_var,
bias_var, grad_outputs[0]);
});
};
auto ConvBackward::releaseVariables() -> void {
input_.data.reset();
weight_.data.reset();
bias_.data.reset();
}
// ConvBackwardBackward implementation
auto ConvBackwardBackward::apply(const variable_list& grad_grad_inputs) -> variable_list {
check_input_variables("ConvNdBackwardBackward", grad_grad_inputs, 3, 0);
if (transposed) throw std::runtime_error("ConvBackwardBackward does not support transposed convolution");
auto ggI = grad_grad_inputs[0];
auto ggW = grad_grad_inputs[1];
auto ggb = grad_grad_inputs[2];
auto gO = grad_output_.unpack();
auto weight = weight_.unpack();
auto input = input_.unpack();
AutoGPU guard(input.data());
// Compute ggO = conv(w, ggI) + conv(ggW, i) + ggb
Variable ggO;
if (ggI.defined()) {
if (weight.type().isCuda()) {
weight = apply_fn<Contiguous>()(weight);
}
ggO = apply_fn<ConvForward>(*this)(ggI, weight, Variable());
}
if (ggW.defined()) {
if (ggW.type().isCuda()) {
ggW = apply_fn<Contiguous>()(ggW);
}
auto ggW_term = apply_fn<ConvForward>(*this)(input_.unpack(), ggW, Variable());
if (ggO.defined()) {
ggO = apply_fn<Add>()(ggO, ggW_term);
} else {
ggO = ggW_term;
}
}
if (ggb.defined()) {
// View as (1, ggb.size(0), 1, 1...)
// Expand
std::vector<int64_t> new_size(gO.ndimension(), 1);
new_size[1] = ggb.sizes()[0];
auto ggb_contiguous = apply_fn<Contiguous>()(ggb);
auto ggb_view = apply_fn<View>(new_size)(ggb_contiguous);
// Expand
auto ggb_expanded = apply_fn<Expand>(gO.sizes())(ggb_view);
if (ggO.defined()) {
ggO = apply_fn<Add>()(ggO, ggb_expanded);
} else {
ggO = ggb_expanded;
}
}
// Compute gW = conv(ggI, g0)
Variable gW;
if (ggI.defined()) {
// Modified params with correct padding
ConvParams gw_conv_params(*this);
// Disable groups as they are handled separately
auto groups = gw_conv_params.groups;
gw_conv_params.groups = 1;
std::swap(gw_conv_params.dilation, gw_conv_params.stride);
// Transpose gO and ggI to accumulate over batch
auto gOt = apply_fn<Transpose>(0, 1)(gO);
auto ggIt = apply_fn<Transpose>(0, 1)(ggI);
Variable gWt;
// Compute conv
if (groups == 1) {
if (gOt.type().isCuda()) {
gOt = apply_fn<Contiguous>()(gOt);
}
// Compute conv
gWt = apply_fn<ConvForward>(gw_conv_params)(ggIt, gOt, Variable());
} else {
variable_list gWt_list(groups);
for (int g = 0; g < groups; ++g) {
auto ggIt_g = subvariable(ggIt, 0, groups, g);
auto gOt_g = subvariable(gOt, 0, groups, g);
if (gOt_g.type().isCuda()) {
gOt_g = apply_fn<Contiguous>()(gOt_g);
}
gWt_list[g] = apply_fn<ConvForward>(gw_conv_params)(ggIt_g, gOt_g, Variable());
}
gWt = apply_fn<Cat>(1)(gWt_list);
}
// Transpose gW to match chan_in and chan_out
gW = apply_fn<Transpose>(0, 1)(gWt);
// narrow gW to only relevant portion
// we do it this way instead of narrowing the input itself because
// the ConvForward kernels don't support asymmetric padding.
auto gW_size = gW.sizes();
auto w_size = weight.sizes();
for (size_t i = 2; i < gW_size.size(); ++i) {
if (gW_size[i] > w_size[i]) {
gW = apply_fn<Narrow>(i, 0, w_size[i])(gW);
}
}
}
// Compute gI = convT(gO, ggW)
Variable gI;
if (ggW.defined()) {
// select conv transpose
ConvParams gi_conv_params(*this);
gi_conv_params.transposed = true;
// swap stride and dilation
std::swap(gi_conv_params.dilation, gi_conv_params.stride);
// calculate output_padding
auto kernel_size = weight.sizes().slice(2);
auto input_shape = input.sizes().slice(2);
auto grad_output_shape = gO.sizes().slice(2);
if (kernel_size.size() == 1) {
auto expected_input_shape = (kernel_size[0] - 1) * gi_conv_params.stride[1]
- 2 * gi_conv_params.padding[1]
+ (gi_conv_params.dilation[1] * (grad_output_shape[0] - 1) + 1);
if (expected_input_shape != input_shape[0]) {
gi_conv_params.output_padding[1] = input_shape[0] - expected_input_shape;
}
} else {
for(size_t i = 0; i < kernel_size.size(); ++i) {
// Check if whole input has been used or not
auto expected_input_shape = (kernel_size[i] - 1) * gi_conv_params.stride[i]
- 2 * gi_conv_params.padding[i]
+ (gi_conv_params.dilation[i] * (grad_output_shape[i] - 1) + 1);
if (expected_input_shape != input_shape[i]) {
gi_conv_params.output_padding[i] = input_shape[i] - expected_input_shape;
}
}
}
// Disable groups as they are handled separately
auto groups = gi_conv_params.groups;
gi_conv_params.groups = 1;
auto ggWt = apply_fn<Transpose>(0, 1)(ggW);
auto gOt = apply_fn<Transpose>(0, 1)(gO);
Variable gIt;
if (groups == 1) {
if (gOt.type().isCuda()) {
gOt = apply_fn<Contiguous>()(gOt);
}
gIt = apply_fn<ConvForward>(gi_conv_params)(ggWt, gOt, Variable());
} else {
variable_list gIt_list(groups);
for (int g = 0; g < groups; ++g) {
auto ggWt_g = subvariable(ggWt, 1, groups, g);
auto gOt_g = subvariable(gOt, 0, groups, g);
if (gOt_g.type().isCuda()) {
gOt_g = apply_fn<Contiguous>()(gOt_g);
}
gIt_list[g] = apply_fn<ConvForward>(gi_conv_params)(ggWt_g, gOt_g, Variable());
}
gIt = apply_fn<Cat>(0)(gIt_list);
}
gI = apply_fn<Transpose>(0, 1)(gIt);
}
return {ggO, gI, gW};
}
auto ConvBackwardBackward::releaseVariables() -> void {
input_.data.reset();
weight_.data.reset();
bias_.data.reset();
grad_output_.data.reset();
}
// Forward and backward functions for Tensor
static at::Tensor compute_output(
at::Tensor& input, at::Tensor& weight, at::Tensor& bias,
at::Tensor& columns, at::Tensor& ones,
const ConvForward& params) {
auto dim = input.ndimension();
auto dilated = params.is_dilated();
auto kernel_size = weight.sizes().slice(2);
auto stride = vecToInt64(params.stride);
auto padding = vecToInt64(params.padding);
auto dilation = vecToInt64(params.dilation);
auto output_padding = vecToInt64(params.output_padding);
if (params.transposed) {
if (dim == 4) {
return at::conv_transpose2d_forward(
input, weight, kernel_size, bias,
stride, padding, output_padding, dilation,
columns, ones);
} else if (dim == 5) {
return at::conv_transpose3d_forward(
input, weight, bias,
stride, padding, output_padding, dilation,
columns, ones);
}
} else { /* Not transposed */
if (dim == 4) {
if (dilated) {
return at::conv_dilated2d_forward(
input, weight, kernel_size, bias,
stride, padding, dilation,
columns, ones);
} else { /* dim == 4, non-dilated */
if (params.use_nnpack(input)) {
#ifdef WITH_NNPACK
// THNN functions handle resizing the output Tensor themselves,
// but NNPACK expects the Tensors to be in the appropriate shape
// already, so we resize here
auto output = input.type().tensor(params.output_size(input, weight));
nnpack::SpatialConvolution_updateOutput(
input, output, weight, bias,
kernel_size[1], kernel_size[0],
params.padding[1], params.padding[0]);
return output;
#endif
} else {
/* CPU implementation has specialized MM kernels
for non-dilated case here */
return at::conv2d_forward(
input, weight, kernel_size, bias,
stride, padding,
columns, ones);
}
}
} else if (dim == 5 && (input.type().isCuda() || dilated)) {
return at::conv_dilated3d_forward(
input, weight, kernel_size, bias,
stride, padding, dilation,
columns, ones);
} else if (dim == 5) { /* dim == 5, CPU, non-dilated */
/* CPU implementation has specialized MM kernels
for non-dilated case here */
return at::conv3d_forward(
input, weight, kernel_size, bias,
stride, padding,
columns);
}
}
throw std::runtime_error("unsupported ConvNd parameters");
}
static std::tuple<Tensor, Tensor, Tensor> compute_backward(
at::Tensor& input, at::Tensor& grad_output, at::Tensor& weight,
at::Tensor& columns, at::Tensor& ones,
const ConvBackward& params,
std::array<bool, 3> output_mask) {
auto kernel_size = weight.sizes().slice(2);
auto stride = vecToInt64(params.stride);
auto padding = vecToInt64(params.padding);
auto dilation = vecToInt64(params.dilation);
auto output_padding = vecToInt64(params.output_padding);
auto dim = input.ndimension();
auto dilated = params.is_dilated();
if (params.transposed) {
if (dim == 4) {
return at::conv_transpose2d_backward(
grad_output, input, weight, kernel_size,
stride, padding, output_padding, dilation,
columns, ones, output_mask);
} else if (dim == 5) {
return at::conv_transpose3d_backward(
grad_output, input, weight,
stride, padding, output_padding, dilation,
columns, ones, output_mask);
}
} else { /* Not transposed */
if (dim == 4) {
if (dilated) {
return at::conv_dilated2d_backward(
grad_output, input, weight, kernel_size,
stride, padding, dilation,
columns, ones, output_mask);
} else {
if (params.use_nnpack(input)) {
#ifdef WITH_NNPACK
Tensor grad_input;
Tensor grad_weight;
Tensor grad_bias;
if (output_mask[0]) {
grad_input = input.type().tensor(input.sizes());
nnpack::SpatialConvolution_updateGradInput(
input, grad_output, grad_input, weight,
kernel_size[1], kernel_size[0],
params.padding[1], params.padding[0]);
}
// NNPACK does not have a bias gradient calculation, so we split
// into two calls here if necessary
if (output_mask[1]) {
grad_weight = weight.type().tensor(weight.sizes());
grad_weight.zero_();
nnpack::SpatialConvolution_accGradWeight(
input, grad_output, grad_weight,
kernel_size[1], kernel_size[0],
params.padding[1], params.padding[0]);
}
if (output_mask[2]) {
grad_bias = compute_grad_bias(grad_output);
}
return std::make_tuple(grad_input, grad_weight, grad_bias);
#endif
} else {
/* CPU implementation has specialized MM kernels
for non-dilated case here */
return at::conv2d_backward(
grad_output, input, weight, kernel_size,
stride, padding,
columns, ones, output_mask);
}
}
} else if (dim == 5 && (input.type().isCuda() || dilated)) {
return at::conv_dilated3d_backward(
grad_output, input, weight, kernel_size,
stride, padding, dilation,
columns, ones, output_mask);
} else if (dim == 5) { /* dim == 5, CPU, non-dilated */
/* CPU implementation has specialized MM kernels
for non-dilated case here */
return at::conv3d_backward(
grad_output, input, weight, kernel_size,
stride, padding,
columns, ones, output_mask);
}
}
throw std::runtime_error("unsupported ConvNdBackward parameters");
}
}} // namespace torch::autograd