enable use of MIOpen for depthwise convolutions (#17685)
Summary:
* Added a MIOpen convolution mode argument (miopenConvolutionMode_t) to the convolution descriptor's set() method, so callers can select miopenConvolution or miopenDepthwise.
* Added MIOpen depthwise convolution operators (forward, backward input, backward weight, and the combined backward), wired up through native_functions.yaml and derivatives.yaml.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/17685
Differential Revision: D14327811
Pulled By: bddppq
fbshipit-source-id: d5bdc1abafd5f39694fadf3f9275b9d880c5b115
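
For context, a minimal usage sketch (not part of this PR; assumes a ROCm
build, where HIP devices surface as CUDA, and hypothetical shapes). A
depthwise convolution has groups == in_channels and one filter per channel:

    auto input  = at::randn({1, 8, 16, 16}, at::kCUDA);  // NCHW
    auto weight = at::randn({8, 1, 3, 3}, at::kCUDA);    // one 3x3 filter per channel
    auto bias   = at::randn({8}, at::kCUDA);
    auto out = at::miopen_depthwise_convolution(
        input, weight, bias,
        /*padding=*/{1, 1}, /*stride=*/{1, 1}, /*dilation=*/{1, 1},
        /*groups=*/8, /*benchmark=*/false, /*deterministic=*/false);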
diff --git a/aten/src/ATen/core/aten_interned_strings.h b/aten/src/ATen/core/aten_interned_strings.h
index a739996..29c90c2 100644
--- a/aten/src/ATen/core/aten_interned_strings.h
+++ b/aten/src/ATen/core/aten_interned_strings.h
@@ -463,6 +463,10 @@
_(aten, miopen_convolution_transpose_backward) \
_(aten, miopen_convolution_transpose_backward_input) \
_(aten, miopen_convolution_transpose_backward_weight) \
+_(aten, miopen_depthwise_convolution) \
+_(aten, miopen_depthwise_convolution_backward) \
+_(aten, miopen_depthwise_convolution_backward_input) \
+_(aten, miopen_depthwise_convolution_backward_weight) \
_(aten, mkldnn_convolution) \
_(aten, mkldnn_convolution_backward) \
_(aten, mkldnn_convolution_backward_input) \
diff --git a/aten/src/ATen/miopen/Descriptors.h b/aten/src/ATen/miopen/Descriptors.h
index 6cab385..4c8d29f 100644
--- a/aten/src/ATen/miopen/Descriptors.h
+++ b/aten/src/ATen/miopen/Descriptors.h
@@ -121,8 +121,8 @@
&miopenCreateConvolutionDescriptor,
&miopenDestroyConvolutionDescriptor>
{
- void set(miopenDataType_t dataType, int dim, int* pad, int* stride, int * upscale /* aka dilation */, int groups) {
- MIOPEN_CHECK(miopenInitConvolutionDescriptor(mut_desc(), miopenConvolution, pad[0], pad[1], stride[0], stride[1], upscale[0], upscale[1]));
+ void set(miopenDataType_t dataType, miopenConvolutionMode_t c_mode, int dim, int* pad, int* stride, int* upscale /* aka dilation */, int groups) {
+ MIOPEN_CHECK(miopenInitConvolutionDescriptor(mut_desc(), c_mode, pad[0], pad[1], stride[0], stride[1], upscale[0], upscale[1]));
MIOPEN_CHECK(miopenSetConvolutionGroupCount(mut_desc(), groups));
}
};
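
A hedged sketch of the updated descriptor call (names from this header,
values hypothetical); the mode is now supplied by the caller instead of
being hard-coded to miopenConvolution:

    int pad[2] = {1, 1}, stride[2] = {1, 1}, dilation[2] = {1, 1};
    ConvolutionDescriptor cdesc;
    cdesc.set(miopenFloat, /*c_mode=*/miopenDepthwise, /*dim=*/2,
              pad, stride, dilation, /*groups=*/8);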
diff --git a/aten/src/ATen/native/Convolution.cpp b/aten/src/ATen/native/Convolution.cpp
index cbfc8bb..0b8af12 100644
--- a/aten/src/ATen/native/Convolution.cpp
+++ b/aten/src/ATen/native/Convolution.cpp
@@ -354,7 +354,14 @@
auto padding = params.padding;
auto dilation = params.dilation;
- output = at::thnn_conv_depthwise2d(input, weight, kernel_size, bias, stride, padding, dilation);
+ if (params.use_miopen(input)) {
+ output = at::miopen_depthwise_convolution(
+ input, weight, bias,
+ params.padding, params.stride, params.dilation, params.groups, params.benchmark, params.deterministic);
+ } else {
+ output = at::thnn_conv_depthwise2d(input, weight, kernel_size, bias, stride, padding, dilation);
+ }
+
} else if (params.use_cudnn(input)) {
AT_CHECK(input.type() == weight.type(),
"Input type (", input.type().toString(), ") and weight type (", weight.type().toString(),
diff --git a/aten/src/ATen/native/miopen/Conv_miopen.cpp b/aten/src/ATen/native/miopen/Conv_miopen.cpp
index 07c4cc7..47e399e 100644
--- a/aten/src/ATen/native/miopen/Conv_miopen.cpp
+++ b/aten/src/ATen/native/miopen/Conv_miopen.cpp
@@ -73,6 +73,34 @@
AT_ERROR("miopen_convolution_transpose_backward: ATen not compiled with MIOpen support");
}
+at::Tensor miopen_depthwise_convolution(
+ const at::Tensor& input, const at::Tensor& weight, const at::Tensor& bias /* optional */,
+ IntArrayRef padding, IntArrayRef stride, IntArrayRef dilation,
+ int64_t groups, bool benchmark, bool deterministic) {
+ AT_ERROR("miopen_depthwise_convolution: ATen not compiled with MIOpen support");
+}
+
+at::Tensor miopen_depthwise_convolution_backward_input(
+ IntArrayRef input_size, const at::Tensor& grad_output, const at::Tensor& weight,
+ IntArrayRef padding, IntArrayRef stride, IntArrayRef dilation, int64_t groups,
+ bool benchmark, bool deterministic) {
+ AT_ERROR("miopen_depthwise_convolution_backward_input: ATen not compiled with MIOpen support");
+}
+
+at::Tensor miopen_depthwise_convolution_backward_weight(
+ IntArrayRef weight_size, const at::Tensor& grad_output, const at::Tensor& input,
+ IntArrayRef padding, IntArrayRef stride, IntArrayRef dilation, int64_t groups,
+ bool benchmark, bool deterministic) {
+ AT_ERROR("miopen_depthwise_convolution_backward_weight: ATen not compiled with MIOpen support");
+}
+
+std::tuple<at::Tensor,at::Tensor,at::Tensor> miopen_depthwise_convolution_backward(
+ const at::Tensor& input, const at::Tensor& grad_output, const at::Tensor& weight,
+ IntArrayRef padding, IntArrayRef stride, IntArrayRef dilation, int64_t groups,
+ bool benchmark, bool deterministic, std::array<bool,3> output_mask) {
+ AT_ERROR("miopen_depthwise_convolution_backward: ATen not compiled with MIOpen support");
+}
+
}}
#else // AT_ROCM_ENABLED
@@ -603,6 +631,7 @@
bool benchmark, bool deterministic) {
auto dataType = getMiopenDataType(input);
+ miopenConvolutionMode_t c_mode = miopenConvolution;
ConvolutionArgs args{ input, output, weight };
args.handle = getMiopenHandle();
@@ -610,7 +639,7 @@
args.idesc.set(input);
args.wdesc.set(weight);
args.odesc.set(output);
- args.cdesc.set(dataType, input.dim() - 2, args.params.padding, args.params.stride, args.params.dilation, args.params.groups);
+ args.cdesc.set(dataType, c_mode, input.dim() - 2, args.params.padding, args.params.stride, args.params.dilation, args.params.groups);
miopenConvFwdAlgorithm_t fwdAlg;
Workspace workspace = chooseAlgorithm(args, benchmark, &fwdAlg);
@@ -672,6 +701,81 @@
return output_t;
}
+// Depthwise convolutions forward.
+void raw_miopen_depthwise_convolution_forward_out(
+ const Tensor& output, const Tensor& input, const Tensor& weight,
+ IntArrayRef padding, IntArrayRef stride, IntArrayRef dilation, int64_t groups,
+ bool benchmark, bool deterministic) {
+
+ auto dataType = getMiopenDataType(input);
+ miopenConvolutionMode_t c_mode = miopenDepthwise;
+
+ ConvolutionArgs args{ input, output, weight };
+ args.handle = getMiopenHandle();
+ setConvolutionParams(&args.params, args.handle, input, weight, padding, stride, dilation, groups, deterministic);
+ args.idesc.set(input);
+ args.wdesc.set(weight);
+ args.odesc.set(output);
+ args.cdesc.set(dataType, c_mode, input.dim() - 2, args.params.padding, args.params.stride, args.params.dilation, args.params.groups);
+
+ miopenConvFwdAlgorithm_t fwdAlg;
+ Workspace workspace = chooseAlgorithm(args, benchmark, &fwdAlg);
+
+ Constant one(dataType, 1);
+ Constant zero(dataType, 0);
+
+ MIOPEN_CHECK(miopenConvolutionForward(
+ args.handle,
+ &one, args.idesc.desc(), input.data_ptr(),
+ args.wdesc.desc(), weight.data_ptr(),
+ args.cdesc.desc(), fwdAlg, &zero,
+ args.odesc.desc(), output.data_ptr(), workspace.data, workspace.size));
+}
+
+Tensor miopen_depthwise_convolution_forward(
+ CheckedFrom c,
+ const TensorArg& input, const TensorArg& weight,
+ IntArrayRef padding, IntArrayRef stride, IntArrayRef dilation, int64_t groups,
+ bool benchmark, bool deterministic)
+{
+ checkAllSameType(c, {input, weight});
+ checkAllSameGPU(c, {input, weight});
+
+ auto output_t = at::empty(
+ conv_output_size(input->sizes(), weight->sizes(),
+ padding, stride, dilation, groups),
+ input->options());
+
+ TensorArg output{ output_t, "result", 0 };
+ convolution_shape_check(c, input, weight, output, padding, stride, dilation, groups);
+
+ Tensor weight_contig = weight->contiguous();
+
+ raw_miopen_depthwise_convolution_forward_out(
+ *output, *input, weight_contig,
+ padding, stride, dilation, groups, benchmark, deterministic);
+
+ return *output;
+}
+
+Tensor miopen_depthwise_convolution(
+ const Tensor& input_t, const Tensor& weight_t, const Tensor& bias_t,
+ IntArrayRef padding, IntArrayRef stride, IntArrayRef dilation,
+ int64_t groups, bool benchmark, bool deterministic)
+{
+ TensorArg input { input_t, "input", 1 },
+ weight { weight_t, "weight", 2 },
+ bias { bias_t, "bias", 3 };
+ setMIOpenStreamToCurrent();
+ CheckedFrom c = "miopen_depthwise_convolution";
+ auto output_t = miopen_depthwise_convolution_forward(
+ c, input, weight, padding, stride, dilation, groups, benchmark, deterministic);
+ if (bias->defined()) {
+ miopen_convolution_add_bias_(c, { output_t, "result", 0 }, bias);
+ }
+ return output_t;
+}
+
Tensor miopen_convolution_transpose_backward_input(
const Tensor& grad_output_t, const Tensor& weight_t,
IntArrayRef padding, IntArrayRef stride, IntArrayRef dilation,
@@ -720,6 +824,7 @@
bool benchmark, bool deterministic) {
auto dataType = getMiopenDataType(grad_output);
+ miopenConvolutionMode_t c_mode = miopenConvolution;
ConvolutionArgs args{ grad_input, grad_output, weight };
args.handle = getMiopenHandle();
@@ -727,7 +832,7 @@
args.idesc.set(grad_input);
args.wdesc.set(weight);
args.odesc.set(grad_output);
- args.cdesc.set(dataType, grad_output.dim() - 2, args.params.padding, args.params.stride, args.params.dilation, args.params.groups);
+ args.cdesc.set(dataType, c_mode, grad_output.dim() - 2, args.params.padding, args.params.stride, args.params.dilation, args.params.groups);
miopenConvBwdDataAlgorithm_t bwdDataAlg;
Workspace workspace = chooseAlgorithm(args, benchmark, &bwdDataAlg);
@@ -796,6 +901,76 @@
padding, stride, dilation, groups, benchmark, deterministic);
}
+// Depthwise convolutions backward data.
+void raw_miopen_depthwise_convolution_backward_input_out(
+ const at::Tensor& grad_input,
+ const at::Tensor& grad_output,
+ const at::Tensor& weight,
+ IntArrayRef padding, IntArrayRef stride, IntArrayRef dilation, int64_t groups,
+ bool benchmark, bool deterministic) {
+
+ auto dataType = getMiopenDataType(grad_output);
+ miopenConvolutionMode_t c_mode = miopenDepthwise;
+
+ ConvolutionArgs args{ grad_input, grad_output, weight };
+ args.handle = getMiopenHandle();
+ setConvolutionParams(&args.params, args.handle, grad_input, weight, padding, stride, dilation, groups, deterministic);
+ args.idesc.set(grad_input);
+ args.wdesc.set(weight);
+ args.odesc.set(grad_output);
+ args.cdesc.set(dataType, c_mode, grad_output.dim() - 2, args.params.padding, args.params.stride, args.params.dilation, args.params.groups);
+
+ miopenConvBwdDataAlgorithm_t bwdDataAlg;
+ Workspace workspace = chooseAlgorithm(args, benchmark, &bwdDataAlg);
+
+ Constant one(dataType, 1);
+ Constant zero(dataType, 0);
+
+ MIOPEN_CHECK(miopenConvolutionBackwardData(
+ args.handle,
+ &one, args.odesc.desc(), grad_output.data_ptr(),
+ args.wdesc.desc(), weight.data_ptr(),
+ args.cdesc.desc(), bwdDataAlg, &zero,
+ args.idesc.desc(), grad_input.data_ptr(), workspace.data, workspace.size));
+}
+
+Tensor miopen_depthwise_convolution_backward_input(
+ CheckedFrom c,
+ IntArrayRef input_size, const TensorArg& grad_output, const TensorArg& weight,
+ IntArrayRef padding, IntArrayRef stride, IntArrayRef dilation, int64_t groups,
+ bool benchmark, bool deterministic)
+{
+ checkAllSameType(c, {grad_output, weight});
+ checkAllSameGPU(c, {grad_output, weight});
+
+ auto grad_input_t = at::empty(input_size, grad_output->options());
+
+ TensorArg grad_input{ grad_input_t, "result", 0 };
+ convolution_shape_check(c, grad_input, weight, grad_output, padding, stride, dilation, groups);
+
+ Tensor weight_contig = weight->contiguous();
+
+ raw_miopen_depthwise_convolution_backward_input_out(
+ *grad_input, *grad_output, weight_contig,
+ padding, stride, dilation, groups, benchmark, deterministic);
+
+ return *grad_input;
+}
+
+Tensor miopen_depthwise_convolution_backward_input(
+ IntArrayRef input_size, const Tensor& grad_output_t, const Tensor& weight_t,
+ IntArrayRef padding, IntArrayRef stride, IntArrayRef dilation, int64_t groups,
+ bool benchmark, bool deterministic)
+{
+ TensorArg grad_output{ grad_output_t, "grad_output", 1 },
+ weight{ weight_t, "weight", 2 };
+ setMIOpenStreamToCurrent();
+ return miopen_depthwise_convolution_backward_input(
+ "miopen_depthwise_convolution_backward_input",
+ input_size, grad_output, weight,
+ padding, stride, dilation, groups, benchmark, deterministic);
+}
+
std::tuple<at::Tensor,at::Tensor,at::Tensor> miopen_convolution_backward(
const at::Tensor& input, const at::Tensor& grad_output_t, const at::Tensor& weight,
IntArrayRef padding, IntArrayRef stride, IntArrayRef dilation, int64_t groups,
@@ -817,6 +992,27 @@
return std::tuple<Tensor,Tensor,Tensor>{grad_input, grad_weight, grad_bias};
}
+std::tuple<at::Tensor,at::Tensor,at::Tensor> miopen_depthwise_convolution_backward(
+ const at::Tensor& input, const at::Tensor& grad_output_t, const at::Tensor& weight,
+ IntArrayRef padding, IntArrayRef stride, IntArrayRef dilation, int64_t groups,
+ bool benchmark, bool deterministic, std::array<bool,3> output_mask) {
+
+ Tensor grad_output = grad_output_t.contiguous();
+
+ Tensor grad_input, grad_weight, grad_bias;
+ if (output_mask[0]) {
+ grad_input = at::miopen_depthwise_convolution_backward_input(input.sizes(), grad_output, weight, padding, stride, dilation, groups, benchmark, deterministic);
+ }
+ if (output_mask[1]) {
+ grad_weight = at::miopen_depthwise_convolution_backward_weight(weight.sizes(), grad_output, input, padding, stride, dilation, groups, benchmark, deterministic);
+ }
+ if (output_mask[2]) {
+ grad_bias = at::miopen_convolution_backward_bias(grad_output);
+ }
+
+ return std::tuple<Tensor,Tensor,Tensor>{grad_input, grad_weight, grad_bias};
+}
+
Tensor miopen_convolution_transpose(
const Tensor& input_t, const Tensor& weight_t, const Tensor& bias_t,
IntArrayRef padding, IntArrayRef output_padding, IntArrayRef stride, IntArrayRef dilation,
@@ -846,6 +1042,7 @@
bool benchmark, bool deterministic) {
auto dataType = getMiopenDataType(input);
+ miopenConvolutionMode_t c_mode = miopenConvolution;
ConvolutionArgs args{ input, grad_output, grad_weight };
args.handle = getMiopenHandle();
@@ -853,7 +1050,7 @@
args.idesc.set(input);
args.wdesc.set(grad_weight);
args.odesc.set(grad_output);
- args.cdesc.set(dataType, input.dim() - 2, args.params.padding, args.params.stride, args.params.dilation, args.params.groups);
+ args.cdesc.set(dataType, c_mode, input.dim() - 2, args.params.padding, args.params.stride, args.params.dilation, args.params.groups);
miopenConvBwdWeightsAlgorithm_t bwdFilterAlg;
Workspace workspace = chooseAlgorithm(args, benchmark, &bwdFilterAlg);
@@ -893,6 +1090,61 @@
return grad_weight_t;
}
+// Depthwise convolutions backward weights.
+void raw_miopen_depthwise_convolution_backward_weight_out(
+ const Tensor& grad_weight, const Tensor& grad_output, const Tensor& input,
+ IntArrayRef padding, IntArrayRef stride, IntArrayRef dilation, int64_t groups,
+ bool benchmark, bool deterministic) {
+
+ auto dataType = getMiopenDataType(input);
+ miopenConvolutionMode_t c_mode = miopenDepthwise;
+
+ ConvolutionArgs args{ input, grad_output, grad_weight };
+ args.handle = getMiopenHandle();
+ setConvolutionParams(&args.params, args.handle, input, grad_weight, padding, stride, dilation, groups, deterministic);
+ args.idesc.set(input);
+ args.wdesc.set(grad_weight);
+ args.odesc.set(grad_output);
+ args.cdesc.set(dataType, c_mode, input.dim() - 2, args.params.padding, args.params.stride, args.params.dilation, args.params.groups);
+
+ miopenConvBwdWeightsAlgorithm_t bwdFilterAlg;
+ Workspace workspace = chooseAlgorithm(args, benchmark, &bwdFilterAlg);
+
+ Constant one(dataType, 1);
+ Constant zero(dataType, 0);
+
+ MIOPEN_CHECK(miopenConvolutionBackwardWeights(
+ args.handle,
+ &one, args.odesc.desc(), grad_output.data_ptr(),
+ args.idesc.desc(), input.data_ptr(),
+ args.cdesc.desc(), bwdFilterAlg, &zero,
+ args.wdesc.desc(), grad_weight.data_ptr(), workspace.data, workspace.size));
+}
+
+Tensor miopen_depthwise_convolution_backward_weight(
+ CheckedFrom c,
+ IntArrayRef weight_size, const TensorArg& grad_output, const TensorArg& input,
+ IntArrayRef padding, IntArrayRef stride, IntArrayRef dilation, int64_t groups,
+ bool benchmark, bool deterministic)
+{
+
+ checkAllSameType(c, {grad_output, input});
+ checkAllSameGPU(c, {grad_output, input});
+
+ auto grad_weight_t = at::empty(weight_size, grad_output->options());
+
+ // For uniformity with everything else, although it seems grad_weight
+ // would be unambiguous too.
+ TensorArg grad_weight{ grad_weight_t, "result", 0 };
+ convolution_shape_check(c, input, grad_weight, grad_output, padding, stride, dilation, groups);
+
+ raw_miopen_depthwise_convolution_backward_weight_out(
+ *grad_weight, *grad_output, *input,
+ padding, stride, dilation, groups, benchmark, deterministic);
+
+ return grad_weight_t;
+}
+
Tensor miopen_convolution_backward_weight(
IntArrayRef weight_size,
const Tensor& grad_output_t,
@@ -925,6 +1177,22 @@
padding, stride, dilation, groups, benchmark, deterministic);
}
+Tensor miopen_depthwise_convolution_backward_weight(
+ IntArrayRef weight_size,
+ const Tensor& grad_output_t,
+ const Tensor& input_t,
+ IntArrayRef padding, IntArrayRef stride, IntArrayRef dilation, int64_t groups,
+ bool benchmark, bool deterministic)
+{
+ TensorArg grad_output{ grad_output_t, "grad_output", 1 },
+ input{ input_t, "input", 2 };
+ setMIOpenStreamToCurrent();
+ return miopen_depthwise_convolution_backward_weight(
+ "miopen_depthwise_convolution_backward_weight",
+ weight_size, grad_output, input,
+ padding, stride, dilation, groups, benchmark, deterministic);
+}
+
// ---------------------------------------------------------------------
//
// Convolution backward (bias)
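
Putting the new backward pieces together, a hedged sketch (tensors as in
the forward sketch above; all three gradients requested):

    auto grad_output = at::ones_like(out);
    std::array<bool, 3> output_mask = {true, true, true};
    Tensor grad_input, grad_weight, grad_bias;
    std::tie(grad_input, grad_weight, grad_bias) =
        at::miopen_depthwise_convolution_backward(
            input, grad_output, weight,
            /*padding=*/{1, 1}, /*stride=*/{1, 1}, /*dilation=*/{1, 1},
            /*groups=*/8, /*benchmark=*/false, /*deterministic=*/false,
            output_mask);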
diff --git a/aten/src/ATen/native/native_functions.yaml b/aten/src/ATen/native/native_functions.yaml
index 49f8f5b..dada9bd 100644
--- a/aten/src/ATen/native/native_functions.yaml
+++ b/aten/src/ATen/native/native_functions.yaml
@@ -1511,6 +1511,26 @@
dispatch:
CUDA: miopen_convolution_transpose_backward_weight
+- func: miopen_depthwise_convolution(Tensor self, Tensor weight, Tensor? bias, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
+ matches_jit_signature: True
+ dispatch:
+ CUDA: miopen_depthwise_convolution
+
+- func: miopen_depthwise_convolution_backward_input(int[] self_size, Tensor grad_output, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
+ matches_jit_signature: True
+ dispatch:
+ CUDA: miopen_depthwise_convolution_backward_input
+
+- func: miopen_depthwise_convolution_backward(Tensor self, Tensor grad_output, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
+ matches_jit_signature: True
+ dispatch:
+ CUDA: miopen_depthwise_convolution_backward
+
+- func: miopen_depthwise_convolution_backward_weight(int[] weight_size, Tensor grad_output, Tensor self, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
+ matches_jit_signature: True
+ dispatch:
+ CUDA: miopen_depthwise_convolution_backward_weight
+
- func: mm(Tensor self, Tensor mat2) -> Tensor
matches_jit_signature: True
variants: function, method
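
Each schema line above feeds the ATen code generator; the backward_input
entry, for instance, should come out roughly as the following declaration
(a sketch of the generated Functions.h, not copied from it):

    namespace at {
    CAFFE2_API Tensor miopen_depthwise_convolution_backward_input(
        IntArrayRef self_size, const Tensor& grad_output, const Tensor& weight,
        IntArrayRef padding, IntArrayRef stride, IntArrayRef dilation,
        int64_t groups, bool benchmark, bool deterministic);
    }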
diff --git a/tools/autograd/derivatives.yaml b/tools/autograd/derivatives.yaml
index dc28a6d..a730f67 100644
--- a/tools/autograd/derivatives.yaml
+++ b/tools/autograd/derivatives.yaml
@@ -1409,6 +1409,12 @@
- name: miopen_convolution_backward(Tensor self, Tensor grad_output, Tensor weight, IntArrayRef padding, IntArrayRef stride, IntArrayRef dilation, int64_t groups, bool benchmark, bool deterministic, std::array<bool,3> output_mask)
grad_output, self, weight: _convolution_double_backward(grads[0], grads[1], grads[2], grad_output, weight, self, stride, padding, dilation, false, std::vector<int64_t>(padding.size(), 0), groups, benchmark, deterministic, true, grad_input_mask)
+- name: miopen_depthwise_convolution(Tensor self, Tensor weight, Tensor bias, IntArrayRef padding, IntArrayRef stride, IntArrayRef dilation, int64_t groups, bool benchmark, bool deterministic)
+ self, weight, bias: miopen_depthwise_convolution_backward(self, grad, weight, padding, stride, dilation, groups, benchmark, deterministic, grad_input_mask)
+
+- name: miopen_depthwise_convolution_backward(Tensor self, Tensor grad_output, Tensor weight, IntArrayRef padding, IntArrayRef stride, IntArrayRef dilation, int64_t groups, bool benchmark, bool deterministic, std::array<bool,3> output_mask)
+ grad_output, self, weight: _convolution_double_backward(grads[0], grads[1], grads[2], grad_output, weight, self, stride, padding, dilation, false, std::vector<int64_t>(padding.size(), 0), groups, benchmark, deterministic, true, grad_input_mask)
+
- name: miopen_batch_norm(Tensor input, Tensor weight, Tensor bias, Tensor running_mean, Tensor running_var, bool training, double exponential_average_factor, double epsilon)
input, weight, bias: "training ? miopen_batch_norm_backward(input, grad.contiguous(), weight, running_mean, running_var, result1, result2, epsilon) : native_batch_norm_backward(grad, input, weight, running_mean, running_var, result1, result2, training, epsilon, grad_input_mask)"
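
With these derivative entries in place, autograd reaches the new kernels
without user-visible changes. A minimal end-to-end sketch via the C++
frontend (assumes a ROCm build; shapes hypothetical):

    auto opts   = torch::device(torch::kCUDA).requires_grad(true);
    auto input  = torch::randn({2, 8, 16, 16}, opts);
    auto weight = torch::randn({8, 1, 3, 3}, opts);
    auto out = torch::conv2d(input, weight, /*bias=*/{}, /*stride=*/1,
                             /*padding=*/1, /*dilation=*/1, /*groups=*/8);
    out.sum().backward();  // dispatches to miopen_depthwise_convolution_backward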