#define TORCH_ASSERT_ONLY_METHOD_OPERATORS
#include <ATen/Config.h>
#include <ATen/Parallel.h>
#include <ATen/core/Tensor.h>
#include <torch/library.h>
#ifndef AT_PER_OPERATOR_HEADERS
#include <ATen/Functions.h>
#include <ATen/NativeFunctions.h>
#else
#include <ATen/ops/_to_dense_native.h>
#include <ATen/ops/empty.h>
#include <ATen/ops/linear.h>
#include <ATen/ops/mkldnn_linear_backward_input.h>
#include <ATen/ops/mkldnn_linear_backward_input_native.h>
#include <ATen/ops/mkldnn_linear_backward_native.h>
#include <ATen/ops/mkldnn_linear_backward_weights.h>
#include <ATen/ops/mkldnn_linear_backward_weights_native.h>
#include <ATen/ops/mkldnn_linear_native.h>
#endif
#if !AT_MKLDNN_ENABLED()
namespace at {
namespace native {
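// Fallback stubs compiled when ATen is built without MKLDNN support: each
// operator simply raises an error if it is ever dispatched to.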
Tensor mkldnn_linear(
const Tensor& self,
const Tensor& weight, const c10::optional<Tensor>& bias_opt) {
TORCH_CHECK(false, "mkldnn_linear: ATen not compiled with MKLDNN support");
}
Tensor mkldnn_linear_backward_input(
IntArrayRef input_size, const Tensor& grad_output, const Tensor& weight) {
TORCH_CHECK(false, "mkldnn_linear_backward_input: ATen not compiled with MKLDNN support");
}
std::tuple<Tensor, Tensor> mkldnn_linear_backward_weights(
const Tensor& grad_output, const Tensor& input, const Tensor& weight, bool bias_defined) {
TORCH_CHECK(false, "mkldnn_linear_backward_weights: ATen not compiled with MKLDNN support");
}
std::tuple<Tensor, Tensor, Tensor> mkldnn_linear_backward(
const Tensor& input, const Tensor& grad_output_t,
const Tensor& weight, std::array<bool,3> output_mask) {
TORCH_CHECK(false, "mkldnn_linear_backward: ATen not compiled with MKLDNN support");
}
} // namespace native
} // namespace at
#else // AT_MKLDNN_ENABLED
#include <ATen/native/mkldnn/MKLDNNCommon.h>
#include <ATen/native/mkldnn/Utils.h>
namespace at {
namespace native {
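// mkldnn_linear: forward pass of linear for an MKLDNN-layout (opaque) input.
// Inputs with dim > 2 are flattened to 2-D, the matmul runs through
// ideep::inner_product_forward, and the result is reshaped back to the
// original leading dimensions.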
Tensor mkldnn_linear(
const Tensor& self,
const Tensor& weight_t, const c10::optional<Tensor>& bias_opt) {
// See [Note: hacky wrapper removal for optional tensor]
c10::MaybeOwned<Tensor> bias_maybe_owned = at::borrow_from_optional_tensor(bias_opt);
const Tensor& bias = *bias_maybe_owned;
const int64_t dim = self.dim();
TORCH_CHECK(
self.dim() != 0,
"mkldnn_linear: input needs to have dim at least 1, but input dim is ",
self.dim());
TORCH_CHECK(self.is_mkldnn(),
"mkldnn_linear: input needs to be mkldnn layout");
if (self.scalar_type() == ScalarType::BFloat16) {
TORCH_CHECK(mkldnn_bf16_device_check(),
"mkldnn_linear: bf16 path needs the cpu support avx512bw, avx512vl and avx512dq");
}
// Reshape first if input dim != 2; the reshape will incur a memory copy.
auto self_reshaped =
dim == 2 ? self : self.reshape({-1, self.size(self.dim() - 1)});
const ideep::tensor x = itensor_from_mkldnn(self_reshaped);
// weight_t can be either an mkldnn tensor or a dense tensor.
const Tensor weight = (weight_t.is_mkldnn() || weight_t.is_contiguous()) ? weight_t : weight_t.contiguous();
const ideep::tensor w = itensor_from_tensor(weight);
ideep::tensor y;
if (bias.defined()) {
const ideep::tensor b = itensor_from_tensor(bias);
ideep::inner_product_forward::compute(x, w, b, y);
} else {
ideep::inner_product_forward::compute(x, w, y);
}
auto input_size = self.sizes();
std::vector<int64_t> output_size(input_size.begin(), input_size.end() - 1);
output_size.push_back(weight.size(0));
if (self.dim() != 2) {
return new_with_itensor_mkldnn(std::move(y), optTypeMetaToScalarType(self.options().dtype_opt()),
self.options().device_opt()).reshape(output_size);
}
return new_with_itensor_mkldnn(std::move(y), optTypeMetaToScalarType(self.options().dtype_opt()),
self.options().device_opt());
}
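// mkldnn_linear_backward_input: computes the input gradient with
// ideep::inner_product_backward_data. grad_output must be MKLDNN layout,
// while the weight is expected to be a dense float CPU tensor.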
Tensor mkldnn_linear_backward_input(
IntArrayRef input_size, const Tensor& grad_output, const Tensor& weight_t) {
TORCH_CHECK(grad_output.is_mkldnn(),
"mkldnn_linear_backward: grad_output needs to be mkldnn layout");
TORCH_CHECK(weight_t.device().is_cpu() && weight_t.scalar_type() == kFloat,
"mkldnn_linear_backward: weight_t needs to be a dense tensor");
auto grad_output_reshaped = grad_output.dim() > 2 ?
grad_output.reshape({-1, grad_output.size(grad_output.dim() - 1)}) : grad_output;
ideep::tensor& grady = itensor_from_mkldnn(grad_output_reshaped);
// weight_t is always a dense tensor for training.
const Tensor weight = weight_t.is_contiguous() ? weight_t : weight_t.contiguous();
const ideep::tensor w = itensor_view_from_dense(weight);
std::vector<int64_t> input_reshaped_size;
input_reshaped_size.push_back(grad_output_reshaped.size(0));
input_reshaped_size.push_back(weight.size(1));
ideep::tensor gradx;
ideep::inner_product_backward_data::compute(
grady, w, {input_reshaped_size.begin(), input_reshaped_size.end()}, gradx);
if (input_size.size() > 2) {
return new_with_itensor_mkldnn(std::move(gradx), optTypeMetaToScalarType(grad_output.options().dtype_opt()),
grad_output.options().device_opt()).reshape(input_size);
}
return new_with_itensor_mkldnn(std::move(gradx), optTypeMetaToScalarType(grad_output.options().dtype_opt()),
grad_output.options().device_opt());
}
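// mkldnn_linear_backward_weights: computes the weight gradient (and the bias
// gradient when bias_defined) with ideep::inner_product_backward_weights and
// returns both as dense tensors matching the weight's options.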
std::tuple<Tensor, Tensor> mkldnn_linear_backward_weights(
const Tensor& grad_output, const Tensor& input, const Tensor& weight, bool bias_defined) {
TORCH_CHECK(grad_output.is_mkldnn() && input.is_mkldnn(),
"mkldnn_linear_backward: grad_output and input need to be mkldnn layout");
TORCH_CHECK(weight.device().is_cpu() && weight.scalar_type() == kFloat,
"mkldnn_linear_backward: weight needs to be a dense tensor");
auto grad_output_reshaped = grad_output.dim() > 2 ?
grad_output.reshape({-1, grad_output.size(grad_output.dim() - 1)}) : grad_output;
auto input_reshaped = input.dim() > 2 ? input.reshape({-1, input.size(input.dim() - 1)}) : input;
ideep::tensor& grady = itensor_from_mkldnn(grad_output_reshaped);
ideep::tensor& x = itensor_from_mkldnn(input_reshaped);
ideep::tensor gradw, gradb;
if (bias_defined) {
ideep::inner_product_backward_weights::compute(x, grady, gradw, gradb);
} else {
ideep::inner_product_backward_weights::compute(x, grady, gradw);
}
return std::tuple<Tensor, Tensor>{
mkldnn_to_dense(new_with_itensor_mkldnn(std::move(gradw),
optTypeMetaToScalarType(weight.options().dtype_opt()),
weight.options().device_opt())),
mkldnn_to_dense(new_with_itensor_mkldnn(std::move(gradb),
optTypeMetaToScalarType(weight.options().dtype_opt()),
weight.options().device_opt()))};
}
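// mkldnn_linear_backward: dispatches to the backward kernels above according
// to output_mask = {grad_input, grad_weight, grad_bias}.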
std::tuple<Tensor, Tensor, Tensor> mkldnn_linear_backward(
const Tensor& input, const Tensor& grad_output,
const Tensor& weight, std::array<bool,3> output_mask) {
Tensor grad_input, grad_weight, grad_bias;
if (output_mask[0]) {
grad_input = at::mkldnn_linear_backward_input(input.sizes(), grad_output, weight);
}
if (output_mask[1] || output_mask[2]) {
std::tie(grad_weight, grad_bias) = at::mkldnn_linear_backward_weights(grad_output, input, weight, output_mask[2]);
}
return std::tuple<Tensor, Tensor, Tensor>{grad_input, grad_weight, grad_bias};
}
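// mkldnn_linear_pointwise: linear on a dense (strided) CPU input fused with a
// unary post-op; `attr` selects the post-op through fusion_unary_attr_map(),
// parameterized by `scalars` and `algorithm`. Registered below as
// mkldnn::_linear_pointwise.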
Tensor mkldnn_linear_pointwise(
const Tensor& input_t,
const Tensor& weight_t,
const c10::optional<Tensor>& bias_opt,
c10::string_view attr,
torch::List<c10::optional<at::Scalar>> scalars,
c10::optional<c10::string_view> algorithm) {
auto input = input_t.contiguous();
auto input_size = input.sizes();
const int64_t dim = input.dim();
auto input_reshaped =
dim == 2 ? input : input.reshape({-1, input.size(input.dim() - 1)});
std::vector<int64_t> output_size(input_size.begin(), input_size.end() - 1);
output_size.push_back(weight_t.size(0));
auto output = at::empty(output_size, input.options());
if (dim != 2) {
std::vector<int64_t> output_size_reshaped = {input_reshaped.size(0),
weight_t.size(0)};
output = output.reshape(output_size_reshaped);
}
c10::impl::ExcludeDispatchKeyGuard edkg(c10::autograd_dispatch_keyset);
ideep::tensor mkldnn_output = itensor_from_tensor(output);
c10::MaybeOwned<Tensor> bias_maybe_owned =
at::borrow_from_optional_tensor(bias_opt);
const Tensor& bias = *bias_maybe_owned;
const ideep::tensor mkldnn_input = itensor_view_from_dense(input_reshaped);
c10::optional<ideep::tensor> mkldnn_bias{c10::nullopt};
if (bias.defined()) {
mkldnn_bias = itensor_from_tensor(bias);
}
const ideep::tensor w = itensor_from_tensor(weight_t);
ideep::attr_t op_attr = ideep::attr_t();
if (attr != "none") {
auto it = fusion_unary_attr_map().find(attr);
TORCH_CHECK(
it != fusion_unary_attr_map().end(), "Fusion behavior undefined.");
op_attr = it->second(scalars, algorithm);
}
if (mkldnn_bias.has_value()) {
ideep::inner_product_forward::compute</*reorder_src=*/false, /*reorder_weight=*/false>(
mkldnn_input,
w,
mkldnn_bias.value(),
mkldnn_output,
op_attr);
} else {
ideep::inner_product_forward::compute</*reorder_src=*/false, /*reorder_weight=*/false>(
mkldnn_input,
w,
mkldnn_output,
op_attr);
}
if (dim != 2) {
output = output.reshape(output_size);
}
return output;
}
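// mkldnn_linear_pointwise_binary: linear on a dense CPU input fused with a
// binary post-op applied against `other_t`; `attr` selects the algorithm
// through fusion_binary_alg_map(). Registered below as
// mkldnn::_linear_pointwise.binary.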
Tensor mkldnn_linear_pointwise_binary(
const Tensor& input_t,
const Tensor& other_t,
const Tensor& weight_t,
const c10::optional<Tensor>& bias_opt,
c10::string_view attr) {
c10::MaybeOwned<Tensor> bias_maybe_owned =
at::borrow_from_optional_tensor(bias_opt);
const Tensor& bias = *bias_maybe_owned;
// Make sure the inputs have the same type (device, layout, dtype), that the
// device is CPU, and that the dtype is float or bfloat16.
check_mkldnn_binary_fusion_inputs(input_t, other_t, weight_t, bias);
auto input = input_t.contiguous();
auto it_binary = fusion_binary_alg_map().find(attr);
TORCH_CHECK(
it_binary != fusion_binary_alg_map().end(), "Fusion behavior undefined.");
auto input_size = input.sizes();
const int64_t dim = input.dim();
auto input_reshaped =
dim == 2 ? input : input.reshape({-1, input.size(input.dim() - 1)});
std::vector<int64_t> output_size(input_size.begin(), input_size.end() - 1);
output_size.push_back(weight_t.size(0));
auto output = at::empty(output_size, input.options());
auto other_reshaped = other_t.contiguous();
if (dim != 2) {
std::vector<int64_t> output_size_reshaped = {
input_reshaped.size(0), weight_t.size(0)};
output = output.reshape(output_size_reshaped);
other_reshaped = other_reshaped.reshape(output_size_reshaped);
}
TORCH_CHECK(
output.sizes() == other_reshaped.sizes(),
"linear_binary_run expects the output and other tensor to have the same size");
c10::impl::ExcludeDispatchKeyGuard edkg(c10::autograd_dispatch_keyset);
ideep::tensor mkldnn_output = itensor_from_tensor(output);
const ideep::tensor mkldnn_other = itensor_from_tensor(other_reshaped);
const ideep::tensor mkldnn_input = itensor_view_from_dense(input_reshaped);
c10::optional<ideep::tensor> mkldnn_bias{c10::nullopt};
if (bias.defined()) {
mkldnn_bias = itensor_from_tensor(bias);
}
const ideep::tensor w = itensor_from_tensor(weight_t);
auto other_desc = mkldnn_other.get_desc();
auto op_attr = ideep::attr_t::fuse_binary(it_binary->second, other_desc);
if (mkldnn_bias.has_value()) {
ideep::inner_product_forward::compute_binary</*reorder_src=*/false, /*reorder_weight=*/false>(
mkldnn_input,
mkldnn_other,
w,
mkldnn_bias.value(),
mkldnn_output,
op_attr);
} else {
ideep::inner_product_forward::compute_binary</*reorder_src=*/false, /*reorder_weight=*/false>(
mkldnn_input, mkldnn_other, w, mkldnn_output, op_attr);
}
if (dim != 2) {
output = output.reshape(output_size);
}
return output;
}
#if AT_MKL_ENABLED()
#include <mkl.h>
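// mkl_linear: when the runtime batch size M matches prepack_batch_size and
// the weight has been prepacked into an MKLDNN tensor, runs the GEMM directly
// via cblas_sgemm_compute on the packed weight; otherwise falls back to
// at::linear_out with the original dense weight.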
Tensor mkl_linear(
const Tensor& self,
const Tensor& mkl_weight_t,
const Tensor& origin_weight_t,
const c10::optional<Tensor>& bias_opt,
const int64_t prepack_batch_size) {
c10::MaybeOwned<Tensor> bias_maybe_owned =
at::borrow_from_optional_tensor(bias_opt);
const Tensor& bias = *bias_maybe_owned;
TORCH_CHECK(
self.options().type_equal(origin_weight_t.options()),
"Input type (",
self.toString(),
") and weight type (",
origin_weight_t.toString(),
") should be the same");
TORCH_CHECK(
!bias.defined() || (self.options().type_equal(bias.options())),
"Input type (",
self.toString(),
") and bias type (",
bias.toString(),
") should be the same");
TORCH_CHECK(
mkl_weight_t.scalar_type() == origin_weight_t.scalar_type() &&
origin_weight_t.scalar_type() == kFloat,
"mkl_linear: weight dtype should be float");
c10::impl::ExcludeDispatchKeyGuard edkg(c10::autograd_dispatch_keyset);
auto input_size = self.sizes();
std::vector<int64_t> output_size(input_size.begin(), input_size.end() - 1);
output_size.push_back(origin_weight_t.size(0));
auto output = at::empty(output_size, self.options());
int64_t M = self.numel() / self.size(self.dim() - 1);
if (M == prepack_batch_size && mkl_weight_t.is_mkldnn()) {
auto self_ = self.is_contiguous() ? self : self.contiguous();
auto K = origin_weight_t.size(1);
auto N = origin_weight_t.size(0);
const ideep::tensor& w = itensor_from_mkldnn(mkl_weight_t);
auto in_ptr = self_.data_ptr<float>();
auto weight_ptr = (float*)(w.get_data_handle());
auto out_ptr = output.data_ptr<float>();
if (bias.defined()) {
auto bias_ = bias.is_contiguous() ? bias : bias.contiguous();
auto bias_ptr = bias_.data_ptr<float>();
at::parallel_for(0, M, 1, [&](int64_t begin, int64_t end) {
for (const auto d : c10::irange(begin, end)) {
memcpy(out_ptr + d * N, bias_ptr, sizeof(float) * N);
}
});
}
cblas_sgemm_compute(
CblasRowMajor,
CblasNoTrans,
CblasPacked,
M,
N,
K,
in_ptr,
K,
weight_ptr,
K,
bias.defined() ? 1.f : 0.f,
out_ptr,
N);
} else {
output = at::linear_out(output, self, origin_weight_t, bias_opt);
}
return output;
}
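// Register mkl::_mkl_linear under both the CPU and MkldnnCPU dispatch keys,
// since the prepacked weight argument may be an MKLDNN tensor.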
TORCH_LIBRARY_IMPL(mkl, CPU, m) {
m.impl(TORCH_SELECTIVE_NAME("mkl::_mkl_linear"), TORCH_FN(mkl_linear));
}
TORCH_LIBRARY_IMPL(mkl, MkldnnCPU, m) {
m.impl(TORCH_SELECTIVE_NAME("mkl::_mkl_linear"), TORCH_FN(mkl_linear));
}
#else // AT_MKL_ENABLED
Tensor mkl_linear(
const Tensor& self,
const Tensor& mkl_weight_t,
const Tensor& origin_weight_t,
const c10::optional<Tensor>& bias_opt,
const int64_t prepack_batch_size) {
TORCH_CHECK(false, "mkl_linear: ATen not compiled with MKL support");
}
#endif // AT_MKL_ENABLED
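// Register the fused linear kernels for both dense CPU and MKLDNN-layout
// inputs under the mkldnn namespace.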
TORCH_LIBRARY_IMPL(mkldnn, CPU, m) {
m.impl(
TORCH_SELECTIVE_NAME("mkldnn::_linear_pointwise"),
TORCH_FN(mkldnn_linear_pointwise));
m.impl(
TORCH_SELECTIVE_NAME("mkldnn::_linear_pointwise.binary"),
TORCH_FN(mkldnn_linear_pointwise_binary));
}
TORCH_LIBRARY_IMPL(mkldnn, MkldnnCPU, m) {
m.impl(
TORCH_SELECTIVE_NAME("mkldnn::_linear_pointwise"),
TORCH_FN(mkldnn_linear_pointwise));
m.impl(
TORCH_SELECTIVE_NAME("mkldnn::_linear_pointwise.binary"),
TORCH_FN(mkldnn_linear_pointwise_binary));
}
} // namespace native
} // namespace at
#endif // AT_MKLDNN_ENABLED