| #define TORCH_ASSERT_ONLY_METHOD_OPERATORS |
| #include <ATen/Config.h> |
| #include <ATen/Parallel.h> |
| #include <ATen/core/Tensor.h> |
| #include <torch/library.h> |
| |
| #ifndef AT_PER_OPERATOR_HEADERS |
| #include <ATen/Functions.h> |
| #include <ATen/NativeFunctions.h> |
| #else |
| #include <ATen/ops/_to_dense_native.h> |
| #include <ATen/ops/empty.h> |
| #include <ATen/ops/linear.h> |
| #include <ATen/ops/mkldnn_linear_backward_input.h> |
| #include <ATen/ops/mkldnn_linear_backward_input_native.h> |
| #include <ATen/ops/mkldnn_linear_backward_native.h> |
| #include <ATen/ops/mkldnn_linear_backward_weights.h> |
| #include <ATen/ops/mkldnn_linear_backward_weights_native.h> |
| #include <ATen/ops/mkldnn_linear_native.h> |
| #endif |
| |
| #if !AT_MKLDNN_ENABLED() |
| |
| namespace at { |
| namespace native { |
| |
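// ATen was built without MKLDNN support: every entry point below fails
// loudly via TORCH_CHECK so a misrouted call is caught immediately.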
| Tensor mkldnn_linear( |
| const Tensor& self, |
| const Tensor& weight, const c10::optional<Tensor>& bias_opt) { |
| TORCH_CHECK(false, "mkldnn_linear: ATen not compiled with MKLDNN support"); |
| } |
| Tensor mkldnn_linear_backward_input( |
| IntArrayRef input_size, const Tensor& grad_output, const Tensor& weight) { |
| TORCH_CHECK(false, "mkldnn_linear_backward_input: ATen not compiled with MKLDNN support"); |
| } |
| |
| std::tuple<Tensor, Tensor> mkldnn_linear_backward_weights( |
| const Tensor& grad_output, const Tensor& input, const Tensor& weight, bool bias_defined) { |
| TORCH_CHECK(false, "mkldnn_linear_backward_weights: ATen not compiled with MKLDNN support"); |
| } |
| |
| std::tuple<Tensor, Tensor, Tensor> mkldnn_linear_backward( |
| const Tensor& input, const Tensor& grad_output_t, |
| const Tensor& weight, std::array<bool,3> output_mask) { |
| TORCH_CHECK(false, "mkldnn_linear_backward: ATen not compiled with MKLDNN support"); |
| } |
| |
| } // namespace native |
| } // namespace at |
| |
| #else // AT_MKLDNN_ENABLED |
| |
| #include <ATen/native/mkldnn/MKLDNNCommon.h> |
| #include <ATen/native/mkldnn/Utils.h> |
| |
| namespace at { |
| namespace native { |
| |
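// Forward pass of linear for an MKLDNN-layout input:
//   y = x * w^T + b, with x [*, K], w [N, K], b [N] -> y [*, N],
// computed via ideep::inner_product_forward. Inputs with dim != 2 are
// flattened to 2D for the product and the result is reshaped back.
//
// A minimal call sketch (illustrative shapes, not taken from this file):
//   auto x = at::randn({8, 16}).to_mkldnn();      // [M, K], MKLDNN layout
//   auto w = at::randn({32, 16});                 // [N, K], dense
//   auto y = at::native::mkldnn_linear(x, w, {}); // MKLDNN tensor, [M, N]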
| Tensor mkldnn_linear( |
| const Tensor& self, |
| const Tensor& weight_t, const c10::optional<Tensor>& bias_opt) { |
| // See [Note: hacky wrapper removal for optional tensor] |
| c10::MaybeOwned<Tensor> bias_maybe_owned = at::borrow_from_optional_tensor(bias_opt); |
| const Tensor& bias = *bias_maybe_owned; |
| |
| const int64_t dim = self.dim(); |
  TORCH_CHECK(
      self.dim() != 0,
      "mkldnn_linear: input needs to have dim at least 1, input dim ",
      self.dim());
  TORCH_CHECK(self.is_mkldnn(),
             "mkldnn_linear: input needs to be in mkldnn layout");
  if (self.scalar_type() == ScalarType::BFloat16) {
    TORCH_CHECK(mkldnn_bf16_device_check(),
        "mkldnn_linear: bf16 path needs CPU support for avx512bw, avx512vl and avx512dq");
| } |
| |
  // Reshape to 2D first if input dim != 2; the reshape will incur a memory copy.
| auto self_reshaped = |
| dim == 2 ? self : self.reshape({-1, self.size(self.dim() - 1)}); |
| |
| const ideep::tensor x = itensor_from_mkldnn(self_reshaped); |
  // weight_t can be an mkldnn tensor or a dense tensor.
| const Tensor weight = (weight_t.is_mkldnn() || weight_t.is_contiguous()) ? weight_t : weight_t.contiguous(); |
| const ideep::tensor w = itensor_from_tensor(weight); |
| |
| ideep::tensor y; |
| if (bias.defined()) { |
| const ideep::tensor b = itensor_from_tensor(bias); |
| ideep::inner_product_forward::compute(x, w, b, y); |
| } else { |
| ideep::inner_product_forward::compute(x, w, y); |
| } |
| |
| auto input_size = self.sizes(); |
| std::vector<int64_t> output_size(input_size.begin(), input_size.end() - 1); |
| output_size.push_back(weight.size(0)); |
| |
| if (self.dim() != 2) { |
| return new_with_itensor_mkldnn(std::move(y), optTypeMetaToScalarType(self.options().dtype_opt()), |
| self.options().device_opt()).reshape(output_size); |
| } |
| return new_with_itensor_mkldnn(std::move(y), optTypeMetaToScalarType(self.options().dtype_opt()), |
| self.options().device_opt()); |
| } |
| |
| |
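// Computes grad_input = grad_output * weight via
// ideep::inner_product_backward_data:
//   grad_output [M, N] * weight [N, K] -> grad_input [M, K],
// reshaped back to input_size when the original input had dim > 2.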
Tensor mkldnn_linear_backward_input(
    IntArrayRef input_size, const Tensor& grad_output, const Tensor& weight_t) {
  TORCH_CHECK(grad_output.is_mkldnn(),
      "mkldnn_linear_backward: grad_output needs to be in mkldnn layout");
| TORCH_CHECK(weight_t.device().is_cpu() && weight_t.scalar_type() == kFloat, |
| "mkldnn_linear_backward: weight_t needs to be a dense tensor"); |
| auto grad_output_reshaped = grad_output.dim() > 2 ? |
| grad_output.reshape({-1, grad_output.size(grad_output.dim() - 1)}) : grad_output; |
| |
| ideep::tensor& grady = itensor_from_mkldnn(grad_output_reshaped); |
  // weight_t is always a dense tensor for training.
| const Tensor weight = weight_t.is_contiguous() ? weight_t : weight_t.contiguous(); |
| const ideep::tensor w = itensor_view_from_dense(weight); |
| |
| std::vector<int64_t> input_reshaped_size; |
| input_reshaped_size.push_back(grad_output_reshaped.size(0)); |
| input_reshaped_size.push_back(weight.size(1)); |
| |
| ideep::tensor gradx; |
| ideep::inner_product_backward_data::compute( |
| grady, w, {input_reshaped_size.begin(), input_reshaped_size.end()}, gradx); |
| |
| if (input_size.size() > 2) { |
| return new_with_itensor_mkldnn(std::move(gradx), optTypeMetaToScalarType(grad_output.options().dtype_opt()), |
| grad_output.options().device_opt()).reshape(input_size); |
| } |
| return new_with_itensor_mkldnn(std::move(gradx), optTypeMetaToScalarType(grad_output.options().dtype_opt()), |
| grad_output.options().device_opt()); |
| } |
| |
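// Computes grad_weight = grad_output^T * input via
// ideep::inner_product_backward_weights and, when bias_defined, grad_bias
// as the reduction of grad_output over the batch dimension. Both results
// are converted back to dense tensors matching the weight's options.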
| std::tuple<Tensor, Tensor> mkldnn_linear_backward_weights( |
| const Tensor& grad_output, const Tensor& input, const Tensor& weight, bool bias_defined) { |
  TORCH_CHECK(grad_output.is_mkldnn() && input.is_mkldnn(),
      "mkldnn_linear_backward: grad_output and input need to be in mkldnn layout");
| TORCH_CHECK(weight.device().is_cpu() && weight.scalar_type() == kFloat, |
| "mkldnn_linear_backward: weight needs to be a dense tensor"); |
| |
| auto grad_output_reshaped = grad_output.dim() > 2 ? |
| grad_output.reshape({-1, grad_output.size(grad_output.dim() - 1)}) : grad_output; |
| auto input_reshaped = input.dim() > 2 ? input.reshape({-1, input.size(input.dim() - 1)}) : input; |
| |
| ideep::tensor& grady = itensor_from_mkldnn(grad_output_reshaped); |
| ideep::tensor& x = itensor_from_mkldnn(input_reshaped); |
| ideep::tensor gradw, gradb; |
| if (bias_defined) { |
| ideep::inner_product_backward_weights::compute(x, grady, gradw, gradb); |
| } else { |
| ideep::inner_product_backward_weights::compute(x, grady, gradw); |
| } |
| |
| return std::tuple<Tensor, Tensor>{ |
| mkldnn_to_dense(new_with_itensor_mkldnn(std::move(gradw), |
| optTypeMetaToScalarType(weight.options().dtype_opt()), |
| weight.options().device_opt())), |
| mkldnn_to_dense(new_with_itensor_mkldnn(std::move(gradb), |
| optTypeMetaToScalarType(weight.options().dtype_opt()), |
| weight.options().device_opt()))}; |
| } |
| |
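// Autograd entry point: dispatches to the backward kernels above according
// to output_mask = {grad_input, grad_weight, grad_bias}.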
| std::tuple<Tensor, Tensor, Tensor> mkldnn_linear_backward( |
| const Tensor& input, const Tensor& grad_output, |
| const Tensor& weight, std::array<bool,3> output_mask) { |
| Tensor grad_input, grad_weight, grad_bias; |
| if (output_mask[0]) { |
| grad_input = at::mkldnn_linear_backward_input(input.sizes(), grad_output, weight); |
| } |
| if (output_mask[1] || output_mask[2]) { |
| std::tie(grad_weight, grad_bias) = at::mkldnn_linear_backward_weights(grad_output, input, weight, output_mask[2]); |
| } |
| return std::tuple<Tensor, Tensor, Tensor>{grad_input, grad_weight, grad_bias}; |
| } |
| |
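// Fused linear + unary pointwise post-op for dense CPU tensors: `attr` names
// the eltwise op (looked up in fusion_unary_attr_map(); e.g. a unary op such
// as relu), and `scalars`/`algorithm` carry its parameters. The post-op is
// attached as a oneDNN primitive attribute, so linear and the pointwise op
// run as a single kernel.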
| Tensor mkldnn_linear_pointwise( |
| const Tensor& input_t, |
| const Tensor& weight_t, |
| const c10::optional<Tensor>& bias_opt, |
| c10::string_view attr, |
| torch::List<c10::optional<at::Scalar>> scalars, |
| c10::optional<c10::string_view> algorithm) { |
| auto input = input_t.contiguous(); |
| auto input_size = input.sizes(); |
| |
| const int64_t dim = input.dim(); |
| auto input_reshaped = |
| dim == 2 ? input : input.reshape({-1, input.size(input.dim() - 1)}); |
| |
| std::vector<int64_t> output_size(input_size.begin(), input_size.end() - 1); |
| output_size.push_back(weight_t.size(0)); |
| auto output = at::empty(output_size, input.options()); |
| |
| if (dim != 2) { |
| std::vector<int64_t> output_size_reshaped = {input_reshaped.size(0), |
| weight_t.size(0)}; |
| output = output.reshape(output_size_reshaped); |
| } |
| |
| c10::impl::ExcludeDispatchKeyGuard edkg(c10::autograd_dispatch_keyset); |
| ideep::tensor mkldnn_output = itensor_from_tensor(output); |
| |
| c10::MaybeOwned<Tensor> bias_maybe_owned = |
| at::borrow_from_optional_tensor(bias_opt); |
| const Tensor& bias = *bias_maybe_owned; |
| |
| const ideep::tensor mkldnn_input = itensor_view_from_dense(input_reshaped); |
| |
| c10::optional<ideep::tensor> mkldnn_bias{c10::nullopt}; |
| if (bias.defined()) { |
| mkldnn_bias = itensor_from_tensor(bias); |
| } |
| const ideep::tensor w = itensor_from_tensor(weight_t); |
| |
| ideep::attr_t op_attr = ideep::attr_t(); |
| if (attr != "none") { |
| auto it = fusion_unary_attr_map().find(attr); |
| TORCH_CHECK( |
| it != fusion_unary_attr_map().end(), "Fusion behavior undefined."); |
| op_attr = it->second(scalars, algorithm); |
| } |
| |
| if (mkldnn_bias.has_value()) { |
| ideep::inner_product_forward::compute</*reorder_src=*/false, /*reorder_weight=*/false>( |
| mkldnn_input, |
| w, |
| mkldnn_bias.value(), |
| mkldnn_output, |
| op_attr); |
| } else { |
| ideep::inner_product_forward::compute</*reorder_src=*/false, /*reorder_weight=*/false>( |
| mkldnn_input, |
| w, |
| mkldnn_output, |
| op_attr); |
| } |
| |
| if (dim != 2) { |
| output = output.reshape(output_size); |
| } |
| |
| return output; |
| } |
| |
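// Fused linear + binary pointwise post-op: `attr` names the binary algorithm
// (looked up in fusion_binary_alg_map()) and `other_t` is the second operand,
// which must match the output shape after reshaping to 2D.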
| Tensor mkldnn_linear_pointwise_binary( |
| const Tensor& input_t, |
| const Tensor& other_t, |
| const Tensor& weight_t, |
| const c10::optional<Tensor>& bias_opt, |
| c10::string_view attr) { |
| c10::MaybeOwned<Tensor> bias_maybe_owned = |
| at::borrow_from_optional_tensor(bias_opt); |
| const Tensor& bias = *bias_maybe_owned; |
  // Make sure the inputs have the same type (device, layout, dtype), that the
  // device is CPU, and that the dtype is float or bfloat16.
| check_mkldnn_binary_fusion_inputs(input_t, other_t, weight_t, bias); |
| |
| auto input = input_t.contiguous(); |
| |
| auto it_binary = fusion_binary_alg_map().find(attr); |
| TORCH_CHECK( |
| it_binary != fusion_binary_alg_map().end(), "Fusion behavior undefined."); |
| |
| auto input_size = input.sizes(); |
| |
| const int64_t dim = input.dim(); |
| auto input_reshaped = |
| dim == 2 ? input : input.reshape({-1, input.size(input.dim() - 1)}); |
| |
| std::vector<int64_t> output_size(input_size.begin(), input_size.end() - 1); |
| output_size.push_back(weight_t.size(0)); |
| auto output = at::empty(output_size, input.options()); |
| auto other_reshaped = other_t.contiguous(); |
| |
| if (dim != 2) { |
| std::vector<int64_t> output_size_reshaped = { |
| input_reshaped.size(0), weight_t.size(0)}; |
| output = output.reshape(output_size_reshaped); |
| other_reshaped = other_reshaped.reshape(output_size_reshaped); |
| } |
| |
  TORCH_CHECK(
      output.sizes() == other_reshaped.sizes(),
      "linear_binary_run expects the output and other tensor to have the same size");
| |
| c10::impl::ExcludeDispatchKeyGuard edkg(c10::autograd_dispatch_keyset); |
| ideep::tensor mkldnn_output = itensor_from_tensor(output); |
| const ideep::tensor mkldnn_other = itensor_from_tensor(other_reshaped); |
| const ideep::tensor mkldnn_input = itensor_view_from_dense(input_reshaped); |
| |
| c10::optional<ideep::tensor> mkldnn_bias{c10::nullopt}; |
| if (bias.defined()) { |
| mkldnn_bias = itensor_from_tensor(bias); |
| } |
| const ideep::tensor w = itensor_from_tensor(weight_t); |
| |
| auto other_desc = mkldnn_other.get_desc(); |
| auto op_attr = ideep::attr_t::fuse_binary(it_binary->second, other_desc); |
| |
| if (mkldnn_bias.has_value()) { |
| ideep::inner_product_forward::compute_binary</*reorder_src=*/false, /*reorder_weight=*/false>( |
| mkldnn_input, |
| mkldnn_other, |
| w, |
| mkldnn_bias.value(), |
| mkldnn_output, |
| op_attr); |
| } else { |
| ideep::inner_product_forward::compute_binary</*reorder_src=*/false, /*reorder_weight=*/false>( |
| mkldnn_input, mkldnn_other, w, mkldnn_output, op_attr); |
| } |
| |
| if (dim != 2) { |
| output = output.reshape(output_size); |
| } |
| |
| return output; |
| } |
| |
| #if AT_MKL_ENABLED() |
| #include <mkl.h> |
| |
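// Linear through MKL's prepacked SGEMM path. When the runtime batch size M
// matches prepack_batch_size and mkl_weight_t holds an MKL-packed buffer,
// the bias (if any) is first broadcast into the output rows and then
// accumulated by the GEMM (beta = 1.f with a bias, 0.f without):
//   out[M, N] = in[M, K] * packed_w + beta * out[M, N]
// The weight's transpose was fixed at pack time, in the prepack op that is
// not part of this file. Any other batch size falls back to at::linear_out
// on the original dense weight.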
| Tensor mkl_linear( |
| const Tensor& self, |
| const Tensor& mkl_weight_t, |
| const Tensor& origin_weight_t, |
| const c10::optional<Tensor>& bias_opt, |
| const int64_t prepack_batch_size) { |
| c10::MaybeOwned<Tensor> bias_maybe_owned = |
| at::borrow_from_optional_tensor(bias_opt); |
| const Tensor& bias = *bias_maybe_owned; |
| TORCH_CHECK( |
| self.options().type_equal(origin_weight_t.options()), |
| "Input type (", |
| self.toString(), |
| ") and weight type (", |
| origin_weight_t.toString(), |
| ") should be the same"); |
| TORCH_CHECK( |
| !bias.defined() || (self.options().type_equal(bias.options())), |
| "Input type (", |
| self.toString(), |
| ") and bias type (", |
| bias.toString(), |
| ") should be the same"); |
| TORCH_CHECK( |
| mkl_weight_t.scalar_type() == origin_weight_t.scalar_type() && |
| origin_weight_t.scalar_type() == kFloat, |
| "mkl_linear: weight dtype should be float"); |
| |
| c10::impl::ExcludeDispatchKeyGuard edkg(c10::autograd_dispatch_keyset); |
| auto input_size = self.sizes(); |
| std::vector<int64_t> output_size(input_size.begin(), input_size.end() - 1); |
| output_size.push_back(origin_weight_t.size(0)); |
| auto output = at::empty(output_size, self.options()); |
| int64_t M = self.numel() / self.size(self.dim() - 1); |
| if (M == prepack_batch_size && mkl_weight_t.is_mkldnn()) { |
| auto self_ = self.is_contiguous() ? self : self.contiguous(); |
| auto K = origin_weight_t.size(1); |
| auto N = origin_weight_t.size(0); |
| const ideep::tensor& w = itensor_from_mkldnn(mkl_weight_t); |
| auto in_ptr = self_.data_ptr<float>(); |
| auto weight_ptr = (float*)(w.get_data_handle()); |
| auto out_ptr = output.data_ptr<float>(); |
| if (bias.defined()) { |
| auto bias_ = bias.is_contiguous() ? bias : bias.contiguous(); |
| auto bias_ptr = bias_.data_ptr<float>(); |
| at::parallel_for(0, M, 1, [&](int64_t begin, int64_t end) { |
| for (const auto d : c10::irange(begin, end)) { |
| memcpy(out_ptr + d * N, bias_ptr, sizeof(float) * N); |
| } |
| }); |
| } |
| cblas_sgemm_compute( |
| CblasRowMajor, |
| CblasNoTrans, |
| CblasPacked, |
| M, |
| N, |
| K, |
| in_ptr, |
| K, |
| weight_ptr, |
| K, |
| bias.defined() ? 1.f : 0.f, |
| out_ptr, |
| N); |
| } else { |
| output = at::linear_out(output, self, origin_weight_t, bias_opt); |
| } |
| return output; |
| } |
| |
| TORCH_LIBRARY_IMPL(mkl, CPU, m) { |
| m.impl(TORCH_SELECTIVE_NAME("mkl::_mkl_linear"), TORCH_FN(mkl_linear)); |
| } |
| |
| TORCH_LIBRARY_IMPL(mkl, MkldnnCPU, m) { |
| m.impl(TORCH_SELECTIVE_NAME("mkl::_mkl_linear"), TORCH_FN(mkl_linear)); |
| } |
| |
| #else // AT_MKL_ENABLED |
| |
| Tensor mkl_linear( |
| const Tensor& self, |
| const Tensor& mkl_weight_t, |
| const Tensor& origin_weight_t, |
| const c10::optional<Tensor>& bias_opt, |
| const int64_t prepack_batch_size) { |
| TORCH_CHECK(false, "mkl_linear: ATen not compiled with MKL support"); |
| } |
| |
#endif // AT_MKL_ENABLED
| |
| TORCH_LIBRARY_IMPL(mkldnn, CPU, m) { |
| m.impl( |
| TORCH_SELECTIVE_NAME("mkldnn::_linear_pointwise"), |
| TORCH_FN(mkldnn_linear_pointwise)); |
| m.impl( |
| TORCH_SELECTIVE_NAME("mkldnn::_linear_pointwise.binary"), |
| TORCH_FN(mkldnn_linear_pointwise_binary)); |
| } |
| |
| TORCH_LIBRARY_IMPL(mkldnn, MkldnnCPU, m) { |
| m.impl( |
| TORCH_SELECTIVE_NAME("mkldnn::_linear_pointwise"), |
| TORCH_FN(mkldnn_linear_pointwise)); |
| m.impl( |
| TORCH_SELECTIVE_NAME("mkldnn::_linear_pointwise.binary"), |
| TORCH_FN(mkldnn_linear_pointwise_binary)); |
| } |
| |
| } // namespace native |
| } // namespace at |
| |
| #endif // AT_MKLDNN_ENABLED |