Explicitly provide memory format when calling *_like operators
Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/29390
Test Plan: Imported from OSS
Differential Revision: D18429722
Pulled By: VitalyFedyunin
fbshipit-source-id: e5f40da1550b4316e9c4725adbdf557c832b7563
diff --git a/aten/src/ATen/native/LossCTC.cpp b/aten/src/ATen/native/LossCTC.cpp
index fcb8889..3bc8570 100644
--- a/aten/src/ATen/native/LossCTC.cpp
+++ b/aten/src/ATen/native/LossCTC.cpp
@@ -175,7 +175,7 @@
int64_t max_input_length = log_probs.size(0);
int64_t batch_size = log_probs.size(1);
int64_t num_labels = log_probs.size(2);
- Tensor grad = at::full_like(log_probs, neginf); // at this point, this is log of empty sum
+ Tensor grad = at::full_like(log_probs, neginf, LEGACY_CONTIGUOUS_MEMORY_FORMAT); // at this point, this is log of empty sum
// The admin bits. We don't do much checking and assume that the forward did.
int64_t tg_target_stride;
diff --git a/aten/src/ATen/native/TensorShape.cpp b/aten/src/ATen/native/TensorShape.cpp
index 96f1ede..7f661b9 100644
--- a/aten/src/ATen/native/TensorShape.cpp
+++ b/aten/src/ATen/native/TensorShape.cpp
@@ -496,7 +496,7 @@
}
if (THTensor_compute_stride(self.sizes(), self.strides(), shape)) {
- // `THTensor_compute_stride` returns the proper strides to use if this
+ // `THTensor_compute_stride` returns the proper strides to use if this
// `reshape` can be just a view.
//
// NB: Even though we have viewable geometry and the target strides here,
@@ -794,7 +794,7 @@
auto row1 = indices.select(0, dim1);
// swap row0 and row1
- auto tmp = at::zeros_like(row0);
+ auto tmp = at::zeros_like(row0, LEGACY_CONTIGUOUS_MEMORY_FORMAT);
tmp.copy_(row0);
row0.copy_(row1);
row1.copy_(tmp);
diff --git a/aten/src/ATen/native/cuda/LossCTC.cu b/aten/src/ATen/native/cuda/LossCTC.cu
index 07f90bf..03e5d60 100644
--- a/aten/src/ATen/native/cuda/LossCTC.cu
+++ b/aten/src/ATen/native/cuda/LossCTC.cu
@@ -608,7 +608,7 @@
Tensor log_beta = at::empty_like(log_alpha, at::MemoryFormat::Contiguous);
log_beta.fill_(neginf);
- Tensor grad = at::full_like(log_probs, neginf); // initialization for log(sum (alpha beta))
+ Tensor grad = at::full_like(log_probs, neginf, LEGACY_CONTIGUOUS_MEMORY_FORMAT); // initialization for log(sum (alpha beta))
// As above, there may be better configurations to use.
constexpr int max_threads = std::is_same<scalar_t, float>::value ? 1024 : 896; // we need 72 or so 32 bit registers for double
diff --git a/aten/src/ATen/native/cuda/SoftMax.cu b/aten/src/ATen/native/cuda/SoftMax.cu
index 10969b2..88827e1 100644
--- a/aten/src/ATen/native/cuda/SoftMax.cu
+++ b/aten/src/ATen/native/cuda/SoftMax.cu
@@ -489,7 +489,7 @@
Tensor host_softmax(const Tensor & input_, const int64_t dim_, const bool half_to_float){
if (half_to_float) AT_ASSERTM(input_.scalar_type() == ScalarType::Half,"conversion is supported for Half type only");
auto input = input_.contiguous();
- Tensor output = half_to_float ? at::empty_like(input, input.options().dtype(ScalarType::Float)) : at::empty_like(input, at::MemoryFormat::Contiguous);
+ Tensor output = half_to_float ? at::empty_like(input, input.options().dtype(ScalarType::Float), LEGACY_CONTIGUOUS_MEMORY_FORMAT) : at::empty_like(input, LEGACY_CONTIGUOUS_MEMORY_FORMAT);
static_assert(std::is_same<acc_type<at::Half, true>, float>::value, "accscalar_t for half should be float");
if (input.dim() == 0) input = input.view(1);
int64_t dim = maybe_wrap_dim(dim_, input.dim());
@@ -571,7 +571,7 @@
template<template<typename, typename, typename> class Epilogue, bool is_log_softmax>
Tensor host_softmax_backward(const Tensor &grad_, const Tensor &output_, int64_t dim_, bool half_to_float){
int64_t dim = maybe_wrap_dim(dim_, grad_.dim());
- Tensor gI = half_to_float ? at::empty_like(grad_, grad_.options().dtype(ScalarType::Half)) : at::empty_like(grad_, at::MemoryFormat::Contiguous);
+ Tensor gI = half_to_float ? at::empty_like(grad_, grad_.options().dtype(ScalarType::Half), LEGACY_CONTIGUOUS_MEMORY_FORMAT) : at::empty_like(grad_, LEGACY_CONTIGUOUS_MEMORY_FORMAT);
if (grad_.numel() == 0) {
return gI;
}
diff --git a/test/cpp/jit/test_misc.cpp b/test/cpp/jit/test_misc.cpp
index 7a07d2a..d170a38 100644
--- a/test/cpp/jit/test_misc.cpp
+++ b/test/cpp/jit/test_misc.cpp
@@ -153,9 +153,12 @@
input, weight, kernel_size, bias, stride, padding);
// make grad_outputs
- at::Tensor grad_output = torch::randn_like(output);
- at::Tensor grad_finput = torch::zeros_like(finput);
- at::Tensor grad_fgradinput = torch::zeros_like(fgradinput);
+ at::Tensor grad_output =
+ torch::randn_like(output, at::MemoryFormat::Preserve);
+ at::Tensor grad_finput =
+ torch::zeros_like(finput, at::MemoryFormat::Preserve);
+ at::Tensor grad_fgradinput =
+ torch::zeros_like(fgradinput, at::MemoryFormat::Preserve);
// run backward eagerly
at::Tensor grad_input, grad_weight, grad_bias;
@@ -261,9 +264,12 @@
eps);
// make grad_outputs
- at::Tensor grad_output = torch::randn_like(output);
- at::Tensor grad_savemean = torch::zeros_like(savemean);
- at::Tensor grad_saveinvstd = torch::zeros_like(saveinvstd);
+ at::Tensor grad_output =
+ torch::randn_like(output, at::MemoryFormat::Preserve);
+ at::Tensor grad_savemean =
+ torch::zeros_like(savemean, at::MemoryFormat::Preserve);
+ at::Tensor grad_saveinvstd =
+ torch::zeros_like(saveinvstd, at::MemoryFormat::Preserve);
// run backward eagerly
at::Tensor grad_input, grad_weight, grad_bias;
@@ -741,14 +747,14 @@
auto t = torch::randn({1, 2, 3}, at::kCPU);
t.set_requires_grad(true);
auto t2 = invokeTestRecordFunction(t);
- t2.backward(torch::ones_like(t2));
+ t2.backward(torch::ones_like(t2, at::MemoryFormat::Preserve));
auto eager_inputs = traced_inputs;
traced_inputs.clear();
t = torch::randn({1, 2, 3}, at::kCPU);
t.set_requires_grad(true);
t2 = invokeTestRecordFunctionJIT(t);
- t2.backward(torch::ones_like(t2));
+ t2.backward(torch::ones_like(t2, at::MemoryFormat::Preserve));
auto jit_inputs = traced_inputs;
traced_inputs.clear();
@@ -864,7 +870,7 @@
auto t = torch::randn({1, 2, 3}, at::kCPU);
t.set_requires_grad(true);
auto t2 = t.pow(2);
- t2.backward(torch::ones_like(t2));
+ t2.backward(torch::ones_like(t2, at::MemoryFormat::Preserve));
}
autograd::profiler::popCallback();
diff --git a/torch/csrc/jit/symbolic_script.cpp b/torch/csrc/jit/symbolic_script.cpp
index 54b3b44..7a2b673 100644
--- a/torch/csrc/jit/symbolic_script.cpp
+++ b/torch/csrc/jit/symbolic_script.cpp
@@ -473,7 +473,7 @@
def backward(grad_output):
return None, None
- return torch.full_like(self, fill_value), backward
+ return torch.full_like(self, fill_value, memory_format=1), backward
def lerp_0(self,
end,
@@ -562,7 +562,7 @@
def index(self,
indices: List[Tensor]):
def backward(grad_output):
- grad_self = torch.zeros_like(self).index_put_(indices, grad_output, True)
+ grad_self = torch.zeros_like(self, memory_format=1).index_put_(indices, grad_output, True)
return grad_self, None
return torch.index(self, indices), backward
@@ -608,7 +608,7 @@
exponent: number):
def backward(grad_output):
if float(exponent) == 0.0:
- grad_self = torch.zeros_like(self)
+ grad_self = torch.zeros_like(self, memory_format=1)
else:
grad_self = grad_output * exponent * torch.pow(self, float(exponent) - 1)
return grad_self, None
@@ -621,7 +621,7 @@
exponent_size = torch._size_if_not_equal(exponent.size(), result.size())
def backward(grad_output):
- grad_self = torch.where(exponent == 0.0, torch.zeros_like(self), grad_output * exponent * torch.pow(self, exponent - 1))._grad_sum_to_size(self_size)
+ grad_self = torch.where(exponent == 0.0, torch.zeros_like(self, memory_format=1), grad_output * exponent * torch.pow(self, exponent - 1))._grad_sum_to_size(self_size)
grad_exponent = (grad_output * torch.pow(self, exponent) * torch.log(self))._grad_sum_to_size(exponent_size)
return grad_self, grad_exponent
@@ -864,7 +864,7 @@
def ceil(self):
def backward(grad_output):
- return torch.zeros_like(grad_output)
+ return torch.zeros_like(grad_output, memory_format=1)
return torch.ceil(self), backward
@@ -889,7 +889,7 @@
def floor(self):
def backward(grad_output):
- return torch.zeros_like(grad_output)
+ return torch.zeros_like(grad_output, memory_format=1)
return torch.floor(self), backward
@@ -938,7 +938,7 @@
def round(self):
def backward(grad_output):
- return torch.zeros_like(grad_output)
+ return torch.zeros_like(grad_output, memory_format=1)
return torch.round(self), backward
@@ -970,7 +970,7 @@
def trunc(self):
def backward(grad_output):
- return torch.zeros_like(grad_output)
+ return torch.zeros_like(grad_output, memory_format=1)
return torch.trunc(self), backward
@@ -1178,7 +1178,7 @@
else:
p1m = 1.
res = input
- mask = torch.empty_like(input)
+ mask = torch.empty_like(input, memory_format=1)
def backward(grad_output):
use_cuda = grad_output.is_cuda
@@ -1252,7 +1252,7 @@
grad_input = torch.adaptive_avg_pool3d_backward(grad, input)
else:
# NEVER REACH HERE
- grad_input = torch.zeros_like(input)
+ grad_input = torch.zeros_like(input, memory_format=1)
raise RuntimeError('Input Error: Only 3D, 4D and 5D input Tensors supported')
return grad_input