Fix clang-tidy warnings in aten/src/ATen/functorch (#122779)
This PR fixes some performance-related clang-tidy warnings in aten/src/ATen/functorch, mostly by passing expensive-to-copy arguments (c10::SymInt, optional<c10::SymInt>, std::function) by const reference, initializing locals at declaration, converting iterator loops to range-based for loops, using !empty() instead of size() > 0, and dropping an unused <iostream> include.
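
For illustration only, here is a minimal, self-contained sketch (not ATen code) of the dominant pattern in this diff, the one flagged by clang-tidy's performance-unnecessary-value-param check: types such as c10::SymInt and std::function are only read by these functions, but taking them by value copies a reference-counted or heap-backed handle on every call, while a const reference avoids that. The `Handle` type below is a hypothetical stand-in, not an ATen type.

    // Minimal illustration of the performance-unnecessary-value-param pattern.
    // `Handle` is a hypothetical stand-in for a non-trivially-copyable type
    // such as c10::SymInt or std::function; it is NOT an ATen type.
    #include <cstdio>
    #include <memory>
    #include <vector>

    struct Handle {
      std::shared_ptr<int> payload = std::make_shared<int>(0);  // copying bumps a refcount
    };

    // Before: each call copies `h`, touching the shared_ptr control block.
    int use_by_value(Handle h) { return *h.payload; }

    // After: const reference avoids the copy; the callee only reads `h`.
    int use_by_cref(const Handle& h) { return *h.payload; }

    int main() {
      Handle h;
      std::vector<int> results;
      for (int i = 0; i < 3; ++i) {
        results.push_back(use_by_value(h));  // refcount up/down on every call
        results.push_back(use_by_cref(h));   // no refcount traffic
      }
      std::printf("%zu calls made\n", results.size());
      return 0;
    }

The same idea is applied below to c10::SymInt, optional<c10::SymInt>, and std::function parameters; the remaining hunks address related checks (uninitialized locals, iterator loops that can be range-based, size() > 0 vs. empty(), std::endl vs. '\n').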
Pull Request resolved: https://github.com/pytorch/pytorch/pull/122779
Approved by: https://github.com/ezyang
diff --git a/aten/src/ATen/functorch/BatchRulesConvolution.cpp b/aten/src/ATen/functorch/BatchRulesConvolution.cpp
index ab2ab09..ca4eda1 100644
--- a/aten/src/ATen/functorch/BatchRulesConvolution.cpp
+++ b/aten/src/ATen/functorch/BatchRulesConvolution.cpp
@@ -29,7 +29,7 @@
// If we have a batched bias or weight, we need to perform the computation separately.
optional<Tensor> unbatched_bias;
- bool separate_bias;
+ bool separate_bias = false;
if ((rhs_bdim && bias && bias->defined()) || bias_bdim) {
TORCH_INTERNAL_ASSERT(bias.has_value());
TORCH_INTERNAL_ASSERT(bias->defined());
@@ -245,7 +245,7 @@
const Tensor& input, optional<int64_t> input_bdim,
const Tensor& weight, optional<int64_t> weight_bdim,
c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation, bool transposed,
- c10::SymIntArrayRef output_padding, c10::SymInt groups) {
+ c10::SymIntArrayRef output_padding, const c10::SymInt& groups) {
const std::array<bool, 3> mask = {true, false, false};
if (grad_output_bdim && weight_bdim) {
// regular: BNO, BOI -> N(BO), (BO)I -> N(BI)
@@ -326,7 +326,7 @@
const Tensor& input, optional<int64_t> input_bdim,
const Tensor& weight, optional<int64_t> weight_bdim,
c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation, bool transposed,
- c10::SymIntArrayRef output_padding, c10::SymInt groups) {
+ c10::SymIntArrayRef output_padding, const c10::SymInt& groups) {
const std::array<bool, 3> mask = {false, true, false};
if (grad_output_bdim && input_bdim) {
// BNO, BNI -> N(BO), N(BI) -> (BO)I (regular) (BI)O (transposed)
diff --git a/aten/src/ATen/functorch/BatchRulesHelper.cpp b/aten/src/ATen/functorch/BatchRulesHelper.cpp
index 088881a..edac0eb 100644
--- a/aten/src/ATen/functorch/BatchRulesHelper.cpp
+++ b/aten/src/ATen/functorch/BatchRulesHelper.cpp
@@ -118,11 +118,9 @@
// NOTE: 0 % 0 leads to FPE
TORCH_INTERNAL_ASSERT(shape[src] % size1 == 0);
}
- int64_t size2;
// split any size out of `0`-sized dim
- if (shape[src] == 0) {
- size2 = 0;
- } else {
+ int64_t size2 = 0;
+ if (shape[src] != 0) {
size2 = shape[src] / size1;
}
shape[src] = size1;
@@ -130,7 +128,7 @@
return at::reshape(x, shape);
}
-Tensor reshape_dim_outof_symint(int64_t src, c10::SymInt size1, const Tensor& x) {
+Tensor reshape_dim_outof_symint(int64_t src, const c10::SymInt& size1, const Tensor& x) {
src = maybe_wrap_dim(src, x.dim());
c10::SymDimVector shape(x.sym_sizes().begin(), x.sym_sizes().end());
if (shape[src] != 0) {
diff --git a/aten/src/ATen/functorch/BatchRulesHelper.h b/aten/src/ATen/functorch/BatchRulesHelper.h
index f36e714..eb347d6 100644
--- a/aten/src/ATen/functorch/BatchRulesHelper.h
+++ b/aten/src/ATen/functorch/BatchRulesHelper.h
@@ -28,7 +28,7 @@
TORCH_API Tensor reshape_dim_into(int64_t src, int64_t dst, const Tensor& x);
TORCH_API Tensor reshape_dim_outof(int64_t src, int64_t size1, const Tensor& x);
-TORCH_API Tensor reshape_dim_outof_symint(int64_t src, c10::SymInt size1, const Tensor& x);
+TORCH_API Tensor reshape_dim_outof_symint(int64_t src, const c10::SymInt& size1, const Tensor& x);
Tensor moveBatchDimToFront(const Tensor& tensor, optional<int64_t> maybe_batch_dim);
int64_t rankWithoutBatchDim(const Tensor& tensor, optional<int64_t> maybe_batch_dim);
diff --git a/aten/src/ATen/functorch/BatchRulesRandomness.cpp b/aten/src/ATen/functorch/BatchRulesRandomness.cpp
index 00d3e1d..79572f2 100644
--- a/aten/src/ATen/functorch/BatchRulesRandomness.cpp
+++ b/aten/src/ATen/functorch/BatchRulesRandomness.cpp
@@ -199,8 +199,8 @@
}
auto [output, mask] = at::native_dropout(tensor_value, p, train);
return std::make_tuple(
- makeBatched(std::move(output), 0, cur_level),
- makeBatched(std::move(mask), 0, cur_level));
+ makeBatched(output, 0, cur_level),
+ makeBatched(mask, 0, cur_level));
}
// repeated code from the CPU kernel since the CUDA one doesn't call bernoulli_ explicitly
@@ -264,7 +264,7 @@
template <typename F, F Func, typename... T>
Tensor rand_int_wrapper(SymIntArrayRef shape, c10::SymInt high, T... extra_args) {
- return Func(high, std::move(shape), std::forward<T>(extra_args)...);
+ return Func(high, shape, std::forward<T>(extra_args)...);
}
template <typename A, A a, typename C>
diff --git a/aten/src/ATen/functorch/BatchRulesViews.cpp b/aten/src/ATen/functorch/BatchRulesViews.cpp
index 7a66b94..8e386b7 100644
--- a/aten/src/ATen/functorch/BatchRulesViews.cpp
+++ b/aten/src/ATen/functorch/BatchRulesViews.cpp
@@ -5,7 +5,6 @@
// LICENSE file in the root directory of this source tree.
#include <ATen/functorch/BatchRulesHelper.h>
-#include <iostream>
#include <utility>
#include <ATen/Operators.h>
@@ -202,7 +201,7 @@
int64_t new_batch_idx = 0;
int64_t original_idx = 0;
- for (auto it : shape) {
+ for (const auto& it : shape) {
// Keep only dimensions != 1 and the batch dimension (irrespective of size).
if (it != 1 || original_idx == bdim) {
squeezed_sizes.push_back(it);
@@ -452,7 +451,7 @@
auto self_ = moveBatchDimToFront(self, self_bdim);
auto self_sizes = self_.sym_sizes();
- auto batch_size = self_sizes[0];
+ const auto& batch_size = self_sizes[0];
c10::SmallVector<c10::SymInt> size_(size.size() + 1);
size_[0] = batch_size;
diff --git a/aten/src/ATen/functorch/BatchedFallback.cpp b/aten/src/ATen/functorch/BatchedFallback.cpp
index 1c7c79d..9c287f0 100644
--- a/aten/src/ATen/functorch/BatchedFallback.cpp
+++ b/aten/src/ATen/functorch/BatchedFallback.cpp
@@ -450,13 +450,13 @@
TORCH_INTERNAL_ASSERT(!batched_tensor_inputs.empty());
std::vector<std::vector<Tensor>> unbound;
- for (auto iter = batched_tensor_inputs.begin(); iter != batched_tensor_inputs.end(); ++iter) {
- auto *batched_impl = maybeGetBatchedImpl(*iter);
+ for (auto const &batched_tensor_input: batched_tensor_inputs) {
+ auto *batched_impl = maybeGetBatchedImpl(batched_tensor_input);
TORCH_INTERNAL_ASSERT(batched_impl->value().is_nested() || batched_impl->bdim() == 0,
"Fallback not supported for mixed nested / non-nested arguments without bdim=0");
c10::impl::ExcludeDispatchKeyGuard guard(DispatchKey::BatchedNestedTensor);
auto this_unbound = batched_impl->value().unbind();
- if (unbound.size() > 0) {
+ if (!unbound.empty()) {
TORCH_INTERNAL_ASSERT(unbound.front().size() == this_unbound.size(),
"Fallback not supported for differently-sized nested arguments");
}
diff --git a/aten/src/ATen/functorch/DynamicLayer.cpp b/aten/src/ATen/functorch/DynamicLayer.cpp
index 453ebbb..45976fa 100644
--- a/aten/src/ATen/functorch/DynamicLayer.cpp
+++ b/aten/src/ATen/functorch/DynamicLayer.cpp
@@ -234,7 +234,7 @@
auto& dynamicLayerStack = dynamicLayerStackAccessor();
int64_t layerId = 1 + dynamicLayerStack.size();
TORCH_INTERNAL_ASSERT(layerId == dynamic_layer.layerId());
- dynamicLayerStack.emplace_back(dynamic_layer);
+ dynamicLayerStack.emplace_back(std::move(dynamic_layer));
if (layerId == 1) {
setDynamicLayerFrontBackKeysIncluded(true);
@@ -257,7 +257,7 @@
optional<bool> functionalize_add_back_views) {
const auto& dynamicLayerStack = dynamicLayerStackAccessor();
const auto layerId = 1 + dynamicLayerStack.size();
- DynamicLayer new_layer(transform_type, layerId, batch_size, randomness, prev_grad_mode, prev_fwd_grad_mode, functionalize_add_back_views);
+ DynamicLayer new_layer(transform_type, layerId, std::move(batch_size), randomness, prev_grad_mode, prev_fwd_grad_mode, functionalize_add_back_views);
// NB: this function should be called while holding the GIL to avoid races
new_layer.interpreter().set_is_alive(true);
pushDynamicLayer(std::move(new_layer));
@@ -306,7 +306,7 @@
}
void foreachTensorInplaceWithFlag(std::vector<IValue>& args, int64_t begin, int64_t end,
- const std::bitset<64> use_flag_relative, std::function<Tensor(const Tensor&, bool)> func){
+ const std::bitset<64> use_flag_relative, const std::function<Tensor(const Tensor&, bool)>& func){
TORCH_INTERNAL_ASSERT(begin >= 0);
TORCH_INTERNAL_ASSERT(end >= 0);
TORCH_INTERNAL_ASSERT(begin <= end);
diff --git a/aten/src/ATen/functorch/Interpreter.h b/aten/src/ATen/functorch/Interpreter.h
index 81190ff..33fe26b 100644
--- a/aten/src/ATen/functorch/Interpreter.h
+++ b/aten/src/ATen/functorch/Interpreter.h
@@ -195,7 +195,7 @@
// args[i] = func(args[i], i - begin, true)
// args[i] = func(args[i], i - begin)
void foreachTensorInplaceWithFlag(std::vector<IValue>& args, int64_t begin, int64_t end,
- const std::bitset<64> use_flag_relative, std::function<Tensor(const Tensor&, bool)> func);
+ const std::bitset<64> use_flag_relative, const std::function<Tensor(const Tensor&, bool)>& func);
std::vector<int64_t> findUnwrappedInputs(std::vector<IValue>& args, int64_t begin, int64_t end);
diff --git a/aten/src/ATen/functorch/LegacyBatchingRegistrations.cpp b/aten/src/ATen/functorch/LegacyBatchingRegistrations.cpp
index 59d7de8..bc26374 100644
--- a/aten/src/ATen/functorch/LegacyBatchingRegistrations.cpp
+++ b/aten/src/ATen/functorch/LegacyBatchingRegistrations.cpp
@@ -286,7 +286,7 @@
// can be indexed (or nullopt if such a location doesn't exist, e.g., tensors
// with zero-size dims).
static optional<c10::SymInt> maximum_indexable_location(
- c10::SymIntArrayRef sizes, c10::SymIntArrayRef strides, c10::SymInt storage_offset) {
+ c10::SymIntArrayRef sizes, c10::SymIntArrayRef strides, const c10::SymInt& storage_offset) {
auto result = native::storage_size_for(sizes, strides);
if (result == 0) {
return nullopt;
@@ -303,7 +303,7 @@
int64_t num_batch_dims,
c10::SymIntArrayRef sizes,
c10::SymIntArrayRef strides,
- optional<c10::SymInt> maybe_storage_offset) {
+ const optional<c10::SymInt>& maybe_storage_offset) {
auto slice_sizes = physical_tensor.sym_sizes().slice(num_batch_dims);
auto slice_strides = physical_tensor.sym_strides().slice(num_batch_dims);
auto base_offset = physical_tensor.sym_storage_offset();
@@ -696,8 +696,8 @@
TORCH_CHECK(tensors.size() > 0, "cat() not supported on empty tensor list");
std::vector<std::vector<Tensor>> unbound;
- for (auto tensor_iter = tensors.begin(); tensor_iter != tensors.end(); ++tensor_iter) {
- auto* maybe_batched_impl = maybeGetBatchedImpl(*tensor_iter);
+ for (const auto & tensor : tensors) {
+ auto* maybe_batched_impl = maybeGetBatchedImpl(tensor);
TORCH_CHECK(maybe_batched_impl, "Tried to run batching rule for cat() on a non-batched tensor");
auto nt = maybe_batched_impl->value();
TORCH_CHECK(nt.is_nested(), "Tried to run batching rule for cat() on a non-nested tensor");
diff --git a/aten/src/ATen/functorch/LegacyVmapTransforms.cpp b/aten/src/ATen/functorch/LegacyVmapTransforms.cpp
index 47b1441..2bd3d7c 100644
--- a/aten/src/ATen/functorch/LegacyVmapTransforms.cpp
+++ b/aten/src/ATen/functorch/LegacyVmapTransforms.cpp
@@ -135,7 +135,7 @@
TORCH_INTERNAL_ASSERT(bdim_size != -1);
std::bitset<kVmapNumLevels> levels;
- levels[cur_level] = 1;
+ levels[cur_level] = true;
VmapPhysicalViewVec result;
for (const auto& logical_tensor : logical_tensors) {
@@ -184,7 +184,7 @@
TORCH_INTERNAL_ASSERT(bdim_size != -1);
std::bitset<kVmapNumLevels> levels;
- levels[cur_level] = 1;
+ levels[cur_level] = true;
// figure out the example ndim
int64_t max_example_dim = -1;
diff --git a/aten/src/ATen/functorch/PyTorchOperatorHacks.cpp b/aten/src/ATen/functorch/PyTorchOperatorHacks.cpp
index c4cccba..355ac59 100644
--- a/aten/src/ATen/functorch/PyTorchOperatorHacks.cpp
+++ b/aten/src/ATen/functorch/PyTorchOperatorHacks.cpp
@@ -167,7 +167,7 @@
namespace {
template<bool inplace>
-using Ctype = typename std::conditional<inplace, Tensor&, Tensor>::type;
+using Ctype = std::conditional_t<inplace, Tensor&, Tensor>;
static Tensor make_feature_noise(const Tensor& input) {
auto input_sizes = input.sizes();
diff --git a/aten/src/ATen/functorch/TensorWrapper.cpp b/aten/src/ATen/functorch/TensorWrapper.cpp
index 5bb232d..a1ffbf3 100644
--- a/aten/src/ATen/functorch/TensorWrapper.cpp
+++ b/aten/src/ATen/functorch/TensorWrapper.cpp
@@ -50,7 +50,7 @@
void dumpTensorCout(const Tensor& tensor) {
dumpTensor(std::cout, tensor);
- std::cout << std::endl;
+ std::cout << '\n';
}
static c10::intrusive_ptr<TensorWrapper> makeTensorWrapperPtr(const Tensor& tensor, int64_t level, const std::shared_ptr<bool>& life_handle) {