[Distributed] [1/N] Fix clang-tidy warnings in torch/csrc/distributed/c10d (#122884)
This PR fixes a batch of clang-tidy warnings in the distributed (c10d) code. The changes fall into a few recurring categories:

- Remove unused includes (`FileStore.cpp`, `ProcessGroupGloo.cpp`, `socket.cpp`, `intra_node_comm.cpp`).
- Pass read-only parameters by `const` reference or `std::string_view` instead of by value, and `std::move` sink parameters (e.g. `options`, `store`) into members.
- `reserve()` vectors before `push_back` loops (`PrefixStore::multiGet`/`multiSet`).
- Default-initialize scalar, pointer, and POD locals and members that were previously left indeterminate (`backendType`, `dist_debug_level_`, `p2pStates_`, `peerVal`, `addr_storage`, ...).
- Concatenate nested namespaces into the C++17 form (`namespace c10d::detail {`, `namespace c10d::intra_node_comm {`).
- Prefer `'\n'` over `std::endl`, `empty()` over `size()` in boolean checks, `std::is_trivially_copyable_v<T>` over `::value`, `emplace(k, v)` over `emplace(std::make_pair(...))`, and an explicit `static_cast<int64_t>` for the narrowing return in `FileStore`.
- Mark the `IntraNodeComm` destructor `override`, and add targeted `NOLINTNEXTLINE` suppressions where the check does not apply (variadic args consumed by `fmt::make_format_args`, intentionally `const` data members in `SocketListenFromFdOp`).
Pull Request resolved: https://github.com/pytorch/pytorch/pull/122884
Approved by: https://github.com/kwen2501
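
For reference, here is a minimal sketch of the recurring patterns these checks flag and the shape of the fixes applied in the diff. This is a toy class, not PyTorch code; the names (`KeyStore`, `prefixAll`, `numKeys`) are illustrative only.

```cpp
#include <cstdint>
#include <string>
#include <string_view>
#include <unordered_map>
#include <utility>
#include <vector>

// modernize-concat-nested-namespaces: C++17 `a::b` form instead of two
// nested `namespace` blocks.
namespace toy::store {

class KeyStore {
 public:
  // Sink parameter: take by value and std::move into the member
  // (same shape as the `options_(std::move(options))` fix).
  explicit KeyStore(std::string prefix) : prefix_(std::move(prefix)) {}

  // performance-unnecessary-value-param: read-only string parameters
  // become std::string_view (or const std::string&).
  static bool isGloo(std::string_view backend) {
    return backend == "gloo";
  }

  std::vector<std::string> prefixAll(
      const std::vector<std::string>& keys) const {
    std::vector<std::string> out;
    // performance-inefficient-vector-operation: reserve before the
    // push_back loop to avoid repeated reallocation.
    out.reserve(keys.size());
    for (const auto& key : keys) {
      out.push_back(prefix_ + key);
    }
    return out;
  }

  int64_t numKeys() const {
    // Make the size_t -> int64_t narrowing explicit rather than implicit.
    return static_cast<int64_t>(cache_.size());
  }

  bool hasKeys() const {
    // readability-container-size-empty: empty() instead of size() != 0.
    return !cache_.empty();
  }

 private:
  std::string prefix_;
  std::unordered_map<std::string, std::vector<uint8_t>> cache_;
  // cppcoreguidelines-pro-type-member-init: scalar/pointer members get
  // default member initializers instead of staying indeterminate.
  void* buffer_{nullptr};
  int64_t counter_{0};
};

} // namespace toy::store
```

One ordering subtlety worth calling out from the `ProcessGroup` constructor hunk: once `options` has been moved into `options_`, later initializers in the member-init list must read the member (`strToBackendType(options_->backend)`), not the moved-from parameter.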
diff --git a/torch/csrc/distributed/c10d/FileStore.cpp b/torch/csrc/distributed/c10d/FileStore.cpp
index a47deca..98b9029 100644
--- a/torch/csrc/distributed/c10d/FileStore.cpp
+++ b/torch/csrc/distributed/c10d/FileStore.cpp
@@ -17,11 +17,6 @@
#include <chrono>
#include <cstdio>
-#include <functional>
-#include <iostream>
-#include <limits>
-#include <sstream>
-#include <system_error>
#include <thread>
#include <utility>
@@ -260,7 +255,7 @@
File& file,
off_t pos,
std::unordered_map<std::string, std::vector<uint8_t>>& cache,
- const std::string deletePrefix) {
+ const std::string& deletePrefix) {
auto size = file.size();
if (size != pos) {
std::string tmpKey;
@@ -429,7 +424,7 @@
File file(path_, O_RDONLY, timeout_);
auto lock = file.lockShared();
pos_ = refresh(file, pos_, cache_, deletePrefix_);
- return cache_.size();
+ return static_cast<int64_t>(cache_.size());
}
bool FileStore::deleteKey(const std::string& key) {
diff --git a/torch/csrc/distributed/c10d/PrefixStore.cpp b/torch/csrc/distributed/c10d/PrefixStore.cpp
index 54f6ac9..bd69c9a 100644
--- a/torch/csrc/distributed/c10d/PrefixStore.cpp
+++ b/torch/csrc/distributed/c10d/PrefixStore.cpp
@@ -83,6 +83,7 @@
std::vector<std::vector<uint8_t>> PrefixStore::multiGet(
const std::vector<std::string>& keys) {
std::vector<std::string> prefixed_keys;
+ prefixed_keys.reserve(keys.size());
for (auto& key : keys) {
prefixed_keys.push_back(joinKey(key));
}
@@ -93,6 +94,7 @@
const std::vector<std::string>& keys,
const std::vector<std::vector<uint8_t>>& values) {
std::vector<std::string> prefixed_keys;
+ prefixed_keys.reserve(keys.size());
for (auto& key : keys) {
prefixed_keys.push_back(joinKey(key));
}
diff --git a/torch/csrc/distributed/c10d/ProcessGroup.cpp b/torch/csrc/distributed/c10d/ProcessGroup.cpp
index 248a2c9..ec80ed9 100644
--- a/torch/csrc/distributed/c10d/ProcessGroup.cpp
+++ b/torch/csrc/distributed/c10d/ProcessGroup.cpp
@@ -3,6 +3,7 @@
#include <c10/util/Logging.h>
#include <fmt/format.h>
+#include <string_view>
#include <torch/csrc/distributed/c10d/PrefixStore.hpp>
#include <torch/csrc/distributed/c10d/ProcessGroupGloo.hpp>
@@ -13,7 +14,7 @@
namespace c10d {
-static ProcessGroup::BackendType strToBackendType(std::string backend) {
+static ProcessGroup::BackendType strToBackendType(std::string_view backend) {
if (backend == "undefined") {
return ProcessGroup::BackendType::UNDEFINED;
} else if (backend == "gloo") {
@@ -111,7 +112,7 @@
}
// Get the backend type associated with the device
- ProcessGroup::BackendType backendType;
+ ProcessGroup::BackendType backendType{ProcessGroup::BackendType::UNDEFINED};
try {
backendType = deviceTypeToBackendType_.at(deviceType);
} catch (const std::out_of_range& e) {
@@ -142,8 +143,8 @@
: store_(store),
rank_(rank),
size_(size),
- options_(options),
- backendType_(strToBackendType(options->backend)),
+ options_(std::move(options)),
+ backendType_(strToBackendType(options_->backend)),
dist_debug_level_(debug_level()) {
C10_LOG_API_USAGE_ONCE("c10d.process_group");
}
@@ -159,7 +160,7 @@
}
const std::string& ProcessGroup::getGroupName() const {
- TORCH_CHECK(deviceTypeToBackend_.size(), "ProcessGroup name not set");
+ TORCH_CHECK(!deviceTypeToBackend_.empty(), "ProcessGroup name not set");
return deviceTypeToBackend_.begin()->second->getGroupName();
}
diff --git a/torch/csrc/distributed/c10d/ProcessGroup.hpp b/torch/csrc/distributed/c10d/ProcessGroup.hpp
index a21b98c..5aba2c4 100644
--- a/torch/csrc/distributed/c10d/ProcessGroup.hpp
+++ b/torch/csrc/distributed/c10d/ProcessGroup.hpp
@@ -727,7 +727,7 @@
// Debug level setting. It is parsed once when ProcessGroup is constructed and
// remains the same across use of this process group.
- DebugLevel dist_debug_level_;
+ DebugLevel dist_debug_level_{DebugLevel::Off};
// Backend classes for this ProcessGroup
std::unordered_set<c10::DeviceType> deviceTypes_;
diff --git a/torch/csrc/distributed/c10d/ProcessGroupGloo.cpp b/torch/csrc/distributed/c10d/ProcessGroupGloo.cpp
index d3c9a61..50e1b95 100644
--- a/torch/csrc/distributed/c10d/ProcessGroupGloo.cpp
+++ b/torch/csrc/distributed/c10d/ProcessGroupGloo.cpp
@@ -7,8 +7,6 @@
#include <torch/csrc/distributed/c10d/PrefixStore.hpp>
#include <chrono>
#include <exception>
-#include <ratio>
-#include <tuple>
#ifdef _WIN32
#include <gloo/common/win.h>
diff --git a/torch/csrc/distributed/c10d/ProcessGroupWrapper.cpp b/torch/csrc/distributed/c10d/ProcessGroupWrapper.cpp
index 45f5135..a6086d2 100644
--- a/torch/csrc/distributed/c10d/ProcessGroupWrapper.cpp
+++ b/torch/csrc/distributed/c10d/ProcessGroupWrapper.cpp
@@ -89,15 +89,15 @@
// Takes a serialized fingerprint from
// CollectiveFingerPrint::serialize_fingerprint and deserializes it back to a
// CollectiveFingerPrint struct
- CollectiveFingerPrint deserialize_fingerprint(at::Tensor serialized_tensor) {
- OpType optype;
+ CollectiveFingerPrint deserialize_fingerprint(
+ const at::Tensor& serialized_tensor) {
auto dtypes = std::vector<int8_t>();
auto device_types = std::vector<int8_t>();
auto sizes = std::vector<std::vector<int64_t>>();
int index = 0;
- int seq = 0;
+ int64_t seq = 0;
// 1. OpType
- optype = OpType(serialized_tensor[index].item<int>());
+ auto optype = OpType(serialized_tensor[index].item<int>());
index++;
int num_tensors = 0;
if (index < serialized_tensor.size(0)) {
@@ -383,7 +383,7 @@
} // namespace
ProcessGroupWrapper::ProcessGroupWrapper(
- c10::intrusive_ptr<Backend> backend,
+ const c10::intrusive_ptr<Backend>& backend,
c10::intrusive_ptr<Backend> glooBackend)
: Backend(backend->getRank(), backend->getSize()),
backend_(backend),
diff --git a/torch/csrc/distributed/c10d/ProcessGroupWrapper.hpp b/torch/csrc/distributed/c10d/ProcessGroupWrapper.hpp
index 13503ca..50c0bfc 100644
--- a/torch/csrc/distributed/c10d/ProcessGroupWrapper.hpp
+++ b/torch/csrc/distributed/c10d/ProcessGroupWrapper.hpp
@@ -11,7 +11,7 @@
class TORCH_API ProcessGroupWrapper : public Backend {
public:
explicit ProcessGroupWrapper(
- c10::intrusive_ptr<Backend> backend,
+ const c10::intrusive_ptr<Backend>& backend,
c10::intrusive_ptr<Backend> glooBackend);
const std::string getBackendName() const override;
diff --git a/torch/csrc/distributed/c10d/intra_node_comm.cpp b/torch/csrc/distributed/c10d/intra_node_comm.cpp
index c4120e3..2579b38 100644
--- a/torch/csrc/distributed/c10d/intra_node_comm.cpp
+++ b/torch/csrc/distributed/c10d/intra_node_comm.cpp
@@ -6,7 +6,7 @@
#include <torch/csrc/distributed/c10d/Utils.hpp>
#include <iostream>
-#include <random>
+#include <utility>
#include <fcntl.h>
#include <pthread.h>
@@ -22,8 +22,7 @@
#include <cuda_runtime.h>
-namespace c10d {
-namespace intra_node_comm {
+namespace c10d::intra_node_comm {
static std::vector<std::string> ENABLE_INTRA_NODE_COMM = {
"ENABLE_INTRA_NODE_COMM"};
@@ -57,7 +56,7 @@
for (size_t j = 0; j < kMaxDevices; ++j) {
oss << nvlMesh[i][j] << " ";
}
- oss << std::endl;
+ oss << '\n';
}
os << oss.str();
return os;
@@ -77,7 +76,7 @@
/**
* Query the nvlink connection among devices.
*/
-static NvlMesh getNvlMesh(std::vector<std::string> rankToBusId) {
+static NvlMesh getNvlMesh(const std::vector<std::string>& rankToBusId) {
#if !defined(USE_ROCM) && defined(PYTORCH_C10_DRIVER_API_SUPPORTED)
using namespace c10::cuda;
@@ -88,12 +87,12 @@
}
const auto worldSize = rankToBusId.size();
- std::vector<nvmlDevice_t> devices(worldSize, 0);
+ std::vector<nvmlDevice_t> devices(worldSize, nullptr);
std::unordered_map<std::string, size_t> busIdToRank;
std::vector<size_t> switchLinkCount(worldSize, 0);
for (size_t r = 0; r < worldSize; ++r) {
- busIdToRank.emplace(std::make_pair(rankToBusId[r], r));
+ busIdToRank.emplace(rankToBusId[r], r);
TORCH_CHECK(
driverApi->nvmlDeviceGetHandleByPciBusId_v2_(
rankToBusId[r].c_str(), &devices[r]) == NVML_SUCCESS);
@@ -209,7 +208,7 @@
size_t rank,
size_t worldSize,
c10::optional<size_t> bufferSize)
- : store_(store),
+ : store_(std::move(store)),
rank_(rank),
worldSize_(worldSize),
bufferSize_(bufferSize.has_value() ? *bufferSize : kDefaultBufferSize) {
@@ -248,12 +247,12 @@
*/
template <typename T>
std::vector<T> storeAllGather(
- c10::intrusive_ptr<c10d::Store> store,
+ const c10::intrusive_ptr<c10d::Store>& store,
const std::string& prefix,
size_t rank,
size_t worldSize,
T val) {
- static_assert(std::is_trivially_copyable<T>::value);
+ static_assert(std::is_trivially_copyable_v<T>);
std::vector<std::string> peerKeys;
for (size_t r = 0; r < worldSize; ++r) {
@@ -278,7 +277,7 @@
store->wait({peerKeys[r]});
auto payload = store->get(peerKeys[r]);
TORCH_CHECK(payload.size() == sizeof(T));
- T peerVal;
+ T peerVal{};
std::memcpy(&peerVal, payload.data(), sizeof(T));
peerVals.push_back(peerVal);
}
@@ -429,5 +428,4 @@
return false;
}
-} // namespace intra_node_comm
-} // namespace c10d
+} // namespace c10d::intra_node_comm
diff --git a/torch/csrc/distributed/c10d/intra_node_comm.hpp b/torch/csrc/distributed/c10d/intra_node_comm.hpp
index 7addb49..5c5bcdc 100644
--- a/torch/csrc/distributed/c10d/intra_node_comm.hpp
+++ b/torch/csrc/distributed/c10d/intra_node_comm.hpp
@@ -6,8 +6,7 @@
#include <torch/csrc/distributed/c10d/Store.hpp>
#include <torch/csrc/distributed/c10d/Work.hpp>
-namespace c10d {
-namespace intra_node_comm {
+namespace c10d::intra_node_comm {
constexpr size_t kMaxDevices = 8;
constexpr size_t kDefaultBufferSize = 10 * 1024 * 1024;
@@ -27,7 +26,7 @@
size_t worldSize,
c10::optional<size_t> bufferSize = c10::nullopt);
- ~IntraNodeComm();
+ ~IntraNodeComm() override;
static bool isEnabled();
@@ -69,11 +68,11 @@
*/
bool isInitialized_ = false;
Topology topology_ = Topology::UNKNOWN;
- std::array<void*, kMaxDevices> p2pStates_;
- std::array<void*, kMaxDevices> buffers_;
- void* p2pStatesDev_;
- void* buffersDev_;
- void* topoInfo_;
+ std::array<void*, kMaxDevices> p2pStates_{};
+ std::array<void*, kMaxDevices> buffers_{};
+ void* p2pStatesDev_{};
+ void* buffersDev_{};
+ void* topoInfo_{};
};
/**
@@ -115,5 +114,4 @@
TORCH_API int64_t getIntraNodeCommUsageCounter();
-} // namespace intra_node_comm
-} // namespace c10d
+} // namespace c10d::intra_node_comm
diff --git a/torch/csrc/distributed/c10d/socket.cpp b/torch/csrc/distributed/c10d/socket.cpp
index 5013f25..093a47a 100644
--- a/torch/csrc/distributed/c10d/socket.cpp
+++ b/torch/csrc/distributed/c10d/socket.cpp
@@ -8,7 +8,6 @@
#include <cstring>
#include <system_error>
-#include <thread>
#include <utility>
#include <vector>
@@ -38,8 +37,7 @@
#include <c10/util/CallOnce.h>
-namespace c10d {
-namespace detail {
+namespace c10d::detail {
namespace {
#ifdef _WIN32
@@ -183,8 +181,7 @@
Handle hnd_;
};
-} // namespace detail
-} // namespace c10d
+} // namespace c10d::detail
//
// libfmt formatters for `addrinfo` and `Socket`
@@ -251,8 +248,7 @@
} // namespace fmt
-namespace c10d {
-namespace detail {
+namespace c10d::detail {
SocketImpl::~SocketImpl() {
#ifdef _WIN32
@@ -458,6 +454,7 @@
bool tryListen(const ::addrinfo& addr);
template <typename... Args>
+ // NOLINTNEXTLINE(cppcoreguidelines-missing-std-forward)
void recordError(fmt::string_view format, Args&&... args) {
auto msg = fmt::vformat(format, fmt::make_format_args(args...));
@@ -614,7 +611,9 @@
std::unique_ptr<SocketImpl> run() const;
private:
+ // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members)
const int fd_;
+ // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members)
const std::uint16_t expected_port_;
};
@@ -624,7 +623,7 @@
std::unique_ptr<SocketImpl> SocketListenFromFdOp::run() const {
C10D_DEBUG("listenFromFd: fd {}, expected port {}", fd_, expected_port_);
- ::sockaddr_storage addr_storage;
+ ::sockaddr_storage addr_storage{};
::socklen_t addr_len = sizeof(addr_storage);
if (::getsockname(
fd_, reinterpret_cast<::sockaddr*>(&addr_storage), &addr_len) < 0) {
@@ -691,6 +690,7 @@
[[noreturn]] void throwTimeoutError() const;
template <typename... Args>
+ // NOLINTNEXTLINE(cppcoreguidelines-missing-std-forward)
void recordError(fmt::string_view format, Args&&... args) {
auto msg = fmt::vformat(format, fmt::make_format_args(args...));
@@ -1033,6 +1033,4 @@
return impl_->waitForInput(timeout);
}
-} // namespace detail
-
-} // namespace c10d
+} // namespace c10d::detail