[DDP] Rename the member divFactor_ to div_factor_ for naming consistency in reducer (#59523)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/59523
Use snake_case instead of camelCase for consistency with the other reducer members.
ghstack-source-id: 130759655
Test Plan: buck test mode/dev-nosan caffe2/test/distributed:distributed_nccl_fork -- test_ddp_grad_div_uneven_inputs
Reviewed By: cbalioglu
Differential Revision: D28922896
fbshipit-source-id: e04298284a78b2e71b562f790a878731962f873a
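
For reference, the convention this rename follows: C++ member variables in the reducer use snake_case with a trailing underscore. A minimal, illustrative sketch (the field list below is abbreviated and is not the full Reducer class):

    class Reducer {
      // ...
      // Division factor for reduction of gradients.
      int div_factor_;      // previously divFactor_
      bool static_graph_;   // already followed the convention
      // ...
    };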
diff --git a/torch/lib/c10d/reducer.cpp b/torch/lib/c10d/reducer.cpp
index aa58b12..e1879b0 100644
--- a/torch/lib/c10d/reducer.cpp
+++ b/torch/lib/c10d/reducer.cpp
@@ -61,8 +61,7 @@
num_buckets_ready_(0),
has_rebuilt_bucket_(false),
bucket_bytes_cap_(bucket_bytes_cap),
- // Only used for handling unevent input.
- divFactor_(kUnsetDivFactor),
+ div_factor_(kUnsetDivFactor),
static_graph_(false),
comm_hook_(nullptr),
thread_local_state_(at::ThreadLocalState()),
@@ -426,8 +425,8 @@
void Reducer::set_divide_factor() {
// If it was scheduled, wait on allreduce in forward pass that tells us
// division factor based on no. of currently participating processes.
- if (divFactor_ == kUnsetDivFactor) {
- divFactor_ = process_group_->getSize();
+ if (div_factor_ == kUnsetDivFactor) {
+ div_factor_ = process_group_->getSize();
auto& workHandle = forwardPassWorkHandle_.workHandle;
if (workHandle && !forwardPassWorkHandle_.useStaticWorldSize) {
workHandle->wait();
@@ -435,7 +434,7 @@
// Guard against the results being empty
TORCH_INTERNAL_ASSERT(results.size() > 0);
at::Tensor& res = results.front();
- divFactor_ = res.item().to<int>();
+ div_factor_ = res.item().to<int>();
}
}
}
@@ -814,8 +813,7 @@
bucket.replicas[0].lengths,
bucket.replicas[0].sizes_vec);
if (comm_hook_ == nullptr) {
- _AllReduceCommHookWithDivFactorState state(
- process_group_.get(), divFactor_);
+ _AllReduceCommHookWithDivFactorState state(process_group_.get(), div_factor_);
_AllReduceCommHookWithDivFactor allreduce_hook(state);
bucket.future_work = allreduce_hook.runHook(grad_bucket);
} else {
@@ -1371,7 +1369,7 @@
// Unset allreduce division factor, as it may change in next backwards pass
// when running with DDP join mode.
- divFactor_ = kUnsetDivFactor;
+ div_factor_ = kUnsetDivFactor;
// Wait for asynchronous reduction to complete and unflatten contents.
for (auto& bucket : buckets_) {
diff --git a/torch/lib/c10d/reducer.hpp b/torch/lib/c10d/reducer.hpp
index 5fee0e0..72d925c 100644
--- a/torch/lib/c10d/reducer.hpp
+++ b/torch/lib/c10d/reducer.hpp
@@ -297,7 +297,8 @@
size_t pending;
// Keep future work handle around DDP comm hook.
- // If no hook is registered, a temporary vanilla allreduce hook will be used.
+ // If no hook is registered, a temporary vanilla allreduce hook will be
+ // used.
c10::intrusive_ptr<torch::jit::Future> future_work;
// If this bucket should expect a single sparse gradient.
@@ -414,7 +415,9 @@
ForwardPassAllreduceWork forwardPassWorkHandle_;
// Division factor for reduction of gradients.
- int divFactor_;
+ // Equal to the process group size, except when handling uneven
+ // inputs.
+ int div_factor_;
bool static_graph_;
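
For context on what div_factor_ controls, here is a minimal sketch (not the actual _AllReduceCommHookWithDivFactor implementation; apply_div_factor is a hypothetical helper) of how a division factor is typically applied to turn an allreduce sum of gradients into an average:

    #include <ATen/ATen.h>

    // Illustrative only: convert an allreduce (sum) result into an average.
    // In DDP the factor equals the process group size, except under uneven
    // inputs (join mode), where it reflects the number of ranks still
    // participating in the current iteration.
    at::Tensor apply_div_factor(const at::Tensor& summed_grads, int div_factor) {
      return summed_grads / div_factor;
    }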