PyTorch ThroughputBenchmark: fix inaccuracy in number of iterations reporting (#22292) Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/22292 Because each calling thread uses an atomic fetch_add to check whether it should finish, the final fetch_add on each thread (the one that fails the check) does not correspond to a real iteration and should not be taken into account. As a result, the total number of iterations reported is now exactly the number the user sets via config.num_iters. Now, when running a unit test, I see the exact number of iterations reported. Differential Revision: D16023963 fbshipit-source-id: 3b12ee17276628ecd7b0979f28cd6deb777a1543

commit: d0db2a76a0f0cbb7a4be2a761c39cef3d774af71 [log] [tgz]
author: Alexander Sidorov <salex@fb.com> Mon Jul 01 14:12:10 2019 -0700
committer: Facebook Github Bot <facebook-github-bot@users.noreply.github.com> Mon Jul 01 14:24:29 2019 -0700
tree: 6af2338f0f7cf0d9cce0de09142e5341610715b7
parent: 813b01e4a8f3c6b0dfb75c3476421c2797ca37ec [diff]
diff --git a/torch/csrc/utils/throughput_benchmark-inl.h b/torch/csrc/utils/throughput_benchmark-inl.h
index b07242b..e14c041 100644
--- a/torch/csrc/utils/throughput_benchmark-inl.h
+++ b/torch/csrc/utils/throughput_benchmark-inl.h

@@ -50,7 +50,7 @@
   int64_t initialized{0};
   int64_t finished{0};
   bool start{false};
-  std::atomic<int64_t> num_forwards{0};
+  std::atomic<int64_t> num_attempted_iters{0};
   std::vector<std::thread> callers;
 
   for (auto thread_id = 0; thread_id < config.num_calling_threads;
@@ -71,7 +71,7 @@
         }
       }
       LOG(INFO) << "Starting forward thread " << thread_id;
-      while (num_forwards.fetch_add(1) < config.num_iters) {
+      while (num_attempted_iters.fetch_add(1) < config.num_iters) {
         runOnce(std::move(thread_inputs[thread_id][input_iters[thread_id]]));
         ++input_iters[thread_id];
       }
@@ -115,9 +115,12 @@
                             end_time - start_time)
                             .count() /
       1000.0 / 1000.0;
+  // We use config.num_iters instead of num_attempted_iters as it is
+  // representative of the real work done. Last attempted iteration on each
+  // calling thread doesn't represent the real work (i.e. running the model).
   stats.latency_avg_ms =
-      total_time_ms * config.num_calling_threads / num_forwards;
-  stats.num_iters = num_forwards;
+      total_time_ms * config.num_calling_threads / config.num_iters;
+  stats.num_iters = config.num_iters;
 
   for (auto& t : callers) {
     t.join();
commit	d0db2a76a0f0cbb7a4be2a761c39cef3d774af71	[log] [tgz]
author	Alexander Sidorov <salex@fb.com>	Mon Jul 01 14:12:10 2019 -0700
committer	Facebook Github Bot <facebook-github-bot@users.noreply.github.com>	Mon Jul 01 14:24:29 2019 -0700
tree	6af2338f0f7cf0d9cce0de09142e5341610715b7
parent	813b01e4a8f3c6b0dfb75c3476421c2797ca37ec [diff]