irange-ify 3 (#62112)

Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/62112

Test Plan: Sandcastle

Reviewed By: malfet

Differential Revision: D29879513

fbshipit-source-id: c01d18d34bb19014bf28d92c4d04b07e50a2770a
diff --git a/torch/csrc/jit/codegen/cuda/codegen.cpp b/torch/csrc/jit/codegen/cuda/codegen.cpp
index d4fc406..d68bade 100644
--- a/torch/csrc/jit/codegen/cuda/codegen.cpp
+++ b/torch/csrc/jit/codegen/cuda/codegen.cpp
@@ -1,3 +1,4 @@
+#include <c10/util/irange.h>
 #include <torch/csrc/jit/codegen/cuda/codegen.h>
 #include <torch/csrc/jit/codegen/cuda/instrumentation.h>
 #include <torch/csrc/jit/codegen/cuda/ir_iostream.h>
@@ -157,7 +158,8 @@
   }
 
   std::ostream& indent() {
-    for (int i = 0; i < block_nest_level_; ++i) {
+    for (const auto i : c10::irange(block_nest_level_)) {
+      (void)i; // Suppress unused variable warning
       code_ << kTab;
     }
     return code_;
diff --git a/torch/csrc/jit/codegen/cuda/executor_utils.cpp b/torch/csrc/jit/codegen/cuda/executor_utils.cpp
index 25a50ad..db69c28 100644
--- a/torch/csrc/jit/codegen/cuda/executor_utils.cpp
+++ b/torch/csrc/jit/codegen/cuda/executor_utils.cpp
@@ -183,7 +183,7 @@
 
   std::stringstream msg;
   bool mismatch = false;
-  for (size_t i = 0; i < inputs.size(); ++i) {
+  for (const auto i : c10::irange(inputs.size())) {
     const IValue& arg = inputs[i];
     const Val* param = fusion->inputs()[i];
     mismatch = !validateKernelArg(arg, param, device, msg) || mismatch;
@@ -208,7 +208,7 @@
 
   std::stringstream msg;
   bool mismatch = false;
-  for (size_t i = 0; i < outputs.size(); ++i) {
+  for (const auto i : c10::irange(outputs.size())) {
     const at::Tensor& arg = outputs[i];
     const Val* param = fusion->outputs()[i];
     mismatch = !validateKernelArg(arg, param, device, msg) || mismatch;
diff --git a/torch/csrc/jit/codegen/cuda/graph_fuser.cpp b/torch/csrc/jit/codegen/cuda/graph_fuser.cpp
index aa2105a..ebe3ef0 100644
--- a/torch/csrc/jit/codegen/cuda/graph_fuser.cpp
+++ b/torch/csrc/jit/codegen/cuda/graph_fuser.cpp
@@ -99,7 +99,7 @@
     std::unordered_map<Value*, Value*> inner_to_outer;
     auto inner_inputs = producer_subgraph->inputs();
     auto outer_inputs = producer_group->inputs();
-    for (size_t i = 0; i < inner_inputs.size(); ++i) {
+    for (const auto i : c10::irange(inner_inputs.size())) {
       inner_to_outer[inner_inputs[i]] = outer_inputs[i];
     }
 
@@ -111,13 +111,14 @@
       temporary_nodes.emplace_back(outer);
       auto inner_outputs = inner->outputs();
       auto outer_outputs = outer->outputs();
-      for (size_t i = 0; i < inner_outputs.size(); ++i)
+      for (const auto i : c10::irange(inner_outputs.size())) {
         inner_to_outer[inner_outputs[i]] = outer_outputs[i];
+      }
     }
 
     // Replace uses of producer_group outputs and destroy the producer
     auto subgraph_outputs = producer_subgraph->outputs();
-    for (size_t i = 0; i < subgraph_outputs.size(); ++i) {
+    for (const auto i : c10::irange(subgraph_outputs.size())) {
       auto outer_output = inner_to_outer.at(subgraph_outputs[i]);
       producer_group->outputs()[i]->replaceAllUsesWith(outer_output);
     }
@@ -133,7 +134,7 @@
       Node* merged = mergeNodeIntoGroup(consumer_group, node);
       // If any of the outputs are still used then we need to add them
       auto outputs = node->outputs();
-      for (size_t i = 0; i < outputs.size(); ++i) {
+      for (const auto i : c10::irange(outputs.size())) {
         auto output = outputs[i];
         if (output->uses().size() == 0)
           continue;
@@ -388,7 +389,7 @@
     Node* bchunk =
         chunk->owningGraph()->create(prim::BroadcastingChunk, nchunks);
     bchunk->addInput(chunk->input());
-    for (size_t i = 0; i < nchunks; ++i) {
+    for (const auto i : c10::irange(nchunks)) {
       auto* old_output = chunk->outputs().at(i);
       auto* new_output = bchunk->outputs().at(i);
       new_output->copyMetadata(old_output);
@@ -532,10 +533,10 @@
       auto it = std::find(bchunk_inputs.begin(), bchunk_inputs.end(), input);
       if (it != bchunk_inputs.end()) {
         chunked_inputs.emplace_back();
-        auto input_index = std::distance(bchunk_inputs.begin(), it);
-        for (size_t chunk = 0; chunk < nchunks; ++chunk) {
+        const auto input_index = std::distance(bchunk_inputs.begin(), it);
+        for (const auto chunk_idx : c10::irange(nchunks)) {
           chunked_inputs.back().push_back(
-              bchunk->outputs().at(nchunks * input_index + chunk));
+              bchunk->outputs().at(nchunks * input_index + chunk_idx));
         }
         continue;
       }
@@ -659,8 +660,7 @@
         Node* new_chunk =
             graph->insertNode(graph->create(prim::ConstantChunk, input, 0));
         new_chunk->copyAttributes(*bchunk);
-        for (size_t output_offset = 0; output_offset < nchunks;
-             output_offset++) {
+        for (const auto output_offset : c10::irange(nchunks)) {
           auto new_output = new_chunk->addOutput();
           auto old_output =
               bchunk->outputs().at(input_offset * nchunks + output_offset);
@@ -692,7 +692,7 @@
     auto inputs = fusion_group->inputs();
     auto sinputs = subgraph->inputs();
     AT_ASSERT(inputs.size() == sinputs.size());
-    for (size_t i = 0; i < inputs.size(); ++i) {
+    for (const auto i : c10::irange(inputs.size())) {
       if (inputs[i]->type()->isSubtypeOf(TensorType::get())) {
         shape_of[sinputs[i]] = graph->insert(aten::size, {inputs[i]});
       }
@@ -705,7 +705,7 @@
     auto outputs = fusion_group->outputs();
     auto soutputs = subgraph->outputs();
     AT_ASSERT(outputs.size() == soutputs.size());
-    for (size_t i = 0; i < outputs.size(); ++i) {
+    for (const auto i : c10::irange(outputs.size())) {
       if (usedOnlyInSize(outputs[i]))
         continue;
       shape_of[soutputs[i]] = graph->insert(aten::size, {outputs[i]});
diff --git a/torch/csrc/jit/codegen/cuda/kernel_ir_printer.cpp b/torch/csrc/jit/codegen/cuda/kernel_ir_printer.cpp
index 4a20543..1b474e5 100644
--- a/torch/csrc/jit/codegen/cuda/kernel_ir_printer.cpp
+++ b/torch/csrc/jit/codegen/cuda/kernel_ir_printer.cpp
@@ -1,3 +1,4 @@
+#include <c10/util/irange.h>
 #include <torch/csrc/jit/codegen/cuda/kernel_ir_printer.h>
 
 #include <torch/csrc/jit/codegen/cuda/instrumentation.h>
@@ -49,7 +50,8 @@
 }
 
 std::ostream& IrPrinter::indent() {
-  for (int i = 0; i < indent_level_; ++i) {
+  for (const auto i : c10::irange(indent_level_)) {
+    (void)i; // Suppress unused variable warning
     os_ << kTab;
   }
   return os_;
diff --git a/torch/csrc/jit/codegen/cuda/lower_index.cpp b/torch/csrc/jit/codegen/cuda/lower_index.cpp
index 8205abb..546ced2 100644
--- a/torch/csrc/jit/codegen/cuda/lower_index.cpp
+++ b/torch/csrc/jit/codegen/cuda/lower_index.cpp
@@ -1,3 +1,4 @@
+#include <c10/util/irange.h>
 #include <torch/csrc/jit/codegen/cuda/arith.h>
 #include <torch/csrc/jit/codegen/cuda/index_compute.h>
 #include <torch/csrc/jit/codegen/cuda/ir_iostream.h>
@@ -211,7 +212,7 @@
 
     Val* buffer_size =
         buffer_ids.empty() ? new Int(1) : buffer_ids[0]->rawExtent();
-    for (size_t i = 1; i < buffer_ids.size(); i++) {
+    for (const auto i : c10::irange(1, buffer_ids.size())) {
       buffer_size = mul(buffer_size, buffer_ids[i]->rawExtent());
     }
 
@@ -226,7 +227,7 @@
         sync_ids.end());
 
     Val* sync_size = sync_ids.empty() ? new Int(1) : sync_ids[0]->rawExtent();
-    for (size_t i = 1; i < sync_ids.size(); i++) {
+    for (const auto i : c10::irange(1, sync_ids.size())) {
       sync_size = mul(sync_size, sync_ids[i]->rawExtent());
     }
 
diff --git a/torch/csrc/jit/codegen/cuda/lower_loops.cpp b/torch/csrc/jit/codegen/cuda/lower_loops.cpp
index 814399f..4e90d54 100644
--- a/torch/csrc/jit/codegen/cuda/lower_loops.cpp
+++ b/torch/csrc/jit/codegen/cuda/lower_loops.cpp
@@ -1,3 +1,4 @@
+#include <c10/util/irange.h>
 #include <torch/csrc/jit/codegen/cuda/lower_loops.h>
 
 #include <torch/csrc/jit/codegen/cuda/arith.h>
@@ -66,7 +67,7 @@
     size = ir_builder_.create<kir::Int>(1);
   } else {
     size = GpuLower::lowerValue(alloc_dims[0]);
-    for (size_t i = 1; i < alloc_dims.size(); i++) {
+    for (const auto i : c10::irange(1, alloc_dims.size())) {
       size = ir_builder_.mulExpr(size, GpuLower::lowerValue(alloc_dims[i]));
     }
   }
diff --git a/torch/csrc/jit/codegen/cuda/lower_utils.cpp b/torch/csrc/jit/codegen/cuda/lower_utils.cpp
index 109b66b..b7892f3 100644
--- a/torch/csrc/jit/codegen/cuda/lower_utils.cpp
+++ b/torch/csrc/jit/codegen/cuda/lower_utils.cpp
@@ -210,7 +210,7 @@
       : replacement_map_(std::move(replacement_map)) {}
 
   void handleScope(kir::Scope& scope) {
-    for (size_t i = 0; i < scope.size(); ++i) {
+    for (const auto i : c10::irange(scope.size())) {
       const auto it = replacement_map_.find(scope[i]);
       if (it == replacement_map_.end()) {
         handle(scope[i]);