Add -tile-sizes command line option for loop tiling; clean up cl options for
dma-generate, loop-unroll.

- add -tile-sizes command line option for loop tiling to specify different tile
  sizes for loops in a band

- clean up command line options for loop-unroll, dma-generate (remove
  cl::Hidden)

PiperOrigin-RevId: 234006232
diff --git a/lib/Transforms/DmaGeneration.cpp b/lib/Transforms/DmaGeneration.cpp
index 29cc435..06aa375 100644
--- a/lib/Transforms/DmaGeneration.cpp
+++ b/lib/Transforms/DmaGeneration.cpp
@@ -42,14 +42,19 @@
 
 static llvm::cl::OptionCategory clOptionsCategory(DEBUG_TYPE " options");
 
-static llvm::cl::opt<unsigned> clFastMemorySpace(
-    "dma-fast-mem-space", llvm::cl::Hidden,
-    llvm::cl::desc("Set fast memory space id for DMA generation"),
+static llvm::cl::opt<unsigned> clFastMemoryCapacity(
+    "dma-fast-mem-capacity",
+    llvm::cl::init(std::numeric_limits<uint64_t>::max() / 1024),
+    llvm::cl::desc(
+        "Set fast memory space capacity in KiB (default: unlimited)"),
     llvm::cl::cat(clOptionsCategory));
 
-static llvm::cl::opt<unsigned> clFastMemoryCapacity(
-    "dma-fast-mem-capacity", llvm::cl::Hidden,
-    llvm::cl::desc("Set fast memory space capacity in KiB"),
+static const unsigned kDefaultFastMemorySpace = 1;
+
+static llvm::cl::opt<unsigned> clFastMemorySpace(
+    "dma-fast-mem-space", llvm::cl::init(kDefaultFastMemorySpace),
+    llvm::cl::desc(
+        "Fast memory space identifier for DMA generation (default: 1)"),
     llvm::cl::cat(clOptionsCategory));
 
 namespace {
@@ -60,10 +65,11 @@
 // TODO(bondhugula): We currently can't generate DMAs correctly when stores are
 // strided. Check for strided stores.
 struct DmaGeneration : public FunctionPass {
-  explicit DmaGeneration(
-      unsigned slowMemorySpace = 0, unsigned fastMemorySpace = 1,
-      int minDmaTransferSize = 1024,
-      uint64_t fastMemCapacityBytes = std::numeric_limits<uint64_t>::max())
+  explicit DmaGeneration(unsigned slowMemorySpace = 0,
+                         unsigned fastMemorySpace = clFastMemorySpace,
+                         int minDmaTransferSize = 1024,
+                         uint64_t fastMemCapacityBytes = clFastMemoryCapacity *
+                                                         1024)
       : FunctionPass(&DmaGeneration::passID), slowMemorySpace(slowMemorySpace),
         fastMemorySpace(fastMemorySpace),
         minDmaTransferSize(minDmaTransferSize),
@@ -684,14 +690,6 @@
   FuncBuilder topBuilder(f);
   zeroIndex = topBuilder.create<ConstantIndexOp>(f->getLoc(), 0);
 
-  if (clFastMemorySpace.getNumOccurrences() > 0) {
-    fastMemorySpace = clFastMemorySpace;
-  }
-
-  if (clFastMemoryCapacity.getNumOccurrences() > 0) {
-    fastMemCapacityBytes = clFastMemoryCapacity * 1024;
-  }
-
   for (auto &block : *f) {
     runOnBlock(&block, /*consumedCapacityBytes=*/0);
   }
diff --git a/lib/Transforms/LoopTiling.cpp b/lib/Transforms/LoopTiling.cpp
index 368a1da..b86516f 100644
--- a/lib/Transforms/LoopTiling.cpp
+++ b/lib/Transforms/LoopTiling.cpp
@@ -36,11 +36,13 @@
 
 static llvm::cl::OptionCategory clOptionsCategory(DEBUG_TYPE " options");
 
-// Tile size for all loops.
-static llvm::cl::opt<unsigned>
-    clTileSize("tile-size", llvm::cl::Hidden,
-               llvm::cl::desc("Use this tile size for all loops"),
-               llvm::cl::cat(clOptionsCategory));
+// List of tile sizes. If any of them aren't provided, they are filled with
+// clTileSize.
+static llvm::cl::list<unsigned> clTileSizes(
+    "tile-sizes",
+    llvm::cl::desc(
+        "List of tile sizes for each perfect nest (overrides -tile-size)"),
+    llvm::cl::ZeroOrMore, llvm::cl::cat(clOptionsCategory));
 
 namespace {
 
@@ -57,6 +59,12 @@
 
 char LoopTiling::passID = 0;
 
+// Tile size to use for all loops (overridden by -tile-sizes if provided).
+static llvm::cl::opt<unsigned>
+    clTileSize("tile-size", llvm::cl::init(LoopTiling::kDefaultTileSize),
+               llvm::cl::desc("Use this tile size for all loops"),
+               llvm::cl::cat(clOptionsCategory));
+
 /// Creates a pass to perform loop tiling on all suitable loop nests of an
 /// Function.
 FunctionPass *mlir::createLoopTilingPass() { return new LoopTiling(); }
@@ -252,12 +260,14 @@
   std::vector<SmallVector<OpPointer<AffineForOp>, 6>> bands;
   getTileableBands(f, &bands);
 
-  // Temporary tile sizes.
-  unsigned tileSize =
-      clTileSize.getNumOccurrences() > 0 ? clTileSize : kDefaultTileSize;
-
   for (auto &band : bands) {
-    SmallVector<unsigned, 6> tileSizes(band.size(), tileSize);
+    // Set up tile sizes; fill missing tile sizes at the end with default tile
+    // size or clTileSize if one was provided.
+    SmallVector<unsigned, 6> tileSizes(band.size(), clTileSize);
+    std::copy(clTileSizes.begin(),
+              clTileSizes.begin() + std::min(clTileSizes.size(), band.size()),
+              tileSizes.begin());
+
     if (tileCodeGen(band, tileSizes)) {
       return failure();
     }
diff --git a/lib/Transforms/LoopUnroll.cpp b/lib/Transforms/LoopUnroll.cpp
index 3a7cfb8..84b0d22 100644
--- a/lib/Transforms/LoopUnroll.cpp
+++ b/lib/Transforms/LoopUnroll.cpp
@@ -41,16 +41,16 @@
 
 // Loop unrolling factor.
 static llvm::cl::opt<unsigned> clUnrollFactor(
-    "unroll-factor", llvm::cl::Hidden,
+    "unroll-factor",
     llvm::cl::desc("Use this unroll factor for all loops being unrolled"),
     llvm::cl::cat(clOptionsCategory));
 
-static llvm::cl::opt<bool> clUnrollFull("unroll-full", llvm::cl::Hidden,
+static llvm::cl::opt<bool> clUnrollFull("unroll-full",
                                         llvm::cl::desc("Fully unroll loops"),
                                         llvm::cl::cat(clOptionsCategory));
 
 static llvm::cl::opt<unsigned> clUnrollNumRepetitions(
-    "unroll-num-reps", llvm::cl::Hidden,
+    "unroll-num-reps",
     llvm::cl::desc("Unroll innermost loops repeatedly this many times"),
     llvm::cl::cat(clOptionsCategory));