Add -tile-sizes command line option for loop tiling; clean up cl options for
dma-generate, loop-unroll.
- add -tile-sizes command line option for loop tiling to specify different tile
sizes for loops in a band
- clean up command line options for loop-unroll, dma-generate (remove
cl::Hidden)
PiperOrigin-RevId: 234006232
diff --git a/lib/Transforms/DmaGeneration.cpp b/lib/Transforms/DmaGeneration.cpp
index 29cc435..06aa375 100644
--- a/lib/Transforms/DmaGeneration.cpp
+++ b/lib/Transforms/DmaGeneration.cpp
@@ -42,14 +42,19 @@
static llvm::cl::OptionCategory clOptionsCategory(DEBUG_TYPE " options");
-static llvm::cl::opt<unsigned> clFastMemorySpace(
- "dma-fast-mem-space", llvm::cl::Hidden,
- llvm::cl::desc("Set fast memory space id for DMA generation"),
+static llvm::cl::opt<unsigned> clFastMemoryCapacity(
+ "dma-fast-mem-capacity",
+ llvm::cl::init(std::numeric_limits<uint64_t>::max() / 1024),
+ llvm::cl::desc(
+ "Set fast memory space capacity in KiB (default: unlimited)"),
llvm::cl::cat(clOptionsCategory));
-static llvm::cl::opt<unsigned> clFastMemoryCapacity(
- "dma-fast-mem-capacity", llvm::cl::Hidden,
- llvm::cl::desc("Set fast memory space capacity in KiB"),
+static const unsigned kDefaultFastMemorySpace = 1;
+
+static llvm::cl::opt<unsigned> clFastMemorySpace(
+ "dma-fast-mem-space", llvm::cl::init(kDefaultFastMemorySpace),
+ llvm::cl::desc(
+ "Fast memory space identifier for DMA generation (default: 1)"),
llvm::cl::cat(clOptionsCategory));
namespace {
@@ -60,10 +65,11 @@
// TODO(bondhugula): We currently can't generate DMAs correctly when stores are
// strided. Check for strided stores.
struct DmaGeneration : public FunctionPass {
- explicit DmaGeneration(
- unsigned slowMemorySpace = 0, unsigned fastMemorySpace = 1,
- int minDmaTransferSize = 1024,
- uint64_t fastMemCapacityBytes = std::numeric_limits<uint64_t>::max())
+ explicit DmaGeneration(unsigned slowMemorySpace = 0,
+ unsigned fastMemorySpace = clFastMemorySpace,
+ int minDmaTransferSize = 1024,
+ uint64_t fastMemCapacityBytes = clFastMemoryCapacity *
+ 1024)
: FunctionPass(&DmaGeneration::passID), slowMemorySpace(slowMemorySpace),
fastMemorySpace(fastMemorySpace),
minDmaTransferSize(minDmaTransferSize),
@@ -684,14 +690,6 @@
FuncBuilder topBuilder(f);
zeroIndex = topBuilder.create<ConstantIndexOp>(f->getLoc(), 0);
- if (clFastMemorySpace.getNumOccurrences() > 0) {
- fastMemorySpace = clFastMemorySpace;
- }
-
- if (clFastMemoryCapacity.getNumOccurrences() > 0) {
- fastMemCapacityBytes = clFastMemoryCapacity * 1024;
- }
-
for (auto &block : *f) {
runOnBlock(&block, /*consumedCapacityBytes=*/0);
}
diff --git a/lib/Transforms/LoopTiling.cpp b/lib/Transforms/LoopTiling.cpp
index 368a1da..b86516f 100644
--- a/lib/Transforms/LoopTiling.cpp
+++ b/lib/Transforms/LoopTiling.cpp
@@ -36,11 +36,13 @@
static llvm::cl::OptionCategory clOptionsCategory(DEBUG_TYPE " options");
-// Tile size for all loops.
-static llvm::cl::opt<unsigned>
- clTileSize("tile-size", llvm::cl::Hidden,
- llvm::cl::desc("Use this tile size for all loops"),
- llvm::cl::cat(clOptionsCategory));
+// List of tile sizes. If any of them aren't provided, they are filled with
+// clTileSize.
+static llvm::cl::list<unsigned> clTileSizes(
+ "tile-sizes",
+ llvm::cl::desc(
+ "List of tile sizes for each perfect nest (overrides -tile-size)"),
+ llvm::cl::ZeroOrMore, llvm::cl::cat(clOptionsCategory));
namespace {
@@ -57,6 +59,12 @@
char LoopTiling::passID = 0;
+// Tile size to use for all loops (overridden by -tile-sizes if provided).
+static llvm::cl::opt<unsigned>
+ clTileSize("tile-size", llvm::cl::init(LoopTiling::kDefaultTileSize),
+ llvm::cl::desc("Use this tile size for all loops"),
+ llvm::cl::cat(clOptionsCategory));
+
/// Creates a pass to perform loop tiling on all suitable loop nests of an
/// Function.
FunctionPass *mlir::createLoopTilingPass() { return new LoopTiling(); }
@@ -252,12 +260,14 @@
std::vector<SmallVector<OpPointer<AffineForOp>, 6>> bands;
getTileableBands(f, &bands);
- // Temporary tile sizes.
- unsigned tileSize =
- clTileSize.getNumOccurrences() > 0 ? clTileSize : kDefaultTileSize;
-
for (auto &band : bands) {
- SmallVector<unsigned, 6> tileSizes(band.size(), tileSize);
+ // Set up tile sizes; fill missing tile sizes at the end with default tile
+ // size or clTileSize if one was provided.
+ SmallVector<unsigned, 6> tileSizes(band.size(), clTileSize);
+ std::copy(clTileSizes.begin(),
+ clTileSizes.begin() + std::min(clTileSizes.size(), band.size()),
+ tileSizes.begin());
+
if (tileCodeGen(band, tileSizes)) {
return failure();
}
diff --git a/lib/Transforms/LoopUnroll.cpp b/lib/Transforms/LoopUnroll.cpp
index 3a7cfb8..84b0d22 100644
--- a/lib/Transforms/LoopUnroll.cpp
+++ b/lib/Transforms/LoopUnroll.cpp
@@ -41,16 +41,16 @@
// Loop unrolling factor.
static llvm::cl::opt<unsigned> clUnrollFactor(
- "unroll-factor", llvm::cl::Hidden,
+ "unroll-factor",
llvm::cl::desc("Use this unroll factor for all loops being unrolled"),
llvm::cl::cat(clOptionsCategory));
-static llvm::cl::opt<bool> clUnrollFull("unroll-full", llvm::cl::Hidden,
+static llvm::cl::opt<bool> clUnrollFull("unroll-full",
llvm::cl::desc("Fully unroll loops"),
llvm::cl::cat(clOptionsCategory));
static llvm::cl::opt<unsigned> clUnrollNumRepetitions(
- "unroll-num-reps", llvm::cl::Hidden,
+ "unroll-num-reps",
llvm::cl::desc("Unroll innermost loops repeatedly this many times"),
llvm::cl::cat(clOptionsCategory));