blob: ae09098f9d53279780284a94f51f2548909a2b21 [file] [log] [blame]
//===- LoopUnroll.cpp - Code to perform loop unrolling --------------------===//
//
// Copyright 2019 The MLIR Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================
//
// This file implements loop unrolling.
//
//===----------------------------------------------------------------------===//
#include "mlir/Transforms/Passes.h"
#include "mlir/Analysis/LoopAnalysis.h"
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/AffineMap.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/IR/StmtVisitor.h"
#include "mlir/Pass.h"
#include "mlir/Transforms/LoopUtils.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/Support/CommandLine.h"
using namespace mlir;
// Loop unrolling factor.
static llvm::cl::opt<unsigned>
clUnrollFactor("unroll-factor", llvm::cl::Hidden,
llvm::cl::desc("Use this unroll factor for all loops"));
static llvm::cl::opt<bool> clUnrollFull("unroll-full", llvm::cl::Hidden,
llvm::cl::desc("Fully unroll loops"));
static llvm::cl::opt<unsigned> clUnrollFullThreshold(
"unroll-full-threshold", llvm::cl::Hidden,
llvm::cl::desc(
"Unroll all loops with trip count less than or equal to this"));
namespace {
/// Loop unrolling pass. Unrolls all innermost loops unless full unrolling and a
/// full unroll threshold was specified, in which case, fully unrolls all loops
/// with trip count less than the specified threshold. The latter is for testing
/// purposes, especially for testing outer loop unrolling.
struct LoopUnroll : public FunctionPass {
Optional<unsigned> unrollFactor;
Optional<bool> unrollFull;
explicit LoopUnroll(Optional<unsigned> unrollFactor = None,
Optional<bool> unrollFull = None)
: unrollFactor(unrollFactor), unrollFull(unrollFull) {}
PassResult runOnMLFunction(MLFunction *f) override;
/// Unroll this for stmt. Returns false if nothing was done.
bool runOnForStmt(ForStmt *forStmt);
static char passID;
};
} // end anonymous namespace
char LoopUnroll::passID = 0;
PassResult LoopUnroll::runOnMLFunction(MLFunction *f) {
// Gathers all innermost loops through a post order pruned walk.
class InnermostLoopGatherer : public StmtWalker<InnermostLoopGatherer, bool> {
public:
// Store innermost loops as we walk.
std::vector<ForStmt *> loops;
// This method specialized to encode custom return logic.
using StmtListType = llvm::iplist<Statement>;
bool walkPostOrder(StmtListType::iterator Start,
StmtListType::iterator End) {
bool hasInnerLoops = false;
// We need to walk all elements since all innermost loops need to be
// gathered as opposed to determining whether this list has any inner
// loops or not.
while (Start != End)
hasInnerLoops |= walkPostOrder(&(*Start++));
return hasInnerLoops;
}
bool walkForStmtPostOrder(ForStmt *forStmt) {
bool hasInnerLoops = walkPostOrder(forStmt->begin(), forStmt->end());
if (!hasInnerLoops)
loops.push_back(forStmt);
return true;
}
bool walkIfStmtPostOrder(IfStmt *ifStmt) {
bool hasInnerLoops =
walkPostOrder(ifStmt->getThen()->begin(), ifStmt->getThen()->end());
if (ifStmt->hasElse())
hasInnerLoops |=
walkPostOrder(ifStmt->getElse()->begin(), ifStmt->getElse()->end());
return hasInnerLoops;
}
bool visitOperationStmt(OperationStmt *opStmt) { return false; }
// FIXME: can't use base class method for this because that in turn would
// need to use the derived class method above. CRTP doesn't allow it, and
// the compiler error resulting from it is also misleading.
using StmtWalker<InnermostLoopGatherer, bool>::walkPostOrder;
};
// Gathers all loops with trip count <= minTripCount.
class ShortLoopGatherer : public StmtWalker<ShortLoopGatherer> {
public:
// Store short loops as we walk.
std::vector<ForStmt *> loops;
const unsigned minTripCount;
ShortLoopGatherer(unsigned minTripCount) : minTripCount(minTripCount) {}
void visitForStmt(ForStmt *forStmt) {
Optional<uint64_t> tripCount = getConstantTripCount(*forStmt);
if (tripCount.hasValue() && tripCount.getValue() <= minTripCount)
loops.push_back(forStmt);
}
};
if (clUnrollFull.getNumOccurrences() > 0 &&
clUnrollFullThreshold.getNumOccurrences() > 0) {
ShortLoopGatherer slg(clUnrollFullThreshold);
// Do a post order walk so that loops are gathered from innermost to
// outermost (or else unrolling an outer one may delete gathered inner
// ones).
slg.walkPostOrder(f);
auto &loops = slg.loops;
for (auto *forStmt : loops)
loopUnrollFull(forStmt);
return success();
}
InnermostLoopGatherer ilg;
ilg.walkPostOrder(f);
auto &loops = ilg.loops;
for (auto *forStmt : loops)
runOnForStmt(forStmt);
return success();
}
/// Unroll a for stmt. Default unroll factor is 4.
bool LoopUnroll::runOnForStmt(ForStmt *forStmt) {
// Unroll by the factor passed, if any.
if (unrollFactor.hasValue())
return loopUnrollByFactor(forStmt, unrollFactor.getValue());
// Unroll by the command line factor if one was specified.
if (clUnrollFactor.getNumOccurrences() > 0)
return loopUnrollByFactor(forStmt, clUnrollFactor);
// Unroll completely if full loop unroll was specified.
if (clUnrollFull.getNumOccurrences() > 0 ||
(unrollFull.hasValue() && unrollFull.getValue()))
return loopUnrollFull(forStmt);
// Unroll by four otherwise.
return loopUnrollByFactor(forStmt, 4);
}
/// Unrolls this loop completely.
bool mlir::loopUnrollFull(ForStmt *forStmt) {
Optional<uint64_t> mayBeConstantTripCount = getConstantTripCount(*forStmt);
if (mayBeConstantTripCount.hasValue()) {
uint64_t tripCount = mayBeConstantTripCount.getValue();
if (tripCount == 1) {
return promoteIfSingleIteration(forStmt);
}
return loopUnrollByFactor(forStmt, tripCount);
}
return false;
}
/// Unrolls and jams this loop by the specified factor or by the trip count (if
/// constant) whichever is lower.
bool mlir::loopUnrollUpToFactor(ForStmt *forStmt, uint64_t unrollFactor) {
Optional<uint64_t> mayBeConstantTripCount = getConstantTripCount(*forStmt);
if (mayBeConstantTripCount.hasValue() &&
mayBeConstantTripCount.getValue() < unrollFactor)
return loopUnrollByFactor(forStmt, mayBeConstantTripCount.getValue());
return loopUnrollByFactor(forStmt, unrollFactor);
}
/// Unrolls this loop by the specified factor. Returns true if the loop
/// is successfully unrolled.
bool mlir::loopUnrollByFactor(ForStmt *forStmt, uint64_t unrollFactor) {
assert(unrollFactor >= 1 && "unroll factor should be >= 1");
if (unrollFactor == 1 || forStmt->getStatements().empty())
return false;
auto lbMap = forStmt->getLowerBoundMap();
auto ubMap = forStmt->getUpperBoundMap();
// Loops with max/min expressions won't be unrolled here (the output can't be
// expressed as an MLFunction in the general case). However, the right way to
// do such unrolling for an MLFunction would be to specialize the loop for the
// 'hotspot' case and unroll that hotspot.
if (lbMap.getNumResults() != 1 || ubMap.getNumResults() != 1)
return false;
// Same operand list for lower and upper bound for now.
// TODO(bondhugula): handle bounds with different operand lists.
if (!forStmt->matchingBoundOperandList())
return false;
Optional<uint64_t> mayBeConstantTripCount = getConstantTripCount(*forStmt);
// If the trip count is lower than the unroll factor, no unrolled body.
// TODO(bondhugula): option to specify cleanup loop unrolling.
if (mayBeConstantTripCount.hasValue() &&
mayBeConstantTripCount.getValue() < unrollFactor)
return false;
// Generate the cleanup loop if trip count isn't a multiple of unrollFactor.
if (getLargestDivisorOfTripCount(*forStmt) % unrollFactor != 0) {
DenseMap<const MLValue *, MLValue *> operandMap;
MLFuncBuilder builder(forStmt->getBlock(), ++StmtBlock::iterator(forStmt));
auto *cleanupForStmt = cast<ForStmt>(builder.clone(*forStmt, operandMap));
auto clLbMap = getCleanupLoopLowerBound(*forStmt, unrollFactor, &builder);
assert(clLbMap &&
"cleanup loop lower bound map for single result bound maps can "
"always be determined");
cleanupForStmt->setLowerBoundMap(clLbMap);
// Promote the loop body up if this has turned into a single iteration loop.
promoteIfSingleIteration(cleanupForStmt);
// Adjust upper bound.
auto unrolledUbMap =
getUnrolledLoopUpperBound(*forStmt, unrollFactor, &builder);
assert(unrolledUbMap &&
"upper bound map can alwayys be determined for an unrolled loop "
"with single result bounds");
forStmt->setUpperBoundMap(unrolledUbMap);
}
// Scale the step of loop being unrolled by unroll factor.
int64_t step = forStmt->getStep();
forStmt->setStep(step * unrollFactor);
// Builder to insert unrolled bodies right after the last statement in the
// body of 'forStmt'.
MLFuncBuilder builder(forStmt, StmtBlock::iterator(forStmt->end()));
// Keep a pointer to the last statement in the original block so that we know
// what to clone (since we are doing this in-place).
StmtBlock::iterator srcBlockEnd = std::prev(forStmt->end());
// Unroll the contents of 'forStmt' (append unrollFactor-1 additional copies).
for (unsigned i = 1; i < unrollFactor; i++) {
DenseMap<const MLValue *, MLValue *> operandMap;
// If the induction variable is used, create a remapping to the value for
// this unrolled instance.
if (!forStmt->use_empty()) {
// iv' = iv + 1/2/3...unrollFactor-1;
auto d0 = builder.getAffineDimExpr(0);
auto bumpMap = builder.getAffineMap(1, 0, {d0 + i * step}, {});
auto *ivUnroll =
builder.create<AffineApplyOp>(forStmt->getLoc(), bumpMap, forStmt)
->getResult(0);
operandMap[forStmt] = cast<MLValue>(ivUnroll);
}
// Clone the original body of 'forStmt'.
for (auto it = forStmt->begin(); it != std::next(srcBlockEnd); it++) {
builder.clone(*it, operandMap);
}
}
// Promote the loop body up if this has turned into a single iteration loop.
promoteIfSingleIteration(forStmt);
return true;
}
FunctionPass *mlir::createLoopUnrollPass(int unrollFactor, int unrollFull) {
return new LoopUnroll(unrollFactor == -1 ? None
: Optional<unsigned>(unrollFactor),
unrollFull == -1 ? None : Optional<bool>(unrollFull));
}
static PassRegistration<LoopUnroll> pass("loop-unroll", "Unroll loops");