blob: ae4647e143d530194888c052cc00f386be0c8707 [file] [log] [blame]
//===- LoopFusion.cpp - Code to perform loop fusion -----------------------===//
//
// Copyright 2019 The MLIR Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================
//
// This file implements loop fusion.
//
//===----------------------------------------------------------------------===//
#include "mlir/Analysis/AffineAnalysis.h"
#include "mlir/Analysis/LoopAnalysis.h"
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/AffineMap.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/IR/StmtVisitor.h"
#include "mlir/Pass.h"
#include "mlir/StandardOps/StandardOps.h"
#include "mlir/Transforms/LoopUtils.h"
#include "mlir/Transforms/Passes.h"
#include "llvm/ADT/DenseMap.h"
using namespace mlir;
namespace {
/// Loop fusion pass. Fuses loops in an MLFunction when the candidate loops
/// access the same memref and no dependence is found between the accesses.
/// Candidate loop selection is implemented by MatchTestPattern.
// TODO(andydavis) Extend this pass to check for fusion preventing dependences,
// and add support for more general loop fusion algorithms.
struct LoopFusion : public FunctionPass {
  // Static pass identifier; defined out of line in this file.
  static char passID;

  LoopFusion() = default;

  // Entry point invoked for each MLFunction 'f'.
  PassResult runOnMLFunction(MLFunction *f) override;
};
// LoopCollector walks the statements of an MLFunction and records:
//   *) 'loopMap': for each StmtBlock, the ForStmts whose enclosing block is
//      that StmtBlock.
//   *) 'loadsAndStoresMap': for each ForStmt, the LoadOp/StoreOp statements
//      whose parent statement is that ForStmt.
//   *) 'hasIfStmt': whether any IfStmt was visited.
class LoopCollector : public StmtWalker<LoopCollector> {
public:
  DenseMap<StmtBlock *, SmallVector<ForStmt *, 2>> loopMap;
  DenseMap<ForStmt *, SmallVector<OperationStmt *, 2>> loadsAndStoresMap;
  bool hasIfStmt = false;

  // Files 'forStmt' under the block that contains it.
  void visitForStmt(ForStmt *forStmt) {
    auto *enclosingBlock = forStmt->getBlock();
    loopMap[enclosingBlock].push_back(forStmt);
  }

  // Only the presence of an IfStmt matters; the statement itself is unused.
  void visitIfStmt(IfStmt *) { hasIfStmt = true; }

  // Records 'opStmt' if it is a load or store whose parent statement is a
  // ForStmt; every other operation is ignored.
  void visitOperationStmt(OperationStmt *opStmt) {
    if (!opStmt->isa<LoadOp>() && !opStmt->isa<StoreOp>())
      return;
    auto *parentStmt = opStmt->getParentStmt();
    if (!parentStmt)
      return;
    if (auto *parentForStmt = dyn_cast<ForStmt>(parentStmt))
      loadsAndStoresMap[parentForStmt].push_back(opStmt);
  }
};
} // end anonymous namespace
// Out-of-line definition of the static member LoopFusion::passID.
char LoopFusion::passID = 0;
// Returns a newly heap-allocated LoopFusion pass. The object is created with
// a raw 'new'; presumably the caller takes ownership -- confirm with callers.
FunctionPass *mlir::createLoopFusionPass() { return new LoopFusion; }
// TODO(andydavis) Remove the following test code when more general loop
// fusion is supported.
// FusionCandidate describes two loop nests considered for fusion, together
// with one memref access taken from the inner-most loop of each nest.
struct FusionCandidate {
// Loop nest of ForStmts with 'accessA' in the inner-most loop. Loops are
// stored outer-most first; the inner-most loop is the last element.
SmallVector<ForStmt *, 2> forStmtsA;
// Load or store operation within loop nest 'forStmtsA'.
MemRefAccess accessA;
// Loop nest of ForStmts with 'accessB' in the inner-most loop. Loops are
// stored outer-most first; the inner-most loop is the last element.
SmallVector<ForStmt *, 2> forStmtsB;
// Load or store operation within loop nest 'forStmtsB'.
MemRefAccess accessB;
};
// Fills in 'access' from 'loadOrStoreOpStmt', which must be a LoadOp or a
// StoreOp (asserted): records the statement itself, the accessed memref, and
// appends each index operand to 'access->indices'.
static void getSingleMemRefAccess(OperationStmt *loadOrStoreOpStmt,
                                  MemRefAccess *access) {
  access->opStmt = loadOrStoreOpStmt;

  // Load case.
  if (auto loadOp = loadOrStoreOpStmt->dyn_cast<LoadOp>()) {
    access->memref = cast<MLValue>(loadOp->getMemRef());
    access->indices.reserve(loadOp->getMemRefType().getRank());
    for (auto *loadIndex : loadOp->getIndices())
      access->indices.push_back(cast<MLValue>(loadIndex));
    return;
  }

  // Anything that is not a load has to be a store.
  assert(loadOrStoreOpStmt->isa<StoreOp>());
  auto storeOp = loadOrStoreOpStmt->dyn_cast<StoreOp>();
  access->memref = cast<MLValue>(storeOp->getMemRef());
  access->indices.reserve(storeOp->getMemRefType().getRank());
  for (auto *storeIndex : storeOp->getIndices())
    access->indices.push_back(cast<MLValue>(storeIndex));
}
// Checks whether 'forStmtA' and 'forStmtB' match the test criteria:
//   *) both are non-null and have identical constant bounds and step,
//   *) neither contains a nested loop,
//   *) each contains exactly one load or store directly in its body,
//   *) the access in 'forStmtA' is a StoreOp (the kind of the access in
//      'forStmtB' is not checked here),
//   *) both accesses refer to the same memref.
// On success the two loops are appended to 'candidate->forStmtsA/B', the
// accesses are stored in 'candidate->accessA/B', and true is returned.
// Returns false otherwise; in that case 'candidate->accessA/B' may already
// have been populated.
static bool MatchTestPatternLoopPair(LoopCollector *lc,
                                     FusionCandidate *candidate,
                                     ForStmt *forStmtA, ForStmt *forStmtB) {
  if (!forStmtA || !forStmtB)
    return false;

  // Both loops must have constant bounds, and bounds and step must agree.
  if (!forStmtA->hasConstantBounds() || !forStmtB->hasConstantBounds())
    return false;
  if (forStmtA->getConstantLowerBound() != forStmtB->getConstantLowerBound())
    return false;
  if (forStmtA->getConstantUpperBound() != forStmtB->getConstantUpperBound())
    return false;
  if (forStmtA->getStep() != forStmtB->getStep())
    return false;

  // A loop that appears as a key in 'loopMap' contains nested loops.
  if (lc->loopMap.count(forStmtA) != 0)
    return false;
  if (lc->loopMap.count(forStmtB) != 0)
    return false;

  // Each loop must contain exactly one load or store. The map is re-indexed
  // for every query instead of holding references across lookups, since a
  // lookup may insert a new entry.
  if (lc->loadsAndStoresMap[forStmtA].size() != 1)
    return false;
  if (lc->loadsAndStoresMap[forStmtB].size() != 1)
    return false;

  // The access in 'forStmtA' has to be a store.
  OperationStmt *soleAccessA = lc->loadsAndStoresMap[forStmtA][0];
  getSingleMemRefAccess(soleAccessA, &candidate->accessA);
  if (!candidate->accessA.opStmt->isa<StoreOp>())
    return false;

  // The access in 'forStmtB' has to touch the same memref.
  OperationStmt *soleAccessB = lc->loadsAndStoresMap[forStmtB][0];
  getSingleMemRefAccess(soleAccessB, &candidate->accessB);
  if (candidate->accessA.memref != candidate->accessB.memref)
    return false;

  candidate->forStmtsA.push_back(forStmtA);
  candidate->forStmtsB.push_back(forStmtB);
  return true;
}
// Returns the only statement in the body of 'parent' if the body holds
// exactly one statement and that statement is a ForStmt; returns nullptr
// otherwise.
ForStmt *getSingleForStmtChild(ForStmt *parent) {
  if (parent->getStatements().size() != 1)
    return nullptr;
  // dyn_cast yields nullptr when the single child is not a ForStmt.
  return dyn_cast<ForStmt>(&parent->front());
}
// Checks for a specific ForStmt/OperationStmt test pattern in 'f'. Returns
// true on success and returns the fusion candidate in 'candidate'. Returns
// false otherwise, in which case 'candidate' may be partially populated and
// must not be used.
// Only the first two loops recorded for the top-level block of 'f' are
// considered.
// Currently supported test patterns:
// *) Two loops with a StoreOp the only op in first loop, and a single load or
//    store the only op in the second loop (both accessing the same memref).
// *) As above, but with one level of perfect loop nesting. The outer loops
//    must also have matching constant bounds and step.
//
// TODO(andydavis) Look into using ntv@ pattern matcher here.
static bool MatchTestPattern(MLFunction *f, FusionCandidate *candidate) {
  LoopCollector lc;
  lc.walk(f);
  // Return if an IfStmt was found or if less than two ForStmts were found.
  if (lc.hasIfStmt || lc.loopMap.count(f) == 0 || lc.loopMap[f].size() < 2)
    return false;
  auto *forStmtA = lc.loopMap[f][0];
  auto *forStmtB = lc.loopMap[f][1];
  if (MatchTestPatternLoopPair(&lc, candidate, forStmtA, forStmtB))
    return true;

  // Check for one level of loop nesting. The outer loops become part of the
  // fused nest, so they must cover the same iteration space: fusing nests
  // whose outer bounds or step differ would change how often the second
  // nest's body executes.
  if (!forStmtA->hasConstantBounds() || !forStmtB->hasConstantBounds() ||
      forStmtA->getConstantLowerBound() != forStmtB->getConstantLowerBound() ||
      forStmtA->getConstantUpperBound() != forStmtB->getConstantUpperBound() ||
      forStmtA->getStep() != forStmtB->getStep())
    return false;

  // Both outer loops must be perfectly nested around a single inner loop.
  auto *innerForStmtA = getSingleForStmtChild(forStmtA);
  auto *innerForStmtB = getSingleForStmtChild(forStmtB);
  if (innerForStmtA == nullptr || innerForStmtB == nullptr)
    return false;

  // Record the outer loops first so that the inner-most loops end up last.
  candidate->forStmtsA.push_back(forStmtA);
  candidate->forStmtsB.push_back(forStmtB);
  return MatchTestPatternLoopPair(&lc, candidate, innerForStmtA,
                                  innerForStmtB);
}
// FuseLoops implements the code generation mechanics of loop fusion.
// Fuses the operations statments from the inner-most loop in 'c.forStmtsB',
// by cloning them into the inner-most loop in 'c.forStmtsA', then erasing
// old statements and loops.
static void fuseLoops(const FusionCandidate &c) {
MLFuncBuilder builder(c.forStmtsA.back(),
StmtBlock::iterator(c.forStmtsA.back()->end()));
DenseMap<const MLValue *, MLValue *> operandMap;
assert(c.forStmtsA.size() == c.forStmtsB.size());
for (unsigned i = 0, e = c.forStmtsA.size(); i < e; i++) {
// Map loop IVs to 'forStmtB[i]' to loop IV for 'forStmtA[i]'.
operandMap[c.forStmtsB[i]] = c.forStmtsA[i];
}
// Clone the body of inner-most loop in 'forStmtsB', into the body of
// inner-most loop in 'forStmtsA'.
SmallVector<Statement *, 2> stmtsToErase;
auto *innerForStmtB = c.forStmtsB.back();
for (auto &stmt : *innerForStmtB) {
builder.clone(stmt, operandMap);
stmtsToErase.push_back(&stmt);
}
// Erase 'forStmtB' and its statement list.
for (auto it = stmtsToErase.rbegin(); it != stmtsToErase.rend(); ++it)
(*it)->erase();
// Erase 'forStmtsB' loop nest.
for (int i = static_cast<int>(c.forStmtsB.size()) - 1; i >= 0; --i)
c.forStmtsB[i]->erase();
}
// Runs the test-pattern based loop fusion on 'f': looks for a fusion candidate
// via MatchTestPattern and fuses the two loop nests if no dependence is found
// between the candidate's two memref accesses. Always returns success(); a
// function with nothing to fuse is left unchanged.
PassResult LoopFusion::runOnMLFunction(MLFunction *f) {
  FusionCandidate candidate;
  // Finding no fusion candidate is not an error, so it must not be reported
  // as a pass failure.
  if (!MatchTestPattern(f, &candidate))
    return success();
  // TODO(andydavis) Add checks for fusion-preventing dependences and ordering
  // constraints which would prevent fusion.
  // TODO(andydavis) This check is overly conservative for now. Support fusing
  // statements with compatible dependences (i.e. statements where the
  // dependence between the statements does not reverse direction when the
  // statements are fused into the same loop).
  if (!checkMemrefAccessDependence(candidate.accessA, candidate.accessB)) {
    // Current conservative test policy: No dependence exists between accesses
    // in different loop nests -> fuse loops.
    fuseLoops(candidate);
  }
  return success();
}
// Static registration object for the LoopFusion pass, constructed with the
// name "loop-fusion" and the description "Fuse loop nests".
static PassRegistration<LoopFusion> pass("loop-fusion", "Fuse loop nests");