blob: ae4647e143d530194888c052cc00f386be0c8707 [file] [log] [blame]
//===- LoopFusion.cpp - Code to perform loop fusion -----------------------===//
// Copyright 2019 The MLIR Authors.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================
// This file implements loop fusion.
#include "mlir/Analysis/AffineAnalysis.h"
#include "mlir/Analysis/LoopAnalysis.h"
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/AffineMap.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/IR/StmtVisitor.h"
#include "mlir/Pass.h"
#include "mlir/StandardOps/StandardOps.h"
#include "mlir/Transforms/LoopUtils.h"
#include "mlir/Transforms/Passes.h"
#include "llvm/ADT/DenseMap.h"
using namespace mlir;
namespace {
/// Loop fusion pass. This pass fuses adjacent loops in MLFunctions which
/// access the same memref with no dependences.
// See MatchTestPattern for details on candidate loop selection.
// TODO(andydavis) Extend this pass to check for fusion preventing dependences,
// and add support for more general loop fusion algorithms.
struct LoopFusion : public FunctionPass {
LoopFusion() {}
// Per-function entry point of the pass; defined out of line later in this
// file.
PassResult runOnMLFunction(MLFunction *f) override;
// Static pass identifier; defined and initialized to 0 after the anonymous
// namespace is closed.
static char passID;
// LoopCollector walks the statements in an MLFunction and builds a map from
// StmtBlocks to a list of loops within the StmtBlock, and a map from ForStmts
// to the list of loads and stores within its StmtBlock.
class LoopCollector : public StmtWalker<LoopCollector> {
// Maps a StmtBlock to the ForStmts collected for it.
DenseMap<StmtBlock *, SmallVector<ForStmt *, 2>> loopMap;
// Maps a ForStmt to the load/store operation statements collected for it.
DenseMap<ForStmt *, SmallVector<OperationStmt *, 2>> loadsAndStoresMap;
// Set to true once any IfStmt has been visited.
bool hasIfStmt = false;
void visitForStmt(ForStmt *forStmt) {
void visitIfStmt(IfStmt *ifStmt) { hasIfStmt = true; }
// Only LoadOp/StoreOp statements whose parent statement is a ForStmt are
// selected by the checks below.
void visitOperationStmt(OperationStmt *opStmt) {
if (auto *parentStmt = opStmt->getParentStmt()) {
if (auto *parentForStmt = dyn_cast<ForStmt>(parentStmt)) {
if (opStmt->isa<LoadOp>() || opStmt->isa<StoreOp>()) {
} // end anonymous namespace
char LoopFusion::passID = 0;
/// Factory for the loop fusion pass: returns a newly heap-allocated
/// LoopFusion instance through its FunctionPass base pointer.
FunctionPass *mlir::createLoopFusionPass() {
  FunctionPass *fusionPass = new LoopFusion();
  return fusionPass;
}
// TODO(andydavis) Remove the following test code when more general loop
// fusion is supported.
// Describes a pair of loop nests ('forStmtsA' and 'forStmtsB') considered for
// fusion, together with one memref access taken from each nest.
struct FusionCandidate {
// Loop nest of ForStmts with 'accessA' in the inner-most loop.
SmallVector<ForStmt *, 2> forStmtsA;
// Load or store operation within loop nest 'forStmtsA'.
MemRefAccess accessA;
// Loop nest of ForStmts with 'accessB' in the inner-most loop.
SmallVector<ForStmt *, 2> forStmtsB;
// Load or store operation within loop nest 'forStmtsB'.
MemRefAccess accessB;
// Fills in 'access' from 'loadOrStoreOpStmt': records the statement itself in
// 'access->opStmt' and the accessed memref in 'access->memref', then iterates
// over the operation's indices. The statement is treated as a LoadOp if the
// dyn_cast succeeds and as a StoreOp otherwise.
static void getSingleMemRefAccess(OperationStmt *loadOrStoreOpStmt,
MemRefAccess *access) {
if (auto loadOp = loadOrStoreOpStmt->dyn_cast<LoadOp>()) {
access->memref = cast<MLValue>(loadOp->getMemRef());
access->opStmt = loadOrStoreOpStmt;
auto loadMemrefType = loadOp->getMemRefType();
for (auto *index : loadOp->getIndices()) {
} else {
// NOTE(review): the dyn_cast result is used below without a null check, so
// callers are presumably expected to pass only LoadOp or StoreOp
// statements -- confirm, and consider making that assumption explicit.
auto storeOp = loadOrStoreOpStmt->dyn_cast<StoreOp>();
access->opStmt = loadOrStoreOpStmt;
access->memref = cast<MLValue>(storeOp->getMemRef());
auto storeMemrefType = storeOp->getMemRefType();
for (auto *index : storeOp->getIndices()) {
// Checks if 'forStmtA' and 'forStmtB' match specific test criteria:
// matching constant loop bounds and step, no nested loops, exactly one load
// or store in each loop, a StoreOp as the access in 'forStmtA', and both
// accesses referring to the same memref.
// 'lc' supplies the collected loop and load/store maps; 'candidate' holds the
// accesses that are compared.
// Returns true if the test pattern matches, false otherwise.
// NOTE(review): no explicit check that the access in 'forStmtB' is a LoadOp is
// visible here -- confirm whether one is intended.
static bool MatchTestPatternLoopPair(LoopCollector *lc,
FusionCandidate *candidate,
ForStmt *forStmtA, ForStmt *forStmtB) {
if (forStmtA == nullptr || forStmtB == nullptr)
return false;
// Return if 'forStmtA' and 'forStmtB' do not have matching constant
// bounds and step.
if (!forStmtA->hasConstantBounds() || !forStmtB->hasConstantBounds() ||
forStmtA->getConstantLowerBound() != forStmtB->getConstantLowerBound() ||
forStmtA->getConstantUpperBound() != forStmtB->getConstantUpperBound() ||
forStmtA->getStep() != forStmtB->getStep())
return false;
// Return if 'forStmtA' or 'forStmtB' have nested loops.
if (lc->loopMap.count(forStmtA) > 0 || lc->loopMap.count(forStmtB))
return false;
// Return if 'forStmtA' or 'forStmtB' do not have exactly one load or store.
if (lc->loadsAndStoresMap[forStmtA].size() != 1 ||
lc->loadsAndStoresMap[forStmtB].size() != 1)
return false;
// Get load/store access for forStmtA.
// Return if 'accessA' is not a store.
if (!candidate->accessA.opStmt->isa<StoreOp>())
return false;
// Get load/store access for forStmtB.
// Return if accesses do not access the same memref.
if (candidate->accessA.memref != candidate->accessB.memref)
return false;
return true;
// Returns the child ForStmt of 'parent' if it is the only statement in
// 'parent', returns nullptr otherwise.
ForStmt *getSingleForStmtChild(ForStmt *parent) {
if (parent->getStatements().size() == 1 && isa<ForStmt>(parent->front()))
return dyn_cast<ForStmt>(&parent->front());
return nullptr;
// Checks for a specific ForStmt/OperationStmt test pattern in 'f'. Returns
// true on success and returns the fusion candidate in 'candidate'. Returns
// false otherwise.
// Currently supported test patterns:
// *) Adjacent loops with a StoreOp the only op in first loop, and a LoadOp the
// only op in the second loop (both load/store accessing the same memref).
// *) As above, but with one level of perfect loop nesting.
// TODO(andydavis) Look into using ntv@ pattern matcher here.
static bool MatchTestPattern(MLFunction *f, FusionCandidate *candidate) {
LoopCollector lc;
// Return if an IfStmt was found or if less than two ForStmts were found.
if (lc.hasIfStmt || lc.loopMap.count(f) == 0 || lc.loopMap[f].size() < 2)
return false;
// Only the first two loops recorded for 'f' are considered.
auto *forStmtA = lc.loopMap[f][0];
auto *forStmtB = lc.loopMap[f][1];
if (!MatchTestPatternLoopPair(&lc, candidate, forStmtA, forStmtB)) {
// Check for one level of loop nesting.
return MatchTestPatternLoopPair(&lc, candidate,
return true;
// fuseLoops implements the code generation mechanics of loop fusion.
// Fuses the operation statements from the inner-most loop in 'c.forStmtsB',
// by cloning them into the inner-most loop in 'c.forStmtsA', then erasing
// old statements and loops.
// Requires 'c.forStmtsA' and 'c.forStmtsB' to have the same depth (asserted).
static void fuseLoops(const FusionCandidate &c) {
MLFuncBuilder builder(c.forStmtsA.back(),
DenseMap<const MLValue *, MLValue *> operandMap;
assert(c.forStmtsA.size() == c.forStmtsB.size());
for (unsigned i = 0, e = c.forStmtsA.size(); i < e; i++) {
// Map the loop IV of 'forStmtsB[i]' to the loop IV of 'forStmtsA[i]', so
// that cloned statements refer to the loops of nest A.
operandMap[c.forStmtsB[i]] = c.forStmtsA[i];
// Clone the body of inner-most loop in 'forStmtsB', into the body of
// inner-most loop in 'forStmtsA'.
SmallVector<Statement *, 2> stmtsToErase;
auto *innerForStmtB = c.forStmtsB.back();
for (auto &stmt : *innerForStmtB) {
builder.clone(stmt, operandMap);
// Erase 'forStmtB' and its statement list (in reverse order of collection).
for (auto it = stmtsToErase.rbegin(); it != stmtsToErase.rend(); ++it)
// Erase 'forStmtsB' loop nest, inner-most loop first.
for (int i = static_cast<int>(c.forStmtsB.size()) - 1; i >= 0; --i)
// Runs the loop fusion test pattern on 'f'. Returns failure() if no fusion
// candidate matches in 'f'; otherwise returns success() after the dependence
// check on the candidate's two accesses.
PassResult LoopFusion::runOnMLFunction(MLFunction *f) {
FusionCandidate candidate;
if (!MatchTestPattern(f, &candidate))
return failure();
// TODO(andydavis) Add checks for fusion-preventing dependences and ordering
// constraints which would prevent fusion.
// TODO(andydavis) This check is overly conservative for now. Support fusing
// statements with compatible dependences (i.e. statements where the
// dependence between the statements does not reverse direction when the
// statements are fused into the same loop).
if (!checkMemrefAccessDependence(candidate.accessA, candidate.accessB)) {
// Current conservative test policy: No dependence exists between accesses
// in different loop nests -> fuse loops.
return success();
// File-local static registration object for LoopFusion; the two strings are
// presumably the pass name and its description -- confirm against
// PassRegistration.
static PassRegistration<LoopFusion> pass("loop-fusion", "Fuse loop nests");