//===- Utils.cpp ---- Misc utilities for analysis -------------------------===//
//
// Copyright 2019 The MLIR Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================
//
// This file implements miscellaneous analysis routines for non-loop IR
// structures.
//
//===----------------------------------------------------------------------===//
#include "mlir/Analysis/Utils.h"
#include "mlir/Analysis/AffineAnalysis.h"
#include "mlir/Analysis/AffineStructures.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/StandardOps/StandardOps.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#define DEBUG_TYPE "analysis-utils"
using namespace mlir;
/// Populates 'loops' with IVs of the loops surrounding 'inst' ordered from
/// the outermost 'for' instruction to the innermost one.
void mlir::getLoopIVs(const Instruction &inst,
SmallVectorImpl<ForInst *> *loops) {
auto *currInst = inst.getParentInst();
ForInst *currForInst;
  // Traverse up the hierarchy collecting all 'for' instructions while skipping
  // over 'if' instructions.
while (currInst && ((currForInst = dyn_cast<ForInst>(currInst)) ||
isa<IfInst>(currInst))) {
if (currForInst)
loops->push_back(currForInst);
currInst = currInst->getParentInst();
}
std::reverse(loops->begin(), loops->end());
}
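// Illustrative sketch of getLoopIVs (assumed IR, not from this file): for a
// load nested as
//
//   for %i = 0 to 32 {
//     if #set0(%i) {
//       for %j = 0 to 8 {
//         load %A[%i, %j]
//       }
//     }
//   }
//
// calling getLoopIVs on the load populates 'loops' with the ForInsts of %i and
// %j (in that order), skipping over the intervening 'if' instruction.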
unsigned MemRefRegion::getRank() const {
return memref->getType().cast<MemRefType>().getRank();
}
Optional<int64_t> MemRefRegion::getBoundingConstantSizeAndShape(
SmallVectorImpl<int> *shape,
std::vector<SmallVector<int64_t, 4>> *lbs) const {
auto memRefType = memref->getType().cast<MemRefType>();
unsigned rank = memRefType.getRank();
shape->reserve(rank);
// Find a constant upper bound on the extent of this memref region along each
// dimension.
int64_t numElements = 1;
int64_t diffConstant;
for (unsigned d = 0; d < rank; d++) {
SmallVector<int64_t, 4> lb;
Optional<int64_t> diff = cst.getConstantBoundOnDimSize(d, &lb);
if (diff.hasValue()) {
diffConstant = diff.getValue();
} else {
      // If no constant bound is found, then it can always be bounded by the
      // memref's dim size if the latter has a constant size along this dim.
auto dimSize = memRefType.getDimSize(d);
if (dimSize == -1)
return None;
diffConstant = dimSize;
// Lower bound becomes 0.
lb.resize(cst.getNumSymbolIds() + 1, 0);
}
numElements *= diffConstant;
if (lbs) {
lbs->push_back(lb);
}
if (shape) {
shape->push_back(diffConstant);
}
}
return numElements;
}
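// Illustrative sketch of getBoundingConstantSizeAndShape (assumed region, not
// from this file): for a rank-2 region on memref<32x32xf32> constrained by
//   {0 <= d0 <= 31, %i <= d1 <= %i + 7},
// the constant extents are 32 and 8, so the method returns 32 * 8 = 256
// elements with 'shape' set to {32, 8}; in 'lbs', the first dimension's lower
// bound is the constant 0 while the second is symbolic in %i.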
/// Computes the memory region accessed by this memref, with the region
/// represented as constraints symbolic/parametric in the 'loopDepth' loops
/// surrounding 'opInst' and in any additional Function symbols. Returns false
/// if this fails due to yet-unimplemented cases.
// For example, the memref region for this load operation at loopDepth = 1 will
// be as below:
//
// for %i = 0 to 32 {
// for %ii = %i to (d0) -> (d0 + 8) (%i) {
// load %A[%ii]
// }
// }
//
// region: {memref = %A, write = false, {%i <= m0 <= %i + 7} }
// The last field is a 2-d FlatAffineConstraints symbolic in %i.
//
// TODO(bondhugula): extend this to any other memref dereferencing ops
// (dma_start, dma_wait).
bool mlir::getMemRefRegion(OperationInst *opInst, unsigned loopDepth,
MemRefRegion *region) {
OpPointer<LoadOp> loadOp;
OpPointer<StoreOp> storeOp;
unsigned rank;
SmallVector<Value *, 4> indices;
if ((loadOp = opInst->dyn_cast<LoadOp>())) {
rank = loadOp->getMemRefType().getRank();
for (auto *index : loadOp->getIndices()) {
indices.push_back(index);
}
region->memref = loadOp->getMemRef();
region->setWrite(false);
} else if ((storeOp = opInst->dyn_cast<StoreOp>())) {
rank = storeOp->getMemRefType().getRank();
for (auto *index : storeOp->getIndices()) {
indices.push_back(index);
}
region->memref = storeOp->getMemRef();
region->setWrite(true);
} else {
return false;
}
// Build the constraints for this region.
FlatAffineConstraints *regionCst = region->getConstraints();
FuncBuilder b(opInst);
auto idMap = b.getMultiDimIdentityMap(rank);
// Initialize 'accessValueMap' and compose with reachable AffineApplyOps.
AffineValueMap accessValueMap(idMap, indices);
forwardSubstituteReachableOps(&accessValueMap);
AffineMap accessMap = accessValueMap.getAffineMap();
  // We'll first associate the dims and symbols of the access map with the dims
  // and symbols, respectively, of regionCst. This association will change
  // below once regionCst is fully constructed.
regionCst->reset(accessMap.getNumDims(), accessMap.getNumSymbols(), 0,
accessValueMap.getOperands());
// Add equality constraints.
unsigned numDims = accessMap.getNumDims();
unsigned numSymbols = accessMap.getNumSymbols();
  // Add inequalities for loop lower/upper bounds.
for (unsigned i = 0; i < numDims + numSymbols; ++i) {
if (auto *loop = dyn_cast<ForInst>(accessValueMap.getOperand(i))) {
// Note that regionCst can now have more dimensions than accessMap if the
// bounds expressions involve outer loops or other symbols.
// TODO(bondhugula): rewrite this to use getInstIndexSet; this way
// conditionals will be handled when the latter supports it.
if (!regionCst->addForInstDomain(*loop))
return false;
} else {
// Has to be a valid symbol.
auto *symbol = accessValueMap.getOperand(i);
assert(symbol->isValidSymbol());
// Check if the symbol is a constant.
if (auto *opInst = symbol->getDefiningInst()) {
if (auto constOp = opInst->dyn_cast<ConstantIndexOp>()) {
regionCst->setIdToConstant(*symbol, constOp->getValue());
}
}
}
}
// Add access function equalities to connect loop IVs to data dimensions.
if (!regionCst->composeMap(&accessValueMap)) {
LLVM_DEBUG(llvm::dbgs() << "getMemRefRegion: compose affine map failed\n");
return false;
}
// Eliminate any loop IVs other than the outermost 'loopDepth' IVs, on which
// this memref region is symbolic.
SmallVector<ForInst *, 4> outerIVs;
getLoopIVs(*opInst, &outerIVs);
outerIVs.resize(loopDepth);
for (auto *operand : accessValueMap.getOperands()) {
ForInst *iv;
if ((iv = dyn_cast<ForInst>(operand)) &&
std::find(outerIVs.begin(), outerIVs.end(), iv) == outerIVs.end()) {
regionCst->projectOut(operand);
}
}
// Project out any local variables (these would have been added for any
// mod/divs).
regionCst->projectOut(regionCst->getNumDimIds() +
regionCst->getNumSymbolIds(),
regionCst->getNumLocalIds());
  // Set all identifiers appearing after the first 'rank' identifiers as
  // symbolic identifiers - so that the ones corresponding to the memref
  // dimensions are the dimensional identifiers for the memref region.
regionCst->setDimSymbolSeparation(regionCst->getNumIds() - rank);
// Constant fold any symbolic identifiers.
regionCst->constantFoldIdRange(/*pos=*/regionCst->getNumDimIds(),
/*num=*/regionCst->getNumSymbolIds());
assert(regionCst->getNumDimIds() == rank && "unexpected MemRefRegion format");
return true;
}
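// Usage sketch (hypothetical caller, e.g. a memory footprint analysis or DMA
// generation pass; 'loadOpInst' is an assumed OperationInst* for a load):
// compute the region for the access at a given depth and then query its
// constant bounding size and shape.
//
//   MemRefRegion region;
//   if (getMemRefRegion(loadOpInst, /*loopDepth=*/1, &region)) {
//     SmallVector<int, 4> shape;
//     Optional<int64_t> numElements =
//         region.getBoundingConstantSizeAndShape(&shape, /*lbs=*/nullptr);
//     // 'numElements' is None if the region isn't bounded by constants.
//   }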
/// Returns the size of the memref data in bytes if it's statically shaped,
/// None otherwise. If the element type of the memref is a vector type, the
/// size of the vector is taken into account.
Optional<uint64_t> mlir::getMemRefSizeInBytes(MemRefType memRefType) {
if (memRefType.getNumDynamicDims() > 0)
return None;
auto elementType = memRefType.getElementType();
if (!elementType.isIntOrFloat() && !elementType.isa<VectorType>())
return None;
uint64_t sizeInBits;
if (elementType.isIntOrFloat()) {
sizeInBits = elementType.getIntOrFloatBitWidth();
} else {
auto vectorType = elementType.cast<VectorType>();
sizeInBits =
vectorType.getElementTypeBitWidth() * vectorType.getNumElements();
}
for (unsigned i = 0, e = memRefType.getRank(); i < e; i++) {
sizeInBits = sizeInBits * memRefType.getDimSize(i);
}
return llvm::divideCeil(sizeInBits, 8);
}
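// Worked example for getMemRefSizeInBytes (values chosen for illustration):
// for memref<16x4xvector<4xf32>>, the element size is 4 * 32 = 128 bits, so
// the total is 16 * 4 * 128 = 8192 bits, i.e. divideCeil(8192, 8) = 1024
// bytes. A memref with any dynamic dimension (e.g. memref<?x4xf32>) yields
// None.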
template <typename LoadOrStoreOpPointer>
bool mlir::boundCheckLoadOrStoreOp(LoadOrStoreOpPointer loadOrStoreOp,
bool emitError) {
static_assert(
std::is_same<LoadOrStoreOpPointer, OpPointer<LoadOp>>::value ||
std::is_same<LoadOrStoreOpPointer, OpPointer<StoreOp>>::value,
"function argument should be either a LoadOp or a StoreOp");
OperationInst *opInst = loadOrStoreOp->getInstruction();
MemRefRegion region;
if (!getMemRefRegion(opInst, /*loopDepth=*/0, &region))
return false;
  LLVM_DEBUG(llvm::dbgs() << "Memory region:\n");
LLVM_DEBUG(region.getConstraints()->dump());
bool outOfBounds = false;
unsigned rank = loadOrStoreOp->getMemRefType().getRank();
// For each dimension, check for out of bounds.
for (unsigned r = 0; r < rank; r++) {
    // Intersect the memory region with a constraint capturing an out-of-bounds
    // access (either beyond the upper bound or below the lower bound), and
    // check if the resulting constraint system is feasible. If it is, there is
    // at least one point out of bounds.
    int dimSize = loadOrStoreOp->getMemRefType().getDimSize(r);
    // TODO(bondhugula): handle dynamic dim sizes.
    if (dimSize == -1)
      continue;
    // Check for overflow: d_i >= memref dim size.
    FlatAffineConstraints ucst(*region.getConstraints());
    ucst.addConstantLowerBound(r, dimSize);
    bool upperBoundViolated = !ucst.isEmpty();
    if (upperBoundViolated && emitError) {
      loadOrStoreOp->emitOpError(
          "memref out of upper bound access along dimension #" + Twine(r + 1));
    }
    // Check for a negative index: d_i <= -1.
    FlatAffineConstraints lcst(*region.getConstraints());
    lcst.addConstantUpperBound(r, -1);
    bool lowerBoundViolated = !lcst.isEmpty();
    if (lowerBoundViolated && emitError) {
      loadOrStoreOp->emitOpError(
          "memref out of lower bound access along dimension #" + Twine(r + 1));
    }
    outOfBounds |= upperBoundViolated || lowerBoundViolated;
}
return outOfBounds;
}
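// Illustrative sketch of boundCheckLoadOrStoreOp (assumed IR, not from this
// file): for
//
//   for %i = 0 to 128 {
//     %v = load %A[%i]    // %A : memref<64xf32>
//   }
//
// the access region spans [0, 127] along dimension 0; intersecting it with
// {d0 >= 64} is feasible, so the function returns true and, if 'emitError' is
// set, reports an out-of-upper-bound access along dimension #1.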
// Explicitly instantiate the template so that the compiler knows we need them!
template bool mlir::boundCheckLoadOrStoreOp(OpPointer<LoadOp> loadOp,
bool emitError);
template bool mlir::boundCheckLoadOrStoreOp(OpPointer<StoreOp> storeOp,
bool emitError);
// Returns in 'positions' the Block positions of 'inst' in each ancestor
// Block from the Block containing 'inst', stopping at 'limitBlock'.
static void findInstPosition(const Instruction *inst, Block *limitBlock,
SmallVectorImpl<unsigned> *positions) {
const Block *block = inst->getBlock();
while (block != limitBlock) {
int instPosInBlock = block->findInstPositionInBlock(*inst);
assert(instPosInBlock >= 0);
positions->push_back(instPosInBlock);
inst = block->getContainingInst();
block = inst->getBlock();
}
std::reverse(positions->begin(), positions->end());
}
// Returns the Instruction in a possibly nested set of Blocks, where the
// position of the instruction is represented by 'positions', which has a
// Block position for each level of nesting.
static Instruction *getInstAtPosition(ArrayRef<unsigned> positions,
unsigned level, Block *block) {
unsigned i = 0;
for (auto &inst : *block) {
if (i != positions[level]) {
++i;
continue;
}
if (level == positions.size() - 1)
return &inst;
if (auto *childForInst = dyn_cast<ForInst>(&inst))
return getInstAtPosition(positions, level + 1, childForInst->getBody());
if (auto *ifInst = dyn_cast<IfInst>(&inst)) {
auto *ret = getInstAtPosition(positions, level + 1, ifInst->getThen());
if (ret != nullptr)
return ret;
if (auto *elseClause = ifInst->getElse())
return getInstAtPosition(positions, level + 1, elseClause);
}
}
return nullptr;
}
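// Illustrative sketch (assumed nesting, following how these two helpers are
// used below): suppose 'inst' is the third instruction in the body of an
// inner 'for', and that inner 'for' is the first instruction in the body of an
// outer 'for' that lives directly in 'limitBlock'. Then findInstPosition(inst,
// limitBlock, &positions) yields positions = {0, 2} (the position within
// 'limitBlock' itself is not recorded). getInstAtPosition({0, 2}, /*level=*/0,
// outer for's body) walks back down to 'inst'; this round trip is how
// insertBackwardComputationSlice locates the source access inside the cloned
// loop nest.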
// Computes the memref dependence between 'srcAccess' and 'dstAccess', and uses
// the dependence constraint system to create AffineMaps with which to adjust
// the loop bounds of the inserted computation slice so that they are functions
// of the loop IVs and symbols of the loops surrounding 'dstAccess'.
ForInst *mlir::insertBackwardComputationSlice(MemRefAccess *srcAccess,
MemRefAccess *dstAccess,
unsigned srcLoopDepth,
unsigned dstLoopDepth) {
FlatAffineConstraints dependenceConstraints;
if (!checkMemrefAccessDependence(*srcAccess, *dstAccess, /*loopDepth=*/1,
&dependenceConstraints,
/*dependenceComponents=*/nullptr)) {
return nullptr;
}
// Get loop nest surrounding src operation.
SmallVector<ForInst *, 4> srcLoopNest;
getLoopIVs(*srcAccess->opInst, &srcLoopNest);
unsigned srcLoopNestSize = srcLoopNest.size();
assert(srcLoopDepth <= srcLoopNestSize);
// Get loop nest surrounding dst operation.
SmallVector<ForInst *, 4> dstLoopNest;
getLoopIVs(*dstAccess->opInst, &dstLoopNest);
unsigned dstLoopNestSize = dstLoopNest.size();
(void)dstLoopNestSize;
assert(dstLoopDepth > 0);
assert(dstLoopDepth <= dstLoopNestSize);
// Solve for src IVs in terms of dst IVs, symbols and constants.
SmallVector<AffineMap, 4> srcIvMaps(srcLoopNestSize, AffineMap::Null());
std::vector<SmallVector<Value *, 2>> srcIvOperands(srcLoopNestSize);
for (unsigned i = 0; i < srcLoopNestSize; ++i) {
    // Skip IVs at depths greater than the requested src loop depth.
if (i >= srcLoopDepth) {
srcIvMaps[i] = AffineMap::Null();
continue;
}
auto cst = dependenceConstraints.clone();
for (int j = srcLoopNestSize - 1; j >= 0; --j) {
if (i != j)
cst->projectOut(j);
}
    // TODO(andydavis) Check for the case with two equalities where we have
    // set one IV to a constant. Set a constant IV map for these cases.
if (cst->getNumEqualities() != 1) {
srcIvMaps[i] = AffineMap::Null();
continue;
}
SmallVector<unsigned, 2> nonZeroDimIds;
SmallVector<unsigned, 2> nonZeroSymbolIds;
srcIvMaps[i] = cst->toAffineMapFromEq(0, 0, srcAccess->opInst->getContext(),
&nonZeroDimIds, &nonZeroSymbolIds);
if (srcIvMaps[i] == AffineMap::Null()) {
continue;
}
// Add operands for all non-zero dst dims and symbols.
// TODO(andydavis) Add local variable support.
for (auto dimId : nonZeroDimIds) {
if (dimId - 1 >= dstLoopDepth) {
        // This src IV depends on a dst loop IV at depth >= 'dstLoopDepth',
        // i.e., on a loop inside the insertion point. So we cannot slice the
        // iteration space at 'srcLoopDepth' and also insert the slice into the
        // dst loop nest at 'dstLoopDepth'.
return nullptr;
}
srcIvOperands[i].push_back(dstLoopNest[dimId - 1]);
}
    // TODO(andydavis) Add symbols from the access function. Ideally, we
    // should be able to query the constraint system for the Value associated
    // with each symbol identifier in 'nonZeroSymbolIds'.
}
// Find the inst block positions of 'srcAccess->opInst' within 'srcLoopNest'.
SmallVector<unsigned, 4> positions;
findInstPosition(srcAccess->opInst, srcLoopNest[0]->getBlock(), &positions);
  // Clone the src loop nest and insert it at the beginning of the instruction
  // block of the loop at 'dstLoopDepth' in 'dstLoopNest'.
auto *dstForInst = dstLoopNest[dstLoopDepth - 1];
FuncBuilder b(dstForInst->getBody(), dstForInst->getBody()->begin());
DenseMap<const Value *, Value *> operandMap;
auto *sliceLoopNest = cast<ForInst>(b.clone(*srcLoopNest[0], operandMap));
// Lookup inst in cloned 'sliceLoopNest' at 'positions'.
Instruction *sliceInst =
getInstAtPosition(positions, /*level=*/0, sliceLoopNest->getBody());
// Get loop nest surrounding 'sliceInst'.
SmallVector<ForInst *, 4> sliceSurroundingLoops;
getLoopIVs(*sliceInst, &sliceSurroundingLoops);
unsigned sliceSurroundingLoopsSize = sliceSurroundingLoops.size();
(void)sliceSurroundingLoopsSize;
// Update loop bounds for loops in 'sliceLoopNest'.
unsigned sliceLoopLimit = dstLoopDepth + srcLoopNestSize;
assert(sliceLoopLimit <= sliceSurroundingLoopsSize);
for (unsigned i = dstLoopDepth; i < sliceLoopLimit; ++i) {
auto *forInst = sliceSurroundingLoops[i];
unsigned index = i - dstLoopDepth;
AffineMap lbMap = srcIvMaps[index];
if (lbMap == AffineMap::Null())
continue;
forInst->setLowerBound(srcIvOperands[index], lbMap);
    // Create an upper bound map which is the lower bound map + 1.
assert(lbMap.getNumResults() == 1);
AffineExpr ubResultExpr = lbMap.getResult(0) + 1;
AffineMap ubMap = AffineMap::get(lbMap.getNumDims(), lbMap.getNumSymbols(),
{ubResultExpr}, {});
forInst->setUpperBound(srcIvOperands[index], ubMap);
}
return sliceLoopNest;
}
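// Illustrative sketch of insertBackwardComputationSlice (assumed IR): with a
// store in the src nest and a load in the dst nest,
//
//   for %i = 0 to 100 {
//     store %v, %A[%i]    // srcAccess
//   }
//   for %j = 0 to 100 {
//     load %A[%j]         // dstAccess
//   }
//
// a call with srcLoopDepth = 1 and dstLoopDepth = 1 clones the src loop into
// the body of the %j loop and tightens its bounds to the single iteration that
// produces the value read at %j, roughly:
//
//   for %j = 0 to 100 {
//     for %i0 = (d0) -> (d0)(%j) to (d0) -> (d0 + 1)(%j) {
//       store %v, %A[%i0]
//     }
//     load %A[%j]
//   }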
void mlir::getMemRefAccess(OperationInst *loadOrStoreOpInst,
MemRefAccess *access) {
if (auto loadOp = loadOrStoreOpInst->dyn_cast<LoadOp>()) {
access->memref = loadOp->getMemRef();
access->opInst = loadOrStoreOpInst;
auto loadMemrefType = loadOp->getMemRefType();
access->indices.reserve(loadMemrefType.getRank());
for (auto *index : loadOp->getIndices()) {
access->indices.push_back(index);
}
} else {
assert(loadOrStoreOpInst->isa<StoreOp>() && "load/store op expected");
auto storeOp = loadOrStoreOpInst->dyn_cast<StoreOp>();
access->opInst = loadOrStoreOpInst;
access->memref = storeOp->getMemRef();
auto storeMemrefType = storeOp->getMemRefType();
access->indices.reserve(storeMemrefType.getRank());
for (auto *index : storeOp->getIndices()) {
access->indices.push_back(index);
}
}
}
/// Returns the nesting depth of this instruction, i.e., the number of loops
/// surrounding it.
unsigned mlir::getNestingDepth(const Instruction &stmt) {
const Instruction *currInst = &stmt;
unsigned depth = 0;
while ((currInst = currInst->getParentInst())) {
if (isa<ForInst>(currInst))
depth++;
}
return depth;
}
/// Returns the number of surrounding loops common to both 'A' and 'B', where
/// the surrounding loops are considered from outermost to innermost.
unsigned mlir::getNumCommonSurroundingLoops(const Instruction &A,
const Instruction &B) {
SmallVector<ForInst *, 4> loopsA, loopsB;
getLoopIVs(A, &loopsA);
getLoopIVs(B, &loopsB);
unsigned minNumLoops = std::min(loopsA.size(), loopsB.size());
unsigned numCommonLoops = 0;
for (unsigned i = 0; i < minNumLoops; ++i) {
if (loopsA[i] != loopsB[i])
break;
++numCommonLoops;
}
return numCommonLoops;
}
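// Illustrative sketch of getNumCommonSurroundingLoops (assumed nesting): if A
// sits inside loops {%i, %j} and B sits inside loops {%i, %k}, only the
// outermost loop is shared, so the function returns 1; for instructions in
// disjoint loop nests it returns 0.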