| //===- Utils.cpp ---- Misc utilities for analysis -------------------------===// |
| // |
| // Copyright 2019 The MLIR Authors. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| // ============================================================================= |
| // |
| // This file implements miscellaneous analysis routines for non-loop IR |
| // structures. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "mlir/Analysis/Utils.h" |
| |
| #include "mlir/AffineOps/AffineOps.h" |
| #include "mlir/Analysis/AffineAnalysis.h" |
| #include "mlir/Analysis/AffineStructures.h" |
| #include "mlir/IR/Builders.h" |
| #include "mlir/StandardOps/Ops.h" |
| #include "llvm/ADT/DenseMap.h" |
| #include "llvm/Support/Debug.h" |
| #include "llvm/Support/raw_ostream.h" |
| |
| #define DEBUG_TYPE "analysis-utils" |
| |
| using namespace mlir; |
| |
| using llvm::SmallDenseMap; |
| |
| /// Populates 'loops' with IVs of the loops surrounding 'inst' ordered from |
| /// the outermost 'for' instruction to the innermost one. |
| void mlir::getLoopIVs(const Instruction &inst, |
| SmallVectorImpl<OpPointer<AffineForOp>> *loops) { |
| auto *currInst = inst.getParentInst(); |
| OpPointer<AffineForOp> currAffineForOp; |
| // Traverse up the hierarchy collecing all 'for' instruction while |
| // skipping over 'if' instructions. |
| while (currInst && ((currAffineForOp = currInst->dyn_cast<AffineForOp>()) || |
| currInst->isa<AffineIfOp>())) { |
| if (currAffineForOp) |
| loops->push_back(currAffineForOp); |
| currInst = currInst->getParentInst(); |
| } |
| std::reverse(loops->begin(), loops->end()); |
| } |
| |
| // Populates 'cst' with FlatAffineConstraints which represent slice bounds. |
| LogicalResult |
| ComputationSliceState::getAsConstraints(FlatAffineConstraints *cst) { |
| assert(!lbOperands.empty()); |
| // Adds src 'ivs' as dimension identifiers in 'cst'. |
| unsigned numDims = ivs.size(); |
| // Adds operands (dst ivs and symbols) as symbols in 'cst'. |
| unsigned numSymbols = lbOperands[0].size(); |
| |
| SmallVector<Value *, 4> values(ivs); |
| // Append 'ivs' then 'operands' to 'values'. |
| values.append(lbOperands[0].begin(), lbOperands[0].end()); |
| cst->reset(numDims, numSymbols, 0, values); |
| |
| // Add loop bound constraints for values which are loop IVs and equality |
| // constraints for symbols which are constants. |
| for (const auto &value : values) { |
| assert(cst->containsId(*value) && "value expected to be present"); |
| if (isValidSymbol(value)) { |
| // Check if the symbol is a constant. |
| if (auto *inst = value->getDefiningInst()) { |
| if (auto constOp = inst->dyn_cast<ConstantIndexOp>()) { |
| cst->setIdToConstant(*value, constOp->getValue()); |
| } |
| } |
| } else { |
| if (auto loop = getForInductionVarOwner(value)) { |
| if (failed(cst->addAffineForOpDomain(loop))) |
| return failure(); |
| } |
| } |
| } |
| |
| // Add slices bounds on 'ivs' using maps 'lbs'/'ubs' with 'lbOperands[0]' |
| LogicalResult ret = cst->addSliceBounds(ivs, lbs, ubs, lbOperands[0]); |
| assert(succeeded(ret) && |
| "should not fail as we never have semi-affine slice maps"); |
| (void)ret; |
| return success(); |
| } |
| |
| // Clears state bounds and operand state. |
| void ComputationSliceState::clearBounds() { |
| lbs.clear(); |
| ubs.clear(); |
| lbOperands.clear(); |
| ubOperands.clear(); |
| } |
| |
| unsigned MemRefRegion::getRank() const { |
| return memref->getType().cast<MemRefType>().getRank(); |
| } |
| |
| Optional<int64_t> MemRefRegion::getConstantBoundingSizeAndShape( |
| SmallVectorImpl<int64_t> *shape, std::vector<SmallVector<int64_t, 4>> *lbs, |
| SmallVectorImpl<int64_t> *lbDivisors) const { |
| auto memRefType = memref->getType().cast<MemRefType>(); |
| unsigned rank = memRefType.getRank(); |
| if (shape) |
| shape->reserve(rank); |
| |
| assert(rank == cst.getNumDimIds() && "inconsistent memref region"); |
| |
| // Find a constant upper bound on the extent of this memref region along each |
| // dimension. |
| int64_t numElements = 1; |
| int64_t diffConstant; |
| int64_t lbDivisor; |
| for (unsigned d = 0; d < rank; d++) { |
| SmallVector<int64_t, 4> lb; |
| Optional<int64_t> diff = cst.getConstantBoundOnDimSize(d, &lb, &lbDivisor); |
| if (diff.hasValue()) { |
| diffConstant = diff.getValue(); |
| assert(lbDivisor > 0); |
| } else { |
| // If no constant bound is found, then it can always be bound by the |
| // memref's dim size if the latter has a constant size along this dim. |
| auto dimSize = memRefType.getDimSize(d); |
| if (dimSize == -1) |
| return None; |
| diffConstant = dimSize; |
| // Lower bound becomes 0. |
| lb.resize(cst.getNumSymbolIds() + 1, 0); |
| lbDivisor = 1; |
| } |
| numElements *= diffConstant; |
| if (lbs) { |
| lbs->push_back(lb); |
| assert(lbDivisors && "both lbs and lbDivisor or none"); |
| lbDivisors->push_back(lbDivisor); |
| } |
| if (shape) { |
| shape->push_back(diffConstant); |
| } |
| } |
| return numElements; |
| } |
| |
| LogicalResult MemRefRegion::unionBoundingBox(const MemRefRegion &other) { |
| assert(memref == other.memref); |
| return cst.unionBoundingBox(*other.getConstraints()); |
| } |
| |
| /// Computes the memory region accessed by this memref with the region |
| /// represented as constraints symbolic/parameteric in 'loopDepth' loops |
| /// surrounding opInst and any additional Function symbols. |
| // For example, the memref region for this load operation at loopDepth = 1 will |
| // be as below: |
| // |
| // for %i = 0 to 32 { |
| // for %ii = %i to (d0) -> (d0 + 8) (%i) { |
| // load %A[%ii] |
| // } |
| // } |
| // |
| // region: {memref = %A, write = false, {%i <= m0 <= %i + 7} } |
| // The last field is a 2-d FlatAffineConstraints symbolic in %i. |
| // |
| // TODO(bondhugula): extend this to any other memref dereferencing ops |
| // (dma_start, dma_wait). |
| LogicalResult MemRefRegion::compute(Instruction *inst, unsigned loopDepth, |
| ComputationSliceState *sliceState) { |
| assert((inst->isa<LoadOp>() || inst->isa<StoreOp>()) && |
| "load/store op expected"); |
| |
| MemRefAccess access(inst); |
| memref = access.memref; |
| write = access.isStore(); |
| |
| unsigned rank = access.getRank(); |
| |
| LLVM_DEBUG(llvm::dbgs() << "MemRefRegion::compute: " << *inst |
| << "depth: " << loopDepth << "\n";); |
| |
| if (rank == 0) { |
| SmallVector<OpPointer<AffineForOp>, 4> ivs; |
| getLoopIVs(*inst, &ivs); |
| SmallVector<Value *, 8> regionSymbols; |
| extractForInductionVars(ivs, ®ionSymbols); |
| // A rank 0 memref has a 0-d region. |
| cst.reset(rank, loopDepth, 0, regionSymbols); |
| return success(); |
| } |
| |
| // Build the constraints for this region. |
| AffineValueMap accessValueMap; |
| access.getAccessMap(&accessValueMap); |
| AffineMap accessMap = accessValueMap.getAffineMap(); |
| |
| unsigned numDims = accessMap.getNumDims(); |
| unsigned numSymbols = accessMap.getNumSymbols(); |
| unsigned numOperands = accessValueMap.getNumOperands(); |
| // Merge operands with slice operands. |
| SmallVector<Value *, 4> operands; |
| operands.resize(numOperands); |
| for (unsigned i = 0; i < numOperands; ++i) |
| operands[i] = accessValueMap.getOperand(i); |
| |
| if (sliceState != nullptr) { |
| operands.reserve(operands.size() + sliceState->lbOperands[0].size()); |
| // Append slice operands to 'operands' as symbols. |
| for (auto extraOperand : sliceState->lbOperands[0]) { |
| if (!llvm::is_contained(operands, extraOperand)) { |
| operands.push_back(extraOperand); |
| numSymbols++; |
| } |
| } |
| } |
| // We'll first associate the dims and symbols of the access map to the dims |
| // and symbols resp. of cst. This will change below once cst is |
| // fully constructed out. |
| cst.reset(numDims, numSymbols, 0, operands); |
| |
| // Add equality constraints. |
| // Add inequalties for loop lower/upper bounds. |
| for (unsigned i = 0; i < numDims + numSymbols; ++i) { |
| auto *operand = operands[i]; |
| if (auto loop = getForInductionVarOwner(operand)) { |
| // Note that cst can now have more dimensions than accessMap if the |
| // bounds expressions involve outer loops or other symbols. |
| // TODO(bondhugula): rewrite this to use getInstIndexSet; this way |
| // conditionals will be handled when the latter supports it. |
| if (failed(cst.addAffineForOpDomain(loop))) |
| return failure(); |
| } else { |
| // Has to be a valid symbol. |
| auto *symbol = operand; |
| assert(isValidSymbol(symbol)); |
| // Check if the symbol is a constant. |
| if (auto *inst = symbol->getDefiningInst()) { |
| if (auto constOp = inst->dyn_cast<ConstantIndexOp>()) { |
| cst.setIdToConstant(*symbol, constOp->getValue()); |
| } |
| } |
| } |
| } |
| |
| // Add lower/upper bounds on loop IVs using bounds from 'sliceState'. |
| if (sliceState != nullptr) { |
| // Add dim and symbol slice operands. |
| for (const auto &operand : sliceState->lbOperands[0]) { |
| cst.addInductionVarOrTerminalSymbol(const_cast<Value *>(operand)); |
| } |
| // Add upper/lower bounds from 'sliceState' to 'cst'. |
| LogicalResult ret = |
| cst.addSliceBounds(sliceState->ivs, sliceState->lbs, sliceState->ubs, |
| sliceState->lbOperands[0]); |
| assert(succeeded(ret) && |
| "should not fail as we never have semi-affine slice maps"); |
| (void)ret; |
| } |
| |
| // Add access function equalities to connect loop IVs to data dimensions. |
| if (failed(cst.composeMap(&accessValueMap))) { |
| inst->emitError("getMemRefRegion: compose affine map failed"); |
| LLVM_DEBUG(accessValueMap.getAffineMap().dump()); |
| return failure(); |
| } |
| |
| // Set all identifiers appearing after the first 'rank' identifiers as |
| // symbolic identifiers - so that the ones corresponding to the memref |
| // dimensions are the dimensional identifiers for the memref region. |
| cst.setDimSymbolSeparation(cst.getNumDimAndSymbolIds() - rank); |
| |
| // Eliminate any loop IVs other than the outermost 'loopDepth' IVs, on which |
| // this memref region is symbolic. |
| SmallVector<OpPointer<AffineForOp>, 4> enclosingIVs; |
| getLoopIVs(*inst, &enclosingIVs); |
| assert(loopDepth <= enclosingIVs.size() && "invalid loop depth"); |
| enclosingIVs.resize(loopDepth); |
| SmallVector<Value *, 4> ids; |
| cst.getIdValues(cst.getNumDimIds(), cst.getNumDimAndSymbolIds(), &ids); |
| for (auto *id : ids) { |
| OpPointer<AffineForOp> iv; |
| if ((iv = getForInductionVarOwner(id)) && |
| llvm::is_contained(enclosingIVs, iv) == false) { |
| cst.projectOut(id); |
| } |
| } |
| |
| // Project out any local variables (these would have been added for any |
| // mod/divs). |
| cst.projectOut(cst.getNumDimAndSymbolIds(), cst.getNumLocalIds()); |
| |
| // Constant fold any symbolic identifiers. |
| cst.constantFoldIdRange(/*pos=*/cst.getNumDimIds(), |
| /*num=*/cst.getNumSymbolIds()); |
| |
| assert(cst.getNumDimIds() == rank && "unexpected MemRefRegion format"); |
| |
| LLVM_DEBUG(llvm::dbgs() << "Memory region:\n"); |
| LLVM_DEBUG(cst.dump()); |
| return success(); |
| } |
| |
| // TODO(mlir-team): improve/complete this when we have target data. |
| static unsigned getMemRefEltSizeInBytes(MemRefType memRefType) { |
| auto elementType = memRefType.getElementType(); |
| |
| unsigned sizeInBits; |
| if (elementType.isIntOrFloat()) { |
| sizeInBits = elementType.getIntOrFloatBitWidth(); |
| } else { |
| auto vectorType = elementType.cast<VectorType>(); |
| sizeInBits = |
| vectorType.getElementTypeBitWidth() * vectorType.getNumElements(); |
| } |
| return llvm::divideCeil(sizeInBits, 8); |
| } |
| |
| // Returns the size of the region. |
| Optional<int64_t> MemRefRegion::getRegionSize() { |
| auto memRefType = memref->getType().cast<MemRefType>(); |
| |
| auto layoutMaps = memRefType.getAffineMaps(); |
| if (layoutMaps.size() > 1 || |
| (layoutMaps.size() == 1 && !layoutMaps[0].isIdentity())) { |
| LLVM_DEBUG(llvm::dbgs() << "Non-identity layout map not yet supported\n"); |
| return false; |
| } |
| |
| // Indices to use for the DmaStart op. |
| // Indices for the original memref being DMAed from/to. |
| SmallVector<Value *, 4> memIndices; |
| // Indices for the faster buffer being DMAed into/from. |
| SmallVector<Value *, 4> bufIndices; |
| |
| // Compute the extents of the buffer. |
| Optional<int64_t> numElements = getConstantBoundingSizeAndShape(); |
| if (!numElements.hasValue()) { |
| LLVM_DEBUG(llvm::dbgs() << "Dynamic shapes not yet supported\n"); |
| return None; |
| } |
| return getMemRefEltSizeInBytes(memRefType) * numElements.getValue(); |
| } |
| |
| /// Returns the size of memref data in bytes if it's statically shaped, None |
| /// otherwise. If the element of the memref has vector type, takes into account |
| /// size of the vector as well. |
| // TODO(mlir-team): improve/complete this when we have target data. |
| Optional<uint64_t> mlir::getMemRefSizeInBytes(MemRefType memRefType) { |
| if (memRefType.getNumDynamicDims() > 0) |
| return None; |
| auto elementType = memRefType.getElementType(); |
| if (!elementType.isIntOrFloat() && !elementType.isa<VectorType>()) |
| return None; |
| |
| uint64_t sizeInBytes = getMemRefEltSizeInBytes(memRefType); |
| for (unsigned i = 0, e = memRefType.getRank(); i < e; i++) { |
| sizeInBytes = sizeInBytes * memRefType.getDimSize(i); |
| } |
| return sizeInBytes; |
| } |
| |
| template <typename LoadOrStoreOpPointer> |
| LogicalResult mlir::boundCheckLoadOrStoreOp(LoadOrStoreOpPointer loadOrStoreOp, |
| bool emitError) { |
| static_assert( |
| std::is_same<LoadOrStoreOpPointer, OpPointer<LoadOp>>::value || |
| std::is_same<LoadOrStoreOpPointer, OpPointer<StoreOp>>::value, |
| "argument should be either a LoadOp or a StoreOp"); |
| |
| Instruction *opInst = loadOrStoreOp->getInstruction(); |
| |
| MemRefRegion region(opInst->getLoc()); |
| if (failed(region.compute(opInst, /*loopDepth=*/0))) |
| return success(); |
| |
| LLVM_DEBUG(llvm::dbgs() << "Memory region"); |
| LLVM_DEBUG(region.getConstraints()->dump()); |
| |
| bool outOfBounds = false; |
| unsigned rank = loadOrStoreOp->getMemRefType().getRank(); |
| |
| // For each dimension, check for out of bounds. |
| for (unsigned r = 0; r < rank; r++) { |
| FlatAffineConstraints ucst(*region.getConstraints()); |
| |
| // Intersect memory region with constraint capturing out of bounds (both out |
| // of upper and out of lower), and check if the constraint system is |
| // feasible. If it is, there is at least one point out of bounds. |
| SmallVector<int64_t, 4> ineq(rank + 1, 0); |
| int64_t dimSize = loadOrStoreOp->getMemRefType().getDimSize(r); |
| // TODO(bondhugula): handle dynamic dim sizes. |
| if (dimSize == -1) |
| continue; |
| |
| // Check for overflow: d_i >= memref dim size. |
| ucst.addConstantLowerBound(r, dimSize); |
| outOfBounds = !ucst.isEmpty(); |
| if (outOfBounds && emitError) { |
| loadOrStoreOp->emitOpError( |
| "memref out of upper bound access along dimension #" + Twine(r + 1)); |
| } |
| |
| // Check for a negative index. |
| FlatAffineConstraints lcst(*region.getConstraints()); |
| std::fill(ineq.begin(), ineq.end(), 0); |
| // d_i <= -1; |
| lcst.addConstantUpperBound(r, -1); |
| outOfBounds = !lcst.isEmpty(); |
| if (outOfBounds && emitError) { |
| loadOrStoreOp->emitOpError( |
| "memref out of lower bound access along dimension #" + Twine(r + 1)); |
| } |
| } |
| return failure(outOfBounds); |
| } |
| |
| // Explicitly instantiate the template so that the compiler knows we need them! |
| template LogicalResult mlir::boundCheckLoadOrStoreOp(OpPointer<LoadOp> loadOp, |
| bool emitError); |
| template LogicalResult mlir::boundCheckLoadOrStoreOp(OpPointer<StoreOp> storeOp, |
| bool emitError); |
| |
| // Returns in 'positions' the Block positions of 'inst' in each ancestor |
| // Block from the Block containing instruction, stopping at 'limitBlock'. |
| static void findInstPosition(const Instruction *inst, Block *limitBlock, |
| SmallVectorImpl<unsigned> *positions) { |
| const Block *block = inst->getBlock(); |
| while (block != limitBlock) { |
| // FIXME: This algorithm is unnecessarily O(n) and should be improved to not |
| // rely on linear scans. |
| int instPosInBlock = std::distance(block->begin(), inst->getIterator()); |
| positions->push_back(instPosInBlock); |
| inst = block->getContainingInst(); |
| block = inst->getBlock(); |
| } |
| std::reverse(positions->begin(), positions->end()); |
| } |
| |
| // Returns the Instruction in a possibly nested set of Blocks, where the |
| // position of the instruction is represented by 'positions', which has a |
| // Block position for each level of nesting. |
| static Instruction *getInstAtPosition(ArrayRef<unsigned> positions, |
| unsigned level, Block *block) { |
| unsigned i = 0; |
| for (auto &inst : *block) { |
| if (i != positions[level]) { |
| ++i; |
| continue; |
| } |
| if (level == positions.size() - 1) |
| return &inst; |
| if (auto childAffineForOp = inst.dyn_cast<AffineForOp>()) |
| return getInstAtPosition(positions, level + 1, |
| childAffineForOp->getBody()); |
| |
| for (auto &blockList : inst.getBlockLists()) { |
| for (auto &b : blockList) |
| if (auto *ret = getInstAtPosition(positions, level + 1, &b)) |
| return ret; |
| } |
| return nullptr; |
| } |
| return nullptr; |
| } |
| |
| const char *const kSliceFusionBarrierAttrName = "slice_fusion_barrier"; |
| // Computes memref dependence between 'srcAccess' and 'dstAccess', projects |
| // out any dst loop IVs at depth greater than 'dstLoopDepth', and computes slice |
| // bounds in 'sliceState' which represent the src IVs in terms of the dst IVs, |
| // symbols and constants. |
| LogicalResult mlir::getBackwardComputationSliceState( |
| const MemRefAccess &srcAccess, const MemRefAccess &dstAccess, |
| unsigned dstLoopDepth, ComputationSliceState *sliceState) { |
| bool readReadAccesses = |
| srcAccess.opInst->isa<LoadOp>() && dstAccess.opInst->isa<LoadOp>(); |
| FlatAffineConstraints dependenceConstraints; |
| if (!checkMemrefAccessDependence( |
| srcAccess, dstAccess, /*loopDepth=*/1, &dependenceConstraints, |
| /*dependenceComponents=*/nullptr, /*allowRAR=*/readReadAccesses)) { |
| return failure(); |
| } |
| // Get loop nest surrounding src operation. |
| SmallVector<OpPointer<AffineForOp>, 4> srcLoopIVs; |
| getLoopIVs(*srcAccess.opInst, &srcLoopIVs); |
| unsigned numSrcLoopIVs = srcLoopIVs.size(); |
| |
| // Get loop nest surrounding dst operation. |
| SmallVector<OpPointer<AffineForOp>, 4> dstLoopIVs; |
| getLoopIVs(*dstAccess.opInst, &dstLoopIVs); |
| unsigned numDstLoopIVs = dstLoopIVs.size(); |
| if (dstLoopDepth > numDstLoopIVs) { |
| dstAccess.opInst->emitError("invalid destination loop depth"); |
| return failure(); |
| } |
| |
| // Project out dimensions other than those up to 'dstLoopDepth'. |
| dependenceConstraints.projectOut(numSrcLoopIVs + dstLoopDepth, |
| numDstLoopIVs - dstLoopDepth); |
| |
| // Add src loop IV values to 'sliceState'. |
| dependenceConstraints.getIdValues(0, numSrcLoopIVs, &sliceState->ivs); |
| |
| // Set up lower/upper bound affine maps for the slice. |
| sliceState->lbs.resize(numSrcLoopIVs, AffineMap()); |
| sliceState->ubs.resize(numSrcLoopIVs, AffineMap()); |
| |
| // Get bounds for src IVs in terms of dst IVs, symbols, and constants. |
| dependenceConstraints.getSliceBounds(numSrcLoopIVs, |
| srcAccess.opInst->getContext(), |
| &sliceState->lbs, &sliceState->ubs); |
| |
| // Set up bound operands for the slice's lower and upper bounds. |
| SmallVector<Value *, 4> sliceBoundOperands; |
| dependenceConstraints.getIdValues( |
| numSrcLoopIVs, dependenceConstraints.getNumDimAndSymbolIds(), |
| &sliceBoundOperands); |
| // Give each bound its own copy of 'sliceBoundOperands' for subsequent |
| // canonicalization. |
| sliceState->lbOperands.resize(numSrcLoopIVs, sliceBoundOperands); |
| sliceState->ubOperands.resize(numSrcLoopIVs, sliceBoundOperands); |
| |
| llvm::SmallDenseSet<Value *, 8> sequentialLoops; |
| if (readReadAccesses) { |
| // For read-read access pairs, clear any slice bounds on sequential loops. |
| // Get sequential loops in loop nest rooted at 'srcLoopIVs[0]'. |
| getSequentialLoops(srcLoopIVs[0], &sequentialLoops); |
| } |
| // Clear all sliced loop bounds beginning at the first sequential loop, or |
| // first loop with a slice fusion barrier attribute.. |
| // TODO(andydavis, bondhugula) Use MemRef read/write regions instead of |
| // using 'kSliceFusionBarrierAttrName'. |
| for (unsigned i = 0; i < numSrcLoopIVs; ++i) { |
| Value *iv = srcLoopIVs[i]->getInductionVar(); |
| if (sequentialLoops.count(iv) == 0 && |
| srcLoopIVs[i]->getAttr(kSliceFusionBarrierAttrName) == nullptr) |
| continue; |
| for (unsigned j = i; j < numSrcLoopIVs; ++j) { |
| sliceState->lbs[j] = AffineMap(); |
| sliceState->ubs[j] = AffineMap(); |
| } |
| break; |
| } |
| |
| return success(); |
| } |
| |
| /// Creates a computation slice of the loop nest surrounding 'srcOpInst', |
| /// updates the slice loop bounds with any non-null bound maps specified in |
| /// 'sliceState', and inserts this slice into the loop nest surrounding |
| /// 'dstOpInst' at loop depth 'dstLoopDepth'. |
| // TODO(andydavis,bondhugula): extend the slicing utility to compute slices that |
| // aren't necessarily a one-to-one relation b/w the source and destination. The |
| // relation between the source and destination could be many-to-many in general. |
| // TODO(andydavis,bondhugula): the slice computation is incorrect in the cases |
| // where the dependence from the source to the destination does not cover the |
| // entire destination index set. Subtract out the dependent destination |
| // iterations from destination index set and check for emptiness --- this is one |
| // solution. |
| OpPointer<AffineForOp> mlir::insertBackwardComputationSlice( |
| Instruction *srcOpInst, Instruction *dstOpInst, unsigned dstLoopDepth, |
| ComputationSliceState *sliceState) { |
| // Get loop nest surrounding src operation. |
| SmallVector<OpPointer<AffineForOp>, 4> srcLoopIVs; |
| getLoopIVs(*srcOpInst, &srcLoopIVs); |
| unsigned numSrcLoopIVs = srcLoopIVs.size(); |
| |
| // Get loop nest surrounding dst operation. |
| SmallVector<OpPointer<AffineForOp>, 4> dstLoopIVs; |
| getLoopIVs(*dstOpInst, &dstLoopIVs); |
| unsigned dstLoopIVsSize = dstLoopIVs.size(); |
| if (dstLoopDepth > dstLoopIVsSize) { |
| dstOpInst->emitError("invalid destination loop depth"); |
| return OpPointer<AffineForOp>(); |
| } |
| |
| // Find the inst block positions of 'srcOpInst' within 'srcLoopIVs'. |
| SmallVector<unsigned, 4> positions; |
| // TODO(andydavis): This code is incorrect since srcLoopIVs can be 0-d. |
| findInstPosition(srcOpInst, srcLoopIVs[0]->getInstruction()->getBlock(), |
| &positions); |
| |
| // Clone src loop nest and insert it a the beginning of the instruction block |
| // of the loop at 'dstLoopDepth' in 'dstLoopIVs'. |
| auto dstAffineForOp = dstLoopIVs[dstLoopDepth - 1]; |
| FuncBuilder b(dstAffineForOp->getBody(), dstAffineForOp->getBody()->begin()); |
| auto sliceLoopNest = |
| b.clone(*srcLoopIVs[0]->getInstruction())->cast<AffineForOp>(); |
| |
| Instruction *sliceInst = |
| getInstAtPosition(positions, /*level=*/0, sliceLoopNest->getBody()); |
| // Get loop nest surrounding 'sliceInst'. |
| SmallVector<OpPointer<AffineForOp>, 4> sliceSurroundingLoops; |
| getLoopIVs(*sliceInst, &sliceSurroundingLoops); |
| |
| // Sanity check. |
| unsigned sliceSurroundingLoopsSize = sliceSurroundingLoops.size(); |
| (void)sliceSurroundingLoopsSize; |
| assert(dstLoopDepth + numSrcLoopIVs >= sliceSurroundingLoopsSize); |
| unsigned sliceLoopLimit = dstLoopDepth + numSrcLoopIVs; |
| (void)sliceLoopLimit; |
| assert(sliceLoopLimit >= sliceSurroundingLoopsSize); |
| |
| // Update loop bounds for loops in 'sliceLoopNest'. |
| for (unsigned i = 0; i < numSrcLoopIVs; ++i) { |
| auto forOp = sliceSurroundingLoops[dstLoopDepth + i]; |
| if (AffineMap lbMap = sliceState->lbs[i]) |
| forOp->setLowerBound(sliceState->lbOperands[i], lbMap); |
| if (AffineMap ubMap = sliceState->ubs[i]) |
| forOp->setUpperBound(sliceState->ubOperands[i], ubMap); |
| } |
| return sliceLoopNest; |
| } |
| |
| // Constructs MemRefAccess populating it with the memref, its indices and |
| // opinst from 'loadOrStoreOpInst'. |
| MemRefAccess::MemRefAccess(Instruction *loadOrStoreOpInst) { |
| if (auto loadOp = loadOrStoreOpInst->dyn_cast<LoadOp>()) { |
| memref = loadOp->getMemRef(); |
| opInst = loadOrStoreOpInst; |
| auto loadMemrefType = loadOp->getMemRefType(); |
| indices.reserve(loadMemrefType.getRank()); |
| for (auto *index : loadOp->getIndices()) { |
| indices.push_back(index); |
| } |
| } else { |
| assert(loadOrStoreOpInst->isa<StoreOp>() && "load/store op expected"); |
| auto storeOp = loadOrStoreOpInst->dyn_cast<StoreOp>(); |
| opInst = loadOrStoreOpInst; |
| memref = storeOp->getMemRef(); |
| auto storeMemrefType = storeOp->getMemRefType(); |
| indices.reserve(storeMemrefType.getRank()); |
| for (auto *index : storeOp->getIndices()) { |
| indices.push_back(index); |
| } |
| } |
| } |
| |
| unsigned MemRefAccess::getRank() const { |
| return memref->getType().cast<MemRefType>().getRank(); |
| } |
| |
| bool MemRefAccess::isStore() const { return opInst->isa<StoreOp>(); } |
| |
| /// Returns the nesting depth of this statement, i.e., the number of loops |
| /// surrounding this statement. |
| unsigned mlir::getNestingDepth(const Instruction &inst) { |
| const Instruction *currInst = &inst; |
| unsigned depth = 0; |
| while ((currInst = currInst->getParentInst())) { |
| if (currInst->isa<AffineForOp>()) |
| depth++; |
| } |
| return depth; |
| } |
| |
| /// Returns the number of surrounding loops common to 'loopsA' and 'loopsB', |
| /// where each lists loops from outer-most to inner-most in loop nest. |
| unsigned mlir::getNumCommonSurroundingLoops(const Instruction &A, |
| const Instruction &B) { |
| SmallVector<OpPointer<AffineForOp>, 4> loopsA, loopsB; |
| getLoopIVs(A, &loopsA); |
| getLoopIVs(B, &loopsB); |
| |
| unsigned minNumLoops = std::min(loopsA.size(), loopsB.size()); |
| unsigned numCommonLoops = 0; |
| for (unsigned i = 0; i < minNumLoops; ++i) { |
| if (loopsA[i]->getInstruction() != loopsB[i]->getInstruction()) |
| break; |
| ++numCommonLoops; |
| } |
| return numCommonLoops; |
| } |
| |
| static Optional<int64_t> getMemoryFootprintBytes(const Block &block, |
| Block::const_iterator start, |
| Block::const_iterator end, |
| int memorySpace) { |
| SmallDenseMap<Value *, std::unique_ptr<MemRefRegion>, 4> regions; |
| |
| // Cast away constness since the walker uses non-const versions; but we |
| // guarantee that the visitor callback isn't mutating opInst. |
| auto *cStart = reinterpret_cast<Block::iterator *>(&start); |
| auto *cEnd = reinterpret_cast<Block::iterator *>(&end); |
| |
| // Walk this 'for' instruction to gather all memory regions. |
| bool error = false; |
| const_cast<Block *>(&block)->walk(*cStart, *cEnd, [&](Instruction *opInst) { |
| if (!opInst->isa<LoadOp>() && !opInst->isa<StoreOp>()) { |
| // Neither load nor a store op. |
| return; |
| } |
| |
| // Compute the memref region symbolic in any IVs enclosing this block. |
| auto region = std::make_unique<MemRefRegion>(opInst->getLoc()); |
| if (failed( |
| region->compute(opInst, |
| /*loopDepth=*/getNestingDepth(*block.begin())))) { |
| opInst->emitError("Error obtaining memory region\n"); |
| error = true; |
| return; |
| } |
| auto it = regions.find(region->memref); |
| if (it == regions.end()) { |
| regions[region->memref] = std::move(region); |
| } else if (failed(it->second->unionBoundingBox(*region))) { |
| opInst->emitError("Error performing a union on a memory region\n"); |
| error = true; |
| return; |
| } |
| }); |
| |
| if (error) |
| return None; |
| |
| int64_t totalSizeInBytes = 0; |
| for (const auto ®ion : regions) { |
| Optional<int64_t> size = region.second->getRegionSize(); |
| if (!size.hasValue()) |
| return None; |
| totalSizeInBytes += size.getValue(); |
| } |
| return totalSizeInBytes; |
| } |
| |
| Optional<int64_t> |
| mlir::getMemoryFootprintBytes(ConstOpPointer<AffineForOp> forOp, |
| int memorySpace) { |
| auto *forInst = forOp->getInstruction(); |
| return ::getMemoryFootprintBytes( |
| *forInst->getBlock(), Block::const_iterator(forInst), |
| std::next(Block::const_iterator(forInst)), memorySpace); |
| } |
| |
| /// Returns in 'sequentialLoops' all sequential loops in loop nest rooted |
| /// at 'forOp'. |
| void mlir::getSequentialLoops( |
| OpPointer<AffineForOp> forOp, |
| llvm::SmallDenseSet<Value *, 8> *sequentialLoops) { |
| forOp->getInstruction()->walk([&](Instruction *inst) { |
| if (auto innerFor = inst->dyn_cast<AffineForOp>()) |
| if (!isLoopParallel(innerFor)) |
| sequentialLoops->insert(innerFor->getInductionVar()); |
| }); |
| } |
| |
| /// Returns true if 'forOp' is parallel. |
| bool mlir::isLoopParallel(OpPointer<AffineForOp> forOp) { |
| // Collect all load and store ops in loop nest rooted at 'forOp'. |
| SmallVector<Instruction *, 8> loadAndStoreOpInsts; |
| forOp->getInstruction()->walk([&](Instruction *opInst) { |
| if (opInst->isa<LoadOp>() || opInst->isa<StoreOp>()) |
| loadAndStoreOpInsts.push_back(opInst); |
| }); |
| |
| // Dep check depth would be number of enclosing loops + 1. |
| unsigned depth = getNestingDepth(*forOp->getInstruction()) + 1; |
| |
| // Check dependences between all pairs of ops in 'loadAndStoreOpInsts'. |
| for (auto *srcOpInst : loadAndStoreOpInsts) { |
| MemRefAccess srcAccess(srcOpInst); |
| for (auto *dstOpInst : loadAndStoreOpInsts) { |
| MemRefAccess dstAccess(dstOpInst); |
| FlatAffineConstraints dependenceConstraints; |
| if (checkMemrefAccessDependence(srcAccess, dstAccess, depth, |
| &dependenceConstraints, |
| /*dependenceComponents=*/nullptr)) |
| return false; |
| } |
| } |
| return true; |
| } |