//===- Utils.cpp ---- Misc utilities for analysis -------------------------===//
//
// Copyright 2019 The MLIR Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================
//
// This file implements miscellaneous analysis routines for non-loop IR
// structures.
//
//===----------------------------------------------------------------------===//
#include "mlir/Analysis/Utils.h"
#include "mlir/Analysis/AffineAnalysis.h"
#include "mlir/Analysis/AffineStructures.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/StandardOps/StandardOps.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#define DEBUG_TYPE "analysis-utils"
using namespace mlir;
/// Populates 'loops' with IVs of the loops surrounding 'inst' ordered from
/// the outermost 'for' instruction to the innermost one.
void mlir::getLoopIVs(const Instruction &inst,
SmallVectorImpl<ForInst *> *loops) {
auto *currInst = inst.getParentInst();
ForInst *currForInst;
  // Traverse up the hierarchy collecting all 'for' instructions while skipping
  // over 'if' instructions.
while (currInst && ((currForInst = dyn_cast<ForInst>(currInst)) ||
isa<IfInst>(currInst))) {
if (currForInst)
loops->push_back(currForInst);
currInst = currInst->getParentInst();
}
std::reverse(loops->begin(), loops->end());
}
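// Illustrative sketch of getLoopIVs (assumed IR, not from this file): for a
// load nested as
//
//   for %i = 0 to 32 {
//     if #set0(%i) {
//       for %j = 0 to 8 {
//         load %A[%i, %j]
//       }
//     }
//   }
//
// calling getLoopIVs on the load populates 'loops' with the ForInsts of %i and
// %j (in that order), skipping over the intervening 'if' instruction.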
unsigned MemRefRegion::getRank() const {
return memref->getType().cast<MemRefType>().getRank();
}
Optional<int64_t> MemRefRegion::getBoundingConstantSizeAndShape(
SmallVectorImpl<int> *shape,
std::vector<SmallVector<int64_t, 4>> *lbs) const {
auto memRefType = memref->getType().cast<MemRefType>();
unsigned rank = memRefType.getRank();
shape->reserve(rank);
// Find a constant upper bound on the extent of this memref region along each
// dimension.
int64_t numElements = 1;
int64_t diffConstant;
for (unsigned d = 0; d < rank; d++) {
SmallVector<int64_t, 4> lb;
Optional<int64_t> diff = cst.getConstantBoundOnDimSize(d, &lb);
if (diff.hasValue()) {
diffConstant = diff.getValue();
} else {
      // If no constant bound is found, then it can always be bounded by the
      // memref's dim size if the latter has a constant size along this dim.
auto dimSize = memRefType.getDimSize(d);
if (dimSize == -1)
return None;
diffConstant = dimSize;
// Lower bound becomes 0.
lb.resize(cst.getNumSymbolIds() + 1, 0);
}
numElements *= diffConstant;
if (lbs) {
lbs->push_back(lb);
}
if (shape) {
shape->push_back(diffConstant);
}
}
return numElements;
}
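// Illustrative sketch of getBoundingConstantSizeAndShape (assumed region, not
// from this file): for a rank-2 region on memref<32x32xf32> constrained by
//   {0 <= d0 <= 31, %i <= d1 <= %i + 7},
// the constant extents are 32 and 8, so the method returns 32 * 8 = 256
// elements with 'shape' set to {32, 8}; in 'lbs', the first dimension's lower
// bound is the constant 0 while the second is symbolic in %i.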
/// Computes the memory region accessed by this memref, with the region
/// represented as constraints symbolic/parametric in the 'loopDepth' loops
/// surrounding 'opInst' and in any additional Function symbols. Returns false
/// if this fails due to yet-unimplemented cases.
// For example, the memref region for this load operation at loopDepth = 1 will
// be as below:
//
// for %i = 0 to 32 {
// for %ii = %i to (d0) -> (d0 + 8) (%i) {
// load %A[%ii]
// }
// }
//
// region: {memref = %A, write = false, {%i <= m0 <= %i + 7} }
// The last field is a 2-d FlatAffineConstraints symbolic in %i.
//
// TODO(bondhugula): extend this to any other memref dereferencing ops
// (dma_start, dma_wait).
bool mlir::getMemRefRegion(OperationInst *opInst, unsigned loopDepth,
MemRefRegion *region) {
OpPointer<LoadOp> loadOp;
OpPointer<StoreOp> storeOp;
unsigned rank;
SmallVector<Value *, 4> indices;
if ((loadOp = opInst->dyn_cast<LoadOp>())) {
rank = loadOp->getMemRefType().getRank();
for (auto *index : loadOp->getIndices()) {
indices.push_back(index);
}
region->memref = loadOp->getMemRef();
region->setWrite(false);
} else if ((storeOp = opInst->dyn_cast<StoreOp>())) {
rank = storeOp->getMemRefType().getRank();
for (auto *index : storeOp->getIndices()) {
indices.push_back(index);
}
region->memref = storeOp->getMemRef();
region->setWrite(true);
} else {
return false;
}
// Build the constraints for this region.
FlatAffineConstraints *regionCst = region->getConstraints();
FuncBuilder b(opInst);
auto idMap = b.getMultiDimIdentityMap(rank);
// Initialize 'accessValueMap' and compose with reachable AffineApplyOps.
AffineValueMap accessValueMap(idMap, indices);
forwardSubstituteReachableOps(&accessValueMap);
AffineMap accessMap = accessValueMap.getAffineMap();
  // We'll first associate the dims and symbols of the access map with the dims
  // and symbols, respectively, of regionCst. This association will change
  // below once regionCst is fully constructed.
regionCst->reset(accessMap.getNumDims(), accessMap.getNumSymbols(), 0,
accessValueMap.getOperands());
// Add equality constraints.
unsigned numDims = accessMap.getNumDims();
unsigned numSymbols = accessMap.getNumSymbols();
  // Add inequalities for loop lower/upper bounds.
for (unsigned i = 0; i < numDims + numSymbols; ++i) {
if (auto *loop = dyn_cast<ForInst>(accessValueMap.getOperand(i))) {
// Note that regionCst can now have more dimensions than accessMap if the
// bounds expressions involve outer loops or other symbols.
// TODO(bondhugula): rewrite this to use getInstIndexSet; this way
// conditionals will be handled when the latter supports it.
if (!regionCst->addForInstDomain(*loop))
return false;
} else {
// Has to be a valid symbol.
auto *symbol = accessValueMap.getOperand(i);
assert(symbol->isValidSymbol());
// Check if the symbol is a constant.
if (auto *opInst = symbol->getDefiningInst()) {
if (auto constOp = opInst->dyn_cast<ConstantIndexOp>()) {
regionCst->setIdToConstant(*symbol, constOp->getValue());
}
}
}
}
// Add access function equalities to connect loop IVs to data dimensions.
if (!regionCst->composeMap(&accessValueMap)) {
LLVM_DEBUG(llvm::dbgs() << "getMemRefRegion: compose affine map failed\n");
return false;
}
// Eliminate any loop IVs other than the outermost 'loopDepth' IVs, on which
// this memref region is symbolic.
SmallVector<ForInst *, 4> outerIVs;
getLoopIVs(*opInst, &outerIVs);
outerIVs.resize(loopDepth);
for (auto *operand : accessValueMap.getOperands()) {
ForInst *iv;
if ((iv = dyn_cast<ForInst>(operand)) &&
std::find(outerIVs.begin(), outerIVs.end(), iv) == outerIVs.end()) {
regionCst->projectOut(operand);
}
}
// Project out any local variables (these would have been added for any
// mod/divs).
regionCst->projectOut(regionCst->getNumDimIds() +
regionCst->getNumSymbolIds(),
regionCst->getNumLocalIds());
  // Set all identifiers appearing after the first 'rank' identifiers as
  // symbolic identifiers - so that the ones corresponding to the memref
  // dimensions are the dimensional identifiers for the memref region.
regionCst->setDimSymbolSeparation(regionCst->getNumIds() - rank);
// Constant fold any symbolic identifiers.
regionCst->constantFoldIdRange(/*pos=*/regionCst->getNumDimIds(),
/*num=*/regionCst->getNumSymbolIds());
assert(regionCst->getNumDimIds() == rank && "unexpected MemRefRegion format");
return true;
}
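// Usage sketch (hypothetical caller, e.g. a memory footprint analysis or DMA
// generation pass; 'loadOpInst' is an assumed OperationInst* for a load):
// compute the region for the access at a given depth and then query its
// constant bounding size and shape.
//
//   MemRefRegion region;
//   if (getMemRefRegion(loadOpInst, /*loopDepth=*/1, &region)) {
//     SmallVector<int, 4> shape;
//     Optional<int64_t> numElements =
//         region.getBoundingConstantSizeAndShape(&shape, /*lbs=*/nullptr);
//     // 'numElements' is None if the region isn't bounded by constants.
//   }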
/// Returns the size of the memref data in bytes if it's statically shaped,
/// None otherwise. If the element type of the memref is a vector type, the
/// size of the vector is taken into account.
Optional<uint64_t> mlir::getMemRefSizeInBytes(MemRefType memRefType) {
if (memRefType.getNumDynamicDims() > 0)
return None;
auto elementType = memRefType.getElementType();
if (!elementType.isIntOrFloat() && !elementType.isa<VectorType>())
return None;
uint64_t sizeInBits;
if (elementType.isIntOrFloat()) {
sizeInBits = elementType.getIntOrFloatBitWidth();
} else {
auto vectorType = elementType.cast<VectorType>();
sizeInBits =
vectorType.getElementTypeBitWidth() * vectorType.getNumElements();
}
for (unsigned i = 0, e = memRefType.getRank(); i < e; i++) {
sizeInBits = sizeInBits * memRefType.getDimSize(i);
}
return llvm::divideCeil(sizeInBits, 8);
}
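// Worked example for getMemRefSizeInBytes (values chosen for illustration):
// for memref<16x4xvector<4xf32>>, the element size is 4 * 32 = 128 bits, so
// the total is 16 * 4 * 128 = 8192 bits, i.e. divideCeil(8192, 8) = 1024
// bytes. A memref with any dynamic dimension (e.g. memref<?x4xf32>) yields
// None.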
template <typename LoadOrStoreOpPointer>
bool mlir::boundCheckLoadOrStoreOp(LoadOrStoreOpPointer loadOrStoreOp,
bool emitError) {
static_assert(
std::is_same<LoadOrStoreOpPointer, OpPointer<LoadOp>>::value ||
std::is_same<LoadOrStoreOpPointer, OpPointer<StoreOp>>::value,
"function argument should be either a LoadOp or a StoreOp");
OperationInst *opInst = loadOrStoreOp->getInstruction();
MemRefRegion region;
if (!getMemRefRegion(opInst, /*loopDepth=*/0, &region))
return false;
  LLVM_DEBUG(llvm::dbgs() << "Memory region:\n");
LLVM_DEBUG(region.getConstraints()->dump());
bool outOfBounds = false;
unsigned rank = loadOrStoreOp->getMemRefType().getRank();
// For each dimension, check for out of bounds.
for (unsigned r = 0; r < rank; r++) {
    // Intersect the memory region with a constraint capturing an out-of-bounds
    // access (either beyond the upper bound or below the lower bound), and
    // check if the resulting constraint system is feasible. If it is, there is
    // at least one point out of bounds.
    int dimSize = loadOrStoreOp->getMemRefType().getDimSize(r);
    // TODO(bondhugula): handle dynamic dim sizes.
    if (dimSize == -1)
      continue;
    // Check for overflow: d_i >= memref dim size.
    FlatAffineConstraints ucst(*region.getConstraints());
    ucst.addConstantLowerBound(r, dimSize);
    bool upperBoundViolated = !ucst.isEmpty();
    if (upperBoundViolated && emitError) {
      loadOrStoreOp->emitOpError(
          "memref out of upper bound access along dimension #" + Twine(r + 1));
    }
    // Check for a negative index: d_i <= -1.
    FlatAffineConstraints lcst(*region.getConstraints());
    lcst.addConstantUpperBound(r, -1);
    bool lowerBoundViolated = !lcst.isEmpty();
    if (lowerBoundViolated && emitError) {
      loadOrStoreOp->emitOpError(
          "memref out of lower bound access along dimension #" + Twine(r + 1));
    }
    outOfBounds |= upperBoundViolated || lowerBoundViolated;
}
return outOfBounds;
}
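// Illustrative sketch of boundCheckLoadOrStoreOp (assumed IR, not from this
// file): for
//
//   for %i = 0 to 128 {
//     %v = load %A[%i]    // %A : memref<64xf32>
//   }
//
// the access region spans [0, 127] along dimension 0; intersecting it with
// {d0 >= 64} is feasible, so the function returns true and, if 'emitError' is
// set, reports an out-of-upper-bound access along dimension #1.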
// Explicitly instantiate the template so that the compiler knows we need them!
template bool mlir::boundCheckLoadOrStoreOp(OpPointer<LoadOp> loadOp,
bool emitError);
template bool mlir::boundCheckLoadOrStoreOp(OpPointer<StoreOp> storeOp,
bool emitError);
// Returns in 'positions' the Block positions of 'inst' in each ancestor
// Block from the Block containing 'inst', stopping at 'limitBlock'.
static void findInstPosition(const Instruction *inst, Block *limitBlock,
SmallVectorImpl<unsigned> *positions) {
const Block *block = inst->getBlock();
while (block != limitBlock) {
int instPosInBlock = block->findInstPositionInBlock(*inst);
assert(instPosInBlock >= 0);
positions->push_back(instPosInBlock);
inst = block->getContainingInst();
block = inst->getBlock();
}
std::reverse(positions->begin(), positions->end());
}
// Returns the Instruction in a possibly nested set of Blocks, where the
// position of the instruction is represented by 'positions', which has a
// Block position for each level of nesting.
static Instruction *getInstAtPosition(ArrayRef<unsigned> positions,
unsigned level, Block *block) {
unsigned i = 0;
for (auto &inst : *block) {
if (i != positions[level]) {
++i;
continue;
}
if (level == positions.size() - 1)
return &inst;
if (auto *childForInst = dyn_cast<ForInst>(&inst))
return getInstAtPosition(positions, level + 1, childForInst->getBody());
if (auto *ifInst = dyn_cast<IfInst>(&inst)) {
auto *ret = getInstAtPosition(positions, level + 1, ifInst->getThen());
if (ret != nullptr)
return ret;
if (auto *elseClause = ifInst->getElse())
return getInstAtPosition(positions, level + 1, elseClause);
}
}
return nullptr;
}
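// Illustrative sketch (assumed nesting, following how these two helpers are
// used below): suppose 'inst' is the third instruction in the body of an
// inner 'for', and that inner 'for' is the first instruction in the body of an
// outer 'for' that lives directly in 'limitBlock'. Then findInstPosition(inst,
// limitBlock, &positions) yields positions = {0, 2} (the position within
// 'limitBlock' itself is not recorded). getInstAtPosition({0, 2}, /*level=*/0,
// outer for's body) walks back down to 'inst'; this round trip is how
// insertBackwardComputationSlice locates the source access inside the cloned
// loop nest.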
// Computes the memref dependence between 'srcAccess' and 'dstAccess', and uses
// the dependence constraint system to create AffineMaps with which to adjust
// the loop bounds of the inserted computation slice so that they are functions
// of the loop IVs and symbols of the loops surrounding 'dstAccess'.
ForInst *mlir::insertBackwardComputationSlice(MemRefAccess *srcAccess,
MemRefAccess *dstAccess,
unsigned srcLoopDepth,
unsigned dstLoopDepth) {
FlatAffineConstraints dependenceConstraints;
if (!checkMemrefAccessDependence(*srcAccess, *dstAccess, /*loopDepth=*/1,
&dependenceConstraints,
/*dependenceComponents=*/nullptr)) {
return nullptr;
}
// Get loop nest surrounding src operation.
SmallVector<ForInst *, 4> srcLoopNest;
getLoopIVs(*srcAccess->opInst, &srcLoopNest);
unsigned srcLoopNestSize = srcLoopNest.size();
assert(srcLoopDepth <= srcLoopNestSize);
// Get loop nest surrounding dst operation.
SmallVector<ForInst *, 4> dstLoopNest;
getLoopIVs(*dstAccess->opInst, &dstLoopNest);
unsigned dstLoopNestSize = dstLoopNest.size();
(void)dstLoopNestSize;
assert(dstLoopDepth > 0);
assert(dstLoopDepth <= dstLoopNestSize);
// Solve for src IVs in terms of dst IVs, symbols and constants.
SmallVector<AffineMap, 4> srcIvMaps(srcLoopNestSize, AffineMap::Null());
std::vector<SmallVector<Value *, 2>> srcIvOperands(srcLoopNestSize);
for (unsigned i = 0; i < srcLoopNestSize; ++i) {
    // Skip IVs at depths greater than the requested src loop depth.
if (i >= srcLoopDepth) {
srcIvMaps[i] = AffineMap::Null();
continue;
}
auto cst = dependenceConstraints.clone();
for (int j = srcLoopNestSize - 1; j >= 0; --j) {
if (i != j)
cst->projectOut(j);
}
    // TODO(andydavis) Check for the case with two equalities where we have
    // set one IV to a constant. Set a constant IV map for these cases.
if (cst->getNumEqualities() != 1) {
srcIvMaps[i] = AffineMap::Null();
continue;
}
SmallVector<unsigned, 2> nonZeroDimIds;
SmallVector<unsigned, 2> nonZeroSymbolIds;
srcIvMaps[i] = cst->toAffineMapFromEq(0, 0, srcAccess->opInst->getContext(),
&nonZeroDimIds, &nonZeroSymbolIds);
if (srcIvMaps[i] == AffineMap::Null()) {
continue;
}
// Add operands for all non-zero dst dims and symbols.
// TODO(andydavis) Add local variable support.
for (auto dimId : nonZeroDimIds) {
if (dimId - 1 >= dstLoopDepth) {
        // This src IV depends on a dst loop IV at depth >= 'dstLoopDepth',
        // i.e., on a loop inside the insertion point. So we cannot slice the
        // iteration space at 'srcLoopDepth' and also insert the slice into the
        // dst loop nest at 'dstLoopDepth'.
return nullptr;
}
srcIvOperands[i].push_back(dstLoopNest[dimId - 1]);
}
    // TODO(andydavis) Add symbols from the access function. Ideally, we
    // should be able to query the constraint system for the Value associated
    // with each symbol identifier in 'nonZeroSymbolIds'.
}
// Find the inst block positions of 'srcAccess->opInst' within 'srcLoopNest'.
SmallVector<unsigned, 4> positions;
findInstPosition(srcAccess->opInst, srcLoopNest[0]->getBlock(), &positions);
  // Clone the src loop nest and insert it at the beginning of the instruction
  // block of the loop at 'dstLoopDepth' in 'dstLoopNest'.
auto *dstForInst = dstLoopNest[dstLoopDepth - 1];
FuncBuilder b(dstForInst->getBody(), dstForInst->getBody()->begin());
DenseMap<const Value *, Value *> operandMap;
auto *sliceLoopNest = cast<ForInst>(b.clone(*srcLoopNest[0], operandMap));
// Lookup inst in cloned 'sliceLoopNest' at 'positions'.
Instruction *sliceInst =
getInstAtPosition(positions, /*level=*/0, sliceLoopNest->getBody());
// Get loop nest surrounding 'sliceInst'.
SmallVector<ForInst *, 4> sliceSurroundingLoops;
getLoopIVs(*sliceInst, &sliceSurroundingLoops);
unsigned sliceSurroundingLoopsSize = sliceSurroundingLoops.size();
(void)sliceSurroundingLoopsSize;
// Update loop bounds for loops in 'sliceLoopNest'.
unsigned sliceLoopLimit = dstLoopDepth + srcLoopNestSize;
assert(sliceLoopLimit <= sliceSurroundingLoopsSize);
for (unsigned i = dstLoopDepth; i < sliceLoopLimit; ++i) {
auto *forInst = sliceSurroundingLoops[i];
unsigned index = i - dstLoopDepth;
AffineMap lbMap = srcIvMaps[index];
if (lbMap == AffineMap::Null())
continue;
forInst->setLowerBound(srcIvOperands[index], lbMap);
    // Create an upper bound map which is the lower bound map + 1.
assert(lbMap.getNumResults() == 1);
AffineExpr ubResultExpr = lbMap.getResult(0) + 1;
AffineMap ubMap = AffineMap::get(lbMap.getNumDims(), lbMap.getNumSymbols(),
{ubResultExpr}, {});
forInst->setUpperBound(srcIvOperands[index], ubMap);
}
return sliceLoopNest;
}
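// Illustrative sketch of insertBackwardComputationSlice (assumed IR): with a
// store in the src nest and a load in the dst nest,
//
//   for %i = 0 to 100 {
//     store %v, %A[%i]    // srcAccess
//   }
//   for %j = 0 to 100 {
//     load %A[%j]         // dstAccess
//   }
//
// a call with srcLoopDepth = 1 and dstLoopDepth = 1 clones the src loop into
// the body of the %j loop and tightens its bounds to the single iteration that
// produces the value read at %j, roughly:
//
//   for %j = 0 to 100 {
//     for %i0 = (d0) -> (d0)(%j) to (d0) -> (d0 + 1)(%j) {
//       store %v, %A[%i0]
//     }
//     load %A[%j]
//   }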
void mlir::getMemRefAccess(OperationInst *loadOrStoreOpInst,
MemRefAccess *access) {
if (auto loadOp = loadOrStoreOpInst->dyn_cast<LoadOp>()) {
access->memref = loadOp->getMemRef();
access->opInst = loadOrStoreOpInst;
auto loadMemrefType = loadOp->getMemRefType();
access->indices.reserve(loadMemrefType.getRank());
for (auto *index : loadOp->getIndices()) {
access->indices.push_back(index);
}
} else {
assert(loadOrStoreOpInst->isa<StoreOp>() && "load/store op expected");
auto storeOp = loadOrStoreOpInst->dyn_cast<StoreOp>();
access->opInst = loadOrStoreOpInst;
access->memref = storeOp->getMemRef();
auto storeMemrefType = storeOp->getMemRefType();
access->indices.reserve(storeMemrefType.getRank());
for (auto *index : storeOp->getIndices()) {
access->indices.push_back(index);
}
}
}
/// Returns the nesting depth of this instruction, i.e., the number of loops
/// surrounding it.
unsigned mlir::getNestingDepth(const Instruction &stmt) {
const Instruction *currInst = &stmt;
unsigned depth = 0;
while ((currInst = currInst->getParentInst())) {
if (isa<ForInst>(currInst))
depth++;
}
return depth;
}
/// Returns the number of surrounding loops common to both 'A' and 'B', where
/// the surrounding loops are considered from outermost to innermost.
unsigned mlir::getNumCommonSurroundingLoops(const Instruction &A,
const Instruction &B) {
SmallVector<ForInst *, 4> loopsA, loopsB;
getLoopIVs(A, &loopsA);
getLoopIVs(B, &loopsB);
unsigned minNumLoops = std::min(loopsA.size(), loopsB.size());
unsigned numCommonLoops = 0;
for (unsigned i = 0; i < minNumLoops; ++i) {
if (loopsA[i] != loopsB[i])
break;
++numCommonLoops;
}
return numCommonLoops;
}
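// Illustrative sketch of getNumCommonSurroundingLoops (assumed nesting): if A
// sits inside loops {%i, %j} and B sits inside loops {%i, %k}, only the
// outermost loop is shared, so the function returns 1; for instructions in
// disjoint loop nests it returns 0.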