Change the dependence check in the loop fusion pass to use the MLIR instruction list ordering (instead of the dependence graph node id ordering). This breaks the overloading of dependence graph node ids as both edge endpoints and instruction list position. PiperOrigin-RevId: 230849232

commit: be5616414020f2980857e958b13ec08dbd47ddcb [log] [tgz]
author: MLIR Team <no-reply@google.com> Thu Jan 24 22:27:40 2019 -0800
committer: jpienaar <jpienaar@google.com> Fri Mar 29 15:35:53 2019 -0700
tree: c011a08a0f4541124ddad91417867f05b2e42a19
parent: a6cc5d87d1b8eca94efe937e250403dfc2018769 [diff]
diff --git a/lib/Transforms/LoopFusion.cpp b/lib/Transforms/LoopFusion.cpp
index 6a8c4fd..4df3a8c 100644
--- a/lib/Transforms/LoopFusion.cpp
+++ b/lib/Transforms/LoopFusion.cpp

@@ -302,15 +302,33 @@
     return outEdgeCount;
   }
 
-  // Returns the min node id across all outgoing edges from node 'id', skipping
-  // edges with 'memrefToSkip'.
-  unsigned getMinOutEdgeNodeId(unsigned id, Value *memrefToSkip) {
-    unsigned minId = std::numeric_limits<unsigned>::max();
-    if (outEdges.count(id) > 0)
-      for (auto &outEdge : outEdges[id])
-        if (outEdge.memref != memrefToSkip)
-          minId = std::min(minId, outEdge.id);
-    return minId;
+  // Check for a dependence in Block instruction list range (srcId, dstId) on
+  // memrefs other than 'memrefToSkip' (which will be privatized for the fused
+  // loop).
+  bool hasDependenceTargetInRange(unsigned srcId, unsigned dstId,
+                                  Value *memrefToSkip) {
+    if (outEdges.count(srcId) == 0)
+      return false;
+    // Check if any of the outgoing edge targets from srcId lie in
+    // (srcId, dstId).
+    SmallPtrSet<Instruction *, 2> depInsts;
+    for (auto &outEdge : outEdges[srcId]) {
+      if (outEdge.id != dstId && outEdge.memref != memrefToSkip) {
+        Node *node = getNode(outEdge.id);
+        depInsts.insert(node->inst);
+      }
+    }
+    // Do a linear walk from 'srcNode.inst' to 'dstNode.inst' and for each
+    // instruction 'inst' in range ('srcNode.inst', 'dstNode.inst') test
+    // if 'depInsts' contains 'inst', and return true if it does.
+    // TODO(andydavis) If this linear search becomes a compile time issue,
+    // create a data structure which allows a faster search through ForInsts
+    // in a Block.
+    Block::iterator it = std::next(Block::iterator(getNode(srcId)->inst));
+    Block::iterator itEnd = Block::iterator(getNode(dstId)->inst);
+    return std::any_of(it, itEnd, [&](Instruction &inst) {
+      return depInsts.count(&inst) > 0;
+    });
   }
 
   // Updates edge mappings from node 'srcId' to node 'dstId'.
@@ -1063,7 +1081,7 @@
       100.0 * (minFusedLoopNestComputeCost /
                    (static_cast<double>(srcLoopNestCost) + dstLoopNestCost) -
                1);
-
+  (void)additionalComputeFraction;
   LLVM_DEBUG({
     std::stringstream msg;
     msg << " fusion is most profitable at depth " << *dstLoopDepth << " with "
@@ -1134,7 +1152,7 @@
 // TODO(andydavis) Experiment with other fusion policies.
 // TODO(andydavis) Add support for fusing for input reuse (perhaps by
 // constructing a graph with edges which represent loads from the same memref
-// in two different loop nestst.
+// in two different loop nests.
 struct GreedyFusion {
 public:
   MemRefDependenceGraph *mdg;
@@ -1194,8 +1212,9 @@
           if (mdg->getInEdgeCount(srcNode->id, memref) != 0)
             continue;
 
-          // Skip if 'srcNode' has out edges to other memrefs after 'dstId'.
-          if (mdg->getMinOutEdgeNodeId(srcNode->id, memref) < dstId)
+          // Skip if 'srcNode' has out edges on memrefs other than 'memref'
+          // for nodes in instruction list range (srcNode.inst, dstNode.inst).
+          if (mdg->hasDependenceTargetInRange(srcNode->id, dstNode->id, memref))
             continue;
 
           // Check if fusion would be profitable.
commit	be5616414020f2980857e958b13ec08dbd47ddcb	[log] [tgz]
author	MLIR Team <no-reply@google.com>	Thu Jan 24 22:27:40 2019 -0800
committer	jpienaar <jpienaar@google.com>	Fri Mar 29 15:35:53 2019 -0700
tree	c011a08a0f4541124ddad91417867f05b2e42a19
parent	a6cc5d87d1b8eca94efe937e250403dfc2018769 [diff]