Rewrite topological sort order and improve GVN.

Rewrite the topological sort order so that a full loop is emitted
before the blocks that follow the loop. Add a new iterator class,
LoopRepeatingTopologicalSortIterator, which differs from the
RepeatingTopologicalSortIterator by repeating only loops and
repeating them early: when we reach the end of a loop, it returns
to the loop head if the head needs recalculation.
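
A minimal, self-contained sketch of the repeat-loops-early idea
(illustration only, with made-up names LoopRepeatingWalk, loop_ends and
needs_recalc; the real iterator walks MIRGraph blocks and uses the
visited flags instead of a needs_recalc vector):

  #include <cstddef>
  #include <utility>
  #include <vector>

  // Blocks are assumed to be in topological order with each loop laid out
  // as a contiguous range; loop_ends[i] is the past-the-end index of the
  // loop headed at i, or 0 if block i is not a loop head.
  std::vector<size_t> LoopRepeatingWalk(
      const std::vector<size_t>& loop_ends, std::vector<bool> needs_recalc) {
    std::vector<size_t> visit_order;
    std::vector<std::pair<size_t, bool>> loop_head_stack;  // (head, recalculating).
    size_t idx = 0u;
    const size_t end = loop_ends.size();
    while (true) {
      // Pop loops we have left; go back to a head that needs recalculation.
      while (!loop_head_stack.empty() &&
             loop_ends[loop_head_stack.back().first] == idx) {
        std::pair<size_t, bool> top = loop_head_stack.back();
        loop_head_stack.pop_back();
        if (!top.second || needs_recalc[top.first]) {
          loop_head_stack.emplace_back(top.first, true);  // Now recalculating.
          idx = top.first;
          break;
        }
      }
      if (idx == end) {
        return visit_order;
      }
      if (loop_ends[idx] != 0u &&
          (loop_head_stack.empty() || loop_head_stack.back().first != idx)) {
        loop_head_stack.emplace_back(idx, false);  // Entering a new loop.
      }
      visit_order.push_back(idx);
      needs_recalc[idx] = false;  // Assume this block converges now.
      idx += 1u;
    }
  }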

In GVN, use the new loop-repeating topological sort iterator, and
for a loop head merge only the preceding blocks' LVNs unless we're
currently recalculating that loop.
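
A sketch of the resulting predecessor filter (hypothetical names; the
topological indexes are positions in the new sort order, and the real
code reads them from the MIRGraph loop head stack and index map):

  #include <cstddef>

  // At a loop head's initial visit, merge only LVNs of blocks preceding
  // the head in the topological order; otherwise merge all available
  // predecessor LVNs.
  bool MergePredecessorLvn(size_t pred_topo_idx, size_t bb_topo_idx,
                           bool head_stack_empty, size_t head_topo_idx,
                           bool recalculating) {
    bool use_all =
        head_stack_empty || recalculating || head_topo_idx != bb_topo_idx;
    return use_all || pred_topo_idx < head_topo_idx;
  }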

Also fix LocalValueNumbering::InPlaceIntersectMaps(), which was
keeping only the last element of the intersection, avoid some
unnecessary processing during the LVN merge, and add missing
braces in MIRGraph::InferTypeAndSize().
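
The intended semantics of that intersection, as a generic sketch rather
than the ART helper (which walks both sorted maps in lock-step):

  #include <map>

  // Keep only the entries present, with an equal value, in both maps.
  // The bug class being fixed is an erase loop that drops surviving
  // entries instead of stepping past them.
  template <typename K, typename V>
  void InPlaceIntersect(std::map<K, V>* work, const std::map<K, V>& other) {
    for (auto it = work->begin(); it != work->end(); ) {
      auto found = other.find(it->first);
      if (found == other.end() || !(found->second == it->second)) {
        it = work->erase(it);  // Not in the intersection, remove.
      } else {
        ++it;  // In the intersection, keep and advance.
      }
    }
  }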

Bug: 16398693
Change-Id: I4e10d4acb626a5b8a28ec0de106a7b37f9cbca32
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index d75644d..2cba0ec 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -127,6 +127,7 @@
   runtime/reflection_test.cc \
   compiler/dex/global_value_numbering_test.cc \
   compiler/dex/local_value_numbering_test.cc \
+  compiler/dex/mir_graph_test.cc \
   compiler/dex/mir_optimization_test.cc \
   compiler/driver/compiler_driver_test.cc \
   compiler/elf_writer_test.cc \
diff --git a/compiler/dex/bb_optimizations.h b/compiler/dex/bb_optimizations.h
index d1d5ad9..7395324 100644
--- a/compiler/dex/bb_optimizations.h
+++ b/compiler/dex/bb_optimizations.h
@@ -172,7 +172,7 @@
 class ClassInitCheckElimination : public PassME {
  public:
   ClassInitCheckElimination()
-    : PassME("ClInitCheckElimination", kRepeatingTopologicalSortTraversal) {
+    : PassME("ClInitCheckElimination", kLoopRepeatingTopologicalSortTraversal) {
   }
 
   bool Gate(const PassDataHolder* data) const {
@@ -207,17 +207,17 @@
 class GlobalValueNumberingPass : public PassME {
  public:
   GlobalValueNumberingPass()
-    : PassME("GVN", kRepeatingTopologicalSortTraversal, "4_post_gvn_cfg") {
+    : PassME("GVN", kLoopRepeatingTopologicalSortTraversal, "4_post_gvn_cfg") {
   }
 
-  bool Gate(const PassDataHolder* data) const {
+  bool Gate(const PassDataHolder* data) const OVERRIDE {
     DCHECK(data != nullptr);
     CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit;
     DCHECK(cUnit != nullptr);
     return cUnit->mir_graph->ApplyGlobalValueNumberingGate();
   }
 
-  bool Worker(const PassDataHolder* data) const {
+  bool Worker(const PassDataHolder* data) const OVERRIDE {
     DCHECK(data != nullptr);
     const PassMEDataHolder* pass_me_data_holder = down_cast<const PassMEDataHolder*>(data);
     CompilationUnit* cUnit = pass_me_data_holder->c_unit;
@@ -227,7 +227,7 @@
     return cUnit->mir_graph->ApplyGlobalValueNumbering(bb);
   }
 
-  void End(PassDataHolder* data) const {
+  void End(PassDataHolder* data) const OVERRIDE {
     DCHECK(data != nullptr);
     CompilationUnit* cUnit = down_cast<PassMEDataHolder*>(data)->c_unit;
     DCHECK(cUnit != nullptr);
diff --git a/compiler/dex/dataflow_iterator-inl.h b/compiler/dex/dataflow_iterator-inl.h
index f8b9c1a..d1abf7f 100644
--- a/compiler/dex/dataflow_iterator-inl.h
+++ b/compiler/dex/dataflow_iterator-inl.h
@@ -121,6 +121,56 @@
   return res;
 }
 
+inline BasicBlock* LoopRepeatingTopologicalSortIterator::Next(bool had_change) {
+  if (idx_ != 0) {
+    // Mark last processed block visited.
+    BasicBlock* bb = mir_graph_->GetBasicBlock(block_id_list_->Get(idx_ - 1));
+    bb->visited = true;
+    if (had_change) {
+      // If we had a change we need to revisit the children.
+      ChildBlockIterator iter(bb, mir_graph_);
+      for (BasicBlock* child_bb = iter.Next(); child_bb != nullptr; child_bb = iter.Next()) {
+        child_bb->visited = false;
+      }
+    }
+  }
+
+  while (true) {
+    // Pop loops we have left and check if we need to recalculate one of them.
+    // NOTE: We need to do this even if idx_ == end_idx_.
+    while (loop_head_stack_->Size() != 0u &&
+        loop_ends_->Get(loop_head_stack_->Peek().first) == idx_) {
+      auto top = loop_head_stack_->Peek();
+      uint16_t loop_head_idx = top.first;
+      bool recalculated = top.second;
+      loop_head_stack_->Pop();
+      BasicBlock* loop_head = mir_graph_->GetBasicBlock(block_id_list_->Get(loop_head_idx));
+      DCHECK(loop_head != nullptr);
+      if (!recalculated || !loop_head->visited) {
+        loop_head_stack_->Insert(std::make_pair(loop_head_idx, true));  // Recalculating this loop.
+        idx_ = loop_head_idx + 1;
+        return loop_head;
+      }
+    }
+
+    if (idx_ == end_idx_) {
+      return nullptr;
+    }
+
+    // Get next block and return it if unvisited.
+    BasicBlockId idx = idx_;
+    idx_ += 1;
+    BasicBlock* bb = mir_graph_->GetBasicBlock(block_id_list_->Get(idx));
+    DCHECK(bb != nullptr);
+    if (!bb->visited) {
+      if (loop_ends_->Get(idx) != 0u) {
+        loop_head_stack_->Insert(std::make_pair(idx, false));  // Not recalculating.
+      }
+      return bb;
+    }
+  }
+}
+
 }  // namespace art
 
 #endif  // ART_COMPILER_DEX_DATAFLOW_ITERATOR_INL_H_
diff --git a/compiler/dex/dataflow_iterator.h b/compiler/dex/dataflow_iterator.h
index 66c524f..06d6832 100644
--- a/compiler/dex/dataflow_iterator.h
+++ b/compiler/dex/dataflow_iterator.h
@@ -388,6 +388,52 @@
      }
   };
 
+  /**
+   * @class LoopRepeatingTopologicalSortIterator
+   * @brief Used to perform a Topological Sort Iteration of a MIRGraph, repeating loops as needed.
+   * @details The iterator uses the visited flags to keep track of the blocks that need
+   * recalculation and keeps a stack of loop heads in the MIRGraph. At the end of the loop
+   * it returns back to the loop head if it needs to be recalculated. Due to the use of
+   * the visited flags and the loop head stack in the MIRGraph, it's not possible to use
+   * two iterators at the same time or modify this data during iteration (though inspection
+   * of this data is allowed and sometimes even expected).
+   *
+   * NOTE: This iterator is not suitable for passes that need to propagate changes to
+   * predecessors, such as type inference.
+   */
+  class LoopRepeatingTopologicalSortIterator : public DataflowIterator {
+    public:
+     /**
+      * @brief The constructor, using all of the reachable blocks of the MIRGraph.
+      * @param mir_graph The MIRGraph considered.
+      */
+     explicit LoopRepeatingTopologicalSortIterator(MIRGraph* mir_graph)
+         : DataflowIterator(mir_graph, 0, mir_graph->GetTopologicalSortOrder()->Size()),
+           loop_ends_(mir_graph->GetTopologicalSortOrderLoopEnds()),
+           loop_head_stack_(mir_graph_->GetTopologicalSortOrderLoopHeadStack()) {
+       // Extra setup for RepeatingTopologicalSortIterator.
+       idx_ = start_idx_;
+       block_id_list_ = mir_graph->GetTopologicalSortOrder();
+       // Clear visited flags and check that the loop head stack is empty.
+       mir_graph->ClearAllVisitedFlags();
+       DCHECK_EQ(loop_head_stack_->Size(), 0u);
+     }
+
+     ~LoopRepeatingTopologicalSortIterator() {
+       DCHECK_EQ(loop_head_stack_->Size(), 0u);
+     }
+
+     /**
+      * @brief Get the next BasicBlock depending on iteration order.
+      * @param had_change did the user of the iteration change the previous BasicBlock.
+      * @return the next BasicBlock following the iteration order, 0 if finished.
+      */
+     virtual BasicBlock* Next(bool had_change = false) OVERRIDE;
+
+    private:
+     const GrowableArray<BasicBlockId>* const loop_ends_;
+     GrowableArray<std::pair<uint16_t, bool>>* const loop_head_stack_;
+  };
 
 }  // namespace art
 
diff --git a/compiler/dex/global_value_numbering.cc b/compiler/dex/global_value_numbering.cc
index 614e826..d86be4e 100644
--- a/compiler/dex/global_value_numbering.cc
+++ b/compiler/dex/global_value_numbering.cc
@@ -22,8 +22,10 @@
 
 GlobalValueNumbering::GlobalValueNumbering(CompilationUnit* cu, ScopedArenaAllocator* allocator)
     : cu_(cu),
+      mir_graph_(cu->mir_graph.get()),
       allocator_(allocator),
-      repeat_count_(0u),
+      bbs_processed_(0u),
+      max_bbs_to_process_(kMaxBbsToProcessMultiplyFactor * mir_graph_->GetNumReachableBlocks()),
       last_value_(0u),
       modifications_allowed_(false),
       global_value_map_(std::less<uint64_t>(), allocator->Adapter()),
@@ -32,10 +34,9 @@
       array_location_map_(ArrayLocationComparator(), allocator->Adapter()),
       array_location_reverse_map_(allocator->Adapter()),
       ref_set_map_(std::less<ValueNameSet>(), allocator->Adapter()),
-      lvns_(cu_->mir_graph->GetNumBlocks(), nullptr, allocator->Adapter()),
+      lvns_(mir_graph_->GetNumBlocks(), nullptr, allocator->Adapter()),
       work_lvn_(nullptr),
       merge_lvns_(allocator->Adapter()) {
-  cu_->mir_graph->ClearAllVisitedFlags();
 }
 
 GlobalValueNumbering::~GlobalValueNumbering() {
@@ -46,21 +47,15 @@
   if (UNLIKELY(!Good())) {
     return nullptr;
   }
-  if (bb->data_flow_info == nullptr) {
+  if (UNLIKELY(bb->data_flow_info == nullptr)) {
     return nullptr;
   }
-  if (bb->block_type == kEntryBlock) {
-    repeat_count_ += 1u;
-    if (repeat_count_ > kMaxRepeatCount) {
-      last_value_ = kNoValue;  // Make bad.
-      return nullptr;
-    }
-  }
-  if (bb->block_type == kExitBlock) {
+  if (UNLIKELY(bb->block_type == kExitBlock)) {
     DCHECK(bb->first_mir_insn == nullptr);
     return nullptr;
   }
-  if (bb->visited) {
+  if (UNLIKELY(bbs_processed_ == max_bbs_to_process_)) {
+    last_value_ = kNoValue;  // Make bad.
     return nullptr;
   }
   DCHECK(work_lvn_.get() == nullptr);
@@ -72,13 +67,34 @@
       work_lvn_->SetSRegNullChecked(this_reg);
     }
   } else {
-    // Merge all incoming arcs.
     // To avoid repeated allocation on the ArenaStack, reuse a single vector kept as a member.
     DCHECK(merge_lvns_.empty());
+    // If we're running the full GVN, the RepeatingTopologicalSortIterator keeps the loop
+    // head stack in the MIRGraph up to date and for a loop head we need to check whether
+    // we're making the initial computation and need to merge only preceding blocks in the
+    // topological order, or we're recalculating a loop head and need to merge all incoming
+    // LVNs. When we're not at a loop head (including having an empty loop head stack) all
+    // predecessors should be preceding blocks and we shall merge all of them anyway.
+    //
+    // If we're running the modification phase of the full GVN, the loop head stack will be
+    // empty and we need to merge all incoming LVNs. If we're running just a simple LVN,
+    // the loop head stack will also be empty and there will be nothing to merge anyway.
+    bool use_all_predecessors = true;
+    uint16_t loop_head_idx = 0u;  // Used only if !use_all_predecessors.
+    if (mir_graph_->GetTopologicalSortOrderLoopHeadStack()->Size() != 0) {
+      // Full GVN inside a loop, see if we're at the loop head for the first time.
+      auto top = mir_graph_->GetTopologicalSortOrderLoopHeadStack()->Peek();
+      loop_head_idx = top.first;
+      bool recalculating = top.second;
+      use_all_predecessors = recalculating ||
+          loop_head_idx != mir_graph_->GetTopologicalSortOrderIndexes()->Get(bb->id);
+    }
     GrowableArray<BasicBlockId>::Iterator iter(bb->predecessors);
-    for (BasicBlock* pred_bb = cu_->mir_graph->GetBasicBlock(iter.Next());
-         pred_bb != nullptr; pred_bb = cu_->mir_graph->GetBasicBlock(iter.Next())) {
-      if (lvns_[pred_bb->id] != nullptr) {
+    for (BasicBlock* pred_bb = mir_graph_->GetBasicBlock(iter.Next());
+         pred_bb != nullptr; pred_bb = mir_graph_->GetBasicBlock(iter.Next())) {
+      if (lvns_[pred_bb->id] != nullptr &&
+          (use_all_predecessors ||
+              mir_graph_->GetTopologicalSortOrderIndexes()->Get(pred_bb->id) < loop_head_idx)) {
         merge_lvns_.push_back(lvns_[pred_bb->id]);
       }
     }
@@ -87,19 +103,22 @@
     if (bb->catch_entry) {
       merge_type = LocalValueNumbering::kCatchMerge;
     } else if (bb->last_mir_insn != nullptr &&
-        (bb->last_mir_insn->dalvikInsn.opcode == Instruction::RETURN ||
+        (bb->last_mir_insn->dalvikInsn.opcode == Instruction::RETURN_VOID ||
+         bb->last_mir_insn->dalvikInsn.opcode == Instruction::RETURN ||
          bb->last_mir_insn->dalvikInsn.opcode == Instruction::RETURN_OBJECT ||
          bb->last_mir_insn->dalvikInsn.opcode == Instruction::RETURN_WIDE) &&
         (bb->first_mir_insn == bb->last_mir_insn ||
-         (bb->first_mir_insn->next == bb->last_mir_insn &&
-          static_cast<int>(bb->first_mir_insn->dalvikInsn.opcode) == kMirOpPhi))) {
+         (static_cast<int>(bb->first_mir_insn->dalvikInsn.opcode) == kMirOpPhi &&
+          (bb->first_mir_insn->next == bb->last_mir_insn ||
+           (static_cast<int>(bb->first_mir_insn->next->dalvikInsn.opcode) == kMirOpPhi &&
+            bb->first_mir_insn->next->next == bb->last_mir_insn))))) {
       merge_type = LocalValueNumbering::kReturnMerge;
     }
     // At least one predecessor must have been processed before this bb.
     CHECK(!merge_lvns_.empty());
     if (merge_lvns_.size() == 1u) {
       work_lvn_->MergeOne(*merge_lvns_[0], merge_type);
-      BasicBlock* pred_bb = cu_->mir_graph->GetBasicBlock(merge_lvns_[0]->Id());
+      BasicBlock* pred_bb = mir_graph_->GetBasicBlock(merge_lvns_[0]->Id());
       if (HasNullCheckLastInsn(pred_bb, bb->id)) {
         work_lvn_->SetSRegNullChecked(pred_bb->last_mir_insn->ssa_rep->uses[0]);
       }
@@ -112,32 +131,13 @@
 
 bool GlobalValueNumbering::FinishBasicBlock(BasicBlock* bb) {
   DCHECK(work_lvn_ != nullptr);
-  DCHECK(bb->id == work_lvn_->Id());
+  DCHECK_EQ(bb->id, work_lvn_->Id());
+  ++bbs_processed_;
   merge_lvns_.clear();
 
-  bool change = false;
-  // Look for a branch to self or an already processed child.
-  // (No need to repeat the LVN if all children are processed later.)
-  ChildBlockIterator iter(bb, cu_->mir_graph.get());
-  for (BasicBlock* child = iter.Next(); child != nullptr; child = iter.Next()) {
-    if (child == bb || lvns_[child->id] != nullptr) {
-      // If we found an already processed child, check if the LVN actually differs.
-      change = (lvns_[bb->id] == nullptr || !lvns_[bb->id]->Equals(*work_lvn_));
-      break;
-    }
-  }
-
   std::unique_ptr<const LocalValueNumbering> old_lvn(lvns_[bb->id]);
   lvns_[bb->id] = work_lvn_.release();
-
-  bb->visited = true;
-  if (change) {
-    ChildBlockIterator iter(bb, cu_->mir_graph.get());
-    for (BasicBlock* child = iter.Next(); child != nullptr; child = iter.Next()) {
-      child->visited = false;
-    }
-  }
-  return change;
+  return (old_lvn == nullptr) || !old_lvn->Equals(*lvns_[bb->id]);
 }
 
 uint16_t GlobalValueNumbering::GetFieldId(const MirFieldInfo& field_info, uint16_t type) {
@@ -188,7 +188,7 @@
     uint16_t value_name = merge_names[i];
     if (!pred_lvn->IsValueNullChecked(value_name)) {
       // Check if the predecessor has an IF_EQZ/IF_NEZ as the last insn.
-      const BasicBlock* pred_bb = cu_->mir_graph->GetBasicBlock(pred_lvn->Id());
+      const BasicBlock* pred_bb = mir_graph_->GetBasicBlock(pred_lvn->Id());
       if (!HasNullCheckLastInsn(pred_bb, work_lvn_->Id())) {
         return false;
       }
diff --git a/compiler/dex/global_value_numbering.h b/compiler/dex/global_value_numbering.h
index 7ab77b7..a12a779 100644
--- a/compiler/dex/global_value_numbering.h
+++ b/compiler/dex/global_value_numbering.h
@@ -31,7 +31,10 @@
   GlobalValueNumbering(CompilationUnit* cu, ScopedArenaAllocator* allocator);
   ~GlobalValueNumbering();
 
+  // Prepare LVN for the basic block.
   LocalValueNumbering* PrepareBasicBlock(BasicBlock* bb);
+
+  // Finish processing the basic block.
   bool FinishBasicBlock(BasicBlock* bb);
 
   // Checks that the value names didn't overflow.
@@ -42,7 +45,6 @@
   // Allow modifications.
   void AllowModifications() {
     DCHECK(Good());
-    cu_->mir_graph->ClearAllVisitedFlags();
     modifications_allowed_ = true;
   }
 
@@ -182,7 +184,7 @@
   }
 
   const BasicBlock* GetBasicBlock(uint16_t bb_id) const {
-    return cu_->mir_graph->GetBasicBlock(bb_id);
+    return mir_graph_->GetBasicBlock(bb_id);
   }
 
   static bool HasNullCheckLastInsn(const BasicBlock* pred_bb, BasicBlockId succ_id);
@@ -194,7 +196,7 @@
   }
 
   MIRGraph* GetMirGraph() const {
-    return cu_->mir_graph.get();
+    return mir_graph_;
   }
 
   ScopedArenaAllocator* Allocator() const {
@@ -202,12 +204,16 @@
   }
 
   CompilationUnit* const cu_;
+  MIRGraph* mir_graph_;
   ScopedArenaAllocator* const allocator_;
 
-  static constexpr uint32_t kMaxRepeatCount = 10u;
+  // The number of BBs that we need to process grows exponentially with the number
+  // of nested loops. Don't allow excessive processing for too many nested loops or
+  // otherwise expensive methods.
+  static constexpr uint32_t kMaxBbsToProcessMultiplyFactor = 20u;
 
-  // Track the repeat count to make sure the GVN converges quickly and abort the GVN otherwise.
-  uint32_t repeat_count_;
+  uint32_t bbs_processed_;
+  uint32_t max_bbs_to_process_;
 
   // We have 32-bit last_value_ so that we can detect when we run out of value names, see Good().
   // We usually don't check Good() until the end of LVN unless we're about to modify code.
diff --git a/compiler/dex/global_value_numbering_test.cc b/compiler/dex/global_value_numbering_test.cc
index 18adbab..c82d231 100644
--- a/compiler/dex/global_value_numbering_test.cc
+++ b/compiler/dex/global_value_numbering_test.cc
@@ -273,23 +273,20 @@
   }
 
   void PerformGVN() {
-    cu_.mir_graph->SSATransformationStart();
-    cu_.mir_graph->ComputeDFSOrders();
-    cu_.mir_graph->ComputeDominators();
-    cu_.mir_graph->ComputeTopologicalSortOrder();
-    cu_.mir_graph->SSATransformationEnd();
-    DoPerformGVN<RepeatingPreOrderDfsIterator>();
+    DoPerformGVN<LoopRepeatingTopologicalSortIterator>();
   }
 
   void PerformPreOrderDfsGVN() {
-    cu_.mir_graph->SSATransformationStart();
-    cu_.mir_graph->ComputeDFSOrders();
-    cu_.mir_graph->SSATransformationEnd();
     DoPerformGVN<RepeatingPreOrderDfsIterator>();
   }
 
   template <typename IteratorType>
   void DoPerformGVN() {
+    cu_.mir_graph->SSATransformationStart();
+    cu_.mir_graph->ComputeDFSOrders();
+    cu_.mir_graph->ComputeDominators();
+    cu_.mir_graph->ComputeTopologicalSortOrder();
+    cu_.mir_graph->SSATransformationEnd();
     ASSERT_TRUE(gvn_ == nullptr);
     gvn_.reset(new (allocator_.get()) GlobalValueNumbering(&cu_, allocator_.get()));
     ASSERT_FALSE(gvn_->CanModify());
@@ -313,7 +310,7 @@
     ASSERT_TRUE(gvn_->Good());
     ASSERT_FALSE(gvn_->CanModify());
     gvn_->AllowModifications();
-    PreOrderDfsIterator iterator(cu_.mir_graph.get());
+    TopologicalSortIterator iterator(cu_.mir_graph.get());
     for (BasicBlock* bb = iterator.Next(); bb != nullptr; bb = iterator.Next()) {
       LocalValueNumbering* lvn = gvn_->PrepareBasicBlock(bb);
       if (lvn != nullptr) {
@@ -340,7 +337,6 @@
     cu_.mir_graph.reset(new MIRGraph(&cu_, &cu_.arena));
     cu_.access_flags = kAccStatic;  // Don't let "this" interfere with this test.
     allocator_.reset(ScopedArenaAllocator::Create(&cu_.arena_stack));
-    // gvn_->AllowModifications();
   }
 
   ArenaPool pool_;
@@ -1917,7 +1913,7 @@
   PerformPreOrderDfsGVN();
 }
 
-TEST_F(GlobalValueNumberingTestTwoConsecutiveLoops, DISABLED_IFieldAndPhi) {
+TEST_F(GlobalValueNumberingTestTwoConsecutiveLoops, IFieldAndPhi) {
   static const IFieldDef ifields[] = {
       { 0u, 1u, 0u, false },  // Int.
   };
@@ -1954,7 +1950,7 @@
   EXPECT_EQ(value_names_[5], value_names_[12]);
 }
 
-TEST_F(GlobalValueNumberingTestTwoConsecutiveLoops, DISABLED_NullCheck) {
+TEST_F(GlobalValueNumberingTestTwoConsecutiveLoops, NullCheck) {
   static const IFieldDef ifields[] = {
       { 0u, 1u, 0u, false },  // Int.
   };
@@ -2024,14 +2020,10 @@
   EXPECT_NE(value_names_[2], value_names_[6]);
   EXPECT_NE(value_names_[3], value_names_[7]);
   EXPECT_NE(value_names_[4], value_names_[8]);
-  EXPECT_NE(value_names_[0], value_names_[12]);
-  EXPECT_NE(value_names_[1], value_names_[13]);
-  EXPECT_NE(value_names_[2], value_names_[14]);
-  EXPECT_NE(value_names_[3], value_names_[15]);
   EXPECT_EQ(value_names_[4], value_names_[12]);
-  EXPECT_NE(value_names_[5], value_names_[13]);
-  EXPECT_NE(value_names_[6], value_names_[14]);
-  EXPECT_NE(value_names_[7], value_names_[15]);
+  EXPECT_EQ(value_names_[5], value_names_[13]);
+  EXPECT_EQ(value_names_[6], value_names_[14]);
+  EXPECT_EQ(value_names_[7], value_names_[15]);
   EXPECT_EQ(value_names_[12], value_names_[20]);
   EXPECT_EQ(value_names_[13], value_names_[21]);
   EXPECT_EQ(value_names_[14], value_names_[22]);
@@ -2049,7 +2041,7 @@
   }
 }
 
-TEST_F(GlobalValueNumberingTestTwoNestedLoops, DISABLED_IFieldAndPhi) {
+TEST_F(GlobalValueNumberingTestTwoNestedLoops, IFieldAndPhi) {
   static const IFieldDef ifields[] = {
       { 0u, 1u, 0u, false },  // Int.
   };
@@ -2097,7 +2089,7 @@
   static const BBDef bbs[] = {
       DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()),
       DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()),
-      DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(4)),
+      DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(5)),
       DEF_BB(kDalvikByteCode, DEF_SUCC1(4), DEF_PRED1(1)),
       DEF_BB(kDalvikByteCode, DEF_SUCC1(5), DEF_PRED1(3)),
       DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED2(3, 4)),
@@ -2108,6 +2100,15 @@
   PrepareBasicBlocks(bbs);
   BasicBlock* catch_handler = cu_.mir_graph->GetBasicBlock(5u);
   catch_handler->catch_entry = true;
+  // Add successor block info to the check block.
+  BasicBlock* check_bb = cu_.mir_graph->GetBasicBlock(3u);
+  check_bb->successor_block_list_type = kCatch;
+  check_bb->successor_blocks = new (&cu_.arena) GrowableArray<SuccessorBlockInfo*>(
+      &cu_.arena, 2, kGrowableArraySuccessorBlocks);
+  SuccessorBlockInfo* successor_block_info = reinterpret_cast<SuccessorBlockInfo*>
+      (cu_.arena.Alloc(sizeof(SuccessorBlockInfo), kArenaAllocSuccessor));
+  successor_block_info->block = catch_handler->id;
+  check_bb->successor_blocks->Insert(successor_block_info);
   BasicBlock* merge_block = cu_.mir_graph->GetBasicBlock(4u);
   std::swap(merge_block->taken, merge_block->fall_through);
   PrepareMIRs(mirs);
diff --git a/compiler/dex/local_value_numbering.cc b/compiler/dex/local_value_numbering.cc
index ef893fe..0e072ec 100644
--- a/compiler/dex/local_value_numbering.cc
+++ b/compiler/dex/local_value_numbering.cc
@@ -534,6 +534,10 @@
          (!cmp(entry, *work_it) && !(work_it->second == entry.second)))) {
       work_it = work_map->erase(work_it);
     }
+    if (work_it == work_end) {
+      return;
+    }
+    ++work_it;
   }
 }
 
@@ -855,13 +859,18 @@
   MergeMemoryVersions(merge_type == kCatchMerge);
 
   // Merge non-aliasing maps/sets.
-  MergeSets<IFieldLocToValueMap, &LocalValueNumbering::non_aliasing_ifield_value_map_,
-            &LocalValueNumbering::MergeNonAliasingIFieldValues>();
-  MergeSets<NonAliasingArrayValuesMap, &LocalValueNumbering::non_aliasing_array_value_map_,
-            &LocalValueNumbering::MergeAliasingValues<
-                NonAliasingArrayValuesMap, &LocalValueNumbering::non_aliasing_array_value_map_,
-                NonAliasingArrayVersions>>();
   IntersectSets<ValueNameSet, &LocalValueNumbering::non_aliasing_refs_>();
+  if (!non_aliasing_refs_.empty() && merge_type == kCatchMerge) {
+    PruneNonAliasingRefsForCatch();
+  }
+  if (!non_aliasing_refs_.empty()) {
+    MergeSets<IFieldLocToValueMap, &LocalValueNumbering::non_aliasing_ifield_value_map_,
+              &LocalValueNumbering::MergeNonAliasingIFieldValues>();
+    MergeSets<NonAliasingArrayValuesMap, &LocalValueNumbering::non_aliasing_array_value_map_,
+              &LocalValueNumbering::MergeAliasingValues<
+                  NonAliasingArrayValuesMap, &LocalValueNumbering::non_aliasing_array_value_map_,
+                  NonAliasingArrayVersions>>();
+  }
 
   // We won't do anything complicated for range checks, just calculate the intersection.
   IntersectSets<RangeCheckSet, &LocalValueNumbering::range_checked_>();
@@ -872,7 +881,6 @@
 
   if (merge_type == kCatchMerge) {
     // Memory is clobbered. New memory version already created, don't merge aliasing locations.
-    PruneNonAliasingRefsForCatch();
     return;
   }
 
@@ -1361,8 +1369,8 @@
     case Instruction::MONITOR_EXIT:
       HandleNullCheck(mir, GetOperandValue(mir->ssa_rep->uses[0]));
       // If we're running GVN and CanModify(), uneliminated null check indicates bytecode error.
-      if ((gvn_->cu_->disable_opt & (1 << kGlobalValueNumbering)) == 0 && gvn_->CanModify() &&
-          (mir->optimization_flags & MIR_IGNORE_NULL_CHECK) == 0) {
+      if ((gvn_->GetCompilationUnit()->disable_opt & (1u << kGlobalValueNumbering)) == 0u &&
+          gvn_->CanModify() && (mir->optimization_flags & MIR_IGNORE_NULL_CHECK) == 0) {
         LOG(WARNING) << "Bytecode error: MONITOR_EXIT is still null checked at 0x" << std::hex
             << mir->offset << " in " << PrettyMethod(gvn_->cu_->method_idx, *gvn_->cu_->dex_file);
       }
diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc
index 1c8a9b5..331af21 100644
--- a/compiler/dex/mir_graph.cc
+++ b/compiler/dex/mir_graph.cc
@@ -84,6 +84,9 @@
       dfs_post_order_(NULL),
       dom_post_order_traversal_(NULL),
       topological_order_(nullptr),
+      topological_order_loop_ends_(nullptr),
+      topological_order_indexes_(nullptr),
+      topological_order_loop_head_stack_(nullptr),
       i_dom_list_(NULL),
       def_block_matrix_(NULL),
       temp_scoped_alloc_(),
@@ -1526,117 +1529,248 @@
   temp_scoped_alloc_.reset();
 }
 
-void MIRGraph::ComputeTopologicalSortOrder() {
-  // Clear the nodes.
-  ClearAllVisitedFlags();
-
-  // Create the topological order if need be.
-  if (topological_order_ == nullptr) {
-    topological_order_ = new (arena_) GrowableArray<BasicBlockId>(arena_, GetNumBlocks());
-  }
-  topological_order_->Reset();
-
-  ScopedArenaAllocator allocator(&cu_->arena_stack);
-  ScopedArenaQueue<BasicBlock*> q(allocator.Adapter());
-  ScopedArenaVector<size_t> visited_cnt_values(GetNumBlocks(), 0u, allocator.Adapter());
-
-  // Set up visitedCntValues map for all BB. The default value for this counters in the map is zero.
-  // also fill initial queue.
-  GrowableArray<BasicBlock*>::Iterator iterator(&block_list_);
-
-  size_t num_blocks = 0u;
-  while (true) {
-    BasicBlock* bb = iterator.Next();
-
-    if (bb == nullptr) {
-      break;
+static BasicBlock* SelectTopologicalSortOrderFallBack(
+    MIRGraph* mir_graph, const ArenaBitVector* current_loop,
+    const ScopedArenaVector<size_t>* visited_cnt_values, ScopedArenaAllocator* allocator,
+    ScopedArenaVector<BasicBlockId>* tmp_stack) {
+  // No true loop head has been found but there may be true loop heads after the mess we need
+  // to resolve. To avoid taking one of those, pick the candidate with the highest number of
+  // reachable unvisited nodes. That candidate will surely be a part of a loop.
+  BasicBlock* fall_back = nullptr;
+  size_t fall_back_num_reachable = 0u;
+  // Reuse the same bit vector for each candidate to mark reachable unvisited blocks.
+  ArenaBitVector candidate_reachable(allocator, mir_graph->GetNumBlocks(), false, kBitMapMisc);
+  AllNodesIterator iter(mir_graph);
+  for (BasicBlock* candidate = iter.Next(); candidate != nullptr; candidate = iter.Next()) {
+    if (candidate->hidden ||                            // Hidden, or
+        candidate->visited ||                           // already processed, or
+        (*visited_cnt_values)[candidate->id] == 0u ||   // no processed predecessors, or
+        (current_loop != nullptr &&                     // outside current loop.
+         !current_loop->IsBitSet(candidate->id))) {
+      continue;
     }
+    DCHECK(tmp_stack->empty());
+    tmp_stack->push_back(candidate->id);
+    candidate_reachable.ClearAllBits();
+    size_t num_reachable = 0u;
+    while (!tmp_stack->empty()) {
+      BasicBlockId current_id = tmp_stack->back();
+      tmp_stack->pop_back();
+      BasicBlock* current_bb = mir_graph->GetBasicBlock(current_id);
+      DCHECK(current_bb != nullptr);
+      ChildBlockIterator child_iter(current_bb, mir_graph);
+      BasicBlock* child_bb = child_iter.Next();
+      for ( ; child_bb != nullptr; child_bb = child_iter.Next()) {
+        DCHECK(!child_bb->hidden);
+        if (child_bb->visited ||                            // Already processed, or
+            (current_loop != nullptr &&                     // outside current loop.
+             !current_loop->IsBitSet(child_bb->id))) {
+          continue;
+        }
+        if (!candidate_reachable.IsBitSet(child_bb->id)) {
+          candidate_reachable.SetBit(child_bb->id);
+          tmp_stack->push_back(child_bb->id);
+          num_reachable += 1u;
+        }
+      }
+    }
+    if (fall_back_num_reachable < num_reachable) {
+      fall_back_num_reachable = num_reachable;
+      fall_back = candidate;
+    }
+  }
+  return fall_back;
+}
 
+// Compute from which unvisited blocks is bb_id reachable through unvisited blocks.
+static void ComputeUnvisitedReachableFrom(MIRGraph* mir_graph, BasicBlockId bb_id,
+                                          ArenaBitVector* reachable,
+                                          ScopedArenaVector<BasicBlockId>* tmp_stack) {
+  // NOTE: Loop heads indicated by the "visited" flag.
+  DCHECK(tmp_stack->empty());
+  reachable->ClearAllBits();
+  tmp_stack->push_back(bb_id);
+  while (!tmp_stack->empty()) {
+    BasicBlockId current_id = tmp_stack->back();
+    tmp_stack->pop_back();
+    BasicBlock* current_bb = mir_graph->GetBasicBlock(current_id);
+    DCHECK(current_bb != nullptr);
+    GrowableArray<BasicBlockId>::Iterator iter(current_bb->predecessors);
+    BasicBlock* pred_bb = mir_graph->GetBasicBlock(iter.Next());
+    for ( ; pred_bb != nullptr; pred_bb = mir_graph->GetBasicBlock(iter.Next())) {
+      if (!pred_bb->visited && !reachable->IsBitSet(pred_bb->id)) {
+        reachable->SetBit(pred_bb->id);
+        tmp_stack->push_back(pred_bb->id);
+      }
+    }
+  }
+}
+
+void MIRGraph::ComputeTopologicalSortOrder() {
+  ScopedArenaAllocator allocator(&cu_->arena_stack);
+  unsigned int num_blocks = GetNumBlocks();
+
+  ScopedArenaQueue<BasicBlock*> q(allocator.Adapter());
+  ScopedArenaVector<size_t> visited_cnt_values(num_blocks, 0u, allocator.Adapter());
+  ScopedArenaVector<BasicBlockId> loop_head_stack(allocator.Adapter());
+  size_t max_nested_loops = 0u;
+  ArenaBitVector loop_exit_blocks(&allocator, num_blocks, false, kBitMapMisc);
+  loop_exit_blocks.ClearAllBits();
+
+  // Count the number of blocks to process and add the entry block(s).
+  GrowableArray<BasicBlock*>::Iterator iterator(&block_list_);
+  unsigned int num_blocks_to_process = 0u;
+  for (BasicBlock* bb = iterator.Next(); bb != nullptr; bb = iterator.Next()) {
     if (bb->hidden == true) {
       continue;
     }
 
-    num_blocks += 1u;
-    size_t unvisited_predecessor_count = bb->predecessors->Size();
+    num_blocks_to_process += 1u;
 
-    GrowableArray<BasicBlockId>::Iterator pred_iterator(bb->predecessors);
-    // To process loops we should not wait for dominators.
-    while (true) {
-      BasicBlock* pred_bb = GetBasicBlock(pred_iterator.Next());
-
-      if (pred_bb == nullptr) {
-        break;
-      }
-
-      // Skip the backward branch or hidden predecessor.
-      if (pred_bb->hidden ||
-          (pred_bb->dominators != nullptr && pred_bb->dominators->IsBitSet(bb->id))) {
-        unvisited_predecessor_count -= 1u;
-      }
-    }
-
-    visited_cnt_values[bb->id] = unvisited_predecessor_count;
-
-    // Add entry block to queue.
-    if (unvisited_predecessor_count == 0) {
+    if (bb->predecessors->Size() == 0u) {
+      // Add entry block to the queue.
       q.push(bb);
     }
   }
 
-  // We can get a cycle where none of the blocks dominates the other. Therefore don't
-  // stop when the queue is empty, continue until we've processed all the blocks.
-  AllNodesIterator candidate_iter(this);  // For the empty queue case.
-  while (num_blocks != 0u) {
-    num_blocks -= 1u;
+  // Create the topological order if need be.
+  if (topological_order_ == nullptr) {
+    topological_order_ = new (arena_) GrowableArray<BasicBlockId>(arena_, num_blocks);
+    topological_order_loop_ends_ = new (arena_) GrowableArray<uint16_t>(arena_, num_blocks);
+    topological_order_indexes_ = new (arena_) GrowableArray<uint16_t>(arena_, num_blocks);
+  }
+  topological_order_->Reset();
+  topological_order_loop_ends_->Reset();
+  topological_order_indexes_->Reset();
+  topological_order_loop_ends_->Resize(num_blocks);
+  topological_order_indexes_->Resize(num_blocks);
+  for (BasicBlockId i = 0; i != num_blocks; ++i) {
+    topological_order_loop_ends_->Insert(0u);
+    topological_order_indexes_->Insert(static_cast<uint16_t>(-1));
+  }
+
+  // Mark all blocks as unvisited.
+  ClearAllVisitedFlags();
+
+  // For loop heads, keep track from which blocks they are reachable not going through other
+  // loop heads. Other loop heads are excluded to detect the heads of nested loops. The children
+  // in this set go into the loop body, the other children are jumping over the loop.
+  ScopedArenaVector<ArenaBitVector*> loop_head_reachable_from(allocator.Adapter());
+  loop_head_reachable_from.resize(num_blocks, nullptr);
+  // Reuse the same temp stack whenever calculating a loop_head_reachable_from[loop_head_id].
+  ScopedArenaVector<BasicBlockId> tmp_stack(allocator.Adapter());
+
+  while (num_blocks_to_process != 0u) {
     BasicBlock* bb = nullptr;
     if (!q.empty()) {
+      num_blocks_to_process -= 1u;
       // Get top.
       bb = q.front();
       q.pop();
-    } else {
-      // Find some block we didn't visit yet that has at least one visited predecessor.
-      while (bb == nullptr) {
-        BasicBlock* candidate = candidate_iter.Next();
-        DCHECK(candidate != nullptr);
-        if (candidate->visited || candidate->hidden) {
-          continue;
-        }
-        GrowableArray<BasicBlockId>::Iterator iter(candidate->predecessors);
-        for (BasicBlock* pred_bb = GetBasicBlock(iter.Next()); pred_bb != nullptr;
-            pred_bb = GetBasicBlock(iter.Next())) {
-          if (!pred_bb->hidden && pred_bb->visited) {
-            bb = candidate;
-            break;
+      if (bb->visited) {
+        // Loop head: it was already processed, mark end and copy exit blocks to the queue.
+        DCHECK(q.empty()) << PrettyMethod(cu_->method_idx, *cu_->dex_file);
+        uint16_t idx = static_cast<uint16_t>(topological_order_->Size());
+        topological_order_loop_ends_->Put(topological_order_indexes_->Get(bb->id), idx);
+        DCHECK_EQ(loop_head_stack.back(), bb->id);
+        loop_head_stack.pop_back();
+        ArenaBitVector* reachable =
+            loop_head_stack.empty() ? nullptr : loop_head_reachable_from[loop_head_stack.back()];
+        for (BasicBlockId candidate_id : loop_exit_blocks.Indexes()) {
+          if (reachable == nullptr || reachable->IsBitSet(candidate_id)) {
+            q.push(GetBasicBlock(candidate_id));
+            // NOTE: The BitVectorSet::IndexIterator will not check the pointed-to bit again,
+            // so clearing the bit has no effect on the iterator.
+            loop_exit_blocks.ClearBit(candidate_id);
           }
         }
+        continue;
       }
+    } else {
+      // Find the new loop head.
+      AllNodesIterator iter(this);
+      while (true) {
+        BasicBlock* candidate = iter.Next();
+        if (candidate == nullptr) {
+          // We did not find a true loop head, fall back to a reachable block in any loop.
+          ArenaBitVector* current_loop =
+              loop_head_stack.empty() ? nullptr : loop_head_reachable_from[loop_head_stack.back()];
+          bb = SelectTopologicalSortOrderFallBack(this, current_loop, &visited_cnt_values,
+                                                  &allocator, &tmp_stack);
+          DCHECK(bb != nullptr) << PrettyMethod(cu_->method_idx, *cu_->dex_file);
+          if (kIsDebugBuild && cu_->dex_file != nullptr) {
+            LOG(INFO) << "Topological sort order: Using fall-back in "
+                << PrettyMethod(cu_->method_idx, *cu_->dex_file) << " BB #" << bb->id
+                << " @0x" << std::hex << bb->start_offset
+                << ", num_blocks = " << std::dec << num_blocks;
+          }
+          break;
+        }
+        if (candidate->hidden ||                            // Hidden, or
+            candidate->visited ||                           // already processed, or
+            visited_cnt_values[candidate->id] == 0u ||      // no processed predecessors, or
+            (!loop_head_stack.empty() &&                    // outside current loop.
+             !loop_head_reachable_from[loop_head_stack.back()]->IsBitSet(candidate->id))) {
+          continue;
+        }
+
+        GrowableArray<BasicBlockId>::Iterator pred_iter(candidate->predecessors);
+        BasicBlock* pred_bb = GetBasicBlock(pred_iter.Next());
+        for ( ; pred_bb != nullptr; pred_bb = GetBasicBlock(pred_iter.Next())) {
+          if (pred_bb != candidate && !pred_bb->visited &&
+              !pred_bb->dominators->IsBitSet(candidate->id)) {
+            break;  // Keep non-null pred_bb to indicate failure.
+          }
+        }
+        if (pred_bb == nullptr) {
+          bb = candidate;
+          break;
+        }
+      }
+      // Compute blocks from which the loop head is reachable and process those blocks first.
+      ArenaBitVector* reachable =
+          new (&allocator) ArenaBitVector(&allocator, num_blocks, false, kBitMapMisc);
+      loop_head_reachable_from[bb->id] = reachable;
+      ComputeUnvisitedReachableFrom(this, bb->id, reachable, &tmp_stack);
+      // Now mark as loop head. (Even if it's only a fall back when we don't find a true loop.)
+      loop_head_stack.push_back(bb->id);
+      max_nested_loops = std::max(max_nested_loops, loop_head_stack.size());
     }
 
     DCHECK_EQ(bb->hidden, false);
     DCHECK_EQ(bb->visited, false);
-
-    // We've visited all the predecessors. So, we can visit bb.
     bb->visited = true;
 
     // Now add the basic block.
+    uint16_t idx = static_cast<uint16_t>(topological_order_->Size());
+    topological_order_indexes_->Put(bb->id, idx);
     topological_order_->Insert(bb->id);
 
-    // Reduce visitedCnt for all the successors and add into the queue ones with visitedCnt equals to zero.
+    // Update visited_cnt_values for children.
     ChildBlockIterator succIter(bb, this);
     BasicBlock* successor = succIter.Next();
     for ( ; successor != nullptr; successor = succIter.Next()) {
-      if (successor->visited || successor->hidden) {
+      if (successor->hidden) {
         continue;
       }
 
-      // one more predecessor was visited.
-      DCHECK_NE(visited_cnt_values[successor->id], 0u);
-      visited_cnt_values[successor->id] -= 1u;
-      if (visited_cnt_values[successor->id] == 0u) {
-        q.push(successor);
+      // One more predecessor was visited.
+      visited_cnt_values[successor->id] += 1u;
+      if (visited_cnt_values[successor->id] == successor->predecessors->Size()) {
+        if (loop_head_stack.empty() ||
+            loop_head_reachable_from[loop_head_stack.back()]->IsBitSet(successor->id)) {
+          q.push(successor);
+        } else {
+          DCHECK(!loop_exit_blocks.IsBitSet(successor->id));
+          loop_exit_blocks.SetBit(successor->id);
+        }
       }
     }
   }
+
+  // Prepare the loop head stack for iteration.
+  topological_order_loop_head_stack_ =
+      new (arena_) GrowableArray<std::pair<uint16_t, bool>>(arena_, max_nested_loops);
 }
 
 bool BasicBlock::IsExceptionBlock() const {
diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h
index 79b3edf..768ae21 100644
--- a/compiler/dex/mir_graph.h
+++ b/compiler/dex/mir_graph.h
@@ -27,6 +27,7 @@
 #include "mir_method_info.h"
 #include "utils/arena_bit_vector.h"
 #include "utils/growable_array.h"
+#include "utils/scoped_arena_containers.h"
 #include "reg_location.h"
 #include "reg_storage.h"
 
@@ -689,6 +690,21 @@
     return topological_order_;
   }
 
+  GrowableArray<BasicBlockId>* GetTopologicalSortOrderLoopEnds() {
+    DCHECK(topological_order_loop_ends_ != nullptr);
+    return topological_order_loop_ends_;
+  }
+
+  GrowableArray<BasicBlockId>* GetTopologicalSortOrderIndexes() {
+    DCHECK(topological_order_indexes_ != nullptr);
+    return topological_order_indexes_;
+  }
+
+  GrowableArray<std::pair<uint16_t, bool>>* GetTopologicalSortOrderLoopHeadStack() {
+    DCHECK(topological_order_loop_head_stack_ != nullptr);
+    return topological_order_loop_head_stack_;
+  }
+
   bool IsConst(int32_t s_reg) const {
     return is_constant_v_->IsBitSet(s_reg);
   }
@@ -1132,6 +1148,14 @@
   GrowableArray<BasicBlockId>* dfs_post_order_;
   GrowableArray<BasicBlockId>* dom_post_order_traversal_;
   GrowableArray<BasicBlockId>* topological_order_;
+  // Indexes in topological_order_ need to be only as big as the BasicBlockId.
+  COMPILE_ASSERT(sizeof(BasicBlockId) == sizeof(uint16_t), assuming_16_bit_BasicBlockId);
+  // For each loop head, remember the past-the-end index of the end of the loop. 0 if not loop head.
+  GrowableArray<uint16_t>* topological_order_loop_ends_;
+  // Map BB ids to topological_order_ indexes. 0xffff if not included (hidden or null block).
+  GrowableArray<uint16_t>* topological_order_indexes_;
+  // Stack of the loop head indexes and recalculation flags for RepeatingTopologicalSortIterator.
+  GrowableArray<std::pair<uint16_t, bool>>* topological_order_loop_head_stack_;
   int* i_dom_list_;
   ArenaBitVector** def_block_matrix_;    // num_dalvik_register x num_blocks.
   std::unique_ptr<ScopedArenaAllocator> temp_scoped_alloc_;
@@ -1177,6 +1201,7 @@
   friend class ClassInitCheckEliminationTest;
   friend class GlobalValueNumberingTest;
   friend class LocalValueNumberingTest;
+  friend class TopologicalSortOrderTest;
 };
 
 }  // namespace art
diff --git a/compiler/dex/mir_graph_test.cc b/compiler/dex/mir_graph_test.cc
new file mode 100644
index 0000000..932f453
--- /dev/null
+++ b/compiler/dex/mir_graph_test.cc
@@ -0,0 +1,381 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mir_graph.h"
+#include "gtest/gtest.h"
+
+namespace art {
+
+class TopologicalSortOrderTest : public testing::Test {
+ protected:
+  struct BBDef {
+    static constexpr size_t kMaxSuccessors = 4;
+    static constexpr size_t kMaxPredecessors = 4;
+
+    BBType type;
+    size_t num_successors;
+    BasicBlockId successors[kMaxPredecessors];
+    size_t num_predecessors;
+    BasicBlockId predecessors[kMaxPredecessors];
+  };
+
+#define DEF_SUCC0() \
+    0u, { }
+#define DEF_SUCC1(s1) \
+    1u, { s1 }
+#define DEF_SUCC2(s1, s2) \
+    2u, { s1, s2 }
+#define DEF_SUCC3(s1, s2, s3) \
+    3u, { s1, s2, s3 }
+#define DEF_SUCC4(s1, s2, s3, s4) \
+    4u, { s1, s2, s3, s4 }
+#define DEF_PRED0() \
+    0u, { }
+#define DEF_PRED1(p1) \
+    1u, { p1 }
+#define DEF_PRED2(p1, p2) \
+    2u, { p1, p2 }
+#define DEF_PRED3(p1, p2, p3) \
+    3u, { p1, p2, p3 }
+#define DEF_PRED4(p1, p2, p3, p4) \
+    4u, { p1, p2, p3, p4 }
+#define DEF_BB(type, succ, pred) \
+    { type, succ, pred }
+
+  void DoPrepareBasicBlocks(const BBDef* defs, size_t count) {
+    cu_.mir_graph->block_id_map_.clear();
+    cu_.mir_graph->block_list_.Reset();
+    ASSERT_LT(3u, count);  // null, entry, exit and at least one bytecode block.
+    ASSERT_EQ(kNullBlock, defs[0].type);
+    ASSERT_EQ(kEntryBlock, defs[1].type);
+    ASSERT_EQ(kExitBlock, defs[2].type);
+    for (size_t i = 0u; i != count; ++i) {
+      const BBDef* def = &defs[i];
+      BasicBlock* bb = cu_.mir_graph->NewMemBB(def->type, i);
+      cu_.mir_graph->block_list_.Insert(bb);
+      if (def->num_successors <= 2) {
+        bb->successor_block_list_type = kNotUsed;
+        bb->successor_blocks = nullptr;
+        bb->fall_through = (def->num_successors >= 1) ? def->successors[0] : 0u;
+        bb->taken = (def->num_successors >= 2) ? def->successors[1] : 0u;
+      } else {
+        bb->successor_block_list_type = kPackedSwitch;
+        bb->fall_through = 0u;
+        bb->taken = 0u;
+        bb->successor_blocks = new (&cu_.arena) GrowableArray<SuccessorBlockInfo*>(
+            &cu_.arena, def->num_successors, kGrowableArraySuccessorBlocks);
+        for (size_t j = 0u; j != def->num_successors; ++j) {
+          SuccessorBlockInfo* successor_block_info =
+              static_cast<SuccessorBlockInfo*>(cu_.arena.Alloc(sizeof(SuccessorBlockInfo),
+                                                               kArenaAllocSuccessor));
+          successor_block_info->block = j;
+          successor_block_info->key = 0u;  // Not used by class init check elimination.
+          bb->successor_blocks->Insert(successor_block_info);
+        }
+      }
+      bb->predecessors = new (&cu_.arena) GrowableArray<BasicBlockId>(
+          &cu_.arena, def->num_predecessors, kGrowableArrayPredecessors);
+      for (size_t j = 0u; j != def->num_predecessors; ++j) {
+        ASSERT_NE(0u, def->predecessors[j]);
+        bb->predecessors->Insert(def->predecessors[j]);
+      }
+      if (def->type == kDalvikByteCode || def->type == kEntryBlock || def->type == kExitBlock) {
+        bb->data_flow_info = static_cast<BasicBlockDataFlow*>(
+            cu_.arena.Alloc(sizeof(BasicBlockDataFlow), kArenaAllocDFInfo));
+      }
+    }
+    cu_.mir_graph->num_blocks_ = count;
+    ASSERT_EQ(count, cu_.mir_graph->block_list_.Size());
+    cu_.mir_graph->entry_block_ = cu_.mir_graph->block_list_.Get(1);
+    ASSERT_EQ(kEntryBlock, cu_.mir_graph->entry_block_->block_type);
+    cu_.mir_graph->exit_block_ = cu_.mir_graph->block_list_.Get(2);
+    ASSERT_EQ(kExitBlock, cu_.mir_graph->exit_block_->block_type);
+  }
+
+  template <size_t count>
+  void PrepareBasicBlocks(const BBDef (&defs)[count]) {
+    DoPrepareBasicBlocks(defs, count);
+  }
+
+  void ComputeTopologicalSortOrder() {
+    cu_.mir_graph->SSATransformationStart();
+    cu_.mir_graph->ComputeDFSOrders();
+    cu_.mir_graph->ComputeDominators();
+    cu_.mir_graph->ComputeTopologicalSortOrder();
+    cu_.mir_graph->SSATransformationEnd();
+    ASSERT_NE(cu_.mir_graph->topological_order_, nullptr);
+    ASSERT_NE(cu_.mir_graph->topological_order_loop_ends_, nullptr);
+    ASSERT_NE(cu_.mir_graph->topological_order_indexes_, nullptr);
+    ASSERT_EQ(cu_.mir_graph->GetNumBlocks(), cu_.mir_graph->topological_order_indexes_->Size());
+    for (size_t i = 0, size = cu_.mir_graph->GetTopologicalSortOrder()->Size(); i != size; ++i) {
+      ASSERT_LT(cu_.mir_graph->topological_order_->Get(i), cu_.mir_graph->GetNumBlocks());
+      BasicBlockId id = cu_.mir_graph->topological_order_->Get(i);
+      EXPECT_EQ(i, cu_.mir_graph->topological_order_indexes_->Get(id));
+    }
+  }
+
+  void DoCheckOrder(const BasicBlockId* ids, size_t count) {
+    ASSERT_EQ(count, cu_.mir_graph->GetTopologicalSortOrder()->Size());
+    for (size_t i = 0; i != count; ++i) {
+      EXPECT_EQ(ids[i], cu_.mir_graph->GetTopologicalSortOrder()->Get(i)) << i;
+    }
+  }
+
+  template <size_t count>
+  void CheckOrder(const BasicBlockId (&ids)[count]) {
+    DoCheckOrder(ids, count);
+  }
+
+  void DoCheckLoopEnds(const uint16_t* ends, size_t count) {
+    for (size_t i = 0; i != count; ++i) {
+      ASSERT_LT(i, cu_.mir_graph->GetTopologicalSortOrderLoopEnds()->Size());
+      EXPECT_EQ(ends[i], cu_.mir_graph->GetTopologicalSortOrderLoopEnds()->Get(i)) << i;
+    }
+  }
+
+  template <size_t count>
+  void CheckLoopEnds(const uint16_t (&ends)[count]) {
+    DoCheckLoopEnds(ends, count);
+  }
+
+  TopologicalSortOrderTest()
+      : pool_(),
+        cu_(&pool_) {
+    cu_.mir_graph.reset(new MIRGraph(&cu_, &cu_.arena));
+  }
+
+  ArenaPool pool_;
+  CompilationUnit cu_;
+};
+
+TEST_F(TopologicalSortOrderTest, DoWhile) {
+  const BBDef bbs[] = {
+      DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()),
+      DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()),
+      DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(5)),
+      DEF_BB(kDalvikByteCode, DEF_SUCC1(4), DEF_PRED1(1)),
+      DEF_BB(kDalvikByteCode, DEF_SUCC2(5, 4), DEF_PRED2(3, 4)),  // "taken" loops to self.
+      DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED1(4)),
+  };
+  const BasicBlockId expected_order[] = {
+      1, 3, 4, 5, 2
+  };
+  const uint16_t loop_ends[] = {
+      0, 0, 3, 0, 0
+  };
+
+  PrepareBasicBlocks(bbs);
+  ComputeTopologicalSortOrder();
+  CheckOrder(expected_order);
+  CheckLoopEnds(loop_ends);
+}
+
+TEST_F(TopologicalSortOrderTest, While) {
+  const BBDef bbs[] = {
+      DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()),
+      DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()),
+      DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(5)),
+      DEF_BB(kDalvikByteCode, DEF_SUCC2(4, 5), DEF_PRED2(1, 4)),
+      DEF_BB(kDalvikByteCode, DEF_SUCC1(3), DEF_PRED1(3)),     // Loops to 3.
+      DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED1(3)),
+  };
+  const BasicBlockId expected_order[] = {
+      1, 3, 4, 5, 2
+  };
+  const uint16_t loop_ends[] = {
+      0, 3, 0, 0, 0
+  };
+
+  PrepareBasicBlocks(bbs);
+  ComputeTopologicalSortOrder();
+  CheckOrder(expected_order);
+  CheckLoopEnds(loop_ends);
+}
+
+TEST_F(TopologicalSortOrderTest, WhileWithTwoBackEdges) {
+  const BBDef bbs[] = {
+      DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()),
+      DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()),
+      DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(6)),
+      DEF_BB(kDalvikByteCode, DEF_SUCC2(4, 6), DEF_PRED3(1, 4, 5)),
+      DEF_BB(kDalvikByteCode, DEF_SUCC2(5, 3), DEF_PRED1(3)),     // Loops to 3.
+      DEF_BB(kDalvikByteCode, DEF_SUCC1(3), DEF_PRED1(4)),        // Loops to 3.
+      DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED1(3)),
+  };
+  const BasicBlockId expected_order[] = {
+      1, 3, 4, 5, 6, 2
+  };
+  const uint16_t loop_ends[] = {
+      0, 4, 0, 0, 0, 0
+  };
+
+  PrepareBasicBlocks(bbs);
+  ComputeTopologicalSortOrder();
+  CheckOrder(expected_order);
+  CheckLoopEnds(loop_ends);
+}
+
+TEST_F(TopologicalSortOrderTest, NestedLoop) {
+  const BBDef bbs[] = {
+      DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()),
+      DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()),
+      DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(7)),
+      DEF_BB(kDalvikByteCode, DEF_SUCC2(4, 7), DEF_PRED2(1, 6)),
+      DEF_BB(kDalvikByteCode, DEF_SUCC2(5, 6), DEF_PRED2(3, 5)),
+      DEF_BB(kDalvikByteCode, DEF_SUCC1(4), DEF_PRED1(4)),            // Loops to 4.
+      DEF_BB(kDalvikByteCode, DEF_SUCC1(3), DEF_PRED1(4)),            // Loops to 3.
+      DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED1(3)),
+  };
+  const BasicBlockId expected_order[] = {
+      1, 3, 4, 5, 6, 7, 2
+  };
+  const uint16_t loop_ends[] = {
+      0, 5, 4, 0, 0, 0, 0
+  };
+
+  PrepareBasicBlocks(bbs);
+  ComputeTopologicalSortOrder();
+  CheckOrder(expected_order);
+  CheckLoopEnds(loop_ends);
+}
+
+TEST_F(TopologicalSortOrderTest, NestedLoopHeadLoops) {
+  const BBDef bbs[] = {
+      DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()),
+      DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()),
+      DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(6)),
+      DEF_BB(kDalvikByteCode, DEF_SUCC2(4, 6), DEF_PRED2(1, 4)),
+      DEF_BB(kDalvikByteCode, DEF_SUCC2(5, 3), DEF_PRED2(3, 5)),      // Nested head, loops to 3.
+      DEF_BB(kDalvikByteCode, DEF_SUCC1(4), DEF_PRED1(4)),            // Loops to 4.
+      DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED1(3)),
+  };
+  const BasicBlockId expected_order[] = {
+      1, 3, 4, 5, 6, 2
+  };
+  const uint16_t loop_ends[] = {
+      0, 4, 4, 0, 0, 0
+  };
+
+  PrepareBasicBlocks(bbs);
+  ComputeTopologicalSortOrder();
+  CheckOrder(expected_order);
+  CheckLoopEnds(loop_ends);
+}
+
+TEST_F(TopologicalSortOrderTest, NestedLoopSameBackBranchBlock) {
+  const BBDef bbs[] = {
+      DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()),
+      DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()),
+      DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(6)),
+      DEF_BB(kDalvikByteCode, DEF_SUCC2(4, 6), DEF_PRED2(1, 5)),
+      DEF_BB(kDalvikByteCode, DEF_SUCC1(5), DEF_PRED2(3, 5)),
+      DEF_BB(kDalvikByteCode, DEF_SUCC2(4, 3), DEF_PRED1(4)),         // Loops to 4 and 3.
+      DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED1(3)),
+  };
+  const BasicBlockId expected_order[] = {
+      1, 3, 4, 5, 6, 2
+  };
+  const uint16_t loop_ends[] = {
+      0, 4, 4, 0, 0, 0
+  };
+
+  PrepareBasicBlocks(bbs);
+  ComputeTopologicalSortOrder();
+  CheckOrder(expected_order);
+  CheckLoopEnds(loop_ends);
+}
+
+TEST_F(TopologicalSortOrderTest, TwoReorderedInnerLoops) {
+  // This is a simplified version of real code graph where the branch from 8 to 5 must prevent
+  // the block 5 from being considered a loop head before processing the loop 7-8.
+  const BBDef bbs[] = {
+      DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()),
+      DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()),
+      DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(9)),
+      DEF_BB(kDalvikByteCode, DEF_SUCC2(4, 9), DEF_PRED2(1, 5)),
+      DEF_BB(kDalvikByteCode, DEF_SUCC2(5, 7), DEF_PRED1(3)),         // Branch over loop in 5.
+      DEF_BB(kDalvikByteCode, DEF_SUCC2(6, 3), DEF_PRED3(4, 6, 8)),   // Loops to 4; inner loop.
+      DEF_BB(kDalvikByteCode, DEF_SUCC1(5), DEF_PRED1(5)),            // Loops to 5.
+      DEF_BB(kDalvikByteCode, DEF_SUCC1(8), DEF_PRED2(4, 8)),         // Loop head.
+      DEF_BB(kDalvikByteCode, DEF_SUCC2(7, 5), DEF_PRED1(7)),         // Loops to 7; branches to 5.
+      DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED1(3)),
+  };
+  const BasicBlockId expected_order[] = {
+      1, 3, 4, 7, 8, 5, 6, 9, 2
+  };
+  const uint16_t loop_ends[] = {
+      0, 7, 0, 5, 0, 7, 0, 0, 0
+  };
+
+  PrepareBasicBlocks(bbs);
+  ComputeTopologicalSortOrder();
+  CheckOrder(expected_order);
+  CheckLoopEnds(loop_ends);
+}
+
+TEST_F(TopologicalSortOrderTest, NestedLoopWithBackEdgeAfterOuterLoopBackEdge) {
+  // This is a simplified version of real code graph. The back-edge from 7 to the inner
+  // loop head 4 comes after the back-edge from 6 to the outer loop head 3. To make this
+  // appear a bit more complex, there's also a back-edge from 5 to 4.
+  const BBDef bbs[] = {
+      DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()),
+      DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()),
+      DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(7)),
+      DEF_BB(kDalvikByteCode, DEF_SUCC1(4), DEF_PRED2(1, 6)),         // Outer loop head.
+      DEF_BB(kDalvikByteCode, DEF_SUCC2(5, 6), DEF_PRED3(3, 5, 7)),   // Inner loop head.
+      DEF_BB(kDalvikByteCode, DEF_SUCC1(4), DEF_PRED1(4)),            // Loops to inner loop head 4.
+      DEF_BB(kDalvikByteCode, DEF_SUCC2(7, 3), DEF_PRED1(4)),         // Loops to outer loop head 3.
+      DEF_BB(kDalvikByteCode, DEF_SUCC2(2, 4), DEF_PRED1(6)),         // Loops to inner loop head 4.
+  };
+  const BasicBlockId expected_order[] = {
+      // NOTE: The 5 goes before 6 only because 5 is a "fall-through" from 4 while 6 is "taken".
+      1, 3, 4, 5, 6, 7, 2
+  };
+  const uint16_t loop_ends[] = {
+      0, 6, 6, 0, 0, 0, 0
+  };
+
+  PrepareBasicBlocks(bbs);
+  ComputeTopologicalSortOrder();
+  CheckOrder(expected_order);
+  CheckLoopEnds(loop_ends);
+}
+
+TEST_F(TopologicalSortOrderTest, LoopWithTwoEntryPoints) {
+  const BBDef bbs[] = {
+      DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()),
+      DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()),
+      DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(7)),
+      DEF_BB(kDalvikByteCode, DEF_SUCC2(5, 4), DEF_PRED1(1)),
+      DEF_BB(kDalvikByteCode, DEF_SUCC1(5), DEF_PRED2(3, 6)),  // Fall-back block is chosen as
+      DEF_BB(kDalvikByteCode, DEF_SUCC1(6), DEF_PRED2(3, 4)),  // the earlier from these two.
+      DEF_BB(kDalvikByteCode, DEF_SUCC2(4, 7), DEF_PRED1(5)),
+      DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED1(6)),
+  };
+  const BasicBlockId expected_order[] = {
+      1, 3, 4, 5, 6, 7, 2
+  };
+  const uint16_t loop_ends[] = {
+      0, 0, 5, 0, 0, 0, 0
+  };
+
+  PrepareBasicBlocks(bbs);
+  ComputeTopologicalSortOrder();
+  CheckOrder(expected_order);
+  CheckLoopEnds(loop_ends);
+}
+
+}  // namespace art
diff --git a/compiler/dex/mir_optimization.cc b/compiler/dex/mir_optimization.cc
index f69eea7..5c98654 100644
--- a/compiler/dex/mir_optimization.cc
+++ b/compiler/dex/mir_optimization.cc
@@ -321,7 +321,7 @@
     return true;
   }
   // Don't do a separate LVN if we did the GVN.
-  bool use_lvn = bb->use_lvn && (cu_->disable_opt & (1 << kGlobalValueNumbering)) != 0;
+  bool use_lvn = bb->use_lvn && (cu_->disable_opt & (1u << kGlobalValueNumbering)) != 0u;
   std::unique_ptr<ScopedArenaAllocator> allocator;
   std::unique_ptr<GlobalValueNumbering> global_valnum;
   std::unique_ptr<LocalValueNumbering> local_valnum;
@@ -1139,11 +1139,7 @@
 }
 
 bool MIRGraph::ApplyGlobalValueNumberingGate() {
-  if ((cu_->disable_opt & (1 << kGlobalValueNumbering)) != 0) {
-    return false;
-  }
-
-  if ((merged_df_flags_ & DF_LVN) == 0) {
+  if ((cu_->disable_opt & (1u << kGlobalValueNumbering)) != 0u) {
     return false;
   }
 
@@ -1179,7 +1175,7 @@
           lvn->GetValueNumber(mir);
         }
         bool change = temp_gvn_->FinishBasicBlock(bb);
-        DCHECK(!change);
+        DCHECK(!change) << PrettyMethod(cu_->method_idx, *cu_->dex_file);
       }
     }
   } else {
diff --git a/compiler/dex/mir_optimization_test.cc b/compiler/dex/mir_optimization_test.cc
index 4a0cf5c..c510b52 100644
--- a/compiler/dex/mir_optimization_test.cc
+++ b/compiler/dex/mir_optimization_test.cc
@@ -195,7 +195,7 @@
     cu_.mir_graph->SSATransformationEnd();
     bool gate_result = cu_.mir_graph->EliminateClassInitChecksGate();
     ASSERT_TRUE(gate_result);
-    RepeatingTopologicalSortIterator iterator(cu_.mir_graph.get());
+    LoopRepeatingTopologicalSortIterator iterator(cu_.mir_graph.get());
     bool change = false;
     for (BasicBlock* bb = iterator.Next(change); bb != nullptr; bb = iterator.Next(change)) {
       change = cu_.mir_graph->EliminateClassInitChecks(bb);
diff --git a/compiler/dex/pass_driver_me.h b/compiler/dex/pass_driver_me.h
index 031c5cf..133593c 100644
--- a/compiler/dex/pass_driver_me.h
+++ b/compiler/dex/pass_driver_me.h
@@ -68,6 +68,9 @@
       case kRepeatingTopologicalSortTraversal:
         DoWalkBasicBlocks<RepeatingTopologicalSortIterator>(&pass_me_data_holder_, me_pass);
         break;
+      case kLoopRepeatingTopologicalSortTraversal:
+        DoWalkBasicBlocks<LoopRepeatingTopologicalSortIterator>(&pass_me_data_holder_, me_pass);
+        break;
       case kAllNodes:
         DoWalkBasicBlocks<AllNodesIterator>(&pass_me_data_holder_, me_pass);
         break;
diff --git a/compiler/dex/pass_me.h b/compiler/dex/pass_me.h
index ff69865..c7276eb 100644
--- a/compiler/dex/pass_me.h
+++ b/compiler/dex/pass_me.h
@@ -55,6 +55,7 @@
   kPostOrderDOMTraversal,                  /**< @brief Dominator tree / Post-Order. */
   kTopologicalSortTraversal,               /**< @brief Topological Order traversal. */
   kRepeatingTopologicalSortTraversal,      /**< @brief Repeating Topological Order traversal. */
+  kLoopRepeatingTopologicalSortTraversal,  /**< @brief Loop-repeating Topological Order traversal. */
   kNoNodes,                                /**< @brief Skip BasicBlock traversal. */
 };
 
diff --git a/compiler/dex/vreg_analysis.cc b/compiler/dex/vreg_analysis.cc
index 892b302..4a3e071 100644
--- a/compiler/dex/vreg_analysis.cc
+++ b/compiler/dex/vreg_analysis.cc
@@ -324,13 +324,15 @@
     }
 
     for (int i = 0; ssa_rep->fp_use && i< ssa_rep->num_uses; i++) {
-      if (ssa_rep->fp_use[i])
+      if (ssa_rep->fp_use[i]) {
         changed |= SetFp(uses[i]);
       }
+    }
     for (int i = 0; ssa_rep->fp_def && i< ssa_rep->num_defs; i++) {
-      if (ssa_rep->fp_def[i])
+      if (ssa_rep->fp_def[i]) {
         changed |= SetFp(defs[i]);
       }
+    }
     // Special-case handling for moves & Phi
     if (attrs & (DF_IS_MOVE | DF_NULL_TRANSFER_N)) {
       /*