ART: SBC: Support single exit loops with live_outs.

Support copying subgraphs with a single exit and live-outs -
values which are defined inside the subgraph and have uses outside.

Test: test-art-target, test-art-host.
Test: 530-checker-peel-unroll.

Change-Id: Id06a6f08d14c6e86f6437d662e24489f955e9edf
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 7f78dc2..5549660 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -1121,6 +1121,23 @@
   user->FixUpUserRecordsAfterEnvUseRemoval(before_env_use_node);
 }
 
+void HEnvironment::ReplaceInput(HInstruction* replacement, size_t index) {
+  const HUserRecord<HEnvironment*>& env_use_record = vregs_[index];
+  HInstruction* orig_instr = env_use_record.GetInstruction();
+
+  DCHECK(orig_instr != replacement);
+
+  HUseList<HEnvironment*>::iterator before_use_node = env_use_record.GetBeforeUseNode();
+  // Note: fixup_end remains valid across splice_after().
+  auto fixup_end = replacement->env_uses_.empty() ? replacement->env_uses_.begin()
+                                                  : ++replacement->env_uses_.begin();
+  replacement->env_uses_.splice_after(replacement->env_uses_.before_begin(),
+                                      env_use_record.GetInstruction()->env_uses_,
+                                      before_use_node);
+  replacement->FixUpUserRecordsAfterEnvUseInsertion(fixup_end);
+  orig_instr->FixUpUserRecordsAfterEnvUseRemoval(before_use_node);
+}
+
 HInstruction* HInstruction::GetNextDisregardingMoves() const {
   HInstruction* next = GetNext();
   while (next != nullptr && next->IsParallelMove()) {
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 09d9c57..3fd5b6b 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -1909,6 +1909,11 @@
 
   void RemoveAsUserOfInput(size_t index) const;
 
+  // Replaces the input at the position 'index' with the replacement; the replacement and old
+  // input instructions' env_uses_ lists are adjusted. The function works similar to
+  // HInstruction::ReplaceInput.
+  void ReplaceInput(HInstruction* replacement, size_t index);
+
   size_t Size() const { return vregs_.size(); }
 
   HEnvironment* GetParent() const { return parent_; }
diff --git a/compiler/optimizing/superblock_cloner.cc b/compiler/optimizing/superblock_cloner.cc
index fad7729..1b43618 100644
--- a/compiler/optimizing/superblock_cloner.cc
+++ b/compiler/optimizing/superblock_cloner.cc
@@ -409,7 +409,7 @@
 // Main algorithm methods.
 //
 
-void SuperblockCloner::SearchForSubgraphExits(ArenaVector<HBasicBlock*>* exits) {
+void SuperblockCloner::SearchForSubgraphExits(ArenaVector<HBasicBlock*>* exits) const {
   DCHECK(exits->empty());
   for (uint32_t block_id : orig_bb_set_.Indexes()) {
     HBasicBlock* block = GetBlockById(block_id);
@@ -521,6 +521,113 @@
 }
 
 //
+// Helpers for live-outs processing and Subgraph-closed SSA.
+//
+
+bool SuperblockCloner::CollectLiveOutsAndCheckClonable(HInstructionMap* live_outs) const {
+  DCHECK(live_outs->empty());
+  for (uint32_t idx : orig_bb_set_.Indexes()) {
+    HBasicBlock* block = GetBlockById(idx);
+
+    for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
+      HInstruction* instr = it.Current();
+      DCHECK(instr->IsClonable());
+
+      if (IsUsedOutsideRegion(instr, orig_bb_set_)) {
+        live_outs->FindOrAdd(instr, instr);
+      }
+    }
+
+    for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+      HInstruction* instr = it.Current();
+      if (!instr->IsClonable()) {
+        return false;
+      }
+
+      if (IsUsedOutsideRegion(instr, orig_bb_set_)) {
+        // TODO: Investigate why HNewInstance, HCheckCast has a requirement for the input.
+        if (instr->IsLoadClass()) {
+          return false;
+        }
+        live_outs->FindOrAdd(instr, instr);
+      }
+    }
+  }
+  return true;
+}
+
+void SuperblockCloner::ConstructSubgraphClosedSSA() {
+  if (live_outs_.empty()) {
+    return;
+  }
+
+  ArenaVector<HBasicBlock*> exits(arena_->Adapter(kArenaAllocSuperblockCloner));
+  SearchForSubgraphExits(&exits);
+  if (exits.empty()) {
+    DCHECK(live_outs_.empty());
+    return;
+  }
+
+  DCHECK_EQ(exits.size(), 1u);
+  HBasicBlock* exit_block = exits[0];
+  // There should be no critical edges.
+  DCHECK_EQ(exit_block->GetPredecessors().size(), 1u);
+  DCHECK(exit_block->GetPhis().IsEmpty());
+
+  // For each live-out value insert a phi into the loop exit and replace all the value's uses
+  // external to the loop with this phi. The phi will have the original value as its only input;
+  // after copying is done FixSubgraphClosedSSAAfterCloning will add a corresponding copy of the
+  // original value as the second input thus merging data flow from the original and copy parts of
+  // the subgraph. Also update the record in the live_outs_ map from (value, value) to
+  // (value, new_phi).
+  for (auto live_out_it = live_outs_.begin(); live_out_it != live_outs_.end(); ++live_out_it) {
+    HInstruction* value = live_out_it->first;
+    HPhi* phi = new (arena_) HPhi(arena_, kNoRegNumber, 0, value->GetType());
+
+    if (value->GetType() == DataType::Type::kReference) {
+      phi->SetReferenceTypeInfo(value->GetReferenceTypeInfo());
+    }
+
+    exit_block->AddPhi(phi);
+    live_out_it->second = phi;
+
+    const HUseList<HInstruction*>& uses = value->GetUses();
+    for (auto it = uses.begin(), end = uses.end(); it != end; /* ++it below */) {
+      HInstruction* user = it->GetUser();
+      size_t index = it->GetIndex();
+      // Increment `it` now because `*it` may disappear thanks to user->ReplaceInput().
+      ++it;
+      if (!IsInOrigBBSet(user->GetBlock())) {
+        user->ReplaceInput(phi, index);
+      }
+    }
+
+    const HUseList<HEnvironment*>& env_uses = value->GetEnvUses();
+    for (auto it = env_uses.begin(), e = env_uses.end(); it != e; /* ++it below */) {
+      HEnvironment* env = it->GetUser();
+      size_t index = it->GetIndex();
+      ++it;
+      if (!IsInOrigBBSet(env->GetHolder()->GetBlock())) {
+        env->ReplaceInput(phi, index);
+      }
+    }
+
+    phi->AddInput(value);
+  }
+}
+
+void SuperblockCloner::FixSubgraphClosedSSAAfterCloning() {
+  for (auto it : live_outs_) {
+    DCHECK(it.first != it.second);
+    HInstruction* orig_value = it.first;
+    HPhi* phi = it.second->AsPhi();
+    HInstruction* copy_value = GetInstrCopy(orig_value);
+    // Copy edges are inserted after the original so we can just add new input to the phi.
+    phi->AddInput(copy_value);
+  }
+}
+
+//
 // Debug and logging methods.
 //
 
@@ -644,7 +751,6 @@
 }
 
 void SuperblockCloner::DumpInputSets() {
-  std::cout << graph_->PrettyMethod() << "\n";
   std::cout << "orig_bb_set:\n";
   for (uint32_t idx : orig_bb_set_.Indexes()) {
     std::cout << idx << "\n";
@@ -680,7 +786,9 @@
     bb_map_(bb_map),
     hir_map_(hir_map),
     outer_loop_(nullptr),
-    outer_loop_bb_set_(arena_, orig_bb_set->GetSizeOf(), true, kArenaAllocSuperblockCloner) {
+    outer_loop_bb_set_(arena_, orig_bb_set->GetSizeOf(), true, kArenaAllocSuperblockCloner),
+    live_outs_(std::less<HInstruction*>(),
+        graph->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)) {
   orig_bb_set_.Copy(orig_bb_set);
 }
 
@@ -699,26 +807,19 @@
     return false;
   }
 
-  // Check that there are no instructions defined in the subgraph and used outside.
-  // TODO: Improve this by accepting graph with such uses but only one exit.
-  for (uint32_t idx : orig_bb_set_.Indexes()) {
-    HBasicBlock* block = GetBlockById(idx);
+  HInstructionMap live_outs(
+      std::less<HInstruction*>(), graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
 
-    for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
-      HInstruction* instr = it.Current();
-      if (!instr->IsClonable() ||
-          IsUsedOutsideRegion(instr, orig_bb_set_)) {
-        return false;
-      }
-    }
+  if (!CollectLiveOutsAndCheckClonable(&live_outs)) {
+    return false;
+  }
 
-    for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
-      HInstruction* instr = it.Current();
-      if (!instr->IsClonable() ||
-          IsUsedOutsideRegion(instr, orig_bb_set_)) {
-        return false;
-      }
-    }
+  ArenaVector<HBasicBlock*> exits(arena_->Adapter(kArenaAllocSuperblockCloner));
+  SearchForSubgraphExits(&exits);
+
+  // The only loops with live-outs which are currently supported are loops with a single exit.
+  if (!live_outs.empty() && exits.size() != 1) {
+    return false;
   }
 
   return true;
@@ -794,8 +895,10 @@
     DumpInputSets();
   }
 
+  CollectLiveOutsAndCheckClonable(&live_outs_);
   // Find an area in the graph for which control flow information should be adjusted.
   FindAndSetLocalAreaForAdjustments();
+  ConstructSubgraphClosedSSA();
   // Clone the basic blocks from the orig_bb_set_; data flow is invalid after the call and is to be
   // adjusted.
   CloneBasicBlocks();
@@ -819,6 +922,7 @@
   AdjustControlFlowInfo();
   // Fix data flow of the graph.
   ResolveDataFlow();
+  FixSubgraphClosedSSAAfterCloning();
 }
 
 void SuperblockCloner::CleanUp() {
@@ -985,8 +1089,14 @@
   HBasicBlock* loop_header = loop_info_->GetHeader();
   // Check that loop info is up-to-date.
   DCHECK(loop_info_ == loop_header->GetLoopInformation());
-
   HGraph* graph = loop_header->GetGraph();
+
+  if (kSuperblockClonerLogging) {
+    std::cout << "Method: " << graph->PrettyMethod() << std::endl;
+    std::cout << "Scalar loop " << (to_unroll ? "unrolling" : "peeling") <<
+                 " was applied to the loop <" << loop_header->GetBlockId() << ">." << std::endl;
+  }
+
   ArenaAllocator allocator(graph->GetAllocator()->GetArenaPool());
 
   HEdgeSet remap_orig_internal(graph->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
diff --git a/compiler/optimizing/superblock_cloner.h b/compiler/optimizing/superblock_cloner.h
index e093167..f211721 100644
--- a/compiler/optimizing/superblock_cloner.h
+++ b/compiler/optimizing/superblock_cloner.h
@@ -218,7 +218,7 @@
 
  private:
   // Fills the 'exits' vector with the subgraph exits.
-  void SearchForSubgraphExits(ArenaVector<HBasicBlock*>* exits);
+  void SearchForSubgraphExits(ArenaVector<HBasicBlock*>* exits) const;
 
   // Finds and records information about the area in the graph for which control flow (back edges,
   // loops, dominators) needs to be adjusted.
@@ -240,6 +240,33 @@
   void ResolveDataFlow();
 
   //
+  // Helpers for live-outs processing and Subgraph-closed SSA.
+  //
+  //  - live-outs - values which are defined inside the subgraph and have uses outside.
+  //  - Subgraph-closed SSA - SSA form for which all the values defined inside the subgraph
+  //    have no outside uses except for the phi-nodes in the subgraph exits.
+  //
+  // Note: now if the subgraph has live-outs it is only clonable if it has a single exit; this
+  // makes the subgraph-closed SSA form construction much easier.
+  //
+  // TODO: Support subgraphs with live-outs and multiple exits.
+  //
+
+  // For each live-out value 'val' in the region puts a record <val, val> into the map.
+  // Returns whether all of the instructions in the subgraph are clonable.
+  bool CollectLiveOutsAndCheckClonable(HInstructionMap* live_outs_) const;
+
+  // Constructs Subgraph-closed SSA; precondition - a subgraph has a single exit.
+  //
+  // For each live-out 'val' in 'live_outs_' map inserts a HPhi 'phi' into the exit node, updates
+  // the record in the map to <val, phi> and replaces all outside uses with this phi.
+  void ConstructSubgraphClosedSSA();
+
+  // Fixes the data flow for the live-out 'val' by adding a 'copy_val' input to the corresponding
+  // (<val, phi>) phi after the cloning is done.
+  void FixSubgraphClosedSSAAfterCloning();
+
+  //
   // Helpers for CloneBasicBlock.
   //
 
@@ -316,6 +343,8 @@
   HLoopInformation* outer_loop_;
   HBasicBlockSet outer_loop_bb_set_;
 
+  HInstructionMap live_outs_;
+
   ART_FRIEND_TEST(SuperblockClonerTest, AdjustControlFlowInfo);
   ART_FRIEND_TEST(SuperblockClonerTest, IsGraphConnected);
 
diff --git a/test/530-checker-peel-unroll/src/Main.java b/test/530-checker-peel-unroll/src/Main.java
index 2051b47..804c9fe 100644
--- a/test/530-checker-peel-unroll/src/Main.java
+++ b/test/530-checker-peel-unroll/src/Main.java
@@ -455,6 +455,235 @@
     }
   }
 
+  /// CHECK-START: int Main.unrollingSimpleLiveOuts(int[]) loop_optimization (before)
+  /// CHECK-DAG: <<Array:l\d+>>   ParameterValue                            loop:none
+  /// CHECK-DAG: <<Const0:i\d+>>  IntConstant 0                             loop:none
+  /// CHECK-DAG: <<Const1:i\d+>>  IntConstant 1                             loop:none
+  /// CHECK-DAG: <<Const2:i\d+>>  IntConstant 2                             loop:none
+  /// CHECK-DAG: <<Limit:i\d+>>   IntConstant 4094                          loop:none
+  /// CHECK-DAG: <<PhiS:i\d+>>    Phi [<<Const1>>,{{i\d+}}]                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<PhiT:i\d+>>    Phi [<<Const2>>,{{i\d+}}]                 loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<PhiI:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                 loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Check:z\d+>>   GreaterThanOrEqual [<<PhiI>>,<<Limit>>]   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:                  If [<<Check>>]                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<AddI:i\d+>>    Add [<<PhiI>>,<<Const1>>]                 loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get0:i\d+>>    ArrayGet [<<Array>>,<<AddI>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<AddS:i\d+>>    Add [<<PhiS>>,<<Get0>>]                   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<AddT:i\d+>>    Mul [<<PhiT>>,<<Get0>>]                   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get1:i\d+>>    ArrayGet [<<Array>>,<<PhiI>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<AddArr:i\d+>>  Add [<<AddS>>,<<Get1>>]                   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:                  ArraySet [<<Array>>,<<PhiI>>,<<AddArr>>]  loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-DAG: <<STAdd:i\d+>>   Add [<<PhiS>>,<<PhiT>>]                   loop:none
+  /// CHECK-DAG: <<ZCheck:i\d+>>  DivZeroCheck [<<STAdd>>] env:[[<<PhiS>>,<<PhiT>>,<<STAdd>>,<<Const1>>,_,<<Array>>]] loop:none
+  /// CHECK-DAG: <<Div:i\d+>>     Div [<<Const1>>,<<ZCheck>>]               loop:none
+  /// CHECK-DAG:                  Return [<<Div>>]                          loop:none
+  /// CHECK-NOT:                  ArrayGet
+  /// CHECK-NOT:                  ArraySet
+
+  /// CHECK-START: int Main.unrollingSimpleLiveOuts(int[]) loop_optimization (after)
+  /// CHECK-DAG: <<Array:l\d+>>   ParameterValue                            loop:none
+  /// CHECK-DAG: <<Const0:i\d+>>  IntConstant 0                             loop:none
+  /// CHECK-DAG: <<Const1:i\d+>>  IntConstant 1                             loop:none
+  /// CHECK-DAG: <<Const2:i\d+>>  IntConstant 2                             loop:none
+  /// CHECK-DAG: <<Limit:i\d+>>   IntConstant 4094                          loop:none
+  /// CHECK-DAG: <<PhiS:i\d+>>    Phi [<<Const1>>,{{i\d+}}]                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<PhiT:i\d+>>    Phi [<<Const2>>,{{i\d+}}]                 loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<PhiI:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                 loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Check:z\d+>>   GreaterThanOrEqual [<<PhiI>>,<<Limit>>]   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:                  If [<<Check>>]                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<AddI:i\d+>>    Add [<<PhiI>>,<<Const1>>]                 loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get0:i\d+>>    ArrayGet [<<Array>>,<<AddI>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<AddS:i\d+>>    Add [<<PhiS>>,<<Get0>>]                   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<AddT:i\d+>>    Mul [<<PhiT>>,<<Get0>>]                   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get1:i\d+>>    ArrayGet [<<Array>>,<<PhiI>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<AddArr:i\d+>>  Add [<<AddS>>,<<Get1>>]                   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:                  ArraySet [<<Array>>,<<PhiI>>,<<AddArr>>]  loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-DAG:                  GreaterThanOrEqual [<<AddI>>,<<Limit>>]   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:                  If [<<Const0>>]                           loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<AddIA:i\d+>>   Add [<<AddI>>,<<Const1>>]                 loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get0A:i\d+>>   ArrayGet [<<Array>>,<<AddIA>>]            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<AddSA:i\d+>>   Add [<<AddS>>,<<Get0A>>]                  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<AddTA:i\d+>>   Mul [<<AddT>>,<<Get0A>>]                  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get1A:i\d+>>   ArrayGet [<<Array>>,<<AddI>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<AddArrA:i\d+>> Add [<<AddSA>>,<<Get1A>>]                 loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:                  ArraySet [<<Array>>,<<AddI>>,<<AddArrA>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-DAG: <<RetPhiS:i\d+>> Phi [<<PhiS>>,<<AddS>>]                   loop:none
+  /// CHECK-DAG: <<RetPhiT:i\d+>> Phi [<<PhiT>>,<<AddT>>]                   loop:none
+  /// CHECK-DAG: <<STAdd:i\d+>>   Add [<<RetPhiS>>,<<RetPhiT>>]             loop:none
+  /// CHECK-DAG: <<ZCheck:i\d+>>  DivZeroCheck [<<STAdd>>] env:[[<<RetPhiS>>,<<RetPhiT>>,<<STAdd>>,<<Const1>>,_,<<Array>>]] loop:none
+  /// CHECK-DAG: <<Div:i\d+>>     Div [<<Const1>>,<<ZCheck>>]               loop:none
+  /// CHECK-DAG:                  Return [<<Div>>]                          loop:none
+  //
+  /// CHECK-NOT:                  ArrayGet
+  /// CHECK-NOT:                  ArraySet
+  private static final int unrollingSimpleLiveOuts(int[] a) {
+    int s = 1;
+    int t = 2;
+    for (int i = 0; i < LENGTH - 2; i++) {
+      int temp = a[i + 1];
+      s += temp;
+      t *= temp;
+      a[i] += s;
+    }
+
+    return 1 / (s + t);
+  }
+
+  /// CHECK-START: int Main.unrollingWhileLiveOuts(int[]) loop_optimization (before)
+  /// CHECK-DAG: <<Array:l\d+>>    ParameterValue                            loop:none
+  /// CHECK-DAG: <<Const0:i\d+>>   IntConstant 0                             loop:none
+  /// CHECK-DAG: <<Const1:i\d+>>   IntConstant 1                             loop:none
+  /// CHECK-DAG: <<Const2:i\d+>>   IntConstant 2                             loop:none
+  /// CHECK-DAG: <<Const128:i\d+>> IntConstant 128                           loop:none
+  /// CHECK-DAG: <<Limit:i\d+>>    IntConstant 4094                          loop:none
+  /// CHECK-DAG: <<PhiI:i\d+>>     Phi [<<Const0>>,{{i\d+}}]                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<PhiS:i\d+>>     Phi [<<Const128>>,{{i\d+}}]               loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<AddI:i\d+>>     Add [<<PhiI>>,<<Const1>>]                 loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Check:z\d+>>    GreaterThanOrEqual [<<PhiI>>,<<Limit>>]   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<If:v\d+>>       If [<<Check>>]                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Rem:i\d+>>      Rem [<<AddI>>,<<Const2>>]                 loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<NE:z\d+>>       NotEqual [<<Rem>>,<<Const0>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:                   If [<<NE>>]                               loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<AddS:i\d+>>     Add [<<PhiS>>,<<Const1>>]                 loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:                   ArraySet                                  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:                   Phi [<<PhiS>>,<<AddS>>]                   loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-NOT:                   ArrayGet
+  /// CHECK-NOT:                   ArraySet
+
+  /// CHECK-START: int Main.unrollingWhileLiveOuts(int[]) loop_optimization (after)
+  /// CHECK-DAG: <<Array:l\d+>>    ParameterValue                            loop:none
+  /// CHECK-DAG: <<Const0:i\d+>>   IntConstant 0                             loop:none
+  /// CHECK-DAG: <<Const1:i\d+>>   IntConstant 1                             loop:none
+  /// CHECK-DAG: <<Const2:i\d+>>   IntConstant 2                             loop:none
+  /// CHECK-DAG: <<Const128:i\d+>> IntConstant 128                           loop:none
+  /// CHECK-DAG: <<Limit:i\d+>>    IntConstant 4094                          loop:none
+  /// CHECK-DAG: <<PhiI:i\d+>>     Phi [<<Const0>>,{{i\d+}}]                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<PhiS:i\d+>>     Phi [<<Const128>>,{{i\d+}}]               loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<AddI:i\d+>>     Add [<<PhiI>>,<<Const1>>]                 loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Check:z\d+>>    GreaterThanOrEqual [<<PhiI>>,<<Limit>>]   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<If:v\d+>>       If [<<Check>>]                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Rem:i\d+>>      Rem [<<AddI>>,<<Const2>>]                 loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<NE:z\d+>>       NotEqual [<<Rem>>,<<Const0>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:                   If [<<NE>>]                               loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<AddS:i\d+>>     Add [<<PhiS>>,<<Const1>>]                 loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:                   ArraySet [{{l\d+}},{{i\d+}},<<PhiS>>]     loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<PhiSM:i\d+>>    Phi [<<PhiS>>,<<AddS>>]                   loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-DAG: <<AddIA:i\d+>>    Add [<<AddI>>,<<Const1>>]                 loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<CheckA:z\d+>>   GreaterThanOrEqual [<<AddI>>,<<Limit>>]   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<IfA:v\d+>>      If [<<Const0>>]                           loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<RemA:i\d+>>     Rem [<<AddIA>>,<<Const2>>]                loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<NEA:z\d+>>      NotEqual [<<RemA>>,<<Const0>>]            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:                   If [<<NEA>>]                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<AddSA:i\d+>>    Add [<<PhiSM>>,<<Const1>>]                loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:                   ArraySet [{{l\d+}},{{i\d+}},<<PhiSM>>]    loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:                   Phi [<<AddSA>>,<<PhiSM>>]                 loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-DAG: <<RetPhi:i\d+>>   Phi [<<PhiS>>,<<PhiSM>>]                  loop:none
+  /// CHECK-DAG:                   Return [<<RetPhi>>]                       loop:none
+  //
+  /// CHECK-NOT:                   ArrayGet
+  /// CHECK-NOT:                   ArraySet
+  private static final int unrollingWhileLiveOuts(int[] a) {
+    int i = 0;
+    int s = 128;
+    while (i++ < LENGTH - 2) {
+      if (i % 2 == 0) {
+        a[i] = s++;
+      }
+    }
+    return s;
+  }
+
+  /// CHECK-START: int Main.unrollingLiveOutsNested(int[]) loop_optimization (before)
+  /// CHECK-DAG: <<Array:l\d+>>   ParameterValue                            loop:none
+  /// CHECK-DAG: <<Const0:i\d+>>  IntConstant 0                             loop:none
+  /// CHECK-DAG: <<Const1:i\d+>>  IntConstant 1                             loop:none
+  /// CHECK-DAG: <<Const2:i\d+>>  IntConstant 2                             loop:none
+  /// CHECK-DAG: <<Limit:i\d+>>   IntConstant 4094                          loop:none
+  //
+  /// CHECK-DAG: <<OutPhiJ:i\d+>> Phi [<<Const0>>,{{i\d+}}]                 loop:<<Loop0:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<OutPhiS:i\d+>> Phi [<<Const1>>,{{i\d+}}]                 loop:<<Loop0>>      outer_loop:none
+  /// CHECK-DAG: <<OutPhiT:i\d+>> Phi [<<Const2>>,{{i\d+}}]                 loop:<<Loop0>>      outer_loop:none
+  //
+  /// CHECK-DAG: <<PhiI:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                 loop:<<Loop1:B\d+>> outer_loop:<<Loop0>>
+  /// CHECK-DAG: <<PhiS:i\d+>>    Phi [<<OutPhiS>>,{{i\d+}}]                loop:<<Loop1>>      outer_loop:<<Loop0>>
+  /// CHECK-DAG: <<PhiT:i\d+>>    Phi [<<OutPhiT>>,{{i\d+}}]                loop:<<Loop1>>      outer_loop:<<Loop0>>
+  /// CHECK-DAG: <<Check:z\d+>>   GreaterThanOrEqual [<<PhiI>>,<<Limit>>]   loop:<<Loop1>>      outer_loop:<<Loop0>>
+  /// CHECK-DAG:                  If [<<Check>>]                            loop:<<Loop1>>      outer_loop:<<Loop0>>
+  /// CHECK-DAG: <<AddI:i\d+>>    Add [<<PhiI>>,<<Const1>>]                 loop:<<Loop1>>      outer_loop:<<Loop0>>
+  /// CHECK-DAG: <<Get0:i\d+>>    ArrayGet [<<Array>>,<<AddI>>]             loop:<<Loop1>>      outer_loop:<<Loop0>>
+  /// CHECK-DAG: <<AddS:i\d+>>    Add [<<PhiS>>,<<Get0>>]                   loop:<<Loop1>>      outer_loop:<<Loop0>>
+  /// CHECK-DAG: <<AddT:i\d+>>    Mul [<<PhiT>>,<<Get0>>]                   loop:<<Loop1>>      outer_loop:<<Loop0>>
+  /// CHECK-DAG: <<Get1:i\d+>>    ArrayGet [<<Array>>,<<PhiI>>]             loop:<<Loop1>>      outer_loop:<<Loop0>>
+  /// CHECK-DAG: <<AddArr:i\d+>>  Add [<<AddS>>,<<Get1>>]                   loop:<<Loop1>>      outer_loop:<<Loop0>>
+  /// CHECK-DAG:                  ArraySet [<<Array>>,<<PhiI>>,<<AddArr>>]  loop:<<Loop1>>      outer_loop:<<Loop0>>
+  //
+  /// CHECK-DAG:                  Add [<<OutPhiJ>>,<<Const1>>]              loop:<<Loop0>>      outer_loop:none
+  //
+  /// CHECK-NOT:                  ArrayGet
+  /// CHECK-NOT:                  ArraySet
+
+  /// CHECK-START: int Main.unrollingLiveOutsNested(int[]) loop_optimization (after)
+  /// CHECK-DAG: <<Array:l\d+>>   ParameterValue                            loop:none
+  /// CHECK-DAG: <<Const0:i\d+>>  IntConstant 0                             loop:none
+  /// CHECK-DAG: <<Const1:i\d+>>  IntConstant 1                             loop:none
+  /// CHECK-DAG: <<Const2:i\d+>>  IntConstant 2                             loop:none
+  /// CHECK-DAG: <<Limit:i\d+>>   IntConstant 4094                          loop:none
+  //
+  /// CHECK-DAG: <<OutPhiJ:i\d+>> Phi [<<Const0>>,{{i\d+}}]                 loop:<<Loop0:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<OutPhiS:i\d+>> Phi [<<Const1>>,{{i\d+}}]                 loop:<<Loop0>>      outer_loop:none
+  /// CHECK-DAG: <<OutPhiT:i\d+>> Phi [<<Const2>>,{{i\d+}}]                 loop:<<Loop0>>      outer_loop:none
+  //
+  /// CHECK-DAG: <<PhiI:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                 loop:<<Loop1:B\d+>> outer_loop:<<Loop0>>
+  /// CHECK-DAG: <<PhiS:i\d+>>    Phi [<<OutPhiS>>,{{i\d+}}]                loop:<<Loop1>>      outer_loop:<<Loop0>>
+  /// CHECK-DAG: <<PhiT:i\d+>>    Phi [<<OutPhiT>>,{{i\d+}}]                loop:<<Loop1>>      outer_loop:<<Loop0>>
+  /// CHECK-DAG: <<Check:z\d+>>   GreaterThanOrEqual [<<PhiI>>,<<Limit>>]   loop:<<Loop1>>      outer_loop:<<Loop0>>
+  /// CHECK-DAG:                  If [<<Check>>]                            loop:<<Loop1>>      outer_loop:<<Loop0>>
+  /// CHECK-DAG: <<AddI:i\d+>>    Add [<<PhiI>>,<<Const1>>]                 loop:<<Loop1>>      outer_loop:<<Loop0>>
+  /// CHECK-DAG: <<Get0:i\d+>>    ArrayGet [<<Array>>,<<AddI>>]             loop:<<Loop1>>      outer_loop:<<Loop0>>
+  /// CHECK-DAG: <<AddS:i\d+>>    Add [<<PhiS>>,<<Get0>>]                   loop:<<Loop1>>      outer_loop:<<Loop0>>
+  /// CHECK-DAG: <<AddT:i\d+>>    Mul [<<PhiT>>,<<Get0>>]                   loop:<<Loop1>>      outer_loop:<<Loop0>>
+  /// CHECK-DAG: <<Get1:i\d+>>    ArrayGet [<<Array>>,<<PhiI>>]             loop:<<Loop1>>      outer_loop:<<Loop0>>
+  /// CHECK-DAG: <<AddArr:i\d+>>  Add [<<AddS>>,<<Get1>>]                   loop:<<Loop1>>      outer_loop:<<Loop0>>
+  /// CHECK-DAG:                  ArraySet [<<Array>>,<<PhiI>>,<<AddArr>>]  loop:<<Loop1>>      outer_loop:<<Loop0>>
+  //
+  /// CHECK-DAG:                  If [<<Const0>>]                           loop:<<Loop1>>      outer_loop:<<Loop0>>
+  /// CHECK-DAG: <<AddIA:i\d+>>   Add [<<AddI>>,<<Const1>>]                 loop:<<Loop1>>      outer_loop:<<Loop0>>
+  /// CHECK-DAG: <<Get0A:i\d+>>   ArrayGet [<<Array>>,<<AddIA>>]            loop:<<Loop1>>      outer_loop:<<Loop0>>
+  /// CHECK-DAG: <<AddSA:i\d+>>   Add [<<AddS>>,<<Get0A>>]                  loop:<<Loop1>>      outer_loop:<<Loop0>>
+  /// CHECK-DAG: <<AddTA:i\d+>>   Mul [<<AddT>>,<<Get0A>>]                  loop:<<Loop1>>      outer_loop:<<Loop0>>
+  /// CHECK-DAG: <<Get1A:i\d+>>   ArrayGet [<<Array>>,<<AddI>>]             loop:<<Loop1>>      outer_loop:<<Loop0>>
+  /// CHECK-DAG: <<AddArrA:i\d+>> Add [<<AddSA>>,<<Get1A>>]                 loop:<<Loop1>>      outer_loop:<<Loop0>>
+  /// CHECK-DAG:                  ArraySet [<<Array>>,<<AddI>>,<<AddArrA>>] loop:<<Loop1>>      outer_loop:<<Loop0>>
+  //
+  /// CHECK-DAG: <<RetPhiS:i\d+>> Phi [<<PhiS>>,<<AddS>>]                   loop:<<Loop0>>      outer_loop:none
+  /// CHECK-DAG: <<RetPhiT:i\d+>> Phi [<<PhiT>>,<<AddT>>]                   loop:<<Loop0>>      outer_loop:none
+  /// CHECK-DAG:                  Add [<<OutPhiJ>>,<<Const1>>]              loop:<<Loop0>>      outer_loop:none
+  //
+  /// CHECK-DAG: <<RetAdd:i\d+>>  Add [<<OutPhiS>>,<<OutPhiT>>]             loop:none
+  /// CHECK-DAG:                  Return [<<RetAdd>>]                       loop:none
+  //
+  /// CHECK-NOT:                  ArrayGet
+  /// CHECK-NOT:                  ArraySet
+  private static final int unrollingLiveOutsNested(int[] a) {
+    int s = 1;
+    int t = 2;
+    for (int j = 0; j < 16; j++) {
+      for (int i = 0; i < LENGTH - 2; i++) {
+        int temp = a[i + 1];
+        s += temp;
+        t *= temp;
+        a[i] += s;
+      }
+    }
+    return s + t;
+  }
+
   /// CHECK-START: void Main.noUnrollingOddTripCount(int[]) loop_optimization (before)
   /// CHECK-DAG: <<Array:l\d+>>   ParameterValue                            loop:none
   /// CHECK-DAG: <<Const1:i\d+>>  IntConstant 1                             loop:none
@@ -802,7 +1031,11 @@
     peelingBreakFromNest(a, false);
     peelingBreakFromNest(a, true);
 
-    int expected = 141312;
+    unrollingSimpleLiveOuts(a);
+    unrollingWhileLiveOuts(a);
+    unrollingLiveOutsNested(a);
+
+    int expected = 51565978;
     int found = 0;
     for (int i = 0; i < a.length; i++) {
       found += a[i];