Optimize suspend checks in optimizing compiler.

- Remove the checks added during graph building (they were only
  needed by the baseline code generator).
- Emit them at loop back edges after the phi moves, so that the test
  can jump directly to the loop header.
- Fix the x86 and x86_64 suspend check by using cmpw instead of cmpl:
  the thread flags field is only 16 bits wide, so a 32-bit compare
  also reads the adjacent state field.

Change-Id: I6fad5795a55705d86c9e1cb85bf5d63dadfafa2a
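
For illustration, on a simple loop the builder used to leave an
HSuspendCheck in every block ending with a backward branch; after this
change only two checks survive, and the loop header's check is emitted
at the back edge (a sketch, block names made up):

    entry:        HSuspendCheck   // kept: emitted before the entry goto
    loop_header:  HSuspendCheck   // kept: placed by loop simplification,
                                  // but emitted at the back edge instead
    back_edge:    HSuspendCheck   // removed by the instruction simplifier
                  HGoto           // re-emits the header's check after the
                                  // parallel moves for the loop phis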
diff --git a/compiler/Android.mk b/compiler/Android.mk
index 7ac1c6b..13c5671 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -95,6 +95,7 @@
 	optimizing/graph_checker.cc \
 	optimizing/graph_visualizer.cc \
 	optimizing/gvn.cc \
+	optimizing/instruction_simplifier.cc \
 	optimizing/locations.cc \
 	optimizing/nodes.cc \
 	optimizing/optimizing_compiler.cc \
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index 33b00d2..5015bd0 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -183,9 +183,9 @@
 
   // Setup the graph with the entry block and exit block.
   graph_ = new (arena_) HGraph(arena_);
-  entry_block_ = new (arena_) HBasicBlock(graph_);
+  entry_block_ = new (arena_) HBasicBlock(graph_, 0);
   graph_->AddBlock(entry_block_);
-  exit_block_ = new (arena_) HBasicBlock(graph_);
+  exit_block_ = new (arena_) HBasicBlock(graph_, kNoDexPc);
   graph_->SetEntryBlock(entry_block_);
   graph_->SetExitBlock(exit_block_);
 
@@ -241,7 +241,7 @@
   branch_targets_.SetSize(code_end - code_ptr);
 
   // Create the first block for the dex instructions, single successor of the entry block.
-  HBasicBlock* block = new (arena_) HBasicBlock(graph_);
+  HBasicBlock* block = new (arena_) HBasicBlock(graph_, 0);
   branch_targets_.Put(0, block);
   entry_block_->AddSuccessor(block);
 
@@ -254,13 +254,13 @@
       int32_t target = instruction.GetTargetOffset() + dex_offset;
       // Create a block for the target instruction.
       if (FindBlockStartingAt(target) == nullptr) {
-        block = new (arena_) HBasicBlock(graph_);
+        block = new (arena_) HBasicBlock(graph_, target);
         branch_targets_.Put(target, block);
       }
       dex_offset += instruction.SizeInCodeUnits();
       code_ptr += instruction.SizeInCodeUnits();
       if ((code_ptr < code_end) && (FindBlockStartingAt(dex_offset) == nullptr)) {
-        block = new (arena_) HBasicBlock(graph_);
+        block = new (arena_) HBasicBlock(graph_, dex_offset);
         branch_targets_.Put(dex_offset, block);
       }
     } else {
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 3231c99..2a9a7b3 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -25,6 +25,7 @@
 #include "gc_map_builder.h"
 #include "leb128.h"
 #include "mapping_table.h"
+#include "ssa_liveness_analysis.h"
 #include "utils/assembler.h"
 #include "verifier/dex_gc_map.h"
 #include "vmap_table.h"
@@ -518,4 +519,23 @@
   }
 }
 
+void CodeGenerator::ClearSpillSlotsFromLoopPhisInStackMap(HSuspendCheck* suspend_check) const {
+  LocationSummary* locations = suspend_check->GetLocations();
+  HBasicBlock* block = suspend_check->GetBlock();
+  DCHECK(block->GetLoopInformation()->GetSuspendCheck() == suspend_check);
+  DCHECK(block->IsLoopHeader());
+
+  for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
+    HInstruction* current = it.Current();
+    LiveInterval* interval = current->GetLiveInterval();
+    // We only need to clear the stack bits of loop phis that hold objects and
+    // are in a register: phis allocated on the stack already have the object there.
+    if (current->GetType() == Primitive::kPrimNot
+        && interval->HasRegister()
+        && interval->HasSpillSlot()) {
+      locations->ClearStackBit(interval->GetSpillSlot() / kVRegSize);
+    }
+  }
+}
+
 }  // namespace art
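
As an illustration of the index computation above (hypothetical phi and
offsets; kVRegSize is 4 bytes):

    // Sketch: a reference loop phi lives in a register but was also given
    // a spill slot, say at byte offset 8. At the back edge that slot has
    // not been written yet, so stack bit 8 / kVRegSize == 2 is cleared and
    // the GC will not scan the stale slot.
    locations->ClearStackBit(interval->GetSpillSlot() / kVRegSize);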
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 55f5d8d..b58f3b3 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -143,6 +143,13 @@
     is_leaf_ = false;
   }
 
+  // Clears the stack bits of the spill slots taken by loop phis in the
+  // `LocationSummary` of the given suspend check. This is called when the
+  // code generator emits the suspend check at the back edge instead of at
+  // the loop header, where the suspend check instruction actually is. At
+  // that point, the spill slots of the loop phis have not been written yet.
+  void ClearSpillSlotsFromLoopPhisInStackMap(HSuspendCheck* suspend_check) const;
+
  protected:
   CodeGenerator(HGraph* graph, size_t number_of_registers)
       : frame_size_(kUninitializedFrameSize),
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 206ed13..dd60745 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -93,8 +93,8 @@
 
 class SuspendCheckSlowPathARM : public SlowPathCode {
  public:
-  explicit SuspendCheckSlowPathARM(HSuspendCheck* instruction)
-      : instruction_(instruction) {}
+  explicit SuspendCheckSlowPathARM(HSuspendCheck* instruction, HBasicBlock* successor)
+      : instruction_(instruction), successor_(successor) {}
 
   virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     __ Bind(GetEntryLabel());
@@ -104,13 +104,24 @@
     __ blx(LR);
     codegen->RecordPcInfo(instruction_, instruction_->GetDexPc());
     codegen->RestoreLiveRegisters(instruction_->GetLocations());
-    __ b(GetReturnLabel());
+    if (successor_ == nullptr) {
+      __ b(GetReturnLabel());
+    } else {
+      __ b(codegen->GetLabelOf(successor_));
+    }
   }
 
-  Label* GetReturnLabel() { return &return_label_; }
+  Label* GetReturnLabel() {
+    DCHECK(successor_ == nullptr);
+    return &return_label_;
+  }
 
  private:
   HSuspendCheck* const instruction_;
+  // If not null, the block to branch to after the suspend check.
+  HBasicBlock* const successor_;
+
+  // If `successor_` is null, the label to branch to after the suspend check.
   Label return_label_;
 
   DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathARM);
@@ -562,9 +573,22 @@
 
 void InstructionCodeGeneratorARM::VisitGoto(HGoto* got) {
   HBasicBlock* successor = got->GetSuccessor();
-  if (GetGraph()->GetExitBlock() == successor) {
-    codegen_->GenerateFrameExit();
-  } else if (!codegen_->GoesToNextBlock(got->GetBlock(), successor)) {
+  DCHECK(!successor->IsExitBlock());
+
+  HBasicBlock* block = got->GetBlock();
+  HInstruction* previous = got->GetPrevious();
+
+  HLoopInformation* info = block->GetLoopInformation();
+  if (info != nullptr && info->IsBackEdge(block) && info->HasSuspendCheck()) {
+    codegen_->ClearSpillSlotsFromLoopPhisInStackMap(info->GetSuspendCheck());
+    GenerateSuspendCheck(info->GetSuspendCheck(), successor);
+    return;
+  }
+
+  if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
+    GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
+  }
+  if (!codegen_->GoesToNextBlock(got->GetBlock(), successor)) {
     __ b(codegen_->GetLabelOf(successor));
   }
 }
@@ -1567,14 +1591,34 @@
 }
 
 void InstructionCodeGeneratorARM::VisitSuspendCheck(HSuspendCheck* instruction) {
+  HBasicBlock* block = instruction->GetBlock();
+  if (block->GetLoopInformation() != nullptr) {
+    DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction);
+    // The back edge will generate the suspend check.
+    return;
+  }
+  if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) {
+    // The goto will generate the suspend check.
+    return;
+  }
+  GenerateSuspendCheck(instruction, nullptr);
+}
+
+void InstructionCodeGeneratorARM::GenerateSuspendCheck(HSuspendCheck* instruction,
+                                                       HBasicBlock* successor) {
   SuspendCheckSlowPathARM* slow_path =
-      new (GetGraph()->GetArena()) SuspendCheckSlowPathARM(instruction);
+      new (GetGraph()->GetArena()) SuspendCheckSlowPathARM(instruction, successor);
   codegen_->AddSlowPath(slow_path);
 
   __ AddConstant(R4, R4, -1);
   __ cmp(R4, ShifterOperand(0));
-  __ b(slow_path->GetEntryLabel(), LE);
-  __ Bind(slow_path->GetReturnLabel());
+  if (successor == nullptr) {
+    __ b(slow_path->GetEntryLabel(), LE);
+    __ Bind(slow_path->GetReturnLabel());
+  } else {
+    __ b(codegen_->GetLabelOf(successor), GT);
+    __ b(slow_path->GetEntryLabel());
+  }
 }
 
 ArmAssembler* ParallelMoveResolverARM::GetAssembler() const {
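
The two shapes GenerateSuspendCheck emits, sketched as ARM assembly
(labels illustrative; R4 is assumed to cache the suspend count, as in
the code above):

    ; successor == nullptr: the check stays where the instruction is.
    sub  r4, r4, #1
    cmp  r4, #0
    ble  slow_path          ; pTestSuspend, then branch to the return label
    ; return label bound here; execution falls through

    ; successor != nullptr: the check sits on the back edge, so there is
    ; no fall-through block.
    sub  r4, r4, #1
    cmp  r4, #0
    bgt  successor          ; fast path: straight to the loop header
    b    slow_path          ; pTestSuspend, then b successor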
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 0902fb8..949d9ae 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -117,6 +117,11 @@
   void LoadCurrentMethod(Register reg);
 
  private:
+  // Generates code for the given suspend check. If not null, `successor`
+  // is the block to branch to when no suspend is needed, and to return
+  // to after the suspend call.
+  void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor);
+
   ArmAssembler* const assembler_;
   CodeGeneratorARM* const codegen_;
 
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 0db4311..f1716a3 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -117,8 +117,8 @@
 
 class SuspendCheckSlowPathX86 : public SlowPathCode {
  public:
-  explicit SuspendCheckSlowPathX86(HSuspendCheck* instruction)
-      : instruction_(instruction) {}
+  explicit SuspendCheckSlowPathX86(HSuspendCheck* instruction, HBasicBlock* successor)
+      : instruction_(instruction), successor_(successor) {}
 
   virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     __ Bind(GetEntryLabel());
@@ -126,13 +126,21 @@
     __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pTestSuspend)));
     codegen->RecordPcInfo(instruction_, instruction_->GetDexPc());
     codegen->RestoreLiveRegisters(instruction_->GetLocations());
-    __ jmp(GetReturnLabel());
+    if (successor_ == nullptr) {
+      __ jmp(GetReturnLabel());
+    } else {
+      __ jmp(codegen->GetLabelOf(successor_));
+    }
   }
 
-  Label* GetReturnLabel() { return &return_label_; }
+  Label* GetReturnLabel() {
+    DCHECK(successor_ == nullptr);
+    return &return_label_;
+  }
 
  private:
   HSuspendCheck* const instruction_;
+  HBasicBlock* const successor_;
   Label return_label_;
 
   DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathX86);
@@ -517,9 +525,22 @@
 
 void InstructionCodeGeneratorX86::VisitGoto(HGoto* got) {
   HBasicBlock* successor = got->GetSuccessor();
-  if (GetGraph()->GetExitBlock() == successor) {
-    codegen_->GenerateFrameExit();
-  } else if (!codegen_->GoesToNextBlock(got->GetBlock(), successor)) {
+  DCHECK(!successor->IsExitBlock());
+
+  HBasicBlock* block = got->GetBlock();
+  HInstruction* previous = got->GetPrevious();
+
+  HLoopInformation* info = block->GetLoopInformation();
+  if (info != nullptr && info->IsBackEdge(block) && info->HasSuspendCheck()) {
+    codegen_->ClearSpillSlotsFromLoopPhisInStackMap(info->GetSuspendCheck());
+    GenerateSuspendCheck(info->GetSuspendCheck(), successor);
+    return;
+  }
+
+  if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
+    GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
+  }
+  if (!codegen_->GoesToNextBlock(got->GetBlock(), successor)) {
     __ jmp(codegen_->GetLabelOf(successor));
   }
 }
@@ -1558,13 +1579,33 @@
 }
 
 void InstructionCodeGeneratorX86::VisitSuspendCheck(HSuspendCheck* instruction) {
+  HBasicBlock* block = instruction->GetBlock();
+  if (block->GetLoopInformation() != nullptr) {
+    DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction);
+    // The back edge will generate the suspend check.
+    return;
+  }
+  if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) {
+    // The goto will generate the suspend check.
+    return;
+  }
+  GenerateSuspendCheck(instruction, nullptr);
+}
+
+void InstructionCodeGeneratorX86::GenerateSuspendCheck(HSuspendCheck* instruction,
+                                                       HBasicBlock* successor) {
   SuspendCheckSlowPathX86* slow_path =
-      new (GetGraph()->GetArena()) SuspendCheckSlowPathX86(instruction);
+      new (GetGraph()->GetArena()) SuspendCheckSlowPathX86(instruction, successor);
   codegen_->AddSlowPath(slow_path);
-  __ fs()->cmpl(Address::Absolute(
+  __ fs()->cmpw(Address::Absolute(
       Thread::ThreadFlagsOffset<kX86WordSize>().Int32Value()), Immediate(0));
-  __ j(kNotEqual, slow_path->GetEntryLabel());
-  __ Bind(slow_path->GetReturnLabel());
+  if (successor == nullptr) {
+    __ j(kNotEqual, slow_path->GetEntryLabel());
+    __ Bind(slow_path->GetReturnLabel());
+  } else {
+    __ j(kEqual, codegen_->GetLabelOf(successor));
+    __ jmp(slow_path->GetEntryLabel());
+  }
 }
 
 X86Assembler* ParallelMoveResolverX86::GetAssembler() const {
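
A sketch of the x86 fast path in the back-edge mode (Intel-style syntax,
names illustrative):

    cmpw fs:[Thread::ThreadFlagsOffset], 0  ; 16-bit compare of the flags half
    je   successor                          ; nothing pending: back to the header
    jmp  slow_path                          ; pTestSuspend, then jmp successor

The 16-bit compare matters because the flags field shares a 32-bit word
with the thread state: cmpl also compared the state half and could take
the slow path even when no flag was set.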
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index ffcaf60..23145bf 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -119,6 +119,11 @@
   X86Assembler* GetAssembler() const { return assembler_; }
 
  private:
+  // Generates code for the given suspend check. If not null, `successor`
+  // is the block to branch to when no suspend is needed, and to return
+  // to after the suspend call.
+  void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor);
+
   X86Assembler* const assembler_;
   CodeGeneratorX86* const codegen_;
 
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 56198af..5df8ed9 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -98,8 +98,8 @@
 
 class SuspendCheckSlowPathX86_64 : public SlowPathCode {
  public:
-  explicit SuspendCheckSlowPathX86_64(HSuspendCheck* instruction)
-      : instruction_(instruction) {}
+  explicit SuspendCheckSlowPathX86_64(HSuspendCheck* instruction, HBasicBlock* successor)
+      : instruction_(instruction), successor_(successor) {}
 
   virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     __ Bind(GetEntryLabel());
@@ -107,13 +107,21 @@
     __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pTestSuspend), true));
     codegen->RecordPcInfo(instruction_, instruction_->GetDexPc());
     codegen->RestoreLiveRegisters(instruction_->GetLocations());
-    __ jmp(GetReturnLabel());
+    if (successor_ == nullptr) {
+      __ jmp(GetReturnLabel());
+    } else {
+      __ jmp(codegen->GetLabelOf(successor_));
+    }
   }
 
-  Label* GetReturnLabel() { return &return_label_; }
+  Label* GetReturnLabel() {
+    DCHECK(successor_ == nullptr);
+    return &return_label_;
+  }
 
  private:
   HSuspendCheck* const instruction_;
+  HBasicBlock* const successor_;
   Label return_label_;
 
   DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathX86_64);
@@ -400,9 +408,22 @@
 
 void InstructionCodeGeneratorX86_64::VisitGoto(HGoto* got) {
   HBasicBlock* successor = got->GetSuccessor();
-  if (GetGraph()->GetExitBlock() == successor) {
-    codegen_->GenerateFrameExit();
-  } else if (!codegen_->GoesToNextBlock(got->GetBlock(), successor)) {
+  DCHECK(!successor->IsExitBlock());
+
+  HBasicBlock* block = got->GetBlock();
+  HInstruction* previous = got->GetPrevious();
+
+  HLoopInformation* info = block->GetLoopInformation();
+  if (info != nullptr && info->IsBackEdge(block) && info->HasSuspendCheck()) {
+    codegen_->ClearSpillSlotsFromLoopPhisInStackMap(info->GetSuspendCheck());
+    GenerateSuspendCheck(info->GetSuspendCheck(), successor);
+    return;
+  }
+
+  if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
+    GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
+  }
+  if (!codegen_->GoesToNextBlock(got->GetBlock(), successor)) {
     __ jmp(codegen_->GetLabelOf(successor));
   }
 }
@@ -1403,13 +1424,33 @@
 }
 
 void InstructionCodeGeneratorX86_64::VisitSuspendCheck(HSuspendCheck* instruction) {
+  HBasicBlock* block = instruction->GetBlock();
+  if (block->GetLoopInformation() != nullptr) {
+    DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction);
+    // The back edge will generate the suspend check.
+    return;
+  }
+  if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) {
+    // The goto will generate the suspend check.
+    return;
+  }
+  GenerateSuspendCheck(instruction, nullptr);
+}
+
+void InstructionCodeGeneratorX86_64::GenerateSuspendCheck(HSuspendCheck* instruction,
+                                                          HBasicBlock* successor) {
   SuspendCheckSlowPathX86_64* slow_path =
-      new (GetGraph()->GetArena()) SuspendCheckSlowPathX86_64(instruction);
+      new (GetGraph()->GetArena()) SuspendCheckSlowPathX86_64(instruction, successor);
   codegen_->AddSlowPath(slow_path);
-  __ gs()->cmpl(Address::Absolute(
+  __ gs()->cmpw(Address::Absolute(
       Thread::ThreadFlagsOffset<kX86_64WordSize>().Int32Value(), true), Immediate(0));
-  __ j(kNotEqual, slow_path->GetEntryLabel());
-  __ Bind(slow_path->GetReturnLabel());
+  if (successor == nullptr) {
+    __ j(kNotEqual, slow_path->GetEntryLabel());
+    __ Bind(slow_path->GetReturnLabel());
+  } else {
+    __ j(kEqual, codegen_->GetLabelOf(successor));
+    __ jmp(slow_path->GetEntryLabel());
+  }
 }
 
 X86_64Assembler* ParallelMoveResolverX86_64::GetAssembler() const {
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index ea21872..a299cf6 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -116,6 +116,11 @@
   X86_64Assembler* GetAssembler() const { return assembler_; }
 
  private:
+  // Generates code for the given suspend check. If not null, `successor`
+  // is the block to branch to when no suspend is needed, and to return
+  // to after the suspend call.
+  void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor);
+
   X86_64Assembler* const assembler_;
   CodeGeneratorX86_64* const codegen_;
 
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
new file mode 100644
index 0000000..a0de73d
--- /dev/null
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "instruction_simplifier.h"
+
+namespace art {
+
+void InstructionSimplifier::Run() {
+  VisitInsertionOrder();
+}
+
+void InstructionSimplifier::VisitSuspendCheck(HSuspendCheck* check) {
+  HBasicBlock* block = check->GetBlock();
+  // For now, always keep the suspend check in the entry block.
+  if (block->IsEntryBlock()) return;
+
+  // For now, always keep the suspend check at the loop header.
+  if (block->IsLoopHeader() && block->GetFirstInstruction() == check) {
+    DCHECK(block->GetLoopInformation()->GetSuspendCheck() == check);
+    return;
+  }
+
+  // Remove the suspend check that was added at build time for the baseline
+  // compiler.
+  block->RemoveInstruction(check);
+}
+
+}  // namespace art
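
A hedged usage sketch, mirroring the pass ordering set up in
optimizing_compiler.cc below; the pass runs on a graph already in SSA
form, after phi elimination and before GVN:

    SsaRedundantPhiElimination(graph).Run();
    SsaDeadPhiElimination(graph).Run();
    InstructionSimplifier(graph).Run();  // drops the redundant suspend checks
    GlobalValueNumberer(graph->GetArena(), graph).Run();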
diff --git a/compiler/optimizing/instruction_simplifier.h b/compiler/optimizing/instruction_simplifier.h
new file mode 100644
index 0000000..b2f3f52
--- /dev/null
+++ b/compiler/optimizing/instruction_simplifier.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_H_
+#define ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_H_
+
+#include "nodes.h"
+
+namespace art {
+
+/**
+ * Implements optimizations specific to each instruction.
+ */
+class InstructionSimplifier : public HGraphVisitor {
+ public:
+  explicit InstructionSimplifier(HGraph* graph) : HGraphVisitor(graph) {}
+
+  void Run();
+
+ private:
+  virtual void VisitSuspendCheck(HSuspendCheck* check) OVERRIDE;
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_H_
diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h
index 06623b6..3a769df 100644
--- a/compiler/optimizing/locations.h
+++ b/compiler/optimizing/locations.h
@@ -363,6 +363,10 @@
     stack_mask_->SetBit(index);
   }
 
+  void ClearStackBit(uint32_t index) {
+    stack_mask_->ClearBit(index);
+  }
+
   void SetRegisterBit(uint32_t reg_id) {
     register_mask_ |= (1 << reg_id);
   }
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 09412a9..9ed7ef7 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -141,7 +141,7 @@
 void HGraph::SplitCriticalEdge(HBasicBlock* block, HBasicBlock* successor) {
   // Insert a new node between `block` and `successor` to split the
   // critical edge.
-  HBasicBlock* new_block = new (arena_) HBasicBlock(this);
+  HBasicBlock* new_block = new (arena_) HBasicBlock(this, successor->GetDexPc());
   AddBlock(new_block);
   new_block->AddInstruction(new (arena_) HGoto());
   block->ReplaceSuccessor(successor, new_block);
@@ -162,8 +162,10 @@
   // If there are more than one back edge, make them branch to the same block that
   // will become the only back edge. This simplifies finding natural loops in the
   // graph.
-  if (info->NumberOfBackEdges() > 1) {
-    HBasicBlock* new_back_edge = new (arena_) HBasicBlock(this);
+  // Also, if the loop is a do/while (that is, the back edge ends with an if), make
+  // the back edge end with a goto instead. This simplifies suspend check generation.
+  if (info->NumberOfBackEdges() > 1 || info->GetBackEdges().Get(0)->GetLastInstruction()->IsIf()) {
+    HBasicBlock* new_back_edge = new (arena_) HBasicBlock(this, header->GetDexPc());
     AddBlock(new_back_edge);
     new_back_edge->AddInstruction(new (arena_) HGoto());
     for (size_t pred = 0, e = info->GetBackEdges().Size(); pred < e; ++pred) {
@@ -180,7 +182,7 @@
   // loop.
   size_t number_of_incomings = header->GetPredecessors().Size() - info->NumberOfBackEdges();
   if (number_of_incomings != 1) {
-    HBasicBlock* pre_header = new (arena_) HBasicBlock(this);
+    HBasicBlock* pre_header = new (arena_) HBasicBlock(this, header->GetDexPc());
     AddBlock(pre_header);
     pre_header->AddInstruction(new (arena_) HGoto());
 
@@ -200,6 +202,18 @@
   if (header->GetPredecessors().Get(1) != info->GetBackEdges().Get(0)) {
     header->SwapPredecessors();
   }
+
+  // Place the suspend check at the beginning of the header, so that live registers
+  // will be known when allocating registers. Note that code generation can still
+  // generate the suspend check at the back edge, but it must then be careful with
+  // loop phi spill slots (which have not been written to at the back edge).
+  HInstruction* first_instruction = header->GetFirstInstruction();
+  if (!first_instruction->IsSuspendCheck()) {
+    HSuspendCheck* check = new (arena_) HSuspendCheck(header->GetDexPc());
+    header->InsertInstructionBefore(check, first_instruction);
+    first_instruction = check;
+  }
+  info->SetSuspendCheck(first_instruction->AsSuspendCheck());
 }
 
 void HGraph::SimplifyCFG() {
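
A sketch of the do/while rewrite above (block names made up):

    before:  back_edge ends with HIf -> { header, exit }
    after:   the loop side of that HIf targets a new block holding a
             single HGoto -> header, so every back edge now ends with a
             goto and the suspend check can be emitted right before it,
             after the parallel moves for the loop phis.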
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index be6b355..7c8e7e2 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -32,6 +32,7 @@
 class HIntConstant;
 class HGraphVisitor;
 class HPhi;
+class HSuspendCheck;
 class LiveInterval;
 class LocationSummary;
 
@@ -198,6 +199,7 @@
  public:
   HLoopInformation(HBasicBlock* header, HGraph* graph)
       : header_(header),
+        suspend_check_(nullptr),
         back_edges_(graph->GetArena(), kDefaultNumberOfBackEdges),
         // Make bit vector growable, as the number of blocks may change.
         blocks_(graph->GetArena(), graph->GetBlocks().Size(), true) {}
@@ -206,6 +208,10 @@
     return header_;
   }
 
+  HSuspendCheck* GetSuspendCheck() const { return suspend_check_; }
+  void SetSuspendCheck(HSuspendCheck* check) { suspend_check_ = check; }
+  bool HasSuspendCheck() const { return suspend_check_ != nullptr; }
+
   void AddBackEdge(HBasicBlock* back_edge) {
     back_edges_.Add(back_edge);
   }
@@ -254,6 +260,7 @@
   void PopulateRecursive(HBasicBlock* block);
 
   HBasicBlock* header_;
+  HSuspendCheck* suspend_check_;
   GrowableArray<HBasicBlock*> back_edges_;
   ArenaBitVector blocks_;
 
@@ -261,13 +268,14 @@
 };
 
 static constexpr size_t kNoLifetime = -1;
+static constexpr uint32_t kNoDexPc = -1;
 
 // A block in a method. Contains the list of instructions represented
 // as a double linked list. Each block knows its predecessors and
 // successors.
 class HBasicBlock : public ArenaObject {
  public:
-  explicit HBasicBlock(HGraph* graph)
+  explicit HBasicBlock(HGraph* graph, uint32_t dex_pc = kNoDexPc)
       : graph_(graph),
         predecessors_(graph->GetArena(), kDefaultNumberOfPredecessors),
         successors_(graph->GetArena(), kDefaultNumberOfSuccessors),
@@ -275,6 +284,7 @@
         dominator_(nullptr),
         dominated_blocks_(graph->GetArena(), kDefaultNumberOfDominatedBlocks),
         block_id_(-1),
+        dex_pc_(dex_pc),
         lifetime_start_(kNoLifetime),
         lifetime_end_(kNoLifetime) {}
 
@@ -290,6 +300,14 @@
     return dominated_blocks_;
   }
 
+  bool IsEntryBlock() const {
+    return graph_->GetEntryBlock() == this;
+  }
+
+  bool IsExitBlock() const {
+    return graph_->GetExitBlock() == this;
+  }
+
   void AddBackEdge(HBasicBlock* back_edge) {
     if (loop_information_ == nullptr) {
       loop_information_ = new (graph_->GetArena()) HLoopInformation(this, graph_);
@@ -423,6 +441,8 @@
   void SetLifetimeStart(size_t start) { lifetime_start_ = start; }
   void SetLifetimeEnd(size_t end) { lifetime_end_ = end; }
 
+  uint32_t GetDexPc() const { return dex_pc_; }
+
  private:
   HGraph* const graph_;
   GrowableArray<HBasicBlock*> predecessors_;
@@ -433,6 +453,8 @@
   HBasicBlock* dominator_;
   GrowableArray<HBasicBlock*> dominated_blocks_;
   int block_id_;
+  // The dex program counter of the first instruction of this block.
+  const uint32_t dex_pc_;
   size_t lifetime_start_;
   size_t lifetime_end_;
 
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 702eba1..65bdb18 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -26,6 +26,7 @@
 #include "driver/dex_compilation_unit.h"
 #include "graph_visualizer.h"
 #include "gvn.h"
+#include "instruction_simplifier.h"
 #include "nodes.h"
 #include "register_allocator.h"
 #include "ssa_phi_elimination.h"
@@ -261,6 +262,7 @@
 
     SsaRedundantPhiElimination(graph).Run();
     SsaDeadPhiElimination(graph).Run();
+    InstructionSimplifier(graph).Run();
     GlobalValueNumberer(graph->GetArena(), graph).Run();
     visualizer.DumpGraph(kGVNPassName);
 
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index 1ac9b78..78f20a1 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -685,14 +685,6 @@
   }
   size_t end = last_sibling->GetEnd();
 
-  if (NeedTwoSpillSlot(parent->GetType())) {
-    AllocateTwoSpillSlots(parent, end);
-  } else {
-    AllocateOneSpillSlot(parent, end);
-  }
-}
-
-void RegisterAllocator::AllocateTwoSpillSlots(LiveInterval* parent, size_t end) {
   // Find an available spill slot.
   size_t slot = 0;
   for (size_t e = spill_slots_.Size(); slot < e; ++slot) {
@@ -706,35 +698,25 @@
     }
   }
 
-  if (slot == spill_slots_.Size()) {
-    // We need a new spill slot.
-    spill_slots_.Add(end);
-    spill_slots_.Add(end);
-  } else if (slot == spill_slots_.Size() - 1) {
-    spill_slots_.Put(slot, end);
-    spill_slots_.Add(end);
-  } else {
-    spill_slots_.Put(slot, end);
-    spill_slots_.Put(slot + 1, end);
-  }
-
-  parent->SetSpillSlot((slot + reserved_out_slots_) * kVRegSize);
-}
-
-void RegisterAllocator::AllocateOneSpillSlot(LiveInterval* parent, size_t end) {
-  // Find an available spill slot.
-  size_t slot = 0;
-  for (size_t e = spill_slots_.Size(); slot < e; ++slot) {
-    if (spill_slots_.Get(slot) <= parent->GetStart()) {
-      break;
+  if (NeedTwoSpillSlot(parent->GetType())) {
+    if (slot == spill_slots_.Size()) {
+      // We need a new spill slot.
+      spill_slots_.Add(end);
+      spill_slots_.Add(end);
+    } else if (slot == spill_slots_.Size() - 1) {
+      spill_slots_.Put(slot, end);
+      spill_slots_.Add(end);
+    } else {
+      spill_slots_.Put(slot, end);
+      spill_slots_.Put(slot + 1, end);
     }
-  }
-
-  if (slot == spill_slots_.Size()) {
-    // We need a new spill slot.
-    spill_slots_.Add(end);
   } else {
-    spill_slots_.Put(slot, end);
+    if (slot == spill_slots_.Size()) {
+      // We need a new spill slot.
+      spill_slots_.Add(end);
+    } else {
+      spill_slots_.Put(slot, end);
+    }
   }
 
   parent->SetSpillSlot((slot + reserved_out_slots_) * kVRegSize);
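
A worked example of the slot search above (hypothetical contents; each
entry records the end position of the interval that last used the slot):

    spill_slots_ = {30, 12, 40}, parent->GetStart() == 16
    slot 0 is taken until position 30, but slot 1 has been free since
    position 12 (<= 16), so slot 1 is reused and its entry is updated to
    the new interval's end. A long or double (NeedTwoSpillSlot) would
    additionally require the adjacent slot, growing spill_slots_ as
    coded above when no free pair exists.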
diff --git a/compiler/optimizing/register_allocator.h b/compiler/optimizing/register_allocator.h
index 3c305c8..b97b6fc 100644
--- a/compiler/optimizing/register_allocator.h
+++ b/compiler/optimizing/register_allocator.h
@@ -100,8 +100,6 @@
 
   // Allocate a spill slot for the given interval.
   void AllocateSpillSlotFor(LiveInterval* interval);
-  void AllocateOneSpillSlot(LiveInterval* interval, size_t end);
-  void AllocateTwoSpillSlots(LiveInterval* interval, size_t end);
 
   // Connect adjacent siblings within blocks.
   void ConnectSiblings(LiveInterval* interval);
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index 2c9bc28..f888d46 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -746,6 +746,7 @@
   EmitRegisterOperand(dst, src);
 }
 
+
 void X86Assembler::xchgl(Register reg, const Address& address) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x87);
@@ -753,6 +754,13 @@
 }
 
 
+void X86Assembler::cmpw(const Address& address, const Immediate& imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitComplex(7, address, imm);
+}
+
+
 void X86Assembler::cmpl(Register reg, const Immediate& imm) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitComplex(7, Operand(reg), imm);
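
Encoding note for the new cmpw (a sketch of what EmitComplex produces):

    // Immediate(0) fits in a sign-extended imm8, so the emitted bytes take
    // the form
    //   66 83 /7 ib
    // where the 0x66 operand size override turns the compare into a 16-bit
    // one. Immediates that do not fit in 8 bits would be emitted by
    // EmitComplex as a full imm32, so only 8-bit immediates are safe here.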
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index 5c4e34f..ec983d9 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -337,6 +337,8 @@
   void xchgl(Register dst, Register src);
   void xchgl(Register reg, const Address& address);
 
+  void cmpw(const Address& address, const Immediate& imm);
+
   void cmpl(Register reg, const Immediate& imm);
   void cmpl(Register reg0, Register reg1);
   void cmpl(Register reg, const Address& address);
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index 1e2884a..a47e968 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -839,6 +839,14 @@
 }
 
 
+void X86_64Assembler::cmpw(const Address& address, const Immediate& imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitOptionalRex32(address);
+  EmitComplex(7, address, imm);
+}
+
+
 void X86_64Assembler::cmpl(CpuRegister reg, const Immediate& imm) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitOptionalRex32(reg);
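
Note on cmpw's prefix order: 0x66 is a legacy operand size prefix, so it
is emitted before the optional REX byte, which in turn must immediately
precede the opcode. For the fs-absolute address used by the suspend
check, no REX byte is emitted at all.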
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 763dafe..1fd65c2 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -378,6 +378,8 @@
   void xchgq(CpuRegister dst, CpuRegister src);
   void xchgl(CpuRegister reg, const Address& address);
 
+  void cmpw(const Address& address, const Immediate& imm);
+
   void cmpl(CpuRegister reg, const Immediate& imm);
   void cmpl(CpuRegister reg0, CpuRegister reg1);
   void cmpl(CpuRegister reg, const Address& address);