Merge "Use the lock word bits for Baker-style read barrier."
diff --git a/Android.mk b/Android.mk
index 9360355..e7623c6 100644
--- a/Android.mk
+++ b/Android.mk
@@ -55,16 +55,16 @@
 clean-oat-target:
 	adb root
 	adb wait-for-device remount
-	adb shell rm -rf $(ART_TARGET_NATIVETEST_DIR)
-	adb shell rm -rf $(ART_TARGET_TEST_DIR)
-	adb shell rm -rf $(ART_TARGET_DALVIK_CACHE_DIR)/*/*
-	adb shell rm -rf $(DEXPREOPT_BOOT_JAR_DIR)/$(DEX2OAT_TARGET_ARCH)
-	adb shell rm -rf system/app/$(DEX2OAT_TARGET_ARCH)
+	adb shell sh -c "rm -rf $(ART_TARGET_NATIVETEST_DIR)"
+	adb shell sh -c "rm -rf $(ART_TARGET_TEST_DIR)"
+	adb shell sh -c "rm -rf $(ART_TARGET_DALVIK_CACHE_DIR)/*/*"
+	adb shell sh -c "rm -rf $(DEXPREOPT_BOOT_JAR_DIR)/$(DEX2OAT_TARGET_ARCH)"
+	adb shell sh -c "rm -rf system/app/$(DEX2OAT_TARGET_ARCH)"
 ifdef TARGET_2ND_ARCH
-	adb shell rm -rf $(DEXPREOPT_BOOT_JAR_DIR)/$($(TARGET_2ND_ARCH_VAR_PREFIX)DEX2OAT_TARGET_ARCH)
-	adb shell rm -rf system/app/$($(TARGET_2ND_ARCH_VAR_PREFIX)DEX2OAT_TARGET_ARCH)
+	adb shell sh -c "rm -rf $(DEXPREOPT_BOOT_JAR_DIR)/$($(TARGET_2ND_ARCH_VAR_PREFIX)DEX2OAT_TARGET_ARCH)"
+	adb shell sh -c "rm -rf system/app/$($(TARGET_2ND_ARCH_VAR_PREFIX)DEX2OAT_TARGET_ARCH)"
 endif
-	adb shell rm -rf data/run-test/test-*/dalvik-cache/*
+	adb shell sh -c "rm -rf data/run-test/test-*/dalvik-cache/*"
 
 ifneq ($(art_dont_bother),true)
 
@@ -404,7 +404,7 @@
 use-art-full:
 	adb root
 	adb wait-for-device shell stop
-	adb shell rm -rf $(ART_TARGET_DALVIK_CACHE_DIR)/*
+	adb shell sh -c "rm -rf $(ART_TARGET_DALVIK_CACHE_DIR)/*"
 	adb shell setprop dalvik.vm.dex2oat-filter ""
 	adb shell setprop dalvik.vm.image-dex2oat-filter ""
 	adb shell setprop persist.sys.dalvik.vm.lib.2 libart.so
@@ -414,19 +414,19 @@
 use-artd-full:
 	adb root
 	adb wait-for-device shell stop
-	adb shell rm -rf $(ART_TARGET_DALVIK_CACHE_DIR)/*
+	adb shell sh -c "rm -rf $(ART_TARGET_DALVIK_CACHE_DIR)/*"
 	adb shell setprop dalvik.vm.dex2oat-filter ""
 	adb shell setprop dalvik.vm.image-dex2oat-filter ""
 	adb shell setprop persist.sys.dalvik.vm.lib.2 libartd.so
 	adb shell start
 
-.PHONY: use-art-smart
-use-art-smart:
+.PHONY: use-art-verify-at-runtime
+use-art-verify-at-runtime:
 	adb root
 	adb wait-for-device shell stop
-	adb shell rm -rf $(ART_TARGET_DALVIK_CACHE_DIR)/*
-	adb shell setprop dalvik.vm.dex2oat-filter "interpret-only"
-	adb shell setprop dalvik.vm.image-dex2oat-filter ""
+	adb shell sh -c "rm -rf $(ART_TARGET_DALVIK_CACHE_DIR)/*"
+	adb shell setprop dalvik.vm.dex2oat-filter "verify-at-runtime"
+	adb shell setprop dalvik.vm.image-dex2oat-filter "verify-at-runtime"
 	adb shell setprop persist.sys.dalvik.vm.lib.2 libart.so
 	adb shell start
 
@@ -434,7 +434,7 @@
 use-art-interpret-only:
 	adb root
 	adb wait-for-device shell stop
-	adb shell rm -rf $(ART_TARGET_DALVIK_CACHE_DIR)/*
+	adb shell sh -c "rm -rf $(ART_TARGET_DALVIK_CACHE_DIR)/*"
 	adb shell setprop dalvik.vm.dex2oat-filter "interpret-only"
 	adb shell setprop dalvik.vm.image-dex2oat-filter "interpret-only"
 	adb shell setprop persist.sys.dalvik.vm.lib.2 libart.so
@@ -444,7 +444,7 @@
 use-artd-interpret-only:
 	adb root
 	adb wait-for-device shell stop
-	adb shell rm -rf $(ART_TARGET_DALVIK_CACHE_DIR)/*
+	adb shell sh -c "rm -rf $(ART_TARGET_DALVIK_CACHE_DIR)/*"
 	adb shell setprop dalvik.vm.dex2oat-filter "interpret-only"
 	adb shell setprop dalvik.vm.image-dex2oat-filter "interpret-only"
 	adb shell setprop persist.sys.dalvik.vm.lib.2 libartd.so
@@ -454,7 +454,7 @@
 use-art-verify-none:
 	adb root
 	adb wait-for-device shell stop
-	adb shell rm -rf $(ART_TARGET_DALVIK_CACHE_DIR)/*
+	adb shell sh -c "rm -rf $(ART_TARGET_DALVIK_CACHE_DIR)/*"
 	adb shell setprop dalvik.vm.dex2oat-filter "verify-none"
 	adb shell setprop dalvik.vm.image-dex2oat-filter "verify-none"
 	adb shell setprop persist.sys.dalvik.vm.lib.2 libart.so
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index 7d76795..5a3236d 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -244,6 +244,7 @@
 
 COMPILER_GTEST_HOST_SRC_FILES := \
   $(COMPILER_GTEST_COMMON_SRC_FILES) \
+  compiler/dex/quick/x86/quick_assemble_x86_test.cc \
   compiler/utils/arm/assembler_arm32_test.cc \
   compiler/utils/arm/assembler_thumb2_test.cc \
   compiler/utils/assembler_thumb_test.cc \
diff --git a/compiler/dex/bb_optimizations.cc b/compiler/dex/bb_optimizations.cc
index 11a7e44..f351d99 100644
--- a/compiler/dex/bb_optimizations.cc
+++ b/compiler/dex/bb_optimizations.cc
@@ -17,6 +17,7 @@
 #include "bb_optimizations.h"
 #include "dataflow_iterator.h"
 #include "dataflow_iterator-inl.h"
+#include "global_value_numbering.h"
 
 namespace art {
 
@@ -79,4 +80,14 @@
   return false;
 }
 
+bool GlobalValueNumberingCleanupPass::Gate(const PassDataHolder* data) const {
+  DCHECK(data != nullptr);
+  CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+  DCHECK(c_unit != nullptr);
+  // Do not do cleanup if GVN skipped this.
+  // TODO: Proper dependencies between passes?
+  return !GlobalValueNumbering::Skip(c_unit);
+}
+
+
 }  // namespace art
diff --git a/compiler/dex/bb_optimizations.h b/compiler/dex/bb_optimizations.h
index 0850f42..b948afd 100644
--- a/compiler/dex/bb_optimizations.h
+++ b/compiler/dex/bb_optimizations.h
@@ -270,7 +270,28 @@
     CompilationUnit* c_unit = down_cast<PassMEDataHolder*>(data)->c_unit;
     DCHECK(c_unit != nullptr);
     c_unit->mir_graph->EliminateDeadCodeEnd();
-    down_cast<PassMEDataHolder*>(data)->dirty = !c_unit->mir_graph->MirSsaRepUpToDate();
+  }
+};
+
+/**
+ * @class GlobalValueNumberingCleanupPass
+ * @brief Performs the cleanup after global value numbering pass and the dependent
+ *        dead code elimination pass that needs the GVN data.
+ */
+class GlobalValueNumberingCleanupPass : public PassME {
+ public:
+  GlobalValueNumberingCleanupPass()
+    : PassME("GVNCleanup", kNoNodes, "") {
+  }
+
+  // Depends on GlobalValueNumbering, so implemented in cc file.
+  bool Gate(const PassDataHolder* data) const OVERRIDE;
+
+  void Start(PassDataHolder* data) const OVERRIDE {
+    DCHECK(data != nullptr);
+    CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+    DCHECK(c_unit != nullptr);
+    return c_unit->mir_graph->GlobalValueNumberingCleanup();
   }
 };
 
diff --git a/compiler/dex/global_value_numbering_test.cc b/compiler/dex/global_value_numbering_test.cc
index c538d0b..c8aa990 100644
--- a/compiler/dex/global_value_numbering_test.cc
+++ b/compiler/dex/global_value_numbering_test.cc
@@ -290,6 +290,15 @@
     DoPrepareVregToSsaMapExit(bb_id, map, count);
   }
 
+  template <size_t count>
+  void MarkAsWideSRegs(const int32_t (&sregs)[count]) {
+    for (int32_t sreg : sregs) {
+      cu_.mir_graph->reg_location_[sreg].wide = true;
+      cu_.mir_graph->reg_location_[sreg + 1].wide = true;
+      cu_.mir_graph->reg_location_[sreg + 1].high_word = true;
+    }
+  }
+
   void PerformGVN() {
     DoPerformGVN<LoopRepeatingTopologicalSortIterator>();
   }
@@ -360,9 +369,11 @@
     cu_.access_flags = kAccStatic;  // Don't let "this" interfere with this test.
     allocator_.reset(ScopedArenaAllocator::Create(&cu_.arena_stack));
     // By default, the zero-initialized reg_location_[.] with ref == false tells LVN that
-    // 0 constants are integral, not references. Nothing else is used by LVN/GVN.
+    // 0 constants are integral, not references, and the values are all narrow.
+    // Nothing else is used by LVN/GVN. Tests can override the default values as needed.
     cu_.mir_graph->reg_location_ =
         cu_.arena.AllocArray<RegLocation>(kMaxSsaRegs, kArenaAllocRegAlloc);
+    cu_.mir_graph->num_ssa_regs_ = kMaxSsaRegs;
     // Bind all possible sregs to live vregs for test purposes.
     live_in_v_->SetInitialBits(kMaxSsaRegs);
     cu_.mir_graph->ssa_base_vregs_.reserve(kMaxSsaRegs);
@@ -910,14 +921,14 @@
       DEF_IGET(6, Instruction::AGET_OBJECT, 3u, 200u, 201u),  // Same as at the left side.
 
       DEF_AGET(3, Instruction::AGET_WIDE, 4u, 300u, 301u),
-      DEF_CONST(5, Instruction::CONST_WIDE, 5u, 1000),
-      DEF_APUT(5, Instruction::APUT_WIDE, 5u, 300u, 301u),
-      DEF_AGET(6, Instruction::AGET_WIDE, 7u, 300u, 301u),  // Differs from the top and the CONST.
+      DEF_CONST(5, Instruction::CONST_WIDE, 6u, 1000),
+      DEF_APUT(5, Instruction::APUT_WIDE, 6u, 300u, 301u),
+      DEF_AGET(6, Instruction::AGET_WIDE, 8u, 300u, 301u),  // Differs from the top and the CONST.
 
-      DEF_AGET(3, Instruction::AGET_SHORT, 8u, 400u, 401u),
-      DEF_CONST(3, Instruction::CONST, 9u, 2000),
-      DEF_APUT(4, Instruction::APUT_SHORT, 9u, 400u, 401u),
-      DEF_APUT(5, Instruction::APUT_SHORT, 9u, 400u, 401u),
+      DEF_AGET(3, Instruction::AGET_SHORT, 10u, 400u, 401u),
+      DEF_CONST(3, Instruction::CONST, 11u, 2000),
+      DEF_APUT(4, Instruction::APUT_SHORT, 11u, 400u, 401u),
+      DEF_APUT(5, Instruction::APUT_SHORT, 11u, 400u, 401u),
       DEF_AGET(6, Instruction::AGET_SHORT, 12u, 400u, 401u),  // Differs from the top, == CONST.
 
       DEF_AGET(3, Instruction::AGET_CHAR, 13u, 500u, 501u),
@@ -939,6 +950,8 @@
   };
 
   PrepareMIRs(mirs);
+  static const int32_t wide_sregs[] = { 4, 6, 8 };
+  MarkAsWideSRegs(wide_sregs);
   PerformGVN();
   ASSERT_EQ(arraysize(mirs), value_names_.size());
   EXPECT_EQ(value_names_[0], value_names_[1]);
@@ -1057,6 +1070,12 @@
   };
 
   PrepareMIRs(mirs);
+  for (size_t i = 0u; i != arraysize(mirs); ++i) {
+    if ((mirs_[i].ssa_rep->defs[0] % 2) == 0) {
+      const int32_t wide_sregs[] = { mirs_[i].ssa_rep->defs[0] };
+      MarkAsWideSRegs(wide_sregs);
+    }
+  }
   PerformGVN();
   ASSERT_EQ(arraysize(mirs), value_names_.size());
   EXPECT_EQ(value_names_[0], value_names_[7]);
@@ -1493,27 +1512,27 @@
   static const MIRDef mirs[] = {
       // NOTE: MIRs here are ordered by unique tests. They will be put into appropriate blocks.
       DEF_AGET(3, Instruction::AGET_WIDE, 0u, 100u, 101u),
-      DEF_AGET(4, Instruction::AGET_WIDE, 1u, 100u, 101u),   // Same as at the top.
-      DEF_AGET(5, Instruction::AGET_WIDE, 2u, 100u, 101u),   // Same as at the top.
+      DEF_AGET(4, Instruction::AGET_WIDE, 2u, 100u, 101u),   // Same as at the top.
+      DEF_AGET(5, Instruction::AGET_WIDE, 4u, 100u, 101u),   // Same as at the top.
 
-      DEF_AGET(3, Instruction::AGET_BYTE, 3u, 200u, 201u),
-      DEF_AGET(4, Instruction::AGET_BYTE, 4u, 200u, 201u),  // Differs from top...
-      DEF_APUT(4, Instruction::APUT_BYTE, 5u, 200u, 201u),  // Because of this IPUT.
-      DEF_AGET(5, Instruction::AGET_BYTE, 6u, 200u, 201u),  // Differs from top and the loop AGET.
+      DEF_AGET(3, Instruction::AGET_BYTE, 6u, 200u, 201u),
+      DEF_AGET(4, Instruction::AGET_BYTE, 7u, 200u, 201u),  // Differs from top...
+      DEF_APUT(4, Instruction::APUT_BYTE, 8u, 200u, 201u),  // Because of this IPUT.
+      DEF_AGET(5, Instruction::AGET_BYTE, 9u, 200u, 201u),  // Differs from top and the loop AGET.
 
-      DEF_AGET(3, Instruction::AGET, 7u, 300u, 301u),
-      DEF_APUT(4, Instruction::APUT, 8u, 300u, 301u),   // Because of this IPUT...
-      DEF_AGET(4, Instruction::AGET, 9u, 300u, 301u),   // Differs from top.
-      DEF_AGET(5, Instruction::AGET, 10u, 300u, 301u),  // Differs from top but == the loop AGET.
+      DEF_AGET(3, Instruction::AGET, 10u, 300u, 301u),
+      DEF_APUT(4, Instruction::APUT, 11u, 300u, 301u),  // Because of this IPUT...
+      DEF_AGET(4, Instruction::AGET, 12u, 300u, 301u),   // Differs from top.
+      DEF_AGET(5, Instruction::AGET, 13u, 300u, 301u),  // Differs from top but == the loop AGET.
 
-      DEF_CONST(3, Instruction::CONST, 11u, 3000),
-      DEF_APUT(3, Instruction::APUT_CHAR, 11u, 400u, 401u),
-      DEF_APUT(3, Instruction::APUT_CHAR, 11u, 400u, 402u),
-      DEF_AGET(4, Instruction::AGET_CHAR, 14u, 400u, 401u),  // Differs from 11u and 16u.
-      DEF_AGET(4, Instruction::AGET_CHAR, 15u, 400u, 402u),  // Same as 14u.
-      DEF_CONST(4, Instruction::CONST, 16u, 4000),
-      DEF_APUT(4, Instruction::APUT_CHAR, 16u, 400u, 401u),
-      DEF_APUT(4, Instruction::APUT_CHAR, 16u, 400u, 402u),
+      DEF_CONST(3, Instruction::CONST, 14u, 3000),
+      DEF_APUT(3, Instruction::APUT_CHAR, 14u, 400u, 401u),
+      DEF_APUT(3, Instruction::APUT_CHAR, 14u, 400u, 402u),
+      DEF_AGET(4, Instruction::AGET_CHAR, 15u, 400u, 401u),  // Differs from 11u and 16u.
+      DEF_AGET(4, Instruction::AGET_CHAR, 16u, 400u, 402u),  // Same as 14u.
+      DEF_CONST(4, Instruction::CONST, 17u, 4000),
+      DEF_APUT(4, Instruction::APUT_CHAR, 17u, 400u, 401u),
+      DEF_APUT(4, Instruction::APUT_CHAR, 17u, 400u, 402u),
       DEF_AGET(5, Instruction::AGET_CHAR, 19u, 400u, 401u),  // Differs from 11u and 14u...
       DEF_AGET(5, Instruction::AGET_CHAR, 20u, 400u, 402u),  // and same as the CONST 16u.
 
@@ -1531,6 +1550,8 @@
   };
 
   PrepareMIRs(mirs);
+  static const int32_t wide_sregs[] = { 0, 2, 4 };
+  MarkAsWideSRegs(wide_sregs);
   PerformGVN();
   ASSERT_EQ(arraysize(mirs), value_names_.size());
   EXPECT_EQ(value_names_[0], value_names_[1]);
diff --git a/compiler/dex/gvn_dead_code_elimination.cc b/compiler/dex/gvn_dead_code_elimination.cc
index d7f36f7..4f0e9d1 100644
--- a/compiler/dex/gvn_dead_code_elimination.cc
+++ b/compiler/dex/gvn_dead_code_elimination.cc
@@ -347,6 +347,21 @@
   return false;
 }
 
+bool GvnDeadCodeElimination::VRegChains::IsVRegUsed(uint16_t first_change, uint16_t last_change,
+                                                    int v_reg, MIRGraph* mir_graph) const {
+  DCHECK_LE(first_change, last_change);
+  DCHECK_LE(last_change, mir_data_.size());
+  for (size_t c = first_change; c != last_change; ++c) {
+    SSARepresentation* ssa_rep = mir_data_[c].mir->ssa_rep;
+    for (int i = 0; i != ssa_rep->num_uses; ++i) {
+      if (mir_graph->SRegToVReg(ssa_rep->uses[i]) == v_reg) {
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
 void GvnDeadCodeElimination::VRegChains::RenameSRegUses(uint16_t first_change, uint16_t last_change,
                                                         int old_s_reg, int new_s_reg, bool wide) {
   for (size_t c = first_change; c != last_change; ++c) {
@@ -672,8 +687,14 @@
         uint16_t src_name =
             (d->wide_def ? lvn_->GetSregValueWide(src_s_reg) : lvn_->GetSregValue(src_s_reg));
         if (value_name == src_name) {
-          RecordPassKillMoveByRenamingSrcDef(check_change, c);
-          return;
+          // Check if the move's destination vreg is unused between check_change and the move.
+          uint32_t new_dest_v_reg = mir_graph_->SRegToVReg(d->mir->ssa_rep->defs[0]);
+          if (!vreg_chains_.IsVRegUsed(check_change + 1u, c, new_dest_v_reg, mir_graph_) &&
+              (!d->wide_def ||
+               !vreg_chains_.IsVRegUsed(check_change + 1u, c, new_dest_v_reg + 1, mir_graph_))) {
+            RecordPassKillMoveByRenamingSrcDef(check_change, c);
+            return;
+          }
         }
       }
     }
@@ -963,18 +984,17 @@
   uint16_t opcode = mir->dalvikInsn.opcode;
   switch (opcode) {
     case kMirOpPhi: {
-      // We can't recognize wide variables in Phi from num_defs == 2 as we've got two Phis instead.
+      // Determine if this Phi is merging wide regs.
+      RegLocation raw_dest = gvn_->GetMirGraph()->GetRawDest(mir);
+      if (raw_dest.high_word) {
+        // This is the high part of a wide reg. Ignore the Phi.
+        return false;
+      }
+      bool wide = raw_dest.wide;
+      // Record the value.
       DCHECK_EQ(mir->ssa_rep->num_defs, 1);
       int s_reg = mir->ssa_rep->defs[0];
-      bool wide = false;
-      uint16_t new_value = lvn_->GetSregValue(s_reg);
-      if (new_value == kNoValue) {
-        wide = true;
-        new_value = lvn_->GetSregValueWide(s_reg);
-        if (new_value == kNoValue) {
-          return false;  // Ignore the high word Phi.
-        }
-      }
+      uint16_t new_value = wide ? lvn_->GetSregValueWide(s_reg) : lvn_->GetSregValue(s_reg);
 
       int v_reg = mir_graph_->SRegToVReg(s_reg);
       DCHECK_EQ(vreg_chains_.CurrentValue(v_reg), kNoValue);  // No previous def for v_reg.
diff --git a/compiler/dex/gvn_dead_code_elimination.h b/compiler/dex/gvn_dead_code_elimination.h
index f2378f2..bc75a01 100644
--- a/compiler/dex/gvn_dead_code_elimination.h
+++ b/compiler/dex/gvn_dead_code_elimination.h
@@ -111,6 +111,8 @@
     void RemoveChange(uint16_t change);
     bool IsTopChange(uint16_t change) const;
     bool IsSRegUsed(uint16_t first_change, uint16_t last_change, int s_reg) const;
+    bool IsVRegUsed(uint16_t first_change, uint16_t last_change, int v_reg,
+                    MIRGraph* mir_graph) const;
     void RenameSRegUses(uint16_t first_change, uint16_t last_change,
                         int old_s_reg, int new_s_reg, bool wide);
     void RenameVRegUses(uint16_t first_change, uint16_t last_change,
diff --git a/compiler/dex/gvn_dead_code_elimination_test.cc b/compiler/dex/gvn_dead_code_elimination_test.cc
index 4d2b8b3..f9f0882 100644
--- a/compiler/dex/gvn_dead_code_elimination_test.cc
+++ b/compiler/dex/gvn_dead_code_elimination_test.cc
@@ -406,6 +406,15 @@
     }
   }
 
+  template <size_t count>
+  void MarkAsWideSRegs(const int32_t (&sregs)[count]) {
+    for (int32_t sreg : sregs) {
+      cu_.mir_graph->reg_location_[sreg].wide = true;
+      cu_.mir_graph->reg_location_[sreg + 1].wide = true;
+      cu_.mir_graph->reg_location_[sreg + 1].high_word = true;
+    }
+  }
+
   void PerformDCE() {
     FillVregToSsaRegExitMaps();
     cu_.mir_graph->GetNumOfCodeAndTempVRs();
@@ -467,9 +476,11 @@
     cu_.access_flags = kAccStatic;  // Don't let "this" interfere with this test.
     allocator_.reset(ScopedArenaAllocator::Create(&cu_.arena_stack));
     // By default, the zero-initialized reg_location_[.] with ref == false tells LVN that
-    // 0 constants are integral, not references. Nothing else is used by LVN/GVN.
+    // 0 constants are integral, not references, and the values are all narrow.
+    // Nothing else is used by LVN/GVN. Tests can override the default values as needed.
     cu_.mir_graph->reg_location_ = static_cast<RegLocation*>(cu_.arena.Alloc(
         kMaxSsaRegs * sizeof(cu_.mir_graph->reg_location_[0]), kArenaAllocRegAlloc));
+    cu_.mir_graph->num_ssa_regs_ = kMaxSsaRegs;
     // Bind all possible sregs to live vregs for test purposes.
     live_in_v_->SetInitialBits(kMaxSsaRegs);
     cu_.mir_graph->ssa_base_vregs_.reserve(kMaxSsaRegs);
@@ -705,6 +716,8 @@
   PrepareSRegToVRegMap(sreg_to_vreg_map);
 
   PrepareMIRs(mirs);
+  static const int32_t wide_sregs[] = { 3 };
+  MarkAsWideSRegs(wide_sregs);
   PerformGVN_DCE();
 
   ASSERT_EQ(arraysize(mirs), value_names_.size());
@@ -745,6 +758,8 @@
 
   PrepareIFields(ifields);
   PrepareMIRs(mirs);
+  static const int32_t wide_sregs[] = { 5 };
+  MarkAsWideSRegs(wide_sregs);
   PerformGVN_DCE();
 
   ASSERT_EQ(arraysize(mirs), value_names_.size());
@@ -777,6 +792,8 @@
   PrepareSRegToVRegMap(sreg_to_vreg_map);
 
   PrepareMIRs(mirs);
+  static const int32_t wide_sregs[] = { 0, 2 };
+  MarkAsWideSRegs(wide_sregs);
   PerformGVN_DCE();
 
   ASSERT_EQ(arraysize(mirs), value_names_.size());
@@ -1030,6 +1047,40 @@
   }
 }
 
+TEST_F(GvnDeadCodeEliminationTestSimple, NoRename4) {
+  static const MIRDef mirs[] = {
+      DEF_CONST(3, Instruction::CONST, 0u, 1000u),
+      DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 1u),
+      DEF_CONST(3, Instruction::CONST, 2u, 100u),
+      DEF_CONST(3, Instruction::CONST, 3u, 200u),
+      DEF_BINOP(3, Instruction::OR_INT_2ADDR, 4u, 2u, 3u),   // 3. Find definition of the move src.
+      DEF_MOVE(3, Instruction::MOVE, 5u, 0u),                // 4. Uses move dest vreg.
+      DEF_MOVE(3, Instruction::MOVE, 6u, 4u),                // 2. Find overwritten move src.
+      DEF_CONST(3, Instruction::CONST, 7u, 2000u),           // 1. Overwrites 4u, look for moves.
+  };
+
+  static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 3, 2, 4, 0, 2 };
+  PrepareSRegToVRegMap(sreg_to_vreg_map);
+
+  PrepareMIRs(mirs);
+  PerformGVN_DCE();
+
+  ASSERT_EQ(arraysize(mirs), value_names_.size());
+  static const size_t diff_indexes[] = { 0, 1, 2, 3, 4, 7 };
+  ExpectValueNamesNE(diff_indexes);
+  EXPECT_EQ(value_names_[0], value_names_[5]);
+  EXPECT_EQ(value_names_[4], value_names_[6]);
+
+  static const bool eliminated[] = {
+      false, false, false, false, false, false, false, false
+  };
+  static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch");
+  for (size_t i = 0; i != arraysize(eliminated); ++i) {
+    bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop);
+    EXPECT_EQ(eliminated[i], actually_eliminated) << i;
+  }
+}
+
 TEST_F(GvnDeadCodeEliminationTestSimple, Simple1) {
   static const IFieldDef ifields[] = {
       { 0u, 1u, 0u, false, kDexMemAccessObject },
@@ -1221,6 +1272,8 @@
 
   PrepareIFields(ifields);
   PrepareMIRs(mirs);
+  static const int32_t wide_sregs[] = { 1, 6 };
+  MarkAsWideSRegs(wide_sregs);
   PerformGVN_DCE();
 
   ASSERT_EQ(arraysize(mirs), value_names_.size());
diff --git a/compiler/dex/local_value_numbering.cc b/compiler/dex/local_value_numbering.cc
index cdf5e38..cc9dbe4 100644
--- a/compiler/dex/local_value_numbering.cc
+++ b/compiler/dex/local_value_numbering.cc
@@ -1152,28 +1152,20 @@
     // Running LVN without a full GVN?
     return kNoValue;
   }
-  int32_t* uses = mir->ssa_rep->uses;
-  // Try to find out if this is merging wide regs.
-  if (mir->ssa_rep->defs[0] != 0 &&
-      sreg_wide_value_map_.count(mir->ssa_rep->defs[0] - 1) != 0u) {
+  // Determine if this Phi is merging wide regs.
+  RegLocation raw_dest = gvn_->GetMirGraph()->GetRawDest(mir);
+  if (raw_dest.high_word) {
     // This is the high part of a wide reg. Ignore the Phi.
     return kNoValue;
   }
-  BasicBlockId* incoming = mir->meta.phi_incoming;
-  int16_t pos = 0;
-  // Check if we're merging a wide value based on the first merged LVN.
-  const LocalValueNumbering* first_lvn = gvn_->merge_lvns_[0];
-  DCHECK_LT(pos, mir->ssa_rep->num_uses);
-  while (incoming[pos] != first_lvn->Id()) {
-    ++pos;
-    DCHECK_LT(pos, mir->ssa_rep->num_uses);
-  }
-  int first_s_reg = uses[pos];
-  bool wide = (first_lvn->sreg_wide_value_map_.count(first_s_reg) != 0u);
+  bool wide = raw_dest.wide;
   // Iterate over *merge_lvns_ and skip incoming sregs for BBs without associated LVN.
   merge_names_.clear();
   uint16_t value_name = kNoValue;
   bool same_values = true;
+  BasicBlockId* incoming = mir->meta.phi_incoming;
+  int32_t* uses = mir->ssa_rep->uses;
+  int16_t pos = 0;
   for (const LocalValueNumbering* lvn : gvn_->merge_lvns_) {
     DCHECK_LT(pos, mir->ssa_rep->num_uses);
     while (incoming[pos] != lvn->Id()) {
@@ -1994,6 +1986,9 @@
   if (s_reg == INVALID_SREG) {
     return kNoValue;
   }
+  if (gvn_->GetMirGraph()->GetRegLocation(s_reg).wide != wide) {
+    return kNoValue;
+  }
   if (wide) {
     int high_s_reg = bb->data_flow_info->vreg_to_ssa_map_exit[v_reg + 1];
     if (high_s_reg != s_reg + 1) {
diff --git a/compiler/dex/local_value_numbering.h b/compiler/dex/local_value_numbering.h
index 379c952..67fb647 100644
--- a/compiler/dex/local_value_numbering.h
+++ b/compiler/dex/local_value_numbering.h
@@ -53,10 +53,12 @@
   }
 
   uint16_t GetSregValue(uint16_t s_reg) const {
+    DCHECK(!gvn_->GetMirGraph()->GetRegLocation(s_reg).wide);
     return GetSregValueImpl(s_reg, &sreg_value_map_);
   }
 
   uint16_t GetSregValueWide(uint16_t s_reg) const {
+    DCHECK(gvn_->GetMirGraph()->GetRegLocation(s_reg).wide);
     return GetSregValueImpl(s_reg, &sreg_wide_value_map_);
   }
 
@@ -123,21 +125,27 @@
 
   void SetOperandValue(uint16_t s_reg, uint16_t value) {
     DCHECK_EQ(sreg_wide_value_map_.count(s_reg), 0u);
+    DCHECK(!gvn_->GetMirGraph()->GetRegLocation(s_reg).wide);
     SetOperandValueImpl(s_reg, value, &sreg_value_map_);
   }
 
   uint16_t GetOperandValue(int s_reg) const {
     DCHECK_EQ(sreg_wide_value_map_.count(s_reg), 0u);
+    DCHECK(!gvn_->GetMirGraph()->GetRegLocation(s_reg).wide);
     return GetOperandValueImpl(s_reg, &sreg_value_map_);
   }
 
   void SetOperandValueWide(uint16_t s_reg, uint16_t value) {
     DCHECK_EQ(sreg_value_map_.count(s_reg), 0u);
+    DCHECK(gvn_->GetMirGraph()->GetRegLocation(s_reg).wide);
+    DCHECK(!gvn_->GetMirGraph()->GetRegLocation(s_reg).high_word);
     SetOperandValueImpl(s_reg, value, &sreg_wide_value_map_);
   }
 
   uint16_t GetOperandValueWide(int s_reg) const {
     DCHECK_EQ(sreg_value_map_.count(s_reg), 0u);
+    DCHECK(gvn_->GetMirGraph()->GetRegLocation(s_reg).wide);
+    DCHECK(!gvn_->GetMirGraph()->GetRegLocation(s_reg).high_word);
     return GetOperandValueImpl(s_reg, &sreg_wide_value_map_);
   }
 
@@ -331,7 +339,7 @@
 
   void CopyLiveSregValues(SregValueMap* dest, const SregValueMap& src);
 
-  // Intersect maps as sets. The value type must be equality-comparable.
+  // Intersect SSA reg value maps as sets, ignore dead regs.
   template <SregValueMap LocalValueNumbering::* map_ptr>
   void IntersectSregValueMaps();
 
diff --git a/compiler/dex/local_value_numbering_test.cc b/compiler/dex/local_value_numbering_test.cc
index 0393410..bd00690 100644
--- a/compiler/dex/local_value_numbering_test.cc
+++ b/compiler/dex/local_value_numbering_test.cc
@@ -182,6 +182,15 @@
         ~MirSFieldLoweringInfo::kFlagClassIsInitialized;
   }
 
+  template <size_t count>
+  void MarkAsWideSRegs(const int32_t (&sregs)[count]) {
+    for (int32_t sreg : sregs) {
+      cu_.mir_graph->reg_location_[sreg].wide = true;
+      cu_.mir_graph->reg_location_[sreg + 1].wide = true;
+      cu_.mir_graph->reg_location_[sreg + 1].high_word = true;
+    }
+  }
+
   void PerformLVN() {
     cu_.mir_graph->temp_.gvn.ifield_ids =  GlobalValueNumbering::PrepareGvnFieldIds(
         allocator_.get(), cu_.mir_graph->ifield_lowering_infos_);
@@ -210,9 +219,11 @@
     cu_.mir_graph.reset(new MIRGraph(&cu_, &cu_.arena));
     allocator_.reset(ScopedArenaAllocator::Create(&cu_.arena_stack));
     // By default, the zero-initialized reg_location_[.] with ref == false tells LVN that
-    // 0 constants are integral, not references. Nothing else is used by LVN/GVN.
+    // 0 constants are integral, not references, and the values are all narrow.
+    // Nothing else is used by LVN/GVN. Tests can override the default values as needed.
     cu_.mir_graph->reg_location_ = static_cast<RegLocation*>(cu_.arena.Alloc(
         kMaxSsaRegs * sizeof(cu_.mir_graph->reg_location_[0]), kArenaAllocRegAlloc));
+    cu_.mir_graph->num_ssa_regs_ = kMaxSsaRegs;
   }
 
   static constexpr size_t kMaxSsaRegs = 16384u;
@@ -379,26 +390,28 @@
       { 3u, 0u, 0u, false, kDexMemAccessWord },  // Unresolved field.
   };
   static const MIRDef mirs[] = {
-      DEF_UNIQUE_REF(Instruction::NEW_INSTANCE, 20u),
-      DEF_IGET(Instruction::IGET, 1u, 20u, 0u),             // Resolved field #1, unique object.
-      DEF_IGET(Instruction::IGET, 2u, 21u, 0u),             // Resolved field #1.
-      DEF_IGET_WIDE(Instruction::IGET_WIDE, 3u, 21u, 1u),   // Resolved field #2.
-      DEF_IGET(Instruction::IGET, 4u, 22u, 2u),             // Unresolved IGET can be "acquire".
-      DEF_IGET(Instruction::IGET, 5u, 20u, 0u),             // Resolved field #1, unique object.
-      DEF_IGET(Instruction::IGET, 6u, 21u, 0u),             // Resolved field #1.
-      DEF_IGET_WIDE(Instruction::IGET_WIDE, 7u, 21u, 1u),   // Resolved field #2.
-      DEF_IPUT(Instruction::IPUT, 8u, 22u, 2u),             // IPUT clobbers field #1 (#2 is wide).
-      DEF_IGET(Instruction::IGET, 9u, 20u, 0u),             // Resolved field #1, unique object.
-      DEF_IGET(Instruction::IGET, 10u, 21u, 0u),            // Resolved field #1, new value name.
-      DEF_IGET_WIDE(Instruction::IGET_WIDE, 11u, 21u, 1u),  // Resolved field #2.
-      DEF_IGET_WIDE(Instruction::IGET_WIDE, 12u, 20u, 1u),  // Resolved field #2, unique object.
-      DEF_IPUT(Instruction::IPUT, 13u, 20u, 2u),            // IPUT clobbers field #1 (#2 is wide).
-      DEF_IGET(Instruction::IGET, 14u, 20u, 0u),            // Resolved field #1, unique object.
-      DEF_IGET_WIDE(Instruction::IGET_WIDE, 15u, 20u, 1u),  // Resolved field #2, unique object.
+      DEF_UNIQUE_REF(Instruction::NEW_INSTANCE, 30u),
+      DEF_IGET(Instruction::IGET, 1u, 30u, 0u),             // Resolved field #1, unique object.
+      DEF_IGET(Instruction::IGET, 2u, 31u, 0u),             // Resolved field #1.
+      DEF_IGET_WIDE(Instruction::IGET_WIDE, 3u, 31u, 1u),   // Resolved field #2.
+      DEF_IGET(Instruction::IGET, 5u, 32u, 2u),             // Unresolved IGET can be "acquire".
+      DEF_IGET(Instruction::IGET, 6u, 30u, 0u),             // Resolved field #1, unique object.
+      DEF_IGET(Instruction::IGET, 7u, 31u, 0u),             // Resolved field #1.
+      DEF_IGET_WIDE(Instruction::IGET_WIDE, 8u, 31u, 1u),   // Resolved field #2.
+      DEF_IPUT(Instruction::IPUT, 10u, 32u, 2u),            // IPUT clobbers field #1 (#2 is wide).
+      DEF_IGET(Instruction::IGET, 11u, 30u, 0u),            // Resolved field #1, unique object.
+      DEF_IGET(Instruction::IGET, 12u, 31u, 0u),            // Resolved field #1, new value name.
+      DEF_IGET_WIDE(Instruction::IGET_WIDE, 13u, 31u, 1u),  // Resolved field #2.
+      DEF_IGET_WIDE(Instruction::IGET_WIDE, 15u, 30u, 1u),  // Resolved field #2, unique object.
+      DEF_IPUT(Instruction::IPUT, 17u, 30u, 2u),            // IPUT clobbers field #1 (#2 is wide).
+      DEF_IGET(Instruction::IGET, 18u, 30u, 0u),            // Resolved field #1, unique object.
+      DEF_IGET_WIDE(Instruction::IGET_WIDE, 19u, 30u, 1u),  // Resolved field #2, unique object.
   };
 
   PrepareIFields(ifields);
   PrepareMIRs(mirs);
+  static const int32_t wide_sregs[] = { 3, 8, 13, 15, 19 };
+  MarkAsWideSRegs(wide_sregs);
   PerformLVN();
   ASSERT_EQ(value_names_.size(), 16u);
   // Unresolved field is potentially volatile, so we need to adhere to the volatile semantics.
@@ -430,16 +443,18 @@
   static const MIRDef mirs[] = {
       DEF_SGET(Instruction::SGET, 0u, 0u),            // Resolved field #1.
       DEF_SGET_WIDE(Instruction::SGET_WIDE, 1u, 1u),  // Resolved field #2.
-      DEF_SGET(Instruction::SGET, 2u, 2u),            // Unresolved SGET can be "acquire".
-      DEF_SGET(Instruction::SGET, 3u, 0u),            // Resolved field #1.
-      DEF_SGET_WIDE(Instruction::SGET_WIDE, 4u, 1u),  // Resolved field #2.
-      DEF_SPUT(Instruction::SPUT, 5u, 2u),            // SPUT clobbers field #1 (#2 is wide).
-      DEF_SGET(Instruction::SGET, 6u, 0u),            // Resolved field #1.
-      DEF_SGET_WIDE(Instruction::SGET_WIDE, 7u, 1u),  // Resolved field #2.
+      DEF_SGET(Instruction::SGET, 3u, 2u),            // Unresolved SGET can be "acquire".
+      DEF_SGET(Instruction::SGET, 4u, 0u),            // Resolved field #1.
+      DEF_SGET_WIDE(Instruction::SGET_WIDE, 5u, 1u),  // Resolved field #2.
+      DEF_SPUT(Instruction::SPUT, 7u, 2u),            // SPUT clobbers field #1 (#2 is wide).
+      DEF_SGET(Instruction::SGET, 8u, 0u),            // Resolved field #1.
+      DEF_SGET_WIDE(Instruction::SGET_WIDE, 9u, 1u),  // Resolved field #2.
   };
 
   PrepareSFields(sfields);
   PrepareMIRs(mirs);
+  static const int32_t wide_sregs[] = { 1, 5, 9 };
+  MarkAsWideSRegs(wide_sregs);
   PerformLVN();
   ASSERT_EQ(value_names_.size(), 8u);
   // Unresolved field is potentially volatile, so we need to adhere to the volatile semantics.
@@ -585,18 +600,20 @@
       DEF_IGET(Instruction::IGET, 7u, 20u, 0u),              // New value.
       DEF_IGET(Instruction::IGET, 8u, 20u, 1u),              // Still the same.
       DEF_IPUT_WIDE(Instruction::IPUT_WIDE, 9u, 31u, 3u),    // No aliasing, different type.
-      DEF_IGET(Instruction::IGET, 10u, 20u, 0u),
-      DEF_IGET(Instruction::IGET, 11u, 20u, 1u),
-      DEF_IPUT_WIDE(Instruction::IPUT_WIDE, 12u, 31u, 5u),   // No aliasing, different type.
-      DEF_IGET(Instruction::IGET, 13u, 20u, 0u),
-      DEF_IGET(Instruction::IGET, 14u, 20u, 1u),
-      DEF_IPUT(Instruction::IPUT, 15u, 31u, 4u),             // Aliasing, same type.
-      DEF_IGET(Instruction::IGET, 16u, 20u, 0u),
-      DEF_IGET(Instruction::IGET, 17u, 20u, 1u),
+      DEF_IGET(Instruction::IGET, 11u, 20u, 0u),
+      DEF_IGET(Instruction::IGET, 12u, 20u, 1u),
+      DEF_IPUT_WIDE(Instruction::IPUT_WIDE, 13u, 31u, 5u),   // No aliasing, different type.
+      DEF_IGET(Instruction::IGET, 15u, 20u, 0u),
+      DEF_IGET(Instruction::IGET, 16u, 20u, 1u),
+      DEF_IPUT(Instruction::IPUT, 17u, 31u, 4u),             // Aliasing, same type.
+      DEF_IGET(Instruction::IGET, 18u, 20u, 0u),
+      DEF_IGET(Instruction::IGET, 19u, 20u, 1u),
   };
 
   PrepareIFields(ifields);
   PrepareMIRs(mirs);
+  static const int32_t wide_sregs[] = { 9, 13 };
+  MarkAsWideSRegs(wide_sregs);
   PerformLVN();
   ASSERT_EQ(value_names_.size(), 18u);
   EXPECT_EQ(value_names_[1], value_names_[4]);
@@ -626,14 +643,16 @@
       DEF_AGET(Instruction::AGET, 4u, 20u, 40u),
       DEF_AGET(Instruction::AGET, 5u, 20u, 41u),
       DEF_APUT_WIDE(Instruction::APUT_WIDE, 6u, 31u, 43u),  // No aliasing, different type.
-      DEF_AGET(Instruction::AGET, 7u, 20u, 40u),
-      DEF_AGET(Instruction::AGET, 8u, 20u, 41u),
-      DEF_APUT(Instruction::APUT, 9u, 32u, 40u),            // May alias with all elements.
-      DEF_AGET(Instruction::AGET, 10u, 20u, 40u),           // New value (same index name).
-      DEF_AGET(Instruction::AGET, 11u, 20u, 41u),           // New value (different index name).
+      DEF_AGET(Instruction::AGET, 8u, 20u, 40u),
+      DEF_AGET(Instruction::AGET, 9u, 20u, 41u),
+      DEF_APUT(Instruction::APUT, 10u, 32u, 40u),           // May alias with all elements.
+      DEF_AGET(Instruction::AGET, 11u, 20u, 40u),           // New value (same index name).
+      DEF_AGET(Instruction::AGET, 12u, 20u, 41u),           // New value (different index name).
   };
 
   PrepareMIRs(mirs);
+  static const int32_t wide_sregs[] = { 6 };
+  MarkAsWideSRegs(wide_sregs);
   PerformLVN();
   ASSERT_EQ(value_names_.size(), 12u);
   EXPECT_EQ(value_names_[1], value_names_[4]);
@@ -769,6 +788,8 @@
   };
 
   PrepareMIRs(mirs);
+  static const int32_t wide_sregs[] = { 5, 7, 12, 14, 16 };
+  MarkAsWideSRegs(wide_sregs);
   PerformLVN();
   for (size_t i = 0u; i != mir_count_; ++i) {
     int expected = expected_ignore_div_zero_check[i] ? MIR_IGNORE_DIV_ZERO_CHECK : 0u;
@@ -780,51 +801,55 @@
   static const MIRDef mirs[] = {
       // Core reg constants.
       DEF_CONST(Instruction::CONST_WIDE_16, 0u, 0),
-      DEF_CONST(Instruction::CONST_WIDE_16, 1u, 1),
-      DEF_CONST(Instruction::CONST_WIDE_16, 2u, -1),
-      DEF_CONST(Instruction::CONST_WIDE_32, 3u, 1 << 16),
-      DEF_CONST(Instruction::CONST_WIDE_32, 4u, -1 << 16),
-      DEF_CONST(Instruction::CONST_WIDE_32, 5u, (1 << 16) + 1),
-      DEF_CONST(Instruction::CONST_WIDE_32, 6u, (1 << 16) - 1),
-      DEF_CONST(Instruction::CONST_WIDE_32, 7u, -(1 << 16) + 1),
-      DEF_CONST(Instruction::CONST_WIDE_32, 8u, -(1 << 16) - 1),
-      DEF_CONST(Instruction::CONST_WIDE, 9u, INT64_C(1) << 32),
-      DEF_CONST(Instruction::CONST_WIDE, 10u, INT64_C(-1) << 32),
-      DEF_CONST(Instruction::CONST_WIDE, 11u, (INT64_C(1) << 32) + 1),
-      DEF_CONST(Instruction::CONST_WIDE, 12u, (INT64_C(1) << 32) - 1),
-      DEF_CONST(Instruction::CONST_WIDE, 13u, (INT64_C(-1) << 32) + 1),
-      DEF_CONST(Instruction::CONST_WIDE, 14u, (INT64_C(-1) << 32) - 1),
-      DEF_CONST(Instruction::CONST_WIDE_HIGH16, 15u, 1),       // Effectively 1 << 48.
-      DEF_CONST(Instruction::CONST_WIDE_HIGH16, 16u, 0xffff),  // Effectively -1 << 48.
-      DEF_CONST(Instruction::CONST_WIDE, 17u, (INT64_C(1) << 48) + 1),
-      DEF_CONST(Instruction::CONST_WIDE, 18u, (INT64_C(1) << 48) - 1),
-      DEF_CONST(Instruction::CONST_WIDE, 19u, (INT64_C(-1) << 48) + 1),
-      DEF_CONST(Instruction::CONST_WIDE, 20u, (INT64_C(-1) << 48) - 1),
+      DEF_CONST(Instruction::CONST_WIDE_16, 2u, 1),
+      DEF_CONST(Instruction::CONST_WIDE_16, 4u, -1),
+      DEF_CONST(Instruction::CONST_WIDE_32, 6u, 1 << 16),
+      DEF_CONST(Instruction::CONST_WIDE_32, 8u, -1 << 16),
+      DEF_CONST(Instruction::CONST_WIDE_32, 10u, (1 << 16) + 1),
+      DEF_CONST(Instruction::CONST_WIDE_32, 12u, (1 << 16) - 1),
+      DEF_CONST(Instruction::CONST_WIDE_32, 14u, -(1 << 16) + 1),
+      DEF_CONST(Instruction::CONST_WIDE_32, 16u, -(1 << 16) - 1),
+      DEF_CONST(Instruction::CONST_WIDE, 18u, INT64_C(1) << 32),
+      DEF_CONST(Instruction::CONST_WIDE, 20u, INT64_C(-1) << 32),
+      DEF_CONST(Instruction::CONST_WIDE, 22u, (INT64_C(1) << 32) + 1),
+      DEF_CONST(Instruction::CONST_WIDE, 24u, (INT64_C(1) << 32) - 1),
+      DEF_CONST(Instruction::CONST_WIDE, 26u, (INT64_C(-1) << 32) + 1),
+      DEF_CONST(Instruction::CONST_WIDE, 28u, (INT64_C(-1) << 32) - 1),
+      DEF_CONST(Instruction::CONST_WIDE_HIGH16, 30u, 1),       // Effectively 1 << 48.
+      DEF_CONST(Instruction::CONST_WIDE_HIGH16, 32u, 0xffff),  // Effectively -1 << 48.
+      DEF_CONST(Instruction::CONST_WIDE, 34u, (INT64_C(1) << 48) + 1),
+      DEF_CONST(Instruction::CONST_WIDE, 36u, (INT64_C(1) << 48) - 1),
+      DEF_CONST(Instruction::CONST_WIDE, 38u, (INT64_C(-1) << 48) + 1),
+      DEF_CONST(Instruction::CONST_WIDE, 40u, (INT64_C(-1) << 48) - 1),
       // FP reg constants.
-      DEF_CONST(Instruction::CONST_WIDE_16, 21u, 0),
-      DEF_CONST(Instruction::CONST_WIDE_16, 22u, 1),
-      DEF_CONST(Instruction::CONST_WIDE_16, 23u, -1),
-      DEF_CONST(Instruction::CONST_WIDE_32, 24u, 1 << 16),
-      DEF_CONST(Instruction::CONST_WIDE_32, 25u, -1 << 16),
-      DEF_CONST(Instruction::CONST_WIDE_32, 26u, (1 << 16) + 1),
-      DEF_CONST(Instruction::CONST_WIDE_32, 27u, (1 << 16) - 1),
-      DEF_CONST(Instruction::CONST_WIDE_32, 28u, -(1 << 16) + 1),
-      DEF_CONST(Instruction::CONST_WIDE_32, 29u, -(1 << 16) - 1),
-      DEF_CONST(Instruction::CONST_WIDE, 30u, INT64_C(1) << 32),
-      DEF_CONST(Instruction::CONST_WIDE, 31u, INT64_C(-1) << 32),
-      DEF_CONST(Instruction::CONST_WIDE, 32u, (INT64_C(1) << 32) + 1),
-      DEF_CONST(Instruction::CONST_WIDE, 33u, (INT64_C(1) << 32) - 1),
-      DEF_CONST(Instruction::CONST_WIDE, 34u, (INT64_C(-1) << 32) + 1),
-      DEF_CONST(Instruction::CONST_WIDE, 35u, (INT64_C(-1) << 32) - 1),
-      DEF_CONST(Instruction::CONST_WIDE_HIGH16, 36u, 1),       // Effectively 1 << 48.
-      DEF_CONST(Instruction::CONST_WIDE_HIGH16, 37u, 0xffff),  // Effectively -1 << 48.
-      DEF_CONST(Instruction::CONST_WIDE, 38u, (INT64_C(1) << 48) + 1),
-      DEF_CONST(Instruction::CONST_WIDE, 39u, (INT64_C(1) << 48) - 1),
-      DEF_CONST(Instruction::CONST_WIDE, 40u, (INT64_C(-1) << 48) + 1),
-      DEF_CONST(Instruction::CONST_WIDE, 41u, (INT64_C(-1) << 48) - 1),
+      DEF_CONST(Instruction::CONST_WIDE_16, 42u, 0),
+      DEF_CONST(Instruction::CONST_WIDE_16, 44u, 1),
+      DEF_CONST(Instruction::CONST_WIDE_16, 46u, -1),
+      DEF_CONST(Instruction::CONST_WIDE_32, 48u, 1 << 16),
+      DEF_CONST(Instruction::CONST_WIDE_32, 50u, -1 << 16),
+      DEF_CONST(Instruction::CONST_WIDE_32, 52u, (1 << 16) + 1),
+      DEF_CONST(Instruction::CONST_WIDE_32, 54u, (1 << 16) - 1),
+      DEF_CONST(Instruction::CONST_WIDE_32, 56u, -(1 << 16) + 1),
+      DEF_CONST(Instruction::CONST_WIDE_32, 58u, -(1 << 16) - 1),
+      DEF_CONST(Instruction::CONST_WIDE, 60u, INT64_C(1) << 32),
+      DEF_CONST(Instruction::CONST_WIDE, 62u, INT64_C(-1) << 32),
+      DEF_CONST(Instruction::CONST_WIDE, 64u, (INT64_C(1) << 32) + 1),
+      DEF_CONST(Instruction::CONST_WIDE, 66u, (INT64_C(1) << 32) - 1),
+      DEF_CONST(Instruction::CONST_WIDE, 68u, (INT64_C(-1) << 32) + 1),
+      DEF_CONST(Instruction::CONST_WIDE, 70u, (INT64_C(-1) << 32) - 1),
+      DEF_CONST(Instruction::CONST_WIDE_HIGH16, 72u, 1),       // Effectively 1 << 48.
+      DEF_CONST(Instruction::CONST_WIDE_HIGH16, 74u, 0xffff),  // Effectively -1 << 48.
+      DEF_CONST(Instruction::CONST_WIDE, 76u, (INT64_C(1) << 48) + 1),
+      DEF_CONST(Instruction::CONST_WIDE, 78u, (INT64_C(1) << 48) - 1),
+      DEF_CONST(Instruction::CONST_WIDE, 80u, (INT64_C(-1) << 48) + 1),
+      DEF_CONST(Instruction::CONST_WIDE, 82u, (INT64_C(-1) << 48) - 1),
   };
 
   PrepareMIRs(mirs);
+  for (size_t i = 0; i != arraysize(mirs); ++i) {
+    const int32_t wide_sregs[] = { mirs_[i].ssa_rep->defs[0] };
+    MarkAsWideSRegs(wide_sregs);
+  }
   for (size_t i = arraysize(mirs) / 2u; i != arraysize(mirs); ++i) {
     cu_.mir_graph->reg_location_[mirs_[i].ssa_rep->defs[0]].fp = true;
   }
diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h
index 0db54bf..7bfbb34 100644
--- a/compiler/dex/mir_graph.h
+++ b/compiler/dex/mir_graph.h
@@ -1101,6 +1101,7 @@
   bool EliminateDeadCodeGate();
   bool EliminateDeadCode(BasicBlock* bb);
   void EliminateDeadCodeEnd();
+  void GlobalValueNumberingCleanup();
   bool EliminateSuspendChecksGate();
   bool EliminateSuspendChecks(BasicBlock* bb);
 
@@ -1450,6 +1451,7 @@
   friend class TopologicalSortOrderTest;
   friend class TypeInferenceTest;
   friend class QuickCFITest;
+  friend class QuickAssembleX86TestBase;
 };
 
 }  // namespace art
diff --git a/compiler/dex/mir_optimization.cc b/compiler/dex/mir_optimization.cc
index 467c14e..3482602 100644
--- a/compiler/dex/mir_optimization.cc
+++ b/compiler/dex/mir_optimization.cc
@@ -1355,8 +1355,13 @@
   temp_scoped_alloc_.reset();
 }
 
+static void DisableGVNDependentOptimizations(CompilationUnit* cu) {
+  cu->disable_opt |= (1u << kGvnDeadCodeElimination);
+}
+
 bool MIRGraph::ApplyGlobalValueNumberingGate() {
   if (GlobalValueNumbering::Skip(cu_)) {
+    DisableGVNDependentOptimizations(cu_);
     return false;
   }
 
@@ -1407,16 +1412,12 @@
     cu_->disable_opt |= (1u << kLocalValueNumbering);
   } else {
     LOG(WARNING) << "GVN failed for " << PrettyMethod(cu_->method_idx, *cu_->dex_file);
-    cu_->disable_opt |= (1u << kGvnDeadCodeElimination);
+    DisableGVNDependentOptimizations(cu_);
   }
-
-  if ((cu_->disable_opt & (1 << kGvnDeadCodeElimination)) != 0) {
-    EliminateDeadCodeEnd();
-  }  // else preserve GVN data for CSE.
 }
 
 bool MIRGraph::EliminateDeadCodeGate() {
-  if ((cu_->disable_opt & (1 << kGvnDeadCodeElimination)) != 0) {
+  if ((cu_->disable_opt & (1 << kGvnDeadCodeElimination)) != 0 || temp_.gvn.gvn == nullptr) {
     return false;
   }
   DCHECK(temp_scoped_alloc_ != nullptr);
@@ -1437,11 +1438,21 @@
 }
 
 void MIRGraph::EliminateDeadCodeEnd() {
-  DCHECK_EQ(temp_.gvn.dce != nullptr, (cu_->disable_opt & (1 << kGvnDeadCodeElimination)) == 0);
-  if (temp_.gvn.dce != nullptr) {
-    delete temp_.gvn.dce;
-    temp_.gvn.dce = nullptr;
+  if (kIsDebugBuild) {
+    // DCE can make some previously dead vregs alive again. Make sure the obsolete
+    // live-in information is not used anymore.
+    AllNodesIterator iter(this);
+    for (BasicBlock* bb = iter.Next(); bb != nullptr; bb = iter.Next()) {
+      if (bb->data_flow_info != nullptr) {
+        bb->data_flow_info->live_in_v = nullptr;
+      }
+    }
   }
+}
+
+void MIRGraph::GlobalValueNumberingCleanup() {
+  delete temp_.gvn.dce;
+  temp_.gvn.dce = nullptr;
   delete temp_.gvn.gvn;
   temp_.gvn.gvn = nullptr;
   temp_.gvn.ifield_ids = nullptr;
diff --git a/compiler/dex/pass_driver_me_opts.cc b/compiler/dex/pass_driver_me_opts.cc
index 2e871da..3e193b4 100644
--- a/compiler/dex/pass_driver_me_opts.cc
+++ b/compiler/dex/pass_driver_me_opts.cc
@@ -46,6 +46,7 @@
   pass_manager->AddPass(new CodeLayout);
   pass_manager->AddPass(new GlobalValueNumberingPass);
   pass_manager->AddPass(new DeadCodeEliminationPass);
+  pass_manager->AddPass(new GlobalValueNumberingCleanupPass);
   pass_manager->AddPass(new ConstantPropagation);
   pass_manager->AddPass(new MethodUseCount);
   pass_manager->AddPass(new BBOptimizations);
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index de5e041..0592c74 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -58,24 +58,19 @@
   return (cu->enable_debug & (1 << kDebugSlowTypePath)) != 0;
 }
 
-void Mir2Lir::GenIfNullUseHelperImmMethod(
-    RegStorage r_result, QuickEntrypointEnum trampoline, int imm, RegStorage r_method) {
+void Mir2Lir::GenIfNullUseHelperImm(RegStorage r_result, QuickEntrypointEnum trampoline, int imm) {
   class CallHelperImmMethodSlowPath : public LIRSlowPath {
    public:
     CallHelperImmMethodSlowPath(Mir2Lir* m2l, LIR* fromfast, LIR* cont,
                                 QuickEntrypointEnum trampoline_in, int imm_in,
-                                RegStorage r_method_in, RegStorage r_result_in)
+                                RegStorage r_result_in)
         : LIRSlowPath(m2l, fromfast, cont), trampoline_(trampoline_in),
-          imm_(imm_in), r_method_(r_method_in), r_result_(r_result_in) {
+          imm_(imm_in), r_result_(r_result_in) {
     }
 
     void Compile() {
       GenerateTargetLabel();
-      if (r_method_.Valid()) {
-        m2l_->CallRuntimeHelperImmReg(trampoline_, imm_, r_method_, true);
-      } else {
-        m2l_->CallRuntimeHelperImmMethod(trampoline_, imm_, true);
-      }
+      m2l_->CallRuntimeHelperImm(trampoline_, imm_, true);
       m2l_->OpRegCopy(r_result_,  m2l_->TargetReg(kRet0, kRef));
       m2l_->OpUnconditionalBranch(cont_);
     }
@@ -83,7 +78,6 @@
    private:
     QuickEntrypointEnum trampoline_;
     const int imm_;
-    const RegStorage r_method_;
     const RegStorage r_result_;
   };
 
@@ -91,7 +85,7 @@
   LIR* cont = NewLIR0(kPseudoTargetLabel);
 
   AddSlowPath(new (arena_) CallHelperImmMethodSlowPath(this, branch, cont, trampoline, imm,
-                                                       r_method, r_result));
+                                                       r_result));
 }
 
 RegStorage Mir2Lir::GenGetOtherTypeForSgetSput(const MirSFieldLoweringInfo& field_info,
@@ -101,13 +95,12 @@
   FlushAllRegs();
   RegStorage r_base = TargetReg(kArg0, kRef);
   LockTemp(r_base);
-  RegStorage r_method = RegStorage::InvalidReg();  // Loaded lazily, maybe in the slow-path.
   if (CanUseOpPcRelDexCacheArrayLoad()) {
     uint32_t offset = dex_cache_arrays_layout_.TypeOffset(field_info.StorageIndex());
     OpPcRelDexCacheArrayLoad(cu_->dex_file, offset, r_base);
   } else {
     // Using fixed register to sync with possible call to runtime support.
-    r_method = LoadCurrMethodWithHint(TargetReg(kArg1, kRef));
+    RegStorage r_method = LoadCurrMethodWithHint(r_base);
     LoadRefDisp(r_method, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(), r_base,
                 kNotVolatile);
     int32_t offset_of_field = ObjArray::OffsetOfElement(field_info.StorageIndex()).Int32Value();
@@ -139,10 +132,10 @@
       // entry in the dex cache is null, and the "uninit" when the class is not yet initialized.
       // At least one will be non-null here, otherwise we wouldn't generate the slow path.
       StaticFieldSlowPath(Mir2Lir* m2l, LIR* unresolved, LIR* uninit, LIR* cont, int storage_index,
-                          RegStorage r_base_in, RegStorage r_method_in)
+                          RegStorage r_base_in)
           : LIRSlowPath(m2l, unresolved != nullptr ? unresolved : uninit, cont),
             second_branch_(unresolved != nullptr ? uninit : nullptr),
-            storage_index_(storage_index), r_base_(r_base_in), r_method_(r_method_in) {
+            storage_index_(storage_index), r_base_(r_base_in) {
       }
 
       void Compile() {
@@ -150,14 +143,7 @@
         if (second_branch_ != nullptr) {
           second_branch_->target = target;
         }
-        if (r_method_.Valid()) {
-          // ArtMethod* was loaded in normal path - use it.
-          m2l_->CallRuntimeHelperImmReg(kQuickInitializeStaticStorage, storage_index_, r_method_,
-                                        true);
-        } else {
-          // ArtMethod* wasn't loaded in normal path - use a helper that loads it.
-          m2l_->CallRuntimeHelperImmMethod(kQuickInitializeStaticStorage, storage_index_, true);
-        }
+        m2l_->CallRuntimeHelperImm(kQuickInitializeStaticStorage, storage_index_, true);
         // Copy helper's result into r_base, a no-op on all but MIPS.
         m2l_->OpRegCopy(r_base_,  m2l_->TargetReg(kRet0, kRef));
 
@@ -170,17 +156,13 @@
 
       const int storage_index_;
       const RegStorage r_base_;
-      RegStorage r_method_;
     };
 
     // The slow path is invoked if the r_base is null or the class pointed
     // to by it is not initialized.
     LIR* cont = NewLIR0(kPseudoTargetLabel);
     AddSlowPath(new (arena_) StaticFieldSlowPath(this, unresolved_branch, uninit_branch, cont,
-                                                 field_info.StorageIndex(), r_base, r_method));
-  }
-  if (IsTemp(r_method)) {
-    FreeTemp(r_method);
+                                                 field_info.StorageIndex(), r_base));
   }
   return r_base;
 }
@@ -1042,22 +1024,19 @@
                                                         type_idx)) {
     // Call out to helper which resolves type and verifies access.
     // Resolved type returned in kRet0.
-    CallRuntimeHelperImmMethod(kQuickInitializeTypeAndVerifyAccess, type_idx, true);
+    CallRuntimeHelperImm(kQuickInitializeTypeAndVerifyAccess, type_idx, true);
     rl_result = GetReturn(kRefReg);
   } else {
     rl_result = EvalLoc(rl_dest, kRefReg, true);
     // We don't need access checks, load type from dex cache
-    RegStorage r_method = RegStorage::InvalidReg();
     if (CanUseOpPcRelDexCacheArrayLoad()) {
       size_t offset = dex_cache_arrays_layout_.TypeOffset(type_idx);
       OpPcRelDexCacheArrayLoad(cu_->dex_file, offset, rl_result.reg);
     } else {
-      RegLocation rl_method = LoadCurrMethod();
-      CheckRegLocation(rl_method);
-      r_method = rl_method.reg;
       int32_t dex_cache_offset =
           mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value();
       RegStorage res_reg = AllocTempRef();
+      RegStorage r_method = LoadCurrMethodWithHint(res_reg);
       LoadRefDisp(r_method, dex_cache_offset, res_reg, kNotVolatile);
       int32_t offset_of_type = ClassArray::OffsetOfElement(type_idx).Int32Value();
       LoadRefDisp(res_reg, offset_of_type, rl_result.reg, kNotVolatile);
@@ -1067,7 +1046,7 @@
         type_idx) || ForceSlowTypePath(cu_)) {
       // Slow path, at runtime test if type is null and if so initialize
       FlushAllRegs();
-      GenIfNullUseHelperImmMethod(rl_result.reg, kQuickInitializeType, type_idx, r_method);
+      GenIfNullUseHelperImm(rl_result.reg, kQuickInitializeType, type_idx);
     }
   }
   StoreValue(rl_dest, rl_result);
@@ -1085,14 +1064,13 @@
 
     // Might call out to helper, which will return resolved string in kRet0
     RegStorage ret0 = TargetReg(kRet0, kRef);
-    RegStorage r_method = RegStorage::InvalidReg();
     if (CanUseOpPcRelDexCacheArrayLoad()) {
       size_t offset = dex_cache_arrays_layout_.StringOffset(string_idx);
       OpPcRelDexCacheArrayLoad(cu_->dex_file, offset, ret0);
     } else {
-      r_method = LoadCurrMethodWithHint(TargetReg(kArg1, kRef));
       // Method to declaring class.
       RegStorage arg0 = TargetReg(kArg0, kRef);
+      RegStorage r_method = LoadCurrMethodWithHint(arg0);
       LoadRefDisp(r_method, mirror::ArtMethod::DeclaringClassOffset().Int32Value(),
                   arg0, kNotVolatile);
       // Declaring class to dex cache strings.
@@ -1100,7 +1078,7 @@
 
       LoadRefDisp(arg0, offset_of_string, ret0, kNotVolatile);
     }
-    GenIfNullUseHelperImmMethod(ret0, kQuickResolveString, string_idx, r_method);
+    GenIfNullUseHelperImm(ret0, kQuickResolveString, string_idx);
 
     GenBarrier();
     StoreValue(rl_dest, GetReturn(kRefReg));
@@ -1262,12 +1240,11 @@
       LoadValueDirectFixed(rl_src, ref_reg);  // kArg0 <= ref
     }
 
-    RegStorage r_method = RegStorage::InvalidReg();
     if (CanUseOpPcRelDexCacheArrayLoad()) {
       size_t offset = dex_cache_arrays_layout_.TypeOffset(type_idx);
       OpPcRelDexCacheArrayLoad(cu_->dex_file, offset, class_reg);
     } else {
-      r_method = LoadCurrMethodWithHint(TargetReg(kArg1, kRef));
+      RegStorage r_method = LoadCurrMethodWithHint(class_reg);
       // Load dex cache entry into class_reg (kArg2)
       LoadRefDisp(r_method, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(),
                   class_reg, kNotVolatile);
@@ -1275,7 +1252,7 @@
       LoadRefDisp(class_reg, offset_of_type, class_reg, kNotVolatile);
     }
     if (!can_assume_type_is_in_dex_cache) {
-      GenIfNullUseHelperImmMethod(class_reg, kQuickInitializeType, type_idx, r_method);
+      GenIfNullUseHelperImm(class_reg, kQuickInitializeType, type_idx);
 
       // Should load value here.
       LoadValueDirectFixed(rl_src, ref_reg);  // kArg0 <= ref
@@ -1394,12 +1371,11 @@
                 class_reg, kNotVolatile);
   } else {
     // Load dex cache entry into class_reg (kArg2)
-    RegStorage r_method = RegStorage::InvalidReg();
     if (CanUseOpPcRelDexCacheArrayLoad()) {
       size_t offset = dex_cache_arrays_layout_.TypeOffset(type_idx);
       OpPcRelDexCacheArrayLoad(cu_->dex_file, offset, class_reg);
     } else {
-      r_method = LoadCurrMethodWithHint(TargetReg(kArg1, kRef));
+      RegStorage r_method = LoadCurrMethodWithHint(class_reg);
 
       LoadRefDisp(r_method, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(),
                   class_reg, kNotVolatile);
@@ -1408,7 +1384,7 @@
     }
     if (!cu_->compiler_driver->CanAssumeTypeIsPresentInDexCache(*cu_->dex_file, type_idx)) {
       // Need to test presence of type in dex cache at runtime
-      GenIfNullUseHelperImmMethod(class_reg, kQuickInitializeType, type_idx, r_method);
+      GenIfNullUseHelperImm(class_reg, kQuickInitializeType, type_idx);
     }
   }
   // At this point, class_reg (kArg2) has class
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index 8f08a51..6f227fc 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -1692,10 +1692,8 @@
      * @param r_result the result register.
      * @param trampoline the helper to call in slow path.
      * @param imm the immediate passed to the helper.
-     * @param r_method the register with ArtMethod* if available, otherwise RegStorage::Invalid().
      */
-    void GenIfNullUseHelperImmMethod(
-        RegStorage r_result, QuickEntrypointEnum trampoline, int imm, RegStorage r_method);
+    void GenIfNullUseHelperImm(RegStorage r_result, QuickEntrypointEnum trampoline, int imm);
 
     /**
      * @brief Generate code to retrieve Class* for another type to be used by SGET/SPUT.
diff --git a/compiler/dex/quick/quick_compiler.cc b/compiler/dex/quick/quick_compiler.cc
index 39eb117..73cfe92 100644
--- a/compiler/dex/quick/quick_compiler.cc
+++ b/compiler/dex/quick/quick_compiler.cc
@@ -575,7 +575,7 @@
   // (1 << kNullCheckElimination) |
   // (1 << kClassInitCheckElimination) |
   // (1 << kGlobalValueNumbering) |
-  (1 << kGvnDeadCodeElimination) |
+  // (1 << kGvnDeadCodeElimination) |
   // (1 << kLocalValueNumbering) |
   // (1 << kPromoteRegs) |
   // (1 << kTrackLiveTemps) |
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc
index eb33357..934fa35 100644
--- a/compiler/dex/quick/x86/assemble_x86.cc
+++ b/compiler/dex/quick/x86/assemble_x86.cc
@@ -409,7 +409,7 @@
   EXT_0F_ENCODING_MAP(Paddq,     0x66, 0xD4, REG_DEF0_USE0),
   EXT_0F_ENCODING_MAP(Psadbw,    0x66, 0xF6, REG_DEF0_USE0),
   EXT_0F_ENCODING_MAP(Addps,     0x00, 0x58, REG_DEF0_USE0),
-  EXT_0F_ENCODING_MAP(Addpd,     0xF2, 0x58, REG_DEF0_USE0),
+  EXT_0F_ENCODING_MAP(Addpd,     0x66, 0x58, REG_DEF0_USE0),
   EXT_0F_ENCODING_MAP(Psubb,     0x66, 0xF8, REG_DEF0_USE0),
   EXT_0F_ENCODING_MAP(Psubw,     0x66, 0xF9, REG_DEF0_USE0),
   EXT_0F_ENCODING_MAP(Psubd,     0x66, 0xFA, REG_DEF0_USE0),
@@ -1627,13 +1627,13 @@
  * instruction.  In those cases we will try to substitute a new code
  * sequence or request that the trace be shortened and retried.
  */
-AssemblerStatus X86Mir2Lir::AssembleInstructions(CodeOffset start_addr) {
+AssemblerStatus X86Mir2Lir::AssembleInstructions(LIR* first_lir_insn, CodeOffset start_addr) {
   UNUSED(start_addr);
   LIR *lir;
   AssemblerStatus res = kSuccess;  // Assume success
 
   const bool kVerbosePcFixup = false;
-  for (lir = first_lir_insn_; lir != nullptr; lir = NEXT_LIR(lir)) {
+  for (lir = first_lir_insn; lir != nullptr; lir = NEXT_LIR(lir)) {
     if (IsPseudoLirOp(lir->opcode)) {
       continue;
     }
@@ -2034,7 +2034,7 @@
    */
 
   while (true) {
-    AssemblerStatus res = AssembleInstructions(0);
+    AssemblerStatus res = AssembleInstructions(first_lir_insn_, 0);
     if (res == kSuccess) {
       break;
     } else {
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index 72580a3..5a46520 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -432,7 +432,7 @@
 
   int AssignInsnOffsets();
   void AssignOffsets();
-  AssemblerStatus AssembleInstructions(CodeOffset start_addr);
+  AssemblerStatus AssembleInstructions(LIR* first_lir_insn, CodeOffset start_addr);
 
   size_t ComputeSize(const X86EncodingMap* entry, int32_t raw_reg, int32_t raw_index,
                      int32_t raw_base, int32_t displacement);
@@ -972,6 +972,9 @@
   static const X86EncodingMap EncodingMap[kX86Last];
 
   friend std::ostream& operator<<(std::ostream& os, const X86OpCode& rhs);
+  friend class QuickAssembleX86Test;
+  friend class QuickAssembleX86MacroTest;
+  friend class QuickAssembleX86LowLevelTest;
 
   DISALLOW_COPY_AND_ASSIGN(X86Mir2Lir);
 };
diff --git a/compiler/dex/quick/x86/quick_assemble_x86_test.cc b/compiler/dex/quick/x86/quick_assemble_x86_test.cc
new file mode 100644
index 0000000..36339f7
--- /dev/null
+++ b/compiler/dex/quick/x86/quick_assemble_x86_test.cc
@@ -0,0 +1,263 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dex/quick/quick_compiler.h"
+#include "dex/pass_manager.h"
+#include "dex/verification_results.h"
+#include "dex/quick/dex_file_to_method_inliner_map.h"
+#include "runtime/dex_file.h"
+#include "driver/compiler_options.h"
+#include "driver/compiler_driver.h"
+#include "codegen_x86.h"
+#include "gtest/gtest.h"
+#include "utils/assembler_test_base.h"
+
+namespace art {
+
+class QuickAssembleX86TestBase : public testing::Test {
+ protected:
+  X86Mir2Lir* Prepare(InstructionSet target) {
+    isa_ = target;
+    pool_.reset(new ArenaPool());
+    compiler_options_.reset(new CompilerOptions(
+        CompilerOptions::kDefaultCompilerFilter,
+        CompilerOptions::kDefaultHugeMethodThreshold,
+        CompilerOptions::kDefaultLargeMethodThreshold,
+        CompilerOptions::kDefaultSmallMethodThreshold,
+        CompilerOptions::kDefaultTinyMethodThreshold,
+        CompilerOptions::kDefaultNumDexMethodsThreshold,
+        false,
+        CompilerOptions::kDefaultTopKProfileThreshold,
+        false,
+        false,
+        false,
+        false,
+        false,
+        false,
+        false,
+        nullptr,
+        new PassManagerOptions(),
+        nullptr,
+        false));
+    verification_results_.reset(new VerificationResults(compiler_options_.get()));
+    method_inliner_map_.reset(new DexFileToMethodInlinerMap());
+    compiler_driver_.reset(new CompilerDriver(
+        compiler_options_.get(),
+        verification_results_.get(),
+        method_inliner_map_.get(),
+        Compiler::kQuick,
+        isa_,
+        nullptr,
+        false,
+        nullptr,
+        nullptr,
+        nullptr,
+        0,
+        false,
+        false,
+        "",
+        0,
+        -1,
+        ""));
+    cu_.reset(new CompilationUnit(pool_.get(), isa_, compiler_driver_.get(), nullptr));
+    DexFile::CodeItem* code_item = static_cast<DexFile::CodeItem*>(
+        cu_->arena.Alloc(sizeof(DexFile::CodeItem), kArenaAllocMisc));
+    memset(code_item, 0, sizeof(DexFile::CodeItem));
+    cu_->mir_graph.reset(new MIRGraph(cu_.get(), &cu_->arena));
+    cu_->mir_graph->current_code_item_ = code_item;
+    cu_->cg.reset(QuickCompiler::GetCodeGenerator(cu_.get(), nullptr));
+
+    test_helper_.reset(new AssemblerTestInfrastructure(
+        isa_ == kX86 ? "x86" : "x86_64",
+        "as",
+        isa_ == kX86 ? " --32" : "",
+        "objdump",
+        " -h",
+        "objdump",
+        isa_ == kX86 ?
+            " -D -bbinary -mi386 --no-show-raw-insn" :
+            " -D -bbinary -mi386:x86-64 -Mx86-64,addr64,data32 --no-show-raw-insn",
+        nullptr));
+
+    X86Mir2Lir* m2l = static_cast<X86Mir2Lir*>(cu_->cg.get());
+    m2l->CompilerInitializeRegAlloc();
+    return m2l;
+  }
+
+  void Release() {
+    cu_.reset();
+    compiler_driver_.reset();
+    method_inliner_map_.reset();
+    verification_results_.reset();
+    compiler_options_.reset();
+    pool_.reset();
+
+    test_helper_.reset();
+  }
+
+  void TearDown() OVERRIDE {
+    Release();
+  }
+
+  bool CheckTools(InstructionSet target) {
+    Prepare(target);
+    bool result = test_helper_->CheckTools();
+    Release();
+    return result;
+  }
+
+  std::unique_ptr<CompilationUnit> cu_;
+  std::unique_ptr<AssemblerTestInfrastructure> test_helper_;
+
+ private:
+  InstructionSet isa_;
+  std::unique_ptr<ArenaPool> pool_;
+  std::unique_ptr<CompilerOptions> compiler_options_;
+  std::unique_ptr<VerificationResults> verification_results_;
+  std::unique_ptr<DexFileToMethodInlinerMap> method_inliner_map_;
+  std::unique_ptr<CompilerDriver> compiler_driver_;
+};
+
+class QuickAssembleX86LowLevelTest : public QuickAssembleX86TestBase {
+ protected:
+  void Test(InstructionSet target, std::string test_name, std::string gcc_asm,
+            int opcode, int op0 = 0, int op1 = 0, int op2 = 0, int op3 = 0, int op4 = 0) {
+    X86Mir2Lir* m2l = Prepare(target);
+
+    LIR lir;
+    memset(&lir, 0, sizeof(LIR));
+    lir.opcode = opcode;
+    lir.operands[0] = op0;
+    lir.operands[1] = op1;
+    lir.operands[2] = op2;
+    lir.operands[3] = op3;
+    lir.operands[4] = op4;
+    lir.flags.size = m2l->GetInsnSize(&lir);
+
+    AssemblerStatus status = m2l->AssembleInstructions(&lir, 0);
+    // We don't expect a retry.
+    ASSERT_EQ(status, AssemblerStatus::kSuccess);
+
+    // Need a "base" std::vector.
+    std::vector<uint8_t> buffer(m2l->code_buffer_.begin(), m2l->code_buffer_.end());
+    test_helper_->Driver(buffer, gcc_asm, test_name);
+
+    Release();
+  }
+};
+
+TEST_F(QuickAssembleX86LowLevelTest, Addpd) {
+  Test(kX86, "Addpd", "addpd %xmm1, %xmm0\n", kX86AddpdRR,
+       RegStorage::Solo128(0).GetReg(), RegStorage::Solo128(1).GetReg());
+  Test(kX86_64, "Addpd", "addpd %xmm1, %xmm0\n", kX86AddpdRR,
+       RegStorage::Solo128(0).GetReg(), RegStorage::Solo128(1).GetReg());
+}
+
+TEST_F(QuickAssembleX86LowLevelTest, Subpd) {
+  Test(kX86, "Subpd", "subpd %xmm1, %xmm0\n", kX86SubpdRR,
+       RegStorage::Solo128(0).GetReg(), RegStorage::Solo128(1).GetReg());
+  Test(kX86_64, "Subpd", "subpd %xmm1, %xmm0\n", kX86SubpdRR,
+       RegStorage::Solo128(0).GetReg(), RegStorage::Solo128(1).GetReg());
+}
+
+TEST_F(QuickAssembleX86LowLevelTest, Mulpd) {
+  Test(kX86, "Mulpd", "mulpd %xmm1, %xmm0\n", kX86MulpdRR,
+       RegStorage::Solo128(0).GetReg(), RegStorage::Solo128(1).GetReg());
+  Test(kX86_64, "Mulpd", "mulpd %xmm1, %xmm0\n", kX86MulpdRR,
+       RegStorage::Solo128(0).GetReg(), RegStorage::Solo128(1).GetReg());
+}
+
+class QuickAssembleX86MacroTest : public QuickAssembleX86TestBase {
+ protected:
+  typedef void (X86Mir2Lir::*AsmFn)(MIR*);
+
+  void TestVectorFn(InstructionSet target,
+                    Instruction::Code opcode,
+                    AsmFn f,
+                    std::string inst_string) {
+    X86Mir2Lir *m2l = Prepare(target);
+
+    // Create a vector MIR.
+    MIR* mir = cu_->mir_graph->NewMIR();
+    mir->dalvikInsn.opcode = opcode;
+    mir->dalvikInsn.vA = 0;  // Destination and source.
+    mir->dalvikInsn.vB = 1;  // Source.
+    int vector_size = 128;
+    int vector_type = kDouble;
+    mir->dalvikInsn.vC = (vector_type << 16) | vector_size;  // Type size.
+    (m2l->*f)(mir);
+    m2l->AssembleLIR();
+
+    std::string gcc_asm = inst_string + " %xmm1, %xmm0\n";
+    // Need a "base" std::vector.
+    std::vector<uint8_t> buffer(m2l->code_buffer_.begin(), m2l->code_buffer_.end());
+    test_helper_->Driver(buffer, gcc_asm, inst_string);
+
+    Release();
+  }
+
+  // Tests are member functions as many of the assembler functions are protected or private,
+  // and it would be inelegant to define ART_FRIEND_TEST for all the tests.
+
+  void TestAddpd() {
+    TestVectorFn(kX86,
+                 static_cast<Instruction::Code>(kMirOpPackedAddition),
+                 &X86Mir2Lir::GenAddVector,
+                 "addpd");
+    TestVectorFn(kX86_64,
+                 static_cast<Instruction::Code>(kMirOpPackedAddition),
+                 &X86Mir2Lir::GenAddVector,
+                 "addpd");
+  }
+
+  void TestSubpd() {
+    TestVectorFn(kX86,
+                 static_cast<Instruction::Code>(kMirOpPackedSubtract),
+                 &X86Mir2Lir::GenSubtractVector,
+                 "subpd");
+    TestVectorFn(kX86_64,
+                 static_cast<Instruction::Code>(kMirOpPackedSubtract),
+                 &X86Mir2Lir::GenSubtractVector,
+                 "subpd");
+  }
+
+  void TestMulpd() {
+    TestVectorFn(kX86,
+                 static_cast<Instruction::Code>(kMirOpPackedMultiply),
+                 &X86Mir2Lir::GenMultiplyVector,
+                 "mulpd");
+    TestVectorFn(kX86_64,
+                 static_cast<Instruction::Code>(kMirOpPackedMultiply),
+                 &X86Mir2Lir::GenMultiplyVector,
+                 "mulpd");
+  }
+};
+
+TEST_F(QuickAssembleX86MacroTest, CheckTools) {
+  ASSERT_TRUE(CheckTools(kX86)) << "x86 tools not found.";
+  ASSERT_TRUE(CheckTools(kX86_64)) << "x86_64 tools not found.";
+}
+
+#define DECLARE_TEST(name)             \
+  TEST_F(QuickAssembleX86MacroTest, name) { \
+    Test ## name();                    \
+  }
+
+DECLARE_TEST(Addpd)
+DECLARE_TEST(Subpd)
+DECLARE_TEST(Mulpd)
+
+}  // namespace art
diff --git a/compiler/driver/compiler_driver-inl.h b/compiler/driver/compiler_driver-inl.h
index bad8335..e54cbf6 100644
--- a/compiler/driver/compiler_driver-inl.h
+++ b/compiler/driver/compiler_driver-inl.h
@@ -127,34 +127,67 @@
   return std::make_pair(fast_get, fast_put);
 }
 
-inline std::pair<bool, bool> CompilerDriver::IsFastStaticField(
-    mirror::DexCache* dex_cache, mirror::Class* referrer_class,
-    ArtField* resolved_field, uint16_t field_idx, uint32_t* storage_index) {
-  DCHECK(resolved_field->IsStatic());
+template <typename ArtMember>
+inline bool CompilerDriver::CanAccessResolvedMember(mirror::Class* referrer_class ATTRIBUTE_UNUSED,
+                                                    mirror::Class* access_to ATTRIBUTE_UNUSED,
+                                                    ArtMember* member ATTRIBUTE_UNUSED,
+                                                    mirror::DexCache* dex_cache ATTRIBUTE_UNUSED,
+                                                    uint32_t field_idx ATTRIBUTE_UNUSED) {
+  // Not defined for ArtMember values other than ArtField or mirror::ArtMethod.
+  UNREACHABLE();
+}
+
+template <>
+inline bool CompilerDriver::CanAccessResolvedMember<ArtField>(mirror::Class* referrer_class,
+                                                              mirror::Class* access_to,
+                                                              ArtField* field,
+                                                              mirror::DexCache* dex_cache,
+                                                              uint32_t field_idx) {
+  return referrer_class->CanAccessResolvedField(access_to, field, dex_cache, field_idx);
+}
+
+template <>
+inline bool CompilerDriver::CanAccessResolvedMember<mirror::ArtMethod>(
+    mirror::Class* referrer_class,
+    mirror::Class* access_to,
+    mirror::ArtMethod* method,
+    mirror::DexCache* dex_cache,
+    uint32_t field_idx) {
+  return referrer_class->CanAccessResolvedMethod(access_to, method, dex_cache, field_idx);
+}
+
+template <typename ArtMember>
+inline std::pair<bool, bool> CompilerDriver::IsClassOfStaticMemberAvailableToReferrer(
+    mirror::DexCache* dex_cache,
+    mirror::Class* referrer_class,
+    ArtMember* resolved_member,
+    uint16_t member_idx,
+    uint32_t* storage_index) {
+  DCHECK(resolved_member->IsStatic());
   if (LIKELY(referrer_class != nullptr)) {
-    mirror::Class* fields_class = resolved_field->GetDeclaringClass();
-    if (fields_class == referrer_class) {
-      *storage_index = fields_class->GetDexTypeIndex();
+    mirror::Class* members_class = resolved_member->GetDeclaringClass();
+    if (members_class == referrer_class) {
+      *storage_index = members_class->GetDexTypeIndex();
       return std::make_pair(true, true);
     }
-    if (referrer_class->CanAccessResolvedField(fields_class, resolved_field,
-                                               dex_cache, field_idx)) {
-      // We have the resolved field, we must make it into a index for the referrer
+    if (CanAccessResolvedMember<ArtMember>(
+            referrer_class, members_class, resolved_member, dex_cache, member_idx)) {
+      // We have the resolved member, we must make it into a index for the referrer
       // in its static storage (which may fail if it doesn't have a slot for it)
       // TODO: for images we can elide the static storage base null check
       // if we know there's a non-null entry in the image
       const DexFile* dex_file = dex_cache->GetDexFile();
       uint32_t storage_idx = DexFile::kDexNoIndex;
-      if (LIKELY(fields_class->GetDexCache() == dex_cache)) {
-        // common case where the dex cache of both the referrer and the field are the same,
+      if (LIKELY(members_class->GetDexCache() == dex_cache)) {
+        // common case where the dex cache of both the referrer and the member are the same,
         // no need to search the dex file
-        storage_idx = fields_class->GetDexTypeIndex();
+        storage_idx = members_class->GetDexTypeIndex();
       } else {
-        // Search dex file for localized ssb index, may fail if field's class is a parent
+        // Search dex file for localized ssb index, may fail if member's class is a parent
         // of the class mentioned in the dex file and there is no dex cache entry.
         std::string temp;
         const DexFile::StringId* string_id =
-            dex_file->FindStringId(resolved_field->GetDeclaringClass()->GetDescriptor(&temp));
+            dex_file->FindStringId(resolved_member->GetDeclaringClass()->GetDescriptor(&temp));
         if (string_id != nullptr) {
           const DexFile::TypeId* type_id =
              dex_file->FindTypeId(dex_file->GetIndexForStringId(*string_id));
@@ -166,7 +199,7 @@
       }
       if (storage_idx != DexFile::kDexNoIndex) {
         *storage_index = storage_idx;
-        return std::make_pair(true, !resolved_field->IsFinal());
+        return std::make_pair(true, !resolved_member->IsFinal());
       }
     }
   }
@@ -175,6 +208,23 @@
   return std::make_pair(false, false);
 }
 
+inline std::pair<bool, bool> CompilerDriver::IsFastStaticField(
+    mirror::DexCache* dex_cache, mirror::Class* referrer_class,
+    ArtField* resolved_field, uint16_t field_idx, uint32_t* storage_index) {
+  return IsClassOfStaticMemberAvailableToReferrer(
+      dex_cache, referrer_class, resolved_field, field_idx, storage_index);
+}
+
+inline bool CompilerDriver::IsClassOfStaticMethodAvailableToReferrer(
+    mirror::DexCache* dex_cache, mirror::Class* referrer_class,
+    mirror::ArtMethod* resolved_method, uint16_t method_idx, uint32_t* storage_index) {
+  std::pair<bool, bool> result = IsClassOfStaticMemberAvailableToReferrer(
+      dex_cache, referrer_class, resolved_method, method_idx, storage_index);
+  // Only the first member of `result` is meaningful, as there is no
+  // "write access" to a method.
+  return result.first;
+}
+
 inline bool CompilerDriver::IsStaticFieldInReferrerClass(mirror::Class* referrer_class,
                                                          ArtField* resolved_field) {
   DCHECK(resolved_field->IsStatic());
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index 03c5c5c..02de11e 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -281,6 +281,18 @@
       ArtField* resolved_field, uint16_t field_idx, uint32_t* storage_index)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  // Return whether the declaring class of `resolved_method` is
+  // available to `referrer_class`. If this is true, compute the type
+  // index of the declaring class in the referrer's dex file and
+  // return it through the out argument `storage_index`; otherwise
+  // return DexFile::kDexNoIndex through `storage_index`.
+  bool IsClassOfStaticMethodAvailableToReferrer(mirror::DexCache* dex_cache,
+                                                mirror::Class* referrer_class,
+                                                mirror::ArtMethod* resolved_method,
+                                                uint16_t method_idx,
+                                                uint32_t* storage_index)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   // Is static field's in referrer's class?
   bool IsStaticFieldInReferrerClass(mirror::Class* referrer_class, ArtField* resolved_field)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -459,6 +471,33 @@
   }
 
  private:
+  // Return whether the declaring class of `resolved_member` is
+  // available to `referrer_class` for read or write access using two
+  // Boolean values returned as a pair. If is true at least for read
+  // access, compute the type index of the declaring class in the
+  // referrer's dex file and return it through the out argument
+  // `storage_index`; otherwise return DexFile::kDexNoIndex through
+  // `storage_index`.
+  template <typename ArtMember>
+  std::pair<bool, bool> IsClassOfStaticMemberAvailableToReferrer(mirror::DexCache* dex_cache,
+                                                                 mirror::Class* referrer_class,
+                                                                 ArtMember* resolved_member,
+                                                                 uint16_t member_idx,
+                                                                 uint32_t* storage_index)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  // Can `referrer_class` access the resolved `member`?
+  // Dispatch call to mirror::Class::CanAccessResolvedField or
+  // mirror::Class::CanAccessResolvedMember depending on the value of
+  // ArtMember.
+  template <typename ArtMember>
+  static bool CanAccessResolvedMember(mirror::Class* referrer_class,
+                                      mirror::Class* access_to,
+                                      ArtMember* member,
+                                      mirror::DexCache* dex_cache,
+                                      uint32_t field_idx)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   // These flags are internal to CompilerDriver for collecting INVOKE resolution statistics.
   // The only external contract is that unresolved method has flags 0 and resolved non-0.
   enum {
diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc
index 6a08548..7c400ee 100644
--- a/compiler/jit/jit_compiler.cc
+++ b/compiler/jit/jit_compiler.cc
@@ -62,7 +62,7 @@
 
 JitCompiler::JitCompiler() : total_time_(0) {
   auto* pass_manager_options = new PassManagerOptions;
-  pass_manager_options->SetDisablePassList("GVN,DCE");
+  pass_manager_options->SetDisablePassList("GVN,DCE,GVNCleanup");
   compiler_options_.reset(new CompilerOptions(
       CompilerOptions::kDefaultCompilerFilter,
       CompilerOptions::kDefaultHugeMethodThreshold,
diff --git a/compiler/optimizing/boolean_simplifier.cc b/compiler/optimizing/boolean_simplifier.cc
index 6ebfb45..8100a29 100644
--- a/compiler/optimizing/boolean_simplifier.cc
+++ b/compiler/optimizing/boolean_simplifier.cc
@@ -18,6 +18,26 @@
 
 namespace art {
 
+void HBooleanSimplifier::TryRemovingNegatedCondition(HBasicBlock* block) {
+  DCHECK(block->EndsWithIf());
+
+  // Check if the condition is a Boolean negation.
+  HIf* if_instruction = block->GetLastInstruction()->AsIf();
+  HInstruction* boolean_not = if_instruction->InputAt(0);
+  if (!boolean_not->IsBooleanNot()) {
+    return;
+  }
+
+  // Make BooleanNot's input the condition of the If and swap branches.
+  if_instruction->ReplaceInput(boolean_not->InputAt(0), 0);
+  block->SwapSuccessors();
+
+  // Remove the BooleanNot if it is now unused.
+  if (!boolean_not->HasUses()) {
+    boolean_not->GetBlock()->RemoveInstruction(boolean_not);
+  }
+}
+
 // Returns true if 'block1' and 'block2' are empty, merge into the same single
 // successor and the successor can only be reached from them.
 static bool BlocksDoMergeTogether(HBasicBlock* block1, HBasicBlock* block2) {
@@ -78,55 +98,69 @@
   }
 }
 
+void HBooleanSimplifier::TryRemovingBooleanSelection(HBasicBlock* block) {
+  DCHECK(block->EndsWithIf());
+
+  // Find elements of the pattern.
+  HIf* if_instruction = block->GetLastInstruction()->AsIf();
+  HBasicBlock* true_block = if_instruction->IfTrueSuccessor();
+  HBasicBlock* false_block = if_instruction->IfFalseSuccessor();
+  if (!BlocksDoMergeTogether(true_block, false_block)) {
+    return;
+  }
+  HBasicBlock* merge_block = true_block->GetSuccessors().Get(0);
+  if (!merge_block->HasSinglePhi()) {
+    return;
+  }
+  HPhi* phi = merge_block->GetFirstPhi()->AsPhi();
+  HInstruction* true_value = phi->InputAt(merge_block->GetPredecessorIndexOf(true_block));
+  HInstruction* false_value = phi->InputAt(merge_block->GetPredecessorIndexOf(false_block));
+
+  // Check if the selection negates/preserves the value of the condition and
+  // if so, generate a suitable replacement instruction.
+  HInstruction* if_condition = if_instruction->InputAt(0);
+  HInstruction* replacement;
+  if (NegatesCondition(true_value, false_value)) {
+    replacement = GetOppositeCondition(if_condition);
+    if (replacement->GetBlock() == nullptr) {
+      block->InsertInstructionBefore(replacement, if_instruction);
+    }
+  } else if (PreservesCondition(true_value, false_value)) {
+    replacement = if_condition;
+  } else {
+    return;
+  }
+
+  // Replace the selection outcome with the new instruction.
+  phi->ReplaceWith(replacement);
+  merge_block->RemovePhi(phi);
+
+  // Delete the true branch and merge the resulting chain of blocks
+  // 'block->false_block->merge_block' into one.
+  true_block->DisconnectAndDelete();
+  block->MergeWith(false_block);
+  block->MergeWith(merge_block);
+
+  // Remove the original condition if it is now unused.
+  if (!if_condition->HasUses()) {
+    if_condition->GetBlock()->RemoveInstructionOrPhi(if_condition);
+  }
+}
+
 void HBooleanSimplifier::Run() {
   // Iterate in post order in the unlikely case that removing one occurrence of
-  // the pattern empties a branch block of another occurrence. Otherwise the
-  // order does not matter.
+  // the selection pattern empties a branch block of another occurrence.
+  // Otherwise the order does not matter.
   for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
     HBasicBlock* block = it.Current();
     if (!block->EndsWithIf()) continue;
 
-    // Find elements of the pattern.
-    HIf* if_instruction = block->GetLastInstruction()->AsIf();
-    HBasicBlock* true_block = if_instruction->IfTrueSuccessor();
-    HBasicBlock* false_block = if_instruction->IfFalseSuccessor();
-    if (!BlocksDoMergeTogether(true_block, false_block)) {
-      continue;
-    }
-    HBasicBlock* merge_block = true_block->GetSuccessors().Get(0);
-    if (!merge_block->HasSinglePhi()) {
-      continue;
-    }
-    HPhi* phi = merge_block->GetFirstPhi()->AsPhi();
-    HInstruction* true_value = phi->InputAt(merge_block->GetPredecessorIndexOf(true_block));
-    HInstruction* false_value = phi->InputAt(merge_block->GetPredecessorIndexOf(false_block));
+    // If condition is negated, remove the negation and swap the branches.
+    TryRemovingNegatedCondition(block);
 
-    // Check if the selection negates/preserves the value of the condition and
-    // if so, generate a suitable replacement instruction.
-    HInstruction* if_condition = if_instruction->InputAt(0);
-    HInstruction* replacement;
-    if (NegatesCondition(true_value, false_value)) {
-      replacement = GetOppositeCondition(if_condition);
-      if (replacement->GetBlock() == nullptr) {
-        block->InsertInstructionBefore(replacement, if_instruction);
-      }
-    } else if (PreservesCondition(true_value, false_value)) {
-      replacement = if_condition;
-    } else {
-      continue;
-    }
-
-    // Replace the selection outcome with the new instruction.
-    phi->ReplaceWith(replacement);
-    merge_block->RemovePhi(phi);
-
-    // Link the start/end blocks and remove empty branches.
-    graph_->MergeEmptyBranches(block, merge_block);
-
-    // Remove the original condition if it is now unused.
-    if (!if_condition->HasUses()) {
-      if_condition->GetBlock()->RemoveInstruction(if_condition);
-    }
+    // If this is a boolean-selection diamond pattern, replace its result with
+    // the condition value (or its negation) and simplify the graph.
+    TryRemovingBooleanSelection(block);
   }
 }
 
diff --git a/compiler/optimizing/boolean_simplifier.h b/compiler/optimizing/boolean_simplifier.h
index a88733e..733ebaa 100644
--- a/compiler/optimizing/boolean_simplifier.h
+++ b/compiler/optimizing/boolean_simplifier.h
@@ -14,11 +14,15 @@
  * limitations under the License.
  */
 
-// This optimization recognizes a common pattern where a boolean value is
-// either cast to an integer or negated by selecting from zero/one integer
-// constants with an If statement. Because boolean values are internally
-// represented as zero/one, we can safely replace the pattern with a suitable
-// condition instruction.
+// This optimization recognizes two common patterns:
+//  (a) Boolean selection: Casting a boolean to an integer or negating it is
+//      carried out with an If statement selecting from zero/one integer
+//      constants. Because Boolean values are represented as zero/one, the
+//      pattern can be replaced with the condition instruction itself or its
+//      negation, depending on the layout.
+//  (b) Negated condition: Instruction simplifier may replace an If's condition
+//      with a boolean value. If this value is the result of a Boolean negation,
+//      the true/false branches can be swapped and negation removed.
 
 // Example: Negating a boolean value
 //     B1:
@@ -66,6 +70,9 @@
   static constexpr const char* kBooleanSimplifierPassName = "boolean_simplifier";
 
  private:
+  void TryRemovingNegatedCondition(HBasicBlock* block);
+  void TryRemovingBooleanSelection(HBasicBlock* block);
+
   DISALLOW_COPY_AND_ASSIGN(HBooleanSimplifier);
 };
 
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc
index 6511120..92fa6db 100644
--- a/compiler/optimizing/bounds_check_elimination.cc
+++ b/compiler/optimizing/bounds_check_elimination.cc
@@ -246,6 +246,141 @@
   int32_t constant_;
 };
 
+// Collect array access data for a loop.
+// TODO: make it work for multiple arrays inside the loop.
+class ArrayAccessInsideLoopFinder : public ValueObject {
+ public:
+  explicit ArrayAccessInsideLoopFinder(HInstruction* induction_variable)
+      : induction_variable_(induction_variable),
+        found_array_length_(nullptr),
+        offset_low_(INT_MAX),
+        offset_high_(INT_MIN) {
+    Run();
+  }
+
+  HArrayLength* GetFoundArrayLength() const { return found_array_length_; }
+  bool HasFoundArrayLength() const { return found_array_length_ != nullptr; }
+  int32_t GetOffsetLow() const { return offset_low_; }
+  int32_t GetOffsetHigh() const { return offset_high_; }
+
+  // Returns if `block` that is in loop_info may exit the loop, unless it's
+  // the loop header for loop_info.
+  static bool EarlyExit(HBasicBlock* block, HLoopInformation* loop_info) {
+    DCHECK(loop_info->Contains(*block));
+    if (block == loop_info->GetHeader()) {
+      // Loop header of loop_info. Exiting loop is normal.
+      return false;
+    }
+    const GrowableArray<HBasicBlock*> successors = block->GetSuccessors();
+    for (size_t i = 0; i < successors.Size(); i++) {
+      if (!loop_info->Contains(*successors.Get(i))) {
+        // One of the successors exits the loop.
+        return true;
+      }
+    }
+    return false;
+  }
+
+  void Run() {
+    HLoopInformation* loop_info = induction_variable_->GetBlock()->GetLoopInformation();
+    // Must be simplified loop.
+    DCHECK_EQ(loop_info->GetBackEdges().Size(), 1U);
+    for (HBlocksInLoopIterator it_loop(*loop_info); !it_loop.Done(); it_loop.Advance()) {
+      HBasicBlock* block = it_loop.Current();
+      DCHECK(block->IsInLoop());
+      HBasicBlock* back_edge = loop_info->GetBackEdges().Get(0);
+      if (!block->Dominates(back_edge)) {
+        // In order not to trigger deoptimization unnecessarily, make sure
+        // that all array accesses collected are really executed in the loop.
+        // For array accesses in a branch inside the loop, don't collect the
+        // access. The bounds check in that branch might not be eliminated.
+        continue;
+      }
+      if (EarlyExit(block, loop_info)) {
+        // If the loop body can exit loop (like break, return, etc.), it's not guaranteed
+        // that the loop will loop through the full monotonic value range from
+        // initial_ to end_. So adding deoptimization might be too aggressive and can
+        // trigger deoptimization unnecessarily even if the loop won't actually throw
+        // AIOOBE. Otherwise, the loop induction variable is going to cover the full
+        // monotonic value range from initial_ to end_, and deoptimizations are added
+        // iff the loop will throw AIOOBE.
+        found_array_length_ = nullptr;
+        return;
+      }
+      for (HInstruction* instruction = block->GetFirstInstruction();
+           instruction != nullptr;
+           instruction = instruction->GetNext()) {
+        if (!instruction->IsArrayGet() && !instruction->IsArraySet()) {
+          continue;
+        }
+        HInstruction* index = instruction->InputAt(1);
+        if (!index->IsBoundsCheck()) {
+          continue;
+        }
+
+        HArrayLength* array_length = index->InputAt(1)->AsArrayLength();
+        if (array_length == nullptr) {
+          DCHECK(index->InputAt(1)->IsIntConstant());
+          // TODO: may optimize for constant case.
+          continue;
+        }
+
+        HInstruction* array = array_length->InputAt(0);
+        if (array->IsNullCheck()) {
+          array = array->AsNullCheck()->InputAt(0);
+        }
+        if (loop_info->Contains(*array->GetBlock())) {
+          // Array is defined inside the loop. Skip.
+          continue;
+        }
+
+        if (found_array_length_ != nullptr && found_array_length_ != array_length) {
+          // There is already access for another array recorded for the loop.
+          // TODO: handle multiple arrays.
+          continue;
+        }
+
+        index = index->AsBoundsCheck()->InputAt(0);
+        HInstruction* left = index;
+        int32_t right = 0;
+        if (left == induction_variable_ ||
+            (ValueBound::IsAddOrSubAConstant(index, &left, &right) &&
+             left == induction_variable_)) {
+          // For patterns like array[i] or array[i + 2].
+          if (right < offset_low_) {
+            offset_low_ = right;
+          }
+          if (right > offset_high_) {
+            offset_high_ = right;
+          }
+        } else {
+          // Access not in induction_variable/(induction_variable_ + constant)
+          // format. Skip.
+          continue;
+        }
+        // Record this array.
+        found_array_length_ = array_length;
+      }
+    }
+  }
+
+ private:
+  // The instruction that corresponds to a MonotonicValueRange.
+  HInstruction* induction_variable_;
+
+  // The array length of the array that's accessed inside the loop.
+  HArrayLength* found_array_length_;
+
+  // The lowest and highest constant offsets relative to induction variable
+  // instruction_ in all array accesses.
+  // If array access are: array[i-1], array[i], array[i+1],
+  // offset_low_ is -1 and offset_high is 1.
+  int32_t offset_low_;
+  int32_t offset_high_;
+
+  DISALLOW_COPY_AND_ASSIGN(ArrayAccessInsideLoopFinder);
+};
+
 /**
  * Represent a range of lower bound and upper bound, both being inclusive.
  * Currently a ValueRange may be generated as a result of the following:
@@ -332,21 +467,31 @@
 class MonotonicValueRange : public ValueRange {
  public:
   MonotonicValueRange(ArenaAllocator* allocator,
+                      HPhi* induction_variable,
                       HInstruction* initial,
                       int32_t increment,
                       ValueBound bound)
       // To be conservative, give it full range [INT_MIN, INT_MAX] in case it's
       // used as a regular value range, due to possible overflow/underflow.
       : ValueRange(allocator, ValueBound::Min(), ValueBound::Max()),
+        induction_variable_(induction_variable),
         initial_(initial),
+        end_(nullptr),
+        inclusive_(false),
         increment_(increment),
         bound_(bound) {}
 
   virtual ~MonotonicValueRange() {}
 
+  HInstruction* GetInductionVariable() const { return induction_variable_; }
   int32_t GetIncrement() const { return increment_; }
-
   ValueBound GetBound() const { return bound_; }
+  void SetEnd(HInstruction* end) { end_ = end; }
+  void SetInclusive(bool inclusive) { inclusive_ = inclusive; }
+  HBasicBlock* GetLoopHead() const {
+    DCHECK(induction_variable_->GetBlock()->IsLoopHeader());
+    return induction_variable_->GetBlock();
+  }
 
   MonotonicValueRange* AsMonotonicValueRange() OVERRIDE { return this; }
 
@@ -371,6 +516,10 @@
     if (increment_ > 0) {
       // Monotonically increasing.
       ValueBound lower = ValueBound::NarrowLowerBound(bound_, range->GetLower());
+      if (!lower.IsConstant() || lower.GetConstant() == INT_MIN) {
+        // Lower bound isn't useful. Leave it to deoptimization.
+        return this;
+      }
 
       // We currently conservatively assume max array length is INT_MAX. If we can
       // make assumptions about the max array length, e.g. due to the max heap size,
@@ -417,6 +566,11 @@
       DCHECK_NE(increment_, 0);
       // Monotonically decreasing.
       ValueBound upper = ValueBound::NarrowUpperBound(bound_, range->GetUpper());
+      if ((!upper.IsConstant() || upper.GetConstant() == INT_MAX) &&
+          !upper.IsRelatedToArrayLength()) {
+        // Upper bound isn't useful. Leave it to deoptimization.
+        return this;
+      }
 
       // Need to take care of underflow. Try to prove underflow won't happen
       // for common cases.
@@ -432,10 +586,217 @@
     }
   }
 
+  // Returns true if adding a (constant >= value) check for deoptimization
+  // is allowed and will benefit compiled code.
+  bool CanAddDeoptimizationConstant(HInstruction* value,
+                                    int32_t constant,
+                                    bool* is_proven) {
+    *is_proven = false;
+    // See if we can prove the relationship first.
+    if (value->IsIntConstant()) {
+      if (value->AsIntConstant()->GetValue() >= constant) {
+        // Already true.
+        *is_proven = true;
+        return true;
+      } else {
+        // May throw exception. Don't add deoptimization.
+        // Keep bounds checks in the loops.
+        return false;
+      }
+    }
+    // Can benefit from deoptimization.
+    return true;
+  }
+
+  // Adds a check that (value >= constant), and HDeoptimize otherwise.
+  void AddDeoptimizationConstant(HInstruction* value,
+                                 int32_t constant) {
+    HBasicBlock* block = induction_variable_->GetBlock();
+    DCHECK(block->IsLoopHeader());
+    HGraph* graph = block->GetGraph();
+    HBasicBlock* pre_header = block->GetLoopInformation()->GetPreHeader();
+    HSuspendCheck* suspend_check = block->GetLoopInformation()->GetSuspendCheck();
+    HIntConstant* const_instr = graph->GetIntConstant(constant);
+    HCondition* cond = new (graph->GetArena()) HLessThan(value, const_instr);
+    HDeoptimize* deoptimize = new (graph->GetArena())
+        HDeoptimize(cond, suspend_check->GetDexPc());
+    pre_header->InsertInstructionBefore(cond, pre_header->GetLastInstruction());
+    pre_header->InsertInstructionBefore(deoptimize, pre_header->GetLastInstruction());
+    deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment(
+        suspend_check->GetEnvironment(), block);
+  }
+
+  // Returns true if adding a (value <= array_length + offset) check for deoptimization
+  // is allowed and will benefit compiled code.
+  bool CanAddDeoptimizationArrayLength(HInstruction* value,
+                                       HArrayLength* array_length,
+                                       int32_t offset,
+                                       bool* is_proven) {
+    *is_proven = false;
+    if (offset > 0) {
+      // There might be overflow issue.
+      // TODO: handle this, possibly with some distance relationship between
+      // offset_low and offset_high, or using another deoptimization to make
+      // sure (array_length + offset) doesn't overflow.
+      return false;
+    }
+
+    // See if we can prove the relationship first.
+    if (value == array_length) {
+      if (offset >= 0) {
+        // Already true.
+        *is_proven = true;
+        return true;
+      } else {
+        // May throw exception. Don't add deoptimization.
+        // Keep bounds checks in the loops.
+        return false;
+      }
+    }
+    // Can benefit from deoptimization.
+    return true;
+  }
+
+  // Adds a check that (value <= array_length + offset), and HDeoptimize otherwise.
+  void AddDeoptimizationArrayLength(HInstruction* value,
+                                    HArrayLength* array_length,
+                                    int32_t offset) {
+    HBasicBlock* block = induction_variable_->GetBlock();
+    DCHECK(block->IsLoopHeader());
+    HGraph* graph = block->GetGraph();
+    HBasicBlock* pre_header = block->GetLoopInformation()->GetPreHeader();
+    HSuspendCheck* suspend_check = block->GetLoopInformation()->GetSuspendCheck();
+
+    // We may need to hoist null-check and array_length out of loop first.
+    if (!array_length->GetBlock()->Dominates(pre_header)) {
+      HInstruction* array = array_length->InputAt(0);
+      HNullCheck* null_check = array->AsNullCheck();
+      if (null_check != nullptr) {
+        array = null_check->InputAt(0);
+      }
+      // We've already made sure array is defined before the loop when collecting
+      // array accesses for the loop.
+      DCHECK(array->GetBlock()->Dominates(pre_header));
+      if (null_check != nullptr && !null_check->GetBlock()->Dominates(pre_header)) {
+        // Hoist null check out of loop with a deoptimization.
+        HNullConstant* null_constant = graph->GetNullConstant();
+        HCondition* null_check_cond = new (graph->GetArena()) HEqual(array, null_constant);
+        // TODO: for one dex_pc, share the same deoptimization slow path.
+        HDeoptimize* null_check_deoptimize = new (graph->GetArena())
+            HDeoptimize(null_check_cond, suspend_check->GetDexPc());
+        pre_header->InsertInstructionBefore(null_check_cond, pre_header->GetLastInstruction());
+        pre_header->InsertInstructionBefore(
+            null_check_deoptimize, pre_header->GetLastInstruction());
+        // Eliminate null check in the loop.
+        null_check->ReplaceWith(array);
+        null_check->GetBlock()->RemoveInstruction(null_check);
+        null_check_deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment(
+            suspend_check->GetEnvironment(), block);
+      }
+      // Hoist array_length out of loop.
+      array_length->MoveBefore(pre_header->GetLastInstruction());
+    }
+
+    HIntConstant* offset_instr = graph->GetIntConstant(offset);
+    HAdd* add = new (graph->GetArena()) HAdd(Primitive::kPrimInt, array_length, offset_instr);
+    HCondition* cond = new (graph->GetArena()) HGreaterThan(value, add);
+    HDeoptimize* deoptimize = new (graph->GetArena())
+        HDeoptimize(cond, suspend_check->GetDexPc());
+    pre_header->InsertInstructionBefore(add, pre_header->GetLastInstruction());
+    pre_header->InsertInstructionBefore(cond, pre_header->GetLastInstruction());
+    pre_header->InsertInstructionBefore(deoptimize, pre_header->GetLastInstruction());
+    deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment(
+        suspend_check->GetEnvironment(), block);
+  }
+
+  // Add deoptimizations in loop pre-header with the collected array access
+  // data so that value ranges can be established in loop body.
+  // Returns true if deoptimizations are successfully added, or if it's proven
+  // it's not necessary.
+  bool AddDeoptimization(const ArrayAccessInsideLoopFinder& finder) {
+    int32_t offset_low = finder.GetOffsetLow();
+    int32_t offset_high = finder.GetOffsetHigh();
+    HArrayLength* array_length = finder.GetFoundArrayLength();
+
+    HBasicBlock* pre_header =
+        induction_variable_->GetBlock()->GetLoopInformation()->GetPreHeader();
+    if (!initial_->GetBlock()->Dominates(pre_header) ||
+        !end_->GetBlock()->Dominates(pre_header)) {
+      // Can't move initial_ or end_ into pre_header for comparisons.
+      return false;
+    }
+
+    bool is_constant_proven, is_length_proven;
+    if (increment_ == 1) {
+      // Increasing from initial_ to end_.
+      int32_t offset = inclusive_ ? -offset_high - 1 : -offset_high;
+      if (CanAddDeoptimizationConstant(initial_, -offset_low, &is_constant_proven) &&
+          CanAddDeoptimizationArrayLength(end_, array_length, offset, &is_length_proven)) {
+        if (!is_constant_proven) {
+          AddDeoptimizationConstant(initial_, -offset_low);
+        }
+        if (!is_length_proven) {
+          AddDeoptimizationArrayLength(end_, array_length, offset);
+        }
+        return true;
+      }
+    } else if (increment_ == -1) {
+      // Decreasing from initial_ to end_.
+      int32_t constant = inclusive_ ? -offset_low : -offset_low - 1;
+      if (CanAddDeoptimizationConstant(end_, constant, &is_constant_proven) &&
+          CanAddDeoptimizationArrayLength(
+              initial_, array_length, -offset_high - 1, &is_length_proven)) {
+        if (!is_constant_proven) {
+          AddDeoptimizationConstant(end_, constant);
+        }
+        if (!is_length_proven) {
+          AddDeoptimizationArrayLength(initial_, array_length, -offset_high - 1);
+        }
+        return true;
+      }
+    }
+    return false;
+  }
+
+  // Try to add HDeoptimize's in the loop pre-header first to narrow this range.
+  ValueRange* NarrowWithDeoptimization() {
+    if (increment_ != 1 && increment_ != -1) {
+      // TODO: possibly handle overflow/underflow issues with deoptimization.
+      return this;
+    }
+
+    if (end_ == nullptr) {
+      // No full info to add deoptimization.
+      return this;
+    }
+
+    ArrayAccessInsideLoopFinder finder(induction_variable_);
+
+    if (!finder.HasFoundArrayLength()) {
+      // No array access was found inside the loop that can benefit
+      // from deoptimization.
+      return this;
+    }
+
+    if (!AddDeoptimization(finder)) {
+      return this;
+    }
+
+    // After added deoptimizations, induction variable fits in
+    // [-offset_low, array.length-1-offset_high], adjusted with collected offsets.
+    ValueBound lower = ValueBound(0, -finder.GetOffsetLow());
+    ValueBound upper = ValueBound(finder.GetFoundArrayLength(), -1 - finder.GetOffsetHigh());
+    // We've narrowed the range after added deoptimizations.
+    return new (GetAllocator()) ValueRange(GetAllocator(), lower, upper);
+  }
+
  private:
-  HInstruction* const initial_;
-  const int32_t increment_;
-  ValueBound bound_;  // Additional value bound info for initial_;
+  HPhi* const induction_variable_;  // Induction variable for this monotonic value range.
+  HInstruction* const initial_;     // Initial value.
+  HInstruction* end_;               // End value.
+  bool inclusive_;                  // Whether end value is inclusive.
+  const int32_t increment_;         // Increment for each loop iteration.
+  const ValueBound bound_;          // Additional value bound info for initial_.
 
   DISALLOW_COPY_AND_ASSIGN(MonotonicValueRange);
 };
@@ -598,6 +959,20 @@
     // There should be no critical edge at this point.
     DCHECK_EQ(false_successor->GetPredecessors().Size(), 1u);
 
+    ValueRange* left_range = LookupValueRange(left, block);
+    MonotonicValueRange* left_monotonic_range = nullptr;
+    if (left_range != nullptr) {
+      left_monotonic_range = left_range->AsMonotonicValueRange();
+      if (left_monotonic_range != nullptr) {
+        HBasicBlock* loop_head = left_monotonic_range->GetLoopHead();
+        if (instruction->GetBlock() != loop_head) {
+          // For monotonic value range, don't handle `instruction`
+          // if it's not defined in the loop header.
+          return;
+        }
+      }
+    }
+
     bool found;
     ValueBound bound = ValueBound::DetectValueBoundFromValue(right, &found);
     // Each comparison can establish a lower bound and an upper bound
@@ -610,7 +985,6 @@
       ValueRange* right_range = LookupValueRange(right, block);
       if (right_range != nullptr) {
         if (right_range->IsMonotonicValueRange()) {
-          ValueRange* left_range = LookupValueRange(left, block);
           if (left_range != nullptr && left_range->IsMonotonicValueRange()) {
             HandleIfBetweenTwoMonotonicValueRanges(instruction, left, right, cond,
                                                    left_range->AsMonotonicValueRange(),
@@ -628,6 +1002,17 @@
 
     bool overflow, underflow;
     if (cond == kCondLT || cond == kCondLE) {
+      if (left_monotonic_range != nullptr) {
+        // Update the info for monotonic value range.
+        if (left_monotonic_range->GetInductionVariable() == left &&
+            left_monotonic_range->GetIncrement() < 0 &&
+            block == left_monotonic_range->GetLoopHead() &&
+            instruction->IfFalseSuccessor()->GetLoopInformation() == block->GetLoopInformation()) {
+          left_monotonic_range->SetEnd(right);
+          left_monotonic_range->SetInclusive(cond == kCondLT);
+        }
+      }
+
       if (!upper.Equals(ValueBound::Max())) {
         int32_t compensation = (cond == kCondLT) ? -1 : 0;  // upper bound is inclusive
         ValueBound new_upper = upper.Add(compensation, &overflow, &underflow);
@@ -651,6 +1036,17 @@
         ApplyRangeFromComparison(left, block, false_successor, new_range);
       }
     } else if (cond == kCondGT || cond == kCondGE) {
+      if (left_monotonic_range != nullptr) {
+        // Update the info for monotonic value range.
+        if (left_monotonic_range->GetInductionVariable() == left &&
+            left_monotonic_range->GetIncrement() > 0 &&
+            block == left_monotonic_range->GetLoopHead() &&
+            instruction->IfFalseSuccessor()->GetLoopInformation() == block->GetLoopInformation()) {
+          left_monotonic_range->SetEnd(right);
+          left_monotonic_range->SetInclusive(cond == kCondGT);
+        }
+      }
+
       // array.length as a lower bound isn't considered useful.
       if (!lower.Equals(ValueBound::Min()) && !lower.IsRelatedToArrayLength()) {
         int32_t compensation = (cond == kCondGT) ? 1 : 0;  // lower bound is inclusive
@@ -790,6 +1186,7 @@
             }
             range = new (GetGraph()->GetArena()) MonotonicValueRange(
                 GetGraph()->GetArena(),
+                phi,
                 initial_value,
                 increment,
                 bound);
@@ -809,6 +1206,36 @@
         HInstruction* left = cond->GetLeft();
         HInstruction* right = cond->GetRight();
         HandleIf(instruction, left, right, cmp);
+
+        HBasicBlock* block = instruction->GetBlock();
+        ValueRange* left_range = LookupValueRange(left, block);
+        if (left_range == nullptr) {
+          return;
+        }
+
+        if (left_range->IsMonotonicValueRange() &&
+            block == left_range->AsMonotonicValueRange()->GetLoopHead()) {
+          // The comparison is for an induction variable in the loop header.
+          DCHECK(left == left_range->AsMonotonicValueRange()->GetInductionVariable());
+          HBasicBlock* loop_body_successor;
+          if (LIKELY(block->GetLoopInformation()->
+              Contains(*instruction->IfFalseSuccessor()))) {
+            loop_body_successor = instruction->IfFalseSuccessor();
+          } else {
+            loop_body_successor = instruction->IfTrueSuccessor();
+          }
+          ValueRange* new_left_range = LookupValueRange(left, loop_body_successor);
+          if (new_left_range == left_range) {
+            // We are not successful in narrowing the monotonic value range to
+            // a regular value range. Try using deoptimization.
+            new_left_range = left_range->AsMonotonicValueRange()->
+                NarrowWithDeoptimization();
+            if (new_left_range != left_range) {
+              GetValueRangeMap(instruction->IfFalseSuccessor())->
+                  Overwrite(left->GetId(), new_left_range);
+            }
+          }
+        }
       }
     }
   }
@@ -1064,7 +1491,7 @@
 };
 
 void BoundsCheckElimination::Run() {
-  if (!graph_->HasArrayAccesses()) {
+  if (!graph_->HasBoundsChecks()) {
     return;
   }
 
diff --git a/compiler/optimizing/bounds_check_elimination_test.cc b/compiler/optimizing/bounds_check_elimination_test.cc
index 75cf1cf..97be778 100644
--- a/compiler/optimizing/bounds_check_elimination_test.cc
+++ b/compiler/optimizing/bounds_check_elimination_test.cc
@@ -43,7 +43,7 @@
   ArenaAllocator allocator(&pool);
 
   HGraph* graph = new (&allocator) HGraph(&allocator);
-  graph->SetHasArrayAccesses(true);
+  graph->SetHasBoundsChecks(true);
 
   HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
   graph->AddBlock(entry);
@@ -148,7 +148,7 @@
   ArenaAllocator allocator(&pool);
 
   HGraph* graph = new (&allocator) HGraph(&allocator);
-  graph->SetHasArrayAccesses(true);
+  graph->SetHasBoundsChecks(true);
 
   HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
   graph->AddBlock(entry);
@@ -220,7 +220,7 @@
   ArenaAllocator allocator(&pool);
 
   HGraph* graph = new (&allocator) HGraph(&allocator);
-  graph->SetHasArrayAccesses(true);
+  graph->SetHasBoundsChecks(true);
 
   HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
   graph->AddBlock(entry);
@@ -292,7 +292,7 @@
   ArenaAllocator allocator(&pool);
 
   HGraph* graph = new (&allocator) HGraph(&allocator);
-  graph->SetHasArrayAccesses(true);
+  graph->SetHasBoundsChecks(true);
 
   HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
   graph->AddBlock(entry);
@@ -365,7 +365,7 @@
                               int increment,
                               IfCondition cond = kCondGE) {
   HGraph* graph = new (allocator) HGraph(allocator);
-  graph->SetHasArrayAccesses(true);
+  graph->SetHasBoundsChecks(true);
 
   HBasicBlock* entry = new (allocator) HBasicBlock(graph);
   graph->AddBlock(entry);
@@ -502,7 +502,7 @@
                               int increment = -1,
                               IfCondition cond = kCondLE) {
   HGraph* graph = new (allocator) HGraph(allocator);
-  graph->SetHasArrayAccesses(true);
+  graph->SetHasBoundsChecks(true);
 
   HBasicBlock* entry = new (allocator) HBasicBlock(graph);
   graph->AddBlock(entry);
@@ -633,7 +633,7 @@
                               int increment,
                               IfCondition cond) {
   HGraph* graph = new (allocator) HGraph(allocator);
-  graph->SetHasArrayAccesses(true);
+  graph->SetHasBoundsChecks(true);
 
   HBasicBlock* entry = new (allocator) HBasicBlock(graph);
   graph->AddBlock(entry);
@@ -744,7 +744,7 @@
                               int initial,
                               IfCondition cond = kCondGE) {
   HGraph* graph = new (allocator) HGraph(allocator);
-  graph->SetHasArrayAccesses(true);
+  graph->SetHasBoundsChecks(true);
 
   HBasicBlock* entry = new (allocator) HBasicBlock(graph);
   graph->AddBlock(entry);
@@ -869,7 +869,7 @@
   ArenaAllocator allocator(&pool);
 
   HGraph* graph = new (&allocator) HGraph(&allocator);
-  graph->SetHasArrayAccesses(true);
+  graph->SetHasBoundsChecks(true);
 
   HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
   graph->AddBlock(entry);
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index 818d671..96e08fd 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -21,6 +21,7 @@
 #include "class_linker.h"
 #include "dex_file-inl.h"
 #include "dex_instruction-inl.h"
+#include "dex/verified_method.h"
 #include "driver/compiler_driver-inl.h"
 #include "driver/compiler_options.h"
 #include "mirror/class_loader.h"
@@ -587,7 +588,7 @@
   const char* descriptor = dex_file_->StringDataByIdx(proto_id.shorty_idx_);
   Primitive::Type return_type = Primitive::GetType(descriptor[0]);
   bool is_instance_call = invoke_type != kStatic;
-  const size_t number_of_arguments = strlen(descriptor) - (is_instance_call ? 0 : 1);
+  size_t number_of_arguments = strlen(descriptor) - (is_instance_call ? 0 : 1);
 
   MethodReference target_method(dex_file_, method_idx);
   uintptr_t direct_code;
@@ -605,7 +606,15 @@
   }
   DCHECK(optimized_invoke_type != kSuper);
 
+  // By default, consider that the called method implicitly requires
+  // an initialization check of its declaring method.
+  HInvokeStaticOrDirect::ClinitCheckRequirement clinit_check_requirement =
+      HInvokeStaticOrDirect::ClinitCheckRequirement::kImplicit;
+  // Potential class initialization check, in the case of a static method call.
+  HClinitCheck* clinit_check = nullptr;
+
   HInvoke* invoke = nullptr;
+
   if (optimized_invoke_type == kVirtual) {
     invoke = new (arena_) HInvokeVirtual(
         arena_, number_of_arguments, return_type, dex_pc, method_idx, table_index);
@@ -620,9 +629,76 @@
     bool is_recursive =
         (target_method.dex_method_index == dex_compilation_unit_->GetDexMethodIndex());
     DCHECK(!is_recursive || (target_method.dex_file == dex_compilation_unit_->GetDexFile()));
+
+    if (optimized_invoke_type == kStatic) {
+      ScopedObjectAccess soa(Thread::Current());
+      StackHandleScope<4> hs(soa.Self());
+      Handle<mirror::DexCache> dex_cache(hs.NewHandle(
+          dex_compilation_unit_->GetClassLinker()->FindDexCache(
+              *dex_compilation_unit_->GetDexFile())));
+      Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
+          soa.Decode<mirror::ClassLoader*>(dex_compilation_unit_->GetClassLoader())));
+      mirror::ArtMethod* resolved_method = compiler_driver_->ResolveMethod(
+          soa, dex_cache, class_loader, dex_compilation_unit_, method_idx,
+          optimized_invoke_type);
+
+      if (resolved_method == nullptr) {
+        MaybeRecordStat(MethodCompilationStat::kNotCompiledUnresolvedMethod);
+        return false;
+      }
+
+      const DexFile& outer_dex_file = *outer_compilation_unit_->GetDexFile();
+      Handle<mirror::DexCache> outer_dex_cache(hs.NewHandle(
+          outer_compilation_unit_->GetClassLinker()->FindDexCache(outer_dex_file)));
+      Handle<mirror::Class> referrer_class(hs.NewHandle(GetOutermostCompilingClass()));
+
+      // The index at which the method's class is stored in the DexCache's type array.
+      uint32_t storage_index = DexFile::kDexNoIndex;
+      bool is_referrer_class = (resolved_method->GetDeclaringClass() == referrer_class.Get());
+      if (is_referrer_class) {
+        storage_index = referrer_class->GetDexTypeIndex();
+      } else if (outer_dex_cache.Get() == dex_cache.Get()) {
+        // Get `storage_index` from IsClassOfStaticMethodAvailableToReferrer.
+        compiler_driver_->IsClassOfStaticMethodAvailableToReferrer(outer_dex_cache.Get(),
+                                                                   referrer_class.Get(),
+                                                                   resolved_method,
+                                                                   method_idx,
+                                                                   &storage_index);
+      }
+
+      if (referrer_class.Get()->IsSubClass(resolved_method->GetDeclaringClass())) {
+        // If the referrer class is the declaring class or a subclass
+        // of the declaring class, no class initialization is needed
+        // before the static method call.
+        clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kNone;
+      } else if (storage_index != DexFile::kDexNoIndex) {
+        // If the method's class type index is available, check
+        // whether we should add an explicit class initialization
+        // check for its declaring class before the static method call.
+
+        // TODO: find out why this check is needed.
+        bool is_in_dex_cache = compiler_driver_->CanAssumeTypeIsPresentInDexCache(
+            *outer_compilation_unit_->GetDexFile(), storage_index);
+        bool is_initialized =
+            resolved_method->GetDeclaringClass()->IsInitialized() && is_in_dex_cache;
+
+        if (is_initialized) {
+          clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kNone;
+        } else {
+          clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kExplicit;
+          HLoadClass* load_class =
+              new (arena_) HLoadClass(storage_index, is_referrer_class, dex_pc);
+          current_block_->AddInstruction(load_class);
+          clinit_check = new (arena_) HClinitCheck(load_class, dex_pc);
+          current_block_->AddInstruction(clinit_check);
+          ++number_of_arguments;
+        }
+      }
+    }
+
     invoke = new (arena_) HInvokeStaticOrDirect(
         arena_, number_of_arguments, return_type, dex_pc, target_method.dex_method_index,
-        is_recursive, invoke_type, optimized_invoke_type);
+        is_recursive, invoke_type, optimized_invoke_type, clinit_check_requirement);
   }
 
   size_t start_index = 0;
@@ -655,6 +731,12 @@
     }
   }
 
+  if (clinit_check_requirement == HInvokeStaticOrDirect::ClinitCheckRequirement::kExplicit) {
+    // Add the class initialization check as last input of `invoke`.
+    DCHECK(clinit_check != nullptr);
+    invoke->SetArgumentAt(argument_index++, clinit_check);
+  }
+
   DCHECK_EQ(argument_index, number_of_arguments);
   current_block_->AddInstruction(invoke);
   latest_result_ = invoke;
@@ -732,7 +814,6 @@
   return compiling_class.Get() == cls.Get();
 }
 
-
 bool HGraphBuilder::BuildStaticFieldAccess(const Instruction& instruction,
                                            uint32_t dex_pc,
                                            bool is_put) {
@@ -764,7 +845,7 @@
   if (is_referrer_class) {
     storage_index = referrer_class->GetDexTypeIndex();
   } else if (outer_dex_cache.Get() != dex_cache.Get()) {
-    // The compiler driver cannot currently understand multple dex caches involved. Just bailout.
+    // The compiler driver cannot currently understand multiple dex caches involved. Just bailout.
     return false;
   } else {
     std::pair<bool, bool> pair = compiler_driver_->IsFastStaticField(
@@ -882,7 +963,7 @@
     current_block_->AddInstruction(new (arena_) HArrayGet(object, index, anticipated_type));
     UpdateLocal(source_or_dest_reg, current_block_->GetLastInstruction());
   }
-  graph_->SetHasArrayAccesses(true);
+  graph_->SetHasBoundsChecks(true);
 }
 
 void HGraphBuilder::BuildFilledNewArray(uint32_t dex_pc,
@@ -984,6 +1065,7 @@
     default:
       LOG(FATAL) << "Unknown element width for " << payload->element_width;
   }
+  graph_->SetHasBoundsChecks(true);
 }
 
 void HGraphBuilder::BuildFillWideArrayData(HInstruction* object,
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index b14b69b..5163395 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -612,7 +612,7 @@
 }
 
 void CodeGenerator::BuildStackMaps(std::vector<uint8_t>* data) {
-  uint32_t size = stack_map_stream_.ComputeNeededSize();
+  uint32_t size = stack_map_stream_.PrepareForFillIn();
   data->resize(size);
   MemoryRegion region(data->data(), size);
   stack_map_stream_.FillIn(region);
@@ -654,7 +654,8 @@
 
   if (instruction == nullptr) {
     // For stack overflow checks.
-    stack_map_stream_.AddStackMapEntry(dex_pc, pc_info.native_pc, 0, 0, 0, inlining_depth);
+    stack_map_stream_.BeginStackMapEntry(dex_pc, pc_info.native_pc, 0, 0, 0, inlining_depth);
+    stack_map_stream_.EndStackMapEntry();
     return;
   }
   LocationSummary* locations = instruction->GetLocations();
@@ -672,12 +673,12 @@
   }
   // The register mask must be a subset of callee-save registers.
   DCHECK_EQ(register_mask & core_callee_save_mask_, register_mask);
-  stack_map_stream_.AddStackMapEntry(dex_pc,
-                                     pc_info.native_pc,
-                                     register_mask,
-                                     locations->GetStackMask(),
-                                     environment_size,
-                                     inlining_depth);
+  stack_map_stream_.BeginStackMapEntry(dex_pc,
+                                       pc_info.native_pc,
+                                       register_mask,
+                                       locations->GetStackMask(),
+                                       environment_size,
+                                       inlining_depth);
 
   // Walk over the environment, and record the location of dex registers.
   for (size_t i = 0; i < environment_size; ++i) {
@@ -823,6 +824,7 @@
         LOG(FATAL) << "Unexpected kind " << location.GetKind();
     }
   }
+  stack_map_stream_.EndStackMapEntry();
 }
 
 bool CodeGenerator::CanMoveNullCheckToUser(HNullCheck* null_check) {
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index ae1fb53..01748a9 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -176,7 +176,6 @@
 
     InvokeRuntimeCallingConvention calling_convention;
     __ LoadImmediate(calling_convention.GetRegisterAt(0), cls_->GetTypeIndex());
-    arm_codegen->LoadCurrentMethod(calling_convention.GetRegisterAt(1));
     int32_t entry_point_offset = do_clinit_
         ? QUICK_ENTRY_POINT(pInitializeStaticStorage)
         : QUICK_ENTRY_POINT(pInitializeType);
@@ -222,7 +221,6 @@
     SaveLiveRegisters(codegen, locations);
 
     InvokeRuntimeCallingConvention calling_convention;
-    arm_codegen->LoadCurrentMethod(calling_convention.GetRegisterAt(1));
     __ LoadImmediate(calling_convention.GetRegisterAt(0), instruction_->GetStringIndex());
     arm_codegen->InvokeRuntime(
         QUICK_ENTRY_POINT(pResolveString), instruction_, instruction_->GetDexPc(), this);
@@ -1243,6 +1241,14 @@
 }
 
 void LocationsBuilderARM::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
+  // Explicit clinit checks triggered by static invokes must have been
+  // pruned by art::PrepareForRegisterAllocation, but this step is not
+  // run in baseline. So we remove them manually here if we find them.
+  // TODO: Instead of this local workaround, address this properly.
+  if (invoke->IsStaticWithExplicitClinitCheck()) {
+    invoke->RemoveClinitCheckOrLoadClassAsLastInput();
+  }
+
   IntrinsicLocationsBuilderARM intrinsic(GetGraph()->GetArena(),
                                          codegen_->GetInstructionSetFeatures());
   if (intrinsic.TryDispatch(invoke)) {
@@ -1267,6 +1273,10 @@
 }
 
 void InstructionCodeGeneratorARM::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
+  // Explicit clinit checks triggered by static invokes must have been
+  // pruned by art::PrepareForRegisterAllocation.
+  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
+
   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
     return;
   }
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 1c6debd..dada4ce 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -173,14 +173,13 @@
 
     InvokeRuntimeCallingConvention calling_convention;
     __ Mov(calling_convention.GetRegisterAt(0).W(), cls_->GetTypeIndex());
-    arm64_codegen->LoadCurrentMethod(calling_convention.GetRegisterAt(1).W());
     int32_t entry_point_offset = do_clinit_ ? QUICK_ENTRY_POINT(pInitializeStaticStorage)
                                             : QUICK_ENTRY_POINT(pInitializeType);
     arm64_codegen->InvokeRuntime(entry_point_offset, at_, dex_pc_, this);
     if (do_clinit_) {
-      CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t, mirror::ArtMethod*>();
+      CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>();
     } else {
-      CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t, mirror::ArtMethod*>();
+      CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>();
     }
 
     // Move the class to the desired location.
@@ -225,11 +224,10 @@
     SaveLiveRegisters(codegen, locations);
 
     InvokeRuntimeCallingConvention calling_convention;
-    arm64_codegen->LoadCurrentMethod(calling_convention.GetRegisterAt(1).W());
     __ Mov(calling_convention.GetRegisterAt(0).W(), instruction_->GetStringIndex());
     arm64_codegen->InvokeRuntime(
         QUICK_ENTRY_POINT(pResolveString), instruction_, instruction_->GetDexPc(), this);
-    CheckEntrypointTypes<kQuickResolveString, void*, uint32_t, mirror::ArtMethod*>();
+    CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
     Primitive::Type type = instruction_->GetType();
     arm64_codegen->MoveLocation(locations->Out(), calling_convention.GetReturnLocation(type), type);
 
@@ -1970,6 +1968,14 @@
 }
 
 void LocationsBuilderARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
+  // Explicit clinit checks triggered by static invokes must have been
+  // pruned by art::PrepareForRegisterAllocation, but this step is not
+  // run in baseline. So we remove them manually here if we find them.
+  // TODO: Instead of this local workaround, address this properly.
+  if (invoke->IsStaticWithExplicitClinitCheck()) {
+    invoke->RemoveClinitCheckOrLoadClassAsLastInput();
+  }
+
   IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetArena());
   if (intrinsic.TryDispatch(invoke)) {
     return;
@@ -2020,6 +2026,10 @@
 }
 
 void InstructionCodeGeneratorARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
+  // Explicit clinit checks triggered by static invokes must have been
+  // pruned by art::PrepareForRegisterAllocation.
+  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
+
   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
     return;
   }
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index c604842..04999be 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -174,7 +174,6 @@
     SaveLiveRegisters(codegen, locations);
 
     InvokeRuntimeCallingConvention calling_convention;
-    x86_codegen->LoadCurrentMethod(calling_convention.GetRegisterAt(1));
     __ movl(calling_convention.GetRegisterAt(0), Immediate(instruction_->GetStringIndex()));
     __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pResolveString)));
     RecordPcInfo(codegen, instruction_, instruction_->GetDexPc());
@@ -208,7 +207,6 @@
 
     InvokeRuntimeCallingConvention calling_convention;
     __ movl(calling_convention.GetRegisterAt(0), Immediate(cls_->GetTypeIndex()));
-    x86_codegen->LoadCurrentMethod(calling_convention.GetRegisterAt(1));
     __ fs()->call(Address::Absolute(do_clinit_
         ? QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pInitializeStaticStorage)
         : QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pInitializeType)));
@@ -1196,6 +1194,14 @@
 }
 
 void LocationsBuilderX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
+  // Explicit clinit checks triggered by static invokes must have been
+  // pruned by art::PrepareForRegisterAllocation, but this step is not
+  // run in baseline. So we remove them manually here if we find them.
+  // TODO: Instead of this local workaround, address this properly.
+  if (invoke->IsStaticWithExplicitClinitCheck()) {
+    invoke->RemoveClinitCheckOrLoadClassAsLastInput();
+  }
+
   IntrinsicLocationsBuilderX86 intrinsic(codegen_);
   if (intrinsic.TryDispatch(invoke)) {
     return;
@@ -1214,6 +1220,10 @@
 }
 
 void InstructionCodeGeneratorX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
+  // Explicit clinit checks triggered by static invokes must have been
+  // pruned by art::PrepareForRegisterAllocation.
+  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
+
   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
     return;
   }
@@ -3809,7 +3819,7 @@
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
-  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
   if (instruction->HasUses()) {
     locations->SetOut(Location::SameAsFirstInput());
   }
@@ -3821,16 +3831,38 @@
   Location length_loc = locations->InAt(1);
   SlowPathCodeX86* slow_path =
     new (GetGraph()->GetArena()) BoundsCheckSlowPathX86(instruction, index_loc, length_loc);
-  codegen_->AddSlowPath(slow_path);
 
-  Register length = length_loc.AsRegister<Register>();
-  if (index_loc.IsConstant()) {
-    int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
-    __ cmpl(length, Immediate(value));
+  if (length_loc.IsConstant()) {
+    int32_t length = CodeGenerator::GetInt32ValueOf(length_loc.GetConstant());
+    if (index_loc.IsConstant()) {
+      // BCE will remove the bounds check if we are guarenteed to pass.
+      int32_t index = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
+      if (index < 0 || index >= length) {
+        codegen_->AddSlowPath(slow_path);
+        __ jmp(slow_path->GetEntryLabel());
+      } else {
+        // Some optimization after BCE may have generated this, and we should not
+        // generate a bounds check if it is a valid range.
+      }
+      return;
+    }
+
+    // We have to reverse the jump condition because the length is the constant.
+    Register index_reg = index_loc.AsRegister<Register>();
+    __ cmpl(index_reg, Immediate(length));
+    codegen_->AddSlowPath(slow_path);
+    __ j(kAboveEqual, slow_path->GetEntryLabel());
   } else {
-    __ cmpl(length, index_loc.AsRegister<Register>());
+    Register length = length_loc.AsRegister<Register>();
+    if (index_loc.IsConstant()) {
+      int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
+      __ cmpl(length, Immediate(value));
+    } else {
+      __ cmpl(length, index_loc.AsRegister<Register>());
+    }
+    codegen_->AddSlowPath(slow_path);
+    __ j(kBelowEqual, slow_path->GetEntryLabel());
   }
-  __ j(kBelowEqual, slow_path->GetEntryLabel());
 }
 
 void LocationsBuilderX86::VisitTemporary(HTemporary* temp) {
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 47425fb..5ce9329 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -197,7 +197,6 @@
 
     InvokeRuntimeCallingConvention calling_convention;
     __ movl(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(cls_->GetTypeIndex()));
-    x64_codegen->LoadCurrentMethod(CpuRegister(calling_convention.GetRegisterAt(1)));
     __ gs()->call(Address::Absolute((do_clinit_
           ? QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pInitializeStaticStorage)
           : QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pInitializeType)) , true));
@@ -244,7 +243,6 @@
     SaveLiveRegisters(codegen, locations);
 
     InvokeRuntimeCallingConvention calling_convention;
-    x64_codegen->LoadCurrentMethod(CpuRegister(calling_convention.GetRegisterAt(1)));
     __ movl(CpuRegister(calling_convention.GetRegisterAt(0)),
             Immediate(instruction_->GetStringIndex()));
     __ gs()->call(Address::Absolute(
@@ -1291,6 +1289,14 @@
 }
 
 void LocationsBuilderX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
+  // Explicit clinit checks triggered by static invokes must have been
+  // pruned by art::PrepareForRegisterAllocation, but this step is not
+  // run in baseline. So we remove them manually here if we find them.
+  // TODO: Instead of this local workaround, address this properly.
+  if (invoke->IsStaticWithExplicitClinitCheck()) {
+    invoke->RemoveClinitCheckOrLoadClassAsLastInput();
+  }
+
   IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
   if (intrinsic.TryDispatch(invoke)) {
     return;
@@ -1309,6 +1315,10 @@
 }
 
 void InstructionCodeGeneratorX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
+  // Explicit clinit checks triggered by static invokes must have been
+  // pruned by art::PrepareForRegisterAllocation.
+  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
+
   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
     return;
   }
@@ -3750,7 +3760,7 @@
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
-  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
   if (instruction->HasUses()) {
     locations->SetOut(Location::SameAsFirstInput());
   }
@@ -3762,16 +3772,38 @@
   Location length_loc = locations->InAt(1);
   SlowPathCodeX86_64* slow_path =
     new (GetGraph()->GetArena()) BoundsCheckSlowPathX86_64(instruction, index_loc, length_loc);
-  codegen_->AddSlowPath(slow_path);
 
-  CpuRegister length = length_loc.AsRegister<CpuRegister>();
-  if (index_loc.IsConstant()) {
-    int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
-    __ cmpl(length, Immediate(value));
+  if (length_loc.IsConstant()) {
+    int32_t length = CodeGenerator::GetInt32ValueOf(length_loc.GetConstant());
+    if (index_loc.IsConstant()) {
+      // BCE will remove the bounds check if we are guarenteed to pass.
+      int32_t index = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
+      if (index < 0 || index >= length) {
+        codegen_->AddSlowPath(slow_path);
+        __ jmp(slow_path->GetEntryLabel());
+      } else {
+        // Some optimization after BCE may have generated this, and we should not
+        // generate a bounds check if it is a valid range.
+      }
+      return;
+    }
+
+    // We have to reverse the jump condition because the length is the constant.
+    CpuRegister index_reg = index_loc.AsRegister<CpuRegister>();
+    __ cmpl(index_reg, Immediate(length));
+    codegen_->AddSlowPath(slow_path);
+    __ j(kAboveEqual, slow_path->GetEntryLabel());
   } else {
-    __ cmpl(length, index_loc.AsRegister<CpuRegister>());
+    CpuRegister length = length_loc.AsRegister<CpuRegister>();
+    if (index_loc.IsConstant()) {
+      int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
+      __ cmpl(length, Immediate(value));
+    } else {
+      __ cmpl(length, index_loc.AsRegister<CpuRegister>());
+    }
+    codegen_->AddSlowPath(slow_path);
+    __ j(kBelowEqual, slow_path->GetEntryLabel());
   }
-  __ j(kBelowEqual, slow_path->GetEntryLabel());
 }
 
 void CodeGeneratorX86_64::MarkGCCard(CpuRegister temp,
diff --git a/compiler/optimizing/constant_folding.h b/compiler/optimizing/constant_folding.h
index ac00824..66ff578 100644
--- a/compiler/optimizing/constant_folding.h
+++ b/compiler/optimizing/constant_folding.h
@@ -32,8 +32,8 @@
  */
 class HConstantFolding : public HOptimization {
  public:
-  explicit HConstantFolding(HGraph* graph)
-      : HOptimization(graph, true, kConstantFoldingPassName) {}
+  explicit HConstantFolding(HGraph* graph, const char* name = kConstantFoldingPassName)
+      : HOptimization(graph, true, name) {}
 
   void Run() OVERRIDE;
 
diff --git a/compiler/optimizing/constant_folding_test.cc b/compiler/optimizing/constant_folding_test.cc
index 02ad675..422223f 100644
--- a/compiler/optimizing/constant_folding_test.cc
+++ b/compiler/optimizing/constant_folding_test.cc
@@ -572,14 +572,19 @@
   };
 
   // Expected difference after dead code elimination.
-  diff_t expected_dce_diff = {
-    { "  3: IntConstant\n",     removed },
-    { "  13: IntConstant\n",    removed },
-    { "  18: IntConstant\n",    removed },
-    { "  24: IntConstant\n",    removed },
-    { "  34: IntConstant\n",    removed },
-  };
-  std::string expected_after_dce = Patch(expected_after_cf, expected_dce_diff);
+  std::string expected_after_dce =
+    "BasicBlock 0, succ: 1\n"
+    "  5: IntConstant []\n"
+    "  30: SuspendCheck\n"
+    "  32: IntConstant []\n"
+    "  33: IntConstant []\n"
+    "  35: IntConstant [28]\n"
+    "  31: Goto 1\n"
+    "BasicBlock 1, pred: 0, succ: 5\n"
+    "  21: SuspendCheck\n"
+    "  28: Return(35)\n"
+    "BasicBlock 5, pred: 1\n"
+    "  29: Exit\n";
 
   TestCode(data,
            expected_before,
@@ -647,13 +652,15 @@
     ASSERT_EQ(inst->AsIntConstant()->GetValue(), 1);
   };
 
-  // Expected difference after dead code elimination.
-  diff_t expected_dce_diff = {
-    { "  3: IntConstant [9, 15, 22]\n", "  3: IntConstant [9, 22]\n" },
-    { "  22: Phi(3, 5) [15]\n",         "  22: Phi(3, 5)\n" },
-    { "  15: Add(22, 3)\n",             removed }
-  };
-  std::string expected_after_dce = Patch(expected_after_cf, expected_dce_diff);
+  // Expected graph after dead code elimination.
+  std::string expected_after_dce =
+    "BasicBlock 0, succ: 1\n"
+    "  19: SuspendCheck\n"
+    "  20: Goto 1\n"
+    "BasicBlock 1, pred: 0, succ: 4\n"
+    "  17: ReturnVoid\n"
+    "BasicBlock 4, pred: 1\n"
+    "  18: Exit\n";
 
   TestCode(data,
            expected_before,
diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc
index 8045cc5..91cd60a 100644
--- a/compiler/optimizing/dead_code_elimination.cc
+++ b/compiler/optimizing/dead_code_elimination.cc
@@ -20,10 +20,78 @@
 
 namespace art {
 
-void HDeadCodeElimination::Run() {
+static void MarkReachableBlocks(HBasicBlock* block, ArenaBitVector* visited) {
+  int block_id = block->GetBlockId();
+  if (visited->IsBitSet(block_id)) {
+    return;
+  }
+  visited->SetBit(block_id);
+
+  HInstruction* last_instruction = block->GetLastInstruction();
+  if (last_instruction->IsIf()) {
+    HIf* if_instruction = last_instruction->AsIf();
+    HInstruction* condition = if_instruction->InputAt(0);
+    if (!condition->IsIntConstant()) {
+      MarkReachableBlocks(if_instruction->IfTrueSuccessor(), visited);
+      MarkReachableBlocks(if_instruction->IfFalseSuccessor(), visited);
+    } else if (condition->AsIntConstant()->IsOne()) {
+      MarkReachableBlocks(if_instruction->IfTrueSuccessor(), visited);
+    } else {
+      DCHECK(condition->AsIntConstant()->IsZero());
+      MarkReachableBlocks(if_instruction->IfFalseSuccessor(), visited);
+    }
+  } else {
+    for (size_t i = 0, e = block->GetSuccessors().Size(); i < e; ++i) {
+      MarkReachableBlocks(block->GetSuccessors().Get(i), visited);
+    }
+  }
+}
+
+void HDeadCodeElimination::MaybeRecordDeadBlock(HBasicBlock* block) {
+  if (stats_ != nullptr) {
+    stats_->RecordStat(MethodCompilationStat::kRemovedDeadInstruction,
+                       block->GetPhis().CountSize() + block->GetInstructions().CountSize());
+  }
+}
+
+void HDeadCodeElimination::RemoveDeadBlocks() {
+  // Classify blocks as reachable/unreachable.
+  ArenaAllocator* allocator = graph_->GetArena();
+  ArenaBitVector live_blocks(allocator, graph_->GetBlocks().Size(), false);
+  MarkReachableBlocks(graph_->GetEntryBlock(), &live_blocks);
+
+  // Remove all dead blocks. Process blocks in post-order, because removal needs
+  // the block's chain of dominators.
+  for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
+    HBasicBlock* block  = it.Current();
+    if (live_blocks.IsBitSet(block->GetBlockId())) {
+      continue;
+    }
+    MaybeRecordDeadBlock(block);
+    block->DisconnectAndDelete();
+  }
+
+  // Connect successive blocks created by dead branches. Order does not matter.
+  for (HReversePostOrderIterator it(*graph_); !it.Done();) {
+    HBasicBlock* block  = it.Current();
+    if (block->IsEntryBlock() || block->GetSuccessors().Size() != 1u) {
+      it.Advance();
+      continue;
+    }
+    HBasicBlock* successor = block->GetSuccessors().Get(0);
+    if (successor->IsExitBlock() || successor->GetPredecessors().Size() != 1u) {
+      it.Advance();
+      continue;
+    }
+    block->MergeWith(successor);
+
+    // Reiterate on this block in case it can be merged with its new successor.
+  }
+}
+
+void HDeadCodeElimination::RemoveDeadInstructions() {
   // Process basic blocks in post-order in the dominator tree, so that
-  // a dead instruction depending on another dead instruction is
-  // removed.
+  // a dead instruction depending on another dead instruction is removed.
   for (HPostOrderIterator b(*graph_); !b.Done(); b.Advance()) {
     HBasicBlock* block = b.Current();
     // Traverse this block's instructions in backward order and remove
@@ -47,4 +115,9 @@
   }
 }
 
+void HDeadCodeElimination::Run() {
+  RemoveDeadBlocks();
+  RemoveDeadInstructions();
+}
+
 }  // namespace art
diff --git a/compiler/optimizing/dead_code_elimination.h b/compiler/optimizing/dead_code_elimination.h
index cee9364..0bea0fc 100644
--- a/compiler/optimizing/dead_code_elimination.h
+++ b/compiler/optimizing/dead_code_elimination.h
@@ -40,6 +40,10 @@
     "dead_code_elimination";
 
  private:
+  void MaybeRecordDeadBlock(HBasicBlock* block);
+  void RemoveDeadBlocks();
+  void RemoveDeadInstructions();
+
   DISALLOW_COPY_AND_ASSIGN(HDeadCodeElimination);
 };
 
diff --git a/compiler/optimizing/dead_code_elimination_test.cc b/compiler/optimizing/dead_code_elimination_test.cc
index 98ae1ec..3209d3e 100644
--- a/compiler/optimizing/dead_code_elimination_test.cc
+++ b/compiler/optimizing/dead_code_elimination_test.cc
@@ -169,20 +169,25 @@
     "BasicBlock 5, pred: 4\n"
     "  28: Exit\n";
 
-  // Expected difference after dead code elimination.
-  diff_t expected_diff = {
-    { "  13: IntConstant [14]\n", removed },
-    { "  24: IntConstant [25]\n", removed },
-    { "  14: Add(19, 13) [25]\n", removed },
-    // The SuspendCheck instruction following this Add instruction
-    // inserts the latter in an environment, thus making it "used" and
-    // therefore non removable.  It ensues that some other Add and
-    // IntConstant instructions cannot be removed, as they are direct
-    // or indirect inputs of the initial Add instruction.
-    { "  19: Add(9, 18) [14]\n",  "  19: Add(9, 18) []\n" },
-    { "  25: Add(14, 24)\n",      removed },
-  };
-  std::string expected_after = Patch(expected_before, expected_diff);
+  // The SuspendCheck instruction following this Add instruction
+  // inserts the latter in an environment, thus making it "used" and
+  // therefore non removable.  It ensures that some other Add and
+  // IntConstant instructions cannot be removed, as they are direct
+  // or indirect inputs of the initial Add instruction.
+  std::string expected_after =
+    "BasicBlock 0, succ: 1\n"
+    "  3: IntConstant [9]\n"
+    "  5: IntConstant [9]\n"
+    "  18: IntConstant [19]\n"
+    "  29: SuspendCheck\n"
+    "  30: Goto 1\n"
+    "BasicBlock 1, pred: 0, succ: 5\n"
+    "  9: Add(3, 5) [19]\n"
+    "  19: Add(9, 18) []\n"
+    "  21: SuspendCheck\n"
+    "  27: ReturnVoid\n"
+    "BasicBlock 5, pred: 1\n"
+    "  28: Exit\n";
 
   TestCode(data, expected_before, expected_after);
 }
diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc
index 8950635..dc3124b 100644
--- a/compiler/optimizing/graph_checker.cc
+++ b/compiler/optimizing/graph_checker.cc
@@ -121,6 +121,18 @@
   }
 }
 
+void GraphChecker::VisitBoundsCheck(HBoundsCheck* check) {
+  if (!GetGraph()->HasBoundsChecks()) {
+    AddError(StringPrintf("Instruction %s:%d is a HBoundsCheck, "
+                          "but HasBoundsChecks() returns false",
+                          check->DebugName(),
+                          check->GetId()));
+  }
+
+  // Perform the instruction base checks too.
+  VisitInstruction(check);
+}
+
 void GraphChecker::VisitInstruction(HInstruction* instruction) {
   if (seen_ids_.IsBitSet(instruction->GetId())) {
     AddError(StringPrintf("Instruction id %d is duplicate in graph.",
@@ -191,6 +203,30 @@
   }
 }
 
+void GraphChecker::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
+  VisitInstruction(invoke);
+
+  if (invoke->IsStaticWithExplicitClinitCheck()) {
+    size_t last_input_index = invoke->InputCount() - 1;
+    HInstruction* last_input = invoke->InputAt(last_input_index);
+    if (last_input == nullptr) {
+      AddError(StringPrintf("Static invoke %s:%d marked as having an explicit clinit check "
+                            "has a null pointer as last input.",
+                            invoke->DebugName(),
+                            invoke->GetId()));
+    }
+    if (!last_input->IsClinitCheck() && !last_input->IsLoadClass()) {
+      AddError(StringPrintf("Static invoke %s:%d marked as having an explicit clinit check "
+                            "has a last instruction (%s:%d) which is neither a clinit check "
+                            "nor a load class instruction.",
+                            invoke->DebugName(),
+                            invoke->GetId(),
+                            last_input->DebugName(),
+                            last_input->GetId()));
+    }
+  }
+}
+
 void SSAChecker::VisitBasicBlock(HBasicBlock* block) {
   super_type::VisitBasicBlock(block);
 
@@ -264,6 +300,8 @@
     }
   }
 
+  const ArenaBitVector& loop_blocks = loop_header->GetLoopInformation()->GetBlocks();
+
   // Ensure there is only one back edge per loop.
   size_t num_back_edges =
     loop_header->GetLoopInformation()->GetBackEdges().Size();
@@ -276,16 +314,27 @@
         "Loop defined by header %d has several back edges: %zu.",
         id,
         num_back_edges));
+  } else {
+    DCHECK_EQ(num_back_edges, 1u);
+    int back_edge_id = loop_header->GetLoopInformation()->GetBackEdges().Get(0)->GetBlockId();
+    if (!loop_blocks.IsBitSet(back_edge_id)) {
+      AddError(StringPrintf(
+          "Loop defined by header %d has an invalid back edge %d.",
+          id,
+          back_edge_id));
+    }
   }
 
-  // Ensure all blocks in the loop are dominated by the loop header.
-  const ArenaBitVector& loop_blocks =
-    loop_header->GetLoopInformation()->GetBlocks();
+  // Ensure all blocks in the loop are live and dominated by the loop header.
   for (uint32_t i : loop_blocks.Indexes()) {
     HBasicBlock* loop_block = GetGraph()->GetBlocks().Get(i);
-    if (!loop_header->Dominates(loop_block)) {
+    if (loop_block == nullptr) {
+      AddError(StringPrintf("Loop defined by header %d contains a previously removed block %d.",
+                            id,
+                            i));
+    } else if (!loop_header->Dominates(loop_block)) {
       AddError(StringPrintf("Loop block %d not dominated by loop header %d.",
-                            loop_block->GetBlockId(),
+                            i,
                             id));
     }
   }
@@ -296,7 +345,7 @@
     if (!loop_blocks.IsSubsetOf(&outer_info->GetBlocks())) {
       AddError(StringPrintf("Blocks of loop defined by header %d are not a subset of blocks of "
                             "an outer loop defined by header %d.",
-                            loop_header->GetBlockId(),
+                            id,
                             outer_info->GetHeader()->GetBlockId()));
     }
   }
@@ -483,7 +532,7 @@
           Primitive::PrettyDescriptor(op->InputAt(1)->GetType())));
     }
   } else {
-    if (PrimitiveKind(op->InputAt(1)->GetType()) != PrimitiveKind(op->InputAt(0)->GetType())) {
+    if (PrimitiveKind(op->InputAt(0)->GetType()) != PrimitiveKind(op->InputAt(1)->GetType())) {
       AddError(StringPrintf(
           "Binary operation %s %d has inputs of different types: "
           "%s, and %s.",
@@ -508,7 +557,7 @@
           "from its input type: %s vs %s.",
           op->DebugName(), op->GetId(),
           Primitive::PrettyDescriptor(op->GetType()),
-          Primitive::PrettyDescriptor(op->InputAt(1)->GetType())));
+          Primitive::PrettyDescriptor(op->InputAt(0)->GetType())));
     }
   }
 }
diff --git a/compiler/optimizing/graph_checker.h b/compiler/optimizing/graph_checker.h
index 24fee37..b4314da 100644
--- a/compiler/optimizing/graph_checker.h
+++ b/compiler/optimizing/graph_checker.h
@@ -42,6 +42,12 @@
   // Check `instruction`.
   void VisitInstruction(HInstruction* instruction) OVERRIDE;
 
+  // Perform control-flow graph checks on instruction.
+  void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE;
+
+  // Check that the HasBoundsChecks() flag is set for bounds checks.
+  void VisitBoundsCheck(HBoundsCheck* check) OVERRIDE;
+
   // Was the last visit of the graph valid?
   bool IsValid() const {
     return errors_.empty();
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index bffd639..ada32db 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -130,6 +130,16 @@
     return false;
   }
 
+  if (invoke_instruction->IsInvokeStaticOrDirect() &&
+      invoke_instruction->AsInvokeStaticOrDirect()->IsStaticWithImplicitClinitCheck()) {
+    // Case of a static method that cannot be inlined because it implicitly
+    // requires an initialization check of its declaring class.
+    VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file)
+                   << " is not inlined because it is static and requires a clinit"
+                   << " check that cannot be emitted due to Dex cache limitations";
+    return false;
+  }
+
   if (!TryBuildAndInline(resolved_method, invoke_instruction, method_index, can_use_dex_cache)) {
     resolved_method->SetShouldNotInline();
     return false;
@@ -258,8 +268,8 @@
 
   callee_graph->InlineInto(graph_, invoke_instruction);
 
-  if (callee_graph->HasArrayAccesses()) {
-    graph_->SetHasArrayAccesses(true);
+  if (callee_graph->HasBoundsChecks()) {
+    graph_->SetHasBoundsChecks(true);
   }
 
   return true;
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index 932192e..abdf04e 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -79,6 +79,7 @@
 
 static void MoveArguments(HInvoke* invoke, ArenaAllocator* arena, CodeGeneratorARM* codegen) {
   if (invoke->InputCount() == 0) {
+    // No argument to move.
     return;
   }
 
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 117d6a4..7a753b2 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -88,6 +88,7 @@
 
 static void MoveArguments(HInvoke* invoke, ArenaAllocator* arena, CodeGeneratorARM64* codegen) {
   if (invoke->InputCount() == 0) {
+    // No argument to move.
     return;
   }
 
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index a8e2cdf..7275edb 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -113,6 +113,7 @@
 
 static void MoveArguments(HInvoke* invoke, ArenaAllocator* arena, CodeGeneratorX86* codegen) {
   if (invoke->InputCount() == 0) {
+    // No argument to move.
     return;
   }
 
@@ -1038,7 +1039,7 @@
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
   locations->SetInAt(0, Location::RequiresRegister());
-  HInstruction *value = invoke->InputAt(1);
+  HInstruction* value = invoke->InputAt(1);
   if (size == Primitive::kPrimByte) {
     locations->SetInAt(1, Location::ByteRegisterOrConstant(EDX, value));
   } else {
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 5d24d1f..35daaf6 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -105,6 +105,7 @@
 
 static void MoveArguments(HInvoke* invoke, ArenaAllocator* arena, CodeGeneratorX86_64* codegen) {
   if (invoke->InputCount() == 0) {
+    // No argument to move.
     return;
   }
 
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 6ab57b8..e2eb46a 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -303,25 +303,6 @@
   return cached_null_constant_;
 }
 
-template <class InstructionType, typename ValueType>
-InstructionType* HGraph::CreateConstant(ValueType value,
-                                        ArenaSafeMap<ValueType, InstructionType*>* cache) {
-  // Try to find an existing constant of the given value.
-  InstructionType* constant = nullptr;
-  auto cached_constant = cache->find(value);
-  if (cached_constant != cache->end()) {
-    constant = cached_constant->second;
-  }
-
-  // If not found or previously deleted, create and cache a new instruction.
-  if (constant == nullptr || constant->GetBlock() == nullptr) {
-    constant = new (arena_) InstructionType(value);
-    cache->Overwrite(value, constant);
-    InsertConstant(constant);
-  }
-  return constant;
-}
-
 HConstant* HGraph::GetConstant(Primitive::Type type, int64_t value) {
   switch (type) {
     case Primitive::Type::kPrimBoolean:
@@ -343,6 +324,18 @@
   }
 }
 
+void HGraph::CacheFloatConstant(HFloatConstant* constant) {
+  int32_t value = bit_cast<int32_t, float>(constant->GetValue());
+  DCHECK(cached_float_constants_.find(value) == cached_float_constants_.end());
+  cached_float_constants_.Overwrite(value, constant);
+}
+
+void HGraph::CacheDoubleConstant(HDoubleConstant* constant) {
+  int64_t value = bit_cast<int64_t, double>(constant->GetValue());
+  DCHECK(cached_double_constants_.find(value) == cached_double_constants_.end());
+  cached_double_constants_.Overwrite(value, constant);
+}
+
 void HLoopInformation::Add(HBasicBlock* block) {
   blocks_.SetBit(block->GetBlockId());
 }
@@ -481,6 +474,7 @@
 }
 
 void HBasicBlock::RemoveInstruction(HInstruction* instruction, bool ensure_safety) {
+  DCHECK(!instruction->IsPhi());
   Remove(&instructions_, this, instruction, ensure_safety);
 }
 
@@ -488,6 +482,14 @@
   Remove(&phis_, this, phi, ensure_safety);
 }
 
+void HBasicBlock::RemoveInstructionOrPhi(HInstruction* instruction, bool ensure_safety) {
+  if (instruction->IsPhi()) {
+    RemovePhi(instruction->AsPhi(), ensure_safety);
+  } else {
+    RemoveInstruction(instruction, ensure_safety);
+  }
+}
+
 void HEnvironment::CopyFrom(HEnvironment* env) {
   for (size_t i = 0; i < env->Size(); i++) {
     HInstruction* instruction = env->GetInstructionAt(i);
@@ -498,6 +500,28 @@
   }
 }
 
+void HEnvironment::CopyFromWithLoopPhiAdjustment(HEnvironment* env,
+                                                 HBasicBlock* loop_header) {
+  DCHECK(loop_header->IsLoopHeader());
+  for (size_t i = 0; i < env->Size(); i++) {
+    HInstruction* instruction = env->GetInstructionAt(i);
+    SetRawEnvAt(i, instruction);
+    if (instruction == nullptr) {
+      continue;
+    }
+    if (instruction->IsLoopHeaderPhi() && (instruction->GetBlock() == loop_header)) {
+      // At the end of the loop pre-header, the corresponding value for instruction
+      // is the first input of the phi.
+      HInstruction* initial = instruction->AsPhi()->InputAt(0);
+      DCHECK(initial->GetBlock()->Dominates(loop_header));
+      SetRawEnvAt(i, initial);
+      initial->AddEnvUseAt(this, i);
+    } else {
+      instruction->AddEnvUseAt(this, i);
+    }
+  }
+}
+
 void HEnvironment::RemoveAsUserOfInput(size_t index) const {
   const HUserRecord<HEnvironment*> user_record = vregs_.Get(index);
   user_record.GetInstruction()->RemoveEnvironmentUser(user_record.GetUseNode());
@@ -672,6 +696,11 @@
   input->AddUseAt(this, inputs_.Size() - 1);
 }
 
+void HPhi::RemoveInputAt(size_t index) {
+  RemoveAsUserOfInput(index);
+  inputs_.DeleteAt(index);
+}
+
 #define DEFINE_ACCEPT(name, super)                                             \
 void H##name::Accept(HGraphVisitor* visitor) {                                 \
   visitor->Visit##name(this);                                                  \
@@ -867,6 +896,15 @@
   return !GetPhis().IsEmpty() && GetFirstPhi()->GetNext() == nullptr;
 }
 
+size_t HInstructionList::CountSize() const {
+  size_t size = 0;
+  HInstruction* current = first_instruction_;
+  for (; current != nullptr; current = current->GetNext()) {
+    size++;
+  }
+  return size;
+}
+
 void HInstructionList::SetBlockOfInstructions(HBasicBlock* block) const {
   for (HInstruction* current = first_instruction_;
        current != nullptr;
@@ -898,40 +936,167 @@
   }
 }
 
-void HBasicBlock::DisconnectFromAll() {
-  DCHECK(dominated_blocks_.IsEmpty()) << "Unimplemented scenario";
+void HBasicBlock::DisconnectAndDelete() {
+  // Dominators must be removed after all the blocks they dominate. This way
+  // a loop header is removed last, a requirement for correct loop information
+  // iteration.
+  DCHECK(dominated_blocks_.IsEmpty());
 
+  // Remove the block from all loops it is included in.
+  for (HLoopInformationOutwardIterator it(*this); !it.Done(); it.Advance()) {
+    HLoopInformation* loop_info = it.Current();
+    loop_info->Remove(this);
+    if (loop_info->IsBackEdge(*this)) {
+      // This deliberately leaves the loop in an inconsistent state and will
+      // fail SSAChecker unless the entire loop is removed during the pass.
+      loop_info->RemoveBackEdge(this);
+    }
+  }
+
+  // Disconnect the block from its predecessors and update their control-flow
+  // instructions.
   for (size_t i = 0, e = predecessors_.Size(); i < e; ++i) {
-    predecessors_.Get(i)->successors_.Delete(this);
+    HBasicBlock* predecessor = predecessors_.Get(i);
+    HInstruction* last_instruction = predecessor->GetLastInstruction();
+    predecessor->RemoveInstruction(last_instruction);
+    predecessor->RemoveSuccessor(this);
+    if (predecessor->GetSuccessors().Size() == 1u) {
+      DCHECK(last_instruction->IsIf());
+      predecessor->AddInstruction(new (graph_->GetArena()) HGoto());
+    } else {
+      // The predecessor has no remaining successors and therefore must be dead.
+      // We deliberately leave it without a control-flow instruction so that the
+      // SSAChecker fails unless it is not removed during the pass too.
+      DCHECK_EQ(predecessor->GetSuccessors().Size(), 0u);
+    }
   }
-  for (size_t i = 0, e = successors_.Size(); i < e; ++i) {
-    successors_.Get(i)->predecessors_.Delete(this);
-  }
-  dominator_->dominated_blocks_.Delete(this);
-
   predecessors_.Reset();
+
+  // Disconnect the block from its successors and update their dominators
+  // and phis.
+  for (size_t i = 0, e = successors_.Size(); i < e; ++i) {
+    HBasicBlock* successor = successors_.Get(i);
+    // Delete this block from the list of predecessors.
+    size_t this_index = successor->GetPredecessorIndexOf(this);
+    successor->predecessors_.DeleteAt(this_index);
+
+    // Check that `successor` has other predecessors, otherwise `this` is the
+    // dominator of `successor` which violates the order DCHECKed at the top.
+    DCHECK(!successor->predecessors_.IsEmpty());
+
+    // Recompute the successor's dominator.
+    HBasicBlock* old_dominator = successor->GetDominator();
+    HBasicBlock* new_dominator = successor->predecessors_.Get(0);
+    for (size_t j = 1, f = successor->predecessors_.Size(); j < f; ++j) {
+      new_dominator = graph_->FindCommonDominator(
+          new_dominator, successor->predecessors_.Get(j));
+    }
+    if (old_dominator != new_dominator) {
+      successor->SetDominator(new_dominator);
+      old_dominator->RemoveDominatedBlock(successor);
+      new_dominator->AddDominatedBlock(successor);
+    }
+
+    // Remove this block's entries in the successor's phis.
+    if (successor->predecessors_.Size() == 1u) {
+      // The successor has just one predecessor left. Replace phis with the only
+      // remaining input.
+      for (HInstructionIterator phi_it(successor->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
+        HPhi* phi = phi_it.Current()->AsPhi();
+        phi->ReplaceWith(phi->InputAt(1 - this_index));
+        successor->RemovePhi(phi);
+      }
+    } else {
+      for (HInstructionIterator phi_it(successor->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
+        phi_it.Current()->AsPhi()->RemoveInputAt(this_index);
+      }
+    }
+  }
   successors_.Reset();
-  dominator_ = nullptr;
-  graph_ = nullptr;
+
+  // Disconnect from the dominator.
+  dominator_->RemoveDominatedBlock(this);
+  SetDominator(nullptr);
+
+  // Delete from the graph. The function safely deletes remaining instructions
+  // and updates the reverse post order.
+  graph_->DeleteDeadBlock(this);
+  SetGraph(nullptr);
 }
 
 void HBasicBlock::MergeWith(HBasicBlock* other) {
-  DCHECK(successors_.IsEmpty()) << "Unimplemented block merge scenario";
-  DCHECK(dominated_blocks_.IsEmpty()
-         || (dominated_blocks_.Size() == 1 && dominated_blocks_.Get(0) == other))
-      << "Unimplemented block merge scenario";
+  DCHECK_EQ(GetGraph(), other->GetGraph());
+  DCHECK(GetDominatedBlocks().Contains(other));
+  DCHECK_EQ(GetSuccessors().Size(), 1u);
+  DCHECK_EQ(GetSuccessors().Get(0), other);
+  DCHECK_EQ(other->GetPredecessors().Size(), 1u);
+  DCHECK_EQ(other->GetPredecessors().Get(0), this);
   DCHECK(other->GetPhis().IsEmpty());
 
-  successors_.Reset();
-  dominated_blocks_.Reset();
+  // Move instructions from `other` to `this`.
+  DCHECK(EndsWithControlFlowInstruction());
+  RemoveInstruction(GetLastInstruction());
   instructions_.Add(other->GetInstructions());
-  other->GetInstructions().SetBlockOfInstructions(this);
+  other->instructions_.SetBlockOfInstructions(this);
+  other->instructions_.Clear();
 
-  while (!other->GetSuccessors().IsEmpty()) {
-    HBasicBlock* successor = other->GetSuccessors().Get(0);
+  // Remove `other` from the loops it is included in.
+  for (HLoopInformationOutwardIterator it(*other); !it.Done(); it.Advance()) {
+    HLoopInformation* loop_info = it.Current();
+    loop_info->Remove(other);
+    if (loop_info->IsBackEdge(*other)) {
+      loop_info->ClearBackEdges();
+      loop_info->AddBackEdge(this);
+    }
+  }
+
+  // Update links to the successors of `other`.
+  successors_.Reset();
+  while (!other->successors_.IsEmpty()) {
+    HBasicBlock* successor = other->successors_.Get(0);
     successor->ReplacePredecessor(other, this);
   }
 
+  // Update the dominator tree.
+  dominated_blocks_.Delete(other);
+  for (size_t i = 0, e = other->GetDominatedBlocks().Size(); i < e; ++i) {
+    HBasicBlock* dominated = other->GetDominatedBlocks().Get(i);
+    dominated_blocks_.Add(dominated);
+    dominated->SetDominator(this);
+  }
+  other->dominated_blocks_.Reset();
+  other->dominator_ = nullptr;
+
+  // Clear the list of predecessors of `other` in preparation of deleting it.
+  other->predecessors_.Reset();
+
+  // Delete `other` from the graph. The function updates reverse post order.
+  graph_->DeleteDeadBlock(other);
+  other->SetGraph(nullptr);
+}
+
+void HBasicBlock::MergeWithInlined(HBasicBlock* other) {
+  DCHECK_NE(GetGraph(), other->GetGraph());
+  DCHECK(GetDominatedBlocks().IsEmpty());
+  DCHECK(GetSuccessors().IsEmpty());
+  DCHECK(!EndsWithControlFlowInstruction());
+  DCHECK_EQ(other->GetPredecessors().Size(), 1u);
+  DCHECK(other->GetPredecessors().Get(0)->IsEntryBlock());
+  DCHECK(other->GetPhis().IsEmpty());
+  DCHECK(!other->IsInLoop());
+
+  // Move instructions from `other` to `this`.
+  instructions_.Add(other->GetInstructions());
+  other->instructions_.SetBlockOfInstructions(this);
+
+  // Update links to the successors of `other`.
+  successors_.Reset();
+  while (!other->successors_.IsEmpty()) {
+    HBasicBlock* successor = other->successors_.Get(0);
+    successor->ReplacePredecessor(other, this);
+  }
+
+  // Update the dominator tree.
   for (size_t i = 0, e = other->GetDominatedBlocks().Size(); i < e; ++i) {
     HBasicBlock* dominated = other->GetDominatedBlocks().Get(i);
     dominated_blocks_.Add(dominated);
@@ -973,6 +1138,24 @@
   }
 }
 
+void HGraph::DeleteDeadBlock(HBasicBlock* block) {
+  DCHECK_EQ(block->GetGraph(), this);
+  DCHECK(block->GetSuccessors().IsEmpty());
+  DCHECK(block->GetPredecessors().IsEmpty());
+  DCHECK(block->GetDominatedBlocks().IsEmpty());
+  DCHECK(block->GetDominator() == nullptr);
+
+  for (HBackwardInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+    block->RemoveInstruction(it.Current());
+  }
+  for (HBackwardInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
+    block->RemovePhi(it.Current()->AsPhi());
+  }
+
+  reverse_post_order_.Delete(block);
+  blocks_.Put(block->GetBlockId(), nullptr);
+}
+
 void HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) {
   if (GetBlocks().Size() == 3) {
     // Simple case of an entry block, a body block, and an exit block.
@@ -1005,7 +1188,7 @@
 
     HBasicBlock* first = entry_block_->GetSuccessors().Get(0);
     DCHECK(!first->IsInLoop());
-    at->MergeWith(first);
+    at->MergeWithInlined(first);
     exit_block_->ReplaceWith(to);
 
     // Update all predecessors of the exit block (now the `to` block)
@@ -1113,7 +1296,7 @@
   // - Remove suspend checks, that hold an environment.
   // We must do this after the other blocks have been inlined, otherwise ids of
   // constants could overlap with the inner graph.
-  int parameter_index = 0;
+  size_t parameter_index = 0;
   for (HInstructionIterator it(entry_block_->GetInstructions()); !it.Done(); it.Advance()) {
     HInstruction* current = it.Current();
     if (current->IsNullConstant()) {
@@ -1122,10 +1305,19 @@
       current->ReplaceWith(outer_graph->GetIntConstant(current->AsIntConstant()->GetValue()));
     } else if (current->IsLongConstant()) {
       current->ReplaceWith(outer_graph->GetLongConstant(current->AsLongConstant()->GetValue()));
-    } else if (current->IsFloatConstant() || current->IsDoubleConstant()) {
-      // TODO: Don't duplicate floating-point constants.
-      current->MoveBefore(outer_graph->GetEntryBlock()->GetLastInstruction());
+    } else if (current->IsFloatConstant()) {
+      current->ReplaceWith(outer_graph->GetFloatConstant(current->AsFloatConstant()->GetValue()));
+    } else if (current->IsDoubleConstant()) {
+      current->ReplaceWith(outer_graph->GetDoubleConstant(current->AsDoubleConstant()->GetValue()));
     } else if (current->IsParameterValue()) {
+      if (kIsDebugBuild
+          && invoke->IsInvokeStaticOrDirect()
+          && invoke->AsInvokeStaticOrDirect()->IsStaticWithExplicitClinitCheck()) {
+        // Ensure we do not use the last input of `invoke`, as it
+        // contains a clinit check which is not an actual argument.
+        size_t last_input_index = invoke->InputCount() - 1;
+        DCHECK(parameter_index != last_input_index);
+      }
       current->ReplaceWith(invoke->InputAt(parameter_index++));
     } else {
       DCHECK(current->IsGoto() || current->IsSuspendCheck());
@@ -1137,53 +1329,6 @@
   invoke->GetBlock()->RemoveInstruction(invoke);
 }
 
-void HGraph::MergeEmptyBranches(HBasicBlock* start_block, HBasicBlock* end_block) {
-  // Find the two branches of an If.
-  DCHECK_EQ(start_block->GetSuccessors().Size(), 2u);
-  HBasicBlock* left_branch = start_block->GetSuccessors().Get(0);
-  HBasicBlock* right_branch = start_block->GetSuccessors().Get(1);
-
-  // Make sure this is a diamond control-flow path.
-  DCHECK_EQ(left_branch->GetSuccessors().Get(0), end_block);
-  DCHECK_EQ(right_branch->GetSuccessors().Get(0), end_block);
-  DCHECK_EQ(end_block->GetPredecessors().Size(), 2u);
-  DCHECK_EQ(start_block, end_block->GetDominator());
-
-  // Disconnect the branches and merge the two blocks. This will move
-  // all instructions from 'end_block' to 'start_block'.
-  DCHECK(left_branch->IsSingleGoto());
-  DCHECK(right_branch->IsSingleGoto());
-  left_branch->DisconnectFromAll();
-  right_branch->DisconnectFromAll();
-  start_block->RemoveInstruction(start_block->GetLastInstruction());
-  start_block->MergeWith(end_block);
-
-  // Delete the now redundant blocks from the graph.
-  blocks_.Put(left_branch->GetBlockId(), nullptr);
-  blocks_.Put(right_branch->GetBlockId(), nullptr);
-  blocks_.Put(end_block->GetBlockId(), nullptr);
-
-  // Update reverse post order.
-  reverse_post_order_.Delete(left_branch);
-  reverse_post_order_.Delete(right_branch);
-  reverse_post_order_.Delete(end_block);
-
-  // Update loops which contain the code.
-  for (HLoopInformationOutwardIterator it(*start_block); !it.Done(); it.Advance()) {
-    HLoopInformation* loop_info = it.Current();
-    DCHECK(loop_info->Contains(*left_branch));
-    DCHECK(loop_info->Contains(*right_branch));
-    DCHECK(loop_info->Contains(*end_block));
-    loop_info->Remove(left_branch);
-    loop_info->Remove(right_branch);
-    loop_info->Remove(end_block);
-    if (loop_info->IsBackEdge(*end_block)) {
-      loop_info->RemoveBackEdge(end_block);
-      loop_info->AddBackEdge(start_block);
-    }
-  }
-}
-
 std::ostream& operator<<(std::ostream& os, const ReferenceTypeInfo& rhs) {
   ScopedObjectAccess soa(Thread::Current());
   os << "["
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index b89487f..0533bff 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -97,6 +97,9 @@
   void AddAfter(HInstruction* cursor, const HInstructionList& instruction_list);
   void Add(const HInstructionList& instruction_list);
 
+  // Return the number of instructions in the list. This is an expensive operation.
+  size_t CountSize() const;
+
  private:
   HInstruction* first_instruction_;
   HInstruction* last_instruction_;
@@ -124,12 +127,14 @@
         number_of_vregs_(0),
         number_of_in_vregs_(0),
         temporaries_vreg_slots_(0),
-        has_array_accesses_(false),
+        has_bounds_checks_(false),
         debuggable_(debuggable),
         current_instruction_id_(start_instruction_id),
         cached_null_constant_(nullptr),
         cached_int_constants_(std::less<int32_t>(), arena->Adapter()),
-        cached_long_constants_(std::less<int64_t>(), arena->Adapter()) {}
+        cached_float_constants_(std::less<int32_t>(), arena->Adapter()),
+        cached_long_constants_(std::less<int64_t>(), arena->Adapter()),
+        cached_double_constants_(std::less<int64_t>(), arena->Adapter()) {}
 
   ArenaAllocator* GetArena() const { return arena_; }
   const GrowableArray<HBasicBlock*>& GetBlocks() const { return blocks_; }
@@ -168,7 +173,8 @@
   // Inline this graph in `outer_graph`, replacing the given `invoke` instruction.
   void InlineInto(HGraph* outer_graph, HInvoke* invoke);
 
-  void MergeEmptyBranches(HBasicBlock* start_block, HBasicBlock* end_block);
+  // Removes `block` from the graph.
+  void DeleteDeadBlock(HBasicBlock* block);
 
   void SplitCriticalEdge(HBasicBlock* block, HBasicBlock* successor);
   void SimplifyLoop(HBasicBlock* header);
@@ -226,19 +232,19 @@
     return linear_order_;
   }
 
-  bool HasArrayAccesses() const {
-    return has_array_accesses_;
+  bool HasBoundsChecks() const {
+    return has_bounds_checks_;
   }
 
-  void SetHasArrayAccesses(bool value) {
-    has_array_accesses_ = value;
+  void SetHasBoundsChecks(bool value) {
+    has_bounds_checks_ = value;
   }
 
   bool IsDebuggable() const { return debuggable_; }
 
   // Returns a constant of the given type and value. If it does not exist
-  // already, it is created and inserted into the graph. Only integral types
-  // are currently supported.
+  // already, it is created and inserted into the graph. This method is only for
+  // integral types.
   HConstant* GetConstant(Primitive::Type type, int64_t value);
   HNullConstant* GetNullConstant();
   HIntConstant* GetIntConstant(int32_t value) {
@@ -247,9 +253,16 @@
   HLongConstant* GetLongConstant(int64_t value) {
     return CreateConstant(value, &cached_long_constants_);
   }
+  HFloatConstant* GetFloatConstant(float value) {
+    return CreateConstant(bit_cast<int32_t, float>(value), &cached_float_constants_);
+  }
+  HDoubleConstant* GetDoubleConstant(double value) {
+    return CreateConstant(bit_cast<int64_t, double>(value), &cached_double_constants_);
+  }
+
+  HBasicBlock* FindCommonDominator(HBasicBlock* first, HBasicBlock* second) const;
 
  private:
-  HBasicBlock* FindCommonDominator(HBasicBlock* first, HBasicBlock* second) const;
   void VisitBlockForDominatorTree(HBasicBlock* block,
                                   HBasicBlock* predecessor,
                                   GrowableArray<size_t>* visits);
@@ -260,10 +273,34 @@
   void RemoveInstructionsAsUsersFromDeadBlocks(const ArenaBitVector& visited) const;
   void RemoveDeadBlocks(const ArenaBitVector& visited);
 
-  template <class InstType, typename ValueType>
-  InstType* CreateConstant(ValueType value, ArenaSafeMap<ValueType, InstType*>* cache);
+  template <class InstructionType, typename ValueType>
+  InstructionType* CreateConstant(ValueType value,
+                                  ArenaSafeMap<ValueType, InstructionType*>* cache) {
+    // Try to find an existing constant of the given value.
+    InstructionType* constant = nullptr;
+    auto cached_constant = cache->find(value);
+    if (cached_constant != cache->end()) {
+      constant = cached_constant->second;
+    }
+
+    // If not found or previously deleted, create and cache a new instruction.
+    if (constant == nullptr || constant->GetBlock() == nullptr) {
+      constant = new (arena_) InstructionType(value);
+      cache->Overwrite(value, constant);
+      InsertConstant(constant);
+    }
+    return constant;
+  }
+
   void InsertConstant(HConstant* instruction);
 
+  // Cache a float constant into the graph. This method should only be
+  // called by the SsaBuilder when creating "equivalent" instructions.
+  void CacheFloatConstant(HFloatConstant* constant);
+
+  // See CacheFloatConstant comment.
+  void CacheDoubleConstant(HDoubleConstant* constant);
+
   ArenaAllocator* const arena_;
 
   // List of blocks in insertion order.
@@ -290,8 +327,8 @@
   // Number of vreg size slots that the temporaries use (used in baseline compiler).
   size_t temporaries_vreg_slots_;
 
-  // Has array accesses. We can totally skip BCE if it's false.
-  bool has_array_accesses_;
+  // Has bounds checks. We can totally skip BCE if it's false.
+  bool has_bounds_checks_;
 
   // Indicates whether the graph should be compiled in a way that
   // ensures full debuggability. If false, we can apply more
@@ -301,11 +338,14 @@
   // The current id to assign to a newly added instruction. See HInstruction.id_.
   int32_t current_instruction_id_;
 
-  // Cached common constants often needed by optimization passes.
+  // Cached constants.
   HNullConstant* cached_null_constant_;
   ArenaSafeMap<int32_t, HIntConstant*> cached_int_constants_;
+  ArenaSafeMap<int32_t, HFloatConstant*> cached_float_constants_;
   ArenaSafeMap<int64_t, HLongConstant*> cached_long_constants_;
+  ArenaSafeMap<int64_t, HDoubleConstant*> cached_double_constants_;
 
+  friend class SsaBuilder;           // For caching constants.
   friend class SsaLivenessAnalysis;  // For the linear order.
   ART_FRIEND_TEST(GraphTest, IfSuccessorSimpleJoinBlock1);
   DISALLOW_COPY_AND_ASSIGN(HGraph);
@@ -451,6 +491,7 @@
   HBasicBlock* GetDominator() const { return dominator_; }
   void SetDominator(HBasicBlock* dominator) { dominator_ = dominator; }
   void AddDominatedBlock(HBasicBlock* block) { dominated_blocks_.Add(block); }
+  void RemoveDominatedBlock(HBasicBlock* block) { dominated_blocks_.Delete(block); }
   void ReplaceDominatedBlock(HBasicBlock* existing, HBasicBlock* new_block) {
     for (size_t i = 0, e = dominated_blocks_.Size(); i < e; ++i) {
       if (dominated_blocks_.Get(i) == existing) {
@@ -520,6 +561,13 @@
     predecessors_.Put(1, temp);
   }
 
+  void SwapSuccessors() {
+    DCHECK_EQ(successors_.Size(), 2u);
+    HBasicBlock* temp = successors_.Get(0);
+    successors_.Put(0, successors_.Get(1));
+    successors_.Put(1, temp);
+  }
+
   size_t GetPredecessorIndexOf(HBasicBlock* predecessor) {
     for (size_t i = 0, e = predecessors_.Size(); i < e; ++i) {
       if (predecessors_.Get(i) == predecessor) {
@@ -550,7 +598,7 @@
   // that this method does not update the graph, reverse post order, loop
   // information, nor make sure the blocks are consistent (for example ending
   // with a control flow instruction).
-  void MergeWith(HBasicBlock* other);
+  void MergeWithInlined(HBasicBlock* other);
 
   // Replace `this` with `other`. Predecessors, successors, and dominated blocks
   // of `this` are moved to `other`.
@@ -559,12 +607,17 @@
   // with a control flow instruction).
   void ReplaceWith(HBasicBlock* other);
 
-  // Disconnects `this` from all its predecessors, successors and the dominator.
-  // It assumes that `this` does not dominate any blocks.
-  // Note that this method does not update the graph, reverse post order, loop
-  // information, nor make sure the blocks are consistent (for example ending
-  // with a control flow instruction).
-  void DisconnectFromAll();
+  // Merge `other` at the end of `this`. This method updates loops, reverse post
+  // order, links to predecessors, successors, dominators and deletes the block
+  // from the graph. The two blocks must be successive, i.e. `this` the only
+  // predecessor of `other` and vice versa.
+  void MergeWith(HBasicBlock* other);
+
+  // Disconnects `this` from all its predecessors, successors and dominator,
+  // removes it from all loops it is included in and eventually from the graph.
+  // The block must not dominate any other block. Predecessors and successors
+  // are safely updated.
+  void DisconnectAndDelete();
 
   void AddInstruction(HInstruction* instruction);
   void InsertInstructionBefore(HInstruction* instruction, HInstruction* cursor);
@@ -578,6 +631,7 @@
   // instruction is not in use and removes it from the use lists of its inputs.
   void RemoveInstruction(HInstruction* instruction, bool ensure_safety = true);
   void RemovePhi(HPhi* phi, bool ensure_safety = true);
+  void RemoveInstructionOrPhi(HInstruction* instruction, bool ensure_safety = true);
 
   bool IsLoopHeader() const {
     return (loop_information_ != nullptr) && (loop_information_->GetHeader() == this);
@@ -996,6 +1050,10 @@
   }
 
   void CopyFrom(HEnvironment* env);
+  // Copy from `env`. If it's a loop phi for `loop_header`, copy the first
+  // input to the loop phi instead. This is for inserting instructions that
+  // require an environment (like HDeoptimization) in the loop pre-header.
+  void CopyFromWithLoopPhiAdjustment(HEnvironment* env, HBasicBlock* loop_header);
 
   void SetRawEnvAt(size_t index, HInstruction* instruction) {
     vregs_.Put(index, HUserRecord<HEnvironment*>(instruction));
@@ -1231,6 +1289,13 @@
     environment_->CopyFrom(environment);
   }
 
+  void CopyEnvironmentFromWithLoopPhiAdjustment(HEnvironment* environment,
+                                                HBasicBlock* block) {
+    ArenaAllocator* allocator = GetBlock()->GetGraph()->GetArena();
+    environment_ = new (allocator) HEnvironment(allocator, environment->Size());
+    environment_->CopyFromWithLoopPhiAdjustment(environment, block);
+  }
+
   // Returns the number of entries in the environment. Typically, that is the
   // number of dex registers in a method. It could be more in case of inlining.
   size_t EnvironmentSize() const;
@@ -2023,13 +2088,14 @@
 
  private:
   explicit HFloatConstant(float value) : HConstant(Primitive::kPrimFloat), value_(value) {}
+  explicit HFloatConstant(int32_t value)
+      : HConstant(Primitive::kPrimFloat), value_(bit_cast<float, int32_t>(value)) {}
 
   const float value_;
 
-  // Only the SsaBuilder can currently create floating-point constants. If we
-  // ever need to create them later in the pipeline, we will have to handle them
-  // the same way as integral constants.
+  // Only the SsaBuilder and HGraph can create floating-point constants.
   friend class SsaBuilder;
+  friend class HGraph;
   DISALLOW_COPY_AND_ASSIGN(HFloatConstant);
 };
 
@@ -2060,13 +2126,14 @@
 
  private:
   explicit HDoubleConstant(double value) : HConstant(Primitive::kPrimDouble), value_(value) {}
+  explicit HDoubleConstant(int64_t value)
+      : HConstant(Primitive::kPrimDouble), value_(bit_cast<double, int64_t>(value)) {}
 
   const double value_;
 
-  // Only the SsaBuilder can currently create floating-point constants. If we
-  // ever need to create them later in the pipeline, we will have to handle them
-  // the same way as integral constants.
+  // Only the SsaBuilder and HGraph can create floating-point constants.
   friend class SsaBuilder;
+  friend class HGraph;
   DISALLOW_COPY_AND_ASSIGN(HDoubleConstant);
 };
 
@@ -2211,6 +2278,14 @@
 
 class HInvokeStaticOrDirect : public HInvoke {
  public:
+  // Requirements of this method call regarding the class
+  // initialization (clinit) check of its declaring class.
+  enum class ClinitCheckRequirement {
+    kNone,      // Class already initialized.
+    kExplicit,  // Static call having explicit clinit check as last input.
+    kImplicit,  // Static call implicitly requiring a clinit check.
+  };
+
   HInvokeStaticOrDirect(ArenaAllocator* arena,
                         uint32_t number_of_arguments,
                         Primitive::Type return_type,
@@ -2218,11 +2293,13 @@
                         uint32_t dex_method_index,
                         bool is_recursive,
                         InvokeType original_invoke_type,
-                        InvokeType invoke_type)
+                        InvokeType invoke_type,
+                        ClinitCheckRequirement clinit_check_requirement)
       : HInvoke(arena, number_of_arguments, return_type, dex_pc, dex_method_index),
         original_invoke_type_(original_invoke_type),
         invoke_type_(invoke_type),
-        is_recursive_(is_recursive) {}
+        is_recursive_(is_recursive),
+        clinit_check_requirement_(clinit_check_requirement) {}
 
   bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE {
     UNUSED(obj);
@@ -2236,12 +2313,60 @@
   bool IsRecursive() const { return is_recursive_; }
   bool NeedsDexCache() const OVERRIDE { return !IsRecursive(); }
 
+  // Is this instruction a call to a static method?
+  bool IsStatic() const {
+    return GetInvokeType() == kStatic;
+  }
+
+  // Remove the art::HClinitCheck or art::HLoadClass instruction as
+  // last input (only relevant for static calls with explicit clinit
+  // check).
+  void RemoveClinitCheckOrLoadClassAsLastInput() {
+    DCHECK(IsStaticWithExplicitClinitCheck());
+    size_t last_input_index = InputCount() - 1;
+    HInstruction* last_input = InputAt(last_input_index);
+    DCHECK(last_input != nullptr);
+    DCHECK(last_input->IsClinitCheck() || last_input->IsLoadClass()) << last_input->DebugName();
+    RemoveAsUserOfInput(last_input_index);
+    inputs_.DeleteAt(last_input_index);
+    clinit_check_requirement_ = ClinitCheckRequirement::kImplicit;
+    DCHECK(IsStaticWithImplicitClinitCheck());
+  }
+
+  // Is this a call to a static method whose declaring class has an
+  // explicit intialization check in the graph?
+  bool IsStaticWithExplicitClinitCheck() const {
+    return IsStatic() && (clinit_check_requirement_ == ClinitCheckRequirement::kExplicit);
+  }
+
+  // Is this a call to a static method whose declaring class has an
+  // implicit intialization check requirement?
+  bool IsStaticWithImplicitClinitCheck() const {
+    return IsStatic() && (clinit_check_requirement_ == ClinitCheckRequirement::kImplicit);
+  }
+
   DECLARE_INSTRUCTION(InvokeStaticOrDirect);
 
+ protected:
+  const HUserRecord<HInstruction*> InputRecordAt(size_t i) const OVERRIDE {
+    const HUserRecord<HInstruction*> input_record = HInvoke::InputRecordAt(i);
+    if (kIsDebugBuild && IsStaticWithExplicitClinitCheck() && (i == InputCount() - 1)) {
+      HInstruction* input = input_record.GetInstruction();
+      // `input` is the last input of a static invoke marked as having
+      // an explicit clinit check. It must either be:
+      // - an art::HClinitCheck instruction, set by art::HGraphBuilder; or
+      // - an art::HLoadClass instruction, set by art::PrepareForRegisterAllocation.
+      DCHECK(input != nullptr);
+      DCHECK(input->IsClinitCheck() || input->IsLoadClass()) << input->DebugName();
+    }
+    return input_record;
+  }
+
  private:
   const InvokeType original_invoke_type_;
   const InvokeType invoke_type_;
   const bool is_recursive_;
+  ClinitCheckRequirement clinit_check_requirement_;
 
   DISALLOW_COPY_AND_ASSIGN(HInvokeStaticOrDirect);
 };
@@ -2746,6 +2871,7 @@
   size_t InputCount() const OVERRIDE { return inputs_.Size(); }
 
   void AddInput(HInstruction* input);
+  void RemoveInputAt(size_t index);
 
   Primitive::Type GetType() const OVERRIDE { return type_; }
   void SetType(Primitive::Type type) { type_ = type; }
@@ -3214,7 +3340,6 @@
   DISALLOW_COPY_AND_ASSIGN(HLoadString);
 };
 
-// TODO: Pass this check to HInvokeStaticOrDirect nodes.
 /**
  * Performs an initialization check on its Class object input.
  */
diff --git a/compiler/optimizing/optimization.cc b/compiler/optimizing/optimization.cc
index b13e07e..c46a219 100644
--- a/compiler/optimizing/optimization.cc
+++ b/compiler/optimizing/optimization.cc
@@ -21,9 +21,9 @@
 
 namespace art {
 
-void HOptimization::MaybeRecordStat(MethodCompilationStat compilation_stat) const {
+void HOptimization::MaybeRecordStat(MethodCompilationStat compilation_stat, size_t count) const {
   if (stats_ != nullptr) {
-    stats_->RecordStat(compilation_stat);
+    stats_->RecordStat(compilation_stat, count);
   }
 }
 
diff --git a/compiler/optimizing/optimization.h b/compiler/optimizing/optimization.h
index 8b20281..ccf8de9 100644
--- a/compiler/optimizing/optimization.h
+++ b/compiler/optimizing/optimization.h
@@ -48,7 +48,7 @@
   void Check();
 
  protected:
-  void MaybeRecordStat(MethodCompilationStat compilation_stat) const;
+  void MaybeRecordStat(MethodCompilationStat compilation_stat, size_t count = 1) const;
 
   HGraph* const graph_;
   // Used to record stats about the optimization.
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index d99d359..05451bc 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -328,7 +328,7 @@
 
   HInliner inliner(graph, dex_compilation_unit, dex_compilation_unit, driver, stats);
 
-  HConstantFolding fold2(graph);
+  HConstantFolding fold2(graph, "constant_folding_after_inlining");
   SideEffectsAnalysis side_effects(graph);
   GVNOptimization gvn(graph, side_effects);
   LICM licm(graph, side_effects);
diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h
index e6508c9..65c84e6 100644
--- a/compiler/optimizing/optimizing_compiler_stats.h
+++ b/compiler/optimizing/optimizing_compiler_stats.h
@@ -58,8 +58,8 @@
  public:
   OptimizingCompilerStats() {}
 
-  void RecordStat(MethodCompilationStat stat) {
-    compile_stats_[stat]++;
+  void RecordStat(MethodCompilationStat stat, size_t count = 1) {
+    compile_stats_[stat] += count;
   }
 
   void Log() const {
diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc
index f5d8d82..fa6b3c2 100644
--- a/compiler/optimizing/prepare_for_register_allocation.cc
+++ b/compiler/optimizing/prepare_for_register_allocation.cc
@@ -79,4 +79,26 @@
   }
 }
 
+void PrepareForRegisterAllocation::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
+  if (invoke->IsStaticWithExplicitClinitCheck()) {
+    size_t last_input_index = invoke->InputCount() - 1;
+    HInstruction* last_input = invoke->InputAt(last_input_index);
+    DCHECK(last_input->IsLoadClass()) << last_input->DebugName();
+
+    // Remove a load class instruction as last input of a static
+    // invoke, which has been added (along with a clinit check,
+    // removed by PrepareForRegisterAllocation::VisitClinitCheck
+    // previously) by the graph builder during the creation of the
+    // static invoke instruction, but is no longer required at this
+    // stage (i.e., after inlining has been performed).
+    invoke->RemoveClinitCheckOrLoadClassAsLastInput();
+
+    // If the load class instruction is no longer used, remove it from
+    // the graph.
+    if (!last_input->HasUses()) {
+      last_input->GetBlock()->RemoveInstruction(last_input);
+    }
+  }
+}
+
 }  // namespace art
diff --git a/compiler/optimizing/prepare_for_register_allocation.h b/compiler/optimizing/prepare_for_register_allocation.h
index c28507c..d7f277f 100644
--- a/compiler/optimizing/prepare_for_register_allocation.h
+++ b/compiler/optimizing/prepare_for_register_allocation.h
@@ -39,6 +39,7 @@
   void VisitBoundType(HBoundType* bound_type) OVERRIDE;
   void VisitClinitCheck(HClinitCheck* check) OVERRIDE;
   void VisitCondition(HCondition* condition) OVERRIDE;
+  void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE;
 
   DISALLOW_COPY_AND_ASSIGN(PrepareForRegisterAllocation);
 };
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index 0fdf051..a8d006f 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -1455,6 +1455,7 @@
                         : Location::StackSlot(interval->GetParent()->GetSpillSlot()));
   }
   UsePosition* use = current->GetFirstUse();
+  UsePosition* env_use = current->GetFirstEnvironmentUse();
 
   // Walk over all siblings, updating locations of use positions, and
   // connecting them when they are adjacent.
@@ -1466,32 +1467,39 @@
 
     LiveRange* range = current->GetFirstRange();
     while (range != nullptr) {
-      while (use != nullptr && use->GetPosition() < range->GetStart()) {
-        DCHECK(use->GetIsEnvironment());
-        use = use->GetNext();
-      }
+      DCHECK(use == nullptr || use->GetPosition() >= range->GetStart());
       while (use != nullptr && use->GetPosition() <= range->GetEnd()) {
+        DCHECK(!use->GetIsEnvironment());
         DCHECK(current->CoversSlow(use->GetPosition()) || (use->GetPosition() == range->GetEnd()));
         LocationSummary* locations = use->GetUser()->GetLocations();
-        if (use->GetIsEnvironment()) {
-          locations->SetEnvironmentAt(use->GetInputIndex(), source);
-        } else {
-          Location expected_location = locations->InAt(use->GetInputIndex());
-          // The expected (actual) location may be invalid in case the input is unused. Currently
-          // this only happens for intrinsics.
-          if (expected_location.IsValid()) {
-            if (expected_location.IsUnallocated()) {
-              locations->SetInAt(use->GetInputIndex(), source);
-            } else if (!expected_location.IsConstant()) {
-              AddInputMoveFor(interval->GetDefinedBy(), use->GetUser(), source, expected_location);
-            }
-          } else {
-            DCHECK(use->GetUser()->IsInvoke());
-            DCHECK(use->GetUser()->AsInvoke()->GetIntrinsic() != Intrinsics::kNone);
+        Location expected_location = locations->InAt(use->GetInputIndex());
+        // The expected (actual) location may be invalid in case the input is unused. Currently
+        // this only happens for intrinsics.
+        if (expected_location.IsValid()) {
+          if (expected_location.IsUnallocated()) {
+            locations->SetInAt(use->GetInputIndex(), source);
+          } else if (!expected_location.IsConstant()) {
+            AddInputMoveFor(interval->GetDefinedBy(), use->GetUser(), source, expected_location);
           }
+        } else {
+          DCHECK(use->GetUser()->IsInvoke());
+          DCHECK(use->GetUser()->AsInvoke()->GetIntrinsic() != Intrinsics::kNone);
         }
         use = use->GetNext();
       }
+
+      // Walk over the environment uses, and update their locations.
+      while (env_use != nullptr && env_use->GetPosition() < range->GetStart()) {
+        env_use = env_use->GetNext();
+      }
+
+      while (env_use != nullptr && env_use->GetPosition() <= range->GetEnd()) {
+        DCHECK(current->CoversSlow(env_use->GetPosition()) || (env_use->GetPosition() == range->GetEnd()));
+        LocationSummary* locations = env_use->GetUser()->GetLocations();
+        locations->SetEnvironmentAt(env_use->GetInputIndex(), source);
+        env_use = env_use->GetNext();
+      }
+
       range = range->GetNext();
     }
 
@@ -1553,14 +1561,7 @@
     current = next_sibling;
   } while (current != nullptr);
 
-  if (kIsDebugBuild) {
-    // Following uses can only be environment uses. The location for
-    // these environments will be none.
-    while (use != nullptr) {
-      DCHECK(use->GetIsEnvironment());
-      use = use->GetNext();
-    }
-  }
+  DCHECK(use == nullptr);
 }
 
 void RegisterAllocator::ConnectSplitSiblings(LiveInterval* interval,
diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc
index 7a252af..b66e655 100644
--- a/compiler/optimizing/ssa_builder.cc
+++ b/compiler/optimizing/ssa_builder.cc
@@ -417,6 +417,7 @@
     ArenaAllocator* allocator = graph->GetArena();
     result = new (allocator) HFloatConstant(bit_cast<float, int32_t>(constant->GetValue()));
     constant->GetBlock()->InsertInstructionBefore(result, constant->GetNext());
+    graph->CacheFloatConstant(result);
   } else {
     // If there is already a constant with the expected type, we know it is
     // the floating point equivalent of this constant.
@@ -439,6 +440,7 @@
     ArenaAllocator* allocator = graph->GetArena();
     result = new (allocator) HDoubleConstant(bit_cast<double, int64_t>(constant->GetValue()));
     constant->GetBlock()->InsertInstructionBefore(result, constant->GetNext());
+    graph->CacheDoubleConstant(result);
   } else {
     // If there is already a constant with the expected type, we know it is
     // the floating point equivalent of this constant.
diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc
index ea0e7c3..b674f74 100644
--- a/compiler/optimizing/ssa_liveness_analysis.cc
+++ b/compiler/optimizing/ssa_liveness_analysis.cc
@@ -341,7 +341,7 @@
   size_t end = GetEnd();
   while (use != nullptr && use->GetPosition() <= end) {
     size_t use_position = use->GetPosition();
-    if (use_position >= start && !use->GetIsEnvironment()) {
+    if (use_position >= start) {
       HInstruction* user = use->GetUser();
       size_t input_index = use->GetInputIndex();
       if (user->IsPhi()) {
diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h
index 97254ed..b95276a 100644
--- a/compiler/optimizing/ssa_liveness_analysis.h
+++ b/compiler/optimizing/ssa_liveness_analysis.h
@@ -219,6 +219,7 @@
   void AddTempUse(HInstruction* instruction, size_t temp_index) {
     DCHECK(IsTemp());
     DCHECK(first_use_ == nullptr) << "A temporary can only have one user";
+    DCHECK(first_env_use_ == nullptr) << "A temporary cannot have environment user";
     size_t position = instruction->GetLifetimePosition();
     first_use_ = new (allocator_) UsePosition(
         instruction, temp_index, /* is_environment */ false, position, first_use_);
@@ -265,8 +266,13 @@
       return;
     }
 
-    first_use_ = new (allocator_) UsePosition(
-        instruction, input_index, is_environment, position, first_use_);
+    if (is_environment) {
+      first_env_use_ = new (allocator_) UsePosition(
+          instruction, input_index, is_environment, position, first_env_use_);
+    } else {
+      first_use_ = new (allocator_) UsePosition(
+          instruction, input_index, is_environment, position, first_use_);
+    }
 
     if (is_environment && !keep_alive) {
       // If this environment use does not keep the instruction live, it does not
@@ -477,7 +483,7 @@
     size_t end = GetEnd();
     while (use != nullptr && use->GetPosition() <= end) {
       size_t use_position = use->GetPosition();
-      if (use_position > position && !use->GetIsEnvironment()) {
+      if (use_position > position) {
         Location location = use->GetUser()->GetLocations()->InAt(use->GetInputIndex());
         if (location.IsUnallocated()
             && (location.GetPolicy() == Location::kRequiresRegister
@@ -508,12 +514,10 @@
     UsePosition* use = first_use_;
     size_t end = GetEnd();
     while (use != nullptr && use->GetPosition() <= end) {
-      if (!use->GetIsEnvironment()) {
-        Location location = use->GetUser()->GetLocations()->InAt(use->GetInputIndex());
-        size_t use_position = use->GetPosition();
-        if (use_position > position && location.IsValid()) {
-          return use_position;
-        }
+      Location location = use->GetUser()->GetLocations()->InAt(use->GetInputIndex());
+      size_t use_position = use->GetPosition();
+      if (use_position > position && location.IsValid()) {
+        return use_position;
       }
       use = use->GetNext();
     }
@@ -524,6 +528,10 @@
     return first_use_;
   }
 
+  UsePosition* GetFirstEnvironmentUse() const {
+    return first_env_use_;
+  }
+
   Primitive::Type GetType() const {
     return type_;
   }
@@ -577,6 +585,7 @@
     new_interval->parent_ = parent_;
 
     new_interval->first_use_ = first_use_;
+    new_interval->first_env_use_ = first_env_use_;
     LiveRange* current = first_range_;
     LiveRange* previous = nullptr;
     // Iterate over the ranges, and either find a range that covers this position, or
@@ -655,10 +664,18 @@
         stream << " ";
       } while ((use = use->GetNext()) != nullptr);
     }
+    stream << "}, {";
+    use = first_env_use_;
+    if (use != nullptr) {
+      do {
+        use->Dump(stream);
+        stream << " ";
+      } while ((use = use->GetNext()) != nullptr);
+    }
     stream << "}";
     stream << " is_fixed: " << is_fixed_ << ", is_split: " << IsSplit();
-    stream << " is_high: " << IsHighInterval();
     stream << " is_low: " << IsLowInterval();
+    stream << " is_high: " << IsHighInterval();
   }
 
   LiveInterval* GetNextSibling() const { return next_sibling_; }
@@ -754,6 +771,10 @@
     if (first_use_ != nullptr) {
       high_or_low_interval_->first_use_ = first_use_->Dup(allocator_);
     }
+
+    if (first_env_use_ != nullptr) {
+      high_or_low_interval_->first_env_use_ = first_env_use_->Dup(allocator_);
+    }
   }
 
   // Returns whether an interval, when it is non-split, is using
@@ -851,6 +872,7 @@
         first_safepoint_(nullptr),
         last_safepoint_(nullptr),
         first_use_(nullptr),
+        first_env_use_(nullptr),
         type_(type),
         next_sibling_(nullptr),
         parent_(this),
@@ -905,6 +927,7 @@
 
   // Uses of this interval. Note that this linked list is shared amongst siblings.
   UsePosition* first_use_;
+  UsePosition* first_env_use_;
 
   // The instruction type this interval corresponds to.
   const Primitive::Type type_;
diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc
index fcc86d5..8344fc3 100644
--- a/compiler/optimizing/stack_map_stream.cc
+++ b/compiler/optimizing/stack_map_stream.cc
@@ -18,29 +18,29 @@
 
 namespace art {
 
-void StackMapStream::AddStackMapEntry(uint32_t dex_pc,
-                                     uint32_t native_pc_offset,
-                                     uint32_t register_mask,
-                                     BitVector* sp_mask,
-                                     uint32_t num_dex_registers,
-                                     uint8_t inlining_depth) {
-  StackMapEntry entry;
-  entry.dex_pc = dex_pc;
-  entry.native_pc_offset = native_pc_offset;
-  entry.register_mask = register_mask;
-  entry.sp_mask = sp_mask;
-  entry.num_dex_registers = num_dex_registers;
-  entry.inlining_depth = inlining_depth;
-  entry.dex_register_locations_start_index = dex_register_locations_.Size();
-  entry.inline_infos_start_index = inline_infos_.Size();
-  entry.dex_register_map_hash = 0;
+void StackMapStream::BeginStackMapEntry(uint32_t dex_pc,
+                                        uint32_t native_pc_offset,
+                                        uint32_t register_mask,
+                                        BitVector* sp_mask,
+                                        uint32_t num_dex_registers,
+                                        uint8_t inlining_depth) {
+  DCHECK_EQ(0u, current_entry_.dex_pc) << "EndStackMapEntry not called after BeginStackMapEntry";
+  current_entry_.dex_pc = dex_pc;
+  current_entry_.native_pc_offset = native_pc_offset;
+  current_entry_.register_mask = register_mask;
+  current_entry_.sp_mask = sp_mask;
+  current_entry_.num_dex_registers = num_dex_registers;
+  current_entry_.inlining_depth = inlining_depth;
+  current_entry_.dex_register_locations_start_index = dex_register_locations_.Size();
+  current_entry_.inline_infos_start_index = inline_infos_.Size();
+  current_entry_.dex_register_map_hash = 0;
+  current_entry_.same_dex_register_map_as_ = kNoSameDexMapFound;
   if (num_dex_registers != 0) {
-    entry.live_dex_registers_mask =
+    current_entry_.live_dex_registers_mask =
         new (allocator_) ArenaBitVector(allocator_, num_dex_registers, true);
   } else {
-    entry.live_dex_registers_mask = nullptr;
+    current_entry_.live_dex_registers_mask = nullptr;
   }
-  stack_maps_.Add(entry);
 
   if (sp_mask != nullptr) {
     stack_mask_max_ = std::max(stack_mask_max_, sp_mask->GetHighestBitSet());
@@ -54,11 +54,16 @@
   register_mask_max_ = std::max(register_mask_max_, register_mask);
 }
 
+void StackMapStream::EndStackMapEntry() {
+  current_entry_.same_dex_register_map_as_ = FindEntryWithTheSameDexMap();
+  stack_maps_.Add(current_entry_);
+  current_entry_ = StackMapEntry();
+}
+
 void StackMapStream::AddDexRegisterEntry(uint16_t dex_register,
-                                        DexRegisterLocation::Kind kind,
-                                        int32_t value) {
-  StackMapEntry entry = stack_maps_.Get(stack_maps_.Size() - 1);
-  DCHECK_LT(dex_register, entry.num_dex_registers);
+                                         DexRegisterLocation::Kind kind,
+                                         int32_t value) {
+  DCHECK_LT(dex_register, current_entry_.num_dex_registers);
 
   if (kind != DexRegisterLocation::Kind::kNone) {
     // Ensure we only use non-compressed location kind at this stage.
@@ -82,12 +87,11 @@
       location_catalog_entries_indices_.Insert(std::make_pair(location, index));
     }
 
-    entry.live_dex_registers_mask->SetBit(dex_register);
-    entry.dex_register_map_hash +=
-      (1 << (dex_register % (sizeof(entry.dex_register_map_hash) * kBitsPerByte)));
-    entry.dex_register_map_hash += static_cast<uint32_t>(value);
-    entry.dex_register_map_hash += static_cast<uint32_t>(kind);
-    stack_maps_.Put(stack_maps_.Size() - 1, entry);
+    current_entry_.live_dex_registers_mask->SetBit(dex_register);
+    current_entry_.dex_register_map_hash +=
+      (1 << (dex_register % (sizeof(current_entry_.dex_register_map_hash) * kBitsPerByte)));
+    current_entry_.dex_register_map_hash += static_cast<uint32_t>(value);
+    current_entry_.dex_register_map_hash += static_cast<uint32_t>(kind);
   }
 }
 
@@ -97,29 +101,33 @@
   inline_infos_.Add(entry);
 }
 
-size_t StackMapStream::ComputeNeededSize() {
-  size_t size = CodeInfo::kFixedSize
-      + ComputeDexRegisterLocationCatalogSize()
-      + ComputeStackMapsSize()
-      + ComputeDexRegisterMapsSize()
-      + ComputeInlineInfoSize();
+size_t StackMapStream::PrepareForFillIn() {
+  int stack_mask_number_of_bits = stack_mask_max_ + 1;  // Need room for max element too.
+  stack_mask_size_ = RoundUp(stack_mask_number_of_bits, kBitsPerByte) / kBitsPerByte;
+  inline_info_size_ = ComputeInlineInfoSize();
+  dex_register_maps_size_ = ComputeDexRegisterMapsSize();
+  stack_maps_size_ = stack_maps_.Size()
+      * StackMap::ComputeStackMapSize(stack_mask_size_,
+                                      inline_info_size_,
+                                      dex_register_maps_size_,
+                                      dex_pc_max_,
+                                      native_pc_offset_max_,
+                                      register_mask_max_);
+  dex_register_location_catalog_size_ = ComputeDexRegisterLocationCatalogSize();
+
   // Note: use RoundUp to word-size here if you want CodeInfo objects to be word aligned.
-  return size;
-}
+  needed_size_ = CodeInfo::kFixedSize
+      + dex_register_location_catalog_size_
+      + stack_maps_size_
+      + dex_register_maps_size_
+      + inline_info_size_;
 
-size_t StackMapStream::ComputeStackMaskSize() const {
-  int number_of_bits = stack_mask_max_ + 1;  // Need room for max element too.
-  return RoundUp(number_of_bits, kBitsPerByte) / kBitsPerByte;
-}
+  dex_register_location_catalog_start_ = CodeInfo::kFixedSize;
+  stack_maps_start_ = dex_register_location_catalog_start_ + dex_register_location_catalog_size_;
+  dex_register_maps_start_ = stack_maps_start_ + stack_maps_size_;
+  inline_infos_start_ = dex_register_maps_start_ + dex_register_maps_size_;
 
-size_t StackMapStream::ComputeStackMapsSize() {
-  return stack_maps_.Size() * StackMap::ComputeStackMapSize(
-      ComputeStackMaskSize(),
-      ComputeInlineInfoSize(),
-      ComputeDexRegisterMapsSize(),
-      dex_pc_max_,
-      native_pc_offset_max_,
-      register_mask_max_);
+  return needed_size_;
 }
 
 size_t StackMapStream::ComputeDexRegisterLocationCatalogSize() const {
@@ -157,12 +165,13 @@
   return size;
 }
 
-size_t StackMapStream::ComputeDexRegisterMapsSize() {
+size_t StackMapStream::ComputeDexRegisterMapsSize() const {
   size_t size = 0;
   for (size_t i = 0; i < stack_maps_.Size(); ++i) {
-    if (FindEntryWithTheSameDexMap(i) == kNoSameDexMapFound) {
+    StackMapEntry entry = stack_maps_.Get(i);
+    if (entry.same_dex_register_map_as_ == kNoSameDexMapFound) {
       // Entries with the same dex map will have the same offset.
-      size += ComputeDexRegisterMapSize(stack_maps_.Get(i));
+      size += ComputeDexRegisterMapSize(entry);
     }
   }
   return size;
@@ -174,55 +183,33 @@
     + (number_of_stack_maps_with_inline_info_ * InlineInfo::kFixedSize);
 }
 
-size_t StackMapStream::ComputeDexRegisterLocationCatalogStart() const {
-  return CodeInfo::kFixedSize;
-}
-
-size_t StackMapStream::ComputeStackMapsStart() const {
-  return ComputeDexRegisterLocationCatalogStart() + ComputeDexRegisterLocationCatalogSize();
-}
-
-size_t StackMapStream::ComputeDexRegisterMapsStart() {
-  return ComputeStackMapsStart() + ComputeStackMapsSize();
-}
-
-size_t StackMapStream::ComputeInlineInfoStart() {
-  return ComputeDexRegisterMapsStart() + ComputeDexRegisterMapsSize();
-}
-
 void StackMapStream::FillIn(MemoryRegion region) {
+  DCHECK_EQ(0u, current_entry_.dex_pc) << "EndStackMapEntry not called after BeginStackMapEntry";
+  DCHECK_NE(0u, needed_size_) << "PrepareForFillIn not called before FillIn";
+
   CodeInfo code_info(region);
-  DCHECK_EQ(region.size(), ComputeNeededSize());
+  DCHECK_EQ(region.size(), needed_size_);
   code_info.SetOverallSize(region.size());
 
-  size_t stack_mask_size = ComputeStackMaskSize();
-
-  size_t dex_register_map_size = ComputeDexRegisterMapsSize();
-  size_t inline_info_size = ComputeInlineInfoSize();
-
   MemoryRegion dex_register_locations_region = region.Subregion(
-    ComputeDexRegisterMapsStart(),
-    dex_register_map_size);
+      dex_register_maps_start_, dex_register_maps_size_);
 
   MemoryRegion inline_infos_region = region.Subregion(
-    ComputeInlineInfoStart(),
-    inline_info_size);
+      inline_infos_start_, inline_info_size_);
 
-  code_info.SetEncoding(inline_info_size,
-                        dex_register_map_size,
+  code_info.SetEncoding(inline_info_size_,
+                        dex_register_maps_size_,
                         dex_pc_max_,
                         native_pc_offset_max_,
                         register_mask_max_);
   code_info.SetNumberOfStackMaps(stack_maps_.Size());
-  code_info.SetStackMaskSize(stack_mask_size);
-  DCHECK_EQ(code_info.GetStackMapsSize(), ComputeStackMapsSize());
+  code_info.SetStackMaskSize(stack_mask_size_);
+  DCHECK_EQ(code_info.GetStackMapsSize(), stack_maps_size_);
 
   // Set the Dex register location catalog.
-  code_info.SetNumberOfDexRegisterLocationCatalogEntries(
-      location_catalog_entries_.Size());
+  code_info.SetNumberOfDexRegisterLocationCatalogEntries(location_catalog_entries_.Size());
   MemoryRegion dex_register_location_catalog_region = region.Subregion(
-      ComputeDexRegisterLocationCatalogStart(),
-      ComputeDexRegisterLocationCatalogSize());
+      dex_register_location_catalog_start_, dex_register_location_catalog_size_);
   DexRegisterLocationCatalog dex_register_location_catalog(dex_register_location_catalog_region);
   // Offset in `dex_register_location_catalog` where to store the next
   // register location.
@@ -253,11 +240,11 @@
       stack_map.SetDexRegisterMapOffset(code_info, StackMap::kNoDexRegisterMap);
     } else {
       // Search for an entry with the same dex map.
-      size_t entry_with_same_map = FindEntryWithTheSameDexMap(i);
-      if (entry_with_same_map != kNoSameDexMapFound) {
+      if (entry.same_dex_register_map_as_ != kNoSameDexMapFound) {
         // If we have a hit reuse the offset.
         stack_map.SetDexRegisterMapOffset(code_info,
-            code_info.GetStackMapAt(entry_with_same_map).GetDexRegisterMapOffset(code_info));
+            code_info.GetStackMapAt(entry.same_dex_register_map_as_)
+                     .GetDexRegisterMapOffset(code_info));
       } else {
         // New dex registers maps should be added to the stack map.
         MemoryRegion register_region =
@@ -309,49 +296,34 @@
         inline_info.SetMethodReferenceIndexAtDepth(j, inline_entry.method_index);
       }
     } else {
-      if (inline_info_size != 0) {
+      if (inline_info_size_ != 0) {
         stack_map.SetInlineDescriptorOffset(code_info, StackMap::kNoInlineInfo);
       }
     }
   }
 }
 
-size_t StackMapStream::FindEntryWithTheSameDexMap(size_t entry_index) {
-  StackMapEntry entry = stack_maps_.Get(entry_index);
-  auto entries_it = dex_map_hash_to_stack_map_indices_.find(entry.dex_register_map_hash);
+size_t StackMapStream::FindEntryWithTheSameDexMap() {
+  size_t current_entry_index = stack_maps_.Size();
+  auto entries_it = dex_map_hash_to_stack_map_indices_.find(current_entry_.dex_register_map_hash);
   if (entries_it == dex_map_hash_to_stack_map_indices_.end()) {
     // We don't have a perfect hash functions so we need a list to collect all stack maps
     // which might have the same dex register map.
     GrowableArray<uint32_t> stack_map_indices(allocator_, 1);
-    stack_map_indices.Add(entry_index);
-    dex_map_hash_to_stack_map_indices_.Put(entry.dex_register_map_hash, stack_map_indices);
+    stack_map_indices.Add(current_entry_index);
+    dex_map_hash_to_stack_map_indices_.Put(current_entry_.dex_register_map_hash, stack_map_indices);
     return kNoSameDexMapFound;
   }
 
-  // TODO: We don't need to add ourselves to the map if we can guarantee that
-  // FindEntryWithTheSameDexMap is called just once per stack map entry.
-  // A good way to do this is to cache the offset in the stack map entry. This
-  // is easier to do if we add markers when the stack map constructions begins
-  // and when it ends.
-
-  // We might have collisions, so we need to check whether or not we should
-  // add the entry to the map. `needs_to_be_added` keeps track of this.
-  bool needs_to_be_added = true;
-  size_t result = kNoSameDexMapFound;
+  // We might have collisions, so we need to check whether or not we really have a match.
   for (size_t i = 0; i < entries_it->second.Size(); i++) {
     size_t test_entry_index = entries_it->second.Get(i);
-    if (test_entry_index == entry_index) {
-      needs_to_be_added = false;
-    } else if (HaveTheSameDexMaps(stack_maps_.Get(test_entry_index), entry)) {
-      result = test_entry_index;
-      needs_to_be_added = false;
-      break;
+    if (HaveTheSameDexMaps(stack_maps_.Get(test_entry_index), current_entry_)) {
+      return test_entry_index;
     }
   }
-  if (needs_to_be_added) {
-    entries_it->second.Add(entry_index);
-  }
-  return result;
+  entries_it->second.Add(current_entry_index);
+  return kNoSameDexMapFound;
 }
 
 bool StackMapStream::HaveTheSameDexMaps(const StackMapEntry& a, const StackMapEntry& b) const {
diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h
index 990e682..0c626be 100644
--- a/compiler/optimizing/stack_map_stream.h
+++ b/compiler/optimizing/stack_map_stream.h
@@ -70,7 +70,18 @@
         native_pc_offset_max_(0),
         register_mask_max_(0),
         number_of_stack_maps_with_inline_info_(0),
-        dex_map_hash_to_stack_map_indices_(std::less<uint32_t>(), allocator->Adapter()) {}
+        dex_map_hash_to_stack_map_indices_(std::less<uint32_t>(), allocator->Adapter()),
+        current_entry_(),
+        stack_mask_size_(0),
+        inline_info_size_(0),
+        dex_register_maps_size_(0),
+        stack_maps_size_(0),
+        dex_register_location_catalog_size_(0),
+        dex_register_location_catalog_start_(0),
+        stack_maps_start_(0),
+        dex_register_maps_start_(0),
+        inline_infos_start_(0),
+        needed_size_(0) {}
 
   // See runtime/stack_map.h to know what these fields contain.
   struct StackMapEntry {
@@ -84,18 +95,20 @@
     size_t inline_infos_start_index;
     BitVector* live_dex_registers_mask;
     uint32_t dex_register_map_hash;
+    size_t same_dex_register_map_as_;
   };
 
   struct InlineInfoEntry {
     uint32_t method_index;
   };
 
-  void AddStackMapEntry(uint32_t dex_pc,
-                        uint32_t native_pc_offset,
-                        uint32_t register_mask,
-                        BitVector* sp_mask,
-                        uint32_t num_dex_registers,
-                        uint8_t inlining_depth);
+  void BeginStackMapEntry(uint32_t dex_pc,
+                          uint32_t native_pc_offset,
+                          uint32_t register_mask,
+                          BitVector* sp_mask,
+                          uint32_t num_dex_registers,
+                          uint8_t inlining_depth);
+  void EndStackMapEntry();
 
   void AddDexRegisterEntry(uint16_t dex_register,
                            DexRegisterLocation::Kind kind,
@@ -103,25 +116,20 @@
 
   void AddInlineInfoEntry(uint32_t method_index);
 
-  size_t ComputeNeededSize();
-  size_t ComputeStackMaskSize() const;
-  size_t ComputeStackMapsSize();
-  size_t ComputeDexRegisterLocationCatalogSize() const;
-  size_t ComputeDexRegisterMapSize(const StackMapEntry& entry) const;
-  size_t ComputeDexRegisterMapsSize();
-  size_t ComputeInlineInfoSize() const;
-
-  size_t ComputeDexRegisterLocationCatalogStart() const;
-  size_t ComputeStackMapsStart() const;
-  size_t ComputeDexRegisterMapsStart();
-  size_t ComputeInlineInfoStart();
-
+  // Prepares the stream to fill in a memory region. Must be called before FillIn.
+  // Returns the size (in bytes) needed to store this stream.
+  size_t PrepareForFillIn();
   void FillIn(MemoryRegion region);
 
  private:
-  // Returns the index of an entry with the same dex register map
+  size_t ComputeDexRegisterLocationCatalogSize() const;
+  size_t ComputeDexRegisterMapSize(const StackMapEntry& entry) const;
+  size_t ComputeDexRegisterMapsSize() const;
+  size_t ComputeInlineInfoSize() const;
+
+  // Returns the index of an entry with the same dex register map as the current_entry,
   // or kNoSameDexMapFound if no such entry exists.
-  size_t FindEntryWithTheSameDexMap(size_t entry_index);
+  size_t FindEntryWithTheSameDexMap();
   bool HaveTheSameDexMaps(const StackMapEntry& a, const StackMapEntry& b) const;
 
   ArenaAllocator* allocator_;
@@ -146,6 +154,18 @@
 
   ArenaSafeMap<uint32_t, GrowableArray<uint32_t>> dex_map_hash_to_stack_map_indices_;
 
+  StackMapEntry current_entry_;
+  size_t stack_mask_size_;
+  size_t inline_info_size_;
+  size_t dex_register_maps_size_;
+  size_t stack_maps_size_;
+  size_t dex_register_location_catalog_size_;
+  size_t dex_register_location_catalog_start_;
+  size_t stack_maps_start_;
+  size_t dex_register_maps_start_;
+  size_t inline_infos_start_;
+  size_t needed_size_;
+
   static constexpr uint32_t kNoSameDexMapFound = -1;
 
   DISALLOW_COPY_AND_ASSIGN(StackMapStream);
diff --git a/compiler/optimizing/stack_map_test.cc b/compiler/optimizing/stack_map_test.cc
index 8d160bc..3291a77 100644
--- a/compiler/optimizing/stack_map_test.cc
+++ b/compiler/optimizing/stack_map_test.cc
@@ -40,11 +40,12 @@
 
   ArenaBitVector sp_mask(&arena, 0, false);
   size_t number_of_dex_registers = 2;
-  stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
+  stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
   stream.AddDexRegisterEntry(0, Kind::kInStack, 0);         // Short location.
   stream.AddDexRegisterEntry(1, Kind::kConstant, -2);       // Short location.
+  stream.EndStackMapEntry();
 
-  size_t size = stream.ComputeNeededSize();
+  size_t size = stream.PrepareForFillIn();
   void* memory = arena.Alloc(size, kArenaAllocMisc);
   MemoryRegion region(memory, size);
   stream.FillIn(region);
@@ -123,20 +124,22 @@
   sp_mask1.SetBit(2);
   sp_mask1.SetBit(4);
   size_t number_of_dex_registers = 2;
-  stream.AddStackMapEntry(0, 64, 0x3, &sp_mask1, number_of_dex_registers, 2);
+  stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask1, number_of_dex_registers, 2);
   stream.AddDexRegisterEntry(0, Kind::kInStack, 0);         // Short location.
   stream.AddDexRegisterEntry(1, Kind::kConstant, -2);       // Large location.
   stream.AddInlineInfoEntry(42);
   stream.AddInlineInfoEntry(82);
+  stream.EndStackMapEntry();
 
   ArenaBitVector sp_mask2(&arena, 0, true);
   sp_mask2.SetBit(3);
   sp_mask1.SetBit(8);
-  stream.AddStackMapEntry(1, 128, 0xFF, &sp_mask2, number_of_dex_registers, 0);
+  stream.BeginStackMapEntry(1, 128, 0xFF, &sp_mask2, number_of_dex_registers, 0);
   stream.AddDexRegisterEntry(0, Kind::kInRegister, 18);     // Short location.
   stream.AddDexRegisterEntry(1, Kind::kInFpuRegister, 3);   // Short location.
+  stream.EndStackMapEntry();
 
-  size_t size = stream.ComputeNeededSize();
+  size_t size = stream.PrepareForFillIn();
   void* memory = arena.Alloc(size, kArenaAllocMisc);
   MemoryRegion region(memory, size);
   stream.FillIn(region);
@@ -273,11 +276,12 @@
 
   ArenaBitVector sp_mask(&arena, 0, false);
   uint32_t number_of_dex_registers = 2;
-  stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
+  stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
   stream.AddDexRegisterEntry(0, Kind::kNone, 0);            // No location.
   stream.AddDexRegisterEntry(1, Kind::kConstant, -2);       // Large location.
+  stream.EndStackMapEntry();
 
-  size_t size = stream.ComputeNeededSize();
+  size_t size = stream.PrepareForFillIn();
   void* memory = arena.Alloc(size, kArenaAllocMisc);
   MemoryRegion region(memory, size);
   stream.FillIn(region);
@@ -353,7 +357,7 @@
   ArenaBitVector sp_mask(&arena, 0, false);
   uint32_t number_of_dex_registers = 1024;
   // Create the first stack map (and its Dex register map).
-  stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
+  stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
   uint32_t number_of_dex_live_registers_in_dex_register_map_0 = number_of_dex_registers - 8;
   for (uint32_t i = 0; i < number_of_dex_live_registers_in_dex_register_map_0; ++i) {
     // Use two different Dex register locations to populate this map,
@@ -362,13 +366,15 @@
     // art::DexRegisterMap::SingleEntrySizeInBits).
     stream.AddDexRegisterEntry(i, Kind::kConstant, i % 2);  // Short location.
   }
+  stream.EndStackMapEntry();
   // Create the second stack map (and its Dex register map).
-  stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
+  stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
   for (uint32_t i = 0; i < number_of_dex_registers; ++i) {
     stream.AddDexRegisterEntry(i, Kind::kConstant, 0);  // Short location.
   }
+  stream.EndStackMapEntry();
 
-  size_t size = stream.ComputeNeededSize();
+  size_t size = stream.PrepareForFillIn();
   void* memory = arena.Alloc(size, kArenaAllocMisc);
   MemoryRegion region(memory, size);
   stream.FillIn(region);
@@ -413,19 +419,22 @@
   ArenaBitVector sp_mask(&arena, 0, false);
   uint32_t number_of_dex_registers = 2;
   // First stack map.
-  stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
+  stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
   stream.AddDexRegisterEntry(0, Kind::kInRegister, 0);  // Short location.
   stream.AddDexRegisterEntry(1, Kind::kConstant, -2);   // Large location.
+  stream.EndStackMapEntry();
   // Second stack map, which should share the same dex register map.
-  stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
+  stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
   stream.AddDexRegisterEntry(0, Kind::kInRegister, 0);  // Short location.
   stream.AddDexRegisterEntry(1, Kind::kConstant, -2);   // Large location.
+  stream.EndStackMapEntry();
   // Third stack map (doesn't share the dex register map).
-  stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
+  stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
   stream.AddDexRegisterEntry(0, Kind::kInRegister, 2);  // Short location.
   stream.AddDexRegisterEntry(1, Kind::kConstant, -2);   // Large location.
+  stream.EndStackMapEntry();
 
-  size_t size = stream.ComputeNeededSize();
+  size_t size = stream.PrepareForFillIn();
   void* memory = arena.Alloc(size, kArenaAllocMisc);
   MemoryRegion region(memory, size);
   stream.FillIn(region);
@@ -462,9 +471,10 @@
 
   ArenaBitVector sp_mask(&arena, 0, false);
   uint32_t number_of_dex_registers = 0;
-  stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
+  stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
+  stream.EndStackMapEntry();
 
-  size_t size = stream.ComputeNeededSize();
+  size_t size = stream.PrepareForFillIn();
   void* memory = arena.Alloc(size, kArenaAllocMisc);
   MemoryRegion region(memory, size);
   stream.FillIn(region);
diff --git a/compiler/utils/assembler_test.h b/compiler/utils/assembler_test.h
index 3fe1a31..a339633 100644
--- a/compiler/utils/assembler_test.h
+++ b/compiler/utils/assembler_test.h
@@ -19,6 +19,7 @@
 
 #include "assembler.h"
 
+#include "assembler_test_base.h"
 #include "common_runtime_test.h"  // For ScratchFile
 
 #include <cstdio>
@@ -29,19 +30,11 @@
 
 namespace art {
 
-// If you want to take a look at the differences between the ART assembler and GCC, set this flag
-// to true. The disassembled files will then remain in the tmp directory.
-static constexpr bool kKeepDisassembledFiles = false;
-
 // Helper for a constexpr string length.
 constexpr size_t ConstexprStrLen(char const* str, size_t count = 0) {
   return ('\0' == str[0]) ? count : ConstexprStrLen(str+1, count+1);
 }
 
-// Use a glocal static variable to keep the same name for all test data. Else we'll just spam the
-// temp directory.
-static std::string tmpnam_;
-
 enum class RegisterView {  // private
   kUsePrimaryName,
   kUseSecondaryName,
@@ -59,12 +52,12 @@
   typedef std::string (*TestFn)(AssemblerTest* assembler_test, Ass* assembler);
 
   void DriverFn(TestFn f, std::string test_name) {
-    Driver(f(this, assembler_.get()), test_name);
+    DriverWrapper(f(this, assembler_.get()), test_name);
   }
 
   // This driver assumes the assembler has already been called.
   void DriverStr(std::string assembly_string, std::string test_name) {
-    Driver(assembly_string, test_name);
+    DriverWrapper(assembly_string, test_name);
   }
 
   std::string RepeatR(void (Ass::*f)(Reg), std::string fmt) {
@@ -212,28 +205,7 @@
 
   // This is intended to be run as a test.
   bool CheckTools() {
-    if (!FileExists(FindTool(GetAssemblerCmdName()))) {
-      return false;
-    }
-    LOG(INFO) << "Chosen assembler command: " << GetAssemblerCommand();
-
-    if (!FileExists(FindTool(GetObjdumpCmdName()))) {
-      return false;
-    }
-    LOG(INFO) << "Chosen objdump command: " << GetObjdumpCommand();
-
-    // Disassembly is optional.
-    std::string disassembler = GetDisassembleCommand();
-    if (disassembler.length() != 0) {
-      if (!FileExists(FindTool(GetDisassembleCmdName()))) {
-        return false;
-      }
-      LOG(INFO) << "Chosen disassemble command: " << GetDisassembleCommand();
-    } else {
-      LOG(INFO) << "No disassembler given.";
-    }
-
-    return true;
+    return test_helper_->CheckTools();
   }
 
   // The following functions are public so that TestFn can use them...
@@ -272,17 +244,21 @@
 
   void SetUp() OVERRIDE {
     assembler_.reset(new Ass());
-
-    // Fake a runtime test for ScratchFile
-    CommonRuntimeTest::SetUpAndroidData(android_data_);
+    test_helper_.reset(
+        new AssemblerTestInfrastructure(GetArchitectureString(),
+                                        GetAssemblerCmdName(),
+                                        GetAssemblerParameters(),
+                                        GetObjdumpCmdName(),
+                                        GetObjdumpParameters(),
+                                        GetDisassembleCmdName(),
+                                        GetDisassembleParameters(),
+                                        GetAssemblyHeader()));
 
     SetUpHelpers();
   }
 
   void TearDown() OVERRIDE {
-    // We leave temporaries in case this failed so we can debug issues.
-    CommonRuntimeTest::TearDownAndroidData(android_data_, false);
-    tmpnam_ = "";
+    test_helper_.reset();  // Clean up the helper.
   }
 
   // Override this to set up any architecture-specific things, e.g., register vectors.
@@ -301,23 +277,6 @@
     return "";
   }
 
-  // Return the host assembler command for this test.
-  virtual std::string GetAssemblerCommand() {
-    // Already resolved it once?
-    if (resolved_assembler_cmd_.length() != 0) {
-      return resolved_assembler_cmd_;
-    }
-
-    std::string line = FindTool(GetAssemblerCmdName());
-    if (line.length() == 0) {
-      return line;
-    }
-
-    resolved_assembler_cmd_ = line + GetAssemblerParameters();
-
-    return resolved_assembler_cmd_;
-  }
-
   // Get the name of the objdump, e.g., "objdump" by default.
   virtual std::string GetObjdumpCmdName() {
     return "objdump";
@@ -328,23 +287,6 @@
     return " -h";
   }
 
-  // Return the host objdump command for this test.
-  virtual std::string GetObjdumpCommand() {
-    // Already resolved it once?
-    if (resolved_objdump_cmd_.length() != 0) {
-      return resolved_objdump_cmd_;
-    }
-
-    std::string line = FindTool(GetObjdumpCmdName());
-    if (line.length() == 0) {
-      return line;
-    }
-
-    resolved_objdump_cmd_ = line + GetObjdumpParameters();
-
-    return resolved_objdump_cmd_;
-  }
-
   // Get the name of the objdump, e.g., "objdump" by default.
   virtual std::string GetDisassembleCmdName() {
     return "objdump";
@@ -354,23 +296,6 @@
   // such to objdump, so it's architecture-specific and there is no default.
   virtual std::string GetDisassembleParameters() = 0;
 
-  // Return the host disassembler command for this test.
-  virtual std::string GetDisassembleCommand() {
-    // Already resolved it once?
-    if (resolved_disassemble_cmd_.length() != 0) {
-      return resolved_disassemble_cmd_;
-    }
-
-    std::string line = FindTool(GetDisassembleCmdName());
-    if (line.length() == 0) {
-      return line;
-    }
-
-    resolved_disassemble_cmd_ = line + GetDisassembleParameters();
-
-    return resolved_disassemble_cmd_;
-  }
-
   // Create a couple of immediate values up to the number of bytes given.
   virtual std::vector<int64_t> CreateImmediateValues(size_t imm_bytes, bool as_uint = false) {
     std::vector<int64_t> res;
@@ -618,395 +543,18 @@
     return str;
   }
 
-  // Driver() assembles and compares the results. If the results are not equal and we have a
-  // disassembler, disassemble both and check whether they have the same mnemonics (in which case
-  // we just warn).
-  void Driver(std::string assembly_text, std::string test_name) {
-    EXPECT_NE(assembly_text.length(), 0U) << "Empty assembly";
-
-    NativeAssemblerResult res;
-    Compile(assembly_text, &res, test_name);
-
-    EXPECT_TRUE(res.ok) << res.error_msg;
-    if (!res.ok) {
-      // No way of continuing.
-      return;
-    }
-
+  void DriverWrapper(std::string assembly_text, std::string test_name) {
     size_t cs = assembler_->CodeSize();
     std::unique_ptr<std::vector<uint8_t>> data(new std::vector<uint8_t>(cs));
     MemoryRegion code(&(*data)[0], data->size());
     assembler_->FinalizeInstructions(code);
-
-    if (*data == *res.code) {
-      Clean(&res);
-    } else {
-      if (DisassembleBinaries(*data, *res.code, test_name)) {
-        if (data->size() > res.code->size()) {
-          // Fail this test with a fancy colored warning being printed.
-          EXPECT_TRUE(false) << "Assembly code is not identical, but disassembly of machine code "
-              "is equal: this implies sub-optimal encoding! Our code size=" << data->size() <<
-              ", gcc size=" << res.code->size();
-        } else {
-          // Otherwise just print an info message and clean up.
-          LOG(INFO) << "GCC chose a different encoding than ours, but the overall length is the "
-              "same.";
-          Clean(&res);
-        }
-      } else {
-        // This will output the assembly.
-        EXPECT_EQ(*res.code, *data) << "Outputs (and disassembly) not identical.";
-      }
-    }
-  }
-
-  // Structure to store intermediates and results.
-  struct NativeAssemblerResult {
-    bool ok;
-    std::string error_msg;
-    std::string base_name;
-    std::unique_ptr<std::vector<uint8_t>> code;
-    uintptr_t length;
-  };
-
-  // Compile the assembly file from_file to a binary file to_file. Returns true on success.
-  bool Assemble(const char* from_file, const char* to_file, std::string* error_msg) {
-    bool have_assembler = FileExists(FindTool(GetAssemblerCmdName()));
-    EXPECT_TRUE(have_assembler) << "Cannot find assembler:" << GetAssemblerCommand();
-    if (!have_assembler) {
-      return false;
-    }
-
-    std::vector<std::string> args;
-
-    // Encaspulate the whole command line in a single string passed to
-    // the shell, so that GetAssemblerCommand() may contain arguments
-    // in addition to the program name.
-    args.push_back(GetAssemblerCommand());
-    args.push_back("-o");
-    args.push_back(to_file);
-    args.push_back(from_file);
-    std::string cmd = Join(args, ' ');
-
-    args.clear();
-    args.push_back("/bin/sh");
-    args.push_back("-c");
-    args.push_back(cmd);
-
-    bool success = Exec(args, error_msg);
-    if (!success) {
-      LOG(INFO) << "Assembler command line:";
-      for (std::string arg : args) {
-        LOG(INFO) << arg;
-      }
-    }
-    return success;
-  }
-
-  // Runs objdump -h on the binary file and extracts the first line with .text.
-  // Returns "" on failure.
-  std::string Objdump(std::string file) {
-    bool have_objdump = FileExists(FindTool(GetObjdumpCmdName()));
-    EXPECT_TRUE(have_objdump) << "Cannot find objdump: " << GetObjdumpCommand();
-    if (!have_objdump) {
-      return "";
-    }
-
-    std::string error_msg;
-    std::vector<std::string> args;
-
-    // Encaspulate the whole command line in a single string passed to
-    // the shell, so that GetObjdumpCommand() may contain arguments
-    // in addition to the program name.
-    args.push_back(GetObjdumpCommand());
-    args.push_back(file);
-    args.push_back(">");
-    args.push_back(file+".dump");
-    std::string cmd = Join(args, ' ');
-
-    args.clear();
-    args.push_back("/bin/sh");
-    args.push_back("-c");
-    args.push_back(cmd);
-
-    if (!Exec(args, &error_msg)) {
-      EXPECT_TRUE(false) << error_msg;
-    }
-
-    std::ifstream dump(file+".dump");
-
-    std::string line;
-    bool found = false;
-    while (std::getline(dump, line)) {
-      if (line.find(".text") != line.npos) {
-        found = true;
-        break;
-      }
-    }
-
-    dump.close();
-
-    if (found) {
-      return line;
-    } else {
-      return "";
-    }
-  }
-
-  // Disassemble both binaries and compare the text.
-  bool DisassembleBinaries(std::vector<uint8_t>& data, std::vector<uint8_t>& as,
-                           std::string test_name) {
-    std::string disassembler = GetDisassembleCommand();
-    if (disassembler.length() == 0) {
-      LOG(WARNING) << "No dissassembler command.";
-      return false;
-    }
-
-    std::string data_name = WriteToFile(data, test_name + ".ass");
-    std::string error_msg;
-    if (!DisassembleBinary(data_name, &error_msg)) {
-      LOG(INFO) << "Error disassembling: " << error_msg;
-      std::remove(data_name.c_str());
-      return false;
-    }
-
-    std::string as_name = WriteToFile(as, test_name + ".gcc");
-    if (!DisassembleBinary(as_name, &error_msg)) {
-      LOG(INFO) << "Error disassembling: " << error_msg;
-      std::remove(data_name.c_str());
-      std::remove((data_name + ".dis").c_str());
-      std::remove(as_name.c_str());
-      return false;
-    }
-
-    bool result = CompareFiles(data_name + ".dis", as_name + ".dis");
-
-    if (!kKeepDisassembledFiles) {
-      std::remove(data_name.c_str());
-      std::remove(as_name.c_str());
-      std::remove((data_name + ".dis").c_str());
-      std::remove((as_name + ".dis").c_str());
-    }
-
-    return result;
-  }
-
-  bool DisassembleBinary(std::string file, std::string* error_msg) {
-    std::vector<std::string> args;
-
-    // Encaspulate the whole command line in a single string passed to
-    // the shell, so that GetDisassembleCommand() may contain arguments
-    // in addition to the program name.
-    args.push_back(GetDisassembleCommand());
-    args.push_back(file);
-    args.push_back("| sed -n \'/<.data>/,$p\' | sed -e \'s/.*://\'");
-    args.push_back(">");
-    args.push_back(file+".dis");
-    std::string cmd = Join(args, ' ');
-
-    args.clear();
-    args.push_back("/bin/sh");
-    args.push_back("-c");
-    args.push_back(cmd);
-
-    return Exec(args, error_msg);
-  }
-
-  std::string WriteToFile(std::vector<uint8_t>& buffer, std::string test_name) {
-    std::string file_name = GetTmpnam() + std::string("---") + test_name;
-    const char* data = reinterpret_cast<char*>(buffer.data());
-    std::ofstream s_out(file_name + ".o");
-    s_out.write(data, buffer.size());
-    s_out.close();
-    return file_name + ".o";
-  }
-
-  bool CompareFiles(std::string f1, std::string f2) {
-    std::ifstream f1_in(f1);
-    std::ifstream f2_in(f2);
-
-    bool result = std::equal(std::istreambuf_iterator<char>(f1_in),
-                             std::istreambuf_iterator<char>(),
-                             std::istreambuf_iterator<char>(f2_in));
-
-    f1_in.close();
-    f2_in.close();
-
-    return result;
-  }
-
-  // Compile the given assembly code and extract the binary, if possible. Put result into res.
-  bool Compile(std::string assembly_code, NativeAssemblerResult* res, std::string test_name) {
-    res->ok = false;
-    res->code.reset(nullptr);
-
-    res->base_name = GetTmpnam() + std::string("---") + test_name;
-
-    // TODO: Lots of error checking.
-
-    std::ofstream s_out(res->base_name + ".S");
-    const char* header = GetAssemblyHeader();
-    if (header != nullptr) {
-      s_out << header;
-    }
-    s_out << assembly_code;
-    s_out.close();
-
-    if (!Assemble((res->base_name + ".S").c_str(), (res->base_name + ".o").c_str(),
-                  &res->error_msg)) {
-      res->error_msg = "Could not compile.";
-      return false;
-    }
-
-    std::string odump = Objdump(res->base_name + ".o");
-    if (odump.length() == 0) {
-      res->error_msg = "Objdump failed.";
-      return false;
-    }
-
-    std::istringstream iss(odump);
-    std::istream_iterator<std::string> start(iss);
-    std::istream_iterator<std::string> end;
-    std::vector<std::string> tokens(start, end);
-
-    if (tokens.size() < OBJDUMP_SECTION_LINE_MIN_TOKENS) {
-      res->error_msg = "Objdump output not recognized: too few tokens.";
-      return false;
-    }
-
-    if (tokens[1] != ".text") {
-      res->error_msg = "Objdump output not recognized: .text not second token.";
-      return false;
-    }
-
-    std::string lengthToken = "0x" + tokens[2];
-    std::istringstream(lengthToken) >> std::hex >> res->length;
-
-    std::string offsetToken = "0x" + tokens[5];
-    uintptr_t offset;
-    std::istringstream(offsetToken) >> std::hex >> offset;
-
-    std::ifstream obj(res->base_name + ".o");
-    obj.seekg(offset);
-    res->code.reset(new std::vector<uint8_t>(res->length));
-    obj.read(reinterpret_cast<char*>(&(*res->code)[0]), res->length);
-    obj.close();
-
-    res->ok = true;
-    return true;
-  }
-
-  // Remove temporary files.
-  void Clean(const NativeAssemblerResult* res) {
-    std::remove((res->base_name + ".S").c_str());
-    std::remove((res->base_name + ".o").c_str());
-    std::remove((res->base_name + ".o.dump").c_str());
-  }
-
-  // Check whether file exists. Is used for commands, so strips off any parameters: anything after
-  // the first space. We skip to the last slash for this, so it should work with directories with
-  // spaces.
-  static bool FileExists(std::string file) {
-    if (file.length() == 0) {
-      return false;
-    }
-
-    // Need to strip any options.
-    size_t last_slash = file.find_last_of('/');
-    if (last_slash == std::string::npos) {
-      // No slash, start looking at the start.
-      last_slash = 0;
-    }
-    size_t space_index = file.find(' ', last_slash);
-
-    if (space_index == std::string::npos) {
-      std::ifstream infile(file.c_str());
-      return infile.good();
-    } else {
-      std::string copy = file.substr(0, space_index - 1);
-
-      struct stat buf;
-      return stat(copy.c_str(), &buf) == 0;
-    }
-  }
-
-  static std::string GetGCCRootPath() {
-    return "prebuilts/gcc/linux-x86";
-  }
-
-  static std::string GetRootPath() {
-    // 1) Check ANDROID_BUILD_TOP
-    char* build_top = getenv("ANDROID_BUILD_TOP");
-    if (build_top != nullptr) {
-      return std::string(build_top) + "/";
-    }
-
-    // 2) Do cwd
-    char temp[1024];
-    return getcwd(temp, 1024) ? std::string(temp) + "/" : std::string("");
-  }
-
-  std::string FindTool(std::string tool_name) {
-    // Find the current tool. Wild-card pattern is "arch-string*tool-name".
-    std::string gcc_path = GetRootPath() + GetGCCRootPath();
-    std::vector<std::string> args;
-    args.push_back("find");
-    args.push_back(gcc_path);
-    args.push_back("-name");
-    args.push_back(GetArchitectureString() + "*" + tool_name);
-    args.push_back("|");
-    args.push_back("sort");
-    args.push_back("|");
-    args.push_back("tail");
-    args.push_back("-n");
-    args.push_back("1");
-    std::string tmp_file = GetTmpnam();
-    args.push_back(">");
-    args.push_back(tmp_file);
-    std::string sh_args = Join(args, ' ');
-
-    args.clear();
-    args.push_back("/bin/sh");
-    args.push_back("-c");
-    args.push_back(sh_args);
-
-    std::string error_msg;
-    if (!Exec(args, &error_msg)) {
-      EXPECT_TRUE(false) << error_msg;
-      return "";
-    }
-
-    std::ifstream in(tmp_file.c_str());
-    std::string line;
-    if (!std::getline(in, line)) {
-      in.close();
-      std::remove(tmp_file.c_str());
-      return "";
-    }
-    in.close();
-    std::remove(tmp_file.c_str());
-    return line;
-  }
-
-  // Use a consistent tmpnam, so store it.
-  std::string GetTmpnam() {
-    if (tmpnam_.length() == 0) {
-      ScratchFile tmp;
-      tmpnam_ = tmp.GetFilename() + "asm";
-    }
-    return tmpnam_;
+    test_helper_->Driver(*data, assembly_text, test_name);
   }
 
   static constexpr size_t kWarnManyCombinationsThreshold = 500;
-  static constexpr size_t OBJDUMP_SECTION_LINE_MIN_TOKENS = 6;
 
   std::unique_ptr<Ass> assembler_;
-
-  std::string resolved_assembler_cmd_;
-  std::string resolved_objdump_cmd_;
-  std::string resolved_disassemble_cmd_;
-
-  std::string android_data_;
+  std::unique_ptr<AssemblerTestInfrastructure> test_helper_;
 
   DISALLOW_COPY_AND_ASSIGN(AssemblerTest);
 };
diff --git a/compiler/utils/assembler_test_base.h b/compiler/utils/assembler_test_base.h
new file mode 100644
index 0000000..3341151
--- /dev/null
+++ b/compiler/utils/assembler_test_base.h
@@ -0,0 +1,544 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_UTILS_ASSEMBLER_TEST_BASE_H_
+#define ART_COMPILER_UTILS_ASSEMBLER_TEST_BASE_H_
+
+#include "common_runtime_test.h"  // For ScratchFile
+
+#include <cstdio>
+#include <cstdlib>
+#include <fstream>
+#include <iterator>
+#include <sys/stat.h>
+
+namespace art {
+
+// If you want to take a look at the differences between the ART assembler and GCC, set this flag
+// to true. The disassembled files will then remain in the tmp directory.
+static constexpr bool kKeepDisassembledFiles = false;
+
+// Use a glocal static variable to keep the same name for all test data. Else we'll just spam the
+// temp directory.
+static std::string tmpnam_;
+
+// We put this into a class as gtests are self-contained, so this helper needs to be in an h-file.
+class AssemblerTestInfrastructure {
+ public:
+  AssemblerTestInfrastructure(std::string architecture,
+                              std::string as,
+                              std::string as_params,
+                              std::string objdump,
+                              std::string objdump_params,
+                              std::string disasm,
+                              std::string disasm_params,
+                              const char* asm_header) :
+      architecture_string_(architecture),
+      asm_header_(asm_header),
+      assembler_cmd_name_(as),
+      assembler_parameters_(as_params),
+      objdump_cmd_name_(objdump),
+      objdump_parameters_(objdump_params),
+      disassembler_cmd_name_(disasm),
+      disassembler_parameters_(disasm_params) {
+    // Fake a runtime test for ScratchFile
+    CommonRuntimeTest::SetUpAndroidData(android_data_);
+  }
+
+  virtual ~AssemblerTestInfrastructure() {
+    // We leave temporaries in case this failed so we can debug issues.
+    CommonRuntimeTest::TearDownAndroidData(android_data_, false);
+    tmpnam_ = "";
+  }
+
+  // This is intended to be run as a test.
+  bool CheckTools() {
+    if (!FileExists(FindTool(assembler_cmd_name_))) {
+      return false;
+    }
+    LOG(INFO) << "Chosen assembler command: " << GetAssemblerCommand();
+
+    if (!FileExists(FindTool(objdump_cmd_name_))) {
+      return false;
+    }
+    LOG(INFO) << "Chosen objdump command: " << GetObjdumpCommand();
+
+    // Disassembly is optional.
+    std::string disassembler = GetDisassembleCommand();
+    if (disassembler.length() != 0) {
+      if (!FileExists(FindTool(disassembler_cmd_name_))) {
+        return false;
+      }
+      LOG(INFO) << "Chosen disassemble command: " << GetDisassembleCommand();
+    } else {
+      LOG(INFO) << "No disassembler given.";
+    }
+
+    return true;
+  }
+
+  // Driver() assembles and compares the results. If the results are not equal and we have a
+  // disassembler, disassemble both and check whether they have the same mnemonics (in which case
+  // we just warn).
+  void Driver(const std::vector<uint8_t>& data, std::string assembly_text, std::string test_name) {
+    EXPECT_NE(assembly_text.length(), 0U) << "Empty assembly";
+
+    NativeAssemblerResult res;
+    Compile(assembly_text, &res, test_name);
+
+    EXPECT_TRUE(res.ok) << res.error_msg;
+    if (!res.ok) {
+      // No way of continuing.
+      return;
+    }
+
+    if (data == *res.code) {
+      Clean(&res);
+    } else {
+      if (DisassembleBinaries(data, *res.code, test_name)) {
+        if (data.size() > res.code->size()) {
+          // Fail this test with a fancy colored warning being printed.
+          EXPECT_TRUE(false) << "Assembly code is not identical, but disassembly of machine code "
+              "is equal: this implies sub-optimal encoding! Our code size=" << data.size() <<
+              ", gcc size=" << res.code->size();
+        } else {
+          // Otherwise just print an info message and clean up.
+          LOG(INFO) << "GCC chose a different encoding than ours, but the overall length is the "
+              "same.";
+          Clean(&res);
+        }
+      } else {
+        // This will output the assembly.
+        EXPECT_EQ(*res.code, data) << "Outputs (and disassembly) not identical.";
+      }
+    }
+  }
+
+ protected:
+  // Return the host assembler command for this test.
+  virtual std::string GetAssemblerCommand() {
+    // Already resolved it once?
+    if (resolved_assembler_cmd_.length() != 0) {
+      return resolved_assembler_cmd_;
+    }
+
+    std::string line = FindTool(assembler_cmd_name_);
+    if (line.length() == 0) {
+      return line;
+    }
+
+    resolved_assembler_cmd_ = line + assembler_parameters_;
+
+    return resolved_assembler_cmd_;
+  }
+
+  // Return the host objdump command for this test.
+  virtual std::string GetObjdumpCommand() {
+    // Already resolved it once?
+    if (resolved_objdump_cmd_.length() != 0) {
+      return resolved_objdump_cmd_;
+    }
+
+    std::string line = FindTool(objdump_cmd_name_);
+    if (line.length() == 0) {
+      return line;
+    }
+
+    resolved_objdump_cmd_ = line + objdump_parameters_;
+
+    return resolved_objdump_cmd_;
+  }
+
+  // Return the host disassembler command for this test.
+  virtual std::string GetDisassembleCommand() {
+    // Already resolved it once?
+    if (resolved_disassemble_cmd_.length() != 0) {
+      return resolved_disassemble_cmd_;
+    }
+
+    std::string line = FindTool(disassembler_cmd_name_);
+    if (line.length() == 0) {
+      return line;
+    }
+
+    resolved_disassemble_cmd_ = line + disassembler_parameters_;
+
+    return resolved_disassemble_cmd_;
+  }
+
+ private:
+  // Structure to store intermediates and results.
+  struct NativeAssemblerResult {
+    bool ok;
+    std::string error_msg;
+    std::string base_name;
+    std::unique_ptr<std::vector<uint8_t>> code;
+    uintptr_t length;
+  };
+
+  // Compile the assembly file from_file to a binary file to_file. Returns true on success.
+  bool Assemble(const char* from_file, const char* to_file, std::string* error_msg) {
+    bool have_assembler = FileExists(FindTool(assembler_cmd_name_));
+    EXPECT_TRUE(have_assembler) << "Cannot find assembler:" << GetAssemblerCommand();
+    if (!have_assembler) {
+      return false;
+    }
+
+    std::vector<std::string> args;
+
+    // Encaspulate the whole command line in a single string passed to
+    // the shell, so that GetAssemblerCommand() may contain arguments
+    // in addition to the program name.
+    args.push_back(GetAssemblerCommand());
+    args.push_back("-o");
+    args.push_back(to_file);
+    args.push_back(from_file);
+    std::string cmd = Join(args, ' ');
+
+    args.clear();
+    args.push_back("/bin/sh");
+    args.push_back("-c");
+    args.push_back(cmd);
+
+    bool success = Exec(args, error_msg);
+    if (!success) {
+      LOG(INFO) << "Assembler command line:";
+      for (std::string arg : args) {
+        LOG(INFO) << arg;
+      }
+    }
+    return success;
+  }
+
+  // Runs objdump -h on the binary file and extracts the first line with .text.
+  // Returns "" on failure.
+  std::string Objdump(std::string file) {
+    bool have_objdump = FileExists(FindTool(objdump_cmd_name_));
+    EXPECT_TRUE(have_objdump) << "Cannot find objdump: " << GetObjdumpCommand();
+    if (!have_objdump) {
+      return "";
+    }
+
+    std::string error_msg;
+    std::vector<std::string> args;
+
+    // Encaspulate the whole command line in a single string passed to
+    // the shell, so that GetObjdumpCommand() may contain arguments
+    // in addition to the program name.
+    args.push_back(GetObjdumpCommand());
+    args.push_back(file);
+    args.push_back(">");
+    args.push_back(file+".dump");
+    std::string cmd = Join(args, ' ');
+
+    args.clear();
+    args.push_back("/bin/sh");
+    args.push_back("-c");
+    args.push_back(cmd);
+
+    if (!Exec(args, &error_msg)) {
+      EXPECT_TRUE(false) << error_msg;
+    }
+
+    std::ifstream dump(file+".dump");
+
+    std::string line;
+    bool found = false;
+    while (std::getline(dump, line)) {
+      if (line.find(".text") != line.npos) {
+        found = true;
+        break;
+      }
+    }
+
+    dump.close();
+
+    if (found) {
+      return line;
+    } else {
+      return "";
+    }
+  }
+
+  // Disassemble both binaries and compare the text.
+  bool DisassembleBinaries(const std::vector<uint8_t>& data, const std::vector<uint8_t>& as,
+                           std::string test_name) {
+    std::string disassembler = GetDisassembleCommand();
+    if (disassembler.length() == 0) {
+      LOG(WARNING) << "No dissassembler command.";
+      return false;
+    }
+
+    std::string data_name = WriteToFile(data, test_name + ".ass");
+    std::string error_msg;
+    if (!DisassembleBinary(data_name, &error_msg)) {
+      LOG(INFO) << "Error disassembling: " << error_msg;
+      std::remove(data_name.c_str());
+      return false;
+    }
+
+    std::string as_name = WriteToFile(as, test_name + ".gcc");
+    if (!DisassembleBinary(as_name, &error_msg)) {
+      LOG(INFO) << "Error disassembling: " << error_msg;
+      std::remove(data_name.c_str());
+      std::remove((data_name + ".dis").c_str());
+      std::remove(as_name.c_str());
+      return false;
+    }
+
+    bool result = CompareFiles(data_name + ".dis", as_name + ".dis");
+
+    if (!kKeepDisassembledFiles) {
+      std::remove(data_name.c_str());
+      std::remove(as_name.c_str());
+      std::remove((data_name + ".dis").c_str());
+      std::remove((as_name + ".dis").c_str());
+    }
+
+    return result;
+  }
+
+  bool DisassembleBinary(std::string file, std::string* error_msg) {
+    std::vector<std::string> args;
+
+    // Encaspulate the whole command line in a single string passed to
+    // the shell, so that GetDisassembleCommand() may contain arguments
+    // in addition to the program name.
+    args.push_back(GetDisassembleCommand());
+    args.push_back(file);
+    args.push_back("| sed -n \'/<.data>/,$p\' | sed -e \'s/.*://\'");
+    args.push_back(">");
+    args.push_back(file+".dis");
+    std::string cmd = Join(args, ' ');
+
+    args.clear();
+    args.push_back("/bin/sh");
+    args.push_back("-c");
+    args.push_back(cmd);
+
+    return Exec(args, error_msg);
+  }
+
+  std::string WriteToFile(const std::vector<uint8_t>& buffer, std::string test_name) {
+    std::string file_name = GetTmpnam() + std::string("---") + test_name;
+    const char* data = reinterpret_cast<const char*>(buffer.data());
+    std::ofstream s_out(file_name + ".o");
+    s_out.write(data, buffer.size());
+    s_out.close();
+    return file_name + ".o";
+  }
+
+  bool CompareFiles(std::string f1, std::string f2) {
+    std::ifstream f1_in(f1);
+    std::ifstream f2_in(f2);
+
+    bool result = std::equal(std::istreambuf_iterator<char>(f1_in),
+                             std::istreambuf_iterator<char>(),
+                             std::istreambuf_iterator<char>(f2_in));
+
+    f1_in.close();
+    f2_in.close();
+
+    return result;
+  }
+
+  // Compile the given assembly code and extract the binary, if possible. Put result into res.
+  bool Compile(std::string assembly_code, NativeAssemblerResult* res, std::string test_name) {
+    res->ok = false;
+    res->code.reset(nullptr);
+
+    res->base_name = GetTmpnam() + std::string("---") + test_name;
+
+    // TODO: Lots of error checking.
+
+    std::ofstream s_out(res->base_name + ".S");
+    if (asm_header_ != nullptr) {
+      s_out << asm_header_;
+    }
+    s_out << assembly_code;
+    s_out.close();
+
+    if (!Assemble((res->base_name + ".S").c_str(), (res->base_name + ".o").c_str(),
+                  &res->error_msg)) {
+      res->error_msg = "Could not compile.";
+      return false;
+    }
+
+    std::string odump = Objdump(res->base_name + ".o");
+    if (odump.length() == 0) {
+      res->error_msg = "Objdump failed.";
+      return false;
+    }
+
+    std::istringstream iss(odump);
+    std::istream_iterator<std::string> start(iss);
+    std::istream_iterator<std::string> end;
+    std::vector<std::string> tokens(start, end);
+
+    if (tokens.size() < OBJDUMP_SECTION_LINE_MIN_TOKENS) {
+      res->error_msg = "Objdump output not recognized: too few tokens.";
+      return false;
+    }
+
+    if (tokens[1] != ".text") {
+      res->error_msg = "Objdump output not recognized: .text not second token.";
+      return false;
+    }
+
+    std::string lengthToken = "0x" + tokens[2];
+    std::istringstream(lengthToken) >> std::hex >> res->length;
+
+    std::string offsetToken = "0x" + tokens[5];
+    uintptr_t offset;
+    std::istringstream(offsetToken) >> std::hex >> offset;
+
+    std::ifstream obj(res->base_name + ".o");
+    obj.seekg(offset);
+    res->code.reset(new std::vector<uint8_t>(res->length));
+    obj.read(reinterpret_cast<char*>(&(*res->code)[0]), res->length);
+    obj.close();
+
+    res->ok = true;
+    return true;
+  }
+
+  // Remove temporary files.
+  void Clean(const NativeAssemblerResult* res) {
+    std::remove((res->base_name + ".S").c_str());
+    std::remove((res->base_name + ".o").c_str());
+    std::remove((res->base_name + ".o.dump").c_str());
+  }
+
+  // Check whether file exists. Is used for commands, so strips off any parameters: anything after
+  // the first space. We skip to the last slash for this, so it should work with directories with
+  // spaces.
+  static bool FileExists(std::string file) {
+    if (file.length() == 0) {
+      return false;
+    }
+
+    // Need to strip any options.
+    size_t last_slash = file.find_last_of('/');
+    if (last_slash == std::string::npos) {
+      // No slash, start looking at the start.
+      last_slash = 0;
+    }
+    size_t space_index = file.find(' ', last_slash);
+
+    if (space_index == std::string::npos) {
+      std::ifstream infile(file.c_str());
+      return infile.good();
+    } else {
+      std::string copy = file.substr(0, space_index - 1);
+
+      struct stat buf;
+      return stat(copy.c_str(), &buf) == 0;
+    }
+  }
+
+  static std::string GetGCCRootPath() {
+    return "prebuilts/gcc/linux-x86";
+  }
+
+  static std::string GetRootPath() {
+    // 1) Check ANDROID_BUILD_TOP
+    char* build_top = getenv("ANDROID_BUILD_TOP");
+    if (build_top != nullptr) {
+      return std::string(build_top) + "/";
+    }
+
+    // 2) Do cwd
+    char temp[1024];
+    return getcwd(temp, 1024) ? std::string(temp) + "/" : std::string("");
+  }
+
+  std::string FindTool(std::string tool_name) {
+    // Find the current tool. Wild-card pattern is "arch-string*tool-name".
+    std::string gcc_path = GetRootPath() + GetGCCRootPath();
+    std::vector<std::string> args;
+    args.push_back("find");
+    args.push_back(gcc_path);
+    args.push_back("-name");
+    args.push_back(architecture_string_ + "*" + tool_name);
+    args.push_back("|");
+    args.push_back("sort");
+    args.push_back("|");
+    args.push_back("tail");
+    args.push_back("-n");
+    args.push_back("1");
+    std::string tmp_file = GetTmpnam();
+    args.push_back(">");
+    args.push_back(tmp_file);
+    std::string sh_args = Join(args, ' ');
+
+    args.clear();
+    args.push_back("/bin/sh");
+    args.push_back("-c");
+    args.push_back(sh_args);
+
+    std::string error_msg;
+    if (!Exec(args, &error_msg)) {
+      EXPECT_TRUE(false) << error_msg;
+      return "";
+    }
+
+    std::ifstream in(tmp_file.c_str());
+    std::string line;
+    if (!std::getline(in, line)) {
+      in.close();
+      std::remove(tmp_file.c_str());
+      return "";
+    }
+    in.close();
+    std::remove(tmp_file.c_str());
+    return line;
+  }
+
+  // Use a consistent tmpnam, so store it.
+  std::string GetTmpnam() {
+    if (tmpnam_.length() == 0) {
+      ScratchFile tmp;
+      tmpnam_ = tmp.GetFilename() + "asm";
+    }
+    return tmpnam_;
+  }
+
+  static constexpr size_t OBJDUMP_SECTION_LINE_MIN_TOKENS = 6;
+
+  std::string architecture_string_;
+  const char* asm_header_;
+
+  std::string assembler_cmd_name_;
+  std::string assembler_parameters_;
+
+  std::string objdump_cmd_name_;
+  std::string objdump_parameters_;
+
+  std::string disassembler_cmd_name_;
+  std::string disassembler_parameters_;
+
+  std::string resolved_assembler_cmd_;
+  std::string resolved_objdump_cmd_;
+  std::string resolved_disassemble_cmd_;
+
+  std::string android_data_;
+
+  DISALLOW_COPY_AND_ASSIGN(AssemblerTestInfrastructure);
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_UTILS_ASSEMBLER_TEST_BASE_H_
diff --git a/compiler/utils/growable_array.h b/compiler/utils/growable_array.h
index 821e28b..e4b1e7d 100644
--- a/compiler/utils/growable_array.h
+++ b/compiler/utils/growable_array.h
@@ -46,6 +46,14 @@
       }
     }
 
+    bool Contains(T value) const {
+      for (size_t i = 0; i < num_used_; ++i) {
+        if (elem_list_[i] == value) {
+          return true;
+        }
+      }
+      return false;
+    }
 
     // Expand the list size to at least new length.
     void Resize(size_t new_length) {
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 2a3a346..b764095 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -1217,9 +1217,9 @@
       if (!UseSwap(image_, dex_files_)) {
         close(swap_fd_);
         swap_fd_ = -1;
-        LOG(INFO) << "Decided to run without swap.";
+        VLOG(compiler) << "Decided to run without swap.";
       } else {
-        LOG(INFO) << "Accepted running with swap.";
+        LOG(INFO) << "Large app, accepted running with swap.";
       }
     }
     // Note that dex2oat won't close the swap_fd_. The compiler driver's swap space will do that.
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index 8f6162f..599c22a 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -669,6 +669,18 @@
 END art_quick_aput_obj
 
 // Macro to facilitate adding new allocation entrypoints.
+.macro ONE_ARG_DOWNCALL name, entrypoint, return
+    .extern \entrypoint
+ENTRY \name
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  r1, r2  @ save callee saves in case of GC
+    mov    r1, r9                     @ pass Thread::Current
+    bl     \entrypoint     @ (uint32_t type_idx, Method* method, Thread*)
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    \return
+END \name
+.endm
+
+// Macro to facilitate adding new allocation entrypoints.
 .macro TWO_ARG_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
@@ -693,10 +705,9 @@
 END \name
 .endm
 
-TWO_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
-
-TWO_ARG_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
-TWO_ARG_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+ONE_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+ONE_ARG_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+ONE_ARG_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 
     /*
      * Called by managed code to resolve a static field and load a non-wide value.
@@ -805,11 +816,10 @@
 
     /*
      * Entry from managed code to resolve a string, this stub will allocate a String and deliver an
-     * exception on error. On success the String is returned. R0 holds the referring method,
-     * R1 holds the string index. The fast path check for hit in strings cache has already been
-     * performed.
+     * exception on error. On success the String is returned. R0 holds the string index. The fast
+     * path check for hit in strings cache has already been performed.
      */
-TWO_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+ONE_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 
 // Generate the allocation entrypoints for each allocator.
 GENERATE_ALL_ALLOC_ENTRYPOINTS
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index cbd4b7c..1e78877 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -1261,6 +1261,18 @@
 END art_quick_aput_obj
 
 // Macro to facilitate adding new allocation entrypoints.
+.macro ONE_ARG_DOWNCALL name, entrypoint, return
+    .extern \entrypoint
+ENTRY \name
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  // save callee saves in case of GC
+    mov    x1, xSELF                  // pass Thread::Current
+    bl     \entrypoint                // (uint32_t type_idx, Method* method, Thread*)
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    \return
+END \name
+.endm
+
+// Macro to facilitate adding new allocation entrypoints.
 .macro TWO_ARG_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
@@ -1339,10 +1351,10 @@
      * initializer and deliver the exception on error. On success the static storage base is
      * returned.
      */
-TWO_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+ONE_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 
-TWO_ARG_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
-TWO_ARG_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+ONE_ARG_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+ONE_ARG_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 
 ONE_ARG_REF_DOWNCALL art_quick_get_boolean_static, artGetBooleanStaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
 ONE_ARG_REF_DOWNCALL art_quick_get_byte_static, artGetByteStaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
@@ -1386,11 +1398,10 @@
 
     /*
      * Entry from managed code to resolve a string, this stub will allocate a String and deliver an
-     * exception on error. On success the String is returned. x0 holds the referring method,
-     * w1 holds the string index. The fast path check for hit in strings cache has already been
-     * performed.
+     * exception on error. On success the String is returned. w0 holds the string index. The fast
+     * path check for hit in strings cache has already been performed.
      */
-TWO_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+ONE_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 
 // Generate the allocation entrypoints for each allocator.
 GENERATE_ALL_ALLOC_ENTRYPOINTS
diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S
index 622c48f..356a145 100644
--- a/runtime/arch/mips/quick_entrypoints_mips.S
+++ b/runtime/arch/mips/quick_entrypoints_mips.S
@@ -982,6 +982,16 @@
     RETURN_IF_ZERO
 END art_quick_set_obj_instance
 
+.macro ONE_ARG_DOWNCALL name, entrypoint, return
+    .extern \entrypoint
+ENTRY \name
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME # save callee saves in case of GC
+    jal     \entrypoint
+    move    $a1, rSELF                # pass Thread::Current
+    \return
+END \name
+.endm
+
 // Macro to facilitate adding new allocation entrypoints.
 .macro TWO_ARG_DOWNCALL name, entrypoint, return
     .extern \entrypoint
@@ -1008,29 +1018,28 @@
 
     /*
      * Entry from managed code to resolve a string, this stub will allocate a String and deliver an
-     * exception on error. On success the String is returned. R0 holds the referring method,
-     * R1 holds the string index. The fast path check for hit in strings cache has already been
-     * performed.
+     * exception on error. On success the String is returned. A0 holds the string index. The fast
+     * path check for hit in strings cache has already been performed.
      */
-TWO_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+ONE_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 
     /*
      * Entry from managed code when uninitialized static storage, this stub will run the class
      * initializer and deliver the exception on error. On success the static storage base is
      * returned.
      */
-TWO_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+ONE_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 
     /*
      * Entry from managed code when dex cache misses for a type_idx.
      */
-TWO_ARG_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+ONE_ARG_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 
     /*
      * Entry from managed code when type_idx needs to be checked for access and dex cache may also
      * miss.
      */
-TWO_ARG_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+ONE_ARG_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 
     /*
      * Called by managed code when the value in rSUSPEND has been decremented to 0.
diff --git a/runtime/arch/mips64/quick_entrypoints_mips64.S b/runtime/arch/mips64/quick_entrypoints_mips64.S
index bf18dd5..f867aa8 100644
--- a/runtime/arch/mips64/quick_entrypoints_mips64.S
+++ b/runtime/arch/mips64/quick_entrypoints_mips64.S
@@ -945,45 +945,6 @@
 END art_quick_aput_obj
 
     /*
-     * Entry from managed code when uninitialized static storage, this stub will run the class
-     * initializer and deliver the exception on error. On success the static storage base is
-     * returned.
-     */
-    .extern artInitializeStaticStorageFromCode
-ENTRY art_quick_initialize_static_storage
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME           # save callee saves in case of GC
-    # artInitializeStaticStorageFromCode(uint32_t type_idx, Method* referrer, Thread*)
-    jal     artInitializeStaticStorageFromCode
-    move    $a2, rSELF                          # pass Thread::Current
-    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
-END art_quick_initialize_static_storage
-
-    /*
-     * Entry from managed code when dex cache misses for a type_idx.
-     */
-    .extern artInitializeTypeFromCode
-ENTRY art_quick_initialize_type
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME          # save callee saves in case of GC
-    # artInitializeTypeFromCode(uint32_t type_idx, Method* referrer, Thread*)
-    jal     artInitializeTypeFromCode
-    move    $a2, rSELF                         # pass Thread::Current
-    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
-END art_quick_initialize_type
-
-    /*
-     * Entry from managed code when type_idx needs to be checked for access and dex cache may also
-     * miss.
-     */
-    .extern artInitializeTypeAndVerifyAccessFromCode
-ENTRY art_quick_initialize_type_and_verify_access
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME          # save callee saves in case of GC
-    # artInitializeTypeFromCode(uint32_t type_idx, Method* referrer, Thread*)
-    jal     artInitializeTypeAndVerifyAccessFromCode
-    move    $a2, rSELF                         # pass Thread::Current
-    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
-END art_quick_initialize_type_and_verify_access
-
-    /*
      * Called by managed code to resolve a static field and load a boolean primitive value.
      */
     .extern artGetBooleanStaticFromCode
@@ -1272,20 +1233,16 @@
     RETURN_IF_ZERO
 END art_quick_set_obj_instance
 
-    /*
-     * Entry from managed code to resolve a string, this stub will allocate a String and deliver an
-     * exception on error. On success the String is returned. R0 holds the referring method,
-     * R1 holds the string index. The fast path check for hit in strings cache has already been
-     * performed.
-     */
-    .extern artResolveStringFromCode
-ENTRY art_quick_resolve_string
+// Macro to facilitate adding new allocation entrypoints.
+.macro ONE_ARG_DOWNCALL name, entrypoint, return
+    .extern \entrypoint
+ENTRY \name
     SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  # save callee saves in case of GC
-    # artResolveStringFromCode(Method* referrer, uint32_t string_idx, Thread*, $sp)
-    jal     artResolveStringFromCode
-    move    $a2, rSELF                 # pass Thread::Current
-    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
-END art_quick_resolve_string
+    jal     \entrypoint
+    move    $a1, rSELF                 # pass Thread::Current
+    \return
+END \name
+.endm
 
 // Macro to facilitate adding new allocation entrypoints.
 .macro TWO_ARG_DOWNCALL name, entrypoint, return
@@ -1312,6 +1269,31 @@
 GENERATE_ALL_ALLOC_ENTRYPOINTS
 
     /*
+     * Entry from managed code to resolve a string, this stub will allocate a String and deliver an
+     * exception on error. On success the String is returned. A0 holds the string index. The fast
+     * path check for hit in strings cache has already been performed.
+     */
+ONE_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+
+    /*
+     * Entry from managed code when uninitialized static storage, this stub will run the class
+     * initializer and deliver the exception on error. On success the static storage base is
+     * returned.
+     */
+ONE_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+
+    /*
+     * Entry from managed code when dex cache misses for a type_idx.
+     */
+ONE_ARG_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+
+    /*
+     * Entry from managed code when type_idx needs to be checked for access and dex cache may also
+     * miss.
+     */
+ONE_ARG_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+
+    /*
      * Called by managed code when the value in rSUSPEND has been decremented to 0.
      */
     .extern artTestSuspendFromCode
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index c5d8b8f..55e3dff 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -910,10 +910,10 @@
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_region_tlab_instrumented, RegionTLABInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab_instrumented, RegionTLABInstrumented)
 
-TWO_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO
-TWO_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO
-TWO_ARG_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode, RETURN_IF_RESULT_IS_NON_ZERO
-TWO_ARG_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode, RETURN_IF_RESULT_IS_NON_ZERO
+ONE_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO
+ONE_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO
+ONE_ARG_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode, RETURN_IF_RESULT_IS_NON_ZERO
+ONE_ARG_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode, RETURN_IF_RESULT_IS_NON_ZERO
 
 TWO_ARG_REF_DOWNCALL art_quick_handle_fill_data, artHandleFillArrayDataFromCode, RETURN_IF_EAX_ZERO
 
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 8185deb..570624c 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -980,10 +980,10 @@
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_region_tlab_instrumented, RegionTLABInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab_instrumented, RegionTLABInstrumented)
 
-TWO_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO
-TWO_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO
-TWO_ARG_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode, RETURN_IF_RESULT_IS_NON_ZERO
-TWO_ARG_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode, RETURN_IF_RESULT_IS_NON_ZERO
+ONE_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO
+ONE_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO
+ONE_ARG_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode, RETURN_IF_RESULT_IS_NON_ZERO
+ONE_ARG_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode, RETURN_IF_RESULT_IS_NON_ZERO
 
 TWO_ARG_REF_DOWNCALL art_quick_handle_fill_data, artHandleFillArrayDataFromCode, RETURN_IF_EAX_ZERO
 
diff --git a/runtime/barrier.cc b/runtime/barrier.cc
index 66ee870..f80a65f 100644
--- a/runtime/barrier.cc
+++ b/runtime/barrier.cc
@@ -86,7 +86,7 @@
 }
 
 Barrier::~Barrier() {
-  CHECK(!count_) << "Attempted to destroy barrier with non zero count";
+  CHECK_EQ(count_, 0) << "Attempted to destroy barrier with non zero count";
 }
 
 }  // namespace art
diff --git a/runtime/base/casts.h b/runtime/base/casts.h
index c7e39a2..f884649 100644
--- a/runtime/base/casts.h
+++ b/runtime/base/casts.h
@@ -18,9 +18,11 @@
 #define ART_RUNTIME_BASE_CASTS_H_
 
 #include <assert.h>
+#include <limits>
 #include <string.h>
 #include <type_traits>
 
+#include "base/logging.h"
 #include "base/macros.h"
 
 namespace art {
@@ -83,6 +85,23 @@
   return dest;
 }
 
+// A version of static_cast that DCHECKs that the value can be precisely represented
+// when converting to Dest.
+template <typename Dest, typename Source>
+inline Dest dchecked_integral_cast(const Source source) {
+  DCHECK(
+      // Check that the value is within the lower limit of Dest.
+      (static_cast<intmax_t>(std::numeric_limits<Dest>::min()) <=
+          static_cast<intmax_t>(std::numeric_limits<Source>::min()) ||
+          source >= static_cast<Source>(std::numeric_limits<Dest>::min())) &&
+      // Check that the value is within the upper limit of Dest.
+      (static_cast<uintmax_t>(std::numeric_limits<Dest>::max()) >=
+          static_cast<uintmax_t>(std::numeric_limits<Source>::max()) ||
+          source <= static_cast<Source>(std::numeric_limits<Dest>::max())));
+
+  return static_cast<Dest>(source);
+}
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_BASE_CASTS_H_
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index dc8bf2a..8a0c315 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -31,6 +31,7 @@
 #include "base/scoped_flock.h"
 #include "base/stl_util.h"
 #include "base/unix_file/fd_file.h"
+#include "base/value_object.h"
 #include "class_linker-inl.h"
 #include "compiler_callbacks.h"
 #include "debugger.h"
@@ -81,6 +82,10 @@
 
 static constexpr bool kSanityCheckObjects = kIsDebugBuild;
 
+// Do a simple class redefinition check in OpenDexFilesFromOat. This is a conservative check to
+// avoid problems with compile-time class-path != runtime class-path.
+static constexpr bool kCheckForDexCollisions = true;
+
 static void ThrowNoClassDefFoundError(const char* fmt, ...)
     __attribute__((__format__(__printf__, 1, 2)))
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -712,6 +717,186 @@
   return *oat_file;
 }
 
+class DexFileAndClassPair : ValueObject {
+ public:
+  DexFileAndClassPair(const DexFile* dex_file, size_t current_class_index, bool from_loaded_oat)
+     : cached_descriptor_(GetClassDescriptor(dex_file, current_class_index)),
+       dex_file_(dex_file),
+       current_class_index_(current_class_index),
+       from_loaded_oat_(from_loaded_oat) {}
+
+  DexFileAndClassPair(const DexFileAndClassPair&) = default;
+
+  DexFileAndClassPair& operator=(const DexFileAndClassPair& rhs) {
+    cached_descriptor_ = rhs.cached_descriptor_;
+    dex_file_ = rhs.dex_file_;
+    current_class_index_ = rhs.current_class_index_;
+    from_loaded_oat_ = rhs.from_loaded_oat_;
+    return *this;
+  }
+
+  const char* GetCachedDescriptor() const {
+    return cached_descriptor_;
+  }
+
+  bool operator<(const DexFileAndClassPair& rhs) const {
+    const char* lhsDescriptor = cached_descriptor_;
+    const char* rhsDescriptor = rhs.cached_descriptor_;
+    int cmp = strcmp(lhsDescriptor, rhsDescriptor);
+    if (cmp != 0) {
+      return cmp > 0;
+    }
+    return dex_file_ < rhs.dex_file_;
+  }
+
+  bool DexFileHasMoreClasses() const {
+    return current_class_index_ + 1 < dex_file_->NumClassDefs();
+  }
+
+  DexFileAndClassPair GetNext() const {
+    return DexFileAndClassPair(dex_file_, current_class_index_ + 1, from_loaded_oat_);
+  }
+
+  size_t GetCurrentClassIndex() const {
+    return current_class_index_;
+  }
+
+  bool FromLoadedOat() const {
+    return from_loaded_oat_;
+  }
+
+  const DexFile* GetDexFile() const {
+    return dex_file_;
+  }
+
+ private:
+  static const char* GetClassDescriptor(const DexFile* dex_file, size_t index) {
+    const DexFile::ClassDef& class_def = dex_file->GetClassDef(static_cast<uint16_t>(index));
+    return dex_file->StringByTypeIdx(class_def.class_idx_);
+  }
+
+  const char* cached_descriptor_;
+  const DexFile* dex_file_;
+  size_t current_class_index_;
+  bool from_loaded_oat_;  // We only need to compare mismatches between what we load now
+                          // and what was loaded before. Any old duplicates must have been
+                          // OK, and any new "internal" duplicates are as well (they must
+                          // be from multidex, which resolves correctly).
+};
+
+static void AddDexFilesFromOat(const OatFile* oat_file, bool already_loaded,
+                               std::priority_queue<DexFileAndClassPair>* heap) {
+  const std::vector<const OatDexFile*>& oat_dex_files = oat_file->GetOatDexFiles();
+  for (const OatDexFile* oat_dex_file : oat_dex_files) {
+    std::string error;
+    std::unique_ptr<const DexFile> dex_file = oat_dex_file->OpenDexFile(&error);
+    if (dex_file.get() == nullptr) {
+      LOG(WARNING) << "Could not create dex file from oat file: " << error;
+    } else {
+      if (dex_file->NumClassDefs() > 0U) {
+        heap->emplace(dex_file.release(), 0U, already_loaded);
+      }
+    }
+  }
+}
+
+static void AddNext(const DexFileAndClassPair& original,
+                    std::priority_queue<DexFileAndClassPair>* heap) {
+  if (original.DexFileHasMoreClasses()) {
+    heap->push(original.GetNext());
+  } else {
+    // Need to delete the dex file.
+    delete original.GetDexFile();
+  }
+}
+
+static void FreeDexFilesInHeap(std::priority_queue<DexFileAndClassPair>* heap) {
+  while (!heap->empty()) {
+    delete heap->top().GetDexFile();
+    heap->pop();
+  }
+}
+
+// Check for class-def collisions in dex files.
+//
+// This works by maintaining a heap with one class from each dex file, sorted by the class
+// descriptor. Then a dex-file/class pair is continually removed from the heap and compared
+// against the following top element. If the descriptor is the same, it is now checked whether
+// the two elements agree on whether their dex file was from an already-loaded oat-file or the
+// new oat file. Any disagreement indicates a collision.
+bool ClassLinker::HasCollisions(const OatFile* oat_file, std::string* error_msg) {
+  if (!kCheckForDexCollisions) {
+    return false;
+  }
+
+  // Dex files are registered late - once a class is actually being loaded. We have to compare
+  // against the open oat files.
+  ReaderMutexLock mu(Thread::Current(), dex_lock_);
+
+  std::priority_queue<DexFileAndClassPair> heap;
+
+  // Add dex files from already loaded oat files, but skip boot.
+  {
+    // To grab the boot oat, look at the dex files in the boot classpath.
+    const OatFile* boot_oat = nullptr;
+    if (!boot_class_path_.empty()) {
+      const DexFile* boot_dex_file = boot_class_path_[0];
+      // Is it from an oat file?
+      if (boot_dex_file->GetOatDexFile() != nullptr) {
+        boot_oat = boot_dex_file->GetOatDexFile()->GetOatFile();
+      }
+    }
+
+    for (const OatFile* loaded_oat_file : oat_files_) {
+      if (loaded_oat_file == boot_oat) {
+        continue;
+      }
+      AddDexFilesFromOat(loaded_oat_file, true, &heap);
+    }
+  }
+
+  if (heap.empty()) {
+    // No other oat files, return early.
+    return false;
+  }
+
+  // Add dex files from the oat file to check.
+  AddDexFilesFromOat(oat_file, false, &heap);
+
+  // Now drain the heap.
+  while (!heap.empty()) {
+    DexFileAndClassPair compare_pop = heap.top();
+    heap.pop();
+
+    // Compare against the following elements.
+    while (!heap.empty()) {
+      DexFileAndClassPair top = heap.top();
+
+      if (strcmp(compare_pop.GetCachedDescriptor(), top.GetCachedDescriptor()) == 0) {
+        // Same descriptor. Check whether it's crossing old-oat-files to new-oat-files.
+        if (compare_pop.FromLoadedOat() != top.FromLoadedOat()) {
+          *error_msg =
+              StringPrintf("Found duplicated class when checking oat files: '%s' in %s and %s",
+                           compare_pop.GetCachedDescriptor(),
+                           compare_pop.GetDexFile()->GetLocation().c_str(),
+                           top.GetDexFile()->GetLocation().c_str());
+          FreeDexFilesInHeap(&heap);
+          return true;
+        }
+        // Pop it.
+        heap.pop();
+        AddNext(top, &heap);
+      } else {
+        // Something else. Done here.
+        break;
+      }
+    }
+    AddNext(compare_pop, &heap);
+  }
+
+  return false;
+}
+
 std::vector<std::unique_ptr<const DexFile>> ClassLinker::OpenDexFilesFromOat(
     const char* dex_location, const char* oat_location,
     std::vector<std::string>* error_msgs) {
@@ -757,8 +942,20 @@
     // Get the oat file on disk.
     std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile();
     if (oat_file.get() != nullptr) {
-      source_oat_file = oat_file.release();
-      RegisterOatFile(source_oat_file);
+      // Take the file only if it has no collisions.
+      if (!HasCollisions(oat_file.get(), &error_msg)) {
+        source_oat_file = oat_file.release();
+        RegisterOatFile(source_oat_file);
+      } else {
+        if (Runtime::Current()->IsDexFileFallbackEnabled()) {
+          LOG(WARNING) << "Found duplicate classes, falling back to interpreter mode for "
+                       << dex_location;
+        } else {
+          LOG(WARNING) << "Found duplicate classes, dex-file-fallback disabled, will be failing to "
+                          " load classes for " << dex_location;
+        }
+        LOG(WARNING) << error_msg;
+      }
     }
   }
 
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index 1bd9f0a..57989b2 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -663,6 +663,9 @@
   //       a recreation with a custom string.
   void ThrowEarlierClassFailure(mirror::Class* c) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  // Check for duplicate class definitions of the given oat file against all open oat files.
+  bool HasCollisions(const OatFile* oat_file, std::string* error_msg) LOCKS_EXCLUDED(dex_lock_);
+
   std::vector<const DexFile*> boot_class_path_;
   std::vector<std::unique_ptr<const DexFile>> opened_dex_files_;
 
diff --git a/runtime/dex_file_verifier.cc b/runtime/dex_file_verifier.cc
index 2603975..a66c38e 100644
--- a/runtime/dex_file_verifier.cc
+++ b/runtime/dex_file_verifier.cc
@@ -944,7 +944,7 @@
         uint32_t type_idx = DecodeUnsignedLeb128(&ptr_);
         if (type_idx != 0) {
           type_idx--;
-          if (!CheckIndex(type_idx, header_->string_ids_size_, "DBG_START_LOCAL type_idx")) {
+          if (!CheckIndex(type_idx, header_->type_ids_size_, "DBG_START_LOCAL type_idx")) {
             return false;
           }
         }
@@ -975,7 +975,7 @@
         uint32_t type_idx = DecodeUnsignedLeb128(&ptr_);
         if (type_idx != 0) {
           type_idx--;
-          if (!CheckIndex(type_idx, header_->string_ids_size_, "DBG_START_LOCAL_EXTENDED type_idx")) {
+          if (!CheckIndex(type_idx, header_->type_ids_size_, "DBG_START_LOCAL_EXTENDED type_idx")) {
             return false;
           }
         }
diff --git a/runtime/dex_file_verifier_test.cc b/runtime/dex_file_verifier_test.cc
index 95a47cc..9f1ffec 100644
--- a/runtime/dex_file_verifier_test.cc
+++ b/runtime/dex_file_verifier_test.cc
@@ -200,11 +200,11 @@
   return dex_file;
 }
 
-static bool ModifyAndLoad(const char* location, size_t offset, uint8_t new_val,
-                                    std::string* error_msg) {
+static bool ModifyAndLoad(const char* dex_file_content, const char* location, size_t offset,
+                          uint8_t new_val, std::string* error_msg) {
   // Decode base64.
   size_t length;
-  std::unique_ptr<uint8_t[]> dex_bytes(DecodeBase64(kGoodTestDex, &length));
+  std::unique_ptr<uint8_t[]> dex_bytes(DecodeBase64(dex_file_content, &length));
   CHECK(dex_bytes.get() != nullptr);
 
   // Make modifications.
@@ -221,7 +221,7 @@
     // Class error.
     ScratchFile tmp;
     std::string error_msg;
-    bool success = !ModifyAndLoad(tmp.GetFilename().c_str(), 220, 0xFFU, &error_msg);
+    bool success = !ModifyAndLoad(kGoodTestDex, tmp.GetFilename().c_str(), 220, 0xFFU, &error_msg);
     ASSERT_TRUE(success);
     ASSERT_NE(error_msg.find("inter_method_id_item class_idx"), std::string::npos) << error_msg;
   }
@@ -230,7 +230,7 @@
     // Proto error.
     ScratchFile tmp;
     std::string error_msg;
-    bool success = !ModifyAndLoad(tmp.GetFilename().c_str(), 222, 0xFFU, &error_msg);
+    bool success = !ModifyAndLoad(kGoodTestDex, tmp.GetFilename().c_str(), 222, 0xFFU, &error_msg);
     ASSERT_TRUE(success);
     ASSERT_NE(error_msg.find("inter_method_id_item proto_idx"), std::string::npos) << error_msg;
   }
@@ -239,10 +239,81 @@
     // Name error.
     ScratchFile tmp;
     std::string error_msg;
-    bool success = !ModifyAndLoad(tmp.GetFilename().c_str(), 224, 0xFFU, &error_msg);
+    bool success = !ModifyAndLoad(kGoodTestDex, tmp.GetFilename().c_str(), 224, 0xFFU, &error_msg);
     ASSERT_TRUE(success);
     ASSERT_NE(error_msg.find("inter_method_id_item name_idx"), std::string::npos) << error_msg;
   }
 }
 
+// Generated from:
+//
+// .class public LTest;
+// .super Ljava/lang/Object;
+// .source "Test.java"
+//
+// .method public constructor <init>()V
+//     .registers 1
+//
+//     .prologue
+//     .line 1
+//     invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+//
+//     return-void
+// .end method
+//
+// .method public static main()V
+//     .registers 2
+//
+//     const-string v0, "a"
+//     const-string v0, "b"
+//     const-string v0, "c"
+//     const-string v0, "d"
+//     const-string v0, "e"
+//     const-string v0, "f"
+//     const-string v0, "g"
+//     const-string v0, "h"
+//     const-string v0, "i"
+//     const-string v0, "j"
+//     const-string v0, "k"
+//
+//     .local v1, "local_var":Ljava/lang/String;
+//     const-string v1, "test"
+// .end method
+
+static const char kDebugInfoTestDex[] =
+    "ZGV4CjAzNQCHRkHix2eIMQgvLD/0VGrlllZLo0Rb6VyUAgAAcAAAAHhWNBIAAAAAAAAAAAwCAAAU"
+    "AAAAcAAAAAQAAADAAAAAAQAAANAAAAAAAAAAAAAAAAMAAADcAAAAAQAAAPQAAACAAQAAFAEAABQB"
+    "AAAcAQAAJAEAADgBAABMAQAAVwEAAFoBAABdAQAAYAEAAGMBAABmAQAAaQEAAGwBAABvAQAAcgEA"
+    "AHUBAAB4AQAAewEAAIYBAACMAQAAAQAAAAIAAAADAAAABQAAAAUAAAADAAAAAAAAAAAAAAAAAAAA"
+    "AAAAABIAAAABAAAAAAAAAAAAAAABAAAAAQAAAAAAAAAEAAAAAAAAAPwBAAAAAAAABjxpbml0PgAG"
+    "TFRlc3Q7ABJMamF2YS9sYW5nL09iamVjdDsAEkxqYXZhL2xhbmcvU3RyaW5nOwAJVGVzdC5qYXZh"
+    "AAFWAAFhAAFiAAFjAAFkAAFlAAFmAAFnAAFoAAFpAAFqAAFrAAlsb2NhbF92YXIABG1haW4ABHRl"
+    "c3QAAAABAAcOAAAAARYDARIDAAAAAQABAAEAAACUAQAABAAAAHAQAgAAAA4AAgAAAAAAAACZAQAA"
+    "GAAAABoABgAaAAcAGgAIABoACQAaAAoAGgALABoADAAaAA0AGgAOABoADwAaABAAGgETAAAAAgAA"
+    "gYAEpAMBCbwDAAALAAAAAAAAAAEAAAAAAAAAAQAAABQAAABwAAAAAgAAAAQAAADAAAAAAwAAAAEA"
+    "AADQAAAABQAAAAMAAADcAAAABgAAAAEAAAD0AAAAAiAAABQAAAAUAQAAAyAAAAIAAACUAQAAASAA"
+    "AAIAAACkAQAAACAAAAEAAAD8AQAAABAAAAEAAAAMAgAA";
+
+TEST_F(DexFileVerifierTest, DebugInfoTypeIdxTest) {
+  {
+    // The input dex file should be good before modification.
+    ScratchFile tmp;
+    std::string error_msg;
+    std::unique_ptr<const DexFile> raw(OpenDexFileBase64(kDebugInfoTestDex,
+                                                         tmp.GetFilename().c_str(),
+                                                         &error_msg));
+    ASSERT_TRUE(raw.get() != nullptr) << error_msg;
+  }
+
+  {
+    // Modify the debug information entry.
+    ScratchFile tmp;
+    std::string error_msg;
+    bool success = !ModifyAndLoad(kDebugInfoTestDex, tmp.GetFilename().c_str(), 416, 0x14U,
+                                  &error_msg);
+    ASSERT_TRUE(success);
+    ASSERT_NE(error_msg.find("DBG_START_LOCAL type_idx"), std::string::npos) << error_msg;
+  }
+}
+
 }  // namespace art
diff --git a/runtime/entrypoints/entrypoint_utils-inl.h b/runtime/entrypoints/entrypoint_utils-inl.h
index 64b7ecd..9292cff 100644
--- a/runtime/entrypoints/entrypoint_utils-inl.h
+++ b/runtime/entrypoints/entrypoint_utils-inl.h
@@ -22,6 +22,8 @@
 #include "class_linker-inl.h"
 #include "common_throws.h"
 #include "dex_file.h"
+#include "entrypoints/quick/callee_save_frame.h"
+#include "handle_scope-inl.h"
 #include "indirect_reference_table.h"
 #include "invoke_type.h"
 #include "jni_internal.h"
@@ -30,11 +32,31 @@
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
 #include "mirror/throwable.h"
-#include "handle_scope-inl.h"
+#include "nth_caller_visitor.h"
+#include "runtime.h"
 #include "thread.h"
 
 namespace art {
 
+inline mirror::ArtMethod* GetCalleeSaveMethodCaller(Thread* self, Runtime::CalleeSaveType type)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  auto* refs_only_sp = self->GetManagedStack()->GetTopQuickFrame();
+  DCHECK_EQ(refs_only_sp->AsMirrorPtr(), Runtime::Current()->GetCalleeSaveMethod(type));
+
+  const size_t callee_frame_size = GetCalleeSaveFrameSize(kRuntimeISA, type);
+  auto* caller_sp = reinterpret_cast<StackReference<mirror::ArtMethod>*>(
+          reinterpret_cast<uintptr_t>(refs_only_sp) + callee_frame_size);
+  auto* caller = caller_sp->AsMirrorPtr();
+
+  if (kIsDebugBuild) {
+    NthCallerVisitor visitor(self, 1, true);
+    visitor.WalkStack();
+    CHECK(caller == visitor.caller);
+  }
+
+  return caller;
+}
+
 template <const bool kAccessCheck>
 ALWAYS_INLINE
 inline mirror::Class* CheckObjectAlloc(uint32_t type_idx,
diff --git a/runtime/entrypoints/quick/quick_default_externs.h b/runtime/entrypoints/quick/quick_default_externs.h
index b7e8d50..1fd8a949a 100644
--- a/runtime/entrypoints/quick/quick_default_externs.h
+++ b/runtime/entrypoints/quick/quick_default_externs.h
@@ -34,10 +34,10 @@
 extern "C" void art_quick_check_cast(const art::mirror::Class*, const art::mirror::Class*);
 
 // DexCache entrypoints.
-extern "C" void* art_quick_initialize_static_storage(uint32_t, art::mirror::ArtMethod*);
-extern "C" void* art_quick_initialize_type(uint32_t, art::mirror::ArtMethod*);
-extern "C" void* art_quick_initialize_type_and_verify_access(uint32_t, art::mirror::ArtMethod*);
-extern "C" void* art_quick_resolve_string(uint32_t, art::mirror::ArtMethod*);
+extern "C" void* art_quick_initialize_static_storage(uint32_t);
+extern "C" void* art_quick_initialize_type(uint32_t);
+extern "C" void* art_quick_initialize_type_and_verify_access(uint32_t);
+extern "C" void* art_quick_resolve_string(uint32_t);
 
 // Field entrypoints.
 extern "C" int art_quick_set8_instance(uint32_t, void*, int8_t);
diff --git a/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc b/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc
index 348495d..46629f5 100644
--- a/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc
@@ -26,41 +26,41 @@
 namespace art {
 
 extern "C" mirror::Class* artInitializeStaticStorageFromCode(uint32_t type_idx,
-                                                             mirror::ArtMethod* referrer,
                                                              Thread* self)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   // Called to ensure static storage base is initialized for direct static field reads and writes.
   // A class may be accessing another class' fields when it doesn't have access, as access has been
   // given by inheritance.
   ScopedQuickEntrypointChecks sqec(self);
-  return ResolveVerifyAndClinit(type_idx, referrer, self, true, false);
+  auto* caller = GetCalleeSaveMethodCaller(self, Runtime::kRefsOnly);
+  return ResolveVerifyAndClinit(type_idx, caller, self, true, false);
 }
 
 extern "C" mirror::Class* artInitializeTypeFromCode(uint32_t type_idx,
-                                                    mirror::ArtMethod* referrer,
                                                     Thread* self)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   // Called when method->dex_cache_resolved_types_[] misses.
   ScopedQuickEntrypointChecks sqec(self);
-  return ResolveVerifyAndClinit(type_idx, referrer, self, false, false);
+  auto* caller = GetCalleeSaveMethodCaller(self, Runtime::kRefsOnly);
+  return ResolveVerifyAndClinit(type_idx, caller, self, false, false);
 }
 
 extern "C" mirror::Class* artInitializeTypeAndVerifyAccessFromCode(uint32_t type_idx,
-                                                                   mirror::ArtMethod* referrer,
                                                                    Thread* self)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   // Called when caller isn't guaranteed to have access to a type and the dex cache may be
   // unpopulated.
   ScopedQuickEntrypointChecks sqec(self);
-  return ResolveVerifyAndClinit(type_idx, referrer, self, false, true);
+  auto* caller = GetCalleeSaveMethodCaller(self, Runtime::kRefsOnly);
+  return ResolveVerifyAndClinit(type_idx, caller, self, false, true);
 }
 
 extern "C" mirror::String* artResolveStringFromCode(int32_t string_idx,
-                                                    mirror::ArtMethod* referrer,
                                                     Thread* self)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
-  return ResolveStringFromCode(referrer, string_idx);
+  auto* caller = GetCalleeSaveMethodCaller(self, Runtime::kRefsOnly);
+  return ResolveStringFromCode(caller, string_idx);
 }
 
 }  // namespace art
diff --git a/runtime/entrypoints/quick/quick_entrypoints_list.h b/runtime/entrypoints/quick/quick_entrypoints_list.h
index eaf874e..6d9e483 100644
--- a/runtime/entrypoints/quick/quick_entrypoints_list.h
+++ b/runtime/entrypoints/quick/quick_entrypoints_list.h
@@ -33,10 +33,10 @@
   V(InstanceofNonTrivial, uint32_t, const mirror::Class*, const mirror::Class*) \
   V(CheckCast, void, const mirror::Class*, const mirror::Class*) \
 \
-  V(InitializeStaticStorage, void*, uint32_t, mirror::ArtMethod*) \
-  V(InitializeTypeAndVerifyAccess, void*, uint32_t, mirror::ArtMethod*) \
-  V(InitializeType, void*, uint32_t, mirror::ArtMethod*) \
-  V(ResolveString, void*, uint32_t, mirror::ArtMethod*) \
+  V(InitializeStaticStorage, void*, uint32_t) \
+  V(InitializeTypeAndVerifyAccess, void*, uint32_t) \
+  V(InitializeType, void*, uint32_t) \
+  V(ResolveString, void*, uint32_t) \
 \
   V(Set8Instance, int, uint32_t, void*, int8_t) \
   V(Set8Static, int, uint32_t, int8_t) \
diff --git a/runtime/native/dalvik_system_ZygoteHooks.cc b/runtime/native/dalvik_system_ZygoteHooks.cc
index af01a02..1a7a3e5 100644
--- a/runtime/native/dalvik_system_ZygoteHooks.cc
+++ b/runtime/native/dalvik_system_ZygoteHooks.cc
@@ -65,6 +65,7 @@
     DEBUG_ENABLE_SAFEMODE           = 1 << 3,
     DEBUG_ENABLE_JNI_LOGGING        = 1 << 4,
     DEBUG_ENABLE_JIT                = 1 << 5,
+    DEBUG_GENERATE_CFI              = 1 << 6,
   };
 
   Runtime* const runtime = Runtime::Current();
@@ -111,6 +112,12 @@
   }
   runtime->GetJITOptions()->SetUseJIT(use_jit);
 
+  const bool generate_cfi = (debug_flags & DEBUG_GENERATE_CFI) != 0;
+  if (generate_cfi) {
+    runtime->AddCompilerOption("--include-cfi");
+    debug_flags &= ~DEBUG_GENERATE_CFI;
+  }
+
   // This is for backwards compatibility with Dalvik.
   debug_flags &= ~DEBUG_ENABLE_ASSERT;
 
@@ -145,6 +152,7 @@
   if (Trace::GetMethodTracingMode() != TracingMode::kTracingInactive) {
     Trace::TraceOutputMode output_mode = Trace::GetOutputMode();
     Trace::TraceMode trace_mode = Trace::GetMode();
+    size_t buffer_size = Trace::GetBufferSize();
 
     // Just drop it.
     Trace::Abort();
@@ -169,7 +177,7 @@
                                               proc_name.c_str());
         Trace::Start(trace_file.c_str(),
                      -1,
-                     -1,  // TODO: Expose buffer size.
+                     buffer_size,
                      0,   // TODO: Expose flags.
                      output_mode,
                      trace_mode,
diff --git a/runtime/native/java_lang_Class.cc b/runtime/native/java_lang_Class.cc
index b0d923b..48a8bc7 100644
--- a/runtime/native/java_lang_Class.cc
+++ b/runtime/native/java_lang_Class.cc
@@ -29,6 +29,7 @@
 #include "mirror/object-inl.h"
 #include "mirror/object_array-inl.h"
 #include "mirror/string-inl.h"
+#include "reflection.h"
 #include "scoped_thread_state_change.h"
 #include "scoped_fast_native_object_access.h"
 #include "ScopedLocalRef.h"
@@ -391,8 +392,8 @@
       nullptr;
 }
 
-jobjectArray Class_getDeclaredMethodsUnchecked(JNIEnv* env, jobject javaThis,
-                                               jboolean publicOnly) {
+static jobjectArray Class_getDeclaredMethodsUnchecked(JNIEnv* env, jobject javaThis,
+                                                      jboolean publicOnly) {
   ScopedFastNativeObjectAccess soa(env);
   StackHandleScope<5> hs(soa.Self());
   auto* klass = DecodeClass(soa, javaThis);
@@ -457,6 +458,74 @@
   return soa.AddLocalReference<jobjectArray>(ret.Get());
 }
 
+static jobject Class_newInstance(JNIEnv* env, jobject javaThis) {
+  ScopedFastNativeObjectAccess soa(env);
+  StackHandleScope<4> hs(soa.Self());
+  auto klass = hs.NewHandle(DecodeClass(soa, javaThis));
+  if (UNLIKELY(klass->GetPrimitiveType() != 0 || klass->IsInterface() || klass->IsArrayClass() ||
+               klass->IsAbstract())) {
+    soa.Self()->ThrowNewExceptionF("Ljava/lang/InstantiationException;",
+                                   "%s cannot be instantiated", PrettyClass(klass.Get()).c_str());
+    return nullptr;
+  }
+  auto caller = hs.NewHandle<mirror::Class>(nullptr);
+  // Verify that we can access the class.
+  if (!klass->IsPublic()) {
+    caller.Assign(GetCallingClass(soa.Self(), 1));
+    if (caller.Get() != nullptr && !caller->CanAccess(klass.Get())) {
+      soa.Self()->ThrowNewExceptionF(
+          "Ljava/lang/IllegalAccessException;", "%s is not accessible from %s",
+          PrettyClass(klass.Get()).c_str(), PrettyClass(caller.Get()).c_str());
+      return nullptr;
+    }
+  }
+  auto* constructor = klass->GetDeclaredConstructor(
+      soa.Self(), NullHandle<mirror::ObjectArray<mirror::Class>>());
+  if (UNLIKELY(constructor == nullptr)) {
+    soa.Self()->ThrowNewExceptionF("Ljava/lang/InstantiationException;",
+                                   "%s has no zero argument constructor",
+                                   PrettyClass(klass.Get()).c_str());
+    return nullptr;
+  }
+  auto receiver = hs.NewHandle(klass->AllocObject(soa.Self()));
+  if (UNLIKELY(receiver.Get() == nullptr)) {
+    soa.Self()->AssertPendingOOMException();
+    return nullptr;
+  }
+  // Verify that we can access the constructor.
+  auto* declaring_class = constructor->GetDeclaringClass();
+  if (!constructor->IsPublic()) {
+    if (caller.Get() == nullptr) {
+      caller.Assign(GetCallingClass(soa.Self(), 1));
+    }
+    if (UNLIKELY(caller.Get() != nullptr && !VerifyAccess(
+        soa.Self(), receiver.Get(), declaring_class, constructor->GetAccessFlags(),
+        caller.Get()))) {
+      soa.Self()->ThrowNewExceptionF(
+          "Ljava/lang/IllegalAccessException;", "%s is not accessible from %s",
+          PrettyMethod(constructor).c_str(), PrettyClass(caller.Get()).c_str());
+      return nullptr;
+    }
+  }
+  // Ensure that we are initialized.
+  if (UNLIKELY(!declaring_class->IsInitialized())) {
+    if (!Runtime::Current()->GetClassLinker()->EnsureInitialized(
+        soa.Self(), hs.NewHandle(declaring_class), true, true)) {
+      soa.Self()->AssertPendingException();
+      return nullptr;
+    }
+  }
+  // Invoke the constructor.
+  JValue result;
+  uint32_t args[1] = { static_cast<uint32_t>(reinterpret_cast<uintptr_t>(receiver.Get())) };
+  constructor->Invoke(soa.Self(), args, sizeof(args), &result, "V");
+  if (UNLIKELY(soa.Self()->IsExceptionPending())) {
+    return nullptr;
+  }
+  // Constructors are ()V methods, so we shouldn't touch the result of InvokeMethod.
+  return soa.AddLocalReference<jobject>(receiver.Get());
+}
+
 static JNINativeMethod gMethods[] = {
   NATIVE_METHOD(Class, classForName,
                 "!(Ljava/lang/String;ZLjava/lang/ClassLoader;)Ljava/lang/Class;"),
@@ -474,6 +543,7 @@
   NATIVE_METHOD(Class, getNameNative, "!()Ljava/lang/String;"),
   NATIVE_METHOD(Class, getProxyInterfaces, "!()[Ljava/lang/Class;"),
   NATIVE_METHOD(Class, getPublicDeclaredFields, "!()[Ljava/lang/reflect/Field;"),
+  NATIVE_METHOD(Class, newInstance, "!()Ljava/lang/Object;"),
 };
 
 void register_java_lang_Class(JNIEnv* env) {
diff --git a/runtime/native/java_lang_reflect_Constructor.cc b/runtime/native/java_lang_reflect_Constructor.cc
index c33f81a..04d2e5e 100644
--- a/runtime/native/java_lang_reflect_Constructor.cc
+++ b/runtime/native/java_lang_reflect_Constructor.cc
@@ -29,29 +29,43 @@
 
 namespace art {
 
-static ALWAYS_INLINE inline jobject NewInstanceHelper(
-    JNIEnv* env, jobject javaMethod, jobjectArray javaArgs, size_t num_frames) {
+/*
+ * We get here through Constructor.newInstance().  The Constructor object
+ * would not be available if the constructor weren't public (per the
+ * definition of Class.getConstructor), so we can skip the method access
+ * check.  We can also safely assume the constructor isn't associated
+ * with an interface, array, or primitive class. If this is coming from
+ * native, it is OK to avoid access checks since JNI does not enforce them.
+ */
+static jobject Constructor_newInstance(JNIEnv* env, jobject javaMethod, jobjectArray javaArgs) {
   ScopedFastNativeObjectAccess soa(env);
   mirror::Method* m = soa.Decode<mirror::Method*>(javaMethod);
   StackHandleScope<1> hs(soa.Self());
   Handle<mirror::Class> c(hs.NewHandle(m->GetDeclaringClass()));
   if (UNLIKELY(c->IsAbstract())) {
-    soa.Self()->ThrowNewExceptionF("Ljava/lang/InstantiationException;",
-                                   "Can't instantiate %s %s",
+    soa.Self()->ThrowNewExceptionF("Ljava/lang/InstantiationException;", "Can't instantiate %s %s",
                                    c->IsInterface() ? "interface" : "abstract class",
                                    PrettyDescriptor(c.Get()).c_str());
     return nullptr;
   }
-
+  // Verify that we can access the class (only for debug since the above comment).
+  if (kIsDebugBuild && !c->IsPublic()) {
+    auto* caller = GetCallingClass(soa.Self(), 1);
+    // If caller is null, then we called from JNI, just avoid the check since JNI avoids most
+    // access checks anyways. TODO: Investigate if this the correct behavior.
+    if (caller != nullptr && !caller->CanAccess(c.Get())) {
+      soa.Self()->ThrowNewExceptionF(
+          "Ljava/lang/IllegalAccessException;", "%s is not accessible from %s",
+          PrettyClass(c.Get()).c_str(), PrettyClass(caller).c_str());
+      return nullptr;
+    }
+  }
   if (!Runtime::Current()->GetClassLinker()->EnsureInitialized(soa.Self(), c, true, true)) {
     DCHECK(soa.Self()->IsExceptionPending());
     return nullptr;
   }
-
   bool movable = true;
-  if (!kMovingMethods && c->IsArtMethodClass()) {
-    movable = false;
-  } else if (!kMovingClasses && c->IsClassClass()) {
+  if (!kMovingClasses && c->IsClassClass()) {
     movable = false;
   }
   mirror::Object* receiver =
@@ -59,33 +73,14 @@
   if (receiver == nullptr) {
     return nullptr;
   }
-
   jobject javaReceiver = soa.AddLocalReference<jobject>(receiver);
-  InvokeMethod(soa, javaMethod, javaReceiver, javaArgs, num_frames);
-
+  InvokeMethod(soa, javaMethod, javaReceiver, javaArgs, 1);
   // Constructors are ()V methods, so we shouldn't touch the result of InvokeMethod.
   return javaReceiver;
 }
 
-/*
- * We get here through Constructor.newInstance().  The Constructor object
- * would not be available if the constructor weren't public (per the
- * definition of Class.getConstructor), so we can skip the method access
- * check.  We can also safely assume the constructor isn't associated
- * with an interface, array, or primitive class.
- */
-static jobject Constructor_newInstance(JNIEnv* env, jobject javaMethod, jobjectArray javaArgs) {
-  return NewInstanceHelper(env, javaMethod, javaArgs, 1);
-}
-
-static jobject Constructor_newInstanceTwoFrames(JNIEnv* env, jobject javaMethod,
-                                                jobjectArray javaArgs) {
-  return NewInstanceHelper(env, javaMethod, javaArgs, 2);
-}
-
 static JNINativeMethod gMethods[] = {
   NATIVE_METHOD(Constructor, newInstance, "!([Ljava/lang/Object;)Ljava/lang/Object;"),
-  NATIVE_METHOD(Constructor, newInstanceTwoFrames, "!([Ljava/lang/Object;)Ljava/lang/Object;"),
 };
 
 void register_java_lang_reflect_Constructor(JNIEnv* env) {
diff --git a/runtime/reflection.cc b/runtime/reflection.cc
index e546738..a2ce0cb 100644
--- a/runtime/reflection.cc
+++ b/runtime/reflection.cc
@@ -615,11 +615,21 @@
 
   // Wrap any exception with "Ljava/lang/reflect/InvocationTargetException;" and return early.
   if (soa.Self()->IsExceptionPending()) {
+    // If we get another exception when we are trying to wrap, then just use that instead.
     jthrowable th = soa.Env()->ExceptionOccurred();
-    soa.Env()->ExceptionClear();
+    soa.Self()->ClearException();
     jclass exception_class = soa.Env()->FindClass("java/lang/reflect/InvocationTargetException");
+    if (exception_class == nullptr) {
+      soa.Self()->AssertPendingOOMException();
+      return nullptr;
+    }
     jmethodID mid = soa.Env()->GetMethodID(exception_class, "<init>", "(Ljava/lang/Throwable;)V");
+    CHECK(mid != nullptr);
     jobject exception_instance = soa.Env()->NewObject(exception_class, mid, th);
+    if (exception_instance == nullptr) {
+      soa.Self()->AssertPendingOOMException();
+      return nullptr;
+    }
     soa.Env()->Throw(reinterpret_cast<jthrowable>(exception_instance));
     return nullptr;
   }
@@ -789,40 +799,48 @@
   return UnboxPrimitive(o, dst_class, f, unboxed_value);
 }
 
-bool UnboxPrimitiveForResult(mirror::Object* o,
-                             mirror::Class* dst_class, JValue* unboxed_value) {
+bool UnboxPrimitiveForResult(mirror::Object* o, mirror::Class* dst_class, JValue* unboxed_value) {
   return UnboxPrimitive(o, dst_class, nullptr, unboxed_value);
 }
 
+mirror::Class* GetCallingClass(Thread* self, size_t num_frames) {
+  NthCallerVisitor visitor(self, num_frames);
+  visitor.WalkStack();
+  return visitor.caller != nullptr ? visitor.caller->GetDeclaringClass() : nullptr;
+}
+
 bool VerifyAccess(Thread* self, mirror::Object* obj, mirror::Class* declaring_class,
                   uint32_t access_flags, mirror::Class** calling_class, size_t num_frames) {
   if ((access_flags & kAccPublic) != 0) {
     return true;
   }
-  NthCallerVisitor visitor(self, num_frames);
-  visitor.WalkStack();
-  if (UNLIKELY(visitor.caller == nullptr)) {
+  auto* klass = GetCallingClass(self, num_frames);
+  if (UNLIKELY(klass == nullptr)) {
     // The caller is an attached native thread.
     return false;
   }
-  mirror::Class* caller_class = visitor.caller->GetDeclaringClass();
-  if (caller_class == declaring_class) {
+  *calling_class = klass;
+  return VerifyAccess(self, obj, declaring_class, access_flags, klass);
+}
+
+bool VerifyAccess(Thread* self, mirror::Object* obj, mirror::Class* declaring_class,
+                  uint32_t access_flags, mirror::Class* calling_class) {
+  if (calling_class == declaring_class) {
     return true;
   }
   ScopedAssertNoThreadSuspension sants(self, "verify-access");
-  *calling_class = caller_class;
   if ((access_flags & kAccPrivate) != 0) {
     return false;
   }
   if ((access_flags & kAccProtected) != 0) {
-    if (obj != nullptr && !obj->InstanceOf(caller_class) &&
-        !declaring_class->IsInSamePackage(caller_class)) {
+    if (obj != nullptr && !obj->InstanceOf(calling_class) &&
+        !declaring_class->IsInSamePackage(calling_class)) {
       return false;
-    } else if (declaring_class->IsAssignableFrom(caller_class)) {
+    } else if (declaring_class->IsAssignableFrom(calling_class)) {
       return true;
     }
   }
-  return declaring_class->IsInSamePackage(caller_class);
+  return declaring_class->IsInSamePackage(calling_class);
 }
 
 void InvalidReceiverError(mirror::Object* o, mirror::Class* c) {
diff --git a/runtime/reflection.h b/runtime/reflection.h
index c63f858..6305d68 100644
--- a/runtime/reflection.h
+++ b/runtime/reflection.h
@@ -77,6 +77,15 @@
                   uint32_t access_flags, mirror::Class** calling_class, size_t num_frames)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+// This version takes a known calling class.
+bool VerifyAccess(Thread* self, mirror::Object* obj, mirror::Class* declaring_class,
+                  uint32_t access_flags, mirror::Class* calling_class)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+// Get the calling class by using a stack visitor, may return null for unattached native threads.
+mirror::Class* GetCallingClass(Thread* self, size_t num_frames)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
 void InvalidReceiverError(mirror::Object* o, mirror::Class* c)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
diff --git a/runtime/stack.cc b/runtime/stack.cc
index aa3e320..e49bc1d 100644
--- a/runtime/stack.cc
+++ b/runtime/stack.cc
@@ -345,7 +345,7 @@
       DCHECK(context_ != nullptr);  // You can't reliably write registers without a context.
       DCHECK(m == GetMethod());
       if (m->IsOptimized(sizeof(void*))) {
-        return SetVRegFromOptimizedCode(m, vreg, new_value, kind);
+        return false;
       } else {
         return SetVRegFromQuickCode(m, vreg, new_value, kind);
       }
@@ -382,57 +382,6 @@
   }
 }
 
-bool StackVisitor::SetVRegFromOptimizedCode(mirror::ArtMethod* m, uint16_t vreg, uint32_t new_value,
-                                            VRegKind kind) {
-  const void* code_pointer = m->GetQuickOatCodePointer(sizeof(void*));
-  DCHECK(code_pointer != nullptr);
-  uint32_t native_pc_offset = m->NativeQuickPcOffset(cur_quick_frame_pc_);
-  CodeInfo code_info = m->GetOptimizedCodeInfo();
-  StackMap stack_map = code_info.GetStackMapForNativePcOffset(native_pc_offset);
-  const DexFile::CodeItem* code_item = m->GetCodeItem();
-  DCHECK(code_item != nullptr) << PrettyMethod(m);  // Can't be null or how would we compile
-                                                    // its instructions?
-  uint16_t number_of_dex_registers = code_item->registers_size_;
-  DCHECK_LT(vreg, number_of_dex_registers);
-  DexRegisterMap dex_register_map =
-      code_info.GetDexRegisterMapOf(stack_map, number_of_dex_registers);
-  DexRegisterLocation::Kind location_kind =
-      dex_register_map.GetLocationKind(vreg, number_of_dex_registers, code_info);
-  uint32_t dex_pc = m->ToDexPc(cur_quick_frame_pc_, false);
-  switch (location_kind) {
-    case DexRegisterLocation::Kind::kInStack: {
-      const int32_t offset =
-          dex_register_map.GetStackOffsetInBytes(vreg, number_of_dex_registers, code_info);
-      uint8_t* addr = reinterpret_cast<uint8_t*>(cur_quick_frame_) + offset;
-      *reinterpret_cast<uint32_t*>(addr) = new_value;
-      return true;
-    }
-    case DexRegisterLocation::Kind::kInRegister:
-    case DexRegisterLocation::Kind::kInFpuRegister: {
-      uint32_t reg = dex_register_map.GetMachineRegister(vreg, number_of_dex_registers, code_info);
-      return SetRegisterIfAccessible(reg, new_value, kind);
-    }
-    case DexRegisterLocation::Kind::kConstant:
-      LOG(ERROR) << StringPrintf("Cannot change value of DEX register v%u used as a constant at "
-                                 "DEX pc 0x%x (native pc 0x%x) of method %s",
-                                 vreg, dex_pc, native_pc_offset,
-                                 PrettyMethod(cur_quick_frame_->AsMirrorPtr()).c_str());
-      return false;
-    case DexRegisterLocation::Kind::kNone:
-      LOG(ERROR) << StringPrintf("No location for DEX register v%u at DEX pc 0x%x "
-                                 "(native pc 0x%x) of method %s",
-                                 vreg, dex_pc, native_pc_offset,
-                                 PrettyMethod(cur_quick_frame_->AsMirrorPtr()).c_str());
-      return false;
-    default:
-      LOG(FATAL) << StringPrintf("Unknown location for DEX register v%u at DEX pc 0x%x "
-                                 "(native pc 0x%x) of method %s",
-                                 vreg, dex_pc, native_pc_offset,
-                                 PrettyMethod(cur_quick_frame_->AsMirrorPtr()).c_str());
-      UNREACHABLE();
-  }
-}
-
 bool StackVisitor::SetRegisterIfAccessible(uint32_t reg, uint32_t new_value, VRegKind kind) {
   const bool is_float = (kind == kFloatVReg) || (kind == kDoubleLoVReg) || (kind == kDoubleHiVReg);
   if (!IsAccessibleRegister(reg, is_float)) {
@@ -477,7 +426,7 @@
     DCHECK(context_ != nullptr);  // You can't reliably write registers without a context.
     DCHECK(m == GetMethod());
     if (m->IsOptimized(sizeof(void*))) {
-      return SetVRegPairFromOptimizedCode(m, vreg, new_value, kind_lo, kind_hi);
+      return false;
     } else {
       return SetVRegPairFromQuickCode(m, vreg, new_value, kind_lo, kind_hi);
     }
@@ -515,15 +464,6 @@
   }
 }
 
-bool StackVisitor::SetVRegPairFromOptimizedCode(
-    mirror::ArtMethod* m, uint16_t vreg, uint64_t new_value, VRegKind kind_lo, VRegKind kind_hi) {
-  uint32_t low_32bits = Low32Bits(new_value);
-  uint32_t high_32bits = High32Bits(new_value);
-  bool success = SetVRegFromOptimizedCode(m, vreg, low_32bits, kind_lo);
-  success &= SetVRegFromOptimizedCode(m, vreg + 1, high_32bits, kind_hi);
-  return success;
-}
-
 bool StackVisitor::SetRegisterPairIfAccessible(uint32_t reg_lo, uint32_t reg_hi,
                                                uint64_t new_value, bool is_float) {
   if (!IsAccessibleRegister(reg_lo, is_float) || !IsAccessibleRegister(reg_hi, is_float)) {
diff --git a/runtime/stack.h b/runtime/stack.h
index ed9e458..e2af5ee 100644
--- a/runtime/stack.h
+++ b/runtime/stack.h
@@ -668,18 +668,12 @@
   bool SetVRegFromQuickCode(mirror::ArtMethod* m, uint16_t vreg, uint32_t new_value,
                             VRegKind kind)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  bool SetVRegFromOptimizedCode(mirror::ArtMethod* m, uint16_t vreg, uint32_t new_value,
-                                VRegKind kind)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   bool SetRegisterIfAccessible(uint32_t reg, uint32_t new_value, VRegKind kind)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   bool SetVRegPairFromQuickCode(mirror::ArtMethod* m, uint16_t vreg, uint64_t new_value,
                                 VRegKind kind_lo, VRegKind kind_hi)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  bool SetVRegPairFromOptimizedCode(mirror::ArtMethod* m, uint16_t vreg, uint64_t new_value,
-                                    VRegKind kind_lo, VRegKind kind_hi)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   bool SetRegisterPairIfAccessible(uint32_t reg_lo, uint32_t reg_hi, uint64_t new_value,
                                    bool is_float)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
diff --git a/runtime/thread.cc b/runtime/thread.cc
index fa65bce..9f7c303 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -572,13 +572,13 @@
   if (GetThreadId() != 0) {
     // If we're in kStarting, we won't have a thin lock id or tid yet.
     os << GetThreadId()
-             << ",tid=" << GetTid() << ',';
+       << ",tid=" << GetTid() << ',';
   }
   os << GetState()
-           << ",Thread*=" << this
-           << ",peer=" << tlsPtr_.opeer
-           << ",\"" << *tlsPtr_.name << "\""
-           << "]";
+     << ",Thread*=" << this
+     << ",peer=" << tlsPtr_.opeer
+     << ",\"" << (tlsPtr_.name != nullptr ? *tlsPtr_.name : "null") << "\""
+     << "]";
 }
 
 void Thread::Dump(std::ostream& os) const {
@@ -1171,9 +1171,14 @@
 }
 
 void Thread::AssertPendingException() const {
-  if (UNLIKELY(!IsExceptionPending())) {
-    LOG(FATAL) << "Pending exception expected.";
-  }
+  CHECK(IsExceptionPending()) << "Pending exception expected.";
+}
+
+void Thread::AssertPendingOOMException() const {
+  AssertPendingException();
+  auto* e = GetException();
+  CHECK_EQ(e->GetClass(), DecodeJObject(WellKnownClasses::java_lang_OutOfMemoryError)->AsClass())
+      << e->Dump();
 }
 
 void Thread::AssertNoPendingException() const {
diff --git a/runtime/thread.h b/runtime/thread.h
index dd9e734..35b785d 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -336,6 +336,7 @@
   }
 
   void AssertPendingException() const;
+  void AssertPendingOOMException() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void AssertNoPendingException() const;
   void AssertNoPendingExceptionForNewException(const char* msg) const;
 
diff --git a/runtime/trace.cc b/runtime/trace.cc
index 5322f9f..9eca517 100644
--- a/runtime/trace.cc
+++ b/runtime/trace.cc
@@ -22,6 +22,7 @@
 #define ATRACE_TAG ATRACE_TAG_DALVIK
 #include "cutils/trace.h"
 
+#include "base/casts.h"
 #include "base/stl_util.h"
 #include "base/unix_file/fd_file.h"
 #include "class_linker.h"
@@ -329,7 +330,7 @@
   return nullptr;
 }
 
-void Trace::Start(const char* trace_filename, int trace_fd, int buffer_size, int flags,
+void Trace::Start(const char* trace_filename, int trace_fd, size_t buffer_size, int flags,
                   TraceOutputMode output_mode, TraceMode trace_mode, int interval_us) {
   Thread* self = Thread::Current();
   {
@@ -592,19 +593,15 @@
   }
 }
 
-static constexpr size_t kStreamingBufferSize = 16 * KB;
+static constexpr size_t kMinBufSize = 18U;  // Trace header is up to 18B.
 
-Trace::Trace(File* trace_file, const char* trace_name, int buffer_size, int flags,
+Trace::Trace(File* trace_file, const char* trace_name, size_t buffer_size, int flags,
              TraceOutputMode output_mode, TraceMode trace_mode)
     : trace_file_(trace_file),
-      buf_(new uint8_t[output_mode == TraceOutputMode::kStreaming ?
-          kStreamingBufferSize :
-          buffer_size]()),
+      buf_(new uint8_t[std::max(kMinBufSize, buffer_size)]()),
       flags_(flags), trace_output_mode_(output_mode), trace_mode_(trace_mode),
       clock_source_(default_clock_source_),
-      buffer_size_(output_mode == TraceOutputMode::kStreaming ?
-          kStreamingBufferSize :
-          buffer_size),
+      buffer_size_(std::max(kMinBufSize, buffer_size)),
       start_time_(MicroTime()), clock_overhead_ns_(GetClockOverheadNanoSeconds()), cur_offset_(0),
       overflow_(false), interval_us_(0), streaming_lock_(nullptr) {
   uint16_t trace_version = GetTraceVersion(clock_source_);
@@ -621,6 +618,7 @@
     uint16_t record_size = GetRecordSize(clock_source_);
     Append2LE(buf_.get() + 16, record_size);
   }
+  static_assert(18 <= kMinBufSize, "Minimum buffer size not large enough for trace header");
 
   // Update current offset.
   cur_offset_.StoreRelaxed(kTraceHeaderLength);
@@ -875,11 +873,21 @@
 void Trace::WriteToBuf(const uint8_t* src, size_t src_size) {
   int32_t old_offset = cur_offset_.LoadRelaxed();
   int32_t new_offset = old_offset + static_cast<int32_t>(src_size);
-  if (new_offset > buffer_size_) {
+  if (dchecked_integral_cast<size_t>(new_offset) > buffer_size_) {
     // Flush buffer.
     if (!trace_file_->WriteFully(buf_.get(), old_offset)) {
       PLOG(WARNING) << "Failed streaming a tracing event.";
     }
+
+    // Check whether the data is too large for the buffer, then write immediately.
+    if (src_size >= buffer_size_) {
+      if (!trace_file_->WriteFully(src, src_size)) {
+        PLOG(WARNING) << "Failed streaming a tracing event.";
+      }
+      cur_offset_.StoreRelease(0);  // Buffer is empty now.
+      return;
+    }
+
     old_offset = 0;
     new_offset = static_cast<int32_t>(src_size);
   }
@@ -900,7 +908,7 @@
     do {
       old_offset = cur_offset_.LoadRelaxed();
       new_offset = old_offset + GetRecordSize(clock_source_);
-      if (new_offset > buffer_size_) {
+      if (static_cast<size_t>(new_offset) > buffer_size_) {
         overflow_ = true;
         return;
       }
@@ -1034,4 +1042,10 @@
   return the_trace_->trace_mode_;
 }
 
+size_t Trace::GetBufferSize() {
+  MutexLock mu(Thread::Current(), *Locks::trace_lock_);
+  CHECK(the_trace_ != nullptr) << "Trace mode requested, but no trace currently running";
+  return the_trace_->buffer_size_;
+}
+
 }  // namespace art
diff --git a/runtime/trace.h b/runtime/trace.h
index 1ecd4d8..06824b8 100644
--- a/runtime/trace.h
+++ b/runtime/trace.h
@@ -72,7 +72,7 @@
 
   static void SetDefaultClockSource(TraceClockSource clock_source);
 
-  static void Start(const char* trace_filename, int trace_fd, int buffer_size, int flags,
+  static void Start(const char* trace_filename, int trace_fd, size_t buffer_size, int flags,
                     TraceOutputMode output_mode, TraceMode trace_mode, int interval_us)
       LOCKS_EXCLUDED(Locks::mutator_lock_,
                      Locks::thread_list_lock_,
@@ -136,9 +136,10 @@
 
   static TraceOutputMode GetOutputMode() LOCKS_EXCLUDED(Locks::trace_lock_);
   static TraceMode GetMode() LOCKS_EXCLUDED(Locks::trace_lock_);
+  static size_t GetBufferSize() LOCKS_EXCLUDED(Locks::trace_lock_);
 
  private:
-  Trace(File* trace_file, const char* trace_name, int buffer_size, int flags,
+  Trace(File* trace_file, const char* trace_name, size_t buffer_size, int flags,
         TraceOutputMode output_mode, TraceMode trace_mode);
 
   // The sampling interval in microseconds is passed as an argument.
@@ -202,7 +203,7 @@
   const TraceClockSource clock_source_;
 
   // Size of buf_.
-  const int buffer_size_;
+  const size_t buffer_size_;
 
   // Time trace was created.
   const uint64_t start_time_;
diff --git a/runtime/utils_test.cc b/runtime/utils_test.cc
index ae24b77..d8f8950 100644
--- a/runtime/utils_test.cc
+++ b/runtime/utils_test.cc
@@ -384,7 +384,8 @@
 TEST_F(UtilsTest, ExecSuccess) {
   std::vector<std::string> command;
   if (kIsTargetBuild) {
-    command.push_back("/system/bin/id");
+    std::string android_root(GetAndroidRoot());
+    command.push_back(android_root + "/bin/id");
   } else {
     command.push_back("/usr/bin/id");
   }
diff --git a/runtime/well_known_classes.cc b/runtime/well_known_classes.cc
index a803df8..a2d0427 100644
--- a/runtime/well_known_classes.cc
+++ b/runtime/well_known_classes.cc
@@ -39,6 +39,7 @@
 jclass WellKnownClasses::java_lang_Daemons;
 jclass WellKnownClasses::java_lang_Error;
 jclass WellKnownClasses::java_lang_Object;
+jclass WellKnownClasses::java_lang_OutOfMemoryError;
 jclass WellKnownClasses::java_lang_reflect_AbstractMethod;
 jclass WellKnownClasses::java_lang_reflect_ArtMethod;
 jclass WellKnownClasses::java_lang_reflect_Constructor;
@@ -176,6 +177,7 @@
   java_lang_ClassNotFoundException = CacheClass(env, "java/lang/ClassNotFoundException");
   java_lang_Daemons = CacheClass(env, "java/lang/Daemons");
   java_lang_Object = CacheClass(env, "java/lang/Object");
+  java_lang_OutOfMemoryError = CacheClass(env, "java/lang/OutOfMemoryError");
   java_lang_Error = CacheClass(env, "java/lang/Error");
   java_lang_reflect_AbstractMethod = CacheClass(env, "java/lang/reflect/AbstractMethod");
   java_lang_reflect_ArtMethod = CacheClass(env, "java/lang/reflect/ArtMethod");
diff --git a/runtime/well_known_classes.h b/runtime/well_known_classes.h
index 2df1c0e..cef9d55 100644
--- a/runtime/well_known_classes.h
+++ b/runtime/well_known_classes.h
@@ -50,6 +50,7 @@
   static jclass java_lang_Daemons;
   static jclass java_lang_Error;
   static jclass java_lang_Object;
+  static jclass java_lang_OutOfMemoryError;
   static jclass java_lang_reflect_AbstractMethod;
   static jclass java_lang_reflect_ArtMethod;
   static jclass java_lang_reflect_Constructor;
diff --git a/test/080-oom-throw/expected.txt b/test/080-oom-throw/expected.txt
index 73cc0d8..904393b 100644
--- a/test/080-oom-throw/expected.txt
+++ b/test/080-oom-throw/expected.txt
@@ -1,2 +1,3 @@
+Test reflection correctly threw
 NEW_ARRAY correctly threw OOME
 NEW_INSTANCE correctly threw OOME
diff --git a/test/080-oom-throw/src/Main.java b/test/080-oom-throw/src/Main.java
index c93f8bb..f007b25 100644
--- a/test/080-oom-throw/src/Main.java
+++ b/test/080-oom-throw/src/Main.java
@@ -14,6 +14,9 @@
  * limitations under the License.
  */
 
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+
 public class Main {
     static class ArrayMemEater {
         static boolean sawOome;
@@ -68,6 +71,10 @@
     }
 
     public static void main(String[] args) {
+        if (triggerReflectionOOM()) {
+            System.out.println("Test reflection correctly threw");
+        }
+
         if (triggerArrayOOM()) {
             System.out.println("NEW_ARRAY correctly threw OOME");
         }
@@ -76,4 +83,46 @@
             System.out.println("NEW_INSTANCE correctly threw OOME");
         }
     }
+
+    static Object[] holder;
+
+    public static void blowup() throws Exception {
+        int size = 32 * 1024 * 1024;
+        for (int i = 0; i < holder.length; ) {
+            try {
+                holder[i] = new char[size];
+                i++;
+            } catch (OutOfMemoryError oome) {
+                size = size / 2;
+                if (size == 0) {
+                     break;
+                }
+            }
+        }
+        holder[0] = new char[100000];
+    }
+
+    static boolean triggerReflectionOOM() {
+        try {
+            Class<?> c = Main.class;
+            Method m = c.getMethod("blowup", (Class[]) null);
+            holder = new Object[1000000];
+            m.invoke(null);
+            holder = null;
+            System.out.println("Didn't throw from blowup");
+        } catch (OutOfMemoryError e) {
+            holder = null;
+        } catch (InvocationTargetException e) {
+            holder = null;
+            if (!(e.getCause() instanceof OutOfMemoryError)) {
+                System.out.println("InvocationTargetException cause not OOME " + e.getCause());
+                return false;
+            }
+        } catch (Exception e) {
+            holder = null;
+            System.out.println("Unexpected exception " + e);
+            return false;
+        }
+        return true;
+    }
 }
diff --git a/test/090-loop-formation/expected.txt b/test/090-loop-formation/expected.txt
index b7e0bb3..b945c30 100644
--- a/test/090-loop-formation/expected.txt
+++ b/test/090-loop-formation/expected.txt
@@ -3,3 +3,4 @@
 counter3 is 32767
 counter4 is 0
 counter5 is 65534
+256
diff --git a/test/090-loop-formation/src/Main.java b/test/090-loop-formation/src/Main.java
index 7c16667..16ff3b2 100644
--- a/test/090-loop-formation/src/Main.java
+++ b/test/090-loop-formation/src/Main.java
@@ -52,5 +52,31 @@
         System.out.println("counter3 is " + counter3);
         System.out.println("counter4 is " + counter4);
         System.out.println("counter5 is " + counter5);
+
+        deeplyNested();
+    }
+
+    // GVN is limited to a maximum loop depth of 6. To track whether dependent passes are
+    // correctly turned off, test some very simple, but deeply nested loops.
+    private static void deeplyNested() {
+        int sum = 0;
+        for (int i = 0; i < 2; i++) {
+            for (int j = 0; j < 2; j++) {
+                for (int k = 0; k < 2; k++) {
+                    for (int l = 0; l < 2; l++) {
+                        for (int m = 0; m < 2; m++) {
+                            for (int n = 0; n < 2; n++) {
+                                for (int o = 0; o < 2; o++) {
+                                    for (int p = 0; p < 2; p++) {
+                                        sum++;
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+        System.out.println(sum);
     }
 }
diff --git a/test/104-growth-limit/src/Main.java b/test/104-growth-limit/src/Main.java
index d666377..d31cbf1 100644
--- a/test/104-growth-limit/src/Main.java
+++ b/test/104-growth-limit/src/Main.java
@@ -29,26 +29,28 @@
         final Method get_runtime = vm_runtime.getDeclaredMethod("getRuntime");
         final Object runtime = get_runtime.invoke(null);
         final Method clear_growth_limit = vm_runtime.getDeclaredMethod("clearGrowthLimit");
+        List<byte[]> l = new ArrayList<byte[]>();
         try {
-            List<byte[]> l = new ArrayList<byte[]>();
             while (true) {
                 // Allocate a MB at a time
                 l.add(new byte[1048576]);
                 alloc1++;
             }
         } catch (OutOfMemoryError e) {
+            l = null;
         }
         // Expand the heap to the maximum size.
         clear_growth_limit.invoke(runtime);
         int alloc2 = 1;
+        l = new ArrayList<byte[]>();
         try {
-            List<byte[]> l = new ArrayList<byte[]>();
             while (true) {
                 // Allocate a MB at a time
                 l.add(new byte[1048576]);
                 alloc2++;
             }
         } catch (OutOfMemoryError e2) {
+            l = null;
             if (alloc1 > alloc2) {
                 System.out.println("ERROR: Allocated less memory after growth" +
                     "limit cleared (" + alloc1 + " MBs > " + alloc2 + " MBs");
diff --git a/test/138-duplicate-classes-check/build b/test/138-duplicate-classes-check/build
new file mode 100755
index 0000000..7ddc81d
--- /dev/null
+++ b/test/138-duplicate-classes-check/build
@@ -0,0 +1,31 @@
+#!/bin/bash
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Stop if something fails.
+set -e
+
+mkdir classes
+${JAVAC} -d classes `find src -name '*.java'`
+
+mkdir classes-ex
+${JAVAC} -d classes-ex `find src-ex -name '*.java'`
+
+if [ ${NEED_DEX} = "true" ]; then
+  ${DX} -JXmx256m --debug --dex --dump-to=classes.lst --output=classes.dex --dump-width=1000 classes
+  zip $TEST_NAME.jar classes.dex
+  ${DX} -JXmx256m --debug --dex --dump-to=classes-ex.lst --output=classes.dex --dump-width=1000 classes-ex
+  zip ${TEST_NAME}-ex.jar classes.dex
+fi
diff --git a/test/138-duplicate-classes-check/expected.txt b/test/138-duplicate-classes-check/expected.txt
new file mode 100644
index 0000000..b2f7f08
--- /dev/null
+++ b/test/138-duplicate-classes-check/expected.txt
@@ -0,0 +1,2 @@
+10
+10
diff --git a/test/138-duplicate-classes-check/info.txt b/test/138-duplicate-classes-check/info.txt
new file mode 100644
index 0000000..22a66a2
--- /dev/null
+++ b/test/138-duplicate-classes-check/info.txt
@@ -0,0 +1 @@
+Check whether a duplicate class is detected.
diff --git a/test/138-duplicate-classes-check/src-ex/A.java b/test/138-duplicate-classes-check/src-ex/A.java
new file mode 100644
index 0000000..8e52cb3
--- /dev/null
+++ b/test/138-duplicate-classes-check/src-ex/A.java
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class A {
+    public volatile int i;
+
+    public A() {
+      i = 10;
+    }
+}
diff --git a/test/138-duplicate-classes-check/src-ex/TestEx.java b/test/138-duplicate-classes-check/src-ex/TestEx.java
new file mode 100644
index 0000000..87558fa
--- /dev/null
+++ b/test/138-duplicate-classes-check/src-ex/TestEx.java
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class TestEx {
+    public static void test() {
+        System.out.println(new A().i);
+    }
+}
diff --git a/test/138-duplicate-classes-check/src/A.java b/test/138-duplicate-classes-check/src/A.java
new file mode 100644
index 0000000..e1773e5
--- /dev/null
+++ b/test/138-duplicate-classes-check/src/A.java
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class A {
+    // Object fields add padding in the Foo class object layout. Therefore the field 'i' should
+    // be at a different offset compared to the A class from the ex DEX file.
+    public final Object anObject = null;
+    public final Object anotherObject = null;
+    // Use volatile to defeat inlining of the constructor + load-elimination.
+    public volatile int i;
+
+    public A() {
+      i = 10;
+    }
+}
diff --git a/test/138-duplicate-classes-check/src/FancyLoader.java b/test/138-duplicate-classes-check/src/FancyLoader.java
new file mode 100644
index 0000000..03ec948
--- /dev/null
+++ b/test/138-duplicate-classes-check/src/FancyLoader.java
@@ -0,0 +1,229 @@
+/*
+ * Copyright (C) 2008 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.lang.reflect.Constructor;
+import java.lang.reflect.Method;
+import java.lang.reflect.InvocationTargetException;
+
+/**
+ * A class loader with atypical behavior: we try to load a private
+ * class implementation before asking the system or boot loader.  This
+ * is used to create multiple classes with identical names in a single VM.
+ *
+ * If DexFile is available, we use that; if not, we assume we're not in
+ * Dalvik and instantiate the class with defineClass().
+ *
+ * The location of the DEX files and class data is dependent upon the
+ * test framework.
+ */
+public class FancyLoader extends ClassLoader {
+    /* this is where the "alternate" .class files live */
+    static final String CLASS_PATH = "classes-ex/";
+
+    /* this is the "alternate" DEX/Jar file */
+    static final String DEX_FILE = System.getenv("DEX_LOCATION") +
+            "/138-duplicate-classes-check-ex.jar";
+
+    /* on Dalvik, this is a DexFile; otherwise, it's null */
+    private Class mDexClass;
+
+    private Object mDexFile;
+
+    /**
+     * Construct FancyLoader, grabbing a reference to the DexFile class
+     * if we're running under Dalvik.
+     */
+    public FancyLoader(ClassLoader parent) {
+        super(parent);
+
+        try {
+            mDexClass = parent.loadClass("dalvik.system.DexFile");
+        } catch (ClassNotFoundException cnfe) {
+            // ignore -- not running Dalvik
+        }
+    }
+
+    /**
+     * Finds the class with the specified binary name.
+     *
+     * We search for a file in CLASS_PATH or pull an entry from DEX_FILE.
+     * If we don't find a match, we throw an exception.
+     */
+    protected Class<?> findClass(String name) throws ClassNotFoundException
+    {
+        if (mDexClass != null) {
+            return findClassDalvik(name);
+        } else {
+            return findClassNonDalvik(name);
+        }
+    }
+
+    /**
+     * Finds the class with the specified binary name, from a DEX file.
+     */
+    private Class<?> findClassDalvik(String name)
+        throws ClassNotFoundException {
+
+        if (mDexFile == null) {
+            synchronized (FancyLoader.class) {
+                Constructor ctor;
+                /*
+                 * Construct a DexFile object through reflection.
+                 */
+                try {
+                    ctor = mDexClass.getConstructor(new Class[] {String.class});
+                } catch (NoSuchMethodException nsme) {
+                    throw new ClassNotFoundException("getConstructor failed",
+                        nsme);
+                }
+
+                try {
+                    mDexFile = ctor.newInstance(DEX_FILE);
+                } catch (InstantiationException ie) {
+                    throw new ClassNotFoundException("newInstance failed", ie);
+                } catch (IllegalAccessException iae) {
+                    throw new ClassNotFoundException("newInstance failed", iae);
+                } catch (InvocationTargetException ite) {
+                    throw new ClassNotFoundException("newInstance failed", ite);
+                }
+            }
+        }
+
+        /*
+         * Call DexFile.loadClass(String, ClassLoader).
+         */
+        Method meth;
+
+        try {
+            meth = mDexClass.getMethod("loadClass",
+                    new Class[] { String.class, ClassLoader.class });
+        } catch (NoSuchMethodException nsme) {
+            throw new ClassNotFoundException("getMethod failed", nsme);
+        }
+
+        try {
+            meth.invoke(mDexFile, name, this);
+        } catch (IllegalAccessException iae) {
+            throw new ClassNotFoundException("loadClass failed", iae);
+        } catch (InvocationTargetException ite) {
+            throw new ClassNotFoundException("loadClass failed",
+                ite.getCause());
+        }
+
+        return null;
+    }
+
+    /**
+     * Finds the class with the specified binary name, from .class files.
+     */
+    private Class<?> findClassNonDalvik(String name)
+        throws ClassNotFoundException {
+
+        String pathName = CLASS_PATH + name + ".class";
+        //System.out.println("--- Fancy: looking for " + pathName);
+
+        File path = new File(pathName);
+        RandomAccessFile raf;
+
+        try {
+            raf = new RandomAccessFile(path, "r");
+        } catch (FileNotFoundException fnfe) {
+            throw new ClassNotFoundException("Not found: " + pathName);
+        }
+
+        /* read the entire file in */
+        byte[] fileData;
+        try {
+            fileData = new byte[(int) raf.length()];
+            raf.readFully(fileData);
+        } catch (IOException ioe) {
+            throw new ClassNotFoundException("Read error: " + pathName);
+        } finally {
+            try {
+                raf.close();
+            } catch (IOException ioe) {
+                // drop
+            }
+        }
+
+        /* create the class */
+        //System.out.println("--- Fancy: defining " + name);
+        try {
+            return defineClass(name, fileData, 0, fileData.length);
+        } catch (Throwable th) {
+            throw new ClassNotFoundException("defineClass failed", th);
+        }
+    }
+
+    /**
+     * Load a class.
+     *
+     * Normally a class loader wouldn't override this, but we want our
+     * version of the class to take precedence over an already-loaded
+     * version.
+     *
+     * We still want the system classes (e.g. java.lang.Object) from the
+     * bootstrap class loader.
+     */
+    protected Class<?> loadClass(String name, boolean resolve)
+        throws ClassNotFoundException
+    {
+        Class res;
+
+        /*
+         * 1. Invoke findLoadedClass(String) to check if the class has
+         * already been loaded.
+         *
+         * This doesn't change.
+         */
+        res = findLoadedClass(name);
+        if (res != null) {
+            System.out.println("FancyLoader.loadClass: "
+                + name + " already loaded");
+            if (resolve)
+                resolveClass(res);
+            return res;
+        }
+
+        /*
+         * 3. Invoke the findClass(String) method to find the class.
+         */
+        try {
+            res = findClass(name);
+            if (resolve)
+                resolveClass(res);
+        }
+        catch (ClassNotFoundException e) {
+            // we couldn't find it, so eat the exception and keep going
+        }
+
+        /*
+         * 2. Invoke the loadClass method on the parent class loader.  If
+         * the parent loader is null the class loader built-in to the
+         * virtual machine is used, instead.
+         *
+         * (Since we're not in java.lang, we can't actually invoke the
+         * parent's loadClass() method, but we passed our parent to the
+         * super-class which can take care of it for us.)
+         */
+        res = super.loadClass(name, resolve);   // returns class or throws
+        return res;
+    }
+}
diff --git a/test/138-duplicate-classes-check/src/Main.java b/test/138-duplicate-classes-check/src/Main.java
new file mode 100644
index 0000000..a9b5bb0
--- /dev/null
+++ b/test/138-duplicate-classes-check/src/Main.java
@@ -0,0 +1,43 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.File;
+import java.lang.reflect.Method;
+
+/**
+ * Structural hazard test.
+ */
+public class Main {
+    public static void main(String[] args) {
+        new Main().run();
+    }
+
+    private void run() {
+        System.out.println(new A().i);
+
+        // Now run the class from the -ex file.
+
+        FancyLoader loader = new FancyLoader(getClass().getClassLoader());
+
+        try {
+            Class testEx = loader.loadClass("TestEx");
+            Method test = testEx.getDeclaredMethod("test");
+            test.invoke(null);
+        } catch (Exception exc) {
+            exc.printStackTrace();
+        }
+    }
+}
diff --git a/test/138-duplicate-classes-check2/build b/test/138-duplicate-classes-check2/build
new file mode 100755
index 0000000..abcbbb8
--- /dev/null
+++ b/test/138-duplicate-classes-check2/build
@@ -0,0 +1,32 @@
+#!/bin/bash
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Stop if something fails.
+set -e
+
+mkdir classes
+${JAVAC} -d classes `find src -name '*.java'`
+
+mkdir classes-ex
+${JAVAC} -d classes-ex `find src-ex -name '*.java'`
+rm classes-ex/A.class
+
+if [ ${NEED_DEX} = "true" ]; then
+  ${DX} -JXmx256m --debug --dex --dump-to=classes.lst --output=classes.dex --dump-width=1000 classes
+  zip $TEST_NAME.jar classes.dex
+  ${DX} -JXmx256m --debug --dex --dump-to=classes-ex.lst --output=classes.dex --dump-width=1000 classes-ex
+  zip ${TEST_NAME}-ex.jar classes.dex
+fi
diff --git a/test/138-duplicate-classes-check2/expected.txt b/test/138-duplicate-classes-check2/expected.txt
new file mode 100644
index 0000000..b2f7f08
--- /dev/null
+++ b/test/138-duplicate-classes-check2/expected.txt
@@ -0,0 +1,2 @@
+10
+10
diff --git a/test/138-duplicate-classes-check2/info.txt b/test/138-duplicate-classes-check2/info.txt
new file mode 100644
index 0000000..7100122
--- /dev/null
+++ b/test/138-duplicate-classes-check2/info.txt
@@ -0,0 +1,2 @@
+Check whether a duplicate class is not detected, even though we compiled against one (but removed
+it before creating the dex file).
diff --git a/test/138-duplicate-classes-check2/run b/test/138-duplicate-classes-check2/run
new file mode 100755
index 0000000..8494ad9
--- /dev/null
+++ b/test/138-duplicate-classes-check2/run
@@ -0,0 +1,19 @@
+#!/bin/bash
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# We want to run as no-dex-file-fallback to confirm that even though the -ex file has a symbolic
+# reference to A, there's no class-def, so we don't detect a collision.
+exec ${RUN} --runtime-option -Xno-dex-file-fallback "${@}"
diff --git a/test/138-duplicate-classes-check2/src-ex/A.java b/test/138-duplicate-classes-check2/src-ex/A.java
new file mode 100644
index 0000000..8e52cb3
--- /dev/null
+++ b/test/138-duplicate-classes-check2/src-ex/A.java
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class A {
+    public volatile int i;
+
+    public A() {
+      i = 10;
+    }
+}
diff --git a/test/138-duplicate-classes-check2/src-ex/TestEx.java b/test/138-duplicate-classes-check2/src-ex/TestEx.java
new file mode 100644
index 0000000..87558fa
--- /dev/null
+++ b/test/138-duplicate-classes-check2/src-ex/TestEx.java
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class TestEx {
+    public static void test() {
+        System.out.println(new A().i);
+    }
+}
diff --git a/test/138-duplicate-classes-check2/src/A.java b/test/138-duplicate-classes-check2/src/A.java
new file mode 100644
index 0000000..e1773e5
--- /dev/null
+++ b/test/138-duplicate-classes-check2/src/A.java
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class A {
+    // Object fields add padding in the Foo class object layout. Therefore the field 'i' should
+    // be at a different offset compared to the A class from the ex DEX file.
+    public final Object anObject = null;
+    public final Object anotherObject = null;
+    // Use volatile to defeat inlining of the constructor + load-elimination.
+    public volatile int i;
+
+    public A() {
+      i = 10;
+    }
+}
diff --git a/test/138-duplicate-classes-check2/src/FancyLoader.java b/test/138-duplicate-classes-check2/src/FancyLoader.java
new file mode 100644
index 0000000..7e2bb08
--- /dev/null
+++ b/test/138-duplicate-classes-check2/src/FancyLoader.java
@@ -0,0 +1,229 @@
+/*
+ * Copyright (C) 2008 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.lang.reflect.Constructor;
+import java.lang.reflect.Method;
+import java.lang.reflect.InvocationTargetException;
+
+/**
+ * A class loader with atypical behavior: we try to load a private
+ * class implementation before asking the system or boot loader.  This
+ * is used to create multiple classes with identical names in a single VM.
+ *
+ * If DexFile is available, we use that; if not, we assume we're not in
+ * Dalvik and instantiate the class with defineClass().
+ *
+ * The location of the DEX files and class data is dependent upon the
+ * test framework.
+ */
+public class FancyLoader extends ClassLoader {
+    /* this is where the "alternate" .class files live */
+    static final String CLASS_PATH = "classes-ex/";
+
+    /* this is the "alternate" DEX/Jar file */
+    static final String DEX_FILE = System.getenv("DEX_LOCATION") +
+            "/138-duplicate-classes-check2-ex.jar";
+
+    /* on Dalvik, this is a DexFile; otherwise, it's null */
+    private Class mDexClass;
+
+    private Object mDexFile;
+
+    /**
+     * Construct FancyLoader, grabbing a reference to the DexFile class
+     * if we're running under Dalvik.
+     */
+    public FancyLoader(ClassLoader parent) {
+        super(parent);
+
+        try {
+            mDexClass = parent.loadClass("dalvik.system.DexFile");
+        } catch (ClassNotFoundException cnfe) {
+            // ignore -- not running Dalvik
+        }
+    }
+
+    /**
+     * Finds the class with the specified binary name.
+     *
+     * We search for a file in CLASS_PATH or pull an entry from DEX_FILE.
+     * If we don't find a match, we throw an exception.
+     */
+    protected Class<?> findClass(String name) throws ClassNotFoundException
+    {
+        if (mDexClass != null) {
+            return findClassDalvik(name);
+        } else {
+            return findClassNonDalvik(name);
+        }
+    }
+
+    /**
+     * Finds the class with the specified binary name, from a DEX file.
+     */
+    private Class<?> findClassDalvik(String name)
+        throws ClassNotFoundException {
+
+        if (mDexFile == null) {
+            synchronized (FancyLoader.class) {
+                Constructor ctor;
+                /*
+                 * Construct a DexFile object through reflection.
+                 */
+                try {
+                    ctor = mDexClass.getConstructor(new Class[] {String.class});
+                } catch (NoSuchMethodException nsme) {
+                    throw new ClassNotFoundException("getConstructor failed",
+                        nsme);
+                }
+
+                try {
+                    mDexFile = ctor.newInstance(DEX_FILE);
+                } catch (InstantiationException ie) {
+                    throw new ClassNotFoundException("newInstance failed", ie);
+                } catch (IllegalAccessException iae) {
+                    throw new ClassNotFoundException("newInstance failed", iae);
+                } catch (InvocationTargetException ite) {
+                    throw new ClassNotFoundException("newInstance failed", ite);
+                }
+            }
+        }
+
+        /*
+         * Call DexFile.loadClass(String, ClassLoader).
+         */
+        Method meth;
+
+        try {
+            meth = mDexClass.getMethod("loadClass",
+                    new Class[] { String.class, ClassLoader.class });
+        } catch (NoSuchMethodException nsme) {
+            throw new ClassNotFoundException("getMethod failed", nsme);
+        }
+
+        try {
+            meth.invoke(mDexFile, name, this);
+        } catch (IllegalAccessException iae) {
+            throw new ClassNotFoundException("loadClass failed", iae);
+        } catch (InvocationTargetException ite) {
+            throw new ClassNotFoundException("loadClass failed",
+                ite.getCause());
+        }
+
+        return null;
+    }
+
+    /**
+     * Finds the class with the specified binary name, from .class files.
+     */
+    private Class<?> findClassNonDalvik(String name)
+        throws ClassNotFoundException {
+
+        String pathName = CLASS_PATH + name + ".class";
+        //System.out.println("--- Fancy: looking for " + pathName);
+
+        File path = new File(pathName);
+        RandomAccessFile raf;
+
+        try {
+            raf = new RandomAccessFile(path, "r");
+        } catch (FileNotFoundException fnfe) {
+            throw new ClassNotFoundException("Not found: " + pathName);
+        }
+
+        /* read the entire file in */
+        byte[] fileData;
+        try {
+            fileData = new byte[(int) raf.length()];
+            raf.readFully(fileData);
+        } catch (IOException ioe) {
+            throw new ClassNotFoundException("Read error: " + pathName);
+        } finally {
+            try {
+                raf.close();
+            } catch (IOException ioe) {
+                // drop
+            }
+        }
+
+        /* create the class */
+        //System.out.println("--- Fancy: defining " + name);
+        try {
+            return defineClass(name, fileData, 0, fileData.length);
+        } catch (Throwable th) {
+            throw new ClassNotFoundException("defineClass failed", th);
+        }
+    }
+
+    /**
+     * Load a class.
+     *
+     * Normally a class loader wouldn't override this, but we want our
+     * version of the class to take precedence over an already-loaded
+     * version.
+     *
+     * We still want the system classes (e.g. java.lang.Object) from the
+     * bootstrap class loader.
+     */
+    protected Class<?> loadClass(String name, boolean resolve)
+        throws ClassNotFoundException
+    {
+        Class res;
+
+        /*
+         * 1. Invoke findLoadedClass(String) to check if the class has
+         * already been loaded.
+         *
+         * This doesn't change.
+         */
+        res = findLoadedClass(name);
+        if (res != null) {
+            System.out.println("FancyLoader.loadClass: "
+                + name + " already loaded");
+            if (resolve)
+                resolveClass(res);
+            return res;
+        }
+
+        /*
+         * 3. Invoke the findClass(String) method to find the class.
+         */
+        try {
+            res = findClass(name);
+            if (resolve)
+                resolveClass(res);
+        }
+        catch (ClassNotFoundException e) {
+            // we couldn't find it, so eat the exception and keep going
+        }
+
+        /*
+         * 2. Invoke the loadClass method on the parent class loader.  If
+         * the parent loader is null the class loader built-in to the
+         * virtual machine is used, instead.
+         *
+         * (Since we're not in java.lang, we can't actually invoke the
+         * parent's loadClass() method, but we passed our parent to the
+         * super-class which can take care of it for us.)
+         */
+        res = super.loadClass(name, resolve);   // returns class or throws
+        return res;
+    }
+}
diff --git a/test/138-duplicate-classes-check2/src/Main.java b/test/138-duplicate-classes-check2/src/Main.java
new file mode 100644
index 0000000..a9b5bb0
--- /dev/null
+++ b/test/138-duplicate-classes-check2/src/Main.java
@@ -0,0 +1,43 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.File;
+import java.lang.reflect.Method;
+
+/**
+ * Structural hazard test.
+ */
+public class Main {
+    public static void main(String[] args) {
+        new Main().run();
+    }
+
+    private void run() {
+        System.out.println(new A().i);
+
+        // Now run the class from the -ex file.
+
+        FancyLoader loader = new FancyLoader(getClass().getClassLoader());
+
+        try {
+            Class testEx = loader.loadClass("TestEx");
+            Method test = testEx.getDeclaredMethod("test");
+            test.invoke(null);
+        } catch (Exception exc) {
+            exc.printStackTrace();
+        }
+    }
+}
diff --git a/test/449-checker-bce/expected.txt b/test/449-checker-bce/expected.txt
index 29d6383..e69de29 100644
--- a/test/449-checker-bce/expected.txt
+++ b/test/449-checker-bce/expected.txt
@@ -1 +0,0 @@
-100
diff --git a/test/449-checker-bce/src/Main.java b/test/449-checker-bce/src/Main.java
index 17039a3..f90d85d 100644
--- a/test/449-checker-bce/src/Main.java
+++ b/test/449-checker-bce/src/Main.java
@@ -608,6 +608,380 @@
   }
 
 
+  int sum;
+
+  // CHECK-START: void Main.foo1(int[], int, int) BCE (before)
+  // CHECK: BoundsCheck
+  // CHECK: ArraySet
+  // CHECK-NOT: BoundsCheck
+  // CHECK: ArrayGet
+
+  // CHECK-START: void Main.foo1(int[], int, int) BCE (after)
+  // CHECK: Deoptimize
+  // CHECK: Deoptimize
+  // CHECK: Deoptimize
+  // CHECK-NOT: Deoptimize
+  // CHECK: Phi
+  // CHECK-NOT: BoundsCheck
+  // CHECK: ArraySet
+  // CHECK-NOT: BoundsCheck
+  // CHECK: ArrayGet
+
+  void foo1(int[] array, int start, int end) {
+    // Three HDeoptimize will be added. One for
+    // start >= 0, one for end <= array.length,
+    // and one for null check on array (to hoist null
+    // check and array.length out of loop).
+    for (int i = start ; i < end; i++) {
+      array[i] = 1;
+      sum += array[i];
+    }
+  }
+
+
+  // CHECK-START: void Main.foo2(int[], int, int) BCE (before)
+  // CHECK: BoundsCheck
+  // CHECK: ArraySet
+  // CHECK-NOT: BoundsCheck
+  // CHECK: ArrayGet
+
+  // CHECK-START: void Main.foo2(int[], int, int) BCE (after)
+  // CHECK: Deoptimize
+  // CHECK: Deoptimize
+  // CHECK: Deoptimize
+  // CHECK-NOT: Deoptimize
+  // CHECK: Phi
+  // CHECK-NOT: BoundsCheck
+  // CHECK: ArraySet
+  // CHECK-NOT: BoundsCheck
+  // CHECK: ArrayGet
+
+  void foo2(int[] array, int start, int end) {
+    // Three HDeoptimize will be added. One for
+    // start >= 0, one for end <= array.length,
+    // and one for null check on array (to hoist null
+    // check and array.length out of loop).
+    for (int i = start ; i <= end; i++) {
+      array[i] = 1;
+      sum += array[i];
+    }
+  }
+
+
+  // CHECK-START: void Main.foo3(int[], int) BCE (before)
+  // CHECK: BoundsCheck
+  // CHECK: ArraySet
+  // CHECK-NOT: BoundsCheck
+  // CHECK: ArrayGet
+
+  // CHECK-START: void Main.foo3(int[], int) BCE (after)
+  // CHECK: Deoptimize
+  // CHECK: Deoptimize
+  // CHECK-NOT: Deoptimize
+  // CHECK: Phi
+  // CHECK-NOT: BoundsCheck
+  // CHECK: ArraySet
+  // CHECK-NOT: BoundsCheck
+  // CHECK: ArrayGet
+
+  void foo3(int[] array, int end) {
+    // Two HDeoptimize will be added. One for end < array.length,
+    // and one for null check on array (to hoist null check
+    // and array.length out of loop).
+    for (int i = 3 ; i <= end; i++) {
+      array[i] = 1;
+      sum += array[i];
+    }
+  }
+
+  // CHECK-START: void Main.foo4(int[], int) BCE (before)
+  // CHECK: BoundsCheck
+  // CHECK: ArraySet
+  // CHECK-NOT: BoundsCheck
+  // CHECK: ArrayGet
+
+  // CHECK-START: void Main.foo4(int[], int) BCE (after)
+  // CHECK: Deoptimize
+  // CHECK: Deoptimize
+  // CHECK-NOT: Deoptimize
+  // CHECK: Phi
+  // CHECK-NOT: BoundsCheck
+  // CHECK: ArraySet
+  // CHECK-NOT: BoundsCheck
+  // CHECK: ArrayGet
+
+  void foo4(int[] array, int end) {
+    // Two HDeoptimize will be added. One for end <= array.length,
+    // and one for null check on array (to hoist null check
+    // and array.length out of loop).
+    for (int i = end ; i > 0; i--) {
+      array[i - 1] = 1;
+      sum += array[i - 1];
+    }
+  }
+
+
+  // CHECK-START: void Main.foo5(int[], int) BCE (before)
+  // CHECK: BoundsCheck
+  // CHECK: ArraySet
+  // CHECK: BoundsCheck
+  // CHECK: ArrayGet
+  // CHECK: BoundsCheck
+  // CHECK: ArrayGet
+  // CHECK: BoundsCheck
+  // CHECK: ArrayGet
+
+  // CHECK-START: void Main.foo5(int[], int) BCE (after)
+  // CHECK-NOT: BoundsCheck
+  // CHECK: ArraySet
+  // CHECK: Deoptimize
+  // CHECK-NOT: Deoptimize
+  // CHECK: Phi
+  // CHECK-NOT: BoundsCheck
+  // CHECK: ArrayGet
+  // CHECK-NOT: BoundsCheck
+  // CHECK: ArrayGet
+  // CHECK-NOT: BoundsCheck
+  // CHECK: ArrayGet
+
+  void foo5(int[] array, int end) {
+    // Bounds check in this loop can be eliminated without deoptimization.
+    for (int i = array.length - 1 ; i >= 0; i--) {
+      array[i] = 1;
+    }
+    // One HDeoptimize will be added.
+    // It's for (end - 2 <= array.length - 2).
+    for (int i = end - 2 ; i > 0; i--) {
+      sum += array[i - 1];
+      sum += array[i];
+      sum += array[i + 1];
+    }
+  }
+
+
+  // CHECK-START: void Main.foo6(int[], int, int) BCE (before)
+  // CHECK: BoundsCheck
+  // CHECK: ArrayGet
+  // CHECK: BoundsCheck
+  // CHECK: ArrayGet
+  // CHECK: BoundsCheck
+  // CHECK: ArrayGet
+  // CHECK: BoundsCheck
+  // CHECK: ArrayGet
+  // CHECK: BoundsCheck
+  // CHECK: ArrayGet
+  // CHECK-NOT: BoundsCheck
+  // CHECK: ArraySet
+
+  // CHECK-START: void Main.foo6(int[], int, int) BCE (after)
+  // CHECK: Deoptimize
+  // CHECK: Deoptimize
+  // CHECK: Deoptimize
+  // CHECK-NOT: Deoptimize
+  // CHECK: Phi
+  // CHECK-NOT: BoundsCheck
+  // CHECK: ArrayGet
+  // CHECK-NOT: BoundsCheck
+  // CHECK: ArrayGet
+  // CHECK-NOT: BoundsCheck
+  // CHECK: ArrayGet
+  // CHECK-NOT: BoundsCheck
+  // CHECK: ArrayGet
+  // CHECK-NOT: BoundsCheck
+  // CHECK: ArrayGet
+  // CHECK-NOT: BoundsCheck
+  // CHECK: ArraySet
+
+  void foo6(int[] array, int start, int end) {
+    // Three HDeoptimize will be added. One for
+    // start >= 2, one for end <= array.length - 3,
+    // and one for null check on array (to hoist null
+    // check and array.length out of loop).
+    for (int i = end; i >= start; i--) {
+      array[i] = (array[i-2] + array[i-1] + array[i] + array[i+1] + array[i+2]) / 5;
+    }
+  }
+
+
+  // CHECK-START: void Main.foo7(int[], int, int, boolean) BCE (before)
+  // CHECK: BoundsCheck
+  // CHECK: ArrayGet
+  // CHECK: BoundsCheck
+  // CHECK: ArrayGet
+
+  // CHECK-START: void Main.foo7(int[], int, int, boolean) BCE (after)
+  // CHECK: Deoptimize
+  // CHECK: Deoptimize
+  // CHECK: Deoptimize
+  // CHECK-NOT: Deoptimize
+  // CHECK: Phi
+  // CHECK: BoundsCheck
+  // CHECK: ArrayGet
+  // CHECK-NOT: BoundsCheck
+  // CHECK: ArrayGet
+
+  void foo7(int[] array, int start, int end, boolean lowEnd) {
+    // Three HDeoptimize will be added. One for
+    // start >= 0, one for end <= array.length,
+    // and one for null check on array (to hoist null
+    // check and array.length out of loop).
+    for (int i = start ; i < end; i++) {
+      if (lowEnd) {
+        // This array access isn't certain. So we don't
+        // use +1000 offset in decision making for deoptimization
+        // conditions.
+        sum += array[i + 1000];
+      }
+      sum += array[i];
+    }
+  }
+
+
+  // CHECK-START: void Main.partialLooping(int[], int, int) BCE (before)
+  // CHECK: BoundsCheck
+  // CHECK: ArraySet
+
+  // CHECK-START: void Main.partialLooping(int[], int, int) BCE (after)
+  // CHECK-NOT: Deoptimize
+  // CHECK: BoundsCheck
+  // CHECK: ArraySet
+
+  void partialLooping(int[] array, int start, int end) {
+    // This loop doesn't cover the full range of [start, end) so
+    // adding deoptimization is too aggressive, since end can be
+    // greater than array.length but the loop is never going to work on
+    // more than 2 elements.
+    for (int i = start; i < end; i++) {
+      if (i == 2) {
+        return;
+      }
+      array[i] = 1;
+    }
+  }
+
+
+  static void testUnknownBounds() {
+    boolean caught = false;
+    Main main = new Main();
+    main.foo1(new int[10], 0, 10);
+    if (main.sum != 10) {
+      System.out.println("foo1 failed!");
+    }
+
+    caught = false;
+    main = new Main();
+    try {
+      main.foo1(new int[10], 0, 11);
+    } catch (ArrayIndexOutOfBoundsException e) {
+      caught = true;
+    }
+    if (!caught || main.sum != 10) {
+      System.out.println("foo1 exception failed!");
+    }
+
+    main = new Main();
+    main.foo2(new int[10], 0, 9);
+    if (main.sum != 10) {
+      System.out.println("foo2 failed!");
+    }
+
+    caught = false;
+    main = new Main();
+    try {
+      main.foo2(new int[10], 0, 10);
+    } catch (ArrayIndexOutOfBoundsException e) {
+      caught = true;
+    }
+    if (!caught || main.sum != 10) {
+      System.out.println("foo2 exception failed!");
+    }
+
+    main = new Main();
+    main.foo3(new int[10], 9);
+    if (main.sum != 7) {
+      System.out.println("foo3 failed!");
+    }
+
+    caught = false;
+    main = new Main();
+    try {
+      main.foo3(new int[10], 10);
+    } catch (ArrayIndexOutOfBoundsException e) {
+      caught = true;
+    }
+    if (!caught || main.sum != 7) {
+      System.out.println("foo3 exception failed!");
+    }
+
+    main = new Main();
+    main.foo4(new int[10], 10);
+    if (main.sum != 10) {
+      System.out.println("foo4 failed!");
+    }
+
+    caught = false;
+    main = new Main();
+    try {
+      main.foo4(new int[10], 11);
+    } catch (ArrayIndexOutOfBoundsException e) {
+      caught = true;
+    }
+    if (!caught || main.sum != 0) {
+      System.out.println("foo4 exception failed!");
+    }
+
+    main = new Main();
+    main.foo5(new int[10], 10);
+    if (main.sum != 24) {
+      System.out.println("foo5 failed!");
+    }
+
+    caught = false;
+    main = new Main();
+    try {
+      main.foo5(new int[10], 11);
+    } catch (ArrayIndexOutOfBoundsException e) {
+      caught = true;
+    }
+    if (!caught || main.sum != 2) {
+      System.out.println("foo5 exception failed!");
+    }
+
+    main = new Main();
+    main.foo6(new int[10], 2, 7);
+
+    main = new Main();
+    int[] array = new int[4];
+    main.partialLooping(new int[3], 0, 4);
+    if ((array[0] != 1) && (array[1] != 1) &&
+        (array[2] != 0) && (array[3] != 0)) {
+      System.out.println("partialLooping failed!");
+    }
+
+    caught = false;
+    main = new Main();
+    try {
+      main.foo6(new int[10], 2, 8);
+    } catch (ArrayIndexOutOfBoundsException e) {
+      caught = true;
+    }
+    if (!caught) {
+      System.out.println("foo6 exception failed!");
+    }
+
+    caught = false;
+    main = new Main();
+    try {
+      main.foo6(new int[10], 1, 7);
+    } catch (ArrayIndexOutOfBoundsException e) {
+      caught = true;
+    }
+    if (!caught) {
+      System.out.println("foo6 exception failed!");
+    }
+
+  }
+
   // Make sure this method is compiled with optimizing.
   // CHECK-START: void Main.main(java.lang.String[]) register (after)
   // CHECK: ParallelMove
@@ -643,7 +1017,11 @@
 
     // Make sure this value is kept after deoptimization.
     int i = 1;
-    System.out.println(foo() + i);
+    if (foo() + i != 100) {
+      System.out.println("foo failed!");
+    };
+
+    testUnknownBounds();
   }
 
 }
diff --git a/test/458-checker-instruction-simplification/src/Main.java b/test/458-checker-instruction-simplification/src/Main.java
index 65be6cb..0dbda6b 100644
--- a/test/458-checker-instruction-simplification/src/Main.java
+++ b/test/458-checker-instruction-simplification/src/Main.java
@@ -374,6 +374,15 @@
   // CHECK-DAG:     [[Or:i\d+]]       Or [ [[Add1]] [[Add2]] ]
   // CHECK-DAG:                       Return [ [[Or]] ]
 
+  // CHECK-START: int Main.AddNegs2(int, int) GVN (after)
+  // CHECK-DAG:     [[Arg1:i\d+]]     ParameterValue
+  // CHECK-DAG:     [[Arg2:i\d+]]     ParameterValue
+  // CHECK-DAG:     [[Neg1:i\d+]]     Neg [ [[Arg1]] ]
+  // CHECK-DAG:     [[Neg2:i\d+]]     Neg [ [[Arg2]] ]
+  // CHECK-DAG:     [[Add:i\d+]]      Add [ [[Neg1]] [[Neg2]] ]
+  // CHECK-DAG:     [[Or:i\d+]]       Or [ [[Add]] [[Add]] ]
+  // CHECK-DAG:                       Return [ [[Or]] ]
+
   public static int AddNegs2(int arg1, int arg2) {
     int temp1 = -arg1;
     int temp2 = -arg2;
@@ -530,6 +539,12 @@
   // CHECK-NOT:                       Neg
   // CHECK-NOT:                       Add
 
+  // CHECK-START: int Main.NegNeg2(int) constant_folding_after_inlining (after)
+  // CHECK:         [[Const0:i\d+]]   IntConstant 0
+  // CHECK-NOT:                       Neg
+  // CHECK-NOT:                       Add
+  // CHECK:                           Return [ [[Const0]] ]
+
   public static int NegNeg2(int arg) {
     int temp = -arg;
     return temp + -temp;
@@ -903,8 +918,12 @@
   // CHECK:                            BooleanNot
   // CHECK-NOT:                        BooleanNot
 
+  public static boolean NegateValue(boolean arg) {
+    return !arg;
+  }
+
   public static boolean NotNotBool(boolean arg) {
-    return !(!arg);
+    return !(NegateValue(arg));
   }
 
   public static void main(String[] args) {
diff --git a/test/463-checker-boolean-simplifier/src/Main.java b/test/463-checker-boolean-simplifier/src/Main.java
index 3daf693..4346103 100644
--- a/test/463-checker-boolean-simplifier/src/Main.java
+++ b/test/463-checker-boolean-simplifier/src/Main.java
@@ -26,6 +26,12 @@
     }
   }
 
+  public static void assertIntEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
   /*
    * Elementary test negating a boolean. Verifies that blocks are merged and
    * empty branches removed.
@@ -155,6 +161,36 @@
     return (x <= y) == (y <= z);
   }
 
+  // CHECK-START: int Main.NegatedCondition(boolean) boolean_simplifier (before)
+  // CHECK-DAG:     [[Param:z\d+]]    ParameterValue
+  // CHECK-DAG:     [[Const42:i\d+]]  IntConstant 42
+  // CHECK-DAG:     [[Const43:i\d+]]  IntConstant 43
+  // CHECK-DAG:     [[NotParam:z\d+]] BooleanNot [ [[Param]] ]
+  // CHECK-DAG:                       If [ [[NotParam]] ]
+  // CHECK-DAG:     [[Phi:i\d+]]      Phi [ [[Const42]] [[Const43]] ]
+  // CHECK-DAG:                       Return [ [[Phi]] ]
+
+  // CHECK-START: int Main.NegatedCondition(boolean) boolean_simplifier (after)
+  // CHECK-DAG:     [[Param:z\d+]]    ParameterValue
+  // CHECK-DAG:     [[Const42:i\d+]]  IntConstant 42
+  // CHECK-DAG:     [[Const43:i\d+]]  IntConstant 43
+  // CHECK-DAG:                       If [ [[Param]] ]
+  // CHECK-DAG:     [[Phi:i\d+]]      Phi [ [[Const42]] [[Const43]] ]
+  // CHECK-DAG:                       Return [ [[Phi]] ]
+
+  // Note: The fact that branches are swapped is verified by running the test.
+
+  // CHECK-START: int Main.NegatedCondition(boolean) boolean_simplifier (after)
+  // CHECK-NOT:                       BooleanNot
+
+  public static int NegatedCondition(boolean x) {
+    if (x != false) {
+      return 42;
+    } else {
+      return 43;
+    }
+  }
+
   public static void main(String[] args) {
     assertBoolEquals(false, BooleanNot(true));
     assertBoolEquals(true, BooleanNot(false));
@@ -171,5 +207,7 @@
     assertBoolEquals(true, ValuesOrdered(3, 3, 3));
     assertBoolEquals(true, ValuesOrdered(3, 3, 5));
     assertBoolEquals(false, ValuesOrdered(5, 5, 3));
+    assertIntEquals(42, NegatedCondition(true));
+    assertIntEquals(43, NegatedCondition(false));
   }
 }
diff --git a/test/476-clinit-check-inlining-static-invoke/expected.txt b/test/476-clinit-check-inlining-static-invoke/expected.txt
new file mode 100644
index 0000000..c55bf72
--- /dev/null
+++ b/test/476-clinit-check-inlining-static-invoke/expected.txt
@@ -0,0 +1,2 @@
+checkClinitCheckBeforeStaticMethodInvoke START
+checkClinitCheckBeforeStaticMethodInvoke PASSED
diff --git a/test/476-clinit-check-inlining-static-invoke/info.txt b/test/476-clinit-check-inlining-static-invoke/info.txt
new file mode 100644
index 0000000..1a439fc
--- /dev/null
+++ b/test/476-clinit-check-inlining-static-invoke/info.txt
@@ -0,0 +1,3 @@
+Regression test for a bug where an inlined call to a static method
+failed to emit a prior initialization check of the method's declaring
+class.
diff --git a/test/476-clinit-check-inlining-static-invoke/src/Main.java b/test/476-clinit-check-inlining-static-invoke/src/Main.java
new file mode 100644
index 0000000..a7d3bcd
--- /dev/null
+++ b/test/476-clinit-check-inlining-static-invoke/src/Main.java
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  public static void main(String[] args) {
+    checkClinitCheckBeforeStaticMethodInvoke();
+  }
+
+  static void checkClinitCheckBeforeStaticMethodInvoke() {
+    System.out.println("checkClinitCheckBeforeStaticMethodInvoke START");
+
+    // Call static method to cause implicit class initialization, even
+    // if it is inlined.
+    ClassWithClinit.$opt$inline$StaticMethod();
+    if (!classWithClinitInitialized) {
+      System.out.println("checkClinitCheckBeforeStaticMethodInvoke FAILED");
+      return;
+    }
+
+    System.out.println("checkClinitCheckBeforeStaticMethodInvoke PASSED");
+  }
+
+  static class ClassWithClinit {
+    static {
+      Main.classWithClinitInitialized = true;
+    }
+
+    static void $opt$inline$StaticMethod() {
+    }
+  }
+
+  static boolean classWithClinitInitialized = false;
+}
diff --git a/test/478-checker-clinit-check-pruning/expected.txt b/test/478-checker-clinit-check-pruning/expected.txt
new file mode 100644
index 0000000..387e1a7
--- /dev/null
+++ b/test/478-checker-clinit-check-pruning/expected.txt
@@ -0,0 +1,6 @@
+Main$ClassWithClinit1's static initializer
+Main$ClassWithClinit2's static initializer
+Main$ClassWithClinit3's static initializer
+Main$ClassWithClinit4's static initializer
+Main$ClassWithClinit5's static initializer
+Main$ClassWithClinit6's static initializer
diff --git a/test/478-checker-clinit-check-pruning/info.txt b/test/478-checker-clinit-check-pruning/info.txt
new file mode 100644
index 0000000..deb64de
--- /dev/null
+++ b/test/478-checker-clinit-check-pruning/info.txt
@@ -0,0 +1,3 @@
+Test ensuring class initializations checks (and load class instructions)
+added by the graph builder during the construction of a static invoke
+are properly pruned.
diff --git a/test/478-checker-clinit-check-pruning/src/Main.java b/test/478-checker-clinit-check-pruning/src/Main.java
new file mode 100644
index 0000000..6da8945
--- /dev/null
+++ b/test/478-checker-clinit-check-pruning/src/Main.java
@@ -0,0 +1,276 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  /*
+   * Ensure an inlined static invoke explicitly triggers the
+   * initialization check of the called method's declaring class, and
+   * that the corresponding load class instruction does not get
+   * removed before register allocation & code generation.
+   */
+
+  // CHECK-START: void Main.invokeStaticInlined() builder (after)
+  // CHECK-DAG:     [[LoadClass:l\d+]]    LoadClass
+  // CHECK-DAG:     [[ClinitCheck:l\d+]]  ClinitCheck [ [[LoadClass]] ]
+  // CHECK-DAG:                           InvokeStaticOrDirect [ [[ClinitCheck]] ]
+
+  // CHECK-START: void Main.invokeStaticInlined() inliner (after)
+  // CHECK-DAG:     [[LoadClass:l\d+]]    LoadClass
+  // CHECK-DAG:     [[ClinitCheck:l\d+]]  ClinitCheck [ [[LoadClass]] ]
+
+  // CHECK-START: void Main.invokeStaticInlined() inliner (after)
+  // CHECK-NOT:                           InvokeStaticOrDirect
+
+  // The following checks ensure the clinit check instruction added by
+  // the builder is pruned by the PrepareForRegisterAllocation, while
+  // the load class instruction is preserved.  As the control flow
+  // graph is not dumped after (nor before) this step, we check the
+  // CFG as it is before the next pass (liveness analysis) instead.
+
+  // CHECK-START: void Main.invokeStaticInlined() liveness (before)
+  // CHECK-DAG:                           LoadClass
+
+  // CHECK-START: void Main.invokeStaticInlined() liveness (before)
+  // CHECK-NOT:                           ClinitCheck
+  // CHECK-NOT:                           InvokeStaticOrDirect
+
+  static void invokeStaticInlined() {
+    ClassWithClinit1.$opt$inline$StaticMethod();
+  }
+
+  static class ClassWithClinit1 {
+    static {
+      System.out.println("Main$ClassWithClinit1's static initializer");
+    }
+
+    static void $opt$inline$StaticMethod() {
+    }
+  }
+
+  /*
+   * Ensure a non-inlined static invoke eventually has an implicit
+   * initialization check of the called method's declaring class.
+   */
+
+  // CHECK-START: void Main.invokeStaticNotInlined() builder (after)
+  // CHECK-DAG:     [[LoadClass:l\d+]]    LoadClass
+  // CHECK-DAG:     [[ClinitCheck:l\d+]]  ClinitCheck [ [[LoadClass]] ]
+  // CHECK-DAG:                           InvokeStaticOrDirect [ [[ClinitCheck]] ]
+
+  // CHECK-START: void Main.invokeStaticNotInlined() inliner (after)
+  // CHECK-DAG:     [[LoadClass:l\d+]]    LoadClass
+  // CHECK-DAG:     [[ClinitCheck:l\d+]]  ClinitCheck [ [[LoadClass]] ]
+  // CHECK-DAG:                           InvokeStaticOrDirect [ [[ClinitCheck]] ]
+
+  // The following checks ensure the clinit check and load class
+  // instructions added by the builder are pruned by the
+  // PrepareForRegisterAllocation.  As the control flow graph is not
+  // dumped after (nor before) this step, we check the CFG as it is
+  // before the next pass (liveness analysis) instead.
+
+  // CHECK-START: void Main.invokeStaticNotInlined() liveness (before)
+  // CHECK-DAG:                           InvokeStaticOrDirect
+
+  // CHECK-START: void Main.invokeStaticNotInlined() liveness (before)
+  // CHECK-NOT:                           LoadClass
+  // CHECK-NOT:                           ClinitCheck
+
+  static void invokeStaticNotInlined() {
+    ClassWithClinit2.staticMethod();
+  }
+
+  static class ClassWithClinit2 {
+    static {
+      System.out.println("Main$ClassWithClinit2's static initializer");
+    }
+
+    static boolean doThrow = false;
+
+    static void staticMethod() {
+      if (doThrow) {
+        // Try defeating inlining.
+        throw new Error();
+      }
+    }
+  }
+
+  /*
+   * Ensure an inlined call to a static method whose declaring class
+   * is statically known to have been initialized does not require an
+   * explicit clinit check.
+   */
+
+  // CHECK-START: void Main$ClassWithClinit3.invokeStaticInlined() builder (after)
+  // CHECK-DAG:                           InvokeStaticOrDirect
+
+  // CHECK-START: void Main$ClassWithClinit3.invokeStaticInlined() builder (after)
+  // CHECK-NOT:                           LoadClass
+  // CHECK-NOT:                           ClinitCheck
+
+  // CHECK-START: void Main$ClassWithClinit3.invokeStaticInlined() inliner (after)
+  // CHECK-NOT:                           LoadClass
+  // CHECK-NOT:                           ClinitCheck
+  // CHECK-NOT:                           InvokeStaticOrDirect
+
+  static class ClassWithClinit3 {
+    static void invokeStaticInlined() {
+      // The invocation of invokeStaticInlined triggers the
+      // initialization of ClassWithClinit3, meaning that the
+      // hereinbelow call to $opt$inline$StaticMethod does not need a
+      // clinit check.
+      $opt$inline$StaticMethod();
+    }
+
+    static {
+      System.out.println("Main$ClassWithClinit3's static initializer");
+    }
+
+    static void $opt$inline$StaticMethod() {
+    }
+  }
+
+  /*
+   * Ensure an non-inlined call to a static method whose declaring
+   * class is statically known to have been initialized does not
+   * require an explicit clinit check.
+   */
+
+  // CHECK-START: void Main$ClassWithClinit4.invokeStaticNotInlined() builder (after)
+  // CHECK-DAG:                           InvokeStaticOrDirect
+
+  // CHECK-START: void Main$ClassWithClinit4.invokeStaticNotInlined() builder (after)
+  // CHECK-NOT:                           LoadClass
+  // CHECK-NOT:                           ClinitCheck
+
+  // CHECK-START: void Main$ClassWithClinit4.invokeStaticNotInlined() inliner (after)
+  // CHECK-DAG:                           InvokeStaticOrDirect
+
+  // CHECK-START: void Main$ClassWithClinit4.invokeStaticNotInlined() inliner (after)
+  // CHECK-NOT:                           LoadClass
+  // CHECK-NOT:                           ClinitCheck
+
+  static class ClassWithClinit4 {
+    static void invokeStaticNotInlined() {
+      // The invocation of invokeStaticNotInlined triggers the
+      // initialization of ClassWithClinit4, meaning that the
+      // hereinbelow call to staticMethod does not need a clinit
+      // check.
+      staticMethod();
+    }
+
+    static {
+      System.out.println("Main$ClassWithClinit4's static initializer");
+    }
+
+    static boolean doThrow = false;
+
+    static void staticMethod() {
+      if (doThrow) {
+        // Try defeating inlining.
+        throw new Error();
+      }
+    }
+  }
+
+  /*
+   * Ensure an inlined call to a static method whose declaring class
+   * is a super class of the caller's class does not require an
+   * explicit clinit check.
+   */
+
+  // CHECK-START: void Main$SubClassOfClassWithClinit5.invokeStaticInlined() builder (after)
+  // CHECK-DAG:                           InvokeStaticOrDirect
+
+  // CHECK-START: void Main$SubClassOfClassWithClinit5.invokeStaticInlined() builder (after)
+  // CHECK-NOT:                           LoadClass
+  // CHECK-NOT:                           ClinitCheck
+
+  // CHECK-START: void Main$SubClassOfClassWithClinit5.invokeStaticInlined() inliner (after)
+  // CHECK-NOT:                           LoadClass
+  // CHECK-NOT:                           ClinitCheck
+  // CHECK-NOT:                           InvokeStaticOrDirect
+
+  static class ClassWithClinit5 {
+    static void $opt$inline$StaticMethod() {
+    }
+
+    static {
+      System.out.println("Main$ClassWithClinit5's static initializer");
+    }
+  }
+
+  static class SubClassOfClassWithClinit5 extends ClassWithClinit5 {
+    static void invokeStaticInlined() {
+      ClassWithClinit5.$opt$inline$StaticMethod();
+    }
+  }
+
+  /*
+   * Ensure an non-inlined call to a static method whose declaring
+   * class is a super class of the caller's class does not require an
+   * explicit clinit check.
+   */
+
+  // CHECK-START: void Main$SubClassOfClassWithClinit6.invokeStaticNotInlined() builder (after)
+  // CHECK-DAG:                           InvokeStaticOrDirect
+
+  // CHECK-START: void Main$SubClassOfClassWithClinit6.invokeStaticNotInlined() builder (after)
+  // CHECK-NOT:                           LoadClass
+  // CHECK-NOT:                           ClinitCheck
+
+  // CHECK-START: void Main$SubClassOfClassWithClinit6.invokeStaticNotInlined() inliner (after)
+  // CHECK-DAG:                           InvokeStaticOrDirect
+
+  // CHECK-START: void Main$SubClassOfClassWithClinit6.invokeStaticNotInlined() inliner (after)
+  // CHECK-NOT:                           LoadClass
+  // CHECK-NOT:                           ClinitCheck
+
+  static class ClassWithClinit6 {
+    static boolean doThrow = false;
+
+    static void staticMethod() {
+      if (doThrow) {
+        // Try defeating inlining.
+        throw new Error();
+      }
+    }
+
+    static {
+      System.out.println("Main$ClassWithClinit6's static initializer");
+    }
+  }
+
+  static class SubClassOfClassWithClinit6 extends ClassWithClinit6 {
+    static void invokeStaticNotInlined() {
+      ClassWithClinit6.staticMethod();
+    }
+  }
+
+  // TODO: Add a test for the case of a static method whose declaring
+  // class type index is not available (i.e. when `storage_index`
+  // equals `DexFile::kDexNoIndex` in
+  // art::HGraphBuilder::BuildInvoke).
+
+  public static void main(String[] args) {
+    invokeStaticInlined();
+    invokeStaticNotInlined();
+    ClassWithClinit3.invokeStaticInlined();
+    ClassWithClinit4.invokeStaticNotInlined();
+    SubClassOfClassWithClinit5.invokeStaticInlined();
+    SubClassOfClassWithClinit6.invokeStaticNotInlined();
+  }
+}
diff --git a/test/480-checker-dead-blocks/expected.txt b/test/480-checker-dead-blocks/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/480-checker-dead-blocks/expected.txt
diff --git a/test/480-checker-dead-blocks/info.txt b/test/480-checker-dead-blocks/info.txt
new file mode 100644
index 0000000..5aeafac
--- /dev/null
+++ b/test/480-checker-dead-blocks/info.txt
@@ -0,0 +1 @@
+Test removal of dead blocks.
\ No newline at end of file
diff --git a/test/480-checker-dead-blocks/src/Main.java b/test/480-checker-dead-blocks/src/Main.java
new file mode 100644
index 0000000..560ce95
--- /dev/null
+++ b/test/480-checker-dead-blocks/src/Main.java
@@ -0,0 +1,147 @@
+/*
+* Copyright (C) 2015 The Android Open Source Project
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*      http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+public class Main {
+
+  public static void assertIntEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static boolean inlineTrue() {
+    return true;
+  }
+
+  public static boolean inlineFalse() {
+    return false;
+  }
+
+  // CHECK-START: int Main.testTrueBranch(int, int) dead_code_elimination_final (before)
+  // CHECK-DAG:     [[ArgX:i\d+]]    ParameterValue
+  // CHECK-DAG:     [[ArgY:i\d+]]    ParameterValue
+  // CHECK-DAG:                      If
+  // CHECK-DAG:     [[Add:i\d+]]     Add [ [[ArgX]] [[ArgY]] ]
+  // CHECK-DAG:     [[Sub:i\d+]]     Sub [ [[ArgX]] [[ArgY]] ]
+  // CHECK-DAG:     [[Phi:i\d+]]     Phi [ [[Add]] [[Sub]] ]
+  // CHECK-DAG:                      Return [ [[Phi]] ]
+
+  // CHECK-START: int Main.testTrueBranch(int, int) dead_code_elimination_final (after)
+  // CHECK-DAG:     [[ArgX:i\d+]]    ParameterValue
+  // CHECK-DAG:     [[ArgY:i\d+]]    ParameterValue
+  // CHECK-DAG:     [[Add:i\d+]]     Add [ [[ArgX]] [[ArgY]] ]
+  // CHECK-DAG:                      Return [ [[Add]] ]
+
+  // CHECK-START: int Main.testTrueBranch(int, int) dead_code_elimination_final (after)
+  // CHECK-NOT:                      If
+  // CHECK-NOT:                      Sub
+  // CHECK-NOT:                      Phi
+
+  public static int testTrueBranch(int x, int y) {
+    int z;
+    if (inlineTrue()) {
+      z = x + y;
+    } else {
+      z = x - y;
+    }
+    return z;
+  }
+
+  // CHECK-START: int Main.testFalseBranch(int, int) dead_code_elimination_final (before)
+  // CHECK-DAG:     [[ArgX:i\d+]]    ParameterValue
+  // CHECK-DAG:     [[ArgY:i\d+]]    ParameterValue
+  // CHECK-DAG:                      If
+  // CHECK-DAG:     [[Add:i\d+]]     Add [ [[ArgX]] [[ArgY]] ]
+  // CHECK-DAG:     [[Sub:i\d+]]     Sub [ [[ArgX]] [[ArgY]] ]
+  // CHECK-DAG:     [[Phi:i\d+]]     Phi [ [[Add]] [[Sub]] ]
+  // CHECK-DAG:                      Return [ [[Phi]] ]
+
+  // CHECK-START: int Main.testFalseBranch(int, int) dead_code_elimination_final (after)
+  // CHECK-DAG:     [[ArgX:i\d+]]    ParameterValue
+  // CHECK-DAG:     [[ArgY:i\d+]]    ParameterValue
+  // CHECK-DAG:     [[Sub:i\d+]]     Sub [ [[ArgX]] [[ArgY]] ]
+  // CHECK-DAG:                      Return [ [[Sub]] ]
+
+  // CHECK-START: int Main.testFalseBranch(int, int) dead_code_elimination_final (after)
+  // CHECK-NOT:                      If
+  // CHECK-NOT:                      Add
+  // CHECK-NOT:                      Phi
+
+  public static int testFalseBranch(int x, int y) {
+    int z;
+    if (inlineFalse()) {
+      z = x + y;
+    } else {
+      z = x - y;
+    }
+    return z;
+  }
+
+  // CHECK-START: int Main.testRemoveLoop(int) dead_code_elimination_final (before)
+  // CHECK:                          Mul
+
+  // CHECK-START: int Main.testRemoveLoop(int) dead_code_elimination_final (after)
+  // CHECK-NOT:                      Mul
+
+  public static int testRemoveLoop(int x) {
+    if (inlineFalse()) {
+      for (int i = 0; i < x; ++i) {
+        x *= x;
+      }
+    }
+    return x;
+  }
+
+  // CHECK-START: int Main.testInfiniteLoop(int) dead_code_elimination_final (before)
+  // CHECK-DAG:                      Return
+  // CHECK-DAG:                      Exit
+
+  // CHECK-START: int Main.testInfiniteLoop(int) dead_code_elimination_final (after)
+  // CHECK-NOT:                      Return
+  // CHECK-NOT:                      Exit
+
+  public static int testInfiniteLoop(int x) {
+    while (inlineTrue()) {
+      x++;
+    }
+    return x;
+  }
+
+  // CHECK-START: int Main.testDeadLoop(int) dead_code_elimination_final (before)
+  // CHECK-DAG:                      If
+  // CHECK-DAG:                      Add
+
+  // CHECK-START: int Main.testDeadLoop(int) dead_code_elimination_final (after)
+  // CHECK-DAG:     [[Arg:i\d+]]     ParameterValue
+  // CHECK-DAG:                      Return [ [[Arg]] ]
+
+  // CHECK-START: int Main.testRemoveLoop(int) dead_code_elimination_final (after)
+  // CHECK-NOT:                      If
+  // CHECK-NOT:                      Add
+
+  public static int testDeadLoop(int x) {
+    while (inlineFalse()) {
+      x++;
+    }
+    return x;
+  }
+
+  public static void main(String[] args) {
+    assertIntEquals(7, testTrueBranch(4, 3));
+    assertIntEquals(1, testFalseBranch(4, 3));
+    assertIntEquals(42, testRemoveLoop(42));
+  }
+}
diff --git a/test/481-regression-phi-cond/expected.txt b/test/481-regression-phi-cond/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/481-regression-phi-cond/expected.txt
diff --git a/test/481-regression-phi-cond/info.txt b/test/481-regression-phi-cond/info.txt
new file mode 100644
index 0000000..7ac3bb6
--- /dev/null
+++ b/test/481-regression-phi-cond/info.txt
@@ -0,0 +1,2 @@
+Tests a regression in which simplification of a boolean selection could attempt
+to remove a Phi from the wrong instruction list.
diff --git a/test/481-regression-phi-cond/src/Main.java b/test/481-regression-phi-cond/src/Main.java
new file mode 100644
index 0000000..bad9669
--- /dev/null
+++ b/test/481-regression-phi-cond/src/Main.java
@@ -0,0 +1,51 @@
+/*
+* Copyright (C) 2015 The Android Open Source Project
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*      http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+
+public class Main {
+  public static void assertBooleanEquals(boolean expected, boolean result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static boolean inlinePhi(boolean x, boolean y, boolean z) {
+    boolean phi;
+    if (z) {
+      phi = x;
+    } else {
+      phi = y;
+    }
+    return phi;
+  }
+
+  public static boolean dontUseParam(boolean x) {
+    return false;
+  }
+
+  public static boolean testCase(boolean x, boolean y, boolean z) {
+    // First create a Phi(x, y).
+    boolean phi = inlinePhi(x, y, z);
+    // Now use the phi as a condition which the boolean simplifier will try to
+    // optimize out. If the result is not used, the algorithm will try to remove
+    // the original condition (phi) and crash.
+    return dontUseParam(phi == false ? false : true);
+  }
+
+  public static void main(String[] args) {
+    assertBooleanEquals(false, testCase(true, true, true));
+  }
+}
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index c5abd46..93340fb 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -95,7 +95,7 @@
   RELOCATE_TYPES += no-relocate
 endif
 ifeq ($(ART_TEST_RUN_TEST_RELOCATE_NO_PATCHOAT),true)
-  RELOCATE_TYPES := relocate-npatchoat
+  RELOCATE_TYPES += relocate-npatchoat
 endif
 TRACE_TYPES := ntrace
 ifeq ($(ART_TEST_TRACE),true)
@@ -250,6 +250,12 @@
     $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES),$(IMAGE_TYPES), \
     $(PICTEST_TYPES),$(DEBUGGABLE_TYPES),130-hprof,$(ALL_ADDRESS_SIZES))
 
+# 131 is an old test. The functionality has been implemented at an earlier stage and is checked
+# in tests 138.
+ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
+    $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES),$(IMAGE_TYPES), \
+    $(PICTEST_TYPES),$(DEBUGGABLE_TYPES),131-structural-change,$(ALL_ADDRESS_SIZES))
+
 # All these tests check that we have sane behavior if we don't have a patchoat or dex2oat.
 # Therefore we shouldn't run them in situations where we actually don't have these since they
 # explicitly test for them. These all also assume we have an image.
@@ -257,7 +263,12 @@
   116-nodex2oat \
   117-nopatchoat \
   118-noimage-dex2oat \
-  119-noimage-patchoat
+  119-noimage-patchoat \
+  138-duplicate-classes-check2
+
+# This test fails without an image.
+TEST_ART_BROKEN_NO_IMAGE_RUN_TESTS := \
+  138-duplicate-classes-check
 
 ifneq (,$(filter no-dex2oat,$(PREBUILD_TYPES)))
   ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),no-dex2oat, \
@@ -270,6 +281,9 @@
   ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
       $(COMPILER_TYPES), $(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES),no-image, \
       $(PICTEST_TYPES), $(DEBUGGABLE_TYPES), $(TEST_ART_BROKEN_FALLBACK_RUN_TESTS),$(ALL_ADDRESS_SIZES))
+  ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
+      $(COMPILER_TYPES), $(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES),no-image, \
+      $(PICTEST_TYPES), $(DEBUGGABLE_TYPES), $(TEST_ART_BROKEN_NO_IMAGE_RUN_TESTS),$(ALL_ADDRESS_SIZES))
 endif
 
 ifneq (,$(filter relocate-npatchoat,$(RELOCATE_TYPES)))
diff --git a/test/etc/run-test-jar b/test/etc/run-test-jar
index 414e4df..1c44958 100755
--- a/test/etc/run-test-jar
+++ b/test/etc/run-test-jar
@@ -225,7 +225,8 @@
 fi
 
 if [ "$USE_JVM" = "y" ]; then
-  ${JAVA} ${DEBUGGER_OPTS} ${JVM_VERIFY_ARG} -classpath classes $MAIN "$@"
+  # Xmx is necessary since we don't pass down the ART flags to JVM.
+  ${JAVA} ${DEBUGGER_OPTS} ${JVM_VERIFY_ARG} -Xmx256m -classpath classes $MAIN "$@"
   exit
 fi
 
@@ -363,6 +364,7 @@
              export ANDROID_ROOT=$ANDROID_ROOT && \
              $mkdir_cmdline && \
              export LD_LIBRARY_PATH=$LD_LIBRARY_PATH && \
+             export PATH=$ANDROID_ROOT/bin:$PATH && \
              $dex2oat_cmdline && \
              $dalvikvm_cmdline"
 
diff --git a/tools/art b/tools/art
index 6c89a60..85e6e2f 100644
--- a/tools/art
+++ b/tools/art
@@ -92,6 +92,7 @@
 ANDROID_DATA=$ANDROID_DATA \
   ANDROID_ROOT=$ANDROID_ROOT \
   LD_LIBRARY_PATH=$LD_LIBRARY_PATH \
+  PATH=$ANDROID_ROOT/bin:$PATH \
   $invoke_with $ANDROID_ROOT/bin/$DALVIKVM $lib \
     -XXlib:$LIBART \
     -Ximage:$ANDROID_ROOT/framework/core.art \
diff --git a/tools/libcore_failures.txt b/tools/libcore_failures.txt
index 2040b57..a387036 100644
--- a/tools/libcore_failures.txt
+++ b/tools/libcore_failures.txt
@@ -124,5 +124,11 @@
   description: "Needs kernel updates on host/device",
   result: EXEC_FAILED,
   names: ["libcore.io.OsTest#test_socketPing"]
+},
+{
+  description: "Linker issues in chrooted environment",
+  modes: [device],
+  result: EXEC_FAILED,
+  names: ["org.apache.harmony.tests.java.lang.ProcessManagerTest#testEnvironment"]
 }
 ]
diff --git a/tools/run-jdwp-tests.sh b/tools/run-jdwp-tests.sh
index 503ec71..a007fa2 100755
--- a/tools/run-jdwp-tests.sh
+++ b/tools/run-jdwp-tests.sh
@@ -35,6 +35,7 @@
 fi
 
 art="/data/local/tmp/system/bin/art"
+art_debugee="sh /data/local/tmp/system/bin/art"
 # We use Quick's image on target because optimizing's image is not compiled debuggable.
 image="-Ximage:/data/art-test/core.art"
 args=$@
@@ -50,6 +51,7 @@
     # Specify bash explicitly since the art script cannot, since it has to run on the device
     # with mksh.
     art="bash out/host/linux-x86/bin/art"
+    art_debugee="bash out/host/linux-x86/bin/art"
     # We force generation of a new image to avoid build-time and run-time classpath differences.
     image="-Ximage:/system/non/existent"
     # We do not need a device directory on host.
@@ -81,7 +83,7 @@
       --vm-arg -Djpda.settings.verbose=true \
       --vm-arg -Djpda.settings.syncPort=34016 \
       --vm-arg -Djpda.settings.transportAddress=127.0.0.1:55107 \
-      --vm-arg -Djpda.settings.debuggeeJavaPath="\"$art $image $debuggee_args\"" \
+      --vm-arg -Djpda.settings.debuggeeJavaPath="\"$art_debugee $image $debuggee_args\"" \
       --classpath $test_jar \
       --classpath $junit_jar \
       --vm-arg -Xcompiler-option --vm-arg --compiler-backend=Optimizing \