Merge "Opt compiler: ARM64: Use ldp/stp on arm64 for slow paths."
diff --git a/Android.mk b/Android.mk
index 9360355..3467f1d 100644
--- a/Android.mk
+++ b/Android.mk
@@ -405,8 +405,8 @@
 	adb root
 	adb wait-for-device shell stop
 	adb shell rm -rf $(ART_TARGET_DALVIK_CACHE_DIR)/*
-	adb shell setprop dalvik.vm.dex2oat-filter ""
-	adb shell setprop dalvik.vm.image-dex2oat-filter ""
+	adb shell setprop dalvik.vm.dex2oat-filter \"\"
+	adb shell setprop dalvik.vm.image-dex2oat-filter \"\"
 	adb shell setprop persist.sys.dalvik.vm.lib.2 libart.so
 	adb shell start
 
@@ -415,18 +415,18 @@
 	adb root
 	adb wait-for-device shell stop
 	adb shell rm -rf $(ART_TARGET_DALVIK_CACHE_DIR)/*
-	adb shell setprop dalvik.vm.dex2oat-filter ""
-	adb shell setprop dalvik.vm.image-dex2oat-filter ""
+	adb shell setprop dalvik.vm.dex2oat-filter \"\"
+	adb shell setprop dalvik.vm.image-dex2oat-filter \"\"
 	adb shell setprop persist.sys.dalvik.vm.lib.2 libartd.so
 	adb shell start
 
-.PHONY: use-art-smart
-use-art-smart:
+.PHONY: use-art-verify-at-runtime
+use-art-verify-at-runtime:
 	adb root
 	adb wait-for-device shell stop
 	adb shell rm -rf $(ART_TARGET_DALVIK_CACHE_DIR)/*
-	adb shell setprop dalvik.vm.dex2oat-filter "interpret-only"
-	adb shell setprop dalvik.vm.image-dex2oat-filter ""
+	adb shell setprop dalvik.vm.dex2oat-filter "verify-at-runtime"
+	adb shell setprop dalvik.vm.image-dex2oat-filter "verify-at-runtime"
 	adb shell setprop persist.sys.dalvik.vm.lib.2 libart.so
 	adb shell start
 
diff --git a/build/Android.common_build.mk b/build/Android.common_build.mk
index c60e75b..3e427a3 100644
--- a/build/Android.common_build.mk
+++ b/build/Android.common_build.mk
@@ -83,19 +83,10 @@
 else
 ART_TARGET_CLANG := false
 endif
-
-ifeq ($(TARGET_ARCH)|$(ART_TARGET_CLANG),mips|true)
-  # b/18807290, Clang generated mips assembly code for array.cc
-  # cannot be compiled by gas.
-  # b/18789639, Clang assembler cannot compile inlined assembly code in
-  # valgrind_malloc_space-inl.h:192:5: error: used $at without ".set noat"
-  $(warning Clang is disabled for the mips target)
-endif
 ART_TARGET_CLANG_arm :=
 ART_TARGET_CLANG_arm64 :=
-# TODO: Enable clang mips when b/18807290 and b/18789639 are fixed.
-ART_TARGET_CLANG_mips := false
-ART_TARGET_CLANG_mips64 := false
+ART_TARGET_CLANG_mips :=
+ART_TARGET_CLANG_mips64 :=
 ART_TARGET_CLANG_x86 :=
 ART_TARGET_CLANG_x86_64 :=
 
@@ -119,10 +110,6 @@
 ART_TARGET_CLANG_CFLAGS_arm64  += \
   -DNVALGRIND
 
-# FIXME: upstream LLVM has a vectorizer bug that needs to be fixed
-ART_TARGET_CLANG_CFLAGS_arm64 += \
-  -fno-vectorize
-
 # Warn about thread safety violations with clang.
 art_clang_cflags := -Wthread-safety
 
@@ -189,6 +176,7 @@
 
 ART_C_INCLUDES := \
   external/gtest/include \
+  external/icu/icu4c/source/common \
   external/valgrind/main/include \
   external/valgrind/main \
   external/vixl/src \
diff --git a/build/Android.common_path.mk b/build/Android.common_path.mk
index e0c0b0c..2d6b6a3 100644
--- a/build/Android.common_path.mk
+++ b/build/Android.common_path.mk
@@ -80,7 +80,7 @@
 TARGET_CORE_IMG_LOCATION := $(ART_TARGET_TEST_OUT)/core.art
 
 # Jar files for core.art.
-TARGET_CORE_JARS := core-libart conscrypt okhttp core-junit bouncycastle
+TARGET_CORE_JARS := core-libart conscrypt okhttp bouncycastle
 HOST_CORE_JARS := $(addsuffix -hostdex,$(TARGET_CORE_JARS))
 
 HOST_CORE_DEX_LOCATIONS   := $(foreach jar,$(HOST_CORE_JARS),  $(HOST_OUT_JAVA_LIBRARIES)/$(jar).jar)
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index 7d76795..730e61d 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -26,6 +26,7 @@
   AllFields \
   ExceptionHandle \
   GetMethodSignature \
+  Instrumentation \
   Interfaces \
   Main \
   MultiDex \
@@ -64,6 +65,7 @@
 ART_GTEST_compiler_driver_test_DEX_DEPS := AbstractMethod StaticLeafMethods
 ART_GTEST_dex_file_test_DEX_DEPS := GetMethodSignature Main Nested
 ART_GTEST_exception_test_DEX_DEPS := ExceptionHandle
+ART_GTEST_instrumentation_test_DEX_DEPS := Instrumentation
 ART_GTEST_jni_compiler_test_DEX_DEPS := MyClassNatives
 ART_GTEST_jni_internal_test_DEX_DEPS := AllFields StaticLeafMethods
 ART_GTEST_oat_file_assistant_test_DEX_DEPS := Main MainStripped MultiDex Nested
@@ -157,6 +159,7 @@
   runtime/handle_scope_test.cc \
   runtime/indenter_test.cc \
   runtime/indirect_reference_table_test.cc \
+  runtime/instrumentation_test.cc \
   runtime/intern_table_test.cc \
   runtime/interpreter/safe_math_test.cc \
   runtime/java_vm_ext_test.cc \
@@ -244,6 +247,7 @@
 
 COMPILER_GTEST_HOST_SRC_FILES := \
   $(COMPILER_GTEST_COMMON_SRC_FILES) \
+  compiler/dex/quick/x86/quick_assemble_x86_test.cc \
   compiler/utils/arm/assembler_arm32_test.cc \
   compiler/utils/arm/assembler_thumb2_test.cc \
   compiler/utils/assembler_thumb_test.cc \
diff --git a/compiler/dex/bb_optimizations.h b/compiler/dex/bb_optimizations.h
index 0850f42..02d5327 100644
--- a/compiler/dex/bb_optimizations.h
+++ b/compiler/dex/bb_optimizations.h
@@ -26,6 +26,30 @@
 namespace art {
 
 /**
+ * @class StringChange
+ * @brief Converts calls to String.<init> to StringFactory instead.
+ */
+class StringChange : public PassME {
+ public:
+  StringChange() : PassME("StringChange", kNoNodes) {
+  }
+
+  void Start(PassDataHolder* data) const {
+    DCHECK(data != nullptr);
+    CompilationUnit* c_unit = down_cast<PassMEDataHolder*>(data)->c_unit;
+    DCHECK(c_unit != nullptr);
+    c_unit->mir_graph->StringChange();
+  }
+
+  bool Gate(const PassDataHolder* data) const {
+    DCHECK(data != nullptr);
+    CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+    DCHECK(c_unit != nullptr);
+    return c_unit->mir_graph->HasInvokes();
+  }
+};
+
+/**
  * @class CacheFieldLoweringInfo
  * @brief Cache the lowering info for fields used by IGET/IPUT/SGET/SPUT insns.
  */
@@ -270,7 +294,25 @@
     CompilationUnit* c_unit = down_cast<PassMEDataHolder*>(data)->c_unit;
     DCHECK(c_unit != nullptr);
     c_unit->mir_graph->EliminateDeadCodeEnd();
-    down_cast<PassMEDataHolder*>(data)->dirty = !c_unit->mir_graph->MirSsaRepUpToDate();
+  }
+};
+
+/**
+ * @class GlobalValueNumberingCleanupPass
+ * @brief Performs the cleanup after global value numbering pass and the dependent
+ *        dead code elimination pass that needs the GVN data.
+ */
+class GlobalValueNumberingCleanupPass : public PassME {
+ public:
+  GlobalValueNumberingCleanupPass()
+    : PassME("GVNCleanup", kNoNodes, "") {
+  }
+
+  void Start(PassDataHolder* data) const OVERRIDE {
+    DCHECK(data != nullptr);
+    CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+    DCHECK(c_unit != nullptr);
+    return c_unit->mir_graph->GlobalValueNumberingCleanup();
   }
 };
 
diff --git a/compiler/dex/compiler_enums.h b/compiler/dex/compiler_enums.h
index 0acdd42..b78b3d7 100644
--- a/compiler/dex/compiler_enums.h
+++ b/compiler/dex/compiler_enums.h
@@ -172,7 +172,6 @@
   kMirOpRangeCheck,
   kMirOpDivZeroCheck,
   kMirOpCheck,
-  kMirOpCheckPart2,
   kMirOpSelect,
 
   // Vector opcodes:
diff --git a/compiler/dex/global_value_numbering.cc b/compiler/dex/global_value_numbering.cc
index 30e3ce0..e2b9987 100644
--- a/compiler/dex/global_value_numbering.cc
+++ b/compiler/dex/global_value_numbering.cc
@@ -128,8 +128,9 @@
   ++bbs_processed_;
   merge_lvns_.clear();
 
-  bool change = (lvns_[bb->id] == nullptr) || !lvns_[bb->id]->Equals(*work_lvn_);
+  bool change = false;
   if (mode_ == kModeGvn) {
+    change = (lvns_[bb->id] == nullptr) || !lvns_[bb->id]->Equals(*work_lvn_);
     // In GVN mode, keep the latest LVN even if Equals() indicates no change. This is
     // to keep the correct values of fields that do not contribute to Equals() as long
     // as they depend only on predecessor LVNs' fields that do contribute to Equals().
@@ -137,6 +138,9 @@
     std::unique_ptr<const LocalValueNumbering> old_lvn(lvns_[bb->id]);
     lvns_[bb->id] = work_lvn_.release();
   } else {
+    DCHECK_EQ(mode_, kModeGvnPostProcessing);  // kModeLvn doesn't use FinishBasicBlock().
+    DCHECK(lvns_[bb->id] != nullptr);
+    DCHECK(lvns_[bb->id]->Equals(*work_lvn_));
     work_lvn_.reset();
   }
   return change;
diff --git a/compiler/dex/global_value_numbering_test.cc b/compiler/dex/global_value_numbering_test.cc
index c538d0b..c8aa990 100644
--- a/compiler/dex/global_value_numbering_test.cc
+++ b/compiler/dex/global_value_numbering_test.cc
@@ -290,6 +290,15 @@
     DoPrepareVregToSsaMapExit(bb_id, map, count);
   }
 
+  template <size_t count>
+  void MarkAsWideSRegs(const int32_t (&sregs)[count]) {
+    for (int32_t sreg : sregs) {
+      cu_.mir_graph->reg_location_[sreg].wide = true;
+      cu_.mir_graph->reg_location_[sreg + 1].wide = true;
+      cu_.mir_graph->reg_location_[sreg + 1].high_word = true;
+    }
+  }
+
   void PerformGVN() {
     DoPerformGVN<LoopRepeatingTopologicalSortIterator>();
   }
@@ -360,9 +369,11 @@
     cu_.access_flags = kAccStatic;  // Don't let "this" interfere with this test.
     allocator_.reset(ScopedArenaAllocator::Create(&cu_.arena_stack));
     // By default, the zero-initialized reg_location_[.] with ref == false tells LVN that
-    // 0 constants are integral, not references. Nothing else is used by LVN/GVN.
+    // 0 constants are integral, not references, and the values are all narrow.
+    // Nothing else is used by LVN/GVN. Tests can override the default values as needed.
     cu_.mir_graph->reg_location_ =
         cu_.arena.AllocArray<RegLocation>(kMaxSsaRegs, kArenaAllocRegAlloc);
+    cu_.mir_graph->num_ssa_regs_ = kMaxSsaRegs;
     // Bind all possible sregs to live vregs for test purposes.
     live_in_v_->SetInitialBits(kMaxSsaRegs);
     cu_.mir_graph->ssa_base_vregs_.reserve(kMaxSsaRegs);
@@ -910,14 +921,14 @@
       DEF_IGET(6, Instruction::AGET_OBJECT, 3u, 200u, 201u),  // Same as at the left side.
 
       DEF_AGET(3, Instruction::AGET_WIDE, 4u, 300u, 301u),
-      DEF_CONST(5, Instruction::CONST_WIDE, 5u, 1000),
-      DEF_APUT(5, Instruction::APUT_WIDE, 5u, 300u, 301u),
-      DEF_AGET(6, Instruction::AGET_WIDE, 7u, 300u, 301u),  // Differs from the top and the CONST.
+      DEF_CONST(5, Instruction::CONST_WIDE, 6u, 1000),
+      DEF_APUT(5, Instruction::APUT_WIDE, 6u, 300u, 301u),
+      DEF_AGET(6, Instruction::AGET_WIDE, 8u, 300u, 301u),  // Differs from the top and the CONST.
 
-      DEF_AGET(3, Instruction::AGET_SHORT, 8u, 400u, 401u),
-      DEF_CONST(3, Instruction::CONST, 9u, 2000),
-      DEF_APUT(4, Instruction::APUT_SHORT, 9u, 400u, 401u),
-      DEF_APUT(5, Instruction::APUT_SHORT, 9u, 400u, 401u),
+      DEF_AGET(3, Instruction::AGET_SHORT, 10u, 400u, 401u),
+      DEF_CONST(3, Instruction::CONST, 11u, 2000),
+      DEF_APUT(4, Instruction::APUT_SHORT, 11u, 400u, 401u),
+      DEF_APUT(5, Instruction::APUT_SHORT, 11u, 400u, 401u),
       DEF_AGET(6, Instruction::AGET_SHORT, 12u, 400u, 401u),  // Differs from the top, == CONST.
 
       DEF_AGET(3, Instruction::AGET_CHAR, 13u, 500u, 501u),
@@ -939,6 +950,8 @@
   };
 
   PrepareMIRs(mirs);
+  static const int32_t wide_sregs[] = { 4, 6, 8 };
+  MarkAsWideSRegs(wide_sregs);
   PerformGVN();
   ASSERT_EQ(arraysize(mirs), value_names_.size());
   EXPECT_EQ(value_names_[0], value_names_[1]);
@@ -1057,6 +1070,12 @@
   };
 
   PrepareMIRs(mirs);
+  for (size_t i = 0u; i != arraysize(mirs); ++i) {
+    if ((mirs_[i].ssa_rep->defs[0] % 2) == 0) {
+      const int32_t wide_sregs[] = { mirs_[i].ssa_rep->defs[0] };
+      MarkAsWideSRegs(wide_sregs);
+    }
+  }
   PerformGVN();
   ASSERT_EQ(arraysize(mirs), value_names_.size());
   EXPECT_EQ(value_names_[0], value_names_[7]);
@@ -1493,27 +1512,27 @@
   static const MIRDef mirs[] = {
       // NOTE: MIRs here are ordered by unique tests. They will be put into appropriate blocks.
       DEF_AGET(3, Instruction::AGET_WIDE, 0u, 100u, 101u),
-      DEF_AGET(4, Instruction::AGET_WIDE, 1u, 100u, 101u),   // Same as at the top.
-      DEF_AGET(5, Instruction::AGET_WIDE, 2u, 100u, 101u),   // Same as at the top.
+      DEF_AGET(4, Instruction::AGET_WIDE, 2u, 100u, 101u),   // Same as at the top.
+      DEF_AGET(5, Instruction::AGET_WIDE, 4u, 100u, 101u),   // Same as at the top.
 
-      DEF_AGET(3, Instruction::AGET_BYTE, 3u, 200u, 201u),
-      DEF_AGET(4, Instruction::AGET_BYTE, 4u, 200u, 201u),  // Differs from top...
-      DEF_APUT(4, Instruction::APUT_BYTE, 5u, 200u, 201u),  // Because of this IPUT.
-      DEF_AGET(5, Instruction::AGET_BYTE, 6u, 200u, 201u),  // Differs from top and the loop AGET.
+      DEF_AGET(3, Instruction::AGET_BYTE, 6u, 200u, 201u),
+      DEF_AGET(4, Instruction::AGET_BYTE, 7u, 200u, 201u),  // Differs from top...
+      DEF_APUT(4, Instruction::APUT_BYTE, 8u, 200u, 201u),  // Because of this APUT.
+      DEF_AGET(5, Instruction::AGET_BYTE, 9u, 200u, 201u),  // Differs from top and the loop AGET.
 
-      DEF_AGET(3, Instruction::AGET, 7u, 300u, 301u),
-      DEF_APUT(4, Instruction::APUT, 8u, 300u, 301u),   // Because of this IPUT...
-      DEF_AGET(4, Instruction::AGET, 9u, 300u, 301u),   // Differs from top.
-      DEF_AGET(5, Instruction::AGET, 10u, 300u, 301u),  // Differs from top but == the loop AGET.
+      DEF_AGET(3, Instruction::AGET, 10u, 300u, 301u),
+      DEF_APUT(4, Instruction::APUT, 11u, 300u, 301u),  // Because of this APUT...
+      DEF_AGET(4, Instruction::AGET, 12u, 300u, 301u),   // Differs from top.
+      DEF_AGET(5, Instruction::AGET, 13u, 300u, 301u),  // Differs from top but == the loop AGET.
 
-      DEF_CONST(3, Instruction::CONST, 11u, 3000),
-      DEF_APUT(3, Instruction::APUT_CHAR, 11u, 400u, 401u),
-      DEF_APUT(3, Instruction::APUT_CHAR, 11u, 400u, 402u),
-      DEF_AGET(4, Instruction::AGET_CHAR, 14u, 400u, 401u),  // Differs from 11u and 16u.
-      DEF_AGET(4, Instruction::AGET_CHAR, 15u, 400u, 402u),  // Same as 14u.
-      DEF_CONST(4, Instruction::CONST, 16u, 4000),
-      DEF_APUT(4, Instruction::APUT_CHAR, 16u, 400u, 401u),
-      DEF_APUT(4, Instruction::APUT_CHAR, 16u, 400u, 402u),
+      DEF_CONST(3, Instruction::CONST, 14u, 3000),
+      DEF_APUT(3, Instruction::APUT_CHAR, 14u, 400u, 401u),
+      DEF_APUT(3, Instruction::APUT_CHAR, 14u, 400u, 402u),
+      DEF_AGET(4, Instruction::AGET_CHAR, 15u, 400u, 401u),  // Differs from 14u and 17u.
+      DEF_AGET(4, Instruction::AGET_CHAR, 16u, 400u, 402u),  // Same as 15u.
+      DEF_CONST(4, Instruction::CONST, 17u, 4000),
+      DEF_APUT(4, Instruction::APUT_CHAR, 17u, 400u, 401u),
+      DEF_APUT(4, Instruction::APUT_CHAR, 17u, 400u, 402u),
       DEF_AGET(5, Instruction::AGET_CHAR, 19u, 400u, 401u),  // Differs from 11u and 14u...
       DEF_AGET(5, Instruction::AGET_CHAR, 20u, 400u, 402u),  // and same as the CONST 16u.
 
@@ -1531,6 +1550,8 @@
   };
 
   PrepareMIRs(mirs);
+  static const int32_t wide_sregs[] = { 0, 2, 4 };
+  MarkAsWideSRegs(wide_sregs);
   PerformGVN();
   ASSERT_EQ(arraysize(mirs), value_names_.size());
   EXPECT_EQ(value_names_[0], value_names_[1]);
diff --git a/compiler/dex/gvn_dead_code_elimination.cc b/compiler/dex/gvn_dead_code_elimination.cc
index d7f36f7..6d8a7da 100644
--- a/compiler/dex/gvn_dead_code_elimination.cc
+++ b/compiler/dex/gvn_dead_code_elimination.cc
@@ -20,6 +20,7 @@
 
 #include "base/bit_vector-inl.h"
 #include "base/macros.h"
+#include "base/allocator.h"
 #include "compiler_enums.h"
 #include "dataflow_iterator-inl.h"
 #include "dex_instruction.h"
@@ -57,14 +58,12 @@
       low_def_over_high_word = prev_data->low_def_over_high_word;
     } else {
       prev_value = prev_data->prev_value_high;
-      low_def_over_high_word =
-          prev_data->prev_value_high.value != kNPos && !prev_data->high_def_over_low_word;
+      low_def_over_high_word = !prev_data->high_def_over_low_word;
     }
   } else {
     if (prev_data->vreg_def == v_reg) {
       prev_value_high = prev_data->prev_value;
-      high_def_over_low_word =
-          prev_data->prev_value.value != kNPos && !prev_data->low_def_over_high_word;
+      high_def_over_low_word = !prev_data->low_def_over_high_word;
     } else {
       prev_value_high = prev_data->prev_value_high;
       high_def_over_low_word = prev_data->high_def_over_low_word;
@@ -75,6 +74,9 @@
 GvnDeadCodeElimination::VRegChains::VRegChains(uint32_t num_vregs, ScopedArenaAllocator* alloc)
     : num_vregs_(num_vregs),
       vreg_data_(alloc->AllocArray<VRegValue>(num_vregs, kArenaAllocMisc)),
+      vreg_high_words_(num_vregs, false, Allocator::GetNoopAllocator(),
+                       BitVector::BitsToWords(num_vregs),
+                       alloc->AllocArray<uint32_t>(BitVector::BitsToWords(num_vregs))),
       mir_data_(alloc->Adapter()) {
   mir_data_.reserve(100);
 }
@@ -82,6 +84,7 @@
 inline void GvnDeadCodeElimination::VRegChains::Reset() {
   DCHECK(mir_data_.empty());
   std::fill_n(vreg_data_, num_vregs_, VRegValue());
+  vreg_high_words_.ClearAllBits();
 }
 
 void GvnDeadCodeElimination::VRegChains::AddMIRWithDef(MIR* mir, int v_reg, bool wide,
@@ -93,24 +96,26 @@
   data->wide_def = wide;
   data->vreg_def = v_reg;
 
-  if (vreg_data_[v_reg].change != kNPos &&
-      mir_data_[vreg_data_[v_reg].change].vreg_def + 1 == v_reg) {
-    data->low_def_over_high_word = true;
-  }
-  data->prev_value = vreg_data_[v_reg];
   DCHECK_LT(static_cast<size_t>(v_reg), num_vregs_);
+  data->prev_value = vreg_data_[v_reg];
+  data->low_def_over_high_word =
+      (vreg_data_[v_reg].change != kNPos)
+      ? GetMIRData(vreg_data_[v_reg].change)->vreg_def + 1 == v_reg
+      : vreg_high_words_.IsBitSet(v_reg);
   vreg_data_[v_reg].value = new_value;
   vreg_data_[v_reg].change = pos;
+  vreg_high_words_.ClearBit(v_reg);
 
   if (wide) {
-    if (vreg_data_[v_reg + 1].change != kNPos &&
-        mir_data_[vreg_data_[v_reg + 1].change].vreg_def == v_reg + 1) {
-      data->high_def_over_low_word = true;
-    }
-    data->prev_value_high = vreg_data_[v_reg + 1];
     DCHECK_LT(static_cast<size_t>(v_reg + 1), num_vregs_);
+    data->prev_value_high = vreg_data_[v_reg + 1];
+    data->high_def_over_low_word =
+        (vreg_data_[v_reg + 1].change != kNPos)
+        ? GetMIRData(vreg_data_[v_reg + 1].change)->vreg_def == v_reg + 1
+        : !vreg_high_words_.IsBitSet(v_reg + 1);
     vreg_data_[v_reg + 1].value = new_value;
     vreg_data_[v_reg + 1].change = pos;
+    vreg_high_words_.SetBit(v_reg + 1);
   }
 }
 
@@ -123,9 +128,17 @@
   if (data->has_def) {
     DCHECK_EQ(vreg_data_[data->vreg_def].change, NumMIRs() - 1u);
     vreg_data_[data->vreg_def] = data->prev_value;
+    DCHECK(!vreg_high_words_.IsBitSet(data->vreg_def));
+    if (data->low_def_over_high_word) {
+      vreg_high_words_.SetBit(data->vreg_def);
+    }
     if (data->wide_def) {
       DCHECK_EQ(vreg_data_[data->vreg_def + 1].change, NumMIRs() - 1u);
       vreg_data_[data->vreg_def + 1] = data->prev_value_high;
+      DCHECK(vreg_high_words_.IsBitSet(data->vreg_def + 1));
+      if (data->high_def_over_low_word) {
+        vreg_high_words_.ClearBit(data->vreg_def + 1);
+      }
     }
   }
   mir_data_.pop_back();
@@ -169,6 +182,7 @@
   uint16_t change = vreg_data_[v_reg].change;
   if (change == kNPos) {
     vreg_data_[v_reg].value = value;
+    vreg_high_words_.SetBit(v_reg);
   } else {
     while (true) {
       MIRData* data = &mir_data_[change];
@@ -208,6 +222,7 @@
         }
       }
       vreg_data_[v_reg].value = old_value;
+      DCHECK(!vreg_high_words_.IsBitSet(v_reg));  // Keep marked as low word.
     }
   } else {
     DCHECK_LT(static_cast<size_t>(v_reg + 1), num_vregs_);
@@ -223,6 +238,7 @@
         old_value = lvn->GetStartingVregValueNumber(v_reg);
       }
       vreg_data_[v_reg].value = old_value;
+      DCHECK(!vreg_high_words_.IsBitSet(v_reg));  // Keep marked as low word.
     }
     if (check_high && vreg_data_[v_reg + 1].value == kNoValue) {
       uint16_t old_value = lvn->GetStartingVregValueNumber(v_reg + 1);
@@ -234,6 +250,7 @@
         }
       }
       vreg_data_[v_reg + 1].value = old_value;
+      DCHECK(!vreg_high_words_.IsBitSet(v_reg + 1));  // Keep marked as low word.
     }
   }
 }
@@ -300,6 +317,8 @@
     if (next_change == kNPos) {
       DCHECK_EQ(vreg_data_[v_reg].change, old_change);
       vreg_data_[v_reg].change = new_change;
+      DCHECK_EQ(vreg_high_words_.IsBitSet(v_reg), v_reg == old_data->vreg_def + 1);
+      // No change in vreg_high_words_.
     } else {
       DCHECK_EQ(mir_data_[next_change].PrevChange(v_reg), old_change);
       mir_data_[next_change].SetPrevChange(v_reg, new_change);
@@ -316,6 +335,12 @@
     if (next_change == kNPos) {
       DCHECK_EQ(vreg_data_[v_reg].change, change);
       vreg_data_[v_reg] = (data->vreg_def == v_reg) ? data->prev_value : data->prev_value_high;
+      DCHECK_EQ(vreg_high_words_.IsBitSet(v_reg), v_reg == data->vreg_def + 1);
+      if (data->vreg_def == v_reg && data->low_def_over_high_word) {
+        vreg_high_words_.SetBit(v_reg);
+      } else if (data->vreg_def != v_reg && data->high_def_over_low_word) {
+        vreg_high_words_.ClearBit(v_reg);
+      }
     } else {
       DCHECK_EQ(mir_data_[next_change].PrevChange(v_reg), change);
       mir_data_[next_change].RemovePrevChange(v_reg, data);
@@ -347,6 +372,21 @@
   return false;
 }
 
+bool GvnDeadCodeElimination::VRegChains::IsVRegUsed(uint16_t first_change, uint16_t last_change,
+                                                    int v_reg, MIRGraph* mir_graph) const {
+  DCHECK_LE(first_change, last_change);
+  DCHECK_LE(last_change, mir_data_.size());
+  for (size_t c = first_change; c != last_change; ++c) {
+    SSARepresentation* ssa_rep = mir_data_[c].mir->ssa_rep;
+    for (int i = 0; i != ssa_rep->num_uses; ++i) {
+      if (mir_graph->SRegToVReg(ssa_rep->uses[i]) == v_reg) {
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
 void GvnDeadCodeElimination::VRegChains::RenameSRegUses(uint16_t first_change, uint16_t last_change,
                                                         int old_s_reg, int new_s_reg, bool wide) {
   for (size_t c = first_change; c != last_change; ++c) {
@@ -518,7 +558,7 @@
 
   // Just before we kill mir_to_kill, we need to replace the previous SSA reg assigned to the
   // same dalvik reg to keep consistency with subsequent instructions. However, if there's no
-  // defining MIR for that dalvik reg, the preserved valus must come from its predecessors
+  // defining MIR for that dalvik reg, the preserved values must come from its predecessors
   // and we need to create a new Phi (a degenerate Phi if there's only a single predecessor).
   if (def_change == kNPos) {
     if (wide) {
@@ -526,7 +566,21 @@
       DCHECK_EQ(mir_graph_->SRegToVReg(new_s_reg) + 1, mir_graph_->SRegToVReg(new_s_reg + 1));
       CreatePhi(new_s_reg + 1);  // High word Phi.
     }
-    return CreatePhi(new_s_reg);
+    MIR* phi = CreatePhi(new_s_reg);
+    // If this is a degenerate Phi with all inputs being the same SSA reg, rename that reg's uses.
+    DCHECK_NE(phi->ssa_rep->num_uses, 0u);
+    int old_s_reg = phi->ssa_rep->uses[0];
+    bool all_same = true;
+    for (size_t i = 1u, num = phi->ssa_rep->num_uses; i != num; ++i) {
+      if (phi->ssa_rep->uses[i] != old_s_reg) {
+        all_same = false;
+        break;
+      }
+    }
+    if (all_same) {
+      vreg_chains_.RenameSRegUses(0u, last_change, old_s_reg, new_s_reg, wide);
+    }
+    return phi;
   } else {
     DCHECK_LT(def_change, last_change);
     DCHECK_LE(last_change, vreg_chains_.NumMIRs());
@@ -672,8 +726,14 @@
         uint16_t src_name =
             (d->wide_def ? lvn_->GetSregValueWide(src_s_reg) : lvn_->GetSregValue(src_s_reg));
         if (value_name == src_name) {
-          RecordPassKillMoveByRenamingSrcDef(check_change, c);
-          return;
+          // Check if the move's destination vreg is unused between check_change and the move.
+          uint32_t new_dest_v_reg = mir_graph_->SRegToVReg(d->mir->ssa_rep->defs[0]);
+          if (!vreg_chains_.IsVRegUsed(check_change + 1u, c, new_dest_v_reg, mir_graph_) &&
+              (!d->wide_def ||
+               !vreg_chains_.IsVRegUsed(check_change + 1u, c, new_dest_v_reg + 1, mir_graph_))) {
+            RecordPassKillMoveByRenamingSrcDef(check_change, c);
+            return;
+          }
         }
       }
     }
@@ -963,18 +1023,17 @@
   uint16_t opcode = mir->dalvikInsn.opcode;
   switch (opcode) {
     case kMirOpPhi: {
-      // We can't recognize wide variables in Phi from num_defs == 2 as we've got two Phis instead.
+      // Determine if this Phi is merging wide regs.
+      RegLocation raw_dest = gvn_->GetMirGraph()->GetRawDest(mir);
+      if (raw_dest.high_word) {
+        // This is the high part of a wide reg. Ignore the Phi.
+        return false;
+      }
+      bool wide = raw_dest.wide;
+      // Record the value.
       DCHECK_EQ(mir->ssa_rep->num_defs, 1);
       int s_reg = mir->ssa_rep->defs[0];
-      bool wide = false;
-      uint16_t new_value = lvn_->GetSregValue(s_reg);
-      if (new_value == kNoValue) {
-        wide = true;
-        new_value = lvn_->GetSregValueWide(s_reg);
-        if (new_value == kNoValue) {
-          return false;  // Ignore the high word Phi.
-        }
-      }
+      uint16_t new_value = wide ? lvn_->GetSregValueWide(s_reg) : lvn_->GetSregValue(s_reg);
 
       int v_reg = mir_graph_->SRegToVReg(s_reg);
       DCHECK_EQ(vreg_chains_.CurrentValue(v_reg), kNoValue);  // No previous def for v_reg.
diff --git a/compiler/dex/gvn_dead_code_elimination.h b/compiler/dex/gvn_dead_code_elimination.h
index f2378f2..06022db 100644
--- a/compiler/dex/gvn_dead_code_elimination.h
+++ b/compiler/dex/gvn_dead_code_elimination.h
@@ -111,6 +111,8 @@
     void RemoveChange(uint16_t change);
     bool IsTopChange(uint16_t change) const;
     bool IsSRegUsed(uint16_t first_change, uint16_t last_change, int s_reg) const;
+    bool IsVRegUsed(uint16_t first_change, uint16_t last_change, int v_reg,
+                    MIRGraph* mir_graph) const;
     void RenameSRegUses(uint16_t first_change, uint16_t last_change,
                         int old_s_reg, int new_s_reg, bool wide);
     void RenameVRegUses(uint16_t first_change, uint16_t last_change,
@@ -119,6 +121,7 @@
    private:
     const uint32_t num_vregs_;
     VRegValue* const vreg_data_;
+    BitVector vreg_high_words_;
     ScopedArenaVector<MIRData> mir_data_;
   };
 
diff --git a/compiler/dex/gvn_dead_code_elimination_test.cc b/compiler/dex/gvn_dead_code_elimination_test.cc
index 4d2b8b3..de591d0 100644
--- a/compiler/dex/gvn_dead_code_elimination_test.cc
+++ b/compiler/dex/gvn_dead_code_elimination_test.cc
@@ -406,6 +406,15 @@
     }
   }
 
+  template <size_t count>
+  void MarkAsWideSRegs(const int32_t (&sregs)[count]) {
+    for (int32_t sreg : sregs) {
+      cu_.mir_graph->reg_location_[sreg].wide = true;
+      cu_.mir_graph->reg_location_[sreg + 1].wide = true;
+      cu_.mir_graph->reg_location_[sreg + 1].high_word = true;
+    }
+  }
+
   void PerformDCE() {
     FillVregToSsaRegExitMaps();
     cu_.mir_graph->GetNumOfCodeAndTempVRs();
@@ -467,9 +476,11 @@
     cu_.access_flags = kAccStatic;  // Don't let "this" interfere with this test.
     allocator_.reset(ScopedArenaAllocator::Create(&cu_.arena_stack));
     // By default, the zero-initialized reg_location_[.] with ref == false tells LVN that
-    // 0 constants are integral, not references. Nothing else is used by LVN/GVN.
+    // 0 constants are integral, not references, and the values are all narrow.
+    // Nothing else is used by LVN/GVN. Tests can override the default values as needed.
     cu_.mir_graph->reg_location_ = static_cast<RegLocation*>(cu_.arena.Alloc(
         kMaxSsaRegs * sizeof(cu_.mir_graph->reg_location_[0]), kArenaAllocRegAlloc));
+    cu_.mir_graph->num_ssa_regs_ = kMaxSsaRegs;
     // Bind all possible sregs to live vregs for test purposes.
     live_in_v_->SetInitialBits(kMaxSsaRegs);
     cu_.mir_graph->ssa_base_vregs_.reserve(kMaxSsaRegs);
@@ -705,6 +716,8 @@
   PrepareSRegToVRegMap(sreg_to_vreg_map);
 
   PrepareMIRs(mirs);
+  static const int32_t wide_sregs[] = { 3 };
+  MarkAsWideSRegs(wide_sregs);
   PerformGVN_DCE();
 
   ASSERT_EQ(arraysize(mirs), value_names_.size());
@@ -745,6 +758,8 @@
 
   PrepareIFields(ifields);
   PrepareMIRs(mirs);
+  static const int32_t wide_sregs[] = { 5 };
+  MarkAsWideSRegs(wide_sregs);
   PerformGVN_DCE();
 
   ASSERT_EQ(arraysize(mirs), value_names_.size());
@@ -777,6 +792,8 @@
   PrepareSRegToVRegMap(sreg_to_vreg_map);
 
   PrepareMIRs(mirs);
+  static const int32_t wide_sregs[] = { 0, 2 };
+  MarkAsWideSRegs(wide_sregs);
   PerformGVN_DCE();
 
   ASSERT_EQ(arraysize(mirs), value_names_.size());
@@ -1030,6 +1047,40 @@
   }
 }
 
+TEST_F(GvnDeadCodeEliminationTestSimple, NoRename4) {
+  static const MIRDef mirs[] = {
+      DEF_CONST(3, Instruction::CONST, 0u, 1000u),
+      DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 1u),
+      DEF_CONST(3, Instruction::CONST, 2u, 100u),
+      DEF_CONST(3, Instruction::CONST, 3u, 200u),
+      DEF_BINOP(3, Instruction::OR_INT_2ADDR, 4u, 2u, 3u),   // 3. Find definition of the move src.
+      DEF_MOVE(3, Instruction::MOVE, 5u, 0u),                // 4. Uses move dest vreg.
+      DEF_MOVE(3, Instruction::MOVE, 6u, 4u),                // 2. Find overwritten move src.
+      DEF_CONST(3, Instruction::CONST, 7u, 2000u),           // 1. Overwrites 4u, look for moves.
+  };
+
+  static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 3, 2, 4, 0, 2 };
+  PrepareSRegToVRegMap(sreg_to_vreg_map);
+
+  PrepareMIRs(mirs);
+  PerformGVN_DCE();
+
+  ASSERT_EQ(arraysize(mirs), value_names_.size());
+  static const size_t diff_indexes[] = { 0, 1, 2, 3, 4, 7 };
+  ExpectValueNamesNE(diff_indexes);
+  EXPECT_EQ(value_names_[0], value_names_[5]);
+  EXPECT_EQ(value_names_[4], value_names_[6]);
+
+  static const bool eliminated[] = {
+      false, false, false, false, false, false, false, false
+  };
+  static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch");
+  for (size_t i = 0; i != arraysize(eliminated); ++i) {
+    bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop);
+    EXPECT_EQ(eliminated[i], actually_eliminated) << i;
+  }
+}
+
 TEST_F(GvnDeadCodeEliminationTestSimple, Simple1) {
   static const IFieldDef ifields[] = {
       { 0u, 1u, 0u, false, kDexMemAccessObject },
@@ -1221,6 +1272,8 @@
 
   PrepareIFields(ifields);
   PrepareMIRs(mirs);
+  static const int32_t wide_sregs[] = { 1, 6 };
+  MarkAsWideSRegs(wide_sregs);
   PerformGVN_DCE();
 
   ASSERT_EQ(arraysize(mirs), value_names_.size());
@@ -1576,6 +1629,52 @@
 }
 
 TEST_F(GvnDeadCodeEliminationTestDiamond, CreatePhi2) {
+  static const MIRDef mirs[] = {
+      DEF_CONST(3, Instruction::CONST, 0u, 1000),
+      DEF_MOVE(4, Instruction::MOVE, 1u, 0u),
+      DEF_CONST(4, Instruction::CONST, 2u, 1000),
+  };
+
+  static const int32_t sreg_to_vreg_map[] = { 0, 1, 0 };
+  PrepareSRegToVRegMap(sreg_to_vreg_map);
+
+  PrepareMIRs(mirs);
+  PerformGVN_DCE();
+
+  ASSERT_EQ(arraysize(mirs), value_names_.size());
+  EXPECT_EQ(value_names_[0], value_names_[1]);
+  EXPECT_EQ(value_names_[0], value_names_[2]);
+
+  static const bool eliminated[] = {
+      false, false, true,
+  };
+  static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch");
+  for (size_t i = 0; i != arraysize(eliminated); ++i) {
+    bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop);
+    EXPECT_EQ(eliminated[i], actually_eliminated) << i;
+  }
+  // Check that we've created a single-input Phi to replace the CONST 3u.
+  BasicBlock* bb4 = cu_.mir_graph->GetBasicBlock(4);
+  MIR* phi = bb4->first_mir_insn;
+  ASSERT_TRUE(phi != nullptr);
+  ASSERT_EQ(kMirOpPhi, static_cast<int>(phi->dalvikInsn.opcode));
+  ASSERT_EQ(1, phi->ssa_rep->num_uses);
+  EXPECT_EQ(0, phi->ssa_rep->uses[0]);
+  ASSERT_EQ(1, phi->ssa_rep->num_defs);
+  EXPECT_EQ(2, phi->ssa_rep->defs[0]);
+  EXPECT_EQ(0u, phi->dalvikInsn.vA);
+  MIR* move = phi->next;
+  ASSERT_TRUE(move != nullptr);
+  ASSERT_EQ(Instruction::MOVE, move->dalvikInsn.opcode);
+  ASSERT_EQ(1, move->ssa_rep->num_uses);
+  EXPECT_EQ(2, move->ssa_rep->uses[0]);
+  ASSERT_EQ(1, move->ssa_rep->num_defs);
+  EXPECT_EQ(1, move->ssa_rep->defs[0]);
+  EXPECT_EQ(1u, move->dalvikInsn.vA);
+  EXPECT_EQ(0u, move->dalvikInsn.vB);
+}
+
+TEST_F(GvnDeadCodeEliminationTestDiamond, CreatePhi3) {
   static const IFieldDef ifields[] = {
       { 0u, 1u, 0u, false, kDexMemAccessWord },
   };
@@ -1797,4 +1896,91 @@
   EXPECT_EQ(2u, phi->dalvikInsn.vA);
 }
 
+TEST_F(GvnDeadCodeEliminationTestDiamond, LongOverlaps1) {  // Expects no elimination.
+  static const MIRDef mirs[] = {
+      DEF_CONST_WIDE(3, Instruction::CONST_WIDE, 0u, 1000u),
+      DEF_CONST_WIDE(3, Instruction::CONST_WIDE, 2u, 1000u),  // Same constant as mirs[0].
+      DEF_MOVE_WIDE(4, Instruction::MOVE_WIDE, 4u, 0u),
+      DEF_MOVE_WIDE(4, Instruction::MOVE_WIDE, 6u, 2u),
+      DEF_MOVE_WIDE(4, Instruction::MOVE_WIDE, 8u, 4u),
+      DEF_MOVE_WIDE(4, Instruction::MOVE_WIDE, 10u, 6u),
+  };
+
+  // The last insn should overlap the first and second.
+  static const int32_t sreg_to_vreg_map[] = { 1, 2, 3, 4, 5, 6, 7, 8, 0, 1, 2, 3 };
+  PrepareSRegToVRegMap(sreg_to_vreg_map);
+
+  PrepareMIRs(mirs);
+  static const int32_t wide_sregs[] = { 0, 2, 4, 6, 8, 10 };  // One entry per MIR's def.
+  MarkAsWideSRegs(wide_sregs);
+  PerformGVN_DCE();
+
+  ASSERT_EQ(arraysize(mirs), value_names_.size());
+  EXPECT_EQ(value_names_[0], value_names_[1]);
+  EXPECT_EQ(value_names_[0], value_names_[2]);
+  EXPECT_EQ(value_names_[0], value_names_[3]);
+  EXPECT_EQ(value_names_[0], value_names_[4]);  // NOTE(review): value_names_[5] is unchecked.
+
+  static const bool eliminated[] = {
+      false, false, false, false, false, false,  // Every MIR is expected to survive DCE.
+  };
+  static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch");
+  for (size_t i = 0; i != arraysize(eliminated); ++i) {
+    bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop);
+    EXPECT_EQ(eliminated[i], actually_eliminated) << i;  // Streams the index on failure.
+  }
+}
+
+TEST_F(GvnDeadCodeEliminationTestSimple, MixedOverlaps1) {  // Narrow and wide defs mixed.
+  static const MIRDef mirs[] = {
+      DEF_CONST(3, Instruction::CONST, 0u, 1000u),
+      DEF_MOVE(3, Instruction::MOVE, 1u, 0u),
+      DEF_CONST(3, Instruction::CONST, 2u, 2000u),
+      { 3, Instruction::INT_TO_LONG, 0, 0u, 1, { 2u }, 2, { 3u, 4u} },  // 1 use, 2 defs.
+      DEF_MOVE_WIDE(3, Instruction::MOVE_WIDE, 5u, 3u),
+      DEF_CONST(3, Instruction::CONST, 7u, 3000u),
+      DEF_CONST(3, Instruction::CONST, 8u, 4000u),
+  };
+
+  static const int32_t sreg_to_vreg_map[] = { 1, 2, 0, 0, 1, 3, 4, 0, 1 };
+  PrepareSRegToVRegMap(sreg_to_vreg_map);
+
+  PrepareMIRs(mirs);
+  static const int32_t wide_sregs[] = { 3, 5 };  // Defs of INT_TO_LONG and MOVE_WIDE.
+  MarkAsWideSRegs(wide_sregs);
+  PerformGVN_DCE();
+
+  ASSERT_EQ(arraysize(mirs), value_names_.size());
+  static const size_t diff_indexes[] = { 0, 2, 3, 5, 6 };
+  ExpectValueNamesNE(diff_indexes);  // Presumably: these value names all differ - confirm.
+  EXPECT_EQ(value_names_[0], value_names_[1]);  // MOVE shares the name of its source CONST.
+  EXPECT_EQ(value_names_[3], value_names_[4]);  // MOVE_WIDE shares the INT_TO_LONG name.
+
+  static const bool eliminated[] = {
+      false, true, false, false, true, false, false,  // Only MOVE and MOVE_WIDE go away.
+  };
+  static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch");
+  for (size_t i = 0; i != arraysize(eliminated); ++i) {
+    bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop);
+    EXPECT_EQ(eliminated[i], actually_eliminated) << i;  // Streams the index on failure.
+  }
+  // Check renamed registers in CONST.
+  MIR* cst = &mirs_[0];
+  ASSERT_EQ(Instruction::CONST, cst->dalvikInsn.opcode);
+  ASSERT_EQ(0, cst->ssa_rep->num_uses);
+  ASSERT_EQ(1, cst->ssa_rep->num_defs);
+  EXPECT_EQ(1, cst->ssa_rep->defs[0]);  // Expected def is s_reg 1 (was 0u in mirs[0]).
+  EXPECT_EQ(2u, cst->dalvikInsn.vA);
+  // Check renamed registers in INT_TO_LONG.
+  MIR* int_to_long = &mirs_[3];
+  ASSERT_EQ(Instruction::INT_TO_LONG, int_to_long->dalvikInsn.opcode);
+  ASSERT_EQ(1, int_to_long->ssa_rep->num_uses);
+  EXPECT_EQ(2, int_to_long->ssa_rep->uses[0]);
+  ASSERT_EQ(2, int_to_long->ssa_rep->num_defs);
+  EXPECT_EQ(5, int_to_long->ssa_rep->defs[0]);  // Expected defs are 5/6 (were 3u/4u).
+  EXPECT_EQ(6, int_to_long->ssa_rep->defs[1]);
+  EXPECT_EQ(3u, int_to_long->dalvikInsn.vA);
+  EXPECT_EQ(0u, int_to_long->dalvikInsn.vB);
+}
+
 }  // namespace art
diff --git a/compiler/dex/local_value_numbering.cc b/compiler/dex/local_value_numbering.cc
index cdf5e38..cc9dbe4 100644
--- a/compiler/dex/local_value_numbering.cc
+++ b/compiler/dex/local_value_numbering.cc
@@ -1152,28 +1152,20 @@
     // Running LVN without a full GVN?
     return kNoValue;
   }
-  int32_t* uses = mir->ssa_rep->uses;
-  // Try to find out if this is merging wide regs.
-  if (mir->ssa_rep->defs[0] != 0 &&
-      sreg_wide_value_map_.count(mir->ssa_rep->defs[0] - 1) != 0u) {
+  // Determine if this Phi is merging wide regs.
+  RegLocation raw_dest = gvn_->GetMirGraph()->GetRawDest(mir);
+  if (raw_dest.high_word) {
     // This is the high part of a wide reg. Ignore the Phi.
     return kNoValue;
   }
-  BasicBlockId* incoming = mir->meta.phi_incoming;
-  int16_t pos = 0;
-  // Check if we're merging a wide value based on the first merged LVN.
-  const LocalValueNumbering* first_lvn = gvn_->merge_lvns_[0];
-  DCHECK_LT(pos, mir->ssa_rep->num_uses);
-  while (incoming[pos] != first_lvn->Id()) {
-    ++pos;
-    DCHECK_LT(pos, mir->ssa_rep->num_uses);
-  }
-  int first_s_reg = uses[pos];
-  bool wide = (first_lvn->sreg_wide_value_map_.count(first_s_reg) != 0u);
+  bool wide = raw_dest.wide;
   // Iterate over *merge_lvns_ and skip incoming sregs for BBs without associated LVN.
   merge_names_.clear();
   uint16_t value_name = kNoValue;
   bool same_values = true;
+  BasicBlockId* incoming = mir->meta.phi_incoming;
+  int32_t* uses = mir->ssa_rep->uses;
+  int16_t pos = 0;
   for (const LocalValueNumbering* lvn : gvn_->merge_lvns_) {
     DCHECK_LT(pos, mir->ssa_rep->num_uses);
     while (incoming[pos] != lvn->Id()) {
@@ -1994,6 +1986,9 @@
   if (s_reg == INVALID_SREG) {
     return kNoValue;
   }
+  if (gvn_->GetMirGraph()->GetRegLocation(s_reg).wide != wide) {
+    return kNoValue;
+  }
   if (wide) {
     int high_s_reg = bb->data_flow_info->vreg_to_ssa_map_exit[v_reg + 1];
     if (high_s_reg != s_reg + 1) {
diff --git a/compiler/dex/local_value_numbering.h b/compiler/dex/local_value_numbering.h
index 379c952..67fb647 100644
--- a/compiler/dex/local_value_numbering.h
+++ b/compiler/dex/local_value_numbering.h
@@ -53,10 +53,12 @@
   }
 
   uint16_t GetSregValue(uint16_t s_reg) const {
+    DCHECK(!gvn_->GetMirGraph()->GetRegLocation(s_reg).wide);  // Narrow s_regs only.
     return GetSregValueImpl(s_reg, &sreg_value_map_);
   }
 
   uint16_t GetSregValueWide(uint16_t s_reg) const {
+    DCHECK(gvn_->GetMirGraph()->GetRegLocation(s_reg).wide);  // Wide s_regs only.
     return GetSregValueImpl(s_reg, &sreg_wide_value_map_);
   }
 
@@ -123,21 +125,27 @@
 
   void SetOperandValue(uint16_t s_reg, uint16_t value) {
     DCHECK_EQ(sreg_wide_value_map_.count(s_reg), 0u);
+    DCHECK(!gvn_->GetMirGraph()->GetRegLocation(s_reg).wide);  // Narrow s_regs only.
     SetOperandValueImpl(s_reg, value, &sreg_value_map_);
   }
 
   uint16_t GetOperandValue(int s_reg) const {
     DCHECK_EQ(sreg_wide_value_map_.count(s_reg), 0u);
+    DCHECK(!gvn_->GetMirGraph()->GetRegLocation(s_reg).wide);  // Narrow s_regs only.
     return GetOperandValueImpl(s_reg, &sreg_value_map_);
   }
 
   void SetOperandValueWide(uint16_t s_reg, uint16_t value) {
     DCHECK_EQ(sreg_value_map_.count(s_reg), 0u);
+    DCHECK(gvn_->GetMirGraph()->GetRegLocation(s_reg).wide);  // Must be a wide s_reg...
+    DCHECK(!gvn_->GetMirGraph()->GetRegLocation(s_reg).high_word);  // ...but not its high word.
     SetOperandValueImpl(s_reg, value, &sreg_wide_value_map_);
   }
 
   uint16_t GetOperandValueWide(int s_reg) const {
     DCHECK_EQ(sreg_value_map_.count(s_reg), 0u);
+    DCHECK(gvn_->GetMirGraph()->GetRegLocation(s_reg).wide);  // Must be a wide s_reg...
+    DCHECK(!gvn_->GetMirGraph()->GetRegLocation(s_reg).high_word);  // ...but not its high word.
     return GetOperandValueImpl(s_reg, &sreg_wide_value_map_);
   }
 
@@ -331,7 +339,7 @@
 
   void CopyLiveSregValues(SregValueMap* dest, const SregValueMap& src);
 
-  // Intersect maps as sets. The value type must be equality-comparable.
+  // Intersect SSA reg value maps as sets, ignore dead regs.
   template <SregValueMap LocalValueNumbering::* map_ptr>
   void IntersectSregValueMaps();
 
diff --git a/compiler/dex/local_value_numbering_test.cc b/compiler/dex/local_value_numbering_test.cc
index 0393410..bd00690 100644
--- a/compiler/dex/local_value_numbering_test.cc
+++ b/compiler/dex/local_value_numbering_test.cc
@@ -182,6 +182,15 @@
         ~MirSFieldLoweringInfo::kFlagClassIsInitialized;
   }
 
+  template <size_t count>
+  void MarkAsWideSRegs(const int32_t (&sregs)[count]) {  // Flags s_reg pairs as wide.
+    for (int32_t sreg : sregs) {
+      cu_.mir_graph->reg_location_[sreg].wide = true;  // The listed s_reg itself.
+      cu_.mir_graph->reg_location_[sreg + 1].wide = true;  // The following s_reg is wide too...
+      cu_.mir_graph->reg_location_[sreg + 1].high_word = true;  // ...and is the high word.
+    }
+  }
+
   void PerformLVN() {
     cu_.mir_graph->temp_.gvn.ifield_ids =  GlobalValueNumbering::PrepareGvnFieldIds(
         allocator_.get(), cu_.mir_graph->ifield_lowering_infos_);
@@ -210,9 +219,11 @@
     cu_.mir_graph.reset(new MIRGraph(&cu_, &cu_.arena));
     allocator_.reset(ScopedArenaAllocator::Create(&cu_.arena_stack));
     // By default, the zero-initialized reg_location_[.] with ref == false tells LVN that
-    // 0 constants are integral, not references. Nothing else is used by LVN/GVN.
+    // 0 constants are integral, not references, and the values are all narrow.
+    // Nothing else is used by LVN/GVN. Tests can override the default values as needed.
     cu_.mir_graph->reg_location_ = static_cast<RegLocation*>(cu_.arena.Alloc(
         kMaxSsaRegs * sizeof(cu_.mir_graph->reg_location_[0]), kArenaAllocRegAlloc));
+    cu_.mir_graph->num_ssa_regs_ = kMaxSsaRegs;
   }
 
   static constexpr size_t kMaxSsaRegs = 16384u;
@@ -379,26 +390,28 @@
       { 3u, 0u, 0u, false, kDexMemAccessWord },  // Unresolved field.
   };
   static const MIRDef mirs[] = {
-      DEF_UNIQUE_REF(Instruction::NEW_INSTANCE, 20u),
-      DEF_IGET(Instruction::IGET, 1u, 20u, 0u),             // Resolved field #1, unique object.
-      DEF_IGET(Instruction::IGET, 2u, 21u, 0u),             // Resolved field #1.
-      DEF_IGET_WIDE(Instruction::IGET_WIDE, 3u, 21u, 1u),   // Resolved field #2.
-      DEF_IGET(Instruction::IGET, 4u, 22u, 2u),             // Unresolved IGET can be "acquire".
-      DEF_IGET(Instruction::IGET, 5u, 20u, 0u),             // Resolved field #1, unique object.
-      DEF_IGET(Instruction::IGET, 6u, 21u, 0u),             // Resolved field #1.
-      DEF_IGET_WIDE(Instruction::IGET_WIDE, 7u, 21u, 1u),   // Resolved field #2.
-      DEF_IPUT(Instruction::IPUT, 8u, 22u, 2u),             // IPUT clobbers field #1 (#2 is wide).
-      DEF_IGET(Instruction::IGET, 9u, 20u, 0u),             // Resolved field #1, unique object.
-      DEF_IGET(Instruction::IGET, 10u, 21u, 0u),            // Resolved field #1, new value name.
-      DEF_IGET_WIDE(Instruction::IGET_WIDE, 11u, 21u, 1u),  // Resolved field #2.
-      DEF_IGET_WIDE(Instruction::IGET_WIDE, 12u, 20u, 1u),  // Resolved field #2, unique object.
-      DEF_IPUT(Instruction::IPUT, 13u, 20u, 2u),            // IPUT clobbers field #1 (#2 is wide).
-      DEF_IGET(Instruction::IGET, 14u, 20u, 0u),            // Resolved field #1, unique object.
-      DEF_IGET_WIDE(Instruction::IGET_WIDE, 15u, 20u, 1u),  // Resolved field #2, unique object.
+      DEF_UNIQUE_REF(Instruction::NEW_INSTANCE, 30u),
+      DEF_IGET(Instruction::IGET, 1u, 30u, 0u),             // Resolved field #1, unique object.
+      DEF_IGET(Instruction::IGET, 2u, 31u, 0u),             // Resolved field #1.
+      DEF_IGET_WIDE(Instruction::IGET_WIDE, 3u, 31u, 1u),   // Resolved field #2.
+      DEF_IGET(Instruction::IGET, 5u, 32u, 2u),             // Unresolved IGET can be "acquire".
+      DEF_IGET(Instruction::IGET, 6u, 30u, 0u),             // Resolved field #1, unique object.
+      DEF_IGET(Instruction::IGET, 7u, 31u, 0u),             // Resolved field #1.
+      DEF_IGET_WIDE(Instruction::IGET_WIDE, 8u, 31u, 1u),   // Resolved field #2.
+      DEF_IPUT(Instruction::IPUT, 10u, 32u, 2u),            // IPUT clobbers field #1 (#2 is wide).
+      DEF_IGET(Instruction::IGET, 11u, 30u, 0u),            // Resolved field #1, unique object.
+      DEF_IGET(Instruction::IGET, 12u, 31u, 0u),            // Resolved field #1, new value name.
+      DEF_IGET_WIDE(Instruction::IGET_WIDE, 13u, 31u, 1u),  // Resolved field #2.
+      DEF_IGET_WIDE(Instruction::IGET_WIDE, 15u, 30u, 1u),  // Resolved field #2, unique object.
+      DEF_IPUT(Instruction::IPUT, 17u, 30u, 2u),            // IPUT clobbers field #1 (#2 is wide).
+      DEF_IGET(Instruction::IGET, 18u, 30u, 0u),            // Resolved field #1, unique object.
+      DEF_IGET_WIDE(Instruction::IGET_WIDE, 19u, 30u, 1u),  // Resolved field #2, unique object.
   };
 
   PrepareIFields(ifields);
   PrepareMIRs(mirs);
+  static const int32_t wide_sregs[] = { 3, 8, 13, 15, 19 };
+  MarkAsWideSRegs(wide_sregs);
   PerformLVN();
   ASSERT_EQ(value_names_.size(), 16u);
   // Unresolved field is potentially volatile, so we need to adhere to the volatile semantics.
@@ -430,16 +443,18 @@
   static const MIRDef mirs[] = {
       DEF_SGET(Instruction::SGET, 0u, 0u),            // Resolved field #1.
       DEF_SGET_WIDE(Instruction::SGET_WIDE, 1u, 1u),  // Resolved field #2.
-      DEF_SGET(Instruction::SGET, 2u, 2u),            // Unresolved SGET can be "acquire".
-      DEF_SGET(Instruction::SGET, 3u, 0u),            // Resolved field #1.
-      DEF_SGET_WIDE(Instruction::SGET_WIDE, 4u, 1u),  // Resolved field #2.
-      DEF_SPUT(Instruction::SPUT, 5u, 2u),            // SPUT clobbers field #1 (#2 is wide).
-      DEF_SGET(Instruction::SGET, 6u, 0u),            // Resolved field #1.
-      DEF_SGET_WIDE(Instruction::SGET_WIDE, 7u, 1u),  // Resolved field #2.
+      DEF_SGET(Instruction::SGET, 3u, 2u),            // Unresolved SGET can be "acquire".
+      DEF_SGET(Instruction::SGET, 4u, 0u),            // Resolved field #1.
+      DEF_SGET_WIDE(Instruction::SGET_WIDE, 5u, 1u),  // Resolved field #2.
+      DEF_SPUT(Instruction::SPUT, 7u, 2u),            // SPUT clobbers field #1 (#2 is wide).
+      DEF_SGET(Instruction::SGET, 8u, 0u),            // Resolved field #1.
+      DEF_SGET_WIDE(Instruction::SGET_WIDE, 9u, 1u),  // Resolved field #2.
   };
 
   PrepareSFields(sfields);
   PrepareMIRs(mirs);
+  static const int32_t wide_sregs[] = { 1, 5, 9 };
+  MarkAsWideSRegs(wide_sregs);
   PerformLVN();
   ASSERT_EQ(value_names_.size(), 8u);
   // Unresolved field is potentially volatile, so we need to adhere to the volatile semantics.
@@ -585,18 +600,20 @@
       DEF_IGET(Instruction::IGET, 7u, 20u, 0u),              // New value.
       DEF_IGET(Instruction::IGET, 8u, 20u, 1u),              // Still the same.
       DEF_IPUT_WIDE(Instruction::IPUT_WIDE, 9u, 31u, 3u),    // No aliasing, different type.
-      DEF_IGET(Instruction::IGET, 10u, 20u, 0u),
-      DEF_IGET(Instruction::IGET, 11u, 20u, 1u),
-      DEF_IPUT_WIDE(Instruction::IPUT_WIDE, 12u, 31u, 5u),   // No aliasing, different type.
-      DEF_IGET(Instruction::IGET, 13u, 20u, 0u),
-      DEF_IGET(Instruction::IGET, 14u, 20u, 1u),
-      DEF_IPUT(Instruction::IPUT, 15u, 31u, 4u),             // Aliasing, same type.
-      DEF_IGET(Instruction::IGET, 16u, 20u, 0u),
-      DEF_IGET(Instruction::IGET, 17u, 20u, 1u),
+      DEF_IGET(Instruction::IGET, 11u, 20u, 0u),
+      DEF_IGET(Instruction::IGET, 12u, 20u, 1u),
+      DEF_IPUT_WIDE(Instruction::IPUT_WIDE, 13u, 31u, 5u),   // No aliasing, different type.
+      DEF_IGET(Instruction::IGET, 15u, 20u, 0u),
+      DEF_IGET(Instruction::IGET, 16u, 20u, 1u),
+      DEF_IPUT(Instruction::IPUT, 17u, 31u, 4u),             // Aliasing, same type.
+      DEF_IGET(Instruction::IGET, 18u, 20u, 0u),
+      DEF_IGET(Instruction::IGET, 19u, 20u, 1u),
   };
 
   PrepareIFields(ifields);
   PrepareMIRs(mirs);
+  static const int32_t wide_sregs[] = { 9, 13 };
+  MarkAsWideSRegs(wide_sregs);
   PerformLVN();
   ASSERT_EQ(value_names_.size(), 18u);
   EXPECT_EQ(value_names_[1], value_names_[4]);
@@ -626,14 +643,16 @@
       DEF_AGET(Instruction::AGET, 4u, 20u, 40u),
       DEF_AGET(Instruction::AGET, 5u, 20u, 41u),
       DEF_APUT_WIDE(Instruction::APUT_WIDE, 6u, 31u, 43u),  // No aliasing, different type.
-      DEF_AGET(Instruction::AGET, 7u, 20u, 40u),
-      DEF_AGET(Instruction::AGET, 8u, 20u, 41u),
-      DEF_APUT(Instruction::APUT, 9u, 32u, 40u),            // May alias with all elements.
-      DEF_AGET(Instruction::AGET, 10u, 20u, 40u),           // New value (same index name).
-      DEF_AGET(Instruction::AGET, 11u, 20u, 41u),           // New value (different index name).
+      DEF_AGET(Instruction::AGET, 8u, 20u, 40u),
+      DEF_AGET(Instruction::AGET, 9u, 20u, 41u),
+      DEF_APUT(Instruction::APUT, 10u, 32u, 40u),           // May alias with all elements.
+      DEF_AGET(Instruction::AGET, 11u, 20u, 40u),           // New value (same index name).
+      DEF_AGET(Instruction::AGET, 12u, 20u, 41u),           // New value (different index name).
   };
 
   PrepareMIRs(mirs);
+  static const int32_t wide_sregs[] = { 6 };
+  MarkAsWideSRegs(wide_sregs);
   PerformLVN();
   ASSERT_EQ(value_names_.size(), 12u);
   EXPECT_EQ(value_names_[1], value_names_[4]);
@@ -769,6 +788,8 @@
   };
 
   PrepareMIRs(mirs);
+  static const int32_t wide_sregs[] = { 5, 7, 12, 14, 16 };
+  MarkAsWideSRegs(wide_sregs);
   PerformLVN();
   for (size_t i = 0u; i != mir_count_; ++i) {
     int expected = expected_ignore_div_zero_check[i] ? MIR_IGNORE_DIV_ZERO_CHECK : 0u;
@@ -780,51 +801,55 @@
   static const MIRDef mirs[] = {
       // Core reg constants.
       DEF_CONST(Instruction::CONST_WIDE_16, 0u, 0),
-      DEF_CONST(Instruction::CONST_WIDE_16, 1u, 1),
-      DEF_CONST(Instruction::CONST_WIDE_16, 2u, -1),
-      DEF_CONST(Instruction::CONST_WIDE_32, 3u, 1 << 16),
-      DEF_CONST(Instruction::CONST_WIDE_32, 4u, -1 << 16),
-      DEF_CONST(Instruction::CONST_WIDE_32, 5u, (1 << 16) + 1),
-      DEF_CONST(Instruction::CONST_WIDE_32, 6u, (1 << 16) - 1),
-      DEF_CONST(Instruction::CONST_WIDE_32, 7u, -(1 << 16) + 1),
-      DEF_CONST(Instruction::CONST_WIDE_32, 8u, -(1 << 16) - 1),
-      DEF_CONST(Instruction::CONST_WIDE, 9u, INT64_C(1) << 32),
-      DEF_CONST(Instruction::CONST_WIDE, 10u, INT64_C(-1) << 32),
-      DEF_CONST(Instruction::CONST_WIDE, 11u, (INT64_C(1) << 32) + 1),
-      DEF_CONST(Instruction::CONST_WIDE, 12u, (INT64_C(1) << 32) - 1),
-      DEF_CONST(Instruction::CONST_WIDE, 13u, (INT64_C(-1) << 32) + 1),
-      DEF_CONST(Instruction::CONST_WIDE, 14u, (INT64_C(-1) << 32) - 1),
-      DEF_CONST(Instruction::CONST_WIDE_HIGH16, 15u, 1),       // Effectively 1 << 48.
-      DEF_CONST(Instruction::CONST_WIDE_HIGH16, 16u, 0xffff),  // Effectively -1 << 48.
-      DEF_CONST(Instruction::CONST_WIDE, 17u, (INT64_C(1) << 48) + 1),
-      DEF_CONST(Instruction::CONST_WIDE, 18u, (INT64_C(1) << 48) - 1),
-      DEF_CONST(Instruction::CONST_WIDE, 19u, (INT64_C(-1) << 48) + 1),
-      DEF_CONST(Instruction::CONST_WIDE, 20u, (INT64_C(-1) << 48) - 1),
+      DEF_CONST(Instruction::CONST_WIDE_16, 2u, 1),
+      DEF_CONST(Instruction::CONST_WIDE_16, 4u, -1),
+      DEF_CONST(Instruction::CONST_WIDE_32, 6u, 1 << 16),
+      DEF_CONST(Instruction::CONST_WIDE_32, 8u, -1 << 16),
+      DEF_CONST(Instruction::CONST_WIDE_32, 10u, (1 << 16) + 1),
+      DEF_CONST(Instruction::CONST_WIDE_32, 12u, (1 << 16) - 1),
+      DEF_CONST(Instruction::CONST_WIDE_32, 14u, -(1 << 16) + 1),
+      DEF_CONST(Instruction::CONST_WIDE_32, 16u, -(1 << 16) - 1),
+      DEF_CONST(Instruction::CONST_WIDE, 18u, INT64_C(1) << 32),
+      DEF_CONST(Instruction::CONST_WIDE, 20u, INT64_C(-1) << 32),
+      DEF_CONST(Instruction::CONST_WIDE, 22u, (INT64_C(1) << 32) + 1),
+      DEF_CONST(Instruction::CONST_WIDE, 24u, (INT64_C(1) << 32) - 1),
+      DEF_CONST(Instruction::CONST_WIDE, 26u, (INT64_C(-1) << 32) + 1),
+      DEF_CONST(Instruction::CONST_WIDE, 28u, (INT64_C(-1) << 32) - 1),
+      DEF_CONST(Instruction::CONST_WIDE_HIGH16, 30u, 1),       // Effectively 1 << 48.
+      DEF_CONST(Instruction::CONST_WIDE_HIGH16, 32u, 0xffff),  // Effectively -1 << 48.
+      DEF_CONST(Instruction::CONST_WIDE, 34u, (INT64_C(1) << 48) + 1),
+      DEF_CONST(Instruction::CONST_WIDE, 36u, (INT64_C(1) << 48) - 1),
+      DEF_CONST(Instruction::CONST_WIDE, 38u, (INT64_C(-1) << 48) + 1),
+      DEF_CONST(Instruction::CONST_WIDE, 40u, (INT64_C(-1) << 48) - 1),
       // FP reg constants.
-      DEF_CONST(Instruction::CONST_WIDE_16, 21u, 0),
-      DEF_CONST(Instruction::CONST_WIDE_16, 22u, 1),
-      DEF_CONST(Instruction::CONST_WIDE_16, 23u, -1),
-      DEF_CONST(Instruction::CONST_WIDE_32, 24u, 1 << 16),
-      DEF_CONST(Instruction::CONST_WIDE_32, 25u, -1 << 16),
-      DEF_CONST(Instruction::CONST_WIDE_32, 26u, (1 << 16) + 1),
-      DEF_CONST(Instruction::CONST_WIDE_32, 27u, (1 << 16) - 1),
-      DEF_CONST(Instruction::CONST_WIDE_32, 28u, -(1 << 16) + 1),
-      DEF_CONST(Instruction::CONST_WIDE_32, 29u, -(1 << 16) - 1),
-      DEF_CONST(Instruction::CONST_WIDE, 30u, INT64_C(1) << 32),
-      DEF_CONST(Instruction::CONST_WIDE, 31u, INT64_C(-1) << 32),
-      DEF_CONST(Instruction::CONST_WIDE, 32u, (INT64_C(1) << 32) + 1),
-      DEF_CONST(Instruction::CONST_WIDE, 33u, (INT64_C(1) << 32) - 1),
-      DEF_CONST(Instruction::CONST_WIDE, 34u, (INT64_C(-1) << 32) + 1),
-      DEF_CONST(Instruction::CONST_WIDE, 35u, (INT64_C(-1) << 32) - 1),
-      DEF_CONST(Instruction::CONST_WIDE_HIGH16, 36u, 1),       // Effectively 1 << 48.
-      DEF_CONST(Instruction::CONST_WIDE_HIGH16, 37u, 0xffff),  // Effectively -1 << 48.
-      DEF_CONST(Instruction::CONST_WIDE, 38u, (INT64_C(1) << 48) + 1),
-      DEF_CONST(Instruction::CONST_WIDE, 39u, (INT64_C(1) << 48) - 1),
-      DEF_CONST(Instruction::CONST_WIDE, 40u, (INT64_C(-1) << 48) + 1),
-      DEF_CONST(Instruction::CONST_WIDE, 41u, (INT64_C(-1) << 48) - 1),
+      DEF_CONST(Instruction::CONST_WIDE_16, 42u, 0),
+      DEF_CONST(Instruction::CONST_WIDE_16, 44u, 1),
+      DEF_CONST(Instruction::CONST_WIDE_16, 46u, -1),
+      DEF_CONST(Instruction::CONST_WIDE_32, 48u, 1 << 16),
+      DEF_CONST(Instruction::CONST_WIDE_32, 50u, -1 << 16),
+      DEF_CONST(Instruction::CONST_WIDE_32, 52u, (1 << 16) + 1),
+      DEF_CONST(Instruction::CONST_WIDE_32, 54u, (1 << 16) - 1),
+      DEF_CONST(Instruction::CONST_WIDE_32, 56u, -(1 << 16) + 1),
+      DEF_CONST(Instruction::CONST_WIDE_32, 58u, -(1 << 16) - 1),
+      DEF_CONST(Instruction::CONST_WIDE, 60u, INT64_C(1) << 32),
+      DEF_CONST(Instruction::CONST_WIDE, 62u, INT64_C(-1) << 32),
+      DEF_CONST(Instruction::CONST_WIDE, 64u, (INT64_C(1) << 32) + 1),
+      DEF_CONST(Instruction::CONST_WIDE, 66u, (INT64_C(1) << 32) - 1),
+      DEF_CONST(Instruction::CONST_WIDE, 68u, (INT64_C(-1) << 32) + 1),
+      DEF_CONST(Instruction::CONST_WIDE, 70u, (INT64_C(-1) << 32) - 1),
+      DEF_CONST(Instruction::CONST_WIDE_HIGH16, 72u, 1),       // Effectively 1 << 48.
+      DEF_CONST(Instruction::CONST_WIDE_HIGH16, 74u, 0xffff),  // Effectively -1 << 48.
+      DEF_CONST(Instruction::CONST_WIDE, 76u, (INT64_C(1) << 48) + 1),
+      DEF_CONST(Instruction::CONST_WIDE, 78u, (INT64_C(1) << 48) - 1),
+      DEF_CONST(Instruction::CONST_WIDE, 80u, (INT64_C(-1) << 48) + 1),
+      DEF_CONST(Instruction::CONST_WIDE, 82u, (INT64_C(-1) << 48) - 1),
   };
 
   PrepareMIRs(mirs);
+  for (size_t i = 0; i != arraysize(mirs); ++i) {
+    const int32_t wide_sregs[] = { mirs_[i].ssa_rep->defs[0] };
+    MarkAsWideSRegs(wide_sregs);
+  }
   for (size_t i = arraysize(mirs) / 2u; i != arraysize(mirs); ++i) {
     cu_.mir_graph->reg_location_[mirs_[i].ssa_rep->defs[0]].fp = true;
   }
diff --git a/compiler/dex/mir_dataflow.cc b/compiler/dex/mir_dataflow.cc
index b4aec98..a7ba061 100644
--- a/compiler/dex/mir_dataflow.cc
+++ b/compiler/dex/mir_dataflow.cc
@@ -834,9 +834,6 @@
   // 10B MIR_CHECK
   0,
 
-  // 10C MIR_CHECKPART2
-  0,
-
   // 10D MIR_SELECT
   DF_DA | DF_UB,
 
diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc
index b5c42f1..1871f07 100644
--- a/compiler/dex/mir_graph.cc
+++ b/compiler/dex/mir_graph.cc
@@ -52,8 +52,7 @@
   "OpNullCheck",
   "OpRangeCheck",
   "OpDivZeroCheck",
-  "Check1",
-  "Check2",
+  "Check",
   "Select",
   "ConstVector",
   "MoveVector",
@@ -291,8 +290,12 @@
 BasicBlock* MIRGraph::FindBlock(DexOffset code_offset, bool create,
                                 BasicBlock** immed_pred_block_p,
                                 ScopedArenaVector<uint16_t>* dex_pc_to_block_map) {
-  if (code_offset >= current_code_item_->insns_size_in_code_units_) {
-    return nullptr;
+  if (UNLIKELY(code_offset >= current_code_item_->insns_size_in_code_units_)) {
+    // There can be a fall-through out of the method code. We shall record such a block
+    // here (assuming create==true) and check that it's dead at the end of InlineMethod().
+    // Though we're only aware of the cases where code_offset is exactly the same as
+    // insns_size_in_code_units_, treat greater code_offset the same just in case.
+    code_offset = current_code_item_->insns_size_in_code_units_;
   }
 
   int block_id = (*dex_pc_to_block_map)[code_offset];
@@ -483,6 +486,7 @@
   BasicBlock* taken_block = FindBlock(target, /* create */ true,
                                       /* immed_pred_block_p */ &cur_block,
                                       dex_pc_to_block_map);
+  DCHECK(taken_block != nullptr);
   cur_block->taken = taken_block->id;
   taken_block->predecessors.push_back(cur_block->id);
 
@@ -494,6 +498,7 @@
                                              /* immed_pred_block_p */
                                              &cur_block,
                                              dex_pc_to_block_map);
+    DCHECK(fallthrough_block != nullptr);
     cur_block->fall_through = fallthrough_block->id;
     fallthrough_block->predecessors.push_back(cur_block->id);
   } else if (code_ptr < code_end) {
@@ -508,7 +513,8 @@
                                        ScopedArenaVector<uint16_t>* dex_pc_to_block_map) {
   UNUSED(flags);
   const uint16_t* switch_data =
-      reinterpret_cast<const uint16_t*>(GetCurrentInsns() + cur_offset + insn->dalvikInsn.vB);
+      reinterpret_cast<const uint16_t*>(GetCurrentInsns() + cur_offset +
+          static_cast<int32_t>(insn->dalvikInsn.vB));
   int size;
   const int* keyTable;
   const int* target_table;
@@ -561,6 +567,7 @@
     BasicBlock* case_block = FindBlock(cur_offset + target_table[i],  /* create */ true,
                                        /* immed_pred_block_p */ &cur_block,
                                        dex_pc_to_block_map);
+    DCHECK(case_block != nullptr);
     SuccessorBlockInfo* successor_block_info =
         static_cast<SuccessorBlockInfo*>(arena_->Alloc(sizeof(SuccessorBlockInfo),
                                                        kArenaAllocSuccessor));
@@ -576,6 +583,7 @@
   BasicBlock* fallthrough_block = FindBlock(cur_offset +  width, /* create */ true,
                                             /* immed_pred_block_p */ nullptr,
                                             dex_pc_to_block_map);
+  DCHECK(fallthrough_block != nullptr);
   cur_block->fall_through = fallthrough_block->id;
   fallthrough_block->predecessors.push_back(cur_block->id);
   return cur_block;
@@ -709,8 +717,8 @@
   // FindBlock lookup cache.
   ScopedArenaAllocator allocator(&cu_->arena_stack);
   ScopedArenaVector<uint16_t> dex_pc_to_block_map(allocator.Adapter());
-  dex_pc_to_block_map.resize(dex_pc_to_block_map.size() +
-                             current_code_item_->insns_size_in_code_units_);
+  dex_pc_to_block_map.resize(current_code_item_->insns_size_in_code_units_ +
+                             1 /* Fall-through on last insn; dead or punt to interpreter. */);
 
   // TODO: replace with explicit resize routine.  Using automatic extension side effect for now.
   try_block_addr_->SetBit(current_code_item_->insns_size_in_code_units_);
@@ -876,6 +884,20 @@
   if (cu_->verbose) {
     DumpMIRGraph();
   }
+
+  // Check if there's been a fall-through out of the method code.
+  BasicBlockId out_bb_id = dex_pc_to_block_map[current_code_item_->insns_size_in_code_units_];
+  if (UNLIKELY(out_bb_id != NullBasicBlockId)) {
+    // Eagerly calculate DFS order to determine if the block is dead.
+    DCHECK(!DfsOrdersUpToDate());
+    ComputeDFSOrders();
+    BasicBlock* out_bb = GetBasicBlock(out_bb_id);
+    DCHECK(out_bb != nullptr);
+    if (out_bb->block_type != kDead) {
+      LOG(WARNING) << "Live fall-through out of method in " << PrettyMethod(method_idx, dex_file);
+      SetPuntToInterpreter(true);
+    }
+  }
 }
 
 void MIRGraph::ShowOpcodeStats() {
@@ -1485,7 +1507,7 @@
   Instruction::Format dalvik_format = Instruction::k10x;  // Default to no-operand format.
 
   // Handle special cases that recover the original dalvik instruction.
-  if ((opcode == kMirOpCheck) || (opcode == kMirOpCheckPart2)) {
+  if (opcode == kMirOpCheck) {
     str.append(extended_mir_op_names_[opcode - kMirOpFirst]);
     str.append(": ");
     // Recover the original Dex instruction.
@@ -2494,8 +2516,6 @@
       return Instruction::kContinue | Instruction::kThrow;
     case kMirOpCheck:
       return Instruction::kContinue | Instruction::kThrow;
-    case kMirOpCheckPart2:
-      return Instruction::kContinue;
     case kMirOpSelect:
       return Instruction::kContinue;
     case kMirOpConstVector:
diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h
index 0db54bf..7385a8b 100644
--- a/compiler/dex/mir_graph.h
+++ b/compiler/dex/mir_graph.h
@@ -519,6 +519,7 @@
   bool is_range;
   DexOffset offset;       // Offset in code units.
   MIR* mir;
+  int32_t string_init_offset;
 };
 
 
@@ -723,6 +724,8 @@
   void BasicBlockOptimization();
   void BasicBlockOptimizationEnd();
 
+  void StringChange();
+
   const ArenaVector<BasicBlockId>& GetTopologicalSortOrder() {
     DCHECK(!topological_order_.empty());
     return topological_order_;
@@ -1101,6 +1104,7 @@
   bool EliminateDeadCodeGate();
   bool EliminateDeadCode(BasicBlock* bb);
   void EliminateDeadCodeEnd();
+  void GlobalValueNumberingCleanup();
   bool EliminateSuspendChecksGate();
   bool EliminateSuspendChecks(BasicBlock* bb);
 
@@ -1450,6 +1454,7 @@
   friend class TopologicalSortOrderTest;
   friend class TypeInferenceTest;
   friend class QuickCFITest;
+  friend class QuickAssembleX86TestBase;
 };
 
 }  // namespace art
diff --git a/compiler/dex/mir_method_info.cc b/compiler/dex/mir_method_info.cc
index 0c84b82..94be1fd 100644
--- a/compiler/dex/mir_method_info.cc
+++ b/compiler/dex/mir_method_info.cc
@@ -16,6 +16,7 @@
 
 # include "mir_method_info.h"
 
+#include "dex/compiler_ir.h"
 #include "dex/quick/dex_file_method_inliner.h"
 #include "dex/quick/dex_file_to_method_inliner_map.h"
 #include "dex/verified_method.h"
@@ -83,6 +84,13 @@
     MethodReference* devirt_target = (it->target_dex_file_ != nullptr) ? &devirt_ref : nullptr;
     InvokeType invoke_type = it->GetInvokeType();
     mirror::ArtMethod* resolved_method = nullptr;
+
+    bool string_init = false;
+    if (default_inliner->IsStringInitMethodIndex(it->MethodIndex())) {
+      string_init = true;
+      invoke_type = kDirect;
+    }
+
     if (!it->IsQuickened()) {
       it->target_dex_file_ = dex_file;
       it->target_method_idx_ = it->MethodIndex();
@@ -161,7 +169,8 @@
         ~(kFlagFastPath | kFlagIsIntrinsic | kFlagIsSpecial | kFlagClassIsInitialized |
             (kInvokeTypeMask << kBitSharpTypeBegin));
     it->flags_ = other_flags |
-        (fast_path_flags != 0 ? kFlagFastPath : 0u) |
+        // String init path is a special always-fast path.
+        (fast_path_flags != 0 || string_init ? kFlagFastPath : 0u) |
         ((is_intrinsic_or_special & kInlineIntrinsic) != 0 ? kFlagIsIntrinsic : 0u) |
         ((is_intrinsic_or_special & kInlineSpecial) != 0 ? kFlagIsSpecial : 0u) |
         (static_cast<uint16_t>(invoke_type) << kBitSharpTypeBegin) |
@@ -170,6 +179,9 @@
     it->target_dex_file_ = target_method.dex_file;
     it->target_method_idx_ = target_method.dex_method_index;
     it->stats_flags_ = fast_path_flags;
+    if (string_init) {
+      it->direct_code_ = 0;
+    }
   }
 }
 
diff --git a/compiler/dex/mir_optimization.cc b/compiler/dex/mir_optimization.cc
index 467c14e..217dbee 100644
--- a/compiler/dex/mir_optimization.cc
+++ b/compiler/dex/mir_optimization.cc
@@ -18,6 +18,7 @@
 #include "base/logging.h"
 #include "base/scoped_arena_containers.h"
 #include "dataflow_iterator-inl.h"
+#include "dex/verified_method.h"
 #include "dex_flags.h"
 #include "driver/compiler_driver.h"
 #include "driver/dex_compilation_unit.h"
@@ -25,10 +26,11 @@
 #include "gvn_dead_code_elimination.h"
 #include "local_value_numbering.h"
 #include "mir_field_info.h"
-#include "type_inference.h"
+#include "mirror/string.h"
 #include "quick/dex_file_method_inliner.h"
 #include "quick/dex_file_to_method_inliner_map.h"
 #include "stack.h"
+#include "type_inference.h"
 
 namespace art {
 
@@ -1355,8 +1357,13 @@
   temp_scoped_alloc_.reset();
 }
 
+static void DisableGVNDependentOptimizations(CompilationUnit* cu) {
+  cu->disable_opt |= (1u << kGvnDeadCodeElimination);  // Tested by EliminateDeadCodeGate().
+}
+
 bool MIRGraph::ApplyGlobalValueNumberingGate() {
   if (GlobalValueNumbering::Skip(cu_)) {
+    DisableGVNDependentOptimizations(cu_);
     return false;
   }
 
@@ -1407,16 +1414,12 @@
     cu_->disable_opt |= (1u << kLocalValueNumbering);
   } else {
     LOG(WARNING) << "GVN failed for " << PrettyMethod(cu_->method_idx, *cu_->dex_file);
-    cu_->disable_opt |= (1u << kGvnDeadCodeElimination);
+    DisableGVNDependentOptimizations(cu_);
   }
-
-  if ((cu_->disable_opt & (1 << kGvnDeadCodeElimination)) != 0) {
-    EliminateDeadCodeEnd();
-  }  // else preserve GVN data for CSE.
 }
 
 bool MIRGraph::EliminateDeadCodeGate() {
-  if ((cu_->disable_opt & (1 << kGvnDeadCodeElimination)) != 0) {
+  if ((cu_->disable_opt & (1 << kGvnDeadCodeElimination)) != 0 || temp_.gvn.gvn == nullptr) {
     return false;
   }
   DCHECK(temp_scoped_alloc_ != nullptr);
@@ -1437,16 +1440,26 @@
 }
 
 void MIRGraph::EliminateDeadCodeEnd() {
-  DCHECK_EQ(temp_.gvn.dce != nullptr, (cu_->disable_opt & (1 << kGvnDeadCodeElimination)) == 0);
-  if (temp_.gvn.dce != nullptr) {
-    delete temp_.gvn.dce;
-    temp_.gvn.dce = nullptr;
+  if (kIsDebugBuild) {
+    // DCE can make some previously dead vregs alive again. Make sure the obsolete
+    // live-in information is not used anymore.
+    AllNodesIterator iter(this);
+    for (BasicBlock* bb = iter.Next(); bb != nullptr; bb = iter.Next()) {
+      if (bb->data_flow_info != nullptr) {
+        bb->data_flow_info->live_in_v = nullptr;
+      }
+    }
   }
+}
+
+void MIRGraph::GlobalValueNumberingCleanup() {
+  // If the GVN didn't run, these pointers should be null and everything is effectively a no-op.
+  delete temp_.gvn.dce;
+  temp_.gvn.dce = nullptr;
   delete temp_.gvn.gvn;
   temp_.gvn.gvn = nullptr;
   temp_.gvn.ifield_ids = nullptr;
   temp_.gvn.sfield_ids = nullptr;
-  DCHECK(temp_scoped_alloc_ != nullptr);
   temp_scoped_alloc_.reset();
 }
 
@@ -1649,6 +1662,77 @@
   temp_scoped_alloc_.reset();
 }
 
+void MIRGraph::StringChange() {
+  AllNodesIterator iter(this);
+  for (BasicBlock* bb = iter.Next(); bb != nullptr; bb = iter.Next()) {
+    for (MIR* mir = bb->first_mir_insn; mir != nullptr; mir = mir->next) {
+      // Only NEW_INSTANCE and INVOKE_DIRECT(_RANGE) are rewritten; other opcodes are skipped.
+      Instruction::Code opcode = mir->dalvikInsn.opcode;
+      if (opcode == Instruction::NEW_INSTANCE) {
+        uint32_t type_idx = mir->dalvikInsn.vB;
+        if (cu_->compiler_driver->IsStringTypeIndex(type_idx, cu_->dex_file)) {
+          // Make NEW_INSTANCE a CONST_4 of 0 and nop out its throwing half (if it exists).
+          mir->dalvikInsn.opcode = Instruction::CONST_4;
+          mir->dalvikInsn.vB = 0;
+          MIR* check_mir = GetBasicBlock(bb->predecessors[0])->last_mir_insn;
+          if (check_mir != nullptr &&
+              static_cast<int>(check_mir->dalvikInsn.opcode) == kMirOpCheck) {
+            check_mir->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpNop);
+            check_mir->dalvikInsn.vB = 0;
+          }
+        }
+      } else if ((opcode == Instruction::INVOKE_DIRECT) ||
+                 (opcode == Instruction::INVOKE_DIRECT_RANGE)) {
+        uint32_t method_idx = mir->dalvikInsn.vB;
+        DexFileMethodInliner* inliner =
+            cu_->compiler_driver->GetMethodInlinerMap()->GetMethodInliner(cu_->dex_file);
+        if (inliner->IsStringInitMethodIndex(method_idx)) {
+          bool is_range = (opcode == Instruction::INVOKE_DIRECT_RANGE);
+          uint32_t orig_this_reg = is_range ? mir->dalvikInsn.vC : mir->dalvikInsn.arg[0];
+          // Remove this pointer from string init and change to static call.
+          mir->dalvikInsn.vA--;  // One fewer argument; vA bounds the arg[] shift below.
+          if (!is_range) {
+            mir->dalvikInsn.opcode = Instruction::INVOKE_STATIC;
+            for (uint32_t i = 0; i < mir->dalvikInsn.vA; i++) {
+              mir->dalvikInsn.arg[i] = mir->dalvikInsn.arg[i + 1];
+            }
+          } else {
+            mir->dalvikInsn.opcode = Instruction::INVOKE_STATIC_RANGE;
+            mir->dalvikInsn.vC++;  // Range now starts after the old this register.
+          }
+          // Insert a move-result instruction to the original this pointer reg.
+          MIR* move_result_mir = static_cast<MIR *>(arena_->Alloc(sizeof(MIR), kArenaAllocMIR));
+          move_result_mir->dalvikInsn.opcode = Instruction::MOVE_RESULT_OBJECT;
+          move_result_mir->dalvikInsn.vA = orig_this_reg;
+          move_result_mir->offset = mir->offset;
+          move_result_mir->m_unit_index = mir->m_unit_index;
+          bb->InsertMIRAfter(mir, move_result_mir);
+          // Add additional moves if this pointer was copied to other registers.
+          const VerifiedMethod* verified_method =
+              cu_->compiler_driver->GetVerifiedMethod(cu_->dex_file, cu_->method_idx);
+          DCHECK(verified_method != nullptr);
+          const SafeMap<uint32_t, std::set<uint32_t>>& string_init_map =
+              verified_method->GetStringInitPcRegMap();
+          auto map_it = string_init_map.find(mir->offset);
+          if (map_it != string_init_map.end()) {
+            const std::set<uint32_t>& reg_set = map_it->second;
+            for (auto set_it = reg_set.begin(); set_it != reg_set.end(); ++set_it) {
+              MIR* move_mir = static_cast<MIR *>(arena_->Alloc(sizeof(MIR), kArenaAllocMIR));
+              move_mir->dalvikInsn.opcode = Instruction::MOVE_OBJECT;
+              move_mir->dalvikInsn.vA = *set_it;
+              move_mir->dalvikInsn.vB = orig_this_reg;
+              move_mir->offset = mir->offset;
+              move_mir->m_unit_index = mir->m_unit_index;
+              bb->InsertMIRAfter(move_result_mir, move_mir);
+            }
+          }
+        }
+      }
+    }
+  }
+}
+
+
 bool MIRGraph::EliminateSuspendChecksGate() {
   if ((cu_->disable_opt & (1 << kSuspendCheckElimination)) != 0 ||  // Disabled.
       GetMaxNestedLoops() == 0u ||   // Nothing to do.
diff --git a/compiler/dex/pass_driver_me_opts.cc b/compiler/dex/pass_driver_me_opts.cc
index 2e871da..375003b 100644
--- a/compiler/dex/pass_driver_me_opts.cc
+++ b/compiler/dex/pass_driver_me_opts.cc
@@ -35,6 +35,7 @@
    * Disadvantage is the passes can't change their internal states depending on CompilationUnit:
    *   - This is not yet an issue: no current pass would require it.
    */
+  pass_manager->AddPass(new StringChange);
   pass_manager->AddPass(new CacheFieldLoweringInfo);
   pass_manager->AddPass(new CacheMethodLoweringInfo);
   pass_manager->AddPass(new CalculatePredecessors);
@@ -46,6 +47,7 @@
   pass_manager->AddPass(new CodeLayout);
   pass_manager->AddPass(new GlobalValueNumberingPass);
   pass_manager->AddPass(new DeadCodeEliminationPass);
+  pass_manager->AddPass(new GlobalValueNumberingCleanupPass);
   pass_manager->AddPass(new ConstantPropagation);
   pass_manager->AddPass(new MethodUseCount);
   pass_manager->AddPass(new BBOptimizations);
diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc
index 6ba4016..2b2d6af 100644
--- a/compiler/dex/quick/arm/call_arm.cc
+++ b/compiler/dex/quick/arm/call_arm.cc
@@ -21,6 +21,7 @@
 #include "arm_lir.h"
 #include "base/logging.h"
 #include "dex/mir_graph.h"
+#include "dex/quick/dex_file_to_method_inliner_map.h"
 #include "dex/quick/mir_to_lir-inl.h"
 #include "driver/compiler_driver.h"
 #include "driver/compiler_options.h"
@@ -619,13 +620,31 @@
  * Bit of a hack here - in the absence of a real scheduling pass,
  * emit the next instruction in static & direct invoke sequences.
  */
-int ArmMir2Lir::ArmNextSDCallInsn(CompilationUnit* cu, CallInfo* info ATTRIBUTE_UNUSED,
+int ArmMir2Lir::ArmNextSDCallInsn(CompilationUnit* cu, CallInfo* info,
                                   int state, const MethodReference& target_method,
                                   uint32_t unused_idx ATTRIBUTE_UNUSED,
                                   uintptr_t direct_code, uintptr_t direct_method,
                                   InvokeType type) {
   ArmMir2Lir* cg = static_cast<ArmMir2Lir*>(cu->cg.get());
-  if (direct_code != 0 && direct_method != 0) {
+  if (info->string_init_offset != 0) {
+    RegStorage arg0_ref = cg->TargetReg(kArg0, kRef);
+    switch (state) {
+    case 0: {  // Grab target method* from thread pointer
+      cg->LoadRefDisp(rs_rARM_SELF, info->string_init_offset, arg0_ref, kNotVolatile);
+      break;
+    }
+    case 1:  // Grab the code from the method*
+      if (direct_code == 0) {
+        // kInvokeTgt := arg0_ref->entrypoint
+        cg->LoadWordDisp(arg0_ref,
+                         mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
+                             kArmPointerSize).Int32Value(), cg->TargetPtrReg(kInvokeTgt));
+      }
+      break;
+    default:
+      return -1;
+    }
+  } else if (direct_code != 0 && direct_method != 0) {
     switch (state) {
     case 0:  // Get the current Method* [sets kArg0]
       if (direct_code != static_cast<uintptr_t>(-1)) {
diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc
index 9a7c2ad..e49e40d 100644
--- a/compiler/dex/quick/arm64/call_arm64.cc
+++ b/compiler/dex/quick/arm64/call_arm64.cc
@@ -21,6 +21,7 @@
 #include "arm64_lir.h"
 #include "base/logging.h"
 #include "dex/mir_graph.h"
+#include "dex/quick/dex_file_to_method_inliner_map.h"
 #include "dex/quick/mir_to_lir-inl.h"
 #include "driver/compiler_driver.h"
 #include "driver/compiler_options.h"
@@ -460,7 +461,25 @@
                                       InvokeType type) {
   UNUSED(info, unused_idx);
   Arm64Mir2Lir* cg = static_cast<Arm64Mir2Lir*>(cu->cg.get());
-  if (direct_code != 0 && direct_method != 0) {
+  if (info->string_init_offset != 0) {
+    RegStorage arg0_ref = cg->TargetReg(kArg0, kRef);
+    switch (state) {
+    case 0: {  // Grab target method* from thread pointer
+      cg->LoadRefDisp(rs_xSELF, info->string_init_offset, arg0_ref, kNotVolatile);
+      break;
+    }
+    case 1:  // Grab the code from the method*
+      if (direct_code == 0) {
+        // kInvokeTgt := arg0_ref->entrypoint
+        cg->LoadWordDisp(arg0_ref,
+                         mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
+                             kArm64PointerSize).Int32Value(), cg->TargetPtrReg(kInvokeTgt));
+      }
+      break;
+    default:
+      return -1;
+    }
+  } else if (direct_code != 0 && direct_method != 0) {
     switch (state) {
     case 0:  // Get the current Method* [sets kArg0]
       if (direct_code != static_cast<uintptr_t>(-1)) {
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index fb68335..86bb69d 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -1391,22 +1391,6 @@
       }
     }
   }
-  if (bb->block_type != kEntryBlock && bb->first_mir_insn != nullptr &&
-      static_cast<int>(bb->first_mir_insn->dalvikInsn.opcode) == kMirOpCheckPart2) {
-    // In Mir2Lir::MethodBlockCodeGen() we have artificially moved the throwing
-    // instruction to the previous block. However, the MIRGraph data used above
-    // doesn't reflect that, so we still need to process that MIR insn here.
-    MIR* mir = nullptr;
-    BasicBlock* pred_bb = bb;
-    // Traverse empty blocks.
-    while (mir == nullptr && pred_bb->predecessors.size() == 1u) {
-      pred_bb = mir_graph_->GetBasicBlock(bb->predecessors[0]);
-      DCHECK(pred_bb != nullptr);
-      mir = pred_bb->last_mir_insn;
-    }
-    DCHECK(mir != nullptr);
-    UpdateReferenceVRegsLocal(nullptr, mir, references);
-  }
 }
 
 bool Mir2Lir::UpdateReferenceVRegsLocal(MIR* mir, MIR* prev_mir, BitVector* references) {
diff --git a/compiler/dex/quick/dex_file_method_inliner.cc b/compiler/dex/quick/dex_file_method_inliner.cc
index f5e6c09..2568ee3 100644
--- a/compiler/dex/quick/dex_file_method_inliner.cc
+++ b/compiler/dex/quick/dex_file_method_inliner.cc
@@ -55,8 +55,12 @@
     false,  // kIntrinsicReferenceGetReferent
     false,  // kIntrinsicCharAt
     false,  // kIntrinsicCompareTo
+    false,  // kIntrinsicGetCharsNoCheck
     false,  // kIntrinsicIsEmptyOrLength
     false,  // kIntrinsicIndexOf
+    true,   // kIntrinsicNewStringFromBytes
+    true,   // kIntrinsicNewStringFromChars
+    true,   // kIntrinsicNewStringFromString
     true,   // kIntrinsicCurrentThread
     true,   // kIntrinsicPeek
     true,   // kIntrinsicPoke
@@ -88,8 +92,15 @@
 static_assert(!kIntrinsicIsStatic[kIntrinsicReferenceGetReferent], "Get must not be static");
 static_assert(!kIntrinsicIsStatic[kIntrinsicCharAt], "CharAt must not be static");
 static_assert(!kIntrinsicIsStatic[kIntrinsicCompareTo], "CompareTo must not be static");
+static_assert(!kIntrinsicIsStatic[kIntrinsicGetCharsNoCheck], "GetCharsNoCheck must not be static");
 static_assert(!kIntrinsicIsStatic[kIntrinsicIsEmptyOrLength], "IsEmptyOrLength must not be static");
 static_assert(!kIntrinsicIsStatic[kIntrinsicIndexOf], "IndexOf must not be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicNewStringFromBytes],
+              "NewStringFromBytes must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicNewStringFromChars],
+              "NewStringFromChars must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicNewStringFromString],
+              "NewStringFromString must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicCurrentThread], "CurrentThread must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicPeek], "Peek must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicPoke], "Poke must be static");
@@ -137,9 +148,15 @@
     "F",                       // kClassCacheFloat
     "D",                       // kClassCacheDouble
     "V",                       // kClassCacheVoid
+    "[B",                      // kClassCacheJavaLangByteArray
+    "[C",                      // kClassCacheJavaLangCharArray
+    "[I",                      // kClassCacheJavaLangIntArray
     "Ljava/lang/Object;",      // kClassCacheJavaLangObject
-    "Ljava/lang/ref/Reference;",  // kClassCacheJavaLangRefReference
+    "Ljava/lang/ref/Reference;",   // kClassCacheJavaLangRefReference
     "Ljava/lang/String;",      // kClassCacheJavaLangString
+    "Ljava/lang/StringBuffer;",    // kClassCacheJavaLangStringBuffer
+    "Ljava/lang/StringBuilder;",   // kClassCacheJavaLangStringBuilder
+    "Ljava/lang/StringFactory;",   // kClassCacheJavaLangStringFactory
     "Ljava/lang/Double;",      // kClassCacheJavaLangDouble
     "Ljava/lang/Float;",       // kClassCacheJavaLangFloat
     "Ljava/lang/Integer;",     // kClassCacheJavaLangInteger
@@ -148,10 +165,10 @@
     "Ljava/lang/Math;",        // kClassCacheJavaLangMath
     "Ljava/lang/StrictMath;",  // kClassCacheJavaLangStrictMath
     "Ljava/lang/Thread;",      // kClassCacheJavaLangThread
+    "Ljava/nio/charset/Charset;",  // kClassCacheJavaNioCharsetCharset
     "Llibcore/io/Memory;",     // kClassCacheLibcoreIoMemory
     "Lsun/misc/Unsafe;",       // kClassCacheSunMiscUnsafe
     "Ljava/lang/System;",      // kClassCacheJavaLangSystem
-    "[C"                       // kClassCacheJavaLangCharArray
 };
 
 const char* const DexFileMethodInliner::kNameCacheNames[] = {
@@ -172,9 +189,14 @@
     "getReferent",           // kNameCacheReferenceGet
     "charAt",                // kNameCacheCharAt
     "compareTo",             // kNameCacheCompareTo
+    "getCharsNoCheck",       // kNameCacheGetCharsNoCheck
     "isEmpty",               // kNameCacheIsEmpty
     "indexOf",               // kNameCacheIndexOf
     "length",                // kNameCacheLength
+    "<init>",                // kNameCacheInit
+    "newStringFromBytes",    // kNameCacheNewStringFromBytes
+    "newStringFromChars",    // kNameCacheNewStringFromChars
+    "newStringFromString",   // kNameCacheNewStringFromString
     "currentThread",         // kNameCacheCurrentThread
     "peekByte",              // kNameCachePeekByte
     "peekIntNative",         // kNameCachePeekIntNative
@@ -282,7 +304,53 @@
         kClassCacheJavaLangObject } },
     // kProtoCacheCharArrayICharArrayII_V
     { kClassCacheVoid, 5, {kClassCacheJavaLangCharArray, kClassCacheInt,
-                kClassCacheJavaLangCharArray, kClassCacheInt, kClassCacheInt}}
+        kClassCacheJavaLangCharArray, kClassCacheInt, kClassCacheInt} },
+    // kProtoCacheIICharArrayI_V
+    { kClassCacheVoid, 4, { kClassCacheInt, kClassCacheInt, kClassCacheJavaLangCharArray,
+        kClassCacheInt } },
+    // kProtoCacheByteArrayIII_String
+    { kClassCacheJavaLangString, 4, { kClassCacheJavaLangByteArray, kClassCacheInt, kClassCacheInt,
+        kClassCacheInt } },
+    // kProtoCacheIICharArray_String
+    { kClassCacheJavaLangString, 3, { kClassCacheInt, kClassCacheInt,
+        kClassCacheJavaLangCharArray } },
+    // kProtoCacheString_String
+    { kClassCacheJavaLangString, 1, { kClassCacheJavaLangString } },
+    // kProtoCache_V
+    { kClassCacheVoid, 0, { } },
+    // kProtoCacheByteArray_V
+    { kClassCacheVoid, 1, { kClassCacheJavaLangByteArray } },
+    // kProtoCacheByteArrayI_V
+    { kClassCacheVoid, 2, { kClassCacheJavaLangByteArray, kClassCacheInt } },
+    // kProtoCacheByteArrayII_V
+    { kClassCacheVoid, 3, { kClassCacheJavaLangByteArray, kClassCacheInt, kClassCacheInt } },
+    // kProtoCacheByteArrayIII_V
+    { kClassCacheVoid, 4, { kClassCacheJavaLangByteArray, kClassCacheInt, kClassCacheInt,
+        kClassCacheInt } },
+    // kProtoCacheByteArrayIIString_V
+    { kClassCacheVoid, 4, { kClassCacheJavaLangByteArray, kClassCacheInt, kClassCacheInt,
+        kClassCacheJavaLangString } },
+    // kProtoCacheByteArrayString_V
+    { kClassCacheVoid, 2, { kClassCacheJavaLangByteArray, kClassCacheJavaLangString } },
+    // kProtoCacheByteArrayIICharset_V
+    { kClassCacheVoid, 4, { kClassCacheJavaLangByteArray, kClassCacheInt, kClassCacheInt,
+        kClassCacheJavaNioCharsetCharset } },
+    // kProtoCacheByteArrayCharset_V
+    { kClassCacheVoid, 2, { kClassCacheJavaLangByteArray, kClassCacheJavaNioCharsetCharset } },
+    // kProtoCacheCharArray_V
+    { kClassCacheVoid, 1, { kClassCacheJavaLangCharArray } },
+    // kProtoCacheCharArrayII_V
+    { kClassCacheVoid, 3, { kClassCacheJavaLangCharArray, kClassCacheInt, kClassCacheInt } },
+    // kProtoCacheIICharArray_V
+    { kClassCacheVoid, 3, { kClassCacheInt, kClassCacheInt, kClassCacheJavaLangCharArray } },
+    // kProtoCacheIntArrayII_V
+    { kClassCacheVoid, 3, { kClassCacheJavaLangIntArray, kClassCacheInt, kClassCacheInt } },
+    // kProtoCacheString_V
+    { kClassCacheVoid, 1, { kClassCacheJavaLangString } },
+    // kProtoCacheStringBuffer_V
+    { kClassCacheVoid, 1, { kClassCacheJavaLangStringBuffer } },
+    // kProtoCacheStringBuilder_V
+    { kClassCacheVoid, 1, { kClassCacheJavaLangStringBuilder } },
 };
 
 const DexFileMethodInliner::IntrinsicDef DexFileMethodInliner::kIntrinsicMethods[] = {
@@ -343,6 +411,7 @@
 
     INTRINSIC(JavaLangString, CharAt, I_C, kIntrinsicCharAt, 0),
     INTRINSIC(JavaLangString, CompareTo, String_I, kIntrinsicCompareTo, 0),
+    INTRINSIC(JavaLangString, GetCharsNoCheck, IICharArrayI_V, kIntrinsicGetCharsNoCheck, 0),
     INTRINSIC(JavaLangString, IsEmpty, _Z, kIntrinsicIsEmptyOrLength, kIntrinsicFlagIsEmpty),
     INTRINSIC(JavaLangString, IndexOf, II_I, kIntrinsicIndexOf, kIntrinsicFlagNone),
     INTRINSIC(JavaLangString, IndexOf, I_I, kIntrinsicIndexOf, kIntrinsicFlagBase0),
@@ -386,8 +455,29 @@
     INTRINSIC(JavaLangSystem, ArrayCopy, CharArrayICharArrayII_V , kIntrinsicSystemArrayCopyCharArray,
               0),
 
-
 #undef INTRINSIC
+
+#define SPECIAL(c, n, p, o, d) \
+    { { kClassCache ## c, kNameCache ## n, kProtoCache ## p }, { o, kInlineSpecial, { d } } }
+
+    SPECIAL(JavaLangString, Init, _V, kInlineStringInit, 0),
+    SPECIAL(JavaLangString, Init, ByteArray_V, kInlineStringInit, 1),
+    SPECIAL(JavaLangString, Init, ByteArrayI_V, kInlineStringInit, 2),
+    SPECIAL(JavaLangString, Init, ByteArrayII_V, kInlineStringInit, 3),
+    SPECIAL(JavaLangString, Init, ByteArrayIII_V, kInlineStringInit, 4),
+    SPECIAL(JavaLangString, Init, ByteArrayIIString_V, kInlineStringInit, 5),
+    SPECIAL(JavaLangString, Init, ByteArrayString_V, kInlineStringInit, 6),
+    SPECIAL(JavaLangString, Init, ByteArrayIICharset_V, kInlineStringInit, 7),
+    SPECIAL(JavaLangString, Init, ByteArrayCharset_V, kInlineStringInit, 8),
+    SPECIAL(JavaLangString, Init, CharArray_V, kInlineStringInit, 9),
+    SPECIAL(JavaLangString, Init, CharArrayII_V, kInlineStringInit, 10),
+    SPECIAL(JavaLangString, Init, IICharArray_V, kInlineStringInit, 11),
+    SPECIAL(JavaLangString, Init, IntArrayII_V, kInlineStringInit, 12),
+    SPECIAL(JavaLangString, Init, String_V, kInlineStringInit, 13),
+    SPECIAL(JavaLangString, Init, StringBuffer_V, kInlineStringInit, 14),
+    SPECIAL(JavaLangString, Init, StringBuilder_V, kInlineStringInit, 15),
+
+#undef SPECIAL
 };
 
 DexFileMethodInliner::DexFileMethodInliner()
@@ -491,11 +581,19 @@
       return backend->GenInlinedCharAt(info);
     case kIntrinsicCompareTo:
       return backend->GenInlinedStringCompareTo(info);
+    case kIntrinsicGetCharsNoCheck:
+      return backend->GenInlinedStringGetCharsNoCheck(info);
     case kIntrinsicIsEmptyOrLength:
       return backend->GenInlinedStringIsEmptyOrLength(
           info, intrinsic.d.data & kIntrinsicFlagIsEmpty);
     case kIntrinsicIndexOf:
       return backend->GenInlinedIndexOf(info, intrinsic.d.data & kIntrinsicFlagBase0);
+    case kIntrinsicNewStringFromBytes:
+      return backend->GenInlinedStringFactoryNewStringFromBytes(info);
+    case kIntrinsicNewStringFromChars:
+      return backend->GenInlinedStringFactoryNewStringFromChars(info);
+    case kIntrinsicNewStringFromString:
+      return backend->GenInlinedStringFactoryNewStringFromString(info);
     case kIntrinsicCurrentThread:
       return backend->GenInlinedCurrentThread(info);
     case kIntrinsicPeek:
@@ -574,6 +672,8 @@
       move_result = mir_graph->FindMoveResult(bb, invoke);
       result = GenInlineIPut(mir_graph, bb, invoke, move_result, method);
       break;
+    case kInlineStringInit:
+      return false;
     default:
       LOG(FATAL) << "Unexpected inline op: " << method.opcode;
       break;
@@ -921,4 +1021,21 @@
   return true;
 }
 
+uint32_t DexFileMethodInliner::GetOffsetForStringInit(uint32_t method_index, size_t pointer_size) {
+  ReaderMutexLock mu(Thread::Current(), lock_);
+  auto it = inline_methods_.find(method_index);
+  if (it != inline_methods_.end() && (it->second.opcode == kInlineStringInit)) {
+    uint32_t string_init_base_offset = Thread::QuickEntryPointOffsetWithSize(
+              OFFSETOF_MEMBER(QuickEntryPoints, pNewEmptyString), pointer_size);
+    return string_init_base_offset + it->second.d.data * pointer_size;
+  }
+  return 0;  // Not registered as a kInlineStringInit method.
+}
+
+bool DexFileMethodInliner::IsStringInitMethodIndex(uint32_t method_index) {
+  ReaderMutexLock mu(Thread::Current(), lock_);
+  auto it = inline_methods_.find(method_index);
+  return (it != inline_methods_.end()) && (it->second.opcode == kInlineStringInit);
+}
+
 }  // namespace art
diff --git a/compiler/dex/quick/dex_file_method_inliner.h b/compiler/dex/quick/dex_file_method_inliner.h
index d1e5621..26b41bf 100644
--- a/compiler/dex/quick/dex_file_method_inliner.h
+++ b/compiler/dex/quick/dex_file_method_inliner.h
@@ -96,6 +96,17 @@
         LOCKS_EXCLUDED(lock_);
 
     /**
+     * Gets the thread pointer entrypoint offset for a string init method index and pointer size.
+     */
+    uint32_t GetOffsetForStringInit(uint32_t method_index, size_t pointer_size)
+        LOCKS_EXCLUDED(lock_);
+
+    /**
+     * Check whether a particular method index is a string init.
+     */
+    bool IsStringInitMethodIndex(uint32_t method_index) LOCKS_EXCLUDED(lock_);
+
+    /**
      * To avoid multiple lookups of a class by its descriptor, we cache its
      * type index in the IndexCache. These are the indexes into the IndexCache
      * class_indexes array.
@@ -111,9 +122,15 @@
       kClassCacheFloat,
       kClassCacheDouble,
       kClassCacheVoid,
+      kClassCacheJavaLangByteArray,
+      kClassCacheJavaLangCharArray,
+      kClassCacheJavaLangIntArray,
       kClassCacheJavaLangObject,
       kClassCacheJavaLangRefReference,
       kClassCacheJavaLangString,
+      kClassCacheJavaLangStringBuffer,
+      kClassCacheJavaLangStringBuilder,
+      kClassCacheJavaLangStringFactory,
       kClassCacheJavaLangDouble,
       kClassCacheJavaLangFloat,
       kClassCacheJavaLangInteger,
@@ -122,10 +139,10 @@
       kClassCacheJavaLangMath,
       kClassCacheJavaLangStrictMath,
       kClassCacheJavaLangThread,
+      kClassCacheJavaNioCharsetCharset,
       kClassCacheLibcoreIoMemory,
       kClassCacheSunMiscUnsafe,
       kClassCacheJavaLangSystem,
-      kClassCacheJavaLangCharArray,
       kClassCacheLast
     };
 
@@ -153,9 +170,14 @@
       kNameCacheReferenceGetReferent,
       kNameCacheCharAt,
       kNameCacheCompareTo,
+      kNameCacheGetCharsNoCheck,
       kNameCacheIsEmpty,
       kNameCacheIndexOf,
       kNameCacheLength,
+      kNameCacheInit,
+      kNameCacheNewStringFromBytes,
+      kNameCacheNewStringFromChars,
+      kNameCacheNewStringFromString,
       kNameCacheCurrentThread,
       kNameCachePeekByte,
       kNameCachePeekIntNative,
@@ -230,6 +252,26 @@
       kProtoCacheObjectJ_Object,
       kProtoCacheObjectJObject_V,
       kProtoCacheCharArrayICharArrayII_V,
+      kProtoCacheIICharArrayI_V,
+      kProtoCacheByteArrayIII_String,
+      kProtoCacheIICharArray_String,
+      kProtoCacheString_String,
+      kProtoCache_V,
+      kProtoCacheByteArray_V,
+      kProtoCacheByteArrayI_V,
+      kProtoCacheByteArrayII_V,
+      kProtoCacheByteArrayIII_V,
+      kProtoCacheByteArrayIIString_V,
+      kProtoCacheByteArrayString_V,
+      kProtoCacheByteArrayIICharset_V,
+      kProtoCacheByteArrayCharset_V,
+      kProtoCacheCharArray_V,
+      kProtoCacheCharArrayII_V,
+      kProtoCacheIICharArray_V,
+      kProtoCacheIntArrayII_V,
+      kProtoCacheString_V,
+      kProtoCacheStringBuffer_V,
+      kProtoCacheStringBuilder_V,
       kProtoCacheLast
     };
 
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index de5e041..0592c74 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -58,24 +58,19 @@
   return (cu->enable_debug & (1 << kDebugSlowTypePath)) != 0;
 }
 
-void Mir2Lir::GenIfNullUseHelperImmMethod(
-    RegStorage r_result, QuickEntrypointEnum trampoline, int imm, RegStorage r_method) {
+void Mir2Lir::GenIfNullUseHelperImm(RegStorage r_result, QuickEntrypointEnum trampoline, int imm) {
   class CallHelperImmMethodSlowPath : public LIRSlowPath {
    public:
     CallHelperImmMethodSlowPath(Mir2Lir* m2l, LIR* fromfast, LIR* cont,
                                 QuickEntrypointEnum trampoline_in, int imm_in,
-                                RegStorage r_method_in, RegStorage r_result_in)
+                                RegStorage r_result_in)
         : LIRSlowPath(m2l, fromfast, cont), trampoline_(trampoline_in),
-          imm_(imm_in), r_method_(r_method_in), r_result_(r_result_in) {
+          imm_(imm_in), r_result_(r_result_in) {
     }
 
     void Compile() {
       GenerateTargetLabel();
-      if (r_method_.Valid()) {
-        m2l_->CallRuntimeHelperImmReg(trampoline_, imm_, r_method_, true);
-      } else {
-        m2l_->CallRuntimeHelperImmMethod(trampoline_, imm_, true);
-      }
+      m2l_->CallRuntimeHelperImm(trampoline_, imm_, true);
       m2l_->OpRegCopy(r_result_,  m2l_->TargetReg(kRet0, kRef));
       m2l_->OpUnconditionalBranch(cont_);
     }
@@ -83,7 +78,6 @@
    private:
     QuickEntrypointEnum trampoline_;
     const int imm_;
-    const RegStorage r_method_;
     const RegStorage r_result_;
   };
 
@@ -91,7 +85,7 @@
   LIR* cont = NewLIR0(kPseudoTargetLabel);
 
   AddSlowPath(new (arena_) CallHelperImmMethodSlowPath(this, branch, cont, trampoline, imm,
-                                                       r_method, r_result));
+                                                       r_result));
 }
 
 RegStorage Mir2Lir::GenGetOtherTypeForSgetSput(const MirSFieldLoweringInfo& field_info,
@@ -101,13 +95,12 @@
   FlushAllRegs();
   RegStorage r_base = TargetReg(kArg0, kRef);
   LockTemp(r_base);
-  RegStorage r_method = RegStorage::InvalidReg();  // Loaded lazily, maybe in the slow-path.
   if (CanUseOpPcRelDexCacheArrayLoad()) {
     uint32_t offset = dex_cache_arrays_layout_.TypeOffset(field_info.StorageIndex());
     OpPcRelDexCacheArrayLoad(cu_->dex_file, offset, r_base);
   } else {
     // Using fixed register to sync with possible call to runtime support.
-    r_method = LoadCurrMethodWithHint(TargetReg(kArg1, kRef));
+    RegStorage r_method = LoadCurrMethodWithHint(r_base);
     LoadRefDisp(r_method, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(), r_base,
                 kNotVolatile);
     int32_t offset_of_field = ObjArray::OffsetOfElement(field_info.StorageIndex()).Int32Value();
@@ -139,10 +132,10 @@
       // entry in the dex cache is null, and the "uninit" when the class is not yet initialized.
       // At least one will be non-null here, otherwise we wouldn't generate the slow path.
       StaticFieldSlowPath(Mir2Lir* m2l, LIR* unresolved, LIR* uninit, LIR* cont, int storage_index,
-                          RegStorage r_base_in, RegStorage r_method_in)
+                          RegStorage r_base_in)
           : LIRSlowPath(m2l, unresolved != nullptr ? unresolved : uninit, cont),
             second_branch_(unresolved != nullptr ? uninit : nullptr),
-            storage_index_(storage_index), r_base_(r_base_in), r_method_(r_method_in) {
+            storage_index_(storage_index), r_base_(r_base_in) {
       }
 
       void Compile() {
@@ -150,14 +143,7 @@
         if (second_branch_ != nullptr) {
           second_branch_->target = target;
         }
-        if (r_method_.Valid()) {
-          // ArtMethod* was loaded in normal path - use it.
-          m2l_->CallRuntimeHelperImmReg(kQuickInitializeStaticStorage, storage_index_, r_method_,
-                                        true);
-        } else {
-          // ArtMethod* wasn't loaded in normal path - use a helper that loads it.
-          m2l_->CallRuntimeHelperImmMethod(kQuickInitializeStaticStorage, storage_index_, true);
-        }
+        m2l_->CallRuntimeHelperImm(kQuickInitializeStaticStorage, storage_index_, true);
         // Copy helper's result into r_base, a no-op on all but MIPS.
         m2l_->OpRegCopy(r_base_,  m2l_->TargetReg(kRet0, kRef));
 
@@ -170,17 +156,13 @@
 
       const int storage_index_;
       const RegStorage r_base_;
-      RegStorage r_method_;
     };
 
     // The slow path is invoked if the r_base is null or the class pointed
     // to by it is not initialized.
     LIR* cont = NewLIR0(kPseudoTargetLabel);
     AddSlowPath(new (arena_) StaticFieldSlowPath(this, unresolved_branch, uninit_branch, cont,
-                                                 field_info.StorageIndex(), r_base, r_method));
-  }
-  if (IsTemp(r_method)) {
-    FreeTemp(r_method);
+                                                 field_info.StorageIndex(), r_base));
   }
   return r_base;
 }
@@ -1042,22 +1024,19 @@
                                                         type_idx)) {
     // Call out to helper which resolves type and verifies access.
     // Resolved type returned in kRet0.
-    CallRuntimeHelperImmMethod(kQuickInitializeTypeAndVerifyAccess, type_idx, true);
+    CallRuntimeHelperImm(kQuickInitializeTypeAndVerifyAccess, type_idx, true);
     rl_result = GetReturn(kRefReg);
   } else {
     rl_result = EvalLoc(rl_dest, kRefReg, true);
     // We don't need access checks, load type from dex cache
-    RegStorage r_method = RegStorage::InvalidReg();
     if (CanUseOpPcRelDexCacheArrayLoad()) {
       size_t offset = dex_cache_arrays_layout_.TypeOffset(type_idx);
       OpPcRelDexCacheArrayLoad(cu_->dex_file, offset, rl_result.reg);
     } else {
-      RegLocation rl_method = LoadCurrMethod();
-      CheckRegLocation(rl_method);
-      r_method = rl_method.reg;
       int32_t dex_cache_offset =
           mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value();
       RegStorage res_reg = AllocTempRef();
+      RegStorage r_method = LoadCurrMethodWithHint(res_reg);
       LoadRefDisp(r_method, dex_cache_offset, res_reg, kNotVolatile);
       int32_t offset_of_type = ClassArray::OffsetOfElement(type_idx).Int32Value();
       LoadRefDisp(res_reg, offset_of_type, rl_result.reg, kNotVolatile);
@@ -1067,7 +1046,7 @@
         type_idx) || ForceSlowTypePath(cu_)) {
       // Slow path, at runtime test if type is null and if so initialize
       FlushAllRegs();
-      GenIfNullUseHelperImmMethod(rl_result.reg, kQuickInitializeType, type_idx, r_method);
+      GenIfNullUseHelperImm(rl_result.reg, kQuickInitializeType, type_idx);
     }
   }
   StoreValue(rl_dest, rl_result);
@@ -1085,14 +1064,13 @@
 
     // Might call out to helper, which will return resolved string in kRet0
     RegStorage ret0 = TargetReg(kRet0, kRef);
-    RegStorage r_method = RegStorage::InvalidReg();
     if (CanUseOpPcRelDexCacheArrayLoad()) {
       size_t offset = dex_cache_arrays_layout_.StringOffset(string_idx);
       OpPcRelDexCacheArrayLoad(cu_->dex_file, offset, ret0);
     } else {
-      r_method = LoadCurrMethodWithHint(TargetReg(kArg1, kRef));
       // Method to declaring class.
       RegStorage arg0 = TargetReg(kArg0, kRef);
+      RegStorage r_method = LoadCurrMethodWithHint(arg0);
       LoadRefDisp(r_method, mirror::ArtMethod::DeclaringClassOffset().Int32Value(),
                   arg0, kNotVolatile);
       // Declaring class to dex cache strings.
@@ -1100,7 +1078,7 @@
 
       LoadRefDisp(arg0, offset_of_string, ret0, kNotVolatile);
     }
-    GenIfNullUseHelperImmMethod(ret0, kQuickResolveString, string_idx, r_method);
+    GenIfNullUseHelperImm(ret0, kQuickResolveString, string_idx);
 
     GenBarrier();
     StoreValue(rl_dest, GetReturn(kRefReg));
@@ -1262,12 +1240,11 @@
       LoadValueDirectFixed(rl_src, ref_reg);  // kArg0 <= ref
     }
 
-    RegStorage r_method = RegStorage::InvalidReg();
     if (CanUseOpPcRelDexCacheArrayLoad()) {
       size_t offset = dex_cache_arrays_layout_.TypeOffset(type_idx);
       OpPcRelDexCacheArrayLoad(cu_->dex_file, offset, class_reg);
     } else {
-      r_method = LoadCurrMethodWithHint(TargetReg(kArg1, kRef));
+      RegStorage r_method = LoadCurrMethodWithHint(class_reg);
       // Load dex cache entry into class_reg (kArg2)
       LoadRefDisp(r_method, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(),
                   class_reg, kNotVolatile);
@@ -1275,7 +1252,7 @@
       LoadRefDisp(class_reg, offset_of_type, class_reg, kNotVolatile);
     }
     if (!can_assume_type_is_in_dex_cache) {
-      GenIfNullUseHelperImmMethod(class_reg, kQuickInitializeType, type_idx, r_method);
+      GenIfNullUseHelperImm(class_reg, kQuickInitializeType, type_idx);
 
       // Should load value here.
       LoadValueDirectFixed(rl_src, ref_reg);  // kArg0 <= ref
@@ -1394,12 +1371,11 @@
                 class_reg, kNotVolatile);
   } else {
     // Load dex cache entry into class_reg (kArg2)
-    RegStorage r_method = RegStorage::InvalidReg();
     if (CanUseOpPcRelDexCacheArrayLoad()) {
       size_t offset = dex_cache_arrays_layout_.TypeOffset(type_idx);
       OpPcRelDexCacheArrayLoad(cu_->dex_file, offset, class_reg);
     } else {
-      r_method = LoadCurrMethodWithHint(TargetReg(kArg1, kRef));
+      RegStorage r_method = LoadCurrMethodWithHint(class_reg);
 
       LoadRefDisp(r_method, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(),
                   class_reg, kNotVolatile);
@@ -1408,7 +1384,7 @@
     }
     if (!cu_->compiler_driver->CanAssumeTypeIsPresentInDexCache(*cu_->dex_file, type_idx)) {
       // Need to test presence of type in dex cache at runtime
-      GenIfNullUseHelperImmMethod(class_reg, kQuickInitializeType, type_idx, r_method);
+      GenIfNullUseHelperImm(class_reg, kQuickInitializeType, type_idx);
     }
   }
   // At this point, class_reg (kArg2) has class
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index 1eb3a5f..ab011fc 100755
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -375,6 +375,18 @@
   CallHelper(r_tgt, trampoline, safepoint_pc);
 }
 
+void Mir2Lir::CallRuntimeHelperRegLocationRegLocationRegLocationRegLocation(
+    QuickEntrypointEnum trampoline, RegLocation arg0, RegLocation arg1, RegLocation arg2,
+    RegLocation arg3, bool safepoint_pc) {  // Calls |trampoline| with four RegLocation args.
+  RegStorage r_tgt = CallHelperSetup(trampoline);  // Target register handed to CallHelper().
+  LoadValueDirectFixed(arg0, TargetReg(kArg0, arg0));  // arg0 -> kArg0
+  LoadValueDirectFixed(arg1, TargetReg(kArg1, arg1));  // arg1 -> kArg1
+  LoadValueDirectFixed(arg2, TargetReg(kArg2, arg2));  // arg2 -> kArg2
+  LoadValueDirectFixed(arg3, TargetReg(kArg3, arg3));  // arg3 -> kArg3
+  ClobberCallerSave();  // Issued after all argument loads, right before the call.
+  CallHelper(r_tgt, trampoline, safepoint_pc);
+}
+
 /*
  * If there are any ins passed in registers that have not been promoted
  * to a callee-save register, flush them to the frame.  Perform initial
@@ -966,14 +978,10 @@
 }
 
 bool Mir2Lir::GenInlinedCharAt(CallInfo* info) {
-  // Location of reference to data array
+  // Location of char array data
   int value_offset = mirror::String::ValueOffset().Int32Value();
   // Location of count
   int count_offset = mirror::String::CountOffset().Int32Value();
-  // Starting offset within data array
-  int offset_offset = mirror::String::OffsetOffset().Int32Value();
-  // Start of char data with array_
-  int data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Int32Value();
 
   RegLocation rl_obj = info->args[0];
   RegLocation rl_idx = info->args[1];
@@ -983,38 +991,21 @@
   GenNullCheck(rl_obj.reg, info->opt_flags);
   bool range_check = (!(info->opt_flags & MIR_IGNORE_RANGE_CHECK));
   LIR* range_check_branch = nullptr;
-  RegStorage reg_off;
-  RegStorage reg_ptr;
-  reg_off = AllocTemp();
-  reg_ptr = AllocTempRef();
   if (range_check) {
     reg_max = AllocTemp();
     Load32Disp(rl_obj.reg, count_offset, reg_max);
     MarkPossibleNullPointerException(info->opt_flags);
-  }
-  Load32Disp(rl_obj.reg, offset_offset, reg_off);
-  MarkPossibleNullPointerException(info->opt_flags);
-  LoadRefDisp(rl_obj.reg, value_offset, reg_ptr, kNotVolatile);
-  if (range_check) {
-    // Set up a slow path to allow retry in case of bounds violation */
+    // Set up a slow path to allow retry in case of bounds violation
     OpRegReg(kOpCmp, rl_idx.reg, reg_max);
     FreeTemp(reg_max);
     range_check_branch = OpCondBranch(kCondUge, nullptr);
   }
-  OpRegImm(kOpAdd, reg_ptr, data_offset);
-  if (rl_idx.is_const) {
-    OpRegImm(kOpAdd, reg_off, mir_graph_->ConstantValue(rl_idx.orig_sreg));
-  } else {
-    OpRegReg(kOpAdd, reg_off, rl_idx.reg);
-  }
+  RegStorage reg_ptr = AllocTempRef();
+  OpRegRegImm(kOpAdd, reg_ptr, rl_obj.reg, value_offset);
   FreeTemp(rl_obj.reg);
-  if (rl_idx.location == kLocPhysReg) {
-    FreeTemp(rl_idx.reg);
-  }
   RegLocation rl_dest = InlineTarget(info);
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-  LoadBaseIndexed(reg_ptr, reg_off, rl_result.reg, 1, kUnsignedHalf);
-  FreeTemp(reg_off);
+  LoadBaseIndexed(reg_ptr, rl_idx.reg, rl_result.reg, 1, kUnsignedHalf);
   FreeTemp(reg_ptr);
   StoreValue(rl_dest, rl_result);
   if (range_check) {
@@ -1025,6 +1016,59 @@
   return true;
 }
 
+bool Mir2Lir::GenInlinedStringGetCharsNoCheck(CallInfo* info) {  // Emitted as a memcpy call.
+  if (cu_->instruction_set == kMips || cu_->instruction_set == kMips64) {
+    // TODO: add Mips and Mips64 implementations.
+    return false;
+  }
+  size_t char_component_size = Primitive::ComponentSize(Primitive::kPrimChar);
+  // Offset of the first element within the destination char array (args[3]).
+  int data_offset = mirror::Array::DataOffset(char_component_size).Int32Value();
+  // Offset of the character data within the source string (args[0]).
+  int value_offset = mirror::String::ValueOffset().Int32Value();
+
+  RegLocation rl_obj = info->args[0];     // Source string.
+  RegLocation rl_start = info->args[1];   // Index of the first char to copy.
+  RegLocation rl_end = info->args[2];     // End index; the copy length is end - start.
+  RegLocation rl_buffer = info->args[3];  // Destination char array.
+  RegLocation rl_index = info->args[4];   // Index in the destination to copy to.
+
+  ClobberCallerSave();
+  LockCallTemps();  // Using fixed registers: kArg0 = dst, kArg1 = src, kArg2 = byte count.
+  RegStorage reg_dst_ptr = TargetReg(kArg0, kRef);
+  RegStorage reg_src_ptr = TargetReg(kArg1, kRef);
+  RegStorage reg_length = TargetReg(kArg2, kNotWide);
+  RegStorage reg_tmp = TargetReg(kArg3, kNotWide);
+  RegStorage reg_tmp_ptr = RegStorage(RegStorage::k64BitSolo, reg_tmp.GetRawBits() & RegStorage::kRegTypeMask);
+
+  LoadValueDirectFixed(rl_buffer, reg_dst_ptr);
+  OpRegImm(kOpAdd, reg_dst_ptr, data_offset);
+  LoadValueDirectFixed(rl_index, reg_tmp);
+  OpRegRegImm(kOpLsl, reg_tmp, reg_tmp, 1);  // Scale the char index by 2 to get a byte offset.
+  OpRegReg(kOpAdd, reg_dst_ptr, cu_->instruction_set == kArm64 ? reg_tmp_ptr : reg_tmp);
+
+  LoadValueDirectFixed(rl_start, reg_tmp);
+  LoadValueDirectFixed(rl_end, reg_length);
+  OpRegReg(kOpSub, reg_length, reg_tmp);  // length = end - start, in chars.
+  OpRegRegImm(kOpLsl, reg_length, reg_length, 1);  // Chars -> bytes.
+  LoadValueDirectFixed(rl_obj, reg_src_ptr);
+
+  OpRegImm(kOpAdd, reg_src_ptr, value_offset);
+  OpRegRegImm(kOpLsl, reg_tmp, reg_tmp, 1);  // reg_tmp still holds start; scale it to bytes.
+  OpRegReg(kOpAdd, reg_src_ptr, cu_->instruction_set == kArm64 ? reg_tmp_ptr : reg_tmp);
+
+  RegStorage r_tgt;
+  if (cu_->instruction_set != kX86 && cu_->instruction_set != kX86_64) {
+    r_tgt = LoadHelper(kQuickMemcpy);
+  } else {
+    r_tgt = RegStorage::InvalidReg();
+  }
+  // NOTE: not a safepoint
+  CallHelper(r_tgt, kQuickMemcpy, false, true);
+
+  return true;
+}
+
 // Generates an inlined String.is_empty or String.length.
 bool Mir2Lir::GenInlinedStringIsEmptyOrLength(CallInfo* info, bool is_empty) {
   if (cu_->instruction_set == kMips || cu_->instruction_set == kMips64) {
@@ -1058,6 +1102,58 @@
   return true;
 }
 
+bool Mir2Lir::GenInlinedStringFactoryNewStringFromBytes(CallInfo* info) {
+  if (cu_->instruction_set == kMips || cu_->instruction_set == kMips64) {
+    // TODO: add Mips and Mips64 implementations.
+    return false;
+  }
+  RegLocation rl_data = info->args[0];  // The four arguments go to the helper in this order.
+  RegLocation rl_high = info->args[1];
+  RegLocation rl_offset = info->args[2];
+  RegLocation rl_count = info->args[3];
+  rl_data = LoadValue(rl_data, kRefReg);
+  LIR* data_null_check_branch = OpCmpImmBranch(kCondEq, rl_data.reg, 0, nullptr);
+  AddIntrinsicSlowPath(info, data_null_check_branch);  // A null data array takes the slow path.
+  CallRuntimeHelperRegLocationRegLocationRegLocationRegLocation(
+      kQuickAllocStringFromBytes, rl_data, rl_high, rl_offset, rl_count, true);
+  RegLocation rl_return = GetReturn(kRefReg);  // The helper's result is read as a reference.
+  RegLocation rl_dest = InlineTarget(info);
+  StoreValue(rl_dest, rl_return);
+  return true;
+}
+
+bool Mir2Lir::GenInlinedStringFactoryNewStringFromChars(CallInfo* info) {
+  if (cu_->instruction_set == kMips || cu_->instruction_set == kMips64) {
+    // TODO: add Mips and Mips64 implementations.
+    return false;
+  }
+  RegLocation rl_offset = info->args[0];  // Passed to the helper as (offset, count, data).
+  RegLocation rl_count = info->args[1];
+  RegLocation rl_data = info->args[2];
+  CallRuntimeHelperRegLocationRegLocationRegLocation(
+      kQuickAllocStringFromChars, rl_offset, rl_count, rl_data, true);
+  RegLocation rl_return = GetReturn(kRefReg);  // The helper's result is read as a reference.
+  RegLocation rl_dest = InlineTarget(info);
+  StoreValue(rl_dest, rl_return);
+  return true;
+}
+
+bool Mir2Lir::GenInlinedStringFactoryNewStringFromString(CallInfo* info) {
+  if (cu_->instruction_set == kMips || cu_->instruction_set == kMips64) {
+    // TODO: add Mips and Mips64 implementations.
+    return false;
+  }
+  RegLocation rl_string = info->args[0];  // The string to copy; the helper's only argument.
+  rl_string = LoadValue(rl_string, kRefReg);
+  LIR* string_null_check_branch = OpCmpImmBranch(kCondEq, rl_string.reg, 0, nullptr);
+  AddIntrinsicSlowPath(info, string_null_check_branch);  // A null string takes the slow path.
+  CallRuntimeHelperRegLocation(kQuickAllocStringFromString, rl_string, true);
+  RegLocation rl_return = GetReturn(kRefReg);  // The helper's result is read as a reference.
+  RegLocation rl_dest = InlineTarget(info);
+  StoreValue(rl_dest, rl_return);
+  return true;
+}
+
 bool Mir2Lir::GenInlinedReverseBytes(CallInfo* info, OpSize size) {
   if (cu_->instruction_set == kMips || cu_->instruction_set == kMips64) {
     // TODO: add Mips and Mips64 implementations.
@@ -1451,9 +1547,22 @@
   LockCallTemps();
 
   const MirMethodLoweringInfo& method_info = mir_graph_->GetMethodLoweringInfo(info->mir);
+  MethodReference target_method = method_info.GetTargetMethod();
   cu_->compiler_driver->ProcessedInvoke(method_info.GetInvokeType(), method_info.StatsFlags());
   InvokeType original_type = static_cast<InvokeType>(method_info.GetInvokeType());
   info->type = method_info.GetSharpType();
+  bool is_string_init = false;
+  if (method_info.IsSpecial()) {
+    DexFileMethodInliner* inliner = cu_->compiler_driver->GetMethodInlinerMap()->GetMethodInliner(
+        target_method.dex_file);
+    if (inliner->IsStringInitMethodIndex(target_method.dex_method_index)) {
+      is_string_init = true;
+      size_t pointer_size = GetInstructionSetPointerSize(cu_->instruction_set);
+      info->string_init_offset = inliner->GetOffsetForStringInit(target_method.dex_method_index,
+                                                                 pointer_size);
+      info->type = kStatic;
+    }
+  }
   bool fast_path = method_info.FastPath();
   bool skip_this;
 
@@ -1478,7 +1587,6 @@
     next_call_insn = fast_path ? NextVCallInsn : NextVCallInsnSP;
     skip_this = fast_path;
   }
-  MethodReference target_method = method_info.GetTargetMethod();
   call_state = GenDalvikArgs(info, call_state, p_null_ck,
                              next_call_insn, target_method, method_info.VTableIndex(),
                              method_info.DirectCode(), method_info.DirectMethod(),
@@ -1495,7 +1603,7 @@
   FreeCallTemps();
   if (info->result.location != kLocInvalid) {
     // We have a following MOVE_RESULT - do it now.
-    RegisterClass reg_class =
+    RegisterClass reg_class = is_string_init ? kRefReg :
         ShortyToRegClass(mir_graph_->GetShortyFromMethodReference(info->method_ref)[0]);
     if (info->result.wide) {
       RegLocation ret_loc = GetReturnWide(reg_class);
diff --git a/compiler/dex/quick/mips/call_mips.cc b/compiler/dex/quick/mips/call_mips.cc
index 39b9cc7..3d25384 100644
--- a/compiler/dex/quick/mips/call_mips.cc
+++ b/compiler/dex/quick/mips/call_mips.cc
@@ -20,7 +20,9 @@
 
 #include "base/logging.h"
 #include "dex/mir_graph.h"
+#include "dex/quick/dex_file_to_method_inliner_map.h"
 #include "dex/quick/mir_to_lir-inl.h"
+#include "driver/compiler_driver.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "gc/accounting/card_table.h"
 #include "mips_lir.h"
@@ -397,11 +399,28 @@
  * Bit of a hack here - in the absence of a real scheduling pass,
  * emit the next instruction in static & direct invoke sequences.
  */
-static int NextSDCallInsn(CompilationUnit* cu, CallInfo* info ATTRIBUTE_UNUSED, int state,
+static int NextSDCallInsn(CompilationUnit* cu, CallInfo* info, int state,
                           const MethodReference& target_method, uint32_t, uintptr_t direct_code,
                           uintptr_t direct_method, InvokeType type) {
   Mir2Lir* cg = static_cast<Mir2Lir*>(cu->cg.get());
-  if (direct_code != 0 && direct_method != 0) {
+  if (info->string_init_offset != 0) {
+    RegStorage arg0_ref = cg->TargetReg(kArg0, kRef);
+    switch (state) {
+    case 0: {  // Grab target method* from thread pointer
+      cg->LoadRefDisp(cg->TargetPtrReg(kSelf), info->string_init_offset, arg0_ref, kNotVolatile);
+      break;
+    }
+    case 1:  // Grab the code from the method*
+      if (direct_code == 0) {
+        int32_t offset = mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
+            InstructionSetPointerSize(cu->instruction_set)).Int32Value();
+        cg->LoadWordDisp(arg0_ref, offset, cg->TargetPtrReg(kInvokeTgt));
+      }
+      break;
+    default:
+      return -1;
+    }
+  } else if (direct_code != 0 && direct_method != 0) {
     switch (state) {
       case 0:  // Get the current Method* [sets kArg0]
         if (direct_code != static_cast<uintptr_t>(-1)) {
diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc
index e9e9161..e3e87ec 100644
--- a/compiler/dex/quick/mir_to_lir.cc
+++ b/compiler/dex/quick/mir_to_lir.cc
@@ -1187,7 +1187,6 @@
     case kMirOpRangeCheck:
     case kMirOpDivZeroCheck:
     case kMirOpCheck:
-    case kMirOpCheckPart2:
       // Ignore these known opcodes
       break;
     default:
@@ -1276,20 +1275,6 @@
       head_lir->u.m.def_mask = &kEncodeAll;
     }
 
-    if (opcode == kMirOpCheck) {
-      // Combine check and work halves of throwing instruction.
-      MIR* work_half = mir->meta.throw_insn;
-      mir->dalvikInsn = work_half->dalvikInsn;
-      mir->optimization_flags = work_half->optimization_flags;
-      mir->meta = work_half->meta;  // Whatever the work_half had, we need to copy it.
-      opcode = work_half->dalvikInsn.opcode;
-      SSARepresentation* ssa_rep = work_half->ssa_rep;
-      work_half->ssa_rep = mir->ssa_rep;
-      mir->ssa_rep = ssa_rep;
-      work_half->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpCheckPart2);
-      work_half->meta.throw_insn = mir;
-    }
-
     if (MIR::DecodedInstruction::IsPseudoMirOp(opcode)) {
       HandleExtendedMethodMIR(bb, mir);
       continue;
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index 8f08a51..4fdc728 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -897,6 +897,10 @@
                                                             RegLocation arg0, RegLocation arg1,
                                                             RegLocation arg2,
                                                             bool safepoint_pc);
+    void CallRuntimeHelperRegLocationRegLocationRegLocationRegLocation(
+        QuickEntrypointEnum trampoline, RegLocation arg0, RegLocation arg1,
+        RegLocation arg2, RegLocation arg3, bool safepoint_pc);
+
     void GenInvoke(CallInfo* info);
     void GenInvokeNoInline(CallInfo* info);
     virtual NextCallInsn GetNextSDCallInsn() = 0;
@@ -937,7 +941,11 @@
 
     bool GenInlinedReferenceGetReferent(CallInfo* info);
     virtual bool GenInlinedCharAt(CallInfo* info);
+    bool GenInlinedStringGetCharsNoCheck(CallInfo* info);
     bool GenInlinedStringIsEmptyOrLength(CallInfo* info, bool is_empty);
+    bool GenInlinedStringFactoryNewStringFromBytes(CallInfo* info);
+    bool GenInlinedStringFactoryNewStringFromChars(CallInfo* info);
+    bool GenInlinedStringFactoryNewStringFromString(CallInfo* info);
     virtual bool GenInlinedReverseBits(CallInfo* info, OpSize size);
     bool GenInlinedReverseBytes(CallInfo* info, OpSize size);
     virtual bool GenInlinedAbsInt(CallInfo* info);
@@ -1459,26 +1467,6 @@
       return InexpensiveConstantInt(value);
     }
 
-    /**
-     * @brief Whether division by the given divisor can be converted to multiply by its reciprocal.
-     * @param divisor A constant divisor bits of float type.
-     * @return Returns true iff, x/divisor == x*(1.0f/divisor), for every float x.
-     */
-    bool CanDivideByReciprocalMultiplyFloat(int32_t divisor) {
-      // True, if float value significand bits are 0.
-      return ((divisor & 0x7fffff) == 0);
-    }
-
-    /**
-     * @brief Whether division by the given divisor can be converted to multiply by its reciprocal.
-     * @param divisor A constant divisor bits of double type.
-     * @return Returns true iff, x/divisor == x*(1.0/divisor), for every double x.
-     */
-    bool CanDivideByReciprocalMultiplyDouble(int64_t divisor) {
-      // True, if double value significand bits are 0.
-      return ((divisor & ((UINT64_C(1) << 52) - 1)) == 0);
-    }
-
     // May be optimized by targets.
     virtual void GenMonitorEnter(int opt_flags, RegLocation rl_src);
     virtual void GenMonitorExit(int opt_flags, RegLocation rl_src);
@@ -1692,10 +1680,8 @@
      * @param r_result the result register.
      * @param trampoline the helper to call in slow path.
      * @param imm the immediate passed to the helper.
-     * @param r_method the register with ArtMethod* if available, otherwise RegStorage::Invalid().
      */
-    void GenIfNullUseHelperImmMethod(
-        RegStorage r_result, QuickEntrypointEnum trampoline, int imm, RegStorage r_method);
+    void GenIfNullUseHelperImm(RegStorage r_result, QuickEntrypointEnum trampoline, int imm);
 
     /**
      * @brief Generate code to retrieve Class* for another type to be used by SGET/SPUT.
diff --git a/compiler/dex/quick/quick_compiler.cc b/compiler/dex/quick/quick_compiler.cc
index 39eb117..7ca4382 100644
--- a/compiler/dex/quick/quick_compiler.cc
+++ b/compiler/dex/quick/quick_compiler.cc
@@ -403,7 +403,6 @@
     kMirOpRangeCheck,
     kMirOpDivZeroCheck,
     kMirOpCheck,
-    kMirOpCheckPart2,
     kMirOpSelect,
 };
 
@@ -575,7 +574,7 @@
   // (1 << kNullCheckElimination) |
   // (1 << kClassInitCheckElimination) |
   // (1 << kGlobalValueNumbering) |
-  (1 << kGvnDeadCodeElimination) |
+  // (1 << kGvnDeadCodeElimination) |
   // (1 << kLocalValueNumbering) |
   // (1 << kPromoteRegs) |
   // (1 << kTrackLiveTemps) |
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc
index eb33357..8467b71 100644
--- a/compiler/dex/quick/x86/assemble_x86.cc
+++ b/compiler/dex/quick/x86/assemble_x86.cc
@@ -409,7 +409,7 @@
   EXT_0F_ENCODING_MAP(Paddq,     0x66, 0xD4, REG_DEF0_USE0),
   EXT_0F_ENCODING_MAP(Psadbw,    0x66, 0xF6, REG_DEF0_USE0),
   EXT_0F_ENCODING_MAP(Addps,     0x00, 0x58, REG_DEF0_USE0),
-  EXT_0F_ENCODING_MAP(Addpd,     0xF2, 0x58, REG_DEF0_USE0),
+  EXT_0F_ENCODING_MAP(Addpd,     0x66, 0x58, REG_DEF0_USE0),
   EXT_0F_ENCODING_MAP(Psubb,     0x66, 0xF8, REG_DEF0_USE0),
   EXT_0F_ENCODING_MAP(Psubw,     0x66, 0xF9, REG_DEF0_USE0),
   EXT_0F_ENCODING_MAP(Psubd,     0x66, 0xFA, REG_DEF0_USE0),
@@ -428,7 +428,7 @@
   { kX86PextrwRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0  | REG_USE1, { 0x66, 0, 0x0F, 0xC5, 0x00, 0, 0, 1, false }, "PextwRRI", "!0r,!1r,!2d" },
   { kX86PextrdRRI, kRegRegImmStore, IS_TERTIARY_OP | REG_DEF0  | REG_USE1, { 0x66, 0, 0x0F, 0x3A, 0x16, 0, 0, 1, false }, "PextdRRI", "!0r,!1r,!2d" },
   { kX86PextrbMRI, kMemRegImm, IS_QUAD_OP     | REG_USE02 | IS_STORE, { 0x66, 0, 0x0F, 0x3A, 0x16, 0, 0, 1, false }, "PextrbMRI", "[!0r+!1d],!2r,!3d" },
-  { kX86PextrwMRI, kMemRegImm, IS_QUAD_OP     | REG_USE02 | IS_STORE, { 0x66, 0, 0x0F, 0x3A, 0x16, 0, 0, 1, false }, "PextrwMRI", "[!0r+!1d],!2r,!3d" },
+  { kX86PextrwMRI, kMemRegImm, IS_QUAD_OP     | REG_USE02 | IS_STORE, { 0x66, 0, 0x0F, 0x3A, 0x15, 0, 0, 1, false }, "PextrwMRI", "[!0r+!1d],!2r,!3d" },
   { kX86PextrdMRI, kMemRegImm, IS_QUAD_OP     | REG_USE02 | IS_STORE, { 0x66, 0, 0x0F, 0x3A, 0x16, 0, 0, 1, false }, "PextrdMRI", "[!0r+!1d],!2r,!3d" },
 
   { kX86PshuflwRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { 0xF2, 0, 0x0F, 0x70, 0, 0, 0, 1, false }, "PshuflwRRI", "!0r,!1r,!2d" },
@@ -1627,13 +1627,13 @@
  * instruction.  In those cases we will try to substitute a new code
  * sequence or request that the trace be shortened and retried.
  */
-AssemblerStatus X86Mir2Lir::AssembleInstructions(CodeOffset start_addr) {
+AssemblerStatus X86Mir2Lir::AssembleInstructions(LIR* first_lir_insn, CodeOffset start_addr) {
   UNUSED(start_addr);
   LIR *lir;
   AssemblerStatus res = kSuccess;  // Assume success
 
   const bool kVerbosePcFixup = false;
-  for (lir = first_lir_insn_; lir != nullptr; lir = NEXT_LIR(lir)) {
+  for (lir = first_lir_insn; lir != nullptr; lir = NEXT_LIR(lir)) {
     if (IsPseudoLirOp(lir->opcode)) {
       continue;
     }
@@ -2034,7 +2034,7 @@
    */
 
   while (true) {
-    AssemblerStatus res = AssembleInstructions(0);
+    AssemblerStatus res = AssembleInstructions(first_lir_insn_, 0);
     if (res == kSuccess) {
       break;
     } else {
diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc
index e2364d8..2495757 100644
--- a/compiler/dex/quick/x86/call_x86.cc
+++ b/compiler/dex/quick/x86/call_x86.cc
@@ -19,6 +19,7 @@
 #include "codegen_x86.h"
 
 #include "base/logging.h"
+#include "dex/quick/dex_file_to_method_inliner_map.h"
 #include "dex/quick/mir_to_lir-inl.h"
 #include "driver/compiler_driver.h"
 #include "driver/compiler_options.h"
@@ -343,11 +344,20 @@
 int X86Mir2Lir::X86NextSDCallInsn(CompilationUnit* cu, CallInfo* info,
                                   int state, const MethodReference& target_method,
                                   uint32_t,
-                                  uintptr_t direct_code, uintptr_t direct_method,
+                                  uintptr_t direct_code ATTRIBUTE_UNUSED, uintptr_t direct_method,
                                   InvokeType type) {
-  UNUSED(info, direct_code);
   X86Mir2Lir* cg = static_cast<X86Mir2Lir*>(cu->cg.get());
-  if (direct_method != 0) {
+  if (info->string_init_offset != 0) {
+    RegStorage arg0_ref = cg->TargetReg(kArg0, kRef);
+    switch (state) {
+    case 0: {  // Grab target method* from thread pointer
+      cg->NewLIR2(kX86Mov32RT, arg0_ref.GetReg(), info->string_init_offset);
+      break;
+    }
+    default:
+      return -1;
+    }
+  } else if (direct_method != 0) {
     switch (state) {
     case 0:  // Get the current Method* [sets kArg0]
       if (direct_method != static_cast<uintptr_t>(-1)) {
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index 72580a3..5a46520 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -432,7 +432,7 @@
 
   int AssignInsnOffsets();
   void AssignOffsets();
-  AssemblerStatus AssembleInstructions(CodeOffset start_addr);
+  AssemblerStatus AssembleInstructions(LIR* first_lir_insn, CodeOffset start_addr);
 
   size_t ComputeSize(const X86EncodingMap* entry, int32_t raw_reg, int32_t raw_index,
                      int32_t raw_base, int32_t displacement);
@@ -972,6 +972,9 @@
   static const X86EncodingMap EncodingMap[kX86Last];
 
   friend std::ostream& operator<<(std::ostream& os, const X86OpCode& rhs);
+  friend class QuickAssembleX86Test;
+  friend class QuickAssembleX86MacroTest;
+  friend class QuickAssembleX86LowLevelTest;
 
   DISALLOW_COPY_AND_ASSIGN(X86Mir2Lir);
 };
diff --git a/compiler/dex/quick/x86/quick_assemble_x86_test.cc b/compiler/dex/quick/x86/quick_assemble_x86_test.cc
new file mode 100644
index 0000000..f58f206
--- /dev/null
+++ b/compiler/dex/quick/x86/quick_assemble_x86_test.cc
@@ -0,0 +1,270 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dex/quick/quick_compiler.h"
+#include "dex/pass_manager.h"
+#include "dex/verification_results.h"
+#include "dex/quick/dex_file_to_method_inliner_map.h"
+#include "runtime/dex_file.h"
+#include "driver/compiler_options.h"
+#include "driver/compiler_driver.h"
+#include "codegen_x86.h"
+#include "gtest/gtest.h"
+#include "utils/assembler_test_base.h"
+
+namespace art {
+
+class QuickAssembleX86TestBase : public testing::Test {  // Builds a bare X86Mir2Lir for tests.
+ protected:
+  X86Mir2Lir* Prepare(InstructionSet target) {  // Creates every fixture for |target|.
+    isa_ = target;
+    pool_.reset(new ArenaPool());
+    compiler_options_.reset(new CompilerOptions(
+        CompilerOptions::kDefaultCompilerFilter,
+        CompilerOptions::kDefaultHugeMethodThreshold,
+        CompilerOptions::kDefaultLargeMethodThreshold,
+        CompilerOptions::kDefaultSmallMethodThreshold,
+        CompilerOptions::kDefaultTinyMethodThreshold,
+        CompilerOptions::kDefaultNumDexMethodsThreshold,
+        false,
+        CompilerOptions::kDefaultTopKProfileThreshold,
+        false,
+        false,
+        false,
+        false,
+        false,
+        false,
+        false,
+        nullptr,
+        new PassManagerOptions(),
+        nullptr,
+        false));
+    verification_results_.reset(new VerificationResults(compiler_options_.get()));
+    method_inliner_map_.reset(new DexFileToMethodInlinerMap());
+    compiler_driver_.reset(new CompilerDriver(
+        compiler_options_.get(),
+        verification_results_.get(),
+        method_inliner_map_.get(),
+        Compiler::kQuick,
+        isa_,
+        nullptr,
+        false,
+        nullptr,
+        nullptr,
+        nullptr,
+        0,
+        false,
+        false,
+        "",
+        0,
+        -1,
+        ""));
+    cu_.reset(new CompilationUnit(pool_.get(), isa_, compiler_driver_.get(), nullptr));
+    DexFile::CodeItem* code_item = static_cast<DexFile::CodeItem*>(
+        cu_->arena.Alloc(sizeof(DexFile::CodeItem), kArenaAllocMisc));
+    memset(code_item, 0, sizeof(DexFile::CodeItem));  // All-zero placeholder code item.
+    cu_->mir_graph.reset(new MIRGraph(cu_.get(), &cu_->arena));
+    cu_->mir_graph->current_code_item_ = code_item;
+    cu_->cg.reset(QuickCompiler::GetCodeGenerator(cu_.get(), nullptr));
+
+    test_helper_.reset(new AssemblerTestInfrastructure(  // Set up with "as" and "objdump".
+        isa_ == kX86 ? "x86" : "x86_64",
+        "as",
+        isa_ == kX86 ? " --32" : "",
+        "objdump",
+        " -h",
+        "objdump",
+        isa_ == kX86 ?
+            " -D -bbinary -mi386 --no-show-raw-insn" :
+            " -D -bbinary -mi386:x86-64 -Mx86-64,addr64,data32 --no-show-raw-insn",
+        nullptr));
+
+    X86Mir2Lir* m2l = static_cast<X86Mir2Lir*>(cu_->cg.get());  // cu_ keeps ownership.
+    m2l->CompilerInitializeRegAlloc();
+    return m2l;
+  }
+
+  void Release() {  // Resets each fixture before the objects it was constructed from.
+    cu_.reset();
+    compiler_driver_.reset();
+    method_inliner_map_.reset();
+    verification_results_.reset();
+    compiler_options_.reset();
+    pool_.reset();
+
+    test_helper_.reset();
+  }
+
+  void TearDown() OVERRIDE {  // Releases whatever a test left alive.
+    Release();
+  }
+
+  bool CheckTools(InstructionSet target) {  // Builds the fixtures just to query the tool check.
+    Prepare(target);
+    bool result = test_helper_->CheckTools();
+    Release();
+    return result;
+  }
+
+  std::unique_ptr<CompilationUnit> cu_;
+  std::unique_ptr<AssemblerTestInfrastructure> test_helper_;
+
+ private:
+  InstructionSet isa_;  // Target passed to the most recent Prepare().
+  std::unique_ptr<ArenaPool> pool_;
+  std::unique_ptr<CompilerOptions> compiler_options_;
+  std::unique_ptr<VerificationResults> verification_results_;
+  std::unique_ptr<DexFileToMethodInlinerMap> method_inliner_map_;
+  std::unique_ptr<CompilerDriver> compiler_driver_;
+};
+
+class QuickAssembleX86LowLevelTest : public QuickAssembleX86TestBase {
+ protected:
+  void Test(InstructionSet target, std::string test_name, std::string gcc_asm,
+            int opcode, int op0 = 0, int op1 = 0, int op2 = 0, int op3 = 0, int op4 = 0) {
+    X86Mir2Lir* m2l = Prepare(target);
+
+    LIR lir;  // A single hand-built LIR carrying |opcode| and its operands.
+    memset(&lir, 0, sizeof(LIR));  // Zero every field that is not set explicitly below.
+    lir.opcode = opcode;
+    lir.operands[0] = op0;
+    lir.operands[1] = op1;
+    lir.operands[2] = op2;
+    lir.operands[3] = op3;
+    lir.operands[4] = op4;
+    lir.flags.size = m2l->GetInsnSize(&lir);
+
+    AssemblerStatus status = m2l->AssembleInstructions(&lir, 0);
+    // Assembly must succeed outright; any other status fails the test here.
+    ASSERT_EQ(status, AssemblerStatus::kSuccess);
+
+    // Copy code_buffer_ into a plain std::vector<uint8_t> to pass to Driver().
+    std::vector<uint8_t> buffer(m2l->code_buffer_.begin(), m2l->code_buffer_.end());
+    test_helper_->Driver(buffer, gcc_asm, test_name);
+
+    Release();  // Tear down so the next Test() call starts from fresh fixtures.
+  }
+};
+
+TEST_F(QuickAssembleX86LowLevelTest, Addpd) {
+  Test(kX86, "Addpd", "addpd %xmm1, %xmm0\n", kX86AddpdRR,
+       RegStorage::Solo128(0).GetReg(), RegStorage::Solo128(1).GetReg());
+  Test(kX86_64, "Addpd", "addpd %xmm1, %xmm0\n", kX86AddpdRR,
+       RegStorage::Solo128(0).GetReg(), RegStorage::Solo128(1).GetReg());
+}
+
+TEST_F(QuickAssembleX86LowLevelTest, Subpd) {
+  Test(kX86, "Subpd", "subpd %xmm1, %xmm0\n", kX86SubpdRR,
+       RegStorage::Solo128(0).GetReg(), RegStorage::Solo128(1).GetReg());
+  Test(kX86_64, "Subpd", "subpd %xmm1, %xmm0\n", kX86SubpdRR,
+       RegStorage::Solo128(0).GetReg(), RegStorage::Solo128(1).GetReg());
+}
+
+TEST_F(QuickAssembleX86LowLevelTest, Mulpd) {
+  Test(kX86, "Mulpd", "mulpd %xmm1, %xmm0\n", kX86MulpdRR,
+       RegStorage::Solo128(0).GetReg(), RegStorage::Solo128(1).GetReg());
+  Test(kX86_64, "Mulpd", "mulpd %xmm1, %xmm0\n", kX86MulpdRR,
+       RegStorage::Solo128(0).GetReg(), RegStorage::Solo128(1).GetReg());
+}
+
+TEST_F(QuickAssembleX86LowLevelTest, Pextrw) {
+  Test(kX86, "Pextrw", "pextrw $7, %xmm3, 8(%eax)\n", kX86PextrwMRI,
+       RegStorage::Solo32(r0).GetReg(), 8, RegStorage::Solo128(3).GetReg(), 7);
+  Test(kX86_64, "Pextrw", "pextrw $7, %xmm8, 8(%r10)\n", kX86PextrwMRI,
+       RegStorage::Solo64(r10q).GetReg(), 8, RegStorage::Solo128(8).GetReg(), 7);
+}
+
+class QuickAssembleX86MacroTest : public QuickAssembleX86TestBase {
+ protected:
+  typedef void (X86Mir2Lir::*AsmFn)(MIR*);
+
+  void TestVectorFn(InstructionSet target,
+                    Instruction::Code opcode,
+                    AsmFn f,
+                    std::string inst_string) {
+    X86Mir2Lir *m2l = Prepare(target);
+
+    // Create a vector MIR.
+    MIR* mir = cu_->mir_graph->NewMIR();
+    mir->dalvikInsn.opcode = opcode;
+    mir->dalvikInsn.vA = 0;  // Destination and source.
+    mir->dalvikInsn.vB = 1;  // Source.
+    int vector_size = 128;
+    int vector_type = kDouble;
+    mir->dalvikInsn.vC = (vector_type << 16) | vector_size;  // Type size.
+    (m2l->*f)(mir);
+    m2l->AssembleLIR();
+
+    std::string gcc_asm = inst_string + " %xmm1, %xmm0\n";
+    // Need a "base" std::vector.
+    std::vector<uint8_t> buffer(m2l->code_buffer_.begin(), m2l->code_buffer_.end());
+    test_helper_->Driver(buffer, gcc_asm, inst_string);
+
+    Release();
+  }
+
+  // Tests are member functions as many of the assembler functions are protected or private,
+  // and it would be inelegant to define ART_FRIEND_TEST for all the tests.
+
+  void TestAddpd() {
+    TestVectorFn(kX86,
+                 static_cast<Instruction::Code>(kMirOpPackedAddition),
+                 &X86Mir2Lir::GenAddVector,
+                 "addpd");
+    TestVectorFn(kX86_64,
+                 static_cast<Instruction::Code>(kMirOpPackedAddition),
+                 &X86Mir2Lir::GenAddVector,
+                 "addpd");
+  }
+
+  void TestSubpd() {
+    TestVectorFn(kX86,
+                 static_cast<Instruction::Code>(kMirOpPackedSubtract),
+                 &X86Mir2Lir::GenSubtractVector,
+                 "subpd");
+    TestVectorFn(kX86_64,
+                 static_cast<Instruction::Code>(kMirOpPackedSubtract),
+                 &X86Mir2Lir::GenSubtractVector,
+                 "subpd");
+  }
+
+  void TestMulpd() {
+    TestVectorFn(kX86,
+                 static_cast<Instruction::Code>(kMirOpPackedMultiply),
+                 &X86Mir2Lir::GenMultiplyVector,
+                 "mulpd");
+    TestVectorFn(kX86_64,
+                 static_cast<Instruction::Code>(kMirOpPackedMultiply),
+                 &X86Mir2Lir::GenMultiplyVector,
+                 "mulpd");
+  }
+};
+
+TEST_F(QuickAssembleX86MacroTest, CheckTools) {
+  ASSERT_TRUE(CheckTools(kX86)) << "x86 tools not found.";
+  ASSERT_TRUE(CheckTools(kX86_64)) << "x86_64 tools not found.";
+}
+
+#define DECLARE_TEST(name)             \
+  TEST_F(QuickAssembleX86MacroTest, name) { \
+    Test ## name();                    \
+  }
+
+DECLARE_TEST(Addpd)
+DECLARE_TEST(Subpd)
+DECLARE_TEST(Mulpd)
+
+}  // namespace art
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index b460379..2f211da 100755
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -1302,10 +1302,6 @@
   int value_offset = mirror::String::ValueOffset().Int32Value();
   // Location of count within the String object.
   int count_offset = mirror::String::CountOffset().Int32Value();
-  // Starting offset within data array.
-  int offset_offset = mirror::String::OffsetOffset().Int32Value();
-  // Start of char data with array_.
-  int data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Int32Value();
 
   // Compute the number of words to search in to rCX.
   Load32Disp(rs_rDX, count_offset, rs_rCX);
@@ -1388,15 +1384,13 @@
 
   // Load the address of the string into EDI.
   // In case of start index we have to add the address to existing value in EDI.
-  // The string starts at VALUE(String) + 2 * OFFSET(String) + DATA_OFFSET.
   if (zero_based || (!zero_based && rl_start.is_const && start_value == 0)) {
-    Load32Disp(rs_rDX, offset_offset, rs_rDI);
+    OpRegRegImm(kOpAdd, rs_rDI, rs_rDX, value_offset);
   } else {
-    OpRegMem(kOpAdd, rs_rDI, rs_rDX, offset_offset);
+    OpRegImm(kOpLsl, rs_rDI, 1);
+    OpRegReg(kOpAdd, rs_rDI, rs_rDX);
+    OpRegImm(kOpAdd, rs_rDI, value_offset);
   }
-  OpRegImm(kOpLsl, rs_rDI, 1);
-  OpRegMem(kOpAdd, rs_rDI, rs_rDX, value_offset);
-  OpRegImm(kOpAdd, rs_rDI, data_offset);
 
   // EDI now contains the start of the string to be searched.
   // We are all prepared to do the search for the character.
@@ -2423,24 +2417,15 @@
   int value_offset = mirror::String::ValueOffset().Int32Value();
   // Location of count
   int count_offset = mirror::String::CountOffset().Int32Value();
-  // Starting offset within data array
-  int offset_offset = mirror::String::OffsetOffset().Int32Value();
-  // Start of char data with array_
-  int data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Int32Value();
 
   RegLocation rl_obj = info->args[0];
   RegLocation rl_idx = info->args[1];
   rl_obj = LoadValue(rl_obj, kRefReg);
-  // X86 wants to avoid putting a constant index into a register.
-  if (!rl_idx.is_const) {
-    rl_idx = LoadValue(rl_idx, kCoreReg);
-  }
+  rl_idx = LoadValue(rl_idx, kCoreReg);
   RegStorage reg_max;
   GenNullCheck(rl_obj.reg, info->opt_flags);
   bool range_check = (!(info->opt_flags & MIR_IGNORE_RANGE_CHECK));
   LIR* range_check_branch = nullptr;
-  RegStorage reg_off;
-  RegStorage reg_ptr;
   if (range_check) {
     // On x86, we can compare to memory directly
     // Set up a launch pad to allow retry in case of bounds violation */
@@ -2456,24 +2441,11 @@
       range_check_branch = OpCondBranch(kCondUge, nullptr);
     }
   }
-  reg_off = AllocTemp();
-  reg_ptr = AllocTempRef();
-  Load32Disp(rl_obj.reg, offset_offset, reg_off);
-  LoadRefDisp(rl_obj.reg, value_offset, reg_ptr, kNotVolatile);
-  if (rl_idx.is_const) {
-    OpRegImm(kOpAdd, reg_off, mir_graph_->ConstantValue(rl_idx.orig_sreg));
-  } else {
-    OpRegReg(kOpAdd, reg_off, rl_idx.reg);
-  }
-  FreeTemp(rl_obj.reg);
-  if (rl_idx.location == kLocPhysReg) {
-    FreeTemp(rl_idx.reg);
-  }
   RegLocation rl_dest = InlineTarget(info);
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-  LoadBaseIndexedDisp(reg_ptr, reg_off, 1, data_offset, rl_result.reg, kUnsignedHalf);
-  FreeTemp(reg_off);
-  FreeTemp(reg_ptr);
+  LoadBaseIndexedDisp(rl_obj.reg, rl_idx.reg, 1, value_offset, rl_result.reg, kUnsignedHalf);
+  FreeTemp(rl_idx.reg);
+  FreeTemp(rl_obj.reg);
   StoreValue(rl_dest, rl_result);
   if (range_check) {
     DCHECK(range_check_branch != nullptr);
diff --git a/compiler/dex/verified_method.cc b/compiler/dex/verified_method.cc
index 7eba515..e788261 100644
--- a/compiler/dex/verified_method.cc
+++ b/compiler/dex/verified_method.cc
@@ -64,6 +64,9 @@
   if (method_verifier->HasCheckCasts()) {
     verified_method->GenerateSafeCastSet(method_verifier);
   }
+
+  verified_method->SetStringInitPcRegMap(method_verifier->GetStringInitPcRegMap());
+
   return verified_method.release();
 }
 
diff --git a/compiler/dex/verified_method.h b/compiler/dex/verified_method.h
index ad07639..242e3df 100644
--- a/compiler/dex/verified_method.h
+++ b/compiler/dex/verified_method.h
@@ -75,6 +75,13 @@
     return has_verification_failures_;
   }
 
+  void SetStringInitPcRegMap(SafeMap<uint32_t, std::set<uint32_t>>& string_init_pc_reg_map) {
+    string_init_pc_reg_map_ = string_init_pc_reg_map;
+  }
+  const SafeMap<uint32_t, std::set<uint32_t>>& GetStringInitPcRegMap() const {
+    return string_init_pc_reg_map_;
+  }
+
  private:
   VerifiedMethod() = default;
 
@@ -114,6 +121,10 @@
   SafeCastSet safe_cast_set_;
 
   bool has_verification_failures_;
+
+  // Copy of mapping generated by verifier of dex PCs of string init invocations
+  // to the set of other registers that the receiver has been copied into.
+  SafeMap<uint32_t, std::set<uint32_t>> string_init_pc_reg_map_;
 };
 
 }  // namespace art
diff --git a/compiler/driver/compiler_driver-inl.h b/compiler/driver/compiler_driver-inl.h
index bad8335..e54cbf6 100644
--- a/compiler/driver/compiler_driver-inl.h
+++ b/compiler/driver/compiler_driver-inl.h
@@ -127,34 +127,67 @@
   return std::make_pair(fast_get, fast_put);
 }
 
-inline std::pair<bool, bool> CompilerDriver::IsFastStaticField(
-    mirror::DexCache* dex_cache, mirror::Class* referrer_class,
-    ArtField* resolved_field, uint16_t field_idx, uint32_t* storage_index) {
-  DCHECK(resolved_field->IsStatic());
+template <typename ArtMember>
+inline bool CompilerDriver::CanAccessResolvedMember(mirror::Class* referrer_class ATTRIBUTE_UNUSED,
+                                                    mirror::Class* access_to ATTRIBUTE_UNUSED,
+                                                    ArtMember* member ATTRIBUTE_UNUSED,
+                                                    mirror::DexCache* dex_cache ATTRIBUTE_UNUSED,
+                                                    uint32_t field_idx ATTRIBUTE_UNUSED) {
+  // Not defined for ArtMember values other than ArtField or mirror::ArtMethod.
+  UNREACHABLE();
+}
+
+template <>
+inline bool CompilerDriver::CanAccessResolvedMember<ArtField>(mirror::Class* referrer_class,
+                                                              mirror::Class* access_to,
+                                                              ArtField* field,
+                                                              mirror::DexCache* dex_cache,
+                                                              uint32_t field_idx) {
+  return referrer_class->CanAccessResolvedField(access_to, field, dex_cache, field_idx);
+}
+
+template <>
+inline bool CompilerDriver::CanAccessResolvedMember<mirror::ArtMethod>(
+    mirror::Class* referrer_class,
+    mirror::Class* access_to,
+    mirror::ArtMethod* method,
+    mirror::DexCache* dex_cache,
+    uint32_t field_idx) {
+  return referrer_class->CanAccessResolvedMethod(access_to, method, dex_cache, field_idx);
+}
+
+template <typename ArtMember>
+inline std::pair<bool, bool> CompilerDriver::IsClassOfStaticMemberAvailableToReferrer(
+    mirror::DexCache* dex_cache,
+    mirror::Class* referrer_class,
+    ArtMember* resolved_member,
+    uint16_t member_idx,
+    uint32_t* storage_index) {
+  DCHECK(resolved_member->IsStatic());
   if (LIKELY(referrer_class != nullptr)) {
-    mirror::Class* fields_class = resolved_field->GetDeclaringClass();
-    if (fields_class == referrer_class) {
-      *storage_index = fields_class->GetDexTypeIndex();
+    mirror::Class* members_class = resolved_member->GetDeclaringClass();
+    if (members_class == referrer_class) {
+      *storage_index = members_class->GetDexTypeIndex();
       return std::make_pair(true, true);
     }
-    if (referrer_class->CanAccessResolvedField(fields_class, resolved_field,
-                                               dex_cache, field_idx)) {
-      // We have the resolved field, we must make it into a index for the referrer
+    if (CanAccessResolvedMember<ArtMember>(
+            referrer_class, members_class, resolved_member, dex_cache, member_idx)) {
+      // We have the resolved member, we must make it into an index for the referrer
       // in its static storage (which may fail if it doesn't have a slot for it)
       // TODO: for images we can elide the static storage base null check
       // if we know there's a non-null entry in the image
       const DexFile* dex_file = dex_cache->GetDexFile();
       uint32_t storage_idx = DexFile::kDexNoIndex;
-      if (LIKELY(fields_class->GetDexCache() == dex_cache)) {
-        // common case where the dex cache of both the referrer and the field are the same,
+      if (LIKELY(members_class->GetDexCache() == dex_cache)) {
+        // common case where the dex cache of both the referrer and the member are the same,
         // no need to search the dex file
-        storage_idx = fields_class->GetDexTypeIndex();
+        storage_idx = members_class->GetDexTypeIndex();
       } else {
-        // Search dex file for localized ssb index, may fail if field's class is a parent
+        // Search dex file for localized ssb index, may fail if member's class is a parent
         // of the class mentioned in the dex file and there is no dex cache entry.
         std::string temp;
         const DexFile::StringId* string_id =
-            dex_file->FindStringId(resolved_field->GetDeclaringClass()->GetDescriptor(&temp));
+            dex_file->FindStringId(resolved_member->GetDeclaringClass()->GetDescriptor(&temp));
         if (string_id != nullptr) {
           const DexFile::TypeId* type_id =
              dex_file->FindTypeId(dex_file->GetIndexForStringId(*string_id));
@@ -166,7 +199,7 @@
       }
       if (storage_idx != DexFile::kDexNoIndex) {
         *storage_index = storage_idx;
-        return std::make_pair(true, !resolved_field->IsFinal());
+        return std::make_pair(true, !resolved_member->IsFinal());
       }
     }
   }
@@ -175,6 +208,23 @@
   return std::make_pair(false, false);
 }
 
+inline std::pair<bool, bool> CompilerDriver::IsFastStaticField(
+    mirror::DexCache* dex_cache, mirror::Class* referrer_class,
+    ArtField* resolved_field, uint16_t field_idx, uint32_t* storage_index) {
+  return IsClassOfStaticMemberAvailableToReferrer(
+      dex_cache, referrer_class, resolved_field, field_idx, storage_index);
+}
+
+inline bool CompilerDriver::IsClassOfStaticMethodAvailableToReferrer(
+    mirror::DexCache* dex_cache, mirror::Class* referrer_class,
+    mirror::ArtMethod* resolved_method, uint16_t method_idx, uint32_t* storage_index) {
+  std::pair<bool, bool> result = IsClassOfStaticMemberAvailableToReferrer(
+      dex_cache, referrer_class, resolved_method, method_idx, storage_index);
+  // Only the first member of `result` is meaningful, as there is no
+  // "write access" to a method.
+  return result.first;
+}
+
 inline bool CompilerDriver::IsStaticFieldInReferrerClass(mirror::Class* referrer_class,
                                                          ArtField* resolved_field) {
   DCHECK(resolved_field->IsStatic());
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index c858326..47288b5 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -40,6 +40,7 @@
 #include "dex/verification_results.h"
 #include "dex/verified_method.h"
 #include "dex/quick/dex_file_method_inliner.h"
+#include "dex/quick/dex_file_to_method_inliner_map.h"
 #include "driver/compiler_options.h"
 #include "elf_writer_quick.h"
 #include "jni_internal.h"
@@ -2485,4 +2486,16 @@
   return oss.str();
 }
 
+bool CompilerDriver::IsStringTypeIndex(uint16_t type_index, const DexFile* dex_file) {
+  const char* type = dex_file->GetTypeDescriptor(dex_file->GetTypeId(type_index));
+  return strcmp(type, "Ljava/lang/String;") == 0;
+}
+
+bool CompilerDriver::IsStringInit(uint32_t method_index, const DexFile* dex_file, int32_t* offset) {
+  DexFileMethodInliner* inliner = GetMethodInlinerMap()->GetMethodInliner(dex_file);
+  size_t pointer_size = InstructionSetPointerSize(GetInstructionSet());
+  *offset = inliner->GetOffsetForStringInit(method_index, pointer_size);
+  return inliner->IsStringInitMethodIndex(method_index);
+}
+
 }  // namespace art
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index 03c5c5c..2b0985a 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -281,6 +281,18 @@
       ArtField* resolved_field, uint16_t field_idx, uint32_t* storage_index)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  // Return whether the declaring class of `resolved_method` is
+  // available to `referrer_class`. If this is true, compute the type
+  // index of the declaring class in the referrer's dex file and
+  // return it through the out argument `storage_index`; otherwise
+  // return DexFile::kDexNoIndex through `storage_index`.
+  bool IsClassOfStaticMethodAvailableToReferrer(mirror::DexCache* dex_cache,
+                                                mirror::Class* referrer_class,
+                                                mirror::ArtMethod* resolved_method,
+                                                uint16_t method_idx,
+                                                uint32_t* storage_index)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   // Is static field's in referrer's class?
   bool IsStaticFieldInReferrerClass(mirror::Class* referrer_class, ArtField* resolved_field)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -454,11 +466,41 @@
   // Get memory usage during compilation.
   std::string GetMemoryUsageString(bool extended) const;
 
+  bool IsStringTypeIndex(uint16_t type_index, const DexFile* dex_file);
+  bool IsStringInit(uint32_t method_index, const DexFile* dex_file, int32_t* offset);
+
   void SetHadHardVerifierFailure() {
     had_hard_verifier_failure_ = true;
   }
 
  private:
+  // Return whether the declaring class of `resolved_member` is
+  // available to `referrer_class` for read or write access using two
+  // Boolean values returned as a pair. If it is true at least for read
+  // access, compute the type index of the declaring class in the
+  // referrer's dex file and return it through the out argument
+  // `storage_index`; otherwise return DexFile::kDexNoIndex through
+  // `storage_index`.
+  template <typename ArtMember>
+  std::pair<bool, bool> IsClassOfStaticMemberAvailableToReferrer(mirror::DexCache* dex_cache,
+                                                                 mirror::Class* referrer_class,
+                                                                 ArtMember* resolved_member,
+                                                                 uint16_t member_idx,
+                                                                 uint32_t* storage_index)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  // Can `referrer_class` access the resolved `member`?
+  // Dispatch call to mirror::Class::CanAccessResolvedField or
+  // mirror::Class::CanAccessResolvedMethod depending on the value of
+  // ArtMember.
+  template <typename ArtMember>
+  static bool CanAccessResolvedMember(mirror::Class* referrer_class,
+                                      mirror::Class* access_to,
+                                      ArtMember* member,
+                                      mirror::DexCache* dex_cache,
+                                      uint32_t field_idx)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   // These flags are internal to CompilerDriver for collecting INVOKE resolution statistics.
   // The only external contract is that unresolved method has flags 0 and resolved non-0.
   enum {
diff --git a/compiler/dwarf/dwarf_test.h b/compiler/dwarf/dwarf_test.h
index 99b8e79..230ebe3 100644
--- a/compiler/dwarf/dwarf_test.h
+++ b/compiler/dwarf/dwarf_test.h
@@ -57,44 +57,41 @@
 
   // Pretty-print the generated DWARF data using objdump.
   template<typename ElfTypes>
-  std::vector<std::string> Objdump(bool is64bit, const char* args) {
+  std::vector<std::string> Objdump(const char* args) {
     // Write simple elf file with just the DWARF sections.
+    InstructionSet isa = (sizeof(typename ElfTypes::Addr) == 8) ? kX86_64 : kX86;
     class NoCode : public CodeOutput {
-      virtual void SetCodeOffset(size_t) { }
-      virtual bool Write(OutputStream*) { return true; }
-    } code;
-    ScratchFile file;
-    InstructionSet isa = is64bit ? kX86_64 : kX86;
-    ElfBuilder<ElfTypes> builder(
-        &code, file.GetFile(), isa, 0, 0, 0, 0, 0, 0, false, false);
-    typedef ElfRawSectionBuilder<ElfTypes> Section;
-    Section debug_info(".debug_info", SHT_PROGBITS, 0, nullptr, 0, 1, 0);
-    Section debug_abbrev(".debug_abbrev", SHT_PROGBITS, 0, nullptr, 0, 1, 0);
-    Section debug_str(".debug_str", SHT_PROGBITS, 0, nullptr, 0, 1, 0);
-    Section debug_line(".debug_line", SHT_PROGBITS, 0, nullptr, 0, 1, 0);
-    Section eh_frame(".eh_frame", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, 4, 0);
+      bool Write(OutputStream*) OVERRIDE { return true; }  // NOLINT
+    } no_code;
+    ElfBuilder<ElfTypes> builder(isa, 0, &no_code, 0, &no_code, 0);
+    typedef typename ElfBuilder<ElfTypes>::RawSection RawSection;
+    RawSection debug_info(".debug_info", SHT_PROGBITS, 0, nullptr, 0, 1, 0);
+    RawSection debug_abbrev(".debug_abbrev", SHT_PROGBITS, 0, nullptr, 0, 1, 0);
+    RawSection debug_str(".debug_str", SHT_PROGBITS, 0, nullptr, 0, 1, 0);
+    RawSection debug_line(".debug_line", SHT_PROGBITS, 0, nullptr, 0, 1, 0);
+    RawSection eh_frame(".eh_frame", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, kPageSize, 0);
     if (!debug_info_data_.empty()) {
       debug_info.SetBuffer(debug_info_data_);
-      builder.RegisterRawSection(&debug_info);
+      builder.RegisterSection(&debug_info);
     }
     if (!debug_abbrev_data_.empty()) {
       debug_abbrev.SetBuffer(debug_abbrev_data_);
-      builder.RegisterRawSection(&debug_abbrev);
+      builder.RegisterSection(&debug_abbrev);
     }
     if (!debug_str_data_.empty()) {
       debug_str.SetBuffer(debug_str_data_);
-      builder.RegisterRawSection(&debug_str);
+      builder.RegisterSection(&debug_str);
     }
     if (!debug_line_data_.empty()) {
       debug_line.SetBuffer(debug_line_data_);
-      builder.RegisterRawSection(&debug_line);
+      builder.RegisterSection(&debug_line);
     }
     if (!eh_frame_data_.empty()) {
       eh_frame.SetBuffer(eh_frame_data_);
-      builder.RegisterRawSection(&eh_frame);
+      builder.RegisterSection(&eh_frame);
     }
-    builder.Init();
-    builder.Write();
+    ScratchFile file;
+    builder.Write(file.GetFile());
 
     // Read the elf file back using objdump.
     std::vector<std::string> lines;
@@ -123,9 +120,9 @@
 
   std::vector<std::string> Objdump(bool is64bit, const char* args) {
     if (is64bit) {
-      return Objdump<ElfTypes64>(is64bit, args);
+      return Objdump<ElfTypes64>(args);
     } else {
-      return Objdump<ElfTypes32>(is64bit, args);
+      return Objdump<ElfTypes32>(args);
     }
   }
 
diff --git a/compiler/elf_builder.h b/compiler/elf_builder.h
index 32c8cce..63d3a0d 100644
--- a/compiler/elf_builder.h
+++ b/compiler/elf_builder.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2014 The Android Open Source Project
+ * Copyright (C) 2015 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -17,535 +17,32 @@
 #ifndef ART_COMPILER_ELF_BUILDER_H_
 #define ART_COMPILER_ELF_BUILDER_H_
 
+#include <vector>
+
 #include "arch/instruction_set.h"
-#include "base/stl_util.h"
-#include "base/value_object.h"
+#include "base/unix_file/fd_file.h"
 #include "buffered_output_stream.h"
 #include "elf_utils.h"
 #include "file_output_stream.h"
 
 namespace art {
 
-template <typename ElfTypes>
-class ElfSectionBuilder : public ValueObject {
- public:
-  using Elf_Word = typename ElfTypes::Word;
-  using Elf_Shdr = typename ElfTypes::Shdr;
-
-  ElfSectionBuilder(const std::string& sec_name, Elf_Word type, Elf_Word flags,
-                    const ElfSectionBuilder<ElfTypes> *link, Elf_Word info,
-                    Elf_Word align, Elf_Word entsize)
-      : section_index_(0), name_(sec_name), link_(link) {
-    memset(&section_, 0, sizeof(section_));
-    section_.sh_type = type;
-    section_.sh_flags = flags;
-    section_.sh_info = info;
-    section_.sh_addralign = align;
-    section_.sh_entsize = entsize;
-  }
-  ElfSectionBuilder(const ElfSectionBuilder&) = default;
-
-  ~ElfSectionBuilder() {}
-
-  Elf_Word GetLink() const {
-    return (link_ != nullptr) ? link_->section_index_ : 0;
-  }
-
-  const Elf_Shdr* GetSection() const {
-    return &section_;
-  }
-
-  Elf_Shdr* GetSection() {
-    return &section_;
-  }
-
-  Elf_Word GetSectionIndex() const {
-    return section_index_;
-  }
-
-  void SetSectionIndex(Elf_Word section_index) {
-    section_index_ = section_index;
-  }
-
-  const std::string& GetName() const {
-    return name_;
-  }
-
- private:
-  Elf_Shdr section_;
-  Elf_Word section_index_;
-  const std::string name_;
-  const ElfSectionBuilder* const link_;
-};
-
-template <typename ElfTypes>
-class ElfDynamicBuilder FINAL : public ElfSectionBuilder<ElfTypes> {
- public:
-  using Elf_Word = typename ElfTypes::Word;
-  using Elf_Sword = typename ElfTypes::Sword;
-  using Elf_Shdr = typename ElfTypes::Shdr;
-  using Elf_Dyn = typename ElfTypes::Dyn;
-
-  void AddDynamicTag(Elf_Sword tag, Elf_Word d_un) {
-    if (tag == DT_NULL) {
-      return;
-    }
-    dynamics_.push_back({nullptr, tag, d_un});
-  }
-
-  void AddDynamicTag(Elf_Sword tag, Elf_Word d_un,
-                     const ElfSectionBuilder<ElfTypes>* section) {
-    if (tag == DT_NULL) {
-      return;
-    }
-    dynamics_.push_back({section, tag, d_un});
-  }
-
-  ElfDynamicBuilder(const std::string& sec_name,
-                    ElfSectionBuilder<ElfTypes> *link)
-  : ElfSectionBuilder<ElfTypes>(sec_name, SHT_DYNAMIC, SHF_ALLOC | SHF_ALLOC,
-                                link, 0, kPageSize, sizeof(Elf_Dyn)) {}
-  ~ElfDynamicBuilder() {}
-
-  Elf_Word GetSize() const {
-    // Add 1 for the DT_NULL, 1 for DT_STRSZ, and 1 for DT_SONAME. All of
-    // these must be added when we actually put the file together because
-    // their values are very dependent on state.
-    return dynamics_.size() + 3;
-  }
-
-  // Create the actual dynamic vector. strsz should be the size of the .dynstr
-  // table and soname_off should be the offset of the soname in .dynstr.
-  // Since niether can be found prior to final layout we will wait until here
-  // to add them.
-  std::vector<Elf_Dyn> GetDynamics(Elf_Word strsz, Elf_Word soname) const {
-    std::vector<Elf_Dyn> ret;
-    for (auto it = dynamics_.cbegin(); it != dynamics_.cend(); ++it) {
-      if (it->section_ != nullptr) {
-        // We are adding an address relative to a section.
-        ret.push_back(
-            {it->tag_, {it->off_ + it->section_->GetSection()->sh_addr}});
-      } else {
-        ret.push_back({it->tag_, {it->off_}});
-      }
-    }
-    ret.push_back({DT_STRSZ, {strsz}});
-    ret.push_back({DT_SONAME, {soname}});
-    ret.push_back({DT_NULL, {0}});
-    return ret;
-  }
-
- private:
-  struct ElfDynamicState {
-    const ElfSectionBuilder<ElfTypes>* section_;
-    Elf_Sword tag_;
-    Elf_Word off_;
-  };
-  std::vector<ElfDynamicState> dynamics_;
-};
-
-template <typename ElfTypes>
-class ElfRawSectionBuilder FINAL : public ElfSectionBuilder<ElfTypes> {
- public:
-  using Elf_Word = typename ElfTypes::Word;
-
-  ElfRawSectionBuilder(const std::string& sec_name, Elf_Word type, Elf_Word flags,
-                       const ElfSectionBuilder<ElfTypes>* link, Elf_Word info,
-                       Elf_Word align, Elf_Word entsize)
-    : ElfSectionBuilder<ElfTypes>(sec_name, type, flags, link, info, align, entsize) {
-  }
-  ElfRawSectionBuilder(const ElfRawSectionBuilder&) = default;
-
-  ~ElfRawSectionBuilder() {}
-
-  std::vector<uint8_t>* GetBuffer() {
-    return &buf_;
-  }
-
-  void SetBuffer(const std::vector<uint8_t>& buf) {
-    buf_ = buf;
-  }
-
- private:
-  std::vector<uint8_t> buf_;
-};
-
-template <typename ElfTypes>
-class ElfOatSectionBuilder FINAL : public ElfSectionBuilder<ElfTypes> {
- public:
-  using Elf_Word = typename ElfTypes::Word;
-
-  ElfOatSectionBuilder(const std::string& sec_name, Elf_Word size, Elf_Word offset,
-                       Elf_Word type, Elf_Word flags)
-    : ElfSectionBuilder<ElfTypes>(sec_name, type, flags, nullptr, 0, kPageSize, 0),
-      offset_(offset), size_(size) {
-  }
-
-  ~ElfOatSectionBuilder() {}
-
-  Elf_Word GetOffset() const {
-    return offset_;
-  }
-
-  Elf_Word GetSize() const {
-    return size_;
-  }
-
- private:
-  // Offset of the content within the file.
-  Elf_Word offset_;
-  // Size of the content within the file.
-  Elf_Word size_;
-};
-
-static inline constexpr uint8_t MakeStInfo(uint8_t binding, uint8_t type) {
-  return ((binding) << 4) + ((type) & 0xf);
-}
-
-// from bionic
-static inline unsigned elfhash(const char *_name) {
-  const unsigned char *name = (const unsigned char *) _name;
-  unsigned h = 0, g;
-
-  while (*name) {
-    h = (h << 4) + *name++;
-    g = h & 0xf0000000;
-    h ^= g;
-    h ^= g >> 24;
-  }
-  return h;
-}
-
-template <typename ElfTypes>
-class ElfSymtabBuilder FINAL : public ElfSectionBuilder<ElfTypes> {
- public:
-  using Elf_Addr = typename ElfTypes::Addr;
-  using Elf_Word = typename ElfTypes::Word;
-  using Elf_Sym = typename ElfTypes::Sym;
-
-  // Add a symbol with given name to this symtab. The symbol refers to
-  // 'relative_addr' within the given section and has the given attributes.
-  void AddSymbol(const std::string& name,
-                 const ElfSectionBuilder<ElfTypes>* section,
-                 Elf_Addr addr,
-                 bool is_relative,
-                 Elf_Word size,
-                 uint8_t binding,
-                 uint8_t type,
-                 uint8_t other = 0) {
-    CHECK(section);
-    ElfSymtabBuilder::ElfSymbolState state {name, section, addr, size, is_relative,
-                                            MakeStInfo(binding, type), other, 0};
-    symbols_.push_back(state);
-  }
-
-  ElfSymtabBuilder(const std::string& sec_name, Elf_Word type,
-                   const std::string& str_name, Elf_Word str_type, bool alloc)
-  : ElfSectionBuilder<ElfTypes>(sec_name, type, ((alloc) ? SHF_ALLOC : 0U),
-                                &strtab_, 0, sizeof(Elf_Word),
-                                sizeof(Elf_Sym)), str_name_(str_name),
-                                str_type_(str_type),
-                                strtab_(str_name,
-                                        str_type,
-                                        ((alloc) ? SHF_ALLOC : 0U),
-                                        nullptr, 0, 1, 1) {
-  }
-
-  ~ElfSymtabBuilder() {}
-
-  std::vector<Elf_Word> GenerateHashContents() const {
-    // Here is how The ELF hash table works.
-    // There are 3 arrays to worry about.
-    // * The symbol table where the symbol information is.
-    // * The bucket array which is an array of indexes into the symtab and chain.
-    // * The chain array which is also an array of indexes into the symtab and chain.
-    //
-    // Lets say the state is something like this.
-    // +--------+       +--------+      +-----------+
-    // | symtab |       | bucket |      |   chain   |
-    // |  null  |       | 1      |      | STN_UNDEF |
-    // | <sym1> |       | 4      |      | 2         |
-    // | <sym2> |       |        |      | 5         |
-    // | <sym3> |       |        |      | STN_UNDEF |
-    // | <sym4> |       |        |      | 3         |
-    // | <sym5> |       |        |      | STN_UNDEF |
-    // +--------+       +--------+      +-----------+
-    //
-    // The lookup process (in python psudocode) is
-    //
-    // def GetSym(name):
-    //     # NB STN_UNDEF == 0
-    //     indx = bucket[elfhash(name) % num_buckets]
-    //     while indx != STN_UNDEF:
-    //         if GetSymbolName(symtab[indx]) == name:
-    //             return symtab[indx]
-    //         indx = chain[indx]
-    //     return SYMBOL_NOT_FOUND
-    //
-    // Between bucket and chain arrays every symtab index must be present exactly
-    // once (except for STN_UNDEF, which must be present 1 + num_bucket times).
-
-    // Select number of buckets.
-    // This is essentially arbitrary.
-    Elf_Word nbuckets;
-    Elf_Word chain_size = GetSize();
-    if (symbols_.size() < 8) {
-      nbuckets = 2;
-    } else if (symbols_.size() < 32) {
-      nbuckets = 4;
-    } else if (symbols_.size() < 256) {
-      nbuckets = 16;
-    } else {
-      // Have about 32 ids per bucket.
-      nbuckets = RoundUp(symbols_.size()/32, 2);
-    }
-    std::vector<Elf_Word> hash;
-    hash.push_back(nbuckets);
-    hash.push_back(chain_size);
-    uint32_t bucket_offset = hash.size();
-    uint32_t chain_offset = bucket_offset + nbuckets;
-    hash.resize(hash.size() + nbuckets + chain_size, 0);
-
-    Elf_Word* buckets = hash.data() + bucket_offset;
-    Elf_Word* chain   = hash.data() + chain_offset;
-
-    // Set up the actual hash table.
-    for (Elf_Word i = 0; i < symbols_.size(); i++) {
-      // Add 1 since we need to have the null symbol that is not in the symbols
-      // list.
-      Elf_Word index = i + 1;
-      Elf_Word hash_val = static_cast<Elf_Word>(elfhash(symbols_[i].name_.c_str())) % nbuckets;
-      if (buckets[hash_val] == 0) {
-        buckets[hash_val] = index;
-      } else {
-        hash_val = buckets[hash_val];
-        CHECK_LT(hash_val, chain_size);
-        while (chain[hash_val] != 0) {
-          hash_val = chain[hash_val];
-          CHECK_LT(hash_val, chain_size);
-        }
-        chain[hash_val] = index;
-        // Check for loops. Works because if this is non-empty then there must be
-        // another cell which already contains the same symbol index as this one,
-        // which means some symbol has more then one name, which isn't allowed.
-        CHECK_EQ(chain[index], static_cast<Elf_Word>(0));
-      }
-    }
-
-    return hash;
-  }
-
-  std::string GenerateStrtab() {
-    std::string tab;
-    tab += '\0';
-    for (auto it = symbols_.begin(); it != symbols_.end(); ++it) {
-      it->name_idx_ = tab.size();
-      tab += it->name_;
-      tab += '\0';
-    }
-    strtab_.GetSection()->sh_size = tab.size();
-    return tab;
-  }
-
-  std::vector<Elf_Sym> GenerateSymtab() {
-    std::vector<Elf_Sym> ret;
-    Elf_Sym undef_sym;
-    memset(&undef_sym, 0, sizeof(undef_sym));
-    undef_sym.st_shndx = SHN_UNDEF;
-    ret.push_back(undef_sym);
-
-    for (auto it = symbols_.cbegin(); it != symbols_.cend(); ++it) {
-      Elf_Sym sym;
-      memset(&sym, 0, sizeof(sym));
-      sym.st_name = it->name_idx_;
-      if (it->is_relative_) {
-        sym.st_value = it->addr_ + it->section_->GetSection()->sh_offset;
-      } else {
-        sym.st_value = it->addr_;
-      }
-      sym.st_size = it->size_;
-      sym.st_other = it->other_;
-      sym.st_shndx = it->section_->GetSectionIndex();
-      sym.st_info = it->info_;
-
-      ret.push_back(sym);
-    }
-    return ret;
-  }
-
-  Elf_Word GetSize() const {
-    // 1 is for the implicit null symbol.
-    return symbols_.size() + 1;
-  }
-
-  ElfSectionBuilder<ElfTypes>* GetStrTab() {
-    return &strtab_;
-  }
-
- private:
-  struct ElfSymbolState {
-    const std::string name_;
-    const ElfSectionBuilder<ElfTypes>* section_;
-    Elf_Addr addr_;
-    Elf_Word size_;
-    bool is_relative_;
-    uint8_t info_;
-    uint8_t other_;
-    // Used during Write() to temporarially hold name index in the strtab.
-    Elf_Word name_idx_;
-  };
-
-  // Information for the strsym for dynstr sections.
-  const std::string str_name_;
-  Elf_Word str_type_;
-  // The symbols in the same order they will be in the symbol table.
-  std::vector<ElfSymbolState> symbols_;
-  ElfSectionBuilder<ElfTypes> strtab_;
-};
-
-template <typename Elf_Word>
-class ElfFilePiece {
- public:
-  virtual ~ElfFilePiece() {}
-
-  virtual bool Write(File* elf_file) {
-    if (static_cast<off_t>(offset_) != lseek(elf_file->Fd(), offset_, SEEK_SET)) {
-      PLOG(ERROR) << "Failed to seek to " << GetDescription() << " offset " << offset_ << " for "
-          << elf_file->GetPath();
-      return false;
-    }
-
-    return DoActualWrite(elf_file);
-  }
-
-  static bool Compare(ElfFilePiece* a, ElfFilePiece* b) {
-    return a->offset_ < b->offset_;
-  }
-
- protected:
-  explicit ElfFilePiece(Elf_Word offset) : offset_(offset) {}
-
-  Elf_Word GetOffset() const {
-    return offset_;
-  }
-
-  virtual const char* GetDescription() const = 0;
-  virtual bool DoActualWrite(File* elf_file) = 0;
-
- private:
-  const Elf_Word offset_;
-
-  DISALLOW_COPY_AND_ASSIGN(ElfFilePiece);
-};
-
-template <typename Elf_Word>
-class ElfFileMemoryPiece FINAL : public ElfFilePiece<Elf_Word> {
- public:
-  ElfFileMemoryPiece(const std::string& name, Elf_Word offset, const void* data, Elf_Word size)
-      : ElfFilePiece<Elf_Word>(offset), dbg_name_(name), data_(data), size_(size) {}
-
- protected:
-  bool DoActualWrite(File* elf_file) OVERRIDE {
-    DCHECK(data_ != nullptr || size_ == 0U) << dbg_name_ << " " << size_;
-
-    if (!elf_file->WriteFully(data_, size_)) {
-      PLOG(ERROR) << "Failed to write " << dbg_name_ << " for " << elf_file->GetPath();
-      return false;
-    }
-
-    return true;
-  }
-
-  const char* GetDescription() const OVERRIDE {
-    return dbg_name_.c_str();
-  }
-
- private:
-  const std::string& dbg_name_;
-  const void *data_;
-  Elf_Word size_;
-};
-
 class CodeOutput {
  public:
-  virtual void SetCodeOffset(size_t offset) = 0;
   virtual bool Write(OutputStream* out) = 0;
   virtual ~CodeOutput() {}
 };
 
-template <typename Elf_Word>
-class ElfFileRodataPiece FINAL : public ElfFilePiece<Elf_Word> {
- public:
-  ElfFileRodataPiece(Elf_Word offset, CodeOutput* output) : ElfFilePiece<Elf_Word>(offset),
-      output_(output) {}
-
- protected:
-  bool DoActualWrite(File* elf_file) OVERRIDE {
-    output_->SetCodeOffset(this->GetOffset());
-    std::unique_ptr<BufferedOutputStream> output_stream(
-        new BufferedOutputStream(new FileOutputStream(elf_file)));
-    if (!output_->Write(output_stream.get())) {
-      PLOG(ERROR) << "Failed to write .rodata and .text for " << elf_file->GetPath();
-      return false;
-    }
-
-    return true;
-  }
-
-  const char* GetDescription() const OVERRIDE {
-    return ".rodata";
-  }
-
- private:
-  CodeOutput* const output_;
-
-  DISALLOW_COPY_AND_ASSIGN(ElfFileRodataPiece);
-};
-
-template <typename Elf_Word>
-class ElfFileOatTextPiece FINAL : public ElfFilePiece<Elf_Word> {
- public:
-  ElfFileOatTextPiece(Elf_Word offset, CodeOutput* output) : ElfFilePiece<Elf_Word>(offset),
-      output_(output) {}
-
- protected:
-  bool DoActualWrite(File* elf_file ATTRIBUTE_UNUSED) OVERRIDE {
-    // All data is written by the ElfFileRodataPiece right now, as the oat writer writes in one
-    // piece. This is for future flexibility.
-    UNUSED(output_);
-    return true;
-  }
-
-  const char* GetDescription() const OVERRIDE {
-    return ".text";
-  }
-
- private:
-  CodeOutput* const output_;
-
-  DISALLOW_COPY_AND_ASSIGN(ElfFileOatTextPiece);
-};
-
-template <typename Elf_Word>
-static bool WriteOutFile(const std::vector<ElfFilePiece<Elf_Word>*>& pieces, File* elf_file) {
-  // TODO It would be nice if this checked for overlap.
-  for (auto it = pieces.begin(); it != pieces.end(); ++it) {
-    if (!(*it)->Write(elf_file)) {
-      return false;
-    }
-  }
-  return true;
-}
-
-template <typename Elf_Word, typename Elf_Shdr>
-static inline constexpr Elf_Word NextOffset(const Elf_Shdr& cur, const Elf_Shdr& prev) {
-  return RoundUp(prev.sh_size + prev.sh_offset, cur.sh_addralign);
-}
-
+// Writes ELF file.
+// The main complication is that the sections often want to reference
+// each other.  We solve this by writing the ELF file in two stages:
+//  * Sections are asked about their size, and overall layout is calculated.
+//  * Sections do the actual writes which may use offsets of other sections.
 template <typename ElfTypes>
 class ElfBuilder FINAL {
  public:
   using Elf_Addr = typename ElfTypes::Addr;
+  using Elf_Off = typename ElfTypes::Off;
   using Elf_Word = typename ElfTypes::Word;
   using Elf_Sword = typename ElfTypes::Sword;
   using Elf_Ehdr = typename ElfTypes::Ehdr;
@@ -554,47 +51,464 @@
   using Elf_Phdr = typename ElfTypes::Phdr;
   using Elf_Dyn = typename ElfTypes::Dyn;
 
-  ElfBuilder(CodeOutput* oat_writer,
-             File* elf_file,
-             InstructionSet isa,
-             Elf_Word rodata_relative_offset,
-             Elf_Word rodata_size,
-             Elf_Word text_relative_offset,
-             Elf_Word text_size,
-             Elf_Word bss_relative_offset,
-             Elf_Word bss_size,
-             const bool add_symbols,
-             bool debug = false)
-    : oat_writer_(oat_writer),
-      elf_file_(elf_file),
-      add_symbols_(add_symbols),
-      debug_logging_(debug),
-      text_builder_(".text", text_size, text_relative_offset, SHT_PROGBITS,
-                    SHF_ALLOC | SHF_EXECINSTR),
-      rodata_builder_(".rodata", rodata_size, rodata_relative_offset, SHT_PROGBITS, SHF_ALLOC),
-      bss_builder_(".bss", bss_size, bss_relative_offset, SHT_NOBITS, SHF_ALLOC),
-      dynsym_builder_(".dynsym", SHT_DYNSYM, ".dynstr", SHT_STRTAB, true),
-      symtab_builder_(".symtab", SHT_SYMTAB, ".strtab", SHT_STRTAB, false),
-      hash_builder_(".hash", SHT_HASH, SHF_ALLOC, &dynsym_builder_, 0, sizeof(Elf_Word),
-                    sizeof(Elf_Word)),
-      dynamic_builder_(".dynamic", &dynsym_builder_),
-      shstrtab_builder_(".shstrtab", SHT_STRTAB, 0, nullptr, 0, 1, 1) {
-    SetupEhdr();
-    SetupDynamic();
-    SetupRequiredSymbols();
-    SetISA(isa);
+  // Base class of all sections.
+  class Section {
+   public:
+    Section(const std::string& name, Elf_Word type, Elf_Word flags,
+            const Section* link, Elf_Word info, Elf_Word align, Elf_Word entsize)
+        : header_(), section_index_(0), name_(name), link_(link) {
+      header_.sh_type = type;
+      header_.sh_flags = flags;
+      header_.sh_info = info;
+      header_.sh_addralign = align;
+      header_.sh_entsize = entsize;
+    }
+    virtual ~Section() {}
+
+    // Returns the size of the content of this section.  It is used to
+    // calculate file offsets of all sections before doing any writes.
+    virtual Elf_Word GetSize() const = 0;
+
+    // Write the content of this section to the given file.
+    // This must write exactly the number of bytes returned by GetSize().
+    // Offsets of all sections are known when this method is called.
+    virtual bool Write(File* elf_file) = 0;
+
+    Elf_Word GetLink() const {
+      return (link_ != nullptr) ? link_->GetSectionIndex() : 0;
+    }
+
+    const Elf_Shdr* GetHeader() const {
+      return &header_;
+    }
+
+    Elf_Shdr* GetHeader() {
+      return &header_;
+    }
+
+    Elf_Word GetSectionIndex() const {
+      DCHECK_NE(section_index_, 0u);
+      return section_index_;
+    }
+
+    void SetSectionIndex(Elf_Word section_index) {
+      section_index_ = section_index;
+    }
+
+    const std::string& GetName() const {
+      return name_;
+    }
+
+   private:
+    Elf_Shdr header_;
+    Elf_Word section_index_;
+    const std::string name_;
+    const Section* const link_;
+
+    DISALLOW_COPY_AND_ASSIGN(Section);
+  };
+
+  // Writer of .dynamic section.
+  class DynamicSection FINAL : public Section {
+   public:
+    void AddDynamicTag(Elf_Sword tag, Elf_Word value, const Section* section) {
+      DCHECK_NE(tag, static_cast<Elf_Sword>(DT_NULL));
+      dynamics_.push_back({tag, value, section});
+    }
+
+    DynamicSection(const std::string& name, Section* link)
+        : Section(name, SHT_DYNAMIC, SHF_ALLOC,
+                  link, 0, kPageSize, sizeof(Elf_Dyn)) {}
+
+    Elf_Word GetSize() const OVERRIDE {
+      return (dynamics_.size() + 1 /* DT_NULL */) * sizeof(Elf_Dyn);
+    }
+
+    bool Write(File* elf_file) OVERRIDE {
+      std::vector<Elf_Dyn> buffer;
+      buffer.reserve(dynamics_.size() + 1u);
+      for (const ElfDynamicState& it : dynamics_) {
+        if (it.section_ != nullptr) {
+          // We are adding an address relative to a section.
+          buffer.push_back(
+              {it.tag_, {it.value_ + it.section_->GetHeader()->sh_addr}});
+        } else {
+          buffer.push_back({it.tag_, {it.value_}});
+        }
+      }
+      buffer.push_back({DT_NULL, {0}});
+      return WriteArray(elf_file, buffer.data(), buffer.size());
+    }
+
+   private:
+    struct ElfDynamicState {
+      Elf_Sword tag_;
+      Elf_Word value_;
+      const Section* section_;
+    };
+    std::vector<ElfDynamicState> dynamics_;
+  };
+
+  using PatchFn = void (*)(const std::vector<uintptr_t>& patch_locations,
+                           Elf_Addr buffer_address,
+                           Elf_Addr base_address,
+                           std::vector<uint8_t>* buffer);
+
+  // Section with content based on simple memory buffer.
+  // The buffer can be optionally patched before writing.
+  class RawSection FINAL : public Section {
+   public:
+    RawSection(const std::string& name, Elf_Word type, Elf_Word flags,
+               const Section* link, Elf_Word info, Elf_Word align, Elf_Word entsize,
+               PatchFn patch = nullptr, const Section* patch_base_section = nullptr)
+        : Section(name, type, flags, link, info, align, entsize),
+          patched_(false), patch_(patch), patch_base_section_(patch_base_section) {
+    }
+
+    Elf_Word GetSize() const OVERRIDE {
+      return buffer_.size();
+    }
+
+    bool Write(File* elf_file) OVERRIDE {
+      if (!patch_locations_.empty()) {
+        DCHECK(!patched_);  // Do not patch twice.
+        DCHECK(patch_ != nullptr);
+        DCHECK(patch_base_section_ != nullptr);
+        patch_(patch_locations_,
+               this->GetHeader()->sh_addr,
+               patch_base_section_->GetHeader()->sh_addr,
+               &buffer_);
+        patched_ = true;
+      }
+      return WriteArray(elf_file, buffer_.data(), buffer_.size());
+    }
+
+    bool IsEmpty() const {
+      return buffer_.size() == 0;
+    }
+
+    std::vector<uint8_t>* GetBuffer() {
+      return &buffer_;
+    }
+
+    void SetBuffer(const std::vector<uint8_t>& buffer) {
+      buffer_ = buffer;
+    }
+
+    std::vector<uintptr_t>* GetPatchLocations() {
+      return &patch_locations_;
+    }
+
+   private:
+    std::vector<uint8_t> buffer_;
+    std::vector<uintptr_t> patch_locations_;
+    bool patched_;
+    // User-provided function to do the actual patching.
+    PatchFn patch_;
+    // The section that we patch against (usually .text).
+    const Section* patch_base_section_;
+  };
+
+  // Writer of .rodata section or .text section.
+  // The write is done lazily using the provided CodeOutput.
+  class OatSection FINAL : public Section {
+   public:
+    OatSection(const std::string& name, Elf_Word type, Elf_Word flags,
+               const Section* link, Elf_Word info, Elf_Word align,
+               Elf_Word entsize, Elf_Word size, CodeOutput* code_output)
+        : Section(name, type, flags, link, info, align, entsize),
+          size_(size), code_output_(code_output) {
+    }
+
+    Elf_Word GetSize() const OVERRIDE {
+      return size_;
+    }
+
+    bool Write(File* elf_file) OVERRIDE {
+      // The BufferedOutputStream class contains the buffer as a field,
+      // therefore it is too big to allocate on the stack.
+      std::unique_ptr<BufferedOutputStream> output_stream(
+          new BufferedOutputStream(new FileOutputStream(elf_file)));
+      return code_output_->Write(output_stream.get());
+    }
+
+   private:
+    Elf_Word size_;
+    CodeOutput* code_output_;
+  };
+
+  // Writer of .bss section.
+  class NoBitsSection FINAL : public Section {
+   public:
+    NoBitsSection(const std::string& name, Elf_Word size)
+        : Section(name, SHT_NOBITS, SHF_ALLOC, nullptr, 0, kPageSize, 0),
+          size_(size) {
+    }
+
+    Elf_Word GetSize() const OVERRIDE {
+      return size_;
+    }
+
+    bool Write(File* elf_file ATTRIBUTE_UNUSED) OVERRIDE {
+      LOG(ERROR) << "This section should not be written to the ELF file";
+      return false;
+    }
+
+   private:
+    Elf_Word size_;
+  };
+
+  // Writer of .dynstr .strtab and .shstrtab sections.
+  class StrtabSection FINAL : public Section {
+   public:
+    StrtabSection(const std::string& name, Elf_Word flags)
+        : Section(name, SHT_STRTAB, flags, nullptr, 0, 1, 1) {
+      buffer_.reserve(4 * KB);
+      // The first entry of strtab must be empty string.
+      buffer_ += '\0';
+    }
+
+    Elf_Word AddName(const std::string& name) {
+      Elf_Word offset = buffer_.size();
+      buffer_ += name;
+      buffer_ += '\0';
+      return offset;
+    }
+
+    Elf_Word GetSize() const OVERRIDE {
+      return buffer_.size();
+    }
+
+    bool Write(File* elf_file) OVERRIDE {
+      return WriteArray(elf_file, buffer_.data(), buffer_.size());
+    }
+
+   private:
+    std::string buffer_;
+  };
+
+  class HashSection;
+
+  // Writer of .dynsym and .symtab sections.
+  class SymtabSection FINAL : public Section {
+   public:
+    // Add a symbol with given name to this symtab. The symbol refers to 'addr'
+    // (relative to the given section if 'is_relative') and has the given attributes.
+    void AddSymbol(const std::string& name, const Section* section,
+                   Elf_Addr addr, bool is_relative, Elf_Word size,
+                   uint8_t binding, uint8_t type, uint8_t other = 0) {
+      CHECK(section != nullptr);
+      Elf_Word name_idx = strtab_->AddName(name);
+      symbols_.push_back({ name, section, addr, size, is_relative,
+                           MakeStInfo(binding, type), other, name_idx });
+    }
+
+    SymtabSection(const std::string& name, Elf_Word type, Elf_Word flags,
+                  StrtabSection* strtab)
+        : Section(name, type, flags, strtab, 0, sizeof(Elf_Word), sizeof(Elf_Sym)),
+          strtab_(strtab) {
+    }
+
+    bool IsEmpty() const {
+      return symbols_.empty();
+    }
+
+    Elf_Word GetSize() const OVERRIDE {
+      return (1 /* NULL */ + symbols_.size()) * sizeof(Elf_Sym);
+    }
+
+    bool Write(File* elf_file) OVERRIDE {
+      std::vector<Elf_Sym> buffer;
+      buffer.reserve(1u + symbols_.size());
+      buffer.push_back(Elf_Sym());  // NULL.
+      for (const ElfSymbolState& it : symbols_) {
+        Elf_Sym sym = Elf_Sym();
+        sym.st_name = it.name_idx_;
+        if (it.is_relative_) {
+          sym.st_value = it.addr_ + it.section_->GetHeader()->sh_addr;
+        } else {
+          sym.st_value = it.addr_;
+        }
+        sym.st_size = it.size_;
+        sym.st_other = it.other_;
+        sym.st_shndx = it.section_->GetSectionIndex();
+        sym.st_info = it.info_;
+        buffer.push_back(sym);
+      }
+      return WriteArray(elf_file, buffer.data(), buffer.size());
+    }
+
+   private:
+    struct ElfSymbolState {
+      const std::string name_;
+      const Section* section_;
+      Elf_Addr addr_;
+      Elf_Word size_;
+      bool is_relative_;
+      uint8_t info_;
+      uint8_t other_;
+      Elf_Word name_idx_;  // index in the strtab.
+    };
+
+    static inline constexpr uint8_t MakeStInfo(uint8_t binding, uint8_t type) {
+      return ((binding) << 4) + ((type) & 0xf);
+    }
+
+    // The symbols in the same order they will be in the symbol table.
+    std::vector<ElfSymbolState> symbols_;
+    StrtabSection* strtab_;
+
+    friend class HashSection;
+  };
+
+  // TODO: Consider removing.
+  // We use it only for the dynsym section which has only 5 symbols.
+  // We do not use it for symtab, and we probably do not have to
+  // since we use those symbols only to print backtraces.
+  class HashSection FINAL : public Section {
+   public:
+    HashSection(const std::string& name, Elf_Word flags, SymtabSection* symtab)
+        : Section(name, SHT_HASH, flags, symtab,
+                  0, sizeof(Elf_Word), sizeof(Elf_Word)),
+          symtab_(symtab) {
+    }
+
+    Elf_Word GetSize() const OVERRIDE {
+      Elf_Word nbuckets = GetNumBuckets();
+      Elf_Word chain_size = symtab_->symbols_.size() + 1 /* NULL */;
+      return (2 /* header */ + nbuckets + chain_size) * sizeof(Elf_Word);
+    }
+
+    bool Write(File* const elf_file) OVERRIDE {
+      // Here is how the ELF hash table works.
+      // There are 3 arrays to worry about.
+      // * The symbol table where the symbol information is.
+      // * The bucket array which is an array of indexes into the symtab and chain.
+      // * The chain array which is also an array of indexes into the symtab and chain.
+      //
+      // Let's say the state is something like this.
+      // +--------+       +--------+      +-----------+
+      // | symtab |       | bucket |      |   chain   |
+      // |  null  |       | 1      |      | STN_UNDEF |
+      // | <sym1> |       | 4      |      | 2         |
+      // | <sym2> |       |        |      | 5         |
+      // | <sym3> |       |        |      | STN_UNDEF |
+      // | <sym4> |       |        |      | 3         |
+      // | <sym5> |       |        |      | STN_UNDEF |
+      // +--------+       +--------+      +-----------+
+      //
+      // The lookup process (in python pseudocode) is
+      //
+      // def GetSym(name):
+      //     # NB STN_UNDEF == 0
+      //     indx = bucket[elfhash(name) % num_buckets]
+      //     while indx != STN_UNDEF:
+      //         if GetSymbolName(symtab[indx]) == name:
+      //             return symtab[indx]
+      //         indx = chain[indx]
+      //     return SYMBOL_NOT_FOUND
+      //
+      // Between bucket and chain arrays every symtab index must be present exactly
+      // once (except for STN_UNDEF, which must be present 1 + num_bucket times).
+      const auto& symbols = symtab_->symbols_;
+      // Select number of buckets.
+      // This is essentially arbitrary.
+      Elf_Word nbuckets = GetNumBuckets();
+      // 1 is for the implicit NULL symbol.
+      Elf_Word chain_size = (symbols.size() + 1);
+      std::vector<Elf_Word> hash;
+      hash.push_back(nbuckets);
+      hash.push_back(chain_size);
+      uint32_t bucket_offset = hash.size();
+      uint32_t chain_offset = bucket_offset + nbuckets;
+      hash.resize(hash.size() + nbuckets + chain_size, 0);
+
+      Elf_Word* buckets = hash.data() + bucket_offset;
+      Elf_Word* chain   = hash.data() + chain_offset;
+
+      // Set up the actual hash table.
+      for (Elf_Word i = 0; i < symbols.size(); i++) {
+        // Add 1 since we need to have the null symbol that is not in the symbols
+        // list.
+        Elf_Word index = i + 1;
+        Elf_Word hash_val = static_cast<Elf_Word>(elfhash(symbols[i].name_.c_str())) % nbuckets;
+        if (buckets[hash_val] == 0) {
+          buckets[hash_val] = index;
+        } else {
+          hash_val = buckets[hash_val];
+          CHECK_LT(hash_val, chain_size);
+          while (chain[hash_val] != 0) {
+            hash_val = chain[hash_val];
+            CHECK_LT(hash_val, chain_size);
+          }
+          chain[hash_val] = index;
+          // Check for loops. Works because if this is non-empty then there must be
+          // another cell which already contains the same symbol index as this one,
+          // which means some symbol has more than one name, which isn't allowed.
+          CHECK_EQ(chain[index], static_cast<Elf_Word>(0));
+        }
+      }
+      return WriteArray(elf_file, hash.data(), hash.size());
+    }
+
+   private:
+    Elf_Word GetNumBuckets() const {
+      const auto& symbols = symtab_->symbols_;
+      if (symbols.size() < 8) {
+        return 2;
+      } else if (symbols.size() < 32) {
+        return 4;
+      } else if (symbols.size() < 256) {
+        return 16;
+      } else {
+        // Have about 32 ids per bucket.
+        return RoundUp(symbols.size()/32, 2);
+      }
+    }
+
+    // from bionic
+    static inline unsigned elfhash(const char *_name) {
+      const unsigned char *name = (const unsigned char *) _name;
+      unsigned h = 0, g;
+
+      while (*name) {
+        h = (h << 4) + *name++;
+        g = h & 0xf0000000;
+        h ^= g;
+        h ^= g >> 24;
+      }
+      return h;
+    }
+
+    SymtabSection* symtab_;
+
+    DISALLOW_COPY_AND_ASSIGN(HashSection);
+  };
+
+  ElfBuilder(InstructionSet isa,
+             Elf_Word rodata_size, CodeOutput* rodata_writer,
+             Elf_Word text_size, CodeOutput* text_writer,
+             Elf_Word bss_size)
+    : isa_(isa),
+      dynstr_(".dynstr", SHF_ALLOC),
+      dynsym_(".dynsym", SHT_DYNSYM, SHF_ALLOC, &dynstr_),
+      hash_(".hash", SHF_ALLOC, &dynsym_),
+      rodata_(".rodata", SHT_PROGBITS, SHF_ALLOC,
+              nullptr, 0, kPageSize, 0, rodata_size, rodata_writer),
+      text_(".text", SHT_PROGBITS, SHF_ALLOC | SHF_EXECINSTR,
+            nullptr, 0, kPageSize, 0, text_size, text_writer),
+      bss_(".bss", bss_size),
+      dynamic_(".dynamic", &dynsym_),
+      strtab_(".strtab", 0),
+      symtab_(".symtab", SHT_SYMTAB, 0, &strtab_),
+      shstrtab_(".shstrtab", 0) {
   }
   ~ElfBuilder() {}
 
-  const ElfOatSectionBuilder<ElfTypes>& GetTextBuilder() const {
-    return text_builder_;
-  }
+  OatSection* GetText() { return &text_; }
+  SymtabSection* GetSymtab() { return &symtab_; }
 
-  ElfSymtabBuilder<ElfTypes>* GetSymtabBuilder() {
-    return &symtab_builder_;
-  }
-
-  bool Init() {
+  bool Write(File* elf_file) {
     // Since the .text section of an oat file contains relative references to .rodata
     // and (optionally) .bss, we keep these 2 or 3 sections together. This creates
     // a non-traditional layout where the .bss section is mapped independently of the
@@ -605,11 +519,12 @@
     // | Elf_Ehdr                |
     // +-------------------------+
     // | Elf_Phdr PHDR           |
-    // | Elf_Phdr LOAD R         | .dynsym .dynstr .hash .eh_frame .eh_frame_hdr .rodata
+    // | Elf_Phdr LOAD R         | .dynsym .dynstr .hash .rodata
     // | Elf_Phdr LOAD R X       | .text
     // | Elf_Phdr LOAD RW        | .bss (Optional)
     // | Elf_Phdr LOAD RW        | .dynamic
     // | Elf_Phdr DYNAMIC        | .dynamic
+    // | Elf_Phdr LOAD R         | .eh_frame .eh_frame_hdr
     // | Elf_Phdr EH_FRAME R     | .eh_frame_hdr
     // +-------------------------+
     // | .dynsym                 |
@@ -621,25 +536,10 @@
     // | Elf_Sym  oatbsslastword | (Optional)
     // +-------------------------+
     // | .dynstr                 |
-    // | \0                      |
-    // | oatdata\0               |
-    // | oatexec\0               |
-    // | oatlastword\0           |
-    // | boot.oat\0              |
+    // | names for .dynsym       |
     // +-------------------------+
     // | .hash                   |
-    // | Elf_Word nbucket = b    |
-    // | Elf_Word nchain  = c    |
-    // | Elf_Word bucket[0]      |
-    // |         ...             |
-    // | Elf_Word bucket[b - 1]  |
-    // | Elf_Word chain[0]       |
-    // |         ...             |
-    // | Elf_Word chain[c - 1]   |
-    // +-------------------------+
-    // | .eh_frame               |  (Optional)
-    // +-------------------------+
-    // | .eh_frame_hdr           |  (Optional)
+    // | hashtable for dynsym    |
     // +-------------------------+
     // | .rodata                 |
     // | oatdata..oatexec-4      |
@@ -648,38 +548,23 @@
     // | oatexec..oatlastword    |
     // +-------------------------+
     // | .dynamic                |
-    // | Elf_Dyn DT_SONAME       |
     // | Elf_Dyn DT_HASH         |
+    // | Elf_Dyn DT_STRTAB       |
     // | Elf_Dyn DT_SYMTAB       |
     // | Elf_Dyn DT_SYMENT       |
-    // | Elf_Dyn DT_STRTAB       |
     // | Elf_Dyn DT_STRSZ        |
+    // | Elf_Dyn DT_SONAME       |
     // | Elf_Dyn DT_NULL         |
     // +-------------------------+  (Optional)
-    // | .strtab                 |  (Optional)
-    // | program symbol names    |  (Optional)
-    // +-------------------------+  (Optional)
     // | .symtab                 |  (Optional)
     // | program symbols         |  (Optional)
-    // +-------------------------+
-    // | .shstrtab               |
-    // | \0                      |
-    // | .dynamic\0              |
-    // | .dynsym\0               |
-    // | .dynstr\0               |
-    // | .hash\0                 |
-    // | .rodata\0               |
-    // | .text\0                 |
-    // | .bss\0                  |  (Optional)
-    // | .shstrtab\0             |
-    // | .symtab\0               |  (Optional)
-    // | .strtab\0               |  (Optional)
-    // | .eh_frame\0             |  (Optional)
-    // | .eh_frame_hdr\0         |  (Optional)
-    // | .debug_info\0           |  (Optional)
-    // | .debug_abbrev\0         |  (Optional)
-    // | .debug_str\0            |  (Optional)
-    // | .debug_line\0           |  (Optional)
+    // +-------------------------+  (Optional)
+    // | .strtab                 |  (Optional)
+    // | names for .symtab       |  (Optional)
+    // +-------------------------+  (Optional)
+    // | .eh_frame               |  (Optional)
+    // +-------------------------+  (Optional)
+    // | .eh_frame_hdr           |  (Optional)
     // +-------------------------+  (Optional)
     // | .debug_info             |  (Optional)
     // +-------------------------+  (Optional)
@@ -688,7 +573,10 @@
     // | .debug_str              |  (Optional)
     // +-------------------------+  (Optional)
     // | .debug_line             |  (Optional)
-    // +-------------------------+  (Optional)
+    // +-------------------------+
+    // | .shstrtab               |
+    // | names of sections       |
+    // +-------------------------+
     // | Elf_Shdr null           |
     // | Elf_Shdr .dynsym        |
     // | Elf_Shdr .dynstr        |
@@ -697,552 +585,266 @@
     // | Elf_Shdr .text          |
     // | Elf_Shdr .bss           |  (Optional)
     // | Elf_Shdr .dynamic       |
-    // | Elf_Shdr .shstrtab      |
+    // | Elf_Shdr .symtab        |  (Optional)
+    // | Elf_Shdr .strtab        |  (Optional)
     // | Elf_Shdr .eh_frame      |  (Optional)
     // | Elf_Shdr .eh_frame_hdr  |  (Optional)
     // | Elf_Shdr .debug_info    |  (Optional)
     // | Elf_Shdr .debug_abbrev  |  (Optional)
     // | Elf_Shdr .debug_str     |  (Optional)
     // | Elf_Shdr .debug_line    |  (Optional)
+    // | Elf_Shdr .oat_patches   |  (Optional)
+    // | Elf_Shdr .shstrtab      |
     // +-------------------------+
+    constexpr bool debug_logging_ = false;
 
-    if (fatal_error_) {
-      return false;
+    // Create a list of all sections which we want to write.
+    // This is the order in which they will be written.
+    std::vector<Section*> sections;
+    sections.push_back(&dynsym_);
+    sections.push_back(&dynstr_);
+    sections.push_back(&hash_);
+    sections.push_back(&rodata_);
+    sections.push_back(&text_);
+    if (bss_.GetSize() != 0u) {
+      sections.push_back(&bss_);
     }
-    // Step 1. Figure out all the offsets.
-
-    if (debug_logging_) {
-      LOG(INFO) << "phdr_offset=" << PHDR_OFFSET << std::hex << " " << PHDR_OFFSET;
-      LOG(INFO) << "phdr_size=" << PHDR_SIZE << std::hex << " " << PHDR_SIZE;
+    sections.push_back(&dynamic_);
+    if (!symtab_.IsEmpty()) {
+      sections.push_back(&symtab_);
+      sections.push_back(&strtab_);
+    }
+    for (Section* section : other_sections_) {
+      sections.push_back(section);
+    }
+    sections.push_back(&shstrtab_);
+    for (size_t i = 0; i < sections.size(); i++) {
+      // The first section index is 1.  Index 0 is reserved for NULL.
+      // Section index is used for relative symbols and for section links.
+      sections[i]->SetSectionIndex(i + 1);
+      // Add section name to .shstrtab.
+      Elf_Word name_offset = shstrtab_.AddName(sections[i]->GetName());
+      sections[i]->GetHeader()->sh_name = name_offset;
     }
 
-    memset(&program_headers_, 0, sizeof(program_headers_));
-    program_headers_[PH_PHDR].p_type    = PT_PHDR;
-    program_headers_[PH_PHDR].p_offset  = PHDR_OFFSET;
-    program_headers_[PH_PHDR].p_vaddr   = PHDR_OFFSET;
-    program_headers_[PH_PHDR].p_paddr   = PHDR_OFFSET;
-    program_headers_[PH_PHDR].p_filesz  = sizeof(program_headers_);
-    program_headers_[PH_PHDR].p_memsz   = sizeof(program_headers_);
-    program_headers_[PH_PHDR].p_flags   = PF_R;
-    program_headers_[PH_PHDR].p_align   = sizeof(Elf_Word);
+    // The running program does not have access to section headers
+    // and the loader is not supposed to use them either.
+    // The .dynamic section therefore replicates some of the layout
+    // information like the address and size of .rodata and .text.
+    // It also contains other metadata like the SONAME.
+    // The .dynamic section is found using the PT_DYNAMIC program header.
+    BuildDynsymSection();
+    BuildDynamicSection(elf_file->GetPath());
 
-    program_headers_[PH_LOAD_R__].p_type    = PT_LOAD;
-    program_headers_[PH_LOAD_R__].p_offset  = 0;
-    program_headers_[PH_LOAD_R__].p_vaddr   = 0;
-    program_headers_[PH_LOAD_R__].p_paddr   = 0;
-    program_headers_[PH_LOAD_R__].p_flags   = PF_R;
+    // We do not know the number of headers until the final stages of write.
+    // It is easiest to just reserve a fixed amount of space for them.
+    constexpr size_t kMaxProgramHeaders = 8;
+    constexpr size_t kProgramHeadersOffset = sizeof(Elf_Ehdr);
+    constexpr size_t kProgramHeadersSize = sizeof(Elf_Phdr) * kMaxProgramHeaders;
 
-    program_headers_[PH_LOAD_R_X].p_type    = PT_LOAD;
-    program_headers_[PH_LOAD_R_X].p_flags   = PF_R | PF_X;
-
-    program_headers_[PH_LOAD_RW_BSS].p_type    = PT_LOAD;
-    program_headers_[PH_LOAD_RW_BSS].p_flags   = PF_R | PF_W;
-
-    program_headers_[PH_LOAD_RW_DYNAMIC].p_type    = PT_LOAD;
-    program_headers_[PH_LOAD_RW_DYNAMIC].p_flags   = PF_R | PF_W;
-
-    program_headers_[PH_DYNAMIC].p_type    = PT_DYNAMIC;
-    program_headers_[PH_DYNAMIC].p_flags   = PF_R | PF_W;
-
-    program_headers_[PH_EH_FRAME_HDR].p_type = PT_NULL;
-    program_headers_[PH_EH_FRAME_HDR].p_flags = PF_R;
-
-    // Get the dynstr string.
-    dynstr_ = dynsym_builder_.GenerateStrtab();
-
-    // Add the SONAME to the dynstr.
-    dynstr_soname_offset_ = dynstr_.size();
-    std::string file_name(elf_file_->GetPath());
-    size_t directory_separator_pos = file_name.rfind('/');
-    if (directory_separator_pos != std::string::npos) {
-      file_name = file_name.substr(directory_separator_pos + 1);
-    }
-    dynstr_ += file_name;
-    dynstr_ += '\0';
-    if (debug_logging_) {
-      LOG(INFO) << "dynstr size (bytes)   =" << dynstr_.size()
-                << std::hex << " " << dynstr_.size();
-      LOG(INFO) << "dynsym size (elements)=" << dynsym_builder_.GetSize()
-                << std::hex << " " << dynsym_builder_.GetSize();
-    }
-
-    // Get the section header string table.
-    shstrtab_ += '\0';
-
-    // Setup sym_undef
-    memset(&null_hdr_, 0, sizeof(null_hdr_));
-    null_hdr_.sh_type = SHT_NULL;
-    null_hdr_.sh_link = SHN_UNDEF;
-    section_ptrs_.push_back(&null_hdr_);
-
-    section_index_ = 1;
-
-    // setup .dynsym
-    section_ptrs_.push_back(dynsym_builder_.GetSection());
-    AssignSectionStr(&dynsym_builder_, &shstrtab_);
-    dynsym_builder_.SetSectionIndex(section_index_);
-    section_index_++;
-
-    // Setup .dynstr
-    section_ptrs_.push_back(dynsym_builder_.GetStrTab()->GetSection());
-    AssignSectionStr(dynsym_builder_.GetStrTab(), &shstrtab_);
-    dynsym_builder_.GetStrTab()->SetSectionIndex(section_index_);
-    section_index_++;
-
-    // Setup .hash
-    section_ptrs_.push_back(hash_builder_.GetSection());
-    AssignSectionStr(&hash_builder_, &shstrtab_);
-    hash_builder_.SetSectionIndex(section_index_);
-    section_index_++;
-
-    // Setup .rodata
-    section_ptrs_.push_back(rodata_builder_.GetSection());
-    AssignSectionStr(&rodata_builder_, &shstrtab_);
-    rodata_builder_.SetSectionIndex(section_index_);
-    section_index_++;
-
-    // Setup .text
-    section_ptrs_.push_back(text_builder_.GetSection());
-    AssignSectionStr(&text_builder_, &shstrtab_);
-    text_builder_.SetSectionIndex(section_index_);
-    section_index_++;
-
-    // Setup .bss
-    if (bss_builder_.GetSize() != 0u) {
-      section_ptrs_.push_back(bss_builder_.GetSection());
-      AssignSectionStr(&bss_builder_, &shstrtab_);
-      bss_builder_.SetSectionIndex(section_index_);
-      section_index_++;
-    }
-
-    // Setup .dynamic
-    section_ptrs_.push_back(dynamic_builder_.GetSection());
-    AssignSectionStr(&dynamic_builder_, &shstrtab_);
-    dynamic_builder_.SetSectionIndex(section_index_);
-    section_index_++;
-
-    // Fill in the hash section.
-    hash_ = dynsym_builder_.GenerateHashContents();
-
-    if (debug_logging_) {
-      LOG(INFO) << ".hash size (bytes)=" << hash_.size() * sizeof(Elf_Word)
-                << std::hex << " " << hash_.size() * sizeof(Elf_Word);
-    }
-
-    Elf_Word base_offset = sizeof(Elf_Ehdr) + sizeof(program_headers_);
-
-    // Get the layout in the sections.
-    //
-    // Get the layout of the dynsym section.
-    dynsym_builder_.GetSection()->sh_offset =
-        RoundUp(base_offset, dynsym_builder_.GetSection()->sh_addralign);
-    dynsym_builder_.GetSection()->sh_addr = dynsym_builder_.GetSection()->sh_offset;
-    dynsym_builder_.GetSection()->sh_size = dynsym_builder_.GetSize() * sizeof(Elf_Sym);
-    dynsym_builder_.GetSection()->sh_link = dynsym_builder_.GetLink();
-
-    // Get the layout of the dynstr section.
-    dynsym_builder_.GetStrTab()->GetSection()->sh_offset =
-        NextOffset<Elf_Word, Elf_Shdr>(*dynsym_builder_.GetStrTab()->GetSection(),
-                                       *dynsym_builder_.GetSection());
-    dynsym_builder_.GetStrTab()->GetSection()->sh_addr =
-        dynsym_builder_.GetStrTab()->GetSection()->sh_offset;
-    dynsym_builder_.GetStrTab()->GetSection()->sh_size = dynstr_.size();
-    dynsym_builder_.GetStrTab()->GetSection()->sh_link = dynsym_builder_.GetStrTab()->GetLink();
-
-    // Get the layout of the hash section
-    hash_builder_.GetSection()->sh_offset =
-        NextOffset<Elf_Word, Elf_Shdr>(*hash_builder_.GetSection(),
-                                       *dynsym_builder_.GetStrTab()->GetSection());
-    hash_builder_.GetSection()->sh_addr = hash_builder_.GetSection()->sh_offset;
-    hash_builder_.GetSection()->sh_size = hash_.size() * sizeof(Elf_Word);
-    hash_builder_.GetSection()->sh_link = hash_builder_.GetLink();
-
-    // Get the layout of the extra sections with SHF_ALLOC flag.
-    // This will deal with .eh_frame and .eh_frame_hdr.
-    // .eh_frame contains relative pointers to .text which we
-    // want to fixup between the calls to Init() and Write().
-    // Therefore we handle those sections here as opposed to Write().
-    // It also has the nice side effect of including .eh_frame
-    // with the rest of LOAD_R segment.  It must come before .rodata
-    // because .rodata and .text must be next to each other.
-    Elf_Shdr* prev = hash_builder_.GetSection();
-    for (auto* it : other_builders_) {
-      if ((it->GetSection()->sh_flags & SHF_ALLOC) != 0) {
-        it->GetSection()->sh_offset = NextOffset<Elf_Word, Elf_Shdr>(*it->GetSection(), *prev);
-        it->GetSection()->sh_addr = it->GetSection()->sh_offset;
-        it->GetSection()->sh_size = it->GetBuffer()->size();
-        it->GetSection()->sh_link = it->GetLink();
-        prev = it->GetSection();
+    // Layout of all sections - determine the final file offsets and addresses.
+    // This must be done after we have built all sections and know their size.
+    Elf_Off file_offset = kProgramHeadersOffset + kProgramHeadersSize;
+    Elf_Addr load_address = file_offset;
+    std::vector<Elf_Shdr> section_headers;
+    section_headers.reserve(1u + sections.size());
+    section_headers.push_back(Elf_Shdr());  // NULL at index 0.
+    for (auto* section : sections) {
+      Elf_Shdr* header = section->GetHeader();
+      Elf_Off alignment = header->sh_addralign > 0 ? header->sh_addralign : 1;
+      header->sh_size = section->GetSize();
+      header->sh_link = section->GetLink();
+      // Allocate memory for the section in the file.
+      if (header->sh_type != SHT_NOBITS) {
+        header->sh_offset = RoundUp(file_offset, alignment);
+        file_offset = header->sh_offset + header->sh_size;
       }
+      // Allocate memory for the section during program execution.
+      if ((header->sh_flags & SHF_ALLOC) != 0) {
+        header->sh_addr = RoundUp(load_address, alignment);
+        load_address = header->sh_addr + header->sh_size;
+      }
+      if (debug_logging_) {
+        LOG(INFO) << "Section " << section->GetName() << ":" << std::hex
+                  << " offset=0x" << header->sh_offset
+                  << " addr=0x" << header->sh_addr
+                  << " size=0x" << header->sh_size;
+      }
+      // Collect section headers into a contiguous array for convenience.
+      section_headers.push_back(*header);
     }
-    // If the sections exist, check that they have been handled.
-    const auto* eh_frame = FindRawSection(".eh_frame");
+    Elf_Off section_headers_offset = RoundUp(file_offset, sizeof(Elf_Word));
+
+    // Create program headers now that we know the layout of the whole file.
+    // Each segment contains one or more sections which are mapped together.
+    // Not all sections are mapped during the execution of the program.
+    // PT_LOAD does the mapping.  Other PT_* types allow the program to locate
+    // interesting parts of memory and their addresses overlap with PT_LOAD.
+    std::vector<Elf_Phdr> program_headers;
+    program_headers.push_back(MakeProgramHeader(PT_PHDR, PF_R,
+      kProgramHeadersOffset, kProgramHeadersSize, sizeof(Elf_Word)));
+    // Create the main LOAD R segment which spans all sections up to .rodata.
+    const Elf_Shdr* rodata = rodata_.GetHeader();
+    program_headers.push_back(MakeProgramHeader(PT_LOAD, PF_R,
+      0, rodata->sh_offset + rodata->sh_size, rodata->sh_addralign));
+    program_headers.push_back(MakeProgramHeader(PT_LOAD, PF_R | PF_X, text_));
+    if (bss_.GetHeader()->sh_size != 0u) {
+      program_headers.push_back(MakeProgramHeader(PT_LOAD, PF_R | PF_W, bss_));
+    }
+    program_headers.push_back(MakeProgramHeader(PT_LOAD, PF_R | PF_W, dynamic_));
+    program_headers.push_back(MakeProgramHeader(PT_DYNAMIC, PF_R | PF_W, dynamic_));
+    const Section* eh_frame = FindSection(".eh_frame");
     if (eh_frame != nullptr) {
-      DCHECK_NE(eh_frame->GetSection()->sh_offset, 0u);
-    }
-    const auto* eh_frame_hdr = FindRawSection(".eh_frame_hdr");
-    if (eh_frame_hdr != nullptr) {
-      DCHECK_NE(eh_frame_hdr->GetSection()->sh_offset, 0u);
-    }
-
-    // Get the layout of the rodata section.
-    rodata_builder_.GetSection()->sh_offset =
-        NextOffset<Elf_Word, Elf_Shdr>(*rodata_builder_.GetSection(), *prev);
-    rodata_builder_.GetSection()->sh_addr = rodata_builder_.GetSection()->sh_offset;
-    rodata_builder_.GetSection()->sh_size = rodata_builder_.GetSize();
-    rodata_builder_.GetSection()->sh_link = rodata_builder_.GetLink();
-
-    // Get the layout of the text section.
-    text_builder_.GetSection()->sh_offset =
-        NextOffset<Elf_Word, Elf_Shdr>(*text_builder_.GetSection(),
-                                       *rodata_builder_.GetSection());
-    text_builder_.GetSection()->sh_addr = text_builder_.GetSection()->sh_offset;
-    text_builder_.GetSection()->sh_size = text_builder_.GetSize();
-    text_builder_.GetSection()->sh_link = text_builder_.GetLink();
-    CHECK_ALIGNED(rodata_builder_.GetSection()->sh_offset +
-                  rodata_builder_.GetSection()->sh_size, kPageSize);
-
-    // Get the layout of the .bss section.
-    bss_builder_.GetSection()->sh_offset =
-        NextOffset<Elf_Word, Elf_Shdr>(*bss_builder_.GetSection(),
-                                       *text_builder_.GetSection());
-    bss_builder_.GetSection()->sh_addr = bss_builder_.GetSection()->sh_offset;
-    bss_builder_.GetSection()->sh_size = bss_builder_.GetSize();
-    bss_builder_.GetSection()->sh_link = bss_builder_.GetLink();
-
-    // Get the layout of the dynamic section.
-    CHECK(IsAlignedParam(bss_builder_.GetSection()->sh_offset,
-                         dynamic_builder_.GetSection()->sh_addralign));
-    dynamic_builder_.GetSection()->sh_offset = bss_builder_.GetSection()->sh_offset;
-    dynamic_builder_.GetSection()->sh_addr =
-        NextOffset<Elf_Word, Elf_Shdr>(*dynamic_builder_.GetSection(), *bss_builder_.GetSection());
-    dynamic_builder_.GetSection()->sh_size = dynamic_builder_.GetSize() * sizeof(Elf_Dyn);
-    dynamic_builder_.GetSection()->sh_link = dynamic_builder_.GetLink();
-
-    if (debug_logging_) {
-      LOG(INFO) << "dynsym off=" << dynsym_builder_.GetSection()->sh_offset
-                << " dynsym size=" << dynsym_builder_.GetSection()->sh_size;
-      LOG(INFO) << "dynstr off=" << dynsym_builder_.GetStrTab()->GetSection()->sh_offset
-                << " dynstr size=" << dynsym_builder_.GetStrTab()->GetSection()->sh_size;
-      LOG(INFO) << "hash off=" << hash_builder_.GetSection()->sh_offset
-                << " hash size=" << hash_builder_.GetSection()->sh_size;
-      LOG(INFO) << "rodata off=" << rodata_builder_.GetSection()->sh_offset
-                << " rodata size=" << rodata_builder_.GetSection()->sh_size;
-      LOG(INFO) << "text off=" << text_builder_.GetSection()->sh_offset
-                << " text size=" << text_builder_.GetSection()->sh_size;
-      LOG(INFO) << "dynamic off=" << dynamic_builder_.GetSection()->sh_offset
-                << " dynamic size=" << dynamic_builder_.GetSection()->sh_size;
-    }
-
-    return true;
-  }
-
-  bool Write() {
-    std::vector<ElfFilePiece<Elf_Word>*> pieces;
-    Elf_Shdr* prev = dynamic_builder_.GetSection();
-    std::string strtab;
-
-    if (IncludingDebugSymbols()) {
-      // Setup .symtab
-      section_ptrs_.push_back(symtab_builder_.GetSection());
-      AssignSectionStr(&symtab_builder_, &shstrtab_);
-      symtab_builder_.SetSectionIndex(section_index_);
-      section_index_++;
-
-      // Setup .strtab
-      section_ptrs_.push_back(symtab_builder_.GetStrTab()->GetSection());
-      AssignSectionStr(symtab_builder_.GetStrTab(), &shstrtab_);
-      symtab_builder_.GetStrTab()->SetSectionIndex(section_index_);
-      section_index_++;
-
-      strtab = symtab_builder_.GenerateStrtab();
-      if (debug_logging_) {
-        LOG(INFO) << "strtab size (bytes)    =" << strtab.size()
-                  << std::hex << " " << strtab.size();
-        LOG(INFO) << "symtab size (elements) =" << symtab_builder_.GetSize()
-                  << std::hex << " " << symtab_builder_.GetSize();
+      program_headers.push_back(MakeProgramHeader(PT_LOAD, PF_R, *eh_frame));
+      const Section* eh_frame_hdr = FindSection(".eh_frame_hdr");
+      if (eh_frame_hdr != nullptr) {
+        // Check layout: eh_frame is before eh_frame_hdr and there is no gap.
+        CHECK_LE(eh_frame->GetHeader()->sh_offset, eh_frame_hdr->GetHeader()->sh_offset);
+        CHECK_EQ(eh_frame->GetHeader()->sh_offset + eh_frame->GetHeader()->sh_size,
+                 eh_frame_hdr->GetHeader()->sh_offset);
+        // Extend the PT_LOAD of .eh_frame to include the .eh_frame_hdr as well.
+        program_headers.back().p_filesz += eh_frame_hdr->GetHeader()->sh_size;
+        program_headers.back().p_memsz  += eh_frame_hdr->GetHeader()->sh_size;
+        program_headers.push_back(MakeProgramHeader(PT_GNU_EH_FRAME, PF_R, *eh_frame_hdr));
       }
     }
+    CHECK_LE(program_headers.size(), kMaxProgramHeaders);
 
-    // Setup all the other sections.
-    for (auto* builder : other_builders_) {
-      section_ptrs_.push_back(builder->GetSection());
-      AssignSectionStr(builder, &shstrtab_);
-      builder->SetSectionIndex(section_index_);
-      section_index_++;
-    }
+    // Create the main ELF header.
+    Elf_Ehdr elf_header = MakeElfHeader(isa_);
+    elf_header.e_phoff = kProgramHeadersOffset;
+    elf_header.e_shoff = section_headers_offset;
+    elf_header.e_phnum = program_headers.size();
+    elf_header.e_shnum = section_headers.size();
+    elf_header.e_shstrndx = shstrtab_.GetSectionIndex();
 
-    // Setup shstrtab
-    section_ptrs_.push_back(shstrtab_builder_.GetSection());
-    AssignSectionStr(&shstrtab_builder_, &shstrtab_);
-    shstrtab_builder_.SetSectionIndex(section_index_);
-    section_index_++;
-
-    if (debug_logging_) {
-      LOG(INFO) << ".shstrtab size    (bytes)   =" << shstrtab_.size()
-                << std::hex << " " << shstrtab_.size();
-      LOG(INFO) << "section list size (elements)=" << section_ptrs_.size()
-                << std::hex << " " << section_ptrs_.size();
-    }
-
-    if (IncludingDebugSymbols()) {
-      // Get the layout of the symtab section.
-      symtab_builder_.GetSection()->sh_offset =
-          NextOffset<Elf_Word, Elf_Shdr>(*symtab_builder_.GetSection(),
-                                         *dynamic_builder_.GetSection());
-      symtab_builder_.GetSection()->sh_addr = 0;
-      // Add to leave space for the null symbol.
-      symtab_builder_.GetSection()->sh_size = symtab_builder_.GetSize() * sizeof(Elf_Sym);
-      symtab_builder_.GetSection()->sh_link = symtab_builder_.GetLink();
-
-      // Get the layout of the dynstr section.
-      symtab_builder_.GetStrTab()->GetSection()->sh_offset =
-          NextOffset<Elf_Word, Elf_Shdr>(*symtab_builder_.GetStrTab()->GetSection(),
-                                         *symtab_builder_.GetSection());
-      symtab_builder_.GetStrTab()->GetSection()->sh_addr = 0;
-      symtab_builder_.GetStrTab()->GetSection()->sh_size = strtab.size();
-      symtab_builder_.GetStrTab()->GetSection()->sh_link = symtab_builder_.GetStrTab()->GetLink();
-
-      prev = symtab_builder_.GetStrTab()->GetSection();
-      if (debug_logging_) {
-        LOG(INFO) << "symtab off=" << symtab_builder_.GetSection()->sh_offset
-                  << " symtab size=" << symtab_builder_.GetSection()->sh_size;
-        LOG(INFO) << "strtab off=" << symtab_builder_.GetStrTab()->GetSection()->sh_offset
-                  << " strtab size=" << symtab_builder_.GetStrTab()->GetSection()->sh_size;
-      }
-    }
-
-    // Get the layout of the extra sections without SHF_ALLOC flag.
-    // (This will deal with the debug sections if they are there)
-    for (auto* it : other_builders_) {
-      if ((it->GetSection()->sh_flags & SHF_ALLOC) == 0) {
-        it->GetSection()->sh_offset = NextOffset<Elf_Word, Elf_Shdr>(*it->GetSection(), *prev);
-        it->GetSection()->sh_addr = 0;
-        it->GetSection()->sh_size = it->GetBuffer()->size();
-        it->GetSection()->sh_link = it->GetLink();
-
-        // We postpone adding an ElfFilePiece to keep the order in "pieces."
-
-        prev = it->GetSection();
-        if (debug_logging_) {
-          LOG(INFO) << it->GetName() << " off=" << it->GetSection()->sh_offset
-                    << " size=" << it->GetSection()->sh_size;
-        }
-      }
-    }
-
-    // Get the layout of the shstrtab section
-    shstrtab_builder_.GetSection()->sh_offset =
-        NextOffset<Elf_Word, Elf_Shdr>(*shstrtab_builder_.GetSection(), *prev);
-    shstrtab_builder_.GetSection()->sh_addr = 0;
-    shstrtab_builder_.GetSection()->sh_size = shstrtab_.size();
-    shstrtab_builder_.GetSection()->sh_link = shstrtab_builder_.GetLink();
-    if (debug_logging_) {
-        LOG(INFO) << "shstrtab off=" << shstrtab_builder_.GetSection()->sh_offset
-                  << " shstrtab size=" << shstrtab_builder_.GetSection()->sh_size;
-    }
-
-    // The section list comes after come after.
-    Elf_Word sections_offset = RoundUp(
-        shstrtab_builder_.GetSection()->sh_offset + shstrtab_builder_.GetSection()->sh_size,
-        sizeof(Elf_Word));
-
-    // Setup the actual symbol arrays.
-    std::vector<Elf_Sym> dynsym = dynsym_builder_.GenerateSymtab();
-    CHECK_EQ(dynsym.size() * sizeof(Elf_Sym), dynsym_builder_.GetSection()->sh_size);
-    std::vector<Elf_Sym> symtab;
-    if (IncludingDebugSymbols()) {
-      symtab = symtab_builder_.GenerateSymtab();
-      CHECK_EQ(symtab.size() * sizeof(Elf_Sym), symtab_builder_.GetSection()->sh_size);
-    }
-
-    // Setup the dynamic section.
-    // This will add the 2 values we cannot know until now time, namely the size
-    // and the soname_offset.
-    std::vector<Elf_Dyn> dynamic = dynamic_builder_.GetDynamics(dynstr_.size(),
-                                                                  dynstr_soname_offset_);
-    CHECK_EQ(dynamic.size() * sizeof(Elf_Dyn), dynamic_builder_.GetSection()->sh_size);
-
-    // Finish setup of the program headers now that we know the layout of the
-    // whole file.
-    Elf_Word load_r_size =
-        rodata_builder_.GetSection()->sh_offset + rodata_builder_.GetSection()->sh_size;
-    program_headers_[PH_LOAD_R__].p_filesz = load_r_size;
-    program_headers_[PH_LOAD_R__].p_memsz =  load_r_size;
-    program_headers_[PH_LOAD_R__].p_align =  rodata_builder_.GetSection()->sh_addralign;
-
-    Elf_Word load_rx_size = text_builder_.GetSection()->sh_size;
-    program_headers_[PH_LOAD_R_X].p_offset = text_builder_.GetSection()->sh_offset;
-    program_headers_[PH_LOAD_R_X].p_vaddr  = text_builder_.GetSection()->sh_offset;
-    program_headers_[PH_LOAD_R_X].p_paddr  = text_builder_.GetSection()->sh_offset;
-    program_headers_[PH_LOAD_R_X].p_filesz = load_rx_size;
-    program_headers_[PH_LOAD_R_X].p_memsz  = load_rx_size;
-    program_headers_[PH_LOAD_R_X].p_align  = text_builder_.GetSection()->sh_addralign;
-
-    program_headers_[PH_LOAD_RW_BSS].p_offset = bss_builder_.GetSection()->sh_offset;
-    program_headers_[PH_LOAD_RW_BSS].p_vaddr  = bss_builder_.GetSection()->sh_offset;
-    program_headers_[PH_LOAD_RW_BSS].p_paddr  = bss_builder_.GetSection()->sh_offset;
-    program_headers_[PH_LOAD_RW_BSS].p_filesz = 0;
-    program_headers_[PH_LOAD_RW_BSS].p_memsz  = bss_builder_.GetSection()->sh_size;
-    program_headers_[PH_LOAD_RW_BSS].p_align  = bss_builder_.GetSection()->sh_addralign;
-
-    program_headers_[PH_LOAD_RW_DYNAMIC].p_offset = dynamic_builder_.GetSection()->sh_offset;
-    program_headers_[PH_LOAD_RW_DYNAMIC].p_vaddr  = dynamic_builder_.GetSection()->sh_addr;
-    program_headers_[PH_LOAD_RW_DYNAMIC].p_paddr  = dynamic_builder_.GetSection()->sh_addr;
-    program_headers_[PH_LOAD_RW_DYNAMIC].p_filesz = dynamic_builder_.GetSection()->sh_size;
-    program_headers_[PH_LOAD_RW_DYNAMIC].p_memsz  = dynamic_builder_.GetSection()->sh_size;
-    program_headers_[PH_LOAD_RW_DYNAMIC].p_align  = dynamic_builder_.GetSection()->sh_addralign;
-
-    program_headers_[PH_DYNAMIC].p_offset = dynamic_builder_.GetSection()->sh_offset;
-    program_headers_[PH_DYNAMIC].p_vaddr  = dynamic_builder_.GetSection()->sh_addr;
-    program_headers_[PH_DYNAMIC].p_paddr  = dynamic_builder_.GetSection()->sh_addr;
-    program_headers_[PH_DYNAMIC].p_filesz = dynamic_builder_.GetSection()->sh_size;
-    program_headers_[PH_DYNAMIC].p_memsz  = dynamic_builder_.GetSection()->sh_size;
-    program_headers_[PH_DYNAMIC].p_align  = dynamic_builder_.GetSection()->sh_addralign;
-
-    const auto* eh_frame_hdr = FindRawSection(".eh_frame_hdr");
-    if (eh_frame_hdr != nullptr) {
-      const auto* eh_frame = FindRawSection(".eh_frame");
-      // Check layout:
-      // 1) eh_frame is before eh_frame_hdr.
-      // 2) There's no gap.
-      CHECK(eh_frame != nullptr);
-      CHECK_LE(eh_frame->GetSection()->sh_offset, eh_frame_hdr->GetSection()->sh_offset);
-      CHECK_EQ(eh_frame->GetSection()->sh_offset + eh_frame->GetSection()->sh_size,
-               eh_frame_hdr->GetSection()->sh_offset);
-
-      program_headers_[PH_EH_FRAME_HDR].p_type   = PT_GNU_EH_FRAME;
-      program_headers_[PH_EH_FRAME_HDR].p_offset = eh_frame_hdr->GetSection()->sh_offset;
-      program_headers_[PH_EH_FRAME_HDR].p_vaddr  = eh_frame_hdr->GetSection()->sh_addr;
-      program_headers_[PH_EH_FRAME_HDR].p_paddr  = eh_frame_hdr->GetSection()->sh_addr;
-      program_headers_[PH_EH_FRAME_HDR].p_filesz = eh_frame_hdr->GetSection()->sh_size;
-      program_headers_[PH_EH_FRAME_HDR].p_memsz  = eh_frame_hdr->GetSection()->sh_size;
-      program_headers_[PH_EH_FRAME_HDR].p_align  = eh_frame_hdr->GetSection()->sh_addralign;
-    }
-
-    // Finish setup of the Ehdr values.
-    elf_header_.e_phoff = PHDR_OFFSET;
-    elf_header_.e_shoff = sections_offset;
-    elf_header_.e_phnum = (bss_builder_.GetSection()->sh_size != 0u) ? PH_NUM : PH_NUM - 1;
-    elf_header_.e_shnum = section_ptrs_.size();
-    elf_header_.e_shstrndx = shstrtab_builder_.GetSectionIndex();
-
-    // Add the rest of the pieces to the list.
-    pieces.push_back(new ElfFileMemoryPiece<Elf_Word>("Elf Header", 0, &elf_header_,
-                                                      sizeof(elf_header_)));
-    if (bss_builder_.GetSection()->sh_size != 0u) {
-      pieces.push_back(new ElfFileMemoryPiece<Elf_Word>("Program headers", PHDR_OFFSET,
-                                                        &program_headers_[0],
-                                                        elf_header_.e_phnum * sizeof(Elf_Phdr)));
-    } else {
-      // Skip PH_LOAD_RW_BSS.
-      Elf_Word part1_size = PH_LOAD_RW_BSS * sizeof(Elf_Phdr);
-      Elf_Word part2_size = (PH_NUM - PH_LOAD_RW_BSS - 1) * sizeof(Elf_Phdr);
-      CHECK_EQ(part1_size + part2_size, elf_header_.e_phnum * sizeof(Elf_Phdr));
-      pieces.push_back(new ElfFileMemoryPiece<Elf_Word>("Program headers", PHDR_OFFSET,
-                                                        &program_headers_[0], part1_size));
-      pieces.push_back(new ElfFileMemoryPiece<Elf_Word>("Program headers part 2",
-                                                        PHDR_OFFSET + part1_size,
-                                                        &program_headers_[PH_LOAD_RW_BSS + 1],
-                                                        part2_size));
-    }
-    pieces.push_back(new ElfFileMemoryPiece<Elf_Word>(".dynamic",
-                                                      dynamic_builder_.GetSection()->sh_offset,
-                                                      dynamic.data(),
-                                                      dynamic_builder_.GetSection()->sh_size));
-    pieces.push_back(new ElfFileMemoryPiece<Elf_Word>(".dynsym", dynsym_builder_.GetSection()->sh_offset,
-                                                      dynsym.data(),
-                                                      dynsym.size() * sizeof(Elf_Sym)));
-    pieces.push_back(new ElfFileMemoryPiece<Elf_Word>(".dynstr",
-                                                    dynsym_builder_.GetStrTab()->GetSection()->sh_offset,
-                                                    dynstr_.c_str(), dynstr_.size()));
-    pieces.push_back(new ElfFileMemoryPiece<Elf_Word>(".hash", hash_builder_.GetSection()->sh_offset,
-                                                      hash_.data(),
-                                                      hash_.size() * sizeof(Elf_Word)));
-    pieces.push_back(new ElfFileRodataPiece<Elf_Word>(rodata_builder_.GetSection()->sh_offset,
-                                                      oat_writer_));
-    pieces.push_back(new ElfFileOatTextPiece<Elf_Word>(text_builder_.GetSection()->sh_offset,
-                                                       oat_writer_));
-    if (IncludingDebugSymbols()) {
-      pieces.push_back(new ElfFileMemoryPiece<Elf_Word>(".symtab",
-                                                        symtab_builder_.GetSection()->sh_offset,
-                                                        symtab.data(),
-                                                        symtab.size() * sizeof(Elf_Sym)));
-      pieces.push_back(new ElfFileMemoryPiece<Elf_Word>(".strtab",
-                                                    symtab_builder_.GetStrTab()->GetSection()->sh_offset,
-                                                    strtab.c_str(), strtab.size()));
-    }
-    pieces.push_back(new ElfFileMemoryPiece<Elf_Word>(".shstrtab",
-                                                      shstrtab_builder_.GetSection()->sh_offset,
-                                                      &shstrtab_[0], shstrtab_.size()));
-    for (uint32_t i = 0; i < section_ptrs_.size(); ++i) {
-      // Just add all the sections in induvidually since they are all over the
-      // place on the heap/stack.
-      Elf_Word cur_off = sections_offset + i * sizeof(Elf_Shdr);
-      pieces.push_back(new ElfFileMemoryPiece<Elf_Word>("section table piece", cur_off,
-                                                        section_ptrs_[i], sizeof(Elf_Shdr)));
-    }
-
-    // Postponed debug info.
-    for (auto* it : other_builders_) {
-      pieces.push_back(new ElfFileMemoryPiece<Elf_Word>(it->GetName(), it->GetSection()->sh_offset,
-                                                        it->GetBuffer()->data(),
-                                                        it->GetBuffer()->size()));
-    }
-
-    if (!WriteOutFile(pieces)) {
-      LOG(ERROR) << "Unable to write to file " << elf_file_->GetPath();
-
-      STLDeleteElements(&pieces);  // Have to manually clean pieces.
+    // Write all headers and section content to the file.
+    // Depending on the implementations of Section::Write, this
+    // might be just memory copies or some more elaborate operations.
+    if (!WriteArray(elf_file, &elf_header, 1)) {
+      LOG(INFO) << "Failed to write the ELF header";
       return false;
     }
-
-    STLDeleteElements(&pieces);  // Have to manually clean pieces.
+    if (!WriteArray(elf_file, program_headers.data(), program_headers.size())) {
+      LOG(INFO) << "Failed to write the program headers";
+      return false;
+    }
+    for (Section* section : sections) {
+      const Elf_Shdr* header = section->GetHeader();
+      if (header->sh_type != SHT_NOBITS) {
+        if (!SeekTo(elf_file, header->sh_offset) || !section->Write(elf_file)) {
+          LOG(INFO) << "Failed to write section " << section->GetName();
+          return false;
+        }
+        Elf_Word current_offset = lseek(elf_file->Fd(), 0, SEEK_CUR);
+        CHECK_EQ(current_offset, header->sh_offset + header->sh_size)
+          << "The number of bytes written does not match GetSize()";
+      }
+    }
+    if (!SeekTo(elf_file, section_headers_offset) ||
+        !WriteArray(elf_file, section_headers.data(), section_headers.size())) {
+      LOG(INFO) << "Failed to write the section headers";
+      return false;
+    }
     return true;
   }
 
-  // Adds the given raw section to the builder.  It does not take ownership.
-  void RegisterRawSection(ElfRawSectionBuilder<ElfTypes>* bld) {
-    other_builders_.push_back(bld);
+  // Adds the given section to the builder.  It does not take ownership.
+  void RegisterSection(Section* section) {
+    other_sections_.push_back(section);
   }
 
-  const ElfRawSectionBuilder<ElfTypes>* FindRawSection(const char* name) {
-    for (const auto* other_builder : other_builders_) {
-      if (other_builder->GetName() == name) {
-        return other_builder;
+  const Section* FindSection(const char* name) {
+    for (const auto* section : other_sections_) {
+      if (section->GetName() == name) {
+        return section;
       }
     }
     return nullptr;
   }
 
  private:
-  void SetISA(InstructionSet isa) {
+  static bool SeekTo(File* elf_file, Elf_Word offset) {
+    DCHECK_LE(lseek(elf_file->Fd(), 0, SEEK_CUR), static_cast<off_t>(offset))
+      << "Seeking backwards";
+    if (static_cast<off_t>(offset) != lseek(elf_file->Fd(), offset, SEEK_SET)) {
+      PLOG(ERROR) << "Failed to seek in file " << elf_file->GetPath();
+      return false;
+    }
+    return true;
+  }
+
+  template<typename T>
+  static bool WriteArray(File* elf_file, const T* data, size_t count) {
+    DCHECK(data != nullptr);
+    if (!elf_file->WriteFully(data, count * sizeof(T))) {
+      PLOG(ERROR) << "Failed to write to file " << elf_file->GetPath();
+      return false;
+    }
+    return true;
+  }
+
+  // Helper - create segment header based on memory range.
+  static Elf_Phdr MakeProgramHeader(Elf_Word type, Elf_Word flags,
+                                    Elf_Off offset, Elf_Word size, Elf_Word align) {
+    Elf_Phdr phdr = Elf_Phdr();
+    phdr.p_type    = type;
+    phdr.p_flags   = flags;
+    phdr.p_offset  = offset;
+    phdr.p_vaddr   = offset;
+    phdr.p_paddr   = offset;
+    phdr.p_filesz  = size;
+    phdr.p_memsz   = size;
+    phdr.p_align   = align;
+    return phdr;
+  }
+
+  // Helper - create segment header based on section header.
+  static Elf_Phdr MakeProgramHeader(Elf_Word type, Elf_Word flags,
+                                    const Section& section) {
+    const Elf_Shdr* shdr = section.GetHeader();
+    // Only run-time allocated sections should be in segment headers.
+    CHECK_NE(shdr->sh_flags & SHF_ALLOC, 0u);
+    Elf_Phdr phdr = Elf_Phdr();
+    phdr.p_type   = type;
+    phdr.p_flags  = flags;
+    phdr.p_offset = shdr->sh_offset;
+    phdr.p_vaddr  = shdr->sh_addr;
+    phdr.p_paddr  = shdr->sh_addr;
+    phdr.p_filesz = shdr->sh_type != SHT_NOBITS ? shdr->sh_size : 0u;
+    phdr.p_memsz  = shdr->sh_size;
+    phdr.p_align  = shdr->sh_addralign;
+    return phdr;
+  }
+
+  static Elf_Ehdr MakeElfHeader(InstructionSet isa) {
+    Elf_Ehdr elf_header = Elf_Ehdr();
     switch (isa) {
       case kArm:
         // Fall through.
       case kThumb2: {
-        elf_header_.e_machine = EM_ARM;
-        elf_header_.e_flags = EF_ARM_EABI_VER5;
+        elf_header.e_machine = EM_ARM;
+        elf_header.e_flags = EF_ARM_EABI_VER5;
         break;
       }
       case kArm64: {
-        elf_header_.e_machine = EM_AARCH64;
-        elf_header_.e_flags = 0;
+        elf_header.e_machine = EM_AARCH64;
+        elf_header.e_flags = 0;
         break;
       }
       case kX86: {
-        elf_header_.e_machine = EM_386;
-        elf_header_.e_flags = 0;
+        elf_header.e_machine = EM_386;
+        elf_header.e_flags = 0;
         break;
       }
       case kX86_64: {
-        elf_header_.e_machine = EM_X86_64;
-        elf_header_.e_flags = 0;
+        elf_header.e_machine = EM_X86_64;
+        elf_header.e_flags = 0;
         break;
       }
       case kMips: {
-        elf_header_.e_machine = EM_MIPS;
-        elf_header_.e_flags = (EF_MIPS_NOREORDER |
+        elf_header.e_machine = EM_MIPS;
+        elf_header.e_flags = (EF_MIPS_NOREORDER |
                                EF_MIPS_PIC       |
                                EF_MIPS_CPIC      |
                                EF_MIPS_ABI_O32   |
@@ -1250,147 +852,82 @@
         break;
       }
       case kMips64: {
-        elf_header_.e_machine = EM_MIPS;
-        elf_header_.e_flags = (EF_MIPS_NOREORDER |
+        elf_header.e_machine = EM_MIPS;
+        elf_header.e_flags = (EF_MIPS_NOREORDER |
                                EF_MIPS_PIC       |
                                EF_MIPS_CPIC      |
                                EF_MIPS_ARCH_64R6);
         break;
       }
-      default: {
-        fatal_error_ = true;
-        LOG(FATAL) << "Unknown instruction set: " << isa;
-        break;
+      case kNone: {
+        LOG(FATAL) << "No instruction set";
       }
     }
-  }
 
-  void SetupEhdr() {
-    memset(&elf_header_, 0, sizeof(elf_header_));
-    elf_header_.e_ident[EI_MAG0]       = ELFMAG0;
-    elf_header_.e_ident[EI_MAG1]       = ELFMAG1;
-    elf_header_.e_ident[EI_MAG2]       = ELFMAG2;
-    elf_header_.e_ident[EI_MAG3]       = ELFMAG3;
-    elf_header_.e_ident[EI_CLASS]      = (sizeof(Elf_Addr) == sizeof(Elf32_Addr))
+    elf_header.e_ident[EI_MAG0]       = ELFMAG0;
+    elf_header.e_ident[EI_MAG1]       = ELFMAG1;
+    elf_header.e_ident[EI_MAG2]       = ELFMAG2;
+    elf_header.e_ident[EI_MAG3]       = ELFMAG3;
+    elf_header.e_ident[EI_CLASS]      = (sizeof(Elf_Addr) == sizeof(Elf32_Addr))
                                          ? ELFCLASS32 : ELFCLASS64;;
-    elf_header_.e_ident[EI_DATA]       = ELFDATA2LSB;
-    elf_header_.e_ident[EI_VERSION]    = EV_CURRENT;
-    elf_header_.e_ident[EI_OSABI]      = ELFOSABI_LINUX;
-    elf_header_.e_ident[EI_ABIVERSION] = 0;
-    elf_header_.e_type = ET_DYN;
-    elf_header_.e_version = 1;
-    elf_header_.e_entry = 0;
-    elf_header_.e_ehsize = sizeof(Elf_Ehdr);
-    elf_header_.e_phentsize = sizeof(Elf_Phdr);
-    elf_header_.e_shentsize = sizeof(Elf_Shdr);
-    elf_header_.e_phoff = sizeof(Elf_Ehdr);
+    elf_header.e_ident[EI_DATA]       = ELFDATA2LSB;
+    elf_header.e_ident[EI_VERSION]    = EV_CURRENT;
+    elf_header.e_ident[EI_OSABI]      = ELFOSABI_LINUX;
+    elf_header.e_ident[EI_ABIVERSION] = 0;
+    elf_header.e_type = ET_DYN;
+    elf_header.e_version = 1;
+    elf_header.e_entry = 0;
+    elf_header.e_ehsize = sizeof(Elf_Ehdr);
+    elf_header.e_phentsize = sizeof(Elf_Phdr);
+    elf_header.e_shentsize = sizeof(Elf_Shdr);
+    elf_header.e_phoff = sizeof(Elf_Ehdr);
+    return elf_header;
   }
 
-  // Sets up a bunch of the required Dynamic Section entries.
-  // Namely it will initialize all the mandatory ones that it can.
-  // Specifically:
-  // DT_HASH
-  // DT_STRTAB
-  // DT_SYMTAB
-  // DT_SYMENT
-  //
-  // Some such as DT_SONAME, DT_STRSZ and DT_NULL will be put in later.
-  void SetupDynamic() {
-    dynamic_builder_.AddDynamicTag(DT_HASH, 0, &hash_builder_);
-    dynamic_builder_.AddDynamicTag(DT_STRTAB, 0, dynsym_builder_.GetStrTab());
-    dynamic_builder_.AddDynamicTag(DT_SYMTAB, 0, &dynsym_builder_);
-    dynamic_builder_.AddDynamicTag(DT_SYMENT, sizeof(Elf_Sym));
+  void BuildDynamicSection(const std::string& elf_file_path) {
+    std::string soname(elf_file_path);
+    size_t directory_separator_pos = soname.rfind('/');
+    if (directory_separator_pos != std::string::npos) {
+      soname = soname.substr(directory_separator_pos + 1);
+    }
+    // NB: We must add the name before adding DT_STRSZ.
+    Elf_Word soname_offset = dynstr_.AddName(soname);
+
+    dynamic_.AddDynamicTag(DT_HASH, 0, &hash_);
+    dynamic_.AddDynamicTag(DT_STRTAB, 0, &dynstr_);
+    dynamic_.AddDynamicTag(DT_SYMTAB, 0, &dynsym_);
+    dynamic_.AddDynamicTag(DT_SYMENT, sizeof(Elf_Sym), nullptr);
+    dynamic_.AddDynamicTag(DT_STRSZ, dynstr_.GetSize(), nullptr);
+    dynamic_.AddDynamicTag(DT_SONAME, soname_offset, nullptr);
   }
 
-  // Sets up the basic dynamic symbols that are needed, namely all those we
-  // can know already.
-  //
-  // Specifically adds:
-  // oatdata
-  // oatexec
-  // oatlastword
-  void SetupRequiredSymbols() {
-    dynsym_builder_.AddSymbol("oatdata", &rodata_builder_, 0, true,
-                              rodata_builder_.GetSize(), STB_GLOBAL, STT_OBJECT);
-    dynsym_builder_.AddSymbol("oatexec", &text_builder_, 0, true,
-                              text_builder_.GetSize(), STB_GLOBAL, STT_OBJECT);
-    dynsym_builder_.AddSymbol("oatlastword", &text_builder_, text_builder_.GetSize() - 4,
-                              true, 4, STB_GLOBAL, STT_OBJECT);
-    if (bss_builder_.GetSize() != 0u) {
-      dynsym_builder_.AddSymbol("oatbss", &bss_builder_, 0, true,
-                                bss_builder_.GetSize(), STB_GLOBAL, STT_OBJECT);
-      dynsym_builder_.AddSymbol("oatbsslastword", &bss_builder_, bss_builder_.GetSize() - 4,
-                                true, 4, STB_GLOBAL, STT_OBJECT);
+  void BuildDynsymSection() {
+    dynsym_.AddSymbol("oatdata", &rodata_, 0, true,
+                      rodata_.GetSize(), STB_GLOBAL, STT_OBJECT);
+    dynsym_.AddSymbol("oatexec", &text_, 0, true,
+                      text_.GetSize(), STB_GLOBAL, STT_OBJECT);
+    dynsym_.AddSymbol("oatlastword", &text_, text_.GetSize() - 4,
+                      true, 4, STB_GLOBAL, STT_OBJECT);
+    if (bss_.GetSize() != 0u) {
+      dynsym_.AddSymbol("oatbss", &bss_, 0, true,
+                        bss_.GetSize(), STB_GLOBAL, STT_OBJECT);
+      dynsym_.AddSymbol("oatbsslastword", &bss_, bss_.GetSize() - 4,
+                        true, 4, STB_GLOBAL, STT_OBJECT);
     }
   }
 
-  void AssignSectionStr(ElfSectionBuilder<ElfTypes>* builder, std::string* strtab) {
-    builder->GetSection()->sh_name = strtab->size();
-    *strtab += builder->GetName();
-    *strtab += '\0';
-    if (debug_logging_) {
-      LOG(INFO) << "adding section name \"" << builder->GetName() << "\" "
-                << "to shstrtab at offset " << builder->GetSection()->sh_name;
-    }
-  }
-
-
-  // Write each of the pieces out to the file.
-  bool WriteOutFile(const std::vector<ElfFilePiece<Elf_Word>*>& pieces) {
-    for (auto it = pieces.begin(); it != pieces.end(); ++it) {
-      if (!(*it)->Write(elf_file_)) {
-        return false;
-      }
-    }
-    return true;
-  }
-
-  bool IncludingDebugSymbols() const {
-    return add_symbols_ && symtab_builder_.GetSize() > 1;
-  }
-
-  CodeOutput* const oat_writer_;
-  File* const elf_file_;
-  const bool add_symbols_;
-  const bool debug_logging_;
-
-  bool fatal_error_ = false;
-
-  // What phdr is.
-  static const uint32_t PHDR_OFFSET = sizeof(Elf_Ehdr);
-  enum : uint8_t {
-    PH_PHDR             = 0,
-    PH_LOAD_R__         = 1,
-    PH_LOAD_R_X         = 2,
-    PH_LOAD_RW_BSS      = 3,
-    PH_LOAD_RW_DYNAMIC  = 4,
-    PH_DYNAMIC          = 5,
-    PH_EH_FRAME_HDR     = 6,
-    PH_NUM              = 7,
-  };
-  static const uint32_t PHDR_SIZE = sizeof(Elf_Phdr) * PH_NUM;
-  Elf_Phdr program_headers_[PH_NUM];
-
-  Elf_Ehdr elf_header_;
-
-  Elf_Shdr null_hdr_;
-  std::string shstrtab_;
-  // The index of the current section being built. The first being 1.
-  uint32_t section_index_;
-  std::string dynstr_;
-  uint32_t dynstr_soname_offset_;
-  std::vector<const Elf_Shdr*> section_ptrs_;
-  std::vector<Elf_Word> hash_;
-
-  ElfOatSectionBuilder<ElfTypes> text_builder_;
-  ElfOatSectionBuilder<ElfTypes> rodata_builder_;
-  ElfOatSectionBuilder<ElfTypes> bss_builder_;
-  ElfSymtabBuilder<ElfTypes> dynsym_builder_;
-  ElfSymtabBuilder<ElfTypes> symtab_builder_;
-  ElfSectionBuilder<ElfTypes> hash_builder_;
-  ElfDynamicBuilder<ElfTypes> dynamic_builder_;
-  ElfSectionBuilder<ElfTypes> shstrtab_builder_;
-  std::vector<ElfRawSectionBuilder<ElfTypes>*> other_builders_;
+  InstructionSet isa_;
+  StrtabSection dynstr_;
+  SymtabSection dynsym_;
+  HashSection hash_;
+  OatSection rodata_;
+  OatSection text_;
+  NoBitsSection bss_;
+  DynamicSection dynamic_;
+  StrtabSection strtab_;
+  SymtabSection symtab_;
+  std::vector<Section*> other_sections_;
+  StrtabSection shstrtab_;
 
   DISALLOW_COPY_AND_ASSIGN(ElfBuilder);
 };
diff --git a/compiler/elf_writer.cc b/compiler/elf_writer.cc
index 47402f3..f75638d 100644
--- a/compiler/elf_writer.cc
+++ b/compiler/elf_writer.cc
@@ -39,16 +39,17 @@
 }
 
 void ElfWriter::GetOatElfInformation(File* file,
-                                     size_t& oat_loaded_size,
-                                     size_t& oat_data_offset) {
+                                     size_t* oat_loaded_size,
+                                     size_t* oat_data_offset) {
   std::string error_msg;
   std::unique_ptr<ElfFile> elf_file(ElfFile::Open(file, false, false, &error_msg));
   CHECK(elf_file.get() != nullptr) << error_msg;
 
-  oat_loaded_size = elf_file->GetLoadedSize();
-  CHECK_NE(0U, oat_loaded_size);
-  oat_data_offset = GetOatDataAddress(elf_file.get());
-  CHECK_NE(0U, oat_data_offset);
+  bool success = elf_file->GetLoadedSize(oat_loaded_size, &error_msg);
+  CHECK(success) << error_msg;
+  CHECK_NE(0U, *oat_loaded_size);
+  *oat_data_offset = GetOatDataAddress(elf_file.get());
+  CHECK_NE(0U, *oat_data_offset);
 }
 
 bool ElfWriter::Fixup(File* file, uintptr_t oat_data_begin) {
diff --git a/compiler/elf_writer.h b/compiler/elf_writer.h
index 033c1f8..8e13b51 100644
--- a/compiler/elf_writer.h
+++ b/compiler/elf_writer.h
@@ -38,8 +38,8 @@
   // Looks up information about location of oat file in elf file container.
   // Used for ImageWriter to perform memory layout.
   static void GetOatElfInformation(File* file,
-                                   size_t& oat_loaded_size,
-                                   size_t& oat_data_offset);
+                                   size_t* oat_loaded_size,
+                                   size_t* oat_data_offset);
 
   // Returns runtime oat_data runtime address for an opened ElfFile.
   static uintptr_t GetOatDataAddress(ElfFile* elf_file);
diff --git a/compiler/elf_writer_debug.cc b/compiler/elf_writer_debug.cc
index 28e6999..5e9cf76 100644
--- a/compiler/elf_writer_debug.cc
+++ b/compiler/elf_writer_debug.cc
@@ -18,6 +18,7 @@
 
 #include <unordered_set>
 
+#include "base/casts.h"
 #include "compiled_method.h"
 #include "driver/compiler_driver.h"
 #include "dex_file-inl.h"
@@ -162,33 +163,54 @@
                   ExceptionHeaderValueApplication address_type,
                   std::vector<uint8_t>* eh_frame,
                   std::vector<uintptr_t>* eh_frame_patches,
-                  std::vector<uint8_t>* eh_frame_hdr) {
+                  std::vector<uint8_t>* eh_frame_hdr,
+                  std::vector<uintptr_t>* eh_frame_hdr_patches) {
   const auto& method_infos = oat_writer->GetMethodDebugInfo();
   const InstructionSet isa = compiler->GetInstructionSet();
 
   // Write .eh_frame section.
+  std::map<uint32_t, size_t> address_to_fde_offset_map;
   size_t cie_offset = eh_frame->size();
   WriteEhFrameCIE(isa, address_type, eh_frame);
   for (const OatWriter::DebugInfo& mi : method_infos) {
-    const SwapVector<uint8_t>* opcodes = mi.compiled_method_->GetCFIInfo();
-    if (opcodes != nullptr) {
-      WriteEhFrameFDE(Is64BitInstructionSet(isa), cie_offset,
-                      mi.low_pc_, mi.high_pc_ - mi.low_pc_,
-                      opcodes, eh_frame, eh_frame_patches);
+    if (!mi.deduped_) {  // Only one FDE per unique address.
+      const SwapVector<uint8_t>* opcodes = mi.compiled_method_->GetCFIInfo();
+      if (opcodes != nullptr) {
+        address_to_fde_offset_map.emplace(mi.low_pc_, eh_frame->size());
+        WriteEhFrameFDE(Is64BitInstructionSet(isa), cie_offset,
+                        mi.low_pc_, mi.high_pc_ - mi.low_pc_,
+                        opcodes, eh_frame, eh_frame_patches);
+      }
     }
   }
 
   // Write .eh_frame_hdr section.
   Writer<> header(eh_frame_hdr);
   header.PushUint8(1);  // Version.
-  header.PushUint8(DW_EH_PE_pcrel | DW_EH_PE_sdata4);  // Encoding of .eh_frame pointer.
-  header.PushUint8(DW_EH_PE_omit);  // Encoding of binary search table size.
-  header.PushUint8(DW_EH_PE_omit);  // Encoding of binary search table addresses.
-  // .eh_frame pointer - .eh_frame_hdr section is after .eh_frame section, and need to encode
-  // relative to this location as libunwind doesn't honor datarel for eh_frame_hdr correctly.
-  header.PushInt32(-static_cast<int32_t>(eh_frame->size() + 4U));
-  // Omit binary search table size (number of entries).
-  // Omit binary search table.
+  // Encoding of .eh_frame pointer - libunwind does not honor datarel here,
+  // so we have to use pcrel which means relative to the pointer's location.
+  header.PushUint8(DW_EH_PE_pcrel | DW_EH_PE_sdata4);
+  // Encoding of binary search table size.
+  header.PushUint8(DW_EH_PE_udata4);
+  // Encoding of binary search table addresses - libunwind supports only this
+  // specific combination, which means relative to the start of .eh_frame_hdr.
+  header.PushUint8(DW_EH_PE_datarel | DW_EH_PE_sdata4);
+  // .eh_frame pointer - .eh_frame_hdr section is after .eh_frame section
+  const int32_t relative_eh_frame_begin = -static_cast<int32_t>(eh_frame->size());
+  header.PushInt32(relative_eh_frame_begin - 4U);
+  // Binary search table size (number of entries).
+  header.PushUint32(dchecked_integral_cast<uint32_t>(address_to_fde_offset_map.size()));
+  // Binary search table.
+  for (const auto& address_to_fde_offset : address_to_fde_offset_map) {  // Ordered by address.
+    uint32_t code_address = address_to_fde_offset.first;
+    int32_t fde_address = dchecked_integral_cast<int32_t>(address_to_fde_offset.second);
+    eh_frame_hdr_patches->push_back(header.data()->size());  // Code address needs patching.
+    header.PushUint32(code_address);
+    // We know the exact layout (eh_frame is immediately before eh_frame_hdr)
+    // and the data is relative to the start of the eh_frame_hdr,
+    // so patching isn't necessary (in contrast to the code address above).
+    header.PushInt32(relative_eh_frame_begin + fde_address);
+  }
 }
 
 /*
diff --git a/compiler/elf_writer_debug.h b/compiler/elf_writer_debug.h
index 5bf4841..28d0e2c 100644
--- a/compiler/elf_writer_debug.h
+++ b/compiler/elf_writer_debug.h
@@ -30,7 +30,8 @@
                   ExceptionHeaderValueApplication address_type,
                   std::vector<uint8_t>* eh_frame,
                   std::vector<uintptr_t>* eh_frame_patches,
-                  std::vector<uint8_t>* eh_frame_hdr);
+                  std::vector<uint8_t>* eh_frame_hdr,
+                  std::vector<uintptr_t>* eh_frame_hdr_patches);
 
 void WriteDebugSections(const CompilerDriver* compiler,
                         const OatWriter* oat_writer,
diff --git a/compiler/elf_writer_quick.cc b/compiler/elf_writer_quick.cc
index 3b2ca94..79f9955 100644
--- a/compiler/elf_writer_quick.cc
+++ b/compiler/elf_writer_quick.cc
@@ -21,7 +21,6 @@
 
 #include "base/logging.h"
 #include "base/unix_file/fd_file.h"
-#include "buffered_output_stream.h"
 #include "compiled_method.h"
 #include "dex_file-inl.h"
 #include "driver/compiler_driver.h"
@@ -30,7 +29,6 @@
 #include "elf_file.h"
 #include "elf_utils.h"
 #include "elf_writer_debug.h"
-#include "file_output_stream.h"
 #include "globals.h"
 #include "leb128.h"
 #include "oat.h"
@@ -50,20 +48,6 @@
   return elf_writer.Write(oat_writer, dex_files, android_root, is_host);
 }
 
-class OatWriterWrapper FINAL : public CodeOutput {
- public:
-  explicit OatWriterWrapper(OatWriter* oat_writer) : oat_writer_(oat_writer) {}
-
-  void SetCodeOffset(size_t offset) {
-    oat_writer_->SetOatDataOffset(offset);
-  }
-  bool Write(OutputStream* out) OVERRIDE {
-    return oat_writer_->Write(out);
-  }
- private:
-  OatWriter* const oat_writer_;
-};
-
 template <typename ElfTypes>
 static void WriteDebugSymbols(ElfBuilder<ElfTypes>* builder, OatWriter* oat_writer);
 
@@ -99,15 +83,56 @@
   buffer->push_back(0);  // End of sections.
 }
 
-template<typename AddressType, bool SubtractPatchLocation = false>
-static void PatchAddresses(const std::vector<uintptr_t>* patch_locations,
-                           AddressType delta, std::vector<uint8_t>* buffer) {
-  // Addresses in .debug_* sections are unaligned.
-  typedef __attribute__((__aligned__(1))) AddressType UnalignedAddressType;
-  if (patch_locations != nullptr) {
-    for (uintptr_t patch_location : *patch_locations) {
-      *reinterpret_cast<UnalignedAddressType*>(buffer->data() + patch_location) +=
-          delta - (SubtractPatchLocation ? patch_location : 0);
+class RodataWriter FINAL : public CodeOutput {  // Forwards Write() to OatWriter::WriteRodata().
+ public:
+  explicit RodataWriter(OatWriter* oat_writer) : oat_writer_(oat_writer) {}
+
+  bool Write(OutputStream* out) OVERRIDE {
+    return oat_writer_->WriteRodata(out);
+  }
+
+ private:
+  OatWriter* const oat_writer_;  // Set once in the constructor; never deleted by this class.
+};
+
+class TextWriter FINAL : public CodeOutput {  // Forwards Write() to OatWriter::WriteCode().
+ public:
+  explicit TextWriter(OatWriter* oat_writer) : oat_writer_(oat_writer) {}
+
+  bool Write(OutputStream* out) OVERRIDE {
+    return oat_writer_->WriteCode(out);
+  }
+
+ private:
+  OatWriter* const oat_writer_;  // Set once in the constructor; never deleted by this class.
+};
+
+enum PatchResult {
+  kAbsoluteAddress,  // Absolute memory location.
+  kPointerRelativeAddress,  // Offset relative to the location of the pointer.
+  kSectionRelativeAddress,  // Offset relative to start of containing section.
+};
+
+// Patch, in place, the addresses stored at byte offsets patch_locations within buffer.
+// It assumes that the unpatched addresses are offsets relative to base_address
+// (which generally means method's low_pc relative to the start of .text).
+template <typename Elf_Addr, typename Address, PatchResult kPatchResult>
+static void Patch(const std::vector<uintptr_t>& patch_locations,
+                  Elf_Addr buffer_address, Elf_Addr base_address,
+                  std::vector<uint8_t>* buffer) {
+  for (uintptr_t location : patch_locations) {
+    typedef __attribute__((__aligned__(1))) Address UnalignedAddress;  // No alignment assumed.
+    auto* to_patch = reinterpret_cast<UnalignedAddress*>(buffer->data() + location);
+    switch (kPatchResult) {  // Template constant: the same case is taken for every location.
+      case kAbsoluteAddress:
+        *to_patch = (base_address + *to_patch);
+        break;
+      case kPointerRelativeAddress:
+        *to_patch = (base_address + *to_patch) - (buffer_address + location);
+        break;
+      case kSectionRelativeAddress:
+        *to_patch = (base_address + *to_patch) - buffer_address;
+        break;
     }
   }
 }
@@ -118,106 +143,80 @@
     const std::vector<const DexFile*>& dex_files_unused ATTRIBUTE_UNUSED,
     const std::string& android_root_unused ATTRIBUTE_UNUSED,
     bool is_host_unused ATTRIBUTE_UNUSED) {
-  constexpr bool debug = false;
-  const OatHeader& oat_header = oat_writer->GetOatHeader();
-  typename ElfTypes::Word oat_data_size = oat_header.GetExecutableOffset();
-  uint32_t oat_exec_size = oat_writer->GetSize() - oat_data_size;
-  uint32_t oat_bss_size = oat_writer->GetBssSize();
+  using Elf_Addr = typename ElfTypes::Addr;
+  const InstructionSet isa = compiler_driver_->GetInstructionSet();
 
-  OatWriterWrapper wrapper(oat_writer);
-
+  // Setup the builder with the main OAT sections (.rodata .text .bss).
+  const size_t rodata_size = oat_writer->GetOatHeader().GetExecutableOffset();
+  const size_t text_size = oat_writer->GetSize() - rodata_size;
+  const size_t bss_size = oat_writer->GetBssSize();
+  RodataWriter rodata_writer(oat_writer);
+  TextWriter text_writer(oat_writer);
   std::unique_ptr<ElfBuilder<ElfTypes>> builder(new ElfBuilder<ElfTypes>(
-      &wrapper,
-      elf_file_,
-      compiler_driver_->GetInstructionSet(),
-      0,
-      oat_data_size,
-      oat_data_size,
-      oat_exec_size,
-      RoundUp(oat_data_size + oat_exec_size, kPageSize),
-      oat_bss_size,
-      compiler_driver_->GetCompilerOptions().GetIncludeDebugSymbols(),
-      debug));
+      isa, rodata_size, &rodata_writer, text_size, &text_writer, bss_size));
 
-  InstructionSet isa = compiler_driver_->GetInstructionSet();
-  int alignment = GetInstructionSetPointerSize(isa);
-  typedef ElfRawSectionBuilder<ElfTypes> RawSection;
-  RawSection eh_frame(".eh_frame", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, alignment, 0);
-  RawSection eh_frame_hdr(".eh_frame_hdr", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, 4, 0);
-  RawSection debug_info(".debug_info", SHT_PROGBITS, 0, nullptr, 0, 1, 0);
+  // Add debug sections.
+  // They are stack allocated here (in the same scope as the builder),
+  // but they are registered with the builder only if they are used.
+  using RawSection = typename ElfBuilder<ElfTypes>::RawSection;
+  const auto* text = builder->GetText();
+  const bool is64bit = Is64BitInstructionSet(isa);
+  RawSection eh_frame(".eh_frame", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, kPageSize, 0,
+                      is64bit ? Patch<Elf_Addr, uint64_t, kPointerRelativeAddress> :
+                                Patch<Elf_Addr, uint32_t, kPointerRelativeAddress>,
+                      text);
+  RawSection eh_frame_hdr(".eh_frame_hdr", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, 4, 0,
+                          Patch<Elf_Addr, uint32_t, kSectionRelativeAddress>, text);
+  RawSection debug_info(".debug_info", SHT_PROGBITS, 0, nullptr, 0, 1, 0,
+                        Patch<Elf_Addr, uint32_t, kAbsoluteAddress>, text);
   RawSection debug_abbrev(".debug_abbrev", SHT_PROGBITS, 0, nullptr, 0, 1, 0);
   RawSection debug_str(".debug_str", SHT_PROGBITS, 0, nullptr, 0, 1, 0);
-  RawSection debug_line(".debug_line", SHT_PROGBITS, 0, nullptr, 0, 1, 0);
+  RawSection debug_line(".debug_line", SHT_PROGBITS, 0, nullptr, 0, 1, 0,
+                        Patch<Elf_Addr, uint32_t, kAbsoluteAddress>, text);
+  if (!oat_writer->GetMethodDebugInfo().empty()) {
+    if (compiler_driver_->GetCompilerOptions().GetIncludeCFI()) {
+      dwarf::WriteEhFrame(
+          compiler_driver_, oat_writer, dwarf::DW_EH_PE_pcrel,
+          eh_frame.GetBuffer(), eh_frame.GetPatchLocations(),
+          eh_frame_hdr.GetBuffer(), eh_frame_hdr.GetPatchLocations());
+      builder->RegisterSection(&eh_frame);
+      builder->RegisterSection(&eh_frame_hdr);
+    }
+    if (compiler_driver_->GetCompilerOptions().GetIncludeDebugSymbols()) {
+      // Add methods to .symtab.
+      WriteDebugSymbols(builder.get(), oat_writer);
+      // Generate DWARF .debug_* sections.
+      dwarf::WriteDebugSections(
+          compiler_driver_, oat_writer,
+          debug_info.GetBuffer(), debug_info.GetPatchLocations(),
+          debug_abbrev.GetBuffer(),
+          debug_str.GetBuffer(),
+          debug_line.GetBuffer(), debug_line.GetPatchLocations());
+      builder->RegisterSection(&debug_info);
+      builder->RegisterSection(&debug_abbrev);
+      builder->RegisterSection(&debug_str);
+      builder->RegisterSection(&debug_line);
+      *oat_writer->GetAbsolutePatchLocationsFor(".debug_info") =
+          *debug_info.GetPatchLocations();
+      *oat_writer->GetAbsolutePatchLocationsFor(".debug_line") =
+          *debug_line.GetPatchLocations();
+    }
+  }
+
+  // Add relocation section.
   RawSection oat_patches(".oat_patches", SHT_OAT_PATCH, 0, nullptr, 0, 1, 0);
-
-  // Do not add to .oat_patches since we will make the addresses relative.
-  std::vector<uintptr_t> eh_frame_patches;
-  if (compiler_driver_->GetCompilerOptions().GetIncludeCFI() &&
-      !oat_writer->GetMethodDebugInfo().empty()) {
-    dwarf::WriteEhFrame(compiler_driver_, oat_writer,
-                        dwarf::DW_EH_PE_pcrel,
-                        eh_frame.GetBuffer(), &eh_frame_patches,
-                        eh_frame_hdr.GetBuffer());
-    builder->RegisterRawSection(&eh_frame);
-    builder->RegisterRawSection(&eh_frame_hdr);
-  }
-
-  // Must be done after .eh_frame is created since it is used in the Elf layout.
-  if (!builder->Init()) {
-    return false;
-  }
-
-  std::vector<uintptr_t>* debug_info_patches = nullptr;
-  std::vector<uintptr_t>* debug_line_patches = nullptr;
-  if (compiler_driver_->GetCompilerOptions().GetIncludeDebugSymbols() &&
-      !oat_writer->GetMethodDebugInfo().empty()) {
-    // Add methods to .symtab.
-    WriteDebugSymbols(builder.get(), oat_writer);
-    // Generate DWARF .debug_* sections.
-    debug_info_patches = oat_writer->GetAbsolutePatchLocationsFor(".debug_info");
-    debug_line_patches = oat_writer->GetAbsolutePatchLocationsFor(".debug_line");
-    dwarf::WriteDebugSections(compiler_driver_, oat_writer,
-                              debug_info.GetBuffer(), debug_info_patches,
-                              debug_abbrev.GetBuffer(),
-                              debug_str.GetBuffer(),
-                              debug_line.GetBuffer(), debug_line_patches);
-    builder->RegisterRawSection(&debug_info);
-    builder->RegisterRawSection(&debug_abbrev);
-    builder->RegisterRawSection(&debug_str);
-    builder->RegisterRawSection(&debug_line);
-  }
-
   if (compiler_driver_->GetCompilerOptions().GetIncludePatchInformation() ||
       // ElfWriter::Fixup will be called regardless and it needs to be able
       // to patch debug sections so we have to include patches for them.
       compiler_driver_->GetCompilerOptions().GetIncludeDebugSymbols()) {
     EncodeOatPatches(oat_writer->GetAbsolutePatchLocations(), oat_patches.GetBuffer());
-    builder->RegisterRawSection(&oat_patches);
+    builder->RegisterSection(&oat_patches);
   }
 
-  // We know where .text and .eh_frame will be located, so patch the addresses.
-  typename ElfTypes::Addr text_addr = builder->GetTextBuilder().GetSection()->sh_addr;
-  // TODO: Simplify once we use Elf64 - we can use ElfTypes::Addr instead of branching.
-  if (Is64BitInstructionSet(compiler_driver_->GetInstructionSet())) {
-    // relative_address = (text_addr + address) - (eh_frame_addr + patch_location);
-    PatchAddresses<uint64_t, true>(&eh_frame_patches,
-        text_addr - eh_frame.GetSection()->sh_addr, eh_frame.GetBuffer());
-    PatchAddresses<uint64_t>(debug_info_patches, text_addr, debug_info.GetBuffer());
-    PatchAddresses<uint64_t>(debug_line_patches, text_addr, debug_line.GetBuffer());
-  } else {
-    // relative_address = (text_addr + address) - (eh_frame_addr + patch_location);
-    PatchAddresses<uint32_t, true>(&eh_frame_patches,
-        text_addr - eh_frame.GetSection()->sh_addr, eh_frame.GetBuffer());
-    PatchAddresses<uint32_t>(debug_info_patches, text_addr, debug_info.GetBuffer());
-    PatchAddresses<uint32_t>(debug_line_patches, text_addr, debug_line.GetBuffer());
-  }
-
-  return builder->Write();
+  return builder->Write(elf_file_);
 }
 
 template <typename ElfTypes>
-// Do not inline to avoid Clang stack frame problems. b/18738594
-NO_INLINE
 static void WriteDebugSymbols(ElfBuilder<ElfTypes>* builder, OatWriter* oat_writer) {
   const std::vector<OatWriter::DebugInfo>& method_info = oat_writer->GetMethodDebugInfo();
 
@@ -230,8 +229,11 @@
     }
   }
 
-  ElfSymtabBuilder<ElfTypes>* symtab = builder->GetSymtabBuilder();
+  auto* symtab = builder->GetSymtab();
   for (auto it = method_info.begin(); it != method_info.end(); ++it) {
+    if (it->deduped_) {
+      continue;  // Add symbol only for the first instance.
+    }
     std::string name = PrettyMethod(it->dex_method_index_, *it->dex_file_, true);
     if (deduped_addresses.find(it->low_pc_) != deduped_addresses.end()) {
       name += " [DEDUPED]";
@@ -240,13 +242,13 @@
     uint32_t low_pc = it->low_pc_;
     // Add in code delta, e.g., thumb bit 0 for Thumb2 code.
     low_pc += it->compiled_method_->CodeDelta();
-    symtab->AddSymbol(name, &builder->GetTextBuilder(), low_pc,
+    symtab->AddSymbol(name, builder->GetText(), low_pc,
                       true, it->high_pc_ - it->low_pc_, STB_GLOBAL, STT_FUNC);
 
     // Conforming to aaelf, add $t mapping symbol to indicate start of a sequence of thumb2
     // instructions, so that disassembler tools can correctly disassemble.
     if (it->compiled_method_->GetInstructionSet() == kThumb2) {
-      symtab->AddSymbol("$t", &builder->GetTextBuilder(), it->low_pc_ & ~1, true,
+      symtab->AddSymbol("$t", builder->GetText(), it->low_pc_ & ~1, true,
                         0, STB_LOCAL, STT_NOTYPE);
     }
   }
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index fc70d8f..195949b 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -89,7 +89,12 @@
     Thread::Current()->TransitionFromSuspendedToRunnable();
     PruneNonImageClasses();  // Remove junk
     ComputeLazyFieldsForImageClasses();  // Add useful information
-    ProcessStrings();
+
+    // Calling this can in theory fill in some resolved strings. However, in practice it seems to
+    // never resolve any.
+    if (kComputeEagerResolvedStrings) {
+      ComputeEagerResolvedStrings();
+    }
     Thread::Current()->TransitionFromRunnableToSuspended(kNative);
   }
   gc::Heap* heap = Runtime::Current()->GetHeap();
@@ -161,7 +166,7 @@
 
   size_t oat_loaded_size = 0;
   size_t oat_data_offset = 0;
-  ElfWriter::GetOatElfInformation(oat_file.get(), oat_loaded_size, oat_data_offset);
+  ElfWriter::GetOatElfInformation(oat_file.get(), &oat_loaded_size, &oat_data_offset);
 
   Thread::Current()->TransitionFromSuspendedToRunnable();
   CreateHeader(oat_loaded_size, oat_data_offset);
@@ -529,14 +534,6 @@
   return true;
 }
 
-// Count the number of strings in the heap and put the result in arg as a size_t pointer.
-static void CountStringsCallback(Object* obj, void* arg)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  if (obj->GetClass()->IsStringClass()) {
-    ++*reinterpret_cast<size_t*>(arg);
-  }
-}
-
 // Collect all the java.lang.String in the heap and put them in the output strings_ array.
 class StringCollector {
  public:
@@ -566,99 +563,19 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     mirror::String* lhs_s = lhs.AsMirrorPtr();
     mirror::String* rhs_s = rhs.AsMirrorPtr();
-    uint16_t* lhs_begin = lhs_s->GetCharArray()->GetData() + lhs_s->GetOffset();
-    uint16_t* rhs_begin = rhs_s->GetCharArray()->GetData() + rhs_s->GetOffset();
+    uint16_t* lhs_begin = lhs_s->GetValue();
+    uint16_t* rhs_begin = rhs_s->GetValue();
     return std::lexicographical_compare(lhs_begin, lhs_begin + lhs_s->GetLength(),
                                         rhs_begin, rhs_begin + rhs_s->GetLength());
   }
 };
 
-static bool IsPrefix(mirror::String* pref, mirror::String* full)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  if (pref->GetLength() > full->GetLength()) {
-    return false;
-  }
-  uint16_t* pref_begin = pref->GetCharArray()->GetData() + pref->GetOffset();
-  uint16_t* full_begin = full->GetCharArray()->GetData() + full->GetOffset();
-  return std::equal(pref_begin, pref_begin + pref->GetLength(), full_begin);
-}
-
-void ImageWriter::ProcessStrings() {
-  size_t total_strings = 0;
-  gc::Heap* heap = Runtime::Current()->GetHeap();
-  ClassLinker* cl = Runtime::Current()->GetClassLinker();
-  // Count the strings.
-  heap->VisitObjects(CountStringsCallback, &total_strings);
-  Thread* self = Thread::Current();
-  StackHandleScope<1> hs(self);
-  auto strings = hs.NewHandle(cl->AllocStringArray(self, total_strings));
-  StringCollector string_collector(strings, 0U);
-  // Read strings into the array.
-  heap->VisitObjects(StringCollector::Callback, &string_collector);
-  // Some strings could have gotten freed if AllocStringArray caused a GC.
-  CHECK_LE(string_collector.GetIndex(), total_strings);
-  total_strings = string_collector.GetIndex();
-  auto* strings_begin = reinterpret_cast<mirror::HeapReference<mirror::String>*>(
-          strings->GetRawData(sizeof(mirror::HeapReference<mirror::String>), 0));
-  std::sort(strings_begin, strings_begin + total_strings, LexicographicalStringComparator());
-  // Characters of strings which are non equal prefix of another string (not the same string).
-  // We don't count the savings from equal strings since these would get interned later anyways.
-  size_t prefix_saved_chars = 0;
-  // Count characters needed for the strings.
-  size_t num_chars = 0u;
-  mirror::String* prev_s = nullptr;
-  for (size_t idx = 0; idx != total_strings; ++idx) {
-    mirror::String* s = strings->GetWithoutChecks(idx);
-    size_t length = s->GetLength();
-    num_chars += length;
-    if (prev_s != nullptr && IsPrefix(prev_s, s)) {
-      size_t prev_length = prev_s->GetLength();
-      num_chars -= prev_length;
-      if (prev_length != length) {
-        prefix_saved_chars += prev_length;
-      }
-    }
-    prev_s = s;
-  }
-  // Create character array, copy characters and point the strings there.
-  mirror::CharArray* array = mirror::CharArray::Alloc(self, num_chars);
-  string_data_array_ = array;
-  uint16_t* array_data = array->GetData();
-  size_t pos = 0u;
-  prev_s = nullptr;
-  for (size_t idx = 0; idx != total_strings; ++idx) {
-    mirror::String* s = strings->GetWithoutChecks(idx);
-    uint16_t* s_data = s->GetCharArray()->GetData() + s->GetOffset();
-    int32_t s_length = s->GetLength();
-    int32_t prefix_length = 0u;
-    if (idx != 0u && IsPrefix(prev_s, s)) {
-      prefix_length = prev_s->GetLength();
-    }
-    memcpy(array_data + pos, s_data + prefix_length, (s_length - prefix_length) * sizeof(*s_data));
-    s->SetOffset(pos - prefix_length);
-    s->SetArray(array);
-    pos += s_length - prefix_length;
-    prev_s = s;
-  }
-  CHECK_EQ(pos, num_chars);
-
-  if (kIsDebugBuild || VLOG_IS_ON(compiler)) {
-    LOG(INFO) << "Total # image strings=" << total_strings << " combined length="
-        << num_chars << " prefix saved chars=" << prefix_saved_chars;
-  }
-  // Calling this can in theory fill in some resolved strings. However, in practice it seems to
-  // never resolve any.
-  if (kComputeEagerResolvedStrings) {
-    ComputeEagerResolvedStrings();
-  }
-}
-
 void ImageWriter::ComputeEagerResolvedStringsCallback(Object* obj, void* arg ATTRIBUTE_UNUSED) {
   if (!obj->GetClass()->IsStringClass()) {
     return;
   }
   mirror::String* string = obj->AsString();
-  const uint16_t* utf16_string = string->GetCharArray()->GetData() + string->GetOffset();
+  const uint16_t* utf16_string = string->GetValue();
   size_t utf16_length = static_cast<size_t>(string->GetLength());
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   ReaderMutexLock mu(Thread::Current(), *class_linker->DexLock());
diff --git a/compiler/image_writer.h b/compiler/image_writer.h
index a2d99ee..c0cffa5 100644
--- a/compiler/image_writer.h
+++ b/compiler/image_writer.h
@@ -220,9 +220,6 @@
   static void ComputeEagerResolvedStringsCallback(mirror::Object* obj, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  // Combine string char arrays.
-  void ProcessStrings() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
   // Remove unwanted classes from various roots.
   void PruneNonImageClasses() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   static bool NonImageClassesVisitor(mirror::Class* c, void* arg)
diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc
index 6a08548..7c400ee 100644
--- a/compiler/jit/jit_compiler.cc
+++ b/compiler/jit/jit_compiler.cc
@@ -62,7 +62,7 @@
 
 JitCompiler::JitCompiler() : total_time_(0) {
   auto* pass_manager_options = new PassManagerOptions;
-  pass_manager_options->SetDisablePassList("GVN,DCE");
+  pass_manager_options->SetDisablePassList("GVN,DCE,GVNCleanup");
   compiler_options_.reset(new CompilerOptions(
       CompilerOptions::kDefaultCompilerFilter,
       CompilerOptions::kDefaultHugeMethodThreshold,
diff --git a/compiler/jit/jit_compiler.h b/compiler/jit/jit_compiler.h
index 0876499..d9a5ac6 100644
--- a/compiler/jit/jit_compiler.h
+++ b/compiler/jit/jit_compiler.h
@@ -67,10 +67,11 @@
       const uint8_t* mapping_table, const uint8_t* vmap_table, const uint8_t* gc_map);
   bool MakeExecutable(CompiledMethod* compiled_method, mirror::ArtMethod* method)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  DISALLOW_COPY_AND_ASSIGN(JitCompiler);
 };
 
 }  // namespace jit
-
 }  // namespace art
 
 #endif  // ART_COMPILER_JIT_JIT_COMPILER_H_
diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc
index 6f2cb25..a06303d 100644
--- a/compiler/jni/quick/jni_compiler.cc
+++ b/compiler/jni/quick/jni_compiler.cc
@@ -138,7 +138,8 @@
     FrameOffset handle_scope_offset = main_jni_conv->CurrentParamHandleScopeEntryOffset();
     // Check handle scope offset is within frame
     CHECK_LT(handle_scope_offset.Uint32Value(), frame_size);
-    // TODO: Insert the read barrier for this load.
+    // Note this LoadRef() already includes the heap poisoning negation.
+    // Note this LoadRef() does not include read barrier. It will be handled below.
     __ LoadRef(main_jni_conv->InterproceduralScratchRegister(),
                mr_conv->MethodRegister(), mirror::ArtMethod::DeclaringClassOffset());
     __ VerifyObject(main_jni_conv->InterproceduralScratchRegister(), false);
@@ -189,6 +190,49 @@
   size_t current_out_arg_size = main_out_arg_size;
   __ IncreaseFrameSize(main_out_arg_size);
 
+  // Call the read barrier for the declaring class loaded from the method for a static call.
+  // Note that we always have outgoing param space available for at least two params.
+  if (kUseReadBarrier && is_static) {
+    ThreadOffset<4> read_barrier32 = QUICK_ENTRYPOINT_OFFSET(4, pReadBarrierJni);
+    ThreadOffset<8> read_barrier64 = QUICK_ENTRYPOINT_OFFSET(8, pReadBarrierJni);
+    main_jni_conv->ResetIterator(FrameOffset(main_out_arg_size));
+    main_jni_conv->Next();  // Skip JNIEnv.
+    FrameOffset class_handle_scope_offset = main_jni_conv->CurrentParamHandleScopeEntryOffset();
+    main_jni_conv->ResetIterator(FrameOffset(main_out_arg_size));
+    // Pass the handle for the class as the first argument.
+    if (main_jni_conv->IsCurrentParamOnStack()) {
+      FrameOffset out_off = main_jni_conv->CurrentParamStackOffset();
+      __ CreateHandleScopeEntry(out_off, class_handle_scope_offset,
+                         mr_conv->InterproceduralScratchRegister(),
+                         false);
+    } else {
+      ManagedRegister out_reg = main_jni_conv->CurrentParamRegister();
+      __ CreateHandleScopeEntry(out_reg, class_handle_scope_offset,
+                         ManagedRegister::NoRegister(), false);
+    }
+    main_jni_conv->Next();
+    // Pass the current thread as the second argument and call.
+    if (main_jni_conv->IsCurrentParamInRegister()) {
+      __ GetCurrentThread(main_jni_conv->CurrentParamRegister());
+      if (is_64_bit_target) {
+        __ Call(main_jni_conv->CurrentParamRegister(), Offset(read_barrier64),
+                main_jni_conv->InterproceduralScratchRegister());
+      } else {
+        __ Call(main_jni_conv->CurrentParamRegister(), Offset(read_barrier32),
+                main_jni_conv->InterproceduralScratchRegister());
+      }
+    } else {
+      __ GetCurrentThread(main_jni_conv->CurrentParamStackOffset(),
+                          main_jni_conv->InterproceduralScratchRegister());
+      if (is_64_bit_target) {
+        __ CallFromThread64(read_barrier64, main_jni_conv->InterproceduralScratchRegister());
+      } else {
+        __ CallFromThread32(read_barrier32, main_jni_conv->InterproceduralScratchRegister());
+      }
+    }
+    main_jni_conv->ResetIterator(FrameOffset(main_out_arg_size));  // Reset.
+  }
+
   // 6. Call into appropriate JniMethodStart passing Thread* so that transition out of Runnable
   //    can occur. The result is the saved JNI local state that is restored by the exit call. We
   //    abuse the JNI calling convention here, that is guaranteed to support passing 2 pointer
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc
index 925b507..a871a82 100644
--- a/compiler/oat_test.cc
+++ b/compiler/oat_test.cc
@@ -176,7 +176,7 @@
   EXPECT_EQ(72U, sizeof(OatHeader));
   EXPECT_EQ(4U, sizeof(OatMethodOffsets));
   EXPECT_EQ(28U, sizeof(OatQuickMethodHeader));
-  EXPECT_EQ(92 * GetInstructionSetPointerSize(kRuntimeISA), sizeof(QuickEntryPoints));
+  EXPECT_EQ(112 * GetInstructionSetPointerSize(kRuntimeISA), sizeof(QuickEntryPoints));
 }
 
 TEST_F(OatTest, OatHeaderIsValid) {
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index d2d38da..15b4017 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -1112,13 +1112,14 @@
   return offset;
 }
 
-bool OatWriter::Write(OutputStream* out) {
+bool OatWriter::WriteRodata(OutputStream* out) {
   const off_t raw_file_offset = out->Seek(0, kSeekCurrent);
   if (raw_file_offset == (off_t) -1) {
     LOG(ERROR) << "Failed to get file offset in " << out->GetLocation();
     return false;
   }
   const size_t file_offset = static_cast<size_t>(raw_file_offset);
+  oat_data_offset_ = file_offset;
 
   // Reserve space for header. It will be written last - after updating the checksum.
   size_t header_size = oat_header_->GetHeaderSize();
@@ -1146,6 +1147,27 @@
     return false;
   }
 
+  // Seek past the alignment padding so the stream is positioned at the oat code section.
+  off_t new_offset = out->Seek(size_executable_offset_alignment_, kSeekCurrent);
+  relative_offset += size_executable_offset_alignment_;
+  DCHECK_EQ(relative_offset, oat_header_->GetExecutableOffset());
+  size_t expected_file_offset = file_offset + relative_offset;
+  if (static_cast<uint32_t>(new_offset) != expected_file_offset) {
+    PLOG(ERROR) << "Failed to seek to oat code section. Actual: " << new_offset
+                << " Expected: " << expected_file_offset << " File: " << out->GetLocation();
+    return false;  // This function returns bool, not an offset.
+  }
+  DCHECK_OFFSET();
+
+  return true;
+}
+
+bool OatWriter::WriteCode(OutputStream* out) {
+  size_t header_size = oat_header_->GetHeaderSize();
+  const size_t file_offset = oat_data_offset_;
+  size_t relative_offset = oat_header_->GetExecutableOffset();
+  DCHECK_OFFSET();
+
   relative_offset = WriteCode(out, file_offset, relative_offset);
   if (relative_offset == 0) {
     LOG(ERROR) << "Failed to write oat code to " << out->GetLocation();
@@ -1215,7 +1237,7 @@
     PLOG(ERROR) << "Failed to seek to oat header position in " << out->GetLocation();
     return false;
   }
-  DCHECK_EQ(raw_file_offset, out->Seek(0, kSeekCurrent));
+  DCHECK_EQ(file_offset, static_cast<size_t>(out->Seek(0, kSeekCurrent)));
   if (!out->WriteFully(oat_header_, header_size)) {
     PLOG(ERROR) << "Failed to write oat header to " << out->GetLocation();
     return false;
@@ -1290,16 +1312,6 @@
 }
 
 size_t OatWriter::WriteCode(OutputStream* out, const size_t file_offset, size_t relative_offset) {
-  off_t new_offset = out->Seek(size_executable_offset_alignment_, kSeekCurrent);
-  relative_offset += size_executable_offset_alignment_;
-  DCHECK_EQ(relative_offset, oat_header_->GetExecutableOffset());
-  size_t expected_file_offset = file_offset + relative_offset;
-  if (static_cast<uint32_t>(new_offset) != expected_file_offset) {
-    PLOG(ERROR) << "Failed to seek to oat code section. Actual: " << new_offset
-                << " Expected: " << expected_file_offset << " File: " << out->GetLocation();
-    return 0;
-  }
-  DCHECK_OFFSET();
   if (compiler_driver_->IsImage()) {
     InstructionSet instruction_set = compiler_driver_->GetInstructionSet();
 
diff --git a/compiler/oat_writer.h b/compiler/oat_writer.h
index 8c79b44..6f1b4ec 100644
--- a/compiler/oat_writer.h
+++ b/compiler/oat_writer.h
@@ -118,11 +118,8 @@
     return it.first->second.get();
   }
 
-  void SetOatDataOffset(size_t oat_data_offset) {
-    oat_data_offset_ = oat_data_offset;
-  }
-
-  bool Write(OutputStream* out);
+  bool WriteRodata(OutputStream* out);
+  bool WriteCode(OutputStream* out);
 
   ~OatWriter();
 
diff --git a/compiler/optimizing/boolean_simplifier.cc b/compiler/optimizing/boolean_simplifier.cc
index 6ebfb45..8100a29 100644
--- a/compiler/optimizing/boolean_simplifier.cc
+++ b/compiler/optimizing/boolean_simplifier.cc
@@ -18,6 +18,26 @@
 
 namespace art {
 
+void HBooleanSimplifier::TryRemovingNegatedCondition(HBasicBlock* block) {
+  DCHECK(block->EndsWithIf());
+
+  // Only an If whose input is a BooleanNot is of interest here.
+  HIf* branch = block->GetLastInstruction()->AsIf();
+  HInstruction* negation = branch->InputAt(0);
+  if (!negation->IsBooleanNot()) {
+    return;
+  }
+
+  // Branch on the un-negated value instead and exchange the two successors.
+  HInstruction* plain_condition = negation->InputAt(0);
+  branch->ReplaceInput(plain_condition, 0);
+  block->SwapSuccessors();
+  // The BooleanNot may still have other users; drop it only when it has none.
+  if (!negation->HasUses()) {
+    negation->GetBlock()->RemoveInstruction(negation);
+  }
+}
+
 // Returns true if 'block1' and 'block2' are empty, merge into the same single
 // successor and the successor can only be reached from them.
 static bool BlocksDoMergeTogether(HBasicBlock* block1, HBasicBlock* block2) {
@@ -78,55 +98,69 @@
   }
 }
 
+void HBooleanSimplifier::TryRemovingBooleanSelection(HBasicBlock* block) {
+  DCHECK(block->EndsWithIf());
+
+  // Locate the diamond: the If, its two branch blocks and their common merge block.
+  HIf* branch = block->GetLastInstruction()->AsIf();
+  HBasicBlock* taken = branch->IfTrueSuccessor();
+  HBasicBlock* not_taken = branch->IfFalseSuccessor();
+  if (!BlocksDoMergeTogether(taken, not_taken)) {
+    return;
+  }
+  HBasicBlock* join = taken->GetSuccessors().Get(0);
+  if (!join->HasSinglePhi()) {
+    return;
+  }
+  HPhi* selection = join->GetFirstPhi()->AsPhi();
+  HInstruction* value_if_true = selection->InputAt(join->GetPredecessorIndexOf(taken));
+  HInstruction* value_if_false = selection->InputAt(join->GetPredecessorIndexOf(not_taken));
+
+  // Decide whether the phi mirrors the condition or its negation and pick the
+  // instruction that will stand in for it; any other shape is left alone.
+  HInstruction* condition = branch->InputAt(0);
+  HInstruction* result;
+  if (NegatesCondition(value_if_true, value_if_false)) {
+    result = GetOppositeCondition(condition);
+    if (result->GetBlock() == nullptr) {
+      block->InsertInstructionBefore(result, branch);
+    }
+  } else if (PreservesCondition(value_if_true, value_if_false)) {
+    result = condition;
+  } else {
+    return;
+  }
+
+  // Route every user of the phi to the stand-in, then drop the phi.
+  selection->ReplaceWith(result);
+  join->RemovePhi(selection);
+
+  // Delete the true branch, then fold the remaining chain
+  // 'block->not_taken->join' into 'block'.
+  taken->DisconnectAndDelete();
+  block->MergeWith(not_taken);
+  block->MergeWith(join);
+
+  // The original condition may be dead now; remove it if nothing uses it.
+  if (!condition->HasUses()) {
+    condition->GetBlock()->RemoveInstructionOrPhi(condition);
+  }
+}
+
 void HBooleanSimplifier::Run() {
   // Iterate in post order in the unlikely case that removing one occurrence of
-  // the pattern empties a branch block of another occurrence. Otherwise the
-  // order does not matter.
+  // the selection pattern empties a branch block of another occurrence.
+  // Otherwise the order does not matter.
   for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
     HBasicBlock* block = it.Current();
     if (!block->EndsWithIf()) continue;
 
-    // Find elements of the pattern.
-    HIf* if_instruction = block->GetLastInstruction()->AsIf();
-    HBasicBlock* true_block = if_instruction->IfTrueSuccessor();
-    HBasicBlock* false_block = if_instruction->IfFalseSuccessor();
-    if (!BlocksDoMergeTogether(true_block, false_block)) {
-      continue;
-    }
-    HBasicBlock* merge_block = true_block->GetSuccessors().Get(0);
-    if (!merge_block->HasSinglePhi()) {
-      continue;
-    }
-    HPhi* phi = merge_block->GetFirstPhi()->AsPhi();
-    HInstruction* true_value = phi->InputAt(merge_block->GetPredecessorIndexOf(true_block));
-    HInstruction* false_value = phi->InputAt(merge_block->GetPredecessorIndexOf(false_block));
+    // If the condition is negated, remove the negation and swap the branches.
+    TryRemovingNegatedCondition(block);
 
-    // Check if the selection negates/preserves the value of the condition and
-    // if so, generate a suitable replacement instruction.
-    HInstruction* if_condition = if_instruction->InputAt(0);
-    HInstruction* replacement;
-    if (NegatesCondition(true_value, false_value)) {
-      replacement = GetOppositeCondition(if_condition);
-      if (replacement->GetBlock() == nullptr) {
-        block->InsertInstructionBefore(replacement, if_instruction);
-      }
-    } else if (PreservesCondition(true_value, false_value)) {
-      replacement = if_condition;
-    } else {
-      continue;
-    }
-
-    // Replace the selection outcome with the new instruction.
-    phi->ReplaceWith(replacement);
-    merge_block->RemovePhi(phi);
-
-    // Link the start/end blocks and remove empty branches.
-    graph_->MergeEmptyBranches(block, merge_block);
-
-    // Remove the original condition if it is now unused.
-    if (!if_condition->HasUses()) {
-      if_condition->GetBlock()->RemoveInstruction(if_condition);
-    }
+    // If this is a boolean-selection diamond pattern, replace its result with
+    // the condition value (or its negation) and simplify the graph.
+    TryRemovingBooleanSelection(block);
   }
 }
 
diff --git a/compiler/optimizing/boolean_simplifier.h b/compiler/optimizing/boolean_simplifier.h
index a88733e..733ebaa 100644
--- a/compiler/optimizing/boolean_simplifier.h
+++ b/compiler/optimizing/boolean_simplifier.h
@@ -14,11 +14,15 @@
  * limitations under the License.
  */
 
-// This optimization recognizes a common pattern where a boolean value is
-// either cast to an integer or negated by selecting from zero/one integer
-// constants with an If statement. Because boolean values are internally
-// represented as zero/one, we can safely replace the pattern with a suitable
-// condition instruction.
+// This optimization recognizes two common patterns:
+//  (a) Boolean selection: Casting a boolean to an integer or negating it is
+//      carried out with an If statement selecting from zero/one integer
+//      constants. Because Boolean values are represented as zero/one, the
+//      pattern can be replaced with the condition instruction itself or its
+//      negation, depending on the layout.
+//  (b) Negated condition: The instruction simplifier may replace an If's
+//      condition with a Boolean value. If this value is the result of a Boolean
+//      negation, the true/false branches can be swapped and the negation removed.
 
 // Example: Negating a boolean value
 //     B1:
@@ -66,6 +70,9 @@
   static constexpr const char* kBooleanSimplifierPassName = "boolean_simplifier";
 
  private:
+  void TryRemovingNegatedCondition(HBasicBlock* block);
+  void TryRemovingBooleanSelection(HBasicBlock* block);
+
   DISALLOW_COPY_AND_ASSIGN(HBooleanSimplifier);
 };
 
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc
index 6511120..b2b5496 100644
--- a/compiler/optimizing/bounds_check_elimination.cc
+++ b/compiler/optimizing/bounds_check_elimination.cc
@@ -246,6 +246,148 @@
   int32_t constant_;
 };
 
+// Collect array access data (array length and index offset range) for a loop.
+// TODO: make it work for multiple arrays inside the loop.
+class ArrayAccessInsideLoopFinder : public ValueObject {
+ public:
+  explicit ArrayAccessInsideLoopFinder(HInstruction* induction_variable)
+      : induction_variable_(induction_variable),
+        found_array_length_(nullptr),
+        offset_low_(INT_MAX),
+        offset_high_(INT_MIN) {
+    Run();
+  }
+
+  HArrayLength* GetFoundArrayLength() const { return found_array_length_; }
+  bool HasFoundArrayLength() const { return found_array_length_ != nullptr; }
+  int32_t GetOffsetLow() const { return offset_low_; }
+  int32_t GetOffsetHigh() const { return offset_high_; }
+
+  // Returns true if `block`, which is in loop_info, may exit the loop, unless it's
+  // the loop header for loop_info (exiting from the header is the normal case).
+  static bool EarlyExit(HBasicBlock* block, HLoopInformation* loop_info) {
+    DCHECK(loop_info->Contains(*block));
+    if (block == loop_info->GetHeader()) {
+      // Loop header of loop_info. Exiting loop is normal.
+      return false;
+    }
+    const GrowableArray<HBasicBlock*>& successors = block->GetSuccessors();
+    for (size_t i = 0; i < successors.Size(); i++) {
+      if (!loop_info->Contains(*successors.Get(i))) {
+        // One of the successors exits the loop.
+        return true;
+      }
+    }
+    return false;
+  }
+
+  static bool DominatesAllBackEdges(HBasicBlock* block, HLoopInformation* loop_info) {
+    for (size_t i = 0, e = loop_info->GetBackEdges().Size(); i < e; ++i) {
+      HBasicBlock* back_edge = loop_info->GetBackEdges().Get(i);
+      if (!block->Dominates(back_edge)) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  void Run() {
+    HLoopInformation* loop_info = induction_variable_->GetBlock()->GetLoopInformation();
+    for (HBlocksInLoopIterator it_loop(*loop_info); !it_loop.Done(); it_loop.Advance()) {
+      HBasicBlock* block = it_loop.Current();
+      DCHECK(block->IsInLoop());
+      if (!DominatesAllBackEdges(block, loop_info)) {
+        // In order not to trigger deoptimization unnecessarily, make sure
+        // that all array accesses collected are really executed in the loop.
+        // For array accesses in a branch inside the loop, don't collect the
+        // access. The bounds check in that branch might not be eliminated.
+        continue;
+      }
+      if (EarlyExit(block, loop_info)) {
+        // If the loop body can exit loop (like break, return, etc.), it's not guaranteed
+        // that the loop will loop through the full monotonic value range from
+        // initial_ to end_. So adding deoptimization might be too aggressive and can
+        // trigger deoptimization unnecessarily even if the loop won't actually throw
+        // AIOOBE. Otherwise, the loop induction variable is going to cover the full
+        // monotonic value range from initial_ to end_, and deoptimizations are added
+        // iff the loop will throw AIOOBE.
+        found_array_length_ = nullptr;
+        return;
+      }
+      for (HInstruction* instruction = block->GetFirstInstruction();
+           instruction != nullptr;
+           instruction = instruction->GetNext()) {
+        if (!instruction->IsArrayGet() && !instruction->IsArraySet()) {
+          continue;
+        }
+        HInstruction* index = instruction->InputAt(1);
+        if (!index->IsBoundsCheck()) {
+          continue;
+        }
+
+        HArrayLength* array_length = index->InputAt(1)->AsArrayLength();
+        if (array_length == nullptr) {
+          DCHECK(index->InputAt(1)->IsIntConstant());
+          // TODO: may optimize for constant case.
+          continue;
+        }
+
+        HInstruction* array = array_length->InputAt(0);
+        if (array->IsNullCheck()) {
+          array = array->AsNullCheck()->InputAt(0);
+        }
+        if (loop_info->Contains(*array->GetBlock())) {
+          // Array is defined inside the loop. Skip.
+          continue;
+        }
+
+        if (found_array_length_ != nullptr && found_array_length_ != array_length) {
+          // There is already access for another array recorded for the loop.
+          // TODO: handle multiple arrays.
+          continue;
+        }
+
+        index = index->AsBoundsCheck()->InputAt(0);
+        HInstruction* left = index;
+        int32_t right = 0;
+        if (left == induction_variable_ ||
+            (ValueBound::IsAddOrSubAConstant(index, &left, &right) &&
+             left == induction_variable_)) {
+          // For patterns like array[i] or array[i + 2].
+          if (right < offset_low_) {
+            offset_low_ = right;
+          }
+          if (right > offset_high_) {
+            offset_high_ = right;
+          }
+        } else {
+          // Access not in induction_variable_/(induction_variable_ + constant)
+          // format. Skip.
+          continue;
+        }
+        // Record this array.
+        found_array_length_ = array_length;
+      }
+    }
+  }
+
+ private:
+  // The instruction that corresponds to a MonotonicValueRange.
+  HInstruction* induction_variable_;
+
+  // The array length of the array that's accessed inside the loop.
+  HArrayLength* found_array_length_;
+
+  // The lowest and highest constant offsets relative to induction variable
+  // induction_variable_ in all array accesses.
+  // If array access are: array[i-1], array[i], array[i+1],
+  // offset_low_ is -1 and offset_high_ is 1.
+  int32_t offset_low_;
+  int32_t offset_high_;
+
+  DISALLOW_COPY_AND_ASSIGN(ArrayAccessInsideLoopFinder);
+};
+
 /**
  * Represent a range of lower bound and upper bound, both being inclusive.
  * Currently a ValueRange may be generated as a result of the following:
@@ -332,21 +474,31 @@
 class MonotonicValueRange : public ValueRange {
  public:
   MonotonicValueRange(ArenaAllocator* allocator,
+                      HPhi* induction_variable,
                       HInstruction* initial,
                       int32_t increment,
                       ValueBound bound)
       // To be conservative, give it full range [INT_MIN, INT_MAX] in case it's
       // used as a regular value range, due to possible overflow/underflow.
       : ValueRange(allocator, ValueBound::Min(), ValueBound::Max()),
+        induction_variable_(induction_variable),
         initial_(initial),
+        end_(nullptr),
+        inclusive_(false),
         increment_(increment),
         bound_(bound) {}
 
   virtual ~MonotonicValueRange() {}
 
+  HInstruction* GetInductionVariable() const { return induction_variable_; }
   int32_t GetIncrement() const { return increment_; }
-
   ValueBound GetBound() const { return bound_; }
+  void SetEnd(HInstruction* end) { end_ = end; }
+  void SetInclusive(bool inclusive) { inclusive_ = inclusive; }
+  HBasicBlock* GetLoopHead() const {
+    DCHECK(induction_variable_->GetBlock()->IsLoopHeader());
+    return induction_variable_->GetBlock();
+  }
 
   MonotonicValueRange* AsMonotonicValueRange() OVERRIDE { return this; }
 
@@ -371,6 +523,10 @@
     if (increment_ > 0) {
       // Monotonically increasing.
       ValueBound lower = ValueBound::NarrowLowerBound(bound_, range->GetLower());
+      if (!lower.IsConstant() || lower.GetConstant() == INT_MIN) {
+        // Lower bound isn't useful. Leave it to deoptimization.
+        return this;
+      }
 
       // We currently conservatively assume max array length is INT_MAX. If we can
       // make assumptions about the max array length, e.g. due to the max heap size,
@@ -417,6 +573,11 @@
       DCHECK_NE(increment_, 0);
       // Monotonically decreasing.
       ValueBound upper = ValueBound::NarrowUpperBound(bound_, range->GetUpper());
+      if ((!upper.IsConstant() || upper.GetConstant() == INT_MAX) &&
+          !upper.IsRelatedToArrayLength()) {
+        // Upper bound isn't useful. Leave it to deoptimization.
+        return this;
+      }
 
       // Need to take care of underflow. Try to prove underflow won't happen
       // for common cases.
@@ -432,10 +593,217 @@
     }
   }
 
+  // Returns true if a (value >= constant) check for deoptimization is either already
+  // proven (`*is_proven` is then set to true) or is allowed and will benefit compiled code.
+  bool CanAddDeoptimizationConstant(HInstruction* value,
+                                    int32_t constant,
+                                    bool* is_proven) {
+    *is_proven = false;
+    // See if we can prove the relationship first (only attempted for an int constant).
+    if (value->IsIntConstant()) {
+      if (value->AsIntConstant()->GetValue() >= constant) {
+        // Already true. No runtime check is needed.
+        *is_proven = true;
+        return true;
+      } else {
+        // Statically false, so an access may throw. Don't add deoptimization.
+        // Keep bounds checks in the loops.
+        return false;
+      }
+    }
+    // Not a constant: can benefit from deoptimization.
+    return true;
+  }
+
+  // Adds a loop pre-header check that (value >= constant), and HDeoptimize otherwise.
+  void AddDeoptimizationConstant(HInstruction* value,
+                                 int32_t constant) {
+    HBasicBlock* block = induction_variable_->GetBlock();
+    DCHECK(block->IsLoopHeader());
+    HGraph* graph = block->GetGraph();
+    HBasicBlock* pre_header = block->GetLoopInformation()->GetPreHeader();
+    HSuspendCheck* suspend_check = block->GetLoopInformation()->GetSuspendCheck();
+    HIntConstant* const_instr = graph->GetIntConstant(constant);
+    HCondition* cond = new (graph->GetArena()) HLessThan(value, const_instr);  // value < constant
+    HDeoptimize* deoptimize = new (graph->GetArena())
+        HDeoptimize(cond, suspend_check->GetDexPc());  // Uses the loop suspend check's dex pc.
+    pre_header->InsertInstructionBefore(cond, pre_header->GetLastInstruction());
+    pre_header->InsertInstructionBefore(deoptimize, pre_header->GetLastInstruction());
+    deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment(
+        suspend_check->GetEnvironment(), block);
+  }
+
+  // Returns true if a (value <= array_length + offset) check for deoptimization is either
+  // already proven (`*is_proven` is then set to true) or is allowed and will benefit code.
+  bool CanAddDeoptimizationArrayLength(HInstruction* value,
+                                       HArrayLength* array_length,
+                                       int32_t offset,
+                                       bool* is_proven) {
+    *is_proven = false;
+    if (offset > 0) {
+      // (array_length + offset) might overflow for a positive offset.
+      // TODO: handle this, possibly with some distance relationship between
+      // offset_low and offset_high, or using another deoptimization to make
+      // sure (array_length + offset) doesn't overflow.
+      return false;
+    }
+
+    // See if we can prove the relationship first.
+    if (value == array_length) {
+      if (offset >= 0) {
+        // Already true (offset is 0 here; positive offsets were rejected above).
+        *is_proven = true;
+        return true;
+      } else {
+        // Negative offset: the check can never hold, so an access may throw.
+        // Don't add deoptimization. Keep bounds checks in the loops.
+        return false;
+      }
+    }
+    // Can benefit from deoptimization.
+    return true;
+  }
+
+  // Adds a pre-header check that (value <= array_length + offset), and HDeoptimize otherwise.
+  void AddDeoptimizationArrayLength(HInstruction* value,
+                                    HArrayLength* array_length,
+                                    int32_t offset) {
+    HBasicBlock* block = induction_variable_->GetBlock();
+    DCHECK(block->IsLoopHeader());
+    HGraph* graph = block->GetGraph();
+    HBasicBlock* pre_header = block->GetLoopInformation()->GetPreHeader();
+    HSuspendCheck* suspend_check = block->GetLoopInformation()->GetSuspendCheck();
+
+    // If array_length doesn't dominate the pre-header, hoist it (and its null check) first.
+    if (!array_length->GetBlock()->Dominates(pre_header)) {
+      HInstruction* array = array_length->InputAt(0);
+      HNullCheck* null_check = array->AsNullCheck();
+      if (null_check != nullptr) {
+        array = null_check->InputAt(0);
+      }
+      // We've already made sure array is defined before the loop when collecting
+      // array accesses for the loop.
+      DCHECK(array->GetBlock()->Dominates(pre_header));
+      if (null_check != nullptr && !null_check->GetBlock()->Dominates(pre_header)) {
+        // Hoist null check out of loop as a deoptimization taken when (array == null).
+        HNullConstant* null_constant = graph->GetNullConstant();
+        HCondition* null_check_cond = new (graph->GetArena()) HEqual(array, null_constant);
+        // TODO: for one dex_pc, share the same deoptimization slow path.
+        HDeoptimize* null_check_deoptimize = new (graph->GetArena())
+            HDeoptimize(null_check_cond, suspend_check->GetDexPc());
+        pre_header->InsertInstructionBefore(null_check_cond, pre_header->GetLastInstruction());
+        pre_header->InsertInstructionBefore(
+            null_check_deoptimize, pre_header->GetLastInstruction());
+        // Eliminate null check in the loop; its users now use the array directly.
+        null_check->ReplaceWith(array);
+        null_check->GetBlock()->RemoveInstruction(null_check);
+        null_check_deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment(
+            suspend_check->GetEnvironment(), block);
+      }
+      // Hoist array_length out of loop, to just before the pre-header's last instruction.
+      array_length->MoveBefore(pre_header->GetLastInstruction());
+    }
+
+    HIntConstant* offset_instr = graph->GetIntConstant(offset);
+    HAdd* add = new (graph->GetArena()) HAdd(Primitive::kPrimInt, array_length, offset_instr);
+    HCondition* cond = new (graph->GetArena()) HGreaterThan(value, add);  // value > length + offset
+    HDeoptimize* deoptimize = new (graph->GetArena())
+        HDeoptimize(cond, suspend_check->GetDexPc());
+    pre_header->InsertInstructionBefore(add, pre_header->GetLastInstruction());
+    pre_header->InsertInstructionBefore(cond, pre_header->GetLastInstruction());
+    pre_header->InsertInstructionBefore(deoptimize, pre_header->GetLastInstruction());
+    deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment(
+        suspend_check->GetEnvironment(), block);
+  }
+
+  // Adds deoptimizations in the loop pre-header, using the collected array access data,
+  // so that value ranges can be established in the loop body. `end_` must be non-null.
+  // Returns true if the deoptimizations are successfully added, or if they are proven
+  // unnecessary. Only an increment_ of 1 or -1 is handled; returns false otherwise.
+  bool AddDeoptimization(const ArrayAccessInsideLoopFinder& finder) {
+    int32_t offset_low = finder.GetOffsetLow();
+    int32_t offset_high = finder.GetOffsetHigh();
+    HArrayLength* array_length = finder.GetFoundArrayLength();
+
+    HBasicBlock* pre_header =
+        induction_variable_->GetBlock()->GetLoopInformation()->GetPreHeader();
+    if (!initial_->GetBlock()->Dominates(pre_header) ||
+        !end_->GetBlock()->Dominates(pre_header)) {
+      // Can't use initial_ or end_ in pre_header for comparisons.
+      return false;
+    }
+
+    bool is_constant_proven, is_length_proven;
+    if (increment_ == 1) {
+      // Increasing from initial_ to end_: initial_ >= -offset_low, end_ <= length + offset.
+      int32_t offset = inclusive_ ? -offset_high - 1 : -offset_high;
+      if (CanAddDeoptimizationConstant(initial_, -offset_low, &is_constant_proven) &&
+          CanAddDeoptimizationArrayLength(end_, array_length, offset, &is_length_proven)) {
+        if (!is_constant_proven) {
+          AddDeoptimizationConstant(initial_, -offset_low);
+        }
+        if (!is_length_proven) {
+          AddDeoptimizationArrayLength(end_, array_length, offset);
+        }
+        return true;
+      }
+    } else if (increment_ == -1) {
+      // Decreasing from initial_ to end_: end_ >= constant, initial_ <= length - offset_high - 1.
+      int32_t constant = inclusive_ ? -offset_low : -offset_low - 1;
+      if (CanAddDeoptimizationConstant(end_, constant, &is_constant_proven) &&
+          CanAddDeoptimizationArrayLength(
+              initial_, array_length, -offset_high - 1, &is_length_proven)) {
+        if (!is_constant_proven) {
+          AddDeoptimizationConstant(end_, constant);
+        }
+        if (!is_length_proven) {
+          AddDeoptimizationArrayLength(initial_, array_length, -offset_high - 1);
+        }
+        return true;
+      }
+    }
+    return false;
+  }
+
+  // Tries to narrow this range by adding HDeoptimize's in the loop pre-header; else `this`.
+  ValueRange* NarrowWithDeoptimization() {
+    if (increment_ != 1 && increment_ != -1) {
+      // TODO: possibly handle overflow/underflow issues with deoptimization.
+      return this;
+    }
+
+    if (end_ == nullptr) {
+      // The end value was never recorded, so there isn't full info to add deoptimization.
+      return this;
+    }
+
+    ArrayAccessInsideLoopFinder finder(induction_variable_);
+
+    if (!finder.HasFoundArrayLength()) {
+      // No array access was found inside the loop that can benefit
+      // from deoptimization.
+      return this;
+    }
+
+    if (!AddDeoptimization(finder)) {
+      return this;
+    }
+
+    // With the deoptimizations added, the induction variable fits in
+    // [-offset_low, array.length-1-offset_high], adjusted with collected offsets.
+    ValueBound lower = ValueBound(0, -finder.GetOffsetLow());
+    ValueBound upper = ValueBound(finder.GetFoundArrayLength(), -1 - finder.GetOffsetHigh());
+    // Return the narrowed range as a regular (non-monotonic) value range.
+    return new (GetAllocator()) ValueRange(GetAllocator(), lower, upper);
+  }
+
  private:
-  HInstruction* const initial_;
-  const int32_t increment_;
-  ValueBound bound_;  // Additional value bound info for initial_;
+  HPhi* const induction_variable_;  // Induction variable for this monotonic value range.
+  HInstruction* const initial_;     // Initial value.
+  HInstruction* end_;               // End value.
+  bool inclusive_;                  // Whether end value is inclusive.
+  const int32_t increment_;         // Increment for each loop iteration.
+  const ValueBound bound_;          // Additional value bound info for initial_.
 
   DISALLOW_COPY_AND_ASSIGN(MonotonicValueRange);
 };
@@ -598,6 +966,20 @@
     // There should be no critical edge at this point.
     DCHECK_EQ(false_successor->GetPredecessors().Size(), 1u);
 
+    ValueRange* left_range = LookupValueRange(left, block);
+    MonotonicValueRange* left_monotonic_range = nullptr;
+    if (left_range != nullptr) {
+      left_monotonic_range = left_range->AsMonotonicValueRange();
+      if (left_monotonic_range != nullptr) {
+        HBasicBlock* loop_head = left_monotonic_range->GetLoopHead();
+        if (instruction->GetBlock() != loop_head) {
+          // For monotonic value range, don't handle `instruction`
+          // if it's not defined in the loop header.
+          return;
+        }
+      }
+    }
+
     bool found;
     ValueBound bound = ValueBound::DetectValueBoundFromValue(right, &found);
     // Each comparison can establish a lower bound and an upper bound
@@ -610,7 +992,6 @@
       ValueRange* right_range = LookupValueRange(right, block);
       if (right_range != nullptr) {
         if (right_range->IsMonotonicValueRange()) {
-          ValueRange* left_range = LookupValueRange(left, block);
           if (left_range != nullptr && left_range->IsMonotonicValueRange()) {
             HandleIfBetweenTwoMonotonicValueRanges(instruction, left, right, cond,
                                                    left_range->AsMonotonicValueRange(),
@@ -628,6 +1009,17 @@
 
     bool overflow, underflow;
     if (cond == kCondLT || cond == kCondLE) {
+      if (left_monotonic_range != nullptr) {
+        // Update the info for monotonic value range.
+        if (left_monotonic_range->GetInductionVariable() == left &&
+            left_monotonic_range->GetIncrement() < 0 &&
+            block == left_monotonic_range->GetLoopHead() &&
+            instruction->IfFalseSuccessor()->GetLoopInformation() == block->GetLoopInformation()) {
+          left_monotonic_range->SetEnd(right);
+          left_monotonic_range->SetInclusive(cond == kCondLT);
+        }
+      }
+
       if (!upper.Equals(ValueBound::Max())) {
         int32_t compensation = (cond == kCondLT) ? -1 : 0;  // upper bound is inclusive
         ValueBound new_upper = upper.Add(compensation, &overflow, &underflow);
@@ -651,6 +1043,17 @@
         ApplyRangeFromComparison(left, block, false_successor, new_range);
       }
     } else if (cond == kCondGT || cond == kCondGE) {
+      if (left_monotonic_range != nullptr) {
+        // Update the info for monotonic value range.
+        if (left_monotonic_range->GetInductionVariable() == left &&
+            left_monotonic_range->GetIncrement() > 0 &&
+            block == left_monotonic_range->GetLoopHead() &&
+            instruction->IfFalseSuccessor()->GetLoopInformation() == block->GetLoopInformation()) {
+          left_monotonic_range->SetEnd(right);
+          left_monotonic_range->SetInclusive(cond == kCondGT);
+        }
+      }
+
       // array.length as a lower bound isn't considered useful.
       if (!lower.Equals(ValueBound::Min()) && !lower.IsRelatedToArrayLength()) {
         int32_t compensation = (cond == kCondGT) ? 1 : 0;  // lower bound is inclusive
@@ -755,9 +1158,26 @@
     bounds_check->GetBlock()->RemoveInstruction(bounds_check);
   }
 
+  static bool HasSameInputAtBackEdges(HPhi* phi) {
+    DCHECK(phi->IsLoopHeaderPhi());
+    // Input 0 is from the incoming block. True iff inputs 1..n-1 (back edges) are all equal.
+    HInstruction* input1 = phi->InputAt(1);
+    DCHECK(phi->GetBlock()->GetLoopInformation()->IsBackEdge(
+        *phi->GetBlock()->GetPredecessors().Get(1)));
+    for (size_t i = 2, e = phi->InputCount(); i < e; ++i) {
+      DCHECK(phi->GetBlock()->GetLoopInformation()->IsBackEdge(
+          *phi->GetBlock()->GetPredecessors().Get(i)));
+      if (input1 != phi->InputAt(i)) {
+        return false;
+      }
+    }
+    return true;
+  }
+
   void VisitPhi(HPhi* phi) {
-    if (phi->IsLoopHeaderPhi() && phi->GetType() == Primitive::kPrimInt) {
-      DCHECK_EQ(phi->InputCount(), 2U);
+    if (phi->IsLoopHeaderPhi()
+        && (phi->GetType() == Primitive::kPrimInt)
+        && HasSameInputAtBackEdges(phi)) {
       HInstruction* instruction = phi->InputAt(1);
       HInstruction *left;
       int32_t increment;
@@ -790,6 +1210,7 @@
             }
             range = new (GetGraph()->GetArena()) MonotonicValueRange(
                 GetGraph()->GetArena(),
+                phi,
                 initial_value,
                 increment,
                 bound);
@@ -809,6 +1230,36 @@
         HInstruction* left = cond->GetLeft();
         HInstruction* right = cond->GetRight();
         HandleIf(instruction, left, right, cmp);
+
+        HBasicBlock* block = instruction->GetBlock();
+        ValueRange* left_range = LookupValueRange(left, block);
+        if (left_range == nullptr) {
+          return;
+        }
+
+        if (left_range->IsMonotonicValueRange() &&
+            block == left_range->AsMonotonicValueRange()->GetLoopHead()) {
+          // The comparison is for an induction variable in the loop header.
+          DCHECK(left == left_range->AsMonotonicValueRange()->GetInductionVariable());
+          HBasicBlock* loop_body_successor;
+          if (LIKELY(block->GetLoopInformation()->
+              Contains(*instruction->IfFalseSuccessor()))) {
+            loop_body_successor = instruction->IfFalseSuccessor();
+          } else {
+            loop_body_successor = instruction->IfTrueSuccessor();
+          }
+          ValueRange* new_left_range = LookupValueRange(left, loop_body_successor);
+          if (new_left_range == left_range) {
+            // We are not successful in narrowing the monotonic value range to
+            // a regular value range. Try using deoptimization.
+            new_left_range = left_range->AsMonotonicValueRange()->
+                NarrowWithDeoptimization();
+            if (new_left_range != left_range) {
+              GetValueRangeMap(instruction->IfFalseSuccessor())->
+                  Overwrite(left->GetId(), new_left_range);
+            }
+          }
+        }
       }
     }
   }
@@ -1064,7 +1515,7 @@
 };
 
 void BoundsCheckElimination::Run() {
-  if (!graph_->HasArrayAccesses()) {
+  if (!graph_->HasBoundsChecks()) {
     return;
   }
 
diff --git a/compiler/optimizing/bounds_check_elimination_test.cc b/compiler/optimizing/bounds_check_elimination_test.cc
index 75cf1cf..163458f 100644
--- a/compiler/optimizing/bounds_check_elimination_test.cc
+++ b/compiler/optimizing/bounds_check_elimination_test.cc
@@ -42,8 +42,8 @@
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
 
-  HGraph* graph = new (&allocator) HGraph(&allocator);
-  graph->SetHasArrayAccesses(true);
+  HGraph* graph = CreateGraph(&allocator);
+  graph->SetHasBoundsChecks(true);
 
   HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
   graph->AddBlock(entry);
@@ -147,8 +147,8 @@
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
 
-  HGraph* graph = new (&allocator) HGraph(&allocator);
-  graph->SetHasArrayAccesses(true);
+  HGraph* graph = CreateGraph(&allocator);
+  graph->SetHasBoundsChecks(true);
 
   HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
   graph->AddBlock(entry);
@@ -219,8 +219,8 @@
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
 
-  HGraph* graph = new (&allocator) HGraph(&allocator);
-  graph->SetHasArrayAccesses(true);
+  HGraph* graph = CreateGraph(&allocator);
+  graph->SetHasBoundsChecks(true);
 
   HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
   graph->AddBlock(entry);
@@ -291,8 +291,8 @@
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
 
-  HGraph* graph = new (&allocator) HGraph(&allocator);
-  graph->SetHasArrayAccesses(true);
+  HGraph* graph = CreateGraph(&allocator);
+  graph->SetHasBoundsChecks(true);
 
   HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
   graph->AddBlock(entry);
@@ -364,8 +364,8 @@
                               int initial,
                               int increment,
                               IfCondition cond = kCondGE) {
-  HGraph* graph = new (allocator) HGraph(allocator);
-  graph->SetHasArrayAccesses(true);
+  HGraph* graph = CreateGraph(allocator);
+  graph->SetHasBoundsChecks(true);
 
   HBasicBlock* entry = new (allocator) HBasicBlock(graph);
   graph->AddBlock(entry);
@@ -501,8 +501,8 @@
                               int initial,
                               int increment = -1,
                               IfCondition cond = kCondLE) {
-  HGraph* graph = new (allocator) HGraph(allocator);
-  graph->SetHasArrayAccesses(true);
+  HGraph* graph = CreateGraph(allocator);
+  graph->SetHasBoundsChecks(true);
 
   HBasicBlock* entry = new (allocator) HBasicBlock(graph);
   graph->AddBlock(entry);
@@ -632,8 +632,8 @@
                               int initial,
                               int increment,
                               IfCondition cond) {
-  HGraph* graph = new (allocator) HGraph(allocator);
-  graph->SetHasArrayAccesses(true);
+  HGraph* graph = CreateGraph(allocator);
+  graph->SetHasBoundsChecks(true);
 
   HBasicBlock* entry = new (allocator) HBasicBlock(graph);
   graph->AddBlock(entry);
@@ -743,8 +743,8 @@
                               HInstruction** bounds_check,
                               int initial,
                               IfCondition cond = kCondGE) {
-  HGraph* graph = new (allocator) HGraph(allocator);
-  graph->SetHasArrayAccesses(true);
+  HGraph* graph = CreateGraph(allocator);
+  graph->SetHasBoundsChecks(true);
 
   HBasicBlock* entry = new (allocator) HBasicBlock(graph);
   graph->AddBlock(entry);
@@ -868,8 +868,8 @@
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
 
-  HGraph* graph = new (&allocator) HGraph(&allocator);
-  graph->SetHasArrayAccesses(true);
+  HGraph* graph = CreateGraph(&allocator);
+  graph->SetHasBoundsChecks(true);
 
   HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
   graph->AddBlock(entry);
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index 818d671..a5c6f23 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -19,8 +19,10 @@
 #include "art_field-inl.h"
 #include "base/logging.h"
 #include "class_linker.h"
+#include "dex/verified_method.h"
 #include "dex_file-inl.h"
 #include "dex_instruction-inl.h"
+#include "dex/verified_method.h"
 #include "driver/compiler_driver-inl.h"
 #include "driver/compiler_options.h"
 #include "mirror/class_loader.h"
@@ -280,7 +282,10 @@
 
   // To avoid splitting blocks, we compute ahead of time the instructions that
   // start a new block, and create these blocks.
-  ComputeBranchTargets(code_ptr, code_end, &number_of_branches);
+  if (!ComputeBranchTargets(code_ptr, code_end, &number_of_branches)) {
+    MaybeRecordStat(MethodCompilationStat::kNotCompiledBranchOutsideMethodCode);
+    return false;
+  }
 
   // Note that the compiler driver is null when unit testing.
   if ((compiler_driver_ != nullptr) && SkipCompilation(code_item, number_of_branches)) {
@@ -347,7 +352,7 @@
   current_block_ = block;
 }
 
-void HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr,
+bool HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr,
                                          const uint16_t* code_end,
                                          size_t* number_of_branches) {
   branch_targets_.SetSize(code_end - code_ptr);
@@ -372,7 +377,14 @@
       }
       dex_pc += instruction.SizeInCodeUnits();
       code_ptr += instruction.SizeInCodeUnits();
-      if ((code_ptr < code_end) && (FindBlockStartingAt(dex_pc) == nullptr)) {
+
+      if (code_ptr >= code_end) {
+        if (instruction.CanFlowThrough()) {
+          // In the normal case we should never hit this but someone can artificially forge a dex
+          // file to fall-through out the method code. In this case we bail out compilation.
+          return false;
+        }
+      } else if (FindBlockStartingAt(dex_pc) == nullptr) {
         block = new (arena_) HBasicBlock(graph_, dex_pc);
         branch_targets_.Put(dex_pc, block);
       }
@@ -404,7 +416,12 @@
       // Fall-through. Add a block if there is more code afterwards.
       dex_pc += instruction.SizeInCodeUnits();
       code_ptr += instruction.SizeInCodeUnits();
-      if ((code_ptr < code_end) && (FindBlockStartingAt(dex_pc) == nullptr)) {
+      if (code_ptr >= code_end) {
+        // In the normal case we should never hit this but someone can artificially forge a dex
+        // file to fall-through out the method code. In this case we bail out compilation.
+        // (A switch can fall-through so we don't need to check CanFlowThrough().)
+        return false;
+      } else if (FindBlockStartingAt(dex_pc) == nullptr) {
         block = new (arena_) HBasicBlock(graph_, dex_pc);
         branch_targets_.Put(dex_pc, block);
       }
@@ -413,6 +430,7 @@
       dex_pc += instruction.SizeInCodeUnits();
     }
   }
+  return true;
 }
 
 HBasicBlock* HGraphBuilder::FindBlockStartingAt(int32_t index) const {
@@ -587,7 +605,7 @@
   const char* descriptor = dex_file_->StringDataByIdx(proto_id.shorty_idx_);
   Primitive::Type return_type = Primitive::GetType(descriptor[0]);
   bool is_instance_call = invoke_type != kStatic;
-  const size_t number_of_arguments = strlen(descriptor) - (is_instance_call ? 0 : 1);
+  size_t number_of_arguments = strlen(descriptor) - (is_instance_call ? 0 : 1);
 
   MethodReference target_method(dex_file_, method_idx);
   uintptr_t direct_code;
@@ -605,7 +623,25 @@
   }
   DCHECK(optimized_invoke_type != kSuper);
 
+  // By default, consider that the called method implicitly requires
+  // an initialization check of its declaring class.
+  HInvokeStaticOrDirect::ClinitCheckRequirement clinit_check_requirement =
+      HInvokeStaticOrDirect::ClinitCheckRequirement::kImplicit;
+  // Potential class initialization check, in the case of a static method call.
+  HClinitCheck* clinit_check = nullptr;
+  // Replace calls to String.<init> with StringFactory.
+  int32_t string_init_offset = 0;
+  bool is_string_init = compiler_driver_->IsStringInit(method_idx, dex_file_, &string_init_offset);
+  if (is_string_init) {
+    return_type = Primitive::kPrimNot;
+    is_instance_call = false;
+    number_of_arguments--;
+    invoke_type = kStatic;
+    optimized_invoke_type = kStatic;
+  }
+
   HInvoke* invoke = nullptr;
+
   if (optimized_invoke_type == kVirtual) {
     invoke = new (arena_) HInvokeVirtual(
         arena_, number_of_arguments, return_type, dex_pc, method_idx, table_index);
@@ -620,9 +656,76 @@
     bool is_recursive =
         (target_method.dex_method_index == dex_compilation_unit_->GetDexMethodIndex());
     DCHECK(!is_recursive || (target_method.dex_file == dex_compilation_unit_->GetDexFile()));
+
+    if (optimized_invoke_type == kStatic) {
+      ScopedObjectAccess soa(Thread::Current());
+      StackHandleScope<4> hs(soa.Self());
+      Handle<mirror::DexCache> dex_cache(hs.NewHandle(
+          dex_compilation_unit_->GetClassLinker()->FindDexCache(
+              *dex_compilation_unit_->GetDexFile())));
+      Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
+          soa.Decode<mirror::ClassLoader*>(dex_compilation_unit_->GetClassLoader())));
+      mirror::ArtMethod* resolved_method = compiler_driver_->ResolveMethod(
+          soa, dex_cache, class_loader, dex_compilation_unit_, method_idx,
+          optimized_invoke_type);
+
+      if (resolved_method == nullptr) {
+        MaybeRecordStat(MethodCompilationStat::kNotCompiledUnresolvedMethod);
+        return false;
+      }
+
+      const DexFile& outer_dex_file = *outer_compilation_unit_->GetDexFile();
+      Handle<mirror::DexCache> outer_dex_cache(hs.NewHandle(
+          outer_compilation_unit_->GetClassLinker()->FindDexCache(outer_dex_file)));
+      Handle<mirror::Class> referrer_class(hs.NewHandle(GetOutermostCompilingClass()));
+
+      // The index at which the method's class is stored in the DexCache's type array.
+      uint32_t storage_index = DexFile::kDexNoIndex;
+      bool is_referrer_class = (resolved_method->GetDeclaringClass() == referrer_class.Get());
+      if (is_referrer_class) {
+        storage_index = referrer_class->GetDexTypeIndex();
+      } else if (outer_dex_cache.Get() == dex_cache.Get()) {
+        // Get `storage_index` from IsClassOfStaticMethodAvailableToReferrer.
+        compiler_driver_->IsClassOfStaticMethodAvailableToReferrer(outer_dex_cache.Get(),
+                                                                   referrer_class.Get(),
+                                                                   resolved_method,
+                                                                   method_idx,
+                                                                   &storage_index);
+      }
+
+      if (referrer_class.Get()->IsSubClass(resolved_method->GetDeclaringClass())) {
+        // If the referrer class is the declaring class or a subclass
+        // of the declaring class, no class initialization is needed
+        // before the static method call.
+        clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kNone;
+      } else if (storage_index != DexFile::kDexNoIndex) {
+        // If the method's class type index is available, check
+        // whether we should add an explicit class initialization
+        // check for its declaring class before the static method call.
+
+        // TODO: find out why this check is needed.
+        bool is_in_dex_cache = compiler_driver_->CanAssumeTypeIsPresentInDexCache(
+            *outer_compilation_unit_->GetDexFile(), storage_index);
+        bool is_initialized =
+            resolved_method->GetDeclaringClass()->IsInitialized() && is_in_dex_cache;
+
+        if (is_initialized) {
+          clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kNone;
+        } else {
+          clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kExplicit;
+          HLoadClass* load_class =
+              new (arena_) HLoadClass(storage_index, is_referrer_class, dex_pc);
+          current_block_->AddInstruction(load_class);
+          clinit_check = new (arena_) HClinitCheck(load_class, dex_pc);
+          current_block_->AddInstruction(clinit_check);
+        }
+      }
+    }
+
     invoke = new (arena_) HInvokeStaticOrDirect(
         arena_, number_of_arguments, return_type, dex_pc, target_method.dex_method_index,
-        is_recursive, invoke_type, optimized_invoke_type);
+        is_recursive, string_init_offset, invoke_type, optimized_invoke_type,
+        clinit_check_requirement);
   }
 
   size_t start_index = 0;
@@ -638,6 +741,9 @@
 
   uint32_t descriptor_index = 1;
   uint32_t argument_index = start_index;
+  if (is_string_init) {
+    start_index = 1;
+  }
   for (size_t i = start_index; i < number_of_vreg_arguments; i++, argument_index++) {
     Primitive::Type type = Primitive::GetType(descriptor[descriptor_index++]);
     bool is_wide = (type == Primitive::kPrimLong) || (type == Primitive::kPrimDouble);
@@ -654,10 +760,38 @@
       i++;
     }
   }
-
   DCHECK_EQ(argument_index, number_of_arguments);
+
+  if (clinit_check_requirement == HInvokeStaticOrDirect::ClinitCheckRequirement::kExplicit) {
+    // Add the class initialization check as last input of `invoke`.
+    DCHECK(clinit_check != nullptr);
+    invoke->SetArgumentAt(argument_index, clinit_check);
+  }
+
   current_block_->AddInstruction(invoke);
   latest_result_ = invoke;
+
+  // Add move-result for StringFactory method.
+  if (is_string_init) {
+    uint32_t orig_this_reg = is_range ? register_index : args[0];
+    const VerifiedMethod* verified_method =
+        compiler_driver_->GetVerifiedMethod(dex_file_, dex_compilation_unit_->GetDexMethodIndex());
+    if (verified_method == nullptr) {
+      LOG(WARNING) << "No verified method for method calling String.<init>: "
+                   << PrettyMethod(dex_compilation_unit_->GetDexMethodIndex(), *dex_file_);
+      return false;
+    }
+    const SafeMap<uint32_t, std::set<uint32_t>>& string_init_map =
+        verified_method->GetStringInitPcRegMap();
+    auto map_it = string_init_map.find(dex_pc);
+    if (map_it != string_init_map.end()) {
+      std::set<uint32_t> reg_set = map_it->second;
+      for (auto set_it = reg_set.begin(); set_it != reg_set.end(); ++set_it) {
+        UpdateLocal(*set_it, invoke);
+      }
+    }
+    UpdateLocal(orig_this_reg, invoke);
+  }
   return true;
 }
 
@@ -732,7 +866,6 @@
   return compiling_class.Get() == cls.Get();
 }
 
-
 bool HGraphBuilder::BuildStaticFieldAccess(const Instruction& instruction,
                                            uint32_t dex_pc,
                                            bool is_put) {
@@ -764,7 +897,7 @@
   if (is_referrer_class) {
     storage_index = referrer_class->GetDexTypeIndex();
   } else if (outer_dex_cache.Get() != dex_cache.Get()) {
-    // The compiler driver cannot currently understand multple dex caches involved. Just bailout.
+    // The compiler driver cannot currently understand multiple dex caches involved. Just bailout.
     return false;
   } else {
     std::pair<bool, bool> pair = compiler_driver_->IsFastStaticField(
@@ -882,7 +1015,7 @@
     current_block_->AddInstruction(new (arena_) HArrayGet(object, index, anticipated_type));
     UpdateLocal(source_or_dest_reg, current_block_->GetLastInstruction());
   }
-  graph_->SetHasArrayAccesses(true);
+  graph_->SetHasBoundsChecks(true);
 }
 
 void HGraphBuilder::BuildFilledNewArray(uint32_t dex_pc,
@@ -984,6 +1117,7 @@
     default:
       LOG(FATAL) << "Unknown element width for " << payload->element_width;
   }
+  graph_->SetHasBoundsChecks(true);
 }
 
 void HGraphBuilder::BuildFillWideArrayData(HInstruction* object,
@@ -1834,12 +1968,19 @@
 
     case Instruction::NEW_INSTANCE: {
       uint16_t type_index = instruction.VRegB_21c();
-      QuickEntrypointEnum entrypoint = NeedsAccessCheck(type_index)
-          ? kQuickAllocObjectWithAccessCheck
-          : kQuickAllocObject;
+      if (compiler_driver_->IsStringTypeIndex(type_index, dex_file_)) {
+        // Turn new-instance of string into a const 0.
+        int32_t register_index = instruction.VRegA();
+        HNullConstant* constant = graph_->GetNullConstant();
+        UpdateLocal(register_index, constant);
+      } else {
+        QuickEntrypointEnum entrypoint = NeedsAccessCheck(type_index)
+            ? kQuickAllocObjectWithAccessCheck
+            : kQuickAllocObject;
 
-      current_block_->AddInstruction(new (arena_) HNewInstance(dex_pc, type_index, entrypoint));
-      UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
+        current_block_->AddInstruction(new (arena_) HNewInstance(dex_pc, type_index, entrypoint));
+        UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
+      }
       break;
     }
 
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index dc6d97e..36503ce 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -88,7 +88,10 @@
   // the newly created blocks.
   // As a side effect, also compute the number of dex instructions, blocks, and
   // branches.
-  void ComputeBranchTargets(const uint16_t* start,
+  // Returns true if all the branches fall inside the method code, false otherwise.
+  // (In normal cases this should always return true but someone can artificially
+  // create a code unit in which branches fall-through out of it).
+  bool ComputeBranchTargets(const uint16_t* start,
                             const uint16_t* end,
                             size_t* number_of_branches);
   void MaybeUpdateCurrentBlock(size_t index);
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index b14b69b..4805cee 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -100,11 +100,11 @@
   for (size_t i = 0; i < instruction->EnvironmentSize(); ++i) {
     if (environment->GetInstructionAt(i) != nullptr) {
       Primitive::Type type = environment->GetInstructionAt(i)->GetType();
-      DCHECK(CheckType(type, locations->GetEnvironmentAt(i)))
-        << type << " " << locations->GetEnvironmentAt(i);
+      DCHECK(CheckType(type, environment->GetLocationAt(i)))
+        << type << " " << environment->GetLocationAt(i);
     } else {
-      DCHECK(locations->GetEnvironmentAt(i).IsInvalid())
-        << locations->GetEnvironmentAt(i);
+      DCHECK(environment->GetLocationAt(i).IsInvalid())
+        << environment->GetLocationAt(i);
     }
   }
   return true;
@@ -153,6 +153,7 @@
 }
 
 void CodeGenerator::CompileInternal(CodeAllocator* allocator, bool is_baseline) {
+  is_baseline_ = is_baseline;
   HGraphVisitor* instruction_visitor = GetInstructionVisitor();
   DCHECK_EQ(current_block_index_, 0u);
   GenerateFrameEntry();
@@ -612,7 +613,7 @@
 }
 
 void CodeGenerator::BuildStackMaps(std::vector<uint8_t>* data) {
-  uint32_t size = stack_map_stream_.ComputeNeededSize();
+  uint32_t size = stack_map_stream_.PrepareForFillIn();
   data->resize(size);
   MemoryRegion region(data->data(), size);
   stack_map_stream_.FillIn(region);
@@ -644,22 +645,34 @@
     }
   }
 
+  uint32_t outer_dex_pc = dex_pc;
+  uint32_t outer_environment_size = 0;
+  uint32_t inlining_depth = 0;
+  if (instruction != nullptr) {
+    for (HEnvironment* environment = instruction->GetEnvironment();
+         environment != nullptr;
+         environment = environment->GetParent()) {
+      outer_dex_pc = environment->GetDexPc();
+      outer_environment_size = environment->Size();
+      if (environment != instruction->GetEnvironment()) {
+        inlining_depth++;
+      }
+    }
+  }
+
   // Collect PC infos for the mapping table.
   struct PcInfo pc_info;
-  pc_info.dex_pc = dex_pc;
+  pc_info.dex_pc = outer_dex_pc;
   pc_info.native_pc = GetAssembler()->CodeSize();
   pc_infos_.Add(pc_info);
 
-  uint32_t inlining_depth = 0;
-
   if (instruction == nullptr) {
     // For stack overflow checks.
-    stack_map_stream_.AddStackMapEntry(dex_pc, pc_info.native_pc, 0, 0, 0, inlining_depth);
+    stack_map_stream_.BeginStackMapEntry(pc_info.dex_pc, pc_info.native_pc, 0, 0, 0, 0);
+    stack_map_stream_.EndStackMapEntry();
     return;
   }
   LocationSummary* locations = instruction->GetLocations();
-  HEnvironment* environment = instruction->GetEnvironment();
-  size_t environment_size = instruction->EnvironmentSize();
 
   uint32_t register_mask = locations->GetRegisterMask();
   if (locations->OnlyCallsOnSlowPath()) {
@@ -672,63 +685,80 @@
   }
   // The register mask must be a subset of callee-save registers.
   DCHECK_EQ(register_mask & core_callee_save_mask_, register_mask);
-  stack_map_stream_.AddStackMapEntry(dex_pc,
-                                     pc_info.native_pc,
-                                     register_mask,
-                                     locations->GetStackMask(),
-                                     environment_size,
-                                     inlining_depth);
+  stack_map_stream_.BeginStackMapEntry(pc_info.dex_pc,
+                                       pc_info.native_pc,
+                                       register_mask,
+                                       locations->GetStackMask(),
+                                       outer_environment_size,
+                                       inlining_depth);
+
+  EmitEnvironment(instruction->GetEnvironment(), slow_path);
+  stack_map_stream_.EndStackMapEntry();
+}
+
+void CodeGenerator::EmitEnvironment(HEnvironment* environment, SlowPathCode* slow_path) {
+  if (environment == nullptr) return;
+
+  if (environment->GetParent() != nullptr) {
+    // We emit the parent environment first.
+    EmitEnvironment(environment->GetParent(), slow_path);
+    stack_map_stream_.BeginInlineInfoEntry(
+        environment->GetMethodIdx(), environment->GetDexPc(), environment->Size());
+  }
 
   // Walk over the environment, and record the location of dex registers.
-  for (size_t i = 0; i < environment_size; ++i) {
+  for (size_t i = 0, environment_size = environment->Size(); i < environment_size; ++i) {
     HInstruction* current = environment->GetInstructionAt(i);
     if (current == nullptr) {
-      stack_map_stream_.AddDexRegisterEntry(i, DexRegisterLocation::Kind::kNone, 0);
+      stack_map_stream_.AddDexRegisterEntry(DexRegisterLocation::Kind::kNone, 0);
       continue;
     }
 
-    Location location = locations->GetEnvironmentAt(i);
+    Location location = environment->GetLocationAt(i);
     switch (location.GetKind()) {
       case Location::kConstant: {
         DCHECK_EQ(current, location.GetConstant());
         if (current->IsLongConstant()) {
           int64_t value = current->AsLongConstant()->GetValue();
           stack_map_stream_.AddDexRegisterEntry(
-              i, DexRegisterLocation::Kind::kConstant, Low32Bits(value));
+              DexRegisterLocation::Kind::kConstant, Low32Bits(value));
           stack_map_stream_.AddDexRegisterEntry(
-              ++i, DexRegisterLocation::Kind::kConstant, High32Bits(value));
+              DexRegisterLocation::Kind::kConstant, High32Bits(value));
+          ++i;
           DCHECK_LT(i, environment_size);
         } else if (current->IsDoubleConstant()) {
           int64_t value = bit_cast<int64_t, double>(current->AsDoubleConstant()->GetValue());
           stack_map_stream_.AddDexRegisterEntry(
-              i, DexRegisterLocation::Kind::kConstant, Low32Bits(value));
+              DexRegisterLocation::Kind::kConstant, Low32Bits(value));
           stack_map_stream_.AddDexRegisterEntry(
-              ++i, DexRegisterLocation::Kind::kConstant, High32Bits(value));
+              DexRegisterLocation::Kind::kConstant, High32Bits(value));
+          ++i;
           DCHECK_LT(i, environment_size);
         } else if (current->IsIntConstant()) {
           int32_t value = current->AsIntConstant()->GetValue();
-          stack_map_stream_.AddDexRegisterEntry(i, DexRegisterLocation::Kind::kConstant, value);
+          stack_map_stream_.AddDexRegisterEntry(DexRegisterLocation::Kind::kConstant, value);
         } else if (current->IsNullConstant()) {
-          stack_map_stream_.AddDexRegisterEntry(i, DexRegisterLocation::Kind::kConstant, 0);
+          stack_map_stream_.AddDexRegisterEntry(DexRegisterLocation::Kind::kConstant, 0);
         } else {
           DCHECK(current->IsFloatConstant()) << current->DebugName();
           int32_t value = bit_cast<int32_t, float>(current->AsFloatConstant()->GetValue());
-          stack_map_stream_.AddDexRegisterEntry(i, DexRegisterLocation::Kind::kConstant, value);
+          stack_map_stream_.AddDexRegisterEntry(DexRegisterLocation::Kind::kConstant, value);
         }
         break;
       }
 
       case Location::kStackSlot: {
         stack_map_stream_.AddDexRegisterEntry(
-            i, DexRegisterLocation::Kind::kInStack, location.GetStackIndex());
+            DexRegisterLocation::Kind::kInStack, location.GetStackIndex());
         break;
       }
 
       case Location::kDoubleStackSlot: {
         stack_map_stream_.AddDexRegisterEntry(
-            i, DexRegisterLocation::Kind::kInStack, location.GetStackIndex());
+            DexRegisterLocation::Kind::kInStack, location.GetStackIndex());
         stack_map_stream_.AddDexRegisterEntry(
-            ++i, DexRegisterLocation::Kind::kInStack, location.GetHighStackIndex(kVRegSize));
+            DexRegisterLocation::Kind::kInStack, location.GetHighStackIndex(kVRegSize));
+        ++i;
         DCHECK_LT(i, environment_size);
         break;
       }
@@ -737,16 +767,18 @@
         int id = location.reg();
         if (slow_path != nullptr && slow_path->IsCoreRegisterSaved(id)) {
           uint32_t offset = slow_path->GetStackOffsetOfCoreRegister(id);
-          stack_map_stream_.AddDexRegisterEntry(i, DexRegisterLocation::Kind::kInStack, offset);
+          stack_map_stream_.AddDexRegisterEntry(DexRegisterLocation::Kind::kInStack, offset);
           if (current->GetType() == Primitive::kPrimLong) {
             stack_map_stream_.AddDexRegisterEntry(
-                ++i, DexRegisterLocation::Kind::kInStack, offset + kVRegSize);
+                DexRegisterLocation::Kind::kInStack, offset + kVRegSize);
+            ++i;
             DCHECK_LT(i, environment_size);
           }
         } else {
-          stack_map_stream_.AddDexRegisterEntry(i, DexRegisterLocation::Kind::kInRegister, id);
+          stack_map_stream_.AddDexRegisterEntry(DexRegisterLocation::Kind::kInRegister, id);
           if (current->GetType() == Primitive::kPrimLong) {
-            stack_map_stream_.AddDexRegisterEntry(++i, DexRegisterLocation::Kind::kInRegister, id);
+            stack_map_stream_.AddDexRegisterEntry(DexRegisterLocation::Kind::kInRegister, id);
+            ++i;
             DCHECK_LT(i, environment_size);
           }
         }
@@ -757,17 +789,18 @@
         int id = location.reg();
         if (slow_path != nullptr && slow_path->IsFpuRegisterSaved(id)) {
           uint32_t offset = slow_path->GetStackOffsetOfFpuRegister(id);
-          stack_map_stream_.AddDexRegisterEntry(i, DexRegisterLocation::Kind::kInStack, offset);
+          stack_map_stream_.AddDexRegisterEntry(DexRegisterLocation::Kind::kInStack, offset);
           if (current->GetType() == Primitive::kPrimDouble) {
             stack_map_stream_.AddDexRegisterEntry(
-                ++i, DexRegisterLocation::Kind::kInStack, offset + kVRegSize);
+                DexRegisterLocation::Kind::kInStack, offset + kVRegSize);
+            ++i;
             DCHECK_LT(i, environment_size);
           }
         } else {
-          stack_map_stream_.AddDexRegisterEntry(i, DexRegisterLocation::Kind::kInFpuRegister, id);
+          stack_map_stream_.AddDexRegisterEntry(DexRegisterLocation::Kind::kInFpuRegister, id);
           if (current->GetType() == Primitive::kPrimDouble) {
-            stack_map_stream_.AddDexRegisterEntry(
-                ++i, DexRegisterLocation::Kind::kInFpuRegister, id);
+            stack_map_stream_.AddDexRegisterEntry(DexRegisterLocation::Kind::kInFpuRegister, id);
+            ++i;
             DCHECK_LT(i, environment_size);
           }
         }
@@ -779,16 +812,17 @@
         int high = location.high();
         if (slow_path != nullptr && slow_path->IsFpuRegisterSaved(low)) {
           uint32_t offset = slow_path->GetStackOffsetOfFpuRegister(low);
-          stack_map_stream_.AddDexRegisterEntry(i, DexRegisterLocation::Kind::kInStack, offset);
+          stack_map_stream_.AddDexRegisterEntry(DexRegisterLocation::Kind::kInStack, offset);
         } else {
-          stack_map_stream_.AddDexRegisterEntry(i, DexRegisterLocation::Kind::kInFpuRegister, low);
+          stack_map_stream_.AddDexRegisterEntry(DexRegisterLocation::Kind::kInFpuRegister, low);
         }
         if (slow_path != nullptr && slow_path->IsFpuRegisterSaved(high)) {
           uint32_t offset = slow_path->GetStackOffsetOfFpuRegister(high);
-          stack_map_stream_.AddDexRegisterEntry(++i, DexRegisterLocation::Kind::kInStack, offset);
+          stack_map_stream_.AddDexRegisterEntry(DexRegisterLocation::Kind::kInStack, offset);
+          ++i;
         } else {
-          stack_map_stream_.AddDexRegisterEntry(
-              ++i, DexRegisterLocation::Kind::kInFpuRegister, high);
+          stack_map_stream_.AddDexRegisterEntry(DexRegisterLocation::Kind::kInFpuRegister, high);
+          ++i;
         }
         DCHECK_LT(i, environment_size);
         break;
@@ -799,23 +833,23 @@
         int high = location.high();
         if (slow_path != nullptr && slow_path->IsCoreRegisterSaved(low)) {
           uint32_t offset = slow_path->GetStackOffsetOfCoreRegister(low);
-          stack_map_stream_.AddDexRegisterEntry(i, DexRegisterLocation::Kind::kInStack, offset);
+          stack_map_stream_.AddDexRegisterEntry(DexRegisterLocation::Kind::kInStack, offset);
         } else {
-          stack_map_stream_.AddDexRegisterEntry(i, DexRegisterLocation::Kind::kInRegister, low);
+          stack_map_stream_.AddDexRegisterEntry(DexRegisterLocation::Kind::kInRegister, low);
         }
         if (slow_path != nullptr && slow_path->IsCoreRegisterSaved(high)) {
           uint32_t offset = slow_path->GetStackOffsetOfCoreRegister(high);
-          stack_map_stream_.AddDexRegisterEntry(++i, DexRegisterLocation::Kind::kInStack, offset);
+          stack_map_stream_.AddDexRegisterEntry(DexRegisterLocation::Kind::kInStack, offset);
         } else {
-          stack_map_stream_.AddDexRegisterEntry(
-              ++i, DexRegisterLocation::Kind::kInRegister, high);
+          stack_map_stream_.AddDexRegisterEntry(DexRegisterLocation::Kind::kInRegister, high);
         }
+        ++i;
         DCHECK_LT(i, environment_size);
         break;
       }
 
       case Location::kInvalid: {
-        stack_map_stream_.AddDexRegisterEntry(i, DexRegisterLocation::Kind::kNone, 0);
+        stack_map_stream_.AddDexRegisterEntry(DexRegisterLocation::Kind::kNone, 0);
         break;
       }
 
@@ -823,6 +857,10 @@
         LOG(FATAL) << "Unexpected kind " << location.GetKind();
     }
   }
+
+  if (environment->GetParent() != nullptr) {
+    stack_map_stream_.EndInlineInfoEntry();
+  }
 }
 
 bool CodeGenerator::CanMoveNullCheckToUser(HNullCheck* null_check) {
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 9b3cf8a..740beab 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -34,10 +34,15 @@
 // Binary encoding of 2^31 for type double.
 static int64_t constexpr k2Pow31EncodingForDouble = INT64_C(0x41E0000000000000);
 
+// Minimum value for a primitive integer.
+static int32_t constexpr kPrimIntMin = 0x80000000;
+// Minimum value for a primitive long.
+static int64_t constexpr kPrimLongMin = INT64_C(0x8000000000000000);
+
 // Maximum value for a primitive integer.
 static int32_t constexpr kPrimIntMax = 0x7fffffff;
 // Maximum value for a primitive long.
-static int64_t constexpr kPrimLongMax = 0x7fffffffffffffff;
+static int64_t constexpr kPrimLongMax = INT64_C(0x7fffffffffffffff);
 
 class Assembler;
 class CodeGenerator;
@@ -107,6 +112,25 @@
   DISALLOW_COPY_AND_ASSIGN(SlowPathCode);
 };
 
+// Base class for visitors that hand out the Location of each successive invoke argument.
+class InvokeDexCallingConventionVisitor {
+ public:
+  virtual Location GetNextLocation(Primitive::Type type) = 0;
+
+ protected:
+  InvokeDexCallingConventionVisitor() {}
+  virtual ~InvokeDexCallingConventionVisitor() {}
+  // The current index for core registers.
+  uint32_t gp_index_ = 0u;
+  // The current index for floating-point registers.
+  uint32_t float_index_ = 0u;
+  // The current stack index.
+  uint32_t stack_index_ = 0u;
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor);
+};
+
 class CodeGenerator {
  public:
   // Compiles the graph to executable instructions. Returns whether the compilation
@@ -214,6 +238,10 @@
       std::vector<uint8_t>* vector, const DexCompilationUnit& dex_compilation_unit) const;
   void BuildStackMaps(std::vector<uint8_t>* vector);
 
+  bool IsBaseline() const {
+    return is_baseline_;
+  }
+
   bool IsLeafMethod() const {
     return is_leaf_;
   }
@@ -306,6 +334,7 @@
     return GetFpuSpillSize() + GetCoreSpillSize();
   }
 
+  virtual ParallelMoveResolver* GetMoveResolver() = 0;
 
  protected:
   CodeGenerator(HGraph* graph,
@@ -327,6 +356,7 @@
         number_of_register_pairs_(number_of_register_pairs),
         core_callee_save_mask_(core_callee_save_mask),
         fpu_callee_save_mask_(fpu_callee_save_mask),
+        is_baseline_(false),
         graph_(graph),
         compiler_options_(compiler_options),
         pc_infos_(graph->GetArena(), 32),
@@ -348,7 +378,6 @@
 
   virtual Location GetStackLocation(HLoadLocal* load) const = 0;
 
-  virtual ParallelMoveResolver* GetMoveResolver() = 0;
   virtual HGraphVisitor* GetLocationBuilder() = 0;
   virtual HGraphVisitor* GetInstructionVisitor() = 0;
 
@@ -406,11 +435,15 @@
   const uint32_t core_callee_save_mask_;
   const uint32_t fpu_callee_save_mask_;
 
+  // Whether we are using baseline.
+  bool is_baseline_;
+
  private:
   void InitLocationsBaseline(HInstruction* instruction);
   size_t GetStackOffsetOfSavedRegister(size_t index);
   void CompileInternal(CodeAllocator* allocator, bool is_baseline);
   void BlockIfInRegister(Location location, bool is_out = false) const;
+  void EmitEnvironment(HEnvironment* environment, SlowPathCode* slow_path);
 
   HGraph* const graph_;
   const CompilerOptions& compiler_options_;
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index ae1fb53..672e55e 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -17,6 +17,7 @@
 #include "code_generator_arm.h"
 
 #include "arch/arm/instruction_set_features_arm.h"
+#include "code_generator_utils.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "gc/accounting/card_table.h"
 #include "intrinsics.h"
@@ -112,6 +113,10 @@
     return &return_label_;
   }
 
+  HBasicBlock* GetSuccessor() const {
+    return successor_;
+  }
+
  private:
   HSuspendCheck* const instruction_;
   // If not null, the block to branch to after the suspend check.
@@ -176,7 +181,6 @@
 
     InvokeRuntimeCallingConvention calling_convention;
     __ LoadImmediate(calling_convention.GetRegisterAt(0), cls_->GetTypeIndex());
-    arm_codegen->LoadCurrentMethod(calling_convention.GetRegisterAt(1));
     int32_t entry_point_offset = do_clinit_
         ? QUICK_ENTRY_POINT(pInitializeStaticStorage)
         : QUICK_ENTRY_POINT(pInitializeType);
@@ -222,7 +226,6 @@
     SaveLiveRegisters(codegen, locations);
 
     InvokeRuntimeCallingConvention calling_convention;
-    arm_codegen->LoadCurrentMethod(calling_convention.GetRegisterAt(1));
     __ LoadImmediate(calling_convention.GetRegisterAt(0), instruction_->GetStringIndex());
     arm_codegen->InvokeRuntime(
         QUICK_ENTRY_POINT(pResolveString), instruction_, instruction_->GetDexPc(), this);
@@ -345,11 +348,11 @@
 }
 
 void CodeGeneratorARM::DumpCoreRegister(std::ostream& stream, int reg) const {
-  stream << ArmManagedRegister::FromCoreRegister(Register(reg));
+  stream << Register(reg);
 }
 
 void CodeGeneratorARM::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
-  stream << ArmManagedRegister::FromSRegister(SRegister(reg));
+  stream << SRegister(reg);
 }
 
 size_t CodeGeneratorARM::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
@@ -607,7 +610,7 @@
   UNREACHABLE();
 }
 
-Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type) {
+Location InvokeDexCallingConventionVisitorARM::GetNextLocation(Primitive::Type type) {
   switch (type) {
     case Primitive::kPrimBoolean:
     case Primitive::kPrimByte:
@@ -682,7 +685,7 @@
   return Location();
 }
 
-Location InvokeDexCallingConventionVisitor::GetReturnLocation(Primitive::Type type) {
+Location InvokeDexCallingConventionVisitorARM::GetReturnLocation(Primitive::Type type) {
   switch (type) {
     case Primitive::kPrimBoolean:
     case Primitive::kPrimByte:
@@ -1243,6 +1246,10 @@
 }
 
 void LocationsBuilderARM::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
+  // When we do not run baseline, explicit clinit checks triggered by static
+  // invokes must have been pruned by art::PrepareForRegisterAllocation.
+  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+
   IntrinsicLocationsBuilderARM intrinsic(GetGraph()->GetArena(),
                                          codegen_->GetInstructionSetFeatures());
   if (intrinsic.TryDispatch(invoke)) {
@@ -1267,6 +1274,10 @@
 }
 
 void InstructionCodeGeneratorARM::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
+  // When we do not run baseline, explicit clinit checks triggered by static
+  // invokes must have been pruned by art::PrepareForRegisterAllocation.
+  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+
   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
     return;
   }
@@ -1282,8 +1293,8 @@
       new (GetGraph()->GetArena()) LocationSummary(invoke, LocationSummary::kCall);
   locations->AddTemp(Location::RegisterLocation(R0));
 
-  InvokeDexCallingConventionVisitor calling_convention_visitor;
-  for (size_t i = 0; i < invoke->InputCount(); i++) {
+  InvokeDexCallingConventionVisitorARM calling_convention_visitor;
+  for (size_t i = 0; i < invoke->GetNumberOfArguments(); i++) {
     HInstruction* input = invoke->InputAt(i);
     locations->SetInAt(i, calling_convention_visitor.GetNextLocation(input->GetType()));
   }
@@ -2175,11 +2186,134 @@
   }
 }
 
+void InstructionCodeGeneratorARM::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
+  // Int div/rem by the constant 1 or -1: no real division has to be emitted.
+  DCHECK(instruction->IsDiv() || instruction->IsRem());
+  DCHECK(instruction->GetResultType() == Primitive::kPrimInt);
+
+  LocationSummary* summary = instruction->GetLocations();
+  Location second = summary->InAt(1);
+  DCHECK(second.IsConstant());
+
+  Register result = summary->Out().AsRegister<Register>();
+  Register numerator = summary->InAt(0).AsRegister<Register>();
+  int32_t imm = second.GetConstant()->AsIntConstant()->GetValue();
+  DCHECK(imm == 1 || imm == -1);
+
+  if (instruction->IsRem()) {
+    __ LoadImmediate(result, 0);  // The remainder is always zero.
+  } else if (imm == 1) {
+    __ Mov(result, numerator);  // x / 1 == x.
+  } else {
+    // x / -1 is the negation of x: result = 0 - numerator.
+    __ rsb(result, numerator, ShifterOperand(0));
+  }
+}
+
+void InstructionCodeGeneratorARM::DivRemByPowerOfTwo(HBinaryOperation* instruction) {
+  // Shift-based int div/rem by a constant whose magnitude is a power of two greater than one.
+  DCHECK(instruction->IsDiv() || instruction->IsRem());
+  DCHECK(instruction->GetResultType() == Primitive::kPrimInt);
+
+  LocationSummary* locations = instruction->GetLocations();
+  Location second = locations->InAt(1);
+  DCHECK(second.IsConstant());
+  Register out = locations->Out().AsRegister<Register>();
+  Register dividend = locations->InAt(0).AsRegister<Register>();
+  Register temp = locations->GetTemp(0).AsRegister<Register>();
+  int32_t imm = second.GetConstant()->AsIntConstant()->GetValue();
+  // Take the magnitude in 64 bits: std::abs(INT32_MIN) is undefined for a 32-bit int.
+  uint32_t abs_imm = static_cast<uint32_t>(std::abs(static_cast<int64_t>(imm)));
+  DCHECK(IsPowerOfTwo(abs_imm));
+  int ctz_imm = CTZ(abs_imm);
+  // temp = (dividend < 0) ? abs_imm - 1 : 0, the bias that makes the shift round toward zero.
+  if (ctz_imm == 1) {
+    __ Lsr(temp, dividend, 32 - ctz_imm);
+  } else {
+    __ Asr(temp, dividend, 31);
+    __ Lsr(temp, temp, 32 - ctz_imm);
+  }
+  __ add(out, temp, ShifterOperand(dividend));
+  if (instruction->IsDiv()) {
+    __ Asr(out, out, ctz_imm);
+    if (imm < 0) {
+      __ rsb(out, out, ShifterOperand(0));
+    }
+  } else {
+    __ ubfx(out, out, 0, ctz_imm);
+    __ sub(out, out, ShifterOperand(temp));
+  }
+}
+
+void InstructionCodeGeneratorARM::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
+  DCHECK(instruction->IsDiv() || instruction->IsRem());
+  DCHECK(instruction->GetResultType() == Primitive::kPrimInt);
+  // Int div/rem by a constant, emitted as a multiply by a precomputed magic value plus shifts.
+  LocationSummary* locations = instruction->GetLocations();
+  Location second = locations->InAt(1);
+  DCHECK(second.IsConstant());
+
+  Register out = locations->Out().AsRegister<Register>();
+  Register dividend = locations->InAt(0).AsRegister<Register>();
+  Register temp1 = locations->GetTemp(0).AsRegister<Register>();
+  Register temp2 = locations->GetTemp(1).AsRegister<Register>();
+  int64_t imm = second.GetConstant()->AsIntConstant()->GetValue();
+
+  int64_t magic;
+  int shift;
+  CalculateMagicAndShiftForDivRem(imm, false /* is_long */, &magic, &shift);
+  // NOTE(review): assumes smull(lo, hi, rn, rm), leaving the high product word in temp1 - confirm.
+  __ LoadImmediate(temp1, magic);
+  __ smull(temp2, temp1, dividend, temp1);
+  // Adjust by the dividend when the sign of the magic value differs from the divisor's.
+  if (imm > 0 && magic < 0) {
+    __ add(temp1, temp1, ShifterOperand(dividend));
+  } else if (imm < 0 && magic > 0) {
+    __ sub(temp1, temp1, ShifterOperand(dividend));
+  }
+
+  if (shift != 0) {
+    __ Asr(temp1, temp1, shift);
+  }
+  // Subtracting (temp1 ASR 31), which is -1 for a negative temp1, adds one to negative values.
+  if (instruction->IsDiv()) {
+    __ sub(out, temp1, ShifterOperand(temp1, ASR, 31));
+  } else {
+    __ sub(temp1, temp1, ShifterOperand(temp1, ASR, 31));
+    // TODO: Strength reduction for mls.
+    __ LoadImmediate(temp2, imm);
+    __ mls(out, temp1, temp2, dividend);
+  }
+}
+
+void InstructionCodeGeneratorARM::GenerateDivRemConstantIntegral(HBinaryOperation* instruction) {
+  // Picks the cheapest code sequence for an int div/rem by the constant in InAt(1).
+  DCHECK(instruction->IsDiv() || instruction->IsRem());
+  DCHECK(instruction->GetResultType() == Primitive::kPrimInt);
+  LocationSummary* locations = instruction->GetLocations();
+  Location second = locations->InAt(1);
+  DCHECK(second.IsConstant());
+  // Hold the divisor in 64 bits so that std::abs() below is well defined for INT32_MIN too.
+  int64_t imm = second.GetConstant()->AsIntConstant()->GetValue();
+  if (imm == 0) {
+    // Do not generate anything. DivZeroCheck would prevent any code from being executed.
+  } else if (imm == 1 || imm == -1) {
+    DivRemOneOrMinusOne(instruction);
+  } else if (IsPowerOfTwo(static_cast<uint32_t>(std::abs(imm)))) {
+    DivRemByPowerOfTwo(instruction);
+  } else {
+    DCHECK(imm <= -2 || imm >= 2);
+    GenerateDivRemWithAnyConstant(instruction);
+  }
+}
+
 void LocationsBuilderARM::VisitDiv(HDiv* div) {
   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
   if (div->GetResultType() == Primitive::kPrimLong) {
     // pLdiv runtime call.
     call_kind = LocationSummary::kCall;
+  } else if (div->GetResultType() == Primitive::kPrimInt && div->InputAt(1)->IsConstant()) {
+    // sdiv will be replaced by other instruction sequence.
   } else if (div->GetResultType() == Primitive::kPrimInt &&
              !codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
     // pIdivmod runtime call.
@@ -2190,7 +2324,20 @@
 
   switch (div->GetResultType()) {
     case Primitive::kPrimInt: {
-      if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
+      if (div->InputAt(1)->IsConstant()) {
+        locations->SetInAt(0, Location::RequiresRegister());
+        locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
+        locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+        // Widen before std::abs(): its result is undefined for INT32_MIN in 32 bits.
+        int64_t imm = div->InputAt(1)->AsIntConstant()->GetValue();
+        uint32_t abs_imm = static_cast<uint32_t>(std::abs(imm));
+        if (abs_imm > 1u) {  // Divisors 0, 1 and -1 need no temp register.
+          locations->AddTemp(Location::RequiresRegister());
+          if (!IsPowerOfTwo(abs_imm)) {
+            locations->AddTemp(Location::RequiresRegister());
+          }
+        }
+      } else if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
         locations->SetInAt(0, Location::RequiresRegister());
         locations->SetInAt(1, Location::RequiresRegister());
         locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
@@ -2234,7 +2381,9 @@
 
   switch (div->GetResultType()) {
     case Primitive::kPrimInt: {
-      if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
+      if (second.IsConstant()) {
+        GenerateDivRemConstantIntegral(div);
+      } else if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
         __ sdiv(out.AsRegister<Register>(),
                 first.AsRegister<Register>(),
                 second.AsRegister<Register>());
@@ -2286,8 +2435,11 @@
 
   // Most remainders are implemented in the runtime.
   LocationSummary::CallKind call_kind = LocationSummary::kCall;
-  if (rem->GetResultType() == Primitive::kPrimInt &&
-      codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
+  if (rem->GetResultType() == Primitive::kPrimInt && rem->InputAt(1)->IsConstant()) {
+    // sdiv will be replaced by other instruction sequence.
+    call_kind = LocationSummary::kNoCall;
+  } else if ((rem->GetResultType() == Primitive::kPrimInt)
+             && codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
     // Have hardware divide instruction for int, do it with three instructions.
     call_kind = LocationSummary::kNoCall;
   }
@@ -2296,7 +2448,20 @@
 
   switch (type) {
     case Primitive::kPrimInt: {
-      if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
+      if (rem->InputAt(1)->IsConstant()) {
+        locations->SetInAt(0, Location::RequiresRegister());
+        locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1)));
+        locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+        // Widen before std::abs(): its result is undefined for INT32_MIN in 32 bits.
+        int64_t imm = rem->InputAt(1)->AsIntConstant()->GetValue();
+        uint32_t abs_imm = static_cast<uint32_t>(std::abs(imm));
+        if (abs_imm > 1u) {  // Divisors 0, 1 and -1 need no temp register.
+          locations->AddTemp(Location::RequiresRegister());
+          if (!IsPowerOfTwo(abs_imm)) {
+            locations->AddTemp(Location::RequiresRegister());
+          }
+        }
+      } else if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
         locations->SetInAt(0, Location::RequiresRegister());
         locations->SetInAt(1, Location::RequiresRegister());
         locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
@@ -2353,7 +2518,9 @@
   Primitive::Type type = rem->GetResultType();
   switch (type) {
     case Primitive::kPrimInt: {
-      if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
+      if (second.IsConstant()) {
+        GenerateDivRemConstantIntegral(rem);
+      } else if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
         Register reg1 = first.AsRegister<Register>();
         Register reg2 = second.AsRegister<Register>();
         Register temp = locations->GetTemp(0).AsRegister<Register>();
@@ -3533,8 +3700,18 @@
 void InstructionCodeGeneratorARM::GenerateSuspendCheck(HSuspendCheck* instruction,
                                                        HBasicBlock* successor) {
   SuspendCheckSlowPathARM* slow_path =
-      new (GetGraph()->GetArena()) SuspendCheckSlowPathARM(instruction, successor);
-  codegen_->AddSlowPath(slow_path);
+      down_cast<SuspendCheckSlowPathARM*>(instruction->GetSlowPath());
+  if (slow_path == nullptr) {
+    slow_path = new (GetGraph()->GetArena()) SuspendCheckSlowPathARM(instruction, successor);
+    instruction->SetSlowPath(slow_path);
+    codegen_->AddSlowPath(slow_path);
+    if (successor != nullptr) {
+      DCHECK(successor->IsLoopHeader());
+      codegen_->ClearSpillSlotsFromLoopPhisInStackMap(instruction);
+    }
+  } else {
+    DCHECK_EQ(slow_path->GetSuccessor(), successor);
+  }
 
   __ LoadFromOffset(
       kLoadUnsignedHalfword, IP, TR, Thread::ThreadFlagsOffset<kArmWordSize>().Int32Value());
@@ -4061,15 +4238,9 @@
   //
   // Currently we implement the app -> app logic, which looks up in the resolve cache.
 
-  // temp = method;
-  LoadCurrentMethod(temp);
-  if (!invoke->IsRecursive()) {
-    // temp = temp->dex_cache_resolved_methods_;
-    __ LoadFromOffset(
-        kLoadWord, temp, temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value());
-    // temp = temp[index_in_cache]
-    __ LoadFromOffset(
-        kLoadWord, temp, temp, CodeGenerator::GetCacheOffset(invoke->GetDexMethodIndex()));
+  if (invoke->IsStringInit()) {
+    // temp = thread->string_init_entrypoint
+    __ LoadFromOffset(kLoadWord, temp, TR, invoke->GetStringInitOffset());
     // LR = temp[offset_of_quick_compiled_code]
     __ LoadFromOffset(kLoadWord, LR, temp,
                       mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
@@ -4077,7 +4248,24 @@
     // LR()
     __ blx(LR);
   } else {
-    __ bl(GetFrameEntryLabel());
+    // temp = method;
+    LoadCurrentMethod(temp);
+    if (!invoke->IsRecursive()) {
+      // temp = temp->dex_cache_resolved_methods_;
+      __ LoadFromOffset(
+          kLoadWord, temp, temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value());
+      // temp = temp[index_in_cache]
+      __ LoadFromOffset(
+          kLoadWord, temp, temp, CodeGenerator::GetCacheOffset(invoke->GetDexMethodIndex()));
+      // LR = temp[offset_of_quick_compiled_code]
+      __ LoadFromOffset(kLoadWord, LR, temp,
+                        mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
+                            kArmWordSize).Int32Value());
+      // LR()
+      __ blx(LR);
+    } else {
+      __ bl(GetFrameEntryLabel());
+    }
   }
 
   DCHECK(!IsLeafMethod());
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 6009036..2edbcf8 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -78,22 +78,19 @@
   DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConvention);
 };
 
-class InvokeDexCallingConventionVisitor {
+class InvokeDexCallingConventionVisitorARM : public InvokeDexCallingConventionVisitor {
  public:
-  InvokeDexCallingConventionVisitor()
-      : gp_index_(0), float_index_(0), double_index_(0), stack_index_(0) {}
+  InvokeDexCallingConventionVisitorARM() {}
+  virtual ~InvokeDexCallingConventionVisitorARM() {}
 
-  Location GetNextLocation(Primitive::Type type);
+  Location GetNextLocation(Primitive::Type type) OVERRIDE;
   Location GetReturnLocation(Primitive::Type type);
 
  private:
   InvokeDexCallingConvention calling_convention;
-  uint32_t gp_index_;
-  uint32_t float_index_;
-  uint32_t double_index_;
-  uint32_t stack_index_;
+  uint32_t double_index_ = 0;
 
-  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor);
+  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitorARM);
 };
 
 class ParallelMoveResolverARM : public ParallelMoveResolverWithSwap {
@@ -151,7 +148,7 @@
   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
 
   CodeGeneratorARM* const codegen_;
-  InvokeDexCallingConventionVisitor parameter_visitor_;
+  InvokeDexCallingConventionVisitorARM parameter_visitor_;
 
   DISALLOW_COPY_AND_ASSIGN(LocationsBuilderARM);
 };
@@ -192,6 +189,10 @@
                              Label* true_target,
                              Label* false_target,
                              Label* always_true_target);
+  void DivRemOneOrMinusOne(HBinaryOperation* instruction);
+  void DivRemByPowerOfTwo(HBinaryOperation* instruction);
+  void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
+  void GenerateDivRemConstantIntegral(HBinaryOperation* instruction);
 
   ArmAssembler* const assembler_;
   CodeGeneratorARM* const codegen_;
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 7e9cdac..34720e2 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -17,6 +17,7 @@
 #include "code_generator_arm64.h"
 
 #include "arch/arm64/instruction_set_features_arm64.h"
+#include "code_generator_utils.h"
 #include "common_arm64.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "entrypoints/quick/quick_entrypoints_enum.h"
@@ -256,14 +257,13 @@
 
     InvokeRuntimeCallingConvention calling_convention;
     __ Mov(calling_convention.GetRegisterAt(0).W(), cls_->GetTypeIndex());
-    arm64_codegen->LoadCurrentMethod(calling_convention.GetRegisterAt(1).W());
     int32_t entry_point_offset = do_clinit_ ? QUICK_ENTRY_POINT(pInitializeStaticStorage)
                                             : QUICK_ENTRY_POINT(pInitializeType);
     arm64_codegen->InvokeRuntime(entry_point_offset, at_, dex_pc_, this);
     if (do_clinit_) {
-      CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t, mirror::ArtMethod*>();
+      CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>();
     } else {
-      CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t, mirror::ArtMethod*>();
+      CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>();
     }
 
     // Move the class to the desired location.
@@ -308,11 +308,10 @@
     SaveLiveRegisters(codegen, locations);
 
     InvokeRuntimeCallingConvention calling_convention;
-    arm64_codegen->LoadCurrentMethod(calling_convention.GetRegisterAt(1).W());
     __ Mov(calling_convention.GetRegisterAt(0).W(), instruction_->GetStringIndex());
     arm64_codegen->InvokeRuntime(
         QUICK_ENTRY_POINT(pResolveString), instruction_, instruction_->GetDexPc(), this);
-    CheckEntrypointTypes<kQuickResolveString, void*, uint32_t, mirror::ArtMethod*>();
+    CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
     Primitive::Type type = instruction_->GetType();
     arm64_codegen->MoveLocation(locations->Out(), calling_convention.GetReturnLocation(type), type);
 
@@ -370,6 +369,10 @@
     return &return_label_;
   }
 
+  HBasicBlock* GetSuccessor() const {
+    return successor_;  // Block to branch to after the suspend check; may be null.
+  }
+
  private:
   HSuspendCheck* const instruction_;
   // If not null, the block to branch to after the suspend check.
@@ -457,15 +460,15 @@
 
 #undef __
 
-Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type) {
+Location InvokeDexCallingConventionVisitorARM64::GetNextLocation(Primitive::Type type) {
   Location next_location;
   if (type == Primitive::kPrimVoid) {
     LOG(FATAL) << "Unreachable type " << type;
   }
 
   if (Primitive::IsFloatingPointType(type) &&
-      (fp_index_ < calling_convention.GetNumberOfFpuRegisters())) {
-    next_location = LocationFrom(calling_convention.GetFpuRegisterAt(fp_index_++));
+      (float_index_ < calling_convention.GetNumberOfFpuRegisters())) {
+    next_location = LocationFrom(calling_convention.GetFpuRegisterAt(float_index_++));
   } else if (!Primitive::IsFloatingPointType(type) &&
              (gp_index_ < calling_convention.GetNumberOfRegisters())) {
     next_location = LocationFrom(calling_convention.GetRegisterAt(gp_index_++));
@@ -785,11 +788,11 @@
 }
 
 void CodeGeneratorARM64::DumpCoreRegister(std::ostream& stream, int reg) const {
-  stream << Arm64ManagedRegister::FromXRegister(XRegister(reg));
+  stream << XRegister(reg);
 }
 
 void CodeGeneratorARM64::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
-  stream << Arm64ManagedRegister::FromDRegister(DRegister(reg));
+  stream << DRegister(reg);
 }
 
 void CodeGeneratorARM64::MoveConstant(CPURegister destination, HConstant* constant) {
@@ -1073,14 +1076,12 @@
   BlockPoolsScope block_pools(GetVIXLAssembler());
   __ Ldr(lr, MemOperand(tr, entry_point_offset));
   __ Blr(lr);
-  if (instruction != nullptr) {
-    RecordPcInfo(instruction, dex_pc, slow_path);
-    DCHECK(instruction->IsSuspendCheck()
-        || instruction->IsBoundsCheck()
-        || instruction->IsNullCheck()
-        || instruction->IsDivZeroCheck()
-        || !IsLeafMethod());
-    }
+  RecordPcInfo(instruction, dex_pc, slow_path);
+  DCHECK(instruction->IsSuspendCheck()
+         || instruction->IsBoundsCheck()
+         || instruction->IsNullCheck()
+         || instruction->IsDivZeroCheck()
+         || !IsLeafMethod());
 }
 
 void InstructionCodeGeneratorARM64::GenerateClassInitializationCheck(SlowPathCodeARM64* slow_path,
@@ -1132,8 +1133,19 @@
 void InstructionCodeGeneratorARM64::GenerateSuspendCheck(HSuspendCheck* instruction,
                                                          HBasicBlock* successor) {
   SuspendCheckSlowPathARM64* slow_path =
-    new (GetGraph()->GetArena()) SuspendCheckSlowPathARM64(instruction, successor);
-  codegen_->AddSlowPath(slow_path);
+      down_cast<SuspendCheckSlowPathARM64*>(instruction->GetSlowPath());
+  if (slow_path == nullptr) {
+    slow_path = new (GetGraph()->GetArena()) SuspendCheckSlowPathARM64(instruction, successor);
+    instruction->SetSlowPath(slow_path);
+    codegen_->AddSlowPath(slow_path);
+    if (successor != nullptr) {
+      DCHECK(successor->IsLoopHeader());
+      codegen_->ClearSpillSlotsFromLoopPhisInStackMap(instruction);
+    }
+  } else {
+    DCHECK_EQ(slow_path->GetSuccessor(), successor);
+  }
+
   UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
   Register temp = temps.AcquireW();
 
@@ -1688,6 +1700,152 @@
 #undef DEFINE_CONDITION_VISITORS
 #undef FOR_EACH_CONDITION_INSTRUCTION
 
+void InstructionCodeGeneratorARM64::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
+  // Division or remainder by the constant 1 or -1: no divide instruction is needed.
+  DCHECK(instruction->IsDiv() || instruction->IsRem());
+  Location divisor_loc = instruction->GetLocations()->InAt(1);
+  DCHECK(divisor_loc.IsConstant());
+  const int64_t divisor = Int64FromConstant(divisor_loc.GetConstant());
+  DCHECK(divisor == 1 || divisor == -1);
+
+  Register result = OutputRegister(instruction);
+  Register numerator = InputRegisterAt(instruction, 0);
+
+  if (instruction->IsRem()) {
+    // The remainder by 1 or -1 is always zero.
+    __ Mov(result, 0);
+  } else if (divisor == 1) {
+    // Dividing by 1 leaves the dividend unchanged.
+    __ Mov(result, numerator);
+  } else {
+    // Dividing by -1 negates the dividend.
+    __ Neg(result, numerator);
+  }
+}
+
+void InstructionCodeGeneratorARM64::DivRemByPowerOfTwo(HBinaryOperation* instruction) {
+  // Emits a shift-based sequence for div/rem by a constant whose magnitude is 2^ctz_imm.
+  DCHECK(instruction->IsDiv() || instruction->IsRem());
+  LocationSummary* locations = instruction->GetLocations();
+  Location second = locations->InAt(1);
+  DCHECK(second.IsConstant());
+
+  Register out = OutputRegister(instruction);
+  Register dividend = InputRegisterAt(instruction, 0);
+  int64_t imm = Int64FromConstant(second.GetConstant());
+  // Compute |imm| in unsigned arithmetic: std::abs(INT64_MIN) overflows, which is undefined.
+  uint64_t abs_imm = (imm < 0) ? -static_cast<uint64_t>(imm) : static_cast<uint64_t>(imm);
+  DCHECK(IsPowerOfTwo(abs_imm));
+  int ctz_imm = CTZ(abs_imm);
+  int64_t low_mask = static_cast<int64_t>(abs_imm - 1);  // The ctz_imm low bits set.
+  UseScratchRegisterScope temps(GetVIXLAssembler());
+  Register temp = temps.AcquireSameSizeAs(out);
+  if (instruction->IsDiv()) {
+    __ Add(temp, dividend, low_mask);  // Biased dividend, selected below when negative.
+    __ Cmp(dividend, 0);
+    __ Csel(out, temp, dividend, lt);
+    if (imm > 0) {
+      __ Asr(out, out, ctz_imm);
+    } else {
+      __ Neg(out, Operand(out, ASR, ctz_imm));
+    }
+  } else {
+    int bits = instruction->GetResultType() == Primitive::kPrimInt ? 32 : 64;
+    __ Asr(temp, dividend, bits - 1);
+    __ Lsr(temp, temp, bits - ctz_imm);
+    __ Add(out, dividend, temp);
+    __ And(out, out, low_mask);
+    __ Sub(out, out, temp);
+  }
+}
+
+void InstructionCodeGeneratorARM64::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
+  DCHECK(instruction->IsDiv() || instruction->IsRem());
+  // Emits div/rem by a constant as a multiply-high by a magic number followed by fix-ups.
+  LocationSummary* locations = instruction->GetLocations();
+  Location second = locations->InAt(1);
+  DCHECK(second.IsConstant());
+
+  Register out = OutputRegister(instruction);
+  Register dividend = InputRegisterAt(instruction, 0);
+  int64_t imm = Int64FromConstant(second.GetConstant());
+
+  Primitive::Type type = instruction->GetResultType();
+  DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
+  // Multiplier and post-shift chosen for this divisor by CalculateMagicAndShiftForDivRem.
+  int64_t magic;
+  int shift;
+  CalculateMagicAndShiftForDivRem(imm, type == Primitive::kPrimLong /* is_long */, &magic, &shift);
+
+  UseScratchRegisterScope temps(GetVIXLAssembler());
+  Register temp = temps.AcquireSameSizeAs(out);
+
+  // temp = get_high(dividend * magic)
+  __ Mov(temp, magic);
+  if (type == Primitive::kPrimLong) {
+    __ Smulh(temp, dividend, temp);
+  } else {
+    __ Smull(temp.X(), dividend, temp);
+    __ Lsr(temp.X(), temp.X(), 32);
+  }
+  // Correct the product when the magic number and the divisor have opposite signs.
+  if (imm > 0 && magic < 0) {
+    __ Add(temp, temp, dividend);
+  } else if (imm < 0 && magic > 0) {
+    __ Sub(temp, temp, dividend);
+  }
+
+  if (shift != 0) {
+    __ Asr(temp, temp, shift);
+  }
+  // (temp ASR 63/31) is 0 or -1, so the subtraction adds one to a negative temp.
+  if (instruction->IsDiv()) {
+    __ Sub(out, temp, Operand(temp, ASR, type == Primitive::kPrimLong ? 63 : 31));
+  } else {
+    __ Sub(temp, temp, Operand(temp, ASR, type == Primitive::kPrimLong ? 63 : 31));
+    // TODO: Strength reduction for msub.
+    Register temp_imm = temps.AcquireSameSizeAs(out);
+    __ Mov(temp_imm, imm);
+    __ Msub(out, temp, temp_imm, dividend);  // out = dividend - temp * imm.
+  }
+}
+
+void InstructionCodeGeneratorARM64::GenerateDivRemIntegral(HBinaryOperation* instruction) {
+  DCHECK(instruction->IsDiv() || instruction->IsRem());
+  Primitive::Type type = instruction->GetResultType();
+  DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
+  // Emits int/long div or rem: constant divisors avoid sdiv, others use sdiv (+ msub for rem).
+  LocationSummary* locations = instruction->GetLocations();
+  Register out = OutputRegister(instruction);
+  Location second = locations->InAt(1);
+
+  if (second.IsConstant()) {
+    int64_t imm = Int64FromConstant(second.GetConstant());
+    uint64_t abs_imm = (imm < 0) ? -static_cast<uint64_t>(imm) : static_cast<uint64_t>(imm);
+    if (imm == 0) {
+      // Do not generate anything. DivZeroCheck prevents this code from ever being executed.
+    } else if (imm == 1 || imm == -1) {
+      DivRemOneOrMinusOne(instruction);
+    } else if (IsPowerOfTwo(abs_imm)) {  // Unsigned: std::abs(INT64_MIN) is undefined.
+      DivRemByPowerOfTwo(instruction);
+    } else {
+      DCHECK(imm <= -2 || imm >= 2);
+      GenerateDivRemWithAnyConstant(instruction);
+    }
+  } else {
+    Register dividend = InputRegisterAt(instruction, 0);
+    Register divisor = InputRegisterAt(instruction, 1);
+    if (instruction->IsDiv()) {
+      __ Sdiv(out, dividend, divisor);
+    } else {
+      UseScratchRegisterScope temps(GetVIXLAssembler());
+      Register temp = temps.AcquireSameSizeAs(out);
+      __ Sdiv(temp, dividend, divisor);
+      __ Msub(out, temp, divisor, dividend);  // out = dividend - temp * divisor.
+    }
+  }
+}
+
 void LocationsBuilderARM64::VisitDiv(HDiv* div) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(div, LocationSummary::kNoCall);
@@ -1695,7 +1853,7 @@
     case Primitive::kPrimInt:
     case Primitive::kPrimLong:
       locations->SetInAt(0, Location::RequiresRegister());
-      locations->SetInAt(1, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       break;
 
@@ -1716,7 +1874,7 @@
   switch (type) {
     case Primitive::kPrimInt:
     case Primitive::kPrimLong:
-      __ Sdiv(OutputRegister(div), InputRegisterAt(div, 0), InputRegisterAt(div, 1));
+      GenerateDivRemIntegral(div);
       break;
 
     case Primitive::kPrimFloat:
@@ -2005,8 +2163,8 @@
       new (GetGraph()->GetArena()) LocationSummary(invoke, LocationSummary::kCall);
   locations->AddTemp(LocationFrom(x0));
 
-  InvokeDexCallingConventionVisitor calling_convention_visitor;
-  for (size_t i = 0; i < invoke->InputCount(); i++) {
+  InvokeDexCallingConventionVisitorARM64 calling_convention_visitor;
+  for (size_t i = 0; i < invoke->GetNumberOfArguments(); i++) {
     HInstruction* input = invoke->InputAt(i);
     locations->SetInAt(i, calling_convention_visitor.GetNextLocation(input->GetType()));
   }
@@ -2066,6 +2224,10 @@
 }
 
 void LocationsBuilderARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
+  // When we do not run baseline, explicit clinit checks triggered by static
+  // invokes must have been pruned by art::PrepareForRegisterAllocation.
+  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+
   IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetArena());
   if (intrinsic.TryDispatch(invoke)) {
     return;
@@ -2096,26 +2258,40 @@
   //
   // Currently we implement the app -> app logic, which looks up in the resolve cache.
 
-  // temp = method;
-  LoadCurrentMethod(temp);
-  if (!invoke->IsRecursive()) {
-    // temp = temp->dex_cache_resolved_methods_;
-    __ Ldr(temp, HeapOperand(temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset()));
-    // temp = temp[index_in_cache];
-    __ Ldr(temp, HeapOperand(temp, index_in_cache));
-    // lr = temp->entry_point_from_quick_compiled_code_;
+  if (invoke->IsStringInit()) {
+    // temp = thread->string_init_entrypoint
+    __ Ldr(temp, HeapOperand(tr, invoke->GetStringInitOffset()));
+    // LR = temp->entry_point_from_quick_compiled_code_;
     __ Ldr(lr, HeapOperand(temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
         kArm64WordSize)));
-    // lr();
+    // lr()
     __ Blr(lr);
   } else {
-    __ Bl(&frame_entry_label_);
+    // temp = method;
+    LoadCurrentMethod(temp);
+    if (!invoke->IsRecursive()) {
+      // temp = temp->dex_cache_resolved_methods_;
+      __ Ldr(temp, HeapOperand(temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset()));
+      // temp = temp[index_in_cache];
+      __ Ldr(temp, HeapOperand(temp, index_in_cache));
+      // lr = temp->entry_point_from_quick_compiled_code_;
+      __ Ldr(lr, HeapOperand(temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
+          kArm64WordSize)));
+      // lr();
+      __ Blr(lr);
+    } else {
+      __ Bl(&frame_entry_label_);
+    }
   }
 
   DCHECK(!IsLeafMethod());
 }
 
 void InstructionCodeGeneratorARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
+  // When we do not run baseline, explicit clinit checks triggered by static
+  // invokes must have been pruned by art::PrepareForRegisterAllocation.
+  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+
   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
     return;
   }
@@ -2521,7 +2697,7 @@
     case Primitive::kPrimInt:
     case Primitive::kPrimLong:
       locations->SetInAt(0, Location::RequiresRegister());
-      locations->SetInAt(1, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1)));
       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       break;
 
@@ -2546,14 +2722,7 @@
   switch (type) {
     case Primitive::kPrimInt:
     case Primitive::kPrimLong: {
-      UseScratchRegisterScope temps(GetVIXLAssembler());
-      Register dividend = InputRegisterAt(rem, 0);
-      Register divisor = InputRegisterAt(rem, 1);
-      Register output = OutputRegister(rem);
-      Register temp = temps.AcquireSameSizeAs(output);
-
-      __ Sdiv(temp, dividend, divisor);
-      __ Msub(output, temp, divisor, dividend);
+      GenerateDivRemIntegral(rem);
       break;
     }
 
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 913d881..702bcd4 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -122,25 +122,20 @@
   DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConvention);
 };
 
-class InvokeDexCallingConventionVisitor {
+class InvokeDexCallingConventionVisitorARM64 : public InvokeDexCallingConventionVisitor {
  public:
-  InvokeDexCallingConventionVisitor() : gp_index_(0), fp_index_(0), stack_index_(0) {}
+  InvokeDexCallingConventionVisitorARM64() {}
+  virtual ~InvokeDexCallingConventionVisitorARM64() {}
 
-  Location GetNextLocation(Primitive::Type type);
+  Location GetNextLocation(Primitive::Type type) OVERRIDE;
   Location GetReturnLocation(Primitive::Type return_type) {
     return calling_convention.GetReturnLocation(return_type);
   }
 
  private:
   InvokeDexCallingConvention calling_convention;
-  // The current index for core registers.
-  uint32_t gp_index_;
-  // The current index for floating-point registers.
-  uint32_t fp_index_;
-  // The current stack index.
-  uint32_t stack_index_;
 
-  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor);
+  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitorARM64);
 };
 
 class InstructionCodeGeneratorARM64 : public HGraphVisitor {
@@ -171,6 +166,11 @@
                              vixl::Label* true_target,
                              vixl::Label* false_target,
                              vixl::Label* always_true_target);
+  void DivRemOneOrMinusOne(HBinaryOperation* instruction);
+  void DivRemByPowerOfTwo(HBinaryOperation* instruction);
+  void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
+  void GenerateDivRemIntegral(HBinaryOperation* instruction);
+
 
   Arm64Assembler* const assembler_;
   CodeGeneratorARM64* const codegen_;
@@ -196,7 +196,7 @@
   void HandleShift(HBinaryOperation* instr);
 
   CodeGeneratorARM64* const codegen_;
-  InvokeDexCallingConventionVisitor parameter_visitor_;
+  InvokeDexCallingConventionVisitorARM64 parameter_visitor_;
 
   DISALLOW_COPY_AND_ASSIGN(LocationsBuilderARM64);
 };
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index c604842..0212da1 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -153,6 +153,10 @@
     return &return_label_;
   }
 
+  HBasicBlock* GetSuccessor() const {
+    return successor_;  // NOTE(review): presumably may be null - confirm against callers.
+  }
+
  private:
   HSuspendCheck* const instruction_;
   HBasicBlock* const successor_;
@@ -174,7 +178,6 @@
     SaveLiveRegisters(codegen, locations);
 
     InvokeRuntimeCallingConvention calling_convention;
-    x86_codegen->LoadCurrentMethod(calling_convention.GetRegisterAt(1));
     __ movl(calling_convention.GetRegisterAt(0), Immediate(instruction_->GetStringIndex()));
     __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pResolveString)));
     RecordPcInfo(codegen, instruction_, instruction_->GetDexPc());
@@ -208,7 +211,6 @@
 
     InvokeRuntimeCallingConvention calling_convention;
     __ movl(calling_convention.GetRegisterAt(0), Immediate(cls_->GetTypeIndex()));
-    x86_codegen->LoadCurrentMethod(calling_convention.GetRegisterAt(1));
     __ fs()->call(Address::Absolute(do_clinit_
         ? QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pInitializeStaticStorage)
         : QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pInitializeType)));
@@ -338,11 +340,11 @@
 }
 
 void CodeGeneratorX86::DumpCoreRegister(std::ostream& stream, int reg) const {
-  stream << X86ManagedRegister::FromCpuRegister(Register(reg));
+  stream << Register(reg);
 }
 
 void CodeGeneratorX86::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
-  stream << X86ManagedRegister::FromXmmRegister(XmmRegister(reg));
+  stream << XmmRegister(reg);
 }
 
 size_t CodeGeneratorX86::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
@@ -553,7 +555,7 @@
   UNREACHABLE();
 }
 
-Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type) {
+Location InvokeDexCallingConventionVisitorX86::GetNextLocation(Primitive::Type type) {
   switch (type) {
     case Primitive::kPrimBoolean:
     case Primitive::kPrimByte:
@@ -584,7 +586,7 @@
     }
 
     case Primitive::kPrimFloat: {
-      uint32_t index = fp_index_++;
+      uint32_t index = float_index_++;
       stack_index_++;
       if (index < calling_convention.GetNumberOfFpuRegisters()) {
         return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
@@ -594,7 +596,7 @@
     }
 
     case Primitive::kPrimDouble: {
-      uint32_t index = fp_index_++;
+      uint32_t index = float_index_++;
       stack_index_ += 2;
       if (index < calling_convention.GetNumberOfFpuRegisters()) {
         return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
@@ -811,7 +813,6 @@
 
   HLoopInformation* info = block->GetLoopInformation();
   if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
-    codegen_->ClearSpillSlotsFromLoopPhisInStackMap(info->GetSuspendCheck());
     GenerateSuspendCheck(info->GetSuspendCheck(), successor);
     return;
   }
@@ -1196,6 +1197,10 @@
 }
 
 void LocationsBuilderX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
+  // When we do not run baseline, explicit clinit checks triggered by static
+  // invokes must have been pruned by art::PrepareForRegisterAllocation.
+  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+
   IntrinsicLocationsBuilderX86 intrinsic(codegen_);
   if (intrinsic.TryDispatch(invoke)) {
     return;
@@ -1214,6 +1219,10 @@
 }
 
 void InstructionCodeGeneratorX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
+  // When we do not run baseline, explicit clinit checks triggered by static
+  // invokes must have been pruned by art::PrepareForRegisterAllocation.
+  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+
   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
     return;
   }
@@ -1232,8 +1241,8 @@
       new (GetGraph()->GetArena()) LocationSummary(invoke, LocationSummary::kCall);
   locations->AddTemp(Location::RegisterLocation(EAX));
 
-  InvokeDexCallingConventionVisitor calling_convention_visitor;
-  for (size_t i = 0; i < invoke->InputCount(); i++) {
+  InvokeDexCallingConventionVisitorX86 calling_convention_visitor;
+  for (size_t i = 0; i < invoke->GetNumberOfArguments(); i++) {
     HInstruction* input = invoke->InputAt(i);
     locations->SetInAt(i, calling_convention_visitor.GetNextLocation(input->GetType()));
   }
@@ -2734,17 +2743,12 @@
       new (GetGraph()->GetArena()) LocationSummary(op, LocationSummary::kNoCall);
 
   switch (op->GetResultType()) {
-    case Primitive::kPrimInt: {
-      locations->SetInAt(0, Location::RequiresRegister());
-      // The shift count needs to be in CL.
-      locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, op->InputAt(1)));
-      locations->SetOut(Location::SameAsFirstInput());
-      break;
-    }
+    case Primitive::kPrimInt:
     case Primitive::kPrimLong: {
+      // Can't have Location::Any() and output SameAsFirstInput()
       locations->SetInAt(0, Location::RequiresRegister());
-      // The shift count needs to be in CL.
-      locations->SetInAt(1, Location::RegisterLocation(ECX));
+      // The shift count needs to be in CL or a constant.
+      locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, op->InputAt(1)));
       locations->SetOut(Location::SameAsFirstInput());
       break;
     }
@@ -2763,6 +2767,7 @@
 
   switch (op->GetResultType()) {
     case Primitive::kPrimInt: {
+      DCHECK(first.IsRegister());
       Register first_reg = first.AsRegister<Register>();
       if (second.IsRegister()) {
         Register second_reg = second.AsRegister<Register>();
@@ -2775,7 +2780,11 @@
           __ shrl(first_reg, second_reg);
         }
       } else {
-        Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftValue);
+        int32_t shift = second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftValue;
+        if (shift == 0) {
+          return;
+        }
+        Immediate imm(shift);
         if (op->IsShl()) {
           __ shll(first_reg, imm);
         } else if (op->IsShr()) {
@@ -2787,14 +2796,29 @@
       break;
     }
     case Primitive::kPrimLong: {
-      Register second_reg = second.AsRegister<Register>();
-      DCHECK_EQ(ECX, second_reg);
-      if (op->IsShl()) {
-        GenerateShlLong(first, second_reg);
-      } else if (op->IsShr()) {
-        GenerateShrLong(first, second_reg);
+      if (second.IsRegister()) {
+        Register second_reg = second.AsRegister<Register>();
+        DCHECK_EQ(ECX, second_reg);
+        if (op->IsShl()) {
+          GenerateShlLong(first, second_reg);
+        } else if (op->IsShr()) {
+          GenerateShrLong(first, second_reg);
+        } else {
+          GenerateUShrLong(first, second_reg);
+        }
       } else {
-        GenerateUShrLong(first, second_reg);
+        // Shift by a constant.
+        int shift = second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftValue;
+        // Nothing to do if the shift is 0, as the input is already the output.
+        if (shift != 0) {
+          if (op->IsShl()) {
+            GenerateShlLong(first, shift);
+          } else if (op->IsShr()) {
+            GenerateShrLong(first, shift);
+          } else {
+            GenerateUShrLong(first, shift);
+          }
+        }
       }
       break;
     }
@@ -2803,6 +2827,34 @@
   }
 }
 
+void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, int shift) {
+  // Left-shifts the 64-bit value held in the register pair `loc` by the constant `shift`.
+  Register lo = loc.AsRegisterPairLow<Register>();
+  Register hi = loc.AsRegisterPairHigh<Register>();
+
+  if (shift > 32) {
+    // High word = low word << (shift - 32); low word = 0.
+    __ movl(hi, lo);
+    __ shll(hi, Immediate(shift - 32));
+    __ xorl(lo, lo);
+  } else if (shift == 32) {
+    // Whole-word shift: the low word moves into the high word and the
+    // low word is loaded with the constant zero.
+    Location zero = Location::ConstantLocation(GetGraph()->GetIntConstant(0));
+    codegen_->EmitParallelMoves(
+        loc.ToLow(), loc.ToHigh(), Primitive::kPrimInt,
+        zero, loc.ToLow(), Primitive::kPrimInt);
+  } else if (shift == 1) {
+    // A shift by one is an add-with-carry of the pair to itself.
+    __ addl(lo, lo);
+    __ adcl(hi, hi);
+  } else {
+    // Any other shift below 32.
+    __ shld(hi, lo, Immediate(shift));
+    __ shll(lo, Immediate(shift));
+  }
+}
+
 void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, Register shifter) {
   Label done;
   __ shld(loc.AsRegisterPairHigh<Register>(), loc.AsRegisterPairLow<Register>(), shifter);
@@ -2814,6 +2866,27 @@
   __ Bind(&done);
 }
 
+void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, int shift) {
+  // Arithmetic right shift of the 64-bit register pair `loc` by the constant `shift`.
+  Register lo = loc.AsRegisterPairLow<Register>();
+  Register hi = loc.AsRegisterPairHigh<Register>();
+
+  if (shift < 32) {
+    // Funnel the high word's bits into the low word, then shift the high word.
+    __ shrd(lo, hi, Immediate(shift));
+    __ sarl(hi, Immediate(shift));
+    return;
+  }
+  // shift >= 32: low takes the old high word; high is filled with the sign bit.
+  DCHECK_NE(lo, hi);
+  __ movl(lo, hi);
+  __ sarl(hi, Immediate(31));
+  if (shift > 32) {
+    // Apply the part of the shift beyond one full word.
+    __ sarl(lo, Immediate(shift - 32));
+  }
+}
+
 void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, Register shifter) {
   Label done;
   __ shrd(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>(), shifter);
@@ -2825,6 +2898,30 @@
   __ Bind(&done);
 }
 
+void InstructionCodeGeneratorX86::GenerateUShrLong(const Location& loc, int shift) {
+  Register low = loc.AsRegisterPairLow<Register>();
+  Register high = loc.AsRegisterPairHigh<Register>();
+  if (shift == 32) {
+    // Low takes the old high and high takes constant 0, emitted as one parallel move.
+    codegen_->EmitParallelMoves(
+        loc.ToHigh(),
+        loc.ToLow(),
+        Primitive::kPrimInt,
+        Location::ConstantLocation(GetGraph()->GetIntConstant(0)),
+        loc.ToHigh(),
+        Primitive::kPrimInt);
+  } else if (shift > 32) {
+    // Low part is high >> (shift - 32), logically; high part is then cleared with xor.
+    __ movl(low, high);
+    __ shrl(low, Immediate(shift - 32));
+    __ xorl(high, high);
+  } else {
+    // Remaining shifts below 32 (presumably 1..31): shrd feeds high's low bits into low.
+    __ shrd(low, high, Immediate(shift));
+    __ shrl(high, Immediate(shift));
+  }
+}
+
 void InstructionCodeGeneratorX86::GenerateUShrLong(const Location& loc, Register shifter) {
   Label done;
   __ shrd(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>(), shifter);
@@ -3104,18 +3201,27 @@
   // 3) app -> app
   //
   // Currently we implement the app -> app logic, which looks up in the resolve cache.
-  // temp = method;
-  LoadCurrentMethod(temp);
-  if (!invoke->IsRecursive()) {
-    // temp = temp->dex_cache_resolved_methods_;
-    __ movl(temp, Address(temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value()));
-    // temp = temp[index_in_cache]
-    __ movl(temp, Address(temp, CodeGenerator::GetCacheOffset(invoke->GetDexMethodIndex())));
+
+  if (invoke->IsStringInit()) {
+    // temp = thread->string_init_entrypoint
+    __ fs()->movl(temp, Address::Absolute(invoke->GetStringInitOffset()));
     // (temp + offset_of_quick_compiled_code)()
     __ call(Address(
         temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86WordSize).Int32Value()));
   } else {
-    __ call(GetFrameEntryLabel());
+    // temp = method;
+    LoadCurrentMethod(temp);
+    if (!invoke->IsRecursive()) {
+      // temp = temp->dex_cache_resolved_methods_;
+      __ movl(temp, Address(temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value()));
+      // temp = temp[index_in_cache]
+      __ movl(temp, Address(temp, CodeGenerator::GetCacheOffset(invoke->GetDexMethodIndex())));
+      // (temp + offset_of_quick_compiled_code)()
+      __ call(Address(temp,
+          mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86WordSize).Int32Value()));
+    } else {
+      __ call(GetFrameEntryLabel());
+    }
   }
 
   DCHECK(!IsLeafMethod());
@@ -3809,7 +3915,7 @@
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
-  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
   if (instruction->HasUses()) {
     locations->SetOut(Location::SameAsFirstInput());
   }
@@ -3821,16 +3927,38 @@
   Location length_loc = locations->InAt(1);
   SlowPathCodeX86* slow_path =
     new (GetGraph()->GetArena()) BoundsCheckSlowPathX86(instruction, index_loc, length_loc);
-  codegen_->AddSlowPath(slow_path);
 
-  Register length = length_loc.AsRegister<Register>();
-  if (index_loc.IsConstant()) {
-    int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
-    __ cmpl(length, Immediate(value));
+  if (length_loc.IsConstant()) {
+    int32_t length = CodeGenerator::GetInt32ValueOf(length_loc.GetConstant());
+    if (index_loc.IsConstant()) {
+      // BCE will remove the bounds check if we are guaranteed to pass.
+      int32_t index = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
+      if (index < 0 || index >= length) {
+        codegen_->AddSlowPath(slow_path);
+        __ jmp(slow_path->GetEntryLabel());
+      } else {
+        // Some optimization after BCE may have generated this, and we should not
+        // generate a bounds check if it is a valid range.
+      }
+      return;
+    }
+
+    // We have to reverse the jump condition because the length is the constant.
+    Register index_reg = index_loc.AsRegister<Register>();
+    __ cmpl(index_reg, Immediate(length));
+    codegen_->AddSlowPath(slow_path);
+    __ j(kAboveEqual, slow_path->GetEntryLabel());
   } else {
-    __ cmpl(length, index_loc.AsRegister<Register>());
+    Register length = length_loc.AsRegister<Register>();
+    if (index_loc.IsConstant()) {
+      int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
+      __ cmpl(length, Immediate(value));
+    } else {
+      __ cmpl(length, index_loc.AsRegister<Register>());
+    }
+    codegen_->AddSlowPath(slow_path);
+    __ j(kBelowEqual, slow_path->GetEntryLabel());
   }
-  __ j(kBelowEqual, slow_path->GetEntryLabel());
 }
 
 void LocationsBuilderX86::VisitTemporary(HTemporary* temp) {
@@ -3872,8 +4000,19 @@
 void InstructionCodeGeneratorX86::GenerateSuspendCheck(HSuspendCheck* instruction,
                                                        HBasicBlock* successor) {
   SuspendCheckSlowPathX86* slow_path =
-      new (GetGraph()->GetArena()) SuspendCheckSlowPathX86(instruction, successor);
-  codegen_->AddSlowPath(slow_path);
+      down_cast<SuspendCheckSlowPathX86*>(instruction->GetSlowPath());
+  if (slow_path == nullptr) {
+    slow_path = new (GetGraph()->GetArena()) SuspendCheckSlowPathX86(instruction, successor);
+    instruction->SetSlowPath(slow_path);
+    codegen_->AddSlowPath(slow_path);
+    if (successor != nullptr) {
+      DCHECK(successor->IsLoopHeader());
+      codegen_->ClearSpillSlotsFromLoopPhisInStackMap(instruction);
+    }
+  } else {
+    DCHECK_EQ(slow_path->GetSuccessor(), successor);
+  }
+
   __ fs()->cmpw(Address::Absolute(
       Thread::ThreadFlagsOffset<kX86WordSize>().Int32Value()), Immediate(0));
   if (successor == nullptr) {
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 8bd3cd3..5a5a37b 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -75,22 +75,17 @@
   DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConvention);
 };
 
-class InvokeDexCallingConventionVisitor {
+class InvokeDexCallingConventionVisitorX86 : public InvokeDexCallingConventionVisitor {
  public:
-  InvokeDexCallingConventionVisitor() : gp_index_(0), fp_index_(0), stack_index_(0) {}
+  InvokeDexCallingConventionVisitorX86() {}
+  virtual ~InvokeDexCallingConventionVisitorX86() {}
 
-  Location GetNextLocation(Primitive::Type type);
+  Location GetNextLocation(Primitive::Type type) OVERRIDE;
 
  private:
   InvokeDexCallingConvention calling_convention;
-  // The current index for cpu registers.
-  uint32_t gp_index_;
-  // The current index for fpu registers.
-  uint32_t fp_index_;
-  // The current stack index.
-  uint32_t stack_index_;
 
-  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor);
+  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitorX86);
 };
 
 class ParallelMoveResolverX86 : public ParallelMoveResolverWithSwap {
@@ -137,7 +132,7 @@
   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
 
   CodeGeneratorX86* const codegen_;
-  InvokeDexCallingConventionVisitor parameter_visitor_;
+  InvokeDexCallingConventionVisitorX86 parameter_visitor_;
 
   DISALLOW_COPY_AND_ASSIGN(LocationsBuilderX86);
 };
@@ -171,6 +166,9 @@
   void GenerateShlLong(const Location& loc, Register shifter);
   void GenerateShrLong(const Location& loc, Register shifter);
   void GenerateUShrLong(const Location& loc, Register shifter);
+  void GenerateShlLong(const Location& loc, int shift);
+  void GenerateShrLong(const Location& loc, int shift);
+  void GenerateUShrLong(const Location& loc, int shift);
   void GenerateMemoryBarrier(MemBarrierKind kind);
   void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info);
   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 47425fb..63d6846 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -99,7 +99,7 @@
       if (is_div_) {
         __ negq(cpu_reg_);
       } else {
-        __ movq(cpu_reg_, Immediate(0));
+        __ xorl(cpu_reg_, cpu_reg_);
       }
     }
     __ jmp(GetExitLabel());
@@ -136,6 +136,10 @@
     return &return_label_;
   }
 
+  HBasicBlock* GetSuccessor() const {
+    return successor_;
+  }
+
  private:
   HSuspendCheck* const instruction_;
   HBasicBlock* const successor_;
@@ -197,7 +201,6 @@
 
     InvokeRuntimeCallingConvention calling_convention;
     __ movl(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(cls_->GetTypeIndex()));
-    x64_codegen->LoadCurrentMethod(CpuRegister(calling_convention.GetRegisterAt(1)));
     __ gs()->call(Address::Absolute((do_clinit_
           ? QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pInitializeStaticStorage)
           : QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pInitializeType)) , true));
@@ -244,7 +247,6 @@
     SaveLiveRegisters(codegen, locations);
 
     InvokeRuntimeCallingConvention calling_convention;
-    x64_codegen->LoadCurrentMethod(CpuRegister(calling_convention.GetRegisterAt(1)));
     __ movl(CpuRegister(calling_convention.GetRegisterAt(0)),
             Immediate(instruction_->GetStringIndex()));
     __ gs()->call(Address::Absolute(
@@ -368,29 +370,37 @@
   //
   // Currently we implement the app -> app logic, which looks up in the resolve cache.
 
-  // temp = method;
-  LoadCurrentMethod(temp);
-  if (!invoke->IsRecursive()) {
-    // temp = temp->dex_cache_resolved_methods_;
-    __ movl(temp, Address(temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().SizeValue()));
-    // temp = temp[index_in_cache]
-    __ movl(temp, Address(temp, CodeGenerator::GetCacheOffset(invoke->GetDexMethodIndex())));
+  if (invoke->IsStringInit()) {
+    // temp = thread->string_init_entrypoint
+    __ gs()->movl(temp, Address::Absolute(invoke->GetStringInitOffset()));
     // (temp + offset_of_quick_compiled_code)()
     __ call(Address(temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
         kX86_64WordSize).SizeValue()));
   } else {
-    __ call(&frame_entry_label_);
+    // temp = method;
+    LoadCurrentMethod(temp);
+    if (!invoke->IsRecursive()) {
+      // temp = temp->dex_cache_resolved_methods_;
+      __ movl(temp, Address(temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().SizeValue()));
+      // temp = temp[index_in_cache]
+      __ movl(temp, Address(temp, CodeGenerator::GetCacheOffset(invoke->GetDexMethodIndex())));
+      // (temp + offset_of_quick_compiled_code)()
+      __ call(Address(temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
+          kX86_64WordSize).SizeValue()));
+    } else {
+      __ call(&frame_entry_label_);
+    }
   }
 
   DCHECK(!IsLeafMethod());
 }
 
 void CodeGeneratorX86_64::DumpCoreRegister(std::ostream& stream, int reg) const {
-  stream << X86_64ManagedRegister::FromCpuRegister(Register(reg));
+  stream << Register(reg);
 }
 
 void CodeGeneratorX86_64::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
-  stream << X86_64ManagedRegister::FromXmmRegister(FloatRegister(reg));
+  stream << FloatRegister(reg);
 }
 
 size_t CodeGeneratorX86_64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
@@ -665,7 +675,7 @@
         DCHECK(constant->IsLongConstant());
         value = constant->AsLongConstant()->GetValue();
       }
-      __ movq(CpuRegister(TMP), Immediate(value));
+      Load64BitValue(CpuRegister(TMP), value);
       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
     } else {
       DCHECK(source.IsDoubleStackSlot());
@@ -698,9 +708,9 @@
     } else if (const_to_move->IsLongConstant()) {
       int64_t value = const_to_move->AsLongConstant()->GetValue();
       if (location.IsRegister()) {
-        __ movq(location.AsRegister<CpuRegister>(), Immediate(value));
+        Load64BitValue(location.AsRegister<CpuRegister>(), value);
       } else if (location.IsDoubleStackSlot()) {
-        __ movq(CpuRegister(TMP), Immediate(value));
+        Load64BitValue(CpuRegister(TMP), value);
         __ movq(Address(CpuRegister(RSP), location.GetStackIndex()), CpuRegister(TMP));
       } else {
         DCHECK(location.IsConstant());
@@ -765,7 +775,6 @@
 
   HLoopInformation* info = block->GetLoopInformation();
   if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
-    codegen_->ClearSpillSlotsFromLoopPhisInStackMap(info->GetSuspendCheck());
     GenerateSuspendCheck(info->GetSuspendCheck(), successor);
     return;
   }
@@ -950,7 +959,7 @@
     LocationSummary* locations = comp->GetLocations();
     CpuRegister reg = locations->Out().AsRegister<CpuRegister>();
     // Clear register: setcc only sets the low byte.
-    __ xorq(reg, reg);
+    __ xorl(reg, reg);
     Location lhs = locations->InAt(0);
     Location rhs = locations->InAt(1);
     if (rhs.IsRegister()) {
@@ -1234,7 +1243,7 @@
   codegen_->GenerateFrameExit();
 }
 
-Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type) {
+Location InvokeDexCallingConventionVisitorX86_64::GetNextLocation(Primitive::Type type) {
   switch (type) {
     case Primitive::kPrimBoolean:
     case Primitive::kPrimByte:
@@ -1264,7 +1273,7 @@
     }
 
     case Primitive::kPrimFloat: {
-      uint32_t index = fp_index_++;
+      uint32_t index = float_index_++;
       stack_index_++;
       if (index < calling_convention.GetNumberOfFpuRegisters()) {
         return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
@@ -1274,7 +1283,7 @@
     }
 
     case Primitive::kPrimDouble: {
-      uint32_t index = fp_index_++;
+      uint32_t index = float_index_++;
       stack_index_ += 2;
       if (index < calling_convention.GetNumberOfFpuRegisters()) {
         return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
@@ -1291,6 +1300,10 @@
 }
 
 void LocationsBuilderX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
+  // When we do not run baseline, explicit clinit checks triggered by static
+  // invokes must have been pruned by art::PrepareForRegisterAllocation.
+  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+
   IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
   if (intrinsic.TryDispatch(invoke)) {
     return;
@@ -1309,6 +1322,10 @@
 }
 
 void InstructionCodeGeneratorX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
+  // When we do not run baseline, explicit clinit checks triggered by static
+  // invokes must have been pruned by art::PrepareForRegisterAllocation.
+  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+
   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
     return;
   }
@@ -1324,8 +1341,8 @@
       new (GetGraph()->GetArena()) LocationSummary(invoke, LocationSummary::kCall);
   locations->AddTemp(Location::RegisterLocation(RDI));
 
-  InvokeDexCallingConventionVisitor calling_convention_visitor;
-  for (size_t i = 0; i < invoke->InputCount(); i++) {
+  InvokeDexCallingConventionVisitorX86_64 calling_convention_visitor;
+  for (size_t i = 0; i < invoke->GetNumberOfArguments(); i++) {
     HInstruction* input = invoke->InputAt(i);
     locations->SetInAt(i, calling_convention_visitor.GetNextLocation(input->GetType()));
   }
@@ -1405,8 +1422,8 @@
   size_t class_offset = mirror::Object::ClassOffset().SizeValue();
 
   // Set the hidden argument.
-  __ movq(invoke->GetLocations()->GetTemp(1).AsRegister<CpuRegister>(),
-          Immediate(invoke->GetDexMethodIndex()));
+  CpuRegister hidden_reg = invoke->GetLocations()->GetTemp(1).AsRegister<CpuRegister>();
+  codegen_->Load64BitValue(hidden_reg, invoke->GetDexMethodIndex());
 
   // temp = object->GetClass();
   if (receiver.IsStackSlot()) {
@@ -1842,7 +1859,7 @@
           XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
           Label done, nan;
 
-          __ movq(output, Immediate(kPrimLongMax));
+          codegen_->Load64BitValue(output, kPrimLongMax);
           // temp = long-to-float(output)
           __ cvtsi2ss(temp, output, true);
           // if input >= temp goto done
@@ -1855,7 +1872,7 @@
           __ jmp(&done);
           __ Bind(&nan);
           //  output = 0
-          __ xorq(output, output);
+          __ xorl(output, output);
           __ Bind(&done);
           break;
         }
@@ -1867,7 +1884,7 @@
           XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
           Label done, nan;
 
-          __ movq(output, Immediate(kPrimLongMax));
+          codegen_->Load64BitValue(output, kPrimLongMax);
           // temp = long-to-double(output)
           __ cvtsi2sd(temp, output, true);
           // if input >= temp goto done
@@ -1880,7 +1897,7 @@
           __ jmp(&done);
           __ Bind(&nan);
           //  output = 0
-          __ xorq(output, output);
+          __ xorl(output, output);
           __ Bind(&done);
           break;
         }
@@ -2469,7 +2486,7 @@
 
     case Primitive::kPrimLong: {
       if (instruction->IsRem()) {
-        __ xorq(output_register, output_register);
+        __ xorl(output_register, output_register);
       } else {
         __ movq(output_register, input_register);
         if (imm == -1) {
@@ -2513,7 +2530,7 @@
     DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong);
     CpuRegister rdx = locations->GetTemp(0).AsRegister<CpuRegister>();
 
-    __ movq(rdx, Immediate(std::abs(imm) - 1));
+    codegen_->Load64BitValue(rdx, std::abs(imm) - 1);
     __ addq(rdx, numerator);
     __ testq(numerator, numerator);
     __ cmov(kGreaterEqual, rdx, numerator);
@@ -2610,7 +2627,7 @@
     __ movq(numerator, rax);
 
     // RAX = magic
-    __ movq(rax, Immediate(magic));
+    codegen_->Load64BitValue(rax, magic);
 
     // RDX:RAX = magic * numerator
     __ imulq(numerator);
@@ -2639,8 +2656,7 @@
       if (IsInt<32>(imm)) {
         __ imulq(rdx, Immediate(static_cast<int32_t>(imm)));
       } else {
-        __ movq(numerator, Immediate(imm));
-        __ imulq(rdx, numerator);
+        __ imulq(rdx, codegen_->LiteralInt64Address(imm));
       }
 
       __ subq(rax, rdx);
@@ -3006,8 +3022,8 @@
 void InstructionCodeGeneratorX86_64::VisitNewInstance(HNewInstance* instruction) {
   InvokeRuntimeCallingConvention calling_convention;
   codegen_->LoadCurrentMethod(CpuRegister(calling_convention.GetRegisterAt(1)));
-  __ movq(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(instruction->GetTypeIndex()));
-
+  codegen_->Load64BitValue(CpuRegister(calling_convention.GetRegisterAt(0)),
+                           instruction->GetTypeIndex());
   __ gs()->call(
       Address::Absolute(GetThreadOffset<kX86_64WordSize>(instruction->GetEntrypoint()), true));
 
@@ -3028,7 +3044,8 @@
 void InstructionCodeGeneratorX86_64::VisitNewArray(HNewArray* instruction) {
   InvokeRuntimeCallingConvention calling_convention;
   codegen_->LoadCurrentMethod(CpuRegister(calling_convention.GetRegisterAt(2)));
-  __ movq(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(instruction->GetTypeIndex()));
+  codegen_->Load64BitValue(CpuRegister(calling_convention.GetRegisterAt(0)),
+                           instruction->GetTypeIndex());
 
   __ gs()->call(
       Address::Absolute(GetThreadOffset<kX86_64WordSize>(instruction->GetEntrypoint()), true));
@@ -3750,7 +3767,7 @@
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
-  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
   if (instruction->HasUses()) {
     locations->SetOut(Location::SameAsFirstInput());
   }
@@ -3762,16 +3779,38 @@
   Location length_loc = locations->InAt(1);
   SlowPathCodeX86_64* slow_path =
     new (GetGraph()->GetArena()) BoundsCheckSlowPathX86_64(instruction, index_loc, length_loc);
-  codegen_->AddSlowPath(slow_path);
 
-  CpuRegister length = length_loc.AsRegister<CpuRegister>();
-  if (index_loc.IsConstant()) {
-    int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
-    __ cmpl(length, Immediate(value));
+  if (length_loc.IsConstant()) {
+    int32_t length = CodeGenerator::GetInt32ValueOf(length_loc.GetConstant());
+    if (index_loc.IsConstant()) {
+      // BCE will remove the bounds check if we are guaranteed to pass.
+      int32_t index = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
+      if (index < 0 || index >= length) {
+        codegen_->AddSlowPath(slow_path);
+        __ jmp(slow_path->GetEntryLabel());
+      } else {
+        // Some optimization after BCE may have generated this, and we should not
+        // generate a bounds check if it is a valid range.
+      }
+      return;
+    }
+
+    // We have to reverse the jump condition because the length is the constant.
+    CpuRegister index_reg = index_loc.AsRegister<CpuRegister>();
+    __ cmpl(index_reg, Immediate(length));
+    codegen_->AddSlowPath(slow_path);
+    __ j(kAboveEqual, slow_path->GetEntryLabel());
   } else {
-    __ cmpl(length, index_loc.AsRegister<CpuRegister>());
+    CpuRegister length = length_loc.AsRegister<CpuRegister>();
+    if (index_loc.IsConstant()) {
+      int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
+      __ cmpl(length, Immediate(value));
+    } else {
+      __ cmpl(length, index_loc.AsRegister<CpuRegister>());
+    }
+    codegen_->AddSlowPath(slow_path);
+    __ j(kBelowEqual, slow_path->GetEntryLabel());
   }
-  __ j(kBelowEqual, slow_path->GetEntryLabel());
 }
 
 void CodeGeneratorX86_64::MarkGCCard(CpuRegister temp,
@@ -3828,8 +3867,19 @@
 void InstructionCodeGeneratorX86_64::GenerateSuspendCheck(HSuspendCheck* instruction,
                                                           HBasicBlock* successor) {
   SuspendCheckSlowPathX86_64* slow_path =
-      new (GetGraph()->GetArena()) SuspendCheckSlowPathX86_64(instruction, successor);
-  codegen_->AddSlowPath(slow_path);
+      down_cast<SuspendCheckSlowPathX86_64*>(instruction->GetSlowPath());
+  if (slow_path == nullptr) {
+    slow_path = new (GetGraph()->GetArena()) SuspendCheckSlowPathX86_64(instruction, successor);
+    instruction->SetSlowPath(slow_path);
+    codegen_->AddSlowPath(slow_path);
+    if (successor != nullptr) {
+      DCHECK(successor->IsLoopHeader());
+      codegen_->ClearSpillSlotsFromLoopPhisInStackMap(instruction);
+    }
+  } else {
+    DCHECK_EQ(slow_path->GetSuccessor(), successor);
+  }
+
   __ gs()->cmpw(Address::Absolute(
       Thread::ThreadFlagsOffset<kX86_64WordSize>().Int32Value(), true), Immediate(0));
   if (successor == nullptr) {
@@ -3902,45 +3952,42 @@
     } else if (constant->IsLongConstant()) {
       int64_t value = constant->AsLongConstant()->GetValue();
       if (destination.IsRegister()) {
-        __ movq(destination.AsRegister<CpuRegister>(), Immediate(value));
+        codegen_->Load64BitValue(destination.AsRegister<CpuRegister>(), value);
       } else {
         DCHECK(destination.IsDoubleStackSlot()) << destination;
-        __ movq(CpuRegister(TMP), Immediate(value));
+        codegen_->Load64BitValue(CpuRegister(TMP), value);
         __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
       }
     } else if (constant->IsFloatConstant()) {
       float fp_value = constant->AsFloatConstant()->GetValue();
       int32_t value = bit_cast<int32_t, float>(fp_value);
-      Immediate imm(value);
       if (destination.IsFpuRegister()) {
         XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
         if (value == 0) {
           // easy FP 0.0.
           __ xorps(dest, dest);
         } else {
-          __ movl(CpuRegister(TMP), imm);
-          __ movd(dest, CpuRegister(TMP));
+          __ movss(dest, codegen_->LiteralFloatAddress(fp_value));
         }
       } else {
         DCHECK(destination.IsStackSlot()) << destination;
+        Immediate imm(value);
         __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), imm);
       }
     } else {
       DCHECK(constant->IsDoubleConstant()) << constant->DebugName();
       double fp_value =  constant->AsDoubleConstant()->GetValue();
       int64_t value = bit_cast<int64_t, double>(fp_value);
-      Immediate imm(value);
       if (destination.IsFpuRegister()) {
         XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
         if (value == 0) {
           __ xorpd(dest, dest);
         } else {
-          __ movq(CpuRegister(TMP), imm);
-          __ movd(dest, CpuRegister(TMP));
+          __ movsd(dest, codegen_->LiteralDoubleAddress(fp_value));
         }
       } else {
         DCHECK(destination.IsDoubleStackSlot()) << destination;
-        __ movq(CpuRegister(TMP), imm);
+        codegen_->Load64BitValue(CpuRegister(TMP), value);
         __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
       }
     }
@@ -4399,6 +4446,17 @@
   LOG(FATAL) << "Unreachable";
 }
 
+void CodeGeneratorX86_64::Load64BitValue(CpuRegister dest, int64_t value) {
+  if (value == 0) {
+    __ xorl(dest, dest);
+  } else if (value > 0 && IsInt<32>(value)) {
+    // Positive value fitting in int32: a 32 bit move zero-extends and is one byte shorter.
+    __ movl(dest, Immediate(static_cast<int32_t>(value)));
+  } else {
+    __ movq(dest, Immediate(value));
+  }
+}
+
 void CodeGeneratorX86_64::Finalize(CodeAllocator* allocator) {
   // Generate the constant area if needed.
   X86_64Assembler* assembler = GetAssembler();
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 6cdc822..480ea6b 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -37,7 +37,7 @@
 static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters);
 static constexpr size_t kParameterFloatRegistersLength = arraysize(kParameterFloatRegisters);
 
-static constexpr Register kRuntimeParameterCoreRegisters[] = { RDI, RSI, RDX };
+static constexpr Register kRuntimeParameterCoreRegisters[] = { RDI, RSI, RDX, RCX };
 static constexpr size_t kRuntimeParameterCoreRegistersLength =
     arraysize(kRuntimeParameterCoreRegisters);
 static constexpr FloatRegister kRuntimeParameterFpuRegisters[] = { XMM0, XMM1 };
@@ -68,22 +68,17 @@
   DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConvention);
 };
 
-class InvokeDexCallingConventionVisitor {
+class InvokeDexCallingConventionVisitorX86_64 : public InvokeDexCallingConventionVisitor {
  public:
-  InvokeDexCallingConventionVisitor() : gp_index_(0), fp_index_(0), stack_index_(0) {}
+  InvokeDexCallingConventionVisitorX86_64() {}
+  virtual ~InvokeDexCallingConventionVisitorX86_64() {}
 
-  Location GetNextLocation(Primitive::Type type);
+  Location GetNextLocation(Primitive::Type type) OVERRIDE;
 
  private:
   InvokeDexCallingConvention calling_convention;
-  // The current index for cpu registers.
-  uint32_t gp_index_;
-  // The current index for fpu registers.
-  uint32_t fp_index_;
-  // The current stack index.
-  uint32_t stack_index_;
 
-  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor);
+  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitorX86_64);
 };
 
 class CodeGeneratorX86_64;
@@ -147,7 +142,7 @@
   void HandleFieldGet(HInstruction* instruction);
 
   CodeGeneratorX86_64* const codegen_;
-  InvokeDexCallingConventionVisitor parameter_visitor_;
+  InvokeDexCallingConventionVisitorX86_64 parameter_visitor_;
 
   DISALLOW_COPY_AND_ASSIGN(LocationsBuilderX86_64);
 };
@@ -287,6 +282,9 @@
   Address LiteralInt32Address(int32_t v);
   Address LiteralInt64Address(int64_t v);
 
+  // Load a 64 bit value into a register in the most efficient manner.
+  void Load64BitValue(CpuRegister dest, int64_t value);
+
  private:
   // Labels for each block that will be compiled.
   GrowableArray<Label> block_labels_;
diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc
index 94f56e5..bfed1a8 100644
--- a/compiler/optimizing/codegen_test.cc
+++ b/compiler/optimizing/codegen_test.cc
@@ -225,7 +225,7 @@
 static void TestCode(const uint16_t* data, bool has_result = false, int32_t expected = 0) {
   ArenaPool pool;
   ArenaAllocator arena(&pool);
-  HGraph* graph = new (&arena) HGraph(&arena);
+  HGraph* graph = CreateGraph(&arena);
   HGraphBuilder builder(graph);
   const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
   bool graph_built = builder.BuildGraph(*item);
@@ -238,7 +238,7 @@
 static void TestCodeLong(const uint16_t* data, bool has_result, int64_t expected) {
   ArenaPool pool;
   ArenaAllocator arena(&pool);
-  HGraph* graph = new (&arena) HGraph(&arena);
+  HGraph* graph = CreateGraph(&arena);
   HGraphBuilder builder(graph, Primitive::kPrimLong);
   const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
   bool graph_built = builder.BuildGraph(*item);
@@ -504,7 +504,7 @@
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
 
-  HGraph* graph = new (&allocator) HGraph(&allocator);
+  HGraph* graph = CreateGraph(&allocator);
   HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
   graph->AddBlock(entry);
   graph->SetEntryBlock(entry);
@@ -623,7 +623,7 @@
   for (size_t i = 0; i < arraysize(lhs); i++) {
     ArenaPool pool;
     ArenaAllocator allocator(&pool);
-    HGraph* graph = new (&allocator) HGraph(&allocator);
+    HGraph* graph = CreateGraph(&allocator);
 
     HBasicBlock* entry_block = new (&allocator) HBasicBlock(graph);
     graph->AddBlock(entry_block);
@@ -669,7 +669,7 @@
   for (size_t i = 0; i < arraysize(lhs); i++) {
     ArenaPool pool;
     ArenaAllocator allocator(&pool);
-    HGraph* graph = new (&allocator) HGraph(&allocator);
+    HGraph* graph = CreateGraph(&allocator);
 
     HBasicBlock* entry_block = new (&allocator) HBasicBlock(graph);
     graph->AddBlock(entry_block);
diff --git a/compiler/optimizing/constant_folding.cc b/compiler/optimizing/constant_folding.cc
index b7a92b5..20ce110 100644
--- a/compiler/optimizing/constant_folding.cc
+++ b/compiler/optimizing/constant_folding.cc
@@ -28,6 +28,7 @@
   void VisitShift(HBinaryOperation* shift);
 
   void VisitAnd(HAnd* instruction) OVERRIDE;
+  void VisitCompare(HCompare* instruction) OVERRIDE;
   void VisitMul(HMul* instruction) OVERRIDE;
   void VisitOr(HOr* instruction) OVERRIDE;
   void VisitRem(HRem* instruction) OVERRIDE;
@@ -70,6 +71,14 @@
           inst->ReplaceWith(constant);
           inst->GetBlock()->RemoveInstruction(inst);
         }
+      } else if (inst->IsTypeConversion()) {
+        // Constant folding: replace `TypeConversion(a)' with a constant at
+        // compile time if `a' is a constant.
+        HConstant* constant = inst->AsTypeConversion()->TryStaticEvaluation();
+        if (constant != nullptr) {
+          inst->ReplaceWith(constant);
+          inst->GetBlock()->RemoveInstruction(inst);
+        }
       } else if (inst->IsDivZeroCheck()) {
         // We can safely remove the check if the input is a non-null constant.
         HDivZeroCheck* check = inst->AsDivZeroCheck();
@@ -108,6 +117,26 @@
   }
 }
 
+void InstructionWithAbsorbingInputSimplifier::VisitCompare(HCompare* instruction) {
+  HConstant* input_cst = instruction->GetConstantRight();
+  if (input_cst != nullptr) {
+    HInstruction* input_value = instruction->GetLeastConstantLeft();
+    if (Primitive::IsFloatingPointType(input_value->GetType()) &&
+        ((input_cst->IsFloatConstant() && input_cst->AsFloatConstant()->IsNaN()) ||
+         (input_cst->IsDoubleConstant() && input_cst->AsDoubleConstant()->IsNaN()))) {
+      // Replace code looking like
+      //    CMP{G,L} dst, src, NaN
+      // with
+      //    CONSTANT +1 (gt bias)
+      // or
+      //    CONSTANT -1 (lt bias)
+      instruction->ReplaceWith(GetGraph()->GetConstant(Primitive::kPrimInt,
+                                                       (instruction->IsGtBias() ? 1 : -1)));
+      instruction->GetBlock()->RemoveInstruction(instruction);
+    }
+  }
+}
+
 void InstructionWithAbsorbingInputSimplifier::VisitMul(HMul* instruction) {
   HConstant* input_cst = instruction->GetConstantRight();
   Primitive::Type type = instruction->GetType();
diff --git a/compiler/optimizing/constant_folding.h b/compiler/optimizing/constant_folding.h
index ac00824..66ff578 100644
--- a/compiler/optimizing/constant_folding.h
+++ b/compiler/optimizing/constant_folding.h
@@ -32,8 +32,8 @@
  */
 class HConstantFolding : public HOptimization {
  public:
-  explicit HConstantFolding(HGraph* graph)
-      : HOptimization(graph, true, kConstantFoldingPassName) {}
+  explicit HConstantFolding(HGraph* graph, const char* name = kConstantFoldingPassName)
+      : HOptimization(graph, true, name) {}
 
   void Run() OVERRIDE;
 
diff --git a/compiler/optimizing/constant_folding_test.cc b/compiler/optimizing/constant_folding_test.cc
index 02ad675..422223f 100644
--- a/compiler/optimizing/constant_folding_test.cc
+++ b/compiler/optimizing/constant_folding_test.cc
@@ -572,14 +572,19 @@
   };
 
   // Expected difference after dead code elimination.
-  diff_t expected_dce_diff = {
-    { "  3: IntConstant\n",     removed },
-    { "  13: IntConstant\n",    removed },
-    { "  18: IntConstant\n",    removed },
-    { "  24: IntConstant\n",    removed },
-    { "  34: IntConstant\n",    removed },
-  };
-  std::string expected_after_dce = Patch(expected_after_cf, expected_dce_diff);
+  std::string expected_after_dce =
+    "BasicBlock 0, succ: 1\n"
+    "  5: IntConstant []\n"
+    "  30: SuspendCheck\n"
+    "  32: IntConstant []\n"
+    "  33: IntConstant []\n"
+    "  35: IntConstant [28]\n"
+    "  31: Goto 1\n"
+    "BasicBlock 1, pred: 0, succ: 5\n"
+    "  21: SuspendCheck\n"
+    "  28: Return(35)\n"
+    "BasicBlock 5, pred: 1\n"
+    "  29: Exit\n";
 
   TestCode(data,
            expected_before,
@@ -647,13 +652,15 @@
     ASSERT_EQ(inst->AsIntConstant()->GetValue(), 1);
   };
 
-  // Expected difference after dead code elimination.
-  diff_t expected_dce_diff = {
-    { "  3: IntConstant [9, 15, 22]\n", "  3: IntConstant [9, 22]\n" },
-    { "  22: Phi(3, 5) [15]\n",         "  22: Phi(3, 5)\n" },
-    { "  15: Add(22, 3)\n",             removed }
-  };
-  std::string expected_after_dce = Patch(expected_after_cf, expected_dce_diff);
+  // Expected graph after dead code elimination.
+  std::string expected_after_dce =
+    "BasicBlock 0, succ: 1\n"
+    "  19: SuspendCheck\n"
+    "  20: Goto 1\n"
+    "BasicBlock 1, pred: 0, succ: 4\n"
+    "  17: ReturnVoid\n"
+    "BasicBlock 4, pred: 1\n"
+    "  18: Exit\n";
 
   TestCode(data,
            expected_before,
diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc
index 8045cc5..b31de98 100644
--- a/compiler/optimizing/dead_code_elimination.cc
+++ b/compiler/optimizing/dead_code_elimination.cc
@@ -17,13 +17,97 @@
 #include "dead_code_elimination.h"
 
 #include "base/bit_vector-inl.h"
+#include "ssa_phi_elimination.h"
 
 namespace art {
 
-void HDeadCodeElimination::Run() {
+static void MarkReachableBlocks(HBasicBlock* block, ArenaBitVector* visited) {
+  int block_id = block->GetBlockId();
+  if (visited->IsBitSet(block_id)) {
+    return;
+  }
+  visited->SetBit(block_id);
+
+  HInstruction* last_instruction = block->GetLastInstruction();
+  if (last_instruction->IsIf()) {
+    HIf* if_instruction = last_instruction->AsIf();
+    HInstruction* condition = if_instruction->InputAt(0);
+    if (!condition->IsIntConstant()) {
+      MarkReachableBlocks(if_instruction->IfTrueSuccessor(), visited);
+      MarkReachableBlocks(if_instruction->IfFalseSuccessor(), visited);
+    } else if (condition->AsIntConstant()->IsOne()) {
+      MarkReachableBlocks(if_instruction->IfTrueSuccessor(), visited);
+    } else {
+      DCHECK(condition->AsIntConstant()->IsZero());
+      MarkReachableBlocks(if_instruction->IfFalseSuccessor(), visited);
+    }
+  } else {
+    for (size_t i = 0, e = block->GetSuccessors().Size(); i < e; ++i) {
+      MarkReachableBlocks(block->GetSuccessors().Get(i), visited);
+    }
+  }
+}
+
+static void MarkLoopHeadersContaining(const HBasicBlock& block, ArenaBitVector* set) {
+  for (HLoopInformationOutwardIterator it(block); !it.Done(); it.Advance()) {
+    set->SetBit(it.Current()->GetHeader()->GetBlockId());
+  }
+}
+
+void HDeadCodeElimination::MaybeRecordDeadBlock(HBasicBlock* block) {
+  if (stats_ != nullptr) {
+    stats_->RecordStat(MethodCompilationStat::kRemovedDeadInstruction,
+                       block->GetPhis().CountSize() + block->GetInstructions().CountSize());
+  }
+}
+
+void HDeadCodeElimination::RemoveDeadBlocks() {
+  // Classify blocks as reachable/unreachable.
+  ArenaAllocator* allocator = graph_->GetArena();
+  ArenaBitVector live_blocks(allocator, graph_->GetBlocks().Size(), false);
+  ArenaBitVector affected_loops(allocator, graph_->GetBlocks().Size(), false);
+
+  MarkReachableBlocks(graph_->GetEntryBlock(), &live_blocks);
+
+  // Remove all dead blocks. Iterate in post order because removal needs the
+  // block's chain of dominators and nested loops need to be updated from the
+  // inside out.
+  for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
+    HBasicBlock* block  = it.Current();
+    int id = block->GetBlockId();
+    if (live_blocks.IsBitSet(id)) {
+      if (affected_loops.IsBitSet(id)) {
+        DCHECK(block->IsLoopHeader());
+        block->GetLoopInformation()->Update();
+      }
+    } else {
+      MaybeRecordDeadBlock(block);
+      MarkLoopHeadersContaining(*block, &affected_loops);
+      block->DisconnectAndDelete();
+    }
+  }
+
+  // Connect successive blocks created by dead branches. Order does not matter.
+  for (HReversePostOrderIterator it(*graph_); !it.Done();) {
+    HBasicBlock* block  = it.Current();
+    if (block->IsEntryBlock() || block->GetSuccessors().Size() != 1u) {
+      it.Advance();
+      continue;
+    }
+    HBasicBlock* successor = block->GetSuccessors().Get(0);
+    if (successor->IsExitBlock() || successor->GetPredecessors().Size() != 1u) {
+      it.Advance();
+      continue;
+    }
+    block->MergeWith(successor);
+
+    // Reiterate on this block in case it can be merged with its new successor.
+  }
+}
+
+void HDeadCodeElimination::RemoveDeadInstructions() {
   // Process basic blocks in post-order in the dominator tree, so that
-  // a dead instruction depending on another dead instruction is
-  // removed.
+  // a dead instruction depending on another dead instruction is removed.
   for (HPostOrderIterator b(*graph_); !b.Done(); b.Advance()) {
     HBasicBlock* block = b.Current();
     // Traverse this block's instructions in backward order and remove
@@ -47,4 +131,10 @@
   }
 }
 
+void HDeadCodeElimination::Run() {
+  RemoveDeadBlocks();
+  SsaRedundantPhiElimination(graph_).Run();
+  RemoveDeadInstructions();
+}
+
 }  // namespace art
diff --git a/compiler/optimizing/dead_code_elimination.h b/compiler/optimizing/dead_code_elimination.h
index cee9364..59a57c4 100644
--- a/compiler/optimizing/dead_code_elimination.h
+++ b/compiler/optimizing/dead_code_elimination.h
@@ -31,15 +31,19 @@
  public:
   HDeadCodeElimination(HGraph* graph,
                        OptimizingCompilerStats* stats = nullptr,
-                       const char* name = kDeadCodeEliminationPassName)
+                       const char* name = kInitialDeadCodeEliminationPassName)
       : HOptimization(graph, true, name, stats) {}
 
   void Run() OVERRIDE;
 
-  static constexpr const char* kDeadCodeEliminationPassName =
-    "dead_code_elimination";
+  static constexpr const char* kInitialDeadCodeEliminationPassName = "dead_code_elimination";
+  static constexpr const char* kFinalDeadCodeEliminationPassName = "dead_code_elimination_final";
 
  private:
+  void MaybeRecordDeadBlock(HBasicBlock* block);
+  void RemoveDeadBlocks();
+  void RemoveDeadInstructions();
+
   DISALLOW_COPY_AND_ASSIGN(HDeadCodeElimination);
 };
 
diff --git a/compiler/optimizing/dead_code_elimination_test.cc b/compiler/optimizing/dead_code_elimination_test.cc
index 98ae1ec..3209d3e 100644
--- a/compiler/optimizing/dead_code_elimination_test.cc
+++ b/compiler/optimizing/dead_code_elimination_test.cc
@@ -169,20 +169,25 @@
     "BasicBlock 5, pred: 4\n"
     "  28: Exit\n";
 
-  // Expected difference after dead code elimination.
-  diff_t expected_diff = {
-    { "  13: IntConstant [14]\n", removed },
-    { "  24: IntConstant [25]\n", removed },
-    { "  14: Add(19, 13) [25]\n", removed },
-    // The SuspendCheck instruction following this Add instruction
-    // inserts the latter in an environment, thus making it "used" and
-    // therefore non removable.  It ensues that some other Add and
-    // IntConstant instructions cannot be removed, as they are direct
-    // or indirect inputs of the initial Add instruction.
-    { "  19: Add(9, 18) [14]\n",  "  19: Add(9, 18) []\n" },
-    { "  25: Add(14, 24)\n",      removed },
-  };
-  std::string expected_after = Patch(expected_before, expected_diff);
+  // The SuspendCheck instruction following this Add instruction
+  // inserts the latter in an environment, thus making it "used" and
+  // therefore non removable.  It ensures that some other Add and
+  // IntConstant instructions cannot be removed, as they are direct
+  // or indirect inputs of the initial Add instruction.
+  std::string expected_after =
+    "BasicBlock 0, succ: 1\n"
+    "  3: IntConstant [9]\n"
+    "  5: IntConstant [9]\n"
+    "  18: IntConstant [19]\n"
+    "  29: SuspendCheck\n"
+    "  30: Goto 1\n"
+    "BasicBlock 1, pred: 0, succ: 5\n"
+    "  9: Add(3, 5) [19]\n"
+    "  19: Add(9, 18) []\n"
+    "  21: SuspendCheck\n"
+    "  27: ReturnVoid\n"
+    "BasicBlock 5, pred: 1\n"
+    "  28: Exit\n";
 
   TestCode(data, expected_before, expected_after);
 }
diff --git a/compiler/optimizing/dominator_test.cc b/compiler/optimizing/dominator_test.cc
index 61a7697..78ae1dd 100644
--- a/compiler/optimizing/dominator_test.cc
+++ b/compiler/optimizing/dominator_test.cc
@@ -27,7 +27,7 @@
 static void TestCode(const uint16_t* data, const int* blocks, size_t blocks_length) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
-  HGraph* graph = new (&allocator) HGraph(&allocator);
+  HGraph* graph = CreateGraph(&allocator);
   HGraphBuilder builder(graph);
   const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
   bool graph_built = builder.BuildGraph(*item);
diff --git a/compiler/optimizing/find_loops_test.cc b/compiler/optimizing/find_loops_test.cc
index 2bfecc6..29aa97a 100644
--- a/compiler/optimizing/find_loops_test.cc
+++ b/compiler/optimizing/find_loops_test.cc
@@ -28,7 +28,7 @@
 namespace art {
 
 static HGraph* TestCode(const uint16_t* data, ArenaAllocator* allocator) {
-  HGraph* graph = new (allocator) HGraph(allocator);
+  HGraph* graph = CreateGraph(allocator);
   HGraphBuilder builder(graph);
   const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
   builder.BuildGraph(*item);
@@ -235,14 +235,13 @@
 
   TestBlock(graph, 0, false, -1);            // entry block
   TestBlock(graph, 1, false, -1);            // pre header
-  const int blocks2[] = {2, 3, 4, 5, 8};
-  TestBlock(graph, 2, true, 2, blocks2, 5);  // loop header
+  const int blocks2[] = {2, 3, 4, 5};
+  TestBlock(graph, 2, true, 2, blocks2, arraysize(blocks2));  // loop header
   TestBlock(graph, 3, false, 2);             // block in loop
-  TestBlock(graph, 4, false, 2);             // original back edge
-  TestBlock(graph, 5, false, 2);             // original back edge
+  TestBlock(graph, 4, false, 2);             // back edge
+  TestBlock(graph, 5, false, 2);             // back edge
   TestBlock(graph, 6, false, -1);            // return block
   TestBlock(graph, 7, false, -1);            // exit block
-  TestBlock(graph, 8, false, 2);             // synthesized back edge
 }
 
 
diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc
index 8950635..fd28f0b 100644
--- a/compiler/optimizing/graph_checker.cc
+++ b/compiler/optimizing/graph_checker.cc
@@ -121,6 +121,18 @@
   }
 }
 
+void GraphChecker::VisitBoundsCheck(HBoundsCheck* check) {
+  if (!GetGraph()->HasBoundsChecks()) {
+    AddError(StringPrintf("Instruction %s:%d is a HBoundsCheck, "
+                          "but HasBoundsChecks() returns false",
+                          check->DebugName(),
+                          check->GetId()));
+  }
+
+  // Perform the instruction base checks too.
+  VisitInstruction(check);
+}
+
 void GraphChecker::VisitInstruction(HInstruction* instruction) {
   if (seen_ids_.IsBitSet(instruction->GetId())) {
     AddError(StringPrintf("Instruction id %d is duplicate in graph.",
@@ -158,7 +170,8 @@
     }
   }
 
-  // Ensure the uses of `instruction` are defined in a block of the graph.
+  // Ensure the uses of `instruction` are defined in a block of the graph,
+  // and the entry in the use list is consistent.
   for (HUseIterator<HInstruction*> use_it(instruction->GetUses());
        !use_it.Done(); use_it.Advance()) {
     HInstruction* use = use_it.Current()->GetUser();
@@ -172,6 +185,27 @@
                             use->GetId(),
                             instruction->GetId()));
     }
+    size_t use_index = use_it.Current()->GetIndex();
+    if ((use_index >= use->InputCount()) || (use->InputAt(use_index) != instruction)) {
+      AddError(StringPrintf("User %s:%d of instruction %d has a wrong "
+                            "UseListNode index.",
+                            use->DebugName(),
+                            use->GetId(),
+                            instruction->GetId()));
+    }
+  }
+
+  // Ensure the environment uses entries are consistent.
+  for (HUseIterator<HEnvironment*> use_it(instruction->GetEnvUses());
+       !use_it.Done(); use_it.Advance()) {
+    HEnvironment* use = use_it.Current()->GetUser();
+    size_t use_index = use_it.Current()->GetIndex();
+    if ((use_index >= use->Size()) || (use->GetInstructionAt(use_index) != instruction)) {
+      AddError(StringPrintf("Environment user of %s:%d has a wrong "
+                            "UseListNode index.",
+                            instruction->DebugName(),
+                            instruction->GetId()));
+    }
   }
 
   // Ensure 'instruction' has pointers to its inputs' use entries.
@@ -179,7 +213,11 @@
     HUserRecord<HInstruction*> input_record = instruction->InputRecordAt(i);
     HInstruction* input = input_record.GetInstruction();
     HUseListNode<HInstruction*>* use_node = input_record.GetUseNode();
-    if (use_node == nullptr || !input->GetUses().Contains(use_node)) {
+    // Test for null before reading the index; `||` short-circuits in order.
+    if ((use_node == nullptr)
+        || !input->GetUses().Contains(use_node)
+        || (use_node->GetIndex() >= e)
+        || (use_node->GetIndex() != i)) {
       AddError(StringPrintf("Instruction %s:%d has an invalid pointer to use entry "
                             "at input %u (%s:%d).",
                             instruction->DebugName(),
@@ -191,6 +229,30 @@
   }
 }
 
+// Checks `invoke` as a generic instruction, then validates its clinit-check input.
+void GraphChecker::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
+  VisitInstruction(invoke);
+
+  if (invoke->IsStaticWithExplicitClinitCheck()) {
+    HInstruction* last_input = invoke->InputAt(invoke->InputCount() - 1);
+    if (last_input == nullptr) {
+      AddError(StringPrintf("Static invoke %s:%d marked as having an explicit clinit check "
+                            "has a null pointer as last input.",
+                            invoke->DebugName(),
+                            invoke->GetId()));
+    } else if (!last_input->IsClinitCheck() && !last_input->IsLoadClass()) {
+      // `else if`: a null `last_input` was already reported and must not be dereferenced.
+      AddError(StringPrintf("Static invoke %s:%d marked as having an explicit clinit check "
+                            "has a last instruction (%s:%d) which is neither a clinit check "
+                            "nor a load class instruction.",
+                            invoke->DebugName(),
+                            invoke->GetId(),
+                            last_input->DebugName(),
+                            last_input->GetId()));
+    }
+  }
+}
+
 void SSAChecker::VisitBasicBlock(HBasicBlock* block) {
   super_type::VisitBasicBlock(block);
 
@@ -226,6 +288,7 @@
 
 void SSAChecker::CheckLoop(HBasicBlock* loop_header) {
   int id = loop_header->GetBlockId();
+  HLoopInformation* loop_information = loop_header->GetLoopInformation();
 
   // Ensure the pre-header block is first in the list of
   // predecessors of a loop header.
@@ -235,57 +298,61 @@
         id));
   }
 
-  // Ensure the loop header has only two predecessors and that only the
-  // second one is a back edge.
+  // Ensure the loop header has only one incoming branch and the remaining
+  // predecessors are back edges.
   size_t num_preds = loop_header->GetPredecessors().Size();
   if (num_preds < 2) {
     AddError(StringPrintf(
         "Loop header %d has less than two predecessors: %zu.",
         id,
         num_preds));
-  } else if (num_preds > 2) {
-    AddError(StringPrintf(
-        "Loop header %d has more than two predecessors: %zu.",
-        id,
-        num_preds));
   } else {
-    HLoopInformation* loop_information = loop_header->GetLoopInformation();
     HBasicBlock* first_predecessor = loop_header->GetPredecessors().Get(0);
     if (loop_information->IsBackEdge(*first_predecessor)) {
       AddError(StringPrintf(
           "First predecessor of loop header %d is a back edge.",
           id));
     }
-    HBasicBlock* second_predecessor = loop_header->GetPredecessors().Get(1);
-    if (!loop_information->IsBackEdge(*second_predecessor)) {
-      AddError(StringPrintf(
-          "Second predecessor of loop header %d is not a back edge.",
-          id));
+    for (size_t i = 1, e = loop_header->GetPredecessors().Size(); i < e; ++i) {
+      HBasicBlock* predecessor = loop_header->GetPredecessors().Get(i);
+      if (!loop_information->IsBackEdge(*predecessor)) {
+        AddError(StringPrintf(
+            "Loop header %d has multiple incoming (non back edge) blocks.",
+            id));
+      }
     }
   }
 
-  // Ensure there is only one back edge per loop.
-  size_t num_back_edges =
-    loop_header->GetLoopInformation()->GetBackEdges().Size();
+  const ArenaBitVector& loop_blocks = loop_information->GetBlocks();
+
+  // Ensure back edges belong to the loop.
+  size_t num_back_edges = loop_information->GetBackEdges().Size();
   if (num_back_edges == 0) {
     AddError(StringPrintf(
         "Loop defined by header %d has no back edge.",
         id));
-  } else if (num_back_edges > 1) {
-    AddError(StringPrintf(
-        "Loop defined by header %d has several back edges: %zu.",
-        id,
-        num_back_edges));
+  } else {
+    for (size_t i = 0; i < num_back_edges; ++i) {
+      int back_edge_id = loop_information->GetBackEdges().Get(i)->GetBlockId();
+      if (!loop_blocks.IsBitSet(back_edge_id)) {
+        AddError(StringPrintf(
+            "Loop defined by header %d has an invalid back edge %d.",
+            id,
+            back_edge_id));
+      }
+    }
   }
 
-  // Ensure all blocks in the loop are dominated by the loop header.
-  const ArenaBitVector& loop_blocks =
-    loop_header->GetLoopInformation()->GetBlocks();
+  // Ensure all blocks in the loop are live and dominated by the loop header.
   for (uint32_t i : loop_blocks.Indexes()) {
     HBasicBlock* loop_block = GetGraph()->GetBlocks().Get(i);
-    if (!loop_header->Dominates(loop_block)) {
+    if (loop_block == nullptr) {
+      AddError(StringPrintf("Loop defined by header %d contains a previously removed block %d.",
+                            id,
+                            i));
+    } else if (!loop_header->Dominates(loop_block)) {
       AddError(StringPrintf("Loop block %d not dominated by loop header %d.",
-                            loop_block->GetBlockId(),
+                            i,
                             id));
     }
   }
@@ -296,7 +363,7 @@
     if (!loop_blocks.IsSubsetOf(&outer_info->GetBlocks())) {
       AddError(StringPrintf("Blocks of loop defined by header %d are not a subset of blocks of "
                             "an outer loop defined by header %d.",
-                            loop_header->GetBlockId(),
+                            id,
                             outer_info->GetHeader()->GetBlockId()));
     }
   }
@@ -319,8 +386,9 @@
 
   // Ensure an instruction having an environment is dominated by the
   // instructions contained in the environment.
-  HEnvironment* environment = instruction->GetEnvironment();
-  if (environment != nullptr) {
+  for (HEnvironment* environment = instruction->GetEnvironment();
+       environment != nullptr;
+       environment = environment->GetParent()) {
     for (size_t i = 0, e = environment->Size(); i < e; ++i) {
       HInstruction* env_instruction = environment->GetInstructionAt(i);
       if (env_instruction != nullptr
@@ -483,7 +551,7 @@
           Primitive::PrettyDescriptor(op->InputAt(1)->GetType())));
     }
   } else {
-    if (PrimitiveKind(op->InputAt(1)->GetType()) != PrimitiveKind(op->InputAt(0)->GetType())) {
+    if (PrimitiveKind(op->InputAt(0)->GetType()) != PrimitiveKind(op->InputAt(1)->GetType())) {
       AddError(StringPrintf(
           "Binary operation %s %d has inputs of different types: "
           "%s, and %s.",
@@ -508,7 +576,7 @@
           "from its input type: %s vs %s.",
           op->DebugName(), op->GetId(),
           Primitive::PrettyDescriptor(op->GetType()),
-          Primitive::PrettyDescriptor(op->InputAt(1)->GetType())));
+          Primitive::PrettyDescriptor(op->InputAt(0)->GetType())));
     }
   }
 }
diff --git a/compiler/optimizing/graph_checker.h b/compiler/optimizing/graph_checker.h
index 24fee37..b4314da 100644
--- a/compiler/optimizing/graph_checker.h
+++ b/compiler/optimizing/graph_checker.h
@@ -42,6 +42,12 @@
   // Check `instruction`.
   void VisitInstruction(HInstruction* instruction) OVERRIDE;
 
+  // Check `invoke`, including the last input of static invokes with explicit clinit checks.
+  void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE;
+
+  // Check that the HasBoundsChecks() flag is set for bounds checks.
+  void VisitBoundsCheck(HBoundsCheck* check) OVERRIDE;
+
   // Was the last visit of the graph valid?
   bool IsValid() const {
     return errors_.empty();
diff --git a/compiler/optimizing/graph_checker_test.cc b/compiler/optimizing/graph_checker_test.cc
index 923468f..eca0d93 100644
--- a/compiler/optimizing/graph_checker_test.cc
+++ b/compiler/optimizing/graph_checker_test.cc
@@ -30,7 +30,7 @@
  *     1: Exit
  */
 HGraph* CreateSimpleCFG(ArenaAllocator* allocator) {
-  HGraph* graph = new (allocator) HGraph(allocator);
+  HGraph* graph = CreateGraph(allocator);
   HBasicBlock* entry_block = new (allocator) HBasicBlock(graph);
   entry_block->AddInstruction(new (allocator) HGoto());
   graph->AddBlock(entry_block);
diff --git a/compiler/optimizing/graph_test.cc b/compiler/optimizing/graph_test.cc
index 50398b4..59d5092 100644
--- a/compiler/optimizing/graph_test.cc
+++ b/compiler/optimizing/graph_test.cc
@@ -73,7 +73,7 @@
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
 
-  HGraph* graph = new (&allocator) HGraph(&allocator);
+  HGraph* graph = CreateGraph(&allocator);
   HBasicBlock* entry_block = createEntryBlock(graph, &allocator);
   HBasicBlock* if_block = createIfBlock(graph, &allocator);
   HBasicBlock* if_true = createGotoBlock(graph, &allocator);
@@ -108,7 +108,7 @@
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
 
-  HGraph* graph = new (&allocator) HGraph(&allocator);
+  HGraph* graph = CreateGraph(&allocator);
   HBasicBlock* entry_block = createEntryBlock(graph, &allocator);
   HBasicBlock* if_block = createIfBlock(graph, &allocator);
   HBasicBlock* if_false = createGotoBlock(graph, &allocator);
@@ -143,7 +143,7 @@
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
 
-  HGraph* graph = new (&allocator) HGraph(&allocator);
+  HGraph* graph = CreateGraph(&allocator);
   HBasicBlock* entry_block = createEntryBlock(graph, &allocator);
   HBasicBlock* if_block = createIfBlock(graph, &allocator);
   HBasicBlock* return_block = createReturnBlock(graph, &allocator);
@@ -178,7 +178,7 @@
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
 
-  HGraph* graph = new (&allocator) HGraph(&allocator);
+  HGraph* graph = CreateGraph(&allocator);
   HBasicBlock* entry_block = createEntryBlock(graph, &allocator);
   HBasicBlock* if_block = createIfBlock(graph, &allocator);
   HBasicBlock* return_block = createReturnBlock(graph, &allocator);
@@ -213,7 +213,7 @@
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
 
-  HGraph* graph = new (&allocator) HGraph(&allocator);
+  HGraph* graph = CreateGraph(&allocator);
   HBasicBlock* entry_block = createEntryBlock(graph, &allocator);
   HBasicBlock* first_if_block = createIfBlock(graph, &allocator);
   HBasicBlock* if_block = createIfBlock(graph, &allocator);
@@ -252,7 +252,7 @@
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
 
-  HGraph* graph = new (&allocator) HGraph(&allocator);
+  HGraph* graph = CreateGraph(&allocator);
   HBasicBlock* entry_block = createEntryBlock(graph, &allocator);
   HBasicBlock* first_if_block = createIfBlock(graph, &allocator);
   HBasicBlock* if_block = createIfBlock(graph, &allocator);
@@ -288,7 +288,7 @@
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
 
-  HGraph* graph = new (&allocator) HGraph(&allocator);
+  HGraph* graph = CreateGraph(&allocator);
   HBasicBlock* block = createGotoBlock(graph, &allocator);
   HInstruction* got = block->GetLastInstruction();
   ASSERT_TRUE(got->IsControlFlow());
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index ca9cbc3..be28755 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -17,14 +17,75 @@
 #include "graph_visualizer.h"
 
 #include "code_generator.h"
+#include "dead_code_elimination.h"
 #include "licm.h"
 #include "nodes.h"
 #include "optimization.h"
 #include "register_allocator.h"
 #include "ssa_liveness_analysis.h"
 
+#include <cctype>
+#include <sstream>
+
 namespace art {
 
+// Returns true if `str` contains whitespace. isspace() requires an unsigned char value.
+static bool HasWhitespace(const char* str) {
+  DCHECK(str != nullptr);
+  for (; *str != '\0'; ++str) {
+    if (isspace(static_cast<unsigned char>(*str))) {
+      return true;
+    }
+  }
+  return false;
+}
+
+class StringList {
+ public:
+  enum Format {
+    kArrayBrackets,
+    kSetBrackets,
+  };
+
+  // Create an empty list
+  explicit StringList(Format format = kArrayBrackets) : format_(format), is_empty_(true) {}
+
+  // Construct StringList from a linked list. List element class T
+  // must provide methods `GetNext` and `Dump`.
+  template<class T>
+  explicit StringList(T* first_entry, Format format = kArrayBrackets) : StringList(format) {
+    for (T* current = first_entry; current != nullptr; current = current->GetNext()) {
+      current->Dump(NewEntryStream());
+    }
+  }
+
+  std::ostream& NewEntryStream() {
+    if (is_empty_) {
+      is_empty_ = false;
+    } else {
+      sstream_ << ",";
+    }
+    return sstream_;
+  }
+
+ private:
+  Format format_;
+  bool is_empty_;
+  std::ostringstream sstream_;
+
+  friend std::ostream& operator<<(std::ostream& os, const StringList& list);
+};
+
+std::ostream& operator<<(std::ostream& os, const StringList& list) {
+  switch (list.format_) {
+    case StringList::kArrayBrackets: return os << "[" << list.sstream_.str() << "]";
+    case StringList::kSetBrackets:   return os << "{" << list.sstream_.str() << "}";
+    default:
+      LOG(FATAL) << "Invalid StringList format";
+      UNREACHABLE();
+  }
+}
+
 /**
  * HGraph visitor to generate a file suitable for the c1visualizer tool and IRHydra.
  */
@@ -124,76 +185,84 @@
     output_<< std::endl;
   }
 
-  void DumpLocation(Location location) {
+  void DumpLocation(std::ostream& stream, const Location& location) {
     if (location.IsRegister()) {
-      codegen_.DumpCoreRegister(output_, location.reg());
+      codegen_.DumpCoreRegister(stream, location.reg());
     } else if (location.IsFpuRegister()) {
-      codegen_.DumpFloatingPointRegister(output_, location.reg());
+      codegen_.DumpFloatingPointRegister(stream, location.reg());
     } else if (location.IsConstant()) {
-      output_ << "constant";
+      stream << "#";
       HConstant* constant = location.GetConstant();
       if (constant->IsIntConstant()) {
-        output_ << " " << constant->AsIntConstant()->GetValue();
+        stream << constant->AsIntConstant()->GetValue();
       } else if (constant->IsLongConstant()) {
-        output_ << " " << constant->AsLongConstant()->GetValue();
+        stream << constant->AsLongConstant()->GetValue();
       }
     } else if (location.IsInvalid()) {
-      output_ << "invalid";
+      stream << "invalid";
     } else if (location.IsStackSlot()) {
-      output_ << location.GetStackIndex() << "(sp)";
+      stream << location.GetStackIndex() << "(sp)";
     } else if (location.IsFpuRegisterPair()) {
-      codegen_.DumpFloatingPointRegister(output_, location.low());
-      output_ << " and ";
-      codegen_.DumpFloatingPointRegister(output_, location.high());
+      codegen_.DumpFloatingPointRegister(stream, location.low());
+      stream << "|";
+      codegen_.DumpFloatingPointRegister(stream, location.high());
     } else if (location.IsRegisterPair()) {
-      codegen_.DumpCoreRegister(output_, location.low());
-      output_ << " and ";
-      codegen_.DumpCoreRegister(output_, location.high());
+      codegen_.DumpCoreRegister(stream, location.low());
+      stream << "|";
+      codegen_.DumpCoreRegister(stream, location.high());
     } else if (location.IsUnallocated()) {
-      output_ << "<U>";
+      stream << "unallocated";
     } else {
       DCHECK(location.IsDoubleStackSlot());
-      output_ << "2x" << location.GetStackIndex() << "(sp)";
+      stream << "2x" << location.GetStackIndex() << "(sp)";
     }
   }
 
+  std::ostream& StartAttributeStream(const char* name = nullptr) {
+    if (name == nullptr) {
+      output_ << " ";
+    } else {
+      DCHECK(!HasWhitespace(name)) << "Checker does not allow spaces in attributes";
+      output_ << " " << name << ":";
+    }
+    return output_;
+  }
+
   void VisitParallelMove(HParallelMove* instruction) OVERRIDE {
-    output_ << " (";
+    StartAttributeStream("liveness") << instruction->GetLifetimePosition();
+    StringList moves;
     for (size_t i = 0, e = instruction->NumMoves(); i < e; ++i) {
       MoveOperands* move = instruction->MoveOperandsAt(i);
-      DumpLocation(move->GetSource());
-      output_ << " -> ";
-      DumpLocation(move->GetDestination());
-      if (i + 1 != e) {
-        output_ << ", ";
-      }
+      std::ostream& str = moves.NewEntryStream();
+      DumpLocation(str, move->GetSource());
+      str << "->";
+      DumpLocation(str, move->GetDestination());
     }
-    output_ << ")";
-    output_ << " (liveness: " << instruction->GetLifetimePosition() << ")";
+    StartAttributeStream("moves") <<  moves;
   }
 
   void VisitIntConstant(HIntConstant* instruction) OVERRIDE {
-    output_ << " " << instruction->GetValue();
+    StartAttributeStream() << instruction->GetValue();
   }
 
   void VisitLongConstant(HLongConstant* instruction) OVERRIDE {
-    output_ << " " << instruction->GetValue();
+    StartAttributeStream() << instruction->GetValue();
   }
 
   void VisitFloatConstant(HFloatConstant* instruction) OVERRIDE {
-    output_ << " " << instruction->GetValue();
+    StartAttributeStream() << instruction->GetValue();
   }
 
   void VisitDoubleConstant(HDoubleConstant* instruction) OVERRIDE {
-    output_ << " " << instruction->GetValue();
+    StartAttributeStream() << instruction->GetValue();
   }
 
   void VisitPhi(HPhi* phi) OVERRIDE {
-    output_ << " " << phi->GetRegNumber();
+    StartAttributeStream("reg") << phi->GetRegNumber();
   }
 
   void VisitMemoryBarrier(HMemoryBarrier* barrier) OVERRIDE {
-    output_ << " " << barrier->GetBarrierKind();
+    StartAttributeStream("kind") << barrier->GetBarrierKind();
   }
 
   bool IsPass(const char* name) {
@@ -202,59 +271,66 @@
 
   void PrintInstruction(HInstruction* instruction) {
     output_ << instruction->DebugName();
-    instruction->Accept(this);
     if (instruction->InputCount() > 0) {
-      output_ << " [ ";
-      for (HInputIterator inputs(instruction); !inputs.Done(); inputs.Advance()) {
-        output_ << GetTypeId(inputs.Current()->GetType()) << inputs.Current()->GetId() << " ";
+      StringList inputs;
+      for (HInputIterator it(instruction); !it.Done(); it.Advance()) {
+        inputs.NewEntryStream() << GetTypeId(it.Current()->GetType()) << it.Current()->GetId();
       }
-      output_ << "]";
+      StartAttributeStream() << inputs;
     }
+    instruction->Accept(this);
     if (instruction->HasEnvironment()) {
-      HEnvironment* env = instruction->GetEnvironment();
-      output_ << " (env: [ ";
-      for (size_t i = 0, e = env->Size(); i < e; ++i) {
-        HInstruction* insn = env->GetInstructionAt(i);
-        if (insn != nullptr) {
-          output_ << GetTypeId(insn->GetType()) << insn->GetId() << " ";
-        } else {
-          output_ << " _ ";
+      StringList envs;
+      for (HEnvironment* environment = instruction->GetEnvironment();
+           environment != nullptr;
+           environment = environment->GetParent()) {
+        StringList vregs;
+        for (size_t i = 0, e = environment->Size(); i < e; ++i) {
+          HInstruction* insn = environment->GetInstructionAt(i);
+          if (insn != nullptr) {
+            vregs.NewEntryStream() << GetTypeId(insn->GetType()) << insn->GetId();
+          } else {
+            vregs.NewEntryStream() << "_";
+          }
         }
+        envs.NewEntryStream() << vregs;
       }
-      output_ << "])";
+      StartAttributeStream("env") << envs;
     }
     if (IsPass(SsaLivenessAnalysis::kLivenessPassName)
         && is_after_pass_
         && instruction->GetLifetimePosition() != kNoLifetime) {
-      output_ << " (liveness: " << instruction->GetLifetimePosition();
+      StartAttributeStream("liveness") << instruction->GetLifetimePosition();
       if (instruction->HasLiveInterval()) {
-        output_ << " ";
-        const LiveInterval& interval = *instruction->GetLiveInterval();
-        interval.Dump(output_);
+        LiveInterval* interval = instruction->GetLiveInterval();
+        StartAttributeStream("ranges")
+            << StringList(interval->GetFirstRange(), StringList::kSetBrackets);
+        StartAttributeStream("uses") << StringList(interval->GetFirstUse());
+        StartAttributeStream("env_uses") << StringList(interval->GetFirstEnvironmentUse());
+        StartAttributeStream("is_fixed") << interval->IsFixed();
+        StartAttributeStream("is_split") << interval->IsSplit();
+        StartAttributeStream("is_low") << interval->IsLowInterval();
+        StartAttributeStream("is_high") << interval->IsHighInterval();
       }
-      output_ << ")";
     } else if (IsPass(RegisterAllocator::kRegisterAllocatorPassName) && is_after_pass_) {
+      StartAttributeStream("liveness") << instruction->GetLifetimePosition();
       LocationSummary* locations = instruction->GetLocations();
       if (locations != nullptr) {
-        output_ << " ( ";
+        StringList inputs;
         for (size_t i = 0; i < instruction->InputCount(); ++i) {
-          DumpLocation(locations->InAt(i));
-          output_ << " ";
+          DumpLocation(inputs.NewEntryStream(), locations->InAt(i));
         }
-        output_ << ")";
-        if (locations->Out().IsValid()) {
-          output_ << " -> ";
-          DumpLocation(locations->Out());
-        }
+        std::ostream& attr = StartAttributeStream("locations");
+        attr << inputs << "->";
+        DumpLocation(attr, locations->Out());
       }
-      output_ << " (liveness: " << instruction->GetLifetimePosition() << ")";
-    } else if (IsPass(LICM::kLoopInvariantCodeMotionPassName)) {
-      output_ << " ( loop_header:";
+    } else if (IsPass(LICM::kLoopInvariantCodeMotionPassName)
+               || IsPass(HDeadCodeElimination::kFinalDeadCodeEliminationPassName)) {
       HLoopInformation* info = instruction->GetBlock()->GetLoopInformation();
       if (info == nullptr) {
-        output_ << "null )";
+        StartAttributeStream("loop") << "none";
       } else {
-        output_ << "B" << info->GetHeader()->GetBlockId() << " )";
+        StartAttributeStream("loop") << "B" << info->GetHeader()->GetBlockId();
       }
     }
   }
@@ -274,7 +350,7 @@
       output_ << bci << " " << num_uses << " "
               << GetTypeId(instruction->GetType()) << instruction->GetId() << " ";
       PrintInstruction(instruction);
-      output_ << kEndInstructionMarker << std::endl;
+      output_ << " " << kEndInstructionMarker << std::endl;
     }
   }
 
diff --git a/compiler/optimizing/gvn_test.cc b/compiler/optimizing/gvn_test.cc
index a81d49a..c3ce7e1 100644
--- a/compiler/optimizing/gvn_test.cc
+++ b/compiler/optimizing/gvn_test.cc
@@ -29,7 +29,7 @@
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
 
-  HGraph* graph = new (&allocator) HGraph(&allocator);
+  HGraph* graph = CreateGraph(&allocator);
   HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
   graph->AddBlock(entry);
   graph->SetEntryBlock(entry);
@@ -78,7 +78,7 @@
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
 
-  HGraph* graph = new (&allocator) HGraph(&allocator);
+  HGraph* graph = CreateGraph(&allocator);
   HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
   graph->AddBlock(entry);
   graph->SetEntryBlock(entry);
@@ -133,7 +133,7 @@
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
 
-  HGraph* graph = new (&allocator) HGraph(&allocator);
+  HGraph* graph = CreateGraph(&allocator);
   HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
   graph->AddBlock(entry);
   graph->SetEntryBlock(entry);
@@ -220,7 +220,7 @@
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
 
-  HGraph* graph = new (&allocator) HGraph(&allocator);
+  HGraph* graph = CreateGraph(&allocator);
   HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
   graph->AddBlock(entry);
   graph->SetEntryBlock(entry);
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index bffd639..afffc7a 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -130,6 +130,16 @@
     return false;
   }
 
+  if (invoke_instruction->IsInvokeStaticOrDirect() &&
+      invoke_instruction->AsInvokeStaticOrDirect()->IsStaticWithImplicitClinitCheck()) {
+    // Case of a static method that cannot be inlined because it implicitly
+    // requires an initialization check of its declaring class.
+    VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file)
+                   << " is not inlined because it is static and requires a clinit"
+                   << " check that cannot be emitted due to Dex cache limitations";
+    return false;
+  }
+
   if (!TryBuildAndInline(resolved_method, invoke_instruction, method_index, can_use_dex_cache)) {
     resolved_method->SetShouldNotInline();
     return false;
@@ -160,7 +170,11 @@
     nullptr);
 
   HGraph* callee_graph = new (graph_->GetArena()) HGraph(
-      graph_->GetArena(), graph_->IsDebuggable(), graph_->GetCurrentInstructionId());
+      graph_->GetArena(),
+      caller_dex_file,
+      method_index,
+      graph_->IsDebuggable(),
+      graph_->GetCurrentInstructionId());
 
   OptimizingCompilerStats inline_stats;
   HGraphBuilder builder(callee_graph,
@@ -258,8 +272,8 @@
 
   callee_graph->InlineInto(graph_, invoke_instruction);
 
-  if (callee_graph->HasArrayAccesses()) {
-    graph_->SetHasArrayAccesses(true);
+  if (callee_graph->HasBoundsChecks()) {
+    graph_->SetHasBoundsChecks(true);
   }
 
   return true;
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index 2df7c16..46fad17 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -137,13 +137,25 @@
   HConstant* input_cst = instruction->GetConstantRight();
   HInstruction* input_other = instruction->GetLeastConstantLeft();
 
-  if ((input_cst != nullptr) && input_cst->IsZero()) {
-    // Replace code looking like
-    //    SHL dst, src, 0
-    // with
-    //    src
-    instruction->ReplaceWith(input_other);
-    instruction->GetBlock()->RemoveInstruction(instruction);
+  if (input_cst != nullptr) {
+    if (input_cst->IsZero()) {
+      // Replace code looking like
+      //    SHL dst, src, 0
+      // with
+      //    src
+      instruction->ReplaceWith(input_other);
+      instruction->GetBlock()->RemoveInstruction(instruction);
+    } else if (instruction->IsShl() && input_cst->IsOne()) {
+      // Replace Shl looking like
+      //    SHL dst, src, 1
+      // with
+      //    ADD dst, src, src
+      HAdd *add = new(GetGraph()->GetArena()) HAdd(instruction->GetType(),
+                                                   input_other,
+                                                   input_other);
+      instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, add);
+      RecordSimplification();
+    }
   }
 }
 
@@ -377,15 +389,42 @@
     return;
   }
 
-  if ((input_cst != nullptr) && input_cst->IsMinusOne() &&
-      (Primitive::IsFloatingPointType(type) || Primitive::IsIntOrLongType(type))) {
+  if ((input_cst != nullptr) && input_cst->IsMinusOne()) {
     // Replace code looking like
     //    DIV dst, src, -1
     // with
     //    NEG dst, src
     instruction->GetBlock()->ReplaceAndRemoveInstructionWith(
-        instruction, (new (GetGraph()->GetArena()) HNeg(type, input_other)));
+        instruction, new (GetGraph()->GetArena()) HNeg(type, input_other));
     RecordSimplification();
+    return;
+  }
+
+  if ((input_cst != nullptr) && Primitive::IsFloatingPointType(type)) {
+    // Try replacing code looking like
+    //    DIV dst, src, constant
+    // with
+    //    MUL dst, src, 1 / constant
+    HConstant* reciprocal = nullptr;
+    if (type == Primitive::Primitive::kPrimDouble) {
+      double value = input_cst->AsDoubleConstant()->GetValue();
+      if (CanDivideByReciprocalMultiplyDouble(bit_cast<int64_t, double>(value))) {
+        reciprocal = GetGraph()->GetDoubleConstant(1.0 / value);
+      }
+    } else {
+      DCHECK_EQ(type, Primitive::kPrimFloat);
+      float value = input_cst->AsFloatConstant()->GetValue();
+      if (CanDivideByReciprocalMultiplyFloat(bit_cast<int32_t, float>(value))) {
+        reciprocal = GetGraph()->GetFloatConstant(1.0f / value);
+      }
+    }
+
+    if (reciprocal != nullptr) {
+      instruction->GetBlock()->ReplaceAndRemoveInstructionWith(
+          instruction, new (GetGraph()->GetArena()) HMul(type, input_other, reciprocal));
+      RecordSimplification();
+      return;
+    }
   }
 }
 
diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc
index 20aa45f..43fe374 100644
--- a/compiler/optimizing/intrinsics.cc
+++ b/compiler/optimizing/intrinsics.cc
@@ -186,6 +186,8 @@
       return Intrinsics::kStringCharAt;
     case kIntrinsicCompareTo:
       return Intrinsics::kStringCompareTo;
+    case kIntrinsicGetCharsNoCheck:
+      return Intrinsics::kStringGetCharsNoCheck;
     case kIntrinsicIsEmptyOrLength:
       // The inliner can handle these two cases - and this is the preferred approach
       // since after inlining the call is no longer visible (as opposed to waiting
@@ -194,6 +196,12 @@
     case kIntrinsicIndexOf:
       return ((method.d.data & kIntrinsicFlagBase0) == 0) ?
           Intrinsics::kStringIndexOfAfter : Intrinsics::kStringIndexOf;
+    case kIntrinsicNewStringFromBytes:
+      return Intrinsics::kStringNewStringFromBytes;
+    case kIntrinsicNewStringFromChars:
+      return Intrinsics::kStringNewStringFromChars;
+    case kIntrinsicNewStringFromString:
+      return Intrinsics::kStringNewStringFromString;
 
     case kIntrinsicCas:
       switch (GetType(method.d.data, false)) {
@@ -280,6 +288,11 @@
     case kInlineOpIPut:
       return Intrinsics::kNone;
 
+    // String init cases, not intrinsics.
+
+    case kInlineStringInit:
+      return Intrinsics::kNone;
+
     // No default case to make the compiler warn on missing cases.
   }
   return Intrinsics::kNone;
@@ -361,4 +374,3 @@
 }
 
 }  // namespace art
-
diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h
index dbb7cba..c243ef3 100644
--- a/compiler/optimizing/intrinsics.h
+++ b/compiler/optimizing/intrinsics.h
@@ -17,8 +17,10 @@
 #ifndef ART_COMPILER_OPTIMIZING_INTRINSICS_H_
 #define ART_COMPILER_OPTIMIZING_INTRINSICS_H_
 
+#include "code_generator.h"
 #include "nodes.h"
 #include "optimization.h"
+#include "parallel_move_resolver.h"
 
 namespace art {
 
@@ -76,6 +78,38 @@
 #undef INTRINSICS_LIST
 #undef OPTIMIZING_INTRINSICS
 
+  // Moves the arguments of `invoke` from the locations in its LocationSummary to the locations
+  // handed out, in argument order, by `calling_convention_visitor`.
+  static void MoveArguments(HInvoke* invoke,
+                            CodeGenerator* codegen,
+                            InvokeDexCallingConventionVisitor* calling_convention_visitor) {
+    if (kIsDebugBuild && invoke->IsInvokeStaticOrDirect()) {
+      // Unless baseline is running, art::PrepareForRegisterAllocation must already have pruned
+      // the explicit clinit checks triggered by static invokes.
+      HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect();
+      DCHECK(codegen->IsBaseline() || !static_or_direct->IsStaticWithExplicitClinitCheck());
+    }
+
+    const size_t number_of_arguments = invoke->GetNumberOfArguments();
+    if (number_of_arguments == 0) {
+      return;  // No argument to move.
+    }
+
+    // Source and target locations could overlap, so all the moves are gathered in one
+    // HParallelMove and emitted by the parallel move resolver.
+    LocationSummary* summary = invoke->GetLocations();
+    HParallelMove moves(codegen->GetGraph()->GetArena());
+
+    for (size_t arg = 0; arg < number_of_arguments; ++arg) {
+      HInstruction* argument = invoke->InputAt(arg);
+      Location target = calling_convention_visitor->GetNextLocation(argument->GetType());
+      Location current = summary->InAt(arg);
+      moves.AddMove(current, target, argument->GetType(), nullptr);
+    }
+
+    codegen->GetMoveResolver()->EmitNativeCode(&moves);
+  }
+
  protected:
   IntrinsicVisitor() {}
 
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index 932192e..dccfe9a 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -48,7 +48,7 @@
 
   DCHECK_NE(type, Primitive::kPrimVoid);
 
-  if (Primitive::IsIntegralType(type)) {
+  if (Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) {
     if (type == Primitive::kPrimLong) {
       Register trg_reg_lo = trg.AsRegisterPairLow<Register>();
       Register trg_reg_hi = trg.AsRegisterPairHigh<Register>();
@@ -77,27 +77,9 @@
   }
 }
 
-static void MoveArguments(HInvoke* invoke, ArenaAllocator* arena, CodeGeneratorARM* codegen) {
-  if (invoke->InputCount() == 0) {
-    return;
-  }
-
-  LocationSummary* locations = invoke->GetLocations();
-  InvokeDexCallingConventionVisitor calling_convention_visitor;
-
-  // We're moving potentially two or more locations to locations that could overlap, so we need
-  // a parallel move resolver.
-  HParallelMove parallel_move(arena);
-
-  for (size_t i = 0; i < invoke->InputCount(); i++) {
-    HInstruction* input = invoke->InputAt(i);
-    Location cc_loc = calling_convention_visitor.GetNextLocation(input->GetType());
-    Location actual_loc = locations->InAt(i);
-
-    parallel_move.AddMove(actual_loc, cc_loc, input->GetType(), nullptr);
-  }
-
-  codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+static void MoveArguments(HInvoke* invoke, CodeGeneratorARM* codegen) {
+  InvokeDexCallingConventionVisitorARM calling_convention_visitor;
+  IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor);
 }
 
 // Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified
@@ -116,7 +98,7 @@
 
     SaveLiveRegisters(codegen, invoke_->GetLocations());
 
-    MoveArguments(invoke_, codegen->GetGraph()->GetArena(), codegen);
+    MoveArguments(invoke_, codegen);
 
     if (invoke_->IsInvokeStaticOrDirect()) {
       codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), kArtMethodRegister);
@@ -809,10 +791,6 @@
   const MemberOffset value_offset = mirror::String::ValueOffset();
   // Location of count
   const MemberOffset count_offset = mirror::String::CountOffset();
-  // Starting offset within data array
-  const MemberOffset offset_offset = mirror::String::OffsetOffset();
-  // Start of char data with array_
-  const MemberOffset data_offset = mirror::Array::DataOffset(sizeof(uint16_t));
 
   Register obj = locations->InAt(0).AsRegister<Register>();  // String object pointer.
   Register idx = locations->InAt(1).AsRegister<Register>();  // Index of character.
@@ -834,15 +812,10 @@
   __ cmp(idx, ShifterOperand(temp));
   __ b(slow_path->GetEntryLabel(), CS);
 
-  // Index computation.
-  __ ldr(temp, Address(obj, offset_offset.Int32Value()));         // temp := str.offset.
-  __ ldr(array_temp, Address(obj, value_offset.Int32Value()));    // array_temp := str.offset.
-  __ add(temp, temp, ShifterOperand(idx));
-  DCHECK_EQ(data_offset.Int32Value() % 2, 0);                     // We'll compensate by shifting.
-  __ add(temp, temp, ShifterOperand(data_offset.Int32Value() / 2));
+  __ add(array_temp, obj, ShifterOperand(value_offset.Int32Value()));  // array_temp := str.value.
 
   // Load the value.
-  __ ldrh(out, Address(array_temp, temp, LSL, 1));                // out := array_temp[temp].
+  __ ldrh(out, Address(array_temp, idx, LSL, 1));                 // out := array_temp[idx].
 
   __ Bind(slow_path->GetExitLabel());
 }
@@ -877,6 +850,169 @@
   __ Bind(slow_path->GetExitLabel());
 }
 
+static void GenerateVisitStringIndexOf(HInvoke* invoke,
+                                       ArmAssembler* assembler,
+                                       CodeGeneratorARM* codegen,
+                                       ArenaAllocator* allocator,  // Allocates the slow path.
+                                       bool start_at_zero) {  // Whether to load start-index 0.
+  LocationSummary* locations = invoke->GetLocations();
+  Register tmp_reg = locations->GetTemp(0).AsRegister<Register>();
+  // Emits the call to the pIndexOf entrypoint, preceded by the check for code points > 0xFFFF.
+  // Note that the null check must have been done earlier.
+  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
+
+  // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
+  // or directly dispatch if we have a constant.
+  SlowPathCodeARM* slow_path = nullptr;
+  if (invoke->InputAt(1)->IsIntConstant()) {
+    if (static_cast<uint32_t>(invoke->InputAt(1)->AsIntConstant()->GetValue()) >
+        std::numeric_limits<uint16_t>::max()) {
+      // Always needs the slow-path. We could directly dispatch to it, but this case should be
+      // rare, so for simplicity just put the full slow-path down and branch unconditionally.
+      slow_path = new (allocator) IntrinsicSlowPathARM(invoke);
+      codegen->AddSlowPath(slow_path);
+      __ b(slow_path->GetEntryLabel());
+      __ Bind(slow_path->GetExitLabel());
+      return;
+    }
+  } else {
+    Register char_reg = locations->InAt(1).AsRegister<Register>();
+    __ LoadImmediate(tmp_reg, std::numeric_limits<uint16_t>::max());
+    __ cmp(char_reg, ShifterOperand(tmp_reg));
+    slow_path = new (allocator) IntrinsicSlowPathARM(invoke);
+    codegen->AddSlowPath(slow_path);
+    __ b(slow_path->GetEntryLabel(), HI);  // Taken when char_reg is above 0xFFFF.
+  }
+
+  if (start_at_zero) {
+    DCHECK_EQ(tmp_reg, R2);  // The temp is reused below to hold the start index.
+    // Start-index = 0.
+    __ LoadImmediate(tmp_reg, 0);
+  }
+  // Load the pIndexOf entrypoint at its offset from TR into LR and call it.
+  __ LoadFromOffset(kLoadWord, LR, TR,
+                    QUICK_ENTRYPOINT_OFFSET(kArmWordSize, pIndexOf).Int32Value());
+  __ blx(LR);
+  // slow_path is still null when the character is a constant that fits in 16 bits.
+  if (slow_path != nullptr) {
+    __ Bind(slow_path->GetExitLabel());
+  }
+}
+
+// Location summary for the StringIndexOf intrinsic: a call with fixed register locations.
+void IntrinsicLocationsBuilderARM::VisitStringIndexOf(HInvoke* invoke) {
+  LocationSummary* summary =
+      new (arena_) LocationSummary(invoke, LocationSummary::kCall, kIntrinsified);
+  // The code to call is a hand-crafted assembly stub following the runtime calling convention,
+  // so the inputs are placed directly in that convention's registers.
+  InvokeRuntimeCallingConvention runtime_cc;
+  summary->SetInAt(0, Location::RegisterLocation(runtime_cc.GetRegisterAt(0)));
+  summary->SetInAt(1, Location::RegisterLocation(runtime_cc.GetRegisterAt(1)));
+  summary->SetOut(Location::RegisterLocation(R0));
+
+  // Third convention register: temp for the slow-path codepoint compare, then start-index=0.
+  summary->AddTemp(Location::RegisterLocation(runtime_cc.GetRegisterAt(2)));
+}
+
+void IntrinsicCodeGeneratorARM::VisitStringIndexOf(HInvoke* invoke) {  // Start-index is 0.
+  GenerateVisitStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), true);
+}
+
+// Location summary for the StringIndexOfAfter intrinsic: a call with fixed input registers.
+void IntrinsicLocationsBuilderARM::VisitStringIndexOfAfter(HInvoke* invoke) {
+  LocationSummary* summary =
+      new (arena_) LocationSummary(invoke, LocationSummary::kCall, kIntrinsified);
+  // The code to call is a hand-crafted assembly stub following the runtime calling convention,
+  // so the inputs are placed directly in that convention's registers.
+  InvokeRuntimeCallingConvention runtime_cc;
+  summary->SetInAt(0, Location::RegisterLocation(runtime_cc.GetRegisterAt(0)));
+  summary->SetInAt(1, Location::RegisterLocation(runtime_cc.GetRegisterAt(1)));
+  summary->SetInAt(2, Location::RegisterLocation(runtime_cc.GetRegisterAt(2)));
+  summary->SetOut(Location::RegisterLocation(R0));
+
+  // The slow-path codepoint compare needs a temp; any register will do.
+  summary->AddTemp(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorARM::VisitStringIndexOfAfter(HInvoke* invoke) {  // No start-index 0.
+  GenerateVisitStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), false);
+}
+
+// Four inputs in the first four runtime calling convention registers, result in R0.
+void IntrinsicLocationsBuilderARM::VisitStringNewStringFromBytes(HInvoke* invoke) {
+  LocationSummary* summary =
+      new (arena_) LocationSummary(invoke, LocationSummary::kCall, kIntrinsified);
+  InvokeRuntimeCallingConvention runtime_cc;
+  summary->SetInAt(0, Location::RegisterLocation(runtime_cc.GetRegisterAt(0)));
+  summary->SetInAt(1, Location::RegisterLocation(runtime_cc.GetRegisterAt(1)));
+  summary->SetInAt(2, Location::RegisterLocation(runtime_cc.GetRegisterAt(2)));
+  summary->SetInAt(3, Location::RegisterLocation(runtime_cc.GetRegisterAt(3)));
+  summary->SetOut(Location::RegisterLocation(R0));
+}
+
+// Calls the pAllocStringFromBytes entrypoint; a null byte array takes the slow path.
+void IntrinsicCodeGeneratorARM::VisitStringNewStringFromBytes(HInvoke* invoke) {
+  ArmAssembler* assembler = GetAssembler();
+  Register byte_array = invoke->GetLocations()->InAt(0).AsRegister<Register>();
+  __ cmp(byte_array, ShifterOperand(0));
+  SlowPathCodeARM* slow_path = new (GetAllocator()) IntrinsicSlowPathARM(invoke);
+  codegen_->AddSlowPath(slow_path);
+  __ b(slow_path->GetEntryLabel(), EQ);
+
+  // The pc info is recorded once the call has been emitted, so that it maps the return address.
+  __ LoadFromOffset(
+      kLoadWord, LR, TR, QUICK_ENTRYPOINT_OFFSET(kArmWordSize, pAllocStringFromBytes).Int32Value());
+  __ blx(LR);
+  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+  __ Bind(slow_path->GetExitLabel());
+}
+
+// Three inputs in the first three runtime calling convention registers, result in R0.
+void IntrinsicLocationsBuilderARM::VisitStringNewStringFromChars(HInvoke* invoke) {
+  LocationSummary* summary =
+      new (arena_) LocationSummary(invoke, LocationSummary::kCall, kIntrinsified);
+  InvokeRuntimeCallingConvention runtime_cc;
+  summary->SetInAt(0, Location::RegisterLocation(runtime_cc.GetRegisterAt(0)));
+  summary->SetInAt(1, Location::RegisterLocation(runtime_cc.GetRegisterAt(1)));
+  summary->SetInAt(2, Location::RegisterLocation(runtime_cc.GetRegisterAt(2)));
+  summary->SetOut(Location::RegisterLocation(R0));
+}
+
+// Calls the pAllocStringFromChars entrypoint. No slow path is set up here.
+void IntrinsicCodeGeneratorARM::VisitStringNewStringFromChars(HInvoke* invoke) {
+  ArmAssembler* assembler = GetAssembler();
+  __ LoadFromOffset(
+      kLoadWord, LR, TR, QUICK_ENTRYPOINT_OFFSET(kArmWordSize, pAllocStringFromChars).Int32Value());
+  __ blx(LR);
+  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());  // After the call: maps the return pc.
+}
+
+// One input in the first runtime calling convention register, result in R0.
+void IntrinsicLocationsBuilderARM::VisitStringNewStringFromString(HInvoke* invoke) {
+  LocationSummary* summary =
+      new (arena_) LocationSummary(invoke, LocationSummary::kCall, kIntrinsified);
+  InvokeRuntimeCallingConvention runtime_cc;
+  summary->SetInAt(0, Location::RegisterLocation(runtime_cc.GetRegisterAt(0)));
+  summary->SetOut(Location::RegisterLocation(R0));
+}
+
+// Calls the pAllocStringFromString entrypoint; a null string to copy takes the slow path.
+void IntrinsicCodeGeneratorARM::VisitStringNewStringFromString(HInvoke* invoke) {
+  ArmAssembler* assembler = GetAssembler();
+  Register string_to_copy = invoke->GetLocations()->InAt(0).AsRegister<Register>();
+  __ cmp(string_to_copy, ShifterOperand(0));
+  SlowPathCodeARM* slow_path = new (GetAllocator()) IntrinsicSlowPathARM(invoke);
+  codegen_->AddSlowPath(slow_path);
+  __ b(slow_path->GetEntryLabel(), EQ);
+
+  // The pc info is recorded once the call has been emitted, so that it maps the return address.
+  __ LoadFromOffset(kLoadWord,
+      LR, TR, QUICK_ENTRYPOINT_OFFSET(kArmWordSize, pAllocStringFromString).Int32Value());
+  __ blx(LR);
+  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+  __ Bind(slow_path->GetExitLabel());
+}
+
 // Unimplemented intrinsics.
 
 #define UNIMPLEMENTED_INTRINSIC(Name)                                                  \
@@ -903,9 +1039,8 @@
 UNIMPLEMENTED_INTRINSIC(MathRoundFloat)    // Could be done by changing rounding mode, maybe?
 UNIMPLEMENTED_INTRINSIC(UnsafeCASLong)     // High register pressure.
 UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
-UNIMPLEMENTED_INTRINSIC(StringIndexOf)
-UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter)
 UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
+UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck)
 
 }  // namespace arm
 }  // namespace art
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 117d6a4..2c4fab0 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -75,7 +75,7 @@
 
   DCHECK_NE(type, Primitive::kPrimVoid);
 
-  if (Primitive::IsIntegralType(type)) {
+  if (Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) {
     Register trg_reg = RegisterFrom(trg, type);
     Register res_reg = RegisterFrom(ARM64ReturnLocation(type), type);
     __ Mov(trg_reg, res_reg, kDiscardForSameWReg);
@@ -86,27 +86,9 @@
   }
 }
 
-static void MoveArguments(HInvoke* invoke, ArenaAllocator* arena, CodeGeneratorARM64* codegen) {
-  if (invoke->InputCount() == 0) {
-    return;
-  }
-
-  LocationSummary* locations = invoke->GetLocations();
-  InvokeDexCallingConventionVisitor calling_convention_visitor;
-
-  // We're moving potentially two or more locations to locations that could overlap, so we need
-  // a parallel move resolver.
-  HParallelMove parallel_move(arena);
-
-  for (size_t i = 0; i < invoke->InputCount(); i++) {
-    HInstruction* input = invoke->InputAt(i);
-    Location cc_loc = calling_convention_visitor.GetNextLocation(input->GetType());
-    Location actual_loc = locations->InAt(i);
-
-    parallel_move.AddMove(actual_loc, cc_loc, input->GetType(), nullptr);
-  }
-
-  codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+static void MoveArguments(HInvoke* invoke, CodeGeneratorARM64* codegen) {
+  InvokeDexCallingConventionVisitorARM64 calling_convention_visitor;
+  IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor);
 }
 
 // Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified
@@ -125,7 +107,7 @@
 
     SaveLiveRegisters(codegen, invoke_->GetLocations());
 
-    MoveArguments(invoke_, codegen->GetGraph()->GetArena(), codegen);
+    MoveArguments(invoke_, codegen);
 
     if (invoke_->IsInvokeStaticOrDirect()) {
       codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), kArtMethodRegister);
@@ -952,10 +934,6 @@
   const MemberOffset value_offset = mirror::String::ValueOffset();
   // Location of count
   const MemberOffset count_offset = mirror::String::CountOffset();
-  // Starting offset within data array
-  const MemberOffset offset_offset = mirror::String::OffsetOffset();
-  // Start of char data with array_
-  const MemberOffset data_offset = mirror::Array::DataOffset(sizeof(uint16_t));
 
   Register obj = WRegisterFrom(locations->InAt(0));  // String object pointer.
   Register idx = WRegisterFrom(locations->InAt(1));  // Index of character.
@@ -978,21 +956,15 @@
   __ Cmp(idx, temp);
   __ B(hs, slow_path->GetEntryLabel());
 
-  // Index computation.
-  __ Ldr(temp, HeapOperand(obj, offset_offset));         // temp := str.offset.
-  __ Ldr(array_temp, HeapOperand(obj, value_offset));    // array_temp := str.offset.
-  __ Add(temp, temp, idx);
-  DCHECK_EQ(data_offset.Int32Value() % 2, 0);            // We'll compensate by shifting.
-  __ Add(temp, temp, Operand(data_offset.Int32Value() / 2));
+  __ Add(array_temp, obj, Operand(value_offset.Int32Value()));  // array_temp := str.value.
 
   // Load the value.
-  __ Ldrh(out, MemOperand(array_temp.X(), temp, UXTW, 1));  // out := array_temp[temp].
+  __ Ldrh(out, MemOperand(array_temp.X(), idx, UXTW, 1));  // out := array_temp[idx].
 
   __ Bind(slow_path->GetExitLabel());
 }
 
 void IntrinsicLocationsBuilderARM64::VisitStringCompareTo(HInvoke* invoke) {
-  // The inputs plus one temp.
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                             LocationSummary::kCall,
                                                             kIntrinsified);
@@ -1021,6 +993,169 @@
   __ Bind(slow_path->GetExitLabel());
 }
 
+static void GenerateVisitStringIndexOf(HInvoke* invoke,
+                                       vixl::MacroAssembler* masm,
+                                       CodeGeneratorARM64* codegen,
+                                       ArenaAllocator* allocator,
+                                       bool start_at_zero) {
+  LocationSummary* locations = invoke->GetLocations();
+  Register tmp_reg = WRegisterFrom(locations->GetTemp(0));
+
+  // Note that the null check must have been done earlier.
+  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
+
+  // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
+  // or directly dispatch if we have a constant.
+  SlowPathCodeARM64* slow_path = nullptr;
+  if (invoke->InputAt(1)->IsIntConstant()) {
+    if (static_cast<uint32_t>(invoke->InputAt(1)->AsIntConstant()->GetValue()) > 0xFFFFU) {
+      // Always needs the slow-path. We could directly dispatch to it, but this case should be
+      // rare, so for simplicity just put the full slow-path down and branch unconditionally.
+      slow_path = new (allocator) IntrinsicSlowPathARM64(invoke);
+      codegen->AddSlowPath(slow_path);
+      __ B(slow_path->GetEntryLabel());
+      __ Bind(slow_path->GetExitLabel());
+      return;
+    }
+  } else {
+    Register char_reg = WRegisterFrom(locations->InAt(1));
+    __ Mov(tmp_reg, 0xFFFF);
+    __ Cmp(char_reg, Operand(tmp_reg));
+    slow_path = new (allocator) IntrinsicSlowPathARM64(invoke);
+    codegen->AddSlowPath(slow_path);
+    __ B(hi, slow_path->GetEntryLabel());
+  }
+
+  if (start_at_zero) {
+    // Start-index = 0.
+    __ Mov(tmp_reg, 0);
+  }
+
+  __ Ldr(lr, MemOperand(tr, QUICK_ENTRYPOINT_OFFSET(kArm64WordSize, pIndexOf).Int32Value()));
+  __ Blr(lr);
+
+  if (slow_path != nullptr) {
+    __ Bind(slow_path->GetExitLabel());
+  }
+}
+
+void IntrinsicLocationsBuilderARM64::VisitStringIndexOf(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kCall,
+                                                            kIntrinsified);
+  // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
+  // best to align the inputs accordingly.
+  InvokeRuntimeCallingConvention calling_convention;
+  locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
+  locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
+  locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimInt));
+
+  // Need a temp for slow-path codepoint compare, and need to send start_index=0.
+  locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2)));
+}
+
+void IntrinsicCodeGeneratorARM64::VisitStringIndexOf(HInvoke* invoke) {
+  GenerateVisitStringIndexOf(invoke, GetVIXLAssembler(), codegen_, GetAllocator(), true);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitStringIndexOfAfter(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kCall,
+                                                            kIntrinsified);
+  // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
+  // best to align the inputs accordingly.
+  InvokeRuntimeCallingConvention calling_convention;
+  locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
+  locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
+  locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
+  locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimInt));
+
+  // Need a temp for slow-path codepoint compare.
+  locations->AddTemp(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorARM64::VisitStringIndexOfAfter(HInvoke* invoke) {
+  GenerateVisitStringIndexOf(invoke, GetVIXLAssembler(), codegen_, GetAllocator(), false);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromBytes(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kCall,
+                                                            kIntrinsified);
+  InvokeRuntimeCallingConvention calling_convention;
+  locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
+  locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
+  locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
+  locations->SetInAt(3, LocationFrom(calling_convention.GetRegisterAt(3)));
+  locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot));
+}
+
+void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromBytes(HInvoke* invoke) {
+  vixl::MacroAssembler* masm = GetVIXLAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+  // A null byte array (input 0) is not handled inline; branch to the intrinsic slow path.
+  Register byte_array = WRegisterFrom(locations->InAt(0));
+  __ Cmp(byte_array, 0);
+  SlowPathCodeARM64* slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke);
+  codegen_->AddSlowPath(slow_path);
+  __ B(eq, slow_path->GetEntryLabel());
+  // Call the entrypoint, then record PC info so it is taken at the call's return address.
+  __ Ldr(lr,
+      MemOperand(tr, QUICK_ENTRYPOINT_OFFSET(kArm64WordSize, pAllocStringFromBytes).Int32Value()));
+  __ Blr(lr);
+  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+  __ Bind(slow_path->GetExitLabel());
+}
+
+void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromChars(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kCall,
+                                                            kIntrinsified);
+  InvokeRuntimeCallingConvention calling_convention;
+  locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
+  locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
+  locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
+  locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot));
+}
+
+void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromChars(HInvoke* invoke) {
+  vixl::MacroAssembler* masm = GetVIXLAssembler();
+  // Call the entrypoint, then record PC info so it is taken at the call's return address.
+  __ Ldr(lr,
+      MemOperand(tr, QUICK_ENTRYPOINT_OFFSET(kArm64WordSize, pAllocStringFromChars).Int32Value()));
+  __ Blr(lr);
+  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+}
+
+void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromString(HInvoke* invoke) {
+  // Only input 0 (the string to copy) is read by the code generator, so only that input gets
+  // a location: the first runtime calling-convention register.
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kCall,
+                                                            kIntrinsified);
+  InvokeRuntimeCallingConvention calling_convention;
+  locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
+  // No temps are requested; the result uses the reference return location.
+  locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot));
+}
+
+void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromString(HInvoke* invoke) {
+  vixl::MacroAssembler* masm = GetVIXLAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+  // A null source string (input 0) is not handled inline; branch to the intrinsic slow path.
+  Register string_to_copy = WRegisterFrom(locations->InAt(0));
+  __ Cmp(string_to_copy, 0);
+  SlowPathCodeARM64* slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke);
+  codegen_->AddSlowPath(slow_path);
+  __ B(eq, slow_path->GetEntryLabel());
+  // Call the entrypoint, then record PC info so it is taken at the call's return address.
+  __ Ldr(lr,
+      MemOperand(tr, QUICK_ENTRYPOINT_OFFSET(kArm64WordSize, pAllocStringFromString).Int32Value()));
+  __ Blr(lr);
+  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+  __ Bind(slow_path->GetExitLabel());
+}
+
 // Unimplemented intrinsics.
 
 #define UNIMPLEMENTED_INTRINSIC(Name)                                                  \
@@ -1030,9 +1165,8 @@
 }
 
 UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
-UNIMPLEMENTED_INTRINSIC(StringIndexOf)
-UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter)
 UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
+UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck)
 
 }  // namespace arm64
 }  // namespace art
diff --git a/compiler/optimizing/intrinsics_list.h b/compiler/optimizing/intrinsics_list.h
index 10f6e1d..2c9248f 100644
--- a/compiler/optimizing/intrinsics_list.h
+++ b/compiler/optimizing/intrinsics_list.h
@@ -60,8 +60,12 @@
   V(MemoryPokeShortNative, kStatic) \
   V(StringCharAt, kDirect) \
   V(StringCompareTo, kDirect) \
+  V(StringGetCharsNoCheck, kDirect) \
   V(StringIndexOf, kDirect) \
   V(StringIndexOfAfter, kDirect) \
+  V(StringNewStringFromBytes, kStatic) \
+  V(StringNewStringFromChars, kStatic) \
+  V(StringNewStringFromString, kStatic) \
   V(UnsafeCASInt, kDirect) \
   V(UnsafeCASLong, kDirect) \
   V(UnsafeCASObject, kDirect) \
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index a8e2cdf..28b7a07 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -16,6 +16,8 @@
 
 #include "intrinsics_x86.h"
 
+#include <limits>
+
 #include "arch/x86/instruction_set_features_x86.h"
 #include "code_generator_x86.h"
 #include "entrypoints/quick/quick_entrypoints.h"
@@ -111,27 +113,9 @@
   }
 }
 
-static void MoveArguments(HInvoke* invoke, ArenaAllocator* arena, CodeGeneratorX86* codegen) {
-  if (invoke->InputCount() == 0) {
-    return;
-  }
-
-  LocationSummary* locations = invoke->GetLocations();
-  InvokeDexCallingConventionVisitor calling_convention_visitor;
-
-  // We're moving potentially two or more locations to locations that could overlap, so we need
-  // a parallel move resolver.
-  HParallelMove parallel_move(arena);
-
-  for (size_t i = 0; i < invoke->InputCount(); i++) {
-    HInstruction* input = invoke->InputAt(i);
-    Location cc_loc = calling_convention_visitor.GetNextLocation(input->GetType());
-    Location actual_loc = locations->InAt(i);
-
-    parallel_move.AddMove(actual_loc, cc_loc, input->GetType(), nullptr);
-  }
-
-  codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+static void MoveArguments(HInvoke* invoke, CodeGeneratorX86* codegen) {
+  InvokeDexCallingConventionVisitorX86 calling_convention_visitor;
+  IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor);
 }
 
 // Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified
@@ -142,11 +126,8 @@
 //       restored!
 class IntrinsicSlowPathX86 : public SlowPathCodeX86 {
  public:
-  explicit IntrinsicSlowPathX86(HInvoke* invoke, Register temp)
-    : invoke_(invoke) {
-      // The temporary register has to be EAX for x86 invokes.
-      DCHECK_EQ(temp, EAX);
-    }
+  explicit IntrinsicSlowPathX86(HInvoke* invoke)
+    : invoke_(invoke) { }
 
   void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE {
     CodeGeneratorX86* codegen = down_cast<CodeGeneratorX86*>(codegen_in);
@@ -154,7 +135,7 @@
 
     SaveLiveRegisters(codegen, invoke_->GetLocations());
 
-    MoveArguments(invoke_, codegen->GetGraph()->GetArena(), codegen);
+    MoveArguments(invoke_, codegen);
 
     if (invoke_->IsInvokeStaticOrDirect()) {
       codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), EAX);
@@ -748,7 +729,7 @@
 }
 
 static void InvokeOutOfLineIntrinsic(CodeGeneratorX86* codegen, HInvoke* invoke) {
-  MoveArguments(invoke, codegen->GetGraph()->GetArena(), codegen);
+  MoveArguments(invoke, codegen);
 
   DCHECK(invoke->IsInvokeStaticOrDirect());
   codegen->GenerateStaticOrDirectCall(invoke->AsInvokeStaticOrDirect(), EAX);
@@ -898,8 +879,6 @@
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RequiresRegister());
   locations->SetOut(Location::SameAsFirstInput());
-  // Needs to be EAX for the invoke.
-  locations->AddTemp(Location::RegisterLocation(EAX));
 }
 
 void IntrinsicCodeGeneratorX86::VisitStringCharAt(HInvoke* invoke) {
@@ -909,23 +888,17 @@
   const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
   // Location of count
   const int32_t count_offset = mirror::String::CountOffset().Int32Value();
-  // Starting offset within data array
-  const int32_t offset_offset = mirror::String::OffsetOffset().Int32Value();
-  // Start of char data with array_
-  const int32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Int32Value();
 
   Register obj = locations->InAt(0).AsRegister<Register>();
   Register idx = locations->InAt(1).AsRegister<Register>();
   Register out = locations->Out().AsRegister<Register>();
-  Location temp_loc = locations->GetTemp(0);
-  Register temp = temp_loc.AsRegister<Register>();
 
   // TODO: Maybe we can support range check elimination. Overall, though, I think it's not worth
   //       the cost.
   // TODO: For simplicity, the index parameter is requested in a register, so different from Quick
   //       we will not optimize the code for constants (which would save a register).
 
-  SlowPathCodeX86* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke, temp);
+  SlowPathCodeX86* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
   codegen_->AddSlowPath(slow_path);
 
   X86Assembler* assembler = GetAssembler();
@@ -934,12 +907,8 @@
   codegen_->MaybeRecordImplicitNullCheck(invoke);
   __ j(kAboveEqual, slow_path->GetEntryLabel());
 
-  // Get the actual element.
-  __ movl(temp, idx);                          // temp := idx.
-  __ addl(temp, Address(obj, offset_offset));  // temp := offset + idx.
-  __ movl(out, Address(obj, value_offset));    // obj := obj.array.
-  // out = out[2*temp].
-  __ movzxw(out, Address(out, temp, ScaleFactor::TIMES_2, data_offset));
+  // out = out[2*idx].
+  __ movzxw(out, Address(out, idx, ScaleFactor::TIMES_2, value_offset));
 
   __ Bind(slow_path->GetExitLabel());
 }
@@ -953,8 +922,6 @@
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
   locations->SetOut(Location::RegisterLocation(EAX));
-  // Needs to be EAX for the invoke.
-  locations->AddTemp(Location::RegisterLocation(EAX));
 }
 
 void IntrinsicCodeGeneratorX86::VisitStringCompareTo(HInvoke* invoke) {
@@ -966,8 +933,7 @@
 
   Register argument = locations->InAt(1).AsRegister<Register>();
   __ testl(argument, argument);
-  SlowPathCodeX86* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(
-      invoke, locations->GetTemp(0).AsRegister<Register>());
+  SlowPathCodeX86* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
   codegen_->AddSlowPath(slow_path);
   __ j(kEqual, slow_path->GetEntryLabel());
 
@@ -975,6 +941,227 @@
   __ Bind(slow_path->GetExitLabel());
 }
 
+static void CreateStringIndexOfLocations(HInvoke* invoke,
+                                         ArenaAllocator* allocator,
+                                         bool start_at_zero) {
+  LocationSummary* locations = new (allocator) LocationSummary(invoke,
+                                                               LocationSummary::kCallOnSlowPath,
+                                                               kIntrinsified);
+  // The data needs to be in EDI for scasw. So request that the string is there, anyways.
+  locations->SetInAt(0, Location::RegisterLocation(EDI));
+  // If we look for a constant char, we'll still have to copy it into EAX. So just request the
+  // allocator to do that, anyways. We can still do the constant check by checking the parameter
+  // of the instruction explicitly.
+  // Note: This works as we don't clobber EAX anywhere.
+  locations->SetInAt(1, Location::RegisterLocation(EAX));
+  if (!start_at_zero) {
+    locations->SetInAt(2, Location::RequiresRegister());          // The starting index.
+  }
+  // As we clobber EDI during execution anyways, also use it as the output.
+  locations->SetOut(Location::SameAsFirstInput());
+
+  // repne scasw uses ECX as the counter.
+  locations->AddTemp(Location::RegisterLocation(ECX));
+  // Need another temporary to be able to compute the result.
+  locations->AddTemp(Location::RequiresRegister());
+}
+
+static void GenerateStringIndexOf(HInvoke* invoke,
+                                  X86Assembler* assembler,
+                                  CodeGeneratorX86* codegen,
+                                  ArenaAllocator* allocator,
+                                  bool start_at_zero) {
+  LocationSummary* locations = invoke->GetLocations();
+
+  // Note that the null check must have been done earlier.
+  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
+
+  Register string_obj = locations->InAt(0).AsRegister<Register>();
+  Register search_value = locations->InAt(1).AsRegister<Register>();
+  Register counter = locations->GetTemp(0).AsRegister<Register>();
+  Register string_length = locations->GetTemp(1).AsRegister<Register>();
+  Register out = locations->Out().AsRegister<Register>();
+
+  // Check our assumptions for registers.
+  DCHECK_EQ(string_obj, EDI);
+  DCHECK_EQ(search_value, EAX);
+  DCHECK_EQ(counter, ECX);
+  DCHECK_EQ(out, EDI);
+
+  // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
+  // or directly dispatch if we have a constant.
+  SlowPathCodeX86* slow_path = nullptr;
+  if (invoke->InputAt(1)->IsIntConstant()) {
+    if (static_cast<uint32_t>(invoke->InputAt(1)->AsIntConstant()->GetValue()) >
+    std::numeric_limits<uint16_t>::max()) {
+      // Always needs the slow-path. We could directly dispatch to it, but this case should be
+      // rare, so for simplicity just put the full slow-path down and branch unconditionally.
+      slow_path = new (allocator) IntrinsicSlowPathX86(invoke);
+      codegen->AddSlowPath(slow_path);
+      __ jmp(slow_path->GetEntryLabel());
+      __ Bind(slow_path->GetExitLabel());
+      return;
+    }
+  } else {
+    __ cmpl(search_value, Immediate(std::numeric_limits<uint16_t>::max()));
+    slow_path = new (allocator) IntrinsicSlowPathX86(invoke);
+    codegen->AddSlowPath(slow_path);
+    __ j(kAbove, slow_path->GetEntryLabel());
+  }
+
+  // From here down, we know that we are looking for a char that fits in 16 bits.
+  // Location of reference to data array within the String object.
+  int32_t value_offset = mirror::String::ValueOffset().Int32Value();
+  // Location of count within the String object.
+  int32_t count_offset = mirror::String::CountOffset().Int32Value();
+
+  // Load string length, i.e., the count field of the string.
+  __ movl(string_length, Address(string_obj, count_offset));
+
+  // Do a zero-length check.
+  // TODO: Support jecxz.
+  Label not_found_label;
+  __ testl(string_length, string_length);
+  __ j(kEqual, &not_found_label);
+
+  if (start_at_zero) {
+    // Number of chars to scan is the same as the string length.
+    __ movl(counter, string_length);
+
+    // Move to the start of the string.
+    __ addl(string_obj, Immediate(value_offset));
+  } else {
+    Register start_index = locations->InAt(2).AsRegister<Register>();
+
+    // Do a start_index check.
+    __ cmpl(start_index, string_length);
+    __ j(kGreaterEqual, &not_found_label);
+
+    // Clamp the start index to be >= 0: counter := (start_index > 0) ? start_index : 0.
+    __ xorl(counter, counter);
+    __ cmpl(start_index, Immediate(0));
+    __ cmovl(kGreater, counter, start_index);
+
+    // Move to the start of the string: string_obj + value_offset + 2 * start_index.
+    __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
+
+    // Now update ecx (the repne scasw work counter). We have string.length - start_index left to
+    // compare.
+    __ negl(counter);
+    __ leal(counter, Address(string_length, counter, ScaleFactor::TIMES_1, 0));
+  }
+
+  // Everything is set up for repne scasw:
+  //   * Comparison address in EDI.
+  //   * Counter in ECX.
+  __ repne_scasw();
+
+  // Did we find a match?
+  __ j(kNotEqual, &not_found_label);
+
+  // Yes, we matched.  Compute the index of the result.
+  __ subl(string_length, counter);
+  __ leal(out, Address(string_length, -1));
+
+  Label done;
+  __ jmp(&done);
+
+  // Failed to match; return -1.
+  __ Bind(&not_found_label);
+  __ movl(out, Immediate(-1));
+
+  // And join up at the end.
+  __ Bind(&done);
+  if (slow_path != nullptr) {
+    __ Bind(slow_path->GetExitLabel());
+  }
+}
+
+void IntrinsicLocationsBuilderX86::VisitStringIndexOf(HInvoke* invoke) {
+  CreateStringIndexOfLocations(invoke, arena_, true);
+}
+
+void IntrinsicCodeGeneratorX86::VisitStringIndexOf(HInvoke* invoke) {
+  GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), true);
+}
+
+void IntrinsicLocationsBuilderX86::VisitStringIndexOfAfter(HInvoke* invoke) {
+  CreateStringIndexOfLocations(invoke, arena_, false);
+}
+
+void IntrinsicCodeGeneratorX86::VisitStringIndexOfAfter(HInvoke* invoke) {
+  GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), false);
+}
+
+void IntrinsicLocationsBuilderX86::VisitStringNewStringFromBytes(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kCall,
+                                                            kIntrinsified);
+  InvokeRuntimeCallingConvention calling_convention;
+  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+  locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
+  locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3)));
+  locations->SetOut(Location::RegisterLocation(EAX));
+}
+
+void IntrinsicCodeGeneratorX86::VisitStringNewStringFromBytes(HInvoke* invoke) {
+  X86Assembler* assembler = GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+
+  Register byte_array = locations->InAt(0).AsRegister<Register>();
+  __ testl(byte_array, byte_array);
+  SlowPathCodeX86* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
+  codegen_->AddSlowPath(slow_path);
+  __ j(kEqual, slow_path->GetEntryLabel());
+
+  __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pAllocStringFromBytes)));
+  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+  __ Bind(slow_path->GetExitLabel());
+}
+
+void IntrinsicLocationsBuilderX86::VisitStringNewStringFromChars(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kCall,
+                                                            kIntrinsified);
+  InvokeRuntimeCallingConvention calling_convention;
+  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+  locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
+  locations->SetOut(Location::RegisterLocation(EAX));
+}
+
+void IntrinsicCodeGeneratorX86::VisitStringNewStringFromChars(HInvoke* invoke) {
+  X86Assembler* assembler = GetAssembler();
+
+  __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pAllocStringFromChars)));
+  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+}
+
+void IntrinsicLocationsBuilderX86::VisitStringNewStringFromString(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kCall,
+                                                            kIntrinsified);
+  InvokeRuntimeCallingConvention calling_convention;
+  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+  locations->SetOut(Location::RegisterLocation(EAX));
+}
+
+void IntrinsicCodeGeneratorX86::VisitStringNewStringFromString(HInvoke* invoke) {
+  X86Assembler* assembler = GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+
+  Register string_to_copy = locations->InAt(0).AsRegister<Register>();
+  __ testl(string_to_copy, string_to_copy);
+  SlowPathCodeX86* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
+  codegen_->AddSlowPath(slow_path);
+  __ j(kEqual, slow_path->GetEntryLabel());
+
+  __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pAllocStringFromString)));
+  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+  __ Bind(slow_path->GetExitLabel());
+}
+
 static void GenPeek(LocationSummary* locations, Primitive::Type size, X86Assembler* assembler) {
   Register address = locations->InAt(0).AsRegisterPairLow<Register>();
   Location out_loc = locations->Out();
@@ -1038,7 +1225,7 @@
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
   locations->SetInAt(0, Location::RequiresRegister());
-  HInstruction *value = invoke->InputAt(1);
+  HInstruction* value = invoke->InputAt(1);
   if (size == Primitive::kPrimByte) {
     locations->SetInAt(1, Location::ByteRegisterOrConstant(EDX, value));
   } else {
@@ -1535,8 +1722,7 @@
 }
 
 UNIMPLEMENTED_INTRINSIC(MathRoundDouble)
-UNIMPLEMENTED_INTRINSIC(StringIndexOf)
-UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter)
+UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck)
 UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
 UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
 
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 5d24d1f..0efa714 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -16,6 +16,8 @@
 
 #include "intrinsics_x86_64.h"
 
+#include <limits>
+
 #include "arch/x86_64/instruction_set_features_x86_64.h"
 #include "code_generator_x86_64.h"
 #include "entrypoints/quick/quick_entrypoints.h"
@@ -103,27 +105,9 @@
   }
 }
 
-static void MoveArguments(HInvoke* invoke, ArenaAllocator* arena, CodeGeneratorX86_64* codegen) {
-  if (invoke->InputCount() == 0) {
-    return;
-  }
-
-  LocationSummary* locations = invoke->GetLocations();
-  InvokeDexCallingConventionVisitor calling_convention_visitor;
-
-  // We're moving potentially two or more locations to locations that could overlap, so we need
-  // a parallel move resolver.
-  HParallelMove parallel_move(arena);
-
-  for (size_t i = 0; i < invoke->InputCount(); i++) {
-    HInstruction* input = invoke->InputAt(i);
-    Location cc_loc = calling_convention_visitor.GetNextLocation(input->GetType());
-    Location actual_loc = locations->InAt(i);
-
-    parallel_move.AddMove(actual_loc, cc_loc, input->GetType(), nullptr);
-  }
-
-  codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+static void MoveArguments(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
+  InvokeDexCallingConventionVisitorX86_64 calling_convention_visitor;
+  IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor);
 }
 
 // Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified
@@ -142,7 +126,7 @@
 
     SaveLiveRegisters(codegen, invoke_->GetLocations());
 
-    MoveArguments(invoke_, codegen->GetGraph()->GetArena(), codegen);
+    MoveArguments(invoke_, codegen);
 
     if (invoke_->IsInvokeStaticOrDirect()) {
       codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), CpuRegister(RDI));
@@ -622,7 +606,7 @@
 }
 
 static void InvokeOutOfLineIntrinsic(CodeGeneratorX86_64* codegen, HInvoke* invoke) {
-  MoveArguments(invoke, codegen->GetGraph()->GetArena(), codegen);
+  MoveArguments(invoke, codegen);
 
   DCHECK(invoke->IsInvokeStaticOrDirect());
   codegen->GenerateStaticOrDirectCall(invoke->AsInvokeStaticOrDirect(), CpuRegister(RDI));
@@ -801,7 +785,7 @@
   __ Bind(&nan);
 
   //  output = 0
-  __ xorq(out, out);
+  __ xorl(out, out);
   __ Bind(&done);
 }
 
@@ -823,16 +807,10 @@
   const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
   // Location of count
   const int32_t count_offset = mirror::String::CountOffset().Int32Value();
-  // Starting offset within data array
-  const int32_t offset_offset = mirror::String::OffsetOffset().Int32Value();
-  // Start of char data with array_
-  const int32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Int32Value();
 
   CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
   CpuRegister idx = locations->InAt(1).AsRegister<CpuRegister>();
   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
-  Location temp_loc = locations->GetTemp(0);
-  CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
 
   // TODO: Maybe we can support range check elimination. Overall, though, I think it's not worth
   //       the cost.
@@ -848,12 +826,8 @@
   codegen_->MaybeRecordImplicitNullCheck(invoke);
   __ j(kAboveEqual, slow_path->GetEntryLabel());
 
-  // Get the actual element.
-  __ movl(temp, idx);                          // temp := idx.
-  __ addl(temp, Address(obj, offset_offset));  // temp := offset + idx.
-  __ movl(out, Address(obj, value_offset));    // obj := obj.array.
-  // out = out[2*temp].
-  __ movzxw(out, Address(out, temp, ScaleFactor::TIMES_2, data_offset));
+  // out = out[2*idx].
+  __ movzxw(out, Address(out, idx, ScaleFactor::TIMES_2, value_offset));
 
   __ Bind(slow_path->GetExitLabel());
 }
@@ -886,6 +860,229 @@
   __ Bind(slow_path->GetExitLabel());
 }
 
+static void CreateStringIndexOfLocations(HInvoke* invoke,
+                                         ArenaAllocator* allocator,
+                                         bool start_at_zero) {
+  LocationSummary* locations = new (allocator) LocationSummary(invoke,
+                                                               LocationSummary::kCallOnSlowPath,
+                                                               kIntrinsified);
+  // The data needs to be in RDI for scasw. So request that the string is there, anyways.
+  locations->SetInAt(0, Location::RegisterLocation(RDI));
+  // If we look for a constant char, we'll still have to copy it into RAX. So just request the
+  // allocator to do that, anyways. We can still do the constant check by checking the parameter
+  // of the instruction explicitly.
+  // Note: This works as we don't clobber RAX anywhere.
+  locations->SetInAt(1, Location::RegisterLocation(RAX));
+  if (!start_at_zero) {
+    locations->SetInAt(2, Location::RequiresRegister());          // The starting index.
+  }
+  // As we clobber RDI during execution anyways, also use it as the output.
+  locations->SetOut(Location::SameAsFirstInput());
+
+  // repne scasw uses RCX as the counter.
+  locations->AddTemp(Location::RegisterLocation(RCX));
+  // Need another temporary to be able to compute the result.
+  locations->AddTemp(Location::RequiresRegister());
+}
+
+static void GenerateStringIndexOf(HInvoke* invoke,
+                                  X86_64Assembler* assembler,
+                                  CodeGeneratorX86_64* codegen,
+                                  ArenaAllocator* allocator,
+                                  bool start_at_zero) {
+  LocationSummary* locations = invoke->GetLocations();
+
+  // Note that the null check must have been done earlier.
+  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
+
+  CpuRegister string_obj = locations->InAt(0).AsRegister<CpuRegister>();
+  CpuRegister search_value = locations->InAt(1).AsRegister<CpuRegister>();
+  CpuRegister counter = locations->GetTemp(0).AsRegister<CpuRegister>();
+  CpuRegister string_length = locations->GetTemp(1).AsRegister<CpuRegister>();
+  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+
+  // Check our assumptions for registers.
+  DCHECK_EQ(string_obj.AsRegister(), RDI);
+  DCHECK_EQ(search_value.AsRegister(), RAX);
+  DCHECK_EQ(counter.AsRegister(), RCX);
+  DCHECK_EQ(out.AsRegister(), RDI);
+
+  // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
+  // or directly dispatch if we have a constant.
+  SlowPathCodeX86_64* slow_path = nullptr;
+  if (invoke->InputAt(1)->IsIntConstant()) {
+    if (static_cast<uint32_t>(invoke->InputAt(1)->AsIntConstant()->GetValue()) >
+    std::numeric_limits<uint16_t>::max()) {
+      // Always needs the slow-path. We could directly dispatch to it, but this case should be
+      // rare, so for simplicity just put the full slow-path down and branch unconditionally.
+      slow_path = new (allocator) IntrinsicSlowPathX86_64(invoke);
+      codegen->AddSlowPath(slow_path);
+      __ jmp(slow_path->GetEntryLabel());
+      __ Bind(slow_path->GetExitLabel());
+      return;
+    }
+  } else {
+    __ cmpl(search_value, Immediate(std::numeric_limits<uint16_t>::max()));
+    slow_path = new (allocator) IntrinsicSlowPathX86_64(invoke);
+    codegen->AddSlowPath(slow_path);
+    __ j(kAbove, slow_path->GetEntryLabel());
+  }
+
+  // From here down, we know that we are looking for a char that fits in 16 bits.
+  // Location of reference to data array within the String object.
+  int32_t value_offset = mirror::String::ValueOffset().Int32Value();
+  // Location of count within the String object.
+  int32_t count_offset = mirror::String::CountOffset().Int32Value();
+
+  // Load string length, i.e., the count field of the string.
+  __ movl(string_length, Address(string_obj, count_offset));
+
+  // Do a length check.
+  // TODO: Support jecxz.
+  Label not_found_label;
+  __ testl(string_length, string_length);
+  __ j(kEqual, &not_found_label);
+
+  if (start_at_zero) {
+    // Number of chars to scan is the same as the string length.
+    __ movl(counter, string_length);
+
+    // Move to the start of the string.
+    __ addq(string_obj, Immediate(value_offset));
+  } else {
+    CpuRegister start_index = locations->InAt(2).AsRegister<CpuRegister>();
+
+    // Do a start_index check.
+    __ cmpl(start_index, string_length);
+    __ j(kGreaterEqual, &not_found_label);
+
+    // Ensure we have a start index >= 0;
+    __ xorl(counter, counter);
+    __ cmpl(start_index, Immediate(0));
+    __ cmov(kGreater, counter, start_index, false);  // 32-bit copy is enough.
+
+    // Move to the start of the string: string_obj + value_offset + 2 * start_index.
+    __ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
+
+    // Now update ecx, the work counter: it's gonna be string.length - start_index.
+    __ negq(counter);  // Needs to be 64-bit negation, as the address computation is 64-bit.
+    __ leaq(counter, Address(string_length, counter, ScaleFactor::TIMES_1, 0));
+  }
+
+  // Everything is set up for repne scasw:
+  //   * Comparison address in RDI.
+  //   * Counter in ECX.
+  __ repne_scasw();
+
+  // Did we find a match?
+  __ j(kNotEqual, &not_found_label);
+
+  // Yes, we matched.  Compute the index of the result.
+  __ subl(string_length, counter);
+  __ leal(out, Address(string_length, -1));
+
+  Label done;
+  __ jmp(&done);
+
+  // Failed to match; return -1.
+  __ Bind(&not_found_label);
+  __ movl(out, Immediate(-1));
+
+  // And join up at the end.
+  __ Bind(&done);
+  if (slow_path != nullptr) {
+    __ Bind(slow_path->GetExitLabel());
+  }
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitStringIndexOf(HInvoke* invoke) {
+  CreateStringIndexOfLocations(invoke, arena_, true);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitStringIndexOf(HInvoke* invoke) {
+  GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), true);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitStringIndexOfAfter(HInvoke* invoke) {
+  CreateStringIndexOfLocations(invoke, arena_, false);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitStringIndexOfAfter(HInvoke* invoke) {
+  GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), false);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kCall,
+                                                            kIntrinsified);
+  InvokeRuntimeCallingConvention calling_convention;
+  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+  locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
+  locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3)));
+  locations->SetOut(Location::RegisterLocation(RAX));
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) {
+  X86_64Assembler* assembler = GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+
+  CpuRegister byte_array = locations->InAt(0).AsRegister<CpuRegister>();
+  __ testl(byte_array, byte_array);
+  SlowPathCodeX86_64* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
+  codegen_->AddSlowPath(slow_path);
+  __ j(kEqual, slow_path->GetEntryLabel());
+
+  __ gs()->call(Address::Absolute(
+        QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromBytes), true));
+  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+  __ Bind(slow_path->GetExitLabel());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromChars(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kCall,
+                                                            kIntrinsified);
+  InvokeRuntimeCallingConvention calling_convention;
+  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+  locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
+  locations->SetOut(Location::RegisterLocation(RAX));
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromChars(HInvoke* invoke) {
+  X86_64Assembler* assembler = GetAssembler();
+
+  __ gs()->call(Address::Absolute(
+        QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromChars), true));
+  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromString(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kCall,
+                                                            kIntrinsified);
+  InvokeRuntimeCallingConvention calling_convention;
+  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+  locations->SetOut(Location::RegisterLocation(RAX));
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromString(HInvoke* invoke) {
+  X86_64Assembler* assembler = GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+
+  CpuRegister string_to_copy = locations->InAt(0).AsRegister<CpuRegister>();
+  __ testl(string_to_copy, string_to_copy);
+  SlowPathCodeX86_64* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
+  codegen_->AddSlowPath(slow_path);
+  __ j(kEqual, slow_path->GetEntryLabel());
+
+  __ gs()->call(Address::Absolute(
+        QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromString), true));
+  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+  __ Bind(slow_path->GetExitLabel());
+}
+
 static void GenPeek(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) {
   CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>();
   CpuRegister out = locations->Out().AsRegister<CpuRegister>();  // == address, here for clarity.
@@ -1389,8 +1586,7 @@
 void IntrinsicCodeGeneratorX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) {    \
 }
 
-UNIMPLEMENTED_INTRINSIC(StringIndexOf)
-UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter)
+UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck)
 UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
 UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
 
diff --git a/compiler/optimizing/licm.cc b/compiler/optimizing/licm.cc
index bf9b8e5..2535ea2 100644
--- a/compiler/optimizing/licm.cc
+++ b/compiler/optimizing/licm.cc
@@ -39,8 +39,9 @@
     }
   }
 
-  if (instruction->HasEnvironment()) {
-    HEnvironment* environment = instruction->GetEnvironment();
+  for (HEnvironment* environment = instruction->GetEnvironment();
+       environment != nullptr;
+       environment = environment->GetParent()) {
     for (size_t i = 0, e = environment->Size(); i < e; ++i) {
       HInstruction* input = environment->GetInstructionAt(i);
       if (input != nullptr) {
@@ -63,13 +64,15 @@
  * If `environment` has a loop header phi, we replace it with its first input.
  */
 static void UpdateLoopPhisIn(HEnvironment* environment, HLoopInformation* info) {
-  for (size_t i = 0, e = environment->Size(); i < e; ++i) {
-    HInstruction* input = environment->GetInstructionAt(i);
-    if (input != nullptr && IsPhiOf(input, info->GetHeader())) {
-      environment->RemoveAsUserOfInput(i);
-      HInstruction* incoming = input->InputAt(0);
-      environment->SetRawEnvAt(i, incoming);
-      incoming->AddEnvUseAt(environment, i);
+  for (; environment != nullptr; environment = environment->GetParent()) {
+    for (size_t i = 0, e = environment->Size(); i < e; ++i) {
+      HInstruction* input = environment->GetInstructionAt(i);
+      if (input != nullptr && IsPhiOf(input, info->GetHeader())) {
+        environment->RemoveAsUserOfInput(i);
+        HInstruction* incoming = input->InputAt(0);
+        environment->SetRawEnvAt(i, incoming);
+        incoming->AddEnvUseAt(environment, i);
+      }
     }
   }
 }
diff --git a/compiler/optimizing/linearize_test.cc b/compiler/optimizing/linearize_test.cc
index 7818c60..4f259b5 100644
--- a/compiler/optimizing/linearize_test.cc
+++ b/compiler/optimizing/linearize_test.cc
@@ -39,7 +39,7 @@
 static void TestCode(const uint16_t* data, const int* expected_order, size_t number_of_blocks) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
-  HGraph* graph = new (&allocator) HGraph(&allocator);
+  HGraph* graph = CreateGraph(&allocator);
   HGraphBuilder builder(graph);
   const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
   bool graph_built = builder.BuildGraph(*item);
diff --git a/compiler/optimizing/live_ranges_test.cc b/compiler/optimizing/live_ranges_test.cc
index 5236773..7cb00a1 100644
--- a/compiler/optimizing/live_ranges_test.cc
+++ b/compiler/optimizing/live_ranges_test.cc
@@ -32,7 +32,7 @@
 namespace art {
 
 static HGraph* BuildGraph(const uint16_t* data, ArenaAllocator* allocator) {
-  HGraph* graph = new (allocator) HGraph(allocator);
+  HGraph* graph = CreateGraph(allocator);
   HGraphBuilder builder(graph);
   const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
   builder.BuildGraph(*item);
diff --git a/compiler/optimizing/liveness_test.cc b/compiler/optimizing/liveness_test.cc
index 8a96ee9..9d7d0b6 100644
--- a/compiler/optimizing/liveness_test.cc
+++ b/compiler/optimizing/liveness_test.cc
@@ -46,7 +46,7 @@
 static void TestCode(const uint16_t* data, const char* expected) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
-  HGraph* graph = new (&allocator) HGraph(&allocator);
+  HGraph* graph = CreateGraph(&allocator);
   HGraphBuilder builder(graph);
   const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
   bool graph_built = builder.BuildGraph(*item);
@@ -445,44 +445,40 @@
 
 TEST(LivenessTest, Loop6) {
   // Bitsets are made of:
-  // (constant0, constant4, constant5, phi in block 2, phi in block 8)
+  // (constant0, constant4, constant5, phi in block 2)
   const char* expected =
     "Block 0\n"
-    "  live in: (00000)\n"
-    "  live out: (11100)\n"
-    "  kill: (11100)\n"
+    "  live in: (0000)\n"
+    "  live out: (1110)\n"
+    "  kill: (1110)\n"
     "Block 1\n"
-    "  live in: (11100)\n"
-    "  live out: (01100)\n"
-    "  kill: (00000)\n"
+    "  live in: (1110)\n"
+    "  live out: (0110)\n"
+    "  kill: (0000)\n"
     "Block 2\n"  // loop header
-    "  live in: (01100)\n"
-    "  live out: (01110)\n"
-    "  kill: (00010)\n"
+    "  live in: (0110)\n"
+    "  live out: (0111)\n"
+    "  kill: (0001)\n"
     "Block 3\n"
-    "  live in: (01100)\n"
-    "  live out: (01100)\n"
-    "  kill: (00000)\n"
-    "Block 4\n"  // original back edge
-    "  live in: (01100)\n"
-    "  live out: (01100)\n"
-    "  kill: (00000)\n"
-    "Block 5\n"  // original back edge
-    "  live in: (01100)\n"
-    "  live out: (01100)\n"
-    "  kill: (00000)\n"
+    "  live in: (0110)\n"
+    "  live out: (0110)\n"
+    "  kill: (0000)\n"
+    "Block 4\n"  // back edge
+    "  live in: (0110)\n"
+    "  live out: (0110)\n"
+    "  kill: (0000)\n"
+    "Block 5\n"  // back edge
+    "  live in: (0110)\n"
+    "  live out: (0110)\n"
+    "  kill: (0000)\n"
     "Block 6\n"  // return block
-    "  live in: (00010)\n"
-    "  live out: (00000)\n"
-    "  kill: (00000)\n"
+    "  live in: (0001)\n"
+    "  live out: (0000)\n"
+    "  kill: (0000)\n"
     "Block 7\n"  // exit block
-    "  live in: (00000)\n"
-    "  live out: (00000)\n"
-    "  kill: (00000)\n"
-    "Block 8\n"  // synthesized back edge
-    "  live in: (01100)\n"
-    "  live out: (01100)\n"
-    "  kill: (00001)\n";
+    "  live in: (0000)\n"
+    "  live out: (0000)\n"
+    "  kill: (0000)\n";
 
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
diff --git a/compiler/optimizing/locations.cc b/compiler/optimizing/locations.cc
index a1ae670..42aba04 100644
--- a/compiler/optimizing/locations.cc
+++ b/compiler/optimizing/locations.cc
@@ -25,8 +25,6 @@
                                  bool intrinsified)
     : inputs_(instruction->GetBlock()->GetGraph()->GetArena(), instruction->InputCount()),
       temps_(instruction->GetBlock()->GetGraph()->GetArena(), 0),
-      environment_(instruction->GetBlock()->GetGraph()->GetArena(),
-                   instruction->EnvironmentSize()),
       output_overlaps_(Location::kOutputOverlap),
       call_kind_(call_kind),
       stack_mask_(nullptr),
@@ -37,10 +35,6 @@
   for (size_t i = 0; i < instruction->InputCount(); ++i) {
     inputs_.Put(i, Location());
   }
-  environment_.SetSize(instruction->EnvironmentSize());
-  for (size_t i = 0; i < instruction->EnvironmentSize(); ++i) {
-    environment_.Put(i, Location());
-  }
   instruction->SetLocations(this);
 
   if (NeedsSafepoint()) {
diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h
index c3a9915..09bbb33 100644
--- a/compiler/optimizing/locations.h
+++ b/compiler/optimizing/locations.h
@@ -525,14 +525,6 @@
     return temps_.Size();
   }
 
-  void SetEnvironmentAt(uint32_t at, Location location) {
-    environment_.Put(at, location);
-  }
-
-  Location GetEnvironmentAt(uint32_t at) const {
-    return environment_.Get(at);
-  }
-
   Location Out() const { return output_; }
 
   bool CanCall() const { return call_kind_ != kNoCall; }
@@ -602,7 +594,6 @@
  private:
   GrowableArray<Location> inputs_;
   GrowableArray<Location> temps_;
-  GrowableArray<Location> environment_;
   // Whether the output overlaps with any of the inputs. If it overlaps, then it cannot
   // share the same register as the inputs.
   Location::OutputOverlap output_overlaps_;
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 6ab57b8..47da9cc 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -16,7 +16,9 @@
 
 #include "nodes.h"
 
+#include "code_generator.h"
 #include "ssa_builder.h"
+#include "base/bit_vector-inl.h"
 #include "utils/growable_array.h"
 #include "scoped_thread_state_change.h"
 
@@ -37,8 +39,9 @@
     instruction->RemoveAsUserOfInput(i);
   }
 
-  HEnvironment* environment = instruction->GetEnvironment();
-  if (environment != nullptr) {
+  for (HEnvironment* environment = instruction->GetEnvironment();
+       environment != nullptr;
+       environment = environment->GetParent()) {
     for (size_t i = 0, e = environment->Size(); i < e; ++i) {
       if (environment->GetInstructionAt(i) != nullptr) {
         environment->RemoveAsUserOfInput(i);
@@ -191,24 +194,6 @@
 void HGraph::SimplifyLoop(HBasicBlock* header) {
   HLoopInformation* info = header->GetLoopInformation();
 
-  // If there are more than one back edge, make them branch to the same block that
-  // will become the only back edge. This simplifies finding natural loops in the
-  // graph.
-  // Also, if the loop is a do/while (that is the back edge is an if), change the
-  // back edge to be a goto. This simplifies code generation of suspend cheks.
-  if (info->NumberOfBackEdges() > 1 || info->GetBackEdges().Get(0)->GetLastInstruction()->IsIf()) {
-    HBasicBlock* new_back_edge = new (arena_) HBasicBlock(this, header->GetDexPc());
-    AddBlock(new_back_edge);
-    new_back_edge->AddInstruction(new (arena_) HGoto());
-    for (size_t pred = 0, e = info->GetBackEdges().Size(); pred < e; ++pred) {
-      HBasicBlock* back_edge = info->GetBackEdges().Get(pred);
-      back_edge->ReplaceSuccessor(header, new_back_edge);
-    }
-    info->ClearBackEdges();
-    info->AddBackEdge(new_back_edge);
-    new_back_edge->AddSuccessor(header);
-  }
-
   // Make sure the loop has only one pre header. This simplifies SSA building by having
   // to just look at the pre header to know which locals are initialized at entry of the
   // loop.
@@ -218,11 +203,9 @@
     AddBlock(pre_header);
     pre_header->AddInstruction(new (arena_) HGoto());
 
-    ArenaBitVector back_edges(arena_, GetBlocks().Size(), false);
-    HBasicBlock* back_edge = info->GetBackEdges().Get(0);
     for (size_t pred = 0; pred < header->GetPredecessors().Size(); ++pred) {
       HBasicBlock* predecessor = header->GetPredecessors().Get(pred);
-      if (predecessor != back_edge) {
+      if (!info->IsBackEdge(*predecessor)) {
         predecessor->ReplaceSuccessor(header, pre_header);
         pred--;
       }
@@ -230,9 +213,17 @@
     pre_header->AddSuccessor(header);
   }
 
-  // Make sure the second predecessor of a loop header is the back edge.
-  if (header->GetPredecessors().Get(1) != info->GetBackEdges().Get(0)) {
-    header->SwapPredecessors();
+  // Make sure the first predecessor of a loop header is the incoming block.
+  if (info->IsBackEdge(*header->GetPredecessors().Get(0))) {
+    HBasicBlock* to_swap = header->GetPredecessors().Get(0);
+    for (size_t pred = 1, e = header->GetPredecessors().Size(); pred < e; ++pred) {
+      HBasicBlock* predecessor = header->GetPredecessors().Get(pred);
+      if (!info->IsBackEdge(*predecessor)) {
+        header->predecessors_.Put(pred, to_swap);
+        header->predecessors_.Put(0, predecessor);
+        break;
+      }
+    }
   }
 
   // Place the suspend check at the beginning of the header, so that live registers
@@ -303,25 +294,6 @@
   return cached_null_constant_;
 }
 
-template <class InstructionType, typename ValueType>
-InstructionType* HGraph::CreateConstant(ValueType value,
-                                        ArenaSafeMap<ValueType, InstructionType*>* cache) {
-  // Try to find an existing constant of the given value.
-  InstructionType* constant = nullptr;
-  auto cached_constant = cache->find(value);
-  if (cached_constant != cache->end()) {
-    constant = cached_constant->second;
-  }
-
-  // If not found or previously deleted, create and cache a new instruction.
-  if (constant == nullptr || constant->GetBlock() == nullptr) {
-    constant = new (arena_) InstructionType(value);
-    cache->Overwrite(value, constant);
-    InsertConstant(constant);
-  }
-  return constant;
-}
-
 HConstant* HGraph::GetConstant(Primitive::Type type, int64_t value) {
   switch (type) {
     case Primitive::Type::kPrimBoolean:
@@ -343,6 +315,18 @@
   }
 }
 
+void HGraph::CacheFloatConstant(HFloatConstant* constant) {
+  int32_t value = bit_cast<int32_t, float>(constant->GetValue());  // Cache key: raw float bits.
+  DCHECK(cached_float_constants_.find(value) == cached_float_constants_.end());
+  cached_float_constants_.Overwrite(value, constant);  // DCHECK above: key was not cached yet.
+}
+
+void HGraph::CacheDoubleConstant(HDoubleConstant* constant) {
+  int64_t value = bit_cast<int64_t, double>(constant->GetValue());  // Cache key: raw double bits.
+  DCHECK(cached_double_constants_.find(value) == cached_double_constants_.end());
+  cached_double_constants_.Overwrite(value, constant);  // DCHECK above: key was not cached yet.
+}
+
 void HLoopInformation::Add(HBasicBlock* block) {
   blocks_.SetBit(block->GetBlockId());
 }
@@ -364,26 +348,60 @@
 }
 
 bool HLoopInformation::Populate() {
-  DCHECK_EQ(GetBackEdges().Size(), 1u);
-  HBasicBlock* back_edge = GetBackEdges().Get(0);
-  DCHECK(back_edge->GetDominator() != nullptr);
-  if (!header_->Dominates(back_edge)) {
-    // This loop is not natural. Do not bother going further.
-    return false;
-  }
+  DCHECK_EQ(blocks_.NumSetBits(), 0u) << "Loop information has already been populated";
+  for (size_t i = 0, e = GetBackEdges().Size(); i < e; ++i) {
+    HBasicBlock* back_edge = GetBackEdges().Get(i);
+    DCHECK(back_edge->GetDominator() != nullptr);
+    if (!header_->Dominates(back_edge)) {
+      // This loop is not natural. Do not bother going further.
+      return false;
+    }
 
-  // Populate this loop: starting with the back edge, recursively add predecessors
-  // that are not already part of that loop. Set the header as part of the loop
-  // to end the recursion.
-  // This is a recursive implementation of the algorithm described in
-  // "Advanced Compiler Design & Implementation" (Muchnick) p192.
-  blocks_.SetBit(header_->GetBlockId());
-  PopulateRecursive(back_edge);
+    // Populate this loop: starting with the back edge, recursively add predecessors
+    // that are not already part of that loop. Set the header as part of the loop
+    // to end the recursion.
+    // This is a recursive implementation of the algorithm described in
+    // "Advanced Compiler Design & Implementation" (Muchnick) p192.
+    blocks_.SetBit(header_->GetBlockId());
+    PopulateRecursive(back_edge);
+  }
   return true;
 }
 
+void HLoopInformation::Update() {
+  HGraph* graph = header_->GetGraph();
+  for (uint32_t id : blocks_.Indexes()) {
+    HBasicBlock* block = graph->GetBlocks().Get(id);
+    // Reset loop information of non-header blocks inside the loop, except
+    // members of inner nested loops because those should already have been
+    // updated by their own LoopInformation.
+    if (block->GetLoopInformation() == this && block != header_) {
+      block->SetLoopInformation(nullptr);
+    }
+  }
+  blocks_.ClearAllBits();
+
+  if (back_edges_.IsEmpty()) {
+    // The loop has been dismantled, delete its suspend check and remove info
+    // from the header.
+    DCHECK(HasSuspendCheck());
+    header_->RemoveInstruction(suspend_check_);
+    header_->SetLoopInformation(nullptr);
+    header_ = nullptr;
+    suspend_check_ = nullptr;
+  } else {
+    if (kIsDebugBuild) {
+      for (size_t i = 0, e = back_edges_.Size(); i < e; ++i) {
+        DCHECK(header_->Dominates(back_edges_.Get(i)));
+      }
+    }
+    // This loop still has reachable back edges. Repopulate the list of blocks.
+    bool populate_successful = Populate();
+    DCHECK(populate_successful);
+  }
+}
+
 HBasicBlock* HLoopInformation::GetPreHeader() const {
-  DCHECK_EQ(header_->GetPredecessors().Size(), 2u);
   return header_->GetDominator();
 }
 
@@ -395,6 +413,14 @@
   return other.blocks_.IsBitSet(header_->GetBlockId());
 }
 
+size_t HLoopInformation::GetLifetimeEnd() const {
+  size_t furthest_end = 0;  // Stays 0 when the loop has no back edges.
+  for (size_t idx = 0, count = back_edges_.Size(); idx != count; ++idx) {
+    furthest_end = std::max(furthest_end, back_edges_.Get(idx)->GetLifetimeEnd());
+  }
+  return furthest_end;
+}
+
 bool HBasicBlock::Dominates(HBasicBlock* other) const {
   // Walk up the dominator tree from `other`, to find out if `this`
   // is an ancestor.
@@ -456,6 +482,20 @@
   instructions_.InsertInstructionBefore(instruction, cursor);
 }
 
+void HBasicBlock::InsertInstructionAfter(HInstruction* instruction, HInstruction* cursor) {
+  DCHECK(!cursor->IsPhi());
+  DCHECK(!instruction->IsPhi());
+  DCHECK_EQ(instruction->GetId(), -1);  // `instruction` must not have been given an id yet.
+  DCHECK_NE(cursor->GetId(), -1);
+  DCHECK_EQ(cursor->GetBlock(), this);  // `cursor` must already belong to this block.
+  DCHECK(!instruction->IsControlFlow());
+  DCHECK(!cursor->IsControlFlow());  // NOTE(review): presumably control flow must stay last.
+  instruction->SetBlock(this);
+  instruction->SetId(GetGraph()->GetNextInstructionId());
+  UpdateInputsUsers(instruction);
+  instructions_.InsertInstructionAfter(instruction, cursor);
+}
+
 void HBasicBlock::InsertPhiAfter(HPhi* phi, HPhi* cursor) {
   DCHECK_EQ(phi->GetId(), -1);
   DCHECK_NE(cursor->GetId(), -1);
@@ -481,6 +521,7 @@
 }
 
 void HBasicBlock::RemoveInstruction(HInstruction* instruction, bool ensure_safety) {
+  DCHECK(!instruction->IsPhi());
   Remove(&instructions_, this, instruction, ensure_safety);
 }
 
@@ -488,6 +529,24 @@
   Remove(&phis_, this, phi, ensure_safety);
 }
 
+void HBasicBlock::RemoveInstructionOrPhi(HInstruction* instruction, bool ensure_safety) {
+  if (!instruction->IsPhi()) {  // Regular instructions and phis are kept in separate lists.
+    RemoveInstruction(instruction, ensure_safety);
+    return;
+  }
+  RemovePhi(instruction->AsPhi(), ensure_safety);
+}
+
+void HEnvironment::CopyFrom(const GrowableArray<HInstruction*>& locals) {
+  for (size_t slot = 0; slot < locals.Size(); ++slot) {
+    HInstruction* local = locals.Get(slot);
+    SetRawEnvAt(slot, local);
+    // The raw slot is written even for null locals; only non-null ones get an env use.
+    if (local == nullptr) continue;
+    local->AddEnvUseAt(this, slot);
+  }
+}
+
 void HEnvironment::CopyFrom(HEnvironment* env) {
   for (size_t i = 0; i < env->Size(); i++) {
     HInstruction* instruction = env->GetInstructionAt(i);
@@ -498,6 +557,28 @@
   }
 }
 
+void HEnvironment::CopyFromWithLoopPhiAdjustment(HEnvironment* env,
+                                                 HBasicBlock* loop_header) {
+  DCHECK(loop_header->IsLoopHeader());
+  for (size_t i = 0; i < env->Size(); i++) {
+    HInstruction* instruction = env->GetInstructionAt(i);
+    SetRawEnvAt(i, instruction);
+    if (instruction == nullptr) {
+      continue;
+    }
+    if (instruction->IsLoopHeaderPhi() && (instruction->GetBlock() == loop_header)) {
+      // At the end of the loop pre-header, the corresponding value for instruction
+      // is the first input of the phi.
+      HInstruction* initial = instruction->AsPhi()->InputAt(0);
+      DCHECK(initial->GetBlock()->Dominates(loop_header));
+      SetRawEnvAt(i, initial);
+      initial->AddEnvUseAt(this, i);
+    } else {
+      instruction->AddEnvUseAt(this, i);
+    }
+  }
+}
+
 void HEnvironment::RemoveAsUserOfInput(size_t index) const {
   const HUserRecord<HEnvironment*> user_record = vregs_.Get(index);
   user_record.GetInstruction()->RemoveEnvironmentUser(user_record.GetUseNode());
@@ -672,6 +753,14 @@
   input->AddUseAt(this, inputs_.Size() - 1);
 }
 
+void HPhi::RemoveInputAt(size_t index) {
+  RemoveAsUserOfInput(index);  // Done before the input record itself is deleted.
+  inputs_.DeleteAt(index);  // NOTE(review): presumably shifts later inputs down by one.
+  for (size_t i = index, e = InputCount(); i < e; ++i) {
+    InputRecordAt(i).GetUseNode()->SetIndex(i);  // Re-sync use-node indices from `index` on.
+  }
+}
+
 #define DEFINE_ACCEPT(name, super)                                             \
 void H##name::Accept(HGraphVisitor* visitor) {                                 \
   visitor->Visit##name(this);                                                  \
@@ -706,6 +795,84 @@
   }
 }
 
+HConstant* HTypeConversion::TryStaticEvaluation() const {  // Returns nullptr if not foldable.
+  HGraph* graph = GetBlock()->GetGraph();
+  if (GetInput()->IsIntConstant()) {
+    int32_t value = GetInput()->AsIntConstant()->GetValue();
+    switch (GetResultType()) {
+      case Primitive::kPrimLong:
+        return graph->GetLongConstant(static_cast<int64_t>(value));
+      case Primitive::kPrimFloat:
+        return graph->GetFloatConstant(static_cast<float>(value));
+      case Primitive::kPrimDouble:
+        return graph->GetDoubleConstant(static_cast<double>(value));
+      default:
+        return nullptr;
+    }
+  } else if (GetInput()->IsLongConstant()) {
+    int64_t value = GetInput()->AsLongConstant()->GetValue();
+    switch (GetResultType()) {
+      case Primitive::kPrimInt:
+        return graph->GetIntConstant(static_cast<int32_t>(value));
+      case Primitive::kPrimFloat:
+        return graph->GetFloatConstant(static_cast<float>(value));
+      case Primitive::kPrimDouble:
+        return graph->GetDoubleConstant(static_cast<double>(value));
+      default:
+        return nullptr;
+    }
+  } else if (GetInput()->IsFloatConstant()) {
+    float value = GetInput()->AsFloatConstant()->GetValue();
+    switch (GetResultType()) {
+      case Primitive::kPrimInt:  // NaN folds to 0; out-of-range values saturate.
+        if (std::isnan(value))
+          return graph->GetIntConstant(0);
+        if (value >= kPrimIntMax)
+          return graph->GetIntConstant(kPrimIntMax);
+        if (value <= kPrimIntMin)
+          return graph->GetIntConstant(kPrimIntMin);
+        return graph->GetIntConstant(static_cast<int32_t>(value));
+      case Primitive::kPrimLong:  // NaN folds to 0; out-of-range values saturate.
+        if (std::isnan(value))
+          return graph->GetLongConstant(0);
+        if (value >= kPrimLongMax)
+          return graph->GetLongConstant(kPrimLongMax);
+        if (value <= kPrimLongMin)
+          return graph->GetLongConstant(kPrimLongMin);
+        return graph->GetLongConstant(static_cast<int64_t>(value));
+      case Primitive::kPrimDouble:
+        return graph->GetDoubleConstant(static_cast<double>(value));
+      default:
+        return nullptr;
+    }
+  } else if (GetInput()->IsDoubleConstant()) {
+    double value = GetInput()->AsDoubleConstant()->GetValue();
+    switch (GetResultType()) {
+      case Primitive::kPrimInt:  // NaN folds to 0; out-of-range values saturate.
+        if (std::isnan(value))
+          return graph->GetIntConstant(0);
+        if (value >= kPrimIntMax)
+          return graph->GetIntConstant(kPrimIntMax);
+        if (value <= kPrimIntMin)  // Clamp at the int bound, not the long one, before the cast.
+          return graph->GetIntConstant(kPrimIntMin);
+        return graph->GetIntConstant(static_cast<int32_t>(value));
+      case Primitive::kPrimLong:  // NaN folds to 0; out-of-range values saturate.
+        if (std::isnan(value))
+          return graph->GetLongConstant(0);
+        if (value >= kPrimLongMax)
+          return graph->GetLongConstant(kPrimLongMax);
+        if (value <= kPrimLongMin)
+          return graph->GetLongConstant(kPrimLongMin);
+        return graph->GetLongConstant(static_cast<int64_t>(value));
+      case Primitive::kPrimFloat:
+        return graph->GetFloatConstant(static_cast<float>(value));
+      default:
+        return nullptr;
+    }
+  }
+  return nullptr;
+}
+
 HConstant* HUnaryOperation::TryStaticEvaluation() const {
   if (GetInput()->IsIntConstant()) {
     int32_t value = Evaluate(GetInput()->AsIntConstant()->GetValue());
@@ -867,6 +1034,15 @@
   return !GetPhis().IsEmpty() && GetFirstPhi()->GetNext() == nullptr;
 }
 
+// Counts the instructions by walking the whole list, hence linear in its length.
+size_t HInstructionList::CountSize() const {
+  size_t count = 0;
+  for (HInstruction* insn = first_instruction_; insn != nullptr; insn = insn->GetNext()) {
+    ++count;
+  }
+  return count;
+}
+
 void HInstructionList::SetBlockOfInstructions(HBasicBlock* block) const {
   for (HInstruction* current = first_instruction_;
        current != nullptr;
@@ -898,40 +1074,167 @@
   }
 }
 
-void HBasicBlock::DisconnectFromAll() {
-  DCHECK(dominated_blocks_.IsEmpty()) << "Unimplemented scenario";
+void HBasicBlock::DisconnectAndDelete() {
+  // Dominators must be removed after all the blocks they dominate. This way
+  // a loop header is removed last, a requirement for correct loop information
+  // iteration.
+  DCHECK(dominated_blocks_.IsEmpty());
 
+  // Remove the block from all loops it is included in.
+  for (HLoopInformationOutwardIterator it(*this); !it.Done(); it.Advance()) {
+    HLoopInformation* loop_info = it.Current();
+    loop_info->Remove(this);
+    if (loop_info->IsBackEdge(*this)) {
+      // If this was the last back edge of the loop, we deliberately leave the
+      // loop in an inconsistent state and will fail SSAChecker unless the
+      // entire loop is removed during the pass.
+      loop_info->RemoveBackEdge(this);
+    }
+  }
+
+  // Disconnect the block from its predecessors and update their control-flow
+  // instructions.
   for (size_t i = 0, e = predecessors_.Size(); i < e; ++i) {
-    predecessors_.Get(i)->successors_.Delete(this);
+    HBasicBlock* predecessor = predecessors_.Get(i);
+    HInstruction* last_instruction = predecessor->GetLastInstruction();
+    predecessor->RemoveInstruction(last_instruction);
+    predecessor->RemoveSuccessor(this);
+    if (predecessor->GetSuccessors().Size() == 1u) {
+      DCHECK(last_instruction->IsIf());
+      predecessor->AddInstruction(new (graph_->GetArena()) HGoto());
+    } else {
+      // The predecessor has no remaining successors and therefore must be dead.
+      // We deliberately leave it without a control-flow instruction so that the
+      // SSAChecker fails unless it is removed during the pass too.
+      DCHECK_EQ(predecessor->GetSuccessors().Size(), 0u);
+    }
   }
-  for (size_t i = 0, e = successors_.Size(); i < e; ++i) {
-    successors_.Get(i)->predecessors_.Delete(this);
-  }
-  dominator_->dominated_blocks_.Delete(this);
-
   predecessors_.Reset();
+
+  // Disconnect the block from its successors and update their dominators
+  // and phis.
+  for (size_t i = 0, e = successors_.Size(); i < e; ++i) {
+    HBasicBlock* successor = successors_.Get(i);
+    // Delete this block from the list of predecessors.
+    size_t this_index = successor->GetPredecessorIndexOf(this);
+    successor->predecessors_.DeleteAt(this_index);
+
+    // Check that `successor` has other predecessors, otherwise `this` is the
+    // dominator of `successor` which violates the order DCHECKed at the top.
+    DCHECK(!successor->predecessors_.IsEmpty());
+
+    // Recompute the successor's dominator.
+    HBasicBlock* old_dominator = successor->GetDominator();
+    HBasicBlock* new_dominator = successor->predecessors_.Get(0);
+    for (size_t j = 1, f = successor->predecessors_.Size(); j < f; ++j) {
+      new_dominator = graph_->FindCommonDominator(
+          new_dominator, successor->predecessors_.Get(j));
+    }
+    if (old_dominator != new_dominator) {
+      successor->SetDominator(new_dominator);
+      old_dominator->RemoveDominatedBlock(successor);
+      new_dominator->AddDominatedBlock(successor);
+    }
+
+    // Remove this block's entries in the successor's phis.
+    if (successor->predecessors_.Size() == 1u) {
+      // The successor has just one predecessor left. Replace phis with the only
+      // remaining input.
+      for (HInstructionIterator phi_it(successor->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
+        HPhi* phi = phi_it.Current()->AsPhi();
+        phi->ReplaceWith(phi->InputAt(1 - this_index));
+        successor->RemovePhi(phi);
+      }
+    } else {
+      for (HInstructionIterator phi_it(successor->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
+        phi_it.Current()->AsPhi()->RemoveInputAt(this_index);
+      }
+    }
+  }
   successors_.Reset();
-  dominator_ = nullptr;
-  graph_ = nullptr;
+
+  // Disconnect from the dominator.
+  dominator_->RemoveDominatedBlock(this);
+  SetDominator(nullptr);
+
+  // Delete from the graph. The function safely deletes remaining instructions
+  // and updates the reverse post order.
+  graph_->DeleteDeadBlock(this);
+  SetGraph(nullptr);
 }
 
 void HBasicBlock::MergeWith(HBasicBlock* other) {
-  DCHECK(successors_.IsEmpty()) << "Unimplemented block merge scenario";
-  DCHECK(dominated_blocks_.IsEmpty()
-         || (dominated_blocks_.Size() == 1 && dominated_blocks_.Get(0) == other))
-      << "Unimplemented block merge scenario";
+  DCHECK_EQ(GetGraph(), other->GetGraph());
+  DCHECK(GetDominatedBlocks().Contains(other));
+  DCHECK_EQ(GetSuccessors().Size(), 1u);
+  DCHECK_EQ(GetSuccessors().Get(0), other);
+  DCHECK_EQ(other->GetPredecessors().Size(), 1u);
+  DCHECK_EQ(other->GetPredecessors().Get(0), this);
   DCHECK(other->GetPhis().IsEmpty());
 
-  successors_.Reset();
-  dominated_blocks_.Reset();
+  // Move instructions from `other` to `this`.
+  DCHECK(EndsWithControlFlowInstruction());
+  RemoveInstruction(GetLastInstruction());
   instructions_.Add(other->GetInstructions());
-  other->GetInstructions().SetBlockOfInstructions(this);
+  other->instructions_.SetBlockOfInstructions(this);
+  other->instructions_.Clear();
 
-  while (!other->GetSuccessors().IsEmpty()) {
-    HBasicBlock* successor = other->GetSuccessors().Get(0);
+  // Remove `other` from the loops it is included in.
+  for (HLoopInformationOutwardIterator it(*other); !it.Done(); it.Advance()) {
+    HLoopInformation* loop_info = it.Current();
+    loop_info->Remove(other);
+    if (loop_info->IsBackEdge(*other)) {
+      loop_info->ReplaceBackEdge(other, this);
+    }
+  }
+
+  // Update links to the successors of `other`.
+  successors_.Reset();
+  while (!other->successors_.IsEmpty()) {
+    HBasicBlock* successor = other->successors_.Get(0);
     successor->ReplacePredecessor(other, this);
   }
 
+  // Update the dominator tree.
+  dominated_blocks_.Delete(other);
+  for (size_t i = 0, e = other->GetDominatedBlocks().Size(); i < e; ++i) {
+    HBasicBlock* dominated = other->GetDominatedBlocks().Get(i);
+    dominated_blocks_.Add(dominated);
+    dominated->SetDominator(this);
+  }
+  other->dominated_blocks_.Reset();
+  other->dominator_ = nullptr;
+
+  // Clear the list of predecessors of `other` in preparation of deleting it.
+  other->predecessors_.Reset();
+
+  // Delete `other` from the graph. The function updates reverse post order.
+  graph_->DeleteDeadBlock(other);
+  other->SetGraph(nullptr);
+}
+
+void HBasicBlock::MergeWithInlined(HBasicBlock* other) {
+  DCHECK_NE(GetGraph(), other->GetGraph());
+  DCHECK(GetDominatedBlocks().IsEmpty());
+  DCHECK(GetSuccessors().IsEmpty());
+  DCHECK(!EndsWithControlFlowInstruction());
+  DCHECK_EQ(other->GetPredecessors().Size(), 1u);
+  DCHECK(other->GetPredecessors().Get(0)->IsEntryBlock());
+  DCHECK(other->GetPhis().IsEmpty());
+  DCHECK(!other->IsInLoop());
+
+  // Move instructions from `other` to `this`.
+  instructions_.Add(other->GetInstructions());
+  other->instructions_.SetBlockOfInstructions(this);
+
+  // Update links to the successors of `other`.
+  successors_.Reset();
+  while (!other->successors_.IsEmpty()) {
+    HBasicBlock* successor = other->successors_.Get(0);
+    successor->ReplacePredecessor(other, this);
+  }
+
+  // Update the dominator tree.
   for (size_t i = 0, e = other->GetDominatedBlocks().Size(); i < e; ++i) {
     HBasicBlock* dominated = other->GetDominatedBlocks().Get(i);
     dominated_blocks_.Add(dominated);
@@ -973,6 +1276,24 @@
   }
 }
 
+void HGraph::DeleteDeadBlock(HBasicBlock* block) {
+  DCHECK_EQ(block->GetGraph(), this);
+  DCHECK(block->GetSuccessors().IsEmpty());
+  DCHECK(block->GetPredecessors().IsEmpty());
+  DCHECK(block->GetDominatedBlocks().IsEmpty());
+  DCHECK(block->GetDominator() == nullptr);
+  // Remove what is left in the block, last instruction first, then the phis.
+  for (HBackwardInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+    block->RemoveInstruction(it.Current());
+  }
+  for (HBackwardInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
+    block->RemovePhi(it.Current()->AsPhi());
+  }
+  // Finally forget the block: drop it from the RPO and null its blocks_ slot.
+  reverse_post_order_.Delete(block);
+  blocks_.Put(block->GetBlockId(), nullptr);
+}
+
 void HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) {
   if (GetBlocks().Size() == 3) {
     // Simple case of an entry block, a body block, and an exit block.
@@ -1005,7 +1326,7 @@
 
     HBasicBlock* first = entry_block_->GetSuccessors().Get(0);
     DCHECK(!first->IsInLoop());
-    at->MergeWith(first);
+    at->MergeWithInlined(first);
     exit_block_->ReplaceWith(to);
 
     // Update all predecessors of the exit block (now the `to` block)
@@ -1094,11 +1415,9 @@
         loop_it.Current()->Add(to);
       }
       if (info->IsBackEdge(*at)) {
-        // Only `at` can become a back edge, as the inlined blocks
-        // are predecessors of `at`.
-        DCHECK_EQ(1u, info->NumberOfBackEdges());
-        info->ClearBackEdges();
-        info->AddBackEdge(to);
+        // Only `to` can become a back edge, as the inlined blocks
+        // are predecessors of `to`.
+        info->ReplaceBackEdge(at, to);
       }
     }
   }
@@ -1113,7 +1432,7 @@
   // - Remove suspend checks, that hold an environment.
   // We must do this after the other blocks have been inlined, otherwise ids of
   // constants could overlap with the inner graph.
-  int parameter_index = 0;
+  size_t parameter_index = 0;
   for (HInstructionIterator it(entry_block_->GetInstructions()); !it.Done(); it.Advance()) {
     HInstruction* current = it.Current();
     if (current->IsNullConstant()) {
@@ -1122,10 +1441,19 @@
       current->ReplaceWith(outer_graph->GetIntConstant(current->AsIntConstant()->GetValue()));
     } else if (current->IsLongConstant()) {
       current->ReplaceWith(outer_graph->GetLongConstant(current->AsLongConstant()->GetValue()));
-    } else if (current->IsFloatConstant() || current->IsDoubleConstant()) {
-      // TODO: Don't duplicate floating-point constants.
-      current->MoveBefore(outer_graph->GetEntryBlock()->GetLastInstruction());
+    } else if (current->IsFloatConstant()) {
+      current->ReplaceWith(outer_graph->GetFloatConstant(current->AsFloatConstant()->GetValue()));
+    } else if (current->IsDoubleConstant()) {
+      current->ReplaceWith(outer_graph->GetDoubleConstant(current->AsDoubleConstant()->GetValue()));
     } else if (current->IsParameterValue()) {
+      if (kIsDebugBuild
+          && invoke->IsInvokeStaticOrDirect()
+          && invoke->AsInvokeStaticOrDirect()->IsStaticWithExplicitClinitCheck()) {
+        // Ensure we do not use the last input of `invoke`, as it
+        // contains a clinit check which is not an actual argument.
+        size_t last_input_index = invoke->InputCount() - 1;
+        DCHECK(parameter_index != last_input_index);
+      }
       current->ReplaceWith(invoke->InputAt(parameter_index++));
     } else {
       DCHECK(current->IsGoto() || current->IsSuspendCheck());
@@ -1137,53 +1465,6 @@
   invoke->GetBlock()->RemoveInstruction(invoke);
 }
 
-void HGraph::MergeEmptyBranches(HBasicBlock* start_block, HBasicBlock* end_block) {
-  // Find the two branches of an If.
-  DCHECK_EQ(start_block->GetSuccessors().Size(), 2u);
-  HBasicBlock* left_branch = start_block->GetSuccessors().Get(0);
-  HBasicBlock* right_branch = start_block->GetSuccessors().Get(1);
-
-  // Make sure this is a diamond control-flow path.
-  DCHECK_EQ(left_branch->GetSuccessors().Get(0), end_block);
-  DCHECK_EQ(right_branch->GetSuccessors().Get(0), end_block);
-  DCHECK_EQ(end_block->GetPredecessors().Size(), 2u);
-  DCHECK_EQ(start_block, end_block->GetDominator());
-
-  // Disconnect the branches and merge the two blocks. This will move
-  // all instructions from 'end_block' to 'start_block'.
-  DCHECK(left_branch->IsSingleGoto());
-  DCHECK(right_branch->IsSingleGoto());
-  left_branch->DisconnectFromAll();
-  right_branch->DisconnectFromAll();
-  start_block->RemoveInstruction(start_block->GetLastInstruction());
-  start_block->MergeWith(end_block);
-
-  // Delete the now redundant blocks from the graph.
-  blocks_.Put(left_branch->GetBlockId(), nullptr);
-  blocks_.Put(right_branch->GetBlockId(), nullptr);
-  blocks_.Put(end_block->GetBlockId(), nullptr);
-
-  // Update reverse post order.
-  reverse_post_order_.Delete(left_branch);
-  reverse_post_order_.Delete(right_branch);
-  reverse_post_order_.Delete(end_block);
-
-  // Update loops which contain the code.
-  for (HLoopInformationOutwardIterator it(*start_block); !it.Done(); it.Advance()) {
-    HLoopInformation* loop_info = it.Current();
-    DCHECK(loop_info->Contains(*left_branch));
-    DCHECK(loop_info->Contains(*right_branch));
-    DCHECK(loop_info->Contains(*end_block));
-    loop_info->Remove(left_branch);
-    loop_info->Remove(right_branch);
-    loop_info->Remove(end_block);
-    if (loop_info->IsBackEdge(*end_block)) {
-      loop_info->RemoveBackEdge(end_block);
-      loop_info->AddBackEdge(start_block);
-    }
-  }
-}
-
 std::ostream& operator<<(std::ostream& os, const ReferenceTypeInfo& rhs) {
   ScopedObjectAccess soa(Thread::Current());
   os << "["
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index b89487f..cb2e5cc 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -48,6 +48,7 @@
 class HSuspendCheck;
 class LiveInterval;
 class LocationSummary;
+class SlowPathCode;
 class SsaBuilder;
 
 static const int kDefaultNumberOfBlocks = 8;
@@ -97,6 +98,9 @@
   void AddAfter(HInstruction* cursor, const HInstructionList& instruction_list);
   void Add(const HInstructionList& instruction_list);
 
+  // Return the number of instructions in the list. This is an expensive operation.
+  size_t CountSize() const;
+
  private:
   HInstruction* first_instruction_;
   HInstruction* last_instruction_;
@@ -113,7 +117,11 @@
 // Control-flow graph of a method. Contains a list of basic blocks.
 class HGraph : public ArenaObject<kArenaAllocMisc> {
  public:
-  HGraph(ArenaAllocator* arena, bool debuggable = false, int start_instruction_id = 0)
+  HGraph(ArenaAllocator* arena,
+         const DexFile& dex_file,
+         uint32_t method_idx,
+         bool debuggable = false,
+         int start_instruction_id = 0)
       : arena_(arena),
         blocks_(arena, kDefaultNumberOfBlocks),
         reverse_post_order_(arena, kDefaultNumberOfBlocks),
@@ -124,12 +132,16 @@
         number_of_vregs_(0),
         number_of_in_vregs_(0),
         temporaries_vreg_slots_(0),
-        has_array_accesses_(false),
+        has_bounds_checks_(false),
         debuggable_(debuggable),
         current_instruction_id_(start_instruction_id),
+        dex_file_(dex_file),
+        method_idx_(method_idx),
         cached_null_constant_(nullptr),
         cached_int_constants_(std::less<int32_t>(), arena->Adapter()),
-        cached_long_constants_(std::less<int64_t>(), arena->Adapter()) {}
+        cached_float_constants_(std::less<int32_t>(), arena->Adapter()),
+        cached_long_constants_(std::less<int64_t>(), arena->Adapter()),
+        cached_double_constants_(std::less<int64_t>(), arena->Adapter()) {}
 
   ArenaAllocator* GetArena() const { return arena_; }
   const GrowableArray<HBasicBlock*>& GetBlocks() const { return blocks_; }
@@ -168,7 +180,8 @@
   // Inline this graph in `outer_graph`, replacing the given `invoke` instruction.
   void InlineInto(HGraph* outer_graph, HInvoke* invoke);
 
-  void MergeEmptyBranches(HBasicBlock* start_block, HBasicBlock* end_block);
+  // Removes `block` from the graph.
+  void DeleteDeadBlock(HBasicBlock* block);
 
   void SplitCriticalEdge(HBasicBlock* block, HBasicBlock* successor);
   void SimplifyLoop(HBasicBlock* header);
@@ -226,19 +239,19 @@
     return linear_order_;
   }
 
-  bool HasArrayAccesses() const {
-    return has_array_accesses_;
+  bool HasBoundsChecks() const {
+    return has_bounds_checks_;
   }
 
-  void SetHasArrayAccesses(bool value) {
-    has_array_accesses_ = value;
+  void SetHasBoundsChecks(bool value) {
+    has_bounds_checks_ = value;
   }
 
   bool IsDebuggable() const { return debuggable_; }
 
   // Returns a constant of the given type and value. If it does not exist
-  // already, it is created and inserted into the graph. Only integral types
-  // are currently supported.
+  // already, it is created and inserted into the graph. This method is only for
+  // integral types.
   HConstant* GetConstant(Primitive::Type type, int64_t value);
   HNullConstant* GetNullConstant();
   HIntConstant* GetIntConstant(int32_t value) {
@@ -247,9 +260,24 @@
   HLongConstant* GetLongConstant(int64_t value) {
     return CreateConstant(value, &cached_long_constants_);
   }
+  HFloatConstant* GetFloatConstant(float value) {
+    return CreateConstant(bit_cast<int32_t, float>(value), &cached_float_constants_);
+  }
+  HDoubleConstant* GetDoubleConstant(double value) {
+    return CreateConstant(bit_cast<int64_t, double>(value), &cached_double_constants_);
+  }
+
+  HBasicBlock* FindCommonDominator(HBasicBlock* first, HBasicBlock* second) const;
+
+  const DexFile& GetDexFile() const {
+    return dex_file_;
+  }
+
+  uint32_t GetMethodIdx() const {
+    return method_idx_;
+  }
 
  private:
-  HBasicBlock* FindCommonDominator(HBasicBlock* first, HBasicBlock* second) const;
   void VisitBlockForDominatorTree(HBasicBlock* block,
                                   HBasicBlock* predecessor,
                                   GrowableArray<size_t>* visits);
@@ -260,10 +288,34 @@
   void RemoveInstructionsAsUsersFromDeadBlocks(const ArenaBitVector& visited) const;
   void RemoveDeadBlocks(const ArenaBitVector& visited);
 
-  template <class InstType, typename ValueType>
-  InstType* CreateConstant(ValueType value, ArenaSafeMap<ValueType, InstType*>* cache);
+  // Returns the cached constant for `value`, creating, caching and inserting a
+  // new one when `cache` has no entry or the entry was removed from its block.
+  template <class InstructionType, typename ValueType>
+  InstructionType* CreateConstant(ValueType value,
+                                  ArenaSafeMap<ValueType, InstructionType*>* cache) {
+    auto entry = cache->find(value);
+    if (entry != cache->end()) {
+      InstructionType* cached = entry->second;
+      // A previously deleted constant no longer has a block; do not reuse it.
+      if (cached != nullptr && cached->GetBlock() != nullptr) {
+        return cached;
+      }
+    }
+    InstructionType* fresh = new (arena_) InstructionType(value);
+    cache->Overwrite(value, fresh);
+    InsertConstant(fresh);
+    return fresh;
+  }
+
   void InsertConstant(HConstant* instruction);
 
+  // Cache a float constant into the graph. This method should only be
+  // called by the SsaBuilder when creating "equivalent" instructions.
+  void CacheFloatConstant(HFloatConstant* constant);
+
+  // See CacheFloatConstant comment.
+  void CacheDoubleConstant(HDoubleConstant* constant);
+
   ArenaAllocator* const arena_;
 
   // List of blocks in insertion order.
@@ -290,8 +342,8 @@
   // Number of vreg size slots that the temporaries use (used in baseline compiler).
   size_t temporaries_vreg_slots_;
 
-  // Has array accesses. We can totally skip BCE if it's false.
-  bool has_array_accesses_;
+  // Has bounds checks. We can totally skip BCE if it's false.
+  bool has_bounds_checks_;
 
   // Indicates whether the graph should be compiled in a way that
   // ensures full debuggability. If false, we can apply more
@@ -301,11 +353,20 @@
   // The current id to assign to a newly added instruction. See HInstruction.id_.
   int32_t current_instruction_id_;
 
-  // Cached common constants often needed by optimization passes.
+  // The dex file from which the method comes.
+  const DexFile& dex_file_;
+
+  // The method index in the dex file.
+  const uint32_t method_idx_;
+
+  // Cached constants.
   HNullConstant* cached_null_constant_;
   ArenaSafeMap<int32_t, HIntConstant*> cached_int_constants_;
+  ArenaSafeMap<int32_t, HFloatConstant*> cached_float_constants_;
   ArenaSafeMap<int64_t, HLongConstant*> cached_long_constants_;
+  ArenaSafeMap<int64_t, HDoubleConstant*> cached_double_constants_;
 
+  friend class SsaBuilder;           // For caching constants.
   friend class SsaLivenessAnalysis;  // For the linear order.
   ART_FRIEND_TEST(GraphTest, IfSuccessorSimpleJoinBlock1);
   DISALLOW_COPY_AND_ASSIGN(HGraph);
@@ -357,14 +418,30 @@
     return back_edges_;
   }
 
-  void ClearBackEdges() {
-    back_edges_.Reset();
+  // Returns the lifetime position of the back edge that has the
+  // greatest lifetime position.
+  size_t GetLifetimeEnd() const;
+
+  void ReplaceBackEdge(HBasicBlock* existing, HBasicBlock* new_back_edge) {
+    for (size_t i = 0, e = back_edges_.Size(); i < e; ++i) {
+      if (back_edges_.Get(i) == existing) {
+        back_edges_.Put(i, new_back_edge);
+        return;
+      }
+    }
+    UNREACHABLE();
   }
 
-  // Find blocks that are part of this loop. Returns whether the loop is a natural loop,
+  // Finds blocks that are part of this loop. Returns whether the loop is a natural loop,
   // that is the header dominates the back edge.
   bool Populate();
 
+  // Reanalyzes the loop by removing loop info from its blocks and re-running
+  // Populate(). If there are no back edges left, the loop info is completely
+  // removed as well as its SuspendCheck instruction. It must be run on nested
+  // inner loops first.
+  void Update();
+
   // Returns whether this loop information contains `block`.
   // Note that this loop information *must* be populated before entering this function.
   bool Contains(const HBasicBlock& block) const;
@@ -451,6 +528,7 @@
   HBasicBlock* GetDominator() const { return dominator_; }
   void SetDominator(HBasicBlock* dominator) { dominator_ = dominator; }
   void AddDominatedBlock(HBasicBlock* block) { dominated_blocks_.Add(block); }
+  void RemoveDominatedBlock(HBasicBlock* block) { dominated_blocks_.Delete(block); }
   void ReplaceDominatedBlock(HBasicBlock* existing, HBasicBlock* new_block) {
     for (size_t i = 0, e = dominated_blocks_.Size(); i < e; ++i) {
       if (dominated_blocks_.Get(i) == existing) {
@@ -520,6 +598,13 @@
     predecessors_.Put(1, temp);
   }
 
+  void SwapSuccessors() {  // Exchanges the block's two successors in place.
+    DCHECK_EQ(successors_.Size(), 2u);
+    HBasicBlock* const second = successors_.Get(1);
+    successors_.Put(1, successors_.Get(0));
+    successors_.Put(0, second);
+  }
+
   size_t GetPredecessorIndexOf(HBasicBlock* predecessor) {
     for (size_t i = 0, e = predecessors_.Size(); i < e; ++i) {
       if (predecessors_.Get(i) == predecessor) {
@@ -550,7 +635,7 @@
   // that this method does not update the graph, reverse post order, loop
   // information, nor make sure the blocks are consistent (for example ending
   // with a control flow instruction).
-  void MergeWith(HBasicBlock* other);
+  void MergeWithInlined(HBasicBlock* other);
 
   // Replace `this` with `other`. Predecessors, successors, and dominated blocks
   // of `this` are moved to `other`.
@@ -559,15 +644,22 @@
   // with a control flow instruction).
   void ReplaceWith(HBasicBlock* other);
 
-  // Disconnects `this` from all its predecessors, successors and the dominator.
-  // It assumes that `this` does not dominate any blocks.
-  // Note that this method does not update the graph, reverse post order, loop
-  // information, nor make sure the blocks are consistent (for example ending
-  // with a control flow instruction).
-  void DisconnectFromAll();
+  // Merge `other` at the end of `this`. This method updates loops, reverse post
+  // order, links to predecessors, successors, dominators and deletes the block
+  // from the graph. The two blocks must be successive, i.e. `this` the only
+  // predecessor of `other` and vice versa.
+  void MergeWith(HBasicBlock* other);
+
+  // Disconnects `this` from all its predecessors, successors and dominator,
+  // removes it from all loops it is included in and eventually from the graph.
+  // The block must not dominate any other block. Predecessors and successors
+  // are safely updated.
+  void DisconnectAndDelete();
 
   void AddInstruction(HInstruction* instruction);
+  // Insert `instruction` before/after an existing instruction `cursor`.
   void InsertInstructionBefore(HInstruction* instruction, HInstruction* cursor);
+  void InsertInstructionAfter(HInstruction* instruction, HInstruction* cursor);
   // Replace instruction `initial` with `replacement` within this block.
   void ReplaceAndRemoveInstructionWith(HInstruction* initial,
                                        HInstruction* replacement);
@@ -578,9 +670,10 @@
   // instruction is not in use and removes it from the use lists of its inputs.
   void RemoveInstruction(HInstruction* instruction, bool ensure_safety = true);
   void RemovePhi(HPhi* phi, bool ensure_safety = true);
+  void RemoveInstructionOrPhi(HInstruction* instruction, bool ensure_safety = true);
 
   bool IsLoopHeader() const {
-    return (loop_information_ != nullptr) && (loop_information_->GetHeader() == this);
+    return IsInLoop() && (loop_information_->GetHeader() == this);
   }
 
   bool IsLoopPreHeaderFirstPredecessor() const {
@@ -599,7 +692,7 @@
   void SetInLoop(HLoopInformation* info) {
     if (IsLoopHeader()) {
       // Nothing to do. This just means `info` is an outer loop.
-    } else if (loop_information_ == nullptr) {
+    } else if (!IsInLoop()) {
       loop_information_ = info;
     } else if (loop_information_->Contains(*info->GetHeader())) {
       // Block is currently part of an outer loop. Make it part of this inner loop.
@@ -620,7 +713,7 @@
 
   bool IsInLoop() const { return loop_information_ != nullptr; }
 
-  // Returns wheter this block dominates the blocked passed as parameter.
+  // Returns whether this block dominates the block passed as parameter.
   bool Dominates(HBasicBlock* block) const;
 
   size_t GetLifetimeStart() const { return lifetime_start_; }
@@ -671,7 +764,7 @@
 
   void Advance() {
     DCHECK(!Done());
-    current_ = current_->GetHeader()->GetDominator()->GetLoopInformation();
+    current_ = current_->GetPreHeader()->GetLoopInformation();
   }
 
   HLoopInformation* Current() const {
@@ -784,13 +877,14 @@
   HUseListNode* GetNext() const { return next_; }
   T GetUser() const { return user_; }
   size_t GetIndex() const { return index_; }
+  void SetIndex(size_t index) { index_ = index; }
 
  private:
   HUseListNode(T user, size_t index)
       : user_(user), index_(index), prev_(nullptr), next_(nullptr) {}
 
   T const user_;
-  const size_t index_;
+  size_t index_;
   HUseListNode<T>* prev_;
   HUseListNode<T>* next_;
 
@@ -861,6 +955,14 @@
     return first_ != nullptr && first_->next_ == nullptr;
   }
 
+  size_t SizeSlow() const {
+    size_t count = 0;
+    for (HUseListNode<T>* current = first_; current != nullptr; current = current->GetNext()) {
+      ++count;
+    }
+    return count;
+  }
+
  private:
   HUseListNode<T>* first_;
 };
@@ -987,15 +1089,47 @@
 // A HEnvironment object contains the values of virtual registers at a given location.
 class HEnvironment : public ArenaObject<kArenaAllocMisc> {
  public:
-  HEnvironment(ArenaAllocator* arena, size_t number_of_vregs)
-     : vregs_(arena, number_of_vregs) {
+  HEnvironment(ArenaAllocator* arena,
+               size_t number_of_vregs,
+               const DexFile& dex_file,
+               uint32_t method_idx,
+               uint32_t dex_pc)
+     : vregs_(arena, number_of_vregs),
+       locations_(arena, number_of_vregs),
+       parent_(nullptr),
+       dex_file_(dex_file),
+       method_idx_(method_idx),
+       dex_pc_(dex_pc) {
     vregs_.SetSize(number_of_vregs);
     for (size_t i = 0; i < number_of_vregs; i++) {
       vregs_.Put(i, HUserRecord<HEnvironment*>());
     }
+
+    locations_.SetSize(number_of_vregs);
+    for (size_t i = 0; i < number_of_vregs; ++i) {
+      locations_.Put(i, Location());
+    }
   }
 
-  void CopyFrom(HEnvironment* env);
+  // Copies `parent` (and, recursively, its ancestors) and installs the copy as our parent.
+  void SetAndCopyParentChain(ArenaAllocator* allocator, HEnvironment* parent) {
+    parent_ = new (allocator) HEnvironment(
+        allocator, parent->Size(), parent->GetDexFile(), parent->GetMethodIdx(),
+        parent->GetDexPc());
+    HEnvironment* const grandparent = parent->GetParent();
+    if (grandparent != nullptr) {
+      parent_->SetAndCopyParentChain(allocator, grandparent);
+    }
+    parent_->CopyFrom(parent);
+  }
+
+  void CopyFrom(const GrowableArray<HInstruction*>& locals);
+  void CopyFrom(HEnvironment* environment);
+
+  // Copy from `env`. If it's a loop phi for `loop_header`, copy the first
+  // input to the loop phi instead. This is for inserting instructions that
+  // require an environment (like HDeoptimization) in the loop pre-header.
+  void CopyFromWithLoopPhiAdjustment(HEnvironment* env, HBasicBlock* loop_header);
 
   void SetRawEnvAt(size_t index, HInstruction* instruction) {
     vregs_.Put(index, HUserRecord<HEnvironment*>(instruction));
@@ -1009,6 +1143,28 @@
 
   size_t Size() const { return vregs_.Size(); }
 
+  HEnvironment* GetParent() const { return parent_; }
+
+  void SetLocationAt(size_t index, Location location) {
+    locations_.Put(index, location);
+  }
+
+  Location GetLocationAt(size_t index) const {
+    return locations_.Get(index);
+  }
+
+  uint32_t GetDexPc() const {
+    return dex_pc_;
+  }
+
+  uint32_t GetMethodIdx() const {
+    return method_idx_;
+  }
+
+  const DexFile& GetDexFile() const {
+    return dex_file_;
+  }
+
  private:
   // Record instructions' use entries of this environment for constant-time removal.
   // It should only be called by HInstruction when a new environment use is added.
@@ -1019,8 +1175,13 @@
   }
 
   GrowableArray<HUserRecord<HEnvironment*> > vregs_;
+  GrowableArray<Location> locations_;
+  HEnvironment* parent_;
+  const DexFile& dex_file_;
+  const uint32_t method_idx_;
+  const uint32_t dex_pc_;
 
-  friend HInstruction;
+  friend class HInstruction;
 
   DISALLOW_COPY_AND_ASSIGN(HEnvironment);
 };
@@ -1150,6 +1311,11 @@
   }
 
   virtual bool NeedsEnvironment() const { return false; }
+  virtual uint32_t GetDexPc() const {
+    LOG(FATAL) << "GetDexPc() cannot be called on an instruction that"
+                  " does not need an environment";
+    UNREACHABLE();
+  }
   virtual bool IsControlFlow() const { return false; }
   virtual bool CanThrow() const { return false; }
   bool HasSideEffects() const { return side_effects_.HasSideEffects(); }
@@ -1227,8 +1393,31 @@
   // copying, the uses lists are being updated.
   void CopyEnvironmentFrom(HEnvironment* environment) {
     ArenaAllocator* allocator = GetBlock()->GetGraph()->GetArena();
-    environment_ = new (allocator) HEnvironment(allocator, environment->Size());
+    environment_ = new (allocator) HEnvironment(
+        allocator,
+        environment->Size(),
+        environment->GetDexFile(),
+        environment->GetMethodIdx(),
+        environment->GetDexPc());
     environment_->CopyFrom(environment);
+    if (environment->GetParent() != nullptr) {
+      environment_->SetAndCopyParentChain(allocator, environment->GetParent());
+    }
+  }
+
+  void CopyEnvironmentFromWithLoopPhiAdjustment(HEnvironment* environment,
+                                                HBasicBlock* block) {
+    ArenaAllocator* allocator = GetBlock()->GetGraph()->GetArena();
+    environment_ = new (allocator) HEnvironment(
+        allocator,
+        environment->Size(),
+        environment->GetDexFile(),
+        environment->GetMethodIdx(),
+        environment->GetDexPc());
+    if (environment->GetParent() != nullptr) {
+      environment_->SetAndCopyParentChain(allocator, environment->GetParent());
+    }
+    environment_->CopyFromWithLoopPhiAdjustment(environment, block);
   }
 
   // Returns the number of entries in the environment. Typically, that is the
@@ -1604,7 +1793,7 @@
 
   bool NeedsEnvironment() const OVERRIDE { return true; }
   bool CanThrow() const OVERRIDE { return true; }
-  uint32_t GetDexPc() const { return dex_pc_; }
+  uint32_t GetDexPc() const OVERRIDE { return dex_pc_; }
 
   DECLARE_INSTRUCTION(Deoptimize);
 
@@ -2008,28 +2197,30 @@
   size_t ComputeHashCode() const OVERRIDE { return static_cast<size_t>(GetValue()); }
 
   bool IsMinusOne() const OVERRIDE {
-    return bit_cast<uint32_t, float>(AsFloatConstant()->GetValue()) ==
-        bit_cast<uint32_t, float>((-1.0f));
+    return bit_cast<uint32_t, float>(value_) == bit_cast<uint32_t, float>((-1.0f));
   }
   bool IsZero() const OVERRIDE {
-    return AsFloatConstant()->GetValue() == 0.0f;
+    return value_ == 0.0f;
   }
   bool IsOne() const OVERRIDE {
-    return bit_cast<uint32_t, float>(AsFloatConstant()->GetValue()) ==
-        bit_cast<uint32_t, float>(1.0f);
+    return bit_cast<uint32_t, float>(value_) == bit_cast<uint32_t, float>(1.0f);
+  }
+  bool IsNaN() const {
+    return std::isnan(value_);
   }
 
   DECLARE_INSTRUCTION(FloatConstant);
 
  private:
   explicit HFloatConstant(float value) : HConstant(Primitive::kPrimFloat), value_(value) {}
+  explicit HFloatConstant(int32_t value)
+      : HConstant(Primitive::kPrimFloat), value_(bit_cast<float, int32_t>(value)) {}
 
   const float value_;
 
-  // Only the SsaBuilder can currently create floating-point constants. If we
-  // ever need to create them later in the pipeline, we will have to handle them
-  // the same way as integral constants.
+  // Only the SsaBuilder and HGraph can create floating-point constants.
   friend class SsaBuilder;
+  friend class HGraph;
   DISALLOW_COPY_AND_ASSIGN(HFloatConstant);
 };
 
@@ -2045,28 +2236,30 @@
   size_t ComputeHashCode() const OVERRIDE { return static_cast<size_t>(GetValue()); }
 
   bool IsMinusOne() const OVERRIDE {
-    return bit_cast<uint64_t, double>(AsDoubleConstant()->GetValue()) ==
-        bit_cast<uint64_t, double>((-1.0));
+    return bit_cast<uint64_t, double>(value_) == bit_cast<uint64_t, double>((-1.0));
   }
   bool IsZero() const OVERRIDE {
-    return AsDoubleConstant()->GetValue() == 0.0;
+    return value_ == 0.0;
   }
   bool IsOne() const OVERRIDE {
-    return bit_cast<uint64_t, double>(AsDoubleConstant()->GetValue()) ==
-        bit_cast<uint64_t, double>(1.0);
+    return bit_cast<uint64_t, double>(value_) == bit_cast<uint64_t, double>(1.0);
+  }
+  bool IsNaN() const {
+    return std::isnan(value_);
   }
 
   DECLARE_INSTRUCTION(DoubleConstant);
 
  private:
   explicit HDoubleConstant(double value) : HConstant(Primitive::kPrimDouble), value_(value) {}
+  explicit HDoubleConstant(int64_t value)
+      : HConstant(Primitive::kPrimDouble), value_(bit_cast<double, int64_t>(value)) {}
 
   const double value_;
 
-  // Only the SsaBuilder can currently create floating-point constants. If we
-  // ever need to create them later in the pipeline, we will have to handle them
-  // the same way as integral constants.
+  // Only the SsaBuilder and HGraph can create floating-point constants.
   friend class SsaBuilder;
+  friend class HGraph;
   DISALLOW_COPY_AND_ASSIGN(HDoubleConstant);
 };
 
@@ -2163,9 +2356,15 @@
     SetRawInputAt(index, argument);
   }
 
+  // Return the number of arguments.  This number can be lower than
+  // the number of inputs returned by InputCount(), as some invoke
+  // instructions (e.g. HInvokeStaticOrDirect) can have non-argument
+  // inputs at the end of their list of inputs.
+  uint32_t GetNumberOfArguments() const { return number_of_arguments_; }
+
   Primitive::Type GetType() const OVERRIDE { return return_type_; }
 
-  uint32_t GetDexPc() const { return dex_pc_; }
+  uint32_t GetDexPc() const OVERRIDE { return dex_pc_; }
 
   uint32_t GetDexMethodIndex() const { return dex_method_index_; }
 
@@ -2182,16 +2381,19 @@
  protected:
   HInvoke(ArenaAllocator* arena,
           uint32_t number_of_arguments,
+          uint32_t number_of_other_inputs,
           Primitive::Type return_type,
           uint32_t dex_pc,
           uint32_t dex_method_index)
     : HInstruction(SideEffects::All()),
+      number_of_arguments_(number_of_arguments),
       inputs_(arena, number_of_arguments),
       return_type_(return_type),
       dex_pc_(dex_pc),
       dex_method_index_(dex_method_index),
       intrinsic_(Intrinsics::kNone) {
-    inputs_.SetSize(number_of_arguments);
+    uint32_t number_of_inputs = number_of_arguments + number_of_other_inputs;
+    inputs_.SetSize(number_of_inputs);
   }
 
   const HUserRecord<HInstruction*> InputRecordAt(size_t i) const OVERRIDE { return inputs_.Get(i); }
@@ -2199,6 +2401,7 @@
     inputs_.Put(index, input);
   }
 
+  uint32_t number_of_arguments_;
   GrowableArray<HUserRecord<HInstruction*> > inputs_;
   const Primitive::Type return_type_;
   const uint32_t dex_pc_;
@@ -2211,18 +2414,35 @@
 
 class HInvokeStaticOrDirect : public HInvoke {
  public:
+  // Requirements of this method call regarding the class
+  // initialization (clinit) check of its declaring class.
+  enum class ClinitCheckRequirement {
+    kNone,      // Class already initialized.
+    kExplicit,  // Static call having explicit clinit check as last input.
+    kImplicit,  // Static call implicitly requiring a clinit check.
+  };
+
   HInvokeStaticOrDirect(ArenaAllocator* arena,
                         uint32_t number_of_arguments,
                         Primitive::Type return_type,
                         uint32_t dex_pc,
                         uint32_t dex_method_index,
                         bool is_recursive,
+                        int32_t string_init_offset,
                         InvokeType original_invoke_type,
-                        InvokeType invoke_type)
-      : HInvoke(arena, number_of_arguments, return_type, dex_pc, dex_method_index),
+                        InvokeType invoke_type,
+                        ClinitCheckRequirement clinit_check_requirement)
+      : HInvoke(arena,
+                number_of_arguments,
+                clinit_check_requirement == ClinitCheckRequirement::kExplicit ? 1u : 0u,
+                return_type,
+                dex_pc,
+                dex_method_index),
         original_invoke_type_(original_invoke_type),
         invoke_type_(invoke_type),
-        is_recursive_(is_recursive) {}
+        is_recursive_(is_recursive),
+        clinit_check_requirement_(clinit_check_requirement),
+        string_init_offset_(string_init_offset) {}
 
   bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE {
     UNUSED(obj);
@@ -2235,13 +2455,67 @@
   InvokeType GetInvokeType() const { return invoke_type_; }
   bool IsRecursive() const { return is_recursive_; }
   bool NeedsDexCache() const OVERRIDE { return !IsRecursive(); }
+  bool IsStringInit() const { return string_init_offset_ != 0; }
+  int32_t GetStringInitOffset() const { return string_init_offset_; }
+
+  // Is this instruction a call to a static method?
+  bool IsStatic() const {
+    return GetInvokeType() == kStatic;
+  }
+
+  // Drop the trailing art::HLoadClass input that art::PrepareForRegisterAllocation::
+  // VisitClinitCheck set in lieu of the initial art::HClinitCheck instruction.
+  // Only relevant for static calls with explicit clinit check (DCHECKed below).
+  void RemoveLoadClassAsLastInput() {
+    DCHECK(IsStaticWithExplicitClinitCheck());
+    size_t last_input_index = InputCount() - 1;
+    HInstruction* last_input = InputAt(last_input_index);
+    DCHECK(last_input != nullptr);
+    DCHECK(last_input->IsLoadClass()) << last_input->DebugName();
+    RemoveAsUserOfInput(last_input_index);
+    inputs_.DeleteAt(last_input_index);
+    // The explicit check input is gone: the requirement is now implicit.
+    clinit_check_requirement_ = ClinitCheckRequirement::kImplicit;
+    DCHECK(IsStaticWithImplicitClinitCheck());
+  }
+
+  // Is this a call to a static method whose declaring class has an
+  // explicit initialization check in the graph?
+  bool IsStaticWithExplicitClinitCheck() const {
+    return IsStatic() && (clinit_check_requirement_ == ClinitCheckRequirement::kExplicit);
+  }
+
+  // Is this a call to a static method whose declaring class has an
+  // implicit initialization check requirement?
+  bool IsStaticWithImplicitClinitCheck() const {
+    return IsStatic() && (clinit_check_requirement_ == ClinitCheckRequirement::kImplicit);
+  }
 
   DECLARE_INSTRUCTION(InvokeStaticOrDirect);
 
+ protected:
+  const HUserRecord<HInstruction*> InputRecordAt(size_t i) const OVERRIDE {
+    const HUserRecord<HInstruction*> record = HInvoke::InputRecordAt(i);
+    // Debug builds only: when this is a static invoke marked as having an
+    // explicit clinit check, its last input must be either an
+    // art::HClinitCheck instruction (set by art::HGraphBuilder) or an
+    // art::HLoadClass instruction (set by art::PrepareForRegisterAllocation).
+    if (kIsDebugBuild && IsStaticWithExplicitClinitCheck() && (i == InputCount() - 1)) {
+      HInstruction* last = record.GetInstruction();
+      DCHECK(last != nullptr);
+      DCHECK(last->IsClinitCheck() || last->IsLoadClass()) << last->DebugName();
+    }
+    return record;
+  }
+
  private:
   const InvokeType original_invoke_type_;
   const InvokeType invoke_type_;
   const bool is_recursive_;
+  ClinitCheckRequirement clinit_check_requirement_;
+  // Thread entrypoint offset for string init method if this is a string init invoke.
+  // Note that there are multiple string init methods, each having its own offset.
+  int32_t string_init_offset_;
 
   DISALLOW_COPY_AND_ASSIGN(HInvokeStaticOrDirect);
 };
@@ -2254,7 +2528,7 @@
                  uint32_t dex_pc,
                  uint32_t dex_method_index,
                  uint32_t vtable_index)
-      : HInvoke(arena, number_of_arguments, return_type, dex_pc, dex_method_index),
+      : HInvoke(arena, number_of_arguments, 0u, return_type, dex_pc, dex_method_index),
         vtable_index_(vtable_index) {}
 
   bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE {
@@ -2280,7 +2554,7 @@
                    uint32_t dex_pc,
                    uint32_t dex_method_index,
                    uint32_t imt_index)
-      : HInvoke(arena, number_of_arguments, return_type, dex_pc, dex_method_index),
+      : HInvoke(arena, number_of_arguments, 0u, return_type, dex_pc, dex_method_index),
         imt_index_(imt_index) {}
 
   bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE {
@@ -2307,7 +2581,7 @@
         type_index_(type_index),
         entrypoint_(entrypoint) {}
 
-  uint32_t GetDexPc() const { return dex_pc_; }
+  uint32_t GetDexPc() const OVERRIDE { return dex_pc_; }
   uint16_t GetTypeIndex() const { return type_index_; }
 
   // Calls runtime so needs an environment.
@@ -2359,7 +2633,7 @@
     SetRawInputAt(0, length);
   }
 
-  uint32_t GetDexPc() const { return dex_pc_; }
+  uint32_t GetDexPc() const OVERRIDE { return dex_pc_; }
   uint16_t GetTypeIndex() const { return type_index_; }
 
   // Calls runtime so needs an environment.
@@ -2454,7 +2728,7 @@
     return (y == -1) ? -x : x / y;
   }
 
-  uint32_t GetDexPc() const { return dex_pc_; }
+  uint32_t GetDexPc() const OVERRIDE { return dex_pc_; }
 
   DECLARE_INSTRUCTION(Div);
 
@@ -2481,7 +2755,7 @@
     return (y == -1) ? 0 : x % y;
   }
 
-  uint32_t GetDexPc() const { return dex_pc_; }
+  uint32_t GetDexPc() const OVERRIDE { return dex_pc_; }
 
   DECLARE_INSTRUCTION(Rem);
 
@@ -2508,7 +2782,7 @@
   bool NeedsEnvironment() const OVERRIDE { return true; }
   bool CanThrow() const OVERRIDE { return true; }
 
-  uint32_t GetDexPc() const { return dex_pc_; }
+  uint32_t GetDexPc() const OVERRIDE { return dex_pc_; }
 
   DECLARE_INSTRUCTION(DivZeroCheck);
 
@@ -2703,11 +2977,15 @@
 
   // Required by the x86 and ARM code generators when producing calls
   // to the runtime.
-  uint32_t GetDexPc() const { return dex_pc_; }
+  uint32_t GetDexPc() const OVERRIDE { return dex_pc_; }
 
   bool CanBeMoved() const OVERRIDE { return true; }
   bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { return true; }
 
+  // Try to statically evaluate the conversion and return a HConstant
+  // containing the result.  If the input cannot be converted, return nullptr.
+  HConstant* TryStaticEvaluation() const;
+
   DECLARE_INSTRUCTION(TypeConversion);
 
  private:
@@ -2746,6 +3024,7 @@
   size_t InputCount() const OVERRIDE { return inputs_.Size(); }
 
   void AddInput(HInstruction* input);
+  void RemoveInputAt(size_t index);
 
   Primitive::Type GetType() const OVERRIDE { return type_; }
   void SetType(Primitive::Type type) { type_ = type; }
@@ -2812,7 +3091,7 @@
 
   bool CanBeNull() const OVERRIDE { return false; }
 
-  uint32_t GetDexPc() const { return dex_pc_; }
+  uint32_t GetDexPc() const OVERRIDE { return dex_pc_; }
 
   DECLARE_INSTRUCTION(NullCheck);
 
@@ -2975,7 +3254,7 @@
 
   bool NeedsTypeCheck() const { return needs_type_check_; }
 
-  uint32_t GetDexPc() const { return dex_pc_; }
+  uint32_t GetDexPc() const OVERRIDE { return dex_pc_; }
 
   HInstruction* GetArray() const { return InputAt(0); }
   HInstruction* GetIndex() const { return InputAt(1); }
@@ -3045,7 +3324,7 @@
 
   bool CanThrow() const OVERRIDE { return true; }
 
-  uint32_t GetDexPc() const { return dex_pc_; }
+  uint32_t GetDexPc() const OVERRIDE { return dex_pc_; }
 
   DECLARE_INSTRUCTION(BoundsCheck);
 
@@ -3085,19 +3364,25 @@
 class HSuspendCheck : public HTemplateInstruction<0> {
  public:
   explicit HSuspendCheck(uint32_t dex_pc)
-      : HTemplateInstruction(SideEffects::None()), dex_pc_(dex_pc) {}
+      : HTemplateInstruction(SideEffects::None()), dex_pc_(dex_pc), slow_path_(nullptr) {}
 
   bool NeedsEnvironment() const OVERRIDE {
     return true;
   }
 
-  uint32_t GetDexPc() const { return dex_pc_; }
+  uint32_t GetDexPc() const OVERRIDE { return dex_pc_; }
+  void SetSlowPath(SlowPathCode* slow_path) { slow_path_ = slow_path; }
+  SlowPathCode* GetSlowPath() const { return slow_path_; }
 
   DECLARE_INSTRUCTION(SuspendCheck);
 
  private:
   const uint32_t dex_pc_;
 
+  // Only used for code generation, in order to share the same slow path between back edges
+  // of the same loop.
+  SlowPathCode* slow_path_;
+
   DISALLOW_COPY_AND_ASSIGN(HSuspendCheck);
 };
 
@@ -3124,7 +3409,7 @@
 
   size_t ComputeHashCode() const OVERRIDE { return type_index_; }
 
-  uint32_t GetDexPc() const { return dex_pc_; }
+  uint32_t GetDexPc() const OVERRIDE { return dex_pc_; }
   uint16_t GetTypeIndex() const { return type_index_; }
   bool IsReferrersClass() const { return is_referrers_class_; }
 
@@ -3198,7 +3483,7 @@
 
   size_t ComputeHashCode() const OVERRIDE { return string_index_; }
 
-  uint32_t GetDexPc() const { return dex_pc_; }
+  uint32_t GetDexPc() const OVERRIDE { return dex_pc_; }
   uint32_t GetStringIndex() const { return string_index_; }
 
   // TODO: Can we deopt or debug when we resolve a string?
@@ -3214,7 +3499,6 @@
   DISALLOW_COPY_AND_ASSIGN(HLoadString);
 };
 
-// TODO: Pass this check to HInvokeStaticOrDirect nodes.
 /**
  * Performs an initialization check on its Class object input.
  */
@@ -3237,7 +3521,7 @@
     return true;
   }
 
-  uint32_t GetDexPc() const { return dex_pc_; }
+  uint32_t GetDexPc() const OVERRIDE { return dex_pc_; }
 
   HLoadClass* GetLoadClass() const { return InputAt(0)->AsLoadClass(); }
 
@@ -3337,7 +3621,7 @@
 
   bool CanThrow() const OVERRIDE { return true; }
 
-  uint32_t GetDexPc() const { return dex_pc_; }
+  uint32_t GetDexPc() const OVERRIDE { return dex_pc_; }
 
   DECLARE_INSTRUCTION(Throw);
 
@@ -3371,7 +3655,7 @@
     return false;
   }
 
-  uint32_t GetDexPc() const { return dex_pc_; }
+  uint32_t GetDexPc() const OVERRIDE { return dex_pc_; }
 
   bool IsClassFinal() const { return class_is_final_; }
 
@@ -3446,7 +3730,7 @@
   bool MustDoNullCheck() const { return must_do_null_check_; }
   void ClearMustDoNullCheck() { must_do_null_check_ = false; }
 
-  uint32_t GetDexPc() const { return dex_pc_; }
+  uint32_t GetDexPc() const OVERRIDE { return dex_pc_; }
 
   bool IsClassFinal() const { return class_is_final_; }
 
@@ -3492,7 +3776,7 @@
   bool NeedsEnvironment() const OVERRIDE { return true; }
   bool CanThrow() const OVERRIDE { return true; }
 
-  uint32_t GetDexPc() const { return dex_pc_; }
+  uint32_t GetDexPc() const OVERRIDE { return dex_pc_; }
 
   bool IsEnter() const { return kind_ == kEnter; }
 
diff --git a/compiler/optimizing/nodes_test.cc b/compiler/optimizing/nodes_test.cc
index 4e83ce5..2736453 100644
--- a/compiler/optimizing/nodes_test.cc
+++ b/compiler/optimizing/nodes_test.cc
@@ -16,6 +16,7 @@
 
 #include "base/arena_allocator.h"
 #include "nodes.h"
+#include "optimizing_unit_test.h"
 
 #include "gtest/gtest.h"
 
@@ -29,7 +30,7 @@
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
 
-  HGraph* graph = new (&allocator) HGraph(&allocator);
+  HGraph* graph = CreateGraph(&allocator);
   HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
   graph->AddBlock(entry);
   graph->SetEntryBlock(entry);
@@ -49,7 +50,8 @@
   first_block->AddSuccessor(exit_block);
   exit_block->AddInstruction(new (&allocator) HExit());
 
-  HEnvironment* environment = new (&allocator) HEnvironment(&allocator, 1);
+  HEnvironment* environment = new (&allocator) HEnvironment(
+      &allocator, 1, graph->GetDexFile(), graph->GetMethodIdx(), 0);
   null_check->SetRawEnvironment(environment);
   environment->SetRawEnvAt(0, parameter);
   parameter->AddEnvUseAt(null_check->GetEnvironment(), 0);
@@ -70,7 +72,7 @@
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
 
-  HGraph* graph = new (&allocator) HGraph(&allocator);
+  HGraph* graph = CreateGraph(&allocator);
   HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
   graph->AddBlock(entry);
   graph->SetEntryBlock(entry);
@@ -96,7 +98,7 @@
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
 
-  HGraph* graph = new (&allocator) HGraph(&allocator);
+  HGraph* graph = CreateGraph(&allocator);
   HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
   graph->AddBlock(entry);
   graph->SetEntryBlock(entry);
@@ -112,4 +114,51 @@
   ASSERT_TRUE(parameter->GetUses().HasOnlyOneUse());
 }
 
+TEST(Node, ParentEnvironment) {
+  ArenaPool pool;
+  ArenaAllocator allocator(&pool);
+  // Checks the environment-use count of a value as parent environments are set and copied.
+  HGraph* graph = CreateGraph(&allocator);
+  HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
+  graph->AddBlock(entry);
+  graph->SetEntryBlock(entry);
+  HInstruction* parameter1 = new (&allocator) HParameterValue(0, Primitive::kPrimNot);
+  HInstruction* with_environment = new (&allocator) HNullCheck(parameter1, 0);
+  entry->AddInstruction(parameter1);
+  entry->AddInstruction(with_environment);
+  entry->AddInstruction(new (&allocator) HExit());
+  // The null check is the only regular (non-environment) use of the parameter.
+  ASSERT_TRUE(parameter1->HasUses());
+  ASSERT_TRUE(parameter1->GetUses().HasOnlyOneUse());
+  // Give the null check a one-slot environment that references the parameter.
+  HEnvironment* environment = new (&allocator) HEnvironment(
+      &allocator, 1, graph->GetDexFile(), graph->GetMethodIdx(), 0);
+  GrowableArray<HInstruction*> array(&allocator, 1);
+  array.Add(parameter1);
+
+  environment->CopyFrom(array);
+  with_environment->SetRawEnvironment(environment);
+
+  ASSERT_TRUE(parameter1->HasEnvironmentUses());
+  ASSERT_TRUE(parameter1->GetEnvUses().HasOnlyOneUse());
+  // A second environment filled from the same array adds one more environment use.
+  HEnvironment* parent1 = new (&allocator) HEnvironment(
+      &allocator, 1, graph->GetDexFile(), graph->GetMethodIdx(), 0);
+  parent1->CopyFrom(array);
+
+  ASSERT_EQ(parameter1->GetEnvUses().SizeSlow(), 2u);
+  // Build a third environment and install a copy of it as the parent of `parent1`.
+  HEnvironment* parent2 = new (&allocator) HEnvironment(
+      &allocator, 1, graph->GetDexFile(), graph->GetMethodIdx(), 0);
+  parent2->CopyFrom(array);
+  parent1->SetAndCopyParentChain(&allocator, parent2);
+
+  // One use for parent2, and one other use for the new parent of parent1.
+  ASSERT_EQ(parameter1->GetEnvUses().SizeSlow(), 4u);
+
+  // Copying the two-level chain rooted at parent1 adds one use per copied environment.
+  environment->SetAndCopyParentChain(&allocator, parent1);
+  ASSERT_EQ(parameter1->GetEnvUses().SizeSlow(), 6u);
+}
+
 }  // namespace art
diff --git a/compiler/optimizing/optimization.cc b/compiler/optimizing/optimization.cc
index b13e07e..c46a219 100644
--- a/compiler/optimizing/optimization.cc
+++ b/compiler/optimizing/optimization.cc
@@ -21,9 +21,9 @@
 
 namespace art {
 
-void HOptimization::MaybeRecordStat(MethodCompilationStat compilation_stat) const {
+void HOptimization::MaybeRecordStat(MethodCompilationStat compilation_stat, size_t count) const {
   if (stats_ != nullptr) {
-    stats_->RecordStat(compilation_stat);
+    stats_->RecordStat(compilation_stat, count);
   }
 }
 
diff --git a/compiler/optimizing/optimization.h b/compiler/optimizing/optimization.h
index 8b20281..ccf8de9 100644
--- a/compiler/optimizing/optimization.h
+++ b/compiler/optimizing/optimization.h
@@ -48,7 +48,7 @@
   void Check();
 
  protected:
-  void MaybeRecordStat(MethodCompilationStat compilation_stat) const;
+  void MaybeRecordStat(MethodCompilationStat compilation_stat, size_t count = 1) const;
 
   HGraph* const graph_;
   // Used to record stats about the optimization.
diff --git a/compiler/optimizing/optimizing_cfi_test.cc b/compiler/optimizing/optimizing_cfi_test.cc
index b2c13ad..7aea249 100644
--- a/compiler/optimizing/optimizing_cfi_test.cc
+++ b/compiler/optimizing/optimizing_cfi_test.cc
@@ -21,6 +21,7 @@
 #include "cfi_test.h"
 #include "gtest/gtest.h"
 #include "optimizing/code_generator.h"
+#include "optimizing/optimizing_unit_test.h"
 #include "utils/assembler.h"
 
 #include "optimizing/optimizing_cfi_test_expected.inc"
@@ -45,10 +46,10 @@
     std::unique_ptr<const InstructionSetFeatures> isa_features;
     std::string error;
     isa_features.reset(InstructionSetFeatures::FromVariant(isa, "default", &error));
-    HGraph graph(&allocator);
+    HGraph* graph = CreateGraph(&allocator);
     // Generate simple frame with some spills.
     std::unique_ptr<CodeGenerator> code_gen(
-        CodeGenerator::Create(&graph, isa, *isa_features.get(), opts));
+        CodeGenerator::Create(graph, isa, *isa_features.get(), opts));
     const int frame_size = 64;
     int core_reg = 0;
     int fp_reg = 0;
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index d99d359..8bb5d8e 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -320,15 +320,17 @@
                              const DexCompilationUnit& dex_compilation_unit,
                              PassInfoPrinter* pass_info_printer,
                              StackHandleScopeCollection* handles) {
-  HDeadCodeElimination dce1(graph, stats);
-  HDeadCodeElimination dce2(graph, stats, "dead_code_elimination_final");
+  HDeadCodeElimination dce1(graph, stats,
+                            HDeadCodeElimination::kInitialDeadCodeEliminationPassName);
+  HDeadCodeElimination dce2(graph, stats,
+                            HDeadCodeElimination::kFinalDeadCodeEliminationPassName);
   HConstantFolding fold1(graph);
   InstructionSimplifier simplify1(graph, stats);
   HBooleanSimplifier boolean_simplify(graph);
 
   HInliner inliner(graph, dex_compilation_unit, dex_compilation_unit, driver, stats);
 
-  HConstantFolding fold2(graph);
+  HConstantFolding fold2(graph, "constant_folding_after_inlining");
   SideEffectsAnalysis side_effects(graph);
   GVNOptimization gvn(graph, side_effects);
   LICM licm(graph, side_effects);
@@ -512,7 +514,7 @@
 
   ArenaAllocator arena(Runtime::Current()->GetArenaPool());
   HGraph* graph = new (&arena) HGraph(
-      &arena, compiler_driver->GetCompilerOptions().GetDebuggable());
+      &arena, dex_file, method_idx, compiler_driver->GetCompilerOptions().GetDebuggable());
 
   // For testing purposes, we put a special marker on method names that should be compiled
   // with this compiler. This makes sure we're not regressing.
diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h
index e6508c9..b6b1bb1 100644
--- a/compiler/optimizing/optimizing_compiler_stats.h
+++ b/compiler/optimizing/optimizing_compiler_stats.h
@@ -29,25 +29,26 @@
   kCompiledBaseline,
   kCompiledOptimized,
   kCompiledQuick,
-  kInstructionSimplifications,
   kInlinedInvoke,
-  kNotCompiledUnsupportedIsa,
-  kNotCompiledPathological,
+  kInstructionSimplifications,
+  kNotCompiledBranchOutsideMethodCode,
+  kNotCompiledCannotBuildSSA,
+  kNotCompiledCantAccesType,
+  kNotCompiledClassNotVerified,
   kNotCompiledHugeMethod,
   kNotCompiledLargeMethodNoBranches,
-  kNotCompiledCannotBuildSSA,
   kNotCompiledNoCodegen,
-  kNotCompiledUnresolvedMethod,
-  kNotCompiledUnresolvedField,
   kNotCompiledNonSequentialRegPair,
+  kNotCompiledPathological,
   kNotCompiledSpaceFilter,
-  kNotOptimizedTryCatch,
-  kNotOptimizedDisabled,
-  kNotCompiledCantAccesType,
-  kNotOptimizedRegisterAllocator,
   kNotCompiledUnhandledInstruction,
+  kNotCompiledUnresolvedField,
+  kNotCompiledUnresolvedMethod,
+  kNotCompiledUnsupportedIsa,
   kNotCompiledVerifyAtRuntime,
-  kNotCompiledClassNotVerified,
+  kNotOptimizedDisabled,
+  kNotOptimizedRegisterAllocator,
+  kNotOptimizedTryCatch,
   kRemovedCheckedCast,
   kRemovedDeadInstruction,
   kRemovedNullCheck,
@@ -58,8 +59,8 @@
  public:
   OptimizingCompilerStats() {}
 
-  void RecordStat(MethodCompilationStat stat) {
-    compile_stats_[stat]++;
+  void RecordStat(MethodCompilationStat stat, size_t count = 1) {
+    compile_stats_[stat] += count;
   }
 
   void Log() const {
@@ -98,23 +99,24 @@
       case kCompiledQuick : return "kCompiledQuick";
       case kInlinedInvoke : return "kInlinedInvoke";
       case kInstructionSimplifications: return "kInstructionSimplifications";
-      case kNotCompiledUnsupportedIsa : return "kNotCompiledUnsupportedIsa";
-      case kNotCompiledPathological : return "kNotCompiledPathological";
+      case kNotCompiledBranchOutsideMethodCode: return "kNotCompiledBranchOutsideMethodCode";
+      case kNotCompiledCannotBuildSSA : return "kNotCompiledCannotBuildSSA";
+      case kNotCompiledCantAccesType : return "kNotCompiledCantAccesType";
+      case kNotCompiledClassNotVerified : return "kNotCompiledClassNotVerified";
       case kNotCompiledHugeMethod : return "kNotCompiledHugeMethod";
       case kNotCompiledLargeMethodNoBranches : return "kNotCompiledLargeMethodNoBranches";
-      case kNotCompiledCannotBuildSSA : return "kNotCompiledCannotBuildSSA";
       case kNotCompiledNoCodegen : return "kNotCompiledNoCodegen";
-      case kNotCompiledUnresolvedMethod : return "kNotCompiledUnresolvedMethod";
-      case kNotCompiledUnresolvedField : return "kNotCompiledUnresolvedField";
       case kNotCompiledNonSequentialRegPair : return "kNotCompiledNonSequentialRegPair";
-      case kNotOptimizedDisabled : return "kNotOptimizedDisabled";
-      case kNotOptimizedTryCatch : return "kNotOptimizedTryCatch";
-      case kNotCompiledCantAccesType : return "kNotCompiledCantAccesType";
+      case kNotCompiledPathological : return "kNotCompiledPathological";
       case kNotCompiledSpaceFilter : return "kNotCompiledSpaceFilter";
-      case kNotOptimizedRegisterAllocator : return "kNotOptimizedRegisterAllocator";
       case kNotCompiledUnhandledInstruction : return "kNotCompiledUnhandledInstruction";
+      case kNotCompiledUnresolvedField : return "kNotCompiledUnresolvedField";
+      case kNotCompiledUnresolvedMethod : return "kNotCompiledUnresolvedMethod";
+      case kNotCompiledUnsupportedIsa : return "kNotCompiledUnsupportedIsa";
       case kNotCompiledVerifyAtRuntime : return "kNotCompiledVerifyAtRuntime";
-      case kNotCompiledClassNotVerified : return "kNotCompiledClassNotVerified";
+      case kNotOptimizedDisabled : return "kNotOptimizedDisabled";
+      case kNotOptimizedRegisterAllocator : return "kNotOptimizedRegisterAllocator";
+      case kNotOptimizedTryCatch : return "kNotOptimizedTryCatch";
       case kRemovedCheckedCast: return "kRemovedCheckedCast";
       case kRemovedDeadInstruction: return "kRemovedDeadInstruction";
       case kRemovedNullCheck: return "kRemovedNullCheck";
diff --git a/compiler/optimizing/optimizing_unit_test.h b/compiler/optimizing/optimizing_unit_test.h
index 6b23692..4f8ec65 100644
--- a/compiler/optimizing/optimizing_unit_test.h
+++ b/compiler/optimizing/optimizing_unit_test.h
@@ -72,11 +72,16 @@
   }
 }
 
+inline HGraph* CreateGraph(ArenaAllocator* allocator) {
+  void* dex_file_storage = allocator->Alloc(sizeof(DexFile));  // Raw bytes, never constructed.
+  return new (allocator) HGraph(allocator, *reinterpret_cast<DexFile*>(dex_file_storage), -1);
+}
+
 // Create a control-flow graph from Dex instructions.
 inline HGraph* CreateCFG(ArenaAllocator* allocator,
                          const uint16_t* data,
                          Primitive::Type return_type = Primitive::kPrimInt) {
-  HGraph* graph = new (allocator) HGraph(allocator);
+  HGraph* graph = CreateGraph(allocator);
   HGraphBuilder builder(graph, return_type);
   const DexFile::CodeItem* item =
     reinterpret_cast<const DexFile::CodeItem*>(data);
diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc
index f5d8d82..78d1185 100644
--- a/compiler/optimizing/prepare_for_register_allocation.cc
+++ b/compiler/optimizing/prepare_for_register_allocation.cc
@@ -79,4 +79,26 @@
   }
 }
 
+void PrepareForRegisterAllocation::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
+  if (!invoke->IsStaticWithExplicitClinitCheck()) {
+    // Nothing to clean up: the invoke carries no extra clinit-check input.
+    return;
+  }
+
+  // The graph builder appended a clinit check as the last input of this
+  // static invoke; VisitClinitCheck has since removed that check, leaving
+  // a load class instruction as last input. After inlining, that trailing
+  // input is no longer required, so detach it from the invoke.
+  HInstruction* const load_class = invoke->InputAt(invoke->InputCount() - 1);
+  DCHECK(load_class->IsLoadClass()) << load_class->DebugName();
+  invoke->RemoveLoadClassAsLastInput();
+
+  // Drop the load class instruction from the graph altogether once
+  // nothing else uses it.
+  if (load_class->HasUses()) {
+    return;
+  }
+  load_class->GetBlock()->RemoveInstruction(load_class);
+}
+
 }  // namespace art
diff --git a/compiler/optimizing/prepare_for_register_allocation.h b/compiler/optimizing/prepare_for_register_allocation.h
index c28507c..d7f277f 100644
--- a/compiler/optimizing/prepare_for_register_allocation.h
+++ b/compiler/optimizing/prepare_for_register_allocation.h
@@ -39,6 +39,7 @@
   void VisitBoundType(HBoundType* bound_type) OVERRIDE;
   void VisitClinitCheck(HClinitCheck* check) OVERRIDE;
   void VisitCondition(HCondition* condition) OVERRIDE;
+  void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE;
 
   DISALLOW_COPY_AND_ASSIGN(PrepareForRegisterAllocation);
 };
diff --git a/compiler/optimizing/pretty_printer_test.cc b/compiler/optimizing/pretty_printer_test.cc
index 293fde9..c56100d 100644
--- a/compiler/optimizing/pretty_printer_test.cc
+++ b/compiler/optimizing/pretty_printer_test.cc
@@ -30,7 +30,7 @@
 static void TestCode(const uint16_t* data, const char* expected) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
-  HGraph* graph = new (&allocator) HGraph(&allocator);
+  HGraph* graph = CreateGraph(&allocator);
   HGraphBuilder builder(graph);
   const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
   bool graph_built = builder.BuildGraph(*item);
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index 0fdf051..925099a 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -768,14 +768,14 @@
     }
   } else {
     DCHECK(!current->IsHighInterval());
-    int hint = current->FindFirstRegisterHint(free_until);
+    int hint = current->FindFirstRegisterHint(free_until, liveness_);
     if (hint != kNoRegister) {
       DCHECK(!IsBlocked(hint));
       reg = hint;
     } else if (current->IsLowInterval()) {
       reg = FindAvailableRegisterPair(free_until, current->GetStart());
     } else {
-      reg = FindAvailableRegister(free_until);
+      reg = FindAvailableRegister(free_until, current);
     }
   }
 
@@ -839,14 +839,52 @@
   return reg;
 }
 
-int RegisterAllocator::FindAvailableRegister(size_t* next_use) const {
+bool RegisterAllocator::IsCallerSaveRegister(int reg) const {
+  return processing_core_registers_
+      ? !codegen_->IsCoreCalleeSaveRegister(reg)
+      : !codegen_->IsFloatingPointCalleeSaveRegister(reg);
+}
+
+int RegisterAllocator::FindAvailableRegister(size_t* next_use, LiveInterval* current) const {
+  // We special case intervals that do not span a safepoint to try to find a caller-save
+  // register if one is available. We iterate from 0 to the number of registers,
+  // so if there are caller-save registers available at the end, we continue the iteration.
+  bool prefers_caller_save = !current->HasWillCallSafepoint();
   int reg = kNoRegister;
-  // Pick the register that is used the last.
   for (size_t i = 0; i < number_of_registers_; ++i) {
-    if (IsBlocked(i)) continue;
-    if (reg == kNoRegister || next_use[i] > next_use[reg]) {
+    if (IsBlocked(i)) {
+      // Register cannot be used. Continue.
+      continue;
+    }
+
+    // Best case: we found a register fully available.
+    if (next_use[i] == kMaxLifetimePosition) {
+      if (prefers_caller_save && !IsCallerSaveRegister(i)) {
+        // We can get shorter encodings on some platforms by using
+        // small register numbers. So only update the candidate if the previous
+        // one was not available for the whole method.
+        if (reg == kNoRegister || next_use[reg] != kMaxLifetimePosition) {
+          reg = i;
+        }
+        // Continue the iteration in the hope of finding a caller save register.
+        continue;
+      } else {
+        reg = i;
+        // We know the register is good enough. Return it.
+        break;
+      }
+    }
+
+    // If we had no register before, take this one as a reference.
+    if (reg == kNoRegister) {
       reg = i;
-      if (next_use[i] == kMaxLifetimePosition) break;
+      continue;
+    }
+
+    // Pick the register whose next use is the furthest away.
+    if (next_use[i] > next_use[reg]) {
+      reg = i;
+      continue;
     }
   }
   return reg;
@@ -971,7 +1009,7 @@
       || (first_use >= next_use[GetHighForLowRegister(reg)]);
   } else {
     DCHECK(!current->IsHighInterval());
-    reg = FindAvailableRegister(next_use);
+    reg = FindAvailableRegister(next_use, current);
     should_spill = (first_use >= next_use[reg]);
   }
 
@@ -1101,8 +1139,8 @@
 }
 
 LiveInterval* RegisterAllocator::SplitBetween(LiveInterval* interval, size_t from, size_t to) {
-  HBasicBlock* block_from = liveness_.GetBlockFromPosition(from);
-  HBasicBlock* block_to = liveness_.GetBlockFromPosition(to);
+  HBasicBlock* block_from = liveness_.GetBlockFromPosition(from / 2);
+  HBasicBlock* block_to = liveness_.GetBlockFromPosition(to / 2);
   DCHECK(block_from != nullptr);
   DCHECK(block_to != nullptr);
 
@@ -1111,6 +1149,41 @@
     return Split(interval, to);
   }
 
+  /*
+   * Non-linear control flow will force moves at every branch instruction to the new location.
+   * To avoid having all branches doing the moves, we find the next non-linear position and
+   * split the interval at this position. Take the following example (block number is the linear
+   * order position):
+   *
+   *     B1
+   *    /  \
+   *   B2  B3
+   *    \  /
+   *     B4
+   *
+   * B2 needs to split an interval, whose next use is in B4. If we were to split at the
+   * beginning of B4, B3 would need to do a move between B3 and B4 to ensure the interval
+   * is now in the correct location. It makes performance worse if the interval is spilled
+   * and both B2 and B3 need to reload it before entering B4.
+   *
+   * By splitting at B3, we give a chance to the register allocator to allocate the
+   * interval to the same register as in B1, and therefore avoid doing any
+   * moves in B3.
+   */
+  if (block_from->GetDominator() != nullptr) {
+    const GrowableArray<HBasicBlock*>& dominated = block_from->GetDominator()->GetDominatedBlocks();
+    for (size_t i = 0; i < dominated.Size(); ++i) {
+      size_t position = dominated.Get(i)->GetLifetimeStart();
+      if ((position > from) && (block_to->GetLifetimeStart() > position)) {
+        // Even if we found a better block, we continue iterating in case
+        // a dominated block is closer.
+        // Note that dominated blocks are not sorted in liveness order.
+        block_to = dominated.Get(i);
+        DCHECK_NE(block_to, block_from);
+      }
+    }
+  }
+
   // If `to` is in a loop, find the outermost loop header which does not contain `from`.
   for (HLoopInformationOutwardIterator it(*block_to); !it.Done(); it.Advance()) {
     HBasicBlock* header = it.Current()->GetHeader();
@@ -1455,6 +1528,7 @@
                         : Location::StackSlot(interval->GetParent()->GetSpillSlot()));
   }
   UsePosition* use = current->GetFirstUse();
+  UsePosition* env_use = current->GetFirstEnvironmentUse();
 
   // Walk over all siblings, updating locations of use positions, and
   // connecting them when they are adjacent.
@@ -1467,15 +1541,14 @@
     LiveRange* range = current->GetFirstRange();
     while (range != nullptr) {
       while (use != nullptr && use->GetPosition() < range->GetStart()) {
-        DCHECK(use->GetIsEnvironment());
+        DCHECK(use->IsSynthesized());
         use = use->GetNext();
       }
       while (use != nullptr && use->GetPosition() <= range->GetEnd()) {
+        DCHECK(!use->GetIsEnvironment());
         DCHECK(current->CoversSlow(use->GetPosition()) || (use->GetPosition() == range->GetEnd()));
-        LocationSummary* locations = use->GetUser()->GetLocations();
-        if (use->GetIsEnvironment()) {
-          locations->SetEnvironmentAt(use->GetInputIndex(), source);
-        } else {
+        if (!use->IsSynthesized()) {
+          LocationSummary* locations = use->GetUser()->GetLocations();
           Location expected_location = locations->InAt(use->GetInputIndex());
           // The expected (actual) location may be invalid in case the input is unused. Currently
           // this only happens for intrinsics.
@@ -1492,6 +1565,20 @@
         }
         use = use->GetNext();
       }
+
+      // Walk over the environment uses, and update their locations.
+      while (env_use != nullptr && env_use->GetPosition() < range->GetStart()) {
+        env_use = env_use->GetNext();
+      }
+
+      while (env_use != nullptr && env_use->GetPosition() <= range->GetEnd()) {
+        DCHECK(current->CoversSlow(env_use->GetPosition())
+               || (env_use->GetPosition() == range->GetEnd()));
+        HEnvironment* environment = env_use->GetUser()->GetEnvironment();
+        environment->SetLocationAt(env_use->GetInputIndex(), source);
+        env_use = env_use->GetNext();
+      }
+
       range = range->GetNext();
     }
 
@@ -1554,10 +1641,9 @@
   } while (current != nullptr);
 
   if (kIsDebugBuild) {
-    // Following uses can only be environment uses. The location for
-    // these environments will be none.
+    // Following uses can only be synthesized uses.
     while (use != nullptr) {
-      DCHECK(use->GetIsEnvironment());
+      DCHECK(use->IsSynthesized());
       use = use->GetNext();
     }
   }
diff --git a/compiler/optimizing/register_allocator.h b/compiler/optimizing/register_allocator.h
index dc9c708..6d5bfc3 100644
--- a/compiler/optimizing/register_allocator.h
+++ b/compiler/optimizing/register_allocator.h
@@ -140,7 +140,8 @@
   void DumpInterval(std::ostream& stream, LiveInterval* interval) const;
   void DumpAllIntervals(std::ostream& stream) const;
   int FindAvailableRegisterPair(size_t* next_use, size_t starting_at) const;
-  int FindAvailableRegister(size_t* next_use) const;
+  int FindAvailableRegister(size_t* next_use, LiveInterval* current) const;
+  bool IsCallerSaveRegister(int reg) const;
 
   // Try splitting an active non-pair or unaligned pair interval at the given `position`.
   // Returns whether it was successful at finding such an interval.
diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc
index 8c6d904..b72ffb8 100644
--- a/compiler/optimizing/register_allocator_test.cc
+++ b/compiler/optimizing/register_allocator_test.cc
@@ -38,7 +38,7 @@
 static bool Check(const uint16_t* data) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
-  HGraph* graph = new (&allocator) HGraph(&allocator);
+  HGraph* graph = CreateGraph(&allocator);
   HGraphBuilder builder(graph);
   const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
   builder.BuildGraph(*item);
@@ -60,7 +60,7 @@
 TEST(RegisterAllocatorTest, ValidateIntervals) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
-  HGraph* graph = new (&allocator) HGraph(&allocator);
+  HGraph* graph = CreateGraph(&allocator);
   std::unique_ptr<const X86InstructionSetFeatures> features_x86(
       X86InstructionSetFeatures::FromCppDefines());
   x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
@@ -255,7 +255,7 @@
 }
 
 static HGraph* BuildSSAGraph(const uint16_t* data, ArenaAllocator* allocator) {
-  HGraph* graph = new (allocator) HGraph(allocator);
+  HGraph* graph = CreateGraph(allocator);
   HGraphBuilder builder(graph);
   const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
   builder.BuildGraph(*item);
@@ -463,7 +463,7 @@
                                   HPhi** phi,
                                   HInstruction** input1,
                                   HInstruction** input2) {
-  HGraph* graph = new (allocator) HGraph(allocator);
+  HGraph* graph = CreateGraph(allocator);
   HBasicBlock* entry = new (allocator) HBasicBlock(graph);
   graph->AddBlock(entry);
   graph->SetEntryBlock(entry);
@@ -593,7 +593,7 @@
 static HGraph* BuildFieldReturn(ArenaAllocator* allocator,
                                 HInstruction** field,
                                 HInstruction** ret) {
-  HGraph* graph = new (allocator) HGraph(allocator);
+  HGraph* graph = CreateGraph(allocator);
   HBasicBlock* entry = new (allocator) HBasicBlock(graph);
   graph->AddBlock(entry);
   graph->SetEntryBlock(entry);
@@ -661,7 +661,7 @@
 static HGraph* BuildTwoSubs(ArenaAllocator* allocator,
                             HInstruction** first_sub,
                             HInstruction** second_sub) {
-  HGraph* graph = new (allocator) HGraph(allocator);
+  HGraph* graph = CreateGraph(allocator);
   HBasicBlock* entry = new (allocator) HBasicBlock(graph);
   graph->AddBlock(entry);
   graph->SetEntryBlock(entry);
@@ -731,7 +731,7 @@
 
 static HGraph* BuildDiv(ArenaAllocator* allocator,
                         HInstruction** div) {
-  HGraph* graph = new (allocator) HGraph(allocator);
+  HGraph* graph = CreateGraph(allocator);
   HBasicBlock* entry = new (allocator) HBasicBlock(graph);
   graph->AddBlock(entry);
   graph->SetEntryBlock(entry);
@@ -783,7 +783,7 @@
   // Create a synthesized graph to please the register_allocator and
   // ssa_liveness_analysis code.
   ArenaAllocator allocator(&pool);
-  HGraph* graph = new (&allocator) HGraph(&allocator);
+  HGraph* graph = CreateGraph(&allocator);
   HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
   graph->AddBlock(entry);
   graph->SetEntryBlock(entry);
diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc
index 7a252af..59a2852 100644
--- a/compiler/optimizing/ssa_builder.cc
+++ b/compiler/optimizing/ssa_builder.cc
@@ -332,7 +332,7 @@
 }
 
 HInstruction* SsaBuilder::ValueOfLocal(HBasicBlock* block, size_t local) {
-  return GetLocalsFor(block)->GetInstructionAt(local);
+  return GetLocalsFor(block)->Get(local);
 }
 
 void SsaBuilder::VisitBasicBlock(HBasicBlock* block) {
@@ -349,7 +349,7 @@
         HPhi* phi = new (GetGraph()->GetArena()) HPhi(
             GetGraph()->GetArena(), local, 0, Primitive::kPrimVoid);
         block->AddPhi(phi);
-        current_locals_->SetRawEnvAt(local, phi);
+        current_locals_->Put(local, phi);
       }
     }
     // Save the loop header so that the last phase of the analysis knows which
@@ -389,7 +389,7 @@
         block->AddPhi(phi);
         value = phi;
       }
-      current_locals_->SetRawEnvAt(local, value);
+      current_locals_->Put(local, value);
     }
   }
 
@@ -417,6 +417,7 @@
     ArenaAllocator* allocator = graph->GetArena();
     result = new (allocator) HFloatConstant(bit_cast<float, int32_t>(constant->GetValue()));
     constant->GetBlock()->InsertInstructionBefore(result, constant->GetNext());
+    graph->CacheFloatConstant(result);
   } else {
     // If there is already a constant with the expected type, we know it is
     // the floating point equivalent of this constant.
@@ -439,6 +440,7 @@
     ArenaAllocator* allocator = graph->GetArena();
     result = new (allocator) HDoubleConstant(bit_cast<double, int64_t>(constant->GetValue()));
     constant->GetBlock()->InsertInstructionBefore(result, constant->GetNext());
+    graph->CacheDoubleConstant(result);
   } else {
     // If there is already a constant with the expected type, we know it is
     // the floating point equivalent of this constant.
@@ -518,7 +520,7 @@
 }
 
 void SsaBuilder::VisitLoadLocal(HLoadLocal* load) {
-  HInstruction* value = current_locals_->GetInstructionAt(load->GetLocal()->GetRegNumber());
+  HInstruction* value = current_locals_->Get(load->GetLocal()->GetRegNumber());
   // If the operation requests a specific type, we make sure its input is of that type.
   if (load->GetType() != value->GetType()) {
     if (load->GetType() == Primitive::kPrimFloat || load->GetType() == Primitive::kPrimDouble) {
@@ -532,7 +534,7 @@
 }
 
 void SsaBuilder::VisitStoreLocal(HStoreLocal* store) {
-  current_locals_->SetRawEnvAt(store->GetLocal()->GetRegNumber(), store->InputAt(1));
+  current_locals_->Put(store->GetLocal()->GetRegNumber(), store->InputAt(1));
   store->GetBlock()->RemoveInstruction(store);
 }
 
@@ -541,8 +543,12 @@
     return;
   }
   HEnvironment* environment = new (GetGraph()->GetArena()) HEnvironment(
-      GetGraph()->GetArena(), current_locals_->Size());
-  environment->CopyFrom(current_locals_);
+      GetGraph()->GetArena(),
+      current_locals_->Size(),
+      GetGraph()->GetDexFile(),
+      GetGraph()->GetMethodIdx(),
+      instruction->GetDexPc());
+  environment->CopyFrom(*current_locals_);
   instruction->SetRawEnvironment(environment);
 }
 
diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h
index 265e95b..1c83c4b 100644
--- a/compiler/optimizing/ssa_builder.h
+++ b/compiler/optimizing/ssa_builder.h
@@ -58,14 +58,15 @@
 
   void BuildSsa();
 
-  HEnvironment* GetLocalsFor(HBasicBlock* block) {
-    HEnvironment* env = locals_for_.Get(block->GetBlockId());
-    if (env == nullptr) {
-      env = new (GetGraph()->GetArena()) HEnvironment(
+  GrowableArray<HInstruction*>* GetLocalsFor(HBasicBlock* block) {
+    GrowableArray<HInstruction*>* locals = locals_for_.Get(block->GetBlockId());
+    if (locals == nullptr) {
+      locals = new (GetGraph()->GetArena()) GrowableArray<HInstruction*>(
           GetGraph()->GetArena(), GetGraph()->GetNumberOfVRegs());
-      locals_for_.Put(block->GetBlockId(), env);
+      locals->SetSize(GetGraph()->GetNumberOfVRegs());
+      locals_for_.Put(block->GetBlockId(), locals);
     }
-    return env;
+    return locals;
   }
 
   HInstruction* ValueOfLocal(HBasicBlock* block, size_t local);
@@ -93,14 +94,14 @@
   static HPhi* GetFloatDoubleOrReferenceEquivalentOfPhi(HPhi* phi, Primitive::Type type);
 
   // Locals for the current block being visited.
-  HEnvironment* current_locals_;
+  GrowableArray<HInstruction*>* current_locals_;
 
   // Keep track of loop headers found. The last phase of the analysis iterates
   // over these blocks to set the inputs of their phis.
   GrowableArray<HBasicBlock*> loop_headers_;
 
   // HEnvironment for each block.
-  GrowableArray<HEnvironment*> locals_for_;
+  GrowableArray<GrowableArray<HInstruction*>*> locals_for_;
 
   DISALLOW_COPY_AND_ASSIGN(SsaBuilder);
 };
diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc
index ea0e7c3..250eb04 100644
--- a/compiler/optimizing/ssa_liveness_analysis.cc
+++ b/compiler/optimizing/ssa_liveness_analysis.cc
@@ -75,9 +75,7 @@
     HBasicBlock* block = it.Current();
     size_t number_of_forward_predecessors = block->GetPredecessors().Size();
     if (block->IsLoopHeader()) {
-      // We rely on having simplified the CFG.
-      DCHECK_EQ(1u, block->GetLoopInformation()->NumberOfBackEdges());
-      number_of_forward_predecessors--;
+      number_of_forward_predecessors -= block->GetLoopInformation()->NumberOfBackEdges();
     }
     forward_predecessors.Put(block->GetBlockId(), number_of_forward_predecessors);
   }
@@ -220,10 +218,11 @@
 
       // Process the environment first, because we know their uses come after
       // or at the same liveness position of inputs.
-      if (current->HasEnvironment()) {
+      for (HEnvironment* environment = current->GetEnvironment();
+           environment != nullptr;
+           environment = environment->GetParent()) {
         // Handle environment uses. See statements (b) and (c) of the
         // SsaLivenessAnalysis.
-        HEnvironment* environment = current->GetEnvironment();
         for (size_t i = 0, e = environment->Size(); i < e; ++i) {
           HInstruction* instruction = environment->GetInstructionAt(i);
           bool should_be_live = ShouldBeLiveForEnvironment(instruction);
@@ -233,7 +232,7 @@
           }
           if (instruction != nullptr) {
             instruction->GetLiveInterval()->AddUse(
-                current, i, /* is_environment */ true, should_be_live);
+                current, environment, i, should_be_live);
           }
         }
       }
@@ -245,7 +244,7 @@
         // to be materialized.
         if (input->HasSsaIndex()) {
           live_in->SetBit(input->GetSsaIndex());
-          input->GetLiveInterval()->AddUse(current, i, /* is_environment */ false);
+          input->GetLiveInterval()->AddUse(current, /* environment */ nullptr, i);
         }
       }
     }
@@ -264,13 +263,12 @@
     }
 
     if (block->IsLoopHeader()) {
-      HBasicBlock* back_edge = block->GetLoopInformation()->GetBackEdges().Get(0);
+      size_t last_position = block->GetLoopInformation()->GetLifetimeEnd();
       // For all live_in instructions at the loop header, we need to create a range
       // that covers the full loop.
       for (uint32_t idx : live_in->Indexes()) {
         HInstruction* current = instructions_from_ssa_index_.Get(idx);
-        current->GetLiveInterval()->AddLoopRange(block->GetLifetimeStart(),
-                                                 back_edge->GetLifetimeEnd());
+        current->GetLiveInterval()->AddLoopRange(block->GetLifetimeStart(), last_position);
       }
     }
   }
@@ -322,7 +320,8 @@
   return location.IsPair() ? location.low() : location.reg();
 }
 
-int LiveInterval::FindFirstRegisterHint(size_t* free_until) const {
+int LiveInterval::FindFirstRegisterHint(size_t* free_until,
+                                        const SsaLivenessAnalysis& liveness) const {
   DCHECK(!IsHighInterval());
   if (IsTemp()) return kNoRegister;
 
@@ -336,12 +335,32 @@
     }
   }
 
+  if (IsSplit() && liveness.IsAtBlockBoundary(GetStart() / 2)) {
+    // If the start of this interval is at a block boundary, we look at the
+    // location of the interval in blocks preceding the block this interval
+    // starts at. If one location is a register we return it as a hint. This
+    // will avoid a move between the two blocks.
+    HBasicBlock* block = liveness.GetBlockFromPosition(GetStart() / 2);
+    for (size_t i = 0; i < block->GetPredecessors().Size(); ++i) {
+      size_t position = block->GetPredecessors().Get(i)->GetLifetimeEnd() - 1;
+      // We know positions above GetStart() do not have a location yet.
+      if (position < GetStart()) {
+        LiveInterval* existing = GetParent()->GetSiblingAt(position);
+        if (existing != nullptr
+            && existing->HasRegister()
+            && (free_until[existing->GetRegister()] > GetStart())) {
+          return existing->GetRegister();
+        }
+      }
+    }
+  }
+
   UsePosition* use = first_use_;
   size_t start = GetStart();
   size_t end = GetEnd();
   while (use != nullptr && use->GetPosition() <= end) {
     size_t use_position = use->GetPosition();
-    if (use_position >= start && !use->GetIsEnvironment()) {
+    if (use_position >= start && !use->IsSynthesized()) {
       HInstruction* user = use->GetUser();
       size_t input_index = use->GetInputIndex();
       if (user->IsPhi()) {
diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h
index 97254ed..4b19c5b 100644
--- a/compiler/optimizing/ssa_liveness_analysis.h
+++ b/compiler/optimizing/ssa_liveness_analysis.h
@@ -23,6 +23,7 @@
 namespace art {
 
 class CodeGenerator;
+class SsaLivenessAnalysis;
 
 static constexpr int kNoRegister = -1;
 
@@ -75,7 +76,7 @@
   }
 
   void Dump(std::ostream& stream) const {
-    stream << "[" << start_ << ", " << end_ << ")";
+    stream << "[" << start_ << "," << end_ << ")";
   }
 
   LiveRange* Dup(ArenaAllocator* allocator) const {
@@ -103,21 +104,24 @@
 class UsePosition : public ArenaObject<kArenaAllocMisc> {
  public:
   UsePosition(HInstruction* user,
+              HEnvironment* environment,
               size_t input_index,
-              bool is_environment,
               size_t position,
               UsePosition* next)
       : user_(user),
+        environment_(environment),
         input_index_(input_index),
-        is_environment_(is_environment),
         position_(position),
         next_(next) {
-    DCHECK(user->IsPhi()
+    DCHECK((user == nullptr)
+        || user->IsPhi()
         || (GetPosition() == user->GetLifetimePosition() + 1)
         || (GetPosition() == user->GetLifetimePosition()));
     DCHECK(next_ == nullptr || next->GetPosition() >= GetPosition());
   }
 
+  static constexpr size_t kNoInput = -1;
+
   size_t GetPosition() const { return position_; }
 
   UsePosition* GetNext() const { return next_; }
@@ -125,27 +129,38 @@
 
   HInstruction* GetUser() const { return user_; }
 
-  bool GetIsEnvironment() const { return is_environment_; }
+  bool GetIsEnvironment() const { return environment_ != nullptr; }
+  bool IsSynthesized() const { return user_ == nullptr; }
 
   size_t GetInputIndex() const { return input_index_; }
 
   void Dump(std::ostream& stream) const {
     stream << position_;
-    if (is_environment_) {
-      stream << " (env)";
-    }
+  }
+
+  HLoopInformation* GetLoopInformation() const {
+    return user_->GetBlock()->GetLoopInformation();
   }
 
   UsePosition* Dup(ArenaAllocator* allocator) const {
     return new (allocator) UsePosition(
-        user_, input_index_, is_environment_, position_,
+        user_, environment_, input_index_, position_,
         next_ == nullptr ? nullptr : next_->Dup(allocator));
   }
 
+  bool RequiresRegister() const {
+    if (GetIsEnvironment()) return false;
+    if (IsSynthesized()) return false;
+    Location location = GetUser()->GetLocations()->InAt(GetInputIndex());
+    return location.IsUnallocated()
+        && (location.GetPolicy() == Location::kRequiresRegister
+            || location.GetPolicy() == Location::kRequiresFpuRegister);
+  }
+
  private:
   HInstruction* const user_;
+  HEnvironment* const environment_;
   const size_t input_index_;
-  const bool is_environment_;
   const size_t position_;
   UsePosition* next_;
 
@@ -219,17 +234,19 @@
   void AddTempUse(HInstruction* instruction, size_t temp_index) {
     DCHECK(IsTemp());
     DCHECK(first_use_ == nullptr) << "A temporary can only have one user";
+    DCHECK(first_env_use_ == nullptr) << "A temporary cannot have environment user";
     size_t position = instruction->GetLifetimePosition();
     first_use_ = new (allocator_) UsePosition(
-        instruction, temp_index, /* is_environment */ false, position, first_use_);
+        instruction, /* environment */ nullptr, temp_index, position, first_use_);
     AddRange(position, position + 1);
   }
 
   void AddUse(HInstruction* instruction,
+              HEnvironment* environment,
               size_t input_index,
-              bool is_environment,
               bool keep_alive = false) {
     // Set the use within the instruction.
+    bool is_environment = (environment != nullptr);
     size_t position = instruction->GetLifetimePosition() + 1;
     LocationSummary* locations = instruction->GetLocations();
     if (!is_environment) {
@@ -239,9 +256,15 @@
         // location of the input just before that instruction (and not potential moves due
         // to splitting).
         position = instruction->GetLifetimePosition();
+      } else if (!locations->InAt(input_index).IsValid()) {
+        return;
       }
     }
 
+    if (!is_environment && instruction->IsInLoop()) {
+      AddBackEdgeUses(*instruction->GetBlock());
+    }
+
     DCHECK(position == instruction->GetLifetimePosition()
            || position == instruction->GetLifetimePosition() + 1);
 
@@ -257,7 +280,7 @@
       }
       DCHECK(first_use_->GetPosition() + 1 == position);
       UsePosition* new_use = new (allocator_) UsePosition(
-          instruction, input_index, is_environment, position, cursor->GetNext());
+          instruction, environment, input_index, position, cursor->GetNext());
       cursor->SetNext(new_use);
       if (first_range_->GetEnd() == first_use_->GetPosition()) {
         first_range_->end_ = position;
@@ -265,8 +288,13 @@
       return;
     }
 
-    first_use_ = new (allocator_) UsePosition(
-        instruction, input_index, is_environment, position, first_use_);
+    if (is_environment) {
+      first_env_use_ = new (allocator_) UsePosition(
+          instruction, environment, input_index, position, first_env_use_);
+    } else {
+      first_use_ = new (allocator_) UsePosition(
+          instruction, environment, input_index, position, first_use_);
+    }
 
     if (is_environment && !keep_alive) {
       // If this environment use does not keep the instruction live, it does not
@@ -300,8 +328,11 @@
 
   void AddPhiUse(HInstruction* instruction, size_t input_index, HBasicBlock* block) {
     DCHECK(instruction->IsPhi());
+    if (block->IsInLoop()) {
+      AddBackEdgeUses(*block);
+    }
     first_use_ = new (allocator_) UsePosition(
-        instruction, input_index, false, block->GetLifetimeEnd(), first_use_);
+        instruction, /* environment */ nullptr, input_index, block->GetLifetimeEnd(), first_use_);
   }
 
   void AddRange(size_t start, size_t end) {
@@ -450,38 +481,17 @@
     if (is_temp_) {
       return position == GetStart() ? position : kNoLifetime;
     }
-    if (position == GetStart() && IsParent()) {
-      LocationSummary* locations = defined_by_->GetLocations();
-      Location location = locations->Out();
-      // This interval is the first interval of the instruction. If the output
-      // of the instruction requires a register, we return the position of that instruction
-      // as the first register use.
-      if (location.IsUnallocated()) {
-        if ((location.GetPolicy() == Location::kRequiresRegister)
-             || (location.GetPolicy() == Location::kSameAsFirstInput
-                 && (locations->InAt(0).IsRegister()
-                     || locations->InAt(0).IsRegisterPair()
-                     || locations->InAt(0).GetPolicy() == Location::kRequiresRegister))) {
-          return position;
-        } else if ((location.GetPolicy() == Location::kRequiresFpuRegister)
-                   || (location.GetPolicy() == Location::kSameAsFirstInput
-                       && locations->InAt(0).GetPolicy() == Location::kRequiresFpuRegister)) {
-          return position;
-        }
-      } else if (location.IsRegister() || location.IsRegisterPair()) {
-        return position;
-      }
+
+    if (IsDefiningPosition(position) && DefinitionRequiresRegister()) {
+      return position;
     }
 
     UsePosition* use = first_use_;
     size_t end = GetEnd();
     while (use != nullptr && use->GetPosition() <= end) {
       size_t use_position = use->GetPosition();
-      if (use_position > position && !use->GetIsEnvironment()) {
-        Location location = use->GetUser()->GetLocations()->InAt(use->GetInputIndex());
-        if (location.IsUnallocated()
-            && (location.GetPolicy() == Location::kRequiresRegister
-                || location.GetPolicy() == Location::kRequiresFpuRegister)) {
+      if (use_position > position) {
+        if (use->RequiresRegister()) {
           return use_position;
         }
       }
@@ -499,21 +509,17 @@
       return position == GetStart() ? position : kNoLifetime;
     }
 
-    if (position == GetStart() && IsParent()) {
-      if (defined_by_->GetLocations()->Out().IsValid()) {
-        return position;
-      }
+    if (IsDefiningPosition(position)) {
+      DCHECK(defined_by_->GetLocations()->Out().IsValid());
+      return position;
     }
 
     UsePosition* use = first_use_;
     size_t end = GetEnd();
     while (use != nullptr && use->GetPosition() <= end) {
-      if (!use->GetIsEnvironment()) {
-        Location location = use->GetUser()->GetLocations()->InAt(use->GetInputIndex());
-        size_t use_position = use->GetPosition();
-        if (use_position > position && location.IsValid()) {
-          return use_position;
-        }
+      size_t use_position = use->GetPosition();
+      if (use_position > position) {
+        return use_position;
       }
       use = use->GetNext();
     }
@@ -524,6 +530,10 @@
     return first_use_;
   }
 
+  UsePosition* GetFirstEnvironmentUse() const {
+    return first_env_use_;
+  }
+
   Primitive::Type GetType() const {
     return type_;
   }
@@ -532,6 +542,15 @@
     return defined_by_;
   }
 
+  bool HasWillCallSafepoint() const {
+    for (SafepointPosition* safepoint = first_safepoint_;
+         safepoint != nullptr;
+         safepoint = safepoint->GetNext()) {
+      if (safepoint->GetLocations()->WillCall()) return true;
+    }
+    return false;
+  }
+
   SafepointPosition* FindSafepointJustBefore(size_t position) const {
     for (SafepointPosition* safepoint = first_safepoint_, *previous = nullptr;
          safepoint != nullptr;
@@ -577,6 +596,7 @@
     new_interval->parent_ = parent_;
 
     new_interval->first_use_ = first_use_;
+    new_interval->first_env_use_ = first_env_use_;
     LiveRange* current = first_range_;
     LiveRange* previous = nullptr;
     // Iterate over the ranges, and either find a range that covers this position, or
@@ -655,10 +675,18 @@
         stream << " ";
       } while ((use = use->GetNext()) != nullptr);
     }
+    stream << "}, { ";
+    use = first_env_use_;
+    if (use != nullptr) {
+      do {
+        use->Dump(stream);
+        stream << " ";
+      } while ((use = use->GetNext()) != nullptr);
+    }
     stream << "}";
     stream << " is_fixed: " << is_fixed_ << ", is_split: " << IsSplit();
-    stream << " is_high: " << IsHighInterval();
     stream << " is_low: " << IsLowInterval();
+    stream << " is_high: " << IsHighInterval();
   }
 
   LiveInterval* GetNextSibling() const { return next_sibling_; }
@@ -673,7 +701,7 @@
   // Returns the first register hint that is at least free before
   // the value contained in `free_until`. If none is found, returns
   // `kNoRegister`.
-  int FindFirstRegisterHint(size_t* free_until) const;
+  int FindFirstRegisterHint(size_t* free_until, const SsaLivenessAnalysis& liveness) const;
 
   // If there is enough at the definition site to find a register (for example
   // it uses the same input as the first input), returns the register as a hint.
@@ -754,6 +782,10 @@
     if (first_use_ != nullptr) {
       high_or_low_interval_->first_use_ = first_use_->Dup(allocator_);
     }
+
+    if (first_env_use_ != nullptr) {
+      high_or_low_interval_->first_env_use_ = first_env_use_->Dup(allocator_);
+    }
   }
 
   // Returns whether an interval, when it is non-split, is using
@@ -851,6 +883,7 @@
         first_safepoint_(nullptr),
         last_safepoint_(nullptr),
         first_use_(nullptr),
+        first_env_use_(nullptr),
         type_(type),
         next_sibling_(nullptr),
         parent_(this),
@@ -888,6 +921,107 @@
     return range;
   }
 
+  bool DefinitionRequiresRegister() const {
+    DCHECK(IsParent());
+    LocationSummary* locations = defined_by_->GetLocations();
+    Location location = locations->Out();
+    // This interval is the first interval of the instruction. Return true if the output
+    // of the instruction needs a register: its policy requires a (FPU) register, it is the
+    // same as a first input in/requiring one, or it is a fixed register or register pair.
+    if (location.IsUnallocated()) {
+      if ((location.GetPolicy() == Location::kRequiresRegister)
+           || (location.GetPolicy() == Location::kSameAsFirstInput
+               && (locations->InAt(0).IsRegister()
+                   || locations->InAt(0).IsRegisterPair()
+                   || locations->InAt(0).GetPolicy() == Location::kRequiresRegister))) {
+        return true;
+      } else if ((location.GetPolicy() == Location::kRequiresFpuRegister)
+                 || (location.GetPolicy() == Location::kSameAsFirstInput
+                     && (locations->InAt(0).IsFpuRegister()
+                         || locations->InAt(0).IsFpuRegisterPair()
+                         || locations->InAt(0).GetPolicy() == Location::kRequiresFpuRegister))) {
+        return true;
+      }
+    } else if (location.IsRegister() || location.IsRegisterPair()) {
+      return true;
+    }
+    return false;
+  }
+
+  bool IsDefiningPosition(size_t position) const {
+    return IsParent() && (position == GetStart());
+  }
+
+  bool HasSynthesizeUseAt(size_t position) const {
+    UsePosition* use = first_use_;
+    while (use != nullptr) {
+      size_t use_position = use->GetPosition();
+      if ((use_position == position) && use->IsSynthesized()) {
+        return true;
+      }
+      if (use_position > position) break;
+      use = use->GetNext();
+    }
+    return false;
+  }
+
+  void AddBackEdgeUses(const HBasicBlock& block_at_use) {
+    DCHECK(block_at_use.IsInLoop());
+    // Add synthesized uses at the back edge of loops to help the register allocator.
+    // Note that this method is called in decreasing liveness order, to facilitate adding
+    // uses at the head of the `first_use_` linked list. Because below
+    // we iterate from inner-most to outer-most, which is in increasing liveness order,
+    // we need to take extra care of how the `first_use_` linked list is being updated.
+    UsePosition* first_in_new_list = nullptr;
+    UsePosition* last_in_new_list = nullptr;
+    for (HLoopInformationOutwardIterator it(block_at_use);
+         !it.Done();
+         it.Advance()) {
+      HLoopInformation* current = it.Current();
+      if (GetDefinedBy()->GetLifetimePosition() >= current->GetHeader()->GetLifetimeStart()) {
+        // This interval is defined in the loop. We can stop going outward.
+        break;
+      }
+
+      // We're only adding a synthesized use at the last back edge. Adding synthesized uses on
+      // all back edges is not necessary: anything used in the loop will have its use at the
+      // last back edge. If we want branches in a loop to have better register allocation than
+      // another branch, then it is the linear order we should change.
+      size_t back_edge_use_position = current->GetLifetimeEnd();
+      if ((first_use_ != nullptr) && (first_use_->GetPosition() <= back_edge_use_position)) {
+        // There was a use already seen in this loop. Therefore the previous call to `AddUse`
+        // already inserted the backedge use. We can stop going outward.
+        DCHECK(HasSynthesizeUseAt(back_edge_use_position));
+        break;
+      }
+
+      DCHECK(last_in_new_list == nullptr
+             || back_edge_use_position > last_in_new_list->GetPosition());
+
+      UsePosition* new_use = new (allocator_) UsePosition(
+          /* user */ nullptr,
+          /* environment */ nullptr,
+          UsePosition::kNoInput,
+          back_edge_use_position,
+          /* next */ nullptr);
+
+      if (last_in_new_list != nullptr) {
+        // Going outward. The latest created use needs to point to the new use.
+        last_in_new_list->SetNext(new_use);
+      } else {
+        // This is the inner-most loop.
+        DCHECK_EQ(current, block_at_use.GetLoopInformation());
+        first_in_new_list = new_use;
+      }
+      last_in_new_list = new_use;
+    }
+    // Link the newly created linked list with `first_use_`.
+    if (last_in_new_list != nullptr) {
+      last_in_new_list->SetNext(first_use_);
+      first_use_ = first_in_new_list;
+    }
+  }
+
   ArenaAllocator* const allocator_;
 
   // Ranges of this interval. We need a quick access to the last range to test
@@ -905,6 +1039,7 @@
 
   // Uses of this interval. Note that this linked list is shared amongst siblings.
   UsePosition* first_use_;
+  UsePosition* first_env_use_;
 
   // The instruction type this interval corresponds to.
   const Primitive::Type type_;
@@ -999,14 +1134,18 @@
   }
 
   HBasicBlock* GetBlockFromPosition(size_t index) const {
-    HInstruction* instruction = GetInstructionFromPosition(index / 2);
+    HInstruction* instruction = GetInstructionFromPosition(index);
     if (instruction == nullptr) {
       // If we are at a block boundary, get the block following.
-      instruction = GetInstructionFromPosition((index / 2) + 1);
+      instruction = GetInstructionFromPosition(index + 1);
     }
     return instruction->GetBlock();
   }
 
+  bool IsAtBlockBoundary(size_t index) const {
+    return GetInstructionFromPosition(index) == nullptr;
+  }
+
   HInstruction* GetTempUser(LiveInterval* temp) const {
     // A temporary shares the same lifetime start as the instruction that requires it.
     DCHECK(temp->IsTemp());
diff --git a/compiler/optimizing/ssa_test.cc b/compiler/optimizing/ssa_test.cc
index 00c241b..fb3e7d7 100644
--- a/compiler/optimizing/ssa_test.cc
+++ b/compiler/optimizing/ssa_test.cc
@@ -78,7 +78,7 @@
 static void TestCode(const uint16_t* data, const char* expected) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
-  HGraph* graph = new (&allocator) HGraph(&allocator);
+  HGraph* graph = CreateGraph(&allocator);
   HGraphBuilder builder(graph);
   const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
   bool graph_built = builder.BuildGraph(*item);
@@ -373,30 +373,26 @@
   const char* expected =
     "BasicBlock 0, succ: 1\n"
     "  0: IntConstant 0 [5]\n"
-    "  1: IntConstant 4 [14, 8, 8]\n"
-    "  2: IntConstant 5 [14]\n"
+    "  1: IntConstant 4 [5, 8, 8]\n"
+    "  2: IntConstant 5 [5]\n"
     "  3: Goto\n"
     "BasicBlock 1, pred: 0, succ: 2\n"
     "  4: Goto\n"
-    "BasicBlock 2, pred: 1, 8, succ: 6, 3\n"
-    "  5: Phi(0, 14) [12, 6, 6]\n"
+    "BasicBlock 2, pred: 1, 4, 5, succ: 6, 3\n"
+    "  5: Phi(0, 2, 1) [12, 6, 6]\n"
     "  6: Equal(5, 5) [7]\n"
     "  7: If(6)\n"
     "BasicBlock 3, pred: 2, succ: 5, 4\n"
     "  8: Equal(1, 1) [9]\n"
     "  9: If(8)\n"
-    "BasicBlock 4, pred: 3, succ: 8\n"
+    "BasicBlock 4, pred: 3, succ: 2\n"
     "  10: Goto\n"
-    "BasicBlock 5, pred: 3, succ: 8\n"
+    "BasicBlock 5, pred: 3, succ: 2\n"
     "  11: Goto\n"
     "BasicBlock 6, pred: 2, succ: 7\n"
     "  12: Return(5)\n"
     "BasicBlock 7, pred: 6\n"
-    "  13: Exit\n"
-    // Synthesized single back edge of loop.
-    "BasicBlock 8, pred: 5, 4, succ: 2\n"
-    "  14: Phi(1, 2) [5]\n"
-    "  15: Goto\n";
+    "  13: Exit\n";
 
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc
index fcc86d5..89035a3 100644
--- a/compiler/optimizing/stack_map_stream.cc
+++ b/compiler/optimizing/stack_map_stream.cc
@@ -13,34 +13,33 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 #include "stack_map_stream.h"
 
 namespace art {
 
-void StackMapStream::AddStackMapEntry(uint32_t dex_pc,
-                                     uint32_t native_pc_offset,
-                                     uint32_t register_mask,
-                                     BitVector* sp_mask,
-                                     uint32_t num_dex_registers,
-                                     uint8_t inlining_depth) {
-  StackMapEntry entry;
-  entry.dex_pc = dex_pc;
-  entry.native_pc_offset = native_pc_offset;
-  entry.register_mask = register_mask;
-  entry.sp_mask = sp_mask;
-  entry.num_dex_registers = num_dex_registers;
-  entry.inlining_depth = inlining_depth;
-  entry.dex_register_locations_start_index = dex_register_locations_.Size();
-  entry.inline_infos_start_index = inline_infos_.Size();
-  entry.dex_register_map_hash = 0;
+void StackMapStream::BeginStackMapEntry(uint32_t dex_pc,
+                                        uint32_t native_pc_offset,
+                                        uint32_t register_mask,
+                                        BitVector* sp_mask,
+                                        uint32_t num_dex_registers,
+                                        uint8_t inlining_depth) {
+  DCHECK_EQ(0u, current_entry_.dex_pc) << "EndStackMapEntry not called after BeginStackMapEntry";
+  current_entry_.dex_pc = dex_pc;
+  current_entry_.native_pc_offset = native_pc_offset;
+  current_entry_.register_mask = register_mask;
+  current_entry_.sp_mask = sp_mask;
+  current_entry_.num_dex_registers = num_dex_registers;
+  current_entry_.inlining_depth = inlining_depth;
+  current_entry_.dex_register_locations_start_index = dex_register_locations_.Size();
+  current_entry_.inline_infos_start_index = inline_infos_.Size();
+  current_entry_.dex_register_map_hash = 0;
+  current_entry_.same_dex_register_map_as_ = kNoSameDexMapFound;
   if (num_dex_registers != 0) {
-    entry.live_dex_registers_mask =
+    current_entry_.live_dex_registers_mask =
         new (allocator_) ArenaBitVector(allocator_, num_dex_registers, true);
   } else {
-    entry.live_dex_registers_mask = nullptr;
+    current_entry_.live_dex_registers_mask = nullptr;
   }
-  stack_maps_.Add(entry);
 
   if (sp_mask != nullptr) {
     stack_mask_max_ = std::max(stack_mask_max_, sp_mask->GetHighestBitSet());
@@ -52,14 +51,16 @@
   dex_pc_max_ = std::max(dex_pc_max_, dex_pc);
   native_pc_offset_max_ = std::max(native_pc_offset_max_, native_pc_offset);
   register_mask_max_ = std::max(register_mask_max_, register_mask);
+  current_dex_register_ = 0;
 }
 
-void StackMapStream::AddDexRegisterEntry(uint16_t dex_register,
-                                        DexRegisterLocation::Kind kind,
-                                        int32_t value) {
-  StackMapEntry entry = stack_maps_.Get(stack_maps_.Size() - 1);
-  DCHECK_LT(dex_register, entry.num_dex_registers);
+void StackMapStream::EndStackMapEntry() {
+  current_entry_.same_dex_register_map_as_ = FindEntryWithTheSameDexMap();
+  stack_maps_.Add(current_entry_);
+  current_entry_ = StackMapEntry();
+}
 
+void StackMapStream::AddDexRegisterEntry(DexRegisterLocation::Kind kind, int32_t value) {
   if (kind != DexRegisterLocation::Kind::kNone) {
     // Ensure we only use non-compressed location kind at this stage.
     DCHECK(DexRegisterLocation::IsShortLocationKind(kind))
@@ -82,44 +83,76 @@
       location_catalog_entries_indices_.Insert(std::make_pair(location, index));
     }
 
-    entry.live_dex_registers_mask->SetBit(dex_register);
-    entry.dex_register_map_hash +=
-      (1 << (dex_register % (sizeof(entry.dex_register_map_hash) * kBitsPerByte)));
-    entry.dex_register_map_hash += static_cast<uint32_t>(value);
-    entry.dex_register_map_hash += static_cast<uint32_t>(kind);
-    stack_maps_.Put(stack_maps_.Size() - 1, entry);
+    if (in_inline_frame_) {
+      // TODO: Support sharing DexRegisterMap across InlineInfo.
+      DCHECK_LT(current_dex_register_, current_inline_info_.num_dex_registers);
+      current_inline_info_.live_dex_registers_mask->SetBit(current_dex_register_);
+    } else {
+      DCHECK_LT(current_dex_register_, current_entry_.num_dex_registers);
+      current_entry_.live_dex_registers_mask->SetBit(current_dex_register_);
+      current_entry_.dex_register_map_hash += (1 <<
+          (current_dex_register_ % (sizeof(current_entry_.dex_register_map_hash) * kBitsPerByte)));
+      current_entry_.dex_register_map_hash += static_cast<uint32_t>(value);
+      current_entry_.dex_register_map_hash += static_cast<uint32_t>(kind);
+    }
   }
+  current_dex_register_++;
 }
 
-void StackMapStream::AddInlineInfoEntry(uint32_t method_index) {
-  InlineInfoEntry entry;
-  entry.method_index = method_index;
-  inline_infos_.Add(entry);
+void StackMapStream::BeginInlineInfoEntry(uint32_t method_index,
+                                          uint32_t dex_pc,
+                                          uint32_t num_dex_registers) {
+  DCHECK(!in_inline_frame_);
+  in_inline_frame_ = true;
+  current_inline_info_.method_index = method_index;
+  current_inline_info_.dex_pc = dex_pc;
+  current_inline_info_.num_dex_registers = num_dex_registers;
+  current_inline_info_.dex_register_locations_start_index = dex_register_locations_.Size();
+  if (num_dex_registers != 0) {
+    current_inline_info_.live_dex_registers_mask =
+        new (allocator_) ArenaBitVector(allocator_, num_dex_registers, true);
+  } else {
+    current_inline_info_.live_dex_registers_mask = nullptr;
+  }
+  current_dex_register_ = 0;
 }
 
-size_t StackMapStream::ComputeNeededSize() {
-  size_t size = CodeInfo::kFixedSize
-      + ComputeDexRegisterLocationCatalogSize()
-      + ComputeStackMapsSize()
-      + ComputeDexRegisterMapsSize()
-      + ComputeInlineInfoSize();
+void StackMapStream::EndInlineInfoEntry() {
+  DCHECK(in_inline_frame_);
+  DCHECK_EQ(current_dex_register_, current_inline_info_.num_dex_registers)
+      << "Inline information contains less registers than expected";
+  in_inline_frame_ = false;
+  inline_infos_.Add(current_inline_info_);
+  current_inline_info_ = InlineInfoEntry();
+}
+
+size_t StackMapStream::PrepareForFillIn() {
+  int stack_mask_number_of_bits = stack_mask_max_ + 1;  // Need room for max element too.
+  stack_mask_size_ = RoundUp(stack_mask_number_of_bits, kBitsPerByte) / kBitsPerByte;
+  inline_info_size_ = ComputeInlineInfoSize();
+  dex_register_maps_size_ = ComputeDexRegisterMapsSize();
+  stack_maps_size_ = stack_maps_.Size()
+      * StackMap::ComputeStackMapSize(stack_mask_size_,
+                                      inline_info_size_,
+                                      dex_register_maps_size_,
+                                      dex_pc_max_,
+                                      native_pc_offset_max_,
+                                      register_mask_max_);
+  dex_register_location_catalog_size_ = ComputeDexRegisterLocationCatalogSize();
+
   // Note: use RoundUp to word-size here if you want CodeInfo objects to be word aligned.
-  return size;
-}
+  needed_size_ = CodeInfo::kFixedSize
+      + dex_register_location_catalog_size_
+      + stack_maps_size_
+      + dex_register_maps_size_
+      + inline_info_size_;
 
-size_t StackMapStream::ComputeStackMaskSize() const {
-  int number_of_bits = stack_mask_max_ + 1;  // Need room for max element too.
-  return RoundUp(number_of_bits, kBitsPerByte) / kBitsPerByte;
-}
+  dex_register_location_catalog_start_ = CodeInfo::kFixedSize;
+  stack_maps_start_ = dex_register_location_catalog_start_ + dex_register_location_catalog_size_;
+  dex_register_maps_start_ = stack_maps_start_ + stack_maps_size_;
+  inline_infos_start_ = dex_register_maps_start_ + dex_register_maps_size_;
 
-size_t StackMapStream::ComputeStackMapsSize() {
-  return stack_maps_.Size() * StackMap::ComputeStackMapSize(
-      ComputeStackMaskSize(),
-      ComputeInlineInfoSize(),
-      ComputeDexRegisterMapsSize(),
-      dex_pc_max_,
-      native_pc_offset_max_,
-      register_mask_max_);
+  return needed_size_;
 }
 
 size_t StackMapStream::ComputeDexRegisterLocationCatalogSize() const {
@@ -134,17 +167,18 @@
   return size;
 }
 
-size_t StackMapStream::ComputeDexRegisterMapSize(const StackMapEntry& entry) const {
+size_t StackMapStream::ComputeDexRegisterMapSize(uint32_t num_dex_registers,
+                                                 const BitVector& live_dex_registers_mask) const {
   // Size of the map in bytes.
   size_t size = DexRegisterMap::kFixedSize;
   // Add the live bit mask for the Dex register liveness.
-  size += DexRegisterMap::GetLiveBitMaskSize(entry.num_dex_registers);
+  size += DexRegisterMap::GetLiveBitMaskSize(num_dex_registers);
   // Compute the size of the set of live Dex register entries.
   size_t number_of_live_dex_registers = 0;
   for (size_t dex_register_number = 0;
-       dex_register_number < entry.num_dex_registers;
+       dex_register_number < num_dex_registers;
        ++dex_register_number) {
-    if (entry.live_dex_registers_mask->IsBitSet(dex_register_number)) {
+    if (live_dex_registers_mask.IsBitSet(dex_register_number)) {
       ++number_of_live_dex_registers;
     }
   }
@@ -157,12 +191,20 @@
   return size;
 }
 
-size_t StackMapStream::ComputeDexRegisterMapsSize() {
+size_t StackMapStream::ComputeDexRegisterMapsSize() const {
   size_t size = 0;
+  size_t inline_info_index = 0;
   for (size_t i = 0; i < stack_maps_.Size(); ++i) {
-    if (FindEntryWithTheSameDexMap(i) == kNoSameDexMapFound) {
+    StackMapEntry entry = stack_maps_.Get(i);
+    if (entry.same_dex_register_map_as_ == kNoSameDexMapFound) {
+      size += ComputeDexRegisterMapSize(entry.num_dex_registers, *entry.live_dex_registers_mask);
+    } else {
       // Entries with the same dex map will have the same offset.
-      size += ComputeDexRegisterMapSize(stack_maps_.Get(i));
+    }
+    for (size_t j = 0; j < entry.inlining_depth; ++j) {
+      InlineInfoEntry inline_entry = inline_infos_.Get(inline_info_index++);
+      size += ComputeDexRegisterMapSize(inline_entry.num_dex_registers,
+                                        *inline_entry.live_dex_registers_mask);
     }
   }
   return size;
@@ -174,55 +216,33 @@
     + (number_of_stack_maps_with_inline_info_ * InlineInfo::kFixedSize);
 }
 
-size_t StackMapStream::ComputeDexRegisterLocationCatalogStart() const {
-  return CodeInfo::kFixedSize;
-}
-
-size_t StackMapStream::ComputeStackMapsStart() const {
-  return ComputeDexRegisterLocationCatalogStart() + ComputeDexRegisterLocationCatalogSize();
-}
-
-size_t StackMapStream::ComputeDexRegisterMapsStart() {
-  return ComputeStackMapsStart() + ComputeStackMapsSize();
-}
-
-size_t StackMapStream::ComputeInlineInfoStart() {
-  return ComputeDexRegisterMapsStart() + ComputeDexRegisterMapsSize();
-}
-
 void StackMapStream::FillIn(MemoryRegion region) {
+  DCHECK_EQ(0u, current_entry_.dex_pc) << "EndStackMapEntry not called after BeginStackMapEntry";
+  DCHECK_NE(0u, needed_size_) << "PrepareForFillIn not called before FillIn";
+
   CodeInfo code_info(region);
-  DCHECK_EQ(region.size(), ComputeNeededSize());
+  DCHECK_EQ(region.size(), needed_size_);
   code_info.SetOverallSize(region.size());
 
-  size_t stack_mask_size = ComputeStackMaskSize();
-
-  size_t dex_register_map_size = ComputeDexRegisterMapsSize();
-  size_t inline_info_size = ComputeInlineInfoSize();
-
   MemoryRegion dex_register_locations_region = region.Subregion(
-    ComputeDexRegisterMapsStart(),
-    dex_register_map_size);
+      dex_register_maps_start_, dex_register_maps_size_);
 
   MemoryRegion inline_infos_region = region.Subregion(
-    ComputeInlineInfoStart(),
-    inline_info_size);
+      inline_infos_start_, inline_info_size_);
 
-  code_info.SetEncoding(inline_info_size,
-                        dex_register_map_size,
+  code_info.SetEncoding(inline_info_size_,
+                        dex_register_maps_size_,
                         dex_pc_max_,
                         native_pc_offset_max_,
                         register_mask_max_);
   code_info.SetNumberOfStackMaps(stack_maps_.Size());
-  code_info.SetStackMaskSize(stack_mask_size);
-  DCHECK_EQ(code_info.GetStackMapsSize(), ComputeStackMapsSize());
+  code_info.SetStackMaskSize(stack_mask_size_);
+  DCHECK_EQ(code_info.GetStackMapsSize(), stack_maps_size_);
 
   // Set the Dex register location catalog.
-  code_info.SetNumberOfDexRegisterLocationCatalogEntries(
-      location_catalog_entries_.Size());
+  code_info.SetNumberOfDexRegisterLocationCatalogEntries(location_catalog_entries_.Size());
   MemoryRegion dex_register_location_catalog_region = region.Subregion(
-      ComputeDexRegisterLocationCatalogStart(),
-      ComputeDexRegisterLocationCatalogSize());
+      dex_register_location_catalog_start_, dex_register_location_catalog_size_);
   DexRegisterLocationCatalog dex_register_location_catalog(dex_register_location_catalog_region);
   // Offset in `dex_register_location_catalog` where to store the next
   // register location.
@@ -253,41 +273,26 @@
       stack_map.SetDexRegisterMapOffset(code_info, StackMap::kNoDexRegisterMap);
     } else {
       // Search for an entry with the same dex map.
-      size_t entry_with_same_map = FindEntryWithTheSameDexMap(i);
-      if (entry_with_same_map != kNoSameDexMapFound) {
+      if (entry.same_dex_register_map_as_ != kNoSameDexMapFound) {
         // If we have a hit reuse the offset.
         stack_map.SetDexRegisterMapOffset(code_info,
-            code_info.GetStackMapAt(entry_with_same_map).GetDexRegisterMapOffset(code_info));
+            code_info.GetStackMapAt(entry.same_dex_register_map_as_)
+                     .GetDexRegisterMapOffset(code_info));
       } else {
         // New dex registers maps should be added to the stack map.
-        MemoryRegion register_region =
-            dex_register_locations_region.Subregion(
-                next_dex_register_map_offset,
-                ComputeDexRegisterMapSize(entry));
+        MemoryRegion register_region = dex_register_locations_region.Subregion(
+            next_dex_register_map_offset,
+            ComputeDexRegisterMapSize(entry.num_dex_registers, *entry.live_dex_registers_mask));
         next_dex_register_map_offset += register_region.size();
         DexRegisterMap dex_register_map(register_region);
         stack_map.SetDexRegisterMapOffset(
           code_info, register_region.start() - dex_register_locations_region.start());
 
-        // Set the live bit mask.
-        dex_register_map.SetLiveBitMask(entry.num_dex_registers, *entry.live_dex_registers_mask);
-
-        // Set the dex register location mapping data.
-        for (size_t dex_register_number = 0, index_in_dex_register_locations = 0;
-             dex_register_number < entry.num_dex_registers;
-             ++dex_register_number) {
-          if (entry.live_dex_registers_mask->IsBitSet(dex_register_number)) {
-            size_t location_catalog_entry_index =
-                dex_register_locations_.Get(entry.dex_register_locations_start_index
-                                            + index_in_dex_register_locations);
-            dex_register_map.SetLocationCatalogEntryIndex(
-                index_in_dex_register_locations,
-                location_catalog_entry_index,
-                entry.num_dex_registers,
-                location_catalog_entries_.Size());
-            ++index_in_dex_register_locations;
-          }
-        }
+        // Set the dex register location.
+        FillInDexRegisterMap(dex_register_map,
+                             entry.num_dex_registers,
+                             *entry.live_dex_registers_mask,
+                             entry.dex_register_locations_start_index);
       }
     }
 
@@ -304,54 +309,81 @@
           code_info, inline_region.start() - dex_register_locations_region.start());
 
       inline_info.SetDepth(entry.inlining_depth);
-      for (size_t j = 0; j < entry.inlining_depth; ++j) {
-        InlineInfoEntry inline_entry = inline_infos_.Get(j + entry.inline_infos_start_index);
-        inline_info.SetMethodReferenceIndexAtDepth(j, inline_entry.method_index);
+      for (size_t depth = 0; depth < entry.inlining_depth; ++depth) {
+        InlineInfoEntry inline_entry = inline_infos_.Get(depth + entry.inline_infos_start_index);
+        inline_info.SetMethodIndexAtDepth(depth, inline_entry.method_index);
+        inline_info.SetDexPcAtDepth(depth, inline_entry.dex_pc);
+        if (inline_entry.num_dex_registers == 0) {
+          // No dex map available.
+          inline_info.SetDexRegisterMapOffsetAtDepth(depth, StackMap::kNoDexRegisterMap);
+          DCHECK(inline_entry.live_dex_registers_mask == nullptr);
+        } else {
+          MemoryRegion register_region = dex_register_locations_region.Subregion(
+              next_dex_register_map_offset,
+              ComputeDexRegisterMapSize(inline_entry.num_dex_registers,
+                                        *inline_entry.live_dex_registers_mask));
+          next_dex_register_map_offset += register_region.size();
+          DexRegisterMap dex_register_map(register_region);
+          inline_info.SetDexRegisterMapOffsetAtDepth(
+            depth, register_region.start() - dex_register_locations_region.start());
+
+          FillInDexRegisterMap(dex_register_map,
+                               inline_entry.num_dex_registers,
+                               *inline_entry.live_dex_registers_mask,
+                               inline_entry.dex_register_locations_start_index);
+        }
       }
     } else {
-      if (inline_info_size != 0) {
+      if (inline_info_size_ != 0) {
         stack_map.SetInlineDescriptorOffset(code_info, StackMap::kNoInlineInfo);
       }
     }
   }
 }
 
-size_t StackMapStream::FindEntryWithTheSameDexMap(size_t entry_index) {
-  StackMapEntry entry = stack_maps_.Get(entry_index);
-  auto entries_it = dex_map_hash_to_stack_map_indices_.find(entry.dex_register_map_hash);
+void StackMapStream::FillInDexRegisterMap(DexRegisterMap dex_register_map,
+                                          uint32_t num_dex_registers,
+                                          const BitVector& live_dex_registers_mask,
+                                          uint32_t start_index_in_dex_register_locations) const {
+  dex_register_map.SetLiveBitMask(num_dex_registers, live_dex_registers_mask);
+  // Set the dex register location mapping data.
+  for (size_t dex_register_number = 0, index_in_dex_register_locations = 0;
+       dex_register_number < num_dex_registers;
+       ++dex_register_number) {
+    if (live_dex_registers_mask.IsBitSet(dex_register_number)) {
+      size_t location_catalog_entry_index = dex_register_locations_.Get(
+          start_index_in_dex_register_locations + index_in_dex_register_locations);
+      dex_register_map.SetLocationCatalogEntryIndex(
+          index_in_dex_register_locations,
+          location_catalog_entry_index,
+          num_dex_registers,
+          location_catalog_entries_.Size());
+      ++index_in_dex_register_locations;
+    }
+  }
+}
+
+size_t StackMapStream::FindEntryWithTheSameDexMap() {
+  size_t current_entry_index = stack_maps_.Size();
+  auto entries_it = dex_map_hash_to_stack_map_indices_.find(current_entry_.dex_register_map_hash);
   if (entries_it == dex_map_hash_to_stack_map_indices_.end()) {
     // We don't have a perfect hash functions so we need a list to collect all stack maps
     // which might have the same dex register map.
     GrowableArray<uint32_t> stack_map_indices(allocator_, 1);
-    stack_map_indices.Add(entry_index);
-    dex_map_hash_to_stack_map_indices_.Put(entry.dex_register_map_hash, stack_map_indices);
+    stack_map_indices.Add(current_entry_index);
+    dex_map_hash_to_stack_map_indices_.Put(current_entry_.dex_register_map_hash, stack_map_indices);
     return kNoSameDexMapFound;
   }
 
-  // TODO: We don't need to add ourselves to the map if we can guarantee that
-  // FindEntryWithTheSameDexMap is called just once per stack map entry.
-  // A good way to do this is to cache the offset in the stack map entry. This
-  // is easier to do if we add markers when the stack map constructions begins
-  // and when it ends.
-
-  // We might have collisions, so we need to check whether or not we should
-  // add the entry to the map. `needs_to_be_added` keeps track of this.
-  bool needs_to_be_added = true;
-  size_t result = kNoSameDexMapFound;
+  // We might have collisions, so we need to check whether or not we really have a match.
   for (size_t i = 0; i < entries_it->second.Size(); i++) {
     size_t test_entry_index = entries_it->second.Get(i);
-    if (test_entry_index == entry_index) {
-      needs_to_be_added = false;
-    } else if (HaveTheSameDexMaps(stack_maps_.Get(test_entry_index), entry)) {
-      result = test_entry_index;
-      needs_to_be_added = false;
-      break;
+    if (HaveTheSameDexMaps(stack_maps_.Get(test_entry_index), current_entry_)) {
+      return test_entry_index;
     }
   }
-  if (needs_to_be_added) {
-    entries_it->second.Add(entry_index);
-  }
-  return result;
+  entries_it->second.Add(current_entry_index);
+  return kNoSameDexMapFound;
 }
 
 bool StackMapStream::HaveTheSameDexMaps(const StackMapEntry& a, const StackMapEntry& b) const {
diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h
index 990e682..4c03f9f 100644
--- a/compiler/optimizing/stack_map_stream.h
+++ b/compiler/optimizing/stack_map_stream.h
@@ -70,7 +70,21 @@
         native_pc_offset_max_(0),
         register_mask_max_(0),
         number_of_stack_maps_with_inline_info_(0),
-        dex_map_hash_to_stack_map_indices_(std::less<uint32_t>(), allocator->Adapter()) {}
+        dex_map_hash_to_stack_map_indices_(std::less<uint32_t>(), allocator->Adapter()),
+        current_entry_(),
+        current_inline_info_(),
+        stack_mask_size_(0),
+        inline_info_size_(0),
+        dex_register_maps_size_(0),
+        stack_maps_size_(0),
+        dex_register_location_catalog_size_(0),
+        dex_register_location_catalog_start_(0),
+        stack_maps_start_(0),
+        dex_register_maps_start_(0),
+        inline_infos_start_(0),
+        needed_size_(0),
+        current_dex_register_(0),
+        in_inline_frame_(false) {}
 
   // See runtime/stack_map.h to know what these fields contain.
   struct StackMapEntry {
@@ -84,45 +98,52 @@
     size_t inline_infos_start_index;
     BitVector* live_dex_registers_mask;
     uint32_t dex_register_map_hash;
+    size_t same_dex_register_map_as_;
   };
 
   struct InlineInfoEntry {
+    uint32_t dex_pc;
     uint32_t method_index;
+    uint32_t num_dex_registers;
+    BitVector* live_dex_registers_mask;
+    size_t dex_register_locations_start_index;
   };
 
-  void AddStackMapEntry(uint32_t dex_pc,
-                        uint32_t native_pc_offset,
-                        uint32_t register_mask,
-                        BitVector* sp_mask,
-                        uint32_t num_dex_registers,
-                        uint8_t inlining_depth);
+  void BeginStackMapEntry(uint32_t dex_pc,
+                          uint32_t native_pc_offset,
+                          uint32_t register_mask,
+                          BitVector* sp_mask,
+                          uint32_t num_dex_registers,
+                          uint8_t inlining_depth);
+  void EndStackMapEntry();
 
-  void AddDexRegisterEntry(uint16_t dex_register,
-                           DexRegisterLocation::Kind kind,
-                           int32_t value);
+  void AddDexRegisterEntry(DexRegisterLocation::Kind kind, int32_t value);
 
-  void AddInlineInfoEntry(uint32_t method_index);
+  void BeginInlineInfoEntry(uint32_t method_index,
+                            uint32_t dex_pc,
+                            uint32_t num_dex_registers);
+  void EndInlineInfoEntry();
 
-  size_t ComputeNeededSize();
-  size_t ComputeStackMaskSize() const;
-  size_t ComputeStackMapsSize();
-  size_t ComputeDexRegisterLocationCatalogSize() const;
-  size_t ComputeDexRegisterMapSize(const StackMapEntry& entry) const;
-  size_t ComputeDexRegisterMapsSize();
-  size_t ComputeInlineInfoSize() const;
-
-  size_t ComputeDexRegisterLocationCatalogStart() const;
-  size_t ComputeStackMapsStart() const;
-  size_t ComputeDexRegisterMapsStart();
-  size_t ComputeInlineInfoStart();
-
+  // Prepares the stream to fill in a memory region. Must be called before FillIn.
+  // Returns the size (in bytes) needed to store this stream.
+  size_t PrepareForFillIn();
   void FillIn(MemoryRegion region);
 
  private:
-  // Returns the index of an entry with the same dex register map
+  size_t ComputeDexRegisterLocationCatalogSize() const;
+  size_t ComputeDexRegisterMapSize(uint32_t num_dex_registers,
+                                   const BitVector& live_dex_registers_mask) const;
+  size_t ComputeDexRegisterMapsSize() const;
+  size_t ComputeInlineInfoSize() const;
+
+  // Returns the index of an entry with the same dex register map as the current_entry,
   // or kNoSameDexMapFound if no such entry exists.
-  size_t FindEntryWithTheSameDexMap(size_t entry_index);
+  size_t FindEntryWithTheSameDexMap();
   bool HaveTheSameDexMaps(const StackMapEntry& a, const StackMapEntry& b) const;
+  void FillInDexRegisterMap(DexRegisterMap dex_register_map,
+                            uint32_t num_dex_registers,
+                            const BitVector& live_dex_registers_mask,
+                            uint32_t start_index_in_dex_register_locations) const;
 
   ArenaAllocator* allocator_;
   GrowableArray<StackMapEntry> stack_maps_;
@@ -146,6 +167,21 @@
 
   ArenaSafeMap<uint32_t, GrowableArray<uint32_t>> dex_map_hash_to_stack_map_indices_;
 
+  StackMapEntry current_entry_;
+  InlineInfoEntry current_inline_info_;
+  size_t stack_mask_size_;
+  size_t inline_info_size_;
+  size_t dex_register_maps_size_;
+  size_t stack_maps_size_;
+  size_t dex_register_location_catalog_size_;
+  size_t dex_register_location_catalog_start_;
+  size_t stack_maps_start_;
+  size_t dex_register_maps_start_;
+  size_t inline_infos_start_;
+  size_t needed_size_;
+  uint32_t current_dex_register_;
+  bool in_inline_frame_;
+
   static constexpr uint32_t kNoSameDexMapFound = -1;
 
   DISALLOW_COPY_AND_ASSIGN(StackMapStream);
diff --git a/compiler/optimizing/stack_map_test.cc b/compiler/optimizing/stack_map_test.cc
index 8d160bc..e04fa98 100644
--- a/compiler/optimizing/stack_map_test.cc
+++ b/compiler/optimizing/stack_map_test.cc
@@ -40,11 +40,12 @@
 
   ArenaBitVector sp_mask(&arena, 0, false);
   size_t number_of_dex_registers = 2;
-  stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
-  stream.AddDexRegisterEntry(0, Kind::kInStack, 0);         // Short location.
-  stream.AddDexRegisterEntry(1, Kind::kConstant, -2);       // Short location.
+  stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
+  stream.AddDexRegisterEntry(Kind::kInStack, 0);         // Short location.
+  stream.AddDexRegisterEntry(Kind::kConstant, -2);       // Short location.
+  stream.EndStackMapEntry();
 
-  size_t size = stream.ComputeNeededSize();
+  size_t size = stream.PrepareForFillIn();
   void* memory = arena.Alloc(size, kArenaAllocMisc);
   MemoryRegion region(memory, size);
   stream.FillIn(region);
@@ -123,20 +124,25 @@
   sp_mask1.SetBit(2);
   sp_mask1.SetBit(4);
   size_t number_of_dex_registers = 2;
-  stream.AddStackMapEntry(0, 64, 0x3, &sp_mask1, number_of_dex_registers, 2);
-  stream.AddDexRegisterEntry(0, Kind::kInStack, 0);         // Short location.
-  stream.AddDexRegisterEntry(1, Kind::kConstant, -2);       // Large location.
-  stream.AddInlineInfoEntry(42);
-  stream.AddInlineInfoEntry(82);
+  size_t number_of_dex_registers_in_inline_info = 0;
+  stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask1, number_of_dex_registers, 2);
+  stream.AddDexRegisterEntry(Kind::kInStack, 0);         // Short location.
+  stream.AddDexRegisterEntry(Kind::kConstant, -2);       // Large location.
+  stream.BeginInlineInfoEntry(82, 3, number_of_dex_registers_in_inline_info);
+  stream.EndInlineInfoEntry();
+  stream.BeginInlineInfoEntry(42, 2, number_of_dex_registers_in_inline_info);
+  stream.EndInlineInfoEntry();
+  stream.EndStackMapEntry();
 
   ArenaBitVector sp_mask2(&arena, 0, true);
   sp_mask2.SetBit(3);
   sp_mask1.SetBit(8);
-  stream.AddStackMapEntry(1, 128, 0xFF, &sp_mask2, number_of_dex_registers, 0);
-  stream.AddDexRegisterEntry(0, Kind::kInRegister, 18);     // Short location.
-  stream.AddDexRegisterEntry(1, Kind::kInFpuRegister, 3);   // Short location.
+  stream.BeginStackMapEntry(1, 128, 0xFF, &sp_mask2, number_of_dex_registers, 0);
+  stream.AddDexRegisterEntry(Kind::kInRegister, 18);     // Short location.
+  stream.AddDexRegisterEntry(Kind::kInFpuRegister, 3);   // Short location.
+  stream.EndStackMapEntry();
 
-  size_t size = stream.ComputeNeededSize();
+  size_t size = stream.PrepareForFillIn();
   void* memory = arena.Alloc(size, kArenaAllocMisc);
   MemoryRegion region(memory, size);
   stream.FillIn(region);
@@ -208,8 +214,10 @@
     ASSERT_TRUE(stack_map.HasInlineInfo(code_info));
     InlineInfo inline_info = code_info.GetInlineInfoOf(stack_map);
     ASSERT_EQ(2u, inline_info.GetDepth());
-    ASSERT_EQ(42u, inline_info.GetMethodReferenceIndexAtDepth(0));
-    ASSERT_EQ(82u, inline_info.GetMethodReferenceIndexAtDepth(1));
+    ASSERT_EQ(82u, inline_info.GetMethodIndexAtDepth(0));
+    ASSERT_EQ(42u, inline_info.GetMethodIndexAtDepth(1));
+    ASSERT_EQ(3u, inline_info.GetDexPcAtDepth(0));
+    ASSERT_EQ(2u, inline_info.GetDexPcAtDepth(1));
   }
 
   // Second stack map.
@@ -273,11 +281,12 @@
 
   ArenaBitVector sp_mask(&arena, 0, false);
   uint32_t number_of_dex_registers = 2;
-  stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
-  stream.AddDexRegisterEntry(0, Kind::kNone, 0);            // No location.
-  stream.AddDexRegisterEntry(1, Kind::kConstant, -2);       // Large location.
+  stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
+  stream.AddDexRegisterEntry(Kind::kNone, 0);            // No location.
+  stream.AddDexRegisterEntry(Kind::kConstant, -2);       // Large location.
+  stream.EndStackMapEntry();
 
-  size_t size = stream.ComputeNeededSize();
+  size_t size = stream.PrepareForFillIn();
   void* memory = arena.Alloc(size, kArenaAllocMisc);
   MemoryRegion region(memory, size);
   stream.FillIn(region);
@@ -353,22 +362,24 @@
   ArenaBitVector sp_mask(&arena, 0, false);
   uint32_t number_of_dex_registers = 1024;
   // Create the first stack map (and its Dex register map).
-  stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
+  stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
   uint32_t number_of_dex_live_registers_in_dex_register_map_0 = number_of_dex_registers - 8;
   for (uint32_t i = 0; i < number_of_dex_live_registers_in_dex_register_map_0; ++i) {
     // Use two different Dex register locations to populate this map,
     // as using a single value (in the whole CodeInfo object) would
     // make this Dex register mapping data empty (see
     // art::DexRegisterMap::SingleEntrySizeInBits).
-    stream.AddDexRegisterEntry(i, Kind::kConstant, i % 2);  // Short location.
+    stream.AddDexRegisterEntry(Kind::kConstant, i % 2);  // Short location.
   }
+  stream.EndStackMapEntry();
   // Create the second stack map (and its Dex register map).
-  stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
+  stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
   for (uint32_t i = 0; i < number_of_dex_registers; ++i) {
-    stream.AddDexRegisterEntry(i, Kind::kConstant, 0);  // Short location.
+    stream.AddDexRegisterEntry(Kind::kConstant, 0);  // Short location.
   }
+  stream.EndStackMapEntry();
 
-  size_t size = stream.ComputeNeededSize();
+  size_t size = stream.PrepareForFillIn();
   void* memory = arena.Alloc(size, kArenaAllocMisc);
   MemoryRegion region(memory, size);
   stream.FillIn(region);
@@ -413,19 +424,22 @@
   ArenaBitVector sp_mask(&arena, 0, false);
   uint32_t number_of_dex_registers = 2;
   // First stack map.
-  stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
-  stream.AddDexRegisterEntry(0, Kind::kInRegister, 0);  // Short location.
-  stream.AddDexRegisterEntry(1, Kind::kConstant, -2);   // Large location.
+  stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
+  stream.AddDexRegisterEntry(Kind::kInRegister, 0);  // Short location.
+  stream.AddDexRegisterEntry(Kind::kConstant, -2);   // Large location.
+  stream.EndStackMapEntry();
   // Second stack map, which should share the same dex register map.
-  stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
-  stream.AddDexRegisterEntry(0, Kind::kInRegister, 0);  // Short location.
-  stream.AddDexRegisterEntry(1, Kind::kConstant, -2);   // Large location.
+  stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
+  stream.AddDexRegisterEntry(Kind::kInRegister, 0);  // Short location.
+  stream.AddDexRegisterEntry(Kind::kConstant, -2);   // Large location.
+  stream.EndStackMapEntry();
   // Third stack map (doesn't share the dex register map).
-  stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
-  stream.AddDexRegisterEntry(0, Kind::kInRegister, 2);  // Short location.
-  stream.AddDexRegisterEntry(1, Kind::kConstant, -2);   // Large location.
+  stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
+  stream.AddDexRegisterEntry(Kind::kInRegister, 2);  // Short location.
+  stream.AddDexRegisterEntry(Kind::kConstant, -2);   // Large location.
+  stream.EndStackMapEntry();
 
-  size_t size = stream.ComputeNeededSize();
+  size_t size = stream.PrepareForFillIn();
   void* memory = arena.Alloc(size, kArenaAllocMisc);
   MemoryRegion region(memory, size);
   stream.FillIn(region);
@@ -462,9 +476,10 @@
 
   ArenaBitVector sp_mask(&arena, 0, false);
   uint32_t number_of_dex_registers = 0;
-  stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
+  stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
+  stream.EndStackMapEntry();
 
-  size_t size = stream.ComputeNeededSize();
+  size_t size = stream.PrepareForFillIn();
   void* memory = arena.Alloc(size, kArenaAllocMisc);
   MemoryRegion region(memory, size);
   stream.FillIn(region);
@@ -490,4 +505,167 @@
   ASSERT_FALSE(stack_map.HasInlineInfo(code_info));
 }
 
+TEST(StackMapTest, InlineTest) {  // Encodes four stack maps with inline info and reads them back.
+  ArenaPool pool;
+  ArenaAllocator arena(&pool);
+  StackMapStream stream(&arena);
+
+  ArenaBitVector sp_mask1(&arena, 0, true);
+  sp_mask1.SetBit(2);
+  sp_mask1.SetBit(4);
+
+  // First stack map: dex_pc 0, native pc offset 64, 2 dex registers, inlining depth 2.
+  stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask1, 2, 2);
+  stream.AddDexRegisterEntry(Kind::kInStack, 0);
+  stream.AddDexRegisterEntry(Kind::kConstant, 4);
+
+  stream.BeginInlineInfoEntry(42, 2, 1);  // method_index 42, dex_pc 2, 1 dex register.
+  stream.AddDexRegisterEntry(Kind::kInStack, 8);
+  stream.EndInlineInfoEntry();
+  stream.BeginInlineInfoEntry(82, 3, 3);  // method_index 82, dex_pc 3, 3 dex registers.
+  stream.AddDexRegisterEntry(Kind::kInStack, 16);
+  stream.AddDexRegisterEntry(Kind::kConstant, 20);
+  stream.AddDexRegisterEntry(Kind::kInRegister, 15);
+  stream.EndInlineInfoEntry();
+
+  stream.EndStackMapEntry();
+
+  // Second stack map: dex_pc 2, native pc offset 22, 2 dex registers, inlining depth 3.
+  stream.BeginStackMapEntry(2, 22, 0x3, &sp_mask1, 2, 3);
+  stream.AddDexRegisterEntry(Kind::kInStack, 56);
+  stream.AddDexRegisterEntry(Kind::kConstant, 0);
+
+  stream.BeginInlineInfoEntry(42, 2, 1);  // method_index 42, dex_pc 2, 1 dex register.
+  stream.AddDexRegisterEntry(Kind::kInStack, 12);
+  stream.EndInlineInfoEntry();
+  stream.BeginInlineInfoEntry(82, 3, 3);  // method_index 82, dex_pc 3, 3 dex registers.
+  stream.AddDexRegisterEntry(Kind::kInStack, 80);
+  stream.AddDexRegisterEntry(Kind::kConstant, 10);
+  stream.AddDexRegisterEntry(Kind::kInRegister, 5);
+  stream.EndInlineInfoEntry();
+  stream.BeginInlineInfoEntry(52, 5, 0);  // method_index 52, dex_pc 5, no dex registers.
+  stream.EndInlineInfoEntry();
+
+  stream.EndStackMapEntry();
+
+  // Third stack map: dex_pc 4, native pc offset 56, 2 dex registers, no inline frames.
+  stream.BeginStackMapEntry(4, 56, 0x3, &sp_mask1, 2, 0);
+  stream.AddDexRegisterEntry(Kind::kNone, 0);
+  stream.AddDexRegisterEntry(Kind::kConstant, 4);
+  stream.EndStackMapEntry();
+
+  // Fourth stack map: dex_pc 6, native pc offset 78, 2 dex registers, inlining depth 3.
+  stream.BeginStackMapEntry(6, 78, 0x3, &sp_mask1, 2, 3);
+  stream.AddDexRegisterEntry(Kind::kInStack, 56);
+  stream.AddDexRegisterEntry(Kind::kConstant, 0);
+
+  stream.BeginInlineInfoEntry(42, 2, 0);  // method_index 42, dex_pc 2, no dex registers.
+  stream.EndInlineInfoEntry();
+  stream.BeginInlineInfoEntry(52, 5, 1);  // method_index 52, dex_pc 5, 1 dex register.
+  stream.AddDexRegisterEntry(Kind::kInRegister, 2);
+  stream.EndInlineInfoEntry();
+  stream.BeginInlineInfoEntry(52, 10, 2);  // method_index 52, dex_pc 10, 2 dex registers.
+  stream.AddDexRegisterEntry(Kind::kNone, 0);
+  stream.AddDexRegisterEntry(Kind::kInRegister, 3);
+  stream.EndInlineInfoEntry();
+
+  stream.EndStackMapEntry();
+
+  size_t size = stream.PrepareForFillIn();
+  void* memory = arena.Alloc(size, kArenaAllocMisc);
+  MemoryRegion region(memory, size);
+  stream.FillIn(region);
+
+  CodeInfo ci(region);
+
+  {
+    // Verify first stack map: outer dex registers, then both inline frames.
+    StackMap sm0 = ci.GetStackMapAt(0);
+
+    DexRegisterMap dex_registers0 = ci.GetDexRegisterMapOf(sm0, 2);
+    ASSERT_EQ(0, dex_registers0.GetStackOffsetInBytes(0, 2, ci));
+    ASSERT_EQ(4, dex_registers0.GetConstant(1, 2, ci));
+
+    InlineInfo if0 = ci.GetInlineInfoOf(sm0);
+    ASSERT_EQ(2u, if0.GetDepth());
+    ASSERT_EQ(2u, if0.GetDexPcAtDepth(0));
+    ASSERT_EQ(42u, if0.GetMethodIndexAtDepth(0));
+    ASSERT_EQ(3u, if0.GetDexPcAtDepth(1));
+    ASSERT_EQ(82u, if0.GetMethodIndexAtDepth(1));
+
+    DexRegisterMap dex_registers1 = ci.GetDexRegisterMapAtDepth(0, if0, 1);
+    ASSERT_EQ(8, dex_registers1.GetStackOffsetInBytes(0, 1, ci));
+
+    DexRegisterMap dex_registers2 = ci.GetDexRegisterMapAtDepth(1, if0, 3);
+    ASSERT_EQ(16, dex_registers2.GetStackOffsetInBytes(0, 3, ci));
+    ASSERT_EQ(20, dex_registers2.GetConstant(1, 3, ci));
+    ASSERT_EQ(15, dex_registers2.GetMachineRegister(2, 3, ci));
+  }
+
+  {
+    // Verify second stack map: outer dex registers, then the three inline frames.
+    StackMap sm1 = ci.GetStackMapAt(1);
+
+    DexRegisterMap dex_registers0 = ci.GetDexRegisterMapOf(sm1, 2);
+    ASSERT_EQ(56, dex_registers0.GetStackOffsetInBytes(0, 2, ci));
+    ASSERT_EQ(0, dex_registers0.GetConstant(1, 2, ci));
+
+    InlineInfo if1 = ci.GetInlineInfoOf(sm1);
+    ASSERT_EQ(3u, if1.GetDepth());
+    ASSERT_EQ(2u, if1.GetDexPcAtDepth(0));
+    ASSERT_EQ(42u, if1.GetMethodIndexAtDepth(0));
+    ASSERT_EQ(3u, if1.GetDexPcAtDepth(1));
+    ASSERT_EQ(82u, if1.GetMethodIndexAtDepth(1));
+    ASSERT_EQ(5u, if1.GetDexPcAtDepth(2));
+    ASSERT_EQ(52u, if1.GetMethodIndexAtDepth(2));
+
+    DexRegisterMap dex_registers1 = ci.GetDexRegisterMapAtDepth(0, if1, 1);
+    ASSERT_EQ(12, dex_registers1.GetStackOffsetInBytes(0, 1, ci));
+
+    DexRegisterMap dex_registers2 = ci.GetDexRegisterMapAtDepth(1, if1, 3);
+    ASSERT_EQ(80, dex_registers2.GetStackOffsetInBytes(0, 3, ci));
+    ASSERT_EQ(10, dex_registers2.GetConstant(1, 3, ci));
+    ASSERT_EQ(5, dex_registers2.GetMachineRegister(2, 3, ci));
+
+    ASSERT_FALSE(if1.HasDexRegisterMapAtDepth(2));  // Depth 2 was begun with 0 dex registers.
+  }
+
+  {
+    // Verify third stack map: it was begun with inlining depth 0, so no inline info.
+    StackMap sm2 = ci.GetStackMapAt(2);
+
+    DexRegisterMap dex_registers0 = ci.GetDexRegisterMapOf(sm2, 2);
+    ASSERT_FALSE(dex_registers0.IsDexRegisterLive(0));  // Register 0 was added as Kind::kNone.
+    ASSERT_EQ(4, dex_registers0.GetConstant(1, 2, ci));
+    ASSERT_FALSE(sm2.HasInlineInfo(ci));
+  }
+
+  {
+    // Verify fourth stack map: outer dex registers, then the three inline frames.
+    StackMap sm3 = ci.GetStackMapAt(3);
+
+    DexRegisterMap dex_registers0 = ci.GetDexRegisterMapOf(sm3, 2);
+    ASSERT_EQ(56, dex_registers0.GetStackOffsetInBytes(0, 2, ci));
+    ASSERT_EQ(0, dex_registers0.GetConstant(1, 2, ci));
+
+    InlineInfo if2 = ci.GetInlineInfoOf(sm3);
+    ASSERT_EQ(3u, if2.GetDepth());
+    ASSERT_EQ(2u, if2.GetDexPcAtDepth(0));
+    ASSERT_EQ(42u, if2.GetMethodIndexAtDepth(0));
+    ASSERT_EQ(5u, if2.GetDexPcAtDepth(1));
+    ASSERT_EQ(52u, if2.GetMethodIndexAtDepth(1));
+    ASSERT_EQ(10u, if2.GetDexPcAtDepth(2));
+    ASSERT_EQ(52u, if2.GetMethodIndexAtDepth(2));
+
+    ASSERT_FALSE(if2.HasDexRegisterMapAtDepth(0));  // Depth 0 was begun with 0 dex registers.
+
+    DexRegisterMap dex_registers1 = ci.GetDexRegisterMapAtDepth(1, if2, 1);
+    ASSERT_EQ(2, dex_registers1.GetMachineRegister(0, 1, ci));
+
+    DexRegisterMap dex_registers2 = ci.GetDexRegisterMapAtDepth(2, if2, 2);
+    ASSERT_FALSE(dex_registers2.IsDexRegisterLive(0));  // Register 0 was added as Kind::kNone.
+    ASSERT_EQ(3, dex_registers2.GetMachineRegister(1, 2, ci));
+  }
+}
+
 }  // namespace art
diff --git a/compiler/optimizing/suspend_check_test.cc b/compiler/optimizing/suspend_check_test.cc
index a5a0eb2..5ca66a1 100644
--- a/compiler/optimizing/suspend_check_test.cc
+++ b/compiler/optimizing/suspend_check_test.cc
@@ -30,7 +30,7 @@
 static void TestCode(const uint16_t* data) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
-  HGraph* graph = new (&allocator) HGraph(&allocator);
+  HGraph* graph = CreateGraph(&allocator);
   HGraphBuilder builder(graph);
   const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
   bool graph_built = builder.BuildGraph(*item);
diff --git a/compiler/utils/arm/assembler_arm.cc b/compiler/utils/arm/assembler_arm.cc
index c410660..eca6f5a 100644
--- a/compiler/utils/arm/assembler_arm.cc
+++ b/compiler/utils/arm/assembler_arm.cc
@@ -860,8 +860,6 @@
   // Set up call to Thread::Current()->pDeliverException.
   __ LoadFromOffset(kLoadWord, R12, TR, QUICK_ENTRYPOINT_OFFSET(4, pDeliverException).Int32Value());
   __ blx(R12);
-  // Call never returns.
-  __ bkpt(0);
 #undef __
 }
 
diff --git a/compiler/utils/arm/assembler_arm.h b/compiler/utils/arm/assembler_arm.h
index 313f365..dee8287 100644
--- a/compiler/utils/arm/assembler_arm.h
+++ b/compiler/utils/arm/assembler_arm.h
@@ -398,6 +398,8 @@
                    Condition cond = AL) = 0;
   virtual void mls(Register rd, Register rn, Register rm, Register ra,
                    Condition cond = AL) = 0;
+  virtual void smull(Register rd_lo, Register rd_hi, Register rn, Register rm,
+                     Condition cond = AL) = 0;
   virtual void umull(Register rd_lo, Register rd_hi, Register rn, Register rm,
                      Condition cond = AL) = 0;
 
diff --git a/compiler/utils/arm/assembler_arm32.cc b/compiler/utils/arm/assembler_arm32.cc
index 9579691..6e165fc 100644
--- a/compiler/utils/arm/assembler_arm32.cc
+++ b/compiler/utils/arm/assembler_arm32.cc
@@ -200,6 +200,13 @@
 }
 
 
+void Arm32Assembler::smull(Register rd_lo, Register rd_hi, Register rn,
+                           Register rm, Condition cond) {
+  // Assembler registers rd_lo, rd_hi, rn, rm are encoded as rd, rn, rm, rs.
+  EmitMulOp(cond, B23 | B22, rd_lo, rd_hi, rn, rm);
+}
+
+
 void Arm32Assembler::umull(Register rd_lo, Register rd_hi, Register rn,
                            Register rm, Condition cond) {
   // Assembler registers rd_lo, rd_hi, rn, rm are encoded as rd, rn, rm, rs.
diff --git a/compiler/utils/arm/assembler_arm32.h b/compiler/utils/arm/assembler_arm32.h
index b922d66..55ec7b4 100644
--- a/compiler/utils/arm/assembler_arm32.h
+++ b/compiler/utils/arm/assembler_arm32.h
@@ -90,6 +90,8 @@
            Condition cond = AL) OVERRIDE;
   void mls(Register rd, Register rn, Register rm, Register ra,
            Condition cond = AL) OVERRIDE;
+  void smull(Register rd_lo, Register rd_hi, Register rn, Register rm,
+             Condition cond = AL) OVERRIDE;
   void umull(Register rd_lo, Register rd_hi, Register rn, Register rm,
              Condition cond = AL) OVERRIDE;
 
diff --git a/compiler/utils/arm/assembler_arm32_test.cc b/compiler/utils/arm/assembler_arm32_test.cc
index 4a0ae0b..efd517b 100644
--- a/compiler/utils/arm/assembler_arm32_test.cc
+++ b/compiler/utils/arm/assembler_arm32_test.cc
@@ -293,12 +293,29 @@
     f();
   }
 
+  // Evaluates a filter of the form "aaa=bbb": returns true (skip the case) iff aaa equals bbb.
+  bool EvalFilterString(std::string filter) {
+    if (filter.empty()) {
+      return false;
+    }
+    size_t equal_sign_index = filter.find('=');
+    if (equal_sign_index == std::string::npos) {
+      // Bail out: npos + 1 wraps to 0, so lhs would equal rhs and every case would be skipped.
+      EXPECT_TRUE(false) << "Unsupported filter string.";
+      return false;
+    }
+
+    std::string lhs = filter.substr(0, equal_sign_index);
+    std::string rhs = filter.substr(equal_sign_index + 1, std::string::npos);
+    return lhs.compare(rhs) == 0;
+  }
+
   void TemplateHelper(std::function<void(arm::Register)> f, int depth ATTRIBUTE_UNUSED,
-                      bool without_pc,
-                      std::string fmt, std::ostringstream& oss) {
+                      bool without_pc, std::string fmt, std::string filter,
+                      std::ostringstream& oss) {
     std::vector<arm::Register*> registers = without_pc ? GetRegistersWithoutPC() : GetRegisters();
     for (auto reg : registers) {
       std::string after_reg = fmt;
+      std::string after_reg_filter = filter;
 
       std::string reg_string = GetRegName<RegisterView::kUsePrimaryName>(*reg);
       size_t reg_index;
@@ -308,14 +325,23 @@
         after_reg.replace(reg_index, strlen(reg_token), reg_string);
       }
 
+      while ((reg_index = after_reg_filter.find(reg_token)) != std::string::npos) {
+        after_reg_filter.replace(reg_index, strlen(reg_token), reg_string);
+      }
+      if (EvalFilterString(after_reg_filter)) {
+        continue;
+      }
+
       ExecuteAndPrint([&] () { f(*reg); }, after_reg, oss);
     }
   }
 
   void TemplateHelper(std::function<void(const arm::ShifterOperand&)> f, int depth ATTRIBUTE_UNUSED,
-                      bool without_pc ATTRIBUTE_UNUSED, std::string fmt, std::ostringstream& oss) {
+                      bool without_pc ATTRIBUTE_UNUSED, std::string fmt, std::string filter,
+                      std::ostringstream& oss) {
     for (const arm::ShifterOperand& shift : GetShiftOperands()) {
       std::string after_shift = fmt;
+      std::string after_shift_filter = filter;
 
       std::string shift_string = GetShiftString(shift);
       size_t shift_index;
@@ -323,30 +349,48 @@
         after_shift.replace(shift_index, ConstexprStrLen(SHIFT_TOKEN), shift_string);
       }
 
+      while ((shift_index = after_shift_filter.find(SHIFT_TOKEN)) != std::string::npos) {
+        after_shift_filter.replace(shift_index, ConstexprStrLen(SHIFT_TOKEN), shift_string);
+      }
+      if (EvalFilterString(after_shift_filter)) {
+        continue;
+      }
+
       ExecuteAndPrint([&] () { f(shift); }, after_shift, oss);
     }
   }
 
   void TemplateHelper(std::function<void(arm::Condition)> f, int depth ATTRIBUTE_UNUSED,
-                      bool without_pc ATTRIBUTE_UNUSED, std::string fmt, std::ostringstream& oss) {
+                      bool without_pc ATTRIBUTE_UNUSED, std::string fmt, std::string filter,
+                      std::ostringstream& oss) {
     for (arm::Condition c : GetConditions()) {
       std::string after_cond = fmt;
+      std::string after_cond_filter = filter;
 
       size_t cond_index = after_cond.find(COND_TOKEN);
       if (cond_index != std::string::npos) {
         after_cond.replace(cond_index, ConstexprStrLen(IMM1_TOKEN), GetConditionString(c));
       }
 
+      cond_index = after_cond_filter.find(COND_TOKEN);
+      if (cond_index != std::string::npos) {
+        after_cond_filter.replace(cond_index, ConstexprStrLen(IMM1_TOKEN), GetConditionString(c));
+      }
+      if (EvalFilterString(after_cond_filter)) {
+        continue;
+      }
+
       ExecuteAndPrint([&] () { f(c); }, after_cond, oss);
     }
   }
 
   template <typename... Args>
   void TemplateHelper(std::function<void(arm::Register, Args...)> f, int depth, bool without_pc,
-                      std::string fmt, std::ostringstream& oss) {
+                      std::string fmt, std::string filter, std::ostringstream& oss) {
     std::vector<arm::Register*> registers = without_pc ? GetRegistersWithoutPC() : GetRegisters();
     for (auto reg : registers) {
       std::string after_reg = fmt;
+      std::string after_reg_filter = filter;
 
       std::string reg_string = GetRegName<RegisterView::kUsePrimaryName>(*reg);
       size_t reg_index;
@@ -356,17 +400,26 @@
         after_reg.replace(reg_index, strlen(reg_token), reg_string);
       }
 
+      while ((reg_index = after_reg_filter.find(reg_token)) != std::string::npos) {
+        after_reg_filter.replace(reg_index, strlen(reg_token), reg_string);
+      }
+      if (EvalFilterString(after_reg_filter)) {
+        continue;
+      }
+
       auto lambda = [&] (Args... args) { f(*reg, args...); };  // NOLINT [readability/braces] [4]
       TemplateHelper(std::function<void(Args...)>(lambda), depth + 1, without_pc,
-          after_reg, oss);
+          after_reg, after_reg_filter, oss);
     }
   }
 
   template <typename... Args>
   void TemplateHelper(std::function<void(const arm::ShifterOperand&, Args...)> f, int depth,
-                      bool without_pc, std::string fmt, std::ostringstream& oss) {
+                      bool without_pc, std::string fmt, std::string filter,
+                      std::ostringstream& oss) {
     for (const arm::ShifterOperand& shift : GetShiftOperands()) {
       std::string after_shift = fmt;
+      std::string after_shift_filter = filter;
 
       std::string shift_string = GetShiftString(shift);
       size_t shift_index;
@@ -374,26 +427,42 @@
         after_shift.replace(shift_index, ConstexprStrLen(SHIFT_TOKEN), shift_string);
       }
 
+      while ((shift_index = after_shift_filter.find(SHIFT_TOKEN)) != std::string::npos) {
+        after_shift_filter.replace(shift_index, ConstexprStrLen(SHIFT_TOKEN), shift_string);
+      }
+      if (EvalFilterString(after_shift_filter)) {
+        continue;
+      }
+
       auto lambda = [&] (Args... args) { f(shift, args...); };  // NOLINT [readability/braces] [4]
       TemplateHelper(std::function<void(Args...)>(lambda), depth, without_pc,
-          after_shift, oss);
+          after_shift, after_shift_filter, oss);
     }
   }
 
   template <typename... Args>
   void TemplateHelper(std::function<void(arm::Condition, Args...)> f, int depth, bool without_pc,
-                      std::string fmt, std::ostringstream& oss) {
+                      std::string fmt, std::string filter, std::ostringstream& oss) {
     for (arm::Condition c : GetConditions()) {
       std::string after_cond = fmt;
+      std::string after_cond_filter = filter;
 
       size_t cond_index = after_cond.find(COND_TOKEN);
       if (cond_index != std::string::npos) {
         after_cond.replace(cond_index, ConstexprStrLen(IMM1_TOKEN), GetConditionString(c));
       }
 
+      cond_index = after_cond_filter.find(COND_TOKEN);
+      if (cond_index != std::string::npos) {
+        after_cond_filter.replace(cond_index, ConstexprStrLen(IMM1_TOKEN), GetConditionString(c));
+      }
+      if (EvalFilterString(after_cond_filter)) {
+        continue;
+      }
+
       auto lambda = [&] (Args... args) { f(c, args...); };  // NOLINT [readability/braces] [4]
       TemplateHelper(std::function<void(Args...)>(lambda), depth, without_pc,
-          after_cond, oss);
+          after_cond, after_cond_filter, oss);
     }
   }
 
@@ -421,13 +490,13 @@
 
   template <typename... Args>
   void GenericTemplateHelper(std::function<void(Args...)> f, bool without_pc,
-                             std::string fmt, std::string test_name) {
+                             std::string fmt, std::string test_name, std::string filter) {
     first_ = false;
     WarnOnCombinations(CountHelper<Args...>(without_pc));
 
     std::ostringstream oss;
 
-    TemplateHelper(f, 0, without_pc, fmt, oss);
+    TemplateHelper(f, 0, without_pc, fmt, filter, oss);
 
     oss << "\n";  // Trailing newline.
 
@@ -436,26 +505,26 @@
 
   template <typename... Args>
   void T2Helper(void (arm::Arm32Assembler::*f)(Args...), bool without_pc, std::string fmt,
-                std::string test_name) {
-    GenericTemplateHelper(GetBoundFunction2(f), without_pc, fmt, test_name);
+                std::string test_name, std::string filter = "") {
+    GenericTemplateHelper(GetBoundFunction2(f), without_pc, fmt, test_name, filter);
   }
 
   template <typename... Args>
   void T3Helper(void (arm::Arm32Assembler::*f)(Args...), bool without_pc, std::string fmt,
-      std::string test_name) {
-    GenericTemplateHelper(GetBoundFunction3(f), without_pc, fmt, test_name);
+      std::string test_name, std::string filter = "") {
+    GenericTemplateHelper(GetBoundFunction3(f), without_pc, fmt, test_name, filter);
   }
 
   template <typename... Args>
   void T4Helper(void (arm::Arm32Assembler::*f)(Args...), bool without_pc, std::string fmt,
-      std::string test_name) {
-    GenericTemplateHelper(GetBoundFunction4(f), without_pc, fmt, test_name);
+      std::string test_name, std::string filter = "") {
+    GenericTemplateHelper(GetBoundFunction4(f), without_pc, fmt, test_name, filter);
   }
 
   template <typename... Args>
   void T5Helper(void (arm::Arm32Assembler::*f)(Args...), bool without_pc, std::string fmt,
-      std::string test_name) {
-    GenericTemplateHelper(GetBoundFunction5(f), without_pc, fmt, test_name);
+      std::string test_name, std::string filter = "") {
+    GenericTemplateHelper(GetBoundFunction5(f), without_pc, fmt, test_name, filter);
   }
 
  private:
@@ -565,15 +634,18 @@
 }
 
 TEST_F(AssemblerArm32Test, Mla) {
-  T5Helper(&arm::Arm32Assembler::mla, true, "mla{cond} {reg1}, {reg2}, {reg3}, {reg4}", "mul");
+  T5Helper(&arm::Arm32Assembler::mla, true, "mla{cond} {reg1}, {reg2}, {reg3}, {reg4}", "mla");
 }
 
-/* TODO: Needs support to filter out register combinations, as rdhi must not be equal to rdlo.
 TEST_F(AssemblerArm32Test, Umull) {
   T5Helper(&arm::Arm32Assembler::umull, true, "umull{cond} {reg1}, {reg2}, {reg3}, {reg4}",
-           "umull");
+           "umull", "{reg1}={reg2}");  // Skip the cases where reg1 == reg2.
 }
-*/
+
+TEST_F(AssemblerArm32Test, Smull) {
+  T5Helper(&arm::Arm32Assembler::smull, true, "smull{cond} {reg1}, {reg2}, {reg3}, {reg4}",
+           "smull", "{reg1}={reg2}");  // Skip the cases where reg1 == reg2.
+}
 
 TEST_F(AssemblerArm32Test, Sdiv) {
   T4Helper(&arm::Arm32Assembler::sdiv, true, "sdiv{cond} {reg1}, {reg2}, {reg3}", "sdiv");
@@ -655,9 +727,10 @@
   T4Helper(&arm::Arm32Assembler::rsc, true, "rsc{cond} {reg1}, {reg2}, {shift}", "rsc");
 }
 
-/* TODO: Needs support to filter out register combinations, as reg1 must not be equal to reg3.
+/* TODO: Need better filter support.
 TEST_F(AssemblerArm32Test, Strex) {
-  RRRCWithoutPCHelper(&arm::Arm32Assembler::strex, "strex{cond} {reg1}, {reg2}, [{reg3}]", "strex");
+  T4Helper(&arm::Arm32Assembler::strex, true, "strex{cond} {reg1}, {reg2}, [{reg3}]", "strex",
+           "{reg1}={reg2}||{reg1}={reg3}");  // Skip the cases where reg1 == reg2 || reg1 == reg3.
 }
 */
 
diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc
index 3b42f63..e7cf26e 100644
--- a/compiler/utils/arm/assembler_thumb2.cc
+++ b/compiler/utils/arm/assembler_thumb2.cc
@@ -238,6 +238,24 @@
 }
 
 
+void Thumb2Assembler::smull(Register rd_lo, Register rd_hi, Register rn,
+                            Register rm, Condition cond) {
+  CheckCondition(cond);
+
+  uint32_t op1 = 0U /* 0b000; */;
+  uint32_t op2 = 0U /* 0b0000 */;
+  int32_t encoding = B31 | B30 | B29 | B28 | B27 | B25 | B24 | B23 |
+      op1 << 20 |
+      op2 << 4 |
+      static_cast<uint32_t>(rd_lo) << 12 |
+      static_cast<uint32_t>(rd_hi) << 8 |
+      static_cast<uint32_t>(rn) << 16 |
+      static_cast<uint32_t>(rm);
+
+  Emit32(encoding);
+}
+
+
 void Thumb2Assembler::umull(Register rd_lo, Register rd_hi, Register rn,
                             Register rm, Condition cond) {
   CheckCondition(cond);
@@ -740,13 +758,6 @@
     return true;
   }
 
-  // Check for MOV with an ROR.
-  if (opcode == MOV && so.IsRegister() && so.IsShift() && so.GetShift() == ROR) {
-    if (so.GetImmediate() != 0) {
-      return true;
-    }
-  }
-
   bool rn_is_valid = true;
 
   // Check for single operand instructions and ADD/SUB.
@@ -792,6 +803,19 @@
     }
   }
 
+  // Check for register shift operand.
+  if (so.IsRegister() && so.IsShift()) {
+    if (opcode != MOV) {
+      return true;
+    }
+    // Check for MOV with an ROR.
+    if (so.GetShift() == ROR) {
+      if (so.GetImmediate() != 0) {
+        return true;
+      }
+    }
+  }
+
   // The instruction can be encoded in 16 bits.
   return false;
 }
diff --git a/compiler/utils/arm/assembler_thumb2.h b/compiler/utils/arm/assembler_thumb2.h
index e33c240..17eae8b 100644
--- a/compiler/utils/arm/assembler_thumb2.h
+++ b/compiler/utils/arm/assembler_thumb2.h
@@ -112,6 +112,8 @@
            Condition cond = AL) OVERRIDE;
   void mls(Register rd, Register rn, Register rm, Register ra,
            Condition cond = AL) OVERRIDE;
+  void smull(Register rd_lo, Register rd_hi, Register rn, Register rm,
+             Condition cond = AL) OVERRIDE;
   void umull(Register rd_lo, Register rd_hi, Register rn, Register rm,
              Condition cond = AL) OVERRIDE;
 
diff --git a/compiler/utils/arm/assembler_thumb2_test.cc b/compiler/utils/arm/assembler_thumb2_test.cc
index 5f5561a..733441b 100644
--- a/compiler/utils/arm/assembler_thumb2_test.cc
+++ b/compiler/utils/arm/assembler_thumb2_test.cc
@@ -89,23 +89,24 @@
   EXPECT_TRUE(CheckTools());
 }
 
+#define __ GetAssembler()->
 
 TEST_F(AssemblerThumb2Test, Sbfx) {
-  GetAssembler()->sbfx(arm::R0, arm::R1, 0, 1);
-  GetAssembler()->sbfx(arm::R0, arm::R1, 0, 8);
-  GetAssembler()->sbfx(arm::R0, arm::R1, 0, 16);
-  GetAssembler()->sbfx(arm::R0, arm::R1, 0, 32);
+  __ sbfx(arm::R0, arm::R1, 0, 1);
+  __ sbfx(arm::R0, arm::R1, 0, 8);
+  __ sbfx(arm::R0, arm::R1, 0, 16);
+  __ sbfx(arm::R0, arm::R1, 0, 32);
 
-  GetAssembler()->sbfx(arm::R0, arm::R1, 8, 1);
-  GetAssembler()->sbfx(arm::R0, arm::R1, 8, 8);
-  GetAssembler()->sbfx(arm::R0, arm::R1, 8, 16);
-  GetAssembler()->sbfx(arm::R0, arm::R1, 8, 24);
+  __ sbfx(arm::R0, arm::R1, 8, 1);
+  __ sbfx(arm::R0, arm::R1, 8, 8);
+  __ sbfx(arm::R0, arm::R1, 8, 16);
+  __ sbfx(arm::R0, arm::R1, 8, 24);
 
-  GetAssembler()->sbfx(arm::R0, arm::R1, 16, 1);
-  GetAssembler()->sbfx(arm::R0, arm::R1, 16, 8);
-  GetAssembler()->sbfx(arm::R0, arm::R1, 16, 16);
+  __ sbfx(arm::R0, arm::R1, 16, 1);
+  __ sbfx(arm::R0, arm::R1, 16, 8);
+  __ sbfx(arm::R0, arm::R1, 16, 16);
 
-  GetAssembler()->sbfx(arm::R0, arm::R1, 31, 1);
+  __ sbfx(arm::R0, arm::R1, 31, 1);
 
   const char* expected =
       "sbfx r0, r1, #0, #1\n"
@@ -127,21 +128,21 @@
 }
 
 TEST_F(AssemblerThumb2Test, Ubfx) {
-  GetAssembler()->ubfx(arm::R0, arm::R1, 0, 1);
-  GetAssembler()->ubfx(arm::R0, arm::R1, 0, 8);
-  GetAssembler()->ubfx(arm::R0, arm::R1, 0, 16);
-  GetAssembler()->ubfx(arm::R0, arm::R1, 0, 32);
+  __ ubfx(arm::R0, arm::R1, 0, 1);
+  __ ubfx(arm::R0, arm::R1, 0, 8);
+  __ ubfx(arm::R0, arm::R1, 0, 16);
+  __ ubfx(arm::R0, arm::R1, 0, 32);
 
-  GetAssembler()->ubfx(arm::R0, arm::R1, 8, 1);
-  GetAssembler()->ubfx(arm::R0, arm::R1, 8, 8);
-  GetAssembler()->ubfx(arm::R0, arm::R1, 8, 16);
-  GetAssembler()->ubfx(arm::R0, arm::R1, 8, 24);
+  __ ubfx(arm::R0, arm::R1, 8, 1);
+  __ ubfx(arm::R0, arm::R1, 8, 8);
+  __ ubfx(arm::R0, arm::R1, 8, 16);
+  __ ubfx(arm::R0, arm::R1, 8, 24);
 
-  GetAssembler()->ubfx(arm::R0, arm::R1, 16, 1);
-  GetAssembler()->ubfx(arm::R0, arm::R1, 16, 8);
-  GetAssembler()->ubfx(arm::R0, arm::R1, 16, 16);
+  __ ubfx(arm::R0, arm::R1, 16, 1);
+  __ ubfx(arm::R0, arm::R1, 16, 8);
+  __ ubfx(arm::R0, arm::R1, 16, 16);
 
-  GetAssembler()->ubfx(arm::R0, arm::R1, 31, 1);
+  __ ubfx(arm::R0, arm::R1, 31, 1);
 
   const char* expected =
       "ubfx r0, r1, #0, #1\n"
@@ -163,7 +164,7 @@
 }
 
 TEST_F(AssemblerThumb2Test, Vmstat) {
-  GetAssembler()->vmstat();
+  __ vmstat();
 
   const char* expected = "vmrs APSR_nzcv, FPSCR\n";
 
@@ -171,10 +172,10 @@
 }
 
 TEST_F(AssemblerThumb2Test, ldrexd) {
-  GetAssembler()->ldrexd(arm::R0, arm::R1, arm::R0);
-  GetAssembler()->ldrexd(arm::R0, arm::R1, arm::R1);
-  GetAssembler()->ldrexd(arm::R0, arm::R1, arm::R2);
-  GetAssembler()->ldrexd(arm::R5, arm::R3, arm::R7);
+  __ ldrexd(arm::R0, arm::R1, arm::R0);
+  __ ldrexd(arm::R0, arm::R1, arm::R1);
+  __ ldrexd(arm::R0, arm::R1, arm::R2);
+  __ ldrexd(arm::R5, arm::R3, arm::R7);
 
   const char* expected =
       "ldrexd r0, r1, [r0]\n"
@@ -185,10 +186,10 @@
 }
 
 TEST_F(AssemblerThumb2Test, strexd) {
-  GetAssembler()->strexd(arm::R9, arm::R0, arm::R1, arm::R0);
-  GetAssembler()->strexd(arm::R9, arm::R0, arm::R1, arm::R1);
-  GetAssembler()->strexd(arm::R9, arm::R0, arm::R1, arm::R2);
-  GetAssembler()->strexd(arm::R9, arm::R5, arm::R3, arm::R7);
+  __ strexd(arm::R9, arm::R0, arm::R1, arm::R0);
+  __ strexd(arm::R9, arm::R0, arm::R1, arm::R1);
+  __ strexd(arm::R9, arm::R0, arm::R1, arm::R2);
+  __ strexd(arm::R9, arm::R5, arm::R3, arm::R7);
 
   const char* expected =
       "strexd r9, r0, r1, [r0]\n"
@@ -199,9 +200,9 @@
 }
 
 TEST_F(AssemblerThumb2Test, LdrdStrd) {
-  GetAssembler()->ldrd(arm::R0, arm::Address(arm::R2, 8));
-  GetAssembler()->ldrd(arm::R0, arm::Address(arm::R12));
-  GetAssembler()->strd(arm::R0, arm::Address(arm::R2, 8));
+  __ ldrd(arm::R0, arm::Address(arm::R2, 8));
+  __ ldrd(arm::R0, arm::Address(arm::R12));
+  __ strd(arm::R0, arm::Address(arm::R2, 8));
 
   const char* expected =
       "ldrd r0, r1, [r2, #8]\n"
@@ -211,7 +212,6 @@
 }
 
 TEST_F(AssemblerThumb2Test, eor) {
-#define __ GetAssembler()->
   __ eor(arm::R1, arm::R1, arm::ShifterOperand(arm::R0));
   __ eor(arm::R1, arm::R0, arm::ShifterOperand(arm::R1));
   __ eor(arm::R1, arm::R8, arm::ShifterOperand(arm::R0));
@@ -230,23 +230,47 @@
 TEST_F(AssemblerThumb2Test, sub) {
   __ subs(arm::R1, arm::R0, arm::ShifterOperand(42));
   __ sub(arm::R1, arm::R0, arm::ShifterOperand(42));
+  __ subs(arm::R1, arm::R0, arm::ShifterOperand(arm::R2, arm::ASR, 31));
+  __ sub(arm::R1, arm::R0, arm::ShifterOperand(arm::R2, arm::ASR, 31));
 
   const char* expected =
       "subs r1, r0, #42\n"
-      "subw r1, r0, #42\n";
+      "subw r1, r0, #42\n"
+      "subs r1, r0, r2, asr #31\n"
+      "sub r1, r0, r2, asr #31\n";
   DriverStr(expected, "sub");
 }
 
 TEST_F(AssemblerThumb2Test, add) {
   __ adds(arm::R1, arm::R0, arm::ShifterOperand(42));
   __ add(arm::R1, arm::R0, arm::ShifterOperand(42));
+  __ adds(arm::R1, arm::R0, arm::ShifterOperand(arm::R2, arm::ASR, 31));
+  __ add(arm::R1, arm::R0, arm::ShifterOperand(arm::R2, arm::ASR, 31));
 
   const char* expected =
       "adds r1, r0, #42\n"
-      "addw r1, r0, #42\n";
+      "addw r1, r0, #42\n"
+      "adds r1, r0, r2, asr #31\n"
+      "add r1, r0, r2, asr #31\n";
   DriverStr(expected, "add");
 }
 
+TEST_F(AssemblerThumb2Test, umull) {
+  __ umull(arm::R0, arm::R1, arm::R2, arm::R3);
+
+  const char* expected =
+      "umull r0, r1, r2, r3\n";
+  DriverStr(expected, "umull");
+}
+
+TEST_F(AssemblerThumb2Test, smull) {
+  __ smull(arm::R0, arm::R1, arm::R2, arm::R3);
+
+  const char* expected =
+      "smull r0, r1, r2, r3\n";
+  DriverStr(expected, "smull");
+}
+
 TEST_F(AssemblerThumb2Test, StoreWordToThumbOffset) {
   arm::StoreOperandType type = arm::kStoreWord;
   int32_t offset = 4092;
diff --git a/compiler/utils/assembler_test.h b/compiler/utils/assembler_test.h
index 3fe1a31..a339633 100644
--- a/compiler/utils/assembler_test.h
+++ b/compiler/utils/assembler_test.h
@@ -19,6 +19,7 @@
 
 #include "assembler.h"
 
+#include "assembler_test_base.h"
 #include "common_runtime_test.h"  // For ScratchFile
 
 #include <cstdio>
@@ -29,19 +30,11 @@
 
 namespace art {
 
-// If you want to take a look at the differences between the ART assembler and GCC, set this flag
-// to true. The disassembled files will then remain in the tmp directory.
-static constexpr bool kKeepDisassembledFiles = false;
-
 // Helper for a constexpr string length.
 constexpr size_t ConstexprStrLen(char const* str, size_t count = 0) {
   return ('\0' == str[0]) ? count : ConstexprStrLen(str+1, count+1);
 }
 
-// Use a glocal static variable to keep the same name for all test data. Else we'll just spam the
-// temp directory.
-static std::string tmpnam_;
-
 enum class RegisterView {  // private
   kUsePrimaryName,
   kUseSecondaryName,
@@ -59,12 +52,12 @@
   typedef std::string (*TestFn)(AssemblerTest* assembler_test, Ass* assembler);
 
   void DriverFn(TestFn f, std::string test_name) {
-    Driver(f(this, assembler_.get()), test_name);
+    DriverWrapper(f(this, assembler_.get()), test_name);
   }
 
   // This driver assumes the assembler has already been called.
   void DriverStr(std::string assembly_string, std::string test_name) {
-    Driver(assembly_string, test_name);
+    DriverWrapper(assembly_string, test_name);
   }
 
   std::string RepeatR(void (Ass::*f)(Reg), std::string fmt) {
@@ -212,28 +205,7 @@
 
   // This is intended to be run as a test.
   bool CheckTools() {
-    if (!FileExists(FindTool(GetAssemblerCmdName()))) {
-      return false;
-    }
-    LOG(INFO) << "Chosen assembler command: " << GetAssemblerCommand();
-
-    if (!FileExists(FindTool(GetObjdumpCmdName()))) {
-      return false;
-    }
-    LOG(INFO) << "Chosen objdump command: " << GetObjdumpCommand();
-
-    // Disassembly is optional.
-    std::string disassembler = GetDisassembleCommand();
-    if (disassembler.length() != 0) {
-      if (!FileExists(FindTool(GetDisassembleCmdName()))) {
-        return false;
-      }
-      LOG(INFO) << "Chosen disassemble command: " << GetDisassembleCommand();
-    } else {
-      LOG(INFO) << "No disassembler given.";
-    }
-
-    return true;
+    return test_helper_->CheckTools();
   }
 
   // The following functions are public so that TestFn can use them...
@@ -272,17 +244,21 @@
 
   void SetUp() OVERRIDE {
     assembler_.reset(new Ass());
-
-    // Fake a runtime test for ScratchFile
-    CommonRuntimeTest::SetUpAndroidData(android_data_);
+    test_helper_.reset(
+        new AssemblerTestInfrastructure(GetArchitectureString(),
+                                        GetAssemblerCmdName(),
+                                        GetAssemblerParameters(),
+                                        GetObjdumpCmdName(),
+                                        GetObjdumpParameters(),
+                                        GetDisassembleCmdName(),
+                                        GetDisassembleParameters(),
+                                        GetAssemblyHeader()));
 
     SetUpHelpers();
   }
 
   void TearDown() OVERRIDE {
-    // We leave temporaries in case this failed so we can debug issues.
-    CommonRuntimeTest::TearDownAndroidData(android_data_, false);
-    tmpnam_ = "";
+    test_helper_.reset();  // Clean up the helper.
   }
 
   // Override this to set up any architecture-specific things, e.g., register vectors.
@@ -301,23 +277,6 @@
     return "";
   }
 
-  // Return the host assembler command for this test.
-  virtual std::string GetAssemblerCommand() {
-    // Already resolved it once?
-    if (resolved_assembler_cmd_.length() != 0) {
-      return resolved_assembler_cmd_;
-    }
-
-    std::string line = FindTool(GetAssemblerCmdName());
-    if (line.length() == 0) {
-      return line;
-    }
-
-    resolved_assembler_cmd_ = line + GetAssemblerParameters();
-
-    return resolved_assembler_cmd_;
-  }
-
   // Get the name of the objdump, e.g., "objdump" by default.
   virtual std::string GetObjdumpCmdName() {
     return "objdump";
@@ -328,23 +287,6 @@
     return " -h";
   }
 
-  // Return the host objdump command for this test.
-  virtual std::string GetObjdumpCommand() {
-    // Already resolved it once?
-    if (resolved_objdump_cmd_.length() != 0) {
-      return resolved_objdump_cmd_;
-    }
-
-    std::string line = FindTool(GetObjdumpCmdName());
-    if (line.length() == 0) {
-      return line;
-    }
-
-    resolved_objdump_cmd_ = line + GetObjdumpParameters();
-
-    return resolved_objdump_cmd_;
-  }
-
   // Get the name of the objdump, e.g., "objdump" by default.
   virtual std::string GetDisassembleCmdName() {
     return "objdump";
@@ -354,23 +296,6 @@
   // such to objdump, so it's architecture-specific and there is no default.
   virtual std::string GetDisassembleParameters() = 0;
 
-  // Return the host disassembler command for this test.
-  virtual std::string GetDisassembleCommand() {
-    // Already resolved it once?
-    if (resolved_disassemble_cmd_.length() != 0) {
-      return resolved_disassemble_cmd_;
-    }
-
-    std::string line = FindTool(GetDisassembleCmdName());
-    if (line.length() == 0) {
-      return line;
-    }
-
-    resolved_disassemble_cmd_ = line + GetDisassembleParameters();
-
-    return resolved_disassemble_cmd_;
-  }
-
   // Create a couple of immediate values up to the number of bytes given.
   virtual std::vector<int64_t> CreateImmediateValues(size_t imm_bytes, bool as_uint = false) {
     std::vector<int64_t> res;
@@ -618,395 +543,18 @@
     return str;
   }
 
-  // Driver() assembles and compares the results. If the results are not equal and we have a
-  // disassembler, disassemble both and check whether they have the same mnemonics (in which case
-  // we just warn).
-  void Driver(std::string assembly_text, std::string test_name) {
-    EXPECT_NE(assembly_text.length(), 0U) << "Empty assembly";
-
-    NativeAssemblerResult res;
-    Compile(assembly_text, &res, test_name);
-
-    EXPECT_TRUE(res.ok) << res.error_msg;
-    if (!res.ok) {
-      // No way of continuing.
-      return;
-    }
-
+  void DriverWrapper(std::string assembly_text, std::string test_name) {
     size_t cs = assembler_->CodeSize();
     std::unique_ptr<std::vector<uint8_t>> data(new std::vector<uint8_t>(cs));
     MemoryRegion code(&(*data)[0], data->size());
     assembler_->FinalizeInstructions(code);
-
-    if (*data == *res.code) {
-      Clean(&res);
-    } else {
-      if (DisassembleBinaries(*data, *res.code, test_name)) {
-        if (data->size() > res.code->size()) {
-          // Fail this test with a fancy colored warning being printed.
-          EXPECT_TRUE(false) << "Assembly code is not identical, but disassembly of machine code "
-              "is equal: this implies sub-optimal encoding! Our code size=" << data->size() <<
-              ", gcc size=" << res.code->size();
-        } else {
-          // Otherwise just print an info message and clean up.
-          LOG(INFO) << "GCC chose a different encoding than ours, but the overall length is the "
-              "same.";
-          Clean(&res);
-        }
-      } else {
-        // This will output the assembly.
-        EXPECT_EQ(*res.code, *data) << "Outputs (and disassembly) not identical.";
-      }
-    }
-  }
-
-  // Structure to store intermediates and results.
-  struct NativeAssemblerResult {
-    bool ok;
-    std::string error_msg;
-    std::string base_name;
-    std::unique_ptr<std::vector<uint8_t>> code;
-    uintptr_t length;
-  };
-
-  // Compile the assembly file from_file to a binary file to_file. Returns true on success.
-  bool Assemble(const char* from_file, const char* to_file, std::string* error_msg) {
-    bool have_assembler = FileExists(FindTool(GetAssemblerCmdName()));
-    EXPECT_TRUE(have_assembler) << "Cannot find assembler:" << GetAssemblerCommand();
-    if (!have_assembler) {
-      return false;
-    }
-
-    std::vector<std::string> args;
-
-    // Encaspulate the whole command line in a single string passed to
-    // the shell, so that GetAssemblerCommand() may contain arguments
-    // in addition to the program name.
-    args.push_back(GetAssemblerCommand());
-    args.push_back("-o");
-    args.push_back(to_file);
-    args.push_back(from_file);
-    std::string cmd = Join(args, ' ');
-
-    args.clear();
-    args.push_back("/bin/sh");
-    args.push_back("-c");
-    args.push_back(cmd);
-
-    bool success = Exec(args, error_msg);
-    if (!success) {
-      LOG(INFO) << "Assembler command line:";
-      for (std::string arg : args) {
-        LOG(INFO) << arg;
-      }
-    }
-    return success;
-  }
-
-  // Runs objdump -h on the binary file and extracts the first line with .text.
-  // Returns "" on failure.
-  std::string Objdump(std::string file) {
-    bool have_objdump = FileExists(FindTool(GetObjdumpCmdName()));
-    EXPECT_TRUE(have_objdump) << "Cannot find objdump: " << GetObjdumpCommand();
-    if (!have_objdump) {
-      return "";
-    }
-
-    std::string error_msg;
-    std::vector<std::string> args;
-
-    // Encaspulate the whole command line in a single string passed to
-    // the shell, so that GetObjdumpCommand() may contain arguments
-    // in addition to the program name.
-    args.push_back(GetObjdumpCommand());
-    args.push_back(file);
-    args.push_back(">");
-    args.push_back(file+".dump");
-    std::string cmd = Join(args, ' ');
-
-    args.clear();
-    args.push_back("/bin/sh");
-    args.push_back("-c");
-    args.push_back(cmd);
-
-    if (!Exec(args, &error_msg)) {
-      EXPECT_TRUE(false) << error_msg;
-    }
-
-    std::ifstream dump(file+".dump");
-
-    std::string line;
-    bool found = false;
-    while (std::getline(dump, line)) {
-      if (line.find(".text") != line.npos) {
-        found = true;
-        break;
-      }
-    }
-
-    dump.close();
-
-    if (found) {
-      return line;
-    } else {
-      return "";
-    }
-  }
-
-  // Disassemble both binaries and compare the text.
-  bool DisassembleBinaries(std::vector<uint8_t>& data, std::vector<uint8_t>& as,
-                           std::string test_name) {
-    std::string disassembler = GetDisassembleCommand();
-    if (disassembler.length() == 0) {
-      LOG(WARNING) << "No dissassembler command.";
-      return false;
-    }
-
-    std::string data_name = WriteToFile(data, test_name + ".ass");
-    std::string error_msg;
-    if (!DisassembleBinary(data_name, &error_msg)) {
-      LOG(INFO) << "Error disassembling: " << error_msg;
-      std::remove(data_name.c_str());
-      return false;
-    }
-
-    std::string as_name = WriteToFile(as, test_name + ".gcc");
-    if (!DisassembleBinary(as_name, &error_msg)) {
-      LOG(INFO) << "Error disassembling: " << error_msg;
-      std::remove(data_name.c_str());
-      std::remove((data_name + ".dis").c_str());
-      std::remove(as_name.c_str());
-      return false;
-    }
-
-    bool result = CompareFiles(data_name + ".dis", as_name + ".dis");
-
-    if (!kKeepDisassembledFiles) {
-      std::remove(data_name.c_str());
-      std::remove(as_name.c_str());
-      std::remove((data_name + ".dis").c_str());
-      std::remove((as_name + ".dis").c_str());
-    }
-
-    return result;
-  }
-
-  bool DisassembleBinary(std::string file, std::string* error_msg) {
-    std::vector<std::string> args;
-
-    // Encaspulate the whole command line in a single string passed to
-    // the shell, so that GetDisassembleCommand() may contain arguments
-    // in addition to the program name.
-    args.push_back(GetDisassembleCommand());
-    args.push_back(file);
-    args.push_back("| sed -n \'/<.data>/,$p\' | sed -e \'s/.*://\'");
-    args.push_back(">");
-    args.push_back(file+".dis");
-    std::string cmd = Join(args, ' ');
-
-    args.clear();
-    args.push_back("/bin/sh");
-    args.push_back("-c");
-    args.push_back(cmd);
-
-    return Exec(args, error_msg);
-  }
-
-  std::string WriteToFile(std::vector<uint8_t>& buffer, std::string test_name) {
-    std::string file_name = GetTmpnam() + std::string("---") + test_name;
-    const char* data = reinterpret_cast<char*>(buffer.data());
-    std::ofstream s_out(file_name + ".o");
-    s_out.write(data, buffer.size());
-    s_out.close();
-    return file_name + ".o";
-  }
-
-  bool CompareFiles(std::string f1, std::string f2) {
-    std::ifstream f1_in(f1);
-    std::ifstream f2_in(f2);
-
-    bool result = std::equal(std::istreambuf_iterator<char>(f1_in),
-                             std::istreambuf_iterator<char>(),
-                             std::istreambuf_iterator<char>(f2_in));
-
-    f1_in.close();
-    f2_in.close();
-
-    return result;
-  }
-
-  // Compile the given assembly code and extract the binary, if possible. Put result into res.
-  bool Compile(std::string assembly_code, NativeAssemblerResult* res, std::string test_name) {
-    res->ok = false;
-    res->code.reset(nullptr);
-
-    res->base_name = GetTmpnam() + std::string("---") + test_name;
-
-    // TODO: Lots of error checking.
-
-    std::ofstream s_out(res->base_name + ".S");
-    const char* header = GetAssemblyHeader();
-    if (header != nullptr) {
-      s_out << header;
-    }
-    s_out << assembly_code;
-    s_out.close();
-
-    if (!Assemble((res->base_name + ".S").c_str(), (res->base_name + ".o").c_str(),
-                  &res->error_msg)) {
-      res->error_msg = "Could not compile.";
-      return false;
-    }
-
-    std::string odump = Objdump(res->base_name + ".o");
-    if (odump.length() == 0) {
-      res->error_msg = "Objdump failed.";
-      return false;
-    }
-
-    std::istringstream iss(odump);
-    std::istream_iterator<std::string> start(iss);
-    std::istream_iterator<std::string> end;
-    std::vector<std::string> tokens(start, end);
-
-    if (tokens.size() < OBJDUMP_SECTION_LINE_MIN_TOKENS) {
-      res->error_msg = "Objdump output not recognized: too few tokens.";
-      return false;
-    }
-
-    if (tokens[1] != ".text") {
-      res->error_msg = "Objdump output not recognized: .text not second token.";
-      return false;
-    }
-
-    std::string lengthToken = "0x" + tokens[2];
-    std::istringstream(lengthToken) >> std::hex >> res->length;
-
-    std::string offsetToken = "0x" + tokens[5];
-    uintptr_t offset;
-    std::istringstream(offsetToken) >> std::hex >> offset;
-
-    std::ifstream obj(res->base_name + ".o");
-    obj.seekg(offset);
-    res->code.reset(new std::vector<uint8_t>(res->length));
-    obj.read(reinterpret_cast<char*>(&(*res->code)[0]), res->length);
-    obj.close();
-
-    res->ok = true;
-    return true;
-  }
-
-  // Remove temporary files.
-  void Clean(const NativeAssemblerResult* res) {
-    std::remove((res->base_name + ".S").c_str());
-    std::remove((res->base_name + ".o").c_str());
-    std::remove((res->base_name + ".o.dump").c_str());
-  }
-
-  // Check whether file exists. Is used for commands, so strips off any parameters: anything after
-  // the first space. We skip to the last slash for this, so it should work with directories with
-  // spaces.
-  static bool FileExists(std::string file) {
-    if (file.length() == 0) {
-      return false;
-    }
-
-    // Need to strip any options.
-    size_t last_slash = file.find_last_of('/');
-    if (last_slash == std::string::npos) {
-      // No slash, start looking at the start.
-      last_slash = 0;
-    }
-    size_t space_index = file.find(' ', last_slash);
-
-    if (space_index == std::string::npos) {
-      std::ifstream infile(file.c_str());
-      return infile.good();
-    } else {
-      std::string copy = file.substr(0, space_index - 1);
-
-      struct stat buf;
-      return stat(copy.c_str(), &buf) == 0;
-    }
-  }
-
-  static std::string GetGCCRootPath() {
-    return "prebuilts/gcc/linux-x86";
-  }
-
-  static std::string GetRootPath() {
-    // 1) Check ANDROID_BUILD_TOP
-    char* build_top = getenv("ANDROID_BUILD_TOP");
-    if (build_top != nullptr) {
-      return std::string(build_top) + "/";
-    }
-
-    // 2) Do cwd
-    char temp[1024];
-    return getcwd(temp, 1024) ? std::string(temp) + "/" : std::string("");
-  }
-
-  std::string FindTool(std::string tool_name) {
-    // Find the current tool. Wild-card pattern is "arch-string*tool-name".
-    std::string gcc_path = GetRootPath() + GetGCCRootPath();
-    std::vector<std::string> args;
-    args.push_back("find");
-    args.push_back(gcc_path);
-    args.push_back("-name");
-    args.push_back(GetArchitectureString() + "*" + tool_name);
-    args.push_back("|");
-    args.push_back("sort");
-    args.push_back("|");
-    args.push_back("tail");
-    args.push_back("-n");
-    args.push_back("1");
-    std::string tmp_file = GetTmpnam();
-    args.push_back(">");
-    args.push_back(tmp_file);
-    std::string sh_args = Join(args, ' ');
-
-    args.clear();
-    args.push_back("/bin/sh");
-    args.push_back("-c");
-    args.push_back(sh_args);
-
-    std::string error_msg;
-    if (!Exec(args, &error_msg)) {
-      EXPECT_TRUE(false) << error_msg;
-      return "";
-    }
-
-    std::ifstream in(tmp_file.c_str());
-    std::string line;
-    if (!std::getline(in, line)) {
-      in.close();
-      std::remove(tmp_file.c_str());
-      return "";
-    }
-    in.close();
-    std::remove(tmp_file.c_str());
-    return line;
-  }
-
-  // Use a consistent tmpnam, so store it.
-  std::string GetTmpnam() {
-    if (tmpnam_.length() == 0) {
-      ScratchFile tmp;
-      tmpnam_ = tmp.GetFilename() + "asm";
-    }
-    return tmpnam_;
+    test_helper_->Driver(*data, assembly_text, test_name);
   }
 
   static constexpr size_t kWarnManyCombinationsThreshold = 500;
-  static constexpr size_t OBJDUMP_SECTION_LINE_MIN_TOKENS = 6;
 
   std::unique_ptr<Ass> assembler_;
-
-  std::string resolved_assembler_cmd_;
-  std::string resolved_objdump_cmd_;
-  std::string resolved_disassemble_cmd_;
-
-  std::string android_data_;
+  std::unique_ptr<AssemblerTestInfrastructure> test_helper_;
 
   DISALLOW_COPY_AND_ASSIGN(AssemblerTest);
 };
diff --git a/compiler/utils/assembler_test_base.h b/compiler/utils/assembler_test_base.h
new file mode 100644
index 0000000..3341151
--- /dev/null
+++ b/compiler/utils/assembler_test_base.h
@@ -0,0 +1,544 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_UTILS_ASSEMBLER_TEST_BASE_H_
+#define ART_COMPILER_UTILS_ASSEMBLER_TEST_BASE_H_
+
+#include "common_runtime_test.h"  // For ScratchFile
+
+#include <cstdio>
+#include <cstdlib>
+#include <fstream>
+#include <iterator>
+#include <sys/stat.h>
+
+namespace art {
+
+// If you want to take a look at the differences between the ART assembler and GCC, set this flag
+// to true. The disassembled files will then remain in the tmp directory.
+static constexpr bool kKeepDisassembledFiles = false;
+
+// Use a global static variable to keep the same name for all test data. Else we'll just spam the
+// temp directory.
+static std::string tmpnam_;
+
+// We put this into a class as gtests are self-contained, so this helper needs to be in an h-file.
+class AssemblerTestInfrastructure {
+ public:
+  AssemblerTestInfrastructure(std::string architecture,
+                              std::string as,
+                              std::string as_params,
+                              std::string objdump,
+                              std::string objdump_params,
+                              std::string disasm,
+                              std::string disasm_params,
+                              const char* asm_header) :
+      architecture_string_(architecture),
+      asm_header_(asm_header),
+      assembler_cmd_name_(as),
+      assembler_parameters_(as_params),
+      objdump_cmd_name_(objdump),
+      objdump_parameters_(objdump_params),
+      disassembler_cmd_name_(disasm),
+      disassembler_parameters_(disasm_params) {
+    // Fake a runtime test for ScratchFile
+    CommonRuntimeTest::SetUpAndroidData(android_data_);
+  }
+
+  virtual ~AssemblerTestInfrastructure() {
+    // We leave temporaries in case this failed so we can debug issues.
+    CommonRuntimeTest::TearDownAndroidData(android_data_, false);
+    tmpnam_ = "";
+  }
+
+  // This is intended to be run as a test.
+  bool CheckTools() {
+    if (!FileExists(FindTool(assembler_cmd_name_))) {
+      return false;
+    }
+    LOG(INFO) << "Chosen assembler command: " << GetAssemblerCommand();
+
+    if (!FileExists(FindTool(objdump_cmd_name_))) {
+      return false;
+    }
+    LOG(INFO) << "Chosen objdump command: " << GetObjdumpCommand();
+
+    // Disassembly is optional.
+    std::string disassembler = GetDisassembleCommand();
+    if (disassembler.length() != 0) {
+      if (!FileExists(FindTool(disassembler_cmd_name_))) {
+        return false;
+      }
+      LOG(INFO) << "Chosen disassemble command: " << GetDisassembleCommand();
+    } else {
+      LOG(INFO) << "No disassembler given.";
+    }
+
+    return true;
+  }
+
+  // Driver() assembles and compares the results. If the results are not equal and we have a
+  // disassembler, disassemble both and check whether they have the same mnemonics (in which case
+  // we just warn).
+  void Driver(const std::vector<uint8_t>& data, std::string assembly_text, std::string test_name) {
+    EXPECT_NE(assembly_text.length(), 0U) << "Empty assembly";
+
+    NativeAssemblerResult res;
+    Compile(assembly_text, &res, test_name);
+
+    EXPECT_TRUE(res.ok) << res.error_msg;
+    if (!res.ok) {
+      // No way of continuing.
+      return;
+    }
+
+    if (data == *res.code) {
+      Clean(&res);
+    } else {
+      if (DisassembleBinaries(data, *res.code, test_name)) {
+        if (data.size() > res.code->size()) {
+          // Fail this test with a fancy colored warning being printed.
+          EXPECT_TRUE(false) << "Assembly code is not identical, but disassembly of machine code "
+              "is equal: this implies sub-optimal encoding! Our code size=" << data.size() <<
+              ", gcc size=" << res.code->size();
+        } else {
+          // Otherwise just print an info message and clean up.
+          LOG(INFO) << "GCC chose a different encoding than ours, but the overall length is the "
+              "same.";
+          Clean(&res);
+        }
+      } else {
+        // This will output the assembly.
+        EXPECT_EQ(*res.code, data) << "Outputs (and disassembly) not identical.";
+      }
+    }
+  }
+
+ protected:
+  // Return the host assembler command for this test.
+  virtual std::string GetAssemblerCommand() {
+    // Already resolved it once?
+    if (resolved_assembler_cmd_.length() != 0) {
+      return resolved_assembler_cmd_;
+    }
+
+    std::string line = FindTool(assembler_cmd_name_);
+    if (line.length() == 0) {
+      return line;
+    }
+
+    resolved_assembler_cmd_ = line + assembler_parameters_;
+
+    return resolved_assembler_cmd_;
+  }
+
+  // Return the host objdump command for this test.
+  virtual std::string GetObjdumpCommand() {
+    // Already resolved it once?
+    if (resolved_objdump_cmd_.length() != 0) {
+      return resolved_objdump_cmd_;
+    }
+
+    std::string line = FindTool(objdump_cmd_name_);
+    if (line.length() == 0) {
+      return line;
+    }
+
+    resolved_objdump_cmd_ = line + objdump_parameters_;
+
+    return resolved_objdump_cmd_;
+  }
+
+  // Return the host disassembler command for this test.
+  virtual std::string GetDisassembleCommand() {
+    // Already resolved it once?
+    if (resolved_disassemble_cmd_.length() != 0) {
+      return resolved_disassemble_cmd_;
+    }
+
+    std::string line = FindTool(disassembler_cmd_name_);
+    if (line.length() == 0) {
+      return line;
+    }
+
+    resolved_disassemble_cmd_ = line + disassembler_parameters_;
+
+    return resolved_disassemble_cmd_;
+  }
+
+ private:
+  // Structure to store intermediates and results.
+  struct NativeAssemblerResult {
+    bool ok;
+    std::string error_msg;
+    std::string base_name;
+    std::unique_ptr<std::vector<uint8_t>> code;
+    uintptr_t length;
+  };
+
+  // Compile the assembly file from_file to a binary file to_file. Returns true on success.
+  bool Assemble(const char* from_file, const char* to_file, std::string* error_msg) {
+    bool have_assembler = FileExists(FindTool(assembler_cmd_name_));
+    EXPECT_TRUE(have_assembler) << "Cannot find assembler:" << GetAssemblerCommand();
+    if (!have_assembler) {
+      return false;
+    }
+
+    std::vector<std::string> args;
+
+    // Encapsulate the whole command line in a single string passed to
+    // the shell, so that GetAssemblerCommand() may contain arguments
+    // in addition to the program name.
+    args.push_back(GetAssemblerCommand());
+    args.push_back("-o");
+    args.push_back(to_file);
+    args.push_back(from_file);
+    std::string cmd = Join(args, ' ');
+
+    args.clear();
+    args.push_back("/bin/sh");
+    args.push_back("-c");
+    args.push_back(cmd);
+
+    bool success = Exec(args, error_msg);
+    if (!success) {
+      LOG(INFO) << "Assembler command line:";
+      for (std::string arg : args) {
+        LOG(INFO) << arg;
+      }
+    }
+    return success;
+  }
+
+  // Runs objdump -h on the binary file and extracts the first line with .text.
+  // Returns "" on failure (objdump not found, command failed, or no .text line in the output).
+  std::string Objdump(std::string file) {
+    bool have_objdump = FileExists(FindTool(objdump_cmd_name_));
+    EXPECT_TRUE(have_objdump) << "Cannot find objdump: " << GetObjdumpCommand();
+    if (!have_objdump) {
+      return "";
+    }
+
+    std::string error_msg;
+    std::vector<std::string> args;
+
+    // Encapsulate the whole command line in a single string passed to
+    // the shell, so that GetObjdumpCommand() may contain arguments
+    // in addition to the program name.
+    args.push_back(GetObjdumpCommand());
+    args.push_back(file);
+    args.push_back(">");
+    args.push_back(file+".dump");
+    std::string cmd = Join(args, ' ');
+
+    args.clear();
+    args.push_back("/bin/sh");
+    args.push_back("-c");
+    args.push_back(cmd);
+
+    if (!Exec(args, &error_msg)) {
+      EXPECT_TRUE(false) << error_msg;
+      return "";  // Do not parse a missing, truncated or stale dump file.
+    }
+
+    std::ifstream dump(file+".dump");
+    std::string line;
+    bool found = false;
+    while (std::getline(dump, line)) {
+      if (line.find(".text") != line.npos) {
+        found = true;
+        break;
+      }
+    }
+
+    dump.close();
+
+    if (found) {
+      return line;
+    } else {
+      return "";
+    }
+  }
+
+  // Disassemble both binaries and compare the text.
+  bool DisassembleBinaries(const std::vector<uint8_t>& data, const std::vector<uint8_t>& as,
+                           std::string test_name) {
+    std::string disassembler = GetDisassembleCommand();
+    if (disassembler.length() == 0) {
+      LOG(WARNING) << "No dissassembler command.";
+      return false;
+    }
+
+    std::string data_name = WriteToFile(data, test_name + ".ass");
+    std::string error_msg;
+    if (!DisassembleBinary(data_name, &error_msg)) {
+      LOG(INFO) << "Error disassembling: " << error_msg;
+      std::remove(data_name.c_str());
+      return false;
+    }
+
+    std::string as_name = WriteToFile(as, test_name + ".gcc");
+    if (!DisassembleBinary(as_name, &error_msg)) {
+      LOG(INFO) << "Error disassembling: " << error_msg;
+      std::remove(data_name.c_str());
+      std::remove((data_name + ".dis").c_str());
+      std::remove(as_name.c_str());
+      return false;
+    }
+
+    bool result = CompareFiles(data_name + ".dis", as_name + ".dis");
+
+    if (!kKeepDisassembledFiles) {
+      std::remove(data_name.c_str());
+      std::remove(as_name.c_str());
+      std::remove((data_name + ".dis").c_str());
+      std::remove((as_name + ".dis").c_str());
+    }
+
+    return result;
+  }
+
+  bool DisassembleBinary(std::string file, std::string* error_msg) {
+    std::vector<std::string> args;
+
+    // Encapsulate the whole command line in a single string passed to the shell, so that
+    // GetDisassembleCommand() may contain arguments in addition to the program name. The sed
+    // stages keep the output from the "<.data>" line on and strip each line up to its last ':'.
+    args.push_back(GetDisassembleCommand());
+    args.push_back(file);
+    args.push_back("| sed -n \'/<.data>/,$p\' | sed -e \'s/.*://\'");
+    args.push_back(">");
+    args.push_back(file+".dis");
+    std::string cmd = Join(args, ' ');
+
+    args.clear();
+    args.push_back("/bin/sh");
+    args.push_back("-c");
+    args.push_back(cmd);
+
+    return Exec(args, error_msg);
+  }
+
+  std::string WriteToFile(const std::vector<uint8_t>& buffer, std::string test_name) {
+    std::string file_name = GetTmpnam() + std::string("---") + test_name;
+    const char* data = reinterpret_cast<const char*>(buffer.data());
+    std::ofstream s_out(file_name + ".o");
+    s_out.write(data, buffer.size());
+    s_out.close();
+    return file_name + ".o";
+  }
+
+  // Returns true iff both files can be opened and hold byte-identical contents.
+  bool CompareFiles(std::string f1, std::string f2) {
+    std::ifstream f1_in(f1);
+    std::ifstream f2_in(f2);
+    if (!f1_in.is_open() || !f2_in.is_open()) {
+      return false;  // An unreadable file must never count as "equal disassembly".
+    }
+    // The three-iterator std::equal reads past the end of a shorter f2 and accepts a longer one
+    // that has f1 as a prefix, so compare the fully-read contents, which also compares lengths.
+    std::istreambuf_iterator<char> eos;
+    return std::string(std::istreambuf_iterator<char>(f1_in), eos) ==
+           std::string(std::istreambuf_iterator<char>(f2_in), eos);
+  }
+
+  // Compile the given assembly code and extract the binary, if possible. Put result into res.
+  // Returns true and sets res->ok on success; on failure res->error_msg names the failed step.
+  bool Compile(std::string assembly_code, NativeAssemblerResult* res, std::string test_name) {
+    res->ok = false;
+    res->code.reset(nullptr);
+    res->base_name = GetTmpnam() + std::string("---") + test_name;
+    // TODO: More error checking, e.g. for writing the .S file.
+    std::ofstream s_out(res->base_name + ".S");
+    if (asm_header_ != nullptr) {
+      s_out << asm_header_;
+    }
+    s_out << assembly_code;
+    s_out.close();
+
+    if (!Assemble((res->base_name + ".S").c_str(), (res->base_name + ".o").c_str(),
+                  &res->error_msg)) {
+      res->error_msg = "Could not compile: " + res->error_msg;  // Keep the Exec() details.
+      return false;
+    }
+
+    std::string odump = Objdump(res->base_name + ".o");
+    if (odump.length() == 0) {
+      res->error_msg = "Objdump failed.";
+      return false;
+    }
+
+    std::istringstream iss(odump);
+    std::istream_iterator<std::string> start(iss);
+    std::istream_iterator<std::string> end;
+    std::vector<std::string> tokens(start, end);
+
+    if (tokens.size() < OBJDUMP_SECTION_LINE_MIN_TOKENS) {
+      res->error_msg = "Objdump output not recognized: too few tokens.";
+      return false;
+    }
+
+    if (tokens[1] != ".text") {
+      res->error_msg = "Objdump output not recognized: .text not second token.";
+      return false;
+    }
+
+    // Token 2 is parsed as the section size and token 5 as its file offset, both as hex.
+    std::string lengthToken = "0x" + tokens[2];
+    std::istringstream(lengthToken) >> std::hex >> res->length;
+    std::string offsetToken = "0x" + tokens[5];
+    uintptr_t offset = 0;
+    std::istringstream(offsetToken) >> std::hex >> offset;
+
+    std::ifstream obj(res->base_name + ".o");
+    obj.seekg(offset);
+    res->code.reset(new std::vector<uint8_t>(res->length));
+    if (!obj.read(reinterpret_cast<char*>(res->code->data()), res->length)) {
+      res->error_msg = "Could not read the .text section from the object file.";
+      return false;
+    }
+
+    res->ok = true;
+    return true;
+  }
+
+  // Remove temporary files.
+  void Clean(const NativeAssemblerResult* res) {
+    std::remove((res->base_name + ".S").c_str());
+    std::remove((res->base_name + ".o").c_str());
+    std::remove((res->base_name + ".o.dump").c_str());
+  }
+
+  // Check whether file exists. Is used for commands, so strips off any parameters: anything after
+  // the first space. We skip to the last slash for this, so it should work with directories with
+  // spaces. Returns false for an empty string.
+  static bool FileExists(std::string file) {
+    if (file.length() == 0) {
+      return false;
+    }
+
+    // Need to strip any options.
+    size_t last_slash = file.find_last_of('/');
+    if (last_slash == std::string::npos) {
+      // No slash, start looking at the start.
+      last_slash = 0;
+    }
+    size_t space_index = file.find(' ', last_slash);
+
+    if (space_index == std::string::npos) {
+      std::ifstream infile(file.c_str());
+      return infile.good();
+    } else {
+      // substr() takes a length: space_index characters is the command without the space.
+      std::string copy = file.substr(0, space_index);
+      struct stat buf;
+      return stat(copy.c_str(), &buf) == 0;
+    }
+  }
+
+  static std::string GetGCCRootPath() {
+    return "prebuilts/gcc/linux-x86";
+  }
+
+  static std::string GetRootPath() {
+    // 1) Check ANDROID_BUILD_TOP
+    char* build_top = getenv("ANDROID_BUILD_TOP");
+    if (build_top != nullptr) {
+      return std::string(build_top) + "/";
+    }
+
+    // 2) Do cwd
+    char temp[1024];
+    return getcwd(temp, 1024) ? std::string(temp) + "/" : std::string("");
+  }
+
+  std::string FindTool(std::string tool_name) {
+    // Find the current tool. Wild-card pattern is "arch-string*tool-name".
+    std::string gcc_path = GetRootPath() + GetGCCRootPath();
+    std::vector<std::string> args;
+    args.push_back("find");
+    args.push_back(gcc_path);
+    args.push_back("-name");
+    args.push_back(architecture_string_ + "*" + tool_name);
+    args.push_back("|");
+    args.push_back("sort");
+    args.push_back("|");
+    args.push_back("tail");
+    args.push_back("-n");
+    args.push_back("1");
+    std::string tmp_file = GetTmpnam();
+    args.push_back(">");
+    args.push_back(tmp_file);
+    std::string sh_args = Join(args, ' ');
+
+    args.clear();
+    args.push_back("/bin/sh");
+    args.push_back("-c");
+    args.push_back(sh_args);
+
+    std::string error_msg;
+    if (!Exec(args, &error_msg)) {
+      EXPECT_TRUE(false) << error_msg;
+      return "";
+    }
+
+    std::ifstream in(tmp_file.c_str());
+    std::string line;
+    if (!std::getline(in, line)) {
+      in.close();
+      std::remove(tmp_file.c_str());
+      return "";
+    }
+    in.close();
+    std::remove(tmp_file.c_str());
+    return line;
+  }
+
+  // Use a consistent tmpnam, so store it.
+  std::string GetTmpnam() {
+    if (tmpnam_.length() == 0) {
+      ScratchFile tmp;
+      tmpnam_ = tmp.GetFilename() + "asm";
+    }
+    return tmpnam_;
+  }
+
+  static constexpr size_t OBJDUMP_SECTION_LINE_MIN_TOKENS = 6;
+
+  std::string architecture_string_;
+  const char* asm_header_;
+
+  std::string assembler_cmd_name_;
+  std::string assembler_parameters_;
+
+  std::string objdump_cmd_name_;
+  std::string objdump_parameters_;
+
+  std::string disassembler_cmd_name_;
+  std::string disassembler_parameters_;
+
+  std::string resolved_assembler_cmd_;
+  std::string resolved_objdump_cmd_;
+  std::string resolved_disassemble_cmd_;
+
+  std::string android_data_;
+
+  DISALLOW_COPY_AND_ASSIGN(AssemblerTestInfrastructure);
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_UTILS_ASSEMBLER_TEST_BASE_H_
diff --git a/compiler/utils/assembler_thumb_test.cc b/compiler/utils/assembler_thumb_test.cc
index 772fa9a..7738627 100644
--- a/compiler/utils/assembler_thumb_test.cc
+++ b/compiler/utils/assembler_thumb_test.cc
@@ -15,9 +15,11 @@
  */
 
 #include <dirent.h>
+#include <errno.h>
 #include <fstream>
-#include <sys/types.h>
 #include <map>
+#include <string.h>
+#include <sys/types.h>
 
 #include "gtest/gtest.h"
 #include "utils/arm/assembler_thumb2.h"
@@ -105,12 +107,14 @@
 
   // Assemble the .S
   snprintf(cmd, sizeof(cmd), "%sas %s -o %s.o", toolsdir.c_str(), filename, filename);
-  system(cmd);
+  int cmd_result = system(cmd);
+  ASSERT_EQ(cmd_result, 0) << strerror(errno);
 
   // Remove the $d symbols to prevent the disassembler dumping the instructions
   // as .word
   snprintf(cmd, sizeof(cmd), "%sobjcopy -N '$d' %s.o %s.oo", toolsdir.c_str(), filename, filename);
-  system(cmd);
+  int cmd_result2 = system(cmd);
+  ASSERT_EQ(cmd_result2, 0) << strerror(errno);
 
   // Disassemble.
 
@@ -119,7 +123,8 @@
   if (kPrintResults) {
     // Print the results only, don't check. This is used to generate new output for inserting
     // into the .inc file.
-    system(cmd);
+    int cmd_result3 = system(cmd);
+    ASSERT_EQ(cmd_result3, 0) << strerror(errno);
   } else {
     // Check the results match the appropriate results in the .inc file.
     FILE *fp = popen(cmd, "r");
diff --git a/compiler/utils/growable_array.h b/compiler/utils/growable_array.h
index 821e28b..e4b1e7d 100644
--- a/compiler/utils/growable_array.h
+++ b/compiler/utils/growable_array.h
@@ -46,6 +46,14 @@
       }
     }
 
+    bool Contains(T value) const {
+      for (size_t i = 0; i < num_used_; ++i) {
+        if (elem_list_[i] == value) {
+          return true;
+        }
+      }
+      return false;
+    }
 
     // Expand the list size to at least new length.
     void Resize(size_t new_length) {
diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc
index 282ab96..5e9653d 100644
--- a/compiler/utils/mips64/assembler_mips64.cc
+++ b/compiler/utils/mips64/assembler_mips64.cc
@@ -272,6 +272,10 @@
   EmitI(0x25, rs, rt, imm16);
 }
 
+void Mips64Assembler::Lwu(GpuRegister rt, GpuRegister rs, uint16_t imm16) {
+  EmitI(0x27, rs, rt, imm16);
+}
+
 void Mips64Assembler::Lui(GpuRegister rt, uint16_t imm16) {
   EmitI(0xf, static_cast<GpuRegister>(0), rt, imm16);
 }
@@ -480,6 +484,9 @@
     case kLoadWord:
       Lw(reg, base, offset);
       break;
+    case kLoadUnsignedWord:
+      Lwu(reg, base, offset);
+      break;
     case kLoadDoubleword:
       // TODO: alignment issues ???
       Ld(reg, base, offset);
@@ -512,7 +519,6 @@
     CHECK_EQ(0u, size) << dst;
   } else if (dst.IsGpuRegister()) {
     if (size == 4) {
-      CHECK_EQ(4u, size) << dst;
       LoadFromOffset(kLoadWord, dst.AsGpuRegister(), src_register, src_offset);
     } else if (size == 8) {
       CHECK_EQ(8u, size) << dst;
@@ -740,14 +746,13 @@
 void Mips64Assembler::LoadRef(ManagedRegister mdest, FrameOffset src) {
   Mips64ManagedRegister dest = mdest.AsMips64();
   CHECK(dest.IsGpuRegister());
-  LoadFromOffset(kLoadWord, dest.AsGpuRegister(), SP, src.Int32Value());
+  LoadFromOffset(kLoadUnsignedWord, dest.AsGpuRegister(), SP, src.Int32Value());
 }
 
-void Mips64Assembler::LoadRef(ManagedRegister mdest, ManagedRegister base,
-                            MemberOffset offs) {
+void Mips64Assembler::LoadRef(ManagedRegister mdest, ManagedRegister base, MemberOffset offs) {
   Mips64ManagedRegister dest = mdest.AsMips64();
-  CHECK(dest.IsGpuRegister() && dest.IsGpuRegister());
-  LoadFromOffset(kLoadWord, dest.AsGpuRegister(),
+  CHECK(dest.IsGpuRegister() && base.AsMips64().IsGpuRegister());
+  LoadFromOffset(kLoadUnsignedWord, dest.AsGpuRegister(),
                  base.AsMips64().AsGpuRegister(), offs.Int32Value());
   if (kPoisonHeapReferences) {
     Subu(dest.AsGpuRegister(), ZERO, dest.AsGpuRegister());
@@ -921,7 +926,7 @@
     // the address in the handle scope holding the reference.
     // e.g. out_reg = (handle == 0) ? 0 : (SP+handle_offset)
     if (in_reg.IsNoRegister()) {
-      LoadFromOffset(kLoadWord, out_reg.AsGpuRegister(),
+      LoadFromOffset(kLoadUnsignedWord, out_reg.AsGpuRegister(),
                      SP, handle_scope_offset.Int32Value());
       in_reg = out_reg;
     }
@@ -944,7 +949,7 @@
   CHECK(scratch.IsGpuRegister()) << scratch;
   if (null_allowed) {
     Label null_arg;
-    LoadFromOffset(kLoadWord, scratch.AsGpuRegister(), SP,
+    LoadFromOffset(kLoadUnsignedWord, scratch.AsGpuRegister(), SP,
                    handle_scope_offset.Int32Value());
     // Null values get a handle scope entry value of 0.  Otherwise, the handle scope entry is
     // the address in the handle scope holding the reference.
@@ -998,7 +1003,7 @@
   Mips64ManagedRegister scratch = mscratch.AsMips64();
   CHECK(scratch.IsGpuRegister()) << scratch;
   // Call *(*(SP + base) + offset)
-  LoadFromOffset(kLoadWord, scratch.AsGpuRegister(),
+  LoadFromOffset(kLoadUnsignedWord, scratch.AsGpuRegister(),
                  SP, base.Int32Value());
   LoadFromOffset(kLoadDoubleword, scratch.AsGpuRegister(),
                  scratch.AsGpuRegister(), offset.Int32Value());
diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h
index b7f6a9e..2d7c661 100644
--- a/compiler/utils/mips64/assembler_mips64.h
+++ b/compiler/utils/mips64/assembler_mips64.h
@@ -36,6 +36,7 @@
   kLoadSignedHalfword,
   kLoadUnsignedHalfword,
   kLoadWord,
+  kLoadUnsignedWord,
   kLoadDoubleword
 };
 
@@ -85,6 +86,7 @@
   void Ld(GpuRegister rt, GpuRegister rs, uint16_t imm16);
   void Lbu(GpuRegister rt, GpuRegister rs, uint16_t imm16);
   void Lhu(GpuRegister rt, GpuRegister rs, uint16_t imm16);
+  void Lwu(GpuRegister rt, GpuRegister rs, uint16_t imm16);
   void Lui(GpuRegister rt, uint16_t imm16);
   void Mfhi(GpuRegister rd);
   void Mflo(GpuRegister rd);
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index 329698c..7e75200 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -1292,32 +1292,62 @@
 
 
 void X86Assembler::shll(Register reg, const Immediate& imm) {
-  EmitGenericShift(4, reg, imm);
+  EmitGenericShift(4, Operand(reg), imm);
 }
 
 
 void X86Assembler::shll(Register operand, Register shifter) {
-  EmitGenericShift(4, operand, shifter);
+  EmitGenericShift(4, Operand(operand), shifter);
+}
+
+
+void X86Assembler::shll(const Address& address, const Immediate& imm) {
+  EmitGenericShift(4, address, imm);
+}
+
+
+void X86Assembler::shll(const Address& address, Register shifter) {
+  EmitGenericShift(4, address, shifter);
 }
 
 
 void X86Assembler::shrl(Register reg, const Immediate& imm) {
-  EmitGenericShift(5, reg, imm);
+  EmitGenericShift(5, Operand(reg), imm);
 }
 
 
 void X86Assembler::shrl(Register operand, Register shifter) {
-  EmitGenericShift(5, operand, shifter);
+  EmitGenericShift(5, Operand(operand), shifter);
+}
+
+
+void X86Assembler::shrl(const Address& address, const Immediate& imm) {
+  EmitGenericShift(5, address, imm);
+}
+
+
+void X86Assembler::shrl(const Address& address, Register shifter) {
+  EmitGenericShift(5, address, shifter);
 }
 
 
 void X86Assembler::sarl(Register reg, const Immediate& imm) {
-  EmitGenericShift(7, reg, imm);
+  EmitGenericShift(7, Operand(reg), imm);
 }
 
 
 void X86Assembler::sarl(Register operand, Register shifter) {
-  EmitGenericShift(7, operand, shifter);
+  EmitGenericShift(7, Operand(operand), shifter);
+}
+
+
+void X86Assembler::sarl(const Address& address, const Immediate& imm) {
+  EmitGenericShift(7, address, imm);
+}
+
+
+void X86Assembler::sarl(const Address& address, Register shifter) {
+  EmitGenericShift(7, address, shifter);
 }
 
 
@@ -1330,6 +1360,15 @@
 }
 
 
+void X86Assembler::shld(Register dst, Register src, const Immediate& imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x0F);
+  EmitUint8(0xA4);
+  EmitRegisterOperand(src, dst);
+  EmitUint8(imm.value() & 0xFF);
+}
+
+
 void X86Assembler::shrd(Register dst, Register src, Register shifter) {
   DCHECK_EQ(ECX, shifter);
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
@@ -1339,6 +1378,15 @@
 }
 
 
+void X86Assembler::shrd(Register dst, Register src, const Immediate& imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x0F);
+  EmitUint8(0xAC);
+  EmitRegisterOperand(src, dst);
+  EmitUint8(imm.value() & 0xFF);
+}
+
+
 void X86Assembler::negl(Register reg) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0xF7);
@@ -1459,6 +1507,14 @@
 }
 
 
+void X86Assembler::repne_scasw() {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitUint8(0xF2);
+  EmitUint8(0xAF);
+}
+
+
 X86Assembler* X86Assembler::lock() {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0xF0);
@@ -1622,28 +1678,28 @@
 
 
 void X86Assembler::EmitGenericShift(int reg_or_opcode,
-                                    Register reg,
+                                    const Operand& operand,
                                     const Immediate& imm) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   CHECK(imm.is_int8());
   if (imm.value() == 1) {
     EmitUint8(0xD1);
-    EmitOperand(reg_or_opcode, Operand(reg));
+    EmitOperand(reg_or_opcode, operand);
   } else {
     EmitUint8(0xC1);
-    EmitOperand(reg_or_opcode, Operand(reg));
+    EmitOperand(reg_or_opcode, operand);
     EmitUint8(imm.value() & 0xFF);
   }
 }
 
 
 void X86Assembler::EmitGenericShift(int reg_or_opcode,
-                                    Register operand,
+                                    const Operand& operand,
                                     Register shifter) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   CHECK_EQ(shifter, ECX);
   EmitUint8(0xD3);
-  EmitOperand(reg_or_opcode, Operand(operand));
+  EmitOperand(reg_or_opcode, operand);
 }
 
 static dwarf::Reg DWARFReg(Register reg) {
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index 7fc8ef0..136b0cb 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -430,12 +430,20 @@
 
   void shll(Register reg, const Immediate& imm);
   void shll(Register operand, Register shifter);
+  void shll(const Address& address, const Immediate& imm);
+  void shll(const Address& address, Register shifter);
   void shrl(Register reg, const Immediate& imm);
   void shrl(Register operand, Register shifter);
+  void shrl(const Address& address, const Immediate& imm);
+  void shrl(const Address& address, Register shifter);
   void sarl(Register reg, const Immediate& imm);
   void sarl(Register operand, Register shifter);
+  void sarl(const Address& address, const Immediate& imm);
+  void sarl(const Address& address, Register shifter);
   void shld(Register dst, Register src, Register shifter);
+  void shld(Register dst, Register src, const Immediate& imm);
   void shrd(Register dst, Register src, Register shifter);
+  void shrd(Register dst, Register src, const Immediate& imm);
 
   void negl(Register reg);
   void notl(Register reg);
@@ -456,6 +464,8 @@
   void jmp(const Address& address);
   void jmp(Label* label);
 
+  void repne_scasw();
+
   X86Assembler* lock();
   void cmpxchgl(const Address& address, Register reg);
   void cmpxchg8b(const Address& address);
@@ -620,8 +630,8 @@
   void EmitLabelLink(Label* label);
   void EmitNearLabelLink(Label* label);
 
-  void EmitGenericShift(int rm, Register reg, const Immediate& imm);
-  void EmitGenericShift(int rm, Register operand, Register shifter);
+  void EmitGenericShift(int rm, const Operand& operand, const Immediate& imm);
+  void EmitGenericShift(int rm, const Operand& operand, Register shifter);
 
   DISALLOW_COPY_AND_ASSIGN(X86Assembler);
 };
diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc
index f326e49..aacc57b 100644
--- a/compiler/utils/x86/assembler_x86_test.cc
+++ b/compiler/utils/x86/assembler_x86_test.cc
@@ -190,4 +190,10 @@
   DriverStr(expected, "FPUIntegerStore");
 }
 
+TEST_F(AssemblerX86Test, Repnescasw) {
+  GetAssembler()->repne_scasw();
+  const char* expected = "repne scasw\n";
+  DriverStr(expected, "Repnescasw");
+}
+
 }  // namespace art
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index c0ca7ef..feceeca 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -2065,6 +2065,14 @@
 }
 
 
+void X86_64Assembler::repne_scasw() {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitUint8(0xF2);
+  EmitUint8(0xAF);
+}
+
+
 void X86_64Assembler::LoadDoubleConstant(XmmRegister dst, double value) {
   // TODO: Need to have a code constants table.
   int64_t constant = bit_cast<int64_t, double>(value);
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index f5327a8..162714a 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -601,6 +601,8 @@
   void bswapl(CpuRegister dst);
   void bswapq(CpuRegister dst);
 
+  void repne_scasw();
+
   //
   // Macros for High-level operations.
   //
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
index 9e4144a..0be4d63 100644
--- a/compiler/utils/x86_64/assembler_x86_64_test.cc
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -1215,4 +1215,10 @@
   DriverStr(Repeatrb(&x86_64::X86_64Assembler::movsxb, "movsbl %{reg2}, %{reg1}"), "movsxb");
 }
 
+TEST_F(AssemblerX86_64Test, Repnescasw) {
+  GetAssembler()->repne_scasw();
+  const char* expected = "repne scasw\n";
+  DriverStr(expected, "Repnescasw");
+}
+
 }  // namespace art
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 2a3a346..b4a45c6 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -14,6 +14,7 @@
  * limitations under the License.
  */
 
+#include <inttypes.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <sys/stat.h>
@@ -324,26 +325,19 @@
     return nullptr;
   }
 
-  static void Message(char severity, const std::string& message) {
-    // TODO: Remove when we switch to LOG when we can guarantee it won't prevent shutdown in error
-    //       cases.
-    fprintf(stderr, "dex2oat%s %c %d %d %s\n",
-            kIsDebugBuild ? "d" : "",
-            severity,
-            getpid(),
-            GetTid(),
-            message.c_str());
-  }
-
   NO_RETURN static void Fatal(const std::string& message) {
-    Message('F', message);
+    // TODO: When we can guarantee it won't prevent shutdown in error cases, move to LOG. However,
+    //       it's rather easy to hang in unwinding.
+    //       LogLine also avoids ART logging lock issues, as it's really only a wrapper around
+    //       logcat logging or stderr output.
+    LogMessage::LogLine(__FILE__, __LINE__, LogSeverity::FATAL, message.c_str());
     exit(1);
   }
 
   void Wait() {
     // TODO: tune the multiplier for GC verification, the following is just to make the timeout
     //       large.
-    int64_t multiplier = kVerifyObjectSupport > kVerifyObjectModeFast ? 100 : 1;
+    constexpr int64_t multiplier = kVerifyObjectSupport > kVerifyObjectModeFast ? 100 : 1;
     timespec timeout_ts;
     InitTimeSpec(true, CLOCK_REALTIME, multiplier * kWatchDogTimeoutSeconds * 1000, 0, &timeout_ts);
     const char* reason = "dex2oat watch dog thread waiting";
@@ -351,7 +345,8 @@
     while (!shutting_down_) {
       int rc = TEMP_FAILURE_RETRY(pthread_cond_timedwait(&cond_, &mutex_, &timeout_ts));
       if (rc == ETIMEDOUT) {
-        Fatal(StringPrintf("dex2oat did not finish after %d seconds", kWatchDogTimeoutSeconds));
+        Fatal(StringPrintf("dex2oat did not finish after %" PRId64 " seconds",
+                           kWatchDogTimeoutSeconds));
       } else if (rc != 0) {
         std::string message(StringPrintf("pthread_cond_timedwait failed: %s",
                                          strerror(errno)));
@@ -363,10 +358,10 @@
 
   // When setting timeouts, keep in mind that the build server may not be as fast as your desktop.
   // Debug builds are slower so they have larger timeouts.
-  static const unsigned int kSlowdownFactor = kIsDebugBuild ? 5U : 1U;
+  static constexpr int64_t kSlowdownFactor = kIsDebugBuild ? 5U : 1U;
 
-  // 6 minutes scaled by kSlowdownFactor.
-  static const unsigned int kWatchDogTimeoutSeconds = kSlowdownFactor * 6 * 60;
+  // 10 minutes scaled by kSlowdownFactor.
+  static constexpr int64_t kWatchDogTimeoutSeconds = kSlowdownFactor * 10 * 60;
 
   bool is_watch_dog_enabled_;
   bool shutting_down_;
@@ -696,6 +691,8 @@
         include_cfi = false;
       } else if (option == "--debuggable") {
         debuggable = true;
+        include_debug_symbols = true;
+        include_cfi = true;
       } else if (option.starts_with("--profile-file=")) {
         profile_file_ = option.substr(strlen("--profile-file=")).data();
         VLOG(compiler) << "dex2oat: profile file is " << profile_file_;
@@ -1217,9 +1214,9 @@
       if (!UseSwap(image_, dex_files_)) {
         close(swap_fd_);
         swap_fd_ = -1;
-        LOG(INFO) << "Decided to run without swap.";
+        VLOG(compiler) << "Decided to run without swap.";
       } else {
-        LOG(INFO) << "Accepted running with swap.";
+        LOG(INFO) << "Large app, accepted running with swap.";
       }
     }
     // Note that dex2oat won't close the swap_fd_. The compiler driver's swap space will do that.
@@ -1806,8 +1803,6 @@
   DISALLOW_IMPLICIT_CONSTRUCTORS(Dex2Oat);
 };
 
-const unsigned int WatchDog::kWatchDogTimeoutSeconds;
-
 static void b13564922() {
 #if defined(__linux__) && defined(__arm__)
   int major, minor;
diff --git a/disassembler/disassembler_x86.cc b/disassembler/disassembler_x86.cc
index ba0c0bd..2ead4a2 100644
--- a/disassembler/disassembler_x86.cc
+++ b/disassembler/disassembler_x86.cc
@@ -587,6 +587,14 @@
               src_reg_file = SSE;
               immediate_bytes = 1;
               break;
+          case 0x15:
+              opcode1 = "pextrw";
+              prefix[2] = 0;
+              has_modrm = true;
+              store = true;
+              src_reg_file = SSE;
+              immediate_bytes = 1;
+              break;
             case 0x16:
               opcode1 = "pextrd";
               prefix[2] = 0;
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index f2e35af..949c2cb 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -76,41 +76,38 @@
   "kClassRoots",
 };
 
-class OatSymbolizer FINAL : public CodeOutput {
+class OatSymbolizer FINAL {
  public:
-  explicit OatSymbolizer(const OatFile* oat_file, const std::string& output_name) :
-      oat_file_(oat_file), builder_(nullptr), elf_output_(nullptr),
-      output_name_(output_name.empty() ? "symbolized.oat" : output_name) {
-  }
+  class RodataWriter FINAL : public CodeOutput {
+   public:
+    explicit RodataWriter(const OatFile* oat_file) : oat_file_(oat_file) {}
 
-  bool Init() {
-    Elf32_Word oat_data_size = oat_file_->GetOatHeader().GetExecutableOffset();
-
-    uint32_t diff = static_cast<uint32_t>(oat_file_->End() - oat_file_->Begin());
-    uint32_t oat_exec_size = diff - oat_data_size;
-    uint32_t oat_bss_size = oat_file_->BssSize();
-
-    elf_output_ = OS::CreateEmptyFile(output_name_.c_str());
-
-    builder_.reset(new ElfBuilder<ElfTypes32>(
-        this,
-        elf_output_,
-        oat_file_->GetOatHeader().GetInstructionSet(),
-        0,
-        oat_data_size,
-        oat_data_size,
-        oat_exec_size,
-        RoundUp(oat_data_size + oat_exec_size, kPageSize),
-        oat_bss_size,
-        true,
-        false));
-
-    if (!builder_->Init()) {
-      builder_.reset(nullptr);
-      return false;
+    bool Write(OutputStream* out) OVERRIDE {
+      const size_t rodata_size = oat_file_->GetOatHeader().GetExecutableOffset();
+      return out->WriteFully(oat_file_->Begin(), rodata_size);
     }
 
-    return true;
+   private:
+    const OatFile* oat_file_;
+  };
+
+  class TextWriter FINAL : public CodeOutput {
+   public:
+    explicit TextWriter(const OatFile* oat_file) : oat_file_(oat_file) {}
+
+    bool Write(OutputStream* out) OVERRIDE {
+      const size_t rodata_size = oat_file_->GetOatHeader().GetExecutableOffset();
+      const uint8_t* text_begin = oat_file_->Begin() + rodata_size;
+      return out->WriteFully(text_begin, oat_file_->End() - text_begin);
+    }
+
+   private:
+    const OatFile* oat_file_;
+  };
+
+  explicit OatSymbolizer(const OatFile* oat_file, const std::string& output_name) :
+      oat_file_(oat_file), builder_(nullptr),
+      output_name_(output_name.empty() ? "symbolized.oat" : output_name) {
   }
 
   typedef void (OatSymbolizer::*Callback)(const DexFile::ClassDef&,
@@ -122,9 +119,17 @@
                                           uint32_t);
 
   bool Symbolize() {
-    if (builder_.get() == nullptr) {
-      return false;
-    }
+    Elf32_Word rodata_size = oat_file_->GetOatHeader().GetExecutableOffset();
+    uint32_t size = static_cast<uint32_t>(oat_file_->End() - oat_file_->Begin());
+    uint32_t text_size = size - rodata_size;
+    uint32_t bss_size = oat_file_->BssSize();
+    RodataWriter rodata_writer(oat_file_);
+    TextWriter text_writer(oat_file_);
+    builder_.reset(new ElfBuilder<ElfTypes32>(
+        oat_file_->GetOatHeader().GetInstructionSet(),
+        rodata_size, &rodata_writer,
+        text_size, &text_writer,
+        bss_size));
 
     Walk(&art::OatSymbolizer::RegisterForDedup);
 
@@ -132,10 +137,11 @@
 
     Walk(&art::OatSymbolizer::AddSymbol);
 
-    bool result = builder_->Write();
+    File* elf_output = OS::CreateEmptyFile(output_name_.c_str());
+    bool result = builder_->Write(elf_output);
 
     // Ignore I/O errors.
-    UNUSED(elf_output_->FlushClose());
+    UNUSED(elf_output->FlushClose());
 
     return result;
   }
@@ -269,24 +275,14 @@
         pretty_name = "[Dedup]" + pretty_name;
       }
 
-      ElfSymtabBuilder<ElfTypes32>* symtab = builder_->GetSymtabBuilder();
+      auto* symtab = builder_->GetSymtab();
 
-      symtab->AddSymbol(pretty_name, &builder_->GetTextBuilder(),
+      symtab->AddSymbol(pretty_name, builder_->GetText(),
           oat_method.GetCodeOffset() - oat_file_->GetOatHeader().GetExecutableOffset(),
           true, oat_method.GetQuickCodeSize(), STB_GLOBAL, STT_FUNC);
     }
   }
 
-  // Set oat data offset. Required by ElfBuilder/CodeOutput.
-  void SetCodeOffset(size_t offset ATTRIBUTE_UNUSED) {
-    // Nothing to do.
-  }
-
-  // Write oat code. Required by ElfBuilder/CodeOutput.
-  bool Write(OutputStream* out) {
-    return out->WriteFully(oat_file_->Begin(), oat_file_->End() - oat_file_->Begin());
-  }
-
  private:
   static void SkipAllFields(ClassDataItemIterator* it) {
     while (it->HasNextStaticField()) {
@@ -299,7 +295,6 @@
 
   const OatFile* oat_file_;
   std::unique_ptr<ElfBuilder<ElfTypes32> > builder_;
-  File* elf_output_;
   std::unordered_map<uint32_t, uint32_t> state_;
   const std::string output_name_;
 };
@@ -2097,7 +2092,7 @@
   gc::space::ImageSpace& image_space_;
   const ImageHeader& image_header_;
   std::unique_ptr<OatDumper> oat_dumper_;
-  std::unique_ptr<OatDumperOptions> oat_dumper_options_;
+  OatDumperOptions* oat_dumper_options_;
 
   DISALLOW_COPY_AND_ASSIGN(ImageDumper);
 };
@@ -2203,10 +2198,6 @@
   }
 
   OatSymbolizer oat_symbolizer(oat_file, output_name);
-  if (!oat_symbolizer.Init()) {
-    fprintf(stderr, "Failed to initialize symbolizer\n");
-    return EXIT_FAILURE;
-  }
   if (!oat_symbolizer.Symbolize()) {
     fprintf(stderr, "Failed to symbolize\n");
     return EXIT_FAILURE;
diff --git a/patchoat/patchoat.cc b/patchoat/patchoat.cc
index 4dc0967..ef84a17 100644
--- a/patchoat/patchoat.cc
+++ b/patchoat/patchoat.cc
@@ -650,29 +650,34 @@
 template <typename ElfFileImpl>
 bool PatchOat::PatchElf(ElfFileImpl* oat_file) {
   TimingLogger::ScopedTiming t("Fixup Elf Text Section", timings_);
+
+  // Fix up absolute references to locations within the boot image.
   if (!oat_file->ApplyOatPatchesTo(".text", delta_)) {
     return false;
   }
 
+  // Update the OatHeader fields referencing the boot image.
   if (!PatchOatHeader<ElfFileImpl>(oat_file)) {
     return false;
   }
 
-  bool need_fixup = false;
+  bool need_boot_oat_fixup = true;
   for (unsigned int i = 0; i < oat_file->GetProgramHeaderNum(); ++i) {
     auto hdr = oat_file->GetProgramHeader(i);
-    if ((hdr->p_vaddr != 0 && hdr->p_vaddr != hdr->p_offset) ||
-        (hdr->p_paddr != 0 && hdr->p_paddr != hdr->p_offset)) {
-      need_fixup = true;
+    if (hdr->p_type == PT_LOAD && hdr->p_vaddr == 0u) {
+      need_boot_oat_fixup = false;
       break;
     }
   }
-  if (!need_fixup) {
-    // This was never passed through ElfFixup so all headers/symbols just have their offset as
-    // their addr. Therefore we do not need to update these parts.
+  if (!need_boot_oat_fixup) {
+    // This is an app oat file that can be loaded at an arbitrary address in memory.
+    // Boot image references were patched above and there's nothing else to do.
     return true;
   }
 
+  // This is a boot oat file that's loaded at a particular address and we need
+  // to patch all absolute addresses, starting with ELF program headers.
+
   t.NewTiming("Fixup Elf Headers");
   // Fixup Phdr's
   oat_file->FixupProgramHeaders(delta_);
diff --git a/runtime/Android.mk b/runtime/Android.mk
index 86201ba..ece9d4b 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -124,6 +124,7 @@
   native/java_lang_Object.cc \
   native/java_lang_Runtime.cc \
   native/java_lang_String.cc \
+  native/java_lang_StringFactory.cc \
   native/java_lang_System.cc \
   native/java_lang_Thread.cc \
   native/java_lang_Throwable.cc \
@@ -136,6 +137,7 @@
   native/java_lang_reflect_Method.cc \
   native/java_lang_reflect_Proxy.cc \
   native/java_util_concurrent_atomic_AtomicLong.cc \
+  native/libcore_util_CharsetUtils.cc \
   native/org_apache_harmony_dalvik_ddmc_DdmServer.cc \
   native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.cc \
   native/sun_misc_Unsafe.cc \
@@ -466,7 +468,7 @@
   ifeq ($$(art_target_or_host),target)
     LOCAL_SHARED_LIBRARIES += libdl
     # ZipArchive support, the order matters here to get all symbols.
-    LOCAL_STATIC_LIBRARIES := libziparchive libz
+    LOCAL_STATIC_LIBRARIES := libziparchive libz libbase
     # For android::FileMap used by libziparchive.
     LOCAL_SHARED_LIBRARIES += libutils
     # For liblog, atrace, properties, ashmem, set_sched_policy and socket_peer_is_trusted.
diff --git a/runtime/arch/arm/entrypoints_init_arm.cc b/runtime/arch/arm/entrypoints_init_arm.cc
index f14dfc2..cafc868 100644
--- a/runtime/arch/arm/entrypoints_init_arm.cc
+++ b/runtime/arch/arm/entrypoints_init_arm.cc
@@ -166,6 +166,9 @@
   qpoints->pThrowStackOverflow = art_quick_throw_stack_overflow;
 
   qpoints->pDeoptimize = art_quick_deoptimize;
+
+  // Read barrier
+  qpoints->pReadBarrierJni = ReadBarrierJni;
 }
 
 }  // namespace art
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index 8f6162f..7488578 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -669,6 +669,18 @@
 END art_quick_aput_obj
 
 // Macro to facilitate adding new allocation entrypoints.
+.macro ONE_ARG_DOWNCALL name, entrypoint, return
+    .extern \entrypoint
+ENTRY \name
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  r1, r2  @ save callee saves in case of GC
+    mov    r1, r9                     @ pass Thread::Current
+    bl     \entrypoint     @ (uint32_t type_idx, Thread*)
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    \return
+END \name
+.endm
+
+// Macro to facilitate adding new allocation entrypoints.
 .macro TWO_ARG_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
@@ -693,10 +705,25 @@
 END \name
 .endm
 
-TWO_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+// Macro to facilitate adding new allocation entrypoints.
+.macro FOUR_ARG_DOWNCALL name, entrypoint, return
+    .extern \entrypoint
+ENTRY \name
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  r3, r12  @ save callee saves in case of GC
+    str    r9, [sp, #-16]!            @ expand the frame and pass Thread::Current
+    .pad #16
+    .cfi_adjust_cfa_offset 16
+    bl     \entrypoint
+    add    sp, #16                    @ strip the extra frame
+    .cfi_adjust_cfa_offset -16
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    \return
+END \name
+.endm
 
-TWO_ARG_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
-TWO_ARG_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+ONE_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+ONE_ARG_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+ONE_ARG_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 
     /*
      * Called by managed code to resolve a static field and load a non-wide value.
@@ -805,11 +832,10 @@
 
     /*
      * Entry from managed code to resolve a string, this stub will allocate a String and deliver an
-     * exception on error. On success the String is returned. R0 holds the referring method,
-     * R1 holds the string index. The fast path check for hit in strings cache has already been
-     * performed.
+     * exception on error. On success the String is returned. R0 holds the string index. The fast
+     * path check for hit in strings cache has already been performed.
      */
-TWO_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+ONE_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 
 // Generate the allocation entrypoints for each allocator.
 GENERATE_ALL_ALLOC_ENTRYPOINTS
@@ -1178,8 +1204,7 @@
     .cfi_rel_offset r11, 8
     .cfi_rel_offset lr, 12
     ldr   r3, [r0, #MIRROR_STRING_COUNT_OFFSET]
-    ldr   r12, [r0, #MIRROR_STRING_OFFSET_OFFSET]
-    ldr   r0, [r0, #MIRROR_STRING_VALUE_OFFSET]
+    add   r0, #MIRROR_STRING_VALUE_OFFSET
 
     /* Clamp start to [0..count] */
     cmp   r2, #0
@@ -1189,10 +1214,6 @@
     it    gt
     movgt r2, r3
 
-    /* Build a pointer to the start of string data */
-    add   r0, #MIRROR_CHAR_ARRAY_DATA_OFFSET
-    add   r0, r0, r12, lsl #1
-
     /* Save a copy in r12 to later compute result */
     mov   r12, r0
 
@@ -1298,12 +1319,10 @@
     .cfi_rel_offset r12, 24
     .cfi_rel_offset lr, 28
 
-    ldr    r4, [r2, #MIRROR_STRING_OFFSET_OFFSET]
-    ldr    r9, [r1, #MIRROR_STRING_OFFSET_OFFSET]
     ldr    r7, [r2, #MIRROR_STRING_COUNT_OFFSET]
     ldr    r10, [r1, #MIRROR_STRING_COUNT_OFFSET]
-    ldr    r2, [r2, #MIRROR_STRING_VALUE_OFFSET]
-    ldr    r1, [r1, #MIRROR_STRING_VALUE_OFFSET]
+    add    r2, #MIRROR_STRING_VALUE_OFFSET
+    add    r1, #MIRROR_STRING_VALUE_OFFSET
 
     /*
      * At this point, we have:
@@ -1318,15 +1337,12 @@
      it    ls
      movls r10, r7
 
-     /* Now, build pointers to the string data */
-     add   r2, r2, r4, lsl #1
-     add   r1, r1, r9, lsl #1
      /*
       * Note: data pointers point to previous element so we can use pre-index
       * mode with base writeback.
       */
-     add   r2, #MIRROR_CHAR_ARRAY_DATA_OFFSET-2   @ offset to contents[-1]
-     add   r1, #MIRROR_CHAR_ARRAY_DATA_OFFSET-2   @ offset to contents[-1]
+     subs  r2, #2   @ offset to contents[-1]
+     subs  r1, #2   @ offset to contents[-1]
 
      /*
       * At this point we have:
diff --git a/runtime/arch/arm64/entrypoints_init_arm64.cc b/runtime/arch/arm64/entrypoints_init_arm64.cc
index 4b12f00..8c8f8d5 100644
--- a/runtime/arch/arm64/entrypoints_init_arm64.cc
+++ b/runtime/arch/arm64/entrypoints_init_arm64.cc
@@ -159,6 +159,9 @@
 
   // Deoptimize
   qpoints->pDeoptimize = art_quick_deoptimize;
+
+  // Read barrier
+  qpoints->pReadBarrierJni = ReadBarrierJni;
 };
 
 }  // namespace art
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index cbd4b7c..f8b0734 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -182,7 +182,7 @@
     // Restore xSELF as it might be scratched.
     mov xSELF, xETR
     // ETR
-    ldr xETR, [sp, #16]
+    ldr xETR, [sp, #32]
     .cfi_restore x21
 
     add sp, sp, #112
@@ -1261,10 +1261,22 @@
 END art_quick_aput_obj
 
 // Macro to facilitate adding new allocation entrypoints.
+.macro ONE_ARG_DOWNCALL name, entrypoint, return
+    .extern \entrypoint
+ENTRY \name
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME // save callee saves in case of GC
+    mov    x1, xSELF                  // pass Thread::Current
+    bl     \entrypoint                // (uint32_t type_idx, Thread*)
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    \return
+END \name
+.endm
+
+// Macro to facilitate adding new allocation entrypoints.
 .macro TWO_ARG_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  // save callee saves in case of GC
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME // save callee saves in case of GC
     mov    x2, xSELF                  // pass Thread::Current
     bl     \entrypoint                // (uint32_t type_idx, Method* method, Thread*)
     RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
@@ -1272,11 +1284,11 @@
 END \name
 .endm
 
-// Macro to facilitate adding new array allocation entrypoints.
+// Macro to facilitate adding new allocation entrypoints.
 .macro THREE_ARG_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  // save callee saves in case of GC
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME // save callee saves in case of GC
     mov    x3, xSELF                  // pass Thread::Current
     bl     \entrypoint
     RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
@@ -1284,6 +1296,19 @@
 END \name
 .endm
 
+// Macro to facilitate adding new allocation entrypoints.
+.macro FOUR_ARG_DOWNCALL name, entrypoint, return
+    .extern \entrypoint
+ENTRY \name
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME // save callee saves in case of GC
+    mov    x4, xSELF                  // pass Thread::Current
+    bl     \entrypoint                // (arg0, arg1, arg2, arg3, Thread*)
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    \return
+    DELIVER_PENDING_EXCEPTION         // NOTE(review): reached only if the return macro falls through; ONE_ARG_DOWNCALL has no such line - confirm it is needed.
+END \name
+.endm
+
 // Macros taking opportunity of code similarities for downcalls with referrer.
 .macro ONE_ARG_REF_DOWNCALL name, entrypoint, return
     .extern \entrypoint
@@ -1339,10 +1364,10 @@
      * initializer and deliver the exception on error. On success the static storage base is
      * returned.
      */
-TWO_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+ONE_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 
-TWO_ARG_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
-TWO_ARG_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+ONE_ARG_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+ONE_ARG_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 
 ONE_ARG_REF_DOWNCALL art_quick_get_boolean_static, artGetBooleanStaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
 ONE_ARG_REF_DOWNCALL art_quick_get_byte_static, artGetByteStaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
@@ -1386,11 +1411,10 @@
 
     /*
      * Entry from managed code to resolve a string, this stub will allocate a String and deliver an
-     * exception on error. On success the String is returned. x0 holds the referring method,
-     * w1 holds the string index. The fast path check for hit in strings cache has already been
-     * performed.
+     * exception on error. On success the String is returned. w0 holds the string index. The fast
+     * path check for hit in strings cache has already been performed.
      */
-TWO_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+ONE_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 
 // Generate the allocation entrypoints for each allocator.
 GENERATE_ALL_ALLOC_ENTRYPOINTS
@@ -1714,8 +1738,7 @@
      */
 ENTRY art_quick_indexof
     ldr   w3, [x0, #MIRROR_STRING_COUNT_OFFSET]
-    ldr   w4, [x0, #MIRROR_STRING_OFFSET_OFFSET]
-    ldr   w0, [x0, #MIRROR_STRING_VALUE_OFFSET] // x0 ?
+    add   x0, x0, #MIRROR_STRING_VALUE_OFFSET
 
     /* Clamp start to [0..count] */
     cmp   w2, #0
@@ -1723,10 +1746,6 @@
     cmp   w2, w3
     csel  w2, w3, w2, gt
 
-    /* Build a pointer to the start of the string data */
-    add   x0, x0, #MIRROR_CHAR_ARRAY_DATA_OFFSET
-    add   x0, x0, x4, lsl #1
-
     /* Save a copy to compute result */
     mov   x5, x0
 
@@ -1818,17 +1837,15 @@
     ret
 1:                        // Different string objects.
 
-    ldr    w6, [x2, #MIRROR_STRING_OFFSET_OFFSET]
-    ldr    w5, [x1, #MIRROR_STRING_OFFSET_OFFSET]
     ldr    w4, [x2, #MIRROR_STRING_COUNT_OFFSET]
     ldr    w3, [x1, #MIRROR_STRING_COUNT_OFFSET]
-    ldr    w2, [x2, #MIRROR_STRING_VALUE_OFFSET]
-    ldr    w1, [x1, #MIRROR_STRING_VALUE_OFFSET]
+    add    x2, x2, #MIRROR_STRING_VALUE_OFFSET
+    add    x1, x1, #MIRROR_STRING_VALUE_OFFSET
 
     /*
-     * Now:           CharArray*    Offset   Count
-     *    first arg      x2          w6        w4
-     *   second arg      x1          w5        w3
+     * Now:           Data*  Count
+     *    first arg    x2      w4
+     *   second arg    x1      w3
      */
 
     // x0 := str1.length(w4) - str2.length(w3). ldr zero-extended w3/w4 into x3/x4.
@@ -1836,16 +1853,6 @@
     // Min(count1, count2) into w3.
     csel x3, x3, x4, ge
 
-    // Build pointer into string data.
-
-    // Add offset in array (substr etc.) (sign extend and << 1).
-    add x2, x2, w6, sxtw #1
-    add x1, x1, w5, sxtw #1
-
-    // Add offset in CharArray to array.
-    add x2, x2, #MIRROR_CHAR_ARRAY_DATA_OFFSET
-    add x1, x1, #MIRROR_CHAR_ARRAY_DATA_OFFSET
-
     // TODO: Tune this value.
     // Check for long string, do memcmp16 for them.
     cmp w3, #28  // Constant from arm32.
diff --git a/runtime/arch/mips/entrypoints_init_mips.cc b/runtime/arch/mips/entrypoints_init_mips.cc
index a980a86..ff04106 100644
--- a/runtime/arch/mips/entrypoints_init_mips.cc
+++ b/runtime/arch/mips/entrypoints_init_mips.cc
@@ -272,6 +272,9 @@
   static_assert(IsDirectEntrypoint(kQuickA64Load), "Non-direct C stub marked direct.");
   qpoints->pA64Store = QuasiAtomic::Write64;
   static_assert(IsDirectEntrypoint(kQuickA64Store), "Non-direct C stub marked direct.");
+
+  qpoints->pReadBarrierJni = ReadBarrierJni;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierJni), "Non-direct C stub marked direct.");
 };
 
 }  // namespace art
diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S
index 622c48f..ee5c59f 100644
--- a/runtime/arch/mips/quick_entrypoints_mips.S
+++ b/runtime/arch/mips/quick_entrypoints_mips.S
@@ -983,6 +983,16 @@
 END art_quick_set_obj_instance
 
 // Macro to facilitate adding new allocation entrypoints.
+.macro ONE_ARG_DOWNCALL name, entrypoint, return
+    .extern \entrypoint
+ENTRY \name
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME # save callee saves in case of GC
+    jal     \entrypoint
+    move    $a1, rSELF                # pass Thread::Current
+    \return
+END \name
+.endm
+
 .macro TWO_ARG_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
@@ -1003,34 +1013,43 @@
 END \name
 .endm
 
+.macro FOUR_ARG_DOWNCALL name, entrypoint, return
+    .extern \entrypoint
+ENTRY \name
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME # save callee saves in case of GC
+    jal     \entrypoint
+    sw      rSELF, 16($sp)            # pass Thread::Current
+    \return
+END \name
+.endm
+
 // Generate the allocation entrypoints for each allocator.
 GENERATE_ALL_ALLOC_ENTRYPOINTS
 
     /*
      * Entry from managed code to resolve a string, this stub will allocate a String and deliver an
-     * exception on error. On success the String is returned. R0 holds the referring method,
-     * R1 holds the string index. The fast path check for hit in strings cache has already been
-     * performed.
+     * exception on error. On success the String is returned. A0 holds the string index. The fast
+     * path check for hit in strings cache has already been performed.
      */
-TWO_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+ONE_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 
     /*
      * Entry from managed code when uninitialized static storage, this stub will run the class
      * initializer and deliver the exception on error. On success the static storage base is
      * returned.
      */
-TWO_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+ONE_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 
     /*
      * Entry from managed code when dex cache misses for a type_idx.
      */
-TWO_ARG_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+ONE_ARG_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 
     /*
      * Entry from managed code when type_idx needs to be checked for access and dex cache may also
      * miss.
      */
-TWO_ARG_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+ONE_ARG_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 
     /*
      * Called by managed code when the value in rSUSPEND has been decremented to 0.
diff --git a/runtime/arch/mips64/asm_support_mips64.S b/runtime/arch/mips64/asm_support_mips64.S
index 10976bb..2613777 100644
--- a/runtime/arch/mips64/asm_support_mips64.S
+++ b/runtime/arch/mips64/asm_support_mips64.S
@@ -27,7 +27,8 @@
 #define rSELF $s1
 
 
-    //  Declare a function called name, sets up $gp.
+    // Declare a function called name, sets up $gp.
+    // This macro modifies t8.
 .macro ENTRY name
     .type \name, %function
     .global \name
@@ -35,10 +36,11 @@
     .balign 16
 \name:
     .cfi_startproc
+    // Set up $gp and store the previous $gp value to $t8. It will be pushed to the
+    // stack after the frame has been constructed.
+    .cpsetup $t9, $t8, \name
     // Ensure we get a sane starting CFA.
     .cfi_def_cfa $sp,0
-    // Load $gp. We expect that ".set noreorder" is in effect.
-    .cpload $t9
     // Declare a local convenience label to be branched to when $gp is already set up.
 .L\name\()_gp_set:
 .endm
diff --git a/runtime/arch/mips64/context_mips64.cc b/runtime/arch/mips64/context_mips64.cc
index ce99b40..6b3f4c9 100644
--- a/runtime/arch/mips64/context_mips64.cc
+++ b/runtime/arch/mips64/context_mips64.cc
@@ -18,7 +18,7 @@
 
 #include "mirror/art_method-inl.h"
 #include "quick/quick_method_frame_info.h"
-#include "util.h"
+#include "utils.h"
 
 namespace art {
 namespace mips64 {
diff --git a/runtime/arch/mips64/entrypoints_init_mips64.cc b/runtime/arch/mips64/entrypoints_init_mips64.cc
index b328708..321c27b 100644
--- a/runtime/arch/mips64/entrypoints_init_mips64.cc
+++ b/runtime/arch/mips64/entrypoints_init_mips64.cc
@@ -180,6 +180,9 @@
   // Atomic 64-bit load/store
   qpoints->pA64Load = QuasiAtomic::Read64;
   qpoints->pA64Store = QuasiAtomic::Write64;
+
+  // Read barrier
+  qpoints->pReadBarrierJni = ReadBarrierJni;
 };
 
 }  // namespace art
diff --git a/runtime/arch/mips64/jni_entrypoints_mips64.S b/runtime/arch/mips64/jni_entrypoints_mips64.S
index 1085666..70d7d97 100644
--- a/runtime/arch/mips64/jni_entrypoints_mips64.S
+++ b/runtime/arch/mips64/jni_entrypoints_mips64.S
@@ -44,8 +44,11 @@
     .cfi_rel_offset 5, 8
     sd     $a0, 0($sp)
     .cfi_rel_offset 4, 0
-    jal    artFindNativeMethod  # (Thread*)
     move   $a0, $s1             # pass Thread::Current()
+    jal    artFindNativeMethod  # (Thread*)
+    .cpreturn                   # Restore gp from t8 in branch delay slot. gp is not used
+                                # anymore, and t8 may be clobbered in artFindNativeMethod.
+
     ld     $a0, 0($sp)          # restore registers from stack
     .cfi_restore 4
     ld     $a1, 8($sp)
diff --git a/runtime/arch/mips64/quick_entrypoints_mips64.S b/runtime/arch/mips64/quick_entrypoints_mips64.S
index bf18dd5..ff79b5d 100644
--- a/runtime/arch/mips64/quick_entrypoints_mips64.S
+++ b/runtime/arch/mips64/quick_entrypoints_mips64.S
@@ -27,6 +27,19 @@
     .extern artDeliverPendingExceptionFromCode
 
     /*
+     * Macro that sets up $gp and stores the previous $gp value to $t8.
+     * This macro modifies v1 and t8.
+     */
+.macro SETUP_GP
+    move $v1, $ra                  # Stash the return address in v1; bal overwrites ra.
+    bal 1f                         # Branch-and-link so that ra holds the address of label 1.
+    nop                            # Branch delay slot.
+1:
+    .cpsetup $ra, $t8, 1b          # Derive gp from ra (== 1b); the previous gp is kept in t8.
+    move $ra, $v1                  # Put the original return address back.
+.endm
+
+    /*
      * Macro that sets up the callee save frame to conform with
      * Runtime::CreateCalleeSaveMethod(kSaveAll)
      * callee-save: padding + $f24-$f31 + $s0-$s7 + $gp + $ra + $s8 = 19 total + 1x8 bytes padding
@@ -44,8 +57,8 @@
     .cfi_rel_offset 31, 152
     sd     $s8, 144($sp)
     .cfi_rel_offset 30, 144
-    sd     $gp, 136($sp)
-    .cfi_rel_offset 28, 136
+    sd     $t8, 136($sp)           # t8 holds caller's gp, now save it to the stack.
+    .cfi_rel_offset 28, 136        # Value from gp is pushed, so set the cfi offset accordingly.
     sd     $s7, 128($sp)
     .cfi_rel_offset 23, 128
     sd     $s6, 120($sp)
@@ -102,8 +115,8 @@
     .cfi_rel_offset 31, 72
     sd     $s8, 64($sp)
     .cfi_rel_offset 30, 64
-    sd     $gp, 56($sp)
-    .cfi_rel_offset 28, 56
+    sd     $t8, 56($sp)            # t8 holds caller's gp, now save it to the stack.
+    .cfi_rel_offset 28, 56         # Value from gp is pushed, so set the cfi offset accordingly.
     sd     $s7, 48($sp)
     .cfi_rel_offset 23, 48
     sd     $s6, 40($sp)
@@ -130,7 +143,7 @@
     .cfi_restore 31
     ld     $s8, 64($sp)
     .cfi_restore 30
-    ld     $gp, 56($sp)
+    ld     $t8, 56($sp)            # Restore gp back to its temp storage.
     .cfi_restore 28
     ld     $s7, 48($sp)
     .cfi_restore 23
@@ -146,6 +159,7 @@
     .cfi_restore 18
     daddiu $sp, $sp, 80
     .cfi_adjust_cfa_offset -80
+    .cpreturn
 .endm
 
 .macro RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME_AND_RETURN
@@ -153,7 +167,7 @@
     .cfi_restore 31
     ld     $s8, 64($sp)
     .cfi_restore 30
-    ld     $gp, 56($sp)
+    ld     $t8, 56($sp)            # Restore gp back to its temp storage.
     .cfi_restore 28
     ld     $s7, 48($sp)
     .cfi_restore 23
@@ -167,6 +181,7 @@
     .cfi_restore 19
     ld     $s2, 8($sp)
     .cfi_restore 18
+    .cpreturn
     jalr   $zero, $ra
     daddiu $sp, $sp, 80
     .cfi_adjust_cfa_offset -80
@@ -175,12 +190,6 @@
 // This assumes the top part of these stack frame types are identical.
 #define REFS_AND_ARGS_MINUS_REFS_SIZE (FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE - FRAME_SIZE_REFS_ONLY_CALLEE_SAVE)
 
-    /*
-     * Macro that sets up the callee save frame to conform with
-     * Runtime::CreateCalleeSaveMethod(kRefsAndArgs). Restoration assumes
-     * non-moving GC.
-     * callee-save: padding + $f12-$f19 + $a1-$a7 + $s2-$s7 + $gp + $ra + $s8 = 24 total + 1 words padding + Method*
-     */
 .macro SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_INTERNAL
     daddiu  $sp, $sp, -208
     .cfi_adjust_cfa_offset 208
@@ -194,8 +203,8 @@
     .cfi_rel_offset 31, 200
     sd     $s8, 192($sp)
     .cfi_rel_offset 30, 192
-    sd     $gp, 184($sp)
-    .cfi_rel_offset 28, 184
+    sd     $t8, 184($sp)           # t8 holds caller's gp, now save it to the stack.
+    .cfi_rel_offset 28, 184        # Value from gp is pushed, so set the cfi offset accordingly.
     sd     $s7, 176($sp)
     .cfi_rel_offset 23, 176
     sd     $s6, 168($sp)
@@ -232,16 +241,15 @@
     s.d    $f14, 32($sp)
     s.d    $f13, 24($sp)           # = kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset
     s.d    $f12, 16($sp)           # This isn't necessary to store.
-
-    # 1x8 bytes paddig + Method*
-    ld      $v0, %got(_ZN3art7Runtime9instance_E)($gp)
-    ld      $v0, 0($v0)
-    THIS_LOAD_REQUIRES_READ_BARRIER
-    lwu     $v0, RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET($v0)
-    sw      $v0, 0($sp)                                # Place Method* at bottom of stack.
-    sd      $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)  # Place sp in Thread::Current()->top_quick_frame.
+    # 1x8 bytes padding + Method*
 .endm
 
+    /*
+     * Macro that sets up the callee save frame to conform with
+     * Runtime::CreateCalleeSaveMethod(kRefsAndArgs). Restoration assumes
+     * non-moving GC.
+     * callee-save: padding + $f12-$f19 + $a1-$a7 + $s2-$s7 + $gp + $ra + $s8 = 24 total + 1 words padding + Method*
+     */
 .macro SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME
     SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_INTERNAL
     # load appropriate callee-save-method
@@ -253,12 +261,18 @@
     sd      $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)  # Place sp in Thread::Current()->top_quick_frame.
 .endm
 
+.macro SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_A0
+    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_INTERNAL     # Push the frame and spill registers.
+    sw      $a0, 0($sp)                                # Place Method* at bottom of stack.
+    sd      $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)  # Place sp in Thread::Current()->top_quick_frame.
+.endm
+
 .macro RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
     ld     $ra, 200($sp)
     .cfi_restore 31
     ld     $s8, 192($sp)
     .cfi_restore 30
-    ld     $gp, 184($sp)
+    ld     $t8, 184($sp)           # Restore gp back to its temp storage.
     .cfi_restore 28
     ld     $s7, 176($sp)
     .cfi_restore 23
@@ -297,6 +311,7 @@
     l.d    $f13, 24($sp)
     l.d    $f12, 16($sp)
 
+    .cpreturn
     daddiu $sp, $sp, 208
     .cfi_adjust_cfa_offset -208
 .endm
@@ -307,6 +322,7 @@
      * exception is Thread::Current()->exception_
      */
 .macro DELIVER_PENDING_EXCEPTION
+    SETUP_GP
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME     # save callee saves for throw
     dla     $t9, artDeliverPendingExceptionFromCode
     jalr    $zero, $t9                   # artDeliverPendingExceptionFromCode(Thread*)
@@ -348,7 +364,7 @@
      * On entry $a0 is uint32_t* gprs_ and $a1 is uint32_t* fprs_
      * FIXME: just guessing about the shape of the jmpbuf.  Where will pc be?
      */
-ENTRY art_quick_do_long_jump
+ENTRY_NO_GP art_quick_do_long_jump
     l.d     $f0, 0($a1)
     l.d     $f1, 8($a1)
     l.d     $f2, 16($a1)
@@ -605,7 +621,7 @@
      *   a4 = JValue* result
      *   a5 = shorty
      */
-ENTRY art_quick_invoke_stub
+ENTRY_NO_GP art_quick_invoke_stub
     # push a4, a5, s0(rSUSPEND), s1(rSELF), s8, ra onto the stack
     daddiu $sp, $sp, -48
     .cfi_adjust_cfa_offset 48
@@ -707,7 +723,7 @@
      *   a4 = JValue* result
      *   a5 = shorty
      */
-ENTRY art_quick_invoke_static_stub
+ENTRY_NO_GP art_quick_invoke_static_stub
 
     # push a4, a5, s0(rSUSPEND), s1(rSELF), s8, ra, onto the stack
     daddiu $sp, $sp, -48
@@ -851,7 +867,8 @@
     sd     $a1, 8($sp)
     sd     $a0, 0($sp)
     jal    artIsAssignableFromCode
-    nop
+    .cpreturn                       # Restore gp from t8 in branch delay slot.
+                                    # t8 may be clobbered in artIsAssignableFromCode.
     beq    $v0, $zero, .Lthrow_class_cast_exception
     ld     $ra, 24($sp)
     jalr   $zero, $ra
@@ -863,6 +880,7 @@
     ld     $a0, 0($sp)
     daddiu $sp, $sp, 32
     .cfi_adjust_cfa_offset -32
+    SETUP_GP
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
     dla  $t9, artThrowClassCastException
     jalr $zero, $t9                 # artThrowClassCastException (Class*, Class*, Thread*)
@@ -908,13 +926,13 @@
     daddu $t1, $t1, $t0
     sb   $t0, ($t1)
     jalr $zero, $ra
-    nop
+    .cpreturn                       # Restore gp from t8 in branch delay slot.
 .Ldo_aput_null:
     dsll  $a1, $a1, 2
     daddu $t0, $a0, $a1
     sw   $a2, MIRROR_OBJECT_ARRAY_DATA_OFFSET($t0)
     jalr $zero, $ra
-    nop
+    .cpreturn                       # Restore gp from t8 in branch delay slot.
 .Lcheck_assignability:
     daddiu $sp, $sp, -64
     .cfi_adjust_cfa_offset 64
@@ -927,7 +945,8 @@
     move   $a1, $t1
     move   $a0, $t0
     jal    artIsAssignableFromCode  # (Class*, Class*)
-    nop
+    .cpreturn                       # Restore gp from t8 in branch delay slot.
+                                    # t8 may be clobbered in artIsAssignableFromCode.
     ld     $ra, 56($sp)
     ld     $t9, 24($sp)
     ld     $a2, 16($sp)
@@ -935,6 +954,7 @@
     ld     $a0, 0($sp)
     daddiu $sp, $sp, 64
     .cfi_adjust_cfa_offset -64
+    SETUP_GP
     bne    $v0, $zero, .Ldo_aput
     nop
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
@@ -945,45 +965,6 @@
 END art_quick_aput_obj
 
     /*
-     * Entry from managed code when uninitialized static storage, this stub will run the class
-     * initializer and deliver the exception on error. On success the static storage base is
-     * returned.
-     */
-    .extern artInitializeStaticStorageFromCode
-ENTRY art_quick_initialize_static_storage
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME           # save callee saves in case of GC
-    # artInitializeStaticStorageFromCode(uint32_t type_idx, Method* referrer, Thread*)
-    jal     artInitializeStaticStorageFromCode
-    move    $a2, rSELF                          # pass Thread::Current
-    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
-END art_quick_initialize_static_storage
-
-    /*
-     * Entry from managed code when dex cache misses for a type_idx.
-     */
-    .extern artInitializeTypeFromCode
-ENTRY art_quick_initialize_type
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME          # save callee saves in case of GC
-    # artInitializeTypeFromCode(uint32_t type_idx, Method* referrer, Thread*)
-    jal     artInitializeTypeFromCode
-    move    $a2, rSELF                         # pass Thread::Current
-    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
-END art_quick_initialize_type
-
-    /*
-     * Entry from managed code when type_idx needs to be checked for access and dex cache may also
-     * miss.
-     */
-    .extern artInitializeTypeAndVerifyAccessFromCode
-ENTRY art_quick_initialize_type_and_verify_access
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME          # save callee saves in case of GC
-    # artInitializeTypeFromCode(uint32_t type_idx, Method* referrer, Thread*)
-    jal     artInitializeTypeAndVerifyAccessFromCode
-    move    $a2, rSELF                         # pass Thread::Current
-    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
-END art_quick_initialize_type_and_verify_access
-
-    /*
      * Called by managed code to resolve a static field and load a boolean primitive value.
      */
     .extern artGetBooleanStaticFromCode
@@ -1272,20 +1253,16 @@
     RETURN_IF_ZERO
 END art_quick_set_obj_instance
 
-    /*
-     * Entry from managed code to resolve a string, this stub will allocate a String and deliver an
-     * exception on error. On success the String is returned. R0 holds the referring method,
-     * R1 holds the string index. The fast path check for hit in strings cache has already been
-     * performed.
-     */
-    .extern artResolveStringFromCode
-ENTRY art_quick_resolve_string
+// Macro to facilitate adding new allocation entrypoints.
+.macro ONE_ARG_DOWNCALL name, entrypoint, return
+    .extern \entrypoint
+ENTRY \name
     SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  # save callee saves in case of GC
-    # artResolveStringFromCode(Method* referrer, uint32_t string_idx, Thread*, $sp)
-    jal     artResolveStringFromCode
-    move    $a2, rSELF                 # pass Thread::Current
-    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
-END art_quick_resolve_string
+    jal     \entrypoint
+    move    $a1, rSELF                 # pass Thread::Current
+    \return
+END \name
+.endm
 
 // Macro to facilitate adding new allocation entrypoints.
 .macro TWO_ARG_DOWNCALL name, entrypoint, return
@@ -1308,10 +1285,45 @@
 END \name
 .endm
 
+.macro FOUR_ARG_DOWNCALL name, entrypoint, return
+    .extern \entrypoint
+ENTRY \name
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  # save callee saves in case of GC
+    jal     \entrypoint                # a0-a3 are not modified here; a4 is set in the delay slot
+    move    $a4, rSELF                 # pass Thread::Current
+    \return
+END \name
+.endm
+
 // Generate the allocation entrypoints for each allocator.
 GENERATE_ALL_ALLOC_ENTRYPOINTS
 
     /*
+     * Entry from managed code to resolve a string, this stub will allocate a String and deliver an
+     * exception on error. On success the String is returned. A0 holds the string index. The fast
+     * path check for hit in strings cache has already been performed.
+     */
+ONE_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+
+    /*
+     * Entry from managed code when uninitialized static storage, this stub will run the class
+     * initializer and deliver the exception on error. On success the static storage base is
+     * returned.
+     */
+ONE_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+
+    /*
+     * Entry from managed code when dex cache misses for a type_idx.
+     */
+ONE_ARG_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+
+    /*
+     * Entry from managed code when type_idx needs to be checked for access and dex cache may also
+     * miss.
+     */
+ONE_ARG_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+
+    /*
      * Called by managed code when the value in rSUSPEND has been decremented to 0.
      */
     .extern artTestSuspendFromCode
@@ -1320,7 +1332,7 @@
     bne    $a0, $zero, 1f
     daddiu rSUSPEND, $zero, SUSPEND_CHECK_INTERVAL   # reset rSUSPEND to SUSPEND_CHECK_INTERVAL
     jalr   $zero, $ra
-    nop
+    .cpreturn                                 # Restore gp from t8 in branch delay slot.
 1:
     SETUP_REFS_ONLY_CALLEE_SAVE_FRAME         # save callee saves for stack crawl
     jal    artTestSuspendFromCode             # (Thread*)
@@ -1334,8 +1346,7 @@
      */
     .extern artQuickProxyInvokeHandler
 ENTRY art_quick_proxy_invoke_handler
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME
-    sd      $a0, 0($sp)            # place proxy method at bottom of frame
+    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_A0
     move    $a2, rSELF             # pass Thread::Current
     jal     artQuickProxyInvokeHandler  # (Method* proxy method, receiver, Thread*, SP)
     move    $a3, $sp               # pass $sp
@@ -1360,6 +1371,7 @@
     dsll    $t0, 2                 # convert target method offset to bytes
     daddu   $a0, $t0               # get address of target method
     dla     $t9, art_quick_invoke_interface_trampoline
+    .cpreturn
     jalr    $zero, $t9
     lwu     $a0, MIRROR_OBJECT_ARRAY_DATA_OFFSET($a0)  # load the target method
 END art_quick_imt_conflict_trampoline
@@ -1385,8 +1397,7 @@
     .extern artQuickGenericJniTrampoline
     .extern artQuickGenericJniEndTrampoline
 ENTRY art_quick_generic_jni_trampoline
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_INTERNAL
-    sd      $a0, 0($sp)            # store native ArtMethod* to bottom of stack
+    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_A0
     move    $s8, $sp               # save $sp
 
     # prepare for call to artQuickGenericJniTrampoline(Thread*, SP)
@@ -1489,8 +1500,7 @@
     .global art_quick_instrumentation_exit
 art_quick_instrumentation_exit:
     .cfi_startproc
-    daddiu   $t9, $ra, 4       # put current address into $t9 to rebuild $gp
-    .cpload  $t9
+    SETUP_GP
     move     $ra, $zero        # link register is to here, so clobber with 0 for later checks
     SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
     move     $t0, $sp          # remember bottom of caller's frame
@@ -1502,8 +1512,11 @@
     mov.d    $f15, $f0         # pass fpr result
     move     $a2, $v0          # pass gpr result
     move     $a1, $t0          # pass $sp
-    jal      artInstrumentationMethodExitFromCode  # (Thread*, SP, gpr_res, fpr_res)
     move     $a0, rSELF        # pass Thread::Current
+    jal      artInstrumentationMethodExitFromCode  # (Thread*, SP, gpr_res, fpr_res)
+    .cpreturn                  # Restore gp from t8 in branch delay slot. gp is not used anymore,
+                               # and t8 may be clobbered in artInstrumentationMethodExitFromCode.
+
     move     $t9, $v0          # set aside returned link register
     move     $ra, $v1          # set link register for deoptimization
     ld       $v0, 0($sp)       # restore return values
diff --git a/runtime/arch/quick_alloc_entrypoints.S b/runtime/arch/quick_alloc_entrypoints.S
index 037c26e..fe04bf5 100644
--- a/runtime/arch/quick_alloc_entrypoints.S
+++ b/runtime/arch/quick_alloc_entrypoints.S
@@ -35,6 +35,12 @@
 THREE_ARG_DOWNCALL art_quick_check_and_alloc_array\c_suffix, artCheckAndAllocArrayFromCode\cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 // Called by managed code to allocate an array in a special case for FILLED_NEW_ARRAY.
 THREE_ARG_DOWNCALL art_quick_check_and_alloc_array_with_access_check\c_suffix, artCheckAndAllocArrayFromCodeWithAccessCheck\cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+// Called by managed code to allocate a string from bytes
+FOUR_ARG_DOWNCALL art_quick_alloc_string_from_bytes\c_suffix, artAllocStringFromBytesFromCode\cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+// Called by managed code to allocate a string from chars
+THREE_ARG_DOWNCALL art_quick_alloc_string_from_chars\c_suffix, artAllocStringFromCharsFromCode\cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+// Called by managed code to allocate a string from string
+ONE_ARG_DOWNCALL art_quick_alloc_string_from_string\c_suffix, artAllocStringFromStringFromCode\cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 .endm
 
 .macro GENERATE_ALL_ALLOC_ENTRYPOINTS
diff --git a/runtime/arch/stub_test.cc b/runtime/arch/stub_test.cc
index 0d9a888..a7d24b8 100644
--- a/runtime/arch/stub_test.cc
+++ b/runtime/arch/stub_test.cc
@@ -261,6 +261,132 @@
           "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
           "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31",
           "memory");  // clobber.
+#elif defined(__mips__) && !defined(__LP64__)
+    __asm__ __volatile__ (
+        // Spill a0-a3 and t0-t7 which we say we don't clobber. May contain args.
+        "addiu $sp, $sp, -64\n\t"
+        "sw $a0, 0($sp)\n\t"
+        "sw $a1, 4($sp)\n\t"
+        "sw $a2, 8($sp)\n\t"
+        "sw $a3, 12($sp)\n\t"
+        "sw $t0, 16($sp)\n\t"
+        "sw $t1, 20($sp)\n\t"
+        "sw $t2, 24($sp)\n\t"
+        "sw $t3, 28($sp)\n\t"
+        "sw $t4, 32($sp)\n\t"
+        "sw $t5, 36($sp)\n\t"
+        "sw $t6, 40($sp)\n\t"
+        "sw $t7, 44($sp)\n\t"
+        // Spill gp register since it is caller save.
+        "sw $gp, 52($sp)\n\t"
+
+        "addiu $sp, $sp, -16\n\t"  // Reserve stack space, 16B aligned.
+        "sw %[referrer], 0($sp)\n\t"
+
+        // Push everything on the stack, so we don't rely on the order.
+        "addiu $sp, $sp, -20\n\t"
+        "sw %[arg0], 0($sp)\n\t"
+        "sw %[arg1], 4($sp)\n\t"
+        "sw %[arg2], 8($sp)\n\t"
+        "sw %[code], 12($sp)\n\t"
+        "sw %[self], 16($sp)\n\t"
+
+        // Load call params into the right registers.
+        "lw $a0, 0($sp)\n\t"
+        "lw $a1, 4($sp)\n\t"
+        "lw $a2, 8($sp)\n\t"
+        "lw $t9, 12($sp)\n\t"
+        "lw $s1, 16($sp)\n\t"
+        "addiu $sp, $sp, 20\n\t"
+
+        "jalr $t9\n\t"             // Call the stub.
+        "nop\n\t"
+        "addiu $sp, $sp, 16\n\t"   // Drop the quick "frame".
+
+        // Restore stuff not named clobbered.
+        "lw $a0, 0($sp)\n\t"
+        "lw $a1, 4($sp)\n\t"
+        "lw $a2, 8($sp)\n\t"
+        "lw $a3, 12($sp)\n\t"
+        "lw $t0, 16($sp)\n\t"
+        "lw $t1, 20($sp)\n\t"
+        "lw $t2, 24($sp)\n\t"
+        "lw $t3, 28($sp)\n\t"
+        "lw $t4, 32($sp)\n\t"
+        "lw $t5, 36($sp)\n\t"
+        "lw $t6, 40($sp)\n\t"
+        "lw $t7, 44($sp)\n\t"
+        // Restore gp.
+        "lw $gp, 52($sp)\n\t"
+        "addiu $sp, $sp, 64\n\t"   // Free stack space, now sp as on entry.
+
+        "move %[result], $v0\n\t"  // Store the call result.
+        : [result] "=r" (result)
+        : [arg0] "r"(arg0), [arg1] "r"(arg1), [arg2] "r"(arg2), [code] "r"(code), [self] "r"(self),
+          [referrer] "r"(referrer)
+        : "at", "v0", "v1", "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", "t8", "t9", "k0", "k1",
+          "fp", "ra",
+          "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", "f8", "f9", "f10", "f11", "f12", "f13",
+          "f14", "f15", "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", "f24", "f25", "f26",
+          "f27", "f28", "f29", "f30", "f31",
+          "memory");  // clobber.
+#elif defined(__mips__) && defined(__LP64__)
+    __asm__ __volatile__ (
+        // Spill a0-a7 which we say we don't clobber. May contain args.
+        "daddiu $sp, $sp, -64\n\t"
+        "sd $a0, 0($sp)\n\t"
+        "sd $a1, 8($sp)\n\t"
+        "sd $a2, 16($sp)\n\t"
+        "sd $a3, 24($sp)\n\t"
+        "sd $a4, 32($sp)\n\t"
+        "sd $a5, 40($sp)\n\t"
+        "sd $a6, 48($sp)\n\t"
+        "sd $a7, 56($sp)\n\t"
+
+        "daddiu $sp, $sp, -16\n\t"  // Reserve stack space, 16B aligned.
+        "sd %[referrer], 0($sp)\n\t"
+
+        // Push everything on the stack, so we don't rely on the order.
+        "daddiu $sp, $sp, -40\n\t"
+        "sd %[arg0], 0($sp)\n\t"
+        "sd %[arg1], 8($sp)\n\t"
+        "sd %[arg2], 16($sp)\n\t"
+        "sd %[code], 24($sp)\n\t"
+        "sd %[self], 32($sp)\n\t"
+
+        // Load call params into the right registers.
+        "ld $a0, 0($sp)\n\t"
+        "ld $a1, 8($sp)\n\t"
+        "ld $a2, 16($sp)\n\t"
+        "ld $t9, 24($sp)\n\t"
+        "ld $s1, 32($sp)\n\t"
+        "daddiu $sp, $sp, 40\n\t"
+
+        "jalr $t9\n\t"              // Call the stub.
+        "nop\n\t"
+        "daddiu $sp, $sp, 16\n\t"   // Drop the quick "frame".
+
+        // Restore stuff not named clobbered.
+        "ld $a0, 0($sp)\n\t"
+        "ld $a1, 8($sp)\n\t"
+        "ld $a2, 16($sp)\n\t"
+        "ld $a3, 24($sp)\n\t"
+        "ld $a4, 32($sp)\n\t"
+        "ld $a5, 40($sp)\n\t"
+        "ld $a6, 48($sp)\n\t"
+        "ld $a7, 56($sp)\n\t"
+        "daddiu $sp, $sp, 64\n\t"
+
+        "move %[result], $v0\n\t"   // Store the call result.
+        : [result] "=r" (result)
+        : [arg0] "r"(arg0), [arg1] "r"(arg1), [arg2] "r"(arg2), [code] "r"(code), [self] "r"(self),
+          [referrer] "r"(referrer)
+        : "at", "v0", "v1", "t0", "t1", "t2", "t3", "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7",
+          "t8", "t9", "k0", "k1", "fp", "ra",
+          "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", "f8", "f9", "f10", "f11", "f12", "f13",
+          "f14", "f15", "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", "f24", "f25", "f26",
+          "f27", "f28", "f29", "f30", "f31",
+          "memory");  // clobber.
 #elif defined(__x86_64__) && !defined(__APPLE__) && defined(__clang__)
     // Note: Uses the native convention
     // TODO: Set the thread?
@@ -487,6 +613,136 @@
           "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
           "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31",
           "memory");  // clobber.
+#elif defined(__mips__) && !defined(__LP64__)
+    __asm__ __volatile__ (
+        // Spill a0-a3 and t0-t7 which we say we don't clobber. May contain args.
+        "addiu $sp, $sp, -64\n\t"
+        "sw $a0, 0($sp)\n\t"
+        "sw $a1, 4($sp)\n\t"
+        "sw $a2, 8($sp)\n\t"
+        "sw $a3, 12($sp)\n\t"
+        "sw $t0, 16($sp)\n\t"
+        "sw $t1, 20($sp)\n\t"
+        "sw $t2, 24($sp)\n\t"
+        "sw $t3, 28($sp)\n\t"
+        "sw $t4, 32($sp)\n\t"
+        "sw $t5, 36($sp)\n\t"
+        "sw $t6, 40($sp)\n\t"
+        "sw $t7, 44($sp)\n\t"
+        // Spill gp register since it is caller save.
+        "sw $gp, 52($sp)\n\t"
+
+        "addiu $sp, $sp, -16\n\t"  // Reserve stack space, 16B aligned.
+        "sw %[referrer], 0($sp)\n\t"
+
+        // Push everything on the stack, so we don't rely on the order.
+        "addiu $sp, $sp, -24\n\t"
+        "sw %[arg0], 0($sp)\n\t"
+        "sw %[arg1], 4($sp)\n\t"
+        "sw %[arg2], 8($sp)\n\t"
+        "sw %[code], 12($sp)\n\t"
+        "sw %[self], 16($sp)\n\t"
+        "sw %[hidden], 20($sp)\n\t"
+
+        // Load call params into the right registers.
+        "lw $a0, 0($sp)\n\t"
+        "lw $a1, 4($sp)\n\t"
+        "lw $a2, 8($sp)\n\t"
+        "lw $t9, 12($sp)\n\t"
+        "lw $s1, 16($sp)\n\t"
+        "lw $t0, 20($sp)\n\t"
+        "addiu $sp, $sp, 24\n\t"
+
+        "jalr $t9\n\t"             // Call the stub.
+        "nop\n\t"
+        "addiu $sp, $sp, 16\n\t"   // Drop the quick "frame".
+
+        // Restore stuff not named clobbered.
+        "lw $a0, 0($sp)\n\t"
+        "lw $a1, 4($sp)\n\t"
+        "lw $a2, 8($sp)\n\t"
+        "lw $a3, 12($sp)\n\t"
+        "lw $t0, 16($sp)\n\t"
+        "lw $t1, 20($sp)\n\t"
+        "lw $t2, 24($sp)\n\t"
+        "lw $t3, 28($sp)\n\t"
+        "lw $t4, 32($sp)\n\t"
+        "lw $t5, 36($sp)\n\t"
+        "lw $t6, 40($sp)\n\t"
+        "lw $t7, 44($sp)\n\t"
+        // Restore gp.
+        "lw $gp, 52($sp)\n\t"
+        "addiu $sp, $sp, 64\n\t"   // Free stack space, now sp as on entry.
+
+        "move %[result], $v0\n\t"  // Store the call result.
+        : [result] "=r" (result)
+        : [arg0] "r"(arg0), [arg1] "r"(arg1), [arg2] "r"(arg2), [code] "r"(code), [self] "r"(self),
+          [referrer] "r"(referrer), [hidden] "r"(hidden)
+        : "at", "v0", "v1", "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", "t8", "t9", "k0", "k1",
+          "fp", "ra",
+          "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", "f8", "f9", "f10", "f11", "f12", "f13",
+          "f14", "f15", "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", "f24", "f25", "f26",
+          "f27", "f28", "f29", "f30", "f31",
+          "memory");  // clobber.
+#elif defined(__mips__) && defined(__LP64__)
+    __asm__ __volatile__ (
+        // Spill a0-a7 which we say we don't clobber. May contain args.
+        "daddiu $sp, $sp, -64\n\t"
+        "sd $a0, 0($sp)\n\t"
+        "sd $a1, 8($sp)\n\t"
+        "sd $a2, 16($sp)\n\t"
+        "sd $a3, 24($sp)\n\t"
+        "sd $a4, 32($sp)\n\t"
+        "sd $a5, 40($sp)\n\t"
+        "sd $a6, 48($sp)\n\t"
+        "sd $a7, 56($sp)\n\t"
+
+        "daddiu $sp, $sp, -16\n\t"  // Reserve stack space, 16B aligned.
+        "sd %[referrer], 0($sp)\n\t"
+
+        // Push everything on the stack, so we don't rely on the order.
+        "daddiu $sp, $sp, -48\n\t"
+        "sd %[arg0], 0($sp)\n\t"
+        "sd %[arg1], 8($sp)\n\t"
+        "sd %[arg2], 16($sp)\n\t"
+        "sd %[code], 24($sp)\n\t"
+        "sd %[self], 32($sp)\n\t"
+        "sd %[hidden], 40($sp)\n\t"
+
+        // Load call params into the right registers.
+        "ld $a0, 0($sp)\n\t"
+        "ld $a1, 8($sp)\n\t"
+        "ld $a2, 16($sp)\n\t"
+        "ld $t9, 24($sp)\n\t"
+        "ld $s1, 32($sp)\n\t"
+        "ld $t0, 40($sp)\n\t"
+        "daddiu $sp, $sp, 48\n\t"
+
+        "jalr $t9\n\t"              // Call the stub.
+        "nop\n\t"
+        "daddiu $sp, $sp, 16\n\t"   // Drop the quick "frame".
+
+        // Restore stuff not named clobbered.
+        "ld $a0, 0($sp)\n\t"
+        "ld $a1, 8($sp)\n\t"
+        "ld $a2, 16($sp)\n\t"
+        "ld $a3, 24($sp)\n\t"
+        "ld $a4, 32($sp)\n\t"
+        "ld $a5, 40($sp)\n\t"
+        "ld $a6, 48($sp)\n\t"
+        "ld $a7, 56($sp)\n\t"
+        "daddiu $sp, $sp, 64\n\t"
+
+        "move %[result], $v0\n\t"   // Store the call result.
+        : [result] "=r" (result)
+        : [arg0] "r"(arg0), [arg1] "r"(arg1), [arg2] "r"(arg2), [code] "r"(code), [self] "r"(self),
+          [referrer] "r"(referrer), [hidden] "r"(hidden)
+        : "at", "v0", "v1", "t0", "t1", "t2", "t3", "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7",
+          "t8", "t9", "k0", "k1", "fp", "ra",
+          "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", "f8", "f9", "f10", "f11", "f12", "f13",
+          "f14", "f15", "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", "f24", "f25", "f26",
+          "f27", "f28", "f29", "f30", "f31",
+          "memory");  // clobber.
 #elif defined(__x86_64__) && !defined(__APPLE__) && defined(__clang__)
     // Note: Uses the native convention
     // TODO: Set the thread?
@@ -521,7 +777,8 @@
   // Method with 32b arg0, 64b arg1
   size_t Invoke3UWithReferrer(size_t arg0, uint64_t arg1, uintptr_t code, Thread* self,
                               mirror::ArtMethod* referrer) {
-#if (defined(__x86_64__) && !defined(__APPLE__)) || defined(__aarch64__)
+#if (defined(__x86_64__) && !defined(__APPLE__)) || (defined(__mips__) && defined(__LP64__)) || \
+    defined(__aarch64__)
     // Just pass through.
     return Invoke3WithReferrer(arg0, arg1, 0U, code, self, referrer);
 #else
@@ -549,7 +806,7 @@
 
 
 TEST_F(StubTest, Memcpy) {
-#if defined(__i386__) || (defined(__x86_64__) && !defined(__APPLE__))
+#if defined(__i386__) || (defined(__x86_64__) && !defined(__APPLE__)) || defined(__mips__)
   Thread* self = Thread::Current();
 
   uint32_t orig[20];
@@ -586,7 +843,8 @@
 }
 
 TEST_F(StubTest, LockObject) {
-#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \
+    (defined(__x86_64__) && !defined(__APPLE__))
   static constexpr size_t kThinLockLoops = 100;
 
   Thread* self = Thread::Current();
@@ -659,7 +917,8 @@
 
 // NO_THREAD_SAFETY_ANALYSIS as we do not want to grab exclusive mutator lock for MonitorInfo.
 static void TestUnlockObject(StubTest* test) NO_THREAD_SAFETY_ANALYSIS {
-#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \
+    (defined(__x86_64__) && !defined(__APPLE__))
   static constexpr size_t kThinLockLoops = 100;
 
   Thread* self = Thread::Current();
@@ -809,12 +1068,14 @@
   TestUnlockObject(this);
 }
 
-#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \
+    (defined(__x86_64__) && !defined(__APPLE__))
 extern "C" void art_quick_check_cast(void);
 #endif
 
 TEST_F(StubTest, CheckCast) {
-#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \
+    (defined(__x86_64__) && !defined(__APPLE__))
   Thread* self = Thread::Current();
 
   const uintptr_t art_quick_check_cast = StubTest::GetEntrypoint(self, kQuickCheckCast);
@@ -865,7 +1126,8 @@
 TEST_F(StubTest, APutObj) {
   TEST_DISABLED_FOR_HEAP_REFERENCE_POISONING();
 
-#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \
+    (defined(__x86_64__) && !defined(__APPLE__))
   Thread* self = Thread::Current();
 
   // Do not check non-checked ones, we'd need handlers and stuff...
@@ -998,7 +1260,8 @@
 TEST_F(StubTest, AllocObject) {
   TEST_DISABLED_FOR_HEAP_REFERENCE_POISONING();
 
-#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \
+    (defined(__x86_64__) && !defined(__APPLE__))
   // This will lead to OOM  error messages in the log.
   ScopedLogSeverity sls(LogSeverity::FATAL);
 
@@ -1123,7 +1386,8 @@
 TEST_F(StubTest, AllocObjectArray) {
   TEST_DISABLED_FOR_HEAP_REFERENCE_POISONING();
 
-#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \
+    (defined(__x86_64__) && !defined(__APPLE__))
   // TODO: Check the "Unresolved" allocation stubs
 
   // This will lead to OOM  error messages in the log.
@@ -1229,32 +1493,15 @@
       "aacaacaacaacaacaacaacaacaacaacaacaac",     // This one's over.
       "aacaacaacaacaacaacaacaacaacaacaacaaca" };  // As is this one. We need a separate one to
                                                   // defeat object-equal optimizations.
-  static constexpr size_t kBaseStringCount  = arraysize(c);
-  static constexpr size_t kStringCount = 2 * kBaseStringCount;
+  static constexpr size_t kStringCount = arraysize(c);
 
   StackHandleScope<kStringCount> hs(self);
   Handle<mirror::String> s[kStringCount];
 
-  for (size_t i = 0; i < kBaseStringCount; ++i) {
+  for (size_t i = 0; i < kStringCount; ++i) {
     s[i] = hs.NewHandle(mirror::String::AllocFromModifiedUtf8(soa.Self(), c[i]));
   }
 
-  RandGen r(0x1234);
-
-  for (size_t i = kBaseStringCount; i < kStringCount; ++i) {
-    s[i] = hs.NewHandle(mirror::String::AllocFromModifiedUtf8(soa.Self(), c[i - kBaseStringCount]));
-    int32_t length = s[i]->GetLength();
-    if (length > 1) {
-      // Set a random offset and length.
-      int32_t new_offset = 1 + (r.next() % (length - 1));
-      int32_t rest = length - new_offset - 1;
-      int32_t new_length = 1 + (rest > 0 ? r.next() % rest : 0);
-
-      s[i]->SetField32<false>(mirror::String::CountOffset(), new_length);
-      s[i]->SetField32<false>(mirror::String::OffsetOffset(), new_offset);
-    }
-  }
-
   // TODO: wide characters
 
   // Matrix of expectations. First component is first parameter. Note we only check against the
@@ -1309,7 +1556,8 @@
 static void GetSetBooleanStatic(ArtField* f, Thread* self,
                                 mirror::ArtMethod* referrer, StubTest* test)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \
+    (defined(__x86_64__) && !defined(__APPLE__))
   constexpr size_t num_values = 5;
   uint8_t values[num_values] = { 0, 1, 2, 128, 0xFF };
 
@@ -1339,7 +1587,8 @@
 static void GetSetByteStatic(ArtField* f, Thread* self, mirror::ArtMethod* referrer,
                              StubTest* test)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \
+    (defined(__x86_64__) && !defined(__APPLE__))
   int8_t values[] = { -128, -64, 0, 64, 127 };
 
   for (size_t i = 0; i < arraysize(values); ++i) {
@@ -1369,7 +1618,8 @@
 static void GetSetBooleanInstance(Handle<mirror::Object>* obj, ArtField* f, Thread* self,
                                   mirror::ArtMethod* referrer, StubTest* test)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \
+    (defined(__x86_64__) && !defined(__APPLE__))
   uint8_t values[] = { 0, true, 2, 128, 0xFF };
 
   for (size_t i = 0; i < arraysize(values); ++i) {
@@ -1403,7 +1653,8 @@
 static void GetSetByteInstance(Handle<mirror::Object>* obj, ArtField* f,
                              Thread* self, mirror::ArtMethod* referrer, StubTest* test)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \
+    (defined(__x86_64__) && !defined(__APPLE__))
   int8_t values[] = { -128, -64, 0, 64, 127 };
 
   for (size_t i = 0; i < arraysize(values); ++i) {
@@ -1437,7 +1688,8 @@
 static void GetSetCharStatic(ArtField* f, Thread* self, mirror::ArtMethod* referrer,
                              StubTest* test)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \
+    (defined(__x86_64__) && !defined(__APPLE__))
   uint16_t values[] = { 0, 1, 2, 255, 32768, 0xFFFF };
 
   for (size_t i = 0; i < arraysize(values); ++i) {
@@ -1466,7 +1718,8 @@
 static void GetSetShortStatic(ArtField* f, Thread* self,
                               mirror::ArtMethod* referrer, StubTest* test)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \
+    (defined(__x86_64__) && !defined(__APPLE__))
   int16_t values[] = { -0x7FFF, -32768, 0, 255, 32767, 0x7FFE };
 
   for (size_t i = 0; i < arraysize(values); ++i) {
@@ -1496,7 +1749,8 @@
 static void GetSetCharInstance(Handle<mirror::Object>* obj, ArtField* f,
                                Thread* self, mirror::ArtMethod* referrer, StubTest* test)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \
+    (defined(__x86_64__) && !defined(__APPLE__))
   uint16_t values[] = { 0, 1, 2, 255, 32768, 0xFFFF };
 
   for (size_t i = 0; i < arraysize(values); ++i) {
@@ -1529,7 +1783,8 @@
 static void GetSetShortInstance(Handle<mirror::Object>* obj, ArtField* f,
                              Thread* self, mirror::ArtMethod* referrer, StubTest* test)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \
+    (defined(__x86_64__) && !defined(__APPLE__))
   int16_t values[] = { -0x7FFF, -32768, 0, 255, 32767, 0x7FFE };
 
   for (size_t i = 0; i < arraysize(values); ++i) {
@@ -1563,7 +1818,8 @@
 static void GetSet32Static(ArtField* f, Thread* self, mirror::ArtMethod* referrer,
                            StubTest* test)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \
+    (defined(__x86_64__) && !defined(__APPLE__))
   uint32_t values[] = { 0, 1, 2, 255, 32768, 1000000, 0xFFFFFFFF };
 
   for (size_t i = 0; i < arraysize(values); ++i) {
@@ -1580,7 +1836,11 @@
                                            self,
                                            referrer);
 
+#if defined(__mips__) && defined(__LP64__)
+    EXPECT_EQ(static_cast<uint32_t>(res), values[i]) << "Iteration " << i;
+#else
     EXPECT_EQ(res, values[i]) << "Iteration " << i;
+#endif
   }
 #else
   UNUSED(f, self, referrer, test);
@@ -1594,7 +1854,8 @@
 static void GetSet32Instance(Handle<mirror::Object>* obj, ArtField* f,
                              Thread* self, mirror::ArtMethod* referrer, StubTest* test)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \
+    (defined(__x86_64__) && !defined(__APPLE__))
   uint32_t values[] = { 0, 1, 2, 255, 32768, 1000000, 0xFFFFFFFF };
 
   for (size_t i = 0; i < arraysize(values); ++i) {
@@ -1628,7 +1889,8 @@
 }
 
 
-#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \
+    (defined(__x86_64__) && !defined(__APPLE__))
 
 static void set_and_check_static(uint32_t f_idx, mirror::Object* val, Thread* self,
                                  mirror::ArtMethod* referrer, StubTest* test)
@@ -1653,7 +1915,8 @@
 static void GetSetObjStatic(ArtField* f, Thread* self, mirror::ArtMethod* referrer,
                             StubTest* test)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \
+    (defined(__x86_64__) && !defined(__APPLE__))
   set_and_check_static(f->GetDexFieldIndex(), nullptr, self, referrer, test);
 
   // Allocate a string object for simplicity.
@@ -1670,7 +1933,8 @@
 }
 
 
-#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \
+    (defined(__x86_64__) && !defined(__APPLE__))
 static void set_and_check_instance(ArtField* f, mirror::Object* trg,
                                    mirror::Object* val, Thread* self, mirror::ArtMethod* referrer,
                                    StubTest* test)
@@ -1698,7 +1962,8 @@
 static void GetSetObjInstance(Handle<mirror::Object>* obj, ArtField* f,
                               Thread* self, mirror::ArtMethod* referrer, StubTest* test)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \
+    (defined(__x86_64__) && !defined(__APPLE__))
   set_and_check_instance(f, obj->Get(), nullptr, self, referrer, test);
 
   // Allocate a string object for simplicity.
@@ -1720,7 +1985,8 @@
 static void GetSet64Static(ArtField* f, Thread* self, mirror::ArtMethod* referrer,
                            StubTest* test)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-#if (defined(__x86_64__) && !defined(__APPLE__)) || defined(__aarch64__)
+#if (defined(__x86_64__) && !defined(__APPLE__)) || (defined(__mips__) && defined(__LP64__)) || \
+    defined(__aarch64__)
   uint64_t values[] = { 0, 1, 2, 255, 32768, 1000000, 0xFFFFFFFF, 0xFFFFFFFFFFFF };
 
   for (size_t i = 0; i < arraysize(values); ++i) {
@@ -1750,7 +2016,8 @@
 static void GetSet64Instance(Handle<mirror::Object>* obj, ArtField* f,
                              Thread* self, mirror::ArtMethod* referrer, StubTest* test)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-#if (defined(__x86_64__) && !defined(__APPLE__)) || defined(__aarch64__)
+#if (defined(__x86_64__) && !defined(__APPLE__)) || (defined(__mips__) && defined(__LP64__)) || \
+    defined(__aarch64__)
   uint64_t values[] = { 0, 1, 2, 255, 32768, 1000000, 0xFFFFFFFF, 0xFFFFFFFFFFFF };
 
   for (size_t i = 0; i < arraysize(values); ++i) {
@@ -1950,7 +2217,8 @@
 }
 
 TEST_F(StubTest, IMT) {
-#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \
+    (defined(__x86_64__) && !defined(__APPLE__))
   TEST_DISABLED_FOR_HEAP_REFERENCE_POISONING();
 
   Thread* self = Thread::Current();
diff --git a/runtime/arch/x86/entrypoints_init_x86.cc b/runtime/arch/x86/entrypoints_init_x86.cc
index a371632..737f4d1 100644
--- a/runtime/arch/x86/entrypoints_init_x86.cc
+++ b/runtime/arch/x86/entrypoints_init_x86.cc
@@ -138,6 +138,9 @@
 
   // Deoptimize
   qpoints->pDeoptimize = art_quick_deoptimize_from_compiled_slow_path;
+
+  // Read barrier
+  qpoints->pReadBarrierJni = ReadBarrierJni;
 };
 
 }  // namespace art
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index c5d8b8f..6ebeba3 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -685,6 +685,26 @@
     END_FUNCTION RAW_VAR(c_name, 0)
 END_MACRO
 
+MACRO3(FOUR_ARG_DOWNCALL, c_name, cxx_name, return_macro)
+    DEFINE_FUNCTION RAW_VAR(c_name, 0)
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  ebx, ebx  // save ref containing registers for GC; NOTE(review): ebx is handed to this macro and pushed as arg4 below - confirm it is not clobbered
+    // Outgoing argument set up: 12 bytes padding + Thread* + four args = 32 bytes, released after the call
+    subl MACRO_LITERAL(12), %esp  // alignment padding
+    CFI_ADJUST_CFA_OFFSET(12)
+    pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current() (pushed first, so it is the last C++ argument)
+    CFI_ADJUST_CFA_OFFSET(4)
+    PUSH ebx                      // pass arg4
+    PUSH edx                      // pass arg3
+    PUSH ecx                      // pass arg2
+    PUSH eax                      // pass arg1
+    call VAR(cxx_name, 1)         // cxx_name(arg1, arg2, arg3, arg4, Thread*)
+    addl MACRO_LITERAL(32), %esp  // pop the four arguments, Thread* and the alignment padding
+    CFI_ADJUST_CFA_OFFSET(-32)
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
+    CALL_MACRO(return_macro, 2)   // expand the caller-supplied return_macro: return or deliver exception
+    END_FUNCTION RAW_VAR(c_name, 0)
+END_MACRO
+
 MACRO3(ONE_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
     DEFINE_FUNCTION RAW_VAR(c_name, 0)
     SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  ebx, ebx // save ref containing registers for GC
@@ -789,6 +809,12 @@
   THREE_ARG_DOWNCALL art_quick_check_and_alloc_array ## c_suffix, artCheckAndAllocArrayFromCode ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
 #define GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(c_suffix, cxx_suffix) \
   THREE_ARG_DOWNCALL art_quick_check_and_alloc_array_with_access_check ## c_suffix, artCheckAndAllocArrayFromCodeWithAccessCheck ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
+#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(c_suffix, cxx_suffix) \
+  FOUR_ARG_DOWNCALL art_quick_alloc_string_from_bytes ## c_suffix, artAllocStringFromBytesFromCode ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
+#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(c_suffix, cxx_suffix) \
+  THREE_ARG_DOWNCALL art_quick_alloc_string_from_chars ## c_suffix, artAllocStringFromCharsFromCode ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
+#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(c_suffix, cxx_suffix) \
+  ONE_ARG_DOWNCALL art_quick_alloc_string_from_string ## c_suffix, artAllocStringFromStringFromCode ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
 
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_dlmalloc, DlMalloc)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_dlmalloc, DlMalloc)
@@ -799,6 +825,9 @@
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_dlmalloc, DlMalloc)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_dlmalloc, DlMalloc)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_dlmalloc, DlMalloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_dlmalloc, DlMalloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_dlmalloc, DlMalloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_dlmalloc, DlMalloc)
 
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_dlmalloc_instrumented, DlMallocInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_dlmalloc_instrumented, DlMallocInstrumented)
@@ -809,6 +838,9 @@
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_dlmalloc_instrumented, DlMallocInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_dlmalloc_instrumented, DlMallocInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_dlmalloc_instrumented, DlMallocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_dlmalloc_instrumented, DlMallocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_dlmalloc_instrumented, DlMallocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_dlmalloc_instrumented, DlMallocInstrumented)
 
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_rosalloc, RosAlloc)
@@ -819,6 +851,9 @@
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_rosalloc, RosAlloc)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_rosalloc, RosAlloc)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_rosalloc, RosAlloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_rosalloc, RosAlloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_rosalloc, RosAlloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_rosalloc, RosAlloc)
 
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc_instrumented, RosAllocInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_rosalloc_instrumented, RosAllocInstrumented)
@@ -829,6 +864,9 @@
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_rosalloc_instrumented, RosAllocInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_rosalloc_instrumented, RosAllocInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_rosalloc_instrumented, RosAllocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_rosalloc_instrumented, RosAllocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_rosalloc_instrumented, RosAllocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_rosalloc_instrumented, RosAllocInstrumented)
 
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_bump_pointer, BumpPointer)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_bump_pointer, BumpPointer)
@@ -839,6 +877,9 @@
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_bump_pointer, BumpPointer)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_bump_pointer, BumpPointer)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_bump_pointer, BumpPointer)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_bump_pointer, BumpPointer)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_bump_pointer, BumpPointer)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_bump_pointer, BumpPointer)
 
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_bump_pointer_instrumented, BumpPointerInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_bump_pointer_instrumented, BumpPointerInstrumented)
@@ -849,6 +890,9 @@
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_bump_pointer_instrumented, BumpPointerInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_bump_pointer_instrumented, BumpPointerInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_bump_pointer_instrumented, BumpPointerInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_bump_pointer_instrumented, BumpPointerInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_bump_pointer_instrumented, BumpPointerInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_bump_pointer_instrumented, BumpPointerInstrumented)
 
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB)
@@ -859,6 +903,9 @@
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab, TLAB)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_tlab, TLAB)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_tlab, TLAB)
 
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab_instrumented, TLABInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab_instrumented, TLABInstrumented)
@@ -869,6 +916,9 @@
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab_instrumented, TLABInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_tlab_instrumented, TLABInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab_instrumented, TLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_tlab_instrumented, TLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_tlab_instrumented, TLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_tlab_instrumented, TLABInstrumented)
 
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region, Region)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region, Region)
@@ -879,6 +929,9 @@
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region, Region)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_region, Region)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region, Region)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region, Region)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region, Region)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region, Region)
 
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_instrumented, RegionInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_instrumented, RegionInstrumented)
@@ -889,6 +942,9 @@
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_instrumented, RegionInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_region_instrumented, RegionInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_instrumented, RegionInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_instrumented, RegionInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_instrumented, RegionInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_instrumented, RegionInstrumented)
 
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB)
@@ -899,6 +955,9 @@
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_region_tlab, RegionTLAB)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB)
 
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab_instrumented, RegionTLABInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab_instrumented, RegionTLABInstrumented)
@@ -909,11 +968,14 @@
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab_instrumented, RegionTLABInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_region_tlab_instrumented, RegionTLABInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab_instrumented, RegionTLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab_instrumented, RegionTLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab_instrumented, RegionTLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab_instrumented, RegionTLABInstrumented)
 
-TWO_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO
-TWO_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO
-TWO_ARG_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode, RETURN_IF_RESULT_IS_NON_ZERO
-TWO_ARG_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode, RETURN_IF_RESULT_IS_NON_ZERO
+ONE_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO
+ONE_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO
+ONE_ARG_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode, RETURN_IF_RESULT_IS_NON_ZERO
+ONE_ARG_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode, RETURN_IF_RESULT_IS_NON_ZERO
 
 TWO_ARG_REF_DOWNCALL art_quick_handle_fill_data, artHandleFillArrayDataFromCode, RETURN_IF_EAX_ZERO
 
@@ -1567,13 +1629,8 @@
     PUSH edi                    // push callee save reg
     mov MIRROR_STRING_COUNT_OFFSET(%eax), %edx
     mov MIRROR_STRING_COUNT_OFFSET(%ecx), %ebx
-    mov MIRROR_STRING_VALUE_OFFSET(%eax), %esi
-    mov MIRROR_STRING_VALUE_OFFSET(%ecx), %edi
-    mov MIRROR_STRING_OFFSET_OFFSET(%eax), %eax
-    mov MIRROR_STRING_OFFSET_OFFSET(%ecx), %ecx
-    /* Build pointers to the start of string data */
-    lea  MIRROR_CHAR_ARRAY_DATA_OFFSET(%esi, %eax, 2), %esi
-    lea  MIRROR_CHAR_ARRAY_DATA_OFFSET(%edi, %ecx, 2), %edi
+    lea MIRROR_STRING_VALUE_OFFSET(%eax), %esi
+    lea MIRROR_STRING_VALUE_OFFSET(%ecx), %edi
     /* Calculate min length and count diff */
     mov   %edx, %ecx
     mov   %edx, %eax
diff --git a/runtime/arch/x86_64/entrypoints_init_x86_64.cc b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
index 0cddec4..d0ab9d5 100644
--- a/runtime/arch/x86_64/entrypoints_init_x86_64.cc
+++ b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
@@ -142,6 +142,9 @@
 
   // Deoptimize
   qpoints->pDeoptimize = art_quick_deoptimize_from_compiled_slow_path;
+
+  // Read barrier
+  qpoints->pReadBarrierJni = ReadBarrierJni;
 #endif  // __APPLE__
 };
 
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 8185deb..da4d92b 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -738,6 +738,17 @@
     END_FUNCTION VAR(c_name, 0)
 END_MACRO
 
+MACRO3(FOUR_ARG_DOWNCALL, c_name, cxx_name, return_macro)
+    DEFINE_FUNCTION VAR(c_name, 0)
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME   // save ref containing registers for GC
+    // Outgoing argument set up: arg1-arg4 are not moved here; only Thread* is added
+    movq %gs:THREAD_SELF_OFFSET, %r8    // pass Thread::Current() as the fifth argument
+    call VAR(cxx_name, 1)               // cxx_name(arg1, arg2, arg3, arg4, Thread*)
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
+    CALL_MACRO(return_macro, 2)         // expand the caller-supplied return_macro: return or deliver exception
+    END_FUNCTION VAR(c_name, 0)
+END_MACRO
+
 MACRO3(ONE_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
     DEFINE_FUNCTION VAR(c_name, 0)
     movl 8(%rsp), %esi                  // pass referrer
@@ -822,6 +833,12 @@
   THREE_ARG_DOWNCALL art_quick_check_and_alloc_array ## c_suffix, artCheckAndAllocArrayFromCode ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
 #define GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(c_suffix, cxx_suffix) \
   THREE_ARG_DOWNCALL art_quick_check_and_alloc_array_with_access_check ## c_suffix, artCheckAndAllocArrayFromCodeWithAccessCheck ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
+#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(c_suffix, cxx_suffix) \
+  FOUR_ARG_DOWNCALL art_quick_alloc_string_from_bytes ## c_suffix, artAllocStringFromBytesFromCode ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
+#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(c_suffix, cxx_suffix) \
+  THREE_ARG_DOWNCALL art_quick_alloc_string_from_chars ## c_suffix, artAllocStringFromCharsFromCode ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
+#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(c_suffix, cxx_suffix) \
+  ONE_ARG_DOWNCALL art_quick_alloc_string_from_string ## c_suffix, artAllocStringFromStringFromCode ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
 
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_dlmalloc, DlMalloc)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_dlmalloc, DlMalloc)
@@ -832,6 +849,9 @@
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_dlmalloc, DlMalloc)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_dlmalloc, DlMalloc)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_dlmalloc, DlMalloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_dlmalloc, DlMalloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_dlmalloc, DlMalloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_dlmalloc, DlMalloc)
 
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_dlmalloc_instrumented, DlMallocInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_dlmalloc_instrumented, DlMallocInstrumented)
@@ -842,6 +862,9 @@
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_dlmalloc_instrumented, DlMallocInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_dlmalloc_instrumented, DlMallocInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_dlmalloc_instrumented, DlMallocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_dlmalloc_instrumented, DlMallocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_dlmalloc_instrumented, DlMallocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_dlmalloc_instrumented, DlMallocInstrumented)
 
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_rosalloc, RosAlloc)
@@ -852,6 +875,9 @@
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_rosalloc, RosAlloc)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_rosalloc, RosAlloc)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_rosalloc, RosAlloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_rosalloc, RosAlloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_rosalloc, RosAlloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_rosalloc, RosAlloc)
 
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc_instrumented, RosAllocInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_rosalloc_instrumented, RosAllocInstrumented)
@@ -862,6 +888,9 @@
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_rosalloc_instrumented, RosAllocInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_rosalloc_instrumented, RosAllocInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_rosalloc_instrumented, RosAllocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_rosalloc_instrumented, RosAllocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_rosalloc_instrumented, RosAllocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_rosalloc_instrumented, RosAllocInstrumented)
 
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_bump_pointer, BumpPointer)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_bump_pointer, BumpPointer)
@@ -872,6 +901,9 @@
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_bump_pointer, BumpPointer)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_bump_pointer, BumpPointer)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_bump_pointer, BumpPointer)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_bump_pointer, BumpPointer)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_bump_pointer, BumpPointer)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_bump_pointer, BumpPointer)
 
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_bump_pointer_instrumented, BumpPointerInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_bump_pointer_instrumented, BumpPointerInstrumented)
@@ -882,6 +914,9 @@
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_bump_pointer_instrumented, BumpPointerInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_bump_pointer_instrumented, BumpPointerInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_bump_pointer_instrumented, BumpPointerInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_bump_pointer_instrumented, BumpPointerInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_bump_pointer_instrumented, BumpPointerInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_bump_pointer_instrumented, BumpPointerInstrumented)
 
 DEFINE_FUNCTION art_quick_alloc_object_tlab
     // Fast path tlab allocation.
@@ -929,6 +964,9 @@
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab, TLAB)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_tlab, TLAB)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_tlab, TLAB)
 
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab_instrumented, TLABInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab_instrumented, TLABInstrumented)
@@ -939,6 +977,9 @@
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab_instrumented, TLABInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_tlab_instrumented, TLABInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab_instrumented, TLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_tlab_instrumented, TLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_tlab_instrumented, TLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_tlab_instrumented, TLABInstrumented)
 
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region, Region)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region, Region)
@@ -949,6 +990,9 @@
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region, Region)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_region, Region)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region, Region)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region, Region)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region, Region)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region, Region)
 
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_instrumented, RegionInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_instrumented, RegionInstrumented)
@@ -959,6 +1003,9 @@
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_instrumented, RegionInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_region_instrumented, RegionInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_instrumented, RegionInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_instrumented, RegionInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_instrumented, RegionInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_instrumented, RegionInstrumented)
 
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB)
@@ -969,6 +1016,9 @@
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_region_tlab, RegionTLAB)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB)
 
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab_instrumented, RegionTLABInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab_instrumented, RegionTLABInstrumented)
@@ -979,11 +1029,14 @@
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab_instrumented, RegionTLABInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_region_tlab_instrumented, RegionTLABInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab_instrumented, RegionTLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab_instrumented, RegionTLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab_instrumented, RegionTLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab_instrumented, RegionTLABInstrumented)
 
-TWO_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO
-TWO_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO
-TWO_ARG_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode, RETURN_IF_RESULT_IS_NON_ZERO
-TWO_ARG_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode, RETURN_IF_RESULT_IS_NON_ZERO
+ONE_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO
+ONE_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO
+ONE_ARG_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode, RETURN_IF_RESULT_IS_NON_ZERO
+ONE_ARG_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode, RETURN_IF_RESULT_IS_NON_ZERO
 
 TWO_ARG_REF_DOWNCALL art_quick_handle_fill_data, artHandleFillArrayDataFromCode, RETURN_IF_EAX_ZERO
 
@@ -1622,13 +1675,9 @@
 DEFINE_FUNCTION art_quick_string_compareto
     movl MIRROR_STRING_COUNT_OFFSET(%edi), %r8d
     movl MIRROR_STRING_COUNT_OFFSET(%esi), %r9d
-    movl MIRROR_STRING_VALUE_OFFSET(%edi), %r10d
-    movl MIRROR_STRING_VALUE_OFFSET(%esi), %r11d
-    movl MIRROR_STRING_OFFSET_OFFSET(%edi), %eax
-    movl MIRROR_STRING_OFFSET_OFFSET(%esi), %ecx
     /* Build pointers to the start of string data */
-    leal MIRROR_CHAR_ARRAY_DATA_OFFSET(%r10d, %eax, 2), %esi
-    leal MIRROR_CHAR_ARRAY_DATA_OFFSET(%r11d, %ecx, 2), %edi
+    leal MIRROR_STRING_VALUE_OFFSET(%edi), %edi
+    leal MIRROR_STRING_VALUE_OFFSET(%esi), %esi
     /* Calculate min length and count diff */
     movl  %r8d, %ecx
     movl  %r8d, %eax
@@ -1638,8 +1687,8 @@
      * At this point we have:
      *   eax: value to return if first part of strings are equal
      *   ecx: minimum among the lengths of the two strings
-     *   esi: pointer to this string data
-     *   edi: pointer to comp string data
+     *   esi: pointer to comp string data
+     *   edi: pointer to this string data
      */
     jecxz .Lkeep_length
     repe cmpsw                    // find nonmatching chars in [%esi] and [%edi], up to length %ecx
@@ -1648,8 +1697,8 @@
     ret
     .balign 16
 .Lnot_equal:
-    movzwl  -2(%esi), %eax        // get last compared char from this string
-    movzwl  -2(%edi), %ecx        // get last compared char from comp string
+    movzwl  -2(%edi), %eax        // get last compared char from this string
+    movzwl  -2(%esi), %ecx        // get last compared char from comp string
     subl  %ecx, %eax              // return the difference
     ret
 END_FUNCTION art_quick_string_compareto
diff --git a/runtime/art_field.cc b/runtime/art_field.cc
index 2aed440..47d5a76 100644
--- a/runtime/art_field.cc
+++ b/runtime/art_field.cc
@@ -63,6 +63,17 @@
       FindInstanceFieldWithOffset(klass->GetSuperClass(), field_offset) : nullptr;
 }
 
+ArtField* ArtField::FindStaticFieldWithOffset(mirror::Class* klass, uint32_t field_offset) {
+  DCHECK(klass != nullptr);
+  auto* field = klass->GetSFields();  // Only klass's own statics; superclasses are not consulted.
+  for (size_t remaining = klass->NumStaticFields(); remaining != 0; --remaining, ++field) {
+    if (field->GetOffset().Uint32Value() == field_offset) {
+      return field;
+    }
+  }
+  return nullptr;
+}
+
 mirror::Class* ArtField::ProxyFindSystemClass(const char* descriptor) {
   DCHECK(GetDeclaringClass()->IsProxyClass());
   return Runtime::Current()->GetClassLinker()->FindSystemClass(Thread::Current(), descriptor);
diff --git a/runtime/art_field.h b/runtime/art_field.h
index c0620bf..9d3dbd9 100644
--- a/runtime/art_field.h
+++ b/runtime/art_field.h
@@ -161,6 +161,9 @@
   // Returns an instance field with this offset in the given class or null if not found.
   static ArtField* FindInstanceFieldWithOffset(mirror::Class* klass, uint32_t field_offset)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  // Returns a static field with this offset in the given class or null if not found.
+  static ArtField* FindStaticFieldWithOffset(mirror::Class* klass, uint32_t field_offset)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   const char* GetName() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index 8057dd1..3e677a4 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -89,7 +89,7 @@
             art::Thread::ThinLockIdOffset<__SIZEOF_POINTER__>().Int32Value())
 
 // Offset of field Thread::tlsPtr_.card_table.
-#define THREAD_CARD_TABLE_OFFSET 120
+#define THREAD_CARD_TABLE_OFFSET 128
 ADD_TEST_EQ(THREAD_CARD_TABLE_OFFSET,
             art::Thread::CardTableOffset<__SIZEOF_POINTER__>().Int32Value())
 
@@ -108,7 +108,7 @@
 ADD_TEST_EQ(THREAD_SELF_OFFSET,
             art::Thread::SelfOffset<__SIZEOF_POINTER__>().Int32Value())
 
-#define THREAD_LOCAL_POS_OFFSET (THREAD_CARD_TABLE_OFFSET + 126 * __SIZEOF_POINTER__)
+#define THREAD_LOCAL_POS_OFFSET (THREAD_CARD_TABLE_OFFSET + 146 * __SIZEOF_POINTER__)
 ADD_TEST_EQ(THREAD_LOCAL_POS_OFFSET,
             art::Thread::ThreadLocalPosOffset<__SIZEOF_POINTER__>().Int32Value())
 #define THREAD_LOCAL_END_OFFSET (THREAD_LOCAL_POS_OFFSET + __SIZEOF_POINTER__)
@@ -124,7 +124,7 @@
 #define MIRROR_OBJECT_LOCK_WORD_OFFSET 4
 ADD_TEST_EQ(MIRROR_OBJECT_LOCK_WORD_OFFSET, art::mirror::Object::MonitorOffset().Int32Value())
 
-#if defined(USE_BAKER_OR_BROOKS_READ_BARRIER)
+#if defined(USE_BROOKS_READ_BARRIER)
 #define MIRROR_OBJECT_HEADER_SIZE 16
 #else
 #define MIRROR_OBJECT_HEADER_SIZE 8
@@ -170,14 +170,11 @@
             sizeof(art::mirror::HeapReference<art::mirror::Object>))
 
 // Offsets within java.lang.String.
-#define MIRROR_STRING_VALUE_OFFSET  MIRROR_OBJECT_HEADER_SIZE
-ADD_TEST_EQ(MIRROR_STRING_VALUE_OFFSET, art::mirror::String::ValueOffset().Int32Value())
-
-#define MIRROR_STRING_COUNT_OFFSET  (4 + MIRROR_OBJECT_HEADER_SIZE)
+#define MIRROR_STRING_COUNT_OFFSET  MIRROR_OBJECT_HEADER_SIZE
 ADD_TEST_EQ(MIRROR_STRING_COUNT_OFFSET, art::mirror::String::CountOffset().Int32Value())
 
-#define MIRROR_STRING_OFFSET_OFFSET (12 + MIRROR_OBJECT_HEADER_SIZE)
-ADD_TEST_EQ(MIRROR_STRING_OFFSET_OFFSET, art::mirror::String::OffsetOffset().Int32Value())
+#define MIRROR_STRING_VALUE_OFFSET (8 + MIRROR_OBJECT_HEADER_SIZE)
+ADD_TEST_EQ(MIRROR_STRING_VALUE_OFFSET, art::mirror::String::ValueOffset().Int32Value())
 
 // Offsets within java.lang.reflect.ArtMethod.
 #define MIRROR_ART_METHOD_DEX_CACHE_METHODS_OFFSET (4 + MIRROR_OBJECT_HEADER_SIZE)
diff --git a/runtime/barrier.cc b/runtime/barrier.cc
index 66ee870..f80a65f 100644
--- a/runtime/barrier.cc
+++ b/runtime/barrier.cc
@@ -86,7 +86,7 @@
 }
 
 Barrier::~Barrier() {
-  CHECK(!count_) << "Attempted to destroy barrier with non zero count";
+  CHECK_EQ(count_, 0) << "Attempted to destroy barrier with non zero count";
 }
 
 }  // namespace art
diff --git a/runtime/base/bit_vector.cc b/runtime/base/bit_vector.cc
index 65cb028..39ce0d2 100644
--- a/runtime/base/bit_vector.cc
+++ b/runtime/base/bit_vector.cc
@@ -24,11 +24,6 @@
 
 namespace art {
 
-// The number of words necessary to encode bits.
-static constexpr uint32_t BitsToWords(uint32_t bits) {
-  return RoundUp(bits, 32) / 32;
-}
-
 // TODO: replace excessive argument defaulting when we are at gcc 4.7
 // or later on host with delegating constructor support. Specifically,
 // starts_bits and storage_size/storage are mutually exclusive.
diff --git a/runtime/base/bit_vector.h b/runtime/base/bit_vector.h
index be4d363..6e4367a 100644
--- a/runtime/base/bit_vector.h
+++ b/runtime/base/bit_vector.h
@@ -20,6 +20,8 @@
 #include <stdint.h>
 #include <iterator>
 
+#include "utils.h"
+
 namespace art {
 
 class Allocator;
@@ -116,6 +118,11 @@
 
   virtual ~BitVector();
 
+  // Returns the number of kWordBits-sized words needed to hold `bits` bits (rounded up).
+  static constexpr uint32_t BitsToWords(uint32_t bits) {
+    return RoundUp(bits, kWordBits) / kWordBits;
+  }
+
   // Mark the specified bit as "set".
   void SetBit(uint32_t idx) {
     /*
diff --git a/runtime/base/casts.h b/runtime/base/casts.h
index c7e39a2..f884649 100644
--- a/runtime/base/casts.h
+++ b/runtime/base/casts.h
@@ -18,9 +18,11 @@
 #define ART_RUNTIME_BASE_CASTS_H_
 
 #include <assert.h>
+#include <limits>
 #include <string.h>
 #include <type_traits>
 
+#include "base/logging.h"
 #include "base/macros.h"
 
 namespace art {
@@ -83,6 +85,23 @@
   return dest;
 }
 
+// A static_cast for integral types that DCHECKs the value is exactly representable in Dest,
+// i.e. that it lies within [numeric_limits<Dest>::min(), numeric_limits<Dest>::max()].
+template <typename Dest, typename Source>
+inline Dest dchecked_integral_cast(const Source source) {
+  DCHECK(
+      // Lower bound: passes if Dest's minimum already covers Source's, else compare the value.
+      (static_cast<intmax_t>(std::numeric_limits<Dest>::min()) <=
+          static_cast<intmax_t>(std::numeric_limits<Source>::min()) ||
+          source >= static_cast<Source>(std::numeric_limits<Dest>::min())) &&
+      // Upper bound: passes if Dest's maximum already covers Source's, else compare the value.
+      (static_cast<uintmax_t>(std::numeric_limits<Dest>::max()) >=
+          static_cast<uintmax_t>(std::numeric_limits<Source>::max()) ||
+          source <= static_cast<Source>(std::numeric_limits<Dest>::max())));
+
+  return static_cast<Dest>(source);
+}
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_BASE_CASTS_H_
diff --git a/runtime/base/logging.cc b/runtime/base/logging.cc
index 0ae7863..859de4b 100644
--- a/runtime/base/logging.cc
+++ b/runtime/base/logging.cc
@@ -289,17 +289,17 @@
   CHECK_EQ(strlen(log_characters), INTERNAL_FATAL + 1U);
 
   const char* program_name = ProgramInvocationShortName();
-  write(STDERR_FILENO, program_name, strlen(program_name));
-  write(STDERR_FILENO, " ", 1);
-  write(STDERR_FILENO, &log_characters[log_severity], 1);
-  write(STDERR_FILENO, " ", 1);
+  TEMP_FAILURE_RETRY(write(STDERR_FILENO, program_name, strlen(program_name)));
+  TEMP_FAILURE_RETRY(write(STDERR_FILENO, " ", 1));
+  TEMP_FAILURE_RETRY(write(STDERR_FILENO, &log_characters[log_severity], 1));
+  TEMP_FAILURE_RETRY(write(STDERR_FILENO, " ", 1));
   // TODO: pid and tid.
-  write(STDERR_FILENO, file, strlen(file));
+  TEMP_FAILURE_RETRY(write(STDERR_FILENO, file, strlen(file)));
   // TODO: line.
   UNUSED(line);
-  write(STDERR_FILENO, "] ", 2);
-  write(STDERR_FILENO, message, strlen(message));
-  write(STDERR_FILENO, "\n", 1);
+  TEMP_FAILURE_RETRY(write(STDERR_FILENO, "] ", 2));
+  TEMP_FAILURE_RETRY(write(STDERR_FILENO, message, strlen(message)));
+  TEMP_FAILURE_RETRY(write(STDERR_FILENO, "\n", 1));
 #endif
 }
 
diff --git a/runtime/base/unix_file/fd_file.cc b/runtime/base/unix_file/fd_file.cc
index f272d88..07cadc4 100644
--- a/runtime/base/unix_file/fd_file.cc
+++ b/runtime/base/unix_file/fd_file.cc
@@ -107,7 +107,7 @@
 }
 
 int FdFile::Close() {
-  int result = TEMP_FAILURE_RETRY(close(fd_));
+  int result = close(fd_);
 
   // Test here, so the file is closed and not leaked.
   if (kCheckSafeUsage) {
diff --git a/runtime/check_reference_map_visitor.h b/runtime/check_reference_map_visitor.h
index 5d9cd35..d87a563 100644
--- a/runtime/check_reference_map_visitor.h
+++ b/runtime/check_reference_map_visitor.h
@@ -29,7 +29,7 @@
 class CheckReferenceMapVisitor : public StackVisitor {
  public:
   explicit CheckReferenceMapVisitor(Thread* thread) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-      : StackVisitor(thread, nullptr) {}
+      : StackVisitor(thread, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames) {}
 
   bool VisitFrame() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     mirror::ArtMethod* m = GetMethod();
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index dc8bf2a..b099088 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -31,6 +31,7 @@
 #include "base/scoped_flock.h"
 #include "base/stl_util.h"
 #include "base/unix_file/fd_file.h"
+#include "base/value_object.h"
 #include "class_linker-inl.h"
 #include "compiler_callbacks.h"
 #include "debugger.h"
@@ -344,8 +345,8 @@
   Handle<mirror::Class> java_lang_String(hs.NewHandle(
       AllocClass(self, java_lang_Class.Get(), mirror::String::ClassSize())));
   mirror::String::SetClass(java_lang_String.Get());
-  java_lang_String->SetObjectSize(mirror::String::InstanceSize());
   mirror::Class::SetStatus(java_lang_String, mirror::Class::kStatusResolved, self);
+  java_lang_String->SetStringClass();
 
   // Setup java.lang.ref.Reference.
   Handle<mirror::Class> java_lang_ref_Reference(hs.NewHandle(
@@ -473,7 +474,6 @@
     String_class->DumpClass(os2, mirror::Class::kDumpClassFullDetail);
     LOG(FATAL) << os1.str() << "\n\n" << os2.str();
   }
-  CHECK_EQ(java_lang_String->GetObjectSize(), mirror::String::InstanceSize());
   mirror::Class::SetStatus(java_lang_DexCache, mirror::Class::kStatusNotReady, self);
   CHECK_EQ(java_lang_DexCache.Get(), FindSystemClass(self, "Ljava/lang/DexCache;"));
   CHECK_EQ(java_lang_DexCache->GetObjectSize(), mirror::DexCache::InstanceSize());
@@ -712,6 +712,191 @@
   return *oat_file;
 }
 
+// Cursor into one dex file: the dex file plus the index of its current class def. Copies share
+// the raw dex file pointer and there is no destructor, so it is freed only on explicit request.
+class DexFileAndClassPair : ValueObject {
+ public:
+  DexFileAndClassPair(const DexFile* dex_file, size_t current_class_index, bool from_loaded_oat)
+     : cached_descriptor_(GetClassDescriptor(dex_file, current_class_index)),
+       dex_file_(dex_file),
+       current_class_index_(current_class_index),
+       from_loaded_oat_(from_loaded_oat) {}
+
+  // Member-wise copy and assignment are sufficient: every field is a plain pointer or scalar.
+  DexFileAndClassPair(const DexFileAndClassPair&) = default;
+  DexFileAndClassPair& operator=(const DexFileAndClassPair&) = default;
+
+  // Descriptor of the current class, looked up once at construction time.
+  const char* GetCachedDescriptor() const {
+    return cached_descriptor_;
+  }
+
+  // Ordering for std::priority_queue, which keeps its greatest element on top. We iterate over
+  // the classes in dex files, which are sorted lexicographically, so the descriptor comparison is
+  // reversed to obtain a min-queue. Ties on the descriptor are broken by dex file address.
+  bool operator<(const DexFileAndClassPair& rhs) const {
+    const int order = strcmp(cached_descriptor_, rhs.cached_descriptor_);
+    return (order != 0) ? (order > 0) : (dex_file_ < rhs.dex_file_);
+  }
+
+  // True if the dex file has a class def after the current one.
+  bool DexFileHasMoreClasses() const {
+    return current_class_index_ + 1 < dex_file_->NumClassDefs();
+  }
+
+  // Cursor for the following class def of the same dex file.
+  DexFileAndClassPair GetNext() const {
+    return DexFileAndClassPair(dex_file_, current_class_index_ + 1, from_loaded_oat_);
+  }
+
+  // Index of the current class def within the dex file.
+  size_t GetCurrentClassIndex() const {
+    return current_class_index_;
+  }
+
+  // Value of the from_loaded_oat flag given at construction.
+  bool FromLoadedOat() const {
+    return from_loaded_oat_;
+  }
+
+  // The dex file this cursor walks; null once DeleteDexFile() has been called on this copy.
+  const DexFile* GetDexFile() const {
+    return dex_file_;
+  }
+
+  // Frees the dex file and clears the pointer held by this copy only.
+  void DeleteDexFile() {
+    delete dex_file_;
+    dex_file_ = nullptr;
+  }
+
+ private:
+  // Reads the descriptor string of the class def at index.
+  static const char* GetClassDescriptor(const DexFile* dex_file, size_t index) {
+    const uint16_t class_def_index = static_cast<uint16_t>(index);
+    return dex_file->StringByTypeIdx(dex_file->GetClassDef(class_def_index).class_idx_);
+  }
+
+  const char* cached_descriptor_;
+  const DexFile* dex_file_;
+  size_t current_class_index_;
+  bool from_loaded_oat_;  // We only need to compare mismatches between what we load now
+                          // and what was loaded before. Any old duplicates must have been
+                          // OK, and any new "internal" duplicates are as well (they must
+                          // be from multidex, which resolves correctly).
+};
+
+// Opens each dex file of oat_file and queues a cursor at class 0 for every one that has classes.
+static void AddDexFilesFromOat(const OatFile* oat_file, bool already_loaded,
+                               std::priority_queue<DexFileAndClassPair>* heap) {
+  for (const OatDexFile* oat_dex_file : oat_file->GetOatDexFiles()) {
+    std::string error;
+    std::unique_ptr<const DexFile> dex_file = oat_dex_file->OpenDexFile(&error);
+    if (dex_file.get() == nullptr) {
+      LOG(WARNING) << "Could not create dex file from oat file: " << error;
+      continue;
+    }
+    if (dex_file->NumClassDefs() > 0U) {
+      heap->emplace(dex_file.release(), 0U, already_loaded);  // Entry keeps the raw pointer.
+    }
+  }
+}
+
+// Queues the next class of original's dex file, or frees the dex file if none is left.
+static void AddNext(DexFileAndClassPair* original,
+                    std::priority_queue<DexFileAndClassPair>* heap) {
+  if (!original->DexFileHasMoreClasses()) {
+    original->DeleteDexFile();
+    return;
+  }
+  heap->push(original->GetNext());
+}
+
+// Deletes the dex file of every entry still in the heap, leaving the heap empty.
+static void FreeDexFilesInHeap(std::priority_queue<DexFileAndClassPair>* heap) {
+  for (; !heap->empty(); heap->pop()) {
+    delete heap->top().GetDexFile();
+  }
+}
+
+// Check for class-def collisions between oat_file and the already-loaded (non-boot) oat files.
+// Returns true and sets *error_msg on a collision; dex files opened here are freed on return.
+//
+// This works by maintaining a heap with one class from each dex file, sorted by the class
+// descriptor. A dex-file/class pair is continually removed from the heap and compared against the
+// following top element. If the descriptors match and the two elements disagree on whether their
+// dex file came from an already-loaded oat file or the new one, that is a collision.
+bool ClassLinker::HasCollisions(const OatFile* oat_file, std::string* error_msg) {
+  // Dex files are registered late - once a class is actually being loaded. We have to compare
+  // against the open oat files. Take the dex_lock_ that protects oat_files_ accesses.
+  ReaderMutexLock mu(Thread::Current(), dex_lock_);
+
+  std::priority_queue<DexFileAndClassPair> queue;
+
+  // Add dex files from already loaded oat files, but skip boot.
+  {
+    // To grab the boot oat, look at the dex files in the boot classpath. Any of those is fine, as
+    // they were all compiled into the same oat file. So grab the first one, which is guaranteed to
+    // exist if the boot class-path isn't empty.
+    const OatFile* boot_oat = nullptr;
+    if (!boot_class_path_.empty()) {
+      const DexFile* boot_dex_file = boot_class_path_[0];
+      // Is it from an oat file?
+      if (boot_dex_file->GetOatDexFile() != nullptr) {
+        boot_oat = boot_dex_file->GetOatDexFile()->GetOatFile();
+      }
+    }
+
+    for (const OatFile* loaded_oat_file : oat_files_) {
+      if (loaded_oat_file != boot_oat) {
+        AddDexFilesFromOat(loaded_oat_file, true, &queue);
+      }
+    }
+  }
+
+  if (queue.empty()) {
+    // No other oat files, return early.
+    return false;
+  }
+
+  // Add dex files from the oat file to check.
+  AddDexFilesFromOat(oat_file, false, &queue);
+
+  // Now drain the queue.
+  while (!queue.empty()) {
+    DexFileAndClassPair compare_pop = queue.top();
+    queue.pop();
+
+    // Compare against the following elements.
+    while (!queue.empty()) {
+      DexFileAndClassPair top = queue.top();
+
+      if (strcmp(compare_pop.GetCachedDescriptor(), top.GetCachedDescriptor()) == 0) {
+        // Same descriptor. Check whether it's crossing old-oat-files to new-oat-files.
+        if (compare_pop.FromLoadedOat() != top.FromLoadedOat()) {
+          *error_msg =
+              StringPrintf("Found duplicated class when checking oat files: '%s' in %s and %s",
+                           compare_pop.GetCachedDescriptor(),
+                           compare_pop.GetDexFile()->GetLocation().c_str(),
+                           top.GetDexFile()->GetLocation().c_str());
+          compare_pop.DeleteDexFile();  // Already popped: FreeDexFilesInHeap() cannot reach it.
+          FreeDexFilesInHeap(&queue);
+          return true;
+        }
+        // Pop it.
+        queue.pop();
+        AddNext(&top, &queue);
+      } else {
+        // Something else. Done here.
+        break;
+      }
+    }
+    AddNext(&compare_pop, &queue);
+  }
+
+  return false;
+}
+
 std::vector<std::unique_ptr<const DexFile>> ClassLinker::OpenDexFilesFromOat(
     const char* dex_location, const char* oat_location,
     std::vector<std::string>* error_msgs) {
@@ -757,8 +942,32 @@
     // Get the oat file on disk.
     std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile();
     if (oat_file.get() != nullptr) {
-      source_oat_file = oat_file.release();
-      RegisterOatFile(source_oat_file);
+      // Take the file only if it has no collisions, or we must take it because of preopting.
+      bool accept_oat_file = !HasCollisions(oat_file.get(), &error_msg);
+      if (!accept_oat_file) {
+        // Failed the collision check. Print warning.
+        if (Runtime::Current()->IsDexFileFallbackEnabled()) {
+          LOG(WARNING) << "Found duplicate classes, falling back to interpreter mode for "
+                       << dex_location;
+        } else {
+          LOG(WARNING) << "Found duplicate classes, dex-file-fallback disabled, will be failing to "
+                          "load classes for " << dex_location;
+        }
+        LOG(WARNING) << error_msg;
+
+        // However, if the app was part of /system and preopted, there is no original dex file
+        // available. In that case grudgingly accept the oat file.
+        if (!DexFile::MaybeDex(dex_location)) {
+          accept_oat_file = true;
+          LOG(WARNING) << "Dex location " << dex_location << " does not seem to include dex file. "
+                       << "Allow oat file use. This is potentially dangerous.";
+        }
+      }
+
+      if (accept_oat_file) {
+        source_oat_file = oat_file.release();
+        RegisterOatFile(source_oat_file);
+      }
     }
   }
 
@@ -779,8 +988,7 @@
     if (Runtime::Current()->IsDexFileFallbackEnabled()) {
       if (!DexFile::Open(dex_location, dex_location, &error_msg, &dex_files)) {
         LOG(WARNING) << error_msg;
-        error_msgs->push_back("Failed to open dex files from "
-            + std::string(dex_location));
+        error_msgs->push_back("Failed to open dex files from " + std::string(dex_location));
       }
     } else {
       error_msgs->push_back("Fallback mode disabled, skipping dex files.");
@@ -1543,6 +1751,13 @@
 
   SetupClass(dex_file, dex_class_def, klass, class_loader.Get());
 
+  // Mark the string class by setting its access flag.
+  if (UNLIKELY(!init_done_)) {
+    if (strcmp(descriptor, "Ljava/lang/String;") == 0) {
+      klass->SetStringClass();
+    }
+  }
+
   ObjectLock<mirror::Class> lock(self, klass);
   klass->SetClinitThreadId(self->GetTid());
 
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index 1bd9f0a..57989b2 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -663,6 +663,9 @@
   //       a recreation with a custom string.
   void ThrowEarlierClassFailure(mirror::Class* c) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  // Check for duplicate class definitions of the given oat file against all open oat files.
+  bool HasCollisions(const OatFile* oat_file, std::string* error_msg) LOCKS_EXCLUDED(dex_lock_);
+
   std::vector<const DexFile*> boot_class_path_;
   std::vector<std::unique_ptr<const DexFile>> opened_dex_files_;
 
diff --git a/runtime/class_linker_test.cc b/runtime/class_linker_test.cc
index 7bee98f..d155941 100644
--- a/runtime/class_linker_test.cc
+++ b/runtime/class_linker_test.cc
@@ -394,8 +394,9 @@
 
     bool error = false;
 
-    // Art method have a different size due to the padding field.
-    if (!klass->IsArtMethodClass() && !klass->IsClassClass() && !is_static) {
+    // Methods and classes have a different size due to padding field. Strings are variable length.
+    if (!klass->IsArtMethodClass() && !klass->IsClassClass() && !klass->IsStringClass() &&
+        !is_static) {
       // Currently only required for AccessibleObject since of the padding fields. The class linker
       // says AccessibleObject is 9 bytes but sizeof(AccessibleObject) is 12 bytes due to padding.
       // The RoundUp is to get around this case.
@@ -479,7 +480,7 @@
   ObjectOffsets() : CheckOffsets<mirror::Object>(false, "Ljava/lang/Object;") {
     addOffset(OFFSETOF_MEMBER(mirror::Object, klass_), "shadow$_klass_");
     addOffset(OFFSETOF_MEMBER(mirror::Object, monitor_), "shadow$_monitor_");
-#ifdef USE_BAKER_OR_BROOKS_READ_BARRIER
+#ifdef USE_BROOKS_READ_BARRIER
     addOffset(OFFSETOF_MEMBER(mirror::Object, x_rb_ptr_), "shadow$_x_rb_ptr_");
     addOffset(OFFSETOF_MEMBER(mirror::Object, x_xpadding_), "shadow$_x_xpadding_");
 #endif
@@ -538,8 +539,6 @@
   StringOffsets() : CheckOffsets<mirror::String>(false, "Ljava/lang/String;") {
     addOffset(OFFSETOF_MEMBER(mirror::String, count_), "count");
     addOffset(OFFSETOF_MEMBER(mirror::String, hash_code_), "hashCode");
-    addOffset(OFFSETOF_MEMBER(mirror::String, offset_), "offset");
-    addOffset(OFFSETOF_MEMBER(mirror::String, array_), "value");
   };
 };
 
@@ -736,14 +735,14 @@
   EXPECT_FALSE(JavaLangObject->IsSynthetic());
   EXPECT_EQ(2U, JavaLangObject->NumDirectMethods());
   EXPECT_EQ(11U, JavaLangObject->NumVirtualMethods());
-  if (!kUseBakerOrBrooksReadBarrier) {
+  if (!kUseBrooksReadBarrier) {
     EXPECT_EQ(2U, JavaLangObject->NumInstanceFields());
   } else {
     EXPECT_EQ(4U, JavaLangObject->NumInstanceFields());
   }
   EXPECT_STREQ(JavaLangObject->GetInstanceField(0)->GetName(), "shadow$_klass_");
   EXPECT_STREQ(JavaLangObject->GetInstanceField(1)->GetName(), "shadow$_monitor_");
-  if (kUseBakerOrBrooksReadBarrier) {
+  if (kUseBrooksReadBarrier) {
     EXPECT_STREQ(JavaLangObject->GetInstanceField(2)->GetName(), "shadow$_x_rb_ptr_");
     EXPECT_STREQ(JavaLangObject->GetInstanceField(3)->GetName(), "shadow$_x_xpadding_");
   }
diff --git a/runtime/common_runtime_test.h b/runtime/common_runtime_test.h
index 9917378..34fdd8d 100644
--- a/runtime/common_runtime_test.h
+++ b/runtime/common_runtime_test.h
@@ -182,7 +182,7 @@
   }
 
 #define TEST_DISABLED_FOR_MIPS() \
-  if (kRuntimeISA == kMips || kRuntimeISA == kMips64) { \
+  if (kRuntimeISA == kMips) { \
     printf("WARNING: TEST DISABLED FOR MIPS\n"); \
     return; \
   }
diff --git a/runtime/common_throws.cc b/runtime/common_throws.cc
index 0808999..b401066 100644
--- a/runtime/common_throws.cc
+++ b/runtime/common_throws.cc
@@ -283,8 +283,7 @@
 // NoSuchFieldError
 
 void ThrowNoSuchFieldError(const StringPiece& scope, mirror::Class* c,
-                           const StringPiece& type, const StringPiece& name)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+                           const StringPiece& type, const StringPiece& name) {
   std::ostringstream msg;
   std::string temp;
   msg << "No " << scope << "field " << name << " of type " << type
@@ -292,6 +291,13 @@
   ThrowException("Ljava/lang/NoSuchFieldError;", c, msg.str().c_str());
 }
 
+void ThrowNoSuchFieldException(mirror::Class* c, const StringPiece& name) {
+  std::string descriptor_storage;  // Scratch buffer handed to GetDescriptor().
+  std::ostringstream message;
+  message << "No field " << name << " in class " << c->GetDescriptor(&descriptor_storage);
+  ThrowException("Ljava/lang/NoSuchFieldException;", c, message.str().c_str());
+}
+
 // NoSuchMethodError
 
 void ThrowNoSuchMethodError(InvokeType type, mirror::Class* c, const StringPiece& name,
diff --git a/runtime/common_throws.h b/runtime/common_throws.h
index df95cf9..49890e2 100644
--- a/runtime/common_throws.h
+++ b/runtime/common_throws.h
@@ -149,6 +149,9 @@
                            const StringPiece& type, const StringPiece& name)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+void ThrowNoSuchFieldException(mirror::Class* c, const StringPiece& name)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
 // NoSuchMethodError
 
 void ThrowNoSuchMethodError(InvokeType type, mirror::Class* c, const StringPiece& name,
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index f3ce552..852ba49 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -57,6 +57,9 @@
 
 namespace art {
 
+// The key identifying the debugger to update instrumentation.
+static constexpr const char* kDbgInstrumentationKey = "Debugger";
+
 static const size_t kMaxAllocRecordStackDepth = 16;  // Max 255.
 static const size_t kDefaultNumAllocRecords = 64*1024;  // Must be a power of 2. 2BE can hold 64k-1.
 
@@ -232,13 +235,29 @@
   virtual ~DebugInstrumentationListener() {}
 
   void MethodEntered(Thread* thread, mirror::Object* this_object, mirror::ArtMethod* method,
-                     uint32_t dex_pc ATTRIBUTE_UNUSED)
+                     uint32_t dex_pc)
       OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     if (method->IsNative()) {
       // TODO: post location events is a suspension point and native method entry stubs aren't.
       return;
     }
-    Dbg::UpdateDebugger(thread, this_object, method, 0, Dbg::kMethodEntry, nullptr);
+    if (IsListeningToDexPcMoved()) {
+      // We also listen to kDexPcMoved instrumentation event so we know the DexPcMoved method is
+      // going to be called right after us. To avoid sending JDWP events twice for this location,
+      // we report the event in DexPcMoved. However, we must remember this is method entry so we
+      // send the METHOD_ENTRY event. And we can also group it with other events for this location
+      // like BREAKPOINT or SINGLE_STEP (or even METHOD_EXIT if this is a RETURN instruction).
+      thread->SetDebugMethodEntry();
+    } else if (IsListeningToMethodExit() && IsReturn(method, dex_pc)) {
+      // We also listen to kMethodExited instrumentation event and the current instruction is a
+      // RETURN so we know the MethodExited method is going to be called right after us. To avoid
+      // sending JDWP events twice for this location, we report the event(s) in MethodExited.
+      // However, we must remember this is method entry so we send the METHOD_ENTRY event. And we
+      // can also group it with other events for this location like BREAKPOINT or SINGLE_STEP.
+      thread->SetDebugMethodEntry();
+    } else {
+      Dbg::UpdateDebugger(thread, this_object, method, 0, Dbg::kMethodEntry, nullptr);
+    }
   }
 
   void MethodExited(Thread* thread, mirror::Object* this_object, mirror::ArtMethod* method,
@@ -248,14 +267,20 @@
       // TODO: post location events is a suspension point and native method entry stubs aren't.
       return;
     }
-    Dbg::UpdateDebugger(thread, this_object, method, dex_pc, Dbg::kMethodExit, &return_value);
+    uint32_t events = Dbg::kMethodExit;
+    if (thread->IsDebugMethodEntry()) {
+      // It is also the method entry.
+      DCHECK(IsReturn(method, dex_pc));
+      events |= Dbg::kMethodEntry;
+      thread->ClearDebugMethodEntry();
+    }
+    Dbg::UpdateDebugger(thread, this_object, method, dex_pc, events, &return_value);
   }
 
-  void MethodUnwind(Thread* thread, mirror::Object* this_object, mirror::ArtMethod* method,
-                    uint32_t dex_pc)
+  void MethodUnwind(Thread* thread ATTRIBUTE_UNUSED, mirror::Object* this_object ATTRIBUTE_UNUSED,
+                    mirror::ArtMethod* method, uint32_t dex_pc)
       OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     // We're not recorded to listen to this kind of event, so complain.
-    UNUSED(thread, this_object, method, dex_pc);
     LOG(ERROR) << "Unexpected method unwind event in debugger " << PrettyMethod(method)
                << " " << dex_pc;
   }
@@ -263,13 +288,27 @@
   void DexPcMoved(Thread* thread, mirror::Object* this_object, mirror::ArtMethod* method,
                   uint32_t new_dex_pc)
       OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    Dbg::UpdateDebugger(thread, this_object, method, new_dex_pc, 0, nullptr);
+    if (IsListeningToMethodExit() && IsReturn(method, new_dex_pc)) {
+      // We also listen to kMethodExited instrumentation event and the current instruction is a
+      // RETURN so we know the MethodExited method is going to be called right after us. Like in
+      // MethodEntered, we delegate event reporting to MethodExited.
+      // Besides, if this RETURN instruction is the only one in the method, we can send multiple
+      // JDWP events in the same packet: METHOD_ENTRY, METHOD_EXIT, BREAKPOINT and/or SINGLE_STEP.
+      // Therefore, we must not clear the debug method entry flag here.
+    } else {
+      uint32_t events = 0;
+      if (thread->IsDebugMethodEntry()) {
+        // It is also the method entry.
+        events = Dbg::kMethodEntry;
+        thread->ClearDebugMethodEntry();
+      }
+      Dbg::UpdateDebugger(thread, this_object, method, new_dex_pc, events, nullptr);
+    }
   }
 
-  void FieldRead(Thread* thread, mirror::Object* this_object, mirror::ArtMethod* method,
-                 uint32_t dex_pc, ArtField* field)
+  void FieldRead(Thread* thread ATTRIBUTE_UNUSED, mirror::Object* this_object,
+                 mirror::ArtMethod* method, uint32_t dex_pc, ArtField* field)
       OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    UNUSED(thread);
     Dbg::PostFieldAccessEvent(method, dex_pc, this_object, field);
   }
 
@@ -293,6 +332,26 @@
   }
 
  private:
+  static bool IsReturn(mirror::ArtMethod* method, uint32_t dex_pc)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    const DexFile::CodeItem* code_item = method->GetCodeItem();
+    const Instruction* instruction = Instruction::At(&code_item->insns_[dex_pc]);
+    return instruction->IsReturn();
+  }
+
+  static bool IsListeningToDexPcMoved() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return IsListeningTo(instrumentation::Instrumentation::kDexPcMoved);
+  }
+
+  static bool IsListeningToMethodExit() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return IsListeningTo(instrumentation::Instrumentation::kMethodExited);
+  }
+
+  static bool IsListeningTo(instrumentation::Instrumentation::InstrumentationEvent event)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return (Dbg::GetInstrumentationEvents() & event) != 0;
+  }
+
   DISALLOW_COPY_AND_ASSIGN(DebugInstrumentationListener);
 } gDebugInstrumentationListener;
 
@@ -352,7 +411,7 @@
 }
 
 void SingleStepControl::VisitRoots(RootVisitor* visitor, const RootInfo& root_info) {
-  visitor->VisitRootIfNonNull(reinterpret_cast<mirror::Object**>(&method_), root_info);
+  method_.VisitRootIfNonNull(visitor, root_info);
 }
 
 void SingleStepControl::AddDexPc(uint32_t dex_pc) {
@@ -677,7 +736,7 @@
       instrumentation_events_ = 0;
     }
     if (RequiresDeoptimization()) {
-      runtime->GetInstrumentation()->DisableDeoptimization();
+      runtime->GetInstrumentation()->DisableDeoptimization(kDbgInstrumentationKey);
     }
     gDebuggerActive = false;
   }
@@ -831,8 +890,10 @@
                         std::vector<JDWP::ObjectId>* monitor_vector,
                         std::vector<uint32_t>* stack_depth_vector)
         SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-      : StackVisitor(thread, context), current_stack_depth(0),
-        monitors(monitor_vector), stack_depths(stack_depth_vector) {}
+      : StackVisitor(thread, context, StackVisitor::StackWalkKind::kIncludeInlinedFrames),
+        current_stack_depth(0),
+        monitors(monitor_vector),
+        stack_depths(stack_depth_vector) {}
 
     // TODO: Enable annotalysis. We know lock is held in constructor, but abstraction confuses
     // annotalysis.
@@ -2082,6 +2143,7 @@
     case kWaitingForDebuggerToAttach:
     case kWaitingForDeoptimization:
     case kWaitingForGcToComplete:
+    case kWaitingForGetObjectsAllocated:
     case kWaitingForJniOnLoad:
     case kWaitingForMethodTracingStart:
     case kWaitingForSignalCatcherOutput:
@@ -2193,7 +2255,8 @@
 static int GetStackDepth(Thread* thread) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   struct CountStackDepthVisitor : public StackVisitor {
     explicit CountStackDepthVisitor(Thread* thread_in)
-        : StackVisitor(thread_in, nullptr), depth(0) {}
+        : StackVisitor(thread_in, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames),
+          depth(0) {}
 
     // TODO: Enable annotalysis. We know lock is held in constructor, but abstraction confuses
     // annotalysis.
@@ -2233,8 +2296,11 @@
     GetFrameVisitor(Thread* thread, size_t start_frame_in, size_t frame_count_in,
                     JDWP::ExpandBuf* buf_in)
         SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-        : StackVisitor(thread, nullptr), depth_(0),
-          start_frame_(start_frame_in), frame_count_(frame_count_in), buf_(buf_in) {
+        : StackVisitor(thread, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames),
+          depth_(0),
+          start_frame_(start_frame_in),
+          frame_count_(frame_count_in),
+          buf_(buf_in) {
       expandBufAdd4BE(buf_, frame_count_);
     }
 
@@ -2351,7 +2417,9 @@
 struct GetThisVisitor : public StackVisitor {
   GetThisVisitor(Thread* thread, Context* context, JDWP::FrameId frame_id_in)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-      : StackVisitor(thread, context), this_object(nullptr), frame_id(frame_id_in) {}
+      : StackVisitor(thread, context, StackVisitor::StackWalkKind::kIncludeInlinedFrames),
+        this_object(nullptr),
+        frame_id(frame_id_in) {}
 
   // TODO: Enable annotalysis. We know lock is held in constructor, but abstraction confuses
   // annotalysis.
@@ -2391,7 +2459,9 @@
  public:
   FindFrameVisitor(Thread* thread, Context* context, JDWP::FrameId frame_id)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-      : StackVisitor(thread, context), frame_id_(frame_id), error_(JDWP::ERR_INVALID_FRAMEID) {}
+      : StackVisitor(thread, context, StackVisitor::StackWalkKind::kIncludeInlinedFrames),
+        frame_id_(frame_id),
+        error_(JDWP::ERR_INVALID_FRAMEID) {}
 
   // TODO: Enable annotalysis. We know lock is held in constructor, but abstraction confuses
   // annotalysis.
@@ -2775,7 +2845,7 @@
  public:
   CatchLocationFinder(Thread* self, const Handle<mirror::Throwable>& exception, Context* context)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-    : StackVisitor(self, context),
+    : StackVisitor(self, context, StackVisitor::StackWalkKind::kIncludeInlinedFrames),
       self_(self),
       exception_(exception),
       handle_scope_(self),
@@ -2859,10 +2929,11 @@
   if (!IsDebuggerActive()) {
     return;
   }
-  StackHandleScope<1> handle_scope(Thread::Current());
+  Thread* const self = Thread::Current();
+  StackHandleScope<1> handle_scope(self);
   Handle<mirror::Throwable> h_exception(handle_scope.NewHandle(exception_object));
   std::unique_ptr<Context> context(Context::Create());
-  CatchLocationFinder clf(Thread::Current(), h_exception, context.get());
+  CatchLocationFinder clf(self, h_exception, context.get());
   clf.WalkStack(/* include_transitions */ false);
   JDWP::EventLocation exception_throw_location;
   SetEventLocation(&exception_throw_location, clf.GetThrowMethod(), clf.GetThrowDexPc());
@@ -2998,12 +3069,12 @@
       break;
     case DeoptimizationRequest::kFullDeoptimization:
       VLOG(jdwp) << "Deoptimize the world ...";
-      instrumentation->DeoptimizeEverything();
+      instrumentation->DeoptimizeEverything(kDbgInstrumentationKey);
       VLOG(jdwp) << "Deoptimize the world DONE";
       break;
     case DeoptimizationRequest::kFullUndeoptimization:
       VLOG(jdwp) << "Undeoptimize the world ...";
-      instrumentation->UndeoptimizeEverything();
+      instrumentation->UndeoptimizeEverything(kDbgInstrumentationKey);
       VLOG(jdwp) << "Undeoptimize the world DONE";
       break;
     case DeoptimizationRequest::kSelectiveDeoptimization:
@@ -3523,8 +3594,10 @@
   // is for step-out.
   struct SingleStepStackVisitor : public StackVisitor {
     explicit SingleStepStackVisitor(Thread* thread) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-        : StackVisitor(thread, nullptr), stack_depth(0), method(nullptr), line_number(-1) {
-    }
+        : StackVisitor(thread, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames),
+          stack_depth(0),
+          method(nullptr),
+          line_number(-1) {}
 
     // TODO: Enable annotalysis. We know lock is held in constructor, but abstraction confuses
     // annotalysis.
@@ -3909,7 +3982,7 @@
   Handle<mirror::Object> object_result = hs.NewHandle(is_object_result ? result.GetL() : nullptr);
   Handle<mirror::Throwable> exception = hs.NewHandle(soa.Self()->GetException());
   soa.Self()->ClearException();
-  pReq->exception = gRegistry->Add(exception.Get());
+  pReq->exception = gRegistry->Add(exception);
   if (pReq->exception != 0) {
     VLOG(jdwp) << "  JDWP invocation returning with exception=" << exception.Get()
                << " " << exception->Dump();
@@ -4078,7 +4151,7 @@
     StackHandleScope<1> hs(soa.Self());
     Handle<mirror::String> name(hs.NewHandle(t->GetThreadName(soa)));
     size_t char_count = (name.Get() != nullptr) ? name->GetLength() : 0;
-    const jchar* chars = (name.Get() != nullptr) ? name->GetCharArray()->GetData() : nullptr;
+    const jchar* chars = (name.Get() != nullptr) ? name->GetValue() : nullptr;
 
     std::vector<uint8_t> bytes;
     JDWP::Append4BE(bytes, t->GetThreadId());
@@ -4637,7 +4710,9 @@
 struct AllocRecordStackVisitor : public StackVisitor {
   AllocRecordStackVisitor(Thread* thread, AllocRecord* record_in)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-      : StackVisitor(thread, nullptr), record(record_in), depth(0) {}
+      : StackVisitor(thread, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames),
+        record(record_in),
+        depth(0) {}
 
   // TODO: Enable annotalysis. We know lock is held in constructor, but abstraction confuses
   // annotalysis.
@@ -4774,7 +4849,7 @@
     for (const std::string& str : table_) {
       const char* s = str.c_str();
       size_t s_len = CountModifiedUtf8Chars(s);
-      std::unique_ptr<uint16_t> s_utf16(new uint16_t[s_len]);
+      std::unique_ptr<uint16_t[]> s_utf16(new uint16_t[s_len]);
       ConvertModifiedUtf8ToUtf16(s_utf16.get(), s);
       JDWP::AppendUtf16BE(bytes, s_utf16.get(), s_len);
     }
diff --git a/runtime/debugger.h b/runtime/debugger.h
index fe90eb6..811d345 100644
--- a/runtime/debugger.h
+++ b/runtime/debugger.h
@@ -109,8 +109,8 @@
     return stack_depth_;
   }
 
-  mirror::ArtMethod* GetMethod() const {
-    return method_;
+  mirror::ArtMethod* GetMethod() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return method_.Read();
   }
 
   const std::set<uint32_t>& GetDexPcs() const {
@@ -138,7 +138,7 @@
   // set of DEX pcs associated to the source line number where the suspension occurred.
   // This is used to support SD_INTO and SD_OVER single-step depths so we detect when a single-step
   // causes the execution of an instruction in a different method or at a different line number.
-  mirror::ArtMethod* method_;
+  GcRoot<mirror::ArtMethod> method_;
   std::set<uint32_t> dex_pcs_;
 
   DISALLOW_COPY_AND_ASSIGN(SingleStepControl);
@@ -714,6 +714,10 @@
 
   static JDWP::JdwpState* GetJdwpState();
 
+  static uint32_t GetInstrumentationEvents() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return instrumentation_events_;
+  }
+
  private:
   static JDWP::JdwpError GetLocalValue(const StackVisitor& visitor,
                                        ScopedObjectAccessUnchecked& soa, int slot,
diff --git a/runtime/dex_file.cc b/runtime/dex_file.cc
index 0589cdd..dfe5a04 100644
--- a/runtime/dex_file.cc
+++ b/runtime/dex_file.cc
@@ -153,6 +153,31 @@
   return false;
 }
 
+static bool ContainsClassesDex(int fd, const char* filename) {
+  std::string error_msg;
+  std::unique_ptr<ZipArchive> zip_archive(ZipArchive::OpenFromFd(fd, filename, &error_msg));
+  if (zip_archive.get() == nullptr) {
+    return false;
+  }
+  std::unique_ptr<ZipEntry> zip_entry(zip_archive->Find(DexFile::kClassesDex, &error_msg));
+  return (zip_entry.get() != nullptr);
+}
+
+bool DexFile::MaybeDex(const char* filename) {
+  uint32_t magic;
+  std::string error_msg;
+  ScopedFd fd(OpenAndReadMagic(filename, &magic, &error_msg));
+  if (fd.get() == -1) {
+    return false;
+  }
+  if (IsZipMagic(magic)) {
+    return ContainsClassesDex(fd.release(), filename);
+  } else if (IsDexMagic(magic)) {
+    return true;
+  }
+  return false;
+}
+
 int DexFile::GetPermissions() const {
   if (mem_map_.get() == nullptr) {
     return 0;
@@ -296,6 +321,12 @@
   return dex_file;
 }
 
+// Technically we do not have a limitation with respect to the number of dex files that can be in a
+// multidex APK. However, it's bad practice, as each dex file requires its own tables for symbols
+// (types, classes, methods, ...) and dex caches. So warn the user that we open a zip with what
+// seems an excessive number.
+static constexpr size_t kWarnOnManyDexFilesThreshold = 100;
+
 bool DexFile::OpenFromZip(const ZipArchive& zip_archive, const std::string& location,
                           std::string* error_msg,
                           std::vector<std::unique_ptr<const DexFile>>* dex_files) {
@@ -310,14 +341,13 @@
     dex_files->push_back(std::move(dex_file));
 
     // Now try some more.
-    size_t i = 2;
 
     // We could try to avoid std::string allocations by working on a char array directly. As we
     // do not expect a lot of iterations, this seems too involved and brittle.
 
-    while (i < 100) {
-      std::string name = StringPrintf("classes%zu.dex", i);
-      std::string fake_location = location + kMultiDexSeparator + name;
+    for (size_t i = 1; ; ++i) {
+      std::string name = GetMultiDexClassesDexName(i);
+      std::string fake_location = GetMultiDexLocation(i, location.c_str());
       std::unique_ptr<const DexFile> next_dex_file(Open(zip_archive, name.c_str(), fake_location,
                                                         error_msg, &error_code));
       if (next_dex_file.get() == nullptr) {
@@ -329,7 +359,16 @@
         dex_files->push_back(std::move(next_dex_file));
       }
 
-      i++;
+      if (i == kWarnOnManyDexFilesThreshold) {  // Equality, so this warns once per zip archive.
+        LOG(WARNING) << location << " has in excess of " << kWarnOnManyDexFilesThreshold
+                     << " dex files. Please consider coalescing and shrinking the number to "
+                        "avoid runtime overhead.";
+      }
+
+      if (i == std::numeric_limits<size_t>::max()) {
+        LOG(ERROR) << "Overflow in number of dex files!";
+        break;
+      }
     }
 
     return true;
@@ -973,11 +1012,19 @@
   return strrchr(location, kMultiDexSeparator) != nullptr;
 }
 
-std::string DexFile::GetMultiDexClassesDexName(size_t number, const char* dex_location) {
-  if (number == 0) {
+std::string DexFile::GetMultiDexClassesDexName(size_t index) {
+  if (index == 0) {
+    return "classes.dex";
+  } else {
+    return StringPrintf("classes%zu.dex", index + 1);
+  }
+}
+
+std::string DexFile::GetMultiDexLocation(size_t index, const char* dex_location) {
+  if (index == 0) {
     return dex_location;
   } else {
-    return StringPrintf("%s" kMultiDexSeparatorString "classes%zu.dex", dex_location, number + 1);
+    return StringPrintf("%s" kMultiDexSeparatorString "classes%zu.dex", dex_location, index + 1);
   }
 }
 
diff --git a/runtime/dex_file.h b/runtime/dex_file.h
index 0d07358..84eaa4a 100644
--- a/runtime/dex_file.h
+++ b/runtime/dex_file.h
@@ -388,6 +388,10 @@
   static bool Open(const char* filename, const char* location, std::string* error_msg,
                    std::vector<std::unique_ptr<const DexFile>>* dex_files);
 
+  // Checks whether the given file has the dex magic, or is a zip file with a classes.dex entry.
+  // If this function returns false, Open will not succeed. The inverse is not true, however.
+  static bool MaybeDex(const char* filename);
+
   // Opens .dex file, backed by existing memory
   static std::unique_ptr<const DexFile> Open(const uint8_t* base, size_t size,
                                              const std::string& location,
@@ -888,7 +892,13 @@
     return size_;
   }
 
-  static std::string GetMultiDexClassesDexName(size_t number, const char* dex_location);
+  // Return the name of the index-th classes.dex in a multidex zip file. This is classes.dex for
+  // index == 0, and classes{index + 1}.dex else.
+  static std::string GetMultiDexClassesDexName(size_t index);
+
+  // Return the (possibly synthetic) dex location for a multidex entry. This is dex_location for
+  // index == 0, and dex_location + multi-dex-separator + GetMultiDexClassesDexName(index) else.
+  static std::string GetMultiDexLocation(size_t index, const char* dex_location);
 
   // Returns the canonical form of the given dex location.
   //
diff --git a/runtime/dex_file_test.cc b/runtime/dex_file_test.cc
index 4d099e1..90b35a3 100644
--- a/runtime/dex_file_test.cc
+++ b/runtime/dex_file_test.cc
@@ -350,11 +350,20 @@
 }
 
 TEST_F(DexFileTest, GetMultiDexClassesDexName) {
+  ASSERT_EQ("classes.dex", DexFile::GetMultiDexClassesDexName(0));
+  ASSERT_EQ("classes2.dex", DexFile::GetMultiDexClassesDexName(1));
+  ASSERT_EQ("classes3.dex", DexFile::GetMultiDexClassesDexName(2));
+  ASSERT_EQ("classes100.dex", DexFile::GetMultiDexClassesDexName(99));
+}
+
+TEST_F(DexFileTest, GetMultiDexLocation) {
   std::string dex_location_str = "/system/app/framework.jar";
   const char* dex_location = dex_location_str.c_str();
-  ASSERT_EQ("/system/app/framework.jar", DexFile::GetMultiDexClassesDexName(0, dex_location));
-  ASSERT_EQ("/system/app/framework.jar:classes2.dex", DexFile::GetMultiDexClassesDexName(1, dex_location));
-  ASSERT_EQ("/system/app/framework.jar:classes101.dex", DexFile::GetMultiDexClassesDexName(100, dex_location));
+  ASSERT_EQ("/system/app/framework.jar", DexFile::GetMultiDexLocation(0, dex_location));
+  ASSERT_EQ("/system/app/framework.jar:classes2.dex",
+            DexFile::GetMultiDexLocation(1, dex_location));
+  ASSERT_EQ("/system/app/framework.jar:classes101.dex",
+            DexFile::GetMultiDexLocation(100, dex_location));
 }
 
 TEST_F(DexFileTest, GetDexCanonicalLocation) {
@@ -363,7 +372,7 @@
   std::string dex_location(dex_location_real.get());
 
   ASSERT_EQ(dex_location, DexFile::GetDexCanonicalLocation(dex_location.c_str()));
-  std::string multidex_location = DexFile::GetMultiDexClassesDexName(1, dex_location.c_str());
+  std::string multidex_location = DexFile::GetMultiDexLocation(1, dex_location.c_str());
   ASSERT_EQ(multidex_location, DexFile::GetDexCanonicalLocation(multidex_location.c_str()));
 
   std::string dex_location_sym = dex_location + "symlink";
@@ -371,7 +380,7 @@
 
   ASSERT_EQ(dex_location, DexFile::GetDexCanonicalLocation(dex_location_sym.c_str()));
 
-  std::string multidex_location_sym = DexFile::GetMultiDexClassesDexName(1, dex_location_sym.c_str());
+  std::string multidex_location_sym = DexFile::GetMultiDexLocation(1, dex_location_sym.c_str());
   ASSERT_EQ(multidex_location, DexFile::GetDexCanonicalLocation(multidex_location_sym.c_str()));
 
   ASSERT_EQ(0, unlink(dex_location_sym.c_str()));
diff --git a/runtime/dex_file_verifier.cc b/runtime/dex_file_verifier.cc
index 2603975..a66c38e 100644
--- a/runtime/dex_file_verifier.cc
+++ b/runtime/dex_file_verifier.cc
@@ -944,7 +944,7 @@
         uint32_t type_idx = DecodeUnsignedLeb128(&ptr_);
         if (type_idx != 0) {
           type_idx--;
-          if (!CheckIndex(type_idx, header_->string_ids_size_, "DBG_START_LOCAL type_idx")) {
+          if (!CheckIndex(type_idx, header_->type_ids_size_, "DBG_START_LOCAL type_idx")) {
             return false;
           }
         }
@@ -975,7 +975,7 @@
         uint32_t type_idx = DecodeUnsignedLeb128(&ptr_);
         if (type_idx != 0) {
           type_idx--;
-          if (!CheckIndex(type_idx, header_->string_ids_size_, "DBG_START_LOCAL_EXTENDED type_idx")) {
+          if (!CheckIndex(type_idx, header_->type_ids_size_, "DBG_START_LOCAL_EXTENDED type_idx")) {
             return false;
           }
         }
diff --git a/runtime/dex_file_verifier_test.cc b/runtime/dex_file_verifier_test.cc
index 95a47cc..9f1ffec 100644
--- a/runtime/dex_file_verifier_test.cc
+++ b/runtime/dex_file_verifier_test.cc
@@ -200,11 +200,11 @@
   return dex_file;
 }
 
-static bool ModifyAndLoad(const char* location, size_t offset, uint8_t new_val,
-                                    std::string* error_msg) {
+static bool ModifyAndLoad(const char* dex_file_content, const char* location, size_t offset,
+                          uint8_t new_val, std::string* error_msg) {
   // Decode base64.
   size_t length;
-  std::unique_ptr<uint8_t[]> dex_bytes(DecodeBase64(kGoodTestDex, &length));
+  std::unique_ptr<uint8_t[]> dex_bytes(DecodeBase64(dex_file_content, &length));
   CHECK(dex_bytes.get() != nullptr);
 
   // Make modifications.
@@ -221,7 +221,7 @@
     // Class error.
     ScratchFile tmp;
     std::string error_msg;
-    bool success = !ModifyAndLoad(tmp.GetFilename().c_str(), 220, 0xFFU, &error_msg);
+    bool success = !ModifyAndLoad(kGoodTestDex, tmp.GetFilename().c_str(), 220, 0xFFU, &error_msg);
     ASSERT_TRUE(success);
     ASSERT_NE(error_msg.find("inter_method_id_item class_idx"), std::string::npos) << error_msg;
   }
@@ -230,7 +230,7 @@
     // Proto error.
     ScratchFile tmp;
     std::string error_msg;
-    bool success = !ModifyAndLoad(tmp.GetFilename().c_str(), 222, 0xFFU, &error_msg);
+    bool success = !ModifyAndLoad(kGoodTestDex, tmp.GetFilename().c_str(), 222, 0xFFU, &error_msg);
     ASSERT_TRUE(success);
     ASSERT_NE(error_msg.find("inter_method_id_item proto_idx"), std::string::npos) << error_msg;
   }
@@ -239,10 +239,81 @@
     // Name error.
     ScratchFile tmp;
     std::string error_msg;
-    bool success = !ModifyAndLoad(tmp.GetFilename().c_str(), 224, 0xFFU, &error_msg);
+    bool success = !ModifyAndLoad(kGoodTestDex, tmp.GetFilename().c_str(), 224, 0xFFU, &error_msg);
     ASSERT_TRUE(success);
     ASSERT_NE(error_msg.find("inter_method_id_item name_idx"), std::string::npos) << error_msg;
   }
 }
 
+// Generated from:
+//
+// .class public LTest;
+// .super Ljava/lang/Object;
+// .source "Test.java"
+//
+// .method public constructor <init>()V
+//     .registers 1
+//
+//     .prologue
+//     .line 1
+//     invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+//
+//     return-void
+// .end method
+//
+// .method public static main()V
+//     .registers 2
+//
+//     const-string v0, "a"
+//     const-string v0, "b"
+//     const-string v0, "c"
+//     const-string v0, "d"
+//     const-string v0, "e"
+//     const-string v0, "f"
+//     const-string v0, "g"
+//     const-string v0, "h"
+//     const-string v0, "i"
+//     const-string v0, "j"
+//     const-string v0, "k"
+//
+//     .local v1, "local_var":Ljava/lang/String;
+//     const-string v1, "test"
+// .end method
+
+static const char kDebugInfoTestDex[] =
+    "ZGV4CjAzNQCHRkHix2eIMQgvLD/0VGrlllZLo0Rb6VyUAgAAcAAAAHhWNBIAAAAAAAAAAAwCAAAU"
+    "AAAAcAAAAAQAAADAAAAAAQAAANAAAAAAAAAAAAAAAAMAAADcAAAAAQAAAPQAAACAAQAAFAEAABQB"
+    "AAAcAQAAJAEAADgBAABMAQAAVwEAAFoBAABdAQAAYAEAAGMBAABmAQAAaQEAAGwBAABvAQAAcgEA"
+    "AHUBAAB4AQAAewEAAIYBAACMAQAAAQAAAAIAAAADAAAABQAAAAUAAAADAAAAAAAAAAAAAAAAAAAA"
+    "AAAAABIAAAABAAAAAAAAAAAAAAABAAAAAQAAAAAAAAAEAAAAAAAAAPwBAAAAAAAABjxpbml0PgAG"
+    "TFRlc3Q7ABJMamF2YS9sYW5nL09iamVjdDsAEkxqYXZhL2xhbmcvU3RyaW5nOwAJVGVzdC5qYXZh"
+    "AAFWAAFhAAFiAAFjAAFkAAFlAAFmAAFnAAFoAAFpAAFqAAFrAAlsb2NhbF92YXIABG1haW4ABHRl"
+    "c3QAAAABAAcOAAAAARYDARIDAAAAAQABAAEAAACUAQAABAAAAHAQAgAAAA4AAgAAAAAAAACZAQAA"
+    "GAAAABoABgAaAAcAGgAIABoACQAaAAoAGgALABoADAAaAA0AGgAOABoADwAaABAAGgETAAAAAgAA"
+    "gYAEpAMBCbwDAAALAAAAAAAAAAEAAAAAAAAAAQAAABQAAABwAAAAAgAAAAQAAADAAAAAAwAAAAEA"
+    "AADQAAAABQAAAAMAAADcAAAABgAAAAEAAAD0AAAAAiAAABQAAAAUAQAAAyAAAAIAAACUAQAAASAA"
+    "AAIAAACkAQAAACAAAAEAAAD8AQAAABAAAAEAAAAMAgAA";
+
+TEST_F(DexFileVerifierTest, DebugInfoTypeIdxTest) {
+  {
+    // The input dex file should be good before modification.
+    ScratchFile tmp;
+    std::string error_msg;
+    std::unique_ptr<const DexFile> raw(OpenDexFileBase64(kDebugInfoTestDex,
+                                                         tmp.GetFilename().c_str(),
+                                                         &error_msg));
+    ASSERT_TRUE(raw.get() != nullptr) << error_msg;
+  }
+
+  {
+    // Modify the debug information entry.
+    ScratchFile tmp;
+    std::string error_msg;
+    bool success = !ModifyAndLoad(kDebugInfoTestDex, tmp.GetFilename().c_str(), 416, 0x14U,
+                                  &error_msg);
+    ASSERT_TRUE(success);
+    ASSERT_NE(error_msg.find("DBG_START_LOCAL type_idx"), std::string::npos) << error_msg;
+  }
+}
+
 }  // namespace art
diff --git a/runtime/elf_file.cc b/runtime/elf_file.cc
index e909e64..0c5210d 100644
--- a/runtime/elf_file.cc
+++ b/runtime/elf_file.cc
@@ -1080,9 +1080,9 @@
 
 // Base on bionic phdr_table_get_load_size
 template <typename ElfTypes>
-size_t ElfFileImpl<ElfTypes>::GetLoadedSize() const {
-  Elf_Addr min_vaddr = 0xFFFFFFFFu;
-  Elf_Addr max_vaddr = 0x00000000u;
+bool ElfFileImpl<ElfTypes>::GetLoadedSize(size_t* size, std::string* error_msg) const {
+  Elf_Addr min_vaddr = static_cast<Elf_Addr>(-1);
+  Elf_Addr max_vaddr = 0u;
   for (Elf_Word i = 0; i < GetProgramHeaderNum(); i++) {
     Elf_Phdr* program_header = GetProgramHeader(i);
     if (program_header->p_type != PT_LOAD) {
@@ -1093,6 +1093,15 @@
        min_vaddr = begin_vaddr;
     }
     Elf_Addr end_vaddr = program_header->p_vaddr + program_header->p_memsz;
+    if (UNLIKELY(begin_vaddr > end_vaddr)) {
+      std::ostringstream oss;
+      oss << "Program header #" << i << " has overflow in p_vaddr+p_memsz: 0x" << std::hex
+          << program_header->p_vaddr << "+0x" << program_header->p_memsz << "=0x" << end_vaddr
+          << " in ELF file \"" << file_->GetPath() << "\"";
+      *error_msg = oss.str();
+      *size = static_cast<size_t>(-1);
+      return false;
+    }
     if (end_vaddr > max_vaddr) {
       max_vaddr = end_vaddr;
     }
@@ -1100,8 +1109,18 @@
   min_vaddr = RoundDown(min_vaddr, kPageSize);
   max_vaddr = RoundUp(max_vaddr, kPageSize);
   CHECK_LT(min_vaddr, max_vaddr) << file_->GetPath();
-  size_t loaded_size = max_vaddr - min_vaddr;
-  return loaded_size;
+  Elf_Addr loaded_size = max_vaddr - min_vaddr;
+  // Check that the loaded_size fits in size_t.
+  if (UNLIKELY(loaded_size > std::numeric_limits<size_t>::max())) {
+    std::ostringstream oss;
+    oss << "Loaded size is 0x" << std::hex << loaded_size << " but maximum size_t is 0x"
+        << std::numeric_limits<size_t>::max() << " for ELF file \"" << file_->GetPath() << "\"";
+    *error_msg = oss.str();
+    *size = static_cast<size_t>(-1);
+    return false;
+  }
+  *size = loaded_size;
+  return true;
 }
 
 template <typename ElfTypes>
@@ -1164,9 +1183,14 @@
       }
       std::string reservation_name("ElfFile reservation for ");
       reservation_name += file_->GetPath();
+      size_t loaded_size;
+      if (!GetLoadedSize(&loaded_size, error_msg)) {
+        DCHECK(!error_msg->empty());
+        return false;
+      }
       std::unique_ptr<MemMap> reserve(MemMap::MapAnonymous(reservation_name.c_str(),
                                                            reserve_base_override,
-                                                           GetLoadedSize(), PROT_NONE, false, false,
+                                                           loaded_size, PROT_NONE, false, false,
                                                            error_msg));
       if (reserve.get() == nullptr) {
         *error_msg = StringPrintf("Failed to allocate %s: %s",
@@ -1915,8 +1939,8 @@
   DELEGATE_TO_IMPL(FindSymbolAddress, section_type, symbol_name, build_map);
 }
 
-size_t ElfFile::GetLoadedSize() const {
-  DELEGATE_TO_IMPL(GetLoadedSize);
+bool ElfFile::GetLoadedSize(size_t* size, std::string* error_msg) const {
+  DELEGATE_TO_IMPL(GetLoadedSize, size, error_msg);
 }
 
 bool ElfFile::Strip(File* file, std::string* error_msg) {
diff --git a/runtime/elf_file.h b/runtime/elf_file.h
index fe6896d..48cb4b8 100644
--- a/runtime/elf_file.h
+++ b/runtime/elf_file.h
@@ -66,7 +66,7 @@
                              const std::string& symbol_name,
                              bool build_map);
 
-  size_t GetLoadedSize() const;
+  bool GetLoadedSize(size_t* size, std::string* error_msg) const;
 
   // Strip an ELF file of unneeded debugging information.
   // Returns true on success, false on failure.
diff --git a/runtime/elf_file_impl.h b/runtime/elf_file_impl.h
index 80950c6..3ad096f 100644
--- a/runtime/elf_file_impl.h
+++ b/runtime/elf_file_impl.h
@@ -106,8 +106,8 @@
   Elf_Word GetRelaNum(Elf_Shdr&) const;
   Elf_Rela& GetRela(Elf_Shdr&, Elf_Word) const;
 
-  // Returns the expected size when the file is loaded at runtime
-  size_t GetLoadedSize() const;
+  // Retrieves the expected size when the file is loaded at runtime. Returns true if successful.
+  bool GetLoadedSize(size_t* size, std::string* error_msg) const;
 
   // Load segments into memory based on PT_LOAD program headers.
   // executable is true at run time, false at compile time.
diff --git a/runtime/entrypoints/entrypoint_utils-inl.h b/runtime/entrypoints/entrypoint_utils-inl.h
index 64b7ecd..9292cff 100644
--- a/runtime/entrypoints/entrypoint_utils-inl.h
+++ b/runtime/entrypoints/entrypoint_utils-inl.h
@@ -22,6 +22,8 @@
 #include "class_linker-inl.h"
 #include "common_throws.h"
 #include "dex_file.h"
+#include "entrypoints/quick/callee_save_frame.h"
+#include "handle_scope-inl.h"
 #include "indirect_reference_table.h"
 #include "invoke_type.h"
 #include "jni_internal.h"
@@ -30,11 +32,31 @@
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
 #include "mirror/throwable.h"
-#include "handle_scope-inl.h"
+#include "nth_caller_visitor.h"
+#include "runtime.h"
 #include "thread.h"
 
 namespace art {
 
+inline mirror::ArtMethod* GetCalleeSaveMethodCaller(Thread* self, Runtime::CalleeSaveType type)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  auto* refs_only_sp = self->GetManagedStack()->GetTopQuickFrame();
+  DCHECK_EQ(refs_only_sp->AsMirrorPtr(), Runtime::Current()->GetCalleeSaveMethod(type));
+
+  const size_t callee_frame_size = GetCalleeSaveFrameSize(kRuntimeISA, type);
+  auto* caller_sp = reinterpret_cast<StackReference<mirror::ArtMethod>*>(
+          reinterpret_cast<uintptr_t>(refs_only_sp) + callee_frame_size);
+  auto* caller = caller_sp->AsMirrorPtr();
+
+  if (kIsDebugBuild) {
+    NthCallerVisitor visitor(self, 1, true);
+    visitor.WalkStack();
+    CHECK(caller == visitor.caller);
+  }
+
+  return caller;
+}
+
 template <const bool kAccessCheck>
 ALWAYS_INLINE
 inline mirror::Class* CheckObjectAlloc(uint32_t type_idx,
diff --git a/runtime/entrypoints/quick/quick_alloc_entrypoints.cc b/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
index c049e3d..fa129af 100644
--- a/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
@@ -153,6 +153,32 @@
   } else { \
     return CheckAndAllocArrayFromCodeInstrumented(type_idx, component_count, method, self, true, allocator_type); \
   } \
+} \
+extern "C" mirror::String* artAllocStringFromBytesFromCode##suffix##suffix2( \
+    mirror::ByteArray* byte_array, int32_t high, int32_t offset, int32_t byte_count, \
+    Thread* self) \
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { \
+  ScopedQuickEntrypointChecks sqec(self); \
+  StackHandleScope<1> hs(self); \
+  Handle<mirror::ByteArray> handle_array(hs.NewHandle(byte_array)); \
+  return mirror::String::AllocFromByteArray<instrumented_bool>(self, byte_count, handle_array, \
+                                                               offset, high, allocator_type); \
+} \
+extern "C" mirror::String* artAllocStringFromCharsFromCode##suffix##suffix2( \
+    int32_t offset, int32_t char_count, mirror::CharArray* char_array, Thread* self) \
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { \
+  StackHandleScope<1> hs(self); \
+  Handle<mirror::CharArray> handle_array(hs.NewHandle(char_array)); \
+  return mirror::String::AllocFromCharArray<instrumented_bool>(self, char_count, handle_array, \
+                                                               offset, allocator_type); \
+} \
+extern "C" mirror::String* artAllocStringFromStringFromCode##suffix##suffix2( \
+    mirror::String* string, Thread* self) \
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { \
+  StackHandleScope<1> hs(self); \
+  Handle<mirror::String> handle_string(hs.NewHandle(string)); \
+  return mirror::String::AllocFromString<instrumented_bool>(self, handle_string->GetLength(), \
+                                                            handle_string, 0, allocator_type); \
 }
 
 #define GENERATE_ENTRYPOINTS_FOR_ALLOCATOR(suffix, allocator_type) \
@@ -176,6 +202,9 @@
 extern "C" void* art_quick_alloc_object_with_access_check##suffix(uint32_t type_idx, mirror::ArtMethod* ref); \
 extern "C" void* art_quick_check_and_alloc_array##suffix(uint32_t, int32_t, mirror::ArtMethod* ref); \
 extern "C" void* art_quick_check_and_alloc_array_with_access_check##suffix(uint32_t, int32_t, mirror::ArtMethod* ref); \
+extern "C" void* art_quick_alloc_string_from_bytes##suffix(void*, int32_t, int32_t, int32_t); \
+extern "C" void* art_quick_alloc_string_from_chars##suffix(int32_t, int32_t, void*); \
+extern "C" void* art_quick_alloc_string_from_string##suffix(void*); \
 extern "C" void* art_quick_alloc_array##suffix##_instrumented(uint32_t, int32_t, mirror::ArtMethod* ref); \
 extern "C" void* art_quick_alloc_array_resolved##suffix##_instrumented(mirror::Class* klass, int32_t, mirror::ArtMethod* ref); \
 extern "C" void* art_quick_alloc_array_with_access_check##suffix##_instrumented(uint32_t, int32_t, mirror::ArtMethod* ref); \
@@ -185,6 +214,9 @@
 extern "C" void* art_quick_alloc_object_with_access_check##suffix##_instrumented(uint32_t type_idx, mirror::ArtMethod* ref); \
 extern "C" void* art_quick_check_and_alloc_array##suffix##_instrumented(uint32_t, int32_t, mirror::ArtMethod* ref); \
 extern "C" void* art_quick_check_and_alloc_array_with_access_check##suffix##_instrumented(uint32_t, int32_t, mirror::ArtMethod* ref); \
+extern "C" void* art_quick_alloc_string_from_bytes##suffix##_instrumented(void*, int32_t, int32_t, int32_t); \
+extern "C" void* art_quick_alloc_string_from_chars##suffix##_instrumented(int32_t, int32_t, void*); \
+extern "C" void* art_quick_alloc_string_from_string##suffix##_instrumented(void*); \
 void SetQuickAllocEntryPoints##suffix(QuickEntryPoints* qpoints, bool instrumented) { \
   if (instrumented) { \
     qpoints->pAllocArray = art_quick_alloc_array##suffix##_instrumented; \
@@ -196,6 +228,9 @@
     qpoints->pAllocObjectWithAccessCheck = art_quick_alloc_object_with_access_check##suffix##_instrumented; \
     qpoints->pCheckAndAllocArray = art_quick_check_and_alloc_array##suffix##_instrumented; \
     qpoints->pCheckAndAllocArrayWithAccessCheck = art_quick_check_and_alloc_array_with_access_check##suffix##_instrumented; \
+    qpoints->pAllocStringFromBytes = art_quick_alloc_string_from_bytes##suffix##_instrumented; \
+    qpoints->pAllocStringFromChars = art_quick_alloc_string_from_chars##suffix##_instrumented; \
+    qpoints->pAllocStringFromString = art_quick_alloc_string_from_string##suffix##_instrumented; \
   } else { \
     qpoints->pAllocArray = art_quick_alloc_array##suffix; \
     qpoints->pAllocArrayResolved = art_quick_alloc_array_resolved##suffix; \
@@ -206,6 +241,9 @@
     qpoints->pAllocObjectWithAccessCheck = art_quick_alloc_object_with_access_check##suffix; \
     qpoints->pCheckAndAllocArray = art_quick_check_and_alloc_array##suffix; \
     qpoints->pCheckAndAllocArrayWithAccessCheck = art_quick_check_and_alloc_array_with_access_check##suffix; \
+    qpoints->pAllocStringFromBytes = art_quick_alloc_string_from_bytes##suffix; \
+    qpoints->pAllocStringFromChars = art_quick_alloc_string_from_chars##suffix; \
+    qpoints->pAllocStringFromString = art_quick_alloc_string_from_string##suffix; \
   } \
 }
 
diff --git a/runtime/entrypoints/quick/quick_default_externs.h b/runtime/entrypoints/quick/quick_default_externs.h
index b7e8d50..1fd8a949a 100644
--- a/runtime/entrypoints/quick/quick_default_externs.h
+++ b/runtime/entrypoints/quick/quick_default_externs.h
@@ -34,10 +34,10 @@
 extern "C" void art_quick_check_cast(const art::mirror::Class*, const art::mirror::Class*);
 
 // DexCache entrypoints.
-extern "C" void* art_quick_initialize_static_storage(uint32_t, art::mirror::ArtMethod*);
-extern "C" void* art_quick_initialize_type(uint32_t, art::mirror::ArtMethod*);
-extern "C" void* art_quick_initialize_type_and_verify_access(uint32_t, art::mirror::ArtMethod*);
-extern "C" void* art_quick_resolve_string(uint32_t, art::mirror::ArtMethod*);
+extern "C" void* art_quick_initialize_static_storage(uint32_t);
+extern "C" void* art_quick_initialize_type(uint32_t);
+extern "C" void* art_quick_initialize_type_and_verify_access(uint32_t);
+extern "C" void* art_quick_resolve_string(uint32_t);
 
 // Field entrypoints.
 extern "C" int art_quick_set8_instance(uint32_t, void*, int8_t);
diff --git a/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc b/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc
index 348495d..46629f5 100644
--- a/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc
@@ -26,41 +26,41 @@
 namespace art {
 
 extern "C" mirror::Class* artInitializeStaticStorageFromCode(uint32_t type_idx,
-                                                             mirror::ArtMethod* referrer,
                                                              Thread* self)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   // Called to ensure static storage base is initialized for direct static field reads and writes.
   // A class may be accessing another class' fields when it doesn't have access, as access has been
   // given by inheritance.
   ScopedQuickEntrypointChecks sqec(self);
-  return ResolveVerifyAndClinit(type_idx, referrer, self, true, false);
+  auto* caller = GetCalleeSaveMethodCaller(self, Runtime::kRefsOnly);
+  return ResolveVerifyAndClinit(type_idx, caller, self, true, false);
 }
 
 extern "C" mirror::Class* artInitializeTypeFromCode(uint32_t type_idx,
-                                                    mirror::ArtMethod* referrer,
                                                     Thread* self)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   // Called when method->dex_cache_resolved_types_[] misses.
   ScopedQuickEntrypointChecks sqec(self);
-  return ResolveVerifyAndClinit(type_idx, referrer, self, false, false);
+  auto* caller = GetCalleeSaveMethodCaller(self, Runtime::kRefsOnly);
+  return ResolveVerifyAndClinit(type_idx, caller, self, false, false);
 }
 
 extern "C" mirror::Class* artInitializeTypeAndVerifyAccessFromCode(uint32_t type_idx,
-                                                                   mirror::ArtMethod* referrer,
                                                                    Thread* self)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   // Called when caller isn't guaranteed to have access to a type and the dex cache may be
   // unpopulated.
   ScopedQuickEntrypointChecks sqec(self);
-  return ResolveVerifyAndClinit(type_idx, referrer, self, false, true);
+  auto* caller = GetCalleeSaveMethodCaller(self, Runtime::kRefsOnly);
+  return ResolveVerifyAndClinit(type_idx, caller, self, false, true);
 }
 
 extern "C" mirror::String* artResolveStringFromCode(int32_t string_idx,
-                                                    mirror::ArtMethod* referrer,
                                                     Thread* self)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
-  return ResolveStringFromCode(referrer, string_idx);
+  auto* caller = GetCalleeSaveMethodCaller(self, Runtime::kRefsOnly);
+  return ResolveStringFromCode(caller, string_idx);
 }
 
 }  // namespace art
diff --git a/runtime/entrypoints/quick/quick_entrypoints.h b/runtime/entrypoints/quick/quick_entrypoints.h
index db8c0e3..b72ce34 100644
--- a/runtime/entrypoints/quick/quick_entrypoints.h
+++ b/runtime/entrypoints/quick/quick_entrypoints.h
@@ -32,6 +32,8 @@
 class ArtMethod;
 class Class;
 class Object;
+template<class MirrorType>
+class CompressedReference;
 }  // namespace mirror
 
 class Thread;
@@ -65,6 +67,10 @@
                                                              jobject locked, Thread* self)
     NO_THREAD_SAFETY_ANALYSIS HOT_ATTR;
 
+extern void ReadBarrierJni(mirror::CompressedReference<mirror::Object>* handle_on_stack,
+                           Thread* self)
+    NO_THREAD_SAFETY_ANALYSIS HOT_ATTR;
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_ENTRYPOINTS_QUICK_QUICK_ENTRYPOINTS_H_
diff --git a/runtime/entrypoints/quick/quick_entrypoints_list.h b/runtime/entrypoints/quick/quick_entrypoints_list.h
index eaf874e..0aca58f 100644
--- a/runtime/entrypoints/quick/quick_entrypoints_list.h
+++ b/runtime/entrypoints/quick/quick_entrypoints_list.h
@@ -29,14 +29,17 @@
   V(AllocObjectWithAccessCheck, void*, uint32_t, mirror::ArtMethod*) \
   V(CheckAndAllocArray, void*, uint32_t, int32_t, mirror::ArtMethod*) \
   V(CheckAndAllocArrayWithAccessCheck, void*, uint32_t, int32_t, mirror::ArtMethod*) \
+  V(AllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t) \
+  V(AllocStringFromChars, void*, int32_t, int32_t, void*) \
+  V(AllocStringFromString, void*, void*) \
 \
   V(InstanceofNonTrivial, uint32_t, const mirror::Class*, const mirror::Class*) \
   V(CheckCast, void, const mirror::Class*, const mirror::Class*) \
 \
-  V(InitializeStaticStorage, void*, uint32_t, mirror::ArtMethod*) \
-  V(InitializeTypeAndVerifyAccess, void*, uint32_t, mirror::ArtMethod*) \
-  V(InitializeType, void*, uint32_t, mirror::ArtMethod*) \
-  V(ResolveString, void*, uint32_t, mirror::ArtMethod*) \
+  V(InitializeStaticStorage, void*, uint32_t) \
+  V(InitializeTypeAndVerifyAccess, void*, uint32_t) \
+  V(InitializeType, void*, uint32_t) \
+  V(ResolveString, void*, uint32_t) \
 \
   V(Set8Instance, int, uint32_t, void*, int8_t) \
   V(Set8Static, int, uint32_t, int8_t) \
@@ -123,8 +126,26 @@
   V(Deoptimize, void, void) \
 \
   V(A64Load, int64_t, volatile const int64_t *) \
-  V(A64Store, void, volatile int64_t *, int64_t)
-
+  V(A64Store, void, volatile int64_t *, int64_t) \
+\
+  V(NewEmptyString, void) \
+  V(NewStringFromBytes_B, void) \
+  V(NewStringFromBytes_BI, void) \
+  V(NewStringFromBytes_BII, void) \
+  V(NewStringFromBytes_BIII, void) \
+  V(NewStringFromBytes_BIIString, void) \
+  V(NewStringFromBytes_BString, void) \
+  V(NewStringFromBytes_BIICharset, void) \
+  V(NewStringFromBytes_BCharset, void) \
+  V(NewStringFromChars_C, void) \
+  V(NewStringFromChars_CII, void) \
+  V(NewStringFromChars_IIC, void) \
+  V(NewStringFromCodePoints, void) \
+  V(NewStringFromString, void) \
+  V(NewStringFromStringBuffer, void) \
+  V(NewStringFromStringBuilder, void) \
+\
+  V(ReadBarrierJni, void, mirror::CompressedReference<mirror::Object>*, Thread*)
 
 #endif  // ART_RUNTIME_ENTRYPOINTS_QUICK_QUICK_ENTRYPOINTS_LIST_H_
 #undef ART_RUNTIME_ENTRYPOINTS_QUICK_QUICK_ENTRYPOINTS_LIST_H_   // #define is only for lint.
diff --git a/runtime/entrypoints/quick/quick_jni_entrypoints.cc b/runtime/entrypoints/quick/quick_jni_entrypoints.cc
index e478d2a..51817a2 100644
--- a/runtime/entrypoints/quick/quick_jni_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_jni_entrypoints.cc
@@ -22,6 +22,13 @@
 
 namespace art {
 
+extern void ReadBarrierJni(mirror::CompressedReference<mirror::Object>* handle_on_stack,
+                           Thread* self ATTRIBUTE_UNUSED) {
+  // Call the read barrier and update the handle.
+  mirror::Object* to_ref = ReadBarrier::BarrierForRoot(handle_on_stack);
+  handle_on_stack->Assign(to_ref);
+}
+
 // Called on entry to JNI, transition out of Runnable and release share of mutator_lock_.
 extern uint32_t JniMethodStart(Thread* self) {
   JNIEnvExt* env = self->GetJniEnv();
diff --git a/runtime/entrypoints_order_test.cc b/runtime/entrypoints_order_test.cc
index 0664fa0..482f656 100644
--- a/runtime/entrypoints_order_test.cc
+++ b/runtime/entrypoints_order_test.cc
@@ -167,7 +167,13 @@
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pCheckAndAllocArray, pCheckAndAllocArrayWithAccessCheck,
                          sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pCheckAndAllocArrayWithAccessCheck,
-                         pInstanceofNonTrivial, sizeof(void*));
+                         pAllocStringFromBytes, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pAllocStringFromBytes, pAllocStringFromChars,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pAllocStringFromChars, pAllocStringFromString,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pAllocStringFromString, pInstanceofNonTrivial,
+                         sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pInstanceofNonTrivial, pCheckCast, sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pCheckCast, pInitializeStaticStorage, sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pInitializeStaticStorage, pInitializeTypeAndVerifyAccess,
@@ -269,7 +275,40 @@
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pDeoptimize, pA64Load, sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pA64Load, pA64Store, sizeof(void*));
 
-    CHECKED(OFFSETOF_MEMBER(QuickEntryPoints, pA64Store)
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pA64Store, pNewEmptyString, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pNewEmptyString, pNewStringFromBytes_B, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pNewStringFromBytes_B, pNewStringFromBytes_BI,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pNewStringFromBytes_BI, pNewStringFromBytes_BII,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pNewStringFromBytes_BII, pNewStringFromBytes_BIII,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pNewStringFromBytes_BIII, pNewStringFromBytes_BIIString,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pNewStringFromBytes_BIIString,
+                         pNewStringFromBytes_BString, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pNewStringFromBytes_BString,
+                         pNewStringFromBytes_BIICharset, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pNewStringFromBytes_BIICharset,
+                         pNewStringFromBytes_BCharset, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pNewStringFromBytes_BCharset,
+                         pNewStringFromChars_C, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pNewStringFromChars_C, pNewStringFromChars_CII,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pNewStringFromChars_CII, pNewStringFromChars_IIC,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pNewStringFromChars_IIC, pNewStringFromCodePoints,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pNewStringFromCodePoints, pNewStringFromString,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pNewStringFromString, pNewStringFromStringBuffer,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pNewStringFromStringBuffer, pNewStringFromStringBuilder,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pNewStringFromStringBuilder, pReadBarrierJni,
+                         sizeof(void*));
+
+    CHECKED(OFFSETOF_MEMBER(QuickEntryPoints, pReadBarrierJni)
             + sizeof(void*) == sizeof(QuickEntryPoints), QuickEntryPoints_all);
   }
 };
diff --git a/runtime/gc/accounting/bitmap.h b/runtime/gc/accounting/bitmap.h
index b294d49..eb00472 100644
--- a/runtime/gc/accounting/bitmap.h
+++ b/runtime/gc/accounting/bitmap.h
@@ -121,7 +121,7 @@
   const size_t bitmap_size_;
 
  private:
-  DISALLOW_COPY_AND_ASSIGN(Bitmap);
+  DISALLOW_IMPLICIT_CONSTRUCTORS(Bitmap);
 };
 
 // One bit per kAlignment in range (start, end]
@@ -184,6 +184,8 @@
 
   uintptr_t const cover_begin_;
   uintptr_t const cover_end_;
+
+  DISALLOW_IMPLICIT_CONSTRUCTORS(MemoryRangeBitmap);
 };
 
 }  // namespace accounting
diff --git a/runtime/gc/accounting/card_table.h b/runtime/gc/accounting/card_table.h
index 75ef58a..34e6aa3 100644
--- a/runtime/gc/accounting/card_table.h
+++ b/runtime/gc/accounting/card_table.h
@@ -146,6 +146,8 @@
   // Card table doesn't begin at the beginning of the mem_map_, instead it is displaced by offset
   // to allow the byte value of biased_begin_ to equal GC_CARD_DIRTY
   const size_t offset_;
+
+  DISALLOW_IMPLICIT_CONSTRUCTORS(CardTable);
 };
 
 }  // namespace accounting
diff --git a/runtime/gc/accounting/heap_bitmap.h b/runtime/gc/accounting/heap_bitmap.h
index 245e074..1648aef 100644
--- a/runtime/gc/accounting/heap_bitmap.h
+++ b/runtime/gc/accounting/heap_bitmap.h
@@ -39,9 +39,11 @@
   void Clear(const mirror::Object* obj) EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
   template<typename LargeObjectSetVisitor>
   bool Set(const mirror::Object* obj, const LargeObjectSetVisitor& visitor)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) ALWAYS_INLINE;
   template<typename LargeObjectSetVisitor>
   bool AtomicTestAndSet(const mirror::Object* obj, const LargeObjectSetVisitor& visitor)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) ALWAYS_INLINE;
   ContinuousSpaceBitmap* GetContinuousSpaceBitmap(const mirror::Object* obj) const;
   LargeObjectBitmap* GetLargeObjectBitmap(const mirror::Object* obj) const;
diff --git a/runtime/gc/allocator/rosalloc.cc b/runtime/gc/allocator/rosalloc.cc
index 85234dc..49c7fda 100644
--- a/runtime/gc/allocator/rosalloc.cc
+++ b/runtime/gc/allocator/rosalloc.cc
@@ -1042,10 +1042,11 @@
 
 inline uint32_t RosAlloc::Run::GetBitmapLastVectorMask(size_t num_slots, size_t num_vec) {
   const size_t kBitsPerVec = 32;
-  DCHECK_GE(num_slots * kBitsPerVec, num_vec);
+  DCHECK_GE(num_vec * kBitsPerVec, num_slots);
+  DCHECK_NE(num_vec, 0U);
   size_t remain = num_vec * kBitsPerVec - num_slots;
-  DCHECK_NE(remain, kBitsPerVec);
-  return ((1U << remain) - 1) << (kBitsPerVec - remain);
+  DCHECK_LT(remain, kBitsPerVec);
+  return ((1U << remain) - 1) << ((kBitsPerVec - remain) & 0x1F);
 }
 
 inline bool RosAlloc::Run::IsAllFree() {
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index eabb1c2..26f349a 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -718,6 +718,7 @@
       // Leave References gray so that GetReferent() will trigger RB.
       CHECK(to_ref->AsReference()->IsEnqueued()) << "Left unenqueued ref gray " << to_ref;
     } else {
+#ifdef USE_BAKER_OR_BROOKS_READ_BARRIER
       if (kUseBakerReadBarrier) {
         if (region_space_->IsInToSpace(to_ref)) {
           // If to-space, change from gray to white.
@@ -739,6 +740,9 @@
           CHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::BlackPtr());
         }
       }
+#else
+      DCHECK(!kUseBakerReadBarrier);
+#endif
     }
     if (ReadBarrier::kEnableToSpaceInvariantChecks || kIsDebugBuild) {
       ConcurrentCopyingAssertToSpaceInvariantObjectVisitor visitor(this);
@@ -815,7 +819,7 @@
     DCHECK(obj != nullptr);
     DCHECK(collector_->heap_->GetMarkBitmap()->Test(obj)) << obj;
     DCHECK_EQ(obj->GetReadBarrierPointer(), ReadBarrier::BlackPtr()) << obj;
-    obj->SetReadBarrierPointer(ReadBarrier::WhitePtr());
+    obj->AtomicSetReadBarrierPointer(ReadBarrier::BlackPtr(), ReadBarrier::WhitePtr());
     DCHECK_EQ(obj->GetReadBarrierPointer(), ReadBarrier::WhitePtr()) << obj;
   }
 
@@ -963,7 +967,8 @@
     if (kUseBakerReadBarrier) {
       DCHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::BlackPtr()) << ref;
       // Clear the black ptr.
-      ref->SetReadBarrierPointer(ReadBarrier::WhitePtr());
+      ref->AtomicSetReadBarrierPointer(ReadBarrier::BlackPtr(), ReadBarrier::WhitePtr());
+      DCHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::WhitePtr()) << ref;
     }
     size_t obj_size = ref->SizeOf();
     size_t alloc_size = RoundUp(obj_size, space::RegionSpace::kAlignment);
@@ -1330,10 +1335,6 @@
   while (true) {
     // Copy the object. TODO: copy only the lockword in the second iteration and on?
     memcpy(to_ref, from_ref, obj_size);
-    // Set the gray ptr.
-    if (kUseBakerReadBarrier) {
-      to_ref->SetReadBarrierPointer(ReadBarrier::GrayPtr());
-    }
 
     LockWord old_lock_word = to_ref->GetLockWord(false);
 
@@ -1378,6 +1379,11 @@
       return to_ref;
     }
 
+    // Set the gray ptr.
+    if (kUseBakerReadBarrier) {
+      to_ref->SetReadBarrierPointer(ReadBarrier::GrayPtr());
+    }
+
     LockWord new_lock_word = LockWord::FromForwardingAddress(reinterpret_cast<size_t>(to_ref));
 
     // Try to atomically write the fwd ptr.
@@ -1484,6 +1490,21 @@
   }
   DCHECK(from_ref != nullptr);
   DCHECK(heap_->collector_type_ == kCollectorTypeCC);
+  if (kUseBakerReadBarrier && !is_active_) {
+    // In the lock word forward address state, the read barrier bits
+    // in the lock word are part of the stored forwarding address and
+    // invalid. This is usually OK as the from-space copies of objects
+    // aren't accessed by mutators due to the to-space
+    // invariant. However, during the dex2oat image writing relocation
+    // and the zygote compaction, objects can be in the forward
+    // address state (to store the forward/relocation addresses) and
+    // they can still be accessed and the invalid read barrier bits
+    // are consulted. If they look like gray but aren't really, the
+    // read barrier slow path can trigger when it shouldn't. To guard
+    // against this, return here if the CC collector isn't running.
+    return from_ref;
+  }
+  DCHECK(region_space_ != nullptr) << "Read barrier slow path taken when CC isn't running?";
   space::RegionSpace::RegionType rtype = region_space_->GetRegionType(from_ref);
   if (rtype == space::RegionSpace::RegionType::kRegionTypeToSpace) {
     // It's already marked.
diff --git a/runtime/gc/collector/concurrent_copying.h b/runtime/gc/collector/concurrent_copying.h
index 93de035..60ea6b6 100644
--- a/runtime/gc/collector/concurrent_copying.h
+++ b/runtime/gc/collector/concurrent_copying.h
@@ -279,7 +279,7 @@
   friend class FlipCallback;
   friend class ConcurrentCopyingComputeUnevacFromSpaceLiveRatioVisitor;
 
-  DISALLOW_COPY_AND_ASSIGN(ConcurrentCopying);
+  DISALLOW_IMPLICIT_CONSTRUCTORS(ConcurrentCopying);
 };
 
 }  // namespace collector
diff --git a/runtime/gc/collector/garbage_collector.h b/runtime/gc/collector/garbage_collector.h
index c5a8d5d..9b76d1a 100644
--- a/runtime/gc/collector/garbage_collector.h
+++ b/runtime/gc/collector/garbage_collector.h
@@ -190,6 +190,9 @@
   int64_t total_freed_bytes_;
   CumulativeLogger cumulative_timings_;
   mutable Mutex pause_histogram_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+
+ private:
+  DISALLOW_IMPLICIT_CONSTRUCTORS(GarbageCollector);
 };
 
 }  // namespace collector
diff --git a/runtime/gc/collector/mark_compact.h b/runtime/gc/collector/mark_compact.h
index 4337644..f59a2cd 100644
--- a/runtime/gc/collector/mark_compact.h
+++ b/runtime/gc/collector/mark_compact.h
@@ -251,7 +251,7 @@
   friend class UpdateReferenceVisitor;
   friend class UpdateRootVisitor;
 
-  DISALLOW_COPY_AND_ASSIGN(MarkCompact);
+  DISALLOW_IMPLICIT_CONSTRUCTORS(MarkCompact);
 };
 
 }  // namespace collector
diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc
index f0e8d14..53e56da 100644
--- a/runtime/gc/collector/mark_sweep.cc
+++ b/runtime/gc/collector/mark_sweep.cc
@@ -368,10 +368,13 @@
 
 class MarkSweepMarkObjectSlowPath {
  public:
-  explicit MarkSweepMarkObjectSlowPath(MarkSweep* mark_sweep) : mark_sweep_(mark_sweep) {
+  explicit MarkSweepMarkObjectSlowPath(MarkSweep* mark_sweep, Object* holder = nullptr,
+                                       MemberOffset offset = MemberOffset(0))
+      : mark_sweep_(mark_sweep), holder_(holder), offset_(offset) {
   }
 
-  void operator()(const Object* obj) const ALWAYS_INLINE {
+  void operator()(const Object* obj) const ALWAYS_INLINE
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     if (kProfileLargeObjects) {
       // TODO: Differentiate between marking and testing somehow.
       ++mark_sweep_->large_object_test_;
@@ -381,18 +384,29 @@
     if (UNLIKELY(obj == nullptr || !IsAligned<kPageSize>(obj) ||
                  (kIsDebugBuild && large_object_space != nullptr &&
                      !large_object_space->Contains(obj)))) {
-      LOG(ERROR) << "Tried to mark " << obj << " not contained by any spaces";
-      LOG(ERROR) << "Attempting see if it's a bad root";
+      LOG(INTERNAL_FATAL) << "Tried to mark " << obj << " not contained by any spaces";
+      LOG(INTERNAL_FATAL) << "Attempting see if it's a bad root";
       mark_sweep_->VerifyRoots();
+      if (holder_ != nullptr) {
+        ArtField* field = holder_->FindFieldByOffset(offset_);
+        LOG(INTERNAL_FATAL) << "Field info: holder=" << holder_
+                            << " holder_type=" << PrettyTypeOf(holder_)
+                            << " offset=" << offset_.Uint32Value()
+                            << " field=" << (field != nullptr ? field->GetName() : "nullptr");
+      }
+      PrintFileToLog("/proc/self/maps", LogSeverity::INTERNAL_FATAL);
+      MemMap::DumpMaps(LOG(INTERNAL_FATAL), true);
       LOG(FATAL) << "Can't mark invalid object";
     }
   }
 
  private:
   MarkSweep* const mark_sweep_;
+  mirror::Object* const holder_;
+  MemberOffset offset_;
 };
 
-inline void MarkSweep::MarkObjectNonNull(Object* obj) {
+inline void MarkSweep::MarkObjectNonNull(Object* obj, Object* holder, MemberOffset offset) {
   DCHECK(obj != nullptr);
   if (kUseBakerOrBrooksReadBarrier) {
     // Verify all the objects have the correct pointer installed.
@@ -414,7 +428,7 @@
     if (kCountMarkedObjects) {
       ++mark_slowpath_count_;
     }
-    MarkSweepMarkObjectSlowPath visitor(this);
+    MarkSweepMarkObjectSlowPath visitor(this, holder, offset);
     // TODO: We already know that the object is not in the current_space_bitmap_ but MarkBitmap::Set
     // will check again.
     if (!mark_bitmap_->Set(obj, visitor)) {
@@ -454,9 +468,9 @@
 }
 
 // Used to mark objects when processing the mark stack. If an object is null, it is not marked.
-inline void MarkSweep::MarkObject(Object* obj) {
+inline void MarkSweep::MarkObject(Object* obj, Object* holder, MemberOffset offset) {
   if (obj != nullptr) {
-    MarkObjectNonNull(obj);
+    MarkObjectNonNull(obj, holder, offset);
   } else if (kCountMarkedObjects) {
     ++mark_null_count_;
   }
@@ -498,7 +512,7 @@
     if (heap->GetLiveBitmap()->GetContinuousSpaceBitmap(root) == nullptr) {
       space::LargeObjectSpace* large_object_space = heap->GetLargeObjectsSpace();
       if (large_object_space != nullptr && !large_object_space->Contains(root)) {
-        LOG(ERROR) << "Found invalid root: " << root << " " << info;
+        LOG(INTERNAL_FATAL) << "Found invalid root: " << root << " " << info;
       }
     }
   }
@@ -1207,7 +1221,7 @@
       Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
       Locks::heap_bitmap_lock_->AssertExclusiveHeld(Thread::Current());
     }
-    mark_sweep_->MarkObject(obj->GetFieldObject<mirror::Object>(offset));
+    mark_sweep_->MarkObject(obj->GetFieldObject<mirror::Object>(offset), obj, offset);
   }
 
  private:
diff --git a/runtime/gc/collector/mark_sweep.h b/runtime/gc/collector/mark_sweep.h
index fad3403..d29d87a 100644
--- a/runtime/gc/collector/mark_sweep.h
+++ b/runtime/gc/collector/mark_sweep.h
@@ -199,7 +199,8 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Marks an object.
-  void MarkObject(mirror::Object* obj)
+  void MarkObject(mirror::Object* obj, mirror::Object* holder = nullptr,
+                  MemberOffset offset = MemberOffset(0))
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
@@ -222,7 +223,8 @@
   static void VerifyImageRootVisitor(mirror::Object* root, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
 
-  void MarkObjectNonNull(mirror::Object* obj)
+  void MarkObjectNonNull(mirror::Object* obj, mirror::Object* holder = nullptr,
+                         MemberOffset offset = MemberOffset(0))
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
@@ -336,7 +338,7 @@
   friend class VerifyRootMarkedVisitor;
   friend class VerifyRootVisitor;
 
-  DISALLOW_COPY_AND_ASSIGN(MarkSweep);
+  DISALLOW_IMPLICIT_CONSTRUCTORS(MarkSweep);
 };
 
 }  // namespace collector
diff --git a/runtime/gc/collector/partial_mark_sweep.h b/runtime/gc/collector/partial_mark_sweep.h
index ac0d068..1a211cd 100644
--- a/runtime/gc/collector/partial_mark_sweep.h
+++ b/runtime/gc/collector/partial_mark_sweep.h
@@ -40,7 +40,7 @@
   virtual void BindBitmaps() OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
  private:
-  DISALLOW_COPY_AND_ASSIGN(PartialMarkSweep);
+  DISALLOW_IMPLICIT_CONSTRUCTORS(PartialMarkSweep);
 };
 
 }  // namespace collector
diff --git a/runtime/gc/collector/semi_space-inl.h b/runtime/gc/collector/semi_space-inl.h
index 922a71c..7b19dc9 100644
--- a/runtime/gc/collector/semi_space-inl.h
+++ b/runtime/gc/collector/semi_space-inl.h
@@ -60,10 +60,6 @@
   if (obj == nullptr) {
     return;
   }
-  if (kUseBakerOrBrooksReadBarrier) {
-    // Verify all the objects have the correct forward pointer installed.
-    obj->AssertReadBarrierPointer();
-  }
   if (from_space_->HasAddress(obj)) {
     mirror::Object* forward_address = GetForwardingAddressInFromSpace(obj);
     // If the object has already been moved, return the new forward address.
diff --git a/runtime/gc/collector/semi_space.h b/runtime/gc/collector/semi_space.h
index 61fbead..3c25f53 100644
--- a/runtime/gc/collector/semi_space.h
+++ b/runtime/gc/collector/semi_space.h
@@ -278,7 +278,7 @@
 
  private:
   friend class BitmapSetSlowPathVisitor;
-  DISALLOW_COPY_AND_ASSIGN(SemiSpace);
+  DISALLOW_IMPLICIT_CONSTRUCTORS(SemiSpace);
 };
 
 }  // namespace collector
diff --git a/runtime/gc/collector/sticky_mark_sweep.h b/runtime/gc/collector/sticky_mark_sweep.h
index 4f9dabf..b9ef137 100644
--- a/runtime/gc/collector/sticky_mark_sweep.h
+++ b/runtime/gc/collector/sticky_mark_sweep.h
@@ -47,7 +47,7 @@
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
  private:
-  DISALLOW_COPY_AND_ASSIGN(StickyMarkSweep);
+  DISALLOW_IMPLICIT_CONSTRUCTORS(StickyMarkSweep);
 };
 
 }  // namespace collector
diff --git a/runtime/gc/heap-inl.h b/runtime/gc/heap-inl.h
index 3e56205..fbf36e8 100644
--- a/runtime/gc/heap-inl.h
+++ b/runtime/gc/heap-inl.h
@@ -371,11 +371,8 @@
 }
 
 inline Heap::AllocationTimer::AllocationTimer(Heap* heap, mirror::Object** allocated_obj_ptr)
-    : heap_(heap), allocated_obj_ptr_(allocated_obj_ptr) {
-  if (kMeasureAllocationTime) {
-    allocation_start_time_ = NanoTime() / kTimeAdjust;
-  }
-}
+    : heap_(heap), allocated_obj_ptr_(allocated_obj_ptr),
+      allocation_start_time_(kMeasureAllocationTime ? NanoTime() / kTimeAdjust : 0u) { }
 
 inline Heap::AllocationTimer::~AllocationTimer() {
   if (kMeasureAllocationTime) {
@@ -419,7 +416,7 @@
 inline void Heap::CheckConcurrentGC(Thread* self, size_t new_num_bytes_allocated,
                                     mirror::Object** obj) {
   if (UNLIKELY(new_num_bytes_allocated >= concurrent_start_bytes_)) {
-    RequestConcurrentGCAndSaveObject(self, obj);
+    RequestConcurrentGCAndSaveObject(self, false, obj);
   }
 }
 
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index b80c4b6..11a0e3c 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -491,7 +491,7 @@
     bool no_gap = MemMap::CheckNoGaps(GetImageSpace()->GetMemMap(),
                                       non_moving_space_->GetMemMap());
     if (!no_gap) {
-      MemMap::DumpMaps(LOG(ERROR));
+      MemMap::DumpMaps(LOG(ERROR), true);
       LOG(FATAL) << "There's a gap between the image space and the non-moving space";
     }
   }
@@ -1612,10 +1612,19 @@
 }
 
 size_t Heap::GetObjectsAllocated() const {
+  Thread* self = Thread::Current();
+  ScopedThreadStateChange tsc(self, kWaitingForGetObjectsAllocated);
+  auto* tl = Runtime::Current()->GetThreadList();
+  // Need SuspendAll here to prevent lock violation if RosAlloc does it during InspectAll.
+  tl->SuspendAll(__FUNCTION__);
   size_t total = 0;
-  for (space::AllocSpace* space : alloc_spaces_) {
-    total += space->GetObjectsAllocated();
+  {
+    ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
+    for (space::AllocSpace* space : alloc_spaces_) {
+      total += space->GetObjectsAllocated();
+    }
   }
+  tl->ResumeAll();
   return total;
 }
 
@@ -3325,20 +3334,24 @@
   *object = soa.Decode<mirror::Object*>(arg.get());
 }
 
-void Heap::RequestConcurrentGCAndSaveObject(Thread* self, mirror::Object** obj) {
+void Heap::RequestConcurrentGCAndSaveObject(Thread* self, bool force_full, mirror::Object** obj) {
   StackHandleScope<1> hs(self);
   HandleWrapper<mirror::Object> wrapper(hs.NewHandleWrapper(obj));
-  RequestConcurrentGC(self);
+  RequestConcurrentGC(self, force_full);
 }
 
 class Heap::ConcurrentGCTask : public HeapTask {
  public:
-  explicit ConcurrentGCTask(uint64_t target_time) : HeapTask(target_time) { }
+  explicit ConcurrentGCTask(uint64_t target_time, bool force_full)
+    : HeapTask(target_time), force_full_(force_full) { }
   virtual void Run(Thread* self) OVERRIDE {
     gc::Heap* heap = Runtime::Current()->GetHeap();
-    heap->ConcurrentGC(self);
+    heap->ConcurrentGC(self, force_full_);
     heap->ClearConcurrentGCRequest();
   }
+
+ private:
+  const bool force_full_;  // If true, force full (or partial) collection.
 };
 
 static bool CanAddHeapTask(Thread* self) LOCKS_EXCLUDED(Locks::runtime_shutdown_lock_) {
@@ -3351,24 +3364,30 @@
   concurrent_gc_pending_.StoreRelaxed(false);
 }
 
-void Heap::RequestConcurrentGC(Thread* self) {
+void Heap::RequestConcurrentGC(Thread* self, bool force_full) {
   if (CanAddHeapTask(self) &&
       concurrent_gc_pending_.CompareExchangeStrongSequentiallyConsistent(false, true)) {
-    task_processor_->AddTask(self, new ConcurrentGCTask(NanoTime()));  // Start straight away.
+    task_processor_->AddTask(self, new ConcurrentGCTask(NanoTime(),  // Start straight away.
+                                                        force_full));
   }
 }
 
-void Heap::ConcurrentGC(Thread* self) {
+void Heap::ConcurrentGC(Thread* self, bool force_full) {
   if (!Runtime::Current()->IsShuttingDown(self)) {
     // Wait for any GCs currently running to finish.
     if (WaitForGcToComplete(kGcCauseBackground, self) == collector::kGcTypeNone) {
       // If the we can't run the GC type we wanted to run, find the next appropriate one and try that
       // instead. E.g. can't do partial, so do full instead.
-      if (CollectGarbageInternal(next_gc_type_, kGcCauseBackground, false) ==
+      collector::GcType next_gc_type = next_gc_type_;
+      // If forcing full and next gc type is sticky, override with a non-sticky type.
+      if (force_full && next_gc_type == collector::kGcTypeSticky) {
+        next_gc_type = HasZygoteSpace() ? collector::kGcTypePartial : collector::kGcTypeFull;
+      }
+      if (CollectGarbageInternal(next_gc_type, kGcCauseBackground, false) ==
           collector::kGcTypeNone) {
         for (collector::GcType gc_type : gc_plan_) {
           // Attempt to run the collector, if we succeed, we are done.
-          if (gc_type > next_gc_type_ &&
+          if (gc_type > next_gc_type &&
               CollectGarbageInternal(gc_type, kGcCauseBackground, false) !=
                   collector::kGcTypeNone) {
             break;
@@ -3553,7 +3572,7 @@
       UpdateMaxNativeFootprint();
     } else if (!IsGCRequestPending()) {
       if (IsGcConcurrent()) {
-        RequestConcurrentGC(self);
+        RequestConcurrentGC(self, true);  // Request non-sticky type.
       } else {
         CollectGarbageInternal(gc_type, kGcCauseForNativeAlloc, false);
       }
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index 565687c..90249f9 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -288,7 +288,7 @@
 
   // Does a concurrent GC, should only be called by the GC daemon thread
   // through runtime.
-  void ConcurrentGC(Thread* self) LOCKS_EXCLUDED(Locks::runtime_shutdown_lock_);
+  void ConcurrentGC(Thread* self, bool force_full) LOCKS_EXCLUDED(Locks::runtime_shutdown_lock_);
 
   // Implements VMDebug.countInstancesOfClass and JDWP VM_InstanceCount.
   // The boolean decides whether to use IsAssignableFrom or == when comparing classes.
@@ -664,7 +664,7 @@
   void RequestTrim(Thread* self) LOCKS_EXCLUDED(pending_task_lock_);
 
   // Request asynchronous GC.
-  void RequestConcurrentGC(Thread* self) LOCKS_EXCLUDED(pending_task_lock_);
+  void RequestConcurrentGC(Thread* self, bool force_full) LOCKS_EXCLUDED(pending_task_lock_);
 
   // Whether or not we may use a garbage collector, used so that we only create collectors we need.
   bool MayUseCollector(CollectorType type) const;
@@ -786,7 +786,7 @@
   void RequestCollectorTransition(CollectorType desired_collector_type, uint64_t delta_time)
       LOCKS_EXCLUDED(pending_task_lock_);
 
-  void RequestConcurrentGCAndSaveObject(Thread* self, mirror::Object** obj)
+  void RequestConcurrentGCAndSaveObject(Thread* self, bool force_full, mirror::Object** obj)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   bool IsGCRequestPending() const;
 
@@ -1201,41 +1201,23 @@
   friend class VerifyReferenceVisitor;
   friend class VerifyObjectVisitor;
   friend class ScopedHeapFill;
-  friend class ScopedHeapLock;
   friend class space::SpaceTest;
 
   class AllocationTimer {
-   private:
-    Heap* heap_;
-    mirror::Object** allocated_obj_ptr_;
-    uint64_t allocation_start_time_;
    public:
-    AllocationTimer(Heap* heap, mirror::Object** allocated_obj_ptr);
-    ~AllocationTimer();
+    ALWAYS_INLINE AllocationTimer(Heap* heap, mirror::Object** allocated_obj_ptr);
+    ALWAYS_INLINE ~AllocationTimer();
+   private:
+    Heap* const heap_;
+    mirror::Object** allocated_obj_ptr_;
+    const uint64_t allocation_start_time_;
+
+    DISALLOW_IMPLICIT_CONSTRUCTORS(AllocationTimer);
   };
 
   DISALLOW_IMPLICIT_CONSTRUCTORS(Heap);
 };
 
-// ScopedHeapFill changes the bytes allocated counter to be equal to the growth limit. This
-// causes the next allocation to perform a GC and possibly an OOM. It can be used to ensure that a
-// GC happens in specific methods such as ThrowIllegalMonitorStateExceptionF in Monitor::Wait.
-class ScopedHeapFill {
- public:
-  explicit ScopedHeapFill(Heap* heap)
-      : heap_(heap),
-        delta_(heap_->GetMaxMemory() - heap_->GetBytesAllocated()) {
-    heap_->num_bytes_allocated_.FetchAndAddSequentiallyConsistent(delta_);
-  }
-  ~ScopedHeapFill() {
-    heap_->num_bytes_allocated_.FetchAndSubSequentiallyConsistent(delta_);
-  }
-
- private:
-  Heap* const heap_;
-  const int64_t delta_;
-};
-
 }  // namespace gc
 }  // namespace art
 
diff --git a/runtime/gc/reference_processor.h b/runtime/gc/reference_processor.h
index c67fd98..a44319b 100644
--- a/runtime/gc/reference_processor.h
+++ b/runtime/gc/reference_processor.h
@@ -81,6 +81,9 @@
     IsHeapReferenceMarkedCallback* is_marked_callback_;
     MarkObjectCallback* mark_callback_;
     void* arg_;
+
+   private:
+    DISALLOW_IMPLICIT_CONSTRUCTORS(ProcessReferencesArgs);
   };
   bool SlowPathEnabled() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   // Called by ProcessReferences.
@@ -105,6 +108,8 @@
   ReferenceQueue finalizer_reference_queue_;
   ReferenceQueue phantom_reference_queue_;
   ReferenceQueue cleared_references_;
+
+  DISALLOW_COPY_AND_ASSIGN(ReferenceProcessor);
 };
 
 }  // namespace gc
diff --git a/runtime/gc/reference_queue.cc b/runtime/gc/reference_queue.cc
index 4c93a4c..4ba3983 100644
--- a/runtime/gc/reference_queue.cc
+++ b/runtime/gc/reference_queue.cc
@@ -96,11 +96,11 @@
         << "ref=" << ref << " rb_ptr=" << ref->GetReadBarrierPointer();
     if (heap->ConcurrentCopyingCollector()->RegionSpace()->IsInToSpace(ref)) {
       // Moving objects.
-      ref->SetReadBarrierPointer(ReadBarrier::WhitePtr());
+      ref->AtomicSetReadBarrierPointer(ReadBarrier::GrayPtr(), ReadBarrier::WhitePtr());
       CHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::WhitePtr());
     } else {
       // Non-moving objects.
-      ref->SetReadBarrierPointer(ReadBarrier::BlackPtr());
+      ref->AtomicSetReadBarrierPointer(ReadBarrier::GrayPtr(), ReadBarrier::BlackPtr());
       CHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::BlackPtr());
     }
   }
diff --git a/runtime/gc/reference_queue.h b/runtime/gc/reference_queue.h
index f7d89d0..c45be85 100644
--- a/runtime/gc/reference_queue.h
+++ b/runtime/gc/reference_queue.h
@@ -106,7 +106,7 @@
   // GC types.
   mirror::Reference* list_;
 
-  DISALLOW_COPY_AND_ASSIGN(ReferenceQueue);
+  DISALLOW_IMPLICIT_CONSTRUCTORS(ReferenceQueue);
 };
 
 }  // namespace gc
diff --git a/runtime/gc/space/space.h b/runtime/gc/space/space.h
index f2378d9..871ebac 100644
--- a/runtime/gc/space/space.h
+++ b/runtime/gc/space/space.h
@@ -187,7 +187,7 @@
 
  private:
   friend class art::gc::Heap;
-  DISALLOW_COPY_AND_ASSIGN(Space);
+  DISALLOW_IMPLICIT_CONSTRUCTORS(Space);
 };
 std::ostream& operator<<(std::ostream& os, const Space& space);
 
@@ -337,7 +337,7 @@
   uint8_t* limit_;
 
  private:
-  DISALLOW_COPY_AND_ASSIGN(ContinuousSpace);
+  DISALLOW_IMPLICIT_CONSTRUCTORS(ContinuousSpace);
 };
 
 // A space where objects may be allocated higgledy-piggledy throughout virtual memory. Currently
@@ -366,7 +366,7 @@
   std::unique_ptr<accounting::LargeObjectBitmap> mark_bitmap_;
 
  private:
-  DISALLOW_COPY_AND_ASSIGN(DiscontinuousSpace);
+  DISALLOW_IMPLICIT_CONSTRUCTORS(DiscontinuousSpace);
 };
 
 class MemMapSpace : public ContinuousSpace {
@@ -400,7 +400,7 @@
   std::unique_ptr<MemMap> mem_map_;
 
  private:
-  DISALLOW_COPY_AND_ASSIGN(MemMapSpace);
+  DISALLOW_IMPLICIT_CONSTRUCTORS(MemMapSpace);
 };
 
 // Used by the heap compaction interface to enable copying from one type of alloc space to another.
@@ -453,7 +453,7 @@
 
  private:
   friend class gc::Heap;
-  DISALLOW_COPY_AND_ASSIGN(ContinuousMemMapAllocSpace);
+  DISALLOW_IMPLICIT_CONSTRUCTORS(ContinuousMemMapAllocSpace);
 };
 
 }  // namespace space
diff --git a/runtime/gc/task_processor.h b/runtime/gc/task_processor.h
index 67e3a54..5f48619 100644
--- a/runtime/gc/task_processor.h
+++ b/runtime/gc/task_processor.h
@@ -46,6 +46,7 @@
   uint64_t target_run_time_;
 
   friend class TaskProcessor;
+  DISALLOW_IMPLICIT_CONSTRUCTORS(HeapTask);
 };
 
 // Used to process GC tasks (heap trim, heap transitions, concurrent GC).
@@ -78,6 +79,8 @@
   std::unique_ptr<ConditionVariable> cond_ GUARDED_BY(lock_);
   std::multiset<HeapTask*, CompareByTargetRunTime> tasks_ GUARDED_BY(lock_);
   Thread* running_thread_ GUARDED_BY(lock_);
+
+  DISALLOW_COPY_AND_ASSIGN(TaskProcessor);
 };
 
 }  // namespace gc
diff --git a/runtime/hprof/hprof.cc b/runtime/hprof/hprof.cc
index fb7ff54..efead51 100644
--- a/runtime/hprof/hprof.cc
+++ b/runtime/hprof/hprof.cc
@@ -981,6 +981,10 @@
     // ClassObjects have their static fields appended, so aren't all the same size.
     // But they're at least this size.
     __ AddU4(sizeof(mirror::Class));  // instance size
+  } else if (klass->IsStringClass()) {
+    // Strings are variable length with character data at the end like arrays.
+    // This outputs the size of an empty string.
+    __ AddU4(sizeof(mirror::String));
   } else if (klass->IsArrayClass() || klass->IsPrimitive()) {
     __ AddU4(0);
   } else {
@@ -1036,13 +1040,22 @@
 
   // Instance fields for this class (no superclass fields)
   int iFieldCount = klass->IsObjectClass() ? 0 : klass->NumInstanceFields();
-  __ AddU2((uint16_t)iFieldCount);
+  if (klass->IsStringClass()) {
+    __ AddU2((uint16_t)iFieldCount + 1);
+  } else {
+    __ AddU2((uint16_t)iFieldCount);
+  }
   for (int i = 0; i < iFieldCount; ++i) {
     ArtField* f = klass->GetInstanceField(i);
     __ AddStringId(LookupStringId(f->GetName()));
     HprofBasicType t = SignatureToBasicTypeAndSize(f->GetTypeDescriptor(), nullptr);
     __ AddU1(t);
   }
+  // Add native value character array for strings.
+  if (klass->IsStringClass()) {
+    __ AddStringId(LookupStringId("value"));
+    __ AddU1(hprof_basic_object);
+  }
 }
 
 void Hprof::DumpHeapArray(mirror::Array* obj, mirror::Class* klass) {
@@ -1099,6 +1112,7 @@
 
   // Write the instance data;  fields for this class, followed by super class fields,
   // and so on. Don't write the klass or monitor fields of Object.class.
+  mirror::Class* orig_klass = klass;
   while (!klass->IsObjectClass()) {
     int ifieldCount = klass->NumInstanceFields();
     for (int i = 0; i < ifieldCount; ++i) {
@@ -1133,8 +1147,31 @@
     klass = klass->GetSuperClass();
   }
 
-  // Patch the instance field length.
-  __ UpdateU4(size_patch_offset, output_->Length() - (size_patch_offset + 4));
+  // Output native value character array for strings.
+  if (orig_klass->IsStringClass()) {
+    mirror::String* s = obj->AsString();
+    mirror::Object* value;
+    if (s->GetLength() == 0) {
+      // If string is empty, use an object-aligned address within the string for the value.
+      value = reinterpret_cast<mirror::Object*>(reinterpret_cast<uintptr_t>(s) + kObjectAlignment);
+    } else {
+      value = reinterpret_cast<mirror::Object*>(s->GetValue());
+    }
+    __ AddObjectId(value);
+
+    // Patch the instance field length.
+    __ UpdateU4(size_patch_offset, output_->Length() - (size_patch_offset + 4));
+
+    __ AddU1(HPROF_PRIMITIVE_ARRAY_DUMP);
+    __ AddObjectId(value);
+    __ AddU4(StackTraceSerialNumber(obj));
+    __ AddU4(s->GetLength());
+    __ AddU1(hprof_basic_char);
+    __ AddU2List(s->GetValue(), s->GetLength());
+  } else {
+    // Patch the instance field length.
+    __ UpdateU4(size_patch_offset, output_->Length() - (size_patch_offset + 4));
+  }
 }
 
 void Hprof::VisitRoot(mirror::Object* obj, const RootInfo& info) {
diff --git a/runtime/indirect_reference_table.cc b/runtime/indirect_reference_table.cc
index e2b9559..0ef58ea 100644
--- a/runtime/indirect_reference_table.cc
+++ b/runtime/indirect_reference_table.cc
@@ -175,10 +175,16 @@
   DCHECK(table_ != nullptr);
   DCHECK_GE(segment_state_.parts.numHoles, prevState.parts.numHoles);
 
-  if (GetIndirectRefKind(iref) == kHandleScopeOrInvalid &&
-      Thread::Current()->HandleScopeContains(reinterpret_cast<jobject>(iref))) {
-    LOG(WARNING) << "Attempt to remove local handle scope entry from IRT, ignoring";
-    return true;
+  if (GetIndirectRefKind(iref) == kHandleScopeOrInvalid) {
+    auto* self = Thread::Current();
+    if (self->HandleScopeContains(reinterpret_cast<jobject>(iref))) {
+      auto* env = self->GetJniEnv();
+      DCHECK(env != nullptr);
+      if (env->check_jni) {
+        LOG(WARNING) << "Attempt to remove local handle scope entry from IRT, ignoring";
+      }
+      return true;
+    }
   }
   const int idx = ExtractIndex(iref);
   if (idx < bottomIndex) {
diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index e6c333d..98e6200 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc
@@ -16,13 +16,10 @@
 
 #include "instrumentation.h"
 
-#include <sys/uio.h>
-
 #include <sstream>
 
 #include "arch/context.h"
 #include "atomic.h"
-#include "base/unix_file/fd_file.h"
 #include "class_linker.h"
 #include "debugger.h"
 #include "dex_file-inl.h"
@@ -39,16 +36,18 @@
 #include "mirror/object_array-inl.h"
 #include "mirror/object-inl.h"
 #include "nth_caller_visitor.h"
-#include "os.h"
-#include "scoped_thread_state_change.h"
 #include "thread.h"
 #include "thread_list.h"
 
 namespace art {
-
 namespace instrumentation {
 
-const bool kVerboseInstrumentation = false;
+constexpr bool kVerboseInstrumentation = false;
+
+// Instrumentation works on non-inlined frames by updating the return PCs
+// of compiled frames.
+static constexpr StackVisitor::StackWalkKind kInstrumentationStackWalk =
+    StackVisitor::StackWalkKind::kSkipInlinedFrames;
 
 static bool InstallStubsClassVisitor(mirror::Class* klass, void* arg)
     EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_) {
@@ -64,7 +63,7 @@
       have_method_entry_listeners_(false), have_method_exit_listeners_(false),
       have_method_unwind_listeners_(false), have_dex_pc_listeners_(false),
       have_field_read_listeners_(false), have_field_write_listeners_(false),
-      have_exception_caught_listeners_(false),
+      have_exception_caught_listeners_(false), have_backward_branch_listeners_(false),
       deoptimized_methods_lock_("deoptimized methods lock"),
       deoptimization_enabled_(false),
       interpreter_handler_table_(kMainHandlerTable),
@@ -166,16 +165,16 @@
 // existing instrumentation frames.
 static void InstrumentationInstallStack(Thread* thread, void* arg)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  struct InstallStackVisitor : public StackVisitor {
+  struct InstallStackVisitor FINAL : public StackVisitor {
     InstallStackVisitor(Thread* thread_in, Context* context, uintptr_t instrumentation_exit_pc)
-        : StackVisitor(thread_in, context),
+        : StackVisitor(thread_in, context, kInstrumentationStackWalk),
           instrumentation_stack_(thread_in->GetInstrumentationStack()),
           instrumentation_exit_pc_(instrumentation_exit_pc),
           reached_existing_instrumentation_frames_(false), instrumentation_stack_depth_(0),
           last_return_pc_(0) {
     }
 
-    virtual bool VisitFrame() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    bool VisitFrame() OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
       mirror::ArtMethod* m = GetMethod();
       if (m == nullptr) {
         if (kVerboseInstrumentation) {
@@ -306,16 +305,17 @@
 // Removes the instrumentation exit pc as the return PC for every quick frame.
 static void InstrumentationRestoreStack(Thread* thread, void* arg)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  struct RestoreStackVisitor : public StackVisitor {
+  struct RestoreStackVisitor FINAL : public StackVisitor {
     RestoreStackVisitor(Thread* thread_in, uintptr_t instrumentation_exit_pc,
                         Instrumentation* instrumentation)
-        : StackVisitor(thread_in, nullptr), thread_(thread_in),
+        : StackVisitor(thread_in, nullptr, kInstrumentationStackWalk),
+          thread_(thread_in),
           instrumentation_exit_pc_(instrumentation_exit_pc),
           instrumentation_(instrumentation),
           instrumentation_stack_(thread_in->GetInstrumentationStack()),
           frames_removed_(0) {}
 
-    virtual bool VisitFrame() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    bool VisitFrame() OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
       if (instrumentation_stack_->size() == 0) {
         return false;  // Stop.
       }
@@ -390,25 +390,29 @@
   }
 }
 
+static bool HasEvent(Instrumentation::InstrumentationEvent expected, uint32_t events) {
+  return (events & expected) != 0;
+}
+
 void Instrumentation::AddListener(InstrumentationListener* listener, uint32_t events) {
   Locks::mutator_lock_->AssertExclusiveHeld(Thread::Current());
-  if ((events & kMethodEntered) != 0) {
+  if (HasEvent(kMethodEntered, events)) {
     method_entry_listeners_.push_back(listener);
     have_method_entry_listeners_ = true;
   }
-  if ((events & kMethodExited) != 0) {
+  if (HasEvent(kMethodExited, events)) {
     method_exit_listeners_.push_back(listener);
     have_method_exit_listeners_ = true;
   }
-  if ((events & kMethodUnwind) != 0) {
+  if (HasEvent(kMethodUnwind, events)) {
     method_unwind_listeners_.push_back(listener);
     have_method_unwind_listeners_ = true;
   }
-  if ((events & kBackwardBranch) != 0) {
+  if (HasEvent(kBackwardBranch, events)) {
     backward_branch_listeners_.push_back(listener);
     have_backward_branch_listeners_ = true;
   }
-  if ((events & kDexPcMoved) != 0) {
+  if (HasEvent(kDexPcMoved, events)) {
     std::list<InstrumentationListener*>* modified;
     if (have_dex_pc_listeners_) {
       modified = new std::list<InstrumentationListener*>(*dex_pc_listeners_.get());
@@ -419,7 +423,7 @@
     dex_pc_listeners_.reset(modified);
     have_dex_pc_listeners_ = true;
   }
-  if ((events & kFieldRead) != 0) {
+  if (HasEvent(kFieldRead, events)) {
     std::list<InstrumentationListener*>* modified;
     if (have_field_read_listeners_) {
       modified = new std::list<InstrumentationListener*>(*field_read_listeners_.get());
@@ -430,7 +434,7 @@
     field_read_listeners_.reset(modified);
     have_field_read_listeners_ = true;
   }
-  if ((events & kFieldWritten) != 0) {
+  if (HasEvent(kFieldWritten, events)) {
     std::list<InstrumentationListener*>* modified;
     if (have_field_write_listeners_) {
       modified = new std::list<InstrumentationListener*>(*field_write_listeners_.get());
@@ -441,7 +445,7 @@
     field_write_listeners_.reset(modified);
     have_field_write_listeners_ = true;
   }
-  if ((events & kExceptionCaught) != 0) {
+  if (HasEvent(kExceptionCaught, events)) {
     std::list<InstrumentationListener*>* modified;
     if (have_exception_caught_listeners_) {
       modified = new std::list<InstrumentationListener*>(*exception_caught_listeners_.get());
@@ -458,102 +462,104 @@
 void Instrumentation::RemoveListener(InstrumentationListener* listener, uint32_t events) {
   Locks::mutator_lock_->AssertExclusiveHeld(Thread::Current());
 
-  if ((events & kMethodEntered) != 0) {
-    if (have_method_entry_listeners_) {
-      method_entry_listeners_.remove(listener);
-      have_method_entry_listeners_ = !method_entry_listeners_.empty();
-    }
+  if (HasEvent(kMethodEntered, events) && have_method_entry_listeners_) {
+    method_entry_listeners_.remove(listener);
+    have_method_entry_listeners_ = !method_entry_listeners_.empty();
   }
-  if ((events & kMethodExited) != 0) {
-    if (have_method_exit_listeners_) {
-      method_exit_listeners_.remove(listener);
-      have_method_exit_listeners_ = !method_exit_listeners_.empty();
-    }
+  if (HasEvent(kMethodExited, events) && have_method_exit_listeners_) {
+    method_exit_listeners_.remove(listener);
+    have_method_exit_listeners_ = !method_exit_listeners_.empty();
   }
-  if ((events & kMethodUnwind) != 0) {
-    if (have_method_unwind_listeners_) {
+  if (HasEvent(kMethodUnwind, events) && have_method_unwind_listeners_) {
       method_unwind_listeners_.remove(listener);
       have_method_unwind_listeners_ = !method_unwind_listeners_.empty();
-    }
   }
-  if ((events & kDexPcMoved) != 0) {
+  if (HasEvent(kBackwardBranch, events) && have_backward_branch_listeners_) {
+    backward_branch_listeners_.remove(listener);
+    have_backward_branch_listeners_ = !backward_branch_listeners_.empty();
+  }
+  if (HasEvent(kDexPcMoved, events) && have_dex_pc_listeners_) {
+    std::list<InstrumentationListener*>* modified =
+        new std::list<InstrumentationListener*>(*dex_pc_listeners_.get());
+    modified->remove(listener);
+    have_dex_pc_listeners_ = !modified->empty();
     if (have_dex_pc_listeners_) {
-      std::list<InstrumentationListener*>* modified =
-          new std::list<InstrumentationListener*>(*dex_pc_listeners_.get());
-      modified->remove(listener);
-      have_dex_pc_listeners_ = !modified->empty();
-      if (have_dex_pc_listeners_) {
-        dex_pc_listeners_.reset(modified);
-      } else {
-        dex_pc_listeners_.reset();
-        delete modified;
-      }
+      dex_pc_listeners_.reset(modified);
+    } else {
+      dex_pc_listeners_.reset();
+      delete modified;
     }
   }
-  if ((events & kFieldRead) != 0) {
+  if (HasEvent(kFieldRead, events) && have_field_read_listeners_) {
+    std::list<InstrumentationListener*>* modified =
+        new std::list<InstrumentationListener*>(*field_read_listeners_.get());
+    modified->remove(listener);
+    have_field_read_listeners_ = !modified->empty();
     if (have_field_read_listeners_) {
-      std::list<InstrumentationListener*>* modified =
-          new std::list<InstrumentationListener*>(*field_read_listeners_.get());
-      modified->remove(listener);
-      have_field_read_listeners_ = !modified->empty();
-      if (have_field_read_listeners_) {
-        field_read_listeners_.reset(modified);
-      } else {
-        field_read_listeners_.reset();
-        delete modified;
-      }
+      field_read_listeners_.reset(modified);
+    } else {
+      field_read_listeners_.reset();
+      delete modified;
     }
   }
-  if ((events & kFieldWritten) != 0) {
+  if (HasEvent(kFieldWritten, events) && have_field_write_listeners_) {
+    std::list<InstrumentationListener*>* modified =
+        new std::list<InstrumentationListener*>(*field_write_listeners_.get());
+    modified->remove(listener);
+    have_field_write_listeners_ = !modified->empty();
     if (have_field_write_listeners_) {
-      std::list<InstrumentationListener*>* modified =
-          new std::list<InstrumentationListener*>(*field_write_listeners_.get());
-      modified->remove(listener);
-      have_field_write_listeners_ = !modified->empty();
-      if (have_field_write_listeners_) {
-        field_write_listeners_.reset(modified);
-      } else {
-        field_write_listeners_.reset();
-        delete modified;
-      }
+      field_write_listeners_.reset(modified);
+    } else {
+      field_write_listeners_.reset();
+      delete modified;
     }
   }
-  if ((events & kExceptionCaught) != 0) {
+  if (HasEvent(kExceptionCaught, events) && have_exception_caught_listeners_) {
+    std::list<InstrumentationListener*>* modified =
+        new std::list<InstrumentationListener*>(*exception_caught_listeners_.get());
+    modified->remove(listener);
+    have_exception_caught_listeners_ = !modified->empty();
     if (have_exception_caught_listeners_) {
-      std::list<InstrumentationListener*>* modified =
-          new std::list<InstrumentationListener*>(*exception_caught_listeners_.get());
-      modified->remove(listener);
-      have_exception_caught_listeners_ = !modified->empty();
-      if (have_exception_caught_listeners_) {
-        exception_caught_listeners_.reset(modified);
-      } else {
-        exception_caught_listeners_.reset();
-        delete modified;
-      }
+      exception_caught_listeners_.reset(modified);
+    } else {
+      exception_caught_listeners_.reset();
+      delete modified;
     }
   }
   UpdateInterpreterHandlerTable();
 }
 
-void Instrumentation::ConfigureStubs(bool require_entry_exit_stubs, bool require_interpreter) {
-  interpret_only_ = require_interpreter || forced_interpret_only_;
-  // Compute what level of instrumentation is required and compare to current.
-  int desired_level, current_level;
-  if (require_interpreter) {
-    desired_level = 2;
-  } else if (require_entry_exit_stubs) {
-    desired_level = 1;
-  } else {
-    desired_level = 0;
-  }
+Instrumentation::InstrumentationLevel Instrumentation::GetCurrentInstrumentationLevel() const {
   if (interpreter_stubs_installed_) {
-    current_level = 2;
+    return InstrumentationLevel::kInstrumentWithInterpreter;
   } else if (entry_exit_stubs_installed_) {
-    current_level = 1;
+    return InstrumentationLevel::kInstrumentWithInstrumentationStubs;
   } else {
-    current_level = 0;
+    return InstrumentationLevel::kInstrumentNothing;
   }
-  if (desired_level == current_level) {
+}
+
+void Instrumentation::ConfigureStubs(const char* key, InstrumentationLevel desired_level) {
+  // Store the instrumentation level for this key or remove it.
+  if (desired_level == InstrumentationLevel::kInstrumentNothing) {
+    // The client no longer needs instrumentation.
+    requested_instrumentation_levels_.erase(key);
+  } else {
+    // The client needs instrumentation.
+    requested_instrumentation_levels_.Overwrite(key, desired_level);
+  }
+
+  // Look for the highest required instrumentation level.
+  InstrumentationLevel requested_level = InstrumentationLevel::kInstrumentNothing;
+  for (const auto& v : requested_instrumentation_levels_) {
+    requested_level = std::max(requested_level, v.second);
+  }
+
+  interpret_only_ = (requested_level == InstrumentationLevel::kInstrumentWithInterpreter) ||
+                    forced_interpret_only_;
+
+  InstrumentationLevel current_level = GetCurrentInstrumentationLevel();
+  if (requested_level == current_level) {
     // We're already set.
     return;
   }
@@ -561,12 +567,14 @@
   Runtime* runtime = Runtime::Current();
   Locks::mutator_lock_->AssertExclusiveHeld(self);
   Locks::thread_list_lock_->AssertNotHeld(self);
-  if (desired_level > 0) {
-    if (require_interpreter) {
+  if (requested_level > InstrumentationLevel::kInstrumentNothing) {
+    if (requested_level == InstrumentationLevel::kInstrumentWithInterpreter) {
       interpreter_stubs_installed_ = true;
-    } else {
-      CHECK(require_entry_exit_stubs);
       entry_exit_stubs_installed_ = true;
+    } else {
+      CHECK_EQ(requested_level, InstrumentationLevel::kInstrumentWithInstrumentationStubs);
+      entry_exit_stubs_installed_ = true;
+      interpreter_stubs_installed_ = false;
     }
     runtime->GetClassLinker()->VisitClasses(InstallStubsClassVisitor, this);
     instrumentation_stubs_installed_ = true;
@@ -590,8 +598,7 @@
   }
 }
 
-static void ResetQuickAllocEntryPointsForThread(Thread* thread, void* arg) {
-  UNUSED(arg);
+static void ResetQuickAllocEntryPointsForThread(Thread* thread, void* arg ATTRIBUTE_UNUSED) {
   thread->ResetQuickAllocEntryPointsForThread();
 }
 
@@ -804,11 +811,11 @@
   deoptimization_enabled_ = true;
 }
 
-void Instrumentation::DisableDeoptimization() {
+void Instrumentation::DisableDeoptimization(const char* key) {
   CHECK_EQ(deoptimization_enabled_, true);
   // If we deoptimized everything, undo it.
   if (interpreter_stubs_installed_) {
-    UndeoptimizeEverything();
+    UndeoptimizeEverything(key);
   }
   // Undeoptimized selected methods.
   while (true) {
@@ -828,25 +835,35 @@
 
 // Indicates if instrumentation should notify method enter/exit events to the listeners.
 bool Instrumentation::ShouldNotifyMethodEnterExitEvents() const {
+  if (!HasMethodEntryListeners() && !HasMethodExitListeners()) {
+    return false;
+  }
   return !deoptimization_enabled_ && !interpreter_stubs_installed_;
 }
 
-void Instrumentation::DeoptimizeEverything() {
-  CHECK(!interpreter_stubs_installed_);
-  ConfigureStubs(false, true);
+void Instrumentation::DeoptimizeEverything(const char* key) {
+  CHECK(deoptimization_enabled_);
+  ConfigureStubs(key, InstrumentationLevel::kInstrumentWithInterpreter);
 }
 
-void Instrumentation::UndeoptimizeEverything() {
+void Instrumentation::UndeoptimizeEverything(const char* key) {
   CHECK(interpreter_stubs_installed_);
-  ConfigureStubs(false, false);
+  CHECK(deoptimization_enabled_);
+  ConfigureStubs(key, InstrumentationLevel::kInstrumentNothing);
 }
 
-void Instrumentation::EnableMethodTracing(bool require_interpreter) {
-  ConfigureStubs(!require_interpreter, require_interpreter);
+void Instrumentation::EnableMethodTracing(const char* key, bool needs_interpreter) {
+  InstrumentationLevel level;
+  if (needs_interpreter) {
+    level = InstrumentationLevel::kInstrumentWithInterpreter;
+  } else {
+    level = InstrumentationLevel::kInstrumentWithInstrumentationStubs;
+  }
+  ConfigureStubs(key, level);
 }
 
-void Instrumentation::DisableMethodTracing() {
-  ConfigureStubs(false, false);
+void Instrumentation::DisableMethodTracing(const char* key) {
+  ConfigureStubs(key, InstrumentationLevel::kInstrumentNothing);
 }
 
 const void* Instrumentation::GetQuickCodeFor(mirror::ArtMethod* method, size_t pointer_size) const {
@@ -896,7 +913,7 @@
 void Instrumentation::MethodUnwindEvent(Thread* thread, mirror::Object* this_object,
                                         mirror::ArtMethod* method,
                                         uint32_t dex_pc) const {
-  if (have_method_unwind_listeners_) {
+  if (HasMethodUnwindListeners()) {
     for (InstrumentationListener* listener : method_unwind_listeners_) {
       listener->MethodUnwind(thread, this_object, method, dex_pc);
     }
@@ -906,11 +923,9 @@
 void Instrumentation::DexPcMovedEventImpl(Thread* thread, mirror::Object* this_object,
                                           mirror::ArtMethod* method,
                                           uint32_t dex_pc) const {
-  if (HasDexPcListeners()) {
-    std::shared_ptr<std::list<InstrumentationListener*>> original(dex_pc_listeners_);
-    for (InstrumentationListener* listener : *original.get()) {
-      listener->DexPcMoved(thread, this_object, method, dex_pc);
-    }
+  std::shared_ptr<std::list<InstrumentationListener*>> original(dex_pc_listeners_);
+  for (InstrumentationListener* listener : *original.get()) {
+    listener->DexPcMoved(thread, this_object, method, dex_pc);
   }
 }
 
@@ -924,22 +939,18 @@
 void Instrumentation::FieldReadEventImpl(Thread* thread, mirror::Object* this_object,
                                          mirror::ArtMethod* method, uint32_t dex_pc,
                                          ArtField* field) const {
-  if (HasFieldReadListeners()) {
-    std::shared_ptr<std::list<InstrumentationListener*>> original(field_read_listeners_);
-    for (InstrumentationListener* listener : *original.get()) {
-      listener->FieldRead(thread, this_object, method, dex_pc, field);
-    }
+  std::shared_ptr<std::list<InstrumentationListener*>> original(field_read_listeners_);
+  for (InstrumentationListener* listener : *original.get()) {
+    listener->FieldRead(thread, this_object, method, dex_pc, field);
   }
 }
 
 void Instrumentation::FieldWriteEventImpl(Thread* thread, mirror::Object* this_object,
                                          mirror::ArtMethod* method, uint32_t dex_pc,
                                          ArtField* field, const JValue& field_value) const {
-  if (HasFieldWriteListeners()) {
-    std::shared_ptr<std::list<InstrumentationListener*>> original(field_write_listeners_);
-    for (InstrumentationListener* listener : *original.get()) {
-      listener->FieldWritten(thread, this_object, method, dex_pc, field, field_value);
-    }
+  std::shared_ptr<std::list<InstrumentationListener*>> original(field_write_listeners_);
+  for (InstrumentationListener* listener : *original.get()) {
+    listener->FieldWritten(thread, this_object, method, dex_pc, field, field_value);
   }
 }
 
@@ -959,7 +970,7 @@
 static void CheckStackDepth(Thread* self, const InstrumentationStackFrame& instrumentation_frame,
                             int delta)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  size_t frame_id = StackVisitor::ComputeNumFrames(self) + delta;
+  size_t frame_id = StackVisitor::ComputeNumFrames(self, kInstrumentationStackWalk) + delta;
   if (frame_id != instrumentation_frame.frame_id_) {
     LOG(ERROR) << "Expected frame_id=" << frame_id << " but found "
         << instrumentation_frame.frame_id_;
@@ -972,7 +983,7 @@
                                                     mirror::ArtMethod* method,
                                                     uintptr_t lr, bool interpreter_entry) {
   // We have a callee-save frame meaning this value is guaranteed to never be 0.
-  size_t frame_id = StackVisitor::ComputeNumFrames(self);
+  size_t frame_id = StackVisitor::ComputeNumFrames(self, kInstrumentationStackWalk);
   std::deque<instrumentation::InstrumentationStackFrame>* stack = self->GetInstrumentationStack();
   if (kVerboseInstrumentation) {
     LOG(INFO) << "Entering " << PrettyMethod(method) << " from PC " << reinterpret_cast<void*>(lr);
diff --git a/runtime/instrumentation.h b/runtime/instrumentation.h
index 8b7fcca..7d70d21 100644
--- a/runtime/instrumentation.h
+++ b/runtime/instrumentation.h
@@ -22,11 +22,10 @@
 #include <map>
 
 #include "arch/instruction_set.h"
-#include "atomic.h"
 #include "base/macros.h"
 #include "base/mutex.h"
 #include "gc_root.h"
-#include "object_callbacks.h"
+#include "safe_map.h"
 
 namespace art {
 namespace mirror {
@@ -67,8 +66,6 @@
                              uint32_t dex_pc) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) = 0;
 
   // Call-back for when a method is exited.
-  // TODO: its likely passing the return value would be useful, however, we may need to get and
-  //       parse the shorty to determine what kind of register holds the result.
   virtual void MethodExited(Thread* thread, mirror::Object* this_object,
                             mirror::ArtMethod* method, uint32_t dex_pc,
                             const JValue& return_value)
@@ -119,6 +116,12 @@
     kBackwardBranch = 0x80,
   };
 
+  enum class InstrumentationLevel {
+    kInstrumentNothing,                   // execute without instrumentation
+    kInstrumentWithInstrumentationStubs,  // execute with instrumentation entry/exit stubs
+    kInstrumentWithInterpreter            // execute with interpreter
+  };
+
   Instrumentation();
 
   // Add a listener to be notified of the masked together sent of instrumentation events. This
@@ -138,7 +141,7 @@
   void EnableDeoptimization()
       EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
       LOCKS_EXCLUDED(deoptimized_methods_lock_);
-  void DisableDeoptimization()
+  void DisableDeoptimization(const char* key)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
       LOCKS_EXCLUDED(deoptimized_methods_lock_);
   bool AreAllMethodsDeoptimized() const {
@@ -147,12 +150,12 @@
   bool ShouldNotifyMethodEnterExitEvents() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Executes everything with interpreter.
-  void DeoptimizeEverything()
+  void DeoptimizeEverything(const char* key)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
       LOCKS_EXCLUDED(Locks::thread_list_lock_, Locks::classlinker_classes_lock_);
 
   // Executes everything with compiled code (or interpreter if there is no code).
-  void UndeoptimizeEverything()
+  void UndeoptimizeEverything(const char* key)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
       LOCKS_EXCLUDED(Locks::thread_list_lock_, Locks::classlinker_classes_lock_);
 
@@ -170,18 +173,19 @@
       LOCKS_EXCLUDED(Locks::thread_list_lock_, deoptimized_methods_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  // Indicates whether the method has been deoptimized so it is executed with the interpreter.
   bool IsDeoptimized(mirror::ArtMethod* method)
       LOCKS_EXCLUDED(deoptimized_methods_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  // Enable method tracing by installing instrumentation entry/exit stubs.
-  void EnableMethodTracing(
-      bool require_interpreter = kDeoptimizeForAccurateMethodEntryExitListeners)
+  // Enable method tracing by installing instrumentation entry/exit stubs or interpreter.
+  void EnableMethodTracing(const char* key,
+                           bool needs_interpreter = kDeoptimizeForAccurateMethodEntryExitListeners)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
       LOCKS_EXCLUDED(Locks::thread_list_lock_, Locks::classlinker_classes_lock_);
 
-  // Disable method tracing by uninstalling instrumentation entry/exit stubs.
-  void DisableMethodTracing()
+  // Disable method tracing by uninstalling instrumentation entry/exit stubs or interpreter.
+  void DisableMethodTracing(const char* key)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
       LOCKS_EXCLUDED(Locks::thread_list_lock_, Locks::classlinker_classes_lock_);
 
@@ -236,6 +240,10 @@
     return have_method_exit_listeners_;
   }
 
+  bool HasMethodUnwindListeners() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return have_method_unwind_listeners_;
+  }
+
   bool HasDexPcListeners() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return have_dex_pc_listeners_;
   }
@@ -355,8 +363,14 @@
       LOCKS_EXCLUDED(deoptimized_methods_lock_);
 
  private:
+  InstrumentationLevel GetCurrentInstrumentationLevel() const;
+
   // Does the job of installing or removing instrumentation code within methods.
-  void ConfigureStubs(bool require_entry_exit_stubs, bool require_interpreter)
+  // In order to support multiple clients using instrumentation at the same time,
+  // the caller must pass a unique key (a string) identifying it so we remember which
+  // instrumentation level it needs. Therefore the current instrumentation level
+  // becomes the highest instrumentation level required by a client.
+  void ConfigureStubs(const char* key, InstrumentationLevel desired_instrumentation_level)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
       LOCKS_EXCLUDED(Locks::thread_list_lock_, Locks::classlinker_classes_lock_,
                      deoptimized_methods_lock_);
@@ -452,6 +466,11 @@
   // Do we have any backward branch listeners? Short-cut to avoid taking the instrumentation_lock_.
   bool have_backward_branch_listeners_ GUARDED_BY(Locks::mutator_lock_);
 
+  // Contains the instrumentation level required by each client of the instrumentation identified
+  // by a string key.
+  typedef SafeMap<const char*, InstrumentationLevel> InstrumentationLevelTable;
+  InstrumentationLevelTable requested_instrumentation_levels_ GUARDED_BY(Locks::mutator_lock_);
+
   // The event listeners, written to with the mutator_lock_ exclusively held.
   std::list<InstrumentationListener*> method_entry_listeners_ GUARDED_BY(Locks::mutator_lock_);
   std::list<InstrumentationListener*> method_exit_listeners_ GUARDED_BY(Locks::mutator_lock_);
@@ -481,9 +500,12 @@
   size_t quick_alloc_entry_points_instrumentation_counter_
       GUARDED_BY(Locks::instrument_entrypoints_lock_);
 
+  friend class InstrumentationTest;  // For GetCurrentInstrumentationLevel and ConfigureStubs.
+
   DISALLOW_COPY_AND_ASSIGN(Instrumentation);
 };
 std::ostream& operator<<(std::ostream& os, const Instrumentation::InstrumentationEvent& rhs);
+std::ostream& operator<<(std::ostream& os, const Instrumentation::InstrumentationLevel& rhs);
 
 // An element in the instrumentation side stack maintained in art::Thread.
 struct InstrumentationStackFrame {
diff --git a/runtime/instrumentation_test.cc b/runtime/instrumentation_test.cc
new file mode 100644
index 0000000..5afacb8
--- /dev/null
+++ b/runtime/instrumentation_test.cc
@@ -0,0 +1,791 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "instrumentation.h"
+
+#include "common_runtime_test.h"
+#include "common_throws.h"
+#include "class_linker-inl.h"
+#include "dex_file.h"
+#include "handle_scope-inl.h"
+#include "jvalue.h"
+#include "runtime.h"
+#include "scoped_thread_state_change.h"
+#include "thread_list.h"
+#include "thread-inl.h"
+
+namespace art {
+namespace instrumentation {
+
+class TestInstrumentationListener FINAL : public instrumentation::InstrumentationListener {
+ public:
+  TestInstrumentationListener()
+    : received_method_enter_event(false), received_method_exit_event(false),
+      received_method_unwind_event(false), received_dex_pc_moved_event(false),
+      received_field_read_event(false), received_field_written_event(false),
+      received_exception_caught_event(false), received_backward_branch_event(false) {}
+
+  virtual ~TestInstrumentationListener() {}
+
+  void MethodEntered(Thread* thread ATTRIBUTE_UNUSED,
+                     mirror::Object* this_object ATTRIBUTE_UNUSED,
+                     mirror::ArtMethod* method ATTRIBUTE_UNUSED,
+                     uint32_t dex_pc ATTRIBUTE_UNUSED)
+      OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    received_method_enter_event = true;
+  }
+
+  void MethodExited(Thread* thread ATTRIBUTE_UNUSED,
+                    mirror::Object* this_object ATTRIBUTE_UNUSED,
+                    mirror::ArtMethod* method ATTRIBUTE_UNUSED,
+                    uint32_t dex_pc ATTRIBUTE_UNUSED,
+                    const JValue& return_value ATTRIBUTE_UNUSED)
+      OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    received_method_exit_event = true;
+  }
+
+  void MethodUnwind(Thread* thread ATTRIBUTE_UNUSED,
+                    mirror::Object* this_object ATTRIBUTE_UNUSED,
+                    mirror::ArtMethod* method ATTRIBUTE_UNUSED,
+                    uint32_t dex_pc ATTRIBUTE_UNUSED)
+      OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    received_method_unwind_event = true;
+  }
+
+  void DexPcMoved(Thread* thread ATTRIBUTE_UNUSED,
+                  mirror::Object* this_object ATTRIBUTE_UNUSED,
+                  mirror::ArtMethod* method ATTRIBUTE_UNUSED,
+                  uint32_t new_dex_pc ATTRIBUTE_UNUSED)
+      OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    received_dex_pc_moved_event = true;
+  }
+
+  void FieldRead(Thread* thread ATTRIBUTE_UNUSED,
+                 mirror::Object* this_object ATTRIBUTE_UNUSED,
+                 mirror::ArtMethod* method ATTRIBUTE_UNUSED,
+                 uint32_t dex_pc ATTRIBUTE_UNUSED,
+                 ArtField* field ATTRIBUTE_UNUSED)
+      OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    received_field_read_event = true;
+  }
+
+  void FieldWritten(Thread* thread ATTRIBUTE_UNUSED,
+                    mirror::Object* this_object ATTRIBUTE_UNUSED,
+                    mirror::ArtMethod* method ATTRIBUTE_UNUSED,
+                    uint32_t dex_pc ATTRIBUTE_UNUSED,
+                    ArtField* field ATTRIBUTE_UNUSED,
+                    const JValue& field_value ATTRIBUTE_UNUSED)
+      OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    received_field_written_event = true;
+  }
+
+  void ExceptionCaught(Thread* thread ATTRIBUTE_UNUSED,
+                       mirror::Throwable* exception_object ATTRIBUTE_UNUSED)
+      OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    received_exception_caught_event = true;
+  }
+
+  void BackwardBranch(Thread* thread ATTRIBUTE_UNUSED,
+                      mirror::ArtMethod* method ATTRIBUTE_UNUSED,
+                      int32_t dex_pc_offset ATTRIBUTE_UNUSED)
+      OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    received_backward_branch_event = true;
+  }
+
+  void Reset() {
+    received_method_enter_event = false;
+    received_method_exit_event = false;
+    received_method_unwind_event = false;
+    received_dex_pc_moved_event = false;
+    received_field_read_event = false;
+    received_field_written_event = false;
+    received_exception_caught_event = false;
+    received_backward_branch_event = false;
+  }
+
+  bool received_method_enter_event;
+  bool received_method_exit_event;
+  bool received_method_unwind_event;
+  bool received_dex_pc_moved_event;
+  bool received_field_read_event;
+  bool received_field_written_event;
+  bool received_exception_caught_event;
+  bool received_backward_branch_event;
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(TestInstrumentationListener);
+};
+
+class InstrumentationTest : public CommonRuntimeTest {
+ public:
+  // Unique keys used to test Instrumentation::ConfigureStubs.
+  static constexpr const char* kClientOneKey = "TestClient1";
+  static constexpr const char* kClientTwoKey = "TestClient2";
+
+  void CheckConfigureStubs(const char* key, Instrumentation::InstrumentationLevel level) {
+    ScopedObjectAccess soa(Thread::Current());
+    instrumentation::Instrumentation* instr = Runtime::Current()->GetInstrumentation();
+    {  // ConfigureStubs needs mutator_lock_ exclusively (presumably via SuspendAll).
+      soa.Self()->TransitionFromRunnableToSuspended(kSuspended);
+      Runtime* runtime = Runtime::Current();
+      runtime->GetThreadList()->SuspendAll("Instrumentation::ConfigureStubs");
+      instr->ConfigureStubs(key, level);  // kInstrumentNothing removes |key|.
+      runtime->GetThreadList()->ResumeAll();
+      soa.Self()->TransitionFromSuspendedToRunnable();
+    }
+  }
+
+  Instrumentation::InstrumentationLevel GetCurrentInstrumentationLevel() {
+    return Runtime::Current()->GetInstrumentation()->GetCurrentInstrumentationLevel();
+  }
+
+  size_t GetInstrumentationUserCount() {  // Count of keys holding a requested level.
+    ScopedObjectAccess soa(Thread::Current());
+    return Runtime::Current()->GetInstrumentation()->requested_instrumentation_levels_.size();
+  }
+
+  void TestEvent(uint32_t instrumentation_event) {
+    ScopedObjectAccess soa(Thread::Current());
+    instrumentation::Instrumentation* instr = Runtime::Current()->GetInstrumentation();
+    TestInstrumentationListener listener;
+    {
+      soa.Self()->TransitionFromRunnableToSuspended(kSuspended);
+      Runtime* runtime = Runtime::Current();
+      runtime->GetThreadList()->SuspendAll("Add instrumentation listener");
+      instr->AddListener(&listener, instrumentation_event);
+      runtime->GetThreadList()->ResumeAll();
+      soa.Self()->TransitionFromSuspendedToRunnable();
+    }
+
+    mirror::ArtMethod* const event_method = nullptr;
+    mirror::Object* const event_obj = nullptr;
+    const uint32_t event_dex_pc = 0;
+
+    // Check the listener is registered and is notified of the event.
+    EXPECT_TRUE(HasEventListener(instr, instrumentation_event));
+    EXPECT_FALSE(DidListenerReceiveEvent(listener, instrumentation_event));
+    ReportEvent(instr, instrumentation_event, soa.Self(), event_method, event_obj, event_dex_pc);
+    EXPECT_TRUE(DidListenerReceiveEvent(listener, instrumentation_event));
+
+    listener.Reset();
+    {
+      soa.Self()->TransitionFromRunnableToSuspended(kSuspended);
+      Runtime* runtime = Runtime::Current();
+      runtime->GetThreadList()->SuspendAll("Remove instrumentation listener");
+      instr->RemoveListener(&listener, instrumentation_event);
+      runtime->GetThreadList()->ResumeAll();
+      soa.Self()->TransitionFromSuspendedToRunnable();
+    }
+
+    // Check the listener is not registered and is not notified of the event.
+    EXPECT_FALSE(HasEventListener(instr, instrumentation_event));
+    EXPECT_FALSE(DidListenerReceiveEvent(listener, instrumentation_event));
+    ReportEvent(instr, instrumentation_event, soa.Self(), event_method, event_obj, event_dex_pc);
+    EXPECT_FALSE(DidListenerReceiveEvent(listener, instrumentation_event));
+  }
+
+  void DeoptimizeMethod(Thread* self, Handle<mirror::ArtMethod> method,
+                        bool enable_deoptimization)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    Runtime* runtime = Runtime::Current();
+    instrumentation::Instrumentation* instrumentation = runtime->GetInstrumentation();
+    self->TransitionFromRunnableToSuspended(kSuspended);
+    runtime->GetThreadList()->SuspendAll("Single method deoptimization");
+    if (enable_deoptimization) {
+      instrumentation->EnableDeoptimization();
+    }
+    instrumentation->Deoptimize(method.Get());
+    runtime->GetThreadList()->ResumeAll();
+    self->TransitionFromSuspendedToRunnable();
+  }
+
+  void UndeoptimizeMethod(Thread* self, Handle<mirror::ArtMethod> method,
+                          const char* key, bool disable_deoptimization)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    Runtime* runtime = Runtime::Current();
+    instrumentation::Instrumentation* instrumentation = runtime->GetInstrumentation();
+    self->TransitionFromRunnableToSuspended(kSuspended);
+    runtime->GetThreadList()->SuspendAll("Single method undeoptimization");
+    instrumentation->Undeoptimize(method.Get());
+    if (disable_deoptimization) {
+      instrumentation->DisableDeoptimization(key);
+    }
+    runtime->GetThreadList()->ResumeAll();
+    self->TransitionFromSuspendedToRunnable();
+  }
+
+  void DeoptimizeEverything(Thread* self, const char* key, bool enable_deoptimization)
+        SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    Runtime* runtime = Runtime::Current();
+    instrumentation::Instrumentation* instrumentation = runtime->GetInstrumentation();
+    self->TransitionFromRunnableToSuspended(kSuspended);
+    runtime->GetThreadList()->SuspendAll("Full deoptimization");
+    if (enable_deoptimization) {
+      instrumentation->EnableDeoptimization();
+    }
+    instrumentation->DeoptimizeEverything(key);
+    runtime->GetThreadList()->ResumeAll();
+    self->TransitionFromSuspendedToRunnable();
+  }
+
+  void UndeoptimizeEverything(Thread* self, const char* key, bool disable_deoptimization)
+        SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    Runtime* runtime = Runtime::Current();
+    instrumentation::Instrumentation* instrumentation = runtime->GetInstrumentation();
+    self->TransitionFromRunnableToSuspended(kSuspended);
+    runtime->GetThreadList()->SuspendAll("Full undeoptimization");
+    instrumentation->UndeoptimizeEverything(key);
+    if (disable_deoptimization) {
+      instrumentation->DisableDeoptimization(key);
+    }
+    runtime->GetThreadList()->ResumeAll();
+    self->TransitionFromSuspendedToRunnable();
+  }
+
+  void EnableMethodTracing(Thread* self, const char* key, bool needs_interpreter)
+        SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    Runtime* runtime = Runtime::Current();
+    instrumentation::Instrumentation* instrumentation = runtime->GetInstrumentation();
+    self->TransitionFromRunnableToSuspended(kSuspended);
+    runtime->GetThreadList()->SuspendAll("EnableMethodTracing");
+    instrumentation->EnableMethodTracing(key, needs_interpreter);
+    runtime->GetThreadList()->ResumeAll();
+    self->TransitionFromSuspendedToRunnable();
+  }
+
+  void DisableMethodTracing(Thread* self, const char* key)
+        SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    Runtime* runtime = Runtime::Current();
+    instrumentation::Instrumentation* instrumentation = runtime->GetInstrumentation();
+    self->TransitionFromRunnableToSuspended(kSuspended);
+    runtime->GetThreadList()->SuspendAll("DisableMethodTracing");  // Names this helper.
+    instrumentation->DisableMethodTracing(key);  // Drops the level requested under |key|.
+    runtime->GetThreadList()->ResumeAll();
+    self->TransitionFromSuspendedToRunnable();
+  }
+
+ private:
+  static bool HasEventListener(const instrumentation::Instrumentation* instr, uint32_t event_type)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    switch (event_type) {
+      case instrumentation::Instrumentation::kMethodEntered:
+        return instr->HasMethodEntryListeners();
+      case instrumentation::Instrumentation::kMethodExited:
+        return instr->HasMethodExitListeners();
+      case instrumentation::Instrumentation::kMethodUnwind:
+        return instr->HasMethodUnwindListeners();
+      case instrumentation::Instrumentation::kDexPcMoved:
+        return instr->HasDexPcListeners();
+      case instrumentation::Instrumentation::kFieldRead:
+        return instr->HasFieldReadListeners();
+      case instrumentation::Instrumentation::kFieldWritten:
+        return instr->HasFieldWriteListeners();
+      case instrumentation::Instrumentation::kExceptionCaught:
+        return instr->HasExceptionCaughtListeners();
+      case instrumentation::Instrumentation::kBackwardBranch:
+        return instr->HasBackwardBranchListeners();
+      default:
+        LOG(FATAL) << "Unknown instrumentation event " << event_type;
+        UNREACHABLE();
+    }
+  }
+
+  static void ReportEvent(const instrumentation::Instrumentation* instr, uint32_t event_type,
+                          Thread* self, mirror::ArtMethod* method, mirror::Object* obj,
+                          uint32_t dex_pc)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    switch (event_type) {
+      case instrumentation::Instrumentation::kMethodEntered:
+        instr->MethodEnterEvent(self, obj, method, dex_pc);
+        break;
+      case instrumentation::Instrumentation::kMethodExited: {
+        JValue value;
+        instr->MethodExitEvent(self, obj, method, dex_pc, value);
+        break;
+      }
+      case instrumentation::Instrumentation::kMethodUnwind:
+        instr->MethodUnwindEvent(self, obj, method, dex_pc);
+        break;
+      case instrumentation::Instrumentation::kDexPcMoved:
+        instr->DexPcMovedEvent(self, obj, method, dex_pc);
+        break;
+      case instrumentation::Instrumentation::kFieldRead:
+        instr->FieldReadEvent(self, obj, method, dex_pc, nullptr);
+        break;
+      case instrumentation::Instrumentation::kFieldWritten: {
+        JValue value;
+        instr->FieldWriteEvent(self, obj, method, dex_pc, nullptr, value);
+        break;
+      }
+      case instrumentation::Instrumentation::kExceptionCaught: {
+        ThrowArithmeticExceptionDivideByZero();
+        mirror::Throwable* event_exception = self->GetException();
+        instr->ExceptionCaughtEvent(self, event_exception);
+        self->ClearException();
+        break;
+      }
+      case instrumentation::Instrumentation::kBackwardBranch:
+        instr->BackwardBranch(self, method, dex_pc);
+        break;
+      default:
+        LOG(FATAL) << "Unknown instrumentation event " << event_type;
+        UNREACHABLE();
+    }
+  }
+
+  static bool DidListenerReceiveEvent(const TestInstrumentationListener& listener,
+                                      uint32_t event_type) {
+    switch (event_type) {
+      case instrumentation::Instrumentation::kMethodEntered:
+        return listener.received_method_enter_event;
+      case instrumentation::Instrumentation::kMethodExited:
+        return listener.received_method_exit_event;
+      case instrumentation::Instrumentation::kMethodUnwind:
+        return listener.received_method_unwind_event;
+      case instrumentation::Instrumentation::kDexPcMoved:
+        return listener.received_dex_pc_moved_event;
+      case instrumentation::Instrumentation::kFieldRead:
+        return listener.received_field_read_event;
+      case instrumentation::Instrumentation::kFieldWritten:
+        return listener.received_field_written_event;
+      case instrumentation::Instrumentation::kExceptionCaught:
+        return listener.received_exception_caught_event;
+      case instrumentation::Instrumentation::kBackwardBranch:
+        return listener.received_backward_branch_event;
+      default:
+        LOG(FATAL) << "Unknown instrumentation event " << event_type;
+        UNREACHABLE();
+    }
+  }
+};
+
+TEST_F(InstrumentationTest, NoInstrumentation) {
+  ScopedObjectAccess soa(Thread::Current());
+  instrumentation::Instrumentation* instr = Runtime::Current()->GetInstrumentation();
+  ASSERT_NE(instr, nullptr);
+
+  EXPECT_FALSE(instr->AreExitStubsInstalled());
+  EXPECT_FALSE(instr->AreAllMethodsDeoptimized());
+  EXPECT_FALSE(instr->IsActive());
+  EXPECT_FALSE(instr->ShouldNotifyMethodEnterExitEvents());
+
+  // Test interpreter table is the default one.
+  EXPECT_EQ(instrumentation::kMainHandlerTable, instr->GetInterpreterHandlerTable());
+
+  // Check there is no registered listener.
+  EXPECT_FALSE(instr->HasDexPcListeners());
+  EXPECT_FALSE(instr->HasExceptionCaughtListeners());
+  EXPECT_FALSE(instr->HasFieldReadListeners());
+  EXPECT_FALSE(instr->HasFieldWriteListeners());
+  EXPECT_FALSE(instr->HasMethodEntryListeners());
+  EXPECT_FALSE(instr->HasMethodExitListeners());
+  EXPECT_FALSE(instr->HasMethodUnwindListeners());  // Replaces a duplicated IsActive() check.
+}
+
+// Test instrumentation listeners for each event.
+TEST_F(InstrumentationTest, MethodEntryEvent) {
+  TestEvent(instrumentation::Instrumentation::kMethodEntered);
+}
+
+TEST_F(InstrumentationTest, MethodExitEvent) {
+  TestEvent(instrumentation::Instrumentation::kMethodExited);
+}
+
+TEST_F(InstrumentationTest, MethodUnwindEvent) {
+  TestEvent(instrumentation::Instrumentation::kMethodUnwind);
+}
+
+TEST_F(InstrumentationTest, DexPcMovedEvent) {
+  TestEvent(instrumentation::Instrumentation::kDexPcMoved);
+}
+
+TEST_F(InstrumentationTest, FieldReadEvent) {
+  TestEvent(instrumentation::Instrumentation::kFieldRead);
+}
+
+TEST_F(InstrumentationTest, FieldWriteEvent) {
+  TestEvent(instrumentation::Instrumentation::kFieldWritten);
+}
+
+TEST_F(InstrumentationTest, ExceptionCaughtEvent) {
+  TestEvent(instrumentation::Instrumentation::kExceptionCaught);
+}
+
+TEST_F(InstrumentationTest, BackwardBranchEvent) {
+  TestEvent(instrumentation::Instrumentation::kBackwardBranch);  // Backward-branch check.
+}
+
+TEST_F(InstrumentationTest, DeoptimizeDirectMethod) {
+  // Deoptimizes one direct method and then restores it, checking the state after each step.
+  ScopedObjectAccess access(Thread::Current());
+  Thread* const self = access.Self();
+  jobject dex_loader = LoadDex("Instrumentation");
+  Instrumentation* const instrum = Runtime::Current()->GetInstrumentation();
+  ClassLinker* const linker = Runtime::Current()->GetClassLinker();
+  StackHandleScope<2> scope(self);
+  Handle<mirror::ClassLoader> loader(
+      scope.NewHandle(access.Decode<mirror::ClassLoader*>(dex_loader)));
+  mirror::Class* klass = linker->FindClass(self, "LInstrumentation;", loader);
+  ASSERT_NE(klass, nullptr);
+  Handle<mirror::ArtMethod> target(
+      scope.NewHandle(klass->FindDeclaredDirectMethod("instanceMethod", "()V")));
+  ASSERT_NE(target.Get(), nullptr);
+
+  // Before any request, neither the method nor the whole runtime is deoptimized.
+  EXPECT_FALSE(instrum->AreAllMethodsDeoptimized());
+  EXPECT_FALSE(instrum->IsDeoptimized(target.Get()));
+
+  DeoptimizeMethod(self, target, true);
+  EXPECT_FALSE(instrum->AreAllMethodsDeoptimized());
+  EXPECT_TRUE(instrum->AreExitStubsInstalled());
+  EXPECT_TRUE(instrum->IsDeoptimized(target.Get()));
+
+  UndeoptimizeMethod(self, target, "DeoptimizeDirectMethod", true);
+  EXPECT_FALSE(instrum->AreAllMethodsDeoptimized());
+  EXPECT_FALSE(instrum->IsDeoptimized(target.Get()));
+}
+
+TEST_F(InstrumentationTest, FullDeoptimization) {
+  // Deoptimizes everything and then reverts it, checking the global state at each step.
+  ScopedObjectAccess access(Thread::Current());
+  Thread* const self = access.Self();
+  Instrumentation* const instrum = Runtime::Current()->GetInstrumentation();
+  constexpr const char* kKey = "FullDeoptimization";  // Same key object for both requests.
+
+  EXPECT_FALSE(instrum->AreAllMethodsDeoptimized());
+
+  DeoptimizeEverything(self, kKey, true);
+  EXPECT_TRUE(instrum->AreAllMethodsDeoptimized());
+  EXPECT_TRUE(instrum->AreExitStubsInstalled());
+
+  UndeoptimizeEverything(self, kKey, true);
+  EXPECT_FALSE(instrum->AreAllMethodsDeoptimized());
+}
+
+TEST_F(InstrumentationTest, MixedDeoptimization) {
+  // Mixes single-method and full deoptimization; undoing the latter must keep the former.
+  using Level = Instrumentation::InstrumentationLevel;
+  constexpr const char* kKey = "MixedDeoptimization";
+  ScopedObjectAccess access(Thread::Current());
+  Thread* const self = access.Self();
+  jobject dex_loader = LoadDex("Instrumentation");
+  Instrumentation* const instrum = Runtime::Current()->GetInstrumentation();
+  ClassLinker* const linker = Runtime::Current()->GetClassLinker();
+  StackHandleScope<2> scope(self);
+  Handle<mirror::ClassLoader> loader(
+      scope.NewHandle(access.Decode<mirror::ClassLoader*>(dex_loader)));
+  mirror::Class* klass = linker->FindClass(self, "LInstrumentation;", loader);
+  ASSERT_NE(klass, nullptr);
+  Handle<mirror::ArtMethod> target(
+      scope.NewHandle(klass->FindDeclaredDirectMethod("instanceMethod", "()V")));
+  ASSERT_NE(target.Get(), nullptr);
+  EXPECT_FALSE(instrum->AreAllMethodsDeoptimized());
+  EXPECT_FALSE(instrum->IsDeoptimized(target.Get()));
+
+  // Deoptimizing a single method leaves the instrumentation level unchanged.
+  DeoptimizeMethod(self, target, true);
+  EXPECT_EQ(Level::kInstrumentNothing, GetCurrentInstrumentationLevel());
+  EXPECT_FALSE(instrum->AreAllMethodsDeoptimized());
+  EXPECT_TRUE(instrum->AreExitStubsInstalled());
+  EXPECT_TRUE(instrum->IsDeoptimized(target.Get()));
+
+  // Full deoptimization raises the level to the interpreter.
+  DeoptimizeEverything(self, kKey, false);
+  EXPECT_EQ(Level::kInstrumentWithInterpreter, GetCurrentInstrumentationLevel());
+  EXPECT_TRUE(instrum->AreAllMethodsDeoptimized());
+  EXPECT_TRUE(instrum->AreExitStubsInstalled());
+  EXPECT_TRUE(instrum->IsDeoptimized(target.Get()));
+
+  // Undoing full deoptimization keeps the individually deoptimized method deoptimized.
+  UndeoptimizeEverything(self, kKey, false);
+  EXPECT_EQ(Level::kInstrumentNothing, GetCurrentInstrumentationLevel());
+  EXPECT_FALSE(instrum->AreAllMethodsDeoptimized());
+  EXPECT_TRUE(instrum->AreExitStubsInstalled());
+  EXPECT_TRUE(instrum->IsDeoptimized(target.Get()));
+
+  UndeoptimizeMethod(self, target, kKey, true);
+  EXPECT_EQ(Level::kInstrumentNothing, GetCurrentInstrumentationLevel());
+  EXPECT_FALSE(instrum->AreAllMethodsDeoptimized());
+  EXPECT_FALSE(instrum->IsDeoptimized(target.Get()));
+}
+
+TEST_F(InstrumentationTest, MethodTracing_Interpreter) {
+  // Enables method tracing with the third argument set to true; expects the interpreter level.
+  using Level = Instrumentation::InstrumentationLevel;
+  constexpr const char* kKey = "MethodTracing";
+  ScopedObjectAccess access(Thread::Current());
+  Thread* const self = access.Self();
+  Instrumentation* const instrum = Runtime::Current()->GetInstrumentation();
+  EXPECT_FALSE(instrum->AreAllMethodsDeoptimized());
+
+  EnableMethodTracing(self, kKey, true);
+  EXPECT_EQ(Level::kInstrumentWithInterpreter, GetCurrentInstrumentationLevel());
+  EXPECT_TRUE(instrum->AreAllMethodsDeoptimized());
+  EXPECT_TRUE(instrum->AreExitStubsInstalled());
+
+  DisableMethodTracing(self, kKey);
+  EXPECT_EQ(Level::kInstrumentNothing, GetCurrentInstrumentationLevel());
+  EXPECT_FALSE(instrum->AreAllMethodsDeoptimized());
+}
+
+TEST_F(InstrumentationTest, MethodTracing_InstrumentationEntryExitStubs) {
+  // Enables method tracing with the third argument set to false; expects the stubs level.
+  using Level = Instrumentation::InstrumentationLevel;
+  constexpr const char* kKey = "MethodTracing";
+  ScopedObjectAccess access(Thread::Current());
+  Thread* const self = access.Self();
+  Instrumentation* const instrum = Runtime::Current()->GetInstrumentation();
+  EXPECT_FALSE(instrum->AreAllMethodsDeoptimized());
+
+  EnableMethodTracing(self, kKey, false);
+  EXPECT_EQ(Level::kInstrumentWithInstrumentationStubs, GetCurrentInstrumentationLevel());
+  EXPECT_FALSE(instrum->AreAllMethodsDeoptimized());
+  EXPECT_TRUE(instrum->AreExitStubsInstalled());
+
+  DisableMethodTracing(self, kKey);
+  EXPECT_EQ(Level::kInstrumentNothing, GetCurrentInstrumentationLevel());
+  EXPECT_FALSE(instrum->AreAllMethodsDeoptimized());
+}
+
+// Implemented as a macro so that a failing expectation reports the line of the calling test.
+#define CHECK_INSTRUMENTATION(_level, _user_count)                                      \
+  do {                                                                                  \
+    Instrumentation* const checked = Runtime::Current()->GetInstrumentation();          \
+    const bool wants_interpreter =                                                      \
+      (_level == Instrumentation::InstrumentationLevel::kInstrumentWithInterpreter);    \
+    EXPECT_EQ(_level, GetCurrentInstrumentationLevel());                                \
+    EXPECT_EQ(_user_count, GetInstrumentationUserCount());                              \
+    /* Interpret-only holds when forced or when the interpreter level is expected. */   \
+    if (checked->IsForcedInterpretOnly() || wants_interpreter) {                        \
+      EXPECT_TRUE(checked->InterpretOnly());                                            \
+    } else {                                                                            \
+      EXPECT_FALSE(checked->InterpretOnly());                                           \
+    }                                                                                   \
+    /* Every method counts as deoptimized only at the interpreter level. */             \
+    if (wants_interpreter) {                                                            \
+      EXPECT_TRUE(checked->AreAllMethodsDeoptimized());                                 \
+    } else {                                                                            \
+      EXPECT_FALSE(checked->AreAllMethodsDeoptimized());                                \
+    }                                                                                   \
+  } while (false)
+
+TEST_F(InstrumentationTest, ConfigureStubs_Nothing) {
+  using Level = Instrumentation::InstrumentationLevel;
+  CHECK_INSTRUMENTATION(Level::kInstrumentNothing, 0U);
+  // Requesting no instrumentation while none is active must be a no-op.
+  CheckConfigureStubs(kClientOneKey, Level::kInstrumentNothing);
+  CHECK_INSTRUMENTATION(Level::kInstrumentNothing, 0U);
+}
+
+TEST_F(InstrumentationTest, ConfigureStubs_InstrumentationStubs) {
+  // One client turns instrumentation stubs on and then off again.
+  using Level = Instrumentation::InstrumentationLevel;
+  CHECK_INSTRUMENTATION(Level::kInstrumentNothing, 0U);
+
+  // Switch to instrumentation stubs.
+  CheckConfigureStubs(kClientOneKey, Level::kInstrumentWithInstrumentationStubs);
+  CHECK_INSTRUMENTATION(Level::kInstrumentWithInstrumentationStubs, 1U);
+
+  // Disable instrumentation again.
+  CheckConfigureStubs(kClientOneKey, Level::kInstrumentNothing);
+  CHECK_INSTRUMENTATION(Level::kInstrumentNothing, 0U);
+}
+
+TEST_F(InstrumentationTest, ConfigureStubs_Interpreter) {
+  using Level = Instrumentation::InstrumentationLevel;
+  CHECK_INSTRUMENTATION(Level::kInstrumentNothing, 0U);
+
+  // Switch to the interpreter.
+  CheckConfigureStubs(kClientOneKey, Level::kInstrumentWithInterpreter);
+  CHECK_INSTRUMENTATION(Level::kInstrumentWithInterpreter, 1U);
+
+  // Disable instrumentation again.
+  CheckConfigureStubs(kClientOneKey, Level::kInstrumentNothing);
+  CHECK_INSTRUMENTATION(Level::kInstrumentNothing, 0U);
+}
+
+TEST_F(InstrumentationTest, ConfigureStubs_InstrumentationStubsToInterpreter) {
+  // One client upgrades from instrumentation stubs to the interpreter, then disables both.
+  using Level = Instrumentation::InstrumentationLevel;
+
+  CHECK_INSTRUMENTATION(Level::kInstrumentNothing, 0U);
+
+  // Start with instrumentation stubs.
+  CheckConfigureStubs(kClientOneKey, Level::kInstrumentWithInstrumentationStubs);
+  CHECK_INSTRUMENTATION(Level::kInstrumentWithInstrumentationStubs, 1U);
+
+  // Move the same client to the interpreter; the user count stays at one.
+  CheckConfigureStubs(kClientOneKey, Level::kInstrumentWithInterpreter);
+  CHECK_INSTRUMENTATION(Level::kInstrumentWithInterpreter, 1U);
+
+  // Disable instrumentation again.
+  CheckConfigureStubs(kClientOneKey, Level::kInstrumentNothing);
+  CHECK_INSTRUMENTATION(Level::kInstrumentNothing, 0U);
+}
+
+TEST_F(InstrumentationTest, ConfigureStubs_InterpreterToInstrumentationStubs) {
+  // One client downgrades from the interpreter to instrumentation stubs, then disables both.
+  using Level = Instrumentation::InstrumentationLevel;
+
+  CHECK_INSTRUMENTATION(Level::kInstrumentNothing, 0U);
+
+  // Start with the interpreter.
+  CheckConfigureStubs(kClientOneKey, Level::kInstrumentWithInterpreter);
+  CHECK_INSTRUMENTATION(Level::kInstrumentWithInterpreter, 1U);
+
+  // Move the same client to instrumentation stubs; the user count stays at one.
+  CheckConfigureStubs(kClientOneKey, Level::kInstrumentWithInstrumentationStubs);
+  CHECK_INSTRUMENTATION(Level::kInstrumentWithInstrumentationStubs, 1U);
+
+  // Disable instrumentation again.
+  CheckConfigureStubs(kClientOneKey, Level::kInstrumentNothing);
+  CHECK_INSTRUMENTATION(Level::kInstrumentNothing, 0U);
+}
+
+TEST_F(InstrumentationTest,
+       ConfigureStubs_InstrumentationStubsToInterpreterToInstrumentationStubs) {
+  // One client moves from stubs to the interpreter, back to stubs, then disables everything.
+  using Level = Instrumentation::InstrumentationLevel;
+  constexpr Level kStubs = Level::kInstrumentWithInstrumentationStubs;
+  constexpr Level kInterpreter = Level::kInstrumentWithInterpreter;
+
+  CHECK_INSTRUMENTATION(Level::kInstrumentNothing, 0U);
+
+  // Start with instrumentation stubs.
+  CheckConfigureStubs(kClientOneKey, kStubs);
+  CHECK_INSTRUMENTATION(kStubs, 1U);
+
+  // Upgrade to the interpreter.
+  CheckConfigureStubs(kClientOneKey, kInterpreter);
+  CHECK_INSTRUMENTATION(kInterpreter, 1U);
+
+  // Go back to instrumentation stubs.
+  CheckConfigureStubs(kClientOneKey, kStubs);
+  CHECK_INSTRUMENTATION(kStubs, 1U);
+
+  // Disable instrumentation again.
+  CheckConfigureStubs(kClientOneKey, Level::kInstrumentNothing);
+  CHECK_INSTRUMENTATION(Level::kInstrumentNothing, 0U);
+}
+
+TEST_F(InstrumentationTest, MultiConfigureStubs_Nothing) {
+  using Level = Instrumentation::InstrumentationLevel;
+  CHECK_INSTRUMENTATION(Level::kInstrumentNothing, 0U);
+  // Neither client requesting kInstrumentNothing may change the level or the user count.
+  CheckConfigureStubs(kClientOneKey, Level::kInstrumentNothing);
+  CHECK_INSTRUMENTATION(Level::kInstrumentNothing, 0U);
+
+  CheckConfigureStubs(kClientTwoKey, Level::kInstrumentNothing);
+  CHECK_INSTRUMENTATION(Level::kInstrumentNothing, 0U);
+}
+
+TEST_F(InstrumentationTest, MultiConfigureStubs_InstrumentationStubs) {
+  // Two clients both request instrumentation stubs; the stubs level must hold until the
+  // last client has withdrawn its request.
+  using Level = Instrumentation::InstrumentationLevel;
+  constexpr Level kStubs = Level::kInstrumentWithInstrumentationStubs;
+
+  CHECK_INSTRUMENTATION(Level::kInstrumentNothing, 0U);
+
+  // 1st client asks for instrumentation stubs.
+  CheckConfigureStubs(kClientOneKey, kStubs);
+  CHECK_INSTRUMENTATION(kStubs, 1U);
+
+  // 2nd client asks for instrumentation stubs as well.
+  CheckConfigureStubs(kClientTwoKey, kStubs);
+  CHECK_INSTRUMENTATION(kStubs, 2U);
+
+  // 1st client withdraws its request, but the 2nd client still needs the
+  // instrumentation stubs.
+  CheckConfigureStubs(kClientOneKey, Level::kInstrumentNothing);
+  CHECK_INSTRUMENTATION(kStubs, 1U);
+
+  // 2nd client withdraws its request too.
+  CheckConfigureStubs(kClientTwoKey, Level::kInstrumentNothing);
+  CHECK_INSTRUMENTATION(Level::kInstrumentNothing, 0U);
+}
+
+TEST_F(InstrumentationTest, MultiConfigureStubs_Interpreter) {
+  // Two clients both request the interpreter; it must hold until the last one withdraws.
+  using Level = Instrumentation::InstrumentationLevel;
+  CHECK_INSTRUMENTATION(Level::kInstrumentNothing, 0U);
+
+  // 1st client asks for the interpreter.
+  CheckConfigureStubs(kClientOneKey, Level::kInstrumentWithInterpreter);
+  CHECK_INSTRUMENTATION(Level::kInstrumentWithInterpreter, 1U);
+
+  // 2nd client asks for the interpreter as well.
+  CheckConfigureStubs(kClientTwoKey, Level::kInstrumentWithInterpreter);
+  CHECK_INSTRUMENTATION(Level::kInstrumentWithInterpreter, 2U);
+
+  // 1st client withdraws its request, but the 2nd client still needs the interpreter.
+  CheckConfigureStubs(kClientOneKey, Level::kInstrumentNothing);
+  CHECK_INSTRUMENTATION(Level::kInstrumentWithInterpreter, 1U);
+
+  // 2nd client withdraws its request too.
+  CheckConfigureStubs(kClientTwoKey, Level::kInstrumentNothing);
+  CHECK_INSTRUMENTATION(Level::kInstrumentNothing, 0U);
+}
+
+TEST_F(InstrumentationTest, MultiConfigureStubs_InstrumentationStubsThenInterpreter) {
+  // 1st client requests stubs, 2nd client requests the interpreter; the interpreter wins.
+  using Level = Instrumentation::InstrumentationLevel;
+
+  CHECK_INSTRUMENTATION(Level::kInstrumentNothing, 0U);
+
+  // 1st client asks for instrumentation stubs.
+  CheckConfigureStubs(kClientOneKey, Level::kInstrumentWithInstrumentationStubs);
+  CHECK_INSTRUMENTATION(Level::kInstrumentWithInstrumentationStubs, 1U);
+
+  // 2nd client asks for the interpreter.
+  CheckConfigureStubs(kClientTwoKey, Level::kInstrumentWithInterpreter);
+  CHECK_INSTRUMENTATION(Level::kInstrumentWithInterpreter, 2U);
+
+  // 1st client withdraws its request, but the 2nd client still needs the interpreter.
+  CheckConfigureStubs(kClientOneKey, Level::kInstrumentNothing);
+  CHECK_INSTRUMENTATION(Level::kInstrumentWithInterpreter, 1U);
+
+  // 2nd client withdraws its request too.
+  CheckConfigureStubs(kClientTwoKey, Level::kInstrumentNothing);
+  CHECK_INSTRUMENTATION(Level::kInstrumentNothing, 0U);
+}
+
+TEST_F(InstrumentationTest, MultiConfigureStubs_InterpreterThenInstrumentationStubs) {
+  // 1st client requests the interpreter, 2nd client requests stubs; the interpreter wins.
+  using Level = Instrumentation::InstrumentationLevel;
+
+  CHECK_INSTRUMENTATION(Level::kInstrumentNothing, 0U);
+
+  // 1st client asks for the interpreter.
+  CheckConfigureStubs(kClientOneKey, Level::kInstrumentWithInterpreter);
+  CHECK_INSTRUMENTATION(Level::kInstrumentWithInterpreter, 1U);
+
+  // 2nd client asks for instrumentation stubs; the level stays at the interpreter.
+  CheckConfigureStubs(kClientTwoKey, Level::kInstrumentWithInstrumentationStubs);
+  CHECK_INSTRUMENTATION(Level::kInstrumentWithInterpreter, 2U);
+
+  // 1st client withdraws its request, but the 2nd client still needs the
+  // instrumentation stubs.
+  CheckConfigureStubs(kClientOneKey, Level::kInstrumentNothing);
+  CHECK_INSTRUMENTATION(Level::kInstrumentWithInstrumentationStubs, 1U);
+
+  // 2nd client withdraws its request too.
+  CheckConfigureStubs(kClientTwoKey, Level::kInstrumentNothing);
+  CHECK_INSTRUMENTATION(Level::kInstrumentNothing, 0U);
+}
+
+}  // namespace instrumentation
+}  // namespace art
diff --git a/runtime/interpreter/interpreter.cc b/runtime/interpreter/interpreter.cc
index 423b952..a37aee5 100644
--- a/runtime/interpreter/interpreter.cc
+++ b/runtime/interpreter/interpreter.cc
@@ -423,7 +423,7 @@
     }
     ShadowFrame* old_frame = shadow_frame;
     shadow_frame = shadow_frame->GetLink();
-    delete old_frame;
+    ShadowFrame::DeleteDeoptimizedFrame(old_frame);
   }
   ret_val->SetJ(value.GetJ());
 }
diff --git a/runtime/interpreter/interpreter_common.cc b/runtime/interpreter/interpreter_common.cc
index 4765ebc..59d3008 100644
--- a/runtime/interpreter/interpreter_common.cc
+++ b/runtime/interpreter/interpreter_common.cc
@@ -21,6 +21,7 @@
 #include "debugger.h"
 #include "mirror/array-inl.h"
 #include "unstarted_runtime.h"
+#include "verifier/method_verifier.h"
 
 namespace art {
 namespace interpreter {
@@ -485,16 +486,29 @@
 template<bool is_range, bool do_assignability_check>
 bool DoCall(ArtMethod* called_method, Thread* self, ShadowFrame& shadow_frame,
             const Instruction* inst, uint16_t inst_data, JValue* result) {
+  bool string_init = false;
+  // Replace calls to String.<init> with equivalent StringFactory call.
+  if (called_method->GetDeclaringClass()->IsStringClass() && called_method->IsConstructor()) {
+    ScopedObjectAccessUnchecked soa(self);
+    jmethodID mid = soa.EncodeMethod(called_method);
+    called_method = soa.DecodeMethod(WellKnownClasses::StringInitToStringFactoryMethodID(mid));
+    string_init = true;
+  }
+
   // Compute method information.
   const DexFile::CodeItem* code_item = called_method->GetCodeItem();
   const uint16_t num_ins = (is_range) ? inst->VRegA_3rc(inst_data) : inst->VRegA_35c(inst_data);
   uint16_t num_regs;
   if (LIKELY(code_item != nullptr)) {
     num_regs = code_item->registers_size_;
-    DCHECK_EQ(num_ins, code_item->ins_size_);
+    DCHECK_EQ(string_init ? num_ins - 1 : num_ins, code_item->ins_size_);
   } else {
     DCHECK(called_method->IsNative() || called_method->IsProxyMethod());
     num_regs = num_ins;
+    if (string_init) {
+      // The new StringFactory call is static and has one fewer argument.
+      num_regs--;
+    }
   }
 
   // Allocate shadow frame on the stack.
@@ -504,7 +518,7 @@
                                                     memory));
 
   // Initialize new shadow frame.
-  const size_t first_dest_reg = num_regs - num_ins;
+  size_t first_dest_reg = num_regs - num_ins;
   if (do_assignability_check) {
     // Slow path.
     // We might need to do class loading, which incurs a thread state change to kNative. So
@@ -536,6 +550,10 @@
       new_shadow_frame->SetVRegReference(dest_reg, shadow_frame.GetVRegReference(receiver_reg));
       ++dest_reg;
       ++arg_offset;
+    } else if (string_init) {
+      // Skip the referrer for the new static StringFactory call.
+      ++dest_reg;
+      ++arg_offset;
     }
     for (uint32_t shorty_pos = 0; dest_reg < num_regs; ++shorty_pos, ++dest_reg, ++arg_offset) {
       DCHECK_LT(shorty_pos + 1, shorty_len);
@@ -583,7 +601,12 @@
   } else {
     // Fast path: no extra checks.
     if (is_range) {
-      const uint16_t first_src_reg = inst->VRegC_3rc();
+      uint16_t first_src_reg = inst->VRegC_3rc();
+      if (string_init) {
+        // Skip the referrer for the new static StringFactory call.
+        ++first_src_reg;
+        ++first_dest_reg;
+      }
       for (size_t src_reg = first_src_reg, dest_reg = first_dest_reg; dest_reg < num_regs;
           ++dest_reg, ++src_reg) {
         AssignRegister(new_shadow_frame, shadow_frame, dest_reg, src_reg);
@@ -592,12 +615,19 @@
       DCHECK_LE(num_ins, 5U);
       uint16_t regList = inst->Fetch16(2);
       uint16_t count = num_ins;
+      size_t arg_index = 0;
       if (count == 5) {
         AssignRegister(new_shadow_frame, shadow_frame, first_dest_reg + 4U,
                        (inst_data >> 8) & 0x0f);
         --count;
-       }
-      for (size_t arg_index = 0; arg_index < count; ++arg_index, regList >>= 4) {
+      }
+      if (string_init) {
+        // Skip the referrer for the new static StringFactory call.
+        regList >>= 4;
+        ++first_dest_reg;
+        --count;
+      }
+      for (; arg_index < count; ++arg_index, regList >>= 4) {
         AssignRegister(new_shadow_frame, shadow_frame, first_dest_reg + arg_index, regList & 0x0f);
       }
     }
@@ -631,6 +661,38 @@
   } else {
     UnstartedRuntimeInvoke(self, code_item, new_shadow_frame, result, first_dest_reg);
   }
+
+  if (string_init && !self->IsExceptionPending()) {
+    // Set the new string result of the StringFactory.
+    uint32_t vregC = (is_range) ? inst->VRegC_3rc() : inst->VRegC_35c();
+    shadow_frame.SetVRegReference(vregC, result->GetL());
+    // Overwrite all potential copies of the original result of the new-instance of string with the
+    // new result of the StringFactory. Use the verifier to find this set of registers.
+    mirror::ArtMethod* method = shadow_frame.GetMethod();
+    MethodReference method_ref = method->ToMethodReference();
+    SafeMap<uint32_t, std::set<uint32_t>> string_init_map;
+    SafeMap<uint32_t, std::set<uint32_t>>* string_init_map_ptr;
+    MethodRefToStringInitRegMap& method_to_string_init_map = Runtime::Current()->GetStringInitMap();
+    auto it = method_to_string_init_map.find(method_ref);
+    if (it == method_to_string_init_map.end()) {
+      string_init_map = std::move(verifier::MethodVerifier::FindStringInitMap(method));
+      method_to_string_init_map.Overwrite(method_ref, string_init_map);
+      string_init_map_ptr = &string_init_map;
+    } else {
+      string_init_map_ptr = &it->second;
+    }
+    if (string_init_map_ptr->size() != 0) {
+      uint32_t dex_pc = shadow_frame.GetDexPC();
+      auto map_it = string_init_map_ptr->find(dex_pc);
+      if (map_it != string_init_map_ptr->end()) {
+        const std::set<uint32_t>& reg_set = map_it->second;
+        for (auto set_it = reg_set.begin(); set_it != reg_set.end(); ++set_it) {
+          shadow_frame.SetVRegReference(*set_it, result->GetL());
+        }
+      }
+    }
+  }
+
   return !self->IsExceptionPending();
 }
 
diff --git a/runtime/interpreter/interpreter_common.h b/runtime/interpreter/interpreter_common.h
index dbedc16..6acc72e 100644
--- a/runtime/interpreter/interpreter_common.h
+++ b/runtime/interpreter/interpreter_common.h
@@ -369,7 +369,7 @@
       oss << StringPrintf(" vreg%u=0x%08X", i, raw_value);
       if (ref_value != nullptr) {
         if (ref_value->GetClass()->IsStringClass() &&
-            ref_value->AsString()->GetCharArray() != nullptr) {
+            ref_value->AsString()->GetValue() != nullptr) {
           oss << "/java.lang.String \"" << ref_value->AsString()->ToModifiedUtf8() << "\"";
         } else {
           oss << "/" << PrettyTypeOf(ref_value);
diff --git a/runtime/interpreter/interpreter_goto_table_impl.cc b/runtime/interpreter/interpreter_goto_table_impl.cc
index dc0b687..dd1f55e 100644
--- a/runtime/interpreter/interpreter_goto_table_impl.cc
+++ b/runtime/interpreter/interpreter_goto_table_impl.cc
@@ -156,7 +156,6 @@
   const Instruction* inst = Instruction::At(code_item->insns_ + dex_pc);
   uint16_t inst_data;
   const void* const* currentHandlersTable;
-  bool notified_method_entry_event = false;
   UPDATE_HANDLER_TABLE();
   if (LIKELY(dex_pc == 0)) {  // We are entering the method as opposed to deoptimizing.
     if (kIsDebugBuild) {
@@ -166,7 +165,6 @@
     if (UNLIKELY(instrumentation->HasMethodEntryListeners())) {
       instrumentation->MethodEnterEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                         shadow_frame.GetMethod(), 0);
-      notified_method_entry_event = true;
     }
   }
 
@@ -264,9 +262,6 @@
       instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                        shadow_frame.GetMethod(), dex_pc,
                                        result);
-    } else if (UNLIKELY(instrumentation->HasDexPcListeners())) {
-      instrumentation->DexPcMovedEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
-                                       shadow_frame.GetMethod(), dex_pc);
     }
     return result;
   }
@@ -281,9 +276,6 @@
       instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                        shadow_frame.GetMethod(), dex_pc,
                                        result);
-    } else if (UNLIKELY(instrumentation->HasDexPcListeners())) {
-      instrumentation->DexPcMovedEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
-                                       shadow_frame.GetMethod(), dex_pc);
     }
     return result;
   }
@@ -299,9 +291,6 @@
       instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                        shadow_frame.GetMethod(), dex_pc,
                                        result);
-    } else if (UNLIKELY(instrumentation->HasDexPcListeners())) {
-      instrumentation->DexPcMovedEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
-                                       shadow_frame.GetMethod(), dex_pc);
     }
     return result;
   }
@@ -316,9 +305,6 @@
       instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                        shadow_frame.GetMethod(), dex_pc,
                                        result);
-    } else if (UNLIKELY(instrumentation->HasDexPcListeners())) {
-      instrumentation->DexPcMovedEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
-                                       shadow_frame.GetMethod(), dex_pc);
     }
     return result;
   }
@@ -352,9 +338,6 @@
       instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                        shadow_frame.GetMethod(), dex_pc,
                                        result);
-    } else if (UNLIKELY(instrumentation->HasDexPcListeners())) {
-      instrumentation->DexPcMovedEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
-                                       shadow_frame.GetMethod(), dex_pc);
     }
     return result;
   }
@@ -526,10 +509,20 @@
   HANDLE_INSTRUCTION_END();
 
   HANDLE_INSTRUCTION_START(NEW_INSTANCE) {
-    Runtime* runtime = Runtime::Current();
-    Object* obj = AllocObjectFromCode<do_access_check, true>(
-        inst->VRegB_21c(), shadow_frame.GetMethod(), self,
-        runtime->GetHeap()->GetCurrentAllocator());
+    Object* obj = nullptr;
+    Class* c = ResolveVerifyAndClinit(inst->VRegB_21c(), shadow_frame.GetMethod(),
+                                      self, false, do_access_check);
+    if (LIKELY(c != nullptr)) {
+      if (UNLIKELY(c->IsStringClass())) {
+        gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
+        mirror::SetStringCountVisitor visitor(0);
+        obj = String::Alloc<true>(self, 0, allocator_type, visitor);
+      } else {
+        obj = AllocObjectFromCode<do_access_check, true>(
+            inst->VRegB_21c(), shadow_frame.GetMethod(), self,
+            Runtime::Current()->GetHeap()->GetCurrentAllocator());
+      }
+    }
     if (UNLIKELY(obj == nullptr)) {
       HANDLE_PENDING_EXCEPTION();
     } else {
@@ -2500,26 +2493,16 @@
 // Note: we do not use the kReturn instruction flag here (to test the instruction is a return). The
 // compiler seems to not evaluate "(Instruction::FlagsOf(Instruction::code) & kReturn) != 0" to
 // a constant condition that would remove the "if" statement so the test is free.
-#define INSTRUMENTATION_INSTRUCTION_HANDLER(o, code, n, f, r, i, a, v)                            \
-  alt_op_##code: {                                                                                \
-    if (Instruction::code != Instruction::RETURN_VOID &&                                          \
-        Instruction::code != Instruction::RETURN_VOID_NO_BARRIER &&                               \
-        Instruction::code != Instruction::RETURN &&                                               \
-        Instruction::code != Instruction::RETURN_WIDE &&                                          \
-        Instruction::code != Instruction::RETURN_OBJECT) {                                        \
-      if (LIKELY(!notified_method_entry_event)) {                                                 \
-        Runtime* runtime = Runtime::Current();                                                    \
-        const instrumentation::Instrumentation* instrumentation = runtime->GetInstrumentation();  \
-        if (UNLIKELY(instrumentation->HasDexPcListeners())) {                                     \
-          Object* this_object = shadow_frame.GetThisObject(code_item->ins_size_);                 \
-          instrumentation->DexPcMovedEvent(self, this_object, shadow_frame.GetMethod(), dex_pc);  \
-        }                                                                                         \
-      } else {                                                                                    \
-        notified_method_entry_event = false;                                                      \
-      }                                                                                           \
-    }                                                                                             \
-    UPDATE_HANDLER_TABLE();                                                                       \
-    goto *handlersTable[instrumentation::kMainHandlerTable][Instruction::code];                   \
+#define INSTRUMENTATION_INSTRUCTION_HANDLER(o, code, n, f, r, i, a, v)                        \
+  alt_op_##code: {                                                                            \
+    Runtime* const runtime = Runtime::Current();                                              \
+    const instrumentation::Instrumentation* instrumentation = runtime->GetInstrumentation();  \
+    if (UNLIKELY(instrumentation->HasDexPcListeners())) {                                     \
+      Object* this_object = shadow_frame.GetThisObject(code_item->ins_size_);                 \
+      instrumentation->DexPcMovedEvent(self, this_object, shadow_frame.GetMethod(), dex_pc);  \
+    }                                                                                         \
+    UPDATE_HANDLER_TABLE();                                                                   \
+    goto *handlersTable[instrumentation::kMainHandlerTable][Instruction::code];               \
   }
 #include "dex_instruction_list.h"
       DEX_INSTRUCTION_LIST(INSTRUMENTATION_INSTRUCTION_HANDLER)
diff --git a/runtime/interpreter/interpreter_switch_impl.cc b/runtime/interpreter/interpreter_switch_impl.cc
index 82f0009..0e3420f 100644
--- a/runtime/interpreter/interpreter_switch_impl.cc
+++ b/runtime/interpreter/interpreter_switch_impl.cc
@@ -47,10 +47,7 @@
 // Code to run before each dex instruction.
 #define PREAMBLE()                                                                              \
   do {                                                                                          \
-    DCHECK(!inst->IsReturn());                                                                  \
-    if (UNLIKELY(notified_method_entry_event)) {                                                \
-      notified_method_entry_event = false;                                                      \
-    } else if (UNLIKELY(instrumentation->HasDexPcListeners())) {                                \
+    if (UNLIKELY(instrumentation->HasDexPcListeners())) {                                       \
       instrumentation->DexPcMovedEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),  \
                                        shadow_frame.GetMethod(), dex_pc);                       \
     }                                                                                           \
@@ -67,7 +64,6 @@
   self->VerifyStack();
 
   uint32_t dex_pc = shadow_frame.GetDexPC();
-  bool notified_method_entry_event = false;
   const auto* const instrumentation = Runtime::Current()->GetInstrumentation();
   if (LIKELY(dex_pc == 0)) {  // We are entering the method as opposed to deoptimizing.
     if (kIsDebugBuild) {
@@ -76,7 +72,6 @@
     if (UNLIKELY(instrumentation->HasMethodEntryListeners())) {
       instrumentation->MethodEnterEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                         shadow_frame.GetMethod(), 0);
-      notified_method_entry_event = true;
     }
   }
   const uint16_t* const insns = code_item->insns_;
@@ -171,19 +166,18 @@
         break;
       }
       case Instruction::RETURN_VOID_NO_BARRIER: {
+        PREAMBLE();
         JValue result;
         self->AllowThreadSuspension();
         if (UNLIKELY(instrumentation->HasMethodExitListeners())) {
           instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                            shadow_frame.GetMethod(), inst->GetDexPc(insns),
                                            result);
-        } else if (UNLIKELY(instrumentation->HasDexPcListeners())) {
-          instrumentation->DexPcMovedEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
-                                           shadow_frame.GetMethod(), dex_pc);
         }
         return result;
       }
       case Instruction::RETURN_VOID: {
+        PREAMBLE();
         QuasiAtomic::ThreadFenceForConstructor();
         JValue result;
         self->AllowThreadSuspension();
@@ -191,13 +185,11 @@
           instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                            shadow_frame.GetMethod(), inst->GetDexPc(insns),
                                            result);
-        } else if (UNLIKELY(instrumentation->HasDexPcListeners())) {
-          instrumentation->DexPcMovedEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
-                                           shadow_frame.GetMethod(), dex_pc);
         }
         return result;
       }
       case Instruction::RETURN: {
+        PREAMBLE();
         JValue result;
         result.SetJ(0);
         result.SetI(shadow_frame.GetVReg(inst->VRegA_11x(inst_data)));
@@ -206,13 +198,11 @@
           instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                            shadow_frame.GetMethod(), inst->GetDexPc(insns),
                                            result);
-        } else if (UNLIKELY(instrumentation->HasDexPcListeners())) {
-          instrumentation->DexPcMovedEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
-                                           shadow_frame.GetMethod(), dex_pc);
         }
         return result;
       }
       case Instruction::RETURN_WIDE: {
+        PREAMBLE();
         JValue result;
         result.SetJ(shadow_frame.GetVRegLong(inst->VRegA_11x(inst_data)));
         self->AllowThreadSuspension();
@@ -220,13 +210,11 @@
           instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                            shadow_frame.GetMethod(), inst->GetDexPc(insns),
                                            result);
-        } else if (UNLIKELY(instrumentation->HasDexPcListeners())) {
-          instrumentation->DexPcMovedEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
-                                           shadow_frame.GetMethod(), dex_pc);
         }
         return result;
       }
       case Instruction::RETURN_OBJECT: {
+        PREAMBLE();
         JValue result;
         self->AllowThreadSuspension();
         const size_t ref_idx = inst->VRegA_11x(inst_data);
@@ -254,9 +242,6 @@
           instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                            shadow_frame.GetMethod(), inst->GetDexPc(insns),
                                            result);
-        } else if (UNLIKELY(instrumentation->HasDexPcListeners())) {
-          instrumentation->DexPcMovedEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
-                                           shadow_frame.GetMethod(), dex_pc);
         }
         return result;
       }
@@ -428,10 +413,20 @@
       }
       case Instruction::NEW_INSTANCE: {
         PREAMBLE();
-        Runtime* runtime = Runtime::Current();
-        Object* obj = AllocObjectFromCode<do_access_check, true>(
-            inst->VRegB_21c(), shadow_frame.GetMethod(), self,
-            runtime->GetHeap()->GetCurrentAllocator());
+        Object* obj = nullptr;
+        Class* c = ResolveVerifyAndClinit(inst->VRegB_21c(), shadow_frame.GetMethod(),
+                                          self, false, do_access_check);
+        if (LIKELY(c != nullptr)) {
+          if (UNLIKELY(c->IsStringClass())) {
+            gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
+            mirror::SetStringCountVisitor visitor(0);
+            obj = String::Alloc<true>(self, 0, allocator_type, visitor);
+          } else {
+            obj = AllocObjectFromCode<do_access_check, true>(
+              inst->VRegB_21c(), shadow_frame.GetMethod(), self,
+              Runtime::Current()->GetHeap()->GetCurrentAllocator());
+          }
+        }
         if (UNLIKELY(obj == nullptr)) {
           HANDLE_PENDING_EXCEPTION();
         } else {
diff --git a/runtime/interpreter/unstarted_runtime.cc b/runtime/interpreter/unstarted_runtime.cc
index f30c93a..317106b 100644
--- a/runtime/interpreter/unstarted_runtime.cc
+++ b/runtime/interpreter/unstarted_runtime.cc
@@ -755,6 +755,114 @@
   result->SetL(h_obj.Get());
 }
 
+// Handles String.getCharsNoCheck(int, int, char[], int) on new-style Strings during compilation.
+static void UnstartedStringGetCharsNoCheck(
+    Thread* self, ShadowFrame* shadow_frame, JValue* result ATTRIBUTE_UNUSED, size_t arg_offset)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  jint start = shadow_frame->GetVReg(arg_offset + 1);  // Receiver is at arg_offset.
+  jint end = shadow_frame->GetVReg(arg_offset + 2);
+  jint index = shadow_frame->GetVReg(arg_offset + 4);  // arg_offset + 3 is the char[].
+  mirror::String* string = shadow_frame->GetVRegReference(arg_offset)->AsString();
+  if (string == nullptr) {
+    AbortTransactionOrFail(self, "String.getCharsNoCheck with null object");
+    return;
+  }
+  DCHECK_GE(start, 0);
+  DCHECK_LE(end, string->GetLength());  // Exclusive end must not run past the string.
+  StackHandleScope<1> hs(self);
+  Handle<mirror::CharArray> h_char_array(hs.NewHandle(shadow_frame->GetVRegReference(arg_offset + 3)->AsCharArray()));
+  DCHECK_LE(index, h_char_array->GetLength());
+  DCHECK_LE(end - start, h_char_array->GetLength() - index);  // Destination must have room.
+  string->GetChars(start, end, h_char_array, index);
+}
+
+// Handles String.charAt(int) on new-style String objects during compilation.
+static void UnstartedStringCharAt(
+    Thread* self, ShadowFrame* shadow_frame, JValue* result, size_t arg_offset)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  jint index = shadow_frame->GetVReg(arg_offset + 1);
+  mirror::String* string = shadow_frame->GetVRegReference(arg_offset)->AsString();
+  if (string == nullptr) {
+    AbortTransactionOrFail(self, "String.charAt with null object");
+    return;
+  }
+  result->SetC(string->CharAt(index));
+}
+
+// Handles String.setCharAt(int, char): sets a char in a new-style String during compilation.
+static void UnstartedStringSetCharAt(
+    Thread* self, ShadowFrame* shadow_frame, JValue* result ATTRIBUTE_UNUSED, size_t arg_offset)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  jint index = shadow_frame->GetVReg(arg_offset + 1);
+  jchar c = shadow_frame->GetVReg(arg_offset + 2);
+  mirror::String* string = shadow_frame->GetVRegReference(arg_offset)->AsString();
+  if (string == nullptr) {
+    AbortTransactionOrFail(self, "String.setCharAt with null object");
+    return;
+  }
+  string->SetCharAt(index, c);
+}
+
+// Handles StringFactory.newStringFromChars(int, int, char[]) during compilation.
+static void UnstartedStringFactoryNewStringFromChars(
+    Thread* self, ShadowFrame* shadow_frame, JValue* result, size_t arg_offset)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  jint offset = shadow_frame->GetVReg(arg_offset);
+  jint char_count = shadow_frame->GetVReg(arg_offset + 1);
+  DCHECK_GE(char_count, 0);
+  StackHandleScope<1> hs(self);
+  Handle<mirror::CharArray> h_char_array(hs.NewHandle(shadow_frame->GetVRegReference(arg_offset + 2)->AsCharArray()));
+  Runtime* runtime = Runtime::Current();
+  gc::AllocatorType allocator = runtime->GetHeap()->GetCurrentAllocator();
+  result->SetL(mirror::String::AllocFromCharArray<true>(self, char_count, h_char_array, offset, allocator));
+}
+
+// Handles StringFactory.newStringFromString(String): allocates from the argument's full length.
+static void UnstartedStringFactoryNewStringFromString(
+    Thread* self, ShadowFrame* shadow_frame, JValue* result, size_t arg_offset)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  mirror::String* to_copy = shadow_frame->GetVRegReference(arg_offset)->AsString();
+  if (to_copy == nullptr) {
+    AbortTransactionOrFail(self, "StringFactory.newStringFromString with null object");
+    return;
+  }
+  StackHandleScope<1> hs(self);
+  Handle<mirror::String> h_string(hs.NewHandle(to_copy));
+  Runtime* runtime = Runtime::Current();
+  gc::AllocatorType allocator = runtime->GetHeap()->GetCurrentAllocator();
+  result->SetL(mirror::String::AllocFromString<true>(self, h_string->GetLength(), h_string, 0,
+                                                     allocator));
+}
+
+// Handles String.fastSubstring(int, int): allocates a String of `length` chars from `start`.
+static void UnstartedStringFastSubstring(
+    Thread* self, ShadowFrame* shadow_frame, JValue* result, size_t arg_offset)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  jint start = shadow_frame->GetVReg(arg_offset + 1);
+  jint length = shadow_frame->GetVReg(arg_offset + 2);
+  DCHECK_GE(start, 0);
+  DCHECK_GE(length, 0);
+  StackHandleScope<1> hs(self);  // NOTE(review): no null-receiver check, unlike sibling handlers.
+  Handle<mirror::String> h_string(hs.NewHandle(shadow_frame->GetVRegReference(arg_offset)->AsString()));
+  DCHECK_LE(start, h_string->GetLength());
+  DCHECK_LE(start + length, h_string->GetLength());
+  Runtime* runtime = Runtime::Current();
+  gc::AllocatorType allocator = runtime->GetHeap()->GetCurrentAllocator();
+  result->SetL(mirror::String::AllocFromString<true>(self, length, h_string, start, allocator));
+}
+
+// Handles String.toCharArray() on new-style String objects during compilation.
+static void UnstartedStringToCharArray(
+    Thread* self, ShadowFrame* shadow_frame, JValue* result, size_t arg_offset)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  mirror::String* string = shadow_frame->GetVRegReference(arg_offset)->AsString();
+  if (string == nullptr) {
+    AbortTransactionOrFail(self, "String.toCharArray with null object");  // Name this method.
+    return;
+  }
+  result->SetL(string->ToCharArray(self));  // The new char[] is returned through result.
+}
+
 static void UnstartedJNIVMRuntimeNewUnpaddedArray(Thread* self,
                                                   mirror::ArtMethod* method ATTRIBUTE_UNUSED,
                                                   mirror::Object* receiver ATTRIBUTE_UNUSED,
@@ -1079,6 +1187,20 @@
           &UnstartedMemoryPeekArrayEntry },
       { "java.io.Reader java.security.Security.getSecurityPropertiesReader()",
           &UnstartedSecurityGetSecurityPropertiesReader },
+      { "void java.lang.String.getCharsNoCheck(int, int, char[], int)",
+          &UnstartedStringGetCharsNoCheck },
+      { "char java.lang.String.charAt(int)",
+          &UnstartedStringCharAt },
+      { "void java.lang.String.setCharAt(int, char)",
+          &UnstartedStringSetCharAt },
+      { "java.lang.String java.lang.StringFactory.newStringFromChars(int, int, char[])",
+          &UnstartedStringFactoryNewStringFromChars },
+      { "java.lang.String java.lang.StringFactory.newStringFromString(java.lang.String)",
+          &UnstartedStringFactoryNewStringFromString },
+      { "java.lang.String java.lang.String.fastSubstring(int, int)",
+          &UnstartedStringFastSubstring },
+      { "char[] java.lang.String.toCharArray()",
+          &UnstartedStringToCharArray },
   };
 
   for (auto& def : defs) {
@@ -1162,6 +1284,8 @@
   std::string name(PrettyMethod(shadow_frame->GetMethod()));
   const auto& iter = invoke_handlers_.find(name);
   if (iter != invoke_handlers_.end()) {
+    // Clear out the result in case it's not zeroed out.
+    result->SetL(0);
     (*iter->second)(self, shadow_frame, result, arg_offset);
   } else {
     // Not special, continue with regular interpreter execution.
@@ -1175,6 +1299,8 @@
   std::string name(PrettyMethod(method));
   const auto& iter = jni_handlers_.find(name);
   if (iter != jni_handlers_.end()) {
+    // Clear out the result in case it's not zeroed out.
+    result->SetL(0);
     (*iter->second)(self, method, receiver, args, result);
   } else if (Runtime::Current()->IsActiveTransaction()) {
     AbortTransactionF(self, "Attempt to invoke native method in non-started runtime: %s",
diff --git a/runtime/jdwp/jdwp_event.cc b/runtime/jdwp/jdwp_event.cc
index 1ec800f..ff75268 100644
--- a/runtime/jdwp/jdwp_event.cc
+++ b/runtime/jdwp/jdwp_event.cc
@@ -32,6 +32,8 @@
 #include "scoped_thread_state_change.h"
 #include "thread-inl.h"
 
+#include "handle_scope-inl.h"
+
 /*
 General notes:
 
@@ -108,20 +110,32 @@
  * Stuff to compare against when deciding if a mod matches.  Only the
  * values for mods valid for the event being evaluated will be filled in.
  * The rest will be zeroed.
+ * Must be allocated on the stack only. This is enforced by removing the
+ * operator new.
  */
 struct ModBasket {
-  ModBasket() : pLoc(nullptr), thread(nullptr), locationClass(nullptr), exceptionClass(nullptr),
-                caught(false), field(nullptr), thisPtr(nullptr) { }
+  explicit ModBasket(Thread* self)
+    : hs(self), pLoc(nullptr), thread(self),
+      locationClass(hs.NewHandle<mirror::Class>(nullptr)),
+      exceptionClass(hs.NewHandle<mirror::Class>(nullptr)),
+      caught(false),
+      field(nullptr),
+      thisPtr(hs.NewHandle<mirror::Object>(nullptr)) { }
 
-  const EventLocation*  pLoc;             /* LocationOnly */
-  std::string           className;        /* ClassMatch/ClassExclude */
-  Thread*               thread;           /* ThreadOnly */
-  mirror::Class*        locationClass;    /* ClassOnly */
-  mirror::Class*        exceptionClass;   /* ExceptionOnly */
-  bool                  caught;           /* ExceptionOnly */
-  ArtField*             field;            /* FieldOnly */
-  mirror::Object*       thisPtr;          /* InstanceOnly */
+  StackHandleScope<3> hs;
+  const EventLocation*            pLoc;             /* LocationOnly */
+  std::string                     className;        /* ClassMatch/ClassExclude */
+  Thread* const                   thread;           /* ThreadOnly */
+  MutableHandle<mirror::Class>    locationClass;    /* ClassOnly */
+  MutableHandle<mirror::Class>    exceptionClass;   /* ExceptionOnly */
+  bool                            caught;           /* ExceptionOnly */
+  ArtField*                       field;            /* FieldOnly */
+  MutableHandle<mirror::Object>   thisPtr;          /* InstanceOnly */
   /* nothing for StepOnly -- handled differently */
+
+ private:
+  DISALLOW_ALLOCATION();  // forbids allocation on the heap.
+  DISALLOW_IMPLICIT_CONSTRUCTORS(ModBasket);
 };
 
 static bool NeedsFullDeoptimization(JdwpEventKind eventKind) {
@@ -141,6 +155,8 @@
     }
 }
 
+// Returns the instrumentation event the DebugInstrumentationListener must
+// listen to in order to properly report the given JDWP event to the debugger.
 static uint32_t GetInstrumentationEventFor(JdwpEventKind eventKind) {
   switch (eventKind) {
     case EK_BREAKPOINT:
@@ -455,7 +471,7 @@
       }
       break;
     case MK_CLASS_ONLY:
-      if (!Dbg::MatchType(basket.locationClass, pMod->classOnly.refTypeId)) {
+      if (!Dbg::MatchType(basket.locationClass.Get(), pMod->classOnly.refTypeId)) {
         return false;
       }
       break;
@@ -476,7 +492,7 @@
       break;
     case MK_EXCEPTION_ONLY:
       if (pMod->exceptionOnly.refTypeId != 0 &&
-          !Dbg::MatchType(basket.exceptionClass, pMod->exceptionOnly.refTypeId)) {
+          !Dbg::MatchType(basket.exceptionClass.Get(), pMod->exceptionOnly.refTypeId)) {
         return false;
       }
       if ((basket.caught && !pMod->exceptionOnly.caught) ||
@@ -495,7 +511,7 @@
       }
       break;
     case MK_INSTANCE_ONLY:
-      if (!Dbg::MatchInstance(pMod->instanceOnly.objectId, basket.thisPtr)) {
+      if (!Dbg::MatchInstance(pMod->instanceOnly.objectId, basket.thisPtr.Get())) {
         return false;
       }
       break;
@@ -823,12 +839,11 @@
   DCHECK(pLoc->method != nullptr);
   DCHECK_EQ(pLoc->method->IsStatic(), thisPtr == nullptr);
 
-  ModBasket basket;
+  ModBasket basket(Thread::Current());
   basket.pLoc = pLoc;
-  basket.locationClass = pLoc->method->GetDeclaringClass();
-  basket.thisPtr = thisPtr;
-  basket.thread = Thread::Current();
-  basket.className = Dbg::GetClassName(basket.locationClass);
+  basket.locationClass.Assign(pLoc->method->GetDeclaringClass());
+  basket.thisPtr.Assign(thisPtr);
+  basket.className = Dbg::GetClassName(basket.locationClass.Get());
 
   /*
    * On rare occasions we may need to execute interpreted code in the VM
@@ -922,16 +937,15 @@
   DCHECK_EQ(fieldValue != nullptr, is_modification);
   DCHECK_EQ(field->IsStatic(), this_object == nullptr);
 
-  ModBasket basket;
+  ModBasket basket(Thread::Current());
   basket.pLoc = pLoc;
-  basket.locationClass = pLoc->method->GetDeclaringClass();
-  basket.thisPtr = this_object;
-  basket.thread = Thread::Current();
-  basket.className = Dbg::GetClassName(basket.locationClass);
+  basket.locationClass.Assign(pLoc->method->GetDeclaringClass());
+  basket.thisPtr.Assign(this_object);
+  basket.className = Dbg::GetClassName(basket.locationClass.Get());
   basket.field = field;
 
   if (InvokeInProgress()) {
-    VLOG(jdwp) << "Not posting field event during invoke";
+    VLOG(jdwp) << "Not posting field event during invoke (" << basket.className << ")";
     return;
   }
 
@@ -973,7 +987,7 @@
   uint8_t tag;
   {
     ScopedObjectAccessUnchecked soa(Thread::Current());
-    tag = Dbg::TagFromObject(soa, basket.thisPtr);
+    tag = Dbg::TagFromObject(soa, basket.thisPtr.Get());
   }
 
   for (const JdwpEvent* pEvent : match_list) {
@@ -1026,8 +1040,7 @@
     return;
   }
 
-  ModBasket basket;
-  basket.thread = thread;
+  ModBasket basket(thread);
 
   std::vector<JdwpEvent*> match_list;
   const JdwpEventKind match_kind = (start) ? EK_THREAD_START : EK_THREAD_DEATH;
@@ -1104,18 +1117,15 @@
     VLOG(jdwp) << "Unexpected: exception event with empty throw location";
   }
 
-  ModBasket basket;
+  ModBasket basket(Thread::Current());
   basket.pLoc = pThrowLoc;
   if (pThrowLoc->method != nullptr) {
-    basket.locationClass = pThrowLoc->method->GetDeclaringClass();
-  } else {
-    basket.locationClass = nullptr;
+    basket.locationClass.Assign(pThrowLoc->method->GetDeclaringClass());
   }
-  basket.thread = Thread::Current();
-  basket.className = Dbg::GetClassName(basket.locationClass);
-  basket.exceptionClass = exception_object->GetClass();
+  basket.className = Dbg::GetClassName(basket.locationClass.Get());
+  basket.exceptionClass.Assign(exception_object->GetClass());
   basket.caught = (pCatchLoc->method != 0);
-  basket.thisPtr = thisPtr;
+  basket.thisPtr.Assign(thisPtr);
 
   /* don't try to post an exception caused by the debugger */
   if (InvokeInProgress()) {
@@ -1186,10 +1196,9 @@
 void JdwpState::PostClassPrepare(mirror::Class* klass) {
   DCHECK(klass != nullptr);
 
-  ModBasket basket;
-  basket.locationClass = klass;
-  basket.thread = Thread::Current();
-  basket.className = Dbg::GetClassName(basket.locationClass);
+  ModBasket basket(Thread::Current());
+  basket.locationClass.Assign(klass);
+  basket.className = Dbg::GetClassName(basket.locationClass.Get());
 
   /* suppress class prep caused by debugger */
   if (InvokeInProgress()) {
@@ -1212,7 +1221,7 @@
   // debuggers seem to like that.  There might be some advantage to honesty,
   // since the class may not yet be verified.
   int status = JDWP::CS_VERIFIED | JDWP::CS_PREPARED;
-  JDWP::JdwpTypeTag tag = Dbg::GetTypeTag(basket.locationClass);
+  JDWP::JdwpTypeTag tag = Dbg::GetTypeTag(basket.locationClass.Get());
   std::string temp;
   std::string signature(basket.locationClass->GetDescriptor(&temp));
 
diff --git a/runtime/jdwp/object_registry.cc b/runtime/jdwp/object_registry.cc
index a42a58f..2b28f7d 100644
--- a/runtime/jdwp/object_registry.cc
+++ b/runtime/jdwp/object_registry.cc
@@ -36,17 +36,45 @@
 }
 
 JDWP::RefTypeId ObjectRegistry::AddRefType(mirror::Class* c) {
-  return InternalAdd(c);
+  return Add(c);
+}
+
+JDWP::RefTypeId ObjectRegistry::AddRefType(Handle<mirror::Class> c_h) {
+  return Add(c_h);
 }
 
 JDWP::ObjectId ObjectRegistry::Add(mirror::Object* o) {
-  return InternalAdd(o);
-}
-
-JDWP::ObjectId ObjectRegistry::InternalAdd(mirror::Object* o) {
   if (o == nullptr) {
     return 0;
   }
+  Thread* const self = Thread::Current();
+  StackHandleScope<1> hs(self);
+  return InternalAdd(hs.NewHandle(o));
+}
+
+// Template instantiations must be declared below.
+template<class T>
+JDWP::ObjectId ObjectRegistry::Add(Handle<T> obj_h) {
+  if (obj_h.Get() == nullptr) {
+    return 0;
+  }
+  return InternalAdd(obj_h);
+}
+
+// Explicit template instantiation.
+template
+SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+LOCKS_EXCLUDED(Locks::thread_list_lock_, Locks::thread_suspend_count_lock_)
+JDWP::ObjectId ObjectRegistry::Add(Handle<mirror::Object> obj_h);
+
+template
+SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+LOCKS_EXCLUDED(Locks::thread_list_lock_, Locks::thread_suspend_count_lock_)
+JDWP::ObjectId ObjectRegistry::Add(Handle<mirror::Throwable> obj_h);
+
+template<class T>
+JDWP::ObjectId ObjectRegistry::InternalAdd(Handle<T> obj_h) {
+  CHECK(obj_h.Get() != nullptr);
 
   Thread* const self = Thread::Current();
   self->AssertNoPendingException();
@@ -55,9 +83,6 @@
   Locks::thread_list_lock_->AssertNotHeld(self);
   Locks::thread_suspend_count_lock_->AssertNotHeld(self);
 
-  StackHandleScope<1> hs(self);
-  Handle<mirror::Object> obj_h(hs.NewHandle(o));
-
   // Call IdentityHashCode here to avoid a lock level violation between lock_ and monitor_lock.
   int32_t identity_hash_code = obj_h->IdentityHashCode();
 
diff --git a/runtime/jdwp/object_registry.h b/runtime/jdwp/object_registry.h
index 27a4e55..4c149cd 100644
--- a/runtime/jdwp/object_registry.h
+++ b/runtime/jdwp/object_registry.h
@@ -23,6 +23,7 @@
 #include <map>
 
 #include "base/casts.h"
+#include "handle.h"
 #include "jdwp/jdwp.h"
 #include "safe_map.h"
 
@@ -65,11 +66,23 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       LOCKS_EXCLUDED(Locks::thread_list_lock_,
                      Locks::thread_suspend_count_lock_);
+
   JDWP::RefTypeId AddRefType(mirror::Class* c)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       LOCKS_EXCLUDED(Locks::thread_list_lock_,
                      Locks::thread_suspend_count_lock_);
 
+  template<class T>
+  JDWP::ObjectId Add(Handle<T> obj_h)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      LOCKS_EXCLUDED(Locks::thread_list_lock_,
+                     Locks::thread_suspend_count_lock_);
+
+  JDWP::RefTypeId AddRefType(Handle<mirror::Class> c_h)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      LOCKS_EXCLUDED(Locks::thread_list_lock_,
+                     Locks::thread_suspend_count_lock_);
+
   template<typename T> T Get(JDWP::ObjectId id, JDWP::JdwpError* error)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     if (id == 0) {
@@ -98,7 +111,8 @@
   jobject GetJObject(JDWP::ObjectId id) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
  private:
-  JDWP::ObjectId InternalAdd(mirror::Object* o)
+  template<class T>
+  JDWP::ObjectId InternalAdd(Handle<T> obj_h)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       LOCKS_EXCLUDED(lock_,
                      Locks::thread_list_lock_,
diff --git a/runtime/jit/jit.h b/runtime/jit/jit.h
index 3e80aef..c698cfc 100644
--- a/runtime/jit/jit.h
+++ b/runtime/jit/jit.h
@@ -19,8 +19,6 @@
 
 #include <unordered_map>
 
-#include "instrumentation.h"
-
 #include "atomic.h"
 #include "base/macros.h"
 #include "base/mutex.h"
@@ -86,6 +84,8 @@
   std::unique_ptr<jit::JitInstrumentationCache> instrumentation_cache_;
   std::unique_ptr<jit::JitCodeCache> code_cache_;
   CompilerCallbacks* compiler_callbacks_;  // Owned by the jit compiler.
+
+  DISALLOW_COPY_AND_ASSIGN(Jit);
 };
 
 class JitOptions {
@@ -114,8 +114,9 @@
   bool dump_info_on_shutdown_;
 
   JitOptions() : use_jit_(false), code_cache_capacity_(0), compile_threshold_(0),
-      dump_info_on_shutdown_(false) {
-  }
+      dump_info_on_shutdown_(false) { }
+
+  DISALLOW_COPY_AND_ASSIGN(JitOptions);
 };
 
 }  // namespace jit
diff --git a/runtime/jit/jit_code_cache.h b/runtime/jit/jit_code_cache.h
index da891fe..8b76647 100644
--- a/runtime/jit/jit_code_cache.h
+++ b/runtime/jit/jit_code_cache.h
@@ -130,7 +130,7 @@
   // required since we have to implement ClassLinker::GetQuickOatCodeFor for walking stacks.
   SafeMap<mirror::ArtMethod*, const void*> method_code_map_ GUARDED_BY(lock_);
 
-  DISALLOW_COPY_AND_ASSIGN(JitCodeCache);
+  DISALLOW_IMPLICIT_CONSTRUCTORS(JitCodeCache);
 };
 
 
diff --git a/runtime/jit/jit_instrumentation.cc b/runtime/jit/jit_instrumentation.cc
index 160e678..3232674 100644
--- a/runtime/jit/jit_instrumentation.cc
+++ b/runtime/jit/jit_instrumentation.cc
@@ -47,6 +47,8 @@
  private:
   mirror::ArtMethod* const method_;
   JitInstrumentationCache* const cache_;
+
+  DISALLOW_IMPLICIT_CONSTRUCTORS(JitCompileTask);
 };
 
 JitInstrumentationCache::JitInstrumentationCache(size_t hot_method_threshold)
@@ -75,7 +77,7 @@
   ScopedObjectAccessUnchecked soa(self);
   // Since we don't have on-stack replacement, some methods can remain in the interpreter longer
   // than we want resulting in samples even after the method is compiled.
-  if (method->IsClassInitializer() ||
+  if (method->IsClassInitializer() || method->IsNative() ||
       Runtime::Current()->GetJit()->GetCodeCache()->ContainsMethod(method)) {
     return;
   }
diff --git a/runtime/jit/jit_instrumentation.h b/runtime/jit/jit_instrumentation.h
index 9d5d74f..72acaef 100644
--- a/runtime/jit/jit_instrumentation.h
+++ b/runtime/jit/jit_instrumentation.h
@@ -58,6 +58,8 @@
   std::unordered_map<jmethodID, size_t> samples_;
   size_t hot_method_threshold_;
   std::unique_ptr<ThreadPool> thread_pool_;
+
+  DISALLOW_IMPLICIT_CONSTRUCTORS(JitInstrumentationCache);
 };
 
 class JitInstrumentationListener : public instrumentation::InstrumentationListener {
@@ -97,6 +99,8 @@
 
  private:
   JitInstrumentationCache* const instrumentation_cache_;
+
+  DISALLOW_IMPLICIT_CONSTRUCTORS(JitInstrumentationListener);
 };
 
 }  // namespace jit
diff --git a/runtime/jni_internal.cc b/runtime/jni_internal.cc
index f5a3a6b..fd386d7 100644
--- a/runtime/jni_internal.cc
+++ b/runtime/jni_internal.cc
@@ -573,6 +573,12 @@
     if (c == nullptr) {
       return nullptr;
     }
+    if (c->IsStringClass()) {
+      gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
+      mirror::SetStringCountVisitor visitor(0);
+      return soa.AddLocalReference<jobject>(mirror::String::Alloc<true>(soa.Self(), 0,
+                                                                        allocator_type, visitor));
+    }
     return soa.AddLocalReference<jobject>(c->AllocObject(soa.Self()));
   }
 
@@ -594,6 +600,11 @@
     if (c == nullptr) {
       return nullptr;
     }
+    if (c->IsStringClass()) {
+      // Replace calls to String.<init> with equivalent StringFactory call.
+      jmethodID sf_mid = WellKnownClasses::StringInitToStringFactoryMethodID(mid);
+      return CallStaticObjectMethodV(env, WellKnownClasses::java_lang_StringFactory, sf_mid, args);
+    }
     mirror::Object* result = c->AllocObject(soa.Self());
     if (result == nullptr) {
       return nullptr;
@@ -614,6 +625,11 @@
     if (c == nullptr) {
       return nullptr;
     }
+    if (c->IsStringClass()) {
+      // Replace calls to String.<init> with equivalent StringFactory call.
+      jmethodID sf_mid = WellKnownClasses::StringInitToStringFactoryMethodID(mid);
+      return CallStaticObjectMethodA(env, WellKnownClasses::java_lang_StringFactory, sf_mid, args);
+    }
     mirror::Object* result = c->AllocObject(soa.Self());
     if (result == nullptr) {
       return nullptr;
@@ -1649,7 +1665,7 @@
       ThrowSIOOBE(soa, start, length, s->GetLength());
     } else {
       CHECK_NON_NULL_MEMCPY_ARGUMENT(length, buf);
-      const jchar* chars = s->GetCharArray()->GetData() + s->GetOffset();
+      const jchar* chars = s->GetValue();
       memcpy(buf, chars + start, length * sizeof(jchar));
     }
   }
@@ -1663,7 +1679,7 @@
       ThrowSIOOBE(soa, start, length, s->GetLength());
     } else {
       CHECK_NON_NULL_MEMCPY_ARGUMENT(length, buf);
-      const jchar* chars = s->GetCharArray()->GetData() + s->GetOffset();
+      const jchar* chars = s->GetValue();
       ConvertUtf16ToModifiedUtf8(buf, chars + start, length);
     }
   }
@@ -1672,33 +1688,26 @@
     CHECK_NON_NULL_ARGUMENT(java_string);
     ScopedObjectAccess soa(env);
     mirror::String* s = soa.Decode<mirror::String*>(java_string);
-    mirror::CharArray* chars = s->GetCharArray();
     gc::Heap* heap = Runtime::Current()->GetHeap();
-    if (heap->IsMovableObject(chars)) {
+    if (heap->IsMovableObject(s)) {
+      jchar* chars = new jchar[s->GetLength()];
+      memcpy(chars, s->GetValue(), sizeof(jchar) * s->GetLength());
       if (is_copy != nullptr) {
         *is_copy = JNI_TRUE;
       }
-      int32_t char_count = s->GetLength();
-      int32_t offset = s->GetOffset();
-      jchar* bytes = new jchar[char_count];
-      for (int32_t i = 0; i < char_count; i++) {
-        bytes[i] = chars->Get(i + offset);
-      }
-      return bytes;
-    } else {
-      if (is_copy != nullptr) {
-        *is_copy = JNI_FALSE;
-      }
-      return static_cast<jchar*>(chars->GetData() + s->GetOffset());
+      return chars;
     }
+    if (is_copy != nullptr) {
+      *is_copy = JNI_FALSE;
+    }
+    return static_cast<jchar*>(s->GetValue());
   }
 
   static void ReleaseStringChars(JNIEnv* env, jstring java_string, const jchar* chars) {
     CHECK_NON_NULL_ARGUMENT_RETURN_VOID(java_string);
     ScopedObjectAccess soa(env);
     mirror::String* s = soa.Decode<mirror::String*>(java_string);
-    mirror::CharArray* s_chars = s->GetCharArray();
-    if (chars != (s_chars->GetData() + s->GetOffset())) {
+    if (chars != s->GetValue()) {
       delete[] chars;
     }
   }
@@ -1707,18 +1716,16 @@
     CHECK_NON_NULL_ARGUMENT(java_string);
     ScopedObjectAccess soa(env);
     mirror::String* s = soa.Decode<mirror::String*>(java_string);
-    mirror::CharArray* chars = s->GetCharArray();
-    int32_t offset = s->GetOffset();
     gc::Heap* heap = Runtime::Current()->GetHeap();
-    if (heap->IsMovableObject(chars)) {
+    if (heap->IsMovableObject(s)) {
       StackHandleScope<1> hs(soa.Self());
-      HandleWrapper<mirror::CharArray> h(hs.NewHandleWrapper(&chars));
+      HandleWrapper<mirror::String> h(hs.NewHandleWrapper(&s));
       heap->IncrementDisableMovingGC(soa.Self());
     }
     if (is_copy != nullptr) {
       *is_copy = JNI_FALSE;
     }
-    return static_cast<jchar*>(chars->GetData() + offset);
+    return static_cast<jchar*>(s->GetValue());
   }
 
   static void ReleaseStringCritical(JNIEnv* env, jstring java_string, const jchar* chars) {
@@ -1727,8 +1734,7 @@
     ScopedObjectAccess soa(env);
     gc::Heap* heap = Runtime::Current()->GetHeap();
     mirror::String* s = soa.Decode<mirror::String*>(java_string);
-    mirror::CharArray* s_chars = s->GetCharArray();
-    if (heap->IsMovableObject(s_chars)) {
+    if (heap->IsMovableObject(s)) {
       heap->DecrementDisableMovingGC(soa.Self());
     }
   }
@@ -1745,7 +1751,7 @@
     size_t byte_count = s->GetUtfLength();
     char* bytes = new char[byte_count + 1];
     CHECK(bytes != nullptr);  // bionic aborts anyway.
-    const uint16_t* chars = s->GetCharArray()->GetData() + s->GetOffset();
+    const uint16_t* chars = s->GetValue();
     ConvertUtf16ToModifiedUtf8(bytes, chars, s->GetLength());
     bytes[byte_count] = '\0';
     return bytes;
@@ -2093,6 +2099,35 @@
         return JNI_ERR;
       }
       bool is_fast = false;
+      // Notes about fast JNI calls:
+      //
+      // On a normal JNI call, the calling thread usually transitions
+      // from the kRunnable state to the kNative state. But if the
+      // called native function needs to access any Java object, it
+      // will have to transition back to the kRunnable state.
+      //
+      // There is a cost to this double transition. For a JNI call
+      // that should be quick, this cost may dominate the call cost.
+      //
+      // On a fast JNI call, the calling thread avoids this double
+      // transition by not transitioning from kRunnable to kNative and
+      // stays in the kRunnable state.
+      //
+      // There are risks to using a fast JNI call because it can delay
+      // the response to a thread suspension request, which is typically
+      // used for GC root scanning, etc. If a fast JNI call takes a
+      // long time, it could cause longer thread suspension latency
+      // and GC pauses.
+      //
+      // Thus, fast JNI should be used with care. It should be used
+      // for a JNI call that takes a short amount of time (e.g. no
+      // long-running loop) and does not block (e.g. no locks, I/O,
+      // etc.).
+      //
+      // A '!' prefix in the signature in the JNINativeMethod
+      // indicates that it's a fast JNI call and the runtime omits the
+      // thread state transition from kRunnable to kNative at the
+      // entry.
       if (*sig == '!') {
         is_fast = true;
         ++sig;
@@ -2103,10 +2138,12 @@
         m = c->FindVirtualMethod(name, sig);
       }
       if (m == nullptr) {
-        c->DumpClass(LOG(ERROR), mirror::Class::kDumpClassFullDetail);
-        LOG(return_errors ? ERROR : FATAL) << "Failed to register native method "
+        LOG(return_errors ? ERROR : INTERNAL_FATAL) << "Failed to register native method "
             << PrettyDescriptor(c) << "." << name << sig << " in "
             << c->GetDexCache()->GetLocation()->ToModifiedUtf8();
+        // Safe to pass in LOG(FATAL) since the log object aborts in destructor and only goes
+        // out of scope after the DumpClass is done executing.
+        c->DumpClass(LOG(return_errors ? ERROR : FATAL), mirror::Class::kDumpClassFullDetail);
         ThrowNoSuchMethodError(soa, c, name, sig, "static or non-static");
         return JNI_ERR;
       } else if (!m->IsNative()) {
diff --git a/runtime/jni_internal_test.cc b/runtime/jni_internal_test.cc
index 77db404..3d14a4e 100644
--- a/runtime/jni_internal_test.cc
+++ b/runtime/jni_internal_test.cc
@@ -625,8 +625,6 @@
   // ...whose fields haven't been initialized because
   // we didn't call a constructor.
   ASSERT_EQ(0, env_->GetIntField(o, env_->GetFieldID(c, "count", "I")));
-  ASSERT_EQ(0, env_->GetIntField(o, env_->GetFieldID(c, "offset", "I")));
-  ASSERT_TRUE(env_->GetObjectField(o, env_->GetFieldID(c, "value", "[C")) == nullptr);
 }
 
 TEST_F(JniInternalTest, GetVersion) {
@@ -860,7 +858,9 @@
   jstring s = reinterpret_cast<jstring>(env_->AllocObject(c));
   ASSERT_NE(s, nullptr);
   env_->CallVoidMethod(s, mid2);
-  ASSERT_EQ(JNI_FALSE, env_->ExceptionCheck());
+  // With the string change, this should now throw an UnsupportedOperationException.
+  ASSERT_EQ(JNI_TRUE, env_->ExceptionCheck());
+  env_->ExceptionClear();
 
   mid = env_->GetMethodID(c, "length", "()I");
   ASSERT_NE(mid, nullptr);
@@ -1538,7 +1538,7 @@
 
   jboolean is_copy = JNI_FALSE;
   chars = env_->GetStringChars(s, &is_copy);
-  if (Runtime::Current()->GetHeap()->IsMovableObject(s_m->GetCharArray())) {
+  if (Runtime::Current()->GetHeap()->IsMovableObject(s_m)) {
     EXPECT_EQ(JNI_TRUE, is_copy);
   } else {
     EXPECT_EQ(JNI_FALSE, is_copy);
diff --git a/runtime/linear_alloc.h b/runtime/linear_alloc.h
index 6d8eda6..fcabcc8 100644
--- a/runtime/linear_alloc.h
+++ b/runtime/linear_alloc.h
@@ -42,6 +42,8 @@
  private:
   mutable Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
   ArenaAllocator allocator_ GUARDED_BY(lock_);
+
+  DISALLOW_IMPLICIT_CONSTRUCTORS(LinearAlloc);
 };
 
 }  // namespace art
diff --git a/runtime/lock_word-inl.h b/runtime/lock_word-inl.h
index d831bfb..341501b 100644
--- a/runtime/lock_word-inl.h
+++ b/runtime/lock_word-inl.h
@@ -53,6 +53,7 @@
 inline LockWord::LockWord(Monitor* mon, uint32_t rb_state)
     : value_(mon->GetMonitorId() | (rb_state << kReadBarrierStateShift) |
              (kStateFat << kStateShift)) {
+  DCHECK_EQ(rb_state & ~kReadBarrierStateMask, 0U);
 #ifndef __LP64__
   DCHECK_ALIGNED(mon, kMonitorIdAlignment);
 #endif
diff --git a/runtime/lock_word.h b/runtime/lock_word.h
index 46c3bd4..655aa3a 100644
--- a/runtime/lock_word.h
+++ b/runtime/lock_word.h
@@ -94,6 +94,7 @@
     kReadBarrierStateMaskShiftedToggled = ~kReadBarrierStateMaskShifted,
 
     // When the state is kHashCode, the non-state bits hold the hashcode.
+    // Note Object.hashCode() has the hash code layout hardcoded.
     kHashShift = 0,
     kHashSize = 32 - kStateSize - kReadBarrierStateSize,
     kHashMask = (1 << kHashSize) - 1,
@@ -110,6 +111,7 @@
   static LockWord FromThinLockId(uint32_t thread_id, uint32_t count, uint32_t rb_state) {
     CHECK_LE(thread_id, static_cast<uint32_t>(kThinLockMaxOwner));
     CHECK_LE(count, static_cast<uint32_t>(kThinLockMaxCount));
+    DCHECK_EQ(rb_state & ~kReadBarrierStateMask, 0U);
     return LockWord((thread_id << kThinLockOwnerShift) | (count << kThinLockCountShift) |
                     (rb_state << kReadBarrierStateShift) |
                     (kStateThinOrUnlocked << kStateShift));
@@ -122,12 +124,14 @@
 
   static LockWord FromHashCode(uint32_t hash_code, uint32_t rb_state) {
     CHECK_LE(hash_code, static_cast<uint32_t>(kMaxHash));
+    DCHECK_EQ(rb_state & ~kReadBarrierStateMask, 0U);
     return LockWord((hash_code << kHashShift) |
                     (rb_state << kReadBarrierStateShift) |
                     (kStateHash << kStateShift));
   }
 
   static LockWord FromDefault(uint32_t rb_state) {
+    DCHECK_EQ(rb_state & ~kReadBarrierStateMask, 0U);
     return LockWord(rb_state << kReadBarrierStateShift);
   }
 
@@ -149,7 +153,8 @@
 
   LockState GetState() const {
     CheckReadBarrierState();
-    if (UNLIKELY(value_ == 0)) {
+    if ((!kUseReadBarrier && UNLIKELY(value_ == 0)) ||
+        (kUseReadBarrier && UNLIKELY((value_ & kReadBarrierStateMaskShiftedToggled) == 0))) {
       return kUnlocked;
     } else {
       uint32_t internal_state = (value_ >> kStateShift) & kStateMask;
@@ -171,6 +176,14 @@
     return (value_ >> kReadBarrierStateShift) & kReadBarrierStateMask;
   }
 
+  void SetReadBarrierState(uint32_t rb_state) {
+    DCHECK_EQ(rb_state & ~kReadBarrierStateMask, 0U);
+    DCHECK_NE(static_cast<uint32_t>(GetState()), static_cast<uint32_t>(kForwardingAddress));
+    // Clear the old read barrier state bits, then OR in the new ones.
+    value_ &= ~(kReadBarrierStateMask << kReadBarrierStateShift);
+    value_ |= (rb_state & kReadBarrierStateMask) << kReadBarrierStateShift;
+  }
+
   // Return the owner thin lock thread id.
   uint32_t ThinLockOwner() const;
 
diff --git a/runtime/mem_map.cc b/runtime/mem_map.cc
index 959bb75..cf4233c 100644
--- a/runtime/mem_map.cc
+++ b/runtime/mem_map.cc
@@ -153,7 +153,7 @@
       return true;
     }
   }
-  PrintFileToLog("/proc/self/maps", LogSeverity::WARNING);
+  PrintFileToLog("/proc/self/maps", LogSeverity::ERROR);
   *error_msg = StringPrintf("Requested region 0x%08" PRIxPTR "-0x%08" PRIxPTR " does not overlap "
                             "any existing map. See process maps in the log.", begin, end);
   return false;
@@ -256,7 +256,7 @@
     // Only use this if you actually made the page reservation yourself.
     CHECK(expected_ptr != nullptr);
 
-    DCHECK(ContainedWithinExistingMap(expected_ptr, byte_count, error_msg)) << error_msg;
+    DCHECK(ContainedWithinExistingMap(expected_ptr, byte_count, error_msg)) << *error_msg;
     flags |= MAP_FIXED;
   }
 
@@ -411,7 +411,7 @@
     // Only use this if you actually made the page reservation yourself.
     CHECK(expected_ptr != nullptr);
 
-    DCHECK(ContainedWithinExistingMap(expected_ptr, byte_count, error_msg)) << error_msg;
+    DCHECK(ContainedWithinExistingMap(expected_ptr, byte_count, error_msg)) << *error_msg;
     flags |= MAP_FIXED;
   } else {
     CHECK_EQ(0, flags & MAP_FIXED);
@@ -617,13 +617,68 @@
   return true;
 }
 
-void MemMap::DumpMaps(std::ostream& os) {
+void MemMap::DumpMaps(std::ostream& os, bool terse) {
   MutexLock mu(Thread::Current(), *Locks::mem_maps_lock_);
-  DumpMapsLocked(os);
+  DumpMapsLocked(os, terse);
 }
 
-void MemMap::DumpMapsLocked(std::ostream& os) {
-  os << *maps_;
+void MemMap::DumpMapsLocked(std::ostream& os, bool terse) {
+  const auto& mem_maps = *maps_;
+  if (!terse) {
+    os << mem_maps;
+    return;
+  }
+
+  // Terse output example:
+  //   [MemMap: 0x409be000+0x20P~0x11dP+0x20P~0x61cP+0x20P prot=0x3 LinearAlloc]
+  //   [MemMap: 0x451d6000+0x6bP(3) prot=0x3 large object space allocation]
+  // The details:
+  //   "+0x20P" means 0x20 pages taken by a single mapping,
+  //   "~0x11dP" means a gap of 0x11d pages,
+  //   "+0x6bP(3)" means 3 mappings one after another, together taking 0x6b pages.
+  os << "MemMap:" << std::endl;
+  for (auto it = mem_maps.begin(), maps_end = mem_maps.end(); it != maps_end;) {
+    MemMap* map = it->second;
+    void* base = it->first;
+    CHECK_EQ(base, map->BaseBegin());
+    os << "[MemMap: " << base;
+    ++it;
+    // Merge consecutive maps with the same protect flags and name.
+    constexpr size_t kMaxGaps = 9;
+    size_t num_gaps = 0;
+    size_t num = 1u;
+    size_t size = map->BaseSize();
+    CHECK(IsAligned<kPageSize>(size));
+    void* end = map->BaseEnd();
+    while (it != maps_end &&
+        it->second->GetProtect() == map->GetProtect() &&
+        it->second->GetName() == map->GetName() &&
+        (it->second->BaseBegin() == end || num_gaps < kMaxGaps)) {
+      if (it->second->BaseBegin() != end) {
+        ++num_gaps;
+        os << "+0x" << std::hex << (size / kPageSize) << "P";
+        if (num != 1u) {
+          os << "(" << std::dec << num << ")";
+        }
+        size_t gap =
+            reinterpret_cast<uintptr_t>(it->second->BaseBegin()) - reinterpret_cast<uintptr_t>(end);
+        CHECK(IsAligned<kPageSize>(gap));
+        os << "~0x" << std::hex << (gap / kPageSize) << "P";
+        num = 0u;
+        size = 0u;
+      }
+      CHECK(IsAligned<kPageSize>(it->second->BaseSize()));
+      ++num;
+      size += it->second->BaseSize();
+      end = it->second->BaseEnd();
+      ++it;
+    }
+    os << "+0x" << std::hex << (size / kPageSize) << "P";
+    if (num != 1u) {
+      os << "(" << std::dec << num << ")";
+    }
+    os << " prot=0x" << std::hex << map->GetProtect() << " " << map->GetName() << "]" << std::endl;
+  }
 }
 
 bool MemMap::HasMemMap(MemMap* map) {
diff --git a/runtime/mem_map.h b/runtime/mem_map.h
index dc6d935..6023a70 100644
--- a/runtime/mem_map.h
+++ b/runtime/mem_map.h
@@ -137,7 +137,7 @@
 
   static bool CheckNoGaps(MemMap* begin_map, MemMap* end_map)
       LOCKS_EXCLUDED(Locks::mem_maps_lock_);
-  static void DumpMaps(std::ostream& os)
+  static void DumpMaps(std::ostream& os, bool terse = false)
       LOCKS_EXCLUDED(Locks::mem_maps_lock_);
 
   typedef AllocationTrackingMultiMap<void*, MemMap*, kAllocatorTagMaps> Maps;
@@ -149,7 +149,7 @@
   MemMap(const std::string& name, uint8_t* begin, size_t size, void* base_begin, size_t base_size,
          int prot, bool reuse) LOCKS_EXCLUDED(Locks::mem_maps_lock_);
 
-  static void DumpMapsLocked(std::ostream& os)
+  static void DumpMapsLocked(std::ostream& os, bool terse)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::mem_maps_lock_);
   static bool HasMemMap(MemMap* map)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::mem_maps_lock_);
diff --git a/runtime/mirror/art_method.cc b/runtime/mirror/art_method.cc
index 543cf9b..9518c9d 100644
--- a/runtime/mirror/art_method.cc
+++ b/runtime/mirror/art_method.cc
@@ -511,7 +511,6 @@
   if (class_linker->IsQuickGenericJniStub(entry_point)) {
     // Generic JNI frame.
     DCHECK(IsNative());
-    StackHandleScope<1> hs(Thread::Current());
     uint32_t handle_refs = GetNumberOfReferenceArgsWithoutReceiver(this) + 1;
     size_t scope_size = HandleScope::SizeOf(handle_refs);
     QuickMethodFrameInfo callee_info = runtime->GetCalleeSaveMethodFrameInfo(Runtime::kRefsAndArgs);
diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h
index 712286f..cc6f5c4 100644
--- a/runtime/mirror/class-inl.h
+++ b/runtime/mirror/class-inl.h
@@ -548,6 +548,10 @@
       << PrettyClass(this)
       << "A class object shouldn't be allocated through this "
       << "as it requires a pre-fence visitor that sets the class size.";
+  DCHECK(!IsStringClass())
+      << PrettyClass(this)
+      << "A string shouldn't be allocated through this "
+      << "as it requires a pre-fence visitor that sets the string count.";
   DCHECK(IsInstantiable()) << PrettyClass(this);
   // TODO: decide whether we want this check. It currently fails during bootstrap.
   // DCHECK(!Runtime::Current()->IsStarted() || IsInitializing()) << PrettyClass(this);
diff --git a/runtime/mirror/class.cc b/runtime/mirror/class.cc
index 1739019..56c586a 100644
--- a/runtime/mirror/class.cc
+++ b/runtime/mirror/class.cc
@@ -330,10 +330,6 @@
   return IsInSamePackage(klass1->GetDescriptor(&temp1), klass2->GetDescriptor(&temp2));
 }
 
-bool Class::IsStringClass() const {
-  return this == String::GetJavaLangString();
-}
-
 bool Class::IsThrowableClass() {
   return WellKnownClasses::ToClass(WellKnownClasses::java_lang_Throwable)->IsAssignableFrom(this);
 }
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index 18496fd..d3cfd01 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -235,6 +235,15 @@
     SetAccessFlags(flags | kAccClassIsFinalizable);
   }
 
+  ALWAYS_INLINE bool IsStringClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return (GetField32(AccessFlagsOffset()) & kAccClassIsStringClass) != 0;
+  }
+
+  ALWAYS_INLINE void SetStringClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    uint32_t flags = GetField32(OFFSET_OF_OBJECT_MEMBER(Class, access_flags_));
+    SetAccessFlags(flags | kAccClassIsStringClass);
+  }
+
   // Returns true if the class is abstract.
   ALWAYS_INLINE bool IsAbstract() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return (GetAccessFlags() & kAccAbstract) != 0;
@@ -416,8 +425,6 @@
            ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   bool IsClassClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  bool IsStringClass() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
   bool IsThrowableClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
@@ -484,10 +491,10 @@
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
            ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   bool IsVariableSize() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    // Classes and arrays vary in size, and so the object_size_ field cannot
+    // Classes, arrays, and strings vary in size, and so the object_size_ field cannot
     // be used to Get their instance size
     return IsClassClass<kVerifyFlags, kReadBarrierOption>() ||
-        IsArrayClass<kVerifyFlags, kReadBarrierOption>();
+        IsArrayClass<kVerifyFlags, kReadBarrierOption>() || IsStringClass();
   }
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
diff --git a/runtime/mirror/object-inl.h b/runtime/mirror/object-inl.h
index 2581fad..39d0f56 100644
--- a/runtime/mirror/object-inl.h
+++ b/runtime/mirror/object-inl.h
@@ -28,8 +28,9 @@
 #include "monitor.h"
 #include "object_array-inl.h"
 #include "read_barrier-inl.h"
-#include "runtime.h"
 #include "reference.h"
+#include "runtime.h"
+#include "string-inl.h"
 #include "throwable.h"
 
 namespace art {
@@ -115,8 +116,11 @@
 }
 
 inline Object* Object::GetReadBarrierPointer() {
-#ifdef USE_BAKER_OR_BROOKS_READ_BARRIER
-  DCHECK(kUseBakerOrBrooksReadBarrier);
+#ifdef USE_BAKER_READ_BARRIER
+  DCHECK(kUseBakerReadBarrier);
+  return reinterpret_cast<Object*>(GetLockWord(false).ReadBarrierState());
+#elif defined(USE_BROOKS_READ_BARRIER)
+  DCHECK(kUseBrooksReadBarrier);
   return GetFieldObject<Object, kVerifyNone, kWithoutReadBarrier>(
       OFFSET_OF_OBJECT_MEMBER(Object, x_rb_ptr_));
 #else
@@ -126,8 +130,14 @@
 }
 
 inline void Object::SetReadBarrierPointer(Object* rb_ptr) {
-#ifdef USE_BAKER_OR_BROOKS_READ_BARRIER
-  DCHECK(kUseBakerOrBrooksReadBarrier);
+#ifdef USE_BAKER_READ_BARRIER
+  DCHECK(kUseBakerReadBarrier);
+  DCHECK_EQ(reinterpret_cast<uint64_t>(rb_ptr) >> 32, 0U);
+  LockWord lw = GetLockWord(false);
+  lw.SetReadBarrierState(static_cast<uint32_t>(reinterpret_cast<uintptr_t>(rb_ptr)));
+  SetLockWord(lw, false);
+#elif defined(USE_BROOKS_READ_BARRIER)
+  DCHECK(kUseBrooksReadBarrier);
   // We don't mark the card as this occurs as part of object allocation. Not all objects have
   // backing cards, such as large objects.
   SetFieldObjectWithoutWriteBarrier<false, false, kVerifyNone>(
@@ -140,8 +150,27 @@
 }
 
 inline bool Object::AtomicSetReadBarrierPointer(Object* expected_rb_ptr, Object* rb_ptr) {
-#ifdef USE_BAKER_OR_BROOKS_READ_BARRIER
-  DCHECK(kUseBakerOrBrooksReadBarrier);
+#ifdef USE_BAKER_READ_BARRIER
+  DCHECK(kUseBakerReadBarrier);
+  DCHECK_EQ(reinterpret_cast<uint64_t>(expected_rb_ptr) >> 32, 0U);
+  DCHECK_EQ(reinterpret_cast<uint64_t>(rb_ptr) >> 32, 0U);
+  LockWord expected_lw;
+  LockWord new_lw;
+  do {
+    LockWord lw = GetLockWord(false);
+    if (UNLIKELY(reinterpret_cast<Object*>(lw.ReadBarrierState()) != expected_rb_ptr)) {
+      // Lost the race.
+      return false;
+    }
+    expected_lw = lw;
+    expected_lw.SetReadBarrierState(
+        static_cast<uint32_t>(reinterpret_cast<uintptr_t>(expected_rb_ptr)));
+    new_lw = lw;
+    new_lw.SetReadBarrierState(static_cast<uint32_t>(reinterpret_cast<uintptr_t>(rb_ptr)));
+  } while (!CasLockWordWeakSequentiallyConsistent(expected_lw, new_lw));
+  return true;
+#elif defined(USE_BROOKS_READ_BARRIER)
+  DCHECK(kUseBrooksReadBarrier);
   MemberOffset offset = OFFSET_OF_OBJECT_MEMBER(Object, x_rb_ptr_);
   uint8_t* raw_addr = reinterpret_cast<uint8_t*>(this) + offset.SizeValue();
   Atomic<uint32_t>* atomic_rb_ptr = reinterpret_cast<Atomic<uint32_t>*>(raw_addr);
@@ -337,9 +366,14 @@
   return down_cast<DoubleArray*>(this);
 }
 
-template<VerifyObjectFlags kVerifyFlags>
+template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
+inline bool Object::IsString() {
+  return GetClass<kVerifyFlags, kReadBarrierOption>()->IsStringClass();
+}
+
+template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline String* Object::AsString() {
-  DCHECK(GetClass<kVerifyFlags>()->IsStringClass());
+  DCHECK((IsString<kVerifyFlags, kReadBarrierOption>()));
   return down_cast<String*>(this);
 }
 
@@ -385,6 +419,9 @@
   } else if (IsClass<kNewFlags, kReadBarrierOption>()) {
     result = AsClass<kNewFlags, kReadBarrierOption>()->
         template SizeOf<kNewFlags, kReadBarrierOption>();
+  } else if (GetClass<kNewFlags, kReadBarrierOption>()->IsStringClass()) {
+    result = AsString<kNewFlags, kReadBarrierOption>()->
+        template SizeOf<kNewFlags>();
   } else {
     result = GetClass<kNewFlags, kReadBarrierOption>()->
         template GetObjectSize<kNewFlags, kReadBarrierOption>();
@@ -947,7 +984,7 @@
   mirror::Class* klass = GetClass<kVerifyFlags>();
   if (klass == Class::GetJavaLangClass()) {
     AsClass<kVerifyNone>()->VisitReferences<kVisitClass>(klass, visitor);
-  } else if (klass->IsArrayClass()) {
+  } else if (klass->IsArrayClass() || klass->IsStringClass()) {
     if (klass->IsObjectArrayClass<kVerifyNone>()) {
       AsObjectArray<mirror::Object, kVerifyNone>()->VisitReferences<kVisitClass>(visitor);
     } else if (kVisitClass) {
diff --git a/runtime/mirror/object.cc b/runtime/mirror/object.cc
index 5dac985..f9740bb 100644
--- a/runtime/mirror/object.cc
+++ b/runtime/mirror/object.cc
@@ -244,5 +244,10 @@
   UNREACHABLE();
 }
 
+ArtField* Object::FindFieldByOffset(MemberOffset offset) {
+  return IsClass() ? ArtField::FindStaticFieldWithOffset(AsClass(), offset.Uint32Value())
+      : ArtField::FindInstanceFieldWithOffset(GetClass(), offset.Uint32Value());
+}
+
 }  // namespace mirror
 }  // namespace art
diff --git a/runtime/mirror/object.h b/runtime/mirror/object.h
index 343c9bc..5afe99f 100644
--- a/runtime/mirror/object.h
+++ b/runtime/mirror/object.h
@@ -62,7 +62,7 @@
 static constexpr bool kCheckFieldAssignments = false;
 
 // Size of Object.
-static constexpr uint32_t kObjectHeaderSize = kUseBakerOrBrooksReadBarrier ? 16 : 8;
+static constexpr uint32_t kObjectHeaderSize = kUseBrooksReadBarrier ? 16 : 8;
 
 // C++ mirror of java.lang.Object
 class MANAGED LOCKABLE Object {
@@ -94,6 +94,9 @@
   NO_RETURN
 #endif
   void SetReadBarrierPointer(Object* rb_ptr) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+#ifndef USE_BAKER_OR_BROOKS_READ_BARRIER
+  NO_RETURN
+#endif
   bool AtomicSetReadBarrierPointer(Object* expected_rb_ptr, Object* rb_ptr)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void AssertReadBarrierPointer() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -182,7 +185,12 @@
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
   DoubleArray* AsDoubleArray() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
+  bool IsString() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   String* AsString() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
@@ -431,6 +439,8 @@
   void VisitReferences(const Visitor& visitor, const JavaLangRefVisitor& ref_visitor)
       NO_THREAD_SAFETY_ANALYSIS;
 
+  ArtField* FindFieldByOffset(MemberOffset offset) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   // Used by object_test.
   static void SetHashCodeSeed(uint32_t new_seed);
   // Generate an identity hash code. Public for object test.
@@ -502,11 +512,11 @@
   // Monitor and hash code information.
   uint32_t monitor_;
 
-#ifdef USE_BAKER_OR_BROOKS_READ_BARRIER
+#ifdef USE_BROOKS_READ_BARRIER
   // Note names use a 'x' prefix and the x_rb_ptr_ is of type int
   // instead of Object to go with the alphabetical/by-type field order
   // on the Java side.
-  uint32_t x_rb_ptr_;      // For the Baker or Brooks pointer.
+  uint32_t x_rb_ptr_;      // For the Brooks pointer.
   uint32_t x_xpadding_;    // For 8-byte alignment. TODO: get rid of this.
 #endif
 
diff --git a/runtime/mirror/object_test.cc b/runtime/mirror/object_test.cc
index 2262af5..8e50a7a 100644
--- a/runtime/mirror/object_test.cc
+++ b/runtime/mirror/object_test.cc
@@ -61,14 +61,13 @@
     Handle<String> string(
         hs.NewHandle(String::AllocFromModifiedUtf8(self, expected_utf16_length, utf8_in)));
     ASSERT_EQ(expected_utf16_length, string->GetLength());
-    ASSERT_TRUE(string->GetCharArray() != nullptr);
-    ASSERT_TRUE(string->GetCharArray()->GetData() != nullptr);
+    ASSERT_TRUE(string->GetValue() != nullptr);
     // strlen is necessary because the 1-character string "\x00\x00" is interpreted as ""
     ASSERT_TRUE(string->Equals(utf8_in) || (expected_utf16_length == 1 && strlen(utf8_in) == 0));
     ASSERT_TRUE(string->Equals(StringPiece(utf8_in)) ||
                 (expected_utf16_length == 1 && strlen(utf8_in) == 0));
     for (int32_t i = 0; i < expected_utf16_length; i++) {
-      EXPECT_EQ(utf16_expected[i], string->UncheckedCharAt(i));
+      EXPECT_EQ(utf16_expected[i], string->CharAt(i));
     }
     EXPECT_EQ(expected_hash, string->GetHashCode());
   }
@@ -491,12 +490,6 @@
   Handle<String> string(hs.NewHandle(String::AllocFromModifiedUtf8(soa.Self(), "android")));
   EXPECT_EQ(string->GetLength(), 7);
   EXPECT_EQ(string->GetUtfLength(), 7);
-
-  string->SetOffset(2);
-  string->SetCount(5);
-  EXPECT_TRUE(string->Equals("droid"));
-  EXPECT_EQ(string->GetLength(), 5);
-  EXPECT_EQ(string->GetUtfLength(), 5);
 }
 
 TEST_F(ObjectTest, DescriptorCompare) {
diff --git a/runtime/mirror/string-inl.h b/runtime/mirror/string-inl.h
index b367cff..cd5d2f6 100644
--- a/runtime/mirror/string-inl.h
+++ b/runtime/mirror/string-inl.h
@@ -19,6 +19,7 @@
 
 #include "array.h"
 #include "class.h"
+#include "gc/heap-inl.h"
 #include "intern_table.h"
 #include "runtime.h"
 #include "string.h"
@@ -29,41 +30,173 @@
 namespace mirror {
 
 inline uint32_t String::ClassSize() {
-  uint32_t vtable_entries = Object::kVTableLength + 51;
+  uint32_t vtable_entries = Object::kVTableLength + 52;
   return Class::ComputeClassSize(true, vtable_entries, 0, 1, 0, 1, 2);
 }
 
-inline uint16_t String::UncheckedCharAt(int32_t index) {
-  return GetCharArray()->Get(index + GetOffset());
-}
+// Sets string count in the allocation code path to ensure it is guarded by a CAS.
+class SetStringCountVisitor {
+ public:
+  explicit SetStringCountVisitor(int32_t count) : count_(count) {
+  }
 
-inline CharArray* String::GetCharArray() {
-  return GetFieldObject<CharArray>(ValueOffset());
-}
+  void operator()(Object* obj, size_t usable_size ATTRIBUTE_UNUSED) const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    // Avoid AsString as object is not yet in live bitmap or allocation stack.
+    String* string = down_cast<String*>(obj);
+    string->SetCount(count_);
+  }
 
-inline int32_t String::GetLength() {
-  int32_t result = GetField32(OFFSET_OF_OBJECT_MEMBER(String, count_));
-  DCHECK(result >= 0 && result <= GetCharArray()->GetLength());
-  return result;
-}
+ private:
+  const int32_t count_;
+};
 
-inline void String::SetArray(CharArray* new_array) {
-  // Array is invariant so use non-transactional mode. Also disable check as we may run inside
-  // a transaction.
-  DCHECK(new_array != nullptr);
-  SetFieldObject<false, false>(OFFSET_OF_OBJECT_MEMBER(String, array_), new_array);
-}
+// Allocation-path visitor (per the original note, guarded by a CAS): sets count, widens bytes.
+class SetStringCountAndBytesVisitor {
+ public:
+  SetStringCountAndBytesVisitor(int32_t count, Handle<ByteArray> src_array, int32_t offset,
+                                int32_t high_byte)
+      : count_(count), src_array_(src_array), offset_(offset), high_byte_(high_byte) {}
+
+  void operator()(Object* obj, size_t usable_size ATTRIBUTE_UNUSED) const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    // Use down_cast, not AsString: obj is not yet in the live bitmap or allocation stack.
+    String* const new_string = down_cast<String*>(obj);
+    new_string->SetCount(count_);
+    const uint8_t* const in = reinterpret_cast<uint8_t*>(src_array_->GetData()) + offset_;
+    uint16_t* const out = new_string->GetValue();
+    // Each char is |high_byte_| plus the value of one source byte, starting at |offset_|.
+    for (int32_t idx = 0; idx < count_; ++idx) {
+      out[idx] = high_byte_ + (in[idx] & 0xFF);
+    }
+  }
+
+ private:
+  const int32_t count_;
+  Handle<ByteArray> src_array_;
+  const int32_t offset_;
+  const int32_t high_byte_;
+};
+
+// Allocation-path visitor (per the original note, guarded by a CAS): sets count, copies chars.
+class SetStringCountAndValueVisitorFromCharArray {
+ public:
+  SetStringCountAndValueVisitorFromCharArray(int32_t count, Handle<CharArray> src_array,
+                                             int32_t offset)
+      : count_(count), src_array_(src_array), offset_(offset) {}
+
+  void operator()(Object* obj, size_t usable_size ATTRIBUTE_UNUSED) const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    // Use down_cast, not AsString: obj is not yet in the live bitmap or allocation stack.
+    String* const new_string = down_cast<String*>(obj);
+    new_string->SetCount(count_);
+    // Copy |count_| chars, starting |offset_| chars into the source array's data.
+    const size_t num_bytes = count_ * sizeof(uint16_t);
+    memcpy(new_string->GetValue(), src_array_->GetData() + offset_, num_bytes);
+  }
+
+ private:
+  const int32_t count_;
+  Handle<CharArray> src_array_;
+  const int32_t offset_;
+};
+
+// Allocation-path visitor (per the original note, guarded by a CAS): sets count, copies chars.
+class SetStringCountAndValueVisitorFromString {
+ public:
+  SetStringCountAndValueVisitorFromString(int32_t count, Handle<String> src_string,
+                                          int32_t offset)
+      : count_(count), src_string_(src_string), offset_(offset) {}
+
+  void operator()(Object* obj, size_t usable_size ATTRIBUTE_UNUSED) const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    // Use down_cast, not AsString: obj is not yet in the live bitmap or allocation stack.
+    String* const new_string = down_cast<String*>(obj);
+    new_string->SetCount(count_);
+    // Copy |count_| chars, starting |offset_| chars into the source string's value.
+    const size_t num_bytes = count_ * sizeof(uint16_t);
+    memcpy(new_string->GetValue(), src_string_->GetValue() + offset_, num_bytes);
+  }
+
+ private:
+  const int32_t count_;
+  Handle<String> src_string_;
+  const int32_t offset_;
+};
 
 inline String* String::Intern() {
   return Runtime::Current()->GetInternTable()->InternWeak(this);
 }
 
+inline uint16_t String::CharAt(int32_t index) {
+  // Bounds-checked read: a bad |index| throws StringIndexOutOfBoundsException and yields 0.
+  const int32_t length = GetField32(OFFSET_OF_OBJECT_MEMBER(String, count_));
+  if (UNLIKELY(!((index >= 0) && (index < length)))) {
+    Thread::Current()->ThrowNewExceptionF("Ljava/lang/StringIndexOutOfBoundsException;",
+                                          "length=%i; index=%i", length, index);
+    return 0;
+  }
+  return GetValue()[index];
+}
+
+// Size in bytes: sizeof(String) plus one uint16_t for each of the GetLength() chars.
+template<VerifyObjectFlags kVerifyFlags> inline size_t String::SizeOf() {
+  return (GetLength<kVerifyFlags>() * sizeof(uint16_t)) + sizeof(String);
+}
+
+// Allocates a String with room for |utf16_length| chars via the heap's AllocObjectWithAllocator.
+// Throws OutOfMemoryError and returns null when the total byte size wraps around size_t.
+template <bool kIsInstrumented, typename PreFenceVisitor>
+inline String* String::Alloc(Thread* self, int32_t utf16_length, gc::AllocatorType allocator_type,
+                             const PreFenceVisitor& pre_fence_visitor) {
+  Class* const klass = GetJavaLangString();
+  const size_t chars_bytes = sizeof(uint16_t) * utf16_length;
+  const size_t total_bytes = sizeof(String) + chars_bytes;
+
+  if (UNLIKELY(total_bytes < chars_bytes)) {  // The addition wrapped around.
+    const auto message = StringPrintf("%s of length %d would overflow",
+                                      PrettyDescriptor(klass).c_str(), utf16_length);
+    self->ThrowOutOfMemoryError(message.c_str());
+    return nullptr;
+  }
+  gc::Heap* const heap = Runtime::Current()->GetHeap();
+  return down_cast<String*>(
+      heap->AllocObjectWithAllocator<kIsInstrumented, false>(
+          self, klass, total_bytes, allocator_type, pre_fence_visitor));
+}
+
+// Allocates a String of |byte_length| chars, each (high_byte << 8) + one byte of |array|.
+template <bool kIsInstrumented>
+inline String* String::AllocFromByteArray(Thread* self, int32_t byte_length,
+                                          Handle<ByteArray> array, int32_t offset,
+                                          int32_t high_byte, gc::AllocatorType allocator_type) {
+  const SetStringCountAndBytesVisitor fill_from_bytes(byte_length, array, offset, high_byte << 8);
+  return Alloc<kIsInstrumented>(self, byte_length, allocator_type, fill_from_bytes);
+}
+
+// Allocates a String of |array_length| chars copied from |array| starting at index |offset|.
+template <bool kIsInstrumented>
+inline String* String::AllocFromCharArray(Thread* self, int32_t array_length,
+                                          Handle<CharArray> array, int32_t offset,
+                                          gc::AllocatorType allocator_type) {
+  const SetStringCountAndValueVisitorFromCharArray copy_chars(array_length, array, offset);
+  return Alloc<kIsInstrumented>(self, array_length, allocator_type, copy_chars);
+}
+
+// Allocates a String of |string_length| chars copied from |string| starting at char |offset|.
+template <bool kIsInstrumented>
+inline String* String::AllocFromString(Thread* self, int32_t string_length, Handle<String> string,
+                                       int32_t offset, gc::AllocatorType allocator_type) {
+  const SetStringCountAndValueVisitorFromString copy_chars(string_length, string, offset);
+  return Alloc<kIsInstrumented>(self, string_length, allocator_type, copy_chars);
+}
+
 inline int32_t String::GetHashCode() {
   int32_t result = GetField32(OFFSET_OF_OBJECT_MEMBER(String, hash_code_));
   if (UNLIKELY(result == 0)) {
     result = ComputeHashCode();
   }
-  DCHECK(result != 0 || ComputeUtf16Hash(GetCharArray(), GetOffset(), GetLength()) == 0)
+  DCHECK(result != 0 || ComputeUtf16Hash(GetValue(), GetLength()) == 0)
       << ToModifiedUtf8() << " " << result;
   return result;
 }
diff --git a/runtime/mirror/string.cc b/runtime/mirror/string.cc
index b7fd240..b6236b1 100644
--- a/runtime/mirror/string.cc
+++ b/runtime/mirror/string.cc
@@ -20,10 +20,11 @@
 #include "array.h"
 #include "class-inl.h"
 #include "gc/accounting/card_table-inl.h"
+#include "handle_scope-inl.h"
 #include "intern_table.h"
 #include "object-inl.h"
 #include "runtime.h"
-#include "handle_scope-inl.h"
+#include "string-inl.h"
 #include "thread.h"
 #include "utf-inl.h"
 
@@ -40,7 +41,7 @@
   } else if (start > count) {
     start = count;
   }
-  const uint16_t* chars = GetCharArray()->GetData() + GetOffset();
+  const uint16_t* chars = GetValue();
   const uint16_t* p = chars + start;
   const uint16_t* end = chars + count;
   while (p < end) {
@@ -62,36 +63,46 @@
   java_lang_String_ = GcRoot<Class>(nullptr);
 }
 
-int32_t String::ComputeHashCode() {
-  const int32_t hash_code = ComputeUtf16Hash(GetCharArray(), GetOffset(), GetLength());
+int String::ComputeHashCode() {
+  const int32_t hash_code = ComputeUtf16Hash(GetValue(), GetLength());
   SetHashCode(hash_code);
   return hash_code;
 }
 
 int32_t String::GetUtfLength() {
-  return CountUtf8Bytes(GetCharArray()->GetData() + GetOffset(), GetLength());
+  return CountUtf8Bytes(GetValue(), GetLength());
 }
 
-String* String::AllocFromUtf16(Thread* self,
-                               int32_t utf16_length,
-                               const uint16_t* utf16_data_in,
-                               int32_t hash_code) {
+void String::SetCharAt(int32_t index, uint16_t c) {  // Unchecked store; bounds only DCHECKed.
+  DCHECK((index >= 0) && (index < count_));
+  *(GetValue() + index) = c;
+}
+
+// Returns a new String holding |string|'s chars followed by |string2|'s, or null if Alloc fails.
+String* String::AllocFromStrings(Thread* self, Handle<String> string, Handle<String> string2) {
+  const int32_t first_len = string->GetLength();
+  const int32_t second_len = string2->GetLength();
+  const int32_t total_len = first_len + second_len;
+  const gc::AllocatorType allocator = Runtime::Current()->GetHeap()->GetCurrentAllocator();
+  String* const result = Alloc<true>(self, total_len, allocator, SetStringCountVisitor(total_len));
+  if (UNLIKELY(result == nullptr)) {
+    return nullptr;
+  }
+  memcpy(result->GetValue(), string->GetValue(), first_len * sizeof(uint16_t));
+  memcpy(result->GetValue() + first_len, string2->GetValue(), second_len * sizeof(uint16_t));
+  return result;
+}
+
+String* String::AllocFromUtf16(Thread* self, int32_t utf16_length, const uint16_t* utf16_data_in) {
   CHECK(utf16_data_in != nullptr || utf16_length == 0);
-  String* string = Alloc(self, utf16_length);
+  gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
+  SetStringCountVisitor visitor(utf16_length);
+  String* string = Alloc<true>(self, utf16_length, allocator_type, visitor);
   if (UNLIKELY(string == nullptr)) {
     return nullptr;
   }
-  CharArray* array = const_cast<CharArray*>(string->GetCharArray());
-  if (UNLIKELY(array == nullptr)) {
-    return nullptr;
-  }
-  memcpy(array->GetData(), utf16_data_in, utf16_length * sizeof(uint16_t));
-  if (hash_code != 0) {
-    DCHECK_EQ(hash_code, ComputeUtf16Hash(utf16_data_in, utf16_length));
-    string->SetHashCode(hash_code);
-  } else {
-    string->ComputeHashCode();
-  }
+  uint16_t* array = string->GetValue();
+  memcpy(array, utf16_data_in, utf16_length * sizeof(uint16_t));
   return string;
 }
 
@@ -103,33 +114,14 @@
 
 String* String::AllocFromModifiedUtf8(Thread* self, int32_t utf16_length,
                                       const char* utf8_data_in) {
-  String* string = Alloc(self, utf16_length);
+  gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
+  SetStringCountVisitor visitor(utf16_length);
+  String* string = Alloc<true>(self, utf16_length, allocator_type, visitor);
   if (UNLIKELY(string == nullptr)) {
     return nullptr;
   }
-  uint16_t* utf16_data_out =
-      const_cast<uint16_t*>(string->GetCharArray()->GetData());
+  uint16_t* utf16_data_out = string->GetValue();
   ConvertModifiedUtf8ToUtf16(utf16_data_out, utf8_data_in);
-  string->ComputeHashCode();
-  return string;
-}
-
-String* String::Alloc(Thread* self, int32_t utf16_length) {
-  StackHandleScope<1> hs(self);
-  Handle<CharArray> array(hs.NewHandle(CharArray::Alloc(self, utf16_length)));
-  if (UNLIKELY(array.Get() == nullptr)) {
-    return nullptr;
-  }
-  return Alloc(self, array);
-}
-
-String* String::Alloc(Thread* self, Handle<CharArray> array) {
-  // Hold reference in case AllocObject causes GC.
-  String* string = down_cast<String*>(GetJavaLangString()->AllocObject(self));
-  if (LIKELY(string != nullptr)) {
-    string->SetArray(array.Get());
-    string->SetCount(array->GetLength());
-  }
   return string;
 }
 
@@ -147,7 +139,7 @@
     // Note: don't short circuit on hash code as we're presumably here as the
     // hash code was already equal
     for (int32_t i = 0; i < that->GetLength(); ++i) {
-      if (this->UncheckedCharAt(i) != that->UncheckedCharAt(i)) {
+      if (this->CharAt(i) != that->CharAt(i)) {
         return false;
       }
     }
@@ -160,7 +152,7 @@
     return false;
   } else {
     for (int32_t i = 0; i < that_length; ++i) {
-      if (this->UncheckedCharAt(i) != that_chars[that_offset + i]) {
+      if (this->CharAt(i) != that_chars[that_offset + i]) {
         return false;
       }
     }
@@ -177,7 +169,7 @@
       return false;
     }
 
-    if (GetLeadingUtf16Char(ch) != UncheckedCharAt(i++)) {
+    if (GetLeadingUtf16Char(ch) != CharAt(i++)) {
       return false;
     }
 
@@ -187,7 +179,7 @@
         return false;
       }
 
-      if (UncheckedCharAt(i++) != trailing) {
+      if (CharAt(i++) != trailing) {
         return false;
       }
     }
@@ -201,7 +193,7 @@
   for (int32_t i = 0; i < length; ++i) {
     uint32_t ch = GetUtf16FromUtf8(&p);
 
-    if (GetLeadingUtf16Char(ch) != UncheckedCharAt(i)) {
+    if (GetLeadingUtf16Char(ch) != CharAt(i)) {
       return false;
     }
 
@@ -211,7 +203,7 @@
         return false;
       }
 
-      if (UncheckedCharAt(++i) != trailing) {
+      if (CharAt(++i) != trailing) {
         return false;
       }
     }
@@ -221,7 +213,7 @@
 
 // Create a modified UTF-8 encoded std::string from a java/lang/String object.
 std::string String::ToModifiedUtf8() {
-  const uint16_t* chars = GetCharArray()->GetData() + GetOffset();
+  const uint16_t* chars = GetValue();
   size_t byte_count = GetUtfLength();
   std::string result(byte_count, static_cast<char>(0));
   ConvertUtf16ToModifiedUtf8(&result[0], chars, GetLength());
@@ -244,8 +236,8 @@
   int32_t rhsCount = rhs->GetLength();
   int32_t countDiff = lhsCount - rhsCount;
   int32_t minCount = (countDiff < 0) ? lhsCount : rhsCount;
-  const uint16_t* lhsChars = lhs->GetCharArray()->GetData() + lhs->GetOffset();
-  const uint16_t* rhsChars = rhs->GetCharArray()->GetData() + rhs->GetOffset();
+  const uint16_t* lhsChars = lhs->GetValue();
+  const uint16_t* rhsChars = rhs->GetValue();
   int32_t otherRes = MemCmp16(lhsChars, rhsChars, minCount);
   if (otherRes != 0) {
     return otherRes;
@@ -257,5 +249,27 @@
   java_lang_String_.VisitRootIfNonNull(visitor, RootInfo(kRootStickyClass));
 }
 
+// Returns a newly allocated char[] holding a copy of this string's characters, or null (with the
+// allocation's exception left pending on |self|) if the array could not be allocated.
+CharArray* String::ToCharArray(Thread* self) {
+  // Keep |this| in a handle across the allocation and read the source chars through the handle.
+  StackHandleScope<1> hs(self);
+  Handle<String> string(hs.NewHandle(this));
+  CharArray* result = CharArray::Alloc(self, GetLength());
+  if (UNLIKELY(result == nullptr)) {
+    // CharArray::Alloc failed; do not dereference the null result.
+    self->AssertPendingOOMException();
+    return nullptr;
+  }
+  memcpy(result->GetData(), string->GetValue(), string->GetLength() * sizeof(uint16_t));
+  return result;
+}
+
+// Copies chars [start, end) of this string into |array| at |index|; performs no bounds checks.
+void String::GetChars(int32_t start, int32_t end, Handle<CharArray> array, int32_t index) {
+  const size_t num_bytes = (end - start) * sizeof(uint16_t);
+  memcpy(array->GetData() + index, GetValue() + start, num_bytes);
+}
+
 }  // namespace mirror
 }  // namespace art
diff --git a/runtime/mirror/string.h b/runtime/mirror/string.h
index 0670d0b..fcfe976 100644
--- a/runtime/mirror/string.h
+++ b/runtime/mirror/string.h
@@ -18,6 +18,7 @@
 #define ART_RUNTIME_MIRROR_STRING_H_
 
 #include "gc_root.h"
+#include "gc/allocator_type.h"
 #include "object.h"
 #include "object_callbacks.h"
 
@@ -45,22 +46,27 @@
   }
 
   static MemberOffset ValueOffset() {
-    return OFFSET_OF_OBJECT_MEMBER(String, array_);
+    return OFFSET_OF_OBJECT_MEMBER(String, value_);
   }
 
-  static MemberOffset OffsetOffset() {
-    return OFFSET_OF_OBJECT_MEMBER(String, offset_);
+  uint16_t* GetValue() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return &value_[0];
   }
 
-  CharArray* GetCharArray() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  size_t SizeOf() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  int32_t GetOffset() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    int32_t result = GetField32(OffsetOffset());
-    DCHECK_LE(0, result);
-    return result;
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  int32_t GetLength() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return GetField32<kVerifyFlags>(OFFSET_OF_OBJECT_MEMBER(String, count_));
   }
 
-  int32_t GetLength() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void SetCount(int32_t new_count) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    // Count is invariant, so it is written with SetField32<false, false>: non-transactional, and
+    // with the check disabled because this may run inside a transaction. Must be non-negative.
+    DCHECK_LE(0, new_count);
+    SetField32<false, false>(OFFSET_OF_OBJECT_MEMBER(String, count_), new_count);
+  }
 
   int32_t GetHashCode() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -69,19 +75,47 @@
 
   int32_t GetUtfLength() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  uint16_t CharAt(int32_t index) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  void SetCharAt(int32_t index, uint16_t c) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   String* Intern() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  static String* AllocFromUtf16(Thread* self,
-                                int32_t utf16_length,
-                                const uint16_t* utf16_data_in,
-                                int32_t hash_code = 0)
+  template <bool kIsInstrumented, typename PreFenceVisitor>
+  ALWAYS_INLINE static String* Alloc(Thread* self, int32_t utf16_length,
+                                     gc::AllocatorType allocator_type,
+                                     const PreFenceVisitor& pre_fence_visitor)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  template <bool kIsInstrumented>
+  ALWAYS_INLINE static String* AllocFromByteArray(Thread* self, int32_t byte_length,
+                                                  Handle<ByteArray> array, int32_t offset,
+                                                  int32_t high_byte,
+                                                  gc::AllocatorType allocator_type)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  template <bool kIsInstrumented>
+  ALWAYS_INLINE static String* AllocFromCharArray(Thread* self, int32_t array_length,
+                                                  Handle<CharArray> array, int32_t offset,
+                                                  gc::AllocatorType allocator_type)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  template <bool kIsInstrumented>
+  ALWAYS_INLINE static String* AllocFromString(Thread* self, int32_t string_length,
+                                               Handle<String> string, int32_t offset,
+                                               gc::AllocatorType allocator_type)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  static String* AllocFromStrings(Thread* self, Handle<String> string, Handle<String> string2)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  static String* AllocFromUtf16(Thread* self, int32_t utf16_length, const uint16_t* utf16_data_in)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   static String* AllocFromModifiedUtf8(Thread* self, const char* utf)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  static String* AllocFromModifiedUtf8(Thread* self, int32_t utf16_length,
-                                       const char* utf8_data_in)
+  static String* AllocFromModifiedUtf8(Thread* self, int32_t utf16_length, const char* utf8_data_in)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // TODO: This is only used in the interpreter to compare against
@@ -112,13 +146,10 @@
 
   int32_t CompareTo(String* other) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void SetOffset(int32_t new_offset) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    // Offset is only used during testing so use non-transactional mode.
-    DCHECK_LE(0, new_offset);
-    SetField32<false>(OFFSET_OF_OBJECT_MEMBER(String, offset_), new_offset);
-  }
+  CharArray* ToCharArray(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void SetArray(CharArray* new_array) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void GetChars(int32_t start, int32_t end, Handle<CharArray> array, int32_t index)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   static Class* GetJavaLangString() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     DCHECK(!java_lang_String_.IsNull());
@@ -130,9 +161,6 @@
   static void VisitRoots(RootVisitor* visitor)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  // TODO: Make this private. It's only used on ObjectTest at the moment.
-  uint16_t UncheckedCharAt(int32_t index) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
  private:
   void SetHashCode(int32_t new_hash_code) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     // Hash code is invariant so use non-transactional mode. Also disable check as we may run inside
@@ -141,27 +169,12 @@
     SetField32<false, false>(OFFSET_OF_OBJECT_MEMBER(String, hash_code_), new_hash_code);
   }
 
-  void SetCount(int32_t new_count) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    // Count is invariant so use non-transactional mode. Also disable check as we may run inside
-    // a transaction.
-    DCHECK_LE(0, new_count);
-    SetField32<false, false>(OFFSET_OF_OBJECT_MEMBER(String, count_), new_count);
-  }
-
-  static String* Alloc(Thread* self, int32_t utf16_length)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
-  static String* Alloc(Thread* self, Handle<CharArray> array)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
   // Field order required by test "ValidateFieldOrderOfJavaCppUnionClasses".
-  HeapReference<CharArray> array_;
-
   int32_t count_;
 
   uint32_t hash_code_;
 
-  int32_t offset_;
+  uint16_t value_[0];
 
   static GcRoot<Class> java_lang_String_;
 
diff --git a/runtime/mirror/throwable.cc b/runtime/mirror/throwable.cc
index ca94644..782b9c0 100644
--- a/runtime/mirror/throwable.cc
+++ b/runtime/mirror/throwable.cc
@@ -115,10 +115,14 @@
       } else {
         for (int32_t i = 0; i < ste_array->GetLength(); ++i) {
           StackTraceElement* ste = ste_array->Get(i);
-          result += StringPrintf("  at %s (%s:%d)\n",
-                                 ste->GetMethodName()->ToModifiedUtf8().c_str(),
-                                 ste->GetFileName()->ToModifiedUtf8().c_str(),
-                                 ste->GetLineNumber());
+          DCHECK(ste != nullptr);
+          auto* method_name = ste->GetMethodName();
+          auto* file_name = ste->GetFileName();
+          result += StringPrintf(
+              "  at %s (%s:%d)\n",
+              method_name != nullptr ? method_name->ToModifiedUtf8().c_str() : "<unknown method>",
+              file_name != nullptr ? file_name->ToModifiedUtf8().c_str() : "(Unknown Source)",
+              ste->GetLineNumber());
         }
       }
     } else {
diff --git a/runtime/modifiers.h b/runtime/modifiers.h
index e7bd207..8586dd1 100644
--- a/runtime/modifiers.h
+++ b/runtime/modifiers.h
@@ -65,6 +65,8 @@
 static constexpr uint32_t kAccClassIsFinalizerReference = 0x02000000;
 // class is a phantom reference
 static constexpr uint32_t kAccClassIsPhantomReference   = 0x01000000;
+// class is the string class
+static constexpr uint32_t kAccClassIsStringClass        = 0x00800000;
 
 static constexpr uint32_t kAccReferenceFlagsMask = (kAccClassIsReference
                                                   | kAccClassIsWeakReference
diff --git a/runtime/monitor.h b/runtime/monitor.h
index 95e4460..b7245c1 100644
--- a/runtime/monitor.h
+++ b/runtime/monitor.h
@@ -141,6 +141,10 @@
     CHECK_EQ(error, 0) << strerror(error);
     return result;
   }
+
+  void operator delete(void* ptr) {  // Class-specific delete: releases |ptr| with free().
+    free(ptr);
+  }
 #endif
 
  private:
diff --git a/runtime/native/dalvik_system_VMRuntime.cc b/runtime/native/dalvik_system_VMRuntime.cc
index 53bb129..9736e15 100644
--- a/runtime/native/dalvik_system_VMRuntime.cc
+++ b/runtime/native/dalvik_system_VMRuntime.cc
@@ -223,7 +223,7 @@
 }
 
 static void VMRuntime_concurrentGC(JNIEnv* env, jobject) {
-  Runtime::Current()->GetHeap()->ConcurrentGC(ThreadForEnv(env));
+  Runtime::Current()->GetHeap()->ConcurrentGC(ThreadForEnv(env), true);
 }
 
 static void VMRuntime_requestHeapTrim(JNIEnv* env, jobject) {
@@ -231,7 +231,7 @@
 }
 
 static void VMRuntime_requestConcurrentGC(JNIEnv* env, jobject) {
-  Runtime::Current()->GetHeap()->RequestConcurrentGC(ThreadForEnv(env));
+  Runtime::Current()->GetHeap()->RequestConcurrentGC(ThreadForEnv(env), true);
 }
 
 static void VMRuntime_startHeapTaskProcessor(JNIEnv* env, jobject) {
diff --git a/runtime/native/dalvik_system_VMStack.cc b/runtime/native/dalvik_system_VMStack.cc
index 17fbc4f..1d7d853 100644
--- a/runtime/native/dalvik_system_VMStack.cc
+++ b/runtime/native/dalvik_system_VMStack.cc
@@ -84,7 +84,8 @@
 static jobject VMStack_getClosestUserClassLoader(JNIEnv* env, jclass) {
   struct ClosestUserClassLoaderVisitor : public StackVisitor {
     explicit ClosestUserClassLoaderVisitor(Thread* thread)
-      : StackVisitor(thread, nullptr), class_loader(nullptr) {}
+      : StackVisitor(thread, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames),
+        class_loader(nullptr) {}
 
     bool VisitFrame() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
       DCHECK(class_loader == nullptr);
diff --git a/runtime/native/dalvik_system_ZygoteHooks.cc b/runtime/native/dalvik_system_ZygoteHooks.cc
index af01a02..1a7a3e5 100644
--- a/runtime/native/dalvik_system_ZygoteHooks.cc
+++ b/runtime/native/dalvik_system_ZygoteHooks.cc
@@ -65,6 +65,7 @@
     DEBUG_ENABLE_SAFEMODE           = 1 << 3,
     DEBUG_ENABLE_JNI_LOGGING        = 1 << 4,
     DEBUG_ENABLE_JIT                = 1 << 5,
+    DEBUG_GENERATE_CFI              = 1 << 6,
   };
 
   Runtime* const runtime = Runtime::Current();
@@ -111,6 +112,12 @@
   }
   runtime->GetJITOptions()->SetUseJIT(use_jit);
 
+  const bool generate_cfi = (debug_flags & DEBUG_GENERATE_CFI) != 0;
+  if (generate_cfi) {
+    runtime->AddCompilerOption("--include-cfi");
+    debug_flags &= ~DEBUG_GENERATE_CFI;
+  }
+
   // This is for backwards compatibility with Dalvik.
   debug_flags &= ~DEBUG_ENABLE_ASSERT;
 
@@ -145,6 +152,7 @@
   if (Trace::GetMethodTracingMode() != TracingMode::kTracingInactive) {
     Trace::TraceOutputMode output_mode = Trace::GetOutputMode();
     Trace::TraceMode trace_mode = Trace::GetMode();
+    size_t buffer_size = Trace::GetBufferSize();
 
     // Just drop it.
     Trace::Abort();
@@ -169,7 +177,7 @@
                                               proc_name.c_str());
         Trace::Start(trace_file.c_str(),
                      -1,
-                     -1,  // TODO: Expose buffer size.
+                     buffer_size,
                      0,   // TODO: Expose flags.
                      output_mode,
                      trace_mode,
diff --git a/runtime/native/java_lang_Class.cc b/runtime/native/java_lang_Class.cc
index b0d923b..795a0ea 100644
--- a/runtime/native/java_lang_Class.cc
+++ b/runtime/native/java_lang_Class.cc
@@ -29,6 +29,7 @@
 #include "mirror/object-inl.h"
 #include "mirror/object_array-inl.h"
 #include "mirror/string-inl.h"
+#include "reflection.h"
 #include "scoped_thread_state_change.h"
 #include "scoped_fast_native_object_access.h"
 #include "ScopedLocalRef.h"
@@ -191,7 +192,7 @@
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   size_t low = 0;
   size_t high = num_fields;
-  const uint16_t* const data = name->GetCharArray()->GetData() + name->GetOffset();
+  const uint16_t* const data = name->GetValue();
   const size_t length = name->GetLength();
   while (low < high) {
     auto mid = (low + high) / 2;
@@ -251,7 +252,7 @@
     std::string name_str = name_string->ToModifiedUtf8();
     // We may have a pending exception if we failed to resolve.
     if (!soa.Self()->IsExceptionPending()) {
-      soa.Self()->ThrowNewException("Ljava/lang/NoSuchFieldException;", name_str.c_str());
+      ThrowNoSuchFieldException(DecodeClass(soa, javaThis), name_str.c_str());
     }
     return nullptr;
   }
@@ -391,8 +392,8 @@
       nullptr;
 }
 
-jobjectArray Class_getDeclaredMethodsUnchecked(JNIEnv* env, jobject javaThis,
-                                               jboolean publicOnly) {
+static jobjectArray Class_getDeclaredMethodsUnchecked(JNIEnv* env, jobject javaThis,
+                                                      jboolean publicOnly) {
   ScopedFastNativeObjectAccess soa(env);
   StackHandleScope<5> hs(soa.Self());
   auto* klass = DecodeClass(soa, javaThis);
@@ -457,6 +458,85 @@
   return soa.AddLocalReference<jobjectArray>(ret.Get());
 }
 
+static jobject Class_newInstance(JNIEnv* env, jobject javaThis) {  // Backs Class.newInstance().
+  ScopedFastNativeObjectAccess soa(env);
+  StackHandleScope<4> hs(soa.Self());  // Slots: klass, caller, receiver, declaring class.
+  auto klass = hs.NewHandle(DecodeClass(soa, javaThis));
+  if (UNLIKELY(klass->GetPrimitiveType() != 0 || klass->IsInterface() || klass->IsArrayClass() ||
+               klass->IsAbstract())) {
+    soa.Self()->ThrowNewExceptionF("Ljava/lang/InstantiationException;",
+                                   "%s cannot be instantiated", PrettyClass(klass.Get()).c_str());
+    return nullptr;
+  }
+  auto caller = hs.NewHandle<mirror::Class>(nullptr);  // Filled in lazily, only when needed.
+  // A non-public class must be accessible to the caller; the check is skipped if caller is null.
+  if (!klass->IsPublic()) {
+    caller.Assign(GetCallingClass(soa.Self(), 1));
+    if (caller.Get() != nullptr && !caller->CanAccess(klass.Get())) {
+      soa.Self()->ThrowNewExceptionF(
+          "Ljava/lang/IllegalAccessException;", "%s is not accessible from %s",
+          PrettyClass(klass.Get()).c_str(), PrettyClass(caller.Get()).c_str());
+      return nullptr;
+    }
+  }
+  auto* constructor = klass->GetDeclaredConstructor(
+      soa.Self(), NullHandle<mirror::ObjectArray<mirror::Class>>());
+  if (UNLIKELY(constructor == nullptr)) {
+    soa.Self()->ThrowNewExceptionF("Ljava/lang/InstantiationException;",
+                                   "%s has no zero argument constructor",
+                                   PrettyClass(klass.Get()).c_str());
+    return nullptr;
+  }
+  // The string class is special-cased: allocate an empty (count 0) string via String::Alloc.
+  if (klass->IsStringClass()) {
+    gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
+    mirror::SetStringCountVisitor visitor(0);
+    mirror::Object* obj = mirror::String::Alloc<true>(soa.Self(), 0, allocator_type, visitor);
+    if (UNLIKELY(soa.Self()->IsExceptionPending())) {
+      return nullptr;
+    } else {
+      return soa.AddLocalReference<jobject>(obj);
+    }
+  }
+  auto receiver = hs.NewHandle(klass->AllocObject(soa.Self()));
+  if (UNLIKELY(receiver.Get() == nullptr)) {
+    soa.Self()->AssertPendingOOMException();
+    return nullptr;
+  }
+  // A non-public constructor must pass VerifyAccess for the caller (skipped if caller is null).
+  auto* declaring_class = constructor->GetDeclaringClass();
+  if (!constructor->IsPublic()) {
+    if (caller.Get() == nullptr) {
+      caller.Assign(GetCallingClass(soa.Self(), 1));
+    }
+    if (UNLIKELY(caller.Get() != nullptr && !VerifyAccess(
+        soa.Self(), receiver.Get(), declaring_class, constructor->GetAccessFlags(),
+        caller.Get()))) {
+      soa.Self()->ThrowNewExceptionF(
+          "Ljava/lang/IllegalAccessException;", "%s is not accessible from %s",
+          PrettyMethod(constructor).c_str(), PrettyClass(caller.Get()).c_str());
+      return nullptr;
+    }
+  }
+  // Initialize the constructor's declaring class if needed; failure leaves an exception pending.
+  if (UNLIKELY(!declaring_class->IsInitialized())) {
+    if (!Runtime::Current()->GetClassLinker()->EnsureInitialized(
+        soa.Self(), hs.NewHandle(declaring_class), true, true)) {
+      soa.Self()->AssertPendingException();
+      return nullptr;
+    }
+  }
+  // Invoke the constructor with the receiver as its only argument.
+  JValue result;
+  uint32_t args[1] = { static_cast<uint32_t>(reinterpret_cast<uintptr_t>(receiver.Get())) };
+  constructor->Invoke(soa.Self(), args, sizeof(args), &result, "V");
+  if (UNLIKELY(soa.Self()->IsExceptionPending())) {
+    return nullptr;
+  }
+  // Constructors are ()V methods, so |result| is never read; return the new object itself.
+  return soa.AddLocalReference<jobject>(receiver.Get());
+}
+
 static JNINativeMethod gMethods[] = {
   NATIVE_METHOD(Class, classForName,
                 "!(Ljava/lang/String;ZLjava/lang/ClassLoader;)Ljava/lang/Class;"),
@@ -474,6 +554,7 @@
   NATIVE_METHOD(Class, getNameNative, "!()Ljava/lang/String;"),
   NATIVE_METHOD(Class, getProxyInterfaces, "!()[Ljava/lang/Class;"),
   NATIVE_METHOD(Class, getPublicDeclaredFields, "!()[Ljava/lang/reflect/Field;"),
+  NATIVE_METHOD(Class, newInstance, "!()Ljava/lang/Object;"),
 };
 
 void register_java_lang_Class(JNIEnv* env) {
diff --git a/runtime/native/java_lang_String.cc b/runtime/native/java_lang_String.cc
index 2d153d4..aa64b79 100644
--- a/runtime/native/java_lang_String.cc
+++ b/runtime/native/java_lang_String.cc
@@ -18,6 +18,9 @@
 
 #include "common_throws.h"
 #include "jni_internal.h"
+#include "mirror/array.h"
+#include "mirror/object-inl.h"
+#include "mirror/string.h"
 #include "mirror/string-inl.h"
 #include "scoped_fast_native_object_access.h"
 #include "scoped_thread_state_change.h"
@@ -26,36 +29,93 @@
 
 namespace art {
 
-static jint String_compareTo(JNIEnv* env, jobject javaThis, jobject javaRhs) {
+static jchar String_charAt(JNIEnv* env, jobject java_this, jint index) {
   ScopedFastNativeObjectAccess soa(env);
-  if (UNLIKELY(javaRhs == nullptr)) {
+  return soa.Decode<mirror::String*>(java_this)->CharAt(index);
+}
+
+static jint String_compareTo(JNIEnv* env, jobject java_this, jobject java_rhs) {
+  ScopedFastNativeObjectAccess soa(env);
+  if (UNLIKELY(java_rhs == nullptr)) {
     ThrowNullPointerException("rhs == null");
     return -1;
   } else {
-    return soa.Decode<mirror::String*>(javaThis)->CompareTo(soa.Decode<mirror::String*>(javaRhs));
+    return soa.Decode<mirror::String*>(java_this)->CompareTo(soa.Decode<mirror::String*>(java_rhs));
   }
 }
 
+// Native implementation of String.concat. Returns one of the incoming references unchanged when
+// the other string is empty; otherwise allocates the concatenation.
+static jstring String_concat(JNIEnv* env, jobject java_this, jobject java_string_arg) {
+  ScopedFastNativeObjectAccess soa(env);
+  if (UNLIKELY(java_string_arg == nullptr)) {
+    ThrowNullPointerException("string arg == null");
+    return nullptr;
+  }
+  StackHandleScope<2> hs(soa.Self());
+  Handle<mirror::String> lhs(hs.NewHandle(soa.Decode<mirror::String*>(java_this)));
+  Handle<mirror::String> rhs(hs.NewHandle(soa.Decode<mirror::String*>(java_string_arg)));
+  const int32_t lhs_length = lhs->GetLength();
+  if (lhs_length > 0 && rhs->GetLength() > 0) {
+    return soa.AddLocalReference<jstring>(mirror::String::AllocFromStrings(soa.Self(), lhs, rhs));
+  }
+  // At least one side is empty: hand back the other operand's reference without allocating.
+  return reinterpret_cast<jstring>((lhs_length == 0) ? java_string_arg : java_this);
+}
+
 static jint String_fastIndexOf(JNIEnv* env, jobject java_this, jint ch, jint start) {
   ScopedFastNativeObjectAccess soa(env);
   // This method does not handle supplementary characters. They're dealt with in managed code.
   DCHECK_LE(ch, 0xffff);
-
-  mirror::String* s = soa.Decode<mirror::String*>(java_this);
-  return s->FastIndexOf(ch, start);
+  return soa.Decode<mirror::String*>(java_this)->FastIndexOf(ch, start);
 }
 
-static jstring String_intern(JNIEnv* env, jobject javaThis) {
+static jstring String_fastSubstring(JNIEnv* env, jobject java_this, jint start, jint length) {
   ScopedFastNativeObjectAccess soa(env);
-  mirror::String* s = soa.Decode<mirror::String*>(javaThis);
+  StackHandleScope<1> hs(soa.Self());
+  Handle<mirror::String> string_this(hs.NewHandle(soa.Decode<mirror::String*>(java_this)));
+  gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
+  mirror::String* result = mirror::String::AllocFromString<true>(soa.Self(), length, string_this,
+                                                                 start, allocator_type);
+  return soa.AddLocalReference<jstring>(result);
+}
+
+static void String_getCharsNoCheck(JNIEnv* env, jobject java_this, jint start, jint end,
+                                   jcharArray buffer, jint index) {
+  ScopedFastNativeObjectAccess soa(env);  // No argument is validated in this function.
+  StackHandleScope<1> hs(soa.Self());
+  Handle<mirror::CharArray> dest(hs.NewHandle(soa.Decode<mirror::CharArray*>(buffer)));
+  soa.Decode<mirror::String*>(java_this)->GetChars(start, end, dest, index);
+}
+
+static jstring String_intern(JNIEnv* env, jobject java_this) {
+  ScopedFastNativeObjectAccess soa(env);
+  mirror::String* s = soa.Decode<mirror::String*>(java_this);
   mirror::String* result = s->Intern();
   return soa.AddLocalReference<jstring>(result);
 }
 
+static void String_setCharAt(JNIEnv* env, jobject java_this, jint index, jchar c) {
+  ScopedFastNativeObjectAccess soa(env);
+  soa.Decode<mirror::String*>(java_this)->SetCharAt(index, c);  // index is not checked here.
+}
+
+static jcharArray String_toCharArray(JNIEnv* env, jobject java_this) {
+  ScopedFastNativeObjectAccess soa(env);  // The result is handed back as a new local reference.
+  return soa.AddLocalReference<jcharArray>(
+      soa.Decode<mirror::String*>(java_this)->ToCharArray(soa.Self()));
+}
+
 static JNINativeMethod gMethods[] = {
+  NATIVE_METHOD(String, charAt, "!(I)C"),
   NATIVE_METHOD(String, compareTo, "!(Ljava/lang/String;)I"),
+  NATIVE_METHOD(String, concat, "!(Ljava/lang/String;)Ljava/lang/String;"),
   NATIVE_METHOD(String, fastIndexOf, "!(II)I"),
+  NATIVE_METHOD(String, fastSubstring, "!(II)Ljava/lang/String;"),
+  NATIVE_METHOD(String, getCharsNoCheck, "!(II[CI)V"),
   NATIVE_METHOD(String, intern, "!()Ljava/lang/String;"),
+  NATIVE_METHOD(String, setCharAt, "!(IC)V"),
+  NATIVE_METHOD(String, toCharArray, "!()[C"),
 };
 
 void register_java_lang_String(JNIEnv* env) {
diff --git a/runtime/native/java_lang_StringFactory.cc b/runtime/native/java_lang_StringFactory.cc
new file mode 100644
index 0000000..34d6a37
--- /dev/null
+++ b/runtime/native/java_lang_StringFactory.cc
@@ -0,0 +1,89 @@
+/*
+ * Copyright (C) 2008 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "java_lang_StringFactory.h"
+
+#include "common_throws.h"
+#include "jni_internal.h"
+#include "mirror/object-inl.h"
+#include "mirror/string.h"
+#include "scoped_fast_native_object_access.h"
+#include "scoped_thread_state_change.h"
+#include "ScopedLocalRef.h"
+#include "ScopedPrimitiveArray.h"
+
+namespace art {
+
+static jstring StringFactory_newStringFromBytes(JNIEnv* env, jclass, jbyteArray java_data,
+                                                jint high, jint offset, jint byte_count) {
+  ScopedFastNativeObjectAccess soa(env);
+  if (UNLIKELY(java_data == nullptr)) {
+    ThrowNullPointerException("data == null");
+    return nullptr;
+  }
+  StackHandleScope<1> hs(soa.Self());
+  Handle<mirror::ByteArray> data(hs.NewHandle(soa.Decode<mirror::ByteArray*>(java_data)));
+  // Reject negative arguments and regions running past the end of the array. The comparison
+  // against (length - offset) avoids computing offset + byte_count, which could overflow.
+  const int32_t length = data->GetLength();
+  if (offset < 0 || byte_count < 0 || byte_count > length - offset) {
+    soa.Self()->ThrowNewExceptionF("Ljava/lang/StringIndexOutOfBoundsException;",
+                                   "length=%d; regionStart=%d; regionLength=%d", length,
+                                   offset, byte_count);
+    return nullptr;
+  }
+  gc::AllocatorType allocator = Runtime::Current()->GetHeap()->GetCurrentAllocator();
+  return soa.AddLocalReference<jstring>(mirror::String::AllocFromByteArray<true>(
+      soa.Self(), byte_count, data, offset, high, allocator));
+}
+
+// Unlike newStringFromBytes, this entry point performs no null or bounds checks of its own.
+// NOTE(review): presumably the managed caller validates offset/char_count/data - confirm.
+static jstring StringFactory_newStringFromChars(JNIEnv* env, jclass, jint offset,
+                                                jint char_count, jcharArray java_data) {
+  ScopedFastNativeObjectAccess soa(env);
+  StackHandleScope<1> hs(soa.Self());
+  Handle<mirror::CharArray> data(hs.NewHandle(soa.Decode<mirror::CharArray*>(java_data)));
+  gc::AllocatorType allocator = Runtime::Current()->GetHeap()->GetCurrentAllocator();
+  return soa.AddLocalReference<jstring>(mirror::String::AllocFromCharArray<true>(
+      soa.Self(), char_count, data, offset, allocator));
+}
+
+// Allocates a new string from to_copy, starting at offset 0 and spanning its whole length.
+static jstring StringFactory_newStringFromString(JNIEnv* env, jclass, jstring to_copy) {
+  ScopedFastNativeObjectAccess soa(env);
+  if (UNLIKELY(to_copy == nullptr)) {
+    ThrowNullPointerException("toCopy == null");
+    return nullptr;
+  }
+  StackHandleScope<1> hs(soa.Self());
+  Handle<mirror::String> source(hs.NewHandle(soa.Decode<mirror::String*>(to_copy)));
+  gc::AllocatorType allocator = Runtime::Current()->GetHeap()->GetCurrentAllocator();
+  return soa.AddLocalReference<jstring>(mirror::String::AllocFromString<true>(
+      soa.Self(), source->GetLength(), source, 0, allocator));
+}
+
+static JNINativeMethod gMethods[] = {  // One entry per native method defined above.
+  NATIVE_METHOD(StringFactory, newStringFromBytes, "!([BIII)Ljava/lang/String;"),
+  NATIVE_METHOD(StringFactory, newStringFromChars, "!(II[C)Ljava/lang/String;"),
+  NATIVE_METHOD(StringFactory, newStringFromString, "!(Ljava/lang/String;)Ljava/lang/String;"),
+};
+
+void register_java_lang_StringFactory(JNIEnv* env) {  // See java_lang_StringFactory.h.
+  REGISTER_NATIVE_METHODS("java/lang/StringFactory");
+}
+
+}  // namespace art
diff --git a/runtime/native/java_lang_StringFactory.h b/runtime/native/java_lang_StringFactory.h
new file mode 100644
index 0000000..c476ad3
--- /dev/null
+++ b/runtime/native/java_lang_StringFactory.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_NATIVE_JAVA_LANG_STRINGFACTORY_H_
+#define ART_RUNTIME_NATIVE_JAVA_LANG_STRINGFACTORY_H_
+
+#include <jni.h>
+
+namespace art {
+
+void register_java_lang_StringFactory(JNIEnv* env);  // Defined in java_lang_StringFactory.cc.
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_NATIVE_JAVA_LANG_STRINGFACTORY_H_
diff --git a/runtime/native/java_lang_Thread.cc b/runtime/native/java_lang_Thread.cc
index be7022e..6569d83 100644
--- a/runtime/native/java_lang_Thread.cc
+++ b/runtime/native/java_lang_Thread.cc
@@ -84,6 +84,7 @@
     case kWaitingInMainDebuggerLoop:      return kJavaWaiting;
     case kWaitingForDebuggerSuspension:   return kJavaWaiting;
     case kWaitingForDeoptimization:       return kJavaWaiting;
+    case kWaitingForGetObjectsAllocated:  return kJavaWaiting;
     case kWaitingForJniOnLoad:            return kJavaWaiting;
     case kWaitingForSignalCatcherOutput:  return kJavaWaiting;
     case kWaitingInMainSignalCatcherLoop: return kJavaWaiting;
diff --git a/runtime/native/java_lang_reflect_Constructor.cc b/runtime/native/java_lang_reflect_Constructor.cc
index c33f81a..0fd6759 100644
--- a/runtime/native/java_lang_reflect_Constructor.cc
+++ b/runtime/native/java_lang_reflect_Constructor.cc
@@ -29,63 +29,61 @@
 
 namespace art {
 
-static ALWAYS_INLINE inline jobject NewInstanceHelper(
-    JNIEnv* env, jobject javaMethod, jobjectArray javaArgs, size_t num_frames) {
+/*
+ * We can also safely assume the constructor isn't associated
+ * with an interface, array, or primitive class. If this is coming from
+ * native, it is OK to avoid access checks since JNI does not enforce them.
+ */
+static jobject Constructor_newInstance(JNIEnv* env, jobject javaMethod, jobjectArray javaArgs) {
   ScopedFastNativeObjectAccess soa(env);
-  mirror::Method* m = soa.Decode<mirror::Method*>(javaMethod);
+  mirror::Constructor* m = soa.Decode<mirror::Constructor*>(javaMethod);
   StackHandleScope<1> hs(soa.Self());
   Handle<mirror::Class> c(hs.NewHandle(m->GetDeclaringClass()));
   if (UNLIKELY(c->IsAbstract())) {
-    soa.Self()->ThrowNewExceptionF("Ljava/lang/InstantiationException;",
-                                   "Can't instantiate %s %s",
+    soa.Self()->ThrowNewExceptionF("Ljava/lang/InstantiationException;", "Can't instantiate %s %s",
                                    c->IsInterface() ? "interface" : "abstract class",
                                    PrettyDescriptor(c.Get()).c_str());
     return nullptr;
   }
-
+  // Verify that we can access the class.
+  if (!m->IsAccessible() && !c->IsPublic()) {
+    auto* caller = GetCallingClass(soa.Self(), 1);
+    // If caller is null, then we called from JNI, just avoid the check since JNI avoids most
+    // access checks anyways. TODO: Investigate if this is the correct behavior.
+    if (caller != nullptr && !caller->CanAccess(c.Get())) {
+      soa.Self()->ThrowNewExceptionF(
+          "Ljava/lang/IllegalAccessException;", "%s is not accessible from %s",
+          PrettyClass(c.Get()).c_str(), PrettyClass(caller).c_str());
+      return nullptr;
+    }
+  }
   if (!Runtime::Current()->GetClassLinker()->EnsureInitialized(soa.Self(), c, true, true)) {
     DCHECK(soa.Self()->IsExceptionPending());
     return nullptr;
   }
-
   bool movable = true;
-  if (!kMovingMethods && c->IsArtMethodClass()) {
-    movable = false;
-  } else if (!kMovingClasses && c->IsClassClass()) {
+  if (!kMovingClasses && c->IsClassClass()) {
     movable = false;
   }
+
+  // String constructor is replaced by a StringFactory method in InvokeMethod.
+  if (c->IsStringClass()) {
+    return InvokeMethod(soa, javaMethod, nullptr, javaArgs, 1);
+  }
+
   mirror::Object* receiver =
       movable ? c->AllocObject(soa.Self()) : c->AllocNonMovableObject(soa.Self());
   if (receiver == nullptr) {
     return nullptr;
   }
-
   jobject javaReceiver = soa.AddLocalReference<jobject>(receiver);
-  InvokeMethod(soa, javaMethod, javaReceiver, javaArgs, num_frames);
-
+  InvokeMethod(soa, javaMethod, javaReceiver, javaArgs, 1);
   // Constructors are ()V methods, so we shouldn't touch the result of InvokeMethod.
   return javaReceiver;
 }
 
-/*
- * We get here through Constructor.newInstance().  The Constructor object
- * would not be available if the constructor weren't public (per the
- * definition of Class.getConstructor), so we can skip the method access
- * check.  We can also safely assume the constructor isn't associated
- * with an interface, array, or primitive class.
- */
-static jobject Constructor_newInstance(JNIEnv* env, jobject javaMethod, jobjectArray javaArgs) {
-  return NewInstanceHelper(env, javaMethod, javaArgs, 1);
-}
-
-static jobject Constructor_newInstanceTwoFrames(JNIEnv* env, jobject javaMethod,
-                                                jobjectArray javaArgs) {
-  return NewInstanceHelper(env, javaMethod, javaArgs, 2);
-}
-
 static JNINativeMethod gMethods[] = {
   NATIVE_METHOD(Constructor, newInstance, "!([Ljava/lang/Object;)Ljava/lang/Object;"),
-  NATIVE_METHOD(Constructor, newInstanceTwoFrames, "!([Ljava/lang/Object;)Ljava/lang/Object;"),
 };
 
 void register_java_lang_reflect_Constructor(JNIEnv* env) {
diff --git a/runtime/native/libcore_util_CharsetUtils.cc b/runtime/native/libcore_util_CharsetUtils.cc
new file mode 100644
index 0000000..1216824
--- /dev/null
+++ b/runtime/native/libcore_util_CharsetUtils.cc
@@ -0,0 +1,264 @@
+/*
+ * Copyright (C) 2010 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "jni_internal.h"
+#include "mirror/string.h"
+#include "mirror/string-inl.h"
+#include "native/libcore_util_CharsetUtils.h"
+#include "scoped_fast_native_object_access.h"
+#include "ScopedPrimitiveArray.h"
+#include "unicode/utf16.h"
+
+#include <string.h>
+
+namespace art {
+
+/**
+ * Approximates java.lang.UnsafeByteSequence so we don't have to pay the cost of calling back into
+ * Java when converting a char[] to a UTF-8 byte[]. This lets us have UTF-8 conversions slightly
+ * faster than ICU for large char[]s without paying for the NIO overhead with small char[]s.
+ *
+ * We could avoid this by keeping the UTF-8 bytes on the native heap until we're done and only
+ * creating a byte[] on the Java heap when we know how big it needs to be, but one shouldn't lie
+ * to the garbage collector (nor hide potentially large allocations from it).
+ *
+ * Because a call to append might require an allocation, it might fail. Callers should always
+ * check the return value of append.
+ */
+class NativeUnsafeByteSequence {
+ public:
+  explicit NativeUnsafeByteSequence(JNIEnv* env)
+    : mEnv(env), mJavaArray(nullptr), mRawArray(nullptr), mSize(-1), mOffset(0) {
+  }
+
+  ~NativeUnsafeByteSequence() {
+    // Release our pointer to the raw array, copying changes back to the Java heap.
+    if (mRawArray != nullptr) {
+      mEnv->ReleaseByteArrayElements(mJavaArray, mRawArray, 0);
+    }
+  }
+
+  // Appends one byte, growing the backing array when it is full. Returns false on failure.
+  bool append(jbyte b) {
+    // Doubling a capacity of 0 (or the initial -1) would never yield room, so start from 1.
+    if (mOffset >= mSize && !resize(mSize > 0 ? mSize * 2 : 1)) {
+      return false;
+    }
+    mRawArray[mOffset++] = b;
+    return true;
+  }
+
+  // Replaces the backing array with one of newSize bytes, keeping the bytes written so far.
+  bool resize(int newSize) {
+    if (newSize == mSize) {
+      return true;
+    }
+    // Allocate a new array; drop its local reference again if its elements are unavailable.
+    jbyteArray newJavaArray = mEnv->NewByteArray(newSize);
+    if (newJavaArray == nullptr) {
+      return false;
+    }
+    jbyte* newRawArray = mEnv->GetByteArrayElements(newJavaArray, nullptr);
+    if (newRawArray == nullptr) {
+      mEnv->DeleteLocalRef(newJavaArray);
+      return false;
+    }
+    // Copy data out of the old array, clamped to the new size since we may be trimming.
+    if (mRawArray != nullptr) {
+      mOffset = (mOffset < newSize) ? mOffset : newSize;
+      memcpy(newRawArray, mRawArray, mOffset);
+      mEnv->ReleaseByteArrayElements(mJavaArray, mRawArray, JNI_ABORT);
+      mEnv->DeleteLocalRef(mJavaArray);
+    }
+    // Point ourselves at the new array.
+    mJavaArray = newJavaArray;
+    mRawArray = newRawArray;
+    mSize = newSize;
+    return true;
+  }
+
+  // Trims any unused space and returns the Java array, or nullptr if trimming failed.
+  jbyteArray toByteArray() {
+    return resize(mOffset) ? mJavaArray : nullptr;
+  }
+
+ private:
+  JNIEnv* mEnv;
+  jbyteArray mJavaArray;
+  jbyte* mRawArray;
+  jint mSize;
+  jint mOffset;
+
+  // Disallow copy and assignment.
+  NativeUnsafeByteSequence(const NativeUnsafeByteSequence&);
+  void operator=(const NativeUnsafeByteSequence&);
+};
+
+// Widens `length` bytes starting at javaBytes[offset] into javaChars[0..length); any byte
+// outside the ASCII range (> 0x7f) is stored as U+FFFD. No bounds are checked here.
+static void CharsetUtils_asciiBytesToChars(JNIEnv* env, jclass, jbyteArray javaBytes, jint offset,
+                                           jint length, jcharArray javaChars) {
+  ScopedByteArrayRO bytes(env, javaBytes);
+  if (bytes.get() == nullptr) {
+    return;
+  }
+  ScopedCharArrayRW chars(env, javaChars);
+  if (chars.get() == nullptr) {
+    return;
+  }
+  const jbyte* const in = &bytes[offset];
+  jchar* const out = &chars[0];
+  for (jint i = 0; i < length; ++i) {
+    const jchar ch = static_cast<jchar>(in[i] & 0xff);
+    out[i] = (ch <= 0x7f) ? ch : 0xfffd;  // 0xfffd is the Unicode replacement character.
+  }
+}
+
+// Widens javaBytes[offset..offset+length) into javaChars[0..length) as unsigned Latin-1 bytes.
+static void CharsetUtils_isoLatin1BytesToChars(JNIEnv* env, jclass, jbyteArray javaBytes,
+                                               jint offset, jint length, jcharArray javaChars) {
+  ScopedByteArrayRO bytes(env, javaBytes);
+  if (bytes.get() == nullptr) {
+    return;
+  }
+  ScopedCharArrayRW chars(env, javaChars);
+  if (chars.get() == nullptr) {
+    return;
+  }
+  const jbyte* const in = &bytes[offset];
+  jchar* const out = &chars[0];
+  for (jint i = 0; i < length; ++i) {
+    out[i] = static_cast<jchar>(in[i] & 0xff);
+  }
+}
+
+/**
+ * Translates the given characters to US-ASCII or ISO-8859-1 bytes, using the fact that
+ * Unicode code points between U+0000 and U+007f inclusive are identical to US-ASCII, while
+ * U+0000 to U+00ff inclusive are identical to ISO-8859-1.
+ */
+static jbyteArray charsToBytes(JNIEnv* env, jstring java_string, jint offset, jint length,
+                               jchar maxValidChar) {
+  ScopedObjectAccess soa(env);
+  StackHandleScope<1> hs(soa.Self());
+  Handle<mirror::String> string(hs.NewHandle(soa.Decode<mirror::String*>(java_string)));
+  if (string.Get() == nullptr) {
+    return nullptr;
+  }
+  // Bail out on allocation failure so the pending exception is left intact; handing a null
+  // array to ScopedByteArrayRW would raise a NullPointerException on top of it.
+  jbyteArray javaBytes = env->NewByteArray(length);
+  if (javaBytes == nullptr) {
+    return nullptr;
+  }
+  ScopedByteArrayRW bytes(env, javaBytes);
+  if (bytes.get() == nullptr) {
+    return nullptr;
+  }
+  // Characters above maxValidChar have no single-byte encoding and become '?'.
+  const jchar* src = &(string->GetValue()[offset]);
+  jbyte* dst = &bytes[0];
+  for (int i = 0; i < length; ++i) {
+    const jchar ch = *src++;
+    *dst++ = static_cast<jbyte>((ch > maxValidChar) ? '?' : ch);
+  }
+  return javaBytes;
+}
+
+static jbyteArray CharsetUtils_toAsciiBytes(JNIEnv* env, jclass, jstring java_string, jint offset,
+                                            jint length) {
+  return charsToBytes(env, java_string, offset, length, 0x7f);  // 0x7f: highest US-ASCII char.
+}
+
+static jbyteArray CharsetUtils_toIsoLatin1Bytes(JNIEnv* env, jclass, jstring java_string,
+                                                jint offset, jint length) {
+  return charsToBytes(env, java_string, offset, length, 0xff);  // 0xff: highest ISO-8859-1 char.
+}
+
+// Encodes string[offset, offset + length) as UTF-8 and returns it as a new byte[]. A surrogate
+// that is not part of a valid lead/trail pair is written as '?'. Returns nullptr if the string
+// reference is null or the output array could not be allocated or grown.
+static jbyteArray CharsetUtils_toUtf8Bytes(JNIEnv* env, jclass, jstring java_string, jint offset,
+                                           jint length) {
+  ScopedObjectAccess soa(env);
+  StackHandleScope<1> hs(soa.Self());
+  Handle<mirror::String> string(hs.NewHandle(soa.Decode<mirror::String*>(java_string)));
+  if (string.Get() == nullptr) {
+    return nullptr;
+  }
+
+  // Start with one byte per char; append() grows the sequence when longer encodings need it.
+  NativeUnsafeByteSequence out(env);
+  if (!out.resize(length)) {
+    return nullptr;
+  }
+
+  const int end = offset + length;
+  for (int i = offset; i < end; ++i) {
+    jint ch = string->CharAt(i);
+    // False as soon as any byte of this char's encoding could not be appended.
+    bool appended;
+    if (ch < 0x80) {
+      // One byte.
+      appended = out.append(ch);
+    } else if (ch < 0x800) {
+      // Two bytes.
+      appended = out.append((ch >> 6) | 0xc0) && out.append((ch & 0x3f) | 0x80);
+    } else if (!U16_IS_SURROGATE(ch)) {
+      // Three bytes.
+      appended = out.append((ch >> 12) | 0xe0) &&
+                 out.append(((ch >> 6) & 0x3f) | 0x80) &&
+                 out.append((ch & 0x3f) | 0x80);
+    } else {
+      // A supplementary character, or an unpaired surrogate that is replaced by '?'.
+      const jchar high = static_cast<jchar>(ch);
+      // At the end of the range there is no next char; 0 is never a trail surrogate.
+      const jchar low = (i + 1 != end) ? string->CharAt(i + 1) : 0;
+      if (U16_IS_SURROGATE_LEAD(high) && U16_IS_SURROGATE_TRAIL(low)) {
+        // Now we know we have a *valid* surrogate pair, we can consume the low surrogate.
+        ++i;
+        ch = U16_GET_SUPPLEMENTARY(high, low);
+        // Four bytes.
+        appended = out.append((ch >> 18) | 0xf0) &&
+                   out.append(((ch >> 12) & 0x3f) | 0x80) &&
+                   out.append(((ch >> 6) & 0x3f) | 0x80) &&
+                   out.append((ch & 0x3f) | 0x80);
+      } else {
+        appended = out.append('?');
+      }
+    }
+    // Growing the output failed; give up on the whole conversion.
+    if (!appended) {
+      return nullptr;
+    }
+  }
+  // Trim the array to the number of bytes actually written.
+  return out.toByteArray();
+}
+
+static JNINativeMethod gMethods[] = {  // One entry per native method defined above.
+  NATIVE_METHOD(CharsetUtils, asciiBytesToChars, "!([BII[C)V"),
+  NATIVE_METHOD(CharsetUtils, isoLatin1BytesToChars, "!([BII[C)V"),
+  NATIVE_METHOD(CharsetUtils, toAsciiBytes, "!(Ljava/lang/String;II)[B"),
+  NATIVE_METHOD(CharsetUtils, toIsoLatin1Bytes, "!(Ljava/lang/String;II)[B"),
+  NATIVE_METHOD(CharsetUtils, toUtf8Bytes, "!(Ljava/lang/String;II)[B"),
+};
+
+void register_libcore_util_CharsetUtils(JNIEnv* env) {  // See libcore_util_CharsetUtils.h.
+  REGISTER_NATIVE_METHODS("libcore/util/CharsetUtils");
+}
+
+}  // namespace art
diff --git a/runtime/native/libcore_util_CharsetUtils.h b/runtime/native/libcore_util_CharsetUtils.h
new file mode 100644
index 0000000..3518bdb
--- /dev/null
+++ b/runtime/native/libcore_util_CharsetUtils.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_NATIVE_LIBCORE_UTIL_CHARSETUTILS_H_
+#define ART_RUNTIME_NATIVE_LIBCORE_UTIL_CHARSETUTILS_H_
+
+#include <jni.h>
+
+namespace art {
+
+void register_libcore_util_CharsetUtils(JNIEnv* env);  // See libcore_util_CharsetUtils.cc.
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_NATIVE_LIBCORE_UTIL_CHARSETUTILS_H_
diff --git a/runtime/nth_caller_visitor.h b/runtime/nth_caller_visitor.h
index 632ccde..d2d7fa8 100644
--- a/runtime/nth_caller_visitor.h
+++ b/runtime/nth_caller_visitor.h
@@ -27,8 +27,11 @@
 // Walks up the stack 'n' callers, when used with Thread::WalkStack.
 struct NthCallerVisitor : public StackVisitor {
   NthCallerVisitor(Thread* thread, size_t n_in, bool include_runtime_and_upcalls = false)
-      : StackVisitor(thread, nullptr), n(n_in),
-        include_runtime_and_upcalls_(include_runtime_and_upcalls), count(0), caller(nullptr) {}
+      : StackVisitor(thread, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames),
+        n(n_in),
+        include_runtime_and_upcalls_(include_runtime_and_upcalls),
+        count(0),
+        caller(nullptr) {}
 
   bool VisitFrame() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     mirror::ArtMethod* m = GetMethod();
diff --git a/runtime/oat.h b/runtime/oat.h
index a31e09a..aaf442a 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,7 +32,7 @@
 class PACKED(4) OatHeader {
  public:
   static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' };
-  static constexpr uint8_t kOatVersion[] = { '0', '6', '1', '\0' };
+  static constexpr uint8_t kOatVersion[] = { '0', '6', '2', '\0' };
 
   static constexpr const char* kImageLocationKey = "image-location";
   static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
diff --git a/runtime/oat_file_assistant.cc b/runtime/oat_file_assistant.cc
index 37e85ab..d07c09c 100644
--- a/runtime/oat_file_assistant.cc
+++ b/runtime/oat_file_assistant.cc
@@ -96,9 +96,8 @@
 
 OatFileAssistant::~OatFileAssistant() {
   // Clean up the lock file.
-  if (lock_file_.get() != nullptr) {
-    lock_file_->Erase();
-    TEMP_FAILURE_RETRY(unlink(lock_file_->GetPath().c_str()));
+  if (flock_.HasFile()) {
+    TEMP_FAILURE_RETRY(unlink(flock_.GetFile()->GetPath().c_str()));
   }
 }
 
@@ -121,7 +120,7 @@
 
 bool OatFileAssistant::Lock(std::string* error_msg) {
   CHECK(error_msg != nullptr);
-  CHECK(lock_file_.get() == nullptr) << "OatFileAssistant::Lock already acquired";
+  CHECK(!flock_.HasFile()) << "OatFileAssistant::Lock already acquired";
 
   if (OatFileName() == nullptr) {
     *error_msg = "Failed to determine lock file";
@@ -129,13 +128,7 @@
   }
   std::string lock_file_name = *OatFileName() + ".flock";
 
-  lock_file_.reset(OS::CreateEmptyFile(lock_file_name.c_str()));
-  if (lock_file_.get() == nullptr) {
-    *error_msg = "Failed to create lock file " + lock_file_name;
-    return false;
-  }
-
-  if (!flock_.Init(lock_file_.get(), error_msg)) {
+  if (!flock_.Init(lock_file_name.c_str(), error_msg)) {
     TEMP_FAILURE_RETRY(unlink(lock_file_name.c_str()));
     return false;
   }
@@ -230,8 +223,8 @@
   dex_files.push_back(std::move(dex_file));
 
   // Load secondary multidex files
-  for (int i = 1; ; i++) {
-    std::string secondary_dex_location = DexFile::GetMultiDexClassesDexName(i, dex_location);
+  for (size_t i = 1; ; i++) {
+    std::string secondary_dex_location = DexFile::GetMultiDexLocation(i, dex_location);
     oat_dex_file = oat_file.GetOatDexFile(secondary_dex_location.c_str(), nullptr, false);
     if (oat_dex_file == nullptr) {
       // There are no more secondary dex files to load.
@@ -403,9 +396,9 @@
   }
 
   // Verify the dex checksums for any secondary multidex files
-  for (int i = 1; ; i++) {
+  for (size_t i = 1; ; i++) {
     std::string secondary_dex_location
-      = DexFile::GetMultiDexClassesDexName(i, dex_location_);
+      = DexFile::GetMultiDexLocation(i, dex_location_);
     const OatFile::OatDexFile* secondary_oat_dex_file
       = file.GetOatDexFile(secondary_dex_location.c_str(), nullptr, false);
     if (secondary_oat_dex_file == nullptr) {
@@ -465,7 +458,7 @@
 
   const ImageInfo* image_info = GetImageInfo();
   if (image_info == nullptr) {
-    VLOG(oat) << "No image for to check oat relocation against.";
+    VLOG(oat) << "No image to check oat relocation against.";
     return false;
   }
 
diff --git a/runtime/oat_file_assistant.h b/runtime/oat_file_assistant.h
index a25ee31..4c0b0e2 100644
--- a/runtime/oat_file_assistant.h
+++ b/runtime/oat_file_assistant.h
@@ -353,7 +353,6 @@
   // To implement Lock(), we lock a dummy file where the oat file would go
   // (adding ".flock" to the target file name) and retain the lock for the
   // remaining lifetime of the OatFileAssistant object.
-  std::unique_ptr<File> lock_file_;
   ScopedFlock flock_;
 
   // In a properly constructed OatFileAssistant object, dex_location_ should
@@ -405,9 +404,9 @@
   bool cached_oat_file_name_found_;
   std::string cached_oat_file_name_;
 
-  // Cached value of the loaded odex file.
+  // Cached value of the loaded oat file.
   // Use the GetOatFile method rather than accessing this directly, unless you
-  // know the odex file isn't out of date.
+  // know the oat file isn't out of date.
   bool oat_file_load_attempted_ = false;
   std::unique_ptr<OatFile> cached_oat_file_;
 
diff --git a/runtime/oat_file_assistant_test.cc b/runtime/oat_file_assistant_test.cc
index 3f6b2d2..865fcb0 100644
--- a/runtime/oat_file_assistant_test.cc
+++ b/runtime/oat_file_assistant_test.cc
@@ -29,6 +29,7 @@
 #include "class_linker-inl.h"
 #include "common_runtime_test.h"
 #include "compiler_callbacks.h"
+#include "gc/space/image_space.h"
 #include "mem_map.h"
 #include "os.h"
 #include "scoped_thread_state_change.h"
@@ -610,10 +611,23 @@
   // Things aren't relocated, so it should fall back to interpreted.
   std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile();
   ASSERT_TRUE(oat_file.get() != nullptr);
+
   EXPECT_FALSE(oat_file->IsExecutable());
   std::vector<std::unique_ptr<const DexFile>> dex_files;
   dex_files = oat_file_assistant.LoadDexFiles(*oat_file, dex_location.c_str());
   EXPECT_EQ(1u, dex_files.size());
+
+  // Add some extra checks to help diagnose apparently flaky test failures.
+  Runtime* runtime = Runtime::Current();
+  const gc::space::ImageSpace* image_space = runtime->GetHeap()->GetImageSpace();
+  ASSERT_TRUE(image_space != nullptr);
+  const ImageHeader& image_header = image_space->GetImageHeader();
+  const OatHeader& oat_header = oat_file->GetOatHeader();
+  EXPECT_FALSE(oat_file->IsPic());
+  EXPECT_EQ(image_header.GetOatChecksum(), oat_header.GetImageFileLocationOatChecksum());
+  EXPECT_NE(reinterpret_cast<uintptr_t>(image_header.GetOatDataBegin()),
+      oat_header.GetImageFileLocationOatDataBegin());
+  EXPECT_NE(image_header.GetPatchDelta(), oat_header.GetImagePatchDelta());
 }
 
 // Case: We have a DEX file and a PIC ODEX file, but no OAT file.
diff --git a/runtime/profiler.cc b/runtime/profiler.cc
index 90a47b3..3b0e6c1 100644
--- a/runtime/profiler.cc
+++ b/runtime/profiler.cc
@@ -58,8 +58,10 @@
   BoundedStackVisitor(std::vector<std::pair<mirror::ArtMethod*, uint32_t>>* stack,
       Thread* thread, uint32_t max_depth)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-      : StackVisitor(thread, nullptr), stack_(stack), max_depth_(max_depth), depth_(0) {
-  }
+      : StackVisitor(thread, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames),
+        stack_(stack),
+        max_depth_(max_depth),
+        depth_(0) {}
 
   bool VisitFrame() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     mirror::ArtMethod* m = GetMethod();
@@ -300,7 +302,9 @@
   } while (length > 0);
 
   // Truncate the file to the new length.
-  ftruncate(fd, full_length);
+  if (ftruncate(fd, full_length) == -1) {
+    LOG(ERROR) << "Failed to truncate profile file " << full_name;
+  }
 
   // Now unlock the file, allowing another process in.
   err = flock(fd, LOCK_UN);
diff --git a/runtime/quick/inline_method_analyser.cc b/runtime/quick/inline_method_analyser.cc
index 9cf4b16..1c404ff 100644
--- a/runtime/quick/inline_method_analyser.cc
+++ b/runtime/quick/inline_method_analyser.cc
@@ -134,7 +134,10 @@
 bool InlineMethodAnalyser::IsSyntheticAccessor(MethodReference ref) {
   const DexFile::MethodId& method_id = ref.dex_file->GetMethodId(ref.dex_method_index);
   const char* method_name = ref.dex_file->GetMethodName(method_id);
-  return strncmp(method_name, "access$", strlen("access$")) == 0;
+  // javac names synthetic accessors "access$nnn",
+  // jack names them "-getN", "-putN", "-wrapN".
+  return strncmp(method_name, "access$", strlen("access$")) == 0 ||
+      strncmp(method_name, "-", strlen("-")) == 0;
 }
 
 bool InlineMethodAnalyser::AnalyseReturnMethod(const DexFile::CodeItem* code_item,
diff --git a/runtime/quick/inline_method_analyser.h b/runtime/quick/inline_method_analyser.h
index 3463025..0d39e22 100644
--- a/runtime/quick/inline_method_analyser.h
+++ b/runtime/quick/inline_method_analyser.h
@@ -56,8 +56,12 @@
   kIntrinsicReferenceGetReferent,
   kIntrinsicCharAt,
   kIntrinsicCompareTo,
+  kIntrinsicGetCharsNoCheck,
   kIntrinsicIsEmptyOrLength,
   kIntrinsicIndexOf,
+  kIntrinsicNewStringFromBytes,
+  kIntrinsicNewStringFromChars,
+  kIntrinsicNewStringFromString,
   kIntrinsicCurrentThread,
   kIntrinsicPeek,
   kIntrinsicPoke,
@@ -71,6 +75,7 @@
   kInlineOpNonWideConst,
   kInlineOpIGet,
   kInlineOpIPut,
+  kInlineStringInit,
 };
 std::ostream& operator<<(std::ostream& os, const InlineMethodOpcode& rhs);
 
diff --git a/runtime/quick_exception_handler.cc b/runtime/quick_exception_handler.cc
index 2432603..730759a 100644
--- a/runtime/quick_exception_handler.cc
+++ b/runtime/quick_exception_handler.cc
@@ -46,7 +46,9 @@
   CatchBlockStackVisitor(Thread* self, Context* context, Handle<mirror::Throwable>* exception,
                          QuickExceptionHandler* exception_handler)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-      : StackVisitor(self, context), self_(self), exception_(exception),
+      : StackVisitor(self, context, StackVisitor::StackWalkKind::kIncludeInlinedFrames),
+        self_(self),
+        exception_(exception),
         exception_handler_(exception_handler) {
   }
 
@@ -160,7 +162,9 @@
  public:
   DeoptimizeStackVisitor(Thread* self, Context* context, QuickExceptionHandler* exception_handler)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-      : StackVisitor(self, context), self_(self), exception_handler_(exception_handler),
+      : StackVisitor(self, context, StackVisitor::StackWalkKind::kIncludeInlinedFrames),
+        self_(self),
+        exception_handler_(exception_handler),
         prev_shadow_frame_(nullptr) {
     CHECK(!self_->HasDeoptimizationShadowFrame());
   }
@@ -202,7 +206,8 @@
                                       h_method, m->GetAccessFlags(), true, true, true, true);
     bool verifier_success = verifier.Verify();
     CHECK(verifier_success) << PrettyMethod(h_method.Get());
-    ShadowFrame* new_frame = ShadowFrame::Create(num_regs, nullptr, h_method.Get(), dex_pc);
+    ShadowFrame* new_frame = ShadowFrame::CreateDeoptimizedFrame(
+        num_regs, nullptr, h_method.Get(), dex_pc);
     self_->SetShadowFrameUnderConstruction(new_frame);
     const std::vector<int32_t> kinds(verifier.DescribeVRegs(dex_pc));
 
@@ -222,7 +227,10 @@
           break;
         case kReferenceVReg: {
           uint32_t value = 0;
-          if (GetVReg(h_method.Get(), reg, kind, &value)) {
+          // Check IsReferenceVReg in case the compiled GC map doesn't agree with the verifier.
+          // We don't want to copy a stale reference into the shadow frame as a reference.
+          // b/20736048
+          if (GetVReg(h_method.Get(), reg, kind, &value) && IsReferenceVReg(h_method.Get(), reg)) {
             new_frame->SetVRegReference(reg, reinterpret_cast<mirror::Object*>(value));
           } else {
             new_frame->SetVReg(reg, kDeadValue);
@@ -334,7 +342,7 @@
  public:
   InstrumentationStackVisitor(Thread* self, size_t frame_depth)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-      : StackVisitor(self, nullptr),
+      : StackVisitor(self, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames),
         frame_depth_(frame_depth),
         instrumentation_frames_to_pop_(0) {
     CHECK_NE(frame_depth_, kInvalidFrameDepth);
@@ -345,7 +353,12 @@
     if (current_frame_depth < frame_depth_) {
       CHECK(GetMethod() != nullptr);
       if (UNLIKELY(reinterpret_cast<uintptr_t>(GetQuickInstrumentationExitPc()) == GetReturnPc())) {
-        ++instrumentation_frames_to_pop_;
+        if (!IsInInlinedFrame()) {
+          // We do not count inlined frames, because we do not instrument them. The reason we
+          // include them in the stack walking is the check against `frame_depth_`, which is
+          // given to us by a visitor that visits inlined frames.
+          ++instrumentation_frames_to_pop_;
+        }
       }
       return true;
     } else {
diff --git a/runtime/read_barrier_c.h b/runtime/read_barrier_c.h
index a2c4c36..88bda3a 100644
--- a/runtime/read_barrier_c.h
+++ b/runtime/read_barrier_c.h
@@ -26,9 +26,9 @@
 // table-lookup read barriers.
 
 #ifdef ART_USE_READ_BARRIER
-// #define USE_BAKER_READ_BARRIER
+#define USE_BAKER_READ_BARRIER
 // #define USE_BROOKS_READ_BARRIER
-#define USE_TABLE_LOOKUP_READ_BARRIER
+// #define USE_TABLE_LOOKUP_READ_BARRIER
 #endif
 
 #if defined(USE_BAKER_READ_BARRIER) || defined(USE_BROOKS_READ_BARRIER)
diff --git a/runtime/reflection.cc b/runtime/reflection.cc
index 3099094..49e1b8e 100644
--- a/runtime/reflection.cc
+++ b/runtime/reflection.cc
@@ -520,23 +520,6 @@
   return result;
 }
 
-void InvokeWithShadowFrame(Thread* self, ShadowFrame* shadow_frame, uint16_t arg_offset,
-                           JValue* result) {
-  // We want to make sure that the stack is not within a small distance from the
-  // protected region in case we are calling into a leaf function whose stack
-  // check has been elided.
-  if (UNLIKELY(__builtin_frame_address(0) < self->GetStackEnd())) {
-    ThrowStackOverflowError(self);
-    return;
-  }
-  uint32_t shorty_len;
-  const char* shorty = shadow_frame->GetMethod()->GetShorty(&shorty_len);
-  ArgArray arg_array(shorty, shorty_len);
-  arg_array.BuildArgArrayFromFrame(shadow_frame, arg_offset);
-  shadow_frame->GetMethod()->Invoke(self, arg_array.GetArray(), arg_array.GetNumBytes(), result,
-                                    shorty);
-}
-
 jobject InvokeMethod(const ScopedObjectAccessAlreadyRunnable& soa, jobject javaMethod,
                      jobject javaReceiver, jobject javaArgs, size_t num_frames) {
   // We want to make sure that the stack is not within a small distance from the
@@ -564,14 +547,21 @@
 
   mirror::Object* receiver = nullptr;
   if (!m->IsStatic()) {
-    // Check that the receiver is non-null and an instance of the field's declaring class.
-    receiver = soa.Decode<mirror::Object*>(javaReceiver);
-    if (!VerifyObjectIsClass(receiver, declaring_class)) {
-      return nullptr;
-    }
+    // Replace calls to String.<init> with equivalent StringFactory call.
+    if (declaring_class->IsStringClass() && m->IsConstructor()) {
+      jmethodID mid = soa.EncodeMethod(m);
+      m = soa.DecodeMethod(WellKnownClasses::StringInitToStringFactoryMethodID(mid));
+      CHECK(javaReceiver == nullptr);
+    } else {
+      // Check that the receiver is non-null and an instance of the method's declaring class.
+      receiver = soa.Decode<mirror::Object*>(javaReceiver);
+      if (!VerifyObjectIsClass(receiver, declaring_class)) {
+        return nullptr;
+      }
 
-    // Find the actual implementation of the virtual method.
-    m = receiver->GetClass()->FindVirtualMethodForVirtualOrInterface(m);
+      // Find the actual implementation of the virtual method.
+      m = receiver->GetClass()->FindVirtualMethodForVirtualOrInterface(m);
+    }
   }
 
   // Get our arrays of arguments and their types, and check they're the same size.
@@ -799,40 +789,48 @@
   return UnboxPrimitive(o, dst_class, f, unboxed_value);
 }
 
-bool UnboxPrimitiveForResult(mirror::Object* o,
-                             mirror::Class* dst_class, JValue* unboxed_value) {
+bool UnboxPrimitiveForResult(mirror::Object* o, mirror::Class* dst_class, JValue* unboxed_value) {
   return UnboxPrimitive(o, dst_class, nullptr, unboxed_value);
 }
 
+mirror::Class* GetCallingClass(Thread* self, size_t num_frames) {
+  NthCallerVisitor nth_caller(self, num_frames);
+  nth_caller.WalkStack();  // The result is read back from nth_caller.caller below.
+  return nth_caller.caller == nullptr ? nullptr : nth_caller.caller->GetDeclaringClass();
+}
+
 bool VerifyAccess(Thread* self, mirror::Object* obj, mirror::Class* declaring_class,
                   uint32_t access_flags, mirror::Class** calling_class, size_t num_frames) {
   if ((access_flags & kAccPublic) != 0) {
     return true;
   }
-  NthCallerVisitor visitor(self, num_frames);
-  visitor.WalkStack();
-  if (UNLIKELY(visitor.caller == nullptr)) {
+  auto* klass = GetCallingClass(self, num_frames);
+  if (UNLIKELY(klass == nullptr)) {
     // The caller is an attached native thread.
     return false;
   }
-  mirror::Class* caller_class = visitor.caller->GetDeclaringClass();
-  if (caller_class == declaring_class) {
+  *calling_class = klass;
+  return VerifyAccess(self, obj, declaring_class, access_flags, klass);
+}
+
+bool VerifyAccess(Thread* self, mirror::Object* obj, mirror::Class* declaring_class,
+                  uint32_t access_flags, mirror::Class* calling_class) {
+  if (calling_class == declaring_class) {
     return true;
   }
   ScopedAssertNoThreadSuspension sants(self, "verify-access");
-  *calling_class = caller_class;
   if ((access_flags & kAccPrivate) != 0) {
     return false;
   }
   if ((access_flags & kAccProtected) != 0) {
-    if (obj != nullptr && !obj->InstanceOf(caller_class) &&
-        !declaring_class->IsInSamePackage(caller_class)) {
+    if (obj != nullptr && !obj->InstanceOf(calling_class) &&
+        !declaring_class->IsInSamePackage(calling_class)) {
       return false;
-    } else if (declaring_class->IsAssignableFrom(caller_class)) {
+    } else if (declaring_class->IsAssignableFrom(calling_class)) {
       return true;
     }
   }
-  return declaring_class->IsInSamePackage(caller_class);
+  return declaring_class->IsInSamePackage(calling_class);
 }
 
 void InvalidReceiverError(mirror::Object* o, mirror::Class* c) {
diff --git a/runtime/reflection.h b/runtime/reflection.h
index c63f858..37f8a6a 100644
--- a/runtime/reflection.h
+++ b/runtime/reflection.h
@@ -61,10 +61,6 @@
                                            jobject obj, jmethodID mid, va_list args)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-void InvokeWithShadowFrame(Thread* self, ShadowFrame* shadow_frame, uint16_t arg_offset,
-                           JValue* result)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
 // num_frames is number of frames we look up for access check.
 jobject InvokeMethod(const ScopedObjectAccessAlreadyRunnable& soa, jobject method, jobject receiver,
                      jobject args, size_t num_frames = 1)
@@ -77,6 +73,15 @@
                   uint32_t access_flags, mirror::Class** calling_class, size_t num_frames)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+// This version takes a known calling class.
+bool VerifyAccess(Thread* self, mirror::Object* obj, mirror::Class* declaring_class,
+                  uint32_t access_flags, mirror::Class* calling_class)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+// Get the calling class using a stack visitor; null if there is no caller (e.g. native thread).
+mirror::Class* GetCallingClass(Thread* self, size_t num_frames)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
 void InvalidReceiverError(mirror::Object* o, mirror::Class* c)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 48bca62..2633898 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -91,6 +91,7 @@
 #include "native/java_lang_Object.h"
 #include "native/java_lang_Runtime.h"
 #include "native/java_lang_String.h"
+#include "native/java_lang_StringFactory.h"
 #include "native/java_lang_System.h"
 #include "native/java_lang_Thread.h"
 #include "native/java_lang_Throwable.h"
@@ -103,6 +104,7 @@
 #include "native/java_lang_reflect_Method.h"
 #include "native/java_lang_reflect_Proxy.h"
 #include "native/java_util_concurrent_atomic_AtomicLong.h"
+#include "native/libcore_util_CharsetUtils.h"
 #include "native/org_apache_harmony_dalvik_ddmc_DdmServer.h"
 #include "native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.h"
 #include "native/sun_misc_Unsafe.h"
@@ -1170,11 +1172,13 @@
   register_java_lang_ref_Reference(env);
   register_java_lang_Runtime(env);
   register_java_lang_String(env);
+  register_java_lang_StringFactory(env);
   register_java_lang_System(env);
   register_java_lang_Thread(env);
   register_java_lang_Throwable(env);
   register_java_lang_VMClassLoader(env);
   register_java_util_concurrent_atomic_AtomicLong(env);
+  register_libcore_util_CharsetUtils(env);
   register_org_apache_harmony_dalvik_ddmc_DdmServer(env);
   register_org_apache_harmony_dalvik_ddmc_DdmVmInternal(env);
   register_sun_misc_Unsafe(env);
@@ -1562,14 +1566,15 @@
   // Throwing an exception may cause its class initialization. If we mark the transaction
   // aborted before that, we may warn with a false alarm. Throwing the exception before
   // marking the transaction aborted avoids that.
-  preinitialization_transaction_->ThrowAbortError(self, false);
+  preinitialization_transaction_->ThrowAbortError(self, &abort_message);
   preinitialization_transaction_->Abort(abort_message);
 }
 
 void Runtime::ThrowTransactionAbortError(Thread* self) {
   DCHECK(IsAotCompiler());
   DCHECK(IsActiveTransaction());
-  preinitialization_transaction_->ThrowAbortError(self, true);
+  // Passing nullptr means we rethrow an exception with the earlier transaction abort message.
+  preinitialization_transaction_->ThrowAbortError(self, nullptr);
 }
 
 void Runtime::RecordWriteFieldBoolean(mirror::Object* obj, MemberOffset field_offset,
diff --git a/runtime/runtime.h b/runtime/runtime.h
index c35f4ca..348d5c6 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -31,6 +31,7 @@
 #include "gc_root.h"
 #include "instrumentation.h"
 #include "jobject_comparator.h"
+#include "method_reference.h"
 #include "object_callbacks.h"
 #include "offsets.h"
 #include "profiler_options.h"
@@ -86,6 +87,8 @@
 class Transaction;
 
 typedef std::vector<std::pair<std::string, const void*>> RuntimeOptions;
+typedef SafeMap<MethodReference, SafeMap<uint32_t, std::set<uint32_t>>,
+    MethodReferenceComparator> MethodRefToStringInitRegMap;
 
 // Not all combinations of flags are valid. You may not visit all roots as well as the new roots
 // (no logical reason to do this). You also may not start logging new roots and stop logging new
@@ -558,6 +561,10 @@
     return jit_options_.get();
   }
 
+  MethodRefToStringInitRegMap& GetStringInitMap() {
+    return method_ref_string_init_reg_map_;
+  }
+
  private:
   static void InitPlatformSignalHandlers();
 
@@ -737,6 +744,8 @@
   // zygote.
   uint32_t zygote_max_failed_boots_;
 
+  MethodRefToStringInitRegMap method_ref_string_init_reg_map_;
+
   DISALLOW_COPY_AND_ASSIGN(Runtime);
 };
 std::ostream& operator<<(std::ostream& os, const Runtime::CalleeSaveType& rhs);
diff --git a/runtime/scoped_thread_state_change.h b/runtime/scoped_thread_state_change.h
index b93fcb4..60ed55a 100644
--- a/runtime/scoped_thread_state_change.h
+++ b/runtime/scoped_thread_state_change.h
@@ -133,11 +133,8 @@
   T AddLocalReference(mirror::Object* obj) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     Locks::mutator_lock_->AssertSharedHeld(Self());
     DCHECK(IsRunnable());  // Don't work with raw objects in non-runnable states.
-    if (obj == nullptr) {
-      return nullptr;
-    }
-    DCHECK_NE((reinterpret_cast<uintptr_t>(obj) & 0xffff0000), 0xebad0000);
-    return Env()->AddLocalReference<T>(obj);
+    DCHECK_NE(obj, Runtime::Current()->GetClearedJniWeakGlobal());
+    return obj == nullptr ? nullptr : Env()->AddLocalReference<T>(obj);
   }
 
   template<typename T>
diff --git a/runtime/stack.cc b/runtime/stack.cc
index aa3e320..6795516 100644
--- a/runtime/stack.cc
+++ b/runtime/stack.cc
@@ -19,6 +19,7 @@
 #include "arch/context.h"
 #include "base/hex_dump.h"
 #include "entrypoints/runtime_asm_entrypoints.h"
+#include "gc_map.h"
 #include "mirror/art_method-inl.h"
 #include "mirror/class-inl.h"
 #include "mirror/object.h"
@@ -84,25 +85,59 @@
   return false;
 }
 
-StackVisitor::StackVisitor(Thread* thread, Context* context)
-    : thread_(thread), cur_shadow_frame_(nullptr),
-      cur_quick_frame_(nullptr), cur_quick_frame_pc_(0), num_frames_(0), cur_depth_(0),
+StackVisitor::StackVisitor(Thread* thread, Context* context, StackWalkKind walk_kind)
+    : StackVisitor(thread, context, walk_kind, 0) {}
+
+StackVisitor::StackVisitor(Thread* thread,
+                           Context* context,
+                           StackWalkKind walk_kind,
+                           size_t num_frames)
+    : thread_(thread),
+      walk_kind_(walk_kind),
+      cur_shadow_frame_(nullptr),
+      cur_quick_frame_(nullptr),
+      cur_quick_frame_pc_(0),
+      num_frames_(num_frames),
+      cur_depth_(0),
+      current_inlining_depth_(0),
       context_(context) {
   DCHECK(thread == Thread::Current() || thread->IsSuspended()) << *thread;
 }
 
-StackVisitor::StackVisitor(Thread* thread, Context* context, size_t num_frames)
-    : thread_(thread), cur_shadow_frame_(nullptr),
-      cur_quick_frame_(nullptr), cur_quick_frame_pc_(0), num_frames_(num_frames), cur_depth_(0),
-      context_(context) {
-  DCHECK(thread == Thread::Current() || thread->IsSuspended()) << *thread;
+InlineInfo StackVisitor::GetCurrentInlineInfo() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  mirror::ArtMethod* const frame_method = GetCurrentQuickFrame()->AsMirrorPtr();
+  const uint32_t pc_offset = frame_method->NativeQuickPcOffset(cur_quick_frame_pc_);
+  CodeInfo info = frame_method->GetOptimizedCodeInfo();
+  StackMap map_at_pc = info.GetStackMapForNativePcOffset(pc_offset);  // Map for the current pc.
+  return info.GetInlineInfoOf(map_at_pc);
+}
+
+mirror::ArtMethod* StackVisitor::GetMethod() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  if (cur_shadow_frame_ != nullptr) {
+    return cur_shadow_frame_->GetMethod();
+  }
+  if (cur_quick_frame_ == nullptr) {
+    return nullptr;
+  }
+  if (!IsInInlinedFrame()) {
+    return cur_quick_frame_->AsMirrorPtr();
+  }
+  // Inlined frame: look the method up in the quick frame method's dex cache by its index.
+  const size_t inline_index = current_inlining_depth_ - 1;
+  return GetCurrentQuickFrame()->AsMirrorPtr()->GetDexCacheResolvedMethod(
+      GetCurrentInlineInfo().GetMethodIndexAtDepth(inline_index));
 }
 
 uint32_t StackVisitor::GetDexPc(bool abort_on_failure) const {
   if (cur_shadow_frame_ != nullptr) {
     return cur_shadow_frame_->GetDexPC();
   } else if (cur_quick_frame_ != nullptr) {
-    return GetMethod()->ToDexPc(cur_quick_frame_pc_, abort_on_failure);
+    if (IsInInlinedFrame()) {
+      size_t depth_in_stack_map = current_inlining_depth_ - 1;
+      return GetCurrentInlineInfo().GetDexPcAtDepth(depth_in_stack_map);
+    } else {
+      return GetMethod()->ToDexPc(cur_quick_frame_pc_, abort_on_failure);
+    }
   } else {
     return 0;
   }
@@ -151,6 +186,33 @@
   return GetMethod()->NativeQuickPcOffset(cur_quick_frame_pc_);
 }
 
+bool StackVisitor::IsReferenceVReg(mirror::ArtMethod* m, uint16_t vreg) {
+  // Native and runtime methods have no register map to process; proxy methods are skipped too.
+  if (m->IsNative() || m->IsRuntimeMethod() || m->IsProxyMethod()) {
+    return false;
+  }
+  if (m->IsOptimized(sizeof(void*))) {
+    return true;  // TODO: Implement. For now every vreg of optimized code counts as a reference.
+  }
+  const uint8_t* native_gc_map = m->GetNativeGcMap(sizeof(void*));
+  CHECK(native_gc_map != nullptr) << PrettyMethod(m);
+  const DexFile::CodeItem* code_item = m->GetCodeItem();
+  // Can't be null or how would we compile its instructions?
+  DCHECK(code_item != nullptr) << PrettyMethod(m);
+  NativePcOffsetToReferenceMap map(native_gc_map);
+  size_t num_regs = std::min(map.RegWidth() * 8, static_cast<size_t>(code_item->registers_size_));
+  const uint8_t* reg_bitmap = nullptr;  // Only looked up when there are registers to test.
+  if (num_regs > 0) {
+    Runtime* runtime = Runtime::Current();
+    const void* entry_point = runtime->GetInstrumentation()->GetQuickCodeFor(m, sizeof(void*));
+    uintptr_t native_pc_offset = m->NativeQuickPcOffset(GetCurrentQuickFramePc(), entry_point);
+    reg_bitmap = map.FindBitMap(native_pc_offset);
+    DCHECK(reg_bitmap != nullptr);
+  }
+  // Does this register hold a reference? A vreg >= num_regs short-circuits to false.
+  return vreg < num_regs && TestBitmap(vreg, reg_bitmap);
+}
+
 bool StackVisitor::GetVReg(mirror::ArtMethod* m, uint16_t vreg, VRegKind kind,
                            uint32_t* val) const {
   if (cur_quick_frame_ != nullptr) {
@@ -193,18 +255,27 @@
 
 bool StackVisitor::GetVRegFromOptimizedCode(mirror::ArtMethod* m, uint16_t vreg, VRegKind kind,
                                             uint32_t* val) const {
-  const void* code_pointer = m->GetQuickOatCodePointer(sizeof(void*));
-  DCHECK(code_pointer != nullptr);
-  uint32_t native_pc_offset = m->NativeQuickPcOffset(cur_quick_frame_pc_);
-  CodeInfo code_info = m->GetOptimizedCodeInfo();
-  StackMap stack_map = code_info.GetStackMapForNativePcOffset(native_pc_offset);
+  DCHECK_EQ(m, GetMethod());
   const DexFile::CodeItem* code_item = m->GetCodeItem();
   DCHECK(code_item != nullptr) << PrettyMethod(m);  // Can't be null or how would we compile
                                                     // its instructions?
-  DCHECK_LT(vreg, code_item->registers_size_);
   uint16_t number_of_dex_registers = code_item->registers_size_;
-  DexRegisterMap dex_register_map =
-      code_info.GetDexRegisterMapOf(stack_map, number_of_dex_registers);
+  DCHECK_LT(vreg, code_item->registers_size_);
+
+  mirror::ArtMethod* outer_method = GetCurrentQuickFrame()->AsMirrorPtr();
+  const void* code_pointer = outer_method->GetQuickOatCodePointer(sizeof(void*));
+  DCHECK(code_pointer != nullptr);
+  CodeInfo code_info = outer_method->GetOptimizedCodeInfo();
+
+  uint32_t native_pc_offset = outer_method->NativeQuickPcOffset(cur_quick_frame_pc_);
+  StackMap stack_map = code_info.GetStackMapForNativePcOffset(native_pc_offset);
+  size_t depth_in_stack_map = current_inlining_depth_ - 1;
+
+  DexRegisterMap dex_register_map = IsInInlinedFrame()
+      ? code_info.GetDexRegisterMapAtDepth(
+            depth_in_stack_map, code_info.GetInlineInfoOf(stack_map), number_of_dex_registers)
+      : code_info.GetDexRegisterMapOf(stack_map, number_of_dex_registers);
+
   DexRegisterLocation::Kind location_kind =
       dex_register_map.GetLocationKind(vreg, number_of_dex_registers, code_info);
   switch (location_kind) {
@@ -345,7 +416,7 @@
       DCHECK(context_ != nullptr);  // You can't reliably write registers without a context.
       DCHECK(m == GetMethod());
       if (m->IsOptimized(sizeof(void*))) {
-        return SetVRegFromOptimizedCode(m, vreg, new_value, kind);
+        return false;
       } else {
         return SetVRegFromQuickCode(m, vreg, new_value, kind);
       }
@@ -382,57 +453,6 @@
   }
 }
 
-bool StackVisitor::SetVRegFromOptimizedCode(mirror::ArtMethod* m, uint16_t vreg, uint32_t new_value,
-                                            VRegKind kind) {
-  const void* code_pointer = m->GetQuickOatCodePointer(sizeof(void*));
-  DCHECK(code_pointer != nullptr);
-  uint32_t native_pc_offset = m->NativeQuickPcOffset(cur_quick_frame_pc_);
-  CodeInfo code_info = m->GetOptimizedCodeInfo();
-  StackMap stack_map = code_info.GetStackMapForNativePcOffset(native_pc_offset);
-  const DexFile::CodeItem* code_item = m->GetCodeItem();
-  DCHECK(code_item != nullptr) << PrettyMethod(m);  // Can't be null or how would we compile
-                                                    // its instructions?
-  uint16_t number_of_dex_registers = code_item->registers_size_;
-  DCHECK_LT(vreg, number_of_dex_registers);
-  DexRegisterMap dex_register_map =
-      code_info.GetDexRegisterMapOf(stack_map, number_of_dex_registers);
-  DexRegisterLocation::Kind location_kind =
-      dex_register_map.GetLocationKind(vreg, number_of_dex_registers, code_info);
-  uint32_t dex_pc = m->ToDexPc(cur_quick_frame_pc_, false);
-  switch (location_kind) {
-    case DexRegisterLocation::Kind::kInStack: {
-      const int32_t offset =
-          dex_register_map.GetStackOffsetInBytes(vreg, number_of_dex_registers, code_info);
-      uint8_t* addr = reinterpret_cast<uint8_t*>(cur_quick_frame_) + offset;
-      *reinterpret_cast<uint32_t*>(addr) = new_value;
-      return true;
-    }
-    case DexRegisterLocation::Kind::kInRegister:
-    case DexRegisterLocation::Kind::kInFpuRegister: {
-      uint32_t reg = dex_register_map.GetMachineRegister(vreg, number_of_dex_registers, code_info);
-      return SetRegisterIfAccessible(reg, new_value, kind);
-    }
-    case DexRegisterLocation::Kind::kConstant:
-      LOG(ERROR) << StringPrintf("Cannot change value of DEX register v%u used as a constant at "
-                                 "DEX pc 0x%x (native pc 0x%x) of method %s",
-                                 vreg, dex_pc, native_pc_offset,
-                                 PrettyMethod(cur_quick_frame_->AsMirrorPtr()).c_str());
-      return false;
-    case DexRegisterLocation::Kind::kNone:
-      LOG(ERROR) << StringPrintf("No location for DEX register v%u at DEX pc 0x%x "
-                                 "(native pc 0x%x) of method %s",
-                                 vreg, dex_pc, native_pc_offset,
-                                 PrettyMethod(cur_quick_frame_->AsMirrorPtr()).c_str());
-      return false;
-    default:
-      LOG(FATAL) << StringPrintf("Unknown location for DEX register v%u at DEX pc 0x%x "
-                                 "(native pc 0x%x) of method %s",
-                                 vreg, dex_pc, native_pc_offset,
-                                 PrettyMethod(cur_quick_frame_->AsMirrorPtr()).c_str());
-      UNREACHABLE();
-  }
-}
-
 bool StackVisitor::SetRegisterIfAccessible(uint32_t reg, uint32_t new_value, VRegKind kind) {
   const bool is_float = (kind == kFloatVReg) || (kind == kDoubleLoVReg) || (kind == kDoubleHiVReg);
   if (!IsAccessibleRegister(reg, is_float)) {
@@ -477,7 +497,7 @@
     DCHECK(context_ != nullptr);  // You can't reliably write registers without a context.
     DCHECK(m == GetMethod());
     if (m->IsOptimized(sizeof(void*))) {
-      return SetVRegPairFromOptimizedCode(m, vreg, new_value, kind_lo, kind_hi);
+      return false;
     } else {
       return SetVRegPairFromQuickCode(m, vreg, new_value, kind_lo, kind_hi);
     }
@@ -515,15 +535,6 @@
   }
 }
 
-bool StackVisitor::SetVRegPairFromOptimizedCode(
-    mirror::ArtMethod* m, uint16_t vreg, uint64_t new_value, VRegKind kind_lo, VRegKind kind_hi) {
-  uint32_t low_32bits = Low32Bits(new_value);
-  uint32_t high_32bits = High32Bits(new_value);
-  bool success = SetVRegFromOptimizedCode(m, vreg, low_32bits, kind_lo);
-  success &= SetVRegFromOptimizedCode(m, vreg + 1, high_32bits, kind_hi);
-  return success;
-}
-
 bool StackVisitor::SetRegisterPairIfAccessible(uint32_t reg_lo, uint32_t reg_hi,
                                                uint64_t new_value, bool is_float) {
   if (!IsAccessibleRegister(reg_lo, is_float) || !IsAccessibleRegister(reg_hi, is_float)) {
@@ -597,10 +608,10 @@
   *reinterpret_cast<uintptr_t*>(pc_addr) = new_ret_pc;
 }
 
-size_t StackVisitor::ComputeNumFrames(Thread* thread) {
+size_t StackVisitor::ComputeNumFrames(Thread* thread, StackWalkKind walk_kind) {
   struct NumFramesVisitor : public StackVisitor {
-    explicit NumFramesVisitor(Thread* thread_in)
-        : StackVisitor(thread_in, nullptr), frames(0) {}
+    NumFramesVisitor(Thread* thread_in, StackWalkKind walk_kind_in)
+        : StackVisitor(thread_in, nullptr, walk_kind_in), frames(0) {}
 
     bool VisitFrame() OVERRIDE {
       frames++;
@@ -609,16 +620,23 @@
 
     size_t frames;
   };
-  NumFramesVisitor visitor(thread);
+  NumFramesVisitor visitor(thread, walk_kind);
   visitor.WalkStack(true);
   return visitor.frames;
 }
 
 bool StackVisitor::GetNextMethodAndDexPc(mirror::ArtMethod** next_method, uint32_t* next_dex_pc) {
   struct HasMoreFramesVisitor : public StackVisitor {
-    explicit HasMoreFramesVisitor(Thread* thread, size_t num_frames, size_t frame_height)
-        : StackVisitor(thread, nullptr, num_frames), frame_height_(frame_height),
-          found_frame_(false), has_more_frames_(false), next_method_(nullptr), next_dex_pc_(0) {
+    HasMoreFramesVisitor(Thread* thread,
+                         StackWalkKind walk_kind,
+                         size_t num_frames,
+                         size_t frame_height)
+        : StackVisitor(thread, nullptr, walk_kind, num_frames),
+          frame_height_(frame_height),
+          found_frame_(false),
+          has_more_frames_(false),
+          next_method_(nullptr),
+          next_dex_pc_(0) {
     }
 
     bool VisitFrame() OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
@@ -642,7 +660,7 @@
     mirror::ArtMethod* next_method_;
     uint32_t next_dex_pc_;
   };
-  HasMoreFramesVisitor visitor(thread_, GetNumFrames(), GetFrameHeight());
+  HasMoreFramesVisitor visitor(thread_, walk_kind_, GetNumFrames(), GetFrameHeight());
   visitor.WalkStack(true);
   *next_method = visitor.next_method_;
   *next_dex_pc = visitor.next_dex_pc_;
@@ -652,7 +670,7 @@
 void StackVisitor::DescribeStack(Thread* thread) {
   struct DescribeStackVisitor : public StackVisitor {
     explicit DescribeStackVisitor(Thread* thread_in)
-        : StackVisitor(thread_in, nullptr) {}
+        : StackVisitor(thread_in, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames) {}
 
     bool VisitFrame() OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
       LOG(INFO) << "Frame Id=" << GetFrameId() << " " << DescribeLocation();
@@ -725,6 +743,26 @@
       mirror::ArtMethod* method = cur_quick_frame_->AsMirrorPtr();
       while (method != nullptr) {
         SanityCheckFrame();
+
+        if ((walk_kind_ == StackWalkKind::kIncludeInlinedFrames)
+            && method->IsOptimized(sizeof(void*))) {
+          CodeInfo code_info = method->GetOptimizedCodeInfo();
+          uint32_t native_pc_offset = method->NativeQuickPcOffset(cur_quick_frame_pc_);
+          StackMap stack_map = code_info.GetStackMapForNativePcOffset(native_pc_offset);
+          if (stack_map.HasInlineInfo(code_info)) {
+            InlineInfo inline_info = code_info.GetInlineInfoOf(stack_map);
+            DCHECK_EQ(current_inlining_depth_, 0u);
+            for (current_inlining_depth_ = inline_info.GetDepth();
+                 current_inlining_depth_ != 0;
+                 --current_inlining_depth_) {
+              bool should_continue = VisitFrame();
+              if (UNLIKELY(!should_continue)) {
+                return;
+              }
+            }
+          }
+        }
+
         bool should_continue = VisitFrame();
         if (UNLIKELY(!should_continue)) {
           return;
diff --git a/runtime/stack.h b/runtime/stack.h
index ed9e458..5b43848 100644
--- a/runtime/stack.h
+++ b/runtime/stack.h
@@ -36,9 +36,10 @@
 }  // namespace mirror
 
 class Context;
-class ShadowFrame;
 class HandleScope;
+class InlineInfo;
 class ScopedObjectAccess;
+class ShadowFrame;
 class StackVisitor;
 class Thread;
 
@@ -74,12 +75,18 @@
   }
 
   // Create ShadowFrame in heap for deoptimization.
-  static ShadowFrame* Create(uint32_t num_vregs, ShadowFrame* link,
-                             mirror::ArtMethod* method, uint32_t dex_pc) {
+  static ShadowFrame* CreateDeoptimizedFrame(uint32_t num_vregs, ShadowFrame* link,
+                                             mirror::ArtMethod* method, uint32_t dex_pc) {
     uint8_t* memory = new uint8_t[ComputeSize(num_vregs)];
     return Create(num_vregs, link, method, dex_pc, memory);
   }
 
+  // Delete a ShadowFrame allocated on the heap for deoptimization.
+  static void DeleteDeoptimizedFrame(ShadowFrame* sf) {
+    uint8_t* memory = reinterpret_cast<uint8_t*>(sf);
+    delete[] memory;
+  }
+
   // Create ShadowFrame for interpreter using provided memory.
   static ShadowFrame* Create(uint32_t num_vregs, ShadowFrame* link,
                              mirror::ArtMethod* method, uint32_t dex_pc, void* memory) {
@@ -403,8 +410,17 @@
 };
 
 class StackVisitor {
+ public:
+  // This enum defines a flag to control whether inlined frames are included
+  // when walking the stack.
+  enum class StackWalkKind {
+    kIncludeInlinedFrames,
+    kSkipInlinedFrames,
+  };
+
  protected:
-  StackVisitor(Thread* thread, Context* context) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  StackVisitor(Thread* thread, Context* context, StackWalkKind walk_kind)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
  public:
   virtual ~StackVisitor() {}
@@ -415,15 +431,7 @@
   void WalkStack(bool include_transitions = false)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  mirror::ArtMethod* GetMethod() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    if (cur_shadow_frame_ != nullptr) {
-      return cur_shadow_frame_->GetMethod();
-    } else if (cur_quick_frame_ != nullptr) {
-      return cur_quick_frame_->AsMirrorPtr();
-    } else {
-      return nullptr;
-    }
-  }
+  mirror::ArtMethod* GetMethod() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   bool IsShadowFrame() const {
     return cur_shadow_frame_ != nullptr;
@@ -459,7 +467,7 @@
 
   size_t GetNumFrames() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     if (num_frames_ == 0) {
-      num_frames_ = ComputeNumFrames(thread_);
+      num_frames_ = ComputeNumFrames(thread_, walk_kind_);
     }
     return num_frames_;
   }
@@ -472,6 +480,9 @@
   bool GetNextMethodAndDexPc(mirror::ArtMethod** next_method, uint32_t* next_dex_pc)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  bool IsReferenceVReg(mirror::ArtMethod* m, uint16_t vreg)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   bool GetVReg(mirror::ArtMethod* m, uint16_t vreg, VRegKind kind, uint32_t* val) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -592,6 +603,10 @@
     return sizeof(StackReference<mirror::ArtMethod>) + (out_num * sizeof(uint32_t));
   }
 
+  bool IsInInlinedFrame() const {
+    return current_inlining_depth_ != 0;
+  }
+
   uintptr_t GetCurrentQuickFramePc() const {
     return cur_quick_frame_pc_;
   }
@@ -612,13 +627,14 @@
 
   std::string DescribeLocation() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  static size_t ComputeNumFrames(Thread* thread) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  static size_t ComputeNumFrames(Thread* thread, StackWalkKind walk_kind)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   static void DescribeStack(Thread* thread) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
  private:
   // Private constructor known in the case that num_frames_ has already been computed.
-  StackVisitor(Thread* thread, Context* context, size_t num_frames)
+  StackVisitor(Thread* thread, Context* context, StackWalkKind walk_kind, size_t num_frames)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   bool IsAccessibleRegister(uint32_t reg, bool is_float) const {
@@ -668,25 +684,22 @@
   bool SetVRegFromQuickCode(mirror::ArtMethod* m, uint16_t vreg, uint32_t new_value,
                             VRegKind kind)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  bool SetVRegFromOptimizedCode(mirror::ArtMethod* m, uint16_t vreg, uint32_t new_value,
-                                VRegKind kind)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   bool SetRegisterIfAccessible(uint32_t reg, uint32_t new_value, VRegKind kind)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   bool SetVRegPairFromQuickCode(mirror::ArtMethod* m, uint16_t vreg, uint64_t new_value,
                                 VRegKind kind_lo, VRegKind kind_hi)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  bool SetVRegPairFromOptimizedCode(mirror::ArtMethod* m, uint16_t vreg, uint64_t new_value,
-                                    VRegKind kind_lo, VRegKind kind_hi)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   bool SetRegisterPairIfAccessible(uint32_t reg_lo, uint32_t reg_hi, uint64_t new_value,
                                    bool is_float)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void SanityCheckFrame() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  InlineInfo GetCurrentInlineInfo() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   Thread* const thread_;
+  const StackWalkKind walk_kind_;
   ShadowFrame* cur_shadow_frame_;
   StackReference<mirror::ArtMethod>* cur_quick_frame_;
   uintptr_t cur_quick_frame_pc_;
@@ -694,6 +707,9 @@
   size_t num_frames_;
   // Depth of the frame we're currently at.
   size_t cur_depth_;
+  // Current inlining depth of the method we are currently at.
+  // 0 if there is no inlined frame.
+  size_t current_inlining_depth_;
 
  protected:
   Context* const context_;
diff --git a/runtime/stack_map.cc b/runtime/stack_map.cc
index 11e7e44..6a0c07d 100644
--- a/runtime/stack_map.cc
+++ b/runtime/stack_map.cc
@@ -257,21 +257,48 @@
     DumpStackMapHeader(os, i);
     if (stack_map.HasDexRegisterMap(*this)) {
       DexRegisterMap dex_register_map = GetDexRegisterMapOf(stack_map, number_of_dex_registers);
-      // TODO: Display the bit mask of live Dex registers.
-      for (size_t j = 0; j < number_of_dex_registers; ++j) {
-        if (dex_register_map.IsDexRegisterLive(j)) {
-          size_t location_catalog_entry_index = dex_register_map.GetLocationCatalogEntryIndex(
-              j, number_of_dex_registers, number_of_location_catalog_entries);
-          DexRegisterLocation location =
-              dex_register_map.GetDexRegisterLocation(j, number_of_dex_registers, *this);
-          DumpRegisterMapping(
-              os, j, location, "v",
-              "\t[entry " + std::to_string(static_cast<int>(location_catalog_entry_index)) + "]");
-        }
-      }
+      dex_register_map.Dump(os, *this, number_of_dex_registers);
     }
   }
-  // TODO: Dump the stack map's inline information.
+  // TODO: Dump the stack map's inline information? We need to know more from the caller:
+  //       we need to know the number of dex registers for each inlined method.
+}
+
+void DexRegisterMap::Dump(std::ostream& os,
+                          const CodeInfo& code_info,
+                          uint16_t number_of_dex_registers) const {
+  size_t number_of_location_catalog_entries =
+      code_info.GetNumberOfDexRegisterLocationCatalogEntries();
+  // TODO: Display the bit mask of live Dex registers.
+  for (size_t j = 0; j < number_of_dex_registers; ++j) {
+    if (IsDexRegisterLive(j)) {
+      size_t location_catalog_entry_index = GetLocationCatalogEntryIndex(
+          j, number_of_dex_registers, number_of_location_catalog_entries);
+      DexRegisterLocation location = GetDexRegisterLocation(j, number_of_dex_registers, code_info);
+      DumpRegisterMapping(
+          os, j, location, "v",
+          "\t[entry " + std::to_string(static_cast<int>(location_catalog_entry_index)) + "]");
+    }
+  }
+}
+
+void InlineInfo::Dump(std::ostream& os,
+                      const CodeInfo& code_info,
+                      uint16_t number_of_dex_registers[]) const {
+  os << "InlineInfo with depth " << static_cast<uint32_t>(GetDepth()) << "\n";
+  // number_of_dex_registers[i] is the Dex register count used to decode the map at depth i.
+  for (size_t i = 0; i < GetDepth(); ++i) {
+    os << " At depth " << i
+       << std::hex
+       << " (dex_pc=0x" << GetDexPcAtDepth(i)
+       << ", method_index=0x" << GetMethodIndexAtDepth(i)
+       << std::dec << ")\n";  // std::hex is sticky: restore decimal for later output.
+    if (HasDexRegisterMapAtDepth(i)) {
+      DexRegisterMap dex_register_map =
+          code_info.GetDexRegisterMapAtDepth(i, *this, number_of_dex_registers[i]);
+      dex_register_map.Dump(os, code_info, number_of_dex_registers[i]);
+    }
+  }
 }
 
 }  // namespace art
diff --git a/runtime/stack_map.h b/runtime/stack_map.h
index f68cafe..16ae772 100644
--- a/runtime/stack_map.h
+++ b/runtime/stack_map.h
@@ -39,47 +39,6 @@
  * their own fields.
  */
 
-/**
- * Inline information for a specific PC. The information is of the form:
- * [inlining_depth, [method_dex reference]+]
- */
-class InlineInfo {
- public:
-  explicit InlineInfo(MemoryRegion region) : region_(region) {}
-
-  uint8_t GetDepth() const {
-    return region_.LoadUnaligned<uint8_t>(kDepthOffset);
-  }
-
-  void SetDepth(uint8_t depth) {
-    region_.StoreUnaligned<uint8_t>(kDepthOffset, depth);
-  }
-
-  uint32_t GetMethodReferenceIndexAtDepth(uint8_t depth) const {
-    return region_.LoadUnaligned<uint32_t>(kFixedSize + depth * SingleEntrySize());
-  }
-
-  void SetMethodReferenceIndexAtDepth(uint8_t depth, uint32_t index) {
-    region_.StoreUnaligned<uint32_t>(kFixedSize + depth * SingleEntrySize(), index);
-  }
-
-  static size_t SingleEntrySize() {
-    return sizeof(uint32_t);
-  }
-
- private:
-  // TODO: Instead of plain types such as "uint8_t", introduce
-  // typedefs (and document the memory layout of InlineInfo).
-  static constexpr int kDepthOffset = 0;
-  static constexpr int kFixedSize = kDepthOffset + sizeof(uint8_t);
-
-  MemoryRegion region_;
-
-  friend class CodeInfo;
-  friend class StackMap;
-  friend class StackMapStream;
-};
-
 // Dex register location container used by DexRegisterMap and StackMapStream.
 class DexRegisterLocation {
  public:
@@ -506,7 +465,8 @@
                       const CodeInfo& code_info) const {
     DexRegisterLocation location =
         GetDexRegisterLocation(dex_register_number, number_of_dex_registers, code_info);
-    DCHECK(location.GetKind() == DexRegisterLocation::Kind::kConstant);
+    DCHECK(location.GetKind() == DexRegisterLocation::Kind::kConstant)
+        << DexRegisterLocation::PrettyDescriptor(location.GetKind());
     return location.GetValue();
   }
 
@@ -641,6 +601,8 @@
     return region_.size();
   }
 
+  void Dump(std::ostream& o, const CodeInfo& code_info, uint16_t number_of_dex_registers) const;
+
  private:
   // Return the index in the Dex register map corresponding to the Dex
   // register number `dex_register_number`.
@@ -675,9 +637,6 @@
  * The information is of the form:
  * [dex_pc, native_pc_offset, dex_register_map_offset, inlining_info_offset, register_mask,
  * stack_mask].
- *
- * Note that register_mask is fixed size, but stack_mask is variable size, depending on the
- * stack size of a method.
  */
 class StackMap {
  public:
@@ -759,6 +718,72 @@
   friend class StackMapStream;
 };
 
+/**
+ * Inline information for a specific PC. The information is of the form:
+ * [inlining_depth, [method_index, dex_pc, dex_register_map_offset]+]
+ */
+class InlineInfo {
+ public:
+  explicit InlineInfo(MemoryRegion region) : region_(region) {}
+
+  uint8_t GetDepth() const {
+    return region_.LoadUnaligned<uint8_t>(kDepthOffset);
+  }
+
+  void SetDepth(uint8_t depth) {
+    region_.StoreUnaligned<uint8_t>(kDepthOffset, depth);
+  }
+
+  uint32_t GetMethodIndexAtDepth(uint8_t depth) const {
+    return region_.LoadUnaligned<uint32_t>(kFixedSize + depth * SingleEntrySize());
+  }
+
+  void SetMethodIndexAtDepth(uint8_t depth, uint32_t index) {
+    region_.StoreUnaligned<uint32_t>(kFixedSize + depth * SingleEntrySize(), index);
+  }
+
+  uint32_t GetDexPcAtDepth(uint8_t depth) const {
+    return region_.LoadUnaligned<uint32_t>(
+        kFixedSize + depth * SingleEntrySize() + sizeof(uint32_t));
+  }
+
+  void SetDexPcAtDepth(uint8_t depth, uint32_t dex_pc) {
+    region_.StoreUnaligned<uint32_t>(
+        kFixedSize + depth * SingleEntrySize() + sizeof(uint32_t), dex_pc);
+  }
+
+  uint32_t GetDexRegisterMapOffsetAtDepth(uint8_t depth) const {
+    return region_.LoadUnaligned<uint32_t>(
+        kFixedSize + depth * SingleEntrySize() + sizeof(uint32_t) + sizeof(uint32_t));
+  }
+
+  void SetDexRegisterMapOffsetAtDepth(uint8_t depth, uint32_t offset) {
+    region_.StoreUnaligned<uint32_t>(
+        kFixedSize + depth * SingleEntrySize() + sizeof(uint32_t) + sizeof(uint32_t), offset);
+  }
+
+  bool HasDexRegisterMapAtDepth(uint8_t depth) const {
+    return GetDexRegisterMapOffsetAtDepth(depth) != StackMap::kNoDexRegisterMap;
+  }
+
+  static size_t SingleEntrySize() {
+    return sizeof(uint32_t) + sizeof(uint32_t) + sizeof(uint32_t);
+  }
+
+  void Dump(std::ostream& os, const CodeInfo& info, uint16_t* number_of_dex_registers) const;
+
+ private:
+  // TODO: Instead of plain types such as "uint8_t", introduce
+  // typedefs (and document the memory layout of InlineInfo).
+  static constexpr int kDepthOffset = 0;
+  static constexpr int kFixedSize = kDepthOffset + sizeof(uint8_t);
+
+  MemoryRegion region_;
+
+  friend class CodeInfo;
+  friend class StackMap;
+  friend class StackMapStream;
+};
 
 /**
  * Wrapper around all compiler information collected for a method.
@@ -960,6 +985,17 @@
     return DexRegisterMap(region_.Subregion(offset, size));
   }
 
+  // Return the `DexRegisterMap` pointed to by `inline_info` at depth `depth`.
+  DexRegisterMap GetDexRegisterMapAtDepth(uint8_t depth,
+                                          InlineInfo inline_info,
+                                          uint32_t number_of_dex_registers) const {
+    DCHECK(inline_info.HasDexRegisterMapAtDepth(depth));
+    uint32_t offset =
+        GetDexRegisterMapsOffset() + inline_info.GetDexRegisterMapOffsetAtDepth(depth);
+    size_t size = ComputeDexRegisterMapSizeOf(offset, number_of_dex_registers);
+    return DexRegisterMap(region_.Subregion(offset, size));
+  }
+
   InlineInfo GetInlineInfoOf(StackMap stack_map) const {
     DCHECK(stack_map.HasInlineInfo(*this));
     uint32_t offset = stack_map.GetInlineDescriptorOffset(*this) + GetDexRegisterMapsOffset();
diff --git a/runtime/thread.cc b/runtime/thread.cc
index b27ad4a..148bb6d 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -105,6 +105,43 @@
                   &tlsPtr_.quick_entrypoints);
 }
 
+void Thread::InitStringEntryPoints() {
+  ScopedObjectAccess soa(this);
+  QuickEntryPoints* qpoints = &tlsPtr_.quick_entrypoints;
+  qpoints->pNewEmptyString = reinterpret_cast<void(*)()>(
+      soa.DecodeMethod(WellKnownClasses::java_lang_StringFactory_newEmptyString));
+  qpoints->pNewStringFromBytes_B = reinterpret_cast<void(*)()>(
+      soa.DecodeMethod(WellKnownClasses::java_lang_StringFactory_newStringFromBytes_B));
+  qpoints->pNewStringFromBytes_BI = reinterpret_cast<void(*)()>(
+      soa.DecodeMethod(WellKnownClasses::java_lang_StringFactory_newStringFromBytes_BI));
+  qpoints->pNewStringFromBytes_BII = reinterpret_cast<void(*)()>(
+      soa.DecodeMethod(WellKnownClasses::java_lang_StringFactory_newStringFromBytes_BII));
+  qpoints->pNewStringFromBytes_BIII = reinterpret_cast<void(*)()>(
+      soa.DecodeMethod(WellKnownClasses::java_lang_StringFactory_newStringFromBytes_BIII));
+  qpoints->pNewStringFromBytes_BIIString = reinterpret_cast<void(*)()>(
+      soa.DecodeMethod(WellKnownClasses::java_lang_StringFactory_newStringFromBytes_BIIString));
+  qpoints->pNewStringFromBytes_BString = reinterpret_cast<void(*)()>(
+      soa.DecodeMethod(WellKnownClasses::java_lang_StringFactory_newStringFromBytes_BString));
+  qpoints->pNewStringFromBytes_BIICharset = reinterpret_cast<void(*)()>(
+      soa.DecodeMethod(WellKnownClasses::java_lang_StringFactory_newStringFromBytes_BIICharset));
+  qpoints->pNewStringFromBytes_BCharset = reinterpret_cast<void(*)()>(
+      soa.DecodeMethod(WellKnownClasses::java_lang_StringFactory_newStringFromBytes_BCharset));
+  qpoints->pNewStringFromChars_C = reinterpret_cast<void(*)()>(
+      soa.DecodeMethod(WellKnownClasses::java_lang_StringFactory_newStringFromChars_C));
+  qpoints->pNewStringFromChars_CII = reinterpret_cast<void(*)()>(
+      soa.DecodeMethod(WellKnownClasses::java_lang_StringFactory_newStringFromChars_CII));
+  qpoints->pNewStringFromChars_IIC = reinterpret_cast<void(*)()>(
+      soa.DecodeMethod(WellKnownClasses::java_lang_StringFactory_newStringFromChars_IIC));
+  qpoints->pNewStringFromCodePoints = reinterpret_cast<void(*)()>(
+      soa.DecodeMethod(WellKnownClasses::java_lang_StringFactory_newStringFromCodePoints));
+  qpoints->pNewStringFromString = reinterpret_cast<void(*)()>(
+      soa.DecodeMethod(WellKnownClasses::java_lang_StringFactory_newStringFromString));
+  qpoints->pNewStringFromStringBuffer = reinterpret_cast<void(*)()>(
+      soa.DecodeMethod(WellKnownClasses::java_lang_StringFactory_newStringFromStringBuffer));
+  qpoints->pNewStringFromStringBuilder = reinterpret_cast<void(*)()>(
+      soa.DecodeMethod(WellKnownClasses::java_lang_StringFactory_newStringFromStringBuilder));
+}
+
 void Thread::ResetQuickAllocEntryPointsForThread() {
   ResetQuickAllocEntryPoints(&tlsPtr_.quick_entrypoints);
 }
@@ -163,6 +200,7 @@
   }
   {
     ScopedObjectAccess soa(self);
+    self->InitStringEntryPoints();
 
     // Copy peer into self, deleting global reference when done.
     CHECK(self->tlsPtr_.jpeer != nullptr);
@@ -409,6 +447,8 @@
     }
   }
 
+  self->InitStringEntryPoints();
+
   CHECK_NE(self->GetState(), kRunnable);
   self->SetState(kNative);
 
@@ -572,13 +612,13 @@
   if (GetThreadId() != 0) {
     // If we're in kStarting, we won't have a thin lock id or tid yet.
     os << GetThreadId()
-             << ",tid=" << GetTid() << ',';
+       << ",tid=" << GetTid() << ',';
   }
   os << GetState()
-           << ",Thread*=" << this
-           << ",peer=" << tlsPtr_.opeer
-           << ",\"" << *tlsPtr_.name << "\""
-           << "]";
+     << ",Thread*=" << this
+     << ",peer=" << tlsPtr_.opeer
+     << ",\"" << (tlsPtr_.name != nullptr ? *tlsPtr_.name : "null") << "\""
+     << "]";
 }
 
 void Thread::Dump(std::ostream& os) const {
@@ -900,10 +940,14 @@
 struct StackDumpVisitor : public StackVisitor {
   StackDumpVisitor(std::ostream& os_in, Thread* thread_in, Context* context, bool can_allocate_in)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-      : StackVisitor(thread_in, context), os(os_in), thread(thread_in),
-        can_allocate(can_allocate_in), last_method(nullptr), last_line_number(0),
-        repetition_count(0), frame_count(0) {
-  }
+      : StackVisitor(thread_in, context, StackVisitor::StackWalkKind::kIncludeInlinedFrames),
+        os(os_in),
+        thread(thread_in),
+        can_allocate(can_allocate_in),
+        last_method(nullptr),
+        last_line_number(0),
+        repetition_count(0),
+        frame_count(0) {}
 
   virtual ~StackDumpVisitor() {
     if (frame_count == 0) {
@@ -1488,7 +1532,7 @@
  public:
   explicit CountStackDepthVisitor(Thread* thread)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-      : StackVisitor(thread, nullptr),
+      : StackVisitor(thread, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames),
         depth_(0), skip_depth_(0), skipping_(true) {}
 
   bool VisitFrame() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
@@ -1528,8 +1572,12 @@
 class BuildInternalStackTraceVisitor : public StackVisitor {
  public:
   explicit BuildInternalStackTraceVisitor(Thread* self, Thread* thread, int skip_depth)
-      : StackVisitor(thread, nullptr), self_(self),
-        skip_depth_(skip_depth), count_(0), dex_pc_trace_(nullptr), method_trace_(nullptr) {}
+      : StackVisitor(thread, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames),
+        self_(self),
+        skip_depth_(skip_depth),
+        count_(0),
+        dex_pc_trace_(nullptr),
+        method_trace_(nullptr) {}
 
   bool Init(int depth)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
@@ -1930,6 +1978,9 @@
   QUICK_ENTRY_POINT_INFO(pAllocObjectWithAccessCheck)
   QUICK_ENTRY_POINT_INFO(pCheckAndAllocArray)
   QUICK_ENTRY_POINT_INFO(pCheckAndAllocArrayWithAccessCheck)
+  QUICK_ENTRY_POINT_INFO(pAllocStringFromBytes)
+  QUICK_ENTRY_POINT_INFO(pAllocStringFromChars)
+  QUICK_ENTRY_POINT_INFO(pAllocStringFromString)
   QUICK_ENTRY_POINT_INFO(pInstanceofNonTrivial)
   QUICK_ENTRY_POINT_INFO(pCheckCast)
   QUICK_ENTRY_POINT_INFO(pInitializeStaticStorage)
@@ -2013,6 +2064,23 @@
   QUICK_ENTRY_POINT_INFO(pDeoptimize)
   QUICK_ENTRY_POINT_INFO(pA64Load)
   QUICK_ENTRY_POINT_INFO(pA64Store)
+  QUICK_ENTRY_POINT_INFO(pNewEmptyString)
+  QUICK_ENTRY_POINT_INFO(pNewStringFromBytes_B)
+  QUICK_ENTRY_POINT_INFO(pNewStringFromBytes_BI)
+  QUICK_ENTRY_POINT_INFO(pNewStringFromBytes_BII)
+  QUICK_ENTRY_POINT_INFO(pNewStringFromBytes_BIII)
+  QUICK_ENTRY_POINT_INFO(pNewStringFromBytes_BIIString)
+  QUICK_ENTRY_POINT_INFO(pNewStringFromBytes_BString)
+  QUICK_ENTRY_POINT_INFO(pNewStringFromBytes_BIICharset)
+  QUICK_ENTRY_POINT_INFO(pNewStringFromBytes_BCharset)
+  QUICK_ENTRY_POINT_INFO(pNewStringFromChars_C)
+  QUICK_ENTRY_POINT_INFO(pNewStringFromChars_CII)
+  QUICK_ENTRY_POINT_INFO(pNewStringFromChars_IIC)
+  QUICK_ENTRY_POINT_INFO(pNewStringFromCodePoints)
+  QUICK_ENTRY_POINT_INFO(pNewStringFromString)
+  QUICK_ENTRY_POINT_INFO(pNewStringFromStringBuffer)
+  QUICK_ENTRY_POINT_INFO(pNewStringFromStringBuilder)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierJni)
 #undef QUICK_ENTRY_POINT_INFO
 
   os << offset;
@@ -2052,7 +2120,10 @@
 struct CurrentMethodVisitor FINAL : public StackVisitor {
   CurrentMethodVisitor(Thread* thread, Context* context, bool abort_on_error)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-      : StackVisitor(thread, context), this_object_(nullptr), method_(nullptr), dex_pc_(0),
+      : StackVisitor(thread, context, StackVisitor::StackWalkKind::kIncludeInlinedFrames),
+        this_object_(nullptr),
+        method_(nullptr),
+        dex_pc_(0),
         abort_on_error_(abort_on_error) {}
   bool VisitFrame() OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     mirror::ArtMethod* m = GetMethod();
@@ -2095,7 +2166,10 @@
  public:
   ReferenceMapVisitor(Thread* thread, Context* context, RootVisitor& visitor)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-      : StackVisitor(thread, context), visitor_(visitor) {}
+        // We are visiting the references in compiled frames, so we do not need
+        // to know the inlined frames.
+      : StackVisitor(thread, context, StackVisitor::StackWalkKind::kSkipInlinedFrames),
+        visitor_(visitor) {}
 
   bool VisitFrame() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     if (false) {
@@ -2252,10 +2326,6 @@
     }
   }
 
-  static bool TestBitmap(size_t reg, const uint8_t* reg_vector) {
-    return ((reg_vector[reg / kBitsPerByte] >> (reg % kBitsPerByte)) & 0x01) != 0;
-  }
-
   // Visitor for when we visit a root.
   RootVisitor& visitor_;
 };
diff --git a/runtime/thread.h b/runtime/thread.h
index 35b785d..9346813 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -541,6 +541,16 @@
   }
 
  public:
+  static uint32_t QuickEntryPointOffsetWithSize(size_t quick_entrypoint_offset,
+                                                size_t pointer_size) {
+    DCHECK(pointer_size == 4 || pointer_size == 8) << pointer_size;
+    if (pointer_size == 4) {
+      return QuickEntryPointOffset<4>(quick_entrypoint_offset).Uint32Value();
+    } else {
+      return QuickEntryPointOffset<8>(quick_entrypoint_offset).Uint32Value();
+    }
+  }
+
   template<size_t pointer_size>
   static ThreadOffset<pointer_size> QuickEntryPointOffset(size_t quick_entrypoint_offset) {
     return ThreadOffsetFromTlsPtr<pointer_size>(
@@ -742,6 +752,18 @@
     tls32_.ready_for_debug_invoke = ready;
   }
 
+  bool IsDebugMethodEntry() const {
+    return tls32_.debug_method_entry_;
+  }
+
+  void SetDebugMethodEntry() {
+    tls32_.debug_method_entry_ = true;
+  }
+
+  void ClearDebugMethodEntry() {
+    tls32_.debug_method_entry_ = false;
+  }
+
   // Activates single step control for debugging. The thread takes the
   // ownership of the given SingleStepControl*. It is deleted by a call
   // to DeactivateSingleStepControl or upon thread destruction.
@@ -911,6 +933,8 @@
   void PushVerifier(verifier::MethodVerifier* verifier);
   void PopVerifier(verifier::MethodVerifier* verifier);
 
+  void InitStringEntryPoints();
+
  private:
   explicit Thread(bool daemon);
   ~Thread() LOCKS_EXCLUDED(Locks::mutator_lock_,
@@ -1016,7 +1040,7 @@
       suspend_count(0), debug_suspend_count(0), thin_lock_thread_id(0), tid(0),
       daemon(is_daemon), throwing_OutOfMemoryError(false), no_thread_suspension(0),
       thread_exit_check_count(0), handling_signal_(false), suspended_at_suspend_check(false),
-      ready_for_debug_invoke(false) {
+      ready_for_debug_invoke(false), debug_method_entry_(false) {
     }
 
     union StateAndFlags state_and_flags;
@@ -1065,6 +1089,10 @@
     // used to invoke method from the debugger which is only allowed when
     // the thread is suspended by an event.
     bool32_t ready_for_debug_invoke;
+
+    // True if the thread enters a method. This is used to detect method entry
+    // event for the debugger.
+    bool32_t debug_method_entry_;
   } tls32_;
 
   struct PACKED(8) tls_64bit_sized_values {
diff --git a/runtime/thread_state.h b/runtime/thread_state.h
index b5479ed..c7ea7f4 100644
--- a/runtime/thread_state.h
+++ b/runtime/thread_state.h
@@ -42,6 +42,7 @@
   kWaitingForDeoptimization,        // WAITING        TS_WAIT      waiting for deoptimization suspend all
   kWaitingForMethodTracingStart,    // WAITING        TS_WAIT      waiting for method tracing to start
   kWaitingForVisitObjects,          // WAITING        TS_WAIT      waiting for visiting objects
+  kWaitingForGetObjectsAllocated,   // WAITING        TS_WAIT      waiting for getting the number of allocated objects
   kStarting,                        // NEW            TS_WAIT      native thread started, not yet ready to run managed code
   kNative,                          // RUNNABLE       TS_RUNNING   running in a JNI native method
   kSuspended,                       // RUNNABLE       TS_RUNNING   suspended by GC or debugger
diff --git a/runtime/trace.cc b/runtime/trace.cc
index 5322f9f..7636792 100644
--- a/runtime/trace.cc
+++ b/runtime/trace.cc
@@ -22,6 +22,7 @@
 #define ATRACE_TAG ATRACE_TAG_DALVIK
 #include "cutils/trace.h"
 
+#include "base/casts.h"
 #include "base/stl_util.h"
 #include "base/unix_file/fd_file.h"
 #include "class_linker.h"
@@ -90,8 +91,9 @@
 
 class BuildStackTraceVisitor : public StackVisitor {
  public:
-  explicit BuildStackTraceVisitor(Thread* thread) : StackVisitor(thread, nullptr),
-      method_trace_(Trace::AllocStackTrace()) {}
+  explicit BuildStackTraceVisitor(Thread* thread)
+      : StackVisitor(thread, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames),
+        method_trace_(Trace::AllocStackTrace()) {}
 
   bool VisitFrame() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     mirror::ArtMethod* m = GetMethod();
@@ -125,6 +127,9 @@
 pthread_t Trace::sampling_pthread_ = 0U;
 std::unique_ptr<std::vector<mirror::ArtMethod*>> Trace::temp_stack_trace_;
 
+// The key identifying the tracer to update instrumentation.
+static constexpr const char* kTracerInstrumentationKey = "Tracer";
+
 static mirror::ArtMethod* DecodeTraceMethodId(uint32_t tmid) {
   return reinterpret_cast<mirror::ArtMethod*>(tmid & ~kTraceMethodActionMask);
 }
@@ -329,7 +334,7 @@
   return nullptr;
 }
 
-void Trace::Start(const char* trace_filename, int trace_fd, int buffer_size, int flags,
+void Trace::Start(const char* trace_filename, int trace_fd, size_t buffer_size, int flags,
                   TraceOutputMode output_mode, TraceMode trace_mode, int interval_us) {
   Thread* self = Thread::Current();
   {
@@ -392,7 +397,7 @@
                                                    instrumentation::Instrumentation::kMethodExited |
                                                    instrumentation::Instrumentation::kMethodUnwind);
         // TODO: In full-PIC mode, we don't need to fully deopt.
-        runtime->GetInstrumentation()->EnableMethodTracing();
+        runtime->GetInstrumentation()->EnableMethodTracing(kTracerInstrumentationKey);
       }
     }
   }
@@ -439,7 +444,7 @@
       MutexLock mu(Thread::Current(), *Locks::thread_list_lock_);
       runtime->GetThreadList()->ForEach(ClearThreadStackTraceAndClockBase, nullptr);
     } else {
-      runtime->GetInstrumentation()->DisableMethodTracing();
+      runtime->GetInstrumentation()->DisableMethodTracing(kTracerInstrumentationKey);
       runtime->GetInstrumentation()->RemoveListener(
           the_trace, instrumentation::Instrumentation::kMethodEntered |
           instrumentation::Instrumentation::kMethodExited |
@@ -521,7 +526,7 @@
       MutexLock mu(Thread::Current(), *Locks::thread_list_lock_);
       runtime->GetThreadList()->ForEach(ClearThreadStackTraceAndClockBase, nullptr);
     } else {
-      runtime->GetInstrumentation()->DisableMethodTracing();
+      runtime->GetInstrumentation()->DisableMethodTracing(kTracerInstrumentationKey);
       runtime->GetInstrumentation()->RemoveListener(the_trace,
                                                     instrumentation::Instrumentation::kMethodEntered |
                                                     instrumentation::Instrumentation::kMethodExited |
@@ -565,7 +570,7 @@
                                                instrumentation::Instrumentation::kMethodExited |
                                                instrumentation::Instrumentation::kMethodUnwind);
     // TODO: In full-PIC mode, we don't need to fully deopt.
-    runtime->GetInstrumentation()->EnableMethodTracing();
+    runtime->GetInstrumentation()->EnableMethodTracing(kTracerInstrumentationKey);
   }
 
   runtime->GetThreadList()->ResumeAll();
@@ -592,19 +597,15 @@
   }
 }
 
-static constexpr size_t kStreamingBufferSize = 16 * KB;
+static constexpr size_t kMinBufSize = 18U;  // Trace header is up to 18B.
 
-Trace::Trace(File* trace_file, const char* trace_name, int buffer_size, int flags,
+Trace::Trace(File* trace_file, const char* trace_name, size_t buffer_size, int flags,
              TraceOutputMode output_mode, TraceMode trace_mode)
     : trace_file_(trace_file),
-      buf_(new uint8_t[output_mode == TraceOutputMode::kStreaming ?
-          kStreamingBufferSize :
-          buffer_size]()),
+      buf_(new uint8_t[std::max(kMinBufSize, buffer_size)]()),
       flags_(flags), trace_output_mode_(output_mode), trace_mode_(trace_mode),
       clock_source_(default_clock_source_),
-      buffer_size_(output_mode == TraceOutputMode::kStreaming ?
-          kStreamingBufferSize :
-          buffer_size),
+      buffer_size_(std::max(kMinBufSize, buffer_size)),
       start_time_(MicroTime()), clock_overhead_ns_(GetClockOverheadNanoSeconds()), cur_offset_(0),
       overflow_(false), interval_us_(0), streaming_lock_(nullptr) {
   uint16_t trace_version = GetTraceVersion(clock_source_);
@@ -621,6 +622,7 @@
     uint16_t record_size = GetRecordSize(clock_source_);
     Append2LE(buf_.get() + 16, record_size);
   }
+  static_assert(18 <= kMinBufSize, "Minimum buffer size not large enough for trace header");
 
   // Update current offset.
   cur_offset_.StoreRelaxed(kTraceHeaderLength);
@@ -875,11 +877,21 @@
 void Trace::WriteToBuf(const uint8_t* src, size_t src_size) {
   int32_t old_offset = cur_offset_.LoadRelaxed();
   int32_t new_offset = old_offset + static_cast<int32_t>(src_size);
-  if (new_offset > buffer_size_) {
+  if (dchecked_integral_cast<size_t>(new_offset) > buffer_size_) {
     // Flush buffer.
     if (!trace_file_->WriteFully(buf_.get(), old_offset)) {
       PLOG(WARNING) << "Failed streaming a tracing event.";
     }
+
+    // If the data is at least as large as the whole buffer, bypass it and write directly.
+    if (src_size >= buffer_size_) {
+      if (!trace_file_->WriteFully(src, src_size)) {
+        PLOG(WARNING) << "Failed streaming a tracing event.";
+      }
+      cur_offset_.StoreRelease(0);  // Buffer is empty now.
+      return;
+    }
+
     old_offset = 0;
     new_offset = static_cast<int32_t>(src_size);
   }
@@ -900,7 +912,7 @@
     do {
       old_offset = cur_offset_.LoadRelaxed();
       new_offset = old_offset + GetRecordSize(clock_source_);
-      if (new_offset > buffer_size_) {
+      if (static_cast<size_t>(new_offset) > buffer_size_) {
         overflow_ = true;
         return;
       }
@@ -1034,4 +1046,10 @@
   return the_trace_->trace_mode_;
 }
 
+size_t Trace::GetBufferSize() {  // Byte size of the running trace's buffer; CHECK-fails if none.
+  MutexLock mu(Thread::Current(), *Locks::trace_lock_);
+  CHECK(the_trace_ != nullptr) << "Trace buffer size requested, but no trace currently running";
+  return the_trace_->buffer_size_;
+}
+
 }  // namespace art
diff --git a/runtime/trace.h b/runtime/trace.h
index 1ecd4d8..df6d5e7 100644
--- a/runtime/trace.h
+++ b/runtime/trace.h
@@ -72,7 +72,7 @@
 
   static void SetDefaultClockSource(TraceClockSource clock_source);
 
-  static void Start(const char* trace_filename, int trace_fd, int buffer_size, int flags,
+  static void Start(const char* trace_filename, int trace_fd, size_t buffer_size, int flags,
                     TraceOutputMode output_mode, TraceMode trace_mode, int interval_us)
       LOCKS_EXCLUDED(Locks::mutator_lock_,
                      Locks::thread_list_lock_,
@@ -136,9 +136,10 @@
 
   static TraceOutputMode GetOutputMode() LOCKS_EXCLUDED(Locks::trace_lock_);
   static TraceMode GetMode() LOCKS_EXCLUDED(Locks::trace_lock_);
+  static size_t GetBufferSize() LOCKS_EXCLUDED(Locks::trace_lock_);
 
  private:
-  Trace(File* trace_file, const char* trace_name, int buffer_size, int flags,
+  Trace(File* trace_file, const char* trace_name, size_t buffer_size, int flags,
         TraceOutputMode output_mode, TraceMode trace_mode);
 
   // The sampling interval in microseconds is passed as an argument.
@@ -188,7 +189,7 @@
   std::unique_ptr<File> trace_file_;
 
   // Buffer to store trace data.
-  std::unique_ptr<uint8_t> buf_;
+  std::unique_ptr<uint8_t[]> buf_;
 
   // Flags enabling extra tracing of things such as alloc counts.
   const int flags_;
@@ -202,7 +203,7 @@
   const TraceClockSource clock_source_;
 
   // Size of buf_.
-  const int buffer_size_;
+  const size_t buffer_size_;
 
   // Time trace was created.
   const uint64_t start_time_;
diff --git a/runtime/transaction.cc b/runtime/transaction.cc
index cc0f15f..ab821d7 100644
--- a/runtime/transaction.cc
+++ b/runtime/transaction.cc
@@ -70,13 +70,21 @@
   }
 }
 
-void Transaction::ThrowAbortError(Thread* self, bool rethrow) {
+void Transaction::ThrowAbortError(Thread* self, const std::string* abort_message) {
+  const bool rethrow = (abort_message == nullptr);
   if (kIsDebugBuild && rethrow) {
     CHECK(IsAborted()) << "Rethrow " << Transaction::kAbortExceptionDescriptor
                        << " while transaction is not aborted";
   }
-  std::string abort_msg(GetAbortMessage());
-  self->ThrowNewWrappedException(Transaction::kAbortExceptionSignature, abort_msg.c_str());
+  if (rethrow) {
+    // Rethrow an exception with the earlier abort message stored in the transaction.
+    self->ThrowNewWrappedException(Transaction::kAbortExceptionSignature,
+                                   GetAbortMessage().c_str());
+  } else {
+    // Throw an exception with the given abort message.
+    self->ThrowNewWrappedException(Transaction::kAbortExceptionSignature,
+                                   abort_message->c_str());
+  }
 }
 
 bool Transaction::IsAborted() {
diff --git a/runtime/transaction.h b/runtime/transaction.h
index 4d85662..030478c 100644
--- a/runtime/transaction.h
+++ b/runtime/transaction.h
@@ -48,7 +48,7 @@
   void Abort(const std::string& abort_message)
       LOCKS_EXCLUDED(log_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void ThrowAbortError(Thread* self, bool rethrow)
+  void ThrowAbortError(Thread* self, const std::string* abort_message)
       LOCKS_EXCLUDED(log_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   bool IsAborted() LOCKS_EXCLUDED(log_lock_);
diff --git a/runtime/utf.cc b/runtime/utf.cc
index 3d13c3e..10600e2 100644
--- a/runtime/utf.cc
+++ b/runtime/utf.cc
@@ -107,15 +107,6 @@
   }
 }
 
-int32_t ComputeUtf16Hash(mirror::CharArray* chars, int32_t offset,
-                         size_t char_count) {
-  uint32_t hash = 0;
-  for (size_t i = 0; i < char_count; i++) {
-    hash = hash * 31 + chars->Get(offset + i);
-  }
-  return static_cast<int32_t>(hash);
-}
-
 int32_t ComputeUtf16Hash(const uint16_t* chars, size_t char_count) {
   uint32_t hash = 0;
   while (char_count--) {
diff --git a/runtime/utf.h b/runtime/utf.h
index dd38afa..7f05248 100644
--- a/runtime/utf.h
+++ b/runtime/utf.h
@@ -87,9 +87,9 @@
 /*
  * Retrieve the next UTF-16 character or surrogate pair from a UTF-8 string.
  * single byte, 2-byte and 3-byte UTF-8 sequences result in a single UTF-16
- * character whereas 4-byte UTF-8 sequences result in a surrogate pair. Use
- * GetLeadingUtf16Char and GetTrailingUtf16Char to process the return value
- * of this function.
+ * character (possibly one half of a surrogate) whereas 4-byte UTF-8 sequences
+ * result in a surrogate pair. Use GetLeadingUtf16Char and GetTrailingUtf16Char
+ * to process the return value of this function.
  *
  * Advances "*utf8_data_in" to the start of the next character.
  *
diff --git a/runtime/utils.cc b/runtime/utils.cc
index ec7131d..7986cdc 100644
--- a/runtime/utils.cc
+++ b/runtime/utils.cc
@@ -262,8 +262,8 @@
 
 void NanoSleep(uint64_t ns) {
   timespec tm;
-  tm.tv_sec = 0;
-  tm.tv_nsec = ns;
+  tm.tv_sec = ns / MsToNs(1000);
+  tm.tv_nsec = ns - static_cast<uint64_t>(tm.tv_sec) * MsToNs(1000);
   nanosleep(&tm, nullptr);
 }
 
@@ -827,14 +827,21 @@
    */
 
   const uint32_t pair = GetUtf16FromUtf8(pUtf8Ptr);
-
   const uint16_t leading = GetLeadingUtf16Char(pair);
-  const uint32_t trailing = GetTrailingUtf16Char(pair);
 
-  if (trailing == 0) {
-    // Perform follow-up tests based on the high 8 bits of the
-    // lower surrogate.
-    switch (leading >> 8) {
+  // We have a surrogate pair resulting from a valid 4 byte UTF sequence.
+  // No further checks are necessary because 4 byte sequences span code
+  // points [U+10000, U+1FFFFF], which are valid codepoints in a dex
+  // identifier. Furthermore, GetUtf16FromUtf8 guarantees that each of
+  // the surrogate halves is valid and well formed in this instance.
+  if (GetTrailingUtf16Char(pair) != 0) {
+    return true;
+  }
+
+
+  // We've encountered a one, two or three byte UTF-8 sequence. The
+  // three byte UTF-8 sequence could be one half of a surrogate pair.
+  switch (leading >> 8) {
     case 0x00:
       // It's only valid if it's above the ISO-8859-1 high space (0xa0).
       return (leading > 0x00a0);
@@ -842,9 +849,14 @@
     case 0xd9:
     case 0xda:
     case 0xdb:
-      // It looks like a leading surrogate but we didn't find a trailing
-      // surrogate if we're here.
-      return false;
+      {
+        // We found a three byte sequence encoding one half of a surrogate.
+        // Look for the other half.
+        const uint32_t pair2 = GetUtf16FromUtf8(pUtf8Ptr);
+        const uint16_t trailing = GetLeadingUtf16Char(pair2);
+
+        return (GetTrailingUtf16Char(pair2) == 0) && (0xdc00 <= trailing && trailing <= 0xdfff);
+      }
     case 0xdc:
     case 0xdd:
     case 0xde:
@@ -855,21 +867,19 @@
     case 0xff:
       // It's in the range that has spaces, controls, and specials.
       switch (leading & 0xfff8) {
-      case 0x2000:
-      case 0x2008:
-      case 0x2028:
-      case 0xfff0:
-      case 0xfff8:
-        return false;
+        case 0x2000:
+        case 0x2008:
+        case 0x2028:
+        case 0xfff0:
+        case 0xfff8:
+          return false;
       }
-      break;
-    }
-
-    return true;
+      return true;
+    default:
+      return true;
   }
 
-  // We have a surrogate pair. Check that trailing surrogate is well formed.
-  return (trailing >= 0xdc00 && trailing <= 0xdfff);
+  UNREACHABLE();
 }
 
 /* Return whether the pointed-at modified-UTF-8 encoded character is
@@ -1298,7 +1308,7 @@
     if (!BacktraceMap::IsValid(it->map)) {
       os << StringPrintf("%08" PRIxPTR "  ???", it->pc);
     } else {
-      os << StringPrintf("%08" PRIxPTR "  ", it->pc - it->map.start);
+      os << StringPrintf("%08" PRIxPTR "  ", BacktraceMap::GetRelativePc(it->map, it->pc));
       os << it->map.name;
       os << " (";
       if (!it->func_name.empty()) {
diff --git a/runtime/utils.h b/runtime/utils.h
index 853fa08..71ccf85 100644
--- a/runtime/utils.h
+++ b/runtime/utils.h
@@ -300,6 +300,18 @@
   return CTZ(x);
 }
 
+// Return whether x / divisor == x * (1.0f / divisor), for every float x.
+static constexpr bool CanDivideByReciprocalMultiplyFloat(int32_t divisor) {
+  // True if the low 23 bits of divisor (a float's mantissa width) are all 0.
+  return ((divisor & 0x7fffff) == 0);
+}
+
+// Return whether x / divisor == x * (1.0 / divisor), for every double x.
+static constexpr bool CanDivideByReciprocalMultiplyDouble(int64_t divisor) {
+  // True if the low 52 bits of divisor (a double's mantissa width) are all 0.
+  return ((divisor & ((UINT64_C(1) << 52) - 1)) == 0);
+}
+
 template<typename T>
 static constexpr int POPCOUNT(T x) {
   return (sizeof(T) == sizeof(uint32_t))
@@ -592,6 +604,11 @@
   return std::unique_ptr<T>(new T(std::forward<Args>(args)...));
 }
 
+inline bool TestBitmap(size_t idx, const uint8_t* bitmap) {
+  return ((bitmap[idx / kBitsPerByte] >> (idx % kBitsPerByte)) & 0x01) != 0;
+}
+
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_UTILS_H_
diff --git a/runtime/utils_test.cc b/runtime/utils_test.cc
index ae24b77..869d305 100644
--- a/runtime/utils_test.cc
+++ b/runtime/utils_test.cc
@@ -151,9 +151,6 @@
   f = java_lang_String->FindDeclaredInstanceField("count", "I");
   EXPECT_EQ("int java.lang.String.count", PrettyField(f));
   EXPECT_EQ("java.lang.String.count", PrettyField(f, false));
-  f = java_lang_String->FindDeclaredInstanceField("value", "[C");
-  EXPECT_EQ("char[] java.lang.String.value", PrettyField(f));
-  EXPECT_EQ("java.lang.String.value", PrettyField(f, false));
 }
 
 TEST_F(UtilsTest, PrettySize) {
@@ -384,7 +381,8 @@
 TEST_F(UtilsTest, ExecSuccess) {
   std::vector<std::string> command;
   if (kIsTargetBuild) {
-    command.push_back("/system/bin/id");
+    std::string android_root(GetAndroidRoot());
+    command.push_back(android_root + "/bin/id");
   } else {
     command.push_back("/usr/bin/id");
   }
@@ -517,4 +515,33 @@
   EXPECT_FALSE(IsAbsoluteUint<32>(UINT_MAX_plus1));
 }
 
+TEST_F(UtilsTest, TestSleep) {
+  auto start = NanoTime();
+  NanoSleep(MsToNs(1500));
+  EXPECT_GT(NanoTime() - start, MsToNs(1000));
+}
+
+TEST_F(UtilsTest, IsValidDescriptor) {
+  std::vector<uint8_t> descriptor(
+      { 'L', 'a', '/', 'b', '$', 0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80, ';', 0x00 });
+  EXPECT_TRUE(IsValidDescriptor(reinterpret_cast<char*>(&descriptor[0])));
+
+  std::vector<uint8_t> unpaired_surrogate(
+      { 'L', 'a', '/', 'b', '$', 0xed, 0xa0, 0x80, ';', 0x00 });
+  EXPECT_FALSE(IsValidDescriptor(reinterpret_cast<char*>(&unpaired_surrogate[0])));
+
+  std::vector<uint8_t> unpaired_surrogate_at_end(
+      { 'L', 'a', '/', 'b', '$', 0xed, 0xa0, 0x80, 0x00 });
+  EXPECT_FALSE(IsValidDescriptor(reinterpret_cast<char*>(&unpaired_surrogate_at_end[0])));
+
+  std::vector<uint8_t> invalid_surrogate(
+      { 'L', 'a', '/', 'b', '$', 0xed, 0xb0, 0x80, ';', 0x00 });
+  EXPECT_FALSE(IsValidDescriptor(reinterpret_cast<char*>(&invalid_surrogate[0])));
+
+  std::vector<uint8_t> unpaired_surrogate_with_multibyte_sequence(
+      { 'L', 'a', '/', 'b', '$', 0xed, 0xb0, 0x80, 0xf0, 0x9f, 0x8f, 0xa0, ';', 0x00 });
+  EXPECT_FALSE(
+      IsValidDescriptor(reinterpret_cast<char*>(&unpaired_surrogate_with_multibyte_sequence[0])));
+}
+
 }  // namespace art
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index 065df05..475fe8b 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -516,6 +516,23 @@
   return GetQuickInvokedMethod(inst, register_line, is_range, false);
 }
 
+SafeMap<uint32_t, std::set<uint32_t>> MethodVerifier::FindStringInitMap(mirror::ArtMethod* m) {
+  Thread* self = Thread::Current();
+  StackHandleScope<3> hs(self);
+  Handle<mirror::DexCache> dex_cache(hs.NewHandle(m->GetDexCache()));
+  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(m->GetClassLoader()));
+  Handle<mirror::ArtMethod> method(hs.NewHandle(m));
+  MethodVerifier verifier(self, m->GetDexFile(), dex_cache, class_loader, &m->GetClassDef(),
+                          m->GetCodeItem(), m->GetDexMethodIndex(), method, m->GetAccessFlags(),
+                          true, true, false, true);
+  return verifier.FindStringInitMap();
+}
+
+SafeMap<uint32_t, std::set<uint32_t>>& MethodVerifier::FindStringInitMap() {
+  Verify();
+  return GetStringInitPcRegMap();
+}
+
 bool MethodVerifier::Verify() {
   // If there aren't any instructions, make sure that's expected, then exit successfully.
   if (code_item_ == nullptr) {
@@ -2445,7 +2462,8 @@
          * Replace the uninitialized reference with an initialized one. We need to do this for all
          * registers that have the same object instance in them, not just the "this" register.
          */
-        work_line_->MarkRefsAsInitialized(this, this_type);
+        const uint32_t this_reg = (is_range) ? inst->VRegC_3rc() : inst->VRegC_35c();
+        work_line_->MarkRefsAsInitialized(this, this_type, this_reg, work_insn_idx_);
       }
       if (return_type == nullptr) {
         return_type = &reg_types_.FromDescriptor(GetClassLoader(), return_type_descriptor,
diff --git a/runtime/verifier/method_verifier.h b/runtime/verifier/method_verifier.h
index 2914b7c..452d1dd 100644
--- a/runtime/verifier/method_verifier.h
+++ b/runtime/verifier/method_verifier.h
@@ -199,6 +199,9 @@
   static mirror::ArtMethod* FindInvokedMethodAtDexPc(mirror::ArtMethod* m, uint32_t dex_pc)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  static SafeMap<uint32_t, std::set<uint32_t>> FindStringInitMap(mirror::ArtMethod* m)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   static void Init() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   static void Shutdown();
 
@@ -263,6 +266,10 @@
     return (method_access_flags_ & kAccStatic) != 0;
   }
 
+  SafeMap<uint32_t, std::set<uint32_t>>& GetStringInitPcRegMap() {
+    return string_init_pc_reg_map_;
+  }
+
  private:
   // Private constructor for dumping.
   MethodVerifier(Thread* self, const DexFile* dex_file, Handle<mirror::DexCache> dex_cache,
@@ -307,6 +314,9 @@
   mirror::ArtMethod* FindInvokedMethodAtDexPc(uint32_t dex_pc)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  SafeMap<uint32_t, std::set<uint32_t>>& FindStringInitMap()
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   /*
    * Compute the width of the instruction at each address in the instruction stream, and store it in
    * insn_flags_. Addresses that are in the middle of an instruction, or that are part of switch
@@ -743,6 +753,12 @@
   MethodVerifier* link_;
 
   friend class art::Thread;
+
+  // Map of dex pcs of invocations of java.lang.String.<init> to the set of other registers that
+  // contain the uninitialized this pointer to that invoke. Will contain no entry if there are
+  // no other registers.
+  SafeMap<uint32_t, std::set<uint32_t>> string_init_pc_reg_map_;
+
   DISALLOW_COPY_AND_ASSIGN(MethodVerifier);
 };
 std::ostream& operator<<(std::ostream& os, const MethodVerifier::FailureKind& rhs);
diff --git a/runtime/verifier/register_line.cc b/runtime/verifier/register_line.cc
index ed588fc..2838681 100644
--- a/runtime/verifier/register_line.cc
+++ b/runtime/verifier/register_line.cc
@@ -127,14 +127,25 @@
   return true;
 }
 
-void RegisterLine::MarkRefsAsInitialized(MethodVerifier* verifier, const RegType& uninit_type) {
+void RegisterLine::MarkRefsAsInitialized(MethodVerifier* verifier, const RegType& uninit_type,
+                                         uint32_t this_reg, uint32_t dex_pc) {
   DCHECK(uninit_type.IsUninitializedTypes());
+  bool is_string = !uninit_type.IsUnresolvedTypes() && uninit_type.GetClass()->IsStringClass();
   const RegType& init_type = verifier->GetRegTypeCache()->FromUninitialized(uninit_type);
   size_t changed = 0;
   for (uint32_t i = 0; i < num_regs_; i++) {
     if (GetRegisterType(verifier, i).Equals(uninit_type)) {
       line_[i] = init_type.GetId();
       changed++;
+      if (is_string && i != this_reg) {
+        auto it = verifier->GetStringInitPcRegMap().find(dex_pc);
+        if (it != verifier->GetStringInitPcRegMap().end()) {
+          it->second.insert(i);
+        } else {
+          std::set<uint32_t> reg_set = { i };
+          verifier->GetStringInitPcRegMap().Put(dex_pc, reg_set);
+        }
+      }
     }
   }
   DCHECK_GT(changed, 0u);
diff --git a/runtime/verifier/register_line.h b/runtime/verifier/register_line.h
index 376dbf1..0de0d9c 100644
--- a/runtime/verifier/register_line.h
+++ b/runtime/verifier/register_line.h
@@ -138,7 +138,8 @@
    * reference type. This is called when an appropriate constructor is invoked -- all copies of
    * the reference must be marked as initialized.
    */
-  void MarkRefsAsInitialized(MethodVerifier* verifier, const RegType& uninit_type)
+  void MarkRefsAsInitialized(MethodVerifier* verifier, const RegType& uninit_type,
+                             uint32_t this_reg, uint32_t dex_pc)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   /*
diff --git a/runtime/well_known_classes.cc b/runtime/well_known_classes.cc
index a2d0427..2843806 100644
--- a/runtime/well_known_classes.cc
+++ b/runtime/well_known_classes.cc
@@ -49,6 +49,7 @@
 jclass WellKnownClasses::java_lang_RuntimeException;
 jclass WellKnownClasses::java_lang_StackOverflowError;
 jclass WellKnownClasses::java_lang_String;
+jclass WellKnownClasses::java_lang_StringFactory;
 jclass WellKnownClasses::java_lang_System;
 jclass WellKnownClasses::java_lang_Thread;
 jclass WellKnownClasses::java_lang_Thread__UncaughtExceptionHandler;
@@ -79,6 +80,38 @@
 jmethodID WellKnownClasses::java_lang_reflect_Proxy_invoke;
 jmethodID WellKnownClasses::java_lang_Runtime_nativeLoad;
 jmethodID WellKnownClasses::java_lang_Short_valueOf;
+jmethodID WellKnownClasses::java_lang_String_init;
+jmethodID WellKnownClasses::java_lang_String_init_B;
+jmethodID WellKnownClasses::java_lang_String_init_BI;
+jmethodID WellKnownClasses::java_lang_String_init_BII;
+jmethodID WellKnownClasses::java_lang_String_init_BIII;
+jmethodID WellKnownClasses::java_lang_String_init_BIIString;
+jmethodID WellKnownClasses::java_lang_String_init_BString;
+jmethodID WellKnownClasses::java_lang_String_init_BIICharset;
+jmethodID WellKnownClasses::java_lang_String_init_BCharset;
+jmethodID WellKnownClasses::java_lang_String_init_C;
+jmethodID WellKnownClasses::java_lang_String_init_CII;
+jmethodID WellKnownClasses::java_lang_String_init_IIC;
+jmethodID WellKnownClasses::java_lang_String_init_String;
+jmethodID WellKnownClasses::java_lang_String_init_StringBuffer;
+jmethodID WellKnownClasses::java_lang_String_init_III;
+jmethodID WellKnownClasses::java_lang_String_init_StringBuilder;
+jmethodID WellKnownClasses::java_lang_StringFactory_newEmptyString;
+jmethodID WellKnownClasses::java_lang_StringFactory_newStringFromBytes_B;
+jmethodID WellKnownClasses::java_lang_StringFactory_newStringFromBytes_BI;
+jmethodID WellKnownClasses::java_lang_StringFactory_newStringFromBytes_BII;
+jmethodID WellKnownClasses::java_lang_StringFactory_newStringFromBytes_BIII;
+jmethodID WellKnownClasses::java_lang_StringFactory_newStringFromBytes_BIIString;
+jmethodID WellKnownClasses::java_lang_StringFactory_newStringFromBytes_BString;
+jmethodID WellKnownClasses::java_lang_StringFactory_newStringFromBytes_BIICharset;
+jmethodID WellKnownClasses::java_lang_StringFactory_newStringFromBytes_BCharset;
+jmethodID WellKnownClasses::java_lang_StringFactory_newStringFromChars_C;
+jmethodID WellKnownClasses::java_lang_StringFactory_newStringFromChars_CII;
+jmethodID WellKnownClasses::java_lang_StringFactory_newStringFromChars_IIC;
+jmethodID WellKnownClasses::java_lang_StringFactory_newStringFromString;
+jmethodID WellKnownClasses::java_lang_StringFactory_newStringFromStringBuffer;
+jmethodID WellKnownClasses::java_lang_StringFactory_newStringFromCodePoints;
+jmethodID WellKnownClasses::java_lang_StringFactory_newStringFromStringBuilder;
 jmethodID WellKnownClasses::java_lang_System_runFinalization = nullptr;
 jmethodID WellKnownClasses::java_lang_Thread_init;
 jmethodID WellKnownClasses::java_lang_Thread_run;
@@ -188,6 +221,7 @@
   java_lang_RuntimeException = CacheClass(env, "java/lang/RuntimeException");
   java_lang_StackOverflowError = CacheClass(env, "java/lang/StackOverflowError");
   java_lang_String = CacheClass(env, "java/lang/String");
+  java_lang_StringFactory = CacheClass(env, "java/lang/StringFactory");
   java_lang_System = CacheClass(env, "java/lang/System");
   java_lang_Thread = CacheClass(env, "java/lang/Thread");
   java_lang_Thread__UncaughtExceptionHandler = CacheClass(env,
@@ -223,6 +257,62 @@
   org_apache_harmony_dalvik_ddmc_DdmServer_broadcast = CacheMethod(env, org_apache_harmony_dalvik_ddmc_DdmServer, true, "broadcast", "(I)V");
   org_apache_harmony_dalvik_ddmc_DdmServer_dispatch = CacheMethod(env, org_apache_harmony_dalvik_ddmc_DdmServer, true, "dispatch", "(I[BII)Lorg/apache/harmony/dalvik/ddmc/Chunk;");
 
+  java_lang_String_init = CacheMethod(env, java_lang_String, false, "<init>", "()V");
+  java_lang_String_init_B = CacheMethod(env, java_lang_String, false, "<init>", "([B)V");
+  java_lang_String_init_BI = CacheMethod(env, java_lang_String, false, "<init>", "([BI)V");
+  java_lang_String_init_BII = CacheMethod(env, java_lang_String, false, "<init>", "([BII)V");
+  java_lang_String_init_BIII = CacheMethod(env, java_lang_String, false, "<init>", "([BIII)V");
+  java_lang_String_init_BIIString = CacheMethod(env, java_lang_String, false, "<init>",
+      "([BIILjava/lang/String;)V");
+  java_lang_String_init_BString = CacheMethod(env, java_lang_String, false, "<init>",
+      "([BLjava/lang/String;)V");
+  java_lang_String_init_BIICharset = CacheMethod(env, java_lang_String, false, "<init>",
+      "([BIILjava/nio/charset/Charset;)V");
+  java_lang_String_init_BCharset = CacheMethod(env, java_lang_String, false, "<init>",
+      "([BLjava/nio/charset/Charset;)V");
+  java_lang_String_init_C = CacheMethod(env, java_lang_String, false, "<init>", "([C)V");
+  java_lang_String_init_CII = CacheMethod(env, java_lang_String, false, "<init>", "([CII)V");
+  java_lang_String_init_IIC = CacheMethod(env, java_lang_String, false, "<init>", "(II[C)V");
+  java_lang_String_init_String = CacheMethod(env, java_lang_String, false, "<init>",
+      "(Ljava/lang/String;)V");
+  java_lang_String_init_StringBuffer = CacheMethod(env, java_lang_String, false, "<init>",
+      "(Ljava/lang/StringBuffer;)V");
+  java_lang_String_init_III = CacheMethod(env, java_lang_String, false, "<init>", "([III)V");
+  java_lang_String_init_StringBuilder = CacheMethod(env, java_lang_String, false, "<init>",
+       "(Ljava/lang/StringBuilder;)V");
+  java_lang_StringFactory_newEmptyString = CacheMethod(env, java_lang_StringFactory, true,
+       "newEmptyString", "()Ljava/lang/String;");
+  java_lang_StringFactory_newStringFromBytes_B = CacheMethod(env, java_lang_StringFactory, true,
+       "newStringFromBytes", "([B)Ljava/lang/String;");
+  java_lang_StringFactory_newStringFromBytes_BI = CacheMethod(env, java_lang_StringFactory, true,
+       "newStringFromBytes", "([BI)Ljava/lang/String;");
+  java_lang_StringFactory_newStringFromBytes_BII = CacheMethod(env, java_lang_StringFactory, true,
+       "newStringFromBytes", "([BII)Ljava/lang/String;");
+  java_lang_StringFactory_newStringFromBytes_BIII = CacheMethod(env, java_lang_StringFactory, true,
+       "newStringFromBytes", "([BIII)Ljava/lang/String;");
+  java_lang_StringFactory_newStringFromBytes_BIIString = CacheMethod(env, java_lang_StringFactory,
+       true, "newStringFromBytes", "([BIILjava/lang/String;)Ljava/lang/String;");
+  java_lang_StringFactory_newStringFromBytes_BString = CacheMethod(env, java_lang_StringFactory,
+       true, "newStringFromBytes", "([BLjava/lang/String;)Ljava/lang/String;");
+  java_lang_StringFactory_newStringFromBytes_BIICharset = CacheMethod(env, java_lang_StringFactory,
+       true, "newStringFromBytes", "([BIILjava/nio/charset/Charset;)Ljava/lang/String;");
+  java_lang_StringFactory_newStringFromBytes_BCharset = CacheMethod(env, java_lang_StringFactory,
+       true, "newStringFromBytes", "([BLjava/nio/charset/Charset;)Ljava/lang/String;");
+  java_lang_StringFactory_newStringFromChars_C = CacheMethod(env, java_lang_StringFactory, true,
+       "newStringFromChars", "([C)Ljava/lang/String;");
+  java_lang_StringFactory_newStringFromChars_CII = CacheMethod(env, java_lang_StringFactory, true,
+       "newStringFromChars", "([CII)Ljava/lang/String;");
+  java_lang_StringFactory_newStringFromChars_IIC = CacheMethod(env, java_lang_StringFactory, true,
+       "newStringFromChars", "(II[C)Ljava/lang/String;");
+  java_lang_StringFactory_newStringFromString = CacheMethod(env, java_lang_StringFactory, true,
+       "newStringFromString", "(Ljava/lang/String;)Ljava/lang/String;");
+  java_lang_StringFactory_newStringFromStringBuffer = CacheMethod(env, java_lang_StringFactory,
+       true, "newStringFromStringBuffer", "(Ljava/lang/StringBuffer;)Ljava/lang/String;");
+  java_lang_StringFactory_newStringFromCodePoints = CacheMethod(env, java_lang_StringFactory,
+       true, "newStringFromCodePoints", "([III)Ljava/lang/String;");
+  java_lang_StringFactory_newStringFromStringBuilder = CacheMethod(env, java_lang_StringFactory,
+       true, "newStringFromStringBuilder", "(Ljava/lang/StringBuilder;)Ljava/lang/String;");
+
   dalvik_system_DexFile_cookie = CacheField(env, dalvik_system_DexFile, false, "mCookie", "Ljava/lang/Object;");
   dalvik_system_PathClassLoader_pathList = CacheField(env, dalvik_system_PathClassLoader, false, "pathList", "Ldalvik/system/DexPathList;");
   dalvik_system_DexPathList_dexElements = CacheField(env, dalvik_system_DexPathList, false, "dexElements", "[Ldalvik/system/DexPathList$Element;");
@@ -265,6 +355,8 @@
   java_lang_Integer_valueOf = CachePrimitiveBoxingMethod(env, 'I', "java/lang/Integer");
   java_lang_Long_valueOf = CachePrimitiveBoxingMethod(env, 'J', "java/lang/Long");
   java_lang_Short_valueOf = CachePrimitiveBoxingMethod(env, 'S', "java/lang/Short");
+
+  Thread::Current()->InitStringEntryPoints();
 }
 
 void WellKnownClasses::LateInit(JNIEnv* env) {
@@ -276,4 +368,43 @@
   return reinterpret_cast<mirror::Class*>(Thread::Current()->DecodeJObject(global_jclass));
 }
 
+jmethodID WellKnownClasses::StringInitToStringFactoryMethodID(jmethodID string_init) {
+  // TODO: Prioritize ordering.
+  if (string_init == java_lang_String_init) {
+    return java_lang_StringFactory_newEmptyString;
+  } else if (string_init == java_lang_String_init_B) {
+    return java_lang_StringFactory_newStringFromBytes_B;
+  } else if (string_init == java_lang_String_init_BI) {
+    return java_lang_StringFactory_newStringFromBytes_BI;
+  } else if (string_init == java_lang_String_init_BII) {
+    return java_lang_StringFactory_newStringFromBytes_BII;
+  } else if (string_init == java_lang_String_init_BIII) {
+    return java_lang_StringFactory_newStringFromBytes_BIII;
+  } else if (string_init == java_lang_String_init_BIIString) {
+    return java_lang_StringFactory_newStringFromBytes_BIIString;
+  } else if (string_init == java_lang_String_init_BString) {
+    return java_lang_StringFactory_newStringFromBytes_BString;
+  } else if (string_init == java_lang_String_init_BIICharset) {
+    return java_lang_StringFactory_newStringFromBytes_BIICharset;
+  } else if (string_init == java_lang_String_init_BCharset) {
+    return java_lang_StringFactory_newStringFromBytes_BCharset;
+  } else if (string_init == java_lang_String_init_C) {
+    return java_lang_StringFactory_newStringFromChars_C;
+  } else if (string_init == java_lang_String_init_CII) {
+    return java_lang_StringFactory_newStringFromChars_CII;
+  } else if (string_init == java_lang_String_init_IIC) {
+    return java_lang_StringFactory_newStringFromChars_IIC;
+  } else if (string_init == java_lang_String_init_String) {
+    return java_lang_StringFactory_newStringFromString;
+  } else if (string_init == java_lang_String_init_StringBuffer) {
+    return java_lang_StringFactory_newStringFromStringBuffer;
+  } else if (string_init == java_lang_String_init_III) {
+    return java_lang_StringFactory_newStringFromCodePoints;
+  } else if (string_init == java_lang_String_init_StringBuilder) {
+    return java_lang_StringFactory_newStringFromStringBuilder;
+  }
+  LOG(FATAL) << "Could not find StringFactory method for String.<init>";
+  return nullptr;
+}
+
 }  // namespace art
diff --git a/runtime/well_known_classes.h b/runtime/well_known_classes.h
index cef9d55..acb2656 100644
--- a/runtime/well_known_classes.h
+++ b/runtime/well_known_classes.h
@@ -35,6 +35,7 @@
  public:
   static void Init(JNIEnv* env);  // Run before native methods are registered.
   static void LateInit(JNIEnv* env);  // Run after native methods are registered.
+  static jmethodID StringInitToStringFactoryMethodID(jmethodID string_init);
 
   static mirror::Class* ToClass(jclass global_jclass)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -60,6 +61,7 @@
   static jclass java_lang_RuntimeException;
   static jclass java_lang_StackOverflowError;
   static jclass java_lang_String;
+  static jclass java_lang_StringFactory;
   static jclass java_lang_System;
   static jclass java_lang_Thread;
   static jclass java_lang_ThreadGroup;
@@ -90,6 +92,38 @@
   static jmethodID java_lang_reflect_Proxy_invoke;
   static jmethodID java_lang_Runtime_nativeLoad;
   static jmethodID java_lang_Short_valueOf;
+  static jmethodID java_lang_String_init;
+  static jmethodID java_lang_String_init_B;
+  static jmethodID java_lang_String_init_BI;
+  static jmethodID java_lang_String_init_BII;
+  static jmethodID java_lang_String_init_BIII;
+  static jmethodID java_lang_String_init_BIIString;
+  static jmethodID java_lang_String_init_BString;
+  static jmethodID java_lang_String_init_BIICharset;
+  static jmethodID java_lang_String_init_BCharset;
+  static jmethodID java_lang_String_init_C;
+  static jmethodID java_lang_String_init_CII;
+  static jmethodID java_lang_String_init_IIC;
+  static jmethodID java_lang_String_init_String;
+  static jmethodID java_lang_String_init_StringBuffer;
+  static jmethodID java_lang_String_init_III;
+  static jmethodID java_lang_String_init_StringBuilder;
+  static jmethodID java_lang_StringFactory_newEmptyString;
+  static jmethodID java_lang_StringFactory_newStringFromBytes_B;
+  static jmethodID java_lang_StringFactory_newStringFromBytes_BI;
+  static jmethodID java_lang_StringFactory_newStringFromBytes_BII;
+  static jmethodID java_lang_StringFactory_newStringFromBytes_BIII;
+  static jmethodID java_lang_StringFactory_newStringFromBytes_BIIString;
+  static jmethodID java_lang_StringFactory_newStringFromBytes_BString;
+  static jmethodID java_lang_StringFactory_newStringFromBytes_BIICharset;
+  static jmethodID java_lang_StringFactory_newStringFromBytes_BCharset;
+  static jmethodID java_lang_StringFactory_newStringFromChars_C;
+  static jmethodID java_lang_StringFactory_newStringFromChars_CII;
+  static jmethodID java_lang_StringFactory_newStringFromChars_IIC;
+  static jmethodID java_lang_StringFactory_newStringFromString;
+  static jmethodID java_lang_StringFactory_newStringFromStringBuffer;
+  static jmethodID java_lang_StringFactory_newStringFromCodePoints;
+  static jmethodID java_lang_StringFactory_newStringFromStringBuilder;
   static jmethodID java_lang_System_runFinalization;
   static jmethodID java_lang_Thread_init;
   static jmethodID java_lang_Thread_run;
diff --git a/test/004-JniTest/jni_test.cc b/test/004-JniTest/jni_test.cc
index b23b97b..cdc5461 100644
--- a/test/004-JniTest/jni_test.cc
+++ b/test/004-JniTest/jni_test.cc
@@ -548,3 +548,23 @@
 extern "C" void JNICALL Java_Main_testCallNonvirtual(JNIEnv* env, jclass) {
   JniCallNonvirtualVoidMethodTest(env).Test();
 }
+
+extern "C" JNIEXPORT void JNICALL Java_Main_testNewStringObject(JNIEnv* env, jclass) {
+  // Build a java.lang.String via NewObject + String.<init>([B) and check it round-trips "Test".
+  const char* expected = "Test";
+  int expected_len = strlen(expected);
+  jclass string_class = env->FindClass("java/lang/String");
+  assert(string_class != nullptr);
+  jmethodID ctor = env->GetMethodID(string_class, "<init>", "([B)V");
+  assert(ctor != nullptr);
+  assert(!env->ExceptionCheck());
+  jbyteArray bytes = env->NewByteArray(expected_len);
+  env->SetByteArrayRegion(bytes, 0, expected_len, reinterpret_cast<const jbyte*>(expected));
+  jstring result = reinterpret_cast<jstring>(env->NewObject(string_class, ctor, bytes));
+  assert(result != nullptr);
+  assert(env->GetStringLength(result) == expected_len);
+  assert(env->GetStringUTFLength(result) == expected_len);
+  const char* utf = env->GetStringUTFChars(result, nullptr);
+  assert(strcmp(expected, utf) == 0);
+  env->ReleaseStringUTFChars(result, utf);
+}
diff --git a/test/004-JniTest/src/Main.java b/test/004-JniTest/src/Main.java
index 8e92010..584fae3 100644
--- a/test/004-JniTest/src/Main.java
+++ b/test/004-JniTest/src/Main.java
@@ -33,6 +33,7 @@
         testShallowGetCallingClassLoader();
         testShallowGetStackClass2();
         testCallNonvirtual();
+        testNewStringObject();
     }
 
     private static native void testFindClassOnAttachedNativeThread();
@@ -184,6 +185,8 @@
     private static native void nativeTestShallowGetStackClass2();
 
     private static native void testCallNonvirtual();
+
+    private static native void testNewStringObject();
 }
 
 class JniCallNonvirtualTest {
diff --git a/test/021-string2/expected.txt b/test/021-string2/expected.txt
index bd7f049..a9c6eb8 100644
--- a/test/021-string2/expected.txt
+++ b/test/021-string2/expected.txt
@@ -1 +1,2 @@
 Got expected npe
+OK
diff --git a/test/021-string2/src/Main.java b/test/021-string2/src/Main.java
index 0239a3c..0226614 100644
--- a/test/021-string2/src/Main.java
+++ b/test/021-string2/src/Main.java
@@ -15,12 +15,13 @@
  */
 
 import junit.framework.Assert;
+import java.lang.reflect.Method;
 
 /**
  * more string tests
  */
 public class Main {
-    public static void main(String args[]) {
+    public static void main(String args[]) throws Exception {
         String test = "0123456789";
         String test1 = new String("0123456789");    // different object
         String test2 = new String("0123456780");    // different value
@@ -83,5 +84,10 @@
 
         Assert.assertEquals("this is a path", test.replaceAll("/", " "));
         Assert.assertEquals("this is a path", test.replace("/", " "));
+
+        Class Strings = Class.forName("com.android.org.bouncycastle.util.Strings");
+        Method fromUTF8ByteArray = Strings.getDeclaredMethod("fromUTF8ByteArray", byte[].class);
+        String result = (String) fromUTF8ByteArray.invoke(null, new byte[] {'O', 'K'});
+        System.out.println(result);
     }
 }
diff --git a/test/046-reflect/src/Main.java b/test/046-reflect/src/Main.java
index 59f7001..0d8e576 100644
--- a/test/046-reflect/src/Main.java
+++ b/test/046-reflect/src/Main.java
@@ -233,6 +233,20 @@
             field.set(instance, null);
 
             /*
+             * Try getDeclaredField on a non-existent field.
+             */
+            try {
+                field = target.getDeclaredField("nonExistant");
+                System.out.println("ERROR: Expected NoSuchFieldException");
+            } catch (NoSuchFieldException nsfe) {
+                String msg = nsfe.getMessage();
+                if (!msg.contains("Target;")) {
+                    System.out.println("  NoSuchFieldException '" + msg +
+                        "' didn't contain class");
+                }
+            }
+
+            /*
              * Do some stuff with long.
              */
             long longVal;
@@ -868,4 +882,4 @@
             System.out.println(e);
         }
     }
-}
\ No newline at end of file
+}
diff --git a/test/082-inline-execute/src/Main.java b/test/082-inline-execute/src/Main.java
index 0e90c4d..4dfa73c 100644
--- a/test/082-inline-execute/src/Main.java
+++ b/test/082-inline-execute/src/Main.java
@@ -236,15 +236,6 @@
     String str10 = "abcdefghij";
     String str40 = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabc";
 
-    int supplementaryChar = 0x20b9f;
-    String surrogatePair = "\ud842\udf9f";
-    String stringWithSurrogates = "hello " + surrogatePair + " world";
-
-    Assert.assertEquals(stringWithSurrogates.indexOf(supplementaryChar), "hello ".length());
-    Assert.assertEquals(stringWithSurrogates.indexOf(supplementaryChar, 2), "hello ".length());
-    Assert.assertEquals(stringWithSurrogates.indexOf(supplementaryChar, 6), 6);
-    Assert.assertEquals(stringWithSurrogates.indexOf(supplementaryChar, 7), -1);
-
     Assert.assertEquals(str0.indexOf('a'), -1);
     Assert.assertEquals(str3.indexOf('a'), 0);
     Assert.assertEquals(str3.indexOf('b'), 1);
@@ -269,24 +260,123 @@
     Assert.assertEquals(str40.indexOf('a',10), 10);
     Assert.assertEquals(str40.indexOf('b',40), -1);
 
+    testIndexOfNull();
+
+    // Same data as above, but stored so it's not a literal in the next test. -2 stands for
+    // indexOf(I) instead of indexOf(II).
+    start--;
+    int[][] searchData = {
+        { 'a', -2, -1 },
+        { 'a', -2, 0 },
+        { 'b', -2, 1 },
+        { 'c', -2, 2 },
+        { 'j', -2, 9 },
+        { 'a', -2, 0 },
+        { 'b', -2, 38 },
+        { 'c', -2, 39 },
+        { 'a', 20, -1 },
+        { 'a', 0, -1 },
+        { 'a', -1, -1 },
+        { '/', ++start, -1 },
+        { 'a', negIndex[0], -1 },
+        { 'a', 0, 0 },
+        { 'a', 1, -1 },
+        { 'a', 1234, -1 },
+        { 'b', 0, 1 },
+        { 'b', 1, 1 },
+        { 'c', 2, 2 },
+        { 'j', 5, 9 },
+        { 'j', 9, 9 },
+        { 'a', 10, 10 },
+        { 'b', 40, -1 },
+    };
+    testStringIndexOfChars(searchData);
+
+    testSurrogateIndexOf();
+  }
+
+  private static void testStringIndexOfChars(int[][] searchData) {
+    // Thin wrapper around testStringIndexOfCharsImpl; the try-catch is used to avoid inlining.
+    try {
+      testStringIndexOfCharsImpl(searchData);
+    } catch (Exception e) {  // Not expected; any exception is reported on stdout.
+      System.out.println("Unexpected exception");
+    }
+  }
+
+  private static void testStringIndexOfCharsImpl(int[][] searchData) {
+    String str0 = "";
+    String str1 = "/";
+    String str3 = "abc";
+    String str10 = "abcdefghij";
+    String str40 = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabc";
+    // searchData rows are { char, fromIndex, expected }; rows 0-7 ignore fromIndex.
+    Assert.assertEquals(str0.indexOf(searchData[0][0]), searchData[0][2]);
+    Assert.assertEquals(str3.indexOf(searchData[1][0]), searchData[1][2]);
+    Assert.assertEquals(str3.indexOf(searchData[2][0]), searchData[2][2]);
+    Assert.assertEquals(str3.indexOf(searchData[3][0]), searchData[3][2]);
+    Assert.assertEquals(str10.indexOf(searchData[4][0]), searchData[4][2]);
+    Assert.assertEquals(str40.indexOf(searchData[5][0]), searchData[5][2]);
+    Assert.assertEquals(str40.indexOf(searchData[6][0]), searchData[6][2]);
+    Assert.assertEquals(str40.indexOf(searchData[7][0]), searchData[7][2]);
+    Assert.assertEquals(str0.indexOf(searchData[8][0], searchData[8][1]), searchData[8][2]);
+    Assert.assertEquals(str0.indexOf(searchData[9][0], searchData[9][1]), searchData[9][2]);
+    Assert.assertEquals(str0.indexOf(searchData[10][0], searchData[10][1]), searchData[10][2]);
+    Assert.assertEquals(str1.indexOf(searchData[11][0], searchData[11][1]), searchData[11][2]);
+    Assert.assertEquals(str1.indexOf(searchData[12][0], searchData[12][1]), searchData[12][2]);
+    Assert.assertEquals(str3.indexOf(searchData[13][0], searchData[13][1]), searchData[13][2]);
+    Assert.assertEquals(str3.indexOf(searchData[14][0], searchData[14][1]), searchData[14][2]);
+    Assert.assertEquals(str3.indexOf(searchData[15][0], searchData[15][1]), searchData[15][2]);
+    Assert.assertEquals(str3.indexOf(searchData[16][0], searchData[16][1]), searchData[16][2]);
+    Assert.assertEquals(str3.indexOf(searchData[17][0], searchData[17][1]), searchData[17][2]);
+    Assert.assertEquals(str3.indexOf(searchData[18][0], searchData[18][1]), searchData[18][2]);
+    Assert.assertEquals(str10.indexOf(searchData[19][0], searchData[19][1]), searchData[19][2]);
+    Assert.assertEquals(str10.indexOf(searchData[20][0], searchData[20][1]), searchData[20][2]);
+    Assert.assertEquals(str40.indexOf(searchData[21][0], searchData[21][1]), searchData[21][2]);
+    Assert.assertEquals(str40.indexOf(searchData[22][0], searchData[22][1]), searchData[22][2]);
+  }
+
+  private static void testSurrogateIndexOf() {
+    int supplementaryChar = 0x20b9f;  // Code point above 0xFFFF.
+    String surrogatePair = "\ud842\udf9f";  // UTF-16 surrogate pair encoding 0x20b9f.
+    String stringWithSurrogates = "hello " + surrogatePair + " world";
+
+    Assert.assertEquals(stringWithSurrogates.indexOf(supplementaryChar), "hello ".length());
+    Assert.assertEquals(stringWithSurrogates.indexOf(supplementaryChar, 2), "hello ".length());
+    Assert.assertEquals(stringWithSurrogates.indexOf(supplementaryChar, 6), 6);
+    Assert.assertEquals(stringWithSurrogates.indexOf(supplementaryChar, 7), -1);
+    // Values that share the low 16 bits but differ in higher bits must not be found.
+    Assert.assertEquals(stringWithSurrogates.indexOf(supplementaryChar - 0x10000), -1);
+    Assert.assertEquals(stringWithSurrogates.indexOf(supplementaryChar | 0x80000000), -1);
+  }
+
+  private static void testIndexOfNull() {  // indexOf on a null String must throw NPE.
     String strNull = null;
     try {
-      strNull.indexOf('a');
+      testNullIndex(strNull, 'a');
       Assert.fail();
     } catch (NullPointerException expected) {
     }
     try {
-      strNull.indexOf('a', 0);
+      testNullIndex(strNull, 'a', 0);
       Assert.fail();
     } catch (NullPointerException expected) {
     }
     try {
-      strNull.indexOf('a', -1);
+      testNullIndex(strNull, 'a', -1);
       Assert.fail();
     } catch (NullPointerException expected) {
     }
   }
 
+  private static int testNullIndex(String strNull, int c) {
+    return strNull.indexOf(c);  // Single-argument indexOf(int) on the given receiver.
+  }
+
+  private static int testNullIndex(String strNull, int c, int startIndex) {
+    return strNull.indexOf(c, startIndex);  // Two-argument indexOf(int, int) overload.
+  }
+
   public static void test_String_compareTo() {
     String test = "0123456789";
     String test1 = new String("0123456789");    // different object
diff --git a/test/090-loop-formation/expected.txt b/test/090-loop-formation/expected.txt
index b7e0bb3..b945c30 100644
--- a/test/090-loop-formation/expected.txt
+++ b/test/090-loop-formation/expected.txt
@@ -3,3 +3,4 @@
 counter3 is 32767
 counter4 is 0
 counter5 is 65534
+256
diff --git a/test/090-loop-formation/src/Main.java b/test/090-loop-formation/src/Main.java
index 7c16667..16ff3b2 100644
--- a/test/090-loop-formation/src/Main.java
+++ b/test/090-loop-formation/src/Main.java
@@ -52,5 +52,31 @@
         System.out.println("counter3 is " + counter3);
         System.out.println("counter4 is " + counter4);
         System.out.println("counter5 is " + counter5);
+
+        deeplyNested();
+    }
+
+    // GVN is limited to a maximum loop depth of 6. To track whether dependent passes are
+    // correctly turned off, test some very simple, but deeply nested (8 levels) loops.
+    private static void deeplyNested() {
+        int sum = 0;
+        for (int i = 0; i < 2; i++) {
+            for (int j = 0; j < 2; j++) {
+                for (int k = 0; k < 2; k++) {
+                    for (int l = 0; l < 2; l++) {
+                        for (int m = 0; m < 2; m++) {
+                            for (int n = 0; n < 2; n++) {
+                                for (int o = 0; o < 2; o++) {
+                                    for (int p = 0; p < 2; p++) {
+                                        sum++;  // Runs 2^8 = 256 times in total.
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+        System.out.println(sum);  // Prints the final count of inner-loop iterations.
     }
 }
diff --git a/test/098-ddmc/src/Main.java b/test/098-ddmc/src/Main.java
index 962bd7f..f41ff2a 100644
--- a/test/098-ddmc/src/Main.java
+++ b/test/098-ddmc/src/Main.java
@@ -44,7 +44,7 @@
         System.out.println("Confirm when we overflow, we don't roll over to zero. b/17392248");
         final int overflowAllocations = 64 * 1024;  // Won't fit in unsigned 16-bit value.
         for (int i = 0; i < overflowAllocations; i++) {
-            new String("fnord");
+            new Object();
         }
         Allocations after = new Allocations(DdmVmInternal.getRecentAllocations());
         System.out.println("before < overflowAllocations=" + (before.numberOfEntries < overflowAllocations));
diff --git a/test/100-reflect2/expected.txt b/test/100-reflect2/expected.txt
index 1f8df1d..7db61a1 100644
--- a/test/100-reflect2/expected.txt
+++ b/test/100-reflect2/expected.txt
@@ -32,8 +32,8 @@
 62 (class java.lang.Long)
 14 (class java.lang.Short)
 [public java.lang.String(), java.lang.String(int,int,char[]), public java.lang.String(java.lang.String), public java.lang.String(java.lang.StringBuffer), public java.lang.String(java.lang.StringBuilder), public java.lang.String(byte[]), public java.lang.String(byte[],int), public java.lang.String(byte[],int,int), public java.lang.String(byte[],int,int,int), public java.lang.String(byte[],int,int,java.lang.String) throws java.io.UnsupportedEncodingException, public java.lang.String(byte[],int,int,java.nio.charset.Charset), public java.lang.String(byte[],java.lang.String) throws java.io.UnsupportedEncodingException, public java.lang.String(byte[],java.nio.charset.Charset), public java.lang.String(char[]), public java.lang.String(char[],int,int), public java.lang.String(int[],int,int)]
-[private final int java.lang.String.count, private int java.lang.String.hashCode, private final int java.lang.String.offset, private final char[] java.lang.String.value, private static final char[] java.lang.String.ASCII, public static final java.util.Comparator java.lang.String.CASE_INSENSITIVE_ORDER, private static final char java.lang.String.REPLACEMENT_CHAR, private static final long java.lang.String.serialVersionUID]
-[void java.lang.String._getChars(int,int,char[],int), public char java.lang.String.charAt(int), public int java.lang.String.codePointAt(int), public int java.lang.String.codePointBefore(int), public int java.lang.String.codePointCount(int,int), public int java.lang.String.compareTo(java.lang.Object), public native int java.lang.String.compareTo(java.lang.String), public int java.lang.String.compareToIgnoreCase(java.lang.String), public java.lang.String java.lang.String.concat(java.lang.String), public boolean java.lang.String.contains(java.lang.CharSequence), public boolean java.lang.String.contentEquals(java.lang.CharSequence), public boolean java.lang.String.contentEquals(java.lang.StringBuffer), public boolean java.lang.String.endsWith(java.lang.String), public boolean java.lang.String.equals(java.lang.Object), public boolean java.lang.String.equalsIgnoreCase(java.lang.String), public void java.lang.String.getBytes(int,int,byte[],int), public [B java.lang.String.getBytes(), public [B java.lang.String.getBytes(java.lang.String) throws java.io.UnsupportedEncodingException, public [B java.lang.String.getBytes(java.nio.charset.Charset), public void java.lang.String.getChars(int,int,char[],int), public int java.lang.String.hashCode(), public int java.lang.String.indexOf(int), public int java.lang.String.indexOf(int,int), public int java.lang.String.indexOf(java.lang.String), public int java.lang.String.indexOf(java.lang.String,int), public native java.lang.String java.lang.String.intern(), public boolean java.lang.String.isEmpty(), public int java.lang.String.lastIndexOf(int), public int java.lang.String.lastIndexOf(int,int), public int java.lang.String.lastIndexOf(java.lang.String), public int java.lang.String.lastIndexOf(java.lang.String,int), public int java.lang.String.length(), public boolean java.lang.String.matches(java.lang.String), public int java.lang.String.offsetByCodePoints(int,int), public boolean 
java.lang.String.regionMatches(int,java.lang.String,int,int), public boolean java.lang.String.regionMatches(boolean,int,java.lang.String,int,int), public java.lang.String java.lang.String.replace(char,char), public java.lang.String java.lang.String.replace(java.lang.CharSequence,java.lang.CharSequence), public java.lang.String java.lang.String.replaceAll(java.lang.String,java.lang.String), public java.lang.String java.lang.String.replaceFirst(java.lang.String,java.lang.String), public [Ljava.lang.String; java.lang.String.split(java.lang.String), public [Ljava.lang.String; java.lang.String.split(java.lang.String,int), public boolean java.lang.String.startsWith(java.lang.String), public boolean java.lang.String.startsWith(java.lang.String,int), public java.lang.CharSequence java.lang.String.subSequence(int,int), public java.lang.String java.lang.String.substring(int), public java.lang.String java.lang.String.substring(int,int), public [C java.lang.String.toCharArray(), public java.lang.String java.lang.String.toLowerCase(), public java.lang.String java.lang.String.toLowerCase(java.util.Locale), public java.lang.String java.lang.String.toString(), public java.lang.String java.lang.String.toUpperCase(), public java.lang.String java.lang.String.toUpperCase(java.util.Locale), public java.lang.String java.lang.String.trim(), public static java.lang.String java.lang.String.copyValueOf(char[]), public static java.lang.String java.lang.String.copyValueOf(char[],int,int), private java.lang.StringIndexOutOfBoundsException java.lang.String.failedBoundsCheck(int,int,int), private native int java.lang.String.fastIndexOf(int,int), private char java.lang.String.foldCase(char), public static java.lang.String java.lang.String.format(java.lang.String,java.lang.Object[]), public static java.lang.String java.lang.String.format(java.util.Locale,java.lang.String,java.lang.Object[]), private java.lang.StringIndexOutOfBoundsException java.lang.String.indexAndLength(int), private static int 
java.lang.String.indexOf(java.lang.String,java.lang.String,int,int,char), private int java.lang.String.indexOfSupplementary(int,int), private int java.lang.String.lastIndexOfSupplementary(int,int), private java.lang.StringIndexOutOfBoundsException java.lang.String.startEndAndLength(int,int), public static java.lang.String java.lang.String.valueOf(char), public static java.lang.String java.lang.String.valueOf(double), public static java.lang.String java.lang.String.valueOf(float), public static java.lang.String java.lang.String.valueOf(int), public static java.lang.String java.lang.String.valueOf(long), public static java.lang.String java.lang.String.valueOf(java.lang.Object), public static java.lang.String java.lang.String.valueOf(boolean), public static java.lang.String java.lang.String.valueOf(char[]), public static java.lang.String java.lang.String.valueOf(char[],int,int)]
+[private final int java.lang.String.count, private int java.lang.String.hashCode, private static final char[] java.lang.String.ASCII, public static final java.util.Comparator java.lang.String.CASE_INSENSITIVE_ORDER, private static final char java.lang.String.REPLACEMENT_CHAR, private static final long java.lang.String.serialVersionUID]
+[public native char java.lang.String.charAt(int), public int java.lang.String.codePointAt(int), public int java.lang.String.codePointBefore(int), public int java.lang.String.codePointCount(int,int), public int java.lang.String.compareTo(java.lang.Object), public native int java.lang.String.compareTo(java.lang.String), public int java.lang.String.compareToIgnoreCase(java.lang.String), public native java.lang.String java.lang.String.concat(java.lang.String), public boolean java.lang.String.contains(java.lang.CharSequence), public boolean java.lang.String.contentEquals(java.lang.CharSequence), public boolean java.lang.String.contentEquals(java.lang.StringBuffer), public boolean java.lang.String.endsWith(java.lang.String), public boolean java.lang.String.equals(java.lang.Object), public boolean java.lang.String.equalsIgnoreCase(java.lang.String), public void java.lang.String.getBytes(int,int,byte[],int), public [B java.lang.String.getBytes(), public [B java.lang.String.getBytes(java.lang.String) throws java.io.UnsupportedEncodingException, public [B java.lang.String.getBytes(java.nio.charset.Charset), public void java.lang.String.getChars(int,int,char[],int), native void java.lang.String.getCharsNoCheck(int,int,char[],int), public int java.lang.String.hashCode(), public int java.lang.String.indexOf(int), public int java.lang.String.indexOf(int,int), public int java.lang.String.indexOf(java.lang.String), public int java.lang.String.indexOf(java.lang.String,int), public native java.lang.String java.lang.String.intern(), public boolean java.lang.String.isEmpty(), public int java.lang.String.lastIndexOf(int), public int java.lang.String.lastIndexOf(int,int), public int java.lang.String.lastIndexOf(java.lang.String), public int java.lang.String.lastIndexOf(java.lang.String,int), public int java.lang.String.length(), public boolean java.lang.String.matches(java.lang.String), public int java.lang.String.offsetByCodePoints(int,int), public boolean 
java.lang.String.regionMatches(int,java.lang.String,int,int), public boolean java.lang.String.regionMatches(boolean,int,java.lang.String,int,int), public java.lang.String java.lang.String.replace(char,char), public java.lang.String java.lang.String.replace(java.lang.CharSequence,java.lang.CharSequence), public java.lang.String java.lang.String.replaceAll(java.lang.String,java.lang.String), public java.lang.String java.lang.String.replaceFirst(java.lang.String,java.lang.String), native void java.lang.String.setCharAt(int,char), public [Ljava.lang.String; java.lang.String.split(java.lang.String), public [Ljava.lang.String; java.lang.String.split(java.lang.String,int), public boolean java.lang.String.startsWith(java.lang.String), public boolean java.lang.String.startsWith(java.lang.String,int), public java.lang.CharSequence java.lang.String.subSequence(int,int), public java.lang.String java.lang.String.substring(int), public java.lang.String java.lang.String.substring(int,int), public native [C java.lang.String.toCharArray(), public java.lang.String java.lang.String.toLowerCase(), public java.lang.String java.lang.String.toLowerCase(java.util.Locale), public java.lang.String java.lang.String.toString(), public java.lang.String java.lang.String.toUpperCase(), public java.lang.String java.lang.String.toUpperCase(java.util.Locale), public java.lang.String java.lang.String.trim(), public static java.lang.String java.lang.String.copyValueOf(char[]), public static java.lang.String java.lang.String.copyValueOf(char[],int,int), private java.lang.StringIndexOutOfBoundsException java.lang.String.failedBoundsCheck(int,int,int), private native int java.lang.String.fastIndexOf(int,int), private native java.lang.String java.lang.String.fastSubstring(int,int), private char java.lang.String.foldCase(char), public static java.lang.String java.lang.String.format(java.lang.String,java.lang.Object[]), public static java.lang.String 
java.lang.String.format(java.util.Locale,java.lang.String,java.lang.Object[]), private java.lang.StringIndexOutOfBoundsException java.lang.String.indexAndLength(int), private static int java.lang.String.indexOf(java.lang.String,java.lang.String,int,int,char), private int java.lang.String.indexOfSupplementary(int,int), private int java.lang.String.lastIndexOfSupplementary(int,int), private java.lang.StringIndexOutOfBoundsException java.lang.String.startEndAndLength(int,int), public static java.lang.String java.lang.String.valueOf(char), public static java.lang.String java.lang.String.valueOf(double), public static java.lang.String java.lang.String.valueOf(float), public static java.lang.String java.lang.String.valueOf(int), public static java.lang.String java.lang.String.valueOf(long), public static java.lang.String java.lang.String.valueOf(java.lang.Object), public static java.lang.String java.lang.String.valueOf(boolean), public static java.lang.String java.lang.String.valueOf(char[]), public static java.lang.String java.lang.String.valueOf(char[],int,int)]
 []
 [interface java.io.Serializable, interface java.lang.Comparable, interface java.lang.CharSequence]
 0
diff --git a/test/100-reflect2/src/Main.java b/test/100-reflect2/src/Main.java
index 0cc1488..72e14b1 100644
--- a/test/100-reflect2/src/Main.java
+++ b/test/100-reflect2/src/Main.java
@@ -266,9 +266,37 @@
     show(ctor.newInstance(new char[] { 'x', 'y', 'z', '!' }, 1, 2));
   }
 
+  private static void testPackagePrivateConstructor() {
+    try {
+      // sub.PPClass is package-private, so instantiating it from here must be rejected.
+      Constructor ctor = Class.forName("sub.PPClass").getConstructor();
+      ctor.newInstance();
+      throw new RuntimeException("Expected IllegalAccessException.");
+    } catch (IllegalAccessException expected) {
+      // This is the outcome the test wants.
+    } catch (Exception unexpected) {
+      // Anything else (including the RuntimeException above) is a failure; report it.
+      unexpected.printStackTrace();
+    }
+  }
+
+  private static void testPackagePrivateAccessibleConstructor() {
+    try {
+      // With setAccessible(true) the package-private class should be instantiable.
+      Constructor ctor = Class.forName("sub.PPClass").getConstructor();
+      ctor.setAccessible(true);  // ensure we prevent IllegalAccessException
+      ctor.newInstance();
+    } catch (Exception unexpected) {
+      // No exception is expected on this path; report whatever was thrown.
+      unexpected.printStackTrace();
+    }
+  }
+
   public static void main(String[] args) throws Exception {
     testFieldReflection();
     testMethodReflection();
     testConstructorReflection();
+    testPackagePrivateConstructor();
+    testPackagePrivateAccessibleConstructor();
   }
 }
diff --git a/test/100-reflect2/src/sub/PPClass.java b/test/100-reflect2/src/sub/PPClass.java
new file mode 100644
index 0000000..d972287
--- /dev/null
+++ b/test/100-reflect2/src/sub/PPClass.java
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package sub;
+
+// A package-private class with a public, no-argument constructor that does nothing.
+class PPClass {
+    public PPClass() {
+    }
+}
\ No newline at end of file
diff --git a/test/115-native-bridge/expected.txt b/test/115-native-bridge/expected.txt
index 16a71e4..deb70ba 100644
--- a/test/115-native-bridge/expected.txt
+++ b/test/115-native-bridge/expected.txt
@@ -4,7 +4,7 @@
 Ready for native bridge tests.
 Checking for support.
 Getting trampoline for JNI_OnLoad with shorty (null).
-Test ART callbacks: all JNI function number is 9.
+Test ART callbacks: all JNI function number is 10.
     name:booleanMethod, signature:(ZZZZZZZZZZ)Z, shorty:ZZZZZZZZZZZ.
     name:byteMethod, signature:(BBBBBBBBBB)B, shorty:BBBBBBBBBBB.
     name:charMethod, signature:(CCCCCCCCCC)C, shorty:CCCCCCCCCCC.
@@ -13,6 +13,7 @@
     name:testFindClassOnAttachedNativeThread, signature:()V, shorty:V.
     name:testFindFieldOnAttachedNativeThreadNative, signature:()V, shorty:V.
     name:testGetMirandaMethodNative, signature:()Ljava/lang/reflect/Method;, shorty:L.
+    name:testNewStringObject, signature:()V, shorty:V.
     name:testZeroLengthByteBuffers, signature:()V, shorty:V.
 trampoline_JNI_OnLoad called!
 Getting trampoline for Java_Main_testFindClassOnAttachedNativeThread with shorty V.
@@ -55,3 +56,5 @@
 trampoline_Java_Main_charMethod called!
 trampoline_Java_Main_charMethod called!
 trampoline_Java_Main_charMethod called!
+Getting trampoline for Java_Main_testNewStringObject with shorty V.
+trampoline_Java_Main_testNewStringObject called!
diff --git a/test/115-native-bridge/nativebridge.cc b/test/115-native-bridge/nativebridge.cc
index 6bcc1f5..24e9600 100644
--- a/test/115-native-bridge/nativebridge.cc
+++ b/test/115-native-bridge/nativebridge.cc
@@ -122,6 +122,14 @@
   return fnPtr(env, klass);
 }
 
+static void trampoline_Java_Main_testNewStringObject(JNIEnv* env, jclass klass) {
+  typedef void (*FnPtr_t)(JNIEnv*, jclass);  // Signature of the wrapped native method.
+  FnPtr_t fnPtr = reinterpret_cast<FnPtr_t>
+    (find_native_bridge_method("testNewStringObject")->fnPtr);  // Looked up by method name.
+  printf("%s called!\n", __FUNCTION__);  // Log the call before forwarding.
+  return fnPtr(env, klass);  // Forward to the looked-up function unchanged.
+}
+
 static void trampoline_Java_Main_testZeroLengthByteBuffers(JNIEnv* env, jclass klass) {
   typedef void (*FnPtr_t)(JNIEnv*, jclass);
   FnPtr_t fnPtr = reinterpret_cast<FnPtr_t>
@@ -190,6 +198,8 @@
     reinterpret_cast<void*>(trampoline_Java_Main_testFindFieldOnAttachedNativeThreadNative) },
   { "testGetMirandaMethodNative", "()Ljava/lang/reflect/Method;", true, nullptr,
     reinterpret_cast<void*>(trampoline_Java_Main_testGetMirandaMethodNative) },
+  { "testNewStringObject", "()V", true, nullptr,
+    reinterpret_cast<void*>(trampoline_Java_Main_testNewStringObject) },
   { "testZeroLengthByteBuffers", "()V", true, nullptr,
     reinterpret_cast<void*>(trampoline_Java_Main_testZeroLengthByteBuffers) },
 };
diff --git a/test/115-native-bridge/src/NativeBridgeMain.java b/test/115-native-bridge/src/NativeBridgeMain.java
index 2405627..c843707 100644
--- a/test/115-native-bridge/src/NativeBridgeMain.java
+++ b/test/115-native-bridge/src/NativeBridgeMain.java
@@ -31,6 +31,7 @@
         testBooleanMethod();
         testCharMethod();
         testEnvironment();
+        testNewStringObject();
     }
 
     public static native void testFindClassOnAttachedNativeThread();
@@ -167,6 +168,8 @@
       //   throw new AssertionError("unexpected value for supported_abis");
       // }
     }
+
+    private static native void testNewStringObject();
 }
 
 public class NativeBridgeMain {
diff --git a/test/127-secondarydex/expected.txt b/test/127-secondarydex/expected.txt
index 29a1411..1c8defb 100644
--- a/test/127-secondarydex/expected.txt
+++ b/test/127-secondarydex/expected.txt
@@ -1,3 +1,4 @@
 testSlowPathDirectInvoke
 Test
 Got null pointer exception
+Test
diff --git a/test/127-secondarydex/src/Main.java b/test/127-secondarydex/src/Main.java
index c921c5b..0ede8ed 100644
--- a/test/127-secondarydex/src/Main.java
+++ b/test/127-secondarydex/src/Main.java
@@ -24,6 +24,7 @@
 public class Main {
     public static void main(String[] args) {
         testSlowPathDirectInvoke();
+        testString();
     }
 
     public static void testSlowPathDirectInvoke() {
@@ -40,4 +41,11 @@
             System.out.println("Got unexpected exception " + e);
         }
     }
+
+    // Regression check for the String change (see http://b/20870917): String.<init>
+    // must be compiled properly in the secondary dex.
+    public static void testString() {
+        Test instance = new Test();
+        System.out.println(instance.toString());
+    }
 }
diff --git a/test/127-secondarydex/src/Test.java b/test/127-secondarydex/src/Test.java
index 82cb901..8547e79 100644
--- a/test/127-secondarydex/src/Test.java
+++ b/test/127-secondarydex/src/Test.java
@@ -22,4 +22,8 @@
     private void print() {
         System.out.println("Test");
     }
+
+    public String toString() {
+        return new String("Test");
+    }
 }
diff --git a/test/138-duplicate-classes-check/expected.txt b/test/138-duplicate-classes-check/expected.txt
new file mode 100644
index 0000000..b2f7f08
--- /dev/null
+++ b/test/138-duplicate-classes-check/expected.txt
@@ -0,0 +1,2 @@
+10
+10
diff --git a/test/138-duplicate-classes-check/info.txt b/test/138-duplicate-classes-check/info.txt
new file mode 100644
index 0000000..22a66a2
--- /dev/null
+++ b/test/138-duplicate-classes-check/info.txt
@@ -0,0 +1 @@
+Check whether a duplicate class is detected.
diff --git a/test/138-duplicate-classes-check/src-ex/A.java b/test/138-duplicate-classes-check/src-ex/A.java
new file mode 100644
index 0000000..8e52cb3
--- /dev/null
+++ b/test/138-duplicate-classes-check/src-ex/A.java
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class A {
+    public volatile int i;
+
+    public A() {
+      i = 10;
+    }
+}
diff --git a/test/138-duplicate-classes-check/src-ex/TestEx.java b/test/138-duplicate-classes-check/src-ex/TestEx.java
new file mode 100644
index 0000000..87558fa
--- /dev/null
+++ b/test/138-duplicate-classes-check/src-ex/TestEx.java
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class TestEx {
+    public static void test() {
+        System.out.println(new A().i);
+    }
+}
diff --git a/test/138-duplicate-classes-check/src/A.java b/test/138-duplicate-classes-check/src/A.java
new file mode 100644
index 0000000..e1773e5
--- /dev/null
+++ b/test/138-duplicate-classes-check/src/A.java
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class A {
+    // The Object fields change this class's object layout. Therefore the field 'i' should
+    // be at a different offset compared to the A class from the ex DEX file.
+    public final Object anObject = null;
+    public final Object anotherObject = null;
+    // Use volatile to defeat inlining of the constructor + load-elimination.
+    public volatile int i;
+
+    public A() {
+      i = 10;
+    }
+}
diff --git a/test/138-duplicate-classes-check/src/FancyLoader.java b/test/138-duplicate-classes-check/src/FancyLoader.java
new file mode 100644
index 0000000..03ec948
--- /dev/null
+++ b/test/138-duplicate-classes-check/src/FancyLoader.java
@@ -0,0 +1,229 @@
+/*
+ * Copyright (C) 2008 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.lang.reflect.Constructor;
+import java.lang.reflect.Method;
+import java.lang.reflect.InvocationTargetException;
+
+/**
+ * A class loader with atypical behavior: we try to load a private
+ * class implementation before asking the system or boot loader.  This
+ * is used to create multiple classes with identical names in a single VM.
+ *
+ * If DexFile is available, we use that; if not, we assume we're not in
+ * Dalvik and instantiate the class with defineClass().
+ *
+ * The location of the DEX files and class data is dependent upon the
+ * test framework.
+ */
+public class FancyLoader extends ClassLoader {
+    /* this is where the "alternate" .class files live */
+    static final String CLASS_PATH = "classes-ex/";
+
+    /* this is the "alternate" DEX/Jar file */
+    static final String DEX_FILE = System.getenv("DEX_LOCATION") +
+            "/138-duplicate-classes-check-ex.jar";
+
+    /* on Dalvik, this is a DexFile; otherwise, it's null */
+    private Class mDexClass;
+
+    private Object mDexFile;
+
+    /**
+     * Construct FancyLoader, grabbing a reference to the DexFile class
+     * if we're running under Dalvik.
+     */
+    public FancyLoader(ClassLoader parent) {
+        super(parent);
+
+        try {
+            mDexClass = parent.loadClass("dalvik.system.DexFile");
+        } catch (ClassNotFoundException cnfe) {
+            // ignore -- not running Dalvik
+        }
+    }
+
+    /**
+     * Finds the class with the specified binary name.
+     *
+     * We search for a file in CLASS_PATH or pull an entry from DEX_FILE.
+     * If we don't find a match, we throw an exception.
+     */
+    protected Class<?> findClass(String name) throws ClassNotFoundException
+    {
+        if (mDexClass != null) {
+            return findClassDalvik(name);
+        } else {
+            return findClassNonDalvik(name);
+        }
+    }
+
+    /**
+     * Asks DexFile to load the named class from DEX_FILE; the result is dropped and null returned.
+     */
+    private Class<?> findClassDalvik(String name)
+        throws ClassNotFoundException {
+
+        if (mDexFile == null) {
+            synchronized (FancyLoader.class) {
+                Constructor ctor;
+                /*
+                 * Construct a DexFile object for DEX_FILE through reflection.
+                 */
+                try {
+                    ctor = mDexClass.getConstructor(new Class[] {String.class});
+                } catch (NoSuchMethodException nsme) {
+                    throw new ClassNotFoundException("getConstructor failed",
+                        nsme);
+                }
+
+                try {
+                    mDexFile = ctor.newInstance(DEX_FILE);
+                } catch (InstantiationException ie) {
+                    throw new ClassNotFoundException("newInstance failed", ie);
+                } catch (IllegalAccessException iae) {
+                    throw new ClassNotFoundException("newInstance failed", iae);
+                } catch (InvocationTargetException ite) {
+                    throw new ClassNotFoundException("newInstance failed", ite);
+                }
+            }
+        }
+
+        /*
+         * Call DexFile.loadClass(String, ClassLoader) with this loader as the ClassLoader.
+         */
+        Method meth;
+
+        try {
+            meth = mDexClass.getMethod("loadClass",
+                    new Class[] { String.class, ClassLoader.class });
+        } catch (NoSuchMethodException nsme) {
+            throw new ClassNotFoundException("getMethod failed", nsme);
+        }
+
+        try {
+            meth.invoke(mDexFile, name, this);
+        } catch (IllegalAccessException iae) {
+            throw new ClassNotFoundException("loadClass failed", iae);
+        } catch (InvocationTargetException ite) {
+            throw new ClassNotFoundException("loadClass failed",
+                ite.getCause());
+        }
+
+        return null;
+    }
+
+    /**
+     * Finds the class with the specified binary name, from .class files.
+     */
+    private Class<?> findClassNonDalvik(String name)
+        throws ClassNotFoundException {
+
+        String pathName = CLASS_PATH + name + ".class";
+        //System.out.println("--- Fancy: looking for " + pathName);
+
+        File path = new File(pathName);
+        RandomAccessFile raf;
+
+        try {
+            raf = new RandomAccessFile(path, "r");
+        } catch (FileNotFoundException fnfe) {
+            throw new ClassNotFoundException("Not found: " + pathName);
+        }
+
+        /* read the entire file in */
+        byte[] fileData;
+        try {
+            fileData = new byte[(int) raf.length()];
+            raf.readFully(fileData);
+        } catch (IOException ioe) {
+            throw new ClassNotFoundException("Read error: " + pathName);
+        } finally {
+            try {
+                raf.close();
+            } catch (IOException ioe) {
+                // drop
+            }
+        }
+
+        /* create the class */
+        //System.out.println("--- Fancy: defining " + name);
+        try {
+            return defineClass(name, fileData, 0, fileData.length);
+        } catch (Throwable th) {
+            throw new ClassNotFoundException("defineClass failed", th);
+        }
+    }
+
+    /**
+     * Load a class.
+     *
+     * Normally a class loader wouldn't override this, but we want our
+     * version of the class to take precedence over an already-loaded
+     * version.
+     *
+     * We still want the system classes (e.g. java.lang.Object) from the
+     * bootstrap class loader.
+     */
+    protected Class<?> loadClass(String name, boolean resolve)
+        throws ClassNotFoundException
+    {
+        Class res;
+
+        /*
+         * 1. Invoke findLoadedClass(String) to check if the class has
+         * already been loaded.
+         *
+         * This doesn't change.
+         */
+        res = findLoadedClass(name);
+        if (res != null) {
+            System.out.println("FancyLoader.loadClass: "
+                + name + " already loaded");
+            if (resolve)
+                resolveClass(res);
+            return res;
+        }
+
+        /*
+         * 3. (Done before step 2 here.) Invoke findClass(String) so our own copy is tried first.
+         */
+        try {
+            res = findClass(name);
+            if (resolve)
+                resolveClass(res);
+        }
+        catch (ClassNotFoundException e) {
+            // not found by us: swallow the exception and fall through to super.loadClass()
+        }
+
+        /*
+         * 2. Invoke the loadClass method on the parent class loader.  If
+         * the parent loader is null the class loader built-in to the
+         * virtual machine is used, instead.
+         *
+         * (Since we're not in java.lang, we can't actually invoke the
+         * parent's loadClass() method, but we passed our parent to the
+         * super-class which can take care of it for us.)
+         */
+        res = super.loadClass(name, resolve);   // overwrites res; returns class or throws
+        return res;
+    }
+}
diff --git a/test/138-duplicate-classes-check/src/Main.java b/test/138-duplicate-classes-check/src/Main.java
new file mode 100644
index 0000000..a9b5bb0
--- /dev/null
+++ b/test/138-duplicate-classes-check/src/Main.java
@@ -0,0 +1,43 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.File;
+import java.lang.reflect.Method;
+
+/**
+ * Prints A.i directly, then again through TestEx.test() loaded via FancyLoader.
+ */
+public class Main {
+    public static void main(String[] args) {
+        new Main().run();
+    }
+
+    private void run() {
+        System.out.println(new A().i);
+
+        // Now run the class from the -ex file.
+
+        FancyLoader loader = new FancyLoader(getClass().getClassLoader());
+
+        try {
+            Class testEx = loader.loadClass("TestEx");
+            Method test = testEx.getDeclaredMethod("test");
+            test.invoke(null);
+        } catch (Exception exc) {
+            exc.printStackTrace();
+        }
+    }
+}
diff --git a/test/138-duplicate-classes-check2/build b/test/138-duplicate-classes-check2/build
new file mode 100755
index 0000000..abcbbb8
--- /dev/null
+++ b/test/138-duplicate-classes-check2/build
@@ -0,0 +1,32 @@
+#!/bin/bash
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Stop if something fails.
+set -e
+
+mkdir classes
+${JAVAC} -d classes `find src -name '*.java'`
+
+mkdir classes-ex
+${JAVAC} -d classes-ex `find src-ex -name '*.java'`
+rm classes-ex/A.class  # TestEx was compiled against A, but A itself stays out of the -ex jar.
+
+if [ "${NEED_DEX}" = "true" ]; then
+  ${DX} -JXmx256m --debug --dex --dump-to=classes.lst --output=classes.dex --dump-width=1000 classes
+  zip "${TEST_NAME}.jar" classes.dex
+  ${DX} -JXmx256m --debug --dex --dump-to=classes-ex.lst --output=classes.dex --dump-width=1000 classes-ex
+  zip "${TEST_NAME}-ex.jar" classes.dex
+fi
diff --git a/test/138-duplicate-classes-check2/expected.txt b/test/138-duplicate-classes-check2/expected.txt
new file mode 100644
index 0000000..b2f7f08
--- /dev/null
+++ b/test/138-duplicate-classes-check2/expected.txt
@@ -0,0 +1,2 @@
+10
+10
diff --git a/test/138-duplicate-classes-check2/info.txt b/test/138-duplicate-classes-check2/info.txt
new file mode 100644
index 0000000..7100122
--- /dev/null
+++ b/test/138-duplicate-classes-check2/info.txt
@@ -0,0 +1,2 @@
+Check that no duplicate class is detected, even though we compiled against one (but removed
+it before creating the dex file).
diff --git a/test/138-duplicate-classes-check2/run b/test/138-duplicate-classes-check2/run
new file mode 100755
index 0000000..8494ad9
--- /dev/null
+++ b/test/138-duplicate-classes-check2/run
@@ -0,0 +1,19 @@
+#!/bin/bash
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# We want to run as no-dex-file-fallback to confirm that even though the -ex file has a symbolic
+# reference to A, there's no class-def, so we don't detect a collision.
+exec ${RUN} --runtime-option -Xno-dex-file-fallback "${@}"
diff --git a/test/138-duplicate-classes-check2/src-ex/A.java b/test/138-duplicate-classes-check2/src-ex/A.java
new file mode 100644
index 0000000..8e52cb3
--- /dev/null
+++ b/test/138-duplicate-classes-check2/src-ex/A.java
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class A {
+    public volatile int i;
+
+    public A() {
+      i = 10;
+    }
+}
diff --git a/test/138-duplicate-classes-check2/src-ex/TestEx.java b/test/138-duplicate-classes-check2/src-ex/TestEx.java
new file mode 100644
index 0000000..87558fa
--- /dev/null
+++ b/test/138-duplicate-classes-check2/src-ex/TestEx.java
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class TestEx {
+    public static void test() {
+        System.out.println(new A().i);
+    }
+}
diff --git a/test/138-duplicate-classes-check2/src/A.java b/test/138-duplicate-classes-check2/src/A.java
new file mode 100644
index 0000000..e1773e5
--- /dev/null
+++ b/test/138-duplicate-classes-check2/src/A.java
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class A {
+    // The Object fields change this class's object layout. Therefore the field 'i' should
+    // be at a different offset compared to the src-ex A that TestEx was compiled against.
+    public final Object anObject = null;
+    public final Object anotherObject = null;
+    // Use volatile to defeat inlining of the constructor + load-elimination.
+    public volatile int i;
+
+    public A() {
+      i = 10;
+    }
+}
diff --git a/test/138-duplicate-classes-check2/src/FancyLoader.java b/test/138-duplicate-classes-check2/src/FancyLoader.java
new file mode 100644
index 0000000..7e2bb08
--- /dev/null
+++ b/test/138-duplicate-classes-check2/src/FancyLoader.java
@@ -0,0 +1,229 @@
+/*
+ * Copyright (C) 2008 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.lang.reflect.Constructor;
+import java.lang.reflect.Method;
+import java.lang.reflect.InvocationTargetException;
+
+/**
+ * A class loader with atypical behavior: we try to load a private
+ * class implementation before asking the system or boot loader.  This
+ * is used to create multiple classes with identical names in a single VM.
+ *
+ * If DexFile is available, we use that; if not, we assume we're not in
+ * Dalvik and instantiate the class with defineClass().
+ *
+ * The location of the DEX files and class data is dependent upon the
+ * test framework.
+ */
+public class FancyLoader extends ClassLoader {
+    /* this is where the "alternate" .class files live */
+    static final String CLASS_PATH = "classes-ex/";
+
+    /* this is the "alternate" DEX/Jar file */
+    static final String DEX_FILE = System.getenv("DEX_LOCATION") +
+            "/138-duplicate-classes-check2-ex.jar";
+
+    /* on Dalvik, this is a DexFile; otherwise, it's null */
+    private Class mDexClass;
+
+    private Object mDexFile;
+
+    /**
+     * Construct FancyLoader, grabbing a reference to the DexFile class
+     * if we're running under Dalvik.
+     */
+    public FancyLoader(ClassLoader parent) {
+        super(parent);
+
+        try {
+            mDexClass = parent.loadClass("dalvik.system.DexFile");
+        } catch (ClassNotFoundException cnfe) {
+            // ignore -- not running Dalvik
+        }
+    }
+
+    /**
+     * Finds the class with the specified binary name.
+     *
+     * We search for a file in CLASS_PATH or pull an entry from DEX_FILE.
+     * If we don't find a match, we throw an exception.
+     */
+    protected Class<?> findClass(String name) throws ClassNotFoundException
+    {
+        if (mDexClass != null) {
+            return findClassDalvik(name);
+        } else {
+            return findClassNonDalvik(name);
+        }
+    }
+
+    /**
+     * Asks DexFile to load the named class from DEX_FILE; the result is dropped and null returned.
+     */
+    private Class<?> findClassDalvik(String name)
+        throws ClassNotFoundException {
+
+        if (mDexFile == null) {
+            synchronized (FancyLoader.class) {
+                Constructor ctor;
+                /*
+                 * Construct a DexFile object for DEX_FILE through reflection.
+                 */
+                try {
+                    ctor = mDexClass.getConstructor(new Class[] {String.class});
+                } catch (NoSuchMethodException nsme) {
+                    throw new ClassNotFoundException("getConstructor failed",
+                        nsme);
+                }
+
+                try {
+                    mDexFile = ctor.newInstance(DEX_FILE);
+                } catch (InstantiationException ie) {
+                    throw new ClassNotFoundException("newInstance failed", ie);
+                } catch (IllegalAccessException iae) {
+                    throw new ClassNotFoundException("newInstance failed", iae);
+                } catch (InvocationTargetException ite) {
+                    throw new ClassNotFoundException("newInstance failed", ite);
+                }
+            }
+        }
+
+        /*
+         * Call DexFile.loadClass(String, ClassLoader) with this loader as the ClassLoader.
+         */
+        Method meth;
+
+        try {
+            meth = mDexClass.getMethod("loadClass",
+                    new Class[] { String.class, ClassLoader.class });
+        } catch (NoSuchMethodException nsme) {
+            throw new ClassNotFoundException("getMethod failed", nsme);
+        }
+
+        try {
+            meth.invoke(mDexFile, name, this);
+        } catch (IllegalAccessException iae) {
+            throw new ClassNotFoundException("loadClass failed", iae);
+        } catch (InvocationTargetException ite) {
+            throw new ClassNotFoundException("loadClass failed",
+                ite.getCause());
+        }
+
+        return null;
+    }
+
+    /**
+     * Finds the class with the specified binary name, from .class files.
+     */
+    private Class<?> findClassNonDalvik(String name)
+        throws ClassNotFoundException {
+
+        String pathName = CLASS_PATH + name + ".class";
+        //System.out.println("--- Fancy: looking for " + pathName);
+
+        File path = new File(pathName);
+        RandomAccessFile raf;
+
+        try {
+            raf = new RandomAccessFile(path, "r");
+        } catch (FileNotFoundException fnfe) {
+            throw new ClassNotFoundException("Not found: " + pathName);
+        }
+
+        /* read the entire file in */
+        byte[] fileData;
+        try {
+            fileData = new byte[(int) raf.length()];
+            raf.readFully(fileData);
+        } catch (IOException ioe) {
+            throw new ClassNotFoundException("Read error: " + pathName);
+        } finally {
+            try {
+                raf.close();
+            } catch (IOException ioe) {
+                // drop
+            }
+        }
+
+        /* create the class */
+        //System.out.println("--- Fancy: defining " + name);
+        try {
+            return defineClass(name, fileData, 0, fileData.length);
+        } catch (Throwable th) {
+            throw new ClassNotFoundException("defineClass failed", th);
+        }
+    }
+
+    /**
+     * Load a class.
+     *
+     * Normally a class loader wouldn't override this, but we want our
+     * version of the class to take precedence over an already-loaded
+     * version.
+     *
+     * We still want the system classes (e.g. java.lang.Object) from the
+     * bootstrap class loader.
+     */
+    protected Class<?> loadClass(String name, boolean resolve)
+        throws ClassNotFoundException
+    {
+        Class res;
+
+        /*
+         * 1. Invoke findLoadedClass(String) to check if the class has
+         * already been loaded.
+         *
+         * This doesn't change.
+         */
+        res = findLoadedClass(name);
+        if (res != null) {
+            System.out.println("FancyLoader.loadClass: "
+                + name + " already loaded");
+            if (resolve)
+                resolveClass(res);
+            return res;
+        }
+
+        /*
+         * 3. (Done before step 2 here.) Invoke findClass(String) so our own copy is tried first.
+         */
+        try {
+            res = findClass(name);
+            if (resolve)
+                resolveClass(res);
+        }
+        catch (ClassNotFoundException e) {
+            // not found by us: swallow the exception and fall through to super.loadClass()
+        }
+
+        /*
+         * 2. Invoke the loadClass method on the parent class loader.  If
+         * the parent loader is null the class loader built-in to the
+         * virtual machine is used, instead.
+         *
+         * (Since we're not in java.lang, we can't actually invoke the
+         * parent's loadClass() method, but we passed our parent to the
+         * super-class which can take care of it for us.)
+         */
+        res = super.loadClass(name, resolve);   // overwrites res; returns class or throws
+        return res;
+    }
+}
diff --git a/test/138-duplicate-classes-check2/src/Main.java b/test/138-duplicate-classes-check2/src/Main.java
new file mode 100644
index 0000000..a9b5bb0
--- /dev/null
+++ b/test/138-duplicate-classes-check2/src/Main.java
@@ -0,0 +1,43 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.File;
+import java.lang.reflect.Method;
+
+/**
+ * Prints A.i directly, then again through TestEx.test() loaded via FancyLoader.
+ */
+public class Main {
+    public static void main(String[] args) {
+        new Main().run();
+    }
+
+    private void run() {
+        System.out.println(new A().i);
+
+        // Now run the class from the -ex file.
+
+        FancyLoader loader = new FancyLoader(getClass().getClassLoader());
+
+        try {
+            Class testEx = loader.loadClass("TestEx");
+            Method test = testEx.getDeclaredMethod("test");
+            test.invoke(null);
+        } catch (Exception exc) {
+            exc.printStackTrace();
+        }
+    }
+}
diff --git a/test/201-built-in-exception-detail-messages/src/Main.java b/test/201-built-in-exception-detail-messages/src/Main.java
index 24ee6e0..52d4259 100644
--- a/test/201-built-in-exception-detail-messages/src/Main.java
+++ b/test/201-built-in-exception-detail-messages/src/Main.java
@@ -358,7 +358,8 @@
       field.set(new A(), 5);
       fail();
     } catch (IllegalArgumentException expected) {
-      assertEquals("field A.b has type java.lang.String, got java.lang.Integer", expected.getMessage());
+      assertEquals("field A.b has type java.lang.String, got java.lang.Integer",
+          expected.getMessage());
     }
 
     // Can't unbox null to a primitive.
@@ -385,7 +386,8 @@
       m.invoke(new A(), 2, 2);
       fail();
     } catch (IllegalArgumentException expected) {
-      assertEquals("method A.m argument 2 has type java.lang.String, got java.lang.Integer", expected.getMessage());
+      assertEquals("method A.m argument 2 has type java.lang.String, got java.lang.Integer",
+          expected.getMessage());
     }
 
     // Can't pass null as an int.
@@ -409,21 +411,24 @@
       m.invoke("hello", "world"); // Wrong type.
       fail();
     } catch (IllegalArgumentException iae) {
-      assertEquals("method java.lang.String.charAt argument 1 has type int, got java.lang.String", iae.getMessage());
+      assertEquals("method java.lang.String.charAt! argument 1 has type int, got java.lang.String",
+          iae.getMessage());
     }
     try {
       Method m = String.class.getMethod("charAt", int.class);
       m.invoke("hello", (Object) null); // Null for a primitive argument.
       fail();
     } catch (IllegalArgumentException iae) {
-      assertEquals("method java.lang.String.charAt argument 1 has type int, got null", iae.getMessage());
+      assertEquals("method java.lang.String.charAt! argument 1 has type int, got null",
+          iae.getMessage());
     }
     try {
       Method m = String.class.getMethod("charAt", int.class);
       m.invoke(new Integer(5)); // Wrong type for 'this'.
       fail();
     } catch (IllegalArgumentException iae) {
-      assertEquals("Expected receiver of type java.lang.String, but got java.lang.Integer", iae.getMessage());
+      assertEquals("Expected receiver of type java.lang.String, but got java.lang.Integer",
+          iae.getMessage());
     }
     try {
       Method m = String.class.getMethod("charAt", int.class);
diff --git a/test/422-type-conversion/src/Main.java b/test/422-type-conversion/src/Main.java
index 7ce2868..447b9b8 100644
--- a/test/422-type-conversion/src/Main.java
+++ b/test/422-type-conversion/src/Main.java
@@ -625,65 +625,67 @@
     assertCharEquals((char)0, $opt$IntToChar(-2147483648));  // -(2^31)
   }
 
+  // A dummy value to defeat inlining of these routines.
+  static boolean doThrow = false;
 
   // These methods produce int-to-long Dex instructions.
-  static long $opt$ByteToLong(byte a) { return (long)a; }
-  static long $opt$ShortToLong(short a) { return (long)a; }
-  static long $opt$IntToLong(int a) { return (long)a; }
-  static long $opt$CharToLong(int a) { return (long)a; }
+  static long $opt$ByteToLong(byte a) { if (doThrow) throw new Error(); return (long)a; }
+  static long $opt$ShortToLong(short a) { if (doThrow) throw new Error(); return (long)a; }
+  static long $opt$IntToLong(int a) { if (doThrow) throw new Error(); return (long)a; }
+  static long $opt$CharToLong(int a) { if (doThrow) throw new Error(); return (long)a; }
 
   // These methods produce int-to-float Dex instructions.
-  static float $opt$ByteToFloat(byte a) { return (float)a; }
-  static float $opt$ShortToFloat(short a) { return (float)a; }
-  static float $opt$IntToFloat(int a) { return (float)a; }
-  static float $opt$CharToFloat(char a) { return (float)a; }
+  static float $opt$ByteToFloat(byte a) { if (doThrow) throw new Error(); return (float)a; }
+  static float $opt$ShortToFloat(short a) { if (doThrow) throw new Error(); return (float)a; }
+  static float $opt$IntToFloat(int a) { if (doThrow) throw new Error(); return (float)a; }
+  static float $opt$CharToFloat(char a) { if (doThrow) throw new Error(); return (float)a; }
 
   // These methods produce int-to-double Dex instructions.
-  static double $opt$ByteToDouble(byte a) { return (double)a; }
-  static double $opt$ShortToDouble(short a) { return (double)a; }
-  static double $opt$IntToDouble(int a) { return (double)a; }
-  static double $opt$CharToDouble(int a) { return (double)a; }
+  static double $opt$ByteToDouble(byte a) { if (doThrow) throw new Error(); return (double)a; }
+  static double $opt$ShortToDouble(short a) { if (doThrow) throw new Error(); return (double)a; }
+  static double $opt$IntToDouble(int a) { if (doThrow) throw new Error(); return (double)a; }
+  static double $opt$CharToDouble(int a) { if (doThrow) throw new Error(); return (double)a; }
 
   // These methods produce long-to-int Dex instructions.
-  static int $opt$LongToInt(long a) { return (int)a; }
-  static int $opt$LongLiteralToInt() { return (int)42L; }
+  static int $opt$LongToInt(long a) { if (doThrow) throw new Error(); return (int)a; }
+  static int $opt$LongLiteralToInt() { if (doThrow) throw new Error(); return (int)42L; }
 
   // This method produces a long-to-float Dex instruction.
-  static float $opt$LongToFloat(long a) { return (float)a; }
+  static float $opt$LongToFloat(long a) { if (doThrow) throw new Error(); return (float)a; }
 
   // This method produces a long-to-double Dex instruction.
-  static double $opt$LongToDouble(long a) { return (double)a; }
+  static double $opt$LongToDouble(long a) { if (doThrow) throw new Error(); return (double)a; }
 
   // This method produces a float-to-int Dex instruction.
-  static int $opt$FloatToInt(float a) { return (int)a; }
+  static int $opt$FloatToInt(float a) { if (doThrow) throw new Error(); return (int)a; }
 
   // This method produces a float-to-long Dex instruction.
-  static long $opt$FloatToLong(float a){ return (long)a; }
+  static long $opt$FloatToLong(float a){ if (doThrow) throw new Error(); return (long)a; }
 
   // This method produces a float-to-double Dex instruction.
-  static double $opt$FloatToDouble(float a) { return (double)a; }
+  static double $opt$FloatToDouble(float a) { if (doThrow) throw new Error(); return (double)a; }
 
   // This method produces a double-to-int Dex instruction.
-  static int $opt$DoubleToInt(double a){ return (int)a; }
+  static int $opt$DoubleToInt(double a){ if (doThrow) throw new Error(); return (int)a; }
 
   // This method produces a double-to-long Dex instruction.
-  static long $opt$DoubleToLong(double a){ return (long)a; }
+  static long $opt$DoubleToLong(double a){ if (doThrow) throw new Error(); return (long)a; }
 
   // This method produces a double-to-float Dex instruction.
-  static float $opt$DoubleToFloat(double a) { return (float)a; }
+  static float $opt$DoubleToFloat(double a) { if (doThrow) throw new Error(); return (float)a; }
 
   // These methods produce int-to-byte Dex instructions.
-  static byte $opt$ShortToByte(short a) { return (byte)a; }
-  static byte $opt$IntToByte(int a) { return (byte)a; }
-  static byte $opt$CharToByte(char a) { return (byte)a; }
+  static byte $opt$ShortToByte(short a) { if (doThrow) throw new Error(); return (byte)a; }
+  static byte $opt$IntToByte(int a) { if (doThrow) throw new Error(); return (byte)a; }
+  static byte $opt$CharToByte(char a) { if (doThrow) throw new Error(); return (byte)a; }
 
   // These methods produce int-to-short Dex instructions.
-  static short $opt$ByteToShort(byte a) { return (short)a; }
-  static short $opt$IntToShort(int a) { return (short)a; }
-  static short $opt$CharToShort(char a) { return (short)a; }
+  static short $opt$ByteToShort(byte a) { if (doThrow) throw new Error(); return (short)a; }
+  static short $opt$IntToShort(int a) { if (doThrow) throw new Error(); return (short)a; }
+  static short $opt$CharToShort(char a) { if (doThrow) throw new Error(); return (short)a; }
 
   // These methods produce int-to-char Dex instructions.
-  static char $opt$ByteToChar(byte a) { return (char)a; }
-  static char $opt$ShortToChar(short a) { return (char)a; }
-  static char $opt$IntToChar(int a) { return (char)a; }
+  static char $opt$ByteToChar(byte a) { if (doThrow) throw new Error(); return (char)a; }
+  static char $opt$ShortToChar(short a) { if (doThrow) throw new Error(); return (char)a; }
+  static char $opt$IntToChar(int a) { if (doThrow) throw new Error(); return (char)a; }
 }
diff --git a/test/431-optimizing-arith-shifts/src/Main.java b/test/431-optimizing-arith-shifts/src/Main.java
index d8667c6..86422bd 100644
--- a/test/431-optimizing-arith-shifts/src/Main.java
+++ b/test/431-optimizing-arith-shifts/src/Main.java
@@ -52,7 +52,7 @@
     expectEquals(Integer.MIN_VALUE, $opt$Shl(1073741824, 1));  // overflow
     expectEquals(1073741824, $opt$Shl(268435456, 2));
 
-   // othe nly 5 lower bits should be used for shifting (& 0x1f).
+    // Only the 5 lower bits should be used for shifting (& 0x1f).
     expectEquals(7, $opt$Shl(7, 32));  // 32 & 0x1f = 0
     expectEquals(14, $opt$Shl(7, 33));  // 33 & 0x1f = 1
     expectEquals(32, $opt$Shl(1, 101));  // 101 & 0x1f = 5
@@ -97,6 +97,13 @@
 
     expectEquals(Long.MIN_VALUE, $opt$Shl(7L, Long.MAX_VALUE));
     expectEquals(7L, $opt$Shl(7L, Long.MIN_VALUE));
+
+    // Exercise some special cases handled by backends/simplifier.
+    expectEquals(24L, $opt$ShlConst1(12L));
+    expectEquals(0x2345678900000000L, $opt$ShlConst32(0x123456789L));
+    expectEquals(0x2490249000000000L, $opt$ShlConst33(0x12481248L));
+    expectEquals(0x4920492000000000L, $opt$ShlConst34(0x12481248L));
+    expectEquals(0x9240924000000000L, $opt$ShlConst35(0x12481248L));
   }
 
   private static void shrInt() {
@@ -277,7 +284,7 @@
     return a >>> 2L;
   }
 
-    static int $opt$ShlConst0(int a) {
+  static int $opt$ShlConst0(int a) {
     return a << 0;
   }
 
@@ -301,5 +308,25 @@
     return a >>> 0L;
   }
 
+  static long $opt$ShlConst1(long a) {
+    return a << 1L;
+  }
+
+  static long $opt$ShlConst32(long a) {
+    return a << 32L;
+  }
+
+  static long $opt$ShlConst33(long a) {
+    return a << 33L;
+  }
+
+  static long $opt$ShlConst34(long a) {
+    return a << 34L;
+  }
+
+  static long $opt$ShlConst35(long a) {
+    return a << 35L;
+  }
+
 }
 
diff --git a/test/441-checker-inliner/src/Main.java b/test/441-checker-inliner/src/Main.java
index 631b140..8894d4e 100644
--- a/test/441-checker-inliner/src/Main.java
+++ b/test/441-checker-inliner/src/Main.java
@@ -17,9 +17,9 @@
 public class Main {
 
   // CHECK-START: void Main.InlineVoid() inliner (before)
-  // CHECK-DAG:     [[Const42:i\d+]] IntConstant 42
+  // CHECK-DAG:     <<Const42:i\d+>> IntConstant 42
   // CHECK-DAG:                      InvokeStaticOrDirect
-  // CHECK-DAG:                      InvokeStaticOrDirect [ [[Const42]] ]
+  // CHECK-DAG:                      InvokeStaticOrDirect [<<Const42>>]
 
   // CHECK-START: void Main.InlineVoid() inliner (after)
   // CHECK-NOT:                      InvokeStaticOrDirect
@@ -30,94 +30,94 @@
   }
 
   // CHECK-START: int Main.InlineParameter(int) inliner (before)
-  // CHECK-DAG:     [[Param:i\d+]]  ParameterValue
-  // CHECK-DAG:     [[Result:i\d+]] InvokeStaticOrDirect [ [[Param]] ]
-  // CHECK-DAG:                     Return [ [[Result]] ]
+  // CHECK-DAG:     <<Param:i\d+>>  ParameterValue
+  // CHECK-DAG:     <<Result:i\d+>> InvokeStaticOrDirect [<<Param>>]
+  // CHECK-DAG:                     Return [<<Result>>]
 
   // CHECK-START: int Main.InlineParameter(int) inliner (after)
-  // CHECK-DAG:     [[Param:i\d+]]  ParameterValue
-  // CHECK-DAG:                     Return [ [[Param]] ]
+  // CHECK-DAG:     <<Param:i\d+>>  ParameterValue
+  // CHECK-DAG:                     Return [<<Param>>]
 
   public static int InlineParameter(int a) {
     return returnParameter(a);
   }
 
   // CHECK-START: long Main.InlineWideParameter(long) inliner (before)
-  // CHECK-DAG:     [[Param:j\d+]]  ParameterValue
-  // CHECK-DAG:     [[Result:j\d+]] InvokeStaticOrDirect [ [[Param]] ]
-  // CHECK-DAG:                     Return [ [[Result]] ]
+  // CHECK-DAG:     <<Param:j\d+>>  ParameterValue
+  // CHECK-DAG:     <<Result:j\d+>> InvokeStaticOrDirect [<<Param>>]
+  // CHECK-DAG:                     Return [<<Result>>]
 
   // CHECK-START: long Main.InlineWideParameter(long) inliner (after)
-  // CHECK-DAG:     [[Param:j\d+]]  ParameterValue
-  // CHECK-DAG:                     Return [ [[Param]] ]
+  // CHECK-DAG:     <<Param:j\d+>>  ParameterValue
+  // CHECK-DAG:                     Return [<<Param>>]
 
   public static long InlineWideParameter(long a) {
     return returnWideParameter(a);
   }
 
   // CHECK-START: java.lang.Object Main.InlineReferenceParameter(java.lang.Object) inliner (before)
-  // CHECK-DAG:     [[Param:l\d+]]  ParameterValue
-  // CHECK-DAG:     [[Result:l\d+]] InvokeStaticOrDirect [ [[Param]] ]
-  // CHECK-DAG:                     Return [ [[Result]] ]
+  // CHECK-DAG:     <<Param:l\d+>>  ParameterValue
+  // CHECK-DAG:     <<Result:l\d+>> InvokeStaticOrDirect [<<Param>>]
+  // CHECK-DAG:                     Return [<<Result>>]
 
   // CHECK-START: java.lang.Object Main.InlineReferenceParameter(java.lang.Object) inliner (after)
-  // CHECK-DAG:     [[Param:l\d+]]  ParameterValue
-  // CHECK-DAG:                     Return [ [[Param]] ]
+  // CHECK-DAG:     <<Param:l\d+>>  ParameterValue
+  // CHECK-DAG:                     Return [<<Param>>]
 
   public static Object InlineReferenceParameter(Object o) {
     return returnReferenceParameter(o);
   }
 
   // CHECK-START: int Main.InlineInt() inliner (before)
-  // CHECK-DAG:     [[Result:i\d+]] InvokeStaticOrDirect
-  // CHECK-DAG:                     Return [ [[Result]] ]
+  // CHECK-DAG:     <<Result:i\d+>> InvokeStaticOrDirect
+  // CHECK-DAG:                     Return [<<Result>>]
 
   // CHECK-START: int Main.InlineInt() inliner (after)
-  // CHECK-DAG:     [[Const4:i\d+]] IntConstant 4
-  // CHECK-DAG:                     Return [ [[Const4]] ]
+  // CHECK-DAG:     <<Const4:i\d+>> IntConstant 4
+  // CHECK-DAG:                     Return [<<Const4>>]
 
   public static int InlineInt() {
     return returnInt();
   }
 
   // CHECK-START: long Main.InlineWide() inliner (before)
-  // CHECK-DAG:     [[Result:j\d+]] InvokeStaticOrDirect
-  // CHECK-DAG:                     Return [ [[Result]] ]
+  // CHECK-DAG:     <<Result:j\d+>> InvokeStaticOrDirect
+  // CHECK-DAG:                     Return [<<Result>>]
 
   // CHECK-START: long Main.InlineWide() inliner (after)
-  // CHECK-DAG:     [[Const8:j\d+]] LongConstant 8
-  // CHECK-DAG:                     Return [ [[Const8]] ]
+  // CHECK-DAG:     <<Const8:j\d+>> LongConstant 8
+  // CHECK-DAG:                     Return [<<Const8>>]
 
   public static long InlineWide() {
     return returnWide();
   }
 
   // CHECK-START: int Main.InlineAdd() inliner (before)
-  // CHECK-DAG:     [[Const3:i\d+]] IntConstant 3
-  // CHECK-DAG:     [[Const5:i\d+]] IntConstant 5
-  // CHECK-DAG:     [[Result:i\d+]] InvokeStaticOrDirect
-  // CHECK-DAG:                     Return [ [[Result]] ]
+  // CHECK-DAG:     <<Const3:i\d+>> IntConstant 3
+  // CHECK-DAG:     <<Const5:i\d+>> IntConstant 5
+  // CHECK-DAG:     <<Result:i\d+>> InvokeStaticOrDirect
+  // CHECK-DAG:                     Return [<<Result>>]
 
   // CHECK-START: int Main.InlineAdd() inliner (after)
-  // CHECK-DAG:     [[Const3:i\d+]] IntConstant 3
-  // CHECK-DAG:     [[Const5:i\d+]] IntConstant 5
-  // CHECK-DAG:     [[Add:i\d+]]    Add [ [[Const3]] [[Const5]] ]
-  // CHECK-DAG:                     Return [ [[Add]] ]
+  // CHECK-DAG:     <<Const3:i\d+>> IntConstant 3
+  // CHECK-DAG:     <<Const5:i\d+>> IntConstant 5
+  // CHECK-DAG:     <<Add:i\d+>>    Add [<<Const3>>,<<Const5>>]
+  // CHECK-DAG:                     Return [<<Add>>]
 
   public static int InlineAdd() {
     return returnAdd(3, 5);
   }
 
   // CHECK-START: int Main.InlineFieldAccess() inliner (before)
-  // CHECK-DAG:     [[After:i\d+]]  InvokeStaticOrDirect
-  // CHECK-DAG:                     Return [ [[After]] ]
+  // CHECK-DAG:     <<After:i\d+>>  InvokeStaticOrDirect
+  // CHECK-DAG:                     Return [<<After>>]
 
   // CHECK-START: int Main.InlineFieldAccess() inliner (after)
-  // CHECK-DAG:     [[Const1:i\d+]] IntConstant 1
-  // CHECK-DAG:     [[Before:i\d+]] StaticFieldGet
-  // CHECK-DAG:     [[After:i\d+]]  Add [ [[Before]] [[Const1]] ]
-  // CHECK-DAG:                     StaticFieldSet [ {{l\d+}} [[After]] ]
-  // CHECK-DAG:                     Return [ [[After]] ]
+  // CHECK-DAG:     <<Const1:i\d+>> IntConstant 1
+  // CHECK-DAG:     <<Before:i\d+>> StaticFieldGet
+  // CHECK-DAG:     <<After:i\d+>>  Add [<<Before>>,<<Const1>>]
+  // CHECK-DAG:                     StaticFieldSet [{{l\d+}},<<After>>]
+  // CHECK-DAG:                     Return [<<After>>]
 
   // CHECK-START: int Main.InlineFieldAccess() inliner (after)
   // CHECK-NOT:                     InvokeStaticOrDirect
@@ -127,22 +127,22 @@
   }
 
   // CHECK-START: int Main.InlineWithControlFlow(boolean) inliner (before)
-  // CHECK-DAG:     [[Const1:i\d+]] IntConstant 1
-  // CHECK-DAG:     [[Const3:i\d+]] IntConstant 3
-  // CHECK-DAG:     [[Const5:i\d+]] IntConstant 5
-  // CHECK-DAG:     [[Add:i\d+]]    InvokeStaticOrDirect [ [[Const1]] [[Const3]] ]
-  // CHECK-DAG:     [[Sub:i\d+]]    InvokeStaticOrDirect [ [[Const5]] [[Const3]] ]
-  // CHECK-DAG:     [[Phi:i\d+]]    Phi [ [[Add]] [[Sub]] ]
-  // CHECK-DAG:                     Return [ [[Phi]] ]
+  // CHECK-DAG:     <<Const1:i\d+>> IntConstant 1
+  // CHECK-DAG:     <<Const3:i\d+>> IntConstant 3
+  // CHECK-DAG:     <<Const5:i\d+>> IntConstant 5
+  // CHECK-DAG:     <<Add:i\d+>>    InvokeStaticOrDirect [<<Const1>>,<<Const3>>]
+  // CHECK-DAG:     <<Sub:i\d+>>    InvokeStaticOrDirect [<<Const5>>,<<Const3>>]
+  // CHECK-DAG:     <<Phi:i\d+>>    Phi [<<Add>>,<<Sub>>]
+  // CHECK-DAG:                     Return [<<Phi>>]
 
   // CHECK-START: int Main.InlineWithControlFlow(boolean) inliner (after)
-  // CHECK-DAG:     [[Const1:i\d+]] IntConstant 1
-  // CHECK-DAG:     [[Const3:i\d+]] IntConstant 3
-  // CHECK-DAG:     [[Const5:i\d+]] IntConstant 5
-  // CHECK-DAG:     [[Add:i\d+]]    Add [ [[Const1]] [[Const3]] ]
-  // CHECK-DAG:     [[Sub:i\d+]]    Sub [ [[Const5]] [[Const3]] ]
-  // CHECK-DAG:     [[Phi:i\d+]]    Phi [ [[Add]] [[Sub]] ]
-  // CHECK-DAG:                     Return [ [[Phi]] ]
+  // CHECK-DAG:     <<Const1:i\d+>> IntConstant 1
+  // CHECK-DAG:     <<Const3:i\d+>> IntConstant 3
+  // CHECK-DAG:     <<Const5:i\d+>> IntConstant 5
+  // CHECK-DAG:     <<Add:i\d+>>    Add [<<Const1>>,<<Const3>>]
+  // CHECK-DAG:     <<Sub:i\d+>>    Sub [<<Const5>>,<<Const3>>]
+  // CHECK-DAG:     <<Phi:i\d+>>    Phi [<<Add>>,<<Sub>>]
+  // CHECK-DAG:                     Return [<<Phi>>]
 
   public static int InlineWithControlFlow(boolean cond) {
     int x, const1, const3, const5;
diff --git a/test/442-checker-constant-folding/src/Main.java b/test/442-checker-constant-folding/src/Main.java
index 6b21fed..c258db9 100644
--- a/test/442-checker-constant-folding/src/Main.java
+++ b/test/442-checker-constant-folding/src/Main.java
@@ -16,6 +16,12 @@
 
 public class Main {
 
+  public static void assertFalse(boolean condition) {
+    if (condition) {
+      throw new Error();
+    }
+  }
+
   public static void assertIntEquals(int expected, int result) {
     if (expected != result) {
       throw new Error("Expected: " + expected + ", found: " + result);
@@ -28,19 +34,31 @@
     }
   }
 
+  public static void assertFloatEquals(float expected, float result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static void assertDoubleEquals(double expected, double result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
   /**
    * Tiny three-register program exercising int constant folding
    * on negation.
    */
 
   // CHECK-START: int Main.IntNegation() constant_folding (before)
-  // CHECK-DAG:     [[Const42:i\d+]]  IntConstant 42
-  // CHECK-DAG:     [[Neg:i\d+]]      Neg [ [[Const42]] ]
-  // CHECK-DAG:                       Return [ [[Neg]] ]
+  // CHECK-DAG:     <<Const42:i\d+>>  IntConstant 42
+  // CHECK-DAG:     <<Neg:i\d+>>      Neg [<<Const42>>]
+  // CHECK-DAG:                       Return [<<Neg>>]
 
   // CHECK-START: int Main.IntNegation() constant_folding (after)
-  // CHECK-DAG:     [[ConstN42:i\d+]] IntConstant -42
-  // CHECK-DAG:                       Return [ [[ConstN42]] ]
+  // CHECK-DAG:     <<ConstN42:i\d+>> IntConstant -42
+  // CHECK-DAG:                       Return [<<ConstN42>>]
 
   public static int IntNegation() {
     int x, y;
@@ -55,14 +73,14 @@
    */
 
   // CHECK-START: int Main.IntAddition1() constant_folding (before)
-  // CHECK-DAG:     [[Const1:i\d+]]  IntConstant 1
-  // CHECK-DAG:     [[Const2:i\d+]]  IntConstant 2
-  // CHECK-DAG:     [[Add:i\d+]]     Add [ [[Const1]] [[Const2]] ]
-  // CHECK-DAG:                      Return [ [[Add]] ]
+  // CHECK-DAG:     <<Const1:i\d+>>  IntConstant 1
+  // CHECK-DAG:     <<Const2:i\d+>>  IntConstant 2
+  // CHECK-DAG:     <<Add:i\d+>>     Add [<<Const1>>,<<Const2>>]
+  // CHECK-DAG:                      Return [<<Add>>]
 
   // CHECK-START: int Main.IntAddition1() constant_folding (after)
-  // CHECK-DAG:     [[Const3:i\d+]]  IntConstant 3
-  // CHECK-DAG:                      Return [ [[Const3]] ]
+  // CHECK-DAG:     <<Const3:i\d+>>  IntConstant 3
+  // CHECK-DAG:                      Return [<<Const3>>]
 
   public static int IntAddition1() {
     int a, b, c;
@@ -78,18 +96,18 @@
   */
 
   // CHECK-START: int Main.IntAddition2() constant_folding (before)
-  // CHECK-DAG:     [[Const1:i\d+]]  IntConstant 1
-  // CHECK-DAG:     [[Const2:i\d+]]  IntConstant 2
-  // CHECK-DAG:     [[Const5:i\d+]]  IntConstant 5
-  // CHECK-DAG:     [[Const6:i\d+]]  IntConstant 6
-  // CHECK-DAG:     [[Add1:i\d+]]    Add [ [[Const1]] [[Const2]] ]
-  // CHECK-DAG:     [[Add2:i\d+]]    Add [ [[Const5]] [[Const6]] ]
-  // CHECK-DAG:     [[Add3:i\d+]]    Add [ [[Add1]] [[Add2]] ]
-  // CHECK-DAG:                      Return [ [[Add3]] ]
+  // CHECK-DAG:     <<Const1:i\d+>>  IntConstant 1
+  // CHECK-DAG:     <<Const2:i\d+>>  IntConstant 2
+  // CHECK-DAG:     <<Const5:i\d+>>  IntConstant 5
+  // CHECK-DAG:     <<Const6:i\d+>>  IntConstant 6
+  // CHECK-DAG:     <<Add1:i\d+>>    Add [<<Const1>>,<<Const2>>]
+  // CHECK-DAG:     <<Add2:i\d+>>    Add [<<Const5>>,<<Const6>>]
+  // CHECK-DAG:     <<Add3:i\d+>>    Add [<<Add1>>,<<Add2>>]
+  // CHECK-DAG:                      Return [<<Add3>>]
 
   // CHECK-START: int Main.IntAddition2() constant_folding (after)
-  // CHECK-DAG:     [[Const14:i\d+]] IntConstant 14
-  // CHECK-DAG:                      Return [ [[Const14]] ]
+  // CHECK-DAG:     <<Const14:i\d+>> IntConstant 14
+  // CHECK-DAG:                      Return [<<Const14>>]
 
   public static int IntAddition2() {
     int a, b, c;
@@ -109,14 +127,14 @@
    */
 
   // CHECK-START: int Main.IntSubtraction() constant_folding (before)
-  // CHECK-DAG:     [[Const6:i\d+]]  IntConstant 6
-  // CHECK-DAG:     [[Const2:i\d+]]  IntConstant 2
-  // CHECK-DAG:     [[Sub:i\d+]]     Sub [ [[Const6]] [[Const2]] ]
-  // CHECK-DAG:                      Return [ [[Sub]] ]
+  // CHECK-DAG:     <<Const6:i\d+>>  IntConstant 6
+  // CHECK-DAG:     <<Const2:i\d+>>  IntConstant 2
+  // CHECK-DAG:     <<Sub:i\d+>>     Sub [<<Const6>>,<<Const2>>]
+  // CHECK-DAG:                      Return [<<Sub>>]
 
   // CHECK-START: int Main.IntSubtraction() constant_folding (after)
-  // CHECK-DAG:     [[Const4:i\d+]]  IntConstant 4
-  // CHECK-DAG:                      Return [ [[Const4]] ]
+  // CHECK-DAG:     <<Const4:i\d+>>  IntConstant 4
+  // CHECK-DAG:                      Return [<<Const4>>]
 
   public static int IntSubtraction() {
     int a, b, c;
@@ -132,14 +150,14 @@
    */
 
   // CHECK-START: long Main.LongAddition() constant_folding (before)
-  // CHECK-DAG:     [[Const1:j\d+]]  LongConstant 1
-  // CHECK-DAG:     [[Const2:j\d+]]  LongConstant 2
-  // CHECK-DAG:     [[Add:j\d+]]     Add [ [[Const1]] [[Const2]] ]
-  // CHECK-DAG:                      Return [ [[Add]] ]
+  // CHECK-DAG:     <<Const1:j\d+>>  LongConstant 1
+  // CHECK-DAG:     <<Const2:j\d+>>  LongConstant 2
+  // CHECK-DAG:     <<Add:j\d+>>     Add [<<Const1>>,<<Const2>>]
+  // CHECK-DAG:                      Return [<<Add>>]
 
   // CHECK-START: long Main.LongAddition() constant_folding (after)
-  // CHECK-DAG:     [[Const3:j\d+]]  LongConstant 3
-  // CHECK-DAG:                      Return [ [[Const3]] ]
+  // CHECK-DAG:     <<Const3:j\d+>>  LongConstant 3
+  // CHECK-DAG:                      Return [<<Const3>>]
 
   public static long LongAddition() {
     long a, b, c;
@@ -155,14 +173,14 @@
    */
 
   // CHECK-START: long Main.LongSubtraction() constant_folding (before)
-  // CHECK-DAG:     [[Const6:j\d+]]  LongConstant 6
-  // CHECK-DAG:     [[Const2:j\d+]]  LongConstant 2
-  // CHECK-DAG:     [[Sub:j\d+]]     Sub [ [[Const6]] [[Const2]] ]
-  // CHECK-DAG:                      Return [ [[Sub]] ]
+  // CHECK-DAG:     <<Const6:j\d+>>  LongConstant 6
+  // CHECK-DAG:     <<Const2:j\d+>>  LongConstant 2
+  // CHECK-DAG:     <<Sub:j\d+>>     Sub [<<Const6>>,<<Const2>>]
+  // CHECK-DAG:                      Return [<<Sub>>]
 
   // CHECK-START: long Main.LongSubtraction() constant_folding (after)
-  // CHECK-DAG:     [[Const4:j\d+]]  LongConstant 4
-  // CHECK-DAG:                      Return [ [[Const4]] ]
+  // CHECK-DAG:     <<Const4:j\d+>>  LongConstant 4
+  // CHECK-DAG:                      Return [<<Const4>>]
 
   public static long LongSubtraction() {
     long a, b, c;
@@ -177,14 +195,14 @@
    */
 
   // CHECK-START: int Main.StaticCondition() constant_folding (before)
-  // CHECK-DAG:     [[Const7:i\d+]]  IntConstant 7
-  // CHECK-DAG:     [[Const2:i\d+]]  IntConstant 2
-  // CHECK-DAG:     [[Cond:z\d+]]    GreaterThanOrEqual [ [[Const7]] [[Const2]] ]
-  // CHECK-DAG:                      If [ [[Cond]] ]
+  // CHECK-DAG:     <<Const7:i\d+>>  IntConstant 7
+  // CHECK-DAG:     <<Const2:i\d+>>  IntConstant 2
+  // CHECK-DAG:     <<Cond:z\d+>>    GreaterThanOrEqual [<<Const7>>,<<Const2>>]
+  // CHECK-DAG:                      If [<<Cond>>]
 
   // CHECK-START: int Main.StaticCondition() constant_folding (after)
-  // CHECK-DAG:     [[Const1:i\d+]]  IntConstant 1
-  // CHECK-DAG:                      If [ [[Const1]] ]
+  // CHECK-DAG:     <<Const1:i\d+>>  IntConstant 1
+  // CHECK-DAG:                      If [<<Const1>>]
 
   public static int StaticCondition() {
     int a, b, c;
@@ -207,18 +225,18 @@
    */
 
   // CHECK-START: int Main.JumpsAndConditionals(boolean) constant_folding (before)
-  // CHECK-DAG:     [[Const2:i\d+]]  IntConstant 2
-  // CHECK-DAG:     [[Const5:i\d+]]  IntConstant 5
-  // CHECK-DAG:     [[Add:i\d+]]     Add [ [[Const5]] [[Const2]] ]
-  // CHECK-DAG:     [[Sub:i\d+]]     Sub [ [[Const5]] [[Const2]] ]
-  // CHECK-DAG:     [[Phi:i\d+]]     Phi [ [[Add]] [[Sub]] ]
-  // CHECK-DAG:                      Return [ [[Phi]] ]
+  // CHECK-DAG:     <<Const2:i\d+>>  IntConstant 2
+  // CHECK-DAG:     <<Const5:i\d+>>  IntConstant 5
+  // CHECK-DAG:     <<Add:i\d+>>     Add [<<Const5>>,<<Const2>>]
+  // CHECK-DAG:     <<Sub:i\d+>>     Sub [<<Const5>>,<<Const2>>]
+  // CHECK-DAG:     <<Phi:i\d+>>     Phi [<<Add>>,<<Sub>>]
+  // CHECK-DAG:                      Return [<<Phi>>]
 
   // CHECK-START: int Main.JumpsAndConditionals(boolean) constant_folding (after)
-  // CHECK-DAG:     [[Const3:i\d+]]  IntConstant 3
-  // CHECK-DAG:     [[Const7:i\d+]]  IntConstant 7
-  // CHECK-DAG:     [[Phi:i\d+]]     Phi [ [[Const7]] [[Const3]] ]
-  // CHECK-DAG:                      Return [ [[Phi]] ]
+  // CHECK-DAG:     <<Const3:i\d+>>  IntConstant 3
+  // CHECK-DAG:     <<Const7:i\d+>>  IntConstant 7
+  // CHECK-DAG:     <<Phi:i\d+>>     Phi [<<Const7>>,<<Const3>>]
+  // CHECK-DAG:                      Return [<<Phi>>]
 
   public static int JumpsAndConditionals(boolean cond) {
     int a, b, c;
@@ -236,177 +254,393 @@
    */
 
   // CHECK-START: int Main.And0(int) constant_folding (before)
-  // CHECK-DAG:     [[Arg:i\d+]]      ParameterValue
-  // CHECK-DAG:     [[Const0:i\d+]]   IntConstant 0
-  // CHECK-DAG:     [[And:i\d+]]      And [ [[Arg]] [[Const0]] ]
-  // CHECK-DAG:                       Return [ [[And]] ]
+  // CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
+  // CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
+  // CHECK-DAG:     <<And:i\d+>>      And [<<Arg>>,<<Const0>>]
+  // CHECK-DAG:                       Return [<<And>>]
 
   // CHECK-START: int Main.And0(int) constant_folding (after)
-  // CHECK-DAG:     [[Arg:i\d+]]      ParameterValue
-  // CHECK-DAG:     [[Const0:i\d+]]   IntConstant 0
+  // CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
+  // CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
   // CHECK-NOT:                       And
-  // CHECK-DAG:                       Return [ [[Const0]] ]
+  // CHECK-DAG:                       Return [<<Const0>>]
 
   public static int And0(int arg) {
     return arg & 0;
   }
 
   // CHECK-START: long Main.Mul0(long) constant_folding (before)
-  // CHECK-DAG:     [[Arg:j\d+]]      ParameterValue
-  // CHECK-DAG:     [[Const0:j\d+]]   LongConstant 0
-  // CHECK-DAG:     [[Mul:j\d+]]      Mul [ [[Arg]] [[Const0]] ]
-  // CHECK-DAG:                       Return [ [[Mul]] ]
+  // CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
+  // CHECK-DAG:     <<Const0:j\d+>>   LongConstant 0
+  // CHECK-DAG:     <<Mul:j\d+>>      Mul [<<Arg>>,<<Const0>>]
+  // CHECK-DAG:                       Return [<<Mul>>]
 
   // CHECK-START: long Main.Mul0(long) constant_folding (after)
-  // CHECK-DAG:     [[Arg:j\d+]]      ParameterValue
-  // CHECK-DAG:     [[Const0:j\d+]]   LongConstant 0
+  // CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
+  // CHECK-DAG:     <<Const0:j\d+>>   LongConstant 0
   // CHECK-NOT:                       Mul
-  // CHECK-DAG:                       Return [ [[Const0]] ]
+  // CHECK-DAG:                       Return [<<Const0>>]
 
   public static long Mul0(long arg) {
     return arg * 0;
   }
 
   // CHECK-START: int Main.OrAllOnes(int) constant_folding (before)
-  // CHECK-DAG:     [[Arg:i\d+]]      ParameterValue
-  // CHECK-DAG:     [[ConstF:i\d+]]   IntConstant -1
-  // CHECK-DAG:     [[Or:i\d+]]       Or [ [[Arg]] [[ConstF]] ]
-  // CHECK-DAG:                       Return [ [[Or]] ]
+  // CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
+  // CHECK-DAG:     <<ConstF:i\d+>>   IntConstant -1
+  // CHECK-DAG:     <<Or:i\d+>>       Or [<<Arg>>,<<ConstF>>]
+  // CHECK-DAG:                       Return [<<Or>>]
 
   // CHECK-START: int Main.OrAllOnes(int) constant_folding (after)
-  // CHECK-DAG:     [[ConstF:i\d+]]   IntConstant -1
+  // CHECK-DAG:     <<ConstF:i\d+>>   IntConstant -1
   // CHECK-NOT:                       Or
-  // CHECK-DAG:                       Return [ [[ConstF]] ]
+  // CHECK-DAG:                       Return [<<ConstF>>]
 
   public static int OrAllOnes(int arg) {
     return arg | -1;
   }
 
   // CHECK-START: long Main.Rem0(long) constant_folding (before)
-  // CHECK-DAG:     [[Arg:j\d+]]           ParameterValue
-  // CHECK-DAG:     [[Const0:j\d+]]        LongConstant 0
-  // CHECK-DAG:     [[DivZeroCheck:j\d+]]  DivZeroCheck [ [[Arg]] ]
-  // CHECK-DAG:     [[Rem:j\d+]]           Rem [ [[Const0]] [[DivZeroCheck]] ]
-  // CHECK-DAG:                            Return [ [[Rem]] ]
+  // CHECK-DAG:     <<Arg:j\d+>>           ParameterValue
+  // CHECK-DAG:     <<Const0:j\d+>>        LongConstant 0
+  // CHECK-DAG:     <<DivZeroCheck:j\d+>>  DivZeroCheck [<<Arg>>]
+  // CHECK-DAG:     <<Rem:j\d+>>           Rem [<<Const0>>,<<DivZeroCheck>>]
+  // CHECK-DAG:                            Return [<<Rem>>]
 
   // CHECK-START: long Main.Rem0(long) constant_folding (after)
-  // CHECK-DAG:     [[Const0:j\d+]]        LongConstant 0
+  // CHECK-DAG:     <<Const0:j\d+>>        LongConstant 0
   // CHECK-NOT:                            Rem
-  // CHECK-DAG:                            Return [ [[Const0]] ]
+  // CHECK-DAG:                            Return [<<Const0>>]
 
   public static long Rem0(long arg) {
     return 0 % arg;
   }
 
   // CHECK-START: int Main.Rem1(int) constant_folding (before)
-  // CHECK-DAG:     [[Arg:i\d+]]      ParameterValue
-  // CHECK-DAG:     [[Const1:i\d+]]   IntConstant 1
-  // CHECK-DAG:     [[Rem:i\d+]]      Rem [ [[Arg]] [[Const1]] ]
-  // CHECK-DAG:                       Return [ [[Rem]] ]
+  // CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
+  // CHECK-DAG:     <<Const1:i\d+>>   IntConstant 1
+  // CHECK-DAG:     <<Rem:i\d+>>      Rem [<<Arg>>,<<Const1>>]
+  // CHECK-DAG:                       Return [<<Rem>>]
 
   // CHECK-START: int Main.Rem1(int) constant_folding (after)
-  // CHECK-DAG:     [[Const0:i\d+]]   IntConstant 0
+  // CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
   // CHECK-NOT:                       Rem
-  // CHECK-DAG:                       Return [ [[Const0]] ]
+  // CHECK-DAG:                       Return [<<Const0>>]
 
   public static int Rem1(int arg) {
     return arg % 1;
   }
 
   // CHECK-START: long Main.RemN1(long) constant_folding (before)
-  // CHECK-DAG:     [[Arg:j\d+]]           ParameterValue
-  // CHECK-DAG:     [[ConstN1:j\d+]]       LongConstant -1
-  // CHECK-DAG:     [[DivZeroCheck:j\d+]]  DivZeroCheck [ [[Arg]] ]
-  // CHECK-DAG:     [[Rem:j\d+]]           Rem [ [[Arg]] [[DivZeroCheck]] ]
-  // CHECK-DAG:                            Return [ [[Rem]] ]
+  // CHECK-DAG:     <<Arg:j\d+>>           ParameterValue
+  // CHECK-DAG:     <<ConstN1:j\d+>>       LongConstant -1
+  // CHECK-DAG:     <<DivZeroCheck:j\d+>>  DivZeroCheck [<<ConstN1>>]
+  // CHECK-DAG:     <<Rem:j\d+>>           Rem [<<Arg>>,<<DivZeroCheck>>]
+  // CHECK-DAG:                            Return [<<Rem>>]
 
   // CHECK-START: long Main.RemN1(long) constant_folding (after)
-  // CHECK-DAG:     [[Const0:j\d+]]        LongConstant 0
+  // CHECK-DAG:     <<Const0:j\d+>>        LongConstant 0
   // CHECK-NOT:                            Rem
-  // CHECK-DAG:                            Return [ [[Const0]] ]
+  // CHECK-DAG:                            Return [<<Const0>>]
 
   public static long RemN1(long arg) {
     return arg % -1;
   }
 
   // CHECK-START: int Main.Shl0(int) constant_folding (before)
-  // CHECK-DAG:     [[Arg:i\d+]]      ParameterValue
-  // CHECK-DAG:     [[Const0:i\d+]]   IntConstant 0
-  // CHECK-DAG:     [[Shl:i\d+]]      Shl [ [[Const0]] [[Arg]] ]
-  // CHECK-DAG:                       Return [ [[Shl]] ]
+  // CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
+  // CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
+  // CHECK-DAG:     <<Shl:i\d+>>      Shl [<<Const0>>,<<Arg>>]
+  // CHECK-DAG:                       Return [<<Shl>>]
 
   // CHECK-START: int Main.Shl0(int) constant_folding (after)
-  // CHECK-DAG:     [[Arg:i\d+]]      ParameterValue
-  // CHECK-DAG:     [[Const0:i\d+]]   IntConstant 0
+  // CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
+  // CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
   // CHECK-NOT:                       Shl
-  // CHECK-DAG:                       Return [ [[Const0]] ]
+  // CHECK-DAG:                       Return [<<Const0>>]
 
   public static int Shl0(int arg) {
     return 0 << arg;
   }
 
   // CHECK-START: long Main.Shr0(int) constant_folding (before)
-  // CHECK-DAG:     [[Arg:i\d+]]      ParameterValue
-  // CHECK-DAG:     [[Const0:j\d+]]   LongConstant 0
-  // CHECK-DAG:     [[Shr:j\d+]]      Shr [ [[Const0]] [[Arg]] ]
-  // CHECK-DAG:                       Return [ [[Shr]] ]
+  // CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
+  // CHECK-DAG:     <<Const0:j\d+>>   LongConstant 0
+  // CHECK-DAG:     <<Shr:j\d+>>      Shr [<<Const0>>,<<Arg>>]
+  // CHECK-DAG:                       Return [<<Shr>>]
 
   // CHECK-START: long Main.Shr0(int) constant_folding (after)
-  // CHECK-DAG:     [[Arg:i\d+]]      ParameterValue
-  // CHECK-DAG:     [[Const0:j\d+]]   LongConstant 0
+  // CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
+  // CHECK-DAG:     <<Const0:j\d+>>   LongConstant 0
   // CHECK-NOT:                       Shr
-  // CHECK-DAG:                       Return [ [[Const0]] ]
+  // CHECK-DAG:                       Return [<<Const0>>]
 
   public static long Shr0(int arg) {
     return (long)0 >> arg;
   }
 
   // CHECK-START: long Main.SubSameLong(long) constant_folding (before)
-  // CHECK-DAG:     [[Arg:j\d+]]      ParameterValue
-  // CHECK-DAG:     [[Sub:j\d+]]      Sub [ [[Arg]] [[Arg]] ]
-  // CHECK-DAG:                       Return [ [[Sub]] ]
+  // CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
+  // CHECK-DAG:     <<Sub:j\d+>>      Sub [<<Arg>>,<<Arg>>]
+  // CHECK-DAG:                       Return [<<Sub>>]
 
   // CHECK-START: long Main.SubSameLong(long) constant_folding (after)
-  // CHECK-DAG:     [[Arg:j\d+]]      ParameterValue
-  // CHECK-DAG:     [[Const0:j\d+]]   LongConstant 0
+  // CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
+  // CHECK-DAG:     <<Const0:j\d+>>   LongConstant 0
   // CHECK-NOT:                       Sub
-  // CHECK-DAG:                       Return [ [[Const0]] ]
+  // CHECK-DAG:                       Return [<<Const0>>]
 
   public static long SubSameLong(long arg) {
     return arg - arg;
   }
 
   // CHECK-START: int Main.UShr0(int) constant_folding (before)
-  // CHECK-DAG:     [[Arg:i\d+]]      ParameterValue
-  // CHECK-DAG:     [[Const0:i\d+]]   IntConstant 0
-  // CHECK-DAG:     [[UShr:i\d+]]     UShr [ [[Const0]] [[Arg]] ]
-  // CHECK-DAG:                       Return [ [[UShr]] ]
+  // CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
+  // CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
+  // CHECK-DAG:     <<UShr:i\d+>>     UShr [<<Const0>>,<<Arg>>]
+  // CHECK-DAG:                       Return [<<UShr>>]
 
   // CHECK-START: int Main.UShr0(int) constant_folding (after)
-  // CHECK-DAG:     [[Arg:i\d+]]      ParameterValue
-  // CHECK-DAG:     [[Const0:i\d+]]   IntConstant 0
+  // CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
+  // CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
   // CHECK-NOT:                       UShr
-  // CHECK-DAG:                       Return [ [[Const0]] ]
+  // CHECK-DAG:                       Return [<<Const0>>]
 
   public static int UShr0(int arg) {
     return 0 >>> arg;
   }
 
   // CHECK-START: int Main.XorSameInt(int) constant_folding (before)
-  // CHECK-DAG:     [[Arg:i\d+]]      ParameterValue
-  // CHECK-DAG:     [[Xor:i\d+]]      Xor [ [[Arg]] [[Arg]] ]
-  // CHECK-DAG:                       Return [ [[Xor]] ]
+  // CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
+  // CHECK-DAG:     <<Xor:i\d+>>      Xor [<<Arg>>,<<Arg>>]
+  // CHECK-DAG:                       Return [<<Xor>>]
 
   // CHECK-START: int Main.XorSameInt(int) constant_folding (after)
-  // CHECK-DAG:     [[Arg:i\d+]]      ParameterValue
-  // CHECK-DAG:     [[Const0:i\d+]]   IntConstant 0
+  // CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
+  // CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
   // CHECK-NOT:                       Xor
-  // CHECK-DAG:                       Return [ [[Const0]] ]
+  // CHECK-DAG:                       Return [<<Const0>>]
 
   public static int XorSameInt(int arg) {
     return arg ^ arg;
   }
 
+  // CHECK-START: boolean Main.CmpFloatGreaterThanNaN(float) constant_folding (before)
+  // CHECK-DAG:     <<Arg:f\d+>>      ParameterValue
+  // CHECK-DAG:     <<ConstNan:f\d+>> FloatConstant nan
+  // CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
+  // CHECK-DAG:                       IntConstant 1
+  // CHECK-DAG:     <<Cmp:i\d+>>      Compare [<<Arg>>,<<ConstNan>>]
+  // CHECK-DAG:     <<Le:z\d+>>       LessThanOrEqual [<<Cmp>>,<<Const0>>]
+  // CHECK-DAG:                       If [<<Le>>]
+
+  // CHECK-START: boolean Main.CmpFloatGreaterThanNaN(float) constant_folding (after)
+  // CHECK-DAG:                       ParameterValue
+  // CHECK-DAG:                       FloatConstant nan
+  // CHECK-DAG:                       IntConstant 0
+  // CHECK-DAG:     <<Const1:i\d+>>   IntConstant 1
+  // CHECK-DAG:                       If [<<Const1>>]
+
+  // CHECK-START: boolean Main.CmpFloatGreaterThanNaN(float) constant_folding (after)
+  // CHECK-NOT:                       Compare
+  // CHECK-NOT:                       LessThanOrEqual
+
+  public static boolean CmpFloatGreaterThanNaN(float arg) {
+    return arg > Float.NaN;
+  }
+
+  // CHECK-START: boolean Main.CmpDoubleLessThanNaN(double) constant_folding (before)
+  // CHECK-DAG:     <<Arg:d\d+>>      ParameterValue
+  // CHECK-DAG:     <<ConstNan:d\d+>> DoubleConstant nan
+  // CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
+  // CHECK-DAG:                       IntConstant 1
+  // CHECK-DAG:     <<Cmp:i\d+>>      Compare [<<Arg>>,<<ConstNan>>]
+  // CHECK-DAG:     <<Ge:z\d+>>       GreaterThanOrEqual [<<Cmp>>,<<Const0>>]
+  // CHECK-DAG:                       If [<<Ge>>]
+
+  // CHECK-START: boolean Main.CmpDoubleLessThanNaN(double) constant_folding (after)
+  // CHECK-DAG:                       ParameterValue
+  // CHECK-DAG:                       DoubleConstant nan
+  // CHECK-DAG:                       IntConstant 0
+  // CHECK-DAG:     <<Const1:i\d+>>   IntConstant 1
+  // CHECK-DAG:                       If [<<Const1>>]
+
+  // CHECK-START: boolean Main.CmpDoubleLessThanNaN(double) constant_folding (after)
+  // CHECK-NOT:                       Compare
+  // CHECK-NOT:                       GreaterThanOrEqual
+
+  public static boolean CmpDoubleLessThanNaN(double arg) {
+    return arg < Double.NaN;
+  }
+
+  // CHECK-START: int Main.ReturnInt33() constant_folding (before)
+  // CHECK-DAG:     <<Const33:j\d+>>  LongConstant 33
+  // CHECK-DAG:     <<Convert:i\d+>>  TypeConversion [<<Const33>>]
+  // CHECK-DAG:                       Return [<<Convert>>]
+
+  // CHECK-START: int Main.ReturnInt33() constant_folding (after)
+  // CHECK-DAG:     <<Const33:i\d+>>  IntConstant 33
+  // CHECK-DAG:                       Return [<<Const33>>]
+
+  public static int ReturnInt33() {
+    long imm = 33L;
+    return (int) imm;
+  }
+
+  // CHECK-START: int Main.ReturnIntMax() constant_folding (before)
+  // CHECK-DAG:     <<ConstMax:f\d+>> FloatConstant 1e+34
+  // CHECK-DAG:     <<Convert:i\d+>>  TypeConversion [<<ConstMax>>]
+  // CHECK-DAG:                       Return [<<Convert>>]
+
+  // CHECK-START: int Main.ReturnIntMax() constant_folding (after)
+  // CHECK-DAG:     <<ConstMax:i\d+>> IntConstant 2147483647
+  // CHECK-DAG:                       Return [<<ConstMax>>]
+
+  public static int ReturnIntMax() {
+    float imm = 1.0e34f;
+    return (int) imm;
+  }
+
+  // CHECK-START: int Main.ReturnInt0() constant_folding (before)
+  // CHECK-DAG:     <<ConstNaN:d\d+>> DoubleConstant nan
+  // CHECK-DAG:     <<Convert:i\d+>>  TypeConversion [<<ConstNaN>>]
+  // CHECK-DAG:                       Return [<<Convert>>]
+
+  // CHECK-START: int Main.ReturnInt0() constant_folding (after)
+  // CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
+  // CHECK-DAG:                       Return [<<Const0>>]
+
+  public static int ReturnInt0() {
+    double imm = Double.NaN;
+    return (int) imm;
+  }
+
+  // CHECK-START: long Main.ReturnLong33() constant_folding (before)
+  // CHECK-DAG:     <<Const33:i\d+>>  IntConstant 33
+  // CHECK-DAG:     <<Convert:j\d+>>  TypeConversion [<<Const33>>]
+  // CHECK-DAG:                       Return [<<Convert>>]
+
+  // CHECK-START: long Main.ReturnLong33() constant_folding (after)
+  // CHECK-DAG:     <<Const33:j\d+>>  LongConstant 33
+  // CHECK-DAG:                       Return [<<Const33>>]
+
+  public static long ReturnLong33() {
+    int imm = 33;
+    return (long) imm;
+  }
+
+  // CHECK-START: long Main.ReturnLong34() constant_folding (before)
+  // CHECK-DAG:     <<Const34:f\d+>>  FloatConstant 34
+  // CHECK-DAG:     <<Convert:j\d+>>  TypeConversion [<<Const34>>]
+  // CHECK-DAG:                       Return [<<Convert>>]
+
+  // CHECK-START: long Main.ReturnLong34() constant_folding (after)
+  // CHECK-DAG:     <<Const34:j\d+>>  LongConstant 34
+  // CHECK-DAG:                       Return [<<Const34>>]
+
+  public static long ReturnLong34() {
+    float imm = 34.0f;
+    return (long) imm;
+  }
+
+  // CHECK-START: long Main.ReturnLong0() constant_folding (before)
+  // CHECK-DAG:     <<ConstNaN:d\d+>> DoubleConstant nan
+  // CHECK-DAG:     <<Convert:j\d+>>  TypeConversion [<<ConstNaN>>]
+  // CHECK-DAG:                       Return [<<Convert>>]
+
+  // CHECK-START: long Main.ReturnLong0() constant_folding (after)
+  // CHECK-DAG:     <<Const0:j\d+>>   LongConstant 0
+  // CHECK-DAG:                       Return [<<Const0>>]
+
+  public static long ReturnLong0() {
+    double imm = -Double.NaN;
+    return (long) imm;
+  }
+
+  // CHECK-START: float Main.ReturnFloat33() constant_folding (before)
+  // CHECK-DAG:     <<Const33:i\d+>>  IntConstant 33
+  // CHECK-DAG:     <<Convert:f\d+>>  TypeConversion [<<Const33>>]
+  // CHECK-DAG:                       Return [<<Convert>>]
+
+  // CHECK-START: float Main.ReturnFloat33() constant_folding (after)
+  // CHECK-DAG:     <<Const33:f\d+>>  FloatConstant 33
+  // CHECK-DAG:                       Return [<<Const33>>]
+
+  public static float ReturnFloat33() {
+    int imm = 33;
+    return (float) imm;
+  }
+
+  // CHECK-START: float Main.ReturnFloat34() constant_folding (before)
+  // CHECK-DAG:     <<Const34:j\d+>>  LongConstant 34
+  // CHECK-DAG:     <<Convert:f\d+>>  TypeConversion [<<Const34>>]
+  // CHECK-DAG:                       Return [<<Convert>>]
+
+  // CHECK-START: float Main.ReturnFloat34() constant_folding (after)
+  // CHECK-DAG:     <<Const34:f\d+>>  FloatConstant 34
+  // CHECK-DAG:                       Return [<<Const34>>]
+
+  public static float ReturnFloat34() {
+    long imm = 34L;
+    return (float) imm;
+  }
+
+  // CHECK-START: float Main.ReturnFloat99P25() constant_folding (before)
+  // CHECK-DAG:     <<Const:d\d+>>    DoubleConstant 99.25
+  // CHECK-DAG:     <<Convert:f\d+>>  TypeConversion [<<Const>>]
+  // CHECK-DAG:                       Return [<<Convert>>]
+
+  // CHECK-START: float Main.ReturnFloat99P25() constant_folding (after)
+  // CHECK-DAG:     <<Const:f\d+>>    FloatConstant 99.25
+  // CHECK-DAG:                       Return [<<Const>>]
+
+  public static float ReturnFloat99P25() {
+    double imm = 99.25;
+    return (float) imm;
+  }
+
+  // CHECK-START: double Main.ReturnDouble33() constant_folding (before)
+  // CHECK-DAG:     <<Const33:i\d+>>  IntConstant 33
+  // CHECK-DAG:     <<Convert:d\d+>>  TypeConversion [<<Const33>>]
+  // CHECK-DAG:                       Return [<<Convert>>]
+
+  // CHECK-START: double Main.ReturnDouble33() constant_folding (after)
+  // CHECK-DAG:     <<Const33:d\d+>>  DoubleConstant 33
+  // CHECK-DAG:                       Return [<<Const33>>]
+
+  public static double ReturnDouble33() {
+    int imm = 33;
+    return (double) imm;
+  }
+
+  // CHECK-START: double Main.ReturnDouble34() constant_folding (before)
+  // CHECK-DAG:     <<Const34:j\d+>>  LongConstant 34
+  // CHECK-DAG:     <<Convert:d\d+>>  TypeConversion [<<Const34>>]
+  // CHECK-DAG:                       Return [<<Convert>>]
+
+  // CHECK-START: double Main.ReturnDouble34() constant_folding (after)
+  // CHECK-DAG:     <<Const34:d\d+>>  DoubleConstant 34
+  // CHECK-DAG:                       Return [<<Const34>>]
+
+  public static double ReturnDouble34() {
+    long imm = 34L;
+    return (double) imm;
+  }
+
+  // CHECK-START: double Main.ReturnDouble99P25() constant_folding (before)
+  // CHECK-DAG:     <<Const:f\d+>>    FloatConstant 99.25
+  // CHECK-DAG:     <<Convert:d\d+>>  TypeConversion [<<Const>>]
+  // CHECK-DAG:                       Return [<<Convert>>]
+
+  // CHECK-START: double Main.ReturnDouble99P25() constant_folding (after)
+  // CHECK-DAG:     <<Const:d\d+>>    DoubleConstant 99.25
+  // CHECK-DAG:                       Return [<<Const>>]
+
+  public static double ReturnDouble99P25() {
+    float imm = 99.25f;
+    return (double) imm;
+  }
+
   public static void main(String[] args) {
     assertIntEquals(IntNegation(), -42);
     assertIntEquals(IntAddition1(), 3);
@@ -417,17 +651,31 @@
     assertIntEquals(StaticCondition(), 5);
     assertIntEquals(JumpsAndConditionals(true), 7);
     assertIntEquals(JumpsAndConditionals(false), 3);
-    int random = 123456;  // Chosen randomly.
-    assertIntEquals(And0(random), 0);
-    assertLongEquals(Mul0(random), 0);
-    assertIntEquals(OrAllOnes(random), -1);
-    assertLongEquals(Rem0(random), 0);
-    assertIntEquals(Rem1(random), 0);
-    assertLongEquals(RemN1(random), 0);
-    assertIntEquals(Shl0(random), 0);
-    assertLongEquals(Shr0(random), 0);
-    assertLongEquals(SubSameLong(random), 0);
-    assertIntEquals(UShr0(random), 0);
-    assertIntEquals(XorSameInt(random), 0);
+    int arbitrary = 123456;  // Value chosen arbitrarily.
+    assertIntEquals(And0(arbitrary), 0);
+    assertLongEquals(Mul0(arbitrary), 0);
+    assertIntEquals(OrAllOnes(arbitrary), -1);
+    assertLongEquals(Rem0(arbitrary), 0);
+    assertIntEquals(Rem1(arbitrary), 0);
+    assertLongEquals(RemN1(arbitrary), 0);
+    assertIntEquals(Shl0(arbitrary), 0);
+    assertLongEquals(Shr0(arbitrary), 0);
+    assertLongEquals(SubSameLong(arbitrary), 0);
+    assertIntEquals(UShr0(arbitrary), 0);
+    assertIntEquals(XorSameInt(arbitrary), 0);
+    assertFalse(CmpFloatGreaterThanNaN(arbitrary));
+    assertFalse(CmpDoubleLessThanNaN(arbitrary));
+    assertIntEquals(ReturnInt33(), 33);
+    assertIntEquals(ReturnIntMax(), 2147483647);
+    assertIntEquals(ReturnInt0(), 0);
+    assertLongEquals(ReturnLong33(), 33);
+    assertLongEquals(ReturnLong34(), 34);
+    assertLongEquals(ReturnLong0(), 0);
+    assertFloatEquals(ReturnFloat33(), 33);
+    assertFloatEquals(ReturnFloat34(), 34);
+    assertFloatEquals(ReturnFloat99P25(), 99.25f);
+    assertDoubleEquals(ReturnDouble33(), 33);
+    assertDoubleEquals(ReturnDouble34(), 34);
+    assertDoubleEquals(ReturnDouble99P25(), 99.25);
   }
 }
diff --git a/test/445-checker-licm/src/Main.java b/test/445-checker-licm/src/Main.java
index 91ac2ed..96918d3 100644
--- a/test/445-checker-licm/src/Main.java
+++ b/test/445-checker-licm/src/Main.java
@@ -17,13 +17,13 @@
 public class Main {
 
   // CHECK-START: int Main.div() licm (before)
-  // CHECK-DAG: Div ( loop_header:{{B\d+}} )
+  // CHECK-DAG: Div loop:{{B\d+}}
 
   // CHECK-START: int Main.div() licm (after)
-  // CHECK-NOT: Div ( loop_header:{{B\d+}} )
+  // CHECK-NOT: Div loop:{{B\d+}}
 
   // CHECK-START: int Main.div() licm (after)
-  // CHECK-DAG: Div ( loop_header:null )
+  // CHECK-DAG: Div loop:none
 
   public static int div() {
     int result = 0;
@@ -34,13 +34,13 @@
   }
 
   // CHECK-START: int Main.innerDiv() licm (before)
-  // CHECK-DAG: Div ( loop_header:{{B\d+}} )
+  // CHECK-DAG: Div loop:{{B\d+}}
 
   // CHECK-START: int Main.innerDiv() licm (after)
-  // CHECK-NOT: Div ( loop_header:{{B\d+}} )
+  // CHECK-NOT: Div loop:{{B\d+}}
 
   // CHECK-START: int Main.innerDiv() licm (after)
-  // CHECK-DAG: Div ( loop_header:null )
+  // CHECK-DAG: Div loop:none
 
   public static int innerDiv() {
     int result = 0;
@@ -53,10 +53,10 @@
   }
 
   // CHECK-START: int Main.innerDiv2() licm (before)
-  // CHECK-DAG: Mul ( loop_header:{{B4}} )
+  // CHECK-DAG: Mul loop:B4
 
   // CHECK-START: int Main.innerDiv2() licm (after)
-  // CHECK-DAG: Mul ( loop_header:{{B2}} )
+  // CHECK-DAG: Mul loop:B2
 
   public static int innerDiv2() {
     int result = 0;
@@ -72,10 +72,10 @@
   }
 
   // CHECK-START: int Main.innerDiv3(int, int) licm (before)
-  // CHECK-DAG: Div ( loop_header:{{B\d+}} )
+  // CHECK-DAG: Div loop:{{B\d+}}
 
   // CHECK-START: int Main.innerDiv3(int, int) licm (after)
-  // CHECK-DAG: Div ( loop_header:{{B\d+}} )
+  // CHECK-DAG: Div loop:{{B\d+}}
 
   public static int innerDiv3(int a, int b) {
     int result = 0;
@@ -88,16 +88,16 @@
   }
 
   // CHECK-START: int Main.arrayLength(int[]) licm (before)
-  // CHECK-DAG: [[NullCheck:l\d+]] NullCheck ( loop_header:{{B\d+}} )
-  // CHECK-DAG:                    ArrayLength [ [[NullCheck]] ] ( loop_header:{{B\d+}} )
+  // CHECK-DAG: <<NullCheck:l\d+>> NullCheck loop:{{B\d+}}
+  // CHECK-DAG:                    ArrayLength [<<NullCheck>>] loop:{{B\d+}}
 
   // CHECK-START: int Main.arrayLength(int[]) licm (after)
-  // CHECK-NOT:                    NullCheck ( loop_header:{{B\d+}} )
-  // CHECK-NOT:                    ArrayLength ( loop_header:{{B\d+}} )
+  // CHECK-NOT:                    NullCheck loop:{{B\d+}}
+  // CHECK-NOT:                    ArrayLength loop:{{B\d+}}
 
   // CHECK-START: int Main.arrayLength(int[]) licm (after)
-  // CHECK-DAG: [[NullCheck:l\d+]] NullCheck ( loop_header:null )
-  // CHECK-DAG:                    ArrayLength [ [[NullCheck]] ] ( loop_header:null )
+  // CHECK-DAG: <<NullCheck:l\d+>> NullCheck loop:none
+  // CHECK-DAG:                    ArrayLength [<<NullCheck>>] loop:none
 
   public static int arrayLength(int[] array) {
     int result = 0;
diff --git a/test/446-checker-inliner2/src/Main.java b/test/446-checker-inliner2/src/Main.java
index ecf071e..9ed66d6 100644
--- a/test/446-checker-inliner2/src/Main.java
+++ b/test/446-checker-inliner2/src/Main.java
@@ -17,15 +17,15 @@
 public class Main {
 
   // CHECK-START: int Main.inlineInstanceCall(Main) inliner (before)
-  // CHECK-DAG:     [[Invoke:i\d+]]  InvokeStaticOrDirect
-  // CHECK-DAG:                      Return [ [[Invoke]] ]
+  // CHECK-DAG:     <<Invoke:i\d+>>  InvokeStaticOrDirect
+  // CHECK-DAG:                      Return [<<Invoke>>]
 
   // CHECK-START: int Main.inlineInstanceCall(Main) inliner (after)
   // CHECK-NOT:                      InvokeStaticOrDirect
 
   // CHECK-START: int Main.inlineInstanceCall(Main) inliner (after)
-  // CHECK-DAG:     [[Field:i\d+]]   InstanceFieldGet
-  // CHECK-DAG:                      Return [ [[Field]] ]
+  // CHECK-DAG:     <<Field:i\d+>>   InstanceFieldGet
+  // CHECK-DAG:                      Return [<<Field>>]
 
   public static int inlineInstanceCall(Main m) {
     return m.foo();
@@ -38,15 +38,15 @@
   int field = 42;
 
   // CHECK-START: int Main.inlineNestedCall() inliner (before)
-  // CHECK-DAG:     [[Invoke:i\d+]]  InvokeStaticOrDirect
-  // CHECK-DAG:                      Return [ [[Invoke]] ]
+  // CHECK-DAG:     <<Invoke:i\d+>>  InvokeStaticOrDirect
+  // CHECK-DAG:                      Return [<<Invoke>>]
 
   // CHECK-START: int Main.inlineNestedCall() inliner (after)
   // CHECK-NOT:                      InvokeStaticOrDirect
 
   // CHECK-START: int Main.inlineNestedCall() inliner (after)
-  // CHECK-DAG:     [[Const38:i\d+]] IntConstant 38
-  // CHECK-DAG:                      Return [ [[Const38]] ]
+  // CHECK-DAG:     <<Const38:i\d+>> IntConstant 38
+  // CHECK-DAG:                      Return [<<Const38>>]
 
   public static int inlineNestedCall() {
     return nestedCall();
diff --git a/test/447-checker-inliner3/src/Main.java b/test/447-checker-inliner3/src/Main.java
index db4b236..9d022b9 100644
--- a/test/447-checker-inliner3/src/Main.java
+++ b/test/447-checker-inliner3/src/Main.java
@@ -17,8 +17,8 @@
 public class Main {
 
   // CHECK-START: int Main.inlineIfThenElse() inliner (before)
-  // CHECK-DAG:     [[Invoke:i\d+]]  InvokeStaticOrDirect
-  // CHECK-DAG:                      Return [ [[Invoke]] ]
+  // CHECK-DAG:     <<Invoke:i\d+>>  InvokeStaticOrDirect
+  // CHECK-DAG:                      Return [<<Invoke>>]
 
   // CHECK-START: int Main.inlineIfThenElse() inliner (after)
   // CHECK-NOT:                      InvokeStaticOrDirect
diff --git a/test/449-checker-bce/expected.txt b/test/449-checker-bce/expected.txt
index 29d6383..e69de29 100644
--- a/test/449-checker-bce/expected.txt
+++ b/test/449-checker-bce/expected.txt
@@ -1 +0,0 @@
-100
diff --git a/test/449-checker-bce/src/Main.java b/test/449-checker-bce/src/Main.java
index 17039a3..f90d85d 100644
--- a/test/449-checker-bce/src/Main.java
+++ b/test/449-checker-bce/src/Main.java
@@ -608,6 +608,380 @@
   }
 
 
+  int sum;
+
+  // CHECK-START: void Main.foo1(int[], int, int) BCE (before)
+  // CHECK: BoundsCheck
+  // CHECK: ArraySet
+  // CHECK-NOT: BoundsCheck
+  // CHECK: ArrayGet
+
+  // CHECK-START: void Main.foo1(int[], int, int) BCE (after)
+  // CHECK: Deoptimize
+  // CHECK: Deoptimize
+  // CHECK: Deoptimize
+  // CHECK-NOT: Deoptimize
+  // CHECK: Phi
+  // CHECK-NOT: BoundsCheck
+  // CHECK: ArraySet
+  // CHECK-NOT: BoundsCheck
+  // CHECK: ArrayGet
+
+  void foo1(int[] array, int start, int end) {
+    // Three HDeoptimize will be added. One for
+    // start >= 0, one for end <= array.length,
+    // and one for null check on array (to hoist null
+    // check and array.length out of loop).
+    for (int i = start ; i < end; i++) {
+      array[i] = 1;
+      sum += array[i];
+    }
+  }
+
+
+  // CHECK-START: void Main.foo2(int[], int, int) BCE (before)
+  // CHECK: BoundsCheck
+  // CHECK: ArraySet
+  // CHECK-NOT: BoundsCheck
+  // CHECK: ArrayGet
+
+  // CHECK-START: void Main.foo2(int[], int, int) BCE (after)
+  // CHECK: Deoptimize
+  // CHECK: Deoptimize
+  // CHECK: Deoptimize
+  // CHECK-NOT: Deoptimize
+  // CHECK: Phi
+  // CHECK-NOT: BoundsCheck
+  // CHECK: ArraySet
+  // CHECK-NOT: BoundsCheck
+  // CHECK: ArrayGet
+
+  void foo2(int[] array, int start, int end) {
+    // Three HDeoptimize will be added. One for
+    // start >= 0, one for end < array.length,
+    // and one for null check on array (to hoist null
+    // check and array.length out of loop).
+    for (int i = start ; i <= end; i++) {
+      array[i] = 1;
+      sum += array[i];
+    }
+  }
+
+
+  // CHECK-START: void Main.foo3(int[], int) BCE (before)
+  // CHECK: BoundsCheck
+  // CHECK: ArraySet
+  // CHECK-NOT: BoundsCheck
+  // CHECK: ArrayGet
+
+  // CHECK-START: void Main.foo3(int[], int) BCE (after)
+  // CHECK: Deoptimize
+  // CHECK: Deoptimize
+  // CHECK-NOT: Deoptimize
+  // CHECK: Phi
+  // CHECK-NOT: BoundsCheck
+  // CHECK: ArraySet
+  // CHECK-NOT: BoundsCheck
+  // CHECK: ArrayGet
+
+  void foo3(int[] array, int end) {
+    // Two HDeoptimize will be added. One for end < array.length,
+    // and one for null check on array (to hoist null check
+    // and array.length out of loop).
+    for (int i = 3 ; i <= end; i++) {
+      array[i] = 1;
+      sum += array[i];
+    }
+  }
+
+  // CHECK-START: void Main.foo4(int[], int) BCE (before)
+  // CHECK: BoundsCheck
+  // CHECK: ArraySet
+  // CHECK-NOT: BoundsCheck
+  // CHECK: ArrayGet
+
+  // CHECK-START: void Main.foo4(int[], int) BCE (after)
+  // CHECK: Deoptimize
+  // CHECK: Deoptimize
+  // CHECK-NOT: Deoptimize
+  // CHECK: Phi
+  // CHECK-NOT: BoundsCheck
+  // CHECK: ArraySet
+  // CHECK-NOT: BoundsCheck
+  // CHECK: ArrayGet
+
+  void foo4(int[] array, int end) {
+    // Two HDeoptimize will be added. One for end <= array.length,
+    // and one for null check on array (to hoist null check
+    // and array.length out of loop).
+    for (int i = end ; i > 0; i--) {
+      array[i - 1] = 1;
+      sum += array[i - 1];
+    }
+  }
+
+
+  // CHECK-START: void Main.foo5(int[], int) BCE (before)
+  // CHECK: BoundsCheck
+  // CHECK: ArraySet
+  // CHECK: BoundsCheck
+  // CHECK: ArrayGet
+  // CHECK: BoundsCheck
+  // CHECK: ArrayGet
+  // CHECK: BoundsCheck
+  // CHECK: ArrayGet
+
+  // CHECK-START: void Main.foo5(int[], int) BCE (after)
+  // CHECK-NOT: BoundsCheck
+  // CHECK: ArraySet
+  // CHECK: Deoptimize
+  // CHECK-NOT: Deoptimize
+  // CHECK: Phi
+  // CHECK-NOT: BoundsCheck
+  // CHECK: ArrayGet
+  // CHECK-NOT: BoundsCheck
+  // CHECK: ArrayGet
+  // CHECK-NOT: BoundsCheck
+  // CHECK: ArrayGet
+
+  void foo5(int[] array, int end) {
+    // Bounds check in this loop can be eliminated without deoptimization.
+    for (int i = array.length - 1 ; i >= 0; i--) {
+      array[i] = 1;
+    }
+    // One HDeoptimize will be added.
+    // It's for (end - 2 <= array.length - 2).
+    for (int i = end - 2 ; i > 0; i--) {
+      sum += array[i - 1];
+      sum += array[i];
+      sum += array[i + 1];
+    }
+  }
+
+
+  // CHECK-START: void Main.foo6(int[], int, int) BCE (before)
+  // CHECK: BoundsCheck
+  // CHECK: ArrayGet
+  // CHECK: BoundsCheck
+  // CHECK: ArrayGet
+  // CHECK: BoundsCheck
+  // CHECK: ArrayGet
+  // CHECK: BoundsCheck
+  // CHECK: ArrayGet
+  // CHECK: BoundsCheck
+  // CHECK: ArrayGet
+  // CHECK-NOT: BoundsCheck
+  // CHECK: ArraySet
+
+  // CHECK-START: void Main.foo6(int[], int, int) BCE (after)
+  // CHECK: Deoptimize
+  // CHECK: Deoptimize
+  // CHECK: Deoptimize
+  // CHECK-NOT: Deoptimize
+  // CHECK: Phi
+  // CHECK-NOT: BoundsCheck
+  // CHECK: ArrayGet
+  // CHECK-NOT: BoundsCheck
+  // CHECK: ArrayGet
+  // CHECK-NOT: BoundsCheck
+  // CHECK: ArrayGet
+  // CHECK-NOT: BoundsCheck
+  // CHECK: ArrayGet
+  // CHECK-NOT: BoundsCheck
+  // CHECK: ArrayGet
+  // CHECK-NOT: BoundsCheck
+  // CHECK: ArraySet
+
+  void foo6(int[] array, int start, int end) {
+    // Three HDeoptimize will be added. One for
+    // start >= 2, one for end <= array.length - 3,
+    // and one for null check on array (to hoist null
+    // check and array.length out of loop).
+    for (int i = end; i >= start; i--) {
+      array[i] = (array[i-2] + array[i-1] + array[i] + array[i+1] + array[i+2]) / 5;
+    }
+  }
+
+
+  // CHECK-START: void Main.foo7(int[], int, int, boolean) BCE (before)
+  // CHECK: BoundsCheck
+  // CHECK: ArrayGet
+  // CHECK: BoundsCheck
+  // CHECK: ArrayGet
+
+  // CHECK-START: void Main.foo7(int[], int, int, boolean) BCE (after)
+  // CHECK: Deoptimize
+  // CHECK: Deoptimize
+  // CHECK: Deoptimize
+  // CHECK-NOT: Deoptimize
+  // CHECK: Phi
+  // CHECK: BoundsCheck
+  // CHECK: ArrayGet
+  // CHECK-NOT: BoundsCheck
+  // CHECK: ArrayGet
+
+  void foo7(int[] array, int start, int end, boolean lowEnd) {
+    // Three HDeoptimize will be added. One for
+    // start >= 0, one for end <= array.length,
+    // and one for null check on array (to hoist null
+    // check and array.length out of loop).
+    for (int i = start ; i < end; i++) {
+      if (lowEnd) {
+        // This array access isn't certain. So we don't
+        // use +1000 offset in decision making for deoptimization
+        // conditions.
+        sum += array[i + 1000];
+      }
+      sum += array[i];
+    }
+  }
+
+
+  // CHECK-START: void Main.partialLooping(int[], int, int) BCE (before)
+  // CHECK: BoundsCheck
+  // CHECK: ArraySet
+
+  // CHECK-START: void Main.partialLooping(int[], int, int) BCE (after)
+  // CHECK-NOT: Deoptimize
+  // CHECK: BoundsCheck
+  // CHECK: ArraySet
+
+  void partialLooping(int[] array, int start, int end) {
+    // This loop doesn't cover the full range of [start, end) so
+    // adding deoptimization is too aggressive, since end can be
+    // greater than array.length but the loop is never going to work on
+    // more than 2 elements.
+    for (int i = start; i < end; i++) {
+      if (i == 2) {
+        return;
+      }
+      array[i] = 1;
+    }
+  }
+
+
+  static void testUnknownBounds() {
+    boolean caught = false;
+    Main main = new Main();
+    main.foo1(new int[10], 0, 10);
+    if (main.sum != 10) {
+      System.out.println("foo1 failed!");
+    }
+
+    caught = false;
+    main = new Main();
+    try {
+      main.foo1(new int[10], 0, 11);
+    } catch (ArrayIndexOutOfBoundsException e) {
+      caught = true;
+    }
+    if (!caught || main.sum != 10) {
+      System.out.println("foo1 exception failed!");
+    }
+
+    main = new Main();
+    main.foo2(new int[10], 0, 9);
+    if (main.sum != 10) {
+      System.out.println("foo2 failed!");
+    }
+
+    caught = false;
+    main = new Main();
+    try {
+      main.foo2(new int[10], 0, 10);
+    } catch (ArrayIndexOutOfBoundsException e) {
+      caught = true;
+    }
+    if (!caught || main.sum != 10) {
+      System.out.println("foo2 exception failed!");
+    }
+
+    main = new Main();
+    main.foo3(new int[10], 9);
+    if (main.sum != 7) {
+      System.out.println("foo3 failed!");
+    }
+
+    caught = false;
+    main = new Main();
+    try {
+      main.foo3(new int[10], 10);
+    } catch (ArrayIndexOutOfBoundsException e) {
+      caught = true;
+    }
+    if (!caught || main.sum != 7) {
+      System.out.println("foo3 exception failed!");
+    }
+
+    main = new Main();
+    main.foo4(new int[10], 10);
+    if (main.sum != 10) {
+      System.out.println("foo4 failed!");
+    }
+
+    caught = false;
+    main = new Main();
+    try {
+      main.foo4(new int[10], 11);
+    } catch (ArrayIndexOutOfBoundsException e) {
+      caught = true;
+    }
+    if (!caught || main.sum != 0) {
+      System.out.println("foo4 exception failed!");
+    }
+
+    main = new Main();
+    main.foo5(new int[10], 10);
+    if (main.sum != 24) {
+      System.out.println("foo5 failed!");
+    }
+
+    caught = false;
+    main = new Main();
+    try {
+      main.foo5(new int[10], 11);
+    } catch (ArrayIndexOutOfBoundsException e) {
+      caught = true;
+    }
+    if (!caught || main.sum != 2) {
+      System.out.println("foo5 exception failed!");
+    }
+
+    main = new Main();
+    main.foo6(new int[10], 2, 7);
+
+    main = new Main();
+    int[] array = new int[4];
+    main.partialLooping(array, 0, 4);  // Returns at i == 2: only array[0..1] are written.
+    if ((array[0] != 1) || (array[1] != 1) ||
+        (array[2] != 0) || (array[3] != 0)) {
+      System.out.println("partialLooping failed!");
+    }
+
+    caught = false;
+    main = new Main();
+    try {
+      main.foo6(new int[10], 2, 8);
+    } catch (ArrayIndexOutOfBoundsException e) {
+      caught = true;
+    }
+    if (!caught) {
+      System.out.println("foo6 exception failed!");
+    }
+
+    caught = false;
+    main = new Main();
+    try {
+      main.foo6(new int[10], 1, 7);
+    } catch (ArrayIndexOutOfBoundsException e) {
+      caught = true;
+    }
+    if (!caught) {
+      System.out.println("foo6 exception failed!");
+    }
+
+  }
+
   // Make sure this method is compiled with optimizing.
   // CHECK-START: void Main.main(java.lang.String[]) register (after)
   // CHECK: ParallelMove
@@ -643,7 +1017,11 @@
 
     // Make sure this value is kept after deoptimization.
     int i = 1;
-    System.out.println(foo() + i);
+    if (foo() + i != 100) {
+      System.out.println("foo failed!");
+    };
+
+    testUnknownBounds();
   }
 
 }
diff --git a/test/454-get-vreg/get_vreg_jni.cc b/test/454-get-vreg/get_vreg_jni.cc
index 6b4bc11..0ef2964 100644
--- a/test/454-get-vreg/get_vreg_jni.cc
+++ b/test/454-get-vreg/get_vreg_jni.cc
@@ -29,7 +29,9 @@
  public:
   TestVisitor(Thread* thread, Context* context, mirror::Object* this_value)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-      : StackVisitor(thread, context), this_value_(this_value), found_method_index_(0) {}
+      : StackVisitor(thread, context, StackVisitor::StackWalkKind::kIncludeInlinedFrames),
+        this_value_(this_value),
+        found_method_index_(0) {}
 
   bool VisitFrame() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     mirror::ArtMethod* m = GetMethod();
diff --git a/test/455-set-vreg/set_vreg_jni.cc b/test/455-set-vreg/set_vreg_jni.cc
index 0a83ac0..dffbfa4 100644
--- a/test/455-set-vreg/set_vreg_jni.cc
+++ b/test/455-set-vreg/set_vreg_jni.cc
@@ -29,7 +29,8 @@
  public:
   TestVisitor(Thread* thread, Context* context, mirror::Object* this_value)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-      : StackVisitor(thread, context), this_value_(this_value) {}
+      : StackVisitor(thread, context, StackVisitor::StackWalkKind::kIncludeInlinedFrames),
+        this_value_(this_value) {}
 
   bool VisitFrame() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     mirror::ArtMethod* m = GetMethod();
diff --git a/test/457-regs/regs_jni.cc b/test/457-regs/regs_jni.cc
index 1b32348..193ab9d 100644
--- a/test/457-regs/regs_jni.cc
+++ b/test/457-regs/regs_jni.cc
@@ -29,7 +29,7 @@
  public:
   TestVisitor(Thread* thread, Context* context)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-      : StackVisitor(thread, context) {}
+      : StackVisitor(thread, context, StackVisitor::StackWalkKind::kIncludeInlinedFrames) {}
 
   bool VisitFrame() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     mirror::ArtMethod* m = GetMethod();
diff --git a/test/458-checker-instruction-simplification/src/Main.java b/test/458-checker-instruction-simplification/src/Main.java
index 65be6cb..742210c 100644
--- a/test/458-checker-instruction-simplification/src/Main.java
+++ b/test/458-checker-instruction-simplification/src/Main.java
@@ -34,19 +34,31 @@
     }
   }
 
+  public static void assertFloatEquals(float expected, float result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static void assertDoubleEquals(double expected, double result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
   /**
    * Tiny programs exercising optimizations of arithmetic identities.
    */
 
   // CHECK-START: long Main.Add0(long) instruction_simplifier (before)
-  // CHECK-DAG:     [[Arg:j\d+]]     ParameterValue
-  // CHECK-DAG:     [[Const0:j\d+]]  LongConstant 0
-  // CHECK-DAG:     [[Add:j\d+]]     Add [ [[Const0]] [[Arg]] ]
-  // CHECK-DAG:                      Return [ [[Add]] ]
+  // CHECK-DAG:     <<Arg:j\d+>>     ParameterValue
+  // CHECK-DAG:     <<Const0:j\d+>>  LongConstant 0
+  // CHECK-DAG:     <<Add:j\d+>>     Add [<<Const0>>,<<Arg>>]
+  // CHECK-DAG:                      Return [<<Add>>]
 
   // CHECK-START: long Main.Add0(long) instruction_simplifier (after)
-  // CHECK-DAG:     [[Arg:j\d+]]     ParameterValue
-  // CHECK-DAG:                      Return [ [[Arg]] ]
+  // CHECK-DAG:     <<Arg:j\d+>>     ParameterValue
+  // CHECK-DAG:                      Return [<<Arg>>]
 
   // CHECK-START: long Main.Add0(long) instruction_simplifier (after)
   // CHECK-NOT:                        Add
@@ -56,14 +68,14 @@
   }
 
   // CHECK-START: int Main.AndAllOnes(int) instruction_simplifier (before)
-  // CHECK-DAG:     [[Arg:i\d+]]     ParameterValue
-  // CHECK-DAG:     [[ConstF:i\d+]]  IntConstant -1
-  // CHECK-DAG:     [[And:i\d+]]     And [ [[Arg]] [[ConstF]] ]
-  // CHECK-DAG:                      Return [ [[And]] ]
+  // CHECK-DAG:     <<Arg:i\d+>>     ParameterValue
+  // CHECK-DAG:     <<ConstF:i\d+>>  IntConstant -1
+  // CHECK-DAG:     <<And:i\d+>>     And [<<Arg>>,<<ConstF>>]
+  // CHECK-DAG:                      Return [<<And>>]
 
   // CHECK-START: int Main.AndAllOnes(int) instruction_simplifier (after)
-  // CHECK-DAG:     [[Arg:i\d+]]     ParameterValue
-  // CHECK-DAG:                      Return [ [[Arg]] ]
+  // CHECK-DAG:     <<Arg:i\d+>>     ParameterValue
+  // CHECK-DAG:                      Return [<<Arg>>]
 
   // CHECK-START: int Main.AndAllOnes(int) instruction_simplifier (after)
   // CHECK-NOT:                      And
@@ -73,14 +85,14 @@
   }
 
   // CHECK-START: long Main.Div1(long) instruction_simplifier (before)
-  // CHECK-DAG:     [[Arg:j\d+]]     ParameterValue
-  // CHECK-DAG:     [[Const1:j\d+]]  LongConstant 1
-  // CHECK-DAG:     [[Div:j\d+]]     Div [ [[Arg]] [[Const1]] ]
-  // CHECK-DAG:                      Return [ [[Div]] ]
+  // CHECK-DAG:     <<Arg:j\d+>>     ParameterValue
+  // CHECK-DAG:     <<Const1:j\d+>>  LongConstant 1
+  // CHECK-DAG:     <<Div:j\d+>>     Div [<<Arg>>,<<Const1>>]
+  // CHECK-DAG:                      Return [<<Div>>]
 
   // CHECK-START: long Main.Div1(long) instruction_simplifier (after)
-  // CHECK-DAG:     [[Arg:j\d+]]     ParameterValue
-  // CHECK-DAG:                      Return [ [[Arg]] ]
+  // CHECK-DAG:     <<Arg:j\d+>>     ParameterValue
+  // CHECK-DAG:                      Return [<<Arg>>]
 
   // CHECK-START: long Main.Div1(long) instruction_simplifier (after)
   // CHECK-NOT:                      Div
@@ -90,15 +102,15 @@
   }
 
   // CHECK-START: int Main.DivN1(int) instruction_simplifier (before)
-  // CHECK-DAG:     [[Arg:i\d+]]      ParameterValue
-  // CHECK-DAG:     [[ConstN1:i\d+]]  IntConstant -1
-  // CHECK-DAG:     [[Div:i\d+]]      Div [ [[Arg]] [[ConstN1]] ]
-  // CHECK-DAG:                       Return [ [[Div]] ]
+  // CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
+  // CHECK-DAG:     <<ConstN1:i\d+>>  IntConstant -1
+  // CHECK-DAG:     <<Div:i\d+>>      Div [<<Arg>>,<<ConstN1>>]
+  // CHECK-DAG:                       Return [<<Div>>]
 
   // CHECK-START: int Main.DivN1(int) instruction_simplifier (after)
-  // CHECK-DAG:     [[Arg:i\d+]]      ParameterValue
-  // CHECK-DAG:     [[Neg:i\d+]]      Neg [ [[Arg]] ]
-  // CHECK-DAG:                       Return [ [[Neg]] ]
+  // CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
+  // CHECK-DAG:     <<Neg:i\d+>>      Neg [<<Arg>>]
+  // CHECK-DAG:                       Return [<<Neg>>]
 
   // CHECK-START: int Main.DivN1(int) instruction_simplifier (after)
   // CHECK-NOT:                       Div
@@ -108,14 +120,14 @@
   }
 
   // CHECK-START: long Main.Mul1(long) instruction_simplifier (before)
-  // CHECK-DAG:     [[Arg:j\d+]]     ParameterValue
-  // CHECK-DAG:     [[Const1:j\d+]]  LongConstant 1
-  // CHECK-DAG:     [[Mul:j\d+]]     Mul [ [[Arg]] [[Const1]] ]
-  // CHECK-DAG:                      Return [ [[Mul]] ]
+  // CHECK-DAG:     <<Arg:j\d+>>     ParameterValue
+  // CHECK-DAG:     <<Const1:j\d+>>  LongConstant 1
+  // CHECK-DAG:     <<Mul:j\d+>>     Mul [<<Arg>>,<<Const1>>]
+  // CHECK-DAG:                      Return [<<Mul>>]
 
   // CHECK-START: long Main.Mul1(long) instruction_simplifier (after)
-  // CHECK-DAG:     [[Arg:j\d+]]     ParameterValue
-  // CHECK-DAG:                      Return [ [[Arg]] ]
+  // CHECK-DAG:     <<Arg:j\d+>>     ParameterValue
+  // CHECK-DAG:                      Return [<<Arg>>]
 
   // CHECK-START: long Main.Mul1(long) instruction_simplifier (after)
   // CHECK-NOT:                       Mul
@@ -125,15 +137,15 @@
   }
 
   // CHECK-START: int Main.MulN1(int) instruction_simplifier (before)
-  // CHECK-DAG:     [[Arg:i\d+]]      ParameterValue
-  // CHECK-DAG:     [[ConstN1:i\d+]]  IntConstant -1
-  // CHECK-DAG:     [[Mul:i\d+]]      Mul [ [[Arg]] [[ConstN1]] ]
-  // CHECK-DAG:                       Return [ [[Mul]] ]
+  // CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
+  // CHECK-DAG:     <<ConstN1:i\d+>>  IntConstant -1
+  // CHECK-DAG:     <<Mul:i\d+>>      Mul [<<Arg>>,<<ConstN1>>]
+  // CHECK-DAG:                       Return [<<Mul>>]
 
   // CHECK-START: int Main.MulN1(int) instruction_simplifier (after)
-  // CHECK-DAG:     [[Arg:i\d+]]      ParameterValue
-  // CHECK-DAG:     [[Neg:i\d+]]      Neg [ [[Arg]] ]
-  // CHECK-DAG:                       Return [ [[Neg]] ]
+  // CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
+  // CHECK-DAG:     <<Neg:i\d+>>      Neg [<<Arg>>]
+  // CHECK-DAG:                       Return [<<Neg>>]
 
   // CHECK-START: int Main.MulN1(int) instruction_simplifier (after)
   // CHECK-NOT:                       Mul
@@ -143,16 +155,16 @@
   }
 
   // CHECK-START: long Main.MulPowerOfTwo128(long) instruction_simplifier (before)
-  // CHECK-DAG:     [[Arg:j\d+]]       ParameterValue
-  // CHECK-DAG:     [[Const128:j\d+]]  LongConstant 128
-  // CHECK-DAG:     [[Mul:j\d+]]       Mul [ [[Arg]] [[Const128]] ]
-  // CHECK-DAG:                        Return [ [[Mul]] ]
+  // CHECK-DAG:     <<Arg:j\d+>>       ParameterValue
+  // CHECK-DAG:     <<Const128:j\d+>>  LongConstant 128
+  // CHECK-DAG:     <<Mul:j\d+>>       Mul [<<Arg>>,<<Const128>>]
+  // CHECK-DAG:                        Return [<<Mul>>]
 
   // CHECK-START: long Main.MulPowerOfTwo128(long) instruction_simplifier (after)
-  // CHECK-DAG:     [[Arg:j\d+]]       ParameterValue
-  // CHECK-DAG:     [[Const7:i\d+]]    IntConstant 7
-  // CHECK-DAG:     [[Shl:j\d+]]       Shl [ [[Arg]] [[Const7]] ]
-  // CHECK-DAG:                        Return [ [[Shl]] ]
+  // CHECK-DAG:     <<Arg:j\d+>>       ParameterValue
+  // CHECK-DAG:     <<Const7:i\d+>>    IntConstant 7
+  // CHECK-DAG:     <<Shl:j\d+>>       Shl [<<Arg>>,<<Const7>>]
+  // CHECK-DAG:                        Return [<<Shl>>]
 
   // CHECK-START: long Main.MulPowerOfTwo128(long) instruction_simplifier (after)
   // CHECK-NOT:                        Mul
@@ -162,14 +174,14 @@
   }
 
   // CHECK-START: int Main.Or0(int) instruction_simplifier (before)
-  // CHECK-DAG:     [[Arg:i\d+]]      ParameterValue
-  // CHECK-DAG:     [[Const0:i\d+]]   IntConstant 0
-  // CHECK-DAG:     [[Or:i\d+]]       Or [ [[Arg]] [[Const0]] ]
-  // CHECK-DAG:                       Return [ [[Or]] ]
+  // CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
+  // CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
+  // CHECK-DAG:     <<Or:i\d+>>       Or [<<Arg>>,<<Const0>>]
+  // CHECK-DAG:                       Return [<<Or>>]
 
   // CHECK-START: int Main.Or0(int) instruction_simplifier (after)
-  // CHECK-DAG:     [[Arg:i\d+]]      ParameterValue
-  // CHECK-DAG:                       Return [ [[Arg]] ]
+  // CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
+  // CHECK-DAG:                       Return [<<Arg>>]
 
   // CHECK-START: int Main.Or0(int) instruction_simplifier (after)
   // CHECK-NOT:                       Or
@@ -179,13 +191,13 @@
   }
 
   // CHECK-START: long Main.OrSame(long) instruction_simplifier (before)
-  // CHECK-DAG:     [[Arg:j\d+]]       ParameterValue
-  // CHECK-DAG:     [[Or:j\d+]]        Or [ [[Arg]] [[Arg]] ]
-  // CHECK-DAG:                        Return [ [[Or]] ]
+  // CHECK-DAG:     <<Arg:j\d+>>       ParameterValue
+  // CHECK-DAG:     <<Or:j\d+>>        Or [<<Arg>>,<<Arg>>]
+  // CHECK-DAG:                        Return [<<Or>>]
 
   // CHECK-START: long Main.OrSame(long) instruction_simplifier (after)
-  // CHECK-DAG:     [[Arg:j\d+]]       ParameterValue
-  // CHECK-DAG:                        Return [ [[Arg]] ]
+  // CHECK-DAG:     <<Arg:j\d+>>       ParameterValue
+  // CHECK-DAG:                        Return [<<Arg>>]
 
   // CHECK-START: long Main.OrSame(long) instruction_simplifier (after)
   // CHECK-NOT:                        Or
@@ -195,14 +207,14 @@
   }
 
   // CHECK-START: int Main.Shl0(int) instruction_simplifier (before)
-  // CHECK-DAG:     [[Arg:i\d+]]      ParameterValue
-  // CHECK-DAG:     [[Const0:i\d+]]   IntConstant 0
-  // CHECK-DAG:     [[Shl:i\d+]]      Shl [ [[Arg]] [[Const0]] ]
-  // CHECK-DAG:                       Return [ [[Shl]] ]
+  // CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
+  // CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
+  // CHECK-DAG:     <<Shl:i\d+>>      Shl [<<Arg>>,<<Const0>>]
+  // CHECK-DAG:                       Return [<<Shl>>]
 
   // CHECK-START: int Main.Shl0(int) instruction_simplifier (after)
-  // CHECK-DAG:     [[Arg:i\d+]]      ParameterValue
-  // CHECK-DAG:                       Return [ [[Arg]] ]
+  // CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
+  // CHECK-DAG:                       Return [<<Arg>>]
 
   // CHECK-START: int Main.Shl0(int) instruction_simplifier (after)
   // CHECK-NOT:                       Shl
@@ -211,15 +223,33 @@
     return arg << 0;
   }
 
+  // CHECK-START: int Main.Shl1(int) instruction_simplifier (before)
+  // CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
+  // CHECK-DAG:     <<Const1:i\d+>>   IntConstant 1
+  // CHECK-DAG:     <<Shl:i\d+>>      Shl [<<Arg>>,<<Const1>>]
+  // CHECK-DAG:                       Return [<<Shl>>]
+
+  // CHECK-START: int Main.Shl1(int) instruction_simplifier (after)
+  // CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
+  // CHECK-DAG:     <<Add:i\d+>>      Add [<<Arg>>,<<Arg>>]
+  // CHECK-DAG:                       Return [<<Add>>]
+
+  // CHECK-START: int Main.Shl1(int) instruction_simplifier (after)
+  // CHECK-NOT:                       Shl
+
+  public static int Shl1(int arg) {
+    return arg << 1;
+  }
+
   // CHECK-START: long Main.Shr0(long) instruction_simplifier (before)
-  // CHECK-DAG:     [[Arg:j\d+]]      ParameterValue
-  // CHECK-DAG:     [[Const0:i\d+]]   IntConstant 0
-  // CHECK-DAG:     [[Shr:j\d+]]      Shr [ [[Arg]] [[Const0]] ]
-  // CHECK-DAG:                       Return [ [[Shr]] ]
+  // CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
+  // CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
+  // CHECK-DAG:     <<Shr:j\d+>>      Shr [<<Arg>>,<<Const0>>]
+  // CHECK-DAG:                       Return [<<Shr>>]
 
   // CHECK-START: long Main.Shr0(long) instruction_simplifier (after)
-  // CHECK-DAG:     [[Arg:j\d+]]      ParameterValue
-  // CHECK-DAG:                       Return [ [[Arg]] ]
+  // CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
+  // CHECK-DAG:                       Return [<<Arg>>]
 
   // CHECK-START: long Main.Shr0(long) instruction_simplifier (after)
   // CHECK-NOT:                       Shr
@@ -229,14 +259,14 @@
   }
 
   // CHECK-START: long Main.Sub0(long) instruction_simplifier (before)
-  // CHECK-DAG:     [[Arg:j\d+]]      ParameterValue
-  // CHECK-DAG:     [[Const0:j\d+]]   LongConstant 0
-  // CHECK-DAG:     [[Sub:j\d+]]      Sub [ [[Arg]] [[Const0]] ]
-  // CHECK-DAG:                       Return [ [[Sub]] ]
+  // CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
+  // CHECK-DAG:     <<Const0:j\d+>>   LongConstant 0
+  // CHECK-DAG:     <<Sub:j\d+>>      Sub [<<Arg>>,<<Const0>>]
+  // CHECK-DAG:                       Return [<<Sub>>]
 
   // CHECK-START: long Main.Sub0(long) instruction_simplifier (after)
-  // CHECK-DAG:     [[Arg:j\d+]]      ParameterValue
-  // CHECK-DAG:                       Return [ [[Arg]] ]
+  // CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
+  // CHECK-DAG:                       Return [<<Arg>>]
 
   // CHECK-START: long Main.Sub0(long) instruction_simplifier (after)
   // CHECK-NOT:                       Sub
@@ -246,15 +276,15 @@
   }
 
   // CHECK-START: int Main.SubAliasNeg(int) instruction_simplifier (before)
-  // CHECK-DAG:     [[Arg:i\d+]]      ParameterValue
-  // CHECK-DAG:     [[Const0:i\d+]]   IntConstant 0
-  // CHECK-DAG:     [[Sub:i\d+]]      Sub [ [[Const0]] [[Arg]] ]
-  // CHECK-DAG:                       Return [ [[Sub]] ]
+  // CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
+  // CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
+  // CHECK-DAG:     <<Sub:i\d+>>      Sub [<<Const0>>,<<Arg>>]
+  // CHECK-DAG:                       Return [<<Sub>>]
 
   // CHECK-START: int Main.SubAliasNeg(int) instruction_simplifier (after)
-  // CHECK-DAG:     [[Arg:i\d+]]      ParameterValue
-  // CHECK-DAG:     [[Neg:i\d+]]      Neg [ [[Arg]] ]
-  // CHECK-DAG:                       Return [ [[Neg]] ]
+  // CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
+  // CHECK-DAG:     <<Neg:i\d+>>      Neg [<<Arg>>]
+  // CHECK-DAG:                       Return [<<Neg>>]
 
   // CHECK-START: int Main.SubAliasNeg(int) instruction_simplifier (after)
   // CHECK-NOT:                       Sub
@@ -264,14 +294,14 @@
   }
 
   // CHECK-START: long Main.UShr0(long) instruction_simplifier (before)
-  // CHECK-DAG:     [[Arg:j\d+]]      ParameterValue
-  // CHECK-DAG:     [[Const0:i\d+]]   IntConstant 0
-  // CHECK-DAG:     [[UShr:j\d+]]     UShr [ [[Arg]] [[Const0]] ]
-  // CHECK-DAG:                       Return [ [[UShr]] ]
+  // CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
+  // CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
+  // CHECK-DAG:     <<UShr:j\d+>>     UShr [<<Arg>>,<<Const0>>]
+  // CHECK-DAG:                       Return [<<UShr>>]
 
   // CHECK-START: long Main.UShr0(long) instruction_simplifier (after)
-  // CHECK-DAG:     [[Arg:j\d+]]      ParameterValue
-  // CHECK-DAG:                       Return [ [[Arg]] ]
+  // CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
+  // CHECK-DAG:                       Return [<<Arg>>]
 
   // CHECK-START: long Main.UShr0(long) instruction_simplifier (after)
   // CHECK-NOT:                       UShr
@@ -281,14 +311,14 @@
   }
 
   // CHECK-START: int Main.Xor0(int) instruction_simplifier (before)
-  // CHECK-DAG:     [[Arg:i\d+]]      ParameterValue
-  // CHECK-DAG:     [[Const0:i\d+]]   IntConstant 0
-  // CHECK-DAG:     [[Xor:i\d+]]      Xor [ [[Arg]] [[Const0]] ]
-  // CHECK-DAG:                       Return [ [[Xor]] ]
+  // CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
+  // CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
+  // CHECK-DAG:     <<Xor:i\d+>>      Xor [<<Arg>>,<<Const0>>]
+  // CHECK-DAG:                       Return [<<Xor>>]
 
   // CHECK-START: int Main.Xor0(int) instruction_simplifier (after)
-  // CHECK-DAG:     [[Arg:i\d+]]      ParameterValue
-  // CHECK-DAG:                       Return [ [[Arg]] ]
+  // CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
+  // CHECK-DAG:                       Return [<<Arg>>]
 
   // CHECK-START: int Main.Xor0(int) instruction_simplifier (after)
   // CHECK-NOT:                       Xor
@@ -298,15 +328,15 @@
   }
 
   // CHECK-START: int Main.XorAllOnes(int) instruction_simplifier (before)
-  // CHECK-DAG:     [[Arg:i\d+]]      ParameterValue
-  // CHECK-DAG:     [[ConstF:i\d+]]   IntConstant -1
-  // CHECK-DAG:     [[Xor:i\d+]]      Xor [ [[Arg]] [[ConstF]] ]
-  // CHECK-DAG:                       Return [ [[Xor]] ]
+  // CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
+  // CHECK-DAG:     <<ConstF:i\d+>>   IntConstant -1
+  // CHECK-DAG:     <<Xor:i\d+>>      Xor [<<Arg>>,<<ConstF>>]
+  // CHECK-DAG:                       Return [<<Xor>>]
 
   // CHECK-START: int Main.XorAllOnes(int) instruction_simplifier (after)
-  // CHECK-DAG:     [[Arg:i\d+]]      ParameterValue
-  // CHECK-DAG:     [[Not:i\d+]]      Not [ [[Arg]] ]
-  // CHECK-DAG:                       Return [ [[Not]] ]
+  // CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
+  // CHECK-DAG:     <<Not:i\d+>>      Not [<<Arg>>]
+  // CHECK-DAG:                       Return [<<Not>>]
 
   // CHECK-START: int Main.XorAllOnes(int) instruction_simplifier (after)
   // CHECK-NOT:                       Xor
@@ -323,20 +353,20 @@
    */
 
   // CHECK-START: int Main.AddNegs1(int, int) instruction_simplifier (before)
-  // CHECK-DAG:     [[Arg1:i\d+]]     ParameterValue
-  // CHECK-DAG:     [[Arg2:i\d+]]     ParameterValue
-  // CHECK-DAG:     [[Neg1:i\d+]]     Neg [ [[Arg1]] ]
-  // CHECK-DAG:     [[Neg2:i\d+]]     Neg [ [[Arg2]] ]
-  // CHECK-DAG:     [[Add:i\d+]]      Add [ [[Neg1]] [[Neg2]] ]
-  // CHECK-DAG:                       Return [ [[Add]] ]
+  // CHECK-DAG:     <<Arg1:i\d+>>     ParameterValue
+  // CHECK-DAG:     <<Arg2:i\d+>>     ParameterValue
+  // CHECK-DAG:     <<Neg1:i\d+>>     Neg [<<Arg1>>]
+  // CHECK-DAG:     <<Neg2:i\d+>>     Neg [<<Arg2>>]
+  // CHECK-DAG:     <<Add:i\d+>>      Add [<<Neg1>>,<<Neg2>>]
+  // CHECK-DAG:                       Return [<<Add>>]
 
   // CHECK-START: int Main.AddNegs1(int, int) instruction_simplifier (after)
-  // CHECK-DAG:     [[Arg1:i\d+]]     ParameterValue
-  // CHECK-DAG:     [[Arg2:i\d+]]     ParameterValue
+  // CHECK-DAG:     <<Arg1:i\d+>>     ParameterValue
+  // CHECK-DAG:     <<Arg2:i\d+>>     ParameterValue
   // CHECK-NOT:                       Neg
-  // CHECK-DAG:     [[Add:i\d+]]      Add [ [[Arg1]] [[Arg2]] ]
-  // CHECK-DAG:     [[Neg:i\d+]]      Neg [ [[Add]] ]
-  // CHECK-DAG:                       Return [ [[Neg]] ]
+  // CHECK-DAG:     <<Add:i\d+>>      Add [<<Arg1>>,<<Arg2>>]
+  // CHECK-DAG:     <<Neg:i\d+>>      Neg [<<Add>>]
+  // CHECK-DAG:                       Return [<<Neg>>]
 
   public static int AddNegs1(int arg1, int arg2) {
     return -arg1 + -arg2;
@@ -354,25 +384,34 @@
    */
 
   // CHECK-START: int Main.AddNegs2(int, int) instruction_simplifier (before)
-  // CHECK-DAG:     [[Arg1:i\d+]]     ParameterValue
-  // CHECK-DAG:     [[Arg2:i\d+]]     ParameterValue
-  // CHECK-DAG:     [[Neg1:i\d+]]     Neg [ [[Arg1]] ]
-  // CHECK-DAG:     [[Neg2:i\d+]]     Neg [ [[Arg2]] ]
-  // CHECK-DAG:     [[Add1:i\d+]]     Add [ [[Neg1]] [[Neg2]] ]
-  // CHECK-DAG:     [[Add2:i\d+]]     Add [ [[Neg1]] [[Neg2]] ]
-  // CHECK-DAG:     [[Or:i\d+]]       Or [ [[Add1]] [[Add2]] ]
-  // CHECK-DAG:                       Return [ [[Or]] ]
+  // CHECK-DAG:     <<Arg1:i\d+>>     ParameterValue
+  // CHECK-DAG:     <<Arg2:i\d+>>     ParameterValue
+  // CHECK-DAG:     <<Neg1:i\d+>>     Neg [<<Arg1>>]
+  // CHECK-DAG:     <<Neg2:i\d+>>     Neg [<<Arg2>>]
+  // CHECK-DAG:     <<Add1:i\d+>>     Add [<<Neg1>>,<<Neg2>>]
+  // CHECK-DAG:     <<Add2:i\d+>>     Add [<<Neg1>>,<<Neg2>>]
+  // CHECK-DAG:     <<Or:i\d+>>       Or [<<Add1>>,<<Add2>>]
+  // CHECK-DAG:                       Return [<<Or>>]
 
   // CHECK-START: int Main.AddNegs2(int, int) instruction_simplifier (after)
-  // CHECK-DAG:     [[Arg1:i\d+]]     ParameterValue
-  // CHECK-DAG:     [[Arg2:i\d+]]     ParameterValue
-  // CHECK-DAG:     [[Neg1:i\d+]]     Neg [ [[Arg1]] ]
-  // CHECK-DAG:     [[Neg2:i\d+]]     Neg [ [[Arg2]] ]
-  // CHECK-DAG:     [[Add1:i\d+]]     Add [ [[Neg1]] [[Neg2]] ]
-  // CHECK-DAG:     [[Add2:i\d+]]     Add [ [[Neg1]] [[Neg2]] ]
+  // CHECK-DAG:     <<Arg1:i\d+>>     ParameterValue
+  // CHECK-DAG:     <<Arg2:i\d+>>     ParameterValue
+  // CHECK-DAG:     <<Neg1:i\d+>>     Neg [<<Arg1>>]
+  // CHECK-DAG:     <<Neg2:i\d+>>     Neg [<<Arg2>>]
+  // CHECK-DAG:     <<Add1:i\d+>>     Add [<<Neg1>>,<<Neg2>>]
+  // CHECK-DAG:     <<Add2:i\d+>>     Add [<<Neg1>>,<<Neg2>>]
   // CHECK-NOT:                       Neg
-  // CHECK-DAG:     [[Or:i\d+]]       Or [ [[Add1]] [[Add2]] ]
-  // CHECK-DAG:                       Return [ [[Or]] ]
+  // CHECK-DAG:     <<Or:i\d+>>       Or [<<Add1>>,<<Add2>>]
+  // CHECK-DAG:                       Return [<<Or>>]
+
+  // CHECK-START: int Main.AddNegs2(int, int) GVN (after)
+  // CHECK-DAG:     <<Arg1:i\d+>>     ParameterValue
+  // CHECK-DAG:     <<Arg2:i\d+>>     ParameterValue
+  // CHECK-DAG:     <<Neg1:i\d+>>     Neg [<<Arg1>>]
+  // CHECK-DAG:     <<Neg2:i\d+>>     Neg [<<Arg2>>]
+  // CHECK-DAG:     <<Add:i\d+>>      Add [<<Neg1>>,<<Neg2>>]
+  // CHECK-DAG:     <<Or:i\d+>>       Or [<<Add>>,<<Add>>]
+  // CHECK-DAG:                       Return [<<Or>>]
 
   public static int AddNegs2(int arg1, int arg2) {
     int temp1 = -arg1;
@@ -390,26 +429,26 @@
 
   // CHECK-START: long Main.AddNegs3(long, long) instruction_simplifier (before)
   // -------------- Arguments and initial negation operations.
-  // CHECK-DAG:     [[Arg1:j\d+]]     ParameterValue
-  // CHECK-DAG:     [[Arg2:j\d+]]     ParameterValue
-  // CHECK-DAG:     [[Neg1:j\d+]]     Neg [ [[Arg1]] ]
-  // CHECK-DAG:     [[Neg2:j\d+]]     Neg [ [[Arg2]] ]
+  // CHECK-DAG:     <<Arg1:j\d+>>     ParameterValue
+  // CHECK-DAG:     <<Arg2:j\d+>>     ParameterValue
+  // CHECK-DAG:     <<Neg1:j\d+>>     Neg [<<Arg1>>]
+  // CHECK-DAG:     <<Neg2:j\d+>>     Neg [<<Arg2>>]
   // CHECK:                           Goto
   // -------------- Loop
   // CHECK:                           SuspendCheck
-  // CHECK:         [[Add:j\d+]]      Add [ [[Neg1]] [[Neg2]] ]
+  // CHECK:         <<Add:j\d+>>      Add [<<Neg1>>,<<Neg2>>]
   // CHECK:                           Goto
 
   // CHECK-START: long Main.AddNegs3(long, long) instruction_simplifier (after)
   // -------------- Arguments and initial negation operations.
-  // CHECK-DAG:     [[Arg1:j\d+]]     ParameterValue
-  // CHECK-DAG:     [[Arg2:j\d+]]     ParameterValue
-  // CHECK-DAG:     [[Neg1:j\d+]]     Neg [ [[Arg1]] ]
-  // CHECK-DAG:     [[Neg2:j\d+]]     Neg [ [[Arg2]] ]
+  // CHECK-DAG:     <<Arg1:j\d+>>     ParameterValue
+  // CHECK-DAG:     <<Arg2:j\d+>>     ParameterValue
+  // CHECK-DAG:     <<Neg1:j\d+>>     Neg [<<Arg1>>]
+  // CHECK-DAG:     <<Neg2:j\d+>>     Neg [<<Arg2>>]
   // CHECK:                           Goto
   // -------------- Loop
   // CHECK:                           SuspendCheck
-  // CHECK:         [[Add:j\d+]]      Add [ [[Neg1]] [[Neg2]] ]
+  // CHECK:         <<Add:j\d+>>      Add [<<Neg1>>,<<Neg2>>]
   // CHECK-NOT:                       Neg
   // CHECK:                           Goto
 
@@ -430,17 +469,17 @@
    */
 
   // CHECK-START: long Main.AddNeg1(long, long) instruction_simplifier (before)
-  // CHECK-DAG:     [[Arg1:j\d+]]     ParameterValue
-  // CHECK-DAG:     [[Arg2:j\d+]]     ParameterValue
-  // CHECK-DAG:     [[Neg:j\d+]]      Neg [ [[Arg1]] ]
-  // CHECK-DAG:     [[Add:j\d+]]      Add [ [[Neg]] [[Arg2]] ]
-  // CHECK-DAG:                       Return [ [[Add]] ]
+  // CHECK-DAG:     <<Arg1:j\d+>>     ParameterValue
+  // CHECK-DAG:     <<Arg2:j\d+>>     ParameterValue
+  // CHECK-DAG:     <<Neg:j\d+>>      Neg [<<Arg1>>]
+  // CHECK-DAG:     <<Add:j\d+>>      Add [<<Neg>>,<<Arg2>>]
+  // CHECK-DAG:                       Return [<<Add>>]
 
   // CHECK-START: long Main.AddNeg1(long, long) instruction_simplifier (after)
-  // CHECK-DAG:     [[Arg1:j\d+]]     ParameterValue
-  // CHECK-DAG:     [[Arg2:j\d+]]     ParameterValue
-  // CHECK-DAG:     [[Sub:j\d+]]      Sub [ [[Arg2]] [[Arg1]] ]
-  // CHECK-DAG:                       Return [ [[Sub]] ]
+  // CHECK-DAG:     <<Arg1:j\d+>>     ParameterValue
+  // CHECK-DAG:     <<Arg2:j\d+>>     ParameterValue
+  // CHECK-DAG:     <<Sub:j\d+>>      Sub [<<Arg2>>,<<Arg1>>]
+  // CHECK-DAG:                       Return [<<Sub>>]
 
   // CHECK-START: long Main.AddNeg1(long, long) instruction_simplifier (after)
   // CHECK-NOT:                       Neg
@@ -460,22 +499,22 @@
    */
 
   // CHECK-START: long Main.AddNeg2(long, long) instruction_simplifier (before)
-  // CHECK-DAG:     [[Arg1:j\d+]]     ParameterValue
-  // CHECK-DAG:     [[Arg2:j\d+]]     ParameterValue
-  // CHECK-DAG:     [[Neg:j\d+]]      Neg [ [[Arg2]] ]
-  // CHECK-DAG:     [[Add1:j\d+]]     Add [ [[Arg1]] [[Neg]] ]
-  // CHECK-DAG:     [[Add2:j\d+]]     Add [ [[Arg1]] [[Neg]] ]
-  // CHECK-DAG:     [[Res:j\d+]]      Or [ [[Add1]] [[Add2]] ]
-  // CHECK-DAG:                       Return [ [[Res]] ]
+  // CHECK-DAG:     <<Arg1:j\d+>>     ParameterValue
+  // CHECK-DAG:     <<Arg2:j\d+>>     ParameterValue
+  // CHECK-DAG:     <<Neg:j\d+>>      Neg [<<Arg2>>]
+  // CHECK-DAG:     <<Add1:j\d+>>     Add [<<Arg1>>,<<Neg>>]
+  // CHECK-DAG:     <<Add2:j\d+>>     Add [<<Arg1>>,<<Neg>>]
+  // CHECK-DAG:     <<Res:j\d+>>      Or [<<Add1>>,<<Add2>>]
+  // CHECK-DAG:                       Return [<<Res>>]
 
   // CHECK-START: long Main.AddNeg2(long, long) instruction_simplifier (after)
-  // CHECK-DAG:     [[Arg1:j\d+]]     ParameterValue
-  // CHECK-DAG:     [[Arg2:j\d+]]     ParameterValue
-  // CHECK-DAG:     [[Neg:j\d+]]      Neg [ [[Arg2]] ]
-  // CHECK-DAG:     [[Add1:j\d+]]     Add [ [[Arg1]] [[Neg]] ]
-  // CHECK-DAG:     [[Add2:j\d+]]     Add [ [[Arg1]] [[Neg]] ]
-  // CHECK-DAG:     [[Res:j\d+]]      Or [ [[Add1]] [[Add2]] ]
-  // CHECK-DAG:                       Return [ [[Res]] ]
+  // CHECK-DAG:     <<Arg1:j\d+>>     ParameterValue
+  // CHECK-DAG:     <<Arg2:j\d+>>     ParameterValue
+  // CHECK-DAG:     <<Neg:j\d+>>      Neg [<<Arg2>>]
+  // CHECK-DAG:     <<Add1:j\d+>>     Add [<<Arg1>>,<<Neg>>]
+  // CHECK-DAG:     <<Add2:j\d+>>     Add [<<Arg1>>,<<Neg>>]
+  // CHECK-DAG:     <<Res:j\d+>>      Or [<<Add1>>,<<Add2>>]
+  // CHECK-DAG:                       Return [<<Res>>]
 
   // CHECK-START: long Main.AddNeg2(long, long) instruction_simplifier (after)
   // CHECK-NOT:                       Sub
@@ -491,14 +530,14 @@
    */
 
   // CHECK-START: long Main.NegNeg1(long) instruction_simplifier (before)
-  // CHECK-DAG:     [[Arg:j\d+]]      ParameterValue
-  // CHECK-DAG:     [[Neg1:j\d+]]     Neg [ [[Arg]] ]
-  // CHECK-DAG:     [[Neg2:j\d+]]     Neg [ [[Neg1]] ]
-  // CHECK-DAG:                       Return [ [[Neg2]] ]
+  // CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
+  // CHECK-DAG:     <<Neg1:j\d+>>     Neg [<<Arg>>]
+  // CHECK-DAG:     <<Neg2:j\d+>>     Neg [<<Neg1>>]
+  // CHECK-DAG:                       Return [<<Neg2>>]
 
   // CHECK-START: long Main.NegNeg1(long) instruction_simplifier (after)
-  // CHECK-DAG:     [[Arg:j\d+]]      ParameterValue
-  // CHECK-DAG:                       Return [ [[Arg]] ]
+  // CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
+  // CHECK-DAG:                       Return [<<Arg>>]
 
   // CHECK-START: long Main.NegNeg1(long) instruction_simplifier (after)
   // CHECK-NOT:                       Neg
@@ -515,21 +554,27 @@
    */
 
   // CHECK-START: int Main.NegNeg2(int) instruction_simplifier (before)
-  // CHECK-DAG:     [[Arg:i\d+]]      ParameterValue
-  // CHECK-DAG:     [[Neg1:i\d+]]     Neg [ [[Arg]] ]
-  // CHECK-DAG:     [[Neg2:i\d+]]     Neg [ [[Neg1]] ]
-  // CHECK-DAG:     [[Add:i\d+]]      Add [ [[Neg1]] [[Neg2]] ]
-  // CHECK-DAG:                       Return [ [[Add]] ]
+  // CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
+  // CHECK-DAG:     <<Neg1:i\d+>>     Neg [<<Arg>>]
+  // CHECK-DAG:     <<Neg2:i\d+>>     Neg [<<Neg1>>]
+  // CHECK-DAG:     <<Add:i\d+>>      Add [<<Neg1>>,<<Neg2>>]
+  // CHECK-DAG:                       Return [<<Add>>]
 
   // CHECK-START: int Main.NegNeg2(int) instruction_simplifier (after)
-  // CHECK-DAG:     [[Arg:i\d+]]      ParameterValue
-  // CHECK-DAG:     [[Sub:i\d+]]      Sub [ [[Arg]] [[Arg]] ]
-  // CHECK-DAG:                       Return [ [[Sub]] ]
+  // CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
+  // CHECK-DAG:     <<Sub:i\d+>>      Sub [<<Arg>>,<<Arg>>]
+  // CHECK-DAG:                       Return [<<Sub>>]
 
   // CHECK-START: int Main.NegNeg2(int) instruction_simplifier (after)
   // CHECK-NOT:                       Neg
   // CHECK-NOT:                       Add
 
+  // CHECK-START: int Main.NegNeg2(int) constant_folding_after_inlining (after)
+  // CHECK:         <<Const0:i\d+>>   IntConstant 0
+  // CHECK-NOT:                       Neg
+  // CHECK-NOT:                       Add
+  // CHECK:                           Return [<<Const0>>]
+
   public static int NegNeg2(int arg) {
     int temp = -arg;
     return temp + -temp;
@@ -543,15 +588,15 @@
    */
 
   // CHECK-START: long Main.NegNeg3(long) instruction_simplifier (before)
-  // CHECK-DAG:     [[Arg:j\d+]]      ParameterValue
-  // CHECK-DAG:     [[Const0:j\d+]]   LongConstant 0
-  // CHECK-DAG:     [[Neg:j\d+]]      Neg [ [[Arg]] ]
-  // CHECK-DAG:     [[Sub:j\d+]]      Sub [ [[Const0]] [[Neg]] ]
-  // CHECK-DAG:                       Return [ [[Sub]] ]
+  // CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
+  // CHECK-DAG:     <<Const0:j\d+>>   LongConstant 0
+  // CHECK-DAG:     <<Neg:j\d+>>      Neg [<<Arg>>]
+  // CHECK-DAG:     <<Sub:j\d+>>      Sub [<<Const0>>,<<Neg>>]
+  // CHECK-DAG:                       Return [<<Sub>>]
 
   // CHECK-START: long Main.NegNeg3(long) instruction_simplifier (after)
-  // CHECK-DAG:     [[Arg:j\d+]]      ParameterValue
-  // CHECK-DAG:                       Return [ [[Arg]] ]
+  // CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
+  // CHECK-DAG:                       Return [<<Arg>>]
 
   // CHECK-START: long Main.NegNeg3(long) instruction_simplifier (after)
   // CHECK-NOT:                       Neg
@@ -568,17 +613,17 @@
    */
 
   // CHECK-START: int Main.NegSub1(int, int) instruction_simplifier (before)
-  // CHECK-DAG:     [[Arg1:i\d+]]     ParameterValue
-  // CHECK-DAG:     [[Arg2:i\d+]]     ParameterValue
-  // CHECK-DAG:     [[Sub:i\d+]]      Sub [ [[Arg1]] [[Arg2]] ]
-  // CHECK-DAG:     [[Neg:i\d+]]      Neg [ [[Sub]] ]
-  // CHECK-DAG:                       Return [ [[Neg]] ]
+  // CHECK-DAG:     <<Arg1:i\d+>>     ParameterValue
+  // CHECK-DAG:     <<Arg2:i\d+>>     ParameterValue
+  // CHECK-DAG:     <<Sub:i\d+>>      Sub [<<Arg1>>,<<Arg2>>]
+  // CHECK-DAG:     <<Neg:i\d+>>      Neg [<<Sub>>]
+  // CHECK-DAG:                       Return [<<Neg>>]
 
   // CHECK-START: int Main.NegSub1(int, int) instruction_simplifier (after)
-  // CHECK-DAG:     [[Arg1:i\d+]]     ParameterValue
-  // CHECK-DAG:     [[Arg2:i\d+]]     ParameterValue
-  // CHECK-DAG:     [[Sub:i\d+]]      Sub [ [[Arg2]] [[Arg1]] ]
-  // CHECK-DAG:                       Return [ [[Sub]] ]
+  // CHECK-DAG:     <<Arg1:i\d+>>     ParameterValue
+  // CHECK-DAG:     <<Arg2:i\d+>>     ParameterValue
+  // CHECK-DAG:     <<Sub:i\d+>>      Sub [<<Arg2>>,<<Arg1>>]
+  // CHECK-DAG:                       Return [<<Sub>>]
 
   // CHECK-START: int Main.NegSub1(int, int) instruction_simplifier (after)
   // CHECK-NOT:                       Neg
@@ -598,22 +643,22 @@
    */
 
   // CHECK-START: int Main.NegSub2(int, int) instruction_simplifier (before)
-  // CHECK-DAG:     [[Arg1:i\d+]]     ParameterValue
-  // CHECK-DAG:     [[Arg2:i\d+]]     ParameterValue
-  // CHECK-DAG:     [[Sub:i\d+]]      Sub [ [[Arg1]] [[Arg2]] ]
-  // CHECK-DAG:     [[Neg1:i\d+]]     Neg [ [[Sub]] ]
-  // CHECK-DAG:     [[Neg2:i\d+]]     Neg [ [[Sub]] ]
-  // CHECK-DAG:     [[Or:i\d+]]       Or [ [[Neg1]] [[Neg2]] ]
-  // CHECK-DAG:                       Return [ [[Or]] ]
+  // CHECK-DAG:     <<Arg1:i\d+>>     ParameterValue
+  // CHECK-DAG:     <<Arg2:i\d+>>     ParameterValue
+  // CHECK-DAG:     <<Sub:i\d+>>      Sub [<<Arg1>>,<<Arg2>>]
+  // CHECK-DAG:     <<Neg1:i\d+>>     Neg [<<Sub>>]
+  // CHECK-DAG:     <<Neg2:i\d+>>     Neg [<<Sub>>]
+  // CHECK-DAG:     <<Or:i\d+>>       Or [<<Neg1>>,<<Neg2>>]
+  // CHECK-DAG:                       Return [<<Or>>]
 
   // CHECK-START: int Main.NegSub2(int, int) instruction_simplifier (after)
-  // CHECK-DAG:     [[Arg1:i\d+]]     ParameterValue
-  // CHECK-DAG:     [[Arg2:i\d+]]     ParameterValue
-  // CHECK-DAG:     [[Sub:i\d+]]      Sub [ [[Arg1]] [[Arg2]] ]
-  // CHECK-DAG:     [[Neg1:i\d+]]     Neg [ [[Sub]] ]
-  // CHECK-DAG:     [[Neg2:i\d+]]     Neg [ [[Sub]] ]
-  // CHECK-DAG:     [[Or:i\d+]]       Or [ [[Neg1]] [[Neg2]] ]
-  // CHECK-DAG:                       Return [ [[Or]] ]
+  // CHECK-DAG:     <<Arg1:i\d+>>     ParameterValue
+  // CHECK-DAG:     <<Arg2:i\d+>>     ParameterValue
+  // CHECK-DAG:     <<Sub:i\d+>>      Sub [<<Arg1>>,<<Arg2>>]
+  // CHECK-DAG:     <<Neg1:i\d+>>     Neg [<<Sub>>]
+  // CHECK-DAG:     <<Neg2:i\d+>>     Neg [<<Sub>>]
+  // CHECK-DAG:     <<Or:i\d+>>       Or [<<Neg1>>,<<Neg2>>]
+  // CHECK-DAG:                       Return [<<Or>>]
 
   public static int NegSub2(int arg1, int arg2) {
     int temp = arg1 - arg2;
@@ -626,15 +671,15 @@
    */
 
   // CHECK-START: long Main.NotNot1(long) instruction_simplifier (before)
-  // CHECK-DAG:     [[Arg:j\d+]]      ParameterValue
-  // CHECK-DAG:     [[ConstF1:j\d+]]  LongConstant -1
-  // CHECK-DAG:     [[Xor1:j\d+]]     Xor [ [[Arg]] [[ConstF1]] ]
-  // CHECK-DAG:     [[Xor2:j\d+]]     Xor [ [[Xor1]] [[ConstF1]] ]
-  // CHECK-DAG:                       Return [ [[Xor2]] ]
+  // CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
+  // CHECK-DAG:     <<ConstF1:j\d+>>  LongConstant -1
+  // CHECK-DAG:     <<Xor1:j\d+>>     Xor [<<Arg>>,<<ConstF1>>]
+  // CHECK-DAG:     <<Xor2:j\d+>>     Xor [<<Xor1>>,<<ConstF1>>]
+  // CHECK-DAG:                       Return [<<Xor2>>]
 
   // CHECK-START: long Main.NotNot1(long) instruction_simplifier (after)
-  // CHECK-DAG:     [[Arg:j\d+]]      ParameterValue
-  // CHECK-DAG:                       Return [ [[Arg]] ]
+  // CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
+  // CHECK-DAG:                       Return [<<Arg>>]
 
   // CHECK-START: long Main.NotNot1(long) instruction_simplifier (after)
   // CHECK-NOT:                       Xor
@@ -644,18 +689,18 @@
   }
 
   // CHECK-START: int Main.NotNot2(int) instruction_simplifier (before)
-  // CHECK-DAG:     [[Arg:i\d+]]      ParameterValue
-  // CHECK-DAG:     [[ConstF1:i\d+]]  IntConstant -1
-  // CHECK-DAG:     [[Xor1:i\d+]]     Xor [ [[Arg]] [[ConstF1]] ]
-  // CHECK-DAG:     [[Xor2:i\d+]]     Xor [ [[Xor1]] [[ConstF1]] ]
-  // CHECK-DAG:     [[Add:i\d+]]      Add [ [[Xor1]] [[Xor2]] ]
-  // CHECK-DAG:                       Return [ [[Add]] ]
+  // CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
+  // CHECK-DAG:     <<ConstF1:i\d+>>  IntConstant -1
+  // CHECK-DAG:     <<Xor1:i\d+>>     Xor [<<Arg>>,<<ConstF1>>]
+  // CHECK-DAG:     <<Xor2:i\d+>>     Xor [<<Xor1>>,<<ConstF1>>]
+  // CHECK-DAG:     <<Add:i\d+>>      Add [<<Xor1>>,<<Xor2>>]
+  // CHECK-DAG:                       Return [<<Add>>]
 
   // CHECK-START: int Main.NotNot2(int) instruction_simplifier (after)
-  // CHECK-DAG:     [[Arg:i\d+]]      ParameterValue
-  // CHECK-DAG:     [[Not:i\d+]]      Not [ [[Arg]] ]
-  // CHECK-DAG:     [[Add:i\d+]]      Add [ [[Not]] [[Arg]] ]
-  // CHECK-DAG:                       Return [ [[Add]] ]
+  // CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
+  // CHECK-DAG:     <<Not:i\d+>>      Not [<<Arg>>]
+  // CHECK-DAG:     <<Add:i\d+>>      Add [<<Not>>,<<Arg>>]
+  // CHECK-DAG:                       Return [<<Add>>]
 
   // CHECK-START: int Main.NotNot2(int) instruction_simplifier (after)
   // CHECK-NOT:                       Xor
@@ -671,18 +716,18 @@
    */
 
   // CHECK-START: int Main.SubNeg1(int, int) instruction_simplifier (before)
-  // CHECK-DAG:     [[Arg1:i\d+]]     ParameterValue
-  // CHECK-DAG:     [[Arg2:i\d+]]     ParameterValue
-  // CHECK-DAG:     [[Neg:i\d+]]      Neg [ [[Arg1]] ]
-  // CHECK-DAG:     [[Sub:i\d+]]      Sub [ [[Neg]] [[Arg2]] ]
-  // CHECK-DAG:                       Return [ [[Sub]] ]
+  // CHECK-DAG:     <<Arg1:i\d+>>     ParameterValue
+  // CHECK-DAG:     <<Arg2:i\d+>>     ParameterValue
+  // CHECK-DAG:     <<Neg:i\d+>>      Neg [<<Arg1>>]
+  // CHECK-DAG:     <<Sub:i\d+>>      Sub [<<Neg>>,<<Arg2>>]
+  // CHECK-DAG:                       Return [<<Sub>>]
 
   // CHECK-START: int Main.SubNeg1(int, int) instruction_simplifier (after)
-  // CHECK-DAG:     [[Arg1:i\d+]]     ParameterValue
-  // CHECK-DAG:     [[Arg2:i\d+]]     ParameterValue
-  // CHECK-DAG:     [[Add:i\d+]]      Add [ [[Arg1]] [[Arg2]] ]
-  // CHECK-DAG:     [[Neg:i\d+]]      Neg [ [[Add]] ]
-  // CHECK-DAG:                       Return [ [[Neg]] ]
+  // CHECK-DAG:     <<Arg1:i\d+>>     ParameterValue
+  // CHECK-DAG:     <<Arg2:i\d+>>     ParameterValue
+  // CHECK-DAG:     <<Add:i\d+>>      Add [<<Arg1>>,<<Arg2>>]
+  // CHECK-DAG:     <<Neg:i\d+>>      Neg [<<Add>>]
+  // CHECK-DAG:                       Return [<<Neg>>]
 
   // CHECK-START: int Main.SubNeg1(int, int) instruction_simplifier (after)
   // CHECK-NOT:                       Sub
@@ -702,22 +747,22 @@
    */
 
   // CHECK-START: int Main.SubNeg2(int, int) instruction_simplifier (before)
-  // CHECK-DAG:     [[Arg1:i\d+]]     ParameterValue
-  // CHECK-DAG:     [[Arg2:i\d+]]     ParameterValue
-  // CHECK-DAG:     [[Neg:i\d+]]      Neg [ [[Arg1]] ]
-  // CHECK-DAG:     [[Sub1:i\d+]]     Sub [ [[Neg]] [[Arg2]] ]
-  // CHECK-DAG:     [[Sub2:i\d+]]     Sub [ [[Neg]] [[Arg2]] ]
-  // CHECK-DAG:     [[Or:i\d+]]       Or [ [[Sub1]] [[Sub2]] ]
-  // CHECK-DAG:                       Return [ [[Or]] ]
+  // CHECK-DAG:     <<Arg1:i\d+>>     ParameterValue
+  // CHECK-DAG:     <<Arg2:i\d+>>     ParameterValue
+  // CHECK-DAG:     <<Neg:i\d+>>      Neg [<<Arg1>>]
+  // CHECK-DAG:     <<Sub1:i\d+>>     Sub [<<Neg>>,<<Arg2>>]
+  // CHECK-DAG:     <<Sub2:i\d+>>     Sub [<<Neg>>,<<Arg2>>]
+  // CHECK-DAG:     <<Or:i\d+>>       Or [<<Sub1>>,<<Sub2>>]
+  // CHECK-DAG:                       Return [<<Or>>]
 
   // CHECK-START: int Main.SubNeg2(int, int) instruction_simplifier (after)
-  // CHECK-DAG:     [[Arg1:i\d+]]     ParameterValue
-  // CHECK-DAG:     [[Arg2:i\d+]]     ParameterValue
-  // CHECK-DAG:     [[Neg:i\d+]]      Neg [ [[Arg1]] ]
-  // CHECK-DAG:     [[Sub1:i\d+]]     Sub [ [[Neg]] [[Arg2]] ]
-  // CHECK-DAG:     [[Sub2:i\d+]]     Sub [ [[Neg]] [[Arg2]] ]
-  // CHECK-DAG:     [[Or:i\d+]]       Or [ [[Sub1]] [[Sub2]] ]
-  // CHECK-DAG:                       Return [ [[Or]] ]
+  // CHECK-DAG:     <<Arg1:i\d+>>     ParameterValue
+  // CHECK-DAG:     <<Arg2:i\d+>>     ParameterValue
+  // CHECK-DAG:     <<Neg:i\d+>>      Neg [<<Arg1>>]
+  // CHECK-DAG:     <<Sub1:i\d+>>     Sub [<<Neg>>,<<Arg2>>]
+  // CHECK-DAG:     <<Sub2:i\d+>>     Sub [<<Neg>>,<<Arg2>>]
+  // CHECK-DAG:     <<Or:i\d+>>       Or [<<Sub1>>,<<Sub2>>]
+  // CHECK-DAG:                       Return [<<Or>>]
 
   // CHECK-START: int Main.SubNeg2(int, int) instruction_simplifier (after)
   // CHECK-NOT:                       Add
@@ -736,24 +781,24 @@
 
   // CHECK-START: long Main.SubNeg3(long, long) instruction_simplifier (before)
   // -------------- Arguments and initial negation operation.
-  // CHECK-DAG:     [[Arg1:j\d+]]     ParameterValue
-  // CHECK-DAG:     [[Arg2:j\d+]]     ParameterValue
-  // CHECK-DAG:     [[Neg:j\d+]]      Neg [ [[Arg1]] ]
+  // CHECK-DAG:     <<Arg1:j\d+>>     ParameterValue
+  // CHECK-DAG:     <<Arg2:j\d+>>     ParameterValue
+  // CHECK-DAG:     <<Neg:j\d+>>      Neg [<<Arg1>>]
   // CHECK:                           Goto
   // -------------- Loop
   // CHECK:                           SuspendCheck
-  // CHECK:         [[Sub:j\d+]]      Sub [ [[Neg]] [[Arg2]] ]
+  // CHECK:         <<Sub:j\d+>>      Sub [<<Neg>>,<<Arg2>>]
   // CHECK:                           Goto
 
   // CHECK-START: long Main.SubNeg3(long, long) instruction_simplifier (after)
   // -------------- Arguments and initial negation operation.
-  // CHECK-DAG:     [[Arg1:j\d+]]     ParameterValue
-  // CHECK-DAG:     [[Arg2:j\d+]]     ParameterValue
-  // CHECK-DAG:     [[Neg:j\d+]]      Neg [ [[Arg1]] ]
+  // CHECK-DAG:     <<Arg1:j\d+>>     ParameterValue
+  // CHECK-DAG:     <<Arg2:j\d+>>     ParameterValue
+  // CHECK-DAG:     <<Neg:j\d+>>      Neg [<<Arg1>>]
   // CHECK-DAG:                       Goto
   // -------------- Loop
   // CHECK:                           SuspendCheck
-  // CHECK:         [[Sub:j\d+]]      Sub [ [[Neg]] [[Arg2]] ]
+  // CHECK:         <<Sub:j\d+>>      Sub [<<Neg>>,<<Arg2>>]
   // CHECK-NOT:                       Neg
   // CHECK:                           Goto
 
@@ -767,116 +812,116 @@
   }
 
   // CHECK-START: int Main.EqualTrueRhs(boolean) instruction_simplifier (before)
-  // CHECK-DAG:     [[Arg:z\d+]]      ParameterValue
-  // CHECK-DAG:     [[Const1:i\d+]]   IntConstant 1
-  // CHECK-DAG:     [[Cond:z\d+]]     Equal [ [[Arg]] [[Const1]] ]
-  // CHECK-DAG:                       If [ [[Cond]] ]
+  // CHECK-DAG:     <<Arg:z\d+>>      ParameterValue
+  // CHECK-DAG:     <<Const1:i\d+>>   IntConstant 1
+  // CHECK-DAG:     <<Cond:z\d+>>     Equal [<<Arg>>,<<Const1>>]
+  // CHECK-DAG:                       If [<<Cond>>]
 
   // CHECK-START: int Main.EqualTrueRhs(boolean) instruction_simplifier (after)
-  // CHECK-DAG:     [[Arg:z\d+]]      ParameterValue
-  // CHECK-DAG:                       If [ [[Arg]] ]
+  // CHECK-DAG:     <<Arg:z\d+>>      ParameterValue
+  // CHECK-DAG:                       If [<<Arg>>]
 
   public static int EqualTrueRhs(boolean arg) {
     return (arg != true) ? 3 : 5;
   }
 
   // CHECK-START: int Main.EqualTrueLhs(boolean) instruction_simplifier (before)
-  // CHECK-DAG:     [[Arg:z\d+]]      ParameterValue
-  // CHECK-DAG:     [[Const1:i\d+]]   IntConstant 1
-  // CHECK-DAG:     [[Cond:z\d+]]     Equal [ [[Const1]] [[Arg]] ]
-  // CHECK-DAG:                       If [ [[Cond]] ]
+  // CHECK-DAG:     <<Arg:z\d+>>      ParameterValue
+  // CHECK-DAG:     <<Const1:i\d+>>   IntConstant 1
+  // CHECK-DAG:     <<Cond:z\d+>>     Equal [<<Const1>>,<<Arg>>]
+  // CHECK-DAG:                       If [<<Cond>>]
 
   // CHECK-START: int Main.EqualTrueLhs(boolean) instruction_simplifier (after)
-  // CHECK-DAG:     [[Arg:z\d+]]      ParameterValue
-  // CHECK-DAG:                       If [ [[Arg]] ]
+  // CHECK-DAG:     <<Arg:z\d+>>      ParameterValue
+  // CHECK-DAG:                       If [<<Arg>>]
 
   public static int EqualTrueLhs(boolean arg) {
     return (true != arg) ? 3 : 5;
   }
 
   // CHECK-START: int Main.EqualFalseRhs(boolean) instruction_simplifier (before)
-  // CHECK-DAG:     [[Arg:z\d+]]      ParameterValue
-  // CHECK-DAG:     [[Const0:i\d+]]   IntConstant 0
-  // CHECK-DAG:     [[Cond:z\d+]]     Equal [ [[Arg]] [[Const0]] ]
-  // CHECK-DAG:                       If [ [[Cond]] ]
+  // CHECK-DAG:     <<Arg:z\d+>>      ParameterValue
+  // CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
+  // CHECK-DAG:     <<Cond:z\d+>>     Equal [<<Arg>>,<<Const0>>]
+  // CHECK-DAG:                       If [<<Cond>>]
 
   // CHECK-START: int Main.EqualFalseRhs(boolean) instruction_simplifier (after)
-  // CHECK-DAG:     [[Arg:z\d+]]      ParameterValue
-  // CHECK-DAG:     [[NotArg:z\d+]]   BooleanNot [ [[Arg]] ]
-  // CHECK-DAG:                       If [ [[NotArg]] ]
+  // CHECK-DAG:     <<Arg:z\d+>>      ParameterValue
+  // CHECK-DAG:     <<NotArg:z\d+>>   BooleanNot [<<Arg>>]
+  // CHECK-DAG:                       If [<<NotArg>>]
 
   public static int EqualFalseRhs(boolean arg) {
     return (arg != false) ? 3 : 5;
   }
 
   // CHECK-START: int Main.EqualFalseLhs(boolean) instruction_simplifier (before)
-  // CHECK-DAG:     [[Arg:z\d+]]      ParameterValue
-  // CHECK-DAG:     [[Const0:i\d+]]   IntConstant 0
-  // CHECK-DAG:     [[Cond:z\d+]]     Equal [ [[Const0]] [[Arg]] ]
-  // CHECK-DAG:                       If [ [[Cond]] ]
+  // CHECK-DAG:     <<Arg:z\d+>>      ParameterValue
+  // CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
+  // CHECK-DAG:     <<Cond:z\d+>>     Equal [<<Const0>>,<<Arg>>]
+  // CHECK-DAG:                       If [<<Cond>>]
 
   // CHECK-START: int Main.EqualFalseLhs(boolean) instruction_simplifier (after)
-  // CHECK-DAG:     [[Arg:z\d+]]      ParameterValue
-  // CHECK-DAG:     [[NotArg:z\d+]]   BooleanNot [ [[Arg]] ]
-  // CHECK-DAG:                       If [ [[NotArg]] ]
+  // CHECK-DAG:     <<Arg:z\d+>>      ParameterValue
+  // CHECK-DAG:     <<NotArg:z\d+>>   BooleanNot [<<Arg>>]
+  // CHECK-DAG:                       If [<<NotArg>>]
 
   public static int EqualFalseLhs(boolean arg) {
     return (false != arg) ? 3 : 5;
   }
 
   // CHECK-START: int Main.NotEqualTrueRhs(boolean) instruction_simplifier (before)
-  // CHECK-DAG:     [[Arg:z\d+]]      ParameterValue
-  // CHECK-DAG:     [[Const1:i\d+]]   IntConstant 1
-  // CHECK-DAG:     [[Cond:z\d+]]     NotEqual [ [[Arg]] [[Const1]] ]
-  // CHECK-DAG:                       If [ [[Cond]] ]
+  // CHECK-DAG:     <<Arg:z\d+>>      ParameterValue
+  // CHECK-DAG:     <<Const1:i\d+>>   IntConstant 1
+  // CHECK-DAG:     <<Cond:z\d+>>     NotEqual [<<Arg>>,<<Const1>>]
+  // CHECK-DAG:                       If [<<Cond>>]
 
   // CHECK-START: int Main.NotEqualTrueRhs(boolean) instruction_simplifier (after)
-  // CHECK-DAG:     [[Arg:z\d+]]      ParameterValue
-  // CHECK-DAG:     [[NotArg:z\d+]]   BooleanNot [ [[Arg]] ]
-  // CHECK-DAG:                       If [ [[NotArg]] ]
+  // CHECK-DAG:     <<Arg:z\d+>>      ParameterValue
+  // CHECK-DAG:     <<NotArg:z\d+>>   BooleanNot [<<Arg>>]
+  // CHECK-DAG:                       If [<<NotArg>>]
 
   public static int NotEqualTrueRhs(boolean arg) {
     return (arg == true) ? 3 : 5;
   }
 
   // CHECK-START: int Main.NotEqualTrueLhs(boolean) instruction_simplifier (before)
-  // CHECK-DAG:     [[Arg:z\d+]]      ParameterValue
-  // CHECK-DAG:     [[Const1:i\d+]]   IntConstant 1
-  // CHECK-DAG:     [[Cond:z\d+]]     NotEqual [ [[Const1]] [[Arg]] ]
-  // CHECK-DAG:                       If [ [[Cond]] ]
+  // CHECK-DAG:     <<Arg:z\d+>>      ParameterValue
+  // CHECK-DAG:     <<Const1:i\d+>>   IntConstant 1
+  // CHECK-DAG:     <<Cond:z\d+>>     NotEqual [<<Const1>>,<<Arg>>]
+  // CHECK-DAG:                       If [<<Cond>>]
 
   // CHECK-START: int Main.NotEqualTrueLhs(boolean) instruction_simplifier (after)
-  // CHECK-DAG:     [[Arg:z\d+]]      ParameterValue
-  // CHECK-DAG:     [[NotArg:z\d+]]   BooleanNot [ [[Arg]] ]
-  // CHECK-DAG:                       If [ [[NotArg]] ]
+  // CHECK-DAG:     <<Arg:z\d+>>      ParameterValue
+  // CHECK-DAG:     <<NotArg:z\d+>>   BooleanNot [<<Arg>>]
+  // CHECK-DAG:                       If [<<NotArg>>]
 
   public static int NotEqualTrueLhs(boolean arg) {
     return (true == arg) ? 3 : 5;
   }
 
   // CHECK-START: int Main.NotEqualFalseRhs(boolean) instruction_simplifier (before)
-  // CHECK-DAG:     [[Arg:z\d+]]      ParameterValue
-  // CHECK-DAG:     [[Const0:i\d+]]   IntConstant 0
-  // CHECK-DAG:     [[Cond:z\d+]]     NotEqual [ [[Arg]] [[Const0]] ]
-  // CHECK-DAG:                       If [ [[Cond]] ]
+  // CHECK-DAG:     <<Arg:z\d+>>      ParameterValue
+  // CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
+  // CHECK-DAG:     <<Cond:z\d+>>     NotEqual [<<Arg>>,<<Const0>>]
+  // CHECK-DAG:                       If [<<Cond>>]
 
   // CHECK-START: int Main.NotEqualFalseRhs(boolean) instruction_simplifier (after)
-  // CHECK-DAG:     [[Arg:z\d+]]      ParameterValue
-  // CHECK-DAG:                       If [ [[Arg]] ]
+  // CHECK-DAG:     <<Arg:z\d+>>      ParameterValue
+  // CHECK-DAG:                       If [<<Arg>>]
 
   public static int NotEqualFalseRhs(boolean arg) {
     return (arg == false) ? 3 : 5;
   }
 
   // CHECK-START: int Main.NotEqualFalseLhs(boolean) instruction_simplifier (before)
-  // CHECK-DAG:     [[Arg:z\d+]]      ParameterValue
-  // CHECK-DAG:     [[Const0:i\d+]]   IntConstant 0
-  // CHECK-DAG:     [[Cond:z\d+]]     NotEqual [ [[Const0]] [[Arg]] ]
-  // CHECK-DAG:                       If [ [[Cond]] ]
+  // CHECK-DAG:     <<Arg:z\d+>>      ParameterValue
+  // CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
+  // CHECK-DAG:     <<Cond:z\d+>>     NotEqual [<<Const0>>,<<Arg>>]
+  // CHECK-DAG:                       If [<<Cond>>]
 
   // CHECK-START: int Main.NotEqualFalseLhs(boolean) instruction_simplifier (after)
-  // CHECK-DAG:     [[Arg:z\d+]]      ParameterValue
-  // CHECK-DAG:                       If [ [[Arg]] ]
+  // CHECK-DAG:     <<Arg:z\d+>>      ParameterValue
+  // CHECK-DAG:                       If [<<Arg>>]
 
   public static int NotEqualFalseLhs(boolean arg) {
     return (false == arg) ? 3 : 5;
@@ -889,22 +934,100 @@
    */
 
   // CHECK-START: boolean Main.NotNotBool(boolean) instruction_simplifier_after_types (before)
-  // CHECK-DAG:     [[Arg:z\d+]]       ParameterValue
-  // CHECK-DAG:     [[NotArg:z\d+]]    BooleanNot [ [[Arg]] ]
-  // CHECK-DAG:     [[NotNotArg:z\d+]] BooleanNot [ [[NotArg]] ]
-  // CHECK-DAG:                        Return [ [[NotNotArg]] ]
+  // CHECK-DAG:     <<Arg:z\d+>>       ParameterValue
+  // CHECK-DAG:     <<NotArg:z\d+>>    BooleanNot [<<Arg>>]
+  // CHECK-DAG:     <<NotNotArg:z\d+>> BooleanNot [<<NotArg>>]
+  // CHECK-DAG:                        Return [<<NotNotArg>>]
 
   // CHECK-START: boolean Main.NotNotBool(boolean) instruction_simplifier_after_types (after)
-  // CHECK-DAG:     [[Arg:z\d+]]       ParameterValue
-  // CHECK-DAG:                        BooleanNot [ [[Arg]] ]
-  // CHECK-DAG:                        Return [ [[Arg]] ]
+  // CHECK-DAG:     <<Arg:z\d+>>       ParameterValue
+  // CHECK-DAG:                        BooleanNot [<<Arg>>]
+  // CHECK-DAG:                        Return [<<Arg>>]
 
   // CHECK-START: boolean Main.NotNotBool(boolean) instruction_simplifier_after_types (after)
   // CHECK:                            BooleanNot
   // CHECK-NOT:                        BooleanNot
 
+  public static boolean NegateValue(boolean arg) {
+    return !arg;
+  }
+
   public static boolean NotNotBool(boolean arg) {
-    return !(!arg);
+    return !(NegateValue(arg));
+  }
+
+  // CHECK-START: float Main.Div2(float) instruction_simplifier (before)
+  // CHECK-DAG:      <<Arg:f\d+>>      ParameterValue
+  // CHECK-DAG:      <<Const2:f\d+>>   FloatConstant 2
+  // CHECK-DAG:      <<Div:f\d+>>      Div [<<Arg>>,<<Const2>>]
+  // CHECK-DAG:                        Return [<<Div>>]
+
+  // CHECK-START: float Main.Div2(float) instruction_simplifier (after)
+  // CHECK-DAG:      <<Arg:f\d+>>      ParameterValue
+  // CHECK-DAG:      <<ConstP5:f\d+>>  FloatConstant 0.5
+  // CHECK-DAG:      <<Mul:f\d+>>      Mul [<<Arg>>,<<ConstP5>>]
+  // CHECK-DAG:                        Return [<<Mul>>]
+
+  // CHECK-START: float Main.Div2(float) instruction_simplifier (after)
+  // CHECK-NOT:                        Div
+
+  public static float Div2(float arg) {
+    return arg / 2.0f;
+  }
+
+  // CHECK-START: double Main.Div2(double) instruction_simplifier (before)
+  // CHECK-DAG:      <<Arg:d\d+>>      ParameterValue
+  // CHECK-DAG:      <<Const2:d\d+>>   DoubleConstant 2
+  // CHECK-DAG:      <<Div:d\d+>>      Div [<<Arg>>,<<Const2>>]
+  // CHECK-DAG:                        Return [<<Div>>]
+
+  // CHECK-START: double Main.Div2(double) instruction_simplifier (after)
+  // CHECK-DAG:      <<Arg:d\d+>>      ParameterValue
+  // CHECK-DAG:      <<ConstP5:d\d+>>  DoubleConstant 0.5
+  // CHECK-DAG:      <<Mul:d\d+>>      Mul [<<Arg>>,<<ConstP5>>]
+  // CHECK-DAG:                        Return [<<Mul>>]
+
+  // CHECK-START: double Main.Div2(double) instruction_simplifier (after)
+  // CHECK-NOT:                        Div
+  public static double Div2(double arg) {
+    return arg / 2.0;
+  }
+
+  // CHECK-START: float Main.DivMP25(float) instruction_simplifier (before)
+  // CHECK-DAG:      <<Arg:f\d+>>      ParameterValue
+  // CHECK-DAG:      <<ConstMP25:f\d+>>   FloatConstant -0.25
+  // CHECK-DAG:      <<Div:f\d+>>      Div [<<Arg>>,<<ConstMP25>>]
+  // CHECK-DAG:                        Return [<<Div>>]
+
+  // CHECK-START: float Main.DivMP25(float) instruction_simplifier (after)
+  // CHECK-DAG:      <<Arg:f\d+>>      ParameterValue
+  // CHECK-DAG:      <<ConstM4:f\d+>>  FloatConstant -4
+  // CHECK-DAG:      <<Mul:f\d+>>      Mul [<<Arg>>,<<ConstM4>>]
+  // CHECK-DAG:                        Return [<<Mul>>]
+
+  // CHECK-START: float Main.DivMP25(float) instruction_simplifier (after)
+  // CHECK-NOT:                        Div
+
+  public static float DivMP25(float arg) {
+    return arg / -0.25f;
+  }
+
+  // CHECK-START: double Main.DivMP25(double) instruction_simplifier (before)
+  // CHECK-DAG:      <<Arg:d\d+>>      ParameterValue
+  // CHECK-DAG:      <<ConstMP25:d\d+>>   DoubleConstant -0.25
+  // CHECK-DAG:      <<Div:d\d+>>      Div [<<Arg>>,<<ConstMP25>>]
+  // CHECK-DAG:                        Return [<<Div>>]
+
+  // CHECK-START: double Main.DivMP25(double) instruction_simplifier (after)
+  // CHECK-DAG:      <<Arg:d\d+>>      ParameterValue
+  // CHECK-DAG:      <<ConstM4:d\d+>>  DoubleConstant -4
+  // CHECK-DAG:      <<Mul:d\d+>>      Mul [<<Arg>>,<<ConstM4>>]
+  // CHECK-DAG:                        Return [<<Mul>>]
+
+  // CHECK-START: double Main.DivMP25(double) instruction_simplifier (after)
+  // CHECK-NOT:                        Div
+  public static double DivMP25(double arg) {
+    return arg / -0.25;  // double literal, matching Div2(double) and the DoubleConstant -0.25 check
   }
 
   public static void main(String[] args) {
@@ -941,7 +1064,6 @@
     assertIntEquals(SubNeg1(arg, arg + 1), -(arg + arg + 1));
     assertIntEquals(SubNeg2(arg, arg + 1), -(arg + arg + 1));
     assertLongEquals(SubNeg3(arg, arg + 1), -(2 * arg + 1));
-
     assertIntEquals(EqualTrueRhs(true), 5);
     assertIntEquals(EqualTrueLhs(true), 5);
     assertIntEquals(EqualFalseRhs(true), 3);
@@ -952,5 +1074,10 @@
     assertIntEquals(NotEqualFalseLhs(true), 5);
     assertBooleanEquals(NotNotBool(true), true);
     assertBooleanEquals(NotNotBool(false), false);
+    assertFloatEquals(Div2(100.0f), 50.0f);
+    assertDoubleEquals(Div2(150.0), 75.0);
+    assertFloatEquals(DivMP25(100.0f), -400.0f);
+    assertDoubleEquals(DivMP25(150.0), -600.0);
+    assertLongEquals(Shl1(100), 200);
   }
 }
diff --git a/test/461-get-reference-vreg/get_reference_vreg_jni.cc b/test/461-get-reference-vreg/get_reference_vreg_jni.cc
index f0b78e1..a8ef684 100644
--- a/test/461-get-reference-vreg/get_reference_vreg_jni.cc
+++ b/test/461-get-reference-vreg/get_reference_vreg_jni.cc
@@ -29,7 +29,9 @@
  public:
   TestVisitor(Thread* thread, Context* context, mirror::Object* this_value)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-      : StackVisitor(thread, context), this_value_(this_value), found_method_index_(0) {}
+      : StackVisitor(thread, context, StackVisitor::StackWalkKind::kIncludeInlinedFrames),
+        this_value_(this_value),
+        found_method_index_(0) {}
 
   bool VisitFrame() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     mirror::ArtMethod* m = GetMethod();
diff --git a/test/462-checker-inlining-across-dex-files/src/Main.java b/test/462-checker-inlining-across-dex-files/src/Main.java
index d5563b8..3d583b4 100644
--- a/test/462-checker-inlining-across-dex-files/src/Main.java
+++ b/test/462-checker-inlining-across-dex-files/src/Main.java
@@ -22,7 +22,7 @@
 public class Main {
 
   // CHECK-START: void Main.inlineEmptyMethod() inliner (before)
-  // CHECK-DAG:     [[Invoke:v\d+]]  InvokeStaticOrDirect
+  // CHECK-DAG:     <<Invoke:v\d+>>  InvokeStaticOrDirect
   // CHECK-DAG:                      ReturnVoid
 
   // CHECK-START: void Main.inlineEmptyMethod() inliner (after)
@@ -33,120 +33,122 @@
   }
 
   // CHECK-START: int Main.inlineReturnIntMethod() inliner (before)
-  // CHECK-DAG:     [[Invoke:i\d+]]  InvokeStaticOrDirect
-  // CHECK-DAG:                      Return [ [[Invoke]] ]
+  // CHECK-DAG:     <<Invoke:i\d+>>  InvokeStaticOrDirect
+  // CHECK-DAG:                      Return [<<Invoke>>]
 
   // CHECK-START: int Main.inlineReturnIntMethod() inliner (after)
   // CHECK-NOT:                      InvokeStaticOrDirect
 
   // CHECK-START: int Main.inlineReturnIntMethod() inliner (after)
-  // CHECK-DAG:     [[Const38:i\d+]] IntConstant 38
-  // CHECK-DAG:                      Return [ [[Const38]] ]
+  // CHECK-DAG:     <<Const38:i\d+>> IntConstant 38
+  // CHECK-DAG:                      Return [<<Const38>>]
 
   public static int inlineReturnIntMethod() {
     return OtherDex.returnIntMethod();
   }
 
   // CHECK-START: int Main.dontInlineOtherDexStatic() inliner (before)
-  // CHECK-DAG:     [[Invoke:i\d+]]  InvokeStaticOrDirect
-  // CHECK-DAG:                      Return [ [[Invoke]] ]
+  // CHECK-DAG:     <<Invoke:i\d+>>  InvokeStaticOrDirect
+  // CHECK-DAG:                      Return [<<Invoke>>]
 
   // CHECK-START: int Main.dontInlineOtherDexStatic() inliner (after)
-  // CHECK-DAG:     [[Invoke:i\d+]]  InvokeStaticOrDirect
-  // CHECK-DAG:                      Return [ [[Invoke]] ]
+  // CHECK-DAG:     <<Invoke:i\d+>>  InvokeStaticOrDirect
+  // CHECK-DAG:                      Return [<<Invoke>>]
 
   public static int dontInlineOtherDexStatic() {
     return OtherDex.returnOtherDexStatic();
   }
 
   // CHECK-START: int Main.inlineMainStatic() inliner (before)
-  // CHECK-DAG:     [[Invoke:i\d+]]  InvokeStaticOrDirect
-  // CHECK-DAG:                      Return [ [[Invoke]] ]
+  // CHECK-DAG:     <<Invoke:i\d+>>  InvokeStaticOrDirect
+  // CHECK-DAG:                      Return [<<Invoke>>]
 
   // CHECK-START: int Main.inlineMainStatic() inliner (after)
   // CHECK-NOT:                      InvokeStaticOrDirect
 
   // CHECK-START: int Main.inlineMainStatic() inliner (after)
-  // CHECK-DAG:     [[Static:i\d+]]  StaticFieldGet
-  // CHECK-DAG:                      Return [ [[Static]] ]
+  // CHECK-DAG:     <<Static:i\d+>>  StaticFieldGet
+  // CHECK-DAG:                      Return [<<Static>>]
 
   public static int inlineMainStatic() {
     return OtherDex.returnMainStatic();
   }
 
   // CHECK-START: int Main.dontInlineRecursiveCall() inliner (before)
-  // CHECK-DAG:     [[Invoke:i\d+]]  InvokeStaticOrDirect
-  // CHECK-DAG:                      Return [ [[Invoke]] ]
+  // CHECK-DAG:     <<Invoke:i\d+>>  InvokeStaticOrDirect
+  // CHECK-DAG:                      Return [<<Invoke>>]
 
   // CHECK-START: int Main.dontInlineRecursiveCall() inliner (after)
-  // CHECK-DAG:     [[Invoke:i\d+]]  InvokeStaticOrDirect
-  // CHECK-DAG:                      Return [ [[Invoke]] ]
+  // CHECK-DAG:     <<Invoke:i\d+>>  InvokeStaticOrDirect
+  // CHECK-DAG:                      Return [<<Invoke>>]
 
   public static int dontInlineRecursiveCall() {
     return OtherDex.recursiveCall();
   }
 
   // CHECK-START: java.lang.String Main.dontInlineReturnString() inliner (before)
-  // CHECK-DAG:     [[Invoke:l\d+]]  InvokeStaticOrDirect
-  // CHECK-DAG:                      Return [ [[Invoke]] ]
+  // CHECK-DAG:     <<Invoke:l\d+>>  InvokeStaticOrDirect
+  // CHECK-DAG:                      Return [<<Invoke>>]
 
   // CHECK-START: java.lang.String Main.dontInlineReturnString() inliner (after)
-  // CHECK-DAG:     [[Invoke:l\d+]]  InvokeStaticOrDirect
-  // CHECK-DAG:                      Return [ [[Invoke]] ]
+  // CHECK-DAG:     <<Invoke:l\d+>>  InvokeStaticOrDirect
+  // CHECK-DAG:                      Return [<<Invoke>>]
 
   public static String dontInlineReturnString() {
     return OtherDex.returnString();
   }
 
   // CHECK-START: java.lang.Class Main.dontInlineOtherDexClass() inliner (before)
-  // CHECK-DAG:     [[Invoke:l\d+]]  InvokeStaticOrDirect
-  // CHECK-DAG:                      Return [ [[Invoke]] ]
+  // CHECK-DAG:     <<Invoke:l\d+>>  InvokeStaticOrDirect
+  // CHECK-DAG:                      Return [<<Invoke>>]
 
   // CHECK-START: java.lang.Class Main.dontInlineOtherDexClass() inliner (after)
-  // CHECK-DAG:     [[Invoke:l\d+]]  InvokeStaticOrDirect
-  // CHECK-DAG:                      Return [ [[Invoke]] ]
+  // CHECK-DAG:     <<Invoke:l\d+>>  InvokeStaticOrDirect
+  // CHECK-DAG:                      Return [<<Invoke>>]
 
   public static Class dontInlineOtherDexClass() {
     return OtherDex.returnOtherDexClass();
   }
 
   // CHECK-START: java.lang.Class Main.inlineMainClass() inliner (before)
-  // CHECK-DAG:     [[Invoke:l\d+]]  InvokeStaticOrDirect
-  // CHECK-DAG:                      Return [ [[Invoke]] ]
+  // CHECK-DAG:     <<Invoke:l\d+>>  InvokeStaticOrDirect
+  // CHECK-DAG:                      Return [<<Invoke>>]
 
   // CHECK-START: java.lang.Class Main.inlineMainClass() inliner (after)
   // CHECK-NOT:                      InvokeStaticOrDirect
 
   // CHECK-START: java.lang.Class Main.inlineMainClass() inliner (after)
-  // CHECK-DAG:     [[Class:l\d+]]  LoadClass
-  // CHECK-DAG:                     Return [ [[Class]] ]
+  // CHECK-DAG:                     Return [<<Class:l\d+>>]
+  // CHECK-DAG:     <<Class>>       LoadClass
+  // Note: Return is matched first and its input then matched as LoadClass, because there are two LoadClass instructions.
 
   public static Class inlineMainClass() {
     return OtherDex.returnMainClass();
   }
 
   // CHECK-START: java.lang.Class Main.dontInlineOtherDexClassStaticCall() inliner (before)
-  // CHECK-DAG:     [[Invoke:l\d+]]  InvokeStaticOrDirect
-  // CHECK-DAG:                      Return [ [[Invoke]] ]
+  // CHECK-DAG:     <<Invoke:l\d+>>  InvokeStaticOrDirect
+  // CHECK-DAG:                      Return [<<Invoke>>]
 
   // CHECK-START: java.lang.Class Main.dontInlineOtherDexClassStaticCall() inliner (after)
-  // CHECK-DAG:     [[Invoke:l\d+]]  InvokeStaticOrDirect
-  // CHECK-DAG:                      Return [ [[Invoke]] ]
+  // CHECK-DAG:     <<Invoke:l\d+>>  InvokeStaticOrDirect
+  // CHECK-DAG:                      Return [<<Invoke>>]
 
   public static Class dontInlineOtherDexClassStaticCall() {
     return OtherDex.returnOtherDexClassStaticCall();
   }
 
   // CHECK-START: java.lang.Class Main.inlineOtherDexCallingMain() inliner (before)
-  // CHECK-DAG:     [[Invoke:l\d+]]  InvokeStaticOrDirect
-  // CHECK-DAG:                      Return [ [[Invoke]] ]
+  // CHECK-DAG:     <<Invoke:l\d+>>  InvokeStaticOrDirect
+  // CHECK-DAG:                      Return [<<Invoke>>]
 
   // CHECK-START: java.lang.Class Main.inlineOtherDexCallingMain() inliner (after)
   // CHECK-NOT:                      InvokeStaticOrDirect
 
   // CHECK-START: java.lang.Class Main.inlineOtherDexCallingMain() inliner (after)
-  // CHECK-DAG:     [[Class:l\d+]]  LoadClass
-  // CHECK-DAG:                     Return [ [[Class]] ]
+  // CHECK-DAG:                     Return [<<Class:l\d+>>]
+  // CHECK-DAG:     <<Class>>       LoadClass
+  // Note: Return is matched first and its input then matched as LoadClass, because there are two LoadClass instructions.
 
   public static Class inlineOtherDexCallingMain() {
     return OtherDex.returnOtherDexCallingMain();
diff --git a/test/463-checker-boolean-simplifier/src/Main.java b/test/463-checker-boolean-simplifier/src/Main.java
index 3daf693..e237448 100644
--- a/test/463-checker-boolean-simplifier/src/Main.java
+++ b/test/463-checker-boolean-simplifier/src/Main.java
@@ -26,18 +26,24 @@
     }
   }
 
+  public static void assertIntEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
   /*
    * Elementary test negating a boolean. Verifies that blocks are merged and
    * empty branches removed.
    */
 
   // CHECK-START: boolean Main.BooleanNot(boolean) boolean_simplifier (before)
-  // CHECK-DAG:     [[Param:z\d+]]    ParameterValue
-  // CHECK-DAG:     [[Const0:i\d+]]   IntConstant 0
-  // CHECK-DAG:     [[Const1:i\d+]]   IntConstant 1
-  // CHECK-DAG:                       If [ [[Param]] ]
-  // CHECK-DAG:     [[Phi:i\d+]]      Phi [ [[Const1]] [[Const0]] ]
-  // CHECK-DAG:                       Return [ [[Phi]] ]
+  // CHECK-DAG:     <<Param:z\d+>>    ParameterValue
+  // CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
+  // CHECK-DAG:     <<Const1:i\d+>>   IntConstant 1
+  // CHECK-DAG:                       If [<<Param>>]
+  // CHECK-DAG:     <<Phi:i\d+>>      Phi [<<Const1>>,<<Const0>>]
+  // CHECK-DAG:                       Return [<<Phi>>]
 
   // CHECK-START: boolean Main.BooleanNot(boolean) boolean_simplifier (before)
   // CHECK:                           Goto
@@ -46,10 +52,10 @@
   // CHECK-NOT:                       Goto
 
   // CHECK-START: boolean Main.BooleanNot(boolean) boolean_simplifier (after)
-  // CHECK-DAG:     [[Param:z\d+]]    ParameterValue
-  // CHECK-DAG:     [[Const0:i\d+]]   IntConstant 0
-  // CHECK-DAG:     [[NotParam:z\d+]] BooleanNot [ [[Param]] ]
-  // CHECK-DAG:                       Return [ [[NotParam]] ]
+  // CHECK-DAG:     <<Param:z\d+>>    ParameterValue
+  // CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
+  // CHECK-DAG:     <<NotParam:z\d+>> BooleanNot [<<Param>>]
+  // CHECK-DAG:                       Return [<<NotParam>>]
 
   // CHECK-START: boolean Main.BooleanNot(boolean) boolean_simplifier (after)
   // CHECK-NOT:                       If
@@ -69,22 +75,22 @@
    */
 
   // CHECK-START: boolean Main.GreaterThan(int, int) boolean_simplifier (before)
-  // CHECK-DAG:     [[ParamX:i\d+]]   ParameterValue
-  // CHECK-DAG:     [[ParamY:i\d+]]   ParameterValue
-  // CHECK-DAG:     [[Const0:i\d+]]   IntConstant 0
-  // CHECK-DAG:     [[Const1:i\d+]]   IntConstant 1
-  // CHECK-DAG:     [[Cond:z\d+]]     GreaterThan [ [[ParamX]] [[ParamY]] ]
-  // CHECK-DAG:                       If [ [[Cond]] ]
-  // CHECK-DAG:     [[Phi:i\d+]]      Phi [ [[Const0]] [[Const1]] ]
-  // CHECK-DAG:                       Return [ [[Phi]] ]
+  // CHECK-DAG:     <<ParamX:i\d+>>   ParameterValue
+  // CHECK-DAG:     <<ParamY:i\d+>>   ParameterValue
+  // CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
+  // CHECK-DAG:     <<Const1:i\d+>>   IntConstant 1
+  // CHECK-DAG:     <<Cond:z\d+>>     GreaterThan [<<ParamX>>,<<ParamY>>]
+  // CHECK-DAG:                       If [<<Cond>>]
+  // CHECK-DAG:     <<Phi:i\d+>>      Phi [<<Const0>>,<<Const1>>]
+  // CHECK-DAG:                       Return [<<Phi>>]
 
   // CHECK-START: boolean Main.GreaterThan(int, int) boolean_simplifier (after)
-  // CHECK-DAG:     [[ParamX:i\d+]]   ParameterValue
-  // CHECK-DAG:     [[ParamY:i\d+]]   ParameterValue
-  // CHECK-DAG:     [[Const0:i\d+]]   IntConstant 0
-  // CHECK-DAG:     [[Const1:i\d+]]   IntConstant 1
-  // CHECK-DAG:     [[Cond:z\d+]]     GreaterThan [ [[ParamX]] [[ParamY]] ]
-  // CHECK-DAG:                       Return [ [[Cond]] ]
+  // CHECK-DAG:     <<ParamX:i\d+>>   ParameterValue
+  // CHECK-DAG:     <<ParamY:i\d+>>   ParameterValue
+  // CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
+  // CHECK-DAG:     <<Const1:i\d+>>   IntConstant 1
+  // CHECK-DAG:     <<Cond:z\d+>>     GreaterThan [<<ParamX>>,<<ParamY>>]
+  // CHECK-DAG:                       Return [<<Cond>>]
 
   public static boolean GreaterThan(int x, int y) {
     return (x <= y) ? false : true;
@@ -96,22 +102,22 @@
    */
 
   // CHECK-START: boolean Main.LessThan(int, int) boolean_simplifier (before)
-  // CHECK-DAG:     [[ParamX:i\d+]]   ParameterValue
-  // CHECK-DAG:     [[ParamY:i\d+]]   ParameterValue
-  // CHECK-DAG:     [[Const0:i\d+]]   IntConstant 0
-  // CHECK-DAG:     [[Const1:i\d+]]   IntConstant 1
-  // CHECK-DAG:     [[Cond:z\d+]]     GreaterThanOrEqual [ [[ParamX]] [[ParamY]] ]
-  // CHECK-DAG:                       If [ [[Cond]] ]
-  // CHECK-DAG:     [[Phi:i\d+]]      Phi [ [[Const1]] [[Const0]] ]
-  // CHECK-DAG:                       Return [ [[Phi]] ]
+  // CHECK-DAG:     <<ParamX:i\d+>>   ParameterValue
+  // CHECK-DAG:     <<ParamY:i\d+>>   ParameterValue
+  // CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
+  // CHECK-DAG:     <<Const1:i\d+>>   IntConstant 1
+  // CHECK-DAG:     <<Cond:z\d+>>     GreaterThanOrEqual [<<ParamX>>,<<ParamY>>]
+  // CHECK-DAG:                       If [<<Cond>>]
+  // CHECK-DAG:     <<Phi:i\d+>>      Phi [<<Const1>>,<<Const0>>]
+  // CHECK-DAG:                       Return [<<Phi>>]
 
   // CHECK-START: boolean Main.LessThan(int, int) boolean_simplifier (after)
-  // CHECK-DAG:     [[ParamX:i\d+]]   ParameterValue
-  // CHECK-DAG:     [[ParamY:i\d+]]   ParameterValue
-  // CHECK-DAG:     [[Const0:i\d+]]   IntConstant 0
-  // CHECK-DAG:     [[Const1:i\d+]]   IntConstant 1
-  // CHECK-DAG:     [[Cond:z\d+]]     LessThan [ [[ParamX]] [[ParamY]] ]
-  // CHECK-DAG:                       Return [ [[Cond]] ]
+  // CHECK-DAG:     <<ParamX:i\d+>>   ParameterValue
+  // CHECK-DAG:     <<ParamY:i\d+>>   ParameterValue
+  // CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
+  // CHECK-DAG:     <<Const1:i\d+>>   IntConstant 1
+  // CHECK-DAG:     <<Cond:z\d+>>     LessThan [<<ParamX>>,<<ParamY>>]
+  // CHECK-DAG:                       Return [<<Cond>>]
 
   // CHECK-START: boolean Main.LessThan(int, int) boolean_simplifier (after)
   // CHECK-NOT:                       GreaterThanOrEqual
@@ -126,35 +132,65 @@
    */
 
   // CHECK-START: boolean Main.ValuesOrdered(int, int, int) boolean_simplifier (before)
-  // CHECK-DAG:     [[ParamX:i\d+]]   ParameterValue
-  // CHECK-DAG:     [[ParamY:i\d+]]   ParameterValue
-  // CHECK-DAG:     [[ParamZ:i\d+]]   ParameterValue
-  // CHECK-DAG:     [[Const0:i\d+]]   IntConstant 0
-  // CHECK-DAG:     [[Const1:i\d+]]   IntConstant 1
-  // CHECK-DAG:     [[CondXY:z\d+]]   GreaterThan [ [[ParamX]] [[ParamY]] ]
-  // CHECK-DAG:                       If [ [[CondXY]] ]
-  // CHECK-DAG:     [[CondYZ:z\d+]]   GreaterThan [ [[ParamY]] [[ParamZ]] ]
-  // CHECK-DAG:                       If [ [[CondYZ]] ]
-  // CHECK-DAG:     [[CondXYZ:z\d+]]  NotEqual [ [[PhiXY:i\d+]] [[PhiYZ:i\d+]] ]
-  // CHECK-DAG:                       If [ [[CondXYZ]] ]
-  // CHECK-DAG:                       Return [ [[PhiXYZ:i\d+]] ]
-  // CHECK-DAG:     [[PhiXY]]         Phi [ [[Const1]] [[Const0]] ]
-  // CHECK-DAG:     [[PhiYZ]]         Phi [ [[Const1]] [[Const0]] ]
-  // CHECK-DAG:     [[PhiXYZ]]        Phi [ [[Const1]] [[Const0]] ]
+  // CHECK-DAG:     <<ParamX:i\d+>>   ParameterValue
+  // CHECK-DAG:     <<ParamY:i\d+>>   ParameterValue
+  // CHECK-DAG:     <<ParamZ:i\d+>>   ParameterValue
+  // CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
+  // CHECK-DAG:     <<Const1:i\d+>>   IntConstant 1
+  // CHECK-DAG:     <<CondXY:z\d+>>   GreaterThan [<<ParamX>>,<<ParamY>>]
+  // CHECK-DAG:                       If [<<CondXY>>]
+  // CHECK-DAG:     <<CondYZ:z\d+>>   GreaterThan [<<ParamY>>,<<ParamZ>>]
+  // CHECK-DAG:                       If [<<CondYZ>>]
+  // CHECK-DAG:     <<CondXYZ:z\d+>>  NotEqual [<<PhiXY:i\d+>>,<<PhiYZ:i\d+>>]
+  // CHECK-DAG:                       If [<<CondXYZ>>]
+  // CHECK-DAG:                       Return [<<PhiXYZ:i\d+>>]
+  // CHECK-DAG:     <<PhiXY>>         Phi [<<Const1>>,<<Const0>>]
+  // CHECK-DAG:     <<PhiYZ>>         Phi [<<Const1>>,<<Const0>>]
+  // CHECK-DAG:     <<PhiXYZ>>        Phi [<<Const1>>,<<Const0>>]
 
   // CHECK-START: boolean Main.ValuesOrdered(int, int, int) boolean_simplifier (after)
-  // CHECK-DAG:     [[ParamX:i\d+]]   ParameterValue
-  // CHECK-DAG:     [[ParamY:i\d+]]   ParameterValue
-  // CHECK-DAG:     [[ParamZ:i\d+]]   ParameterValue
-  // CHECK-DAG:     [[CmpXY:z\d+]]    LessThanOrEqual [ [[ParamX]] [[ParamY]] ]
-  // CHECK-DAG:     [[CmpYZ:z\d+]]    LessThanOrEqual [ [[ParamY]] [[ParamZ]] ]
-  // CHECK-DAG:     [[CmpXYZ:z\d+]]   Equal [ [[CmpXY]] [[CmpYZ]] ]
-  // CHECK-DAG:                       Return [ [[CmpXYZ]] ]
+  // CHECK-DAG:     <<ParamX:i\d+>>   ParameterValue
+  // CHECK-DAG:     <<ParamY:i\d+>>   ParameterValue
+  // CHECK-DAG:     <<ParamZ:i\d+>>   ParameterValue
+  // CHECK-DAG:     <<CmpXY:z\d+>>    LessThanOrEqual [<<ParamX>>,<<ParamY>>]
+  // CHECK-DAG:     <<CmpYZ:z\d+>>    LessThanOrEqual [<<ParamY>>,<<ParamZ>>]
+  // CHECK-DAG:     <<CmpXYZ:z\d+>>   Equal [<<CmpXY>>,<<CmpYZ>>]
+  // CHECK-DAG:                       Return [<<CmpXYZ>>]
 
   public static boolean ValuesOrdered(int x, int y, int z) {
     return (x <= y) == (y <= z);
   }
 
+  // CHECK-START: int Main.NegatedCondition(boolean) boolean_simplifier (before)
+  // CHECK-DAG:     <<Param:z\d+>>    ParameterValue
+  // CHECK-DAG:     <<Const42:i\d+>>  IntConstant 42
+  // CHECK-DAG:     <<Const43:i\d+>>  IntConstant 43
+  // CHECK-DAG:     <<NotParam:z\d+>> BooleanNot [<<Param>>]
+  // CHECK-DAG:                       If [<<NotParam>>]
+  // CHECK-DAG:     <<Phi:i\d+>>      Phi [<<Const42>>,<<Const43>>]
+  // CHECK-DAG:                       Return [<<Phi>>]
+
+  // CHECK-START: int Main.NegatedCondition(boolean) boolean_simplifier (after)
+  // CHECK-DAG:     <<Param:z\d+>>    ParameterValue
+  // CHECK-DAG:     <<Const42:i\d+>>  IntConstant 42
+  // CHECK-DAG:     <<Const43:i\d+>>  IntConstant 43
+  // CHECK-DAG:                       If [<<Param>>]
+  // CHECK-DAG:     <<Phi:i\d+>>      Phi [<<Const42>>,<<Const43>>]
+  // CHECK-DAG:                       Return [<<Phi>>]
+
+  // Note: The fact that branches are swapped is verified by running the test.
+
+  // CHECK-START: int Main.NegatedCondition(boolean) boolean_simplifier (after)
+  // CHECK-NOT:                       BooleanNot
+
+  public static int NegatedCondition(boolean x) {
+    if (x != false) {
+      return 42;
+    } else {
+      return 43;
+    }
+  }
+
   public static void main(String[] args) {
     assertBoolEquals(false, BooleanNot(true));
     assertBoolEquals(true, BooleanNot(false));
@@ -171,5 +207,7 @@
     assertBoolEquals(true, ValuesOrdered(3, 3, 3));
     assertBoolEquals(true, ValuesOrdered(3, 3, 5));
     assertBoolEquals(false, ValuesOrdered(5, 5, 3));
+    assertIntEquals(42, NegatedCondition(true));
+    assertIntEquals(43, NegatedCondition(false));
   }
 }
diff --git a/test/464-checker-inline-sharpen-calls/src/Main.java b/test/464-checker-inline-sharpen-calls/src/Main.java
index 1b25b42..e451f70 100644
--- a/test/464-checker-inline-sharpen-calls/src/Main.java
+++ b/test/464-checker-inline-sharpen-calls/src/Main.java
@@ -20,7 +20,7 @@
   }
 
   // CHECK-START: void Main.inlineSharpenInvokeVirtual(Main) inliner (before)
-  // CHECK-DAG:     [[Invoke:v\d+]]  InvokeStaticOrDirect
+  // CHECK-DAG:     <<Invoke:v\d+>>  InvokeStaticOrDirect
   // CHECK-DAG:                      ReturnVoid
 
   // CHECK-START: void Main.inlineSharpenInvokeVirtual(Main) inliner (after)
@@ -31,15 +31,15 @@
   }
 
   // CHECK-START: int Main.inlineSharpenStringInvoke() inliner (before)
-  // CHECK-DAG:     [[Invoke:i\d+]]  InvokeStaticOrDirect
-  // CHECK-DAG:                      Return [ [[Invoke]] ]
+  // CHECK-DAG:     <<Invoke:i\d+>>  InvokeStaticOrDirect
+  // CHECK-DAG:                      Return [<<Invoke>>]
 
   // CHECK-START: int Main.inlineSharpenStringInvoke() inliner (after)
   // CHECK-NOT:                      InvokeStaticOrDirect
 
   // CHECK-START: int Main.inlineSharpenStringInvoke() inliner (after)
-  // CHECK-DAG:     [[Field:i\d+]]   InstanceFieldGet
-  // CHECK-DAG:                      Return [ [[Field]] ]
+  // CHECK-DAG:     <<Field:i\d+>>   InstanceFieldGet
+  // CHECK-DAG:                      Return [<<Field>>]
 
   public static int inlineSharpenStringInvoke() {
     return "Foo".length();
diff --git a/test/465-checker-clinit-gvn/src/Main.java b/test/465-checker-clinit-gvn/src/Main.java
index dcaef6f..ac2863c 100644
--- a/test/465-checker-clinit-gvn/src/Main.java
+++ b/test/465-checker-clinit-gvn/src/Main.java
@@ -27,14 +27,14 @@
 public final class Main {
 
   // CHECK-START: int Main.accessTwoStatics() GVN (before)
-  // CHECK-DAG:     [[Class1:l\d+]]  LoadClass
-  // CHECK-DAG:                      ClinitCheck [ [[Class1]] ]
-  // CHECK-DAG:     [[Class2:l\d+]]  LoadClass
-  // CHECK-DAG:                      ClinitCheck [ [[Class2]] ]
+  // CHECK-DAG:     <<Class1:l\d+>>  LoadClass
+  // CHECK-DAG:                      ClinitCheck [<<Class1>>]
+  // CHECK-DAG:     <<Class2:l\d+>>  LoadClass
+  // CHECK-DAG:                      ClinitCheck [<<Class2>>]
 
   // CHECK-START: int Main.accessTwoStatics() GVN (after)
-  // CHECK-DAG:     [[Class:l\d+]]   LoadClass
-  // CHECK-DAG:                      ClinitCheck [ [[Class]] ]
+  // CHECK-DAG:     <<Class:l\d+>>   LoadClass
+  // CHECK-DAG:                      ClinitCheck [<<Class>>]
   // CHECK-NOT:                      ClinitCheck
 
   public static int accessTwoStatics() {
@@ -42,14 +42,14 @@
   }
 
   // CHECK-START: int Main.accessTwoStaticsCallInBetween() GVN (before)
-  // CHECK-DAG:     [[Class1:l\d+]]  LoadClass
-  // CHECK-DAG:                      ClinitCheck [ [[Class1]] ]
-  // CHECK-DAG:     [[Class2:l\d+]]  LoadClass
-  // CHECK-DAG:                      ClinitCheck [ [[Class2]] ]
+  // CHECK-DAG:     <<Class1:l\d+>>  LoadClass
+  // CHECK-DAG:                      ClinitCheck [<<Class1>>]
+  // CHECK-DAG:     <<Class2:l\d+>>  LoadClass
+  // CHECK-DAG:                      ClinitCheck [<<Class2>>]
 
   // CHECK-START: int Main.accessTwoStaticsCallInBetween() GVN (after)
-  // CHECK-DAG:     [[Class:l\d+]]   LoadClass
-  // CHECK-DAG:                      ClinitCheck [ [[Class]] ]
+  // CHECK-DAG:     <<Class:l\d+>>   LoadClass
+  // CHECK-DAG:                      ClinitCheck [<<Class>>]
   // CHECK-NOT:                      ClinitCheck
 
   public static int accessTwoStaticsCallInBetween() {
diff --git a/test/466-get-live-vreg/get_live_vreg_jni.cc b/test/466-get-live-vreg/get_live_vreg_jni.cc
index 6715ba1..4724e8e 100644
--- a/test/466-get-live-vreg/get_live_vreg_jni.cc
+++ b/test/466-get-live-vreg/get_live_vreg_jni.cc
@@ -28,7 +28,7 @@
 class TestVisitor : public StackVisitor {
  public:
   TestVisitor(Thread* thread, Context* context) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-      : StackVisitor(thread, context) {}
+      : StackVisitor(thread, context, StackVisitor::StackWalkKind::kIncludeInlinedFrames) {}
 
   bool VisitFrame() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     mirror::ArtMethod* m = GetMethod();
diff --git a/test/468-checker-bool-simplifier-regression/smali/TestCase.smali b/test/468-checker-bool-simplifier-regression/smali/TestCase.smali
index f36304d..33e6dc3 100644
--- a/test/468-checker-bool-simplifier-regression/smali/TestCase.smali
+++ b/test/468-checker-bool-simplifier-regression/smali/TestCase.smali
@@ -18,6 +18,19 @@
 
 .field public static value:Z
 
+# CHECK-START: boolean TestCase.testCase() boolean_simplifier (before)
+# CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
+# CHECK-DAG:     <<Const1:i\d+>>   IntConstant 1
+# CHECK-DAG:     <<Value:z\d+>>    StaticFieldGet
+# CHECK-DAG:                       If [<<Value>>]
+# CHECK-DAG:     <<Phi:i\d+>>      Phi [<<Const1>>,<<Const0>>]
+# CHECK-DAG:                       Return [<<Phi>>]
+
+# CHECK-START: boolean TestCase.testCase() boolean_simplifier (after)
+# CHECK-DAG:     <<Value:z\d+>>    StaticFieldGet
+# CHECK-DAG:     <<Not:z\d+>>      BooleanNot [<<Value>>]
+# CHECK-DAG:                       Return [<<Not>>]
+
 .method public static testCase()Z
     .registers 2
     sget-boolean v0, LTestCase;->value:Z
diff --git a/test/468-checker-bool-simplifier-regression/src/Main.java b/test/468-checker-bool-simplifier-regression/src/Main.java
index d45f3bf..8fe05c7 100644
--- a/test/468-checker-bool-simplifier-regression/src/Main.java
+++ b/test/468-checker-bool-simplifier-regression/src/Main.java
@@ -18,19 +18,6 @@
 
 public class Main {
 
-  // CHECK-START: boolean TestCase.testCase() boolean_simplifier (before)
-  // CHECK-DAG:     [[Const0:i\d+]]   IntConstant 0
-  // CHECK-DAG:     [[Const1:i\d+]]   IntConstant 1
-  // CHECK-DAG:     [[Value:z\d+]]    StaticFieldGet
-  // CHECK-DAG:                       If [ [[Value]] ]
-  // CHECK-DAG:     [[Phi:i\d+]]      Phi [ [[Const1]] [[Const0]] ]
-  // CHECK-DAG:                       Return [ [[Phi]] ]
-
-  // CHECK-START: boolean TestCase.testCase() boolean_simplifier (after)
-  // CHECK-DAG:     [[Value:z\d+]]    StaticFieldGet
-  // CHECK-DAG:     [[Not:z\d+]]      BooleanNot [ [[Value]] ]
-  // CHECK-DAG:                       Return [ [[Not]] ]
-
   public static boolean runTest(boolean input) throws Exception {
     Class<?> c = Class.forName("TestCase");
     Method m = c.getMethod("testCase");
diff --git a/test/472-unreachable-if-regression/expected.txt b/test/472-unreachable-if-regression/expected.txt
new file mode 100644
index 0000000..9fc8bea
--- /dev/null
+++ b/test/472-unreachable-if-regression/expected.txt
@@ -0,0 +1,3 @@
+Test started.
+Successfully called UnreachableIf().
+Successfully called UnreachablePackedSwitch().
diff --git a/test/472-unreachable-if-regression/info.txt b/test/472-unreachable-if-regression/info.txt
new file mode 100644
index 0000000..d8b5a45
--- /dev/null
+++ b/test/472-unreachable-if-regression/info.txt
@@ -0,0 +1,3 @@
+Regression test for crashes during compilation of methods which end
+with an if-cc or switch, i.e. there's a fall-through out of method code.
+Also tests a packed-switch with negative offset to its data.
diff --git a/test/472-unreachable-if-regression/smali/Test.smali b/test/472-unreachable-if-regression/smali/Test.smali
new file mode 100644
index 0000000..c7107d1
--- /dev/null
+++ b/test/472-unreachable-if-regression/smali/Test.smali
@@ -0,0 +1,46 @@
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LTest;
+
+.super Ljava/lang/Object;
+
+.method public static UnreachableIf()V
+    .registers 1
+    return-void
+    : unreachable
+    not-int v0, v0
+    if-lt v0, v0, :unreachable
+    # fall-through out of code item
+.end method
+
+.method public static UnreachablePackedSwitch()V
+    .registers 1
+    return-void
+    : unreachable
+    goto :pswitch_2
+    :pswitch_data
+    .packed-switch 1
+        :pswitch_1
+        :pswitch_2
+        :pswitch_1
+        :pswitch_2
+    .end packed-switch
+    :pswitch_1
+    not-int v0, v0
+    :pswitch_2
+    packed-switch v0, :pswitch_data
+    # fall-through out of code item
+.end method
diff --git a/test/472-unreachable-if-regression/src/Main.java b/test/472-unreachable-if-regression/src/Main.java
new file mode 100644
index 0000000..c9f9511
--- /dev/null
+++ b/test/472-unreachable-if-regression/src/Main.java
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+
+public class Main {
+
+  // Workaround for b/18051191.
+  class InnerClass {}
+
+  public static void main(String args[]) throws Exception {
+    System.out.println("Test started.");
+    Class<?> c = Class.forName("Test");
+
+    Method unreachableIf = c.getMethod("UnreachableIf", (Class[]) null);
+    unreachableIf.invoke(null, (Object[]) null);
+    System.out.println("Successfully called UnreachableIf().");
+
+    Method unreachablePackedSwitch = c.getMethod("UnreachablePackedSwitch", (Class[]) null);
+    unreachablePackedSwitch.invoke(null, (Object[]) null);
+    System.out.println("Successfully called UnreachablePackedSwitch().");
+  }
+
+}
diff --git a/test/474-checker-boolean-input/src/Main.java b/test/474-checker-boolean-input/src/Main.java
index 9151986..490f7f9 100644
--- a/test/474-checker-boolean-input/src/Main.java
+++ b/test/474-checker-boolean-input/src/Main.java
@@ -28,8 +28,8 @@
    */
 
   // CHECK-START: boolean Main.TestPhiAsBoolean(int) boolean_simplifier (after)
-  // CHECK-DAG:     [[Phi:i\d+]]     Phi
-  // CHECK-DAG:                      BooleanNot [ [[Phi]] ]
+  // CHECK-DAG:     <<Phi:i\d+>>     Phi
+  // CHECK-DAG:                      BooleanNot [<<Phi>>]
 
   public static boolean f1;
   public static boolean f2;
@@ -48,8 +48,8 @@
    */
 
   // CHECK-START: boolean Main.TestAndAsBoolean(boolean, boolean) boolean_simplifier (after)
-  // CHECK-DAG:     [[And:i\d+]]     And
-  // CHECK-DAG:                      BooleanNot [ [[And]] ]
+  // CHECK-DAG:     <<And:i\d+>>     And
+  // CHECK-DAG:                      BooleanNot [<<And>>]
 
   public static boolean InlineAnd(boolean x, boolean y) {
     return x & y;
@@ -65,8 +65,8 @@
    */
 
   // CHECK-START: boolean Main.TestOrAsBoolean(boolean, boolean) boolean_simplifier (after)
-  // CHECK-DAG:     [[Or:i\d+]]      Or
-  // CHECK-DAG:                      BooleanNot [ [[Or]] ]
+  // CHECK-DAG:     <<Or:i\d+>>      Or
+  // CHECK-DAG:                      BooleanNot [<<Or>>]
 
   public static boolean InlineOr(boolean x, boolean y) {
     return x | y;
@@ -82,8 +82,8 @@
    */
 
   // CHECK-START: boolean Main.TestXorAsBoolean(boolean, boolean) boolean_simplifier (after)
-  // CHECK-DAG:     [[Xor:i\d+]]     Xor
-  // CHECK-DAG:                      BooleanNot [ [[Xor]] ]
+  // CHECK-DAG:     <<Xor:i\d+>>     Xor
+  // CHECK-DAG:                      BooleanNot [<<Xor>>]
 
   public static boolean InlineXor(boolean x, boolean y) {
     return x ^ y;
diff --git a/test/476-checker-ctor-memory-barrier/src/Main.java b/test/476-checker-ctor-memory-barrier/src/Main.java
index 10aa2ab..769ae20 100644
--- a/test/476-checker-ctor-memory-barrier/src/Main.java
+++ b/test/476-checker-ctor-memory-barrier/src/Main.java
@@ -17,7 +17,7 @@
 
 class ClassWithoutFinals {
   // CHECK-START: void ClassWithoutFinals.<init>() register (after)
-  // CHECK-NOT: MemoryBarrier {{StoreStore}}
+  // CHECK-NOT: MemoryBarrier kind:StoreStore
   public ClassWithoutFinals() {}
 }
 
@@ -26,7 +26,7 @@
   public ClassWithFinals obj;
 
   // CHECK-START: void ClassWithFinals.<init>(boolean) register (after)
-  // CHECK:     MemoryBarrier {{StoreStore}}
+  // CHECK:     MemoryBarrier kind:StoreStore
   // CHECK-NOT: {{.*}}
   // CHECK:     ReturnVoid
   public ClassWithFinals(boolean cond) {
@@ -38,7 +38,7 @@
   }
 
   // CHECK-START: void ClassWithFinals.<init>() register (after)
-  // CHECK:     MemoryBarrier {{StoreStore}}
+  // CHECK:     MemoryBarrier kind:StoreStore
   // CHECK-NOT: {{.*}}
   // CHECK:     ReturnVoid
   public ClassWithFinals() {
@@ -46,8 +46,8 @@
   }
 
   // CHECK-START: void ClassWithFinals.<init>(int) register (after)
-  // CHECK:     MemoryBarrier {{StoreStore}}
-  // CHECK:     MemoryBarrier {{StoreStore}}
+  // CHECK:     MemoryBarrier kind:StoreStore
+  // CHECK:     MemoryBarrier kind:StoreStore
   // CHECK-NOT: {{.*}}
   // CHECK:     ReturnVoid
   public ClassWithFinals(int x) {
@@ -61,7 +61,7 @@
 
 class InheritFromClassWithFinals extends ClassWithFinals {
   // CHECK-START: void InheritFromClassWithFinals.<init>() register (after)
-  // CHECK:     MemoryBarrier {{StoreStore}}
+  // CHECK:     MemoryBarrier kind:StoreStore
   // CHECK-NOT: {{.*}}
   // CHECK:     ReturnVoid
 
@@ -75,7 +75,7 @@
   // CHECK:     InvokeStaticOrDirect
 
   // CHECK-START: void InheritFromClassWithFinals.<init>(boolean) register (after)
-  // CHECK-NOT: MemoryBarrier {{StoreStore}}
+  // CHECK-NOT: MemoryBarrier kind:StoreStore
   public InheritFromClassWithFinals(boolean cond) {
     super(cond);
     // should not inline the super constructor
@@ -86,8 +86,8 @@
   final int y;
 
   // CHECK-START: void HaveFinalsAndInheritFromClassWithFinals.<init>() register (after)
-  // CHECK:     MemoryBarrier {{StoreStore}}
-  // CHECK:     MemoryBarrier {{StoreStore}}
+  // CHECK:     MemoryBarrier kind:StoreStore
+  // CHECK:     MemoryBarrier kind:StoreStore
   // CHECK-NOT: {{.*}}
   // CHECK:     ReturnVoid
 
@@ -100,7 +100,7 @@
 
   // CHECK-START: void HaveFinalsAndInheritFromClassWithFinals.<init>(boolean) register (after)
   // CHECK:     InvokeStaticOrDirect
-  // CHECK:     MemoryBarrier {{StoreStore}}
+  // CHECK:     MemoryBarrier kind:StoreStore
   // CHECK-NOT: {{.*}}
   // CHECK:     ReturnVoid
   public HaveFinalsAndInheritFromClassWithFinals(boolean cond) {
@@ -116,13 +116,13 @@
   // CHECK:     InvokeStaticOrDirect
 
   // CHECK-START: ClassWithFinals Main.noInlineNoConstructorBarrier() register (after)
-  // CHECK-NOT: MemoryBarrier {{StoreStore}}
+  // CHECK-NOT: MemoryBarrier kind:StoreStore
   public static ClassWithFinals noInlineNoConstructorBarrier() {
     return new ClassWithFinals(false);
   }
 
   // CHECK-START: ClassWithFinals Main.inlineConstructorBarrier() register (after)
-  // CHECK:     MemoryBarrier {{StoreStore}}
+  // CHECK:     MemoryBarrier kind:StoreStore
   // CHECK-NOT: {{.*}}
   // CHECK:     Return
 
@@ -133,7 +133,7 @@
   }
 
   // CHECK-START: InheritFromClassWithFinals Main.doubleInlineConstructorBarrier() register (after)
-  // CHECK:     MemoryBarrier {{StoreStore}}
+  // CHECK:     MemoryBarrier kind:StoreStore
   // CHECK-NOT: {{.*}}
   // CHECK:     Return
 
diff --git a/test/476-clinit-check-inlining-static-invoke/expected.txt b/test/476-clinit-check-inlining-static-invoke/expected.txt
new file mode 100644
index 0000000..c55bf72
--- /dev/null
+++ b/test/476-clinit-check-inlining-static-invoke/expected.txt
@@ -0,0 +1,2 @@
+checkClinitCheckBeforeStaticMethodInvoke START
+checkClinitCheckBeforeStaticMethodInvoke PASSED
diff --git a/test/476-clinit-check-inlining-static-invoke/info.txt b/test/476-clinit-check-inlining-static-invoke/info.txt
new file mode 100644
index 0000000..1a439fc
--- /dev/null
+++ b/test/476-clinit-check-inlining-static-invoke/info.txt
@@ -0,0 +1,3 @@
+Regression test for a bug where an inlined call to a static method
+failed to emit a prior initialization check of the method's declaring
+class.
diff --git a/test/476-clinit-check-inlining-static-invoke/src/Main.java b/test/476-clinit-check-inlining-static-invoke/src/Main.java
new file mode 100644
index 0000000..a7d3bcd
--- /dev/null
+++ b/test/476-clinit-check-inlining-static-invoke/src/Main.java
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  public static void main(String[] args) {
+    checkClinitCheckBeforeStaticMethodInvoke();
+  }
+
+  static void checkClinitCheckBeforeStaticMethodInvoke() {
+    System.out.println("checkClinitCheckBeforeStaticMethodInvoke START");
+
+    // Call static method to cause implicit class initialization, even
+    // if it is inlined.
+    ClassWithClinit.$opt$inline$StaticMethod();
+    if (!classWithClinitInitialized) {
+      System.out.println("checkClinitCheckBeforeStaticMethodInvoke FAILED");
+      return;
+    }
+
+    System.out.println("checkClinitCheckBeforeStaticMethodInvoke PASSED");
+  }
+
+  static class ClassWithClinit {
+    static {
+      Main.classWithClinitInitialized = true;
+    }
+
+    static void $opt$inline$StaticMethod() {
+    }
+  }
+
+  static boolean classWithClinitInitialized = false;
+}
diff --git a/test/478-checker-clinit-check-pruning/expected.txt b/test/478-checker-clinit-check-pruning/expected.txt
new file mode 100644
index 0000000..387e1a7
--- /dev/null
+++ b/test/478-checker-clinit-check-pruning/expected.txt
@@ -0,0 +1,6 @@
+Main$ClassWithClinit1's static initializer
+Main$ClassWithClinit2's static initializer
+Main$ClassWithClinit3's static initializer
+Main$ClassWithClinit4's static initializer
+Main$ClassWithClinit5's static initializer
+Main$ClassWithClinit6's static initializer
diff --git a/test/478-checker-clinit-check-pruning/info.txt b/test/478-checker-clinit-check-pruning/info.txt
new file mode 100644
index 0000000..deb64de
--- /dev/null
+++ b/test/478-checker-clinit-check-pruning/info.txt
@@ -0,0 +1,3 @@
+Test ensuring class initialization checks (and load class instructions)
+added by the graph builder during the construction of a static invoke
+are properly pruned.
diff --git a/test/478-checker-clinit-check-pruning/src/Main.java b/test/478-checker-clinit-check-pruning/src/Main.java
new file mode 100644
index 0000000..61199a7
--- /dev/null
+++ b/test/478-checker-clinit-check-pruning/src/Main.java
@@ -0,0 +1,276 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  /*
+   * Ensure an inlined static invoke explicitly triggers the
+   * initialization check of the called method's declaring class, and
+   * that the corresponding load class instruction does not get
+   * removed before register allocation & code generation.
+   */
+
+  // CHECK-START: void Main.invokeStaticInlined() builder (after)
+  // CHECK-DAG:     <<LoadClass:l\d+>>    LoadClass
+  // CHECK-DAG:     <<ClinitCheck:l\d+>>  ClinitCheck [<<LoadClass>>]
+  // CHECK-DAG:                           InvokeStaticOrDirect [<<ClinitCheck>>]
+
+  // CHECK-START: void Main.invokeStaticInlined() inliner (after)
+  // CHECK-DAG:     <<LoadClass:l\d+>>    LoadClass
+  // CHECK-DAG:     <<ClinitCheck:l\d+>>  ClinitCheck [<<LoadClass>>]
+
+  // CHECK-START: void Main.invokeStaticInlined() inliner (after)
+  // CHECK-NOT:                           InvokeStaticOrDirect
+
+  // The following checks ensure the clinit check instruction added by
+  // the builder is pruned by the PrepareForRegisterAllocation pass, while
+  // the load class instruction is preserved.  As the control flow
+  // graph is not dumped after (nor before) this step, we check the
+  // CFG as it is before the next pass (liveness analysis) instead.
+
+  // CHECK-START: void Main.invokeStaticInlined() liveness (before)
+  // CHECK-DAG:                           LoadClass
+
+  // CHECK-START: void Main.invokeStaticInlined() liveness (before)
+  // CHECK-NOT:                           ClinitCheck
+  // CHECK-NOT:                           InvokeStaticOrDirect
+
+  static void invokeStaticInlined() {
+    ClassWithClinit1.$opt$inline$StaticMethod();
+  }
+
+  static class ClassWithClinit1 {
+    static {
+      System.out.println("Main$ClassWithClinit1's static initializer");
+    }
+
+    static void $opt$inline$StaticMethod() {
+    }
+  }
+
+  /*
+   * Ensure a non-inlined static invoke eventually has an implicit
+   * initialization check of the called method's declaring class.
+   */
+
+  // CHECK-START: void Main.invokeStaticNotInlined() builder (after)
+  // CHECK-DAG:     <<LoadClass:l\d+>>    LoadClass
+  // CHECK-DAG:     <<ClinitCheck:l\d+>>  ClinitCheck [<<LoadClass>>]
+  // CHECK-DAG:                           InvokeStaticOrDirect [<<ClinitCheck>>]
+
+  // CHECK-START: void Main.invokeStaticNotInlined() inliner (after)
+  // CHECK-DAG:     <<LoadClass:l\d+>>    LoadClass
+  // CHECK-DAG:     <<ClinitCheck:l\d+>>  ClinitCheck [<<LoadClass>>]
+  // CHECK-DAG:                           InvokeStaticOrDirect [<<ClinitCheck>>]
+
+  // The following checks ensure the clinit check and load class
+  // instructions added by the builder are pruned by the
+  // PrepareForRegisterAllocation pass.  As the control flow graph is not
+  // dumped after (nor before) this step, we check the CFG as it is
+  // before the next pass (liveness analysis) instead.
+
+  // CHECK-START: void Main.invokeStaticNotInlined() liveness (before)
+  // CHECK-DAG:                           InvokeStaticOrDirect
+
+  // CHECK-START: void Main.invokeStaticNotInlined() liveness (before)
+  // CHECK-NOT:                           LoadClass
+  // CHECK-NOT:                           ClinitCheck
+
+  static void invokeStaticNotInlined() {
+    ClassWithClinit2.staticMethod();
+  }
+
+  static class ClassWithClinit2 {
+    static {
+      System.out.println("Main$ClassWithClinit2's static initializer");
+    }
+
+    static boolean doThrow = false;
+
+    static void staticMethod() {
+      if (doThrow) {
+        // Try defeating inlining.
+        throw new Error();
+      }
+    }
+  }
+
+  /*
+   * Ensure an inlined call to a static method whose declaring class
+   * is statically known to have been initialized does not require an
+   * explicit clinit check.
+   */
+
+  // CHECK-START: void Main$ClassWithClinit3.invokeStaticInlined() builder (after)
+  // CHECK-DAG:                           InvokeStaticOrDirect
+
+  // CHECK-START: void Main$ClassWithClinit3.invokeStaticInlined() builder (after)
+  // CHECK-NOT:                           LoadClass
+  // CHECK-NOT:                           ClinitCheck
+
+  // CHECK-START: void Main$ClassWithClinit3.invokeStaticInlined() inliner (after)
+  // CHECK-NOT:                           LoadClass
+  // CHECK-NOT:                           ClinitCheck
+  // CHECK-NOT:                           InvokeStaticOrDirect
+
+  static class ClassWithClinit3 {
+    static void invokeStaticInlined() {
+      // The invocation of invokeStaticInlined triggers the
+      // initialization of ClassWithClinit3, meaning that the
+      // call to $opt$inline$StaticMethod below does not need a
+      // clinit check.
+      $opt$inline$StaticMethod();
+    }
+
+    static {
+      System.out.println("Main$ClassWithClinit3's static initializer");
+    }
+
+    static void $opt$inline$StaticMethod() {
+    }
+  }
+
+  /*
+   * Ensure a non-inlined call to a static method whose declaring
+   * class is statically known to have been initialized does not
+   * require an explicit clinit check.
+   */
+
+  // CHECK-START: void Main$ClassWithClinit4.invokeStaticNotInlined() builder (after)
+  // CHECK-DAG:                           InvokeStaticOrDirect
+
+  // CHECK-START: void Main$ClassWithClinit4.invokeStaticNotInlined() builder (after)
+  // CHECK-NOT:                           LoadClass
+  // CHECK-NOT:                           ClinitCheck
+
+  // CHECK-START: void Main$ClassWithClinit4.invokeStaticNotInlined() inliner (after)
+  // CHECK-DAG:                           InvokeStaticOrDirect
+
+  // CHECK-START: void Main$ClassWithClinit4.invokeStaticNotInlined() inliner (after)
+  // CHECK-NOT:                           LoadClass
+  // CHECK-NOT:                           ClinitCheck
+
+  static class ClassWithClinit4 {
+    static void invokeStaticNotInlined() {
+      // The invocation of invokeStaticNotInlined triggers the
+      // initialization of ClassWithClinit4, meaning that the
+      // call to staticMethod below does not need a clinit
+      // check.
+      staticMethod();
+    }
+
+    static {
+      System.out.println("Main$ClassWithClinit4's static initializer");
+    }
+
+    static boolean doThrow = false;
+
+    static void staticMethod() {
+      if (doThrow) {
+        // Try defeating inlining.
+        throw new Error();
+      }
+    }
+  }
+
+  /*
+   * Ensure an inlined call to a static method whose declaring class
+   * is a super class of the caller's class does not require an
+   * explicit clinit check.
+   */
+
+  // CHECK-START: void Main$SubClassOfClassWithClinit5.invokeStaticInlined() builder (after)
+  // CHECK-DAG:                           InvokeStaticOrDirect
+
+  // CHECK-START: void Main$SubClassOfClassWithClinit5.invokeStaticInlined() builder (after)
+  // CHECK-NOT:                           LoadClass
+  // CHECK-NOT:                           ClinitCheck
+
+  // CHECK-START: void Main$SubClassOfClassWithClinit5.invokeStaticInlined() inliner (after)
+  // CHECK-NOT:                           LoadClass
+  // CHECK-NOT:                           ClinitCheck
+  // CHECK-NOT:                           InvokeStaticOrDirect
+
+  static class ClassWithClinit5 {
+    static void $opt$inline$StaticMethod() {
+    }
+
+    static {
+      System.out.println("Main$ClassWithClinit5's static initializer");
+    }
+  }
+
+  static class SubClassOfClassWithClinit5 extends ClassWithClinit5 {
+    static void invokeStaticInlined() {
+      ClassWithClinit5.$opt$inline$StaticMethod();
+    }
+  }
+
+  /*
+   * Ensure a non-inlined call to a static method whose declaring
+   * class is a super class of the caller's class does not require an
+   * explicit clinit check.
+   */
+
+  // CHECK-START: void Main$SubClassOfClassWithClinit6.invokeStaticNotInlined() builder (after)
+  // CHECK-DAG:                           InvokeStaticOrDirect
+
+  // CHECK-START: void Main$SubClassOfClassWithClinit6.invokeStaticNotInlined() builder (after)
+  // CHECK-NOT:                           LoadClass
+  // CHECK-NOT:                           ClinitCheck
+
+  // CHECK-START: void Main$SubClassOfClassWithClinit6.invokeStaticNotInlined() inliner (after)
+  // CHECK-DAG:                           InvokeStaticOrDirect
+
+  // CHECK-START: void Main$SubClassOfClassWithClinit6.invokeStaticNotInlined() inliner (after)
+  // CHECK-NOT:                           LoadClass
+  // CHECK-NOT:                           ClinitCheck
+
+  static class ClassWithClinit6 {
+    static boolean doThrow = false;
+
+    static void staticMethod() {
+      if (doThrow) {
+        // Try defeating inlining.
+        throw new Error();
+      }
+    }
+
+    static {
+      System.out.println("Main$ClassWithClinit6's static initializer");
+    }
+  }
+
+  static class SubClassOfClassWithClinit6 extends ClassWithClinit6 {
+    static void invokeStaticNotInlined() {
+      ClassWithClinit6.staticMethod();
+    }
+  }
+
+  // TODO: Add a test for the case of a static method whose declaring
+  // class type index is not available (i.e. when `storage_index`
+  // equals `DexFile::kDexNoIndex` in
+  // art::HGraphBuilder::BuildInvoke).
+
+  public static void main(String[] args) {
+    invokeStaticInlined();
+    invokeStaticNotInlined();
+    ClassWithClinit3.invokeStaticInlined();
+    ClassWithClinit4.invokeStaticNotInlined();
+    SubClassOfClassWithClinit5.invokeStaticInlined();
+    SubClassOfClassWithClinit6.invokeStaticNotInlined();
+  }
+}
diff --git a/test/480-checker-dead-blocks/expected.txt b/test/480-checker-dead-blocks/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/480-checker-dead-blocks/expected.txt
diff --git a/test/480-checker-dead-blocks/info.txt b/test/480-checker-dead-blocks/info.txt
new file mode 100644
index 0000000..5aeafac
--- /dev/null
+++ b/test/480-checker-dead-blocks/info.txt
@@ -0,0 +1 @@
+Test removal of dead blocks.
\ No newline at end of file
diff --git a/test/480-checker-dead-blocks/src/Main.java b/test/480-checker-dead-blocks/src/Main.java
new file mode 100644
index 0000000..b76755e
--- /dev/null
+++ b/test/480-checker-dead-blocks/src/Main.java
@@ -0,0 +1,194 @@
+/*
+* Copyright (C) 2015 The Android Open Source Project
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*      http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+public class Main {
+
+  public static void assertIntEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static boolean inlineTrue() {
+    return true;
+  }
+
+  public static boolean inlineFalse() {
+    return false;
+  }
+
+  // CHECK-START: int Main.testTrueBranch(int, int) dead_code_elimination_final (before)
+  // CHECK-DAG:     <<ArgX:i\d+>>    ParameterValue
+  // CHECK-DAG:     <<ArgY:i\d+>>    ParameterValue
+  // CHECK-DAG:                      If
+  // CHECK-DAG:     <<Add:i\d+>>     Add [<<ArgX>>,<<ArgY>>]
+  // CHECK-DAG:     <<Sub:i\d+>>     Sub [<<ArgX>>,<<ArgY>>]
+  // CHECK-DAG:     <<Phi:i\d+>>     Phi [<<Add>>,<<Sub>>]
+  // CHECK-DAG:                      Return [<<Phi>>]
+
+  // CHECK-START: int Main.testTrueBranch(int, int) dead_code_elimination_final (after)
+  // CHECK-DAG:     <<ArgX:i\d+>>    ParameterValue
+  // CHECK-DAG:     <<ArgY:i\d+>>    ParameterValue
+  // CHECK-DAG:     <<Add:i\d+>>     Add [<<ArgX>>,<<ArgY>>]
+  // CHECK-DAG:                      Return [<<Add>>]
+
+  // CHECK-START: int Main.testTrueBranch(int, int) dead_code_elimination_final (after)
+  // CHECK-NOT:                      If
+  // CHECK-NOT:                      Sub
+  // CHECK-NOT:                      Phi
+
+  public static int testTrueBranch(int x, int y) {
+    int z;
+    if (inlineTrue()) {
+      z = x + y;
+    } else {
+      z = x - y;
+    }
+    return z;
+  }
+
+  // CHECK-START: int Main.testFalseBranch(int, int) dead_code_elimination_final (before)
+  // CHECK-DAG:     <<ArgX:i\d+>>    ParameterValue
+  // CHECK-DAG:     <<ArgY:i\d+>>    ParameterValue
+  // CHECK-DAG:                      If
+  // CHECK-DAG:     <<Add:i\d+>>     Add [<<ArgX>>,<<ArgY>>]
+  // CHECK-DAG:     <<Sub:i\d+>>     Sub [<<ArgX>>,<<ArgY>>]
+  // CHECK-DAG:     <<Phi:i\d+>>     Phi [<<Add>>,<<Sub>>]
+  // CHECK-DAG:                      Return [<<Phi>>]
+
+  // CHECK-START: int Main.testFalseBranch(int, int) dead_code_elimination_final (after)
+  // CHECK-DAG:     <<ArgX:i\d+>>    ParameterValue
+  // CHECK-DAG:     <<ArgY:i\d+>>    ParameterValue
+  // CHECK-DAG:     <<Sub:i\d+>>     Sub [<<ArgX>>,<<ArgY>>]
+  // CHECK-DAG:                      Return [<<Sub>>]
+
+  // CHECK-START: int Main.testFalseBranch(int, int) dead_code_elimination_final (after)
+  // CHECK-NOT:                      If
+  // CHECK-NOT:                      Add
+  // CHECK-NOT:                      Phi
+
+  public static int testFalseBranch(int x, int y) {
+    int z;
+    if (inlineFalse()) {
+      z = x + y;
+    } else {
+      z = x - y;
+    }
+    return z;
+  }
+
+  // CHECK-START: int Main.testRemoveLoop(int) dead_code_elimination_final (before)
+  // CHECK:                          Mul
+
+  // CHECK-START: int Main.testRemoveLoop(int) dead_code_elimination_final (after)
+  // CHECK-NOT:                      Mul
+
+  public static int testRemoveLoop(int x) {
+    if (inlineFalse()) {
+      for (int i = 0; i < x; ++i) {
+        x *= x;
+      }
+    }
+    return x;
+  }
+
+  // CHECK-START: int Main.testInfiniteLoop(int) dead_code_elimination_final (before)
+  // CHECK-DAG:                      Return
+  // CHECK-DAG:                      Exit
+
+  // CHECK-START: int Main.testInfiniteLoop(int) dead_code_elimination_final (after)
+  // CHECK-NOT:                      Return
+  // CHECK-NOT:                      Exit
+
+  public static int testInfiniteLoop(int x) {
+    while (inlineTrue()) {
+      x++;
+    }
+    return x;
+  }
+
+  // CHECK-START: int Main.testDeadLoop(int) dead_code_elimination_final (before)
+  // CHECK-DAG:                      If
+  // CHECK-DAG:                      Add
+
+  // CHECK-START: int Main.testDeadLoop(int) dead_code_elimination_final (after)
+  // CHECK-DAG:     <<Arg:i\d+>>     ParameterValue
+  // CHECK-DAG:                      Return [<<Arg>>]
+
+  // CHECK-START: int Main.testDeadLoop(int) dead_code_elimination_final (after)
+  // CHECK-NOT:                      If
+  // CHECK-NOT:                      Add
+
+  public static int testDeadLoop(int x) {
+    while (inlineFalse()) {
+      x++;
+    }
+    return x;
+  }
+
+  // CHECK-START: int Main.testUpdateLoopInformation(int) dead_code_elimination_final (before)
+  // CHECK-DAG:                      If
+  // CHECK-DAG:                      If
+  // CHECK-DAG:                      Add
+
+  // CHECK-START: int Main.testUpdateLoopInformation(int) dead_code_elimination_final (after)
+  // CHECK-DAG:     <<Arg:i\d+>>     ParameterValue
+  // CHECK-DAG:                      Return [<<Arg>>]
+
+  // CHECK-START: int Main.testUpdateLoopInformation(int) dead_code_elimination_final (after)
+  // CHECK-NOT:                      If
+  // CHECK-NOT:                      Add
+
+  public static int testUpdateLoopInformation(int x) {
+    // Use of Or in the condition generates a dead loop where not all of its
+    // blocks are removed. This forces DCE to update their loop information.
+    while (inlineFalse() || !inlineTrue()) {
+      x++;
+    }
+    return x;
+  }
+
+  // CHECK-START: int Main.testRemoveSuspendCheck(int, int) dead_code_elimination_final (before)
+  // CHECK:                          SuspendCheck
+  // CHECK:                          SuspendCheck
+  // CHECK:                          SuspendCheck
+  // CHECK-NOT:                      SuspendCheck
+
+  // CHECK-START: int Main.testRemoveSuspendCheck(int, int) dead_code_elimination_final (after)
+  // CHECK:                          SuspendCheck
+  // CHECK:                          SuspendCheck
+  // CHECK-NOT:                      SuspendCheck
+
+  public static int testRemoveSuspendCheck(int x, int y) {
+    // Inner loop will leave behind the header with its SuspendCheck. DCE must
+    // remove it, otherwise the outer loop would end up with two.
+    while (y > 0) {
+      while (inlineFalse() || !inlineTrue()) {
+        x++;
+      }
+      y--;
+    }
+    return x;
+  }
+
+  public static void main(String[] args) {
+    assertIntEquals(7, testTrueBranch(4, 3));
+    assertIntEquals(1, testFalseBranch(4, 3));
+    assertIntEquals(42, testRemoveLoop(42));
+    assertIntEquals(23, testUpdateLoopInformation(23));
+    assertIntEquals(12, testRemoveSuspendCheck(12, 5));
+  }
+}
diff --git a/test/481-regression-phi-cond/expected.txt b/test/481-regression-phi-cond/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/481-regression-phi-cond/expected.txt
diff --git a/test/481-regression-phi-cond/info.txt b/test/481-regression-phi-cond/info.txt
new file mode 100644
index 0000000..7ac3bb6
--- /dev/null
+++ b/test/481-regression-phi-cond/info.txt
@@ -0,0 +1,2 @@
+Tests a regression in which simplification of a boolean selection could attempt
+to remove a Phi from the wrong instruction list.
diff --git a/test/481-regression-phi-cond/src/Main.java b/test/481-regression-phi-cond/src/Main.java
new file mode 100644
index 0000000..bad9669
--- /dev/null
+++ b/test/481-regression-phi-cond/src/Main.java
@@ -0,0 +1,51 @@
+/*
+* Copyright (C) 2015 The Android Open Source Project
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*      http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+
+public class Main {
+  public static void assertBooleanEquals(boolean expected, boolean result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static boolean inlinePhi(boolean x, boolean y, boolean z) {
+    boolean phi;
+    if (z) {
+      phi = x;
+    } else {
+      phi = y;
+    }
+    return phi;
+  }
+
+  public static boolean dontUseParam(boolean x) {
+    return false;
+  }
+
+  public static boolean testCase(boolean x, boolean y, boolean z) {
+    // First create a Phi(x, y).
+    boolean phi = inlinePhi(x, y, z);
+    // Now use the phi as a condition which the boolean simplifier will try to
+    // optimize out. If the result is not used, the algorithm will try to remove
+    // the original condition (phi) and crash.
+    return dontUseParam(phi == false ? false : true);
+  }
+
+  public static void main(String[] args) {
+    assertBooleanEquals(false, testCase(true, true, true));
+  }
+}
diff --git a/test/482-checker-loop-back-edge-use/expected.txt b/test/482-checker-loop-back-edge-use/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/482-checker-loop-back-edge-use/expected.txt
diff --git a/test/482-checker-loop-back-edge-use/info.txt b/test/482-checker-loop-back-edge-use/info.txt
new file mode 100644
index 0000000..f7fdeff
--- /dev/null
+++ b/test/482-checker-loop-back-edge-use/info.txt
@@ -0,0 +1,2 @@
+Tests the register allocator's optimization of adding synthesized uses
+at back edges.
diff --git a/test/482-checker-loop-back-edge-use/src/Main.java b/test/482-checker-loop-back-edge-use/src/Main.java
new file mode 100644
index 0000000..0ed9267
--- /dev/null
+++ b/test/482-checker-loop-back-edge-use/src/Main.java
@@ -0,0 +1,131 @@
+/*
+* Copyright (C) 2015 The Android Open Source Project
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*      http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+
+public class Main {
+
+  // CHECK-START: void Main.loop1(boolean) liveness (after)
+  // CHECK:         ParameterValue  liveness:2  ranges:{[2,22)} uses:[17,22]
+  // CHECK:         Goto            liveness:20
+  public static void loop1(boolean incoming) {
+    while (incoming) {}
+  }
+
+  // CHECK-START: void Main.loop2(boolean) liveness (after)
+  // CHECK:         ParameterValue  liveness:2  ranges:{[2,42)} uses:[33,38,42]
+  // CHECK:         Goto            liveness:36
+  // CHECK:         Goto            liveness:40
+  public static void loop2(boolean incoming) {
+    while (true) {
+      System.out.println("foo");
+      while (incoming) {}
+    }
+  }
+
+  // CHECK-START: void Main.loop3(boolean) liveness (after)
+  // CHECK:         ParameterValue  liveness:2  ranges:{[2,60)} uses:[56,60]
+  // CHECK:         Goto            liveness:58
+
+  // CHECK-START: void Main.loop3(boolean) liveness (after)
+  // CHECK-NOT:     Goto liveness:54
+  public static void loop3(boolean incoming) {
+    // 'incoming' only needs a use at the outer loop's back edge.
+    while (System.currentTimeMillis() != 42) {
+      while (Runtime.getRuntime() != null) {}
+      System.out.println(incoming);
+    }
+  }
+
+  // CHECK-START: void Main.loop4(boolean) liveness (after)
+  // CHECK:         ParameterValue  liveness:2  ranges:{[2,22)} uses:[22]
+
+  // CHECK-START: void Main.loop4(boolean) liveness (after)
+  // CHECK-NOT:     Goto            liveness:20
+  public static void loop4(boolean incoming) {
+    // 'incoming' has no loop use, so should not have back edge uses.
+    System.out.println(incoming);
+    while (System.currentTimeMillis() != 42) {
+      while (Runtime.getRuntime() != null) {}
+    }
+  }
+
+  // CHECK-START: void Main.loop5(boolean) liveness (after)
+  // CHECK:         ParameterValue  liveness:2  ranges:{[2,50)} uses:[33,42,46,50]
+  // CHECK:         Goto            liveness:44
+  // CHECK:         Goto            liveness:48
+  public static void loop5(boolean incoming) {
+    // 'incoming' must have a use at both back edges.
+    while (Runtime.getRuntime() != null) {
+      while (incoming) {
+        System.out.println(incoming);
+      }
+    }
+  }
+
+  // CHECK-START: void Main.loop6(boolean) liveness (after)
+  // CHECK:         ParameterValue  liveness:2  ranges:{[2,46)} uses:[24,46]
+  // CHECK:         Goto            liveness:44
+
+  // CHECK-START: void Main.loop6(boolean) liveness (after)
+  // CHECK-NOT:     Goto            liveness:22
+  public static void loop6(boolean incoming) {
+    // 'incoming' must have a use only at the first loop's back edge.
+    while (true) {
+      System.out.println(incoming);
+      while (Runtime.getRuntime() != null) {}
+    }
+  }
+
+  // CHECK-START: void Main.loop7(boolean) liveness (after)
+  // CHECK:         ParameterValue  liveness:2  ranges:{[2,50)} uses:[32,41,46,50]
+  // CHECK:         Goto            liveness:44
+  // CHECK:         Goto            liveness:48
+  public static void loop7(boolean incoming) {
+    // 'incoming' must have a use at both back edges.
+    while (Runtime.getRuntime() != null) {
+      System.out.println(incoming);
+      while (incoming) {}
+    }
+  }
+
+  // CHECK-START: void Main.loop8() liveness (after)
+  // CHECK:         StaticFieldGet  liveness:12 ranges:{[12,44)} uses:[35,40,44]
+  // CHECK:         Goto            liveness:38
+  // CHECK:         Goto            liveness:42
+  public static void loop8() {
+    // 'incoming' must have a use at both back edges.
+    boolean incoming = field;
+    while (Runtime.getRuntime() != null) {
+      while (incoming) {}
+    }
+  }
+
+  // CHECK-START: void Main.loop9() liveness (after)
+  // CHECK:         StaticFieldGet  liveness:22 ranges:{[22,36)} uses:[31,36]
+  // CHECK:         Goto            liveness:38
+  public static void loop9() {
+    while (Runtime.getRuntime() != null) {
+      // 'incoming' must only have a use in the inner loop.
+      boolean incoming = field;
+      while (incoming) {}
+    }
+  }
+
+  public static void main(String[] args) {
+  }
+
+  static boolean field;
+}
diff --git a/test/483-dce-block/expected.txt b/test/483-dce-block/expected.txt
new file mode 100644
index 0000000..ef48625
--- /dev/null
+++ b/test/483-dce-block/expected.txt
@@ -0,0 +1 @@
+class Main
diff --git a/test/483-dce-block/info.txt b/test/483-dce-block/info.txt
new file mode 100644
index 0000000..3db88ab
--- /dev/null
+++ b/test/483-dce-block/info.txt
@@ -0,0 +1,2 @@
+Regression test for optimizing that used to crash
+compiling the `foo` method.
diff --git a/test/483-dce-block/src/Main.java b/test/483-dce-block/src/Main.java
new file mode 100644
index 0000000..2f66a74
--- /dev/null
+++ b/test/483-dce-block/src/Main.java
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+  public static void foo(Object o, int a) {
+    Object result = null;
+    if (o instanceof Main) {
+      // The compiler optimizes the type of `o` by introducing
+      // a `HBoundType` in this block.
+      while (a != 3) {
+        if (a == 2) {
+          a++;
+          result = o;
+          continue;
+        } else if (willInline()) {
+          // This block will be detected as dead after inlining.
+          result = new Object();
+          continue;
+        }
+        result = new Object();
+      }
+      // The compiler produces a phi at the back edge for `result`.
+      // Before dead block elimination, the phi has three inputs:
+      // result = (new Object(), new Object(), HBoundType)
+      //
+      // After dead block elimination, the phi now has two inputs:
+      // result = (new Object(), HBoundType)
+      //
+      // Our internal data structure for linking users and inputs expects
+      // the input index stored in that data structure to be the index
+      // in the inputs array. So the index before dead block elimination
+      // of the `HBoundType` would be 2. Dead block elimination must update
+      // that index to be 1.
+    }
+    System.out.println(result.getClass());
+  }
+
+  public static boolean willInline() {
+    return false;
+  }
+
+  public static void main(String[] args) {
+    foo(new Main(), 2);
+  }
+}
diff --git a/test/484-checker-register-hints/expected.txt b/test/484-checker-register-hints/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/484-checker-register-hints/expected.txt
diff --git a/test/484-checker-register-hints/info.txt b/test/484-checker-register-hints/info.txt
new file mode 100644
index 0000000..8923680
--- /dev/null
+++ b/test/484-checker-register-hints/info.txt
@@ -0,0 +1,4 @@
+Checks that the register allocator does not punish other
+blocks because one block forced spilling. The block that
+forces the spilling should restore the registers at the merge
+point.
diff --git a/test/484-checker-register-hints/src/Main.java b/test/484-checker-register-hints/src/Main.java
new file mode 100644
index 0000000..33952d9
--- /dev/null
+++ b/test/484-checker-register-hints/src/Main.java
@@ -0,0 +1,138 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  // CHECK-START: void Main.test1(boolean, int, int, int, int, int) register (after)
+  // CHECK:       name "B0"
+  // CHECK-NOT:     ParallelMove
+  // CHECK:       name "B1"
+  // CHECK-NOT:   end_block
+  // CHECK:         If
+  // CHECK-NOT:     ParallelMove
+  // CHECK:       name "B3"
+  // CHECK-NOT:   end_block
+  // CHECK:         ArraySet
+  // We could check here that there is a parallel move, but it's only valid
+  // for some architectures (for example x86), as other architectures may
+  // not do a move at all.
+  // CHECK:       end_block
+  // CHECK-NOT:     ParallelMove
+
+  public static void test1(boolean z, int a, int b, int c, int d, int m) {
+    int e = live1;
+    int f = live2;
+    int g = live3;
+    if (z) {
+    } else {
+      // Create enough live instructions to force spilling on x86.
+      int h = live4;
+      int i = live5;
+      array[2] = e + i + h;
+      array[3] = f + i + h;
+      array[4] = g + i + h;
+      array[0] = h;
+      array[1] = i + h;
+
+    }
+    live1 = e + f + g;
+  }
+
+  // CHECK-START: void Main.test2(boolean, int, int, int, int, int) register (after)
+  // CHECK:       name "B0"
+  // CHECK-NOT:     ParallelMove
+  // CHECK:       name "B1"
+  // CHECK-NOT:   end_block
+  // CHECK:         If
+  // CHECK-NOT:     ParallelMove
+  // CHECK:       name "B3"
+  // CHECK-NOT:   end_block
+  // CHECK:         ArraySet
+  // We could check here that there is a parallel move, but it's only valid
+  // for some architectures (for example x86), as other architectures may
+  // not do a move at all.
+  // CHECK:       end_block
+  // CHECK-NOT:     ParallelMove
+
+  public static void test2(boolean z, int a, int b, int c, int d, int m) {
+    int e = live1;
+    int f = live2;
+    int g = live3;
+    if (z) {
+      if (y) {
+        int h = live4;
+        int i = live5;
+        array[2] = e + i + h;
+        array[3] = f + i + h;
+        array[4] = g + i + h;
+        array[0] = h;
+        array[1] = i + h;
+      }
+    }
+    live1 = e + f + g;
+  }
+
+  // CHECK-START: void Main.test3(boolean, int, int, int, int, int) register (after)
+  // CHECK:       name "B0"
+  // CHECK-NOT:     ParallelMove
+  // CHECK:       name "B1"
+  // CHECK-NOT:   end_block
+  // CHECK:         If
+  // CHECK-NOT:     ParallelMove
+  // CHECK:       name "B6"
+  // CHECK-NOT:   end_block
+  // CHECK:         ArraySet
+  // We could check here that there is a parallel move, but it's only valid
+  // for some architectures (for example x86), as other architectures may
+  // not do a move at all.
+  // CHECK:       end_block
+  // CHECK-NOT:     ParallelMove
+
+  public static void test3(boolean z, int a, int b, int c, int d, int m) {
+    // Same version as test2, but with branches reversed, to ensure
+    // whatever linear order is computed, we will get the same results.
+    int e = live1;
+    int f = live2;
+    int g = live3;
+    if (z) {
+      live1 = e;
+    } else {
+      if (y) {
+        live1 = e;
+      } else {
+        int h = live4;
+        int i = live5;
+        array[2] = e + i + h;
+        array[3] = f + i + h;
+        array[4] = g + i + h;
+        array[0] = h;
+        array[1] = i + h;
+      }
+    }
+    live1 = e + f + g;
+  }
+
+  public static void main(String[] args) {
+  }
+
+  static boolean y;
+  static int live1;
+  static int live2;
+  static int live3;
+  static int live4;
+  static int live5;
+  static int[] array;
+}
diff --git a/test/485-checker-dce-loop-update/expected.txt b/test/485-checker-dce-loop-update/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/485-checker-dce-loop-update/expected.txt
diff --git a/test/485-checker-dce-loop-update/info.txt b/test/485-checker-dce-loop-update/info.txt
new file mode 100644
index 0000000..fccf10c
--- /dev/null
+++ b/test/485-checker-dce-loop-update/info.txt
@@ -0,0 +1,2 @@
+Tests loop information update after DCE because block removal can disconnect loops, leaving other
+live blocks outside the loop they had been a member of.
\ No newline at end of file
diff --git a/test/485-checker-dce-loop-update/smali/TestCase.smali b/test/485-checker-dce-loop-update/smali/TestCase.smali
new file mode 100644
index 0000000..487a5df
--- /dev/null
+++ b/test/485-checker-dce-loop-update/smali/TestCase.smali
@@ -0,0 +1,274 @@
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LTestCase;
+
+.super Ljava/lang/Object;
+
+.method public static $inline$True()Z
+  .registers 1
+  const/4 v0, 1
+  return v0
+.end method
+
+
+# CHECK-START: int TestCase.testSingleExit(int, boolean) dead_code_elimination_final (before)
+# CHECK-DAG:     <<ArgX:i\d+>>  ParameterValue
+# CHECK-DAG:     <<ArgY:z\d+>>  ParameterValue
+# CHECK-DAG:     <<Cst1:i\d+>>  IntConstant 1
+# CHECK-DAG:     <<Cst5:i\d+>>  IntConstant 5
+# CHECK-DAG:     <<Cst7:i\d+>>  IntConstant 7
+# CHECK-DAG:     <<PhiX:i\d+>>  Phi [<<ArgX>>,<<Add5:i\d+>>,<<Add7:i\d+>>] loop:<<HeaderY:B\d+>>
+# CHECK-DAG:                    If [<<ArgY>>]                              loop:<<HeaderY>>
+# CHECK-DAG:                    If [<<Cst1>>]                              loop:<<HeaderY>>
+# CHECK-DAG:     <<Add5>>       Add [<<PhiX>>,<<Cst5>>]                    loop:<<HeaderY>>
+# CHECK-DAG:     <<Add7>>       Add [<<PhiX>>,<<Cst7>>]                    loop:<<HeaderY>>
+# CHECK-DAG:                    Return [<<PhiX>>]                          loop:none
+
+# CHECK-START: int TestCase.testSingleExit(int, boolean) dead_code_elimination_final (after)
+# CHECK-DAG:     <<ArgX:i\d+>>  ParameterValue
+# CHECK-DAG:     <<ArgY:z\d+>>  ParameterValue
+# CHECK-DAG:     <<Cst7:i\d+>>  IntConstant 7
+# CHECK-DAG:     <<PhiX:i\d+>>  Phi [<<ArgX>>,<<AddX:i\d+>>]               loop:<<HeaderY:B\d+>>
+# CHECK-DAG:                    If [<<ArgY>>]                              loop:<<HeaderY>>
+# CHECK-DAG:     <<AddX>>       Add [<<PhiX>>,<<Cst7>>]                    loop:<<HeaderY>>
+# CHECK-DAG:                    Return [<<PhiX>>]                          loop:none
+
+.method public static testSingleExit(IZ)I
+  .registers 3
+
+  # p0 = int X
+  # p1 = boolean Y
+  # v0 = true
+
+  invoke-static {}, LTestCase;->$inline$True()Z
+  move-result v0
+
+  :loop_start
+  if-eqz p1, :loop_body   # cannot be determined statically
+  if-nez v0, :loop_end    # will always exit
+
+  # Dead block
+  add-int/lit8 p0, p0, 5
+  goto :loop_start
+
+  # Live block
+  :loop_body
+  add-int/lit8 p0, p0, 7
+  goto :loop_start
+
+  :loop_end
+  return p0
+.end method
+
+
+# CHECK-START: int TestCase.testMultipleExits(int, boolean, boolean) dead_code_elimination_final (before)
+# CHECK-DAG:     <<ArgX:i\d+>>  ParameterValue
+# CHECK-DAG:     <<ArgY:z\d+>>  ParameterValue
+# CHECK-DAG:     <<ArgZ:z\d+>>  ParameterValue
+# CHECK-DAG:     <<Cst1:i\d+>>  IntConstant 1
+# CHECK-DAG:     <<Cst5:i\d+>>  IntConstant 5
+# CHECK-DAG:     <<Cst7:i\d+>>  IntConstant 7
+# CHECK-DAG:     <<PhiX:i\d+>>  Phi [<<ArgX>>,<<Add5:i\d+>>,<<Add7:i\d+>>] loop:<<HeaderY:B\d+>>
+# CHECK-DAG:                    If [<<ArgY>>]                              loop:<<HeaderY>>
+# CHECK-DAG:                    If [<<ArgZ>>]                              loop:<<HeaderY>>
+# CHECK-DAG:                    If [<<Cst1>>]                              loop:<<HeaderY>>
+# CHECK-DAG:     <<Add5>>       Add [<<PhiX>>,<<Cst5>>]                    loop:<<HeaderY>>
+# CHECK-DAG:     <<Add7>>       Add [<<PhiX>>,<<Cst7>>]                    loop:<<HeaderY>>
+# CHECK-DAG:                    Return [<<PhiX>>]                          loop:none
+
+# CHECK-START: int TestCase.testMultipleExits(int, boolean, boolean) dead_code_elimination_final (after)
+# CHECK-DAG:     <<ArgX:i\d+>>  ParameterValue
+# CHECK-DAG:     <<ArgY:z\d+>>  ParameterValue
+# CHECK-DAG:     <<ArgZ:z\d+>>  ParameterValue
+# CHECK-DAG:     <<Cst7:i\d+>>  IntConstant 7
+# CHECK-DAG:     <<PhiX:i\d+>>  Phi [<<ArgX>>,<<Add7:i\d+>>]               loop:<<HeaderY:B\d+>>
+# CHECK-DAG:                    If [<<ArgY>>]                              loop:<<HeaderY>>
+# CHECK-DAG:     <<Add7>>       Add [<<PhiX>>,<<Cst7>>]                    loop:<<HeaderY>>
+# CHECK-DAG:                    If [<<ArgZ>>]                              loop:none
+# CHECK-DAG:                    Return [<<PhiX>>]                          loop:none
+
+.method public static testMultipleExits(IZZ)I
+  .registers 4
+
+  # p0 = int X
+  # p1 = boolean Y
+  # p2 = boolean Z
+  # v0 = true
+
+  invoke-static {}, LTestCase;->$inline$True()Z
+  move-result v0
+
+  :loop_start
+  if-eqz p1, :loop_body   # cannot be determined statically
+  if-nez p2, :loop_end    # may exit
+  if-nez v0, :loop_end    # will always exit
+
+  # Dead block
+  add-int/lit8 p0, p0, 5
+  goto :loop_start
+
+  # Live block
+  :loop_body
+  add-int/lit8 p0, p0, 7
+  goto :loop_start
+
+  :loop_end
+  return p0
+.end method
+
+
+# CHECK-START: int TestCase.testExitPredecessors(int, boolean, boolean) dead_code_elimination_final (before)
+# CHECK-DAG:     <<ArgX:i\d+>>  ParameterValue
+# CHECK-DAG:     <<ArgY:z\d+>>  ParameterValue
+# CHECK-DAG:     <<ArgZ:z\d+>>  ParameterValue
+# CHECK-DAG:     <<Cst1:i\d+>>  IntConstant 1
+# CHECK-DAG:     <<Cst5:i\d+>>  IntConstant 5
+# CHECK-DAG:     <<Cst7:i\d+>>  IntConstant 7
+# CHECK-DAG:     <<Cst9:i\d+>>  IntConstant 9
+# CHECK-DAG:     <<PhiX1:i\d+>> Phi [<<ArgX>>,<<Add5:i\d+>>,<<Add7:i\d+>>] loop:<<HeaderY:B\d+>>
+# CHECK-DAG:                    If [<<ArgY>>]                              loop:<<HeaderY>>
+# CHECK-DAG:                    If [<<ArgZ>>]                              loop:<<HeaderY>>
+# CHECK-DAG:     <<Mul9:i\d+>>  Mul [<<PhiX1>>,<<Cst9>>]                   loop:<<HeaderY>>
+# CHECK-DAG:     <<PhiX2:i\d+>> Phi [<<Mul9>>,<<PhiX1>>]                   loop:<<HeaderY>>
+# CHECK-DAG:                    If [<<Cst1>>]                              loop:<<HeaderY>>
+# CHECK-DAG:     <<Add5>>       Add [<<PhiX2>>,<<Cst5>>]                   loop:<<HeaderY>>
+# CHECK-DAG:     <<Add7>>       Add [<<PhiX1>>,<<Cst7>>]                   loop:<<HeaderY>>
+# CHECK-DAG:                    Return [<<PhiX2>>]                         loop:none
+
+# CHECK-START: int TestCase.testExitPredecessors(int, boolean, boolean) dead_code_elimination_final (after)
+# CHECK-DAG:     <<ArgX:i\d+>>  ParameterValue
+# CHECK-DAG:     <<ArgY:z\d+>>  ParameterValue
+# CHECK-DAG:     <<ArgZ:z\d+>>  ParameterValue
+# CHECK-DAG:     <<Cst7:i\d+>>  IntConstant 7
+# CHECK-DAG:     <<Cst9:i\d+>>  IntConstant 9
+# CHECK-DAG:     <<PhiX1:i\d+>> Phi [<<ArgX>>,<<Add7:i\d+>>]               loop:<<HeaderY:B\d+>>
+# CHECK-DAG:                    If [<<ArgY>>]                              loop:<<HeaderY>>
+# CHECK-DAG:     <<Add7>>       Add [<<PhiX1>>,<<Cst7>>]                   loop:<<HeaderY>>
+# CHECK-DAG:                    If [<<ArgZ>>]                              loop:none
+# CHECK-DAG:     <<Mul9:i\d+>>  Mul [<<PhiX1>>,<<Cst9>>]                   loop:none
+# CHECK-DAG:     <<PhiX2:i\d+>> Phi [<<Mul9>>,<<PhiX1>>]                   loop:none
+# CHECK-DAG:                    Return [<<PhiX2>>]                         loop:none
+
+.method public static testExitPredecessors(IZZ)I
+  .registers 4
+
+  # p0 = int X
+  # p1 = boolean Y
+  # p2 = boolean Z
+  # v0 = true
+
+  invoke-static {}, LTestCase;->$inline$True()Z
+  move-result v0
+
+  :loop_start
+  if-eqz p1, :loop_body   # cannot be determined statically
+
+  # Additional logic which will end up outside the loop
+  if-eqz p2, :skip_if
+  mul-int/lit8 p0, p0, 9
+  :skip_if
+
+  if-nez v0, :loop_end    # will always take the branch
+
+  # Dead block
+  add-int/lit8 p0, p0, 5
+  goto :loop_start
+
+  # Live block
+  :loop_body
+  add-int/lit8 p0, p0, 7
+  goto :loop_start
+
+  :loop_end
+  return p0
+.end method
+
+
+# CHECK-START: int TestCase.testInnerLoop(int, boolean, boolean) dead_code_elimination_final (before)
+# CHECK-DAG:     <<ArgX:i\d+>>  ParameterValue
+# CHECK-DAG:     <<ArgY:z\d+>>  ParameterValue
+# CHECK-DAG:     <<ArgZ:z\d+>>  ParameterValue
+# CHECK-DAG:     <<Cst0:i\d+>>  IntConstant 0
+# CHECK-DAG:     <<Cst1:i\d+>>  IntConstant 1
+# CHECK-DAG:     <<Cst5:i\d+>>  IntConstant 5
+# CHECK-DAG:     <<Cst7:i\d+>>  IntConstant 7
+#
+# CHECK-DAG:     <<PhiX:i\d+>>  Phi [<<ArgX>>,<<Add5:i\d+>>,<<Add7:i\d+>>] loop:<<HeaderY:B\d+>>
+# CHECK-DAG:     <<PhiZ1:i\d+>> Phi [<<ArgZ>>,<<XorZ:i\d+>>,<<PhiZ1>>]     loop:<<HeaderY>>
+# CHECK-DAG:                    If [<<ArgY>>]                              loop:<<HeaderY>>
+#
+#                               ### Inner loop ###
+# CHECK-DAG:     <<PhiZ2:i\d+>> Phi [<<PhiZ1>>,<<XorZ>>]                   loop:<<HeaderZ:B\d+>>
+# CHECK-DAG:     <<XorZ>>       Xor [<<PhiZ2>>,<<Cst1>>]                   loop:<<HeaderZ>>
+# CHECK-DAG:     <<CondZ:z\d+>> Equal [<<XorZ>>,<<Cst0>>]                  loop:<<HeaderZ>>
+# CHECK-DAG:                    If [<<CondZ>>]                             loop:<<HeaderZ>>
+#
+# CHECK-DAG:     <<Add5>>       Add [<<PhiX>>,<<Cst5>>]                    loop:<<HeaderY>>
+# CHECK-DAG:     <<Add7>>       Add [<<PhiX>>,<<Cst7>>]                    loop:<<HeaderY>>
+# CHECK-DAG:                    Return [<<PhiX>>]                          loop:none
+
+# CHECK-START: int TestCase.testInnerLoop(int, boolean, boolean) dead_code_elimination_final (after)
+# CHECK-DAG:     <<ArgX:i\d+>>  ParameterValue
+# CHECK-DAG:     <<ArgY:z\d+>>  ParameterValue
+# CHECK-DAG:     <<ArgZ:z\d+>>  ParameterValue
+# CHECK-DAG:     <<Cst0:i\d+>>  IntConstant 0
+# CHECK-DAG:     <<Cst1:i\d+>>  IntConstant 1
+# CHECK-DAG:     <<Cst7:i\d+>>  IntConstant 7
+#
+# CHECK-DAG:     <<PhiX:i\d+>>  Phi [<<ArgX>>,<<Add7:i\d+>>]               loop:<<HeaderY:B\d+>>
+# CHECK-DAG:                    If [<<ArgY>>]                              loop:<<HeaderY>>
+# CHECK-DAG:     <<Add7>>       Add [<<PhiX>>,<<Cst7>>]                    loop:<<HeaderY>>
+#
+#                               ### Inner loop ###
+# CHECK-DAG:     <<PhiZ:i\d+>>  Phi [<<ArgZ>>,<<XorZ:i\d+>>]               loop:<<HeaderZ:B\d+>>
+# CHECK-DAG:     <<XorZ>>       Xor [<<PhiZ>>,<<Cst1>>]                    loop:<<HeaderZ>>
+# CHECK-DAG:     <<CondZ:z\d+>> Equal [<<XorZ>>,<<Cst0>>]                  loop:<<HeaderZ>>
+# CHECK-DAG:                    If [<<CondZ>>]                             loop:<<HeaderZ>>
+#
+# CHECK-DAG:                    Return [<<PhiX>>]                          loop:none
+
+.method public static testInnerLoop(IZZ)I
+  .registers 4
+
+  # p0 = int X
+  # p1 = boolean Y
+  # p2 = boolean Z
+  # v0 = true
+
+  invoke-static {}, LTestCase;->$inline$True()Z
+  move-result v0
+
+  :loop_start
+  if-eqz p1, :loop_body   # cannot be determined statically
+
+  # Inner loop which will end up outside its parent
+  :inner_loop_start
+  xor-int/lit8 p2, p2, 1
+  if-eqz p2, :inner_loop_start
+
+  if-nez v0, :loop_end    # will always take the branch
+
+  # Dead block
+  add-int/lit8 p0, p0, 5
+  goto :loop_start
+
+  # Live block
+  :loop_body
+  add-int/lit8 p0, p0, 7
+  goto :loop_start
+
+  :loop_end
+  return p0
+.end method
diff --git a/test/485-checker-dce-loop-update/src/Main.java b/test/485-checker-dce-loop-update/src/Main.java
new file mode 100644
index 0000000..6bfe08b
--- /dev/null
+++ b/test/485-checker-dce-loop-update/src/Main.java
@@ -0,0 +1,27 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+
+public class Main {
+
+  // Workaround for b/18051191.
+  class InnerClass {}
+
+  public static void main(String[] args) throws Exception {
+    return;
+  }
+}
diff --git a/test/701-easy-div-rem/build b/test/701-easy-div-rem/build
new file mode 100644
index 0000000..1dc8452
--- /dev/null
+++ b/test/701-easy-div-rem/build
@@ -0,0 +1,28 @@
+#!/bin/bash
+#
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Stop if something fails.
+set -e
+
+# Write out the source file.
+mkdir src
+python ./genMain.py
+
+# Increase the file size limitation for classes.lst as the machine generated
+# source file contains a lot of methods and is quite large.
+ulimit -S 4096
+
+./default-build
diff --git a/test/701-easy-div-rem/genMain.py b/test/701-easy-div-rem/genMain.py
index 80eac34..75eee17 100644
--- a/test/701-easy-div-rem/genMain.py
+++ b/test/701-easy-div-rem/genMain.py
@@ -12,15 +12,28 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+upper_bound_int_pow2 = 31
+upper_bound_long_pow2 = 63
+upper_bound_constant = 100
 all_tests = [
     ({'@INT@': 'int', '@SUFFIX@':''},
-     [('CheckDiv', 'idiv_by_pow2_', [2**i for i in range(31)]),
-      ('CheckDiv', 'idiv_by_small_', [i for i in range(3, 16) if i not in (4, 8)]),
-      ('CheckRem', 'irem_by_pow2_', [2**i for i in range(31)])]),
+     [('CheckDiv', 'idiv_by_pow2_', [2**i for i in range(upper_bound_int_pow2)]),
+      ('CheckDiv', 'idiv_by_pow2_neg_', [-2**i for i in range(upper_bound_int_pow2)]),
+      ('CheckDiv', 'idiv_by_constant_', [i for i in range(1, upper_bound_constant)]),
+      ('CheckDiv', 'idiv_by_constant_neg_', [-i for i in range(1, upper_bound_constant)]),
+      ('CheckRem', 'irem_by_pow2_', [2**i for i in range(upper_bound_int_pow2)]),
+      ('CheckRem', 'irem_by_pow2_neg_', [-2**i for i in range(upper_bound_int_pow2)]),
+      ('CheckRem', 'irem_by_constant_', [i for i in range(1, upper_bound_constant)]),
+      ('CheckRem', 'irem_by_constant_neg_', [-i for i in range(1, upper_bound_constant)])]),
     ({'@INT@': 'long', '@SUFFIX@': 'l'},
-     [('CheckDiv', 'ldiv_by_pow2_', [2**i for i in range(63)]),
-      ('CheckDiv', 'ldiv_by_small_', [i for i in range(3, 16) if i not in (4, 8)]),
-      ('CheckRem', 'lrem_by_pow2_', [2**i for i in range(63)])])
+     [('CheckDiv', 'ldiv_by_pow2_', [2**i for i in range(upper_bound_long_pow2)]),
+      ('CheckDiv', 'ldiv_by_pow2_neg_', [-2**i for i in range(upper_bound_long_pow2)]),
+      ('CheckDiv', 'ldiv_by_constant_', [i for i in range(1, upper_bound_constant)]),
+      ('CheckDiv', 'ldiv_by_constant_neg_', [-i for i in range(1, upper_bound_constant)]),
+      ('CheckRem', 'lrem_by_pow2_', [2**i for i in range(upper_bound_long_pow2)]),
+      ('CheckRem', 'lrem_by_pow2_neg_', [-2**i for i in range(upper_bound_long_pow2)]),
+      ('CheckRem', 'lrem_by_constant_', [i for i in range(1, upper_bound_constant)]),
+      ('CheckRem', 'lrem_by_constant_neg_', [-i for i in range(1, upper_bound_constant)])])
 ]
 
 def subst_vars(variables, text):
diff --git a/test/701-easy-div-rem/src/Main.java b/test/701-easy-div-rem/src/Main.java
deleted file mode 100644
index f995f61..0000000
--- a/test/701-easy-div-rem/src/Main.java
+++ /dev/null
@@ -1,529 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-public class Main {
-    public static int num_errors = 0;
-
-    public static void reportError(String message) {
-        if (num_errors == 10) {
-            System.out.println("Omitting other error messages...");
-        } else if (num_errors < 10) {
-            System.out.println(message);
-        }
-        num_errors += 1;
-    }
-
-    public static void intCheckDiv(String desc, int result, int dividend, int divisor) {
-        int correct_result = dividend / divisor;
-        if (result != correct_result) {
-            reportError(desc + "(" + dividend + ") == " + result +
-                        " should be " + correct_result);
-        }
-    }
-    public static void intCheckRem(String desc, int result, int dividend, int divisor) {
-        int correct_result = dividend % divisor;
-        if (result != correct_result) {
-            reportError(desc + "(" + dividend + ") == " + result +
-                        " should be " + correct_result);
-        }
-    }
-    public static void longCheckDiv(String desc, long result, long dividend, long divisor) {
-        long correct_result = dividend / divisor;
-        if (result != correct_result) {
-            reportError(desc + "(" + dividend + ") == " + result +
-                        " should be " + correct_result);
-        }
-    }
-    public static void longCheckRem(String desc, long result, long dividend, long divisor) {
-        long correct_result = dividend % divisor;
-        if (result != correct_result) {
-            reportError(desc + "(" + dividend + ") == " + result +
-                        " should be " + correct_result);
-        }
-    }
-
-    public static int idiv_by_pow2_0(int x) {return x / 1;}
-    public static int idiv_by_pow2_1(int x) {return x / 2;}
-    public static int idiv_by_pow2_2(int x) {return x / 4;}
-    public static int idiv_by_pow2_3(int x) {return x / 8;}
-    public static int idiv_by_pow2_4(int x) {return x / 16;}
-    public static int idiv_by_pow2_5(int x) {return x / 32;}
-    public static int idiv_by_pow2_6(int x) {return x / 64;}
-    public static int idiv_by_pow2_7(int x) {return x / 128;}
-    public static int idiv_by_pow2_8(int x) {return x / 256;}
-    public static int idiv_by_pow2_9(int x) {return x / 512;}
-    public static int idiv_by_pow2_10(int x) {return x / 1024;}
-    public static int idiv_by_pow2_11(int x) {return x / 2048;}
-    public static int idiv_by_pow2_12(int x) {return x / 4096;}
-    public static int idiv_by_pow2_13(int x) {return x / 8192;}
-    public static int idiv_by_pow2_14(int x) {return x / 16384;}
-    public static int idiv_by_pow2_15(int x) {return x / 32768;}
-    public static int idiv_by_pow2_16(int x) {return x / 65536;}
-    public static int idiv_by_pow2_17(int x) {return x / 131072;}
-    public static int idiv_by_pow2_18(int x) {return x / 262144;}
-    public static int idiv_by_pow2_19(int x) {return x / 524288;}
-    public static int idiv_by_pow2_20(int x) {return x / 1048576;}
-    public static int idiv_by_pow2_21(int x) {return x / 2097152;}
-    public static int idiv_by_pow2_22(int x) {return x / 4194304;}
-    public static int idiv_by_pow2_23(int x) {return x / 8388608;}
-    public static int idiv_by_pow2_24(int x) {return x / 16777216;}
-    public static int idiv_by_pow2_25(int x) {return x / 33554432;}
-    public static int idiv_by_pow2_26(int x) {return x / 67108864;}
-    public static int idiv_by_pow2_27(int x) {return x / 134217728;}
-    public static int idiv_by_pow2_28(int x) {return x / 268435456;}
-    public static int idiv_by_pow2_29(int x) {return x / 536870912;}
-    public static int idiv_by_pow2_30(int x) {return x / 1073741824;}
-    public static int idiv_by_small_0(int x) {return x / 3;}
-    public static int idiv_by_small_1(int x) {return x / 5;}
-    public static int idiv_by_small_2(int x) {return x / 6;}
-    public static int idiv_by_small_3(int x) {return x / 7;}
-    public static int idiv_by_small_4(int x) {return x / 9;}
-    public static int idiv_by_small_5(int x) {return x / 10;}
-    public static int idiv_by_small_6(int x) {return x / 11;}
-    public static int idiv_by_small_7(int x) {return x / 12;}
-    public static int idiv_by_small_8(int x) {return x / 13;}
-    public static int idiv_by_small_9(int x) {return x / 14;}
-    public static int idiv_by_small_10(int x) {return x / 15;}
-    public static int irem_by_pow2_0(int x) {return x % 1;}
-    public static int irem_by_pow2_1(int x) {return x % 2;}
-    public static int irem_by_pow2_2(int x) {return x % 4;}
-    public static int irem_by_pow2_3(int x) {return x % 8;}
-    public static int irem_by_pow2_4(int x) {return x % 16;}
-    public static int irem_by_pow2_5(int x) {return x % 32;}
-    public static int irem_by_pow2_6(int x) {return x % 64;}
-    public static int irem_by_pow2_7(int x) {return x % 128;}
-    public static int irem_by_pow2_8(int x) {return x % 256;}
-    public static int irem_by_pow2_9(int x) {return x % 512;}
-    public static int irem_by_pow2_10(int x) {return x % 1024;}
-    public static int irem_by_pow2_11(int x) {return x % 2048;}
-    public static int irem_by_pow2_12(int x) {return x % 4096;}
-    public static int irem_by_pow2_13(int x) {return x % 8192;}
-    public static int irem_by_pow2_14(int x) {return x % 16384;}
-    public static int irem_by_pow2_15(int x) {return x % 32768;}
-    public static int irem_by_pow2_16(int x) {return x % 65536;}
-    public static int irem_by_pow2_17(int x) {return x % 131072;}
-    public static int irem_by_pow2_18(int x) {return x % 262144;}
-    public static int irem_by_pow2_19(int x) {return x % 524288;}
-    public static int irem_by_pow2_20(int x) {return x % 1048576;}
-    public static int irem_by_pow2_21(int x) {return x % 2097152;}
-    public static int irem_by_pow2_22(int x) {return x % 4194304;}
-    public static int irem_by_pow2_23(int x) {return x % 8388608;}
-    public static int irem_by_pow2_24(int x) {return x % 16777216;}
-    public static int irem_by_pow2_25(int x) {return x % 33554432;}
-    public static int irem_by_pow2_26(int x) {return x % 67108864;}
-    public static int irem_by_pow2_27(int x) {return x % 134217728;}
-    public static int irem_by_pow2_28(int x) {return x % 268435456;}
-    public static int irem_by_pow2_29(int x) {return x % 536870912;}
-    public static int irem_by_pow2_30(int x) {return x % 1073741824;}
-    public static long ldiv_by_pow2_0(long x) {return x / 1l;}
-    public static long ldiv_by_pow2_1(long x) {return x / 2l;}
-    public static long ldiv_by_pow2_2(long x) {return x / 4l;}
-    public static long ldiv_by_pow2_3(long x) {return x / 8l;}
-    public static long ldiv_by_pow2_4(long x) {return x / 16l;}
-    public static long ldiv_by_pow2_5(long x) {return x / 32l;}
-    public static long ldiv_by_pow2_6(long x) {return x / 64l;}
-    public static long ldiv_by_pow2_7(long x) {return x / 128l;}
-    public static long ldiv_by_pow2_8(long x) {return x / 256l;}
-    public static long ldiv_by_pow2_9(long x) {return x / 512l;}
-    public static long ldiv_by_pow2_10(long x) {return x / 1024l;}
-    public static long ldiv_by_pow2_11(long x) {return x / 2048l;}
-    public static long ldiv_by_pow2_12(long x) {return x / 4096l;}
-    public static long ldiv_by_pow2_13(long x) {return x / 8192l;}
-    public static long ldiv_by_pow2_14(long x) {return x / 16384l;}
-    public static long ldiv_by_pow2_15(long x) {return x / 32768l;}
-    public static long ldiv_by_pow2_16(long x) {return x / 65536l;}
-    public static long ldiv_by_pow2_17(long x) {return x / 131072l;}
-    public static long ldiv_by_pow2_18(long x) {return x / 262144l;}
-    public static long ldiv_by_pow2_19(long x) {return x / 524288l;}
-    public static long ldiv_by_pow2_20(long x) {return x / 1048576l;}
-    public static long ldiv_by_pow2_21(long x) {return x / 2097152l;}
-    public static long ldiv_by_pow2_22(long x) {return x / 4194304l;}
-    public static long ldiv_by_pow2_23(long x) {return x / 8388608l;}
-    public static long ldiv_by_pow2_24(long x) {return x / 16777216l;}
-    public static long ldiv_by_pow2_25(long x) {return x / 33554432l;}
-    public static long ldiv_by_pow2_26(long x) {return x / 67108864l;}
-    public static long ldiv_by_pow2_27(long x) {return x / 134217728l;}
-    public static long ldiv_by_pow2_28(long x) {return x / 268435456l;}
-    public static long ldiv_by_pow2_29(long x) {return x / 536870912l;}
-    public static long ldiv_by_pow2_30(long x) {return x / 1073741824l;}
-    public static long ldiv_by_pow2_31(long x) {return x / 2147483648l;}
-    public static long ldiv_by_pow2_32(long x) {return x / 4294967296l;}
-    public static long ldiv_by_pow2_33(long x) {return x / 8589934592l;}
-    public static long ldiv_by_pow2_34(long x) {return x / 17179869184l;}
-    public static long ldiv_by_pow2_35(long x) {return x / 34359738368l;}
-    public static long ldiv_by_pow2_36(long x) {return x / 68719476736l;}
-    public static long ldiv_by_pow2_37(long x) {return x / 137438953472l;}
-    public static long ldiv_by_pow2_38(long x) {return x / 274877906944l;}
-    public static long ldiv_by_pow2_39(long x) {return x / 549755813888l;}
-    public static long ldiv_by_pow2_40(long x) {return x / 1099511627776l;}
-    public static long ldiv_by_pow2_41(long x) {return x / 2199023255552l;}
-    public static long ldiv_by_pow2_42(long x) {return x / 4398046511104l;}
-    public static long ldiv_by_pow2_43(long x) {return x / 8796093022208l;}
-    public static long ldiv_by_pow2_44(long x) {return x / 17592186044416l;}
-    public static long ldiv_by_pow2_45(long x) {return x / 35184372088832l;}
-    public static long ldiv_by_pow2_46(long x) {return x / 70368744177664l;}
-    public static long ldiv_by_pow2_47(long x) {return x / 140737488355328l;}
-    public static long ldiv_by_pow2_48(long x) {return x / 281474976710656l;}
-    public static long ldiv_by_pow2_49(long x) {return x / 562949953421312l;}
-    public static long ldiv_by_pow2_50(long x) {return x / 1125899906842624l;}
-    public static long ldiv_by_pow2_51(long x) {return x / 2251799813685248l;}
-    public static long ldiv_by_pow2_52(long x) {return x / 4503599627370496l;}
-    public static long ldiv_by_pow2_53(long x) {return x / 9007199254740992l;}
-    public static long ldiv_by_pow2_54(long x) {return x / 18014398509481984l;}
-    public static long ldiv_by_pow2_55(long x) {return x / 36028797018963968l;}
-    public static long ldiv_by_pow2_56(long x) {return x / 72057594037927936l;}
-    public static long ldiv_by_pow2_57(long x) {return x / 144115188075855872l;}
-    public static long ldiv_by_pow2_58(long x) {return x / 288230376151711744l;}
-    public static long ldiv_by_pow2_59(long x) {return x / 576460752303423488l;}
-    public static long ldiv_by_pow2_60(long x) {return x / 1152921504606846976l;}
-    public static long ldiv_by_pow2_61(long x) {return x / 2305843009213693952l;}
-    public static long ldiv_by_pow2_62(long x) {return x / 4611686018427387904l;}
-    public static long ldiv_by_small_0(long x) {return x / 3l;}
-    public static long ldiv_by_small_1(long x) {return x / 5l;}
-    public static long ldiv_by_small_2(long x) {return x / 6l;}
-    public static long ldiv_by_small_3(long x) {return x / 7l;}
-    public static long ldiv_by_small_4(long x) {return x / 9l;}
-    public static long ldiv_by_small_5(long x) {return x / 10l;}
-    public static long ldiv_by_small_6(long x) {return x / 11l;}
-    public static long ldiv_by_small_7(long x) {return x / 12l;}
-    public static long ldiv_by_small_8(long x) {return x / 13l;}
-    public static long ldiv_by_small_9(long x) {return x / 14l;}
-    public static long ldiv_by_small_10(long x) {return x / 15l;}
-    public static long lrem_by_pow2_0(long x) {return x % 1l;}
-    public static long lrem_by_pow2_1(long x) {return x % 2l;}
-    public static long lrem_by_pow2_2(long x) {return x % 4l;}
-    public static long lrem_by_pow2_3(long x) {return x % 8l;}
-    public static long lrem_by_pow2_4(long x) {return x % 16l;}
-    public static long lrem_by_pow2_5(long x) {return x % 32l;}
-    public static long lrem_by_pow2_6(long x) {return x % 64l;}
-    public static long lrem_by_pow2_7(long x) {return x % 128l;}
-    public static long lrem_by_pow2_8(long x) {return x % 256l;}
-    public static long lrem_by_pow2_9(long x) {return x % 512l;}
-    public static long lrem_by_pow2_10(long x) {return x % 1024l;}
-    public static long lrem_by_pow2_11(long x) {return x % 2048l;}
-    public static long lrem_by_pow2_12(long x) {return x % 4096l;}
-    public static long lrem_by_pow2_13(long x) {return x % 8192l;}
-    public static long lrem_by_pow2_14(long x) {return x % 16384l;}
-    public static long lrem_by_pow2_15(long x) {return x % 32768l;}
-    public static long lrem_by_pow2_16(long x) {return x % 65536l;}
-    public static long lrem_by_pow2_17(long x) {return x % 131072l;}
-    public static long lrem_by_pow2_18(long x) {return x % 262144l;}
-    public static long lrem_by_pow2_19(long x) {return x % 524288l;}
-    public static long lrem_by_pow2_20(long x) {return x % 1048576l;}
-    public static long lrem_by_pow2_21(long x) {return x % 2097152l;}
-    public static long lrem_by_pow2_22(long x) {return x % 4194304l;}
-    public static long lrem_by_pow2_23(long x) {return x % 8388608l;}
-    public static long lrem_by_pow2_24(long x) {return x % 16777216l;}
-    public static long lrem_by_pow2_25(long x) {return x % 33554432l;}
-    public static long lrem_by_pow2_26(long x) {return x % 67108864l;}
-    public static long lrem_by_pow2_27(long x) {return x % 134217728l;}
-    public static long lrem_by_pow2_28(long x) {return x % 268435456l;}
-    public static long lrem_by_pow2_29(long x) {return x % 536870912l;}
-    public static long lrem_by_pow2_30(long x) {return x % 1073741824l;}
-    public static long lrem_by_pow2_31(long x) {return x % 2147483648l;}
-    public static long lrem_by_pow2_32(long x) {return x % 4294967296l;}
-    public static long lrem_by_pow2_33(long x) {return x % 8589934592l;}
-    public static long lrem_by_pow2_34(long x) {return x % 17179869184l;}
-    public static long lrem_by_pow2_35(long x) {return x % 34359738368l;}
-    public static long lrem_by_pow2_36(long x) {return x % 68719476736l;}
-    public static long lrem_by_pow2_37(long x) {return x % 137438953472l;}
-    public static long lrem_by_pow2_38(long x) {return x % 274877906944l;}
-    public static long lrem_by_pow2_39(long x) {return x % 549755813888l;}
-    public static long lrem_by_pow2_40(long x) {return x % 1099511627776l;}
-    public static long lrem_by_pow2_41(long x) {return x % 2199023255552l;}
-    public static long lrem_by_pow2_42(long x) {return x % 4398046511104l;}
-    public static long lrem_by_pow2_43(long x) {return x % 8796093022208l;}
-    public static long lrem_by_pow2_44(long x) {return x % 17592186044416l;}
-    public static long lrem_by_pow2_45(long x) {return x % 35184372088832l;}
-    public static long lrem_by_pow2_46(long x) {return x % 70368744177664l;}
-    public static long lrem_by_pow2_47(long x) {return x % 140737488355328l;}
-    public static long lrem_by_pow2_48(long x) {return x % 281474976710656l;}
-    public static long lrem_by_pow2_49(long x) {return x % 562949953421312l;}
-    public static long lrem_by_pow2_50(long x) {return x % 1125899906842624l;}
-    public static long lrem_by_pow2_51(long x) {return x % 2251799813685248l;}
-    public static long lrem_by_pow2_52(long x) {return x % 4503599627370496l;}
-    public static long lrem_by_pow2_53(long x) {return x % 9007199254740992l;}
-    public static long lrem_by_pow2_54(long x) {return x % 18014398509481984l;}
-    public static long lrem_by_pow2_55(long x) {return x % 36028797018963968l;}
-    public static long lrem_by_pow2_56(long x) {return x % 72057594037927936l;}
-    public static long lrem_by_pow2_57(long x) {return x % 144115188075855872l;}
-    public static long lrem_by_pow2_58(long x) {return x % 288230376151711744l;}
-    public static long lrem_by_pow2_59(long x) {return x % 576460752303423488l;}
-    public static long lrem_by_pow2_60(long x) {return x % 1152921504606846976l;}
-    public static long lrem_by_pow2_61(long x) {return x % 2305843009213693952l;}
-    public static long lrem_by_pow2_62(long x) {return x % 4611686018427387904l;}
-
-    public static void intCheckAll(int x) {
-        intCheckDiv("idiv_by_pow2_0", idiv_by_pow2_0(x), x, 1);
-        intCheckDiv("idiv_by_pow2_1", idiv_by_pow2_1(x), x, 2);
-        intCheckDiv("idiv_by_pow2_2", idiv_by_pow2_2(x), x, 4);
-        intCheckDiv("idiv_by_pow2_3", idiv_by_pow2_3(x), x, 8);
-        intCheckDiv("idiv_by_pow2_4", idiv_by_pow2_4(x), x, 16);
-        intCheckDiv("idiv_by_pow2_5", idiv_by_pow2_5(x), x, 32);
-        intCheckDiv("idiv_by_pow2_6", idiv_by_pow2_6(x), x, 64);
-        intCheckDiv("idiv_by_pow2_7", idiv_by_pow2_7(x), x, 128);
-        intCheckDiv("idiv_by_pow2_8", idiv_by_pow2_8(x), x, 256);
-        intCheckDiv("idiv_by_pow2_9", idiv_by_pow2_9(x), x, 512);
-        intCheckDiv("idiv_by_pow2_10", idiv_by_pow2_10(x), x, 1024);
-        intCheckDiv("idiv_by_pow2_11", idiv_by_pow2_11(x), x, 2048);
-        intCheckDiv("idiv_by_pow2_12", idiv_by_pow2_12(x), x, 4096);
-        intCheckDiv("idiv_by_pow2_13", idiv_by_pow2_13(x), x, 8192);
-        intCheckDiv("idiv_by_pow2_14", idiv_by_pow2_14(x), x, 16384);
-        intCheckDiv("idiv_by_pow2_15", idiv_by_pow2_15(x), x, 32768);
-        intCheckDiv("idiv_by_pow2_16", idiv_by_pow2_16(x), x, 65536);
-        intCheckDiv("idiv_by_pow2_17", idiv_by_pow2_17(x), x, 131072);
-        intCheckDiv("idiv_by_pow2_18", idiv_by_pow2_18(x), x, 262144);
-        intCheckDiv("idiv_by_pow2_19", idiv_by_pow2_19(x), x, 524288);
-        intCheckDiv("idiv_by_pow2_20", idiv_by_pow2_20(x), x, 1048576);
-        intCheckDiv("idiv_by_pow2_21", idiv_by_pow2_21(x), x, 2097152);
-        intCheckDiv("idiv_by_pow2_22", idiv_by_pow2_22(x), x, 4194304);
-        intCheckDiv("idiv_by_pow2_23", idiv_by_pow2_23(x), x, 8388608);
-        intCheckDiv("idiv_by_pow2_24", idiv_by_pow2_24(x), x, 16777216);
-        intCheckDiv("idiv_by_pow2_25", idiv_by_pow2_25(x), x, 33554432);
-        intCheckDiv("idiv_by_pow2_26", idiv_by_pow2_26(x), x, 67108864);
-        intCheckDiv("idiv_by_pow2_27", idiv_by_pow2_27(x), x, 134217728);
-        intCheckDiv("idiv_by_pow2_28", idiv_by_pow2_28(x), x, 268435456);
-        intCheckDiv("idiv_by_pow2_29", idiv_by_pow2_29(x), x, 536870912);
-        intCheckDiv("idiv_by_pow2_30", idiv_by_pow2_30(x), x, 1073741824);
-        intCheckDiv("idiv_by_small_0", idiv_by_small_0(x), x, 3);
-        intCheckDiv("idiv_by_small_1", idiv_by_small_1(x), x, 5);
-        intCheckDiv("idiv_by_small_2", idiv_by_small_2(x), x, 6);
-        intCheckDiv("idiv_by_small_3", idiv_by_small_3(x), x, 7);
-        intCheckDiv("idiv_by_small_4", idiv_by_small_4(x), x, 9);
-        intCheckDiv("idiv_by_small_5", idiv_by_small_5(x), x, 10);
-        intCheckDiv("idiv_by_small_6", idiv_by_small_6(x), x, 11);
-        intCheckDiv("idiv_by_small_7", idiv_by_small_7(x), x, 12);
-        intCheckDiv("idiv_by_small_8", idiv_by_small_8(x), x, 13);
-        intCheckDiv("idiv_by_small_9", idiv_by_small_9(x), x, 14);
-        intCheckDiv("idiv_by_small_10", idiv_by_small_10(x), x, 15);
-        intCheckRem("irem_by_pow2_0", irem_by_pow2_0(x), x, 1);
-        intCheckRem("irem_by_pow2_1", irem_by_pow2_1(x), x, 2);
-        intCheckRem("irem_by_pow2_2", irem_by_pow2_2(x), x, 4);
-        intCheckRem("irem_by_pow2_3", irem_by_pow2_3(x), x, 8);
-        intCheckRem("irem_by_pow2_4", irem_by_pow2_4(x), x, 16);
-        intCheckRem("irem_by_pow2_5", irem_by_pow2_5(x), x, 32);
-        intCheckRem("irem_by_pow2_6", irem_by_pow2_6(x), x, 64);
-        intCheckRem("irem_by_pow2_7", irem_by_pow2_7(x), x, 128);
-        intCheckRem("irem_by_pow2_8", irem_by_pow2_8(x), x, 256);
-        intCheckRem("irem_by_pow2_9", irem_by_pow2_9(x), x, 512);
-        intCheckRem("irem_by_pow2_10", irem_by_pow2_10(x), x, 1024);
-        intCheckRem("irem_by_pow2_11", irem_by_pow2_11(x), x, 2048);
-        intCheckRem("irem_by_pow2_12", irem_by_pow2_12(x), x, 4096);
-        intCheckRem("irem_by_pow2_13", irem_by_pow2_13(x), x, 8192);
-        intCheckRem("irem_by_pow2_14", irem_by_pow2_14(x), x, 16384);
-        intCheckRem("irem_by_pow2_15", irem_by_pow2_15(x), x, 32768);
-        intCheckRem("irem_by_pow2_16", irem_by_pow2_16(x), x, 65536);
-        intCheckRem("irem_by_pow2_17", irem_by_pow2_17(x), x, 131072);
-        intCheckRem("irem_by_pow2_18", irem_by_pow2_18(x), x, 262144);
-        intCheckRem("irem_by_pow2_19", irem_by_pow2_19(x), x, 524288);
-        intCheckRem("irem_by_pow2_20", irem_by_pow2_20(x), x, 1048576);
-        intCheckRem("irem_by_pow2_21", irem_by_pow2_21(x), x, 2097152);
-        intCheckRem("irem_by_pow2_22", irem_by_pow2_22(x), x, 4194304);
-        intCheckRem("irem_by_pow2_23", irem_by_pow2_23(x), x, 8388608);
-        intCheckRem("irem_by_pow2_24", irem_by_pow2_24(x), x, 16777216);
-        intCheckRem("irem_by_pow2_25", irem_by_pow2_25(x), x, 33554432);
-        intCheckRem("irem_by_pow2_26", irem_by_pow2_26(x), x, 67108864);
-        intCheckRem("irem_by_pow2_27", irem_by_pow2_27(x), x, 134217728);
-        intCheckRem("irem_by_pow2_28", irem_by_pow2_28(x), x, 268435456);
-        intCheckRem("irem_by_pow2_29", irem_by_pow2_29(x), x, 536870912);
-        intCheckRem("irem_by_pow2_30", irem_by_pow2_30(x), x, 1073741824);
-    }
-
-    public static void longCheckAll(long x) {
-        longCheckDiv("ldiv_by_pow2_0", ldiv_by_pow2_0(x), x, 1l);
-        longCheckDiv("ldiv_by_pow2_1", ldiv_by_pow2_1(x), x, 2l);
-        longCheckDiv("ldiv_by_pow2_2", ldiv_by_pow2_2(x), x, 4l);
-        longCheckDiv("ldiv_by_pow2_3", ldiv_by_pow2_3(x), x, 8l);
-        longCheckDiv("ldiv_by_pow2_4", ldiv_by_pow2_4(x), x, 16l);
-        longCheckDiv("ldiv_by_pow2_5", ldiv_by_pow2_5(x), x, 32l);
-        longCheckDiv("ldiv_by_pow2_6", ldiv_by_pow2_6(x), x, 64l);
-        longCheckDiv("ldiv_by_pow2_7", ldiv_by_pow2_7(x), x, 128l);
-        longCheckDiv("ldiv_by_pow2_8", ldiv_by_pow2_8(x), x, 256l);
-        longCheckDiv("ldiv_by_pow2_9", ldiv_by_pow2_9(x), x, 512l);
-        longCheckDiv("ldiv_by_pow2_10", ldiv_by_pow2_10(x), x, 1024l);
-        longCheckDiv("ldiv_by_pow2_11", ldiv_by_pow2_11(x), x, 2048l);
-        longCheckDiv("ldiv_by_pow2_12", ldiv_by_pow2_12(x), x, 4096l);
-        longCheckDiv("ldiv_by_pow2_13", ldiv_by_pow2_13(x), x, 8192l);
-        longCheckDiv("ldiv_by_pow2_14", ldiv_by_pow2_14(x), x, 16384l);
-        longCheckDiv("ldiv_by_pow2_15", ldiv_by_pow2_15(x), x, 32768l);
-        longCheckDiv("ldiv_by_pow2_16", ldiv_by_pow2_16(x), x, 65536l);
-        longCheckDiv("ldiv_by_pow2_17", ldiv_by_pow2_17(x), x, 131072l);
-        longCheckDiv("ldiv_by_pow2_18", ldiv_by_pow2_18(x), x, 262144l);
-        longCheckDiv("ldiv_by_pow2_19", ldiv_by_pow2_19(x), x, 524288l);
-        longCheckDiv("ldiv_by_pow2_20", ldiv_by_pow2_20(x), x, 1048576l);
-        longCheckDiv("ldiv_by_pow2_21", ldiv_by_pow2_21(x), x, 2097152l);
-        longCheckDiv("ldiv_by_pow2_22", ldiv_by_pow2_22(x), x, 4194304l);
-        longCheckDiv("ldiv_by_pow2_23", ldiv_by_pow2_23(x), x, 8388608l);
-        longCheckDiv("ldiv_by_pow2_24", ldiv_by_pow2_24(x), x, 16777216l);
-        longCheckDiv("ldiv_by_pow2_25", ldiv_by_pow2_25(x), x, 33554432l);
-        longCheckDiv("ldiv_by_pow2_26", ldiv_by_pow2_26(x), x, 67108864l);
-        longCheckDiv("ldiv_by_pow2_27", ldiv_by_pow2_27(x), x, 134217728l);
-        longCheckDiv("ldiv_by_pow2_28", ldiv_by_pow2_28(x), x, 268435456l);
-        longCheckDiv("ldiv_by_pow2_29", ldiv_by_pow2_29(x), x, 536870912l);
-        longCheckDiv("ldiv_by_pow2_30", ldiv_by_pow2_30(x), x, 1073741824l);
-        longCheckDiv("ldiv_by_pow2_31", ldiv_by_pow2_31(x), x, 2147483648l);
-        longCheckDiv("ldiv_by_pow2_32", ldiv_by_pow2_32(x), x, 4294967296l);
-        longCheckDiv("ldiv_by_pow2_33", ldiv_by_pow2_33(x), x, 8589934592l);
-        longCheckDiv("ldiv_by_pow2_34", ldiv_by_pow2_34(x), x, 17179869184l);
-        longCheckDiv("ldiv_by_pow2_35", ldiv_by_pow2_35(x), x, 34359738368l);
-        longCheckDiv("ldiv_by_pow2_36", ldiv_by_pow2_36(x), x, 68719476736l);
-        longCheckDiv("ldiv_by_pow2_37", ldiv_by_pow2_37(x), x, 137438953472l);
-        longCheckDiv("ldiv_by_pow2_38", ldiv_by_pow2_38(x), x, 274877906944l);
-        longCheckDiv("ldiv_by_pow2_39", ldiv_by_pow2_39(x), x, 549755813888l);
-        longCheckDiv("ldiv_by_pow2_40", ldiv_by_pow2_40(x), x, 1099511627776l);
-        longCheckDiv("ldiv_by_pow2_41", ldiv_by_pow2_41(x), x, 2199023255552l);
-        longCheckDiv("ldiv_by_pow2_42", ldiv_by_pow2_42(x), x, 4398046511104l);
-        longCheckDiv("ldiv_by_pow2_43", ldiv_by_pow2_43(x), x, 8796093022208l);
-        longCheckDiv("ldiv_by_pow2_44", ldiv_by_pow2_44(x), x, 17592186044416l);
-        longCheckDiv("ldiv_by_pow2_45", ldiv_by_pow2_45(x), x, 35184372088832l);
-        longCheckDiv("ldiv_by_pow2_46", ldiv_by_pow2_46(x), x, 70368744177664l);
-        longCheckDiv("ldiv_by_pow2_47", ldiv_by_pow2_47(x), x, 140737488355328l);
-        longCheckDiv("ldiv_by_pow2_48", ldiv_by_pow2_48(x), x, 281474976710656l);
-        longCheckDiv("ldiv_by_pow2_49", ldiv_by_pow2_49(x), x, 562949953421312l);
-        longCheckDiv("ldiv_by_pow2_50", ldiv_by_pow2_50(x), x, 1125899906842624l);
-        longCheckDiv("ldiv_by_pow2_51", ldiv_by_pow2_51(x), x, 2251799813685248l);
-        longCheckDiv("ldiv_by_pow2_52", ldiv_by_pow2_52(x), x, 4503599627370496l);
-        longCheckDiv("ldiv_by_pow2_53", ldiv_by_pow2_53(x), x, 9007199254740992l);
-        longCheckDiv("ldiv_by_pow2_54", ldiv_by_pow2_54(x), x, 18014398509481984l);
-        longCheckDiv("ldiv_by_pow2_55", ldiv_by_pow2_55(x), x, 36028797018963968l);
-        longCheckDiv("ldiv_by_pow2_56", ldiv_by_pow2_56(x), x, 72057594037927936l);
-        longCheckDiv("ldiv_by_pow2_57", ldiv_by_pow2_57(x), x, 144115188075855872l);
-        longCheckDiv("ldiv_by_pow2_58", ldiv_by_pow2_58(x), x, 288230376151711744l);
-        longCheckDiv("ldiv_by_pow2_59", ldiv_by_pow2_59(x), x, 576460752303423488l);
-        longCheckDiv("ldiv_by_pow2_60", ldiv_by_pow2_60(x), x, 1152921504606846976l);
-        longCheckDiv("ldiv_by_pow2_61", ldiv_by_pow2_61(x), x, 2305843009213693952l);
-        longCheckDiv("ldiv_by_pow2_62", ldiv_by_pow2_62(x), x, 4611686018427387904l);
-        longCheckDiv("ldiv_by_small_0", ldiv_by_small_0(x), x, 3l);
-        longCheckDiv("ldiv_by_small_1", ldiv_by_small_1(x), x, 5l);
-        longCheckDiv("ldiv_by_small_2", ldiv_by_small_2(x), x, 6l);
-        longCheckDiv("ldiv_by_small_3", ldiv_by_small_3(x), x, 7l);
-        longCheckDiv("ldiv_by_small_4", ldiv_by_small_4(x), x, 9l);
-        longCheckDiv("ldiv_by_small_5", ldiv_by_small_5(x), x, 10l);
-        longCheckDiv("ldiv_by_small_6", ldiv_by_small_6(x), x, 11l);
-        longCheckDiv("ldiv_by_small_7", ldiv_by_small_7(x), x, 12l);
-        longCheckDiv("ldiv_by_small_8", ldiv_by_small_8(x), x, 13l);
-        longCheckDiv("ldiv_by_small_9", ldiv_by_small_9(x), x, 14l);
-        longCheckDiv("ldiv_by_small_10", ldiv_by_small_10(x), x, 15l);
-        longCheckRem("lrem_by_pow2_0", lrem_by_pow2_0(x), x, 1l);
-        longCheckRem("lrem_by_pow2_1", lrem_by_pow2_1(x), x, 2l);
-        longCheckRem("lrem_by_pow2_2", lrem_by_pow2_2(x), x, 4l);
-        longCheckRem("lrem_by_pow2_3", lrem_by_pow2_3(x), x, 8l);
-        longCheckRem("lrem_by_pow2_4", lrem_by_pow2_4(x), x, 16l);
-        longCheckRem("lrem_by_pow2_5", lrem_by_pow2_5(x), x, 32l);
-        longCheckRem("lrem_by_pow2_6", lrem_by_pow2_6(x), x, 64l);
-        longCheckRem("lrem_by_pow2_7", lrem_by_pow2_7(x), x, 128l);
-        longCheckRem("lrem_by_pow2_8", lrem_by_pow2_8(x), x, 256l);
-        longCheckRem("lrem_by_pow2_9", lrem_by_pow2_9(x), x, 512l);
-        longCheckRem("lrem_by_pow2_10", lrem_by_pow2_10(x), x, 1024l);
-        longCheckRem("lrem_by_pow2_11", lrem_by_pow2_11(x), x, 2048l);
-        longCheckRem("lrem_by_pow2_12", lrem_by_pow2_12(x), x, 4096l);
-        longCheckRem("lrem_by_pow2_13", lrem_by_pow2_13(x), x, 8192l);
-        longCheckRem("lrem_by_pow2_14", lrem_by_pow2_14(x), x, 16384l);
-        longCheckRem("lrem_by_pow2_15", lrem_by_pow2_15(x), x, 32768l);
-        longCheckRem("lrem_by_pow2_16", lrem_by_pow2_16(x), x, 65536l);
-        longCheckRem("lrem_by_pow2_17", lrem_by_pow2_17(x), x, 131072l);
-        longCheckRem("lrem_by_pow2_18", lrem_by_pow2_18(x), x, 262144l);
-        longCheckRem("lrem_by_pow2_19", lrem_by_pow2_19(x), x, 524288l);
-        longCheckRem("lrem_by_pow2_20", lrem_by_pow2_20(x), x, 1048576l);
-        longCheckRem("lrem_by_pow2_21", lrem_by_pow2_21(x), x, 2097152l);
-        longCheckRem("lrem_by_pow2_22", lrem_by_pow2_22(x), x, 4194304l);
-        longCheckRem("lrem_by_pow2_23", lrem_by_pow2_23(x), x, 8388608l);
-        longCheckRem("lrem_by_pow2_24", lrem_by_pow2_24(x), x, 16777216l);
-        longCheckRem("lrem_by_pow2_25", lrem_by_pow2_25(x), x, 33554432l);
-        longCheckRem("lrem_by_pow2_26", lrem_by_pow2_26(x), x, 67108864l);
-        longCheckRem("lrem_by_pow2_27", lrem_by_pow2_27(x), x, 134217728l);
-        longCheckRem("lrem_by_pow2_28", lrem_by_pow2_28(x), x, 268435456l);
-        longCheckRem("lrem_by_pow2_29", lrem_by_pow2_29(x), x, 536870912l);
-        longCheckRem("lrem_by_pow2_30", lrem_by_pow2_30(x), x, 1073741824l);
-        longCheckRem("lrem_by_pow2_31", lrem_by_pow2_31(x), x, 2147483648l);
-        longCheckRem("lrem_by_pow2_32", lrem_by_pow2_32(x), x, 4294967296l);
-        longCheckRem("lrem_by_pow2_33", lrem_by_pow2_33(x), x, 8589934592l);
-        longCheckRem("lrem_by_pow2_34", lrem_by_pow2_34(x), x, 17179869184l);
-        longCheckRem("lrem_by_pow2_35", lrem_by_pow2_35(x), x, 34359738368l);
-        longCheckRem("lrem_by_pow2_36", lrem_by_pow2_36(x), x, 68719476736l);
-        longCheckRem("lrem_by_pow2_37", lrem_by_pow2_37(x), x, 137438953472l);
-        longCheckRem("lrem_by_pow2_38", lrem_by_pow2_38(x), x, 274877906944l);
-        longCheckRem("lrem_by_pow2_39", lrem_by_pow2_39(x), x, 549755813888l);
-        longCheckRem("lrem_by_pow2_40", lrem_by_pow2_40(x), x, 1099511627776l);
-        longCheckRem("lrem_by_pow2_41", lrem_by_pow2_41(x), x, 2199023255552l);
-        longCheckRem("lrem_by_pow2_42", lrem_by_pow2_42(x), x, 4398046511104l);
-        longCheckRem("lrem_by_pow2_43", lrem_by_pow2_43(x), x, 8796093022208l);
-        longCheckRem("lrem_by_pow2_44", lrem_by_pow2_44(x), x, 17592186044416l);
-        longCheckRem("lrem_by_pow2_45", lrem_by_pow2_45(x), x, 35184372088832l);
-        longCheckRem("lrem_by_pow2_46", lrem_by_pow2_46(x), x, 70368744177664l);
-        longCheckRem("lrem_by_pow2_47", lrem_by_pow2_47(x), x, 140737488355328l);
-        longCheckRem("lrem_by_pow2_48", lrem_by_pow2_48(x), x, 281474976710656l);
-        longCheckRem("lrem_by_pow2_49", lrem_by_pow2_49(x), x, 562949953421312l);
-        longCheckRem("lrem_by_pow2_50", lrem_by_pow2_50(x), x, 1125899906842624l);
-        longCheckRem("lrem_by_pow2_51", lrem_by_pow2_51(x), x, 2251799813685248l);
-        longCheckRem("lrem_by_pow2_52", lrem_by_pow2_52(x), x, 4503599627370496l);
-        longCheckRem("lrem_by_pow2_53", lrem_by_pow2_53(x), x, 9007199254740992l);
-        longCheckRem("lrem_by_pow2_54", lrem_by_pow2_54(x), x, 18014398509481984l);
-        longCheckRem("lrem_by_pow2_55", lrem_by_pow2_55(x), x, 36028797018963968l);
-        longCheckRem("lrem_by_pow2_56", lrem_by_pow2_56(x), x, 72057594037927936l);
-        longCheckRem("lrem_by_pow2_57", lrem_by_pow2_57(x), x, 144115188075855872l);
-        longCheckRem("lrem_by_pow2_58", lrem_by_pow2_58(x), x, 288230376151711744l);
-        longCheckRem("lrem_by_pow2_59", lrem_by_pow2_59(x), x, 576460752303423488l);
-        longCheckRem("lrem_by_pow2_60", lrem_by_pow2_60(x), x, 1152921504606846976l);
-        longCheckRem("lrem_by_pow2_61", lrem_by_pow2_61(x), x, 2305843009213693952l);
-        longCheckRem("lrem_by_pow2_62", lrem_by_pow2_62(x), x, 4611686018427387904l);
-    }
-
-    public static void main(String[] args) {
-      int i;
-      long l;
-
-      System.out.println("Begin");
-
-      System.out.println("Int: checking some equally spaced dividends...");
-      for (i = -1000; i < 1000; i += 300) {
-          intCheckAll(i);
-          intCheckAll(-i);
-      }
-
-      System.out.println("Int: checking small dividends...");
-      for (i = 1; i < 100; i += 1) {
-          intCheckAll(i);
-          intCheckAll(-i);
-      }
-
-      System.out.println("Int: checking big dividends...");
-      for (i = 0; i < 100; i += 1) {
-          intCheckAll(Integer.MAX_VALUE - i);
-          intCheckAll(Integer.MIN_VALUE + i);
-      }
-
-      System.out.println("Long: checking some equally spaced dividends...");
-      for (l = 0l; l < 1000000000000l; l += 300000000000l) {
-          longCheckAll(l);
-          longCheckAll(-l);
-      }
-
-      System.out.println("Long: checking small dividends...");
-      for (l = 1l; l < 100l; l += 1l) {
-          longCheckAll(l);
-          longCheckAll(-l);
-      }
-
-      System.out.println("Long: checking big dividends...");
-      for (l = 0l; l < 100l; l += 1l) {
-          longCheckAll(Long.MAX_VALUE - l);
-          longCheckAll(Long.MIN_VALUE + l);
-      }
-
-      System.out.println("End");
-    }
-}
diff --git a/test/702-LargeBranchOffset/build b/test/702-LargeBranchOffset/build
index eacf730..20030fa 100644
--- a/test/702-LargeBranchOffset/build
+++ b/test/702-LargeBranchOffset/build
@@ -17,11 +17,7 @@
 # Stop if something fails.
 set -e
 
-# Write out a bunch of source files.
+# Write out the source file.
 cpp -P src/Main.java.in src/Main.java
 
-mkdir classes
-${JAVAC} -d classes src/*.java
-
-${DX} --debug --dex --output=classes.dex classes
-zip $TEST_NAME.jar classes.dex
+./default-build
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index c5abd46..07e7620 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -95,7 +95,7 @@
   RELOCATE_TYPES += no-relocate
 endif
 ifeq ($(ART_TEST_RUN_TEST_RELOCATE_NO_PATCHOAT),true)
-  RELOCATE_TYPES := relocate-npatchoat
+  RELOCATE_TYPES += relocate-npatchoat
 endif
 TRACE_TYPES := ntrace
 ifeq ($(ART_TEST_TRACE),true)
@@ -250,6 +250,12 @@
     $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES),$(IMAGE_TYPES), \
     $(PICTEST_TYPES),$(DEBUGGABLE_TYPES),130-hprof,$(ALL_ADDRESS_SIZES))
 
+# 131 is an old test. The functionality has been implemented at an earlier stage and is checked
+# in tests 138.
+ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
+    $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES),$(IMAGE_TYPES), \
+    $(PICTEST_TYPES),$(DEBUGGABLE_TYPES),131-structural-change,$(ALL_ADDRESS_SIZES))
+
 # All these tests check that we have sane behavior if we don't have a patchoat or dex2oat.
 # Therefore we shouldn't run them in situations where we actually don't have these since they
 # explicitly test for them. These all also assume we have an image.
@@ -257,7 +263,12 @@
   116-nodex2oat \
   117-nopatchoat \
   118-noimage-dex2oat \
-  119-noimage-patchoat
+  119-noimage-patchoat \
+  138-duplicate-classes-check2
+
+# This test fails without an image.
+TEST_ART_BROKEN_NO_IMAGE_RUN_TESTS := \
+  138-duplicate-classes-check
 
 ifneq (,$(filter no-dex2oat,$(PREBUILD_TYPES)))
   ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),no-dex2oat, \
@@ -270,6 +281,9 @@
   ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
       $(COMPILER_TYPES), $(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES),no-image, \
       $(PICTEST_TYPES), $(DEBUGGABLE_TYPES), $(TEST_ART_BROKEN_FALLBACK_RUN_TESTS),$(ALL_ADDRESS_SIZES))
+  ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
+      $(COMPILER_TYPES), $(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES),no-image, \
+      $(PICTEST_TYPES), $(DEBUGGABLE_TYPES), $(TEST_ART_BROKEN_NO_IMAGE_RUN_TESTS),$(ALL_ADDRESS_SIZES))
 endif
 
 ifneq (,$(filter relocate-npatchoat,$(RELOCATE_TYPES)))
@@ -371,8 +385,6 @@
 
 # Known broken tests for the optimizing compiler.
 TEST_ART_BROKEN_OPTIMIZING_RUN_TESTS :=
-TEST_ART_BROKEN_OPTIMIZING_RUN_TESTS += 099-vmdebug # b/18098594
-TEST_ART_BROKEN_OPTIMIZING_RUN_TESTS += 802-deoptimization # b/18547544
 
 ifneq (,$(filter optimizing,$(COMPILER_TYPES)))
   ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
@@ -415,6 +427,16 @@
 
 TEST_ART_BROKEN_OPTIMIZING_DEBUGGABLE_RUN_TESTS :=
 
+# Tests that should fail in the read barrier configuration.
+TEST_ART_BROKEN_READ_BARRIER_RUN_TESTS :=
+
+ifeq ($(ART_USE_READ_BARRIER),true)
+  ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
+      $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
+      $(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES),$(TEST_ART_BROKEN_READ_BARRIER_RUN_TESTS),$(ALL_ADDRESS_SIZES))
+endif
+
+TEST_ART_BROKEN_READ_BARRIER_RUN_TESTS :=
 
 # Clear variables ahead of appending to them when defining tests.
 $(foreach target, $(TARGET_TYPES), $(eval ART_RUN_TEST_$(call name-to-var,$(target))_RULES :=))
diff --git a/test/Instrumentation/Instrumentation.java b/test/Instrumentation/Instrumentation.java
new file mode 100644
index 0000000..09d4342
--- /dev/null
+++ b/test/Instrumentation/Instrumentation.java
@@ -0,0 +1,22 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Instrumentation {
+  // Direct method
+  private void instanceMethod() {
+    System.out.println("instanceMethod");
+  }
+}
diff --git a/test/etc/run-test-jar b/test/etc/run-test-jar
index 8dd7573..1c44958 100755
--- a/test/etc/run-test-jar
+++ b/test/etc/run-test-jar
@@ -364,6 +364,7 @@
              export ANDROID_ROOT=$ANDROID_ROOT && \
              $mkdir_cmdline && \
              export LD_LIBRARY_PATH=$LD_LIBRARY_PATH && \
+             export PATH=$ANDROID_ROOT/bin:$PATH && \
              $dex2oat_cmdline && \
              $dalvikvm_cmdline"
 
diff --git a/test/run-test b/test/run-test
index 2873a35..54c6bbd 100755
--- a/test/run-test
+++ b/test/run-test
@@ -39,7 +39,7 @@
 else
   tmp_dir="${TMPDIR}/$USER/${test_dir}"
 fi
-checker="${progdir}/../tools/checker.py"
+checker="${progdir}/../tools/checker/checker.py"
 
 export JAVA="java"
 export JAVAC="javac -g"
@@ -501,14 +501,20 @@
 
 if [ '!' -r "$build" ]; then
     cp "${progdir}/etc/default-build" build
+else
+    cp "${progdir}/etc/default-build" .
 fi
 
 if [ '!' -r "$run" ]; then
     cp "${progdir}/etc/default-run" run
+else
+    cp "${progdir}/etc/default-run" .
 fi
 
 if [ '!' -r "$check_cmd" ]; then
     cp "${progdir}/etc/default-check" check
+else
+    cp "${progdir}/etc/default-check" .
 fi
 
 chmod 755 "$build"
diff --git a/tools/art b/tools/art
index 6c89a60..f167a73 100644
--- a/tools/art
+++ b/tools/art
@@ -92,8 +92,10 @@
 ANDROID_DATA=$ANDROID_DATA \
   ANDROID_ROOT=$ANDROID_ROOT \
   LD_LIBRARY_PATH=$LD_LIBRARY_PATH \
+  PATH=$ANDROID_ROOT/bin:$PATH \
   $invoke_with $ANDROID_ROOT/bin/$DALVIKVM $lib \
     -XXlib:$LIBART \
+    -Xnorelocate \
     -Ximage:$ANDROID_ROOT/framework/core.art \
     -Xcompiler-option --include-debug-symbols \
     "$@"
diff --git a/tools/checker.py b/tools/checker.py
deleted file mode 100755
index 0bce236..0000000
--- a/tools/checker.py
+++ /dev/null
@@ -1,777 +0,0 @@
-#!/usr/bin/env python2
-#
-# Copyright (C) 2014 The Android Open Source Project
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-# Checker is a testing tool which compiles a given test file and compares the
-# state of the control-flow graph before and after each optimization pass
-# against a set of assertions specified alongside the tests.
-#
-# Tests are written in Java, turned into DEX and compiled with the Optimizing
-# compiler. "Check lines" are assertions formatted as comments of the Java file.
-# They begin with prefix 'CHECK' followed by a pattern that the engine attempts
-# to match in the compiler-generated output.
-#
-# Assertions are tested in groups which correspond to the individual compiler
-# passes. Each group of check lines therefore must start with a 'CHECK-START'
-# header which specifies the output group it should be tested against. The group
-# name must exactly match one of the groups recognized in the output (they can
-# be listed with the '--list-groups' command-line flag).
-#
-# Matching of check lines is carried out in the order of appearance in the
-# source file. There are three types of check lines:
-#  - CHECK:     Must match an output line which appears in the output group
-#               later than lines matched against any preceeding checks. Output
-#               lines must therefore match the check lines in the same order.
-#               These are referred to as "in-order" checks in the code.
-#  - CHECK-DAG: Must match an output line which appears in the output group
-#               later than lines matched against any preceeding in-order checks.
-#               In other words, the order of output lines does not matter
-#               between consecutive DAG checks.
-#  - CHECK-NOT: Must not match any output line which appears in the output group
-#               later than lines matched against any preceeding checks and
-#               earlier than lines matched against any subsequent checks.
-#               Surrounding non-negative checks (or boundaries of the group)
-#               therefore create a scope within which the assertion is verified.
-#
-# Check-line patterns are treated as plain text rather than regular expressions
-# but are whitespace agnostic.
-#
-# Actual regex patterns can be inserted enclosed in '{{' and '}}' brackets. If
-# curly brackets need to be used inside the body of the regex, they need to be
-# enclosed in round brackets. For example, the pattern '{{foo{2}}}' will parse
-# the invalid regex 'foo{2', but '{{(fo{2})}}' will match 'foo'.
-#
-# Regex patterns can be named and referenced later. A new variable is defined
-# with '[[name:regex]]' and can be referenced with '[[name]]'. Variables are
-# only valid within the scope of the defining group. Within a group they cannot
-# be redefined or used undefined.
-#
-# Example:
-#   The following assertions can be placed in a Java source file:
-#
-#   // CHECK-START: int MyClass.MyMethod() constant_folding (after)
-#   // CHECK:         [[ID:i[0-9]+]] IntConstant {{11|22}}
-#   // CHECK:                        Return [ [[ID]] ]
-#
-#   The engine will attempt to match the check lines against the output of the
-#   group named on the first line. Together they verify that the CFG after
-#   constant folding returns an integer constant with value either 11 or 22.
-#
-
-from __future__ import print_function
-import argparse
-import os
-import re
-import shutil
-import sys
-import tempfile
-
-class Logger(object):
-
-  class Level(object):
-    NoOutput, Error, Info = range(3)
-
-  class Color(object):
-    Default, Blue, Gray, Purple, Red = range(5)
-
-    @staticmethod
-    def terminalCode(color, out=sys.stdout):
-      if not out.isatty():
-        return ''
-      elif color == Logger.Color.Blue:
-        return '\033[94m'
-      elif color == Logger.Color.Gray:
-        return '\033[37m'
-      elif color == Logger.Color.Purple:
-        return '\033[95m'
-      elif color == Logger.Color.Red:
-        return '\033[91m'
-      else:
-        return '\033[0m'
-
-  Verbosity = Level.Info
-
-  @staticmethod
-  def log(text, level=Level.Info, color=Color.Default, newLine=True, out=sys.stdout):
-    if level <= Logger.Verbosity:
-      text = Logger.Color.terminalCode(color, out) + text + \
-             Logger.Color.terminalCode(Logger.Color.Default, out)
-      if newLine:
-        print(text, file=out)
-      else:
-        print(text, end="", file=out)
-      out.flush()
-
-  @staticmethod
-  def fail(msg, file=None, line=-1):
-    location = ""
-    if file:
-      location += file + ":"
-    if line > 0:
-      location += str(line) + ":"
-    if location:
-      location += " "
-
-    Logger.log(location, Logger.Level.Error, color=Logger.Color.Gray, newLine=False, out=sys.stderr)
-    Logger.log("error: ", Logger.Level.Error, color=Logger.Color.Red, newLine=False, out=sys.stderr)
-    Logger.log(msg, Logger.Level.Error, out=sys.stderr)
-    sys.exit(msg)
-
-  @staticmethod
-  def startTest(name):
-    Logger.log("TEST ", color=Logger.Color.Purple, newLine=False)
-    Logger.log(name + "... ", newLine=False)
-
-  @staticmethod
-  def testPassed():
-    Logger.log("PASS", color=Logger.Color.Blue)
-
-  @staticmethod
-  def testFailed(msg, file=None, line=-1):
-    Logger.log("FAIL", color=Logger.Color.Red)
-    Logger.fail(msg, file, line)
-
-class CommonEqualityMixin:
-  """Mixin for class equality as equality of the fields."""
-  def __eq__(self, other):
-    return (isinstance(other, self.__class__)
-           and self.__dict__ == other.__dict__)
-
-  def __ne__(self, other):
-    return not self.__eq__(other)
-
-  def __repr__(self):
-    return "<%s: %s>" % (type(self).__name__, str(self.__dict__))
-
-
-class CheckElement(CommonEqualityMixin):
-  """Single element of the check line."""
-
-  class Variant(object):
-    """Supported language constructs."""
-    Text, Pattern, VarRef, VarDef, Separator = range(5)
-
-  rStartOptional = r"("
-  rEndOptional = r")?"
-
-  rName = r"([a-zA-Z][a-zA-Z0-9]*)"
-  rRegex = r"(.+?)"
-  rPatternStartSym = r"(\{\{)"
-  rPatternEndSym = r"(\}\})"
-  rVariableStartSym = r"(\[\[)"
-  rVariableEndSym = r"(\]\])"
-  rVariableSeparator = r"(:)"
-
-  regexPattern = rPatternStartSym + rRegex + rPatternEndSym
-  regexVariable = rVariableStartSym + \
-                    rName + \
-                    (rStartOptional + rVariableSeparator + rRegex + rEndOptional) + \
-                  rVariableEndSym
-
-  def __init__(self, variant, name, pattern):
-    self.variant = variant
-    self.name = name
-    self.pattern = pattern
-
-  @staticmethod
-  def newSeparator():
-    return CheckElement(CheckElement.Variant.Separator, None, None)
-
-  @staticmethod
-  def parseText(text):
-    return CheckElement(CheckElement.Variant.Text, None, re.escape(text))
-
-  @staticmethod
-  def parsePattern(patternElem):
-    return CheckElement(CheckElement.Variant.Pattern, None, patternElem[2:-2])
-
-  @staticmethod
-  def parseVariable(varElem):
-    colonPos = varElem.find(":")
-    if colonPos == -1:
-      # Variable reference
-      name = varElem[2:-2]
-      return CheckElement(CheckElement.Variant.VarRef, name, None)
-    else:
-      # Variable definition
-      name = varElem[2:colonPos]
-      body = varElem[colonPos+1:-2]
-      return CheckElement(CheckElement.Variant.VarDef, name, body)
-
-class CheckLine(CommonEqualityMixin):
-  """Representation of a single assertion in the check file formed of one or
-     more regex elements. Matching against an output line is successful only
-     if all regex elements can be matched in the given order."""
-
-  class Variant(object):
-    """Supported types of assertions."""
-    InOrder, DAG, Not = range(3)
-
-  def __init__(self, content, variant=Variant.InOrder, fileName=None, lineNo=-1):
-    self.fileName = fileName
-    self.lineNo = lineNo
-    self.content = content.strip()
-
-    self.variant = variant
-    self.lineParts = self.__parse(self.content)
-    if not self.lineParts:
-      Logger.fail("Empty check line", self.fileName, self.lineNo)
-
-    if self.variant == CheckLine.Variant.Not:
-      for elem in self.lineParts:
-        if elem.variant == CheckElement.Variant.VarDef:
-          Logger.fail("CHECK-NOT lines cannot define variables", self.fileName, self.lineNo)
-
-  def __eq__(self, other):
-    return (isinstance(other, self.__class__) and
-            self.variant == other.variant and
-            self.lineParts == other.lineParts)
-
-  # Returns True if the given Match object was at the beginning of the line.
-  def __isMatchAtStart(self, match):
-    return (match is not None) and (match.start() == 0)
-
-  # Takes in a list of Match objects and returns the minimal start point among
-  # them. If there aren't any successful matches it returns the length of
-  # the searched string.
-  def __firstMatch(self, matches, string):
-    starts = map(lambda m: len(string) if m is None else m.start(), matches)
-    return min(starts)
-
-  # This method parses the content of a check line stripped of the initial
-  # comment symbol and the CHECK keyword.
-  def __parse(self, line):
-    lineParts = []
-    # Loop as long as there is something to parse.
-    while line:
-      # Search for the nearest occurrence of the special markers.
-      matchWhitespace = re.search(r"\s+", line)
-      matchPattern = re.search(CheckElement.regexPattern, line)
-      matchVariable = re.search(CheckElement.regexVariable, line)
-
-      # If one of the above was identified at the current position, extract them
-      # from the line, parse them and add to the list of line parts.
-      if self.__isMatchAtStart(matchWhitespace):
-        # A whitespace in the check line creates a new separator of line parts.
-        # This allows for ignored output between the previous and next parts.
-        line = line[matchWhitespace.end():]
-        lineParts.append(CheckElement.newSeparator())
-      elif self.__isMatchAtStart(matchPattern):
-        pattern = line[0:matchPattern.end()]
-        line = line[matchPattern.end():]
-        lineParts.append(CheckElement.parsePattern(pattern))
-      elif self.__isMatchAtStart(matchVariable):
-        var = line[0:matchVariable.end()]
-        line = line[matchVariable.end():]
-        lineParts.append(CheckElement.parseVariable(var))
-      else:
-        # If we're not currently looking at a special marker, this is a plain
-        # text match all the way until the first special marker (or the end
-        # of the line).
-        firstMatch = self.__firstMatch([ matchWhitespace, matchPattern, matchVariable ], line)
-        text = line[0:firstMatch]
-        line = line[firstMatch:]
-        lineParts.append(CheckElement.parseText(text))
-    return lineParts
-
-  # Returns the regex pattern to be matched in the output line. Variable
-  # references are substituted with their current values provided in the
-  # 'varState' argument.
-  # An exception is raised if a referenced variable is undefined.
-  def __generatePattern(self, linePart, varState):
-    if linePart.variant == CheckElement.Variant.VarRef:
-      try:
-        return re.escape(varState[linePart.name])
-      except KeyError:
-        Logger.testFailed("Use of undefined variable \"" + linePart.name + "\"",
-                          self.fileName, self.lineNo)
-    else:
-      return linePart.pattern
-
-  def __isSeparated(self, outputLine, matchStart):
-    return (matchStart == 0) or (outputLine[matchStart - 1:matchStart].isspace())
-
-  # Attempts to match the check line against a line from the output file with
-  # the given initial variable values. It returns the new variable state if
-  # successful and None otherwise.
-  def match(self, outputLine, initialVarState):
-    # Do the full matching on a shadow copy of the variable state. If the
-    # matching fails half-way, we will not need to revert the state.
-    varState = dict(initialVarState)
-
-    matchStart = 0
-    isAfterSeparator = True
-
-    # Now try to parse all of the parts of the check line in the right order.
-    # Variable values are updated on-the-fly, meaning that a variable can
-    # be referenced immediately after its definition.
-    for part in self.lineParts:
-      if part.variant == CheckElement.Variant.Separator:
-        isAfterSeparator = True
-        continue
-
-      # Find the earliest match for this line part.
-      pattern = self.__generatePattern(part, varState)
-      while True:
-        match = re.search(pattern, outputLine[matchStart:])
-        if (match is None) or (not isAfterSeparator and not self.__isMatchAtStart(match)):
-          return None
-        matchEnd = matchStart + match.end()
-        matchStart += match.start()
-
-        # Check if this is a valid match if we expect a whitespace separator
-        # before the matched text. Otherwise loop and look for another match.
-        if not isAfterSeparator or self.__isSeparated(outputLine, matchStart):
-          break
-        else:
-          matchStart += 1
-
-      if part.variant == CheckElement.Variant.VarDef:
-        if part.name in varState:
-          Logger.testFailed("Multiple definitions of variable \"" + part.name + "\"",
-                            self.fileName, self.lineNo)
-        varState[part.name] = outputLine[matchStart:matchEnd]
-
-      matchStart = matchEnd
-      isAfterSeparator = False
-
-    # All parts were successfully matched. Return the new variable state.
-    return varState
-
-
-class CheckGroup(CommonEqualityMixin):
-  """Represents a named collection of check lines which are to be matched
-     against an output group of the same name."""
-
-  def __init__(self, name, lines, fileName=None, lineNo=-1):
-    self.fileName = fileName
-    self.lineNo = lineNo
-
-    if not name:
-      Logger.fail("Check group does not have a name", self.fileName, self.lineNo)
-    if not lines:
-      Logger.fail("Check group does not have a body", self.fileName, self.lineNo)
-
-    self.name = name
-    self.lines = lines
-
-  def __eq__(self, other):
-    return (isinstance(other, self.__class__) and
-            self.name == other.name and
-            self.lines == other.lines)
-
-  def __headAndTail(self, list):
-    return list[0], list[1:]
-
-  # Splits a list of check lines at index 'i' such that lines[i] is the first
-  # element whose variant is not equal to the given parameter.
-  def __splitByVariant(self, lines, variant):
-    i = 0
-    while i < len(lines) and lines[i].variant == variant:
-      i += 1
-    return lines[:i], lines[i:]
-
-  # Extracts the first sequence of check lines which are independent of each
-  # other's match location, i.e. either consecutive DAG lines or a single
-  # InOrder line. Any Not lines preceeding this sequence are also extracted.
-  def __nextIndependentChecks(self, checkLines):
-    notChecks, checkLines = self.__splitByVariant(checkLines, CheckLine.Variant.Not)
-    if not checkLines:
-      return notChecks, [], []
-
-    head, tail = self.__headAndTail(checkLines)
-    if head.variant == CheckLine.Variant.InOrder:
-      return notChecks, [head], tail
-    else:
-      assert head.variant == CheckLine.Variant.DAG
-      independentChecks, checkLines = self.__splitByVariant(checkLines, CheckLine.Variant.DAG)
-      return notChecks, independentChecks, checkLines
-
-  # If successful, returns the line number of the first output line matching the
-  # check line and the updated variable state. Otherwise returns -1 and None,
-  # respectively. The 'lineFilter' parameter can be used to supply a list of
-  # line numbers (counting from 1) which should be skipped.
-  def __findFirstMatch(self, checkLine, outputLines, startLineNo, lineFilter, varState):
-    matchLineNo = startLineNo
-    for outputLine in outputLines:
-      if matchLineNo not in lineFilter:
-        newVarState = checkLine.match(outputLine, varState)
-        if newVarState is not None:
-          return matchLineNo, newVarState
-      matchLineNo += 1
-    return -1, None
-
-  # Matches the given positive check lines against the output in order of
-  # appearance. Variable state is propagated but the scope of the search remains
-  # the same for all checks. Each output line can only be matched once.
-  # If all check lines are matched, the resulting variable state is returned
-  # together with the remaining output. The function also returns output lines
-  # which appear before either of the matched lines so they can be tested
-  # against Not checks.
-  def __matchIndependentChecks(self, checkLines, outputLines, startLineNo, varState):
-    # If no checks are provided, skip over the entire output.
-    if not checkLines:
-      return outputLines, [], startLineNo + len(outputLines), varState
-
-    # Keep track of which lines have been matched.
-    matchedLines = []
-
-    # Find first unused output line which matches each check line.
-    for checkLine in checkLines:
-      matchLineNo, varState = \
-        self.__findFirstMatch(checkLine, outputLines, startLineNo, matchedLines, varState)
-      if varState is None:
-        Logger.testFailed("Could not match check line \"" + checkLine.content + "\" " +
-                          "starting from output line " + str(startLineNo),
-                          self.fileName, checkLine.lineNo)
-      matchedLines.append(matchLineNo)
-
-    # Return new variable state and the output lines which lie outside the
-    # match locations of this independent group.
-    minMatchLineNo = min(matchedLines)
-    maxMatchLineNo = max(matchedLines)
-    preceedingLines = outputLines[:minMatchLineNo - startLineNo]
-    remainingLines = outputLines[maxMatchLineNo - startLineNo + 1:]
-    return preceedingLines, remainingLines, maxMatchLineNo + 1, varState
-
-  # Makes sure that the given check lines do not match any of the given output
-  # lines. Variable state does not change.
-  def __matchNotLines(self, checkLines, outputLines, startLineNo, varState):
-    for checkLine in checkLines:
-      assert checkLine.variant == CheckLine.Variant.Not
-      matchLineNo, matchVarState = \
-        self.__findFirstMatch(checkLine, outputLines, startLineNo, [], varState)
-      if matchVarState is not None:
-        Logger.testFailed("CHECK-NOT line \"" + checkLine.content + "\" matches output line " + \
-                          str(matchLineNo), self.fileName, checkLine.lineNo)
-
-  # Matches the check lines in this group against an output group. It is
-  # responsible for running the checks in the right order and scope, and
-  # for propagating the variable state between the check lines.
-  def match(self, outputGroup):
-    varState = {}
-    checkLines = self.lines
-    outputLines = outputGroup.body
-    startLineNo = outputGroup.lineNo
-
-    while checkLines:
-      # Extract the next sequence of location-independent checks to be matched.
-      notChecks, independentChecks, checkLines = self.__nextIndependentChecks(checkLines)
-
-      # Match the independent checks.
-      notOutput, outputLines, newStartLineNo, newVarState = \
-        self.__matchIndependentChecks(independentChecks, outputLines, startLineNo, varState)
-
-      # Run the Not checks against the output lines which lie between the last
-      # two independent groups or the bounds of the output.
-      self.__matchNotLines(notChecks, notOutput, startLineNo, varState)
-
-      # Update variable state.
-      startLineNo = newStartLineNo
-      varState = newVarState
-
-class OutputGroup(CommonEqualityMixin):
-  """Represents a named part of the test output against which a check group of
-     the same name is to be matched."""
-
-  def __init__(self, name, body, fileName=None, lineNo=-1):
-    if not name:
-      Logger.fail("Output group does not have a name", fileName, lineNo)
-    if not body:
-      Logger.fail("Output group does not have a body", fileName, lineNo)
-
-    self.name = name
-    self.body = body
-    self.lineNo = lineNo
-
-  def __eq__(self, other):
-    return (isinstance(other, self.__class__) and
-            self.name == other.name and
-            self.body == other.body)
-
-
-class FileSplitMixin(object):
-  """Mixin for representing text files which need to be split into smaller
-     chunks before being parsed."""
-
-  def _parseStream(self, stream):
-    lineNo = 0
-    allGroups = []
-    currentGroup = None
-
-    for line in stream:
-      lineNo += 1
-      line = line.strip()
-      if not line:
-        continue
-
-      # Let the child class process the line and return information about it.
-      # The _processLine method can modify the content of the line (or delete it
-      # entirely) and specify whether it starts a new group.
-      processedLine, newGroupName = self._processLine(line, lineNo)
-      if newGroupName is not None:
-        currentGroup = (newGroupName, [], lineNo)
-        allGroups.append(currentGroup)
-      if processedLine is not None:
-        if currentGroup is not None:
-          currentGroup[1].append(processedLine)
-        else:
-          self._exceptionLineOutsideGroup(line, lineNo)
-
-    # Finally, take the generated line groups and let the child class process
-    # each one before storing the final outcome.
-    return list(map(lambda group: self._processGroup(group[0], group[1], group[2]), allGroups))
-
-
-class CheckFile(FileSplitMixin):
-  """Collection of check groups extracted from the input test file."""
-
-  def __init__(self, prefix, checkStream, fileName=None):
-    self.fileName = fileName
-    self.prefix = prefix
-    self.groups = self._parseStream(checkStream)
-
-  # Attempts to parse a check line. The regex searches for a comment symbol
-  # followed by the CHECK keyword, given attribute and a colon at the very
-  # beginning of the line. Whitespaces are ignored.
-  def _extractLine(self, prefix, line):
-    rIgnoreWhitespace = r"\s*"
-    rCommentSymbols = [r"//", r"#"]
-    regexPrefix = rIgnoreWhitespace + \
-                  r"(" + r"|".join(rCommentSymbols) + r")" + \
-                  rIgnoreWhitespace + \
-                  prefix + r":"
-
-    # The 'match' function succeeds only if the pattern is matched at the
-    # beginning of the line.
-    match = re.match(regexPrefix, line)
-    if match is not None:
-      return line[match.end():].strip()
-    else:
-      return None
-
-  # This function is invoked on each line of the check file and returns a pair
-  # which instructs the parser how the line should be handled. If the line is to
-  # be included in the current check group, it is returned in the first value.
-  # If the line starts a new check group, the name of the group is returned in
-  # the second value.
-  def _processLine(self, line, lineNo):
-    # Lines beginning with 'CHECK-START' start a new check group.
-    startLine = self._extractLine(self.prefix + "-START", line)
-    if startLine is not None:
-      return None, startLine
-
-    # Lines starting only with 'CHECK' are matched in order.
-    plainLine = self._extractLine(self.prefix, line)
-    if plainLine is not None:
-      return (plainLine, CheckLine.Variant.InOrder, lineNo), None
-
-    # 'CHECK-DAG' lines are no-order assertions.
-    dagLine = self._extractLine(self.prefix + "-DAG", line)
-    if dagLine is not None:
-      return (dagLine, CheckLine.Variant.DAG, lineNo), None
-
-    # 'CHECK-NOT' lines are no-order negative assertions.
-    notLine = self._extractLine(self.prefix + "-NOT", line)
-    if notLine is not None:
-      return (notLine, CheckLine.Variant.Not, lineNo), None
-
-    # Other lines are ignored.
-    return None, None
-
-  def _exceptionLineOutsideGroup(self, line, lineNo):
-    Logger.fail("Check line not inside a group", self.fileName, lineNo)
-
-  # Constructs a check group from the parser-collected check lines.
-  def _processGroup(self, name, lines, lineNo):
-    checkLines = list(map(lambda line: CheckLine(line[0], line[1], self.fileName, line[2]), lines))
-    return CheckGroup(name, checkLines, self.fileName, lineNo)
-
-  def match(self, outputFile):
-    for checkGroup in self.groups:
-      # TODO: Currently does not handle multiple occurrences of the same group
-      # name, e.g. when a pass is run multiple times. It will always try to
-      # match a check group against the first output group of the same name.
-      outputGroup = outputFile.findGroup(checkGroup.name)
-      if outputGroup is None:
-        Logger.fail("Group \"" + checkGroup.name + "\" not found in the output",
-                    self.fileName, checkGroup.lineNo)
-      Logger.startTest(checkGroup.name)
-      checkGroup.match(outputGroup)
-      Logger.testPassed()
-
-
-class OutputFile(FileSplitMixin):
-  """Representation of the output generated by the test and split into groups
-     within which the checks are performed.
-
-     C1visualizer format is parsed with a state machine which differentiates
-     between the 'compilation' and 'cfg' blocks. The former marks the beginning
-     of a method. It is parsed for the method's name but otherwise ignored. Each
-     subsequent CFG block represents one stage of the compilation pipeline and
-     is parsed into an output group named "<method name> <pass name>".
-     """
-
-  class ParsingState:
-    OutsideBlock, InsideCompilationBlock, StartingCfgBlock, InsideCfgBlock = range(4)
-
-  def __init__(self, outputStream, fileName=None):
-    self.fileName = fileName
-
-    # Initialize the state machine
-    self.lastMethodName = None
-    self.state = OutputFile.ParsingState.OutsideBlock
-    self.groups = self._parseStream(outputStream)
-
-  # This function is invoked on each line of the output file and returns a pair
-  # which instructs the parser how the line should be handled. If the line is to
-  # be included in the current group, it is returned in the first value. If the
-  # line starts a new output group, the name of the group is returned in the
-  # second value.
-  def _processLine(self, line, lineNo):
-    if self.state == OutputFile.ParsingState.StartingCfgBlock:
-      # Previous line started a new 'cfg' block which means that this one must
-      # contain the name of the pass (this is enforced by C1visualizer).
-      if re.match("name\s+\"[^\"]+\"", line):
-        # Extract the pass name, prepend it with the name of the method and
-        # return as the beginning of a new group.
-        self.state = OutputFile.ParsingState.InsideCfgBlock
-        return (None, self.lastMethodName + " " + line.split("\"")[1])
-      else:
-        Logger.fail("Expected output group name", self.fileName, lineNo)
-
-    elif self.state == OutputFile.ParsingState.InsideCfgBlock:
-      if line == "end_cfg":
-        self.state = OutputFile.ParsingState.OutsideBlock
-        return (None, None)
-      else:
-        return (line, None)
-
-    elif self.state == OutputFile.ParsingState.InsideCompilationBlock:
-      # Search for the method's name. Format: method "<name>"
-      if re.match("method\s+\"[^\"]*\"", line):
-        methodName = line.split("\"")[1].strip()
-        if not methodName:
-          Logger.fail("Empty method name in output", self.fileName, lineNo)
-        self.lastMethodName = methodName
-      elif line == "end_compilation":
-        self.state = OutputFile.ParsingState.OutsideBlock
-      return (None, None)
-
-    else:
-      assert self.state == OutputFile.ParsingState.OutsideBlock
-      if line == "begin_cfg":
-        # The line starts a new group but we'll wait until the next line from
-        # which we can extract the name of the pass.
-        if self.lastMethodName is None:
-          Logger.fail("Expected method header", self.fileName, lineNo)
-        self.state = OutputFile.ParsingState.StartingCfgBlock
-        return (None, None)
-      elif line == "begin_compilation":
-        self.state = OutputFile.ParsingState.InsideCompilationBlock
-        return (None, None)
-      else:
-        Logger.fail("Output line not inside a group", self.fileName, lineNo)
-
-  # Constructs an output group from the parser-collected output lines.
-  def _processGroup(self, name, lines, lineNo):
-    return OutputGroup(name, lines, self.fileName, lineNo + 1)
-
-  def findGroup(self, name):
-    for group in self.groups:
-      if group.name == name:
-        return group
-    return None
-
-
-def ParseArguments():
-  parser = argparse.ArgumentParser()
-  parser.add_argument("tested_file",
-                      help="text file the checks should be verified against")
-  parser.add_argument("source_path", nargs="?",
-                      help="path to file/folder with checking annotations")
-  parser.add_argument("--check-prefix", dest="check_prefix", default="CHECK", metavar="PREFIX",
-                      help="prefix of checks in the test files (default: CHECK)")
-  parser.add_argument("--list-groups", dest="list_groups", action="store_true",
-                      help="print a list of all groups found in the tested file")
-  parser.add_argument("--dump-group", dest="dump_group", metavar="GROUP",
-                      help="print the contents of an output group")
-  parser.add_argument("-q", "--quiet", action="store_true",
-                      help="print only errors")
-  return parser.parse_args()
-
-
-def ListGroups(outputFilename):
-  outputFile = OutputFile(open(outputFilename, "r"))
-  for group in outputFile.groups:
-    Logger.log(group.name)
-
-
-def DumpGroup(outputFilename, groupName):
-  outputFile = OutputFile(open(outputFilename, "r"))
-  group = outputFile.findGroup(groupName)
-  if group:
-    lineNo = group.lineNo
-    maxLineNo = lineNo + len(group.body)
-    lenLineNo = len(str(maxLineNo)) + 2
-    for line in group.body:
-      Logger.log((str(lineNo) + ":").ljust(lenLineNo) + line)
-      lineNo += 1
-  else:
-    Logger.fail("Group \"" + groupName + "\" not found in the output")
-
-
-# Returns a list of files to scan for check annotations in the given path. Path
-# to a file is returned as a single-element list, directories are recursively
-# traversed and all '.java' files returned.
-def FindCheckFiles(path):
-  if not path:
-    Logger.fail("No source path provided")
-  elif os.path.isfile(path):
-    return [ path ]
-  elif os.path.isdir(path):
-    foundFiles = []
-    for root, dirs, files in os.walk(path):
-      for file in files:
-        if os.path.splitext(file)[1] == ".java":
-          foundFiles.append(os.path.join(root, file))
-    return foundFiles
-  else:
-    Logger.fail("Source path \"" + path + "\" not found")
-
-
-def RunChecks(checkPrefix, checkPath, outputFilename):
-  outputBaseName = os.path.basename(outputFilename)
-  outputFile = OutputFile(open(outputFilename, "r"), outputBaseName)
-
-  for checkFilename in FindCheckFiles(checkPath):
-    checkBaseName = os.path.basename(checkFilename)
-    checkFile = CheckFile(checkPrefix, open(checkFilename, "r"), checkBaseName)
-    checkFile.match(outputFile)
-
-
-if __name__ == "__main__":
-  args = ParseArguments()
-
-  if args.quiet:
-    Logger.Verbosity = Logger.Level.Error
-
-  if args.list_groups:
-    ListGroups(args.tested_file)
-  elif args.dump_group:
-    DumpGroup(args.tested_file, args.dump_group)
-  else:
-    RunChecks(args.check_prefix, args.source_path, args.tested_file)
diff --git a/tools/checker/README b/tools/checker/README
new file mode 100644
index 0000000..2763948
--- /dev/null
+++ b/tools/checker/README
@@ -0,0 +1,54 @@
+Checker is a testing tool which compiles a given test file and compares the
+state of the control-flow graph before and after each optimization pass
+against a set of assertions specified alongside the tests.
+
+Tests are written in Java, turned into DEX and compiled with the Optimizing
+compiler. "Check lines" are assertions formatted as comments of the Java file.
+They begin with prefix 'CHECK' followed by a pattern that the engine attempts
+to match in the compiler-generated output.
+
+Assertions are tested in groups which correspond to the individual compiler
+passes. Each group of check lines therefore must start with a 'CHECK-START'
+header which specifies the output group it should be tested against. The group
+name must exactly match one of the groups recognized in the output (they can
+be listed with the '--list-passes' command-line flag).
+
+Matching of check lines is carried out in the order of appearance in the
+source file. There are three types of check lines:
+ - CHECK:     Must match an output line which appears in the output group
+              later than lines matched against any preceding checks. Output
+              lines must therefore match the check lines in the same order.
+              These are referred to as "in-order" checks in the code.
+ - CHECK-DAG: Must match an output line which appears in the output group
+              later than lines matched against any preceding in-order checks.
+              In other words, the order of output lines does not matter
+              between consecutive DAG checks.
+ - CHECK-NOT: Must not match any output line which appears in the output group
+              later than lines matched against any preceding checks and
+              earlier than lines matched against any subsequent checks.
+              Surrounding non-negative checks (or boundaries of the group)
+              therefore create a scope within which the assertion is verified.
+
+Check-line patterns are treated as plain text rather than regular expressions
+but are whitespace agnostic.
+
+Actual regex patterns can be inserted enclosed in '{{' and '}}' brackets. If
+curly brackets need to be used inside the body of the regex, they need to be
+enclosed in round brackets. For example, the pattern '{{foo{2}}}' will parse
+the invalid regex 'foo{2', but '{{(fo{2})}}' will match 'foo'.
+
+Regex patterns can be named and referenced later. A new variable is defined
+with '<<name:regex>>' and can be referenced with '<<name>>'. Variables are
+only valid within the scope of the defining group. Within a group they cannot
+be redefined or used undefined.
+
+Example:
+  The following assertions can be placed in a Java source file:
+
+  // CHECK-START: int MyClass.MyMethod() constant_folding (after)
+  // CHECK:         <<ID:i\d+>>  IntConstant {{11|22}}
+  // CHECK:                      Return [ <<ID>> ]
+
+  The engine will attempt to match the check lines against the output of the
+  group named on the first line. Together they verify that the CFG after
+  constant folding returns an integer constant with value either 11 or 22.
diff --git a/tools/checker/checker.py b/tools/checker/checker.py
new file mode 100755
index 0000000..ed630e3
--- /dev/null
+++ b/tools/checker/checker.py
@@ -0,0 +1,103 @@
+#!/usr/bin/env python2
+#
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import os
+
+from common.logger                    import Logger
+from file_format.c1visualizer.parser  import ParseC1visualizerStream
+from file_format.checker.parser       import ParseCheckerStream
+from match.file                       import MatchFiles
+
+def ParseArguments():
+  parser = argparse.ArgumentParser()
+  parser.add_argument("tested_file",
+                      help="text file the checks should be verified against")
+  parser.add_argument("source_path", nargs="?",
+                      help="path to file/folder with checking annotations")
+  parser.add_argument("--check-prefix", dest="check_prefix", default="CHECK", metavar="PREFIX",
+                      help="prefix of checks in the test files (default: CHECK)")
+  parser.add_argument("--list-passes", dest="list_passes", action="store_true",
+                      help="print a list of all passes found in the tested file")
+  parser.add_argument("--dump-pass", dest="dump_pass", metavar="PASS",
+                      help="print a compiler pass dump")
+  parser.add_argument("-q", "--quiet", action="store_true",
+                      help="print only errors")
+  return parser.parse_args()
+
+
+def ListPasses(outputFilename):
+  c1File = ParseC1visualizerStream(os.path.basename(outputFilename), open(outputFilename, "r"))
+  for compiler_pass in c1File.passes:
+    Logger.log(compiler_pass.name)
+
+
+def DumpPass(outputFilename, passName):
+  c1File = ParseC1visualizerStream(os.path.basename(outputFilename), open(outputFilename, "r"))
+  compiler_pass = c1File.findPass(passName)
+  if compiler_pass:
+    maxLineNo = compiler_pass.startLineNo + len(compiler_pass.body)
+    lenLineNo = len(str(maxLineNo)) + 2
+    curLineNo = compiler_pass.startLineNo
+    for line in compiler_pass.body:
+      Logger.log((str(curLineNo) + ":").ljust(lenLineNo) + line)
+      curLineNo += 1
+  else:
+    Logger.fail("Pass \"" + passName + "\" not found in the output")
+
+
+def FindCheckerFiles(path):
+  """ Returns a list of files to scan for check annotations in the given path.
+      Path to a file is returned as a single-element list, directories are
+      recursively traversed and all '.java' files returned.
+  """
+  if not path:
+    Logger.fail("No source path provided")
+  elif os.path.isfile(path):
+    return [ path ]
+  elif os.path.isdir(path):
+    foundFiles = []
+    for root, dirs, files in os.walk(path):
+      for file in files:
+        extension = os.path.splitext(file)[1]
+        if extension in [".java", ".smali"]:
+          foundFiles.append(os.path.join(root, file))
+    return foundFiles
+  else:
+    Logger.fail("Source path \"" + path + "\" not found")
+
+
+def RunTests(checkPrefix, checkPath, outputFilename):
+  c1File = ParseC1visualizerStream(os.path.basename(outputFilename), open(outputFilename, "r"))
+  for checkFilename in FindCheckerFiles(checkPath):
+    checkerFile = ParseCheckerStream(os.path.basename(checkFilename),
+                                     checkPrefix,
+                                     open(checkFilename, "r"))
+    MatchFiles(checkerFile, c1File)
+
+
+if __name__ == "__main__":
+  args = ParseArguments()
+
+  if args.quiet:
+    Logger.Verbosity = Logger.Level.Error
+
+  if args.list_passes:
+    ListPasses(args.tested_file)
+  elif args.dump_pass:
+    DumpPass(args.tested_file, args.dump_pass)
+  else:
+    RunTests(args.check_prefix, args.source_path, args.tested_file)
diff --git a/tools/checker/common/__init__.py b/tools/checker/common/__init__.py
new file mode 100644
index 0000000..d0a140b
--- /dev/null
+++ b/tools/checker/common/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/tools/checker/common/logger.py b/tools/checker/common/logger.py
new file mode 100644
index 0000000..28bb458
--- /dev/null
+++ b/tools/checker/common/logger.py
@@ -0,0 +1,81 @@
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+import sys
+
+class Logger(object):
+
+  class Level(object):
+    NoOutput, Error, Info = range(3)
+
+  class Color(object):
+    Default, Blue, Gray, Purple, Red = range(5)
+
+    @staticmethod
+    def terminalCode(color, out=sys.stdout):
+      if not out.isatty():
+        return ''
+      elif color == Logger.Color.Blue:
+        return '\033[94m'
+      elif color == Logger.Color.Gray:
+        return '\033[37m'
+      elif color == Logger.Color.Purple:
+        return '\033[95m'
+      elif color == Logger.Color.Red:
+        return '\033[91m'
+      else:
+        return '\033[0m'
+
+  Verbosity = Level.Info
+
+  @staticmethod
+  def log(text, level=Level.Info, color=Color.Default, newLine=True, out=sys.stdout):
+    if level <= Logger.Verbosity:
+      text = Logger.Color.terminalCode(color, out) + text + \
+             Logger.Color.terminalCode(Logger.Color.Default, out)
+      if newLine:
+        print(text, file=out)
+      else:
+        print(text, end="", file=out)
+      out.flush()
+
+  @staticmethod
+  def fail(msg, file=None, line=-1):
+    location = ""
+    if file:
+      location += file + ":"
+    if line > 0:
+      location += str(line) + ":"
+    if location:
+      location += " "
+
+    Logger.log(location, Logger.Level.Error, color=Logger.Color.Gray, newLine=False, out=sys.stderr)
+    Logger.log("error: ", Logger.Level.Error, color=Logger.Color.Red, newLine=False, out=sys.stderr)
+    Logger.log(msg, Logger.Level.Error, out=sys.stderr)
+    sys.exit(msg)
+
+  @staticmethod
+  def startTest(name):
+    Logger.log("TEST ", color=Logger.Color.Purple, newLine=False)
+    Logger.log(name + "... ", newLine=False)
+
+  @staticmethod
+  def testPassed():
+    Logger.log("PASS", color=Logger.Color.Blue)
+
+  @staticmethod
+  def testFailed(msg, file=None, line=-1):
+    Logger.log("FAIL", color=Logger.Color.Red)
+    Logger.fail(msg, file, line)
diff --git a/tools/checker/common/mixins.py b/tools/checker/common/mixins.py
new file mode 100644
index 0000000..819de24
--- /dev/null
+++ b/tools/checker/common/mixins.py
@@ -0,0 +1,26 @@
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+class EqualityMixin:
+  """ Object equality via equality of dictionaries. """
+
+  def __eq__(self, other):
+    return isinstance(other, self.__class__) \
+       and self.__dict__ == other.__dict__
+
+class PrintableMixin:
+  """ Prints object as name-dictionary pair. """
+
+  def __repr__(self):
+    return "<%s: %s>" % (type(self).__name__, str(self.__dict__))
diff --git a/tools/checker/common/testing.py b/tools/checker/common/testing.py
new file mode 100644
index 0000000..1299c07
--- /dev/null
+++ b/tools/checker/common/testing.py
@@ -0,0 +1,22 @@
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+def ToUnicode(string):
+  """ Converts a string into Unicode.
+
+  This is a delegate function for the built-in `unicode`. It checks if the input
+  is not `None`, because `unicode` turns it into an actual "None" string.
+  """
+  assert string is not None
+  return unicode(string)
diff --git a/tools/checker/file_format/__init__.py b/tools/checker/file_format/__init__.py
new file mode 100644
index 0000000..d0a140b
--- /dev/null
+++ b/tools/checker/file_format/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/tools/checker/file_format/c1visualizer/__init__.py b/tools/checker/file_format/c1visualizer/__init__.py
new file mode 100644
index 0000000..d0a140b
--- /dev/null
+++ b/tools/checker/file_format/c1visualizer/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/tools/checker/file_format/c1visualizer/parser.py b/tools/checker/file_format/c1visualizer/parser.py
new file mode 100644
index 0000000..335a195
--- /dev/null
+++ b/tools/checker/file_format/c1visualizer/parser.py
@@ -0,0 +1,87 @@
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from common.logger                   import Logger
+from file_format.common              import SplitStream
+from file_format.c1visualizer.struct import C1visualizerFile, C1visualizerPass
+
+import re
+
+class C1ParserState:
+  OutsideBlock, InsideCompilationBlock, StartingCfgBlock, InsideCfgBlock = range(4)
+
+  def __init__(self):
+    self.currentState = C1ParserState.OutsideBlock
+    self.lastMethodName = None
+
+def __parseC1Line(line, lineNo, state, fileName):
+  """ This function is invoked on each line of the output file and returns
+      a pair which instructs the parser how the line should be handled. If the
+      line is to be included in the current group, it is returned in the first
+      value. If the line starts a new output group, the name of the group is
+      returned in the second value.
+  """
+  if state.currentState == C1ParserState.StartingCfgBlock:
+    # Previous line started a new 'cfg' block which means that this one must
+    # contain the name of the pass (this is enforced by C1visualizer).
+    if re.match("name\s+\"[^\"]+\"", line):
+      # Extract the pass name, prepend it with the name of the method and
+      # return as the beginning of a new group.
+      state.currentState = C1ParserState.InsideCfgBlock
+      return (None, state.lastMethodName + " " + line.split("\"")[1])
+    else:
+      Logger.fail("Expected output group name", fileName, lineNo)
+
+  elif state.currentState == C1ParserState.InsideCfgBlock:
+    if line == "end_cfg":
+      state.currentState = C1ParserState.OutsideBlock
+      return (None, None)
+    else:
+      return (line, None)
+
+  elif state.currentState == C1ParserState.InsideCompilationBlock:
+    # Search for the method's name. Format: method "<name>"
+    if re.match("method\s+\"[^\"]*\"", line):
+      methodName = line.split("\"")[1].strip()
+      if not methodName:
+        Logger.fail("Empty method name in output", fileName, lineNo)
+      state.lastMethodName = methodName
+    elif line == "end_compilation":
+      state.currentState = C1ParserState.OutsideBlock
+    return (None, None)
+
+  else:
+    assert state.currentState == C1ParserState.OutsideBlock
+    if line == "begin_cfg":
+      # The line starts a new group but we'll wait until the next line from
+      # which we can extract the name of the pass.
+      if state.lastMethodName is None:
+        Logger.fail("Expected method header", fileName, lineNo)
+      state.currentState = C1ParserState.StartingCfgBlock
+      return (None, None)
+    elif line == "begin_compilation":
+      state.currentState = C1ParserState.InsideCompilationBlock
+      return (None, None)
+    else:
+      Logger.fail("C1visualizer line not inside a group", fileName, lineNo)
+
+def ParseC1visualizerStream(fileName, stream):
+  c1File = C1visualizerFile(fileName)
+  state = C1ParserState()
+  fnProcessLine = lambda line, lineNo: __parseC1Line(line, lineNo, state, fileName)
+  fnLineOutsideChunk = lambda line, lineNo: \
+      Logger.fail("C1visualizer line not inside a group", fileName, lineNo)
+  for passName, passLines, startLineNo in SplitStream(stream, fnProcessLine, fnLineOutsideChunk):
+    C1visualizerPass(c1File, passName, passLines, startLineNo + 1)
+  return c1File
diff --git a/tools/checker/file_format/c1visualizer/struct.py b/tools/checker/file_format/c1visualizer/struct.py
new file mode 100644
index 0000000..991564e
--- /dev/null
+++ b/tools/checker/file_format/c1visualizer/struct.py
@@ -0,0 +1,60 @@
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from common.logger import Logger
+from common.mixins import PrintableMixin
+
+class C1visualizerFile(PrintableMixin):
+
+  def __init__(self, fileName):
+    self.fileName = fileName
+    self.passes = []
+
+  def addPass(self, new_pass):
+    self.passes.append(new_pass)
+
+  def findPass(self, name):
+    for entry in self.passes:
+      if entry.name == name:
+        return entry
+    return None
+
+  def __eq__(self, other):
+    return isinstance(other, self.__class__) \
+       and self.passes == other.passes
+
+
+class C1visualizerPass(PrintableMixin):
+
+  def __init__(self, parent, name, body, startLineNo):
+    self.parent = parent
+    self.name = name
+    self.body = body
+    self.startLineNo = startLineNo
+
+    if not self.name:
+      Logger.fail("C1visualizer pass does not have a name", self.fileName, self.startLineNo)
+    if not self.body:
+      Logger.fail("C1visualizer pass does not have a body", self.fileName, self.startLineNo)
+
+    self.parent.addPass(self)
+
+  @property
+  def fileName(self):
+    return self.parent.fileName
+
+  def __eq__(self, other):
+    return isinstance(other, self.__class__) \
+       and self.name == other.name \
+       and self.body == other.body
diff --git a/tools/checker/file_format/c1visualizer/test.py b/tools/checker/file_format/c1visualizer/test.py
new file mode 100644
index 0000000..812a4cf
--- /dev/null
+++ b/tools/checker/file_format/c1visualizer/test.py
@@ -0,0 +1,105 @@
+#!/usr/bin/env python2
+#
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from common.testing                  import ToUnicode
+from file_format.c1visualizer.parser import ParseC1visualizerStream
+from file_format.c1visualizer.struct import C1visualizerFile, C1visualizerPass
+
+import io
+import unittest
+
+class C1visualizerParser_Test(unittest.TestCase):
+
+  def createFile(self, passList):
+    """ Creates an instance of C1visualizerFile from provided info.
+
+    Data format: [ ( <pass-name>, [ <output-line>, ... ] ), ... ]
+    """
+    c1File = C1visualizerFile("<c1_file>")
+    for passEntry in passList:
+      passName = passEntry[0]
+      passBody = passEntry[1]
+      c1Pass = C1visualizerPass(c1File, passName, passBody, 0)
+    return c1File
+
+  def assertParsesTo(self, c1Text, expectedData):
+    expectedFile = self.createFile(expectedData)
+    actualFile = ParseC1visualizerStream("<c1_file>", io.StringIO(ToUnicode(c1Text)))
+    return self.assertEqual(expectedFile, actualFile)
+
+  def test_EmptyFile(self):
+    self.assertParsesTo("", [])
+
+  def test_SingleGroup(self):
+    self.assertParsesTo(
+      """
+        begin_compilation
+          method "MyMethod"
+        end_compilation
+        begin_cfg
+          name "pass1"
+          foo
+          bar
+        end_cfg
+      """,
+      [ ( "MyMethod pass1", [ "foo", "bar" ] ) ])
+
+  def test_MultipleGroups(self):
+    self.assertParsesTo(
+      """
+        begin_compilation
+          name "xyz1"
+          method "MyMethod1"
+          date 1234
+        end_compilation
+        begin_cfg
+          name "pass1"
+          foo
+          bar
+        end_cfg
+        begin_cfg
+          name "pass2"
+          abc
+          def
+        end_cfg
+      """,
+      [ ( "MyMethod1 pass1", [ "foo", "bar" ] ),
+        ( "MyMethod1 pass2", [ "abc", "def" ] ) ])
+    self.assertParsesTo(
+      """
+        begin_compilation
+          name "xyz1"
+          method "MyMethod1"
+          date 1234
+        end_compilation
+        begin_cfg
+          name "pass1"
+          foo
+          bar
+        end_cfg
+        begin_compilation
+          name "xyz2"
+          method "MyMethod2"
+          date 5678
+        end_compilation
+        begin_cfg
+          name "pass2"
+          abc
+          def
+        end_cfg
+      """,
+      [ ( "MyMethod1 pass1", [ "foo", "bar" ] ),
+        ( "MyMethod2 pass2", [ "abc", "def" ] ) ])
diff --git a/tools/checker/file_format/checker/__init__.py b/tools/checker/file_format/checker/__init__.py
new file mode 100644
index 0000000..d0a140b
--- /dev/null
+++ b/tools/checker/file_format/checker/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/tools/checker/file_format/checker/parser.py b/tools/checker/file_format/checker/parser.py
new file mode 100644
index 0000000..d7a38da
--- /dev/null
+++ b/tools/checker/file_format/checker/parser.py
@@ -0,0 +1,142 @@
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from common.logger              import Logger
+from file_format.common         import SplitStream
+from file_format.checker.struct import CheckerFile, TestCase, TestAssertion, RegexExpression
+import re
+
+def __extractLine(prefix, line):
+  """ Attempts to parse a check line: optional whitespace, a comment symbol
+      ("//" or "#"), optional whitespace, `prefix` (used as a regex fragment,
+      not escaped) and a colon. Returns the stripped remainder, else None.
+  """
+  rIgnoreWhitespace = r"\s*"
+  rCommentSymbols = [r"//", r"#"]
+  regexPrefix = rIgnoreWhitespace + \
+                r"(" + r"|".join(rCommentSymbols) + r")" + \
+                rIgnoreWhitespace + \
+                prefix + r":"
+
+  # The 'match' function succeeds only if the pattern is matched at the
+  # beginning of the line.
+  match = re.match(regexPrefix, line)
+  if match is not None:
+    return line[match.end():].strip()
+  else:
+    return None
+
+def __processLine(line, lineNo, prefix):
+  """ This function is invoked on each line of the check file and returns a pair
+      which instructs the parser how the line should be handled. For an
+      assertion line the first value is a (text, variant, lineNo) tuple; for a
+      '-START' line the second value is the name of the new check group. The
+      other value is None, and both are None for lines that are ignored.
+  """
+  # Lines beginning with 'CHECK-START' start a new test case.
+  startLine = __extractLine(prefix + "-START", line)
+  if startLine is not None:
+    return None, startLine
+
+  # Lines starting only with 'CHECK' are matched in order.
+  plainLine = __extractLine(prefix, line)
+  if plainLine is not None:
+    return (plainLine, TestAssertion.Variant.InOrder, lineNo), None
+
+  # 'CHECK-DAG' lines are no-order assertions.
+  dagLine = __extractLine(prefix + "-DAG", line)
+  if dagLine is not None:
+    return (dagLine, TestAssertion.Variant.DAG, lineNo), None
+
+  # 'CHECK-NOT' lines are no-order negative assertions.
+  notLine = __extractLine(prefix + "-NOT", line)
+  if notLine is not None:
+    return (notLine, TestAssertion.Variant.Not, lineNo), None
+
+  # Other lines are ignored.
+  return None, None
+
+def __isMatchAtStart(match):
+  """ Tests if the given Match (or None) occurred at the beginning of the line. """
+  return (match is not None) and (match.start() == 0)
+
+def __firstMatch(matches, string):
+  """ Takes in a list of Match objects (None for a failed search) and returns
+      the minimal start point among them. Failed searches count as the length
+      of the searched string, so that is the result if nothing matched.
+  """
+  starts = map(lambda m: len(string) if m is None else m.start(), matches)
+  return min(starts)
+
+def ParseCheckerAssertion(parent, line, variant, lineNo):
+  """ This method parses the content of a check line stripped of the initial
+      comment symbol and the CHECK keyword, and returns the new TestAssertion.
+  """
+  assertion = TestAssertion(parent, variant, line, lineNo)
+  # Loop as long as there is something to parse.
+  while line:
+    # Search for the nearest occurrence of the special markers.
+    matchWhitespace = re.search(r"\s+", line)
+    matchPattern = re.search(RegexExpression.Regex.regexPattern, line)
+    matchVariableReference = re.search(RegexExpression.Regex.regexVariableReference, line)
+    matchVariableDefinition = re.search(RegexExpression.Regex.regexVariableDefinition, line)
+
+    # If one of the above was identified at the current position, extract them
+    # from the line, parse them and add to the list of line parts.
+    if __isMatchAtStart(matchWhitespace):
+      # A whitespace in the check line creates a new separator of line parts.
+      # This allows for ignored output between the previous and next parts.
+      line = line[matchWhitespace.end():]
+      assertion.addExpression(RegexExpression.createSeparator())
+    elif __isMatchAtStart(matchPattern):
+      pattern = line[0:matchPattern.end()]
+      pattern = pattern[2:-2]  # Strip the '{{' and '}}' delimiters.
+      line = line[matchPattern.end():]
+      assertion.addExpression(RegexExpression.createPattern(pattern))
+    elif __isMatchAtStart(matchVariableReference):
+      var = line[0:matchVariableReference.end()]
+      line = line[matchVariableReference.end():]
+      name = var[2:-2]  # Strip the '<<' and '>>' delimiters.
+      assertion.addExpression(RegexExpression.createVariableReference(name))
+    elif __isMatchAtStart(matchVariableDefinition):
+      var = line[0:matchVariableDefinition.end()]
+      line = line[matchVariableDefinition.end():]
+      colonPos = var.find(":")  # The first colon separates the name from the body.
+      name = var[2:colonPos]
+      body = var[colonPos+1:-2]
+      assertion.addExpression(RegexExpression.createVariableDefinition(name, body))
+    else:
+      # If we're not currently looking at a special marker, this is a plain
+      # text match all the way until the first special marker (or the end
+      # of the line).
+      firstMatch = __firstMatch([ matchWhitespace,
+                                  matchPattern,
+                                  matchVariableReference,
+                                  matchVariableDefinition ],
+                                line)
+      text = line[0:firstMatch]
+      line = line[firstMatch:]
+      assertion.addExpression(RegexExpression.createText(text))
+  return assertion
+
+def ParseCheckerStream(fileName, prefix, stream):
+  checkerFile = CheckerFile(fileName)
+  fnProcessLine = lambda line, lineNo: __processLine(line, lineNo, prefix)
+  fnLineOutsideChunk = lambda line, lineNo: \
+      Logger.fail("C1visualizer line not inside a group", fileName, lineNo)
+  for caseName, caseLines, startLineNo in SplitStream(stream, fnProcessLine, fnLineOutsideChunk):
+    testCase = TestCase(checkerFile, caseName, startLineNo)
+    for caseLine in caseLines:
+      ParseCheckerAssertion(testCase, caseLine[0], caseLine[1], caseLine[2])
+  return checkerFile
diff --git a/tools/checker/file_format/checker/struct.py b/tools/checker/file_format/checker/struct.py
new file mode 100644
index 0000000..381c92b
--- /dev/null
+++ b/tools/checker/file_format/checker/struct.py
@@ -0,0 +1,156 @@
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from common.logger import Logger
+from common.mixins import EqualityMixin, PrintableMixin
+
+import re
+
+class CheckerFile(PrintableMixin):
+  """ Root of a parsed Checker file: its file name and list of test cases. """
+  def __init__(self, fileName):
+    self.fileName = fileName
+    self.testCases = []
+
+  def addTestCase(self, new_test_case):
+    self.testCases.append(new_test_case)
+
+  def __eq__(self, other):  # Note: fileName is not part of the comparison.
+    return isinstance(other, self.__class__) \
+       and self.testCases == other.testCases
+
+
+class TestCase(PrintableMixin):
+  """ A named group of assertions; registers itself with its parent CheckerFile. """
+  def __init__(self, parent, name, startLineNo):
+    assert isinstance(parent, CheckerFile)
+
+    self.parent = parent
+    self.name = name
+    self.assertions = []
+    self.startLineNo = startLineNo
+
+    if not self.name:
+      Logger.fail("Test case does not have a name", self.parent.fileName, self.startLineNo)
+
+    self.parent.addTestCase(self)
+
+  @property
+  def fileName(self):
+    return self.parent.fileName
+
+  def addAssertion(self, new_assertion):
+    self.assertions.append(new_assertion)
+
+  def __eq__(self, other):  # Compares name and assertions only (not parent or line number).
+    return isinstance(other, self.__class__) \
+       and self.name == other.name \
+       and self.assertions == other.assertions
+
+
+class TestAssertion(PrintableMixin):
+  """ One check line: a variant plus a list of RegexExpressions; joins its parent TestCase. """
+  class Variant(object):
+    """Supported types of assertions."""
+    InOrder, DAG, Not = range(3)
+
+  def __init__(self, parent, variant, originalText, lineNo):
+    assert isinstance(parent, TestCase)
+
+    self.parent = parent
+    self.variant = variant
+    self.expressions = []
+    self.lineNo = lineNo
+    self.originalText = originalText
+
+    self.parent.addAssertion(self)
+
+  @property
+  def fileName(self):
+    return self.parent.fileName
+
+  def addExpression(self, new_expression):
+    assert isinstance(new_expression, RegexExpression)
+    if self.variant == TestAssertion.Variant.Not:
+      if new_expression.variant == RegexExpression.Variant.VarDef:
+        Logger.fail("CHECK-NOT lines cannot define variables", self.fileName, self.lineNo)
+    self.expressions.append(new_expression)
+
+  def toRegex(self):
+    """ Returns a regex pattern for this entire assertion. Only used in tests. """
+    regex = ""
+    for expression in self.expressions:
+      if expression.variant == RegexExpression.Variant.Separator:
+        regex = regex + ", "
+      else:  # NOTE(review): a VarRef has pattern None, so this would raise TypeError.
+        regex = regex + "(" + expression.pattern + ")"
+    return regex
+
+  def __eq__(self, other):  # Compares variant and expressions only.
+    return isinstance(other, self.__class__) \
+       and self.variant == other.variant \
+       and self.expressions == other.expressions
+
+
+class RegexExpression(EqualityMixin, PrintableMixin):
+  """ One parsed part of an assertion: text, pattern, variable use/def or separator. """
+  class Variant(object):
+    """Supported language constructs."""
+    Text, Pattern, VarRef, VarDef, Separator = range(5)
+
+  class Regex(object):
+    rName = r"([a-zA-Z][a-zA-Z0-9]*)"
+    rRegex = r"(.+?)"  # Non-greedy, non-empty body.
+    rPatternStartSym = r"(\{\{)"
+    rPatternEndSym = r"(\}\})"
+    rVariableStartSym = r"(<<)"
+    rVariableEndSym = r"(>>)"
+    rVariableSeparator = r"(:)"
+
+    regexPattern = rPatternStartSym + rRegex + rPatternEndSym
+    regexVariableReference = rVariableStartSym + rName + rVariableEndSym
+    regexVariableDefinition = rVariableStartSym + rName + rVariableSeparator + rRegex + rVariableEndSym
+
+  def __init__(self, variant, name, pattern):
+    self.variant = variant
+    self.name = name
+    self.pattern = pattern
+
+  def __eq__(self, other):
+    return isinstance(other, self.__class__) \
+       and self.variant == other.variant \
+       and self.name == other.name \
+       and self.pattern == other.pattern
+
+  @staticmethod
+  def createSeparator():
+    return RegexExpression(RegexExpression.Variant.Separator, None, None)
+
+  @staticmethod
+  def createText(text):  # The text is regex-escaped so that it matches literally.
+    return RegexExpression(RegexExpression.Variant.Text, None, re.escape(text))
+
+  @staticmethod
+  def createPattern(pattern):
+    return RegexExpression(RegexExpression.Variant.Pattern, None, pattern)
+
+  @staticmethod
+  def createVariableReference(name):
+    assert re.match(RegexExpression.Regex.rName, name)  # Anchored at the start only.
+    return RegexExpression(RegexExpression.Variant.VarRef, name, None)
+
+  @staticmethod
+  def createVariableDefinition(name, pattern):
+    assert re.match(RegexExpression.Regex.rName, name)  # Anchored at the start only.
+    return RegexExpression(RegexExpression.Variant.VarDef, name, pattern)
diff --git a/tools/checker/file_format/checker/test.py b/tools/checker/file_format/checker/test.py
new file mode 100644
index 0000000..475e8c3
--- /dev/null
+++ b/tools/checker/file_format/checker/test.py
@@ -0,0 +1,238 @@
+#!/usr/bin/env python2
+#
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from common.testing             import ToUnicode
+from file_format.checker.parser import ParseCheckerStream
+from file_format.checker.struct import CheckerFile, TestCase, TestAssertion, RegexExpression
+
+import io
+import unittest
+
+CheckerException = SystemExit
+
+class CheckerParser_PrefixTest(unittest.TestCase):
+  """ Tests which comment symbol / prefix layouts are recognized as check lines. """
+  def tryParse(self, string):  # True iff `string` yields at least one assertion.
+    checkerText = u"// CHECK-START: pass\n" + ToUnicode(string)
+    checkFile = ParseCheckerStream("<test-file>", "CHECK", io.StringIO(checkerText))
+    self.assertEqual(len(checkFile.testCases), 1)
+    testCase = checkFile.testCases[0]
+    return len(testCase.assertions) != 0
+
+  def test_InvalidFormat(self):
+    self.assertFalse(self.tryParse("CHECK"))
+    self.assertFalse(self.tryParse(":CHECK"))
+    self.assertFalse(self.tryParse("CHECK:"))
+    self.assertFalse(self.tryParse("//CHECK"))
+    self.assertFalse(self.tryParse("#CHECK"))
+
+    self.assertTrue(self.tryParse("//CHECK:foo"))
+    self.assertTrue(self.tryParse("#CHECK:bar"))
+
+  def test_InvalidLabel(self):
+    self.assertFalse(self.tryParse("//ACHECK:foo"))
+    self.assertFalse(self.tryParse("#ACHECK:foo"))
+
+  def test_NotFirstOnTheLine(self):
+    self.assertFalse(self.tryParse("A// CHECK: foo"))
+    self.assertFalse(self.tryParse("A # CHECK: foo"))
+    self.assertFalse(self.tryParse("// // CHECK: foo"))
+    self.assertFalse(self.tryParse("# # CHECK: foo"))
+
+  def test_WhitespaceAgnostic(self):
+    self.assertTrue(self.tryParse("  //CHECK: foo"))
+    self.assertTrue(self.tryParse("//  CHECK: foo"))
+    self.assertTrue(self.tryParse("    //CHECK: foo"))
+    self.assertTrue(self.tryParse("//    CHECK: foo"))
+
+
+class CheckerParser_RegexExpressionTest(unittest.TestCase):
+
+  def parseAssertion(self, string, variant=""):
+    checkerText = u"// CHECK-START: pass\n// CHECK" + ToUnicode(variant) + u": " + ToUnicode(string)
+    checkerFile = ParseCheckerStream("<test-file>", "CHECK", io.StringIO(checkerText))
+    self.assertEqual(len(checkerFile.testCases), 1)
+    testCase = checkerFile.testCases[0]
+    self.assertEqual(len(testCase.assertions), 1)
+    return testCase.assertions[0]
+
+  def parseExpression(self, string):
+    line = self.parseAssertion(string)
+    self.assertEqual(1, len(line.expressions))
+    return line.expressions[0]
+
+  def assertEqualsRegex(self, string, expected):
+    self.assertEqual(expected, self.parseAssertion(string).toRegex())
+
+  def assertEqualsText(self, string, text):
+    self.assertEqual(self.parseExpression(string), RegexExpression.createText(text))
+
+  def assertEqualsPattern(self, string, pattern):
+    self.assertEqual(self.parseExpression(string), RegexExpression.createPattern(pattern))
+
+  def assertEqualsVarRef(self, string, name):
+    self.assertEqual(self.parseExpression(string), RegexExpression.createVariableReference(name))
+
+  def assertEqualsVarDef(self, string, name, pattern):
+    self.assertEqual(self.parseExpression(string),
+                     RegexExpression.createVariableDefinition(name, pattern))
+
+  def assertVariantNotEqual(self, string, variant):
+    self.assertNotEqual(variant, self.parseExpression(string).variant)
+
+  # Test that individual parts of the line are recognized
+
+  def test_TextOnly(self):
+    self.assertEqualsText("foo", "foo")
+    self.assertEqualsText("  foo  ", "foo")
+    self.assertEqualsRegex("f$o^o", "(f\$o\^o)")
+
+  def test_PatternOnly(self):
+    self.assertEqualsPattern("{{a?b.c}}", "a?b.c")
+
+  def test_VarRefOnly(self):
+    self.assertEqualsVarRef("<<ABC>>", "ABC")
+
+  def test_VarDefOnly(self):
+    self.assertEqualsVarDef("<<ABC:a?b.c>>", "ABC", "a?b.c")
+
+  def test_TextWithWhitespace(self):
+    self.assertEqualsRegex("foo bar", "(foo), (bar)")
+    self.assertEqualsRegex("foo   bar", "(foo), (bar)")
+
+  def test_TextWithRegex(self):
+    self.assertEqualsRegex("foo{{abc}}bar", "(foo)(abc)(bar)")
+
+  def test_TextWithVar(self):
+    self.assertEqualsRegex("foo<<ABC:abc>>bar", "(foo)(abc)(bar)")
+
+  def test_PlainWithRegexAndWhitespaces(self):
+    self.assertEqualsRegex("foo {{abc}}bar", "(foo), (abc)(bar)")
+    self.assertEqualsRegex("foo{{abc}} bar", "(foo)(abc), (bar)")
+    self.assertEqualsRegex("foo {{abc}} bar", "(foo), (abc), (bar)")
+
+  def test_PlainWithVarAndWhitespaces(self):
+    self.assertEqualsRegex("foo <<ABC:abc>>bar", "(foo), (abc)(bar)")
+    self.assertEqualsRegex("foo<<ABC:abc>> bar", "(foo)(abc), (bar)")
+    self.assertEqualsRegex("foo <<ABC:abc>> bar", "(foo), (abc), (bar)")
+
+  def test_AllKinds(self):
+    self.assertEqualsRegex("foo <<ABC:abc>>{{def}}bar", "(foo), (abc)(def)(bar)")
+    self.assertEqualsRegex("foo<<ABC:abc>> {{def}}bar", "(foo)(abc), (def)(bar)")
+    self.assertEqualsRegex("foo <<ABC:abc>> {{def}} bar", "(foo), (abc), (def), (bar)")
+
+  # # Test that variables and patterns are parsed correctly
+
+  def test_ValidPattern(self):
+    self.assertEqualsPattern("{{abc}}", "abc")
+    self.assertEqualsPattern("{{a[b]c}}", "a[b]c")
+    self.assertEqualsPattern("{{(a{bc})}}", "(a{bc})")
+
+  def test_ValidRef(self):
+    self.assertEqualsVarRef("<<ABC>>", "ABC")
+    self.assertEqualsVarRef("<<A1BC2>>", "A1BC2")
+
+  def test_ValidDef(self):
+    self.assertEqualsVarDef("<<ABC:abc>>", "ABC", "abc")
+    self.assertEqualsVarDef("<<ABC:ab:c>>", "ABC", "ab:c")
+    self.assertEqualsVarDef("<<ABC:a[b]c>>", "ABC", "a[b]c")
+    self.assertEqualsVarDef("<<ABC:(a[bc])>>", "ABC", "(a[bc])")
+
+  def test_Empty(self):
+    self.assertVariantNotEqual("{{}}", RegexExpression.Variant.Pattern)
+    self.assertVariantNotEqual("<<>>", RegexExpression.Variant.VarRef)
+    self.assertVariantNotEqual("<<:>>", RegexExpression.Variant.VarDef)
+
+  def test_InvalidVarName(self):
+    self.assertVariantNotEqual("<<0ABC>>", RegexExpression.Variant.VarRef)
+    self.assertVariantNotEqual("<<AB=C>>", RegexExpression.Variant.VarRef)
+    self.assertVariantNotEqual("<<ABC=>>", RegexExpression.Variant.VarRef)
+    self.assertVariantNotEqual("<<0ABC:abc>>", RegexExpression.Variant.VarDef)
+    self.assertVariantNotEqual("<<AB=C:abc>>", RegexExpression.Variant.VarDef)
+    self.assertVariantNotEqual("<<ABC=:abc>>", RegexExpression.Variant.VarDef)
+
+  def test_BodyMatchNotGreedy(self):
+    self.assertEqualsRegex("{{abc}}{{def}}", "(abc)(def)")
+    self.assertEqualsRegex("<<ABC:abc>><<DEF:def>>", "(abc)(def)")
+
+  def test_NoVarDefsInNotChecks(self):
+    with self.assertRaises(CheckerException):
+      self.parseAssertion("<<ABC:abc>>", "-NOT")
+
+
+class CheckerParser_FileLayoutTest(unittest.TestCase):
+  """ Tests how check lines are grouped into test cases and assertion variants. """
+  # Creates an instance of CheckerFile from provided info.
+  # Data format: [ ( <case-name>, [ ( <text>, <assert-variant> ), ... ] ), ... ]
+  def createFile(self, caseList):
+    testFile = CheckerFile("<test_file>")
+    for caseEntry in caseList:
+      caseName = caseEntry[0]
+      testCase = TestCase(testFile, caseName, 0)
+      assertionList = caseEntry[1]
+      for assertionEntry in assertionList:
+        content = assertionEntry[0]
+        variant = assertionEntry[1]
+        assertion = TestAssertion(testCase, variant, content, 0)
+        assertion.addExpression(RegexExpression.createText(content))
+    return testFile
+
+  def assertParsesTo(self, checkerText, expectedData):  # Parses the text and compares files.
+    expectedFile = self.createFile(expectedData)
+    actualFile = ParseCheckerStream("<test_file>", "CHECK", io.StringIO(ToUnicode(checkerText)))
+    return self.assertEqual(expectedFile, actualFile)
+
+  def test_EmptyFile(self):
+    self.assertParsesTo("", [])
+
+  def test_SingleGroup(self):
+    self.assertParsesTo(
+      """
+        // CHECK-START: Example Group
+        // CHECK:  foo
+        // CHECK:    bar
+      """,
+      [ ( "Example Group", [ ("foo", TestAssertion.Variant.InOrder),
+                             ("bar", TestAssertion.Variant.InOrder) ] ) ])
+
+  def test_MultipleGroups(self):
+    self.assertParsesTo(
+      """
+        // CHECK-START: Example Group1
+        // CHECK: foo
+        // CHECK: bar
+        // CHECK-START: Example Group2
+        // CHECK: abc
+        // CHECK: def
+      """,
+      [ ( "Example Group1", [ ("foo", TestAssertion.Variant.InOrder),
+                              ("bar", TestAssertion.Variant.InOrder) ] ),
+        ( "Example Group2", [ ("abc", TestAssertion.Variant.InOrder),
+                              ("def", TestAssertion.Variant.InOrder) ] ) ])
+
+  def test_AssertionVariants(self):
+    self.assertParsesTo(
+      """
+        // CHECK-START: Example Group
+        // CHECK:     foo
+        // CHECK-NOT: bar
+        // CHECK-DAG: abc
+        // CHECK-DAG: def
+      """,
+      [ ( "Example Group", [ ("foo", TestAssertion.Variant.InOrder),
+                             ("bar", TestAssertion.Variant.Not),
+                             ("abc", TestAssertion.Variant.DAG),
+                             ("def", TestAssertion.Variant.DAG) ] ) ])
diff --git a/tools/checker/file_format/common.py b/tools/checker/file_format/common.py
new file mode 100644
index 0000000..f91fdeb
--- /dev/null
+++ b/tools/checker/file_format/common.py
@@ -0,0 +1,48 @@
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+def SplitStream(stream, fnProcessLine, fnLineOutsideChunk):
+  """ Reads the given input stream and splits it into chunks based on
+      information extracted from individual lines. Returns the list of chunks.
+
+  Arguments:
+   - fnProcessLine: Called on each line with the text and line number. Must
+     return a pair: data extracted from this line and the name of the chunk
+     started on this line (or None in either position).
+   - fnLineOutsideChunk: Called on attempt to attach data prior to creating
+     a chunk.
+  """
+  lineNo = 0
+  allChunks = []
+  currentChunk = None
+
+  for line in stream:
+    lineNo += 1
+    line = line.strip()
+    if not line:
+      continue
+
+    # Let the callback process the line. It returns the data extracted from
+    # the line (or None to drop it) and, if the line starts a new chunk, the
+    # name of that chunk. Chunks are (name, data list, start line) tuples.
+    processedLine, newChunkName = fnProcessLine(line, lineNo)
+    if newChunkName is not None:
+      currentChunk = (newChunkName, [], lineNo)
+      allChunks.append(currentChunk)
+    if processedLine is not None:
+      if currentChunk is not None:
+        currentChunk[1].append(processedLine)
+      else:
+        fnLineOutsideChunk(line, lineNo)
+  return allChunks
diff --git a/tools/checker/match/__init__.py b/tools/checker/match/__init__.py
new file mode 100644
index 0000000..d0a140b
--- /dev/null
+++ b/tools/checker/match/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/tools/checker/match/file.py b/tools/checker/match/file.py
new file mode 100644
index 0000000..2ed4aa7
--- /dev/null
+++ b/tools/checker/match/file.py
@@ -0,0 +1,147 @@
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from common.logger                    import Logger
+from file_format.c1visualizer.struct  import C1visualizerFile, C1visualizerPass
+from file_format.checker.struct       import CheckerFile, TestCase, TestAssertion
+from match.line                       import MatchLines
+
+def __headAndTail(list):  # Returns (first element, remaining elements); needs a non-empty list.
+  return list[0], list[1:]
+
+def __splitByVariant(lines, variant):
+  """ Splits a list of check lines at index 'i' such that lines[i] is the first
+      element whose variant is not equal to the given parameter. Returns the pair
+      (lines[:i], lines[i:]); the second list is empty if all variants match.
+  """
+  i = 0
+  while i < len(lines) and lines[i].variant == variant:
+    i += 1
+  return lines[:i], lines[i:]
+
+def __nextIndependentChecks(checkLines):
+  """ Extracts the first sequence of check lines which are independent of each
+      other's match location, i.e. either consecutive DAG lines or a single
+      InOrder line. Any Not lines preceding this sequence are also extracted.
+  """
+  notChecks, checkLines = __splitByVariant(checkLines, TestAssertion.Variant.Not)
+  if not checkLines:
+    return notChecks, [], []
+
+  head, tail = __headAndTail(checkLines)
+  if head.variant == TestAssertion.Variant.InOrder:
+    return notChecks, [head], tail
+  else:
+    assert head.variant == TestAssertion.Variant.DAG
+    independentChecks, checkLines = __splitByVariant(checkLines, TestAssertion.Variant.DAG)
+    return notChecks, independentChecks, checkLines
+
+def __findFirstMatch(checkLine, outputLines, startLineNo, lineFilter, varState):
+  """ If successful, returns the line number of the first output line matching
+      the check line and the updated variable state. Otherwise returns -1 and
+      None, respectively. 'lineFilter' is a list of line numbers to be skipped,
+      in the same numbering as 'startLineNo' (the number of outputLines[0]).
+  """
+  matchLineNo = startLineNo
+  for outputLine in outputLines:
+    if matchLineNo not in lineFilter:
+      newVarState = MatchLines(checkLine, outputLine, varState)
+      if newVarState is not None:
+        return matchLineNo, newVarState
+    matchLineNo += 1
+  return -1, None
+
+def __matchIndependentChecks(checkLines, outputLines, startLineNo, varState):
+  """ Matches the given positive check lines against the output in order of
+      appearance. Variable state is propagated but the scope of the search
+      remains the same for all checks. Each output line can only be matched
+      once. Returns a 4-tuple: the output lines preceding the earliest match
+      (to be tested against Not checks), the output lines following the latest
+      match, the line number of the first remaining line, and the resulting
+      variable state. An unmatched check line goes to Logger.testFailed.
+  """
+  # If no checks are provided, skip over the entire output.
+  if not checkLines:
+    return outputLines, [], startLineNo + len(outputLines), varState
+
+  # Keep track of which lines have been matched.
+  matchedLines = []
+
+  # Find first unused output line which matches each check line.
+  for checkLine in checkLines:
+    matchLineNo, varState = \
+      __findFirstMatch(checkLine, outputLines, startLineNo, matchedLines, varState)
+    if varState is None:
+      Logger.testFailed("Could not match check line \"" + checkLine.originalText + "\" " +
+                        "starting from output line " + str(startLineNo),
+                        checkLine.fileName, checkLine.lineNo)
+    matchedLines.append(matchLineNo)
+
+  # Return new variable state and the output lines which lie outside the
+  # match locations of this independent group.
+  minMatchLineNo = min(matchedLines)
+  maxMatchLineNo = max(matchedLines)
+  preceedingLines = outputLines[:minMatchLineNo - startLineNo]
+  remainingLines = outputLines[maxMatchLineNo - startLineNo + 1:]
+  return preceedingLines, remainingLines, maxMatchLineNo + 1, varState
+
+def __matchNotLines(checkLines, outputLines, startLineNo, varState):
+  """ Makes sure that the given check lines do not match any of the given output
+      lines; a match is passed to Logger.testFailed. 'varState' is only read.
+  """
+  for checkLine in checkLines:
+    assert checkLine.variant == TestAssertion.Variant.Not
+    matchLineNo, matchVarState = \
+      __findFirstMatch(checkLine, outputLines, startLineNo, [], varState)
+    if matchVarState is not None:
+      Logger.testFailed("CHECK-NOT line \"" + checkLine.originalText + "\" matches output line " + \
+                        str(matchLineNo), checkLine.fileName, checkLine.lineNo)
+
+def __matchGroups(checkGroup, outputGroup):
+  """ Matches the check lines in this group against an output group. It is
+      responsible for running the checks in the right order and scope, and
+      for propagating the variable state between the check lines.
+  """
+  varState = {}
+  checkLines = checkGroup.assertions
+  outputLines = outputGroup.body
+  startLineNo = outputGroup.startLineNo
+
+  while checkLines:
+    # Extract the next sequence of location-independent checks to be matched.
+    notChecks, independentChecks, checkLines = __nextIndependentChecks(checkLines)
+
+    # Match the independent checks.
+    notOutput, outputLines, newStartLineNo, newVarState = \
+      __matchIndependentChecks(independentChecks, outputLines, startLineNo, varState)
+
+    # Run the Not checks against the output lines which lie between the last
+    # two independent groups or the bounds of the output. They still use the
+    # line number and variable state from before the independent checks ran.
+    __matchNotLines(notChecks, notOutput, startLineNo, varState)
+    # Advance the line number and variable state for the next iteration.
+    startLineNo = newStartLineNo
+    varState = newVarState
+
+def MatchFiles(checkerFile, c1File):  # Runs every test case against the pass of the same name.
+  for testCase in checkerFile.testCases:
+    # TODO: Currently does not handle multiple occurrences of the same group
+    # name, e.g. when a pass is run multiple times. It will always try to
+    # match a check group against the first output group of the same name.
+    c1Pass = c1File.findPass(testCase.name)
+    if c1Pass is None:
+      Logger.fail("Test case \"" + testCase.name + "\" not found in the C1visualizer output",
+                  testCase.fileName, testCase.startLineNo)
+    Logger.startTest(testCase.name)
+    __matchGroups(testCase, c1Pass)
+    Logger.testPassed()
diff --git a/tools/checker/match/line.py b/tools/checker/match/line.py
new file mode 100644
index 0000000..f0253c3
--- /dev/null
+++ b/tools/checker/match/line.py
@@ -0,0 +1,89 @@
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from common.logger              import Logger
+from file_format.checker.struct import TestAssertion, RegexExpression
+
+import re
+
+def __isMatchAtStart(match):
+  """ Tests if 'match' is not None and starts at offset 0 of the searched string. """
+  return (match is not None) and (match.start() == 0)
+
+def __generatePattern(checkLine, linePart, varState):
+  """ Returns the regex pattern to be matched in the output line. A variable
+      reference becomes the regex-escaped current value of that variable in
+      'varState', so it is matched literally; other parts yield their pattern.
+
+  An undefined variable is reported through Logger.testFailed, together with
+  the file name and line number of 'checkLine'. """
+  if linePart.variant == RegexExpression.Variant.VarRef:
+    try:
+      return re.escape(varState[linePart.name])
+    except KeyError:
+      Logger.testFailed("Use of undefined variable \"" + linePart.name + "\"",
+                        checkLine.fileName, checkLine.lineNo)
+  else:
+    return linePart.pattern
+
+def __isSeparated(outputLine, matchStart):  # True if matchStart is 0 or follows whitespace.
+  return (matchStart == 0) or (outputLine[matchStart - 1:matchStart].isspace())
+
+def MatchLines(checkLine, outputLine, initialVarState):
+  """ Attempts to match 'checkLine' against 'outputLine', a line of the output
+      file, starting from the variable values in 'initialVarState' (which is
+      left unmodified). Returns the new variable state on success, else None.
+  """
+  # Do the full matching on a shadow copy of the variable state. If the
+  # matching fails half-way, we will not need to revert the state.
+  varState = dict(initialVarState)
+
+  matchStart = 0
+  isAfterSeparator = True
+
+  # Now try to parse all of the parts of the check line in the right order.
+  # Variable values are updated on-the-fly, meaning that a variable can
+  # be referenced immediately after its definition.
+  for part in checkLine.expressions:
+    if part.variant == RegexExpression.Variant.Separator:
+      isAfterSeparator = True
+      continue
+
+    # Find the earliest match for this line part.
+    pattern = __generatePattern(checkLine, part, varState)
+    while True:
+      match = re.search(pattern, outputLine[matchStart:])
+      if (match is None) or (not isAfterSeparator and not __isMatchAtStart(match)):
+        return None
+      matchEnd = matchStart + match.end()
+      matchStart += match.start()
+      # A match after a separator must start the line or follow whitespace;
+      # otherwise retry one character on, giving up at the end of the line.
+      if not isAfterSeparator or __isSeparated(outputLine, matchStart):
+        break
+      elif matchStart >= len(outputLine):
+        return None  # An empty match here would otherwise repeat forever.
+      matchStart += 1
+
+    if part.variant == RegexExpression.Variant.VarDef:
+      if part.name in varState:
+        Logger.testFailed("Multiple definitions of variable \"" + part.name + "\"",
+                          checkLine.fileName, checkLine.lineNo)
+      varState[part.name] = outputLine[matchStart:matchEnd]
+
+    matchStart = matchEnd
+    isAfterSeparator = False
+
+  # All parts were successfully matched. Return the new variable state.
+  return varState
diff --git a/tools/checker/match/test.py b/tools/checker/match/test.py
new file mode 100644
index 0000000..bb3b1af
--- /dev/null
+++ b/tools/checker/match/test.py
@@ -0,0 +1,326 @@
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from common.testing                  import ToUnicode
+from file_format.c1visualizer.parser import ParseC1visualizerStream
+from file_format.c1visualizer.struct import C1visualizerFile, C1visualizerPass
+from file_format.checker.parser      import ParseCheckerStream, ParseCheckerAssertion
+from file_format.checker.struct      import CheckerFile, TestCase, TestAssertion, RegexExpression
+from match.file                      import MatchFiles
+from match.line                      import MatchLines
+
+import io
+import unittest
+
+CheckerException = SystemExit  # Exception type the tests expect Checker to raise on failure.
+
+class MatchLines_Test(unittest.TestCase):
+
+  def createTestAssertion(self, checkerString):  # Parses the string with the InOrder variant.
+    checkerFile = CheckerFile("<checker-file>")
+    testCase = TestCase(checkerFile, "TestMethod TestPass", 0)
+    return ParseCheckerAssertion(testCase, checkerString, TestAssertion.Variant.InOrder, 0)
+
+  def tryMatch(self, checkerString, c1String, varState={}):  # Returns MatchLines' raw result.
+    return MatchLines(self.createTestAssertion(checkerString), ToUnicode(c1String), varState)
+
+  def matches(self, checkerString, c1String, varState={}):  # True iff tryMatch is not None.
+    return self.tryMatch(checkerString, c1String, varState) is not None
+
+  def test_TextAndWhitespace(self):
+    self.assertTrue(self.matches("foo", "foo"))
+    self.assertTrue(self.matches("foo", "  foo  "))
+    self.assertTrue(self.matches("foo", "foo bar"))
+    self.assertFalse(self.matches("foo", "XfooX"))
+    self.assertFalse(self.matches("foo", "zoo"))
+
+    self.assertTrue(self.matches("foo bar", "foo   bar"))
+    self.assertTrue(self.matches("foo bar", "abc foo bar def"))
+    self.assertTrue(self.matches("foo bar", "foo foo bar bar"))
+
+    self.assertTrue(self.matches("foo bar", "foo X bar"))
+    self.assertFalse(self.matches("foo bar", "foo Xbar"))
+
+  def test_Pattern(self):
+    self.assertTrue(self.matches("foo{{A|B}}bar", "fooAbar"))
+    self.assertTrue(self.matches("foo{{A|B}}bar", "fooBbar"))
+    self.assertFalse(self.matches("foo{{A|B}}bar", "fooCbar"))
+
+  def test_VariableReference(self):
+    self.assertTrue(self.matches("foo<<X>>bar", "foobar", {"X": ""}))
+    self.assertTrue(self.matches("foo<<X>>bar", "fooAbar", {"X": "A"}))
+    self.assertTrue(self.matches("foo<<X>>bar", "fooBbar", {"X": "B"}))
+    self.assertFalse(self.matches("foo<<X>>bar", "foobar", {"X": "A"}))
+    self.assertFalse(self.matches("foo<<X>>bar", "foo bar", {"X": "A"}))
+    with self.assertRaises(CheckerException):  # X is referenced but never defined.
+      self.assertTrue(self.matches("foo<<X>>bar", "foobar", {}))
+
+  def test_VariableDefinition(self):
+    self.assertTrue(self.matches("foo<<X:A|B>>bar", "fooAbar"))
+    self.assertTrue(self.matches("foo<<X:A|B>>bar", "fooBbar"))
+    self.assertFalse(self.matches("foo<<X:A|B>>bar", "fooCbar"))
+
+    env = self.tryMatch("foo<<X:A.*B>>bar", "fooABbar", {})
+    self.assertEqual(env, {"X": "AB"})
+    env = self.tryMatch("foo<<X:A.*B>>bar", "fooAxxBbar", {})
+    self.assertEqual(env, {"X": "AxxB"})
+
+    self.assertTrue(self.matches("foo<<X:A|B>>bar<<X>>baz", "fooAbarAbaz"))
+    self.assertTrue(self.matches("foo<<X:A|B>>bar<<X>>baz", "fooBbarBbaz"))
+    self.assertFalse(self.matches("foo<<X:A|B>>bar<<X>>baz", "fooAbarBbaz"))
+
+  def test_NoVariableRedefinition(self):  # Defining X twice in one check line must abort.
+    with self.assertRaises(CheckerException):
+      self.matches("<<X:...>><<X>><<X:...>><<X>>", "foofoobarbar")
+
+  def test_EnvNotChangedOnPartialMatch(self):  # The caller's dict gains no key on failure.
+    env = {"Y": "foo"}
+    self.assertFalse(self.matches("<<X:A>>bar", "Abaz", env))
+    self.assertFalse("X" in env.keys())
+
+  def test_VariableContentEscaped(self):  # A variable's value is matched literally.
+    self.assertTrue(self.matches("<<X:..>>foo<<X>>", ".*foo.*"))
+    self.assertFalse(self.matches("<<X:..>>foo<<X>>", ".*fooAAAA"))
+
+
+class MatchFiles_Test(unittest.TestCase):
+
+  def matches(self, checkerString, c1String):  # Wraps inputs as MyMethod/MyPass, then matches.
+    checkerString = \
+      """
+        // CHECK-START: MyMethod MyPass
+      """ + checkerString
+    c1String = \
+      """
+        begin_compilation
+          name "MyMethod"
+          method "MyMethod"
+          date 1234
+        end_compilation
+        begin_cfg
+          name "MyPass"
+      """ + c1String + \
+      """
+        end_cfg
+      """
+    checkerFile = ParseCheckerStream("<test-file>", "CHECK", io.StringIO(ToUnicode(checkerString)))
+    c1File = ParseC1visualizerStream("<c1-file>", io.StringIO(ToUnicode(c1String)))
+    try:
+      MatchFiles(checkerFile, c1File)
+      return True  # MatchFiles finished without raising.
+    except CheckerException:  # CheckerException is an alias of SystemExit in this module.
+      return False
+
+  def test_Text(self):
+    self.assertTrue(self.matches( "// CHECK: foo bar", "foo bar"))
+    self.assertFalse(self.matches("// CHECK: foo bar", "abc def"))
+
+  def test_Pattern(self):
+    self.assertTrue(self.matches( "// CHECK: abc {{de.}}", "abc de#"))
+    self.assertFalse(self.matches("// CHECK: abc {{de.}}", "abc d#f"))
+
+  def test_Variables(self):  # A variable defined on one CHECK line is usable on later ones.
+    self.assertTrue(self.matches(
+    """
+      // CHECK: foo<<X:.>>bar
+      // CHECK: abc<<X>>def
+    """,
+    """
+      foo bar
+      abc def
+    """))
+    self.assertTrue(self.matches(
+    """
+      // CHECK: foo<<X:([0-9]+)>>bar
+      // CHECK: abc<<X>>def
+      // CHECK: ### <<X>> ###
+    """,
+    """
+      foo1234bar
+      abc1234def
+      ### 1234 ###
+    """))
+    self.assertFalse(self.matches(
+    """
+      // CHECK: foo<<X:([0-9]+)>>bar
+      // CHECK: abc<<X>>def
+    """,
+    """
+      foo1234bar
+      abc1235def
+    """))
+
+  def test_InOrderAssertions(self):  # Plain CHECK lines must match in the given order.
+    self.assertTrue(self.matches(
+    """
+      // CHECK: foo
+      // CHECK: bar
+    """,
+    """
+      foo
+      bar
+    """))
+    self.assertFalse(self.matches(
+    """
+      // CHECK: foo
+      // CHECK: bar
+    """,
+    """
+      bar
+      foo
+    """))
+
+  def test_DagAssertions(self):  # Consecutive CHECK-DAG lines may match in either order.
+    self.assertTrue(self.matches(
+    """
+      // CHECK-DAG: foo
+      // CHECK-DAG: bar
+    """,
+    """
+      foo
+      bar
+    """))
+    self.assertTrue(self.matches(
+    """
+      // CHECK-DAG: foo
+      // CHECK-DAG: bar
+    """,
+    """
+      bar
+      foo
+    """))
+
+  def test_DagAssertionsScope(self):  # DAG lines must match between the enclosing CHECKs.
+    self.assertTrue(self.matches(
+    """
+      // CHECK:     foo
+      // CHECK-DAG: abc
+      // CHECK-DAG: def
+      // CHECK:     bar
+    """,
+    """
+      foo
+      def
+      abc
+      bar
+    """))
+    self.assertFalse(self.matches(
+    """
+      // CHECK:     foo
+      // CHECK-DAG: abc
+      // CHECK-DAG: def
+      // CHECK:     bar
+    """,
+    """
+      foo
+      abc
+      bar
+      def
+    """))
+    self.assertFalse(self.matches(
+    """
+      // CHECK:     foo
+      // CHECK-DAG: abc
+      // CHECK-DAG: def
+      // CHECK:     bar
+    """,
+    """
+      foo
+      def
+      bar
+      abc
+    """))
+
+  def test_NotAssertions(self):
+    self.assertTrue(self.matches(
+    """
+      // CHECK-NOT: foo
+    """,
+    """
+      abc
+      def
+    """))
+    self.assertFalse(self.matches(
+    """
+      // CHECK-NOT: foo
+    """,
+    """
+      abc foo
+      def
+    """))
+    self.assertFalse(self.matches(
+    """
+      // CHECK-NOT: foo
+      // CHECK-NOT: bar
+    """,
+    """
+      abc
+      def bar
+    """))
+
+  def test_NotAssertionsScope(self):  # CHECK-NOT only covers output between abc and def.
+    self.assertTrue(self.matches(
+    """
+      // CHECK:     abc
+      // CHECK-NOT: foo
+      // CHECK:     def
+    """,
+    """
+      abc
+      def
+    """))
+    self.assertTrue(self.matches(
+    """
+      // CHECK:     abc
+      // CHECK-NOT: foo
+      // CHECK:     def
+    """,
+    """
+      abc
+      def
+      foo
+    """))
+    self.assertFalse(self.matches(
+    """
+      // CHECK:     abc
+      // CHECK-NOT: foo
+      // CHECK:     def
+    """,
+    """
+      abc
+      foo
+      def
+    """))
+
+  def test_LineOnlyMatchesOnce(self):  # Two identical DAG checks need two output lines.
+    self.assertTrue(self.matches(
+    """
+      // CHECK-DAG: foo
+      // CHECK-DAG: foo
+    """,
+    """
+      foo
+      abc
+      foo
+    """))
+    self.assertFalse(self.matches(
+    """
+      // CHECK-DAG: foo
+      // CHECK-DAG: foo
+    """,
+    """
+      foo
+      abc
+      bar
+    """))
diff --git a/tools/checker/run_unit_tests.py b/tools/checker/run_unit_tests.py
new file mode 100755
index 0000000..01708db
--- /dev/null
+++ b/tools/checker/run_unit_tests.py
@@ -0,0 +1,29 @@
+#!/usr/bin/env python2
+#
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from common.logger                 import Logger
+from file_format.c1visualizer.test import C1visualizerParser_Test
+from file_format.checker.test      import CheckerParser_PrefixTest, \
+                                          CheckerParser_RegexExpressionTest, \
+                                          CheckerParser_FileLayoutTest
+from match.test                    import MatchLines_Test, \
+                                          MatchFiles_Test
+
+import unittest
+
+if __name__ == '__main__':
+  Logger.Verbosity = Logger.Level.NoOutput  # Set Checker's Logger to NoOutput for the run.
+  unittest.main(verbosity=2)  # Runs the TestCase classes imported into this module above.
diff --git a/tools/checker_test.py b/tools/checker_test.py
deleted file mode 100755
index 667ca90..0000000
--- a/tools/checker_test.py
+++ /dev/null
@@ -1,474 +0,0 @@
-#!/usr/bin/env python2
-#
-# Copyright (C) 2014 The Android Open Source Project
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# This is a test file which exercises all feautres supported by the domain-
-# specific markup language implemented by Checker.
-
-import checker
-import io
-import unittest
-
-# The parent type of exception expected to be thrown by Checker during tests.
-# It must be specific enough to not cover exceptions thrown due to actual flaws
-# in Checker.
-CheckerException = SystemExit
-
-
-class TestCheckFile_PrefixExtraction(unittest.TestCase):
-  def __tryParse(self, string):
-    checkFile = checker.CheckFile(None, [])
-    return checkFile._extractLine("CHECK", string)
-
-  def test_InvalidFormat(self):
-    self.assertIsNone(self.__tryParse("CHECK"))
-    self.assertIsNone(self.__tryParse(":CHECK"))
-    self.assertIsNone(self.__tryParse("CHECK:"))
-    self.assertIsNone(self.__tryParse("//CHECK"))
-    self.assertIsNone(self.__tryParse("#CHECK"))
-
-    self.assertIsNotNone(self.__tryParse("//CHECK:foo"))
-    self.assertIsNotNone(self.__tryParse("#CHECK:bar"))
-
-  def test_InvalidLabel(self):
-    self.assertIsNone(self.__tryParse("//ACHECK:foo"))
-    self.assertIsNone(self.__tryParse("#ACHECK:foo"))
-
-  def test_NotFirstOnTheLine(self):
-    self.assertIsNone(self.__tryParse("A// CHECK: foo"))
-    self.assertIsNone(self.__tryParse("A # CHECK: foo"))
-    self.assertIsNone(self.__tryParse("// // CHECK: foo"))
-    self.assertIsNone(self.__tryParse("# # CHECK: foo"))
-
-  def test_WhitespaceAgnostic(self):
-    self.assertIsNotNone(self.__tryParse("  //CHECK: foo"))
-    self.assertIsNotNone(self.__tryParse("//  CHECK: foo"))
-    self.assertIsNotNone(self.__tryParse("    //CHECK: foo"))
-    self.assertIsNotNone(self.__tryParse("//    CHECK: foo"))
-
-
-class TestCheckLine_Parse(unittest.TestCase):
-  def __getPartPattern(self, linePart):
-    if linePart.variant == checker.CheckElement.Variant.Separator:
-      return "\s+"
-    else:
-      return linePart.pattern
-
-  def __getRegex(self, checkLine):
-    return "".join(map(lambda x: "(" + self.__getPartPattern(x) + ")", checkLine.lineParts))
-
-  def __tryParse(self, string):
-    return checker.CheckLine(string)
-
-  def __parsesTo(self, string, expected):
-    self.assertEqual(expected, self.__getRegex(self.__tryParse(string)))
-
-  def __tryParseNot(self, string):
-    return checker.CheckLine(string, checker.CheckLine.Variant.Not)
-
-  def __parsesPattern(self, string, pattern):
-    line = self.__tryParse(string)
-    self.assertEqual(1, len(line.lineParts))
-    self.assertEqual(checker.CheckElement.Variant.Pattern, line.lineParts[0].variant)
-    self.assertEqual(pattern, line.lineParts[0].pattern)
-
-  def __parsesVarRef(self, string, name):
-    line = self.__tryParse(string)
-    self.assertEqual(1, len(line.lineParts))
-    self.assertEqual(checker.CheckElement.Variant.VarRef, line.lineParts[0].variant)
-    self.assertEqual(name, line.lineParts[0].name)
-
-  def __parsesVarDef(self, string, name, body):
-    line = self.__tryParse(string)
-    self.assertEqual(1, len(line.lineParts))
-    self.assertEqual(checker.CheckElement.Variant.VarDef, line.lineParts[0].variant)
-    self.assertEqual(name, line.lineParts[0].name)
-    self.assertEqual(body, line.lineParts[0].pattern)
-
-  def __doesNotParse(self, string, partType):
-    line = self.__tryParse(string)
-    self.assertEqual(1, len(line.lineParts))
-    self.assertNotEqual(partType, line.lineParts[0].variant)
-
-  # Test that individual parts of the line are recognized
-
-  def test_TextOnly(self):
-    self.__parsesTo("foo", "(foo)")
-    self.__parsesTo("  foo  ", "(foo)")
-    self.__parsesTo("f$o^o", "(f\$o\^o)")
-
-  def test_TextWithWhitespace(self):
-    self.__parsesTo("foo bar", "(foo)(\s+)(bar)")
-    self.__parsesTo("foo   bar", "(foo)(\s+)(bar)")
-
-  def test_RegexOnly(self):
-    self.__parsesPattern("{{a?b.c}}", "a?b.c")
-
-  def test_VarRefOnly(self):
-    self.__parsesVarRef("[[ABC]]", "ABC")
-
-  def test_VarDefOnly(self):
-    self.__parsesVarDef("[[ABC:a?b.c]]", "ABC", "a?b.c")
-
-  def test_TextWithRegex(self):
-    self.__parsesTo("foo{{abc}}bar", "(foo)(abc)(bar)")
-
-  def test_TextWithVar(self):
-    self.__parsesTo("foo[[ABC:abc]]bar", "(foo)(abc)(bar)")
-
-  def test_PlainWithRegexAndWhitespaces(self):
-    self.__parsesTo("foo {{abc}}bar", "(foo)(\s+)(abc)(bar)")
-    self.__parsesTo("foo{{abc}} bar", "(foo)(abc)(\s+)(bar)")
-    self.__parsesTo("foo {{abc}} bar", "(foo)(\s+)(abc)(\s+)(bar)")
-
-  def test_PlainWithVarAndWhitespaces(self):
-    self.__parsesTo("foo [[ABC:abc]]bar", "(foo)(\s+)(abc)(bar)")
-    self.__parsesTo("foo[[ABC:abc]] bar", "(foo)(abc)(\s+)(bar)")
-    self.__parsesTo("foo [[ABC:abc]] bar", "(foo)(\s+)(abc)(\s+)(bar)")
-
-  def test_AllKinds(self):
-    self.__parsesTo("foo [[ABC:abc]]{{def}}bar", "(foo)(\s+)(abc)(def)(bar)")
-    self.__parsesTo("foo[[ABC:abc]] {{def}}bar", "(foo)(abc)(\s+)(def)(bar)")
-    self.__parsesTo("foo [[ABC:abc]] {{def}} bar", "(foo)(\s+)(abc)(\s+)(def)(\s+)(bar)")
-
-  # Test that variables and patterns are parsed correctly
-
-  def test_ValidPattern(self):
-    self.__parsesPattern("{{abc}}", "abc")
-    self.__parsesPattern("{{a[b]c}}", "a[b]c")
-    self.__parsesPattern("{{(a{bc})}}", "(a{bc})")
-
-  def test_ValidRef(self):
-    self.__parsesVarRef("[[ABC]]", "ABC")
-    self.__parsesVarRef("[[A1BC2]]", "A1BC2")
-
-  def test_ValidDef(self):
-    self.__parsesVarDef("[[ABC:abc]]", "ABC", "abc")
-    self.__parsesVarDef("[[ABC:ab:c]]", "ABC", "ab:c")
-    self.__parsesVarDef("[[ABC:a[b]c]]", "ABC", "a[b]c")
-    self.__parsesVarDef("[[ABC:(a[bc])]]", "ABC", "(a[bc])")
-
-  def test_Empty(self):
-    self.__doesNotParse("{{}}", checker.CheckElement.Variant.Pattern)
-    self.__doesNotParse("[[]]", checker.CheckElement.Variant.VarRef)
-    self.__doesNotParse("[[:]]", checker.CheckElement.Variant.VarDef)
-
-  def test_InvalidVarName(self):
-    self.__doesNotParse("[[0ABC]]", checker.CheckElement.Variant.VarRef)
-    self.__doesNotParse("[[AB=C]]", checker.CheckElement.Variant.VarRef)
-    self.__doesNotParse("[[ABC=]]", checker.CheckElement.Variant.VarRef)
-    self.__doesNotParse("[[0ABC:abc]]", checker.CheckElement.Variant.VarDef)
-    self.__doesNotParse("[[AB=C:abc]]", checker.CheckElement.Variant.VarDef)
-    self.__doesNotParse("[[ABC=:abc]]", checker.CheckElement.Variant.VarDef)
-
-  def test_BodyMatchNotGreedy(self):
-    self.__parsesTo("{{abc}}{{def}}", "(abc)(def)")
-    self.__parsesTo("[[ABC:abc]][[DEF:def]]", "(abc)(def)")
-
-  def test_NoVarDefsInNotChecks(self):
-    with self.assertRaises(CheckerException):
-      self.__tryParseNot("[[ABC:abc]]")
-
-class TestCheckLine_Match(unittest.TestCase):
-  def __matchSingle(self, checkString, outputString, varState={}):
-    checkLine = checker.CheckLine(checkString)
-    newVarState = checkLine.match(outputString, varState)
-    self.assertIsNotNone(newVarState)
-    return newVarState
-
-  def __notMatchSingle(self, checkString, outputString, varState={}):
-    checkLine = checker.CheckLine(checkString)
-    self.assertIsNone(checkLine.match(outputString, varState))
-
-  def test_TextAndWhitespace(self):
-    self.__matchSingle("foo", "foo")
-    self.__matchSingle("foo", "  foo  ")
-    self.__matchSingle("foo", "foo bar")
-    self.__notMatchSingle("foo", "XfooX")
-    self.__notMatchSingle("foo", "zoo")
-
-    self.__matchSingle("foo bar", "foo   bar")
-    self.__matchSingle("foo bar", "abc foo bar def")
-    self.__matchSingle("foo bar", "foo foo bar bar")
-
-    self.__matchSingle("foo bar", "foo X bar")
-    self.__notMatchSingle("foo bar", "foo Xbar")
-
-  def test_Pattern(self):
-    self.__matchSingle("foo{{A|B}}bar", "fooAbar")
-    self.__matchSingle("foo{{A|B}}bar", "fooBbar")
-    self.__notMatchSingle("foo{{A|B}}bar", "fooCbar")
-
-  def test_VariableReference(self):
-    self.__matchSingle("foo[[X]]bar", "foobar", {"X": ""})
-    self.__matchSingle("foo[[X]]bar", "fooAbar", {"X": "A"})
-    self.__matchSingle("foo[[X]]bar", "fooBbar", {"X": "B"})
-    self.__notMatchSingle("foo[[X]]bar", "foobar", {"X": "A"})
-    self.__notMatchSingle("foo[[X]]bar", "foo bar", {"X": "A"})
-    with self.assertRaises(CheckerException):
-      self.__matchSingle("foo[[X]]bar", "foobar", {})
-
-  def test_VariableDefinition(self):
-    self.__matchSingle("foo[[X:A|B]]bar", "fooAbar")
-    self.__matchSingle("foo[[X:A|B]]bar", "fooBbar")
-    self.__notMatchSingle("foo[[X:A|B]]bar", "fooCbar")
-
-    env = self.__matchSingle("foo[[X:A.*B]]bar", "fooABbar", {})
-    self.assertEqual(env, {"X": "AB"})
-    env = self.__matchSingle("foo[[X:A.*B]]bar", "fooAxxBbar", {})
-    self.assertEqual(env, {"X": "AxxB"})
-
-    self.__matchSingle("foo[[X:A|B]]bar[[X]]baz", "fooAbarAbaz")
-    self.__matchSingle("foo[[X:A|B]]bar[[X]]baz", "fooBbarBbaz")
-    self.__notMatchSingle("foo[[X:A|B]]bar[[X]]baz", "fooAbarBbaz")
-
-  def test_NoVariableRedefinition(self):
-    with self.assertRaises(CheckerException):
-      self.__matchSingle("[[X:...]][[X]][[X:...]][[X]]", "foofoobarbar")
-
-  def test_EnvNotChangedOnPartialMatch(self):
-    env = {"Y": "foo"}
-    self.__notMatchSingle("[[X:A]]bar", "Abaz", env)
-    self.assertFalse("X" in env.keys())
-
-  def test_VariableContentEscaped(self):
-    self.__matchSingle("[[X:..]]foo[[X]]", ".*foo.*")
-    self.__notMatchSingle("[[X:..]]foo[[X]]", ".*fooAAAA")
-
-
-CheckVariant = checker.CheckLine.Variant
-
-def prepareSingleCheck(line):
-  if isinstance(line, str):
-    return checker.CheckLine(line)
-  else:
-    return checker.CheckLine(line[0], line[1])
-
-def prepareChecks(lines):
-  if isinstance(lines, str):
-    lines = lines.splitlines()
-  return list(map(lambda line: prepareSingleCheck(line), lines))
-
-
-class TestCheckGroup_Match(unittest.TestCase):
-  def __matchMulti(self, checkLines, outputString):
-    checkGroup = checker.CheckGroup("MyGroup", prepareChecks(checkLines))
-    outputGroup = checker.OutputGroup("MyGroup", outputString.splitlines())
-    return checkGroup.match(outputGroup)
-
-  def __notMatchMulti(self, checkString, outputString):
-    with self.assertRaises(CheckerException):
-      self.__matchMulti(checkString, outputString)
-
-  def test_TextAndPattern(self):
-    self.__matchMulti("""foo bar
-                         abc {{def}}""",
-                      """foo bar
-                         abc def""");
-    self.__matchMulti("""foo bar
-                         abc {{de.}}""",
-                      """=======
-                         foo bar
-                         =======
-                         abc de#
-                         =======""");
-    self.__notMatchMulti("""//XYZ: foo bar
-                            //XYZ: abc {{def}}""",
-                         """=======
-                            foo bar
-                            =======
-                            abc de#
-                            =======""");
-
-  def test_Variables(self):
-    self.__matchMulti("""foo[[X:.]]bar
-                         abc[[X]]def""",
-                      """foo bar
-                         abc def""");
-    self.__matchMulti("""foo[[X:([0-9]+)]]bar
-                         abc[[X]]def
-                         ### [[X]] ###""",
-                      """foo1234bar
-                         abc1234def
-                         ### 1234 ###""");
-
-  def test_Ordering(self):
-    self.__matchMulti([("foo", CheckVariant.InOrder),
-                       ("bar", CheckVariant.InOrder)],
-                      """foo
-                         bar""")
-    self.__notMatchMulti([("foo", CheckVariant.InOrder),
-                          ("bar", CheckVariant.InOrder)],
-                         """bar
-                            foo""")
-    self.__matchMulti([("abc", CheckVariant.DAG),
-                       ("def", CheckVariant.DAG)],
-                      """abc
-                         def""")
-    self.__matchMulti([("abc", CheckVariant.DAG),
-                       ("def", CheckVariant.DAG)],
-                      """def
-                         abc""")
-    self.__matchMulti([("foo", CheckVariant.InOrder),
-                       ("abc", CheckVariant.DAG),
-                       ("def", CheckVariant.DAG),
-                       ("bar", CheckVariant.InOrder)],
-                      """foo
-                         def
-                         abc
-                         bar""")
-    self.__notMatchMulti([("foo", CheckVariant.InOrder),
-                          ("abc", CheckVariant.DAG),
-                          ("def", CheckVariant.DAG),
-                          ("bar", CheckVariant.InOrder)],
-                         """foo
-                            abc
-                            bar""")
-    self.__notMatchMulti([("foo", CheckVariant.InOrder),
-                          ("abc", CheckVariant.DAG),
-                          ("def", CheckVariant.DAG),
-                          ("bar", CheckVariant.InOrder)],
-                         """foo
-                            def
-                            bar""")
-
-  def test_NotAssertions(self):
-    self.__matchMulti([("foo", CheckVariant.Not)],
-                      """abc
-                         def""")
-    self.__notMatchMulti([("foo", CheckVariant.Not)],
-                         """abc foo
-                            def""")
-    self.__notMatchMulti([("foo", CheckVariant.Not),
-                          ("bar", CheckVariant.Not)],
-                         """abc
-                            def bar""")
-
-  def test_LineOnlyMatchesOnce(self):
-    self.__matchMulti([("foo", CheckVariant.DAG),
-                       ("foo", CheckVariant.DAG)],
-                       """foo
-                          foo""")
-    self.__notMatchMulti([("foo", CheckVariant.DAG),
-                          ("foo", CheckVariant.DAG)],
-                          """foo
-                             bar""")
-
-class TestOutputFile_Parse(unittest.TestCase):
-  def __parsesTo(self, string, expected):
-    if isinstance(string, str):
-      string = unicode(string)
-    outputStream = io.StringIO(string)
-    return self.assertEqual(checker.OutputFile(outputStream).groups, expected)
-
-  def test_NoInput(self):
-    self.__parsesTo(None, [])
-    self.__parsesTo("", [])
-
-  def test_SingleGroup(self):
-    self.__parsesTo("""begin_compilation
-                         method "MyMethod"
-                       end_compilation
-                       begin_cfg
-                         name "pass1"
-                         foo
-                         bar
-                       end_cfg""",
-                    [ checker.OutputGroup("MyMethod pass1", [ "foo", "bar" ]) ])
-
-  def test_MultipleGroups(self):
-    self.__parsesTo("""begin_compilation
-                         name "xyz1"
-                         method "MyMethod1"
-                         date 1234
-                       end_compilation
-                       begin_cfg
-                         name "pass1"
-                         foo
-                         bar
-                       end_cfg
-                       begin_cfg
-                         name "pass2"
-                         abc
-                         def
-                       end_cfg""",
-                    [ checker.OutputGroup("MyMethod1 pass1", [ "foo", "bar" ]),
-                      checker.OutputGroup("MyMethod1 pass2", [ "abc", "def" ]) ])
-
-    self.__parsesTo("""begin_compilation
-                         name "xyz1"
-                         method "MyMethod1"
-                         date 1234
-                       end_compilation
-                       begin_cfg
-                         name "pass1"
-                         foo
-                         bar
-                       end_cfg
-                       begin_compilation
-                         name "xyz2"
-                         method "MyMethod2"
-                         date 5678
-                       end_compilation
-                       begin_cfg
-                         name "pass2"
-                         abc
-                         def
-                       end_cfg""",
-                    [ checker.OutputGroup("MyMethod1 pass1", [ "foo", "bar" ]),
-                      checker.OutputGroup("MyMethod2 pass2", [ "abc", "def" ]) ])
-
-class TestCheckFile_Parse(unittest.TestCase):
-  def __parsesTo(self, string, expected):
-    if isinstance(string, str):
-      string = unicode(string)
-    checkStream = io.StringIO(string)
-    return self.assertEqual(checker.CheckFile("CHECK", checkStream).groups, expected)
-
-  def test_NoInput(self):
-    self.__parsesTo(None, [])
-    self.__parsesTo("", [])
-
-  def test_SingleGroup(self):
-    self.__parsesTo("""// CHECK-START: Example Group
-                       // CHECK:  foo
-                       // CHECK:    bar""",
-                    [ checker.CheckGroup("Example Group", prepareChecks([ "foo", "bar" ])) ])
-
-  def test_MultipleGroups(self):
-    self.__parsesTo("""// CHECK-START: Example Group1
-                       // CHECK: foo
-                       // CHECK: bar
-                       // CHECK-START: Example Group2
-                       // CHECK: abc
-                       // CHECK: def""",
-                    [ checker.CheckGroup("Example Group1", prepareChecks([ "foo", "bar" ])),
-                      checker.CheckGroup("Example Group2", prepareChecks([ "abc", "def" ])) ])
-
-  def test_CheckVariants(self):
-    self.__parsesTo("""// CHECK-START: Example Group
-                       // CHECK:     foo
-                       // CHECK-NOT: bar
-                       // CHECK-DAG: abc
-                       // CHECK-DAG: def""",
-                    [ checker.CheckGroup("Example Group",
-                                         prepareChecks([ ("foo", CheckVariant.InOrder),
-                                                         ("bar", CheckVariant.Not),
-                                                         ("abc", CheckVariant.DAG),
-                                                         ("def", CheckVariant.DAG) ])) ])
-
-if __name__ == '__main__':
-  checker.Logger.Verbosity = checker.Logger.Level.NoOutput
-  unittest.main()
diff --git a/tools/libcore_failures.txt b/tools/libcore_failures.txt
index 2040b57..a8bc4e1 100644
--- a/tools/libcore_failures.txt
+++ b/tools/libcore_failures.txt
@@ -109,20 +109,20 @@
   bug: 19165288
 },
 {
-  description: "Bug in libcore",
-  result: EXEC_FAILED,
-  names: ["libcore.javax.crypto.ECDHKeyAgreementTest#testInit_withUnsupportedPrivateKeyType"],
-  bug: 19730263
-},
-{
-  description: "Needs to be run as root",
-  result: EXEC_FAILED,
-  modes: [host],
-  names: ["libcore.io.OsTest#test_PacketSocketAddress"]
-},
-{
   description: "Needs kernel updates on host/device",
   result: EXEC_FAILED,
   names: ["libcore.io.OsTest#test_socketPing"]
+},
+{
+  description: "Linker issues in chrooted environment",
+  modes: [device],
+  result: EXEC_FAILED,
+  names: ["org.apache.harmony.tests.java.lang.ProcessManagerTest#testEnvironment"]
+},
+{
+  description: "Crypto failures",
+  result: EXEC_FAILED,
+  names: ["libcore.javax.crypto.CipherTest#testCipher_ShortBlock_Failure",
+          "libcore.javax.crypto.CipherTest#testCipher_Success"]
 }
 ]
diff --git a/tools/run-jdwp-tests.sh b/tools/run-jdwp-tests.sh
index 503ec71..77e8004 100755
--- a/tools/run-jdwp-tests.sh
+++ b/tools/run-jdwp-tests.sh
@@ -19,22 +19,17 @@
   exit 1
 fi
 
-if [[ $ANDROID_SERIAL == HT4CTJT03670 ]] || [[ $ANDROID_SERIAL == HT49CJT00070 ]]; then
-  echo "Not running on buildbot because of failures on volantis. Investigating."
-  exit 0
-fi
-
 # Jar containing all the tests.
 test_jar=out/host/linux-x86/framework/apache-harmony-jdwp-tests-hostdex.jar
-junit_jar=out/host/linux-x86/framework/junit.jar
 
-if [ ! -f $test_jar -o ! -f $junit_jar ]; then
+if [ ! -f $test_jar ]; then
   echo "Before running, you must build jdwp tests and vogar:" \
-       "make junit apache-harmony-jdwp-tests-hostdex vogar vogar.jar"
+       "make apache-harmony-jdwp-tests-hostdex vogar vogar.jar"
   exit 1
 fi
 
 art="/data/local/tmp/system/bin/art"
+art_debugee="sh /data/local/tmp/system/bin/art"
 # We use Quick's image on target because optimizing's image is not compiled debuggable.
 image="-Ximage:/data/art-test/core.art"
 args=$@
@@ -50,6 +45,7 @@
     # Specify bash explicitly since the art script cannot, since it has to run on the device
     # with mksh.
     art="bash out/host/linux-x86/bin/art"
+    art_debugee="bash out/host/linux-x86/bin/art"
     # We force generation of a new image to avoid build-time and run-time classpath differences.
     image="-Ximage:/system/non/existent"
     # We do not need a device directory on host.
@@ -77,13 +73,12 @@
       $args \
       $device_dir \
       $image_compiler_option \
-      --timeout 600 \
+      --timeout 800 \
       --vm-arg -Djpda.settings.verbose=true \
       --vm-arg -Djpda.settings.syncPort=34016 \
       --vm-arg -Djpda.settings.transportAddress=127.0.0.1:55107 \
-      --vm-arg -Djpda.settings.debuggeeJavaPath="\"$art $image $debuggee_args\"" \
+      --vm-arg -Djpda.settings.debuggeeJavaPath="\"$art_debugee $image $debuggee_args\"" \
       --classpath $test_jar \
-      --classpath $junit_jar \
       --vm-arg -Xcompiler-option --vm-arg --compiler-backend=Optimizing \
       --vm-arg -Xcompiler-option --vm-arg --debuggable \
       org.apache.harmony.jpda.tests.share.AllTests