Merge "Enforce class definition ordering rules in v37 Dex files."
diff --git a/Android.mk b/Android.mk
index 25796a0..bb1334a 100644
--- a/Android.mk
+++ b/Android.mk
@@ -93,6 +93,7 @@
 include $(art_path)/tools/dexfuzz/Android.mk
 include $(art_path)/tools/dmtracedump/Android.mk
 include $(art_path)/sigchainlib/Android.mk
+include $(art_path)/libart_fake/Android.mk
 
 
 # ART_HOST_DEPENDENCIES depends on Android.executable.mk above for ART_HOST_EXECUTABLES
diff --git a/benchmark/jni-perf/src/JniPerfBenchmark.java b/benchmark/jni-perf/src/JniPerfBenchmark.java
index b1b21ce..1e7cc2b 100644
--- a/benchmark/jni-perf/src/JniPerfBenchmark.java
+++ b/benchmark/jni-perf/src/JniPerfBenchmark.java
@@ -14,9 +14,7 @@
  * limitations under the License.
  */
 
-import com.google.caliper.SimpleBenchmark;
-
-public class JniPerfBenchmark extends SimpleBenchmark {
+public class JniPerfBenchmark {
   private static final String MSG = "ABCDE";
 
   native void perfJniEmptyCall();
diff --git a/benchmark/jobject-benchmark/src/JObjectBenchmark.java b/benchmark/jobject-benchmark/src/JObjectBenchmark.java
index f4c059c..90a53b3 100644
--- a/benchmark/jobject-benchmark/src/JObjectBenchmark.java
+++ b/benchmark/jobject-benchmark/src/JObjectBenchmark.java
@@ -14,9 +14,7 @@
  * limitations under the License.
  */
 
-import com.google.caliper.SimpleBenchmark;
-
-public class JObjectBenchmark extends SimpleBenchmark {
+public class JObjectBenchmark {
   public JObjectBenchmark() {
     // Make sure to link methods before benchmark starts.
     System.loadLibrary("artbenchmark");
diff --git a/benchmark/scoped-primitive-array/src/ScopedPrimitiveArrayBenchmark.java b/benchmark/scoped-primitive-array/src/ScopedPrimitiveArrayBenchmark.java
index be276fe..0ad9c36 100644
--- a/benchmark/scoped-primitive-array/src/ScopedPrimitiveArrayBenchmark.java
+++ b/benchmark/scoped-primitive-array/src/ScopedPrimitiveArrayBenchmark.java
@@ -14,9 +14,7 @@
  * limitations under the License.
  */
 
-import com.google.caliper.SimpleBenchmark;
-
-public class ScopedPrimitiveArrayBenchmark extends SimpleBenchmark {
+public class ScopedPrimitiveArrayBenchmark {
   // Measure adds the first and last element of the array by using ScopedPrimitiveArray.
   static native long measureByteArray(int reps, byte[] arr);
   static native long measureShortArray(int reps, short[] arr);
diff --git a/compiler/cfi_test.h b/compiler/cfi_test.h
index 230cb9a..f8b7460 100644
--- a/compiler/cfi_test.h
+++ b/compiler/cfi_test.h
@@ -55,7 +55,9 @@
                     kCFIFormat, 0, &debug_frame_data_, &debug_frame_patches);
     ReformatCfi(Objdump(false, "-W"), &lines);
     // Pretty-print assembly.
-    auto* opts = new DisassemblerOptions(false, actual_asm.data(), true);
+    const uint8_t* asm_base = actual_asm.data();
+    const uint8_t* asm_end = asm_base + actual_asm.size();
+    auto* opts = new DisassemblerOptions(false, asm_base, asm_end, true);
     std::unique_ptr<Disassembler> disasm(Disassembler::Create(isa, opts));
     std::stringstream stream;
     const uint8_t* base = actual_asm.data() + (isa == kThumb2 ? 1 : 0);
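
The new third argument threads the code buffer's end address into DisassemblerOptions, so
decoding can be bounded by the buffer rather than trusting the instruction stream. A minimal
sketch of the bounded-decode pattern this enables; DecodeOne() is a hypothetical stand-in,
not the ART Disassembler API:

    #include <cstddef>
    #include <cstdint>

    // Hypothetical per-instruction decoder: returns the instruction size,
    // or 0 when fewer than a whole instruction remains before `end`.
    static size_t DecodeOne(const uint8_t* cursor, const uint8_t* end) {
      const ptrdiff_t kInsnSize = 4;  // pretend fixed-width ISA
      return (end - cursor) >= kInsnSize ? static_cast<size_t>(kInsnSize) : 0u;
    }

    static void DisassembleAll(const uint8_t* base, const uint8_t* end) {
      for (const uint8_t* cursor = base; cursor < end; ) {
        size_t size = DecodeOne(cursor, end);
        if (size == 0u) {
          break;  // Truncated tail; without `end` this could over-read.
        }
        cursor += size;
      }
    }
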
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index 00ff522..be720ad 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -1028,6 +1028,9 @@
     for (const ClassLinker::DexCacheData& data : class_linker->GetDexCachesData()) {
       mirror::DexCache* dex_cache =
           down_cast<mirror::DexCache*>(self->DecodeJObject(data.weak_root));
+      if (dex_cache == nullptr) {
+        continue;
+      }
       const DexFile* dex_file = dex_cache->GetDexFile();
       if (!IsInBootImage(dex_cache)) {
         dex_cache_count += image_dex_files.find(dex_file) != image_dex_files.end() ? 1u : 0u;
@@ -1044,6 +1047,9 @@
     for (const ClassLinker::DexCacheData& data : class_linker->GetDexCachesData()) {
       mirror::DexCache* dex_cache =
           down_cast<mirror::DexCache*>(self->DecodeJObject(data.weak_root));
+      if (dex_cache == nullptr) {
+        continue;
+      }
       const DexFile* dex_file = dex_cache->GetDexFile();
       if (!IsInBootImage(dex_cache)) {
         non_image_dex_caches += image_dex_files.find(dex_file) != image_dex_files.end() ? 1u : 0u;
@@ -1055,6 +1061,9 @@
     for (const ClassLinker::DexCacheData& data : class_linker->GetDexCachesData()) {
       mirror::DexCache* dex_cache =
           down_cast<mirror::DexCache*>(self->DecodeJObject(data.weak_root));
+      if (dex_cache == nullptr) {
+        continue;
+      }
       const DexFile* dex_file = dex_cache->GetDexFile();
       if (!IsInBootImage(dex_cache) && image_dex_files.find(dex_file) != image_dex_files.end()) {
         dex_caches->Set<false>(i, dex_cache);
@@ -1213,18 +1222,17 @@
           AssignMethodOffset(&m, type, oat_index);
         }
         (any_dirty ? dirty_methods_ : clean_methods_) += num_methods;
-
-        // Assign offsets for all runtime methods in the IMT since these may hold conflict tables
-        // live.
-        if (as_klass->ShouldHaveEmbeddedImtAndVTable()) {
-          for (size_t i = 0; i < mirror::Class::kImtSize; ++i) {
-            ArtMethod* imt_method = as_klass->GetEmbeddedImTableEntry(i, target_ptr_size_);
-            DCHECK(imt_method != nullptr);
-            if (imt_method->IsRuntimeMethod() &&
-                !IsInBootImage(imt_method) &&
-                !NativeRelocationAssigned(imt_method)) {
-              AssignMethodOffset(imt_method, kNativeObjectRelocationTypeRuntimeMethod, oat_index);
-            }
+      }
+      // Assign offsets for all runtime methods in the IMT since these may hold conflict tables
+      // live.
+      if (as_klass->ShouldHaveEmbeddedImtAndVTable()) {
+        for (size_t i = 0; i < mirror::Class::kImtSize; ++i) {
+          ArtMethod* imt_method = as_klass->GetEmbeddedImTableEntry(i, target_ptr_size_);
+          DCHECK(imt_method != nullptr);
+          if (imt_method->IsRuntimeMethod() &&
+              !IsInBootImage(imt_method) &&
+              !NativeRelocationAssigned(imt_method)) {
+            AssignMethodOffset(imt_method, kNativeObjectRelocationTypeRuntimeMethod, oat_index);
           }
         }
       }
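
All three hunks above add the same guard: each dex cache is held through a weak root, which
the GC can clear (e.g., on class unloading), in which case DecodeJObject() returns null. A
self-contained sketch of the pattern, with hypothetical names standing in for the ART ones:

    #include <cstddef>
    #include <vector>

    struct DexCache {};
    struct DexCacheData {
      DexCache* weak_root;  // becomes null once the GC clears the weak root
    };

    // Hypothetical stand-in for self->DecodeJObject(data.weak_root).
    static DexCache* DecodeWeakRoot(const DexCacheData& data) {
      return data.weak_root;
    }

    static size_t CountLiveDexCaches(const std::vector<DexCacheData>& caches) {
      size_t count = 0;
      for (const DexCacheData& data : caches) {
        DexCache* dex_cache = DecodeWeakRoot(data);
        if (dex_cache == nullptr) {
          continue;  // Unloaded dex cache; skip it, as the new checks do.
        }
        ++count;  // ... or visit/assign the live dex cache ...
      }
      return count;
    }
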
diff --git a/compiler/jni/jni_cfi_test.cc b/compiler/jni/jni_cfi_test.cc
index 371019a..3526802 100644
--- a/compiler/jni/jni_cfi_test.cc
+++ b/compiler/jni/jni_cfi_test.cc
@@ -52,7 +52,7 @@
     std::unique_ptr<ManagedRuntimeCallingConvention> mr_conv(
         ManagedRuntimeCallingConvention::Create(&arena, is_static, is_synchronized, shorty, isa));
     const int frame_size(jni_conv->FrameSize());
-    const std::vector<ManagedRegister>& callee_save_regs = jni_conv->CalleeSaveRegisters();
+    ArrayRef<const ManagedRegister> callee_save_regs = jni_conv->CalleeSaveRegisters();
 
     // Assemble the method.
     std::unique_ptr<Assembler> jni_asm(Assembler::Create(&arena, isa));
diff --git a/compiler/jni/quick/arm/calling_convention_arm.cc b/compiler/jni/quick/arm/calling_convention_arm.cc
index 9d2732a..29411f0 100644
--- a/compiler/jni/quick/arm/calling_convention_arm.cc
+++ b/compiler/jni/quick/arm/calling_convention_arm.cc
@@ -31,10 +31,6 @@
   S0, S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, S12, S13, S14, S15
 };
 
-static const SRegister kHFSCalleeSaveRegisters[] = {
-  S16, S17, S18, S19, S20, S21, S22, S23, S24, S25, S26, S27, S28, S29, S30, S31
-};
-
 static const DRegister kHFDArgumentRegisters[] = {
   D0, D1, D2, D3, D4, D5, D6, D7
 };
@@ -42,6 +38,57 @@
 static_assert(arraysize(kHFDArgumentRegisters) * 2 == arraysize(kHFSArgumentRegisters),
     "ks d argument registers mismatch");
 
+static constexpr ManagedRegister kCalleeSaveRegisters[] = {
+    // Core registers.
+    ArmManagedRegister::FromCoreRegister(R5),
+    ArmManagedRegister::FromCoreRegister(R6),
+    ArmManagedRegister::FromCoreRegister(R7),
+    ArmManagedRegister::FromCoreRegister(R8),
+    ArmManagedRegister::FromCoreRegister(R10),
+    ArmManagedRegister::FromCoreRegister(R11),
+    // Hard float registers.
+    ArmManagedRegister::FromSRegister(S16),
+    ArmManagedRegister::FromSRegister(S17),
+    ArmManagedRegister::FromSRegister(S18),
+    ArmManagedRegister::FromSRegister(S19),
+    ArmManagedRegister::FromSRegister(S20),
+    ArmManagedRegister::FromSRegister(S21),
+    ArmManagedRegister::FromSRegister(S22),
+    ArmManagedRegister::FromSRegister(S23),
+    ArmManagedRegister::FromSRegister(S24),
+    ArmManagedRegister::FromSRegister(S25),
+    ArmManagedRegister::FromSRegister(S26),
+    ArmManagedRegister::FromSRegister(S27),
+    ArmManagedRegister::FromSRegister(S28),
+    ArmManagedRegister::FromSRegister(S29),
+    ArmManagedRegister::FromSRegister(S30),
+    ArmManagedRegister::FromSRegister(S31)
+};
+
+static constexpr uint32_t CalculateCoreCalleeSpillMask() {
+  // LR is a special callee save which is not reported by CalleeSaveRegisters().
+  uint32_t result = 1 << LR;
+  for (auto&& r : kCalleeSaveRegisters) {
+    if (r.AsArm().IsCoreRegister()) {
+      result |= (1 << r.AsArm().AsCoreRegister());
+    }
+  }
+  return result;
+}
+
+static constexpr uint32_t CalculateFpCalleeSpillMask() {
+  uint32_t result = 0;
+  for (auto&& r : kCalleeSaveRegisters) {
+    if (r.AsArm().IsSRegister()) {
+      result |= (1 << r.AsArm().AsSRegister());
+    }
+  }
+  return result;
+}
+
+static constexpr uint32_t kCoreCalleeSpillMask = CalculateCoreCalleeSpillMask();
+static constexpr uint32_t kFpCalleeSpillMask = CalculateFpCalleeSpillMask();
+
 // Calling convention
 
 ManagedRegister ArmManagedRuntimeCallingConvention::InterproceduralScratchRegister() {
@@ -223,32 +270,15 @@
     cur_reg++;  // bump the iterator for every argument
   }
   padding_ = padding;
-
-  callee_save_regs_.push_back(ArmManagedRegister::FromCoreRegister(R5));
-  callee_save_regs_.push_back(ArmManagedRegister::FromCoreRegister(R6));
-  callee_save_regs_.push_back(ArmManagedRegister::FromCoreRegister(R7));
-  callee_save_regs_.push_back(ArmManagedRegister::FromCoreRegister(R8));
-  callee_save_regs_.push_back(ArmManagedRegister::FromCoreRegister(R10));
-  callee_save_regs_.push_back(ArmManagedRegister::FromCoreRegister(R11));
-
-  for (size_t i = 0; i < arraysize(kHFSCalleeSaveRegisters); ++i) {
-    callee_save_regs_.push_back(ArmManagedRegister::FromSRegister(kHFSCalleeSaveRegisters[i]));
-  }
 }
 
 uint32_t ArmJniCallingConvention::CoreSpillMask() const {
   // Compute spill mask to agree with callee saves initialized in the constructor
-  uint32_t result = 0;
-  result = 1 << R5 | 1 << R6 | 1 << R7 | 1 << R8 | 1 << R10 | 1 << R11 | 1 << LR;
-  return result;
+  return kCoreCalleeSpillMask;
 }
 
 uint32_t ArmJniCallingConvention::FpSpillMask() const {
-  uint32_t result = 0;
-  for (size_t i = 0; i < arraysize(kHFSCalleeSaveRegisters); ++i) {
-    result |= (1 << kHFSCalleeSaveRegisters[i]);
-  }
-  return result;
+  return kFpCalleeSpillMask;
 }
 
 ManagedRegister ArmJniCallingConvention::ReturnScratchRegister() const {
@@ -269,6 +299,10 @@
                  kStackAlignment);
 }
 
+ArrayRef<const ManagedRegister> ArmJniCallingConvention::CalleeSaveRegisters() const {
+  return ArrayRef<const ManagedRegister>(kCalleeSaveRegisters);
+}
+
 // JniCallingConvention ABI follows AAPCS where longs and doubles must occur
 // in even register numbers and stack slots
 void ArmJniCallingConvention::Next() {
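
This is the pattern repeated for every architecture in this change: a single constexpr
register array now drives both CalleeSaveRegisters() and the spill masks, and the masks fold
at compile time, so the list and the masks can no longer drift apart the way a hand-written
mask and a vector of push_back()s could. A self-contained sketch of the technique (made-up
register numbering; requires C++14 relaxed constexpr):

    #include <cstdint>

    enum Reg : uint32_t { R5 = 5, R6 = 6, R7 = 7, R8 = 8, R10 = 10, R11 = 11, LR = 14 };

    static constexpr Reg kCalleeSaves[] = { R5, R6, R7, R8, R10, R11 };

    static constexpr uint32_t CalculateSpillMask() {
      uint32_t mask = 1u << LR;  // special callee save, not in the reported list
      for (Reg r : kCalleeSaves) {
        mask |= 1u << r;
      }
      return mask;
    }

    // Evaluated entirely at compile time.
    static constexpr uint32_t kSpillMask = CalculateSpillMask();
    static_assert(kSpillMask == 0x4de0u, "mask and register list must agree");
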
diff --git a/compiler/jni/quick/arm/calling_convention_arm.h b/compiler/jni/quick/arm/calling_convention_arm.h
index 35b5093..157880b 100644
--- a/compiler/jni/quick/arm/calling_convention_arm.h
+++ b/compiler/jni/quick/arm/calling_convention_arm.h
@@ -58,9 +58,7 @@
   void Next() OVERRIDE;  // Override default behavior for AAPCS
   size_t FrameSize() OVERRIDE;
   size_t OutArgSize() OVERRIDE;
-  const std::vector<ManagedRegister>& CalleeSaveRegisters() const OVERRIDE {
-    return callee_save_regs_;
-  }
+  ArrayRef<const ManagedRegister> CalleeSaveRegisters() const OVERRIDE;
   ManagedRegister ReturnScratchRegister() const OVERRIDE;
   uint32_t CoreSpillMask() const OVERRIDE;
   uint32_t FpSpillMask() const OVERRIDE;
@@ -78,9 +76,6 @@
   size_t NumberOfOutgoingStackArgs() OVERRIDE;
 
  private:
-  // TODO: these values aren't unique and can be shared amongst instances
-  std::vector<ManagedRegister> callee_save_regs_;
-
   // Padding to ensure longs and doubles are not split in AAPCS
   size_t padding_;
 
diff --git a/compiler/jni/quick/arm64/calling_convention_arm64.cc b/compiler/jni/quick/arm64/calling_convention_arm64.cc
index 9aef10e..ab56c1c 100644
--- a/compiler/jni/quick/arm64/calling_convention_arm64.cc
+++ b/compiler/jni/quick/arm64/calling_convention_arm64.cc
@@ -38,10 +38,65 @@
   S0, S1, S2, S3, S4, S5, S6, S7
 };
 
-static const DRegister kDCalleeSaveRegisters[] = {
-  D8, D9, D10, D11, D12, D13, D14, D15
+static constexpr ManagedRegister kCalleeSaveRegisters[] = {
+    // Core registers.
+    // Note: The native JNI function may call VM runtime functions which may suspend
+    // or trigger GC, and the JNI method frame becomes the top quick frame in those cases.
+    // So, to satisfy the GC, we need to save LR and the callee-save registers, similar to
+    // the CalleeSaveMethod(RefOnly) frame.
+    // The JNI function is the native function that the Java code wants to call.
+    // The JNI method is the method compiled by the JNI compiler.
+    // Call chain: managed code (Java) --> JNI method --> JNI function.
+    // The thread register (X19) is saved on the stack.
+    Arm64ManagedRegister::FromXRegister(X19),
+    Arm64ManagedRegister::FromXRegister(X20),
+    Arm64ManagedRegister::FromXRegister(X21),
+    Arm64ManagedRegister::FromXRegister(X22),
+    Arm64ManagedRegister::FromXRegister(X23),
+    Arm64ManagedRegister::FromXRegister(X24),
+    Arm64ManagedRegister::FromXRegister(X25),
+    Arm64ManagedRegister::FromXRegister(X26),
+    Arm64ManagedRegister::FromXRegister(X27),
+    Arm64ManagedRegister::FromXRegister(X28),
+    Arm64ManagedRegister::FromXRegister(X29),
+    Arm64ManagedRegister::FromXRegister(LR),
+    // Hard float registers.
+    // Consider the case java_method_1 --> jni method --> jni function --> java_method_2:
+    // we may break in java_method_2 and still need to find the values of the DEX registers
+    // in java_method_1, so all managed-code callee-saves need to be saved.
+    Arm64ManagedRegister::FromDRegister(D8),
+    Arm64ManagedRegister::FromDRegister(D9),
+    Arm64ManagedRegister::FromDRegister(D10),
+    Arm64ManagedRegister::FromDRegister(D11),
+    Arm64ManagedRegister::FromDRegister(D12),
+    Arm64ManagedRegister::FromDRegister(D13),
+    Arm64ManagedRegister::FromDRegister(D14),
+    Arm64ManagedRegister::FromDRegister(D15),
 };
 
+static constexpr uint32_t CalculateCoreCalleeSpillMask() {
+  uint32_t result = 0u;
+  for (auto&& r : kCalleeSaveRegisters) {
+    if (r.AsArm64().IsXRegister()) {
+      result |= (1 << r.AsArm64().AsXRegister());
+    }
+  }
+  return result;
+}
+
+static constexpr uint32_t CalculateFpCalleeSpillMask() {
+  uint32_t result = 0;
+  for (auto&& r : kCalleeSaveRegisters) {
+    if (r.AsArm64().IsDRegister()) {
+      result |= (1 << r.AsArm64().AsDRegister());
+    }
+  }
+  return result;
+}
+
+static constexpr uint32_t kCoreCalleeSpillMask = CalculateCoreCalleeSpillMask();
+static constexpr uint32_t kFpCalleeSpillMask = CalculateFpCalleeSpillMask();
+
 // Calling convention
 ManagedRegister Arm64ManagedRuntimeCallingConvention::InterproceduralScratchRegister() {
   return Arm64ManagedRegister::FromXRegister(X20);  // saved on entry restored on exit
@@ -157,47 +212,14 @@
 Arm64JniCallingConvention::Arm64JniCallingConvention(bool is_static, bool is_synchronized,
                                                      const char* shorty)
     : JniCallingConvention(is_static, is_synchronized, shorty, kFramePointerSize) {
-  uint32_t core_spill_mask = CoreSpillMask();
-  DCHECK_EQ(XZR, kNumberOfXRegisters - 1);  // Exclude XZR from the loop (avoid 1 << 32).
-  for (int x_reg = 0; x_reg < kNumberOfXRegisters - 1; ++x_reg) {
-    if (((1 << x_reg) & core_spill_mask) != 0) {
-      callee_save_regs_.push_back(
-          Arm64ManagedRegister::FromXRegister(static_cast<XRegister>(x_reg)));
-    }
-  }
-
-  uint32_t fp_spill_mask = FpSpillMask();
-  for (int d_reg = 0; d_reg < kNumberOfDRegisters; ++d_reg) {
-    if (((1 << d_reg) & fp_spill_mask) != 0) {
-      callee_save_regs_.push_back(
-          Arm64ManagedRegister::FromDRegister(static_cast<DRegister>(d_reg)));
-    }
-  }
 }
 
 uint32_t Arm64JniCallingConvention::CoreSpillMask() const {
-  // Compute spill mask to agree with callee saves initialized in the constructor.
-  // Note: The native jni function may call to some VM runtime functions which may suspend
-  // or trigger GC. And the jni method frame will become top quick frame in those cases.
-  // So we need to satisfy GC to save LR and callee-save registers which is similar to
-  // CalleeSaveMethod(RefOnly) frame.
-  // Jni function is the native function which the java code wants to call.
-  // Jni method is the method that compiled by jni compiler.
-  // Call chain: managed code(java) --> jni method --> jni function.
-  // Thread register(X19) is saved on stack.
-  return 1 << X19 | 1 << X20 | 1 << X21 | 1 << X22 | 1 << X23 | 1 << X24 |
-         1 << X25 | 1 << X26 | 1 << X27 | 1 << X28 | 1 << X29 | 1 << LR;
+  return kCoreCalleeSpillMask;
 }
 
 uint32_t Arm64JniCallingConvention::FpSpillMask() const {
-  // Considering the case, java_method_1 --> jni method --> jni function --> java_method_2, we may
-  // break on java_method_2 and we still need to find out the values of DEX registers in
-  // java_method_1. So all callee-saves(in managed code) need to be saved.
-  uint32_t result = 0;
-  for (size_t i = 0; i < arraysize(kDCalleeSaveRegisters); ++i) {
-    result |= (1 << kDCalleeSaveRegisters[i]);
-  }
-  return result;
+  return kFpCalleeSpillMask;
 }
 
 ManagedRegister Arm64JniCallingConvention::ReturnScratchRegister() const {
@@ -218,6 +240,10 @@
   return RoundUp(NumberOfOutgoingStackArgs() * kFramePointerSize, kStackAlignment);
 }
 
+ArrayRef<const ManagedRegister> Arm64JniCallingConvention::CalleeSaveRegisters() const {
+  return ArrayRef<const ManagedRegister>(kCalleeSaveRegisters);
+}
+
 bool Arm64JniCallingConvention::IsCurrentParamInRegister() {
   if (IsCurrentParamAFloatOrDouble()) {
     return (itr_float_and_doubles_ < 8);
diff --git a/compiler/jni/quick/arm64/calling_convention_arm64.h b/compiler/jni/quick/arm64/calling_convention_arm64.h
index 37c92b2..337e881 100644
--- a/compiler/jni/quick/arm64/calling_convention_arm64.h
+++ b/compiler/jni/quick/arm64/calling_convention_arm64.h
@@ -57,9 +57,7 @@
   // JNI calling convention
   size_t FrameSize() OVERRIDE;
   size_t OutArgSize() OVERRIDE;
-  const std::vector<ManagedRegister>& CalleeSaveRegisters() const OVERRIDE {
-    return callee_save_regs_;
-  }
+  ArrayRef<const ManagedRegister> CalleeSaveRegisters() const OVERRIDE;
   ManagedRegister ReturnScratchRegister() const OVERRIDE;
   uint32_t CoreSpillMask() const OVERRIDE;
   uint32_t FpSpillMask() const OVERRIDE;
@@ -77,9 +75,6 @@
   size_t NumberOfOutgoingStackArgs() OVERRIDE;
 
  private:
-  // TODO: these values aren't unique and can be shared amongst instances
-  std::vector<ManagedRegister> callee_save_regs_;
-
   DISALLOW_COPY_AND_ASSIGN(Arm64JniCallingConvention);
 };
 
diff --git a/compiler/jni/quick/calling_convention.h b/compiler/jni/quick/calling_convention.h
index 2c4b15c..e8f738d 100644
--- a/compiler/jni/quick/calling_convention.h
+++ b/compiler/jni/quick/calling_convention.h
@@ -17,12 +17,11 @@
 #ifndef ART_COMPILER_JNI_QUICK_CALLING_CONVENTION_H_
 #define ART_COMPILER_JNI_QUICK_CALLING_CONVENTION_H_
 
-#include <vector>
-
 #include "base/arena_object.h"
 #include "handle_scope.h"
 #include "primitive.h"
 #include "thread.h"
+#include "utils/array_ref.h"
 #include "utils/managed_register.h"
 
 namespace art {
@@ -301,7 +300,7 @@
   virtual bool RequiresSmallResultTypeExtension() const = 0;
 
   // Callee save registers to spill prior to native code (which may clobber)
-  virtual const std::vector<ManagedRegister>& CalleeSaveRegisters() const = 0;
+  virtual ArrayRef<const ManagedRegister> CalleeSaveRegisters() const = 0;
 
   // Spill mask values
   virtual uint32_t CoreSpillMask() const = 0;
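
The interface change works because ArrayRef is a non-owning pointer-plus-size view that can
be returned by value over a static constexpr array, which is what lets every backend drop
its per-instance callee_save_regs_ vector. A minimal sketch of such a view; the real class
lives in utils/array_ref.h with a richer interface, and SpanView here is a hypothetical
stand-in, not ART code:

    #include <cstddef>

    template <typename T>
    class SpanView {
     public:
      template <size_t n>
      constexpr SpanView(T (&array)[n]) : data_(array), size_(n) {}  // implicit, like ArrayRef
      constexpr T* begin() const { return data_; }
      constexpr T* end() const { return data_ + size_; }
      constexpr size_t size() const { return size_; }
     private:
      T* data_;
      size_t size_;
    };

    static constexpr int kCalleeSaves[] = { 5, 6, 7, 8, 10, 11 };

    // Returned by value: copies a pointer and a size, never allocates.
    static SpanView<const int> CalleeSaves() {
      return SpanView<const int>(kCalleeSaves);
    }
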
diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc
index 27714b8..4311a34 100644
--- a/compiler/jni/quick/jni_compiler.cc
+++ b/compiler/jni/quick/jni_compiler.cc
@@ -112,7 +112,7 @@
 
   // 1. Build the frame saving all callee saves
   const size_t frame_size(main_jni_conv->FrameSize());
-  const std::vector<ManagedRegister>& callee_save_regs = main_jni_conv->CalleeSaveRegisters();
+  ArrayRef<const ManagedRegister> callee_save_regs = main_jni_conv->CalleeSaveRegisters();
   __ BuildFrame(frame_size, mr_conv->MethodRegister(), callee_save_regs, mr_conv->EntrySpills());
   DCHECK_EQ(jni_asm->cfi().GetCurrentCFAOffset(), static_cast<int>(frame_size));
 
diff --git a/compiler/jni/quick/mips/calling_convention_mips.cc b/compiler/jni/quick/mips/calling_convention_mips.cc
index 2d31a98..3d4d140 100644
--- a/compiler/jni/quick/mips/calling_convention_mips.cc
+++ b/compiler/jni/quick/mips/calling_convention_mips.cc
@@ -27,6 +27,32 @@
 static const FRegister kFArgumentRegisters[] = { F12, F14 };
 static const DRegister kDArgumentRegisters[] = { D6, D7 };
 
+static constexpr ManagedRegister kCalleeSaveRegisters[] = {
+    // Core registers.
+    MipsManagedRegister::FromCoreRegister(S2),
+    MipsManagedRegister::FromCoreRegister(S3),
+    MipsManagedRegister::FromCoreRegister(S4),
+    MipsManagedRegister::FromCoreRegister(S5),
+    MipsManagedRegister::FromCoreRegister(S6),
+    MipsManagedRegister::FromCoreRegister(S7),
+    MipsManagedRegister::FromCoreRegister(FP),
+    // No hard float callee saves.
+};
+
+static constexpr uint32_t CalculateCoreCalleeSpillMask() {
+  // RA is a special callee save which is not reported by CalleeSaveRegisters().
+  uint32_t result = 1 << RA;
+  for (auto&& r : kCalleeSaveRegisters) {
+    if (r.AsMips().IsCoreRegister()) {
+      result |= (1 << r.AsMips().AsCoreRegister());
+    }
+  }
+  return result;
+}
+
+static constexpr uint32_t kCoreCalleeSpillMask = CalculateCoreCalleeSpillMask();
+static constexpr uint32_t kFpCalleeSpillMask = 0u;
+
 // Calling convention
 ManagedRegister MipsManagedRuntimeCallingConvention::InterproceduralScratchRegister() {
   return MipsManagedRegister::FromCoreRegister(T9);
@@ -161,21 +187,14 @@
     cur_reg++;  // bump the iterator for every argument
   }
   padding_ = padding;
-
-  callee_save_regs_.push_back(MipsManagedRegister::FromCoreRegister(S2));
-  callee_save_regs_.push_back(MipsManagedRegister::FromCoreRegister(S3));
-  callee_save_regs_.push_back(MipsManagedRegister::FromCoreRegister(S4));
-  callee_save_regs_.push_back(MipsManagedRegister::FromCoreRegister(S5));
-  callee_save_regs_.push_back(MipsManagedRegister::FromCoreRegister(S6));
-  callee_save_regs_.push_back(MipsManagedRegister::FromCoreRegister(S7));
-  callee_save_regs_.push_back(MipsManagedRegister::FromCoreRegister(FP));
 }
 
 uint32_t MipsJniCallingConvention::CoreSpillMask() const {
-  // Compute spill mask to agree with callee saves initialized in the constructor
-  uint32_t result = 0;
-  result = 1 << S2 | 1 << S3 | 1 << S4 | 1 << S5 | 1 << S6 | 1 << S7 | 1 << FP | 1 << RA;
-  return result;
+  return kCoreCalleeSpillMask;
+}
+
+uint32_t MipsJniCallingConvention::FpSpillMask() const {
+  return kFpCalleeSpillMask;
 }
 
 ManagedRegister MipsJniCallingConvention::ReturnScratchRegister() const {
@@ -196,6 +215,10 @@
   return RoundUp(NumberOfOutgoingStackArgs() * kFramePointerSize + padding_, kStackAlignment);
 }
 
+ArrayRef<const ManagedRegister> MipsJniCallingConvention::CalleeSaveRegisters() const {
+  return ArrayRef<const ManagedRegister>(kCalleeSaveRegisters);
+}
+
 // JniCallingConvention ABI follows AAPCS where longs and doubles must occur
 // in even register numbers and stack slots
 void MipsJniCallingConvention::Next() {
diff --git a/compiler/jni/quick/mips/calling_convention_mips.h b/compiler/jni/quick/mips/calling_convention_mips.h
index dc45432..5c128b0 100644
--- a/compiler/jni/quick/mips/calling_convention_mips.h
+++ b/compiler/jni/quick/mips/calling_convention_mips.h
@@ -58,14 +58,10 @@
   void Next() OVERRIDE;  // Override default behavior for AAPCS
   size_t FrameSize() OVERRIDE;
   size_t OutArgSize() OVERRIDE;
-  const std::vector<ManagedRegister>& CalleeSaveRegisters() const OVERRIDE {
-    return callee_save_regs_;
-  }
+  ArrayRef<const ManagedRegister> CalleeSaveRegisters() const OVERRIDE;
   ManagedRegister ReturnScratchRegister() const OVERRIDE;
   uint32_t CoreSpillMask() const OVERRIDE;
-  uint32_t FpSpillMask() const OVERRIDE {
-    return 0;  // Floats aren't spilled in JNI down call
-  }
+  uint32_t FpSpillMask() const OVERRIDE;
   bool IsCurrentParamInRegister() OVERRIDE;
   bool IsCurrentParamOnStack() OVERRIDE;
   ManagedRegister CurrentParamRegister() OVERRIDE;
@@ -80,9 +76,6 @@
   size_t NumberOfOutgoingStackArgs() OVERRIDE;
 
  private:
-  // TODO: these values aren't unique and can be shared amongst instances
-  std::vector<ManagedRegister> callee_save_regs_;
-
   // Padding to ensure longs and doubles are not split in AAPCS
   size_t padding_;
 
diff --git a/compiler/jni/quick/mips64/calling_convention_mips64.cc b/compiler/jni/quick/mips64/calling_convention_mips64.cc
index 807d740..f2e1da8 100644
--- a/compiler/jni/quick/mips64/calling_convention_mips64.cc
+++ b/compiler/jni/quick/mips64/calling_convention_mips64.cc
@@ -31,6 +31,33 @@
   F12, F13, F14, F15, F16, F17, F18, F19
 };
 
+static constexpr ManagedRegister kCalleeSaveRegisters[] = {
+    // Core registers.
+    Mips64ManagedRegister::FromGpuRegister(S2),
+    Mips64ManagedRegister::FromGpuRegister(S3),
+    Mips64ManagedRegister::FromGpuRegister(S4),
+    Mips64ManagedRegister::FromGpuRegister(S5),
+    Mips64ManagedRegister::FromGpuRegister(S6),
+    Mips64ManagedRegister::FromGpuRegister(S7),
+    Mips64ManagedRegister::FromGpuRegister(GP),
+    Mips64ManagedRegister::FromGpuRegister(S8),
+    // No hard float callee saves.
+};
+
+static constexpr uint32_t CalculateCoreCalleeSpillMask() {
+  // RA is a special callee save which is not reported by CalleeSaveRegisters().
+  uint32_t result = 1 << RA;
+  for (auto&& r : kCalleeSaveRegisters) {
+    if (r.AsMips64().IsGpuRegister()) {
+      result |= (1 << r.AsMips64().AsGpuRegister());
+    }
+  }
+  return result;
+}
+
+static constexpr uint32_t kCoreCalleeSpillMask = CalculateCoreCalleeSpillMask();
+static constexpr uint32_t kFpCalleeSpillMask = 0u;
+
 // Calling convention
 ManagedRegister Mips64ManagedRuntimeCallingConvention::InterproceduralScratchRegister() {
   return Mips64ManagedRegister::FromGpuRegister(T9);
@@ -126,22 +153,14 @@
 Mips64JniCallingConvention::Mips64JniCallingConvention(bool is_static, bool is_synchronized,
                                                        const char* shorty)
     : JniCallingConvention(is_static, is_synchronized, shorty, kFramePointerSize) {
-  callee_save_regs_.push_back(Mips64ManagedRegister::FromGpuRegister(S2));
-  callee_save_regs_.push_back(Mips64ManagedRegister::FromGpuRegister(S3));
-  callee_save_regs_.push_back(Mips64ManagedRegister::FromGpuRegister(S4));
-  callee_save_regs_.push_back(Mips64ManagedRegister::FromGpuRegister(S5));
-  callee_save_regs_.push_back(Mips64ManagedRegister::FromGpuRegister(S6));
-  callee_save_regs_.push_back(Mips64ManagedRegister::FromGpuRegister(S7));
-  callee_save_regs_.push_back(Mips64ManagedRegister::FromGpuRegister(GP));
-  callee_save_regs_.push_back(Mips64ManagedRegister::FromGpuRegister(S8));
 }
 
 uint32_t Mips64JniCallingConvention::CoreSpillMask() const {
-  // Compute spill mask to agree with callee saves initialized in the constructor
-  uint32_t result = 0;
-  result = 1 << S2 | 1 << S3 | 1 << S4 | 1 << S5 | 1 << S6 | 1 << S7 | 1 << GP | 1 << S8 | 1 << RA;
-  DCHECK_EQ(static_cast<size_t>(POPCOUNT(result)), callee_save_regs_.size() + 1);
-  return result;
+  return kCoreCalleeSpillMask;
+}
+
+uint32_t Mips64JniCallingConvention::FpSpillMask() const {
+  return kFpCalleeSpillMask;
 }
 
 ManagedRegister Mips64JniCallingConvention::ReturnScratchRegister() const {
@@ -162,6 +181,10 @@
   return RoundUp(NumberOfOutgoingStackArgs() * kFramePointerSize, kStackAlignment);
 }
 
+ArrayRef<const ManagedRegister> Mips64JniCallingConvention::CalleeSaveRegisters() const {
+  return ArrayRef<const ManagedRegister>(kCalleeSaveRegisters);
+}
+
 bool Mips64JniCallingConvention::IsCurrentParamInRegister() {
   return itr_args_ < 8;
 }
diff --git a/compiler/jni/quick/mips64/calling_convention_mips64.h b/compiler/jni/quick/mips64/calling_convention_mips64.h
index 3d6aab7..99ea3cd 100644
--- a/compiler/jni/quick/mips64/calling_convention_mips64.h
+++ b/compiler/jni/quick/mips64/calling_convention_mips64.h
@@ -57,14 +57,10 @@
   // JNI calling convention
   size_t FrameSize() OVERRIDE;
   size_t OutArgSize() OVERRIDE;
-  const std::vector<ManagedRegister>& CalleeSaveRegisters() const OVERRIDE {
-    return callee_save_regs_;
-  }
+  ArrayRef<const ManagedRegister> CalleeSaveRegisters() const OVERRIDE;
   ManagedRegister ReturnScratchRegister() const OVERRIDE;
   uint32_t CoreSpillMask() const OVERRIDE;
-  uint32_t FpSpillMask() const OVERRIDE {
-    return 0;  // Floats aren't spilled in JNI down call
-  }
+  uint32_t FpSpillMask() const OVERRIDE;
   bool IsCurrentParamInRegister() OVERRIDE;
   bool IsCurrentParamOnStack() OVERRIDE;
   ManagedRegister CurrentParamRegister() OVERRIDE;
@@ -79,9 +75,6 @@
   size_t NumberOfOutgoingStackArgs() OVERRIDE;
 
  private:
-  // TODO: these values aren't unique and can be shared amongst instances
-  std::vector<ManagedRegister> callee_save_regs_;
-
   DISALLOW_COPY_AND_ASSIGN(Mips64JniCallingConvention);
 };
 
diff --git a/compiler/jni/quick/x86/calling_convention_x86.cc b/compiler/jni/quick/x86/calling_convention_x86.cc
index 322caca..22c7cd0 100644
--- a/compiler/jni/quick/x86/calling_convention_x86.cc
+++ b/compiler/jni/quick/x86/calling_convention_x86.cc
@@ -23,6 +23,28 @@
 namespace art {
 namespace x86 {
 
+static constexpr ManagedRegister kCalleeSaveRegisters[] = {
+    // Core registers.
+    X86ManagedRegister::FromCpuRegister(EBP),
+    X86ManagedRegister::FromCpuRegister(ESI),
+    X86ManagedRegister::FromCpuRegister(EDI),
+    // No hard float callee saves.
+};
+
+static constexpr uint32_t CalculateCoreCalleeSpillMask() {
+  // The spilled PC gets a special marker.
+  uint32_t result = 1 << kNumberOfCpuRegisters;
+  for (auto&& r : kCalleeSaveRegisters) {
+    if (r.AsX86().IsCpuRegister()) {
+      result |= (1 << r.AsX86().AsCpuRegister());
+    }
+  }
+  return result;
+}
+
+static constexpr uint32_t kCoreCalleeSpillMask = CalculateCoreCalleeSpillMask();
+static constexpr uint32_t kFpCalleeSpillMask = 0u;
+
 // Calling convention
 
 ManagedRegister X86ManagedRuntimeCallingConvention::InterproceduralScratchRegister() {
@@ -169,13 +191,14 @@
 X86JniCallingConvention::X86JniCallingConvention(bool is_static, bool is_synchronized,
                                                  const char* shorty)
     : JniCallingConvention(is_static, is_synchronized, shorty, kFramePointerSize) {
-  callee_save_regs_.push_back(X86ManagedRegister::FromCpuRegister(EBP));
-  callee_save_regs_.push_back(X86ManagedRegister::FromCpuRegister(ESI));
-  callee_save_regs_.push_back(X86ManagedRegister::FromCpuRegister(EDI));
 }
 
 uint32_t X86JniCallingConvention::CoreSpillMask() const {
-  return 1 << EBP | 1 << ESI | 1 << EDI | 1 << kNumberOfCpuRegisters;
+  return kCoreCalleeSpillMask;
+}
+
+uint32_t X86JniCallingConvention::FpSpillMask() const {
+  return kFpCalleeSpillMask;
 }
 
 size_t X86JniCallingConvention::FrameSize() {
@@ -192,6 +215,10 @@
   return RoundUp(NumberOfOutgoingStackArgs() * kFramePointerSize, kStackAlignment);
 }
 
+ArrayRef<const ManagedRegister> X86JniCallingConvention::CalleeSaveRegisters() const {
+  return ArrayRef<const ManagedRegister>(kCalleeSaveRegisters);
+}
+
 bool X86JniCallingConvention::IsCurrentParamInRegister() {
   return false;  // Everything is passed by stack.
 }
diff --git a/compiler/jni/quick/x86/calling_convention_x86.h b/compiler/jni/quick/x86/calling_convention_x86.h
index cdf0956..9d678b7 100644
--- a/compiler/jni/quick/x86/calling_convention_x86.h
+++ b/compiler/jni/quick/x86/calling_convention_x86.h
@@ -59,14 +59,10 @@
   // JNI calling convention
   size_t FrameSize() OVERRIDE;
   size_t OutArgSize() OVERRIDE;
-  const std::vector<ManagedRegister>& CalleeSaveRegisters() const OVERRIDE {
-    return callee_save_regs_;
-  }
+  ArrayRef<const ManagedRegister> CalleeSaveRegisters() const OVERRIDE;
   ManagedRegister ReturnScratchRegister() const OVERRIDE;
   uint32_t CoreSpillMask() const OVERRIDE;
-  uint32_t FpSpillMask() const OVERRIDE {
-    return 0;
-  }
+  uint32_t FpSpillMask() const OVERRIDE;
   bool IsCurrentParamInRegister() OVERRIDE;
   bool IsCurrentParamOnStack() OVERRIDE;
   ManagedRegister CurrentParamRegister() OVERRIDE;
@@ -81,9 +77,6 @@
   size_t NumberOfOutgoingStackArgs() OVERRIDE;
 
  private:
-  // TODO: these values aren't unique and can be shared amongst instances
-  std::vector<ManagedRegister> callee_save_regs_;
-
   DISALLOW_COPY_AND_ASSIGN(X86JniCallingConvention);
 };
 
diff --git a/compiler/jni/quick/x86_64/calling_convention_x86_64.cc b/compiler/jni/quick/x86_64/calling_convention_x86_64.cc
index b6b11ca..cc4d232 100644
--- a/compiler/jni/quick/x86_64/calling_convention_x86_64.cc
+++ b/compiler/jni/quick/x86_64/calling_convention_x86_64.cc
@@ -24,6 +24,45 @@
 namespace art {
 namespace x86_64 {
 
+static constexpr ManagedRegister kCalleeSaveRegisters[] = {
+    // Core registers.
+    X86_64ManagedRegister::FromCpuRegister(RBX),
+    X86_64ManagedRegister::FromCpuRegister(RBP),
+    X86_64ManagedRegister::FromCpuRegister(R12),
+    X86_64ManagedRegister::FromCpuRegister(R13),
+    X86_64ManagedRegister::FromCpuRegister(R14),
+    X86_64ManagedRegister::FromCpuRegister(R15),
+    // Hard float registers.
+    X86_64ManagedRegister::FromXmmRegister(XMM12),
+    X86_64ManagedRegister::FromXmmRegister(XMM13),
+    X86_64ManagedRegister::FromXmmRegister(XMM14),
+    X86_64ManagedRegister::FromXmmRegister(XMM15),
+};
+
+static constexpr uint32_t CalculateCoreCalleeSpillMask() {
+  // The spilled PC gets a special marker.
+  uint32_t result = 1 << kNumberOfCpuRegisters;
+  for (auto&& r : kCalleeSaveRegisters) {
+    if (r.AsX86_64().IsCpuRegister()) {
+      result |= (1 << r.AsX86_64().AsCpuRegister().AsRegister());
+    }
+  }
+  return result;
+}
+
+static constexpr uint32_t CalculateFpCalleeSpillMask() {
+  uint32_t result = 0;
+  for (auto&& r : kCalleeSaveRegisters) {
+    if (r.AsX86_64().IsXmmRegister()) {
+      result |= (1 << r.AsX86_64().AsXmmRegister().AsFloatRegister());
+    }
+  }
+  return result;
+}
+
+static constexpr uint32_t kCoreCalleeSpillMask = CalculateCoreCalleeSpillMask();
+static constexpr uint32_t kFpCalleeSpillMask = CalculateFpCalleeSpillMask();
+
 // Calling convention
 
 ManagedRegister X86_64ManagedRuntimeCallingConvention::InterproceduralScratchRegister() {
@@ -125,25 +164,14 @@
 X86_64JniCallingConvention::X86_64JniCallingConvention(bool is_static, bool is_synchronized,
                                                        const char* shorty)
     : JniCallingConvention(is_static, is_synchronized, shorty, kFramePointerSize) {
-  callee_save_regs_.push_back(X86_64ManagedRegister::FromCpuRegister(RBX));
-  callee_save_regs_.push_back(X86_64ManagedRegister::FromCpuRegister(RBP));
-  callee_save_regs_.push_back(X86_64ManagedRegister::FromCpuRegister(R12));
-  callee_save_regs_.push_back(X86_64ManagedRegister::FromCpuRegister(R13));
-  callee_save_regs_.push_back(X86_64ManagedRegister::FromCpuRegister(R14));
-  callee_save_regs_.push_back(X86_64ManagedRegister::FromCpuRegister(R15));
-  callee_save_regs_.push_back(X86_64ManagedRegister::FromXmmRegister(XMM12));
-  callee_save_regs_.push_back(X86_64ManagedRegister::FromXmmRegister(XMM13));
-  callee_save_regs_.push_back(X86_64ManagedRegister::FromXmmRegister(XMM14));
-  callee_save_regs_.push_back(X86_64ManagedRegister::FromXmmRegister(XMM15));
 }
 
 uint32_t X86_64JniCallingConvention::CoreSpillMask() const {
-  return 1 << RBX | 1 << RBP | 1 << R12 | 1 << R13 | 1 << R14 | 1 << R15 |
-      1 << kNumberOfCpuRegisters;
+  return kCoreCalleeSpillMask;
 }
 
 uint32_t X86_64JniCallingConvention::FpSpillMask() const {
-  return 1 << XMM12 | 1 << XMM13 | 1 << XMM14 | 1 << XMM15;
+  return kFpCalleeSpillMask;
 }
 
 size_t X86_64JniCallingConvention::FrameSize() {
@@ -160,6 +188,10 @@
   return RoundUp(NumberOfOutgoingStackArgs() * kFramePointerSize, kStackAlignment);
 }
 
+ArrayRef<const ManagedRegister> X86_64JniCallingConvention::CalleeSaveRegisters() const {
+  return ArrayRef<const ManagedRegister>(kCalleeSaveRegisters);
+}
+
 bool X86_64JniCallingConvention::IsCurrentParamInRegister() {
   return !IsCurrentParamOnStack();
 }
diff --git a/compiler/jni/quick/x86_64/calling_convention_x86_64.h b/compiler/jni/quick/x86_64/calling_convention_x86_64.h
index 6e47c9f..e2d3d48 100644
--- a/compiler/jni/quick/x86_64/calling_convention_x86_64.h
+++ b/compiler/jni/quick/x86_64/calling_convention_x86_64.h
@@ -55,9 +55,7 @@
   // JNI calling convention
   size_t FrameSize() OVERRIDE;
   size_t OutArgSize() OVERRIDE;
-  const std::vector<ManagedRegister>& CalleeSaveRegisters() const OVERRIDE {
-    return callee_save_regs_;
-  }
+  ArrayRef<const ManagedRegister> CalleeSaveRegisters() const OVERRIDE;
   ManagedRegister ReturnScratchRegister() const OVERRIDE;
   uint32_t CoreSpillMask() const OVERRIDE;
   uint32_t FpSpillMask() const OVERRIDE;
@@ -75,9 +73,6 @@
   size_t NumberOfOutgoingStackArgs() OVERRIDE;
 
  private:
-  // TODO: these values aren't unique and can be shared amongst instances
-  std::vector<ManagedRegister> callee_save_regs_;
-
   DISALLOW_COPY_AND_ASSIGN(X86_64JniCallingConvention);
 };
 
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index e7fa4e4..08670a0 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -50,6 +50,7 @@
 #include "mirror/array-inl.h"
 #include "mirror/object_array-inl.h"
 #include "mirror/object_reference.h"
+#include "mirror/string.h"
 #include "parallel_move_resolver.h"
 #include "ssa_liveness_analysis.h"
 #include "utils/assembler.h"
@@ -139,6 +140,12 @@
   return pointer_size * index;
 }
 
+uint32_t CodeGenerator::GetArrayLengthOffset(HArrayLength* array_length) {
+  return array_length->IsStringLength()
+      ? mirror::String::CountOffset().Uint32Value()
+      : mirror::Array::LengthOffset().Uint32Value();
+}
+
 bool CodeGenerator::GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) const {
   DCHECK_EQ((*block_order_)[current_block_index_], current);
   return GetNextBlockToEmit() == FirstNonEmptyBlock(next);
@@ -1298,4 +1305,18 @@
   locations->AddTemp(Location::RequiresRegister());
 }
 
+uint32_t CodeGenerator::GetReferenceSlowFlagOffset() const {
+  ScopedObjectAccess soa(Thread::Current());
+  mirror::Class* klass = mirror::Reference::GetJavaLangRefReference();
+  DCHECK(klass->IsInitialized());
+  return klass->GetSlowPathFlagOffset().Uint32Value();
+}
+
+uint32_t CodeGenerator::GetReferenceDisableFlagOffset() const {
+  ScopedObjectAccess soa(Thread::Current());
+  mirror::Class* klass = mirror::Reference::GetJavaLangRefReference();
+  DCHECK(klass->IsInitialized());
+  return klass->GetDisableIntrinsicFlagOffset().Uint32Value();
+}
+
 }  // namespace art
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index d69c410..82a54d2 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -340,6 +340,11 @@
   // Pointer variant for ArtMethod and ArtField arrays.
   size_t GetCachePointerOffset(uint32_t index);
 
+  // Helper that returns the offset of the array's length field.
+  // Note: Besides normal arrays, we also use HArrayLength to access the
+  // String's `count` field in String intrinsics.
+  static uint32_t GetArrayLengthOffset(HArrayLength* array_length);
+
   void EmitParallelMoves(Location from1,
                          Location to1,
                          Primitive::Type type1,
@@ -464,6 +469,9 @@
 
   virtual void GenerateNop() = 0;
 
+  uint32_t GetReferenceSlowFlagOffset() const;
+  uint32_t GetReferenceDisableFlagOffset() const;
+
  protected:
   // Method patch info used for recording locations of required linker patches and
   // target methods. The target method can be used for various purposes, whether for
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 197e473..e010662 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -4742,7 +4742,7 @@
 
 void InstructionCodeGeneratorARM::VisitArrayLength(HArrayLength* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  uint32_t offset = mirror::Array::LengthOffset().Uint32Value();
+  uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
   Register obj = locations->InAt(0).AsRegister<Register>();
   Register out = locations->Out().AsRegister<Register>();
   __ LoadFromOffset(kLoadWord, out, obj, offset);
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 9680f2b..261c04f 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -2118,9 +2118,9 @@
 }
 
 void InstructionCodeGeneratorARM64::VisitArrayLength(HArrayLength* instruction) {
+  uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
   BlockPoolsScope block_pools(GetVIXLAssembler());
-  __ Ldr(OutputRegister(instruction),
-         HeapOperand(InputRegisterAt(instruction, 0), mirror::Array::LengthOffset()));
+  __ Ldr(OutputRegister(instruction), HeapOperand(InputRegisterAt(instruction, 0), offset));
   codegen_->MaybeRecordImplicitNullCheck(instruction);
 }
 
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 12d1164..fb50680 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -1803,7 +1803,7 @@
 
 void InstructionCodeGeneratorMIPS::VisitArrayLength(HArrayLength* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  uint32_t offset = mirror::Array::LengthOffset().Uint32Value();
+  uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
   Register obj = locations->InAt(0).AsRegister<Register>();
   Register out = locations->Out().AsRegister<Register>();
   __ LoadFromOffset(kLoadWord, out, obj, offset);
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 56ac38e..e67d8d0 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -1426,7 +1426,7 @@
 
 void InstructionCodeGeneratorMIPS64::VisitArrayLength(HArrayLength* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  uint32_t offset = mirror::Array::LengthOffset().Uint32Value();
+  uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
   GpuRegister obj = locations->InAt(0).AsRegister<GpuRegister>();
   GpuRegister out = locations->Out().AsRegister<GpuRegister>();
   __ LoadFromOffset(kLoadWord, out, obj, offset);
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 6dc480b..bdbafcd 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -4308,16 +4308,18 @@
   // save one load. However, since this is just an intrinsic slow path we prefer this
   // simple and more robust approach rather than trying to determine if that's the case.
   SlowPathCode* slow_path = GetCurrentSlowPath();
-  DCHECK(slow_path != nullptr);  // For intrinsified invokes the call is emitted on the slow path.
-  if (slow_path->IsCoreRegisterSaved(location.AsRegister<Register>())) {
-    int stack_offset = slow_path->GetStackOffsetOfCoreRegister(location.AsRegister<Register>());
-    __ movl(temp, Address(ESP, stack_offset));
-    return temp;
+  if (slow_path != nullptr) {
+    if (slow_path->IsCoreRegisterSaved(location.AsRegister<Register>())) {
+      int stack_offset = slow_path->GetStackOffsetOfCoreRegister(location.AsRegister<Register>());
+      __ movl(temp, Address(ESP, stack_offset));
+      return temp;
+    }
   }
   return location.AsRegister<Register>();
 }
 
-void CodeGeneratorX86::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) {
+Location CodeGeneratorX86::GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
+                                                                  Location temp) {
   Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
   switch (invoke->GetMethodLoadKind()) {
     case HInvokeStaticOrDirect::MethodLoadKind::kStringInit:
@@ -4366,6 +4368,11 @@
       break;
     }
   }
+  return callee_method;
+}
+
+void CodeGeneratorX86::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) {
+  Location callee_method = GenerateCalleeMethodStaticOrDirectCall(invoke, temp);
 
   switch (invoke->GetCodePtrLocation()) {
     case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
@@ -5480,7 +5487,7 @@
 
 void InstructionCodeGeneratorX86::VisitArrayLength(HArrayLength* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  uint32_t offset = mirror::Array::LengthOffset().Uint32Value();
+  uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
   Register obj = locations->InAt(0).AsRegister<Register>();
   Register out = locations->Out().AsRegister<Register>();
   __ movl(out, Address(obj, offset));
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index fe7d3ed..98dc8ca 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -398,6 +398,7 @@
       MethodReference target_method) OVERRIDE;
 
   // Generate a call to a static or direct method.
+  Location GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp);
   void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE;
   // Generate a call to a virtual method.
   void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE;
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 96ec09c..30eca2c 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -762,10 +762,9 @@
   }
 }
 
-void CodeGeneratorX86_64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
-                                                     Location temp) {
+Location CodeGeneratorX86_64::GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
+                                                                     Location temp) {
   // All registers are assumed to be correctly set up.
-
   Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
   switch (invoke->GetMethodLoadKind()) {
     case HInvokeStaticOrDirect::MethodLoadKind::kStringInit:
@@ -815,6 +814,13 @@
       break;
     }
   }
+  return callee_method;
+}
+
+void CodeGeneratorX86_64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
+                                                     Location temp) {
+  // All registers are assumed to be correctly set up.
+  Location callee_method = GenerateCalleeMethodStaticOrDirectCall(invoke, temp);
 
   switch (invoke->GetCodePtrLocation()) {
     case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
@@ -4956,7 +4962,7 @@
 
 void InstructionCodeGeneratorX86_64::VisitArrayLength(HArrayLength* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  uint32_t offset = mirror::Array::LengthOffset().Uint32Value();
+  uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
   CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
   __ movl(out, Address(obj, offset));
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index d9908bb..7cf1245 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -394,6 +394,7 @@
       const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
       MethodReference target_method) OVERRIDE;
 
+  Location GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp);
   void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE;
   void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE;
 
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index 9efc13f..6aec463 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -98,7 +98,9 @@
                                               DisassemblerOptions* options);
 class HGraphVisualizerDisassembler {
  public:
-  HGraphVisualizerDisassembler(InstructionSet instruction_set, const uint8_t* base_address)
+  HGraphVisualizerDisassembler(InstructionSet instruction_set,
+                               const uint8_t* base_address,
+                               const uint8_t* end_address)
       : instruction_set_(instruction_set), disassembler_(nullptr) {
     libart_disassembler_handle_ =
         dlopen(kIsDebugBuild ? "libartd-disassembler.so" : "libart-disassembler.so", RTLD_NOW);
@@ -119,6 +121,7 @@
             instruction_set,
             new DisassemblerOptions(/* absolute_addresses */ false,
                                     base_address,
+                                    end_address,
                                     /* can_read_literals */ true)));
   }
 
@@ -174,7 +177,9 @@
         disassembler_(disasm_info_ != nullptr
                       ? new HGraphVisualizerDisassembler(
                             codegen_.GetInstructionSet(),
-                            codegen_.GetAssembler().CodeBufferBaseAddress())
+                            codegen_.GetAssembler().CodeBufferBaseAddress(),
+                            codegen_.GetAssembler().CodeBufferBaseAddress()
+                                + codegen_.GetAssembler().CodeSize())
                       : nullptr),
         indent_(0) {}
 
@@ -389,6 +394,11 @@
         << instance_of->MustDoNullCheck() << std::noboolalpha;
   }
 
+  void VisitArrayLength(HArrayLength* array_length) OVERRIDE {
+    StartAttributeStream("is_string_length") << std::boolalpha
+        << array_length->IsStringLength() << std::noboolalpha;
+  }
+
   void VisitArraySet(HArraySet* array_set) OVERRIDE {
     StartAttributeStream("value_can_be_null") << std::boolalpha
         << array_set->GetValueCanBeNull() << std::noboolalpha;
@@ -544,26 +554,19 @@
       }
     }
 
-    if (IsPass(LICM::kLoopInvariantCodeMotionPassName)
-        || IsPass(HDeadCodeElimination::kFinalDeadCodeEliminationPassName)
-        || IsPass(HDeadCodeElimination::kInitialDeadCodeEliminationPassName)
-        || IsPass(BoundsCheckElimination::kBoundsCheckEliminationPassName)
-        || IsPass(RegisterAllocator::kRegisterAllocatorPassName)
-        || IsPass(HGraphBuilder::kBuilderPassName)) {
-      HLoopInformation* info = instruction->GetBlock()->GetLoopInformation();
-      if (info == nullptr) {
-        StartAttributeStream("loop") << "none";
+    HLoopInformation* loop_info = instruction->GetBlock()->GetLoopInformation();
+    if (loop_info == nullptr) {
+      StartAttributeStream("loop") << "none";
+    } else {
+      StartAttributeStream("loop") << "B" << loop_info->GetHeader()->GetBlockId();
+      HLoopInformation* outer = loop_info->GetPreHeader()->GetLoopInformation();
+      if (outer != nullptr) {
+        StartAttributeStream("outer_loop") << "B" << outer->GetHeader()->GetBlockId();
       } else {
-        StartAttributeStream("loop") << "B" << info->GetHeader()->GetBlockId();
-        HLoopInformation* outer = info->GetPreHeader()->GetLoopInformation();
-        if (outer != nullptr) {
-          StartAttributeStream("outer_loop") << "B" << outer->GetHeader()->GetBlockId();
-        } else {
-          StartAttributeStream("outer_loop") << "none";
-        }
-        StartAttributeStream("irreducible")
-            << std::boolalpha << info->IsIrreducible() << std::noboolalpha;
+        StartAttributeStream("outer_loop") << "none";
       }
+      StartAttributeStream("irreducible")
+          << std::boolalpha << loop_info->IsIrreducible() << std::noboolalpha;
     }
 
     if ((IsPass(HGraphBuilder::kBuilderPassName)
diff --git a/compiler/optimizing/gvn.cc b/compiler/optimizing/gvn.cc
index d0d52bf..1e86b75 100644
--- a/compiler/optimizing/gvn.cc
+++ b/compiler/optimizing/gvn.cc
@@ -454,11 +454,16 @@
 
     if (!set->IsEmpty()) {
       if (block->IsLoopHeader()) {
-        if (block->GetLoopInformation()->IsIrreducible()) {
+        if (block->GetLoopInformation()->ContainsIrreducibleLoop()) {
           // To satisfy our linear scan algorithm, no instruction should flow in an irreducible
-          // loop header.
+          // loop header. We clear the set at the entry of irreducible loops and of any loop
+          // containing an irreducible loop, since in both cases GVN could extend the liveness
+          // of an instruction across the irreducible loop.
+          // Note that, if we're not compiling OSR, we could still do GVN and introduce
+          // phis at irreducible loop headers. We decided it was not worth the complexity.
           set->Clear();
         } else {
+          DCHECK(!block->GetLoopInformation()->IsIrreducible());
           DCHECK_EQ(block->GetDominator(), block->GetLoopInformation()->GetPreHeader());
           set->Kill(side_effects_.GetLoopEffects(block));
         }
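
For context, an irreducible loop is a loop with more than one entry:

         entry
         /   \
        v     v
        B2 <-> B3

Both B2 and B3 are reachable directly from entry, so the B2<->B3 cycle has two entries and
neither block dominates the other. Keeping values in such a header's set would let GVN reuse
an instruction inside the cycle and thereby extend its live range across the whole loop,
which the linear scan allocator does not handle for irreducible control flow; the new
ContainsIrreducibleLoop() check also clears the set for outer loops whose body contains such
a cycle.
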
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index d7b3856..fd79901 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -101,6 +101,7 @@
   void SimplifyCompare(HInvoke* invoke, bool is_signum, Primitive::Type type);
   void SimplifyIsNaN(HInvoke* invoke);
   void SimplifyFP2Int(HInvoke* invoke);
+  void SimplifyStringIsEmptyOrLength(HInvoke* invoke);
   void SimplifyMemBarrier(HInvoke* invoke, MemBarrierKind barrier_kind);
 
   OptimizingCompilerStats* stats_;
@@ -1673,6 +1674,27 @@
   invoke->ReplaceWithExceptInReplacementAtIndex(select, 0);  // false at index 0
 }
 
+void InstructionSimplifierVisitor::SimplifyStringIsEmptyOrLength(HInvoke* invoke) {
+  HInstruction* str = invoke->InputAt(0);
+  uint32_t dex_pc = invoke->GetDexPc();
+  // We treat String as an array to allow DCE and BCE to seamlessly work on strings,
+  // so create the HArrayLength.
+  HArrayLength* length = new (GetGraph()->GetArena()) HArrayLength(str, dex_pc);
+  length->MarkAsStringLength();
+  HInstruction* replacement;
+  if (invoke->GetIntrinsic() == Intrinsics::kStringIsEmpty) {
+    // For String.isEmpty(), create the `HEqual` representing the `length == 0`.
+    invoke->GetBlock()->InsertInstructionBefore(length, invoke);
+    HIntConstant* zero = GetGraph()->GetIntConstant(0);
+    HEqual* equal = new (GetGraph()->GetArena()) HEqual(length, zero, dex_pc);
+    replacement = equal;
+  } else {
+    DCHECK_EQ(invoke->GetIntrinsic(), Intrinsics::kStringLength);
+    replacement = length;
+  }
+  invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, replacement);
+}
+
 void InstructionSimplifierVisitor::SimplifyMemBarrier(HInvoke* invoke, MemBarrierKind barrier_kind) {
   uint32_t dex_pc = invoke->GetDexPc();
   HMemoryBarrier* mem_barrier = new (GetGraph()->GetArena()) HMemoryBarrier(barrier_kind, dex_pc);
@@ -1719,6 +1741,10 @@
     case Intrinsics::kDoubleDoubleToLongBits:
       SimplifyFP2Int(instruction);
       break;
+    case Intrinsics::kStringIsEmpty:
+    case Intrinsics::kStringLength:
+      SimplifyStringIsEmptyOrLength(instruction);
+      break;
     case Intrinsics::kUnsafeLoadFence:
       SimplifyMemBarrier(instruction, MemBarrierKind::kLoadAny);
       break;
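
The net effect on the graph, sketched in a schematic HIR style (the textual form is
illustrative, not exact visualizer output):

    // Before: the call is opaque to DCE/BCE/GVN.
    //   i10: InvokeVirtual [s]   intrinsic: StringIsEmpty
    //
    // After: String.isEmpty() becomes a length load plus a compare.
    //   i11: ArrayLength [s]     is_string_length: true  // reads String.count
    //   i12: IntConstant 0
    //   i13: Equal [i11, i12]
    //
    // String.length() is rewritten to i11 alone. Because the length is now a
    // plain HArrayLength, the existing array optimizations apply to strings
    // unchanged, which is why GetArrayLengthOffset() must pick
    // String::CountOffset() for nodes marked as string lengths.
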
diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc
index 5d4c4e2..418d59c 100644
--- a/compiler/optimizing/intrinsics.cc
+++ b/compiler/optimizing/intrinsics.cc
@@ -388,10 +388,8 @@
     case kIntrinsicGetCharsNoCheck:
       return Intrinsics::kStringGetCharsNoCheck;
     case kIntrinsicIsEmptyOrLength:
-      // The inliner can handle these two cases - and this is the preferred approach
-      // since after inlining the call is no longer visible (as opposed to waiting
-      // until codegen to handle intrinsic).
-      return Intrinsics::kNone;
+      return ((method.d.data & kIntrinsicFlagIsEmpty) == 0) ?
+          Intrinsics::kStringLength : Intrinsics::kStringIsEmpty;
     case kIntrinsicIndexOf:
       return ((method.d.data & kIntrinsicFlagBase0) == 0) ?
           Intrinsics::kStringIndexOfAfter : Intrinsics::kStringIndexOf;
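
The recognizer now distinguishes the two java.lang.String methods by a flag bit carried in
the method data, mirroring the kIntrinsicFlagBase0 test for indexOf just below. A minimal
sketch of the dispatch (the flag's bit value is an assumption, not taken from this patch):

    #include <cstdint>

    enum class StringIntrinsic { kLength, kIsEmpty };

    constexpr uint32_t kIntrinsicFlagIsEmpty = 1u << 0;  // Hypothetical bit value.

    StringIntrinsic Classify(uint32_t data) {
      // Same shape as the hunk: a clear bit selects length(), a set bit isEmpty().
      return ((data & kIntrinsicFlagIsEmpty) == 0) ? StringIntrinsic::kLength
                                                   : StringIntrinsic::kIsEmpty;
    }
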
diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h
index 39a1313..214250f 100644
--- a/compiler/optimizing/intrinsics.h
+++ b/compiler/optimizing/intrinsics.h
@@ -239,6 +239,8 @@
 UNREACHABLE_INTRINSIC(Arch, LongCompare)            \
 UNREACHABLE_INTRINSIC(Arch, IntegerSignum)          \
 UNREACHABLE_INTRINSIC(Arch, LongSignum)             \
+UNREACHABLE_INTRINSIC(Arch, StringIsEmpty)          \
+UNREACHABLE_INTRINSIC(Arch, StringLength)           \
 UNREACHABLE_INTRINSIC(Arch, UnsafeLoadFence)        \
 UNREACHABLE_INTRINSIC(Arch, UnsafeStoreFence)       \
 UNREACHABLE_INTRINSIC(Arch, UnsafeFullFence)
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index 4e3ace4..97b8839 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -987,31 +987,126 @@
 void IntrinsicLocationsBuilderARM::VisitStringCompareTo(HInvoke* invoke) {
   // The inputs plus one temp.
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            invoke->InputAt(1)->CanBeNull()
+                                                                ? LocationSummary::kCallOnSlowPath
+                                                                : LocationSummary::kNoCall,
                                                             kIntrinsified);
-  InvokeRuntimeCallingConvention calling_convention;
-  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
-  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
-  locations->SetOut(Location::RegisterLocation(R0));
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
 }
 
 void IntrinsicCodeGeneratorARM::VisitStringCompareTo(HInvoke* invoke) {
   ArmAssembler* assembler = GetAssembler();
   LocationSummary* locations = invoke->GetLocations();
 
+  Register str = locations->InAt(0).AsRegister<Register>();
+  Register arg = locations->InAt(1).AsRegister<Register>();
+  Register out = locations->Out().AsRegister<Register>();
+
+  Register temp0 = locations->GetTemp(0).AsRegister<Register>();
+  Register temp1 = locations->GetTemp(1).AsRegister<Register>();
+  Register temp2 = locations->GetTemp(2).AsRegister<Register>();
+
+  Label loop;
+  Label find_char_diff;
+  Label end;
+
+  // Get offsets of count and value fields within a string object.
+  const int32_t count_offset = mirror::String::CountOffset().Int32Value();
+  const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
+
   // Note that the null check must have been done earlier.
   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
 
-  Register argument = locations->InAt(1).AsRegister<Register>();
-  __ cmp(argument, ShifterOperand(0));
-  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathARM(invoke);
-  codegen_->AddSlowPath(slow_path);
-  __ b(slow_path->GetEntryLabel(), EQ);
+  // Take the slow path and throw if the input can be null and actually is null.
+  SlowPathCode* slow_path = nullptr;
+  const bool can_slow_path = invoke->InputAt(1)->CanBeNull();
+  if (can_slow_path) {
+    slow_path = new (GetAllocator()) IntrinsicSlowPathARM(invoke);
+    codegen_->AddSlowPath(slow_path);
+    __ CompareAndBranchIfZero(arg, slow_path->GetEntryLabel());
+  }
 
-  __ LoadFromOffset(
-      kLoadWord, LR, TR, QUICK_ENTRYPOINT_OFFSET(kArmWordSize, pStringCompareTo).Int32Value());
-  __ blx(LR);
-  __ Bind(slow_path->GetExitLabel());
+  // Reference equality check, return 0 if same reference.
+  __ subs(out, str, ShifterOperand(arg));
+  __ b(&end, EQ);
+  // Load lengths of this and argument strings.
+  __ ldr(temp2, Address(str, count_offset));
+  __ ldr(temp1, Address(arg, count_offset));
+  // out = length diff.
+  __ subs(out, temp2, ShifterOperand(temp1));
+  // temp0 = min(len(str), len(arg)).
+  __ it(Condition::LT, kItElse);
+  __ mov(temp0, ShifterOperand(temp2), Condition::LT);
+  __ mov(temp0, ShifterOperand(temp1), Condition::GE);
+  // Shorter string is empty?
+  __ CompareAndBranchIfZero(temp0, &end);
+
+  // Store offset of string value in preparation for comparison loop.
+  __ mov(temp1, ShifterOperand(value_offset));
+
+  // Assertions that must hold in order to compare multiple characters at a time.
+  CHECK_ALIGNED(value_offset, 8);
+  static_assert(IsAligned<8>(kObjectAlignment),
+                "String data must be 8-byte aligned for unrolled CompareTo loop.");
+
+  const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
+  DCHECK_EQ(char_size, 2u);
+
+  // Unrolled loop comparing 4x16-bit chars per iteration (ok because of string data alignment).
+  __ Bind(&loop);
+  __ ldr(IP, Address(str, temp1));
+  __ ldr(temp2, Address(arg, temp1));
+  __ cmp(IP, ShifterOperand(temp2));
+  __ b(&find_char_diff, NE);
+  __ add(temp1, temp1, ShifterOperand(char_size * 2));
+  __ sub(temp0, temp0, ShifterOperand(2));
+
+  __ ldr(IP, Address(str, temp1));
+  __ ldr(temp2, Address(arg, temp1));
+  __ cmp(IP, ShifterOperand(temp2));
+  __ b(&find_char_diff, NE);
+  __ add(temp1, temp1, ShifterOperand(char_size * 2));
+  __ subs(temp0, temp0, ShifterOperand(2));
+
+  __ b(&loop, GT);
+  __ b(&end);
+
+  // Find the single 16-bit character difference.
+  __ Bind(&find_char_diff);
+  // Get the bit position of the first character that differs.
+  __ eor(temp1, temp2, ShifterOperand(IP));
+  __ rbit(temp1, temp1);
+  __ clz(temp1, temp1);
+
+  // temp0 = number of 16-bit characters remaining to compare.
+  // (it could be < 1 if a difference is found after the first SUB in the comparison loop,
+  // and past the end of the shorter string's data).
+
+  // (temp1 >> 4) = index of the differing character within the last two words compared,
+  // in the range [0,1] (0 if the low half-word differs, 1 if the high half-word does).
+
+  // If temp0 <= (temp1 >> 4), the difference occurs outside the remaining string data, so just
+  // return length diff (out).
+  __ cmp(temp0, ShifterOperand(temp1, LSR, 4));
+  __ b(&end, LE);
+  // Extract the characters and calculate the difference.
+  __ bic(temp1, temp1, ShifterOperand(0xf));
+  __ Lsr(temp2, temp2, temp1);
+  __ Lsr(IP, IP, temp1);
+  __ movt(temp2, 0);
+  __ movt(IP, 0);
+  __ sub(out, IP, ShifterOperand(temp2));
+
+  __ Bind(&end);
+
+  if (can_slow_path) {
+    __ Bind(slow_path->GetExitLabel());
+  }
 }
 
 void IntrinsicLocationsBuilderARM::VisitStringEquals(HInvoke* invoke) {
@@ -1082,7 +1177,7 @@
 
   // Assertions that must hold in order to compare strings 2 characters at a time.
   DCHECK_ALIGNED(value_offset, 4);
-  static_assert(IsAligned<4>(kObjectAlignment), "String of odd length is not zero padded");
+  static_assert(IsAligned<4>(kObjectAlignment), "String data must be aligned for fast compare.");
 
   __ LoadImmediate(temp1, value_offset);
 
@@ -1124,7 +1219,7 @@
   SlowPathCode* slow_path = nullptr;
   HInstruction* code_point = invoke->InputAt(1);
   if (code_point->IsIntConstant()) {
-    if (static_cast<uint32_t>(invoke->InputAt(1)->AsIntConstant()->GetValue()) >
+    if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) >
         std::numeric_limits<uint16_t>::max()) {
       // Always needs the slow-path. We could directly dispatch to it, but this case should be
       // rare, so for simplicity just put the full slow-path down and branch unconditionally.
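
The new ARM fast path compares two UTF-16 chars per 32-bit load, and on a mismatch uses
rbit followed by clz (together, a count of trailing zeros) on the XOR of the two words to
find the first differing character. A portable C++ sketch of the same idea, assuming
little-endian layout and an even char count (illustration only):

    #include <cstddef>
    #include <cstdint>
    #include <cstring>

    int32_t CompareCharsSketch(const uint16_t* a, const uint16_t* b, size_t n) {
      for (size_t i = 0; i < n; i += 2) {
        uint32_t wa, wb;
        std::memcpy(&wa, a + i, sizeof(wa));  // Two chars per word, like the ldr pair.
        std::memcpy(&wb, b + i, sizeof(wb));
        if (wa != wb) {
          // rbit+clz on ARM == count trailing zeros of the XOR; >> 4 picks
          // the differing half-word (0 or 1), matching the code above.
          unsigned bit = __builtin_ctz(wa ^ wb);
          size_t k = i + (bit >> 4);
          return static_cast<int32_t>(a[k]) - static_cast<int32_t>(b[k]);
        }
      }
      return 0;
    }

__builtin_ctz is the GCC/Clang builtin; the generated code instead masks the bit index and
shifts both words right to isolate the differing characters.
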
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index cc5fd65..07d9d87 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -1270,12 +1270,12 @@
   __ Eor(temp1, temp0, temp4);
   __ Rbit(temp1, temp1);
   __ Clz(temp1, temp1);
-  __ Bic(temp1, temp1, 0xf);
   // If the number of 16-bit chars remaining <= the index where the difference occurs (0-3), then
   // the difference occurs outside the remaining string data, so just return length diff (out).
   __ Cmp(temp2, Operand(temp1, LSR, 4));
   __ B(le, &end);
   // Extract the characters and calculate the difference.
+  __ Bic(temp1, temp1, 0xf);
   __ Lsr(temp0, temp0, temp1);
   __ Lsr(temp4, temp4, temp1);
   __ And(temp4, temp4, 0xffff);
@@ -1399,7 +1399,7 @@
   SlowPathCodeARM64* slow_path = nullptr;
   HInstruction* code_point = invoke->InputAt(1);
   if (code_point->IsIntConstant()) {
-    if (static_cast<uint32_t>(invoke->InputAt(1)->AsIntConstant()->GetValue()) > 0xFFFFU) {
+    if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) > 0xFFFFU) {
       // Always needs the slow-path. We could directly dispatch to it, but this case should be
       // rare, so for simplicity just put the full slow-path down and branch unconditionally.
       slow_path = new (allocator) IntrinsicSlowPathARM64(invoke);
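
The arm64 reordering moves the Bic below the early-exit compare: the compare only needs the
char index (temp1 >> 4, which is unaffected by clearing the low four bits), while the masked
value is only needed on the path that actually extracts characters. The index arithmetic, as
a sketch:

    #include <cstdint>

    // Given the bit position (0-63) of the first difference within a 64-bit
    // word holding four UTF-16 chars, derive the two values used above.
    struct DiffPosition {
      uint32_t char_index;    // temp1 >> 4: which half-word differs (0-3).
      uint32_t shift_amount;  // temp1 & ~0xf: the Bic result, used to shift
                              // the differing char down to bit 0.
    };

    DiffPosition Locate(uint32_t first_diff_bit) {
      return DiffPosition{first_diff_bit >> 4, first_diff_bit & ~0xfu};
    }
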
diff --git a/compiler/optimizing/intrinsics_list.h b/compiler/optimizing/intrinsics_list.h
index dd9294d..db60238 100644
--- a/compiler/optimizing/intrinsics_list.h
+++ b/compiler/optimizing/intrinsics_list.h
@@ -107,6 +107,8 @@
   V(StringGetCharsNoCheck, kDirect, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow) \
   V(StringIndexOf, kDirect, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow) \
   V(StringIndexOfAfter, kDirect, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow) \
+  V(StringIsEmpty, kDirect, kNeedsEnvironmentOrCache, kReadSideEffects, kNoThrow) \
+  V(StringLength, kDirect, kNeedsEnvironmentOrCache, kReadSideEffects, kNoThrow) \
   V(StringNewStringFromBytes, kStatic, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
   V(StringNewStringFromChars, kStatic, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
   V(StringNewStringFromString, kStatic, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \
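
For readers unfamiliar with the pattern, each V(...) row above is an X-macro entry: the same
table is expanded with different definitions of V to generate the intrinsic enum, visitor
declarations, and so on. A self-contained sketch of the mechanism (names hypothetical, not
the actual ART macros):

    #define STRING_INTRINSICS_LIST(V) \
      V(StringIsEmpty)                \
      V(StringLength)

    enum class Intrinsics {
    #define ENUM_ENTRY(Name) k##Name,
      STRING_INTRINSICS_LIST(ENUM_ENTRY)
    #undef ENUM_ENTRY
      kNone,
    };

    const char* Name(Intrinsics i) {
      switch (i) {
    #define NAME_CASE(N) case Intrinsics::k##N: return #N;
        STRING_INTRINSICS_LIST(NAME_CASE)
    #undef NAME_CASE
        default: return "None";
      }
    }
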
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index 20b61f8..140f56a 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -2072,7 +2072,7 @@
   SlowPathCodeMIPS* slow_path = nullptr;
   HInstruction* code_point = invoke->InputAt(1);
   if (code_point->IsIntConstant()) {
-    if (!IsUint<16>(invoke->InputAt(1)->AsIntConstant()->GetValue())) {
+    if (!IsUint<16>(code_point->AsIntConstant()->GetValue())) {
       // Always needs the slow-path. We could directly dispatch to it,
       // but this case should be rare, so for simplicity just put the
       // full slow-path down and branch unconditionally.
@@ -2433,13 +2433,128 @@
   GenLowestOneBit(invoke->GetLocations(), Primitive::kPrimLong, IsR6(), GetAssembler());
 }
 
+// int java.lang.Math.round(float)
+void IntrinsicLocationsBuilderMIPS::VisitMathRoundFloat(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kNoCall,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresFpuRegister());
+  locations->AddTemp(Location::RequiresFpuRegister());
+  locations->SetOut(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitMathRoundFloat(HInvoke* invoke) {
+  LocationSummary* locations = invoke->GetLocations();
+  MipsAssembler* assembler = GetAssembler();
+  FRegister in = locations->InAt(0).AsFpuRegister<FRegister>();
+  FRegister half = locations->GetTemp(0).AsFpuRegister<FRegister>();
+  Register out = locations->Out().AsRegister<Register>();
+
+  MipsLabel done;
+  MipsLabel finite;
+  MipsLabel add;
+
+  // if (in.isNaN) {
+  //   return 0;
+  // }
+  //
+  // out = floor.w.s(in);
+  //
+  // /*
+  //  * This "if" statement is only needed for the pre-R6 version of floor.w.s
+  //  * which outputs Integer.MAX_VALUE for negative numbers with magnitudes
+  //  * too large to fit in a 32-bit integer.
+  //  *
+  //  * Starting with MIPSR6, which always sets FCSR.NAN2008=1, negative
+  //  * numbers which are too large to be represented in a 32-bit signed
+  //  * integer will be processed by floor.w.s to output Integer.MIN_VALUE,
+  //  * and will no longer be processed by this "if" statement.
+  //  */
+  // if (out == Integer.MAX_VALUE) {
+  //   TMP = (in < 0.0f) ? 1 : 0;
+  //   /*
+  //    * If TMP is 1, then adding it to out will wrap its value from
+  //    * Integer.MAX_VALUE to Integer.MIN_VALUE.
+  //    */
+  //   return out += TMP;
+  // }
+  //
+  // /*
+  //  * For negative values not handled by the previous "if" statement the
+  //  * test here will correctly set the value of TMP.
+  //  */
+  // TMP = ((in - out) >= 0.5f) ? 1 : 0;
+  // return out += TMP;
+
+  // Test for NaN.
+  if (IsR6()) {
+    __ CmpUnS(FTMP, in, in);
+  } else {
+    __ CunS(in, in);
+  }
+
+  // Return zero for NaN.
+  __ Move(out, ZERO);
+  if (IsR6()) {
+    __ Bc1nez(FTMP, &done);
+  } else {
+    __ Bc1t(&done);
+  }
+
+  // out = floor(in);
+  __ FloorWS(FTMP, in);
+  __ Mfc1(out, FTMP);
+
+  __ LoadConst32(TMP, 1);
+
+  // TMP = (out == java.lang.Integer.MAX_VALUE) ? 1 : 0;
+  __ LoadConst32(AT, std::numeric_limits<int32_t>::max());
+  __ Bne(AT, out, &finite);
+
+  __ Mtc1(ZERO, FTMP);
+  if (IsR6()) {
+    __ CmpLtS(FTMP, in, FTMP);
+    __ Mfc1(AT, FTMP);
+  } else {
+    __ ColtS(in, FTMP);
+  }
+
+  __ B(&add);
+
+  __ Bind(&finite);
+
+  // TMP = (0.5f <= (in - out)) ? 1 : 0;
+  __ Cvtsw(FTMP, FTMP);  // Convert output of floor.w.s back to "float".
+  __ LoadConst32(AT, bit_cast<int32_t, float>(0.5f));
+  __ SubS(FTMP, in, FTMP);
+  __ Mtc1(AT, half);
+  if (IsR6()) {
+    __ CmpLeS(FTMP, half, FTMP);
+    __ Mfc1(AT, FTMP);
+  } else {
+    __ ColeS(half, FTMP);
+  }
+
+  __ Bind(&add);
+
+  if (IsR6()) {
+    __ Selnez(TMP, TMP, AT);
+  } else {
+    __ Movf(TMP, ZERO);
+  }
+
+  // Return out += TMP.
+  __ Addu(out, out, TMP);
+
+  __ Bind(&done);
+}
+
 // Unimplemented intrinsics.
 
 UNIMPLEMENTED_INTRINSIC(MIPS, MathCeil)
 UNIMPLEMENTED_INTRINSIC(MIPS, MathFloor)
 UNIMPLEMENTED_INTRINSIC(MIPS, MathRint)
 UNIMPLEMENTED_INTRINSIC(MIPS, MathRoundDouble)
-UNIMPLEMENTED_INTRINSIC(MIPS, MathRoundFloat)
 UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeCASLong)
 
 UNIMPLEMENTED_INTRINSIC(MIPS, ReferenceGetReferent)
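
A portable C++ sketch of the rounding scheme spelled out in the pseudocode comments above
(names hypothetical). The hand-written MIPS code leans on floor.w.s saturation and, in the
pre-R6 path, on wrapping MAX_VALUE to MIN_VALUE; this sketch approximates the out-of-range
cases with explicit checks instead:

    #include <cmath>
    #include <cstdint>
    #include <limits>

    int32_t RoundFloatSketch(float in) {
      if (std::isnan(in)) {
        return 0;  // Java defines Math.round(NaN) == 0.
      }
      double floored = std::floor(static_cast<double>(in));
      if (floored <= static_cast<double>(std::numeric_limits<int32_t>::min())) {
        return std::numeric_limits<int32_t>::min();  // Large negative inputs.
      }
      if (floored >= static_cast<double>(std::numeric_limits<int32_t>::max())) {
        return std::numeric_limits<int32_t>::max();  // Large positive inputs.
      }
      int32_t out = static_cast<int32_t>(floored);
      // Round half up: add 1 when the fractional part is at least 0.5.
      if (static_cast<double>(in) - floored >= 0.5) {
        ++out;
      }
      return out;
    }
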
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index 7188e1c..6c4e64e 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -1568,7 +1568,7 @@
   SlowPathCodeMIPS64* slow_path = nullptr;
   HInstruction* code_point = invoke->InputAt(1);
   if (code_point->IsIntConstant()) {
-    if (!IsUint<16>(invoke->InputAt(1)->AsIntConstant()->GetValue())) {
+    if (!IsUint<16>(code_point->AsIntConstant()->GetValue())) {
       // Always needs the slow-path. We could directly dispatch to it,
       // but this case should be rare, so for simplicity just put the
       // full slow-path down and branch unconditionally.
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index d0edeca..d66940f 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -1422,7 +1422,7 @@
   SlowPathCode* slow_path = nullptr;
   HInstruction* code_point = invoke->InputAt(1);
   if (code_point->IsIntConstant()) {
-    if (static_cast<uint32_t>(invoke->InputAt(1)->AsIntConstant()->GetValue()) >
+    if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) >
     std::numeric_limits<uint16_t>::max()) {
       // Always needs the slow-path. We could directly dispatch to it, but this case should be
       // rare, so for simplicity just put the full slow-path down and branch unconditionally.
@@ -2631,8 +2631,66 @@
   GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true);
 }
 
+void IntrinsicLocationsBuilderX86::VisitReferenceGetReferent(HInvoke* invoke) {
+  if (kEmitCompilerReadBarrier) {
+    // Do not intrinsify this call with the read barrier configuration.
+    return;
+  }
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kCallOnSlowPath,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::SameAsFirstInput());
+  locations->AddTemp(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorX86::VisitReferenceGetReferent(HInvoke* invoke) {
+  DCHECK(!kEmitCompilerReadBarrier);
+  LocationSummary* locations = invoke->GetLocations();
+  X86Assembler* assembler = GetAssembler();
+
+  Register obj = locations->InAt(0).AsRegister<Register>();
+  Register out = locations->Out().AsRegister<Register>();
+
+  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
+  codegen_->AddSlowPath(slow_path);
+
+  // Load ArtMethod first.
+  HInvokeStaticOrDirect* invoke_direct = invoke->AsInvokeStaticOrDirect();
+  DCHECK(invoke_direct != nullptr);
+  Location temp_loc = codegen_->GenerateCalleeMethodStaticOrDirectCall(
+      invoke_direct, locations->GetTemp(0));
+  DCHECK(temp_loc.Equals(locations->GetTemp(0)));
+  Register temp = temp_loc.AsRegister<Register>();
+
+  // Now get declaring class.
+  __ movl(temp, Address(temp, ArtMethod::DeclaringClassOffset().Int32Value()));
+
+  uint32_t slow_path_flag_offset = codegen_->GetReferenceSlowFlagOffset();
+  uint32_t disable_flag_offset = codegen_->GetReferenceDisableFlagOffset();
+  DCHECK_NE(slow_path_flag_offset, 0u);
+  DCHECK_NE(disable_flag_offset, 0u);
+  DCHECK_NE(slow_path_flag_offset, disable_flag_offset);
+
+  // Check static flags that prevent us from using the intrinsic.
+  if (slow_path_flag_offset == disable_flag_offset + 1) {
+    __ cmpw(Address(temp, disable_flag_offset), Immediate(0));
+    __ j(kNotEqual, slow_path->GetEntryLabel());
+  } else {
+    __ cmpb(Address(temp, disable_flag_offset), Immediate(0));
+    __ j(kNotEqual, slow_path->GetEntryLabel());
+    __ cmpb(Address(temp, slow_path_flag_offset), Immediate(0));
+    __ j(kNotEqual, slow_path->GetEntryLabel());
+  }
+
+  // Fast path.
+  __ movl(out, Address(obj, mirror::Reference::ReferentOffset().Int32Value()));
+  codegen_->MaybeRecordImplicitNullCheck(invoke);
+  __ MaybeUnpoisonHeapReference(out);
+  __ Bind(slow_path->GetExitLabel());
+}
+
 UNIMPLEMENTED_INTRINSIC(X86, MathRoundDouble)
-UNIMPLEMENTED_INTRINSIC(X86, ReferenceGetReferent)
 UNIMPLEMENTED_INTRINSIC(X86, SystemArrayCopy)
 UNIMPLEMENTED_INTRINSIC(X86, FloatIsInfinite)
 UNIMPLEMENTED_INTRINSIC(X86, DoubleIsInfinite)
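
Shape of the generated x86 fast path, as a hedged pseudo-C++ sketch (field and function
names hypothetical; heap-reference poisoning and the implicit null check are elided): the
code consults two byte-sized flags on Reference's declaring class and, when they happen to
be adjacent, folds both tests into a single 16-bit compare. The x86-64 hunk below emits the
same sequence with 64-bit registers.

    #include <cstdint>
    #include <cstring>

    struct ReferenceClassFlags {
      uint8_t disable_intrinsic;   // the "disable" flag byte.
      uint8_t slow_path_required;  // the "slow path" flag byte, adjacent here.
    };

    void* GetReferentSketch(const ReferenceClassFlags* flags,
                            void** referent_field,
                            void* (*slow_path)()) {
      uint16_t both;
      std::memcpy(&both, flags, sizeof(both));  // One cmpw covers both bytes.
      if (both != 0) {
        return slow_path();  // Either flag set: defer to the runtime.
      }
      return *referent_field;  // Fast path: plain load of Reference.referent.
    }
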
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 4ee2368..2a86769 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -1521,7 +1521,7 @@
   SlowPathCode* slow_path = nullptr;
   HInstruction* code_point = invoke->InputAt(1);
   if (code_point->IsIntConstant()) {
-    if (static_cast<uint32_t>(invoke->InputAt(1)->AsIntConstant()->GetValue()) >
+    if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) >
     std::numeric_limits<uint16_t>::max()) {
       // Always needs the slow-path. We could directly dispatch to it, but this case should be
       // rare, so for simplicity just put the full slow-path down and branch unconditionally.
@@ -2719,7 +2719,65 @@
   GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true);
 }
 
-UNIMPLEMENTED_INTRINSIC(X86_64, ReferenceGetReferent)
+void IntrinsicLocationsBuilderX86_64::VisitReferenceGetReferent(HInvoke* invoke) {
+  if (kEmitCompilerReadBarrier) {
+    // Do not intrinsify this call with the read barrier configuration.
+    return;
+  }
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kCallOnSlowPath,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::SameAsFirstInput());
+  locations->AddTemp(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitReferenceGetReferent(HInvoke* invoke) {
+  DCHECK(!kEmitCompilerReadBarrier);
+  LocationSummary* locations = invoke->GetLocations();
+  X86_64Assembler* assembler = GetAssembler();
+
+  CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
+  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+
+  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
+  codegen_->AddSlowPath(slow_path);
+
+  // Load ArtMethod first.
+  HInvokeStaticOrDirect* invoke_direct = invoke->AsInvokeStaticOrDirect();
+  DCHECK(invoke_direct != nullptr);
+  Location temp_loc = codegen_->GenerateCalleeMethodStaticOrDirectCall(
+      invoke_direct, locations->GetTemp(0));
+  DCHECK(temp_loc.Equals(locations->GetTemp(0)));
+  CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
+
+  // Now get declaring class.
+  __ movl(temp, Address(temp, ArtMethod::DeclaringClassOffset().Int32Value()));
+
+  uint32_t slow_path_flag_offset = codegen_->GetReferenceSlowFlagOffset();
+  uint32_t disable_flag_offset = codegen_->GetReferenceDisableFlagOffset();
+  DCHECK_NE(slow_path_flag_offset, 0u);
+  DCHECK_NE(disable_flag_offset, 0u);
+  DCHECK_NE(slow_path_flag_offset, disable_flag_offset);
+
+  // Check static flags that prevent us from using the intrinsic.
+  if (slow_path_flag_offset == disable_flag_offset + 1) {
+    __ cmpw(Address(temp, disable_flag_offset), Immediate(0));
+    __ j(kNotEqual, slow_path->GetEntryLabel());
+  } else {
+    __ cmpb(Address(temp, disable_flag_offset), Immediate(0));
+    __ j(kNotEqual, slow_path->GetEntryLabel());
+    __ cmpb(Address(temp, slow_path_flag_offset), Immediate(0));
+    __ j(kNotEqual, slow_path->GetEntryLabel());
+  }
+
+  // Fast path.
+  __ movl(out, Address(obj, mirror::Reference::ReferentOffset().Int32Value()));
+  codegen_->MaybeRecordImplicitNullCheck(invoke);
+  __ MaybeUnpoisonHeapReference(out);
+  __ Bind(slow_path->GetExitLabel());
+}
+
 UNIMPLEMENTED_INTRINSIC(X86_64, FloatIsInfinite)
 UNIMPLEMENTED_INTRINSIC(X86_64, DoubleIsInfinite)
 
diff --git a/compiler/optimizing/licm.cc b/compiler/optimizing/licm.cc
index 5a0b89c..7543cd6 100644
--- a/compiler/optimizing/licm.cc
+++ b/compiler/optimizing/licm.cc
@@ -101,16 +101,6 @@
     SideEffects loop_effects = side_effects_.GetLoopEffects(block);
     HBasicBlock* pre_header = loop_info->GetPreHeader();
 
-    bool contains_irreducible_loop = false;
-    if (graph_->HasIrreducibleLoops()) {
-      for (HBlocksInLoopIterator it_loop(*loop_info); !it_loop.Done(); it_loop.Advance()) {
-        if (it_loop.Current()->GetLoopInformation()->IsIrreducible()) {
-          contains_irreducible_loop = true;
-          break;
-        }
-      }
-    }
-
     for (HBlocksInLoopIterator it_loop(*loop_info); !it_loop.Done(); it_loop.Advance()) {
       HBasicBlock* inner = it_loop.Current();
       DCHECK(inner->IsInLoop());
@@ -123,11 +113,12 @@
         visited->SetBit(inner->GetBlockId());
       }
 
-      if (contains_irreducible_loop) {
+      if (loop_info->ContainsIrreducibleLoop()) {
         // We cannot licm in an irreducible loop, or in a natural loop containing an
         // irreducible loop.
         continue;
       }
+      DCHECK(!loop_info->IsIrreducible());
 
       // We can move an instruction that can throw only if it is the first
       // throwing instruction in the loop. Note that the first potentially
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 1e6bf07..60329cc 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -446,8 +446,10 @@
 }
 
 GraphAnalysisResult HGraph::AnalyzeLoops() const {
-  // Order does not matter.
-  for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) {
+  // We iterate post order to ensure we visit inner loops before outer loops.
+  // `PopulateRecursive` needs this guarantee to know whether a natural loop
+  // contains an irreducible loop.
+  for (HPostOrderIterator it(*this); !it.Done(); it.Advance()) {
     HBasicBlock* block = it.Current();
     if (block->IsLoopHeader()) {
       if (block->IsCatchBlock()) {
@@ -580,6 +582,14 @@
 
   blocks_.SetBit(block->GetBlockId());
   block->SetInLoop(this);
+  if (block->IsLoopHeader()) {
+    // We're visiting loops in post-order, so inner loops must have been
+    // populated already.
+    DCHECK(block->GetLoopInformation()->IsPopulated());
+    if (block->GetLoopInformation()->IsIrreducible()) {
+      contains_irreducible_loop_ = true;
+    }
+  }
   for (HBasicBlock* predecessor : block->GetPredecessors()) {
     PopulateRecursive(predecessor);
   }
@@ -683,6 +693,7 @@
   }
   if (is_irreducible_loop) {
     irreducible_ = true;
+    contains_irreducible_loop_ = true;
     graph->SetHasIrreducibleLoops(true);
   }
 }
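
Why iterating post order works: a nested loop's header is visited, and its loop fully
populated, before any outer loop that contains it, so when PopulateRecursive walks an outer
loop's blocks and meets an inner header, that inner loop's flags are already final. A hedged
sketch of the propagation with simplified stand-in types:

    struct LoopInfo {
      bool irreducible = false;
      bool contains_irreducible_loop = false;
    };

    // Called when an outer loop's population walk reaches the header of an
    // inner loop. The walk visits every nested header directly, so checking
    // the inner loop's own irreducibility is enough to set the flag.
    void NoteInnerHeader(LoopInfo& outer, const LoopInfo& inner) {
      if (inner.irreducible) {
        outer.contains_irreducible_loop = true;
      }
    }
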
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 829fe71..c08323a 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -650,6 +650,7 @@
       : header_(header),
         suspend_check_(nullptr),
         irreducible_(false),
+        contains_irreducible_loop_(false),
         back_edges_(graph->GetArena()->Adapter(kArenaAllocLoopInfoBackEdges)),
         // Make bit vector growable, as the number of blocks may change.
         blocks_(graph->GetArena(), graph->GetBlocks().size(), true, kArenaAllocLoopInfoBackEdges) {
@@ -657,6 +658,7 @@
   }
 
   bool IsIrreducible() const { return irreducible_; }
+  bool ContainsIrreducibleLoop() const { return contains_irreducible_loop_; }
 
   void Dump(std::ostream& os);
 
@@ -727,6 +729,10 @@
 
   bool HasBackEdgeNotDominatedByHeader() const;
 
+  bool IsPopulated() const {
+    return blocks_.GetHighestBitSet() != -1;
+  }
+
  private:
   // Internal recursive implementation of `Populate`.
   void PopulateRecursive(HBasicBlock* block);
@@ -735,6 +741,7 @@
   HBasicBlock* header_;
   HSuspendCheck* suspend_check_;
   bool irreducible_;
+  bool contains_irreducible_loop_;
   ArenaVector<HBasicBlock*> back_edges_;
   ArenaBitVector blocks_;
 
@@ -2283,7 +2290,7 @@
 
 // Represents dex's RETURN_VOID opcode. A HReturnVoid is a control flow
 // instruction that branches to the exit block.
-class HReturnVoid : public HTemplateInstruction<0> {
+class HReturnVoid FINAL : public HTemplateInstruction<0> {
  public:
   explicit HReturnVoid(uint32_t dex_pc = kNoDexPc)
       : HTemplateInstruction(SideEffects::None(), dex_pc) {}
@@ -2298,7 +2305,7 @@
 
 // Represents dex's RETURN opcodes. A HReturn is a control flow
 // instruction that branches to the exit block.
-class HReturn : public HTemplateInstruction<1> {
+class HReturn FINAL : public HTemplateInstruction<1> {
  public:
   explicit HReturn(HInstruction* value, uint32_t dex_pc = kNoDexPc)
       : HTemplateInstruction(SideEffects::None(), dex_pc) {
@@ -2313,7 +2320,7 @@
   DISALLOW_COPY_AND_ASSIGN(HReturn);
 };
 
-class HPhi : public HInstruction {
+class HPhi FINAL : public HInstruction {
  public:
   HPhi(ArenaAllocator* arena,
        uint32_t reg_number,
@@ -2417,7 +2424,7 @@
 // The exit instruction is the only instruction of the exit block.
 // Instructions aborting the method (HThrow and HReturn) must branch to the
 // exit block.
-class HExit : public HTemplateInstruction<0> {
+class HExit FINAL : public HTemplateInstruction<0> {
  public:
   explicit HExit(uint32_t dex_pc = kNoDexPc) : HTemplateInstruction(SideEffects::None(), dex_pc) {}
 
@@ -2430,7 +2437,7 @@
 };
 
 // Jumps from one block to another.
-class HGoto : public HTemplateInstruction<0> {
+class HGoto FINAL : public HTemplateInstruction<0> {
  public:
   explicit HGoto(uint32_t dex_pc = kNoDexPc) : HTemplateInstruction(SideEffects::None(), dex_pc) {}
 
@@ -2470,7 +2477,7 @@
   DISALLOW_COPY_AND_ASSIGN(HConstant);
 };
 
-class HNullConstant : public HConstant {
+class HNullConstant FINAL : public HConstant {
  public:
   bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
     return true;
@@ -2494,7 +2501,7 @@
 
 // Constants of the type int. Those can be from Dex instructions, or
 // synthesized (for example with the if-eqz instruction).
-class HIntConstant : public HConstant {
+class HIntConstant FINAL : public HConstant {
  public:
   int32_t GetValue() const { return value_; }
 
@@ -2535,7 +2542,7 @@
   DISALLOW_COPY_AND_ASSIGN(HIntConstant);
 };
 
-class HLongConstant : public HConstant {
+class HLongConstant FINAL : public HConstant {
  public:
   int64_t GetValue() const { return value_; }
 
@@ -2565,7 +2572,7 @@
   DISALLOW_COPY_AND_ASSIGN(HLongConstant);
 };
 
-class HFloatConstant : public HConstant {
+class HFloatConstant FINAL : public HConstant {
  public:
   float GetValue() const { return value_; }
 
@@ -2618,7 +2625,7 @@
   DISALLOW_COPY_AND_ASSIGN(HFloatConstant);
 };
 
-class HDoubleConstant : public HConstant {
+class HDoubleConstant FINAL : public HConstant {
  public:
   double GetValue() const { return value_; }
 
@@ -2671,7 +2678,7 @@
 
 // Conditional branch. A block ending with an HIf instruction must have
 // two successors.
-class HIf : public HTemplateInstruction<1> {
+class HIf FINAL : public HTemplateInstruction<1> {
  public:
   explicit HIf(HInstruction* input, uint32_t dex_pc = kNoDexPc)
       : HTemplateInstruction(SideEffects::None(), dex_pc) {
@@ -2700,7 +2707,7 @@
 // non-exceptional control flow.
 // Normal-flow successor is stored at index zero, exception handlers under
 // higher indices in no particular order.
-class HTryBoundary : public HTemplateInstruction<0> {
+class HTryBoundary FINAL : public HTemplateInstruction<0> {
  public:
   enum class BoundaryKind {
     kEntry,
@@ -2758,7 +2765,7 @@
 };
 
 // Deoptimize to interpreter, upon checking a condition.
-class HDeoptimize : public HTemplateInstruction<1> {
+class HDeoptimize FINAL : public HTemplateInstruction<1> {
  public:
   // We set CanTriggerGC to prevent any intermediate address to be live
   // at the point of the `HDeoptimize`.
@@ -2783,7 +2790,7 @@
 // Represents the ArtMethod that was passed as a first argument to
 // the method. It is used by instructions that depend on it, like
 // instructions that work with the dex cache.
-class HCurrentMethod : public HExpression<0> {
+class HCurrentMethod FINAL : public HExpression<0> {
  public:
   explicit HCurrentMethod(Primitive::Type type, uint32_t dex_pc = kNoDexPc)
       : HExpression(type, SideEffects::None(), dex_pc) {}
@@ -2796,7 +2803,7 @@
 
 // Fetches an ArtMethod from the virtual table or the interface method table
 // of a class.
-class HClassTableGet : public HExpression<1> {
+class HClassTableGet FINAL : public HExpression<1> {
  public:
   enum class TableKind {
     kVTable,
@@ -2843,7 +2850,7 @@
 // PackedSwitch (jump table). A block ending with a PackedSwitch instruction will
 // have one successor for each entry in the switch table, and the final successor
 // will be the block containing the next Dex opcode.
-class HPackedSwitch : public HTemplateInstruction<1> {
+class HPackedSwitch FINAL : public HTemplateInstruction<1> {
  public:
   HPackedSwitch(int32_t start_value,
                 uint32_t num_entries,
@@ -3088,7 +3095,7 @@
 };
 
 // Instruction to check if two inputs are equal to each other.
-class HEqual : public HCondition {
+class HEqual FINAL : public HCondition {
  public:
   HEqual(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc)
       : HCondition(first, second, dex_pc) {}
@@ -3132,7 +3139,7 @@
   DISALLOW_COPY_AND_ASSIGN(HEqual);
 };
 
-class HNotEqual : public HCondition {
+class HNotEqual FINAL : public HCondition {
  public:
   HNotEqual(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc)
       : HCondition(first, second, dex_pc) {}
@@ -3175,7 +3182,7 @@
   DISALLOW_COPY_AND_ASSIGN(HNotEqual);
 };
 
-class HLessThan : public HCondition {
+class HLessThan FINAL : public HCondition {
  public:
   HLessThan(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc)
       : HCondition(first, second, dex_pc) {}
@@ -3212,7 +3219,7 @@
   DISALLOW_COPY_AND_ASSIGN(HLessThan);
 };
 
-class HLessThanOrEqual : public HCondition {
+class HLessThanOrEqual FINAL : public HCondition {
  public:
   HLessThanOrEqual(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc)
       : HCondition(first, second, dex_pc) {}
@@ -3249,7 +3256,7 @@
   DISALLOW_COPY_AND_ASSIGN(HLessThanOrEqual);
 };
 
-class HGreaterThan : public HCondition {
+class HGreaterThan FINAL : public HCondition {
  public:
   HGreaterThan(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc)
       : HCondition(first, second, dex_pc) {}
@@ -3286,7 +3293,7 @@
   DISALLOW_COPY_AND_ASSIGN(HGreaterThan);
 };
 
-class HGreaterThanOrEqual : public HCondition {
+class HGreaterThanOrEqual FINAL : public HCondition {
  public:
   HGreaterThanOrEqual(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc)
       : HCondition(first, second, dex_pc) {}
@@ -3323,7 +3330,7 @@
   DISALLOW_COPY_AND_ASSIGN(HGreaterThanOrEqual);
 };
 
-class HBelow : public HCondition {
+class HBelow FINAL : public HCondition {
  public:
   HBelow(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc)
       : HCondition(first, second, dex_pc) {}
@@ -3363,7 +3370,7 @@
   DISALLOW_COPY_AND_ASSIGN(HBelow);
 };
 
-class HBelowOrEqual : public HCondition {
+class HBelowOrEqual FINAL : public HCondition {
  public:
   HBelowOrEqual(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc)
       : HCondition(first, second, dex_pc) {}
@@ -3403,7 +3410,7 @@
   DISALLOW_COPY_AND_ASSIGN(HBelowOrEqual);
 };
 
-class HAbove : public HCondition {
+class HAbove FINAL : public HCondition {
  public:
   HAbove(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc)
       : HCondition(first, second, dex_pc) {}
@@ -3443,7 +3450,7 @@
   DISALLOW_COPY_AND_ASSIGN(HAbove);
 };
 
-class HAboveOrEqual : public HCondition {
+class HAboveOrEqual FINAL : public HCondition {
  public:
   HAboveOrEqual(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc)
       : HCondition(first, second, dex_pc) {}
@@ -3485,7 +3492,7 @@
 
 // Instruction to check how two inputs compare to each other.
 // Result is 0 if input0 == input1, 1 if input0 > input1, or -1 if input0 < input1.
-class HCompare : public HBinaryOperation {
+class HCompare FINAL : public HBinaryOperation {
  public:
   // Note that `comparison_type` is the type of comparison performed
   // between the comparison's inputs, not the type of the instantiated
@@ -3574,7 +3581,7 @@
   DISALLOW_COPY_AND_ASSIGN(HCompare);
 };
 
-class HNewInstance : public HExpression<2> {
+class HNewInstance FINAL : public HExpression<2> {
  public:
   HNewInstance(HInstruction* cls,
                HCurrentMethod* current_method,
@@ -3777,7 +3784,7 @@
   DISALLOW_COPY_AND_ASSIGN(HInvoke);
 };
 
-class HInvokeUnresolved : public HInvoke {
+class HInvokeUnresolved FINAL : public HInvoke {
  public:
   HInvokeUnresolved(ArenaAllocator* arena,
                     uint32_t number_of_arguments,
@@ -3800,7 +3807,7 @@
   DISALLOW_COPY_AND_ASSIGN(HInvokeUnresolved);
 };
 
-class HInvokeStaticOrDirect : public HInvoke {
+class HInvokeStaticOrDirect FINAL : public HInvoke {
  public:
   // Requirements of this method call regarding the class
   // initialization (clinit) check of its declaring class.
@@ -4089,7 +4096,7 @@
 std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::MethodLoadKind rhs);
 std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::ClinitCheckRequirement rhs);
 
-class HInvokeVirtual : public HInvoke {
+class HInvokeVirtual FINAL : public HInvoke {
  public:
   HInvokeVirtual(ArenaAllocator* arena,
                  uint32_t number_of_arguments,
@@ -4115,7 +4122,7 @@
   DISALLOW_COPY_AND_ASSIGN(HInvokeVirtual);
 };
 
-class HInvokeInterface : public HInvoke {
+class HInvokeInterface FINAL : public HInvoke {
  public:
   HInvokeInterface(ArenaAllocator* arena,
                    uint32_t number_of_arguments,
@@ -4142,7 +4149,7 @@
   DISALLOW_COPY_AND_ASSIGN(HInvokeInterface);
 };
 
-class HNeg : public HUnaryOperation {
+class HNeg FINAL : public HUnaryOperation {
  public:
   HNeg(Primitive::Type result_type, HInstruction* input, uint32_t dex_pc = kNoDexPc)
       : HUnaryOperation(result_type, input, dex_pc) {
@@ -4170,7 +4177,7 @@
   DISALLOW_COPY_AND_ASSIGN(HNeg);
 };
 
-class HNewArray : public HExpression<2> {
+class HNewArray FINAL : public HExpression<2> {
  public:
   HNewArray(HInstruction* length,
             HCurrentMethod* current_method,
@@ -4209,7 +4216,7 @@
   DISALLOW_COPY_AND_ASSIGN(HNewArray);
 };
 
-class HAdd : public HBinaryOperation {
+class HAdd FINAL : public HBinaryOperation {
  public:
   HAdd(Primitive::Type result_type,
        HInstruction* left,
@@ -4244,7 +4251,7 @@
   DISALLOW_COPY_AND_ASSIGN(HAdd);
 };
 
-class HSub : public HBinaryOperation {
+class HSub FINAL : public HBinaryOperation {
  public:
   HSub(Primitive::Type result_type,
        HInstruction* left,
@@ -4277,7 +4284,7 @@
   DISALLOW_COPY_AND_ASSIGN(HSub);
 };
 
-class HMul : public HBinaryOperation {
+class HMul FINAL : public HBinaryOperation {
  public:
   HMul(Primitive::Type result_type,
        HInstruction* left,
@@ -4312,7 +4319,7 @@
   DISALLOW_COPY_AND_ASSIGN(HMul);
 };
 
-class HDiv : public HBinaryOperation {
+class HDiv FINAL : public HBinaryOperation {
  public:
   HDiv(Primitive::Type result_type,
        HInstruction* left,
@@ -4364,7 +4371,7 @@
   DISALLOW_COPY_AND_ASSIGN(HDiv);
 };
 
-class HRem : public HBinaryOperation {
+class HRem FINAL : public HBinaryOperation {
  public:
   HRem(Primitive::Type result_type,
        HInstruction* left,
@@ -4415,7 +4422,7 @@
   DISALLOW_COPY_AND_ASSIGN(HRem);
 };
 
-class HDivZeroCheck : public HExpression<1> {
+class HDivZeroCheck FINAL : public HExpression<1> {
  public:
   // `HDivZeroCheck` can trigger GC, as it may call the `ArithmeticException`
   // constructor.
@@ -4441,7 +4448,7 @@
   DISALLOW_COPY_AND_ASSIGN(HDivZeroCheck);
 };
 
-class HShl : public HBinaryOperation {
+class HShl FINAL : public HBinaryOperation {
  public:
   HShl(Primitive::Type result_type,
        HInstruction* value,
@@ -4487,7 +4494,7 @@
   DISALLOW_COPY_AND_ASSIGN(HShl);
 };
 
-class HShr : public HBinaryOperation {
+class HShr FINAL : public HBinaryOperation {
  public:
   HShr(Primitive::Type result_type,
        HInstruction* value,
@@ -4533,7 +4540,7 @@
   DISALLOW_COPY_AND_ASSIGN(HShr);
 };
 
-class HUShr : public HBinaryOperation {
+class HUShr FINAL : public HBinaryOperation {
  public:
   HUShr(Primitive::Type result_type,
         HInstruction* value,
@@ -4581,7 +4588,7 @@
   DISALLOW_COPY_AND_ASSIGN(HUShr);
 };
 
-class HAnd : public HBinaryOperation {
+class HAnd FINAL : public HBinaryOperation {
  public:
   HAnd(Primitive::Type result_type,
        HInstruction* left,
@@ -4618,7 +4625,7 @@
   DISALLOW_COPY_AND_ASSIGN(HAnd);
 };
 
-class HOr : public HBinaryOperation {
+class HOr FINAL : public HBinaryOperation {
  public:
   HOr(Primitive::Type result_type,
       HInstruction* left,
@@ -4655,7 +4662,7 @@
   DISALLOW_COPY_AND_ASSIGN(HOr);
 };
 
-class HXor : public HBinaryOperation {
+class HXor FINAL : public HBinaryOperation {
  public:
   HXor(Primitive::Type result_type,
        HInstruction* left,
@@ -4692,7 +4699,7 @@
   DISALLOW_COPY_AND_ASSIGN(HXor);
 };
 
-class HRor : public HBinaryOperation {
+class HRor FINAL : public HBinaryOperation {
  public:
   HRor(Primitive::Type result_type, HInstruction* value, HInstruction* distance)
     : HBinaryOperation(result_type, value, distance) {
@@ -4745,7 +4752,7 @@
 
 // The value of a parameter in this method. Its location depends on
 // the calling convention.
-class HParameterValue : public HExpression<0> {
+class HParameterValue FINAL : public HExpression<0> {
  public:
   HParameterValue(const DexFile& dex_file,
                   uint16_t type_index,
@@ -4787,7 +4794,7 @@
   DISALLOW_COPY_AND_ASSIGN(HParameterValue);
 };
 
-class HNot : public HUnaryOperation {
+class HNot FINAL : public HUnaryOperation {
  public:
   HNot(Primitive::Type result_type, HInstruction* input, uint32_t dex_pc = kNoDexPc)
       : HUnaryOperation(result_type, input, dex_pc) {}
@@ -4820,7 +4827,7 @@
   DISALLOW_COPY_AND_ASSIGN(HNot);
 };
 
-class HBooleanNot : public HUnaryOperation {
+class HBooleanNot FINAL : public HUnaryOperation {
  public:
   explicit HBooleanNot(HInstruction* input, uint32_t dex_pc = kNoDexPc)
       : HUnaryOperation(Primitive::Type::kPrimBoolean, input, dex_pc) {}
@@ -4857,7 +4864,7 @@
   DISALLOW_COPY_AND_ASSIGN(HBooleanNot);
 };
 
-class HTypeConversion : public HExpression<1> {
+class HTypeConversion FINAL : public HExpression<1> {
  public:
   // Instantiate a type conversion of `input` to `result_type`.
   HTypeConversion(Primitive::Type result_type, HInstruction* input, uint32_t dex_pc)
@@ -4900,7 +4907,7 @@
 
 static constexpr uint32_t kNoRegNumber = -1;
 
-class HNullCheck : public HExpression<1> {
+class HNullCheck FINAL : public HExpression<1> {
  public:
   // `HNullCheck` can trigger GC, as it may call the `NullPointerException`
   // constructor.
@@ -4962,7 +4969,7 @@
   const Handle<mirror::DexCache> dex_cache_;
 };
 
-class HInstanceFieldGet : public HExpression<1> {
+class HInstanceFieldGet FINAL : public HExpression<1> {
  public:
   HInstanceFieldGet(HInstruction* value,
                     Primitive::Type field_type,
@@ -5014,7 +5021,7 @@
   DISALLOW_COPY_AND_ASSIGN(HInstanceFieldGet);
 };
 
-class HInstanceFieldSet : public HTemplateInstruction<2> {
+class HInstanceFieldSet FINAL : public HTemplateInstruction<2> {
  public:
   HInstanceFieldSet(HInstruction* object,
                     HInstruction* value,
@@ -5065,7 +5072,7 @@
   DISALLOW_COPY_AND_ASSIGN(HInstanceFieldSet);
 };
 
-class HArrayGet : public HExpression<2> {
+class HArrayGet FINAL : public HExpression<2> {
  public:
   HArrayGet(HInstruction* array, HInstruction* index, Primitive::Type type, uint32_t dex_pc)
       : HExpression(type, SideEffects::ArrayReadOfType(type), dex_pc) {
@@ -5111,7 +5118,7 @@
   DISALLOW_COPY_AND_ASSIGN(HArrayGet);
 };
 
-class HArraySet : public HTemplateInstruction<3> {
+class HArraySet FINAL : public HTemplateInstruction<3> {
  public:
   HArraySet(HInstruction* array,
             HInstruction* index,
@@ -5211,7 +5218,7 @@
   DISALLOW_COPY_AND_ASSIGN(HArraySet);
 };
 
-class HArrayLength : public HExpression<1> {
+class HArrayLength FINAL : public HExpression<1> {
  public:
   HArrayLength(HInstruction* array, uint32_t dex_pc)
       : HExpression(Primitive::kPrimInt, SideEffects::None(), dex_pc) {
@@ -5228,13 +5235,26 @@
     return obj == InputAt(0);
   }
 
+  void MarkAsStringLength() { SetPackedFlag<kFlagIsStringLength>(); }
+  bool IsStringLength() const { return GetPackedFlag<kFlagIsStringLength>(); }
+
   DECLARE_INSTRUCTION(ArrayLength);
 
  private:
+  // We treat a String as an array, creating the HArrayLength from the String.length()
+  // or String.isEmpty() intrinsic in the instruction simplifier. We could always
+  // determine whether a particular HArrayLength is actually a String.length() by
+  // looking at the type of the input, but that requires holding the mutator lock,
+  // so we prefer a flag that code generators can read without locking.
+  static constexpr size_t kFlagIsStringLength = kNumberOfExpressionPackedBits;
+  static constexpr size_t kNumberOfArrayLengthPackedBits = kFlagIsStringLength + 1;
+  static_assert(kNumberOfArrayLengthPackedBits <= HInstruction::kMaxNumberOfPackedBits,
+                "Too many packed fields.");
+
   DISALLOW_COPY_AND_ASSIGN(HArrayLength);
 };
 
-class HBoundsCheck : public HExpression<2> {
+class HBoundsCheck FINAL : public HExpression<2> {
  public:
   // `HBoundsCheck` can trigger GC, as it may call the `IndexOutOfBoundsException`
   // constructor.
@@ -5262,7 +5282,7 @@
   DISALLOW_COPY_AND_ASSIGN(HBoundsCheck);
 };
 
-class HSuspendCheck : public HTemplateInstruction<0> {
+class HSuspendCheck FINAL : public HTemplateInstruction<0> {
  public:
   explicit HSuspendCheck(uint32_t dex_pc = kNoDexPc)
       : HTemplateInstruction(SideEffects::CanTriggerGC(), dex_pc), slow_path_(nullptr) {}
@@ -5304,7 +5324,7 @@
 /**
  * Instruction to load a Class object.
  */
-class HLoadClass : public HExpression<1> {
+class HLoadClass FINAL : public HExpression<1> {
  public:
   HLoadClass(HCurrentMethod* current_method,
              uint16_t type_index,
@@ -5408,7 +5428,7 @@
   DISALLOW_COPY_AND_ASSIGN(HLoadClass);
 };
 
-class HLoadString : public HExpression<1> {
+class HLoadString FINAL : public HExpression<1> {
  public:
   // Determines how to load the String.
   enum class LoadKind {
@@ -5610,7 +5630,7 @@
 /**
  * Performs an initialization check on its Class object input.
  */
-class HClinitCheck : public HExpression<1> {
+class HClinitCheck FINAL : public HExpression<1> {
  public:
   HClinitCheck(HLoadClass* constant, uint32_t dex_pc)
       : HExpression(
@@ -5640,7 +5660,7 @@
   DISALLOW_COPY_AND_ASSIGN(HClinitCheck);
 };
 
-class HStaticFieldGet : public HExpression<1> {
+class HStaticFieldGet FINAL : public HExpression<1> {
  public:
   HStaticFieldGet(HInstruction* cls,
                   Primitive::Type field_type,
@@ -5689,7 +5709,7 @@
   DISALLOW_COPY_AND_ASSIGN(HStaticFieldGet);
 };
 
-class HStaticFieldSet : public HTemplateInstruction<2> {
+class HStaticFieldSet FINAL : public HTemplateInstruction<2> {
  public:
   HStaticFieldSet(HInstruction* cls,
                   HInstruction* value,
@@ -5737,7 +5757,7 @@
   DISALLOW_COPY_AND_ASSIGN(HStaticFieldSet);
 };
 
-class HUnresolvedInstanceFieldGet : public HExpression<1> {
+class HUnresolvedInstanceFieldGet FINAL : public HExpression<1> {
  public:
   HUnresolvedInstanceFieldGet(HInstruction* obj,
                               Primitive::Type field_type,
@@ -5762,7 +5782,7 @@
   DISALLOW_COPY_AND_ASSIGN(HUnresolvedInstanceFieldGet);
 };
 
-class HUnresolvedInstanceFieldSet : public HTemplateInstruction<2> {
+class HUnresolvedInstanceFieldSet FINAL : public HTemplateInstruction<2> {
  public:
   HUnresolvedInstanceFieldSet(HInstruction* obj,
                               HInstruction* value,
@@ -5800,7 +5820,7 @@
   DISALLOW_COPY_AND_ASSIGN(HUnresolvedInstanceFieldSet);
 };
 
-class HUnresolvedStaticFieldGet : public HExpression<0> {
+class HUnresolvedStaticFieldGet FINAL : public HExpression<0> {
  public:
   HUnresolvedStaticFieldGet(Primitive::Type field_type,
                             uint32_t field_index,
@@ -5823,7 +5843,7 @@
   DISALLOW_COPY_AND_ASSIGN(HUnresolvedStaticFieldGet);
 };
 
-class HUnresolvedStaticFieldSet : public HTemplateInstruction<1> {
+class HUnresolvedStaticFieldSet FINAL : public HTemplateInstruction<1> {
  public:
   HUnresolvedStaticFieldSet(HInstruction* value,
                             Primitive::Type field_type,
@@ -5860,7 +5880,7 @@
 };
 
 // Implement the move-exception DEX instruction.
-class HLoadException : public HExpression<0> {
+class HLoadException FINAL : public HExpression<0> {
  public:
   explicit HLoadException(uint32_t dex_pc = kNoDexPc)
       : HExpression(Primitive::kPrimNot, SideEffects::None(), dex_pc) {}
@@ -5875,7 +5895,7 @@
 
 // Implicit part of move-exception which clears thread-local exception storage.
 // Must not be removed because the runtime expects the TLS to get cleared.
-class HClearException : public HTemplateInstruction<0> {
+class HClearException FINAL : public HTemplateInstruction<0> {
  public:
   explicit HClearException(uint32_t dex_pc = kNoDexPc)
       : HTemplateInstruction(SideEffects::AllWrites(), dex_pc) {}
@@ -5886,7 +5906,7 @@
   DISALLOW_COPY_AND_ASSIGN(HClearException);
 };
 
-class HThrow : public HTemplateInstruction<1> {
+class HThrow FINAL : public HTemplateInstruction<1> {
  public:
   HThrow(HInstruction* exception, uint32_t dex_pc)
       : HTemplateInstruction(SideEffects::CanTriggerGC(), dex_pc) {
@@ -5923,7 +5943,7 @@
 
 std::ostream& operator<<(std::ostream& os, TypeCheckKind rhs);
 
-class HInstanceOf : public HExpression<2> {
+class HInstanceOf FINAL : public HExpression<2> {
  public:
   HInstanceOf(HInstruction* object,
               HLoadClass* constant,
@@ -5977,7 +5997,7 @@
   DISALLOW_COPY_AND_ASSIGN(HInstanceOf);
 };
 
-class HBoundType : public HExpression<1> {
+class HBoundType FINAL : public HExpression<1> {
  public:
   HBoundType(HInstruction* input, uint32_t dex_pc = kNoDexPc)
       : HExpression(Primitive::kPrimNot, SideEffects::None(), dex_pc),
@@ -6021,7 +6041,7 @@
   DISALLOW_COPY_AND_ASSIGN(HBoundType);
 };
 
-class HCheckCast : public HTemplateInstruction<2> {
+class HCheckCast FINAL : public HTemplateInstruction<2> {
  public:
   HCheckCast(HInstruction* object,
              HLoadClass* constant,
@@ -6066,7 +6086,7 @@
   DISALLOW_COPY_AND_ASSIGN(HCheckCast);
 };
 
-class HMemoryBarrier : public HTemplateInstruction<0> {
+class HMemoryBarrier FINAL : public HTemplateInstruction<0> {
  public:
   explicit HMemoryBarrier(MemBarrierKind barrier_kind, uint32_t dex_pc = kNoDexPc)
       : HTemplateInstruction(
@@ -6091,7 +6111,7 @@
   DISALLOW_COPY_AND_ASSIGN(HMemoryBarrier);
 };
 
-class HMonitorOperation : public HTemplateInstruction<1> {
+class HMonitorOperation FINAL : public HTemplateInstruction<1> {
  public:
   enum class OperationKind {
     kEnter,
@@ -6136,7 +6156,7 @@
   DISALLOW_COPY_AND_ASSIGN(HMonitorOperation);
 };
 
-class HSelect : public HExpression<3> {
+class HSelect FINAL : public HExpression<3> {
  public:
   HSelect(HInstruction* condition,
           HInstruction* true_value,
@@ -6249,7 +6269,7 @@
 
 static constexpr size_t kDefaultNumberOfMoves = 4;
 
-class HParallelMove : public HTemplateInstruction<0> {
+class HParallelMove FINAL : public HTemplateInstruction<0> {
  public:
   explicit HParallelMove(ArenaAllocator* arena, uint32_t dex_pc = kNoDexPc)
       : HTemplateInstruction(SideEffects::None(), dex_pc),
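
The blanket FINAL additions (ART's FINAL macro expands to C++11 final) are an optimization
aid: once a leaf HInstruction class is final, calls made through a pointer or reference of
that static type can be devirtualized. A minimal illustration with stand-in types:

    struct HInstructionLike {
      virtual ~HInstructionLike() = default;
      virtual bool CanThrow() const { return true; }
    };

    struct HGotoLike final : HInstructionLike {
      bool CanThrow() const override { return false; }
    };

    bool Query(const HGotoLike& insn) {
      // Static type is final: the compiler may emit a direct (or inlined)
      // call instead of a virtual dispatch through the vtable.
      return insn.CanThrow();
    }
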
diff --git a/compiler/optimizing/nodes_arm.h b/compiler/optimizing/nodes_arm.h
index 6a1dbb9..371e8ef 100644
--- a/compiler/optimizing/nodes_arm.h
+++ b/compiler/optimizing/nodes_arm.h
@@ -19,7 +19,7 @@
 
 namespace art {
 
-class HArmDexCacheArraysBase : public HExpression<0> {
+class HArmDexCacheArraysBase FINAL : public HExpression<0> {
  public:
   explicit HArmDexCacheArraysBase(const DexFile& dex_file)
       : HExpression(Primitive::kPrimInt, SideEffects::None(), kNoDexPc),
diff --git a/compiler/optimizing/nodes_arm64.h b/compiler/optimizing/nodes_arm64.h
index 173852a..737aece 100644
--- a/compiler/optimizing/nodes_arm64.h
+++ b/compiler/optimizing/nodes_arm64.h
@@ -21,7 +21,7 @@
 
 namespace art {
 
-class HArm64DataProcWithShifterOp : public HExpression<2> {
+class HArm64DataProcWithShifterOp FINAL : public HExpression<2> {
  public:
   enum OpKind {
     kLSL,   // Logical shift left.
@@ -97,7 +97,7 @@
 // This instruction computes an intermediate address pointing in the 'middle' of an object. The
 // result pointer cannot be handled by GC, so extra care is taken to make sure that this value is
 // never used across anything that can trigger GC.
-class HArm64IntermediateAddress : public HExpression<2> {
+class HArm64IntermediateAddress FINAL : public HExpression<2> {
  public:
   HArm64IntermediateAddress(HInstruction* base_address, HInstruction* offset, uint32_t dex_pc)
       : HExpression(Primitive::kPrimNot, SideEffects::DependsOnGC(), dex_pc) {
diff --git a/compiler/optimizing/nodes_shared.h b/compiler/optimizing/nodes_shared.h
index c10c718..bdcf54a 100644
--- a/compiler/optimizing/nodes_shared.h
+++ b/compiler/optimizing/nodes_shared.h
@@ -19,7 +19,7 @@
 
 namespace art {
 
-class HMultiplyAccumulate : public HExpression<3> {
+class HMultiplyAccumulate FINAL : public HExpression<3> {
  public:
   HMultiplyAccumulate(Primitive::Type type,
                       InstructionKind op,
@@ -53,7 +53,7 @@
   DISALLOW_COPY_AND_ASSIGN(HMultiplyAccumulate);
 };
 
-class HBitwiseNegatedRight : public HBinaryOperation {
+class HBitwiseNegatedRight FINAL : public HBinaryOperation {
  public:
   HBitwiseNegatedRight(Primitive::Type result_type,
                             InstructionKind op,
diff --git a/compiler/optimizing/nodes_x86.h b/compiler/optimizing/nodes_x86.h
index 0b3a84d..c3696b5 100644
--- a/compiler/optimizing/nodes_x86.h
+++ b/compiler/optimizing/nodes_x86.h
@@ -20,7 +20,7 @@
 namespace art {
 
 // Compute the address of the method for X86 Constant area support.
-class HX86ComputeBaseMethodAddress : public HExpression<0> {
+class HX86ComputeBaseMethodAddress FINAL : public HExpression<0> {
  public:
   // Treat the value as an int32_t, but it is really a 32 bit native pointer.
   HX86ComputeBaseMethodAddress()
@@ -33,7 +33,7 @@
 };
 
 // Load a constant value from the constant table.
-class HX86LoadFromConstantTable : public HExpression<2> {
+class HX86LoadFromConstantTable FINAL : public HExpression<2> {
  public:
   HX86LoadFromConstantTable(HX86ComputeBaseMethodAddress* method_base,
                             HConstant* constant)
@@ -57,7 +57,7 @@
 };
 
 // Version of HNeg with access to the constant table for FP types.
-class HX86FPNeg : public HExpression<2> {
+class HX86FPNeg FINAL : public HExpression<2> {
  public:
   HX86FPNeg(Primitive::Type result_type,
             HInstruction* input,
@@ -76,7 +76,7 @@
 };
 
 // X86 version of HPackedSwitch that holds a pointer to the base method address.
-class HX86PackedSwitch : public HTemplateInstruction<2> {
+class HX86PackedSwitch FINAL : public HTemplateInstruction<2> {
  public:
   HX86PackedSwitch(int32_t start_value,
                    int32_t num_entries,
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index b1f9cbc..4405b80 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -1773,7 +1773,9 @@
     // therefore will not have a location for that instruction for `to`.
     // Because the instruction is a constant or the ArtMethod, we don't need to
     // do anything: it will be materialized in the irreducible loop.
-    DCHECK(IsMaterializableEntryBlockInstructionOfGraphWithIrreducibleLoop(defined_by));
+    DCHECK(IsMaterializableEntryBlockInstructionOfGraphWithIrreducibleLoop(defined_by))
+        << defined_by->DebugName() << ":" << defined_by->GetId()
+        << " " << from->GetBlockId() << " -> " << to->GetBlockId();
     return;
   }
 
diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc
index 5534aea..36e0d99 100644
--- a/compiler/optimizing/ssa_liveness_analysis.cc
+++ b/compiler/optimizing/ssa_liveness_analysis.cc
@@ -309,17 +309,8 @@
     }
 
     if (block->IsLoopHeader()) {
-      if (kIsDebugBuild && block->GetLoopInformation()->IsIrreducible()) {
-        // To satisfy our liveness algorithm, we need to ensure loop headers of
-        // irreducible loops do not have any live-in instructions, except constants
-        // and the current method, which can be trivially re-materialized.
-        for (uint32_t idx : live_in->Indexes()) {
-          HInstruction* instruction = GetInstructionFromSsaIndex(idx);
-          DCHECK(instruction->GetBlock()->IsEntryBlock()) << instruction->DebugName();
-          DCHECK(!instruction->IsParameterValue());
-          DCHECK(instruction->IsCurrentMethod() || instruction->IsConstant())
-              << instruction->DebugName();
-        }
+      if (kIsDebugBuild) {
+        CheckNoLiveInIrreducibleLoop(*block);
       }
       size_t last_position = block->GetLoopInformation()->GetLifetimeEnd();
       // For all live_in instructions at the loop header, we need to create a range
@@ -344,6 +335,9 @@
       // change in this loop), and the live_out set.  If the live_out
       // set does not change, there is no need to update the live_in set.
       if (UpdateLiveOut(*block) && UpdateLiveIn(*block)) {
+        if (kIsDebugBuild) {
+          CheckNoLiveInIrreducibleLoop(*block);
+        }
         changed = true;
       }
     }
diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h
index 1141fd1..1fcba8b 100644
--- a/compiler/optimizing/ssa_liveness_analysis.h
+++ b/compiler/optimizing/ssa_liveness_analysis.h
@@ -1260,6 +1260,23 @@
     return instruction->GetType() == Primitive::kPrimNot;
   }
 
+  void CheckNoLiveInIrreducibleLoop(const HBasicBlock& block) const {
+    if (!block.IsLoopHeader() || !block.GetLoopInformation()->IsIrreducible()) {
+      return;
+    }
+    BitVector* live_in = GetLiveInSet(block);
+    // To satisfy our liveness algorithm, we need to ensure loop headers of
+    // irreducible loops do not have any live-in instructions, except constants
+    // and the current method, which can be trivially re-materialized.
+    for (uint32_t idx : live_in->Indexes()) {
+      HInstruction* instruction = GetInstructionFromSsaIndex(idx);
+      DCHECK(instruction->GetBlock()->IsEntryBlock()) << instruction->DebugName();
+      DCHECK(!instruction->IsParameterValue());
+      DCHECK(instruction->IsCurrentMethod() || instruction->IsConstant())
+          << instruction->DebugName();
+    }
+  }
+
   HGraph* const graph_;
   CodeGenerator* const codegen_;
   ArenaVector<BlockInfo*> block_infos_;
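
Hoisting the loop-header assertions into CheckNoLiveInIrreducibleLoop lets both call sites, the initial range computation and the live-in/live-out fixed point, enforce one invariant instead of two diverging copies. A sketch of that invariant with std::bitset standing in for ART's BitVector:

    #include <bitset>
    #include <cassert>
    #include <cstddef>

    constexpr std::size_t kMaxSsaIndex = 128;

    struct Instruction {
      bool is_constant;
      bool is_current_method;
    };

    // Everything live into an irreducible loop header must be trivially
    // rematerializable, i.e. a constant or the current method.
    void CheckNoLiveIn(const std::bitset<kMaxSsaIndex>& live_in,
                       const Instruction* ssa_table) {
      for (std::size_t idx = 0; idx < live_in.size(); ++idx) {
        if (live_in.test(idx)) {
          const Instruction& insn = ssa_table[idx];
          assert(insn.is_constant || insn.is_current_method);
        }
      }
    }
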
diff --git a/compiler/optimizing/ssa_phi_elimination.cc b/compiler/optimizing/ssa_phi_elimination.cc
index 44bcadf..c67612e 100644
--- a/compiler/optimizing/ssa_phi_elimination.cc
+++ b/compiler/optimizing/ssa_phi_elimination.cc
@@ -17,6 +17,7 @@
 #include "ssa_phi_elimination.h"
 
 #include "base/arena_containers.h"
+#include "base/arena_bit_vector.h"
 #include "base/bit_vector-inl.h"
 
 namespace art {
@@ -127,8 +128,10 @@
     }
   }
 
-  ArenaSet<uint32_t> visited_phis_in_cycle(
-      graph_->GetArena()->Adapter(kArenaAllocSsaPhiElimination));
+  ArenaBitVector visited_phis_in_cycle(graph_->GetArena(),
+                                       graph_->GetCurrentInstructionId(),
+                                       /* expandable */ false,
+                                       kArenaAllocSsaPhiElimination);
   ArenaVector<HPhi*> cycle_worklist(graph_->GetArena()->Adapter(kArenaAllocSsaPhiElimination));
 
   while (!worklist_.empty()) {
@@ -147,11 +150,11 @@
     }
 
     HInstruction* candidate = nullptr;
-    visited_phis_in_cycle.clear();
+    visited_phis_in_cycle.ClearAllBits();
     cycle_worklist.clear();
 
     cycle_worklist.push_back(phi);
-    visited_phis_in_cycle.insert(phi->GetId());
+    visited_phis_in_cycle.SetBit(phi->GetId());
     bool catch_phi_in_cycle = phi->IsCatchPhi();
     bool irreducible_loop_phi_in_cycle = phi->IsIrreducibleLoopHeaderPhi();
 
@@ -183,9 +186,9 @@
           if (input == current) {
             continue;
           } else if (input->IsPhi()) {
-            if (!ContainsElement(visited_phis_in_cycle, input->GetId())) {
+            if (!visited_phis_in_cycle.IsBitSet(input->GetId())) {
               cycle_worklist.push_back(input->AsPhi());
-              visited_phis_in_cycle.insert(input->GetId());
+              visited_phis_in_cycle.SetBit(input->GetId());
               catch_phi_in_cycle |= input->AsPhi()->IsCatchPhi();
               irreducible_loop_phi_in_cycle |= input->IsIrreducibleLoopHeaderPhi();
             } else {
@@ -234,7 +237,7 @@
       // for elimination. Add phis that use this phi to the worklist.
       for (const HUseListNode<HInstruction*>& use : current->GetUses()) {
         HInstruction* user = use.GetUser();
-        if (user->IsPhi() && !ContainsElement(visited_phis_in_cycle, user->GetId())) {
+        if (user->IsPhi() && !visited_phis_in_cycle.IsBitSet(user->GetId())) {
           worklist_.push_back(user->AsPhi());
         }
       }
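
Switching visited_phis_in_cycle from ArenaSet<uint32_t> to an ArenaBitVector sized by GetCurrentInstructionId() exploits the fact that instruction ids are dense: membership becomes a single bit probe and the per-cycle reset a bulk clear, with no per-insert allocation. A sketch of the trade-off with standard containers (ids assumed to be in [0, max_id)):

    #include <set>
    #include <vector>

    struct Phi { int id; };

    // Tree-set version: O(log n) insert/lookup, one heap node per entry.
    bool AllDistinctWithSet(const std::vector<Phi>& cycle) {
      std::set<int> visited;
      for (const Phi& phi : cycle) {
        if (!visited.insert(phi.id).second) return false;
      }
      return true;
    }

    // Bit-vector version: one bit per possible id gives O(1) membership,
    // and resetting between cycles is a bulk operation.
    bool AllDistinctWithBits(const std::vector<Phi>& cycle, int max_id) {
      std::vector<bool> visited(max_id, false);
      for (const Phi& phi : cycle) {
        if (visited[phi.id]) return false;
        visited[phi.id] = true;
      }
      return true;
    }
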
diff --git a/compiler/utils/arm/assembler_arm.cc b/compiler/utils/arm/assembler_arm.cc
index e5f91dc..a7f4547 100644
--- a/compiler/utils/arm/assembler_arm.cc
+++ b/compiler/utils/arm/assembler_arm.cc
@@ -386,8 +386,9 @@
 
 constexpr size_t kFramePointerSize = kArmPointerSize;
 
-void ArmAssembler::BuildFrame(size_t frame_size, ManagedRegister method_reg,
-                              const std::vector<ManagedRegister>& callee_save_regs,
+void ArmAssembler::BuildFrame(size_t frame_size,
+                              ManagedRegister method_reg,
+                              ArrayRef<const ManagedRegister> callee_save_regs,
                               const ManagedRegisterEntrySpills& entry_spills) {
   CHECK_EQ(buffer_.Size(), 0U);  // Nothing emitted yet
   CHECK_ALIGNED(frame_size, kStackAlignment);
@@ -442,7 +443,7 @@
 }
 
 void ArmAssembler::RemoveFrame(size_t frame_size,
-                              const std::vector<ManagedRegister>& callee_save_regs) {
+                               ArrayRef<const ManagedRegister> callee_save_regs) {
   CHECK_ALIGNED(frame_size, kStackAlignment);
   cfi_.RememberState();
 
diff --git a/compiler/utils/arm/assembler_arm.h b/compiler/utils/arm/assembler_arm.h
index ffbe786..274d0de 100644
--- a/compiler/utils/arm/assembler_arm.h
+++ b/compiler/utils/arm/assembler_arm.h
@@ -907,12 +907,13 @@
   //
 
   // Emit code that will create an activation on the stack
-  void BuildFrame(size_t frame_size, ManagedRegister method_reg,
-                  const std::vector<ManagedRegister>& callee_save_regs,
+  void BuildFrame(size_t frame_size,
+                  ManagedRegister method_reg,
+                  ArrayRef<const ManagedRegister> callee_save_regs,
                   const ManagedRegisterEntrySpills& entry_spills) OVERRIDE;
 
   // Emit code that will remove an activation from the stack
-  void RemoveFrame(size_t frame_size, const std::vector<ManagedRegister>& callee_save_regs)
+  void RemoveFrame(size_t frame_size, ArrayRef<const ManagedRegister> callee_save_regs)
     OVERRIDE;
 
   void IncreaseFrameSize(size_t adjust) OVERRIDE;
diff --git a/compiler/utils/arm/managed_register_arm.h b/compiler/utils/arm/managed_register_arm.h
index 5b84058..276db44 100644
--- a/compiler/utils/arm/managed_register_arm.h
+++ b/compiler/utils/arm/managed_register_arm.h
@@ -85,34 +85,34 @@
 // There is a one-to-one mapping between ManagedRegister and register id.
 class ArmManagedRegister : public ManagedRegister {
  public:
-  Register AsCoreRegister() const {
+  constexpr Register AsCoreRegister() const {
     CHECK(IsCoreRegister());
     return static_cast<Register>(id_);
   }
 
-  SRegister AsSRegister() const {
+  constexpr SRegister AsSRegister() const {
     CHECK(IsSRegister());
     return static_cast<SRegister>(id_ - kNumberOfCoreRegIds);
   }
 
-  DRegister AsDRegister() const {
+  constexpr DRegister AsDRegister() const {
     CHECK(IsDRegister());
     return static_cast<DRegister>(id_ - kNumberOfCoreRegIds - kNumberOfSRegIds);
   }
 
-  SRegister AsOverlappingDRegisterLow() const {
+  constexpr SRegister AsOverlappingDRegisterLow() const {
     CHECK(IsOverlappingDRegister());
     DRegister d_reg = AsDRegister();
     return static_cast<SRegister>(d_reg * 2);
   }
 
-  SRegister AsOverlappingDRegisterHigh() const {
+  constexpr SRegister AsOverlappingDRegisterHigh() const {
     CHECK(IsOverlappingDRegister());
     DRegister d_reg = AsDRegister();
     return static_cast<SRegister>(d_reg * 2 + 1);
   }
 
-  RegisterPair AsRegisterPair() const {
+  constexpr RegisterPair AsRegisterPair() const {
     CHECK(IsRegisterPair());
     Register reg_low = AsRegisterPairLow();
     if (reg_low == R1) {
@@ -122,50 +122,50 @@
     }
   }
 
-  Register AsRegisterPairLow() const {
+  constexpr Register AsRegisterPairLow() const {
     CHECK(IsRegisterPair());
     // Appropriate mapping of register ids allows to use AllocIdLow().
     return FromRegId(AllocIdLow()).AsCoreRegister();
   }
 
-  Register AsRegisterPairHigh() const {
+  constexpr Register AsRegisterPairHigh() const {
     CHECK(IsRegisterPair());
     // Appropriate mapping of register ids allows to use AllocIdHigh().
     return FromRegId(AllocIdHigh()).AsCoreRegister();
   }
 
-  bool IsCoreRegister() const {
+  constexpr bool IsCoreRegister() const {
     CHECK(IsValidManagedRegister());
     return (0 <= id_) && (id_ < kNumberOfCoreRegIds);
   }
 
-  bool IsSRegister() const {
+  constexpr bool IsSRegister() const {
     CHECK(IsValidManagedRegister());
     const int test = id_ - kNumberOfCoreRegIds;
     return (0 <= test) && (test < kNumberOfSRegIds);
   }
 
-  bool IsDRegister() const {
+  constexpr bool IsDRegister() const {
     CHECK(IsValidManagedRegister());
     const int test = id_ - (kNumberOfCoreRegIds + kNumberOfSRegIds);
     return (0 <= test) && (test < kNumberOfDRegIds);
   }
 
   // Returns true if this DRegister overlaps SRegisters.
-  bool IsOverlappingDRegister() const {
+  constexpr bool IsOverlappingDRegister() const {
     CHECK(IsValidManagedRegister());
     const int test = id_ - (kNumberOfCoreRegIds + kNumberOfSRegIds);
     return (0 <= test) && (test < kNumberOfOverlappingDRegIds);
   }
 
-  bool IsRegisterPair() const {
+  constexpr bool IsRegisterPair() const {
     CHECK(IsValidManagedRegister());
     const int test =
         id_ - (kNumberOfCoreRegIds + kNumberOfSRegIds + kNumberOfDRegIds);
     return (0 <= test) && (test < kNumberOfPairRegIds);
   }
 
-  bool IsSameType(ArmManagedRegister test) const {
+  constexpr bool IsSameType(ArmManagedRegister test) const {
     CHECK(IsValidManagedRegister() && test.IsValidManagedRegister());
     return
       (IsCoreRegister() && test.IsCoreRegister()) ||
@@ -182,29 +182,29 @@
 
   void Print(std::ostream& os) const;
 
-  static ArmManagedRegister FromCoreRegister(Register r) {
+  static constexpr ArmManagedRegister FromCoreRegister(Register r) {
     CHECK_NE(r, kNoRegister);
     return FromRegId(r);
   }
 
-  static ArmManagedRegister FromSRegister(SRegister r) {
+  static constexpr ArmManagedRegister FromSRegister(SRegister r) {
     CHECK_NE(r, kNoSRegister);
     return FromRegId(r + kNumberOfCoreRegIds);
   }
 
-  static ArmManagedRegister FromDRegister(DRegister r) {
+  static constexpr ArmManagedRegister FromDRegister(DRegister r) {
     CHECK_NE(r, kNoDRegister);
     return FromRegId(r + (kNumberOfCoreRegIds + kNumberOfSRegIds));
   }
 
-  static ArmManagedRegister FromRegisterPair(RegisterPair r) {
+  static constexpr ArmManagedRegister FromRegisterPair(RegisterPair r) {
     CHECK_NE(r, kNoRegisterPair);
     return FromRegId(r + (kNumberOfCoreRegIds +
                           kNumberOfSRegIds + kNumberOfDRegIds));
   }
 
   // Return a RegisterPair consisting of Register r_low and r_low + 1.
-  static ArmManagedRegister FromCoreRegisterPair(Register r_low) {
+  static constexpr ArmManagedRegister FromCoreRegisterPair(Register r_low) {
     if (r_low != R1) {  // not the dalvik special case
       CHECK_NE(r_low, kNoRegister);
       CHECK_EQ(0, (r_low % 2));
@@ -217,7 +217,7 @@
   }
 
   // Return a DRegister overlapping SRegister r_low and r_low + 1.
-  static ArmManagedRegister FromSRegisterPair(SRegister r_low) {
+  static constexpr ArmManagedRegister FromSRegisterPair(SRegister r_low) {
     CHECK_NE(r_low, kNoSRegister);
     CHECK_EQ(0, (r_low % 2));
     const int r = r_low / 2;
@@ -226,7 +226,7 @@
   }
 
  private:
-  bool IsValidManagedRegister() const {
+  constexpr bool IsValidManagedRegister() const {
     return (0 <= id_) && (id_ < kNumberOfRegIds);
   }
 
@@ -251,9 +251,9 @@
 
   friend class ManagedRegister;
 
-  explicit ArmManagedRegister(int reg_id) : ManagedRegister(reg_id) {}
+  explicit constexpr ArmManagedRegister(int reg_id) : ManagedRegister(reg_id) {}
 
-  static ArmManagedRegister FromRegId(int reg_id) {
+  static constexpr ArmManagedRegister FromRegId(int reg_id) {
     ArmManagedRegister reg(reg_id);
     CHECK(reg.IsValidManagedRegister());
     return reg;
@@ -264,7 +264,7 @@
 
 }  // namespace arm
 
-inline arm::ArmManagedRegister ManagedRegister::AsArm() const {
+constexpr inline arm::ArmManagedRegister ManagedRegister::AsArm() const {
   arm::ArmManagedRegister reg(id_);
   CHECK(reg.IsNoRegister() || reg.IsValidManagedRegister());
   return reg;
diff --git a/compiler/utils/arm64/assembler_arm64.cc b/compiler/utils/arm64/assembler_arm64.cc
index eb5112b..1842f00 100644
--- a/compiler/utils/arm64/assembler_arm64.cc
+++ b/compiler/utils/arm64/assembler_arm64.cc
@@ -683,8 +683,9 @@
   DCHECK(registers.IsEmpty());
 }
 
-void Arm64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg,
-                                const std::vector<ManagedRegister>& callee_save_regs,
+void Arm64Assembler::BuildFrame(size_t frame_size,
+                                ManagedRegister method_reg,
+                                ArrayRef<const ManagedRegister> callee_save_regs,
                                 const ManagedRegisterEntrySpills& entry_spills) {
   // Setup VIXL CPURegList for callee-saves.
   CPURegList core_reg_list(CPURegister::kRegister, kXRegSize, 0);
@@ -741,7 +742,7 @@
 }
 
 void Arm64Assembler::RemoveFrame(size_t frame_size,
-                                 const std::vector<ManagedRegister>& callee_save_regs) {
+                                 ArrayRef<const ManagedRegister> callee_save_regs) {
   // Setup VIXL CPURegList for callee-saves.
   CPURegList core_reg_list(CPURegister::kRegister, kXRegSize, 0);
   CPURegList fp_reg_list(CPURegister::kFPRegister, kDRegSize, 0);
diff --git a/compiler/utils/arm64/assembler_arm64.h b/compiler/utils/arm64/assembler_arm64.h
index c4e5de7..91171a8 100644
--- a/compiler/utils/arm64/assembler_arm64.h
+++ b/compiler/utils/arm64/assembler_arm64.h
@@ -109,12 +109,13 @@
   void UnspillRegisters(vixl::CPURegList registers, int offset);
 
   // Emit code that will create an activation on the stack.
-  void BuildFrame(size_t frame_size, ManagedRegister method_reg,
-                  const std::vector<ManagedRegister>& callee_save_regs,
+  void BuildFrame(size_t frame_size,
+                  ManagedRegister method_reg,
+                  ArrayRef<const ManagedRegister> callee_save_regs,
                   const ManagedRegisterEntrySpills& entry_spills) OVERRIDE;
 
   // Emit code that will remove an activation from the stack.
-  void RemoveFrame(size_t frame_size, const std::vector<ManagedRegister>& callee_save_regs)
+  void RemoveFrame(size_t frame_size, ArrayRef<const ManagedRegister> callee_save_regs)
       OVERRIDE;
 
   void IncreaseFrameSize(size_t adjust) OVERRIDE;
diff --git a/compiler/utils/arm64/managed_register_arm64.h b/compiler/utils/arm64/managed_register_arm64.h
index 46be1c5..f7d74d2 100644
--- a/compiler/utils/arm64/managed_register_arm64.h
+++ b/compiler/utils/arm64/managed_register_arm64.h
@@ -56,80 +56,80 @@
 
 class Arm64ManagedRegister : public ManagedRegister {
  public:
-  XRegister AsXRegister() const {
+  constexpr XRegister AsXRegister() const {
     CHECK(IsXRegister());
     return static_cast<XRegister>(id_);
   }
 
-  WRegister AsWRegister() const {
+  constexpr WRegister AsWRegister() const {
     CHECK(IsWRegister());
     return static_cast<WRegister>(id_ - kNumberOfXRegIds);
   }
 
-  DRegister AsDRegister() const {
+  constexpr DRegister AsDRegister() const {
     CHECK(IsDRegister());
     return static_cast<DRegister>(id_ - kNumberOfXRegIds - kNumberOfWRegIds);
   }
 
-  SRegister AsSRegister() const {
+  constexpr SRegister AsSRegister() const {
     CHECK(IsSRegister());
     return static_cast<SRegister>(id_ - kNumberOfXRegIds - kNumberOfWRegIds -
                                   kNumberOfDRegIds);
   }
 
-  WRegister AsOverlappingWRegister() const {
+  constexpr WRegister AsOverlappingWRegister() const {
     CHECK(IsValidManagedRegister());
     if (IsZeroRegister()) return WZR;
     return static_cast<WRegister>(AsXRegister());
   }
 
-  XRegister AsOverlappingXRegister() const {
+  constexpr XRegister AsOverlappingXRegister() const {
     CHECK(IsValidManagedRegister());
     return static_cast<XRegister>(AsWRegister());
   }
 
-  SRegister AsOverlappingSRegister() const {
+  constexpr SRegister AsOverlappingSRegister() const {
     CHECK(IsValidManagedRegister());
     return static_cast<SRegister>(AsDRegister());
   }
 
-  DRegister AsOverlappingDRegister() const {
+  constexpr DRegister AsOverlappingDRegister() const {
     CHECK(IsValidManagedRegister());
     return static_cast<DRegister>(AsSRegister());
   }
 
-  bool IsXRegister() const {
+  constexpr bool IsXRegister() const {
     CHECK(IsValidManagedRegister());
     return (0 <= id_) && (id_ < kNumberOfXRegIds);
   }
 
-  bool IsWRegister() const {
+  constexpr bool IsWRegister() const {
     CHECK(IsValidManagedRegister());
     const int test = id_ - kNumberOfXRegIds;
     return (0 <= test) && (test < kNumberOfWRegIds);
   }
 
-  bool IsDRegister() const {
+  constexpr bool IsDRegister() const {
     CHECK(IsValidManagedRegister());
     const int test = id_ - (kNumberOfXRegIds + kNumberOfWRegIds);
     return (0 <= test) && (test < kNumberOfDRegIds);
   }
 
-  bool IsSRegister() const {
+  constexpr bool IsSRegister() const {
     CHECK(IsValidManagedRegister());
     const int test = id_ - (kNumberOfXRegIds + kNumberOfWRegIds + kNumberOfDRegIds);
     return (0 <= test) && (test < kNumberOfSRegIds);
   }
 
-  bool IsGPRegister() const {
+  constexpr bool IsGPRegister() const {
     return IsXRegister() || IsWRegister();
   }
 
-  bool IsFPRegister() const {
+  constexpr bool IsFPRegister() const {
     return IsDRegister() || IsSRegister();
   }
 
-  bool IsSameType(Arm64ManagedRegister test) const {
+  constexpr bool IsSameType(Arm64ManagedRegister test) const {
     CHECK(IsValidManagedRegister() && test.IsValidManagedRegister());
     return
       (IsXRegister() && test.IsXRegister()) ||
@@ -145,53 +145,53 @@
 
   void Print(std::ostream& os) const;
 
-  static Arm64ManagedRegister FromXRegister(XRegister r) {
+  static constexpr Arm64ManagedRegister FromXRegister(XRegister r) {
     CHECK_NE(r, kNoRegister);
     return FromRegId(r);
   }
 
-  static Arm64ManagedRegister FromWRegister(WRegister r) {
+  static constexpr Arm64ManagedRegister FromWRegister(WRegister r) {
     CHECK_NE(r, kNoWRegister);
     return FromRegId(r + kNumberOfXRegIds);
   }
 
-  static Arm64ManagedRegister FromDRegister(DRegister r) {
+  static constexpr Arm64ManagedRegister FromDRegister(DRegister r) {
     CHECK_NE(r, kNoDRegister);
     return FromRegId(r + (kNumberOfXRegIds + kNumberOfWRegIds));
   }
 
-  static Arm64ManagedRegister FromSRegister(SRegister r) {
+  static constexpr Arm64ManagedRegister FromSRegister(SRegister r) {
     CHECK_NE(r, kNoSRegister);
     return FromRegId(r + (kNumberOfXRegIds + kNumberOfWRegIds +
                           kNumberOfDRegIds));
   }
 
   // Returns the X register overlapping W register r.
-  static Arm64ManagedRegister FromWRegisterX(WRegister r) {
+  static constexpr Arm64ManagedRegister FromWRegisterX(WRegister r) {
     CHECK_NE(r, kNoWRegister);
     return FromRegId(r);
   }
 
   // Return the D register overlapping S register r.
-  static Arm64ManagedRegister FromSRegisterD(SRegister r) {
+  static constexpr Arm64ManagedRegister FromSRegisterD(SRegister r) {
     CHECK_NE(r, kNoSRegister);
     return FromRegId(r + (kNumberOfXRegIds + kNumberOfWRegIds));
   }
 
  private:
-  bool IsValidManagedRegister() const {
+  constexpr bool IsValidManagedRegister() const {
     return (0 <= id_) && (id_ < kNumberOfRegIds);
   }
 
-  bool IsStackPointer() const {
+  constexpr bool IsStackPointer() const {
     return IsXRegister() && (id_ == SP);
   }
 
-  bool IsZeroRegister() const {
+  constexpr bool IsZeroRegister() const {
     return IsXRegister() && (id_ == XZR);
   }
 
-  int RegId() const {
+  constexpr int RegId() const {
     CHECK(!IsNoRegister());
     return id_;
   }
@@ -202,9 +202,9 @@
 
   friend class ManagedRegister;
 
-  explicit Arm64ManagedRegister(int reg_id) : ManagedRegister(reg_id) {}
+  explicit constexpr Arm64ManagedRegister(int reg_id) : ManagedRegister(reg_id) {}
 
-  static Arm64ManagedRegister FromRegId(int reg_id) {
+  static constexpr Arm64ManagedRegister FromRegId(int reg_id) {
     Arm64ManagedRegister reg(reg_id);
     CHECK(reg.IsValidManagedRegister());
     return reg;
@@ -215,7 +215,7 @@
 
 }  // namespace arm64
 
-inline arm64::Arm64ManagedRegister ManagedRegister::AsArm64() const {
+constexpr inline arm64::Arm64ManagedRegister ManagedRegister::AsArm64() const {
   arm64::Arm64ManagedRegister reg(id_);
   CHECK(reg.IsNoRegister() || reg.IsValidManagedRegister());
   return reg;
diff --git a/compiler/utils/assembler.h b/compiler/utils/assembler.h
index 5267dc3..80aa630 100644
--- a/compiler/utils/assembler.h
+++ b/compiler/utils/assembler.h
@@ -32,6 +32,7 @@
 #include "memory_region.h"
 #include "mips/constants_mips.h"
 #include "offsets.h"
+#include "utils/array_ref.h"
 #include "x86/constants_x86.h"
 #include "x86_64/constants_x86_64.h"
 
@@ -375,13 +376,14 @@
   virtual void Comment(const char* format ATTRIBUTE_UNUSED, ...) {}
 
   // Emit code that will create an activation on the stack
-  virtual void BuildFrame(size_t frame_size, ManagedRegister method_reg,
-                          const std::vector<ManagedRegister>& callee_save_regs,
+  virtual void BuildFrame(size_t frame_size,
+                          ManagedRegister method_reg,
+                          ArrayRef<const ManagedRegister> callee_save_regs,
                           const ManagedRegisterEntrySpills& entry_spills) = 0;
 
   // Emit code that will remove an activation from the stack
   virtual void RemoveFrame(size_t frame_size,
-                           const std::vector<ManagedRegister>& callee_save_regs) = 0;
+                           ArrayRef<const ManagedRegister> callee_save_regs) = 0;
 
   virtual void IncreaseFrameSize(size_t adjust) = 0;
   virtual void DecreaseFrameSize(size_t adjust) = 0;
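
Taking ArrayRef<const ManagedRegister> instead of const std::vector<ManagedRegister>& lets callers hand the assembler a fixed C array of callee-saves with no heap allocation, while code that already holds a vector still converts. A minimal sketch of the view type; ART's real one is utils/array_ref.h:

    #include <cstddef>
    #include <vector>

    // Non-owning pointer + length view; it never copies or frees, so the
    // caller's storage must outlive it.
    template <typename T>
    class SketchArrayRef {
     public:
      template <std::size_t N>
      SketchArrayRef(T (&array)[N]) : data_(array), size_(N) {}
      SketchArrayRef(const std::vector<T>& v) : data_(v.data()), size_(v.size()) {}
      const T& operator[](std::size_t i) const { return data_[i]; }
      std::size_t size() const { return size_; }
     private:
      const T* data_;
      std::size_t size_;
    };

    // Usage: a fixed callee-save list needs no heap allocation at all.
    struct Reg { int id; };
    Reg kCalleeSaves[] = {{4}, {5}, {6}};
    SketchArrayRef<Reg> kView(kCalleeSaves);
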
diff --git a/compiler/utils/managed_register.h b/compiler/utils/managed_register.h
index 893daff..46adb3f 100644
--- a/compiler/utils/managed_register.h
+++ b/compiler/utils/managed_register.h
@@ -47,40 +47,40 @@
   // ManagedRegister is a value class. There exists no method to change the
   // internal state. We therefore allow a copy constructor and an
   // assignment-operator.
-  ManagedRegister(const ManagedRegister& other) : id_(other.id_) { }
+  constexpr ManagedRegister(const ManagedRegister& other) : id_(other.id_) { }
 
   ManagedRegister& operator=(const ManagedRegister& other) {
     id_ = other.id_;
     return *this;
   }
 
-  arm::ArmManagedRegister AsArm() const;
-  arm64::Arm64ManagedRegister AsArm64() const;
-  mips::MipsManagedRegister AsMips() const;
-  mips64::Mips64ManagedRegister AsMips64() const;
-  x86::X86ManagedRegister AsX86() const;
-  x86_64::X86_64ManagedRegister AsX86_64() const;
+  constexpr arm::ArmManagedRegister AsArm() const;
+  constexpr arm64::Arm64ManagedRegister AsArm64() const;
+  constexpr mips::MipsManagedRegister AsMips() const;
+  constexpr mips64::Mips64ManagedRegister AsMips64() const;
+  constexpr x86::X86ManagedRegister AsX86() const;
+  constexpr x86_64::X86_64ManagedRegister AsX86_64() const;
 
   // It is valid to invoke Equals on and with a NoRegister.
-  bool Equals(const ManagedRegister& other) const {
+  constexpr bool Equals(const ManagedRegister& other) const {
     return id_ == other.id_;
   }
 
-  bool IsNoRegister() const {
+  constexpr bool IsNoRegister() const {
     return id_ == kNoRegister;
   }
 
-  static ManagedRegister NoRegister() {
+  static constexpr ManagedRegister NoRegister() {
     return ManagedRegister();
   }
 
-  int RegId() const { return id_; }
-  explicit ManagedRegister(int reg_id) : id_(reg_id) { }
+  constexpr int RegId() const { return id_; }
+  explicit constexpr ManagedRegister(int reg_id) : id_(reg_id) { }
 
  protected:
   static const int kNoRegister = -1;
 
-  ManagedRegister() : id_(kNoRegister) { }
+  constexpr ManagedRegister() : id_(kNoRegister) { }
 
   int id_;
 };
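
With the conversions and predicates constexpr, fixed register mappings fold to compile-time constants and can be pinned down with static_assert; the real functions keep their CHECKs as ordinary statements, which presumably relies on C++14's relaxed constexpr rules. A reduced sketch:

    enum Register { R0 = 0, R1 = 1 };

    class SketchRegister {
     public:
      static constexpr SketchRegister FromCoreRegister(Register r) {
        return SketchRegister(static_cast<int>(r));
      }
      constexpr int RegId() const { return id_; }
     private:
      explicit constexpr SketchRegister(int id) : id_(id) {}
      int id_;
    };

    // The whole mapping is evaluated at compile time.
    static_assert(SketchRegister::FromCoreRegister(R1).RegId() == 1,
                  "register id mapping is constant-foldable");
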
diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc
index a1798c0..9368301 100644
--- a/compiler/utils/mips/assembler_mips.cc
+++ b/compiler/utils/mips/assembler_mips.cc
@@ -2438,8 +2438,9 @@
 
 constexpr size_t kFramePointerSize = 4;
 
-void MipsAssembler::BuildFrame(size_t frame_size, ManagedRegister method_reg,
-                               const std::vector<ManagedRegister>& callee_save_regs,
+void MipsAssembler::BuildFrame(size_t frame_size,
+                               ManagedRegister method_reg,
+                               ArrayRef<const ManagedRegister> callee_save_regs,
                                const ManagedRegisterEntrySpills& entry_spills) {
   CHECK_ALIGNED(frame_size, kStackAlignment);
   DCHECK(!overwriting_);
@@ -2453,7 +2454,7 @@
   cfi_.RelOffset(DWARFReg(RA), stack_offset);
   for (int i = callee_save_regs.size() - 1; i >= 0; --i) {
     stack_offset -= kFramePointerSize;
-    Register reg = callee_save_regs.at(i).AsMips().AsCoreRegister();
+    Register reg = callee_save_regs[i].AsMips().AsCoreRegister();
     StoreToOffset(kStoreWord, reg, SP, stack_offset);
     cfi_.RelOffset(DWARFReg(reg), stack_offset);
   }
@@ -2482,7 +2483,7 @@
 }
 
 void MipsAssembler::RemoveFrame(size_t frame_size,
-                                const std::vector<ManagedRegister>& callee_save_regs) {
+                                ArrayRef<const ManagedRegister> callee_save_regs) {
   CHECK_ALIGNED(frame_size, kStackAlignment);
   DCHECK(!overwriting_);
   cfi_.RememberState();
@@ -2490,7 +2491,7 @@
   // Pop callee saves and return address.
   int stack_offset = frame_size - (callee_save_regs.size() * kFramePointerSize) - kFramePointerSize;
   for (size_t i = 0; i < callee_save_regs.size(); ++i) {
-    Register reg = callee_save_regs.at(i).AsMips().AsCoreRegister();
+    Register reg = callee_save_regs[i].AsMips().AsCoreRegister();
     LoadFromOffset(kLoadWord, reg, SP, stack_offset);
     cfi_.Restore(DWARFReg(reg));
     stack_offset += kFramePointerSize;
diff --git a/compiler/utils/mips/assembler_mips.h b/compiler/utils/mips/assembler_mips.h
index ecb67bd..d5e6285 100644
--- a/compiler/utils/mips/assembler_mips.h
+++ b/compiler/utils/mips/assembler_mips.h
@@ -414,11 +414,11 @@
   // Emit code that will create an activation on the stack.
   void BuildFrame(size_t frame_size,
                   ManagedRegister method_reg,
-                  const std::vector<ManagedRegister>& callee_save_regs,
+                  ArrayRef<const ManagedRegister> callee_save_regs,
                   const ManagedRegisterEntrySpills& entry_spills) OVERRIDE;
 
   // Emit code that will remove an activation from the stack.
-  void RemoveFrame(size_t frame_size, const std::vector<ManagedRegister>& callee_save_regs)
+  void RemoveFrame(size_t frame_size, ArrayRef<const ManagedRegister> callee_save_regs)
       OVERRIDE;
 
   void IncreaseFrameSize(size_t adjust) OVERRIDE;
diff --git a/compiler/utils/mips/assembler_mips_test.cc b/compiler/utils/mips/assembler_mips_test.cc
index cec43ba..56e5884 100644
--- a/compiler/utils/mips/assembler_mips_test.cc
+++ b/compiler/utils/mips/assembler_mips_test.cc
@@ -561,6 +561,14 @@
   DriverStr(RepeatFF(&mips::MipsAssembler::NegD, "neg.d ${reg1}, ${reg2}"), "NegD");
 }
 
+TEST_F(AssemblerMIPSTest, FloorWS) {
+  DriverStr(RepeatFF(&mips::MipsAssembler::FloorWS, "floor.w.s ${reg1}, ${reg2}"), "floor.w.s");
+}
+
+TEST_F(AssemblerMIPSTest, FloorWD) {
+  DriverStr(RepeatFF(&mips::MipsAssembler::FloorWD, "floor.w.d ${reg1}, ${reg2}"), "floor.w.d");
+}
+
 TEST_F(AssemblerMIPSTest, CunS) {
   DriverStr(RepeatIbFF(&mips::MipsAssembler::CunS, 3, "c.un.s $fcc{imm}, ${reg1}, ${reg2}"),
             "CunS");
diff --git a/compiler/utils/mips/managed_register_mips.h b/compiler/utils/mips/managed_register_mips.h
index 5e7ed11..66204e7 100644
--- a/compiler/utils/mips/managed_register_mips.h
+++ b/compiler/utils/mips/managed_register_mips.h
@@ -87,70 +87,70 @@
 // There is a one-to-one mapping between ManagedRegister and register id.
 class MipsManagedRegister : public ManagedRegister {
  public:
-  Register AsCoreRegister() const {
+  constexpr Register AsCoreRegister() const {
     CHECK(IsCoreRegister());
     return static_cast<Register>(id_);
   }
 
-  FRegister AsFRegister() const {
+  constexpr FRegister AsFRegister() const {
     CHECK(IsFRegister());
     return static_cast<FRegister>(id_ - kNumberOfCoreRegIds);
   }
 
-  DRegister AsDRegister() const {
+  constexpr DRegister AsDRegister() const {
     CHECK(IsDRegister());
     return static_cast<DRegister>(id_ - kNumberOfCoreRegIds - kNumberOfFRegIds);
   }
 
-  FRegister AsOverlappingDRegisterLow() const {
+  constexpr FRegister AsOverlappingDRegisterLow() const {
     CHECK(IsOverlappingDRegister());
     DRegister d_reg = AsDRegister();
     return static_cast<FRegister>(d_reg * 2);
   }
 
-  FRegister AsOverlappingDRegisterHigh() const {
+  constexpr FRegister AsOverlappingDRegisterHigh() const {
     CHECK(IsOverlappingDRegister());
     DRegister d_reg = AsDRegister();
     return static_cast<FRegister>(d_reg * 2 + 1);
   }
 
-  Register AsRegisterPairLow() const {
+  constexpr Register AsRegisterPairLow() const {
     CHECK(IsRegisterPair());
     // Appropriate mapping of register ids allows to use AllocIdLow().
     return FromRegId(AllocIdLow()).AsCoreRegister();
   }
 
-  Register AsRegisterPairHigh() const {
+  constexpr Register AsRegisterPairHigh() const {
     CHECK(IsRegisterPair());
     // Appropriate mapping of register ids allows to use AllocIdHigh().
     return FromRegId(AllocIdHigh()).AsCoreRegister();
   }
 
-  bool IsCoreRegister() const {
+  constexpr bool IsCoreRegister() const {
     CHECK(IsValidManagedRegister());
     return (0 <= id_) && (id_ < kNumberOfCoreRegIds);
   }
 
-  bool IsFRegister() const {
+  constexpr bool IsFRegister() const {
     CHECK(IsValidManagedRegister());
     const int test = id_ - kNumberOfCoreRegIds;
     return (0 <= test) && (test < kNumberOfFRegIds);
   }
 
-  bool IsDRegister() const {
+  constexpr bool IsDRegister() const {
     CHECK(IsValidManagedRegister());
     const int test = id_ - (kNumberOfCoreRegIds + kNumberOfFRegIds);
     return (0 <= test) && (test < kNumberOfDRegIds);
   }
 
   // Returns true if this DRegister overlaps FRegisters.
-  bool IsOverlappingDRegister() const {
+  constexpr bool IsOverlappingDRegister() const {
     CHECK(IsValidManagedRegister());
     const int test = id_ - (kNumberOfCoreRegIds + kNumberOfFRegIds);
     return (0 <= test) && (test < kNumberOfOverlappingDRegIds);
   }
 
-  bool IsRegisterPair() const {
+  constexpr bool IsRegisterPair() const {
     CHECK(IsValidManagedRegister());
     const int test =
         id_ - (kNumberOfCoreRegIds + kNumberOfFRegIds + kNumberOfDRegIds);
@@ -164,32 +164,32 @@
   // then false is returned.
   bool Overlaps(const MipsManagedRegister& other) const;
 
-  static MipsManagedRegister FromCoreRegister(Register r) {
+  static constexpr MipsManagedRegister FromCoreRegister(Register r) {
     CHECK_NE(r, kNoRegister);
     return FromRegId(r);
   }
 
-  static MipsManagedRegister FromFRegister(FRegister r) {
+  static constexpr MipsManagedRegister FromFRegister(FRegister r) {
     CHECK_NE(r, kNoFRegister);
     return FromRegId(r + kNumberOfCoreRegIds);
   }
 
-  static MipsManagedRegister FromDRegister(DRegister r) {
+  static constexpr MipsManagedRegister FromDRegister(DRegister r) {
     CHECK_NE(r, kNoDRegister);
     return FromRegId(r + kNumberOfCoreRegIds + kNumberOfFRegIds);
   }
 
-  static MipsManagedRegister FromRegisterPair(RegisterPair r) {
+  static constexpr MipsManagedRegister FromRegisterPair(RegisterPair r) {
     CHECK_NE(r, kNoRegisterPair);
     return FromRegId(r + (kNumberOfCoreRegIds + kNumberOfFRegIds + kNumberOfDRegIds));
   }
 
  private:
-  bool IsValidManagedRegister() const {
+  constexpr bool IsValidManagedRegister() const {
     return (0 <= id_) && (id_ < kNumberOfRegIds);
   }
 
-  int RegId() const {
+  constexpr int RegId() const {
     CHECK(!IsNoRegister());
     return id_;
   }
@@ -205,9 +205,9 @@
 
   friend class ManagedRegister;
 
-  explicit MipsManagedRegister(int reg_id) : ManagedRegister(reg_id) {}
+  explicit constexpr MipsManagedRegister(int reg_id) : ManagedRegister(reg_id) {}
 
-  static MipsManagedRegister FromRegId(int reg_id) {
+  static constexpr MipsManagedRegister FromRegId(int reg_id) {
     MipsManagedRegister reg(reg_id);
     CHECK(reg.IsValidManagedRegister());
     return reg;
@@ -218,7 +218,7 @@
 
 }  // namespace mips
 
-inline mips::MipsManagedRegister ManagedRegister::AsMips() const {
+constexpr inline mips::MipsManagedRegister ManagedRegister::AsMips() const {
   mips::MipsManagedRegister reg(id_);
   CHECK(reg.IsNoRegister() || reg.IsValidManagedRegister());
   return reg;
diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc
index ab480caf..447ede5 100644
--- a/compiler/utils/mips64/assembler_mips64.cc
+++ b/compiler/utils/mips64/assembler_mips64.cc
@@ -1977,8 +1977,9 @@
 
 constexpr size_t kFramePointerSize = 8;
 
-void Mips64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg,
-                                 const std::vector<ManagedRegister>& callee_save_regs,
+void Mips64Assembler::BuildFrame(size_t frame_size,
+                                 ManagedRegister method_reg,
+                                 ArrayRef<const ManagedRegister> callee_save_regs,
                                  const ManagedRegisterEntrySpills& entry_spills) {
   CHECK_ALIGNED(frame_size, kStackAlignment);
   DCHECK(!overwriting_);
@@ -1992,7 +1993,7 @@
   cfi_.RelOffset(DWARFReg(RA), stack_offset);
   for (int i = callee_save_regs.size() - 1; i >= 0; --i) {
     stack_offset -= kFramePointerSize;
-    GpuRegister reg = callee_save_regs.at(i).AsMips64().AsGpuRegister();
+    GpuRegister reg = callee_save_regs[i].AsMips64().AsGpuRegister();
     StoreToOffset(kStoreDoubleword, reg, SP, stack_offset);
     cfi_.RelOffset(DWARFReg(reg), stack_offset);
   }
@@ -2003,7 +2004,7 @@
   // Write out entry spills.
   int32_t offset = frame_size + kFramePointerSize;
   for (size_t i = 0; i < entry_spills.size(); ++i) {
-    Mips64ManagedRegister reg = entry_spills.at(i).AsMips64();
+    Mips64ManagedRegister reg = entry_spills[i].AsMips64();
     ManagedRegisterSpill spill = entry_spills.at(i);
     int32_t size = spill.getSize();
     if (reg.IsNoRegister()) {
@@ -2022,7 +2023,7 @@
 }
 
 void Mips64Assembler::RemoveFrame(size_t frame_size,
-                                  const std::vector<ManagedRegister>& callee_save_regs) {
+                                  ArrayRef<const ManagedRegister> callee_save_regs) {
   CHECK_ALIGNED(frame_size, kStackAlignment);
   DCHECK(!overwriting_);
   cfi_.RememberState();
@@ -2030,7 +2031,7 @@
   // Pop callee saves and return address
   int stack_offset = frame_size - (callee_save_regs.size() * kFramePointerSize) - kFramePointerSize;
   for (size_t i = 0; i < callee_save_regs.size(); ++i) {
-    GpuRegister reg = callee_save_regs.at(i).AsMips64().AsGpuRegister();
+    GpuRegister reg = callee_save_regs[i].AsMips64().AsGpuRegister();
     LoadFromOffset(kLoadDoubleword, reg, SP, stack_offset);
     cfi_.Restore(DWARFReg(reg));
     stack_offset += kFramePointerSize;
diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h
index 8acc38a..0cd0708 100644
--- a/compiler/utils/mips64/assembler_mips64.h
+++ b/compiler/utils/mips64/assembler_mips64.h
@@ -365,13 +365,13 @@
   //
 
   // Emit code that will create an activation on the stack.
-  void BuildFrame(size_t frame_size, ManagedRegister method_reg,
-                  const std::vector<ManagedRegister>& callee_save_regs,
+  void BuildFrame(size_t frame_size,
+                  ManagedRegister method_reg,
+                  ArrayRef<const ManagedRegister> callee_save_regs,
                   const ManagedRegisterEntrySpills& entry_spills) OVERRIDE;
 
   // Emit code that will remove an activation from the stack.
-  void RemoveFrame(size_t frame_size,
-                   const std::vector<ManagedRegister>& callee_save_regs) OVERRIDE;
+  void RemoveFrame(size_t frame_size, ArrayRef<const ManagedRegister> callee_save_regs) OVERRIDE;
 
   void IncreaseFrameSize(size_t adjust) OVERRIDE;
   void DecreaseFrameSize(size_t adjust) OVERRIDE;
diff --git a/compiler/utils/mips64/managed_register_mips64.h b/compiler/utils/mips64/managed_register_mips64.h
index 1d36128..c9f9556 100644
--- a/compiler/utils/mips64/managed_register_mips64.h
+++ b/compiler/utils/mips64/managed_register_mips64.h
@@ -39,22 +39,22 @@
 // There is a one-to-one mapping between ManagedRegister and register id.
 class Mips64ManagedRegister : public ManagedRegister {
  public:
-  GpuRegister AsGpuRegister() const {
+  constexpr GpuRegister AsGpuRegister() const {
     CHECK(IsGpuRegister());
     return static_cast<GpuRegister>(id_);
   }
 
-  FpuRegister AsFpuRegister() const {
+  constexpr FpuRegister AsFpuRegister() const {
     CHECK(IsFpuRegister());
     return static_cast<FpuRegister>(id_ - kNumberOfGpuRegIds);
   }
 
-  bool IsGpuRegister() const {
+  constexpr bool IsGpuRegister() const {
     CHECK(IsValidManagedRegister());
     return (0 <= id_) && (id_ < kNumberOfGpuRegIds);
   }
 
-  bool IsFpuRegister() const {
+  constexpr bool IsFpuRegister() const {
     CHECK(IsValidManagedRegister());
     const int test = id_ - kNumberOfGpuRegIds;
     return (0 <= test) && (test < kNumberOfFpuRegIds);
@@ -67,22 +67,22 @@
   // then false is returned.
   bool Overlaps(const Mips64ManagedRegister& other) const;
 
-  static Mips64ManagedRegister FromGpuRegister(GpuRegister r) {
+  static constexpr Mips64ManagedRegister FromGpuRegister(GpuRegister r) {
     CHECK_NE(r, kNoGpuRegister);
     return FromRegId(r);
   }
 
-  static Mips64ManagedRegister FromFpuRegister(FpuRegister r) {
+  static constexpr Mips64ManagedRegister FromFpuRegister(FpuRegister r) {
     CHECK_NE(r, kNoFpuRegister);
     return FromRegId(r + kNumberOfGpuRegIds);
   }
 
  private:
-  bool IsValidManagedRegister() const {
+  constexpr bool IsValidManagedRegister() const {
     return (0 <= id_) && (id_ < kNumberOfRegIds);
   }
 
-  int RegId() const {
+  constexpr int RegId() const {
     CHECK(!IsNoRegister());
     return id_;
   }
@@ -98,9 +98,9 @@
 
   friend class ManagedRegister;
 
-  explicit Mips64ManagedRegister(int reg_id) : ManagedRegister(reg_id) {}
+  explicit constexpr Mips64ManagedRegister(int reg_id) : ManagedRegister(reg_id) {}
 
-  static Mips64ManagedRegister FromRegId(int reg_id) {
+  static constexpr Mips64ManagedRegister FromRegId(int reg_id) {
     Mips64ManagedRegister reg(reg_id);
     CHECK(reg.IsValidManagedRegister());
     return reg;
@@ -111,7 +111,7 @@
 
 }  // namespace mips64
 
-inline mips64::Mips64ManagedRegister ManagedRegister::AsMips64() const {
+constexpr inline mips64::Mips64ManagedRegister ManagedRegister::AsMips64() const {
   mips64::Mips64ManagedRegister reg(id_);
   CHECK(reg.IsNoRegister() || reg.IsValidManagedRegister());
   return reg;
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index 2203646..f931d75 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -1030,6 +1030,14 @@
 }
 
 
+void X86Assembler::cmpb(const Address& address, const Immediate& imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x80);
+  EmitOperand(7, address);
+  EmitUint8(imm.value() & 0xFF);
+}
+
+
 void X86Assembler::cmpw(const Address& address, const Immediate& imm) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x66);
@@ -1924,15 +1932,16 @@
 
 constexpr size_t kFramePointerSize = 4;
 
-void X86Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg,
-                              const std::vector<ManagedRegister>& spill_regs,
+void X86Assembler::BuildFrame(size_t frame_size,
+                              ManagedRegister method_reg,
+                              ArrayRef<const ManagedRegister> spill_regs,
                               const ManagedRegisterEntrySpills& entry_spills) {
   DCHECK_EQ(buffer_.Size(), 0U);  // Nothing emitted yet.
   cfi_.SetCurrentCFAOffset(4);  // Return address on stack.
   CHECK_ALIGNED(frame_size, kStackAlignment);
   int gpr_count = 0;
   for (int i = spill_regs.size() - 1; i >= 0; --i) {
-    Register spill = spill_regs.at(i).AsX86().AsCpuRegister();
+    Register spill = spill_regs[i].AsX86().AsCpuRegister();
     pushl(spill);
     gpr_count++;
     cfi_.AdjustCFAOffset(kFramePointerSize);
@@ -1966,7 +1975,7 @@
   }
 }
 
-void X86Assembler::RemoveFrame(size_t frame_size, const std::vector<ManagedRegister>& spill_regs) {
+void X86Assembler::RemoveFrame(size_t frame_size, ArrayRef<const ManagedRegister> spill_regs) {
   CHECK_ALIGNED(frame_size, kStackAlignment);
   cfi_.RememberState();
   // -kFramePointerSize for ArtMethod*.
@@ -1974,7 +1983,7 @@
   addl(ESP, Immediate(adjust));
   cfi_.AdjustCFAOffset(-adjust);
   for (size_t i = 0; i < spill_regs.size(); ++i) {
-    Register spill = spill_regs.at(i).AsX86().AsCpuRegister();
+    Register spill = spill_regs[i].AsX86().AsCpuRegister();
     popl(spill);
     cfi_.AdjustCFAOffset(-static_cast<int>(kFramePointerSize));
     cfi_.Restore(DWARFReg(spill));
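
The new cmpb emits opcode 0x80, the 8-bit-immediate ALU group; the /7 reg-field value passed to EmitOperand selects CMP within that group, and the immediate is truncated to one byte. For the operands used in the accompanying test, cmpb $0, 128(%edi), the bytes work out as follows (a hand-assembled sketch, not produced by calling the assembler):

    #include <cstdint>
    #include <cstdio>

    int main() {
      const uint8_t encoding[] = {
          0x80,                    // group-1 ALU opcode, imm8 form
          0xBF,                    // ModRM: mod=10 (disp32), reg=7 (/7 = CMP), rm=7 (EDI)
          0x80, 0x00, 0x00, 0x00,  // disp32 = 128 (too large for a signed disp8)
          0x00,                    // imm8 = 0
      };
      for (uint8_t b : encoding) std::printf("%02x ", b);
      std::printf("\n");
      return 0;
    }
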
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index 8567ad2..fa61662 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -479,6 +479,7 @@
   void xchgl(Register dst, Register src);
   void xchgl(Register reg, const Address& address);
 
+  void cmpb(const Address& address, const Immediate& imm);
   void cmpw(const Address& address, const Immediate& imm);
 
   void cmpl(Register reg, const Immediate& imm);
@@ -632,12 +633,13 @@
   //
 
   // Emit code that will create an activation on the stack
-  void BuildFrame(size_t frame_size, ManagedRegister method_reg,
-                  const std::vector<ManagedRegister>& callee_save_regs,
+  void BuildFrame(size_t frame_size,
+                  ManagedRegister method_reg,
+                  ArrayRef<const ManagedRegister> callee_save_regs,
                   const ManagedRegisterEntrySpills& entry_spills) OVERRIDE;
 
   // Emit code that will remove an activation from the stack
-  void RemoveFrame(size_t frame_size, const std::vector<ManagedRegister>& callee_save_regs)
+  void RemoveFrame(size_t frame_size, ArrayRef<const ManagedRegister> callee_save_regs)
       OVERRIDE;
 
   void IncreaseFrameSize(size_t adjust) OVERRIDE;
diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc
index 1d1df6e..28043c9 100644
--- a/compiler/utils/x86/assembler_x86_test.cc
+++ b/compiler/utils/x86/assembler_x86_test.cc
@@ -389,4 +389,10 @@
   DriverStr(expected, "near_label");
 }
 
+TEST_F(AssemblerX86Test, Cmpb) {
+  GetAssembler()->cmpb(x86::Address(x86::EDI, 128), x86::Immediate(0));
+  const char* expected = "cmpb $0, 128(%EDI)\n";
+  DriverStr(expected, "cmpb");
+}
+
 }  // namespace art
diff --git a/compiler/utils/x86/managed_register_x86.h b/compiler/utils/x86/managed_register_x86.h
index fc20d7e..c0c2b65 100644
--- a/compiler/utils/x86/managed_register_x86.h
+++ b/compiler/utils/x86/managed_register_x86.h
@@ -89,64 +89,64 @@
 // There is a one-to-one mapping between ManagedRegister and register id.
 class X86ManagedRegister : public ManagedRegister {
  public:
-  ByteRegister AsByteRegister() const {
+  constexpr ByteRegister AsByteRegister() const {
     CHECK(IsCpuRegister());
     CHECK_LT(AsCpuRegister(), ESP);  // ESP, EBP, ESI and EDI cannot be encoded as byte registers.
     return static_cast<ByteRegister>(id_);
   }
 
-  Register AsCpuRegister() const {
+  constexpr Register AsCpuRegister() const {
     CHECK(IsCpuRegister());
     return static_cast<Register>(id_);
   }
 
-  XmmRegister AsXmmRegister() const {
+  constexpr XmmRegister AsXmmRegister() const {
     CHECK(IsXmmRegister());
     return static_cast<XmmRegister>(id_ - kNumberOfCpuRegIds);
   }
 
-  X87Register AsX87Register() const {
+  constexpr X87Register AsX87Register() const {
     CHECK(IsX87Register());
     return static_cast<X87Register>(id_ -
                                     (kNumberOfCpuRegIds + kNumberOfXmmRegIds));
   }
 
-  Register AsRegisterPairLow() const {
+  constexpr Register AsRegisterPairLow() const {
     CHECK(IsRegisterPair());
     // Appropriate mapping of register ids allows to use AllocIdLow().
     return FromRegId(AllocIdLow()).AsCpuRegister();
   }
 
-  Register AsRegisterPairHigh() const {
+  constexpr Register AsRegisterPairHigh() const {
     CHECK(IsRegisterPair());
     // Appropriate mapping of register ids allows to use AllocIdHigh().
     return FromRegId(AllocIdHigh()).AsCpuRegister();
   }
 
-  RegisterPair AsRegisterPair() const {
+  constexpr RegisterPair AsRegisterPair() const {
     CHECK(IsRegisterPair());
     return static_cast<RegisterPair>(id_ -
         (kNumberOfCpuRegIds + kNumberOfXmmRegIds + kNumberOfX87RegIds));
   }
 
-  bool IsCpuRegister() const {
+  constexpr bool IsCpuRegister() const {
     CHECK(IsValidManagedRegister());
     return (0 <= id_) && (id_ < kNumberOfCpuRegIds);
   }
 
-  bool IsXmmRegister() const {
+  constexpr bool IsXmmRegister() const {
     CHECK(IsValidManagedRegister());
     const int test = id_ - kNumberOfCpuRegIds;
     return (0 <= test) && (test < kNumberOfXmmRegIds);
   }
 
-  bool IsX87Register() const {
+  constexpr bool IsX87Register() const {
     CHECK(IsValidManagedRegister());
     const int test = id_ - (kNumberOfCpuRegIds + kNumberOfXmmRegIds);
     return (0 <= test) && (test < kNumberOfX87RegIds);
   }
 
-  bool IsRegisterPair() const {
+  constexpr bool IsRegisterPair() const {
     CHECK(IsValidManagedRegister());
     const int test = id_ -
         (kNumberOfCpuRegIds + kNumberOfXmmRegIds + kNumberOfX87RegIds);
@@ -160,33 +160,33 @@
   // then false is returned.
   bool Overlaps(const X86ManagedRegister& other) const;
 
-  static X86ManagedRegister FromCpuRegister(Register r) {
+  static constexpr X86ManagedRegister FromCpuRegister(Register r) {
     CHECK_NE(r, kNoRegister);
     return FromRegId(r);
   }
 
-  static X86ManagedRegister FromXmmRegister(XmmRegister r) {
+  static constexpr X86ManagedRegister FromXmmRegister(XmmRegister r) {
     CHECK_NE(r, kNoXmmRegister);
     return FromRegId(r + kNumberOfCpuRegIds);
   }
 
-  static X86ManagedRegister FromX87Register(X87Register r) {
+  static constexpr X86ManagedRegister FromX87Register(X87Register r) {
     CHECK_NE(r, kNoX87Register);
     return FromRegId(r + kNumberOfCpuRegIds + kNumberOfXmmRegIds);
   }
 
-  static X86ManagedRegister FromRegisterPair(RegisterPair r) {
+  static constexpr X86ManagedRegister FromRegisterPair(RegisterPair r) {
     CHECK_NE(r, kNoRegisterPair);
     return FromRegId(r + (kNumberOfCpuRegIds + kNumberOfXmmRegIds +
                           kNumberOfX87RegIds));
   }
 
  private:
-  bool IsValidManagedRegister() const {
+  constexpr bool IsValidManagedRegister() const {
     return (0 <= id_) && (id_ < kNumberOfRegIds);
   }
 
-  int RegId() const {
+  constexpr int RegId() const {
     CHECK(!IsNoRegister());
     return id_;
   }
@@ -202,9 +202,9 @@
 
   friend class ManagedRegister;
 
-  explicit X86ManagedRegister(int reg_id) : ManagedRegister(reg_id) {}
+  explicit constexpr X86ManagedRegister(int reg_id) : ManagedRegister(reg_id) {}
 
-  static X86ManagedRegister FromRegId(int reg_id) {
+  static constexpr X86ManagedRegister FromRegId(int reg_id) {
     X86ManagedRegister reg(reg_id);
     CHECK(reg.IsValidManagedRegister());
     return reg;
@@ -215,7 +215,7 @@
 
 }  // namespace x86
 
-inline x86::X86ManagedRegister ManagedRegister::AsX86() const {
+constexpr inline x86::X86ManagedRegister ManagedRegister::AsX86() const {
   x86::X86ManagedRegister reg(id_);
   CHECK(reg.IsNoRegister() || reg.IsValidManagedRegister());
   return reg;
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index 32eb4a3..3046710 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -1224,6 +1224,16 @@
 }
 
 
+void X86_64Assembler::cmpb(const Address& address, const Immediate& imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  CHECK(imm.is_int32());
+  EmitOptionalRex32(address);
+  EmitUint8(0x80);
+  EmitOperand(7, address);
+  EmitUint8(imm.value() & 0xFF);
+}
+
+
 void X86_64Assembler::cmpw(const Address& address, const Immediate& imm) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   CHECK(imm.is_int32());
@@ -2638,15 +2648,16 @@
 
 constexpr size_t kFramePointerSize = 8;
 
-void X86_64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg,
-                                 const std::vector<ManagedRegister>& spill_regs,
+void X86_64Assembler::BuildFrame(size_t frame_size,
+                                 ManagedRegister method_reg,
+                                 ArrayRef<const ManagedRegister> spill_regs,
                                  const ManagedRegisterEntrySpills& entry_spills) {
   DCHECK_EQ(buffer_.Size(), 0U);  // Nothing emitted yet.
   cfi_.SetCurrentCFAOffset(8);  // Return address on stack.
   CHECK_ALIGNED(frame_size, kStackAlignment);
   int gpr_count = 0;
   for (int i = spill_regs.size() - 1; i >= 0; --i) {
-    x86_64::X86_64ManagedRegister spill = spill_regs.at(i).AsX86_64();
+    x86_64::X86_64ManagedRegister spill = spill_regs[i].AsX86_64();
     if (spill.IsCpuRegister()) {
       pushq(spill.AsCpuRegister());
       gpr_count++;
@@ -2664,7 +2675,7 @@
   // spill xmms
   int64_t offset = rest_of_frame;
   for (int i = spill_regs.size() - 1; i >= 0; --i) {
-    x86_64::X86_64ManagedRegister spill = spill_regs.at(i).AsX86_64();
+    x86_64::X86_64ManagedRegister spill = spill_regs[i].AsX86_64();
     if (spill.IsXmmRegister()) {
       offset -= sizeof(double);
       movsd(Address(CpuRegister(RSP), offset), spill.AsXmmRegister());
@@ -2697,15 +2708,14 @@
   }
 }
 
-void X86_64Assembler::RemoveFrame(size_t frame_size,
-                            const std::vector<ManagedRegister>& spill_regs) {
+void X86_64Assembler::RemoveFrame(size_t frame_size, ArrayRef<const ManagedRegister> spill_regs) {
   CHECK_ALIGNED(frame_size, kStackAlignment);
   cfi_.RememberState();
   int gpr_count = 0;
   // unspill xmms
   int64_t offset = static_cast<int64_t>(frame_size) - (spill_regs.size() * kFramePointerSize) - 2 * kFramePointerSize;
   for (size_t i = 0; i < spill_regs.size(); ++i) {
-    x86_64::X86_64ManagedRegister spill = spill_regs.at(i).AsX86_64();
+    x86_64::X86_64ManagedRegister spill = spill_regs[i].AsX86_64();
     if (spill.IsXmmRegister()) {
       offset += sizeof(double);
       movsd(spill.AsXmmRegister(), Address(CpuRegister(RSP), offset));
@@ -2718,7 +2728,7 @@
   addq(CpuRegister(RSP), Immediate(adjust));
   cfi_.AdjustCFAOffset(-adjust);
   for (size_t i = 0; i < spill_regs.size(); ++i) {
-    x86_64::X86_64ManagedRegister spill = spill_regs.at(i).AsX86_64();
+    x86_64::X86_64ManagedRegister spill = spill_regs[i].AsX86_64();
     if (spill.IsCpuRegister()) {
       popq(spill.AsCpuRegister());
       cfi_.AdjustCFAOffset(-static_cast<int>(kFramePointerSize));
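
The 64-bit cmpb additionally calls EmitOptionalRex32(address) before the opcode: a REX prefix is emitted only when the address expression touches r8-r15, so encodings for the low eight registers stay byte-identical to the 32-bit ones. Sketched byte patterns, assuming the standard REX rules:

    #include <cstdint>

    // cmpb $0, 128(%rdi): rdi fits in three ModRM bits, so no REX prefix.
    const uint8_t kNoRex[]   = {0x80, 0xBF, 0x80, 0x00, 0x00, 0x00, 0x00};

    // cmpb $0, 128(%r8): REX.B (0x41) supplies the fourth bit of the
    // ModRM rm field, which then encodes r8 as 000.
    const uint8_t kWithRex[] = {0x41, 0x80, 0xB8, 0x80, 0x00, 0x00, 0x00, 0x00};
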
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 92c7d0a..361f73c 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -506,6 +506,7 @@
   void xchgq(CpuRegister dst, CpuRegister src);
   void xchgl(CpuRegister reg, const Address& address);
 
+  void cmpb(const Address& address, const Immediate& imm);
   void cmpw(const Address& address, const Immediate& imm);
 
   void cmpl(CpuRegister reg, const Immediate& imm);
@@ -703,12 +704,13 @@
   //
 
   // Emit code that will create an activation on the stack
-  void BuildFrame(size_t frame_size, ManagedRegister method_reg,
-                  const std::vector<ManagedRegister>& callee_save_regs,
+  void BuildFrame(size_t frame_size,
+                  ManagedRegister method_reg,
+                  ArrayRef<const ManagedRegister> callee_save_regs,
                   const ManagedRegisterEntrySpills& entry_spills) OVERRIDE;
 
   // Emit code that will remove an activation from the stack
-  void RemoveFrame(size_t frame_size, const std::vector<ManagedRegister>& callee_save_regs)
+  void RemoveFrame(size_t frame_size, ArrayRef<const ManagedRegister> callee_save_regs)
       OVERRIDE;
 
   void IncreaseFrameSize(size_t adjust) OVERRIDE;
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
index afe9207..788c725 100644
--- a/compiler/utils/x86_64/assembler_x86_64_test.cc
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -1498,9 +1498,11 @@
   // TODO: more interesting spill registers / entry spills.
 
   // Two random spill regs.
-  std::vector<ManagedRegister> spill_regs;
-  spill_regs.push_back(ManagedFromCpu(x86_64::R10));
-  spill_regs.push_back(ManagedFromCpu(x86_64::RSI));
+  const ManagedRegister raw_spill_regs[] = {
+      ManagedFromCpu(x86_64::R10),
+      ManagedFromCpu(x86_64::RSI)
+  };
+  ArrayRef<const ManagedRegister> spill_regs(raw_spill_regs);
 
   // Three random entry spills.
   ManagedRegisterEntrySpills entry_spills;
@@ -1543,9 +1545,11 @@
   // TODO: more interesting spill registers / entry spills.
 
   // Two random spill regs.
-  std::vector<ManagedRegister> spill_regs;
-  spill_regs.push_back(ManagedFromCpu(x86_64::R10));
-  spill_regs.push_back(ManagedFromCpu(x86_64::RSI));
+  const ManagedRegister raw_spill_regs[] = {
+      ManagedFromCpu(x86_64::R10),
+      ManagedFromCpu(x86_64::RSI)
+  };
+  ArrayRef<const ManagedRegister> spill_regs(raw_spill_regs);
 
   size_t frame_size = 10 * kStackAlignment;
   assembler->RemoveFrame(10 * kStackAlignment, spill_regs);
@@ -1637,4 +1641,11 @@
   DriverStr(expected, "Repecmpsq");
 }
 
+TEST_F(AssemblerX86_64Test, Cmpb) {
+  GetAssembler()->cmpb(x86_64::Address(x86_64::CpuRegister(x86_64::RDI), 128),
+                       x86_64::Immediate(0));
+  const char* expected = "cmpb $0, 128(%RDI)\n";
+  DriverStr(expected, "cmpb");
+}
+
 }  // namespace art
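
Building the view from the local raw_spill_regs array is safe here because the test consumes it before the array goes out of scope; the one hazard ArrayRef introduces over std::vector is dangling, since the view owns nothing. A sketch of the pitfall with a hypothetical view type, not test code:

    #include <cstddef>

    struct Reg { int id; };

    template <typename T>
    struct View { const T* data; std::size_t size; };  // non-owning

    View<Reg> Dangles() {
      Reg local[] = {{10}, {6}};
      return {local, 2};  // BUG: `local` dies when the function returns
    }

    View<Reg> Fine(const Reg (&backing)[2]) {
      return {backing, 2};  // OK: the caller owns the storage
    }
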
diff --git a/compiler/utils/x86_64/constants_x86_64.h b/compiler/utils/x86_64/constants_x86_64.h
index 0c782d4..37db6b1 100644
--- a/compiler/utils/x86_64/constants_x86_64.h
+++ b/compiler/utils/x86_64/constants_x86_64.h
@@ -29,15 +29,15 @@
 
 class CpuRegister {
  public:
-  explicit CpuRegister(Register r) : reg_(r) {}
-  explicit CpuRegister(int r) : reg_(Register(r)) {}
-  Register AsRegister() const {
+  explicit constexpr CpuRegister(Register r) : reg_(r) {}
+  explicit constexpr CpuRegister(int r) : reg_(Register(r)) {}
+  constexpr Register AsRegister() const {
     return reg_;
   }
-  uint8_t LowBits() const {
+  constexpr uint8_t LowBits() const {
     return reg_ & 7;
   }
-  bool NeedsRex() const {
+  constexpr bool NeedsRex() const {
     return reg_ > 7;
   }
  private:
@@ -47,15 +47,15 @@
 
 class XmmRegister {
  public:
-  explicit XmmRegister(FloatRegister r) : reg_(r) {}
-  explicit XmmRegister(int r) : reg_(FloatRegister(r)) {}
-  FloatRegister AsFloatRegister() const {
+  explicit constexpr XmmRegister(FloatRegister r) : reg_(r) {}
+  explicit constexpr XmmRegister(int r) : reg_(FloatRegister(r)) {}
+  constexpr FloatRegister AsFloatRegister() const {
     return reg_;
   }
-  uint8_t LowBits() const {
+  constexpr uint8_t LowBits() const {
     return reg_ & 7;
   }
-  bool NeedsRex() const {
+  constexpr bool NeedsRex() const {
     return reg_ > 7;
   }
  private:
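
Making LowBits() and NeedsRex() constexpr means REX decisions for registers known at compile time fold to constants, and the encoding invariants can be asserted statically. A reduced sketch:

    enum Register64 { RAX = 0, RDI = 7, R8 = 8, R15 = 15 };

    constexpr bool NeedsRex(Register64 r) { return r > 7; }
    constexpr int LowBits(Register64 r) { return r & 7; }

    static_assert(!NeedsRex(RDI), "rdi encodes without a REX prefix");
    static_assert(NeedsRex(R8) && LowBits(R8) == 0,
                  "r8 needs REX.B and encodes as rm=000");
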
diff --git a/compiler/utils/x86_64/managed_register_x86_64.h b/compiler/utils/x86_64/managed_register_x86_64.h
index c4228c1..32af672 100644
--- a/compiler/utils/x86_64/managed_register_x86_64.h
+++ b/compiler/utils/x86_64/managed_register_x86_64.h
@@ -88,52 +88,52 @@
 // There is a one-to-one mapping between ManagedRegister and register id.
 class X86_64ManagedRegister : public ManagedRegister {
  public:
-  CpuRegister AsCpuRegister() const {
+  constexpr CpuRegister AsCpuRegister() const {
     CHECK(IsCpuRegister());
     return CpuRegister(static_cast<Register>(id_));
   }
 
-  XmmRegister AsXmmRegister() const {
+  constexpr XmmRegister AsXmmRegister() const {
     CHECK(IsXmmRegister());
     return XmmRegister(static_cast<FloatRegister>(id_ - kNumberOfCpuRegIds));
   }
 
-  X87Register AsX87Register() const {
+  constexpr X87Register AsX87Register() const {
     CHECK(IsX87Register());
     return static_cast<X87Register>(id_ -
                                     (kNumberOfCpuRegIds + kNumberOfXmmRegIds));
   }
 
-  CpuRegister AsRegisterPairLow() const {
+  constexpr CpuRegister AsRegisterPairLow() const {
     CHECK(IsRegisterPair());
     // Appropriate mapping of register ids allows to use AllocIdLow().
     return FromRegId(AllocIdLow()).AsCpuRegister();
   }
 
-  CpuRegister AsRegisterPairHigh() const {
+  constexpr CpuRegister AsRegisterPairHigh() const {
     CHECK(IsRegisterPair());
     // Appropriate mapping of register ids allows to use AllocIdHigh().
     return FromRegId(AllocIdHigh()).AsCpuRegister();
   }
 
-  bool IsCpuRegister() const {
+  constexpr bool IsCpuRegister() const {
     CHECK(IsValidManagedRegister());
     return (0 <= id_) && (id_ < kNumberOfCpuRegIds);
   }
 
-  bool IsXmmRegister() const {
+  constexpr bool IsXmmRegister() const {
     CHECK(IsValidManagedRegister());
     const int test = id_ - kNumberOfCpuRegIds;
     return (0 <= test) && (test < kNumberOfXmmRegIds);
   }
 
-  bool IsX87Register() const {
+  constexpr bool IsX87Register() const {
     CHECK(IsValidManagedRegister());
     const int test = id_ - (kNumberOfCpuRegIds + kNumberOfXmmRegIds);
     return (0 <= test) && (test < kNumberOfX87RegIds);
   }
 
-  bool IsRegisterPair() const {
+  constexpr bool IsRegisterPair() const {
     CHECK(IsValidManagedRegister());
     const int test = id_ -
         (kNumberOfCpuRegIds + kNumberOfXmmRegIds + kNumberOfX87RegIds);
@@ -147,32 +147,32 @@
   // then false is returned.
   bool Overlaps(const X86_64ManagedRegister& other) const;
 
-  static X86_64ManagedRegister FromCpuRegister(Register r) {
+  static constexpr X86_64ManagedRegister FromCpuRegister(Register r) {
     CHECK_NE(r, kNoRegister);
     return FromRegId(r);
   }
 
-  static X86_64ManagedRegister FromXmmRegister(FloatRegister r) {
+  static constexpr X86_64ManagedRegister FromXmmRegister(FloatRegister r) {
     return FromRegId(r + kNumberOfCpuRegIds);
   }
 
-  static X86_64ManagedRegister FromX87Register(X87Register r) {
+  static constexpr X86_64ManagedRegister FromX87Register(X87Register r) {
     CHECK_NE(r, kNoX87Register);
     return FromRegId(r + kNumberOfCpuRegIds + kNumberOfXmmRegIds);
   }
 
-  static X86_64ManagedRegister FromRegisterPair(RegisterPair r) {
+  static constexpr X86_64ManagedRegister FromRegisterPair(RegisterPair r) {
     CHECK_NE(r, kNoRegisterPair);
     return FromRegId(r + (kNumberOfCpuRegIds + kNumberOfXmmRegIds +
                           kNumberOfX87RegIds));
   }
 
  private:
-  bool IsValidManagedRegister() const {
+  constexpr bool IsValidManagedRegister() const {
     return (0 <= id_) && (id_ < kNumberOfRegIds);
   }
 
-  int RegId() const {
+  constexpr int RegId() const {
     CHECK(!IsNoRegister());
     return id_;
   }
@@ -188,9 +188,9 @@
 
   friend class ManagedRegister;
 
-  explicit X86_64ManagedRegister(int reg_id) : ManagedRegister(reg_id) {}
+  explicit constexpr X86_64ManagedRegister(int reg_id) : ManagedRegister(reg_id) {}
 
-  static X86_64ManagedRegister FromRegId(int reg_id) {
+  static constexpr X86_64ManagedRegister FromRegId(int reg_id) {
     X86_64ManagedRegister reg(reg_id);
     CHECK(reg.IsValidManagedRegister());
     return reg;
@@ -201,7 +201,7 @@
 
 }  // namespace x86_64
 
-inline x86_64::X86_64ManagedRegister ManagedRegister::AsX86_64() const {
+constexpr inline x86_64::X86_64ManagedRegister ManagedRegister::AsX86_64() const {
   x86_64::X86_64ManagedRegister reg(id_);
   CHECK(reg.IsNoRegister() || reg.IsValidManagedRegister());
   return reg;
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index be38336..cb274dc 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -1312,7 +1312,7 @@
     if (IsBootImage() && image_filenames_.size() > 1) {
       // If we're compiling the boot image, store the boot classpath into the Key-Value store.
       // We need this for the multi-image case.
-      key_value_store_->Put(OatHeader::kBootClassPath, GetMultiImageBootClassPath());
+      key_value_store_->Put(OatHeader::kBootClassPathKey, GetMultiImageBootClassPath());
     }
 
     if (!IsBootImage()) {
@@ -1348,12 +1348,22 @@
       // Open dex files for class path.
       const std::vector<std::string> class_path_locations =
           GetClassPathLocations(runtime_->GetClassPathString());
-      OpenClassPathFiles(class_path_locations, &class_path_files_);
+      OpenClassPathFiles(class_path_locations,
+                         &class_path_files_,
+                         &opened_oat_files_,
+                         runtime_->GetInstructionSet());
 
       // Store the classpath we have right now.
       std::vector<const DexFile*> class_path_files = MakeNonOwningPointerVector(class_path_files_);
-      key_value_store_->Put(OatHeader::kClassPathKey,
-                            OatFile::EncodeDexFileDependencies(class_path_files));
+      std::string encoded_class_path;
+      if (class_path_locations.size() == 1 &&
+          class_path_locations[0] == OatFile::kSpecialSharedLibrary) {
+        // When passing the special shared library as the classpath, it is the only path.
+        encoded_class_path = OatFile::kSpecialSharedLibrary;
+      } else {
+        encoded_class_path = OatFile::EncodeDexFileDependencies(class_path_files);
+      }
+      key_value_store_->Put(OatHeader::kClassPathKey, encoded_class_path);
     }
 
     // Now that we have finalized key_value_store_, start writing the oat file.
@@ -1964,14 +1974,37 @@
     return parsed;
   }
 
-  // Opens requested class path files and appends them to opened_dex_files.
+  // Opens requested class path files and appends them to opened_dex_files. If the dex files have
+  // been stripped, this opens them from their oat files and appends them to opened_oat_files.
   static void OpenClassPathFiles(const std::vector<std::string>& class_path_locations,
-                                 std::vector<std::unique_ptr<const DexFile>>* opened_dex_files) {
-    DCHECK(opened_dex_files != nullptr) << "OpenClassPathFiles out-param is nullptr";
+                                 std::vector<std::unique_ptr<const DexFile>>* opened_dex_files,
+                                 std::vector<std::unique_ptr<OatFile>>* opened_oat_files,
+                                 InstructionSet isa) {
+    DCHECK(opened_dex_files != nullptr) << "OpenClassPathFiles dex out-param is nullptr";
+    DCHECK(opened_oat_files != nullptr) << "OpenClassPathFiles oat out-param is nullptr";
     for (const std::string& location : class_path_locations) {
+      // Stop early if we detect the special shared library, which may be passed as the classpath
+      // for dex2oat when we want to skip the shared libraries check.
+      if (location == OatFile::kSpecialSharedLibrary) {
+        break;
+      }
       std::string error_msg;
       if (!DexFile::Open(location.c_str(), location.c_str(), &error_msg, opened_dex_files)) {
-        LOG(WARNING) << "Failed to open dex file '" << location << "': " << error_msg;
+        // If we fail to open the dex file because it's been stripped, try to open the dex file
+        // from its corresponding oat file.
+        OatFileAssistant oat_file_assistant(location.c_str(), isa, false, false);
+        std::unique_ptr<OatFile> oat_file(oat_file_assistant.GetBestOatFile());
+        if (oat_file == nullptr) {
+          LOG(WARNING) << "Failed to open dex file and associated oat file for '" << location
+                       << "': " << error_msg;
+        } else {
+          std::vector<std::unique_ptr<const DexFile>> oat_dex_files =
+              oat_file_assistant.LoadDexFiles(*oat_file, location.c_str());
+          opened_oat_files->push_back(std::move(oat_file));
+          opened_dex_files->insert(opened_dex_files->end(),
+                                   std::make_move_iterator(oat_dex_files.begin()),
+                                   std::make_move_iterator(oat_dex_files.end()));
+        }
       }
     }
   }
@@ -2441,6 +2474,7 @@
   std::unique_ptr<CompilerDriver> driver_;
 
   std::vector<std::unique_ptr<MemMap>> opened_dex_files_maps_;
+  std::vector<std::unique_ptr<OatFile>> opened_oat_files_;
   std::vector<std::unique_ptr<const DexFile>> opened_dex_files_;
 
   std::vector<const DexFile*> no_inline_from_dex_files_;
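
The classpath handling above now does two things: the special shared-library token short-circuits both the dex-opening loop and the key encoding, and stripped dex files are recovered from their oat files via OatFileAssistant. A condensed sketch of the encoding decision:

    // Condensed from the logic above; kSpecialSharedLibrary is the sentinel
    // classpath token OatFile defines for skipping the shared-library check.
    std::string EncodeClassPathKey(const std::vector<std::string>& locations,
                                   const std::vector<const art::DexFile*>& dex_files) {
      if (locations.size() == 1u && locations[0] == art::OatFile::kSpecialSharedLibrary) {
        return locations[0];  // Stored verbatim; no dex dependencies to encode.
      }
      return art::OatFile::EncodeDexFileDependencies(dex_files);
    }
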
diff --git a/disassembler/disassembler.h b/disassembler/disassembler.h
index b99e5c2..b080315 100644
--- a/disassembler/disassembler.h
+++ b/disassembler/disassembler.h
@@ -31,16 +31,23 @@
   // Should the disassembler print absolute or relative addresses.
   const bool absolute_addresses_;
 
-  // Base addess for calculating relative code offsets when absolute_addresses_ is false.
+  // Base address for calculating relative code offsets when absolute_addresses_ is false.
   const uint8_t* const base_address_;
 
+  // End address (exclusive).
+  const uint8_t* const end_address_;
+
   // If set, the disassembler is allowed to look at load targets in literal
   // pools.
   const bool can_read_literals_;
 
-  DisassemblerOptions(bool absolute_addresses, const uint8_t* base_address,
+  DisassemblerOptions(bool absolute_addresses,
+                      const uint8_t* base_address,
+                      const uint8_t* end_address,
                       bool can_read_literals)
-      : absolute_addresses_(absolute_addresses), base_address_(base_address),
+      : absolute_addresses_(absolute_addresses),
+        base_address_(base_address),
+        end_address_(end_address),
         can_read_literals_(can_read_literals) {}
 
  private:
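
Callers now describe the full [base, end) extent of the code buffer rather than just its start, which is what lets the per-architecture disassemblers validate literal addresses. A hedged usage sketch, assuming ART's Disassembler API:

    // Sketch: disassemble a buffer of raw code (ART headers assumed).
    void DisassembleBuffer(art::InstructionSet isa, const std::vector<uint8_t>& code) {
      auto* options = new art::DisassemblerOptions(/* absolute_addresses */ false,
                                                   code.data(),
                                                   code.data() + code.size(),
                                                   /* can_read_literals */ true);
      std::unique_ptr<art::Disassembler> disasm(art::Disassembler::Create(isa, options));
      disasm->Dump(std::cout, code.data(), code.data() + code.size());
    }
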
diff --git a/disassembler/disassembler_arm.cc b/disassembler/disassembler_arm.cc
index bcb0438..286faf2 100644
--- a/disassembler/disassembler_arm.cc
+++ b/disassembler/disassembler_arm.cc
@@ -418,7 +418,12 @@
   return os << static_cast<int>(type);
 }
 
-void DumpThumb2Literal(std::ostream& args, const uint8_t* instr_ptr, uint32_t U, uint32_t imm32,
+void DumpThumb2Literal(std::ostream& args,
+                       const uint8_t* instr_ptr,
+                       const uintptr_t lo_adr,
+                       const uintptr_t hi_adr,
+                       uint32_t U,
+                       uint32_t imm32,
                        T2LitType type) {
   // Literal offsets (imm32) are not required to be aligned so we may need unaligned access.
   typedef const int16_t unaligned_int16_t __attribute__ ((aligned (1)));
@@ -428,8 +433,16 @@
   typedef const int64_t unaligned_int64_t __attribute__ ((aligned (1)));
   typedef const uint64_t unaligned_uint64_t __attribute__ ((aligned (1)));
 
+  // Get address of literal. Bail if not within expected buffer range to
+  // avoid trying to fetch invalid literals (we can encounter this when
+  // interpreting raw data as instructions).
   uintptr_t pc = RoundDown(reinterpret_cast<intptr_t>(instr_ptr) + 4, 4);
   uintptr_t lit_adr = U ? pc + imm32 : pc - imm32;
+  if (lit_adr < lo_adr || lit_adr >= hi_adr) {
+    args << "  ; (?)";
+    return;
+  }
+
   args << "  ; ";
   switch (type) {
     case kT2LitUByte:
@@ -482,6 +495,10 @@
     return DumpThumb16(os, instr_ptr);
   }
 
+  // Set valid address range of backing buffer.
+  const uintptr_t lo_adr = reinterpret_cast<uintptr_t>(GetDisassemblerOptions()->base_address_);
+  const uintptr_t hi_adr = reinterpret_cast<uintptr_t>(GetDisassemblerOptions()->end_address_);
+
   uint32_t op2 = (instr >> 20) & 0x7F;
   std::ostringstream opcode;
   std::ostringstream args;
@@ -824,7 +841,7 @@
                 args << d << ", [" << Rn << ", #" << ((U == 1) ? "" : "-")
                      << (imm8 << 2) << "]";
                 if (Rn.r == 15 && U == 1) {
-                  DumpThumb2Literal(args, instr_ptr, U, imm8 << 2, kT2LitHexLong);
+                  DumpThumb2Literal(args, instr_ptr, lo_adr, hi_adr, U, imm8 << 2, kT2LitHexLong);
                 }
               } else if (Rn.r == 13 && W == 1 && U == L) {  // VPUSH/VPOP
                 opcode << (L == 1 ? "vpop" : "vpush");
@@ -1410,7 +1427,7 @@
               };
               DCHECK_LT(op2 >> 1, arraysize(lit_type));
               DCHECK_NE(lit_type[op2 >> 1], kT2LitInvalid);
-              DumpThumb2Literal(args, instr_ptr, U, imm12, lit_type[op2 >> 1]);
+              DumpThumb2Literal(args, instr_ptr, lo_adr, hi_adr, U, imm12, lit_type[op2 >> 1]);
             }
           } else if ((instr & 0xFC0) == 0) {
             opcode << ldr_str << sign << type << ".w";
@@ -1711,10 +1728,13 @@
           break;
       }
     } else if (opcode1 == 0x12 || opcode1 == 0x13) {  // 01001x
+      const uintptr_t lo_adr = reinterpret_cast<uintptr_t>(GetDisassemblerOptions()->base_address_);
+      const uintptr_t hi_adr = reinterpret_cast<uintptr_t>(GetDisassemblerOptions()->end_address_);
       ThumbRegister Rt(instr, 8);
       uint16_t imm8 = instr & 0xFF;
       opcode << "ldr";
       args << Rt << ", [pc, #" << (imm8 << 2) << "]";
+      DumpThumb2Literal(args, instr_ptr, lo_adr, hi_adr, /*U*/ 1u, imm8 << 2, kT2LitHexWord);
     } else if ((opcode1 >= 0x14 && opcode1 <= 0x17) ||  // 0101xx
                (opcode1 >= 0x18 && opcode1 <= 0x1f) ||  // 011xxx
                (opcode1 >= 0x20 && opcode1 <= 0x27)) {  // 100xxx
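
The effect of the guard is easiest to see with concrete numbers. A worked sketch of the Thumb2 PC-relative computation used above (all values hypothetical):

    // instr_ptr = 0x1000, U = 1, imm32 = 8:
    //   pc      = RoundDown(0x1000 + 4, 4) = 0x1004
    //   lit_adr = pc + imm32               = 0x100c
    // The literal is dumped only when lo_adr <= lit_adr < hi_adr; anything
    // outside the buffer prints "  ; (?)" instead of a bogus load.
    bool LiteralInRange(uintptr_t lit_adr, uintptr_t lo_adr, uintptr_t hi_adr) {
      return lit_adr >= lo_adr && lit_adr < hi_adr;  // end is exclusive
    }
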
diff --git a/disassembler/disassembler_arm64.cc b/disassembler/disassembler_arm64.cc
index 5f88714..6a9afe5 100644
--- a/disassembler/disassembler_arm64.cc
+++ b/disassembler/disassembler_arm64.cc
@@ -63,9 +63,17 @@
     return;
   }
 
+  // Get address of literal. Bail if not within expected buffer range to
+  // avoid trying to fetch invalid literals (we can encounter this when
+  // interpreting raw data as instructions).
   void* data_address = instr->LiteralAddress<void*>();
-  vixl::Instr op = instr->Mask(vixl::LoadLiteralMask);
+  if (data_address < base_address_ || data_address >= end_address_) {
+    AppendToOutput(" (?)");
+    return;
+  }
 
+  // Output information on literal.
+  vixl::Instr op = instr->Mask(vixl::LoadLiteralMask);
   switch (op) {
     case vixl::LDR_w_lit:
     case vixl::LDR_x_lit:
diff --git a/disassembler/disassembler_arm64.h b/disassembler/disassembler_arm64.h
index 44fa53f..a4e5ee8a 100644
--- a/disassembler/disassembler_arm64.h
+++ b/disassembler/disassembler_arm64.h
@@ -30,8 +30,11 @@
 
 class CustomDisassembler FINAL : public vixl::Disassembler {
  public:
-  explicit CustomDisassembler(DisassemblerOptions* options) :
-      vixl::Disassembler(), read_literals_(options->can_read_literals_) {
+  explicit CustomDisassembler(DisassemblerOptions* options)
+      : vixl::Disassembler(),
+        read_literals_(options->can_read_literals_),
+        base_address_(options->base_address_),
+        end_address_(options->end_address_) {
     if (!options->absolute_addresses_) {
       MapCodeAddress(0, reinterpret_cast<const vixl::Instruction*>(options->base_address_));
     }
@@ -55,6 +58,10 @@
   //           true | 0x72681558: 1c000acb  ldr s11, pc+344 (addr 0x726816b0)
   //          false | 0x72681558: 1c000acb  ldr s11, pc+344 (addr 0x726816b0) (3.40282e+38)
   const bool read_literals_;
+
+  // Valid address range: [base_address_, end_address_)
+  const void* const base_address_;
+  const void* const end_address_;
 };
 
 class DisassemblerArm64 FINAL : public Disassembler {
diff --git a/libart_fake/Android.mk b/libart_fake/Android.mk
new file mode 100644
index 0000000..ed868a5
--- /dev/null
+++ b/libart_fake/Android.mk
@@ -0,0 +1,34 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+LOCAL_PATH := $(my-dir)
+
+include $(CLEAR_VARS)
+
+LOCAL_MODULE := libart_fake
+LOCAL_INSTALLED_MODULE_STEM := libart.so
+LOCAL_SDK_VERSION := 9
+LOCAL_CPP_EXTENSION := .cc
+LOCAL_SRC_FILES := fake.cc
+LOCAL_SHARED_LIBRARIES := liblog
+
+ifdef TARGET_2ND_ARCH
+    LOCAL_MODULE_PATH_32 := $(TARGET_OUT)/fake-libs
+    LOCAL_MODULE_PATH_64 := $(TARGET_OUT)/fake-libs64
+else
+    LOCAL_MODULE_PATH := $(TARGET_OUT)/fake-libs
+endif
+
+include $(BUILD_SHARED_LIBRARY)
diff --git a/libart_fake/README.md b/libart_fake/README.md
new file mode 100644
index 0000000..6e3621e
--- /dev/null
+++ b/libart_fake/README.md
@@ -0,0 +1,5 @@
+libart_fake
+====
+
+A fake libart made to satisfy some misbehaving apps that will attempt to link
+against libart.so.
diff --git a/libart_fake/fake.cc b/libart_fake/fake.cc
new file mode 100644
index 0000000..8842421
--- /dev/null
+++ b/libart_fake/fake.cc
@@ -0,0 +1,46 @@
+/*
+ * Copyright 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define LOG_TAG "libart_fake"
+
+#include <android/log.h>
+
+#define LOGIT(...) __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, __VA_ARGS__)
+namespace art {
+class Dbg {
+ public:
+  void SuspendVM();
+  void ResumeVM();
+};
+
+class FaultManager {
+ public:
+  void EnsureArtActionInFrontOfSignalChain();
+};
+
+void Dbg::SuspendVM() {
+  LOGIT("Linking to and calling into libart.so internal functions is not supported. "
+        "This call to '%s' is being ignored.", __func__);
+}
+void Dbg::ResumeVM() {
+  LOGIT("Linking to and calling into libart.so internal functions is not supported. "
+        "This call to '%s' is being ignored.", __func__);
+}
+void FaultManager::EnsureArtActionInFrontOfSignalChain() {
+  LOGIT("Linking to and calling into libart.so internal functions is not supported. "
+        "This call to '%s' is being ignored.", __func__);
+}
+}  // namespace art
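
What the stubs absorb, concretely: an app that dlopens libart.so and resolves internal symbols by their mangled names. A hedged sketch (the mangled name below follows the Itanium C++ ABI for art::Dbg::SuspendVM()):

    #include <dlfcn.h>

    void CallIntoLibart() {
      void* handle = dlopen("libart.so", RTLD_NOW);
      if (handle == nullptr) return;
      // Not strictly conforming, but what misbehaving apps actually do:
      void (*suspend_vm)() = reinterpret_cast<void (*)()>(
          dlsym(handle, "_ZN3art3Dbg9SuspendVMEv"));  // art::Dbg::SuspendVM()
      if (suspend_vm != nullptr) {
        suspend_vm();  // With libart_fake installed, this only logs an error.
      }
      dlclose(handle);
    }
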
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index d2ab699..f5458c0 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -335,6 +335,7 @@
       disassembler_(Disassembler::Create(instruction_set_,
                                          new DisassemblerOptions(options_.absolute_addresses_,
                                                                  oat_file.Begin(),
+                                                                 oat_file.End(),
                                                                  true /* can_read_literals_ */))) {
     CHECK(options_.class_loader_ != nullptr);
     CHECK(options_.class_filter_ != nullptr);
diff --git a/profman/profman.cc b/profman/profman.cc
index 37a560d..4d9276f 100644
--- a/profman/profman.cc
+++ b/profman/profman.cc
@@ -189,7 +189,6 @@
       return -1;
     }
     std::string dump = info.DumpInfo(/*dex_files*/ nullptr);
-    info.Save(fd);
     std::cout << dump << "\n";
     return 0;
   }
diff --git a/runtime/arch/arm/entrypoints_init_arm.cc b/runtime/arch/arm/entrypoints_init_arm.cc
index f0e9ac5..4c68862 100644
--- a/runtime/arch/arm/entrypoints_init_arm.cc
+++ b/runtime/arch/arm/entrypoints_init_arm.cc
@@ -97,7 +97,8 @@
 
   // Intrinsics
   qpoints->pIndexOf = art_quick_indexof;
-  qpoints->pStringCompareTo = art_quick_string_compareto;
+  // The ARM StringCompareTo intrinsic does not call the runtime.
+  qpoints->pStringCompareTo = nullptr;
   qpoints->pMemcpy = memcpy;
 
   // Read barrier.
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index 321b9d2..1bba4f9 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -1679,145 +1679,6 @@
     pop {r4, r10-r11, pc}
 END art_quick_indexof
 
-   /*
-     * String's compareTo.
-     *
-     * Requires rARG0/rARG1 to have been previously checked for null.  Will
-     * return negative if this's string is < comp, 0 if they are the
-     * same and positive if >.
-     *
-     * On entry:
-     *    r0:   this object pointer
-     *    r1:   comp object pointer
-     *
-     */
-    .extern __memcmp16
-ENTRY art_quick_string_compareto
-    mov    r2, r0         @ this to r2, opening up r0 for return value
-    sub    r0, r2, r1     @ Same?
-    cbnz   r0,1f
-    bx     lr
-1:                        @ Same strings, return.
-
-    push {r4, r7-r12, lr} @ 8 words - keep alignment
-    .cfi_adjust_cfa_offset 32
-    .cfi_rel_offset r4, 0
-    .cfi_rel_offset r7, 4
-    .cfi_rel_offset r8, 8
-    .cfi_rel_offset r9, 12
-    .cfi_rel_offset r10, 16
-    .cfi_rel_offset r11, 20
-    .cfi_rel_offset r12, 24
-    .cfi_rel_offset lr, 28
-
-    ldr    r7, [r2, #MIRROR_STRING_COUNT_OFFSET]
-    ldr    r10, [r1, #MIRROR_STRING_COUNT_OFFSET]
-    add    r2, #MIRROR_STRING_VALUE_OFFSET
-    add    r1, #MIRROR_STRING_VALUE_OFFSET
-
-    /*
-     * At this point, we have:
-     *    value:  r2/r1
-     *    offset: r4/r9
-     *    count:  r7/r10
-     * We're going to compute
-     *    r11 <- countDiff
-     *    r10 <- minCount
-     */
-     subs  r11, r7, r10
-     it    ls
-     movls r10, r7
-
-     /*
-      * Note: data pointers point to previous element so we can use pre-index
-      * mode with base writeback.
-      */
-     subs  r2, #2   @ offset to contents[-1]
-     subs  r1, #2   @ offset to contents[-1]
-
-     /*
-      * At this point we have:
-      *   r2: *this string data
-      *   r1: *comp string data
-      *   r10: iteration count for comparison
-      *   r11: value to return if the first part of the string is equal
-      *   r0: reserved for result
-      *   r3, r4, r7, r8, r9, r12 available for loading string data
-      */
-
-    subs  r10, #2
-    blt   .Ldo_remainder2
-
-      /*
-       * Unroll the first two checks so we can quickly catch early mismatch
-       * on long strings (but preserve incoming alignment)
-       */
-
-    ldrh  r3, [r2, #2]!
-    ldrh  r4, [r1, #2]!
-    ldrh  r7, [r2, #2]!
-    ldrh  r8, [r1, #2]!
-    subs  r0, r3, r4
-    it    eq
-    subseq  r0, r7, r8
-    bne   .Ldone
-    cmp   r10, #28
-    bgt   .Ldo_memcmp16
-    subs  r10, #3
-    blt   .Ldo_remainder
-
-.Lloopback_triple:
-    ldrh  r3, [r2, #2]!
-    ldrh  r4, [r1, #2]!
-    ldrh  r7, [r2, #2]!
-    ldrh  r8, [r1, #2]!
-    ldrh  r9, [r2, #2]!
-    ldrh  r12,[r1, #2]!
-    subs  r0, r3, r4
-    it    eq
-    subseq  r0, r7, r8
-    it    eq
-    subseq  r0, r9, r12
-    bne   .Ldone
-    subs  r10, #3
-    bge   .Lloopback_triple
-
-.Ldo_remainder:
-    adds  r10, #3
-    beq   .Lreturn_diff
-
-.Lloopback_single:
-    ldrh  r3, [r2, #2]!
-    ldrh  r4, [r1, #2]!
-    subs  r0, r3, r4
-    bne   .Ldone
-    subs  r10, #1
-    bne   .Lloopback_single
-
-.Lreturn_diff:
-    mov   r0, r11
-    pop   {r4, r7-r12, pc}
-
-.Ldo_remainder2:
-    adds  r10, #2
-    bne   .Lloopback_single
-    mov   r0, r11
-    pop   {r4, r7-r12, pc}
-
-    /* Long string case */
-.Ldo_memcmp16:
-    mov   r7, r11
-    add   r0, r2, #2
-    add   r1, r1, #2
-    mov   r2, r10
-    bl    __memcmp16
-    cmp   r0, #0
-    it    eq
-    moveq r0, r7
-.Ldone:
-    pop   {r4, r7-r12, pc}
-END art_quick_string_compareto
-
     /* Assembly routines used to handle ABI differences. */
 
     /* double fmod(double a, double b) */
diff --git a/runtime/arch/stub_test.cc b/runtime/arch/stub_test.cc
index 02629e8..a7d6d6f 100644
--- a/runtime/arch/stub_test.cc
+++ b/runtime/arch/stub_test.cc
@@ -1205,9 +1205,9 @@
 
 
 TEST_F(StubTest, StringCompareTo) {
-  // There is no StringCompareTo runtime entrypoint for __aarch64__.
-#if defined(__i386__) || defined(__arm__) || \
-    defined(__mips__) || (defined(__x86_64__) && !defined(__APPLE__))
+  // There is no StringCompareTo runtime entrypoint for __arm__ or __aarch64__.
+#if defined(__i386__) || defined(__mips__) || \
+    (defined(__x86_64__) && !defined(__APPLE__))
   // TODO: Check the "Unresolved" allocation stubs
 
   Thread* self = Thread::Current();
diff --git a/runtime/art_method.cc b/runtime/art_method.cc
index 06156f5..1790df6 100644
--- a/runtime/art_method.cc
+++ b/runtime/art_method.cc
@@ -253,14 +253,17 @@
   Runtime* runtime = Runtime::Current();
   // Call the invoke stub, passing everything as arguments.
   // If the runtime is not yet started or it is required by the debugger, then perform the
-  // Invocation by the interpreter.
+  // invocation by the interpreter, explicitly forcing interpretation over JIT to prevent
+  // cycling around the various JIT/Interpreter methods that handle method invocation.
   if (UNLIKELY(!runtime->IsStarted() || Dbg::IsForcedInterpreterNeededForCalling(self, this))) {
     if (IsStatic()) {
-      art::interpreter::EnterInterpreterFromInvoke(self, this, nullptr, args, result);
+      art::interpreter::EnterInterpreterFromInvoke(
+          self, this, nullptr, args, result, /*stay_in_interpreter*/ true);
     } else {
       mirror::Object* receiver =
           reinterpret_cast<StackReference<mirror::Object>*>(&args[0])->AsMirrorPtr();
-      art::interpreter::EnterInterpreterFromInvoke(self, this, receiver, args + 1, result);
+      art::interpreter::EnterInterpreterFromInvoke(
+          self, this, receiver, args + 1, result, /*stay_in_interpreter*/ true);
     }
   } else {
     DCHECK_EQ(runtime->GetClassLinker()->GetImagePointerSize(), sizeof(void*));
diff --git a/runtime/base/histogram.h b/runtime/base/histogram.h
index bcb7b3b..0e3bc8e 100644
--- a/runtime/base/histogram.h
+++ b/runtime/base/histogram.h
@@ -85,6 +85,10 @@
     return max_value_added_;
   }
 
+  Value BucketWidth() const {
+    return bucket_width_;
+  }
+
   const std::string& Name() const {
     return name_;
   }
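
With BucketWidth() exposed, callers can map a sample back to its bucket. A worked example with hypothetical numbers, assuming the histogram's usual fixed-width layout:

    // A histogram whose smallest recorded value is 0 and whose BucketWidth()
    // is 10 places a sample of 37 into bucket index 3:
    //   index = (sample - min) / BucketWidth() = (37 - 0) / 10 = 3
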
diff --git a/runtime/base/logging.h b/runtime/base/logging.h
index 3b5b8b5..e9e97b8 100644
--- a/runtime/base/logging.h
+++ b/runtime/base/logging.h
@@ -198,14 +198,14 @@
 // types of LHS and RHS.
 template <typename LHS, typename RHS>
 struct EagerEvaluator {
-  EagerEvaluator(LHS l, RHS r) : lhs(l), rhs(r) { }
+  constexpr EagerEvaluator(LHS l, RHS r) : lhs(l), rhs(r) { }
   LHS lhs;
   RHS rhs;
 };
 
 // Helper function for CHECK_xx.
 template <typename LHS, typename RHS>
-static inline EagerEvaluator<LHS, RHS> MakeEagerEvaluator(LHS lhs, RHS rhs) {
+static inline constexpr EagerEvaluator<LHS, RHS> MakeEagerEvaluator(LHS lhs, RHS rhs) {
   return EagerEvaluator<LHS, RHS>(lhs, rhs);
 }
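
Because both the constructor and the helper are now constexpr, an EagerEvaluator can appear in constant expressions. Illustrative only, not part of the patch:

    constexpr auto kPair = art::MakeEagerEvaluator(1, 2);
    static_assert(kPair.lhs == 1 && kPair.rhs == 2, "evaluated at compile time");
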
 
diff --git a/runtime/check_jni.cc b/runtime/check_jni.cc
index beabce3..639f913 100644
--- a/runtime/check_jni.cc
+++ b/runtime/check_jni.cc
@@ -1176,14 +1176,16 @@
       return false;
     }
 
-    // Get the *correct* JNIEnv by going through our TLS pointer.
+    // Get the current thread's JNIEnv by going through our TLS pointer.
     JNIEnvExt* threadEnv = self->GetJniEnv();
 
     // Verify that the current thread is (a) attached and (b) associated with
     // this particular instance of JNIEnv.
     if (env != threadEnv) {
+      // Get the thread owning the JNIEnv that's being used.
+      Thread* envThread = reinterpret_cast<JNIEnvExt*>(env)->self;
       AbortF("thread %s using JNIEnv* from thread %s",
-             ToStr<Thread>(*self).c_str(), ToStr<Thread>(*self).c_str());
+             ToStr<Thread>(*self).c_str(), ToStr<Thread>(*envThread).c_str());
       return false;
     }
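
The usual way to trip this check is caching a JNIEnv* and reusing it on a different, attached thread. A sketch of the bug and its fix (g_vm and g_env are assumed to have been cached at JNI_OnLoad):

    #include <jni.h>
    #include <thread>

    JavaVM* g_vm;   // process-wide; safe to cache
    JNIEnv* g_env;  // BUG when shared: a JNIEnv* is only valid on its own thread

    void Broken() {
      std::thread([] {
        JNIEnv* env = nullptr;
        g_vm->AttachCurrentThread(&env, nullptr);
        g_env->ExceptionClear();  // wrong thread's env: checkjni aborts with
                                  // "thread X using JNIEnv* from thread Y"
        g_vm->DetachCurrentThread();
      }).detach();
    }

    void Fixed() {
      std::thread([] {
        JNIEnv* env = nullptr;
        g_vm->AttachCurrentThread(&env, nullptr);  // this thread's own env
        env->ExceptionClear();
        g_vm->DetachCurrentThread();
      }).detach();
    }
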
 
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 35c40cd..d03b57c 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -52,6 +52,7 @@
 #include "gc/accounting/card_table-inl.h"
 #include "gc/accounting/heap_bitmap-inl.h"
 #include "gc/heap.h"
+#include "gc/scoped_gc_critical_section.h"
 #include "gc/space/image_space.h"
 #include "handle_scope-inl.h"
 #include "image-inl.h"
@@ -1063,9 +1064,8 @@
   return true;
 }
 
-static bool IsBootClassLoader(ScopedObjectAccessAlreadyRunnable& soa,
-                              mirror::ClassLoader* class_loader)
-    SHARED_REQUIRES(Locks::mutator_lock_) {
+bool ClassLinker::IsBootClassLoader(ScopedObjectAccessAlreadyRunnable& soa,
+                                    mirror::ClassLoader* class_loader) {
   return class_loader == nullptr ||
       class_loader->GetClass() ==
           soa.Decode<mirror::Class*>(WellKnownClasses::java_lang_BootClassLoader);
@@ -1106,7 +1106,7 @@
       soa.DecodeField(WellKnownClasses::dalvik_system_DexPathList_dexElements);
   CHECK(dex_path_list_field != nullptr);
   CHECK(dex_elements_field != nullptr);
-  while (!IsBootClassLoader(soa, class_loader)) {
+  while (!ClassLinker::IsBootClassLoader(soa, class_loader)) {
     if (class_loader->GetClass() !=
         soa.Decode<mirror::Class*>(WellKnownClasses::dalvik_system_PathClassLoader)) {
       *error_msg = StringPrintf("Unknown class loader type %s", PrettyTypeOf(class_loader).c_str());
@@ -5317,6 +5317,19 @@
   const DexFile::ClassDef& class_def = dex_file.GetClassDef(klass->GetDexClassDefIndex());
   uint16_t super_class_idx = class_def.superclass_idx_;
   if (super_class_idx != DexFile::kDexNoIndex16) {
+    // Check that a class does not inherit from itself directly.
+    //
+    // TODO: This is a cheap check to detect the straightforward case
+    // of a class extending itself (b/28685551), but we should do a
+    // proper cycle detection on loaded classes, to detect all cases
+    // of class circularity errors (b/28830038).
+    if (super_class_idx == class_def.class_idx_) {
+      ThrowClassCircularityError(klass.Get(),
+                                 "Class %s extends itself",
+                                 PrettyDescriptor(klass.Get()).c_str());
+      return false;
+    }
+
     mirror::Class* super_class = ResolveType(dex_file, super_class_idx, klass.Get());
     if (super_class == nullptr) {
       DCHECK(Thread::Current()->IsExceptionPending());
@@ -6969,7 +6982,13 @@
       }
     }
     // Put some random garbage in old methods to help find stale pointers.
-    if (methods != old_methods && old_methods != nullptr) {
+    if (methods != old_methods && old_methods != nullptr && kIsDebugBuild) {
+      // Need to make sure the GC is not running since it could be scanning the methods we are
+      // about to overwrite.
+      ScopedThreadStateChange tsc(self, kSuspended);
+      gc::ScopedGCCriticalSection gcs(self,
+                                      gc::kGcCauseClassLinker,
+                                      gc::kCollectorTypeClassLinker);
       memset(old_methods, 0xFEu, old_size);
     }
   } else {
@@ -7814,7 +7833,8 @@
   return descriptor;
 }
 
-jobject ClassLinker::CreatePathClassLoader(Thread* self, std::vector<const DexFile*>& dex_files) {
+jobject ClassLinker::CreatePathClassLoader(Thread* self,
+                                           const std::vector<const DexFile*>& dex_files) {
   // SOAAlreadyRunnable is protected, and we need something to add a global reference.
   // We could move the jobject to the callers, but all call-sites do this...
   ScopedObjectAccessUnchecked soa(self);
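
The debug-only poisoning above must not race with a concurrent collector scanning the same method arrays, hence the fake collector type this change adds. The guard idiom in isolation, as a sketch (Thread, kSuspended, and the gc:: names are ART's; the cause and collector enum values are the ones this patch introduces):

    #include <cstring>

    void PoisonOldMethods(art::Thread* self, void* old_methods, size_t old_size) {
      // Leave the runnable state first: blocking on a GC critical section
      // while runnable could deadlock with a suspend request.
      art::ScopedThreadStateChange tsc(self, art::kSuspended);
      art::gc::ScopedGCCriticalSection gcs(self,
                                           art::gc::kGcCauseClassLinker,
                                           art::gc::kCollectorTypeClassLinker);
      memset(old_methods, 0xFEu, old_size);  // GC cannot observe this mid-write.
    }
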
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index ece171c..f6ce545 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -255,7 +255,7 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Resolve a Type with the given index from the DexFile, storing the
-  // result in the DexCache. The referrer is used to identity the
+  // result in the DexCache. The referrer is used to identify the
   // target DexCache and ClassLoader to use for resolution.
   mirror::Class* ResolveType(const DexFile& dex_file, uint16_t type_idx, mirror::Class* referrer)
       SHARED_REQUIRES(Locks::mutator_lock_)
@@ -560,7 +560,7 @@
 
   // Creates a GlobalRef PathClassLoader that can be used to load classes from the given dex files.
   // Note: the objects are not completely set up. Do not use this outside of tests and the compiler.
-  jobject CreatePathClassLoader(Thread* self, std::vector<const DexFile*>& dex_files)
+  jobject CreatePathClassLoader(Thread* self, const std::vector<const DexFile*>& dex_files)
       SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!dex_lock_);
 
@@ -611,6 +611,10 @@
       const std::set<DexCacheResolvedClasses>& classes)
       REQUIRES(!dex_lock_);
 
+  static bool IsBootClassLoader(ScopedObjectAccessAlreadyRunnable& soa,
+                                mirror::ClassLoader* class_loader)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
   ArtMethod* AddMethodToConflictTable(mirror::Class* klass,
                                       ArtMethod* conflict_method,
                                       ArtMethod* interface_method,
@@ -1020,7 +1024,7 @@
 
   // Returns null if not found.
   ClassTable* ClassTableForClassLoader(mirror::ClassLoader* class_loader)
-      SHARED_REQUIRES(Locks::mutator_lock_, Locks::classlinker_classes_lock_);
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Insert a new class table if not found.
   ClassTable* InsertClassTableForClassLoader(mirror::ClassLoader* class_loader)
diff --git a/runtime/class_table.h b/runtime/class_table.h
index eb784b5..686381d 100644
--- a/runtime/class_table.h
+++ b/runtime/class_table.h
@@ -153,6 +153,10 @@
       REQUIRES(!lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  ReaderWriterMutex& GetLock() {
+    return lock_;
+  }
+
  private:
   // Lock to guard inserting and removing.
   mutable ReaderWriterMutex lock_;
diff --git a/runtime/common_throws.cc b/runtime/common_throws.cc
index b4208fe..75cce42 100644
--- a/runtime/common_throws.cc
+++ b/runtime/common_throws.cc
@@ -137,13 +137,21 @@
   ThrowException("Ljava/lang/ClassCircularityError;", c, msg.str().c_str());
 }
 
+void ThrowClassCircularityError(mirror::Class* c, const char* fmt, ...) {
+  va_list args;
+  va_start(args, fmt);
+  ThrowException("Ljava/lang/ClassCircularityError;", c, fmt, &args);
+  va_end(args);
+}
+
 // ClassFormatError
 
 void ThrowClassFormatError(mirror::Class* referrer, const char* fmt, ...) {
   va_list args;
   va_start(args, fmt);
   ThrowException("Ljava/lang/ClassFormatError;", referrer, fmt, &args);
-  va_end(args);}
+  va_end(args);
+}
 
 // IllegalAccessError
 
diff --git a/runtime/common_throws.h b/runtime/common_throws.h
index 39c4e52..c3a1f09 100644
--- a/runtime/common_throws.h
+++ b/runtime/common_throws.h
@@ -58,6 +58,9 @@
 void ThrowClassCircularityError(mirror::Class* c)
     SHARED_REQUIRES(Locks::mutator_lock_) COLD_ATTR;
 
+void ThrowClassCircularityError(mirror::Class* c, const char* fmt, ...)
+    SHARED_REQUIRES(Locks::mutator_lock_) COLD_ATTR;
+
 // ClassCastException
 
 void ThrowClassCastException(mirror::Class* dest_type, mirror::Class* src_type)
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index 55f68d3..8005642 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -582,7 +582,7 @@
       if (Runtime::Current()->GetHeap()->IsInBootImageOatFile(code) &&
           !m.IsNative() &&
           !m.IsProxyMethod()) {
-        instrumentation_->UpdateMethodsCode(&m, GetQuickToInterpreterBridge());
+        instrumentation_->UpdateMethodsCodeFromDebugger(&m, GetQuickToInterpreterBridge());
       }
     }
     return true;
diff --git a/runtime/dex_file.cc b/runtime/dex_file.cc
index 63f3f08..af12abf 100644
--- a/runtime/dex_file.cc
+++ b/runtime/dex_file.cc
@@ -518,7 +518,7 @@
     return (class_def_idx != DexFile::kDexNoIndex) ? &GetClassDef(class_def_idx) : nullptr;
   }
 
-  // Fast path for rate no class defs case.
+  // Fast path for rare no class defs case.
   const uint32_t num_class_defs = NumClassDefs();
   if (num_class_defs == 0) {
     return nullptr;
@@ -548,8 +548,8 @@
 }
 
 const DexFile::FieldId* DexFile::FindFieldId(const DexFile::TypeId& declaring_klass,
-                                              const DexFile::StringId& name,
-                                              const DexFile::TypeId& type) const {
+                                             const DexFile::StringId& name,
+                                             const DexFile::TypeId& type) const {
   // Binary search MethodIds knowing that they are sorted by class_idx, name_idx then proto_idx
   const uint16_t class_idx = GetIndexForTypeId(declaring_klass);
   const uint32_t name_idx = GetIndexForStringId(name);
diff --git a/runtime/gc/collector/concurrent_copying-inl.h b/runtime/gc/collector/concurrent_copying-inl.h
index 26f5ad3..64fa434 100644
--- a/runtime/gc/collector/concurrent_copying-inl.h
+++ b/runtime/gc/collector/concurrent_copying-inl.h
@@ -28,6 +28,47 @@
 namespace gc {
 namespace collector {
 
+inline mirror::Object* ConcurrentCopying::MarkUnevacFromSpaceRegionOrImmuneSpace(
+    mirror::Object* ref, accounting::ContinuousSpaceBitmap* bitmap) {
+  // For the Baker-style RB, in a rare case, we could incorrectly change the object from white
+  // to gray even though the object has already been marked through. This happens if a mutator
+  // thread gets preempted before the AtomicSetReadBarrierPointer below, GC marks through the
+  // object (changes it from white to gray and back to white), and the thread runs and
+  // incorrectly changes it from white to gray. We need to detect such "false gray" cases and
+  // change the objects back to white at the end of marking.
+  if (kUseBakerReadBarrier) {
+    // Test the bitmap first to reduce the chance of false gray cases.
+    if (bitmap->Test(ref)) {
+      return ref;
+    }
+  }
+  // This may or may not succeed, which is ok because the object may already be gray.
+  bool cas_success = false;
+  if (kUseBakerReadBarrier) {
+    cas_success = ref->AtomicSetReadBarrierPointer(ReadBarrier::WhitePtr(),
+                                                   ReadBarrier::GrayPtr());
+  }
+  if (bitmap->AtomicTestAndSet(ref)) {
+    // Already marked.
+    if (kUseBakerReadBarrier &&
+        cas_success &&
+        // The object could be white here if a thread gets preempted after a success at the
+        // above AtomicSetReadBarrierPointer, GC has marked through it, and the thread runs up
+        // to this point.
+        ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr()) {
+      // Register a "false-gray" object to change it from gray to white at the end of marking.
+      PushOntoFalseGrayStack(ref);
+    }
+  } else {
+    // Newly marked.
+    if (kUseBakerReadBarrier) {
+      DCHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::GrayPtr());
+    }
+    PushOntoMarkStack(ref);
+  }
+  return ref;
+}
+
 inline mirror::Object* ConcurrentCopying::Mark(mirror::Object* from_ref) {
   if (from_ref == nullptr) {
     return nullptr;
@@ -68,21 +109,7 @@
       return to_ref;
     }
     case space::RegionSpace::RegionType::kRegionTypeUnevacFromSpace: {
-      // This may or may not succeed, which is ok.
-      if (kUseBakerReadBarrier) {
-        from_ref->AtomicSetReadBarrierPointer(ReadBarrier::WhitePtr(), ReadBarrier::GrayPtr());
-      }
-      mirror::Object* to_ref = from_ref;
-      if (region_space_bitmap_->AtomicTestAndSet(from_ref)) {
-        // Already marked.
-      } else {
-        // Newly marked.
-        if (kUseBakerReadBarrier) {
-          DCHECK_EQ(to_ref->GetReadBarrierPointer(), ReadBarrier::GrayPtr());
-        }
-        PushOntoMarkStack(to_ref);
-      }
-      return to_ref;
+      return MarkUnevacFromSpaceRegionOrImmuneSpace(from_ref, region_space_bitmap_);
     }
     case space::RegionSpace::RegionType::kRegionTypeNone:
       return MarkNonMoving(from_ref);
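
MarkUnevacFromSpaceRegionOrImmuneSpace now serves both the unevacuated from-space case here and the immune-space case in concurrent_copying.cc. The interleaving its false-gray bookkeeping defends against, sketched as a timeline in comments:

    // Mutator thread M                      GC thread
    // ----------------                      ---------
    // reads ref: rb_ptr == white
    // (preempted before the CAS)
    //                                       CAS white -> gray, scans ref,
    //                                       marks through: gray -> white
    // CAS white -> gray succeeds            (ref already marked, now wrongly gray)
    //
    // bitmap->AtomicTestAndSet(ref) then reports "already marked", so M pushes
    // ref onto false_gray_stack_; ProcessFalseGrayStack() flips such objects
    // back to white once marking finishes.
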
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index d393f0b..3f8f628 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -165,6 +165,10 @@
               << reinterpret_cast<void*>(region_space_->Limit());
   }
   CheckEmptyMarkStack();
+  if (kIsDebugBuild) {
+    MutexLock mu(Thread::Current(), mark_stack_lock_);
+    CHECK(false_gray_stack_.empty());
+  }
   immune_spaces_.Reset();
   bytes_moved_.StoreRelaxed(0);
   objects_moved_.StoreRelaxed(0);
@@ -247,6 +251,9 @@
     }
     cc->is_marking_ = true;
     cc->mark_stack_mode_.StoreRelaxed(ConcurrentCopying::kMarkStackModeThreadLocal);
+    if (kIsDebugBuild) {
+      cc->region_space_->AssertAllRegionLiveBytesZeroOrCleared();
+    }
     if (UNLIKELY(Runtime::Current()->IsActiveTransaction())) {
       CHECK(Runtime::Current()->IsAotCompiler());
       TimingLogger::ScopedTiming split2("(Paused)VisitTransactionRoots", cc->GetTimings());
@@ -314,17 +321,7 @@
       DCHECK(collector_->heap_->GetMarkBitmap()->Test(obj))
           << "Immune space object must be already marked";
     }
-    // This may or may not succeed, which is ok.
-    if (kUseBakerReadBarrier) {
-      obj->AtomicSetReadBarrierPointer(ReadBarrier::WhitePtr(), ReadBarrier::GrayPtr());
-    }
-    if (cc_bitmap->AtomicTestAndSet(obj)) {
-      // Already marked. Do nothing.
-    } else {
-      // Newly marked. Set the gray bit and push it onto the mark stack.
-      CHECK(!kUseBakerReadBarrier || obj->GetReadBarrierPointer() == ReadBarrier::GrayPtr());
-      collector_->PushOntoMarkStack(obj);
-    }
+    collector_->MarkUnevacFromSpaceRegionOrImmuneSpace(obj, cc_bitmap);
   }
 
  private:
@@ -459,6 +456,9 @@
     Runtime::Current()->GetClassLinker()->CleanupClassLoaders();
     // Marking is done. Disable marking.
     DisableMarking();
+    if (kUseBakerReadBarrier) {
+      ProcessFalseGrayStack();
+    }
     CheckEmptyMarkStack();
   }
 
@@ -548,6 +548,31 @@
   mark_stack_mode_.StoreSequentiallyConsistent(kMarkStackModeOff);
 }
 
+void ConcurrentCopying::PushOntoFalseGrayStack(mirror::Object* ref) {
+  CHECK(kUseBakerReadBarrier);
+  DCHECK(ref != nullptr);
+  MutexLock mu(Thread::Current(), mark_stack_lock_);
+  false_gray_stack_.push_back(ref);
+}
+
+void ConcurrentCopying::ProcessFalseGrayStack() {
+  CHECK(kUseBakerReadBarrier);
+  // Change the objects on the false gray stack from gray to white.
+  MutexLock mu(Thread::Current(), mark_stack_lock_);
+  for (mirror::Object* obj : false_gray_stack_) {
+    DCHECK(IsMarked(obj));
+    // The object could be white here if a thread got preempted after a success at the
+    // AtomicSetReadBarrierPointer in Mark(), GC started marking through it (but not finished so
+    // still gray), and the thread ran to register it onto the false gray stack.
+    if (obj->GetReadBarrierPointer() == ReadBarrier::GrayPtr()) {
+      bool success = obj->AtomicSetReadBarrierPointer(ReadBarrier::GrayPtr(),
+                                                      ReadBarrier::WhitePtr());
+      DCHECK(success);
+    }
+  }
+  false_gray_stack_.clear();
+}
+
 void ConcurrentCopying::IssueEmptyCheckpoint() {
   Thread* self = Thread::Current();
   EmptyCheckpoint check_point(this);
@@ -655,8 +681,8 @@
   return heap_->live_stack_.get();
 }
 
-// The following visitors are that used to verify that there's no
-// references to the from-space left after marking.
+// The following visitors are used to verify that there are no references to the from-space left
+// after marking.
 class ConcurrentCopyingVerifyNoFromSpaceRefsVisitor : public SingleRootVisitor {
  public:
   explicit ConcurrentCopyingVerifyNoFromSpaceRefsVisitor(ConcurrentCopying* collector)
@@ -670,20 +696,9 @@
     }
     collector_->AssertToSpaceInvariant(nullptr, MemberOffset(0), ref);
     if (kUseBakerReadBarrier) {
-      if (collector_->RegionSpace()->IsInToSpace(ref)) {
-        CHECK(ref->GetReadBarrierPointer() == nullptr)
-            << "To-space ref " << ref << " " << PrettyTypeOf(ref)
-            << " has non-white rb_ptr " << ref->GetReadBarrierPointer();
-      } else {
-        CHECK(ref->GetReadBarrierPointer() == ReadBarrier::BlackPtr() ||
-              (ref->GetReadBarrierPointer() == ReadBarrier::WhitePtr() &&
-               collector_->IsOnAllocStack(ref)))
-            << "Non-moving/unevac from space ref " << ref << " " << PrettyTypeOf(ref)
-            << " has non-black rb_ptr " << ref->GetReadBarrierPointer()
-            << " but isn't on the alloc stack (and has white rb_ptr)."
-            << " Is it in the non-moving space="
-            << (collector_->GetHeap()->GetNonMovingSpace()->HasAddress(ref));
-      }
+      CHECK(ref->GetReadBarrierPointer() == ReadBarrier::WhitePtr())
+          << "Ref " << ref << " " << PrettyTypeOf(ref)
+          << " has non-white rb_ptr " << ref->GetReadBarrierPointer();
     }
   }
 
@@ -749,18 +764,8 @@
     ConcurrentCopyingVerifyNoFromSpaceRefsFieldVisitor visitor(collector);
     obj->VisitReferences(visitor, visitor);
     if (kUseBakerReadBarrier) {
-      if (collector->RegionSpace()->IsInToSpace(obj)) {
-        CHECK(obj->GetReadBarrierPointer() == nullptr)
-            << "obj=" << obj << " non-white rb_ptr " << obj->GetReadBarrierPointer();
-      } else {
-        CHECK(obj->GetReadBarrierPointer() == ReadBarrier::BlackPtr() ||
-              (obj->GetReadBarrierPointer() == ReadBarrier::WhitePtr() &&
-               collector->IsOnAllocStack(obj)))
-            << "Non-moving space/unevac from space ref " << obj << " " << PrettyTypeOf(obj)
-            << " has non-black rb_ptr " << obj->GetReadBarrierPointer()
-            << " but isn't on the alloc stack (and has white rb_ptr). Is it in the non-moving space="
-            << (collector->GetHeap()->GetNonMovingSpace()->HasAddress(obj));
-      }
+      CHECK(obj->GetReadBarrierPointer() == ReadBarrier::WhitePtr())
+          << "obj=" << obj << " non-white rb_ptr " << obj->GetReadBarrierPointer();
     }
   }
 
@@ -1069,7 +1074,6 @@
   }
   // Scan ref fields.
   Scan(to_ref);
-  // Mark the gray ref as white or black.
   if (kUseBakerReadBarrier) {
     DCHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr())
         << " " << to_ref << " " << to_ref->GetReadBarrierPointer()
@@ -1079,41 +1083,34 @@
   if (UNLIKELY((to_ref->GetClass<kVerifyNone, kWithoutReadBarrier>()->IsTypeOfReferenceClass() &&
                 to_ref->AsReference()->GetReferent<kWithoutReadBarrier>() != nullptr &&
                 !IsInToSpace(to_ref->AsReference()->GetReferent<kWithoutReadBarrier>())))) {
-    // Leave this Reference gray in the queue so that GetReferent() will trigger a read barrier. We
-    // will change it to black or white later in ReferenceQueue::DequeuePendingReference().
+    // Leave this reference gray in the queue so that GetReferent() will trigger a read barrier. We
+    // will change it to white later in ReferenceQueue::DequeuePendingReference().
     DCHECK(to_ref->AsReference()->GetPendingNext() != nullptr) << "Left unenqueued ref gray " << to_ref;
   } else {
-    // We may occasionally leave a Reference black or white in the queue if its referent happens to
-    // be concurrently marked after the Scan() call above has enqueued the Reference, in which case
-    // the above IsInToSpace() evaluates to true and we change the color from gray to black or white
-    // here in this else block.
+    // We may occasionally leave a reference white in the queue if its referent happens to be
+    // concurrently marked after the Scan() call above has enqueued the Reference, in which case the
+    // above IsInToSpace() evaluates to true and we change the color from gray to white here in this
+    // else block.
     if (kUseBakerReadBarrier) {
-      if (region_space_->IsInToSpace(to_ref)) {
-        // If to-space, change from gray to white.
-        bool success = to_ref->AtomicSetReadBarrierPointer</*kCasRelease*/true>(
-            ReadBarrier::GrayPtr(),
-            ReadBarrier::WhitePtr());
-        DCHECK(success) << "Must succeed as we won the race.";
-        DCHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::WhitePtr());
-      } else {
-        // If non-moving space/unevac from space, change from gray
-        // to black. We can't change gray to white because it's not
-        // safe to use CAS if two threads change values in opposite
-        // directions (A->B and B->A). So, we change it to black to
-        // indicate non-moving objects that have been marked
-        // through. Note we'd need to change from black to white
-        // later (concurrently).
-        bool success = to_ref->AtomicSetReadBarrierPointer</*kCasRelease*/true>(
-            ReadBarrier::GrayPtr(),
-            ReadBarrier::BlackPtr());
-        DCHECK(success) << "Must succeed as we won the race.";
-        DCHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::BlackPtr());
-      }
+      bool success = to_ref->AtomicSetReadBarrierPointer</*kCasRelease*/true>(
+          ReadBarrier::GrayPtr(),
+          ReadBarrier::WhitePtr());
+      DCHECK(success) << "Must succeed as we won the race.";
     }
   }
 #else
   DCHECK(!kUseBakerReadBarrier);
 #endif
+
+  if (region_space_->IsInUnevacFromSpace(to_ref)) {
+    // Add to the live bytes of the unevacuated from-space region. Note this code is always run by
+    // the GC-running thread (no synchronization required).
+    DCHECK(region_space_bitmap_->Test(to_ref));
+    // Disable the read barrier in SizeOf for performance, which is safe.
+    size_t obj_size = to_ref->SizeOf<kDefaultVerifyFlags, kWithoutReadBarrier>();
+    size_t alloc_size = RoundUp(obj_size, space::RegionSpace::kAlignment);
+    region_space_->AddLiveBytes(to_ref, alloc_size);
+  }
   if (ReadBarrier::kEnableToSpaceInvariantChecks || kIsDebugBuild) {
     ConcurrentCopyingAssertToSpaceInvariantObjectVisitor visitor(this);
     visitor(to_ref);
@@ -1226,61 +1223,6 @@
   RecordFreeLOS(heap_->GetLargeObjectsSpace()->Sweep(swap_bitmaps));
 }
 
-class ConcurrentCopyingClearBlackPtrsVisitor {
- public:
-  explicit ConcurrentCopyingClearBlackPtrsVisitor(ConcurrentCopying* cc)
-      : collector_(cc) {}
-  void operator()(mirror::Object* obj) const SHARED_REQUIRES(Locks::mutator_lock_)
-      SHARED_REQUIRES(Locks::heap_bitmap_lock_) {
-    DCHECK(obj != nullptr);
-    DCHECK(collector_->heap_->GetMarkBitmap()->Test(obj)) << obj;
-    DCHECK_EQ(obj->GetReadBarrierPointer(), ReadBarrier::BlackPtr()) << obj;
-    obj->AtomicSetReadBarrierPointer(ReadBarrier::BlackPtr(), ReadBarrier::WhitePtr());
-    DCHECK_EQ(obj->GetReadBarrierPointer(), ReadBarrier::WhitePtr()) << obj;
-  }
-
- private:
-  ConcurrentCopying* const collector_;
-};
-
-// Clear the black ptrs in non-moving objects back to white.
-void ConcurrentCopying::ClearBlackPtrs() {
-  CHECK(kUseBakerReadBarrier);
-  TimingLogger::ScopedTiming split("ClearBlackPtrs", GetTimings());
-  ConcurrentCopyingClearBlackPtrsVisitor visitor(this);
-  for (auto& space : heap_->GetContinuousSpaces()) {
-    if (space == region_space_) {
-      continue;
-    }
-    accounting::ContinuousSpaceBitmap* mark_bitmap = space->GetMarkBitmap();
-    if (kVerboseMode) {
-      LOG(INFO) << "ClearBlackPtrs: " << *space << " bitmap: " << *mark_bitmap;
-    }
-    mark_bitmap->VisitMarkedRange(reinterpret_cast<uintptr_t>(space->Begin()),
-                                  reinterpret_cast<uintptr_t>(space->Limit()),
-                                  visitor);
-  }
-  space::LargeObjectSpace* large_object_space = heap_->GetLargeObjectsSpace();
-  large_object_space->GetMarkBitmap()->VisitMarkedRange(
-      reinterpret_cast<uintptr_t>(large_object_space->Begin()),
-      reinterpret_cast<uintptr_t>(large_object_space->End()),
-      visitor);
-  // Objects on the allocation stack?
-  if (ReadBarrier::kEnableReadBarrierInvariantChecks || kIsDebugBuild) {
-    size_t count = GetAllocationStack()->Size();
-    auto* it = GetAllocationStack()->Begin();
-    auto* end = GetAllocationStack()->End();
-    for (size_t i = 0; i < count; ++i, ++it) {
-      CHECK_LT(it, end);
-      mirror::Object* obj = it->AsMirrorPtr();
-      if (obj != nullptr) {
-        // Must have been cleared above.
-        CHECK_EQ(obj->GetReadBarrierPointer(), ReadBarrier::WhitePtr()) << obj;
-      }
-    }
-  }
-}
-
 void ConcurrentCopying::ReclaimPhase() {
   TimingLogger::ScopedTiming split("ReclaimPhase", GetTimings());
   if (kVerboseMode) {
@@ -1338,20 +1280,12 @@
   }
 
   {
-    TimingLogger::ScopedTiming split3("ComputeUnevacFromSpaceLiveRatio", GetTimings());
-    ComputeUnevacFromSpaceLiveRatio();
-  }
-
-  {
     TimingLogger::ScopedTiming split4("ClearFromSpace", GetTimings());
     region_space_->ClearFromSpace();
   }
 
   {
     WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
-    if (kUseBakerReadBarrier) {
-      ClearBlackPtrs();
-    }
     Sweep(false);
     SwapBitmaps();
     heap_->UnBindBitmaps();
@@ -1373,39 +1307,6 @@
   }
 }
 
-class ConcurrentCopyingComputeUnevacFromSpaceLiveRatioVisitor {
- public:
-  explicit ConcurrentCopyingComputeUnevacFromSpaceLiveRatioVisitor(ConcurrentCopying* cc)
-      : collector_(cc) {}
-  void operator()(mirror::Object* ref) const SHARED_REQUIRES(Locks::mutator_lock_)
-      SHARED_REQUIRES(Locks::heap_bitmap_lock_) {
-    DCHECK(ref != nullptr);
-    DCHECK(collector_->region_space_bitmap_->Test(ref)) << ref;
-    DCHECK(collector_->region_space_->IsInUnevacFromSpace(ref)) << ref;
-    if (kUseBakerReadBarrier) {
-      DCHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::BlackPtr()) << ref;
-      // Clear the black ptr.
-      ref->AtomicSetReadBarrierPointer(ReadBarrier::BlackPtr(), ReadBarrier::WhitePtr());
-      DCHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::WhitePtr()) << ref;
-    }
-    size_t obj_size = ref->SizeOf();
-    size_t alloc_size = RoundUp(obj_size, space::RegionSpace::kAlignment);
-    collector_->region_space_->AddLiveBytes(ref, alloc_size);
-  }
-
- private:
-  ConcurrentCopying* const collector_;
-};
-
-// Compute how much live objects are left in regions.
-void ConcurrentCopying::ComputeUnevacFromSpaceLiveRatio() {
-  region_space_->AssertAllRegionLiveBytesZeroOrCleared();
-  ConcurrentCopyingComputeUnevacFromSpaceLiveRatioVisitor visitor(this);
-  region_space_bitmap_->VisitMarkedRange(reinterpret_cast<uintptr_t>(region_space_->Begin()),
-                                         reinterpret_cast<uintptr_t>(region_space_->Limit()),
-                                         visitor);
-}
-
 // Assert the to-space invariant.
 void ConcurrentCopying::AssertToSpaceInvariant(mirror::Object* obj, MemberOffset offset,
                                                mirror::Object* ref) {
@@ -1999,19 +1900,7 @@
       DCHECK(heap_mark_bitmap_->GetContinuousSpaceBitmap(ref)->Test(ref))
           << "Immune space object must be already marked";
     }
-    // This may or may not succeed, which is ok.
-    if (kUseBakerReadBarrier) {
-      ref->AtomicSetReadBarrierPointer(ReadBarrier::WhitePtr(), ReadBarrier::GrayPtr());
-    }
-    if (cc_bitmap->AtomicTestAndSet(ref)) {
-      // Already marked.
-    } else {
-      // Newly marked.
-      if (kUseBakerReadBarrier) {
-        DCHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::GrayPtr());
-      }
-      PushOntoMarkStack(ref);
-    }
+    MarkUnevacFromSpaceRegionOrImmuneSpace(ref, cc_bitmap);
   } else {
     // Use the mark bitmap.
     accounting::ContinuousSpaceBitmap* mark_bitmap =
@@ -2024,13 +1913,13 @@
       // Already marked.
       if (kUseBakerReadBarrier) {
         DCHECK(ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr() ||
-               ref->GetReadBarrierPointer() == ReadBarrier::BlackPtr());
+               ref->GetReadBarrierPointer() == ReadBarrier::WhitePtr());
       }
     } else if (is_los && los_bitmap->Test(ref)) {
       // Already marked in LOS.
       if (kUseBakerReadBarrier) {
         DCHECK(ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr() ||
-               ref->GetReadBarrierPointer() == ReadBarrier::BlackPtr());
+               ref->GetReadBarrierPointer() == ReadBarrier::WhitePtr());
       }
     } else {
       // Not marked.
@@ -2046,15 +1935,34 @@
           DCHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::WhitePtr());
         }
       } else {
+        // For the Baker-style RB, we need to handle 'false-gray' cases. See the
+        // kRegionTypeUnevacFromSpace-case comment in Mark().
+        if (kUseBakerReadBarrier) {
+          // Test the bitmap first to reduce the chance of false gray cases.
+          if ((!is_los && mark_bitmap->Test(ref)) ||
+              (is_los && los_bitmap->Test(ref))) {
+            return ref;
+          }
+        }
         // Not marked or on the allocation stack. Try to mark it.
         // This may or may not succeed, which is ok.
+        bool cas_success = false;
         if (kUseBakerReadBarrier) {
-          ref->AtomicSetReadBarrierPointer(ReadBarrier::WhitePtr(), ReadBarrier::GrayPtr());
+          cas_success = ref->AtomicSetReadBarrierPointer(ReadBarrier::WhitePtr(),
+                                                         ReadBarrier::GrayPtr());
         }
         if (!is_los && mark_bitmap->AtomicTestAndSet(ref)) {
           // Already marked.
+          if (kUseBakerReadBarrier && cas_success &&
+              ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr()) {
+            PushOntoFalseGrayStack(ref);
+          }
         } else if (is_los && los_bitmap->AtomicTestAndSet(ref)) {
           // Already marked in LOS.
+          if (kUseBakerReadBarrier && cas_success &&
+              ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr()) {
+            PushOntoFalseGrayStack(ref);
+          }
         } else {
           // Newly marked.
           if (kUseBakerReadBarrier) {
diff --git a/runtime/gc/collector/concurrent_copying.h b/runtime/gc/collector/concurrent_copying.h
index 76315fe..e9ff618 100644
--- a/runtime/gc/collector/concurrent_copying.h
+++ b/runtime/gc/collector/concurrent_copying.h
@@ -160,8 +160,6 @@
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_, !mark_stack_lock_);
   void SweepLargeObjects(bool swap_bitmaps)
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_);
-  void ClearBlackPtrs()
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_);
   void FillWithDummyObject(mirror::Object* dummy_obj, size_t byte_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
   mirror::Object* AllocateInSkippedBlock(size_t alloc_size)
@@ -185,10 +183,19 @@
   void ExpandGcMarkStack() SHARED_REQUIRES(Locks::mutator_lock_);
   mirror::Object* MarkNonMoving(mirror::Object* from_ref) SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_);
+  ALWAYS_INLINE mirror::Object* MarkUnevacFromSpaceRegionOrImmuneSpace(mirror::Object* from_ref,
+      accounting::SpaceBitmap<kObjectAlignment>* bitmap)
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_);
+  void PushOntoFalseGrayStack(mirror::Object* obj) SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!mark_stack_lock_);
+  void ProcessFalseGrayStack() SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!mark_stack_lock_);
 
   space::RegionSpace* region_space_;      // The underlying region space.
   std::unique_ptr<Barrier> gc_barrier_;
   std::unique_ptr<accounting::ObjectStack> gc_mark_stack_;
+  std::vector<mirror::Object*> false_gray_stack_ GUARDED_BY(mark_stack_lock_);
   Mutex mark_stack_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
   std::vector<accounting::ObjectStack*> revoked_mark_stacks_
       GUARDED_BY(mark_stack_lock_);
diff --git a/runtime/gc/collector_type.h b/runtime/gc/collector_type.h
index 4ffc8af..c602081 100644
--- a/runtime/gc/collector_type.h
+++ b/runtime/gc/collector_type.h
@@ -49,6 +49,8 @@
   // A homogeneous space compaction collector used in background transition
   // when both foreground and background collector are CMS.
   kCollectorTypeHomogeneousSpaceCompact,
+  // Class linker fake collector.
+  kCollectorTypeClassLinker,
 };
 std::ostream& operator<<(std::ostream& os, const CollectorType& collector_type);
 
diff --git a/runtime/gc/gc_cause.cc b/runtime/gc/gc_cause.cc
index 18e5703..ad9bb92 100644
--- a/runtime/gc/gc_cause.cc
+++ b/runtime/gc/gc_cause.cc
@@ -36,6 +36,7 @@
     case kGcCauseInstrumentation: return "Instrumentation";
     case kGcCauseAddRemoveAppImageSpace: return "AddRemoveAppImageSpace";
     case kGcCauseDebugger: return "Debugger";
+    case kGcCauseClassLinker: return "ClassLinker";
     default:
       LOG(FATAL) << "Unreachable";
       UNREACHABLE();
diff --git a/runtime/gc/gc_cause.h b/runtime/gc/gc_cause.h
index ad67eb7..797ec34 100644
--- a/runtime/gc/gc_cause.h
+++ b/runtime/gc/gc_cause.h
@@ -47,6 +47,8 @@
   kGcCauseDebugger,
   // GC triggered for background transition when both foreground and background collector are CMS.
   kGcCauseHomogeneousSpaceCompact,
+  // Class linker cause, used to guard filling art methods with special values.
+  kGcCauseClassLinker,
 };
 
 const char* PrettyCause(GcCause cause);
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index df5aa0a..cdd5f2e 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -119,6 +119,8 @@
 // Dump the rosalloc stats on SIGQUIT.
 static constexpr bool kDumpRosAllocStatsOnSigQuit = false;
 
+static constexpr size_t kNativeAllocationHistogramBuckets = 16;
+
 static inline bool CareAboutPauseTimes() {
   return Runtime::Current()->InJankPerceptibleProcessState();
 }
@@ -186,6 +188,11 @@
       total_objects_freed_ever_(0),
       num_bytes_allocated_(0),
       native_bytes_allocated_(0),
+      native_histogram_lock_("Native allocation lock"),
+      native_allocation_histogram_("Native allocation sizes",
+                                   1U,
+                                   kNativeAllocationHistogramBuckets),
+      native_free_histogram_("Native free sizes", 1U, kNativeAllocationHistogramBuckets),
       num_bytes_freed_revoke_(0),
       verify_missing_card_marks_(false),
       verify_system_weaks_(false),
@@ -304,7 +311,7 @@
 
           const OatHeader& boot_oat_header = boot_oat_file->GetOatHeader();
           const char* boot_classpath =
-              boot_oat_header.GetStoreValueByKey(OatHeader::kBootClassPath);
+              boot_oat_header.GetStoreValueByKey(OatHeader::kBootClassPathKey);
           if (boot_classpath == nullptr) {
             continue;
           }
@@ -1185,6 +1192,20 @@
     rosalloc_space_->DumpStats(os);
   }
 
+  {
+    MutexLock mu(Thread::Current(), native_histogram_lock_);
+    if (native_allocation_histogram_.SampleSize() > 0u) {
+      os << "Histogram of native allocation ";
+      native_allocation_histogram_.DumpBins(os);
+      os << " bucket size " << native_allocation_histogram_.BucketWidth() << "\n";
+    }
+    if (native_free_histogram_.SampleSize() > 0u) {
+      os << "Histogram of native free ";
+      native_free_histogram_.DumpBins(os);
+      os << " bucket size " << native_free_histogram_.BucketWidth() << "\n";
+    }
+  }
+
   BaseMutex::DumpAll(os);
 }
 
@@ -3848,6 +3869,10 @@
 
 void Heap::RegisterNativeAllocation(JNIEnv* env, size_t bytes) {
   Thread* self = ThreadForEnv(env);
+  {
+    MutexLock mu(self, native_histogram_lock_);
+    native_allocation_histogram_.AddValue(bytes);
+  }
   if (native_need_to_run_finalization_) {
     RunFinalization(env, kNativeAllocationFinalizeTimeout);
     UpdateMaxNativeFootprint();
@@ -3892,6 +3917,10 @@
 
 void Heap::RegisterNativeFree(JNIEnv* env, size_t bytes) {
   size_t expected_size;
+  {
+    MutexLock mu(Thread::Current(), native_histogram_lock_);
+    native_free_histogram_.AddValue(bytes);
+  }
   do {
     expected_size = native_bytes_allocated_.LoadRelaxed();
     if (UNLIKELY(bytes > expected_size)) {
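The histogram plumbing above relies on ART's Histogram<> template (runtime/base/histogram.h). A self-contained sketch of the calls this change uses, with made-up sample values:

#include <sstream>

void SketchNativeHistogramUsage() {
  // Mirrors the construction in Heap's initializer list: name, bucket width 1,
  // kNativeAllocationHistogramBuckets (16) buckets.
  Histogram<uint64_t> histogram("Native allocation sizes", 1u, 16u);
  histogram.AddValue(64u);    // Hypothetical allocation sizes.
  histogram.AddValue(4096u);
  std::ostringstream os;
  if (histogram.SampleSize() > 0u) {
    histogram.DumpBins(os);
    os << " bucket size " << histogram.BucketWidth() << "\n";
  }
}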
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index fada1a2..2a1a4a1 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -241,9 +241,9 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   void RegisterNativeAllocation(JNIEnv* env, size_t bytes)
-      REQUIRES(!*gc_complete_lock_, !*pending_task_lock_);
+      REQUIRES(!*gc_complete_lock_, !*pending_task_lock_, !native_histogram_lock_);
   void RegisterNativeFree(JNIEnv* env, size_t bytes)
-      REQUIRES(!*gc_complete_lock_, !*pending_task_lock_);
+      REQUIRES(!*gc_complete_lock_, !*pending_task_lock_, !native_histogram_lock_);
 
   // Change the allocator, updates entrypoints.
   void ChangeAllocator(AllocatorType allocator)
@@ -532,7 +532,7 @@
   space::Space* FindSpaceFromObject(const mirror::Object*, bool fail_ok) const
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  void DumpForSigQuit(std::ostream& os) REQUIRES(!*gc_complete_lock_);
+  void DumpForSigQuit(std::ostream& os) REQUIRES(!*gc_complete_lock_, !native_histogram_lock_);
 
   // Do a pending collector transition.
   void DoPendingCollectorTransition() REQUIRES(!*gc_complete_lock_);
@@ -654,7 +654,8 @@
   std::string SafePrettyTypeOf(mirror::Object* obj) NO_THREAD_SAFETY_ANALYSIS;
 
   // GC performance measuring
-  void DumpGcPerformanceInfo(std::ostream& os) REQUIRES(!*gc_complete_lock_);
+  void DumpGcPerformanceInfo(std::ostream& os)
+      REQUIRES(!*gc_complete_lock_, !native_histogram_lock_);
   void ResetGcPerformanceInfo() REQUIRES(!*gc_complete_lock_);
 
   // Thread pool.
@@ -1156,6 +1157,11 @@
   // Bytes which are allocated and managed by native code but still need to be accounted for.
   Atomic<size_t> native_bytes_allocated_;
 
+  // Native allocation stats.
+  Mutex native_histogram_lock_;
+  Histogram<uint64_t> native_allocation_histogram_;
+  Histogram<uint64_t> native_free_histogram_;
+
   // Number of bytes freed by thread local buffer revokes. This will
   // cancel out the ahead-of-time bulk counting of bytes allocated in
   // rosalloc thread-local buffers.  It is temporarily accumulated
diff --git a/runtime/gc/reference_queue.cc b/runtime/gc/reference_queue.cc
index 03ab9a1..6088a43 100644
--- a/runtime/gc/reference_queue.cc
+++ b/runtime/gc/reference_queue.cc
@@ -68,31 +68,19 @@
   Heap* heap = Runtime::Current()->GetHeap();
   if (kUseBakerOrBrooksReadBarrier && heap->CurrentCollectorType() == kCollectorTypeCC &&
       heap->ConcurrentCopyingCollector()->IsActive()) {
-    // Change the gray ptr we left in ConcurrentCopying::ProcessMarkStackRef() to black or white.
+    // Change the gray ptr we left in ConcurrentCopying::ProcessMarkStackRef() to white.
     // We check IsActive() above because we don't want to do this when the zygote compaction
     // collector (SemiSpace) is running.
     CHECK(ref != nullptr);
     collector::ConcurrentCopying* concurrent_copying = heap->ConcurrentCopyingCollector();
-    const bool is_moving = concurrent_copying->RegionSpace()->IsInToSpace(ref);
-    if (ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr()) {
-      if (is_moving) {
-        ref->AtomicSetReadBarrierPointer(ReadBarrier::GrayPtr(), ReadBarrier::WhitePtr());
-        CHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::WhitePtr());
-      } else {
-        ref->AtomicSetReadBarrierPointer(ReadBarrier::GrayPtr(), ReadBarrier::BlackPtr());
-        CHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::BlackPtr());
-      }
+    mirror::Object* rb_ptr = ref->GetReadBarrierPointer();
+    if (rb_ptr == ReadBarrier::GrayPtr()) {
+      ref->AtomicSetReadBarrierPointer(ReadBarrier::GrayPtr(), ReadBarrier::WhitePtr());
+      CHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::WhitePtr());
     } else {
-      // In ConcurrentCopying::ProcessMarkStackRef() we may leave a black or white Reference in the
-      // queue and find it here, which is OK. Check that the color makes sense depending on whether
-      // the Reference is moving or not and that the referent has been marked.
-      if (is_moving) {
-        CHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::WhitePtr())
-            << "ref=" << ref << " rb_ptr=" << ref->GetReadBarrierPointer();
-      } else {
-        CHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::BlackPtr())
-            << "ref=" << ref << " rb_ptr=" << ref->GetReadBarrierPointer();
-      }
+      // In ConcurrentCopying::ProcessMarkStackRef() we may leave a white reference in the queue and
+      // find it here, which is OK.
+      CHECK_EQ(rb_ptr, ReadBarrier::WhitePtr()) << "ref=" << ref << " rb_ptr=" << rb_ptr;
       mirror::Object* referent = ref->GetReferent<kWithoutReadBarrier>();
       // The referent could be null if it's cleared by a mutator (Reference.clear()).
       if (referent != nullptr) {
diff --git a/runtime/gc/scoped_gc_critical_section.cc b/runtime/gc/scoped_gc_critical_section.cc
index e7786a1..b5eb979 100644
--- a/runtime/gc/scoped_gc_critical_section.cc
+++ b/runtime/gc/scoped_gc_critical_section.cc
@@ -38,4 +38,3 @@
 
 }  // namespace gc
 }  // namespace art
-
diff --git a/runtime/gc/space/region_space.cc b/runtime/gc/space/region_space.cc
index 9a2d0c6..5d710bf 100644
--- a/runtime/gc/space/region_space.cc
+++ b/runtime/gc/space/region_space.cc
@@ -216,17 +216,6 @@
   evac_region_ = nullptr;
 }
 
-void RegionSpace::AssertAllRegionLiveBytesZeroOrCleared() {
-  if (kIsDebugBuild) {
-    MutexLock mu(Thread::Current(), region_lock_);
-    for (size_t i = 0; i < num_regions_; ++i) {
-      Region* r = &regions_[i];
-      size_t live_bytes = r->LiveBytes();
-      CHECK(live_bytes == 0U || live_bytes == static_cast<size_t>(-1)) << live_bytes;
-    }
-  }
-}
-
 void RegionSpace::LogFragmentationAllocFailure(std::ostream& os,
                                                size_t /* failed_alloc_bytes */) {
   size_t max_contiguous_allocation = 0;
diff --git a/runtime/gc/space/region_space.h b/runtime/gc/space/region_space.h
index 14e8005..4e8dfe8 100644
--- a/runtime/gc/space/region_space.h
+++ b/runtime/gc/space/region_space.h
@@ -215,7 +215,16 @@
     reg->AddLiveBytes(alloc_size);
   }
 
-  void AssertAllRegionLiveBytesZeroOrCleared() REQUIRES(!region_lock_);
+  void AssertAllRegionLiveBytesZeroOrCleared() REQUIRES(!region_lock_) {
+    if (kIsDebugBuild) {
+      MutexLock mu(Thread::Current(), region_lock_);
+      for (size_t i = 0; i < num_regions_; ++i) {
+        Region* r = &regions_[i];
+        size_t live_bytes = r->LiveBytes();
+        CHECK(live_bytes == 0U || live_bytes == static_cast<size_t>(-1)) << live_bytes;
+      }
+    }
+  }
 
   void RecordAlloc(mirror::Object* ref) REQUIRES(!region_lock_);
   bool AllocNewTlab(Thread* self) REQUIRES(!region_lock_);
diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index 34bc458..61119f8 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc
@@ -687,8 +687,7 @@
   }
 }
 
-void Instrumentation::UpdateMethodsCode(ArtMethod* method, const void* quick_code) {
-  DCHECK(method->GetDeclaringClass()->IsResolved());
+void Instrumentation::UpdateMethodsCodeImpl(ArtMethod* method, const void* quick_code) {
   const void* new_quick_code;
   if (LIKELY(!instrumentation_stubs_installed_)) {
     new_quick_code = quick_code;
@@ -710,6 +709,18 @@
   UpdateEntrypoints(method, new_quick_code);
 }
 
+void Instrumentation::UpdateMethodsCode(ArtMethod* method, const void* quick_code) {
+  DCHECK(method->GetDeclaringClass()->IsResolved());
+  UpdateMethodsCodeImpl(method, quick_code);
+}
+
+void Instrumentation::UpdateMethodsCodeFromDebugger(ArtMethod* method, const void* quick_code) {
+  // When the debugger attaches, we may update the entry points of all methods of a
+  // class to the interpreter bridge. In that case a method's declaring class might
+  // not be resolved yet.
+  UpdateMethodsCodeImpl(method, quick_code);
+}
+
 bool Instrumentation::AddDeoptimizedMethod(ArtMethod* method) {
   if (IsDeoptimizedMethod(method)) {
     // Already in the map. Return.
diff --git a/runtime/instrumentation.h b/runtime/instrumentation.h
index a4c3d41..ce6ead4 100644
--- a/runtime/instrumentation.h
+++ b/runtime/instrumentation.h
@@ -227,6 +227,10 @@
   void UpdateMethodsCode(ArtMethod* method, const void* quick_code)
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!deoptimized_methods_lock_);
 
+  // Update the code of a method, respecting any installed stubs from the debugger.
+  void UpdateMethodsCodeFromDebugger(ArtMethod* method, const void* quick_code)
+      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!deoptimized_methods_lock_);
+
   // Get the quick code for the given method. More efficient than asking the class linker as it
   // will short-cut to GetCode if instrumentation and static method resolution stubs aren't
   // installed.
@@ -493,6 +497,9 @@
       SHARED_REQUIRES(Locks::mutator_lock_, deoptimized_methods_lock_);
   bool IsDeoptimizedMethodsEmpty() const
       SHARED_REQUIRES(Locks::mutator_lock_, deoptimized_methods_lock_);
+  void UpdateMethodsCodeImpl(ArtMethod* method, const void* quick_code)
+      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!deoptimized_methods_lock_);
 
   // Have we hijacked ArtMethod::code_ so that it calls instrumentation/interpreter code?
   bool instrumentation_stubs_installed_;
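The call site for UpdateMethodsCodeFromDebugger() is not part of this hunk. A hedged sketch of how a debugger path might use it; the function name DeoptimizeMethodForDebugger and its surroundings are hypothetical:

// Hypothetical debugger-side caller: when installing the interpreter bridge the
// declaring class may not be resolved yet, so the resolved-class DCHECK in
// UpdateMethodsCode() must be avoided.
void DeoptimizeMethodForDebugger(ArtMethod* method) SHARED_REQUIRES(Locks::mutator_lock_) {
  instrumentation::Instrumentation* instrumentation =
      Runtime::Current()->GetInstrumentation();
  instrumentation->UpdateMethodsCodeFromDebugger(method, GetQuickToInterpreterBridge());
}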
diff --git a/runtime/interpreter/interpreter.cc b/runtime/interpreter/interpreter.cc
index 6c630cc..8c42b3a 100644
--- a/runtime/interpreter/interpreter.cc
+++ b/runtime/interpreter/interpreter.cc
@@ -264,12 +264,12 @@
                                     ShadowFrame& shadow_frame, JValue result_register);
 #endif
 
-static JValue Execute(Thread* self, const DexFile::CodeItem* code_item, ShadowFrame& shadow_frame,
-                      JValue result_register)
-    SHARED_REQUIRES(Locks::mutator_lock_);
-
-static inline JValue Execute(Thread* self, const DexFile::CodeItem* code_item,
-                             ShadowFrame& shadow_frame, JValue result_register) {
+static inline JValue Execute(
+    Thread* self,
+    const DexFile::CodeItem* code_item,
+    ShadowFrame& shadow_frame,
+    JValue result_register,
+    bool stay_in_interpreter = false) SHARED_REQUIRES(Locks::mutator_lock_) {
   DCHECK(!shadow_frame.GetMethod()->IsAbstract());
   DCHECK(!shadow_frame.GetMethod()->IsNative());
   if (LIKELY(shadow_frame.GetDexPC() == 0)) {  // Entering the method, but not via deoptimization.
@@ -284,19 +284,21 @@
                                         method, 0);
     }
 
-    jit::Jit* jit = Runtime::Current()->GetJit();
-    if (jit != nullptr) {
-      jit->MethodEntered(self, shadow_frame.GetMethod());
-      if (jit->CanInvokeCompiledCode(method)) {
-        JValue result;
+    if (!stay_in_interpreter) {
+      jit::Jit* jit = Runtime::Current()->GetJit();
+      if (jit != nullptr) {
+        jit->MethodEntered(self, shadow_frame.GetMethod());
+        if (jit->CanInvokeCompiledCode(method)) {
+          JValue result;
 
-        // Pop the shadow frame before calling into compiled code.
-        self->PopShadowFrame();
-        ArtInterpreterToCompiledCodeBridge(self, nullptr, code_item, &shadow_frame, &result);
-        // Push the shadow frame back as the caller will expect it.
-        self->PushShadowFrame(&shadow_frame);
+          // Pop the shadow frame before calling into compiled code.
+          self->PopShadowFrame();
+          ArtInterpreterToCompiledCodeBridge(self, nullptr, code_item, &shadow_frame, &result);
+          // Push the shadow frame back as the caller will expect it.
+          self->PushShadowFrame(&shadow_frame);
 
-        return result;
+          return result;
+        }
       }
     }
   }
@@ -387,7 +389,8 @@
 }
 
 void EnterInterpreterFromInvoke(Thread* self, ArtMethod* method, Object* receiver,
-                                uint32_t* args, JValue* result) {
+                                uint32_t* args, JValue* result,
+                                bool stay_in_interpreter) {
   DCHECK_EQ(self, Thread::Current());
   bool implicit_check = !Runtime::Current()->ExplicitStackOverflowChecks();
   if (UNLIKELY(__builtin_frame_address(0) < self->GetStackEndForInterpreter(implicit_check))) {
@@ -462,7 +465,7 @@
     }
   }
   if (LIKELY(!method->IsNative())) {
-    JValue r = Execute(self, code_item, *shadow_frame, JValue());
+    JValue r = Execute(self, code_item, *shadow_frame, JValue(), stay_in_interpreter);
     if (result != nullptr) {
       *result = r;
     }
@@ -481,6 +484,36 @@
   self->PopShadowFrame();
 }
 
+static bool IsStringInit(const Instruction* instr, ArtMethod* caller)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  if (instr->Opcode() == Instruction::INVOKE_DIRECT ||
+      instr->Opcode() == Instruction::INVOKE_DIRECT_RANGE) {
+    // Instead of calling ResolveMethod(), which has a suspend point and can trigger
+    // a GC, look up the callee method symbolically.
+    uint16_t callee_method_idx = (instr->Opcode() == Instruction::INVOKE_DIRECT_RANGE) ?
+        instr->VRegB_3rc() : instr->VRegB_35c();
+    const DexFile* dex_file = caller->GetDexFile();
+    const DexFile::MethodId& method_id = dex_file->GetMethodId(callee_method_idx);
+    const char* class_name = dex_file->StringByTypeIdx(method_id.class_idx_);
+    const char* method_name = dex_file->GetMethodName(method_id);
+    // Compare method's class name and method name against string init.
+    // It's ok since it's not allowed to create your own java/lang/String.
+    // TODO: verify that assumption.
+    if ((strcmp(class_name, "Ljava/lang/String;") == 0) &&
+        (strcmp(method_name, "<init>") == 0)) {
+      return true;
+    }
+  }
+  return false;
+}
+
+static int16_t GetReceiverRegisterForStringInit(const Instruction* instr) {
+  DCHECK(instr->Opcode() == Instruction::INVOKE_DIRECT_RANGE ||
+         instr->Opcode() == Instruction::INVOKE_DIRECT);
+  return (instr->Opcode() == Instruction::INVOKE_DIRECT_RANGE) ?
+      instr->VRegC_3rc() : instr->VRegC_35c();
+}
+
 void EnterInterpreterFromDeoptimize(Thread* self,
                                     ShadowFrame* shadow_frame,
                                     bool from_code,
@@ -516,22 +549,38 @@
       // TODO: should be tested more once b/17586779 is fixed.
       const Instruction* instr = Instruction::At(&code_item->insns_[dex_pc]);
       if (instr->IsInvoke()) {
+        if (IsStringInit(instr, shadow_frame->GetMethod())) {
+          uint16_t this_obj_vreg = GetReceiverRegisterForStringInit(instr);
+          // Move the StringFactory.newStringFromChars() result into the register representing
+          // "this object" when invoking the string constructor in the original dex instruction.
+          // Also move the result into all aliases.
+          DCHECK(value.GetL()->IsString());
+          SetStringInitValueToAllAliases(shadow_frame, this_obj_vreg, value);
+          // Calling string constructor in the original dex code doesn't generate a result value.
+          value.SetJ(0);
+        }
         new_dex_pc = dex_pc + instr->SizeInCodeUnits();
       } else if (instr->Opcode() == Instruction::NEW_INSTANCE) {
         // It's possible to deoptimize at a NEW_INSTANCE dex instruction that's for a
         // java string, which is turned into a call into StringFactory.newEmptyString();
+        // Move the StringFactory.newEmptyString() result into the destination register.
+        DCHECK(value.GetL()->IsString());
+        shadow_frame->SetVRegReference(instr->VRegA_21c(), value.GetL());
+        // new-instance doesn't generate a result value.
+        value.SetJ(0);
+        // Skip the dex instruction since we essentially come back from an invocation.
+        new_dex_pc = dex_pc + instr->SizeInCodeUnits();
         if (kIsDebugBuild) {
           ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+          // This is a suspend point. But it's ok since value has been set into shadow_frame.
           mirror::Class* klass = class_linker->ResolveType(
               instr->VRegB_21c(), shadow_frame->GetMethod());
           DCHECK(klass->IsStringClass());
         }
-        // Skip the dex instruction since we essentially come back from an invocation.
-        new_dex_pc = dex_pc + instr->SizeInCodeUnits();
       } else {
-        DCHECK(false) << "Unexpected instruction opcode " << instr->Opcode()
-                      << " at dex_pc " << dex_pc
-                      << " of method: " << PrettyMethod(shadow_frame->GetMethod(), false);
+        CHECK(false) << "Unexpected instruction opcode " << instr->Opcode()
+                     << " at dex_pc " << dex_pc
+                     << " of method: " << PrettyMethod(shadow_frame->GetMethod(), false);
       }
     } else {
       // Nothing to do, the dex_pc is the one at which the code requested
diff --git a/runtime/interpreter/interpreter.h b/runtime/interpreter/interpreter.h
index 6353a9b..bf4bcff 100644
--- a/runtime/interpreter/interpreter.h
+++ b/runtime/interpreter/interpreter.h
@@ -33,8 +33,11 @@
 namespace interpreter {
 
 // Called by ArtMethod::Invoke, shadow frames arguments are taken from the args array.
+// The optional stay_in_interpreter parameter (false by default) can be used by clients
+// to explicitly force the rest of the method invocation path to stay in the interpreter.
 extern void EnterInterpreterFromInvoke(Thread* self, ArtMethod* method,
-                                       mirror::Object* receiver, uint32_t* args, JValue* result)
+                                       mirror::Object* receiver, uint32_t* args, JValue* result,
+                                       bool stay_in_interpreter = false)
     SHARED_REQUIRES(Locks::mutator_lock_);
 
 // 'from_code' denotes whether the deoptimization was explicitly triggered by compiled code.
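A usage sketch for the new parameter; self, method, receiver and args are assumed to be prepared as for ArtMethod::Invoke:

// Force the whole invocation to stay in the interpreter (for example while the
// debugger is single-stepping), bypassing the JIT fast path in Execute().
JValue result;
interpreter::EnterInterpreterFromInvoke(self, method, receiver, args, &result,
                                        /* stay_in_interpreter */ true);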
diff --git a/runtime/interpreter/interpreter_common.cc b/runtime/interpreter/interpreter_common.cc
index 12d70c5..53d5e43 100644
--- a/runtime/interpreter/interpreter_common.cc
+++ b/runtime/interpreter/interpreter_common.cc
@@ -540,6 +540,30 @@
                  result, method->GetInterfaceMethodIfProxy(sizeof(void*))->GetShorty());
 }
 
+void SetStringInitValueToAllAliases(ShadowFrame* shadow_frame,
+                                    uint16_t this_obj_vreg,
+                                    JValue result)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  Object* existing = shadow_frame->GetVRegReference(this_obj_vreg);
+  if (existing == nullptr) {
+    // If it's null, we come from compiled code that was deoptimized. There are no
+    // aliases to fix up, as the compiler verified there was no alias; just set the
+    // new string result of the StringFactory.
+    shadow_frame->SetVRegReference(this_obj_vreg, result.GetL());
+    return;
+  }
+  // Set the string init result into all aliases.
+  for (uint32_t i = 0, e = shadow_frame->NumberOfVRegs(); i < e; ++i) {
+    if (shadow_frame->GetVRegReference(i) == existing) {
+      DCHECK_EQ(shadow_frame->GetVRegReference(i),
+                reinterpret_cast<mirror::Object*>(shadow_frame->GetVReg(i)));
+      shadow_frame->SetVRegReference(i, result.GetL());
+      DCHECK_EQ(shadow_frame->GetVRegReference(i),
+                reinterpret_cast<mirror::Object*>(shadow_frame->GetVReg(i)));
+    }
+  }
+}
+
 template <bool is_range,
           bool do_assignability_check,
           size_t kVarArgMax>
@@ -739,24 +763,7 @@
   }
 
   if (string_init && !self->IsExceptionPending()) {
-    mirror::Object* existing = shadow_frame.GetVRegReference(string_init_vreg_this);
-    if (existing == nullptr) {
-      // If it's null, we come from compiled code that was deoptimized. Nothing to do,
-      // as the compiler verified there was no alias.
-      // Set the new string result of the StringFactory.
-      shadow_frame.SetVRegReference(string_init_vreg_this, result->GetL());
-    } else {
-      // Replace the fake string that was allocated with the StringFactory result.
-      for (uint32_t i = 0; i < shadow_frame.NumberOfVRegs(); ++i) {
-        if (shadow_frame.GetVRegReference(i) == existing) {
-          DCHECK_EQ(shadow_frame.GetVRegReference(i),
-                    reinterpret_cast<mirror::Object*>(shadow_frame.GetVReg(i)));
-          shadow_frame.SetVRegReference(i, result->GetL());
-          DCHECK_EQ(shadow_frame.GetVRegReference(i),
-                    reinterpret_cast<mirror::Object*>(shadow_frame.GetVReg(i)));
-        }
-      }
-    }
+    SetStringInitValueToAllAliases(&shadow_frame, string_init_vreg_this, *result);
   }
 
   return !self->IsExceptionPending();
diff --git a/runtime/interpreter/interpreter_common.h b/runtime/interpreter/interpreter_common.h
index 69376fd..cc470f3 100644
--- a/runtime/interpreter/interpreter_common.h
+++ b/runtime/interpreter/interpreter_common.h
@@ -1021,6 +1021,12 @@
                                         ShadowFrame* shadow_frame,
                                         JValue* result);
 
+// Set string value created from StringFactory.newStringFromXXX() into all aliases of
+// StringFactory.newEmptyString().
+void SetStringInitValueToAllAliases(ShadowFrame* shadow_frame,
+                                    uint16_t this_obj_vreg,
+                                    JValue result);
+
 // Explicitly instantiate all DoInvoke functions.
 #define EXPLICIT_DO_INVOKE_TEMPLATE_DECL(_type, _is_range, _do_check)                      \
   template SHARED_REQUIRES(Locks::mutator_lock_)                                     \
diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc
index dcc6300..ae5a0f6 100644
--- a/runtime/jit/jit.cc
+++ b/runtime/jit/jit.cc
@@ -444,6 +444,13 @@
       return false;
     }
 
+    // Before allowing the jump, make sure the debugger is not active to avoid jumping from
+    // interpreter to OSR while e.g. single stepping. Note that we could selectively disable
+    // OSR when single stepping, but that's currently hard to know at this point.
+    if (Dbg::IsDebuggerActive()) {
+      return false;
+    }
+
     // We found a stack map, now fill the frame with dex register values from the interpreter's
     // shadow frame.
     DexRegisterMap vreg_map =
@@ -565,6 +572,7 @@
         VLOG(jit) << "Start profiling " << PrettyMethod(method_);
       }
     }
+    ProfileSaver::NotifyJitActivity();
   }
 
   void Finalize() OVERRIDE {
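The new ProfileSaver::NotifyJitActivity() call above ties JIT activity to the saver's wake-up logic in profile_saver.cc below. A comment-level sketch of the flow, using only names from this patch:

// Flow sketch:
//   JIT task runs (e.g. starts profiling a hot method)
//     -> ProfileSaver::NotifyJitActivity()   // static; checks instance_ under
//                                            // Locks::profiler_lock_
//       -> NotifyJitActivityInternal()       // bumps jit_activity_notifications_
//         -> WakeUpSaver()                   // resets the counter and signals
//                                            // period_condition_
ProfileSaver::NotifyJitActivity();  // Cheap no-op when no saver instance is running.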
diff --git a/runtime/jit/profile_saver.cc b/runtime/jit/profile_saver.cc
index e8462a1..b0a786e 100644
--- a/runtime/jit/profile_saver.cc
+++ b/runtime/jit/profile_saver.cc
@@ -33,14 +33,15 @@
 // TODO: read the constants from ProfileOptions,
 // Add a random delay each time we go to sleep so that we don't hammer the CPU
 // with all profile savers running at the same time.
-static constexpr const uint64_t kRandomDelayMaxMs = 30 * 1000;  // 30 seconds
-static constexpr const uint64_t kMaxBackoffMs = 10 * 60 * 1000;  // 10 minutes
-static constexpr const uint64_t kSavePeriodMs = 20 * 1000;  // 20 seconds
+static constexpr const uint64_t kMinSavePeriodNs = MsToNs(20 * 1000);  // 20 seconds
 static constexpr const uint64_t kSaveResolvedClassesDelayMs = 2 * 1000;  // 2 seconds
-static constexpr const double kBackoffCoef = 2.0;
 
 static constexpr const uint32_t kMinimumNumberOfMethodsToSave = 10;
 static constexpr const uint32_t kMinimumNumberOfClassesToSave = 10;
+static constexpr const uint32_t kMinimumNumberOfNotificationBeforeWake =
+    kMinimumNumberOfMethodsToSave;
+static constexpr const uint32_t kMaximumNumberOfNotificationBeforeWake = 50;
 
 ProfileSaver* ProfileSaver::instance_ = nullptr;
 pthread_t ProfileSaver::profiler_pthread_ = 0U;
@@ -55,6 +56,8 @@
       shutting_down_(false),
       last_save_number_of_methods_(0),
       last_save_number_of_classes_(0),
+      last_time_ns_saver_woke_up_(0),
+      jit_activity_notifications_(0),
       wait_lock_("ProfileSaver wait lock"),
       period_condition_("ProfileSaver period condition", wait_lock_),
       total_bytes_written_(0),
@@ -65,7 +68,9 @@
       total_ms_of_sleep_(0),
       total_ns_of_work_(0),
       total_number_of_foreign_dex_marks_(0),
-      max_number_of_profile_entries_cached_(0) {
+      max_number_of_profile_entries_cached_(0),
+      total_number_of_hot_spikes_(0),
+      total_number_of_wake_ups_(0) {
   AddTrackedLocations(output_filename, app_data_dir, code_paths);
   if (!app_data_dir.empty()) {
     // The application directory is used to determine which dex files are owned by app.
@@ -83,55 +88,111 @@
 }
 
 void ProfileSaver::Run() {
-  srand(MicroTime() * getpid());
   Thread* self = Thread::Current();
 
-  uint64_t save_period_ms = kSavePeriodMs;
-  VLOG(profiler) << "Save profiling information every " << save_period_ms << " ms";
-  bool cache_resolved_classes = true;
+  // Fetch the resolved classes for the app images after sleeping for
+  // kSaveResolvedClassesDelayMs.
+  // TODO(calin) This only considers the case of the primary profile file.
+  // Anything that gets loaded in the same VM will not have its resolved
+  // classes saved (unless it started before the initial saving was done).
+  {
+    MutexLock mu(self, wait_lock_);
+    constexpr uint64_t kSleepTime = kSaveResolvedClassesDelayMs;
+    const uint64_t end_time = NanoTime() + MsToNs(kSleepTime);
+    while (true) {
+      const uint64_t current_time = NanoTime();
+      if (current_time >= end_time) {
+        break;
+      }
+      period_condition_.TimedWait(self, NsToMs(end_time - current_time), 0);
+    }
+    total_ms_of_sleep_ += kSaveResolvedClassesDelayMs;
+  }
+  FetchAndCacheResolvedClasses();
+
+  // Loop for the profiled methods.
   while (!ShuttingDown(self)) {
-    uint64_t sleep_time_ms;
-    if (cache_resolved_classes) {
-      // Sleep less long for the first iteration since we want to record loaded classes shortly
-      // after app launch.
-      sleep_time_ms = kSaveResolvedClassesDelayMs;
-    } else {
-      const uint64_t random_sleep_delay_ms = rand() % kRandomDelayMaxMs;
-      sleep_time_ms = save_period_ms + random_sleep_delay_ms;
-    }
+    uint64_t sleep_start = NanoTime();
     {
-      MutexLock mu(self, wait_lock_);
-      period_condition_.TimedWait(self, sleep_time_ms, 0);
+      uint64_t sleep_time = 0;
+      {
+        MutexLock mu(self, wait_lock_);
+        period_condition_.Wait(self);
+        sleep_time = NanoTime() - last_time_ns_saver_woke_up_;
+      }
+      // Check if the thread was woken up for shutdown.
+      if (ShuttingDown(self)) {
+        break;
+      }
+      total_number_of_wake_ups_++;
+      // We might have been woken up by a huge number of notifications to guarantee saving.
+      // If we didn't meet the minimum save period, go back to sleep (but only if we missed
+      // it by a reasonable margin). The condition is written as below to avoid unsigned
+      // underflow when sleep_time already exceeds the period.
+      while (sleep_time < kMinSavePeriodNs - (kMinSavePeriodNs / 10)) {
+        {
+          MutexLock mu(self, wait_lock_);
+          period_condition_.TimedWait(self, NsToMs(kMinSavePeriodNs - sleep_time), 0);
+          sleep_time = NanoTime() - last_time_ns_saver_woke_up_;
+        }
+        // Check if the thread was woken up for shutdown.
+        if (ShuttingDown(self)) {
+          break;
+        }
+        total_number_of_wake_ups_++;
+      }
     }
-    total_ms_of_sleep_ += sleep_time_ms;
+    total_ms_of_sleep_ += NsToMs(NanoTime() - sleep_start);
+
     if (ShuttingDown(self)) {
       break;
     }
 
-    uint64_t start = NanoTime();
-    if (cache_resolved_classes) {
-      // TODO(calin) This only considers the case of the primary profile file.
-      // Anything that gets loaded in the same VM will not have their resolved
-      // classes save (unless they started before the initial saving was done).
-      FetchAndCacheResolvedClasses();
-    } else {
-      bool profile_saved_to_disk = ProcessProfilingInfo();
-      if (profile_saved_to_disk) {
-        // Reset the period to the initial value as it's highly likely to JIT again.
-        save_period_ms = kSavePeriodMs;
-        VLOG(profiler) << "Profile saver: saved something, period reset to: " << save_period_ms;
-      } else {
-        // If we don't need to save now it is less likely that we will need to do
-        // so in the future. Increase the time between saves according to the
-        // kBackoffCoef, but make it no larger than kMaxBackoffMs.
-        save_period_ms = std::min(kMaxBackoffMs,
-                                  static_cast<uint64_t>(kBackoffCoef * save_period_ms));
-        VLOG(profiler) << "Profile saver: nothing to save, delaying period to: " << save_period_ms;
-      }
+    uint16_t new_methods = 0;
+    uint64_t start_work = NanoTime();
+    bool profile_saved_to_disk = ProcessProfilingInfo(&new_methods);
+    // Update the notification counter based on the result. Note that there might be
+    // contention on this counter but we don't need to be 100% precise.
+    if (!profile_saved_to_disk) {
+      // If we didn't save to disk it may be because we didn't have enough new methods.
+      // Set the jit activity notifications to new_methods so we can wake up earlier if needed.
+      jit_activity_notifications_ = new_methods;
     }
-    cache_resolved_classes = false;
+    total_ns_of_work_ += NanoTime() - start_work;
+  }
+}
 
-    total_ns_of_work_ += (NanoTime() - start);
+void ProfileSaver::NotifyJitActivity() {
+  MutexLock mu(Thread::Current(), *Locks::profiler_lock_);
+  if (instance_ == nullptr || instance_->shutting_down_) {
+    return;
+  }
+  instance_->NotifyJitActivityInternal();
+}
+
+void ProfileSaver::WakeUpSaver() {
+  jit_activity_notifications_ = 0;
+  last_time_ns_saver_woke_up_ = NanoTime();
+  period_condition_.Signal(Thread::Current());
+}
+
+void ProfileSaver::NotifyJitActivityInternal() {
+  // Unlikely to overflow but if it happens,
+  // we would have woken up the saver long before that.
+  jit_activity_notifications_++;
+  // Note that we are not as precise as we could be here but we don't want to wake the saver
+  // every time we see a hot method.
+  // Check the spike threshold first; otherwise the minimum-threshold branch would
+  // shadow it (any count above the maximum is also above the minimum).
+  if (jit_activity_notifications_ > kMaximumNumberOfNotificationBeforeWake) {
+    // Make sure to wake up the saver if we see a spike in the number of notifications.
+    // This is a precaution to avoid losing a big number of methods in case
+    // this is a spike with no jit activity afterwards.
+    total_number_of_hot_spikes_++;
+    MutexLock wait_mutex(Thread::Current(), wait_lock_);
+    WakeUpSaver();
+  } else if (jit_activity_notifications_ > kMinimumNumberOfNotificationBeforeWake) {
+    MutexLock wait_mutex(Thread::Current(), wait_lock_);
+    if ((NanoTime() - last_time_ns_saver_woke_up_) > kMinSavePeriodNs) {
+      WakeUpSaver();
+    }
   }
 }
 
@@ -175,7 +236,7 @@
       total_number_of_profile_entries_cached);
 }
 
-bool ProfileSaver::ProcessProfilingInfo() {
+bool ProfileSaver::ProcessProfilingInfo(uint16_t* new_methods) {
   ScopedTrace trace(__PRETTY_FUNCTION__);
   SafeMap<std::string, std::set<std::string>> tracked_locations;
   {
@@ -186,6 +247,8 @@
 
   bool profile_file_saved = false;
   uint64_t total_number_of_profile_entries_cached = 0;
+  *new_methods = 0;
+
   for (const auto& it : tracked_locations) {
     if (ShuttingDown(Thread::Current())) {
       return true;
@@ -216,6 +279,7 @@
       total_number_of_skipped_writes_++;
       continue;
     }
+    *new_methods = std::max(static_cast<uint16_t>(delta_number_of_methods), *new_methods);
     uint64_t bytes_written;
     // Force the save. In case the profile data is corrupted or the profile
     // has the wrong version this will "fix" the file to the correct format.
@@ -490,17 +554,20 @@
   // frameworks/base/services/core/java/com/android/server/pm/PackageDexOptimizer.java)
   std::replace(dex_location_real_path_str.begin(), dex_location_real_path_str.end(), '/', '@');
   std::string flag_path = foreign_dex_profile_path + "/" + dex_location_real_path_str;
-  // No need to give any sort of access to flag_path. The system has enough permissions
-  // to test for its existence.
-  int fd = TEMP_FAILURE_RETRY(open(flag_path.c_str(), O_CREAT | O_EXCL, 0));
+  // We use O_RDONLY as the access mode because we must supply some access
+  // mode, and there is no access mode that means 'create but do not read' the
+  // file. We will not actually read from the file.
+  int fd = TEMP_FAILURE_RETRY(open(flag_path.c_str(),
+        O_CREAT | O_RDONLY | O_EXCL | O_CLOEXEC | O_NOFOLLOW, 0));
   if (fd != -1) {
     if (close(fd) != 0) {
       PLOG(WARNING) << "Could not close file after flagging foreign dex use " << flag_path;
     }
     return true;
   } else {
-    if (errno != EEXIST) {
-      // Another app could have already created the file.
+    if (errno != EEXIST && errno != EACCES) {
+      // Another app could have already created the file, and selinux may not
+      // allow the read access to the file implied by the call to open.
       PLOG(WARNING) << "Could not create foreign dex use mark " << flag_path;
       return false;
     }
@@ -527,7 +594,9 @@
      << "ProfileSaver total_number_of_foreign_dex_marks="
      << total_number_of_foreign_dex_marks_ << '\n'
      << "ProfileSaver max_number_profile_entries_cached="
-    << max_number_of_profile_entries_cached_ << '\n';
+     << max_number_of_profile_entries_cached_ << '\n'
+     << "ProfileSaver total_number_of_hot_spikes=" << total_number_of_hot_spikes_ << '\n'
+     << "ProfileSaver total_number_of_wake_ups=" << total_number_of_wake_ups_ << '\n';
 }
 
 
@@ -541,7 +610,8 @@
   // but we only use this in testing when we know this won't happen.
   // Refactor the way we handle the instance so that we don't end up in this situation.
   if (saver != nullptr) {
-    saver->ProcessProfilingInfo();
+    uint16_t new_methods;
+    saver->ProcessProfilingInfo(&new_methods);
   }
 }
 
diff --git a/runtime/jit/profile_saver.h b/runtime/jit/profile_saver.h
index 4f3cdc2..c6da959 100644
--- a/runtime/jit/profile_saver.h
+++ b/runtime/jit/profile_saver.h
@@ -49,6 +49,11 @@
   // If the profile saver is running, dumps statistics to the `os`. Otherwise it does nothing.
   static void DumpInstanceInfo(std::ostream& os);
 
+  // NO_THREAD_SAFETY_ANALYSIS for static function calling into member function with excludes lock.
+  static void NotifyJitActivity()
+      REQUIRES(!Locks::profiler_lock_, !wait_lock_)
+      NO_THREAD_SAFETY_ANALYSIS;
+
   // Just for testing purpose.
   static void ForceProcessProfiles();
   static bool HasSeenMethod(const std::string& profile,
@@ -71,10 +76,13 @@
   void Run() REQUIRES(!Locks::profiler_lock_, !wait_lock_);
   // Processes the existing profiling info from the jit code cache and returns
   // true if it needed to be saved to disk.
-  bool ProcessProfilingInfo()
+  bool ProcessProfilingInfo(uint16_t* new_methods)
     REQUIRES(!Locks::profiler_lock_)
     REQUIRES(!Locks::mutator_lock_);
 
+  void NotifyJitActivityInternal() REQUIRES(!wait_lock_);
+  void WakeUpSaver() REQUIRES(wait_lock_);
+
   // Returns true if the saver is shutting down (ProfileSaver::Stop() has been called).
   bool ShuttingDown(Thread* self) REQUIRES(!Locks::profiler_lock_);
 
@@ -121,6 +129,8 @@
   bool shutting_down_ GUARDED_BY(Locks::profiler_lock_);
   uint32_t last_save_number_of_methods_;
   uint32_t last_save_number_of_classes_;
+  uint64_t last_time_ns_saver_woke_up_ GUARDED_BY(wait_lock_);
+  uint32_t jit_activity_notifications_;
 
   // A local cache for the profile information. Maps each tracked file to its
   // profile information. The size of this cache is usually very small and tops
@@ -142,6 +152,8 @@
   uint64_t total_number_of_foreign_dex_marks_;
   // TODO(calin): replace with an actual size.
   uint64_t max_number_of_profile_entries_cached_;
+  uint64_t total_number_of_hot_spikes_;
+  uint64_t total_number_of_wake_ups_;
 
   DISALLOW_COPY_AND_ASSIGN(ProfileSaver);
 };
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index 5b6ded1..8c20fa6 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -673,7 +673,7 @@
   // `This` and `klass` must be classes.
   Class* GetCommonSuperClass(Handle<Class> klass) SHARED_REQUIRES(Locks::mutator_lock_);
 
-  void SetSuperClass(Class *new_super_class) SHARED_REQUIRES(Locks::mutator_lock_) {
+  void SetSuperClass(Class* new_super_class) SHARED_REQUIRES(Locks::mutator_lock_) {
     // Super class is assigned once, except during class linker initialization.
     Class* old_super_class = GetFieldObject<Class>(OFFSET_OF_OBJECT_MEMBER(Class, super_class_));
     DCHECK(old_super_class == nullptr || old_super_class == new_super_class);
diff --git a/runtime/monitor.cc b/runtime/monitor.cc
index f4bc222..71c866f 100644
--- a/runtime/monitor.cc
+++ b/runtime/monitor.cc
@@ -964,17 +964,13 @@
           if (!kUseReadBarrier) {
             DCHECK_EQ(new_lw.ReadBarrierState(), 0U);
             h_obj->SetLockWord(new_lw, true);
-            if (ATRACE_ENABLED()) {
-              ATRACE_END();
-            }
+            AtraceMonitorUnlock();
             // Success!
             return true;
           } else {
             // Use CAS to preserve the read barrier state.
             if (h_obj->CasLockWordWeakSequentiallyConsistent(lock_word, new_lw)) {
-              if (ATRACE_ENABLED()) {
-                ATRACE_END();
-              }
+              AtraceMonitorUnlock();
               // Success!
               return true;
             }
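AtraceMonitorUnlock() is introduced elsewhere in this patch; judging from the inline code it replaces, its shape is plausibly:

// Plausible minimal definition, inferred from the replaced code; the real helper
// pairs with an AtraceMonitorLock() that begins the systrace block.
static void AtraceMonitorUnlock() {
  if (ATRACE_ENABLED()) {
    ATRACE_END();
  }
}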
diff --git a/runtime/native/dalvik_system_DexFile.cc b/runtime/native/dalvik_system_DexFile.cc
index 0abe39d..0126b4d 100644
--- a/runtime/native/dalvik_system_DexFile.cc
+++ b/runtime/native/dalvik_system_DexFile.cc
@@ -16,6 +16,8 @@
 
 #include "dalvik_system_DexFile.h"
 
+#include <sstream>
+
 #include "base/logging.h"
 #include "base/stl_util.h"
 #include "base/stringprintf.h"
@@ -27,6 +29,7 @@
 #include "mirror/class_loader.h"
 #include "mirror/object-inl.h"
 #include "mirror/string.h"
+#include "oat_file.h"
 #include "oat_file_assistant.h"
 #include "oat_file_manager.h"
 #include "os.h"
@@ -387,6 +390,61 @@
   return oat_file_assistant.GetDexOptNeeded(filter);
 }
 
+static jstring DexFile_getDexFileStatus(JNIEnv* env,
+                                        jclass,
+                                        jstring javaFilename,
+                                        jstring javaInstructionSet) {
+  ScopedUtfChars filename(env, javaFilename);
+  if (env->ExceptionCheck()) {
+    return nullptr;
+  }
+
+  ScopedUtfChars instruction_set(env, javaInstructionSet);
+  if (env->ExceptionCheck()) {
+    return nullptr;
+  }
+
+  const InstructionSet target_instruction_set = GetInstructionSetFromString(
+      instruction_set.c_str());
+  if (target_instruction_set == kNone) {
+    ScopedLocalRef<jclass> iae(env, env->FindClass("java/lang/IllegalArgumentException"));
+    std::string message(StringPrintf("Instruction set %s is invalid.", instruction_set.c_str()));
+    env->ThrowNew(iae.get(), message.c_str());
+    return nullptr;
+  }
+
+  OatFileAssistant oat_file_assistant(filename.c_str(), target_instruction_set,
+                                      false /* profile_changed */,
+                                      false /* load_executable */);
+
+  std::ostringstream status;
+  bool oat_file_exists = false;
+  bool odex_file_exists = false;
+  if (oat_file_assistant.OatFileExists()) {
+    oat_file_exists = true;
+    status << *oat_file_assistant.OatFileName() << " [compilation_filter=";
+    status << CompilerFilter::NameOfFilter(oat_file_assistant.OatFileCompilerFilter());
+    status << ", status=" << oat_file_assistant.OatFileStatus();
+  }
+
+  if (oat_file_assistant.OdexFileExists()) {
+    odex_file_exists = true;
+    if (oat_file_exists) {
+      status << "] ";
+    }
+    status << *oat_file_assistant.OdexFileName() << " [compilation_filter=";
+    status << CompilerFilter::NameOfFilter(oat_file_assistant.OdexFileCompilerFilter());
+    status << ", status=" << oat_file_assistant.OdexFileStatus();
+  }
+
+  if (!oat_file_exists && !odex_file_exists) {
+    status << "invalid[";
+  }
+
+  status << "]";
+  return env->NewStringUTF(status.str().c_str());
+}
+
 static jint DexFile_getDexOptNeeded(JNIEnv* env,
                                     jclass,
                                     jstring javaFilename,
@@ -481,6 +539,16 @@
   return env->NewStringUTF(new_filter_str.c_str());
 }
 
+static jboolean DexFile_isBackedByOatFile(JNIEnv* env, jclass, jobject cookie) {
+  const OatFile* oat_file = nullptr;
+  std::vector<const DexFile*> dex_files;
+  if (!ConvertJavaArrayToDexFiles(env, cookie, /* out */ dex_files, /* out */ oat_file)) {
+    DCHECK(env->ExceptionCheck());
+    return false;
+  }
+  return oat_file != nullptr;
+}
+
 static JNINativeMethod gMethods[] = {
   NATIVE_METHOD(DexFile, closeDexFile, "(Ljava/lang/Object;)Z"),
   NATIVE_METHOD(DexFile,
@@ -506,6 +574,9 @@
   NATIVE_METHOD(DexFile,
                 getNonProfileGuidedCompilerFilter,
                 "(Ljava/lang/String;)Ljava/lang/String;"),
+  NATIVE_METHOD(DexFile, isBackedByOatFile, "(Ljava/lang/Object;)Z"),
+  NATIVE_METHOD(DexFile, getDexFileStatus,
+                "(Ljava/lang/String;Ljava/lang/String;)Ljava/lang/String;")
 };
 
 void register_dalvik_system_DexFile(JNIEnv* env) {
diff --git a/runtime/native/dalvik_system_VMRuntime.cc b/runtime/native/dalvik_system_VMRuntime.cc
index 5ba8df7..6c943dc 100644
--- a/runtime/native/dalvik_system_VMRuntime.cc
+++ b/runtime/native/dalvik_system_VMRuntime.cc
@@ -145,6 +145,10 @@
   return Dbg::IsDebuggerActive();
 }
 
+static jboolean VMRuntime_isNativeDebuggable(JNIEnv*, jobject) {
+  return Runtime::Current()->IsNativeDebuggable();
+}
+
 static jobjectArray VMRuntime_properties(JNIEnv* env, jobject) {
   return toStringArray(env, Runtime::Current()->GetProperties());
 }
@@ -641,6 +645,7 @@
   NATIVE_METHOD(VMRuntime, disableJitCompilation, "()V"),
   NATIVE_METHOD(VMRuntime, getTargetHeapUtilization, "()F"),
   NATIVE_METHOD(VMRuntime, isDebuggerActive, "!()Z"),
+  NATIVE_METHOD(VMRuntime, isNativeDebuggable, "!()Z"),
   NATIVE_METHOD(VMRuntime, nativeSetTargetHeapUtilization, "(F)V"),
   NATIVE_METHOD(VMRuntime, newNonMovableArray, "!(Ljava/lang/Class;I)Ljava/lang/Object;"),
   NATIVE_METHOD(VMRuntime, newUnpaddedArray, "!(Ljava/lang/Class;I)Ljava/lang/Object;"),
diff --git a/runtime/native/java_lang_reflect_Constructor.cc b/runtime/native/java_lang_reflect_Constructor.cc
index ddcaade..54b8afd 100644
--- a/runtime/native/java_lang_reflect_Constructor.cc
+++ b/runtime/native/java_lang_reflect_Constructor.cc
@@ -34,20 +34,38 @@
   ScopedFastNativeObjectAccess soa(env);
   StackHandleScope<1> hs(soa.Self());
   ArtMethod* method = ArtMethod::FromReflectedMethod(soa, javaMethod);
-  Handle<mirror::Class> klass(hs.NewHandle(soa.Decode<mirror::Class*>(annotationType)));
-  return soa.AddLocalReference<jobject>(
-      method->GetDexFile()->GetAnnotationForMethod(method, klass));
+  if (method->IsProxyMethod()) {
+    return nullptr;
+  } else {
+    Handle<mirror::Class> klass(hs.NewHandle(soa.Decode<mirror::Class*>(annotationType)));
+    return soa.AddLocalReference<jobject>(
+        method->GetDexFile()->GetAnnotationForMethod(method, klass));
+  }
 }
 
 static jobjectArray Constructor_getDeclaredAnnotations(JNIEnv* env, jobject javaMethod) {
   ScopedFastNativeObjectAccess soa(env);
   ArtMethod* method = ArtMethod::FromReflectedMethod(soa, javaMethod);
-  return soa.AddLocalReference<jobjectArray>(method->GetDexFile()->GetAnnotationsForMethod(method));
+  if (method->IsProxyMethod()) {
+    mirror::Class* class_class = mirror::Class::GetJavaLangClass();
+    mirror::Class* class_array_class =
+        Runtime::Current()->GetClassLinker()->FindArrayClass(soa.Self(), &class_class);
+    if (class_array_class == nullptr) {
+      return nullptr;
+    }
+    mirror::ObjectArray<mirror::Class>* empty_array =
+        mirror::ObjectArray<mirror::Class>::Alloc(soa.Self(), class_array_class, 0);
+    return soa.AddLocalReference<jobjectArray>(empty_array);
+  } else {
+    return soa.AddLocalReference<jobjectArray>(
+        method->GetDexFile()->GetAnnotationsForMethod(method));
+  }
 }
 
 static jobjectArray Constructor_getExceptionTypes(JNIEnv* env, jobject javaMethod) {
   ScopedFastNativeObjectAccess soa(env);
-  ArtMethod* method = ArtMethod::FromReflectedMethod(soa, javaMethod);
+  ArtMethod* method = ArtMethod::FromReflectedMethod(soa, javaMethod)
+      ->GetInterfaceMethodIfProxy(sizeof(void*));
   mirror::ObjectArray<mirror::Class>* result_array =
       method->GetDexFile()->GetExceptionTypesForMethod(method);
   if (result_array == nullptr) {
@@ -69,7 +87,12 @@
 static jobjectArray Constructor_getParameterAnnotationsNative(JNIEnv* env, jobject javaMethod) {
   ScopedFastNativeObjectAccess soa(env);
   ArtMethod* method = ArtMethod::FromReflectedMethod(soa, javaMethod);
-  return soa.AddLocalReference<jobjectArray>(method->GetDexFile()->GetParameterAnnotations(method));
+  if (method->IsProxyMethod()) {
+    return nullptr;
+  } else {
+    return soa.AddLocalReference<jobjectArray>(
+        method->GetDexFile()->GetParameterAnnotations(method));
+  }
 }
 
 static jboolean Constructor_isAnnotationPresentNative(JNIEnv* env, jobject javaMethod,
@@ -77,6 +100,10 @@
   ScopedFastNativeObjectAccess soa(env);
   StackHandleScope<1> hs(soa.Self());
   ArtMethod* method = ArtMethod::FromReflectedMethod(soa, javaMethod);
+  if (method->IsProxyMethod()) {
+    // Proxies have no annotations.
+    return false;
+  }
   Handle<mirror::Class> klass(hs.NewHandle(soa.Decode<mirror::Class*>(annotationType)));
   return method->GetDexFile()->IsMethodAnnotationPresent(method, klass);
 }
diff --git a/runtime/oat.h b/runtime/oat.h
index 543d99f..57675dc 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -43,7 +43,7 @@
   static constexpr const char* kNativeDebuggableKey = "native-debuggable";
   static constexpr const char* kCompilerFilter = "compiler-filter";
   static constexpr const char* kClassPathKey = "classpath";
-  static constexpr const char* kBootClassPath = "bootclasspath";
+  static constexpr const char* kBootClassPathKey = "bootclasspath";
 
   static constexpr const char kTrueValue[] = "true";
   static constexpr const char kFalseValue[] = "false";
diff --git a/runtime/oat_file.cc b/runtime/oat_file.cc
index ae84019..ec28685 100644
--- a/runtime/oat_file.cc
+++ b/runtime/oat_file.cc
@@ -98,6 +98,8 @@
   virtual const uint8_t* FindDynamicSymbolAddress(const std::string& symbol_name,
                                                   std::string* error_msg) const = 0;
 
+  virtual void PreLoad() = 0;
+
   virtual bool Load(const std::string& elf_filename,
                     uint8_t* oat_file_begin,
                     bool writable,
@@ -138,6 +140,9 @@
                                       const char* abs_dex_location,
                                       std::string* error_msg) {
   std::unique_ptr<OatFileBase> ret(new kOatFileBaseSubType(location, executable));
+
+  ret->PreLoad();
+
   if (!ret->Load(elf_filename,
                  oat_file_begin,
                  writable,
@@ -150,6 +155,7 @@
   if (!ret->ComputeFields(requested_base, elf_filename, error_msg)) {
     return nullptr;
   }
+
   ret->PreSetup(elf_filename);
 
   if (!ret->Setup(abs_dex_location, error_msg)) {
@@ -509,6 +515,7 @@
   DlOpenOatFile(const std::string& filename, bool executable)
       : OatFileBase(filename, executable),
         dlopen_handle_(nullptr),
+        shared_objects_before_(0),
         first_oat_(RegisterOatFileLocation(filename)) {
   }
 
@@ -530,6 +537,8 @@
     return ptr;
   }
 
+  void PreLoad() OVERRIDE;
+
   bool Load(const std::string& elf_filename,
             uint8_t* oat_file_begin,
             bool writable,
@@ -551,12 +560,38 @@
   // Dummy memory map objects corresponding to the regions mapped by dlopen.
   std::vector<std::unique_ptr<MemMap>> dlopen_mmaps_;
 
+  // The number of shared objects the linker told us about before loading. Used to
+  // (optimistically) optimize the PreSetup stage (see comment there).
+  size_t shared_objects_before_;
+
   // Track the registration status (= was this the first oat file) for the location.
   const bool first_oat_;
 
   DISALLOW_COPY_AND_ASSIGN(DlOpenOatFile);
 };
 
+void DlOpenOatFile::PreLoad() {
+#ifdef __APPLE__
+  UNUSED(shared_objects_before_);
+  LOG(FATAL) << "Should not reach here.";
+  UNREACHABLE();
+#else
+  // Count the entries in dl_iterate_phdr we get at this point in time.
+  struct dl_iterate_context {
+    static int callback(struct dl_phdr_info *info ATTRIBUTE_UNUSED,
+                        size_t size ATTRIBUTE_UNUSED,
+                        void *data) {
+      reinterpret_cast<dl_iterate_context*>(data)->count++;
+      return 0;  // Continue iteration.
+    }
+    size_t count = 0;
+  } context;
+
+  dl_iterate_phdr(dl_iterate_context::callback, &context);
+  shared_objects_before_ = context.count;
+#endif
+}
+
 bool DlOpenOatFile::Load(const std::string& elf_filename,
                          uint8_t* oat_file_begin,
                          bool writable,
@@ -657,6 +692,14 @@
   struct dl_iterate_context {
     static int callback(struct dl_phdr_info *info, size_t /* size */, void *data) {
       auto* context = reinterpret_cast<dl_iterate_context*>(data);
+      context->shared_objects_seen++;
+      if (context->shared_objects_seen < context->shared_objects_before) {
+        // We haven't been called yet for anything we haven't seen before. Just continue.
+        // Note: this is aggressively optimistic. If another thread was unloading a library,
+        //       we may miss out here. However, this does not happen often in practice.
+        return 0;
+      }
+
       // See whether this callback corresponds to the file which we have just loaded.
       bool contains_begin = false;
       for (int i = 0; i < info->dlpi_phnum; i++) {
@@ -687,11 +730,22 @@
     }
     const uint8_t* const begin_;
     std::vector<std::unique_ptr<MemMap>>* const dlopen_mmaps_;
-  } context = { Begin(), &dlopen_mmaps_ };
+    const size_t shared_objects_before;
+    size_t shared_objects_seen;
+  };
+  dl_iterate_context context = { Begin(), &dlopen_mmaps_, shared_objects_before_, 0};
 
   if (dl_iterate_phdr(dl_iterate_context::callback, &context) == 0) {
-    PrintFileToLog("/proc/self/maps", LogSeverity::WARNING);
-    LOG(ERROR) << "File " << elf_filename << " loaded with dlopen but cannot find its mmaps.";
+    // Our optimization may have gone wrong. Retry with shared_objects_before == 0 before
+    // giving up. This should be unusual.
+    VLOG(oat) << "Need a second run in PreSetup, didn't find with shared_objects_before="
+              << shared_objects_before_;
+    dl_iterate_context context0 = { Begin(), &dlopen_mmaps_, 0, 0};
+    if (dl_iterate_phdr(dl_iterate_context::callback, &context0) == 0) {
+      // OK, give up and print an error.
+      PrintFileToLog("/proc/self/maps", LogSeverity::WARNING);
+      LOG(ERROR) << "File " << elf_filename << " loaded with dlopen but cannot find its mmaps.";
+    }
   }
 #endif
 }
@@ -728,6 +782,9 @@
     return ptr;
   }
 
+  void PreLoad() OVERRIDE {
+  }
+
   bool Load(const std::string& elf_filename,
             uint8_t* oat_file_begin,  // Override where the file is loaded to if not null
             bool writable,
@@ -897,7 +954,12 @@
   ScopedTrace trace("Open oat file " + location);
   CHECK(!filename.empty()) << location;
   CheckLocation(location);
-  std::unique_ptr<OatFile> ret;
+
+  // Check that the file even exists, fast-fail.
+  if (!OS::FileExists(filename.c_str())) {
+    *error_msg = StringPrintf("File %s does not exist.", filename.c_str());
+    return nullptr;
+  }
 
   // Try dlopen first, as it is required for native debuggability. This will fail fast if dlopen is
   // disabled.
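
The PreLoad/PreSetup pair above implements a skip-count optimization for dl_iterate_phdr: PreLoad records how many shared objects the dynamic linker already knows about, and PreSetup skips that many callback invocations before searching for the freshly dlopen'ed oat file, falling back to a full scan if the count was stale. Below is a minimal sketch of the counting step, assuming a Linux/Bionic toolchain; the names are illustrative, not ART's.

    #include <link.h>    // dl_iterate_phdr, dl_phdr_info
    #include <cstddef>

    // Count the shared objects currently known to the dynamic linker. A later
    // iteration can skip this many entries, since the object loaded by dlopen
    // is expected to appear after all of them.
    static size_t CountLoadedSharedObjects() {
      struct Context {
        static int Callback(dl_phdr_info* /*info*/, size_t /*size*/, void* data) {
          ++reinterpret_cast<Context*>(data)->count;
          return 0;  // Zero means: continue iterating.
        }
        size_t count = 0;
      } context;
      dl_iterate_phdr(Context::Callback, &context);
      return context.count;
    }
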
diff --git a/runtime/oat_file.h b/runtime/oat_file.h
index 9470624..aa727ff 100644
--- a/runtime/oat_file.h
+++ b/runtime/oat_file.h
@@ -48,6 +48,9 @@
 
 class OatFile {
  public:
+  // Special classpath that skips the shared library check.
+  static constexpr const char* kSpecialSharedLibrary = "&";
+
   typedef art::OatDexFile OatDexFile;
 
   // Opens an oat file contained within the given elf file. This is always opened as
diff --git a/runtime/oat_file_assistant.cc b/runtime/oat_file_assistant.cc
index a508e87..fba10ca 100644
--- a/runtime/oat_file_assistant.cc
+++ b/runtime/oat_file_assistant.cc
@@ -30,6 +30,7 @@
 
 #include "base/logging.h"
 #include "base/stringprintf.h"
+#include "compiler_filter.h"
 #include "class_linker.h"
 #include "gc/heap.h"
 #include "gc/space/image_space.h"
@@ -43,6 +44,24 @@
 
 namespace art {
 
+std::ostream& operator<<(std::ostream& stream, const OatFileAssistant::OatStatus status) {
+  switch (status) {
+    case OatFileAssistant::kOatOutOfDate:
+      stream << "kOatOutOfDate";
+      break;
+    case OatFileAssistant::kOatUpToDate:
+      stream << "kOatUpToDate";
+      break;
+    case OatFileAssistant::kOatNeedsRelocation:
+      stream << "kOatNeedsRelocation";
+      break;
+    default:
+      UNREACHABLE();
+  }
+
+  return stream;
+}
+
 OatFileAssistant::OatFileAssistant(const char* dex_location,
                                    const InstructionSet isa,
                                    bool profile_changed,
@@ -377,6 +396,12 @@
   return cached_odex_file_is_up_to_date_;
 }
 
+CompilerFilter::Filter OatFileAssistant::OdexFileCompilerFilter() {
+  const OatFile* odex_file = GetOdexFile();
+  CHECK(odex_file != nullptr);
+
+  return odex_file->GetCompilerFilter();
+}
+
 std::string OatFileAssistant::ArtFileName(const OatFile* oat_file) const {
   const std::string oat_file_location = oat_file->GetLocation();
   // Replace extension with .art
@@ -455,6 +480,13 @@
   return cached_oat_file_is_up_to_date_;
 }
 
+CompilerFilter::Filter OatFileAssistant::OatFileCompilerFilter() {
+  const OatFile* oat_file = GetOatFile();
+  CHECK(oat_file != nullptr);
+
+  return oat_file->GetCompilerFilter();
+}
+
 OatFileAssistant::OatStatus OatFileAssistant::GivenOatFileStatus(const OatFile& file) {
   // TODO: This could cause GivenOatFileIsOutOfDate to be called twice, which
   // is more work than we need to do. If performance becomes a concern, and
@@ -739,7 +771,11 @@
   argv.push_back("--runtime-arg");
   argv.push_back("-classpath");
   argv.push_back("--runtime-arg");
-  argv.push_back(runtime->GetClassPathString());
+  std::string class_path = runtime->GetClassPathString();
+  if (class_path.empty()) {
+    class_path = OatFile::kSpecialSharedLibrary;
+  }
+  argv.push_back(class_path);
   if (runtime->IsDebuggable()) {
     argv.push_back("--debuggable");
   }
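
Presumably, an empty -classpath argument would leave the generated oat header without a usable kClassPath entry, so the fallback above writes the special marker "&" instead, which tells the later shared-libraries check (AreSharedLibrariesOk in oat_file_manager.cc) to skip itself explicitly. A hypothetical helper showing the same fallback in isolation; BuildClassPathArgs is an illustrative name, not an ART function:

    #include <string>
    #include <vector>

    // Mirror of the fallback above; kSpecialSharedLibrary is "&" as declared
    // in OatFile.
    std::vector<std::string> BuildClassPathArgs(const std::string& runtime_class_path) {
      static constexpr const char* kSpecialSharedLibrary = "&";
      std::string class_path = runtime_class_path;
      if (class_path.empty()) {
        class_path = kSpecialSharedLibrary;
      }
      return {"--runtime-arg", "-classpath", "--runtime-arg", class_path};
    }
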
diff --git a/runtime/oat_file_assistant.h b/runtime/oat_file_assistant.h
index 85f4a47..f48cdf3 100644
--- a/runtime/oat_file_assistant.h
+++ b/runtime/oat_file_assistant.h
@@ -19,6 +19,7 @@
 
 #include <cstdint>
 #include <memory>
+#include <sstream>
 #include <string>
 
 #include "arch/instruction_set.h"
@@ -211,6 +212,9 @@
   bool OdexFileIsOutOfDate();
   bool OdexFileNeedsRelocation();
   bool OdexFileIsUpToDate();
+  // Must only be called if the associated odex file exists, i.e., if
+  // |OdexFileExists() == true|.
+  CompilerFilter::Filter OdexFileCompilerFilter();
 
   // When the dex files is compiled on the target device, the oat file is the
   // result. The oat file will have been relocated to some
@@ -227,6 +231,9 @@
   bool OatFileIsOutOfDate();
   bool OatFileNeedsRelocation();
   bool OatFileIsUpToDate();
+  // Must only be called if the associated oat file exists, i.e., if
+  // |OatFileExists() == true|.
+  CompilerFilter::Filter OatFileCompilerFilter();
 
   // Return image file name. Does not cache since it relies on the oat file.
   std::string ArtFileName(const OatFile* oat_file) const;
@@ -436,6 +443,8 @@
   DISALLOW_COPY_AND_ASSIGN(OatFileAssistant);
 };
 
+std::ostream& operator<<(std::ostream& stream, const OatFileAssistant::OatStatus status);
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_OAT_FILE_ASSISTANT_H_
diff --git a/runtime/oat_file_assistant_test.cc b/runtime/oat_file_assistant_test.cc
index 764b969..15a1aa4 100644
--- a/runtime/oat_file_assistant_test.cc
+++ b/runtime/oat_file_assistant_test.cc
@@ -1264,8 +1264,7 @@
 class RaceGenerateTask : public Task {
  public:
   explicit RaceGenerateTask(const std::string& dex_location, const std::string& oat_location)
-    : dex_location_(dex_location), oat_location_(oat_location),
-      loaded_oat_file_(nullptr)
+    : dex_location_(dex_location), oat_location_(oat_location), loaded_oat_file_(nullptr)
   {}
 
   void Run(Thread* self ATTRIBUTE_UNUSED) {
diff --git a/runtime/oat_file_manager.cc b/runtime/oat_file_manager.cc
index bc01da4..0af6716 100644
--- a/runtime/oat_file_manager.cc
+++ b/runtime/oat_file_manager.cc
@@ -36,11 +36,6 @@
 
 namespace art {
 
-// For b/21333911.
-// Only enabled for debug builds to prevent bit rot. There are too many performance regressions for
-// normal builds.
-static constexpr bool kDuplicateClassesCheck = kIsDebugBuild;
-
 // If true, then we attempt to load the application image if it exists.
 static constexpr bool kEnableAppImage = true;
 
@@ -173,7 +168,7 @@
 
   void Next() {
     ++current_class_index_;
-    cached_descriptor_ = GetClassDescriptor(dex_file_.get(), current_class_index_);
+    cached_descriptor_ = GetClassDescriptor(dex_file_, current_class_index_);
   }
 
   size_t GetCurrentClassIndex() const {
@@ -185,7 +180,12 @@
   }
 
   const DexFile* GetDexFile() const {
-    return dex_file_.get();
+    return dex_file_;
+  }
+
+  void DeleteDexFile() {
+    delete dex_file_;
+    dex_file_ = nullptr;
   }
 
  private:
@@ -196,7 +196,7 @@
   }
 
   const char* cached_descriptor_;
-  std::shared_ptr<const DexFile> dex_file_;
+  const DexFile* dex_file_;
   size_t current_class_index_;
   bool from_loaded_oat_;  // We only need to compare mismatches between what we load now
                           // and what was loaded before. Any old duplicates must have been
@@ -219,53 +219,299 @@
 }
 
 static void AddNext(/*inout*/DexFileAndClassPair* original,
-                    /*inout*/std::priority_queue<DexFileAndClassPair>* heap) {
+                    /*inout*/std::priority_queue<DexFileAndClassPair>* heap,
+                    bool owning_dex_files) {
   if (original->DexFileHasMoreClasses()) {
     original->Next();
     heap->push(std::move(*original));
+  } else if (owning_dex_files) {
+    original->DeleteDexFile();
   }
 }
 
+static void FreeDexFilesInHeap(std::priority_queue<DexFileAndClassPair>* heap,
+                               bool owning_dex_files) {
+  if (owning_dex_files) {
+    while (!heap->empty()) {
+      delete heap->top().GetDexFile();
+      heap->pop();
+    }
+  }
+}
+
+static void IterateOverJavaDexFile(mirror::Object* dex_file,
+                                   ArtField* const cookie_field,
+                                   std::function<bool(const DexFile*)> fn)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  if (dex_file != nullptr) {
+    mirror::LongArray* long_array = cookie_field->GetObject(dex_file)->AsLongArray();
+    if (long_array == nullptr) {
+      // This should never happen, so log a warning.
+      LOG(WARNING) << "Null DexFile::mCookie";
+      return;
+    }
+    int32_t long_array_size = long_array->GetLength();
+    // Start from 1 to skip the oat file.
+    for (int32_t j = 1; j < long_array_size; ++j) {
+      const DexFile* cp_dex_file = reinterpret_cast<const DexFile*>(static_cast<uintptr_t>(
+          long_array->GetWithoutChecks(j)));
+      if (!fn(cp_dex_file)) {
+        return;
+      }
+    }
+  }
+}
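
For reference, the cookie convention being walked here: DexFile.mCookie is a Java long[] whose first element refers to the oat file and whose remaining elements are native DexFile pointers stored as longs, hence the loop starting at index 1 above. A sketch of the decoding with stand-in types rather than ART's mirror classes:

    #include <cstdint>
    #include <vector>

    struct DexFile {};  // Stand-in for art::DexFile.

    // Decode a mCookie-style long array: index 0 is the oat file, so start at 1.
    std::vector<const DexFile*> DecodeCookie(const std::vector<int64_t>& cookie) {
      std::vector<const DexFile*> dex_files;
      for (size_t i = 1; i < cookie.size(); ++i) {
        dex_files.push_back(reinterpret_cast<const DexFile*>(
            static_cast<uintptr_t>(cookie[i])));
      }
      return dex_files;
    }
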
+
+static void IterateOverPathClassLoader(
+    ScopedObjectAccessAlreadyRunnable& soa,
+    Handle<mirror::ClassLoader> class_loader,
+    MutableHandle<mirror::ObjectArray<mirror::Object>> dex_elements,
+    std::function<bool(const DexFile*)> fn) SHARED_REQUIRES(Locks::mutator_lock_) {
+  // Handle this class loader as if it were the child PathClassLoader.
+  // The class loader is a PathClassLoader, which inherits from BaseDexClassLoader.
+  // We need to get its DexPathList and loop through it, inspecting the DexFile
+  // held by each element.
+  ArtField* const cookie_field = soa.DecodeField(WellKnownClasses::dalvik_system_DexFile_cookie);
+  ArtField* const dex_file_field =
+      soa.DecodeField(WellKnownClasses::dalvik_system_DexPathList__Element_dexFile);
+  mirror::Object* dex_path_list =
+      soa.DecodeField(WellKnownClasses::dalvik_system_PathClassLoader_pathList)->
+      GetObject(class_loader.Get());
+  if (dex_path_list != nullptr && dex_file_field != nullptr && cookie_field != nullptr) {
+    // DexPathList has an array dexElements of Elements[] which each contain a dex file.
+    mirror::Object* dex_elements_obj =
+        soa.DecodeField(WellKnownClasses::dalvik_system_DexPathList_dexElements)->
+        GetObject(dex_path_list);
+    // Loop through each dalvik.system.DexPathList$Element's dalvik.system.DexFile and look
+    // at the mCookie which is a DexFile vector.
+    if (dex_elements_obj != nullptr) {
+      dex_elements.Assign(dex_elements_obj->AsObjectArray<mirror::Object>());
+      for (int32_t i = 0; i < dex_elements->GetLength(); ++i) {
+        mirror::Object* element = dex_elements->GetWithoutChecks(i);
+        if (element == nullptr) {
+          // Should never happen; fall back to Java code to throw an NPE.
+          break;
+        }
+        mirror::Object* dex_file = dex_file_field->GetObject(element);
+        IterateOverJavaDexFile(dex_file, cookie_field, fn);
+      }
+    }
+  }
+}
+
+static bool GetDexFilesFromClassLoader(
+    ScopedObjectAccessAlreadyRunnable& soa,
+    mirror::ClassLoader* class_loader,
+    std::priority_queue<DexFileAndClassPair>* queue) SHARED_REQUIRES(Locks::mutator_lock_) {
+  if (ClassLinker::IsBootClassLoader(soa, class_loader)) {
+    // The boot class loader. We don't load any of these files, as we know we compiled against
+    // them correctly.
+    return true;
+  }
+
+  // Unsupported class-loader?
+  if (class_loader->GetClass() !=
+      soa.Decode<mirror::Class*>(WellKnownClasses::dalvik_system_PathClassLoader)) {
+    VLOG(class_linker) << "Unsupported class-loader " << PrettyClass(class_loader->GetClass());
+    return false;
+  }
+
+  bool recursive_result = GetDexFilesFromClassLoader(soa, class_loader->GetParent(), queue);
+  if (!recursive_result) {
+    // Something wrong up the chain.
+    return false;
+  }
+
+  // Collect all the dex files.
+  auto GetDexFilesFn = [&] (const DexFile* cp_dex_file)
+            SHARED_REQUIRES(Locks::mutator_lock_) {
+    if (cp_dex_file->NumClassDefs() > 0) {
+      queue->emplace(cp_dex_file, 0U, true);
+    }
+    return true;  // Continue looking.
+  };
+
+  // Handle for dex-cache-element.
+  StackHandleScope<3> hs(soa.Self());
+  MutableHandle<mirror::ObjectArray<mirror::Object>> dex_elements(
+      hs.NewHandle<mirror::ObjectArray<mirror::Object>>(nullptr));
+  Handle<mirror::ClassLoader> h_class_loader(hs.NewHandle(class_loader));
+
+  IterateOverPathClassLoader(soa, h_class_loader, dex_elements, GetDexFilesFn);
+
+  return true;
+}
+
+static void GetDexFilesFromDexElementsArray(
+    ScopedObjectAccessAlreadyRunnable& soa,
+    Handle<mirror::ObjectArray<mirror::Object>> dex_elements,
+    std::priority_queue<DexFileAndClassPair>* queue) SHARED_REQUIRES(Locks::mutator_lock_) {
+  if (dex_elements.Get() == nullptr) {
+    // Nothing to do.
+    return;
+  }
+
+  ArtField* const cookie_field = soa.DecodeField(WellKnownClasses::dalvik_system_DexFile_cookie);
+  ArtField* const dex_file_field =
+      soa.DecodeField(WellKnownClasses::dalvik_system_DexPathList__Element_dexFile);
+  const mirror::Class* const element_class = soa.Decode<mirror::Class*>(
+      WellKnownClasses::dalvik_system_DexPathList__Element);
+  const mirror::Class* const dexfile_class = soa.Decode<mirror::Class*>(
+        WellKnownClasses::dalvik_system_DexFile);
+
+  // Collect all the dex files.
+  auto GetDexFilesFn = [&] (const DexFile* cp_dex_file)
+      SHARED_REQUIRES(Locks::mutator_lock_) {
+    if (cp_dex_file != nullptr && cp_dex_file->NumClassDefs() > 0) {
+      queue->emplace(cp_dex_file, 0U, true);
+    }
+    return true;  // Continue looking.
+  };
+
+  for (int32_t i = 0; i < dex_elements->GetLength(); ++i) {
+    mirror::Object* element = dex_elements->GetWithoutChecks(i);
+    if (element == nullptr) {
+      continue;
+    }
+
+    // We support this being dalvik.system.DexPathList$Element and dalvik.system.DexFile.
+
+    mirror::Object* dex_file;
+    if (element->GetClass() == element_class) {
+      dex_file = dex_file_field->GetObject(element);
+    } else if (element->GetClass() == dexfile_class) {
+      dex_file = element;
+    } else {
+      LOG(WARNING) << "Unsupported element in dex_elements: " << PrettyClass(element->GetClass());
+      continue;
+    }
+
+    IterateOverJavaDexFile(dex_file, cookie_field, GetDexFilesFn);
+  }
+}
+
+static bool AreSharedLibrariesOk(const std::string& shared_libraries,
+                                 std::priority_queue<DexFileAndClassPair>& queue) {
+  if (shared_libraries.empty()) {
+    if (queue.empty()) {
+      // No shared libraries or oat files, as expected.
+      return true;
+    }
+  } else {
+    if (shared_libraries.compare(OatFile::kSpecialSharedLibrary) == 0) {
+      // If we find the special shared library, skip the shared libraries check.
+      return true;
+    }
+    // Shared libraries is a series of dex file paths and their checksums, each separated by '*'.
+    std::vector<std::string> shared_libraries_split;
+    Split(shared_libraries, '*', &shared_libraries_split);
+
+    size_t index = 0;
+    std::priority_queue<DexFileAndClassPair> temp = queue;
+    while (!temp.empty() && index < shared_libraries_split.size() - 1) {
+      DexFileAndClassPair pair(temp.top());
+      const DexFile* dex_file = pair.GetDexFile();
+      std::string dex_filename(dex_file->GetLocation());
+      uint32_t dex_checksum = dex_file->GetLocationChecksum();
+      if (dex_filename != shared_libraries_split[index] ||
+          dex_checksum != std::stoul(shared_libraries_split[index + 1])) {
+        break;
+      }
+      temp.pop();
+      index += 2;
+    }
+
+    // Check is successful if it made it through the queue and all the shared libraries.
+    return temp.empty() && index == shared_libraries_split.size();
+  }
+  return false;
+}
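
The '*'-separated format consumed above alternates dex file locations with their checksums, e.g. "a.dex*123*b.dex*456". A sketch of parsing it back into (location, checksum) pairs; std::stoul throws on a malformed checksum, as in the code above, and further error handling is elided:

    #include <cstdint>
    #include <string>
    #include <utility>
    #include <vector>

    // Split "loc*checksum*loc*checksum..." into (location, checksum) pairs.
    std::vector<std::pair<std::string, uint32_t>> ParseSharedLibraries(
        const std::string& value) {
      std::vector<std::string> parts;
      size_t start = 0;
      for (size_t pos; (pos = value.find('*', start)) != std::string::npos;
           start = pos + 1) {
        parts.push_back(value.substr(start, pos - start));
      }
      parts.push_back(value.substr(start));
      std::vector<std::pair<std::string, uint32_t>> result;
      for (size_t i = 0; i + 1 < parts.size(); i += 2) {
        result.emplace_back(parts[i],
                            static_cast<uint32_t>(std::stoul(parts[i + 1])));
      }
      return result;
    }
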
+
 // Check for class-def collisions in dex files.
 //
-// This works by maintaining a heap with one class from each dex file, sorted by the class
-// descriptor. Then a dex-file/class pair is continually removed from the heap and compared
+// This first walks the class loader chain, getting all the dex files from the class loader. If
+// the class loader is null or one of the class loaders in the chain is unsupported, we collect
+// dex files from all open non-boot oat files to be safe.
+//
+// It then checks whether the shared libraries are in the expected order and the oat files
+// have the expected checksums. If so, we exit early. Otherwise, we do the collision check.
+//
+// The collision check works by maintaining a heap with one class from each dex file, sorted by the
+// class descriptor. Then a dex-file/class pair is continually removed from the heap and compared
 // against the following top element. If the descriptor is the same, it is now checked whether
 // the two elements agree on whether their dex file was from an already-loaded oat-file or the
 // new oat file. Any disagreement indicates a collision.
 bool OatFileManager::HasCollisions(const OatFile* oat_file,
+                                   jobject class_loader,
+                                   jobjectArray dex_elements,
                                    std::string* error_msg /*out*/) const {
   DCHECK(oat_file != nullptr);
   DCHECK(error_msg != nullptr);
-  if (!kDuplicateClassesCheck) {
-    return false;
+
+  std::priority_queue<DexFileAndClassPair> queue;
+  bool owning_dex_files = false;
+
+  // Try to get dex files from the given class loader. If the class loader is null, or we do
+  // not support one of the class loaders in the chain, conservatively compare against all
+  // (non-boot) oat files.
+  bool class_loader_ok = false;
+  {
+    ScopedObjectAccess soa(Thread::Current());
+    StackHandleScope<2> hs(Thread::Current());
+    Handle<mirror::ClassLoader> h_class_loader =
+        hs.NewHandle(soa.Decode<mirror::ClassLoader*>(class_loader));
+    Handle<mirror::ObjectArray<mirror::Object>> h_dex_elements =
+        hs.NewHandle(soa.Decode<mirror::ObjectArray<mirror::Object>*>(dex_elements));
+    if (h_class_loader.Get() != nullptr &&
+        GetDexFilesFromClassLoader(soa, h_class_loader.Get(), &queue)) {
+      class_loader_ok = true;
+
+      // In this case, also take into account the dex_elements array, if given. We don't need to
+      // read it otherwise, as we'll compare against all open oat files anyway.
+      GetDexFilesFromDexElementsArray(soa, h_dex_elements, &queue);
+    } else if (h_class_loader.Get() != nullptr) {
+      VLOG(class_linker) << "Something unsupported with "
+                         << PrettyClass(h_class_loader->GetClass());
+    }
   }
 
   // Dex files are registered late - once a class is actually being loaded. We have to compare
   // against the open oat files. Take the oat_file_manager_lock_ that protects oat_files_ accesses.
   ReaderMutexLock mu(Thread::Current(), *Locks::oat_file_manager_lock_);
 
-  std::priority_queue<DexFileAndClassPair> queue;
+  if (!class_loader_ok) {
+    // Add dex files from already loaded oat files, but skip boot.
 
-  // Add dex files from already loaded oat files, but skip boot.
-  std::vector<const OatFile*> boot_oat_files = GetBootOatFiles();
-  // The same OatFile can be loaded multiple times at different addresses. In this case, we don't
-  // need to check both against each other since they would have resolved the same way at compile
-  // time.
-  std::unordered_set<std::string> unique_locations;
-  for (const std::unique_ptr<const OatFile>& loaded_oat_file : oat_files_) {
-    DCHECK_NE(loaded_oat_file.get(), oat_file);
-    const std::string& location = loaded_oat_file->GetLocation();
-    if (std::find(boot_oat_files.begin(), boot_oat_files.end(), loaded_oat_file.get()) ==
-        boot_oat_files.end() && location != oat_file->GetLocation() &&
-        unique_locations.find(location) == unique_locations.end()) {
-      unique_locations.insert(location);
-      AddDexFilesFromOat(loaded_oat_file.get(), /*already_loaded*/true, &queue);
+    // Clean up the queue.
+    while (!queue.empty()) {
+      queue.pop();
+    }
+
+    // Anything we load now is something we own and must be released later.
+    owning_dex_files = true;
+
+    std::vector<const OatFile*> boot_oat_files = GetBootOatFiles();
+    // The same OatFile can be loaded multiple times at different addresses. In this case, we don't
+    // need to check both against each other since they would have resolved the same way at compile
+    // time.
+    std::unordered_set<std::string> unique_locations;
+    for (const std::unique_ptr<const OatFile>& loaded_oat_file : oat_files_) {
+      DCHECK_NE(loaded_oat_file.get(), oat_file);
+      const std::string& location = loaded_oat_file->GetLocation();
+      if (std::find(boot_oat_files.begin(), boot_oat_files.end(), loaded_oat_file.get()) ==
+          boot_oat_files.end() && location != oat_file->GetLocation() &&
+          unique_locations.find(location) == unique_locations.end()) {
+        unique_locations.insert(location);
+        AddDexFilesFromOat(loaded_oat_file.get(), /*already_loaded*/true, &queue);
+      }
     }
   }
 
-  if (queue.empty()) {
-    // No other oat files, return early.
+  // Exit if shared libraries are ok. Do a full duplicate classes check otherwise.
+  const char* shared_libraries_cstr =
+      oat_file->GetOatHeader().GetStoreValueByKey(OatHeader::kClassPathKey);
+  const std::string shared_libraries(
+      shared_libraries_cstr != nullptr ? shared_libraries_cstr : "");
+  if (AreSharedLibrariesOk(shared_libraries, queue)) {
+    FreeDexFilesInHeap(&queue, owning_dex_files);
     return false;
   }
 
@@ -290,16 +536,17 @@
                            compare_pop.GetCachedDescriptor(),
                            compare_pop.GetDexFile()->GetLocation().c_str(),
                            top.GetDexFile()->GetLocation().c_str());
+          FreeDexFilesInHeap(&queue, owning_dex_files);
           return true;
         }
         queue.pop();
-        AddNext(&top, &queue);
+        AddNext(&top, &queue, owning_dex_files);
       } else {
         // Something else. Done here.
         break;
       }
     }
-    AddNext(&compare_pop, &queue);
+    AddNext(&compare_pop, &queue, owning_dex_files);
   }
 
   return false;
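
The merge logic above is essentially a k-way merge over sorted class-descriptor lists. A toy model, assuming each dex file contributes a pre-sorted descriptor list plus a flag saying whether it came from an already-loaded oat file; names and types are illustrative, not ART's:

    #include <queue>
    #include <string>
    #include <utility>
    #include <vector>

    // One cursor per dex file, pointing into its sorted descriptor list.
    struct Cursor {
      const std::vector<std::string>* classes;
      size_t index;
      bool already_loaded;
      const std::string& Descriptor() const { return (*classes)[index]; }
    };

    // Order cursors so that the smallest descriptor is on top (min-heap).
    struct DescriptorGreater {
      bool operator()(const Cursor& a, const Cursor& b) const {
        return a.Descriptor() > b.Descriptor();
      }
    };

    bool HasCollision(
        const std::vector<std::pair<std::vector<std::string>, bool>>& dex_files) {
      std::priority_queue<Cursor, std::vector<Cursor>, DescriptorGreater> heap;
      for (const auto& df : dex_files) {
        if (!df.first.empty()) {
          heap.push(Cursor{&df.first, 0, df.second});
        }
      }
      while (!heap.empty()) {
        Cursor top = heap.top();
        heap.pop();
        // Equal descriptors pop adjacently; disagreement on provenance
        // (old oat file vs. new oat file) is a collision.
        if (!heap.empty() && heap.top().Descriptor() == top.Descriptor() &&
            heap.top().already_loaded != top.already_loaded) {
          return true;
        }
        if (top.index + 1 < top.classes->size()) {
          heap.push(Cursor{top.classes, top.index + 1, top.already_loaded});
        }
      }
      return false;
    }
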
@@ -363,7 +610,8 @@
 
   if (oat_file != nullptr) {
     // Take the file only if it has no collisions, or we must take it because of preopting.
-    bool accept_oat_file = !HasCollisions(oat_file.get(), /*out*/ &error_msg);
+    bool accept_oat_file =
+        !HasCollisions(oat_file.get(), class_loader, dex_elements, /*out*/ &error_msg);
     if (!accept_oat_file) {
       // Failed the collision check. Print warning.
       if (Runtime::Current()->IsDexFileFallbackEnabled()) {
diff --git a/runtime/oat_file_manager.h b/runtime/oat_file_manager.h
index 7017dfc..a1d1275 100644
--- a/runtime/oat_file_manager.h
+++ b/runtime/oat_file_manager.h
@@ -116,9 +116,16 @@
   void DumpForSigQuit(std::ostream& os);
 
  private:
-  // Check for duplicate class definitions of the given oat file against all open oat files.
+  // Check that the shared libraries in the given oat file match those in the given class
+  // loader and dex elements. If the class loader is null, or one of the class loaders in
+  // the chain is unsupported, compare against all non-boot oat files instead. If the shared
+  // libraries do not match, check the given oat file for duplicate class definitions against
+  // those oat files (from the class loader and dex elements if possible, or all non-boot ones).
   // Return true if there are any class definition collisions in the oat_file.
-  bool HasCollisions(const OatFile* oat_file, /*out*/std::string* error_msg) const
+  bool HasCollisions(const OatFile* oat_file,
+                     jobject class_loader,
+                     jobjectArray dex_elements,
+                     /*out*/ std::string* error_msg) const
       REQUIRES(!Locks::oat_file_manager_lock_);
 
   const OatFile* FindOpenedOatFileFromOatLocationLocked(const std::string& oat_location) const
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 45ba7d0..63976d0 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -577,9 +577,14 @@
 
   if (!IsImageDex2OatEnabled() || !GetHeap()->HasBootImageSpace()) {
     ScopedObjectAccess soa(self);
-    StackHandleScope<1> hs(soa.Self());
-    auto klass(hs.NewHandle<mirror::Class>(mirror::Class::GetJavaLangClass()));
-    class_linker_->EnsureInitialized(soa.Self(), klass, true, true);
+    StackHandleScope<2> hs(soa.Self());
+
+    auto class_class(hs.NewHandle<mirror::Class>(mirror::Class::GetJavaLangClass()));
+    auto field_class(hs.NewHandle<mirror::Class>(mirror::Field::StaticClass()));
+
+    class_linker_->EnsureInitialized(soa.Self(), class_class, true, true);
+    // Field class is needed for register_java_net_InetAddress in libcore, b/28153851.
+    class_linker_->EnsureInitialized(soa.Self(), field_class, true, true);
   }
 
   // InitNativeMethods needs to be after started_ so that the classes
@@ -851,7 +856,7 @@
     if (index == 0) {
       // First file. See if this is a multi-image environment, and if so, enqueue the other images.
       const OatHeader& boot_oat_header = oat_file->GetOatHeader();
-      const char* boot_cp = boot_oat_header.GetStoreValueByKey(OatHeader::kBootClassPath);
+      const char* boot_cp = boot_oat_header.GetStoreValueByKey(OatHeader::kBootClassPathKey);
       if (boot_cp != nullptr) {
         gc::space::ImageSpace::CreateMultiImageLocations(image_locations[0],
                                                          boot_cp,
diff --git a/runtime/runtime_options.def b/runtime/runtime_options.def
index 6db0cb2..635ff51 100644
--- a/runtime/runtime_options.def
+++ b/runtime/runtime_options.def
@@ -129,8 +129,10 @@
 RUNTIME_OPTIONS_KEY (bool (*)(),          HookIsSensitiveThread)
 RUNTIME_OPTIONS_KEY (int32_t (*)(FILE* stream, const char* format, va_list ap), \
                                           HookVfprintf,                   vfprintf)
+// Use _exit instead of exit so that we won't get DCHECK failures in global data
+// destructors. b/28106055.
 RUNTIME_OPTIONS_KEY (void (*)(int32_t status), \
-                                          HookExit,                       exit)
+                                          HookExit,                       _exit)
                                                                           // We don't call abort(3) by default; see
                                                                           // Runtime::Abort.
 RUNTIME_OPTIONS_KEY (void (*)(),          HookAbort,                      nullptr)
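
The switch from exit to _exit matters because exit runs atexit handlers and the destructors of objects with static storage duration, which can trip DCHECKs once the runtime is partially torn down, while _exit terminates the process immediately. A small demonstration, with the DCHECK scenario simulated by a print:

    #include <cstdio>
    #include <cstdlib>
    #include <unistd.h>

    namespace {
    struct Global {
      // In ART, a destructor like this might contain a DCHECK that fires after
      // runtime teardown.
      ~Global() { std::puts("global destructor ran"); }
    };
    Global g;
    }  // namespace

    int main() {
      std::atexit([] { std::puts("atexit handler ran"); });
      // std::exit(0) would print both messages; _exit(0) prints neither.
      _exit(0);
    }
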
diff --git a/runtime/thread.cc b/runtime/thread.cc
index fb24828..4248944 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -513,14 +513,20 @@
   return stack_size;
 }
 
+// Return the nearest page-aligned address below the current stack top.
+NO_INLINE
+static uint8_t* FindStackTop() {
+  return reinterpret_cast<uint8_t*>(
+      AlignDown(__builtin_frame_address(0), kPageSize));
+}
+
 // Install a protected region in the stack.  This is used to trigger a SIGSEGV if a stack
 // overflow is detected.  It is located right below the stack_begin_.
 ATTRIBUTE_NO_SANITIZE_ADDRESS
 void Thread::InstallImplicitProtection() {
   uint8_t* pregion = tlsPtr_.stack_begin - kStackOverflowProtectedSize;
-  uint8_t* stack_himem = tlsPtr_.stack_end;
-  uint8_t* stack_top = reinterpret_cast<uint8_t*>(reinterpret_cast<uintptr_t>(&stack_himem) &
-      ~(kPageSize - 1));    // Page containing current top of stack.
+  // Page containing current top of stack.
+  uint8_t* stack_top = FindStackTop();
 
   // Try to directly protect the stack.
   VLOG(threads) << "installing stack protected region at " << std::hex <<
@@ -932,8 +938,7 @@
   }
 
   // Sanity check.
-  int stack_variable;
-  CHECK_GT(&stack_variable, reinterpret_cast<void*>(tlsPtr_.stack_end));
+  CHECK_GT(FindStackTop(), reinterpret_cast<void*>(tlsPtr_.stack_end));
 
   return true;
 }
@@ -2764,7 +2769,7 @@
     VisitDeclaringClass(m);
 
     // Process register map (which native and runtime methods don't have)
-    if (!m->IsNative() && !m->IsRuntimeMethod() && !m->IsProxyMethod()) {
+    if (!m->IsNative() && !m->IsRuntimeMethod() && (!m->IsProxyMethod() || m->IsConstructor())) {
       const OatQuickMethodHeader* method_header = GetCurrentOatQuickMethodHeader();
       DCHECK(method_header->IsOptimized());
       auto* vreg_base = reinterpret_cast<StackReference<mirror::Object>*>(
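
Two notes on the stack changes above: FindStackTop is marked NO_INLINE so that __builtin_frame_address(0) observes its own call frame rather than being folded into the caller's, and the inline masking trick it replaces is the standard align-down-to-page idiom. A sketch of that idiom together with the guard-region installation it feeds, assuming a power-of-two page size:

    #include <sys/mman.h>
    #include <cstdint>

    constexpr uintptr_t kPageSize = 4096;  // Assumed power of two.

    // Align an address down to the start of its page (what AlignDown does above).
    inline uint8_t* PageAlignDown(void* address) {
      return reinterpret_cast<uint8_t*>(
          reinterpret_cast<uintptr_t>(address) & ~(kPageSize - 1));
    }

    // The implicit protection then maps a PROT_NONE region just below the stack,
    // so an overflow faults with SIGSEGV instead of silently corrupting memory:
    //   uint8_t* pregion = stack_begin - protected_size;
    //   mprotect(pregion, protected_size, PROT_NONE);
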
diff --git a/test/044-proxy/expected.txt b/test/044-proxy/expected.txt
index be7023e..2a5f0b9 100644
--- a/test/044-proxy/expected.txt
+++ b/test/044-proxy/expected.txt
@@ -95,3 +95,5 @@
 5.8
 JNI_OnLoad called
 callback
+Found constructor.
+Found constructor with 0 exceptions
diff --git a/test/044-proxy/src/ConstructorProxy.java b/test/044-proxy/src/ConstructorProxy.java
new file mode 100644
index 0000000..95d150c
--- /dev/null
+++ b/test/044-proxy/src/ConstructorProxy.java
@@ -0,0 +1,53 @@
+/*
+ * Copyright 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Constructor;
+import java.lang.reflect.InvocationHandler;
+import java.lang.reflect.Method;
+import java.lang.reflect.Proxy;
+
+/**
+ * Tests proxies when used with constructor methods.
+ */
+class ConstructorProxy implements InvocationHandler {
+  public static void main() {
+    try {
+      new ConstructorProxy().runTest();
+    } catch (Exception e) {
+      System.out.println("Unexpected failure occured");
+      e.printStackTrace();
+    }
+  }
+
+  public void runTest() throws Exception {
+    Class<?> proxyClass = Proxy.getProxyClass(
+            getClass().getClassLoader(),
+            new Class<?>[] { Runnable.class }
+    );
+    Constructor<?> constructor = proxyClass.getConstructor(InvocationHandler.class);
+    System.out.println("Found constructor.");
+    // We used to crash when asking for the exception types of the constructor, because the
+    // runtime was not using the non-proxy ArtMethod.
+    Object[] exceptions = constructor.getExceptionTypes();
+    System.out.println("Found constructors with " + exceptions.length + " exceptions");
+  }
+
+  @Override
+  public Object invoke(Object proxy, Method method, Object[] args) throws Throwable {
+    return args[0];
+  }
+}
diff --git a/test/044-proxy/src/Main.java b/test/044-proxy/src/Main.java
index 1f23b95..9dadb7c 100644
--- a/test/044-proxy/src/Main.java
+++ b/test/044-proxy/src/Main.java
@@ -31,6 +31,7 @@
         NarrowingTest.main(null);
         FloatSelect.main(null);
         NativeProxy.main(args);
+        ConstructorProxy.main();
     }
 
     // The following code maps from the actual proxy class names (eg $Proxy2) to their test output
diff --git a/test/082-inline-execute/src/Main.java b/test/082-inline-execute/src/Main.java
index 9aaed9d..bf561e9 100644
--- a/test/082-inline-execute/src/Main.java
+++ b/test/082-inline-execute/src/Main.java
@@ -809,6 +809,7 @@
     Assert.assertEquals(Math.round(-3.0d), -3l);
     Assert.assertEquals(Math.round(0.49999999999999994d), 0l);
     Assert.assertEquals(Math.round(9007199254740991.0d), 9007199254740991l);  // 2^53 - 1
+    Assert.assertEquals(Math.round(-9007199254740991.0d), -9007199254740991l);  // -(2^53 - 1)
     Assert.assertEquals(Math.round(Double.NaN), (long)+0.0d);
     Assert.assertEquals(Math.round(Long.MAX_VALUE + 1.0d), Long.MAX_VALUE);
     Assert.assertEquals(Math.round(Long.MIN_VALUE - 1.0d), Long.MIN_VALUE);
@@ -832,7 +833,16 @@
     Assert.assertEquals(Math.round(-3.0f), -3);
     // 0.4999999701976776123046875
     Assert.assertEquals(Math.round(Float.intBitsToFloat(0x3EFFFFFF)), (int)+0.0f);
+    Assert.assertEquals(Math.round(8388607.0f), 8388607);  // 2^23 - 1
+    Assert.assertEquals(Math.round(8388607.5f), 8388608);  // 2^23 - 0.5
+    Assert.assertEquals(Math.round(8388608.0f), 8388608);  // 2^23
+    Assert.assertEquals(Math.round(-8388607.0f), -8388607);  // -(2^23 - 1)
+    Assert.assertEquals(Math.round(-8388607.5f), -8388607);  // -(2^23 - 0.5)
+    Assert.assertEquals(Math.round(-8388608.0f), -8388608);  // -2^23
     Assert.assertEquals(Math.round(16777215.0f), 16777215);  // 2^24 - 1
+    Assert.assertEquals(Math.round(16777216.0f), 16777216);  // 2^24
+    Assert.assertEquals(Math.round(-16777215.0f), -16777215);  // -(2^24 - 1)
+    Assert.assertEquals(Math.round(-16777216.0f), -16777216);  // -2^24
     Assert.assertEquals(Math.round(Float.NaN), (int)+0.0f);
     Assert.assertEquals(Math.round(Integer.MAX_VALUE + 1.0f), Integer.MAX_VALUE);
     Assert.assertEquals(Math.round(Integer.MIN_VALUE - 1.0f), Integer.MIN_VALUE);
@@ -1144,6 +1154,7 @@
     Assert.assertEquals(StrictMath.round(-3.0d), -3l);
     Assert.assertEquals(StrictMath.round(0.49999999999999994d), 0l);
     Assert.assertEquals(StrictMath.round(9007199254740991.0d), 9007199254740991l);  // 2^53 - 1
+    Assert.assertEquals(StrictMath.round(-9007199254740991.0d), -9007199254740991l);  // -(2^53 - 1)
     Assert.assertEquals(StrictMath.round(Double.NaN), (long)+0.0d);
     Assert.assertEquals(StrictMath.round(Long.MAX_VALUE + 1.0d), Long.MAX_VALUE);
     Assert.assertEquals(StrictMath.round(Long.MIN_VALUE - 1.0d), Long.MIN_VALUE);
@@ -1167,7 +1178,16 @@
     Assert.assertEquals(StrictMath.round(-3.0f), -3);
     // 0.4999999701976776123046875
     Assert.assertEquals(StrictMath.round(Float.intBitsToFloat(0x3EFFFFFF)), (int)+0.0f);
+    Assert.assertEquals(StrictMath.round(8388607.0f), 8388607);  // 2^23 - 1
+    Assert.assertEquals(StrictMath.round(8388607.5f), 8388608);  // 2^23 - 0.5
+    Assert.assertEquals(StrictMath.round(8388608.0f), 8388608);  // 2^23
+    Assert.assertEquals(StrictMath.round(-8388607.0f), -8388607);  // -(2^23 - 1)
+    Assert.assertEquals(StrictMath.round(-8388607.5f), -8388607);  // -(2^23 - 0.5)
+    Assert.assertEquals(StrictMath.round(-8388608.0f), -8388608);  // -2^23
     Assert.assertEquals(StrictMath.round(16777215.0f), 16777215);  // 2^24 - 1
+    Assert.assertEquals(StrictMath.round(16777216.0f), 16777216);  // 2^24
+    Assert.assertEquals(StrictMath.round(-16777215.0f), -16777215);  // -(2^24 - 1)
+    Assert.assertEquals(StrictMath.round(-16777216.0f), -16777216);  // -2^24
     Assert.assertEquals(StrictMath.round(Float.NaN), (int)+0.0f);
     Assert.assertEquals(StrictMath.round(Integer.MAX_VALUE + 1.0f), Integer.MAX_VALUE);
     Assert.assertEquals(StrictMath.round(Integer.MIN_VALUE - 1.0f), Integer.MIN_VALUE);
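
The new 2^23 and 2^24 boundary cases pin down where float rounding stops being interesting: below 2^23 a float's ulp is at most 0.5, so exact half values like 8388607.5f are representable and must round toward positive infinity (hence -8388607.5f yields -8388607); from 2^23 upward the ulp is at least 1.0, every float is already an integer, and round(x) == x. A quick check of the ulp sizes, as a C++ sketch:

    #include <cmath>
    #include <cstdio>

    int main() {
      // In [2^22, 2^23) the float ulp is 0.5, so 8388607.5f is representable.
      float half_case = 8388607.5f;
      std::printf("ulp at 2^23 - 0.5: %g\n",
                  std::nextafterf(half_case, 1e9f) - half_case);  // Prints 0.5.
      // From 2^23 upward the ulp is at least 1.0: no fractional floats remain.
      float integral = 8388608.0f;
      std::printf("ulp at 2^23:       %g\n",
                  std::nextafterf(integral, 1e9f) - integral);    // Prints 1.
      return 0;
    }
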
diff --git a/test/138-duplicate-classes-check/src/FancyLoader.java b/test/138-duplicate-classes-check/src/FancyLoader.java
deleted file mode 100644
index 03ec948..0000000
--- a/test/138-duplicate-classes-check/src/FancyLoader.java
+++ /dev/null
@@ -1,229 +0,0 @@
-/*
- * Copyright (C) 2008 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.File;
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.io.RandomAccessFile;
-import java.lang.reflect.Constructor;
-import java.lang.reflect.Method;
-import java.lang.reflect.InvocationTargetException;
-
-/**
- * A class loader with atypical behavior: we try to load a private
- * class implementation before asking the system or boot loader.  This
- * is used to create multiple classes with identical names in a single VM.
- *
- * If DexFile is available, we use that; if not, we assume we're not in
- * Dalvik and instantiate the class with defineClass().
- *
- * The location of the DEX files and class data is dependent upon the
- * test framework.
- */
-public class FancyLoader extends ClassLoader {
-    /* this is where the "alternate" .class files live */
-    static final String CLASS_PATH = "classes-ex/";
-
-    /* this is the "alternate" DEX/Jar file */
-    static final String DEX_FILE = System.getenv("DEX_LOCATION") +
-            "/138-duplicate-classes-check-ex.jar";
-
-    /* on Dalvik, this is a DexFile; otherwise, it's null */
-    private Class mDexClass;
-
-    private Object mDexFile;
-
-    /**
-     * Construct FancyLoader, grabbing a reference to the DexFile class
-     * if we're running under Dalvik.
-     */
-    public FancyLoader(ClassLoader parent) {
-        super(parent);
-
-        try {
-            mDexClass = parent.loadClass("dalvik.system.DexFile");
-        } catch (ClassNotFoundException cnfe) {
-            // ignore -- not running Dalvik
-        }
-    }
-
-    /**
-     * Finds the class with the specified binary name.
-     *
-     * We search for a file in CLASS_PATH or pull an entry from DEX_FILE.
-     * If we don't find a match, we throw an exception.
-     */
-    protected Class<?> findClass(String name) throws ClassNotFoundException
-    {
-        if (mDexClass != null) {
-            return findClassDalvik(name);
-        } else {
-            return findClassNonDalvik(name);
-        }
-    }
-
-    /**
-     * Finds the class with the specified binary name, from a DEX file.
-     */
-    private Class<?> findClassDalvik(String name)
-        throws ClassNotFoundException {
-
-        if (mDexFile == null) {
-            synchronized (FancyLoader.class) {
-                Constructor ctor;
-                /*
-                 * Construct a DexFile object through reflection.
-                 */
-                try {
-                    ctor = mDexClass.getConstructor(new Class[] {String.class});
-                } catch (NoSuchMethodException nsme) {
-                    throw new ClassNotFoundException("getConstructor failed",
-                        nsme);
-                }
-
-                try {
-                    mDexFile = ctor.newInstance(DEX_FILE);
-                } catch (InstantiationException ie) {
-                    throw new ClassNotFoundException("newInstance failed", ie);
-                } catch (IllegalAccessException iae) {
-                    throw new ClassNotFoundException("newInstance failed", iae);
-                } catch (InvocationTargetException ite) {
-                    throw new ClassNotFoundException("newInstance failed", ite);
-                }
-            }
-        }
-
-        /*
-         * Call DexFile.loadClass(String, ClassLoader).
-         */
-        Method meth;
-
-        try {
-            meth = mDexClass.getMethod("loadClass",
-                    new Class[] { String.class, ClassLoader.class });
-        } catch (NoSuchMethodException nsme) {
-            throw new ClassNotFoundException("getMethod failed", nsme);
-        }
-
-        try {
-            meth.invoke(mDexFile, name, this);
-        } catch (IllegalAccessException iae) {
-            throw new ClassNotFoundException("loadClass failed", iae);
-        } catch (InvocationTargetException ite) {
-            throw new ClassNotFoundException("loadClass failed",
-                ite.getCause());
-        }
-
-        return null;
-    }
-
-    /**
-     * Finds the class with the specified binary name, from .class files.
-     */
-    private Class<?> findClassNonDalvik(String name)
-        throws ClassNotFoundException {
-
-        String pathName = CLASS_PATH + name + ".class";
-        //System.out.println("--- Fancy: looking for " + pathName);
-
-        File path = new File(pathName);
-        RandomAccessFile raf;
-
-        try {
-            raf = new RandomAccessFile(path, "r");
-        } catch (FileNotFoundException fnfe) {
-            throw new ClassNotFoundException("Not found: " + pathName);
-        }
-
-        /* read the entire file in */
-        byte[] fileData;
-        try {
-            fileData = new byte[(int) raf.length()];
-            raf.readFully(fileData);
-        } catch (IOException ioe) {
-            throw new ClassNotFoundException("Read error: " + pathName);
-        } finally {
-            try {
-                raf.close();
-            } catch (IOException ioe) {
-                // drop
-            }
-        }
-
-        /* create the class */
-        //System.out.println("--- Fancy: defining " + name);
-        try {
-            return defineClass(name, fileData, 0, fileData.length);
-        } catch (Throwable th) {
-            throw new ClassNotFoundException("defineClass failed", th);
-        }
-    }
-
-    /**
-     * Load a class.
-     *
-     * Normally a class loader wouldn't override this, but we want our
-     * version of the class to take precedence over an already-loaded
-     * version.
-     *
-     * We still want the system classes (e.g. java.lang.Object) from the
-     * bootstrap class loader.
-     */
-    protected Class<?> loadClass(String name, boolean resolve)
-        throws ClassNotFoundException
-    {
-        Class res;
-
-        /*
-         * 1. Invoke findLoadedClass(String) to check if the class has
-         * already been loaded.
-         *
-         * This doesn't change.
-         */
-        res = findLoadedClass(name);
-        if (res != null) {
-            System.out.println("FancyLoader.loadClass: "
-                + name + " already loaded");
-            if (resolve)
-                resolveClass(res);
-            return res;
-        }
-
-        /*
-         * 3. Invoke the findClass(String) method to find the class.
-         */
-        try {
-            res = findClass(name);
-            if (resolve)
-                resolveClass(res);
-        }
-        catch (ClassNotFoundException e) {
-            // we couldn't find it, so eat the exception and keep going
-        }
-
-        /*
-         * 2. Invoke the loadClass method on the parent class loader.  If
-         * the parent loader is null the class loader built-in to the
-         * virtual machine is used, instead.
-         *
-         * (Since we're not in java.lang, we can't actually invoke the
-         * parent's loadClass() method, but we passed our parent to the
-         * super-class which can take care of it for us.)
-         */
-        res = super.loadClass(name, resolve);   // returns class or throws
-        return res;
-    }
-}
diff --git a/test/138-duplicate-classes-check/src/Main.java b/test/138-duplicate-classes-check/src/Main.java
index a9b5bb0..a2ef281 100644
--- a/test/138-duplicate-classes-check/src/Main.java
+++ b/test/138-duplicate-classes-check/src/Main.java
@@ -14,6 +14,7 @@
  * limitations under the License.
  */
 
+import dalvik.system.DexClassLoader;
 import java.io.File;
 import java.lang.reflect.Method;
 
@@ -30,7 +31,11 @@
 
         // Now run the class from the -ex file.
 
-        FancyLoader loader = new FancyLoader(getClass().getClassLoader());
+        String dexPath = System.getenv("DEX_LOCATION") + "/138-duplicate-classes-check-ex.jar";
+        String optimizedDirectory = System.getenv("DEX_LOCATION");
+        String librarySearchPath = null;
+        DexClassLoader loader = new DexClassLoader(dexPath, optimizedDirectory, librarySearchPath,
+                getClass().getClassLoader());
 
         try {
             Class testEx = loader.loadClass("TestEx");
diff --git a/test/148-multithread-gc-annotations/check b/test/148-multithread-gc-annotations/check
new file mode 100755
index 0000000..842bdc6
--- /dev/null
+++ b/test/148-multithread-gc-annotations/check
@@ -0,0 +1,22 @@
+#!/bin/bash
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Check that the string "error" isn't present
+if grep error "$2"; then
+    exit 1
+else
+    exit 0
+fi
diff --git a/test/148-multithread-gc-annotations/expected.txt b/test/148-multithread-gc-annotations/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/148-multithread-gc-annotations/expected.txt
diff --git a/test/148-multithread-gc-annotations/gc_coverage.cc b/test/148-multithread-gc-annotations/gc_coverage.cc
new file mode 100644
index 0000000..263eefd
--- /dev/null
+++ b/test/148-multithread-gc-annotations/gc_coverage.cc
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "gc/heap.h"
+#include "jni.h"
+#include "runtime.h"
+#include "scoped_thread_state_change.h"
+#include "thread-inl.h"
+
+namespace art {
+namespace {
+
+extern "C" JNIEXPORT jboolean JNICALL Java_MovingGCThread_performHomogeneousSpaceCompact(JNIEnv*, jclass) {
+  return Runtime::Current()->GetHeap()->PerformHomogeneousSpaceCompact() == gc::kSuccess ?
+      JNI_TRUE : JNI_FALSE;
+}
+
+extern "C" JNIEXPORT jboolean JNICALL Java_MovingGCThread_supportHomogeneousSpaceCompact(JNIEnv*, jclass) {
+  return Runtime::Current()->GetHeap()->SupportHomogeneousSpaceCompactAndCollectorTransitions() ?
+      JNI_TRUE : JNI_FALSE;
+}
+
+extern "C" JNIEXPORT jlong JNICALL Java_MovingGCThread_objectAddress(JNIEnv* env, jclass, jobject object) {
+  ScopedObjectAccess soa(env);
+  return reinterpret_cast<jlong>(soa.Decode<mirror::Object*>(object));
+}
+
+}  // namespace
+}  // namespace art
diff --git a/test/148-multithread-gc-annotations/info.txt b/test/148-multithread-gc-annotations/info.txt
new file mode 100644
index 0000000..c62e544
--- /dev/null
+++ b/test/148-multithread-gc-annotations/info.txt
@@ -0,0 +1 @@
+Tests that getting annotations works during moving gc.
diff --git a/test/148-multithread-gc-annotations/src/AnnoClass1.java b/test/148-multithread-gc-annotations/src/AnnoClass1.java
new file mode 100644
index 0000000..b82c61f
--- /dev/null
+++ b/test/148-multithread-gc-annotations/src/AnnoClass1.java
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.annotation.*;
+
+@Retention(RetentionPolicy.RUNTIME)
+@Target(ElementType.TYPE)
+public @interface AnnoClass1 {
+    Class value();
+}
diff --git a/test/148-multithread-gc-annotations/src/AnnoClass2.java b/test/148-multithread-gc-annotations/src/AnnoClass2.java
new file mode 100644
index 0000000..c75d950
--- /dev/null
+++ b/test/148-multithread-gc-annotations/src/AnnoClass2.java
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.annotation.*;
+
+@Retention(RetentionPolicy.RUNTIME)
+@Target(ElementType.TYPE)
+public @interface AnnoClass2 {
+    Class value();
+}
diff --git a/test/148-multithread-gc-annotations/src/AnnoClass3.java b/test/148-multithread-gc-annotations/src/AnnoClass3.java
new file mode 100644
index 0000000..5b4a378
--- /dev/null
+++ b/test/148-multithread-gc-annotations/src/AnnoClass3.java
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.annotation.*;
+
+@Retention(RetentionPolicy.RUNTIME)
+@Target(ElementType.TYPE)
+public @interface AnnoClass3 {
+    Class value();
+}
diff --git a/test/148-multithread-gc-annotations/src/AnnotationThread.java b/test/148-multithread-gc-annotations/src/AnnotationThread.java
new file mode 100644
index 0000000..ebc14e9
--- /dev/null
+++ b/test/148-multithread-gc-annotations/src/AnnotationThread.java
@@ -0,0 +1,32 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.annotation.*;
+
+@AnnoClass1(AnnoClass2.class)
+@AnnoClass2(AnnoClass3.class)
+@AnnoClass3(AnnoClass1.class)
+public class AnnotationThread implements Runnable {
+    public void run() {
+        for (int i = 0; i < 20; i++) {
+            Annotation[] annotations = AnnotationThread.class.getAnnotations();
+            if (annotations == null) {
+                System.out.println("error: AnnotationThread class has no annotations");
+                return;
+            }
+        }
+    }
+}
diff --git a/test/148-multithread-gc-annotations/src/Main.java b/test/148-multithread-gc-annotations/src/Main.java
new file mode 100644
index 0000000..b652ed6
--- /dev/null
+++ b/test/148-multithread-gc-annotations/src/Main.java
@@ -0,0 +1,32 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+    public static void main(String[] args) {
+        System.loadLibrary(args[0]);
+        Thread annoThread = new Thread(new AnnotationThread(), "Annotation thread");
+        Thread gcThread = new Thread(new MovingGCThread(), "Moving GC thread");
+        annoThread.start();
+        gcThread.start();
+        try {
+            annoThread.join();
+            gcThread.join();
+        } catch (InterruptedException e) {
+            System.out.println("error: " + e);
+        }
+        System.out.println("Done.");
+    }
+}
diff --git a/test/148-multithread-gc-annotations/src/MovingGCThread.java b/test/148-multithread-gc-annotations/src/MovingGCThread.java
new file mode 100644
index 0000000..87de9f4
--- /dev/null
+++ b/test/148-multithread-gc-annotations/src/MovingGCThread.java
@@ -0,0 +1,62 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.TreeMap;
+
+public class MovingGCThread implements Runnable {
+    private static TreeMap<Integer, Object> treeMap = new TreeMap<Integer, Object>();
+
+    public void run() {
+        for (int i = 0; i < 20; i++) {
+            testHomogeneousCompaction();
+        }
+    }
+
+    public static void testHomogeneousCompaction() {
+        final boolean supportHSC = supportHomogeneousSpaceCompact();
+        if (!supportHSC) {
+            return;
+        }
+        Object o = new Object();
+        long addressBefore = objectAddress(o);
+        allocateStuff();
+        final boolean success = performHomogeneousSpaceCompact();
+        allocateStuff();
+        if (!success) {
+            System.out.println("error: Expected " + supportHSC + " but got " + success);
+        }
+        allocateStuff();
+        long addressAfter = objectAddress(o);
+        // This relies on the compaction copying from one space to another space and there being
+        // no overlap.
+        if (addressBefore == addressAfter) {
+            System.out.println("error: Expected different adddress " + addressBefore + " vs " +
+                    addressAfter);
+        }
+    }
+
+    private static void allocateStuff() {
+        for (int i = 0; i < 1000; ++i) {
+            Object o = new Object();
+            treeMap.put(o.hashCode(), o);
+        }
+    }
+
+    // Methods to get access to ART internals.
+    private static native boolean supportHomogeneousSpaceCompact();
+    private static native boolean performHomogeneousSpaceCompact();
+    private static native long objectAddress(Object object);
+}
diff --git a/test/536-checker-intrinsic-optimization/src/Main.java b/test/536-checker-intrinsic-optimization/src/Main.java
index be666e9..15a9504 100644
--- a/test/536-checker-intrinsic-optimization/src/Main.java
+++ b/test/536-checker-intrinsic-optimization/src/Main.java
@@ -16,9 +16,69 @@
 
 
 public class Main {
+  public static boolean doThrow = false;
+
+  public static void assertIntEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static void assertBooleanEquals(boolean expected, boolean result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
   public static void main(String[] args) {
     stringEqualsSame();
     stringArgumentNotNull("Foo");
+
+    assertIntEquals(0, $opt$noinline$getStringLength(""));
+    assertIntEquals(3, $opt$noinline$getStringLength("abc"));
+    assertIntEquals(10, $opt$noinline$getStringLength("0123456789"));
+
+    assertBooleanEquals(true, $opt$noinline$isStringEmpty(""));
+    assertBooleanEquals(false, $opt$noinline$isStringEmpty("abc"));
+    assertBooleanEquals(false, $opt$noinline$isStringEmpty("0123456789"));
+  }
+
+  /// CHECK-START: int Main.$opt$noinline$getStringLength(java.lang.String) instruction_simplifier (before)
+  /// CHECK-DAG:  <<Length:i\d+>>   InvokeVirtual intrinsic:StringLength
+  /// CHECK-DAG:                    Return [<<Length>>]
+
+  /// CHECK-START: int Main.$opt$noinline$getStringLength(java.lang.String) instruction_simplifier (after)
+  /// CHECK-DAG:  <<String:l\d+>>   ParameterValue
+  /// CHECK-DAG:  <<NullCk:l\d+>>   NullCheck [<<String>>]
+  /// CHECK-DAG:  <<Length:i\d+>>   ArrayLength [<<NullCk>>] is_string_length:true
+  /// CHECK-DAG:                    Return [<<Length>>]
+
+  /// CHECK-START: int Main.$opt$noinline$getStringLength(java.lang.String) instruction_simplifier (after)
+  /// CHECK-NOT:                    InvokeVirtual intrinsic:StringLength
+
+  public static int $opt$noinline$getStringLength(String s) {
+    if (doThrow) { throw new Error(); }
+    return s.length();
+  }
+
+  /// CHECK-START: boolean Main.$opt$noinline$isStringEmpty(java.lang.String) instruction_simplifier (before)
+  /// CHECK-DAG:  <<IsEmpty:z\d+>>  InvokeVirtual intrinsic:StringIsEmpty
+  /// CHECK-DAG:                    Return [<<IsEmpty>>]
+
+  /// CHECK-START: boolean Main.$opt$noinline$isStringEmpty(java.lang.String) instruction_simplifier (after)
+  /// CHECK-DAG:  <<String:l\d+>>   ParameterValue
+  /// CHECK-DAG:  <<Const0:i\d+>>   IntConstant 0
+  /// CHECK-DAG:  <<NullCk:l\d+>>   NullCheck [<<String>>]
+  /// CHECK-DAG:  <<Length:i\d+>>   ArrayLength [<<NullCk>>] is_string_length:true
+  /// CHECK-DAG:  <<IsEmpty:z\d+>>  Equal [<<Length>>,<<Const0>>]
+  /// CHECK-DAG:                    Return [<<IsEmpty>>]
+
+  /// CHECK-START: boolean Main.$opt$noinline$isStringEmpty(java.lang.String) instruction_simplifier (after)
+  /// CHECK-NOT:                    InvokeVirtual intrinsic:StringIsEmpty
+
+  public static boolean $opt$noinline$isStringEmpty(String s) {
+    if (doThrow) { throw new Error(); }
+    return s.isEmpty();
   }
 
   /// CHECK-START: boolean Main.stringEqualsSame() instruction_simplifier (before)
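In Java terms, the rewrite these CHECK lines verify replaces the StringLength
and StringIsEmpty intrinsic invokes with a direct length read (modeled on HIR
as ArrayLength with is_string_length:true), plus a comparison against zero for
isEmpty(). A source-level rendering of the post-simplifier shape, illustrative
only since the transform operates on HIR rather than Java source:

    // Illustrative: what the simplified HIR computes for s.isEmpty().
    static boolean isEmptyExpanded(String s) {
        int length = s.length();  // ArrayLength [NullCheck(s)] is_string_length:true
        return length == 0;       // Equal [Length, IntConstant 0]
    }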
diff --git a/test/597-deopt-new-string/src/Main.java b/test/597-deopt-new-string/src/Main.java
index 1224e40..e78f0d3 100644
--- a/test/597-deopt-new-string/src/Main.java
+++ b/test/597-deopt-new-string/src/Main.java
@@ -48,7 +48,12 @@
             throw new Error();
         }
         char[] arr = {'a', 'b', 'c'};
-        return new String(arr, 0, arr.length);
+        String str = new String(arr, 0, arr.length);
+        if (!str.equals("abc")) {
+            System.out.println("Failure 1! " + str);
+            System.exit(0);
+        }
+        return str;
     }
 
     public void run() {
@@ -68,7 +73,11 @@
         } else {
             // This thread keeps doing new String() from a char array.
             while (!done) {
-                $noinline$run0();
+                String str = $noinline$run0();
+                if (!str.equals("abc")) {
+                    System.out.println("Failure 2! " + str);
+                    System.exit(0);
+                }
             }
         }
     }
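The two new checks follow one pattern: every String produced by the deopt
candidate is validated immediately, so a result lost or corrupted across
deoptimization fails fast instead of silently. The shared shape, factored out
for clarity (failure IDs match the messages above):

    // Validates a String built while deoptimization may be in progress.
    private static String checkedAbc(String str, int failureId) {
        if (!str.equals("abc")) {
            System.out.println("Failure " + failureId + "! " + str);
            System.exit(0);
        }
        return str;
    }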
diff --git a/test/599-checker-irreducible-loop/expected.txt b/test/599-checker-irreducible-loop/expected.txt
new file mode 100644
index 0000000..573541a
--- /dev/null
+++ b/test/599-checker-irreducible-loop/expected.txt
@@ -0,0 +1 @@
+0
diff --git a/test/599-checker-irreducible-loop/info.txt b/test/599-checker-irreducible-loop/info.txt
new file mode 100644
index 0000000..1e0dd02
--- /dev/null
+++ b/test/599-checker-irreducible-loop/info.txt
@@ -0,0 +1,2 @@
+Regression test for the optimizing compiler in the presence of
+an irreducible loop.
diff --git a/test/599-checker-irreducible-loop/smali/IrreducibleLoop.smali b/test/599-checker-irreducible-loop/smali/IrreducibleLoop.smali
new file mode 100644
index 0000000..5331fd6
--- /dev/null
+++ b/test/599-checker-irreducible-loop/smali/IrreducibleLoop.smali
@@ -0,0 +1,56 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LIrreducibleLoop;
+
+.super Ljava/lang/Object;
+
+## CHECK-START: int IrreducibleLoop.test(int) GVN (before)
+## CHECK-DAG:                     LoadClass loop:none
+## CHECK-DAG:                     LoadClass loop:{{B\d+}} outer_loop:none
+
+## CHECK-START: int IrreducibleLoop.test(int) GVN (after)
+## CHECK-DAG:                     LoadClass loop:none
+## CHECK-DAG:                     LoadClass loop:{{B\d+}} outer_loop:none
+.method public static test(I)I
+   .registers 2
+
+   sget v0, LIrreducibleLoop;->field1:I
+   sput v0, LIrreducibleLoop;->field2:I
+
+   if-eqz p0, :loop_entry
+   goto :exit
+
+   :loop_entry
+   if-eqz p0, :irreducible_loop_entry
+   sget v0, LIrreducibleLoop;->field2:I
+   sput v0, LIrreducibleLoop;->field1:I
+   if-eqz v0, :exit
+   goto :irreducible_other_loop_entry
+
+   :irreducible_loop_entry
+   if-eqz p0, :loop_back_edge
+   :irreducible_other_loop_entry
+   if-eqz v0, :loop_back_edge
+   goto :irreducible_loop_entry
+
+   :loop_back_edge
+   goto :loop_entry
+
+   :exit
+   return v0
+.end method
+
+.field public static field1:I
+.field public static field2:I
diff --git a/test/599-checker-irreducible-loop/src/Main.java b/test/599-checker-irreducible-loop/src/Main.java
new file mode 100644
index 0000000..b47721f
--- /dev/null
+++ b/test/599-checker-irreducible-loop/src/Main.java
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+
+public class Main {
+  // Workaround for b/18051191.
+  class InnerClass {}
+
+  public static void main(String[] args) throws Exception {
+    Class<?> c = Class.forName("IrreducibleLoop");
+    Method m = c.getMethod("test", int.class);
+    Object[] arguments = { 42 };
+    // Invoke the code just for sanity checking.
+    System.out.println(m.invoke(null, arguments));
+  }
+}
diff --git a/test/800-smali/smali/b_28187158.smali b/test/800-smali/smali/b_28187158.smali
index 14d5cec..47e5ef6 100644
--- a/test/800-smali/smali/b_28187158.smali
+++ b/test/800-smali/smali/b_28187158.smali
@@ -9,4 +9,3 @@
    iget v0, p0, Ljava/lang/System;->in:Ljava/io/InputStream;
    return-void
 .end method
-
diff --git a/test/800-smali/src/Main.java b/test/800-smali/src/Main.java
index c883b7f..b2fc005 100644
--- a/test/800-smali/src/Main.java
+++ b/test/800-smali/src/Main.java
@@ -174,7 +174,7 @@
         testCases.add(new TestCase("b/27799205 (5)", "B27799205Helper", "run5", null,
                 new VerifyError(), null));
         testCases.add(new TestCase("b/27799205 (6)", "B27799205Helper", "run6", null, null, null));
-        testCases.add(new TestCase("b/28187158", "B28187158", "run", new Object[] { null} ,
+        testCases.add(new TestCase("b/28187158", "B28187158", "run", new Object[] { null },
                 new VerifyError(), null));
     }
 
diff --git a/test/804-class-extends-itself/expected.txt b/test/804-class-extends-itself/expected.txt
new file mode 100644
index 0000000..b98f963
--- /dev/null
+++ b/test/804-class-extends-itself/expected.txt
@@ -0,0 +1,2 @@
+Caught ClassCircularityError
+Done!
diff --git a/test/804-class-extends-itself/info.txt b/test/804-class-extends-itself/info.txt
new file mode 100644
index 0000000..c48934c
--- /dev/null
+++ b/test/804-class-extends-itself/info.txt
@@ -0,0 +1 @@
+Exercise class linker check for classes extending themselves (b/28685551).
diff --git a/test/804-class-extends-itself/smali/Main.smali b/test/804-class-extends-itself/smali/Main.smali
new file mode 100644
index 0000000..5c349ed
--- /dev/null
+++ b/test/804-class-extends-itself/smali/Main.smali
@@ -0,0 +1,57 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# We cannot implement Main in Java, as this would require running
+# dexmerger (to merge the Dex file produced from Smali code and the
+# Dex file produced from Java code), which loops indefinitely when
+# processing class B28685551, as this class inherits from itself.  As
+# a workaround, implement Main using Smali (we could also have used
+# multidex, but this requires a custom build script).
+
+.class public LMain;
+.super Ljava/lang/Object;
+
+.method public static main([Ljava/lang/String;)V
+    .registers 3
+    .param p0, "args"
+
+    invoke-static {}, LMain;->test()V
+    sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    const-string v1, "Done!"
+    invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
+    return-void
+.end method
+
+.method static test()V
+    .registers 4
+
+    :try_start
+    const-string v2, "B28685551"
+    invoke-static {v2}, Ljava/lang/Class;->forName(Ljava/lang/String;)Ljava/lang/Class;
+    :try_end
+    .catch Ljava/lang/ClassCircularityError; {:try_start .. :try_end} :catch
+
+    move-result-object v0
+
+    :goto_7
+    return-void
+
+    :catch
+    move-exception v1
+    .local v1, "e":Ljava/lang/ClassCircularityError;
+    sget-object v2, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    const-string v3, "Caught ClassCircularityError"
+    invoke-virtual {v2, v3}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
+    goto :goto_7
+.end method
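For orientation, the smali test() above corresponds to the following Java; as
the header comment explains, the test itself cannot be written in Java because
dexmerger loops indefinitely on the self-extending class:

    static void test() throws ClassNotFoundException {
        try {
            Class.forName("B28685551");
        } catch (ClassCircularityError e) {
            System.out.println("Caught ClassCircularityError");
        }
    }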
diff --git a/test/804-class-extends-itself/smali/b_28685551.smali b/test/804-class-extends-itself/smali/b_28685551.smali
new file mode 100644
index 0000000..d98c6e3
--- /dev/null
+++ b/test/804-class-extends-itself/smali/b_28685551.smali
@@ -0,0 +1,18 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Regression test for a class inheriting from itself.
+
+.class public LB28685551;
+.super LB28685551;
diff --git a/test/976-conflict-no-methods/expected.txt b/test/976-conflict-no-methods/expected.txt
new file mode 100644
index 0000000..656dfc5
--- /dev/null
+++ b/test/976-conflict-no-methods/expected.txt
@@ -0,0 +1 @@
+Pass
diff --git a/test/976-conflict-no-methods/info.txt b/test/976-conflict-no-methods/info.txt
new file mode 100644
index 0000000..cdc3149
--- /dev/null
+++ b/test/976-conflict-no-methods/info.txt
@@ -0,0 +1 @@
+Regression test for classes that have conflict tables but no methods. b/28707801
\ No newline at end of file
diff --git a/test/976-conflict-no-methods/smali/Iface.smali b/test/976-conflict-no-methods/smali/Iface.smali
new file mode 100644
index 0000000..aa4ec37
--- /dev/null
+++ b/test/976-conflict-no-methods/smali/Iface.smali
@@ -0,0 +1,281 @@
+# /*
+#  * Copyright (C) 2016 The Android Open Source Project
+#  *
+#  * Licensed under the Apache License, Version 2.0 (the "License");
+#  * you may not use this file except in compliance with the License.
+#  * You may obtain a copy of the License at
+#  *
+#  *      http://www.apache.org/licenses/LICENSE-2.0
+#  *
+#  * Unless required by applicable law or agreed to in writing, software
+#  * distributed under the License is distributed on an "AS IS" BASIS,
+#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  * See the License for the specific language governing permissions and
+#  * limitations under the License.
+#  */
+#
+# public interface Iface {
+#    public void abstractMethod0();
+#    public void abstractMethod1();
+#    public void abstractMethod2();
+#    public void abstractMethod3();
+#    public void abstractMethod4();
+#    public void abstractMethod5();
+#    public void abstractMethod6();
+#    public void abstractMethod7();
+#    public void abstractMethod8();
+#    public void abstractMethod9();
+#    public void abstractMethod10();
+#    public void abstractMethod11();
+#    public void abstractMethod12();
+#    public void abstractMethod13();
+#    public void abstractMethod14();
+#    public void abstractMethod15();
+#    public void abstractMethod16();
+#    public void abstractMethod17();
+#    public void abstractMethod18();
+#    public void abstractMethod19();
+#    public void abstractMethod20();
+#    public void abstractMethod21();
+#    public void abstractMethod22();
+#    public void abstractMethod23();
+#    public void abstractMethod24();
+#    public void abstractMethod25();
+#    public void abstractMethod26();
+#    public void abstractMethod27();
+#    public void abstractMethod28();
+#    public void abstractMethod29();
+#    public void abstractMethod30();
+#    public void abstractMethod31();
+#    public void abstractMethod32();
+#    public void abstractMethod33();
+#    public void abstractMethod34();
+#    public void abstractMethod35();
+#    public void abstractMethod36();
+#    public void abstractMethod37();
+#    public void abstractMethod38();
+#    public void abstractMethod39();
+#    public void abstractMethod40();
+#    public void abstractMethod41();
+#    public void abstractMethod42();
+#    public void abstractMethod43();
+#    public void abstractMethod44();
+#    public void abstractMethod45();
+#    public void abstractMethod46();
+#    public void abstractMethod47();
+#    public void abstractMethod48();
+#    public void abstractMethod49();
+#    public void abstractMethod50();
+#    public void abstractMethod51();
+#    public void abstractMethod52();
+#    public void abstractMethod53();
+#    public void abstractMethod54();
+#    public void abstractMethod55();
+#    public void abstractMethod56();
+#    public void abstractMethod57();
+#    public void abstractMethod58();
+#    public void abstractMethod59();
+#    public void abstractMethod60();
+#    public void abstractMethod61();
+#    public void abstractMethod62();
+#    public void abstractMethod63();
+#    public void abstractMethod64();
+# }
+
+.class public abstract interface LIface;
+.super Ljava/lang/Object;
+
+.method public abstract abstractMethod0()V
+.end method
+
+.method public abstract abstractMethod1()V
+.end method
+
+.method public abstract abstractMethod2()V
+.end method
+
+.method public abstract abstractMethod3()V
+.end method
+
+.method public abstract abstractMethod4()V
+.end method
+
+.method public abstract abstractMethod5()V
+.end method
+
+.method public abstract abstractMethod6()V
+.end method
+
+.method public abstract abstractMethod7()V
+.end method
+
+.method public abstract abstractMethod8()V
+.end method
+
+.method public abstract abstractMethod9()V
+.end method
+
+.method public abstract abstractMethod10()V
+.end method
+
+.method public abstract abstractMethod11()V
+.end method
+
+.method public abstract abstractMethod12()V
+.end method
+
+.method public abstract abstractMethod13()V
+.end method
+
+.method public abstract abstractMethod14()V
+.end method
+
+.method public abstract abstractMethod15()V
+.end method
+
+.method public abstract abstractMethod16()V
+.end method
+
+.method public abstract abstractMethod17()V
+.end method
+
+.method public abstract abstractMethod18()V
+.end method
+
+.method public abstract abstractMethod19()V
+.end method
+
+.method public abstract abstractMethod20()V
+.end method
+
+.method public abstract abstractMethod21()V
+.end method
+
+.method public abstract abstractMethod22()V
+.end method
+
+.method public abstract abstractMethod23()V
+.end method
+
+.method public abstract abstractMethod24()V
+.end method
+
+.method public abstract abstractMethod25()V
+.end method
+
+.method public abstract abstractMethod26()V
+.end method
+
+.method public abstract abstractMethod27()V
+.end method
+
+.method public abstract abstractMethod28()V
+.end method
+
+.method public abstract abstractMethod29()V
+.end method
+
+.method public abstract abstractMethod30()V
+.end method
+
+.method public abstract abstractMethod31()V
+.end method
+
+.method public abstract abstractMethod32()V
+.end method
+
+.method public abstract abstractMethod33()V
+.end method
+
+.method public abstract abstractMethod34()V
+.end method
+
+.method public abstract abstractMethod35()V
+.end method
+
+.method public abstract abstractMethod36()V
+.end method
+
+.method public abstract abstractMethod37()V
+.end method
+
+.method public abstract abstractMethod38()V
+.end method
+
+.method public abstract abstractMethod39()V
+.end method
+
+.method public abstract abstractMethod40()V
+.end method
+
+.method public abstract abstractMethod41()V
+.end method
+
+.method public abstract abstractMethod42()V
+.end method
+
+.method public abstract abstractMethod43()V
+.end method
+
+.method public abstract abstractMethod44()V
+.end method
+
+.method public abstract abstractMethod45()V
+.end method
+
+.method public abstract abstractMethod46()V
+.end method
+
+.method public abstract abstractMethod47()V
+.end method
+
+.method public abstract abstractMethod48()V
+.end method
+
+.method public abstract abstractMethod49()V
+.end method
+
+.method public abstract abstractMethod50()V
+.end method
+
+.method public abstract abstractMethod51()V
+.end method
+
+.method public abstract abstractMethod52()V
+.end method
+
+.method public abstract abstractMethod53()V
+.end method
+
+.method public abstract abstractMethod54()V
+.end method
+
+.method public abstract abstractMethod55()V
+.end method
+
+.method public abstract abstractMethod56()V
+.end method
+
+.method public abstract abstractMethod57()V
+.end method
+
+.method public abstract abstractMethod58()V
+.end method
+
+.method public abstract abstractMethod59()V
+.end method
+
+.method public abstract abstractMethod60()V
+.end method
+
+.method public abstract abstractMethod61()V
+.end method
+
+.method public abstract abstractMethod62()V
+.end method
+
+.method public abstract abstractMethod63()V
+.end method
+
+.method public abstract abstractMethod64()V
+.end method
diff --git a/test/976-conflict-no-methods/smali/Main.smali b/test/976-conflict-no-methods/smali/Main.smali
new file mode 100644
index 0000000..7dd1160
--- /dev/null
+++ b/test/976-conflict-no-methods/smali/Main.smali
@@ -0,0 +1,358 @@
+# /*
+#  * Copyright (C) 2016 The Android Open Source Project
+#  *
+#  * Licensed under the Apache License, Version 2.0 (the "License");
+#  * you may not use this file except in compliance with the License.
+#  * You may obtain a copy of the License at
+#  *
+#  *      http://www.apache.org/licenses/LICENSE-2.0
+#  *
+#  * Unless required by applicable law or agreed to in writing, software
+#  * distributed under the License is distributed on an "AS IS" BASIS,
+#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  * See the License for the specific language governing permissions and
+#  * limitations under the License.
+#  */
+#
+.class public LMain;
+.super Ljava/lang/Object;
+.implements LIface;
+
+.method public constructor <init>()V
+    .registers 1
+    invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+    return-void
+.end method
+
+.method public static main([Ljava/lang/String;)V
+    .locals 2
+    sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    const-string v1, "Pass"
+    invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+    return-void
+.end method
+
+.method public abstractMethod0()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod1()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod2()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod3()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod4()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod5()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod6()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod7()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod8()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod9()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod10()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod11()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod12()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod13()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod14()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod15()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod16()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod17()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod18()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod19()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod20()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod21()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod22()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod23()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod24()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod25()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod26()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod27()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod28()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod29()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod30()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod31()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod32()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod33()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod34()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod35()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod36()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod37()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod38()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod39()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod40()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod41()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod42()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod43()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod44()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod45()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod46()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod47()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod48()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod49()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod50()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod51()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod52()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod53()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod54()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod55()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod56()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod57()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod58()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod59()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod60()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod61()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod62()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod63()V
+    .locals 0
+    return-void
+.end method
+
+.method public abstractMethod64()V
+    .locals 0
+    return-void
+.end method
diff --git a/test/976-conflict-no-methods/smali/NoMethods.smali b/test/976-conflict-no-methods/smali/NoMethods.smali
new file mode 100644
index 0000000..787e34a
--- /dev/null
+++ b/test/976-conflict-no-methods/smali/NoMethods.smali
@@ -0,0 +1,19 @@
+# /*
+#  * Copyright (C) 2016 The Android Open Source Project
+#  *
+#  * Licensed under the Apache License, Version 2.0 (the "License");
+#  * you may not use this file except in compliance with the License.
+#  * You may obtain a copy of the License at
+#  *
+#  *      http://www.apache.org/licenses/LICENSE-2.0
+#  *
+#  * Unless required by applicable law or agreed to in writing, software
+#  * distributed under the License is distributed on an "AS IS" BASIS,
+#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  * See the License for the specific language governing permissions and
+#  * limitations under the License.
+#  */
+#
+
+.class public LNoMethods;
+.super LMain;
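Taken together, the three smali files above set up the regression shape for
b/28707801: Iface declares enough methods (65) to force conflict entries in the
interface method table, Main implements all of them, and NoMethods inherits
Main's conflict table while declaring no methods of its own. The Java outline,
structure only (the real classes are produced from smali):

    interface IfaceShape {
        void abstractMethod0();
        // ...abstractMethod1() through abstractMethod64(), elided
    }

    class MainShape implements IfaceShape {
        public void abstractMethod0() {}
        // ...matching empty implementations, elided
    }

    class NoMethodsShape extends MainShape {}  // no methods of its own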
diff --git a/test/Android.libarttest.mk b/test/Android.libarttest.mk
index 21f8141..97204d3 100644
--- a/test/Android.libarttest.mk
+++ b/test/Android.libarttest.mk
@@ -34,6 +34,7 @@
   137-cfi/cfi.cc \
   139-register-natives/regnative.cc \
   141-class-unload/jni_unload.cc \
+  148-multithread-gc-annotations/gc_coverage.cc \
   454-get-vreg/get_vreg_jni.cc \
   457-regs/regs_jni.cc \
   461-get-reference-vreg/get_reference_vreg_jni.cc \
diff --git a/test/run-test b/test/run-test
index 047e3fb..2710ea3 100755
--- a/test/run-test
+++ b/test/run-test
@@ -122,10 +122,12 @@
 have_dex2oat="yes"
 have_patchoat="yes"
 have_image="yes"
-image_suffix=""
 pic_image_suffix=""
 multi_image_suffix=""
 android_root="/system"
+# By default, use the optimizing compiler.
+image_args=""
+image_suffix="-optimizing"
 
 while true; do
     if [ "x$1" = "x--host" ]; then
@@ -249,18 +251,18 @@
         image_suffix="-interpreter"
         shift
     elif [ "x$1" = "x--jit" ]; then
-        run_args="${run_args} --jit"
+        image_args="--jit"
         image_suffix="-jit"
         shift
     elif [ "x$1" = "x--optimizing" ]; then
-        run_args="${run_args} -Xcompiler-option --compiler-backend=Optimizing"
+        image_args="-Xcompiler-option --compiler-backend=Optimizing"
         image_suffix="-optimizing"
         shift
     elif [ "x$1" = "x--no-verify" ]; then
         run_args="${run_args} --no-verify"
         shift
     elif [ "x$1" = "x--verify-soft-fail" ]; then
-        run_args="${run_args} --verify-soft-fail"
+        image_args="--verify-soft-fail"
         image_suffix="-interp-ac"
         shift
     elif [ "x$1" = "x--no-optimize" ]; then
@@ -349,6 +351,7 @@
     fi
 done
 
+run_args="${run_args} ${image_args}"
 # Allocate file descriptor real_stderr and redirect it to the shell's error
 # output (fd 2).
 if [ ${BASH_VERSINFO[1]} -ge 4 ] && [ ${BASH_VERSINFO[2]} -ge 1 ]; then
diff --git a/tools/ahat/test-dump/Main.java b/tools/ahat/test-dump/Main.java
index d61a98d..3936f29 100644
--- a/tools/ahat/test-dump/Main.java
+++ b/tools/ahat/test-dump/Main.java
@@ -50,7 +50,8 @@
         bigArray[i] = (byte)((i*i) & 0xFF);
       }
 
-      NativeAllocationRegistry registry = new NativeAllocationRegistry(0x12345, 42);
+      NativeAllocationRegistry registry = new NativeAllocationRegistry(
+          Main.class.getClassLoader(), 0x12345, 42);
       registry.registerNativeAllocation(anObject, 0xABCDABCD);
     }
   }
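The ahat test dump is adapted to libcore's revised NativeAllocationRegistry
constructor, which now takes the owning ClassLoader ahead of the free-function
pointer and size. A minimal usage sketch, with the signature inferred from the
call site above:

    import libcore.util.NativeAllocationRegistry;

    class RegistrySketch {
        static void track(Object referent, long nativePtr) {
            // (ClassLoader, long freeFunction, long size), per the call above;
            // 0x12345 and 42 are the test's placeholder values.
            NativeAllocationRegistry registry = new NativeAllocationRegistry(
                    RegistrySketch.class.getClassLoader(), 0x12345, 42);
            registry.registerNativeAllocation(referent, nativePtr);
        }
    }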
diff --git a/tools/libcore_failures.txt b/tools/libcore_failures.txt
index dd2cc31..bdcf86d 100644
--- a/tools/libcore_failures.txt
+++ b/tools/libcore_failures.txt
@@ -248,16 +248,15 @@
   names: [ "libcore.java.io.FileTest#testJavaIoTmpdirMutable" ]
 },
 {
+  description: "Investigate whether the test is making a wrong assumption with the newly enforced classpath.",
+  result: EXEC_FAILED,
+  names: ["dalvik.system.DexClassLoaderTest#testDexThenPathClassLoader"]
+},
+{
   description: "Made for extending, shouldn't be run",
   result: EXEC_FAILED,
   names: ["jsr166.CollectionTest#testEmptyMeansEmpty",
           "jsr166.Collection8Test#testForEach",
           "jsr166.Collection8Test#testForEachConcurrentStressTest"]
-},
-{
-  description: "Unclear why this started to fail",
-  result: EXEC_FAILED,
-  bug: 28574453,
-  names: [ "org.apache.harmony.tests.javax.security.cert.X509CertificateTest#testVerifyPublicKey" ]
 }
 ]
diff --git a/tools/run-jdwp-tests.sh b/tools/run-jdwp-tests.sh
index b6a19b7..976e1d8 100755
--- a/tools/run-jdwp-tests.sh
+++ b/tools/run-jdwp-tests.sh
@@ -24,7 +24,7 @@
 
 if [ ! -f $test_jack ]; then
   echo "Before running, you must build jdwp tests and vogar:" \
-       "make apache-harmony-jdwp-tests-hostdex vogar vogar.jar"
+       "make apache-harmony-jdwp-tests-hostdex vogar"
   exit 1
 fi
 
@@ -44,6 +44,8 @@
 # By default, we run the whole JDWP test suite.
 test="org.apache.harmony.jpda.tests.share.AllTests"
 host="no"
+# Use JIT compilation by default.
+use_jit=true
 
 while true; do
   if [[ "$1" == "--mode=host" ]]; then
@@ -62,6 +64,11 @@
   elif [[ $1 == -Ximage:* ]]; then
     image="$1"
     shift
+  elif [[ "$1" == "--no-jit" ]]; then
+    use_jit=false
+    # Remove the --no-jit from the arguments.
+    args=${args/$1}
+    shift
   elif [[ $1 == "--debug" ]]; then
     debug="yes"
     # Remove the --debug from the arguments.
@@ -90,8 +97,12 @@
 if [[ "$image" != "" ]]; then
   vm_args="--vm-arg $image"
 fi
-vm_args="$vm_args --vm-arg -Xusejit:true"
-debuggee_args="$debuggee_args -Xusejit:true"
+if $use_jit; then
+  vm_args="$vm_args --vm-arg -Xcompiler-option --vm-arg --compiler-filter=interpret-only"
+  debuggee_args="$debuggee_args -Xcompiler-option --compiler-filter=interpret-only"
+fi
+vm_args="$vm_args --vm-arg -Xusejit:$use_jit"
+debuggee_args="$debuggee_args -Xusejit:$use_jit"
 if [[ $debug == "yes" ]]; then
   art="$art -d"
   art_debugee="$art_debugee -d"
@@ -111,7 +122,6 @@
       $image_compiler_option \
       --timeout 800 \
       --vm-arg -Djpda.settings.verbose=true \
-      --vm-arg -Djpda.settings.syncPort=34016 \
       --vm-arg -Djpda.settings.transportAddress=127.0.0.1:55107 \
       --vm-arg -Djpda.settings.debuggeeJavaPath="$art_debugee $image $debuggee_args" \
       --classpath $test_jack \
diff --git a/tools/run-libcore-tests.sh b/tools/run-libcore-tests.sh
index 00bb3c5..3e2a512 100755
--- a/tools/run-libcore-tests.sh
+++ b/tools/run-libcore-tests.sh
@@ -28,7 +28,7 @@
 
 if [ ! -f $test_jack ]; then
   echo "Before running, you must build core-tests, jsr166-tests and vogar: \
-        make core-tests jsr166-tests vogar vogar.jar"
+        make core-tests jsr166-tests vogar"
   exit 1
 fi
 
@@ -43,6 +43,9 @@
   emulator="yes"
 fi
 
+# Use JIT compilation by default.
+use_jit=true
+
 # Packages that currently work correctly with the expectation files.
 working_packages=("dalvik.system"
                   "libcore.icu"
@@ -91,6 +94,11 @@
     # classpath/resources differences when compiling the boot image.
     vogar_args="$vogar_args --vm-arg -Ximage:/non/existent/vogar.art"
     shift
+  elif [[ "$1" == "--no-jit" ]]; then
+    # Remove the --no-jit from the arguments.
+    vogar_args=${vogar_args/$1}
+    use_jit=false
+    shift
   elif [[ "$1" == "--debug" ]]; then
     # Remove the --debug from the arguments.
     vogar_args=${vogar_args/$1}
@@ -111,7 +119,13 @@
 # Use Jack with "1.8" configuration.
 vogar_args="$vogar_args --toolchain jack --language JN"
 
+# JIT settings.
+if $use_jit; then
+  vogar_args="$vogar_args --vm-arg -Xcompiler-option --vm-arg --compiler-filter=interpret-only"
+fi
+vogar_args="$vogar_args --vm-arg -Xusejit:$use_jit"
+
 # Run the tests using vogar.
 echo "Running tests for the following test packages:"
 echo ${working_packages[@]} | tr " " "\n"
-vogar $vogar_args --vm-arg -Xusejit:true $expectations --classpath $jsr166_test_jack --classpath $test_jack ${working_packages[@]}
+vogar $vogar_args $expectations --classpath $jsr166_test_jack --classpath $test_jack ${working_packages[@]}