Merge "Remove segfault."
diff --git a/Android.mk b/Android.mk
index 7beb30f..803ba50 100644
--- a/Android.mk
+++ b/Android.mk
@@ -123,7 +123,7 @@
 ifeq ($(ART_TEST_ANDROID_ROOT),)
 test-art-target-sync: $(TEST_ART_TARGET_SYNC_DEPS)
 	$(TEST_ART_ADB_ROOT_AND_REMOUNT)
-	adb sync
+	adb sync system && adb sync data
 else
 test-art-target-sync: $(TEST_ART_TARGET_SYNC_DEPS)
 	$(TEST_ART_ADB_ROOT_AND_REMOUNT)
diff --git a/benchmark/Android.bp b/benchmark/Android.bp
index e784508..d0dfec9 100644
--- a/benchmark/Android.bp
+++ b/benchmark/Android.bp
@@ -49,7 +49,7 @@
     name: "libartbenchmark-micronative-host",
     host_supported: true,
     device_supported: false,
-    defaults: ["art_defaults", "art_debug_defaults"],
+    defaults: ["art_debug_defaults", "art_defaults"],
     srcs: [
         "jni_loader.cc",
         "micro-native/micro_native.cc",
diff --git a/build/Android.bp b/build/Android.bp
index 6c9f1d4..c54f436 100644
--- a/build/Android.bp
+++ b/build/Android.bp
@@ -64,12 +64,6 @@
         "-Wno-constant-conversion",
         "-Wno-undefined-var-template",
 
-        "-DART_STACK_OVERFLOW_GAP_arm=8192",
-        "-DART_STACK_OVERFLOW_GAP_arm64=8192",
-        "-DART_STACK_OVERFLOW_GAP_mips=16384",
-        "-DART_STACK_OVERFLOW_GAP_mips64=16384",
-        "-DART_STACK_OVERFLOW_GAP_x86=8192",
-        "-DART_STACK_OVERFLOW_GAP_x86_64=8192",
         // Enable thread annotations for std::mutex, etc.
         "-D_LIBCPP_ENABLE_THREAD_SAFETY_ANNOTATIONS",
     ],
diff --git a/build/art.go b/build/art.go
index 61a9759..6dca793 100644
--- a/build/art.go
+++ b/build/art.go
@@ -76,6 +76,14 @@
 		asflags = append(asflags, "-DART_USE_OLD_ARM_BACKEND=1")
 	}
 
+	cflags = append(cflags,
+			"-DART_STACK_OVERFLOW_GAP_arm=8192",
+			"-DART_STACK_OVERFLOW_GAP_arm64=8192",
+			"-DART_STACK_OVERFLOW_GAP_mips=16384",
+			"-DART_STACK_OVERFLOW_GAP_mips64=16384",
+			"-DART_STACK_OVERFLOW_GAP_x86=8192",
+			"-DART_STACK_OVERFLOW_GAP_x86_64=8192")
+
 	return cflags, asflags
 }
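
A hypothetical sketch of how per-architecture -D macros like these are consumed on the C++ side. The kArm*StackOverflowGap constant names below are invented for illustration; only the macro names and values come from this change.

    #ifndef ART_STACK_OVERFLOW_GAP_arm
    #define ART_STACK_OVERFLOW_GAP_arm 8192  // Fallback matching the value set in art.go above.
    #endif
    #ifndef ART_STACK_OVERFLOW_GAP_arm64
    #define ART_STACK_OVERFLOW_GAP_arm64 8192
    #endif

    #include <cstddef>

    // Invented constant names; a real consumer would map each macro to its ISA.
    static constexpr size_t kArmStackOverflowGap = ART_STACK_OVERFLOW_GAP_arm;
    static constexpr size_t kArm64StackOverflowGap = ART_STACK_OVERFLOW_GAP_arm64;

    static_assert(kArmStackOverflowGap % 4096 == 0, "sanity check: gap is page-aligned");
    static_assert(kArm64StackOverflowGap % 4096 == 0, "sanity check: gap is page-aligned");
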
 
diff --git a/compiler/Android.bp b/compiler/Android.bp
index df896dc..307a42c 100644
--- a/compiler/Android.bp
+++ b/compiler/Android.bp
@@ -67,6 +67,7 @@
         "optimizing/intrinsics.cc",
         "optimizing/licm.cc",
         "optimizing/linear_order.cc",
+        "optimizing/load_store_analysis.cc",
         "optimizing/load_store_elimination.cc",
         "optimizing/locations.cc",
         "optimizing/loop_optimization.cc",
@@ -374,6 +375,7 @@
 
         "jni/jni_cfi_test.cc",
         "optimizing/codegen_test.cc",
+        "optimizing/load_store_analysis_test.cc",
         "optimizing/optimizing_cfi_test.cc",
         "optimizing/scheduler_test.cc",
     ],
diff --git a/compiler/compiled_method.h b/compiler/compiled_method.h
index 912c964..0ca23a5 100644
--- a/compiler/compiled_method.h
+++ b/compiler/compiled_method.h
@@ -119,7 +119,7 @@
   // choose to squeeze the Type into fewer than 8 bits, we'll have to declare
   // patch_type_ as an uintN_t and do explicit static_cast<>s.
   enum class Type : uint8_t {
-    kMethod,
+    kMethodRelative,          // NOTE: Actual patching is instruction_set-dependent.
     kCall,
     kCallRelative,            // NOTE: Actual patching is instruction_set-dependent.
     kTypeRelative,            // NOTE: Actual patching is instruction_set-dependent.
@@ -130,11 +130,13 @@
     kBakerReadBarrierBranch,  // NOTE: Actual patching is instruction_set-dependent.
   };
 
-  static LinkerPatch MethodPatch(size_t literal_offset,
-                                 const DexFile* target_dex_file,
-                                 uint32_t target_method_idx) {
-    LinkerPatch patch(literal_offset, Type::kMethod, target_dex_file);
+  static LinkerPatch RelativeMethodPatch(size_t literal_offset,
+                                         const DexFile* target_dex_file,
+                                         uint32_t pc_insn_offset,
+                                         uint32_t target_method_idx) {
+    LinkerPatch patch(literal_offset, Type::kMethodRelative, target_dex_file);
     patch.method_idx_ = target_method_idx;
+    patch.pc_insn_offset_ = pc_insn_offset;
     return patch;
   }
 
@@ -226,6 +228,7 @@
 
   bool IsPcRelative() const {
     switch (GetType()) {
+      case Type::kMethodRelative:
       case Type::kCallRelative:
       case Type::kTypeRelative:
       case Type::kTypeBssEntry:
@@ -240,7 +243,7 @@
   }
 
   MethodReference TargetMethod() const {
-    DCHECK(patch_type_ == Type::kMethod ||
+    DCHECK(patch_type_ == Type::kMethodRelative ||
            patch_type_ == Type::kCall ||
            patch_type_ == Type::kCallRelative);
     return MethodReference(target_dex_file_, method_idx_);
@@ -281,7 +284,8 @@
   }
 
   uint32_t PcInsnOffset() const {
-    DCHECK(patch_type_ == Type::kTypeRelative ||
+    DCHECK(patch_type_ == Type::kMethodRelative ||
+           patch_type_ == Type::kTypeRelative ||
            patch_type_ == Type::kTypeBssEntry ||
            patch_type_ == Type::kStringRelative ||
            patch_type_ == Type::kStringBssEntry ||
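
A test-style usage fragment for the new patch type, following the pattern of compiled_method_test.cc below; it is not standalone (it assumes ART's LinkerPatch API as shown in this hunk and ART's CHECK macros), and the dummy pointer and offsets are invented. A kMethodRelative patch records the PC-anchoring instruction offset in addition to the target method, and reports itself as PC-relative:

    const DexFile* dex_file = reinterpret_cast<const DexFile*>(1);  // Dummy pointer, test-style.
    LinkerPatch patch = LinkerPatch::RelativeMethodPatch(
        /* literal_offset */ 16u, dex_file, /* pc_insn_offset */ 12u, /* target_method_idx */ 1000u);
    CHECK(patch.IsPcRelative());
    CHECK_EQ(patch.PcInsnOffset(), 12u);
    CHECK_EQ(patch.TargetMethod().dex_method_index, 1000u);
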
diff --git a/compiler/compiled_method_test.cc b/compiler/compiled_method_test.cc
index 81a2cde..72b2282 100644
--- a/compiler/compiled_method_test.cc
+++ b/compiler/compiled_method_test.cc
@@ -50,10 +50,14 @@
   const DexFile* dex_file1 = reinterpret_cast<const DexFile*>(1);
   const DexFile* dex_file2 = reinterpret_cast<const DexFile*>(2);
   LinkerPatch patches[] = {
-      LinkerPatch::MethodPatch(16u, dex_file1, 1000u),
-      LinkerPatch::MethodPatch(16u, dex_file1, 1001u),
-      LinkerPatch::MethodPatch(16u, dex_file2, 1000u),
-      LinkerPatch::MethodPatch(16u, dex_file2, 1001u),  // Index 3.
+      LinkerPatch::RelativeMethodPatch(16u, dex_file1, 3000u, 1000u),
+      LinkerPatch::RelativeMethodPatch(16u, dex_file1, 3001u, 1000u),
+      LinkerPatch::RelativeMethodPatch(16u, dex_file1, 3000u, 1001u),
+      LinkerPatch::RelativeMethodPatch(16u, dex_file1, 3001u, 1001u),  // Index 3.
+      LinkerPatch::RelativeMethodPatch(16u, dex_file2, 3000u, 1000u),
+      LinkerPatch::RelativeMethodPatch(16u, dex_file2, 3001u, 1000u),
+      LinkerPatch::RelativeMethodPatch(16u, dex_file2, 3000u, 1001u),
+      LinkerPatch::RelativeMethodPatch(16u, dex_file2, 3001u, 1001u),
       LinkerPatch::CodePatch(16u, dex_file1, 1000u),
       LinkerPatch::CodePatch(16u, dex_file1, 1001u),
       LinkerPatch::CodePatch(16u, dex_file2, 1000u),
@@ -107,10 +111,14 @@
       LinkerPatch::BakerReadBarrierBranchPatch(16u, 1u, 0u),
       LinkerPatch::BakerReadBarrierBranchPatch(16u, 1u, 1u),
 
-      LinkerPatch::MethodPatch(32u, dex_file1, 1000u),
-      LinkerPatch::MethodPatch(32u, dex_file1, 1001u),
-      LinkerPatch::MethodPatch(32u, dex_file2, 1000u),
-      LinkerPatch::MethodPatch(32u, dex_file2, 1001u),  // Index 3.
+      LinkerPatch::RelativeMethodPatch(32u, dex_file1, 3000u, 1000u),
+      LinkerPatch::RelativeMethodPatch(32u, dex_file1, 3001u, 1000u),
+      LinkerPatch::RelativeMethodPatch(32u, dex_file1, 3000u, 1001u),
+      LinkerPatch::RelativeMethodPatch(32u, dex_file1, 3001u, 1001u),
+      LinkerPatch::RelativeMethodPatch(32u, dex_file2, 3000u, 1000u),
+      LinkerPatch::RelativeMethodPatch(32u, dex_file2, 3001u, 1000u),
+      LinkerPatch::RelativeMethodPatch(32u, dex_file2, 3000u, 1001u),
+      LinkerPatch::RelativeMethodPatch(32u, dex_file2, 3001u, 1001u),
       LinkerPatch::CodePatch(32u, dex_file1, 1000u),
       LinkerPatch::CodePatch(32u, dex_file1, 1001u),
       LinkerPatch::CodePatch(32u, dex_file2, 1000u),
@@ -164,7 +172,7 @@
       LinkerPatch::BakerReadBarrierBranchPatch(32u, 1u, 0u),
       LinkerPatch::BakerReadBarrierBranchPatch(32u, 1u, 1u),
 
-      LinkerPatch::MethodPatch(16u, dex_file2, 1001u),  // identical with patch at index 3.
+      LinkerPatch::RelativeMethodPatch(16u, dex_file1, 3001u, 1001u),  // Same as patch at index 3.
   };
   constexpr size_t last_index = arraysize(patches) - 1u;
 
diff --git a/compiler/driver/compiled_method_storage_test.cc b/compiler/driver/compiled_method_storage_test.cc
index 6572d17..bbd28b2 100644
--- a/compiler/driver/compiled_method_storage_test.cc
+++ b/compiler/driver/compiled_method_storage_test.cc
@@ -71,11 +71,11 @@
   };
   const LinkerPatch raw_patches1[] = {
       LinkerPatch::CodePatch(0u, nullptr, 1u),
-      LinkerPatch::MethodPatch(4u, nullptr, 1u),
+      LinkerPatch::RelativeMethodPatch(4u, nullptr, 0u, 1u),
   };
   const LinkerPatch raw_patches2[] = {
       LinkerPatch::CodePatch(0u, nullptr, 1u),
-      LinkerPatch::MethodPatch(4u, nullptr, 2u),
+      LinkerPatch::RelativeMethodPatch(4u, nullptr, 0u, 2u),
   };
   ArrayRef<const LinkerPatch> patches[] = {
       ArrayRef<const LinkerPatch>(raw_patches1),
diff --git a/compiler/linker/arm64/relative_patcher_arm64.cc b/compiler/linker/arm64/relative_patcher_arm64.cc
index c033c2d..117684a 100644
--- a/compiler/linker/arm64/relative_patcher_arm64.cc
+++ b/compiler/linker/arm64/relative_patcher_arm64.cc
@@ -54,11 +54,11 @@
 
 inline bool IsAdrpPatch(const LinkerPatch& patch) {
   switch (patch.GetType()) {
-    case LinkerPatch::Type::kMethod:
     case LinkerPatch::Type::kCall:
     case LinkerPatch::Type::kCallRelative:
     case LinkerPatch::Type::kBakerReadBarrierBranch:
       return false;
+    case LinkerPatch::Type::kMethodRelative:
     case LinkerPatch::Type::kTypeRelative:
     case LinkerPatch::Type::kTypeBssEntry:
     case LinkerPatch::Type::kStringRelative:
@@ -250,11 +250,13 @@
     if ((insn & 0xfffffc00) == 0x91000000) {
       // ADD immediate, 64-bit with imm12 == 0 (unset).
       if (!kEmitCompilerReadBarrier) {
-        DCHECK(patch.GetType() == LinkerPatch::Type::kStringRelative ||
+        DCHECK(patch.GetType() == LinkerPatch::Type::kMethodRelative ||
+               patch.GetType() == LinkerPatch::Type::kStringRelative ||
                patch.GetType() == LinkerPatch::Type::kTypeRelative) << patch.GetType();
       } else {
         // With the read barrier (non-Baker) enabled, it could be kStringBssEntry or kTypeBssEntry.
-        DCHECK(patch.GetType() == LinkerPatch::Type::kStringRelative ||
+        DCHECK(patch.GetType() == LinkerPatch::Type::kMethodRelative ||
+               patch.GetType() == LinkerPatch::Type::kStringRelative ||
                patch.GetType() == LinkerPatch::Type::kTypeRelative ||
                patch.GetType() == LinkerPatch::Type::kStringBssEntry ||
                patch.GetType() == LinkerPatch::Type::kTypeBssEntry) << patch.GetType();
@@ -565,10 +567,10 @@
       return false;
     }
 
-    // And since LinkerPatch::Type::kStringRelative is using the result of the ADRP
-    // for an ADD immediate, check for that as well. We generalize a bit to include
-    // ADD/ADDS/SUB/SUBS immediate that either uses the ADRP destination or stores
-    // the result to a different register.
+    // And since LinkerPatch::Type::k{Method,Type,String}Relative is using the result
+    // of the ADRP for an ADD immediate, check for that as well. We generalize a bit
+    // to include ADD/ADDS/SUB/SUBS immediate that either uses the ADRP destination
+    // or stores the result to a different register.
     if ((next_insn & 0x1f000000) == 0x11000000 &&
         ((((next_insn >> 5) ^ adrp) & 0x1f) == 0 || ((next_insn ^ adrp) & 0x1f) != 0)) {
       return false;
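
A simplified standalone model, not the patcher's actual code, of the address arithmetic an ADRP+ADD pair implements on AArch64: ADRP produces the 4 KiB page of the PC plus a signed 21-bit page delta, and the ADD immediate supplies the low 12 bits. The sample addresses are invented.

    #include <cassert>
    #include <cstdint>

    static uint64_t AdrpAddTarget(uint64_t adrp_pc, int64_t page_delta, uint32_t add_imm12) {
      // ADRP: PC with the low 12 bits cleared, plus the page delta scaled by 4 KiB.
      uint64_t page = (adrp_pc & ~UINT64_C(0xfff)) + (page_delta * 4096);
      // ADD immediate: the low 12 bits of the target.
      return page + add_imm12;
    }

    int main() {
      const uint64_t adrp_pc = 0x70001234u;
      const uint64_t target = 0x70345678u;
      const int64_t page_delta =
          static_cast<int64_t>(target >> 12) - static_cast<int64_t>(adrp_pc >> 12);
      assert(AdrpAddTarget(adrp_pc, page_delta, static_cast<uint32_t>(target & 0xfffu)) == target);
    }
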
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index 5091c0b..bc411df 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -1331,9 +1331,12 @@
                 PatchCodeAddress(&patched_code_, literal_offset, target_offset);
                 break;
               }
-              case LinkerPatch::Type::kMethod: {
-                ArtMethod* method = GetTargetMethod(patch);
-                PatchMethodAddress(&patched_code_, literal_offset, method);
+              case LinkerPatch::Type::kMethodRelative: {
+                uint32_t target_offset = GetTargetMethodOffset(GetTargetMethod(patch));
+                writer_->relative_patcher_->PatchPcRelativeReference(&patched_code_,
+                                                                     patch,
+                                                                     offset_ + literal_offset,
+                                                                     target_offset);
                 break;
               }
               case LinkerPatch::Type::kBakerReadBarrierBranch: {
@@ -1457,6 +1460,15 @@
     }
   }
 
+  uint32_t GetTargetMethodOffset(ArtMethod* method) REQUIRES_SHARED(Locks::mutator_lock_) {
+    DCHECK(writer_->HasBootImage());
+    method = writer_->image_writer_->GetImageMethodAddress(method);
+    size_t oat_index = writer_->image_writer_->GetOatIndexForDexFile(dex_file_);
+    uintptr_t oat_data_begin = writer_->image_writer_->GetOatDataBegin(oat_index);
+    // TODO: Clean up offset types. The target offset must be treated as signed.
+    return static_cast<uint32_t>(reinterpret_cast<uintptr_t>(method) - oat_data_begin);
+  }
+
   uint32_t GetTargetObjectOffset(mirror::Object* object) REQUIRES_SHARED(Locks::mutator_lock_) {
     DCHECK(writer_->HasBootImage());
     object = writer_->image_writer_->GetImageAddress(object);
@@ -1486,34 +1498,6 @@
     data[3] = (address >> 24) & 0xffu;
   }
 
-  void PatchMethodAddress(std::vector<uint8_t>* code, uint32_t offset, ArtMethod* method)
-      REQUIRES_SHARED(Locks::mutator_lock_) {
-    if (writer_->HasBootImage()) {
-      method = writer_->image_writer_->GetImageMethodAddress(method);
-    } else if (kIsDebugBuild) {
-      // NOTE: We're using linker patches for app->boot references when the image can
-      // be relocated and therefore we need to emit .oat_patches. We're not using this
-      // for app->app references, so check that the method is an image method.
-      std::vector<gc::space::ImageSpace*> image_spaces =
-          Runtime::Current()->GetHeap()->GetBootImageSpaces();
-      bool contains_method = false;
-      for (gc::space::ImageSpace* image_space : image_spaces) {
-        size_t method_offset = reinterpret_cast<const uint8_t*>(method) - image_space->Begin();
-        contains_method |=
-            image_space->GetImageHeader().GetMethodsSection().Contains(method_offset);
-      }
-      CHECK(contains_method);
-    }
-    // Note: We only patch targeting ArtMethods in image which is in the low 4gb.
-    uint32_t address = PointerToLowMemUInt32(method);
-    DCHECK_LE(offset + 4, code->size());
-    uint8_t* data = &(*code)[offset];
-    data[0] = address & 0xffu;
-    data[1] = (address >> 8) & 0xffu;
-    data[2] = (address >> 16) & 0xffu;
-    data[3] = (address >> 24) & 0xffu;
-  }
-
   void PatchCodeAddress(std::vector<uint8_t>* code, uint32_t offset, uint32_t target_offset)
       REQUIRES_SHARED(Locks::mutator_lock_) {
     uint32_t address = target_offset;
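
The TODO in GetTargetMethodOffset above concerns signedness: boot image methods live below the oat data section, so the computed uint32_t offset is a wrapped negative displacement that the patcher must interpret as signed. A small standalone illustration with invented addresses:

    #include <cassert>
    #include <cstdint>

    int main() {
      const uintptr_t oat_data_begin = 0x71000000u;
      const uintptr_t method_address = 0x70004000u;  // An ArtMethod in the image, below the oat data.
      const uint32_t target_offset = static_cast<uint32_t>(method_address - oat_data_begin);
      // Reinterpreting the offset as signed recovers the real displacement.
      const int32_t displacement = static_cast<int32_t>(target_offset);
      assert(oat_data_begin + displacement == method_address);
    }
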
diff --git a/compiler/optimizing/bounds_check_elimination_test.cc b/compiler/optimizing/bounds_check_elimination_test.cc
index cb6e14b..a949c33 100644
--- a/compiler/optimizing/bounds_check_elimination_test.cc
+++ b/compiler/optimizing/bounds_check_elimination_test.cc
@@ -43,7 +43,7 @@
   void RunBCE() {
     graph_->BuildDominatorTree();
 
-    InstructionSimplifier(graph_, /* codegen */ nullptr).Run();
+    InstructionSimplifier(graph_, /* codegen */ nullptr, /* driver */ nullptr).Run();
 
     SideEffectsAnalysis side_effects(graph_);
     side_effects.Run();
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 713d370..c66bd77 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -1656,6 +1656,34 @@
   }
 }
 
+static int64_t AdjustConstantForCondition(int64_t value,
+                                          IfCondition* condition,
+                                          IfCondition* opposite) {
+  if (value == 1) {
+    if (*condition == kCondB) {
+      value = 0;
+      *condition = kCondEQ;
+      *opposite = kCondNE;
+    } else if (*condition == kCondAE) {
+      value = 0;
+      *condition = kCondNE;
+      *opposite = kCondEQ;
+    }
+  } else if (value == -1) {
+    if (*condition == kCondGT) {
+      value = 0;
+      *condition = kCondGE;
+      *opposite = kCondLT;
+    } else if (*condition == kCondLE) {
+      value = 0;
+      *condition = kCondLT;
+      *opposite = kCondGE;
+    }
+  }
+
+  return value;
+}
+
 static std::pair<Condition, Condition> GenerateLongTestConstant(HCondition* condition,
                                                                 bool invert,
                                                                 CodeGeneratorARM* codegen) {
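
AdjustConstantForCondition, added above, rewrites comparisons against +1 and -1 into comparisons against 0 (unsigned x < 1 becomes x == 0, unsigned x >= 1 becomes x != 0, x > -1 becomes x >= 0, x <= -1 becomes x < 0) so that the 0-specialized paths below can kick in. A standalone check of those equivalences, independent of the codegen; the last assert also covers the ORRS zero test used on long values:

    #include <cassert>
    #include <cstdint>

    int main() {
      const int64_t vals[] = {INT64_MIN, -2, -1, 0, 1, 2, INT64_MAX};
      for (int64_t x : vals) {
        const uint64_t u = static_cast<uint64_t>(x);
        assert((u < 1u) == (x == 0));   // kCondB  against 1 -> kCondEQ against 0.
        assert((u >= 1u) == (x != 0));  // kCondAE against 1 -> kCondNE against 0.
        assert((x > -1) == (x >= 0));   // kCondGT against -1 -> kCondGE against 0.
        assert((x <= -1) == (x < 0));   // kCondLE against -1 -> kCondLT against 0.
        // ORRS low, high: a 64-bit value is zero iff the OR of its halves is zero.
        const uint32_t lo = static_cast<uint32_t>(u);
        const uint32_t hi = static_cast<uint32_t>(u >> 32);
        assert(((lo | hi) == 0) == (x == 0));
      }
    }
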
@@ -1669,7 +1697,7 @@
     std::swap(cond, opposite);
   }
 
-  std::pair<Condition, Condition> ret;
+  std::pair<Condition, Condition> ret(EQ, NE);
   const Location left = locations->InAt(0);
   const Location right = locations->InAt(1);
 
@@ -1677,7 +1705,38 @@
 
   const Register left_high = left.AsRegisterPairHigh<Register>();
   const Register left_low = left.AsRegisterPairLow<Register>();
-  int64_t value = right.GetConstant()->AsLongConstant()->GetValue();
+  int64_t value = AdjustConstantForCondition(right.GetConstant()->AsLongConstant()->GetValue(),
+                                             &cond,
+                                             &opposite);
+
+  // Comparisons against 0 are common enough to deserve special attention.
+  if (value == 0) {
+    switch (cond) {
+      case kCondNE:
+      // x > 0 iff x != 0 when the comparison is unsigned.
+      case kCondA:
+        ret = std::make_pair(NE, EQ);
+        FALLTHROUGH_INTENDED;
+      case kCondEQ:
+      // x <= 0 iff x == 0 when the comparison is unsigned.
+      case kCondBE:
+        __ orrs(IP, left_low, ShifterOperand(left_high));
+        return ret;
+      case kCondLT:
+      case kCondGE:
+        __ cmp(left_high, ShifterOperand(0));
+        return std::make_pair(ARMCondition(cond), ARMCondition(opposite));
+      // Trivially true or false.
+      case kCondB:
+        ret = std::make_pair(NE, EQ);
+        FALLTHROUGH_INTENDED;
+      case kCondAE:
+        __ cmp(left_low, ShifterOperand(left_low));
+        return ret;
+      default:
+        break;
+    }
+  }
 
   switch (cond) {
     case kCondEQ:
@@ -1837,10 +1896,14 @@
 static bool CanGenerateTest(HCondition* condition, ArmAssembler* assembler) {
   if (condition->GetLeft()->GetType() == Primitive::kPrimLong) {
     const LocationSummary* const locations = condition->GetLocations();
-    const IfCondition c = condition->GetCondition();
 
     if (locations->InAt(1).IsConstant()) {
-      const int64_t value = locations->InAt(1).GetConstant()->AsLongConstant()->GetValue();
+      IfCondition c = condition->GetCondition();
+      IfCondition opposite = condition->GetOppositeCondition();
+      const int64_t value = AdjustConstantForCondition(
+          Int64FromConstant(locations->InAt(1).GetConstant()),
+          &c,
+          &opposite);
       ShifterOperand so;
 
       if (c < kCondLT || c > kCondGE) {
@@ -1848,9 +1911,11 @@
         // we check that the least significant half of the first input to be compared
         // is in a low register (the other half is read outside an IT block), and
         // the constant fits in an 8-bit unsigned integer, so that a 16-bit CMP
-        // encoding can be used.
-        if (!ArmAssembler::IsLowRegister(locations->InAt(0).AsRegisterPairLow<Register>()) ||
-            !IsUint<8>(Low32Bits(value))) {
+        // encoding can be used; 0 is always handled, no matter what registers are
+        // used by the first input.
+        if (value != 0 &&
+            (!ArmAssembler::IsLowRegister(locations->InAt(0).AsRegisterPairLow<Register>()) ||
+             !IsUint<8>(Low32Bits(value)))) {
           return false;
         }
       } else if (c == kCondLE || c == kCondGT) {
@@ -1877,6 +1942,329 @@
   return true;
 }
 
+static void GenerateConditionGeneric(HCondition* cond, CodeGeneratorARM* codegen) {
+  DCHECK(CanGenerateTest(cond, codegen->GetAssembler()));
+
+  const Register out = cond->GetLocations()->Out().AsRegister<Register>();
+  const auto condition = GenerateTest(cond, false, codegen);
+
+  __ mov(out, ShifterOperand(0), AL, kCcKeep);
+
+  if (ArmAssembler::IsLowRegister(out)) {
+    __ it(condition.first);
+    __ mov(out, ShifterOperand(1), condition.first);
+  } else {
+    Label done_label;
+    Label* const final_label = codegen->GetFinalLabel(cond, &done_label);
+
+    __ b(final_label, condition.second);
+    __ LoadImmediate(out, 1);
+
+    if (done_label.IsLinked()) {
+      __ Bind(&done_label);
+    }
+  }
+}
+
+static void GenerateEqualLong(HCondition* cond, CodeGeneratorARM* codegen) {
+  DCHECK_EQ(cond->GetLeft()->GetType(), Primitive::kPrimLong);
+
+  const LocationSummary* const locations = cond->GetLocations();
+  IfCondition condition = cond->GetCondition();
+  const Register out = locations->Out().AsRegister<Register>();
+  const Location left = locations->InAt(0);
+  const Location right = locations->InAt(1);
+  Register left_high = left.AsRegisterPairHigh<Register>();
+  Register left_low = left.AsRegisterPairLow<Register>();
+
+  if (right.IsConstant()) {
+    IfCondition opposite = cond->GetOppositeCondition();
+    const int64_t value = AdjustConstantForCondition(Int64FromConstant(right.GetConstant()),
+                                                     &condition,
+                                                     &opposite);
+    int32_t value_high = -High32Bits(value);
+    int32_t value_low = -Low32Bits(value);
+
+    // The output uses Location::kNoOutputOverlap.
+    if (out == left_high) {
+      std::swap(left_low, left_high);
+      std::swap(value_low, value_high);
+    }
+
+    __ AddConstant(out, left_low, value_low);
+    __ AddConstant(IP, left_high, value_high);
+  } else {
+    DCHECK(right.IsRegisterPair());
+    __ sub(IP, left_high, ShifterOperand(right.AsRegisterPairHigh<Register>()));
+    __ sub(out, left_low, ShifterOperand(right.AsRegisterPairLow<Register>()));
+  }
+
+  // Need to check after calling AdjustConstantForCondition().
+  DCHECK(condition == kCondEQ || condition == kCondNE) << condition;
+
+  if (condition == kCondNE && ArmAssembler::IsLowRegister(out)) {
+    __ orrs(out, out, ShifterOperand(IP));
+    __ it(NE);
+    __ mov(out, ShifterOperand(1), NE);
+  } else {
+    __ orr(out, out, ShifterOperand(IP));
+    codegen->GenerateConditionWithZero(condition, out, out, IP);
+  }
+}
+
+static void GenerateLongComparesAndJumps(HCondition* cond,
+                                         Label* true_label,
+                                         Label* false_label,
+                                         CodeGeneratorARM* codegen) {
+  LocationSummary* locations = cond->GetLocations();
+  Location left = locations->InAt(0);
+  Location right = locations->InAt(1);
+  IfCondition if_cond = cond->GetCondition();
+
+  Register left_high = left.AsRegisterPairHigh<Register>();
+  Register left_low = left.AsRegisterPairLow<Register>();
+  IfCondition true_high_cond = if_cond;
+  IfCondition false_high_cond = cond->GetOppositeCondition();
+  Condition final_condition = ARMUnsignedCondition(if_cond);  // unsigned on lower part
+
+  // Set the conditions for the test, remembering that == needs to be
+  // decided using the low words.
+  switch (if_cond) {
+    case kCondEQ:
+    case kCondNE:
+      // Nothing to do.
+      break;
+    case kCondLT:
+      false_high_cond = kCondGT;
+      break;
+    case kCondLE:
+      true_high_cond = kCondLT;
+      break;
+    case kCondGT:
+      false_high_cond = kCondLT;
+      break;
+    case kCondGE:
+      true_high_cond = kCondGT;
+      break;
+    case kCondB:
+      false_high_cond = kCondA;
+      break;
+    case kCondBE:
+      true_high_cond = kCondB;
+      break;
+    case kCondA:
+      false_high_cond = kCondB;
+      break;
+    case kCondAE:
+      true_high_cond = kCondA;
+      break;
+  }
+  if (right.IsConstant()) {
+    int64_t value = right.GetConstant()->AsLongConstant()->GetValue();
+    int32_t val_low = Low32Bits(value);
+    int32_t val_high = High32Bits(value);
+
+    __ CmpConstant(left_high, val_high);
+    if (if_cond == kCondNE) {
+      __ b(true_label, ARMCondition(true_high_cond));
+    } else if (if_cond == kCondEQ) {
+      __ b(false_label, ARMCondition(false_high_cond));
+    } else {
+      __ b(true_label, ARMCondition(true_high_cond));
+      __ b(false_label, ARMCondition(false_high_cond));
+    }
+    // Must be equal high, so compare the lows.
+    __ CmpConstant(left_low, val_low);
+  } else {
+    Register right_high = right.AsRegisterPairHigh<Register>();
+    Register right_low = right.AsRegisterPairLow<Register>();
+
+    __ cmp(left_high, ShifterOperand(right_high));
+    if (if_cond == kCondNE) {
+      __ b(true_label, ARMCondition(true_high_cond));
+    } else if (if_cond == kCondEQ) {
+      __ b(false_label, ARMCondition(false_high_cond));
+    } else {
+      __ b(true_label, ARMCondition(true_high_cond));
+      __ b(false_label, ARMCondition(false_high_cond));
+    }
+    // Must be equal high, so compare the lows.
+    __ cmp(left_low, ShifterOperand(right_low));
+  }
+  // The last comparison might be unsigned.
+  // TODO: optimize cases where this is always true/false
+  __ b(true_label, final_condition);
+}
+
+static void GenerateConditionLong(HCondition* cond, CodeGeneratorARM* codegen) {
+  DCHECK_EQ(cond->GetLeft()->GetType(), Primitive::kPrimLong);
+
+  const LocationSummary* const locations = cond->GetLocations();
+  IfCondition condition = cond->GetCondition();
+  const Register out = locations->Out().AsRegister<Register>();
+  const Location left = locations->InAt(0);
+  const Location right = locations->InAt(1);
+
+  if (right.IsConstant()) {
+    IfCondition opposite = cond->GetOppositeCondition();
+
+    // Comparisons against 0 are common enough to deserve special attention.
+    if (AdjustConstantForCondition(Int64FromConstant(right.GetConstant()),
+                                   &condition,
+                                   &opposite) == 0) {
+      switch (condition) {
+        case kCondNE:
+        case kCondA:
+          if (ArmAssembler::IsLowRegister(out)) {
+            // We only care if both input registers are 0 or not.
+            __ orrs(out,
+                    left.AsRegisterPairLow<Register>(),
+                    ShifterOperand(left.AsRegisterPairHigh<Register>()));
+            __ it(NE);
+            __ mov(out, ShifterOperand(1), NE);
+            return;
+          }
+
+          FALLTHROUGH_INTENDED;
+        case kCondEQ:
+        case kCondBE:
+          // We only care if both input registers are 0 or not.
+          __ orr(out,
+                 left.AsRegisterPairLow<Register>(),
+                 ShifterOperand(left.AsRegisterPairHigh<Register>()));
+          codegen->GenerateConditionWithZero(condition, out, out);
+          return;
+        case kCondLT:
+        case kCondGE:
+          // We only care about the sign bit.
+          FALLTHROUGH_INTENDED;
+        case kCondAE:
+        case kCondB:
+          codegen->GenerateConditionWithZero(condition, out, left.AsRegisterPairHigh<Register>());
+          return;
+        case kCondLE:
+        case kCondGT:
+        default:
+          break;
+      }
+    }
+  }
+
+  if ((condition == kCondEQ || condition == kCondNE) &&
+      // If `out` is a low register, then the GenerateConditionGeneric()
+      // function generates a shorter code sequence that is still branchless.
+      (!ArmAssembler::IsLowRegister(out) || !CanGenerateTest(cond, codegen->GetAssembler()))) {
+    GenerateEqualLong(cond, codegen);
+    return;
+  }
+
+  if (CanGenerateTest(cond, codegen->GetAssembler())) {
+    GenerateConditionGeneric(cond, codegen);
+    return;
+  }
+
+  // Convert the jumps into the result.
+  Label done_label;
+  Label* const final_label = codegen->GetFinalLabel(cond, &done_label);
+  Label true_label, false_label;
+
+  GenerateLongComparesAndJumps(cond, &true_label, &false_label, codegen);
+
+  // False case: result = 0.
+  __ Bind(&false_label);
+  __ mov(out, ShifterOperand(0));
+  __ b(final_label);
+
+  // True case: result = 1.
+  __ Bind(&true_label);
+  __ mov(out, ShifterOperand(1));
+
+  if (done_label.IsLinked()) {
+    __ Bind(&done_label);
+  }
+}
+
+static void GenerateConditionIntegralOrNonPrimitive(HCondition* cond, CodeGeneratorARM* codegen) {
+  const Primitive::Type type = cond->GetLeft()->GetType();
+
+  DCHECK(Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) << type;
+
+  if (type == Primitive::kPrimLong) {
+    GenerateConditionLong(cond, codegen);
+    return;
+  }
+
+  const LocationSummary* const locations = cond->GetLocations();
+  IfCondition condition = cond->GetCondition();
+  Register in = locations->InAt(0).AsRegister<Register>();
+  const Register out = locations->Out().AsRegister<Register>();
+  const Location right = cond->GetLocations()->InAt(1);
+  int64_t value;
+
+  if (right.IsConstant()) {
+    IfCondition opposite = cond->GetOppositeCondition();
+
+    value = AdjustConstantForCondition(Int64FromConstant(right.GetConstant()),
+                                       &condition,
+                                       &opposite);
+
+    // Comparisons against 0 are common enough to deserve special attention.
+    if (value == 0) {
+      switch (condition) {
+        case kCondNE:
+        case kCondA:
+          if (ArmAssembler::IsLowRegister(out) && out == in) {
+            __ cmp(out, ShifterOperand(0));
+            __ it(NE);
+            __ mov(out, ShifterOperand(1), NE);
+            return;
+          }
+
+          FALLTHROUGH_INTENDED;
+        case kCondEQ:
+        case kCondBE:
+        case kCondLT:
+        case kCondGE:
+        case kCondAE:
+        case kCondB:
+          codegen->GenerateConditionWithZero(condition, out, in);
+          return;
+        case kCondLE:
+        case kCondGT:
+        default:
+          break;
+      }
+    }
+  }
+
+  if (condition == kCondEQ || condition == kCondNE) {
+    ShifterOperand operand;
+
+    if (right.IsConstant()) {
+      operand = ShifterOperand(value);
+    } else if (out == right.AsRegister<Register>()) {
+      // Avoid 32-bit instructions if possible.
+      operand = ShifterOperand(in);
+      in = right.AsRegister<Register>();
+    } else {
+      operand = ShifterOperand(right.AsRegister<Register>());
+    }
+
+    if (condition == kCondNE && ArmAssembler::IsLowRegister(out)) {
+      __ subs(out, in, operand);
+      __ it(NE);
+      __ mov(out, ShifterOperand(1), NE);
+    } else {
+      __ sub(out, in, operand);
+      codegen->GenerateConditionWithZero(condition, out, out);
+    }
+
+    return;
+  }
+
+  GenerateConditionGeneric(cond, codegen);
+}
+
 static bool CanEncodeConstantAs8BitImmediate(HConstant* constant) {
   const Primitive::Type type = constant->GetType();
   bool ret = false;
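
GenerateLongComparesAndJumps above realizes a 64-bit comparison on 32-bit halves: the high words are compared with the requested signedness and, only when they are equal, the low words are compared unsigned. A standalone model of that strategy checked against a native 64-bit compare; a sketch that assumes arithmetic right shift for negative values, which ART's supported compilers provide:

    #include <cassert>
    #include <cstdint>

    static bool SignedLess64(int64_t a, int64_t b) {
      const int32_t a_high = static_cast<int32_t>(a >> 32);
      const int32_t b_high = static_cast<int32_t>(b >> 32);
      if (a_high != b_high) {
        return a_high < b_high;  // Signed on the high words.
      }
      // Equal high words: the low words decide, compared unsigned.
      return static_cast<uint32_t>(a) < static_cast<uint32_t>(b);
    }

    int main() {
      const int64_t vals[] = {INT64_MIN, -4294967296, -1, 0, 1, 4294967296, INT64_MAX};
      for (int64_t a : vals) {
        for (int64_t b : vals) {
          assert(SignedLess64(a, b) == (a < b));
        }
      }
    }
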
@@ -2009,9 +2397,10 @@
       uint32_literals_(std::less<uint32_t>(),
                        graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
-      pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      pc_relative_method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       baker_read_barrier_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       jit_string_patches_(StringReferenceValueComparator(),
                           graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
@@ -2479,89 +2868,6 @@
 void InstructionCodeGeneratorARM::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
 }
 
-void InstructionCodeGeneratorARM::GenerateLongComparesAndJumps(HCondition* cond,
-                                                               Label* true_label,
-                                                               Label* false_label) {
-  LocationSummary* locations = cond->GetLocations();
-  Location left = locations->InAt(0);
-  Location right = locations->InAt(1);
-  IfCondition if_cond = cond->GetCondition();
-
-  Register left_high = left.AsRegisterPairHigh<Register>();
-  Register left_low = left.AsRegisterPairLow<Register>();
-  IfCondition true_high_cond = if_cond;
-  IfCondition false_high_cond = cond->GetOppositeCondition();
-  Condition final_condition = ARMUnsignedCondition(if_cond);  // unsigned on lower part
-
-  // Set the conditions for the test, remembering that == needs to be
-  // decided using the low words.
-  switch (if_cond) {
-    case kCondEQ:
-    case kCondNE:
-      // Nothing to do.
-      break;
-    case kCondLT:
-      false_high_cond = kCondGT;
-      break;
-    case kCondLE:
-      true_high_cond = kCondLT;
-      break;
-    case kCondGT:
-      false_high_cond = kCondLT;
-      break;
-    case kCondGE:
-      true_high_cond = kCondGT;
-      break;
-    case kCondB:
-      false_high_cond = kCondA;
-      break;
-    case kCondBE:
-      true_high_cond = kCondB;
-      break;
-    case kCondA:
-      false_high_cond = kCondB;
-      break;
-    case kCondAE:
-      true_high_cond = kCondA;
-      break;
-  }
-  if (right.IsConstant()) {
-    int64_t value = right.GetConstant()->AsLongConstant()->GetValue();
-    int32_t val_low = Low32Bits(value);
-    int32_t val_high = High32Bits(value);
-
-    __ CmpConstant(left_high, val_high);
-    if (if_cond == kCondNE) {
-      __ b(true_label, ARMCondition(true_high_cond));
-    } else if (if_cond == kCondEQ) {
-      __ b(false_label, ARMCondition(false_high_cond));
-    } else {
-      __ b(true_label, ARMCondition(true_high_cond));
-      __ b(false_label, ARMCondition(false_high_cond));
-    }
-    // Must be equal high, so compare the lows.
-    __ CmpConstant(left_low, val_low);
-  } else {
-    Register right_high = right.AsRegisterPairHigh<Register>();
-    Register right_low = right.AsRegisterPairLow<Register>();
-
-    __ cmp(left_high, ShifterOperand(right_high));
-    if (if_cond == kCondNE) {
-      __ b(true_label, ARMCondition(true_high_cond));
-    } else if (if_cond == kCondEQ) {
-      __ b(false_label, ARMCondition(false_high_cond));
-    } else {
-      __ b(true_label, ARMCondition(true_high_cond));
-      __ b(false_label, ARMCondition(false_high_cond));
-    }
-    // Must be equal high, so compare the lows.
-    __ cmp(left_low, ShifterOperand(right_low));
-  }
-  // The last comparison might be unsigned.
-  // TODO: optimize cases where this is always true/false
-  __ b(true_label, final_condition);
-}
-
 void InstructionCodeGeneratorARM::GenerateCompareTestAndBranch(HCondition* condition,
                                                                Label* true_target_in,
                                                                Label* false_target_in) {
@@ -2596,7 +2902,7 @@
   Label* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in;
 
   DCHECK_EQ(condition->InputAt(0)->GetType(), Primitive::kPrimLong);
-  GenerateLongComparesAndJumps(condition, true_target, false_target);
+  GenerateLongComparesAndJumps(condition, true_target, false_target, codegen_);
 
   if (false_target != &fallthrough_target) {
     __ b(false_target);
@@ -2911,6 +3217,80 @@
   __ nop();
 }
 
+// `temp` is an extra temporary register that is used for some conditions;
+// callers need not specify it, in which case the method will use a scratch
+// register instead.
+void CodeGeneratorARM::GenerateConditionWithZero(IfCondition condition,
+                                                 Register out,
+                                                 Register in,
+                                                 Register temp) {
+  switch (condition) {
+    case kCondEQ:
+    // x <= 0 iff x == 0 when the comparison is unsigned.
+    case kCondBE:
+      if (temp == kNoRegister || (ArmAssembler::IsLowRegister(out) && out != in)) {
+        temp = out;
+      }
+
+      // Avoid 32-bit instructions if possible; note that `in` and `temp` must be
+      // different as well.
+      if (ArmAssembler::IsLowRegister(in) && ArmAssembler::IsLowRegister(temp) && in != temp) {
+        // temp = - in; only 0 sets the carry flag.
+        __ rsbs(temp, in, ShifterOperand(0));
+
+        if (out == in) {
+          std::swap(in, temp);
+        }
+
+        // out = - in + in + carry = carry
+        __ adc(out, temp, ShifterOperand(in));
+      } else {
+        // If `in` is 0, it has 32 leading zeros; any other value has fewer.
+        __ clz(out, in);
+        // Any number less than 32 logically shifted right by 5 bits results in 0;
+        // the same operation on 32 yields 1.
+        __ Lsr(out, out, 5);
+      }
+
+      break;
+    case kCondNE:
+    // x > 0 iff x != 0 when the comparison is unsigned.
+    case kCondA:
+      if (out == in) {
+        if (temp == kNoRegister || in == temp) {
+          temp = IP;
+        }
+      } else if (temp == kNoRegister || !ArmAssembler::IsLowRegister(temp)) {
+        temp = out;
+      }
+
+      // temp = in - 1; only 0 does not set the carry flag.
+      __ subs(temp, in, ShifterOperand(1));
+      // out = in + ~temp + carry = in + (-(in - 1) - 1) + carry = in - in + 1 - 1 + carry = carry
+      __ sbc(out, in, ShifterOperand(temp));
+      break;
+    case kCondGE:
+      __ mvn(out, ShifterOperand(in));
+      in = out;
+      FALLTHROUGH_INTENDED;
+    case kCondLT:
+      // We only care about the sign bit.
+      __ Lsr(out, in, 31);
+      break;
+    case kCondAE:
+      // Trivially true.
+      __ mov(out, ShifterOperand(1));
+      break;
+    case kCondB:
+      // Trivially false.
+      __ mov(out, ShifterOperand(0));
+      break;
+    default:
+      LOG(FATAL) << "Unexpected condition " << condition;
+      UNREACHABLE();
+  }
+}
+
 void LocationsBuilderARM::HandleCondition(HCondition* cond) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(cond, LocationSummary::kNoCall);
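
GenerateConditionWithZero, added above, leans on two branchless idioms: for EQ, CLZ yields 32 exactly when the input is 0, so a logical shift right by 5 materializes the boolean; for NE, SUBS temp, in, #1 sets the carry exactly when in != 0 (no borrow occurs), and SBC out, in, temp then collapses to that carry. A standalone sketch modeling the flag semantics in plain C++:

    #include <cassert>
    #include <cstdint>

    static uint32_t Clz32(uint32_t x) {  // Portable CLZ; returns 32 for 0.
      uint32_t n = 0;
      while (n < 32 && (x & (UINT32_C(1) << (31 - n))) == 0) {
        ++n;
      }
      return n;
    }

    int main() {
      const uint32_t vals[] = {0u, 1u, 2u, 0x80000000u, 0xffffffffu};
      for (uint32_t x : vals) {
        // EQ: only clz(0) == 32, and 32 >> 5 == 1 while anything smaller yields 0.
        assert((Clz32(x) >> 5) == static_cast<uint32_t>(x == 0));
        // NE: SUBS x, #1 sets carry (no borrow) exactly when x >= 1.
        const uint32_t carry = (x >= 1u) ? 1u : 0u;
        const uint32_t temp = x - 1u;
        // SBC computes in + ~temp + carry, which reduces to the carry itself.
        const uint32_t out = x + ~temp + carry;
        assert(out == static_cast<uint32_t>(x != 0));
      }
    }
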
@@ -2947,48 +3327,48 @@
     return;
   }
 
-  const Register out = cond->GetLocations()->Out().AsRegister<Register>();
+  const Primitive::Type type = cond->GetLeft()->GetType();
 
-  if (ArmAssembler::IsLowRegister(out) && CanGenerateTest(cond, codegen_->GetAssembler())) {
-    const auto condition = GenerateTest(cond, false, codegen_);
-
-    __ it(condition.first);
-    __ mov(out, ShifterOperand(1), condition.first);
-    __ it(condition.second);
-    __ mov(out, ShifterOperand(0), condition.second);
+  if (Primitive::IsFloatingPointType(type)) {
+    GenerateConditionGeneric(cond, codegen_);
     return;
   }
 
-  // Convert the jumps into the result.
-  Label done_label;
-  Label* const final_label = codegen_->GetFinalLabel(cond, &done_label);
+  DCHECK(Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) << type;
 
-  if (cond->InputAt(0)->GetType() == Primitive::kPrimLong) {
-    Label true_label, false_label;
+  const IfCondition condition = cond->GetCondition();
 
-    GenerateLongComparesAndJumps(cond, &true_label, &false_label);
+  // A condition with only one boolean input, or with two boolean inputs but a comparison
+  // other than equality or inequality, results from transformations done by the instruction
+  // simplifier and is handled as a regular condition with integral inputs.
+  if (type == Primitive::kPrimBoolean &&
+      cond->GetRight()->GetType() == Primitive::kPrimBoolean &&
+      (condition == kCondEQ || condition == kCondNE)) {
+    const LocationSummary* const locations = cond->GetLocations();
+    Register left = locations->InAt(0).AsRegister<Register>();
+    const Register out = locations->Out().AsRegister<Register>();
+    const Location right_loc = locations->InAt(1);
 
-    // False case: result = 0.
-    __ Bind(&false_label);
-    __ LoadImmediate(out, 0);
-    __ b(final_label);
+    // The constant case is handled by the instruction simplifier.
+    DCHECK(!right_loc.IsConstant());
 
-    // True case: result = 1.
-    __ Bind(&true_label);
-    __ LoadImmediate(out, 1);
-  } else {
-    DCHECK(CanGenerateTest(cond, codegen_->GetAssembler()));
+    Register right = right_loc.AsRegister<Register>();
 
-    const auto condition = GenerateTest(cond, false, codegen_);
+    // Avoid 32-bit instructions if possible.
+    if (out == right) {
+      std::swap(left, right);
+    }
 
-    __ mov(out, ShifterOperand(0), AL, kCcKeep);
-    __ b(final_label, condition.second);
-    __ LoadImmediate(out, 1);
+    __ eor(out, left, ShifterOperand(right));
+
+    if (condition == kCondEQ) {
+      __ eor(out, out, ShifterOperand(1));
+    }
+
+    return;
   }
 
-  if (done_label.IsLinked()) {
-    __ Bind(&done_label);
-  }
+  GenerateConditionIntegralOrNonPrimitive(cond, codegen_);
 }
 
 void LocationsBuilderARM::VisitEqual(HEqual* comp) {
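
The boolean fast path above uses EOR: for 0/1 operands, a != b is exactly a ^ b, and a == b is (a ^ b) ^ 1, so no flags, branches, or IT blocks are needed. A trivial standalone check:

    #include <cassert>

    int main() {
      for (int a : {0, 1}) {
        for (int b : {0, 1}) {
          assert((a ^ b) == static_cast<int>(a != b));        // kCondNE.
          assert(((a ^ b) ^ 1) == static_cast<int>(a == b));  // kCondEQ.
        }
      }
    }
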
@@ -8588,6 +8968,18 @@
     case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
       callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
       break;
+    case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: {
+      DCHECK(GetCompilerOptions().IsBootImage());
+      Register temp_reg = temp.AsRegister<Register>();
+      PcRelativePatchInfo* labels = NewPcRelativeMethodPatch(invoke->GetTargetMethod());
+      __ BindTrackedLabel(&labels->movw_label);
+      __ movw(temp_reg, /* placeholder */ 0u);
+      __ BindTrackedLabel(&labels->movt_label);
+      __ movt(temp_reg, /* placeholder */ 0u);
+      __ BindTrackedLabel(&labels->add_pc_label);
+      __ add(temp_reg, temp_reg, ShifterOperand(PC));
+      break;
+    }
     case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
       __ LoadImmediate(temp.AsRegister<Register>(), invoke->GetMethodAddress());
       break;
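
The MOVW/MOVT + ADD PC sequence above is the Thumb2 counterpart of the AArch64 ADRP+ADD pair: the linker patches target - (address of the ADD + 4) into the MOVW/MOVT placeholders, since reading PC in Thumb state yields the current instruction's address plus 4. A standalone illustration of that arithmetic; the addresses are invented, and the +4 displacement is the Thumb PC-read rule rather than something visible in this hunk:

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint32_t target = 0x70123456u;
      const uint32_t add_insn_address = 0x7000b000u;
      // Value the linker writes into the MOVW (low half) and MOVT (high half).
      const uint32_t placeholder = target - (add_insn_address + 4u);
      const uint32_t pc_at_add = add_insn_address + 4u;  // Thumb: PC reads as insn address + 4.
      assert(placeholder + pc_at_add == target);
    }
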
@@ -8680,9 +9072,11 @@
   __ blx(LR);
 }
 
-CodeGeneratorARM::PcRelativePatchInfo* CodeGeneratorARM::NewPcRelativeStringPatch(
-    const DexFile& dex_file, dex::StringIndex string_index) {
-  return NewPcRelativePatch(dex_file, string_index.index_, &pc_relative_string_patches_);
+CodeGeneratorARM::PcRelativePatchInfo* CodeGeneratorARM::NewPcRelativeMethodPatch(
+    MethodReference target_method) {
+  return NewPcRelativePatch(*target_method.dex_file,
+                            target_method.dex_method_index,
+                            &pc_relative_method_patches_);
 }
 
 CodeGeneratorARM::PcRelativePatchInfo* CodeGeneratorARM::NewPcRelativeTypePatch(
@@ -8695,6 +9089,11 @@
   return NewPcRelativePatch(dex_file, type_index.index_, &type_bss_entry_patches_);
 }
 
+CodeGeneratorARM::PcRelativePatchInfo* CodeGeneratorARM::NewPcRelativeStringPatch(
+    const DexFile& dex_file, dex::StringIndex string_index) {
+  return NewPcRelativePatch(dex_file, string_index.index_, &pc_relative_string_patches_);
+}
+
 CodeGeneratorARM::PcRelativePatchInfo* CodeGeneratorARM::NewPcRelativeDexCacheArrayPatch(
     const DexFile& dex_file, uint32_t element_offset) {
   return NewPcRelativePatch(dex_file, element_offset, &pc_relative_dex_cache_patches_);
@@ -8759,22 +9158,26 @@
   DCHECK(linker_patches->empty());
   size_t size =
       /* MOVW+MOVT for each entry */ 2u * pc_relative_dex_cache_patches_.size() +
-      /* MOVW+MOVT for each entry */ 2u * pc_relative_string_patches_.size() +
+      /* MOVW+MOVT for each entry */ 2u * pc_relative_method_patches_.size() +
       /* MOVW+MOVT for each entry */ 2u * pc_relative_type_patches_.size() +
       /* MOVW+MOVT for each entry */ 2u * type_bss_entry_patches_.size() +
+      /* MOVW+MOVT for each entry */ 2u * pc_relative_string_patches_.size() +
       baker_read_barrier_patches_.size();
   linker_patches->reserve(size);
   EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_,
                                                                linker_patches);
-  if (!GetCompilerOptions().IsBootImage()) {
-    DCHECK(pc_relative_type_patches_.empty());
-    EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_,
+  if (GetCompilerOptions().IsBootImage()) {
+    EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(pc_relative_method_patches_,
                                                                   linker_patches);
-  } else {
     EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(pc_relative_type_patches_,
                                                                 linker_patches);
     EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(pc_relative_string_patches_,
                                                                   linker_patches);
+  } else {
+    DCHECK(pc_relative_method_patches_.empty());
+    DCHECK(pc_relative_type_patches_.empty());
+    EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_,
+                                                                  linker_patches);
   }
   EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_,
                                                               linker_patches);
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 47e6be5..fa1c14d 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -299,7 +299,6 @@
   void GenerateCompareTestAndBranch(HCondition* condition,
                                     Label* true_target,
                                     Label* false_target);
-  void GenerateLongComparesAndJumps(HCondition* cond, Label* true_label, Label* false_label);
   void DivRemOneOrMinusOne(HBinaryOperation* instruction);
   void DivRemByPowerOfTwo(HBinaryOperation* instruction);
   void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
@@ -482,10 +481,11 @@
     Label add_pc_label;
   };
 
-  PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file,
-                                                dex::StringIndex string_index);
+  PcRelativePatchInfo* NewPcRelativeMethodPatch(MethodReference target_method);
   PcRelativePatchInfo* NewPcRelativeTypePatch(const DexFile& dex_file, dex::TypeIndex type_index);
   PcRelativePatchInfo* NewTypeBssEntryPatch(const DexFile& dex_file, dex::TypeIndex type_index);
+  PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file,
+                                                dex::StringIndex string_index);
   PcRelativePatchInfo* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file,
                                                        uint32_t element_offset);
 
@@ -623,6 +623,14 @@
   void GenerateImplicitNullCheck(HNullCheck* instruction) OVERRIDE;
   void GenerateExplicitNullCheck(HNullCheck* instruction) OVERRIDE;
 
+  // `temp` is an extra temporary register that is used for some conditions;
+  // callers need not specify it, in which case the method will use a scratch
+  // register instead.
+  void GenerateConditionWithZero(IfCondition condition,
+                                 Register out,
+                                 Register in,
+                                 Register temp = kNoRegister);
+
  private:
   Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp);
 
@@ -662,12 +670,14 @@
   Uint32ToLiteralMap uint32_literals_;
   // PC-relative patch info for each HArmDexCacheArraysBase.
   ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_;
-  // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC).
-  ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_;
+  // PC-relative method patch info for kBootImageLinkTimePcRelative.
+  ArenaDeque<PcRelativePatchInfo> pc_relative_method_patches_;
   // PC-relative type patch info for kBootImageLinkTimePcRelative.
   ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_;
   // PC-relative type patch info for kBssEntry.
   ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_;
+  // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC).
+  ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_;
   // Baker read barrier patch info.
   ArenaDeque<BakerReadBarrierPatchInfo> baker_read_barrier_patches_;
 
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 7ff100d..096eb07 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -1449,9 +1449,10 @@
       uint64_literals_(std::less<uint64_t>(),
                        graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
-      pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      pc_relative_method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       baker_read_barrier_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       jit_string_patches_(StringReferenceValueComparator(),
                           graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
@@ -4510,6 +4511,17 @@
     case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
       callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
       break;
+    case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: {
+      DCHECK(GetCompilerOptions().IsBootImage());
+      // Add ADRP with its PC-relative method patch.
+      vixl::aarch64::Label* adrp_label = NewPcRelativeMethodPatch(invoke->GetTargetMethod());
+      EmitAdrpPlaceholder(adrp_label, XRegisterFrom(temp));
+      // Add ADD with its PC-relative method patch.
+      vixl::aarch64::Label* add_label =
+          NewPcRelativeMethodPatch(invoke->GetTargetMethod(), adrp_label);
+      EmitAddPlaceholder(add_label, XRegisterFrom(temp), XRegisterFrom(temp));
+      break;
+    }
     case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
       // Load method address from literal pool.
       __ Ldr(XRegisterFrom(temp), DeduplicateUint64Literal(invoke->GetMethodAddress()));
@@ -4633,12 +4645,13 @@
   codegen_->GenerateInvokePolymorphicCall(invoke);
 }
 
-vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeStringPatch(
-    const DexFile& dex_file,
-    dex::StringIndex string_index,
+vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeMethodPatch(
+    MethodReference target_method,
     vixl::aarch64::Label* adrp_label) {
-  return
-      NewPcRelativePatch(dex_file, string_index.index_, adrp_label, &pc_relative_string_patches_);
+  return NewPcRelativePatch(*target_method.dex_file,
+                            target_method.dex_method_index,
+                            adrp_label,
+                            &pc_relative_method_patches_);
 }
 
 vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeTypePatch(
@@ -4655,6 +4668,14 @@
   return NewPcRelativePatch(dex_file, type_index.index_, adrp_label, &type_bss_entry_patches_);
 }
 
+vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeStringPatch(
+    const DexFile& dex_file,
+    dex::StringIndex string_index,
+    vixl::aarch64::Label* adrp_label) {
+  return
+      NewPcRelativePatch(dex_file, string_index.index_, adrp_label, &pc_relative_string_patches_);
+}
+
 vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeDexCacheArrayPatch(
     const DexFile& dex_file,
     uint32_t element_offset,
@@ -4747,9 +4768,10 @@
   DCHECK(linker_patches->empty());
   size_t size =
       pc_relative_dex_cache_patches_.size() +
-      pc_relative_string_patches_.size() +
+      pc_relative_method_patches_.size() +
       pc_relative_type_patches_.size() +
       type_bss_entry_patches_.size() +
+      pc_relative_string_patches_.size() +
       baker_read_barrier_patches_.size();
   linker_patches->reserve(size);
   for (const PcRelativePatchInfo& info : pc_relative_dex_cache_patches_) {
@@ -4758,15 +4780,18 @@
                                                               info.pc_insn_label->GetLocation(),
                                                               info.offset_or_index));
   }
-  if (!GetCompilerOptions().IsBootImage()) {
-    DCHECK(pc_relative_type_patches_.empty());
-    EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_,
+  if (GetCompilerOptions().IsBootImage()) {
+    EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(pc_relative_method_patches_,
                                                                   linker_patches);
-  } else {
     EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(pc_relative_type_patches_,
                                                                 linker_patches);
     EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(pc_relative_string_patches_,
                                                                   linker_patches);
+  } else {
+    DCHECK(pc_relative_method_patches_.empty());
+    DCHECK(pc_relative_type_patches_.empty());
+    EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_,
+                                                                  linker_patches);
   }
   EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_,
                                                               linker_patches);
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 56444dc..71e221d 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -549,12 +549,11 @@
     UNIMPLEMENTED(FATAL);
   }
 
-  // Add a new PC-relative string patch for an instruction and return the label
+  // Add a new PC-relative method patch for an instruction and return the label
   // to be bound before the instruction. The instruction will be either the
   // ADRP (pass `adrp_label = null`) or the ADD (pass `adrp_label` pointing
   // to the associated ADRP patch label).
-  vixl::aarch64::Label* NewPcRelativeStringPatch(const DexFile& dex_file,
-                                                 dex::StringIndex string_index,
+  vixl::aarch64::Label* NewPcRelativeMethodPatch(MethodReference target_method,
                                                  vixl::aarch64::Label* adrp_label = nullptr);
 
   // Add a new PC-relative type patch for an instruction and return the label
@@ -573,6 +572,14 @@
                                              dex::TypeIndex type_index,
                                              vixl::aarch64::Label* adrp_label = nullptr);
 
+  // Add a new PC-relative string patch for an instruction and return the label
+  // to be bound before the instruction. The instruction will be either the
+  // ADRP (pass `adrp_label = null`) or the ADD (pass `adrp_label` pointing
+  // to the associated ADRP patch label).
+  vixl::aarch64::Label* NewPcRelativeStringPatch(const DexFile& dex_file,
+                                                 dex::StringIndex string_index,
+                                                 vixl::aarch64::Label* adrp_label = nullptr);
+
   // Add a new PC-relative dex cache array patch for an instruction and return
   // the label to be bound before the instruction. The instruction will be
   // either the ADRP (pass `adrp_label = null`) or the LDR (pass `adrp_label`
@@ -787,12 +794,14 @@
   Uint64ToLiteralMap uint64_literals_;
   // PC-relative DexCache access info.
   ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_;
-  // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC).
-  ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_;
+  // PC-relative method patch info for kBootImageLinkTimePcRelative.
+  ArenaDeque<PcRelativePatchInfo> pc_relative_method_patches_;
   // PC-relative type patch info for kBootImageLinkTimePcRelative.
   ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_;
   // PC-relative type patch info for kBssEntry.
   ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_;
+  // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC).
+  ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_;
   // Baker read barrier patch info.
   ArenaDeque<BakerReadBarrierPatchInfo> baker_read_barrier_patches_;
 
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 015e6dd..1f8e1ef 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -1775,6 +1775,34 @@
   }
 }
 
+static int64_t AdjustConstantForCondition(int64_t value,
+                                          IfCondition* condition,
+                                          IfCondition* opposite) {
+  if (value == 1) {
+    if (*condition == kCondB) {
+      value = 0;
+      *condition = kCondEQ;
+      *opposite = kCondNE;
+    } else if (*condition == kCondAE) {
+      value = 0;
+      *condition = kCondNE;
+      *opposite = kCondEQ;
+    }
+  } else if (value == -1) {
+    if (*condition == kCondGT) {
+      value = 0;
+      *condition = kCondGE;
+      *opposite = kCondLT;
+    } else if (*condition == kCondLE) {
+      value = 0;
+      *condition = kCondLT;
+      *opposite = kCondGE;
+    }
+  }
+
+  return value;
+}
+
 static std::pair<vixl32::Condition, vixl32::Condition> GenerateLongTestConstant(
     HCondition* condition,
     bool invert,
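AdjustConstantForCondition() folds comparisons against +1 and -1 into comparisons against 0, so the zero special cases below fire more often. The four rewrites are plain order identities; a standalone check (not ART code):

    #include <cassert>
    #include <cstdint>

    void CheckAdjustConstantIdentities(uint32_t xu, int32_t xs) {
      assert((xu <  1u) == (xu == 0u));  // kCondB  1  -> kCondEQ 0
      assert((xu >= 1u) == (xu != 0u));  // kCondAE 1  -> kCondNE 0
      assert((xs >  -1) == (xs >= 0));   // kCondGT -1 -> kCondGE 0
      assert((xs <= -1) == (xs <  0));   // kCondLE -1 -> kCondLT 0
    }
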
@@ -1797,7 +1825,37 @@
 
   const vixl32::Register left_high = HighRegisterFrom(left);
   const vixl32::Register left_low = LowRegisterFrom(left);
-  int64_t value = Int64ConstantFrom(right);
+  int64_t value = AdjustConstantForCondition(Int64ConstantFrom(right), &cond, &opposite);
+  UseScratchRegisterScope temps(codegen->GetVIXLAssembler());
+
+  // Comparisons against 0 are common enough to deserve special attention.
+  if (value == 0) {
+    switch (cond) {
+      case kCondNE:
+      // x > 0 iff x != 0 when the comparison is unsigned.
+      case kCondA:
+        ret = std::make_pair(ne, eq);
+        FALLTHROUGH_INTENDED;
+      case kCondEQ:
+      // x <= 0 iff x == 0 when the comparison is unsigned.
+      case kCondBE:
+        __ Orrs(temps.Acquire(), left_low, left_high);
+        return ret;
+      case kCondLT:
+      case kCondGE:
+        __ Cmp(left_high, 0);
+        return std::make_pair(ARMCondition(cond), ARMCondition(opposite));
+      // Trivially true or false.
+      case kCondB:
+        ret = std::make_pair(ne, eq);
+        FALLTHROUGH_INTENDED;
+      case kCondAE:
+        __ Cmp(left_low, left_low);
+        return ret;
+      default:
+        break;
+    }
+  }
 
   switch (cond) {
     case kCondEQ:
@@ -1842,8 +1900,6 @@
       FALLTHROUGH_INTENDED;
     case kCondGE:
     case kCondLT: {
-      UseScratchRegisterScope temps(codegen->GetVIXLAssembler());
-
       __ Cmp(left_low, Low32Bits(value));
       __ Sbcs(temps.Acquire(), left_high, High32Bits(value));
       ret = std::make_pair(ARMCondition(cond), ARMCondition(opposite));
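For kCondGE/kCondLT the emitted CMP/SBCS pair chains a borrow through the two 32-bit halves, so the flags after SBCS are those of the full 64-bit subtraction `left - value`. The same arithmetic in portable form (standalone sketch; names are illustrative):

    #include <cstdint>

    // x < c computed the way CMP low / SBCS high does on AArch32.
    bool SignedLess64ViaBorrow(int64_t x, int64_t c) {
      uint32_t xl = static_cast<uint32_t>(x);
      uint32_t cl = static_cast<uint32_t>(c);
      int32_t xh = static_cast<int32_t>(x >> 32);
      int32_t ch = static_cast<int32_t>(c >> 32);
      int64_t borrow = (xl < cl) ? 1 : 0;                     // CMP: low-word borrow.
      int64_t high = static_cast<int64_t>(xh) - ch - borrow;  // SBCS.
      return high < 0;  // kCondLT; kCondGE is the negation.
    }

The sign of the high-word difference (with the borrow folded in) equals the sign of the whole 64-bit difference, e.g. SignedLess64ViaBorrow(-5, 3) is true and SignedLess64ViaBorrow(3, 3) is false.
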
@@ -1961,18 +2017,22 @@
 static bool CanGenerateTest(HCondition* condition, ArmVIXLAssembler* assembler) {
   if (condition->GetLeft()->GetType() == Primitive::kPrimLong) {
     const LocationSummary* const locations = condition->GetLocations();
-    const IfCondition c = condition->GetCondition();
 
     if (locations->InAt(1).IsConstant()) {
-      const int64_t value = Int64ConstantFrom(locations->InAt(1));
+      IfCondition c = condition->GetCondition();
+      IfCondition opposite = condition->GetOppositeCondition();
+      const int64_t value =
+          AdjustConstantForCondition(Int64ConstantFrom(locations->InAt(1)), &c, &opposite);
 
       if (c < kCondLT || c > kCondGE) {
         // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
         // we check that the least significant half of the first input to be compared
         // is in a low register (the other half is read outside an IT block), and
         // the constant fits in an 8-bit unsigned integer, so that a 16-bit CMP
-        // encoding can be used.
-        if (!LowRegisterFrom(locations->InAt(0)).IsLow() || !IsUint<8>(Low32Bits(value))) {
+        // encoding can be used; 0 is always handled, no matter what registers are
+        // used by the first input.
+        if (value != 0 &&
+            (!LowRegisterFrom(locations->InAt(0)).IsLow() || !IsUint<8>(Low32Bits(value)))) {
           return false;
         }
       // TODO(VIXL): The rest of the checks are there to keep the backend in sync with
@@ -1991,6 +2051,354 @@
   return true;
 }
 
+static void GenerateConditionGeneric(HCondition* cond, CodeGeneratorARMVIXL* codegen) {
+  DCHECK(CanGenerateTest(cond, codegen->GetAssembler()));
+
+  const vixl32::Register out = OutputRegister(cond);
+  const auto condition = GenerateTest(cond, false, codegen);
+
+  __ Mov(LeaveFlags, out, 0);
+
+  if (out.IsLow()) {
+    // We use the scope because of the IT block that follows.
+    ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
+                             2 * vixl32::k16BitT32InstructionSizeInBytes,
+                             CodeBufferCheckScope::kExactSize);
+
+    __ it(condition.first);
+    __ mov(condition.first, out, 1);
+  } else {
+    vixl32::Label done_label;
+    vixl32::Label* const final_label = codegen->GetFinalLabel(cond, &done_label);
+
+    __ B(condition.second, final_label, /* far_target */ false);
+    __ Mov(out, 1);
+
+    if (done_label.IsReferenced()) {
+      __ Bind(&done_label);
+    }
+  }
+}
+
+static void GenerateEqualLong(HCondition* cond, CodeGeneratorARMVIXL* codegen) {
+  DCHECK_EQ(cond->GetLeft()->GetType(), Primitive::kPrimLong);
+
+  const LocationSummary* const locations = cond->GetLocations();
+  IfCondition condition = cond->GetCondition();
+  const vixl32::Register out = OutputRegister(cond);
+  const Location left = locations->InAt(0);
+  const Location right = locations->InAt(1);
+  vixl32::Register left_high = HighRegisterFrom(left);
+  vixl32::Register left_low = LowRegisterFrom(left);
+  vixl32::Register temp;
+  UseScratchRegisterScope temps(codegen->GetVIXLAssembler());
+
+  if (right.IsConstant()) {
+    IfCondition opposite = cond->GetOppositeCondition();
+    const int64_t value = AdjustConstantForCondition(Int64ConstantFrom(right),
+                                                     &condition,
+                                                     &opposite);
+    Operand right_high = High32Bits(value);
+    Operand right_low = Low32Bits(value);
+
+    // The output uses Location::kNoOutputOverlap.
+    if (out.Is(left_high)) {
+      std::swap(left_low, left_high);
+      std::swap(right_low, right_high);
+    }
+
+    __ Sub(out, left_low, right_low);
+    temp = temps.Acquire();
+    __ Sub(temp, left_high, right_high);
+  } else {
+    DCHECK(right.IsRegisterPair());
+    temp = temps.Acquire();
+    __ Sub(temp, left_high, HighRegisterFrom(right));
+    __ Sub(out, left_low, LowRegisterFrom(right));
+  }
+
+  // Need to check after calling AdjustConstantForCondition().
+  DCHECK(condition == kCondEQ || condition == kCondNE) << condition;
+
+  if (condition == kCondNE && out.IsLow()) {
+    __ Orrs(out, out, temp);
+
+    // We use the scope because of the IT block that follows.
+    ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
+                             2 * vixl32::k16BitT32InstructionSizeInBytes,
+                             CodeBufferCheckScope::kExactSize);
+
+    __ it(ne);
+    __ mov(ne, out, 1);
+  } else {
+    __ Orr(out, out, temp);
+    codegen->GenerateConditionWithZero(condition, out, out, temp);
+  }
+}
+
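GenerateEqualLong() turns 64-bit (in)equality into two 32-bit subtractions whose results are ORed together: the OR is zero exactly when both halves match, so only a zero test on `out` is needed afterwards. Standalone check of the identity:

    #include <cstdint>

    bool Equal64ViaHalves(int64_t x, int64_t y) {
      uint32_t low =
          static_cast<uint32_t>(x) - static_cast<uint32_t>(y);              // SUB out, ...
      uint32_t high =
          static_cast<uint32_t>(x >> 32) - static_cast<uint32_t>(y >> 32);  // SUB temp, ...
      return (low | high) == 0u;  // ORR(S) out, out, temp.
    }
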
+static void GenerateLongComparesAndJumps(HCondition* cond,
+                                         vixl32::Label* true_label,
+                                         vixl32::Label* false_label,
+                                         CodeGeneratorARMVIXL* codegen,
+                                         bool is_far_target = true) {
+  LocationSummary* locations = cond->GetLocations();
+  Location left = locations->InAt(0);
+  Location right = locations->InAt(1);
+  IfCondition if_cond = cond->GetCondition();
+
+  vixl32::Register left_high = HighRegisterFrom(left);
+  vixl32::Register left_low = LowRegisterFrom(left);
+  IfCondition true_high_cond = if_cond;
+  IfCondition false_high_cond = cond->GetOppositeCondition();
+  vixl32::Condition final_condition = ARMUnsignedCondition(if_cond);  // unsigned on lower part
+
+  // Set the conditions for the test, remembering that == needs to be
+  // decided using the low words.
+  switch (if_cond) {
+    case kCondEQ:
+    case kCondNE:
+      // Nothing to do.
+      break;
+    case kCondLT:
+      false_high_cond = kCondGT;
+      break;
+    case kCondLE:
+      true_high_cond = kCondLT;
+      break;
+    case kCondGT:
+      false_high_cond = kCondLT;
+      break;
+    case kCondGE:
+      true_high_cond = kCondGT;
+      break;
+    case kCondB:
+      false_high_cond = kCondA;
+      break;
+    case kCondBE:
+      true_high_cond = kCondB;
+      break;
+    case kCondA:
+      false_high_cond = kCondB;
+      break;
+    case kCondAE:
+      true_high_cond = kCondA;
+      break;
+  }
+  if (right.IsConstant()) {
+    int64_t value = Int64ConstantFrom(right);
+    int32_t val_low = Low32Bits(value);
+    int32_t val_high = High32Bits(value);
+
+    __ Cmp(left_high, val_high);
+    if (if_cond == kCondNE) {
+      __ B(ARMCondition(true_high_cond), true_label, is_far_target);
+    } else if (if_cond == kCondEQ) {
+      __ B(ARMCondition(false_high_cond), false_label, is_far_target);
+    } else {
+      __ B(ARMCondition(true_high_cond), true_label, is_far_target);
+      __ B(ARMCondition(false_high_cond), false_label, is_far_target);
+    }
+    // Must be equal high, so compare the lows.
+    __ Cmp(left_low, val_low);
+  } else {
+    vixl32::Register right_high = HighRegisterFrom(right);
+    vixl32::Register right_low = LowRegisterFrom(right);
+
+    __ Cmp(left_high, right_high);
+    if (if_cond == kCondNE) {
+      __ B(ARMCondition(true_high_cond), true_label, is_far_target);
+    } else if (if_cond == kCondEQ) {
+      __ B(ARMCondition(false_high_cond), false_label, is_far_target);
+    } else {
+      __ B(ARMCondition(true_high_cond), true_label, is_far_target);
+      __ B(ARMCondition(false_high_cond), false_label, is_far_target);
+    }
+    // Must be equal high, so compare the lows.
+    __ Cmp(left_low, right_low);
+  }
+  // The last comparison might be unsigned.
+  // TODO: optimize cases where this is always true/false
+  __ B(final_condition, true_label, is_far_target);
+}
+
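GenerateLongComparesAndJumps() is the branching counterpart: it decides on the high words first (signed for the LT/LE/GT/GE family, unsigned for B/BE/A/AE), and only when they are equal falls through to an unsigned comparison of the low words. The same control flow in a standalone sketch:

    #include <cstdint>

    bool SignedLess64ViaBranches(int64_t x, int64_t y) {
      int32_t xh = static_cast<int32_t>(x >> 32);
      int32_t yh = static_cast<int32_t>(y >> 32);
      if (xh != yh) {
        return xh < yh;  // Decided by the high words, signed.
      }
      // "Must be equal high, so compare the lows" -- unsigned on the lower part.
      return static_cast<uint32_t>(x) < static_cast<uint32_t>(y);
    }
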
+static void GenerateConditionLong(HCondition* cond, CodeGeneratorARMVIXL* codegen) {
+  DCHECK_EQ(cond->GetLeft()->GetType(), Primitive::kPrimLong);
+
+  const LocationSummary* const locations = cond->GetLocations();
+  IfCondition condition = cond->GetCondition();
+  const vixl32::Register out = OutputRegister(cond);
+  const Location left = locations->InAt(0);
+  const Location right = locations->InAt(1);
+
+  if (right.IsConstant()) {
+    IfCondition opposite = cond->GetOppositeCondition();
+
+    // Comparisons against 0 are common enough to deserve special attention.
+    if (AdjustConstantForCondition(Int64ConstantFrom(right), &condition, &opposite) == 0) {
+      switch (condition) {
+        case kCondNE:
+        case kCondA:
+          if (out.IsLow()) {
+            // We only care if both input registers are 0 or not.
+            __ Orrs(out, LowRegisterFrom(left), HighRegisterFrom(left));
+
+            // We use the scope because of the IT block that follows.
+            ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
+                                     2 * vixl32::k16BitT32InstructionSizeInBytes,
+                                     CodeBufferCheckScope::kExactSize);
+
+            __ it(ne);
+            __ mov(ne, out, 1);
+            return;
+          }
+
+          FALLTHROUGH_INTENDED;
+        case kCondEQ:
+        case kCondBE:
+          // We only care if both input registers are 0 or not.
+          __ Orr(out, LowRegisterFrom(left), HighRegisterFrom(left));
+          codegen->GenerateConditionWithZero(condition, out, out);
+          return;
+        case kCondLT:
+        case kCondGE:
+          // We only care about the sign bit.
+          FALLTHROUGH_INTENDED;
+        case kCondAE:
+        case kCondB:
+          codegen->GenerateConditionWithZero(condition, out, HighRegisterFrom(left));
+          return;
+        case kCondLE:
+        case kCondGT:
+        default:
+          break;
+      }
+    }
+  }
+
+  if ((condition == kCondEQ || condition == kCondNE) &&
+      // If `out` is a low register, then the GenerateConditionGeneric()
+      // function generates a shorter code sequence that is still branchless.
+      (!out.IsLow() || !CanGenerateTest(cond, codegen->GetAssembler()))) {
+    GenerateEqualLong(cond, codegen);
+    return;
+  }
+
+  if (CanGenerateTest(cond, codegen->GetAssembler())) {
+    GenerateConditionGeneric(cond, codegen);
+    return;
+  }
+
+  // Convert the jumps into the result.
+  vixl32::Label done_label;
+  vixl32::Label* const final_label = codegen->GetFinalLabel(cond, &done_label);
+  vixl32::Label true_label, false_label;
+
+  GenerateLongComparesAndJumps(cond, &true_label, &false_label, codegen, /* is_far_target */ false);
+
+  // False case: result = 0.
+  __ Bind(&false_label);
+  __ Mov(out, 0);
+  __ B(final_label);
+
+  // True case: result = 1.
+  __ Bind(&true_label);
+  __ Mov(out, 1);
+
+  if (done_label.IsReferenced()) {
+    __ Bind(&done_label);
+  }
+}
+
+static void GenerateConditionIntegralOrNonPrimitive(HCondition* cond,
+                                                    CodeGeneratorARMVIXL* codegen) {
+  const Primitive::Type type = cond->GetLeft()->GetType();
+
+  DCHECK(Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) << type;
+
+  if (type == Primitive::kPrimLong) {
+    GenerateConditionLong(cond, codegen);
+    return;
+  }
+
+  IfCondition condition = cond->GetCondition();
+  vixl32::Register in = InputRegisterAt(cond, 0);
+  const vixl32::Register out = OutputRegister(cond);
+  const Location right = cond->GetLocations()->InAt(1);
+  int64_t value;
+
+  if (right.IsConstant()) {
+    IfCondition opposite = cond->GetOppositeCondition();
+
+    value = AdjustConstantForCondition(Int64ConstantFrom(right), &condition, &opposite);
+
+    // Comparisons against 0 are common enough to deserve special attention.
+    if (value == 0) {
+      switch (condition) {
+        case kCondNE:
+        case kCondA:
+          if (out.IsLow() && out.Is(in)) {
+            __ Cmp(out, 0);
+
+            // We use the scope because of the IT block that follows.
+            ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
+                                     2 * vixl32::k16BitT32InstructionSizeInBytes,
+                                     CodeBufferCheckScope::kExactSize);
+
+            __ it(ne);
+            __ mov(ne, out, 1);
+            return;
+          }
+
+          FALLTHROUGH_INTENDED;
+        case kCondEQ:
+        case kCondBE:
+        case kCondLT:
+        case kCondGE:
+        case kCondAE:
+        case kCondB:
+          codegen->GenerateConditionWithZero(condition, out, in);
+          return;
+        case kCondLE:
+        case kCondGT:
+        default:
+          break;
+      }
+    }
+  }
+
+  if (condition == kCondEQ || condition == kCondNE) {
+    Operand operand(0);
+
+    if (right.IsConstant()) {
+      operand = Operand::From(value);
+    } else if (out.Is(RegisterFrom(right))) {
+      // Avoid 32-bit instructions if possible.
+      operand = InputOperandAt(cond, 0);
+      in = RegisterFrom(right);
+    } else {
+      operand = InputOperandAt(cond, 1);
+    }
+
+    if (condition == kCondNE && out.IsLow()) {
+      __ Subs(out, in, operand);
+
+      // We use the scope because of the IT block that follows.
+      ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
+                               2 * vixl32::k16BitT32InstructionSizeInBytes,
+                               CodeBufferCheckScope::kExactSize);
+
+      __ it(ne);
+      __ mov(ne, out, 1);
+    } else {
+      __ Sub(out, in, operand);
+      codegen->GenerateConditionWithZero(condition, out, out);
+    }
+
+    return;
+  }
+
+  GenerateConditionGeneric(cond, codegen);
+}
+
 static bool CanEncodeConstantAs8BitImmediate(HConstant* constant) {
   const Primitive::Type type = constant->GetType();
   bool ret = false;
@@ -2093,9 +2501,10 @@
       uint32_literals_(std::less<uint32_t>(),
                        graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
-      pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      pc_relative_method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       baker_read_barrier_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       jit_string_patches_(StringReferenceValueComparator(),
                           graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
@@ -2547,92 +2956,10 @@
 void InstructionCodeGeneratorARMVIXL::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
 }
 
-void InstructionCodeGeneratorARMVIXL::GenerateLongComparesAndJumps(HCondition* cond,
-                                                                   vixl32::Label* true_label,
-                                                                   vixl32::Label* false_label) {
-  LocationSummary* locations = cond->GetLocations();
-  Location left = locations->InAt(0);
-  Location right = locations->InAt(1);
-  IfCondition if_cond = cond->GetCondition();
-
-  vixl32::Register left_high = HighRegisterFrom(left);
-  vixl32::Register left_low = LowRegisterFrom(left);
-  IfCondition true_high_cond = if_cond;
-  IfCondition false_high_cond = cond->GetOppositeCondition();
-  vixl32::Condition final_condition = ARMUnsignedCondition(if_cond);  // unsigned on lower part
-
-  // Set the conditions for the test, remembering that == needs to be
-  // decided using the low words.
-  switch (if_cond) {
-    case kCondEQ:
-    case kCondNE:
-      // Nothing to do.
-      break;
-    case kCondLT:
-      false_high_cond = kCondGT;
-      break;
-    case kCondLE:
-      true_high_cond = kCondLT;
-      break;
-    case kCondGT:
-      false_high_cond = kCondLT;
-      break;
-    case kCondGE:
-      true_high_cond = kCondGT;
-      break;
-    case kCondB:
-      false_high_cond = kCondA;
-      break;
-    case kCondBE:
-      true_high_cond = kCondB;
-      break;
-    case kCondA:
-      false_high_cond = kCondB;
-      break;
-    case kCondAE:
-      true_high_cond = kCondA;
-      break;
-  }
-  if (right.IsConstant()) {
-    int64_t value = Int64ConstantFrom(right);
-    int32_t val_low = Low32Bits(value);
-    int32_t val_high = High32Bits(value);
-
-    __ Cmp(left_high, val_high);
-    if (if_cond == kCondNE) {
-      __ B(ARMCondition(true_high_cond), true_label);
-    } else if (if_cond == kCondEQ) {
-      __ B(ARMCondition(false_high_cond), false_label);
-    } else {
-      __ B(ARMCondition(true_high_cond), true_label);
-      __ B(ARMCondition(false_high_cond), false_label);
-    }
-    // Must be equal high, so compare the lows.
-    __ Cmp(left_low, val_low);
-  } else {
-    vixl32::Register right_high = HighRegisterFrom(right);
-    vixl32::Register right_low = LowRegisterFrom(right);
-
-    __ Cmp(left_high, right_high);
-    if (if_cond == kCondNE) {
-      __ B(ARMCondition(true_high_cond), true_label);
-    } else if (if_cond == kCondEQ) {
-      __ B(ARMCondition(false_high_cond), false_label);
-    } else {
-      __ B(ARMCondition(true_high_cond), true_label);
-      __ B(ARMCondition(false_high_cond), false_label);
-    }
-    // Must be equal high, so compare the lows.
-    __ Cmp(left_low, right_low);
-  }
-  // The last comparison might be unsigned.
-  // TODO: optimize cases where this is always true/false
-  __ B(final_condition, true_label);
-}
-
 void InstructionCodeGeneratorARMVIXL::GenerateCompareTestAndBranch(HCondition* condition,
                                                                    vixl32::Label* true_target_in,
-                                                                   vixl32::Label* false_target_in) {
+                                                                   vixl32::Label* false_target_in,
+                                                                   bool is_far_target) {
   if (CanGenerateTest(condition, codegen_->GetAssembler())) {
     vixl32::Label* non_fallthrough_target;
     bool invert;
@@ -2648,7 +2975,7 @@
 
     const auto cond = GenerateTest(condition, invert, codegen_);
 
-    __ B(cond.first, non_fallthrough_target);
+    __ B(cond.first, non_fallthrough_target, is_far_target);
 
     if (false_target_in != nullptr && false_target_in != non_fallthrough_target) {
       __ B(false_target_in);
@@ -2664,7 +2991,7 @@
   vixl32::Label* false_target = (false_target_in == nullptr) ? &fallthrough : false_target_in;
 
   DCHECK_EQ(condition->InputAt(0)->GetType(), Primitive::kPrimLong);
-  GenerateLongComparesAndJumps(condition, true_target, false_target);
+  GenerateLongComparesAndJumps(condition, true_target, false_target, codegen_, is_far_target);
 
   if (false_target != &fallthrough) {
     __ B(false_target);
@@ -2732,7 +3059,7 @@
     // the HCondition, generate the comparison directly.
     Primitive::Type type = condition->InputAt(0)->GetType();
     if (type == Primitive::kPrimLong || Primitive::IsFloatingPointType(type)) {
-      GenerateCompareTestAndBranch(condition, true_target, false_target);
+      GenerateCompareTestAndBranch(condition, true_target, false_target, far_target);
       return;
     }
 
@@ -2751,14 +3078,14 @@
 
     if (right.IsImmediate() && right.GetImmediate() == 0 && (arm_cond.Is(ne) || arm_cond.Is(eq))) {
       if (arm_cond.Is(eq)) {
-        __ CompareAndBranchIfZero(left, non_fallthrough_target);
+        __ CompareAndBranchIfZero(left, non_fallthrough_target, far_target);
       } else {
         DCHECK(arm_cond.Is(ne));
-        __ CompareAndBranchIfNonZero(left, non_fallthrough_target);
+        __ CompareAndBranchIfNonZero(left, non_fallthrough_target, far_target);
       }
     } else {
       __ Cmp(left, right);
-      __ B(arm_cond, non_fallthrough_target);
+      __ B(arm_cond, non_fallthrough_target, far_target);
     }
   }
 
@@ -2975,6 +3302,83 @@
   __ Nop();
 }
 
+// `temp` is an extra temporary register that is used for some conditions;
+// callers may omit it, in which case the method will use a scratch
+// register instead.
+void CodeGeneratorARMVIXL::GenerateConditionWithZero(IfCondition condition,
+                                                     vixl32::Register out,
+                                                     vixl32::Register in,
+                                                     vixl32::Register temp) {
+  switch (condition) {
+    case kCondEQ:
+    // x <= 0 iff x == 0 when the comparison is unsigned.
+    case kCondBE:
+      if (!temp.IsValid() || (out.IsLow() && !out.Is(in))) {
+        temp = out;
+      }
+
+      // Avoid 32-bit instructions if possible; note that `in` and `temp` must be
+      // different as well.
+      if (in.IsLow() && temp.IsLow() && !in.Is(temp)) {
+        // temp = - in; only 0 sets the carry flag.
+        __ Rsbs(temp, in, 0);
+
+        if (out.Is(in)) {
+          std::swap(in, temp);
+        }
+
+        // out = - in + in + carry = carry
+        __ Adc(out, temp, in);
+      } else {
+        // If `in` is 0, then it has 32 leading zeros; otherwise it has fewer.
+        __ Clz(out, in);
+        // Any number less than 32 logically shifted right by 5 bits results in 0;
+        // the same operation on 32 yields 1.
+        __ Lsr(out, out, 5);
+      }
+
+      break;
+    case kCondNE:
+    // x > 0 iff x != 0 when the comparison is unsigned.
+    case kCondA: {
+      UseScratchRegisterScope temps(GetVIXLAssembler());
+
+      if (out.Is(in)) {
+        if (!temp.IsValid() || in.Is(temp)) {
+          temp = temps.Acquire();
+        }
+      } else if (!temp.IsValid() || !temp.IsLow()) {
+        temp = out;
+      }
+
+      // temp = in - 1; only 0 does not set the carry flag.
+      __ Subs(temp, in, 1);
+      // out = in + ~temp + carry = in + (-(in - 1) - 1) + carry = in - in + 1 - 1 + carry = carry
+      __ Sbc(out, in, temp);
+      break;
+    }
+    case kCondGE:
+      __ Mvn(out, in);
+      in = out;
+      FALLTHROUGH_INTENDED;
+    case kCondLT:
+      // We only care about the sign bit.
+      __ Lsr(out, in, 31);
+      break;
+    case kCondAE:
+      // Trivially true.
+      __ Mov(out, 1);
+      break;
+    case kCondB:
+      // Trivially false.
+      __ Mov(out, 0);
+      break;
+    default:
+      LOG(FATAL) << "Unexpected condition " << condition;
+      UNREACHABLE();
+  }
+}
+
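The EQ/NE paths above are carry-flag tricks: RSBS computes `0 - in` and sets the carry only for `in == 0` (no borrow), which ADC then materializes; SUBS computes `in - 1` and clears the carry only for `in == 0`, which SBC recovers. The CLZ path uses the fact that only zero has 32 leading zeros. The first two, replayed in portable C++ (standalone sketch; the `carry` locals model the ARM flags):

    #include <cstdint>

    uint32_t IsZero(uint32_t in) {  // kCondEQ path: RSBS + ADC.
      uint32_t temp = 0u - in;                // RSBS temp, in, 0.
      uint32_t carry = (in == 0u) ? 1u : 0u;  // Carry == no borrow == (0 >= in).
      return temp + in + carry;               // ADC out, temp, in == carry.
    }

    uint32_t IsNonZero(uint32_t in) {  // kCondNE path: SUBS + SBC.
      uint32_t temp = in - 1u;                // SUBS temp, in, 1.
      uint32_t carry = (in >= 1u) ? 1u : 0u;  // Carry == no borrow == (in >= 1).
      return in - temp - (1u - carry);        // SBC out, in, temp == carry.
    }
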
 void LocationsBuilderARMVIXL::HandleCondition(HCondition* cond) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(cond, LocationSummary::kNoCall);
@@ -3011,52 +3415,47 @@
     return;
   }
 
-  const vixl32::Register out = OutputRegister(cond);
+  const Primitive::Type type = cond->GetLeft()->GetType();
 
-  if (out.IsLow() && CanGenerateTest(cond, codegen_->GetAssembler())) {
-    const auto condition = GenerateTest(cond, false, codegen_);
-    // We use the scope because of the IT block that follows.
-    ExactAssemblyScope guard(GetVIXLAssembler(),
-                             4 * vixl32::k16BitT32InstructionSizeInBytes,
-                             CodeBufferCheckScope::kExactSize);
-
-    __ it(condition.first);
-    __ mov(condition.first, out, 1);
-    __ it(condition.second);
-    __ mov(condition.second, out, 0);
+  if (Primitive::IsFloatingPointType(type)) {
+    GenerateConditionGeneric(cond, codegen_);
     return;
   }
 
-  // Convert the jumps into the result.
-  vixl32::Label done_label;
-  vixl32::Label* const final_label = codegen_->GetFinalLabel(cond, &done_label);
+  DCHECK(Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) << type;
 
-  if (cond->InputAt(0)->GetType() == Primitive::kPrimLong) {
-    vixl32::Label true_label, false_label;
+  const IfCondition condition = cond->GetCondition();
 
-    GenerateLongComparesAndJumps(cond, &true_label, &false_label);
+  // A condition with only one boolean input, or with two boolean inputs compared by
+  // something other than equality or inequality, results from transformations done by
+  // the instruction simplifier and is handled as a regular condition with integral
+  // inputs.
+  if (type == Primitive::kPrimBoolean &&
+      cond->GetRight()->GetType() == Primitive::kPrimBoolean &&
+      (condition == kCondEQ || condition == kCondNE)) {
+    vixl32::Register left = InputRegisterAt(cond, 0);
+    const vixl32::Register out = OutputRegister(cond);
+    const Location right_loc = cond->GetLocations()->InAt(1);
 
-    // False case: result = 0.
-    __ Bind(&false_label);
-    __ Mov(out, 0);
-    __ B(final_label);
+    // The constant case is handled by the instruction simplifier.
+    DCHECK(!right_loc.IsConstant());
 
-    // True case: result = 1.
-    __ Bind(&true_label);
-    __ Mov(out, 1);
-  } else {
-    DCHECK(CanGenerateTest(cond, codegen_->GetAssembler()));
+    vixl32::Register right = RegisterFrom(right_loc);
 
-    const auto condition = GenerateTest(cond, false, codegen_);
+    // Avoid 32-bit instructions if possible.
+    if (out.Is(right)) {
+      std::swap(left, right);
+    }
 
-    __ Mov(LeaveFlags, out, 0);
-    __ B(condition.second, final_label, /* far_target */ false);
-    __ Mov(out, 1);
+    __ Eor(out, left, right);
+
+    if (condition == kCondEQ) {
+      __ Eor(out, out, 1);
+    }
+
+    return;
   }
 
-  if (done_label.IsReferenced()) {
-    __ Bind(&done_label);
-  }
+  GenerateConditionIntegralOrNonPrimitive(cond, codegen_);
 }
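For two boolean inputs, equality and inequality collapse to exclusive-or on the 0/1 values: `x != y` is `x ^ y`, and `x == y` flips that result with a second EOR against 1, which is exactly the emitted sequence. Standalone check:

    #include <cassert>
    #include <cstdint>

    void CheckBooleanEor(uint32_t a, uint32_t b) {
      assert(a <= 1u && b <= 1u);  // Boolean inputs hold 0 or 1.
      assert((a ^ b) == static_cast<uint32_t>(a != b));         // kCondNE.
      assert(((a ^ b) ^ 1u) == static_cast<uint32_t>(a == b));  // kCondEQ.
    }
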
 
 void LocationsBuilderARMVIXL::VisitEqual(HEqual* comp) {
@@ -8734,6 +9133,13 @@
     case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
       callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
       break;
+    case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: {
+      DCHECK(GetCompilerOptions().IsBootImage());
+      PcRelativePatchInfo* labels = NewPcRelativeMethodPatch(invoke->GetTargetMethod());
+      vixl32::Register temp_reg = RegisterFrom(temp);
+      EmitMovwMovtPlaceholder(labels, temp_reg);
+      break;
+    }
     case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
       __ Mov(RegisterFrom(temp), Operand::From(invoke->GetMethodAddress()));
       break;
@@ -8850,9 +9256,11 @@
   __ blx(lr);
 }
 
-CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativeStringPatch(
-    const DexFile& dex_file, dex::StringIndex string_index) {
-  return NewPcRelativePatch(dex_file, string_index.index_, &pc_relative_string_patches_);
+CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativeMethodPatch(
+    MethodReference target_method) {
+  return NewPcRelativePatch(*target_method.dex_file,
+                            target_method.dex_method_index,
+                            &pc_relative_method_patches_);
 }
 
 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativeTypePatch(
@@ -8865,6 +9273,11 @@
   return NewPcRelativePatch(dex_file, type_index.index_, &type_bss_entry_patches_);
 }
 
+CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativeStringPatch(
+    const DexFile& dex_file, dex::StringIndex string_index) {
+  return NewPcRelativePatch(dex_file, string_index.index_, &pc_relative_string_patches_);
+}
+
 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativeDexCacheArrayPatch(
     const DexFile& dex_file, uint32_t element_offset) {
   return NewPcRelativePatch(dex_file, element_offset, &pc_relative_dex_cache_patches_);
@@ -8934,22 +9347,26 @@
   DCHECK(linker_patches->empty());
   size_t size =
       /* MOVW+MOVT for each entry */ 2u * pc_relative_dex_cache_patches_.size() +
-      /* MOVW+MOVT for each entry */ 2u * pc_relative_string_patches_.size() +
+      /* MOVW+MOVT for each entry */ 2u * pc_relative_method_patches_.size() +
       /* MOVW+MOVT for each entry */ 2u * pc_relative_type_patches_.size() +
       /* MOVW+MOVT for each entry */ 2u * type_bss_entry_patches_.size() +
+      /* MOVW+MOVT for each entry */ 2u * pc_relative_string_patches_.size() +
       baker_read_barrier_patches_.size();
   linker_patches->reserve(size);
   EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_,
                                                                linker_patches);
-  if (!GetCompilerOptions().IsBootImage()) {
-    DCHECK(pc_relative_type_patches_.empty());
-    EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_,
+  if (GetCompilerOptions().IsBootImage()) {
+    EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(pc_relative_method_patches_,
                                                                   linker_patches);
-  } else {
     EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(pc_relative_type_patches_,
                                                                 linker_patches);
     EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(pc_relative_string_patches_,
                                                                   linker_patches);
+  } else {
+    DCHECK(pc_relative_method_patches_.empty());
+    DCHECK(pc_relative_type_patches_.empty());
+    EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_,
+                                                                  linker_patches);
   }
   EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_,
                                                               linker_patches);
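The reshuffled emission keeps the same invariant on every back end in this change: method and type boot-image patches may only exist when compiling the boot image (hence the DCHECKs in the `else` branch), while string patches exist in both configurations and merely change flavor. A condensed view of that decision (illustrative sketch, not the ART API):

    enum class StringPatchKind { kRelativeString, kStringBssEntry };

    // Boot image: PC-relative reference into the image itself.
    // App code:   reference resolved through a .bss slot at runtime.
    StringPatchKind SelectStringPatchKind(bool is_boot_image) {
      return is_boot_image ? StringPatchKind::kRelativeString
                           : StringPatchKind::kStringBssEntry;
    }
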
diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h
index daba9bf..91e9a3ed 100644
--- a/compiler/optimizing/code_generator_arm_vixl.h
+++ b/compiler/optimizing/code_generator_arm_vixl.h
@@ -400,10 +400,8 @@
                              bool far_target = true);
   void GenerateCompareTestAndBranch(HCondition* condition,
                                     vixl::aarch32::Label* true_target,
-                                    vixl::aarch32::Label* false_target);
-  void GenerateLongComparesAndJumps(HCondition* cond,
-                                    vixl::aarch32::Label* true_label,
-                                    vixl::aarch32::Label* false_label);
+                                    vixl::aarch32::Label* false_target,
+                                    bool is_far_target = true);
   void DivRemOneOrMinusOne(HBinaryOperation* instruction);
   void DivRemByPowerOfTwo(HBinaryOperation* instruction);
   void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
@@ -566,10 +564,11 @@
     vixl::aarch32::Label add_pc_label;
   };
 
-  PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file,
-                                                dex::StringIndex string_index);
+  PcRelativePatchInfo* NewPcRelativeMethodPatch(MethodReference target_method);
   PcRelativePatchInfo* NewPcRelativeTypePatch(const DexFile& dex_file, dex::TypeIndex type_index);
   PcRelativePatchInfo* NewTypeBssEntryPatch(const DexFile& dex_file, dex::TypeIndex type_index);
+  PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file,
+                                                dex::StringIndex string_index);
   PcRelativePatchInfo* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file,
                                                        uint32_t element_offset);
 
@@ -716,6 +715,14 @@
   void EmitMovwMovtPlaceholder(CodeGeneratorARMVIXL::PcRelativePatchInfo* labels,
                                vixl::aarch32::Register out);
 
+  // `temp` is an extra temporary register that is used for some conditions;
+  // callers may omit it, in which case the method will use a scratch
+  // register instead.
+  void GenerateConditionWithZero(IfCondition condition,
+                                 vixl::aarch32::Register out,
+                                 vixl::aarch32::Register in,
+                                 vixl::aarch32::Register temp = vixl32::Register());
+
  private:
   vixl::aarch32::Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke,
                                                                 vixl::aarch32::Register temp);
@@ -760,12 +767,14 @@
   Uint32ToLiteralMap uint32_literals_;
   // PC-relative patch info for each HArmDexCacheArraysBase.
   ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_;
-  // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC).
-  ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_;
+  // PC-relative method patch info for kBootImageLinkTimePcRelative.
+  ArenaDeque<PcRelativePatchInfo> pc_relative_method_patches_;
   // PC-relative type patch info for kBootImageLinkTimePcRelative.
   ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_;
   // PC-relative type patch info for kBssEntry.
   ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_;
+  // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC).
+  ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_;
   // Baker read barrier patch info.
   ArenaDeque<BakerReadBarrierPatchInfo> baker_read_barrier_patches_;
 
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 95be3d7..d8ac99a 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -1061,9 +1061,10 @@
       uint32_literals_(std::less<uint32_t>(),
                        graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
-      pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      pc_relative_method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       jit_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       jit_class_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       clobbered_ra_(false) {
@@ -1602,30 +1603,36 @@
   DCHECK(linker_patches->empty());
   size_t size =
       pc_relative_dex_cache_patches_.size() +
-      pc_relative_string_patches_.size() +
+      pc_relative_method_patches_.size() +
       pc_relative_type_patches_.size() +
-      type_bss_entry_patches_.size();
+      type_bss_entry_patches_.size() +
+      pc_relative_string_patches_.size();
   linker_patches->reserve(size);
   EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_,
                                                                linker_patches);
-  if (!GetCompilerOptions().IsBootImage()) {
-    DCHECK(pc_relative_type_patches_.empty());
-    EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_,
+  if (GetCompilerOptions().IsBootImage()) {
+    EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(pc_relative_method_patches_,
                                                                   linker_patches);
-  } else {
     EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(pc_relative_type_patches_,
                                                                 linker_patches);
     EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(pc_relative_string_patches_,
                                                                   linker_patches);
+  } else {
+    DCHECK(pc_relative_method_patches_.empty());
+    DCHECK(pc_relative_type_patches_.empty());
+    EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_,
+                                                                  linker_patches);
   }
   EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_,
                                                               linker_patches);
   DCHECK_EQ(size, linker_patches->size());
 }
 
-CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativeStringPatch(
-    const DexFile& dex_file, dex::StringIndex string_index) {
-  return NewPcRelativePatch(dex_file, string_index.index_, &pc_relative_string_patches_);
+CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativeMethodPatch(
+    MethodReference target_method) {
+  return NewPcRelativePatch(*target_method.dex_file,
+                            target_method.dex_method_index,
+                            &pc_relative_method_patches_);
 }
 
 CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativeTypePatch(
@@ -1638,6 +1645,11 @@
   return NewPcRelativePatch(dex_file, type_index.index_, &type_bss_entry_patches_);
 }
 
+CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativeStringPatch(
+    const DexFile& dex_file, dex::StringIndex string_index) {
+  return NewPcRelativePatch(dex_file, string_index.index_, &pc_relative_string_patches_);
+}
+
 CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativeDexCacheArrayPatch(
     const DexFile& dex_file, uint32_t element_offset) {
   return NewPcRelativePatch(dex_file, element_offset, &pc_relative_dex_cache_patches_);
@@ -6947,7 +6959,7 @@
   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
   bool is_r6 = codegen_->GetInstructionSetFeatures().IsR6();
-  bool has_extra_input = invoke->HasPcRelativeDexCache() && !is_r6;
+  bool has_extra_input = invoke->HasPcRelativeMethodLoadKind() && !is_r6;
 
   IntrinsicLocationsBuilderMIPS intrinsic(codegen_);
   if (intrinsic.TryDispatch(invoke)) {
@@ -7084,6 +7096,7 @@
   bool is_r6 = GetInstructionSetFeatures().IsR6();
   bool fallback_load = has_irreducible_loops && !is_r6;
   switch (dispatch_info.method_load_kind) {
+    case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative:
     case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative:
       break;
     default:
@@ -7103,7 +7116,7 @@
   HInvokeStaticOrDirect::MethodLoadKind method_load_kind = invoke->GetMethodLoadKind();
   HInvokeStaticOrDirect::CodePtrLocation code_ptr_location = invoke->GetCodePtrLocation();
   bool is_r6 = GetInstructionSetFeatures().IsR6();
-  Register base_reg = (invoke->HasPcRelativeDexCache() && !is_r6)
+  Register base_reg = (invoke->HasPcRelativeMethodLoadKind() && !is_r6)
       ? GetInvokeStaticOrDirectExtraParameter(invoke, temp.AsRegister<Register>())
       : ZERO;
 
@@ -7121,6 +7134,16 @@
     case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
       callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
       break;
+    case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: {
+      DCHECK(GetCompilerOptions().IsBootImage());
+      PcRelativePatchInfo* info = NewPcRelativeMethodPatch(invoke->GetTargetMethod());
+      bool reordering = __ SetReorder(false);
+      Register temp_reg = temp.AsRegister<Register>();
+      EmitPcRelativeAddressPlaceholderHigh(info, temp_reg, base_reg);
+      __ Addiu(temp_reg, temp_reg, /* placeholder */ 0x5678);
+      __ SetReorder(reordering);
+      break;
+    }
     case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
       __ LoadConst32(temp.AsRegister<Register>(), invoke->GetMethodAddress());
       break;
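The MIPS sequence materializes the patched address in two halves: EmitPcRelativeAddressPlaceholderHigh() emits the high part and the following Addiu adds the low 16 bits, with 0x5678 as the placeholder the linker overwrites. Because ADDIU sign-extends its immediate, the high half must be biased when the low half comes out negative. The split, as a standalone sketch:

    #include <cstdint>

    // Split addr so that (hi << 16) + int16_t(lo) == addr, matching a
    // sign-extending ADDIU for the low half.
    void SplitHiLo(uint32_t addr, uint32_t* hi, uint16_t* lo) {
      *lo = static_cast<uint16_t>(addr);
      // Subtract the sign-extended low half, then take the upper 16 bits;
      // this rounds hi up by one when int16_t(lo) is negative.
      *hi = (addr - static_cast<uint32_t>(static_cast<int16_t>(*lo))) >> 16;
    }
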
diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h
index 449cb4c..ff1fde6 100644
--- a/compiler/optimizing/code_generator_mips.h
+++ b/compiler/optimizing/code_generator_mips.h
@@ -582,10 +582,11 @@
     MipsLabel pc_rel_label;
   };
 
-  PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file,
-                                                dex::StringIndex string_index);
+  PcRelativePatchInfo* NewPcRelativeMethodPatch(MethodReference target_method);
   PcRelativePatchInfo* NewPcRelativeTypePatch(const DexFile& dex_file, dex::TypeIndex type_index);
   PcRelativePatchInfo* NewTypeBssEntryPatch(const DexFile& dex_file, dex::TypeIndex type_index);
+  PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file,
+                                                dex::StringIndex string_index);
   PcRelativePatchInfo* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file,
                                                        uint32_t element_offset);
   Literal* DeduplicateBootImageAddressLiteral(uint32_t address);
@@ -644,12 +645,15 @@
   Uint32ToLiteralMap uint32_literals_;
   // PC-relative patch info for each HMipsDexCacheArraysBase.
   ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_;
-  // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC).
-  ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_;
+  // PC-relative method patch info for kBootImageLinkTimePcRelative.
+  ArenaDeque<PcRelativePatchInfo> pc_relative_method_patches_;
   // PC-relative type patch info for kBootImageLinkTimePcRelative.
   ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_;
   // PC-relative type patch info for kBssEntry.
   ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_;
+  // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC).
+  ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_;
+
   // Patches for string root accesses in JIT compiled code.
   ArenaDeque<JitPatchInfo> jit_string_patches_;
   // Patches for class root accesses in JIT compiled code.
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 5cdff5a..0961391 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -958,9 +958,10 @@
       uint64_literals_(std::less<uint64_t>(),
                        graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
-      pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      pc_relative_method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       jit_string_patches_(StringReferenceValueComparator(),
                           graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       jit_class_patches_(TypeReferenceValueComparator(),
@@ -1440,30 +1441,36 @@
   DCHECK(linker_patches->empty());
   size_t size =
       pc_relative_dex_cache_patches_.size() +
-      pc_relative_string_patches_.size() +
+      pc_relative_method_patches_.size() +
       pc_relative_type_patches_.size() +
-      type_bss_entry_patches_.size();
+      type_bss_entry_patches_.size() +
+      pc_relative_string_patches_.size();
   linker_patches->reserve(size);
   EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_,
                                                                linker_patches);
-  if (!GetCompilerOptions().IsBootImage()) {
-    DCHECK(pc_relative_type_patches_.empty());
-    EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_,
+  if (GetCompilerOptions().IsBootImage()) {
+    EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(pc_relative_method_patches_,
                                                                   linker_patches);
-  } else {
     EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(pc_relative_type_patches_,
                                                                 linker_patches);
     EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(pc_relative_string_patches_,
                                                                   linker_patches);
+  } else {
+    DCHECK(pc_relative_method_patches_.empty());
+    DCHECK(pc_relative_type_patches_.empty());
+    EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_,
+                                                                  linker_patches);
   }
   EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_,
                                                               linker_patches);
   DCHECK_EQ(size, linker_patches->size());
 }
 
-CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewPcRelativeStringPatch(
-    const DexFile& dex_file, dex::StringIndex string_index) {
-  return NewPcRelativePatch(dex_file, string_index.index_, &pc_relative_string_patches_);
+CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewPcRelativeMethodPatch(
+    MethodReference target_method) {
+  return NewPcRelativePatch(*target_method.dex_file,
+                            target_method.dex_method_index,
+                            &pc_relative_method_patches_);
 }
 
 CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewPcRelativeTypePatch(
@@ -1476,6 +1483,11 @@
   return NewPcRelativePatch(dex_file, type_index.index_, &type_bss_entry_patches_);
 }
 
+CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewPcRelativeStringPatch(
+    const DexFile& dex_file, dex::StringIndex string_index) {
+  return NewPcRelativePatch(dex_file, string_index.index_, &pc_relative_string_patches_);
+}
+
 CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewPcRelativeDexCacheArrayPatch(
     const DexFile& dex_file, uint32_t element_offset) {
   return NewPcRelativePatch(dex_file, element_offset, &pc_relative_dex_cache_patches_);
@@ -4923,6 +4935,14 @@
     case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
       callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
       break;
+    case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: {
+      DCHECK(GetCompilerOptions().IsBootImage());
+      CodeGeneratorMIPS64::PcRelativePatchInfo* info =
+          NewPcRelativeMethodPatch(invoke->GetTargetMethod());
+      EmitPcRelativeAddressPlaceholderHigh(info, AT);
+      __ Daddiu(temp.AsRegister<GpuRegister>(), AT, /* placeholder */ 0x5678);
+      break;
+    }
     case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
       __ LoadLiteral(temp.AsRegister<GpuRegister>(),
                      kLoadDoubleword,
diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h
index 1f34ced..f49ad49 100644
--- a/compiler/optimizing/code_generator_mips64.h
+++ b/compiler/optimizing/code_generator_mips64.h
@@ -545,10 +545,11 @@
     Mips64Label pc_rel_label;
   };
 
-  PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file,
-                                                dex::StringIndex string_index);
+  PcRelativePatchInfo* NewPcRelativeMethodPatch(MethodReference target_method);
   PcRelativePatchInfo* NewPcRelativeTypePatch(const DexFile& dex_file, dex::TypeIndex type_index);
   PcRelativePatchInfo* NewTypeBssEntryPatch(const DexFile& dex_file, dex::TypeIndex type_index);
+  PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file,
+                                                dex::StringIndex string_index);
   PcRelativePatchInfo* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file,
                                                        uint32_t element_offset);
   PcRelativePatchInfo* NewPcRelativeCallPatch(const DexFile& dex_file,
@@ -605,12 +606,15 @@
   Uint64ToLiteralMap uint64_literals_;
   // PC-relative patch info.
   ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_;
-  // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC).
-  ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_;
+  // PC-relative method patch info for kBootImageLinkTimePcRelative.
+  ArenaDeque<PcRelativePatchInfo> pc_relative_method_patches_;
   // PC-relative type patch info for kBootImageLinkTimePcRelative.
   ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_;
   // PC-relative type patch info for kBssEntry.
   ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_;
+  // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC).
+  ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_;
+
   // Patches for string root accesses in JIT compiled code.
   StringToLiteralMap jit_string_patches_;
   // Patches for class root accesses in JIT compiled code.
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 4a279d8..f3ec112 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -1032,9 +1032,10 @@
       assembler_(graph->GetArena()),
       isa_features_(isa_features),
       pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
-      string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      boot_image_method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       boot_image_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       jit_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       jit_class_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       constant_area_start_(-1),
@@ -2167,7 +2168,7 @@
 
   IntrinsicLocationsBuilderX86 intrinsic(codegen_);
   if (intrinsic.TryDispatch(invoke)) {
-    if (invoke->GetLocations()->CanCall() && invoke->HasPcRelativeDexCache()) {
+    if (invoke->GetLocations()->CanCall() && invoke->HasPcRelativeMethodLoadKind()) {
       invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::Any());
     }
     return;
@@ -2176,7 +2177,7 @@
   HandleInvoke(invoke);
 
   // For PC-relative dex cache the invoke has an extra input, the PC-relative address base.
-  if (invoke->HasPcRelativeDexCache()) {
+  if (invoke->HasPcRelativeMethodLoadKind()) {
     invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister());
   }
 }
@@ -4543,6 +4544,14 @@
     case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
       callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
       break;
+    case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: {
+      DCHECK(GetCompilerOptions().IsBootImage());
+      Register base_reg = GetInvokeStaticOrDirectExtraParameter(invoke,
+                                                                temp.AsRegister<Register>());
+      __ leal(temp.AsRegister<Register>(), Address(base_reg, CodeGeneratorX86::kDummy32BitOffset));
+      RecordBootMethodPatch(invoke);
+      break;
+    }
     case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
       __ movl(temp.AsRegister<Register>(), Immediate(invoke->GetMethodAddress()));
       break;
@@ -4631,13 +4640,14 @@
       temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86PointerSize).Int32Value()));
 }
 
-void CodeGeneratorX86::RecordBootStringPatch(HLoadString* load_string) {
-  DCHECK(GetCompilerOptions().IsBootImage());
-  HX86ComputeBaseMethodAddress* address = load_string->InputAt(0)->AsX86ComputeBaseMethodAddress();
-  string_patches_.emplace_back(address,
-                               load_string->GetDexFile(),
-                               load_string->GetStringIndex().index_);
-  __ Bind(&string_patches_.back().label);
+void CodeGeneratorX86::RecordBootMethodPatch(HInvokeStaticOrDirect* invoke) {
+  DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
+  HX86ComputeBaseMethodAddress* address =
+      invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress();
+  boot_image_method_patches_.emplace_back(address,
+                                          *invoke->GetTargetMethod().dex_file,
+                                          invoke->GetTargetMethod().dex_method_index);
+  __ Bind(&boot_image_method_patches_.back().label);
 }
 
 void CodeGeneratorX86::RecordBootTypePatch(HLoadClass* load_class) {
@@ -4656,6 +4666,15 @@
   return &type_bss_entry_patches_.back().label;
 }
 
+void CodeGeneratorX86::RecordBootStringPatch(HLoadString* load_string) {
+  DCHECK(GetCompilerOptions().IsBootImage());
+  HX86ComputeBaseMethodAddress* address = load_string->InputAt(0)->AsX86ComputeBaseMethodAddress();
+  string_patches_.emplace_back(address,
+                               load_string->GetDexFile(),
+                               load_string->GetStringIndex().index_);
+  __ Bind(&string_patches_.back().label);
+}
+
 Label* CodeGeneratorX86::NewStringBssEntryPatch(HLoadString* load_string) {
   DCHECK(!GetCompilerOptions().IsBootImage());
   HX86ComputeBaseMethodAddress* address =
@@ -4693,17 +4712,21 @@
   DCHECK(linker_patches->empty());
   size_t size =
       pc_relative_dex_cache_patches_.size() +
-      string_patches_.size() +
+      boot_image_method_patches_.size() +
       boot_image_type_patches_.size() +
-      type_bss_entry_patches_.size();
+      type_bss_entry_patches_.size() +
+      string_patches_.size();
   linker_patches->reserve(size);
   EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_,
                                                                linker_patches);
   if (GetCompilerOptions().IsBootImage()) {
+    EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(boot_image_method_patches_,
+                                                                  linker_patches);
     EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(boot_image_type_patches_,
                                                                 linker_patches);
     EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(string_patches_, linker_patches);
   } else {
+    DCHECK(boot_image_method_patches_.empty());
     DCHECK(boot_image_type_patches_.empty());
     EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(string_patches_, linker_patches);
   }
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index f08d642..21c527e 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -413,9 +413,10 @@
   // Generate a call to a virtual method.
   void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE;
 
-  void RecordBootStringPatch(HLoadString* load_string);
+  void RecordBootMethodPatch(HInvokeStaticOrDirect* invoke);
   void RecordBootTypePatch(HLoadClass* load_class);
   Label* NewTypeBssEntryPatch(HLoadClass* load_class);
+  void RecordBootStringPatch(HLoadString* load_string);
   Label* NewStringBssEntryPatch(HLoadString* load_string);
   Label* NewPcRelativeDexCacheArrayPatch(HX86ComputeBaseMethodAddress* method_address,
                                          const DexFile& dex_file,
@@ -633,16 +634,17 @@
 
   // PC-relative DexCache access info.
   ArenaDeque<X86PcRelativePatchInfo> pc_relative_dex_cache_patches_;
-  // String patch locations; type depends on configuration (app .bss or boot image).
-  ArenaDeque<X86PcRelativePatchInfo> string_patches_;
+  // PC-relative method patch info for kBootImageLinkTimePcRelative.
+  ArenaDeque<X86PcRelativePatchInfo> boot_image_method_patches_;
   // PC-relative type patch info for kBootImageLinkTimePcRelative.
   ArenaDeque<X86PcRelativePatchInfo> boot_image_type_patches_;
   // Type patch locations for kBssEntry.
   ArenaDeque<X86PcRelativePatchInfo> type_bss_entry_patches_;
+  // String patch locations; type depends on configuration (app .bss or boot image).
+  ArenaDeque<X86PcRelativePatchInfo> string_patches_;
 
   // Patches for string root accesses in JIT compiled code.
   ArenaDeque<PatchInfo<Label>> jit_string_patches_;
-
   // Patches for class root accesses in JIT compiled code.
   ArenaDeque<PatchInfo<Label>> jit_class_patches_;
 
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index ac0f37b..bf1c42a 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -991,6 +991,12 @@
     case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
       callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
       break;
+    case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative:
+      DCHECK(GetCompilerOptions().IsBootImage());
+      __ leal(temp.AsRegister<CpuRegister>(),
+              Address::Absolute(kDummy32BitOffset, /* no_rip */ false));
+      RecordBootMethodPatch(invoke);
+      break;
     case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
       Load64BitValue(temp.AsRegister<CpuRegister>(), invoke->GetMethodAddress());
       break;
@@ -1079,10 +1085,10 @@
       kX86_64PointerSize).SizeValue()));
 }
 
-void CodeGeneratorX86_64::RecordBootStringPatch(HLoadString* load_string) {
-  DCHECK(GetCompilerOptions().IsBootImage());
-  string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex().index_);
-  __ Bind(&string_patches_.back().label);
+void CodeGeneratorX86_64::RecordBootMethodPatch(HInvokeStaticOrDirect* invoke) {
+  boot_image_method_patches_.emplace_back(*invoke->GetTargetMethod().dex_file,
+                                          invoke->GetTargetMethod().dex_method_index);
+  __ Bind(&boot_image_method_patches_.back().label);
 }
 
 void CodeGeneratorX86_64::RecordBootTypePatch(HLoadClass* load_class) {
@@ -1096,6 +1102,12 @@
   return &type_bss_entry_patches_.back().label;
 }
 
+void CodeGeneratorX86_64::RecordBootStringPatch(HLoadString* load_string) {
+  DCHECK(GetCompilerOptions().IsBootImage());
+  string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex().index_);
+  __ Bind(&string_patches_.back().label);
+}
+
 Label* CodeGeneratorX86_64::NewStringBssEntryPatch(HLoadString* load_string) {
   DCHECK(!GetCompilerOptions().IsBootImage());
   string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex().index_);
@@ -1128,17 +1140,21 @@
   DCHECK(linker_patches->empty());
   size_t size =
       pc_relative_dex_cache_patches_.size() +
-      string_patches_.size() +
+      boot_image_method_patches_.size() +
       boot_image_type_patches_.size() +
-      type_bss_entry_patches_.size();
+      type_bss_entry_patches_.size() +
+      string_patches_.size();
   linker_patches->reserve(size);
   EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_,
                                                                linker_patches);
   if (GetCompilerOptions().IsBootImage()) {
+    EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(boot_image_method_patches_,
+                                                                  linker_patches);
     EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(boot_image_type_patches_,
                                                                 linker_patches);
     EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(string_patches_, linker_patches);
   } else {
+    DCHECK(boot_image_method_patches_.empty());
     DCHECK(boot_image_type_patches_.empty());
     EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(string_patches_, linker_patches);
   }
@@ -1231,12 +1247,13 @@
         isa_features_(isa_features),
         constant_area_start_(0),
         pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
-        string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+        boot_image_method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
         boot_image_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
         type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
-        fixups_to_jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+        string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
         jit_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
-        jit_class_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
+        jit_class_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+        fixups_to_jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
   AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
 }
 
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index d8005cc..3039e05 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -408,9 +408,10 @@
   void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE;
   void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE;
 
-  void RecordBootStringPatch(HLoadString* load_string);
+  void RecordBootMethodPatch(HInvokeStaticOrDirect* invoke);
   void RecordBootTypePatch(HLoadClass* load_class);
   Label* NewTypeBssEntryPatch(HLoadClass* load_class);
+  void RecordBootStringPatch(HLoadString* load_string);
   Label* NewStringBssEntryPatch(HLoadString* load_string);
   Label* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, uint32_t element_offset);
   Label* NewJitRootStringPatch(const DexFile& dex_file,
@@ -603,22 +604,23 @@
 
   // PC-relative DexCache access info.
   ArenaDeque<PatchInfo<Label>> pc_relative_dex_cache_patches_;
-  // String patch locations; type depends on configuration (app .bss or boot image).
-  ArenaDeque<PatchInfo<Label>> string_patches_;
+  // PC-relative method patch info for kBootImageLinkTimePcRelative.
+  ArenaDeque<PatchInfo<Label>> boot_image_method_patches_;
   // PC-relative type patch info for kBootImageLinkTimePcRelative.
   ArenaDeque<PatchInfo<Label>> boot_image_type_patches_;
   // Type patch locations for kBssEntry.
   ArenaDeque<PatchInfo<Label>> type_bss_entry_patches_;
-
-  // Fixups for jump tables need to be handled specially.
-  ArenaVector<JumpTableRIPFixup*> fixups_to_jump_tables_;
+  // String patch locations; type depends on configuration (app .bss or boot image).
+  ArenaDeque<PatchInfo<Label>> string_patches_;
 
   // Patches for string literals in JIT compiled code.
   ArenaDeque<PatchInfo<Label>> jit_string_patches_;
-
   // Patches for class literals in JIT compiled code.
   ArenaDeque<PatchInfo<Label>> jit_class_patches_;
 
+  // Fixups for jump tables need to be handled specially.
+  ArenaVector<JumpTableRIPFixup*> fixups_to_jump_tables_;
+
   DISALLOW_COPY_AND_ASSIGN(CodeGeneratorX86_64);
 };
 
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 8674e72..4284c68 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -578,12 +578,12 @@
     return kInlineCacheNoData;
   }
 
-  ProfileCompilationInfo::OfflineProfileMethodInfo offline_profile;
-  bool found = pci->GetMethod(caller_dex_file.GetLocation(),
-                              caller_dex_file.GetLocationChecksum(),
-                              caller_compilation_unit_.GetDexMethodIndex(),
-                              &offline_profile);
-  if (!found) {
+  // Use the profile arena when extracting the method info.
+  std::unique_ptr<ProfileCompilationInfo::OfflineProfileMethodInfo> offline_profile =
+      pci->GetMethod(caller_dex_file.GetLocation(),
+                     caller_dex_file.GetLocationChecksum(),
+                     caller_compilation_unit_.GetDexMethodIndex());
+  if (offline_profile == nullptr) {
     return kInlineCacheNoData;  // no profile information for this invocation.
   }
 
@@ -593,7 +593,7 @@
     return kInlineCacheNoData;
   } else {
     return ExtractClassesFromOfflineProfile(invoke_instruction,
-                                            offline_profile,
+                                            *offline_profile,
                                             *inline_cache);
   }
 }
@@ -1856,7 +1856,7 @@
   HDeadCodeElimination dce(callee_graph, inline_stats_, "dead_code_elimination$inliner");
   HConstantFolding fold(callee_graph, "constant_folding$inliner");
   HSharpening sharpening(callee_graph, codegen_, dex_compilation_unit, compiler_driver_, handles_);
-  InstructionSimplifier simplify(callee_graph, codegen_, inline_stats_);
+  InstructionSimplifier simplify(callee_graph, codegen_, compiler_driver_, inline_stats_);
   IntrinsicsRecognizer intrinsics(callee_graph, inline_stats_);
 
   HOptimization* optimizations[] = {
diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc
index 40fafb0..df9e716 100644
--- a/compiler/optimizing/instruction_builder.cc
+++ b/compiler/optimizing/instruction_builder.cc
@@ -1000,8 +1000,8 @@
 
 void HInstructionBuilder::BuildConstructorFenceForAllocation(HInstruction* allocation) {
   DCHECK(allocation != nullptr &&
-             allocation->IsNewInstance() ||
-             allocation->IsNewArray());  // corresponding to "new" keyword in JLS.
+             (allocation->IsNewInstance() ||
+              allocation->IsNewArray()));  // corresponding to "new" keyword in JLS.
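+  // Note: the parentheses matter; '&&' binds tighter than '||', so the old
+  // form could call IsNewArray() on a null `allocation`.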
 
   if (allocation->IsNewInstance()) {
     // STRING SPECIAL HANDLING:
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index 2cedde9..d147166 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -30,9 +30,11 @@
  public:
   InstructionSimplifierVisitor(HGraph* graph,
                                CodeGenerator* codegen,
+                               CompilerDriver* compiler_driver,
                                OptimizingCompilerStats* stats)
       : HGraphDelegateVisitor(graph),
         codegen_(codegen),
+        compiler_driver_(compiler_driver),
         stats_(stats) {}
 
   void Run();
@@ -119,6 +121,7 @@
   void SimplifyMemBarrier(HInvoke* invoke, MemBarrierKind barrier_kind);
 
   CodeGenerator* codegen_;
+  CompilerDriver* compiler_driver_;
   OptimizingCompilerStats* stats_;
   bool simplification_occurred_ = false;
   int simplifications_at_current_position_ = 0;
@@ -130,7 +133,7 @@
 };
 
 void InstructionSimplifier::Run() {
-  InstructionSimplifierVisitor visitor(graph_, codegen_, stats_);
+  InstructionSimplifierVisitor visitor(graph_, codegen_, compiler_driver_, stats_);
   visitor.Run();
 }
 
@@ -1896,7 +1899,7 @@
       // the invoke, as we would need to look it up in the current dex file, and it
       // is unlikely that it exists. The most usual situation for such typed
       // arraycopy methods is a direct pointer to the boot image.
-      HSharpening::SharpenInvokeStaticOrDirect(invoke, codegen_);
+      HSharpening::SharpenInvokeStaticOrDirect(invoke, codegen_, compiler_driver_);
     }
   }
 }
diff --git a/compiler/optimizing/instruction_simplifier.h b/compiler/optimizing/instruction_simplifier.h
index f7329a4..5e20455 100644
--- a/compiler/optimizing/instruction_simplifier.h
+++ b/compiler/optimizing/instruction_simplifier.h
@@ -24,6 +24,7 @@
 namespace art {
 
 class CodeGenerator;
+class CompilerDriver;
 
 /**
  * Implements optimizations specific to each instruction.
@@ -37,12 +38,14 @@
  */
 class InstructionSimplifier : public HOptimization {
  public:
-  explicit InstructionSimplifier(HGraph* graph,
-                                 CodeGenerator* codegen,
-                                 OptimizingCompilerStats* stats = nullptr,
-                                 const char* name = kInstructionSimplifierPassName)
+  InstructionSimplifier(HGraph* graph,
+                        CodeGenerator* codegen,
+                        CompilerDriver* compiler_driver,
+                        OptimizingCompilerStats* stats = nullptr,
+                        const char* name = kInstructionSimplifierPassName)
       : HOptimization(graph, name, stats),
-        codegen_(codegen) {}
+        codegen_(codegen),
+        compiler_driver_(compiler_driver) {}
 
   static constexpr const char* kInstructionSimplifierPassName = "instruction_simplifier";
 
@@ -50,6 +53,7 @@
 
  private:
   CodeGenerator* codegen_;
+  CompilerDriver* compiler_driver_;
 
   DISALLOW_COPY_AND_ASSIGN(InstructionSimplifier);
 };
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index 1df884e..9803c9a 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -2598,11 +2598,7 @@
   // We don't care about the sign bit, so shift left.
   __ Lsl(out, out, 1);
   __ eor(out, out, ShifterOperand(infinity));
-  // If the result is 0, then it has 32 leading zeros, and less than that otherwise.
-  __ clz(out, out);
-  // Any number less than 32 logically shifted right by 5 bits results in 0;
-  // the same operation on 32 yields 1.
-  __ Lsr(out, out, 5);
+  codegen_->GenerateConditionWithZero(kCondEQ, out, out);
 }
 
 void IntrinsicLocationsBuilderARM::VisitDoubleIsInfinite(HInvoke* invoke) {
@@ -2625,11 +2621,7 @@
   __ eor(out, out, ShifterOperand(infinity_high2));
   // We don't care about the sign bit, so shift left.
   __ orr(out, IP, ShifterOperand(out, LSL, 1));
-  // If the result is 0, then it has 32 leading zeros, and less than that otherwise.
-  __ clz(out, out);
-  // Any number less than 32 logically shifted right by 5 bits results in 0;
-  // the same operation on 32 yields 1.
-  __ Lsr(out, out, 5);
+  codegen_->GenerateConditionWithZero(kCondEQ, out, out);
 }
 
 void IntrinsicLocationsBuilderARM::VisitReferenceGetReferent(HInvoke* invoke) {
@@ -2766,12 +2758,15 @@
   int32_t offset = Thread::InterruptedOffset<kArmPointerSize>().Int32Value();
   __ LoadFromOffset(kLoadWord, out, TR, offset);
   Label done;
-  __ CompareAndBranchIfZero(out, &done);
+  Label* const final_label = codegen_->GetFinalLabel(invoke, &done);
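+  // GetFinalLabel may return a label other than `done` (e.g. a block exit
+  // label); `done` is bound at the end only if some branch actually targets it.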
+  __ CompareAndBranchIfZero(out, final_label);
   __ dmb(ISH);
   __ LoadImmediate(IP, 0);
   __ StoreToOffset(kStoreWord, IP, TR, offset);
   __ dmb(ISH);
-  __ Bind(&done);
+  if (done.IsLinked()) {
+    __ Bind(&done);
+  }
 }
 
 UNIMPLEMENTED_INTRINSIC(ARM, MathMinDoubleDouble)
diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc
index 2d9781a..1a33b0e 100644
--- a/compiler/optimizing/intrinsics_arm_vixl.cc
+++ b/compiler/optimizing/intrinsics_arm_vixl.cc
@@ -2971,11 +2971,7 @@
   // We don't care about the sign bit, so shift left.
   __ Lsl(out, out, 1);
   __ Eor(out, out, infinity);
-  // If the result is 0, then it has 32 leading zeros, and less than that otherwise.
-  __ Clz(out, out);
-  // Any number less than 32 logically shifted right by 5 bits results in 0;
-  // the same operation on 32 yields 1.
-  __ Lsr(out, out, 5);
+  codegen_->GenerateConditionWithZero(kCondEQ, out, out);
 }
 
 void IntrinsicLocationsBuilderARMVIXL::VisitDoubleIsInfinite(HInvoke* invoke) {
@@ -3001,11 +2997,7 @@
   __ Eor(out, out, infinity_high2);
   // We don't care about the sign bit, so shift left.
   __ Orr(out, temp, Operand(out, vixl32::LSL, 1));
-  // If the result is 0, then it has 32 leading zeros, and less than that otherwise.
-  __ Clz(out, out);
-  // Any number less than 32 logically shifted right by 5 bits results in 0;
-  // the same operation on 32 yields 1.
-  __ Lsr(out, out, 5);
+  codegen_->GenerateConditionWithZero(kCondEQ, out, out);
 }
 
 void IntrinsicLocationsBuilderARMVIXL::VisitReferenceGetReferent(HInvoke* invoke) {
@@ -3135,7 +3127,7 @@
     __ Add(out, in, -info.low);
     __ Cmp(out, info.high - info.low + 1);
     vixl32::Label allocate, done;
-    __ B(hs, &allocate);
+    __ B(hs, &allocate, /* is_far_target */ false);
     // If the value is within the bounds, load the j.l.Integer directly from the array.
     uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
     uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache));
@@ -3172,12 +3164,15 @@
   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
   vixl32::Register temp = temps.Acquire();
   vixl32::Label done;
-  __ CompareAndBranchIfZero(out, &done, /* far_target */ false);
+  vixl32::Label* const final_label = codegen_->GetFinalLabel(invoke, &done);
+  __ CompareAndBranchIfZero(out, final_label, /* far_target */ false);
   __ Dmb(vixl32::ISH);
   __ Mov(temp, 0);
   assembler->StoreToOffset(kStoreWord, temp, tr, offset);
   __ Dmb(vixl32::ISH);
-  __ Bind(&done);
+  if (done.IsReferenced()) {
+    __ Bind(&done);
+  }
 }
 
 UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundDouble)   // Could be done by changing rounding mode, maybe?
diff --git a/compiler/optimizing/load_store_analysis.cc b/compiler/optimizing/load_store_analysis.cc
new file mode 100644
index 0000000..f2ee345
--- /dev/null
+++ b/compiler/optimizing/load_store_analysis.cc
@@ -0,0 +1,51 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "load_store_analysis.h"
+
+namespace art {
+
+// A cap for the number of heap locations to prevent pathological time/space consumption.
+// The number of heap locations for most methods stays below this threshold.
+constexpr size_t kMaxNumberOfHeapLocations = 32;
+
+void LoadStoreAnalysis::Run() {
+  for (HBasicBlock* block : graph_->GetReversePostOrder()) {
+    heap_location_collector_.VisitBasicBlock(block);
+  }
+
+  if (heap_location_collector_.GetNumberOfHeapLocations() > kMaxNumberOfHeapLocations) {
+    // Bail out if there are too many heap locations to deal with.
+    heap_location_collector_.CleanUp();
+    return;
+  }
+  if (!heap_location_collector_.HasHeapStores()) {
+    // Without heap stores, load/store elimination would mostly act as GVN on heap accesses.
+    heap_location_collector_.CleanUp();
+    return;
+  }
+  if (heap_location_collector_.HasVolatile() || heap_location_collector_.HasMonitorOps()) {
+    // Don't do load/store elimination if the method has volatile field accesses or
+    // monitor operations, for now.
+    // TODO: do it right.
+    heap_location_collector_.CleanUp();
+    return;
+  }
+
+  heap_location_collector_.BuildAliasingMatrix();
+}
+
+}  // namespace art
diff --git a/compiler/optimizing/load_store_analysis.h b/compiler/optimizing/load_store_analysis.h
new file mode 100644
index 0000000..4e940f3
--- /dev/null
+++ b/compiler/optimizing/load_store_analysis.h
@@ -0,0 +1,518 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_LOAD_STORE_ANALYSIS_H_
+#define ART_COMPILER_OPTIMIZING_LOAD_STORE_ANALYSIS_H_
+
+#include "escape.h"
+#include "nodes.h"
+#include "optimization.h"
+
+namespace art {
+
+// A ReferenceInfo contains additional info about a reference such as
+// whether it's a singleton, returned, etc.
+class ReferenceInfo : public ArenaObject<kArenaAllocMisc> {
+ public:
+  ReferenceInfo(HInstruction* reference, size_t pos)
+      : reference_(reference),
+        position_(pos),
+        is_singleton_(true),
+        is_singleton_and_not_returned_(true),
+        is_singleton_and_not_deopt_visible_(true),
+        has_index_aliasing_(false) {
+    CalculateEscape(reference_,
+                    nullptr,
+                    &is_singleton_,
+                    &is_singleton_and_not_returned_,
+                    &is_singleton_and_not_deopt_visible_);
+  }
+
+  HInstruction* GetReference() const {
+    return reference_;
+  }
+
+  size_t GetPosition() const {
+    return position_;
+  }
+
+  // Returns true if reference_ is the only name that can refer to its value during
+  // the lifetime of the method. So it's guaranteed to not have any alias in
+  // the method (including its callees).
+  bool IsSingleton() const {
+    return is_singleton_;
+  }
+
+  // Returns true if reference_ is a singleton and not returned to the caller or
+  // used as an environment local of an HDeoptimize instruction.
+  // The allocation and stores into reference_ may be eliminated for such cases.
+  bool IsSingletonAndRemovable() const {
+    return is_singleton_and_not_returned_ && is_singleton_and_not_deopt_visible_;
+  }
+
+  // Returns true if reference_ is a singleton and returned to the caller or
+  // used as an environment local of an HDeoptimize instruction.
+  bool IsSingletonAndNonRemovable() const {
+    return is_singleton_ &&
+           (!is_singleton_and_not_returned_ || !is_singleton_and_not_deopt_visible_);
+  }
+
+  bool HasIndexAliasing() {
+    return has_index_aliasing_;
+  }
+
+  void SetHasIndexAliasing(bool has_index_aliasing) {
+    // Only allow setting to true.
+    DCHECK(has_index_aliasing);
+    has_index_aliasing_ = has_index_aliasing;
+  }
+
+ private:
+  HInstruction* const reference_;
+  const size_t position_;  // position in HeapLocationCollector's ref_info_array_.
+
+  // Can only be referred to by a single name in the method.
+  bool is_singleton_;
+  // Is singleton and not returned to caller.
+  bool is_singleton_and_not_returned_;
+  // Is singleton and not used as an environment local of HDeoptimize.
+  bool is_singleton_and_not_deopt_visible_;
+  // Some heap locations with reference_ have array index aliasing,
+  // e.g. arr[i] and arr[j] may be the same location.
+  bool has_index_aliasing_;
+
+  DISALLOW_COPY_AND_ASSIGN(ReferenceInfo);
+};
+
+// A heap location is a reference-offset/index pair that a value can be loaded from
+// or stored to.
+class HeapLocation : public ArenaObject<kArenaAllocMisc> {
+ public:
+  static constexpr size_t kInvalidFieldOffset = -1;
+
+  // TODO: more fine-grained array types.
+  static constexpr int16_t kDeclaringClassDefIndexForArrays = -1;
+
+  HeapLocation(ReferenceInfo* ref_info,
+               size_t offset,
+               HInstruction* index,
+               int16_t declaring_class_def_index)
+      : ref_info_(ref_info),
+        offset_(offset),
+        index_(index),
+        declaring_class_def_index_(declaring_class_def_index),
+        value_killed_by_loop_side_effects_(true) {
+    DCHECK(ref_info != nullptr);
+    DCHECK((offset == kInvalidFieldOffset && index != nullptr) ||
+           (offset != kInvalidFieldOffset && index == nullptr));
+    if (ref_info->IsSingleton() && !IsArrayElement()) {
+      // Assume this location's value cannot be killed by loop side effects
+      // until proven otherwise.
+      value_killed_by_loop_side_effects_ = false;
+    }
+  }
+
+  ReferenceInfo* GetReferenceInfo() const { return ref_info_; }
+  size_t GetOffset() const { return offset_; }
+  HInstruction* GetIndex() const { return index_; }
+
+  // Returns the dex def index of the declaring class.
+  // It's kDeclaringClassDefIndexForArrays for an array element.
+  int16_t GetDeclaringClassDefIndex() const {
+    return declaring_class_def_index_;
+  }
+
+  bool IsArrayElement() const {
+    return index_ != nullptr;
+  }
+
+  bool IsValueKilledByLoopSideEffects() const {
+    return value_killed_by_loop_side_effects_;
+  }
+
+  void SetValueKilledByLoopSideEffects(bool val) {
+    value_killed_by_loop_side_effects_ = val;
+  }
+
+ private:
+  ReferenceInfo* const ref_info_;      // reference for instance/static field or array access.
+  const size_t offset_;                // offset of static/instance field.
+  HInstruction* const index_;          // index of an array element.
+  const int16_t declaring_class_def_index_;  // declaring class's def's dex index.
+  bool value_killed_by_loop_side_effects_;   // value of this location may be killed by loop
+                                             // side effects because this location is stored
+                                             // into inside a loop. This gives
+                                             // better info on whether a singleton's location
+                                             // value may be killed by loop side effects.
+
+  DISALLOW_COPY_AND_ASSIGN(HeapLocation);
+};
+
+// A HeapLocationCollector collects all relevant heap locations and keeps
+// an aliasing matrix for all locations.
+class HeapLocationCollector : public HGraphVisitor {
+ public:
+  static constexpr size_t kHeapLocationNotFound = -1;
+  // Start with a single uint32_t word. That's enough bits for pair-wise
+  // aliasing matrix of 8 heap locations.
+  static constexpr uint32_t kInitialAliasingMatrixBitVectorSize = 32;
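+  // (8 locations yield 8 * 7 / 2 = 28 pairs, which fits in 32 bits; the bit
+  // vector is constructed expandable, so more locations still work.)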
+
+  explicit HeapLocationCollector(HGraph* graph)
+      : HGraphVisitor(graph),
+        ref_info_array_(graph->GetArena()->Adapter(kArenaAllocLSE)),
+        heap_locations_(graph->GetArena()->Adapter(kArenaAllocLSE)),
+        aliasing_matrix_(graph->GetArena(),
+                         kInitialAliasingMatrixBitVectorSize,
+                         true,
+                         kArenaAllocLSE),
+        has_heap_stores_(false),
+        has_volatile_(false),
+        has_monitor_operations_(false) {}
+
+  void CleanUp() {
+    heap_locations_.clear();
+    ref_info_array_.clear();
+  }
+
+  size_t GetNumberOfHeapLocations() const {
+    return heap_locations_.size();
+  }
+
+  HeapLocation* GetHeapLocation(size_t index) const {
+    return heap_locations_[index];
+  }
+
+  HInstruction* HuntForOriginalReference(HInstruction* ref) const {
+    DCHECK(ref != nullptr);
+    while (ref->IsNullCheck() || ref->IsBoundType()) {
+      ref = ref->InputAt(0);
+    }
+    return ref;
+  }
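+  // For example, for `ref = BoundType(NullCheck(NewInstance))` this returns
+  // the NewInstance instruction.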
+
+  ReferenceInfo* FindReferenceInfoOf(HInstruction* ref) const {
+    for (size_t i = 0; i < ref_info_array_.size(); i++) {
+      ReferenceInfo* ref_info = ref_info_array_[i];
+      if (ref_info->GetReference() == ref) {
+        DCHECK_EQ(i, ref_info->GetPosition());
+        return ref_info;
+      }
+    }
+    return nullptr;
+  }
+
+  bool HasHeapStores() const {
+    return has_heap_stores_;
+  }
+
+  bool HasVolatile() const {
+    return has_volatile_;
+  }
+
+  bool HasMonitorOps() const {
+    return has_monitor_operations_;
+  }
+
+  // Find and return the heap location index in heap_locations_.
+  size_t FindHeapLocationIndex(ReferenceInfo* ref_info,
+                               size_t offset,
+                               HInstruction* index,
+                               int16_t declaring_class_def_index) const {
+    for (size_t i = 0; i < heap_locations_.size(); i++) {
+      HeapLocation* loc = heap_locations_[i];
+      if (loc->GetReferenceInfo() == ref_info &&
+          loc->GetOffset() == offset &&
+          loc->GetIndex() == index &&
+          loc->GetDeclaringClassDefIndex() == declaring_class_def_index) {
+        return i;
+      }
+    }
+    return kHeapLocationNotFound;
+  }
+
+  // Returns true if heap_locations_[index1] and heap_locations_[index2] may alias.
+  bool MayAlias(size_t index1, size_t index2) const {
+    if (index1 < index2) {
+      return aliasing_matrix_.IsBitSet(AliasingMatrixPosition(index1, index2));
+    } else if (index1 > index2) {
+      return aliasing_matrix_.IsBitSet(AliasingMatrixPosition(index2, index1));
+    } else {
+      DCHECK(false) << "index1 and index2 are expected to be different";
+      return true;
+    }
+  }
+
+  void BuildAliasingMatrix() {
+    const size_t number_of_locations = heap_locations_.size();
+    if (number_of_locations == 0) {
+      return;
+    }
+    size_t pos = 0;
+    // Compute aliasing info between every pair of different heap locations.
+    // Save the result in a matrix represented as a BitVector.
+    for (size_t i = 0; i < number_of_locations - 1; i++) {
+      for (size_t j = i + 1; j < number_of_locations; j++) {
+        if (ComputeMayAlias(i, j)) {
+          aliasing_matrix_.SetBit(CheckedAliasingMatrixPosition(i, j, pos));
+        }
+        pos++;
+      }
+    }
+  }
+
+ private:
+  // An allocation cannot alias with a name which already exists at the point
+  // of the allocation, such as a parameter or a load happening before the allocation.
+  bool MayAliasWithPreexistenceChecking(ReferenceInfo* ref_info1, ReferenceInfo* ref_info2) const {
+    if (ref_info1->GetReference()->IsNewInstance() || ref_info1->GetReference()->IsNewArray()) {
+      // Any reference that can alias with the allocation must appear after it in the block/in
+      // the block's successors. In reverse post order, those instructions will be visited after
+      // the allocation.
+      return ref_info2->GetPosition() >= ref_info1->GetPosition();
+    }
+    return true;
+  }
+
+  bool CanReferencesAlias(ReferenceInfo* ref_info1, ReferenceInfo* ref_info2) const {
+    if (ref_info1 == ref_info2) {
+      return true;
+    } else if (ref_info1->IsSingleton()) {
+      return false;
+    } else if (ref_info2->IsSingleton()) {
+      return false;
+    } else if (!MayAliasWithPreexistenceChecking(ref_info1, ref_info2) ||
+        !MayAliasWithPreexistenceChecking(ref_info2, ref_info1)) {
+      return false;
+    }
+    return true;
+  }
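+  // E.g. a NewInstance singleton never aliases any other reference, while two
+  // distinct parameters may alias each other.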
+
+  // `index1` and `index2` are indices in the array of collected heap locations.
+  // Returns the position in the bit vector that tracks whether the two heap
+  // locations may alias.
+  size_t AliasingMatrixPosition(size_t index1, size_t index2) const {
+    DCHECK(index2 > index1);
+    const size_t number_of_locations = heap_locations_.size();
+    // It's (num_of_locations - 1) + ... + (num_of_locations - index1) + (index2 - index1 - 1).
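+    // Worked example (assuming 4 locations): pairs map to bits as
+    //   (0,1) (0,2) (0,3) (1,2) (1,3) (2,3) -> 0..5, so
+    // AliasingMatrixPosition(1, 3) = 4 * 1 - 2 * 1 / 2 + (3 - 1 - 1) = 4.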
+    return (number_of_locations * index1 - (1 + index1) * index1 / 2 + (index2 - index1 - 1));
+  }
+
+  // An additional position is passed in to make sure the calculated position is correct.
+  size_t CheckedAliasingMatrixPosition(size_t index1, size_t index2, size_t position) {
+    size_t calculated_position = AliasingMatrixPosition(index1, index2);
+    DCHECK_EQ(calculated_position, position);
+    return calculated_position;
+  }
+
+  // Compute if two locations may alias to each other.
+  bool ComputeMayAlias(size_t index1, size_t index2) const {
+    HeapLocation* loc1 = heap_locations_[index1];
+    HeapLocation* loc2 = heap_locations_[index2];
+    if (loc1->GetOffset() != loc2->GetOffset()) {
+      // Either two different instance fields, or one is an instance
+      // field and the other is an array element.
+      return false;
+    }
+    if (loc1->GetDeclaringClassDefIndex() != loc2->GetDeclaringClassDefIndex()) {
+      // Different types.
+      return false;
+    }
+    if (!CanReferencesAlias(loc1->GetReferenceInfo(), loc2->GetReferenceInfo())) {
+      return false;
+    }
+    if (loc1->IsArrayElement() && loc2->IsArrayElement()) {
+      HInstruction* array_index1 = loc1->GetIndex();
+      HInstruction* array_index2 = loc2->GetIndex();
+      DCHECK(array_index1 != nullptr);
+      DCHECK(array_index2 != nullptr);
+      if (array_index1->IsIntConstant() &&
+          array_index2->IsIntConstant() &&
+          array_index1->AsIntConstant()->GetValue() != array_index2->AsIntConstant()->GetValue()) {
+        // Different constant indices do not alias.
+        return false;
+      }
+      ReferenceInfo* ref_info = loc1->GetReferenceInfo();
+      ref_info->SetHasIndexAliasing(true);
+    }
+    return true;
+  }
+
+  ReferenceInfo* GetOrCreateReferenceInfo(HInstruction* instruction) {
+    ReferenceInfo* ref_info = FindReferenceInfoOf(instruction);
+    if (ref_info == nullptr) {
+      size_t pos = ref_info_array_.size();
+      ref_info = new (GetGraph()->GetArena()) ReferenceInfo(instruction, pos);
+      ref_info_array_.push_back(ref_info);
+    }
+    return ref_info;
+  }
+
+  void CreateReferenceInfoForReferenceType(HInstruction* instruction) {
+    if (instruction->GetType() != Primitive::kPrimNot) {
+      return;
+    }
+    DCHECK(FindReferenceInfoOf(instruction) == nullptr);
+    GetOrCreateReferenceInfo(instruction);
+  }
+
+  HeapLocation* GetOrCreateHeapLocation(HInstruction* ref,
+                                        size_t offset,
+                                        HInstruction* index,
+                                        int16_t declaring_class_def_index) {
+    HInstruction* original_ref = HuntForOriginalReference(ref);
+    ReferenceInfo* ref_info = GetOrCreateReferenceInfo(original_ref);
+    size_t heap_location_idx = FindHeapLocationIndex(
+        ref_info, offset, index, declaring_class_def_index);
+    if (heap_location_idx == kHeapLocationNotFound) {
+      HeapLocation* heap_loc = new (GetGraph()->GetArena())
+          HeapLocation(ref_info, offset, index, declaring_class_def_index);
+      heap_locations_.push_back(heap_loc);
+      return heap_loc;
+    }
+    return heap_locations_[heap_location_idx];
+  }
+
+  HeapLocation* VisitFieldAccess(HInstruction* ref, const FieldInfo& field_info) {
+    if (field_info.IsVolatile()) {
+      has_volatile_ = true;
+    }
+    const uint16_t declaring_class_def_index = field_info.GetDeclaringClassDefIndex();
+    const size_t offset = field_info.GetFieldOffset().SizeValue();
+    return GetOrCreateHeapLocation(ref, offset, nullptr, declaring_class_def_index);
+  }
+
+  void VisitArrayAccess(HInstruction* array, HInstruction* index) {
+    GetOrCreateHeapLocation(array, HeapLocation::kInvalidFieldOffset,
+        index, HeapLocation::kDeclaringClassDefIndexForArrays);
+  }
+
+  void VisitInstanceFieldGet(HInstanceFieldGet* instruction) OVERRIDE {
+    VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo());
+    CreateReferenceInfoForReferenceType(instruction);
+  }
+
+  void VisitInstanceFieldSet(HInstanceFieldSet* instruction) OVERRIDE {
+    HeapLocation* location = VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo());
+    has_heap_stores_ = true;
+    if (location->GetReferenceInfo()->IsSingleton()) {
+      // A singleton's location value may be killed by loop side effects if it's
+      // defined before that loop, and it's stored into inside that loop.
+      HLoopInformation* loop_info = instruction->GetBlock()->GetLoopInformation();
+      if (loop_info != nullptr) {
+        HInstruction* ref = location->GetReferenceInfo()->GetReference();
+        DCHECK(ref->IsNewInstance());
+        if (loop_info->IsDefinedOutOfTheLoop(ref)) {
+          // ref's location value may be killed by this loop's side effects.
+          location->SetValueKilledByLoopSideEffects(true);
+        } else {
+          // ref is defined inside this loop so this loop's side effects cannot
+          // kill its location value at the loop header since ref/its location doesn't
+          // exist yet at the loop header.
+        }
+      }
+    } else {
+      // For non-singletons, value_killed_by_loop_side_effects_ is initialized
+      // to true.
+      DCHECK_EQ(location->IsValueKilledByLoopSideEffects(), true);
+    }
+  }
+
+  void VisitStaticFieldGet(HStaticFieldGet* instruction) OVERRIDE {
+    VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo());
+    CreateReferenceInfoForReferenceType(instruction);
+  }
+
+  void VisitStaticFieldSet(HStaticFieldSet* instruction) OVERRIDE {
+    VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo());
+    has_heap_stores_ = true;
+  }
+
+  // We intentionally don't collect HUnresolvedInstanceField/HUnresolvedStaticField accesses
+  // since we cannot accurately track the fields.
+
+  void VisitArrayGet(HArrayGet* instruction) OVERRIDE {
+    VisitArrayAccess(instruction->InputAt(0), instruction->InputAt(1));
+    CreateReferenceInfoForReferenceType(instruction);
+  }
+
+  void VisitArraySet(HArraySet* instruction) OVERRIDE {
+    VisitArrayAccess(instruction->InputAt(0), instruction->InputAt(1));
+    has_heap_stores_ = true;
+  }
+
+  void VisitNewInstance(HNewInstance* new_instance) OVERRIDE {
+    // Any references appearing in the ref_info_array_ so far cannot alias with new_instance.
+    CreateReferenceInfoForReferenceType(new_instance);
+  }
+
+  void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* instruction) OVERRIDE {
+    CreateReferenceInfoForReferenceType(instruction);
+  }
+
+  void VisitInvokeVirtual(HInvokeVirtual* instruction) OVERRIDE {
+    CreateReferenceInfoForReferenceType(instruction);
+  }
+
+  void VisitInvokeInterface(HInvokeInterface* instruction) OVERRIDE {
+    CreateReferenceInfoForReferenceType(instruction);
+  }
+
+  void VisitParameterValue(HParameterValue* instruction) OVERRIDE {
+    CreateReferenceInfoForReferenceType(instruction);
+  }
+
+  void VisitSelect(HSelect* instruction) OVERRIDE {
+    CreateReferenceInfoForReferenceType(instruction);
+  }
+
+  void VisitMonitorOperation(HMonitorOperation* monitor ATTRIBUTE_UNUSED) OVERRIDE {
+    has_monitor_operations_ = true;
+  }
+
+  ArenaVector<ReferenceInfo*> ref_info_array_;   // All references used for heap accesses.
+  ArenaVector<HeapLocation*> heap_locations_;    // All heap locations.
+  ArenaBitVector aliasing_matrix_;    // aliasing info between each pair of locations.
+  bool has_heap_stores_;    // If there are no heap stores, LSE acts as GVN with better
+                            // alias analysis and won't be as effective.
+  bool has_volatile_;       // If there are volatile field accesses.
+  bool has_monitor_operations_;    // If there are monitor operations.
+
+  DISALLOW_COPY_AND_ASSIGN(HeapLocationCollector);
+};
+
+class LoadStoreAnalysis : public HOptimization {
+ public:
+  explicit LoadStoreAnalysis(HGraph* graph)
+    : HOptimization(graph, kLoadStoreAnalysisPassName),
+      heap_location_collector_(graph) {}
+
+  const HeapLocationCollector& GetHeapLocationCollector() const {
+    return heap_location_collector_;
+  }
+
+  void Run() OVERRIDE;
+
+  static constexpr const char* kLoadStoreAnalysisPassName = "load_store_analysis";
+
+ private:
+  HeapLocationCollector heap_location_collector_;
+
+  DISALLOW_COPY_AND_ASSIGN(LoadStoreAnalysis);
+};
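+
+// A minimal usage sketch (hypothetical caller, not part of this change):
+//
+//   LoadStoreAnalysis lsa(graph);
+//   lsa.Run();
+//   const HeapLocationCollector& collector = lsa.GetHeapLocationCollector();
+//   if (collector.GetNumberOfHeapLocations() >= 2u &&
+//       collector.MayAlias(0u, 1u)) {
+//     // Treat heap locations 0 and 1 as possibly conflicting accesses.
+//   }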
+
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_LOAD_STORE_ANALYSIS_H_
diff --git a/compiler/optimizing/load_store_analysis_test.cc b/compiler/optimizing/load_store_analysis_test.cc
new file mode 100644
index 0000000..2418777
--- /dev/null
+++ b/compiler/optimizing/load_store_analysis_test.cc
@@ -0,0 +1,187 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "load_store_analysis.h"
+#include "nodes.h"
+#include "optimizing_unit_test.h"
+
+#include "gtest/gtest.h"
+
+namespace art {
+
+class LoadStoreAnalysisTest : public CommonCompilerTest {
+ public:
+  LoadStoreAnalysisTest() : pool_(), allocator_(&pool_) {
+    graph_ = CreateGraph(&allocator_);
+  }
+
+  ArenaPool pool_;
+  ArenaAllocator allocator_;
+  HGraph* graph_;
+};
+
+TEST_F(LoadStoreAnalysisTest, ArrayHeapLocations) {
+  HBasicBlock* entry = new (&allocator_) HBasicBlock(graph_);
+  graph_->AddBlock(entry);
+  graph_->SetEntryBlock(entry);
+
+  // entry:
+  // array         ParameterValue
+  // index         ParameterValue
+  // c1            IntConstant
+  // c2            IntConstant
+  // c3            IntConstant
+  // array_get1    ArrayGet [array, c1]
+  // array_get2    ArrayGet [array, c2]
+  // array_set1    ArraySet [array, c1, c3]
+  // array_set2    ArraySet [array, index, c3]
+  HInstruction* array = new (&allocator_) HParameterValue(
+      graph_->GetDexFile(), dex::TypeIndex(0), 0, Primitive::kPrimNot);
+  HInstruction* index = new (&allocator_) HParameterValue(
+      graph_->GetDexFile(), dex::TypeIndex(1), 1, Primitive::kPrimInt);
+  HInstruction* c1 = graph_->GetIntConstant(1);
+  HInstruction* c2 = graph_->GetIntConstant(2);
+  HInstruction* c3 = graph_->GetIntConstant(3);
+  HInstruction* array_get1 = new (&allocator_) HArrayGet(array, c1, Primitive::kPrimInt, 0);
+  HInstruction* array_get2 = new (&allocator_) HArrayGet(array, c2, Primitive::kPrimInt, 0);
+  HInstruction* array_set1 = new (&allocator_) HArraySet(array, c1, c3, Primitive::kPrimInt, 0);
+  HInstruction* array_set2 = new (&allocator_) HArraySet(array, index, c3, Primitive::kPrimInt, 0);
+  entry->AddInstruction(array);
+  entry->AddInstruction(index);
+  entry->AddInstruction(array_get1);
+  entry->AddInstruction(array_get2);
+  entry->AddInstruction(array_set1);
+  entry->AddInstruction(array_set2);
+
+  // Test HeapLocationCollector initialization.
+  // Should be no heap locations, no operations on the heap.
+  HeapLocationCollector heap_location_collector(graph_);
+  ASSERT_EQ(heap_location_collector.GetNumberOfHeapLocations(), 0U);
+  ASSERT_FALSE(heap_location_collector.HasHeapStores());
+
+  // Test that after visiting the graph_, the collector sees the following heap
+  // locations: array[c1], array[c2], array[index]; and that it sees heap stores.
+  heap_location_collector.VisitBasicBlock(entry);
+  ASSERT_EQ(heap_location_collector.GetNumberOfHeapLocations(), 3U);
+  ASSERT_TRUE(heap_location_collector.HasHeapStores());
+
+  // Test queries on HeapLocationCollector's ref info and index records.
+  ReferenceInfo* ref = heap_location_collector.FindReferenceInfoOf(array);
+  size_t field_off = HeapLocation::kInvalidFieldOffset;
+  size_t class_def = HeapLocation::kDeclaringClassDefIndexForArrays;
+  size_t loc1 = heap_location_collector.FindHeapLocationIndex(ref, field_off, c1, class_def);
+  size_t loc2 = heap_location_collector.FindHeapLocationIndex(ref, field_off, c2, class_def);
+  size_t loc3 = heap_location_collector.FindHeapLocationIndex(ref, field_off, index, class_def);
+  // must find the reference info for array in HeapLocationCollector.
+  ASSERT_TRUE(ref != nullptr);
+  // must find these heap locations;
+  // and array[1], array[2], array[index] should be different heap locations.
+  ASSERT_TRUE(loc1 != HeapLocationCollector::kHeapLocationNotFound);
+  ASSERT_TRUE(loc2 != HeapLocationCollector::kHeapLocationNotFound);
+  ASSERT_TRUE(loc3 != HeapLocationCollector::kHeapLocationNotFound);
+  ASSERT_TRUE(loc1 != loc2);
+  ASSERT_TRUE(loc2 != loc3);
+  ASSERT_TRUE(loc1 != loc3);
+
+  // Test alias relationships after building aliasing matrix.
+  // array[1] and array[2] clearly should not alias;
+  // array[index] should alias with the others, because index is an unknown value.
+  heap_location_collector.BuildAliasingMatrix();
+  ASSERT_FALSE(heap_location_collector.MayAlias(loc1, loc2));
+  ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc3));
+  ASSERT_TRUE(heap_location_collector.MayAlias(loc2, loc3));
+}
+
+TEST_F(LoadStoreAnalysisTest, FieldHeapLocations) {
+  HBasicBlock* entry = new (&allocator_) HBasicBlock(graph_);
+  graph_->AddBlock(entry);
+  graph_->SetEntryBlock(entry);
+
+  // entry:
+  // object              ParameterValue
+  // c1                  IntConstant
+  // set_field10         InstanceFieldSet [object, c1, 10]
+  // get_field10         InstanceFieldGet [object, 10]
+  // get_field20         InstanceFieldGet [object, 20]
+
+  HInstruction* c1 = graph_->GetIntConstant(1);
+  HInstruction* object = new (&allocator_) HParameterValue(graph_->GetDexFile(),
+                                                           dex::TypeIndex(0),
+                                                           0,
+                                                           Primitive::kPrimNot);
+  HInstanceFieldSet* set_field10 = new (&allocator_) HInstanceFieldSet(object,
+                                                                       c1,
+                                                                       nullptr,
+                                                                       Primitive::kPrimInt,
+                                                                       MemberOffset(10),
+                                                                       false,
+                                                                       kUnknownFieldIndex,
+                                                                       kUnknownClassDefIndex,
+                                                                       graph_->GetDexFile(),
+                                                                       0);
+  HInstanceFieldGet* get_field10 = new (&allocator_) HInstanceFieldGet(object,
+                                                                       nullptr,
+                                                                       Primitive::kPrimInt,
+                                                                       MemberOffset(10),
+                                                                       false,
+                                                                       kUnknownFieldIndex,
+                                                                       kUnknownClassDefIndex,
+                                                                       graph_->GetDexFile(),
+                                                                       0);
+  HInstanceFieldGet* get_field20 = new (&allocator_) HInstanceFieldGet(object,
+                                                                       nullptr,
+                                                                       Primitive::kPrimInt,
+                                                                       MemberOffset(20),
+                                                                       false,
+                                                                       kUnknownFieldIndex,
+                                                                       kUnknownClassDefIndex,
+                                                                       graph_->GetDexFile(),
+                                                                       0);
+  entry->AddInstruction(object);
+  entry->AddInstruction(set_field10);
+  entry->AddInstruction(get_field10);
+  entry->AddInstruction(get_field20);
+
+  // Test HeapLocationCollector initialization.
+  // Should be no heap locations, no operations on the heap.
+  HeapLocationCollector heap_location_collector(graph_);
+  ASSERT_EQ(heap_location_collector.GetNumberOfHeapLocations(), 0U);
+  ASSERT_FALSE(heap_location_collector.HasHeapStores());
+
+  // Test that after visiting the graph, the collector sees the following heap
+  // locations: object.field10 and object.field20; and that it sees heap stores.
+  heap_location_collector.VisitBasicBlock(entry);
+  ASSERT_EQ(heap_location_collector.GetNumberOfHeapLocations(), 2U);
+  ASSERT_TRUE(heap_location_collector.HasHeapStores());
+
+  // Test queries on HeapLocationCollector's ref info and index records.
+  ReferenceInfo* ref = heap_location_collector.FindReferenceInfoOf(object);
+  size_t loc1 = heap_location_collector.FindHeapLocationIndex(
+      ref, 10, nullptr, kUnknownClassDefIndex);
+  size_t loc2 = heap_location_collector.FindHeapLocationIndex(
+      ref, 20, nullptr, kUnknownClassDefIndex);
+  // must find the reference info for object in HeapLocationCollector.
+  ASSERT_TRUE(ref != nullptr);
+  // must find these heap locations.
+  ASSERT_TRUE(loc1 != HeapLocationCollector::kHeapLocationNotFound);
+  ASSERT_TRUE(loc2 != HeapLocationCollector::kHeapLocationNotFound);
+  // different fields of same object.
+  ASSERT_TRUE(loc1 != loc2);
+  // accesses to different fields of the same object should not alias.
+  ASSERT_FALSE(heap_location_collector.MayAlias(loc1, loc2));
+}
+
+}  // namespace art
diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc
index 76c9d23..211528b 100644
--- a/compiler/optimizing/load_store_elimination.cc
+++ b/compiler/optimizing/load_store_elimination.cc
@@ -14,6 +14,7 @@
  * limitations under the License.
  */
 
+#include "load_store_analysis.h"
 #include "load_store_elimination.h"
 
 #include "escape.h"
@@ -23,477 +24,6 @@
 
 namespace art {
 
-class ReferenceInfo;
-
-// A cap for the number of heap locations to prevent pathological time/space consumption.
-// The number of heap locations for most of the methods stays below this threshold.
-constexpr size_t kMaxNumberOfHeapLocations = 32;
-
-// A ReferenceInfo contains additional info about a reference such as
-// whether it's a singleton, returned, etc.
-class ReferenceInfo : public ArenaObject<kArenaAllocMisc> {
- public:
-  ReferenceInfo(HInstruction* reference, size_t pos)
-      : reference_(reference),
-        position_(pos),
-        is_singleton_(true),
-        is_singleton_and_not_returned_(true),
-        is_singleton_and_not_deopt_visible_(true),
-        has_index_aliasing_(false) {
-    CalculateEscape(reference_,
-                    nullptr,
-                    &is_singleton_,
-                    &is_singleton_and_not_returned_,
-                    &is_singleton_and_not_deopt_visible_);
-  }
-
-  HInstruction* GetReference() const {
-    return reference_;
-  }
-
-  size_t GetPosition() const {
-    return position_;
-  }
-
-  // Returns true if reference_ is the only name that can refer to its value during
-  // the lifetime of the method. So it's guaranteed to not have any alias in
-  // the method (including its callees).
-  bool IsSingleton() const {
-    return is_singleton_;
-  }
-
-  // Returns true if reference_ is a singleton and not returned to the caller or
-  // used as an environment local of an HDeoptimize instruction.
-  // The allocation and stores into reference_ may be eliminated for such cases.
-  bool IsSingletonAndRemovable() const {
-    return is_singleton_and_not_returned_ && is_singleton_and_not_deopt_visible_;
-  }
-
-  // Returns true if reference_ is a singleton and returned to the caller or
-  // used as an environment local of an HDeoptimize instruction.
-  bool IsSingletonAndNonRemovable() const {
-    return is_singleton_ &&
-           (!is_singleton_and_not_returned_ || !is_singleton_and_not_deopt_visible_);
-  }
-
-  bool HasIndexAliasing() {
-    return has_index_aliasing_;
-  }
-
-  void SetHasIndexAliasing(bool has_index_aliasing) {
-    // Only allow setting to true.
-    DCHECK(has_index_aliasing);
-    has_index_aliasing_ = has_index_aliasing;
-  }
-
- private:
-  HInstruction* const reference_;
-  const size_t position_;  // position in HeapLocationCollector's ref_info_array_.
-
-  // Can only be referred to by a single name in the method.
-  bool is_singleton_;
-  // Is singleton and not returned to caller.
-  bool is_singleton_and_not_returned_;
-  // Is singleton and not used as an environment local of HDeoptimize.
-  bool is_singleton_and_not_deopt_visible_;
-  // Some heap locations with reference_ have array index aliasing,
-  // e.g. arr[i] and arr[j] may be the same location.
-  bool has_index_aliasing_;
-
-  DISALLOW_COPY_AND_ASSIGN(ReferenceInfo);
-};
-
-// A heap location is a reference-offset/index pair that a value can be loaded from
-// or stored to.
-class HeapLocation : public ArenaObject<kArenaAllocMisc> {
- public:
-  static constexpr size_t kInvalidFieldOffset = -1;
-
-  // TODO: more fine-grained array types.
-  static constexpr int16_t kDeclaringClassDefIndexForArrays = -1;
-
-  HeapLocation(ReferenceInfo* ref_info,
-               size_t offset,
-               HInstruction* index,
-               int16_t declaring_class_def_index)
-      : ref_info_(ref_info),
-        offset_(offset),
-        index_(index),
-        declaring_class_def_index_(declaring_class_def_index),
-        value_killed_by_loop_side_effects_(true) {
-    DCHECK(ref_info != nullptr);
-    DCHECK((offset == kInvalidFieldOffset && index != nullptr) ||
-           (offset != kInvalidFieldOffset && index == nullptr));
-    if (ref_info->IsSingleton() && !IsArrayElement()) {
-      // Assume this location's value cannot be killed by loop side effects
-      // until proven otherwise.
-      value_killed_by_loop_side_effects_ = false;
-    }
-  }
-
-  ReferenceInfo* GetReferenceInfo() const { return ref_info_; }
-  size_t GetOffset() const { return offset_; }
-  HInstruction* GetIndex() const { return index_; }
-
-  // Returns the definition of declaring class' dex index.
-  // It's kDeclaringClassDefIndexForArrays for an array element.
-  int16_t GetDeclaringClassDefIndex() const {
-    return declaring_class_def_index_;
-  }
-
-  bool IsArrayElement() const {
-    return index_ != nullptr;
-  }
-
-  bool IsValueKilledByLoopSideEffects() const {
-    return value_killed_by_loop_side_effects_;
-  }
-
-  void SetValueKilledByLoopSideEffects(bool val) {
-    value_killed_by_loop_side_effects_ = val;
-  }
-
- private:
-  ReferenceInfo* const ref_info_;      // reference for instance/static field or array access.
-  const size_t offset_;                // offset of static/instance field.
-  HInstruction* const index_;          // index of an array element.
-  const int16_t declaring_class_def_index_;  // declaring class's def's dex index.
-  bool value_killed_by_loop_side_effects_;   // value of this location may be killed by loop
-                                             // side effects because this location is stored
-                                             // into inside a loop. This gives
-                                             // better info on whether a singleton's location
-                                             // value may be killed by loop side effects.
-
-  DISALLOW_COPY_AND_ASSIGN(HeapLocation);
-};
-
-static HInstruction* HuntForOriginalReference(HInstruction* ref) {
-  DCHECK(ref != nullptr);
-  while (ref->IsNullCheck() || ref->IsBoundType()) {
-    ref = ref->InputAt(0);
-  }
-  return ref;
-}
-
-// A HeapLocationCollector collects all relevant heap locations and keeps
-// an aliasing matrix for all locations.
-class HeapLocationCollector : public HGraphVisitor {
- public:
-  static constexpr size_t kHeapLocationNotFound = -1;
-  // Start with a single uint32_t word. That's enough bits for pair-wise
-  // aliasing matrix of 8 heap locations.
-  static constexpr uint32_t kInitialAliasingMatrixBitVectorSize = 32;
-
-  explicit HeapLocationCollector(HGraph* graph)
-      : HGraphVisitor(graph),
-        ref_info_array_(graph->GetArena()->Adapter(kArenaAllocLSE)),
-        heap_locations_(graph->GetArena()->Adapter(kArenaAllocLSE)),
-        aliasing_matrix_(graph->GetArena(),
-                         kInitialAliasingMatrixBitVectorSize,
-                         true,
-                         kArenaAllocLSE),
-        has_heap_stores_(false),
-        has_volatile_(false),
-        has_monitor_operations_(false) {}
-
-  size_t GetNumberOfHeapLocations() const {
-    return heap_locations_.size();
-  }
-
-  HeapLocation* GetHeapLocation(size_t index) const {
-    return heap_locations_[index];
-  }
-
-  ReferenceInfo* FindReferenceInfoOf(HInstruction* ref) const {
-    for (size_t i = 0; i < ref_info_array_.size(); i++) {
-      ReferenceInfo* ref_info = ref_info_array_[i];
-      if (ref_info->GetReference() == ref) {
-        DCHECK_EQ(i, ref_info->GetPosition());
-        return ref_info;
-      }
-    }
-    return nullptr;
-  }
-
-  bool HasHeapStores() const {
-    return has_heap_stores_;
-  }
-
-  bool HasVolatile() const {
-    return has_volatile_;
-  }
-
-  bool HasMonitorOps() const {
-    return has_monitor_operations_;
-  }
-
-  // Find and return the heap location index in heap_locations_.
-  size_t FindHeapLocationIndex(ReferenceInfo* ref_info,
-                               size_t offset,
-                               HInstruction* index,
-                               int16_t declaring_class_def_index) const {
-    for (size_t i = 0; i < heap_locations_.size(); i++) {
-      HeapLocation* loc = heap_locations_[i];
-      if (loc->GetReferenceInfo() == ref_info &&
-          loc->GetOffset() == offset &&
-          loc->GetIndex() == index &&
-          loc->GetDeclaringClassDefIndex() == declaring_class_def_index) {
-        return i;
-      }
-    }
-    return kHeapLocationNotFound;
-  }
-
-  // Returns true if heap_locations_[index1] and heap_locations_[index2] may alias.
-  bool MayAlias(size_t index1, size_t index2) const {
-    if (index1 < index2) {
-      return aliasing_matrix_.IsBitSet(AliasingMatrixPosition(index1, index2));
-    } else if (index1 > index2) {
-      return aliasing_matrix_.IsBitSet(AliasingMatrixPosition(index2, index1));
-    } else {
-      DCHECK(false) << "index1 and index2 are expected to be different";
-      return true;
-    }
-  }
-
-  void BuildAliasingMatrix() {
-    const size_t number_of_locations = heap_locations_.size();
-    if (number_of_locations == 0) {
-      return;
-    }
-    size_t pos = 0;
-    // Compute aliasing info between every pair of different heap locations.
-    // Save the result in a matrix represented as a BitVector.
-    for (size_t i = 0; i < number_of_locations - 1; i++) {
-      for (size_t j = i + 1; j < number_of_locations; j++) {
-        if (ComputeMayAlias(i, j)) {
-          aliasing_matrix_.SetBit(CheckedAliasingMatrixPosition(i, j, pos));
-        }
-        pos++;
-      }
-    }
-  }
-
- private:
-  // An allocation cannot alias with a name which already exists at the point
-  // of the allocation, such as a parameter or a load happening before the allocation.
-  bool MayAliasWithPreexistenceChecking(ReferenceInfo* ref_info1, ReferenceInfo* ref_info2) const {
-    if (ref_info1->GetReference()->IsNewInstance() || ref_info1->GetReference()->IsNewArray()) {
-      // Any reference that can alias with the allocation must appear after it in the block/in
-      // the block's successors. In reverse post order, those instructions will be visited after
-      // the allocation.
-      return ref_info2->GetPosition() >= ref_info1->GetPosition();
-    }
-    return true;
-  }
-
-  bool CanReferencesAlias(ReferenceInfo* ref_info1, ReferenceInfo* ref_info2) const {
-    if (ref_info1 == ref_info2) {
-      return true;
-    } else if (ref_info1->IsSingleton()) {
-      return false;
-    } else if (ref_info2->IsSingleton()) {
-      return false;
-    } else if (!MayAliasWithPreexistenceChecking(ref_info1, ref_info2) ||
-        !MayAliasWithPreexistenceChecking(ref_info2, ref_info1)) {
-      return false;
-    }
-    return true;
-  }
-
-  // `index1` and `index2` are indices in the array of collected heap locations.
-  // Returns the position in the bit vector that tracks whether the two heap
-  // locations may alias.
-  size_t AliasingMatrixPosition(size_t index1, size_t index2) const {
-    DCHECK(index2 > index1);
-    const size_t number_of_locations = heap_locations_.size();
-    // It's (num_of_locations - 1) + ... + (num_of_locations - index1) + (index2 - index1 - 1).
-    return (number_of_locations * index1 - (1 + index1) * index1 / 2 + (index2 - index1 - 1));
-  }
-
-  // An additional position is passed in to make sure the calculated position is correct.
-  size_t CheckedAliasingMatrixPosition(size_t index1, size_t index2, size_t position) {
-    size_t calculated_position = AliasingMatrixPosition(index1, index2);
-    DCHECK_EQ(calculated_position, position);
-    return calculated_position;
-  }
-
-  // Compute if two locations may alias to each other.
-  bool ComputeMayAlias(size_t index1, size_t index2) const {
-    HeapLocation* loc1 = heap_locations_[index1];
-    HeapLocation* loc2 = heap_locations_[index2];
-    if (loc1->GetOffset() != loc2->GetOffset()) {
-      // Either two different instance fields, or one is an instance
-      // field and the other is an array element.
-      return false;
-    }
-    if (loc1->GetDeclaringClassDefIndex() != loc2->GetDeclaringClassDefIndex()) {
-      // Different types.
-      return false;
-    }
-    if (!CanReferencesAlias(loc1->GetReferenceInfo(), loc2->GetReferenceInfo())) {
-      return false;
-    }
-    if (loc1->IsArrayElement() && loc2->IsArrayElement()) {
-      HInstruction* array_index1 = loc1->GetIndex();
-      HInstruction* array_index2 = loc2->GetIndex();
-      DCHECK(array_index1 != nullptr);
-      DCHECK(array_index2 != nullptr);
-      if (array_index1->IsIntConstant() &&
-          array_index2->IsIntConstant() &&
-          array_index1->AsIntConstant()->GetValue() != array_index2->AsIntConstant()->GetValue()) {
-        // Different constant indices do not alias.
-        return false;
-      }
-      ReferenceInfo* ref_info = loc1->GetReferenceInfo();
-      ref_info->SetHasIndexAliasing(true);
-    }
-    return true;
-  }
-
-  ReferenceInfo* GetOrCreateReferenceInfo(HInstruction* instruction) {
-    ReferenceInfo* ref_info = FindReferenceInfoOf(instruction);
-    if (ref_info == nullptr) {
-      size_t pos = ref_info_array_.size();
-      ref_info = new (GetGraph()->GetArena()) ReferenceInfo(instruction, pos);
-      ref_info_array_.push_back(ref_info);
-    }
-    return ref_info;
-  }
-
-  void CreateReferenceInfoForReferenceType(HInstruction* instruction) {
-    if (instruction->GetType() != Primitive::kPrimNot) {
-      return;
-    }
-    DCHECK(FindReferenceInfoOf(instruction) == nullptr);
-    GetOrCreateReferenceInfo(instruction);
-  }
-
-  HeapLocation* GetOrCreateHeapLocation(HInstruction* ref,
-                                        size_t offset,
-                                        HInstruction* index,
-                                        int16_t declaring_class_def_index) {
-    HInstruction* original_ref = HuntForOriginalReference(ref);
-    ReferenceInfo* ref_info = GetOrCreateReferenceInfo(original_ref);
-    size_t heap_location_idx = FindHeapLocationIndex(
-        ref_info, offset, index, declaring_class_def_index);
-    if (heap_location_idx == kHeapLocationNotFound) {
-      HeapLocation* heap_loc = new (GetGraph()->GetArena())
-          HeapLocation(ref_info, offset, index, declaring_class_def_index);
-      heap_locations_.push_back(heap_loc);
-      return heap_loc;
-    }
-    return heap_locations_[heap_location_idx];
-  }
-
-  HeapLocation* VisitFieldAccess(HInstruction* ref, const FieldInfo& field_info) {
-    if (field_info.IsVolatile()) {
-      has_volatile_ = true;
-    }
-    const uint16_t declaring_class_def_index = field_info.GetDeclaringClassDefIndex();
-    const size_t offset = field_info.GetFieldOffset().SizeValue();
-    return GetOrCreateHeapLocation(ref, offset, nullptr, declaring_class_def_index);
-  }
-
-  void VisitArrayAccess(HInstruction* array, HInstruction* index) {
-    GetOrCreateHeapLocation(array, HeapLocation::kInvalidFieldOffset,
-        index, HeapLocation::kDeclaringClassDefIndexForArrays);
-  }
-
-  void VisitInstanceFieldGet(HInstanceFieldGet* instruction) OVERRIDE {
-    VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo());
-    CreateReferenceInfoForReferenceType(instruction);
-  }
-
-  void VisitInstanceFieldSet(HInstanceFieldSet* instruction) OVERRIDE {
-    HeapLocation* location = VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo());
-    has_heap_stores_ = true;
-    if (location->GetReferenceInfo()->IsSingleton()) {
-      // A singleton's location value may be killed by loop side effects if it's
-      // defined before that loop, and it's stored into inside that loop.
-      HLoopInformation* loop_info = instruction->GetBlock()->GetLoopInformation();
-      if (loop_info != nullptr) {
-        HInstruction* ref = location->GetReferenceInfo()->GetReference();
-        DCHECK(ref->IsNewInstance());
-        if (loop_info->IsDefinedOutOfTheLoop(ref)) {
-          // ref's location value may be killed by this loop's side effects.
-          location->SetValueKilledByLoopSideEffects(true);
-        } else {
-          // ref is defined inside this loop so this loop's side effects cannot
-          // kill its location value at the loop header since ref/its location doesn't
-          // exist yet at the loop header.
-        }
-      }
-    } else {
-      // For non-singletons, value_killed_by_loop_side_effects_ is inited to
-      // true.
-      DCHECK_EQ(location->IsValueKilledByLoopSideEffects(), true);
-    }
-  }
-
-  void VisitStaticFieldGet(HStaticFieldGet* instruction) OVERRIDE {
-    VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo());
-    CreateReferenceInfoForReferenceType(instruction);
-  }
-
-  void VisitStaticFieldSet(HStaticFieldSet* instruction) OVERRIDE {
-    VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo());
-    has_heap_stores_ = true;
-  }
-
-  // We intentionally don't collect HUnresolvedInstanceField/HUnresolvedStaticField accesses
-  // since we cannot accurately track the fields.
-
-  void VisitArrayGet(HArrayGet* instruction) OVERRIDE {
-    VisitArrayAccess(instruction->InputAt(0), instruction->InputAt(1));
-    CreateReferenceInfoForReferenceType(instruction);
-  }
-
-  void VisitArraySet(HArraySet* instruction) OVERRIDE {
-    VisitArrayAccess(instruction->InputAt(0), instruction->InputAt(1));
-    has_heap_stores_ = true;
-  }
-
-  void VisitNewInstance(HNewInstance* new_instance) OVERRIDE {
-    // Any references appearing in the ref_info_array_ so far cannot alias with new_instance.
-    CreateReferenceInfoForReferenceType(new_instance);
-  }
-
-  void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* instruction) OVERRIDE {
-    CreateReferenceInfoForReferenceType(instruction);
-  }
-
-  void VisitInvokeVirtual(HInvokeVirtual* instruction) OVERRIDE {
-    CreateReferenceInfoForReferenceType(instruction);
-  }
-
-  void VisitInvokeInterface(HInvokeInterface* instruction) OVERRIDE {
-    CreateReferenceInfoForReferenceType(instruction);
-  }
-
-  void VisitParameterValue(HParameterValue* instruction) OVERRIDE {
-    CreateReferenceInfoForReferenceType(instruction);
-  }
-
-  void VisitSelect(HSelect* instruction) OVERRIDE {
-    CreateReferenceInfoForReferenceType(instruction);
-  }
-
-  void VisitMonitorOperation(HMonitorOperation* monitor ATTRIBUTE_UNUSED) OVERRIDE {
-    has_monitor_operations_ = true;
-  }
-
-  ArenaVector<ReferenceInfo*> ref_info_array_;   // All references used for heap accesses.
-  ArenaVector<HeapLocation*> heap_locations_;    // All heap locations.
-  ArenaBitVector aliasing_matrix_;    // aliasing info between each pair of locations.
-  bool has_heap_stores_;    // If there is no heap stores, LSE acts as GVN with better
-                            // alias analysis and won't be as effective.
-  bool has_volatile_;       // If there are volatile field accesses.
-  bool has_monitor_operations_;    // If there are monitor operations.
-
-  DISALLOW_COPY_AND_ASSIGN(HeapLocationCollector);
-};
-
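The HeapLocationCollector removed above (moved, per the compiler/Android.bp changes, into the new load_store_analysis files) packs the pairwise may-alias bits of N heap locations into a flat bit vector of N*(N-1)/2 slots. A minimal standalone sketch of the triangular indexing behind AliasingMatrixPosition(), with ART types replaced by std::size_t:

    #include <cassert>
    #include <cstddef>

    // Row-major upper-triangle slot of the pair (i, j), i < j, among n
    // locations: rows 0..i-1 contribute (n-1) + (n-2) + ... + (n-i) slots,
    // and (j - i - 1) slots precede (i, j) within row i.
    std::size_t AliasingMatrixPosition(std::size_t n, std::size_t i, std::size_t j) {
      assert(i < j && j < n);
      return n * i - (1 + i) * i / 2 + (j - i - 1);
    }

    int main() {
      // BuildAliasingMatrix() enumerates the pairs for n = 4 in the order
      // (0,1)=0 (0,2)=1 (0,3)=2 (1,2)=3 (1,3)=4 (2,3)=5.
      assert(AliasingMatrixPosition(4, 0, 3) == 2);
      assert(AliasingMatrixPosition(4, 1, 2) == 3);
      assert(AliasingMatrixPosition(4, 2, 3) == 5);
      return 0;
    }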
 // An unknown heap value. Loads with such a value in the heap location cannot be eliminated.
 // A heap location can be set to kUnknownHeapValue when:
 // - initially set a value.
@@ -516,7 +46,7 @@
         side_effects_(side_effects),
         heap_values_for_(graph->GetBlocks().size(),
                          ArenaVector<HInstruction*>(heap_locations_collector.
-                                                        GetNumberOfHeapLocations(),
+                                                    GetNumberOfHeapLocations(),
                                                     kUnknownHeapValue,
                                                     graph->GetArena()->Adapter(kArenaAllocLSE)),
                          graph->GetArena()->Adapter(kArenaAllocLSE)),
@@ -760,7 +290,7 @@
                         size_t offset,
                         HInstruction* index,
                         int16_t declaring_class_def_index) {
-    HInstruction* original_ref = HuntForOriginalReference(ref);
+    HInstruction* original_ref = heap_location_collector_.HuntForOriginalReference(ref);
     ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(original_ref);
     size_t idx = heap_location_collector_.FindHeapLocationIndex(
         ref_info, offset, index, declaring_class_def_index);
@@ -827,7 +357,7 @@
                         HInstruction* index,
                         int16_t declaring_class_def_index,
                         HInstruction* value) {
-    HInstruction* original_ref = HuntForOriginalReference(ref);
+    HInstruction* original_ref = heap_location_collector_.HuntForOriginalReference(ref);
     ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(original_ref);
     size_t idx = heap_location_collector_.FindHeapLocationIndex(
         ref_info, offset, index, declaring_class_def_index);
@@ -1127,25 +657,12 @@
     // Skip this optimization.
     return;
   }
-  HeapLocationCollector heap_location_collector(graph_);
-  for (HBasicBlock* block : graph_->GetReversePostOrder()) {
-    heap_location_collector.VisitBasicBlock(block);
-  }
-  if (heap_location_collector.GetNumberOfHeapLocations() > kMaxNumberOfHeapLocations) {
-    // Bail out if there are too many heap locations to deal with.
+  const HeapLocationCollector& heap_location_collector = lsa_.GetHeapLocationCollector();
+  if (heap_location_collector.GetNumberOfHeapLocations() == 0) {
+    // No HeapLocation information from LSA; skip this optimization.
     return;
   }
-  if (!heap_location_collector.HasHeapStores()) {
-    // Without heap stores, this pass would act mostly as GVN on heap accesses.
-    return;
-  }
-  if (heap_location_collector.HasVolatile() || heap_location_collector.HasMonitorOps()) {
-    // Don't do load/store elimination if the method has volatile field accesses or
-    // monitor operations, for now.
-    // TODO: do it right.
-    return;
-  }
-  heap_location_collector.BuildAliasingMatrix();
+
   LSEVisitor lse_visitor(graph_, heap_location_collector, side_effects_);
   for (HBasicBlock* block : graph_->GetReversePostOrder()) {
     lse_visitor.VisitBasicBlock(block);
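With the bail-outs deleted above (too many locations, no heap stores, volatile accesses, monitor operations) presumably folded into LoadStoreAnalysis, LSE now only asks whether the analysis published any locations. A toy model of that contract, under the assumption that every LSA bail-out leaves zero locations behind:

    #include <vector>

    struct HeapLocation {};

    // The analysis either publishes its collected locations or, on any
    // bail-out, publishes none; the consumer skips itself when there is
    // nothing to work on.
    std::vector<HeapLocation> RunAnalysis(bool bail_out) {
      if (bail_out) {
        return {};                            // bail-outs leave zero locations
      }
      return std::vector<HeapLocation>(4);    // otherwise: collected locations
    }

    int main() {
      std::vector<HeapLocation> locations = RunAnalysis(/* bail_out= */ true);
      if (locations.empty()) {
        return 0;                             // LSE-style early exit
      }
      // ... elimination would run here ...
      return 0;
    }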
diff --git a/compiler/optimizing/load_store_elimination.h b/compiler/optimizing/load_store_elimination.h
index 1d9e5c8..efe71c7 100644
--- a/compiler/optimizing/load_store_elimination.h
+++ b/compiler/optimizing/load_store_elimination.h
@@ -22,12 +22,16 @@
 namespace art {
 
 class SideEffectsAnalysis;
+class LoadStoreAnalysis;
 
 class LoadStoreElimination : public HOptimization {
  public:
-  LoadStoreElimination(HGraph* graph, const SideEffectsAnalysis& side_effects)
+  LoadStoreElimination(HGraph* graph,
+                       const SideEffectsAnalysis& side_effects,
+                       const LoadStoreAnalysis& lsa)
       : HOptimization(graph, kLoadStoreEliminationPassName),
-        side_effects_(side_effects) {}
+        side_effects_(side_effects),
+        lsa_(lsa) {}
 
   void Run() OVERRIDE;
 
@@ -35,6 +39,7 @@
 
  private:
   const SideEffectsAnalysis& side_effects_;
+  const LoadStoreAnalysis& lsa_;
 
   DISALLOW_COPY_AND_ASSIGN(LoadStoreElimination);
 };
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index 963df5a..94787c9 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -173,6 +173,39 @@
   return false;
 }
 
+// Detect situations with same-extension narrower operands.
+// Returns true on success and sets is_unsigned accordingly.
+static bool IsNarrowerOperands(HInstruction* a,
+                               HInstruction* b,
+                               Primitive::Type type,
+                               /*out*/ HInstruction** r,
+                               /*out*/ HInstruction** s,
+                               /*out*/ bool* is_unsigned) {
+  if (IsSignExtensionAndGet(a, type, r) && IsSignExtensionAndGet(b, type, s)) {
+    *is_unsigned = false;
+    return true;
+  } else if (IsZeroExtensionAndGet(a, type, r) && IsZeroExtensionAndGet(b, type, s)) {
+    *is_unsigned = true;
+    return true;
+  }
+  return false;
+}
+
+// As above, single operand.
+static bool IsNarrowerOperand(HInstruction* a,
+                              Primitive::Type type,
+                              /*out*/ HInstruction** r,
+                              /*out*/ bool* is_unsigned) {
+  if (IsSignExtensionAndGet(a, type, r)) {
+    *is_unsigned = false;
+    return true;
+  } else if (IsZeroExtensionAndGet(a, type, r)) {
+    *is_unsigned = true;
+    return true;
+  }
+  return false;
+}
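IsNarrowerOperands() insists on the same extension kind for both operands because sign- and zero-extension widen the same narrow bit pattern to different values. A self-contained demonstration in plain C++:

    #include <cstdint>
    #include <cstdio>

    int main() {
      // The byte pattern 0x80 widens to different 32-bit values depending on
      // the extension kind, which is why mixed sign/zero pairs are rejected.
      int8_t narrow = -128;                                  // bit pattern 0x80
      int32_t sign_extended = narrow;                        // 0xFFFFFF80 == -128
      int32_t zero_extended = static_cast<uint8_t>(narrow);  // 0x00000080 == 128
      std::printf("sign: %d, zero: %d\n",
                  static_cast<int>(sign_extended),
                  static_cast<int>(zero_extended));
      return 0;
    }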
+
 // Detect up to two instructions a and b, and an accumulated constant c.
 static bool IsAddConstHelper(HInstruction* instruction,
                              /*out*/ HInstruction** a,
@@ -756,7 +789,7 @@
   return !IsUsedOutsideLoop(node->loop_info, instruction) && !instruction->DoesAnyWrite();
 }
 
-// TODO: more operations and intrinsics, detect saturation arithmetic, etc.
+// TODO: saturation arithmetic.
 bool HLoopOptimization::VectorizeUse(LoopNode* node,
                                      HInstruction* instruction,
                                      bool generate_code,
@@ -867,25 +900,38 @@
       return true;
     }
     // Deal with vector restrictions.
+    HInstruction* opa = instruction->InputAt(0);
+    HInstruction* opb = instruction->InputAt(1);
+    HInstruction* r = opa;
+    bool is_unsigned = false;
     if ((HasVectorRestrictions(restrictions, kNoShift)) ||
         (instruction->IsShr() && HasVectorRestrictions(restrictions, kNoShr))) {
       return false;  // unsupported instruction
-    } else if ((instruction->IsShr() || instruction->IsUShr()) &&
-               HasVectorRestrictions(restrictions, kNoHiBits)) {
-      return false;  // hibits may impact lobits; TODO: we can do better!
+    } else if (HasVectorRestrictions(restrictions, kNoHiBits)) {
+      // Right shifts need extra care to account for the higher-order bits.
+      // TODO: shr/unsigned and ushr/signed (less likely) can be handled by flipping signedness.
+      if (instruction->IsShr() &&
+          (!IsNarrowerOperand(opa, type, &r, &is_unsigned) || is_unsigned)) {
+        return false;  // reject, unless the operand is sign-extension narrower
+      } else if (instruction->IsUShr() &&
+                 (!IsNarrowerOperand(opa, type, &r, &is_unsigned) || !is_unsigned)) {
+        return false;  // reject, unless the operand is zero-extension narrower
+      }
     }
     // Accept shift operator for vectorizable/invariant operands.
     // TODO: accept symbolic, albeit loop invariant shift factors.
-    HInstruction* opa = instruction->InputAt(0);
-    HInstruction* opb = instruction->InputAt(1);
+    DCHECK(r != nullptr);
+    if (generate_code && vector_mode_ != kVector) {  // de-idiom
+      r = opa;
+    }
     int64_t distance = 0;
-    if (VectorizeUse(node, opa, generate_code, type, restrictions) &&
+    if (VectorizeUse(node, r, generate_code, type, restrictions) &&
         IsInt64AndGet(opb, /*out*/ &distance)) {
       // Restrict shift distance to packed data type width.
       int64_t max_distance = Primitive::ComponentSize(type) * 8;
       if (0 <= distance && distance < max_distance) {
         if (generate_code) {
-          GenerateVecOp(instruction, vector_map_->Get(opa), opb, type);
+          GenerateVecOp(instruction, vector_map_->Get(r), opb, type);
         }
         return true;
       }
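The distance check above restricts vector shifts to the packed element width. A compile-checkable sketch of the accepted range (hypothetical helper name):

    #include <cstddef>
    #include <cstdint>

    // Mirrors the accepted range: a packed shift on elements of
    // component_size bytes is only generated for distances within the
    // element width in bits.
    constexpr bool IsValidVectorShiftDistance(std::size_t component_size,
                                              std::int64_t distance) {
      return 0 <= distance &&
             distance < static_cast<std::int64_t>(component_size * 8);
    }

    static_assert(IsValidVectorShiftDistance(1, 7), "bytes accept 0..7");
    static_assert(!IsValidVectorShiftDistance(1, 8), "8 overflows a byte lane");
    static_assert(IsValidVectorShiftDistance(4, 31), "ints accept 0..31");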
@@ -899,16 +945,23 @@
       case Intrinsics::kMathAbsFloat:
       case Intrinsics::kMathAbsDouble: {
         // Deal with vector restrictions.
-        if (HasVectorRestrictions(restrictions, kNoAbs) ||
-            HasVectorRestrictions(restrictions, kNoHiBits)) {
-          // TODO: we can do better for some hibits cases.
+        HInstruction* opa = instruction->InputAt(0);
+        HInstruction* r = opa;
+        bool is_unsigned = false;
+        if (HasVectorRestrictions(restrictions, kNoAbs)) {
           return false;
+        } else if (HasVectorRestrictions(restrictions, kNoHiBits) &&
+                   (!IsNarrowerOperand(opa, type, &r, &is_unsigned) || is_unsigned)) {
+          return false;  // reject, unless the operand is sign-extension narrower
         }
         // Accept ABS(x) for vectorizable operand.
-        HInstruction* opa = instruction->InputAt(0);
-        if (VectorizeUse(node, opa, generate_code, type, restrictions)) {
+        DCHECK(r != nullptr);
+        if (generate_code && vector_mode_ != kVector) {  // de-idiom
+          r = opa;
+        }
+        if (VectorizeUse(node, r, generate_code, type, restrictions)) {
           if (generate_code) {
-            GenerateVecOp(instruction, vector_map_->Get(opa), nullptr, type);
+            GenerateVecOp(instruction, vector_map_->Get(r), nullptr, type);
           }
           return true;
         }
@@ -923,18 +976,28 @@
       case Intrinsics::kMathMaxFloatFloat:
       case Intrinsics::kMathMaxDoubleDouble: {
         // Deal with vector restrictions.
-        if (HasVectorRestrictions(restrictions, kNoMinMax) ||
-            HasVectorRestrictions(restrictions, kNoHiBits)) {
-          // TODO: we can do better for some hibits cases.
-          return false;
-        }
-        // Accept MIN/MAX(x, y) for vectorizable operands.
         HInstruction* opa = instruction->InputAt(0);
         HInstruction* opb = instruction->InputAt(1);
-        if (VectorizeUse(node, opa, generate_code, type, restrictions) &&
-            VectorizeUse(node, opb, generate_code, type, restrictions)) {
+        HInstruction* r = opa;
+        HInstruction* s = opb;
+        bool is_unsigned = false;
+        if (HasVectorRestrictions(restrictions, kNoMinMax)) {
+          return false;
+        } else if (HasVectorRestrictions(restrictions, kNoHiBits) &&
+                   !IsNarrowerOperands(opa, opb, type, &r, &s, &is_unsigned)) {
+          return false;  // reject, unless all operands are same-extension narrower
+        }
+        // Accept MIN/MAX(x, y) for vectorizable operands.
+        DCHECK(r != nullptr && s != nullptr);
+        if (generate_code && vector_mode_ != kVector) {  // de-idiom
+          r = opa;
+          s = opb;
+        }
+        if (VectorizeUse(node, r, generate_code, type, restrictions) &&
+            VectorizeUse(node, s, generate_code, type, restrictions)) {
           if (generate_code) {
-            GenerateVecOp(instruction, vector_map_->Get(opa), vector_map_->Get(opb), type);
+            GenerateVecOp(
+                instruction, vector_map_->Get(r), vector_map_->Get(s), type, is_unsigned);
           }
           return true;
         }
@@ -959,11 +1022,11 @@
       switch (type) {
         case Primitive::kPrimBoolean:
         case Primitive::kPrimByte:
-          *restrictions |= kNoDiv | kNoAbs;
+          *restrictions |= kNoDiv;
           return TrySetVectorLength(16);
         case Primitive::kPrimChar:
         case Primitive::kPrimShort:
-          *restrictions |= kNoDiv | kNoAbs;
+          *restrictions |= kNoDiv;
           return TrySetVectorLength(8);
         case Primitive::kPrimInt:
           *restrictions |= kNoDiv;
@@ -1098,13 +1161,14 @@
 void HLoopOptimization::GenerateVecOp(HInstruction* org,
                                       HInstruction* opa,
                                       HInstruction* opb,
-                                      Primitive::Type type) {
+                                      Primitive::Type type,
+                                      bool is_unsigned) {
   if (vector_mode_ == kSequential) {
-    // Scalar code follows implicit integral promotion.
-    if (type == Primitive::kPrimBoolean ||
-        type == Primitive::kPrimByte ||
-        type == Primitive::kPrimChar ||
-        type == Primitive::kPrimShort) {
+    // Non-converting scalar code follows implicit integral promotion.
+    if (!org->IsTypeConversion() && (type == Primitive::kPrimBoolean ||
+                                     type == Primitive::kPrimByte ||
+                                     type == Primitive::kPrimChar ||
+                                     type == Primitive::kPrimShort)) {
       type = Primitive::kPrimInt;
     }
   }
@@ -1185,7 +1249,6 @@
           case Intrinsics::kMathMinLongLong:
           case Intrinsics::kMathMinFloatFloat:
           case Intrinsics::kMathMinDoubleDouble: {
-            bool is_unsigned = false;  // TODO: detect unsigned versions
             vector = new (global_allocator_)
                 HVecMin(global_allocator_, opa, opb, type, vector_length_, is_unsigned);
             break;
@@ -1194,7 +1257,6 @@
           case Intrinsics::kMathMaxLongLong:
           case Intrinsics::kMathMaxFloatFloat:
           case Intrinsics::kMathMaxDoubleDouble: {
-            bool is_unsigned = false;  // TODO: detect unsigned versions
             vector = new (global_allocator_)
                 HVecMax(global_allocator_, opa, opb, type, vector_length_, is_unsigned);
             break;
@@ -1258,7 +1320,7 @@
                                                  Primitive::Type type,
                                                  uint64_t restrictions) {
   // Test for top level arithmetic shift right x >> 1 or logical shift right x >>> 1
-  // (note whether the sign bit in higher precision is shifted in has no effect
+  // (note that whether the sign bit in wider precision is shifted in has no effect
   // on the narrow precision computed by the idiom).
   int64_t distance = 0;
   if ((instruction->IsShr() ||
@@ -1269,6 +1331,7 @@
     HInstruction* b = nullptr;
     int64_t       c = 0;
     if (IsAddConst(instruction->InputAt(0), /*out*/ &a, /*out*/ &b, /*out*/ &c)) {
+      DCHECK(a != nullptr && b != nullptr);
       // Accept c == 1 (rounded) or c == 0 (not rounded).
       bool is_rounded = false;
       if (c == 1) {
@@ -1280,11 +1343,7 @@
       HInstruction* r = nullptr;
       HInstruction* s = nullptr;
       bool is_unsigned = false;
-      if (IsZeroExtensionAndGet(a, type, &r) && IsZeroExtensionAndGet(b, type, &s)) {
-        is_unsigned = true;
-      } else if (IsSignExtensionAndGet(a, type, &r) && IsSignExtensionAndGet(b, type, &s)) {
-        is_unsigned = false;
-      } else {
+      if (!IsNarrowerOperands(a, b, type, &r, &s, &is_unsigned)) {
         return false;
       }
       // Deal with vector restrictions.
@@ -1295,6 +1354,10 @@
       // Accept recognized halving add for vectorizable operands. Vectorized code uses the
       // shorthand idiomatic operation. Sequential code uses the original scalar expressions.
       DCHECK(r != nullptr && s != nullptr);
+      if (generate_code && vector_mode_ != kVector) {  // de-idiom
+        r = instruction->InputAt(0);
+        s = instruction->InputAt(1);
+      }
       if (VectorizeUse(node, r, generate_code, type, restrictions) &&
           VectorizeUse(node, s, generate_code, type, restrictions)) {
         if (generate_code) {
@@ -1308,12 +1371,7 @@
                 is_unsigned,
                 is_rounded));
           } else {
-            VectorizeUse(node, instruction->InputAt(0), generate_code, type, restrictions);
-            VectorizeUse(node, instruction->InputAt(1), generate_code, type, restrictions);
-            GenerateVecOp(instruction,
-                          vector_map_->Get(instruction->InputAt(0)),
-                          vector_map_->Get(instruction->InputAt(1)),
-                          type);
+            GenerateVecOp(instruction, vector_map_->Get(r), vector_map_->Get(s), type);
           }
         }
         return true;
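The idiom recognized in this hunk maps (a + b + c) >> 1 with c in {0, 1} over same-extension narrower operands onto a halving add that never leaves narrow precision, the kind of operation SIMD ISAs expose directly (e.g. the SRHADD/URHADD family on ARM). A scalar reference for the rounded unsigned byte case:

    #include <cassert>
    #include <cstdint>

    // Compute in wider precision, add the rounding constant, shift; the
    // result always fits the narrow type, with no wrap-around at the top.
    uint8_t RoundingHalvingAddU8(uint8_t a, uint8_t b) {
      return static_cast<uint8_t>((static_cast<uint16_t>(a) + b + 1) >> 1);
    }

    int main() {
      assert(RoundingHalvingAddU8(255, 255) == 255);  // naive uint8_t
                                                      // (a + b + 1) would wrap
      assert(RoundingHalvingAddU8(1, 2) == 2);        // rounds up on odd sums
      return 0;
    }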
diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h
index 6d5978d..35298d4 100644
--- a/compiler/optimizing/loop_optimization.h
+++ b/compiler/optimizing/loop_optimization.h
@@ -137,7 +137,11 @@
                       HInstruction* opa,
                       HInstruction* opb,
                       Primitive::Type type);
-  void GenerateVecOp(HInstruction* org, HInstruction* opa, HInstruction* opb, Primitive::Type type);
+  void GenerateVecOp(HInstruction* org,
+                     HInstruction* opa,
+                     HInstruction* opb,
+                     Primitive::Type type,
+                     bool is_unsigned = false);
 
   // Vectorization idioms.
   bool VectorizeHalvingAddIdiom(LoopNode* node,
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 833f32b..6899910 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -2636,15 +2636,17 @@
 std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::MethodLoadKind rhs) {
   switch (rhs) {
     case HInvokeStaticOrDirect::MethodLoadKind::kStringInit:
-      return os << "string_init";
+      return os << "StringInit";
     case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
-      return os << "recursive";
+      return os << "Recursive";
+    case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative:
+      return os << "BootImageLinkTimePcRelative";
     case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
-      return os << "direct";
+      return os << "DirectAddress";
     case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative:
-      return os << "dex_cache_pc_relative";
+      return os << "DexCachePcRelative";
     case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod:
-      return os << "dex_cache_via_method";
+      return os << "DexCacheViaMethod";
     default:
       LOG(FATAL) << "Unknown MethodLoadKind: " << static_cast<int>(rhs);
       UNREACHABLE();
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 72774da..4d96fbe 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -4153,6 +4153,10 @@
     // Use the method's own ArtMethod* loaded by the register allocator.
     kRecursive,
 
+    // Use PC-relative boot image ArtMethod* address that will be known at link time.
+    // Used for boot image methods referenced by boot image code.
+    kBootImageLinkTimePcRelative,
+
     // Use ArtMethod* at a known address, embed the direct address in the code.
     // Used for app->boot calls with non-relocatable image and for JIT-compiled calls.
     kDirectAddress,
@@ -4292,6 +4296,10 @@
   bool HasPcRelativeDexCache() const {
     return GetMethodLoadKind() == MethodLoadKind::kDexCachePcRelative;
   }
+  bool HasPcRelativeMethodLoadKind() const {
+    return GetMethodLoadKind() == MethodLoadKind::kBootImageLinkTimePcRelative ||
+           GetMethodLoadKind() == MethodLoadKind::kDexCachePcRelative;
+  }
   bool HasCurrentMethodInput() const {
     // This function can be called only after the invoke has been fully initialized by the builder.
     if (NeedsCurrentMethodInput(GetMethodLoadKind())) {
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index f928f71..e5ab00b 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -83,6 +83,7 @@
 #include "jit/jit_code_cache.h"
 #include "jni/quick/jni_compiler.h"
 #include "licm.h"
+#include "load_store_analysis.h"
 #include "load_store_elimination.h"
 #include "loop_optimization.h"
 #include "nodes.h"
@@ -465,7 +466,8 @@
     const DexCompilationUnit& dex_compilation_unit,
     VariableSizedHandleScope* handles,
     SideEffectsAnalysis* most_recent_side_effects,
-    HInductionVarAnalysis* most_recent_induction) {
+    HInductionVarAnalysis* most_recent_induction,
+    LoadStoreAnalysis* most_recent_lsa) {
   std::string opt_name = ConvertPassNameToOptimizationName(pass_name);
   if (opt_name == BoundsCheckElimination::kBoundsCheckEliminationPassName) {
     CHECK(most_recent_side_effects != nullptr && most_recent_induction != nullptr);
@@ -499,15 +501,18 @@
   } else if (opt_name == HInductionVarAnalysis::kInductionPassName) {
     return new (arena) HInductionVarAnalysis(graph);
   } else if (opt_name == InstructionSimplifier::kInstructionSimplifierPassName) {
-    return new (arena) InstructionSimplifier(graph, codegen, stats, pass_name.c_str());
+    return new (arena) InstructionSimplifier(graph, codegen, driver, stats, pass_name.c_str());
   } else if (opt_name == IntrinsicsRecognizer::kIntrinsicsRecognizerPassName) {
     return new (arena) IntrinsicsRecognizer(graph, stats);
   } else if (opt_name == LICM::kLoopInvariantCodeMotionPassName) {
     CHECK(most_recent_side_effects != nullptr);
     return new (arena) LICM(graph, *most_recent_side_effects, stats);
+  } else if (opt_name == LoadStoreAnalysis::kLoadStoreAnalysisPassName) {
+    return new (arena) LoadStoreAnalysis(graph);
   } else if (opt_name == LoadStoreElimination::kLoadStoreEliminationPassName) {
     CHECK(most_recent_side_effects != nullptr);
-    return new (arena) LoadStoreElimination(graph, *most_recent_side_effects);
+    CHECK(most_recent_lsa != nullptr);
+    return new (arena) LoadStoreElimination(graph, *most_recent_side_effects, *most_recent_lsa);
   } else if (opt_name == SideEffectsAnalysis::kSideEffectsAnalysisPassName) {
     return new (arena) SideEffectsAnalysis(graph);
   } else if (opt_name == HLoopOptimization::kLoopOptimizationPassName) {
@@ -556,6 +561,7 @@
   // in the pass name list.
   SideEffectsAnalysis* most_recent_side_effects = nullptr;
   HInductionVarAnalysis* most_recent_induction = nullptr;
+  LoadStoreAnalysis* most_recent_lsa = nullptr;
   ArenaVector<HOptimization*> ret(arena->Adapter());
   for (const std::string& pass_name : pass_names) {
     HOptimization* opt = BuildOptimization(
@@ -568,7 +574,8 @@
         dex_compilation_unit,
         handles,
         most_recent_side_effects,
-        most_recent_induction);
+        most_recent_induction,
+        most_recent_lsa);
     CHECK(opt != nullptr) << "Couldn't build optimization: \"" << pass_name << "\"";
     ret.push_back(opt);
 
@@ -577,6 +584,8 @@
       most_recent_side_effects = down_cast<SideEffectsAnalysis*>(opt);
     } else if (opt_name == HInductionVarAnalysis::kInductionPassName) {
       most_recent_induction = down_cast<HInductionVarAnalysis*>(opt);
+    } else if (opt_name == LoadStoreAnalysis::kLoadStoreAnalysisPassName) {
+      most_recent_lsa = down_cast<LoadStoreAnalysis*>(opt);
     }
   }
   return ret;
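BuildOptimizations() wires each consumer to the most recently constructed producer, so a custom pass list has to order load_store_analysis before load_store_elimination or the CHECK in BuildOptimization() fires. The shape of that constraint as standalone code, with the pass-name strings assumed to match the kPassName constants:

    #include <cassert>
    #include <string>
    #include <vector>

    int main() {
      // Requesting load_store_elimination before any load_store_analysis in
      // the list would trip the CHECK (modeled with assert here).
      std::vector<std::string> pass_names = {
          "side_effects", "load_store_analysis", "load_store_elimination"};
      const std::string* most_recent_lsa = nullptr;
      for (const std::string& name : pass_names) {
        if (name == "load_store_analysis") {
          most_recent_lsa = &name;               // remember the latest producer
        } else if (name == "load_store_elimination") {
          assert(most_recent_lsa != nullptr);    // LSA must already have run
        }
      }
      return 0;
    }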
@@ -763,7 +772,8 @@
   HDeadCodeElimination* dce3 = new (arena) HDeadCodeElimination(
       graph, stats, "dead_code_elimination$final");
   HConstantFolding* fold1 = new (arena) HConstantFolding(graph, "constant_folding");
-  InstructionSimplifier* simplify1 = new (arena) InstructionSimplifier(graph, codegen, stats);
+  InstructionSimplifier* simplify1 = new (arena) InstructionSimplifier(
+      graph, codegen, driver, stats);
   HSelectGenerator* select_generator = new (arena) HSelectGenerator(graph, stats);
   HConstantFolding* fold2 = new (arena) HConstantFolding(
       graph, "constant_folding$after_inlining");
@@ -777,15 +787,16 @@
   HInductionVarAnalysis* induction = new (arena) HInductionVarAnalysis(graph);
   BoundsCheckElimination* bce = new (arena) BoundsCheckElimination(graph, *side_effects1, induction);
   HLoopOptimization* loop = new (arena) HLoopOptimization(graph, driver, induction);
-  LoadStoreElimination* lse = new (arena) LoadStoreElimination(graph, *side_effects2);
+  LoadStoreAnalysis* lsa = new (arena) LoadStoreAnalysis(graph);
+  LoadStoreElimination* lse = new (arena) LoadStoreElimination(graph, *side_effects2, *lsa);
   HSharpening* sharpening = new (arena) HSharpening(
       graph, codegen, dex_compilation_unit, driver, handles);
   InstructionSimplifier* simplify2 = new (arena) InstructionSimplifier(
-      graph, codegen, stats, "instruction_simplifier$after_inlining");
+      graph, codegen, driver, stats, "instruction_simplifier$after_inlining");
   InstructionSimplifier* simplify3 = new (arena) InstructionSimplifier(
-      graph, codegen, stats, "instruction_simplifier$after_bce");
+      graph, codegen, driver, stats, "instruction_simplifier$after_bce");
   InstructionSimplifier* simplify4 = new (arena) InstructionSimplifier(
-      graph, codegen, stats, "instruction_simplifier$before_codegen");
+      graph, codegen, driver, stats, "instruction_simplifier$before_codegen");
   IntrinsicsRecognizer* intrinsics = new (arena) IntrinsicsRecognizer(graph, stats);
   CHAGuardOptimization* cha_guard = new (arena) CHAGuardOptimization(graph);
   CodeSinking* code_sinking = new (arena) CodeSinking(graph, stats);
@@ -817,6 +828,7 @@
     fold3,  // evaluates code generated by dynamic bce
     simplify3,
     side_effects2,
+    lsa,
     lse,
     cha_guard,
     dce3,
diff --git a/compiler/optimizing/pc_relative_fixups_mips.cc b/compiler/optimizing/pc_relative_fixups_mips.cc
index ef2c432..bce54bf 100644
--- a/compiler/optimizing/pc_relative_fixups_mips.cc
+++ b/compiler/optimizing/pc_relative_fixups_mips.cc
@@ -58,6 +58,19 @@
     DCHECK(base_ != nullptr);
   }
 
+  void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE {
+    // If this is an invoke with a PC-relative pointer to a method,
+    // we need to add the base as the special input.
+    if (invoke->GetMethodLoadKind() ==
+            HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative &&
+        !IsCallFreeIntrinsic<IntrinsicLocationsBuilderMIPS>(invoke, codegen_)) {
+      InitializePCRelativeBasePointer();
+      // Add the base as the special input to the invoke.
+      DCHECK(!invoke->HasCurrentMethodInput());
+      invoke->AddSpecialInput(base_);
+    }
+  }
+
   void VisitLoadClass(HLoadClass* load_class) OVERRIDE {
     HLoadClass::LoadKind load_kind = load_class->GetLoadKind();
     switch (load_kind) {
diff --git a/compiler/optimizing/pc_relative_fixups_x86.cc b/compiler/optimizing/pc_relative_fixups_x86.cc
index a1c916f..2743df9 100644
--- a/compiler/optimizing/pc_relative_fixups_x86.cc
+++ b/compiler/optimizing/pc_relative_fixups_x86.cc
@@ -205,13 +205,13 @@
     // method pointer from the invoke.
     if (invoke_static_or_direct != nullptr &&
         invoke_static_or_direct->HasCurrentMethodInput()) {
-      DCHECK(!invoke_static_or_direct->HasPcRelativeDexCache());
+      DCHECK(!invoke_static_or_direct->HasPcRelativeMethodLoadKind());
       return;
     }
 
     bool base_added = false;
     if (invoke_static_or_direct != nullptr &&
-        invoke_static_or_direct->HasPcRelativeDexCache() &&
+        invoke_static_or_direct->HasPcRelativeMethodLoadKind() &&
         !IsCallFreeIntrinsic<IntrinsicLocationsBuilderX86>(invoke, codegen_)) {
       HX86ComputeBaseMethodAddress* method_address = GetPCRelativeBasePointer(invoke);
       // Add the extra parameter.
diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc
index 9a03163..7b8104b 100644
--- a/compiler/optimizing/sharpening.cc
+++ b/compiler/optimizing/sharpening.cc
@@ -16,6 +16,7 @@
 
 #include "sharpening.h"
 
+#include "art_method-inl.h"
 #include "base/casts.h"
 #include "base/enums.h"
 #include "class_linker.h"
@@ -41,7 +42,9 @@
     for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
       HInstruction* instruction = it.Current();
       if (instruction->IsInvokeStaticOrDirect()) {
-        SharpenInvokeStaticOrDirect(instruction->AsInvokeStaticOrDirect(), codegen_);
+        SharpenInvokeStaticOrDirect(instruction->AsInvokeStaticOrDirect(),
+                                    codegen_,
+                                    compiler_driver_);
       } else if (instruction->IsLoadString()) {
         ProcessLoadString(instruction->AsLoadString());
       }
@@ -68,9 +71,21 @@
   return IsInBootImage(method) && !options.GetCompilePic();
 }
 
+static bool BootImageAOTCanEmbedMethod(ArtMethod* method, CompilerDriver* compiler_driver) {
+  DCHECK(compiler_driver->GetCompilerOptions().IsBootImage());
+  if (!compiler_driver->GetSupportBootImageFixup()) {
+    return false;
+  }
+  ScopedObjectAccess soa(Thread::Current());
+  ObjPtr<mirror::Class> klass = method->GetDeclaringClass();
+  DCHECK(klass != nullptr);
+  const DexFile& dex_file = klass->GetDexFile();
+  return compiler_driver->IsImageClass(dex_file.StringByTypeIdx(klass->GetDexTypeIndex()));
+}
 
 void HSharpening::SharpenInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke,
-                                              CodeGenerator* codegen) {
+                                              CodeGenerator* codegen,
+                                              CompilerDriver* compiler_driver) {
   if (invoke->IsStringInit()) {
     // Not using the dex cache arrays. But we could still try to use a better dispatch...
     // TODO: Use direct_method and direct_code for the appropriate StringFactory method.
@@ -108,6 +123,10 @@
     method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress;
     method_load_data = reinterpret_cast<uintptr_t>(callee);
     code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod;
+  } else if (codegen->GetCompilerOptions().IsBootImage() &&
+             BootImageAOTCanEmbedMethod(callee, compiler_driver)) {
+    method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative;
+    code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod;
   } else {
     // Use PC-relative access to the dex cache arrays.
     method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative;
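Condensing the branch order visible in this hunk: a directly embeddable address wins, then the new boot-image PC-relative kind, then the dex-cache PC-relative fallback. A toy model with the inputs flattened to booleans (the real code consults CompilerOptions and CompilerDriver):

    enum class MethodLoadKind {
      kDirectAddress,
      kBootImageLinkTimePcRelative,
      kDexCachePcRelative,
    };

    MethodLoadKind ChooseMethodLoadKind(bool can_embed_address,     // JIT / non-PIC boot image
                                        bool compiling_boot_image,  // IsBootImage()
                                        bool callee_in_image) {     // BootImageAOTCanEmbedMethod()
      if (can_embed_address) {
        return MethodLoadKind::kDirectAddress;
      }
      if (compiling_boot_image && callee_in_image) {
        return MethodLoadKind::kBootImageLinkTimePcRelative;        // the new case
      }
      return MethodLoadKind::kDexCachePcRelative;                   // fallback
    }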
@@ -167,8 +186,8 @@
       if (!compiler_driver->GetSupportBootImageFixup()) {
         // compiler_driver_test. Do not sharpen.
         desired_load_kind = HLoadClass::LoadKind::kDexCacheViaMethod;
-      } else if ((klass != nullptr) && compiler_driver->IsImageClass(
-          dex_file.StringDataByIdx(dex_file.GetTypeId(type_index).descriptor_idx_))) {
+      } else if ((klass != nullptr) &&
+                 compiler_driver->IsImageClass(dex_file.StringByTypeIdx(type_index))) {
         is_in_boot_image = true;
         desired_load_kind = HLoadClass::LoadKind::kBootImageLinkTimePcRelative;
       } else {
diff --git a/compiler/optimizing/sharpening.h b/compiler/optimizing/sharpening.h
index 10707c7..f74b0af 100644
--- a/compiler/optimizing/sharpening.h
+++ b/compiler/optimizing/sharpening.h
@@ -55,7 +55,9 @@
     REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Used by Sharpening and InstructionSimplifier.
-  static void SharpenInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke, CodeGenerator* codegen);
+  static void SharpenInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke,
+                                          CodeGenerator* codegen,
+                                          CompilerDriver* compiler_driver);
 
  private:
   void ProcessLoadString(HLoadString* load_string);
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 660409f..8bdc576 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -2130,7 +2130,9 @@
 
   bool LoadProfile() {
     DCHECK(UseProfile());
-
+    // TODO(calin): We should be using the runtime arena pool (instead of the default profile arena).
+    // However the setup logic is messy and needs cleaning up before that (e.g. the oat writers are
+    // created before the runtime).
     profile_compilation_info_.reset(new ProfileCompilationInfo());
     ScopedFlock flock;
     bool success = true;
diff --git a/dexoptanalyzer/Android.bp b/dexoptanalyzer/Android.bp
index cf4c99e..da6663d 100644
--- a/dexoptanalyzer/Android.bp
+++ b/dexoptanalyzer/Android.bp
@@ -48,8 +48,8 @@
 art_cc_binary {
     name: "dexoptanalyzerd",
     defaults: [
-        "dexoptanalyzer-defaults",
         "art_debug_defaults",
+        "dexoptanalyzer-defaults",
     ],
     shared_libs: [
         "libartd",
diff --git a/disassembler/Android.bp b/disassembler/Android.bp
index 8dfada2..086b8c7 100644
--- a/disassembler/Android.bp
+++ b/disassembler/Android.bp
@@ -47,8 +47,8 @@
 art_cc_library {
     name: "libartd-disassembler",
     defaults: [
-        "libart-disassembler-defaults",
         "art_debug_defaults",
+        "libart-disassembler-defaults",
     ],
     shared_libs: [
         // For disassembler_arm*.
diff --git a/imgdiag/Android.bp b/imgdiag/Android.bp
index eaeb78e..9459bb5 100644
--- a/imgdiag/Android.bp
+++ b/imgdiag/Android.bp
@@ -64,8 +64,8 @@
 art_cc_binary {
     name: "imgdiagd",
     defaults: [
-        "imgdiag-defaults",
         "art_debug_defaults",
+        "imgdiag-defaults",
     ],
     shared_libs: [
         "libartd",
diff --git a/patchoat/Android.bp b/patchoat/Android.bp
index a78f97d..d3bc2a7 100644
--- a/patchoat/Android.bp
+++ b/patchoat/Android.bp
@@ -40,8 +40,8 @@
 art_cc_binary {
     name: "patchoatd",
     defaults: [
-        "patchoat-defaults",
         "art_debug_defaults",
+        "patchoat-defaults",
     ],
     shared_libs: [
         "libartd",
diff --git a/profman/Android.bp b/profman/Android.bp
index 2dcbaee..a327ef2 100644
--- a/profman/Android.bp
+++ b/profman/Android.bp
@@ -49,8 +49,8 @@
 art_cc_binary {
     name: "profmand",
     defaults: [
-        "profman-defaults",
         "art_debug_defaults",
+        "profman-defaults",
     ],
     shared_libs: [
         "libartd",
diff --git a/profman/profile_assistant_test.cc b/profman/profile_assistant_test.cc
index 38254e2..b836632 100644
--- a/profman/profile_assistant_test.cc
+++ b/profman/profile_assistant_test.cc
@@ -21,6 +21,7 @@
 #include "common_runtime_test.h"
 #include "exec_utils.h"
 #include "jit/profile_compilation_info.h"
+#include "linear_alloc.h"
 #include "mirror/class-inl.h"
 #include "obj_ptr-inl.h"
 #include "profile_assistant.h"
@@ -30,6 +31,11 @@
 namespace art {
 
 class ProfileAssistantTest : public CommonRuntimeTest {
+ public:
+  virtual void PostRuntimeCreate() {
+    arena_.reset(new ArenaAllocator(Runtime::Current()->GetArenaPool()));
+  }
+
  protected:
   void SetupProfile(const std::string& id,
                     uint32_t checksum,
@@ -69,19 +75,19 @@
   ProfileCompilationInfo::OfflineProfileMethodInfo GetOfflineProfileMethodInfo(
         const std::string& dex_location1, uint32_t dex_checksum1,
         const std::string& dex_location2, uint32_t dex_checksum2) {
-    ProfileCompilationInfo::OfflineProfileMethodInfo pmi;
+    ProfileCompilationInfo::OfflineProfileMethodInfo pmi(arena_.get());
     pmi.dex_references.emplace_back(dex_location1, dex_checksum1);
     pmi.dex_references.emplace_back(dex_location2, dex_checksum2);
 
     // Monomorphic
     for (uint16_t dex_pc = 0; dex_pc < 11; dex_pc++) {
-      ProfileCompilationInfo::DexPcData dex_pc_data;
+      ProfileCompilationInfo::DexPcData dex_pc_data(arena_.get());
       dex_pc_data.AddClass(0, dex::TypeIndex(0));
       pmi.inline_caches.Put(dex_pc, dex_pc_data);
     }
     // Polymorphic
     for (uint16_t dex_pc = 11; dex_pc < 22; dex_pc++) {
-      ProfileCompilationInfo::DexPcData dex_pc_data;
+      ProfileCompilationInfo::DexPcData dex_pc_data(arena_.get());
       dex_pc_data.AddClass(0, dex::TypeIndex(0));
       dex_pc_data.AddClass(1, dex::TypeIndex(1));
 
@@ -89,13 +95,13 @@
     }
     // Megamorphic
     for (uint16_t dex_pc = 22; dex_pc < 33; dex_pc++) {
-      ProfileCompilationInfo::DexPcData dex_pc_data;
+      ProfileCompilationInfo::DexPcData dex_pc_data(arena_.get());
       dex_pc_data.SetIsMegamorphic();
       pmi.inline_caches.Put(dex_pc, dex_pc_data);
     }
     // Missing types
     for (uint16_t dex_pc = 33; dex_pc < 44; dex_pc++) {
-      ProfileCompilationInfo::DexPcData dex_pc_data;
+      ProfileCompilationInfo::DexPcData dex_pc_data(arena_.get());
       dex_pc_data.SetIsMissingTypes();
       pmi.inline_caches.Put(dex_pc, dex_pc_data);
     }
@@ -247,13 +253,13 @@
                           bool is_megamorphic,
                           bool is_missing_types)
       REQUIRES_SHARED(Locks::mutator_lock_) {
-    ProfileCompilationInfo::OfflineProfileMethodInfo pmi;
-    ASSERT_TRUE(info.GetMethod(method->GetDexFile()->GetLocation(),
-                               method->GetDexFile()->GetLocationChecksum(),
-                               method->GetDexMethodIndex(),
-                               &pmi));
-    ASSERT_EQ(pmi.inline_caches.size(), 1u);
-    ProfileCompilationInfo::DexPcData dex_pc_data = pmi.inline_caches.begin()->second;
+    std::unique_ptr<ProfileCompilationInfo::OfflineProfileMethodInfo> pmi =
+        info.GetMethod(method->GetDexFile()->GetLocation(),
+                       method->GetDexFile()->GetLocationChecksum(),
+                       method->GetDexMethodIndex());
+    ASSERT_TRUE(pmi != nullptr);
+    ASSERT_EQ(pmi->inline_caches.size(), 1u);
+    ProfileCompilationInfo::DexPcData dex_pc_data = pmi->inline_caches.begin()->second;
 
     ASSERT_EQ(dex_pc_data.is_megamorphic, is_megamorphic);
     ASSERT_EQ(dex_pc_data.is_missing_types, is_missing_types);
@@ -262,7 +268,7 @@
     for (mirror::Class* it : expected_clases) {
       for (const auto& class_ref : dex_pc_data.classes) {
         ProfileCompilationInfo::DexReference dex_ref =
-            pmi.dex_references[class_ref.dex_profile_index];
+            pmi->dex_references[class_ref.dex_profile_index];
         if (dex_ref.MatchesDex(&(it->GetDexFile())) &&
             class_ref.type_index == it->GetDexTypeIndex()) {
           found++;
@@ -272,6 +278,8 @@
 
     ASSERT_EQ(expected_clases.size(), found);
   }
+
+  std::unique_ptr<ArenaAllocator> arena_;
 };
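The fixture owns an ArenaAllocator because OfflineProfileMethodInfo and DexPcData are now arena-backed. The lifetime rule this encodes, restated as a sketch (assuming PostRuntimeCreate() is a virtual hook of CommonRuntimeTest, as the override above implies): the arena can only be created once the runtime, and hence its arena pool, exists, and arena-backed containers must not outlive it.

    // Sketch, not the actual fixture:
    class MyProfileTest : public CommonRuntimeTest {
     public:
      void PostRuntimeCreate() OVERRIDE {  // runtime (and its pool) exist here
        arena_.reset(new ArenaAllocator(Runtime::Current()->GetArenaPool()));
      }

     protected:
      std::unique_ptr<ArenaAllocator> arena_;  // outlives each test body
    };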
 
 TEST_F(ProfileAssistantTest, AdviseCompilationEmptyReferences) {
@@ -541,11 +549,11 @@
   for (ArtMethod& method : klass->GetMethods(kRuntimePointerSize)) {
     if (!method.IsCopied() && method.GetCodeItem() != nullptr) {
       ++method_count;
-      ProfileCompilationInfo::OfflineProfileMethodInfo pmi;
-      ASSERT_TRUE(info.GetMethod(method.GetDexFile()->GetLocation(),
-                                 method.GetDexFile()->GetLocationChecksum(),
-                                 method.GetDexMethodIndex(),
-                                 &pmi));
+      std::unique_ptr<ProfileCompilationInfo::OfflineProfileMethodInfo> pmi =
+          info.GetMethod(method.GetDexFile()->GetLocation(),
+                         method.GetDexFile()->GetLocationChecksum(),
+                         method.GetDexMethodIndex());
+      ASSERT_TRUE(pmi != nullptr);
     }
   }
   EXPECT_GT(method_count, 0u);
@@ -689,12 +697,12 @@
     // Verify that method noInlineCache has no inline caches in the profile.
     ArtMethod* no_inline_cache = GetVirtualMethod(class_loader, "LTestInline;", "noInlineCache");
     ASSERT_TRUE(no_inline_cache != nullptr);
-    ProfileCompilationInfo::OfflineProfileMethodInfo pmi_no_inline_cache;
-    ASSERT_TRUE(info.GetMethod(no_inline_cache->GetDexFile()->GetLocation(),
-                               no_inline_cache->GetDexFile()->GetLocationChecksum(),
-                               no_inline_cache->GetDexMethodIndex(),
-                               &pmi_no_inline_cache));
-    ASSERT_TRUE(pmi_no_inline_cache.inline_caches.empty());
+    std::unique_ptr<ProfileCompilationInfo::OfflineProfileMethodInfo> pmi_no_inline_cache =
+        info.GetMethod(no_inline_cache->GetDexFile()->GetLocation(),
+                       no_inline_cache->GetDexFile()->GetLocationChecksum(),
+                       no_inline_cache->GetDexMethodIndex());
+    ASSERT_TRUE(pmi_no_inline_cache != nullptr);
+    ASSERT_TRUE(pmi_no_inline_cache->inline_caches.empty());
   }
 }
 
diff --git a/profman/profman.cc b/profman/profman.cc
index 384e129..26e7e46 100644
--- a/profman/profman.cc
+++ b/profman/profman.cc
@@ -423,7 +423,8 @@
     }
     for (const std::unique_ptr<const DexFile>& dex_file : *dex_files) {
       std::set<dex::TypeIndex> class_types;
-      ProfileCompilationInfo::MethodMap methods;
+      ProfileCompilationInfo::MethodMap methods(std::less<uint16_t>(),
+                                                profile_info.GetArena()->Adapter());
       if (profile_info.GetClassesAndMethods(dex_file.get(), &class_types, &methods)) {
         for (const dex::TypeIndex& type_index : class_types) {
           const DexFile::TypeId& type_id = dex_file->GetTypeId(type_index);
diff --git a/runtime/Android.bp b/runtime/Android.bp
index 8ee5498..aa7dc65 100644
--- a/runtime/Android.bp
+++ b/runtime/Android.bp
@@ -352,6 +352,7 @@
                 "libdl",
                 // For android::FileMap used by libziparchive.
                 "libutils",
+                "libtombstoned_client"
             ],
             static_libs: [
                 // ZipArchive support, the order matters here to get all symbols.
diff --git a/runtime/arch/instruction_set.cc b/runtime/arch/instruction_set.cc
index 8f64dcd..64af7ec 100644
--- a/runtime/arch/instruction_set.cc
+++ b/runtime/arch/instruction_set.cc
@@ -18,8 +18,8 @@
 
 // Explicitly include our own elf.h to avoid Linux and other dependencies.
 #include "../elf.h"
+#include "android-base/logging.h"
 #include "base/bit_utils.h"
-#include "base/logging.h"
 #include "globals.h"
 
 namespace art {
@@ -36,11 +36,9 @@
     case kNone:
       LOG(FATAL) << "Unsupported instruction set " << isa;
       UNREACHABLE();
-
-    default:
-      LOG(FATAL) << "Unknown ISA " << isa;
-      UNREACHABLE();
   }
+  LOG(FATAL) << "Unknown ISA " << isa;
+  UNREACHABLE();
 }
 
 const char* GetInstructionSetString(InstructionSet isa) {
@@ -60,10 +58,9 @@
       return "mips64";
     case kNone:
       return "none";
-    default:
-      LOG(FATAL) << "Unknown ISA " << isa;
-      UNREACHABLE();
   }
+  LOG(FATAL) << "Unknown ISA " << isa;
+  UNREACHABLE();
 }
 
 InstructionSet GetInstructionSetFromString(const char* isa_str) {
@@ -128,10 +125,9 @@
     case kNone:
       LOG(FATAL) << "ISA kNone does not have alignment.";
       UNREACHABLE();
-    default:
-      LOG(FATAL) << "Unknown ISA " << isa;
-      UNREACHABLE();
   }
+  LOG(FATAL) << "Unknown ISA " << isa;
+  UNREACHABLE();
 }
 
 #if !defined(ART_STACK_OVERFLOW_GAP_arm) || !defined(ART_STACK_OVERFLOW_GAP_arm64) || \
@@ -197,11 +193,9 @@
     case kNone:
       LOG(FATAL) << "kNone has no stack overflow size";
       UNREACHABLE();
-
-    default:
-      LOG(FATAL) << "Unknown instruction set" << isa;
-      UNREACHABLE();
   }
+  LOG(FATAL) << "Unknown instruction set" << isa;
+  UNREACHABLE();
 }
 
 }  // namespace art
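The pattern repeated through this file, and in instruction_set.h below: drop default:, enumerate every InstructionSet value, and move the fatal path after the switch. The switch becomes exhaustive, so -Wswitch flags a newly added enum value at compile time, and the header variants can become constexpr. A standalone illustration:

    #include <cstdio>
    #include <cstdlib>

    enum class Isa { kArm, kX86 };  // toy stand-in for InstructionSet

    // No default: the switch is exhaustive, so -Wswitch warns right here when
    // a new Isa value appears. The fatal path after the switch keeps the
    // function total, and the pattern stays legal in a constexpr function as
    // long as constant evaluation never reaches it.
    constexpr const char* Name(Isa isa) {
      switch (isa) {
        case Isa::kArm: return "arm";
        case Isa::kX86: return "x86";
      }
      std::abort();  // unreachable for valid enum values
    }

    int main() {
      static_assert(Name(Isa::kArm)[0] == 'a', "usable at compile time");
      std::printf("%s\n", Name(Isa::kX86));
      return 0;
    }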
diff --git a/runtime/arch/instruction_set.h b/runtime/arch/instruction_set.h
index 7ef9a7a..7203b18 100644
--- a/runtime/arch/instruction_set.h
+++ b/runtime/arch/instruction_set.h
@@ -93,7 +93,7 @@
 // Fatal logging out of line to keep the header clean of logging.h.
 NO_RETURN void InstructionSetAbort(InstructionSet isa);
 
-static inline PointerSize GetInstructionSetPointerSize(InstructionSet isa) {
+constexpr PointerSize GetInstructionSetPointerSize(InstructionSet isa) {
   switch (isa) {
     case kArm:
       // Fall-through.
@@ -109,23 +109,37 @@
       return kMipsPointerSize;
     case kMips64:
       return kMips64PointerSize;
-    default:
-      InstructionSetAbort(isa);
+
+    case kNone:
+      break;
   }
+  InstructionSetAbort(isa);
 }
 
-ALWAYS_INLINE static inline constexpr size_t GetInstructionSetInstructionAlignment(
-    InstructionSet isa) {
-  return (isa == kThumb2 || isa == kArm) ? kThumb2InstructionAlignment :
-         (isa == kArm64) ? kArm64InstructionAlignment :
-         (isa == kX86) ? kX86InstructionAlignment :
-         (isa == kX86_64) ? kX86_64InstructionAlignment :
-         (isa == kMips) ? kMipsInstructionAlignment :
-         (isa == kMips64) ? kMips64InstructionAlignment :
-         0;  // Invalid case, but constexpr doesn't support asserts.
+constexpr size_t GetInstructionSetInstructionAlignment(InstructionSet isa) {
+  switch (isa) {
+    case kArm:
+      // Fall-through.
+    case kThumb2:
+      return kThumb2InstructionAlignment;
+    case kArm64:
+      return kArm64InstructionAlignment;
+    case kX86:
+      return kX86InstructionAlignment;
+    case kX86_64:
+      return kX86_64InstructionAlignment;
+    case kMips:
+      return kMipsInstructionAlignment;
+    case kMips64:
+      return kMips64InstructionAlignment;
+
+    case kNone:
+      break;
+  }
+  InstructionSetAbort(isa);
 }
 
-static inline bool IsValidInstructionSet(InstructionSet isa) {
+constexpr bool IsValidInstructionSet(InstructionSet isa) {
   switch (isa) {
     case kArm:
     case kThumb2:
@@ -135,15 +149,16 @@
     case kMips:
     case kMips64:
       return true;
+
     case kNone:
-    default:
       return false;
   }
+  return false;
 }
 
 size_t GetInstructionSetAlignment(InstructionSet isa);
 
-static inline bool Is64BitInstructionSet(InstructionSet isa) {
+constexpr bool Is64BitInstructionSet(InstructionSet isa) {
   switch (isa) {
     case kArm:
     case kThumb2:
@@ -156,16 +171,17 @@
     case kMips64:
       return true;
 
-    default:
-      InstructionSetAbort(isa);
+    case kNone:
+      break;
   }
+  InstructionSetAbort(isa);
 }
 
-static inline PointerSize InstructionSetPointerSize(InstructionSet isa) {
+constexpr PointerSize InstructionSetPointerSize(InstructionSet isa) {
   return Is64BitInstructionSet(isa) ? PointerSize::k64 : PointerSize::k32;
 }
 
-static inline size_t GetBytesPerGprSpillLocation(InstructionSet isa) {
+constexpr size_t GetBytesPerGprSpillLocation(InstructionSet isa) {
   switch (isa) {
     case kArm:
       // Fall-through.
@@ -182,12 +198,13 @@
     case kMips64:
       return 8;
 
-    default:
-      InstructionSetAbort(isa);
+    case kNone:
+      break;
   }
+  InstructionSetAbort(isa);
 }
 
-static inline size_t GetBytesPerFprSpillLocation(InstructionSet isa) {
+constexpr size_t GetBytesPerFprSpillLocation(InstructionSet isa) {
   switch (isa) {
     case kArm:
       // Fall-through.
@@ -204,9 +221,10 @@
     case kMips64:
       return 8;
 
-    default:
-      InstructionSetAbort(isa);
+    case kNone:
+      break;
   }
+  InstructionSetAbort(isa);
 }
 
 size_t GetStackOverflowReservedBytes(InstructionSet isa);
@@ -243,7 +261,7 @@
 }
 
 // Use the lower 32b for the method pointer and the upper 32b for the code pointer.
-static inline TwoWordReturn GetTwoWordSuccessValue(uintptr_t hi, uintptr_t lo) {
+static inline constexpr TwoWordReturn GetTwoWordSuccessValue(uintptr_t hi, uintptr_t lo) {
   static_assert(sizeof(uint32_t) == sizeof(uintptr_t), "Unexpected size difference");
   uint32_t lo32 = lo;
   uint64_t hi64 = static_cast<uint64_t>(hi);
@@ -251,6 +269,10 @@
 }
 
 #elif defined(__x86_64__) || defined(__aarch64__) || (defined(__mips__) && defined(__LP64__))
+
+// Note: TwoWordReturn can't be constexpr for 64-bit targets. We'd need a constexpr constructor,
+//       which would violate C-linkage in the entrypoint functions.
+
 struct TwoWordReturn {
   uintptr_t lo;
   uintptr_t hi;
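
Making these helpers `constexpr`, as the instruction_set.h hunk does, lets callers query ISA properties at compile time (e.g. inside a static_assert) while a runtime call with a bad value still aborts. A hedged sketch under C++14 relaxed-constexpr rules, with invented stand-ins for ART's types:

```cpp
#include <cstdlib>

enum class Isa { kArm, kArm64, kNone };
enum class PointerSize { k32 = 4, k64 = 8 };

[[noreturn]] inline void IsaAbort(Isa) { std::abort(); }

constexpr PointerSize GetPointerSize(Isa isa) {
  switch (isa) {
    case Isa::kArm:   return PointerSize::k32;
    case Isa::kArm64: return PointerSize::k64;
    case Isa::kNone:  break;
  }
  // Not constant-evaluable: a constexpr call reaching this line fails to
  // compile, which is exactly the diagnostic we want at compile time.
  IsaAbort(isa);
}

static_assert(GetPointerSize(Isa::kArm64) == PointerSize::k64,
              "ISA properties are now queryable at compile time");

int main() { return 0; }
```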
diff --git a/runtime/base/arena_allocator.cc b/runtime/base/arena_allocator.cc
index fc5b5b1..f672882 100644
--- a/runtime/base/arena_allocator.cc
+++ b/runtime/base/arena_allocator.cc
@@ -88,6 +88,7 @@
   "CallingConv  ",
   "CHA          ",
   "Scheduler    ",
+  "Profile      ",
 };
 
 template <bool kCount>
diff --git a/runtime/base/arena_allocator.h b/runtime/base/arena_allocator.h
index 5430458..ebde82d 100644
--- a/runtime/base/arena_allocator.h
+++ b/runtime/base/arena_allocator.h
@@ -98,6 +98,7 @@
   kArenaAllocCallingConvention,
   kArenaAllocCHA,
   kArenaAllocScheduler,
+  kArenaAllocProfile,
   kNumArenaAllocKinds
 };
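
The two arena_allocator hunks must stay in sync: every new ArenaAllocKind enumerator needs a matching entry in the name table in arena_allocator.cc. A small sketch of how such a pairing can be enforced at compile time (illustrative names, not ART's actual table):

```cpp
#include <cstdio>

enum ArenaAllocKind { kArenaAllocMisc, kArenaAllocProfile, kNumArenaAllocKinds };

static const char* const kAllocNames[] = {
    "Misc         ",
    "Profile      ",
};

static_assert(sizeof(kAllocNames) / sizeof(kAllocNames[0]) == kNumArenaAllocKinds,
              "every ArenaAllocKind needs a printable name");

int main() {
  std::printf("%s\n", kAllocNames[kArenaAllocProfile]);
}
```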
 
diff --git a/runtime/base/logging.cc b/runtime/base/logging.cc
index 55b4306..553928d 100644
--- a/runtime/base/logging.cc
+++ b/runtime/base/logging.cc
@@ -112,7 +112,7 @@
   if (priority == ANDROID_LOG_FATAL) {
     // Allocate buffer for snprintf(buf, buf_size, "%s:%u] %s", file, line, message) below.
     // If allocation fails, fall back to printing only the message.
-    buf_size = strlen(file) + 1 /* ':' */ + std::numeric_limits<typeof(line)>::max_digits10 +
+    buf_size = strlen(file) + 1 /* ':' */ + std::numeric_limits<decltype(line)>::max_digits10 +
         2 /* "] " */ + strlen(message) + 1 /* terminating 0 */;
     buf = reinterpret_cast<char*>(malloc(buf_size));
   }
diff --git a/runtime/base/safe_copy_test.cc b/runtime/base/safe_copy_test.cc
index 987895e..a9ec952 100644
--- a/runtime/base/safe_copy_test.cc
+++ b/runtime/base/safe_copy_test.cc
@@ -23,80 +23,86 @@
 #include <sys/mman.h>
 #include <sys/user.h>
 
+#include "globals.h"
+
 namespace art {
 
 #if defined(__linux__)
 
 TEST(SafeCopyTest, smoke) {
+  DCHECK_EQ(kPageSize, static_cast<decltype(kPageSize)>(PAGE_SIZE));
+
   // Map four pages, mark the second one as PROT_NONE, unmap the last one.
-  void* map = mmap(nullptr, PAGE_SIZE * 4, PROT_READ | PROT_WRITE,
+  void* map = mmap(nullptr, kPageSize * 4, PROT_READ | PROT_WRITE,
                    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
   ASSERT_NE(MAP_FAILED, map);
   char* page1 = static_cast<char*>(map);
-  char* page2 = page1 + PAGE_SIZE;
-  char* page3 = page2 + PAGE_SIZE;
-  char* page4 = page3 + PAGE_SIZE;
-  ASSERT_EQ(0, mprotect(page1 + PAGE_SIZE, PAGE_SIZE, PROT_NONE));
-  ASSERT_EQ(0, munmap(page4, PAGE_SIZE));
+  char* page2 = page1 + kPageSize;
+  char* page3 = page2 + kPageSize;
+  char* page4 = page3 + kPageSize;
+  ASSERT_EQ(0, mprotect(page1 + kPageSize, kPageSize, PROT_NONE));
+  ASSERT_EQ(0, munmap(page4, kPageSize));
 
   page1[0] = 'a';
-  page1[PAGE_SIZE - 1] = 'z';
+  page1[kPageSize - 1] = 'z';
 
   page3[0] = 'b';
-  page3[PAGE_SIZE - 1] = 'y';
+  page3[kPageSize - 1] = 'y';
 
-  char buf[PAGE_SIZE];
+  char buf[kPageSize];
 
   // Completely valid read.
   memset(buf, 0xCC, sizeof(buf));
-  EXPECT_EQ(static_cast<ssize_t>(PAGE_SIZE), SafeCopy(buf, page1, PAGE_SIZE)) << strerror(errno);
-  EXPECT_EQ(0, memcmp(buf, page1, PAGE_SIZE));
+  EXPECT_EQ(static_cast<ssize_t>(kPageSize), SafeCopy(buf, page1, kPageSize)) << strerror(errno);
+  EXPECT_EQ(0, memcmp(buf, page1, kPageSize));
 
   // Reading into a guard page.
   memset(buf, 0xCC, sizeof(buf));
-  EXPECT_EQ(static_cast<ssize_t>(PAGE_SIZE - 1), SafeCopy(buf, page1 + 1, PAGE_SIZE));
-  EXPECT_EQ(0, memcmp(buf, page1 + 1, PAGE_SIZE - 1));
+  EXPECT_EQ(static_cast<ssize_t>(kPageSize - 1), SafeCopy(buf, page1 + 1, kPageSize));
+  EXPECT_EQ(0, memcmp(buf, page1 + 1, kPageSize - 1));
 
   // Reading from a guard page into a real page.
   memset(buf, 0xCC, sizeof(buf));
-  EXPECT_EQ(0, SafeCopy(buf, page2 + PAGE_SIZE - 1, PAGE_SIZE));
+  EXPECT_EQ(0, SafeCopy(buf, page2 + kPageSize - 1, kPageSize));
 
   // Reading off of the end of a mapping.
   memset(buf, 0xCC, sizeof(buf));
-  EXPECT_EQ(static_cast<ssize_t>(PAGE_SIZE), SafeCopy(buf, page3, PAGE_SIZE * 2));
-  EXPECT_EQ(0, memcmp(buf, page3, PAGE_SIZE));
+  EXPECT_EQ(static_cast<ssize_t>(kPageSize), SafeCopy(buf, page3, kPageSize * 2));
+  EXPECT_EQ(0, memcmp(buf, page3, kPageSize));
 
   // Completely invalid.
-  EXPECT_EQ(0, SafeCopy(buf, page1 + PAGE_SIZE, PAGE_SIZE));
+  EXPECT_EQ(0, SafeCopy(buf, page1 + kPageSize, kPageSize));
 
   // Clean up.
-  ASSERT_EQ(0, munmap(map, PAGE_SIZE * 3));
+  ASSERT_EQ(0, munmap(map, kPageSize * 3));
 }
 
 TEST(SafeCopyTest, alignment) {
+  DCHECK_EQ(kPageSize, static_cast<decltype(kPageSize)>(PAGE_SIZE));
+
   // Copy the middle of a mapping to the end of another one.
-  void* src_map = mmap(nullptr, PAGE_SIZE * 3, PROT_READ | PROT_WRITE,
+  void* src_map = mmap(nullptr, kPageSize * 3, PROT_READ | PROT_WRITE,
                        MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
   ASSERT_NE(MAP_FAILED, src_map);
 
   // Add a guard page to make sure we don't write past the end of the mapping.
-  void* dst_map = mmap(nullptr, PAGE_SIZE * 4, PROT_READ | PROT_WRITE,
+  void* dst_map = mmap(nullptr, kPageSize * 4, PROT_READ | PROT_WRITE,
                        MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
   ASSERT_NE(MAP_FAILED, dst_map);
 
   char* src = static_cast<char*>(src_map);
   char* dst = static_cast<char*>(dst_map);
-  ASSERT_EQ(0, mprotect(dst + 3 * PAGE_SIZE, PAGE_SIZE, PROT_NONE));
+  ASSERT_EQ(0, mprotect(dst + 3 * kPageSize, kPageSize, PROT_NONE));
 
   src[512] = 'a';
-  src[PAGE_SIZE * 3 - 512 - 1] = 'z';
+  src[kPageSize * 3 - 512 - 1] = 'z';
 
-  EXPECT_EQ(static_cast<ssize_t>(PAGE_SIZE * 3 - 1024),
-            SafeCopy(dst + 1024, src + 512, PAGE_SIZE * 3 - 1024));
-  EXPECT_EQ(0, memcmp(dst + 1024, src + 512, PAGE_SIZE * 3 - 1024));
+  EXPECT_EQ(static_cast<ssize_t>(kPageSize * 3 - 1024),
+            SafeCopy(dst + 1024, src + 512, kPageSize * 3 - 1024));
+  EXPECT_EQ(0, memcmp(dst + 1024, src + 512, kPageSize * 3 - 1024));
 
-  ASSERT_EQ(0, munmap(src_map, PAGE_SIZE * 3));
-  ASSERT_EQ(0, munmap(dst_map, PAGE_SIZE * 4));
+  ASSERT_EQ(0, munmap(src_map, kPageSize * 3));
+  ASSERT_EQ(0, munmap(dst_map, kPageSize * 4));
 }
 
 #endif  // defined(__linux__)
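
The safe_copy_test.cc hunk swaps the kernel's PAGE_SIZE macro for ART's kPageSize constant (asserting they agree) but keeps the same guard-page technique: map anonymous pages and revoke access to one so any copy crossing the boundary faults. A minimal Linux-only sketch of that setup, independent of ART:

```cpp
#include <sys/mman.h>
#include <unistd.h>

#include <cassert>
#include <cstddef>
#include <cstring>

int main() {
  const size_t page = static_cast<size_t>(sysconf(_SC_PAGESIZE));
  void* raw = mmap(nullptr, page * 2, PROT_READ | PROT_WRITE,
                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  assert(raw != MAP_FAILED);
  char* map = static_cast<char*>(raw);

  int rc = mprotect(map + page, page, PROT_NONE);  // Second page is now a guard.
  assert(rc == 0);

  std::memset(map, 'x', page);  // Fine: stays within the writable first page.
  // std::memset(map, 'x', page + 1);  // Would fault: crosses into the guard page.

  rc = munmap(map, page * 2);
  assert(rc == 0);
  return 0;
}
```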
diff --git a/runtime/base/unix_file/fd_file.cc b/runtime/base/unix_file/fd_file.cc
index 03fc959..00b5567 100644
--- a/runtime/base/unix_file/fd_file.cc
+++ b/runtime/base/unix_file/fd_file.cc
@@ -91,6 +91,7 @@
   fd_ = other.fd_;
   file_path_ = std::move(other.file_path_);
   auto_close_ = other.auto_close_;
+  read_only_mode_ = other.read_only_mode_;
   other.Release();  // Release other.
 
   return *this;
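
The one-line fd_file.cc fix belongs to a classic bug class: a hand-written move-assignment operator must transfer every member, or the destination silently keeps stale state. A simplified sketch (not ART's FdFile) of the failure mode the new test below guards against:

```cpp
#include <string>
#include <utility>

class File {
 public:
  File(std::string path, bool read_only)
      : path_(std::move(path)), read_only_(read_only) {}

  File& operator=(File&& other) noexcept {
    path_ = std::move(other.path_);
    read_only_ = other.read_only_;  // Easy to omit; the hunk above adds the analogous line.
    return *this;
  }

  bool ReadOnly() const { return read_only_; }

 private:
  std::string path_;
  bool read_only_ = false;
};

int main() {
  File a("/tmp/a", /*read_only=*/true);
  File b("/tmp/b", /*read_only=*/false);
  b = std::move(a);
  return b.ReadOnly() ? 0 : 1;  // Without the flagged line this returns 1.
}
```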
diff --git a/runtime/base/unix_file/fd_file_test.cc b/runtime/base/unix_file/fd_file_test.cc
index 7657a38..6aef348 100644
--- a/runtime/base/unix_file/fd_file_test.cc
+++ b/runtime/base/unix_file/fd_file_test.cc
@@ -186,6 +186,20 @@
   ASSERT_EQ(file2.Close(), 0);
 }
 
+TEST_F(FdFileTest, OperatorMoveEquals) {
+  // Make sure the read_only_mode_ flag is correctly
+  // copied over.
+  art::ScratchFile tmp;
+  FdFile file(tmp.GetFilename(), O_RDONLY, false);
+  ASSERT_TRUE(file.ReadOnlyMode());
+
+  FdFile file2(tmp.GetFilename(), O_RDWR, false);
+  ASSERT_FALSE(file2.ReadOnlyMode());
+
+  file2 = std::move(file);
+  ASSERT_TRUE(file2.ReadOnlyMode());
+}
+
 TEST_F(FdFileTest, EraseWithPathUnlinks) {
   // New scratch file, zero-length.
   art::ScratchFile tmp;
diff --git a/runtime/entrypoints/quick/quick_entrypoints_list.h b/runtime/entrypoints/quick/quick_entrypoints_list.h
index e2d45ac..74e7c18 100644
--- a/runtime/entrypoints/quick/quick_entrypoints_list.h
+++ b/runtime/entrypoints/quick/quick_entrypoints_list.h
@@ -145,22 +145,22 @@
   V(A64Load, int64_t, volatile const int64_t *) \
   V(A64Store, void, volatile int64_t *, int64_t) \
 \
-  V(NewEmptyString, void) \
-  V(NewStringFromBytes_B, void) \
-  V(NewStringFromBytes_BI, void) \
-  V(NewStringFromBytes_BII, void) \
-  V(NewStringFromBytes_BIII, void) \
-  V(NewStringFromBytes_BIIString, void) \
-  V(NewStringFromBytes_BString, void) \
-  V(NewStringFromBytes_BIICharset, void) \
-  V(NewStringFromBytes_BCharset, void) \
-  V(NewStringFromChars_C, void) \
-  V(NewStringFromChars_CII, void) \
-  V(NewStringFromChars_IIC, void) \
-  V(NewStringFromCodePoints, void) \
-  V(NewStringFromString, void) \
-  V(NewStringFromStringBuffer, void) \
-  V(NewStringFromStringBuilder, void) \
+  V(NewEmptyString, void, void) \
+  V(NewStringFromBytes_B, void, void) \
+  V(NewStringFromBytes_BI, void, void) \
+  V(NewStringFromBytes_BII, void, void) \
+  V(NewStringFromBytes_BIII, void, void) \
+  V(NewStringFromBytes_BIIString, void, void) \
+  V(NewStringFromBytes_BString, void, void) \
+  V(NewStringFromBytes_BIICharset, void, void) \
+  V(NewStringFromBytes_BCharset, void, void) \
+  V(NewStringFromChars_C, void, void) \
+  V(NewStringFromChars_CII, void, void) \
+  V(NewStringFromChars_IIC, void, void) \
+  V(NewStringFromCodePoints, void, void) \
+  V(NewStringFromString, void, void) \
+  V(NewStringFromStringBuffer, void, void) \
+  V(NewStringFromStringBuilder, void, void) \
 \
   V(ReadBarrierJni, void, mirror::CompressedReference<mirror::Object>*, Thread*) \
   V(ReadBarrierMarkReg00, mirror::Object*, mirror::Object*) \
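
The quick_entrypoints_list.h hunk fixes the arity of the X-macro entries: each V() takes a name, a return type, and a parameter list, so `void, void` declares a C-style `f(void)` prototype rather than an entry missing its parameter list entirely. A self-contained sketch of the convention (macro and entry names are illustrative):

```cpp
#include <cstdint>
#include <cstdio>

// Each entry: V(name, return_type, parameter_types...).
#define ENTRYPOINT_LIST(V)            \
  V(NewEmptyString, void, void)       \
  V(A64Load, int64_t, const int64_t*)

// Expand the list into forward declarations.
#define DECLARE_ENTRYPOINT(name, rettype, ...) rettype art_quick_##name(__VA_ARGS__);
ENTRYPOINT_LIST(DECLARE_ENTRYPOINT)
#undef DECLARE_ENTRYPOINT

// Trivial definitions so the sketch links standalone.
void art_quick_NewEmptyString(void) { std::puts("NewEmptyString"); }
int64_t art_quick_A64Load(const int64_t* p) { return *p; }

int main() {
  art_quick_NewEmptyString();
  int64_t v = 42;
  return art_quick_A64Load(&v) == 42 ? 0 : 1;
}
```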
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index ef4fa28..df097a0 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -4004,7 +4004,8 @@
       native_blocking_gcs_finished_++;
       native_blocking_gc_cond_->Broadcast(self);
     }
-  } else if (new_value > NativeAllocationGcWatermark() && !IsGCRequestPending()) {
+  } else if (new_value > NativeAllocationGcWatermark() * HeapGrowthMultiplier() &&
+             !IsGCRequestPending()) {
     // Trigger another GC because there have been enough native bytes
     // allocated since the last GC.
     if (IsGcConcurrent()) {
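
The heap.cc fix scales the native-allocation watermark by the heap growth multiplier before comparing, so the GC trigger follows the same sizing policy that grows the managed heap. A toy calculation with invented numbers shows how the threshold moves:

```cpp
#include <cstdio>

int main() {
  const double watermark  = 32.0 * 1024 * 1024;  // Hypothetical native watermark.
  const double multiplier = 2.0;                 // Hypothetical growth multiplier.
  const double new_value  = 48.0 * 1024 * 1024;  // Native bytes allocated so far.

  const bool old_rule = new_value > watermark;               // Fires at 32 MiB.
  const bool new_rule = new_value > watermark * multiplier;  // Fires at 64 MiB.
  std::printf("old=%d new=%d\n", old_rule, new_rule);        // old=1 new=0
}
```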
diff --git a/runtime/hprof/hprof.cc b/runtime/hprof/hprof.cc
index 8bdf6b1..ec860c7 100644
--- a/runtime/hprof/hprof.cc
+++ b/runtime/hprof/hprof.cc
@@ -1198,26 +1198,67 @@
     // Class is allocated but not yet resolved: we cannot access its fields or super class.
     return;
   }
-  const size_t num_static_fields = klass->NumStaticFields();
-  // Total class size including embedded IMT, embedded vtable, and static fields.
-  const size_t class_size = klass->GetClassSize();
-  // Class size excluding static fields (relies on reference fields being the first static fields).
-  const size_t class_size_without_overhead = sizeof(mirror::Class);
-  CHECK_LE(class_size_without_overhead, class_size);
-  const size_t overhead_size = class_size - class_size_without_overhead;
 
-  if (overhead_size != 0) {
+  // Note: We will emit instance fields of Class as synthetic static fields with a prefix of
+  //       "$class$" so the class fields are visible in hprof dumps. For tools to account for that
+  //       correctly, we'll emit an instance size of zero for java.lang.Class, and also emit the
+  //       instance fields of java.lang.Object.
+  //
+  //       For other overhead (currently only the embedded vtable), we will generate a synthetic
+  //       byte array (or field[s] in case the overhead size is of reference size or less).
+
+  const size_t num_static_fields = klass->NumStaticFields();
+
+  // Total class size:
+  //   * class instance fields (including Object instance fields)
+  //   * vtable
+  //   * class static fields
+  const size_t total_class_size = klass->GetClassSize();
+
+  // Base class size (common parts of all Class instances):
+  //   * class instance fields (including Object instance fields)
+  constexpr size_t base_class_size = sizeof(mirror::Class);
+  CHECK_LE(base_class_size, total_class_size);
+
+  // Difference of Total and Base:
+  //   * vtable
+  //   * class static fields
+  const size_t base_overhead_size = total_class_size - base_class_size;
+
+  // Tools (ahat/Studio) will count the static fields and account for them in the class size. We
+  // must thus subtract them from base_overhead_size or they will be double-counted.
+  size_t class_static_fields_size = 0;
+  for (ArtField& class_static_field : klass->GetSFields()) {
+    size_t size = 0;
+    SignatureToBasicTypeAndSize(class_static_field.GetTypeDescriptor(), &size);
+    class_static_fields_size += size;
+  }
+
+  CHECK_GE(base_overhead_size, class_static_fields_size);
+  // Now we have:
+  //   * vtable
+  const size_t base_no_statics_overhead_size = base_overhead_size - class_static_fields_size;
+
+  // We may decide to display native overhead (the actual IMT, ArtFields and ArtMethods) in the
+  // future.
+  const size_t java_heap_overhead_size = base_no_statics_overhead_size;
+
+  // For overhead greater than 4 bytes, we'll allocate a synthetic array.
+  if (java_heap_overhead_size > 4) {
     // Create a byte array to reflect the allocation of the
     // StaticField array at the end of this class.
     __ AddU1(HPROF_PRIMITIVE_ARRAY_DUMP);
     __ AddClassStaticsId(klass);
     __ AddStackTraceSerialNumber(LookupStackTraceSerialNumber(klass));
-    __ AddU4(overhead_size);
+    __ AddU4(java_heap_overhead_size - 4);
     __ AddU1(hprof_basic_byte);
-    for (size_t i = 0; i < overhead_size; ++i) {
+    for (size_t i = 0; i < java_heap_overhead_size - 4; ++i) {
       __ AddU1(0);
     }
   }
+  const size_t java_heap_overhead_field_count = java_heap_overhead_size > 0
+                                                    ? (java_heap_overhead_size == 3 ? 2u : 1u)
+                                                    : 0;
 
   __ AddU1(HPROF_CLASS_DUMP);
   __ AddClassId(LookupClassId(klass));
@@ -1228,10 +1269,11 @@
   __ AddObjectId(nullptr);    // no prot domain
   __ AddObjectId(nullptr);    // reserved
   __ AddObjectId(nullptr);    // reserved
+  // Instance size.
   if (klass->IsClassClass()) {
-    // ClassObjects have their static fields appended, so aren't all the same size.
-    // But they're at least this size.
-    __ AddU4(class_size_without_overhead);  // instance size
+    // As mentioned above, we will emit instance fields as synthetic static fields. So the
+    // base object is "empty."
+    __ AddU4(0);
   } else if (klass->IsStringClass()) {
     // Strings are variable length with character data at the end like arrays.
     // This outputs the size of an empty string.
@@ -1245,48 +1287,116 @@
   __ AddU2(0);  // empty const pool
 
   // Static fields
-  if (overhead_size == 0) {
-    __ AddU2(static_cast<uint16_t>(0));
-  } else {
-    __ AddU2(static_cast<uint16_t>(num_static_fields + 1));
+  //
+  // Note: we report Class' and Object's instance fields here, too. This is for visibility reasons.
+  //       (b/38167721)
+  mirror::Class* class_class = klass->GetClass();
+
+  DCHECK(class_class->GetSuperClass()->IsObjectClass());
+  const size_t static_fields_reported = class_class->NumInstanceFields()
+                                        + class_class->GetSuperClass()->NumInstanceFields()
+                                        + java_heap_overhead_field_count
+                                        + num_static_fields;
+  __ AddU2(dchecked_integral_cast<uint16_t>(static_fields_reported));
+
+  if (java_heap_overhead_size != 0) {
     __ AddStringId(LookupStringId(kClassOverheadName));
-    __ AddU1(hprof_basic_object);
-    __ AddClassStaticsId(klass);
+    size_t overhead_fields = 0;
+    if (java_heap_overhead_size > 4) {
+      __ AddU1(hprof_basic_object);
+      __ AddClassStaticsId(klass);
+      ++overhead_fields;
+    } else {
+      switch (java_heap_overhead_size) {
+        case 4: {
+          __ AddU1(hprof_basic_int);
+          __ AddU4(0);
+          ++overhead_fields;
+          break;
+        }
 
-    for (size_t i = 0; i < num_static_fields; ++i) {
-      ArtField* f = klass->GetStaticField(i);
+        case 2: {
+          __ AddU1(hprof_basic_short);
+          __ AddU2(0);
+          ++overhead_fields;
+          break;
+        }
 
-      size_t size;
-      HprofBasicType t = SignatureToBasicTypeAndSize(f->GetTypeDescriptor(), &size);
-      __ AddStringId(LookupStringId(f->GetName()));
-      __ AddU1(t);
-      switch (t) {
-        case hprof_basic_byte:
-          __ AddU1(f->GetByte(klass));
+        case 3: {
+          __ AddU1(hprof_basic_short);
+          __ AddU2(0);
+          __ AddStringId(LookupStringId(std::string(kClassOverheadName) + "2"));
+          ++overhead_fields;
+        }
+        FALLTHROUGH_INTENDED;
+
+        case 1: {
+          __ AddU1(hprof_basic_byte);
+          __ AddU1(0);
+          ++overhead_fields;
           break;
-        case hprof_basic_boolean:
-          __ AddU1(f->GetBoolean(klass));
-          break;
-        case hprof_basic_char:
-          __ AddU2(f->GetChar(klass));
-          break;
-        case hprof_basic_short:
-          __ AddU2(f->GetShort(klass));
-          break;
-        case hprof_basic_float:
-        case hprof_basic_int:
-        case hprof_basic_object:
-          __ AddU4(f->Get32(klass));
-          break;
-        case hprof_basic_double:
-        case hprof_basic_long:
-          __ AddU8(f->Get64(klass));
-          break;
-        default:
-          LOG(FATAL) << "Unexpected size " << size;
-          UNREACHABLE();
+        }
       }
     }
+    DCHECK_EQ(java_heap_overhead_field_count, overhead_fields);
+  }
+
+  // Helper lambda to emit the given static field. The second argument name_fn will be called to
+  // generate the name to emit, which makes it possible to emit something other than the field's
+  // actual name.
+  auto static_field_writer = [&](ArtField& field, auto name_fn)
+      REQUIRES_SHARED(Locks::mutator_lock_) {
+    __ AddStringId(LookupStringId(name_fn(field)));
+
+    size_t size;
+    HprofBasicType t = SignatureToBasicTypeAndSize(field.GetTypeDescriptor(), &size);
+    __ AddU1(t);
+    switch (t) {
+      case hprof_basic_byte:
+        __ AddU1(field.GetByte(klass));
+        return;
+      case hprof_basic_boolean:
+        __ AddU1(field.GetBoolean(klass));
+        return;
+      case hprof_basic_char:
+        __ AddU2(field.GetChar(klass));
+        return;
+      case hprof_basic_short:
+        __ AddU2(field.GetShort(klass));
+        return;
+      case hprof_basic_float:
+      case hprof_basic_int:
+      case hprof_basic_object:
+        __ AddU4(field.Get32(klass));
+        return;
+      case hprof_basic_double:
+      case hprof_basic_long:
+        __ AddU8(field.Get64(klass));
+        return;
+    }
+    LOG(FATAL) << "Unexpected size " << size;
+    UNREACHABLE();
+  };
+
+  {
+    auto class_instance_field_name_fn = [](ArtField& field) REQUIRES_SHARED(Locks::mutator_lock_) {
+      return std::string("$class$") + field.GetName();
+    };
+    for (ArtField& class_instance_field : class_class->GetIFields()) {
+      static_field_writer(class_instance_field, class_instance_field_name_fn);
+    }
+    for (ArtField& object_instance_field : class_class->GetSuperClass()->GetIFields()) {
+      static_field_writer(object_instance_field, class_instance_field_name_fn);
+    }
+  }
+
+  {
+    auto class_static_field_name_fn = [](ArtField& field) REQUIRES_SHARED(Locks::mutator_lock_) {
+      return field.GetName();
+    };
+    for (ArtField& class_static_field : klass->GetSFields()) {
+      static_field_writer(class_static_field, class_static_field_name_fn);
+    }
   }
 
   // Instance fields for this class (no superclass fields)
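
The hprof.cc rewrite encodes leftover class overhead as synthetic fields: four bytes or less become one or two primitive fields (4 -> int, 3 -> short + byte, 2 -> short, 1 -> byte), while anything larger becomes one object field referencing a synthetic byte array that absorbs the remainder. A small sketch reproducing that mapping:

```cpp
#include <cstddef>
#include <cstdio>

struct Encoding { int synthetic_fields; bool uses_byte_array; };

Encoding EncodeOverhead(size_t overhead) {
  if (overhead == 0) return {0, false};
  if (overhead > 4) return {1, true};    // One object field + a byte array for the rest.
  if (overhead == 3) return {2, false};  // short + byte.
  return {1, false};                     // 4 -> int, 2 -> short, 1 -> byte.
}

int main() {
  for (size_t n : {0, 1, 2, 3, 4, 5, 100}) {
    Encoding e = EncodeOverhead(n);
    std::printf("overhead=%zu fields=%d byte_array=%d\n",
                n, e.synthetic_fields, e.uses_byte_array);
  }
}
```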
diff --git a/runtime/interpreter/interpreter_common.h b/runtime/interpreter/interpreter_common.h
index 2589ad0..fdc0505 100644
--- a/runtime/interpreter/interpreter_common.h
+++ b/runtime/interpreter/interpreter_common.h
@@ -140,12 +140,6 @@
     result->SetJ(0);
     return false;
   } else {
-    if (called_method->IsIntrinsic()) {
-      if (MterpHandleIntrinsic(&shadow_frame, called_method, inst, inst_data,
-                               shadow_frame.GetResultRegister())) {
-        return !self->IsExceptionPending();
-      }
-    }
     jit::Jit* jit = Runtime::Current()->GetJit();
     if (jit != nullptr) {
       if (type == kVirtual) {
@@ -153,6 +147,12 @@
       }
       jit->AddSamples(self, sf_method, 1, /*with_backedges*/false);
     }
+    if (called_method->IsIntrinsic()) {
+      if (MterpHandleIntrinsic(&shadow_frame, called_method, inst, inst_data,
+                               shadow_frame.GetResultRegister())) {
+        return !self->IsExceptionPending();
+      }
+    }
     return DoCall<false, false>(called_method, self, shadow_frame, inst, inst_data, result);
   }
 }
diff --git a/runtime/interpreter/mterp/mterp.cc b/runtime/interpreter/mterp/mterp.cc
index a53040c..5f94d04 100644
--- a/runtime/interpreter/mterp/mterp.cc
+++ b/runtime/interpreter/mterp/mterp.cc
@@ -276,6 +276,12 @@
         vtable_idx, kRuntimePointerSize);
     if ((called_method != nullptr) && called_method->IsIntrinsic()) {
       if (MterpHandleIntrinsic(shadow_frame, called_method, inst, inst_data, result_register)) {
+        jit::Jit* jit = Runtime::Current()->GetJit();
+        if (jit != nullptr) {
+          jit->InvokeVirtualOrInterface(
+              receiver, shadow_frame->GetMethod(), shadow_frame->GetDexPC(), called_method);
+          jit->AddSamples(self, shadow_frame->GetMethod(), 1, /*with_backedges*/false);
+        }
         return !self->IsExceptionPending();
       }
     }
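
Both interpreter hunks make the same point: the JIT must be notified even when an intrinsic handles the invoke, otherwise hot callers whose callees are all intrinsics never accumulate samples. A toy sketch of the control flow (types and names invented):

```cpp
#include <cstdio>

struct Jit {
  int samples = 0;
  void AddSamples(int n) { samples += n; }
};

bool HandleIntrinsicStub() { return true; }  // Pretend the fast path always hits.

bool Invoke(Jit* jit) {
  if (HandleIntrinsicStub()) {
    if (jit != nullptr) {
      jit->AddSamples(1);  // The fix: sample before the early return.
    }
    return true;
  }
  // The regular dispatch path already samples; nothing extra needed here.
  return true;
}

int main() {
  Jit jit;
  for (int i = 0; i < 10; ++i) {
    Invoke(&jit);
  }
  std::printf("samples=%d\n", jit.samples);  // 10 rather than 0.
}
```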
diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc
index 5232252..5ce5447 100644
--- a/runtime/jit/jit_code_cache.cc
+++ b/runtime/jit/jit_code_cache.cc
@@ -1269,6 +1269,7 @@
                                       std::vector<ProfileMethodInfo>& methods) {
   ScopedTrace trace(__FUNCTION__);
   MutexLock mu(Thread::Current(), lock_);
+  uint16_t jit_compile_threshold = Runtime::Current()->GetJITOptions()->GetCompileThreshold();
   for (const ProfilingInfo* info : profiling_infos_) {
     ArtMethod* method = info->GetMethod();
     const DexFile* dex_file = method->GetDexFile();
@@ -1277,6 +1278,16 @@
       continue;
     }
     std::vector<ProfileMethodInfo::ProfileInlineCache> inline_caches;
+
+    // If the method didn't reach the compilation threshold, don't save the inline caches.
+    // They might be incomplete and cause unnecessary deoptimizations.
+    // If the inline cache is empty, the compiler will generate a regular invoke virtual/interface.
+    if (method->GetCounter() < jit_compile_threshold) {
+      methods.emplace_back(/*ProfileMethodInfo*/
+          dex_file, method->GetDexMethodIndex(), inline_caches);
+      continue;
+    }
+
     for (size_t i = 0; i < info->number_of_inline_caches_; ++i) {
       std::vector<ProfileMethodInfo::ProfileClassReference> profile_classes;
       const InlineCache& cache = info->cache_[i];
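
The jit_code_cache.cc guard records methods below the compile threshold without their inline caches, since a partially warmed cache could later force deoptimizations. A hedged sketch of that decision with simplified stand-in types:

```cpp
#include <cstdint>
#include <cstdio>
#include <vector>

struct ProfiledMethod {
  uint32_t method_index;
  std::vector<int> inline_caches;  // Stand-in for ProfileInlineCache entries.
};

void Record(uint32_t index, uint16_t counter, uint16_t threshold,
            std::vector<ProfiledMethod>* out) {
  if (counter < threshold) {
    out->push_back({index, {}});  // Method only: its caches may be incomplete.
    return;
  }
  out->push_back({index, {42}});  // Hot enough: keep the harvested caches.
}

int main() {
  std::vector<ProfiledMethod> methods;
  Record(/*index=*/1, /*counter=*/5,  /*threshold=*/10, &methods);  // Cold.
  Record(/*index=*/2, /*counter=*/50, /*threshold=*/10, &methods);  // Hot.
  std::printf("m1 caches=%zu, m2 caches=%zu\n",
              methods[0].inline_caches.size(), methods[1].inline_caches.size());
}
```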
diff --git a/runtime/jit/profile_compilation_info.cc b/runtime/jit/profile_compilation_info.cc
index 220f298..9b80ad7 100644
--- a/runtime/jit/profile_compilation_info.cc
+++ b/runtime/jit/profile_compilation_info.cc
@@ -31,6 +31,8 @@
 #include <zlib.h>
 #include <base/time_utils.h>
 
+#include "base/arena_allocator.h"
+#include "base/dumpable.h"
 #include "base/mutex.h"
 #include "base/scoped_flock.h"
 #include "base/stl_util.h"
@@ -66,12 +68,25 @@
 static_assert(InlineCache::kIndividualCacheSize < kIsMissingTypesEncoding,
               "InlineCache::kIndividualCacheSize is larger than expected");
 
-ProfileCompilationInfo::ProfileCompilationInfo(const ProfileCompilationInfo& pci) {
-  MergeWith(pci);
+ProfileCompilationInfo::ProfileCompilationInfo(ArenaPool* custom_arena_pool)
+    : default_arena_pool_(nullptr),
+      arena_(new ArenaAllocator(custom_arena_pool)),
+      info_(arena_->Adapter(kArenaAllocProfile)),
+      profile_key_map_(std::less<const std::string>(), arena_->Adapter(kArenaAllocProfile)) {
+}
+
+ProfileCompilationInfo::ProfileCompilationInfo()
+    : default_arena_pool_(new ArenaPool(/*use_malloc*/true, /*low_4gb*/false, "ProfileCompilationInfo")),
+      arena_(new ArenaAllocator(default_arena_pool_.get())),
+      info_(arena_->Adapter(kArenaAllocProfile)),
+      profile_key_map_(std::less<const std::string>(), arena_->Adapter(kArenaAllocProfile)) {
 }
 
 ProfileCompilationInfo::~ProfileCompilationInfo() {
-  ClearProfile();
+  VLOG(profiler) << Dumpable<MemStats>(arena_->GetMemStats());
+  for (DexFileData* data : info_) {
+    delete data;
+  }
 }
 
 void ProfileCompilationInfo::DexPcData::AddClass(uint16_t dex_profile_idx,
@@ -460,7 +475,9 @@
   uint8_t profile_index = profile_index_it->second;
   if (info_.size() <= profile_index) {
     // This is a new addition. Add it to the info_ array.
-    info_.emplace_back(new DexFileData(profile_key, checksum, profile_index));
+    DexFileData* dex_file_data = new (arena_.get()) DexFileData(
+        arena_.get(), profile_key, checksum, profile_index);
+    info_.push_back(dex_file_data);
   }
   DexFileData* result = info_[profile_index];
   // DCHECK that profile info map key is consistent with the one stored in the dex file data.
@@ -506,7 +523,7 @@
 bool ProfileCompilationInfo::AddMethodIndex(const std::string& dex_location,
                                             uint32_t dex_checksum,
                                             uint16_t method_index) {
-  return AddMethod(dex_location, dex_checksum, method_index, OfflineProfileMethodInfo());
+  return AddMethod(dex_location, dex_checksum, method_index, OfflineProfileMethodInfo(arena_.get()));
 }
 
 bool ProfileCompilationInfo::AddMethod(const std::string& dex_location,
@@ -517,22 +534,22 @@
   if (data == nullptr) {  // checksum mismatch
     return false;
   }
-  auto inline_cache_it = data->method_map.FindOrAdd(method_index);
+  InlineCacheMap* inline_cache = data->FindOrAddMethod(method_index);
   for (const auto& pmi_inline_cache_it : pmi.inline_caches) {
     uint16_t pmi_ic_dex_pc = pmi_inline_cache_it.first;
     const DexPcData& pmi_ic_dex_pc_data = pmi_inline_cache_it.second;
-    DexPcData& dex_pc_data = inline_cache_it->second.FindOrAdd(pmi_ic_dex_pc)->second;
-    if (dex_pc_data.is_missing_types || dex_pc_data.is_megamorphic) {
+    DexPcData* dex_pc_data = FindOrAddDexPc(inline_cache, pmi_ic_dex_pc);
+    if (dex_pc_data->is_missing_types || dex_pc_data->is_megamorphic) {
       // We are already megamorphic or we are missing types; no point in going forward.
       continue;
     }
 
     if (pmi_ic_dex_pc_data.is_missing_types) {
-      dex_pc_data.SetIsMissingTypes();
+      dex_pc_data->SetIsMissingTypes();
       continue;
     }
     if (pmi_ic_dex_pc_data.is_megamorphic) {
-      dex_pc_data.SetIsMegamorphic();
+      dex_pc_data->SetIsMegamorphic();
       continue;
     }
 
@@ -544,7 +561,7 @@
       if (class_dex_data == nullptr) {  // checksum mismatch
         return false;
       }
-      dex_pc_data.AddClass(class_dex_data->profile_index, class_ref.type_index);
+      dex_pc_data->AddClass(class_dex_data->profile_index, class_ref.type_index);
     }
   }
   return true;
@@ -557,12 +574,11 @@
   if (data == nullptr) {  // checksum mismatch
     return false;
   }
-  auto inline_cache_it = data->method_map.FindOrAdd(pmi.dex_method_index);
+  InlineCacheMap* inline_cache = data->FindOrAddMethod(pmi.dex_method_index);
 
   for (const ProfileMethodInfo::ProfileInlineCache& cache : pmi.inline_caches) {
     if (cache.is_missing_types) {
-      auto dex_pc_data_it = inline_cache_it->second.FindOrAdd(cache.dex_pc);
-      dex_pc_data_it->second.SetIsMissingTypes();
+      FindOrAddDexPc(inline_cache, cache.dex_pc)->SetIsMissingTypes();
       continue;
     }
     for (const ProfileMethodInfo::ProfileClassReference& class_ref : cache.classes) {
@@ -572,12 +588,12 @@
       if (class_dex_data == nullptr) {  // checksum mismatch
         return false;
       }
-      auto dex_pc_data_it = inline_cache_it->second.FindOrAdd(cache.dex_pc);
-      if (dex_pc_data_it->second.is_missing_types) {
+      DexPcData* dex_pc_data = FindOrAddDexPc(inline_cache, cache.dex_pc);
+      if (dex_pc_data->is_missing_types) {
         // Don't bother adding classes if we are missing types.
         break;
       }
-      dex_pc_data_it->second.AddClass(class_dex_data->profile_index, class_ref.type_index);
+      dex_pc_data->AddClass(class_dex_data->profile_index, class_ref.type_index);
     }
   }
   return true;
@@ -614,13 +630,13 @@
     uint8_t dex_to_classes_map_size;
     READ_UINT(uint16_t, buffer, dex_pc, error);
     READ_UINT(uint8_t, buffer, dex_to_classes_map_size, error);
-    auto dex_pc_data_it = inline_cache->FindOrAdd(dex_pc);
+    DexPcData* dex_pc_data = FindOrAddDexPc(inline_cache, dex_pc);
     if (dex_to_classes_map_size == kIsMissingTypesEncoding) {
-      dex_pc_data_it->second.SetIsMissingTypes();
+      dex_pc_data->SetIsMissingTypes();
       continue;
     }
     if (dex_to_classes_map_size == kIsMegamorphicEncoding) {
-      dex_pc_data_it->second.SetIsMegamorphic();
+      dex_pc_data->SetIsMegamorphic();
       continue;
     }
     for (; dex_to_classes_map_size > 0; dex_to_classes_map_size--) {
@@ -636,7 +652,7 @@
       for (; dex_classes_size > 0; dex_classes_size--) {
         uint16_t type_index;
         READ_UINT(uint16_t, buffer, type_index, error);
-        dex_pc_data_it->second.AddClass(dex_profile_index, dex::TypeIndex(type_index));
+        dex_pc_data->AddClass(dex_profile_index, dex::TypeIndex(type_index));
       }
     }
   }
@@ -661,8 +677,8 @@
     READ_UINT(uint16_t, buffer, diff_with_last_method_index, error);
     uint16_t method_index = last_method_index + diff_with_last_method_index;
     last_method_index = method_index;
-    auto it = data->method_map.FindOrAdd(method_index);
-    if (!ReadInlineCache(buffer, number_of_dex_files, &(it->second), error)) {
+    InlineCacheMap* inline_cache = data->FindOrAddMethod(method_index);
+    if (!ReadInlineCache(buffer, number_of_dex_files, inline_cache, error)) {
       return false;
     }
   }
@@ -1091,19 +1107,19 @@
     // Merge the methods and the inline caches.
     for (const auto& other_method_it : other_dex_data->method_map) {
       uint16_t other_method_index = other_method_it.first;
-      auto method_it = dex_data->method_map.FindOrAdd(other_method_index);
+      InlineCacheMap* inline_cache = dex_data->FindOrAddMethod(other_method_index);
       const auto& other_inline_cache = other_method_it.second;
       for (const auto& other_ic_it : other_inline_cache) {
         uint16_t other_dex_pc = other_ic_it.first;
         const ClassSet& other_class_set = other_ic_it.second.classes;
-        auto class_set = method_it->second.FindOrAdd(other_dex_pc);
+        DexPcData* dex_pc_data = FindOrAddDexPc(inline_cache, other_dex_pc);
         if (other_ic_it.second.is_missing_types) {
-          class_set->second.SetIsMissingTypes();
+          dex_pc_data->SetIsMissingTypes();
         } else if (other_ic_it.second.is_megamorphic) {
-          class_set->second.SetIsMegamorphic();
+          dex_pc_data->SetIsMegamorphic();
         } else {
           for (const auto& class_it : other_class_set) {
-            class_set->second.AddClass(dex_profile_index_remap.Get(
+            dex_pc_data->AddClass(dex_profile_index_remap.Get(
                 class_it.dex_profile_index), class_it.type_index);
           }
         }
@@ -1143,14 +1159,15 @@
   return nullptr;
 }
 
-bool ProfileCompilationInfo::GetMethod(const std::string& dex_location,
-                                       uint32_t dex_checksum,
-                                       uint16_t dex_method_index,
-                                       /*out*/OfflineProfileMethodInfo* pmi) const {
+std::unique_ptr<ProfileCompilationInfo::OfflineProfileMethodInfo> ProfileCompilationInfo::GetMethod(
+      const std::string& dex_location,
+      uint32_t dex_checksum,
+      uint16_t dex_method_index) const {
   const InlineCacheMap* inline_caches = FindMethod(dex_location, dex_checksum, dex_method_index);
   if (inline_caches == nullptr) {
-    return false;
+    return nullptr;
   }
+  std::unique_ptr<OfflineProfileMethodInfo> pmi(new OfflineProfileMethodInfo(arena_.get()));
 
   pmi->dex_references.resize(info_.size());
   for (const DexFileData* dex_data : info_) {
@@ -1160,7 +1177,7 @@
 
   // TODO(calin): maybe expose a direct pointer to avoid copying
   pmi->inline_caches = *inline_caches;
-  return true;
+  return pmi;
 }
 
 
@@ -1170,7 +1187,7 @@
     if (!ChecksumMatch(dex_file, dex_data->checksum)) {
       return false;
     }
-    const std::set<dex::TypeIndex>& classes = dex_data->class_set;
+    const ArenaSet<dex::TypeIndex>& classes = dex_data->class_set;
     return classes.find(type_idx) != classes.end();
   }
   return false;
@@ -1286,7 +1303,9 @@
     return false;
   }
   *method_map = dex_data->method_map;
-  *class_set = dex_data->class_set;
+  for (const dex::TypeIndex& type_index : dex_data->class_set) {
+    class_set->insert(type_index);
+  }
   return true;
 }
 
@@ -1324,12 +1343,6 @@
   return ret;
 }
 
-void ProfileCompilationInfo::ClearResolvedClasses() {
-  for (DexFileData* dex_data : info_) {
-    dex_data->class_set.clear();
-  }
-}
-
 // Naive implementation to generate a random profile file suitable for testing.
 bool ProfileCompilationInfo::GenerateTestProfile(int fd,
                                                  uint16_t number_of_dex_files,
@@ -1441,17 +1454,21 @@
   return true;
 }
 
-void ProfileCompilationInfo::ClearProfile() {
-  for (DexFileData* dex_data : info_) {
-    delete dex_data;
-  }
-  info_.clear();
-  profile_key_map_.clear();
-}
-
 bool ProfileCompilationInfo::IsEmpty() const {
   DCHECK_EQ(info_.empty(), profile_key_map_.empty());
   return info_.empty();
 }
 
+ProfileCompilationInfo::InlineCacheMap*
+ProfileCompilationInfo::DexFileData::FindOrAddMethod(uint16_t method_index) {
+  return &(method_map.FindOrAdd(
+      method_index,
+      InlineCacheMap(std::less<uint16_t>(), arena_->Adapter(kArenaAllocProfile)))->second);
+}
+
+ProfileCompilationInfo::DexPcData*
+ProfileCompilationInfo::FindOrAddDexPc(InlineCacheMap* inline_cache, uint32_t dex_pc) {
+  return &(inline_cache->FindOrAdd(dex_pc, DexPcData(arena_.get()))->second);
+}
+
 }  // namespace art
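
The profile_compilation_info changes move every container onto an ArenaAllocator, which is why FindOrAddMethod and FindOrAddDexPc now pass an explicitly constructed default value: arena-backed values can no longer be default-constructed. A sketch of the same pattern using std::pmr (C++17) as a stand-in for ART's arena:

```cpp
#include <cstdio>
#include <map>
#include <memory_resource>
#include <set>

using ClassSet = std::pmr::set<int>;  // Stand-in for the per-dex-pc class set.

// FindOrAdd with an explicitly constructed, arena-backed default value.
ClassSet* FindOrAdd(std::pmr::map<int, ClassSet>* m, int dex_pc,
                    std::pmr::memory_resource* arena) {
  auto it = m->find(dex_pc);
  if (it == m->end()) {
    it = m->emplace(dex_pc, ClassSet(arena)).first;  // No default construction.
  }
  return &it->second;
}

int main() {
  std::pmr::monotonic_buffer_resource arena;
  std::pmr::map<int, ClassSet> inline_cache(&arena);
  FindOrAdd(&inline_cache, /*dex_pc=*/7, &arena)->insert(3);
  std::printf("classes at dex pc 7: %zu\n", inline_cache.at(7).size());
}
```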
diff --git a/runtime/jit/profile_compilation_info.h b/runtime/jit/profile_compilation_info.h
index ee1935f..6756352 100644
--- a/runtime/jit/profile_compilation_info.h
+++ b/runtime/jit/profile_compilation_info.h
@@ -17,11 +17,12 @@
 #ifndef ART_RUNTIME_JIT_PROFILE_COMPILATION_INFO_H_
 #define ART_RUNTIME_JIT_PROFILE_COMPILATION_INFO_H_
 
-#include <memory>
 #include <set>
 #include <vector>
 
 #include "atomic.h"
+#include "base/arena_object.h"
+#include "base/arena_containers.h"
 #include "dex_cache_resolved_classes.h"
 #include "dex_file.h"
 #include "dex_file_types.h"
@@ -115,7 +116,7 @@
   // We cannot rely on the actual multidex index because a single profile may store
   // data from multiple splits. This means that a profile may contain a classes2.dex from split-A
   // and one from split-B.
-  struct ClassReference {
+  struct ClassReference : public ValueObject {
     ClassReference(uint8_t dex_profile_idx, const dex::TypeIndex& type_idx) :
       dex_profile_index(dex_profile_idx), type_index(type_idx) {}
 
@@ -133,13 +134,16 @@
   };
 
   // The set of classes that can be found at a given dex pc.
-  using ClassSet = std::set<ClassReference>;
+  using ClassSet = ArenaSet<ClassReference>;
 
   // Encodes the actual inline cache for a given dex pc (whether or not the receiver is
   // megamorphic and its possible types).
   // If the receiver is megamorphic or is missing types the set of classes will be empty.
-  struct DexPcData {
-    DexPcData() : is_missing_types(false), is_megamorphic(false) {}
+  struct DexPcData : public ArenaObject<kArenaAllocProfile> {
+    explicit DexPcData(ArenaAllocator* arena)
+        : is_missing_types(false),
+          is_megamorphic(false),
+          classes(std::less<ClassReference>(), arena->Adapter(kArenaAllocProfile)) {}
     void AddClass(uint16_t dex_profile_idx, const dex::TypeIndex& type_idx);
     void SetIsMegamorphic() {
       if (is_missing_types) return;
@@ -166,16 +170,19 @@
   };
 
   // The inline cache map: DexPc -> DexPcData.
-  using InlineCacheMap = SafeMap<uint16_t, DexPcData>;
+  using InlineCacheMap = ArenaSafeMap<uint16_t, DexPcData>;
 
   // Maps a method dex index to its inline cache.
-  using MethodMap = SafeMap<uint16_t, InlineCacheMap>;
+  using MethodMap = ArenaSafeMap<uint16_t, InlineCacheMap>;
 
   // Encodes the full set of inline caches for a given method.
   // The dex_references vector is indexed according to the ClassReference::dex_profile_index.
   // i.e. the dex file of any ClassReference present in the inline caches can be found at
   // dex_references[ClassReference::dex_profile_index].
   struct OfflineProfileMethodInfo {
+    explicit OfflineProfileMethodInfo(ArenaAllocator* allocator)
+        : inline_caches(std::less<uint16_t>(), allocator->Adapter(kArenaAllocProfile)) {}
+
     bool operator==(const OfflineProfileMethodInfo& other) const;
 
     std::vector<DexReference> dex_references;
@@ -183,9 +190,9 @@
   };
 
   // Public methods to create, extend or query the profile.
+  ProfileCompilationInfo();
+  explicit ProfileCompilationInfo(ArenaPool* arena_pool);
 
-  ProfileCompilationInfo() {}
-  ProfileCompilationInfo(const ProfileCompilationInfo& pci);
   ~ProfileCompilationInfo();
 
   // Add the given methods and classes to the current profile object.
@@ -223,12 +230,12 @@
   // Return true if the class's type is present in the profiling info.
   bool ContainsClass(const DexFile& dex_file, dex::TypeIndex type_idx) const;
 
-  // Return true if the method is present in the profiling info.
-  // If the method is found, `pmi` is populated with its inline caches.
-  bool GetMethod(const std::string& dex_location,
-                 uint32_t dex_checksum,
-                 uint16_t dex_method_index,
-                 /*out*/OfflineProfileMethodInfo* pmi) const;
+  // Return the method data for the given location and index from the profiling info.
+  // If the method index is not found or the checksum doesn't match, null is returned.
+  // The allocations for the method info are done on the current profile arena.
+  std::unique_ptr<OfflineProfileMethodInfo> GetMethod(const std::string& dex_location,
+                                                      uint32_t dex_checksum,
+                                                      uint16_t dex_method_index) const;
 
   // Dump all the loaded profile info into a string and returns it.
   // If dex_files is not null then the method indices will be resolved to their
@@ -253,9 +260,6 @@
   std::set<DexCacheResolvedClasses> GetResolvedClasses(
       const std::unordered_set<std::string>& dex_files_locations) const;
 
-  // Clear the resolved classes from the current object.
-  void ClearResolvedClasses();
-
   // Return the profile key associated with the given dex location.
   static std::string GetProfileDexFileKey(const std::string& dex_location);
 
@@ -277,6 +281,8 @@
   static bool Equals(const ProfileCompilationInfo::OfflineProfileMethodInfo& pmi1,
                      const ProfileCompilationInfo::OfflineProfileMethodInfo& pmi2);
 
+  ArenaAllocator* GetArena() { return arena_.get(); }
+
  private:
   enum ProfileLoadSatus {
     kProfileLoadWouldOverwiteData,
@@ -295,9 +301,20 @@
   // profile) fields in this struct because we can infer them from
   // profile_key_map_ and info_. However, it makes the profiles logic much
   // simpler if we have references here as well.
-  struct DexFileData {
-    DexFileData(const std::string& key, uint32_t location_checksum, uint16_t index)
-         : profile_key(key), profile_index(index), checksum(location_checksum) {}
+  struct DexFileData : public DeletableArenaObject<kArenaAllocProfile> {
+    DexFileData(ArenaAllocator* arena,
+                const std::string& key,
+                uint32_t location_checksum,
+                uint16_t index)
+        : arena_(arena),
+          profile_key(key),
+          profile_index(index),
+          checksum(location_checksum),
+          method_map(std::less<uint16_t>(), arena->Adapter(kArenaAllocProfile)),
+          class_set(std::less<dex::TypeIndex>(), arena->Adapter(kArenaAllocProfile)) {}
+
+    // The arena used to allocate new inline cache maps.
+    ArenaAllocator* arena_;
     // The profile key this data belongs to.
     std::string profile_key;
     // The profile index of this dex file (matches ClassReference#dex_profile_index).
@@ -308,11 +325,15 @@
     MethodMap method_map;
     // The classes which have been profiled. Note that these don't necessarily include
     // all the classes that can be found in the inline caches reference.
-    std::set<dex::TypeIndex> class_set;
+    ArenaSet<dex::TypeIndex> class_set;
 
     bool operator==(const DexFileData& other) const {
       return checksum == other.checksum && method_map == other.method_map;
     }
+
+    // Find the inline caches of the given method index. Add an empty entry if
+    // no previous data is found.
+    InlineCacheMap* FindOrAddMethod(uint16_t method_index);
   };
 
   // Return the profile data for the given profile key or null if the dex location
@@ -352,9 +373,6 @@
   // doesn't contain the key.
   const DexFileData* FindDexData(const std::string& profile_key) const;
 
-  // Clear all the profile data.
-  void ClearProfile();
-
   // Checks if the profile is empty.
   bool IsEmpty() const;
 
@@ -485,20 +503,27 @@
       const ClassSet& classes,
       /*out*/SafeMap<uint8_t, std::vector<dex::TypeIndex>>* dex_to_classes_map);
 
+  // Find the data for the dex_pc in the inline cache. Adds an empty entry
+  // if no previous data exists.
+  DexPcData* FindOrAddDexPc(InlineCacheMap* inline_cache, uint32_t dex_pc);
+
   friend class ProfileCompilationInfoTest;
   friend class CompilerDriverProfileTest;
   friend class ProfileAssistantTest;
   friend class Dex2oatLayoutTest;
 
+  std::unique_ptr<ArenaPool> default_arena_pool_;
+  std::unique_ptr<ArenaAllocator> arena_;
+
   // Vector containing the actual profile info.
   // The vector index is the profile index of the dex data and
   // matched DexFileData::profile_index.
-  std::vector<DexFileData*> info_;
+  ArenaVector<DexFileData*> info_;
 
   // Cache mapping profile keys to profile index.
   // This is used to speed up searches since it avoids iterating
   // over the info_ vector when searching by profile key.
-  SafeMap<const std::string, uint8_t> profile_key_map_;
+  ArenaSafeMap<const std::string, uint8_t> profile_key_map_;
 };
 
 }  // namespace art
diff --git a/runtime/jit/profile_compilation_info_test.cc b/runtime/jit/profile_compilation_info_test.cc
index e8f4ce2..a054199 100644
--- a/runtime/jit/profile_compilation_info_test.cc
+++ b/runtime/jit/profile_compilation_info_test.cc
@@ -25,12 +25,18 @@
 #include "mirror/class-inl.h"
 #include "mirror/class_loader.h"
 #include "handle_scope-inl.h"
+#include "linear_alloc.h"
 #include "jit/profile_compilation_info.h"
 #include "scoped_thread_state_change-inl.h"
 
 namespace art {
 
 class ProfileCompilationInfoTest : public CommonRuntimeTest {
+ public:
+  virtual void PostRuntimeCreate() {
+    arena_.reset(new ArenaAllocator(Runtime::Current()->GetArenaPool()));
+  }
+
  protected:
   std::vector<ArtMethod*> GetVirtualMethods(jobject class_loader,
                                             const std::string& clazz) {
@@ -158,11 +164,12 @@
 
   ProfileCompilationInfo::OfflineProfileMethodInfo ConvertProfileMethodInfo(
         const ProfileMethodInfo& pmi) {
-    ProfileCompilationInfo::OfflineProfileMethodInfo offline_pmi;
+    ProfileCompilationInfo::OfflineProfileMethodInfo offline_pmi(arena_.get());
     SafeMap<DexFile*, uint8_t> dex_map;  // dex files to profile index
     for (const auto& inline_cache : pmi.inline_caches) {
       ProfileCompilationInfo::DexPcData& dex_pc_data =
-          offline_pmi.inline_caches.FindOrAdd(inline_cache.dex_pc)->second;
+          offline_pmi.inline_caches.FindOrAdd(
+              inline_cache.dex_pc, ProfileCompilationInfo::DexPcData(arena_.get()))->second;
       if (inline_cache.is_missing_types) {
         dex_pc_data.SetIsMissingTypes();
       }
@@ -184,7 +191,7 @@
 
   // Creates an offline profile used for testing inline caches.
   ProfileCompilationInfo::OfflineProfileMethodInfo GetOfflineProfileMethodInfo() {
-    ProfileCompilationInfo::OfflineProfileMethodInfo pmi;
+    ProfileCompilationInfo::OfflineProfileMethodInfo pmi(arena_.get());
 
     pmi.dex_references.emplace_back("dex_location1", /* checksum */1);
     pmi.dex_references.emplace_back("dex_location2", /* checksum */2);
@@ -192,13 +199,13 @@
 
     // Monomorphic
     for (uint16_t dex_pc = 0; dex_pc < 11; dex_pc++) {
-      ProfileCompilationInfo::DexPcData dex_pc_data;
+      ProfileCompilationInfo::DexPcData dex_pc_data(arena_.get());
       dex_pc_data.AddClass(0, dex::TypeIndex(0));
       pmi.inline_caches.Put(dex_pc, dex_pc_data);
     }
     // Polymorphic
     for (uint16_t dex_pc = 11; dex_pc < 22; dex_pc++) {
-      ProfileCompilationInfo::DexPcData dex_pc_data;
+      ProfileCompilationInfo::DexPcData dex_pc_data(arena_.get());
       dex_pc_data.AddClass(0, dex::TypeIndex(0));
       dex_pc_data.AddClass(1, dex::TypeIndex(1));
       dex_pc_data.AddClass(2, dex::TypeIndex(2));
@@ -207,13 +214,13 @@
     }
     // Megamorphic
     for (uint16_t dex_pc = 22; dex_pc < 33; dex_pc++) {
-      ProfileCompilationInfo::DexPcData dex_pc_data;
+      ProfileCompilationInfo::DexPcData dex_pc_data(arena_.get());
       dex_pc_data.SetIsMegamorphic();
       pmi.inline_caches.Put(dex_pc, dex_pc_data);
     }
     // Missing types
     for (uint16_t dex_pc = 33; dex_pc < 44; dex_pc++) {
-      ProfileCompilationInfo::DexPcData dex_pc_data;
+      ProfileCompilationInfo::DexPcData dex_pc_data(arena_.get());
       dex_pc_data.SetIsMissingTypes();
       pmi.inline_caches.Put(dex_pc, dex_pc_data);
     }
@@ -239,6 +246,8 @@
   // They should not change anyway.
   static constexpr int kProfileMagicSize = 4;
   static constexpr int kProfileVersionSize = 4;
+
+  std::unique_ptr<ArenaAllocator> arena_;
 };
 
 TEST_F(ProfileCompilationInfoTest, SaveArtMethods) {
@@ -500,18 +509,14 @@
 
   ASSERT_TRUE(loaded_info.Equals(saved_info));
 
-  ProfileCompilationInfo::OfflineProfileMethodInfo loaded_pmi1;
-  ASSERT_TRUE(loaded_info.GetMethod("dex_location1",
-                                    /* checksum */ 1,
-                                    /* method_idx */ 3,
-                                    &loaded_pmi1));
-  ASSERT_TRUE(loaded_pmi1 == pmi);
-  ProfileCompilationInfo::OfflineProfileMethodInfo loaded_pmi2;
-  ASSERT_TRUE(loaded_info.GetMethod("dex_location4",
-                                    /* checksum */ 4,
-                                    /* method_idx */ 3,
-                                    &loaded_pmi2));
-  ASSERT_TRUE(loaded_pmi2 == pmi);
+  std::unique_ptr<ProfileCompilationInfo::OfflineProfileMethodInfo> loaded_pmi1 =
+      loaded_info.GetMethod("dex_location1", /* checksum */ 1, /* method_idx */ 3);
+  ASSERT_TRUE(loaded_pmi1 != nullptr);
+  ASSERT_TRUE(*loaded_pmi1 == pmi);
+  std::unique_ptr<ProfileCompilationInfo::OfflineProfileMethodInfo> loaded_pmi2 =
+      loaded_info.GetMethod("dex_location4", /* checksum */ 4, /* method_idx */ 3);
+  ASSERT_TRUE(loaded_pmi2 != nullptr);
+  ASSERT_TRUE(*loaded_pmi2 == pmi);
 }
 
 TEST_F(ProfileCompilationInfoTest, MegamorphicInlineCaches) {
@@ -550,12 +555,11 @@
 
   ASSERT_TRUE(loaded_info.Equals(saved_info));
 
-  ProfileCompilationInfo::OfflineProfileMethodInfo loaded_pmi1;
-  ASSERT_TRUE(loaded_info.GetMethod("dex_location1",
-                                    /* checksum */ 1,
-                                    /* method_idx */ 3,
-                                    &loaded_pmi1));
-  ASSERT_TRUE(loaded_pmi1 == pmi_extra);
+  std::unique_ptr<ProfileCompilationInfo::OfflineProfileMethodInfo> loaded_pmi1 =
+      loaded_info.GetMethod("dex_location1", /* checksum */ 1, /* method_idx */ 3);
+
+  ASSERT_TRUE(loaded_pmi1 != nullptr);
+  ASSERT_TRUE(*loaded_pmi1 == pmi_extra);
 }
 
 TEST_F(ProfileCompilationInfoTest, MissingTypesInlineCaches) {
@@ -602,12 +606,10 @@
 
   ASSERT_TRUE(loaded_info.Equals(saved_info));
 
-  ProfileCompilationInfo::OfflineProfileMethodInfo loaded_pmi1;
-  ASSERT_TRUE(loaded_info.GetMethod("dex_location1",
-                                    /* checksum */ 1,
-                                    /* method_idx */ 3,
-                                    &loaded_pmi1));
-  ASSERT_TRUE(loaded_pmi1 == pmi_extra);
+  std::unique_ptr<ProfileCompilationInfo::OfflineProfileMethodInfo> loaded_pmi1 =
+      loaded_info.GetMethod("dex_location1", /* checksum */ 1, /* method_idx */ 3);
+  ASSERT_TRUE(loaded_pmi1 != nullptr);
+  ASSERT_TRUE(*loaded_pmi1 == pmi_extra);
 }
 
 TEST_F(ProfileCompilationInfoTest, SaveArtMethodsWithInlineCaches) {
@@ -638,14 +640,14 @@
     for (ArtMethod* m : main_methods) {
       ASSERT_TRUE(info.ContainsMethod(MethodReference(m->GetDexFile(), m->GetDexMethodIndex())));
       const ProfileMethodInfo& pmi = profile_methods_map.find(m)->second;
-      ProfileCompilationInfo::OfflineProfileMethodInfo offline_pmi;
-      ASSERT_TRUE(info.GetMethod(m->GetDexFile()->GetLocation(),
-                                 m->GetDexFile()->GetLocationChecksum(),
-                                 m->GetDexMethodIndex(),
-                                 &offline_pmi));
+      std::unique_ptr<ProfileCompilationInfo::OfflineProfileMethodInfo> offline_pmi =
+          info.GetMethod(m->GetDexFile()->GetLocation(),
+                         m->GetDexFile()->GetLocationChecksum(),
+                         m->GetDexMethodIndex());
+      ASSERT_TRUE(offline_pmi != nullptr);
       ProfileCompilationInfo::OfflineProfileMethodInfo converted_pmi =
           ConvertProfileMethodInfo(pmi);
-      ASSERT_EQ(converted_pmi, offline_pmi);
+      ASSERT_EQ(converted_pmi, *offline_pmi);
     }
   }
 }
@@ -671,21 +673,21 @@
   ProfileCompilationInfo info;
   ProfileCompilationInfo info_reindexed;
 
-  ProfileCompilationInfo::OfflineProfileMethodInfo pmi;
+  ProfileCompilationInfo::OfflineProfileMethodInfo pmi(arena_.get());
   pmi.dex_references.emplace_back("dex_location1", /* checksum */ 1);
   pmi.dex_references.emplace_back("dex_location2", /* checksum */ 2);
   for (uint16_t dex_pc = 1; dex_pc < 5; dex_pc++) {
-    ProfileCompilationInfo::DexPcData dex_pc_data;
+    ProfileCompilationInfo::DexPcData dex_pc_data(arena_.get());
     dex_pc_data.AddClass(0, dex::TypeIndex(0));
     dex_pc_data.AddClass(1, dex::TypeIndex(1));
     pmi.inline_caches.Put(dex_pc, dex_pc_data);
   }
 
-  ProfileCompilationInfo::OfflineProfileMethodInfo pmi_reindexed;
+  ProfileCompilationInfo::OfflineProfileMethodInfo pmi_reindexed(arena_.get());
   pmi_reindexed.dex_references.emplace_back("dex_location2", /* checksum */ 2);
   pmi_reindexed.dex_references.emplace_back("dex_location1", /* checksum */ 1);
   for (uint16_t dex_pc = 1; dex_pc < 5; dex_pc++) {
-    ProfileCompilationInfo::DexPcData dex_pc_data;
+    ProfileCompilationInfo::DexPcData dex_pc_data(arena_.get());
     dex_pc_data.AddClass(1, dex::TypeIndex(0));
     dex_pc_data.AddClass(0, dex::TypeIndex(1));
     pmi_reindexed.inline_caches.Put(dex_pc, dex_pc_data);
@@ -705,23 +707,20 @@
       "dex_location1", /* checksum */ 1, method_idx, pmi_reindexed, &info_reindexed));
   }
 
-  ProfileCompilationInfo info_backup = info;
+  ProfileCompilationInfo info_backup;
+  info_backup.MergeWith(info);
   ASSERT_TRUE(info.MergeWith(info_reindexed));
   // Merging should have no effect as we're adding the exact same stuff.
   ASSERT_TRUE(info.Equals(info_backup));
   for (uint16_t method_idx = 0; method_idx < 10; method_idx++) {
-    ProfileCompilationInfo::OfflineProfileMethodInfo loaded_pmi1;
-    ASSERT_TRUE(info.GetMethod("dex_location1",
-                                      /* checksum */ 1,
-                                      /* method_idx */ method_idx,
-                                      &loaded_pmi1));
-    ASSERT_TRUE(loaded_pmi1 == pmi);
-    ProfileCompilationInfo::OfflineProfileMethodInfo loaded_pmi2;
-    ASSERT_TRUE(info.GetMethod("dex_location2",
-                                      /* checksum */ 2,
-                                      /* method_idx */ method_idx,
-                                      &loaded_pmi2));
-    ASSERT_TRUE(loaded_pmi2 == pmi);
+    std::unique_ptr<ProfileCompilationInfo::OfflineProfileMethodInfo> loaded_pmi1 =
+        info.GetMethod("dex_location1", /* checksum */ 1, method_idx);
+    ASSERT_TRUE(loaded_pmi1 != nullptr);
+    ASSERT_TRUE(*loaded_pmi1 == pmi);
+    std::unique_ptr<ProfileCompilationInfo::OfflineProfileMethodInfo> loaded_pmi2 =
+        info.GetMethod("dex_location2", /* checksum */ 2, method_idx);
+    ASSERT_TRUE(loaded_pmi2 != nullptr);
+    ASSERT_TRUE(*loaded_pmi2 == pmi);
   }
 }
 
@@ -739,9 +738,9 @@
 
 TEST_F(ProfileCompilationInfoTest, MegamorphicInlineCachesMerge) {
   // Create a megamorphic inline cache.
-  ProfileCompilationInfo::OfflineProfileMethodInfo pmi;
+  ProfileCompilationInfo::OfflineProfileMethodInfo pmi(arena_.get());
   pmi.dex_references.emplace_back("dex_location1", /* checksum */ 1);
-  ProfileCompilationInfo::DexPcData dex_pc_data;
+  ProfileCompilationInfo::DexPcData dex_pc_data(arena_.get());
   dex_pc_data.SetIsMegamorphic();
   pmi.inline_caches.Put(/*dex_pc*/ 0, dex_pc_data);
 
@@ -768,9 +767,9 @@
 
 TEST_F(ProfileCompilationInfoTest, MissingTypesInlineCachesMerge) {
   // Create an inline cache with missing types
-  ProfileCompilationInfo::OfflineProfileMethodInfo pmi;
+  ProfileCompilationInfo::OfflineProfileMethodInfo pmi(arena_.get());
   pmi.dex_references.emplace_back("dex_location1", /* checksum */ 1);
-  ProfileCompilationInfo::DexPcData dex_pc_data;
+  ProfileCompilationInfo::DexPcData dex_pc_data(arena_.get());
   dex_pc_data.SetIsMissingTypes();
   pmi.inline_caches.Put(/*dex_pc*/ 0, dex_pc_data);
 
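
Note: the test changes above track an API shift in ProfileCompilationInfo:
GetMethod() now returns ownership as a std::unique_ptr instead of filling a
caller-provided out-parameter. A minimal sketch of the new calling pattern,
with hypothetical Profile/MethodRecord types standing in for the real classes:

    #include <cstdint>
    #include <memory>
    #include <string>
    #include <unordered_map>

    struct MethodRecord {};  // Stand-in for OfflineProfileMethodInfo.

    class Profile {  // Stand-in for ProfileCompilationInfo.
     public:
      // Returns nullptr when the method is unknown, mirroring the new
      // unique_ptr-returning GetMethod() contract in the diff above.
      std::unique_ptr<MethodRecord> GetMethod(const std::string& location,
                                              uint32_t checksum,
                                              uint16_t method_idx) const {
        auto it = methods_.find(Key(location, checksum, method_idx));
        if (it == methods_.end()) {
          return nullptr;
        }
        return std::make_unique<MethodRecord>(it->second);  // Caller owns the copy.
      }

     private:
      static std::string Key(const std::string& l, uint32_t c, uint16_t m) {
        return l + ":" + std::to_string(c) + ":" + std::to_string(m);
      }
      std::unordered_map<std::string, MethodRecord> methods_;
    };

Callers then assert that the pointer is non-null and compare through it
(*loaded_pmi == pmi), exactly as the updated tests do.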
diff --git a/runtime/jit/profile_saver.cc b/runtime/jit/profile_saver.cc
index 2dba9b7..0c94a94 100644
--- a/runtime/jit/profile_saver.cc
+++ b/runtime/jit/profile_saver.cc
@@ -64,6 +64,12 @@
   AddTrackedLocations(output_filename, code_paths);
 }
 
+ProfileSaver::~ProfileSaver() {
+  for (auto& it : profile_cache_) {
+    delete it.second;
+  }
+}
+
 void ProfileSaver::Run() {
   Thread* self = Thread::Current();
 
@@ -253,9 +259,9 @@
                        << " (" << classes.GetDexLocation() << ")";
       }
     }
-    auto info_it = profile_cache_.Put(filename, ProfileCompilationInfo());
+    auto info_it = profile_cache_.Put(
+        filename, new ProfileCompilationInfo(Runtime::Current()->GetArenaPool()));
 
-    ProfileCompilationInfo* cached_info = &(info_it->second);
+    ProfileCompilationInfo* cached_info = info_it->second;
     cached_info->AddMethodsAndClasses(profile_methods_for_location,
                                       resolved_classes_for_location);
     total_number_of_profile_entries_cached += resolved_classes_for_location.size();
@@ -279,7 +285,6 @@
   }
 
   bool profile_file_saved = false;
-  uint64_t total_number_of_profile_entries_cached = 0;
   if (number_of_new_methods != nullptr) {
     *number_of_new_methods = 0;
   }
@@ -300,60 +305,68 @@
       jit_code_cache_->GetProfiledMethods(locations, profile_methods);
       total_number_of_code_cache_queries_++;
     }
-    ProfileCompilationInfo info;
-    if (!info.Load(filename, /*clear_if_invalid*/ true)) {
-      LOG(WARNING) << "Could not forcefully load profile " << filename;
-      continue;
-    }
-    uint64_t last_save_number_of_methods = info.GetNumberOfMethods();
-    uint64_t last_save_number_of_classes = info.GetNumberOfResolvedClasses();
+    {
+      ProfileCompilationInfo info(Runtime::Current()->GetArenaPool());
+      if (!info.Load(filename, /*clear_if_invalid*/ true)) {
+        LOG(WARNING) << "Could not forcefully load profile " << filename;
+        continue;
+      }
+      uint64_t last_save_number_of_methods = info.GetNumberOfMethods();
+      uint64_t last_save_number_of_classes = info.GetNumberOfResolvedClasses();
 
-    info.AddMethodsAndClasses(profile_methods, std::set<DexCacheResolvedClasses>());
-    auto profile_cache_it = profile_cache_.find(filename);
-    if (profile_cache_it != profile_cache_.end()) {
-      info.MergeWith(profile_cache_it->second);
-    }
-
-    int64_t delta_number_of_methods = info.GetNumberOfMethods() - last_save_number_of_methods;
-    int64_t delta_number_of_classes = info.GetNumberOfResolvedClasses() - last_save_number_of_classes;
-
-    if (!force_save &&
-        delta_number_of_methods < options_.GetMinMethodsToSave() &&
-        delta_number_of_classes < options_.GetMinClassesToSave()) {
-      VLOG(profiler) << "Not enough information to save to: " << filename
-          << " Number of methods: " << delta_number_of_methods
-          << " Number of classes: " << delta_number_of_classes;
-      total_number_of_skipped_writes_++;
-      continue;
-    }
-    if (number_of_new_methods != nullptr) {
-      *number_of_new_methods = std::max(static_cast<uint16_t>(delta_number_of_methods),
-                                        *number_of_new_methods);
-    }
-    uint64_t bytes_written;
-    // Force the save. In case the profile data is corrupted or the the profile
-    // has the wrong version this will "fix" the file to the correct format.
-    if (info.Save(filename, &bytes_written)) {
-      // We managed to save the profile. Clear the cache stored during startup.
+      info.AddMethodsAndClasses(profile_methods,
+                                std::set<DexCacheResolvedClasses>());
+      auto profile_cache_it = profile_cache_.find(filename);
       if (profile_cache_it != profile_cache_.end()) {
-        profile_cache_.erase(profile_cache_it);
-        total_number_of_profile_entries_cached = 0;
+        info.MergeWith(*(profile_cache_it->second));
       }
-      if (bytes_written > 0) {
-        total_number_of_writes_++;
-        total_bytes_written_ += bytes_written;
-        profile_file_saved = true;
-      } else {
-        // At this point we could still have avoided the write.
-        // We load and merge the data from the file lazily at its first ever
-        // save attempt. So, whatever we are trying to save could already be
-        // in the file.
+
+      int64_t delta_number_of_methods =
+          info.GetNumberOfMethods() - last_save_number_of_methods;
+      int64_t delta_number_of_classes =
+          info.GetNumberOfResolvedClasses() - last_save_number_of_classes;
+
+      if (!force_save &&
+          delta_number_of_methods < options_.GetMinMethodsToSave() &&
+          delta_number_of_classes < options_.GetMinClassesToSave()) {
+        VLOG(profiler) << "Not enough information to save to: " << filename
+                       << " Number of methods: " << delta_number_of_methods
+                       << " Number of classes: " << delta_number_of_classes;
         total_number_of_skipped_writes_++;
+        continue;
       }
-    } else {
-      LOG(WARNING) << "Could not save profiling info to " << filename;
-      total_number_of_failed_writes_++;
+      if (number_of_new_methods != nullptr) {
+        *number_of_new_methods =
+            std::max(static_cast<uint16_t>(delta_number_of_methods),
+                     *number_of_new_methods);
+      }
+      uint64_t bytes_written;
+      // Force the save. In case the profile data is corrupted or the profile
+      // has the wrong version, this will "fix" the file to the correct format.
+      if (info.Save(filename, &bytes_written)) {
+        // We managed to save the profile. Clear the cache stored during startup.
+        if (profile_cache_it != profile_cache_.end()) {
+          ProfileCompilationInfo *cached_info = profile_cache_it->second;
+          profile_cache_.erase(profile_cache_it);
+          delete cached_info;
+        }
+        if (bytes_written > 0) {
+          total_number_of_writes_++;
+          total_bytes_written_ += bytes_written;
+          profile_file_saved = true;
+        } else {
+          // At this point we could still have avoided the write.
+          // We load and merge the data from the file lazily at its first ever
+          // save attempt. So, whatever we are trying to save could already be
+          // in the file.
+          total_number_of_skipped_writes_++;
+        }
+      } else {
+        LOG(WARNING) << "Could not save profiling info to " << filename;
+        total_number_of_failed_writes_++;
+      }
     }
+    Runtime::Current()->GetArenaPool()->TrimMaps();
   }
 
   return profile_file_saved;
@@ -579,7 +592,7 @@
                                  uint16_t method_idx) {
   MutexLock mu(Thread::Current(), *Locks::profiler_lock_);
   if (instance_ != nullptr) {
-    ProfileCompilationInfo info;
+    ProfileCompilationInfo info(Runtime::Current()->GetArenaPool());
     if (!info.Load(profile, /*clear_if_invalid*/false)) {
       return false;
     }
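
Note: profile_saver.cc now keeps heap-allocated ProfileCompilationInfo objects
in its cache and frees them by hand, both in the destructor and after a
successful save. A reduced sketch of that manual-ownership pattern, using
std::map and a hypothetical Info type as stand-ins for SafeMap and
ProfileCompilationInfo:

    #include <map>
    #include <string>

    struct Info {};  // Stand-in for ProfileCompilationInfo.

    class Saver {
     public:
      ~Saver() {
        // Matches ProfileSaver::~ProfileSaver() above: every cached entry is owned.
        for (auto& it : cache_) {
          delete it.second;
        }
      }
      void Cache(const std::string& name) {
        cache_[name] = new Info();  // Ownership stays with the map until erased.
      }
      void OnSaved(const std::string& name) {
        auto it = cache_.find(name);
        if (it != cache_.end()) {
          Info* cached = it->second;
          cache_.erase(it);  // Drop the entry first, then release the memory.
          delete cached;
        }
      }

     private:
      std::map<std::string, Info*> cache_;
    };

Storing std::unique_ptr values in the map would express the same ownership
without the explicit deletes; the raw-pointer form shown here is simply what
the diff uses.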
diff --git a/runtime/jit/profile_saver.h b/runtime/jit/profile_saver.h
index 60c9cc6..01d72fe 100644
--- a/runtime/jit/profile_saver.h
+++ b/runtime/jit/profile_saver.h
@@ -65,6 +65,7 @@
                const std::string& output_filename,
                jit::JitCodeCache* jit_code_cache,
                const std::vector<std::string>& code_paths);
+  ~ProfileSaver();
 
   // NO_THREAD_SAFETY_ANALYSIS for static function calling into member function with excludes lock.
   static void* RunProfileSaverThread(void* arg)
@@ -131,7 +132,7 @@
   // we don't hammer the disk to save them right away.
   // The size of this cache is usually very small and tops out
   // at just a few hundred entries in the ProfileCompilationInfo objects.
-  SafeMap<std::string, ProfileCompilationInfo> profile_cache_;
+  SafeMap<std::string, ProfileCompilationInfo*> profile_cache_;
 
   // Save period condition support.
   Mutex wait_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
diff --git a/runtime/mirror/object_test.cc b/runtime/mirror/object_test.cc
index d7527d5..6230ae9 100644
--- a/runtime/mirror/object_test.cc
+++ b/runtime/mirror/object_test.cc
@@ -582,7 +582,7 @@
 
   // Primitive types are only assignable to themselves
   const char* prims = "ZBCSIJFD";
-  Class* prim_types[strlen(prims)];
+  std::vector<Class*> prim_types(strlen(prims));
   for (size_t i = 0; i < strlen(prims); i++) {
     prim_types[i] = class_linker_->FindPrimitiveClass(prims[i]);
   }
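
Note: the object_test.cc change replaces a runtime-sized stack array (a GNU
variable-length-array extension, not standard C++) with std::vector. A
standalone illustration of the same fix:

    #include <cstring>
    #include <vector>

    struct Class {};  // Stand-in for art::mirror::Class.

    void Example() {
      const char* prims = "ZBCSIJFD";
      // Class* prim_types[strlen(prims)];           // VLA: rejected under -Wvla.
      std::vector<Class*> prim_types(strlen(prims));  // Standard, heap-backed.
      for (size_t i = 0; i < strlen(prims); i++) {
        prim_types[i] = nullptr;  // The real test stores FindPrimitiveClass(prims[i]).
      }
    }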
diff --git a/runtime/openjdkjvmti/ti_class.cc b/runtime/openjdkjvmti/ti_class.cc
index e0af6e8..dd90a71 100644
--- a/runtime/openjdkjvmti/ti_class.cc
+++ b/runtime/openjdkjvmti/ti_class.cc
@@ -129,6 +129,25 @@
   return dex_file;
 }
 
+// A deleter that acts like jvmtiEnv->Deallocate so that ASan does not get tripped up.
+// TODO: We should make everything use the actual jvmtiEnv->Allocate/Deallocate functions
+// once we can figure out which env to use.
+template <typename T>
+class FakeJvmtiDeleter {
+ public:
+  FakeJvmtiDeleter() {}
+
+  FakeJvmtiDeleter(const FakeJvmtiDeleter&) = default;
+  FakeJvmtiDeleter(FakeJvmtiDeleter&&) = default;
+  FakeJvmtiDeleter& operator=(const FakeJvmtiDeleter&) = default;
+
+  template <typename U> void operator()(const U* ptr) const {
+    if (ptr != nullptr) {
+      free(const_cast<U*>(ptr));
+    }
+  }
+};
+
 struct ClassCallback : public art::ClassLoadCallback {
   void ClassPreDefine(const char* descriptor,
                       art::Handle<art::mirror::Class> klass,
@@ -173,7 +192,8 @@
     // Call all Non-retransformable agents.
     jint post_no_redefine_len = 0;
     unsigned char* post_no_redefine_dex_data = nullptr;
-    std::unique_ptr<const unsigned char> post_no_redefine_unique_ptr(nullptr);
+    std::unique_ptr<const unsigned char, FakeJvmtiDeleter<const unsigned char>>
+        post_no_redefine_unique_ptr(nullptr, FakeJvmtiDeleter<const unsigned char>());
     event_handler->DispatchEvent<ArtJvmtiEvent::kClassFileLoadHookNonRetransformable>(
         self,
         static_cast<JNIEnv*>(env),
@@ -190,13 +210,16 @@
       post_no_redefine_dex_data = const_cast<unsigned char*>(dex_file_copy->Begin());
       post_no_redefine_len = dex_file_copy->Size();
     } else {
-      post_no_redefine_unique_ptr = std::unique_ptr<const unsigned char>(post_no_redefine_dex_data);
+      post_no_redefine_unique_ptr =
+          std::unique_ptr<const unsigned char, FakeJvmtiDeleter<const unsigned char>>(
+              post_no_redefine_dex_data, FakeJvmtiDeleter<const unsigned char>());
       DCHECK_GT(post_no_redefine_len, 0);
     }
     // Call all retransformable agents.
     jint final_len = 0;
     unsigned char* final_dex_data = nullptr;
-    std::unique_ptr<const unsigned char> final_dex_unique_ptr(nullptr);
+    std::unique_ptr<const unsigned char, FakeJvmtiDeleter<const unsigned char>>
+        final_dex_unique_ptr(nullptr, FakeJvmtiDeleter<const unsigned char>());
     event_handler->DispatchEvent<ArtJvmtiEvent::kClassFileLoadHookRetransformable>(
         self,
         static_cast<JNIEnv*>(env),
@@ -213,7 +236,9 @@
       final_dex_data = post_no_redefine_dex_data;
       final_len = post_no_redefine_len;
     } else {
-      final_dex_unique_ptr = std::unique_ptr<const unsigned char>(final_dex_data);
+      final_dex_unique_ptr =
+          std::unique_ptr<const unsigned char, FakeJvmtiDeleter<const unsigned char>>(
+              final_dex_data, FakeJvmtiDeleter<const unsigned char>());
       DCHECK_GT(final_len, 0);
     }
 
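
Note: FakeJvmtiDeleter exists because the dex data returned by the load hooks
is allocated with malloc-compatible semantics, so letting a plain
std::unique_ptr call delete on it would be an allocator mismatch that ASan
flags. A self-contained sketch of the custom-deleter technique (FreeDeleter is
a hypothetical name):

    #include <cstdlib>
    #include <memory>

    // Releases malloc'd memory with free(), as FakeJvmtiDeleter does above.
    struct FreeDeleter {
      template <typename U>
      void operator()(const U* ptr) const {
        if (ptr != nullptr) {
          free(const_cast<U*>(ptr));
        }
      }
    };

    int main() {
      auto* data = static_cast<unsigned char*>(malloc(16));
      std::unique_ptr<const unsigned char, FreeDeleter> owner(data, FreeDeleter());
      return owner == nullptr ? 1 : 0;  // free() runs when owner goes out of scope.
    }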
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index fc91efa..ef4957c 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -238,9 +238,9 @@
       .Define("-Xlockprofthreshold:_")
           .WithType<unsigned int>()
           .IntoKey(M::LockProfThreshold)
-      .Define("-Xstacktracedir:_")
-          .WithType<std::string>()
-          .IntoKey(M::StackTraceDir)
+      .Define("-Xusetombstonedtraces")
+          .WithValue(true)
+          .IntoKey(M::UseTombstonedTraces)
       .Define("-Xstacktracefile:_")
           .WithType<std::string>()
           .IntoKey(M::StackTraceFile)
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 3697f21..968f02a 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -834,7 +834,7 @@
 
 void Runtime::StartSignalCatcher() {
   if (!is_zygote_) {
-    signal_catcher_ = new SignalCatcher(stack_trace_dir_, stack_trace_file_);
+    signal_catcher_ = new SignalCatcher(stack_trace_file_, use_tombstoned_traces_);
   }
 }
 
@@ -1017,6 +1017,30 @@
 
   MemMap::Init();
 
+  // Try to reserve a dedicated fault page. This is allocated for clobbered registers and sentinels.
+  // If we cannot reserve it, log a warning.
+  // Note: We allocate this first to have a good chance of grabbing the page. The address (0xebad..)
+  //       is out-of-the-way enough that it should not collide with the boot image mapping.
+  // Note: Don't request an error message. On failure that would trigger a maps dump and
+  //       cause logspam.
+  {
+    constexpr uintptr_t kSentinelAddr =
+        RoundDown(static_cast<uintptr_t>(Context::kBadGprBase), kPageSize);
+    protected_fault_page_.reset(MemMap::MapAnonymous("Sentinel fault page",
+                                                     reinterpret_cast<uint8_t*>(kSentinelAddr),
+                                                     kPageSize,
+                                                     PROT_NONE,
+                                                     /* low_4g */ true,
+                                                     /* reuse */ false,
+                                                     /* error_msg */ nullptr));
+    if (protected_fault_page_ == nullptr) {
+      LOG(WARNING) << "Could not reserve sentinel fault page";
+    } else if (reinterpret_cast<uintptr_t>(protected_fault_page_->Begin()) != kSentinelAddr) {
+      LOG(WARNING) << "Could not reserve sentinel fault page at the right address.";
+      protected_fault_page_.reset();
+    }
+  }
+
   using Opt = RuntimeArgumentMap;
   VLOG(startup) << "Runtime::Init -verbose:startup enabled";
 
@@ -1045,7 +1069,11 @@
   abort_ = runtime_options.GetOrDefault(Opt::HookAbort);
 
   default_stack_size_ = runtime_options.GetOrDefault(Opt::StackSize);
-  stack_trace_dir_ = runtime_options.ReleaseOrDefault(Opt::StackTraceDir);
+  use_tombstoned_traces_ = runtime_options.GetOrDefault(Opt::UseTombstonedTraces);
+#if !defined(ART_TARGET_ANDROID)
+  CHECK(!use_tombstoned_traces_)
+      << "-Xusetombstonedtraces is only supported in an Android environment";
+#endif
   stack_trace_file_ = runtime_options.ReleaseOrDefault(Opt::StackTraceFile);
 
   compiler_executable_ = runtime_options.ReleaseOrDefault(Opt::Compiler);
@@ -1401,27 +1429,6 @@
     callbacks_->NextRuntimePhase(RuntimePhaseCallback::RuntimePhase::kInitialAgents);
   }
 
-  // Try to reserve a dedicated fault page. This is allocated for clobbered registers and sentinels.
-  // If we cannot reserve it, log a warning.
-  // Note: This is allocated last so that the heap and other things have priority, if necessary.
-  {
-    constexpr uintptr_t kSentinelAddr =
-        RoundDown(static_cast<uintptr_t>(Context::kBadGprBase), kPageSize);
-    protected_fault_page_.reset(MemMap::MapAnonymous("Sentinel fault page",
-                                                     reinterpret_cast<uint8_t*>(kSentinelAddr),
-                                                     kPageSize,
-                                                     PROT_NONE,
-                                                     true,
-                                                     false,
-                                                     &error_msg));
-    if (protected_fault_page_ == nullptr) {
-      LOG(WARNING) << "Could not reserve sentinel fault page: " << error_msg;
-    } else if (reinterpret_cast<uintptr_t>(protected_fault_page_->Begin()) != kSentinelAddr) {
-      LOG(WARNING) << "Could not reserve sentinel fault page at the right address.";
-      protected_fault_page_.reset();
-    }
-  }
-
   VLOG(startup) << "Runtime::Init exiting";
 
   return true;
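
Note: the sentinel fault page is a guard-page trick: reserve one page at a
known address with PROT_NONE so that any access through a clobbered register
faults deterministically. A hedged sketch of the underlying technique using
plain POSIX mmap (the diff itself goes through ART's MemMap::MapAnonymous, and
the address below is illustrative):

    #include <sys/mman.h>
    #include <unistd.h>
    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    int main() {
      const size_t page = static_cast<size_t>(sysconf(_SC_PAGESIZE));
      void* hint = reinterpret_cast<void*>(uintptr_t{0xebad0000u});
      void* p = mmap(hint, page, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
      if (p == MAP_FAILED) {
        perror("mmap");  // Analogous to "Could not reserve sentinel fault page".
        return 1;
      }
      if (p != hint) {
        // Without MAP_FIXED the kernel may place the mapping elsewhere; the
        // diff treats that as failure and drops the reservation.
        munmap(p, page);
        return 1;
      }
      return 0;  // Any load/store through the page now raises SIGSEGV.
    }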
diff --git a/runtime/runtime.h b/runtime/runtime.h
index 2e3b8d7..4e143e0 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -681,6 +681,14 @@
     deoptimization_counts_[static_cast<size_t>(kind)]++;
   }
 
+  uint32_t GetNumberOfDeoptimizations() const {
+    uint32_t result = 0;
+    for (size_t i = 0; i <= static_cast<size_t>(DeoptimizationKind::kLast); ++i) {
+      result += deoptimization_counts_[i];
+    }
+    return result;
+  }
+
  private:
   static void InitPlatformSignalHandlers();
 
@@ -783,7 +791,13 @@
   ClassLinker* class_linker_;
 
   SignalCatcher* signal_catcher_;
-  std::string stack_trace_dir_;
+
+  // If true, the runtime will connect to tombstoned via a socket to
+  // request an open file descriptor to write its traces to.
+  bool use_tombstoned_traces_;
+
+  // Location to which traces must be written on SIGQUIT. Only used if
+  // use_tombstoned_traces_ == false.
   std::string stack_trace_file_;
 
   std::unique_ptr<JavaVMExt> java_vm_;
diff --git a/runtime/runtime_options.def b/runtime/runtime_options.def
index 77132a8..cfc681f 100644
--- a/runtime/runtime_options.def
+++ b/runtime/runtime_options.def
@@ -100,7 +100,7 @@
 RUNTIME_OPTIONS_KEY (Unit,                ForceNativeBridge)
 RUNTIME_OPTIONS_KEY (LogVerbosity,        Verbose)
 RUNTIME_OPTIONS_KEY (unsigned int,        LockProfThreshold)
-RUNTIME_OPTIONS_KEY (std::string,         StackTraceDir)
+RUNTIME_OPTIONS_KEY (bool,                UseTombstonedTraces, false)
 RUNTIME_OPTIONS_KEY (std::string,         StackTraceFile)
 RUNTIME_OPTIONS_KEY (Unit,                MethodTrace)
 RUNTIME_OPTIONS_KEY (std::string,         MethodTraceFile,                "/data/misc/trace/method-trace-file.bin")
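
Note: runtime_options.def is consumed with the X-macro pattern: the including
file defines RUNTIME_OPTIONS_KEY, includes the .def, and every line expands in
place, which is why adding the UseTombstonedTraces key is a one-line change.
A minimal sketch of the general technique (not ART's exact expansion
machinery; empty __VA_ARGS__ is standard only since C++20, though most
compilers accept it earlier):

    #include <string>

    #define RUNTIME_OPTIONS_KEY(type, name, ...) type name{__VA_ARGS__};
    struct Options {
      RUNTIME_OPTIONS_KEY(bool,        UseTombstonedTraces, false)
      RUNTIME_OPTIONS_KEY(std::string, StackTraceFile)
    };
    #undef RUNTIME_OPTIONS_KEY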
diff --git a/runtime/signal_catcher.cc b/runtime/signal_catcher.cc
index faea7b3..e3dfc74 100644
--- a/runtime/signal_catcher.cc
+++ b/runtime/signal_catcher.cc
@@ -42,6 +42,10 @@
 #include "thread_list.h"
 #include "utils.h"
 
+#if defined(ART_TARGET_ANDROID)
+#include "tombstoned/tombstoned.h"
+#endif
+
 namespace art {
 
 static void DumpCmdLine(std::ostream& os) {
@@ -66,13 +70,19 @@
 #endif
 }
 
-SignalCatcher::SignalCatcher(const std::string& stack_trace_dir,
-                             const std::string& stack_trace_file)
-    : stack_trace_dir_(stack_trace_dir),
-      stack_trace_file_(stack_trace_file),
+SignalCatcher::SignalCatcher(const std::string& stack_trace_file,
+                             bool use_tombstoned_stack_trace_fd)
+    : stack_trace_file_(stack_trace_file),
+      use_tombstoned_stack_trace_fd_(use_tombstoned_stack_trace_fd),
       lock_("SignalCatcher lock"),
       cond_("SignalCatcher::cond_", lock_),
       thread_(nullptr) {
+#if !defined(ART_TARGET_ANDROID)
+  // We're not running on Android, so we can't communicate with tombstoned
+  // to ask for an open file.
+  CHECK(!use_tombstoned_stack_trace_fd_);
+#endif
+
   SetHaltFlag(false);
 
   // Create a raw pthread; its start routine will attach to the runtime.
@@ -103,62 +113,65 @@
   return halt_;
 }
 
-std::string SignalCatcher::GetStackTraceFileName() {
-  if (!stack_trace_dir_.empty()) {
-    // We'll try a maximum of ten times (arbitrarily selected) to create a file
-    // with a unique name, seeding the pseudo random generator each time.
-    //
-    // If this doesn't work, give up and log to stdout. Note that we could try
-    // indefinitely, but that would make problems in this code harder to detect
-    // since we'd be spinning in the signal catcher thread.
-    static constexpr uint32_t kMaxRetries = 10;
-
-    for (uint32_t i = 0; i < kMaxRetries; ++i) {
-        std::srand(NanoTime());
-        // Sample output for PID 1234 : /data/anr/anr-pid1234-cafeffee.txt
-        const std::string file_name = android::base::StringPrintf(
-            "%s/anr-pid%" PRId32 "-%08" PRIx32 ".txt",
-            stack_trace_dir_.c_str(),
-            static_cast<int32_t>(getpid()),
-            static_cast<uint32_t>(std::rand()));
-
-        if (!OS::FileExists(file_name.c_str())) {
-          return file_name;
-        }
-    }
-
-    LOG(ERROR) << "Unable to obtain stack trace filename at path : " << stack_trace_dir_;
-    return "";
+bool SignalCatcher::OpenStackTraceFile(android::base::unique_fd* tombstone_fd,
+                                       android::base::unique_fd* output_fd) {
+  if (use_tombstoned_stack_trace_fd_) {
+#if defined(ART_TARGET_ANDROID)
+    return tombstoned_connect(getpid(), tombstone_fd, output_fd, false /* is_native_crash */);
+#else
+    UNUSED(tombstone_fd);
+    UNUSED(output_fd);
+#endif
   }
 
-  return stack_trace_file_;
+  // The runtime is not configured to dump traces to a file; we will LOG(INFO)
+  // them instead.
+  if (stack_trace_file_.empty()) {
+    return false;
+  }
+
+  int fd = open(stack_trace_file_.c_str(), O_APPEND | O_CREAT | O_WRONLY, 0666);
+  if (fd == -1) {
+    PLOG(ERROR) << "Unable to open stack trace file '" << stack_trace_file_ << "'";
+    return false;
+  }
+
+  output_fd->reset(fd);
+  return true;
 }
 
 void SignalCatcher::Output(const std::string& s) {
-  const std::string output_file = GetStackTraceFileName();
-  if (output_file.empty()) {
+  android::base::unique_fd tombstone_fd;
+  android::base::unique_fd output_fd;
+  if (!OpenStackTraceFile(&tombstone_fd, &output_fd)) {
     LOG(INFO) << s;
     return;
   }
 
   ScopedThreadStateChange tsc(Thread::Current(), kWaitingForSignalCatcherOutput);
-  int fd = open(output_file.c_str(), O_APPEND | O_CREAT | O_WRONLY, 0666);
-  if (fd == -1) {
-    PLOG(ERROR) << "Unable to open stack trace file '" << output_file << "'";
-    return;
-  }
-  std::unique_ptr<File> file(new File(fd, output_file, true));
+
+  std::unique_ptr<File> file(new File(output_fd.release(), true /* check_usage */));
   bool success = file->WriteFully(s.data(), s.size());
   if (success) {
     success = file->FlushCloseOrErase() == 0;
   } else {
     file->Erase();
   }
+
+  const std::string output_path_msg = (use_tombstoned_stack_trace_fd_) ?
+      "[tombstoned]" : stack_trace_file_;
+
   if (success) {
-    LOG(INFO) << "Wrote stack traces to '" << output_file << "'";
+    LOG(INFO) << "Wrote stack traces to '" << output_path_msg << "'";
   } else {
-    PLOG(ERROR) << "Failed to write stack traces to '" << output_file << "'";
+    PLOG(ERROR) << "Failed to write stack traces to '" << output_path_msg << "'";
   }
+
+#if defined(ART_TARGET_ANDROID)
+  if (!tombstoned_notify_completion(tombstone_fd)) {
+    LOG(WARNING) << "Unable to notify tombstoned of dump completion.";
+  }
+#endif
 }
 
 void SignalCatcher::HandleSigQuit() {
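
Note: OpenStackTraceFile() above picks between three sinks: a descriptor
handed over by tombstoned, a configured trace file, or the log. A condensed
sketch of that selection logic, with the Android-only tombstoned call stubbed
out (FakeTombstonedConnect and PickTraceSink are hypothetical names):

    #include <fcntl.h>
    #include <string>

    // Stub standing in for tombstoned_connect(); on Android the real call
    // obtains a tombstone fd and an output fd over a unix domain socket.
    static bool FakeTombstonedConnect(int* output_fd) {
      *output_fd = -1;
      return false;
    }

    // Returns true and sets *output_fd when traces should go to a descriptor;
    // returns false to mean "write to the log instead", as in the diff.
    bool PickTraceSink(bool use_tombstoned,
                       const std::string& trace_file,
                       int* output_fd) {
      if (use_tombstoned) {
        return FakeTombstonedConnect(output_fd);
      }
      if (trace_file.empty()) {
        return false;  // No file configured: caller falls back to LOG(INFO).
      }
      int fd = open(trace_file.c_str(), O_APPEND | O_CREAT | O_WRONLY, 0666);
      if (fd == -1) {
        return false;
      }
      *output_fd = fd;
      return true;
    }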
diff --git a/runtime/signal_catcher.h b/runtime/signal_catcher.h
index 4cd7a98..8a2a728 100644
--- a/runtime/signal_catcher.h
+++ b/runtime/signal_catcher.h
@@ -17,6 +17,7 @@
 #ifndef ART_RUNTIME_SIGNAL_CATCHER_H_
 #define ART_RUNTIME_SIGNAL_CATCHER_H_
 
+#include "android-base/unique_fd.h"
 #include "base/mutex.h"
 
 namespace art {
@@ -32,15 +33,17 @@
  */
 class SignalCatcher {
  public:
-  // If |stack_trace_dir| is non empty, traces will be written to a
-  // unique file under that directory.
+  // If |use_tombstoned_stack_trace_fd| is |true|, traces will be
+  // written to a file descriptor provided by tombstoned. The process
+  // will communicate with tombstoned via a unix domain socket. This
+  // mode of stack trace dumping is only supported in an Android
+  // environment.
   //
-  // If |stack_trace_dir| is empty, and |stack_frace_file| is non-empty,
-  // traces will be appended to |stack_trace_file|.
-  //
-  // If both are empty, all traces will be written to the log buffer.
-  explicit SignalCatcher(const std::string& stack_trace_dir,
-                         const std::string& stack_trace_file);
+  // If false, all traces will be dumped to |stack_trace_file| if it's
+  // non-empty. If |stack_trace_file| is empty, all traces will be written
+  // to the log buffer.
+  SignalCatcher(const std::string& stack_trace_file,
+                const bool use_tombstoned_stack_trace_fd);
   ~SignalCatcher();
 
   void HandleSigQuit() REQUIRES(!Locks::mutator_lock_, !Locks::thread_list_lock_,
@@ -51,15 +54,18 @@
   // NO_THREAD_SAFETY_ANALYSIS for static function calling into member function with excludes lock.
   static void* Run(void* arg) NO_THREAD_SAFETY_ANALYSIS;
 
-  std::string GetStackTraceFileName();
+  // NOTE: We're using android::base::unique_fd here for easier
+  // interoperability with tombstoned client APIs.
+  bool OpenStackTraceFile(android::base::unique_fd* tombstone_fd,
+                          android::base::unique_fd* output_fd);
   void HandleSigUsr1();
   void Output(const std::string& s);
   void SetHaltFlag(bool new_value) REQUIRES(!lock_);
   bool ShouldHalt() REQUIRES(!lock_);
   int WaitForSignal(Thread* self, SignalSet& signals) REQUIRES(!lock_);
 
-  std::string stack_trace_dir_;
   std::string stack_trace_file_;
+  const bool use_tombstoned_stack_trace_fd_;
 
   mutable Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
   ConditionVariable cond_ GUARDED_BY(lock_);
diff --git a/test/409-materialized-condition/src/Main.java b/test/409-materialized-condition/src/Main.java
index 0c179a9..5f21bc3 100644
--- a/test/409-materialized-condition/src/Main.java
+++ b/test/409-materialized-condition/src/Main.java
@@ -50,6 +50,49 @@
     return b;
   }
 
+  public static boolean $noinline$intEq0(int x) {
+    return x == 0;
+  }
+
+  public static boolean $noinline$intNe0(int x) {
+    return x != 0;
+  }
+
+  public static boolean $noinline$longEq0(long x) {
+    return x == 0;
+  }
+
+  public static boolean $noinline$longNe0(long x) {
+    return x != 0;
+  }
+
+  public static boolean $noinline$longEqCst(long x) {
+    return x == 0x0123456789ABCDEFL;
+  }
+
+  public static boolean $noinline$longNeCst(long x) {
+    return x != 0x0123456789ABCDEFL;
+  }
+
+  public static void assertEqual(boolean expected, boolean actual) {
+    if (expected != actual) {
+      throw new Error("Assertion failed: " + expected + " != " + actual);
+    }
+  }
+
+  // The purpose of this method is to test code generation for a materialized
+  // HCondition that is not equality or inequality, and that has one boolean
+  // input. That can't be done directly, so we have to rely on the instruction
+  // simplifier to transform the control-flow graph appropriately.
+  public static boolean $noinline$booleanCondition(boolean in) {
+    int value = in ? 1 : 0;
+
+    // Calling a non-inlineable method that also uses `value` prevents the
+    // return value from being transformed into `false`.
+    $noinline$intNe0(value);
+    return value > 127;
+  }
+
   public static void main(String[] args) {
     System.out.println("foo1");
     int res = foo1();
@@ -62,5 +105,49 @@
     if (res != 42) {
       throw new Error("Unexpected return value for foo2: " + res + ", expected 42.");
     }
+
+    assertEqual($noinline$booleanCondition(false), false);
+    assertEqual($noinline$booleanCondition(true), false);
+
+    int[] int_inputs = {0, 1, -1, Integer.MIN_VALUE, Integer.MAX_VALUE, 42, -9000};
+    long[] long_inputs = {
+        0L, 1L, -1L, Long.MIN_VALUE, Long.MAX_VALUE, 0x100000000L,
+        0x100000001L, -9000L, 0x0123456789ABCDEFL};
+
+    boolean[] int_eq_0_expected = {true, false, false, false, false, false, false};
+
+    for (int i = 0; i < int_inputs.length; i++) {
+      assertEqual(int_eq_0_expected[i], $noinline$intEq0(int_inputs[i]));
+    }
+
+    boolean[] int_ne_0_expected = {false, true, true, true, true, true, true};
+
+    for (int i = 0; i < int_inputs.length; i++) {
+      assertEqual(int_ne_0_expected[i], $noinline$intNe0(int_inputs[i]));
+    }
+
+    boolean[] long_eq_0_expected = {true, false, false, false, false, false, false, false, false};
+
+    for (int i = 0; i < long_inputs.length; i++) {
+      assertEqual(long_eq_0_expected[i], $noinline$longEq0(long_inputs[i]));
+    }
+
+    boolean[] long_ne_0_expected = {false, true, true, true, true, true, true, true, true};
+
+    for (int i = 0; i < long_inputs.length; i++) {
+      assertEqual(long_ne_0_expected[i], $noinline$longNe0(long_inputs[i]));
+    }
+
+    boolean[] long_eq_cst_expected = {false, false, false, false, false, false, false, false, true};
+
+    for (int i = 0; i < long_inputs.length; i++) {
+      assertEqual(long_eq_cst_expected[i], $noinline$longEqCst(long_inputs[i]));
+    }
+
+    boolean[] long_ne_cst_expected = {true, true, true, true, true, true, true, true, false};
+
+    for (int i = 0; i < long_inputs.length; i++) {
+      assertEqual(long_ne_cst_expected[i], $noinline$longNeCst(long_inputs[i]));
+    }
   }
 }
diff --git a/test/476-checker-ctor-memory-barrier/src/Main.java b/test/476-checker-ctor-memory-barrier/src/Main.java
index 70c5121..e887cd3 100644
--- a/test/476-checker-ctor-memory-barrier/src/Main.java
+++ b/test/476-checker-ctor-memory-barrier/src/Main.java
@@ -261,7 +261,7 @@
 
   /// CHECK-START: void Main.testNewString() inliner (after)
   /// CHECK-NOT:  ConstructorFence
-  /// CHECK:      InvokeStaticOrDirect method_load_kind:string_init
+  /// CHECK:      InvokeStaticOrDirect method_load_kind:StringInit
   /// CHECK-NOT:  ConstructorFence
   /// CHECK-NOT:  InvokeStaticOrDirect
   public static void testNewString() {
diff --git a/test/488-checker-inline-recursive-calls/src/Main.java b/test/488-checker-inline-recursive-calls/src/Main.java
index 87ff3f7..441dbbf 100644
--- a/test/488-checker-inline-recursive-calls/src/Main.java
+++ b/test/488-checker-inline-recursive-calls/src/Main.java
@@ -25,10 +25,10 @@
   }
 
   /// CHECK-START: void Main.doTopCall(boolean) inliner (before)
-  /// CHECK-NOT:   InvokeStaticOrDirect method_load_kind:recursive
+  /// CHECK-NOT:   InvokeStaticOrDirect method_load_kind:Recursive
 
   /// CHECK-START: void Main.doTopCall(boolean) inliner (after)
-  /// CHECK:       InvokeStaticOrDirect method_load_kind:recursive
+  /// CHECK:       InvokeStaticOrDirect method_load_kind:Recursive
   public static void doTopCall(boolean first_call) {
     if (first_call) {
       inline1();
diff --git a/test/552-checker-sharpening/src/Main.java b/test/552-checker-sharpening/src/Main.java
index dd77423..3f81fd6 100644
--- a/test/552-checker-sharpening/src/Main.java
+++ b/test/552-checker-sharpening/src/Main.java
@@ -42,27 +42,27 @@
   }
 
   /// CHECK-START: int Main.testSimple(int) sharpening (before)
-  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_via_method
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:DexCacheViaMethod
 
   /// CHECK-START-ARM: int Main.testSimple(int) sharpening (after)
   /// CHECK-NOT:            ArmDexCacheArraysBase
-  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:DexCachePcRelative
 
   /// CHECK-START-ARM64: int Main.testSimple(int) sharpening (after)
-  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:DexCachePcRelative
 
   /// CHECK-START-MIPS: int Main.testSimple(int) sharpening (after)
-  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:DexCachePcRelative
 
   /// CHECK-START-MIPS64: int Main.testSimple(int) sharpening (after)
-  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:DexCachePcRelative
 
   /// CHECK-START-X86: int Main.testSimple(int) sharpening (after)
   /// CHECK-NOT:            X86ComputeBaseMethodAddress
-  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:DexCachePcRelative
 
   /// CHECK-START-X86_64: int Main.testSimple(int) sharpening (after)
-  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:DexCachePcRelative
 
   /// CHECK-START-ARM: int Main.testSimple(int) dex_cache_array_fixups_arm (after)
   /// CHECK:                ArmDexCacheArraysBase
@@ -78,33 +78,33 @@
   }
 
   /// CHECK-START: int Main.testDiamond(boolean, int) sharpening (before)
-  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_via_method
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:DexCacheViaMethod
 
   /// CHECK-START-ARM: int Main.testDiamond(boolean, int) sharpening (after)
   /// CHECK-NOT:            ArmDexCacheArraysBase
-  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
-  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:DexCachePcRelative
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:DexCachePcRelative
 
   /// CHECK-START-ARM64: int Main.testDiamond(boolean, int) sharpening (after)
-  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
-  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:DexCachePcRelative
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:DexCachePcRelative
 
   /// CHECK-START-MIPS: int Main.testDiamond(boolean, int) sharpening (after)
-  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
-  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:DexCachePcRelative
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:DexCachePcRelative
 
   /// CHECK-START-MIPS64: int Main.testDiamond(boolean, int) sharpening (after)
-  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
-  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:DexCachePcRelative
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:DexCachePcRelative
 
   /// CHECK-START-X86: int Main.testDiamond(boolean, int) sharpening (after)
   /// CHECK-NOT:            X86ComputeBaseMethodAddress
-  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
-  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:DexCachePcRelative
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:DexCachePcRelative
 
   /// CHECK-START-X86_64: int Main.testDiamond(boolean, int) sharpening (after)
-  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
-  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:DexCachePcRelative
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:DexCachePcRelative
 
   /// CHECK-START-ARM: int Main.testDiamond(boolean, int) dex_cache_array_fixups_arm (after)
   /// CHECK:                ArmDexCacheArraysBase
@@ -148,7 +148,7 @@
   /// CHECK-NEXT:           X86ComputeBaseMethodAddress
   /// CHECK-NEXT:           Goto
   /// CHECK:                begin_block
-  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:DexCachePcRelative
 
   /// CHECK-START-ARM: int Main.testLoop(int[], int) dex_cache_array_fixups_arm (before)
   /// CHECK-NOT:            ArmDexCacheArraysBase
@@ -166,7 +166,7 @@
   /// CHECK-NEXT:           ArmDexCacheArraysBase
   /// CHECK-NEXT:           Goto
   /// CHECK:                begin_block
-  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:DexCachePcRelative
 
   public static int testLoop(int[] array, int x) {
     // PC-relative bases used by ARM, MIPS and X86 should be pulled before the loop.
@@ -216,33 +216,27 @@
 
   /// CHECK-START-X86: java.lang.String Main.$noinline$getBootImageString() sharpening (after)
   // Note: load kind depends on PIC/non-PIC
-  // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress.
-  /// CHECK:                LoadString load_kind:{{BootImageAddress|BssEntry|DexCacheViaMethod}}
+  /// CHECK:                LoadString load_kind:{{BootImageAddress|BssEntry}}
 
   /// CHECK-START-X86_64: java.lang.String Main.$noinline$getBootImageString() sharpening (after)
   // Note: load kind depends on PIC/non-PIC
-  // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress.
-  /// CHECK:                LoadString load_kind:{{BootImageAddress|BssEntry|DexCacheViaMethod}}
+  /// CHECK:                LoadString load_kind:{{BootImageAddress|BssEntry}}
 
   /// CHECK-START-ARM: java.lang.String Main.$noinline$getBootImageString() sharpening (after)
   // Note: load kind depends on PIC/non-PIC
-  // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress.
-  /// CHECK:                LoadString load_kind:{{BootImageAddress|BssEntry|DexCacheViaMethod}}
+  /// CHECK:                LoadString load_kind:{{BootImageAddress|BssEntry}}
 
   /// CHECK-START-ARM64: java.lang.String Main.$noinline$getBootImageString() sharpening (after)
   // Note: load kind depends on PIC/non-PIC
-  // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress.
-  /// CHECK:                LoadString load_kind:{{BootImageAddress|BssEntry|DexCacheViaMethod}}
+  /// CHECK:                LoadString load_kind:{{BootImageAddress|BssEntry}}
 
   /// CHECK-START-MIPS: java.lang.String Main.$noinline$getBootImageString() sharpening (after)
   // Note: load kind depends on PIC/non-PIC
-  // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress.
-  /// CHECK:                LoadString load_kind:{{BootImageAddress|BssEntry|DexCacheViaMethod}}
+  /// CHECK:                LoadString load_kind:{{BootImageAddress|BssEntry}}
 
   /// CHECK-START-MIPS64: java.lang.String Main.$noinline$getBootImageString() sharpening (after)
   // Note: load kind depends on PIC/non-PIC
-  // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress.
-  /// CHECK:                LoadString load_kind:{{BootImageAddress|BssEntry|DexCacheViaMethod}}
+  /// CHECK:                LoadString load_kind:{{BootImageAddress|BssEntry}}
 
   public static String $noinline$getBootImageString() {
     // Prevent inlining to avoid the string comparison being optimized away.
@@ -285,33 +279,27 @@
 
   /// CHECK-START-X86: java.lang.Class Main.$noinline$getStringClass() sharpening (after)
   // Note: load kind depends on PIC/non-PIC
-  // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress.
-  /// CHECK:                LoadClass load_kind:{{BootImageAddress|BssEntry|DexCacheViaMethod}} class_name:java.lang.String
+  /// CHECK:                LoadClass load_kind:{{BootImageAddress|BssEntry}} class_name:java.lang.String
 
   /// CHECK-START-X86_64: java.lang.Class Main.$noinline$getStringClass() sharpening (after)
   // Note: load kind depends on PIC/non-PIC
-  // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress.
-  /// CHECK:                LoadClass load_kind:{{BootImageAddress|BssEntry|DexCacheViaMethod}} class_name:java.lang.String
+  /// CHECK:                LoadClass load_kind:{{BootImageAddress|BssEntry}} class_name:java.lang.String
 
   /// CHECK-START-ARM: java.lang.Class Main.$noinline$getStringClass() sharpening (after)
   // Note: load kind depends on PIC/non-PIC
-  // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress.
-  /// CHECK:                LoadClass load_kind:{{BootImageAddress|BssEntry|DexCacheViaMethod}} class_name:java.lang.String
+  /// CHECK:                LoadClass load_kind:{{BootImageAddress|BssEntry}} class_name:java.lang.String
 
   /// CHECK-START-ARM64: java.lang.Class Main.$noinline$getStringClass() sharpening (after)
   // Note: load kind depends on PIC/non-PIC
-  // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress.
-  /// CHECK:                LoadClass load_kind:{{BootImageAddress|BssEntry|DexCacheViaMethod}} class_name:java.lang.String
+  /// CHECK:                LoadClass load_kind:{{BootImageAddress|BssEntry}} class_name:java.lang.String
 
   /// CHECK-START-MIPS: java.lang.Class Main.$noinline$getStringClass() sharpening (after)
   // Note: load kind depends on PIC/non-PIC
-  // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress.
-  /// CHECK:                LoadClass load_kind:{{BootImageAddress|BssEntry|DexCacheViaMethod}} class_name:java.lang.String
+  /// CHECK:                LoadClass load_kind:{{BootImageAddress|BssEntry}} class_name:java.lang.String
 
   /// CHECK-START-MIPS64: java.lang.Class Main.$noinline$getStringClass() sharpening (after)
   // Note: load kind depends on PIC/non-PIC
-  // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress.
-  /// CHECK:                LoadClass load_kind:{{BootImageAddress|BssEntry|DexCacheViaMethod}} class_name:java.lang.String
+  /// CHECK:                LoadClass load_kind:{{BootImageAddress|BssEntry}} class_name:java.lang.String
 
   public static Class<?> $noinline$getStringClass() {
     // Prevent inlining to avoid the string comparison being optimized away.
diff --git a/test/570-checker-select/src/Main.java b/test/570-checker-select/src/Main.java
index 3ac6f89..2dad14c 100644
--- a/test/570-checker-select/src/Main.java
+++ b/test/570-checker-select/src/Main.java
@@ -414,6 +414,46 @@
     return a > 0x7FFFFFFFFFFFFFFFL ? x : y;
   }
 
+  /// CHECK-START-ARM: long Main.$noinline$LongNonmatCondCst_LongVarVar4(long, long, long) disassembly (after)
+  /// CHECK:               Select
+  /// CHECK-NEXT:            orrs ip, {{r\d+}}, {{r\d+}}
+  /// CHECK-NOT:             cmp
+  /// CHECK-NOT:             sbcs
+
+  public static long $noinline$LongNonmatCondCst_LongVarVar4(long a, long x, long y) {
+    return a == 0 ? x : y;
+  }
+
+  /// CHECK-START-ARM: long Main.$noinline$LongNonmatCondCst_LongVarVar5(long, long, long) disassembly (after)
+  /// CHECK:               Select
+  /// CHECK-NEXT:            orrs ip, {{r\d+}}, {{r\d+}}
+  /// CHECK-NOT:             cmp
+  /// CHECK-NOT:             sbcs
+
+  public static long $noinline$LongNonmatCondCst_LongVarVar5(long a, long x, long y) {
+    return a != 0 ? x : y;
+  }
+
+  /// CHECK-START-ARM: long Main.$noinline$LongNonmatCondCst_LongVarVar6(long, long, long) disassembly (after)
+  /// CHECK:               Select
+  /// CHECK-NEXT:            cmp {{r\d+}}, #0
+  /// CHECK-NOT:             cmp
+  /// CHECK-NOT:             sbcs
+
+  public static long $noinline$LongNonmatCondCst_LongVarVar6(long a, long x, long y) {
+    return a >= 0 ? x : y;
+  }
+
+  /// CHECK-START-ARM: long Main.$noinline$LongNonmatCondCst_LongVarVar7(long, long, long) disassembly (after)
+  /// CHECK:               Select
+  /// CHECK-NEXT:            cmp {{r\d+}}, #0
+  /// CHECK-NOT:             cmp
+  /// CHECK-NOT:             sbcs
+
+  public static long $noinline$LongNonmatCondCst_LongVarVar7(long a, long x, long y) {
+    return a < 0 ? x : y;
+  }
+
   /// CHECK-START: long Main.LongMatCond_LongVarVar(long, long, long, long) register (after)
   /// CHECK:            <<Cond:z\d+>> LessThanOrEqual [{{j\d+}},{{j\d+}}]
   /// CHECK:            <<Sel1:j\d+>> Select [{{j\d+}},{{j\d+}},<<Cond>>]
@@ -688,6 +728,37 @@
 
     assertEqual(7L, $noinline$LongNonmatCondCst_LongVarVar3(2L, 5L, 7L));
 
+    long[] long_inputs = {
+        0L, 1L, -1L, Long.MIN_VALUE, Long.MAX_VALUE, 2L, 0x100000000L, 0xFFFFFFFF00000000L, -9000L};
+
+    long[] expected_1 = {5L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L};
+
+    for (int i = 0; i < long_inputs.length; i++) {
+      assertEqual(expected_1[i], $noinline$LongNonmatCondCst_LongVarVar4(long_inputs[i], 5L, 7L));
+    }
+
+    long[] expected_2 = {7L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L};
+
+    for (int i = 0; i < long_inputs.length; i++) {
+      assertEqual(expected_2[i], $noinline$LongNonmatCondCst_LongVarVar5(long_inputs[i], 5L, 7L));
+    }
+
+    long[] expected_3 = {5L, 5L, 7L, 7L, 5L, 5L, 5L, 7L, 7L};
+
+    for (int i = 0; i < long_inputs.length; i++) {
+      assertEqual(expected_3[i], $noinline$LongNonmatCondCst_LongVarVar6(long_inputs[i], 5L, 7L));
+    }
+
+    long[] expected_4 = {7L, 7L, 5L, 5L, 7L, 7L, 7L, 5L, 5L};
+
+    for (int i = 0; i < long_inputs.length; i++) {
+      assertEqual(expected_4[i], $noinline$LongNonmatCondCst_LongVarVar7(long_inputs[i], 5L, 7L));
+    }
+
+    assertEqual(7L, $noinline$LongNonmatCondCst_LongVarVar7(0L, 5L, 7L));
+    assertEqual(7L, $noinline$LongNonmatCondCst_LongVarVar7(2L, 5L, 7L));
+    assertEqual(5L, $noinline$LongNonmatCondCst_LongVarVar7(-9000L, 5L, 7L));
+
     assertEqual(5, FloatLtNonmatCond_IntVarVar(3, 2, 5, 7));
     assertEqual(7, FloatLtNonmatCond_IntVarVar(2, 3, 5, 7));
     assertEqual(7, FloatLtNonmatCond_IntVarVar(Float.NaN, 2, 5, 7));
diff --git a/test/623-checker-loop-regressions/src/Main.java b/test/623-checker-loop-regressions/src/Main.java
index 520e7c3..3a2145bf 100644
--- a/test/623-checker-loop-regressions/src/Main.java
+++ b/test/623-checker-loop-regressions/src/Main.java
@@ -351,6 +351,35 @@
     }
   }
 
+  /// CHECK-START: void Main.typeConv(byte[], byte[]) loop_optimization (before)
+  /// CHECK-DAG: <<One:i\d+>>  IntConstant 1                       loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:b\d+>>  ArrayGet [{{l\d+}},<<Phi>>]         loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add:i\d+>>  Add [<<Get>>,<<One>>]               loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Cnv:b\d+>>  TypeConversion [<<Add>>]            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.typeConv(byte[], byte[]) loop_optimization (after)
+  /// CHECK-DAG: <<One:i\d+>>  IntConstant 1                         loop:none
+  /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<One>>]          loop:none
+  /// CHECK-DAG: <<Phi1:i\d+>> Phi                                   loop:<<Loop1:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Load:d\d+>> VecLoad [{{l\d+}},<<Phi1>>]           loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: <<Vadd:d\d+>> VecAdd [<<Load>>,<<Repl>>]            loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi1>>,<<Vadd>>] loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: <<Phi2:i\d+>> Phi                                   loop:<<Loop2:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:b\d+>>  ArrayGet [{{l\d+}},<<Phi2>>]          loop:<<Loop2>>      outer_loop:none
+  /// CHECK-DAG: <<Add:i\d+>>  Add [<<Get>>,<<One>>]                 loop:<<Loop2>>      outer_loop:none
+  /// CHECK-DAG: <<Cnv:b\d+>>  TypeConversion [<<Add>>]              loop:<<Loop2>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi2>>,<<Cnv>>]  loop:<<Loop2>>      outer_loop:none
+  //
+  // Scalar code in cleanup loop uses correct byte type on array get and type conversion.
+  private static void typeConv(byte[] a, byte[] b) {
+    int len = Math.min(a.length, b.length);
+    for (int i = 0; i < len; i++) {
+      a[i] = (byte) (b[i] + 1);
+    }
+  }
+
   public static void main(String[] args) {
     expectEquals(10, earlyExitFirst(-1));
     for (int i = 0; i <= 10; i++) {
@@ -453,6 +482,17 @@
       expectEquals(40, bt[i]);
     }
 
+    byte[] b1 = new byte[259];  // a few extra iterations
+    byte[] b2 = new byte[259];
+    for (int i = 0; i < 259; i++) {
+      b1[i] = 0;
+      b2[i] = (byte) i;
+    }
+    typeConv(b1, b2);
+    for (int i = 0; i < 259; i++) {
+      expectEquals((byte)(i + 1), b1[i]);
+    }
+
     System.out.println("passed");
   }
 
diff --git a/test/640-checker-byte-simd/src/Main.java b/test/640-checker-byte-simd/src/Main.java
index 10b20b8..21d71e8 100644
--- a/test/640-checker-byte-simd/src/Main.java
+++ b/test/640-checker-byte-simd/src/Main.java
@@ -135,8 +135,10 @@
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-ARM64: void Main.sar2() loop_optimization (after)
-  //
-  // TODO: fill in when supported
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecShr   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void sar2() {
     for (int i = 0; i < 128; i++)
       a[i] >>= 2;
@@ -147,9 +149,9 @@
   /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM64: void Main.shr2() loop_optimization (after)
-  //
-  // TODO: fill in when supported
+  // TODO: would need a signedness flip.
+  /// CHECK-START: void Main.shr2() loop_optimization (after)
+  /// CHECK-NOT: VecUShr
   static void shr2() {
     for (int i = 0; i < 128; i++)
       a[i] >>>= 2;
diff --git a/test/640-checker-char-simd/src/Main.java b/test/640-checker-char-simd/src/Main.java
index 0628b36..89d4b6b 100644
--- a/test/640-checker-char-simd/src/Main.java
+++ b/test/640-checker-char-simd/src/Main.java
@@ -134,9 +134,9 @@
   /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM64: void Main.sar2() loop_optimization (after)
-  //
-  // TODO: fill in when supported
+  // TODO: would need a signedness flip.
+  /// CHECK-START: void Main.sar2() loop_optimization (after)
+  /// CHECK-NOT: VecShr
   static void sar2() {
     for (int i = 0; i < 128; i++)
       a[i] >>= 2;
@@ -148,8 +148,10 @@
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-ARM64: void Main.shr2() loop_optimization (after)
-  //
-  // TODO: fill in when supported
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecUShr  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void shr2() {
     for (int i = 0; i < 128; i++)
       a[i] >>>= 2;
diff --git a/test/640-checker-double-simd/src/Main.java b/test/640-checker-double-simd/src/Main.java
index 0d4f87a..5709b5d 100644
--- a/test/640-checker-double-simd/src/Main.java
+++ b/test/640-checker-double-simd/src/Main.java
@@ -122,8 +122,10 @@
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-ARM64: void Main.conv(long[]) loop_optimization (after)
+  /// CHECK-NOT: VecLoad
+  /// CHECK-NOT: VecStore
   //
-  // TODO: fill in when supported
+  // TODO: fill in when long2double is supported
   static void conv(long[] b) {
     for (int i = 0; i < 128; i++)
       a[i] = b[i];
diff --git a/test/640-checker-int-simd/src/Main.java b/test/640-checker-int-simd/src/Main.java
index 97048eb..9ee553c 100644
--- a/test/640-checker-int-simd/src/Main.java
+++ b/test/640-checker-int-simd/src/Main.java
@@ -136,8 +136,10 @@
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-ARM64: void Main.sar2() loop_optimization (after)
-  //
-  // TODO: fill in when supported
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecShr   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void sar2() {
     for (int i = 0; i < 128; i++)
       a[i] >>= 2;
@@ -149,8 +151,10 @@
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-ARM64: void Main.shr2() loop_optimization (after)
-  //
-  // TODO: fill in when supported
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecUShr  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void shr2() {
     for (int i = 0; i < 128; i++)
       a[i] >>>= 2;
diff --git a/test/640-checker-long-simd/src/Main.java b/test/640-checker-long-simd/src/Main.java
index e42c716..8f6af9d 100644
--- a/test/640-checker-long-simd/src/Main.java
+++ b/test/640-checker-long-simd/src/Main.java
@@ -134,8 +134,10 @@
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-ARM64: void Main.sar2() loop_optimization (after)
-  //
-  // TODO: fill in when supported
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecShr   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void sar2() {
     for (int i = 0; i < 128; i++)
       a[i] >>= 2;
@@ -147,8 +149,10 @@
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-ARM64: void Main.shr2() loop_optimization (after)
-  //
-  // TODO: fill in when supported
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecUShr  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void shr2() {
     for (int i = 0; i < 128; i++)
       a[i] >>>= 2;
diff --git a/test/640-checker-short-simd/src/Main.java b/test/640-checker-short-simd/src/Main.java
index 241f8e6..f62c726 100644
--- a/test/640-checker-short-simd/src/Main.java
+++ b/test/640-checker-short-simd/src/Main.java
@@ -135,8 +135,10 @@
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-ARM64: void Main.sar2() loop_optimization (after)
-  //
-  // TODO: fill in when supported
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecShr   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void sar2() {
     for (int i = 0; i < 128; i++)
       a[i] >>= 2;
@@ -147,9 +149,9 @@
   /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM64: void Main.shr2() loop_optimization (after)
-  //
-  // TODO: fill in when supported
+  // TODO: would need a signedness flip.
+  /// CHECK-START: void Main.shr2() loop_optimization (after)
+  /// CHECK-NOT: VecUShr
   static void shr2() {
     for (int i = 0; i < 128; i++)
       a[i] >>>= 2;
diff --git a/test/645-checker-abs-simd/src/Main.java b/test/645-checker-abs-simd/src/Main.java
index 76850ab..5a63d9f 100644
--- a/test/645-checker-abs-simd/src/Main.java
+++ b/test/645-checker-abs-simd/src/Main.java
@@ -22,6 +22,67 @@
   private static final int SPQUIET = 1 << 22;
   private static final long DPQUIET = 1L << 51;
 
+  /// CHECK-START: void Main.doitByte(byte[]) loop_optimization (before)
+  /// CHECK-DAG: Phi                                       loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet                                  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: InvokeStaticOrDirect intrinsic:MathAbsInt loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArraySet                                  loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.doitByte(byte[]) loop_optimization (after)
+  /// CHECK-DAG: Phi                                       loop:<<Loop1:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad                                   loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: VecAbs                                    loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: VecStore                                  loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: Phi                                       loop:<<Loop2:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet                                  loop:<<Loop2>>      outer_loop:none
+  /// CHECK-DAG: InvokeStaticOrDirect intrinsic:MathAbsInt loop:<<Loop2>>      outer_loop:none
+  /// CHECK-DAG: ArraySet                                  loop:<<Loop2>>      outer_loop:none
+  //
+  /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>"
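+  //
+  // Loop1 is the vectorized main loop; Loop2 is the scalar cleanup loop that
+  // handles any remaining tail iterations, so the two must be distinct.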
+  private static void doitByte(byte[] x) {
+    for (int i = 0; i < x.length; i++) {
+      x[i] = (byte) Math.abs(x[i]);
+    }
+  }
+
+  /// CHECK-START: void Main.doitChar(char[]) loop_optimization (before)
+  /// CHECK-DAG: Phi                                       loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet                                  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: InvokeStaticOrDirect intrinsic:MathAbsInt loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArraySet                                  loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START: void Main.doitChar(char[]) loop_optimization (after)
+  /// CHECK-NOT: VecAbs
+  private static void doitChar(char[] x) {
+    // Basically a nop due to zero extension.
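+    // (char is zero-extended to a non-negative int, so Math.abs returns its
+    // argument unchanged and no VecAbs is expected.)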
+    for (int i = 0; i < x.length; i++) {
+      x[i] = (char) Math.abs(x[i]);
+    }
+  }
+
+  /// CHECK-START: void Main.doitShort(short[]) loop_optimization (before)
+  /// CHECK-DAG: Phi                                       loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet                                  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: InvokeStaticOrDirect intrinsic:MathAbsInt loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArraySet                                  loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.doitShort(short[]) loop_optimization (after)
+  /// CHECK-DAG: Phi                                       loop:<<Loop1:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad                                   loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: VecAbs                                    loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: VecStore                                  loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: Phi                                       loop:<<Loop2:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet                                  loop:<<Loop2>>      outer_loop:none
+  /// CHECK-DAG: InvokeStaticOrDirect intrinsic:MathAbsInt loop:<<Loop2>>      outer_loop:none
+  /// CHECK-DAG: ArraySet                                  loop:<<Loop2>>      outer_loop:none
+  //
+  /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>"
+  private static void doitShort(short[] x) {
+    for (int i = 0; i < x.length; i++) {
+      x[i] = (short) Math.abs(x[i]);
+    }
+  }
+
   /// CHECK-START: void Main.doitInt(int[]) loop_optimization (before)
   /// CHECK-DAG: Phi                                       loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: ArrayGet                                  loop:<<Loop>>      outer_loop:none
@@ -52,8 +113,16 @@
   /// CHECK-DAG: ArraySet                                   loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-ARM64: void Main.doitLong(long[]) loop_optimization (after)
+  /// CHECK-DAG: Phi                                        loop:<<Loop1:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad                                    loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: VecAbs                                     loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: VecStore                                   loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: Phi                                        loop:<<Loop2:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet                                   loop:<<Loop2>>      outer_loop:none
+  /// CHECK-DAG: InvokeStaticOrDirect intrinsic:MathAbsLong loop:<<Loop2>>      outer_loop:none
+  /// CHECK-DAG: ArraySet                                   loop:<<Loop2>>      outer_loop:none
   //
-  // TODO: Not supported yet.
+  /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>"
   private static void doitLong(long[] x) {
     for (int i = 0; i < x.length; i++) {
       x[i] = Math.abs(x[i]);
@@ -90,8 +159,16 @@
   /// CHECK-DAG: ArraySet                                     loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-ARM64: void Main.doitDouble(double[]) loop_optimization (after)
+  /// CHECK-DAG: Phi                                          loop:<<Loop1:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad                                      loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: VecAbs                                       loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: VecStore                                     loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: Phi                                          loop:<<Loop2:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet                                     loop:<<Loop2>>      outer_loop:none
+  /// CHECK-DAG: InvokeStaticOrDirect intrinsic:MathAbsDouble loop:<<Loop2>>      outer_loop:none
+  /// CHECK-DAG: ArraySet                                     loop:<<Loop2>>      outer_loop:none
   //
-  // TODO: Not supported yet.
+  /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>"
   private static void doitDouble(double[] x) {
     for (int i = 0; i < x.length; i++) {
       x[i] = Math.abs(x[i]);
@@ -99,6 +176,31 @@
   }
 
   public static void main(String[] args) {
+    // Bytes, chars, shorts.
+    byte[] xb = new byte[256];
+    for (int i = 0; i < 256; i++) {
+      xb[i] = (byte) i;
+    }
+    doitByte(xb);
+    for (int i = 0; i < 256; i++) {
+      expectEquals32((byte) Math.abs((byte) i), xb[i]);
+    }
+    char[] xc = new char[1024 * 64];
+    for (int i = 0; i < 1024 * 64; i++) {
+      xc[i] = (char) i;
+    }
+    doitChar(xc);
+    for (int i = 0; i < 1024 * 64; i++) {
+      expectEquals32((char) Math.abs((char) i), xc[i]);
+    }
+    short[] xs = new short[1024 * 64];
+    for (int i = 0; i < 1024 * 64; i++) {
+      xs[i] = (short) i;
+    }
+    doitShort(xs);
+    for (int i = 0; i < 1024 * 64; i++) {
+      expectEquals32((short) Math.abs((short) i), xs[i]);
+    }
     // Set up minint32, maxint32 and some others.
     int[] xi = new int[8];
     xi[0] = 0x80000000;
diff --git a/test/651-checker-byte-simd-minmax/src/Main.java b/test/651-checker-byte-simd-minmax/src/Main.java
index 8211ace..fe45807 100644
--- a/test/651-checker-byte-simd-minmax/src/Main.java
+++ b/test/651-checker-byte-simd-minmax/src/Main.java
@@ -27,9 +27,12 @@
   /// CHECK-DAG: <<Cnv:b\d+>>  TypeConversion [<<Min>>]            loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
   //
-  // TODO: narrow type vectorization.
-  /// CHECK-START: void Main.doitMin(byte[], byte[], byte[]) loop_optimization (after)
-  /// CHECK-NOT: VecMin
+  /// CHECK-START-ARM64: void Main.doitMin(byte[], byte[], byte[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>] unsigned:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>>      outer_loop:none
   private static void doitMin(byte[] x, byte[] y, byte[] z) {
     int min = Math.min(x.length, Math.min(y.length, z.length));
     for (int i = 0; i < min; i++) {
@@ -37,6 +40,30 @@
     }
   }
 
+  /// CHECK-START-ARM64: void Main.doitMinUnsigned(byte[], byte[], byte[]) loop_optimization (before)
+  /// CHECK-DAG: <<I255:i\d+>> IntConstant 255                     loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:b\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:b\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<And1:i\d+>> And [<<Get1>>,<<I255>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<And2:i\d+>> And [<<Get2>>,<<I255>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Min:i\d+>>  InvokeStaticOrDirect [<<And1>>,<<And2>>] intrinsic:MathMinIntInt loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<Cnv:b\d+>>  TypeConversion [<<Min>>]            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.doitMinUnsigned(byte[], byte[], byte[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>] unsigned:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>>      outer_loop:none
+  private static void doitMinUnsigned(byte[] x, byte[] y, byte[] z) {
+    int min = Math.min(x.length, Math.min(y.length, z.length));
+    for (int i = 0; i < min; i++) {
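+      // The 0xff masks reinterpret the signed bytes as unsigned values in
+      // [0, 255], which is what the unsigned:true VecMin above relies on.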
+      x[i] = (byte) Math.min(y[i] & 0xff, z[i] & 0xff);
+    }
+  }
+
   /// CHECK-START: void Main.doitMax(byte[], byte[], byte[]) loop_optimization (before)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:b\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
@@ -45,9 +72,12 @@
   /// CHECK-DAG: <<Cnv:b\d+>>  TypeConversion [<<Max>>]            loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
   //
-  // TODO: narrow type vectorization.
-  /// CHECK-START: void Main.doitMax(byte[], byte[], byte[]) loop_optimization (after)
-  /// CHECK-NOT: VecMax
+  /// CHECK-START-ARM64: void Main.doitMax(byte[], byte[], byte[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>] unsigned:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>>      outer_loop:none
   private static void doitMax(byte[] x, byte[] y, byte[] z) {
     int min = Math.min(x.length, Math.min(y.length, z.length));
     for (int i = 0; i < min; i++) {
@@ -55,6 +85,30 @@
     }
   }
 
+  /// CHECK-START-ARM64: void Main.doitMaxUnsigned(byte[], byte[], byte[]) loop_optimization (before)
+  /// CHECK-DAG: <<I255:i\d+>> IntConstant 255                     loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:b\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:b\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<And1:i\d+>> And [<<Get1>>,<<I255>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<And2:i\d+>> And [<<Get2>>,<<I255>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Max:i\d+>>  InvokeStaticOrDirect [<<And1>>,<<And2>>] intrinsic:MathMaxIntInt loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<Cnv:b\d+>>  TypeConversion [<<Max>>]            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.doitMaxUnsigned(byte[], byte[], byte[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>] unsigned:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>>      outer_loop:none
+  private static void doitMaxUnsigned(byte[] x, byte[] y, byte[] z) {
+    int min = Math.min(x.length, Math.min(y.length, z.length));
+    for (int i = 0; i < min; i++) {
+      x[i] = (byte) Math.max(y[i] & 0xff, z[i] & 0xff);
+    }
+  }
+
   public static void main(String[] args) {
     // Initialize cross-values for all possible values.
     int total = 256 * 256;
@@ -77,11 +131,21 @@
       byte expected = (byte) Math.min(y[i], z[i]);
       expectEquals(expected, x[i]);
     }
+    doitMinUnsigned(x, y, z);
+    for (int i = 0; i < total; i++) {
+      byte expected = (byte) Math.min(y[i] & 0xff, z[i] & 0xff);
+      expectEquals(expected, x[i]);
+    }
     doitMax(x, y, z);
     for (int i = 0; i < total; i++) {
       byte expected = (byte) Math.max(y[i], z[i]);
       expectEquals(expected, x[i]);
     }
+    doitMaxUnsigned(x, y, z);
+    for (int i = 0; i < total; i++) {
+      byte expected = (byte) Math.max(y[i] & 0xff, z[i] & 0xff);
+      expectEquals(expected, x[i]);
+    }
 
     System.out.println("passed");
   }
diff --git a/test/651-checker-char-simd-minmax/src/Main.java b/test/651-checker-char-simd-minmax/src/Main.java
index 5ce7b94..e2998da 100644
--- a/test/651-checker-char-simd-minmax/src/Main.java
+++ b/test/651-checker-char-simd-minmax/src/Main.java
@@ -27,9 +27,12 @@
   /// CHECK-DAG: <<Cnv:c\d+>>  TypeConversion [<<Min>>]            loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
   //
-  // TODO: narrow type vectorization.
-  /// CHECK-START: void Main.doitMin(char[], char[], char[]) loop_optimization (after)
-  /// CHECK-NOT: VecMin
+  /// CHECK-START-ARM64: void Main.doitMin(char[], char[], char[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>] unsigned:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>>      outer_loop:none
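+  //
+  // char is inherently unsigned 16-bit, so no masking is needed for the
+  // vectorizer to use unsigned:true min/max directly.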
   private static void doitMin(char[] x, char[] y, char[] z) {
     int min = Math.min(x.length, Math.min(y.length, z.length));
     for (int i = 0; i < min; i++) {
@@ -45,9 +48,12 @@
   /// CHECK-DAG: <<Cnv:c\d+>>  TypeConversion [<<Max>>]            loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
   //
-  // TODO: narrow type vectorization.
-  /// CHECK-START: void Main.doitMax(char[], char[], char[]) loop_optimization (after)
-  /// CHECK-NOT: VecMax
+  /// CHECK-START-ARM64: void Main.doitMax(char[], char[], char[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>] unsigned:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>>      outer_loop:none
   private static void doitMax(char[] x, char[] y, char[] z) {
     int min = Math.min(x.length, Math.min(y.length, z.length));
     for (int i = 0; i < min; i++) {
diff --git a/test/651-checker-double-simd-minmax/src/Main.java b/test/651-checker-double-simd-minmax/src/Main.java
index e1711ae..cf04f85 100644
--- a/test/651-checker-double-simd-minmax/src/Main.java
+++ b/test/651-checker-double-simd-minmax/src/Main.java
@@ -48,7 +48,7 @@
   /// CHECK-DAG: <<Max:d\d+>>  InvokeStaticOrDirect [<<Get1>>,<<Get2>>] intrinsic:MathMaxDoubleDouble loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>>      outer_loop:none
   //
-  // TODO-x86: 0.0 vs -0.0?
+  // TODO x86: 0.0 vs -0.0?
   //
   /// CHECK-START-ARM64: void Main.doitMax(double[], double[], double[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
diff --git a/test/651-checker-int-simd-minmax/src/Main.java b/test/651-checker-int-simd-minmax/src/Main.java
index 4e05a9d..6cee7b5 100644
--- a/test/651-checker-int-simd-minmax/src/Main.java
+++ b/test/651-checker-int-simd-minmax/src/Main.java
@@ -30,7 +30,7 @@
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>]          loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>] unsigned:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>>      outer_loop:none
   private static void doitMin(int[] x, int[] y, int[] z) {
     int min = Math.min(x.length, Math.min(y.length, z.length));
@@ -50,7 +50,7 @@
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>]          loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>] unsigned:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>>      outer_loop:none
   private static void doitMax(int[] x, int[] y, int[] z) {
     int min = Math.min(x.length, Math.min(y.length, z.length));
diff --git a/test/651-checker-short-simd-minmax/src/Main.java b/test/651-checker-short-simd-minmax/src/Main.java
index f34f526..09485a2 100644
--- a/test/651-checker-short-simd-minmax/src/Main.java
+++ b/test/651-checker-short-simd-minmax/src/Main.java
@@ -27,9 +27,12 @@
   /// CHECK-DAG: <<Cnv:s\d+>>  TypeConversion [<<Min>>]            loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
   //
-  // TODO: narrow type vectorization.
-  /// CHECK-START: void Main.doitMin(short[], short[], short[]) loop_optimization (after)
-  /// CHECK-NOT: VecMin
+  /// CHECK-START-ARM64: void Main.doitMin(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>] unsigned:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>>      outer_loop:none
   private static void doitMin(short[] x, short[] y, short[] z) {
     int min = Math.min(x.length, Math.min(y.length, z.length));
     for (int i = 0; i < min; i++) {
@@ -37,6 +40,30 @@
     }
   }
 
+  /// CHECK-START-ARM64: void Main.doitMinUnsigned(short[], short[], short[]) loop_optimization (before)
+  /// CHECK-DAG: <<IMAX:i\d+>> IntConstant 65535                   loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:s\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:s\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<And1:i\d+>> And [<<Get1>>,<<IMAX>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<And2:i\d+>> And [<<Get2>>,<<IMAX>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Min:i\d+>>  InvokeStaticOrDirect [<<And1>>,<<And2>>] intrinsic:MathMinIntInt loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<Cnv:s\d+>>  TypeConversion [<<Min>>]            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.doitMinUnsigned(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>] unsigned:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>>      outer_loop:none
+  private static void doitMinUnsigned(short[] x, short[] y, short[] z) {
+    int min = Math.min(x.length, Math.min(y.length, z.length));
+    for (int i = 0; i < min; i++) {
+      x[i] = (short) Math.min(y[i] & 0xffff, z[i] & 0xffff);
+    }
+  }
+
   /// CHECK-START: void Main.doitMax(short[], short[], short[]) loop_optimization (before)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:s\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
@@ -45,9 +72,12 @@
   /// CHECK-DAG: <<Cnv:s\d+>>  TypeConversion [<<Max>>]            loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
   //
-  // TODO: narrow type vectorization.
-  /// CHECK-START: void Main.doitMax(short[], short[], short[]) loop_optimization (after)
-  /// CHECK-NOT: VecMax
+  /// CHECK-START-ARM64: void Main.doitMax(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>] unsigned:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>>      outer_loop:none
   private static void doitMax(short[] x, short[] y, short[] z) {
     int min = Math.min(x.length, Math.min(y.length, z.length));
     for (int i = 0; i < min; i++) {
@@ -55,6 +85,30 @@
     }
   }
 
+  /// CHECK-START-ARM64: void Main.doitMaxUnsigned(short[], short[], short[]) loop_optimization (before)
+  /// CHECK-DAG: <<IMAX:i\d+>> IntConstant 65535                   loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:s\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:s\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<And1:i\d+>> And [<<Get1>>,<<IMAX>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<And2:i\d+>> And [<<Get2>>,<<IMAX>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Max:i\d+>>  InvokeStaticOrDirect [<<And1>>,<<And2>>] intrinsic:MathMaxIntInt loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<Cnv:s\d+>>  TypeConversion [<<Max>>]            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.doitMaxUnsigned(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>] unsigned:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>>      outer_loop:none
+  private static void doitMaxUnsigned(short[] x, short[] y, short[] z) {
+    int min = Math.min(x.length, Math.min(y.length, z.length));
+    for (int i = 0; i < min; i++) {
+      x[i] = (short) Math.max(y[i] & 0xffff, z[i] & 0xffff);
+    }
+  }
+
   public static void main(String[] args) {
     short[] interesting = {
       (short) 0x0000, (short) 0x0001, (short) 0x007f,
@@ -91,11 +145,21 @@
       short expected = (short) Math.min(y[i], z[i]);
       expectEquals(expected, x[i]);
     }
+    doitMinUnsigned(x, y, z);
+    for (int i = 0; i < total; i++) {
+      short expected = (short) Math.min(y[i] & 0xffff, z[i] & 0xffff);
+      expectEquals(expected, x[i]);
+    }
     doitMax(x, y, z);
     for (int i = 0; i < total; i++) {
       short expected = (short) Math.max(y[i], z[i]);
       expectEquals(expected, x[i]);
     }
+    doitMaxUnsigned(x, y, z);
+    for (int i = 0; i < total; i++) {
+      short expected = (short) Math.max(y[i] & 0xffff, z[i] & 0xffff);
+      expectEquals(expected, x[i]);
+    }
 
     System.out.println("passed");
   }
diff --git a/test/652-deopt-intrinsic/expected.txt b/test/652-deopt-intrinsic/expected.txt
new file mode 100644
index 0000000..6a5618e
--- /dev/null
+++ b/test/652-deopt-intrinsic/expected.txt
@@ -0,0 +1 @@
+JNI_OnLoad called
diff --git a/test/652-deopt-intrinsic/info.txt b/test/652-deopt-intrinsic/info.txt
new file mode 100644
index 0000000..58a90fa
--- /dev/null
+++ b/test/652-deopt-intrinsic/info.txt
@@ -0,0 +1,2 @@
+Regression test for the interpreter/JIT: the interpreter used to skip
+recording inline caches when it encountered an intrinsic.
diff --git a/test/652-deopt-intrinsic/src/Main.java b/test/652-deopt-intrinsic/src/Main.java
new file mode 100644
index 0000000..a82580c
--- /dev/null
+++ b/test/652-deopt-intrinsic/src/Main.java
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+  public static void main(String[] args) {
+    System.loadLibrary(args[0]);
+    loop();
+    ensureJitCompiled(Main.class, "$noinline$doCall");
+    loop();
+  }
+
+  public static void loop() {
+    Main m = new Main();
+    for (int i = 0; i < 5000; i++) {
+      $noinline$doCall("foo");
+      $noinline$doCall(m);
+      if (numberOfDeoptimizations() != 0) {
+        throw new Error("Unexpected deoptimizations");
+      }
+    }
+  }
+
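+  // String.equals is an intrinsic; the interpreter used to skip recording an
+  // inline cache for it, which could make the JIT-compiled code here
+  // deoptimize. The loops above fail if any deoptimization is observed.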
+  public static boolean $noinline$doCall(Object foo) {
+    return foo.equals(Main.class);
+  }
+
+  public static native int numberOfDeoptimizations();
+  public static native void ensureJitCompiled(Class<?> cls, String methodName);
+}
diff --git a/test/987-stack-trace-dumping/expected.txt b/test/987-stack-trace-dumping/expected.txt
deleted file mode 100644
index e69de29..0000000
--- a/test/987-stack-trace-dumping/expected.txt
+++ /dev/null
diff --git a/test/987-stack-trace-dumping/info.txt b/test/987-stack-trace-dumping/info.txt
deleted file mode 100644
index e69de29..0000000
--- a/test/987-stack-trace-dumping/info.txt
+++ /dev/null
diff --git a/test/987-stack-trace-dumping/run b/test/987-stack-trace-dumping/run
deleted file mode 100755
index dee3e8b..0000000
--- a/test/987-stack-trace-dumping/run
+++ /dev/null
@@ -1,18 +0,0 @@
-#!/bin/bash
-#
-# Copyright 2017 The Android Open Source Project
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Ask for stack traces to be dumped to a file rather than to stdout.
-./default-run "$@" --set-stack-trace-dump-dir
diff --git a/test/987-stack-trace-dumping/src/Main.java b/test/987-stack-trace-dumping/src/Main.java
deleted file mode 100644
index d1e8a1b..0000000
--- a/test/987-stack-trace-dumping/src/Main.java
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Copyright (C) 2017 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.File;
-
-public class Main {
-    public static void main(String[] args) throws Exception {
-        if (args.length != 3) {
-            throw new AssertionError("Unexpected number of args: " + args.length);
-        }
-
-        if (!"--stack-trace-dir".equals(args[1])) {
-            throw new AssertionError("Unexpected argument in position 1: " + args[1]);
-        }
-
-        // Send ourselves signal 3, which forces stack traces to be written to disk.
-        android.system.Os.kill(android.system.Os.getpid(), 3);
-
-        File[] files = null;
-        final String stackTraceDir = args[2];
-        for (int i = 0; i < 5; ++i) {
-            // Give the signal handler some time to run and dump traces - up to a maximum
-            // of 5 seconds. This is a kludge, but it's hard to do this without using things
-            // like inotify / WatchService and the like.
-            Thread.sleep(1000);
-
-            files = (new File(stackTraceDir)).listFiles();
-            if (files != null && files.length == 1) {
-                break;
-            }
-        }
-
-
-        if (files == null) {
-            throw new AssertionError("Gave up waiting for traces: " + java.util.Arrays.toString(files));
-        }
-
-        final String fileName = files[0].getName();
-        if (!fileName.startsWith("anr-pid")) {
-            throw new AssertionError("Unexpected prefix: " + fileName);
-        }
-
-        if (!fileName.contains(String.valueOf(android.system.Os.getpid()))) {
-            throw new AssertionError("File name does not contain process PID: " + fileName);
-        }
-    }
-}
diff --git a/test/Android.bp b/test/Android.bp
index 1679669..599b011 100644
--- a/test/Android.bp
+++ b/test/Android.bp
@@ -51,9 +51,9 @@
     // These really are gtests, but the gtest library comes from libart-gtest.so
     gtest: false,
     defaults: [
-        "art_defaults",
-        "art_debug_defaults",
         "art_test_defaults",
+        "art_debug_defaults",
+        "art_defaults",
     ],
 
     shared_libs: [
@@ -128,8 +128,8 @@
     name: "libart-gtest-defaults",
     host_supported: true,
     defaults: [
-        "art_defaults",
         "art_debug_defaults",
+        "art_defaults",
     ],
     shared_libs: [
         "libartd",
@@ -202,8 +202,8 @@
 cc_defaults {
     name: "libartagent-defaults",
     defaults: [
-        "art_defaults",
         "art_test_defaults",
+        "art_defaults",
     ],
     shared_libs: [
         "libbacktrace",
@@ -234,8 +234,8 @@
     name: "libartagentd",
     srcs: ["900-hello-plugin/load_unload.cc"],
     defaults: [
-        "libartagent-defaults",
         "art_debug_defaults",
+        "libartagent-defaults",
     ],
     shared_libs: ["libartd"],
 }
@@ -313,8 +313,8 @@
 art_cc_test_library {
     name: "libtiagentd",
     defaults: [
-        "libtiagent-defaults",
         "art_debug_defaults",
+        "libtiagent-defaults",
     ],
     shared_libs: ["libartd"],
 }
@@ -340,8 +340,8 @@
 art_cc_test_library {
     name: "libtistressd",
     defaults: [
-        "libtistress-defaults",
         "art_debug_defaults",
+        "libtistress-defaults",
     ],
     shared_libs: ["libartd"],
 }
@@ -355,8 +355,8 @@
 cc_defaults {
     name: "libarttest-defaults",
     defaults: [
-        "art_defaults",
         "art_test_defaults",
+        "art_defaults",
     ],
     srcs: [
         "common/runtime_state.cc",
@@ -421,8 +421,8 @@
 art_cc_test_library {
     name: "libarttestd",
     defaults: [
-        "libarttest-defaults",
         "art_debug_defaults",
+        "libarttest-defaults",
     ],
     shared_libs: ["libartd"],
 }
@@ -431,9 +431,9 @@
     name: "libnativebridgetest",
     shared_libs: ["libart"],
     defaults: [
-        "art_defaults",
-        "art_debug_defaults",
         "art_test_defaults",
+        "art_debug_defaults",
+        "art_defaults",
     ],
     srcs: ["115-native-bridge/nativebridge.cc"],
     target: {
diff --git a/test/Android.run-test-jvmti-java-library.mk b/test/Android.run-test-jvmti-java-library.mk
index c480be5..da28b4c 100644
--- a/test/Android.run-test-jvmti-java-library.mk
+++ b/test/Android.run-test-jvmti-java-library.mk
@@ -151,4 +151,8 @@
   $(eval $(call GEN_JVMTI_RUN_TEST_GENERATED_FILE,$(NR))))
 LOCAL_JAVA_RESOURCE_FILES := $(JVMTI_RUN_TEST_GENERATED_FILES)
 
+# We only want to depend on libcore.
+LOCAL_NO_STANDARD_LIBRARIES := true
+LOCAL_JAVA_LIBRARIES := core-all
+
 include $(BUILD_JAVA_LIBRARY)
diff --git a/test/common/runtime_state.cc b/test/common/runtime_state.cc
index b683a27..d2cfbff 100644
--- a/test/common/runtime_state.cc
+++ b/test/common/runtime_state.cc
@@ -238,4 +238,8 @@
   return method->GetCounter();
 }
 
+extern "C" JNIEXPORT int JNICALL Java_Main_numberOfDeoptimizations(JNIEnv*, jclass) {
+  return Runtime::Current()->GetNumberOfDeoptimizations();
+}
+
 }  // namespace art
diff --git a/test/etc/run-test-jar b/test/etc/run-test-jar
index a89fe5b..ca52a99 100755
--- a/test/etc/run-test-jar
+++ b/test/etc/run-test-jar
@@ -668,12 +668,6 @@
 # Note: this is required as envsetup right now exports detect_leaks=0.
 RUN_TEST_ASAN_OPTIONS=""
 
-# JVMTI has a mismatch of malloc with delete. b/38322765
-if [ "x$RUN_TEST_ASAN_OPTIONS" != "x" ] ; then
-  RUN_TEST_ASAN_OPTIONS="${RUN_TEST_ASAN_OPTIONS}:"
-fi
-RUN_TEST_ASAN_OPTIONS="${RUN_TEST_ASAN_OPTIONS}alloc_dealloc_mismatch=0"
-
 # Multiple shutdown leaks. b/38341789
 if [ "x$RUN_TEST_ASAN_OPTIONS" != "x" ] ; then
   RUN_TEST_ASAN_OPTIONS="${RUN_TEST_ASAN_OPTIONS}:"
diff --git a/test/knownfailures.json b/test/knownfailures.json
index 4b44df7..96c2967 100644
--- a/test/knownfailures.json
+++ b/test/knownfailures.json
@@ -651,8 +651,7 @@
             "969-iface-super",
             "981-dedup-original-dex",
             "984-obsolete-invoke",
-            "985-re-obsolete",
-            "987-stack-trace-dumping"
+            "985-re-obsolete"
         ],
         "description": "The tests above fail with --build-with-javac-dx.",
         "env_vars": {"ANDROID_COMPILE_WITH_JACK": "false"},
diff --git a/tools/ahat/README.txt b/tools/ahat/README.txt
index 133426f..3049871 100644
--- a/tools/ahat/README.txt
+++ b/tools/ahat/README.txt
@@ -75,6 +75,9 @@
  * Instance.isRoot and Instance.getRootTypes.
 
 Release History:
+ 1.2 Pending
+   Simplify presentation of sample path from GC root.
+
  1.1 Feb 21, 2017
    Show java.lang.ref.Reference referents as "unreachable" instead of null.
 
diff --git a/tools/ahat/src/ObjectHandler.java b/tools/ahat/src/ObjectHandler.java
index 2e0ae6e..b1d7904 100644
--- a/tools/ahat/src/ObjectHandler.java
+++ b/tools/ahat/src/ObjectHandler.java
@@ -19,7 +19,6 @@
 import com.android.ahat.heapdump.AhatArrayInstance;
 import com.android.ahat.heapdump.AhatClassInstance;
 import com.android.ahat.heapdump.AhatClassObj;
-import com.android.ahat.heapdump.AhatHeap;
 import com.android.ahat.heapdump.AhatInstance;
 import com.android.ahat.heapdump.AhatSnapshot;
 import com.android.ahat.heapdump.Diff;
@@ -29,7 +28,6 @@
 import com.android.ahat.heapdump.Value;
 import java.io.IOException;
 import java.util.Collection;
-import java.util.Collections;
 import java.util.List;
 import java.util.Objects;
 
@@ -249,47 +247,16 @@
   private void printGcRootPath(Doc doc, Query query, AhatInstance inst) {
     doc.section("Sample Path from GC Root");
     List<PathElement> path = inst.getPathFromGcRoot();
-
-    // Add a dummy PathElement as a marker for the root.
-    final PathElement root = new PathElement(null, null);
-    path.add(0, root);
-
-    HeapTable.TableConfig<PathElement> table = new HeapTable.TableConfig<PathElement>() {
-      public String getHeapsDescription() {
-        return "Bytes Retained by Heap (Dominators Only)";
-      }
-
-      public long getSize(PathElement element, AhatHeap heap) {
-        if (element == root) {
-          return heap.getSize();
-        }
-        if (element.isDominator) {
-          return element.instance.getRetainedSize(heap);
-        }
-        return 0;
-      }
-
-      public List<HeapTable.ValueConfig<PathElement>> getValueConfigs() {
-        HeapTable.ValueConfig<PathElement> value = new HeapTable.ValueConfig<PathElement>() {
-          public String getDescription() {
-            return "Path Element";
-          }
-
-          public DocString render(PathElement element) {
-            if (element == root) {
-              return DocString.link(DocString.uri("rooted"), DocString.text("ROOT"));
-            } else {
-              DocString label = DocString.text("→ ");
-              label.append(Summarizer.summarize(element.instance));
-              label.append(element.field);
-              return label;
-            }
-          }
-        };
-        return Collections.singletonList(value);
-      }
+    doc.table(new Column(""), new Column("Path Element"));
+    doc.row(DocString.text("(rooted)"),
+        DocString.link(DocString.uri("root"), DocString.text("ROOT")));
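+    // Render one plain row per path element, flagging dominators in the
+    // first column; this replaces the old per-heap retained-size HeapTable.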
+    for (PathElement element : path) {
+      DocString label = DocString.text("→ ");
+      label.append(Summarizer.summarize(element.instance));
+      label.append(element.field);
+      doc.row(DocString.text(element.isDominator ? "(dominator)" : ""), label);
     };
-    HeapTable.render(doc, query, DOMINATOR_PATH_ID, table, mSnapshot, path);
+    doc.end();
   }
 
   public void printDominatedObjects(Doc doc, Query query, AhatInstance inst) {
diff --git a/tools/ahat/src/heapdump/AhatClassInstance.java b/tools/ahat/src/heapdump/AhatClassInstance.java
index 273530a..c10d604 100644
--- a/tools/ahat/src/heapdump/AhatClassInstance.java
+++ b/tools/ahat/src/heapdump/AhatClassInstance.java
@@ -154,10 +154,7 @@
   }
 
   @Override public AhatInstance getAssociatedBitmapInstance() {
-    if (isInstanceOfClass("android.graphics.Bitmap")) {
-      return this;
-    }
-    return null;
+    return getBitmapInfo() == null ? null : this;
   }
 
   @Override public boolean isClassInstance() {
@@ -178,14 +175,27 @@
    * Returns null if the field value is null, not a byte[] or could not be read.
    */
   private byte[] getByteArrayField(String fieldName) {
-    Value value = getField(fieldName);
-    if (!value.isAhatInstance()) {
-      return null;
-    }
-    return value.asAhatInstance().asByteArray();
+    AhatInstance field = getRefField(fieldName);
+    return field == null ? null : field.asByteArray();
   }
 
-  public BufferedImage asBitmap() {
+  private static class BitmapInfo {
+    public final int width;
+    public final int height;
+    public final byte[] buffer;
+
+    public BitmapInfo(int width, int height, byte[] buffer) {
+      this.width = width;
+      this.height = height;
+      this.buffer = buffer;
+    }
+  }
+
+  /**
+   * Return bitmap info for this object, or null if no appropriate bitmap
+   * info is available.
+   */
+  private BitmapInfo getBitmapInfo() {
     if (!isInstanceOfClass("android.graphics.Bitmap")) {
       return null;
     }
@@ -205,20 +215,34 @@
       return null;
     }
 
+    if (buffer.length < 4 * height * width) {
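+      // asBitmap() decodes 4 bytes per pixel, so shorter buffers are invalid.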
+      return null;
+    }
+
+    return new BitmapInfo(width, height, buffer);
+  }
+
+  public BufferedImage asBitmap() {
+    BitmapInfo info = getBitmapInfo();
+    if (info == null) {
+      return null;
+    }
+
     // Convert the raw data to an image
     // Convert BGRA to ABGR
-    int[] abgr = new int[height * width];
+    int[] abgr = new int[info.height * info.width];
     for (int i = 0; i < abgr.length; i++) {
       abgr[i] = (
-          (((int) buffer[i * 4 + 3] & 0xFF) << 24)
-          + (((int) buffer[i * 4 + 0] & 0xFF) << 16)
-          + (((int) buffer[i * 4 + 1] & 0xFF) << 8)
-          + ((int) buffer[i * 4 + 2] & 0xFF));
+          (((int) info.buffer[i * 4 + 3] & 0xFF) << 24)
+          + (((int) info.buffer[i * 4 + 0] & 0xFF) << 16)
+          + (((int) info.buffer[i * 4 + 1] & 0xFF) << 8)
+          + ((int) info.buffer[i * 4 + 2] & 0xFF));
     }
 
     BufferedImage bitmap = new BufferedImage(
-        width, height, BufferedImage.TYPE_4BYTE_ABGR);
-    bitmap.setRGB(0, 0, width, height, abgr, 0, width);
+        info.width, info.height, BufferedImage.TYPE_4BYTE_ABGR);
+    bitmap.setRGB(0, 0, info.width, info.height, abgr, 0, info.width);
     return bitmap;
   }
 }