Merge "Add array initializer to jfuzz' potential output."
diff --git a/Android.mk b/Android.mk
index 7eb0bf9..2df1b13 100644
--- a/Android.mk
+++ b/Android.mk
@@ -388,6 +388,7 @@
 LOCAL_REQUIRED_MODULES += \
     libartd \
     libartd-compiler \
+    libopenjdkd \
     libopenjdkjvmd \
     libopenjdkjvmtid \
 
diff --git a/compiler/dex/dex_to_dex_compiler.cc b/compiler/dex/dex_to_dex_compiler.cc
index 2db99cd..fba1136 100644
--- a/compiler/dex/dex_to_dex_compiler.cc
+++ b/compiler/dex/dex_to_dex_compiler.cc
@@ -22,12 +22,14 @@
 #include "art_method-inl.h"
 #include "base/logging.h"
 #include "base/mutex.h"
+#include "bytecode_utils.h"
 #include "compiled_method.h"
 #include "dex_file-inl.h"
 #include "dex_instruction-inl.h"
 #include "driver/compiler_driver.h"
 #include "driver/dex_compilation_unit.h"
 #include "mirror/dex_cache.h"
+#include "quicken_info.h"
 #include "thread-current-inl.h"
 
 namespace art {
@@ -110,13 +112,9 @@
 
 void DexCompiler::Compile() {
   DCHECK_EQ(dex_to_dex_compilation_level_, DexToDexCompilationLevel::kOptimize);
-  const DexFile::CodeItem* code_item = unit_.GetCodeItem();
-  const uint16_t* insns = code_item->insns_;
-  const uint32_t insns_size = code_item->insns_size_in_code_units_;
-  Instruction* inst = const_cast<Instruction*>(Instruction::At(insns));
-
-  for (uint32_t dex_pc = 0; dex_pc < insns_size;
-       inst = const_cast<Instruction*>(inst->Next()), dex_pc = inst->GetDexPc(insns)) {
+  for (CodeItemIterator it(*unit_.GetCodeItem()); !it.Done(); it.Advance()) {
+    Instruction* inst = const_cast<Instruction*>(&it.CurrentInstruction());
+    const uint32_t dex_pc = it.CurrentDexPc();
     switch (inst->Opcode()) {
       case Instruction::RETURN_VOID:
         CompileReturnVoid(inst, dex_pc);
@@ -124,6 +122,11 @@
 
       case Instruction::CHECK_CAST:
         inst = CompileCheckCast(inst, dex_pc);
+        if (inst->Opcode() == Instruction::NOP) {
+          // We turned the CHECK_CAST into two NOPs; skip the second NOP so it is not visited
+          // as well, which would add an extra quickening info entry.
+          it.Advance();
+        }
         break;
 
       case Instruction::IGET:
@@ -190,7 +193,14 @@
         CompileInvokeVirtual(inst, dex_pc, Instruction::INVOKE_VIRTUAL_RANGE_QUICK, true);
         break;
 
+      case Instruction::NOP:
+        // We need to differentiate between NOPs inserted by CHECK_CAST quickening and original
+        // NOPs; put an invalid index in the map for original NOPs. They should be rare in real code.
+        quickened_info_.push_back(QuickenedInfo(dex_pc, DexFile::kDexNoIndex16));
+        break;
+
       default:
+        DCHECK(!inst->IsQuickened());
         // Nothing to do.
         break;
     }
@@ -348,10 +358,26 @@
     }
 
     // Create a `CompiledMethod`, with the quickened information in the vmap table.
-    Leb128EncodingVector<> builder;
+    if (kIsDebugBuild) {
+      // Double check that the counts line up with the size of the quicken info.
+      size_t quicken_count = 0;
+      for (CodeItemIterator it(*code_item); !it.Done(); it.Advance()) {
+        if (QuickenInfoTable::NeedsIndexForInstruction(&it.CurrentInstruction())) {
+          ++quicken_count;
+        }
+      }
+      CHECK_EQ(quicken_count, dex_compiler.GetQuickenedInfo().size());
+    }
+    std::vector<uint8_t> quicken_data;
     for (QuickenedInfo info : dex_compiler.GetQuickenedInfo()) {
-      builder.PushBackUnsigned(info.dex_pc);
-      builder.PushBackUnsigned(info.dex_member_index);
+      // The dex pc is not serialized; it is only used for checking the instructions. Since the
+      // array is accessed by the index of the quickened instruction, the indices must line up
+      // perfectly. The reader side uses the same NeedsIndexForInstruction function.
+      const Instruction* inst = Instruction::At(code_item->insns_ + info.dex_pc);
+      CHECK(QuickenInfoTable::NeedsIndexForInstruction(inst)) << inst->Opcode();
+      // Add the index.
+      quicken_data.push_back(static_cast<uint8_t>(info.dex_member_index >> 0));
+      quicken_data.push_back(static_cast<uint8_t>(info.dex_member_index >> 8));
     }
     InstructionSet instruction_set = driver->GetInstructionSet();
     if (instruction_set == kThumb2) {
@@ -366,7 +392,7 @@
         0,
         0,
         ArrayRef<const uint8_t>(),                   // method_info
-        ArrayRef<const uint8_t>(builder.GetData()),  // vmap_table
+        ArrayRef<const uint8_t>(quicken_data),       // vmap_table
         ArrayRef<const uint8_t>(),                   // cfi data
         ArrayRef<const LinkerPatch>());
   }
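
A minimal sketch of the quicken-data layout produced above: one little-endian
uint16 member index per quickenable instruction, in instruction order, with no
dex pcs serialized. The helper names (EncodeQuickenData, DecodeQuickenIndex)
are illustrative, not ART API.

  #include <cstddef>
  #include <cstdint>
  #include <vector>

  // Writer side: mirrors the push_back pair in DexCompiler, low byte first.
  std::vector<uint8_t> EncodeQuickenData(const std::vector<uint16_t>& indices) {
    std::vector<uint8_t> data;
    for (uint16_t index : indices) {
      data.push_back(static_cast<uint8_t>(index >> 0));
      data.push_back(static_cast<uint8_t>(index >> 8));
    }
    return data;
  }

  // Reader side: the n-th quickenable instruction (as decided by the same
  // NeedsIndexForInstruction predicate) maps to the n-th uint16 entry.
  uint16_t DecodeQuickenIndex(const std::vector<uint8_t>& data, size_t n) {
    return static_cast<uint16_t>(data[2 * n]) |
           (static_cast<uint16_t>(data[2 * n + 1]) << 8);
  }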
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index 1c73dfa..4f1fef9 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -737,16 +737,82 @@
   return IsBootClassLoaderClass(klass) && !IsInBootImage(klass);
 }
 
+// This visitor recursively follows the references of an instance and prunes the class
+// if the type of any reachable field is pruned.
+class ImageWriter::PruneObjectReferenceVisitor {
+ public:
+  PruneObjectReferenceVisitor(ImageWriter* image_writer,
+                              bool* early_exit,
+                              std::unordered_set<mirror::Object*>* visited,
+                              bool* result)
+      : image_writer_(image_writer), early_exit_(early_exit), visited_(visited), result_(result) {}
+
+  ALWAYS_INLINE void VisitRootIfNonNull(
+      mirror::CompressedReference<mirror::Object>* root ATTRIBUTE_UNUSED) const
+      REQUIRES_SHARED(Locks::mutator_lock_) { }
+
+  ALWAYS_INLINE void VisitRoot(
+      mirror::CompressedReference<mirror::Object>* root ATTRIBUTE_UNUSED) const
+      REQUIRES_SHARED(Locks::mutator_lock_) { }
+
+  ALWAYS_INLINE void operator() (ObjPtr<mirror::Object> obj,
+                                 MemberOffset offset,
+                                 bool is_static ATTRIBUTE_UNUSED) const
+      REQUIRES_SHARED(Locks::mutator_lock_) {
+    mirror::Object* ref =
+        obj->GetFieldObject<mirror::Object, kVerifyNone, kWithoutReadBarrier>(offset);
+    if (ref == nullptr || visited_->find(ref) != visited_->end()) {
+      return;
+    }
+
+    ObjPtr<mirror::Class> klass = ref->IsClass() ? ref->AsClass() : ref->GetClass();
+    if (klass == mirror::Method::StaticClass() || klass == mirror::Constructor::StaticClass()) {
+      // Prune any class that uses reflection, because the content it holds will not be fixed up.
+      *result_ = true;
+    }
+
+    // Record the visited object to guard against circular references.
+    visited_->emplace(ref);
+    if (ref->IsClass()) {
+      *result_ = *result_ ||
+          image_writer_->PruneAppImageClassInternal(ref->AsClass(), early_exit_, visited_);
+    } else {
+      *result_ = *result_ ||
+          image_writer_->PruneAppImageClassInternal(klass, early_exit_, visited_);
+      ref->VisitReferences(*this, *this);
+    }
+    // Clean up before returning so the next call of this function starts fresh.
+    visited_->erase(ref);
+  }
+
+  ALWAYS_INLINE void operator() (ObjPtr<mirror::Class> klass ATTRIBUTE_UNUSED,
+                                 ObjPtr<mirror::Reference> ref) const
+      REQUIRES_SHARED(Locks::mutator_lock_) {
+    operator()(ref, mirror::Reference::ReferentOffset(), /* is_static */ false);
+  }
+
+  ALWAYS_INLINE bool GetResult() const {
+    return *result_;
+  }
+
+ private:
+  ImageWriter* image_writer_;
+  bool* early_exit_;
+  std::unordered_set<mirror::Object*>* visited_;
+  bool* const result_;
+};
+
+
 bool ImageWriter::PruneAppImageClass(ObjPtr<mirror::Class> klass) {
   bool early_exit = false;
-  std::unordered_set<mirror::Class*> visited;
+  std::unordered_set<mirror::Object*> visited;
   return PruneAppImageClassInternal(klass, &early_exit, &visited);
 }
 
 bool ImageWriter::PruneAppImageClassInternal(
     ObjPtr<mirror::Class> klass,
     bool* early_exit,
-    std::unordered_set<mirror::Class*>* visited) {
+    std::unordered_set<mirror::Object*>* visited) {
   DCHECK(early_exit != nullptr);
   DCHECK(visited != nullptr);
   DCHECK(compile_app_image_);
@@ -807,9 +873,18 @@
                                                         &my_early_exit,
                                                         visited);
         } else {
-          result = result || PruneAppImageClassInternal(ref->GetClass(),
+          mirror::Class* type = ref->GetClass();
+          result = result || PruneAppImageClassInternal(type,
                                                         &my_early_exit,
                                                         visited);
+          if (!result) {
+            // For the non-class case, also recursively visit the types referenced by its
+            // fields to decide whether to keep this class.
+            bool tmp = false;
+            PruneObjectReferenceVisitor visitor(this, &my_early_exit, visited, &tmp);
+            ref->VisitReferences(visitor, visitor);
+            result = result || tmp;
+          }
         }
       }
       field_offset = MemberOffset(field_offset.Uint32Value() +
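
A minimal sketch of the cycle-safe recursive walk that PruneObjectReferenceVisitor
performs, with a simplified stand-in object model (Node, uses_reflection, and
ShouldPrune are illustrative names, not the ART types):

  #include <unordered_set>
  #include <vector>

  struct Node {
    bool uses_reflection = false;     // Stand-in for the Method/Constructor check.
    std::vector<Node*> references;    // Stand-in for VisitReferences().
  };

  bool ShouldPrune(Node* obj, std::unordered_set<Node*>* visited) {
    if (obj == nullptr || visited->count(obj) != 0) {
      return false;                   // Null or already on this path: cycle broken.
    }
    if (obj->uses_reflection) {
      return true;
    }
    visited->insert(obj);             // Record before recursing, like emplace(ref).
    bool result = false;
    for (Node* ref : obj->references) {
      result = result || ShouldPrune(ref, visited);
    }
    visited->erase(obj);              // Clean up so sibling subtrees are unaffected.
    return result;
  }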
diff --git a/compiler/image_writer.h b/compiler/image_writer.h
index 5e2db7d..c42523b 100644
--- a/compiler/image_writer.h
+++ b/compiler/image_writer.h
@@ -484,7 +484,7 @@
   // early_exit is true if we had a cyclic dependency anywhere down the chain.
   bool PruneAppImageClassInternal(ObjPtr<mirror::Class> klass,
                                   bool* early_exit,
-                                  std::unordered_set<mirror::Class*>* visited)
+                                  std::unordered_set<mirror::Object*>* visited)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   bool IsMultiImage() const {
@@ -621,6 +621,7 @@
   class PruneClassLoaderClassesVisitor;
   class RegisterBootClassPathClassesVisitor;
   class VisitReferencesVisitor;
+  class PruneObjectReferenceVisitor;
 
   DISALLOW_COPY_AND_ASSIGN(ImageWriter);
 };
diff --git a/compiler/jni/jni_cfi_test.cc b/compiler/jni/jni_cfi_test.cc
index 28b7290..23106e5 100644
--- a/compiler/jni/jni_cfi_test.cc
+++ b/compiler/jni/jni_cfi_test.cc
@@ -112,18 +112,28 @@
 #ifdef ART_ENABLE_CODEGEN_arm
 TEST_ISA(kThumb2)
 #endif
+
 #ifdef ART_ENABLE_CODEGEN_arm64
+// Run the tests for ARM64 only with Baker read barriers, as the
+// expected generated code contains a Marking Register refresh
+// instruction.
+#if defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER)
 TEST_ISA(kArm64)
 #endif
+#endif
+
 #ifdef ART_ENABLE_CODEGEN_x86
 TEST_ISA(kX86)
 #endif
+
 #ifdef ART_ENABLE_CODEGEN_x86_64
 TEST_ISA(kX86_64)
 #endif
+
 #ifdef ART_ENABLE_CODEGEN_mips
 TEST_ISA(kMips)
 #endif
+
 #ifdef ART_ENABLE_CODEGEN_mips64
 TEST_ISA(kMips64)
 #endif
diff --git a/compiler/jni/jni_cfi_test_expected.inc b/compiler/jni/jni_cfi_test_expected.inc
index 2710ae9..acb8a57 100644
--- a/compiler/jni/jni_cfi_test_expected.inc
+++ b/compiler/jni/jni_cfi_test_expected.inc
@@ -89,7 +89,8 @@
     0xF3, 0x53, 0x46, 0xA9, 0xF5, 0x5B, 0x47, 0xA9, 0xF7, 0x63, 0x48, 0xA9,
     0xF9, 0x6B, 0x49, 0xA9, 0xFB, 0x73, 0x4A, 0xA9, 0xFD, 0x7B, 0x4B, 0xA9,
     0xE8, 0x27, 0x42, 0x6D, 0xEA, 0x2F, 0x43, 0x6D, 0xEC, 0x37, 0x44, 0x6D,
-    0xEE, 0x3F, 0x45, 0x6D, 0xFF, 0x03, 0x03, 0x91, 0xC0, 0x03, 0x5F, 0xD6,
+    0xEE, 0x3F, 0x45, 0x6D, 0x74, 0x36, 0x40, 0xB9, 0xFF, 0x03, 0x03, 0x91,
+    0xC0, 0x03, 0x5F, 0xD6,
 };
 static constexpr uint8_t expected_cfi_kArm64[] = {
     0x44, 0x0E, 0xC0, 0x01, 0x44, 0x93, 0x18, 0x94, 0x16, 0x44, 0x95, 0x14,
@@ -101,7 +102,7 @@
     0xD3, 0xD4, 0x44, 0xD5, 0xD6, 0x44, 0xD7, 0xD8, 0x44, 0xD9, 0xDA, 0x44,
     0xDB, 0xDC, 0x44, 0xDD, 0xDE, 0x44, 0x06, 0x48, 0x06, 0x49, 0x44, 0x06,
     0x4A, 0x06, 0x4B, 0x44, 0x06, 0x4C, 0x06, 0x4D, 0x44, 0x06, 0x4E, 0x06,
-    0x4F, 0x44, 0x0E, 0x00, 0x44, 0x0B, 0x0E, 0xC0, 0x01,
+    0x4F, 0x48, 0x0E, 0x00, 0x44, 0x0B, 0x0E, 0xC0, 0x01,
 };
 // 0x00000000: sub sp, sp, #0xc0 (192)
 // 0x00000004: .cfi_def_cfa_offset: 192
@@ -175,11 +176,12 @@
 // 0x0000006c: ldp d14, d15, [sp, #80]
 // 0x00000070: .cfi_restore_extended: r78
 // 0x00000070: .cfi_restore_extended: r79
-// 0x00000070: add sp, sp, #0xc0 (192)
-// 0x00000074: .cfi_def_cfa_offset: 0
-// 0x00000074: ret
-// 0x00000078: .cfi_restore_state
-// 0x00000078: .cfi_def_cfa_offset: 192
+// 0x00000070: ldr w20, [tr, #52] ; is_gc_marking
+// 0x00000074: add sp, sp, #0xc0 (192)
+// 0x00000078: .cfi_def_cfa_offset: 0
+// 0x00000078: ret
+// 0x0000007c: .cfi_restore_state
+// 0x0000007c: .cfi_def_cfa_offset: 192
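
The changes to the expected arm64 output above all stem from one added
instruction: the Marking Register refresh `ldr w20, [tr, #52]` is 4 bytes, so
every later offset shifts by 4, and the CFI advance before the final
`.cfi_def_cfa_offset: 0` grows by one instruction. A sketch of the DWARF
encoding, assuming a code alignment factor of 1:

  0x44 == DW_CFA_advance_loc | 4  // Before: advance 4 bytes (add sp only).
  0x48 == DW_CFA_advance_loc | 8  // After: advance 8 bytes (ldr + add sp).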
 
 static constexpr uint8_t expected_asm_kX86[] = {
     0x57, 0x56, 0x55, 0x83, 0xC4, 0xE4, 0x50, 0x89, 0x4C, 0x24, 0x34, 0xF3,
diff --git a/compiler/jni/jni_compiler_test.cc b/compiler/jni/jni_compiler_test.cc
index b34d938..6ce7d75 100644
--- a/compiler/jni/jni_compiler_test.cc
+++ b/compiler/jni/jni_compiler_test.cc
@@ -49,6 +49,9 @@
   return count + 1;
 }
 
+// TODO: In the Baker read barrier configuration, add checks to ensure
+// the Marking Register's value is correct.
+
 namespace art {
 
 enum class JniKind {
diff --git a/compiler/jni/quick/arm64/calling_convention_arm64.cc b/compiler/jni/quick/arm64/calling_convention_arm64.cc
index 33f4d77..e086455 100644
--- a/compiler/jni/quick/arm64/calling_convention_arm64.cc
+++ b/compiler/jni/quick/arm64/calling_convention_arm64.cc
@@ -108,11 +108,25 @@
 
 // Calling convention
 ManagedRegister Arm64ManagedRuntimeCallingConvention::InterproceduralScratchRegister() {
-  return Arm64ManagedRegister::FromXRegister(X20);  // saved on entry restored on exit
+  // X20 is safe to use as a scratch register:
+  // - with Baker read barriers, it is reserved as Marking Register,
+  //   and thus does not actually need to be saved/restored; it is
+  //   refreshed on exit (see Arm64JNIMacroAssembler::RemoveFrame);
+  // - in other cases, it is saved on entry (in
+  //   Arm64JNIMacroAssembler::BuildFrame) and restored on exit (in
+  //   Arm64JNIMacroAssembler::RemoveFrame).
+  return Arm64ManagedRegister::FromXRegister(X20);
 }
 
 ManagedRegister Arm64JniCallingConvention::InterproceduralScratchRegister() {
-  return Arm64ManagedRegister::FromXRegister(X20);  // saved on entry restored on exit
+  // X20 is safe to use as a scratch register:
+  // - with Baker read barriers, it is reserved as Marking Register,
+  //   and thus does not actually need to be saved/restored; it is
+  //   refreshed on exit (see Arm64JNIMacroAssembler::RemoveFrame);
+  // - in other cases, it is saved on entry (in
+  //   Arm64JNIMacroAssembler::BuildFrame) and restored on exit (in
+  //   Arm64JNIMacroAssembler::RemoveFrame).
+  return Arm64ManagedRegister::FromXRegister(X20);
 }
 
 static ManagedRegister ReturnRegisterForShorty(const char* shorty) {
diff --git a/compiler/linker/arm64/relative_patcher_arm64.cc b/compiler/linker/arm64/relative_patcher_arm64.cc
index bc21607..38c732b 100644
--- a/compiler/linker/arm64/relative_patcher_arm64.cc
+++ b/compiler/linker/arm64/relative_patcher_arm64.cc
@@ -381,6 +381,21 @@
   // Note: The fake dependency is unnecessary for the slow path.
 }
 
+// Load the read barrier introspection entrypoint into register `entrypoint`.
+static void LoadReadBarrierMarkIntrospectionEntrypoint(arm64::Arm64Assembler& assembler,
+                                                       vixl::aarch64::Register entrypoint) {
+  using vixl::aarch64::MemOperand;
+  using vixl::aarch64::ip0;
+  // Thread Register.
+  const vixl::aarch64::Register tr = vixl::aarch64::x19;
+
+  // entrypoint = Thread::Current()->pReadBarrierMarkReg16, i.e. pReadBarrierMarkIntrospection.
+  DCHECK_EQ(ip0.GetCode(), 16u);
+  const int32_t entry_point_offset =
+      Thread::ReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ip0.GetCode());
+  __ Ldr(entrypoint, MemOperand(tr, entry_point_offset));
+}
+
 void Arm64RelativePatcher::CompileBakerReadBarrierThunk(arm64::Arm64Assembler& assembler,
                                                         uint32_t encoded_data) {
   using namespace vixl::aarch64;  // NOLINT(build/namespaces)
@@ -412,6 +427,7 @@
       __ Bind(&slow_path);
       MemOperand ldr_address(lr, BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET);
       __ Ldr(ip0.W(), ldr_address);         // Load the LDR (immediate) unsigned offset.
+      LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ip1);
       __ Ubfx(ip0.W(), ip0.W(), 10, 12);    // Extract the offset.
       __ Ldr(ip0.W(), MemOperand(base_reg, ip0, LSL, 2));   // Load the reference.
       // Do not unpoison. With heap poisoning enabled, the entrypoint expects a poisoned reference.
@@ -441,6 +457,7 @@
       __ Bind(&slow_path);
       MemOperand ldr_address(lr, BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET);
       __ Ldr(ip0.W(), ldr_address);         // Load the LDR (register) unsigned offset.
+      LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ip1);
       __ Ubfx(ip0, ip0, 16, 6);             // Extract the index register, plus 32 (bit 21 is set).
       __ Bfi(ip1, ip0, 3, 6);               // Insert ip0 to the entrypoint address to create
                                             // a switch case target based on the index register.
@@ -469,6 +486,7 @@
       __ Bind(&not_marked);
       __ Tst(ip0.W(), Operand(ip0.W(), LSL, 1));
       __ B(&forwarding_address, mi);
+      LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ip1);
       // Adjust the art_quick_read_barrier_mark_introspection address in IP1 to
       // art_quick_read_barrier_mark_introspection_gc_roots.
       __ Add(ip1, ip1, Operand(BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRYPOINT_OFFSET));
diff --git a/compiler/optimizing/block_builder.cc b/compiler/optimizing/block_builder.cc
index 1e75f10..fe7ecd1 100644
--- a/compiler/optimizing/block_builder.cc
+++ b/compiler/optimizing/block_builder.cc
@@ -17,6 +17,7 @@
 #include "block_builder.h"
 
 #include "bytecode_utils.h"
+#include "quicken_info.h"
 
 namespace art {
 
@@ -121,13 +122,18 @@
   HBasicBlock* block = graph_->GetEntryBlock();
   graph_->AddBlock(block);
 
+  size_t quicken_index = 0;
   bool is_throwing_block = false;
+  // Calculate the quickening index here instead of in CreateBranchTargets, since it is easier
+  // to compute in dex_pc order.
   for (CodeItemIterator it(code_item_); !it.Done(); it.Advance()) {
     uint32_t dex_pc = it.CurrentDexPc();
 
     // Check if this dex_pc address starts a new basic block.
     HBasicBlock* next_block = GetBlockAt(dex_pc);
     if (next_block != nullptr) {
+      // We only need quicken index entries for basic block boundaries.
+      quicken_index_for_dex_pc_.Put(dex_pc, quicken_index);
       if (block != nullptr) {
         // Last instruction did not end its basic block but a new one starts here.
         // It must have been a block falling through into the next one.
@@ -137,6 +143,10 @@
       is_throwing_block = false;
       graph_->AddBlock(block);
     }
+    // Make sure to increment the quicken index before any of the `continue`s below.
+    if (QuickenInfoTable::NeedsIndexForInstruction(&it.CurrentInstruction())) {
+      ++quicken_index;
+    }
 
     if (block == nullptr) {
       // Ignore dead code.
@@ -371,4 +381,8 @@
   return true;
 }
 
+size_t HBasicBlockBuilder::GetQuickenIndex(uint32_t dex_pc) const {
+  return quicken_index_for_dex_pc_.Get(dex_pc);
+}
+
 }  // namespace art
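
A minimal sketch of the bookkeeping added above, assuming a flat instruction
stream; Insn and BuildQuickenIndexMap are illustrative names, is_quickenable
stands in for QuickenInfoTable::NeedsIndexForInstruction, and the returned map
mirrors quicken_index_for_dex_pc_:

  #include <cstddef>
  #include <cstdint>
  #include <map>
  #include <vector>

  struct Insn { uint32_t dex_pc; bool is_block_start; bool is_quickenable; };

  std::map<uint32_t, size_t> BuildQuickenIndexMap(const std::vector<Insn>& insns) {
    std::map<uint32_t, size_t> quicken_index_for_dex_pc;
    size_t quicken_index = 0;
    for (const Insn& insn : insns) {
      if (insn.is_block_start) {
        // Only block boundaries need an entry; instruction building can count
        // forward from there.
        quicken_index_for_dex_pc.emplace(insn.dex_pc, quicken_index);
      }
      if (insn.is_quickenable) {
        ++quicken_index;  // Count every quickenable instruction, even dead code.
      }
    }
    return quicken_index_for_dex_pc;
  }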
diff --git a/compiler/optimizing/block_builder.h b/compiler/optimizing/block_builder.h
index 1be0b4c..6adce81 100644
--- a/compiler/optimizing/block_builder.h
+++ b/compiler/optimizing/block_builder.h
@@ -37,7 +37,8 @@
                         nullptr,
                         arena_->Adapter(kArenaAllocGraphBuilder)),
         throwing_blocks_(kDefaultNumberOfThrowingBlocks, arena_->Adapter(kArenaAllocGraphBuilder)),
-        number_of_branches_(0u) {}
+        number_of_branches_(0u),
+        quicken_index_for_dex_pc_(std::less<uint32_t>(), arena_->Adapter()) {}
 
   // Creates basic blocks in `graph_` at branch target dex_pc positions of the
   // `code_item_`. Blocks are connected but left unpopulated with instructions.
@@ -48,6 +49,8 @@
   size_t GetNumberOfBranches() const { return number_of_branches_; }
   HBasicBlock* GetBlockAt(uint32_t dex_pc) const { return branch_targets_[dex_pc]; }
 
+  size_t GetQuickenIndex(uint32_t dex_pc) const;
+
  private:
   // Creates a basic block starting at given `dex_pc`.
   HBasicBlock* MaybeCreateBlockAt(uint32_t dex_pc);
@@ -78,6 +81,9 @@
   ArenaVector<HBasicBlock*> throwing_blocks_;
   size_t number_of_branches_;
 
+  // A table to quickly find the quicken index for the first instruction of a basic block.
+  ArenaSafeMap<uint32_t, uint32_t> quicken_index_for_dex_pc_;
+
   static constexpr size_t kDefaultNumberOfThrowingBlocks = 2u;
 
   DISALLOW_COPY_AND_ASSIGN(HBasicBlockBuilder);
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 7bf43f7..73202b4 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -404,17 +404,6 @@
   // accessing the String's `value` field in String intrinsics.
   static uint32_t GetArrayDataOffset(HArrayGet* array_get);
 
-  // Return the entry point offset for ReadBarrierMarkRegX, where X is `reg`.
-  template <PointerSize pointer_size>
-  static int32_t GetReadBarrierMarkEntryPointsOffset(size_t reg) {
-    // The entry point list defines 30 ReadBarrierMarkRegX entry points.
-    DCHECK_LT(reg, 30u);
-    // The ReadBarrierMarkRegX entry points are ordered by increasing
-    // register number in Thread::tls_Ptr_.quick_entrypoints.
-    return QUICK_ENTRYPOINT_OFFSET(pointer_size, pReadBarrierMarkReg00).Int32Value()
-        + static_cast<size_t>(pointer_size) * reg;
-  }
-
   void EmitParallelMoves(Location from1,
                          Location to1,
                          Primitive::Type type1,
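
The removed helper above now lives on Thread as ReadBarrierMarkEntryPointsOffset;
the computation is the same contiguous-table lookup. A hedged sketch of the
layout (the base constant is illustrative; the real code uses
QUICK_ENTRYPOINT_OFFSET(pointer_size, pReadBarrierMarkReg00)):

  #include <cstddef>
  #include <cstdint>

  int32_t ReadBarrierMarkEntryPointsOffset(size_t pointer_size, size_t reg) {
    // The pReadBarrierMarkRegX entry points are laid out contiguously in
    // Thread::tls_Ptr_.quick_entrypoints, ordered by increasing register
    // number, so the offset is a multiply-add from the Reg00 slot.
    const int32_t reg00_offset = 0;  // Illustrative base; see the removed code.
    return reg00_offset + static_cast<int32_t>(pointer_size * reg);
  }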
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 6b9f232..92467fe 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -729,7 +729,7 @@
     } else {
       // Entrypoint is not already loaded, load from the thread.
       int32_t entry_point_offset =
-          CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref_reg);
+          Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref_reg);
       // This runtime call does not require a stack map.
       arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
     }
@@ -8428,7 +8428,7 @@
         //     Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection.
         DCHECK_EQ(IP, 12);
         const int32_t entry_point_offset =
-            CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(IP);
+            Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(IP);
         __ LoadFromOffset(kLoadWord, kBakerCcEntrypointRegister, TR, entry_point_offset);
 
         Label return_address;
@@ -8469,7 +8469,7 @@
 
         // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
         const int32_t entry_point_offset =
-            CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(root.reg());
+            Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(root.reg());
         // Loading the entrypoint does not require a load acquire since it is only changed when
         // threads are suspended or running a checkpoint.
         __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), TR, entry_point_offset);
@@ -8572,7 +8572,7 @@
     //     Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection.
     DCHECK_EQ(IP, 12);
     const int32_t entry_point_offset =
-        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(IP);
+        Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(IP);
     __ LoadFromOffset(kLoadWord, kBakerCcEntrypointRegister, TR, entry_point_offset);
 
     Label return_address;
@@ -8655,7 +8655,7 @@
     //     Thread::Current()->pReadBarrierMarkReg16, i.e. pReadBarrierMarkIntrospection.
     DCHECK_EQ(IP, 12);
     const int32_t entry_point_offset =
-        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(IP);
+        Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(IP);
     __ LoadFromOffset(kLoadWord, kBakerCcEntrypointRegister, TR, entry_point_offset);
     __ AddConstant(data_reg, obj, data_offset);
 
@@ -8736,7 +8736,7 @@
 
   // temp2 = Thread::Current()->pReadBarrierMarkReg ## ref.reg()
   const int32_t entry_point_offset =
-      CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref.reg());
+      Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref.reg());
   // Loading the entrypoint does not require a load acquire since it is only changed when
   // threads are suspended or running a checkpoint.
   __ LoadFromOffset(kLoadWord, temp2.AsRegister<Register>(), TR, entry_point_offset);
@@ -8805,7 +8805,7 @@
 
   // temp3 = Thread::Current()->pReadBarrierMarkReg ## ref.reg()
   const int32_t entry_point_offset =
-      CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref.reg());
+      Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref.reg());
   // Loading the entrypoint does not require a load acquire since it is only changed when
   // threads are suspended or running a checkpoint.
   __ LoadFromOffset(kLoadWord, temp3.AsRegister<Register>(), TR, entry_point_offset);
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 2561ed0..7e5b1a0 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -672,7 +672,9 @@
 // `ref`.
 //
 // Argument `entrypoint` must be a register location holding the read
-// barrier marking runtime entry point to be invoked.
+// barrier marking runtime entry point to be invoked or an empty
+// location; in the latter case, the read barrier marking runtime
+// entry point will be loaded by the slow path code itself.
 class ReadBarrierMarkSlowPathBaseARM64 : public SlowPathCodeARM64 {
  protected:
   ReadBarrierMarkSlowPathBaseARM64(HInstruction* instruction, Location ref, Location entrypoint)
@@ -716,7 +718,7 @@
     } else {
       // Entrypoint is not already loaded, load from the thread.
       int32_t entry_point_offset =
-          CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ref_.reg());
+          Thread::ReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ref_.reg());
       // This runtime call does not require a stack map.
       arm64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
     }
@@ -743,9 +745,10 @@
 // another thread, or if another thread installed another object
 // reference (different from `ref`) in `obj.field`).
 //
-// If `entrypoint` is a valid location it is assumed to already be
-// holding the entrypoint. The case where the entrypoint is passed in
-// is when the decision to mark is based on whether the GC is marking.
+// Argument `entrypoint` must be a register location holding the read
+// barrier marking runtime entry point to be invoked or an empty
+// location; in the latter case, the read barrier marking runtime
+// entry point will be loaded by the slow path code itself.
 class ReadBarrierMarkSlowPathARM64 : public ReadBarrierMarkSlowPathBaseARM64 {
  public:
   ReadBarrierMarkSlowPathARM64(HInstruction* instruction,
@@ -791,7 +794,9 @@
 // reference (different from `ref`) in `obj.field`).
 //
 // Argument `entrypoint` must be a register location holding the read
-// barrier marking runtime entry point to be invoked.
+// barrier marking runtime entry point to be invoked or an empty
+// location; in the latter case, the read barrier marking runtime
+// entry point will be loaded by the slow path code itself.
 class LoadReferenceWithBakerReadBarrierSlowPathARM64 : public ReadBarrierMarkSlowPathBaseARM64 {
  public:
   LoadReferenceWithBakerReadBarrierSlowPathARM64(HInstruction* instruction,
@@ -803,7 +808,7 @@
                                                  bool needs_null_check,
                                                  bool use_load_acquire,
                                                  Register temp,
-                                                 Location entrypoint)
+                                                 Location entrypoint = Location::NoLocation())
       : ReadBarrierMarkSlowPathBaseARM64(instruction, ref, entrypoint),
         obj_(obj),
         offset_(offset),
@@ -947,20 +952,23 @@
 // another object reference (different from `ref`) in `obj.field`).
 //
 // Argument `entrypoint` must be a register location holding the read
-// barrier marking runtime entry point to be invoked.
+// barrier marking runtime entry point to be invoked or an empty
+// location; in the latter case, the read barrier marking runtime
+// entry point will be loaded by the slow path code itself.
 class LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64
     : public ReadBarrierMarkSlowPathBaseARM64 {
  public:
-  LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64(HInstruction* instruction,
-                                                               Location ref,
-                                                               Register obj,
-                                                               uint32_t offset,
-                                                               Location index,
-                                                               size_t scale_factor,
-                                                               bool needs_null_check,
-                                                               bool use_load_acquire,
-                                                               Register temp,
-                                                               Location entrypoint)
+  LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64(
+      HInstruction* instruction,
+      Location ref,
+      Register obj,
+      uint32_t offset,
+      Location index,
+      size_t scale_factor,
+      bool needs_null_check,
+      bool use_load_acquire,
+      Register temp,
+      Location entrypoint = Location::NoLocation())
       : ReadBarrierMarkSlowPathBaseARM64(instruction, ref, entrypoint),
         obj_(obj),
         offset_(offset),
@@ -1655,7 +1663,7 @@
   // Blocked core registers:
   //      lr        : Runtime reserved.
   //      tr        : Runtime reserved.
-  //      xSuspend  : Runtime reserved. TODO: Unblock this when the runtime stops using it.
+  //      mr        : Runtime reserved.
   //      ip1       : VIXL core temp.
   //      ip0       : VIXL core temp.
   //
@@ -5921,20 +5929,17 @@
       // Baker's read barrier are used.
       if (kBakerReadBarrierLinkTimeThunksEnableForGcRoots &&
           !Runtime::Current()->UseJitCompilation()) {
-        // Note that we do not actually check the value of `GetIsGcMarking()`
-        // to decide whether to mark the loaded GC root or not.  Instead, we
-        // load into `temp` (actually IP1) the read barrier mark introspection
-        // entrypoint. If `temp` is null, it means that `GetIsGcMarking()` is
-        // false, and vice versa.
+        // Query `art::Thread::Current()->GetIsGcMarking()` (stored in
+        // the Marking Register) to decide whether we need to enter
+        // the slow path to mark the GC root.
         //
         // We use link-time generated thunks for the slow path. That thunk
         // checks the reference and jumps to the entrypoint if needed.
         //
-        //     temp = Thread::Current()->pReadBarrierMarkIntrospection
         //     lr = &return_address;
         //     GcRoot<mirror::Object> root = *(obj+offset);  // Original reference load.
-        //     if (temp != nullptr) {
-        //        goto gc_root_thunk<root_reg>(lr)
+        //     if (mr) {  // Thread::Current()->GetIsGcMarking()
+        //       goto gc_root_thunk<root_reg>(lr)
         //     }
         //   return_address:
 
@@ -5946,11 +5951,6 @@
             linker::Arm64RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg.GetCode());
         vixl::aarch64::Label* cbnz_label = codegen_->NewBakerReadBarrierPatch(custom_data);
 
-        // ip1 = Thread::Current()->pReadBarrierMarkReg16, i.e. pReadBarrierMarkIntrospection.
-        DCHECK_EQ(ip0.GetCode(), 16u);
-        const int32_t entry_point_offset =
-            CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ip0.GetCode());
-        __ Ldr(ip1, MemOperand(tr, entry_point_offset));
         EmissionCheckScope guard(GetVIXLAssembler(), 3 * vixl::aarch64::kInstructionSize);
         vixl::aarch64::Label return_address;
         __ adr(lr, &return_address);
@@ -5961,36 +5961,26 @@
                       "GC root LDR must be 2 instruction (8B) before the return address label.");
         __ ldr(root_reg, MemOperand(obj.X(), offset));
         __ Bind(cbnz_label);
-        __ cbnz(ip1, static_cast<int64_t>(0));  // Placeholder, patched at link-time.
+        __ cbnz(mr, static_cast<int64_t>(0));  // Placeholder, patched at link-time.
         __ Bind(&return_address);
       } else {
-        // Note that we do not actually check the value of
-        // `GetIsGcMarking()` to decide whether to mark the loaded GC
-        // root or not.  Instead, we load into `temp` the read barrier
-        // mark entry point corresponding to register `root`. If `temp`
-        // is null, it means that `GetIsGcMarking()` is false, and vice
-        // versa.
+        // Query `art::Thread::Current()->GetIsGcMarking()` (stored in
+        // the Marking Register) to decide whether we need to enter
+        // the slow path to mark the GC root.
         //
-        //   temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
         //   GcRoot<mirror::Object> root = *(obj+offset);  // Original reference load.
-        //   if (temp != nullptr) {  // <=> Thread::Current()->GetIsGcMarking()
+        //   if (mr) {  // Thread::Current()->GetIsGcMarking()
         //     // Slow path.
-        //     root = temp(root);  // root = ReadBarrier::Mark(root);  // Runtime entry point call.
+        //     entrypoint = Thread::Current()->pReadBarrierMarkReg ## root.reg()
+        //     root = entrypoint(root);  // root = ReadBarrier::Mark(root);  // Entry point call.
         //   }
 
-        // Slow path marking the GC root `root`. The entrypoint will already be loaded in `temp`.
-        Register temp = lr;
-        SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(
-            instruction, root, /* entrypoint */ LocationFrom(temp));
+        // Slow path marking the GC root `root`. The entrypoint will
+        // be loaded by the slow path code.
+        SlowPathCodeARM64* slow_path =
+            new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(instruction, root);
         codegen_->AddSlowPath(slow_path);
 
-        // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
-        const int32_t entry_point_offset =
-            CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(root.reg());
-        // Loading the entrypoint does not require a load acquire since it is only changed when
-        // threads are suspended or running a checkpoint.
-        __ Ldr(temp, MemOperand(tr, entry_point_offset));
-
         // /* GcRoot<mirror::Object> */ root = *(obj + offset)
         if (fixup_label == nullptr) {
           __ Ldr(root_reg, MemOperand(obj, offset));
@@ -6005,9 +5995,7 @@
                       "art::mirror::CompressedReference<mirror::Object> and int32_t "
                       "have different sizes.");
 
-        // The entrypoint is null when the GC is not marking, this prevents one load compared to
-        // checking GetIsGcMarking.
-        __ Cbnz(temp, slow_path->GetEntryLabel());
+        __ Cbnz(mr, slow_path->GetEntryLabel());
         __ Bind(slow_path->GetExitLabel());
       }
     } else {
@@ -6048,20 +6036,19 @@
   if (kBakerReadBarrierLinkTimeThunksEnableForFields &&
       !use_load_acquire &&
       !Runtime::Current()->UseJitCompilation()) {
-    // Note that we do not actually check the value of `GetIsGcMarking()`
-    // to decide whether to mark the loaded reference or not.  Instead, we
-    // load into `temp` (actually IP1) the read barrier mark introspection
-    // entrypoint. If `temp` is null, it means that `GetIsGcMarking()` is
-    // false, and vice versa.
+    // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
+    // Marking Register) to decide whether we need to enter the slow
+    // path to mark the reference. Then, in the slow path, check the
+    // gray bit in the lock word of the reference's holder (`obj`) to
+    // decide whether to mark `ref` or not.
     //
     // We use link-time generated thunks for the slow path. That thunk checks
     // the holder and jumps to the entrypoint if needed. If the holder is not
     // gray, it creates a fake dependency and returns to the LDR instruction.
     //
-    //     temp = Thread::Current()->pReadBarrierMarkIntrospection
     //     lr = &gray_return_address;
-    //     if (temp != nullptr) {
-    //        goto field_thunk<holder_reg, base_reg>(lr)
+    //     if (mr) {  // Thread::Current()->GetIsGcMarking()
+    //       goto field_thunk<holder_reg, base_reg>(lr)
     //     }
     //   not_gray_return_address:
     //     // Original reference load. If the offset is too large to fit
@@ -6087,17 +6074,12 @@
         obj.GetCode());
     vixl::aarch64::Label* cbnz_label = NewBakerReadBarrierPatch(custom_data);
 
-    // ip1 = Thread::Current()->pReadBarrierMarkReg16, i.e. pReadBarrierMarkIntrospection.
-    DCHECK_EQ(ip0.GetCode(), 16u);
-    const int32_t entry_point_offset =
-        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ip0.GetCode());
-    __ Ldr(ip1, MemOperand(tr, entry_point_offset));
     EmissionCheckScope guard(GetVIXLAssembler(),
                              (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize);
     vixl::aarch64::Label return_address;
     __ adr(lr, &return_address);
     __ Bind(cbnz_label);
-    __ cbnz(ip1, static_cast<int64_t>(0));  // Placeholder, patched at link-time.
+    __ cbnz(mr, static_cast<int64_t>(0));  // Placeholder, patched at link-time.
     static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
                   "Field LDR must be 1 instruction (4B) before the return address label; "
                   " 2 instructions (8B) for heap poisoning.");
@@ -6143,20 +6125,19 @@
 
   if (kBakerReadBarrierLinkTimeThunksEnableForArrays &&
       !Runtime::Current()->UseJitCompilation()) {
-    // Note that we do not actually check the value of `GetIsGcMarking()`
-    // to decide whether to mark the loaded reference or not.  Instead, we
-    // load into `temp` (actually IP1) the read barrier mark introspection
-    // entrypoint. If `temp` is null, it means that `GetIsGcMarking()` is
-    // false, and vice versa.
+    // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
+    // Marking Register) to decide whether we need to enter the slow
+    // path to mark the reference. Then, in the slow path, check the
+    // gray bit in the lock word of the reference's holder (`obj`) to
+    // decide whether to mark `ref` or not.
     //
     // We use link-time generated thunks for the slow path. That thunk checks
     // the holder and jumps to the entrypoint if needed. If the holder is not
     // gray, it creates a fake dependency and returns to the LDR instruction.
     //
-    //     temp = Thread::Current()->pReadBarrierMarkIntrospection
     //     lr = &gray_return_address;
-    //     if (temp != nullptr) {
-    //        goto field_thunk<holder_reg, base_reg>(lr)
+    //     if (mr) {  // Thread::Current()->GetIsGcMarking()
+    //       goto array_thunk<base_reg>(lr)
     //     }
     //   not_gray_return_address:
     //     // Original reference load. If the offset is too large to fit
@@ -6176,18 +6157,13 @@
         linker::Arm64RelativePatcher::EncodeBakerReadBarrierArrayData(temp.GetCode());
     vixl::aarch64::Label* cbnz_label = NewBakerReadBarrierPatch(custom_data);
 
-    // ip1 = Thread::Current()->pReadBarrierMarkReg16, i.e. pReadBarrierMarkIntrospection.
-    DCHECK_EQ(ip0.GetCode(), 16u);
-    const int32_t entry_point_offset =
-        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ip0.GetCode());
-    __ Ldr(ip1, MemOperand(tr, entry_point_offset));
     __ Add(temp.X(), obj.X(), Operand(data_offset));
     EmissionCheckScope guard(GetVIXLAssembler(),
                              (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize);
     vixl::aarch64::Label return_address;
     __ adr(lr, &return_address);
     __ Bind(cbnz_label);
-    __ cbnz(ip1, static_cast<int64_t>(0));  // Placeholder, patched at link-time.
+    __ cbnz(mr, static_cast<int64_t>(0));  // Placeholder, patched at link-time.
     static_assert(BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
                   "Array LDR must be 1 instruction (4B) before the return address label; "
                   " 2 instructions (8B) for heap poisoning.");
@@ -6231,35 +6207,28 @@
   // `instruction->IsArrayGet()` => `!use_load_acquire`.
   DCHECK(!instruction->IsArrayGet() || !use_load_acquire);
 
-  // Query `art::Thread::Current()->GetIsGcMarking()` to decide
-  // whether we need to enter the slow path to mark the reference.
-  // Then, in the slow path, check the gray bit in the lock word of
-  // the reference's holder (`obj`) to decide whether to mark `ref` or
-  // not.
+  // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
+  // Marking Register) to decide whether we need to enter the slow
+  // path to mark the reference. Then, in the slow path, check the
+  // gray bit in the lock word of the reference's holder (`obj`) to
+  // decide whether to mark `ref` or not.
   //
-  // Note that we do not actually check the value of `GetIsGcMarking()`;
-  // instead, we load into `temp2` the read barrier mark entry point
-  // corresponding to register `ref`. If `temp2` is null, it means
-  // that `GetIsGcMarking()` is false, and vice versa.
-  //
-  //   temp2 = Thread::Current()->pReadBarrierMarkReg ## root.reg()
-  //   if (temp2 != nullptr) {  // <=> Thread::Current()->GetIsGcMarking()
+  //   if (mr) {  // Thread::Current()->GetIsGcMarking()
   //     // Slow path.
   //     uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
   //     lfence;  // Load fence or artificial data dependency to prevent load-load reordering
   //     HeapReference<mirror::Object> ref = *src;  // Original reference load.
   //     bool is_gray = (rb_state == ReadBarrier::GrayState());
   //     if (is_gray) {
-  //       ref = temp2(ref);  // ref = ReadBarrier::Mark(ref);  // Runtime entry point call.
+  //       entrypoint = Thread::Current()->pReadBarrierMarkReg ## ref.reg()
+  //       ref = entrypoint(ref);  // ref = ReadBarrier::Mark(ref);  // Runtime entry point call.
   //     }
   //   } else {
   //     HeapReference<mirror::Object> ref = *src;  // Original reference load.
   //   }
 
   // Slow path marking the object `ref` when the GC is marking. The
-  // entrypoint will already be loaded in `temp2`.
-  Register temp2 = lr;
-  Location temp2_loc = LocationFrom(temp2);
+  // entrypoint will be loaded by the slow path code.
   SlowPathCodeARM64* slow_path =
       new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierSlowPathARM64(
           instruction,
@@ -6270,19 +6239,10 @@
           scale_factor,
           needs_null_check,
           use_load_acquire,
-          temp,
-          /* entrypoint */ temp2_loc);
+          temp);
   AddSlowPath(slow_path);
 
-  // temp2 = Thread::Current()->pReadBarrierMarkReg ## ref.reg()
-  const int32_t entry_point_offset =
-      CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ref.reg());
-  // Loading the entrypoint does not require a load acquire since it is only changed when
-  // threads are suspended or running a checkpoint.
-  __ Ldr(temp2, MemOperand(tr, entry_point_offset));
-  // The entrypoint is null when the GC is not marking, this prevents one load compared to
-  // checking GetIsGcMarking.
-  __ Cbnz(temp2, slow_path->GetEntryLabel());
+  __ Cbnz(mr, slow_path->GetEntryLabel());
   // Fast path: the GC is not marking: just load the reference.
   GenerateRawReferenceLoad(
       instruction, ref, obj, offset, index, scale_factor, needs_null_check, use_load_acquire);
@@ -6303,19 +6263,14 @@
   // `instruction->IsArrayGet()` => `!use_load_acquire`.
   DCHECK(!instruction->IsArrayGet() || !use_load_acquire);
 
-  // Query `art::Thread::Current()->GetIsGcMarking()` to decide
-  // whether we need to enter the slow path to update the reference
-  // field within `obj`.  Then, in the slow path, check the gray bit
-  // in the lock word of the reference's holder (`obj`) to decide
-  // whether to mark `ref` and update the field or not.
+  // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
+  // Marking Register) to decide whether we need to enter the slow
+  // path to update the reference field within `obj`. Then, in the
+  // slow path, check the gray bit in the lock word of the reference's
+  // holder (`obj`) to decide whether to mark `ref` and update the
+  // field or not.
   //
-  // Note that we do not actually check the value of `GetIsGcMarking()`;
-  // instead, we load into `temp2` the read barrier mark entry point
-  // corresponding to register `ref`. If `temp2` is null, it means
-  // that `GetIsGcMarking()` is false, and vice versa.
-  //
-  //   temp2 = Thread::Current()->pReadBarrierMarkReg ## root.reg()
-  //   if (temp2 != nullptr) {  // <=> Thread::Current()->GetIsGcMarking()
+  //   if (mr) {  // Thread::Current()->GetIsGcMarking()
   //     // Slow path.
   //     uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
   //     lfence;  // Load fence or artificial data dependency to prevent load-load reordering
@@ -6323,15 +6278,14 @@
   //     bool is_gray = (rb_state == ReadBarrier::GrayState());
   //     if (is_gray) {
   //       old_ref = ref;
-  //       ref = temp2(ref);  // ref = ReadBarrier::Mark(ref);  // Runtime entry point call.
+  //       entrypoint = Thread::Current()->pReadBarrierMarkReg ## ref.reg()
+  //       ref = entrypoint(ref);  // ref = ReadBarrier::Mark(ref);  // Runtime entry point call.
   //       compareAndSwapObject(obj, field_offset, old_ref, ref);
   //     }
   //   }
 
   // Slow path updating the object reference at address `obj + field_offset`
-  // when the GC is marking. The entrypoint will already be loaded in `temp2`.
-  Register temp2 = lr;
-  Location temp2_loc = LocationFrom(temp2);
+  // when the GC is marking. The entrypoint will be loaded by the slow path code.
   SlowPathCodeARM64* slow_path =
       new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64(
           instruction,
@@ -6342,19 +6296,10 @@
           /* scale_factor */ 0u /* "times 1" */,
           needs_null_check,
           use_load_acquire,
-          temp,
-          /* entrypoint */ temp2_loc);
+          temp);
   AddSlowPath(slow_path);
 
-  // temp2 = Thread::Current()->pReadBarrierMarkReg ## ref.reg()
-  const int32_t entry_point_offset =
-      CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ref.reg());
-  // Loading the entrypoint does not require a load acquire since it is only changed when
-  // threads are suspended or running a checkpoint.
-  __ Ldr(temp2, MemOperand(tr, entry_point_offset));
-  // The entrypoint is null when the GC is not marking, this prevents one load compared to
-  // checking GetIsGcMarking.
-  __ Cbnz(temp2, slow_path->GetEntryLabel());
+  __ Cbnz(mr, slow_path->GetEntryLabel());
   // Fast path: the GC is not marking: nothing to do (the field is
   // up-to-date, and we don't need to load the reference).
   __ Bind(slow_path->GetExitLabel());
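
The net effect of the ARM64 changes above, sketched in C++ (a behavioral model,
not the emitted code; ThreadState, LoadReference, and Mark are illustrative
names): the per-site entry point load from thread-local storage disappears, and
the fast path tests the dedicated Marking Register (x20) instead.

  #include <cstdint>

  struct ThreadState {
    int32_t is_gc_marking;  // Mirrored into mr (x20) while Java code runs.
  };

  template <typename Ref>
  Ref LoadReference(const ThreadState* self, Ref* address, Ref (*Mark)(Ref)) {
    // Before: temp = *(tr + entry_point_offset); if (temp != nullptr) slow path.
    // After:  a single CBNZ on mr, saving one TLS load per reference load.
    Ref ref = *address;             // Original reference load (fast path).
    if (self->is_gc_marking != 0) {
      ref = Mark(ref);              // Slow path: loads and calls the entry point.
    }
    return ref;
  }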
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index d9c49d1..584eead 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -70,21 +70,32 @@
 };
 static constexpr size_t kParameterFPRegistersLength = arraysize(kParameterFPRegisters);
 
-// Thread Register
+// Thread Register.
 const vixl::aarch64::Register tr = vixl::aarch64::x19;
+// Marking Register.
+const vixl::aarch64::Register mr = vixl::aarch64::x20;
 // Method register on invoke.
 static const vixl::aarch64::Register kArtMethodRegister = vixl::aarch64::x0;
 const vixl::aarch64::CPURegList vixl_reserved_core_registers(vixl::aarch64::ip0,
                                                              vixl::aarch64::ip1);
 const vixl::aarch64::CPURegList vixl_reserved_fp_registers(vixl::aarch64::d31);
 
-const vixl::aarch64::CPURegList runtime_reserved_core_registers(tr, vixl::aarch64::lr);
+const vixl::aarch64::CPURegList runtime_reserved_core_registers =
+    vixl::aarch64::CPURegList(
+        tr,
+        // Reserve X20 as Marking Register when emitting Baker read barriers.
+        ((kEmitCompilerReadBarrier && kUseBakerReadBarrier) ? mr : vixl::aarch64::NoCPUReg),
+        vixl::aarch64::lr);
 
-// Callee-saved registers AAPCS64 (without x19 - Thread Register)
-const vixl::aarch64::CPURegList callee_saved_core_registers(vixl::aarch64::CPURegister::kRegister,
-                                                            vixl::aarch64::kXRegSize,
-                                                            vixl::aarch64::x20.GetCode(),
-                                                            vixl::aarch64::x30.GetCode());
+// Callee-saved registers AAPCS64, without x19 (Thread Register), and without
+// x20 (Marking Register) when emitting Baker read barriers.
+const vixl::aarch64::CPURegList callee_saved_core_registers(
+    vixl::aarch64::CPURegister::kRegister,
+    vixl::aarch64::kXRegSize,
+    ((kEmitCompilerReadBarrier && kUseBakerReadBarrier)
+         ? vixl::aarch64::x21.GetCode()
+         : vixl::aarch64::x20.GetCode()),
+    vixl::aarch64::x30.GetCode());
 const vixl::aarch64::CPURegList callee_saved_fp_registers(vixl::aarch64::CPURegister::kFPRegister,
                                                           vixl::aarch64::kDRegSize,
                                                           vixl::aarch64::d8.GetCode(),
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 9a2402b..7334678 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -786,7 +786,7 @@
     } else {
       // Entrypoint is not already loaded, load from the thread.
       int32_t entry_point_offset =
-          CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref_reg.GetCode());
+          Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref_reg.GetCode());
       // This runtime call does not require a stack map.
       arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
     }
@@ -8559,7 +8559,7 @@
         //     Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection.
         DCHECK_EQ(ip.GetCode(), 12u);
         const int32_t entry_point_offset =
-            CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ip.GetCode());
+            Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ip.GetCode());
         __ Ldr(kBakerCcEntrypointRegister, MemOperand(tr, entry_point_offset));
 
         vixl::EmissionCheckScope guard(GetVIXLAssembler(),
@@ -8601,7 +8601,7 @@
 
         // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
         const int32_t entry_point_offset =
-            CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(root.reg());
+            Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(root.reg());
         // Loading the entrypoint does not require a load acquire since it is only changed when
         // threads are suspended or running a checkpoint.
         GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(temp), tr, entry_point_offset);
@@ -8705,7 +8705,7 @@
     //     Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection.
     DCHECK_EQ(ip.GetCode(), 12u);
     const int32_t entry_point_offset =
-        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ip.GetCode());
+        Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ip.GetCode());
     __ Ldr(kBakerCcEntrypointRegister, MemOperand(tr, entry_point_offset));
 
     vixl::EmissionCheckScope guard(
@@ -8797,7 +8797,7 @@
     //     Thread::Current()->pReadBarrierMarkReg16, i.e. pReadBarrierMarkIntrospection.
     DCHECK_EQ(ip.GetCode(), 12u);
     const int32_t entry_point_offset =
-        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ip.GetCode());
+        Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ip.GetCode());
     __ Ldr(kBakerCcEntrypointRegister, MemOperand(tr, entry_point_offset));
     __ Add(data_reg, obj, Operand(data_offset));
 
@@ -8883,7 +8883,7 @@
 
   // temp2 = Thread::Current()->pReadBarrierMarkReg ## ref.reg()
   const int32_t entry_point_offset =
-      CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref.reg());
+      Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref.reg());
   // Loading the entrypoint does not require a load acquire since it is only changed when
   // threads are suspended or running a checkpoint.
   GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(temp2), tr, entry_point_offset);
@@ -8951,7 +8951,7 @@
 
   // temp3 = Thread::Current()->pReadBarrierMarkReg ## ref.reg()
   const int32_t entry_point_offset =
-      CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref.reg());
+      Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref.reg());
   // Loading the entrypoint does not require a load acquire since it is only changed when
   // threads are suspended or running a checkpoint.
   GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(temp3), tr, entry_point_offset);
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index abe1d70..be8f9e9 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -656,7 +656,7 @@
       __ NopIfNoReordering();
     } else {
       int32_t entry_point_offset =
-          CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kMipsPointerSize>(ref_reg - 1);
+          Thread::ReadBarrierMarkEntryPointsOffset<kMipsPointerSize>(ref_reg - 1);
       // This runtime call does not require a stack map.
       mips_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset,
                                                         instruction_,
@@ -750,7 +750,7 @@
     //   rX <- ReadBarrierMarkRegX(rX)
     //
     int32_t entry_point_offset =
-        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kMipsPointerSize>(ref_reg - 1);
+        Thread::ReadBarrierMarkEntryPointsOffset<kMipsPointerSize>(ref_reg - 1);
     // This runtime call does not require a stack map.
     mips_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset,
                                                       instruction_,
@@ -6497,7 +6497,7 @@
 
       // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
       const int32_t entry_point_offset =
-          CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kMipsPointerSize>(root.reg() - 1);
+          Thread::ReadBarrierMarkEntryPointsOffset<kMipsPointerSize>(root.reg() - 1);
       // Loading the entrypoint does not require a load acquire since it is only changed when
       // threads are suspended or running a checkpoint.
       __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), TR, entry_point_offset);
diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h
index 1afa1b9..52ee852 100644
--- a/compiler/optimizing/code_generator_mips.h
+++ b/compiler/optimizing/code_generator_mips.h
@@ -346,6 +346,10 @@
                                  uint32_t num_entries,
                                  HBasicBlock* switch_block,
                                  HBasicBlock* default_block);
+
+  int32_t VecAddress(LocationSummary* locations,
+                     size_t size,
+                     /* out */ Register* adjusted_base);
   void GenConditionalMoveR2(HSelect* select);
   void GenConditionalMoveR6(HSelect* select);
 
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 232241c..cf6b3d5 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -606,7 +606,7 @@
       __ Nop();
     } else {
       int32_t entry_point_offset =
-          CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kMips64PointerSize>(ref_reg - 1);
+          Thread::ReadBarrierMarkEntryPointsOffset<kMips64PointerSize>(ref_reg - 1);
       // This runtime call does not require a stack map.
       mips64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset,
                                                           instruction_,
@@ -699,7 +699,7 @@
     //   rX <- ReadBarrierMarkRegX(rX)
     //
     int32_t entry_point_offset =
-        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kMips64PointerSize>(ref_reg - 1);
+        Thread::ReadBarrierMarkEntryPointsOffset<kMips64PointerSize>(ref_reg - 1);
     // This runtime call does not require a stack map.
     mips64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset,
                                                         instruction_,
@@ -4421,7 +4421,7 @@
 
       // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
       const int32_t entry_point_offset =
-          CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kMips64PointerSize>(root.reg() - 1);
+          Thread::ReadBarrierMarkEntryPointsOffset<kMips64PointerSize>(root.reg() - 1);
       // Loading the entrypoint does not require a load acquire since it is only changed when
       // threads are suspended or running a checkpoint.
       __ LoadFromOffset(kLoadDoubleword, temp.AsRegister<GpuRegister>(), TR, entry_point_offset);
diff --git a/compiler/optimizing/code_generator_vector_mips.cc b/compiler/optimizing/code_generator_vector_mips.cc
index c4a3225..ea36e90 100644
--- a/compiler/optimizing/code_generator_vector_mips.cc
+++ b/compiler/optimizing/code_generator_vector_mips.cc
@@ -15,6 +15,7 @@
  */
 
 #include "code_generator_mips.h"
+#include "mirror/array-inl.h"
 
 namespace art {
 namespace mips {
@@ -23,11 +24,68 @@
 #define __ down_cast<MipsAssembler*>(GetAssembler())->  // NOLINT
 
 void LocationsBuilderMIPS::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong:
+      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetOut(Location::RequiresFpuRegister());
+      break;
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      locations->SetInAt(0, Location::RequiresFpuRegister());
+      locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
 }
 
 void InstructionCodeGeneratorMIPS::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  LocationSummary* locations = instruction->GetLocations();
+  VectorRegister dst = VectorRegisterFrom(locations->Out());
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+      DCHECK_EQ(16u, instruction->GetVectorLength());
+      __ FillB(dst, locations->InAt(0).AsRegister<Register>());
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ FillH(dst, locations->InAt(0).AsRegister<Register>());
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ FillW(dst, locations->InAt(0).AsRegister<Register>());
+      break;
+    case Primitive::kPrimLong:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ Mtc1(locations->InAt(0).AsRegisterPairLow<Register>(), FTMP);
+      __ MoveToFpuHigh(locations->InAt(0).AsRegisterPairHigh<Register>(), FTMP);
+      __ ReplicateFPToVectorRegister(dst, FTMP, /* is_double */ true);
+      break;
+    case Primitive::kPrimFloat:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ ReplicateFPToVectorRegister(dst,
+                                     locations->InAt(0).AsFpuRegister<FRegister>(),
+                                     /* is_double */ false);
+      break;
+    case Primitive::kPrimDouble:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ ReplicateFPToVectorRegister(dst,
+                                     locations->InAt(0).AsFpuRegister<FRegister>(),
+                                     /* is_double */ true);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
 }
 
 void LocationsBuilderMIPS::VisitVecSetScalars(HVecSetScalars* instruction) {
@@ -51,13 +109,23 @@
   LocationSummary* locations = new (arena) LocationSummary(instruction);
   switch (instruction->GetPackedType()) {
     case Primitive::kPrimBoolean:
+      locations->SetInAt(0, Location::RequiresFpuRegister());
+      locations->SetOut(Location::RequiresFpuRegister(),
+                        instruction->IsVecNot() ? Location::kOutputOverlap
+                                                : Location::kNoOutputOverlap);
+      break;
     case Primitive::kPrimByte:
     case Primitive::kPrimChar:
     case Primitive::kPrimShort:
     case Primitive::kPrimInt:
+    case Primitive::kPrimLong:
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble:
-      DCHECK(locations);
+      locations->SetInAt(0, Location::RequiresFpuRegister());
+      locations->SetOut(Location::RequiresFpuRegister(),
+                        (instruction->IsVecNeg() || instruction->IsVecAbs())
+                            ? Location::kOutputOverlap
+                            : Location::kNoOutputOverlap);
       break;
     default:
       LOG(FATAL) << "Unsupported SIMD type";
@@ -70,7 +138,17 @@
 }
 
 void InstructionCodeGeneratorMIPS::VisitVecCnv(HVecCnv* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  LocationSummary* locations = instruction->GetLocations();
+  VectorRegister src = VectorRegisterFrom(locations->InAt(0));
+  VectorRegister dst = VectorRegisterFrom(locations->Out());
+  Primitive::Type from = instruction->GetInputType();
+  Primitive::Type to = instruction->GetResultType();
+  if (from == Primitive::kPrimInt && to == Primitive::kPrimFloat) {
+    DCHECK_EQ(4u, instruction->GetVectorLength());
+    __ Ffint_sW(dst, src);
+  } else {
+    LOG(FATAL) << "Unsupported SIMD type";
+  }
 }
 
 void LocationsBuilderMIPS::VisitVecNeg(HVecNeg* instruction) {
@@ -78,7 +156,45 @@
 }
 
 void InstructionCodeGeneratorMIPS::VisitVecNeg(HVecNeg* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  LocationSummary* locations = instruction->GetLocations();
+  VectorRegister src = VectorRegisterFrom(locations->InAt(0));
+  VectorRegister dst = VectorRegisterFrom(locations->Out());
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimByte:
+      DCHECK_EQ(16u, instruction->GetVectorLength());
+      __ FillB(dst, ZERO);
+      __ SubvB(dst, dst, src);
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ FillH(dst, ZERO);
+      __ SubvH(dst, dst, src);
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ FillW(dst, ZERO);
+      __ SubvW(dst, dst, src);
+      break;
+    case Primitive::kPrimLong:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ FillW(dst, ZERO);
+      __ SubvD(dst, dst, src);
+      break;
+    case Primitive::kPrimFloat:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ FillW(dst, ZERO);
+      __ FsubW(dst, dst, src);
+      break;
+    case Primitive::kPrimDouble:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ FillW(dst, ZERO);
+      __ FsubD(dst, dst, src);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
 }
 
 void LocationsBuilderMIPS::VisitVecAbs(HVecAbs* instruction) {
@@ -86,7 +202,47 @@
 }
 
 void InstructionCodeGeneratorMIPS::VisitVecAbs(HVecAbs* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  LocationSummary* locations = instruction->GetLocations();
+  VectorRegister src = VectorRegisterFrom(locations->InAt(0));
+  VectorRegister dst = VectorRegisterFrom(locations->Out());
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimByte:
+      DCHECK_EQ(16u, instruction->GetVectorLength());
+      __ FillB(dst, ZERO);       // all zeroes
+      __ Add_aB(dst, dst, src);  // dst = abs(0) + abs(src)
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ FillH(dst, ZERO);       // all zeroes
+      __ Add_aH(dst, dst, src);  // dst = abs(0) + abs(src)
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ FillW(dst, ZERO);       // all zeroes
+      __ Add_aW(dst, dst, src);  // dst = abs(0) + abs(src)
+      break;
+    case Primitive::kPrimLong:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ FillW(dst, ZERO);       // all zeroes
+      __ Add_aD(dst, dst, src);  // dst = abs(0) + abs(src)
+      break;
+    case Primitive::kPrimFloat:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ LdiW(dst, -1);          // all ones
+      __ SrliW(dst, dst, 1);
+      __ AndV(dst, dst, src);
+      break;
+    case Primitive::kPrimDouble:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ LdiD(dst, -1);          // all ones
+      __ SrliD(dst, dst, 1);
+      __ AndV(dst, dst, src);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
 }
 
 void LocationsBuilderMIPS::VisitVecNot(HVecNot* instruction) {
@@ -94,7 +250,30 @@
 }
 
 void InstructionCodeGeneratorMIPS::VisitVecNot(HVecNot* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  LocationSummary* locations = instruction->GetLocations();
+  VectorRegister src = VectorRegisterFrom(locations->InAt(0));
+  VectorRegister dst = VectorRegisterFrom(locations->Out());
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimBoolean:  // special case boolean-not
+      DCHECK_EQ(16u, instruction->GetVectorLength());
+      __ LdiB(dst, 1);
+      __ XorV(dst, dst, src);
+      break;
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong:
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      DCHECK_LE(2u, instruction->GetVectorLength());
+      DCHECK_LE(instruction->GetVectorLength(), 16u);
+      __ NorV(dst, src, src);  // lanes do not matter
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
 }
 
 // Helper to set up locations for vector binary operations.
@@ -106,9 +285,12 @@
     case Primitive::kPrimChar:
     case Primitive::kPrimShort:
     case Primitive::kPrimInt:
+    case Primitive::kPrimLong:
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble:
-      DCHECK(locations);
+      locations->SetInAt(0, Location::RequiresFpuRegister());
+      locations->SetInAt(1, Location::RequiresFpuRegister());
+      locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
       break;
     default:
       LOG(FATAL) << "Unsupported SIMD type";
@@ -121,7 +303,40 @@
 }
 
 void InstructionCodeGeneratorMIPS::VisitVecAdd(HVecAdd* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  LocationSummary* locations = instruction->GetLocations();
+  VectorRegister lhs = VectorRegisterFrom(locations->InAt(0));
+  VectorRegister rhs = VectorRegisterFrom(locations->InAt(1));
+  VectorRegister dst = VectorRegisterFrom(locations->Out());
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimByte:
+      DCHECK_EQ(16u, instruction->GetVectorLength());
+      __ AddvB(dst, lhs, rhs);
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ AddvH(dst, lhs, rhs);
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ AddvW(dst, lhs, rhs);
+      break;
+    case Primitive::kPrimLong:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ AddvD(dst, lhs, rhs);
+      break;
+    case Primitive::kPrimFloat:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ FaddW(dst, lhs, rhs);
+      break;
+    case Primitive::kPrimDouble:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ FaddD(dst, lhs, rhs);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
 }
 
 void LocationsBuilderMIPS::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
@@ -129,7 +344,40 @@
 }
 
 void InstructionCodeGeneratorMIPS::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  LocationSummary* locations = instruction->GetLocations();
+  VectorRegister lhs = VectorRegisterFrom(locations->InAt(0));
+  VectorRegister rhs = VectorRegisterFrom(locations->InAt(1));
+  VectorRegister dst = VectorRegisterFrom(locations->Out());
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimByte:
+      DCHECK_EQ(16u, instruction->GetVectorLength());
+      if (instruction->IsUnsigned()) {
+        instruction->IsRounded()
+            ? __ Aver_uB(dst, lhs, rhs)
+            : __ Ave_uB(dst, lhs, rhs);
+      } else {
+        instruction->IsRounded()
+            ? __ Aver_sB(dst, lhs, rhs)
+            : __ Ave_sB(dst, lhs, rhs);
+      }
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      if (instruction->IsUnsigned()) {
+        instruction->IsRounded()
+            ? __ Aver_uH(dst, lhs, rhs)
+            : __ Ave_uH(dst, lhs, rhs);
+      } else {
+        instruction->IsRounded()
+            ? __ Aver_sH(dst, lhs, rhs)
+            : __ Ave_sH(dst, lhs, rhs);
+      }
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
 }
 
 void LocationsBuilderMIPS::VisitVecSub(HVecSub* instruction) {
@@ -137,7 +385,40 @@
 }
 
 void InstructionCodeGeneratorMIPS::VisitVecSub(HVecSub* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  LocationSummary* locations = instruction->GetLocations();
+  VectorRegister lhs = VectorRegisterFrom(locations->InAt(0));
+  VectorRegister rhs = VectorRegisterFrom(locations->InAt(1));
+  VectorRegister dst = VectorRegisterFrom(locations->Out());
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimByte:
+      DCHECK_EQ(16u, instruction->GetVectorLength());
+      __ SubvB(dst, lhs, rhs);
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ SubvH(dst, lhs, rhs);
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ SubvW(dst, lhs, rhs);
+      break;
+    case Primitive::kPrimLong:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ SubvD(dst, lhs, rhs);
+      break;
+    case Primitive::kPrimFloat:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ FsubW(dst, lhs, rhs);
+      break;
+    case Primitive::kPrimDouble:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ FsubD(dst, lhs, rhs);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
 }
 
 void LocationsBuilderMIPS::VisitVecMul(HVecMul* instruction) {
@@ -145,7 +426,40 @@
 }
 
 void InstructionCodeGeneratorMIPS::VisitVecMul(HVecMul* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  LocationSummary* locations = instruction->GetLocations();
+  VectorRegister lhs = VectorRegisterFrom(locations->InAt(0));
+  VectorRegister rhs = VectorRegisterFrom(locations->InAt(1));
+  VectorRegister dst = VectorRegisterFrom(locations->Out());
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimByte:
+      DCHECK_EQ(16u, instruction->GetVectorLength());
+      __ MulvB(dst, lhs, rhs);
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ MulvH(dst, lhs, rhs);
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ MulvW(dst, lhs, rhs);
+      break;
+    case Primitive::kPrimLong:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ MulvD(dst, lhs, rhs);
+      break;
+    case Primitive::kPrimFloat:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ FmulW(dst, lhs, rhs);
+      break;
+    case Primitive::kPrimDouble:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ FmulD(dst, lhs, rhs);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
 }
 
 void LocationsBuilderMIPS::VisitVecDiv(HVecDiv* instruction) {
@@ -153,7 +467,23 @@
 }
 
 void InstructionCodeGeneratorMIPS::VisitVecDiv(HVecDiv* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  LocationSummary* locations = instruction->GetLocations();
+  VectorRegister lhs = VectorRegisterFrom(locations->InAt(0));
+  VectorRegister rhs = VectorRegisterFrom(locations->InAt(1));
+  VectorRegister dst = VectorRegisterFrom(locations->Out());
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimFloat:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ FdivW(dst, lhs, rhs);
+      break;
+    case Primitive::kPrimDouble:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ FdivD(dst, lhs, rhs);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
 }
 
 void LocationsBuilderMIPS::VisitVecMin(HVecMin* instruction) {
@@ -161,7 +491,60 @@
 }
 
 void InstructionCodeGeneratorMIPS::VisitVecMin(HVecMin* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  LocationSummary* locations = instruction->GetLocations();
+  VectorRegister lhs = VectorRegisterFrom(locations->InAt(0));
+  VectorRegister rhs = VectorRegisterFrom(locations->InAt(1));
+  VectorRegister dst = VectorRegisterFrom(locations->Out());
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimByte:
+      DCHECK_EQ(16u, instruction->GetVectorLength());
+      if (instruction->IsUnsigned()) {
+        __ Min_uB(dst, lhs, rhs);
+      } else {
+        __ Min_sB(dst, lhs, rhs);
+      }
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      if (instruction->IsUnsigned()) {
+        __ Min_uH(dst, lhs, rhs);
+      } else {
+        __ Min_sH(dst, lhs, rhs);
+      }
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      if (instruction->IsUnsigned()) {
+        __ Min_uW(dst, lhs, rhs);
+      } else {
+        __ Min_sW(dst, lhs, rhs);
+      }
+      break;
+    case Primitive::kPrimLong:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      if (instruction->IsUnsigned()) {
+        __ Min_uD(dst, lhs, rhs);
+      } else {
+        __ Min_sD(dst, lhs, rhs);
+      }
+      break;
+    // When one of the arguments is NaN, fmin.df returns the other argument, but Java expects
+    // a NaN value.
+    // TODO: Fix min(x, NaN) cases for float and double.
+    case Primitive::kPrimFloat:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      DCHECK(!instruction->IsUnsigned());
+      __ FminW(dst, lhs, rhs);
+      break;
+    case Primitive::kPrimDouble:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      DCHECK(!instruction->IsUnsigned());
+      __ FminD(dst, lhs, rhs);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
 }
 
 void LocationsBuilderMIPS::VisitVecMax(HVecMax* instruction) {
@@ -169,7 +552,60 @@
 }
 
 void InstructionCodeGeneratorMIPS::VisitVecMax(HVecMax* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  LocationSummary* locations = instruction->GetLocations();
+  VectorRegister lhs = VectorRegisterFrom(locations->InAt(0));
+  VectorRegister rhs = VectorRegisterFrom(locations->InAt(1));
+  VectorRegister dst = VectorRegisterFrom(locations->Out());
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimByte:
+      DCHECK_EQ(16u, instruction->GetVectorLength());
+      if (instruction->IsUnsigned()) {
+        __ Max_uB(dst, lhs, rhs);
+      } else {
+        __ Max_sB(dst, lhs, rhs);
+      }
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      if (instruction->IsUnsigned()) {
+        __ Max_uH(dst, lhs, rhs);
+      } else {
+        __ Max_sH(dst, lhs, rhs);
+      }
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      if (instruction->IsUnsigned()) {
+        __ Max_uW(dst, lhs, rhs);
+      } else {
+        __ Max_sW(dst, lhs, rhs);
+      }
+      break;
+    case Primitive::kPrimLong:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      if (instruction->IsUnsigned()) {
+        __ Max_uD(dst, lhs, rhs);
+      } else {
+        __ Max_sD(dst, lhs, rhs);
+      }
+      break;
+    // When one of the arguments is NaN, fmax.df returns the other argument, but Java expects
+    // a NaN value.
+    // TODO: Fix max(x, NaN) cases for float and double.
+    case Primitive::kPrimFloat:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      DCHECK(!instruction->IsUnsigned());
+      __ FmaxW(dst, lhs, rhs);
+      break;
+    case Primitive::kPrimDouble:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      DCHECK(!instruction->IsUnsigned());
+      __ FmaxD(dst, lhs, rhs);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
 }
 
 void LocationsBuilderMIPS::VisitVecAnd(HVecAnd* instruction) {
@@ -177,7 +613,27 @@
 }
 
 void InstructionCodeGeneratorMIPS::VisitVecAnd(HVecAnd* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  LocationSummary* locations = instruction->GetLocations();
+  VectorRegister lhs = VectorRegisterFrom(locations->InAt(0));
+  VectorRegister rhs = VectorRegisterFrom(locations->InAt(1));
+  VectorRegister dst = VectorRegisterFrom(locations->Out());
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong:
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      DCHECK_LE(2u, instruction->GetVectorLength());
+      DCHECK_LE(instruction->GetVectorLength(), 16u);
+      __ AndV(dst, lhs, rhs);  // lanes do not matter
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
 }
 
 void LocationsBuilderMIPS::VisitVecAndNot(HVecAndNot* instruction) {
@@ -193,7 +649,27 @@
 }
 
 void InstructionCodeGeneratorMIPS::VisitVecOr(HVecOr* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  LocationSummary* locations = instruction->GetLocations();
+  VectorRegister lhs = VectorRegisterFrom(locations->InAt(0));
+  VectorRegister rhs = VectorRegisterFrom(locations->InAt(1));
+  VectorRegister dst = VectorRegisterFrom(locations->Out());
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong:
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      DCHECK_LE(2u, instruction->GetVectorLength());
+      DCHECK_LE(instruction->GetVectorLength(), 16u);
+      __ OrV(dst, lhs, rhs);  // lanes do not matter
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
 }
 
 void LocationsBuilderMIPS::VisitVecXor(HVecXor* instruction) {
@@ -201,7 +677,27 @@
 }
 
 void InstructionCodeGeneratorMIPS::VisitVecXor(HVecXor* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  LocationSummary* locations = instruction->GetLocations();
+  VectorRegister lhs = VectorRegisterFrom(locations->InAt(0));
+  VectorRegister rhs = VectorRegisterFrom(locations->InAt(1));
+  VectorRegister dst = VectorRegisterFrom(locations->Out());
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong:
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      DCHECK_LE(2u, instruction->GetVectorLength());
+      DCHECK_LE(instruction->GetVectorLength(), 16u);
+      __ XorV(dst, lhs, rhs);  // lanes do not matter
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
 }
 
 // Helper to set up locations for vector shift operations.
@@ -213,7 +709,9 @@
     case Primitive::kPrimShort:
     case Primitive::kPrimInt:
     case Primitive::kPrimLong:
-      DCHECK(locations);
+      locations->SetInAt(0, Location::RequiresFpuRegister());
+      locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+      locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
       break;
     default:
       LOG(FATAL) << "Unsupported SIMD type";
@@ -226,7 +724,32 @@
 }
 
 void InstructionCodeGeneratorMIPS::VisitVecShl(HVecShl* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  LocationSummary* locations = instruction->GetLocations();
+  VectorRegister lhs = VectorRegisterFrom(locations->InAt(0));
+  VectorRegister dst = VectorRegisterFrom(locations->Out());
+  int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimByte:
+      DCHECK_EQ(16u, instruction->GetVectorLength());
+      __ SlliB(dst, lhs, value);
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ SlliH(dst, lhs, value);
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ SlliW(dst, lhs, value);
+      break;
+    case Primitive::kPrimLong:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ SlliD(dst, lhs, value);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
 }
 
 void LocationsBuilderMIPS::VisitVecShr(HVecShr* instruction) {
@@ -234,7 +757,32 @@
 }
 
 void InstructionCodeGeneratorMIPS::VisitVecShr(HVecShr* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  LocationSummary* locations = instruction->GetLocations();
+  VectorRegister lhs = VectorRegisterFrom(locations->InAt(0));
+  VectorRegister dst = VectorRegisterFrom(locations->Out());
+  int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimByte:
+      DCHECK_EQ(16u, instruction->GetVectorLength());
+      __ SraiB(dst, lhs, value);
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ SraiH(dst, lhs, value);
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ SraiW(dst, lhs, value);
+      break;
+    case Primitive::kPrimLong:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ SraiD(dst, lhs, value);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
 }
 
 void LocationsBuilderMIPS::VisitVecUShr(HVecUShr* instruction) {
@@ -242,7 +790,32 @@
 }
 
 void InstructionCodeGeneratorMIPS::VisitVecUShr(HVecUShr* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  LocationSummary* locations = instruction->GetLocations();
+  VectorRegister lhs = VectorRegisterFrom(locations->InAt(0));
+  VectorRegister dst = VectorRegisterFrom(locations->Out());
+  int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimByte:
+      DCHECK_EQ(16u, instruction->GetVectorLength());
+      __ SrliB(dst, lhs, value);
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ SrliH(dst, lhs, value);
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ SrliW(dst, lhs, value);
+      break;
+    case Primitive::kPrimLong:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ SrliD(dst, lhs, value);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
 }
 
 void LocationsBuilderMIPS::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
@@ -253,20 +826,143 @@
   LOG(FATAL) << "No SIMD for " << instr->GetId();
 }
 
+// Helper to set up locations for vector memory operations.
+static void CreateVecMemLocations(ArenaAllocator* arena,
+                                  HVecMemoryOperation* instruction,
+                                  bool is_load) {
+  LocationSummary* locations = new (arena) LocationSummary(instruction);
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong:
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+      if (is_load) {
+        locations->SetOut(Location::RequiresFpuRegister());
+      } else {
+        locations->SetInAt(2, Location::RequiresFpuRegister());
+      }
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
+
+// Helper to prepare the base register and offset for vector memory operations. Returns the offset
+// and sets the output parameter adjusted_base to the original base or to the reserved temporary
+// register AT.
+int32_t InstructionCodeGeneratorMIPS::VecAddress(LocationSummary* locations,
+                                                 size_t size,
+                                                 /* out */ Register* adjusted_base) {
+  Register base = locations->InAt(0).AsRegister<Register>();
+  Location index = locations->InAt(1);
+  int scale = TIMES_1;
+  switch (size) {
+    case 2: scale = TIMES_2; break;
+    case 4: scale = TIMES_4; break;
+    case 8: scale = TIMES_8; break;
+    default: break;
+  }
+  int32_t offset = mirror::Array::DataOffset(size).Int32Value();
+
+  if (index.IsConstant()) {
+    offset += index.GetConstant()->AsIntConstant()->GetValue() << scale;
+    __ AdjustBaseOffsetAndElementSizeShift(base, offset, scale);
+    *adjusted_base = base;
+  } else {
+    Register index_reg = index.AsRegister<Register>();
+    if (scale != TIMES_1) {
+      __ Lsa(AT, index_reg, base, scale);
+    } else {
+      __ Addu(AT, base, index_reg);
+    }
+    *adjusted_base = AT;
+  }
+  return offset;
+}
+
 void LocationsBuilderMIPS::VisitVecLoad(HVecLoad* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  CreateVecMemLocations(GetGraph()->GetArena(), instruction, /* is_load */ true);
 }
 
 void InstructionCodeGeneratorMIPS::VisitVecLoad(HVecLoad* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  LocationSummary* locations = instruction->GetLocations();
+  size_t size = Primitive::ComponentSize(instruction->GetPackedType());
+  VectorRegister reg = VectorRegisterFrom(locations->Out());
+  Register base;
+  int32_t offset = VecAddress(locations, size, &base);
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+      DCHECK_EQ(16u, instruction->GetVectorLength());
+      __ LdB(reg, base, offset);
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      // Loading 8 bytes (needed when dealing with compressed strings in StringCharAt) from an
+      // unaligned memory address may cause a trap to the kernel if the CPU doesn't directly
+      // support unaligned loads and stores.
+      // TODO: Implement support for StringCharAt.
+      DCHECK(!instruction->IsStringCharAt());
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ LdH(reg, base, offset);
+      break;
+    case Primitive::kPrimInt:
+    case Primitive::kPrimFloat:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ LdW(reg, base, offset);
+      break;
+    case Primitive::kPrimLong:
+    case Primitive::kPrimDouble:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ LdD(reg, base, offset);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
 }
 
 void LocationsBuilderMIPS::VisitVecStore(HVecStore* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  CreateVecMemLocations(GetGraph()->GetArena(), instruction, /* is_load */ false);
 }
 
 void InstructionCodeGeneratorMIPS::VisitVecStore(HVecStore* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  LocationSummary* locations = instruction->GetLocations();
+  size_t size = Primitive::ComponentSize(instruction->GetPackedType());
+  VectorRegister reg = VectorRegisterFrom(locations->InAt(2));
+  Register base;
+  int32_t offset = VecAddress(locations, size, &base);
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+      DCHECK_EQ(16u, instruction->GetVectorLength());
+      __ StB(reg, base, offset);
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ StH(reg, base, offset);
+      break;
+    case Primitive::kPrimInt:
+    case Primitive::kPrimFloat:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ StW(reg, base, offset);
+      break;
+    case Primitive::kPrimLong:
+    case Primitive::kPrimDouble:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ StD(reg, base, offset);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
 }
 
 #undef __
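
A note on the DCHECK_EQ pattern used throughout code_generator_vector_mips.cc: MSA vector
registers are 128 bits wide, so the expected vector length is simply 16 bytes divided by the
element size. A compile-time restatement of that invariant (standalone, names illustrative):

    constexpr size_t kMsaRegisterSizeInBytes = 16;
    constexpr size_t ExpectedMsaVectorLength(size_t element_size_in_bytes) {
      return kMsaRegisterSizeInBytes / element_size_in_bytes;
    }
    static_assert(ExpectedMsaVectorLength(1) == 16, "byte lanes");       // B lanes
    static_assert(ExpectedMsaVectorLength(2) == 8, "halfword lanes");    // H lanes
    static_assert(ExpectedMsaVectorLength(4) == 4, "word lanes");        // W lanes
    static_assert(ExpectedMsaVectorLength(8) == 2, "doubleword lanes");  // D lanes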
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 79fccfe..af0e646 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -509,8 +509,7 @@
     //
     //   rX <- ReadBarrierMarkRegX(rX)
     //
-    int32_t entry_point_offset =
-        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86PointerSize>(ref_reg);
+    int32_t entry_point_offset = Thread::ReadBarrierMarkEntryPointsOffset<kX86PointerSize>(ref_reg);
     // This runtime call does not require a stack map.
     x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
     __ jmp(GetExitLabel());
@@ -595,8 +594,7 @@
     //
     //   rX <- ReadBarrierMarkRegX(rX)
     //
-    int32_t entry_point_offset =
-        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86PointerSize>(ref_reg);
+    int32_t entry_point_offset = Thread::ReadBarrierMarkEntryPointsOffset<kX86PointerSize>(ref_reg);
     // This runtime call does not require a stack map.
     x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
 
@@ -7153,7 +7151,7 @@
 
       // Test the entrypoint (`Thread::Current()->pReadBarrierMarkReg ## root.reg()`).
       const int32_t entry_point_offset =
-          CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86PointerSize>(root.reg());
+          Thread::ReadBarrierMarkEntryPointsOffset<kX86PointerSize>(root.reg());
       __ fs()->cmpl(Address::Absolute(entry_point_offset), Immediate(0));
       // The entrypoint is null when the GC is not marking.
       __ j(kNotEqual, slow_path->GetEntryLabel());
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 57319ce..86f6d51 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -524,7 +524,7 @@
     //   rX <- ReadBarrierMarkRegX(rX)
     //
     int32_t entry_point_offset =
-        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(ref_reg);
+        Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(ref_reg);
     // This runtime call does not require a stack map.
     x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
     __ jmp(GetExitLabel());
@@ -615,7 +615,7 @@
     //   rX <- ReadBarrierMarkRegX(rX)
     //
     int32_t entry_point_offset =
-        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(ref_reg);
+        Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(ref_reg);
     // This runtime call does not require a stack map.
     x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
 
@@ -6540,7 +6540,7 @@
 
       // Test the `Thread::Current()->pReadBarrierMarkReg ## root.reg()` entrypoint.
       const int32_t entry_point_offset =
-          CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(root.reg());
+          Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(root.reg());
       __ gs()->cmpl(Address::Absolute(entry_point_offset, /* no_rip */ true), Immediate(0));
       // The entrypoint is null when the GC is not marking.
       __ j(kNotEqual, slow_path->GetEntryLabel());
diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc
index a73b124..839f328 100644
--- a/compiler/optimizing/instruction_builder.cc
+++ b/compiler/optimizing/instruction_builder.cc
@@ -22,6 +22,7 @@
 #include "dex_instruction-inl.h"
 #include "driver/compiler_options.h"
 #include "imtable-inl.h"
+#include "quicken_info.h"
 #include "sharpening.h"
 #include "scoped_thread_state_change-inl.h"
 
@@ -312,6 +313,11 @@
 
     DCHECK(!IsBlockPopulated(current_block_));
 
+    uint32_t quicken_index = 0;
+    if (CanDecodeQuickenedInfo()) {
+      quicken_index = block_builder_->GetQuickenIndex(block_dex_pc);
+    }
+
     for (CodeItemIterator it(code_item_, block_dex_pc); !it.Done(); it.Advance()) {
       if (current_block_ == nullptr) {
         // The previous instruction ended this block.
@@ -332,9 +338,13 @@
         AppendInstruction(new (arena_) HNativeDebugInfo(dex_pc));
       }
 
-      if (!ProcessDexInstruction(it.CurrentInstruction(), dex_pc)) {
+      if (!ProcessDexInstruction(it.CurrentInstruction(), dex_pc, quicken_index)) {
         return false;
       }
+
+      if (QuickenInfoTable::NeedsIndexForInstruction(&it.CurrentInstruction())) {
+        ++quicken_index;
+      }
     }
 
     if (current_block_ != nullptr) {
@@ -1261,7 +1271,8 @@
 
 bool HInstructionBuilder::BuildInstanceFieldAccess(const Instruction& instruction,
                                                    uint32_t dex_pc,
-                                                   bool is_put) {
+                                                   bool is_put,
+                                                   size_t quicken_index) {
   uint32_t source_or_dest_reg = instruction.VRegA_22c();
   uint32_t obj_reg = instruction.VRegB_22c();
   uint16_t field_index;
@@ -1269,7 +1280,7 @@
     if (!CanDecodeQuickenedInfo()) {
       return false;
     }
-    field_index = LookupQuickenedInfo(dex_pc);
+    field_index = LookupQuickenedInfo(quicken_index);
   } else {
     field_index = instruction.VRegC_22c();
   }
@@ -1805,40 +1816,17 @@
 }
 
 bool HInstructionBuilder::CanDecodeQuickenedInfo() const {
-  return interpreter_metadata_ != nullptr;
+  return !quicken_info_.IsNull();
 }
 
-uint16_t HInstructionBuilder::LookupQuickenedInfo(uint32_t dex_pc) {
-  DCHECK(interpreter_metadata_ != nullptr);
-
-  // First check if the info has already been decoded from `interpreter_metadata_`.
-  auto it = skipped_interpreter_metadata_.find(dex_pc);
-  if (it != skipped_interpreter_metadata_.end()) {
-    // Remove the entry from the map and return the parsed info.
-    uint16_t value_in_map = it->second;
-    skipped_interpreter_metadata_.erase(it);
-    return value_in_map;
-  }
-
-  // Otherwise start parsing `interpreter_metadata_` until the slot for `dex_pc`
-  // is found. Store skipped values in the `skipped_interpreter_metadata_` map.
-  while (true) {
-    uint32_t dex_pc_in_map = DecodeUnsignedLeb128(&interpreter_metadata_);
-    uint16_t value_in_map = DecodeUnsignedLeb128(&interpreter_metadata_);
-    DCHECK_LE(dex_pc_in_map, dex_pc);
-
-    if (dex_pc_in_map == dex_pc) {
-      return value_in_map;
-    } else {
-      // Overwrite and not Put, as quickened CHECK-CAST has two entries with
-      // the same dex_pc. This is OK, because the compiler does not care about those
-      // entries.
-      skipped_interpreter_metadata_.Overwrite(dex_pc_in_map, value_in_map);
-    }
-  }
+uint16_t HInstructionBuilder::LookupQuickenedInfo(uint32_t quicken_index) {
+  DCHECK(CanDecodeQuickenedInfo());
+  return quicken_info_.GetData(quicken_index);
 }
 
-bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction, uint32_t dex_pc) {
+bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction,
+                                                uint32_t dex_pc,
+                                                size_t quicken_index) {
   switch (instruction.Opcode()) {
     case Instruction::CONST_4: {
       int32_t register_index = instruction.VRegA();
@@ -1995,7 +1983,7 @@
         if (!CanDecodeQuickenedInfo()) {
           return false;
         }
-        method_idx = LookupQuickenedInfo(dex_pc);
+        method_idx = LookupQuickenedInfo(quicken_index);
       } else {
         method_idx = instruction.VRegB_35c();
       }
@@ -2020,7 +2008,7 @@
         if (!CanDecodeQuickenedInfo()) {
           return false;
         }
-        method_idx = LookupQuickenedInfo(dex_pc);
+        method_idx = LookupQuickenedInfo(quicken_index);
       } else {
         method_idx = instruction.VRegB_3rc();
       }
@@ -2693,7 +2681,7 @@
     case Instruction::IGET_CHAR_QUICK:
     case Instruction::IGET_SHORT:
     case Instruction::IGET_SHORT_QUICK: {
-      if (!BuildInstanceFieldAccess(instruction, dex_pc, false)) {
+      if (!BuildInstanceFieldAccess(instruction, dex_pc, false, quicken_index)) {
         return false;
       }
       break;
@@ -2713,7 +2701,7 @@
     case Instruction::IPUT_CHAR_QUICK:
     case Instruction::IPUT_SHORT:
     case Instruction::IPUT_SHORT_QUICK: {
-      if (!BuildInstanceFieldAccess(instruction, dex_pc, true)) {
+      if (!BuildInstanceFieldAccess(instruction, dex_pc, true, quicken_index)) {
         return false;
       }
       break;
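
The net effect of the instruction_builder.cc changes above: instead of lazily decoding
LEB128-encoded (dex_pc, value) pairs and caching out-of-order entries in a map, the builder keeps
a running quicken_index that advances once per instruction that consumes a quickening entry and
uses it as a direct index into a dense table. A sketch of that walk, with GetData and
NeedsIndexForInstruction standing in for the quicken_info.h API used above (treat the exact
signatures as assumptions):

    uint32_t quicken_index = 0;
    for (CodeItemIterator it(code_item); !it.Done(); it.Advance()) {
      const Instruction& inst = it.CurrentInstruction();
      if (inst.IsQuickened()) {
        // O(1) array lookup replaces the old linear LEB128 scan plus skip map.
        uint16_t original_index = quicken_info.GetData(quicken_index);
        UseOriginalIndex(inst, original_index);  // hypothetical consumer
      }
      if (QuickenInfoTable::NeedsIndexForInstruction(&inst)) {
        ++quicken_index;
      }
    }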
diff --git a/compiler/optimizing/instruction_builder.h b/compiler/optimizing/instruction_builder.h
index e968760..5a83df3 100644
--- a/compiler/optimizing/instruction_builder.h
+++ b/compiler/optimizing/instruction_builder.h
@@ -27,6 +27,7 @@
 #include "mirror/dex_cache.h"
 #include "nodes.h"
 #include "optimizing_compiler_stats.h"
+#include "quicken_info.h"
 #include "ssa_builder.h"
 
 namespace art {
@@ -67,9 +68,7 @@
         code_generator_(code_generator),
         dex_compilation_unit_(dex_compilation_unit),
         outer_compilation_unit_(outer_compilation_unit),
-        interpreter_metadata_(interpreter_metadata),
-        skipped_interpreter_metadata_(std::less<uint32_t>(),
-                                      arena_->Adapter(kArenaAllocGraphBuilder)),
+        quicken_info_(interpreter_metadata),
         compilation_stats_(compiler_stats),
         dex_cache_(dex_cache),
         loop_headers_(graph->GetArena()->Adapter(kArenaAllocGraphBuilder)) {
@@ -85,11 +84,11 @@
   void PropagateLocalsToCatchBlocks();
   void SetLoopHeaderPhiInputs();
 
-  bool ProcessDexInstruction(const Instruction& instruction, uint32_t dex_pc);
+  bool ProcessDexInstruction(const Instruction& instruction, uint32_t dex_pc, size_t quicken_index);
   void FindNativeDebugInfoLocations(ArenaBitVector* locations);
 
   bool CanDecodeQuickenedInfo() const;
-  uint16_t LookupQuickenedInfo(uint32_t dex_pc);
+  uint16_t LookupQuickenedInfo(uint32_t quicken_index);
 
   HBasicBlock* FindBlockStartingAt(uint32_t dex_pc) const;
 
@@ -159,7 +158,10 @@
   void BuildReturn(const Instruction& instruction, Primitive::Type type, uint32_t dex_pc);
 
   // Builds an instance field access node and returns whether the instruction is supported.
-  bool BuildInstanceFieldAccess(const Instruction& instruction, uint32_t dex_pc, bool is_put);
+  bool BuildInstanceFieldAccess(const Instruction& instruction,
+                                uint32_t dex_pc,
+                                bool is_put,
+                                size_t quicken_index);
 
   void BuildUnresolvedStaticFieldAccess(const Instruction& instruction,
                                         uint32_t dex_pc,
@@ -349,14 +351,8 @@
   // methods.
   const DexCompilationUnit* const outer_compilation_unit_;
 
-  // Original values kept after instruction quickening. This is a data buffer
-  // of Leb128-encoded (dex_pc, value) pairs sorted by dex_pc.
-  const uint8_t* interpreter_metadata_;
-
-  // InstructionBuilder does not parse instructions in dex_pc order. Quickening
-  // info for out-of-order dex_pcs is stored in a map until the positions
-  // are eventually visited.
-  ArenaSafeMap<uint32_t, uint16_t> skipped_interpreter_metadata_;
+  // Original values kept after instruction quickening.
+  QuickenInfoTable quicken_info_;
 
   OptimizingCompilerStats* compilation_stats_;
   Handle<mirror::DexCache> dex_cache_;
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index ae5f8d1..3795866 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -154,8 +154,7 @@
     DCHECK(0 <= tmp && tmp < kNumberOfCoreRegisters) << tmp;
     // TODO: Load the entrypoint once before the loop, instead of
     // loading it at every iteration.
-    int32_t entry_point_offset =
-        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(tmp);
+    int32_t entry_point_offset = Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(tmp);
     // This runtime call does not require a stack map.
     arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
     __ MaybePoisonHeapReference(tmp);
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 37d7981..aec1ec7 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -205,7 +205,7 @@
     // TODO: Load the entrypoint once before the loop, instead of
     // loading it at every iteration.
     int32_t entry_point_offset =
-        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(tmp_.reg());
+        Thread::ReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(tmp_.reg());
     // This runtime call does not require a stack map.
     codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
     codegen->GetAssembler()->MaybePoisonHeapReference(tmp_reg);
diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc
index 3c9b613..ced931b 100644
--- a/compiler/optimizing/intrinsics_arm_vixl.cc
+++ b/compiler/optimizing/intrinsics_arm_vixl.cc
@@ -226,7 +226,7 @@
     // TODO: Load the entrypoint once before the loop, instead of
     // loading it at every iteration.
     int32_t entry_point_offset =
-        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(tmp.GetCode());
+        Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(tmp.GetCode());
     // This runtime call does not require a stack map.
     arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
     assembler->MaybePoisonHeapReference(tmp);
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 6b4851d..a18b0cc 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -143,8 +143,7 @@
     // explanations.)
     DCHECK_NE(temp2, ESP);
     DCHECK(0 <= temp2 && temp2 < kNumberOfCpuRegisters) << temp2;
-    int32_t entry_point_offset =
-        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86PointerSize>(temp2);
+    int32_t entry_point_offset = Thread::ReadBarrierMarkEntryPointsOffset<kX86PointerSize>(temp2);
     // This runtime call does not require a stack map.
     x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
     __ MaybePoisonHeapReference(temp2);
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index ef98b7b..5abdb1d 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -105,8 +105,7 @@
     // No need to save live registers; it's taken care of by the
     // entrypoint. Also, there is no need to update the stack mask,
     // as this runtime call will not trigger a garbage collection.
-    int32_t entry_point_offset =
-        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(TMP);
+    int32_t entry_point_offset = Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(TMP);
     // This runtime call does not require a stack map.
     x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
     __ MaybePoisonHeapReference(CpuRegister(TMP));
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index 83f31c7..422e58d 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -1171,7 +1171,32 @@
       }
       return false;
     case kMips:
-      // TODO: implement MIPS SIMD.
+      if (features->AsMipsInstructionSetFeatures()->HasMsa()) {
+        switch (type) {
+          case Primitive::kPrimBoolean:
+          case Primitive::kPrimByte:
+            *restrictions |= kNoDiv;
+            return TrySetVectorLength(16);
+          case Primitive::kPrimChar:
+          case Primitive::kPrimShort:
+            *restrictions |= kNoDiv | kNoStringCharAt;
+            return TrySetVectorLength(8);
+          case Primitive::kPrimInt:
+            *restrictions |= kNoDiv;
+            return TrySetVectorLength(4);
+          case Primitive::kPrimLong:
+            *restrictions |= kNoDiv;
+            return TrySetVectorLength(2);
+          case Primitive::kPrimFloat:
+            *restrictions |= kNoMinMax;  // min/max(x, NaN)
+            return TrySetVectorLength(4);
+          case Primitive::kPrimDouble:
+            *restrictions |= kNoMinMax;  // min/max(x, NaN)
+            return TrySetVectorLength(2);
+          default:
+            break;
+        }  // switch type
+      }
       return false;
     case kMips64:
       if (features->AsMips64InstructionSetFeatures()->HasMsa()) {
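
The kNoMinMax restriction set for float and double above matches the TODOs in the MIPS vector
code generator: MSA's fmin/fmax return the non-NaN operand, while Java's Math.min/max must
propagate NaN (and order -0.0 below +0.0). A scalar model of the semantics the vectorizer would
have to preserve (illustrative only, not ART code):

    #include <cmath>

    static float JavaMin(float a, float b) {
      if (std::isnan(a)) return a;  // NaN propagates, unlike MSA fmin.df.
      if (std::isnan(b)) return b;
      if (a == 0.0f && b == 0.0f) {
        return std::signbit(a) ? a : b;  // min(-0.0f, +0.0f) == -0.0f.
      }
      return (a < b) ? a : b;
    }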
diff --git a/compiler/optimizing/optimizing_cfi_test.cc b/compiler/optimizing/optimizing_cfi_test.cc
index 490e50c..f59bfb6 100644
--- a/compiler/optimizing/optimizing_cfi_test.cc
+++ b/compiler/optimizing/optimizing_cfi_test.cc
@@ -171,18 +171,31 @@
 #ifdef ART_ENABLE_CODEGEN_arm
 TEST_ISA(kThumb2)
 #endif
+
 #ifdef ART_ENABLE_CODEGEN_arm64
+// Run the tests for ARM64 only with Baker read barriers, as the
+// expected generated code saves and restores X21 and X22 (instead of
+// X20 and X21): X20 is used as the Marking Register in the Baker read
+// barrier configuration and is therefore removed from the set of
+// callee-save registers in the ARM64 code generator of the Optimizing
+// compiler.
+#if defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER)
 TEST_ISA(kArm64)
 #endif
+#endif
+
 #ifdef ART_ENABLE_CODEGEN_x86
 TEST_ISA(kX86)
 #endif
+
 #ifdef ART_ENABLE_CODEGEN_x86_64
 TEST_ISA(kX86_64)
 #endif
+
 #ifdef ART_ENABLE_CODEGEN_mips
 TEST_ISA(kMips)
 #endif
+
 #ifdef ART_ENABLE_CODEGEN_mips64
 TEST_ISA(kMips64)
 #endif
diff --git a/compiler/optimizing/optimizing_cfi_test_expected.inc b/compiler/optimizing/optimizing_cfi_test_expected.inc
index 60af2b4..abab431 100644
--- a/compiler/optimizing/optimizing_cfi_test_expected.inc
+++ b/compiler/optimizing/optimizing_cfi_test_expected.inc
@@ -31,21 +31,21 @@
 // 0x00000010: .cfi_def_cfa_offset: 64
 
 static constexpr uint8_t expected_asm_kArm64[] = {
-    0xFF, 0x03, 0x01, 0xD1, 0xF4, 0x17, 0x00, 0xF9, 0xF5, 0x7B, 0x03, 0xA9,
-    0xE8, 0xA7, 0x01, 0x6D, 0xE8, 0xA7, 0x41, 0x6D, 0xF4, 0x17, 0x40, 0xF9,
-    0xF5, 0x7B, 0x43, 0xA9, 0xFF, 0x03, 0x01, 0x91, 0xC0, 0x03, 0x5F, 0xD6,
+    0xFF, 0x03, 0x01, 0xD1, 0xF5, 0x17, 0x00, 0xF9, 0xF6, 0x7B, 0x03, 0xA9,
+    0xE8, 0xA7, 0x01, 0x6D, 0xE8, 0xA7, 0x41, 0x6D, 0xF5, 0x17, 0x40, 0xF9,
+    0xF6, 0x7B, 0x43, 0xA9, 0xFF, 0x03, 0x01, 0x91, 0xC0, 0x03, 0x5F, 0xD6,
 };
 static constexpr uint8_t expected_cfi_kArm64[] = {
-    0x44, 0x0E, 0x40, 0x44, 0x94, 0x06, 0x44, 0x95, 0x04, 0x9E, 0x02, 0x44,
+    0x44, 0x0E, 0x40, 0x44, 0x95, 0x06, 0x44, 0x96, 0x04, 0x9E, 0x02, 0x44,
     0x05, 0x48, 0x0A, 0x05, 0x49, 0x08, 0x0A, 0x44, 0x06, 0x48, 0x06, 0x49,
-    0x44, 0xD4, 0x44, 0xD5, 0xDE, 0x44, 0x0E, 0x00, 0x44, 0x0B, 0x0E, 0x40,
+    0x44, 0xD5, 0x44, 0xD6, 0xDE, 0x44, 0x0E, 0x00, 0x44, 0x0B, 0x0E, 0x40,
 };
 // 0x00000000: sub sp, sp, #0x40 (64)
 // 0x00000004: .cfi_def_cfa_offset: 64
-// 0x00000004: str x20, [sp, #40]
-// 0x00000008: .cfi_offset: r20 at cfa-24
-// 0x00000008: stp x21, lr, [sp, #48]
-// 0x0000000c: .cfi_offset: r21 at cfa-16
+// 0x00000004: str x21, [sp, #40]
+// 0x00000008: .cfi_offset: r21 at cfa-24
+// 0x00000008: stp x22, lr, [sp, #48]
+// 0x0000000c: .cfi_offset: r22 at cfa-16
 // 0x0000000c: .cfi_offset: r30 at cfa-8
 // 0x0000000c: stp d8, d9, [sp, #24]
 // 0x00000010: .cfi_offset_extended: r72 at cfa-40
@@ -54,10 +54,10 @@
 // 0x00000010: ldp d8, d9, [sp, #24]
 // 0x00000014: .cfi_restore_extended: r72
 // 0x00000014: .cfi_restore_extended: r73
-// 0x00000014: ldr x20, [sp, #40]
-// 0x00000018: .cfi_restore: r20
-// 0x00000018: ldp x21, lr, [sp, #48]
-// 0x0000001c: .cfi_restore: r21
+// 0x00000014: ldr x21, [sp, #40]
+// 0x00000018: .cfi_restore: r21
+// 0x00000018: ldp x22, lr, [sp, #48]
+// 0x0000001c: .cfi_restore: r22
 // 0x0000001c: .cfi_restore: r30
 // 0x0000001c: add sp, sp, #0x40 (64)
 // 0x00000020: .cfi_def_cfa_offset: 0
diff --git a/compiler/utils/arm64/jni_macro_assembler_arm64.cc b/compiler/utils/arm64/jni_macro_assembler_arm64.cc
index 9cd6884..c436fd9 100644
--- a/compiler/utils/arm64/jni_macro_assembler_arm64.cc
+++ b/compiler/utils/arm64/jni_macro_assembler_arm64.cc
@@ -772,6 +772,13 @@
   asm_.UnspillRegisters(core_reg_list, frame_size - core_reg_size);
   asm_.UnspillRegisters(fp_reg_list, frame_size - core_reg_size - fp_reg_size);
 
+  if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+    // Refresh Mark Register.
+    // TODO: Refresh MR only if suspend is taken.
+    ___ Ldr(reg_w(MR),
+            MemOperand(reg_x(TR), Thread::IsGcMarkingOffset<kArm64PointerSize>().Int32Value()));
+  }
+
   // Decrease frame size to start of callee saved regs.
   DecreaseFrameSize(frame_size);
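
The Mark Register refresh added above keeps a dedicated register (the X20 that the CFI test
comment earlier identifies as the Marking Register) in sync with the thread-local is-GC-marking
flag, so Baker read barrier fast paths can test a register instead of reloading the flag from
memory at every barrier. A minimal model of the invariant being re-established after a runtime
transition (the accessor name is an assumption):

    // After any call that may have flipped the GC marking phase, ensure:
    //   MR == self->GetIsGcMarking()
    void RefreshMarkRegisterModel(Thread* self, uint32_t* mr) {
      *mr = self->GetIsGcMarking() ? 1u : 0u;  // assumed accessor on Thread
    }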
 
diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc
index 44b9bb4..c581f1c 100644
--- a/compiler/utils/mips/assembler_mips.cc
+++ b/compiler/utils/mips/assembler_mips.cc
@@ -2904,6 +2904,17 @@
                 static_cast<FRegister>(wt));
 }
 
+void MipsAssembler::ReplicateFPToVectorRegister(VectorRegister dst,
+                                                FRegister src,
+                                                bool is_double) {
+  // A float or double in FPU register Fx can be considered as the 0th element in vector register Wx.
+  if (is_double) {
+    SplatiD(dst, static_cast<VectorRegister>(src), 0);
+  } else {
+    SplatiW(dst, static_cast<VectorRegister>(src), 0);
+  }
+}
+
 void MipsAssembler::LoadConst32(Register rd, int32_t value) {
   if (IsUint<16>(value)) {
     // Use OR with (unsigned) immediate to encode 16b unsigned int.
diff --git a/compiler/utils/mips/assembler_mips.h b/compiler/utils/mips/assembler_mips.h
index a229882..33803bb 100644
--- a/compiler/utils/mips/assembler_mips.h
+++ b/compiler/utils/mips/assembler_mips.h
@@ -612,6 +612,9 @@
   void IlvrW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
   void IlvrD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
 
+  // Helper for replicating a floating point value to all destination vector elements.
+  void ReplicateFPToVectorRegister(VectorRegister dst, FRegister src, bool is_double);
+
   // Higher level composite instructions.
   void LoadConst32(Register rd, int32_t value);
   void LoadConst64(Register reg_hi, Register reg_lo, int64_t value);
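
Usage sketch (illustrative only, not part of the change): given an in-scope
MipsAssembler named `assembler` and MSA enabled, the helper broadcasts the
scalar held in an FPU register to every lane of the overlapping vector
register.

    // Broadcast the float in F4 to all four 32-bit lanes of W2.
    assembler.ReplicateFPToVectorRegister(W2, F4, /* is_double */ false);
    // Broadcast the double in F6 to both 64-bit lanes of W3.
    assembler.ReplicateFPToVectorRegister(W3, F6, /* is_double */ true);
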
diff --git a/dex2oat/dex2oat_test.cc b/dex2oat/dex2oat_test.cc
index 1505eb5..b08b055 100644
--- a/dex2oat/dex2oat_test.cc
+++ b/dex2oat/dex2oat_test.cc
@@ -937,7 +937,7 @@
     return GetOdexDir() + "/Context.odex";
   }
 
-  const char* kEmptyClassPathKey = "";
+  const char* kEmptyClassPathKey = "PCL[]";
 };
 
 TEST_F(Dex2oatClassLoaderContextTest, InvalidContext) {
@@ -961,10 +961,10 @@
 
 TEST_F(Dex2oatClassLoaderContextTest, ContextWithOtherDexFiles) {
   std::vector<std::unique_ptr<const DexFile>> dex_files = OpenTestDexFiles("Nested");
-  std::string expected_classpath_key =
-      OatFile::EncodeDexFileDependencies(MakeNonOwningPointerVector(dex_files), "");
 
   std::string context = "PCL[" + dex_files[0]->GetLocation() + "]";
+  std::string expected_classpath_key = "PCL[" +
+      dex_files[0]->GetLocation() + "*" + std::to_string(dex_files[0]->GetLocationChecksum()) + "]";
   RunTest(context.c_str(), expected_classpath_key.c_str(), true);
 }
 
@@ -974,7 +974,7 @@
 
   std::string context = "PCL[" + stripped_classpath + "]";
   // Expect an empty context because stripped dex files cannot be open.
-  RunTest(context.c_str(), /*expected_classpath_key*/ "" , /*expected_success*/ true);
+  RunTest(context.c_str(), kEmptyClassPathKey, /*expected_success*/ true);
 }
 
 TEST_F(Dex2oatClassLoaderContextTest, ContextWithStrippedDexFilesBackedByOdex) {
@@ -993,19 +993,26 @@
   Copy(GetStrippedDexSrc1(), stripped_classpath);
 
   std::string context = "PCL[" + stripped_classpath + "]";
-  std::string expected_classpath;
+  std::string expected_classpath_key;
   {
     // Open the oat file to get the expected classpath.
     OatFileAssistant oat_file_assistant(stripped_classpath.c_str(), kRuntimeISA, false);
     std::unique_ptr<OatFile> oat_file(oat_file_assistant.GetBestOatFile());
     std::vector<std::unique_ptr<const DexFile>> oat_dex_files =
         OatFileAssistant::LoadDexFiles(*oat_file, stripped_classpath.c_str());
-    expected_classpath = OatFile::EncodeDexFileDependencies(
-        MakeNonOwningPointerVector(oat_dex_files), "");
+    expected_classpath_key = "PCL[";
+    for (size_t i = 0; i < oat_dex_files.size(); i++) {
+      if (i > 0) {
+        expected_classpath_key + ":";
+      }
+      expected_classpath_key += oat_dex_files[i]->GetLocation() + "*" +
+          std::to_string(oat_dex_files[i]->GetLocationChecksum());
+    }
+    expected_classpath_key += "]";
   }
 
   RunTest(context.c_str(),
-          expected_classpath.c_str(),
+          expected_classpath_key.c_str(),
           /*expected_success*/ true,
           /*use_second_source*/ true);
 }
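
For reference, the key built in the loop above could equivalently be expressed
with a joined vector, which sidesteps the manual separator bookkeeping (sketch
only; assumes android::base::Join from android-base/strings.h is available):

    std::vector<std::string> parts;
    for (const auto& dex_file : oat_dex_files) {
      parts.push_back(dex_file->GetLocation() + "*" +
                      std::to_string(dex_file->GetLocationChecksum()));
    }
    std::string expected_classpath_key = "PCL[" + android::base::Join(parts, ':') + "]";
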
diff --git a/profman/profile_assistant_test.cc b/profman/profile_assistant_test.cc
index 75f8ec9..c78d34e 100644
--- a/profman/profile_assistant_test.cc
+++ b/profman/profile_assistant_test.cc
@@ -639,10 +639,13 @@
   // Method that doesn't add the class since it's only in one profile. Should still show up in the
   // boot profile.
   const std::string kOtherMethod = "Ljava/util/HashMap;-><init>()V";
+  // Method that gets marked as hot since it's in multiple profiles.
+  const std::string kMultiMethod = "Ljava/util/ArrayList;->clear()V";
 
   // Thresholds for this test.
   static const size_t kDirtyThreshold = 3;
   static const size_t kCleanThreshold = 2;
+  static const size_t kMethodThreshold = 2;
 
   // Create a bunch of boot profiles.
   std::string dex1 =
@@ -659,6 +662,7 @@
       kCleanClass + "\n" +
       kDirtyClass + "\n" +
       "P" + kHotMethod + "\n" +
+      "P" + kMultiMethod + "\n" +
       kUncommonDirtyClass;
   profiles.emplace_back(ScratchFile());
   EXPECT_TRUE(CreateProfile(dex2, profiles.back().GetFilename(), core_dex));
@@ -667,6 +671,7 @@
   std::string dex3 =
       "S" + kHotMethod + "\n" +
       "P" + kOtherMethod + "\n" +
+      "P" + kMultiMethod + "\n" +
       kDirtyClass + "\n";
   profiles.emplace_back(ScratchFile());
   EXPECT_TRUE(CreateProfile(dex3, profiles.back().GetFilename(), core_dex));
@@ -678,6 +683,7 @@
   args.push_back("--generate-boot-image-profile");
   args.push_back("--boot-image-class-threshold=" + std::to_string(kDirtyThreshold));
   args.push_back("--boot-image-clean-class-threshold=" + std::to_string(kCleanThreshold));
+  args.push_back("--boot-image-sampled-method-threshold=" + std::to_string(kMethodThreshold));
   args.push_back("--reference-profile-file=" + out_profile.GetFilename());
   args.push_back("--apk=" + core_dex);
   args.push_back("--dex-location=" + core_dex);
@@ -708,11 +714,18 @@
   // Aggregated methods hotness information.
   EXPECT_NE(output_file_contents.find("HSP" + kHotMethod), std::string::npos)
       << output_file_contents;
-  EXPECT_NE(output_file_contents.find(kOtherMethod), std::string::npos)
+  EXPECT_NE(output_file_contents.find("P" + kOtherMethod), std::string::npos)
       << output_file_contents;
   // Not inferred class, method is only in one profile.
   EXPECT_EQ(output_file_contents.find("Ljava/util/HashMap;\n"), std::string::npos)
       << output_file_contents;
+  // Test the sampled methods that became hot.
+  // Other method is in only one profile, it should not become hot.
+  EXPECT_EQ(output_file_contents.find("HP" + kOtherMethod), std::string::npos)
+      << output_file_contents;
+  // Multi method is in at least two profiles, it should become hot.
+  EXPECT_NE(output_file_contents.find("HP" + kMultiMethod), std::string::npos)
+      << output_file_contents;
 }
 
 TEST_F(ProfileAssistantTest, TestProfileCreationOneNotMatched) {
diff --git a/profman/profman.cc b/profman/profman.cc
index 94e81c7..6c8ca56 100644
--- a/profman/profman.cc
+++ b/profman/profman.cc
@@ -142,6 +142,9 @@
   UsageError("      occurrences to include a class in the boot image profile. A clean class is a");
   UsageError("      class that doesn't have any static fields or native methods and is likely to");
   UsageError("      remain clean in the image. Default is 3.");
+  UsageError("  --boot-image-sampled-method-threshold=<value>: minimum number of profiles a");
+  UsageError("      non-hot method needs to be in order to be hot in the output profile. The");
+  UsageError("      default is max int.");
   UsageError("");
 
   exit(EXIT_FAILURE);
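
Sketch of the selection rule this option controls (hypothetical names for the
aggregation step; only compiled_method_threshold is taken from the code above):
a method that is never hot in any input profile is still promoted to hot in the
merged boot profile once enough profiles contain it.

    if (num_profiles_containing_method >= boot_image_options_.compiled_method_threshold) {
      MarkMethodHotInBootProfile(method);  // placeholder for the real aggregation step
    }
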
@@ -225,6 +228,11 @@
                         "--boot-image-clean-class-threshold",
                         &boot_image_options_.image_class_clean_theshold,
                         Usage);
+      } else if (option.starts_with("--boot-image-sampled-method-threshold=")) {
+        ParseUintOption(option,
+                        "--boot-image-sampled-method-threshold",
+                        &boot_image_options_.compiled_method_threshold,
+                        Usage);
       } else if (option.starts_with("--profile-file=")) {
         profile_files_.push_back(option.substr(strlen("--profile-file=")).ToString());
       } else if (option.starts_with("--profile-file-fd=")) {
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index 676efc4..7e6e574 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -227,6 +227,7 @@
     .cfi_restore r1
     .cfi_restore r2
     .cfi_restore r3
+    .cfi_restore r4
     .cfi_restore r5
     .cfi_restore r6
     .cfi_restore r7
@@ -251,6 +252,7 @@
     .cfi_restore r1
     .cfi_restore r2
     .cfi_restore r3
+    .cfi_restore r4
     .cfi_restore r5
     .cfi_restore r6
     .cfi_restore r7
@@ -1331,7 +1333,7 @@
 // r0: type r1: component_count r2: total_size r9: Thread::Current, r3, r12: free.
 // Need to preserve r0 and r1 to the slow path.
 .macro ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED_WITH_SIZE slowPathLabel
-    and    r2, r2, #OBJECT_ALIGNMENT_MASK_TOGGLED             // Apply alignemnt mask
+    and    r2, r2, #OBJECT_ALIGNMENT_MASK_TOGGLED             // Apply alignment mask
                                                               // (addr + 7) & ~7.
 
                                                               // Load thread_local_pos (r3) and
diff --git a/runtime/arch/arm64/asm_support_arm64.S b/runtime/arch/arm64/asm_support_arm64.S
index bcf55e3..715fc35 100644
--- a/runtime/arch/arm64/asm_support_arm64.S
+++ b/runtime/arch/arm64/asm_support_arm64.S
@@ -33,6 +33,12 @@
 #define xIP1 x17
 #define wIP1 w17
 
+#if defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER)
+// Marking Register, holding Thread::Current()->GetIsGcMarking().
+// Only used with the Concurrent Copying (CC) garbage
+// collector, with the Baker read barrier configuration.
+#define wMR w20
+#endif
 
 .macro ENTRY name
     .type \name, #function
@@ -55,14 +61,14 @@
     END \name
 .endm
 
-// Macros to poison (negate) the reference for heap poisoning.
+// Macro to poison (negate) the reference for heap poisoning.
 .macro POISON_HEAP_REF rRef
 #ifdef USE_HEAP_POISONING
     neg \rRef, \rRef
 #endif  // USE_HEAP_POISONING
 .endm
 
-// Macros to unpoison (negate) the reference for heap poisoning.
+// Macro to unpoison (negate) the reference for heap poisoning.
 .macro UNPOISON_HEAP_REF rRef
 #ifdef USE_HEAP_POISONING
     neg \rRef, \rRef
diff --git a/runtime/arch/arm64/context_arm64.cc b/runtime/arch/arm64/context_arm64.cc
index 0465c1e..0f0814a 100644
--- a/runtime/arch/arm64/context_arm64.cc
+++ b/runtime/arch/arm64/context_arm64.cc
@@ -137,7 +137,9 @@
   for (size_t i = 0; i < kNumberOfDRegisters; ++i) {
     fprs[i] = fprs_[i] != nullptr ? *fprs_[i] : Arm64Context::kBadFprBase + i;
   }
+  // Ensure the Thread Register contains the address of the current thread.
   DCHECK_EQ(reinterpret_cast<uintptr_t>(Thread::Current()), gprs[TR]);
+  // The Marking Register will be updated by art_quick_do_long_jump.
   art_quick_do_long_jump(gprs, fprs);
 }
 
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 138dbf9..e097a33 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -39,6 +39,18 @@
     .cfi_restore \reg
 .endm
 
+.macro SAVE_REG_INCREASE_FRAME reg, frame_adjustment
+    str \reg, [sp, #-(\frame_adjustment)]!
+    .cfi_adjust_cfa_offset (\frame_adjustment)
+    .cfi_rel_offset \reg, 0
+.endm
+
+.macro RESTORE_REG_DECREASE_FRAME reg, frame_adjustment
+    ldr \reg, [sp], #(\frame_adjustment)
+    .cfi_restore \reg
+    .cfi_adjust_cfa_offset -(\frame_adjustment)
+.endm
+
 .macro SAVE_TWO_REGS reg1, reg2, offset
     stp \reg1, \reg2, [sp, #(\offset)]
     .cfi_rel_offset \reg1, (\offset)
@@ -140,6 +152,9 @@
     SAVE_TWO_REGS x29, xLR, 80
 
     // Store ArtMethod* Runtime::callee_save_methods_[kSaveRefsOnly].
+    // Note: We could avoid saving X20 in the case of Baker read
+    // barriers, as it is overwritten by REFRESH_MARKING_REGISTER
+    // later; but it's not worth handling this special case.
     stp xIP0, x20, [sp]
     .cfi_rel_offset x20, 8
 
@@ -151,6 +166,9 @@
 // TODO: Probably no need to restore registers preserved by aapcs64.
 .macro RESTORE_SAVE_REFS_ONLY_FRAME
     // Callee-saves.
+    // Note: Likewise, we could avoid restoring X20 in the case of Baker
+    // read barriers, as it is overwritten by REFRESH_MARKING_REGISTER
+    // later; but it's not worth handling this special case.
     RESTORE_REG x20, 8
     RESTORE_TWO_REGS x21, x22, 16
     RESTORE_TWO_REGS x23, x24, 32
@@ -165,11 +183,6 @@
     DECREASE_FRAME 96
 .endm
 
-.macro RESTORE_SAVE_REFS_ONLY_FRAME_AND_RETURN
-    RESTORE_SAVE_REFS_ONLY_FRAME
-    ret
-.endm
-
 
 .macro SETUP_SAVE_REFS_AND_ARGS_FRAME_INTERNAL
     INCREASE_FRAME 224
@@ -192,6 +205,9 @@
     SAVE_TWO_REGS x5, x6, 112
 
     // x7, Callee-saves.
+    // Note: We could avoid saving X20 in the case of Baker read
+    // barriers, as it is overwritten by REFRESH_MARKING_REGISTER
+    // later; but it's not worth handling this special case.
     SAVE_TWO_REGS x7, x20, 128
     SAVE_TWO_REGS x21, x22, 144
     SAVE_TWO_REGS x23, x24, 160
@@ -250,6 +266,9 @@
     RESTORE_TWO_REGS x5, x6, 112
 
     // x7, Callee-saves.
+    // Note: Likewise, we could avoid restoring X20 in the case of Baker
+    // read barriers, as it is overwritten by REFRESH_MARKING_REGISTER
+    // later; but it's not worth handling this special case.
     RESTORE_TWO_REGS x7, x20, 128
     RESTORE_TWO_REGS x21, x22, 144
     RESTORE_TWO_REGS x23, x24, 160
@@ -358,7 +377,7 @@
     ldp d29, d30, [sp, #240]
     ldr d31,      [sp, #256]
 
-    // Restore core registers.
+    // Restore core registers, except x0.
     RESTORE_TWO_REGS  x1,  x2, 272
     RESTORE_TWO_REGS  x3,  x4, 288
     RESTORE_TWO_REGS  x5,  x6, 304
@@ -379,10 +398,21 @@
 .endm
 
 .macro RESTORE_SAVE_EVERYTHING_FRAME
-    RESTORE_REG            x0, 264
+    RESTORE_REG       x0,      264
     RESTORE_SAVE_EVERYTHING_FRAME_KEEP_X0
 .endm
 
+// Macro to refresh the Marking Register (W20).
+//
+// This macro must be called at the end of functions implementing
+// entrypoints that possibly (directly or indirectly) perform a
+// suspend check (before they return).
+.macro REFRESH_MARKING_REGISTER
+#if defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER)
+    ldr wMR, [xSELF, #THREAD_IS_GC_MARKING_OFFSET]
+#endif
+.endm
+
 .macro RETURN_IF_RESULT_IS_ZERO
     cbnz x0, 1f                // result non-zero branch over
     ret                        // return
@@ -562,6 +592,7 @@
     bl     \cxx_name                      // (method_idx, this, Thread*, SP)
     mov    xIP0, x1                       // save Method*->code_
     RESTORE_SAVE_REFS_AND_ARGS_FRAME
+    REFRESH_MARKING_REGISTER
     cbz    x0, 1f                         // did we find the target? if not go to exception delivery
     br     xIP0                           // tail call to target
 1:
@@ -661,13 +692,15 @@
 
 .macro INVOKE_STUB_CALL_AND_RETURN
 
+    REFRESH_MARKING_REGISTER
+
     // load method-> METHOD_QUICK_CODE_OFFSET
     ldr x9, [x0, #ART_METHOD_QUICK_CODE_OFFSET_64]
     // Branch to method.
     blr x9
 
     // Restore return value address and shorty address.
-    ldp x4,x5, [xFP, #16]
+    ldp x4, x5, [xFP, #16]
     .cfi_restore x4
     .cfi_restore x5
 
@@ -1046,6 +1079,7 @@
     stp x3, x4, [sp, #16]                 // Save result and shorty addresses.
     stp xFP, xLR, [sp]                    // Store LR & FP.
     mov xSELF, x5                         // Move thread pointer into SELF register.
+    REFRESH_MARKING_REGISTER
 
     sub sp, sp, #16
     str xzr, [sp]                         // Store null for ArtMethod* slot
@@ -1152,7 +1186,7 @@
     ldp x24, x25, [x0], #-16
     ldp x22, x23, [x0], #-16
     ldp x20, x21, [x0], #-16
-    ldp x18, x19, [x0], #-16
+    ldp x18, x19, [x0], #-16         // X18 & xSELF
     ldp x16, x17, [x0], #-16
     ldp x14, x15, [x0], #-16
     ldp x12, x13, [x0], #-16
@@ -1163,6 +1197,8 @@
     ldp x2, x3, [x0], #-16
     mov sp, x1
 
+    REFRESH_MARKING_REGISTER
+
     // Need to load PC, it's at the end (after the space for the unused XZR). Use x1.
     ldr x1, [x0, #33*8]
     // And the value of x0.
@@ -1213,6 +1249,7 @@
     mov    x1, xSELF                  // pass Thread::Current
     bl     artLockObjectFromCode      // (Object* obj, Thread*)
     RESTORE_SAVE_REFS_ONLY_FRAME
+    REFRESH_MARKING_REGISTER
     RETURN_IF_W0_IS_ZERO_OR_DELIVER
 END art_quick_lock_object
 
@@ -1221,6 +1258,7 @@
     mov    x1, xSELF                  // pass Thread::Current
     bl     artLockObjectFromCode      // (Object* obj, Thread*)
     RESTORE_SAVE_REFS_ONLY_FRAME
+    REFRESH_MARKING_REGISTER
     RETURN_IF_W0_IS_ZERO_OR_DELIVER
 END art_quick_lock_object_no_inline
 
@@ -1275,6 +1313,7 @@
     mov    x1, xSELF                  // pass Thread::Current
     bl     artUnlockObjectFromCode    // (Object* obj, Thread*)
     RESTORE_SAVE_REFS_ONLY_FRAME
+    REFRESH_MARKING_REGISTER
     RETURN_IF_W0_IS_ZERO_OR_DELIVER
 END art_quick_unlock_object
 
@@ -1283,6 +1322,7 @@
     mov    x1, xSELF                  // pass Thread::Current
     bl     artUnlockObjectFromCode    // (Object* obj, Thread*)
     RESTORE_SAVE_REFS_ONLY_FRAME
+    REFRESH_MARKING_REGISTER
     RETURN_IF_W0_IS_ZERO_OR_DELIVER
 END art_quick_unlock_object_no_inline
 
@@ -1356,7 +1396,7 @@
      */
 .macro READ_BARRIER xDest, wDest, xObj, xTemp, wTemp, offset, number
 #ifdef USE_READ_BARRIER
-#ifdef USE_BAKER_READ_BARRIER
+# ifdef USE_BAKER_READ_BARRIER
     ldr \wTemp, [\xObj, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
     tbnz \wTemp, #LOCK_WORD_READ_BARRIER_STATE_SHIFT, .Lrb_slowpath\number
     // False dependency to avoid needing load/load fence.
@@ -1364,7 +1404,7 @@
     ldr \wDest, [\xObj, #\offset]   // Heap reference = 32b. This also zero-extends to \xDest.
     UNPOISON_HEAP_REF \wDest
     b .Lrb_exit\number
-#endif
+# endif  // USE_BAKER_READ_BARRIER
 .Lrb_slowpath\number:
     // Store registers used in art_quick_aput_obj (x0-x4, LR), stack is 16B aligned.
     SAVE_TWO_REGS_INCREASE_FRAME x0, x1, 48
@@ -1471,6 +1511,7 @@
     mov    x1, xSELF                  // pass Thread::Current
     bl     \entrypoint                // (uint32_t type_idx, Method* method, Thread*)
     RESTORE_SAVE_REFS_ONLY_FRAME
+    REFRESH_MARKING_REGISTER
     \return
 END \name
 .endm
@@ -1483,6 +1524,7 @@
     mov    x2, xSELF                  // pass Thread::Current
     bl     \entrypoint                // (uint32_t type_idx, Method* method, Thread*)
     RESTORE_SAVE_REFS_ONLY_FRAME
+    REFRESH_MARKING_REGISTER
     \return
 END \name
 .endm
@@ -1495,6 +1537,7 @@
     mov    x3, xSELF                  // pass Thread::Current
     bl     \entrypoint
     RESTORE_SAVE_REFS_ONLY_FRAME
+    REFRESH_MARKING_REGISTER
     \return
 END \name
 .endm
@@ -1507,8 +1550,8 @@
     mov    x4, xSELF                  // pass Thread::Current
     bl     \entrypoint                //
     RESTORE_SAVE_REFS_ONLY_FRAME
+    REFRESH_MARKING_REGISTER
     \return
-    DELIVER_PENDING_EXCEPTION
 END \name
 .endm
 
@@ -1520,6 +1563,7 @@
     mov    x1, xSELF                  // pass Thread::Current
     bl     \entrypoint                // (uint32_t type_idx, Thread*)
     RESTORE_SAVE_REFS_ONLY_FRAME
+    REFRESH_MARKING_REGISTER
     \return
 END \name
 .endm
@@ -1531,6 +1575,7 @@
     mov    x2, xSELF                  // pass Thread::Current
     bl     \entrypoint
     RESTORE_SAVE_REFS_ONLY_FRAME
+    REFRESH_MARKING_REGISTER
     \return
 END \name
 .endm
@@ -1542,6 +1587,7 @@
     mov    x3, xSELF                  // pass Thread::Current
     bl     \entrypoint
     RESTORE_SAVE_REFS_ONLY_FRAME
+    REFRESH_MARKING_REGISTER
     \return
 END \name
 .endm
@@ -1556,6 +1602,7 @@
     cbz   w0, 1f                      // If result is null, deliver the OOME.
     .cfi_remember_state
     RESTORE_SAVE_EVERYTHING_FRAME_KEEP_X0
+    REFRESH_MARKING_REGISTER
     ret                        // return
     .cfi_restore_state
     .cfi_def_cfa_offset FRAME_SIZE_SAVE_EVERYTHING  // workaround for clang bug: 31975598
@@ -1588,6 +1635,9 @@
 ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode
 ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_string, artResolveStringFromCode
 
+// Note: Functions `art{Get,Set}<Kind>{Static,Instance}FromCompiledCode` are
+// defined by macros in runtime/entrypoints/quick/quick_field_entrypoints.cc.
+
 ONE_ARG_REF_DOWNCALL art_quick_get_boolean_static, artGetBooleanStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
 ONE_ARG_REF_DOWNCALL art_quick_get_byte_static, artGetByteStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
 ONE_ARG_REF_DOWNCALL art_quick_get_char_static, artGetCharStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
@@ -1752,6 +1802,7 @@
     mov    x1, xSELF                                // pass Thread::Current
     bl     \cxx_name
     RESTORE_SAVE_REFS_ONLY_FRAME
+    REFRESH_MARKING_REGISTER
     RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 END \c_name
 .endm
@@ -1815,6 +1866,7 @@
     mov    x1, xSELF                           // Pass Thread::Current.
     bl     \entrypoint                         // (mirror::Class*, Thread*)
     RESTORE_SAVE_REFS_ONLY_FRAME
+    REFRESH_MARKING_REGISTER
     RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 END \name
 .endm
@@ -1825,7 +1877,7 @@
 GENERATE_ALLOC_OBJECT_RESOLVED_TLAB art_quick_alloc_object_initialized_tlab, artAllocObjectFromCodeInitializedTLAB, /* isInitialized */ 1
 
 .macro ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED_WITH_SIZE slowPathLabel, xClass, wClass, xCount, wCount, xTemp0, wTemp0, xTemp1, wTemp1, xTemp2, wTemp2
-    and    \xTemp1, \xTemp1, #OBJECT_ALIGNMENT_MASK_TOGGLED64 // Apply alignemnt mask
+    and    \xTemp1, \xTemp1, #OBJECT_ALIGNMENT_MASK_TOGGLED64 // Apply alignment mask
                                                               // (addr + 7) & ~7. The mask must
                                                               // be 64 bits to keep high bits in
                                                               // case of overflow.
@@ -1887,6 +1939,7 @@
     mov    x2, xSELF                  // pass Thread::Current
     bl     \entrypoint
     RESTORE_SAVE_REFS_ONLY_FRAME
+    REFRESH_MARKING_REGISTER
     RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 END \name
 .endm
@@ -1937,8 +1990,8 @@
     add    \xTemp1, \xTemp1, #(MIRROR_WIDE_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
 .endm
 
-# TODO(ngeoffray): art_quick_alloc_array_resolved_region_tlab is not used for arm64, remove
-# the entrypoint once all backends have been updated to use the size variants.
+// TODO(ngeoffray): art_quick_alloc_array_resolved_region_tlab is not used for arm64, remove
+// the entrypoint once all backends have been updated to use the size variants.
 GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_UNKNOWN
 GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved8_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_8
 GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved16_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_16
@@ -1959,6 +2012,7 @@
     mov    x0, xSELF
     bl     artTestSuspendFromCode             // (Thread*)
     RESTORE_SAVE_EVERYTHING_FRAME
+    REFRESH_MARKING_REGISTER
     ret
 END art_quick_test_suspend
 
@@ -1966,7 +2020,9 @@
     mov    x0, xSELF
     SETUP_SAVE_REFS_ONLY_FRAME                // save callee saves for stack crawl
     bl     artTestSuspendFromCode             // (Thread*)
-    RESTORE_SAVE_REFS_ONLY_FRAME_AND_RETURN
+    RESTORE_SAVE_REFS_ONLY_FRAME
+    REFRESH_MARKING_REGISTER
+    ret
 END art_quick_implicit_suspend
 
      /*
@@ -1983,6 +2039,7 @@
     ldr     x2, [xSELF, THREAD_EXCEPTION_OFFSET]
     cbnz    x2, .Lexception_in_proxy    // success if no exception is pending
     RESTORE_SAVE_REFS_AND_ARGS_FRAME    // Restore frame
+    REFRESH_MARKING_REGISTER
     fmov    d0, x0                      // Store result in d0 in case it was float or double
     ret                                 // return on success
 .Lexception_in_proxy:
@@ -2035,6 +2092,7 @@
     mov xIP0, x0            // Remember returned code pointer in xIP0.
     ldr x0, [sp, #0]        // artQuickResolutionTrampoline puts called method in *SP.
     RESTORE_SAVE_REFS_AND_ARGS_FRAME
+    REFRESH_MARKING_REGISTER
     br xIP0
 1:
     RESTORE_SAVE_REFS_AND_ARGS_FRAME
@@ -2170,6 +2228,7 @@
 
     // Tear down the callee-save frame.
     RESTORE_SAVE_REFS_AND_ARGS_FRAME
+    REFRESH_MARKING_REGISTER
 
     // store into fpr, for when it's a fpr return...
     fmov d0, x0
@@ -2202,6 +2261,7 @@
     bl   artQuickToInterpreterBridge
 
     RESTORE_SAVE_REFS_AND_ARGS_FRAME       // TODO: no need to restore arguments in this case.
+    REFRESH_MARKING_REGISTER
 
     fmov d0, x0
 
@@ -2231,6 +2291,7 @@
     mov   x0, x20             // Reload method reference.
 
     RESTORE_SAVE_REFS_AND_ARGS_FRAME  // Note: will restore xSELF
+    REFRESH_MARKING_REGISTER
     cbz   xIP0, 1f            // Deliver the pending exception if method is null.
     adr   xLR, art_quick_instrumentation_exit
     br    xIP0                // Tail-call method with lr set to art_quick_instrumentation_exit.
@@ -2263,6 +2324,7 @@
     .cfi_adjust_cfa_offset -16
 
     RESTORE_SAVE_REFS_ONLY_FRAME
+    REFRESH_MARKING_REGISTER
     cbz   xIP0, 1f            // Handle error
     br    xIP0                // Tail-call out.
 1:
@@ -2831,6 +2893,7 @@
 .Lcleanup_and_return:
     DECREASE_FRAME 16
     RESTORE_SAVE_REFS_AND_ARGS_FRAME
+    REFRESH_MARKING_REGISTER
     RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
 
     .section    .rodata                           // Place handler table in read-only section away from text.
diff --git a/runtime/arch/arm64/registers_arm64.h b/runtime/arch/arm64/registers_arm64.h
index 4683fc3..d4c9192 100644
--- a/runtime/arch/arm64/registers_arm64.h
+++ b/runtime/arch/arm64/registers_arm64.h
@@ -61,6 +61,7 @@
   kNumberOfXRegisters = 33,
   // Aliases.
   TR  = X19,     // ART Thread Register - Managed Runtime (Callee Saved Reg)
+  MR  = X20,     // ART Marking Register - Managed Runtime (Callee Saved Reg)
   IP0 = X16,     // Used as scratch by VIXL.
   IP1 = X17,     // Used as scratch by ART JNI Assembler.
   FP  = X29,
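
For reference, the C++/VIXL counterpart of refreshing this register, as done
for JNI stubs in jni_macro_assembler_arm64.cc above, loads the thread-local
flag through the Thread Register:

    // Reload Thread::Current()->is_gc_marking into the Marking Register (W20).
    ___ Ldr(reg_w(MR),
            MemOperand(reg_x(TR), Thread::IsGcMarkingOffset<kArm64PointerSize>().Int32Value()));
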
diff --git a/runtime/arch/quick_alloc_entrypoints.S b/runtime/arch/quick_alloc_entrypoints.S
index 2b3525b..fbfa756 100644
--- a/runtime/arch/quick_alloc_entrypoints.S
+++ b/runtime/arch/quick_alloc_entrypoints.S
@@ -53,7 +53,7 @@
 .endm
 
 // Generate the allocation entrypoints for each allocator. This is used as an alternative to
-// GNERATE_ALL_ALLOC_ENTRYPOINTS for selectively implementing allocation fast paths in
+// GENERATE_ALL_ALLOC_ENTRYPOINTS for selectively implementing allocation fast paths in
 // hand-written assembly.
 #define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(c_suffix, cxx_suffix) \
   ONE_ARG_DOWNCALL art_quick_alloc_object_resolved ## c_suffix, artAllocObjectFromCodeResolved ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
diff --git a/runtime/class_loader_context.cc b/runtime/class_loader_context.cc
index 5cbcd8f..2bed1d5 100644
--- a/runtime/class_loader_context.cc
+++ b/runtime/class_loader_context.cc
@@ -31,8 +31,9 @@
 static constexpr char kDelegateLastClassLoaderString[] = "DLC";
 static constexpr char kClassLoaderOpeningMark = '[';
 static constexpr char kClassLoaderClosingMark = ']';
-static constexpr char kClassLoaderSep = ';';
-static constexpr char kClasspathSep = ':';
+static constexpr char kClassLoaderSeparator = ';';
+static constexpr char kClasspathSeparator = ':';
+static constexpr char kDexFileChecksumSeparator = '*';
 
 ClassLoaderContext::ClassLoaderContext()
     : special_shared_library_(false),
@@ -48,9 +49,11 @@
   }
 }
 
-// The expected format is: "ClassLoaderType1[ClasspathElem1:ClasspathElem2...]".
+// The expected format is: "ClassLoaderType1[ClasspathElem1*Checksum1:ClasspathElem2*Checksum2...]".
+// The checksum part of the format is expected only if parse_checksums is true.
 bool ClassLoaderContext::ParseClassLoaderSpec(const std::string& class_loader_spec,
-                                              ClassLoaderType class_loader_type) {
+                                              ClassLoaderType class_loader_type,
+                                              bool parse_checksums) {
   const char* class_loader_type_str = GetClassLoaderTypeName(class_loader_type);
   size_t type_str_size = strlen(class_loader_type_str);
 
@@ -70,7 +73,26 @@
                                                    class_loader_spec.length() - type_str_size - 2);
 
   class_loader_chain_.push_back(ClassLoaderInfo(class_loader_type));
-  Split(classpath, kClasspathSep, &class_loader_chain_.back().classpath);
+
+  if (!parse_checksums) {
+    Split(classpath, kClasspathSeparator, &class_loader_chain_.back().classpath);
+  } else {
+    std::vector<std::string> classpath_elements;
+    Split(classpath, kClasspathSeparator, &classpath_elements);
+    for (const std::string& element : classpath_elements) {
+      std::vector<std::string> dex_file_with_checksum;
+      Split(element, kDexFileChecksumSeparator, &dex_file_with_checksum);
+      if (dex_file_with_checksum.size() != 2) {
+        return false;
+      }
+      uint32_t checksum = 0;
+      if (!ParseInt(dex_file_with_checksum[1].c_str(), &checksum)) {
+        return false;
+      }
+      class_loader_chain_.back().classpath.push_back(dex_file_with_checksum[0]);
+      class_loader_chain_.back().checksums.push_back(checksum);
+    }
+  }
 
   return true;
 }
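
To make the checksum format concrete, a minimal parsing sketch using the same
Split/ParseInt helpers as the function above (illustrative; the real code adds
error handling as shown):

    std::vector<std::string> elements;
    Split("a.dex*123:b.dex*456", kClasspathSeparator, &elements);
    for (const std::string& element : elements) {
      std::vector<std::string> dex_file_with_checksum;
      Split(element, kDexFileChecksumSeparator, &dex_file_with_checksum);
      CHECK_EQ(2u, dex_file_with_checksum.size());
      uint32_t checksum = 0;
      CHECK(ParseInt(dex_file_with_checksum[1].c_str(), &checksum));
      // Yields ("a.dex", 123) and ("b.dex", 456).
    }
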
@@ -93,11 +115,11 @@
 // The format: ClassLoaderType1[ClasspathElem1:ClasspathElem2...];ClassLoaderType2[...]...
 // ClassLoaderType is either "PCL" (PathClassLoader) or "DLC" (DelegateLastClassLoader).
 // ClasspathElem is the path of dex/jar/apk file.
-bool ClassLoaderContext::Parse(const std::string& spec) {
+bool ClassLoaderContext::Parse(const std::string& spec, bool parse_checksums) {
   if (spec.empty()) {
-    LOG(ERROR) << "Empty string passed to Parse";
-    return false;
+    return true;
   }
+
   // Stop early if we detect the special shared library, which may be passed as the classpath
   // for dex2oat when we want to skip the shared libraries check.
   if (spec == OatFile::kSpecialSharedLibrary) {
@@ -107,7 +129,7 @@
   }
 
   std::vector<std::string> class_loaders;
-  Split(spec, kClassLoaderSep, &class_loaders);
+  Split(spec, kClassLoaderSeparator, &class_loaders);
 
   for (const std::string& class_loader : class_loaders) {
     ClassLoaderType type = ExtractClassLoaderType(class_loader);
@@ -115,7 +137,7 @@
       LOG(ERROR) << "Invalid class loader type: " << class_loader;
       return false;
     }
-    if (!ParseClassLoaderSpec(class_loader, type)) {
+    if (!ParseClassLoaderSpec(class_loader, type, parse_checksums)) {
       LOG(ERROR) << "Invalid class loader spec: " << class_loader;
       return false;
     }
@@ -219,12 +241,33 @@
     return "";
   }
 
-  // TODO(calin): Transition period: assume we only have a classloader until
-  // the oat file assistant implements the full class loader check.
-  CHECK_EQ(1u, class_loader_chain_.size());
+  std::ostringstream out;
 
-  return OatFile::EncodeDexFileDependencies(MakeNonOwningPointerVector(
-      class_loader_chain_[0].opened_dex_files), base_dir);
+  for (size_t i = 0; i < class_loader_chain_.size(); i++) {
+    const ClassLoaderInfo& info = class_loader_chain_[i];
+    if (i > 0) {
+      out << kClassLoaderSeparator;
+    }
+    out << GetClassLoaderTypeName(info.type);
+    out << kClassLoaderOpeningMark;
+    for (size_t k = 0; k < info.opened_dex_files.size(); k++) {
+      const std::unique_ptr<const DexFile>& dex_file = info.opened_dex_files[k];
+      const std::string& location = dex_file->GetLocation();
+      if (k > 0) {
+        out << kClasspathSeparator;
+      }
+      // Find paths that were relative and convert them back from absolute.
+      if (!base_dir.empty() && location.substr(0, base_dir.length()) == base_dir) {
+        out << location.substr(base_dir.length() + 1).c_str();
+      } else {
+        out << dex_file->GetLocation().c_str();
+      }
+      out << kDexFileChecksumSeparator;
+      out << dex_file->GetLocationChecksum();
+    }
+    out << kClassLoaderClosingMark;
+  }
+  return out.str();
 }
 
 jobject ClassLoaderContext::CreateClassLoader(
@@ -281,5 +324,37 @@
       << "Dex files were not successfully opened before the call to " << calling_method
       << "attempt=" << dex_files_open_attempted_ << ", result=" << dex_files_open_result_;
 }
+
+bool ClassLoaderContext::DecodePathClassLoaderContextFromOatFileKey(
+    const std::string& context_spec,
+    std::vector<std::string>* out_classpath,
+    std::vector<uint32_t>* out_checksums,
+    bool* out_is_special_shared_library) {
+  ClassLoaderContext context;
+  if (!context.Parse(context_spec, /*parse_checksums*/ true)) {
+    LOG(ERROR) << "Invalid class loader context: " << context_spec;
+    return false;
+  }
+
+  *out_is_special_shared_library = context.special_shared_library_;
+  if (context.special_shared_library_) {
+    return true;
+  }
+
+  if (context.class_loader_chain_.empty()) {
+    return true;
+  }
+
+  // TODO(calin): assert that we only have a PathClassLoader until the logic for
+  // checking the context covers all cases.
+  CHECK_EQ(1u, context.class_loader_chain_.size());
+  const ClassLoaderInfo& info = context.class_loader_chain_[0];
+  CHECK_EQ(kPathClassLoader, info.type);
+  DCHECK_EQ(info.classpath.size(), info.checksums.size());
+
+  *out_classpath = info.classpath;
+  *out_checksums = info.checksums;
+  return true;
+}
 }  // namespace art
 
diff --git a/runtime/class_loader_context.h b/runtime/class_loader_context.h
index 4af5017..9727a3b 100644
--- a/runtime/class_loader_context.h
+++ b/runtime/class_loader_context.h
@@ -59,6 +59,8 @@
   // The compilation sources are appended to the classpath of the top class loader
   // (i.e the class loader whose parent is the BootClassLoader).
   // Should only be called if OpenDexFiles() returned true.
+  // If the context is empty, this method only creates a single PathClassLoader with the
+  // given compilation_sources.
   jobject CreateClassLoader(const std::vector<const DexFile*>& compilation_sources) const;
 
   // Encodes the context as a string suitable to be added in oat files.
@@ -80,6 +82,17 @@
   // class loader for the source dex files.
   static std::unique_ptr<ClassLoaderContext> Create(const std::string& spec);
 
+  // Decodes the class loader context stored in the oat file with EncodeContextForOatFile.
+  // Returns true if the format matches, or false otherwise. On success, the out
+  // arguments contain the classpath dex files, their checksums, and whether or not the
+  // context is a special shared library.
+  // The method asserts that the context is made out of only one PathClassLoader.
+  static bool DecodePathClassLoaderContextFromOatFileKey(
+      const std::string& context_spec,
+      std::vector<std::string>* out_classpath,
+      std::vector<uint32_t>* out_checksums,
+      bool* out_is_special_shared_library);
+
  private:
   enum ClassLoaderType {
     kInvalidClassLoader = 0,
@@ -93,6 +106,9 @@
     // The list of class path elements that this loader loads.
     // Note that this list may contain relative paths.
     std::vector<std::string> classpath;
+    // The list of class path elements checksums.
+    // May be empty if the checksums are not given when the context is created.
+    std::vector<uint32_t> checksums;
     // After OpenDexFiles is called this holds the opened dex files.
     std::vector<std::unique_ptr<const DexFile>> opened_dex_files;
     // After OpenDexFiles, in case some of the dex files were opened from their oat files
@@ -104,13 +120,14 @@
 
   // Reads the class loader spec in place and returns true if the spec is valid and the
   // compilation context was constructed.
-  bool Parse(const std::string& spec);
+  bool Parse(const std::string& spec, bool parse_checksums = false);
 
   // Attempts to parse a single class loader spec for the given class_loader_type.
   // If successful the class loader spec will be added to the chain.
   // Returns whether or not the operation was successful.
   bool ParseClassLoaderSpec(const std::string& class_loader_spec,
-                            ClassLoaderType class_loader_type);
+                            ClassLoaderType class_loader_type,
+                            bool parse_checksums = false);
 
   // Extracts the class loader type from the given spec.
   // Return ClassLoaderContext::kInvalidClassLoader if the class loader type is not
diff --git a/runtime/class_loader_context_test.cc b/runtime/class_loader_context_test.cc
index 4643e78..03eb0e4 100644
--- a/runtime/class_loader_context_test.cc
+++ b/runtime/class_loader_context_test.cc
@@ -230,6 +230,42 @@
   }
 }
 
+TEST_F(ClassLoaderContextTest, CreateClassLoaderWithEmptyContext) {
+  std::unique_ptr<ClassLoaderContext> context =
+      ClassLoaderContext::Create("");
+  ASSERT_TRUE(context->OpenDexFiles(InstructionSet::kArm, ""));
+
+  std::vector<std::unique_ptr<const DexFile>> compilation_sources = OpenTestDexFiles("MultiDex");
+
+  std::vector<const DexFile*> compilation_sources_raw =
+      MakeNonOwningPointerVector(compilation_sources);
+  jobject jclass_loader = context->CreateClassLoader(compilation_sources_raw);
+  ASSERT_TRUE(jclass_loader != nullptr);
+
+  ScopedObjectAccess soa(Thread::Current());
+
+  StackHandleScope<2> hs(soa.Self());
+  Handle<mirror::ClassLoader> class_loader = hs.NewHandle(
+      soa.Decode<mirror::ClassLoader>(jclass_loader));
+
+  ASSERT_TRUE(class_loader->GetClass() ==
+      soa.Decode<mirror::Class>(WellKnownClasses::dalvik_system_PathClassLoader));
+  ASSERT_TRUE(class_loader->GetParent()->GetClass() ==
+      soa.Decode<mirror::Class>(WellKnownClasses::java_lang_BootClassLoader));
+
+  std::vector<const DexFile*> class_loader_dex_files = GetDexFiles(jclass_loader);
+
+  // The compilation sources should be the only files present in the class loader.
+  ASSERT_EQ(compilation_sources.size(), class_loader_dex_files.size());
+  for (size_t i = 0; i < compilation_sources.size(); i++) {
+    ASSERT_EQ(compilation_sources[i]->GetLocation(),
+        class_loader_dex_files[i]->GetLocation());
+    ASSERT_EQ(compilation_sources[i]->GetLocationChecksum(),
+        class_loader_dex_files[i]->GetLocationChecksum());
+  }
+}
+
 TEST_F(ClassLoaderContextTest, RemoveSourceLocations) {
   std::unique_ptr<ClassLoaderContext> context =
       ClassLoaderContext::Create("PCL[a.dex]");
@@ -256,10 +292,46 @@
   std::vector<std::unique_ptr<const DexFile>> dex1 = OpenTestDexFiles("Main");
   std::vector<std::unique_ptr<const DexFile>> dex2 = OpenTestDexFiles("MyClass");
   std::string encoding = context->EncodeContextForOatFile("");
-  std::string expected_encoding =
-      dex1[0]->GetLocation() + "*" + std::to_string(dex1[0]->GetLocationChecksum()) + "*" +
-      dex2[0]->GetLocation() + "*" + std::to_string(dex2[0]->GetLocationChecksum()) + "*";
+  std::string expected_encoding = "PCL[" +
+      dex1[0]->GetLocation() + "*" + std::to_string(dex1[0]->GetLocationChecksum()) + ":" +
+      dex2[0]->GetLocation() + "*" + std::to_string(dex2[0]->GetLocationChecksum()) + "]";
   ASSERT_EQ(expected_encoding, context->EncodeContextForOatFile(""));
 }
 
+TEST_F(ClassLoaderContextTest, DecodeOatFileKey) {
+  std::string oat_file_encoding = "PCL[a.dex*123:b.dex*456]";
+  std::vector<std::string> classpath;
+  std::vector<uint32_t> checksums;
+  bool is_special_shared_library;
+  bool result = ClassLoaderContext::DecodePathClassLoaderContextFromOatFileKey(
+      oat_file_encoding,
+      &classpath,
+      &checksums,
+      &is_special_shared_library);
+  ASSERT_TRUE(result);
+  ASSERT_FALSE(is_special_shared_library);
+  ASSERT_EQ(2u, classpath.size());
+  ASSERT_EQ(2u, checksums.size());
+  ASSERT_EQ("a.dex", classpath[0]);
+  ASSERT_EQ(123u, checksums[0]);
+  ASSERT_EQ("b.dex", classpath[1]);
+  ASSERT_EQ(456u, checksums[1]);
+}
+
+TEST_F(ClassLoaderContextTest, DecodeOatFileKeySpecialLibrary) {
+  std::string oat_file_encoding = "&";
+  std::vector<std::string> classpath;
+  std::vector<uint32_t> checksums;
+  bool is_special_shared_library;
+  bool result = ClassLoaderContext::DecodePathClassLoaderContextFromOatFileKey(
+      oat_file_encoding,
+      &classpath,
+      &checksums,
+      &is_special_shared_library);
+  ASSERT_TRUE(result);
+  ASSERT_TRUE(is_special_shared_library);
+  ASSERT_TRUE(classpath.empty());
+  ASSERT_TRUE(checksums.empty());
+}
+
 }  // namespace art
diff --git a/runtime/dex_to_dex_decompiler.cc b/runtime/dex_to_dex_decompiler.cc
index c15c9ec..908405b 100644
--- a/runtime/dex_to_dex_decompiler.cc
+++ b/runtime/dex_to_dex_decompiler.cc
@@ -18,9 +18,10 @@
 
 #include "base/logging.h"
 #include "base/mutex.h"
+#include "bytecode_utils.h"
 #include "dex_file-inl.h"
 #include "dex_instruction-inl.h"
-#include "bytecode_utils.h"
+#include "quicken_info.h"
 
 namespace art {
 namespace optimizer {
@@ -31,27 +32,21 @@
                 const ArrayRef<const uint8_t>& quickened_info,
                 bool decompile_return_instruction)
     : code_item_(code_item),
-      quickened_info_ptr_(quickened_info.data()),
-      quickened_info_start_(quickened_info.data()),
-      quickened_info_end_(quickened_info.data() + quickened_info.size()),
+      quicken_info_(quickened_info.data()),
+      quicken_info_number_of_indices_(QuickenInfoTable::NumberOfIndices(quickened_info.size())),
       decompile_return_instruction_(decompile_return_instruction) {}
 
   bool Decompile();
 
  private:
-  void DecompileInstanceFieldAccess(Instruction* inst,
-                                    uint32_t dex_pc,
-                                    Instruction::Code new_opcode) {
-    uint16_t index = GetIndexAt(dex_pc);
+  void DecompileInstanceFieldAccess(Instruction* inst, Instruction::Code new_opcode) {
+    uint16_t index = NextIndex();
     inst->SetOpcode(new_opcode);
     inst->SetVRegC_22c(index);
   }
 
-  void DecompileInvokeVirtual(Instruction* inst,
-                              uint32_t dex_pc,
-                              Instruction::Code new_opcode,
-                              bool is_range) {
-    uint16_t index = GetIndexAt(dex_pc);
+  void DecompileInvokeVirtual(Instruction* inst, Instruction::Code new_opcode, bool is_range) {
+    const uint16_t index = NextIndex();
     inst->SetOpcode(new_opcode);
     if (is_range) {
       inst->SetVRegB_3rc(index);
@@ -60,40 +55,32 @@
     }
   }
 
-  void DecompileNop(Instruction* inst, uint32_t dex_pc) {
-    if (quickened_info_ptr_ == quickened_info_end_) {
+  void DecompileNop(Instruction* inst) {
+    const uint16_t reference_index = NextIndex();
+    if (reference_index == DexFile::kDexNoIndex16) {
+      // This means it was a normal nop and not a check-cast.
       return;
     }
-    const uint8_t* temporary_pointer = quickened_info_ptr_;
-    uint32_t quickened_pc = DecodeUnsignedLeb128(&temporary_pointer);
-    if (quickened_pc != dex_pc) {
-      return;
-    }
-    uint16_t reference_index = GetIndexAt(dex_pc);
-    uint16_t type_index = GetIndexAt(dex_pc);
+    const uint16_t type_index = NextIndex();
     inst->SetOpcode(Instruction::CHECK_CAST);
     inst->SetVRegA_21c(reference_index);
     inst->SetVRegB_21c(type_index);
   }
 
-  uint16_t GetIndexAt(uint32_t dex_pc) {
-    // Note that as a side effect, DecodeUnsignedLeb128 update the given pointer
-    // to the new position in the buffer.
-    DCHECK_LT(quickened_info_ptr_, quickened_info_end_);
-    uint32_t quickened_pc = DecodeUnsignedLeb128(&quickened_info_ptr_);
-    DCHECK_LT(quickened_info_ptr_, quickened_info_end_);
-    uint16_t index = DecodeUnsignedLeb128(&quickened_info_ptr_);
-    DCHECK_LE(quickened_info_ptr_, quickened_info_end_);
-    DCHECK_EQ(quickened_pc, dex_pc);
-    return index;
+  uint16_t NextIndex() {
+    DCHECK_LT(quicken_index_, quicken_info_number_of_indices_);
+    const uint16_t ret = quicken_info_.GetData(quicken_index_);
+    quicken_index_++;
+    return ret;
   }
 
   const DexFile::CodeItem& code_item_;
-  const uint8_t* quickened_info_ptr_;
-  const uint8_t* const quickened_info_start_;
-  const uint8_t* const quickened_info_end_;
+  const QuickenInfoTable quicken_info_;
+  const size_t quicken_info_number_of_indices_;
   const bool decompile_return_instruction_;
 
+  size_t quicken_index_ = 0u;
+
   DISALLOW_COPY_AND_ASSIGN(DexDecompiler);
 };
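
Because the new table is consumed strictly in instruction order, the dex pc no
longer needs to be stored per entry. A minimal consumption sketch using the
same QuickenInfoTable API as the constructor above:

    QuickenInfoTable table(quickened_info.data());
    const size_t count = QuickenInfoTable::NumberOfIndices(quickened_info.size());
    for (size_t i = 0; i < count; ++i) {
      const uint16_t index = table.GetData(i);
      // Rewrite the i-th quickened instruction; a value of
      // DexFile::kDexNoIndex16 marks a normal NOP (see the compiler side).
    }
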
 
@@ -103,7 +90,6 @@
   // unquickening is a rare need and not performance sensitive, it is not worth the
   // added storage to also add the RETURN_VOID quickening in the quickened data.
   for (CodeItemIterator it(code_item_); !it.Done(); it.Advance()) {
-    uint32_t dex_pc = it.CurrentDexPc();
     Instruction* inst = const_cast<Instruction*>(&it.CurrentInstruction());
 
     switch (inst->Opcode()) {
@@ -114,71 +100,76 @@
         break;
 
       case Instruction::NOP:
-        DecompileNop(inst, dex_pc);
+        if (quicken_info_number_of_indices_ > 0) {
+          // Only try to decompile the NOP if there is at least one index. Having
+          // no indices happens when we unquicken a code item whose only
+          // quickened instruction is RETURN_VOID_NO_BARRIER.
+          DecompileNop(inst);
+        }
         break;
 
       case Instruction::IGET_QUICK:
-        DecompileInstanceFieldAccess(inst, dex_pc, Instruction::IGET);
+        DecompileInstanceFieldAccess(inst, Instruction::IGET);
         break;
 
       case Instruction::IGET_WIDE_QUICK:
-        DecompileInstanceFieldAccess(inst, dex_pc, Instruction::IGET_WIDE);
+        DecompileInstanceFieldAccess(inst, Instruction::IGET_WIDE);
         break;
 
       case Instruction::IGET_OBJECT_QUICK:
-        DecompileInstanceFieldAccess(inst, dex_pc, Instruction::IGET_OBJECT);
+        DecompileInstanceFieldAccess(inst, Instruction::IGET_OBJECT);
         break;
 
       case Instruction::IGET_BOOLEAN_QUICK:
-        DecompileInstanceFieldAccess(inst, dex_pc, Instruction::IGET_BOOLEAN);
+        DecompileInstanceFieldAccess(inst, Instruction::IGET_BOOLEAN);
         break;
 
       case Instruction::IGET_BYTE_QUICK:
-        DecompileInstanceFieldAccess(inst, dex_pc, Instruction::IGET_BYTE);
+        DecompileInstanceFieldAccess(inst, Instruction::IGET_BYTE);
         break;
 
       case Instruction::IGET_CHAR_QUICK:
-        DecompileInstanceFieldAccess(inst, dex_pc, Instruction::IGET_CHAR);
+        DecompileInstanceFieldAccess(inst, Instruction::IGET_CHAR);
         break;
 
       case Instruction::IGET_SHORT_QUICK:
-        DecompileInstanceFieldAccess(inst, dex_pc, Instruction::IGET_SHORT);
+        DecompileInstanceFieldAccess(inst, Instruction::IGET_SHORT);
         break;
 
       case Instruction::IPUT_QUICK:
-        DecompileInstanceFieldAccess(inst, dex_pc, Instruction::IPUT);
+        DecompileInstanceFieldAccess(inst, Instruction::IPUT);
         break;
 
       case Instruction::IPUT_BOOLEAN_QUICK:
-        DecompileInstanceFieldAccess(inst, dex_pc, Instruction::IPUT_BOOLEAN);
+        DecompileInstanceFieldAccess(inst, Instruction::IPUT_BOOLEAN);
         break;
 
       case Instruction::IPUT_BYTE_QUICK:
-        DecompileInstanceFieldAccess(inst, dex_pc, Instruction::IPUT_BYTE);
+        DecompileInstanceFieldAccess(inst, Instruction::IPUT_BYTE);
         break;
 
       case Instruction::IPUT_CHAR_QUICK:
-        DecompileInstanceFieldAccess(inst, dex_pc, Instruction::IPUT_CHAR);
+        DecompileInstanceFieldAccess(inst, Instruction::IPUT_CHAR);
         break;
 
       case Instruction::IPUT_SHORT_QUICK:
-        DecompileInstanceFieldAccess(inst, dex_pc, Instruction::IPUT_SHORT);
+        DecompileInstanceFieldAccess(inst, Instruction::IPUT_SHORT);
         break;
 
       case Instruction::IPUT_WIDE_QUICK:
-        DecompileInstanceFieldAccess(inst, dex_pc, Instruction::IPUT_WIDE);
+        DecompileInstanceFieldAccess(inst, Instruction::IPUT_WIDE);
         break;
 
       case Instruction::IPUT_OBJECT_QUICK:
-        DecompileInstanceFieldAccess(inst, dex_pc, Instruction::IPUT_OBJECT);
+        DecompileInstanceFieldAccess(inst, Instruction::IPUT_OBJECT);
         break;
 
       case Instruction::INVOKE_VIRTUAL_QUICK:
-        DecompileInvokeVirtual(inst, dex_pc, Instruction::INVOKE_VIRTUAL, false);
+        DecompileInvokeVirtual(inst, Instruction::INVOKE_VIRTUAL, false);
         break;
 
       case Instruction::INVOKE_VIRTUAL_RANGE_QUICK:
-        DecompileInvokeVirtual(inst, dex_pc, Instruction::INVOKE_VIRTUAL_RANGE, true);
+        DecompileInvokeVirtual(inst, Instruction::INVOKE_VIRTUAL_RANGE, true);
         break;
 
       default:
@@ -186,14 +177,14 @@
     }
   }
 
-  if (quickened_info_ptr_ != quickened_info_end_) {
-    if (quickened_info_start_ == quickened_info_ptr_) {
+  if (quicken_index_ != quicken_info_number_of_indices_) {
+    if (quicken_index_ == 0) {
       LOG(WARNING) << "Failed to use any value in quickening info,"
                    << " potentially due to duplicate methods.";
     } else {
       LOG(FATAL) << "Failed to use all values in quickening info."
-                 << " Actual: " << std::hex << reinterpret_cast<uintptr_t>(quickened_info_ptr_)
-                 << " Expected: " << reinterpret_cast<uintptr_t>(quickened_info_end_);
+                 << " Actual: " << std::hex << quicken_index_
+                 << " Expected: " << quicken_info_number_of_indices_;
       return false;
     }
   }
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index 458e830..8d3c62f 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -166,7 +166,7 @@
   }
   if (kUseBakerReadBarrier && kGrayDirtyImmuneObjects) {
     // Switch to read barrier mark entrypoints before we gray the objects. This is required in case
-    // a mutator sees a gray bit and dispatches on the entrpoint. (b/37876887).
+    // a mutator sees a gray bit and dispatches on the entrypoint. (b/37876887).
     ActivateReadBarrierEntrypoints();
     // Gray dirty immune objects concurrently to reduce GC pause times. We re-process gray cards in
     // the pause.
diff --git a/runtime/gc/space/image_space.cc b/runtime/gc/space/image_space.cc
index 1bf9285..b229b6c 100644
--- a/runtime/gc/space/image_space.cc
+++ b/runtime/gc/space/image_space.cc
@@ -1008,6 +1008,20 @@
         }
       }
 
+      if (obj->IsClass()) {
+        mirror::Class* klass = obj->AsClass<kVerifyNone, kWithoutReadBarrier>();
+        // Fixup super class before visiting instance fields which require
+        // information from their super class to calculate offsets.
+        mirror::Class* super_class = klass->GetSuperClass();
+        if (super_class != nullptr) {
+          mirror::Class* new_super_class = down_cast<mirror::Class*>(ForwardObject(super_class));
+          if (new_super_class != super_class && IsInAppImage(new_super_class)) {
+            // Recursively fix all dependencies.
+            operator()(new_super_class);
+          }
+        }
+      }
+
       obj->VisitReferences</*visit native roots*/false, kVerifyNone, kWithoutReadBarrier>(
           *this,
           *this);
diff --git a/runtime/jit/profile_compilation_info.cc b/runtime/jit/profile_compilation_info.cc
index 147173c..a247b56 100644
--- a/runtime/jit/profile_compilation_info.cc
+++ b/runtime/jit/profile_compilation_info.cc
@@ -336,7 +336,9 @@
         methods_region_size +
         dex_data.bitmap_storage.size();
   }
-  if (required_capacity > kProfileSizeErrorThresholdInBytes) {
+  // Allow large profiles for non-target builds for the case where we are merging many profiles
+  // to generate a boot image profile.
+  if (kIsTargetBuild && required_capacity > kProfileSizeErrorThresholdInBytes) {
     LOG(ERROR) << "Profile data size exceeds "
                << std::to_string(kProfileSizeErrorThresholdInBytes)
                << " bytes. Profile will not be written to disk.";
@@ -1030,8 +1032,9 @@
   if (status != kProfileLoadSuccess) {
     return status;
   }
-
-  if (uncompressed_data_size > kProfileSizeErrorThresholdInBytes) {
+  // Allow large profiles for non-target builds for the case where we are merging many profiles
+  // to generate a boot image profile.
+  if (kIsTargetBuild && uncompressed_data_size > kProfileSizeErrorThresholdInBytes) {
     LOG(ERROR) << "Profile data size exceeds "
                << std::to_string(kProfileSizeErrorThresholdInBytes)
                << " bytes";
diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h
index 419a4db..003cd4e 100644
--- a/runtime/mirror/class-inl.h
+++ b/runtime/mirror/class-inl.h
@@ -673,11 +673,7 @@
 }
 
 inline void Class::SetClinitThreadId(pid_t new_clinit_thread_id) {
-  if (Runtime::Current()->IsActiveTransaction()) {
-    SetField32<true>(OFFSET_OF_OBJECT_MEMBER(Class, clinit_thread_id_), new_clinit_thread_id);
-  } else {
-    SetField32<false>(OFFSET_OF_OBJECT_MEMBER(Class, clinit_thread_id_), new_clinit_thread_id);
-  }
+  SetField32Transaction(OFFSET_OF_OBJECT_MEMBER(Class, clinit_thread_id_), new_clinit_thread_id);
 }
 
 inline String* Class::GetName() {
@@ -685,11 +681,7 @@
 }
 
 inline void Class::SetName(ObjPtr<String> name) {
-  if (Runtime::Current()->IsActiveTransaction()) {
-    SetFieldObject<true>(OFFSET_OF_OBJECT_MEMBER(Class, name_), name);
-  } else {
-    SetFieldObject<false>(OFFSET_OF_OBJECT_MEMBER(Class, name_), name);
-  }
+  SetFieldObjectTransaction(OFFSET_OF_OBJECT_MEMBER(Class, name_), name);
 }
 
 template<VerifyObjectFlags kVerifyFlags>
diff --git a/runtime/mirror/class.cc b/runtime/mirror/class.cc
index e4b5320..b0e5b6a 100644
--- a/runtime/mirror/class.cc
+++ b/runtime/mirror/class.cc
@@ -191,7 +191,7 @@
 }
 
 void Class::SetDexCache(ObjPtr<DexCache> new_dex_cache) {
-  SetFieldObject<false>(OFFSET_OF_OBJECT_MEMBER(Class, dex_cache_), new_dex_cache);
+  SetFieldObjectTransaction(OFFSET_OF_OBJECT_MEMBER(Class, dex_cache_), new_dex_cache);
 }
 
 void Class::SetClassSize(uint32_t new_class_size) {
@@ -200,8 +200,7 @@
     LOG(FATAL_WITHOUT_ABORT) << new_class_size << " vs " << GetClassSize();
     LOG(FATAL) << "class=" << PrettyTypeOf();
   }
-  // Not called within a transaction.
-  SetField32<false>(OFFSET_OF_OBJECT_MEMBER(Class, class_size_), new_class_size);
+  SetField32Transaction(OFFSET_OF_OBJECT_MEMBER(Class, class_size_), new_class_size);
 }
 
 // Return the class' name. The exact format is bizarre, but it's the specified behavior for
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index 00498bc..b60ddcf 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -408,7 +408,7 @@
     DCHECK_EQ(v32 & kPrimitiveTypeMask, v32) << "upper 16 bits aren't zero";
     // Store the component size shift in the upper 16 bits.
     v32 |= Primitive::ComponentSizeShift(new_type) << kPrimitiveTypeSizeShiftShift;
-    SetField32<false>(OFFSET_OF_OBJECT_MEMBER(Class, primitive_type_), v32);
+    SetField32Transaction(OFFSET_OF_OBJECT_MEMBER(Class, primitive_type_), v32);
   }
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
@@ -1169,8 +1169,7 @@
   }
 
   void SetDexClassDefIndex(uint16_t class_def_idx) REQUIRES_SHARED(Locks::mutator_lock_) {
-    // Not called within a transaction.
-    SetField32<false>(OFFSET_OF_OBJECT_MEMBER(Class, dex_class_def_idx_), class_def_idx);
+    SetField32Transaction(OFFSET_OF_OBJECT_MEMBER(Class, dex_class_def_idx_), class_def_idx);
   }
 
   dex::TypeIndex GetDexTypeIndex() REQUIRES_SHARED(Locks::mutator_lock_) {
@@ -1179,8 +1178,7 @@
   }
 
   void SetDexTypeIndex(dex::TypeIndex type_idx) REQUIRES_SHARED(Locks::mutator_lock_) {
-    // Not called within a transaction.
-    SetField32<false>(OFFSET_OF_OBJECT_MEMBER(Class, dex_type_idx_), type_idx.index_);
+    SetField32Transaction(OFFSET_OF_OBJECT_MEMBER(Class, dex_type_idx_), type_idx.index_);
   }
 
   dex::TypeIndex FindTypeIndexInOtherDexFile(const DexFile& dex_file)
diff --git a/runtime/mirror/object-inl.h b/runtime/mirror/object-inl.h
index 95f829d..43d70b7 100644
--- a/runtime/mirror/object-inl.h
+++ b/runtime/mirror/object-inl.h
@@ -560,6 +560,15 @@
   SetField32<kTransactionActive, kCheckTransaction, kVerifyFlags, true>(field_offset, new_value);
 }
 
+template<bool kCheckTransaction, VerifyObjectFlags kVerifyFlags, bool kIsVolatile>
+inline void Object::SetField32Transaction(MemberOffset field_offset, int32_t new_value) {
+  if (Runtime::Current()->IsActiveTransaction()) {
+    SetField32<true, kCheckTransaction, kVerifyFlags, kIsVolatile>(field_offset, new_value);
+  } else {
+    SetField32<false, kCheckTransaction, kVerifyFlags, kIsVolatile>(field_offset, new_value);
+  }
+}
+
 // TODO: Pass memory_order_ and strong/weak as arguments to avoid code duplication?
 
 template<bool kTransactionActive, bool kCheckTransaction, VerifyObjectFlags kVerifyFlags>
@@ -657,6 +666,15 @@
                                                                                new_value);
 }
 
+template<bool kCheckTransaction, VerifyObjectFlags kVerifyFlags, bool kIsVolatile>
+inline void Object::SetField64Transaction(MemberOffset field_offset, int64_t new_value) {
+  if (Runtime::Current()->IsActiveTransaction()) {
+    SetField64<true, kCheckTransaction, kVerifyFlags, kIsVolatile>(field_offset, new_value);
+  } else {
+    SetField64<false, kCheckTransaction, kVerifyFlags, kIsVolatile>(field_offset, new_value);
+  }
+}
+
 template<typename kSize>
 inline kSize Object::GetFieldAcquire(MemberOffset field_offset) {
   const uint8_t* raw_addr = reinterpret_cast<const uint8_t*>(this) + field_offset.Int32Value();
@@ -775,6 +793,15 @@
                                                                             new_value);
 }
 
+template<bool kCheckTransaction, VerifyObjectFlags kVerifyFlags, bool kIsVolatile>
+inline void Object::SetFieldObjectTransaction(MemberOffset field_offset, ObjPtr<Object> new_value) {
+  if (Runtime::Current()->IsActiveTransaction()) {
+    SetFieldObject<true, kCheckTransaction, kVerifyFlags, kIsVolatile>(field_offset, new_value);
+  } else {
+    SetFieldObject<false, kCheckTransaction, kVerifyFlags, kIsVolatile>(field_offset, new_value);
+  }
+}
+
 template <VerifyObjectFlags kVerifyFlags>
 inline HeapReference<Object>* Object::GetFieldObjectReferenceAddr(MemberOffset field_offset) {
   if (kVerifyFlags & kVerifyThis) {
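
A note on the pattern above: each new *Transaction helper queries Runtime::Current()->IsActiveTransaction() once and forwards the answer as the kTransactionActive template argument, so every instantiation of the underlying setter keeps the transaction flag as a compile-time constant. A minimal standalone sketch of that shape, with toy names in place of ART's (gActiveTransaction, SetField, and the printf undo log are illustrative only):

  #include <cstdint>
  #include <cstdio>

  static bool gActiveTransaction = false;  // stand-in for IsActiveTransaction()

  template <bool kTransactionActive>
  void SetField(int32_t* field, int32_t value) {
    if (kTransactionActive) {  // compile-time constant per instantiation
      std::printf("recording old value %d for rollback\n", *field);
    }
    *field = value;
  }

  // The runtime state is checked exactly once; both callees carry the flag
  // as a template parameter rather than re-testing it.
  void SetFieldTransaction(int32_t* field, int32_t value) {
    if (gActiveTransaction) {
      SetField<true>(field, value);
    } else {
      SetField<false>(field, value);
    }
  }

  int main() {
    int32_t f = 1;
    SetFieldTransaction(&f, 2);  // outside a transaction: no undo log
    gActiveTransaction = true;
    SetFieldTransaction(&f, 3);  // inside a transaction: old value recorded
    std::printf("f = %d\n", f);
    return 0;
  }
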
diff --git a/runtime/mirror/object.h b/runtime/mirror/object.h
index 9cf4252..886780f 100644
--- a/runtime/mirror/object.h
+++ b/runtime/mirror/object.h
@@ -312,6 +312,11 @@
                                             ObjPtr<Object> new_value)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
+  template<bool kCheckTransaction = true, VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           bool kIsVolatile = false>
+  ALWAYS_INLINE void SetFieldObjectTransaction(MemberOffset field_offset, ObjPtr<Object> new_value)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
   template<bool kTransactionActive,
            bool kCheckTransaction = true,
            VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
@@ -470,6 +475,12 @@
   ALWAYS_INLINE void SetField32Volatile(MemberOffset field_offset, int32_t new_value)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
+  template<bool kCheckTransaction = true,
+           VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           bool kIsVolatile = false>
+  ALWAYS_INLINE void SetField32Transaction(MemberOffset field_offset, int32_t new_value)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
   template<bool kTransactionActive, bool kCheckTransaction = true,
       VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
   ALWAYS_INLINE bool CasFieldWeakSequentiallyConsistent32(MemberOffset field_offset,
@@ -525,6 +536,12 @@
   ALWAYS_INLINE void SetField64Volatile(MemberOffset field_offset, int64_t new_value)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
+  template<bool kCheckTransaction = true,
+           VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           bool kIsVolatile = false>
+  ALWAYS_INLINE void SetField64Transaction(MemberOffset field_offset, int64_t new_value)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
   template<bool kTransactionActive, bool kCheckTransaction = true,
       VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
   bool CasFieldWeakSequentiallyConsistent64(MemberOffset field_offset, int64_t old_value,
diff --git a/runtime/mirror/string-inl.h b/runtime/mirror/string-inl.h
index 7560639..84587c8 100644
--- a/runtime/mirror/string-inl.h
+++ b/runtime/mirror/string-inl.h
@@ -251,6 +251,7 @@
                                           Handle<ByteArray> array, int32_t offset,
                                           int32_t high_byte, gc::AllocatorType allocator_type) {
   const uint8_t* const src = reinterpret_cast<uint8_t*>(array->GetData()) + offset;
+  high_byte &= 0xff;  // Extract the relevant bits before determining `compressible`.
   const bool compressible =
       kUseStringCompression && String::AllASCII<uint8_t>(src, byte_length) && (high_byte == 0);
   const int32_t length_with_flag = String::GetFlaggedCount(byte_length, compressible);
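
The hibyte fix above matters because String(byte[], int hibyte) is specified to use only the low 8 bits of hibyte, yet the unmasked value was compared against zero when deciding compressibility, so a value such as 0x100 wrongly produced an uncompressed string (see the 021-string2 regression test below). A small demonstration of the comparison before and after the mask, assuming only that compression requires the effective high byte to be zero:

  #include <cstdint>
  #include <cstdio>

  int main() {
    int32_t high_byte = 0x100;            // only bits 0-7 are meaningful
    bool old_check = (high_byte == 0);    // false: compression wrongly disabled
    high_byte &= 0xff;                    // the fix: extract the relevant bits
    bool new_check = (high_byte == 0);    // true: all-ASCII data may compress
    std::printf("old=%d new=%d\n", old_check, new_check);
    return 0;
  }
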
diff --git a/runtime/oat.h b/runtime/oat.h
index 521cc40..5e61907 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,7 +32,8 @@
 class PACKED(4) OatHeader {
  public:
   static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' };
-  static constexpr uint8_t kOatVersion[] = { '1', '2', '7', '\0' };  // .bss ArtMethod* section.
+  // Reason for the last oat version change: update classpath key format.
+  static constexpr uint8_t kOatVersion[] = { '1', '2', '8', '\0' };
 
   static constexpr const char* kImageLocationKey = "image-location";
   static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
diff --git a/runtime/oat_file.cc b/runtime/oat_file.cc
index 2ed30df..1c1189d 100644
--- a/runtime/oat_file.cc
+++ b/runtime/oat_file.cc
@@ -1574,28 +1574,6 @@
   return GetOatHeader().GetCompilerFilter();
 }
 
-static constexpr char kDexClassPathEncodingSeparator = '*';
-
-std::string OatFile::EncodeDexFileDependencies(const std::vector<const DexFile*>& dex_files,
-                                               const std::string& base_dir) {
-  std::ostringstream out;
-
-  for (const DexFile* dex_file : dex_files) {
-    const std::string& location = dex_file->GetLocation();
-    // Find paths that were relative and convert them back from absolute.
-    if (!base_dir.empty() && location.substr(0, base_dir.length()) == base_dir) {
-      out << location.substr(base_dir.length() + 1).c_str();
-    } else {
-      out << dex_file->GetLocation().c_str();
-    }
-    out << kDexClassPathEncodingSeparator;
-    out << dex_file->GetLocationChecksum();
-    out << kDexClassPathEncodingSeparator;
-  }
-
-  return out.str();
-}
-
 OatFile::OatClass OatFile::FindOatClass(const DexFile& dex_file,
                                         uint16_t class_def_idx,
                                         bool* found) {
diff --git a/runtime/oat_file.h b/runtime/oat_file.h
index 6393e09..b112b84 100644
--- a/runtime/oat_file.h
+++ b/runtime/oat_file.h
@@ -296,11 +296,6 @@
   static std::string ResolveRelativeEncodedDexLocation(
       const char* abs_dex_location, const std::string& rel_dex_location);
 
-  // Create a dependency list (dex locations and checksums) for the given dex files.
-  // Removes dex file paths prefixed with base_dir to convert them back to relative paths.
-  static std::string EncodeDexFileDependencies(const std::vector<const DexFile*>& dex_files,
-                                               const std::string& base_dir);
-
   // Finds the associated oat class for a dex_file and descriptor. Returns an invalid OatClass on
   // error and sets found to false.
   static OatClass FindOatClass(const DexFile& dex_file, uint16_t class_def_idx, bool* found);
diff --git a/runtime/oat_file_manager.cc b/runtime/oat_file_manager.cc
index 630945a..b166961 100644
--- a/runtime/oat_file_manager.cc
+++ b/runtime/oat_file_manager.cc
@@ -28,6 +28,7 @@
 #include "base/stl_util.h"
 #include "base/systrace.h"
 #include "class_linker.h"
+#include "class_loader_context.h"
 #include "dex_file-inl.h"
 #include "dex_file_tracking_registrar.h"
 #include "gc/scoped_gc_critical_section.h"
@@ -421,38 +422,47 @@
   }
 }
 
-static bool AreSharedLibrariesOk(const std::string& shared_libraries,
-                                 std::vector<const DexFile*>& dex_files) {
-  // If no shared libraries, we expect no dex files.
-  if (shared_libraries.empty()) {
-    return dex_files.empty();
-  }
-  // If we find the special shared library, skip the shared libraries check.
-  if (shared_libraries.compare(OatFile::kSpecialSharedLibrary) == 0) {
-    return true;
-  }
-  // Shared libraries is a series of dex file paths and their checksums, each separated by '*'.
-  std::vector<std::string> shared_libraries_split;
-  Split(shared_libraries, '*', &shared_libraries_split);
-
-  // Sanity check size of dex files and split shared libraries. Should be 2x as many entries in
-  // the split shared libraries since it contains pairs of filename/checksum.
-  if (dex_files.size() * 2 != shared_libraries_split.size()) {
+static bool AreSharedLibrariesOk(const std::string& context_spec,
+                                 std::vector<const DexFile*>& dex_files,
+                                 std::string* error_msg) {
+  std::vector<std::string> classpath;
+  std::vector<uint32_t> checksums;
+  bool is_special_shared_library;
+  if (!ClassLoaderContext::DecodePathClassLoaderContextFromOatFileKey(
+          context_spec, &classpath, &checksums, &is_special_shared_library)) {
+    *error_msg = "Could not decode the class loader context from the oat file key.";
     return false;
   }
 
+  DCHECK_EQ(classpath.size(), checksums.size());
+
+  // The classpath size should match the number of dex files.
+  if (classpath.size() != dex_files.size()) {
+    *error_msg = "The number of loaded dex files does not match the number of files "
+        "specified in the context. Expected=" + std::to_string(classpath.size()) +
+        ", found=" + std::to_string(dex_files.size());
+    return false;
+  }
+
+  // If we find the special shared library, skip the shared libraries check.
+  if (is_special_shared_library) {
+    return true;
+  }
+
   // Check that the loaded dex files have the same order and checksums as the shared libraries.
   for (size_t i = 0; i < dex_files.size(); ++i) {
+    const std::string& dex_location = dex_files[i]->GetLocation();
+    uint32_t dex_location_checksum = dex_files[i]->GetLocationChecksum();
     std::string absolute_library_path =
-        OatFile::ResolveRelativeEncodedDexLocation(dex_files[i]->GetLocation().c_str(),
-                                                   shared_libraries_split[i * 2]);
-    if (dex_files[i]->GetLocation() != absolute_library_path) {
+        OatFile::ResolveRelativeEncodedDexLocation(dex_location.c_str(), classpath[i]);
+    if (dex_location != absolute_library_path) {
+      *error_msg = "SharedLibraryCheck: expected=" + absolute_library_path + ", found=" +
+          dex_location;
       return false;
     }
-    char* end;
-    size_t shared_lib_checksum = strtoul(shared_libraries_split[i * 2 + 1].c_str(), &end, 10);
-    uint32_t dex_checksum = dex_files[i]->GetLocationChecksum();
-    if (*end != '\0' || dex_checksum != shared_lib_checksum) {
+    if (dex_location_checksum != checksums[i]) {
+      *error_msg = "SharedLibraryCheck: checksum mismatch for " + dex_location + ". Expected=" +
+          std::to_string(checksums[i]) + ", found=" + std::to_string(dex_location_checksum);
       return false;
     }
   }
@@ -586,7 +596,7 @@
   // Exit if shared libraries are ok. Do a full duplicate classes check otherwise.
   const std::string
       shared_libraries(oat_file->GetOatHeader().GetStoreValueByKey(OatHeader::kClassPathKey));
-  if (AreSharedLibrariesOk(shared_libraries, dex_files_loaded)) {
+  if (AreSharedLibrariesOk(shared_libraries, dex_files_loaded, error_msg)) {
     return false;
   }
 
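For reference, the classpath key being decoded here is the "location*checksum*" pair list that the removed OatFile::EncodeDexFileDependencies (above) used to produce. A minimal decoding sketch under that assumption; this is not the real ClassLoaderContext implementation, and handling of the special shared library marker is omitted:

  #include <cstdint>
  #include <cstdio>
  #include <cstdlib>
  #include <string>
  #include <vector>

  bool DecodeClassPathKey(const std::string& spec,
                          std::vector<std::string>* paths,
                          std::vector<uint32_t>* checksums) {
    // Split on the '*' separator used by the old encoder.
    std::vector<std::string> parts;
    size_t start = 0;
    for (size_t pos; (pos = spec.find('*', start)) != std::string::npos; start = pos + 1) {
      parts.push_back(spec.substr(start, pos - start));
    }
    if (parts.size() % 2 != 0) {
      return false;  // entries must come in location/checksum pairs
    }
    for (size_t i = 0; i < parts.size(); i += 2) {
      char* end;
      unsigned long checksum = std::strtoul(parts[i + 1].c_str(), &end, 10);
      if (*end != '\0') {
        return false;  // checksum was not a plain decimal number
      }
      paths->push_back(parts[i]);
      checksums->push_back(static_cast<uint32_t>(checksum));
    }
    return true;
  }

  int main() {
    std::vector<std::string> paths;
    std::vector<uint32_t> checksums;
    if (DecodeClassPathKey("a.dex*123*b.dex*456*", &paths, &checksums)) {
      std::printf("%s:%u %s:%u\n", paths[0].c_str(), checksums[0],
                  paths[1].c_str(), checksums[1]);
    }
    return 0;
  }
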
diff --git a/runtime/quicken_info.h b/runtime/quicken_info.h
new file mode 100644
index 0000000..5b72468
--- /dev/null
+++ b/runtime/quicken_info.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_QUICKEN_INFO_H_
+#define ART_RUNTIME_QUICKEN_INFO_H_
+
+#include "dex_instruction.h"
+
+namespace art {
+
+// QuickenInfoTable is a table of 16-bit dex indices. There is one slot for every instruction that
+// is possibly dequickenable.
+class QuickenInfoTable {
+ public:
+  explicit QuickenInfoTable(const uint8_t* data) : data_(data) {}
+
+  bool IsNull() const {
+    return data_ == nullptr;
+  }
+
+  uint16_t GetData(size_t index) const {
+    return data_[index * 2] | (static_cast<uint16_t>(data_[index * 2 + 1]) << 8);
+  }
+
+  // Returns true if the dex instruction has an index in the table (i.e. it may be dequickenable).
+  static bool NeedsIndexForInstruction(const Instruction* inst) {
+    return inst->IsQuickened() || inst->Opcode() == Instruction::NOP;
+  }
+
+  static size_t NumberOfIndices(size_t bytes) {
+    return bytes / sizeof(uint16_t);
+  }
+
+ private:
+  const uint8_t* const data_;
+
+  DISALLOW_COPY_AND_ASSIGN(QuickenInfoTable);
+};
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_QUICKEN_INFO_H_
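
GetData() above reassembles entry i from two little-endian bytes. A quick standalone check of that layout; the 0xffff sentinel corresponds to the DexFile::kDexNoIndex16 entries that the dex-to-dex compiler now records for plain NOPs:

  #include <cstdint>
  #include <cstdio>

  int main() {
    // Two 16-bit entries stored little-endian: 0x1234 and 0xffff.
    const uint8_t data[] = {0x34, 0x12, 0xff, 0xff};
    for (size_t i = 0; i < 2; ++i) {
      uint16_t entry = data[i * 2] | (static_cast<uint16_t>(data[i * 2 + 1]) << 8);
      std::printf("entry %zu = 0x%04x\n", i, static_cast<unsigned>(entry));
    }
    return 0;
  }
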
diff --git a/runtime/safe_map.h b/runtime/safe_map.h
index b54f587..f298691 100644
--- a/runtime/safe_map.h
+++ b/runtime/safe_map.h
@@ -79,6 +79,9 @@
   iterator lower_bound(const K& k) { return map_.lower_bound(k); }
   const_iterator lower_bound(const K& k) const { return map_.lower_bound(k); }
 
+  iterator upper_bound(const K& k) { return map_.upper_bound(k); }
+  const_iterator upper_bound(const K& k) const { return map_.upper_bound(k); }
+
   size_type count(const K& k) const { return map_.count(k); }
 
   // Note that unlike std::map's operator[], this doesn't return a reference to the value.
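
SafeMap::upper_bound simply forwards to std::map::upper_bound: it returns the first element whose key compares strictly greater than k, so lower_bound and upper_bound together bracket the run of entries equal to k. A usage sketch against a plain std::map, which has identical semantics:

  #include <cstdio>
  #include <map>

  int main() {
    std::map<int, const char*> m{{1, "one"}, {3, "three"}, {5, "five"}};
    auto it = m.upper_bound(3);  // first key strictly greater than 3
    if (it != m.end()) {
      std::printf("%d -> %s\n", it->first, it->second);  // prints "5 -> five"
    }
    return 0;
  }
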
diff --git a/runtime/signal_catcher.cc b/runtime/signal_catcher.cc
index 8c934d5..f0b6ee4 100644
--- a/runtime/signal_catcher.cc
+++ b/runtime/signal_catcher.cc
@@ -168,7 +168,7 @@
   }
 
 #if defined(ART_TARGET_ANDROID)
-  if (!tombstoned_notify_completion(tombstone_fd)) {
+  if (use_tombstoned_stack_trace_fd_ && !tombstoned_notify_completion(tombstone_fd)) {
     LOG(WARNING) << "Unable to notify tombstoned of dump completion.";
   }
 #endif
diff --git a/runtime/thread.cc b/runtime/thread.cc
index fc30832..b01d50a 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -2861,6 +2861,7 @@
   DO_THREAD_OFFSET(SelfOffset<ptr_size>(), "self")
   DO_THREAD_OFFSET(StackEndOffset<ptr_size>(), "stack_end")
   DO_THREAD_OFFSET(ThinLockIdOffset<ptr_size>(), "thin_lock_thread_id")
+  DO_THREAD_OFFSET(IsGcMarkingOffset<ptr_size>(), "is_gc_marking")
   DO_THREAD_OFFSET(TopOfManagedStackOffset<ptr_size>(), "top_quick_frame_method")
   DO_THREAD_OFFSET(TopShadowFrameOffset<ptr_size>(), "top_shadow_frame")
   DO_THREAD_OFFSET(TopHandleScopeOffset<ptr_size>(), "top_handle_scope")
diff --git a/runtime/thread.h b/runtime/thread.h
index e785ddc..24d126f 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -656,6 +656,17 @@
         OFFSETOF_MEMBER(tls_ptr_sized_values, jni_entrypoints) + jni_entrypoint_offset);
   }
 
+  // Return the entry point offset integer value for ReadBarrierMarkRegX, where X is `reg`.
+  template <PointerSize pointer_size>
+  static int32_t ReadBarrierMarkEntryPointsOffset(size_t reg) {
+    // The entry point list defines 30 ReadBarrierMarkRegX entry points.
+    DCHECK_LT(reg, 30u);
+    // The ReadBarrierMarkRegX entry points are ordered by increasing
+  // register number in Thread::tlsPtr_.quick_entrypoints.
+    return QUICK_ENTRYPOINT_OFFSET(pointer_size, pReadBarrierMarkReg00).Int32Value()
+        + static_cast<size_t>(pointer_size) * reg;
+  }
+
   template<PointerSize pointer_size>
   static ThreadOffset<pointer_size> SelfOffset() {
     return ThreadOffsetFromTlsPtr<pointer_size>(OFFSETOF_MEMBER(tls_ptr_sized_values, self));
diff --git a/runtime/vdex_file.h b/runtime/vdex_file.h
index 93d282b..4845c02 100644
--- a/runtime/vdex_file.h
+++ b/runtime/vdex_file.h
@@ -65,8 +65,8 @@
 
    private:
     static constexpr uint8_t kVdexMagic[] = { 'v', 'd', 'e', 'x' };
-    // Last update: Disable in-place vdex update
-    static constexpr uint8_t kVdexVersion[] = { '0', '0', '6', '\0' };
+    // Last update: Smaller quickening info
+    static constexpr uint8_t kVdexVersion[] = { '0', '0', '7', '\0' };
 
     uint8_t magic_[4];
     uint8_t version_[4];
diff --git a/test/021-string2/src/Main.java b/test/021-string2/src/Main.java
index 194f4a1..3b81d8e 100644
--- a/test/021-string2/src/Main.java
+++ b/test/021-string2/src/Main.java
@@ -700,6 +700,11 @@
             $noinline$constNonAsciiString35Equals("\u0440123456789012345678901234567890123x"));
         Assert.assertFalse(
             $noinline$constNonAsciiString35Equals("01234567890123456789012345678901234"));
+
+        // Regression test for incorrectly creating an uncompressed string when the
+        // string should be compressed. Only the low 8 bits are relevant but the whole
+        // `hibyte` was erroneously tested. Bug: 63661357
+        Assert.assertTrue("A".equals(new String(new byte[] { (byte)'A' }, /* hibyte */ 0x100)));
     }
 
     public static boolean $noinline$equalsConstString0(String s) {
diff --git a/test/079-phantom/src/Bitmap.java b/test/079-phantom/src/Bitmap.java
index ff43749..0d6e2d8 100644
--- a/test/079-phantom/src/Bitmap.java
+++ b/test/079-phantom/src/Bitmap.java
@@ -17,6 +17,7 @@
 import java.lang.ref.ReferenceQueue;
 import java.lang.ref.PhantomReference;
 import java.util.ArrayList;
+import java.util.concurrent.CountDownLatch;
 
 public class Bitmap {
     String mName;           /* for debugging */
@@ -76,11 +77,14 @@
         PhantomWrapper phan = new PhantomWrapper(wrapper, sPhantomQueue,
                 nativeData);
         sPhantomList.add(phan);
+        wrapper.mPhantomWrapper = phan;
         return wrapper;
     }
 
-    static void freeNativeStorage(int nativeDataPtr) {
+    static void freeNativeStorage(int nativeDataPtr, CountDownLatch freeSignal) {
         System.out.println("freeNativeStorage: " + nativeDataPtr);
+        // Wake up the main thread that is [or will be] blocked until this native data is freed.
+        freeSignal.countDown();
     }
 
     /*
@@ -93,6 +97,9 @@
         }
         public int mNativeData;
 
+        // The PhantomWrapper corresponding to this NativeWrapper.
+        public PhantomWrapper mPhantomWrapper;
+
         /*
         @Override
         protected void finalize() throws Throwable {
@@ -118,6 +125,8 @@
     }
 
     public int mNativeData;
+    // This will be signaled once mNativeData has been freed.
+    public CountDownLatch mFreeSignal = new CountDownLatch(1);
 }
 
 /*
@@ -137,8 +146,7 @@
                 PhantomWrapper ref = (PhantomWrapper) mQueue.remove();
                 //System.out.println("dequeued ref " + ref.mNativeData +
                 //    " - " + ref);
-                Bitmap.freeNativeStorage(ref.mNativeData);
-                //ref.clear();
+                Bitmap.freeNativeStorage(ref.mNativeData, ref.mFreeSignal);
             } catch (InterruptedException ie) {
                 System.out.println("intr");
                 break;
diff --git a/test/079-phantom/src/Main.java b/test/079-phantom/src/Main.java
index daead2e..ae2c688 100644
--- a/test/079-phantom/src/Main.java
+++ b/test/079-phantom/src/Main.java
@@ -14,8 +14,11 @@
  * limitations under the License.
  */
 
+import java.util.concurrent.CountDownLatch;
+
 public class Main {
     Bitmap mBitmap1, mBitmap2, mBitmap3, mBitmap4;
+    CountDownLatch mFreeSignalA, mFreeSignalB;
 
     public static void sleep(int ms) {
         try {
@@ -31,7 +34,6 @@
         Main main = new Main();
         main.run();
 
-        sleep(1000);
         System.out.println("done");
     }
 
@@ -46,22 +48,30 @@
         System.out.println("nulling 1");
         mBitmap1 = null;
         Runtime.getRuntime().gc();
-        sleep(500);
+        try {
+          mFreeSignalA.await();  // Block until dataA is definitely freed.
+        } catch (InterruptedException e) {
+          System.out.println("got unexpected InterruptedException e: " + e);
+        }
 
         System.out.println("nulling 2");
         mBitmap2 = null;
         Runtime.getRuntime().gc();
-        sleep(500);
+        sleep(200);
 
         System.out.println("nulling 3");
         mBitmap3 = null;
         Runtime.getRuntime().gc();
-        sleep(500);
+        sleep(200);
 
         System.out.println("nulling 4");
         mBitmap4 = null;
         Runtime.getRuntime().gc();
-        sleep(500);
+        try {
+          mFreeSignalB.await();  // Block until dataB is definitely freed.
+        } catch (InterruptedException e) {
+          System.out.println("got unexpected InterruptedException e: " + e);
+        }
 
         Bitmap.shutDown();
     }
@@ -77,7 +87,10 @@
      */
     public void createBitmaps() {
         Bitmap.NativeWrapper dataA = Bitmap.allocNativeStorage(10, 10);
+        mFreeSignalA = dataA.mPhantomWrapper.mFreeSignal;
         Bitmap.NativeWrapper dataB = Bitmap.allocNativeStorage(20, 20);
+        mFreeSignalB = dataB.mPhantomWrapper.mFreeSignal;
+
         mBitmap1 = new Bitmap("one", 10, 10, dataA);
         mBitmap2 = new Bitmap("two", 20, 20, dataB);
         mBitmap3 = mBitmap4 = new Bitmap("three/four", 20, 20, dataB);
diff --git a/test/990-method-handle-and-mr/build b/test/990-method-handle-and-mr/build
new file mode 100755
index 0000000..12a8e18
--- /dev/null
+++ b/test/990-method-handle-and-mr/build
@@ -0,0 +1,20 @@
+#!/bin/bash
+#
+# Copyright 2017 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Exit on failure.
+set -e
+
+./default-build "$@" --experimental method-handles
diff --git a/test/990-method-handle-and-mr/expected.txt b/test/990-method-handle-and-mr/expected.txt
new file mode 100644
index 0000000..8483fb5
--- /dev/null
+++ b/test/990-method-handle-and-mr/expected.txt
@@ -0,0 +1,4 @@
+Test
+Test
+Test
+passed
diff --git a/test/990-method-handle-and-mr/info.txt b/test/990-method-handle-and-mr/info.txt
new file mode 100644
index 0000000..85a957c
--- /dev/null
+++ b/test/990-method-handle-and-mr/info.txt
@@ -0,0 +1,2 @@
+Test stressing code generated for invoke-polymorphic instructions with
+respect to Marking Register (on architectures supporting MR).
diff --git a/test/990-method-handle-and-mr/src/Main.java b/test/990-method-handle-and-mr/src/Main.java
new file mode 100644
index 0000000..739b8eb
--- /dev/null
+++ b/test/990-method-handle-and-mr/src/Main.java
@@ -0,0 +1,91 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// This test was inspired by benchmarks.MicroMethodHandles.java.MicroMethodHandles.
+
+import java.io.PrintStream;
+import java.lang.invoke.MethodHandle;
+import java.lang.invoke.MethodHandles;
+import java.lang.invoke.MethodType;
+
+class A {
+  public Long binaryFunction(int x, double y) {
+    return 1000L;
+  }
+}
+
+class Test {
+  Test() throws Throwable {
+    this.handle = MethodHandles.lookup().findVirtual(A.class, "binaryFunction",
+                                                     MethodType.methodType(Long.class, int.class,
+                                                                           double.class));
+    this.a = new A();
+    this.x = new Integer(72);
+    this.y = new Double(-1.39e-31);
+  }
+
+  void execute() {
+    try {
+      executeFor(2000);
+      System.out.println(getName());
+    } catch (Throwable t) {
+      System.err.println("Exception during the execution of " + getName());
+      System.err.println(t);
+      t.printStackTrace(new PrintStream(System.err));
+      System.exit(1);
+    }
+  }
+
+  void executeFor(long timeMinimumMillis) throws Throwable {
+    long startTime = System.currentTimeMillis();
+    long elapsed = 0;
+    while (elapsed < timeMinimumMillis) {
+      exercise();
+      elapsed = System.currentTimeMillis() - startTime;
+    }
+  }
+
+  void exercise() throws Throwable {
+    for (int i = 0; i < EXERCISE_ITERATIONS; ++i) {
+      run();
+    }
+  }
+
+  void run() throws Throwable {
+    long result = (long) handle.invoke(a, x, y);
+  }
+
+  String getName() {
+    return getClass().getSimpleName();
+  }
+
+  private static final int EXERCISE_ITERATIONS = 500;
+
+  private MethodHandle handle;
+  private A a;
+  private Integer x;
+  private Double y;
+}
+
+public class Main {
+  public static void main(String[] args) throws Throwable {
+    Test[] tests = new Test[] { new Test(), new Test(), new Test() };
+    for (Test test : tests) {
+      test.execute();
+    }
+    System.out.println("passed");
+  }
+}
diff --git a/test/knownfailures.json b/test/knownfailures.json
index 87d857e..3edb0a8 100644
--- a/test/knownfailures.json
+++ b/test/knownfailures.json
@@ -100,6 +100,11 @@
         "bug": "http://b/35800768"
     },
     {
+        "tests": "163-app-image-methods",
+        "variant": "gcstress",
+        "description": ["This test sometimes runs out of memory initializing the boot classpath."]
+    },
+    {
         "tests": ["908-gc-start-finish",
                   "913-heaps"],
         "variant": "gcstress",
@@ -640,6 +645,36 @@
         "env_vars": {"SANITIZE_TARGET": "address"}
     },
     {
+        "tests": [
+            "059-finalizer-throw",
+            "074-gc-thrash",
+            "911-get-stack-trace",
+            "913-heaps",
+            "980-redefine-object"
+        ],
+        "description": [
+            "Interpreter with access checks stack frames are too large and result in",
+            "StackOverFlow errors being thrown."
+        ],
+        "variant": "interp-ac & host",
+        "env_vars": {"SANITIZE_HOST": "address"}
+    },
+    {
+        "tests": [
+            "059-finalizer-throw",
+            "074-gc-thrash",
+            "911-get-stack-trace",
+            "913-heaps",
+            "980-redefine-object"
+        ],
+        "description": [
+            "Interpreter with access checks stack frames are too large and result in",
+            "StackOverFlow errors being thrown."
+        ],
+        "variant": "interp-ac & target",
+        "env_vars": {"SANITIZE_TARGET": "address"}
+    },
+    {
         "tests": "071-dexfile-map-clean",
         "description": [ "We use prebuilt zipalign on master-art-host to avoid pulling in a lot",
                          "of the framework. But a non-sanitized zipalign binary does not work with",
diff --git a/tools/generate-boot-image-profile.sh b/tools/generate-boot-image-profile.sh
new file mode 100755
index 0000000..d87123a
--- /dev/null
+++ b/tools/generate-boot-image-profile.sh
@@ -0,0 +1,73 @@
+#!/bin/bash
+#
+# Copyright (C) 2017 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#
+# This script creates a boot image profile based on input profiles.
+#
+
+if [[ "$#" -lt 2 ]]; then
+  echo "Usage $0 <output> <profman args> <profiles>+"
+  echo "Also outputs <output>.txt and <output>.preloaded-classes"
+  echo 'Example: generate-boot-image-profile.sh boot.prof --profman-arg --boot-image-sampled-method-threshold=1 profiles/0/*/primary.prof'
+  exit 1
+fi
+
+DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+TOP="$DIR/../.."
+source "${TOP}/build/envsetup.sh" >&/dev/null # import get_build_var
+
+OUT_PROFILE=$1
+shift
+
+# Read the profman args.
+profman_args=()
+while [[ "$#" -ge 2 ]] && [[ "$1" = '--profman-arg' ]]; do
+  profman_args+=("$2")
+  shift 2
+done
+
+# Remaining args are all the profiles.
+for file in "$@"; do
+  if [[ -s $file ]]; then
+    profman_args+=("--profile-file=$file")
+  fi
+done
+
+jar_args=()
+boot_jars=$("$ANDROID_BUILD_TOP"/art/tools/bootjars.sh --target)
+jar_dir=$ANDROID_BUILD_TOP/$(get_build_var TARGET_OUT_JAVA_LIBRARIES)
+for file in $boot_jars; do
+  filename="$jar_dir/$file.jar"
+  jar_args+=("--apk=$filename")
+  jar_args+=("--dex-location=$filename")
+done
+profman_args+=("${jar_args[@]}")
+
+# Generate the profile.
+"$ANDROID_HOST_OUT/bin/profman" --generate-boot-image-profile "--reference-profile-file=$OUT_PROFILE" "${profman_args[@]}"
+
+# Convert it to text.
+echo Dumping profile to $OUT_PROFILE.txt
+"$ANDROID_HOST_OUT/bin/profman" --dump-classes-and-methods "--profile-file=$OUT_PROFILE" "${jar_args[@]}" > "$OUT_PROFILE.txt"
+
+# Generate preloaded classes
+# Filter only classes by using grep -v
+# Remove first and last characters L and ;
+# Replace / with . to make dot format
+grep -v "\\->" "$OUT_PROFILE.txt" | sed 's/.\(.*\)./\1/g' | tr "/" "." > "$OUT_PROFILE.preloaded-classes"
+
+# You may need to filter some classes out since creating threads is not allowed in the zygote.
+# i.e. using: grep -v -E '(android.net.ConnectivityThread\$Singleton)'
diff --git a/tools/runtime_memusage/sanitizer_logcat_analysis.sh b/tools/runtime_memusage/sanitizer_logcat_analysis.sh
index 75cb9a9..66b48fa 100755
--- a/tools/runtime_memusage/sanitizer_logcat_analysis.sh
+++ b/tools/runtime_memusage/sanitizer_logcat_analysis.sh
@@ -18,13 +18,22 @@
 #
 # This script takes in a logcat containing Sanitizer traces and outputs several
 # files, prints information regarding the traces, and plots information as well.
+ALL_PIDS=false
 USE_TEMP=true
 DO_REDO=false
+PACKAGE_NAME=""
 # EXACT_ARG and MIN_ARG are passed to prune_sanitizer_output.py
 EXACT_ARG=""
 MIN_ARG=""
+OFFSET_ARGS=""
+TIME_ARGS=""
 usage() {
   echo "Usage: $0 [options] [LOGCAT_FILE] [CATEGORIES...]"
+  echo "    -a"
+  echo "        Forces all pids associated with registered dex"
+  echo "        files in the logcat to be processed."
+  echo "        default: only the last pid is processed"
+  echo
   echo "    -d  OUT_DIRECTORY"
   echo "        Puts all output in specified directory."
   echo "        If not given, output will be put in a local"
@@ -37,7 +46,7 @@
   echo "        the -m argument or by prune_sanitizer_output.py"
   echo
   echo "    -f"
-  echo "        forces redo of all commands even if output"
+  echo "        Forces redo of all commands even if output"
   echo "        files exist. Steps are skipped if their output"
   echo "        exist already and this is not enabled."
   echo
@@ -46,6 +55,22 @@
   echo "        at least MINIMUM_CALLS_PER_TRACE lines."
   echo "        default: specified by prune_sanitizer_output.py"
   echo
+  echo "    -o  [OFFSET],[OFFSET]"
+  echo "        Filters out all Dex File offsets outside the"
+  echo "        range between provided offsets. 'inf' can be"
+  echo "        provided for infinity."
+  echo "        default: 0,inf"
+  echo
+  echo "    -p  [PACKAGE_NAME]"
+  echo "        Using the package name, uses baksmali to get"
+  echo "        a dump of the Dex File format for the package."
+  echo
+  echo "    -t  [TIME_OFFSET],[TIME_OFFSET]"
+  echo "        Filters out all time offsets outside the"
+  echo "        range between provided offsets. 'inf' can be"
+  echo "        provided for infinity."
+  echo "        default: 0,inf"
+  echo
   echo "    CATEGORIES are words that are expected to show in"
   echo "       a large subset of symbolized traces. Splits"
   echo "       output based on each word."
@@ -55,33 +80,61 @@
 }
 
 
-while [[ $# -gt 1 ]]; do
-case $1 in
-  -d)
-  shift
-  USE_TEMP=false
-  OUT_DIR=$1
-  shift
-  break
-  ;;
-  -e)
-  shift
-  EXACT_ARG='-e'
-  ;;
-  -f)
-  shift
-  DO_REDO=true
-  ;;
-  -m)
-  shift
-  MIN_ARG='-m '"$1"''
-  shift
-  ;;
-  *)
-  usage
-  exit
+while getopts ":ad:efm:o:p:t:" opt ; do
+case ${opt} in
+  a)
+    ALL_PIDS=true
+    ;;
+  d)
+    USE_TEMP=false
+    OUT_DIR=$OPTARG
+    ;;
+  e)
+    EXACT_ARG='-e'
+    ;;
+  f)
+    DO_REDO=true
+    ;;
+  m)
+    if ! [ "$OPTARG" -eq "$OPTARG" ]; then
+      usage
+      exit
+    fi
+    MIN_ARG='-m '"$OPTARG"
+    ;;
+  o)
+    set -f
+    OLD_IFS=$IFS
+    IFS=","
+    OFFSET_ARGS=( $OPTARG )
+    if [ "${#OFFSET_ARGS[@]}" -ne 2 ]; then
+      usage
+      exit
+    fi
+    OFFSET_ARGS=( "--offsets" "${OFFSET_ARGS[@]}" )
+    IFS=$OLD_IFS
+    ;;
+  t)
+    set -f
+    OLD_IFS=$IFS
+    IFS=","
+    TIME_ARGS=( $OPTARG )
+    if [ "${#TIME_ARGS[@]}" -ne 2 ]; then
+      usage
+      exit
+    fi
+    TIME_ARGS=( "--times" "${TIME_ARGS[@]}" )
+    IFS=$OLD_IFS
+    ;;
+  p)
+    PACKAGE_NAME=$OPTARG
+    ;;
+  \?)
+    usage
+    exit
 esac
 done
+shift $((OPTIND -1))
 
 if [ $# -lt 1 ]; then
   usage
@@ -103,78 +156,147 @@
 fi
 
 # Note: Steps are skipped if their output exists until -f flag is enabled
-# Step 1 - Only output lines related to Sanitizer
-# Folder that holds all file output
 echo "Output folder: $OUT_DIR"
-ASAN_OUT=$OUT_DIR/asan_output
-if [ ! -f $ASAN_OUT ] || [ $DO_REDO = true ]; then
-  DO_REDO=true
-  echo "Extracting ASAN output"
-  grep "app_process64" $LOGCAT_FILE > $ASAN_OUT
-else
-  echo "Skipped: Extracting ASAN output"
+unique_pids=( $(grep "RegisterDexFile" "$LOGCAT_FILE" | grep -v "zygote64" | tr -s ' ' | cut -f3 -d' ' | awk '!a[$0]++') )
+echo "List of pids: ${unique_pids[@]}"
+if [ $ALL_PIDS = false ]; then
+  unique_pids=( ${unique_pids[-1]} )
 fi
 
-# Step 2 - Only output lines containing Dex File Start Addresses
-DEX_START=$OUT_DIR/dex_start
-if [ ! -f $DEX_START ] || [ $DO_REDO = true ]; then
-  DO_REDO=true
-  echo "Extracting Start of Dex File(s)"
-  grep "RegisterDexFile" $LOGCAT_FILE > $DEX_START
-else
-  echo "Skipped: Extracting Start of Dex File(s)"
-fi
+for pid in "${unique_pids[@]}"
+do
+  echo
+  echo "Current pid: $pid"
+  echo
+  PID_DIR=$OUT_DIR/$pid
+  if [ ! -d "$PID_DIR" ]; then
+    mkdir $PID_DIR
+    DO_REDO[$pid]=true
+  fi
 
-# Step 3 - Clean Sanitizer output from Step 2 since logcat cannot
-# handle large amounts of output.
-ASAN_OUT_FILTERED=$OUT_DIR/asan_output_filtered
-if [ ! -f $ASAN_OUT_FILTERED ] || [ $DO_REDO = true ]; then
-  DO_REDO=true
-  echo "Filtering/Cleaning ASAN output"
-  python $ANDROID_BUILD_TOP/art/tools/runtime_memusage/prune_sanitizer_output.py \
-  $EXACT_ARG $MIN_ARG -d $OUT_DIR $ASAN_OUT
-else
-  echo "Skipped: Filtering/Cleaning ASAN output"
-fi
+  INTERMEDIATES_DIR=$PID_DIR/intermediates
+  RESULTS_DIR=$PID_DIR/results
+  LOGCAT_PID_FILE=$PID_DIR/logcat
 
-# Step 4 - Retrieve symbolized stack traces from Step 3 output
-SYM_FILTERED=$OUT_DIR/sym_filtered
-if [ ! -f $SYM_FILTERED ] || [ $DO_REDO = true ]; then
-  DO_REDO=true
-  echo "Retrieving symbolized traces"
-  $ANDROID_BUILD_TOP/development/scripts/stack $ASAN_OUT_FILTERED > $SYM_FILTERED
-else
-  echo "Skipped: Retrieving symbolized traces"
-fi
+  if [ ! -f "$PID_DIR/logcat" ] || [ "${DO_REDO[$pid]}" = true ] || [ $DO_REDO = true ]; then
+    DO_REDO[$pid]=true
+    awk '{if($3 == '$pid') print $0}' $LOGCAT_FILE > $LOGCAT_PID_FILE
+  fi
 
-# Step 5 - Using Steps 2, 3, 4 outputs in order to output graph data
-# and trace data
-# Only the category names are needed for the commands giving final output
-shift
-TIME_OUTPUT=($OUT_DIR/time_output_*.dat)
-if [ ! -e ${TIME_OUTPUT[0]} ] || [ $DO_REDO = true ]; then
-  DO_REDO=true
-  echo "Creating Categorized Time Table"
-  python $ANDROID_BUILD_TOP/art/tools/runtime_memusage/symbol_trace_info.py \
-    -d $OUT_DIR $ASAN_OUT_FILTERED $SYM_FILTERED $DEX_START $@
-else
-  echo "Skipped: Creating Categorized Time Table"
-fi
+  if [ ! -d "$INTERMEDIATES_DIR" ]; then
+    mkdir $INTERMEDIATES_DIR
+    DO_REDO[$pid]=true
+  fi
 
-# Step 6 - Use graph data from Step 5 to plot graph
-# Contains the category names used for legend of gnuplot
-PLOT_CATS=`echo \"Uncategorized $@\"`
-echo "Plotting Categorized Time Table"
-# Plots the information from logcat
-gnuplot --persist -e \
-  'filename(n) = sprintf("'"$OUT_DIR"'/time_output_%d.dat", n);
-   catnames = '"$PLOT_CATS"';
-   set title "Dex File Offset vs. Time accessed since App Start";
-   set xlabel "Time (milliseconds)";
-   set ylabel "Dex File Offset (bytes)";
-   plot for [i=0:'"$NUM_CAT"'] filename(i) using 1:2 title word(catnames, i + 1);'
+  # Step 1 - Only output lines related to Sanitizer
+  # Folder that holds all file output
+  ASAN_OUT=$INTERMEDIATES_DIR/asan_output
+  if [ ! -f $ASAN_OUT ] || [ "${DO_REDO[$pid]}" = true ] || [ $DO_REDO = true ]; then
+    DO_REDO[$pid]=true
+    echo "Extracting ASAN output"
+    grep "app_process64" $LOGCAT_PID_FILE > $ASAN_OUT
+  else
+    echo "Skipped: Extracting ASAN output"
+  fi
 
-if [ $USE_TEMP = true ]; then
-  echo "Removing temp directory and files"
-  rm -rf $OUT_DIR
-fi
+  # Step 2 - Only output lines containing Dex File Start Addresses
+  DEX_START=$INTERMEDIATES_DIR/dex_start
+  if [ ! -f $DEX_START ] || [ "${DO_REDO[$pid]}" = true ] || [ $DO_REDO = true ]; then
+    DO_REDO[$pid]=true
+    echo "Extracting Start of Dex File(s)"
+    grep "RegisterDexFile" $LOGCAT_PID_FILE > $DEX_START
+  else
+    echo "Skipped: Extracting Start of Dex File(s)"
+  fi
+
+  # Step 3 - Clean Sanitizer output from Step 2 since logcat cannot
+  # handle large amounts of output.
+  ASAN_OUT_FILTERED=$INTERMEDIATES_DIR/asan_output_filtered
+  if [ ! -f $ASAN_OUT_FILTERED ] || [ "${DO_REDO[$pid]}" = true ] || [ $DO_REDO = true ]; then
+    DO_REDO[$pid]=true
+    echo "Filtering/Cleaning ASAN output"
+    python $ANDROID_BUILD_TOP/art/tools/runtime_memusage/prune_sanitizer_output.py \
+    $EXACT_ARG $MIN_ARG -d $INTERMEDIATES_DIR $ASAN_OUT
+  else
+    echo "Skipped: Filtering/Cleaning ASAN output"
+  fi
+
+  # Step 4 - Retrieve symbolized stack traces from Step 3 output
+  SYM_FILTERED=$INTERMEDIATES_DIR/sym_filtered
+  if [ ! -f $SYM_FILTERED ] || [ "${DO_REDO[$pid]}" = true ] || [ $DO_REDO = true ]; then
+    DO_REDO[$pid]=true
+    echo "Retrieving symbolized traces"
+    $ANDROID_BUILD_TOP/development/scripts/stack $ASAN_OUT_FILTERED > $SYM_FILTERED
+  else
+    echo "Skipped: Retrieving symbolized traces"
+  fi
+
+  # Step 4.5 - Obtain Dex File Format of dex file related to package
+  BAKSMALI_DMP_OUT="$INTERMEDIATES_DIR/baksmali_dex_file"
+  BAKSMALI_DMP_ARG="--dex-file=$BAKSMALI_DMP_OUT"
+  if [ ! -f $BAKSMALI_DMP_OUT ] || [ "${DO_REDO[$pid]}" = true ] || [ $DO_REDO = true ]; then
+    if [ "$PACKAGE_NAME" != "" ]; then
+      # Extracting Dex File path on device from Dex File related to package
+      apk_directory=$(dirname $(grep $PACKAGE_NAME $DEX_START | tail -n1 | awk '{print $8}'))
+      apk_dex_files=$(adb shell find $apk_directory -name "*.?dex" -type f 2> /dev/null)
+      for apk_file in $apk_dex_files; do
+        base_name=$(basename $apk_file)
+        adb pull $apk_file $INTERMEDIATES_DIR/base."${base_name#*.}"
+      done
+      oatdump --oat-file=$INTERMEDIATES_DIR/base.odex --export-dex-to=$INTERMEDIATES_DIR --output=/dev/null
+      export_dex=( $INTERMEDIATES_DIR/*apk_export* )
+      baksmali -JXmx1024M dump $export_dex > $BAKSMALI_DMP_OUT 2> /dev/null
+      if ! [ -s $BAKSMALI_DMP_OUT ]; then
+        rm $BAKSMALI_DMP_OUT
+        BAKSMALI_DMP_ARG=""
+        echo "Failed to retrieve Dex File format"
+      fi
+    else
+      BAKSMALI_DMP_ARG=""
+      echo "Failed to retrieve Dex File format"
+    fi
+  else
+    echo "Skipped: Retrieving Dex File format from baksmali"
+  fi
+
+  if [ ! -d "$RESULTS_DIR" ]; then
+    mkdir $RESULTS_DIR
+    DO_REDO[$pid]=true
+  fi
+
+  # Step 5 - Using Steps 2, 3, 4 outputs in order to output graph data
+  # and trace data
+  # Only the category names are needed for the commands giving final output
+  shift
+  TIME_OUTPUT=($RESULTS_DIR/time_output_*.dat)
+  if [ ! -e ${TIME_OUTPUT[0]} ] || [ "${DO_REDO[$pid]}" = true ] || [ $DO_REDO = true ]; then
+    DO_REDO[$pid]=true
+    echo "Creating Categorized Time Table"
+    python $ANDROID_BUILD_TOP/art/tools/runtime_memusage/symbol_trace_info.py \
+      -d $RESULTS_DIR ${OFFSET_ARGS[@]} ${TIME_ARGS[@]} $BAKSMALI_DMP_ARG $ASAN_OUT_FILTERED $SYM_FILTERED $DEX_START $@
+  else
+    echo "Skipped: Creating Categorized Time Table"
+  fi
+
+  # Step 6 - Use graph data from Step 5 to plot graph
+  # Contains the category names used for legend of gnuplot
+  PLOT_CATS=`echo \"Uncategorized $@\"`
+  PACKAGE_STRING=""
+  if [ "$PACKAGE_NAME" != "" ]; then
+    PACKAGE_STRING="Package name: $PACKAGE_NAME "
+  fi
+  echo "Plotting Categorized Time Table"
+  # Plots the information from logcat
+  gnuplot --persist -e \
+    'filename(n) = sprintf("'"$RESULTS_DIR"'/time_output_%d.dat", n);
+     catnames = '"$PLOT_CATS"';
+     set title "'"$PACKAGE_STRING"'PID: '"$pid"'";
+     set xlabel "Time (milliseconds)";
+     set ylabel "Dex File Offset (bytes)";
+     plot for [i=0:'"$NUM_CAT"'] filename(i) using 1:2 title word(catnames, i + 1);'
+
+  if [ $USE_TEMP = true ]; then
+    echo "Removing temp directory and files"
+    rm -rf $OUT_DIR
+  fi
+done
diff --git a/tools/runtime_memusage/symbol_trace_info.py b/tools/runtime_memusage/symbol_trace_info.py
index e539be2..a5ced38 100755
--- a/tools/runtime_memusage/symbol_trace_info.py
+++ b/tools/runtime_memusage/symbol_trace_info.py
@@ -25,7 +25,7 @@
 import argparse
 import bisect
 import os
-import sys
+import re
 
 
 def find_match(list_substrings, big_string):
@@ -36,8 +36,13 @@
     return list_substrings.index("Uncategorized")
 
 
-def absolute_to_relative(plot_list, dex_start_list, cat_list):
+def absolute_to_relative(data_lists, symbol_traces):
     """Address changed to Dex File offset and shifting time to 0 min in ms."""
+    plot_list = data_lists["plot_list"]
+    dex_start_list = data_lists["dex_start_list"]
+    cat_list = data_lists["cat_list"]
+    offsets = data_lists["offsets"]
+    time_offsets = data_lists["times"]
     time_format_str = "%H:%M:%S.%f"
     first_access_time = datetime.strptime(plot_list[0][0],
                                           time_format_str)
@@ -52,9 +57,22 @@
         dex_file_start = dex_start_list[bisect.bisect(dex_start_list,
                                                       address_access) - 1
                                         ]
-        elem.insert(1, address_access - dex_file_start)
-        # Category that a data point belongs to
-        elem.insert(2, cat_list[ind])
+        dex_offset = address_access - dex_file_start
+        # Nullify data points that do not meet the offset/time criteria, if specified
+        # Assumes that offsets is already sorted
+        if (dex_offset >= offsets[0] and dex_offset < offsets[1] and
+            elem[0] >= time_offsets[0] and elem[0] < time_offsets[1]):
+
+            elem.insert(1, dex_offset)
+            # Category that a data point belongs to
+            elem.insert(2, cat_list[ind])
+        else:
+            elem[0] = None
+            elem[1] = None
+            elem.append(None)
+            elem.append(None)
+            symbol_traces[ind] = None
+            cat_list[ind] = None
 
 
 def print_category_info(cat_split, outname, out_dir_name, title):
@@ -67,7 +85,7 @@
           str(len(trace_counts_list_ordered)))
     print("\tSum of trace counts: " +
           str(sum([trace[1] for trace in trace_counts_list_ordered])))
-    print("\n\tCount: How many traces appeared with count\n\t")
+    print("\n\tCount: How many traces appeared with count\n\t", end="")
     print(Counter([trace[1] for trace in trace_counts_list_ordered]))
     with open(os.path.join(out_dir_name, outname), "w") as output_file:
         for trace in trace_counts_list_ordered:
@@ -79,6 +97,8 @@
 
 def print_categories(categories, symbol_file_split, out_dir_name):
     """Prints details of all categories."""
+    symbol_file_split = [trace for trace in symbol_file_split
+                          if trace is not None]
     # Info of traces containing a call to current category
     for cat_num, cat_name in enumerate(categories[1:]):
         print("\nCategory #%d" % (cat_num + 1))
@@ -123,6 +143,26 @@
     parser.add_argument("-d", action="store",
                         default="", dest="out_dir_name", type=is_directory,
                         help="Output Directory")
+    parser.add_argument("--dex-file", action="store",
+                        default=None, dest="dex_file",
+                        type=argparse.FileType("r"),
+                        help="Baksmali Dex File Dump")
+    parser.add_argument("--offsets", action="store", nargs=2,
+                        default=[float(0), float("inf")],
+                        dest="offsets",
+                        metavar="OFFSET",
+                        type=float,
+                        help="Filters out accesses not between provided"
+                             " offsets if provided. Can provide 'inf'"
+                             " for infinity")
+    parser.add_argument("--times", action="store", nargs=2,
+                        default=[float(0), float("inf")],
+                        dest="times",
+                        metavar="TIME",
+                        type=float,
+                        help="Filters out accesses not between provided"
+                             " time offsets if provided. Can provide 'inf'"
+                             " for infinity")
     parser.add_argument("sanitizer_trace", action="store",
                         type=argparse.FileType("r"),
                         help="File containing sanitizer traces filtered by "
@@ -141,6 +181,14 @@
     return parser.parse_args(argv)
 
 
+def get_dex_offset_data(line, dex_file_item):
+    """ Returns a tuple of dex file offset, item name, and data of a line."""
+    return (int(line[:line.find(":")], 16),
+                (dex_file_item,
+                 line.split("|")[1].strip())
+            )
+
+
 def read_data(parsed_argv):
     """Reads data from filepath arguments and parses them into lists."""
     # Using a dictionary to establish relation between lists added
@@ -149,6 +197,12 @@
     # Makes sure each trace maps to some category
     categories.insert(0, "Uncategorized")
 
+    data_lists["offsets"] = parsed_argv.offsets
+    data_lists["offsets"].sort()
+
+    data_lists["times"] = parsed_argv.times
+    data_lists["times"].sort()
+
     logcat_file_data = parsed_argv.sanitizer_trace.readlines()
     parsed_argv.sanitizer_trace.close()
 
@@ -159,6 +213,25 @@
     dex_start_file_data = parsed_argv.dex_starts.readlines()
     parsed_argv.dex_starts.close()
 
+    if parsed_argv.dex_file is not None:
+        dex_file_data = parsed_argv.dex_file.read()
+        parsed_argv.dex_file.close()
+        # Splits baksmali dump by each item
+        item_split = [s.splitlines() for s in re.split(r"\|\[[0-9]+\] ",
+                                                          dex_file_data)]
+        # Splits each item by line and creates a list of offsets and a
+        # corresponding list of the data associated with that line
+        offset_list, offset_data = zip(*[get_dex_offset_data(line, item[0])
+                                      for item in item_split
+                                         for line in item[1:]
+                                            if re.search("[0-9a-f]{6}:", line)
+                                                is not None
+                                            and line.find("|") != -1])
+        data_lists["offset_list"] = offset_list
+        data_lists["offset_data"] = offset_data
+    else:
+        dex_file_data = None
+
     # Each element is a tuple of time and address accessed
     data_lists["plot_list"] = [[elem[1] for elem in enumerate(line.split())
                                 if elem[0] in (1, 11)
@@ -184,23 +257,26 @@
     return data_lists, categories, symbol_file_split
 
 
-def main(argv=None):
+def main():
     """Takes in trace information and outputs details about them."""
-    if argv is None:
-        argv = sys.argv
-    parsed_argv = parse_args(argv[1:])
 
+    parsed_argv = parse_args(None)
     data_lists, categories, symbol_file_split = read_data(parsed_argv)
+
     # Formats plot_list such that each element is a data point
-    absolute_to_relative(data_lists["plot_list"], data_lists["dex_start_list"],
-                         data_lists["cat_list"])
+    absolute_to_relative(data_lists, symbol_file_split)
     for file_ext, cat_name in enumerate(categories):
         out_file_name = os.path.join(parsed_argv.out_dir_name, "time_output_" +
                                      str(file_ext) +
                                      ".dat")
         with open(out_file_name, "w") as output_file:
             output_file.write("# Category: " + cat_name + "\n")
-            output_file.write("# Time, Dex File Offset, Address \n")
+            output_file.write("# Time, Dex File Offset_10, Dex File Offset_16,"
+                              " Address, Item Accessed, Item Member Accessed"
+                              " Unaligned\n")
             for time, dex_offset, category, address in data_lists["plot_list"]:
                 if category == cat_name:
                     output_file.write(
@@ -208,9 +284,23 @@
                         " " +
                         str(dex_offset) +
                         " #" +
-                        str(address) +
-                        "\n")
-
+                        hex(dex_offset) +
+                        " " +
+                        str(address))
+                    if "offset_list" in data_lists:
+                        dex_offset_index = bisect.bisect(
+                            data_lists["offset_list"], dex_offset) - 1
+                        aligned_dex_offset = (
+                            data_lists["offset_list"][dex_offset_index])
+                        dex_offset_data = (
+                            data_lists["offset_data"][dex_offset_index])
+                        output_file.write(
+                            " " +
+                            "|".join(dex_offset_data) +
+                            " " +
+                            str(aligned_dex_offset != dex_offset))
+                    output_file.write("\n")
     print_categories(categories, symbol_file_split, parsed_argv.out_dir_name)