Merge "Revert "Refactor image loading.""
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index d811e07..8c5eafd 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -111,7 +111,7 @@
  public:
   EmitAdrCode(ArmVIXLMacroAssembler* assembler, vixl32::Register rd, vixl32::Label* label)
       : assembler_(assembler), rd_(rd), label_(label) {
-    ExactAssemblyScope aas(assembler, kMaxInstructionSizeInBytes);
+    DCHECK(!assembler->AllowMacroInstructions());  // In ExactAssemblyScope.
     adr_location_ = assembler->GetCursorOffset();
     assembler->adr(EncodingSize(Wide), rd, label);
   }
@@ -715,294 +715,6 @@
   DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARMVIXL);
 };
 
-// Abstract base class for read barrier slow paths marking a reference
-// `ref`.
-//
-// Argument `entrypoint` must be a register location holding the read
-// barrier marking runtime entry point to be invoked or an empty
-// location; in the latter case, the read barrier marking runtime
-// entry point will be loaded by the slow path code itself.
-class ReadBarrierMarkSlowPathBaseARMVIXL : public SlowPathCodeARMVIXL {
- protected:
-  ReadBarrierMarkSlowPathBaseARMVIXL(HInstruction* instruction, Location ref, Location entrypoint)
-      : SlowPathCodeARMVIXL(instruction), ref_(ref), entrypoint_(entrypoint) {
-    DCHECK(kEmitCompilerReadBarrier);
-  }
-
-  const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathBaseARMVIXL"; }
-
-  // Generate assembly code calling the read barrier marking runtime
-  // entry point (ReadBarrierMarkRegX).
-  void GenerateReadBarrierMarkRuntimeCall(CodeGenerator* codegen) {
-    vixl32::Register ref_reg = RegisterFrom(ref_);
-
-    // No need to save live registers; it's taken care of by the
-    // entrypoint. Also, there is no need to update the stack mask,
-    // as this runtime call will not trigger a garbage collection.
-    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
-    DCHECK(!ref_reg.Is(sp));
-    DCHECK(!ref_reg.Is(lr));
-    DCHECK(!ref_reg.Is(pc));
-    // IP is used internally by the ReadBarrierMarkRegX entry point
-    // as a temporary, it cannot be the entry point's input/output.
-    DCHECK(!ref_reg.Is(ip));
-    DCHECK(ref_reg.IsRegister()) << ref_reg;
-    // "Compact" slow path, saving two moves.
-    //
-    // Instead of using the standard runtime calling convention (input
-    // and output in R0):
-    //
-    //   R0 <- ref
-    //   R0 <- ReadBarrierMark(R0)
-    //   ref <- R0
-    //
-    // we just use rX (the register containing `ref`) as input and output
-    // of a dedicated entrypoint:
-    //
-    //   rX <- ReadBarrierMarkRegX(rX)
-    //
-    if (entrypoint_.IsValid()) {
-      arm_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this);
-      __ Blx(RegisterFrom(entrypoint_));
-    } else {
-      // Entrypoint is not already loaded, load from the thread.
-      int32_t entry_point_offset =
-          Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref_reg.GetCode());
-      // This runtime call does not require a stack map.
-      arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
-    }
-  }
-
-  // The location (register) of the marked object reference.
-  const Location ref_;
-
-  // The location of the entrypoint if already loaded.
-  const Location entrypoint_;
-
- private:
-  DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathBaseARMVIXL);
-};
-
-// Slow path loading `obj`'s lock word, loading a reference from
-// object `*(obj + offset + (index << scale_factor))` into `ref`, and
-// marking `ref` if `obj` is gray according to the lock word (Baker
-// read barrier). If needed, this slow path also atomically updates
-// the field `obj.field` in the object `obj` holding this reference
-// after marking.
-//
-// This means that after the execution of this slow path, both `ref`
-// and `obj.field` will be up-to-date; i.e., after the flip, both will
-// hold the same to-space reference (unless another thread installed
-// another object reference (different from `ref`) in `obj.field`).
-//
-// Argument `entrypoint` must be a register location holding the read
-// barrier marking runtime entry point to be invoked or an empty
-// location; in the latter case, the read barrier marking runtime
-// entry point will be loaded by the slow path code itself.
-class LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL
-    : public ReadBarrierMarkSlowPathBaseARMVIXL {
- public:
-  LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL(
-      HInstruction* instruction,
-      Location ref,
-      vixl32::Register obj,
-      uint32_t offset,
-      Location index,
-      ScaleFactor scale_factor,
-      bool needs_null_check,
-      vixl32::Register temp1,
-      vixl32::Register temp2,
-      Location entrypoint = Location::NoLocation())
-      : ReadBarrierMarkSlowPathBaseARMVIXL(instruction, ref, entrypoint),
-        obj_(obj),
-        offset_(offset),
-        index_(index),
-        scale_factor_(scale_factor),
-        needs_null_check_(needs_null_check),
-        temp1_(temp1),
-        temp2_(temp2) {
-    DCHECK(kEmitCompilerReadBarrier);
-    DCHECK(kUseBakerReadBarrier);
-  }
-
-  const char* GetDescription() const OVERRIDE {
-    return "LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL";
-  }
-
-  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
-    LocationSummary* locations = instruction_->GetLocations();
-    vixl32::Register ref_reg = RegisterFrom(ref_);
-    DCHECK(locations->CanCall());
-    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg.GetCode())) << ref_reg;
-    DCHECK_NE(ref_.reg(), LocationFrom(temp1_).reg());
-
-    // This slow path is only used by the UnsafeCASObject intrinsic at the moment.
-    DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
-        << "Unexpected instruction in read barrier marking and field updating slow path: "
-        << instruction_->DebugName();
-    DCHECK(instruction_->GetLocations()->Intrinsified());
-    DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject);
-    DCHECK_EQ(offset_, 0u);
-    DCHECK_EQ(scale_factor_, ScaleFactor::TIMES_1);
-    Location field_offset = index_;
-    DCHECK(field_offset.IsRegisterPair()) << field_offset;
-
-    // Temporary register `temp1_`, used to store the lock word, must
-    // not be IP, as we may use it to emit the reference load (in the
-    // call to GenerateRawReferenceLoad below), and we need the lock
-    // word to still be in `temp1_` after the reference load.
-    DCHECK(!temp1_.Is(ip));
-
-    __ Bind(GetEntryLabel());
-
-    // The implementation is:
-    //
-    //   uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
-    //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
-    //   HeapReference<mirror::Object> ref = *src;  // Original reference load.
-    //   bool is_gray = (rb_state == ReadBarrier::GrayState());
-    //   if (is_gray) {
-    //     old_ref = ref;
-    //     ref = entrypoint(ref);  // ref = ReadBarrier::Mark(ref);  // Runtime entry point call.
-    //     compareAndSwapObject(obj, field_offset, old_ref, ref);
-    //   }
-
-    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
-
-    // /* int32_t */ monitor = obj->monitor_
-    uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
-    arm_codegen->GetAssembler()->LoadFromOffset(kLoadWord, temp1_, obj_, monitor_offset);
-    if (needs_null_check_) {
-      codegen->MaybeRecordImplicitNullCheck(instruction_);
-    }
-    // /* LockWord */ lock_word = LockWord(monitor)
-    static_assert(sizeof(LockWord) == sizeof(int32_t),
-                  "art::LockWord and int32_t have different sizes.");
-
-    // Introduce a dependency on the lock_word including the rb_state,
-    // which shall prevent load-load reordering without using
-    // a memory barrier (which would be more expensive).
-    // `obj` is unchanged by this operation, but its value now depends
-    // on `temp`.
-    __ Add(obj_, obj_, Operand(temp1_, ShiftType::LSR, 32));
-
-    // The actual reference load.
-    // A possible implicit null check has already been handled above.
-    arm_codegen->GenerateRawReferenceLoad(
-        instruction_, ref_, obj_, offset_, index_, scale_factor_, /* needs_null_check */ false);
-
-    // Mark the object `ref` when `obj` is gray.
-    //
-    //   if (rb_state == ReadBarrier::GrayState())
-    //     ref = ReadBarrier::Mark(ref);
-    //
-    // Given the numeric representation, it's enough to check the low bit of the
-    // rb_state. We do that by shifting the bit out of the lock word with LSRS
-    // which can be a 16-bit instruction unlike the TST immediate.
-    static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
-    static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
-    __ Lsrs(temp1_, temp1_, LockWord::kReadBarrierStateShift + 1);
-    __ B(cc, GetExitLabel());  // Carry flag is the last bit shifted out by LSRS.
-
-    // Save the old value of the reference before marking it.
-    // Note that we cannot use IP to save the old reference, as IP is
-    // used internally by the ReadBarrierMarkRegX entry point, and we
-    // need the old reference after the call to that entry point.
-    DCHECK(!temp1_.Is(ip));
-    __ Mov(temp1_, ref_reg);
-
-    GenerateReadBarrierMarkRuntimeCall(codegen);
-
-    // If the new reference is different from the old reference,
-    // update the field in the holder (`*(obj_ + field_offset)`).
-    //
-    // Note that this field could also hold a different object, if
-    // another thread had concurrently changed it. In that case, the
-    // LDREX/CMP/BNE sequence of instructions in the compare-and-set
-    // (CAS) operation below would abort the CAS, leaving the field
-    // as-is.
-    __ Cmp(temp1_, ref_reg);
-    __ B(eq, GetExitLabel());
-
-    // Update the holder's field atomically.  This may fail if the
-    // mutator updates it before us, but that is OK.  This is achieved
-    // using a strong compare-and-set (CAS) operation with relaxed
-    // memory synchronization ordering, where the expected value is
-    // the old reference and the desired value is the new reference.
-
-    UseScratchRegisterScope temps(arm_codegen->GetVIXLAssembler());
-    // Convenience aliases.
-    vixl32::Register base = obj_;
-    // The UnsafeCASObject intrinsic uses a register pair as field
-    // offset ("long offset"), of which only the low part contains
-    // data.
-    vixl32::Register offset = LowRegisterFrom(field_offset);
-    vixl32::Register expected = temp1_;
-    vixl32::Register value = ref_reg;
-    vixl32::Register tmp_ptr = temps.Acquire();       // Pointer to actual memory.
-    vixl32::Register tmp = temp2_;                    // Value in memory.
-
-    __ Add(tmp_ptr, base, offset);
-
-    if (kPoisonHeapReferences) {
-      arm_codegen->GetAssembler()->PoisonHeapReference(expected);
-      if (value.Is(expected)) {
-        // Do not poison `value`, as it is the same register as
-        // `expected`, which has just been poisoned.
-      } else {
-        arm_codegen->GetAssembler()->PoisonHeapReference(value);
-      }
-    }
-
-    // do {
-    //   tmp = [r_ptr] - expected;
-    // } while (tmp == 0 && failure([r_ptr] <- r_new_value));
-
-    vixl32::Label loop_head, comparison_failed, exit_loop;
-    __ Bind(&loop_head);
-    __ Ldrex(tmp, MemOperand(tmp_ptr));
-    __ Cmp(tmp, expected);
-    __ B(ne, &comparison_failed, /* far_target */ false);
-    __ Strex(tmp, value, MemOperand(tmp_ptr));
-    __ CompareAndBranchIfZero(tmp, &exit_loop, /* far_target */ false);
-    __ B(&loop_head);
-    __ Bind(&comparison_failed);
-    __ Clrex();
-    __ Bind(&exit_loop);
-
-    if (kPoisonHeapReferences) {
-      arm_codegen->GetAssembler()->UnpoisonHeapReference(expected);
-      if (value.Is(expected)) {
-        // Do not unpoison `value`, as it is the same register as
-        // `expected`, which has just been unpoisoned.
-      } else {
-        arm_codegen->GetAssembler()->UnpoisonHeapReference(value);
-      }
-    }
-
-    __ B(GetExitLabel());
-  }
-
- private:
-  // The register containing the object holding the marked object reference field.
-  const vixl32::Register obj_;
-  // The offset, index and scale factor to access the reference in `obj_`.
-  uint32_t offset_;
-  Location index_;
-  ScaleFactor scale_factor_;
-  // Is a null check required?
-  bool needs_null_check_;
-  // A temporary register used to hold the lock word of `obj_`; and
-  // also to hold the original reference value, when the reference is
-  // marked.
-  const vixl32::Register temp1_;
-  // A temporary register used in the implementation of the CAS, to
-  // update the object's reference field.
-  const vixl32::Register temp2_;
-
-  DISALLOW_COPY_AND_ASSIGN(LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL);
-};
-
 // Slow path generating a read barrier for a heap reference.
 class ReadBarrierForHeapReferenceSlowPathARMVIXL : public SlowPathCodeARMVIXL {
  public:
@@ -2295,6 +2007,14 @@
           }
           break;
         }
+        case BakerReadBarrierKind::kUnsafeCas: {
+          DCHECK_GE(literal_offset, 4u);
+          uint32_t prev_insn = GetInsn32(literal_offset - 4u);
+          // ADD (register), encoding T3, with correct root_reg.
+          const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
+          CHECK_EQ(prev_insn & 0xfff0fff0u, 0xeb000000u | (root_reg << 8));
+          break;
+        }
         default:
           LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind);
           UNREACHABLE();
@@ -8626,7 +8346,11 @@
       bool narrow = CanEmitNarrowLdr(root_reg, obj, offset);
       uint32_t custom_data = EncodeBakerReadBarrierGcRootData(root_reg.GetCode(), narrow);
 
-      vixl::EmissionCheckScope guard(GetVIXLAssembler(), 4 * vixl32::kMaxInstructionSizeInBytes);
+      size_t narrow_instructions = /* CMP */ (mr.IsLow() ? 1u : 0u) + /* LDR */ (narrow ? 1u : 0u);
+      size_t wide_instructions = /* ADR+CMP+LDR+BNE */ 4u - narrow_instructions;
+      size_t exact_size = wide_instructions * vixl32::k32BitT32InstructionSizeInBytes +
+                          narrow_instructions * vixl32::k16BitT32InstructionSizeInBytes;
+      ExactAssemblyScope guard(GetVIXLAssembler(), exact_size);
       vixl32::Label return_address;
       EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
       __ cmp(mr, Operand(0));
@@ -8636,7 +8360,7 @@
       ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
       __ ldr(EncodingSize(narrow ? Narrow : Wide), root_reg, MemOperand(obj, offset));
       EmitBakerReadBarrierBne(custom_data);
-      __ Bind(&return_address);
+      __ bind(&return_address);
       DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
                 narrow ? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET
                        : BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_OFFSET);
@@ -8658,6 +8382,32 @@
   MaybeGenerateMarkingRegisterCheck(/* code */ 19);
 }
 
+void CodeGeneratorARMVIXL::GenerateUnsafeCasOldValueAddWithBakerReadBarrier(
+    vixl::aarch32::Register old_value,
+    vixl::aarch32::Register adjusted_old_value,
+    vixl::aarch32::Register expected) {
+  DCHECK(kEmitCompilerReadBarrier);
+  DCHECK(kUseBakerReadBarrier);
+
+  // Similar to the Baker RB path in GenerateGcRootFieldLoad(), with an ADD instead of LDR.
+  uint32_t custom_data = EncodeBakerReadBarrierUnsafeCasData(old_value.GetCode());
+
+  size_t narrow_instructions = /* CMP */ (mr.IsLow() ? 1u : 0u);
+  size_t wide_instructions = /* ADR+CMP+ADD+BNE */ 4u - narrow_instructions;
+  size_t exact_size = wide_instructions * vixl32::k32BitT32InstructionSizeInBytes +
+                      narrow_instructions * vixl32::k16BitT32InstructionSizeInBytes;
+  ExactAssemblyScope guard(GetVIXLAssembler(), exact_size);
+  vixl32::Label return_address;
+  EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
+  __ cmp(mr, Operand(0));
+  ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
+  __ add(EncodingSize(Wide), old_value, adjusted_old_value, Operand(expected));  // Preserves flags.
+  EmitBakerReadBarrierBne(custom_data);
+  __ bind(&return_address);
+  DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
+            BAKER_MARK_INTROSPECTION_UNSAFE_CAS_ADD_OFFSET);
+}
+
 void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
                                                                  Location ref,
                                                                  vixl32::Register obj,
@@ -8698,9 +8448,14 @@
       EncodeBakerReadBarrierFieldData(src.GetBaseRegister().GetCode(), obj.GetCode(), narrow);
 
   {
-    vixl::EmissionCheckScope guard(
-        GetVIXLAssembler(),
-        (kPoisonHeapReferences ? 5u : 4u) * vixl32::kMaxInstructionSizeInBytes);
+    size_t narrow_instructions =
+        /* CMP */ (mr.IsLow() ? 1u : 0u) +
+        /* LDR+unpoison? */ (narrow ? (kPoisonHeapReferences ? 2u : 1u) : 0u);
+    size_t wide_instructions =
+        /* ADR+CMP+LDR+BNE+unpoison? */ (kPoisonHeapReferences ? 5u : 4u) - narrow_instructions;
+    size_t exact_size = wide_instructions * vixl32::k32BitT32InstructionSizeInBytes +
+                        narrow_instructions * vixl32::k16BitT32InstructionSizeInBytes;
+    ExactAssemblyScope guard(GetVIXLAssembler(), exact_size);
     vixl32::Label return_address;
     EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
     __ cmp(mr, Operand(0));
@@ -8719,7 +8474,7 @@
         __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0));
       }
     }
-    __ Bind(&return_address);
+    __ bind(&return_address);
     DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
               narrow ? BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET
                      : BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET);
@@ -8791,9 +8546,12 @@
 
   __ Add(data_reg, obj, Operand(data_offset));
   {
-    vixl::EmissionCheckScope guard(
-        GetVIXLAssembler(),
-        (kPoisonHeapReferences ? 5u : 4u) * vixl32::kMaxInstructionSizeInBytes);
+    size_t narrow_instructions = /* CMP */ (mr.IsLow() ? 1u : 0u);
+    size_t wide_instructions =
+        /* ADR+CMP+BNE+LDR+unpoison? */ (kPoisonHeapReferences ? 5u : 4u) - narrow_instructions;
+    size_t exact_size = wide_instructions * vixl32::k32BitT32InstructionSizeInBytes +
+                        narrow_instructions * vixl32::k16BitT32InstructionSizeInBytes;
+    ExactAssemblyScope guard(GetVIXLAssembler(), exact_size);
     vixl32::Label return_address;
     EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
     __ cmp(mr, Operand(0));
@@ -8805,127 +8563,13 @@
     if (kPoisonHeapReferences) {
       __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0));
     }
-    __ Bind(&return_address);
+    __ bind(&return_address);
     DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
               BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET);
   }
   MaybeGenerateMarkingRegisterCheck(/* code */ 21, /* temp_loc */ LocationFrom(ip));
 }
 
-void CodeGeneratorARMVIXL::UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction,
-                                                                    Location ref,
-                                                                    vixl32::Register obj,
-                                                                    Location field_offset,
-                                                                    Location temp,
-                                                                    bool needs_null_check,
-                                                                    vixl32::Register temp2) {
-  DCHECK(kEmitCompilerReadBarrier);
-  DCHECK(kUseBakerReadBarrier);
-
-  // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
-  // Marking Register) to decide whether we need to enter the slow
-  // path to update the reference field within `obj`. Then, in the
-  // slow path, check the gray bit in the lock word of the reference's
-  // holder (`obj`) to decide whether to mark `ref` and update the
-  // field or not.
-  //
-  //   if (mr) {  // Thread::Current()->GetIsGcMarking()
-  //     // Slow path.
-  //     uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
-  //     lfence;  // Load fence or artificial data dependency to prevent load-load reordering
-  //     HeapReference<mirror::Object> ref = *(obj + field_offset);  // Reference load.
-  //     bool is_gray = (rb_state == ReadBarrier::GrayState());
-  //     if (is_gray) {
-  //       old_ref = ref;
-  //       entrypoint = Thread::Current()->pReadBarrierMarkReg ## root.reg()
-  //       ref = entrypoint(ref);  // ref = ReadBarrier::Mark(ref);  // Runtime entry point call.
-  //       compareAndSwapObject(obj, field_offset, old_ref, ref);
-  //     }
-  //   }
-
-  vixl32::Register temp_reg = RegisterFrom(temp);
-
-  // Slow path updating the object reference at address `obj + field_offset`
-  // when the GC is marking. The entrypoint will be loaded by the slow path code.
-  SlowPathCodeARMVIXL* slow_path =
-      new (GetScopedAllocator()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL(
-          instruction,
-          ref,
-          obj,
-          /* offset */ 0u,
-          /* index */ field_offset,
-          /* scale_factor */ ScaleFactor::TIMES_1,
-          needs_null_check,
-          temp_reg,
-          temp2);
-  AddSlowPath(slow_path);
-
-  __ CompareAndBranchIfNonZero(mr, slow_path->GetEntryLabel());
-  // Fast path: the GC is not marking: nothing to do (the field is
-  // up-to-date, and we don't need to load the reference).
-  __ Bind(slow_path->GetExitLabel());
-  MaybeGenerateMarkingRegisterCheck(/* code */ 23);
-}
-
-void CodeGeneratorARMVIXL::GenerateRawReferenceLoad(HInstruction* instruction,
-                                                    Location ref,
-                                                    vixl32::Register obj,
-                                                    uint32_t offset,
-                                                    Location index,
-                                                    ScaleFactor scale_factor,
-                                                    bool needs_null_check) {
-  DataType::Type type = DataType::Type::kReference;
-  vixl32::Register ref_reg = RegisterFrom(ref, type);
-
-  // If needed, vixl::EmissionCheckScope guards are used to ensure
-  // that no pools are emitted between the load (macro) instruction
-  // and MaybeRecordImplicitNullCheck.
-
-  if (index.IsValid()) {
-    // Load types involving an "index": ArrayGet,
-    // UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject
-    // intrinsics.
-    // /* HeapReference<mirror::Object> */ ref = *(obj + offset + (index << scale_factor))
-    if (index.IsConstant()) {
-      size_t computed_offset =
-          (Int32ConstantFrom(index) << scale_factor) + offset;
-      vixl::EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
-      GetAssembler()->LoadFromOffset(kLoadWord, ref_reg, obj, computed_offset);
-      if (needs_null_check) {
-        MaybeRecordImplicitNullCheck(instruction);
-      }
-    } else {
-      // Handle the special case of the
-      // UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject
-      // intrinsics, which use a register pair as index ("long
-      // offset"), of which only the low part contains data.
-      vixl32::Register index_reg = index.IsRegisterPair()
-          ? LowRegisterFrom(index)
-          : RegisterFrom(index);
-      UseScratchRegisterScope temps(GetVIXLAssembler());
-      vixl32::Register temp = temps.Acquire();
-      __ Add(temp, obj, Operand(index_reg, ShiftType::LSL, scale_factor));
-      {
-        vixl::EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
-        GetAssembler()->LoadFromOffset(kLoadWord, ref_reg, temp, offset);
-        if (needs_null_check) {
-          MaybeRecordImplicitNullCheck(instruction);
-        }
-      }
-    }
-  } else {
-    // /* HeapReference<mirror::Object> */ ref = *(obj + offset)
-    vixl::EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
-    GetAssembler()->LoadFromOffset(kLoadWord, ref_reg, obj, offset);
-    if (needs_null_check) {
-      MaybeRecordImplicitNullCheck(instruction);
-    }
-  }
-
-  // Object* ref = ref_addr->AsMirrorPtr()
-  GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
-}
-
 void CodeGeneratorARMVIXL::MaybeGenerateMarkingRegisterCheck(int code, Location temp_loc) {
   // The following condition is a compile-time one, so it does not have a run-time cost.
   if (kEmitCompilerReadBarrier && kUseBakerReadBarrier && kIsDebugBuild) {
@@ -9215,7 +8859,7 @@
 }
 
 void CodeGeneratorARMVIXL::EmitBakerReadBarrierBne(uint32_t custom_data) {
-  ExactAssemblyScope eas(GetVIXLAssembler(), 1 * k32BitT32InstructionSizeInBytes);
+  DCHECK(!__ AllowMacroInstructions());  // In ExactAssemblyScope.
   if (Runtime::Current()->UseJitCompilation()) {
     auto it = jit_baker_read_barrier_slow_paths_.FindOrAdd(custom_data);
     vixl::aarch32::Label* slow_path_entry = &it->second.label;
@@ -9774,7 +9418,8 @@
       __ Bx(ep_reg);                          // Jump to the entrypoint's array switch case.
       break;
     }
-    case BakerReadBarrierKind::kGcRoot: {
+    case BakerReadBarrierKind::kGcRoot:
+    case BakerReadBarrierKind::kUnsafeCas: {
       // Check if the reference needs to be marked and if so (i.e. not null, not marked yet
       // and it does not have a forwarding address), call the correct introspection entrypoint;
       // otherwise return the reference (or the extracted forwarding address).
@@ -9802,10 +9447,14 @@
       __ B(hs, &forwarding_address);
       vixl32::Register ep_reg = LoadReadBarrierMarkIntrospectionEntrypoint(assembler);
       // Adjust the art_quick_read_barrier_mark_introspection address in kBakerCcEntrypointRegister
-      // to art_quick_read_barrier_mark_introspection_gc_roots.
-      int32_t entrypoint_offset = (width == BakerReadBarrierWidth::kWide)
-          ? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_ENTRYPOINT_OFFSET
-          : BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_ENTRYPOINT_OFFSET;
+      // to one of art_quick_read_barrier_mark_introspection_{gc_roots_{wide,narrow},unsafe_cas}.
+      DCHECK(kind != BakerReadBarrierKind::kUnsafeCas || width == BakerReadBarrierWidth::kWide);
+      int32_t entrypoint_offset =
+          (kind == BakerReadBarrierKind::kGcRoot)
+              ? (width == BakerReadBarrierWidth::kWide)
+                  ? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_ENTRYPOINT_OFFSET
+                  : BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_ENTRYPOINT_OFFSET
+              : BAKER_MARK_INTROSPECTION_UNSAFE_CAS_ENTRYPOINT_OFFSET;
       __ Add(ep_reg, ep_reg, Operand(entrypoint_offset));
       __ Mov(ip, root_reg);
       __ Bx(ep_reg);
@@ -9851,6 +9500,12 @@
         DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
                   BakerReadBarrierSecondRegField::Decode(encoded_data));
         break;
+      case BakerReadBarrierKind::kUnsafeCas:
+        oss << "UnsafeCas_r" << BakerReadBarrierFirstRegField::Decode(encoded_data);
+        DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
+                  BakerReadBarrierSecondRegField::Decode(encoded_data));
+        DCHECK(BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide);
+        break;
     }
     *debug_name = oss.str();
   }
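
A note on the ExactAssemblyScope changes in this file: each scope now reserves the exact encoded size of the emitted sequence instead of the previous 4u or 5u * kMaxInstructionSizeInBytes upper bound from EmissionCheckScope. As a worked example (assuming the VIXL values k32BitT32InstructionSizeInBytes = 4 and k16BitT32InstructionSizeInBytes = 2): for the GC root load with a low marking register and a narrow LDR, narrow_instructions = 1 + 1 = 2 and wide_instructions = 4 - 2 = 2, so exact_size = 2 * 4 + 2 * 2 = 12 bytes; with a high marking register and a wide LDR the whole ADR+CMP+LDR+BNE sequence is wide, i.e. 4 * 4 = 16 bytes.
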
diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h
index 2fd18ca..cb131a7 100644
--- a/compiler/optimizing/code_generator_arm_vixl.h
+++ b/compiler/optimizing/code_generator_arm_vixl.h
@@ -622,6 +622,11 @@
                                vixl::aarch32::Register obj,
                                uint32_t offset,
                                ReadBarrierOption read_barrier_option);
+  // Generate ADD for UnsafeCASObject to reconstruct the old value from
+  // `old_value - expected` and mark it with a Baker read barrier.
+  void GenerateUnsafeCasOldValueAddWithBakerReadBarrier(vixl::aarch32::Register old_value,
+                                                        vixl::aarch32::Register adjusted_old_value,
+                                                        vixl::aarch32::Register expected);
   // Fast path implementation of ReadBarrier::Barrier for a heap
   // reference field load when Baker's read barriers are used.
   // Overload suitable for Unsafe.getObject/-Volatile() intrinsic.
@@ -647,35 +652,6 @@
                                              Location temp,
                                              bool needs_null_check);
 
-  // Generate code checking whether the reference field at the
-  // address `obj + field_offset`, held by object `obj`, needs to be
-  // marked, and if so, marking it and updating the field within `obj`
-  // with the marked value.
-  //
-  // This routine is used for the implementation of the
-  // UnsafeCASObject intrinsic with Baker read barriers.
-  //
-  // This method has a structure similar to
-  // GenerateReferenceLoadWithBakerReadBarrier, but note that argument
-  // `ref` is only as a temporary here, and thus its value should not
-  // be used afterwards.
-  void UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction,
-                                                Location ref,
-                                                vixl::aarch32::Register obj,
-                                                Location field_offset,
-                                                Location temp,
-                                                bool needs_null_check,
-                                                vixl::aarch32::Register temp2);
-
-  // Generate a heap reference load (with no read barrier).
-  void GenerateRawReferenceLoad(HInstruction* instruction,
-                                Location ref,
-                                vixl::aarch32::Register obj,
-                                uint32_t offset,
-                                Location index,
-                                ScaleFactor scale_factor,
-                                bool needs_null_check);
-
   // Emit code checking the status of the Marking Register, and
   // aborting the program if MR does not match the value stored in the
   // art::Thread object. Code is only emitted in debug mode and if
@@ -772,10 +748,11 @@
   // Encoding of thunk type and data for link-time generated thunks for Baker read barriers.
 
   enum class BakerReadBarrierKind : uint8_t {
-    kField,   // Field get or array get with constant offset (i.e. constant index).
-    kArray,   // Array get with index in register.
-    kGcRoot,  // GC root load.
-    kLast = kGcRoot
+    kField,       // Field get or array get with constant offset (i.e. constant index).
+    kArray,       // Array get with index in register.
+    kGcRoot,      // GC root load.
+    kUnsafeCas,   // UnsafeCASObject intrinsic.
+    kLast = kUnsafeCas
   };
 
   enum class BakerReadBarrierWidth : uint8_t {
@@ -842,6 +819,14 @@
            BakerReadBarrierWidthField::Encode(width);
   }
 
+  static uint32_t EncodeBakerReadBarrierUnsafeCasData(uint32_t root_reg) {
+    CheckValidReg(root_reg);
+    return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kUnsafeCas) |
+           BakerReadBarrierFirstRegField::Encode(root_reg) |
+           BakerReadBarrierSecondRegField::Encode(kBakerReadBarrierInvalidEncodedReg) |
+           BakerReadBarrierWidthField::Encode(BakerReadBarrierWidth::kWide);
+  }
+
   void CompileBakerReadBarrierThunk(ArmVIXLAssembler& assembler,
                                     uint32_t encoded_data,
                                     /*out*/ std::string* debug_name);
diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc
index 53b0aa2..74a779d 100644
--- a/compiler/optimizing/intrinsics_arm_vixl.cc
+++ b/compiler/optimizing/intrinsics_arm_vixl.cc
@@ -936,9 +936,7 @@
                codegen_);
 }
 
-static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* allocator,
-                                                HInvoke* invoke,
-                                                DataType::Type type) {
+static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* allocator, HInvoke* invoke) {
   bool can_call = kEmitCompilerReadBarrier &&
       kUseBakerReadBarrier &&
       (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject);
@@ -948,20 +946,16 @@
                                           ? LocationSummary::kCallOnSlowPath
                                           : LocationSummary::kNoCall,
                                       kIntrinsified);
+  if (can_call) {
+    locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
+  }
   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
   locations->SetInAt(1, Location::RequiresRegister());
   locations->SetInAt(2, Location::RequiresRegister());
   locations->SetInAt(3, Location::RequiresRegister());
   locations->SetInAt(4, Location::RequiresRegister());
 
-  // If heap poisoning is enabled, we don't want the unpoisoning
-  // operations to potentially clobber the output. Likewise when
-  // emitting a (Baker) read barrier, which may call.
-  Location::OutputOverlap overlaps =
-      ((kPoisonHeapReferences && type == DataType::Type::kReference) || can_call)
-      ? Location::kOutputOverlap
-      : Location::kNoOutputOverlap;
-  locations->SetOut(Location::RequiresRegister(), overlaps);
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
 
   // Temporary registers used in CAS. In the object case
   // (UnsafeCASObject intrinsic), these are also used for
@@ -970,24 +964,92 @@
   locations->AddTemp(Location::RequiresRegister());  // Temp 1.
 }
 
+class BakerReadBarrierCasSlowPathARMVIXL : public SlowPathCodeARMVIXL {
+ public:
+  explicit BakerReadBarrierCasSlowPathARMVIXL(HInvoke* invoke)
+      : SlowPathCodeARMVIXL(invoke) {}
+
+  const char* GetDescription() const OVERRIDE { return "BakerReadBarrierCasSlowPathARMVIXL"; }
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
+    ArmVIXLAssembler* assembler = arm_codegen->GetAssembler();
+    __ Bind(GetEntryLabel());
+
+    LocationSummary* locations = instruction_->GetLocations();
+    vixl32::Register base = InputRegisterAt(instruction_, 1);           // Object pointer.
+    vixl32::Register offset = LowRegisterFrom(locations->InAt(2));      // Offset (discard high 4B).
+    vixl32::Register expected = InputRegisterAt(instruction_, 3);       // Expected.
+    vixl32::Register value = InputRegisterAt(instruction_, 4);          // Value.
+
+    vixl32::Register tmp_ptr = RegisterFrom(locations->GetTemp(0));     // Pointer to actual memory.
+    vixl32::Register tmp = RegisterFrom(locations->GetTemp(1));         // Temporary.
+
+    // `tmp` is initialized to `[tmp_ptr] - expected` in the main path. Reconstruct
+    // and mark the old value, then compare it with `expected`. We clobber `tmp_ptr` in the
+    // process due to lack of other temps suitable for the read barrier.
+    arm_codegen->GenerateUnsafeCasOldValueAddWithBakerReadBarrier(tmp_ptr, tmp, expected);
+    __ Cmp(tmp_ptr, expected);
+    __ B(ne, GetExitLabel());
+
+    // The old value we have read did not match `expected` (which is always a to-space reference)
+    // but after the read barrier in GenerateUnsafeCasOldValueAddWithBakerReadBarrier() the marked
+    // to-space value matched, so the old value must be a from-space reference to the same object.
+    // Do the same CAS loop as the main path but check for both `expected` and the unmarked
+    // old value representing the to-space and from-space references for the same object.
+
+    UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
+    vixl32::Register adjusted_old_value = temps.Acquire();      // For saved `tmp` from main path.
+
+    // Recalculate the `tmp_ptr` clobbered above and save the adjusted old value
+    // from `tmp` in `adjusted_old_value`, i.e. IP.
+    __ Add(tmp_ptr, base, offset);
+    __ Mov(adjusted_old_value, tmp);
+
+    // do {
+    //   tmp = [r_ptr] - expected;
+    // } while ((tmp == 0 || tmp == adjusted_old_value) && failure([r_ptr] <- r_new_value));
+    // result = (tmp == 0 || tmp == adjusted_old_value);
+
+    vixl32::Label loop_head;
+    __ Bind(&loop_head);
+    __ Ldrex(tmp, MemOperand(tmp_ptr));  // This can now load null stored by another thread.
+    assembler->MaybeUnpoisonHeapReference(tmp);
+    __ Subs(tmp, tmp, expected);         // Use SUBS to get non-zero value if both compares fail.
+    {
+      // If the newly loaded value did not match `expected`, compare with `adjusted_old_value`.
+      ExactAssemblyScope aas(assembler->GetVIXLAssembler(), 2 * k16BitT32InstructionSizeInBytes);
+      __ it(ne);
+      __ cmp(ne, tmp, adjusted_old_value);
+    }
+    __ B(ne, GetExitLabel());
+    assembler->MaybePoisonHeapReference(value);
+    __ Strex(tmp, value, MemOperand(tmp_ptr));
+    assembler->MaybeUnpoisonHeapReference(value);
+    __ Cmp(tmp, 0);
+    __ B(ne, &loop_head, /* far_target */ false);
+    __ B(GetExitLabel());
+  }
+};
+
 static void GenCas(HInvoke* invoke, DataType::Type type, CodeGeneratorARMVIXL* codegen) {
   DCHECK_NE(type, DataType::Type::kInt64);
 
   ArmVIXLAssembler* assembler = codegen->GetAssembler();
   LocationSummary* locations = invoke->GetLocations();
 
-  Location out_loc = locations->Out();
   vixl32::Register out = OutputRegister(invoke);                      // Boolean result.
 
   vixl32::Register base = InputRegisterAt(invoke, 1);                 // Object pointer.
-  Location offset_loc = locations->InAt(2);
-  vixl32::Register offset = LowRegisterFrom(offset_loc);              // Offset (discard high 4B).
+  vixl32::Register offset = LowRegisterFrom(locations->InAt(2));      // Offset (discard high 4B).
   vixl32::Register expected = InputRegisterAt(invoke, 3);             // Expected.
   vixl32::Register value = InputRegisterAt(invoke, 4);                // Value.
 
-  Location tmp_ptr_loc = locations->GetTemp(0);
-  vixl32::Register tmp_ptr = RegisterFrom(tmp_ptr_loc);               // Pointer to actual memory.
-  vixl32::Register tmp = RegisterFrom(locations->GetTemp(1));         // Value in memory.
+  vixl32::Register tmp_ptr = RegisterFrom(locations->GetTemp(0));     // Pointer to actual memory.
+  vixl32::Register tmp = RegisterFrom(locations->GetTemp(1));         // Temporary.
+
+  vixl32::Label loop_exit_label;
+  vixl32::Label* loop_exit = &loop_exit_label;
+  vixl32::Label* failure = &loop_exit_label;
 
   if (type == DataType::Type::kReference) {
     // The only read barrier implementation supporting the
@@ -1000,87 +1062,62 @@
     codegen->MarkGCCard(tmp_ptr, tmp, base, value, value_can_be_null);
 
     if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
-      // Need to make sure the reference stored in the field is a to-space
-      // one before attempting the CAS or the CAS could fail incorrectly.
-      codegen->UpdateReferenceFieldWithBakerReadBarrier(
-          invoke,
-          out_loc,  // Unused, used only as a "temporary" within the read barrier.
-          base,
-          /* field_offset */ offset_loc,
-          tmp_ptr_loc,
-          /* needs_null_check */ false,
-          tmp);
+      // If marking, check if the stored reference is a from-space reference to the same
+      // object as the to-space reference `expected`. If so, perform a custom CAS loop.
+      BakerReadBarrierCasSlowPathARMVIXL* slow_path =
+          new (codegen->GetScopedAllocator()) BakerReadBarrierCasSlowPathARMVIXL(invoke);
+      codegen->AddSlowPath(slow_path);
+      failure = slow_path->GetEntryLabel();
+      loop_exit = slow_path->GetExitLabel();
     }
   }
 
   // Prevent reordering with prior memory operations.
   // Emit a DMB ISH instruction instead of an DMB ISHST one, as the
-  // latter allows a preceding load to be delayed past the STXR
+  // latter allows a preceding load to be delayed past the STREX
   // instruction below.
   __ Dmb(vixl32::ISH);
 
   __ Add(tmp_ptr, base, offset);
 
-  if (kPoisonHeapReferences && type == DataType::Type::kReference) {
-    codegen->GetAssembler()->PoisonHeapReference(expected);
-    if (value.Is(expected)) {
-      // Do not poison `value`, as it is the same register as
-      // `expected`, which has just been poisoned.
-    } else {
-      codegen->GetAssembler()->PoisonHeapReference(value);
-    }
-  }
-
   // do {
   //   tmp = [r_ptr] - expected;
   // } while (tmp == 0 && failure([r_ptr] <- r_new_value));
-  // result = tmp != 0;
+  // result = tmp == 0;
 
   vixl32::Label loop_head;
   __ Bind(&loop_head);
-
   __ Ldrex(tmp, MemOperand(tmp_ptr));
-
-  __ Subs(tmp, tmp, expected);
-
-  {
-    ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
-                           3 * kMaxInstructionSizeInBytes,
-                           CodeBufferCheckScope::kMaximumSize);
-
-    __ itt(eq);
-    __ strex(eq, tmp, value, MemOperand(tmp_ptr));
-    __ cmp(eq, tmp, 1);
+  if (type == DataType::Type::kReference) {
+    assembler->MaybeUnpoisonHeapReference(tmp);
   }
+  __ Subs(tmp, tmp, expected);
+  __ B(ne, failure, (failure == loop_exit) ? kNear : kBranchWithoutHint);
+  if (type == DataType::Type::kReference) {
+    assembler->MaybePoisonHeapReference(value);
+  }
+  __ Strex(tmp, value, MemOperand(tmp_ptr));
+  if (type == DataType::Type::kReference) {
+    assembler->MaybeUnpoisonHeapReference(value);
+  }
+  __ Cmp(tmp, 0);
+  __ B(ne, &loop_head, /* far_target */ false);
 
-  __ B(eq, &loop_head, /* far_target */ false);
+  __ Bind(loop_exit);
 
   __ Dmb(vixl32::ISH);
 
-  __ Rsbs(out, tmp, 1);
+  // out = tmp == 0.
+  __ Clz(out, tmp);
+  __ Lsr(out, out, WhichPowerOf2(out.GetSizeInBits()));
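+  // (CLZ returns 32 only when `tmp` is 0; the shift by WhichPowerOf2(32) = 5 then
+  // yields 1 for success and 0 for failure.)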
 
-  {
-    ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
-                           2 * kMaxInstructionSizeInBytes,
-                           CodeBufferCheckScope::kMaximumSize);
-
-    __ it(cc);
-    __ mov(cc, out, 0);
-  }
-
-  if (kPoisonHeapReferences && type == DataType::Type::kReference) {
-    codegen->GetAssembler()->UnpoisonHeapReference(expected);
-    if (value.Is(expected)) {
-      // Do not unpoison `value`, as it is the same register as
-      // `expected`, which has just been unpoisoned.
-    } else {
-      codegen->GetAssembler()->UnpoisonHeapReference(value);
-    }
+  if (type == DataType::Type::kReference) {
+    codegen->MaybeGenerateMarkingRegisterCheck(/* code */ 128);
   }
 }
 
 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeCASInt(HInvoke* invoke) {
-  CreateIntIntIntIntIntToIntPlusTemps(allocator_, invoke, DataType::Type::kInt32);
+  CreateIntIntIntIntIntToIntPlusTemps(allocator_, invoke);
 }
 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeCASObject(HInvoke* invoke) {
   // The only read barrier implementation supporting the
@@ -1089,7 +1126,7 @@
     return;
   }
 
-  CreateIntIntIntIntIntToIntPlusTemps(allocator_, invoke, DataType::Type::kReference);
+  CreateIntIntIntIntIntToIntPlusTemps(allocator_, invoke);
 }
 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeCASInt(HInvoke* invoke) {
   GenCas(invoke, DataType::Type::kInt32, codegen_);
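
The control flow of GenCas() and BakerReadBarrierCasSlowPathARMVIXL above, reduced to a minimal C++ sketch. This is an illustration only, not ART code: MarkOldValue() is a hypothetical stand-in for the read-barrier thunk reached through GenerateUnsafeCasOldValueAddWithBakerReadBarrier(), and the DMB barriers and heap-reference poisoning are elided.

#include <atomic>
#include <cstdint>

// Hypothetical stand-in for the Baker read-barrier mark: in ART this maps a from-space
// reference to its to-space copy; here it is only a placeholder so the sketch compiles.
uintptr_t MarkOldValue(uintptr_t ref) { return ref; }

// Illustration of the UnsafeCASObject logic: `expected` is always a to-space reference,
// but the field may still hold the from-space address of the same object.
bool UnsafeCasObjectSketch(std::atomic<uintptr_t>* field, uintptr_t expected, uintptr_t value) {
  uintptr_t old_value = expected;
  // Fast path: ordinary CAS, matching the LDREX/SUBS/STREX loop in GenCas().
  if (field->compare_exchange_strong(old_value, value)) {
    return true;
  }
  // Slow path (BakerReadBarrierCasSlowPathARMVIXL): mark the value we read; if the marked
  // value equals `expected`, the field held a from-space reference to the same object.
  if (MarkOldValue(old_value) != expected) {
    return false;  // A different object (or null) is stored: the CAS fails.
  }
  uintptr_t from_space_old = old_value;
  // Retry, accepting either the to-space or the from-space representation of the object.
  while (true) {
    uintptr_t current = field->load();
    if (current != expected && current != from_space_old) {
      return false;  // Another thread installed a different reference.
    }
    if (field->compare_exchange_weak(current, value)) {
      return true;
    }
  }
}
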
diff --git a/runtime/arch/arm/asm_support_arm.h b/runtime/arch/arm/asm_support_arm.h
index 7123ae7..9a01656 100644
--- a/runtime/arch/arm/asm_support_arm.h
+++ b/runtime/arch/arm/asm_support_arm.h
@@ -37,6 +37,9 @@
 // The offset from art_quick_read_barrier_mark_introspection to the array switch cases,
 // i.e. art_quick_read_barrier_mark_introspection_arrays.
 #define BAKER_MARK_INTROSPECTION_ARRAY_SWITCH_OFFSET 0x100
+// The offset from art_quick_read_barrier_mark_introspection to the entrypoint for the
+// UnsafeCASObject intrinsic, i.e. art_quick_read_barrier_mark_introspection_unsafe_cas.
+#define BAKER_MARK_INTROSPECTION_UNSAFE_CAS_ENTRYPOINT_OFFSET 0x180
 
 // The offset of the reference load LDR from the return address in LR for field loads.
 #ifdef USE_HEAP_POISONING
@@ -55,5 +58,7 @@
 // The offset of the reference load LDR from the return address in LR for GC root loads.
 #define BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_OFFSET -8
 #define BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET -6
+// The offset of the ADD from the return address in LR for UnsafeCASObject intrinsic.
+#define BAKER_MARK_INTROSPECTION_UNSAFE_CAS_ADD_OFFSET -8
 
 #endif  // ART_RUNTIME_ARCH_ARM_ASM_SUPPORT_ARM_H_
diff --git a/runtime/arch/arm/entrypoints_init_arm.cc b/runtime/arch/arm/entrypoints_init_arm.cc
index b4e9036..2c5465e 100644
--- a/runtime/arch/arm/entrypoints_init_arm.cc
+++ b/runtime/arch/arm/entrypoints_init_arm.cc
@@ -59,6 +59,7 @@
 extern "C" mirror::Object* art_quick_read_barrier_mark_introspection_gc_roots_wide(mirror::Object*);
 extern "C" mirror::Object* art_quick_read_barrier_mark_introspection_gc_roots_narrow(
     mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_introspection_unsafe_cas(mirror::Object*);
 
 // Used by soft float.
 // Single-precision FP arithmetics.
@@ -113,6 +114,10 @@
         reinterpret_cast<intptr_t>(art_quick_read_barrier_mark_introspection_gc_roots_narrow) -
         reinterpret_cast<intptr_t>(art_quick_read_barrier_mark_introspection);
     DCHECK_EQ(BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_ENTRYPOINT_OFFSET, gc_roots_narrow_diff);
+    intptr_t unsafe_cas_diff =
+        reinterpret_cast<intptr_t>(art_quick_read_barrier_mark_introspection_unsafe_cas) -
+        reinterpret_cast<intptr_t>(art_quick_read_barrier_mark_introspection);
+    DCHECK_EQ(BAKER_MARK_INTROSPECTION_UNSAFE_CAS_ENTRYPOINT_OFFSET, unsafe_cas_diff);
     // The register 12, i.e. IP, is reserved, so there is no art_quick_read_barrier_mark_reg12.
     // We're using the entry to hold a pointer to the introspection entrypoint instead.
     qpoints->pReadBarrierMarkReg12 =
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index c86baa1..8f56430 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -2575,6 +2575,12 @@
      * The thunk also performs all the fast-path checks, so we need just the
      * slow path.
      *
+     * The UnsafeCASObject intrinsic is similar to the GC roots wide approach
+     * but using ADD (register, T3) instead of the LDR (immediate, T3), so the
+     * destination register is in bits 8-11 rather than 12-15. Therefore it has
+     * its own entrypoint, art_quick_read_barrier_mark_introspection_unsafe_cas
+     * at the offset BAKER_MARK_INTROSPECTION_UNSAFE_CAS_ENTRYPOINT_OFFSET.
+     *
      * The code structure is
      *   art_quick_read_barrier_mark_introspection:                   // @0x00
      *     Up to 32 bytes code for main entrypoint fast-path code for fields
@@ -2610,6 +2616,9 @@
      *     return switch.
      *   art_quick_read_barrier_mark_introspection_arrays:            // @0x100
      *     Exactly 128 bytes for array load switch cases (16x2 instructions).
+     *   art_quick_read_barrier_mark_introspection_unsafe_cas:        // @0x180
+     *     UnsafeCASObject intrinsic entrypoint for ADD (register) encoding T3
+     *     (6 bytes). Loads the return register and jumps to the runtime call.
      */
 #if defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER)
     .balign 512
@@ -2669,7 +2678,6 @@
     BRBMI_RUNTIME_CALL
     b       .Lmark_introspection_return_switch
 
-
     .balign 256
     .thumb_func
     .type art_quick_read_barrier_mark_introspection_arrays, #function
@@ -2677,6 +2685,19 @@
     .global art_quick_read_barrier_mark_introspection_arrays
 art_quick_read_barrier_mark_introspection_arrays:
     BRBMI_FOR_REGISTERS BRBMI_ARRAY_LOAD, BRBMI_BKPT_FILL_8B
+
+    .balign 8
+    .thumb_func
+    .type art_quick_read_barrier_mark_introspection_unsafe_cas, #function
+    .hidden art_quick_read_barrier_mark_introspection_unsafe_cas
+    .global art_quick_read_barrier_mark_introspection_unsafe_cas
+art_quick_read_barrier_mark_introspection_unsafe_cas:
+    // Load the byte of the ADD instruction that contains Rd. Adjust for the thumb state in LR.
+    // The ADD (register, T3) is |11101011000|S|Rn|(0)imm3|Rd|imm2|type|Rm| and we're using
+    // no shift (type=0, imm2=0, imm3=0), so the byte we read here, i.e. |(0)imm3|Rd|,
+    // contains only the register number, the top 4 bits are 0.
+    ldrb    rMR, [lr, #(-1 + BAKER_MARK_INTROSPECTION_UNSAFE_CAS_ADD_OFFSET + 3)]
+    b       .Lmark_introspection_runtime_call
 END art_quick_read_barrier_mark_introspection
 #else  // defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER)
 ENTRY art_quick_read_barrier_mark_introspection
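
The LDRB above relies on the T32 encoding layout of the ADD emitted by GenerateUnsafeCasOldValueAddWithBakerReadBarrier(): the return address in LR has the Thumb bit set (hence the -1), BAKER_MARK_INTROSPECTION_UNSAFE_CAS_ADD_OFFSET (-8) points back to the start of the ADD, and +3 selects the byte holding |(0)imm3|Rd|. A minimal C++ sketch of this arithmetic, also covering the 0xfff0fff0 mask used by the compiler-side CHECK (example register numbers are assumptions; not ART code):

#include <cassert>
#include <cstdint>

int main() {
  // ADD (register, T3): |11101011000|S|Rn|(0)imm3|Rd|imm2|type|Rm|.
  const uint32_t rn = 3, rd = 1, rm = 2;  // Example registers; S/imm3/imm2/type are all 0.
  const uint32_t insn = 0xeb000000u | (rn << 16) | (rd << 8) | rm;

  // Compiler-side thunk check: masking out Rn and Rm must leave only the opcode and Rd.
  assert((insn & 0xfff0fff0u) == (0xeb000000u | (rd << 8)));

  // Entrypoint-side decode: T32 instructions are stored as two little-endian halfwords,
  // so the bytes in memory are { hw1 & 0xff, hw1 >> 8, hw2 & 0xff, hw2 >> 8 }.
  const uint16_t hw1 = insn >> 16;
  const uint16_t hw2 = insn & 0xffffu;
  const uint8_t bytes[4] = {
      static_cast<uint8_t>(hw1 & 0xffu), static_cast<uint8_t>(hw1 >> 8),
      static_cast<uint8_t>(hw2 & 0xffu), static_cast<uint8_t>(hw2 >> 8)};
  // Byte 3 is |(0)imm3|Rd|; with imm3 == 0 the LDRB in the entrypoint reads Rd directly.
  assert(bytes[3] == rd);
  return 0;
}
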
diff --git a/runtime/base/mutex.cc b/runtime/base/mutex.cc
index 7b888b1..044c4c2 100644
--- a/runtime/base/mutex.cc
+++ b/runtime/base/mutex.cc
@@ -1142,10 +1142,6 @@
     DCHECK(subtype_check_lock_ == nullptr);
     subtype_check_lock_ = new Mutex("SubtypeCheck lock", current_lock_level);
 
-    UPDATE_CURRENT_LOCK_LEVEL(kCHALock);
-    DCHECK(cha_lock_ == nullptr);
-    cha_lock_ = new Mutex("CHA lock", current_lock_level);
-
     UPDATE_CURRENT_LOCK_LEVEL(kClassLinkerClassesLock);
     DCHECK(classlinker_classes_lock_ == nullptr);
     classlinker_classes_lock_ = new ReaderWriterMutex("ClassLinker classes lock",
@@ -1226,6 +1222,10 @@
     DCHECK(custom_tls_lock_ == nullptr);
     custom_tls_lock_ = new Mutex("Thread::custom_tls_ lock", current_lock_level);
 
+    UPDATE_CURRENT_LOCK_LEVEL(kCHALock);
+    DCHECK(cha_lock_ == nullptr);
+    cha_lock_ = new Mutex("CHA lock", current_lock_level);
+
     UPDATE_CURRENT_LOCK_LEVEL(kNativeDebugInterfaceLock);
     DCHECK(native_debug_interface_lock_ == nullptr);
     native_debug_interface_lock_ = new Mutex("Native debug interface lock", current_lock_level);
diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h
index af2e7b2..fba209a 100644
--- a/runtime/base/mutex.h
+++ b/runtime/base/mutex.h
@@ -72,6 +72,7 @@
   kJdwpSocketLock,
   kRegionSpaceRegionLock,
   kMarkSweepMarkStackLock,
+  kCHALock,
   kJitCodeCacheLock,
   kRosAllocGlobalLock,
   kRosAllocBracketLock,
@@ -109,7 +110,6 @@
   kMonitorPoolLock,
   kClassLinkerClassesLock,  // TODO rename.
   kDexToDexCompilerLock,
-  kCHALock,
   kSubtypeCheckLock,
   kBreakpointLock,
   kMonitorLock,
@@ -661,14 +661,11 @@
   // TODO: improve name, perhaps instrumentation_update_lock_.
   static Mutex* deoptimization_lock_ ACQUIRED_AFTER(alloc_tracker_lock_);
 
-  // Guards Class Hierarchy Analysis (CHA).
-  static Mutex* cha_lock_ ACQUIRED_AFTER(deoptimization_lock_);
-
   // Guard the update of the SubtypeCheck data stores in each Class::status_ field.
   // This lock is used in SubtypeCheck methods which are the interface for
   // any SubtypeCheck-mutating methods.
   // In Class::IsSubClass, the lock is not required since it does not update the SubtypeCheck data.
-  static Mutex* subtype_check_lock_ ACQUIRED_AFTER(cha_lock_);
+  static Mutex* subtype_check_lock_ ACQUIRED_AFTER(deoptimization_lock_);
 
   // The thread_list_lock_ guards ThreadList::list_. It is also commonly held to stop threads
   // attaching and detaching.
@@ -745,11 +742,14 @@
   // GetThreadLocalStorage.
   static Mutex* custom_tls_lock_ ACQUIRED_AFTER(jni_function_table_lock_);
 
+  // Guards Class Hierarchy Analysis (CHA).
+  static Mutex* cha_lock_ ACQUIRED_AFTER(custom_tls_lock_);
+
   // When declaring any Mutex add BOTTOM_MUTEX_ACQUIRED_AFTER to use annotalysis to check the code
   // doesn't try to acquire a higher level Mutex. NB Due to the way the annotalysis works this
   // actually only encodes the mutex being below jni_function_table_lock_ although having
   // kGenericBottomLock level is lower than this.
-  #define BOTTOM_MUTEX_ACQUIRED_AFTER ACQUIRED_AFTER(art::Locks::custom_tls_lock_)
+  #define BOTTOM_MUTEX_ACQUIRED_AFTER ACQUIRED_AFTER(art::Locks::cha_lock_)
 
   // Have an exclusive aborting thread.
   static Mutex* abort_lock_ ACQUIRED_AFTER(custom_tls_lock_);
diff --git a/runtime/cha.cc b/runtime/cha.cc
index ccbe066..ce84e8c 100644
--- a/runtime/cha.cc
+++ b/runtime/cha.cc
@@ -636,38 +636,54 @@
       // We do this under cha_lock_. Committing code also grabs this lock to
       // make sure the code is only committed when all single-implementation
       // assumptions are still true.
-      MutexLock cha_mu(self, *Locks::cha_lock_);
-      // Invalidate compiled methods that assume some virtual calls have only
-      // single implementations.
-      for (ArtMethod* invalidated : invalidated_single_impl_methods) {
-        if (!invalidated->HasSingleImplementation()) {
-          // It might have been invalidated already when other class linking is
-          // going on.
-          continue;
-        }
-        invalidated->SetHasSingleImplementation(false);
-        if (invalidated->IsAbstract()) {
-          // Clear the single implementation method.
-          invalidated->SetSingleImplementation(nullptr, image_pointer_size);
-        }
+      std::vector<std::pair<ArtMethod*, OatQuickMethodHeader*>> headers;
+      {
+        MutexLock cha_mu(self, *Locks::cha_lock_);
+        // Invalidate compiled methods that assume some virtual calls have only
+        // single implementations.
+        for (ArtMethod* invalidated : invalidated_single_impl_methods) {
+          if (!invalidated->HasSingleImplementation()) {
+            // It might have been invalidated already when other class linking is
+            // going on.
+            continue;
+          }
+          invalidated->SetHasSingleImplementation(false);
+          if (invalidated->IsAbstract()) {
+            // Clear the single implementation method.
+            invalidated->SetSingleImplementation(nullptr, image_pointer_size);
+          }
 
-        if (runtime->IsAotCompiler()) {
-          // No need to invalidate any compiled code as the AotCompiler doesn't
-          // run any code.
-          continue;
-        }
+          if (runtime->IsAotCompiler()) {
+            // No need to invalidate any compiled code as the AotCompiler doesn't
+            // run any code.
+            continue;
+          }
 
-        // Invalidate all dependents.
-        for (const auto& dependent : GetDependents(invalidated)) {
-          ArtMethod* method = dependent.first;;
-          OatQuickMethodHeader* method_header = dependent.second;
-          VLOG(class_linker) << "CHA invalidated compiled code for " << method->PrettyMethod();
-          DCHECK(runtime->UseJitCompilation());
-          runtime->GetJit()->GetCodeCache()->InvalidateCompiledCodeFor(
-              method, method_header);
-          dependent_method_headers.insert(method_header);
+          // Invalidate all dependents.
+          for (const auto& dependent : GetDependents(invalidated)) {
+            ArtMethod* method = dependent.first;;
+            OatQuickMethodHeader* method_header = dependent.second;
+            VLOG(class_linker) << "CHA invalidated compiled code for " << method->PrettyMethod();
+            DCHECK(runtime->UseJitCompilation());
+            // We need to call JitCodeCache::InvalidateCompiledCodeFor but we cannot do it here
+            // since it would run into problems with lock-ordering. We don't want to re-order the
+            // locks since that would make code-commit racy.
+            headers.push_back({method, method_header});
+            dependent_method_headers.insert(method_header);
+          }
+          RemoveAllDependenciesFor(invalidated);
         }
-        RemoveAllDependenciesFor(invalidated);
+      }
+      // Since we are still loading the class that invalidated the code, it is fine to do this
+      // after getting rid of the dependency. Any calls would need to use the old version (since
+      // the new one isn't loaded yet), which still works fine. We will deoptimize just after
+      // this to ensure everything gets the new state.
+      jit::Jit* jit = Runtime::Current()->GetJit();
+      if (jit != nullptr) {
+        jit::JitCodeCache* code_cache = jit->GetCodeCache();
+        for (const auto& pair : headers) {
+          code_cache->InvalidateCompiledCodeFor(pair.first, pair.second);
+        }
       }
     }
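
The restructured hunk above is the usual two-phase shape for a lock-ordering constraint: record the (method, header) pairs while cha_lock_ is held, then call into the JIT code cache only after that lock is released, so the code cache can take its own lock first. A minimal, self-contained sketch of the same shape, using stand-in types rather than the ART classes (Method, MethodHeader and CodeCache below are illustrative assumptions):

    #include <mutex>
    #include <utility>
    #include <vector>

    struct Method {};
    struct MethodHeader {};

    struct CodeCache {
      // Stands in for JitCodeCache::InvalidateCompiledCodeFor; in ART it takes
      // the code cache lock internally.
      void InvalidateCompiledCodeFor(Method*, MethodHeader*) {}
    };

    std::mutex cha_lock;  // Stands in for Locks::cha_lock_.

    void InvalidateDependents(const std::vector<std::pair<Method*, MethodHeader*>>& dependents,
                              CodeCache* code_cache) {
      std::vector<std::pair<Method*, MethodHeader*>> headers;
      {
        // Phase 1: under cha_lock, only record what has to be invalidated.
        std::lock_guard<std::mutex> guard(cha_lock);
        for (const auto& dependent : dependents) {
          headers.push_back(dependent);
        }
      }
      // Phase 2: call into the code cache without holding cha_lock, keeping the
      // lock order (code cache lock before cha_lock) consistent everywhere.
      for (const auto& pair : headers) {
        code_cache->InvalidateCompiledCodeFor(pair.first, pair.second);
      }
    }
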
 
diff --git a/runtime/gc/collector/concurrent_copying-inl.h b/runtime/gc/collector/concurrent_copying-inl.h
index cde5dc7..783f2fc 100644
--- a/runtime/gc/collector/concurrent_copying-inl.h
+++ b/runtime/gc/collector/concurrent_copying-inl.h
@@ -151,7 +151,9 @@
       case space::RegionSpace::RegionType::kRegionTypeUnevacFromSpace:
         return MarkUnevacFromSpaceRegion(self, from_ref, region_space_bitmap_);
       default:
-        // The reference is in an unused region.
+        // The reference is in an unused region. Remove memory protection from
+        // the region space and log debugging information.
+        region_space_->Unprotect();
         LOG(FATAL_WITHOUT_ABORT) << DumpHeapReference(holder, offset, from_ref);
         region_space_->DumpNonFreeRegions(LOG_STREAM(FATAL_WITHOUT_ABORT));
         heap_->GetVerification()->LogHeapCorruption(holder, offset, from_ref, /* fatal */ true);
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index 07abbfc..7688b54 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -1873,17 +1873,20 @@
       } else if (type == RegionType::kRegionTypeUnevacFromSpace) {
         if (!IsMarkedInUnevacFromSpace(ref)) {
           LOG(FATAL_WITHOUT_ABORT) << "Found unmarked reference in unevac from-space:";
+          // Remove memory protection from the region space and log debugging information.
+          region_space_->Unprotect();
           LOG(FATAL_WITHOUT_ABORT) << DumpHeapReference(obj, offset, ref);
         }
         CHECK(IsMarkedInUnevacFromSpace(ref)) << ref;
      } else {
         // Not OK: either a from-space ref or a reference in an unused region.
-        // Do extra logging.
         if (type == RegionType::kRegionTypeFromSpace) {
           LOG(FATAL_WITHOUT_ABORT) << "Found from-space reference:";
         } else {
           LOG(FATAL_WITHOUT_ABORT) << "Found reference in region with type " << type << ":";
         }
+        // Remove memory protection from the region space and log debugging information.
+        region_space_->Unprotect();
         LOG(FATAL_WITHOUT_ABORT) << DumpHeapReference(obj, offset, ref);
         if (obj != nullptr) {
           LogFromSpaceRefHolder(obj, offset);
@@ -1951,17 +1954,20 @@
       } else if (type == RegionType::kRegionTypeUnevacFromSpace) {
         if (!IsMarkedInUnevacFromSpace(ref)) {
           LOG(FATAL_WITHOUT_ABORT) << "Found unmarked reference in unevac from-space:";
+          // Remove memory protection from the region space and log debugging information.
+          region_space_->Unprotect();
           LOG(FATAL_WITHOUT_ABORT) << DumpGcRoot(ref);
         }
         CHECK(IsMarkedInUnevacFromSpace(ref)) << ref;
       } else {
         // Not OK: either a from-space ref or a reference in an unused region.
-        // Do extra logging.
         if (type == RegionType::kRegionTypeFromSpace) {
           LOG(FATAL_WITHOUT_ABORT) << "Found from-space reference:";
         } else {
           LOG(FATAL_WITHOUT_ABORT) << "Found reference in region with type " << type << ":";
         }
+        // Remove memory protection from the region space and log debugging information.
+        region_space_->Unprotect();
         LOG(FATAL_WITHOUT_ABORT) << DumpGcRoot(ref);
         if (gc_root_source == nullptr) {
           // No info.
@@ -2361,6 +2367,8 @@
   // from a previous GC that is either inside or outside the allocated region.
   mirror::Class* klass = from_ref->GetClass<kVerifyNone, kWithoutReadBarrier>();
   if (UNLIKELY(klass == nullptr)) {
+    // Remove memory protection from the region space and log debugging information.
+    region_space_->Unprotect();
     heap_->GetVerification()->LogHeapCorruption(holder, offset, from_ref, /* fatal */ true);
   }
   // There must not be a read barrier to avoid nested RB that might violate the to-space invariant.
@@ -2640,8 +2648,11 @@
         }
       }
       if (is_los && !IsAligned<kPageSize>(ref)) {
-        // Ref is a large object that is not aligned, it must be heap corruption. Dump data before
-        // AtomicSetReadBarrierState since it will fault if the address is not valid.
+        // Ref is a large object that is not aligned, it must be heap
+        // corruption. Remove memory protection and dump data before
+        // AtomicSetReadBarrierState since it will fault if the address is not
+        // valid.
+        region_space_->Unprotect();
         heap_->GetVerification()->LogHeapCorruption(holder, offset, ref, /* fatal */ true);
       }
       // Not marked nor on the allocation stack. Try to mark it.
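
Each corruption path above now calls Unprotect() before the dump helpers touch region memory, since cleared and unused regions are mapped PROT_NONE and the diagnostics would otherwise fault. A rough standalone illustration of why the ordering matters, using a plain anonymous mapping rather than the ART region space:

    #include <sys/mman.h>
    #include <cassert>
    #include <cstddef>
    #include <cstdio>

    int main() {
      const size_t kPageSize = 4096;
      void* mem = mmap(nullptr, kPageSize, PROT_READ | PROT_WRITE,
                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
      assert(mem != MAP_FAILED);
      char* region = static_cast<char*>(mem);
      region[0] = 42;

      // Simulate a cleared/unused region: the space maps it PROT_NONE.
      mprotect(region, kPageSize, PROT_NONE);

      // Dumping data about a suspicious reference would fault now, so the
      // protection is removed first, mirroring region_space_->Unprotect().
      mprotect(region, kPageSize, PROT_READ | PROT_WRITE);
      std::printf("first byte: %d\n", region[0]);

      munmap(region, kPageSize);
      return 0;
    }
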
diff --git a/runtime/gc/space/region_space.cc b/runtime/gc/space/region_space.cc
index 0569092..6d494fa 100644
--- a/runtime/gc/space/region_space.cc
+++ b/runtime/gc/space/region_space.cc
@@ -30,9 +30,8 @@
 // value of the region size, evacuate the region.
 static constexpr uint kEvacuateLivePercentThreshold = 75U;
 
-// Whether we protect the cleared regions.
-// Only protect for target builds to prevent flaky test failures (b/63131961).
-static constexpr bool kProtectClearedRegions = kIsTargetBuild;
+// Whether we protect the unused and cleared regions.
+static constexpr bool kProtectClearedRegions = true;
 
 // Whether we poison memory areas occupied by dead objects in unevacuated regions.
 static constexpr bool kPoisonDeadObjectsInUnevacuatedRegions = true;
@@ -132,6 +131,8 @@
   DCHECK(full_region_.IsAllocated());
   size_t ignored;
   DCHECK(full_region_.Alloc(kAlignment, &ignored, nullptr, &ignored) == nullptr);
+  // Protect the whole region space from the start.
+  Protect();
 }
 
 size_t RegionSpace::FromSpaceSize() {
@@ -552,6 +553,18 @@
   evac_region_ = &full_region_;
 }
 
+void RegionSpace::Protect() {
+  if (kProtectClearedRegions) {
+    CheckedCall(mprotect, __FUNCTION__, Begin(), Size(), PROT_NONE);
+  }
+}
+
+void RegionSpace::Unprotect() {
+  if (kProtectClearedRegions) {
+    CheckedCall(mprotect, __FUNCTION__, Begin(), Size(), PROT_READ | PROT_WRITE);
+  }
+}
+
 void RegionSpace::ClampGrowthLimit(size_t new_capacity) {
   MutexLock mu(Thread::Current(), region_lock_);
   CHECK_LE(new_capacity, NonGrowthLimitCapacity());
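
Protect() and Unprotect() above gate the whole-space mprotect calls on kProtectClearedRegions and route them through CheckedCall, which aborts if the syscall fails. A condensed sketch of that pair with a minimal stand-in for CheckedCall (the stand-in's exact failure handling is an assumption, not the ART helper):

    #include <sys/mman.h>
    #include <cstddef>
    #include <cstdio>
    #include <cstdlib>
    #include <utility>

    static constexpr bool kProtectClearedRegions = true;

    // Minimal stand-in for ART's CheckedCall: run the call, abort with errno
    // context if it reports failure.
    template <typename Func, typename... Args>
    void CheckedCall(Func&& func, const char* what, Args&&... args) {
      if (std::forward<Func>(func)(std::forward<Args>(args)...) != 0) {
        std::perror(what);
        std::abort();
      }
    }

    // Make the pages backing [begin, begin + size) inaccessible.
    void ProtectRange(void* begin, size_t size) {
      if (kProtectClearedRegions) {
        CheckedCall(mprotect, __FUNCTION__, begin, size, PROT_NONE);
      }
    }

    // Make them readable and writable again, e.g. before dumping diagnostics.
    void UnprotectRange(void* begin, size_t size) {
      if (kProtectClearedRegions) {
        CheckedCall(mprotect, __FUNCTION__, begin, size, PROT_READ | PROT_WRITE);
      }
    }
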
diff --git a/runtime/gc/space/region_space.h b/runtime/gc/space/region_space.h
index 90f1f1d..ef2e137 100644
--- a/runtime/gc/space/region_space.h
+++ b/runtime/gc/space/region_space.h
@@ -108,6 +108,17 @@
 
   void Clear() OVERRIDE REQUIRES(!region_lock_);
 
+  // Protect the whole region space, i.e. make memory pages backing the
+  // region area not readable and not writable.
+  void Protect();
+
+  // Remove memory protection from the whole region space, i.e. make memory
+  // pages backing the region area readable and writable. This method is useful
+  // to avoid page protection faults when dumping information about an invalid
+  // reference.
+  void Unprotect();
+
   // Change the non growth limit capacity to new capacity by shrinking or expanding the map.
   // Currently, only shrinking is supported.
   // Unlike implementations of this function in other spaces, we need to pass
diff --git a/runtime/interpreter/mterp/arm/op_iget.S b/runtime/interpreter/mterp/arm/op_iget.S
index c7f777b..c45880b 100644
--- a/runtime/interpreter/mterp/arm/op_iget.S
+++ b/runtime/interpreter/mterp/arm/op_iget.S
@@ -1,4 +1,4 @@
-%default { "is_object":"0", "helper":"artGet32InstanceFromCode"}
+%default { "is_object":"0", "helper":"artGet32InstanceFromMterp"}
     /*
      * General instance field get.
      *
diff --git a/runtime/interpreter/mterp/arm/op_iget_boolean.S b/runtime/interpreter/mterp/arm/op_iget_boolean.S
index 628f40a..9da6c8a 100644
--- a/runtime/interpreter/mterp/arm/op_iget_boolean.S
+++ b/runtime/interpreter/mterp/arm/op_iget_boolean.S
@@ -1 +1 @@
-%include "arm/op_iget.S" { "helper":"artGetBooleanInstanceFromCode" }
+%include "arm/op_iget.S" { "helper":"artGetBooleanInstanceFromMterp" }
diff --git a/runtime/interpreter/mterp/arm/op_iget_byte.S b/runtime/interpreter/mterp/arm/op_iget_byte.S
index c4e08e2..3d1f52d 100644
--- a/runtime/interpreter/mterp/arm/op_iget_byte.S
+++ b/runtime/interpreter/mterp/arm/op_iget_byte.S
@@ -1 +1 @@
-%include "arm/op_iget.S" { "helper":"artGetByteInstanceFromCode" }
+%include "arm/op_iget.S" { "helper":"artGetByteInstanceFromMterp" }
diff --git a/runtime/interpreter/mterp/arm/op_iget_char.S b/runtime/interpreter/mterp/arm/op_iget_char.S
index 5e8da66..6b7154d 100644
--- a/runtime/interpreter/mterp/arm/op_iget_char.S
+++ b/runtime/interpreter/mterp/arm/op_iget_char.S
@@ -1 +1 @@
-%include "arm/op_iget.S" { "helper":"artGetCharInstanceFromCode" }
+%include "arm/op_iget.S" { "helper":"artGetCharInstanceFromMterp" }
diff --git a/runtime/interpreter/mterp/arm/op_iget_object.S b/runtime/interpreter/mterp/arm/op_iget_object.S
index 1cf2e3c..a35b1c8 100644
--- a/runtime/interpreter/mterp/arm/op_iget_object.S
+++ b/runtime/interpreter/mterp/arm/op_iget_object.S
@@ -1 +1 @@
-%include "arm/op_iget.S" { "is_object":"1", "helper":"artGetObjInstanceFromCode" }
+%include "arm/op_iget.S" { "is_object":"1", "helper":"artGetObjInstanceFromMterp" }
diff --git a/runtime/interpreter/mterp/arm/op_iget_short.S b/runtime/interpreter/mterp/arm/op_iget_short.S
index 460f045..3254c07 100644
--- a/runtime/interpreter/mterp/arm/op_iget_short.S
+++ b/runtime/interpreter/mterp/arm/op_iget_short.S
@@ -1 +1 @@
-%include "arm/op_iget.S" { "helper":"artGetShortInstanceFromCode" }
+%include "arm/op_iget.S" { "helper":"artGetShortInstanceFromMterp" }
diff --git a/runtime/interpreter/mterp/arm/op_iget_wide.S b/runtime/interpreter/mterp/arm/op_iget_wide.S
index e287d51..30405bd 100644
--- a/runtime/interpreter/mterp/arm/op_iget_wide.S
+++ b/runtime/interpreter/mterp/arm/op_iget_wide.S
@@ -9,7 +9,7 @@
     GET_VREG r1, r1                        @ r1<- fp[B], the object pointer
     ldr      r2, [rFP, #OFF_FP_METHOD]     @ r2<- referrer
     mov      r3, rSELF                     @ r3<- self
-    bl       artGet64InstanceFromCode
+    bl       artGet64InstanceFromMterp
     ldr      r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
     ubfx     r2, rINST, #8, #4             @ r2<- A
     PREFETCH_INST 2
diff --git a/runtime/interpreter/mterp/arm64/op_iget.S b/runtime/interpreter/mterp/arm64/op_iget.S
index 88533bd..d9feac7 100644
--- a/runtime/interpreter/mterp/arm64/op_iget.S
+++ b/runtime/interpreter/mterp/arm64/op_iget.S
@@ -1,4 +1,4 @@
-%default { "extend":"", "is_object":"0", "helper":"artGet32InstanceFromCode"}
+%default { "extend":"", "is_object":"0", "helper":"artGet32InstanceFromMterp"}
     /*
      * General instance field get.
      *
diff --git a/runtime/interpreter/mterp/arm64/op_iget_boolean.S b/runtime/interpreter/mterp/arm64/op_iget_boolean.S
index 36a9b6b..f6ea4dd 100644
--- a/runtime/interpreter/mterp/arm64/op_iget_boolean.S
+++ b/runtime/interpreter/mterp/arm64/op_iget_boolean.S
@@ -1 +1 @@
-%include "arm64/op_iget.S" { "helper":"artGetBooleanInstanceFromCode", "extend":"uxtb w0, w0" }
+%include "arm64/op_iget.S" { "helper":"artGetBooleanInstanceFromMterp", "extend":"uxtb w0, w0" }
diff --git a/runtime/interpreter/mterp/arm64/op_iget_byte.S b/runtime/interpreter/mterp/arm64/op_iget_byte.S
index fd3f164..497e2bf 100644
--- a/runtime/interpreter/mterp/arm64/op_iget_byte.S
+++ b/runtime/interpreter/mterp/arm64/op_iget_byte.S
@@ -1 +1 @@
-%include "arm64/op_iget.S" { "helper":"artGetByteInstanceFromCode", "extend":"sxtb w0, w0" }
+%include "arm64/op_iget.S" { "helper":"artGetByteInstanceFromMterp", "extend":"sxtb w0, w0" }
diff --git a/runtime/interpreter/mterp/arm64/op_iget_char.S b/runtime/interpreter/mterp/arm64/op_iget_char.S
index ea23275..4669859 100644
--- a/runtime/interpreter/mterp/arm64/op_iget_char.S
+++ b/runtime/interpreter/mterp/arm64/op_iget_char.S
@@ -1 +1 @@
-%include "arm64/op_iget.S" { "helper":"artGetCharInstanceFromCode", "extend":"uxth w0, w0" }
+%include "arm64/op_iget.S" { "helper":"artGetCharInstanceFromMterp", "extend":"uxth w0, w0" }
diff --git a/runtime/interpreter/mterp/arm64/op_iget_object.S b/runtime/interpreter/mterp/arm64/op_iget_object.S
index 03be78d..eb7bdea 100644
--- a/runtime/interpreter/mterp/arm64/op_iget_object.S
+++ b/runtime/interpreter/mterp/arm64/op_iget_object.S
@@ -1 +1 @@
-%include "arm64/op_iget.S" { "is_object":"1", "helper":"artGetObjInstanceFromCode" }
+%include "arm64/op_iget.S" { "is_object":"1", "helper":"artGetObjInstanceFromMterp" }
diff --git a/runtime/interpreter/mterp/arm64/op_iget_short.S b/runtime/interpreter/mterp/arm64/op_iget_short.S
index c347542..6f0a505 100644
--- a/runtime/interpreter/mterp/arm64/op_iget_short.S
+++ b/runtime/interpreter/mterp/arm64/op_iget_short.S
@@ -1 +1 @@
-%include "arm64/op_iget.S" { "helper":"artGetShortInstanceFromCode", "extend":"sxth w0, w0" }
+%include "arm64/op_iget.S" { "helper":"artGetShortInstanceFromMterp", "extend":"sxth w0, w0" }
diff --git a/runtime/interpreter/mterp/arm64/op_iget_wide.S b/runtime/interpreter/mterp/arm64/op_iget_wide.S
index 9718390..02ef0a7 100644
--- a/runtime/interpreter/mterp/arm64/op_iget_wide.S
+++ b/runtime/interpreter/mterp/arm64/op_iget_wide.S
@@ -9,7 +9,7 @@
     GET_VREG w1, w1                        // w1<- fp[B], the object pointer
     ldr      x2, [xFP, #OFF_FP_METHOD]     // w2<- referrer
     mov      x3, xSELF                     // w3<- self
-    bl       artGet64InstanceFromCode
+    bl       artGet64InstanceFromMterp
     ldr      x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
     ubfx     w2, wINST, #8, #4             // w2<- A
     PREFETCH_INST 2
diff --git a/runtime/interpreter/mterp/mips/op_iget.S b/runtime/interpreter/mterp/mips/op_iget.S
index 01f42d9..0785ac5 100644
--- a/runtime/interpreter/mterp/mips/op_iget.S
+++ b/runtime/interpreter/mterp/mips/op_iget.S
@@ -1,4 +1,4 @@
-%default { "is_object":"0", "helper":"artGet32InstanceFromCode"}
+%default { "is_object":"0", "helper":"artGet32InstanceFromMterp"}
     /*
      * General instance field get.
      *
diff --git a/runtime/interpreter/mterp/mips/op_iget_boolean.S b/runtime/interpreter/mterp/mips/op_iget_boolean.S
index e03364e..a932c37 100644
--- a/runtime/interpreter/mterp/mips/op_iget_boolean.S
+++ b/runtime/interpreter/mterp/mips/op_iget_boolean.S
@@ -1 +1 @@
-%include "mips/op_iget.S" { "helper":"artGetBooleanInstanceFromCode" }
+%include "mips/op_iget.S" { "helper":"artGetBooleanInstanceFromMterp" }
diff --git a/runtime/interpreter/mterp/mips/op_iget_byte.S b/runtime/interpreter/mterp/mips/op_iget_byte.S
index dc87cfe..e498a8c 100644
--- a/runtime/interpreter/mterp/mips/op_iget_byte.S
+++ b/runtime/interpreter/mterp/mips/op_iget_byte.S
@@ -1 +1 @@
-%include "mips/op_iget.S" { "helper":"artGetByteInstanceFromCode" }
+%include "mips/op_iget.S" { "helper":"artGetByteInstanceFromMterp" }
diff --git a/runtime/interpreter/mterp/mips/op_iget_char.S b/runtime/interpreter/mterp/mips/op_iget_char.S
index 55f8a93..efd5b99 100644
--- a/runtime/interpreter/mterp/mips/op_iget_char.S
+++ b/runtime/interpreter/mterp/mips/op_iget_char.S
@@ -1 +1 @@
-%include "mips/op_iget.S" { "helper":"artGetCharInstanceFromCode" }
+%include "mips/op_iget.S" { "helper":"artGetCharInstanceFromMterp" }
diff --git a/runtime/interpreter/mterp/mips/op_iget_object.S b/runtime/interpreter/mterp/mips/op_iget_object.S
index 11d93a4..8fa96521 100644
--- a/runtime/interpreter/mterp/mips/op_iget_object.S
+++ b/runtime/interpreter/mterp/mips/op_iget_object.S
@@ -1 +1 @@
-%include "mips/op_iget.S" { "is_object":"1", "helper":"artGetObjInstanceFromCode" }
+%include "mips/op_iget.S" { "is_object":"1", "helper":"artGetObjInstanceFromMterp" }
diff --git a/runtime/interpreter/mterp/mips/op_iget_short.S b/runtime/interpreter/mterp/mips/op_iget_short.S
index 9086246..efc5de4 100644
--- a/runtime/interpreter/mterp/mips/op_iget_short.S
+++ b/runtime/interpreter/mterp/mips/op_iget_short.S
@@ -1 +1 @@
-%include "mips/op_iget.S" { "helper":"artGetShortInstanceFromCode" }
+%include "mips/op_iget.S" { "helper":"artGetShortInstanceFromMterp" }
diff --git a/runtime/interpreter/mterp/mips/op_iget_wide.S b/runtime/interpreter/mterp/mips/op_iget_wide.S
index cf5019e..e1d83a4 100644
--- a/runtime/interpreter/mterp/mips/op_iget_wide.S
+++ b/runtime/interpreter/mterp/mips/op_iget_wide.S
@@ -10,7 +10,7 @@
     GET_VREG(a1, a1)                       # a1 <- fp[B], the object pointer
     lw    a2, OFF_FP_METHOD(rFP)           # a2 <- referrer
     move  a3, rSELF                        # a3 <- self
-    JAL(artGet64InstanceFromCode)
+    JAL(artGet64InstanceFromMterp)
     lw   a3, THREAD_EXCEPTION_OFFSET(rSELF)
     GET_OPA4(a2)                           # a2<- A+
     PREFETCH_INST(2)                       # load rINST
diff --git a/runtime/interpreter/mterp/mips64/op_iget.S b/runtime/interpreter/mterp/mips64/op_iget.S
index ade4b31..4158603 100644
--- a/runtime/interpreter/mterp/mips64/op_iget.S
+++ b/runtime/interpreter/mterp/mips64/op_iget.S
@@ -1,4 +1,4 @@
-%default { "is_object":"0", "helper":"artGet32InstanceFromCode"}
+%default { "is_object":"0", "helper":"artGet32InstanceFromMterp"}
     /*
      * General instance field get.
      *
diff --git a/runtime/interpreter/mterp/mips64/op_iget_boolean.S b/runtime/interpreter/mterp/mips64/op_iget_boolean.S
index cb2c8be..e64b798 100644
--- a/runtime/interpreter/mterp/mips64/op_iget_boolean.S
+++ b/runtime/interpreter/mterp/mips64/op_iget_boolean.S
@@ -1 +1 @@
-%include "mips64/op_iget.S" { "helper":"artGetBooleanInstanceFromCode" }
+%include "mips64/op_iget.S" { "helper":"artGetBooleanInstanceFromMterp" }
diff --git a/runtime/interpreter/mterp/mips64/op_iget_byte.S b/runtime/interpreter/mterp/mips64/op_iget_byte.S
index 099d8d0..fefe53f 100644
--- a/runtime/interpreter/mterp/mips64/op_iget_byte.S
+++ b/runtime/interpreter/mterp/mips64/op_iget_byte.S
@@ -1 +1 @@
-%include "mips64/op_iget.S" { "helper":"artGetByteInstanceFromCode" }
+%include "mips64/op_iget.S" { "helper":"artGetByteInstanceFromMterp" }
diff --git a/runtime/interpreter/mterp/mips64/op_iget_char.S b/runtime/interpreter/mterp/mips64/op_iget_char.S
index 927b7af..9caf40e 100644
--- a/runtime/interpreter/mterp/mips64/op_iget_char.S
+++ b/runtime/interpreter/mterp/mips64/op_iget_char.S
@@ -1 +1 @@
-%include "mips64/op_iget.S" { "helper":"artGetCharInstanceFromCode" }
+%include "mips64/op_iget.S" { "helper":"artGetCharInstanceFromMterp" }
diff --git a/runtime/interpreter/mterp/mips64/op_iget_object.S b/runtime/interpreter/mterp/mips64/op_iget_object.S
index c658556..ce3421a 100644
--- a/runtime/interpreter/mterp/mips64/op_iget_object.S
+++ b/runtime/interpreter/mterp/mips64/op_iget_object.S
@@ -1 +1 @@
-%include "mips64/op_iget.S" { "is_object":"1", "helper":"artGetObjInstanceFromCode" }
+%include "mips64/op_iget.S" { "is_object":"1", "helper":"artGetObjInstanceFromMterp" }
diff --git a/runtime/interpreter/mterp/mips64/op_iget_short.S b/runtime/interpreter/mterp/mips64/op_iget_short.S
index 28b5093..e2d122d 100644
--- a/runtime/interpreter/mterp/mips64/op_iget_short.S
+++ b/runtime/interpreter/mterp/mips64/op_iget_short.S
@@ -1 +1 @@
-%include "mips64/op_iget.S" { "helper":"artGetShortInstanceFromCode" }
+%include "mips64/op_iget.S" { "helper":"artGetShortInstanceFromMterp" }
diff --git a/runtime/interpreter/mterp/mips64/op_iget_wide.S b/runtime/interpreter/mterp/mips64/op_iget_wide.S
index 85cf670..ca793e0 100644
--- a/runtime/interpreter/mterp/mips64/op_iget_wide.S
+++ b/runtime/interpreter/mterp/mips64/op_iget_wide.S
@@ -3,14 +3,14 @@
      *
      * for: iget-wide
      */
-    .extern artGet64InstanceFromCode
+    .extern artGet64InstanceFromMterp
     EXPORT_PC
     lhu      a0, 2(rPC)                 # a0 <- field ref CCCC
     srl      a1, rINST, 12              # a1 <- B
     GET_VREG_U a1, a1                   # a1 <- fp[B], the object pointer
     ld       a2, OFF_FP_METHOD(rFP)     # a2 <- referrer
     move     a3, rSELF                  # a3 <- self
-    jal      artGet64InstanceFromCode
+    jal      artGet64InstanceFromMterp
     ld       a3, THREAD_EXCEPTION_OFFSET(rSELF)
     ext      a2, rINST, 8, 4            # a2 <- A
     PREFETCH_INST 2
diff --git a/runtime/interpreter/mterp/mterp.cc b/runtime/interpreter/mterp/mterp.cc
index e4cc6d3..abbc509 100644
--- a/runtime/interpreter/mterp/mterp.cc
+++ b/runtime/interpreter/mterp/mterp.cc
@@ -681,6 +681,83 @@
   return MterpShouldSwitchInterpreters();
 }
 
+template<typename PrimType, typename RetType, typename Getter, FindFieldType kType>
+NO_INLINE RetType artGetInstanceFromMterp(uint32_t field_idx,
+                                          mirror::Object* obj,
+                                          ArtMethod* referrer,
+                                          Thread* self)
+      REQUIRES_SHARED(Locks::mutator_lock_) {
+  StackHandleScope<1> hs(self);
+  HandleWrapper<mirror::Object> h(hs.NewHandleWrapper(&obj));  // GC might move the object.
+  ArtField* field = FindFieldFromCode<kType, /* access_checks */ false>(
+      field_idx, referrer, self, sizeof(PrimType));
+  if (UNLIKELY(field == nullptr)) {
+    return 0;  // Will throw exception by checking with Thread::Current.
+  }
+  if (UNLIKELY(h == nullptr)) {
+    ThrowNullPointerExceptionForFieldAccess(field, /*is_read*/ true);
+    return 0;  // Will throw exception by checking with Thread::Current.
+  }
+  return Getter::Get(obj, field);
+}
+
+template<typename PrimType, typename RetType, typename Getter>
+ALWAYS_INLINE RetType artGetInstanceFromMterpFast(uint32_t field_idx,
+                                                  mirror::Object* obj,
+                                                  ArtMethod* referrer,
+                                                  Thread* self)
+      REQUIRES_SHARED(Locks::mutator_lock_) {
+  constexpr bool kIsObject = std::is_same<RetType, mirror::Object*>::value;
+  constexpr FindFieldType kType = kIsObject ? InstanceObjectRead : InstancePrimitiveRead;
+
+  // This effectively inlines the fast path from ArtMethod::GetDexCache.
+  // It avoids non-inlined call which in turn allows elimination of the prologue and epilogue.
+  if (LIKELY(!referrer->IsObsolete())) {
+    // Avoid read barriers, since we need only the pointer to the native (non-movable)
+    // DexCache field array which we can get even through from-space objects.
+    ObjPtr<mirror::Class> klass = referrer->GetDeclaringClass<kWithoutReadBarrier>();
+    mirror::DexCache* dex_cache = klass->GetDexCache<kDefaultVerifyFlags, kWithoutReadBarrier>();
+    // Try to find the desired field in DexCache.
+    ArtField* field = dex_cache->GetResolvedField(field_idx, kRuntimePointerSize);
+    if (LIKELY(field != nullptr && obj != nullptr)) {
+      if (kIsDebugBuild) {
+        // Compare the fast path and slow path.
+        StackHandleScope<1> hs(self);
+        HandleWrapper<mirror::Object> h(hs.NewHandleWrapper(&obj));  // GC might move the object.
+        DCHECK_EQ(field, (FindFieldFromCode<kType, /* access_checks */ false>(
+            field_idx, referrer, self, sizeof(PrimType))));
+      }
+      return Getter::Get(obj, field);
+    }
+  }
+  // Slow path. Called last and with identical arguments so that it becomes a single-instruction tail call.
+  return artGetInstanceFromMterp<PrimType, RetType, Getter, kType>(field_idx, obj, referrer, self);
+}
+
+#define ART_GET_FIELD_FROM_MTERP(Kind, PrimType, RetType, Ptr)                                    \
+extern "C" RetType artGet ## Kind ## InstanceFromMterp(uint32_t field_idx,                        \
+                                                       mirror::Object* obj,                       \
+                                                       ArtMethod* referrer,                       \
+                                                       Thread* self)                              \
+      REQUIRES_SHARED(Locks::mutator_lock_) {                                                     \
+  struct Getter { /* Specialize the field load depending on the field type */                     \
+    static RetType Get(mirror::Object* o, ArtField* f) REQUIRES_SHARED(Locks::mutator_lock_) {    \
+      return f->Get##Kind(o)Ptr;                                                                  \
+    }                                                                                             \
+  };                                                                                              \
+  return artGetInstanceFromMterpFast<PrimType, RetType, Getter>(field_idx, obj, referrer, self);  \
+}                                                                                                 \
+
+ART_GET_FIELD_FROM_MTERP(Byte, int8_t, ssize_t, )
+ART_GET_FIELD_FROM_MTERP(Boolean, uint8_t, size_t, )
+ART_GET_FIELD_FROM_MTERP(Short, int16_t, ssize_t, )
+ART_GET_FIELD_FROM_MTERP(Char, uint16_t, size_t, )
+ART_GET_FIELD_FROM_MTERP(32, uint32_t, size_t, )
+ART_GET_FIELD_FROM_MTERP(64, uint64_t, uint64_t, )
+ART_GET_FIELD_FROM_MTERP(Obj, mirror::HeapReference<mirror::Object>, mirror::Object*, .Ptr())
+
+#undef ART_GET_FIELD_FROM_MTERP
+
 extern "C" ssize_t artSet8InstanceFromMterp(uint32_t field_idx,
                                             mirror::Object* obj,
                                             uint8_t new_value,
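
The new mterp helpers above split each field get into an ALWAYS_INLINE fast path that only probes the DexCache, and a NO_INLINE slow path that is called last with identical arguments so the call compiles down to a tail call. A toy sketch of that shape, with simplified stand-ins for the field, object and dex-cache types (none of these are the real runtime classes):

    #include <cstdint>

    struct Field { int32_t offset; };
    struct Object { int32_t slots[16]; };
    struct DexCache {
      Field* resolved[64];
      Field* GetResolvedField(uint32_t idx) { return resolved[idx % 64]; }
    };

    // Slow path: out of line; in the real helper this resolves the field,
    // handles null objects and may leave a pending exception.
    __attribute__((noinline))
    int32_t Get32InstanceSlow(uint32_t field_idx, Object* obj, DexCache* cache);

    // Fast path: inlined into each exported entry point. If the field is
    // already resolved and the object is non-null, load directly; otherwise
    // fall through to the slow path with identical arguments (tail call).
    inline int32_t Get32InstanceFast(uint32_t field_idx, Object* obj, DexCache* cache) {
      Field* field = cache->GetResolvedField(field_idx);
      if (field != nullptr && obj != nullptr) {
        return obj->slots[field->offset];
      }
      return Get32InstanceSlow(field_idx, obj, cache);
    }

    int32_t Get32InstanceSlow(uint32_t field_idx, Object* obj, DexCache* cache) {
      (void)field_idx;
      (void)cache;
      return obj != nullptr ? obj->slots[0] : 0;  // Placeholder for resolution + throw.
    }
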
diff --git a/runtime/interpreter/mterp/out/mterp_arm.S b/runtime/interpreter/mterp/out/mterp_arm.S
index 73b957f..fd5d647 100644
--- a/runtime/interpreter/mterp/out/mterp_arm.S
+++ b/runtime/interpreter/mterp/out/mterp_arm.S
@@ -2255,7 +2255,7 @@
     GET_VREG r1, r1                        @ r1<- fp[B], the object pointer
     ldr      r2, [rFP, #OFF_FP_METHOD]     @ r2<- referrer
     mov      r3, rSELF                     @ r3<- self
-    bl       artGet32InstanceFromCode
+    bl       artGet32InstanceFromMterp
     ldr      r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
     ubfx     r2, rINST, #8, #4             @ r2<- A
     PREFETCH_INST 2
@@ -2285,7 +2285,7 @@
     GET_VREG r1, r1                        @ r1<- fp[B], the object pointer
     ldr      r2, [rFP, #OFF_FP_METHOD]     @ r2<- referrer
     mov      r3, rSELF                     @ r3<- self
-    bl       artGet64InstanceFromCode
+    bl       artGet64InstanceFromMterp
     ldr      r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
     ubfx     r2, rINST, #8, #4             @ r2<- A
     PREFETCH_INST 2
@@ -2314,7 +2314,7 @@
     GET_VREG r1, r1                        @ r1<- fp[B], the object pointer
     ldr      r2, [rFP, #OFF_FP_METHOD]     @ r2<- referrer
     mov      r3, rSELF                     @ r3<- self
-    bl       artGetObjInstanceFromCode
+    bl       artGetObjInstanceFromMterp
     ldr      r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
     ubfx     r2, rINST, #8, #4             @ r2<- A
     PREFETCH_INST 2
@@ -2346,7 +2346,7 @@
     GET_VREG r1, r1                        @ r1<- fp[B], the object pointer
     ldr      r2, [rFP, #OFF_FP_METHOD]     @ r2<- referrer
     mov      r3, rSELF                     @ r3<- self
-    bl       artGetBooleanInstanceFromCode
+    bl       artGetBooleanInstanceFromMterp
     ldr      r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
     ubfx     r2, rINST, #8, #4             @ r2<- A
     PREFETCH_INST 2
@@ -2378,7 +2378,7 @@
     GET_VREG r1, r1                        @ r1<- fp[B], the object pointer
     ldr      r2, [rFP, #OFF_FP_METHOD]     @ r2<- referrer
     mov      r3, rSELF                     @ r3<- self
-    bl       artGetByteInstanceFromCode
+    bl       artGetByteInstanceFromMterp
     ldr      r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
     ubfx     r2, rINST, #8, #4             @ r2<- A
     PREFETCH_INST 2
@@ -2410,7 +2410,7 @@
     GET_VREG r1, r1                        @ r1<- fp[B], the object pointer
     ldr      r2, [rFP, #OFF_FP_METHOD]     @ r2<- referrer
     mov      r3, rSELF                     @ r3<- self
-    bl       artGetCharInstanceFromCode
+    bl       artGetCharInstanceFromMterp
     ldr      r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
     ubfx     r2, rINST, #8, #4             @ r2<- A
     PREFETCH_INST 2
@@ -2442,7 +2442,7 @@
     GET_VREG r1, r1                        @ r1<- fp[B], the object pointer
     ldr      r2, [rFP, #OFF_FP_METHOD]     @ r2<- referrer
     mov      r3, rSELF                     @ r3<- self
-    bl       artGetShortInstanceFromCode
+    bl       artGetShortInstanceFromMterp
     ldr      r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
     ubfx     r2, rINST, #8, #4             @ r2<- A
     PREFETCH_INST 2
diff --git a/runtime/interpreter/mterp/out/mterp_arm64.S b/runtime/interpreter/mterp/out/mterp_arm64.S
index 2a0c4df..213f7ff 100644
--- a/runtime/interpreter/mterp/out/mterp_arm64.S
+++ b/runtime/interpreter/mterp/out/mterp_arm64.S
@@ -2192,7 +2192,7 @@
     GET_VREG w1, w1                        // w1<- fp[B], the object pointer
     ldr      x2, [xFP, #OFF_FP_METHOD]     // w2<- referrer
     mov      x3, xSELF                     // w3<- self
-    bl       artGet32InstanceFromCode
+    bl       artGet32InstanceFromMterp
     ldr      x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
     
     ubfx     w2, wINST, #8, #4             // w2<- A
@@ -2222,7 +2222,7 @@
     GET_VREG w1, w1                        // w1<- fp[B], the object pointer
     ldr      x2, [xFP, #OFF_FP_METHOD]     // w2<- referrer
     mov      x3, xSELF                     // w3<- self
-    bl       artGet64InstanceFromCode
+    bl       artGet64InstanceFromMterp
     ldr      x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
     ubfx     w2, wINST, #8, #4             // w2<- A
     PREFETCH_INST 2
@@ -2249,7 +2249,7 @@
     GET_VREG w1, w1                        // w1<- fp[B], the object pointer
     ldr      x2, [xFP, #OFF_FP_METHOD]     // w2<- referrer
     mov      x3, xSELF                     // w3<- self
-    bl       artGetObjInstanceFromCode
+    bl       artGetObjInstanceFromMterp
     ldr      x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
     
     ubfx     w2, wINST, #8, #4             // w2<- A
@@ -2281,7 +2281,7 @@
     GET_VREG w1, w1                        // w1<- fp[B], the object pointer
     ldr      x2, [xFP, #OFF_FP_METHOD]     // w2<- referrer
     mov      x3, xSELF                     // w3<- self
-    bl       artGetBooleanInstanceFromCode
+    bl       artGetBooleanInstanceFromMterp
     ldr      x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
     uxtb w0, w0
     ubfx     w2, wINST, #8, #4             // w2<- A
@@ -2313,7 +2313,7 @@
     GET_VREG w1, w1                        // w1<- fp[B], the object pointer
     ldr      x2, [xFP, #OFF_FP_METHOD]     // w2<- referrer
     mov      x3, xSELF                     // w3<- self
-    bl       artGetByteInstanceFromCode
+    bl       artGetByteInstanceFromMterp
     ldr      x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
     sxtb w0, w0
     ubfx     w2, wINST, #8, #4             // w2<- A
@@ -2345,7 +2345,7 @@
     GET_VREG w1, w1                        // w1<- fp[B], the object pointer
     ldr      x2, [xFP, #OFF_FP_METHOD]     // w2<- referrer
     mov      x3, xSELF                     // w3<- self
-    bl       artGetCharInstanceFromCode
+    bl       artGetCharInstanceFromMterp
     ldr      x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
     uxth w0, w0
     ubfx     w2, wINST, #8, #4             // w2<- A
@@ -2377,7 +2377,7 @@
     GET_VREG w1, w1                        // w1<- fp[B], the object pointer
     ldr      x2, [xFP, #OFF_FP_METHOD]     // w2<- referrer
     mov      x3, xSELF                     // w3<- self
-    bl       artGetShortInstanceFromCode
+    bl       artGetShortInstanceFromMterp
     ldr      x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
     sxth w0, w0
     ubfx     w2, wINST, #8, #4             // w2<- A
diff --git a/runtime/interpreter/mterp/out/mterp_mips.S b/runtime/interpreter/mterp/out/mterp_mips.S
index 3b86279..c749057 100644
--- a/runtime/interpreter/mterp/out/mterp_mips.S
+++ b/runtime/interpreter/mterp/out/mterp_mips.S
@@ -2677,7 +2677,7 @@
     GET_VREG(a1, a1)                       # a1 <- fp[B], the object pointer
     lw    a2, OFF_FP_METHOD(rFP)           # a2 <- referrer
     move  a3, rSELF                        # a3 <- self
-    JAL(artGet32InstanceFromCode)
+    JAL(artGet32InstanceFromMterp)
     lw   a3, THREAD_EXCEPTION_OFFSET(rSELF)
     GET_OPA4(a2)                           # a2<- A+
     PREFETCH_INST(2)                       # load rINST
@@ -2706,7 +2706,7 @@
     GET_VREG(a1, a1)                       # a1 <- fp[B], the object pointer
     lw    a2, OFF_FP_METHOD(rFP)           # a2 <- referrer
     move  a3, rSELF                        # a3 <- self
-    JAL(artGet64InstanceFromCode)
+    JAL(artGet64InstanceFromMterp)
     lw   a3, THREAD_EXCEPTION_OFFSET(rSELF)
     GET_OPA4(a2)                           # a2<- A+
     PREFETCH_INST(2)                       # load rINST
@@ -2732,7 +2732,7 @@
     GET_VREG(a1, a1)                       # a1 <- fp[B], the object pointer
     lw    a2, OFF_FP_METHOD(rFP)           # a2 <- referrer
     move  a3, rSELF                        # a3 <- self
-    JAL(artGetObjInstanceFromCode)
+    JAL(artGetObjInstanceFromMterp)
     lw   a3, THREAD_EXCEPTION_OFFSET(rSELF)
     GET_OPA4(a2)                           # a2<- A+
     PREFETCH_INST(2)                       # load rINST
@@ -2763,7 +2763,7 @@
     GET_VREG(a1, a1)                       # a1 <- fp[B], the object pointer
     lw    a2, OFF_FP_METHOD(rFP)           # a2 <- referrer
     move  a3, rSELF                        # a3 <- self
-    JAL(artGetBooleanInstanceFromCode)
+    JAL(artGetBooleanInstanceFromMterp)
     lw   a3, THREAD_EXCEPTION_OFFSET(rSELF)
     GET_OPA4(a2)                           # a2<- A+
     PREFETCH_INST(2)                       # load rINST
@@ -2794,7 +2794,7 @@
     GET_VREG(a1, a1)                       # a1 <- fp[B], the object pointer
     lw    a2, OFF_FP_METHOD(rFP)           # a2 <- referrer
     move  a3, rSELF                        # a3 <- self
-    JAL(artGetByteInstanceFromCode)
+    JAL(artGetByteInstanceFromMterp)
     lw   a3, THREAD_EXCEPTION_OFFSET(rSELF)
     GET_OPA4(a2)                           # a2<- A+
     PREFETCH_INST(2)                       # load rINST
@@ -2825,7 +2825,7 @@
     GET_VREG(a1, a1)                       # a1 <- fp[B], the object pointer
     lw    a2, OFF_FP_METHOD(rFP)           # a2 <- referrer
     move  a3, rSELF                        # a3 <- self
-    JAL(artGetCharInstanceFromCode)
+    JAL(artGetCharInstanceFromMterp)
     lw   a3, THREAD_EXCEPTION_OFFSET(rSELF)
     GET_OPA4(a2)                           # a2<- A+
     PREFETCH_INST(2)                       # load rINST
@@ -2856,7 +2856,7 @@
     GET_VREG(a1, a1)                       # a1 <- fp[B], the object pointer
     lw    a2, OFF_FP_METHOD(rFP)           # a2 <- referrer
     move  a3, rSELF                        # a3 <- self
-    JAL(artGetShortInstanceFromCode)
+    JAL(artGetShortInstanceFromMterp)
     lw   a3, THREAD_EXCEPTION_OFFSET(rSELF)
     GET_OPA4(a2)                           # a2<- A+
     PREFETCH_INST(2)                       # load rINST
diff --git a/runtime/interpreter/mterp/out/mterp_mips64.S b/runtime/interpreter/mterp/out/mterp_mips64.S
index 58f98df..f9b270b 100644
--- a/runtime/interpreter/mterp/out/mterp_mips64.S
+++ b/runtime/interpreter/mterp/out/mterp_mips64.S
@@ -2246,14 +2246,14 @@
      *
      * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
      */
-    .extern artGet32InstanceFromCode
+    .extern artGet32InstanceFromMterp
     EXPORT_PC
     lhu      a0, 2(rPC)                 # a0 <- field ref CCCC
     srl      a1, rINST, 12              # a1 <- B
     GET_VREG_U a1, a1                   # a1 <- fp[B], the object pointer
     ld       a2, OFF_FP_METHOD(rFP)     # a2 <- referrer
     move     a3, rSELF                  # a3 <- self
-    jal      artGet32InstanceFromCode
+    jal      artGet32InstanceFromMterp
     ld       a3, THREAD_EXCEPTION_OFFSET(rSELF)
     ext      a2, rINST, 8, 4            # a2 <- A
     PREFETCH_INST 2
@@ -2276,14 +2276,14 @@
      *
      * for: iget-wide
      */
-    .extern artGet64InstanceFromCode
+    .extern artGet64InstanceFromMterp
     EXPORT_PC
     lhu      a0, 2(rPC)                 # a0 <- field ref CCCC
     srl      a1, rINST, 12              # a1 <- B
     GET_VREG_U a1, a1                   # a1 <- fp[B], the object pointer
     ld       a2, OFF_FP_METHOD(rFP)     # a2 <- referrer
     move     a3, rSELF                  # a3 <- self
-    jal      artGet64InstanceFromCode
+    jal      artGet64InstanceFromMterp
     ld       a3, THREAD_EXCEPTION_OFFSET(rSELF)
     ext      a2, rINST, 8, 4            # a2 <- A
     PREFETCH_INST 2
@@ -2303,14 +2303,14 @@
      *
      * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
      */
-    .extern artGetObjInstanceFromCode
+    .extern artGetObjInstanceFromMterp
     EXPORT_PC
     lhu      a0, 2(rPC)                 # a0 <- field ref CCCC
     srl      a1, rINST, 12              # a1 <- B
     GET_VREG_U a1, a1                   # a1 <- fp[B], the object pointer
     ld       a2, OFF_FP_METHOD(rFP)     # a2 <- referrer
     move     a3, rSELF                  # a3 <- self
-    jal      artGetObjInstanceFromCode
+    jal      artGetObjInstanceFromMterp
     ld       a3, THREAD_EXCEPTION_OFFSET(rSELF)
     ext      a2, rINST, 8, 4            # a2 <- A
     PREFETCH_INST 2
@@ -2335,14 +2335,14 @@
      *
      * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
      */
-    .extern artGetBooleanInstanceFromCode
+    .extern artGetBooleanInstanceFromMterp
     EXPORT_PC
     lhu      a0, 2(rPC)                 # a0 <- field ref CCCC
     srl      a1, rINST, 12              # a1 <- B
     GET_VREG_U a1, a1                   # a1 <- fp[B], the object pointer
     ld       a2, OFF_FP_METHOD(rFP)     # a2 <- referrer
     move     a3, rSELF                  # a3 <- self
-    jal      artGetBooleanInstanceFromCode
+    jal      artGetBooleanInstanceFromMterp
     ld       a3, THREAD_EXCEPTION_OFFSET(rSELF)
     ext      a2, rINST, 8, 4            # a2 <- A
     PREFETCH_INST 2
@@ -2367,14 +2367,14 @@
      *
      * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
      */
-    .extern artGetByteInstanceFromCode
+    .extern artGetByteInstanceFromMterp
     EXPORT_PC
     lhu      a0, 2(rPC)                 # a0 <- field ref CCCC
     srl      a1, rINST, 12              # a1 <- B
     GET_VREG_U a1, a1                   # a1 <- fp[B], the object pointer
     ld       a2, OFF_FP_METHOD(rFP)     # a2 <- referrer
     move     a3, rSELF                  # a3 <- self
-    jal      artGetByteInstanceFromCode
+    jal      artGetByteInstanceFromMterp
     ld       a3, THREAD_EXCEPTION_OFFSET(rSELF)
     ext      a2, rINST, 8, 4            # a2 <- A
     PREFETCH_INST 2
@@ -2399,14 +2399,14 @@
      *
      * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
      */
-    .extern artGetCharInstanceFromCode
+    .extern artGetCharInstanceFromMterp
     EXPORT_PC
     lhu      a0, 2(rPC)                 # a0 <- field ref CCCC
     srl      a1, rINST, 12              # a1 <- B
     GET_VREG_U a1, a1                   # a1 <- fp[B], the object pointer
     ld       a2, OFF_FP_METHOD(rFP)     # a2 <- referrer
     move     a3, rSELF                  # a3 <- self
-    jal      artGetCharInstanceFromCode
+    jal      artGetCharInstanceFromMterp
     ld       a3, THREAD_EXCEPTION_OFFSET(rSELF)
     ext      a2, rINST, 8, 4            # a2 <- A
     PREFETCH_INST 2
@@ -2431,14 +2431,14 @@
      *
      * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
      */
-    .extern artGetShortInstanceFromCode
+    .extern artGetShortInstanceFromMterp
     EXPORT_PC
     lhu      a0, 2(rPC)                 # a0 <- field ref CCCC
     srl      a1, rINST, 12              # a1 <- B
     GET_VREG_U a1, a1                   # a1 <- fp[B], the object pointer
     ld       a2, OFF_FP_METHOD(rFP)     # a2 <- referrer
     move     a3, rSELF                  # a3 <- self
-    jal      artGetShortInstanceFromCode
+    jal      artGetShortInstanceFromMterp
     ld       a3, THREAD_EXCEPTION_OFFSET(rSELF)
     ext      a2, rINST, 8, 4            # a2 <- A
     PREFETCH_INST 2
diff --git a/runtime/interpreter/mterp/out/mterp_x86.S b/runtime/interpreter/mterp/out/mterp_x86.S
index 6be70cc..ad74b29 100644
--- a/runtime/interpreter/mterp/out/mterp_x86.S
+++ b/runtime/interpreter/mterp/out/mterp_x86.S
@@ -2132,7 +2132,7 @@
     movl    %eax, OUT_ARG2(%esp)            # referrer
     mov     rSELF, %ecx
     movl    %ecx, OUT_ARG3(%esp)            # self
-    call    SYMBOL(artGet32InstanceFromCode)
+    call    SYMBOL(artGet32InstanceFromMterp)
     movl    rSELF, %ecx
     RESTORE_IBASE_FROM_SELF %ecx
     cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
@@ -2165,7 +2165,7 @@
     movl    %eax, OUT_ARG2(%esp)            # referrer
     mov     rSELF, %ecx
     movl    %ecx, OUT_ARG3(%esp)            # self
-    call    SYMBOL(artGet64InstanceFromCode)
+    call    SYMBOL(artGet64InstanceFromMterp)
     mov     rSELF, %ecx
     cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
     jnz     MterpException                  # bail out
@@ -2196,7 +2196,7 @@
     movl    %eax, OUT_ARG2(%esp)            # referrer
     mov     rSELF, %ecx
     movl    %ecx, OUT_ARG3(%esp)            # self
-    call    SYMBOL(artGetObjInstanceFromCode)
+    call    SYMBOL(artGetObjInstanceFromMterp)
     movl    rSELF, %ecx
     RESTORE_IBASE_FROM_SELF %ecx
     cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
@@ -2231,7 +2231,7 @@
     movl    %eax, OUT_ARG2(%esp)            # referrer
     mov     rSELF, %ecx
     movl    %ecx, OUT_ARG3(%esp)            # self
-    call    SYMBOL(artGetBooleanInstanceFromCode)
+    call    SYMBOL(artGetBooleanInstanceFromMterp)
     movl    rSELF, %ecx
     RESTORE_IBASE_FROM_SELF %ecx
     cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
@@ -2266,7 +2266,7 @@
     movl    %eax, OUT_ARG2(%esp)            # referrer
     mov     rSELF, %ecx
     movl    %ecx, OUT_ARG3(%esp)            # self
-    call    SYMBOL(artGetByteInstanceFromCode)
+    call    SYMBOL(artGetByteInstanceFromMterp)
     movl    rSELF, %ecx
     RESTORE_IBASE_FROM_SELF %ecx
     cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
@@ -2301,7 +2301,7 @@
     movl    %eax, OUT_ARG2(%esp)            # referrer
     mov     rSELF, %ecx
     movl    %ecx, OUT_ARG3(%esp)            # self
-    call    SYMBOL(artGetCharInstanceFromCode)
+    call    SYMBOL(artGetCharInstanceFromMterp)
     movl    rSELF, %ecx
     RESTORE_IBASE_FROM_SELF %ecx
     cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
@@ -2336,7 +2336,7 @@
     movl    %eax, OUT_ARG2(%esp)            # referrer
     mov     rSELF, %ecx
     movl    %ecx, OUT_ARG3(%esp)            # self
-    call    SYMBOL(artGetShortInstanceFromCode)
+    call    SYMBOL(artGetShortInstanceFromMterp)
     movl    rSELF, %ecx
     RESTORE_IBASE_FROM_SELF %ecx
     cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
diff --git a/runtime/interpreter/mterp/out/mterp_x86_64.S b/runtime/interpreter/mterp/out/mterp_x86_64.S
index 562cf7c..56d68e6 100644
--- a/runtime/interpreter/mterp/out/mterp_x86_64.S
+++ b/runtime/interpreter/mterp/out/mterp_x86_64.S
@@ -2075,7 +2075,7 @@
     GET_VREG OUT_32_ARG1, %rcx              # the object pointer
     movq    OFF_FP_METHOD(rFP), OUT_ARG2    # referrer
     movq    rSELF, OUT_ARG3
-    call    SYMBOL(artGet32InstanceFromCode)
+    call    SYMBOL(artGet32InstanceFromMterp)
     movq    rSELF, %rcx
     cmpq    $0, THREAD_EXCEPTION_OFFSET(%rcx)
     jnz     MterpException                  # bail out
@@ -2108,7 +2108,7 @@
     GET_VREG OUT_32_ARG1, %rcx              # the object pointer
     movq    OFF_FP_METHOD(rFP), OUT_ARG2    # referrer
     movq    rSELF, OUT_ARG3
-    call    SYMBOL(artGet64InstanceFromCode)
+    call    SYMBOL(artGet64InstanceFromMterp)
     movq    rSELF, %rcx
     cmpq    $0, THREAD_EXCEPTION_OFFSET(%rcx)
     jnz     MterpException                  # bail out
@@ -2142,7 +2142,7 @@
     GET_VREG OUT_32_ARG1, %rcx              # the object pointer
     movq    OFF_FP_METHOD(rFP), OUT_ARG2    # referrer
     movq    rSELF, OUT_ARG3
-    call    SYMBOL(artGetObjInstanceFromCode)
+    call    SYMBOL(artGetObjInstanceFromMterp)
     movq    rSELF, %rcx
     cmpq    $0, THREAD_EXCEPTION_OFFSET(%rcx)
     jnz     MterpException                  # bail out
@@ -2176,7 +2176,7 @@
     GET_VREG OUT_32_ARG1, %rcx              # the object pointer
     movq    OFF_FP_METHOD(rFP), OUT_ARG2    # referrer
     movq    rSELF, OUT_ARG3
-    call    SYMBOL(artGetBooleanInstanceFromCode)
+    call    SYMBOL(artGetBooleanInstanceFromMterp)
     movq    rSELF, %rcx
     cmpq    $0, THREAD_EXCEPTION_OFFSET(%rcx)
     jnz     MterpException                  # bail out
@@ -2210,7 +2210,7 @@
     GET_VREG OUT_32_ARG1, %rcx              # the object pointer
     movq    OFF_FP_METHOD(rFP), OUT_ARG2    # referrer
     movq    rSELF, OUT_ARG3
-    call    SYMBOL(artGetByteInstanceFromCode)
+    call    SYMBOL(artGetByteInstanceFromMterp)
     movq    rSELF, %rcx
     cmpq    $0, THREAD_EXCEPTION_OFFSET(%rcx)
     jnz     MterpException                  # bail out
@@ -2244,7 +2244,7 @@
     GET_VREG OUT_32_ARG1, %rcx              # the object pointer
     movq    OFF_FP_METHOD(rFP), OUT_ARG2    # referrer
     movq    rSELF, OUT_ARG3
-    call    SYMBOL(artGetCharInstanceFromCode)
+    call    SYMBOL(artGetCharInstanceFromMterp)
     movq    rSELF, %rcx
     cmpq    $0, THREAD_EXCEPTION_OFFSET(%rcx)
     jnz     MterpException                  # bail out
@@ -2278,7 +2278,7 @@
     GET_VREG OUT_32_ARG1, %rcx              # the object pointer
     movq    OFF_FP_METHOD(rFP), OUT_ARG2    # referrer
     movq    rSELF, OUT_ARG3
-    call    SYMBOL(artGetShortInstanceFromCode)
+    call    SYMBOL(artGetShortInstanceFromMterp)
     movq    rSELF, %rcx
     cmpq    $0, THREAD_EXCEPTION_OFFSET(%rcx)
     jnz     MterpException                  # bail out
diff --git a/runtime/interpreter/mterp/x86/op_iget.S b/runtime/interpreter/mterp/x86/op_iget.S
index e3304ba..219463b 100644
--- a/runtime/interpreter/mterp/x86/op_iget.S
+++ b/runtime/interpreter/mterp/x86/op_iget.S
@@ -1,4 +1,4 @@
-%default { "is_object":"0", "helper":"artGet32InstanceFromCode"}
+%default { "is_object":"0", "helper":"artGet32InstanceFromMterp"}
 /*
  * General instance field get.
  *
diff --git a/runtime/interpreter/mterp/x86/op_iget_boolean.S b/runtime/interpreter/mterp/x86/op_iget_boolean.S
index 9ddad04..4ab2afc 100644
--- a/runtime/interpreter/mterp/x86/op_iget_boolean.S
+++ b/runtime/interpreter/mterp/x86/op_iget_boolean.S
@@ -1 +1 @@
-%include "x86/op_iget.S" { "helper":"artGetBooleanInstanceFromCode" }
+%include "x86/op_iget.S" { "helper":"artGetBooleanInstanceFromMterp" }
diff --git a/runtime/interpreter/mterp/x86/op_iget_byte.S b/runtime/interpreter/mterp/x86/op_iget_byte.S
index 8250788..bb282d4 100644
--- a/runtime/interpreter/mterp/x86/op_iget_byte.S
+++ b/runtime/interpreter/mterp/x86/op_iget_byte.S
@@ -1 +1 @@
-%include "x86/op_iget.S" { "helper":"artGetByteInstanceFromCode" }
+%include "x86/op_iget.S" { "helper":"artGetByteInstanceFromMterp" }
diff --git a/runtime/interpreter/mterp/x86/op_iget_char.S b/runtime/interpreter/mterp/x86/op_iget_char.S
index e9d2156..a13203b 100644
--- a/runtime/interpreter/mterp/x86/op_iget_char.S
+++ b/runtime/interpreter/mterp/x86/op_iget_char.S
@@ -1 +1 @@
-%include "x86/op_iget.S" { "helper":"artGetCharInstanceFromCode" }
+%include "x86/op_iget.S" { "helper":"artGetCharInstanceFromMterp" }
diff --git a/runtime/interpreter/mterp/x86/op_iget_object.S b/runtime/interpreter/mterp/x86/op_iget_object.S
index 3abeefc..79d5e5f 100644
--- a/runtime/interpreter/mterp/x86/op_iget_object.S
+++ b/runtime/interpreter/mterp/x86/op_iget_object.S
@@ -1 +1 @@
-%include "x86/op_iget.S" { "is_object":"1", "helper":"artGetObjInstanceFromCode" }
+%include "x86/op_iget.S" { "is_object":"1", "helper":"artGetObjInstanceFromMterp" }
diff --git a/runtime/interpreter/mterp/x86/op_iget_short.S b/runtime/interpreter/mterp/x86/op_iget_short.S
index c8fad89..8fc18a5 100644
--- a/runtime/interpreter/mterp/x86/op_iget_short.S
+++ b/runtime/interpreter/mterp/x86/op_iget_short.S
@@ -1 +1 @@
-%include "x86/op_iget.S" { "helper":"artGetShortInstanceFromCode" }
+%include "x86/op_iget.S" { "helper":"artGetShortInstanceFromMterp" }
diff --git a/runtime/interpreter/mterp/x86/op_iget_wide.S b/runtime/interpreter/mterp/x86/op_iget_wide.S
index a5d7e69..b111b29 100644
--- a/runtime/interpreter/mterp/x86/op_iget_wide.S
+++ b/runtime/interpreter/mterp/x86/op_iget_wide.S
@@ -14,7 +14,7 @@
     movl    %eax, OUT_ARG2(%esp)            # referrer
     mov     rSELF, %ecx
     movl    %ecx, OUT_ARG3(%esp)            # self
-    call    SYMBOL(artGet64InstanceFromCode)
+    call    SYMBOL(artGet64InstanceFromMterp)
     mov     rSELF, %ecx
     cmpl    $$0, THREAD_EXCEPTION_OFFSET(%ecx)
     jnz     MterpException                  # bail out
diff --git a/runtime/interpreter/mterp/x86_64/op_iget.S b/runtime/interpreter/mterp/x86_64/op_iget.S
index df43efe..ffc14b5 100644
--- a/runtime/interpreter/mterp/x86_64/op_iget.S
+++ b/runtime/interpreter/mterp/x86_64/op_iget.S
@@ -1,4 +1,4 @@
-%default { "is_object":"0", "helper":"artGet32InstanceFromCode", "wide":"0"}
+%default { "is_object":"0", "helper":"artGet32InstanceFromMterp", "wide":"0"}
 /*
  * General instance field get.
  *
diff --git a/runtime/interpreter/mterp/x86_64/op_iget_boolean.S b/runtime/interpreter/mterp/x86_64/op_iget_boolean.S
index 6ac5523..1379d53 100644
--- a/runtime/interpreter/mterp/x86_64/op_iget_boolean.S
+++ b/runtime/interpreter/mterp/x86_64/op_iget_boolean.S
@@ -1 +1 @@
-%include "x86_64/op_iget.S" { "helper":"artGetBooleanInstanceFromCode" }
+%include "x86_64/op_iget.S" { "helper":"artGetBooleanInstanceFromMterp" }
diff --git a/runtime/interpreter/mterp/x86_64/op_iget_byte.S b/runtime/interpreter/mterp/x86_64/op_iget_byte.S
index 6a861b1..93047ec 100644
--- a/runtime/interpreter/mterp/x86_64/op_iget_byte.S
+++ b/runtime/interpreter/mterp/x86_64/op_iget_byte.S
@@ -1 +1 @@
-%include "x86_64/op_iget.S" { "helper":"artGetByteInstanceFromCode" }
+%include "x86_64/op_iget.S" { "helper":"artGetByteInstanceFromMterp" }
diff --git a/runtime/interpreter/mterp/x86_64/op_iget_char.S b/runtime/interpreter/mterp/x86_64/op_iget_char.S
index 021a0f1..239f0d0 100644
--- a/runtime/interpreter/mterp/x86_64/op_iget_char.S
+++ b/runtime/interpreter/mterp/x86_64/op_iget_char.S
@@ -1 +1 @@
-%include "x86_64/op_iget.S" { "helper":"artGetCharInstanceFromCode" }
+%include "x86_64/op_iget.S" { "helper":"artGetCharInstanceFromMterp" }
diff --git a/runtime/interpreter/mterp/x86_64/op_iget_object.S b/runtime/interpreter/mterp/x86_64/op_iget_object.S
index d92bc9c..2104d2c 100644
--- a/runtime/interpreter/mterp/x86_64/op_iget_object.S
+++ b/runtime/interpreter/mterp/x86_64/op_iget_object.S
@@ -1 +1 @@
-%include "x86_64/op_iget.S" { "is_object":"1", "helper":"artGetObjInstanceFromCode" }
+%include "x86_64/op_iget.S" { "is_object":"1", "helper":"artGetObjInstanceFromMterp" }
diff --git a/runtime/interpreter/mterp/x86_64/op_iget_short.S b/runtime/interpreter/mterp/x86_64/op_iget_short.S
index f158bea..3525eff 100644
--- a/runtime/interpreter/mterp/x86_64/op_iget_short.S
+++ b/runtime/interpreter/mterp/x86_64/op_iget_short.S
@@ -1 +1 @@
-%include "x86_64/op_iget.S" { "helper":"artGetShortInstanceFromCode" }
+%include "x86_64/op_iget.S" { "helper":"artGetShortInstanceFromMterp" }
diff --git a/runtime/interpreter/mterp/x86_64/op_iget_wide.S b/runtime/interpreter/mterp/x86_64/op_iget_wide.S
index 74bb9ff..706c441 100644
--- a/runtime/interpreter/mterp/x86_64/op_iget_wide.S
+++ b/runtime/interpreter/mterp/x86_64/op_iget_wide.S
@@ -1 +1 @@
-%include "x86_64/op_iget.S" { "helper":"artGet64InstanceFromCode", "wide":"1" }
+%include "x86_64/op_iget.S" { "helper":"artGet64InstanceFromMterp", "wide":"1" }
diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc
index b92affa..a8692a0 100644
--- a/runtime/jit/jit_code_cache.cc
+++ b/runtime/jit/jit_code_cache.cc
@@ -608,17 +608,17 @@
 
 void JitCodeCache::FreeAllMethodHeaders(
     const std::unordered_set<OatQuickMethodHeader*>& method_headers) {
-  {
-    MutexLock mu(Thread::Current(), *Locks::cha_lock_);
-    Runtime::Current()->GetClassLinker()->GetClassHierarchyAnalysis()
-        ->RemoveDependentsWithMethodHeaders(method_headers);
-  }
-
   // We need to remove entries in method_headers from CHA dependencies
   // first since once we do FreeCode() below, the memory can be reused
   // so it's possible for the same method_header to start representing
   // different compiled code.
   MutexLock mu(Thread::Current(), lock_);
+  {
+    MutexLock mu2(Thread::Current(), *Locks::cha_lock_);
+    Runtime::Current()->GetClassLinker()->GetClassHierarchyAnalysis()
+        ->RemoveDependentsWithMethodHeaders(method_headers);
+  }
+
   ScopedCodeCacheWrite scc(this);
   for (const OatQuickMethodHeader* method_header : method_headers) {
     FreeCodeAndData(method_header->GetCode());
@@ -742,6 +742,18 @@
   method->SetCounter(std::min(jit_warmup_threshold - 1, 1));
 }
 
+void JitCodeCache::WaitForPotentialCollectionToCompleteRunnable(Thread* self) {
+  while (collection_in_progress_) {
+    lock_.Unlock(self);
+    {
+      ScopedThreadSuspension sts(self, kSuspended);
+      MutexLock mu(self, lock_);
+      WaitForPotentialCollectionToComplete(self);
+    }
+    lock_.Lock(self);
+  }
+}
+
 uint8_t* JitCodeCache::CommitCodeInternal(Thread* self,
                                           ArtMethod* method,
                                           uint8_t* stack_map,
@@ -755,6 +767,13 @@
                                           const ArenaSet<ArtMethod*>&
                                               cha_single_implementation_list) {
   DCHECK(!method->IsNative() || !osr);
+
+  if (!method->IsNative()) {
+    // We need to do this before grabbing the lock_ because it needs to be able to see the string
+    // InternTable. Native methods do not have roots.
+    DCheckRootsAreValid(roots);
+  }
+
   size_t alignment = GetInstructionSetAlignment(kRuntimeISA);
   // Ensure the header ends up at expected instruction alignment.
   size_t header_size = RoundUp(sizeof(OatQuickMethodHeader), alignment);
@@ -763,44 +782,45 @@
   OatQuickMethodHeader* method_header = nullptr;
   uint8_t* code_ptr = nullptr;
   uint8_t* memory = nullptr;
+  MutexLock mu(self, lock_);
+  // We need to make sure that there will be no jit-gcs going on and wait for any ongoing one to
+  // finish.
+  WaitForPotentialCollectionToCompleteRunnable(self);
   {
-    ScopedThreadSuspension sts(self, kSuspended);
-    MutexLock mu(self, lock_);
-    WaitForPotentialCollectionToComplete(self);
-    {
-      ScopedCodeCacheWrite scc(this);
-      memory = AllocateCode(total_size);
-      if (memory == nullptr) {
-        return nullptr;
-      }
-      code_ptr = memory + header_size;
+    ScopedCodeCacheWrite scc(this);
+    memory = AllocateCode(total_size);
+    if (memory == nullptr) {
+      return nullptr;
+    }
+    code_ptr = memory + header_size;
 
-      std::copy(code, code + code_size, code_ptr);
-      method_header = OatQuickMethodHeader::FromCodePointer(code_ptr);
-      new (method_header) OatQuickMethodHeader(
-          (stack_map != nullptr) ? code_ptr - stack_map : 0u,
-          code_size);
-      // Flush caches before we remove write permission because some ARMv8 Qualcomm kernels may
-      // trigger a segfault if a page fault occurs when requesting a cache maintenance operation.
-      // This is a kernel bug that we need to work around until affected devices (e.g. Nexus 5X and
-      // 6P) stop being supported or their kernels are fixed.
-      //
-      // For reference, this behavior is caused by this commit:
-      // https://android.googlesource.com/kernel/msm/+/3fbe6bc28a6b9939d0650f2f17eb5216c719950c
-      FlushInstructionCache(reinterpret_cast<char*>(code_ptr),
-                            reinterpret_cast<char*>(code_ptr + code_size));
-      DCHECK(!Runtime::Current()->IsAotCompiler());
-      if (has_should_deoptimize_flag) {
-        method_header->SetHasShouldDeoptimizeFlag();
-      }
+    std::copy(code, code + code_size, code_ptr);
+    method_header = OatQuickMethodHeader::FromCodePointer(code_ptr);
+    new (method_header) OatQuickMethodHeader(
+        (stack_map != nullptr) ? code_ptr - stack_map : 0u,
+        code_size);
+    // Flush caches before we remove write permission because some ARMv8 Qualcomm kernels may
+    // trigger a segfault if a page fault occurs when requesting a cache maintenance operation.
+    // This is a kernel bug that we need to work around until affected devices (e.g. Nexus 5X and
+    // 6P) stop being supported or their kernels are fixed.
+    //
+    // For reference, this behavior is caused by this commit:
+    // https://android.googlesource.com/kernel/msm/+/3fbe6bc28a6b9939d0650f2f17eb5216c719950c
+    FlushInstructionCache(reinterpret_cast<char*>(code_ptr),
+                          reinterpret_cast<char*>(code_ptr + code_size));
+    DCHECK(!Runtime::Current()->IsAotCompiler());
+    if (has_should_deoptimize_flag) {
+      method_header->SetHasShouldDeoptimizeFlag();
     }
 
     number_of_compilations_++;
   }
   // We need to update the entry point in the runnable state for the instrumentation.
   {
-    // Need cha_lock_ for checking all single-implementation flags and register
-    // dependencies.
+    // The following needs to be guarded by cha_lock_ also. Otherwise it's possible that the
+    // compiled code is considered invalidated by some class linking, but below we still make the
+    // compiled code valid for the method. We also need cha_lock_ for checking all
+    // single-implementation flags and register dependencies.
     MutexLock cha_mu(self, *Locks::cha_lock_);
     bool single_impl_still_valid = true;
     for (ArtMethod* single_impl : cha_single_implementation_list) {
@@ -826,16 +846,6 @@
           single_impl, method, method_header);
     }
 
-    if (!method->IsNative()) {
-      // We need to do this before grabbing the lock_ because it needs to be able to see the string
-      // InternTable. Native methods do not have roots.
-      DCheckRootsAreValid(roots);
-    }
-
-    // The following needs to be guarded by cha_lock_ also. Otherwise it's
-    // possible that the compiled code is considered invalidated by some class linking,
-    // but below we still make the compiled code valid for the method.
-    MutexLock mu(self, lock_);
     if (UNLIKELY(method->IsNative())) {
       auto it = jni_stubs_map_.find(JniStubKey(method));
       DCHECK(it != jni_stubs_map_.end())
@@ -867,11 +877,6 @@
             method, method_header->GetEntryPoint());
       }
     }
-    if (collection_in_progress_) {
-      // We need to update the live bitmap if there is a GC to ensure it sees this new
-      // code.
-      GetLiveBitmap()->AtomicTestAndSet(FromCodeToAllocation(code_ptr));
-    }
     VLOG(jit)
         << "JIT added (osr=" << std::boolalpha << osr << std::noboolalpha << ") "
         << ArtMethod::PrettyMethod(method) << "@" << method
diff --git a/runtime/jit/jit_code_cache.h b/runtime/jit/jit_code_cache.h
index 29f9c9c..632b45b 100644
--- a/runtime/jit/jit_code_cache.h
+++ b/runtime/jit/jit_code_cache.h
@@ -314,6 +314,12 @@
       REQUIRES(lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
+  // If a collection is in progress, wait for it to finish. Must be called with the mutator lock
+  // held. The non-mutator-lock version should be used if possible. This method will release and
+  // then re-acquire the mutator lock.
+  void WaitForPotentialCollectionToCompleteRunnable(Thread* self)
+      REQUIRES(lock_, !Roles::uninterruptible_) REQUIRES_SHARED(Locks::mutator_lock_);
+
   // If a collection is in progress, wait for it to finish. Return
   // whether the thread actually waited.
   bool WaitForPotentialCollectionToComplete(Thread* self)
diff --git a/runtime/native/java_lang_Thread.cc b/runtime/native/java_lang_Thread.cc
index 13871f7..b7f0a7a 100644
--- a/runtime/native/java_lang_Thread.cc
+++ b/runtime/native/java_lang_Thread.cc
@@ -122,7 +122,7 @@
   return thread->HoldsLock(object);
 }
 
-static void Thread_nativeInterrupt(JNIEnv* env, jobject java_thread) {
+static void Thread_interrupt0(JNIEnv* env, jobject java_thread) {
   ScopedFastNativeObjectAccess soa(env);
   MutexLock mu(soa.Self(), *Locks::thread_list_lock_);
   Thread* thread = Thread::FromManagedThread(soa, java_thread);
@@ -131,7 +131,7 @@
   }
 }
 
-static void Thread_nativeSetName(JNIEnv* env, jobject peer, jstring java_name) {
+static void Thread_setNativeName(JNIEnv* env, jobject peer, jstring java_name) {
   ScopedUtfChars name(env, java_name);
   {
     ScopedObjectAccess soa(env);
@@ -168,7 +168,7 @@
  * from Thread.MIN_PRIORITY to Thread.MAX_PRIORITY (1-10), with "normal"
  * threads at Thread.NORM_PRIORITY (5).
  */
-static void Thread_nativeSetPriority(JNIEnv* env, jobject java_thread, jint new_priority) {
+static void Thread_setPriority0(JNIEnv* env, jobject java_thread, jint new_priority) {
   ScopedObjectAccess soa(env);
   MutexLock mu(soa.Self(), *Locks::thread_list_lock_);
   Thread* thread = Thread::FromManagedThread(soa, java_thread);
@@ -200,9 +200,9 @@
   NATIVE_METHOD(Thread, nativeCreate, "(Ljava/lang/Thread;JZ)V"),
   NATIVE_METHOD(Thread, nativeGetStatus, "(Z)I"),
   NATIVE_METHOD(Thread, holdsLock, "(Ljava/lang/Object;)Z"),
-  FAST_NATIVE_METHOD(Thread, nativeInterrupt, "()V"),
-  NATIVE_METHOD(Thread, nativeSetName, "(Ljava/lang/String;)V"),
-  NATIVE_METHOD(Thread, nativeSetPriority, "(I)V"),
+  FAST_NATIVE_METHOD(Thread, interrupt0, "()V"),
+  NATIVE_METHOD(Thread, setNativeName, "(Ljava/lang/String;)V"),
+  NATIVE_METHOD(Thread, setPriority0, "(I)V"),
   FAST_NATIVE_METHOD(Thread, sleep, "(Ljava/lang/Object;JI)V"),
   NATIVE_METHOD(Thread, yield, "()V"),
 };
diff --git a/test/458-checker-instruct-simplification/src/Main.java b/test/458-checker-instruct-simplification/src/Main.java
index 9e714f5..5ffb75f 100644
--- a/test/458-checker-instruct-simplification/src/Main.java
+++ b/test/458-checker-instruct-simplification/src/Main.java
@@ -1070,7 +1070,7 @@
   /// CHECK-DAG:     <<Phi1:i\d+>>     Phi [<<Const0>>,<<Const1>>]
   /// CHECK-DAG:     <<Cond:z\d+>>     Equal [<<Phi1>>,<<Const2>>]
   /// CHECK-DAG:                       If [<<Cond>>]
-  /// CHECK-DAG:     <<Phi2:i\d+>>     Phi [<<Const1>>,<<Const0>>]
+  /// CHECK-DAG:     <<Phi2:i\d+>>     Phi [<<Const0>>,<<Const1>>]
   /// CHECK-DAG:                       Return [<<Phi2>>]
 
   /// CHECK-START: boolean Main.$noinline$EqualBoolVsIntConst(boolean) dead_code_elimination$after_inlining (after)
@@ -1096,7 +1096,7 @@
   /// CHECK-DAG:     <<Phi1:i\d+>>     Phi [<<Const0>>,<<Const1>>]
   /// CHECK-DAG:     <<Cond:z\d+>>     NotEqual [<<Phi1>>,<<Const2>>]
   /// CHECK-DAG:                       If [<<Cond>>]
-  /// CHECK-DAG:     <<Phi2:i\d+>>     Phi [<<Const1>>,<<Const0>>]
+  /// CHECK-DAG:     <<Phi2:i\d+>>     Phi [<<Const0>>,<<Const1>>]
   /// CHECK-DAG:                       Return [<<Phi2>>]
 
   /// CHECK-START: boolean Main.$noinline$NotEqualBoolVsIntConst(boolean) dead_code_elimination$after_inlining (after)
diff --git a/test/618-checker-induction/src/Main.java b/test/618-checker-induction/src/Main.java
index 1460725..dd76e41 100644
--- a/test/618-checker-induction/src/Main.java
+++ b/test/618-checker-induction/src/Main.java
@@ -290,7 +290,7 @@
   /// CHECK-DAG: <<Phi2:i\d+>> Phi               loop:<<Loop1>>      outer_loop:none
   /// CHECK-DAG: <<Phi3:i\d+>> Phi               loop:<<Loop2:B\d+>> outer_loop:<<Loop1>>
   /// CHECK-DAG: <<Phi4:i\d+>> Phi               loop:<<Loop2>>      outer_loop:<<Loop1>>
-  /// CHECK-DAG:               Return [<<Phi2>>] loop:none
+  /// CHECK-DAG:               Return [<<Phi1>>] loop:none
   //
   /// CHECK-START: int Main.closedFormNested() loop_optimization (after)
   /// CHECK-NOT:               Phi
@@ -313,7 +313,7 @@
   /// CHECK-DAG: <<Phi2:i\d+>> Phi               loop:<<Loop1>>      outer_loop:none
   /// CHECK-DAG: <<Phi3:i\d+>> Phi               loop:<<Loop2:B\d+>> outer_loop:<<Loop1>>
   /// CHECK-DAG: <<Phi4:i\d+>> Phi               loop:<<Loop2>>      outer_loop:<<Loop1>>
-  /// CHECK-DAG:               Return [<<Phi2>>] loop:none
+  /// CHECK-DAG:               Return [<<Phi1>>] loop:none
   //
   /// CHECK-START: int Main.closedFormNestedAlt() loop_optimization (after)
   /// CHECK-NOT:               Phi
@@ -547,7 +547,7 @@
   /// CHECK-DAG: <<Phi2:i\d+>> Phi               loop:<<Loop1>>      outer_loop:none
   /// CHECK-DAG: <<Phi3:i\d+>> Phi               loop:<<Loop2:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Phi4:i\d+>> Phi               loop:<<Loop2>>      outer_loop:none
-  /// CHECK-DAG:               Return [<<Phi4>>] loop:none
+  /// CHECK-DAG:               Return [<<Phi3>>] loop:none
   /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>"
   //
   /// CHECK-START: int Main.closedFeed() loop_optimization (after)
@@ -634,7 +634,7 @@
   /// CHECK-START: boolean Main.periodicBoolIdiom1() loop_optimization (before)
   /// CHECK-DAG: <<Phi1:i\d+>> Phi               loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Phi2:i\d+>> Phi               loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG:               Return [<<Phi2>>] loop:none
+  /// CHECK-DAG:               Return [<<Phi1>>] loop:none
   //
   /// CHECK-START: boolean Main.periodicBoolIdiom1() loop_optimization (after)
   /// CHECK-NOT:               Phi
@@ -653,7 +653,7 @@
   /// CHECK-START: boolean Main.periodicBoolIdiom2() loop_optimization (before)
   /// CHECK-DAG: <<Phi1:i\d+>> Phi               loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Phi2:i\d+>> Phi               loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG:               Return [<<Phi2>>] loop:none
+  /// CHECK-DAG:               Return [<<Phi1>>] loop:none
   //
   /// CHECK-START: boolean Main.periodicBoolIdiom2() loop_optimization (after)
   /// CHECK-NOT:               Phi
@@ -672,7 +672,7 @@
   /// CHECK-START: boolean Main.periodicBoolIdiom3() loop_optimization (before)
   /// CHECK-DAG: <<Phi1:i\d+>> Phi               loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Phi2:i\d+>> Phi               loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG:               Return [<<Phi2>>] loop:none
+  /// CHECK-DAG:               Return [<<Phi1>>] loop:none
   //
   /// CHECK-START: boolean Main.periodicBoolIdiom3() loop_optimization (after)
   /// CHECK-NOT:               Phi
@@ -691,7 +691,7 @@
   /// CHECK-START: boolean Main.periodicBoolIdiom1N(boolean, int) loop_optimization (before)
   /// CHECK-DAG: <<Phi1:i\d+>> Phi               loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Phi2:i\d+>> Phi               loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG:               Return [<<Phi1>>] loop:none
+  /// CHECK-DAG:               Return [<<Phi2>>] loop:none
   //
   /// CHECK-START: boolean Main.periodicBoolIdiom1N(boolean, int) loop_optimization (after)
   /// CHECK-NOT:               Phi
@@ -705,7 +705,7 @@
   /// CHECK-START: boolean Main.periodicBoolIdiom2N(boolean, int) loop_optimization (before)
   /// CHECK-DAG: <<Phi1:i\d+>> Phi               loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Phi2:i\d+>> Phi               loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG:               Return [<<Phi1>>] loop:none
+  /// CHECK-DAG:               Return [<<Phi2>>] loop:none
   //
   /// CHECK-START: boolean Main.periodicBoolIdiom2N(boolean, int) loop_optimization (after)
   /// CHECK-NOT:               Phi
@@ -719,7 +719,7 @@
   /// CHECK-START: boolean Main.periodicBoolIdiom3N(boolean, int) loop_optimization (before)
   /// CHECK-DAG: <<Phi1:i\d+>> Phi               loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Phi2:i\d+>> Phi               loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG:               Return [<<Phi1>>] loop:none
+  /// CHECK-DAG:               Return [<<Phi2>>] loop:none
   //
   /// CHECK-START: boolean Main.periodicBoolIdiom3N(boolean, int) loop_optimization (after)
   /// CHECK-NOT:               Phi
diff --git a/test/669-checker-break/src/Main.java b/test/669-checker-break/src/Main.java
index e59061b..c40e4a6 100644
--- a/test/669-checker-break/src/Main.java
+++ b/test/669-checker-break/src/Main.java
@@ -232,8 +232,8 @@
   /// CHECK-DAG: <<Zero:i\d+>> IntConstant 0                  loop:none
   /// CHECK-DAG: <<One:i\d+>>  IntConstant 1                  loop:none
   /// CHECK-DAG: <<Nil:l\d+>>  NullCheck [<<Par>>]                 loop:none
-  /// CHECK-DAG: <<Phi:i\d+>>  Phi [<<Zero>>,<<AddI:i\d+>>]   loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Red:i\d+>>  Phi [<<Zero>>,<<RedI:i\d+>>]   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Red:i\d+>>  Phi [<<Zero>>,<<RedI:i\d+>>]   loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi [<<Zero>>,<<AddI:i\d+>>]   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Bnd:i\d+>>  BoundsCheck [<<Phi>>,{{i\d+}}] loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get:i\d+>>  ArrayGet [<<Nil>>,<<Bnd>>]     loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<RedI>>      Add [<<Red>>,<<Get>>]          loop:<<Loop>>      outer_loop:none
@@ -248,8 +248,8 @@
   /// CHECK-DAG: <<Zero:i\d+>> IntConstant 0                      loop:none
   /// CHECK-DAG: <<One:i\d+>>  IntConstant 1                      loop:none
   /// CHECK-DAG: <<Nil:l\d+>>  NullCheck [<<Par>>]                loop:none
-  /// CHECK-DAG: <<Phi:i\d+>>  Phi [<<Zero>>,<<AddI:i\d+>>]       loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Red:i\d+>>  Phi [<<Zero>>,<<RedI:i\d+>>]       loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Red:i\d+>>  Phi [<<Zero>>,<<RedI:i\d+>>]       loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi [<<Zero>>,<<AddI:i\d+>>]       loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<LE:z\d+>>   LessThanOrEqual [<<Phi>>,{{i\d+}}] loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:               If [<<LE>>]                        loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Bnd:i\d+>>  BoundsCheck [<<Phi>>,{{i\d+}}]     loop:<<Loop>>      outer_loop:none
diff --git a/tools/class2greylist/src/com/android/class2greylist/AnnotationVisitor.java b/tools/class2greylist/src/com/android/class2greylist/AnnotationVisitor.java
index c5c8ef0..5914b26 100644
--- a/tools/class2greylist/src/com/android/class2greylist/AnnotationVisitor.java
+++ b/tools/class2greylist/src/com/android/class2greylist/AnnotationVisitor.java
@@ -119,7 +119,9 @@
         for (AnnotationEntry a : member.getAnnotationEntries()) {
             if (mAnnotationType.equals(a.getAnnotationType())) {
                 mStatus.debug("Member has annotation %s", mAnnotationType);
-                boolean bridge = (member.getAccessFlags() & Const.ACC_BRIDGE) != 0;
+                // For fields, the same access flag means volatile, so only check for methods.
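+                // (In the class file format ACC_BRIDGE and ACC_VOLATILE share the flag value
+                // 0x0040, one defined for methods and the other for fields.)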
+                boolean bridge = (member instanceof Method)
+                        && (member.getAccessFlags() & Const.ACC_BRIDGE) != 0;
                 if (bridge) {
                     mStatus.debug("Member is a bridge", mAnnotationType);
                 }
diff --git a/tools/class2greylist/test/src/com/android/javac/AnnotationVisitorTest.java b/tools/class2greylist/test/src/com/android/javac/AnnotationVisitorTest.java
index ff9c265..20c959d 100644
--- a/tools/class2greylist/test/src/com/android/javac/AnnotationVisitorTest.java
+++ b/tools/class2greylist/test/src/com/android/javac/AnnotationVisitorTest.java
@@ -361,4 +361,40 @@
         verify(mStatus, times(1)).greylistEntry(greylist.capture());
         assertThat(greylist.getValue()).isEqualTo("La/b/Class;->method(Ljava/lang/String;)V");
     }
+
+    @Test
+    public void testVolatileField() throws IOException {
+        mJavac.addSource("a.b.Class", Joiner.on('\n').join(
+                "package a.b;",
+                "import annotation.Anno;",
+                "public class Class {",
+                "  @Anno(expectedSignature=\"La/b/Class;->field:I\")",
+                "  public volatile int field;",
+                "}"));
+        assertThat(mJavac.compile()).isTrue();
+
+        new AnnotationVisitor(mJavac.getCompiledClass("a.b.Class"), ANNOTATION,
+                x -> true, mStatus).visit();
+        assertNoErrors();
+        ArgumentCaptor<String> greylist = ArgumentCaptor.forClass(String.class);
+        verify(mStatus, times(1)).greylistEntry(greylist.capture());
+        assertThat(greylist.getValue()).isEqualTo("La/b/Class;->field:I");
+    }
+
+    @Test
+    public void testVolatileFieldWrongSignature() throws IOException {
+        mJavac.addSource("a.b.Class", Joiner.on('\n').join(
+                "package a.b;",
+                "import annotation.Anno;",
+                "public class Class {",
+                "  @Anno(expectedSignature=\"La/b/Class;->wrong:I\")",
+                "  public volatile int field;",
+                "}"));
+        assertThat(mJavac.compile()).isTrue();
+
+        new AnnotationVisitor(mJavac.getCompiledClass("a.b.Class"), ANNOTATION,
+                x -> true, mStatus).visit();
+        verify(mStatus, times(1)).error(any(String.class));
+    }
 }
diff --git a/tools/dexanalyze/dexanalyze_bytecode.cc b/tools/dexanalyze/dexanalyze_bytecode.cc
index 1c5a5d5..0bb3f91 100644
--- a/tools/dexanalyze/dexanalyze_bytecode.cc
+++ b/tools/dexanalyze/dexanalyze_bytecode.cc
@@ -164,13 +164,7 @@
                                               std::map<size_t, TypeLinkage>& types) {
   TypeLinkage& current_type = types[current_class_type.index_];
   bool skip_next = false;
-  size_t last_start = 0u;
   for (auto inst = code_item.begin(); ; ++inst) {
-    if (!count_types && last_start != buffer_.size()) {
-      // Register the instruction blob.
-      ++instruction_freq_[std::vector<uint8_t>(buffer_.begin() + last_start, buffer_.end())];
-      last_start = buffer_.size();
-    }
     if (inst == code_item.end()) {
       break;
     }
@@ -334,31 +328,31 @@
               }
             }
 
-            bool result = false;
             uint32_t type_idx = current_type.types_.Get(receiver_type.index_);
             uint32_t local_idx = types[receiver_type.index_].methods_.Get(method_idx);
-            ExtendPrefix(&type_idx, &local_idx);
-            ExtendPrefix(&dest_reg, &local_idx);
-            if (arg_count == 0) {
-              result = InstNibbles(opcode, {dest_reg, type_idx, local_idx});
-            } else if (arg_count == 1) {
-              result = InstNibbles(opcode, {dest_reg, type_idx, local_idx, args[0]});
-            } else if (arg_count == 2) {
-              result = InstNibbles(opcode, {dest_reg, type_idx, local_idx, args[0],
-                                            args[1]});
-            } else if (arg_count == 3) {
-              result = InstNibbles(opcode, {dest_reg, type_idx, local_idx, args[0],
-                                            args[1], args[2]});
-            } else if (arg_count == 4) {
-              result = InstNibbles(opcode, {dest_reg, type_idx, local_idx, args[0],
-                                            args[1], args[2], args[3]});
-            } else if (arg_count == 5) {
-              result = InstNibbles(opcode, {dest_reg, type_idx, local_idx, args[0],
-                                            args[1], args[2], args[3], args[4]});
-            }
 
-            if (result) {
+            // If true, we always put the return value in r0.
+            static constexpr bool kMoveToDestReg = true;
+
+            std::vector<uint32_t> new_args;
+            if (kMoveToDestReg && arg_count % 2 == 1) {
+              // Use the extra nibble to sneak in part of the type index.
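+              // (The xor below, local_idx ^= local_idx & 0xF0, just clears bits [7:4] of
+              // local_idx; it is equivalent to local_idx &= ~0xF0u.)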
+              new_args.push_back(local_idx >> 4);
+              local_idx ^= local_idx & 0xF0;
+            }
+            ExtendPrefix(&type_idx, &local_idx);
+            new_args.push_back(type_idx);
+            new_args.push_back(local_idx);
+            if (!kMoveToDestReg) {
+              ExtendPrefix(&dest_reg, &local_idx);
+              new_args.push_back(dest_reg);
+            }
+            new_args.insert(new_args.end(), args, args + arg_count);
+            if (InstNibbles(opcode, new_args)) {
               skip_next = next_move_result;
+              if (kMoveToDestReg && dest_reg != 0u) {
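+                // kMoveToDestReg presumably leaves the invoke result in register 0, so copy it
+                // into the original destination register with an explicit MOVE (dest_reg is
+                // split into its two nibbles for encoding).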
+                CHECK(InstNibbles(Instruction::MOVE, {dest_reg >> 4, dest_reg & 0xF}));
+              }
               continue;
             }
           }
@@ -466,8 +460,11 @@
 
 void NewRegisterInstructions::Add(Instruction::Code opcode, const Instruction& inst) {
   const uint8_t* start = reinterpret_cast<const uint8_t*>(&inst);
+  const size_t buffer_start = buffer_.size();
   buffer_.push_back(opcode);
   buffer_.insert(buffer_.end(), start + 1, start + 2 * inst.SizeInCodeUnits());
+  // Register the instruction blob.
+  ++instruction_freq_[std::vector<uint8_t>(buffer_.begin() + buffer_start, buffer_.end())];
 }
 
 void NewRegisterInstructions::ExtendPrefix(uint32_t* value1, uint32_t* value2) {
@@ -500,17 +497,6 @@
   *value2 &= 0XF;
 }
 
-bool NewRegisterInstructions::InstNibblesAndIndex(uint8_t opcode,
-                                             uint16_t idx,
-                                             const std::vector<uint32_t>& args) {
-  if (!InstNibbles(opcode, args)) {
-    return false;
-  }
-  buffer_.push_back(static_cast<uint8_t>(idx >> 8));
-  buffer_.push_back(static_cast<uint8_t>(idx));
-  return true;
-}
-
 bool NewRegisterInstructions::InstNibbles(uint8_t opcode, const std::vector<uint32_t>& args) {
   if (verbose_level_ >= VerboseLevel::kEverything) {
     std::cout << " ==> " << Instruction::Name(static_cast<Instruction::Code>(opcode)) << " ";
@@ -526,6 +512,7 @@
       return false;
     }
   }
+  const size_t buffer_start = buffer_.size();
   buffer_.push_back(opcode);
   for (size_t i = 0; i < args.size(); i += 2) {
     buffer_.push_back(args[i] << 4);
@@ -536,6 +523,8 @@
   while (buffer_.size() % alignment_ != 0) {
     buffer_.push_back(0);
   }
+  // Register the instruction blob.
+  ++instruction_freq_[std::vector<uint8_t>(buffer_.begin() + buffer_start, buffer_.end())];
   return true;
 }
 
diff --git a/tools/dexanalyze/dexanalyze_bytecode.h b/tools/dexanalyze/dexanalyze_bytecode.h
index ed40ba7..db009b0 100644
--- a/tools/dexanalyze/dexanalyze_bytecode.h
+++ b/tools/dexanalyze/dexanalyze_bytecode.h
@@ -64,7 +64,6 @@
                        bool count_types,
                        std::map<size_t, TypeLinkage>& types);
   void Add(Instruction::Code opcode, const Instruction& inst);
-  bool InstNibblesAndIndex(uint8_t opcode, uint16_t idx, const std::vector<uint32_t>& args);
   bool InstNibbles(uint8_t opcode, const std::vector<uint32_t>& args);
   void ExtendPrefix(uint32_t* value1, uint32_t* value2);
   bool Enabled(BytecodeExperiment experiment) const {