Merge "Disable ART run-test 648-many-direct-methods with "no-image"."
diff --git a/compiler/linker/arm/relative_patcher_arm_base.h b/compiler/linker/arm/relative_patcher_arm_base.h
index 2cb1b6c..47f840f 100644
--- a/compiler/linker/arm/relative_patcher_arm_base.h
+++ b/compiler/linker/arm/relative_patcher_arm_base.h
@@ -43,10 +43,11 @@
   enum class ThunkType {
     kMethodCall,              // Method call thunk.
     kBakerReadBarrierField,   // Baker read barrier, load field or array element at known offset.
+    kBakerReadBarrierArray,   // Baker read barrier, array load with index in register.
     kBakerReadBarrierRoot,    // Baker read barrier, GC root load.
   };
 
-  struct BakerReadBarrierOffsetParams {
+  struct BakerReadBarrierFieldParams {
     uint32_t holder_reg;      // Holder object for reading lock word.
     uint32_t base_reg;        // Base register, different from holder for large offset.
                               // If base differs from holder, it should be a pre-defined
@@ -54,9 +55,16 @@
                               // The offset is retrieved using introspection.
   };
 
+  struct BakerReadBarrierArrayParams {
+    uint32_t base_reg;        // Register holding the reference to the start of the array data.
+    uint32_t dummy;           // Dummy field.
+                              // The index register is retrieved using introspection
+                              // to limit the number of thunks we need to emit.
+  };
+
   struct BakerReadBarrierRootParams {
     uint32_t root_reg;        // The register holding the GC root.
-    uint32_t dummy;
+    uint32_t dummy;           // Dummy field.
   };
 
   struct RawThunkParams {
@@ -66,8 +74,12 @@
 
   union ThunkParams {
     RawThunkParams raw_params;
-    BakerReadBarrierOffsetParams offset_params;
+    BakerReadBarrierFieldParams field_params;
+    BakerReadBarrierArrayParams array_params;
     BakerReadBarrierRootParams root_params;
+    static_assert(sizeof(raw_params) == sizeof(field_params), "field_params size check");
+    static_assert(sizeof(raw_params) == sizeof(array_params), "array_params size check");
+    static_assert(sizeof(raw_params) == sizeof(root_params), "root_params size check");
   };
 
   class ThunkKey {
@@ -78,9 +90,14 @@
       return type_;
     }
 
-    BakerReadBarrierOffsetParams GetOffsetParams() const {
+    BakerReadBarrierFieldParams GetFieldParams() const {
       DCHECK(type_ == ThunkType::kBakerReadBarrierField);
-      return params_.offset_params;
+      return params_.field_params;
+    }
+
+    BakerReadBarrierArrayParams GetArrayParams() const {
+      DCHECK(type_ == ThunkType::kBakerReadBarrierArray);
+      return params_.array_params;
     }
 
     BakerReadBarrierRootParams GetRootParams() const {
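
Note on the params union above: all three variants are standard-layout structs of two uint32_t fields, which the static_asserts added in this hunk enforce. A minimal standalone sketch, assuming simplified types rather than ART's actual definitions, of why that matters: the common-initial-sequence rule then lets a patcher inspect raw_params to order and deduplicate thunk keys no matter which variant was written.

#include <cstdint>
#include <tuple>

struct RawThunkParams { uint32_t first; uint32_t second; };
struct FieldParams    { uint32_t holder_reg; uint32_t base_reg; };
struct ArrayParams    { uint32_t base_reg; uint32_t dummy; };

union Params {
  RawThunkParams raw_params;
  FieldParams field_params;
  ArrayParams array_params;
  static_assert(sizeof(raw_params) == sizeof(field_params), "field_params size check");
  static_assert(sizeof(raw_params) == sizeof(array_params), "array_params size check");
};

// One comparator serves every thunk type: reading raw_params is allowed
// because all variants share a common initial sequence of two uint32_t.
inline bool LessThan(uint32_t lhs_type, const Params& lhs,
                     uint32_t rhs_type, const Params& rhs) {
  return std::tie(lhs_type, lhs.raw_params.first, lhs.raw_params.second) <
         std::tie(rhs_type, rhs.raw_params.first, rhs.raw_params.second);
}
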
diff --git a/compiler/linker/arm64/relative_patcher_arm64.cc b/compiler/linker/arm64/relative_patcher_arm64.cc
index 551c73b..5c6fb50 100644
--- a/compiler/linker/arm64/relative_patcher_arm64.cc
+++ b/compiler/linker/arm64/relative_patcher_arm64.cc
@@ -29,6 +29,7 @@
 #include "mirror/array-inl.h"
 #include "oat.h"
 #include "oat_quick_method_header.h"
+#include "read_barrier.h"
 #include "utils/arm64/assembler_arm64.h"
 
 namespace art {
@@ -313,7 +314,17 @@
         uint32_t next_insn = GetInsn(code, literal_offset + 4u);
         // LDR (immediate) with correct base_reg.
         CheckValidReg(next_insn & 0x1fu);  // Check destination register.
-        CHECK_EQ(next_insn & 0xffc003e0u, 0xb9400000u | (key.GetOffsetParams().base_reg << 5));
+        CHECK_EQ(next_insn & 0xffc003e0u, 0xb9400000u | (key.GetFieldParams().base_reg << 5));
+        break;
+      }
+      case ThunkType::kBakerReadBarrierArray: {
+        DCHECK_GE(code->size() - literal_offset, 8u);
+        uint32_t next_insn = GetInsn(code, literal_offset + 4u);
+        // LDR (register) with the correct base_reg, size=10 (32-bit), option=011 (extend = LSL),
+        // and S=1 (shift amount = 2 for 32-bit version), i.e. LDR Wt, [Xn, Xm, LSL #2].
+        CheckValidReg(next_insn & 0x1fu);  // Check destination register.
+        CHECK_EQ(next_insn & 0xffe0ffe0u, 0xb8607800u | (key.GetArrayParams().base_reg << 5));
+        CheckValidReg((next_insn >> 16) & 0x1fu);  // Check index register.
         break;
       }
       case ThunkType::kBakerReadBarrierRoot: {
@@ -344,10 +355,16 @@
   ThunkParams params;
   switch (type) {
     case BakerReadBarrierKind::kField:
-      params.offset_params.base_reg = BakerReadBarrierFirstRegField::Decode(value);
-      CheckValidReg(params.offset_params.base_reg);
-      params.offset_params.holder_reg = BakerReadBarrierSecondRegField::Decode(value);
-      CheckValidReg(params.offset_params.holder_reg);
+      params.field_params.base_reg = BakerReadBarrierFirstRegField::Decode(value);
+      CheckValidReg(params.field_params.base_reg);
+      params.field_params.holder_reg = BakerReadBarrierSecondRegField::Decode(value);
+      CheckValidReg(params.field_params.holder_reg);
+      break;
+    case BakerReadBarrierKind::kArray:
+      params.array_params.base_reg = BakerReadBarrierFirstRegField::Decode(value);
+      CheckValidReg(params.array_params.base_reg);
+      params.array_params.dummy = 0u;
+      DCHECK_EQ(BakerReadBarrierSecondRegField::Decode(value), kInvalidEncodedReg);
       break;
     case BakerReadBarrierKind::kGcRoot:
       params.root_params.root_reg = BakerReadBarrierFirstRegField::Decode(value);
@@ -363,6 +380,9 @@
   static_assert(static_cast<uint32_t>(BakerReadBarrierKind::kField) + kTypeTranslationOffset ==
                 static_cast<uint32_t>(ThunkType::kBakerReadBarrierField),
                 "Thunk type translation check.");
+  static_assert(static_cast<uint32_t>(BakerReadBarrierKind::kArray) + kTypeTranslationOffset ==
+                static_cast<uint32_t>(ThunkType::kBakerReadBarrierArray),
+                "Thunk type translation check.");
   static_assert(static_cast<uint32_t>(BakerReadBarrierKind::kGcRoot) + kTypeTranslationOffset ==
                 static_cast<uint32_t>(ThunkType::kBakerReadBarrierRoot),
                 "Thunk type translation check.");
@@ -394,7 +414,7 @@
   // Introduce a dependency on the lock_word including rb_state,
   // to prevent load-load reordering, and without using
   // a memory barrier (which would be more expensive).
-  __ Add(base_reg, base_reg, Operand(vixl::aarch64::ip0, LSR, 32));
+  __ Add(base_reg, base_reg, Operand(ip0, LSR, 32));
   __ Br(lr);          // And return back to the function.
   // Note: The fake dependency is unnecessary for the slow path.
 }
@@ -419,8 +439,8 @@
       // and return to the LDR instruction to load the reference. Otherwise, use introspection
       // to load the reference and call the entrypoint (in IP1) that performs further checks
       // on the reference and marks it if needed.
-      auto holder_reg = Register::GetXRegFromCode(key.GetOffsetParams().holder_reg);
-      auto base_reg = Register::GetXRegFromCode(key.GetOffsetParams().base_reg);
+      auto holder_reg = Register::GetXRegFromCode(key.GetFieldParams().holder_reg);
+      auto base_reg = Register::GetXRegFromCode(key.GetFieldParams().base_reg);
       UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
       temps.Exclude(ip0, ip1);
       // If base_reg differs from holder_reg, the offset was too large and we must have
@@ -444,11 +464,31 @@
         // Add null check slow path. The stack map is at the address pointed to by LR.
         __ Bind(&throw_npe);
         int32_t offset = GetThreadOffset<kArm64PointerSize>(kQuickThrowNullPointer).Int32Value();
-        __ Ldr(ip0, MemOperand(vixl::aarch64::x19, offset));
+        __ Ldr(ip0, MemOperand(/* Thread* */ vixl::aarch64::x19, offset));
         __ Br(ip0);
       }
       break;
     }
+    case ThunkType::kBakerReadBarrierArray: {
+      auto base_reg = Register::GetXRegFromCode(key.GetArrayParams().base_reg);
+      UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
+      temps.Exclude(ip0, ip1);
+      vixl::aarch64::Label slow_path;
+      int32_t data_offset =
+          mirror::Array::DataOffset(Primitive::ComponentSize(Primitive::kPrimNot)).Int32Value();
+      MemOperand lock_word(base_reg, mirror::Object::MonitorOffset().Int32Value() - data_offset);
+      DCHECK_LT(lock_word.GetOffset(), 0);
+      EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path);
+      __ Bind(&slow_path);
+      MemOperand ldr_address(lr, BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET);
+      __ Ldr(ip0.W(), ldr_address);         // Load the LDR (register) instruction.
+      __ Ubfx(ip0, ip0, 16, 6);             // Extract the index register, plus 32 (bit 21 is set).
+      __ Bfi(ip1, ip0, 3, 6);               // Insert ip0 into the entrypoint address to create
+                                            // a switch case target based on the index register.
+      __ Mov(ip0, base_reg);                // Move the base register to ip0.
+      __ Br(ip1);                           // Jump to the entrypoint's array switch case.
+      break;
+    }
     case ThunkType::kBakerReadBarrierRoot: {
       // Check if the reference needs to be marked and if so (i.e. not null, not marked yet
       // and it does not have a forwarding address), call the correct introspection entrypoint;
@@ -494,6 +534,7 @@
     case ThunkType::kMethodCall:
       return kMaxMethodCallPositiveDisplacement;
     case ThunkType::kBakerReadBarrierField:
+    case ThunkType::kBakerReadBarrierArray:
     case ThunkType::kBakerReadBarrierRoot:
       return kMaxBcondPositiveDisplacement;
   }
@@ -504,6 +545,7 @@
     case ThunkType::kMethodCall:
       return kMaxMethodCallNegativeDisplacement;
     case ThunkType::kBakerReadBarrierField:
+    case ThunkType::kBakerReadBarrierArray:
     case ThunkType::kBakerReadBarrierRoot:
       return kMaxBcondNegativeDisplacement;
   }
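
The array thunk above recovers the index register through introspection instead of baking it into the thunk, so only one thunk per base register is needed rather than one per (base, index) pair. A small sketch, not ART code, of the UBFX/BFI arithmetic modeled on plain uint32_t values (register numbers and the entrypoint address are arbitrary):

#include <cassert>
#include <cstdint>

constexpr uint32_t kLdrWLsl2Insn = 0xb8607800u;  // LDR Wt, [Xn, Xm, LSL #2]

// UBFX ip0, ip0, #16, #6: Rm sits in bits [20:16] and bit 21 is always set
// in this LDR form, so the extracted value is index_reg + 32.
constexpr uint32_t ExtractIndexRegPlus32(uint32_t ldr_insn) {
  return (ldr_insn >> 16) & 0x3fu;
}

// BFI ip1, ip0, #3, #6: insert that value at bit 3 of the entrypoint
// address, selecting an 8-byte (2-instruction) switch case per register.
constexpr uint32_t SwitchCaseTarget(uint32_t entrypoint, uint32_t ldr_insn) {
  return (entrypoint & ~(0x3fu << 3)) | (ExtractIndexRegPlus32(ldr_insn) << 3);
}

int main() {
  uint32_t insn = kLdrWLsl2Insn | (5u << 16) | (1u << 5) | 2u;  // LDR w2, [x1, x5, LSL #2]
  assert(ExtractIndexRegPlus32(insn) == 5u + 32u);
  // With index register x5 the thunk branches 8 * (5 + 32) bytes into the
  // entrypoint's switch table.
  assert(SwitchCaseTarget(0x1000u, insn) == 0x1000u + 8u * (5u + 32u));
  return 0;
}
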
diff --git a/compiler/linker/arm64/relative_patcher_arm64.h b/compiler/linker/arm64/relative_patcher_arm64.h
index 7887cea..71ab70e 100644
--- a/compiler/linker/arm64/relative_patcher_arm64.h
+++ b/compiler/linker/arm64/relative_patcher_arm64.h
@@ -19,6 +19,7 @@
 
 #include "base/array_ref.h"
 #include "base/bit_field.h"
+#include "base/bit_utils.h"
 #include "linker/arm/relative_patcher_arm_base.h"
 
 namespace art {
@@ -28,6 +29,7 @@
  public:
   enum class BakerReadBarrierKind : uint8_t {
     kField,   // Field get or array get with constant offset (i.e. constant index).
+    kArray,   // Array get with index in register.
     kGcRoot,  // GC root load.
     kLast
   };
@@ -40,6 +42,13 @@
            BakerReadBarrierSecondRegField::Encode(holder_reg);
   }
 
+  static uint32_t EncodeBakerReadBarrierArrayData(uint32_t base_reg) {
+    CheckValidReg(base_reg);
+    return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kArray) |
+           BakerReadBarrierFirstRegField::Encode(base_reg) |
+           BakerReadBarrierSecondRegField::Encode(kInvalidEncodedReg);
+  }
+
   static uint32_t EncodeBakerReadBarrierGcRootData(uint32_t root_reg) {
     CheckValidReg(root_reg);
     return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kGcRoot) |
@@ -68,14 +77,14 @@
                                    uint32_t patch_offset) OVERRIDE;
 
  protected:
-  static constexpr uint32_t kInvalidEncodedReg = /* sp/zr is invalid */ 31u;
-
   ThunkKey GetBakerReadBarrierKey(const LinkerPatch& patch) OVERRIDE;
   std::vector<uint8_t> CompileThunk(const ThunkKey& key) OVERRIDE;
   uint32_t MaxPositiveDisplacement(ThunkType type) OVERRIDE;
   uint32_t MaxNegativeDisplacement(ThunkType type) OVERRIDE;
 
  private:
+  static constexpr uint32_t kInvalidEncodedReg = /* sp/zr is invalid */ 31u;
+
   static constexpr size_t kBitsForBakerReadBarrierKind =
       MinimumBitsToStore(static_cast<size_t>(BakerReadBarrierKind::kLast));
   static constexpr size_t kBitsForRegister = 5u;
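
For reference, the custom data packed by EncodeBakerReadBarrierArrayData can be reproduced standalone. A sketch assuming the BitField positions declared in this header (2 bits of kind at bit 0, since MinimumBitsToStore(kLast == 3) is 2, then two 5-bit register fields); not ART code:

#include <cstdint>

enum class BakerReadBarrierKind : uint8_t { kField = 0, kArray = 1, kGcRoot = 2 };

constexpr uint32_t kInvalidEncodedReg = 31u;  // sp/zr is invalid

constexpr uint32_t EncodeArrayData(uint32_t base_reg) {
  return static_cast<uint32_t>(BakerReadBarrierKind::kArray)  // kind, bits [1:0]
       | (base_reg << 2)                                      // first reg, bits [6:2]
       | (kInvalidEncodedReg << 7);                           // second reg unused, bits [11:7]
}

// E.g. base register x10: kind kArray, first reg 10, second reg marked invalid.
static_assert(EncodeArrayData(10u) == (1u | (10u << 2) | (31u << 7)), "layout check");
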
diff --git a/compiler/linker/arm64/relative_patcher_arm64_test.cc b/compiler/linker/arm64/relative_patcher_arm64_test.cc
index b4d35ab..57ea886 100644
--- a/compiler/linker/arm64/relative_patcher_arm64_test.cc
+++ b/compiler/linker/arm64/relative_patcher_arm64_test.cc
@@ -18,6 +18,7 @@
 #include "linker/relative_patcher_test.h"
 #include "linker/arm64/relative_patcher_arm64.h"
 #include "lock_word.h"
+#include "mirror/array-inl.h"
 #include "mirror/object.h"
 #include "oat_quick_method_header.h"
 
@@ -46,9 +47,15 @@
   static constexpr uint32_t kBlPlusMax = 0x95ffffffu;
   static constexpr uint32_t kBlMinusMax = 0x96000000u;
 
-  // LDR immediate, unsigned offset.
+  // LDR immediate, 32-bit, unsigned offset.
   static constexpr uint32_t kLdrWInsn = 0xb9400000u;
 
+  // LDR register, 32-bit, LSL #2.
+  static constexpr uint32_t kLdrWLsl2Insn = 0xb8607800u;
+
+  // LDUR, 32-bit.
+  static constexpr uint32_t kLdurWInsn = 0xb8400000u;
+
   // ADD/ADDS/SUB/SUBS immediate, 64-bit.
   static constexpr uint32_t kAddXInsn = 0x91000000u;
   static constexpr uint32_t kAddsXInsn = 0xb1000000u;
@@ -68,7 +75,7 @@
   static constexpr uint32_t kLdrXSpRelInsn = 0xf94003edu;
 
   // CBNZ x17, +0. Bits 5-23 are a placeholder for target offset from PC in units of 4-bytes.
-  static constexpr uint32_t kCbnzIP1Plus0Insn = 0xb5000011;
+  static constexpr uint32_t kCbnzIP1Plus0Insn = 0xb5000011u;
 
   void InsertInsn(std::vector<uint8_t>* code, size_t pos, uint32_t insn) {
     CHECK_LE(pos, code->size());
@@ -188,7 +195,7 @@
 
   std::vector<uint8_t> GenNops(size_t num_nops) {
     std::vector<uint8_t> result;
-    result.reserve(num_nops * 4u + 4u);
+    result.reserve(num_nops * 4u);
     for (size_t i = 0; i != num_nops; ++i) {
       PushBackInsn(&result, kNopInsn);
     }
@@ -228,7 +235,7 @@
     } else {
       LOG(FATAL) << "Unexpected instruction: 0x" << std::hex << use_insn;
     }
-    uint32_t adrp = 0x90000000 |              // ADRP x0, +SignExtend(immhi:immlo:Zeros(12), 64)
+    uint32_t adrp = 0x90000000u |             // ADRP x0, +SignExtend(immhi:immlo:Zeros(12), 64)
         ((disp & 0x3000u) << (29 - 12)) |     // immlo = ((disp & 0x3000u) >> 12) is at bit 29,
         ((disp & 0xffffc000) >> (14 - 5)) |   // immhi = (disp >> 14) is at bit 5,
         // We take the sign bit from the disp, limiting disp to +- 2GiB.
@@ -471,6 +478,14 @@
     return patcher->CompileThunk(key);
   }
 
+  std::vector<uint8_t> CompileBakerArrayThunk(uint32_t base_reg) {
+    LinkerPatch patch = LinkerPatch::BakerReadBarrierBranchPatch(
+        0u, Arm64RelativePatcher::EncodeBakerReadBarrierArrayData(base_reg));
+    auto* patcher = down_cast<Arm64RelativePatcher*>(patcher_.get());
+    ArmBaseRelativePatcher::ThunkKey key = patcher->GetBakerReadBarrierKey(patch);
+    return patcher->CompileThunk(key);
+  }
+
   std::vector<uint8_t> CompileBakerGcRootThunk(uint32_t root_reg) {
     LinkerPatch patch = LinkerPatch::BakerReadBarrierBranchPatch(
         0u, Arm64RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg));
@@ -488,7 +503,7 @@
            (static_cast<uint32_t>(output_[offset + 3]) << 24);
   }
 
-  void TestBakerField(uint32_t offset, uint32_t root_reg);
+  void TestBakerField(uint32_t offset, uint32_t ref_reg);
 };
 
 const uint8_t Arm64RelativePatcherTest::kCallRawCode[] = {
@@ -885,7 +900,7 @@
 
 TEST_FOR_OFFSETS(LDRX_SPREL_ADD_TEST, 0, 8)
 
-void Arm64RelativePatcherTest::TestBakerField(uint32_t offset, uint32_t root_reg) {
+void Arm64RelativePatcherTest::TestBakerField(uint32_t offset, uint32_t ref_reg) {
   uint32_t valid_regs[] = {
       0,  1,  2,  3,  4,  5,  6,  7,  8,  9,
       10, 11, 12, 13, 14, 15,         18, 19,  // IP0 and IP1 are reserved.
@@ -899,7 +914,7 @@
   uint32_t method_idx = 0u;
   for (uint32_t base_reg : valid_regs) {
     for (uint32_t holder_reg : valid_regs) {
-      uint32_t ldr = kLdrWInsn | (offset << (10 - 2)) | (base_reg << 5) | root_reg;
+      uint32_t ldr = kLdrWInsn | (offset << (10 - 2)) | (base_reg << 5) | ref_reg;
       const std::vector<uint8_t> raw_code = RawCode({kCbnzIP1Plus0Insn, ldr});
       ASSERT_EQ(kMethodCodeSize, raw_code.size());
       ArrayRef<const uint8_t> code(raw_code);
@@ -922,7 +937,7 @@
       ++method_idx;
       uint32_t cbnz_offset = thunk_offset - (GetMethodOffset(method_idx) + kLiteralOffset);
       uint32_t cbnz = kCbnzIP1Plus0Insn | (cbnz_offset << (5 - 2));
-      uint32_t ldr = kLdrWInsn | (offset << (10 - 2)) | (base_reg << 5) | root_reg;
+      uint32_t ldr = kLdrWInsn | (offset << (10 - 2)) | (base_reg << 5) | ref_reg;
       const std::vector<uint8_t> expected_code = RawCode({cbnz, ldr});
       ASSERT_EQ(kMethodCodeSize, expected_code.size());
       ASSERT_TRUE(
@@ -942,7 +957,7 @@
       if (holder_reg == base_reg) {
         // Verify that the null-check CBZ uses the correct register, i.e. holder_reg.
         ASSERT_GE(output_.size() - gray_check_offset, 4u);
-        ASSERT_EQ(0x34000000 | holder_reg, GetOutputInsn(thunk_offset) & 0xff00001f);
+        ASSERT_EQ(0x34000000u | holder_reg, GetOutputInsn(thunk_offset) & 0xff00001fu);
         gray_check_offset += 4u;
       }
       // Verify that the lock word for gray bit check is loaded from the holder address.
@@ -955,12 +970,12 @@
           /* ip0 */ 16;
       EXPECT_EQ(load_lock_word, GetOutputInsn(gray_check_offset));
       // Verify the gray bit check.
-      const uint32_t check_gray_bit_witout_offset =
-          0x37000000 | (LockWord::kReadBarrierStateShift << 19) | /* ip0 */ 16;
-      EXPECT_EQ(check_gray_bit_witout_offset, GetOutputInsn(gray_check_offset + 4u) & 0xfff8001f);
+      const uint32_t check_gray_bit_without_offset =
+          0x37000000u | (LockWord::kReadBarrierStateShift << 19) | /* ip0 */ 16;
+      EXPECT_EQ(check_gray_bit_without_offset, GetOutputInsn(gray_check_offset + 4u) & 0xfff8001fu);
       // Verify the fake dependency.
       const uint32_t fake_dependency =
-          0x8b408000 |              // ADD Xd, Xn, Xm, LSR 32
+          0x8b408000u |             // ADD Xd, Xn, Xm, LSR 32
           (/* ip0 */ 16 << 16) |    // Xm = ip0
           (base_reg << 5) |         // Xn = base_reg
           base_reg;                 // Xd = base_reg
@@ -973,19 +988,19 @@
   }
 }
 
-#define TEST_BAKER_FIELD(offset, root_reg)    \
+#define TEST_BAKER_FIELD(offset, ref_reg)     \
   TEST_F(Arm64RelativePatcherTestDefault,     \
-    BakerOffset##offset##_##root_reg) {       \
-    TestBakerField(offset, root_reg);         \
+    BakerOffset##offset##_##ref_reg) {        \
+    TestBakerField(offset, ref_reg);          \
   }
 
-TEST_BAKER_FIELD(/* offset */ 0, /* root_reg */ 0)
-TEST_BAKER_FIELD(/* offset */ 8, /* root_reg */ 15)
-TEST_BAKER_FIELD(/* offset */ 0x3ffc, /* root_reg */ 29)
+TEST_BAKER_FIELD(/* offset */ 0, /* ref_reg */ 0)
+TEST_BAKER_FIELD(/* offset */ 8, /* ref_reg */ 15)
+TEST_BAKER_FIELD(/* offset */ 0x3ffc, /* ref_reg */ 29)
 
 TEST_F(Arm64RelativePatcherTestDefault, BakerOffsetThunkInTheMiddle) {
   // One thunk in the middle with maximum distance branches to it from both sides.
-  // Use offset = 0, base_reg = 0, root_reg = 0, the LDR is simply `kLdrWInsn`.
+  // Use offset = 0, base_reg = 0, ref_reg = 0, the LDR is simply `kLdrWInsn`.
   constexpr uint32_t kLiteralOffset1 = 4;
   const std::vector<uint8_t> raw_code1 = RawCode({kNopInsn, kCbnzIP1Plus0Insn, kLdrWInsn});
   ArrayRef<const uint8_t> code1(raw_code1);
@@ -1046,7 +1061,7 @@
 TEST_F(Arm64RelativePatcherTestDefault, BakerOffsetThunkBeforeFiller) {
   // Based on the first part of BakerOffsetThunkInTheMiddle but the CBNZ is one instruction
   // earlier, so the thunk is emitted before the filler.
-  // Use offset = 0, base_reg = 0, root_reg = 0, the LDR is simply `kLdrWInsn`.
+  // Use offset = 0, base_reg = 0, ref_reg = 0, the LDR is simply `kLdrWInsn`.
   constexpr uint32_t kLiteralOffset1 = 0;
   const std::vector<uint8_t> raw_code1 = RawCode({kCbnzIP1Plus0Insn, kLdrWInsn, kNopInsn});
   ArrayRef<const uint8_t> code1(raw_code1);
@@ -1076,7 +1091,7 @@
 TEST_F(Arm64RelativePatcherTestDefault, BakerOffsetThunkInTheMiddleUnreachableFromLast) {
   // Based on the BakerOffsetThunkInTheMiddle but the CBNZ in the last method is preceded
   // by NOP and cannot reach the thunk in the middle, so we emit an extra thunk at the end.
-  // Use offset = 0, base_reg = 0, root_reg = 0, the LDR is simply `kLdrWInsn`.
+  // Use offset = 0, base_reg = 0, ref_reg = 0, the LDR is simply `kLdrWInsn`.
   constexpr uint32_t kLiteralOffset1 = 4;
   const std::vector<uint8_t> raw_code1 = RawCode({kNopInsn, kCbnzIP1Plus0Insn, kLdrWInsn});
   ArrayRef<const uint8_t> code1(raw_code1);
@@ -1132,7 +1147,88 @@
   ASSERT_TRUE(CheckLinkedMethod(MethodRef(5), ArrayRef<const uint8_t>(expected_code2)));
 }
 
-TEST_F(Arm64RelativePatcherTestDefault, BakerRootGcRoot) {
+TEST_F(Arm64RelativePatcherTestDefault, BakerArray) {
+  uint32_t valid_regs[] = {
+      0,  1,  2,  3,  4,  5,  6,  7,  8,  9,
+      10, 11, 12, 13, 14, 15,         18, 19,  // IP0 and IP1 are reserved.
+      20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
+      // LR and SP/ZR are reserved.
+  };
+  auto ldr = [](uint32_t base_reg) {
+    uint32_t index_reg = (base_reg == 0u) ? 1u : 0u;
+    uint32_t ref_reg = (base_reg == 2u) ? 3u : 2u;
+    return kLdrWLsl2Insn | (index_reg << 16) | (base_reg << 5) | ref_reg;
+  };
+  constexpr size_t kMethodCodeSize = 8u;
+  constexpr size_t kLiteralOffset = 0u;
+  uint32_t method_idx = 0u;
+  for (uint32_t base_reg : valid_regs) {
+    ++method_idx;
+    const std::vector<uint8_t> raw_code = RawCode({kCbnzIP1Plus0Insn, ldr(base_reg)});
+    ASSERT_EQ(kMethodCodeSize, raw_code.size());
+    ArrayRef<const uint8_t> code(raw_code);
+    const LinkerPatch patches[] = {
+        LinkerPatch::BakerReadBarrierBranchPatch(
+            kLiteralOffset, Arm64RelativePatcher::EncodeBakerReadBarrierArrayData(base_reg)),
+    };
+    AddCompiledMethod(MethodRef(method_idx), code, ArrayRef<const LinkerPatch>(patches));
+  }
+  Link();
+
+  // All thunks are at the end.
+  uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArm64Alignment);
+  method_idx = 0u;
+  for (uint32_t base_reg : valid_regs) {
+    ++method_idx;
+    uint32_t cbnz_offset = thunk_offset - (GetMethodOffset(method_idx) + kLiteralOffset);
+    uint32_t cbnz = kCbnzIP1Plus0Insn | (cbnz_offset << (5 - 2));
+    const std::vector<uint8_t> expected_code = RawCode({cbnz, ldr(base_reg)});
+    ASSERT_EQ(kMethodCodeSize, expected_code.size());
+    EXPECT_TRUE(CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code)));
+
+    std::vector<uint8_t> expected_thunk = CompileBakerArrayThunk(base_reg);
+    ASSERT_GT(output_.size(), thunk_offset);
+    ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size());
+    ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset,
+                                           expected_thunk.size());
+    if (ArrayRef<const uint8_t>(expected_thunk) != compiled_thunk) {
+      DumpDiff(ArrayRef<const uint8_t>(expected_thunk), compiled_thunk);
+      ASSERT_TRUE(false);
+    }
+
+    // Verify that the lock word for the gray bit check is loaded from the correct
+    // address, at a negative offset from base_reg, which points to the array data.
+    static constexpr size_t kGrayCheckInsns = 5;
+    ASSERT_GE(output_.size() - thunk_offset, 4u * kGrayCheckInsns);
+    int32_t data_offset =
+        mirror::Array::DataOffset(Primitive::ComponentSize(Primitive::kPrimNot)).Int32Value();
+    int32_t offset = mirror::Object::MonitorOffset().Int32Value() - data_offset;
+    ASSERT_LT(offset, 0);
+    const uint32_t load_lock_word =
+        kLdurWInsn |
+        ((offset & 0x1ffu) << 12) |
+        (base_reg << 5) |
+        /* ip0 */ 16;
+    EXPECT_EQ(load_lock_word, GetOutputInsn(thunk_offset));
+    // Verify the gray bit check.
+    const uint32_t check_gray_bit_without_offset =
+        0x37000000u | (LockWord::kReadBarrierStateShift << 19) | /* ip0 */ 16;
+    EXPECT_EQ(check_gray_bit_without_offset, GetOutputInsn(thunk_offset + 4u) & 0xfff8001fu);
+    // Verify the fake dependency.
+    const uint32_t fake_dependency =
+        0x8b408000u |             // ADD Xd, Xn, Xm, LSR 32
+        (/* ip0 */ 16 << 16) |    // Xm = ip0
+        (base_reg << 5) |         // Xn = base_reg
+        base_reg;                 // Xd = base_reg
+    EXPECT_EQ(fake_dependency, GetOutputInsn(thunk_offset + 12u));
+    // Do not check the rest of the implementation.
+
+    // The next thunk follows on the next aligned offset.
+    thunk_offset += RoundUp(expected_thunk.size(), kArm64Alignment);
+  }
+}
+
+TEST_F(Arm64RelativePatcherTestDefault, BakerGcRoot) {
   uint32_t valid_regs[] = {
       0,  1,  2,  3,  4,  5,  6,  7,  8,  9,
       10, 11, 12, 13, 14, 15,         18, 19,  // IP0 and IP1 are reserved.
@@ -1180,7 +1276,7 @@
 
     // Verify that the fast-path null-check CBZ uses the correct register, i.e. root_reg.
     ASSERT_GE(output_.size() - thunk_offset, 4u);
-    ASSERT_EQ(0x34000000 | root_reg, GetOutputInsn(thunk_offset) & 0xff00001f);
+    ASSERT_EQ(0x34000000u | root_reg, GetOutputInsn(thunk_offset) & 0xff00001fu);
     // Do not check the rest of the implementation.
 
     // The next thunk follows on the next aligned offset.
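
The BakerArray test above expects the lock word to be loaded with an LDUR because the monitor word sits below the pointer to the array data (MonitorOffset() - DataOffset() < 0). A sketch, not ART code, of how such an encoding is assembled; LDUR's imm9 field (bits [20:12]) holds a 9-bit two's-complement byte offset:

#include <cassert>
#include <cstdint>

constexpr uint32_t kLdurWInsn = 0xb8400000u;

constexpr uint32_t EncodeLdurW(uint32_t rt, uint32_t rn, int32_t offset) {
  // The offset must fit in 9 signed bits, i.e. [-256, 255].
  return kLdurWInsn | ((static_cast<uint32_t>(offset) & 0x1ffu) << 12) | (rn << 5) | rt;
}

int main() {
  // E.g. load the lock word into w16 (ip0) from 8 bytes below the array data
  // pointer in x0; the exact offset depends on the object layout.
  assert(EncodeLdurW(16u, 0u, -8) == (kLdurWInsn | (0x1f8u << 12) | 16u));
  return 0;
}
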
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 4629c54..eee832a 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -91,6 +91,7 @@
 
 // Flags controlling the use of link-time generated thunks for Baker read barriers.
 constexpr bool kBakerReadBarrierLinkTimeThunksEnableForFields = true;
+constexpr bool kBakerReadBarrierLinkTimeThunksEnableForArrays = true;
 constexpr bool kBakerReadBarrierLinkTimeThunksEnableForGcRoots = true;
 
 // Some instructions have special requirements for a temporary, for example
@@ -2759,6 +2760,7 @@
     // Object ArrayGet with Baker's read barrier case.
     // Note that a potential implicit null check is handled in the
     // CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier call.
+    DCHECK(!instruction->CanDoImplicitNullCheckOn(instruction->InputAt(0)));
     if (index.IsConstant()) {
       // Array load with a constant index can be treated as a field load.
       offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(type);
@@ -2769,12 +2771,12 @@
                                                       obj.W(),
                                                       offset,
                                                       maybe_temp,
-                                                      /* needs_null_check */ true,
+                                                      /* needs_null_check */ false,
                                                       /* use_load_acquire */ false);
     } else {
       Register temp = WRegisterFrom(locations->GetTemp(0));
       codegen_->GenerateArrayLoadWithBakerReadBarrier(
-          instruction, out, obj.W(), offset, index, temp, /* needs_null_check */ true);
+          instruction, out, obj.W(), offset, index, temp, /* needs_null_check */ false);
     }
   } else {
     // General case.
@@ -5928,9 +5930,9 @@
           !Runtime::Current()->UseJitCompilation()) {
         // Note that we do not actually check the value of `GetIsGcMarking()`
         // to decide whether to mark the loaded GC root or not.  Instead, we
-        // load into `temp` the read barrier mark introspection entrypoint.
-        // If `temp` is null, it means that `GetIsGcMarking()` is false, and
-        // vice versa.
+        // load into `temp` (actually IP1) the read barrier mark introspection
+        // entrypoint. If `temp` is null, it means that `GetIsGcMarking()` is
+        // false, and vice versa.
         //
         // We use link-time generated thunks for the slow path. That thunk
         // checks the reference and jumps to the entrypoint if needed.
@@ -6054,24 +6056,24 @@
       !use_load_acquire &&
       !Runtime::Current()->UseJitCompilation()) {
     // Note that we do not actually check the value of `GetIsGcMarking()`
-    // to decide whether to mark the loaded GC root or not.  Instead, we
-    // load into `temp` the read barrier mark introspection entrypoint.
-    // If `temp` is null, it means that `GetIsGcMarking()` is false, and
-    // vice versa.
+    // to decide whether to mark the loaded reference or not.  Instead, we
+    // load into `temp` (actually IP1) the read barrier mark introspection
+    // entrypoint. If `temp` is null, it means that `GetIsGcMarking()` is
+    // false, and vice versa.
     //
     // We use link-time generated thunks for the slow path. That thunk checks
     // the holder and jumps to the entrypoint if needed. If the holder is not
     // gray, it creates a fake dependency and returns to the LDR instruction.
     //
     //     temp = Thread::Current()->pReadBarrierMarkIntrospection
-    //     lr = &return_address;
+    //     lr = &gray_return_address;
     //     if (temp != nullptr) {
     //        goto field_thunk<holder_reg, base_reg>(lr)
     //     }
     //   not_gray_return_address:
     //     // Original reference load. If the offset is too large to fit
     //     // into LDR, we use an adjusted base register here.
-    //     GcRoot<mirror::Object> root = *(obj+offset);
+    //     HeapReference<mirror::Object> reference = *(obj+offset);
     //   gray_return_address:
 
     DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>));
@@ -6141,16 +6143,74 @@
   DCHECK(kEmitCompilerReadBarrier);
   DCHECK(kUseBakerReadBarrier);
 
+  static_assert(
+      sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+      "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+  size_t scale_factor = Primitive::ComponentSizeShift(Primitive::kPrimNot);
+
+  if (kBakerReadBarrierLinkTimeThunksEnableForArrays &&
+      !Runtime::Current()->UseJitCompilation()) {
+    // Note that we do not actually check the value of `GetIsGcMarking()`
+    // to decide whether to mark the loaded reference or not.  Instead, we
+    // load into `temp` (actually IP1) the read barrier mark introspection
+    // entrypoint. If `temp` is null, it means that `GetIsGcMarking()` is
+    // false, and vice versa.
+    //
+    // We use link-time generated thunks for the slow path. That thunk checks
+    // the array and jumps to the entrypoint if needed. If the array is not
+    // gray, it creates a fake dependency and returns to the LDR instruction.
+    //
+    //     temp = Thread::Current()->pReadBarrierMarkIntrospection
+    //     lr = &gray_return_address;
+    //     if (temp != nullptr) {
+    //        goto array_thunk<base_reg>(lr)
+    //     }
+    //   not_gray_return_address:
+    //     // Original reference load. The base register (`temp`) already
+    //     // points to the array data, so no offset adjustment is needed.
+    //     HeapReference<mirror::Object> reference = data[index];
+    //   gray_return_address:
+
+    DCHECK(index.IsValid());
+    Register index_reg = RegisterFrom(index, Primitive::kPrimInt);
+    Register ref_reg = RegisterFrom(ref, Primitive::kPrimNot);
+
+    UseScratchRegisterScope temps(GetVIXLAssembler());
+    DCHECK(temps.IsAvailable(ip0));
+    DCHECK(temps.IsAvailable(ip1));
+    temps.Exclude(ip0, ip1);
+    uint32_t custom_data =
+        linker::Arm64RelativePatcher::EncodeBakerReadBarrierArrayData(temp.GetCode());
+    vixl::aarch64::Label* cbnz_label = NewBakerReadBarrierPatch(custom_data);
+
+    // ip1 = Thread::Current()->pReadBarrierMarkReg16, i.e. pReadBarrierMarkIntrospection.
+    DCHECK_EQ(ip0.GetCode(), 16u);
+    const int32_t entry_point_offset =
+        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ip0.GetCode());
+    __ Ldr(ip1, MemOperand(tr, entry_point_offset));
+    __ Add(temp.X(), obj.X(), Operand(data_offset));
+    EmissionCheckScope guard(GetVIXLAssembler(),
+                             (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize);
+    vixl::aarch64::Label return_address;
+    __ adr(lr, &return_address);
+    __ Bind(cbnz_label);
+    __ cbnz(ip1, static_cast<int64_t>(0));  // Placeholder, patched at link-time.
+    static_assert(BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
+                  "Array LDR must be 1 instruction (4B) before the return address label; "
+                  " 2 instructions (8B) for heap poisoning.");
+    __ ldr(ref_reg, MemOperand(temp.X(), index_reg.X(), LSL, scale_factor));
+    DCHECK(!needs_null_check);  // The thunk cannot handle the null check.
+    GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
+    __ Bind(&return_address);
+    return;
+  }
+
   // Array cells are never volatile variables, therefore array loads
   // never use Load-Acquire instructions on ARM64.
   const bool use_load_acquire = false;
 
-  static_assert(
-      sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
-      "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
   // /* HeapReference<Object> */ ref =
   //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
-  size_t scale_factor = Primitive::ComponentSizeShift(Primitive::kPrimNot);
   GenerateReferenceLoadWithBakerReadBarrier(instruction,
                                             ref,
                                             obj,
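
The static_assert on BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET in the hunk above pins down where the thunk must look, relative to the return address in lr, to find the LDR it introspects. A minimal sketch of that bookkeeping, assuming 4-byte instructions; not ART code:

#include <cstdint>

constexpr int32_t kInstructionSize = 4;

// With heap poisoning enabled, an unpoison instruction sits between the LDR
// and the return-address label, pushing the LDR one instruction further back.
constexpr int32_t ArrayLdrOffsetFromLr(bool poison_heap_references) {
  return poison_heap_references ? -2 * kInstructionSize : -1 * kInstructionSize;
}

static_assert(ArrayLdrOffsetFromLr(false) == -4, "LDR immediately precedes the label");
static_assert(ArrayLdrOffsetFromLr(true) == -8, "unpoison insn between LDR and label");
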
diff --git a/compiler/optimizing/instruction_simplifier_shared.cc b/compiler/optimizing/instruction_simplifier_shared.cc
index 7d1f146..c39e5f4 100644
--- a/compiler/optimizing/instruction_simplifier_shared.cc
+++ b/compiler/optimizing/instruction_simplifier_shared.cc
@@ -247,6 +247,7 @@
       access->GetType() == Primitive::kPrimNot) {
     // For object arrays, the read barrier instrumentation requires
     // the original array pointer.
+    // TODO: This can be relaxed for Baker CC.
     return false;
   }
 
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 8368026..36c7df7 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -5377,10 +5377,16 @@
   }
   bool CanDoImplicitNullCheckOn(HInstruction* obj ATTRIBUTE_UNUSED) const OVERRIDE {
     // TODO: We can be smarter here.
-    // Currently, the array access is always preceded by an ArrayLength or a NullCheck
-    // which generates the implicit null check. There are cases when these can be removed
-    // to produce better code. If we ever add optimizations to do so we should allow an
-    // implicit check here (as long as the address falls in the first page).
+    // Currently, unless the array is the result of NewArray, the array access is always
+    // preceded by some form of null check necessary for the bounds check, usually an
+    // implicit null check on the ArrayLength input to BoundsCheck or Deoptimize for
+    // dynamic BCE. There are cases when these could be removed to produce better code.
+    // If we ever add optimizations to do so, we should allow an implicit check here
+    // (as long as the address falls in the first page).
+    //
+    // As an example of such a fancy optimization, we could eliminate BoundsCheck for
+    //     a = cond ? new int[1] : null;
+    //     a[0];  // The Phi does not need bounds check for either input.
     return false;
   }
 
diff --git a/runtime/hprof/hprof.cc b/runtime/hprof/hprof.cc
index 4f390fd..8bdf6b1 100644
--- a/runtime/hprof/hprof.cc
+++ b/runtime/hprof/hprof.cc
@@ -34,7 +34,6 @@
 #include <sys/time.h>
 #include <time.h>
 #include <unistd.h>
-
 #include <set>
 
 #include "android-base/stringprintf.h"
@@ -502,9 +501,16 @@
   void DumpHeapArray(mirror::Array* obj, mirror::Class* klass)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  void DumpHeapInstanceObject(mirror::Object* obj, mirror::Class* klass)
+  void DumpFakeObjectArray(mirror::Object* obj, const std::set<mirror::Object*>& elements)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
+  void DumpHeapInstanceObject(mirror::Object* obj,
+                              mirror::Class* klass,
+                              const std::set<mirror::Object*>& fake_roots)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
+  bool AddRuntimeInternalObjectsField(mirror::Class* klass) REQUIRES_SHARED(Locks::mutator_lock_);
+
   void ProcessHeap(bool header_first)
       REQUIRES(Locks::mutator_lock_) {
     // Reset current heap and object count.
@@ -1062,37 +1068,17 @@
   ++objects_in_segment_;
 }
 
-// Use for visiting the GcRoots held live by ArtFields, ArtMethods, and ClassLoaders.
-class GcRootVisitor {
- public:
-  explicit GcRootVisitor(Hprof* hprof) : hprof_(hprof) {}
-
-  void operator()(mirror::Object* obj ATTRIBUTE_UNUSED,
-                  MemberOffset offset ATTRIBUTE_UNUSED,
-                  bool is_static ATTRIBUTE_UNUSED) const {}
-
-  // Note that these don't have read barriers. Its OK however since the GC is guaranteed to not be
-  // running during the hprof dumping process.
-  void VisitRootIfNonNull(mirror::CompressedReference<mirror::Object>* root) const
-      REQUIRES_SHARED(Locks::mutator_lock_) {
-    if (!root->IsNull()) {
-      VisitRoot(root);
-    }
+bool Hprof::AddRuntimeInternalObjectsField(mirror::Class* klass) {
+  if (klass->IsDexCacheClass()) {
+    return true;
   }
-
-  void VisitRoot(mirror::CompressedReference<mirror::Object>* root) const
-      REQUIRES_SHARED(Locks::mutator_lock_) {
-    mirror::Object* obj = root->AsMirrorPtr();
-    // The two cases are either classes or dex cache arrays. If it is a dex cache array, then use
-    // VM internal. Otherwise the object is a declaring class of an ArtField or ArtMethod or a
-    // class from a ClassLoader.
-    hprof_->VisitRoot(obj, RootInfo(obj->IsClass() ? kRootStickyClass : kRootVMInternal));
+  // IsClassLoaderClass is true for subclasses of ClassLoader, but we only want to add the fake
+  // field to the java.lang.ClassLoader class.
+  if (klass->IsClassLoaderClass() && klass->GetSuperClass()->IsObjectClass()) {
+    return true;
   }
-
-
- private:
-  Hprof* const hprof_;
-};
+  return false;
+}
 
 void Hprof::DumpHeapObject(mirror::Object* obj) {
   // Ignore classes that are retired.
@@ -1103,8 +1089,41 @@
 
   ++total_objects_;
 
-  GcRootVisitor visitor(this);
-  obj->VisitReferences(visitor, VoidFunctor());
+  class RootCollector {
+   public:
+    explicit RootCollector() {}
+
+    void operator()(mirror::Object*, MemberOffset, bool) const {}
+
+    // Note that these don't have read barriers. It's OK, however, since the GC is guaranteed
+    // to not be
+    // running during the hprof dumping process.
+    void VisitRootIfNonNull(mirror::CompressedReference<mirror::Object>* root) const
+        REQUIRES_SHARED(Locks::mutator_lock_) {
+      if (!root->IsNull()) {
+        VisitRoot(root);
+      }
+    }
+
+    void VisitRoot(mirror::CompressedReference<mirror::Object>* root) const
+        REQUIRES_SHARED(Locks::mutator_lock_) {
+      roots_.insert(root->AsMirrorPtr());
+    }
+
+    const std::set<mirror::Object*>& GetRoots() const {
+      return roots_;
+    }
+
+   private:
+    // These roots are actually live from the object. Avoid marking them as roots in hprof to make
+    // it easier to debug class unloading.
+    mutable std::set<mirror::Object*> roots_;
+  };
+
+  RootCollector visitor;
+  // Collect all native roots.
+  if (!obj->IsClass()) {
+    obj->VisitReferences(visitor, VoidFunctor());
+  }
 
   gc::Heap* const heap = Runtime::Current()->GetHeap();
   const gc::space::ContinuousSpace* const space = heap->FindContinuousSpaceFromObject(obj, true);
@@ -1112,15 +1131,18 @@
   if (space != nullptr) {
     if (space->IsZygoteSpace()) {
       heap_type = HPROF_HEAP_ZYGOTE;
+      VisitRoot(obj, RootInfo(kRootVMInternal));
     } else if (space->IsImageSpace() && heap->ObjectIsInBootImageSpace(obj)) {
       // Only count objects in the boot image as HPROF_HEAP_IMAGE, this leaves app image objects as
       // HPROF_HEAP_APP. b/35762934
       heap_type = HPROF_HEAP_IMAGE;
+      VisitRoot(obj, RootInfo(kRootVMInternal));
     }
   } else {
     const auto* los = heap->GetLargeObjectsSpace();
     if (los->Contains(obj) && los->IsZygoteLargeObject(Thread::Current(), obj)) {
       heap_type = HPROF_HEAP_ZYGOTE;
+      VisitRoot(obj, RootInfo(kRootVMInternal));
     }
   }
   CheckHeapSegmentConstraints();
@@ -1164,7 +1186,7 @@
     } else if (c->IsArrayClass()) {
       DumpHeapArray(obj->AsArray(), c);
     } else {
-      DumpHeapInstanceObject(obj, c);
+      DumpHeapInstanceObject(obj, c, visitor.GetRoots());
     }
   }
 
@@ -1269,7 +1291,10 @@
 
   // Instance fields for this class (no superclass fields)
   int iFieldCount = klass->NumInstanceFields();
-  if (klass->IsStringClass()) {
+  // add_internal_runtime_objects is only for classes that may keep objects live through means
+  // other than instance fields. That is never the case for strings.
+  const bool add_internal_runtime_objects = AddRuntimeInternalObjectsField(klass);
+  if (klass->IsStringClass() || add_internal_runtime_objects) {
     __ AddU2((uint16_t)iFieldCount + 1);
   } else {
     __ AddU2((uint16_t)iFieldCount);
@@ -1284,6 +1309,21 @@
   if (klass->IsStringClass()) {
     __ AddStringId(LookupStringId("value"));
     __ AddU1(hprof_basic_object);
+  } else if (add_internal_runtime_objects) {
+    __ AddStringId(LookupStringId("runtimeInternalObjects"));
+    __ AddU1(hprof_basic_object);
+  }
+}
+
+void Hprof::DumpFakeObjectArray(mirror::Object* obj, const std::set<mirror::Object*>& elements) {
+  __ AddU1(HPROF_OBJECT_ARRAY_DUMP);
+  __ AddObjectId(obj);
+  __ AddStackTraceSerialNumber(LookupStackTraceSerialNumber(obj));
+  __ AddU4(elements.size());
+  __ AddClassId(LookupClassId(
+      Runtime::Current()->GetClassLinker()->GetClassRoot(ClassLinker::kObjectArrayClass)));
+  for (mirror::Object* e : elements) {
+    __ AddObjectId(e);
   }
 }
 
@@ -1327,7 +1367,9 @@
   }
 }
 
-void Hprof::DumpHeapInstanceObject(mirror::Object* obj, mirror::Class* klass) {
+void Hprof::DumpHeapInstanceObject(mirror::Object* obj,
+                                   mirror::Class* klass,
+                                   const std::set<mirror::Object*>& fake_roots) {
   // obj is an instance object.
   __ AddU1(HPROF_INSTANCE_DUMP);
   __ AddObjectId(obj);
@@ -1341,6 +1383,7 @@
 
   // What we will use for the string value if the object is a string.
   mirror::Object* string_value = nullptr;
+  mirror::Object* fake_object_array = nullptr;
 
   // Write the instance data;  fields for this class, followed by super class fields, and so on.
   do {
@@ -1396,8 +1439,12 @@
         }
       }
       __ AddObjectId(string_value);
+    } else if (AddRuntimeInternalObjectsField(klass)) {
+      // We need an id that is guaranteed to not be used; offset the object address by half of
+      // the object alignment.
+      fake_object_array = reinterpret_cast<mirror::Object*>(
+          reinterpret_cast<uintptr_t>(obj) + kObjectAlignment / 2);
+      __ AddObjectId(fake_object_array);
     }
-
     klass = klass->GetSuperClass();
   } while (klass != nullptr);
 
@@ -1419,6 +1466,8 @@
       __ AddU1(hprof_basic_char);
       __ AddU2List(s->GetValue(), s->GetLength());
     }
+  } else if (fake_object_array != nullptr) {
+    DumpFakeObjectArray(fake_object_array, fake_roots);
   }
 }
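
The fake object array's id above is derived from the instance's own address. Every real object is aligned to kObjectAlignment, so an address offset by half the alignment can never name a real object. A sketch, assuming 8-byte object alignment; not ART code:

#include <cassert>
#include <cstdint>

constexpr uintptr_t kObjectAlignment = 8u;  // assumption for this sketch

constexpr uintptr_t FakeObjectArrayId(uintptr_t obj_address) {
  return obj_address + kObjectAlignment / 2u;
}

int main() {
  uintptr_t obj = 0x70001000u;  // any properly aligned object address
  assert(obj % kObjectAlignment == 0u);
  // The fake id is misaligned, so no real object can ever collide with it.
  assert(FakeObjectArrayId(obj) % kObjectAlignment == kObjectAlignment / 2u);
  return 0;
}
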
 
diff --git a/test/911-get-stack-trace/src/art/PrintThread.java b/test/911-get-stack-trace/src/art/PrintThread.java
index f50a66b..fee5ba0 100644
--- a/test/911-get-stack-trace/src/art/PrintThread.java
+++ b/test/911-get-stack-trace/src/art/PrintThread.java
@@ -41,7 +41,8 @@
   // We have to ignore some threads when printing all stack traces. These are threads that may or
   // may not exist depending on the environment.
   public final static String IGNORE_THREAD_NAME_REGEX =
-      "Binder:|RenderThread|hwuiTask|Jit thread pool worker|Instr:|JDWP|Profile Saver|main";
+      "Binder:|RenderThread|hwuiTask|Jit thread pool worker|Instr:|JDWP|Profile Saver|main|" +
+      "queued-work-looper";
   public final static Matcher IGNORE_THREADS =
       Pattern.compile(IGNORE_THREAD_NAME_REGEX).matcher("");
 
@@ -88,4 +89,4 @@
   }
 
   public static native String[][] getStackTrace(Thread thread, int start, int max);
-}
\ No newline at end of file
+}