Merge "Disable ART run-test 648-many-direct-methods with "no-image"."
diff --git a/compiler/linker/arm/relative_patcher_arm_base.h b/compiler/linker/arm/relative_patcher_arm_base.h
index 2cb1b6c..47f840f 100644
--- a/compiler/linker/arm/relative_patcher_arm_base.h
+++ b/compiler/linker/arm/relative_patcher_arm_base.h
@@ -43,10 +43,11 @@
enum class ThunkType {
kMethodCall, // Method call thunk.
kBakerReadBarrierField, // Baker read barrier, load field or array element at known offset.
+ kBakerReadBarrierArray, // Baker read barrier, array load with index in register.
kBakerReadBarrierRoot, // Baker read barrier, GC root load.
};
- struct BakerReadBarrierOffsetParams {
+ struct BakerReadBarrierFieldParams {
uint32_t holder_reg; // Holder object for reading lock word.
uint32_t base_reg; // Base register, different from holder for large offset.
// If base differs from holder, it should be a pre-defined
@@ -54,9 +55,16 @@
// The offset is retrieved using introspection.
};
+ struct BakerReadBarrierArrayParams {
+ uint32_t base_reg; // Reference to the start of the data.
+ uint32_t dummy; // Dummy field.
+ // The index register is retrieved using introspection
+ // to limit the number of thunks we need to emit.
+ };
+
struct BakerReadBarrierRootParams {
uint32_t root_reg; // The register holding the GC root.
- uint32_t dummy;
+ uint32_t dummy; // Dummy field.
};
struct RawThunkParams {
@@ -66,8 +74,12 @@
union ThunkParams {
RawThunkParams raw_params;
- BakerReadBarrierOffsetParams offset_params;
+ BakerReadBarrierFieldParams field_params;
+ BakerReadBarrierArrayParams array_params;
BakerReadBarrierRootParams root_params;
+ static_assert(sizeof(raw_params) == sizeof(field_params), "field_params size check");
+ static_assert(sizeof(raw_params) == sizeof(array_params), "array_params size check");
+ static_assert(sizeof(raw_params) == sizeof(root_params), "root_params size check");
};
class ThunkKey {
@@ -78,9 +90,14 @@
return type_;
}
- BakerReadBarrierOffsetParams GetOffsetParams() const {
+ BakerReadBarrierFieldParams GetFieldParams() const {
DCHECK(type_ == ThunkType::kBakerReadBarrierField);
- return params_.offset_params;
+ return params_.field_params;
+ }
+
+ BakerReadBarrierArrayParams GetArrayParams() const {
+ DCHECK(type_ == ThunkType::kBakerReadBarrierArray);
+ return params_.array_params;
}
BakerReadBarrierRootParams GetRootParams() const {
diff --git a/compiler/linker/arm64/relative_patcher_arm64.cc b/compiler/linker/arm64/relative_patcher_arm64.cc
index 551c73b..5c6fb50 100644
--- a/compiler/linker/arm64/relative_patcher_arm64.cc
+++ b/compiler/linker/arm64/relative_patcher_arm64.cc
@@ -29,6 +29,7 @@
#include "mirror/array-inl.h"
#include "oat.h"
#include "oat_quick_method_header.h"
+#include "read_barrier.h"
#include "utils/arm64/assembler_arm64.h"
namespace art {
@@ -313,7 +314,17 @@
uint32_t next_insn = GetInsn(code, literal_offset + 4u);
// LDR (immediate) with correct base_reg.
CheckValidReg(next_insn & 0x1fu); // Check destination register.
- CHECK_EQ(next_insn & 0xffc003e0u, 0xb9400000u | (key.GetOffsetParams().base_reg << 5));
+ CHECK_EQ(next_insn & 0xffc003e0u, 0xb9400000u | (key.GetFieldParams().base_reg << 5));
+ break;
+ }
+ case ThunkType::kBakerReadBarrierArray: {
+ DCHECK_GE(code->size() - literal_offset, 8u);
+ uint32_t next_insn = GetInsn(code, literal_offset + 4u);
+ // LDR (register) with the correct base_reg, size=10 (32-bit), option=011 (extend = LSL),
+ // and S=1 (shift amount = 2 for 32-bit version), i.e. LDR Wt, [Xn, Xm, LSL #2].
+ CheckValidReg(next_insn & 0x1fu); // Check destination register.
+ CHECK_EQ(next_insn & 0xffe0ffe0u, 0xb8607800u | (key.GetArrayParams().base_reg << 5));
+ CheckValidReg((next_insn >> 16) & 0x1f); // Check index register.
break;
}
case ThunkType::kBakerReadBarrierRoot: {
@@ -344,10 +355,16 @@
ThunkParams params;
switch (type) {
case BakerReadBarrierKind::kField:
- params.offset_params.base_reg = BakerReadBarrierFirstRegField::Decode(value);
- CheckValidReg(params.offset_params.base_reg);
- params.offset_params.holder_reg = BakerReadBarrierSecondRegField::Decode(value);
- CheckValidReg(params.offset_params.holder_reg);
+ params.field_params.base_reg = BakerReadBarrierFirstRegField::Decode(value);
+ CheckValidReg(params.field_params.base_reg);
+ params.field_params.holder_reg = BakerReadBarrierSecondRegField::Decode(value);
+ CheckValidReg(params.field_params.holder_reg);
+ break;
+ case BakerReadBarrierKind::kArray:
+ params.array_params.base_reg = BakerReadBarrierFirstRegField::Decode(value);
+ CheckValidReg(params.array_params.base_reg);
+ params.array_params.dummy = 0u;
+ DCHECK_EQ(BakerReadBarrierSecondRegField::Decode(value), kInvalidEncodedReg);
break;
case BakerReadBarrierKind::kGcRoot:
params.root_params.root_reg = BakerReadBarrierFirstRegField::Decode(value);
@@ -363,6 +380,9 @@
static_assert(static_cast<uint32_t>(BakerReadBarrierKind::kField) + kTypeTranslationOffset ==
static_cast<uint32_t>(ThunkType::kBakerReadBarrierField),
"Thunk type translation check.");
+ static_assert(static_cast<uint32_t>(BakerReadBarrierKind::kArray) + kTypeTranslationOffset ==
+ static_cast<uint32_t>(ThunkType::kBakerReadBarrierArray),
+ "Thunk type translation check.");
static_assert(static_cast<uint32_t>(BakerReadBarrierKind::kGcRoot) + kTypeTranslationOffset ==
static_cast<uint32_t>(ThunkType::kBakerReadBarrierRoot),
"Thunk type translation check.");
@@ -394,7 +414,7 @@
// Introduce a dependency on the lock_word including rb_state,
// to prevent load-load reordering, and without using
// a memory barrier (which would be more expensive).
- __ Add(base_reg, base_reg, Operand(vixl::aarch64::ip0, LSR, 32));
+ __ Add(base_reg, base_reg, Operand(ip0, LSR, 32));
__ Br(lr); // And return back to the function.
// Note: The fake dependency is unnecessary for the slow path.
}
@@ -419,8 +439,8 @@
// and return to the LDR instruction to load the reference. Otherwise, use introspection
// to load the reference and call the entrypoint (in IP1) that performs further checks
// on the reference and marks it if needed.
- auto holder_reg = Register::GetXRegFromCode(key.GetOffsetParams().holder_reg);
- auto base_reg = Register::GetXRegFromCode(key.GetOffsetParams().base_reg);
+ auto holder_reg = Register::GetXRegFromCode(key.GetFieldParams().holder_reg);
+ auto base_reg = Register::GetXRegFromCode(key.GetFieldParams().base_reg);
UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
temps.Exclude(ip0, ip1);
// If base_reg differs from holder_reg, the offset was too large and we must have
@@ -444,11 +464,31 @@
// Add null check slow path. The stack map is at the address pointed to by LR.
__ Bind(&throw_npe);
int32_t offset = GetThreadOffset<kArm64PointerSize>(kQuickThrowNullPointer).Int32Value();
- __ Ldr(ip0, MemOperand(vixl::aarch64::x19, offset));
+ __ Ldr(ip0, MemOperand(/* Thread* */ vixl::aarch64::x19, offset));
__ Br(ip0);
}
break;
}
+ case ThunkType::kBakerReadBarrierArray: {
+ auto base_reg = Register::GetXRegFromCode(key.GetArrayParams().base_reg);
+ UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
+ temps.Exclude(ip0, ip1);
+ vixl::aarch64::Label slow_path;
+ int32_t data_offset =
+ mirror::Array::DataOffset(Primitive::ComponentSize(Primitive::kPrimNot)).Int32Value();
+ MemOperand lock_word(base_reg, mirror::Object::MonitorOffset().Int32Value() - data_offset);
+ DCHECK_LT(lock_word.GetOffset(), 0);
+ EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path);
+ __ Bind(&slow_path);
+ MemOperand ldr_address(lr, BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET);
+ __ Ldr(ip0.W(), ldr_address); // Load the LDR (register) unsigned offset.
+ __ Ubfx(ip0, ip0, 16, 6); // Extract the index register, plus 32 (bit 21 is set).
+ __ Bfi(ip1, ip0, 3, 6); // Insert ip0 to the entrypoint address to create
+ // a switch case target based on the index register.
+ __ Mov(ip0, base_reg); // Move the base register to ip0.
+ __ Br(ip1); // Jump to the entrypoint's array switch case.
+ break;
+ }
case ThunkType::kBakerReadBarrierRoot: {
// Check if the reference needs to be marked and if so (i.e. not null, not marked yet
// and it does not have a forwarding address), call the correct introspection entrypoint;
@@ -494,6 +534,7 @@
case ThunkType::kMethodCall:
return kMaxMethodCallPositiveDisplacement;
case ThunkType::kBakerReadBarrierField:
+ case ThunkType::kBakerReadBarrierArray:
case ThunkType::kBakerReadBarrierRoot:
return kMaxBcondPositiveDisplacement;
}
@@ -504,6 +545,7 @@
case ThunkType::kMethodCall:
return kMaxMethodCallNegativeDisplacement;
case ThunkType::kBakerReadBarrierField:
+ case ThunkType::kBakerReadBarrierArray:
case ThunkType::kBakerReadBarrierRoot:
return kMaxBcondNegativeDisplacement;
}
diff --git a/compiler/linker/arm64/relative_patcher_arm64.h b/compiler/linker/arm64/relative_patcher_arm64.h
index 7887cea..71ab70e 100644
--- a/compiler/linker/arm64/relative_patcher_arm64.h
+++ b/compiler/linker/arm64/relative_patcher_arm64.h
@@ -19,6 +19,7 @@
#include "base/array_ref.h"
#include "base/bit_field.h"
+#include "base/bit_utils.h"
#include "linker/arm/relative_patcher_arm_base.h"
namespace art {
@@ -28,6 +29,7 @@
public:
enum class BakerReadBarrierKind : uint8_t {
kField, // Field get or array get with constant offset (i.e. constant index).
+ kArray, // Array get with index in register.
kGcRoot, // GC root load.
kLast
};
@@ -40,6 +42,13 @@
BakerReadBarrierSecondRegField::Encode(holder_reg);
}
+ static uint32_t EncodeBakerReadBarrierArrayData(uint32_t base_reg) {
+ CheckValidReg(base_reg);
+ return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kArray) |
+ BakerReadBarrierFirstRegField::Encode(base_reg) |
+ BakerReadBarrierSecondRegField::Encode(kInvalidEncodedReg);
+ }
+
static uint32_t EncodeBakerReadBarrierGcRootData(uint32_t root_reg) {
CheckValidReg(root_reg);
return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kGcRoot) |
@@ -68,14 +77,14 @@
uint32_t patch_offset) OVERRIDE;
protected:
- static constexpr uint32_t kInvalidEncodedReg = /* sp/zr is invalid */ 31u;
-
ThunkKey GetBakerReadBarrierKey(const LinkerPatch& patch) OVERRIDE;
std::vector<uint8_t> CompileThunk(const ThunkKey& key) OVERRIDE;
uint32_t MaxPositiveDisplacement(ThunkType type) OVERRIDE;
uint32_t MaxNegativeDisplacement(ThunkType type) OVERRIDE;
private:
+ static constexpr uint32_t kInvalidEncodedReg = /* sp/zr is invalid */ 31u;
+
static constexpr size_t kBitsForBakerReadBarrierKind =
MinimumBitsToStore(static_cast<size_t>(BakerReadBarrierKind::kLast));
static constexpr size_t kBitsForRegister = 5u;
diff --git a/compiler/linker/arm64/relative_patcher_arm64_test.cc b/compiler/linker/arm64/relative_patcher_arm64_test.cc
index b4d35ab..57ea886 100644
--- a/compiler/linker/arm64/relative_patcher_arm64_test.cc
+++ b/compiler/linker/arm64/relative_patcher_arm64_test.cc
@@ -18,6 +18,7 @@
#include "linker/relative_patcher_test.h"
#include "linker/arm64/relative_patcher_arm64.h"
#include "lock_word.h"
+#include "mirror/array-inl.h"
#include "mirror/object.h"
#include "oat_quick_method_header.h"
@@ -46,9 +47,15 @@
static constexpr uint32_t kBlPlusMax = 0x95ffffffu;
static constexpr uint32_t kBlMinusMax = 0x96000000u;
- // LDR immediate, unsigned offset.
+ // LDR immediate, 32-bit, unsigned offset.
static constexpr uint32_t kLdrWInsn = 0xb9400000u;
+ // LDR register, 32-bit, LSL #2.
+ static constexpr uint32_t kLdrWLsl2Insn = 0xb8607800u;
+
+ // LDUR, 32-bit.
+ static constexpr uint32_t kLdurWInsn = 0xb8400000u;
+
// ADD/ADDS/SUB/SUBS immediate, 64-bit.
static constexpr uint32_t kAddXInsn = 0x91000000u;
static constexpr uint32_t kAddsXInsn = 0xb1000000u;
@@ -68,7 +75,7 @@
static constexpr uint32_t kLdrXSpRelInsn = 0xf94003edu;
// CBNZ x17, +0. Bits 5-23 are a placeholder for target offset from PC in units of 4-bytes.
- static constexpr uint32_t kCbnzIP1Plus0Insn = 0xb5000011;
+ static constexpr uint32_t kCbnzIP1Plus0Insn = 0xb5000011u;
void InsertInsn(std::vector<uint8_t>* code, size_t pos, uint32_t insn) {
CHECK_LE(pos, code->size());
@@ -188,7 +195,7 @@
std::vector<uint8_t> GenNops(size_t num_nops) {
std::vector<uint8_t> result;
- result.reserve(num_nops * 4u + 4u);
+ result.reserve(num_nops * 4u);
for (size_t i = 0; i != num_nops; ++i) {
PushBackInsn(&result, kNopInsn);
}
@@ -228,7 +235,7 @@
} else {
LOG(FATAL) << "Unexpected instruction: 0x" << std::hex << use_insn;
}
- uint32_t adrp = 0x90000000 | // ADRP x0, +SignExtend(immhi:immlo:Zeros(12), 64)
+ uint32_t adrp = 0x90000000u | // ADRP x0, +SignExtend(immhi:immlo:Zeros(12), 64)
((disp & 0x3000u) << (29 - 12)) | // immlo = ((disp & 0x3000u) >> 12) is at bit 29,
((disp & 0xffffc000) >> (14 - 5)) | // immhi = (disp >> 14) is at bit 5,
// We take the sign bit from the disp, limiting disp to +- 2GiB.
@@ -471,6 +478,14 @@
return patcher->CompileThunk(key);
}
+ std::vector<uint8_t> CompileBakerArrayThunk(uint32_t base_reg) {
+ LinkerPatch patch = LinkerPatch::BakerReadBarrierBranchPatch(
+ 0u, Arm64RelativePatcher::EncodeBakerReadBarrierArrayData(base_reg));
+ auto* patcher = down_cast<Arm64RelativePatcher*>(patcher_.get());
+ ArmBaseRelativePatcher::ThunkKey key = patcher->GetBakerReadBarrierKey(patch);
+ return patcher->CompileThunk(key);
+ }
+
std::vector<uint8_t> CompileBakerGcRootThunk(uint32_t root_reg) {
LinkerPatch patch = LinkerPatch::BakerReadBarrierBranchPatch(
0u, Arm64RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg));
@@ -488,7 +503,7 @@
(static_cast<uint32_t>(output_[offset + 3]) << 24);
}
- void TestBakerField(uint32_t offset, uint32_t root_reg);
+ void TestBakerField(uint32_t offset, uint32_t ref_reg);
};
const uint8_t Arm64RelativePatcherTest::kCallRawCode[] = {
@@ -885,7 +900,7 @@
TEST_FOR_OFFSETS(LDRX_SPREL_ADD_TEST, 0, 8)
-void Arm64RelativePatcherTest::TestBakerField(uint32_t offset, uint32_t root_reg) {
+void Arm64RelativePatcherTest::TestBakerField(uint32_t offset, uint32_t ref_reg) {
uint32_t valid_regs[] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
10, 11, 12, 13, 14, 15, 18, 19, // IP0 and IP1 are reserved.
@@ -899,7 +914,7 @@
uint32_t method_idx = 0u;
for (uint32_t base_reg : valid_regs) {
for (uint32_t holder_reg : valid_regs) {
- uint32_t ldr = kLdrWInsn | (offset << (10 - 2)) | (base_reg << 5) | root_reg;
+ uint32_t ldr = kLdrWInsn | (offset << (10 - 2)) | (base_reg << 5) | ref_reg;
const std::vector<uint8_t> raw_code = RawCode({kCbnzIP1Plus0Insn, ldr});
ASSERT_EQ(kMethodCodeSize, raw_code.size());
ArrayRef<const uint8_t> code(raw_code);
@@ -922,7 +937,7 @@
++method_idx;
uint32_t cbnz_offset = thunk_offset - (GetMethodOffset(method_idx) + kLiteralOffset);
uint32_t cbnz = kCbnzIP1Plus0Insn | (cbnz_offset << (5 - 2));
- uint32_t ldr = kLdrWInsn | (offset << (10 - 2)) | (base_reg << 5) | root_reg;
+ uint32_t ldr = kLdrWInsn | (offset << (10 - 2)) | (base_reg << 5) | ref_reg;
const std::vector<uint8_t> expected_code = RawCode({cbnz, ldr});
ASSERT_EQ(kMethodCodeSize, expected_code.size());
ASSERT_TRUE(
@@ -942,7 +957,7 @@
if (holder_reg == base_reg) {
// Verify that the null-check CBZ uses the correct register, i.e. holder_reg.
ASSERT_GE(output_.size() - gray_check_offset, 4u);
- ASSERT_EQ(0x34000000 | holder_reg, GetOutputInsn(thunk_offset) & 0xff00001f);
+ ASSERT_EQ(0x34000000u | holder_reg, GetOutputInsn(thunk_offset) & 0xff00001fu);
gray_check_offset +=4u;
}
// Verify that the lock word for gray bit check is loaded from the holder address.
@@ -955,12 +970,12 @@
/* ip0 */ 16;
EXPECT_EQ(load_lock_word, GetOutputInsn(gray_check_offset));
// Verify the gray bit check.
- const uint32_t check_gray_bit_witout_offset =
- 0x37000000 | (LockWord::kReadBarrierStateShift << 19) | /* ip0 */ 16;
- EXPECT_EQ(check_gray_bit_witout_offset, GetOutputInsn(gray_check_offset + 4u) & 0xfff8001f);
+ const uint32_t check_gray_bit_without_offset =
+ 0x37000000u | (LockWord::kReadBarrierStateShift << 19) | /* ip0 */ 16;
+ EXPECT_EQ(check_gray_bit_without_offset, GetOutputInsn(gray_check_offset + 4u) & 0xfff8001fu);
// Verify the fake dependency.
const uint32_t fake_dependency =
- 0x8b408000 | // ADD Xd, Xn, Xm, LSR 32
+ 0x8b408000u | // ADD Xd, Xn, Xm, LSR 32
(/* ip0 */ 16 << 16) | // Xm = ip0
(base_reg << 5) | // Xn = base_reg
base_reg; // Xd = base_reg
@@ -973,19 +988,19 @@
}
}
-#define TEST_BAKER_FIELD(offset, root_reg) \
+#define TEST_BAKER_FIELD(offset, ref_reg) \
TEST_F(Arm64RelativePatcherTestDefault, \
- BakerOffset##offset##_##root_reg) { \
- TestBakerField(offset, root_reg); \
+ BakerOffset##offset##_##ref_reg) { \
+ TestBakerField(offset, ref_reg); \
}
-TEST_BAKER_FIELD(/* offset */ 0, /* root_reg */ 0)
-TEST_BAKER_FIELD(/* offset */ 8, /* root_reg */ 15)
-TEST_BAKER_FIELD(/* offset */ 0x3ffc, /* root_reg */ 29)
+TEST_BAKER_FIELD(/* offset */ 0, /* ref_reg */ 0)
+TEST_BAKER_FIELD(/* offset */ 8, /* ref_reg */ 15)
+TEST_BAKER_FIELD(/* offset */ 0x3ffc, /* ref_reg */ 29)
TEST_F(Arm64RelativePatcherTestDefault, BakerOffsetThunkInTheMiddle) {
// One thunk in the middle with maximum distance branches to it from both sides.
- // Use offset = 0, base_reg = 0, root_reg = 0, the LDR is simply `kLdrWInsn`.
+ // Use offset = 0, base_reg = 0, ref_reg = 0, the LDR is simply `kLdrWInsn`.
constexpr uint32_t kLiteralOffset1 = 4;
const std::vector<uint8_t> raw_code1 = RawCode({kNopInsn, kCbnzIP1Plus0Insn, kLdrWInsn});
ArrayRef<const uint8_t> code1(raw_code1);
@@ -1046,7 +1061,7 @@
TEST_F(Arm64RelativePatcherTestDefault, BakerOffsetThunkBeforeFiller) {
// Based on the first part of BakerOffsetThunkInTheMiddle but the CBNZ is one instruction
// earlier, so the thunk is emitted before the filler.
- // Use offset = 0, base_reg = 0, root_reg = 0, the LDR is simply `kLdrWInsn`.
+ // Use offset = 0, base_reg = 0, ref_reg = 0, the LDR is simply `kLdrWInsn`.
constexpr uint32_t kLiteralOffset1 = 0;
const std::vector<uint8_t> raw_code1 = RawCode({kCbnzIP1Plus0Insn, kLdrWInsn, kNopInsn});
ArrayRef<const uint8_t> code1(raw_code1);
@@ -1076,7 +1091,7 @@
TEST_F(Arm64RelativePatcherTestDefault, BakerOffsetThunkInTheMiddleUnreachableFromLast) {
// Based on the BakerOffsetThunkInTheMiddle but the CBNZ in the last method is preceded
// by NOP and cannot reach the thunk in the middle, so we emit an extra thunk at the end.
- // Use offset = 0, base_reg = 0, root_reg = 0, the LDR is simply `kLdrWInsn`.
+ // Use offset = 0, base_reg = 0, ref_reg = 0, the LDR is simply `kLdrWInsn`.
constexpr uint32_t kLiteralOffset1 = 4;
const std::vector<uint8_t> raw_code1 = RawCode({kNopInsn, kCbnzIP1Plus0Insn, kLdrWInsn});
ArrayRef<const uint8_t> code1(raw_code1);
@@ -1132,7 +1147,88 @@
ASSERT_TRUE(CheckLinkedMethod(MethodRef(5), ArrayRef<const uint8_t>(expected_code2)));
}
-TEST_F(Arm64RelativePatcherTestDefault, BakerRootGcRoot) {
+TEST_F(Arm64RelativePatcherTestDefault, BakerArray) {
+ uint32_t valid_regs[] = {
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
+ 10, 11, 12, 13, 14, 15, 18, 19, // IP0 and IP1 are reserved.
+ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
+ // LR and SP/ZR are reserved.
+ };
+ auto ldr = [](uint32_t base_reg) {
+ uint32_t index_reg = (base_reg == 0u) ? 1u : 0u;
+ uint32_t ref_reg = (base_reg == 2) ? 3u : 2u;
+ return kLdrWLsl2Insn | (index_reg << 16) | (base_reg << 5) | ref_reg;
+ };
+ constexpr size_t kMethodCodeSize = 8u;
+ constexpr size_t kLiteralOffset = 0u;
+ uint32_t method_idx = 0u;
+ for (uint32_t base_reg : valid_regs) {
+ ++method_idx;
+ const std::vector<uint8_t> raw_code = RawCode({kCbnzIP1Plus0Insn, ldr(base_reg)});
+ ASSERT_EQ(kMethodCodeSize, raw_code.size());
+ ArrayRef<const uint8_t> code(raw_code);
+ const LinkerPatch patches[] = {
+ LinkerPatch::BakerReadBarrierBranchPatch(
+ kLiteralOffset, Arm64RelativePatcher::EncodeBakerReadBarrierArrayData(base_reg)),
+ };
+ AddCompiledMethod(MethodRef(method_idx), code, ArrayRef<const LinkerPatch>(patches));
+ }
+ Link();
+
+ // All thunks are at the end.
+ uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArm64Alignment);
+ method_idx = 0u;
+ for (uint32_t base_reg : valid_regs) {
+ ++method_idx;
+ uint32_t cbnz_offset = thunk_offset - (GetMethodOffset(method_idx) + kLiteralOffset);
+ uint32_t cbnz = kCbnzIP1Plus0Insn | (cbnz_offset << (5 - 2));
+ const std::vector<uint8_t> expected_code = RawCode({cbnz, ldr(base_reg)});
+ ASSERT_EQ(kMethodCodeSize, expected_code.size());
+ EXPECT_TRUE(CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code)));
+
+ std::vector<uint8_t> expected_thunk = CompileBakerArrayThunk(base_reg);
+ ASSERT_GT(output_.size(), thunk_offset);
+ ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size());
+ ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset,
+ expected_thunk.size());
+ if (ArrayRef<const uint8_t>(expected_thunk) != compiled_thunk) {
+ DumpDiff(ArrayRef<const uint8_t>(expected_thunk), compiled_thunk);
+ ASSERT_TRUE(false);
+ }
+
+ // Verify that the lock word for gray bit check is loaded from the correct address
+ // before the base_reg which points to the array data.
+ static constexpr size_t kGrayCheckInsns = 5;
+ ASSERT_GE(output_.size() - thunk_offset, 4u * kGrayCheckInsns);
+ int32_t data_offset =
+ mirror::Array::DataOffset(Primitive::ComponentSize(Primitive::kPrimNot)).Int32Value();
+ int32_t offset = mirror::Object::MonitorOffset().Int32Value() - data_offset;
+ ASSERT_LT(offset, 0);
+ const uint32_t load_lock_word =
+ kLdurWInsn |
+ ((offset & 0x1ffu) << 12) |
+ (base_reg << 5) |
+ /* ip0 */ 16;
+ EXPECT_EQ(load_lock_word, GetOutputInsn(thunk_offset));
+ // Verify the gray bit check.
+ const uint32_t check_gray_bit_without_offset =
+ 0x37000000u | (LockWord::kReadBarrierStateShift << 19) | /* ip0 */ 16;
+ EXPECT_EQ(check_gray_bit_without_offset, GetOutputInsn(thunk_offset + 4u) & 0xfff8001fu);
+ // Verify the fake dependency.
+ const uint32_t fake_dependency =
+ 0x8b408000u | // ADD Xd, Xn, Xm, LSR 32
+ (/* ip0 */ 16 << 16) | // Xm = ip0
+ (base_reg << 5) | // Xn = base_reg
+ base_reg; // Xd = base_reg
+ EXPECT_EQ(fake_dependency, GetOutputInsn(thunk_offset + 12u));
+ // Do not check the rest of the implementation.
+
+ // The next thunk follows on the next aligned offset.
+ thunk_offset += RoundUp(expected_thunk.size(), kArm64Alignment);
+ }
+}
+
+TEST_F(Arm64RelativePatcherTestDefault, BakerGcRoot) {
uint32_t valid_regs[] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
10, 11, 12, 13, 14, 15, 18, 19, // IP0 and IP1 are reserved.
@@ -1180,7 +1276,7 @@
// Verify that the fast-path null-check CBZ uses the correct register, i.e. root_reg.
ASSERT_GE(output_.size() - thunk_offset, 4u);
- ASSERT_EQ(0x34000000 | root_reg, GetOutputInsn(thunk_offset) & 0xff00001f);
+ ASSERT_EQ(0x34000000u | root_reg, GetOutputInsn(thunk_offset) & 0xff00001fu);
// Do not check the rest of the implementation.
// The next thunk follows on the next aligned offset.
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 4629c54..eee832a 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -91,6 +91,7 @@
// Flags controlling the use of link-time generated thunks for Baker read barriers.
constexpr bool kBakerReadBarrierLinkTimeThunksEnableForFields = true;
+constexpr bool kBakerReadBarrierLinkTimeThunksEnableForArrays = true;
constexpr bool kBakerReadBarrierLinkTimeThunksEnableForGcRoots = true;
// Some instructions have special requirements for a temporary, for example
@@ -2759,6 +2760,7 @@
// Object ArrayGet with Baker's read barrier case.
// Note that a potential implicit null check is handled in the
// CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier call.
+ DCHECK(!instruction->CanDoImplicitNullCheckOn(instruction->InputAt(0)));
if (index.IsConstant()) {
// Array load with a constant index can be treated as a field load.
offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(type);
@@ -2769,12 +2771,12 @@
obj.W(),
offset,
maybe_temp,
- /* needs_null_check */ true,
+ /* needs_null_check */ false,
/* use_load_acquire */ false);
} else {
Register temp = WRegisterFrom(locations->GetTemp(0));
codegen_->GenerateArrayLoadWithBakerReadBarrier(
- instruction, out, obj.W(), offset, index, temp, /* needs_null_check */ true);
+ instruction, out, obj.W(), offset, index, temp, /* needs_null_check */ false);
}
} else {
// General case.
@@ -5928,9 +5930,9 @@
!Runtime::Current()->UseJitCompilation()) {
// Note that we do not actually check the value of `GetIsGcMarking()`
// to decide whether to mark the loaded GC root or not. Instead, we
- // load into `temp` the read barrier mark introspection entrypoint.
- // If `temp` is null, it means that `GetIsGcMarking()` is false, and
- // vice versa.
+ // load into `temp` (actually IP1) the read barrier mark introspection
+ // entrypoint. If `temp` is null, it means that `GetIsGcMarking()` is
+ // false, and vice versa.
//
// We use link-time generated thunks for the slow path. That thunk
// checks the reference and jumps to the entrypoint if needed.
@@ -6054,24 +6056,24 @@
!use_load_acquire &&
!Runtime::Current()->UseJitCompilation()) {
// Note that we do not actually check the value of `GetIsGcMarking()`
- // to decide whether to mark the loaded GC root or not. Instead, we
- // load into `temp` the read barrier mark introspection entrypoint.
- // If `temp` is null, it means that `GetIsGcMarking()` is false, and
- // vice versa.
+ // to decide whether to mark the loaded reference or not. Instead, we
+ // load into `temp` (actually IP1) the read barrier mark introspection
+ // entrypoint. If `temp` is null, it means that `GetIsGcMarking()` is
+ // false, and vice versa.
//
// We use link-time generated thunks for the slow path. That thunk checks
// the holder and jumps to the entrypoint if needed. If the holder is not
// gray, it creates a fake dependency and returns to the LDR instruction.
//
// temp = Thread::Current()->pReadBarrierMarkIntrospection
- // lr = &return_address;
+ // lr = &gray_return_address;
// if (temp != nullptr) {
// goto field_thunk<holder_reg, base_reg>(lr)
// }
// not_gray_return_address:
// // Original reference load. If the offset is too large to fit
// // into LDR, we use an adjusted base register here.
- // GcRoot<mirror::Object> root = *(obj+offset);
+ // GcRoot<mirror::Object> reference = *(obj+offset);
// gray_return_address:
DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>));
@@ -6141,16 +6143,74 @@
DCHECK(kEmitCompilerReadBarrier);
DCHECK(kUseBakerReadBarrier);
+ static_assert(
+ sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+ "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+ size_t scale_factor = Primitive::ComponentSizeShift(Primitive::kPrimNot);
+
+ if (kBakerReadBarrierLinkTimeThunksEnableForArrays &&
+ !Runtime::Current()->UseJitCompilation()) {
+ // Note that we do not actually check the value of `GetIsGcMarking()`
+ // to decide whether to mark the loaded reference or not. Instead, we
+ // load into `temp` (actually IP1) the read barrier mark introspection
+ // entrypoint. If `temp` is null, it means that `GetIsGcMarking()` is
+ // false, and vice versa.
+ //
+ // We use link-time generated thunks for the slow path. That thunk checks
+ // the holder and jumps to the entrypoint if needed. If the holder is not
+ // gray, it creates a fake dependency and returns to the LDR instruction.
+ //
+ // temp = Thread::Current()->pReadBarrierMarkIntrospection
+ // lr = &gray_return_address;
+ // if (temp != nullptr) {
+ // goto array_thunk<base_reg>(lr)
+ // }
+ // not_gray_return_address:
+ // // Original reference load. If the offset is too large to fit
+ // // into LDR, we use an adjusted base register here.
+ // GcRoot<mirror::Object> reference = data[index];
+ // gray_return_address:
+
+ DCHECK(index.IsValid());
+ Register index_reg = RegisterFrom(index, Primitive::kPrimInt);
+ Register ref_reg = RegisterFrom(ref, Primitive::kPrimNot);
+
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ DCHECK(temps.IsAvailable(ip0));
+ DCHECK(temps.IsAvailable(ip1));
+ temps.Exclude(ip0, ip1);
+ uint32_t custom_data =
+ linker::Arm64RelativePatcher::EncodeBakerReadBarrierArrayData(temp.GetCode());
+ vixl::aarch64::Label* cbnz_label = NewBakerReadBarrierPatch(custom_data);
+
+ // ip1 = Thread::Current()->pReadBarrierMarkReg16, i.e. pReadBarrierMarkIntrospection.
+ DCHECK_EQ(ip0.GetCode(), 16u);
+ const int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ip0.GetCode());
+ __ Ldr(ip1, MemOperand(tr, entry_point_offset));
+ __ Add(temp.X(), obj.X(), Operand(data_offset));
+ EmissionCheckScope guard(GetVIXLAssembler(),
+ (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize);
+ vixl::aarch64::Label return_address;
+ __ adr(lr, &return_address);
+ __ Bind(cbnz_label);
+ __ cbnz(ip1, static_cast<int64_t>(0)); // Placeholder, patched at link-time.
+ static_assert(BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
+ "Array LDR must be 1 instruction (4B) before the return address label; "
+ " 2 instructions (8B) for heap poisoning.");
+ __ ldr(ref_reg, MemOperand(temp.X(), index_reg.X(), LSL, scale_factor));
+ DCHECK(!needs_null_check); // The thunk cannot handle the null check.
+ GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
+ __ Bind(&return_address);
+ return;
+ }
+
// Array cells are never volatile variables, therefore array loads
// never use Load-Acquire instructions on ARM64.
const bool use_load_acquire = false;
- static_assert(
- sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
- "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
// /* HeapReference<Object> */ ref =
// *(obj + data_offset + index * sizeof(HeapReference<Object>))
- size_t scale_factor = Primitive::ComponentSizeShift(Primitive::kPrimNot);
GenerateReferenceLoadWithBakerReadBarrier(instruction,
ref,
obj,
diff --git a/compiler/optimizing/instruction_simplifier_shared.cc b/compiler/optimizing/instruction_simplifier_shared.cc
index 7d1f146..c39e5f4 100644
--- a/compiler/optimizing/instruction_simplifier_shared.cc
+++ b/compiler/optimizing/instruction_simplifier_shared.cc
@@ -247,6 +247,7 @@
access->GetType() == Primitive::kPrimNot) {
// For object arrays, the read barrier instrumentation requires
// the original array pointer.
+ // TODO: This can be relaxed for Baker CC.
return false;
}
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 8368026..36c7df7 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -5377,10 +5377,16 @@
}
bool CanDoImplicitNullCheckOn(HInstruction* obj ATTRIBUTE_UNUSED) const OVERRIDE {
// TODO: We can be smarter here.
- // Currently, the array access is always preceded by an ArrayLength or a NullCheck
- // which generates the implicit null check. There are cases when these can be removed
- // to produce better code. If we ever add optimizations to do so we should allow an
- // implicit check here (as long as the address falls in the first page).
+ // Currently, unless the array is the result of NewArray, the array access is always
+ // preceded by some form of a NullCheck necessary for the bounds check, usually
+ // implicit null check on the ArrayLength input to BoundsCheck or Deoptimize for
+ // dynamic BCE. There are cases when these could be removed to produce better code.
+ // If we ever add optimizations to do so we should allow an implicit check here
+ // (as long as the address falls in the first page).
+ //
+ // As an example of such fancy optimization, we could eliminate BoundsCheck for
+ // a = cond ? new int[1] : null;
+ // a[0]; // The Phi does not need bounds check for either input.
return false;
}
diff --git a/runtime/hprof/hprof.cc b/runtime/hprof/hprof.cc
index 4f390fd..8bdf6b1 100644
--- a/runtime/hprof/hprof.cc
+++ b/runtime/hprof/hprof.cc
@@ -34,7 +34,6 @@
#include <time.h>
#include <time.h>
#include <unistd.h>
-
#include <set>
#include "android-base/stringprintf.h"
@@ -502,9 +501,16 @@
void DumpHeapArray(mirror::Array* obj, mirror::Class* klass)
REQUIRES_SHARED(Locks::mutator_lock_);
- void DumpHeapInstanceObject(mirror::Object* obj, mirror::Class* klass)
+ void DumpFakeObjectArray(mirror::Object* obj, const std::set<mirror::Object*>& elements)
REQUIRES_SHARED(Locks::mutator_lock_);
+ void DumpHeapInstanceObject(mirror::Object* obj,
+ mirror::Class* klass,
+ const std::set<mirror::Object*>& fake_roots)
+ REQUIRES_SHARED(Locks::mutator_lock_);
+
+ bool AddRuntimeInternalObjectsField(mirror::Class* klass) REQUIRES_SHARED(Locks::mutator_lock_);
+
void ProcessHeap(bool header_first)
REQUIRES(Locks::mutator_lock_) {
// Reset current heap and object count.
@@ -1062,37 +1068,17 @@
++objects_in_segment_;
}
-// Use for visiting the GcRoots held live by ArtFields, ArtMethods, and ClassLoaders.
-class GcRootVisitor {
- public:
- explicit GcRootVisitor(Hprof* hprof) : hprof_(hprof) {}
-
- void operator()(mirror::Object* obj ATTRIBUTE_UNUSED,
- MemberOffset offset ATTRIBUTE_UNUSED,
- bool is_static ATTRIBUTE_UNUSED) const {}
-
- // Note that these don't have read barriers. Its OK however since the GC is guaranteed to not be
- // running during the hprof dumping process.
- void VisitRootIfNonNull(mirror::CompressedReference<mirror::Object>* root) const
- REQUIRES_SHARED(Locks::mutator_lock_) {
- if (!root->IsNull()) {
- VisitRoot(root);
- }
+bool Hprof::AddRuntimeInternalObjectsField(mirror::Class* klass) {
+ if (klass->IsDexCacheClass()) {
+ return true;
}
-
- void VisitRoot(mirror::CompressedReference<mirror::Object>* root) const
- REQUIRES_SHARED(Locks::mutator_lock_) {
- mirror::Object* obj = root->AsMirrorPtr();
- // The two cases are either classes or dex cache arrays. If it is a dex cache array, then use
- // VM internal. Otherwise the object is a declaring class of an ArtField or ArtMethod or a
- // class from a ClassLoader.
- hprof_->VisitRoot(obj, RootInfo(obj->IsClass() ? kRootStickyClass : kRootVMInternal));
+ // IsClassLoaderClass is true for subclasses of classloader but we only want to add the fake
+ // field to the java.lang.ClassLoader class itself, not to its subclasses.
+ if (klass->IsClassLoaderClass() && klass->GetSuperClass()->IsObjectClass()) {
+ return true;
}
-
-
- private:
- Hprof* const hprof_;
-};
+ return false;
+}
void Hprof::DumpHeapObject(mirror::Object* obj) {
// Ignore classes that are retired.
@@ -1103,8 +1089,41 @@
++total_objects_;
- GcRootVisitor visitor(this);
- obj->VisitReferences(visitor, VoidFunctor());
+ class RootCollector {
+ public:
+ explicit RootCollector() {}
+
+ void operator()(mirror::Object*, MemberOffset, bool) const {}
+
+ // Note that these don't have read barriers. It's OK however since the GC is guaranteed to not be
+ // running during the hprof dumping process.
+ void VisitRootIfNonNull(mirror::CompressedReference<mirror::Object>* root) const
+ REQUIRES_SHARED(Locks::mutator_lock_) {
+ if (!root->IsNull()) {
+ VisitRoot(root);
+ }
+ }
+
+ void VisitRoot(mirror::CompressedReference<mirror::Object>* root) const
+ REQUIRES_SHARED(Locks::mutator_lock_) {
+ roots_.insert(root->AsMirrorPtr());
+ }
+
+ const std::set<mirror::Object*>& GetRoots() const {
+ return roots_;
+ }
+
+ private:
+ // These roots are actually live from the object. Avoid marking them as roots in hprof to make
+ // it easier to debug class unloading.
+ mutable std::set<mirror::Object*> roots_;
+ };
+
+ RootCollector visitor;
+ // Collect all native roots.
+ if (!obj->IsClass()) {
+ obj->VisitReferences(visitor, VoidFunctor());
+ }
gc::Heap* const heap = Runtime::Current()->GetHeap();
const gc::space::ContinuousSpace* const space = heap->FindContinuousSpaceFromObject(obj, true);
@@ -1112,15 +1131,18 @@
if (space != nullptr) {
if (space->IsZygoteSpace()) {
heap_type = HPROF_HEAP_ZYGOTE;
+ VisitRoot(obj, RootInfo(kRootVMInternal));
} else if (space->IsImageSpace() && heap->ObjectIsInBootImageSpace(obj)) {
// Only count objects in the boot image as HPROF_HEAP_IMAGE, this leaves app image objects as
// HPROF_HEAP_APP. b/35762934
heap_type = HPROF_HEAP_IMAGE;
+ VisitRoot(obj, RootInfo(kRootVMInternal));
}
} else {
const auto* los = heap->GetLargeObjectsSpace();
if (los->Contains(obj) && los->IsZygoteLargeObject(Thread::Current(), obj)) {
heap_type = HPROF_HEAP_ZYGOTE;
+ VisitRoot(obj, RootInfo(kRootVMInternal));
}
}
CheckHeapSegmentConstraints();
@@ -1164,7 +1186,7 @@
} else if (c->IsArrayClass()) {
DumpHeapArray(obj->AsArray(), c);
} else {
- DumpHeapInstanceObject(obj, c);
+ DumpHeapInstanceObject(obj, c, visitor.GetRoots());
}
}
@@ -1269,7 +1291,10 @@
// Instance fields for this class (no superclass fields)
int iFieldCount = klass->NumInstanceFields();
- if (klass->IsStringClass()) {
+ // add_internal_runtime_objects is only for classes that may retain objects live through means
+ // other than fields. It is never the case for strings.
+ const bool add_internal_runtime_objects = AddRuntimeInternalObjectsField(klass);
+ if (klass->IsStringClass() || add_internal_runtime_objects) {
__ AddU2((uint16_t)iFieldCount + 1);
} else {
__ AddU2((uint16_t)iFieldCount);
@@ -1284,6 +1309,21 @@
if (klass->IsStringClass()) {
__ AddStringId(LookupStringId("value"));
__ AddU1(hprof_basic_object);
+ } else if (add_internal_runtime_objects) {
+ __ AddStringId(LookupStringId("runtimeInternalObjects"));
+ __ AddU1(hprof_basic_object);
+ }
+}
+
+void Hprof::DumpFakeObjectArray(mirror::Object* obj, const std::set<mirror::Object*>& elements) {
+ __ AddU1(HPROF_OBJECT_ARRAY_DUMP);
+ __ AddObjectId(obj);
+ __ AddStackTraceSerialNumber(LookupStackTraceSerialNumber(obj));
+ __ AddU4(elements.size());
+ __ AddClassId(LookupClassId(
+ Runtime::Current()->GetClassLinker()->GetClassRoot(ClassLinker::kObjectArrayClass)));
+ for (mirror::Object* e : elements) {
+ __ AddObjectId(e);
}
}
@@ -1327,7 +1367,9 @@
}
}
-void Hprof::DumpHeapInstanceObject(mirror::Object* obj, mirror::Class* klass) {
+void Hprof::DumpHeapInstanceObject(mirror::Object* obj,
+ mirror::Class* klass,
+ const std::set<mirror::Object*>& fake_roots) {
// obj is an instance object.
__ AddU1(HPROF_INSTANCE_DUMP);
__ AddObjectId(obj);
@@ -1341,6 +1383,7 @@
// What we will use for the string value if the object is a string.
mirror::Object* string_value = nullptr;
+ mirror::Object* fake_object_array = nullptr;
// Write the instance data; fields for this class, followed by super class fields, and so on.
do {
@@ -1396,8 +1439,12 @@
}
}
__ AddObjectId(string_value);
+ } else if (AddRuntimeInternalObjectsField(klass)) {
+ // We need an id that is guaranteed to not be used, use 1/2 of the object alignment.
+ fake_object_array = reinterpret_cast<mirror::Object*>(
+ reinterpret_cast<uintptr_t>(obj) + kObjectAlignment / 2);
+ __ AddObjectId(fake_object_array);
}
-
klass = klass->GetSuperClass();
} while (klass != nullptr);
@@ -1419,6 +1466,8 @@
__ AddU1(hprof_basic_char);
__ AddU2List(s->GetValue(), s->GetLength());
}
+ } else if (fake_object_array != nullptr) {
+ DumpFakeObjectArray(fake_object_array, fake_roots);
}
}
diff --git a/test/911-get-stack-trace/src/art/PrintThread.java b/test/911-get-stack-trace/src/art/PrintThread.java
index f50a66b..fee5ba0 100644
--- a/test/911-get-stack-trace/src/art/PrintThread.java
+++ b/test/911-get-stack-trace/src/art/PrintThread.java
@@ -41,7 +41,8 @@
// We have to ignore some threads when printing all stack traces. These are threads that may or
// may not exist depending on the environment.
public final static String IGNORE_THREAD_NAME_REGEX =
- "Binder:|RenderThread|hwuiTask|Jit thread pool worker|Instr:|JDWP|Profile Saver|main";
+ "Binder:|RenderThread|hwuiTask|Jit thread pool worker|Instr:|JDWP|Profile Saver|main|" +
+ "queued-work-looper";
public final static Matcher IGNORE_THREADS =
Pattern.compile(IGNORE_THREAD_NAME_REGEX).matcher("");
@@ -88,4 +89,4 @@
}
public static native String[][] getStackTrace(Thread thread, int start, int max);
-}
\ No newline at end of file
+}