ARM: Use 64-bit literals for LoadDImmediate().
And rewrite the medium-range long/fp literal to use
MOVW+ADD+LDRD/VLDR because the old instruction sequence
was broken if the "ADD ip, pc" was not 4-byte aligned.
Test: assembler_thumb2_test has been updated. Standard
ART test suite has been run on host and Nexus 5.
Change-Id: I37c6a62aa6e77c6a9701b5a1fb4db2e666c1eae9
diff --git a/compiler/utils/arm/assembler_arm.h b/compiler/utils/arm/assembler_arm.h
index a571d14..9cf72a2 100644
--- a/compiler/utils/arm/assembler_arm.h
+++ b/compiler/utils/arm/assembler_arm.h
@@ -754,32 +754,7 @@
}
}
- void LoadDImmediate(DRegister sd, double value, Condition cond = AL) {
- if (!vmovd(sd, value, cond)) {
- uint64_t int_value = bit_cast<uint64_t, double>(value);
- if (int_value == bit_cast<uint64_t, double>(0.0)) {
- // 0.0 is quite common, so we special case it by loading
- // 2.0 in `sd` and then substracting it.
- bool success = vmovd(sd, 2.0, cond);
- CHECK(success);
- vsubd(sd, sd, sd, cond);
- } else {
- if (sd < 16) {
- SRegister low = static_cast<SRegister>(sd << 1);
- SRegister high = static_cast<SRegister>(low + 1);
- LoadSImmediate(low, bit_cast<float, uint32_t>(Low32Bits(int_value)), cond);
- if (High32Bits(int_value) == Low32Bits(int_value)) {
- vmovs(high, low);
- } else {
- LoadSImmediate(high, bit_cast<float, uint32_t>(High32Bits(int_value)), cond);
- }
- } else {
- LOG(FATAL) << "Unimplemented loading of double into a D register "
- << "that cannot be split into two S registers";
- }
- }
- }
- }
+ virtual void LoadDImmediate(DRegister dd, double value, Condition cond = AL) = 0;
virtual void MarkExceptionHandler(Label* label) = 0;
virtual void LoadFromOffset(LoadOperandType type,
diff --git a/compiler/utils/arm/assembler_arm32.cc b/compiler/utils/arm/assembler_arm32.cc
index 6f7119d..c95dfa8 100644
--- a/compiler/utils/arm/assembler_arm32.cc
+++ b/compiler/utils/arm/assembler_arm32.cc
@@ -1486,6 +1486,34 @@
}
}
+void Arm32Assembler::LoadDImmediate(DRegister dd, double value, Condition cond) {
+ if (!vmovd(dd, value, cond)) {
+ uint64_t int_value = bit_cast<uint64_t, double>(value);
+ if (int_value == bit_cast<uint64_t, double>(0.0)) {
+ // 0.0 is quite common, so we special case it by loading
+ // 2.0 in `dd` and then subtracting it.
+ bool success = vmovd(dd, 2.0, cond);
+ CHECK(success);
+ vsubd(dd, dd, dd, cond);
+ } else {
+ if (dd < 16) {
+ // Note: Depending on the particular CPU, this may cause register
+ // forwarding hazard, negatively impacting the performance.
+ SRegister low = static_cast<SRegister>(dd << 1);
+ SRegister high = static_cast<SRegister>(low + 1);
+ LoadSImmediate(low, bit_cast<float, uint32_t>(Low32Bits(int_value)), cond);
+ if (High32Bits(int_value) == Low32Bits(int_value)) {
+ vmovs(high, low);
+ } else {
+ LoadSImmediate(high, bit_cast<float, uint32_t>(High32Bits(int_value)), cond);
+ }
+ } else {
+ LOG(FATAL) << "Unimplemented loading of double into a D register "
+ << "that cannot be split into two S registers";
+ }
+ }
+ }
+}
// Implementation note: this method must emit at most one instruction when
// Address::CanHoldLoadOffsetArm.
diff --git a/compiler/utils/arm/assembler_arm32.h b/compiler/utils/arm/assembler_arm32.h
index 8726ac8..554dd23 100644
--- a/compiler/utils/arm/assembler_arm32.h
+++ b/compiler/utils/arm/assembler_arm32.h
@@ -270,6 +270,7 @@
// Load and Store. May clobber IP.
void LoadImmediate(Register rd, int32_t value, Condition cond = AL) OVERRIDE;
+ void LoadDImmediate(DRegister dd, double value, Condition cond = AL) OVERRIDE;
void MarkExceptionHandler(Label* label) OVERRIDE;
void LoadFromOffset(LoadOperandType type,
Register reg,
diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc
index a72ea41..8747dad 100644
--- a/compiler/utils/arm/assembler_thumb2.cc
+++ b/compiler/utils/arm/assembler_thumb2.cc
@@ -1917,7 +1917,7 @@
case kLongOrFPLiteral1KiB:
return 4u;
- case kLongOrFPLiteral256KiB:
+ case kLongOrFPLiteral64KiB:
return 10u;
case kLongOrFPLiteralFar:
return 14u;
@@ -1989,7 +1989,7 @@
break;
case kLiteral1MiB:
case kLiteral64KiB:
- case kLongOrFPLiteral256KiB:
+ case kLongOrFPLiteral64KiB:
case kLiteralAddr64KiB:
DCHECK_GE(diff, 4); // The target must be at least 4 bytes after the ADD rX, PC.
diff -= 4; // One extra 32-bit MOV.
@@ -2105,10 +2105,10 @@
if (IsUint<10>(GetOffset(current_code_size))) {
break;
}
- current_code_size += IncreaseSize(kLongOrFPLiteral256KiB);
+ current_code_size += IncreaseSize(kLongOrFPLiteral64KiB);
FALLTHROUGH_INTENDED;
- case kLongOrFPLiteral256KiB:
- if (IsUint<18>(GetOffset(current_code_size))) {
+ case kLongOrFPLiteral64KiB:
+ if (IsUint<16>(GetOffset(current_code_size))) {
break;
}
current_code_size += IncreaseSize(kLongOrFPLiteralFar);
@@ -2269,11 +2269,10 @@
buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(encoding & 0xffff));
break;
}
- case kLongOrFPLiteral256KiB: {
- int32_t offset = GetOffset(code_size);
- int32_t mov_encoding = MovModImmEncoding32(IP, offset & ~0x3ff);
+ case kLongOrFPLiteral64KiB: {
+ int32_t mov_encoding = MovwEncoding32(IP, GetOffset(code_size));
int16_t add_pc_encoding = AddRdnRmEncoding16(IP, PC);
- int32_t ldr_encoding = LoadWideOrFpEncoding(IP, offset & 0x3ff); // DCHECKs type_.
+ int32_t ldr_encoding = LoadWideOrFpEncoding(IP, 0u); // DCHECKs type_.
buffer->Store<int16_t>(location_, mov_encoding >> 16);
buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(mov_encoding & 0xffff));
buffer->Store<int16_t>(location_ + 4u, add_pc_encoding);
@@ -3598,6 +3597,24 @@
}
}
+void Thumb2Assembler::LoadDImmediate(DRegister dd, double value, Condition cond) {
+ if (!vmovd(dd, value, cond)) {
+ uint64_t int_value = bit_cast<uint64_t, double>(value);
+ if (int_value == bit_cast<uint64_t, double>(0.0)) {
+ // 0.0 is quite common, so we special case it by loading
+ // 2.0 in `dd` and then subtracting it.
+ bool success = vmovd(dd, 2.0, cond);
+ CHECK(success);
+ vsubd(dd, dd, dd, cond);
+ } else {
+ Literal* literal = literal64_dedupe_map_.GetOrCreate(
+ int_value,
+ [this, int_value]() { return NewLiteral<uint64_t>(int_value); });
+ LoadLiteral(dd, literal);
+ }
+ }
+}
+
int32_t Thumb2Assembler::GetAllowedLoadOffsetBits(LoadOperandType type) {
switch (type) {
case kLoadSignedByte:
diff --git a/compiler/utils/arm/assembler_thumb2.h b/compiler/utils/arm/assembler_thumb2.h
index 2ca74fc..4ee23c0 100644
--- a/compiler/utils/arm/assembler_thumb2.h
+++ b/compiler/utils/arm/assembler_thumb2.h
@@ -43,6 +43,7 @@
fixups_(arena->Adapter(kArenaAllocAssembler)),
fixup_dependents_(arena->Adapter(kArenaAllocAssembler)),
literals_(arena->Adapter(kArenaAllocAssembler)),
+ literal64_dedupe_map_(std::less<uint64_t>(), arena->Adapter(kArenaAllocAssembler)),
jump_tables_(arena->Adapter(kArenaAllocAssembler)),
last_position_adjustment_(0u),
last_old_position_(0u),
@@ -319,6 +320,7 @@
// Load and Store. May clobber IP.
void LoadImmediate(Register rd, int32_t value, Condition cond = AL) OVERRIDE;
+ void LoadDImmediate(DRegister dd, double value, Condition cond = AL) OVERRIDE;
void MarkExceptionHandler(Label* label) OVERRIDE;
void LoadFromOffset(LoadOperandType type,
Register reg,
@@ -464,8 +466,8 @@
// Load long or FP literal variants.
// VLDR s/dX, label; 32-bit insn, up to 1KiB offset; 4 bytes.
kLongOrFPLiteral1KiB,
- // MOV ip, modimm + ADD ip, pc + VLDR s/dX, [IP, #imm8*4]; up to 256KiB offset; 10 bytes.
- kLongOrFPLiteral256KiB,
+ // MOV ip, imm16 + ADD ip, pc + VLDR s/dX, [IP, #0]; up to 64KiB offset; 10 bytes.
+ kLongOrFPLiteral64KiB,
// MOV ip, imm16 + MOVT ip, imm16 + ADD ip, pc + VLDR s/dX, [IP]; any offset; 14 bytes.
kLongOrFPLiteralFar,
};
@@ -500,7 +502,7 @@
// Load wide literal.
static Fixup LoadWideLiteral(uint32_t location, Register rt, Register rt2,
Size size = kLongOrFPLiteral1KiB) {
- DCHECK(size == kLongOrFPLiteral1KiB || size == kLongOrFPLiteral256KiB ||
+ DCHECK(size == kLongOrFPLiteral1KiB || size == kLongOrFPLiteral64KiB ||
size == kLongOrFPLiteralFar);
DCHECK(!IsHighRegister(rt) || (size != kLiteral1KiB && size != kLiteral64KiB));
return Fixup(rt, rt2, kNoSRegister, kNoDRegister,
@@ -510,7 +512,7 @@
// Load FP single literal.
static Fixup LoadSingleLiteral(uint32_t location, SRegister sd,
Size size = kLongOrFPLiteral1KiB) {
- DCHECK(size == kLongOrFPLiteral1KiB || size == kLongOrFPLiteral256KiB ||
+ DCHECK(size == kLongOrFPLiteral1KiB || size == kLongOrFPLiteral64KiB ||
size == kLongOrFPLiteralFar);
return Fixup(kNoRegister, kNoRegister, sd, kNoDRegister,
AL, kLoadFPLiteralSingle, size, location);
@@ -519,7 +521,7 @@
// Load FP double literal.
static Fixup LoadDoubleLiteral(uint32_t location, DRegister dd,
Size size = kLongOrFPLiteral1KiB) {
- DCHECK(size == kLongOrFPLiteral1KiB || size == kLongOrFPLiteral256KiB ||
+ DCHECK(size == kLongOrFPLiteral1KiB || size == kLongOrFPLiteral64KiB ||
size == kLongOrFPLiteralFar);
return Fixup(kNoRegister, kNoRegister, kNoSRegister, dd,
AL, kLoadFPLiteralDouble, size, location);
@@ -870,6 +872,9 @@
// without invalidating pointers and references to existing elements.
ArenaDeque<Literal> literals_;
+ // Deduplication map for 64-bit literals, used for LoadDImmediate().
+ ArenaSafeMap<uint64_t, Literal*> literal64_dedupe_map_;
+
// Jump table list.
ArenaDeque<JumpTable> jump_tables_;
diff --git a/compiler/utils/arm/assembler_thumb2_test.cc b/compiler/utils/arm/assembler_thumb2_test.cc
index 7f1dc49..f3fa72c 100644
--- a/compiler/utils/arm/assembler_thumb2_test.cc
+++ b/compiler/utils/arm/assembler_thumb2_test.cc
@@ -869,10 +869,11 @@
}
std::string expected =
- "mov.w ip, #((2f - 1f - 4) & ~0x3ff)\n"
+ // "as" does not consider ((2f - 1f - 4) & 0xffff) a constant expression for movw.
+ "movw ip, #(0x408 - 0x4 - 4)\n"
"1:\n"
"add ip, pc\n"
- "ldrd r1, r3, [ip, #((2f - 1b - 4) & 0x3ff)]\n" +
+ "ldrd r1, r3, [ip, #0]\n" +
RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
".align 2, 0\n"
"2:\n"
@@ -884,48 +885,78 @@
__ GetAdjustedPosition(label.Position()));
}
-TEST_F(AssemblerThumb2Test, LoadLiteralSingleMax256KiB) {
+TEST_F(AssemblerThumb2Test, LoadLiteralSingleMax64KiB) {
// The literal size must match but the type doesn't, so use an int32_t rather than float.
arm::Literal* literal = __ NewLiteral<int32_t>(0x12345678);
__ LoadLiteral(arm::S3, literal);
Label label;
__ Bind(&label);
- constexpr size_t kLdrR0R0Count = (1 << 17) - 3u;
- for (size_t i = 0; i != kLdrR0R0Count; ++i) {
- __ ldr(arm::R0, arm::Address(arm::R0));
- }
-
- std::string expected =
- "mov.w ip, #((2f - 1f - 4) & ~0x3ff)\n"
- "1:\n"
- "add ip, pc\n"
- "vldr s3, [ip, #((2f - 1b - 4) & 0x3ff)]\n" +
- RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
- ".align 2, 0\n"
- "2:\n"
- ".word 0x12345678\n";
- DriverStr(expected, "LoadLiteralSingleMax256KiB");
-
- EXPECT_EQ(static_cast<uint32_t>(label.Position()) + 6u,
- __ GetAdjustedPosition(label.Position()));
-}
-
-TEST_F(AssemblerThumb2Test, LoadLiteralDoubleBeyondMax256KiB) {
- // The literal size must match but the type doesn't, so use an int64_t rather than double.
- arm::Literal* literal = __ NewLiteral<int64_t>(INT64_C(0x1234567887654321));
- __ LoadLiteral(arm::D3, literal);
- Label label;
- __ Bind(&label);
- constexpr size_t kLdrR0R0Count = (1 << 17) - 2u;
+ constexpr size_t kLdrR0R0Count = (1 << 15) - 3u;
for (size_t i = 0; i != kLdrR0R0Count; ++i) {
__ ldr(arm::R0, arm::Address(arm::R0));
}
std::string expected =
// "as" does not consider ((2f - 1f - 4) & 0xffff) a constant expression for movw.
- "movw ip, #(0x40000 & 0xffff)\n"
+ "movw ip, #(0x10004 - 0x4 - 4)\n"
+ "1:\n"
+ "add ip, pc\n"
+ "vldr s3, [ip, #0]\n" +
+ RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
+ ".align 2, 0\n"
+ "2:\n"
+ ".word 0x12345678\n";
+ DriverStr(expected, "LoadLiteralSingleMax64KiB");
+
+ EXPECT_EQ(static_cast<uint32_t>(label.Position()) + 6u,
+ __ GetAdjustedPosition(label.Position()));
+}
+
+TEST_F(AssemblerThumb2Test, LoadLiteralSingleMax64KiB_UnalignedPC) {
+ // The literal size must match but the type doesn't, so use an int32_t rather than float.
+ arm::Literal* literal = __ NewLiteral<int32_t>(0x12345678);
+ __ ldr(arm::R0, arm::Address(arm::R0));
+ __ LoadLiteral(arm::S3, literal);
+ Label label;
+ __ Bind(&label);
+ constexpr size_t kLdrR0R0Count = (1 << 15) - 4u;
+ for (size_t i = 0; i != kLdrR0R0Count; ++i) {
+ __ ldr(arm::R0, arm::Address(arm::R0));
+ }
+
+ std::string expected =
+ "ldr r0, [r0]\n"
+ // "as" does not consider ((2f - 1f - 4) & 0xffff) a constant expression for movw.
+ "movw ip, #(0x10004 - 0x6 - 4)\n"
+ "1:\n"
+ "add ip, pc\n"
+ "vldr s3, [ip, #0]\n" +
+ RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
+ ".align 2, 0\n"
+ "2:\n"
+ ".word 0x12345678\n";
+ DriverStr(expected, "LoadLiteralSingleMax64KiB_UnalignedPC");
+
+ EXPECT_EQ(static_cast<uint32_t>(label.Position()) + 6u,
+ __ GetAdjustedPosition(label.Position()));
+}
+
+TEST_F(AssemblerThumb2Test, LoadLiteralDoubleBeyondMax64KiB) {
+ // The literal size must match but the type doesn't, so use an int64_t rather than double.
+ arm::Literal* literal = __ NewLiteral<int64_t>(INT64_C(0x1234567887654321));
+ __ LoadLiteral(arm::D3, literal);
+ Label label;
+ __ Bind(&label);
+ constexpr size_t kLdrR0R0Count = (1 << 15) - 2u;
+ for (size_t i = 0; i != kLdrR0R0Count; ++i) {
+ __ ldr(arm::R0, arm::Address(arm::R0));
+ }
+
+ std::string expected =
+ // "as" does not consider ((2f - 1f - 4) & 0xffff) a constant expression for movw.
+ "movw ip, #((0x1000c - 0x8 - 4) & 0xffff)\n"
// "as" does not consider ((2f - 1f - 4) >> 16) a constant expression for movt.
- "movt ip, #(0x40000 >> 16)\n"
+ "movt ip, #((0x1000c - 0x8 - 4) >> 16)\n"
"1:\n"
"add ip, pc\n"
"vldr d3, [ip, #0]\n" +
@@ -934,7 +965,7 @@
"2:\n"
".word 0x87654321\n"
".word 0x12345678\n";
- DriverStr(expected, "LoadLiteralDoubleBeyondMax256KiB");
+ DriverStr(expected, "LoadLiteralDoubleBeyondMax64KiB");
EXPECT_EQ(static_cast<uint32_t>(label.Position()) + 10u,
__ GetAdjustedPosition(label.Position()));
@@ -946,16 +977,16 @@
__ LoadLiteral(arm::D3, literal);
Label label;
__ Bind(&label);
- constexpr size_t kLdrR0R0Count = (1 << 17) - 2u + 0x1234;
+ constexpr size_t kLdrR0R0Count = (1 << 15) - 2u + 0x1234;
for (size_t i = 0; i != kLdrR0R0Count; ++i) {
__ ldr(arm::R0, arm::Address(arm::R0));
}
std::string expected =
// "as" does not consider ((2f - 1f - 4) & 0xffff) a constant expression for movw.
- "movw ip, #((0x40000 + 2 * 0x1234) & 0xffff)\n"
+ "movw ip, #((0x1000c + 2 * 0x1234 - 0x8 - 4) & 0xffff)\n"
// "as" does not consider ((2f - 1f - 4) >> 16) a constant expression for movt.
- "movt ip, #((0x40000 + 2 * 0x1234) >> 16)\n"
+ "movt ip, #((0x1000c + 2 * 0x1234 - 0x8 - 4) >> 16)\n"
"1:\n"
"add ip, pc\n"
"vldr d3, [ip, #0]\n" +