AArch64: fixes in A64 code generation.
- Disabled special method compilation, as it requires the hard-float ABI,
- Disabled suspend checks, as the runtime is not yet ready (e.g. the
trampolines do not set the suspend register yet),
- Changed the definition of the zero register (its register number is
now 0x3f),
- Fixed issues in the assembler's handling of comparison instructions:
we now use the shifted-register rather than the extended-register
variant of cmp and cmn,
- Partially fixed the register setup (register sN is now mapped to dN),
- Fixed and completed the implementation of register spills/unspills,
- Fixed LoadBaseDispBody() and StoreBaseDispBody() (see the sketch below).
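The LoadBaseDispBody()/StoreBaseDispBody() fix reduces to a three-way
choice per access: the scaled unsigned 12-bit immediate form (ldr/str),
the unscaled signed 9-bit form (ldur/stur, only wired up for the 64-bit
cases here), or a long sequence through a scratch register. The following
is a minimal standalone sketch of that selection logic; the enum and
function names are illustrative only, not the actual compiler APIs.

  // Hypothetical, simplified selection logic mirroring the new
  // LoadBaseDispBody()/StoreBaseDispBody(); names are for illustration only.
  #include <cassert>

  enum class AddressingForm {
    kScaledImm12,   // ldr/str with an unsigned, size-scaled 12-bit offset
    kUnscaledImm9,  // ldur/stur with a signed 9-bit byte offset
    kLongSequence   // materialize the offset in a scratch register
  };

  // 'scale' is log2 of the access size: 0 (byte), 1 (half), 2 (32-bit), 3 (64-bit).
  // 'has_unscaled_variant' models alt_opcode != kA64Brk1d (only set for the
  // 64-bit load/store cases in this change).
  AddressingForm SelectAddressingForm(int displacement, int scale,
                                      bool has_unscaled_variant) {
    bool aligned = (displacement & ((1 << scale) - 1)) == 0;
    int scaled_disp = displacement >> scale;
    if (aligned && scaled_disp >= 0 && scaled_disp < 4096) {
      return AddressingForm::kScaledImm12;   // e.g. ldr x0, [sp, #16]
    }
    if (has_unscaled_variant && displacement >= -256 && displacement <= 255) {
      return AddressingForm::kUnscaledImm9;  // e.g. ldur x0, [sp, #-8]
    }
    return AddressingForm::kLongSequence;    // LoadConstant + register-offset access
  }

  int main() {
    assert(SelectAddressingForm(16, 3, true) == AddressingForm::kScaledImm12);
    assert(SelectAddressingForm(-8, 3, true) == AddressingForm::kUnscaledImm9);
    assert(SelectAddressingForm(40000, 3, true) == AddressingForm::kLongSequence);
    return 0;
  }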
Change-Id: Ia49ba48b6ca0f782380066345b7a198cb6c1dc1d
diff --git a/compiler/dex/quick/arm64/arm64_lir.h b/compiler/dex/quick/arm64/arm64_lir.h
index f98e366..3d28665 100644
--- a/compiler/dex/quick/arm64/arm64_lir.h
+++ b/compiler/dex/quick/arm64/arm64_lir.h
@@ -95,13 +95,8 @@
* +========================+
*/
-// Offset to distinguish FP regs.
-#define ARM_FP_REG_OFFSET 32
// First FP callee save.
-#define ARM_FP_CALLEE_SAVE_BASE 16
-
-// Mask to strip off fp flags.
-#define ARM_FP_REG_MASK (ARM_FP_REG_OFFSET - 1)
+#define A64_FP_CALLEE_SAVE_BASE 16
// Temporary macros, used to mark code which wants to distinguish between zr/sp.
#define A64_REG_IS_SP(reg_num) ((reg_num) == rwsp || (reg_num) == rsp)
@@ -147,14 +142,11 @@
// TODO(Arm64): can we change the lines below such that rwzr != rwsp && rxzr != rsp?
// This would be desirable to allow detecting usage-errors in the assembler.
- rwzr = rw31,
- rxzr = rx31,
+ rwzr = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 0x3f,
+ rxzr = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 0x3f,
rwsp = rw31,
rsp = rx31,
- // TODO: rx4 is an argument register in C ABI which is not a good idea,
- // But we need to decide to use caller save register in C ABI or callee save register.
- // Because it will result to different implementation in the trampoline.
- rA64_SUSPEND = rx4,
+ rA64_SUSPEND = rx19,
rA64_SELF = rx18,
rA64_SP = rx31,
rA64_LR = rx30
@@ -233,9 +225,11 @@
kA64B1t, // b [00010100] offset_26[25-0].
kA64Cbnz2rt, // cbnz[00110101] imm_19[23-5] rt[4-0].
kA64Cbz2rt, // cbz [00110100] imm_19[23-5] rt[4-0].
- kA64Cmn3Rro, // cmn [s0101011001] rm[20-16] option[15-13] imm_3[12-10] rn[9-5] [11111].
+ kA64Cmn3rro, // cmn [s0101011] shift[23-22] [0] rm[20-16] imm_6[15-10] rn[9-5] [11111].
+ kA64Cmn3Rre, // cmn [s0101011001] rm[20-16] option[15-13] imm_3[12-10] rn[9-5] [11111].
kA64Cmn3RdT, // cmn [00110001] shift[23-22] imm_12[21-10] rn[9-5] [11111].
- kA64Cmp3Rro, // cmp [s1101011001] rm[20-16] option[15-13] imm_3[12-10] rn[9-5] [11111].
+ kA64Cmp3rro, // cmp [s1101011] shift[23-22] [0] rm[20-16] imm_6[15-10] rn[9-5] [11111].
+ kA64Cmp3Rre, // cmp [s1101011001] rm[20-16] option[15-13] imm_3[12-10] rn[9-5] [11111].
kA64Cmp3RdT, // cmp [01110001] shift[23-22] imm_12[21-10] rn[9-5] [11111].
kA64Csel4rrrc, // csel[s0011010100] rm[20-16] cond[15-12] [00] rn[9-5] rd[4-0].
kA64Csinc4rrrc, // csinc [s0011010100] rm[20-16] cond[15-12] [01] rn[9-5] rd[4-0].
@@ -279,6 +273,7 @@
kA64Ldr4fXxG, // ldr [1s111100011] rm[20-16] [011] S[12] [10] rn[9-5] rt[4-0].
kA64Ldr4rXxG, // ldr [1s111000011] rm[20-16] [011] S[12] [10] rn[9-5] rt[4-0].
kA64LdrPost3rXd, // ldr [1s111000010] imm_9[20-12] [01] rn[9-5] rt[4-0].
+ kA64Ldp4ffXD, // ldp [0s10110101] imm_7[21-15] rt2[14-10] rn[9-5] rt[4-0].
kA64Ldp4rrXD, // ldp [s010100101] imm_7[21-15] rt2[14-10] rn[9-5] rt[4-0].
kA64LdpPost4rrXD, // ldp [s010100011] imm_7[21-15] rt2[14-10] rn[9-5] rt[4-0].
kA64Ldur3fXd, // ldur[1s111100010] imm_9[20-12] [00] rn[9-5] rt[4-0].
@@ -306,7 +301,8 @@
kA64Scvtf2fx, // scvtf [100111100s100010000000] rn[9-5] rd[4-0].
kA64Sdiv3rrr, // sdiv[s0011010110] rm[20-16] [000011] rn[9-5] rd[4-0].
kA64Smaddl4xwwx, // smaddl [10011011001] rm[20-16] [0] ra[14-10] rn[9-5] rd[4-0].
- kA64Stp4rrXD, // stp [s010100101] imm_7[21-15] rt2[14-10] rn[9-5] rt[4-0].
+ kA64Stp4ffXD, // stp [0s10110100] imm_7[21-15] rt2[14-10] rn[9-5] rt[4-0].
+ kA64Stp4rrXD, // stp [s010100100] imm_7[21-15] rt2[14-10] rn[9-5] rt[4-0].
kA64StpPost4rrXD, // stp [s010100010] imm_7[21-15] rt2[14-10] rn[9-5] rt[4-0].
kA64StpPre4rrXD, // stp [s010100110] imm_7[21-15] rt2[14-10] rn[9-5] rt[4-0].
kA64Str3fXD, // str [1s11110100] imm_12[21-10] rn[9-5] rt[4-0].
@@ -355,9 +351,6 @@
#define FUNWIDE UNWIDE
#define IS_FWIDE IS_WIDE
-#define OP_KIND_UNWIDE(opcode) (opcode)
-#define OP_KIND_IS_WIDE(opcode) (false)
-
enum ArmOpDmbOptions {
kSY = 0xf,
kST = 0xe,
@@ -390,6 +383,9 @@
kFmtSkip, // Unused field, but continue to next.
};
+// TODO(Arm64): should we get rid of kFmtExtend?
+// Note: the only instructions that use it (the extended-register variants of cmp and cmn) are themselves unused.
+
// Struct used to define the snippet positions for each A64 opcode.
struct ArmEncodingMap {
uint32_t wskeleton;
diff --git a/compiler/dex/quick/arm64/assemble_arm64.cc b/compiler/dex/quick/arm64/assemble_arm64.cc
index 93caf89..01fcc0d 100644
--- a/compiler/dex/quick/arm64/assemble_arm64.cc
+++ b/compiler/dex/quick/arm64/assemble_arm64.cc
@@ -168,18 +168,26 @@
kFmtUnused, -1, -1,
IS_BINARY_OP | REG_USE0 | IS_BRANCH | NEEDS_FIXUP,
"cbz", "!0r, !1t", kFixupCBxZ),
- ENCODING_MAP(WIDE(kA64Cmn3Rro), SF_VARIANTS(0x6b20001f),
- kFmtRegROrSp, 9, 5, kFmtRegR, 20, 16, kFmtShift, -1, -1,
+ ENCODING_MAP(WIDE(kA64Cmn3rro), SF_VARIANTS(0x2b00001f),
+ kFmtRegR, 9, 5, kFmtRegR, 20, 16, kFmtShift, -1, -1,
kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | SETS_CCODES,
- "cmn", "!0R, !1r!2o", kFixupNone),
+ "cmn", "!0r, !1r!2o", kFixupNone),
+ ENCODING_MAP(WIDE(kA64Cmn3Rre), SF_VARIANTS(0x2b20001f),
+ kFmtRegROrSp, 9, 5, kFmtRegR, 20, 16, kFmtExtend, -1, -1,
+ kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | SETS_CCODES,
+ "cmn", "!0R, !1r!2e", kFixupNone),
ENCODING_MAP(WIDE(kA64Cmn3RdT), SF_VARIANTS(0x3100001f),
kFmtRegROrSp, 9, 5, kFmtBitBlt, 21, 10, kFmtBitBlt, 23, 22,
kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE0 | SETS_CCODES,
"cmn", "!0R, #!1d!2T", kFixupNone),
- ENCODING_MAP(WIDE(kA64Cmp3Rro), SF_VARIANTS(0x6b00001f),
- kFmtRegROrSp, 9, 5, kFmtRegR, 20, 16, kFmtShift, -1, -1,
+ ENCODING_MAP(WIDE(kA64Cmp3rro), SF_VARIANTS(0x6b00001f),
+ kFmtRegR, 9, 5, kFmtRegR, 20, 16, kFmtShift, -1, -1,
kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | SETS_CCODES,
- "cmp", "!0R, !1r!2o", kFixupNone),
+ "cmp", "!0r, !1r!2o", kFixupNone),
+ ENCODING_MAP(WIDE(kA64Cmp3Rre), SF_VARIANTS(0x6b20001f),
+ kFmtRegROrSp, 9, 5, kFmtRegR, 20, 16, kFmtExtend, -1, -1,
+ kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | SETS_CCODES,
+ "cmp", "!0R, !1r!2e", kFixupNone),
ENCODING_MAP(WIDE(kA64Cmp3RdT), SF_VARIANTS(0x7100001f),
kFmtRegROrSp, 9, 5, kFmtBitBlt, 21, 10, kFmtBitBlt, 23, 22,
kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE0 | SETS_CCODES,
@@ -354,9 +362,13 @@
kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 20, 12,
kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF01 | REG_USE1 | IS_LOAD,
"ldr", "!0r, [!1X], #!2d", kFixupNone),
+ ENCODING_MAP(WIDE(kA64Ldp4ffXD), CUSTOM_VARIANTS(0x2d400000, 0x6d400000),
+ kFmtRegF, 4, 0, kFmtRegF, 14, 10, kFmtRegXOrSp, 9, 5,
+ kFmtBitBlt, 21, 15, IS_QUAD_OP | REG_USE2 | REG_DEF01 | IS_LOAD,
+ "ldp", "!0f, !1f, [!2X, #!3D]", kFixupNone),
ENCODING_MAP(WIDE(kA64Ldp4rrXD), SF_VARIANTS(0x29400000),
kFmtRegR, 4, 0, kFmtRegR, 14, 10, kFmtRegXOrSp, 9, 5,
- kFmtBitBlt, 21, 15, IS_QUAD_OP | REG_USE2 | REG_DEF012 | IS_LOAD,
+ kFmtBitBlt, 21, 15, IS_QUAD_OP | REG_USE2 | REG_DEF01 | IS_LOAD,
"ldp", "!0r, !1r, [!2X, #!3D]", kFixupNone),
ENCODING_MAP(WIDE(kA64LdpPost4rrXD), CUSTOM_VARIANTS(0x28c00000, 0xa8c00000),
kFmtRegR, 4, 0, kFmtRegR, 14, 10, kFmtRegXOrSp, 9, 5,
@@ -462,9 +474,13 @@
kFmtRegX, 4, 0, kFmtRegW, 9, 5, kFmtRegW, 20, 16,
kFmtRegX, -1, -1, IS_QUAD_OP | REG_DEF0_USE123,
"smaddl", "!0x, !1w, !2w, !3x", kFixupNone),
+ ENCODING_MAP(WIDE(kA64Stp4ffXD), CUSTOM_VARIANTS(0x2d000000, 0x6d000000),
+ kFmtRegF, 4, 0, kFmtRegF, 14, 10, kFmtRegXOrSp, 9, 5,
+ kFmtBitBlt, 21, 15, IS_QUAD_OP | REG_USE012 | IS_STORE,
+ "stp", "!0f, !1f, [!2X, #!3D]", kFixupNone),
ENCODING_MAP(WIDE(kA64Stp4rrXD), SF_VARIANTS(0x29000000),
kFmtRegR, 4, 0, kFmtRegR, 14, 10, kFmtRegXOrSp, 9, 5,
- kFmtBitBlt, 21, 15, IS_QUAD_OP | REG_DEF2 | REG_USE012 | IS_STORE,
+ kFmtBitBlt, 21, 15, IS_QUAD_OP | REG_USE012 | IS_STORE,
"stp", "!0r, !1r, [!2X, #!3D]", kFixupNone),
ENCODING_MAP(WIDE(kA64StpPost4rrXD), CUSTOM_VARIANTS(0x28800000, 0xa8800000),
kFmtRegR, 4, 0, kFmtRegR, 14, 10, kFmtRegXOrSp, 9, 5,
diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc
index 136a04f..f7a0199 100644
--- a/compiler/dex/quick/arm64/call_arm64.cc
+++ b/compiler/dex/quick/arm64/call_arm64.cc
@@ -25,7 +25,10 @@
bool Arm64Mir2Lir::GenSpecialCase(BasicBlock* bb, MIR* mir,
const InlineMethod& special) {
- return Mir2Lir::GenSpecialCase(bb, mir, special);
+ // TODO(Arm64): re-enable this, once hard-float ABI is implemented.
+ // (this currently does not work, as GetArgMappingToPhysicalReg returns InvalidReg()).
+ // return Mir2Lir::GenSpecialCase(bb, mir, special);
+ return false;
}
/*
@@ -348,18 +351,16 @@
OpRegImm64(kOpSub, rs_rA64_SP, frame_size_, /*is_wide*/true);
}
- /* Spill core callee saves */
- if (core_spill_mask_) {
- SpillCoreRegs(rs_rA64_SP, frame_size_, core_spill_mask_);
- }
/* Need to spill any FP regs? */
- if (num_fp_spills_) {
- /*
- * NOTE: fp spills are a little different from core spills in that
- * they are pushed as a contiguous block. When promoting from
- * the fp set, we must allocate all singles from s16..highest-promoted
- */
- // TODO(Arm64): SpillFPRegs(rA64_SP, frame_size_, core_spill_mask_);
+ if (fp_spill_mask_) {
+ int spill_offset = frame_size_ - kArm64PointerSize*(num_fp_spills_ + num_core_spills_);
+ SpillFPRegs(rs_rA64_SP, spill_offset, fp_spill_mask_);
+ }
+
+ /* Spill core callee saves. */
+ if (core_spill_mask_) {
+ int spill_offset = frame_size_ - kArm64PointerSize*num_core_spills_;
+ SpillCoreRegs(rs_rA64_SP, spill_offset, core_spill_mask_);
}
FlushIns(ArgLocs, rl_method);
@@ -379,12 +380,15 @@
LockTemp(rs_x1);
NewLIR0(kPseudoMethodExit);
+
/* Need to restore any FP callee saves? */
- if (num_fp_spills_) {
- // TODO(Arm64): UnspillFPRegs(num_fp_spills_);
+ if (fp_spill_mask_) {
+ int spill_offset = frame_size_ - kArm64PointerSize*(num_fp_spills_ + num_core_spills_);
+ UnSpillFPRegs(rs_rA64_SP, spill_offset, fp_spill_mask_);
}
if (core_spill_mask_) {
- UnSpillCoreRegs(rs_rA64_SP, frame_size_, core_spill_mask_);
+ int spill_offset = frame_size_ - kArm64PointerSize*num_core_spills_;
+ UnSpillCoreRegs(rs_rA64_SP, spill_offset, core_spill_mask_);
}
OpRegImm64(kOpAdd, rs_rA64_SP, frame_size_, /*is_wide*/true);
diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h
index 418a989..404138c 100644
--- a/compiler/dex/quick/arm64/codegen_arm64.h
+++ b/compiler/dex/quick/arm64/codegen_arm64.h
@@ -73,7 +73,6 @@
void MarkPreservedSingle(int v_reg, RegStorage reg);
void MarkPreservedDouble(int v_reg, RegStorage reg);
void CompilerInitializeRegAlloc();
- RegStorage AllocPreservedDouble(int s_reg);
// Required for target - miscellaneous.
void AssembleLIR();
@@ -157,6 +156,8 @@
uint32_t GenPairWise(uint32_t reg_mask, int* reg1, int* reg2);
void UnSpillCoreRegs(RegStorage base, int offset, uint32_t reg_mask);
void SpillCoreRegs(RegStorage base, int offset, uint32_t reg_mask);
+ void UnSpillFPRegs(RegStorage base, int offset, uint32_t reg_mask);
+ void SpillFPRegs(RegStorage base, int offset, uint32_t reg_mask);
// Required for target - single operation generators.
LIR* OpUnconditionalBranch(LIR* target);
@@ -195,7 +196,7 @@
LIR* StoreBaseDispBody(RegStorage r_base, int displacement, RegStorage r_src, OpSize size);
LIR* OpRegRegRegShift(OpKind op, int r_dest, int r_src1, int r_src2, int shift,
bool is_wide = false);
- LIR* OpRegRegShift(OpKind op, int r_dest_src1, int r_src2, int shift, bool is_wide = false);
+ LIR* OpRegRegShift(OpKind op, RegStorage r_dest_src1, RegStorage r_src2, int shift);
static const ArmEncodingMap EncodingMap[kA64Last];
int EncodeShift(int code, int amount);
int EncodeExtend(int extend_type, int amount);
diff --git a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc
index f2a57e7..b0f5904 100644
--- a/compiler/dex/quick/arm64/int_arm64.cc
+++ b/compiler/dex/quick/arm64/int_arm64.cc
@@ -697,11 +697,19 @@
GenDivZeroCheck(kCondEq);
}
-// TODO(Arm64): the function below should go.
// Test suspend flag, return target of taken suspend branch
LIR* Arm64Mir2Lir::OpTestSuspend(LIR* target) {
+ // TODO(Arm64): re-enable suspend checks, once art_quick_test_suspend is implemented and
+ // the suspend register is properly handled in the trampolines.
+#if 0
NewLIR3(kA64Subs3rRd, rA64_SUSPEND, rA64_SUSPEND, 1);
return OpCondBranch((target == NULL) ? kCondEq : kCondNe, target);
+#else
+ // TODO(Arm64): Fake suspend check. Will always fail to branch. Remove this.
+ LIR* branch = NewLIR2((target == NULL) ? kA64Cbnz2rt : kA64Cbz2rt, rwzr, 0);
+ branch->target = target;
+ return branch;
+#endif
}
// Decrement register and branch on condition
@@ -1199,34 +1207,61 @@
void Arm64Mir2Lir::UnSpillCoreRegs(RegStorage base, int offset, uint32_t reg_mask) {
int reg1 = -1, reg2 = -1;
- const int pop_log2_size = 3;
+ const int reg_log2_size = 3;
- for (offset = (offset >> pop_log2_size) - 1; reg_mask; offset--) {
+ for (offset = (offset >> reg_log2_size); reg_mask; offset += 2) {
reg_mask = GenPairWise(reg_mask, & reg1, & reg2);
if (UNLIKELY(reg2 < 0)) {
- // TODO(Arm64): replace Solo32 with Solo64, once rxN are defined properly.
- NewLIR3(WIDE(kA64Ldr3rXD), RegStorage::Solo32(reg1).GetReg(), base.GetReg(), offset);
+ NewLIR3(WIDE(kA64Ldr3rXD), RegStorage::Solo64(reg1).GetReg(), base.GetReg(), offset);
} else {
- // TODO(Arm64): replace Solo32 with Solo64 (twice below), once rxN are defined properly.
- NewLIR4(WIDE(kA64Ldp4rrXD), RegStorage::Solo32(reg1).GetReg(),
- RegStorage::Solo32(reg2).GetReg(), base.GetReg(), offset);
+ NewLIR4(WIDE(kA64Ldp4rrXD), RegStorage::Solo64(reg2).GetReg(),
+ RegStorage::Solo64(reg1).GetReg(), base.GetReg(), offset);
}
}
}
void Arm64Mir2Lir::SpillCoreRegs(RegStorage base, int offset, uint32_t reg_mask) {
int reg1 = -1, reg2 = -1;
- const int pop_log2_size = 3;
+ const int reg_log2_size = 3;
- for (offset = (offset >> pop_log2_size) - 1; reg_mask; offset--) {
+ for (offset = (offset >> reg_log2_size); reg_mask; offset += 2) {
reg_mask = GenPairWise(reg_mask, & reg1, & reg2);
if (UNLIKELY(reg2 < 0)) {
- // TODO(Arm64): replace Solo32 with Solo64, once rxN are defined properly.
- NewLIR3(WIDE(kA64Str3rXD), RegStorage::Solo32(reg1).GetReg(), base.GetReg(), offset);
+ NewLIR3(WIDE(kA64Str3rXD), RegStorage::Solo64(reg1).GetReg(), base.GetReg(), offset);
} else {
- // TODO(Arm64): replace Solo32 with Solo64 (twice below), once rxN are defined properly.
- NewLIR4(WIDE(kA64Stp4rrXD), RegStorage::Solo32(reg1).GetReg(),
- RegStorage::Solo32(reg2).GetReg(), base.GetReg(), offset);
+ NewLIR4(WIDE(kA64Stp4rrXD), RegStorage::Solo64(reg2).GetReg(),
+ RegStorage::Solo64(reg1).GetReg(), base.GetReg(), offset);
+ }
+ }
+}
+
+void Arm64Mir2Lir::UnSpillFPRegs(RegStorage base, int offset, uint32_t reg_mask) {
+ int reg1 = -1, reg2 = -1;
+ const int reg_log2_size = 3;
+
+ for (offset = (offset >> reg_log2_size); reg_mask; offset += 2) {
+ reg_mask = GenPairWise(reg_mask, & reg1, & reg2);
+ if (UNLIKELY(reg2 < 0)) {
+ NewLIR3(FWIDE(kA64Ldr3fXD), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), offset);
+ } else {
+ NewLIR4(WIDE(kA64Ldp4ffXD), RegStorage::FloatSolo64(reg2).GetReg(),
+ RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), offset);
+ }
+ }
+}
+
+// TODO(Arm64): consider using ld1 and st1?
+void Arm64Mir2Lir::SpillFPRegs(RegStorage base, int offset, uint32_t reg_mask) {
+ int reg1 = -1, reg2 = -1;
+ const int reg_log2_size = 3;
+
+ for (offset = (offset >> reg_log2_size); reg_mask; offset += 2) {
+ reg_mask = GenPairWise(reg_mask, & reg1, & reg2);
+ if (UNLIKELY(reg2 < 0)) {
+ NewLIR3(FWIDE(kA64Str3fXD), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), offset);
+ } else {
+ NewLIR4(WIDE(kA64Stp4ffXD), RegStorage::FloatSolo64(reg2).GetReg(),
+ RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), offset);
}
}
}
diff --git a/compiler/dex/quick/arm64/target_arm64.cc b/compiler/dex/quick/arm64/target_arm64.cc
index 10be0d6..c072ae3 100644
--- a/compiler/dex/quick/arm64/target_arm64.cc
+++ b/compiler/dex/quick/arm64/target_arm64.cc
@@ -30,7 +30,8 @@
{rs_x0, rs_x1, rs_x2, rs_x3, rs_x4, rs_x5, rs_x6, rs_x7,
rs_x8, rs_x9, rs_x10, rs_x11, rs_x12, rs_x13, rs_x14, rs_x15,
rs_x16, rs_x17, rs_x18, rs_x19, rs_x20, rs_x21, rs_x22, rs_x23,
- rs_x24, rs_x25, rs_x26, rs_x27, rs_x28, rs_x29, rs_x30, rs_x31};
+ rs_x24, rs_x25, rs_x26, rs_x27, rs_x28, rs_x29, rs_x30, rs_x31,
+ rs_xzr};
static const RegStorage sp_regs_arr[] =
{rs_f0, rs_f1, rs_f2, rs_f3, rs_f4, rs_f5, rs_f6, rs_f7,
rs_f8, rs_f9, rs_f10, rs_f11, rs_f12, rs_f13, rs_f14, rs_f15,
@@ -42,8 +43,8 @@
rs_d16, rs_d17, rs_d18, rs_d19, rs_d20, rs_d21, rs_d22, rs_d23,
rs_d24, rs_d25, rs_d26, rs_d27, rs_d28, rs_d29, rs_d30, rs_d31};
static const RegStorage reserved_regs_arr[] =
- {rs_rA64_SUSPEND, rs_rA64_SELF, rs_rA64_SP, rs_rA64_LR};
-// TUING: Are there too many temp registers and too less promote target?
+ {rs_rA64_SUSPEND, rs_rA64_SELF, rs_rA64_SP, rs_rA64_LR, rs_xzr};
+// TUNING: Are there too many temp registers and too less promote target?
// This definition need to be matched with runtime.cc, quick entry assembly and JNI compiler
// Note: we are not able to call to C function directly if it un-match C ABI.
// Currently, rs_rA64_SELF is not a callee save register which does not match C ABI.
@@ -377,14 +378,14 @@
strcpy(tbuf, name);
break;
case 's':
- snprintf(tbuf, arraysize(tbuf), "s%d", operand & ARM_FP_REG_MASK);
+ snprintf(tbuf, arraysize(tbuf), "s%d", operand & RegStorage::kRegNumMask);
break;
case 'S':
- snprintf(tbuf, arraysize(tbuf), "d%d", operand & ARM_FP_REG_MASK);
+ snprintf(tbuf, arraysize(tbuf), "d%d", operand & RegStorage::kRegNumMask);
break;
case 'f':
snprintf(tbuf, arraysize(tbuf), "%c%d", (IS_FWIDE(lir->opcode)) ? 'd' : 's',
- operand & ARM_FP_REG_MASK);
+ operand & RegStorage::kRegNumMask);
break;
case 'l': {
bool is_wide = IS_WIDE(lir->opcode);
@@ -463,7 +464,7 @@
break;
case 'R': {
bool is_wide = IS_WIDE(lir->opcode);
- if (LIKELY(operand != rwsp || operand != rsp)) {
+ if (LIKELY(operand != rwsp && operand != rsp)) {
snprintf(tbuf, arraysize(tbuf), "%c%d", (is_wide) ? 'x' : 'w',
operand & RegStorage::kRegNumMask);
} else {
@@ -599,13 +600,11 @@
core_temps, sp_temps, dp_temps);
// Target-specific adjustments.
-
- // Alias single precision floats to appropriate half of overlapping double.
- GrowableArray<RegisterInfo*>::Iterator it(®_pool_->sp_regs_);
- for (RegisterInfo* info = it.Next(); info != nullptr; info = it.Next()) {
- int sp_reg_num = info->GetReg().GetRegNum();
- int dp_reg_num = sp_reg_num >> 1;
- RegStorage dp_reg = RegStorage::Solo64(RegStorage::kFloatingPoint | dp_reg_num);
+ // Alias single precision float registers to corresponding double registers.
+ GrowableArray<RegisterInfo*>::Iterator fp_it(®_pool_->sp_regs_);
+ for (RegisterInfo* info = fp_it.Next(); info != nullptr; info = fp_it.Next()) {
+ int fp_reg_num = info->GetReg().GetRegNum();
+ RegStorage dp_reg = RegStorage::Solo64(RegStorage::kFloatingPoint | fp_reg_num);
RegisterInfo* dp_reg_info = GetRegInfo(dp_reg);
// Double precision register's master storage should refer to itself.
DCHECK_EQ(dp_reg_info, dp_reg_info->Master());
@@ -613,10 +612,6 @@
info->SetMaster(dp_reg_info);
// Singles should show a single 32-bit mask bit, at first referring to the low half.
DCHECK_EQ(info->StorageMask(), 0x1U);
- if (sp_reg_num & 1) {
- // For odd singles, change to user the high word of the backing double.
- info->SetStorageMask(0x2);
- }
}
// TODO: re-enable this when we can safely save r4 over the suspension code path.
@@ -648,14 +643,11 @@
}
/*
- * Mark a callee-save fp register as promoted. Note that
- * vpush/vpop uses contiguous register lists so we must
- * include any holes in the mask. Associate holes with
- * Dalvik register INVALID_VREG (0xFFFFU).
+ * Mark a callee-save fp register as promoted.
*/
void Arm64Mir2Lir::MarkPreservedSingle(int v_reg, RegStorage reg) {
- DCHECK_GE(reg.GetRegNum(), ARM_FP_CALLEE_SAVE_BASE);
- int adjusted_reg_num = reg.GetRegNum() - ARM_FP_CALLEE_SAVE_BASE;
+ DCHECK(reg.IsFloat());
+ int adjusted_reg_num = reg.GetRegNum() - A64_FP_CALLEE_SAVE_BASE;
// Ensure fp_vmap_table is large enough
int table_size = fp_vmap_table_.size();
for (int i = table_size; i < (adjusted_reg_num + 1); i++) {
@@ -665,29 +657,36 @@
fp_vmap_table_[adjusted_reg_num] = v_reg;
// Size of fp_vmap_table is high-water mark, use to set mask
num_fp_spills_ = fp_vmap_table_.size();
- fp_spill_mask_ = ((1 << num_fp_spills_) - 1) << ARM_FP_CALLEE_SAVE_BASE;
+ fp_spill_mask_ = ((1 << num_fp_spills_) - 1) << A64_FP_CALLEE_SAVE_BASE;
}
void Arm64Mir2Lir::MarkPreservedDouble(int v_reg, RegStorage reg) {
- // TEMP: perform as 2 singles.
- int reg_num = reg.GetRegNum() << 1;
- RegStorage lo = RegStorage::Solo32(RegStorage::kFloatingPoint | reg_num);
- RegStorage hi = RegStorage::Solo32(RegStorage::kFloatingPoint | reg_num | 1);
- MarkPreservedSingle(v_reg, lo);
- MarkPreservedSingle(v_reg + 1, hi);
+ DCHECK(reg.IsDouble());
+ MarkPreservedSingle(v_reg, reg);
}
/* Clobber all regs that might be used by an external C call */
void Arm64Mir2Lir::ClobberCallerSave() {
- // TODO(Arm64): implement this.
- UNIMPLEMENTED(WARNING);
-
Clobber(rs_x0);
Clobber(rs_x1);
Clobber(rs_x2);
Clobber(rs_x3);
+ Clobber(rs_x4);
+ Clobber(rs_x5);
+ Clobber(rs_x6);
+ Clobber(rs_x7);
+ Clobber(rs_x8);
+ Clobber(rs_x9);
+ Clobber(rs_x10);
+ Clobber(rs_x11);
Clobber(rs_x12);
+ Clobber(rs_x13);
+ Clobber(rs_x14);
+ Clobber(rs_x15);
+ Clobber(rs_x16);
+ Clobber(rs_x17);
Clobber(rs_x30);
+
Clobber(rs_f0);
Clobber(rs_f1);
Clobber(rs_f2);
@@ -696,14 +695,22 @@
Clobber(rs_f5);
Clobber(rs_f6);
Clobber(rs_f7);
- Clobber(rs_f8);
- Clobber(rs_f9);
- Clobber(rs_f10);
- Clobber(rs_f11);
- Clobber(rs_f12);
- Clobber(rs_f13);
- Clobber(rs_f14);
- Clobber(rs_f15);
+ Clobber(rs_f16);
+ Clobber(rs_f17);
+ Clobber(rs_f18);
+ Clobber(rs_f19);
+ Clobber(rs_f20);
+ Clobber(rs_f21);
+ Clobber(rs_f22);
+ Clobber(rs_f23);
+ Clobber(rs_f24);
+ Clobber(rs_f25);
+ Clobber(rs_f26);
+ Clobber(rs_f27);
+ Clobber(rs_f28);
+ Clobber(rs_f29);
+ Clobber(rs_f30);
+ Clobber(rs_f31);
}
RegLocation Arm64Mir2Lir::GetReturnWideAlt() {
@@ -776,61 +783,6 @@
return Arm64Mir2Lir::EncodingMap[UNWIDE(opcode)].fmt;
}
-/*
- * Somewhat messy code here. We want to allocate a pair of contiguous
- * physical single-precision floating point registers starting with
- * an even numbered reg. It is possible that the paired s_reg (s_reg+1)
- * has already been allocated - try to fit if possible. Fail to
- * allocate if we can't meet the requirements for the pair of
- * s_reg<=sX[even] & (s_reg+1)<= sX+1.
- */
-// TODO: needs rewrite to support non-backed 64-bit float regs.
-RegStorage Arm64Mir2Lir::AllocPreservedDouble(int s_reg) {
- RegStorage res;
- int v_reg = mir_graph_->SRegToVReg(s_reg);
- int p_map_idx = SRegToPMap(s_reg);
- if (promotion_map_[p_map_idx+1].fp_location == kLocPhysReg) {
- // Upper reg is already allocated. Can we fit?
- int high_reg = promotion_map_[p_map_idx+1].FpReg;
- if ((high_reg & 1) == 0) {
- // High reg is even - fail.
- return res; // Invalid.
- }
- // Is the low reg of the pair free?
- // FIXME: rework.
- RegisterInfo* p = GetRegInfo(RegStorage::FloatSolo32(high_reg - 1));
- if (p->InUse() || p->IsTemp()) {
- // Already allocated or not preserved - fail.
- return res; // Invalid.
- }
- // OK - good to go.
- res = RegStorage::FloatSolo64(p->GetReg().GetRegNum() >> 1);
- p->MarkInUse();
- MarkPreservedSingle(v_reg, p->GetReg());
- } else {
- /*
- * TODO: until runtime support is in, make sure we avoid promoting the same vreg to
- * different underlying physical registers.
- */
- GrowableArray<RegisterInfo*>::Iterator it(®_pool_->dp_regs_);
- for (RegisterInfo* info = it.Next(); info != nullptr; info = it.Next()) {
- if (!info->IsTemp() && !info->InUse()) {
- res = info->GetReg();
- info->MarkInUse();
- MarkPreservedDouble(v_reg, info->GetReg());
- break;
- }
- }
- }
- if (res.Valid()) {
- promotion_map_[p_map_idx].fp_location = kLocPhysReg;
- promotion_map_[p_map_idx].FpReg = res.DoubleToLowSingle().GetReg();
- promotion_map_[p_map_idx+1].fp_location = kLocPhysReg;
- promotion_map_[p_map_idx+1].FpReg = res.DoubleToHighSingle().GetReg();
- }
- return res;
-}
-
// TODO(Arm64): reuse info in QuickArgumentVisitor?
static RegStorage GetArgPhysicalReg(RegLocation* loc, int* num_gpr_used, int* num_fpr_used,
OpSize* op_size) {
diff --git a/compiler/dex/quick/arm64/utility_arm64.cc b/compiler/dex/quick/arm64/utility_arm64.cc
index 77e4c3c..39e9fad 100644
--- a/compiler/dex/quick/arm64/utility_arm64.cc
+++ b/compiler/dex/quick/arm64/utility_arm64.cc
@@ -360,18 +360,17 @@
return NewLIR1(opcode, r_dest_src.GetReg());
}
-LIR* Arm64Mir2Lir::OpRegRegShift(OpKind op, int r_dest_src1, int r_src2,
- int shift, bool is_wide) {
- ArmOpcode wide = (is_wide) ? WIDE(0) : UNWIDE(0);
+LIR* Arm64Mir2Lir::OpRegRegShift(OpKind op, RegStorage r_dest_src1, RegStorage r_src2, int shift) {
+ ArmOpcode wide = (r_dest_src1.Is64Bit()) ? WIDE(0) : UNWIDE(0);
+ CHECK_EQ(r_dest_src1.Is64Bit(), r_src2.Is64Bit());
ArmOpcode opcode = kA64Brk1d;
- switch (OP_KIND_UNWIDE(op)) {
+ switch (op) {
case kOpCmn:
- opcode = kA64Cmn3Rro;
+ opcode = kA64Cmn3rro;
break;
case kOpCmp:
- // TODO(Arm64): check the instruction above: "cmp w0, w1" is rendered as "cmp w0, w1, uxtb".
- opcode = kA64Cmp3Rro;
+ opcode = kA64Cmp3rro;
break;
case kOpMov:
opcode = kA64Mov2rr;
@@ -388,39 +387,38 @@
case kOpRev:
DCHECK_EQ(shift, 0);
// Binary, but rm is encoded twice.
- return NewLIR3(kA64Rev2rr | wide, r_dest_src1, r_src2, r_src2);
+ return NewLIR3(kA64Rev2rr | wide, r_dest_src1.GetReg(), r_src2.GetReg(), r_src2.GetReg());
break;
case kOpRevsh:
// Binary, but rm is encoded twice.
- return NewLIR3(kA64Rev162rr | wide, r_dest_src1, r_src2, r_src2);
+ return NewLIR3(kA64Rev162rr | wide, r_dest_src1.GetReg(), r_src2.GetReg(), r_src2.GetReg());
break;
case kOp2Byte:
DCHECK_EQ(shift, ENCODE_NO_SHIFT);
// "sbfx r1, r2, #imm1, #imm2" is "sbfm r1, r2, #imm1, #(imm1 + imm2 - 1)".
// For now we use sbfm directly.
- return NewLIR4(kA64Sbfm4rrdd | wide, r_dest_src1, r_src2, 0, 7);
+ return NewLIR4(kA64Sbfm4rrdd | wide, r_dest_src1.GetReg(), r_src2.GetReg(), 0, 7);
case kOp2Short:
DCHECK_EQ(shift, ENCODE_NO_SHIFT);
// For now we use sbfm rather than its alias, sbfx.
- return NewLIR4(kA64Sbfm4rrdd | wide, r_dest_src1, r_src2, 0, 15);
+ return NewLIR4(kA64Sbfm4rrdd | wide, r_dest_src1.GetReg(), r_src2.GetReg(), 0, 15);
case kOp2Char:
// "ubfx r1, r2, #imm1, #imm2" is "ubfm r1, r2, #imm1, #(imm1 + imm2 - 1)".
// For now we use ubfm directly.
DCHECK_EQ(shift, ENCODE_NO_SHIFT);
- return NewLIR4(kA64Ubfm4rrdd | wide, r_dest_src1, r_src2, 0, 15);
+ return NewLIR4(kA64Ubfm4rrdd | wide, r_dest_src1.GetReg(), r_src2.GetReg(), 0, 15);
default:
- return OpRegRegRegShift(op, r_dest_src1, r_dest_src1, r_src2, shift);
+ return OpRegRegRegShift(op, r_dest_src1.GetReg(), r_dest_src1.GetReg(), r_src2.GetReg(), shift);
}
DCHECK(!IsPseudoLirOp(opcode));
if (EncodingMap[opcode].flags & IS_BINARY_OP) {
DCHECK_EQ(shift, ENCODE_NO_SHIFT);
- return NewLIR2(opcode | wide, r_dest_src1, r_src2);
+ return NewLIR2(opcode | wide, r_dest_src1.GetReg(), r_src2.GetReg());
} else if (EncodingMap[opcode].flags & IS_TERTIARY_OP) {
ArmEncodingKind kind = EncodingMap[opcode].field_loc[2].kind;
- if (kind == kFmtExtend || kind == kFmtShift) {
- DCHECK_EQ(kind == kFmtExtend, IsExtendEncoding(shift));
- return NewLIR3(opcode | wide, r_dest_src1, r_src2, shift);
+ if (kind == kFmtShift) {
+ return NewLIR3(opcode | wide, r_dest_src1.GetReg(), r_src2.GetReg(), shift);
}
}
@@ -429,8 +427,7 @@
}
LIR* Arm64Mir2Lir::OpRegReg(OpKind op, RegStorage r_dest_src1, RegStorage r_src2) {
- return OpRegRegShift(op, r_dest_src1.GetReg(), r_src2.GetReg(), ENCODE_NO_SHIFT,
- r_dest_src1.Is64Bit());
+ return OpRegRegShift(op, r_dest_src1, r_src2, ENCODE_NO_SHIFT);
}
LIR* Arm64Mir2Lir::OpMovRegMem(RegStorage r_dest, RegStorage r_base, int offset, MoveType move_type) {
@@ -452,7 +449,7 @@
int r_src2, int shift, bool is_wide) {
ArmOpcode opcode = kA64Brk1d;
- switch (OP_KIND_UNWIDE(op)) {
+ switch (op) {
case kOpAdd:
opcode = kA64Add4rrro;
break;
@@ -525,10 +522,10 @@
ArmOpcode opcode = kA64Brk1d;
ArmOpcode alt_opcode = kA64Brk1d;
int32_t log_imm = -1;
- bool is_wide = OP_KIND_IS_WIDE(op);
+ bool is_wide = r_dest.Is64Bit();
ArmOpcode wide = (is_wide) ? WIDE(0) : UNWIDE(0);
- switch (OP_KIND_UNWIDE(op)) {
+ switch (op) {
case kOpLsl: {
// "lsl w1, w2, #imm" is an alias of "ubfm w1, w2, #(-imm MOD 32), #(31-imm)"
// and "lsl x1, x2, #imm" of "ubfm x1, x2, #(-imm MOD 32), #(31-imm)".
@@ -639,7 +636,7 @@
return res;
}
- switch (OP_KIND_UNWIDE(op)) {
+ switch (op) {
case kOpAdd:
neg_opcode = kA64Sub4RRdT;
opcode = kA64Add4RRdT;
@@ -828,99 +825,66 @@
OpSize size) {
LIR* load = NULL;
ArmOpcode opcode = kA64Brk1d;
- bool short_form = false;
- int encoded_disp = displacement;
+ ArmOpcode alt_opcode = kA64Brk1d;
+ int scale = 0;
+
switch (size) {
case kDouble: // Intentional fall-through.
case kWord: // Intentional fall-through.
case k64:
- DCHECK_EQ(encoded_disp & 0x3, 0);
+ scale = 3;
if (r_dest.IsFloat()) {
- // Currently double values may be misaligned.
- if ((displacement & 0x7) == 0 && displacement >= 0 && displacement <= 32760) {
- // Can use scaled load.
- opcode = FWIDE(kA64Ldr3fXD);
- encoded_disp >>= 3;
- short_form = true;
- } else if (IS_SIGNED_IMM9(displacement)) {
- // Can use unscaled load.
- opcode = FWIDE(kA64Ldur3fXd);
- short_form = true;
- } else {
- short_form = false;
- }
+ DCHECK(r_dest.IsDouble());
+ opcode = FWIDE(kA64Ldr3fXD);
+ alt_opcode = FWIDE(kA64Ldur3fXd);
} else {
- // Currently long values may be misaligned.
- if ((displacement & 0x7) == 0 && displacement >= 0 && displacement <= 32760) {
- // Can use scaled store.
- opcode = FWIDE(kA64Ldr3rXD);
- encoded_disp >>= 3;
- short_form = true;
- } else if (IS_SIGNED_IMM9(displacement)) {
- // Can use unscaled store.
- opcode = FWIDE(kA64Ldur3rXd);
- short_form = true;
- } // else: use long sequence (short_form = false).
+ opcode = FWIDE(kA64Ldr3rXD);
+ alt_opcode = FWIDE(kA64Ldur3rXd);
}
break;
case kSingle: // Intentional fall-through.
case k32: // Intentional fall-trough.
case kReference:
+ scale = 2;
if (r_dest.IsFloat()) {
+ DCHECK(r_dest.IsSingle());
opcode = kA64Ldr3fXD;
- if (displacement <= 1020) {
- short_form = true;
- encoded_disp >>= 2;
- }
- break;
- }
- if (displacement <= 16380 && displacement >= 0) {
- DCHECK_EQ((displacement & 0x3), 0);
- short_form = true;
- encoded_disp >>= 2;
+ } else {
opcode = kA64Ldr3rXD;
}
break;
case kUnsignedHalf:
- if (displacement < 64 && displacement >= 0) {
- DCHECK_EQ((displacement & 0x1), 0);
- short_form = true;
- encoded_disp >>= 1;
- opcode = kA64Ldrh3wXF;
- } else if (displacement < 4092 && displacement >= 0) {
- short_form = true;
- opcode = kA64Ldrh3wXF;
- }
+ scale = 1;
+ opcode = kA64Ldrh3wXF;
break;
case kSignedHalf:
- short_form = true;
+ scale = 1;
opcode = kA64Ldrsh3rXF;
break;
case kUnsignedByte:
- short_form = true;
opcode = kA64Ldrb3wXd;
break;
case kSignedByte:
- short_form = true;
opcode = kA64Ldrsb3rXd;
break;
default:
LOG(FATAL) << "Bad size: " << size;
}
- if (short_form) {
- load = NewLIR3(opcode, r_dest.GetReg(), r_base.GetReg(), encoded_disp);
+ bool displacement_is_aligned = (displacement & ((1 << scale) - 1)) == 0;
+ int scaled_disp = displacement >> scale;
+ if (displacement_is_aligned && scaled_disp >= 0 && scaled_disp < 4096) {
+ // Can use scaled load.
+ load = NewLIR3(opcode, r_dest.GetReg(), r_base.GetReg(), scaled_disp);
+ } else if (alt_opcode != kA64Brk1d && IS_SIGNED_IMM9(displacement)) {
+ // Can use unscaled load.
+ load = NewLIR3(alt_opcode, r_dest.GetReg(), r_base.GetReg(), displacement);
} else {
- RegStorage reg_offset = AllocTemp();
- LoadConstant(reg_offset, encoded_disp);
- if (r_dest.IsFloat()) {
- // No index ops - must use a long sequence. Turn the offset into a direct pointer.
- OpRegReg(kOpAdd, reg_offset, r_base);
- load = LoadBaseDispBody(reg_offset, 0, r_dest, size);
- } else {
- load = LoadBaseIndexed(r_base, reg_offset, r_dest, 0, size);
- }
- FreeTemp(reg_offset);
+ // Use long sequence.
+ RegStorage r_scratch = AllocTemp();
+ LoadConstant(r_scratch, displacement);
+ load = LoadBaseIndexed(r_base, r_scratch, r_dest, 0, size);
+ FreeTemp(r_scratch);
}
// TODO: in future may need to differentiate Dalvik accesses w/ spills
@@ -947,92 +911,64 @@
OpSize size) {
LIR* store = NULL;
ArmOpcode opcode = kA64Brk1d;
- bool short_form = false;
- int encoded_disp = displacement;
+ ArmOpcode alt_opcode = kA64Brk1d;
+ int scale = 0;
+
switch (size) {
case kDouble: // Intentional fall-through.
case kWord: // Intentional fall-through.
case k64:
- DCHECK_EQ(encoded_disp & 0x3, 0);
+ scale = 3;
if (r_src.IsFloat()) {
- // Currently double values may be misaligned.
- if ((displacement & 0x7) == 0 && displacement >= 0 && displacement <= 32760) {
- // Can use scaled store.
- opcode = FWIDE(kA64Str3fXD);
- encoded_disp >>= 3;
- short_form = true;
- } else if (IS_SIGNED_IMM9(displacement)) {
- // Can use unscaled store.
- opcode = FWIDE(kA64Stur3fXd);
- short_form = true;
- } // else: use long sequence (short_form = false).
+ DCHECK(r_src.IsDouble());
+ opcode = FWIDE(kA64Str3fXD);
+ alt_opcode = FWIDE(kA64Stur3fXd);
} else {
- // Currently long values may be misaligned.
- if ((displacement & 0x7) == 0 && displacement >= 0 && displacement <= 32760) {
- // Can use scaled store.
- opcode = FWIDE(kA64Str3rXD);
- encoded_disp >>= 3;
- short_form = true;
- } else if (IS_SIGNED_IMM9(displacement)) {
- // Can use unscaled store.
- opcode = FWIDE(kA64Stur3rXd);
- short_form = true;
- } // else: use long sequence (short_form = false).
+ opcode = FWIDE(kA64Str3rXD);
+ alt_opcode = FWIDE(kA64Stur3rXd);
}
break;
case kSingle: // Intentional fall-through.
case k32: // Intentional fall-trough.
case kReference:
+ scale = 2;
if (r_src.IsFloat()) {
DCHECK(r_src.IsSingle());
- DCHECK_EQ(encoded_disp & 0x3, 0);
opcode = kA64Str3fXD;
- if (displacement <= 1020) {
- short_form = true;
- encoded_disp >>= 2;
- }
- break;
- }
-
- if (displacement <= 16380 && displacement >= 0) {
- DCHECK_EQ((displacement & 0x3), 0);
- short_form = true;
- encoded_disp >>= 2;
+ } else {
opcode = kA64Str3rXD;
}
break;
case kUnsignedHalf:
case kSignedHalf:
- DCHECK_EQ((displacement & 0x1), 0);
- short_form = true;
- encoded_disp >>= 1;
+ scale = 1;
opcode = kA64Strh3wXF;
break;
case kUnsignedByte:
case kSignedByte:
- short_form = true;
opcode = kA64Strb3wXd;
break;
default:
LOG(FATAL) << "Bad size: " << size;
}
- if (short_form) {
- store = NewLIR3(opcode, r_src.GetReg(), r_base.GetReg(), encoded_disp);
+ bool displacement_is_aligned = (displacement & ((1 << scale) - 1)) == 0;
+ int scaled_disp = displacement >> scale;
+ if (displacement_is_aligned && scaled_disp >= 0 && scaled_disp < 4096) {
+ // Can use scaled store.
+ store = NewLIR3(opcode, r_src.GetReg(), r_base.GetReg(), scaled_disp);
+ } else if (alt_opcode != kA64Brk1d && IS_SIGNED_IMM9(displacement)) {
+ // Can use unscaled store.
+ store = NewLIR3(alt_opcode, r_src.GetReg(), r_base.GetReg(), displacement);
} else {
+ // Use long sequence.
RegStorage r_scratch = AllocTemp();
- LoadConstant(r_scratch, encoded_disp);
- if (r_src.IsFloat()) {
- // No index ops - must use a long sequence. Turn the offset into a direct pointer.
- OpRegReg(kOpAdd, r_scratch, r_base);
- store = StoreBaseDispBody(r_scratch, 0, r_src, size);
- } else {
- store = StoreBaseIndexed(r_base, r_scratch, r_src, 0, size);
- }
+ LoadConstant(r_scratch, displacement);
+ store = StoreBaseIndexed(r_base, r_scratch, r_src, 0, size);
FreeTemp(r_scratch);
}
- // TODO: In future, may need to differentiate Dalvik & spill accesses
+ // TODO: In future, may need to differentiate Dalvik & spill accesses.
if (r_base == rs_rA64_SP) {
AnnotateDalvikRegAccess(store, displacement >> 2, false /* is_load */, r_src.Is64Bit());
}