x86_64: Support r8-r15, xmm8-xmm15 in assembler
Added REX prefix support. The TARGET_REX_SUPPORT macro must be defined at build time to enable the extended registers.
Change-Id: I82b457ff5085c8192ad873923bd939fbb91022ce
Signed-off-by: Dmitry Petrochenko <dmitry.petrochenko@intel.com>
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc
index b8222ef..39a0365 100644
--- a/compiler/dex/quick/x86/assemble_x86.cc
+++ b/compiler/dex/quick/x86/assemble_x86.cc
@@ -478,7 +478,8 @@
{ kX86RepneScasw, kPrefix2Nullary, NO_OPERAND | REG_USEA | REG_USEC | SETS_CCODES, { 0x66, 0xF2, 0xAF, 0, 0, 0, 0, 0 }, "RepNE ScasW", "" },
};
-size_t X86Mir2Lir::ComputeSize(const X86EncodingMap* entry, int base, int displacement, bool has_sib) {
+size_t X86Mir2Lir::ComputeSize(const X86EncodingMap* entry, int base, int displacement,
+ int reg_r, int reg_x, bool has_sib) {
size_t size = 0;
if (entry->skeleton.prefix1 > 0) {
++size;
@@ -486,6 +487,10 @@
++size;
}
}
+ if ((NeedsRex(base) || NeedsRex(reg_r) || NeedsRex(reg_x)) &&
+ entry->skeleton.prefix1 != REX_W && entry->skeleton.prefix2 != REX_W) {
+    ++size;  // REX prefix
+ }
++size; // opcode
if (entry->skeleton.opcode == 0x0F) {
++size;
@@ -494,13 +499,13 @@
}
}
++size; // modrm
- if (has_sib || RegStorage::RegNum(base) == rs_rX86_SP.GetRegNum()
+ if (has_sib || LowRegisterBits(RegStorage::RegNum(base)) == rs_rX86_SP.GetRegNum()
|| (Gen64Bit() && entry->skeleton.prefix1 == THREAD_PREFIX)) {
// SP requires a SIB byte.
// GS access also needs a SIB byte for absolute adressing in 64-bit mode.
++size;
}
- if (displacement != 0 || RegStorage::RegNum(base) == rs_rBP.GetRegNum()) {
+ if (displacement != 0 || LowRegisterBits(RegStorage::RegNum(base)) == rs_rBP.GetRegNum()) {
// BP requires an explicit displacement, even when it's 0.
if (entry->opcode != kX86Lea32RA) {
DCHECK_NE(entry->flags & (IS_LOAD | IS_STORE), 0ULL) << entry->name;
@@ -524,33 +529,41 @@
case kPrefix2Nullary:
return 3; // 1 byte of opcode + 2 prefixes
case kRegOpcode: // lir operands - 0: reg
- return ComputeSize(entry, 0, 0, false) - 1; // substract 1 for modrm
+      // subtract 1 for modrm
+ return ComputeSize(entry, 0, 0, lir->operands[0], NO_REG, false) - 1;
case kReg: // lir operands - 0: reg
- return ComputeSize(entry, 0, 0, false);
+ return ComputeSize(entry, 0, 0, lir->operands[0], NO_REG, false);
case kMem: // lir operands - 0: base, 1: disp
- return ComputeSize(entry, lir->operands[0], lir->operands[1], false);
+ return ComputeSize(entry, lir->operands[0], lir->operands[1], NO_REG, NO_REG, false);
case kArray: // lir operands - 0: base, 1: index, 2: scale, 3: disp
- return ComputeSize(entry, lir->operands[0], lir->operands[3], true);
+ return ComputeSize(entry, lir->operands[0], lir->operands[3],
+ NO_REG, lir->operands[1], true);
case kMemReg: // lir operands - 0: base, 1: disp, 2: reg
- return ComputeSize(entry, lir->operands[0], lir->operands[1], false);
+ return ComputeSize(entry, lir->operands[0], lir->operands[1],
+ lir->operands[2], NO_REG, false);
case kMemRegImm: // lir operands - 0: base, 1: disp, 2: reg 3: immediate
- return ComputeSize(entry, lir->operands[0], lir->operands[1], false);
+ return ComputeSize(entry, lir->operands[0], lir->operands[1],
+ lir->operands[2], NO_REG, false);
case kArrayReg: // lir operands - 0: base, 1: index, 2: scale, 3: disp, 4: reg
- return ComputeSize(entry, lir->operands[0], lir->operands[3], true);
+ return ComputeSize(entry, lir->operands[0], lir->operands[3],
+ lir->operands[4], lir->operands[1], true);
case kThreadReg: // lir operands - 0: disp, 1: reg
- return ComputeSize(entry, 0, lir->operands[0], false);
- case kRegReg:
- return ComputeSize(entry, 0, 0, false);
- case kRegRegStore:
- return ComputeSize(entry, 0, 0, false);
+ return ComputeSize(entry, 0, lir->operands[0], lir->operands[1], NO_REG, false);
+ case kRegReg: // lir operands - 0: reg1, 1: reg2
+ return ComputeSize(entry, 0, 0, lir->operands[0], lir->operands[1], false);
+ case kRegRegStore: // lir operands - 0: reg2, 1: reg1
+ return ComputeSize(entry, 0, 0, lir->operands[1], lir->operands[0], false);
case kRegMem: // lir operands - 0: reg, 1: base, 2: disp
- return ComputeSize(entry, lir->operands[1], lir->operands[2], false);
+ return ComputeSize(entry, lir->operands[1], lir->operands[2],
+ lir->operands[0], NO_REG, false);
case kRegArray: // lir operands - 0: reg, 1: base, 2: index, 3: scale, 4: disp
- return ComputeSize(entry, lir->operands[1], lir->operands[4], true);
+ return ComputeSize(entry, lir->operands[1], lir->operands[4],
+ lir->operands[0], lir->operands[2], true);
case kRegThread: // lir operands - 0: reg, 1: disp
- return ComputeSize(entry, 0, 0x12345678, false); // displacement size is always 32bit
+ // displacement size is always 32bit
+ return ComputeSize(entry, 0, 0x12345678, lir->operands[0], NO_REG, false);
case kRegImm: { // lir operands - 0: reg, 1: immediate
- size_t size = ComputeSize(entry, 0, 0, false);
+ size_t size = ComputeSize(entry, 0, 0, lir->operands[0], NO_REG, false);
if (entry->skeleton.ax_opcode == 0) {
return size;
} else {
@@ -560,47 +573,58 @@
}
}
case kMemImm: // lir operands - 0: base, 1: disp, 2: immediate
- return ComputeSize(entry, lir->operands[0], lir->operands[1], false);
+ return ComputeSize(entry, lir->operands[0], lir->operands[1],
+                         NO_REG, NO_REG, false);
case kArrayImm: // lir operands - 0: base, 1: index, 2: scale, 3: disp 4: immediate
- return ComputeSize(entry, lir->operands[0], lir->operands[3], true);
+ return ComputeSize(entry, lir->operands[0], lir->operands[3],
+ NO_REG, lir->operands[1], true);
case kThreadImm: // lir operands - 0: disp, 1: imm
- return ComputeSize(entry, 0, 0x12345678, false); // displacement size is always 32bit
+ // displacement size is always 32bit
+ return ComputeSize(entry, 0, 0x12345678, NO_REG, NO_REG, false);
case kRegRegImm: // lir operands - 0: reg, 1: reg, 2: imm
case kRegRegImmRev:
- return ComputeSize(entry, 0, 0, false);
+ return ComputeSize(entry, 0, 0, lir->operands[0], lir->operands[1], false);
case kRegMemImm: // lir operands - 0: reg, 1: base, 2: disp, 3: imm
- return ComputeSize(entry, lir->operands[1], lir->operands[2], false);
+ return ComputeSize(entry, lir->operands[1], lir->operands[2],
+ lir->operands[0], NO_REG, false);
case kRegArrayImm: // lir operands - 0: reg, 1: base, 2: index, 3: scale, 4: disp, 5: imm
- return ComputeSize(entry, lir->operands[1], lir->operands[4], true);
+ return ComputeSize(entry, lir->operands[1], lir->operands[4],
+ lir->operands[0], lir->operands[2], true);
case kMovRegImm: // lir operands - 0: reg, 1: immediate
- return (entry->skeleton.prefix1 != 0?1:0) + 1 + entry->skeleton.immediate_bytes; // TODO(64): reg1
+ return (entry->skeleton.prefix1 != 0 || NeedsRex(lir->operands[0])?1:0) +
+ 1 + entry->skeleton.immediate_bytes;
case kShiftRegImm: // lir operands - 0: reg, 1: immediate
// Shift by immediate one has a shorter opcode.
- return ComputeSize(entry, 0, 0, false) - (lir->operands[1] == 1 ? 1 : 0);
+ return ComputeSize(entry, 0, 0, lir->operands[0], NO_REG, false) -
+ (lir->operands[1] == 1 ? 1 : 0);
case kShiftMemImm: // lir operands - 0: base, 1: disp, 2: immediate
// Shift by immediate one has a shorter opcode.
- return ComputeSize(entry, lir->operands[0], lir->operands[1], false) -
+ return ComputeSize(entry, lir->operands[0], lir->operands[1], NO_REG, NO_REG, false) -
(lir->operands[2] == 1 ? 1 : 0);
case kShiftArrayImm: // lir operands - 0: base, 1: index, 2: scale, 3: disp 4: immediate
// Shift by immediate one has a shorter opcode.
- return ComputeSize(entry, lir->operands[0], lir->operands[3], true) -
+ return ComputeSize(entry, lir->operands[0], lir->operands[3],
+ NO_REG, lir->operands[1], true) -
(lir->operands[4] == 1 ? 1 : 0);
- case kShiftRegCl:
- return ComputeSize(entry, 0, 0, false);
+ case kShiftRegCl: // lir operands - 0: reg, 1: cl
+ return ComputeSize(entry, 0, 0, lir->operands[0], NO_REG, false);
case kShiftMemCl: // lir operands - 0: base, 1: disp, 2: cl
- return ComputeSize(entry, lir->operands[0], lir->operands[1], false);
+ return ComputeSize(entry, lir->operands[0], lir->operands[1], NO_REG, NO_REG, false);
case kShiftArrayCl: // lir operands - 0: base, 1: index, 2: scale, 3: disp, 4: reg
- return ComputeSize(entry, lir->operands[0], lir->operands[3], true);
+ return ComputeSize(entry, lir->operands[0], lir->operands[3],
+ lir->operands[4], lir->operands[1], true);
case kRegCond: // lir operands - 0: reg, 1: cond
- return ComputeSize(entry, 0, 0, false);
+ return ComputeSize(entry, 0, 0, lir->operands[0], NO_REG, false);
case kMemCond: // lir operands - 0: base, 1: disp, 2: cond
- return ComputeSize(entry, lir->operands[0], lir->operands[1], false);
+ return ComputeSize(entry, lir->operands[0], lir->operands[1], NO_REG, NO_REG, false);
case kArrayCond: // lir operands - 0: base, 1: index, 2: scale, 3: disp, 4: cond
- return ComputeSize(entry, lir->operands[0], lir->operands[3], true);
+ return ComputeSize(entry, lir->operands[0], lir->operands[3],
+ NO_REG, lir->operands[1], true);
case kRegRegCond: // lir operands - 0: reg, 1: reg, 2: cond
- return ComputeSize(entry, 0, 0, false);
+ return ComputeSize(entry, 0, 0, lir->operands[0], lir->operands[1], false);
case kRegMemCond: // lir operands - 0: reg, 1: reg, 2: disp, 3:cond
- return ComputeSize(entry, lir->operands[1], lir->operands[2], false);
+ return ComputeSize(entry, lir->operands[1], lir->operands[2],
+                         lir->operands[0], NO_REG, false);
case kJcc:
if (lir->opcode == kX86Jcc8) {
return 2; // opcode + rel8
@@ -614,21 +638,28 @@
} else if (lir->opcode == kX86Jmp32) {
return 5; // opcode + rel32
} else if (lir->opcode == kX86JmpT) {
- return ComputeSize(entry, 0, 0x12345678, false); // displacement size is always 32bit
+ // displacement size is always 32bit
+ return ComputeSize(entry, 0, 0x12345678, NO_REG, NO_REG, false);
} else {
DCHECK(lir->opcode == kX86JmpR);
- return 2; // opcode + modrm
+ if (NeedsRex(lir->operands[0])) {
+ return 3; // REX.B + opcode + modrm
+ } else {
+ return 2; // opcode + modrm
+ }
}
case kCall:
switch (lir->opcode) {
case kX86CallI: return 5; // opcode 0:disp
case kX86CallR: return 2; // opcode modrm
case kX86CallM: // lir operands - 0: base, 1: disp
- return ComputeSize(entry, lir->operands[0], lir->operands[1], false);
+ return ComputeSize(entry, lir->operands[0], lir->operands[1], NO_REG, NO_REG, false);
case kX86CallA: // lir operands - 0: base, 1: index, 2: scale, 3: disp
- return ComputeSize(entry, lir->operands[0], lir->operands[3], true);
+ return ComputeSize(entry, lir->operands[0], lir->operands[3],
+ NO_REG, lir->operands[1], true);
case kX86CallT: // lir operands - 0: disp
- return ComputeSize(entry, 0, 0x12345678, false); // displacement size is always 32bit
+ // displacement size is always 32bit
+ return ComputeSize(entry, 0, 0x12345678, NO_REG, NO_REG, false);
default:
break;
}
@@ -636,16 +667,19 @@
case kPcRel:
if (entry->opcode == kX86PcRelLoadRA) {
// lir operands - 0: reg, 1: base, 2: index, 3: scale, 4: table
- return ComputeSize(entry, lir->operands[1], 0x12345678, true);
+ return ComputeSize(entry, lir->operands[1], 0x12345678,
+ lir->operands[0], lir->operands[2], true);
} else {
DCHECK(entry->opcode == kX86PcRelAdr);
return 5; // opcode with reg + 4 byte immediate
}
- case kMacro:
+ case kMacro: // lir operands - 0: reg
DCHECK_EQ(lir->opcode, static_cast<int>(kX86StartOfMethod));
return 5 /* call opcode + 4 byte displacement */ + 1 /* pop reg */ +
- ComputeSize(&X86Mir2Lir::EncodingMap[kX86Sub32RI], 0, 0, false) -
- (RegStorage::RegNum(lir->operands[0]) == rs_rAX.GetRegNum() ? 1 : 0); // shorter ax encoding
+ ComputeSize(&X86Mir2Lir::EncodingMap[kX86Sub32RI], 0, 0,
+ lir->operands[0], NO_REG, false) -
+ // shorter ax encoding
+ (RegStorage::RegNum(lir->operands[0]) == rs_rAX.GetRegNum() ? 1 : 0);
default:
break;
}
@@ -654,19 +688,62 @@
}
void X86Mir2Lir::EmitPrefix(const X86EncodingMap* entry) {
+ EmitPrefix(entry, NO_REG, NO_REG, NO_REG);
+}
+
+void X86Mir2Lir::EmitPrefix(const X86EncodingMap* entry,
+ uint8_t reg_r, uint8_t reg_x, uint8_t reg_b) {
+ // REX.WRXB
+ // W - 64-bit operand
+ // R - MODRM.reg
+ // X - SIB.index
+ // B - MODRM.rm/SIB.base
+ bool force = false;
+ bool w = (entry->skeleton.prefix1 == REX_W) || (entry->skeleton.prefix2 == REX_W);
+ bool r = NeedsRex(reg_r);
+ bool x = NeedsRex(reg_x);
+ bool b = NeedsRex(reg_b);
+ uint8_t rex = force ? 0x40 : 0;
+ if (w) {
+ rex |= 0x48; // REX.W000
+ }
+ if (r) {
+ rex |= 0x44; // REX.0R00
+ }
+ if (x) {
+ rex |= 0x42; // REX.00X0
+ }
+ if (b) {
+ rex |= 0x41; // REX.000B
+ }
if (entry->skeleton.prefix1 != 0) {
if (Gen64Bit() && entry->skeleton.prefix1 == THREAD_PREFIX) {
// 64 bit adresses by GS, not FS
code_buffer_.push_back(THREAD_PREFIX_GS);
} else {
- code_buffer_.push_back(entry->skeleton.prefix1);
+ if (entry->skeleton.prefix1 == REX_W) {
+ rex |= entry->skeleton.prefix1;
+ code_buffer_.push_back(rex);
+ rex = 0;
+ } else {
+ code_buffer_.push_back(entry->skeleton.prefix1);
+ }
}
if (entry->skeleton.prefix2 != 0) {
- code_buffer_.push_back(entry->skeleton.prefix2);
+ if (entry->skeleton.prefix2 == REX_W) {
+ rex |= entry->skeleton.prefix2;
+ code_buffer_.push_back(rex);
+ rex = 0;
+ } else {
+ code_buffer_.push_back(entry->skeleton.prefix2);
+ }
}
} else {
DCHECK_EQ(0, entry->skeleton.prefix2);
}
+ if (rex != 0) {
+ code_buffer_.push_back(rex);
+ }
}
void X86Mir2Lir::EmitOpcode(const X86EncodingMap* entry) {
@@ -685,7 +762,12 @@
}
void X86Mir2Lir::EmitPrefixAndOpcode(const X86EncodingMap* entry) {
- EmitPrefix(entry);
+ EmitPrefixAndOpcode(entry, NO_REG, NO_REG, NO_REG);
+}
+
+void X86Mir2Lir::EmitPrefixAndOpcode(const X86EncodingMap* entry,
+ uint8_t reg_r, uint8_t reg_x, uint8_t reg_b) {
+ EmitPrefix(entry, reg_r, reg_x, reg_b);
EmitOpcode(entry);
}
@@ -766,6 +848,11 @@
code_buffer_.push_back((imm >> 8) & 0xFF);
break;
case 4:
+      if (imm < 0) {
+ CHECK_EQ((-imm) & 0x0FFFFFFFFl, -imm);
+ } else {
+ CHECK_EQ(imm & 0x0FFFFFFFFl, imm);
+ }
code_buffer_.push_back(imm & 0xFF);
code_buffer_.push_back((imm >> 8) & 0xFF);
code_buffer_.push_back((imm >> 16) & 0xFF);
@@ -789,7 +876,8 @@
}
void X86Mir2Lir::EmitOpRegOpcode(const X86EncodingMap* entry, uint8_t reg) {
- EmitPrefixAndOpcode(entry);
+  EmitPrefixAndOpcode(entry, NO_REG, NO_REG, reg);
+ reg = LowRegisterBits(reg);
// There's no 3-byte instruction with +rd
DCHECK(entry->skeleton.opcode != 0x0F ||
(entry->skeleton.extra_opcode1 != 0x38 && entry->skeleton.extra_opcode1 != 0x3A));
@@ -801,7 +889,8 @@
}
void X86Mir2Lir::EmitOpReg(const X86EncodingMap* entry, uint8_t reg) {
- EmitPrefixAndOpcode(entry);
+  EmitPrefixAndOpcode(entry, NO_REG, NO_REG, reg);
+ reg = LowRegisterBits(reg);
if (RegStorage::RegNum(reg) >= 4) {
DCHECK(strchr(entry->name, '8') == NULL) << entry->name << " "
<< static_cast<int>(RegStorage::RegNum(reg))
@@ -815,7 +904,8 @@
}
void X86Mir2Lir::EmitOpMem(const X86EncodingMap* entry, uint8_t base, int disp) {
- EmitPrefix(entry);
+ EmitPrefix(entry, NO_REG, NO_REG, base);
+ base = LowRegisterBits(base);
code_buffer_.push_back(entry->skeleton.opcode);
DCHECK_NE(0x0F, entry->skeleton.opcode);
DCHECK_EQ(0, entry->skeleton.extra_opcode1);
@@ -827,15 +917,29 @@
void X86Mir2Lir::EmitOpArray(const X86EncodingMap* entry, uint8_t base, uint8_t index,
int scale, int disp) {
- EmitPrefixAndOpcode(entry);
+ EmitPrefixAndOpcode(entry, NO_REG, index, base);
+ index = LowRegisterBits(index);
+ base = LowRegisterBits(base);
EmitModrmSibDisp(entry->skeleton.modrm_opcode, base, index, scale, disp);
DCHECK_EQ(0, entry->skeleton.ax_opcode);
DCHECK_EQ(0, entry->skeleton.immediate_bytes);
}
+uint8_t X86Mir2Lir::LowRegisterBits(uint8_t reg) {
+ uint8_t res = reg;
+ res = reg & kRegNumMask32; // 3 bits
+ return res;
+}
+
+bool X86Mir2Lir::NeedsRex(uint8_t reg) {
+ return RegStorage::RegNum(reg) > 7;
+}
+
void X86Mir2Lir::EmitMemReg(const X86EncodingMap* entry,
uint8_t base, int disp, uint8_t reg) {
- EmitPrefixAndOpcode(entry);
+ EmitPrefixAndOpcode(entry, reg, NO_REG, base);
+ reg = LowRegisterBits(reg);
+ base = LowRegisterBits(base);
if (RegStorage::RegNum(reg) >= 4) {
DCHECK(strchr(entry->name, '8') == NULL ||
entry->opcode == kX86Movzx8RM || entry->opcode == kX86Movsx8RM)
@@ -854,9 +958,12 @@
EmitMemReg(entry, base, disp, reg);
}
-void X86Mir2Lir::EmitRegArray(const X86EncodingMap* entry, uint8_t reg, uint8_t base, uint8_t index,
- int scale, int disp) {
- EmitPrefixAndOpcode(entry);
+void X86Mir2Lir::EmitRegArray(const X86EncodingMap* entry, uint8_t reg, uint8_t base,
+ uint8_t index, int scale, int disp) {
+ EmitPrefixAndOpcode(entry, reg, index, base);
+ reg = LowRegisterBits(reg);
+ index = LowRegisterBits(index);
+ base = LowRegisterBits(base);
EmitModrmSibDisp(reg, base, index, scale, disp);
DCHECK_EQ(0, entry->skeleton.modrm_opcode);
DCHECK_EQ(0, entry->skeleton.ax_opcode);
@@ -871,7 +978,9 @@
void X86Mir2Lir::EmitArrayImm(const X86EncodingMap* entry, uint8_t base, uint8_t index, int scale,
int disp, int32_t imm) {
- EmitPrefixAndOpcode(entry);
+ EmitPrefixAndOpcode(entry, NO_REG, index, base);
+ index = LowRegisterBits(index);
+ base = LowRegisterBits(base);
EmitModrmSibDisp(entry->skeleton.modrm_opcode, base, index, scale, disp);
DCHECK_EQ(0, entry->skeleton.ax_opcode);
EmitImm(entry, imm);
@@ -879,7 +988,8 @@
void X86Mir2Lir::EmitRegThread(const X86EncodingMap* entry, uint8_t reg, int disp) {
DCHECK_NE(entry->skeleton.prefix1, 0);
- EmitPrefixAndOpcode(entry);
+ EmitPrefixAndOpcode(entry, reg, NO_REG, NO_REG);
+ reg = LowRegisterBits(reg);
if (RegStorage::RegNum(reg) >= 4) {
DCHECK(strchr(entry->name, '8') == NULL) << entry->name << " "
<< static_cast<int>(RegStorage::RegNum(reg))
@@ -897,7 +1007,9 @@
}
void X86Mir2Lir::EmitRegReg(const X86EncodingMap* entry, uint8_t reg1, uint8_t reg2) {
- EmitPrefixAndOpcode(entry);
+ EmitPrefixAndOpcode(entry, reg1, NO_REG, reg2);
+ reg1 = LowRegisterBits(reg1);
+ reg2 = LowRegisterBits(reg2);
DCHECK_LT(RegStorage::RegNum(reg1), 8);
DCHECK_LT(RegStorage::RegNum(reg2), 8);
uint8_t modrm = (3 << 6) | (RegStorage::RegNum(reg1) << 3) | RegStorage::RegNum(reg2);
@@ -909,7 +1021,9 @@
void X86Mir2Lir::EmitRegRegImm(const X86EncodingMap* entry,
uint8_t reg1, uint8_t reg2, int32_t imm) {
- EmitPrefixAndOpcode(entry);
+ EmitPrefixAndOpcode(entry, reg1, NO_REG, reg2);
+ reg1 = LowRegisterBits(reg1);
+ reg2 = LowRegisterBits(reg2);
DCHECK_LT(RegStorage::RegNum(reg1), 8);
DCHECK_LT(RegStorage::RegNum(reg2), 8);
uint8_t modrm = (3 << 6) | (RegStorage::RegNum(reg1) << 3) | RegStorage::RegNum(reg2);
@@ -926,7 +1040,9 @@
void X86Mir2Lir::EmitRegMemImm(const X86EncodingMap* entry,
uint8_t reg, uint8_t base, int disp, int32_t imm) {
- EmitPrefixAndOpcode(entry);
+ EmitPrefixAndOpcode(entry, reg, NO_REG, base);
+ reg = LowRegisterBits(reg);
+ base = LowRegisterBits(base);
DCHECK(!RegStorage::IsFloat(reg));
DCHECK_LT(RegStorage::RegNum(reg), 8);
EmitModrmDisp(reg, base, disp);
@@ -941,10 +1057,11 @@
}
void X86Mir2Lir::EmitRegImm(const X86EncodingMap* entry, uint8_t reg, int imm) {
- EmitPrefix(entry);
+ EmitPrefix(entry, NO_REG, NO_REG, reg);
if (RegStorage::RegNum(reg) == rs_rAX.GetRegNum() && entry->skeleton.ax_opcode != 0) {
code_buffer_.push_back(entry->skeleton.ax_opcode);
} else {
+ reg = LowRegisterBits(reg);
EmitOpcode(entry);
uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | RegStorage::RegNum(reg);
code_buffer_.push_back(modrm);
@@ -953,7 +1070,8 @@
}
void X86Mir2Lir::EmitMemImm(const X86EncodingMap* entry, uint8_t base, int disp, int32_t imm) {
- EmitPrefixAndOpcode(entry);
+ EmitPrefixAndOpcode(entry, NO_REG, NO_REG, base);
+ base = LowRegisterBits(base);
EmitModrmDisp(entry->skeleton.modrm_opcode, base, disp);
DCHECK_EQ(0, entry->skeleton.ax_opcode);
EmitImm(entry, imm);
@@ -971,6 +1089,8 @@
}
void X86Mir2Lir::EmitMovRegImm(const X86EncodingMap* entry, uint8_t reg, int64_t imm) {
+ EmitPrefix(entry, NO_REG, NO_REG, reg);
+ reg = LowRegisterBits(reg);
DCHECK_LT(RegStorage::RegNum(reg), 8);
code_buffer_.push_back(0xB8 + RegStorage::RegNum(reg));
switch (entry->skeleton.immediate_bytes) {
@@ -991,12 +1111,14 @@
code_buffer_.push_back((imm >> 56) & 0xFF);
break;
default:
- LOG(FATAL) << "Unsupported immediate size for EmitMovRegImm: " << static_cast<uint32_t>(entry->skeleton.immediate_bytes);
+ LOG(FATAL) << "Unsupported immediate size for EmitMovRegImm: "
+ << static_cast<uint32_t>(entry->skeleton.immediate_bytes);
}
}
void X86Mir2Lir::EmitShiftRegImm(const X86EncodingMap* entry, uint8_t reg, int imm) {
- EmitPrefix(entry);
+ EmitPrefix(entry, NO_REG, NO_REG, reg);
+ reg = LowRegisterBits(reg);
if (imm != 1) {
code_buffer_.push_back(entry->skeleton.opcode);
} else {
@@ -1023,7 +1145,8 @@
void X86Mir2Lir::EmitShiftRegCl(const X86EncodingMap* entry, uint8_t reg, uint8_t cl) {
DCHECK_EQ(cl, static_cast<uint8_t>(rs_rCX.GetReg()));
- EmitPrefix(entry);
+  EmitPrefix(entry, NO_REG, NO_REG, reg);
+ reg = LowRegisterBits(reg);
code_buffer_.push_back(entry->skeleton.opcode);
DCHECK_NE(0x0F, entry->skeleton.opcode);
DCHECK_EQ(0, entry->skeleton.extra_opcode1);
@@ -1038,7 +1161,8 @@
void X86Mir2Lir::EmitShiftMemCl(const X86EncodingMap* entry, uint8_t base,
int displacement, uint8_t cl) {
DCHECK_EQ(cl, static_cast<uint8_t>(rs_rCX.GetReg()));
- EmitPrefix(entry);
+ EmitPrefix(entry, NO_REG, NO_REG, base);
+ base = LowRegisterBits(base);
code_buffer_.push_back(entry->skeleton.opcode);
DCHECK_NE(0x0F, entry->skeleton.opcode);
DCHECK_EQ(0, entry->skeleton.extra_opcode1);
@@ -1051,7 +1175,8 @@
void X86Mir2Lir::EmitShiftMemImm(const X86EncodingMap* entry, uint8_t base,
int displacement, int imm) {
- EmitPrefix(entry);
+ EmitPrefix(entry, NO_REG, NO_REG, base);
+ base = LowRegisterBits(base);
if (imm != 1) {
code_buffer_.push_back(entry->skeleton.opcode);
} else {
@@ -1070,7 +1195,8 @@
}
void X86Mir2Lir::EmitRegCond(const X86EncodingMap* entry, uint8_t reg, uint8_t condition) {
- EmitPrefix(entry);
+  EmitPrefix(entry, NO_REG, NO_REG, reg);
+ reg = LowRegisterBits(reg);
DCHECK_EQ(0, entry->skeleton.ax_opcode);
DCHECK_EQ(0x0F, entry->skeleton.opcode);
code_buffer_.push_back(0x0F);
@@ -1083,7 +1209,8 @@
DCHECK_EQ(entry->skeleton.immediate_bytes, 0);
}
-void X86Mir2Lir::EmitMemCond(const X86EncodingMap* entry, uint8_t base, int displacement, uint8_t condition) {
+void X86Mir2Lir::EmitMemCond(const X86EncodingMap* entry, uint8_t base, int displacement,
+ uint8_t condition) {
if (entry->skeleton.prefix1 != 0) {
code_buffer_.push_back(entry->skeleton.prefix1);
if (entry->skeleton.prefix2 != 0) {
@@ -1105,7 +1232,9 @@
void X86Mir2Lir::EmitRegRegCond(const X86EncodingMap* entry, uint8_t reg1, uint8_t reg2,
uint8_t condition) {
// Generate prefix and opcode without the condition
- EmitPrefixAndOpcode(entry);
+ EmitPrefixAndOpcode(entry, reg1, NO_REG, reg2);
+ reg1 = LowRegisterBits(reg1);
+ reg2 = LowRegisterBits(reg2);
// Now add the condition. The last byte of opcode is the one that receives it.
DCHECK_LE(condition, 0xF);
@@ -1127,9 +1256,12 @@
code_buffer_.push_back(modrm);
}
-void X86Mir2Lir::EmitRegMemCond(const X86EncodingMap* entry, uint8_t reg1, uint8_t base, int displacement, uint8_t condition) {
+void X86Mir2Lir::EmitRegMemCond(const X86EncodingMap* entry, uint8_t reg1, uint8_t base,
+ int displacement, uint8_t condition) {
// Generate prefix and opcode without the condition
- EmitPrefixAndOpcode(entry);
+ EmitPrefixAndOpcode(entry, reg1, NO_REG, base);
+ reg1 = LowRegisterBits(reg1);
+ base = LowRegisterBits(base);
// Now add the condition. The last byte of opcode is the one that receives it.
DCHECK_LE(condition, 0xF);
@@ -1162,8 +1294,10 @@
code_buffer_.push_back(rel & 0xFF);
} else {
DCHECK(entry->opcode == kX86JmpR);
- code_buffer_.push_back(entry->skeleton.opcode);
uint8_t reg = static_cast<uint8_t>(rel);
+ EmitPrefix(entry, NO_REG, NO_REG, reg);
+ code_buffer_.push_back(entry->skeleton.opcode);
+ reg = LowRegisterBits(reg);
DCHECK_LT(RegStorage::RegNum(reg), 8);
uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | RegStorage::RegNum(reg);
code_buffer_.push_back(modrm);
@@ -1188,7 +1322,8 @@
}
void X86Mir2Lir::EmitCallMem(const X86EncodingMap* entry, uint8_t base, int disp) {
- EmitPrefixAndOpcode(entry);
+ EmitPrefixAndOpcode(entry, NO_REG, NO_REG, base);
+ base = LowRegisterBits(base);
EmitModrmDisp(entry->skeleton.modrm_opcode, base, disp);
DCHECK_EQ(0, entry->skeleton.ax_opcode);
DCHECK_EQ(0, entry->skeleton.immediate_bytes);
@@ -1229,9 +1364,12 @@
reinterpret_cast<Mir2Lir::EmbeddedData*>(UnwrapPointer(base_or_table));
disp = tab_rec->offset;
}
- EmitPrefix(entry);
- DCHECK_LT(RegStorage::RegNum(reg), 8);
if (entry->opcode == kX86PcRelLoadRA) {
+ EmitPrefix(entry, reg, index, base_or_table);
+ reg = LowRegisterBits(reg);
+ base_or_table = LowRegisterBits(base_or_table);
+ index = LowRegisterBits(index);
+ DCHECK_LT(RegStorage::RegNum(reg), 8);
code_buffer_.push_back(entry->skeleton.opcode);
DCHECK_NE(0x0F, entry->skeleton.opcode);
DCHECK_EQ(0, entry->skeleton.extra_opcode1);
@@ -1246,6 +1384,7 @@
code_buffer_.push_back(sib);
DCHECK_EQ(0, entry->skeleton.immediate_bytes);
} else {
+ DCHECK_LT(RegStorage::RegNum(reg), 8);
code_buffer_.push_back(entry->skeleton.opcode + RegStorage::RegNum(reg));
}
code_buffer_.push_back(disp & 0xFF);
@@ -1258,6 +1397,8 @@
void X86Mir2Lir::EmitMacro(const X86EncodingMap* entry, uint8_t reg, int offset) {
DCHECK(entry->opcode == kX86StartOfMethod) << entry->name;
+  EmitPrefix(entry, NO_REG, NO_REG, reg);
+ reg = LowRegisterBits(reg);
code_buffer_.push_back(0xE8); // call +0
code_buffer_.push_back(0);
code_buffer_.push_back(0);
@@ -1532,7 +1673,8 @@
EmitRegRegCond(entry, lir->operands[0], lir->operands[1], lir->operands[2]);
break;
case kRegMemCond: // lir operands - 0: reg, 1: reg, displacement, 3: condition
- EmitRegMemCond(entry, lir->operands[0], lir->operands[1], lir->operands[2], lir->operands[3]);
+ EmitRegMemCond(entry, lir->operands[0], lir->operands[1], lir->operands[2],
+ lir->operands[3]);
break;
case kJmp: // lir operands - 0: rel
if (entry->opcode == kX86JmpT) {
@@ -1566,7 +1708,7 @@
EmitPcRel(entry, lir->operands[0], lir->operands[1], lir->operands[2],
lir->operands[3], lir->operands[4]);
break;
- case kMacro:
+ case kMacro: // lir operands - 0: reg
EmitMacro(entry, lir->operands[0], lir->offset);
break;
default:
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index fcc846f..d66790d 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -331,10 +331,16 @@
std::vector<uint8_t>* ReturnCallFrameInformation();
protected:
- size_t ComputeSize(const X86EncodingMap* entry, int base, int displacement, bool has_sib);
+ size_t ComputeSize(const X86EncodingMap* entry, int base, int displacement,
+ int reg_r, int reg_x, bool has_sib);
+ uint8_t LowRegisterBits(uint8_t reg);
+ bool NeedsRex(uint8_t reg);
void EmitPrefix(const X86EncodingMap* entry);
+ void EmitPrefix(const X86EncodingMap* entry, uint8_t reg_r, uint8_t reg_x, uint8_t reg_b);
void EmitOpcode(const X86EncodingMap* entry);
void EmitPrefixAndOpcode(const X86EncodingMap* entry);
+ void EmitPrefixAndOpcode(const X86EncodingMap* entry,
+ uint8_t reg_r, uint8_t reg_x, uint8_t reg_b);
void EmitDisp(uint8_t base, int disp);
void EmitModrmThread(uint8_t reg_or_opcode);
void EmitModrmDisp(uint8_t reg_or_opcode, uint8_t base, int disp);
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index 19ad2f8..8b34168 100644
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -38,7 +38,7 @@
static const RegStorage core_regs_arr_64q[] = {
rs_r0q, rs_r1q, rs_r2q, rs_r3q, rs_rX86_SP_64, rs_r5q, rs_r6q, rs_r7q,
#ifdef TARGET_REX_SUPPORT
- rs_r8, rs_r9, rs_r10, rs_r11, rs_r12, rs_r13, rs_r14, rs_r15
+ rs_r8q, rs_r9q, rs_r10q, rs_r11q, rs_r12q, rs_r13q, rs_r14q, rs_r15q
#endif
};
static const RegStorage sp_regs_arr_32[] = {
diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h
index 24c64cd..bb8df89 100644
--- a/compiler/dex/quick/x86/x86_lir.h
+++ b/compiler/dex/quick/x86/x86_lir.h
@@ -175,6 +175,16 @@
fr5 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 5,
fr6 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 6,
fr7 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 7,
+#ifdef TARGET_REX_SUPPORT
+ fr8 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 8,
+ fr9 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 9,
+ fr10 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 10,
+ fr11 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 11,
+ fr12 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 12,
+ fr13 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 13,
+ fr14 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 14,
+ fr15 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 15,
+#endif
// xmm registers, double precision aliases.
dr0 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 0,
@@ -185,8 +195,18 @@
dr5 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 5,
dr6 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 6,
dr7 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 7,
+#ifdef TARGET_REX_SUPPORT
+ dr8 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 8,
+ dr9 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 9,
+ dr10 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 10,
+ dr11 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 11,
+ dr12 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 12,
+ dr13 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 13,
+ dr14 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 14,
+ dr15 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 15,
+#endif
- // xmm registers aliases.
+ // xmm registers, quad precision aliases
xr0 = RegStorage::k128BitSolo | 0,
xr1 = RegStorage::k128BitSolo | 1,
xr2 = RegStorage::k128BitSolo | 2,
@@ -195,6 +215,16 @@
xr5 = RegStorage::k128BitSolo | 5,
xr6 = RegStorage::k128BitSolo | 6,
xr7 = RegStorage::k128BitSolo | 7,
+#ifdef TARGET_REX_SUPPORT
+ xr8 = RegStorage::k128BitSolo | 8,
+ xr9 = RegStorage::k128BitSolo | 9,
+ xr10 = RegStorage::k128BitSolo | 10,
+ xr11 = RegStorage::k128BitSolo | 11,
+ xr12 = RegStorage::k128BitSolo | 12,
+ xr13 = RegStorage::k128BitSolo | 13,
+ xr14 = RegStorage::k128BitSolo | 14,
+ xr15 = RegStorage::k128BitSolo | 15,
+#endif
// TODO: as needed, add 256, 512 and 1024-bit xmm views.
};
@@ -251,6 +281,16 @@
constexpr RegStorage rs_fr5(RegStorage::kValid | fr5);
constexpr RegStorage rs_fr6(RegStorage::kValid | fr6);
constexpr RegStorage rs_fr7(RegStorage::kValid | fr7);
+#ifdef TARGET_REX_SUPPORT
+constexpr RegStorage rs_fr8(RegStorage::kValid | fr8);
+constexpr RegStorage rs_fr9(RegStorage::kValid | fr9);
+constexpr RegStorage rs_fr10(RegStorage::kValid | fr10);
+constexpr RegStorage rs_fr11(RegStorage::kValid | fr11);
+constexpr RegStorage rs_fr12(RegStorage::kValid | fr12);
+constexpr RegStorage rs_fr13(RegStorage::kValid | fr13);
+constexpr RegStorage rs_fr14(RegStorage::kValid | fr14);
+constexpr RegStorage rs_fr15(RegStorage::kValid | fr15);
+#endif
constexpr RegStorage rs_dr0(RegStorage::kValid | dr0);
constexpr RegStorage rs_dr1(RegStorage::kValid | dr1);
@@ -260,6 +300,16 @@
constexpr RegStorage rs_dr5(RegStorage::kValid | dr5);
constexpr RegStorage rs_dr6(RegStorage::kValid | dr6);
constexpr RegStorage rs_dr7(RegStorage::kValid | dr7);
+#ifdef TARGET_REX_SUPPORT
+constexpr RegStorage rs_dr8(RegStorage::kValid | dr8);
+constexpr RegStorage rs_dr9(RegStorage::kValid | dr9);
+constexpr RegStorage rs_dr10(RegStorage::kValid | dr10);
+constexpr RegStorage rs_dr11(RegStorage::kValid | dr11);
+constexpr RegStorage rs_dr12(RegStorage::kValid | dr12);
+constexpr RegStorage rs_dr13(RegStorage::kValid | dr13);
+constexpr RegStorage rs_dr14(RegStorage::kValid | dr14);
+constexpr RegStorage rs_dr15(RegStorage::kValid | dr15);
+#endif
constexpr RegStorage rs_xr0(RegStorage::kValid | xr0);
constexpr RegStorage rs_xr1(RegStorage::kValid | xr1);
@@ -269,6 +319,16 @@
constexpr RegStorage rs_xr5(RegStorage::kValid | xr5);
constexpr RegStorage rs_xr6(RegStorage::kValid | xr6);
constexpr RegStorage rs_xr7(RegStorage::kValid | xr7);
+#ifdef TARGET_REX_SUPPORT
+constexpr RegStorage rs_xr8(RegStorage::kValid | xr8);
+constexpr RegStorage rs_xr9(RegStorage::kValid | xr9);
+constexpr RegStorage rs_xr10(RegStorage::kValid | xr10);
+constexpr RegStorage rs_xr11(RegStorage::kValid | xr11);
+constexpr RegStorage rs_xr12(RegStorage::kValid | xr12);
+constexpr RegStorage rs_xr13(RegStorage::kValid | xr13);
+constexpr RegStorage rs_xr14(RegStorage::kValid | xr14);
+constexpr RegStorage rs_xr15(RegStorage::kValid | xr15);
+#endif
extern X86NativeRegisterPool rX86_ARG0;
extern X86NativeRegisterPool rX86_ARG1;
@@ -628,6 +688,15 @@
// 64 Bit Operand Size
#define REX_W 0x48
// Extension of the ModR/M reg field
+#define REX_R 0x44
+// Extension of the SIB index field
+#define REX_X 0x42
+// Extension of the ModR/M r/m field, SIB base field, or Opcode reg field
+#define REX_B 0x41
+// Mask extracting the least 3 bits of r0..r15
+#define kRegNumMask32 0x07
+// Value indicating that base or reg is not used
+#define NO_REG 0
#define IS_SIMM8(v) ((-128 <= (v)) && ((v) <= 127))
#define IS_SIMM16(v) ((-32768 <= (v)) && ((v) <= 32767))