Added thin-lock fast path for monitor-enter/exit on x86.
Change-Id: Iba187ae1acde6e341ae510d4b47f59e6984fc354
diff --git a/src/compiler/codegen/x86/Assemble.cc b/src/compiler/codegen/x86/Assemble.cc
index a245660..0c5d3cf 100644
--- a/src/compiler/codegen/x86/Assemble.cc
+++ b/src/compiler/codegen/x86/Assemble.cc
@@ -298,6 +298,14 @@
EXT_0F_ENCODING_MAP(Imul16, 0x66, 0xAF, REG_DEF0 | SETS_CCODES),
EXT_0F_ENCODING_MAP(Imul32, 0x00, 0xAF, REG_DEF0 | SETS_CCODES),
+
+ { kX86CmpxchgRR, kRegRegStore, IS_BINARY_OP | REG_DEF0 | REG_USE01 | REG_DEFA_USEA | SETS_CCODES, { 0, 0, 0x0F, 0xB1, 0, 0, 0, 0 }, "Cmpxchg", "!0r,!1r" },
+ { kX86CmpxchgMR, kMemReg, IS_STORE | IS_TERTIARY_OP | REG_USE02 | REG_DEFA_USEA | SETS_CCODES, { 0, 0, 0x0F, 0xB1, 0, 0, 0, 0 }, "Cmpxchg", "[!0r+!1d],!2r" },
+ { kX86CmpxchgAR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014 | REG_DEFA_USEA | SETS_CCODES, { 0, 0, 0x0F, 0xB1, 0, 0, 0, 0 }, "Cmpxchg", "[!0r+!1r<<!2d+!3d],!4r" },
+ { kX86LockCmpxchgRR, kRegRegStore, IS_BINARY_OP | REG_DEF0 | REG_USE01 | REG_DEFA_USEA | SETS_CCODES, { 0xF0, 0, 0x0F, 0xB1, 0, 0, 0, 0 }, "Lock Cmpxchg", "!0r,!1r" },
+ { kX86LockCmpxchgMR, kMemReg, IS_STORE | IS_TERTIARY_OP | REG_USE02 | REG_DEFA_USEA | SETS_CCODES, { 0xF0, 0, 0x0F, 0xB1, 0, 0, 0, 0 }, "Lock Cmpxchg", "[!0r+!1d],!2r" },
+ { kX86LockCmpxchgAR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014 | REG_DEFA_USEA | SETS_CCODES, { 0xF0, 0, 0x0F, 0xB1, 0, 0, 0, 0 }, "Lock Cmpxchg", "[!0r+!1r<<!2d+!3d],!4r" },
+
EXT_0F_ENCODING_MAP(Movzx8, 0x00, 0xB6, REG_DEF0),
EXT_0F_ENCODING_MAP(Movzx16, 0x00, 0xB7, REG_DEF0),
EXT_0F_ENCODING_MAP(Movsx8, 0x00, 0xBE, REG_DEF0),
diff --git a/src/compiler/codegen/x86/FP/X86FP.cc b/src/compiler/codegen/x86/FP/X86FP.cc
index 6003465..bb648bf 100644
--- a/src/compiler/codegen/x86/FP/X86FP.cc
+++ b/src/compiler/codegen/x86/FP/X86FP.cc
@@ -49,7 +49,7 @@
rlSrc1 = loadValue(cUnit, rlSrc1, kFPReg);
rlResult = oatEvalLoc(cUnit, rlDest, kFPReg, true);
tempReg = oatAllocTemp(cUnit);
- loadConstant(cUnit, tempReg, 0x80000000);
+ loadConstantNoClobber(cUnit, tempReg, 0x80000000);
int rDest = rlResult.lowReg;
int rSrc1 = rlSrc1.lowReg;
if (rDest == rSrc1) {
@@ -114,7 +114,7 @@
rlSrc1 = loadValueWide(cUnit, rlSrc1, kFPReg);
rlResult = oatEvalLoc(cUnit, rlDest, kFPReg, true);
tempReg = oatAllocTemp(cUnit);
- loadConstant(cUnit, tempReg, 0x80000000);
+ loadConstantNoClobber(cUnit, tempReg, 0x80000000);
int rDest = S2D(rlResult.lowReg, rlResult.highReg);
int rSrc1 = S2D(rlSrc1.lowReg, rlSrc1.highReg);
if (rDest == rSrc1) {
diff --git a/src/compiler/codegen/x86/X86/Gen.cc b/src/compiler/codegen/x86/X86/Gen.cc
index 894202e..69aaaad 100644
--- a/src/compiler/codegen/x86/X86/Gen.cc
+++ b/src/compiler/codegen/x86/X86/Gen.cc
@@ -211,30 +211,43 @@
LIR* genNullCheck(CompilationUnit* cUnit, int sReg, int mReg, int optFlags);
void callRuntimeHelperReg(CompilationUnit* cUnit, int helperOffset, int arg0);
-/*
- * TODO: implement fast path to short-circuit thin-lock case
- */
void genMonitorEnter(CompilationUnit* cUnit, int optFlags, RegLocation rlSrc)
{
oatFlushAllRegs(cUnit);
- loadValueDirectFixed(cUnit, rlSrc, rARG0); // Get obj
+ loadValueDirectFixed(cUnit, rlSrc, rCX); // Get obj
oatLockCallTemps(cUnit); // Prepare for explicit register usage
- genNullCheck(cUnit, rlSrc.sRegLow, rARG0, optFlags);
- // Go expensive route - artLockObjectFromCode(self, obj);
- callRuntimeHelperReg(cUnit, ENTRYPOINT_OFFSET(pLockObjectFromCode), rARG0);
+ genNullCheck(cUnit, rlSrc.sRegLow, rCX, optFlags);
+ // If lock is unheld, try to grab it quickly with compare and exchange
+ // TODO: copy and clear hash state?
+ newLIR2(cUnit, kX86Mov32RT, rDX, Thread::ThinLockIdOffset().Int32Value());
+ newLIR2(cUnit, kX86Sal32RI, rDX, LW_LOCK_OWNER_SHIFT);
+ newLIR2(cUnit, kX86Xor32RR, rAX, rAX);
+ newLIR3(cUnit, kX86LockCmpxchgMR, rCX, Object::MonitorOffset().Int32Value(), rDX);
+ LIR* branch = newLIR2(cUnit, kX86Jcc8, 0, kX86CondEq);
+ // If lock is held, go the expensive route - artLockObjectFromCode(self, obj);
+ callRuntimeHelperReg(cUnit, ENTRYPOINT_OFFSET(pLockObjectFromCode), rCX);
+ branch->target = newLIR0(cUnit, kPseudoTargetLabel);
}
-/*
- * TODO: implement fast path to short-circuit thin-lock case
- */
void genMonitorExit(CompilationUnit* cUnit, int optFlags, RegLocation rlSrc)
{
oatFlushAllRegs(cUnit);
- loadValueDirectFixed(cUnit, rlSrc, rARG0); // Get obj
+ loadValueDirectFixed(cUnit, rlSrc, rAX); // Get obj
oatLockCallTemps(cUnit); // Prepare for explicit register usage
- genNullCheck(cUnit, rlSrc.sRegLow, rARG0, optFlags);
- // Go expensive route - UnlockObjectFromCode(obj);
- callRuntimeHelperReg(cUnit, ENTRYPOINT_OFFSET(pUnlockObjectFromCode), rARG0);
+ genNullCheck(cUnit, rlSrc.sRegLow, rAX, optFlags);
+ // If lock is held by the current thread, clear it to quickly release it
+ // TODO: clear hash state?
+ newLIR2(cUnit, kX86Mov32RT, rDX, Thread::ThinLockIdOffset().Int32Value());
+ newLIR2(cUnit, kX86Sal32RI, rDX, LW_LOCK_OWNER_SHIFT);
+ newLIR3(cUnit, kX86Mov32RM, rCX, rAX, Object::MonitorOffset().Int32Value());
+ opRegReg(cUnit, kOpSub, rCX, rDX);
+ LIR* branch = newLIR2(cUnit, kX86Jcc8, 0, kX86CondNe);
+ newLIR3(cUnit, kX86Mov32MR, rAX, Object::MonitorOffset().Int32Value(), rCX);
+ LIR* branch2 = newLIR1(cUnit, kX86Jmp8, 0);
+ branch->target = newLIR0(cUnit, kPseudoTargetLabel);
+ // Otherwise, go the expensive route - UnlockObjectFromCode(obj);
+ callRuntimeHelperReg(cUnit, ENTRYPOINT_OFFSET(pUnlockObjectFromCode), rAX);
+ branch2->target = newLIR0(cUnit, kPseudoTargetLabel);
}
/*
diff --git a/src/compiler/codegen/x86/X86LIR.h b/src/compiler/codegen/x86/X86LIR.h
index 5bf4dd9..72c8c03 100644
--- a/src/compiler/codegen/x86/X86LIR.h
+++ b/src/compiler/codegen/x86/X86LIR.h
@@ -445,6 +445,8 @@
kX86Mfence, // memory barrier
Binary0fOpCode(kX86Imul16), // 16bit multiply
Binary0fOpCode(kX86Imul32), // 32bit multiply
+ kX86CmpxchgRR, kX86CmpxchgMR, kX86CmpxchgAR,// compare and exchange
+ kX86LockCmpxchgRR, kX86LockCmpxchgMR, kX86LockCmpxchgAR,// locked compare and exchange
Binary0fOpCode(kX86Movzx8), // zero-extend 8-bit value
Binary0fOpCode(kX86Movzx16), // zero-extend 16-bit value
Binary0fOpCode(kX86Movsx8), // sign-extend 8-bit value
diff --git a/src/disassembler_x86.cc b/src/disassembler_x86.cc
index f70289f..646e978 100644
--- a/src/disassembler_x86.cc
+++ b/src/disassembler_x86.cc
@@ -511,6 +511,7 @@
no_ops = true;
}
break;
+ case 0xB1: opcode << "cmpxchg"; has_modrm = true; store = true; break;
case 0xB6: opcode << "movzxb"; has_modrm = true; load = true; break;
case 0xB7: opcode << "movzxw"; has_modrm = true; load = true; break;
case 0xBE: opcode << "movsxb"; has_modrm = true; load = true; break;