More x86 fixes for floating point operations.
- Fixed encoding for cvtss2sd and cvtsd2ss (they were swapped).
- Implemented neg-float and neg-double in assembly.
- Fixed float-to-int and double-to-int to return 0 if given NaN.
- Created helpers for rem-float and rem-double.
Change-Id: I456fcc1252a63dbb96c8280562e601f4efa3a5df
diff --git a/src/compiler/codegen/x86/Assemble.cc b/src/compiler/codegen/x86/Assemble.cc
index b9605cc..671e728 100644
--- a/src/compiler/codegen/x86/Assemble.cc
+++ b/src/compiler/codegen/x86/Assemble.cc
@@ -262,8 +262,8 @@
EXT_0F_ENCODING_MAP(Addss, 0xF3, 0x58),
EXT_0F_ENCODING_MAP(Mulsd, 0xF2, 0x59),
EXT_0F_ENCODING_MAP(Mulss, 0xF3, 0x59),
- EXT_0F_ENCODING_MAP(Cvtss2sd, 0xF2, 0x5A),
- EXT_0F_ENCODING_MAP(Cvtsd2ss, 0xF3, 0x5A),
+ EXT_0F_ENCODING_MAP(Cvtsd2ss, 0xF2, 0x5A),
+ EXT_0F_ENCODING_MAP(Cvtss2sd, 0xF3, 0x5A),
EXT_0F_ENCODING_MAP(Subsd, 0xF2, 0x5C),
EXT_0F_ENCODING_MAP(Subss, 0xF3, 0x5C),
EXT_0F_ENCODING_MAP(Divsd, 0xF2, 0x5E),
diff --git a/src/compiler/codegen/x86/FP/X86FP.cc b/src/compiler/codegen/x86/FP/X86FP.cc
index 24cd7d3..e6b47d2 100644
--- a/src/compiler/codegen/x86/FP/X86FP.cc
+++ b/src/compiler/codegen/x86/FP/X86FP.cc
@@ -44,8 +44,12 @@
op = kX86MulssRR;
break;
case Instruction::NEG_FLOAT:
- UNIMPLEMENTED(WARNING) << "inline fneg"; // pxor xmm, [0x80000000]
- // fall-through
+ rlSrc1 = loadValue(cUnit, rlSrc1, kCoreReg);
+ rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
+ // Negate by flipping the sign bit: correct for -0.0f and NaN, unlike (0.0f - x) which gives +0.0f for x == +/-0.0f.
+ opRegRegImm(cUnit, kOpAdd, rlResult.lowReg, rlSrc1.lowReg, 0x80000000);
+ storeValue(cUnit, rlDest, rlResult);
+ return false;
case Instruction::REM_FLOAT_2ADDR:
case Instruction::REM_FLOAT: {
return genArithOpFloatPortable(cUnit, mir, rlDest, rlSrc1, rlSrc2);
@@ -91,6 +95,12 @@
op = kX86MulsdRR;
break;
case Instruction::NEG_DOUBLE:
+ rlSrc1 = loadValueWide(cUnit, rlSrc1, kCoreReg);
+ rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
+ opRegCopy(cUnit, rlResult.lowReg, rlSrc1.lowReg); // low word is unchanged by negation
+ opRegRegImm(cUnit, kOpAdd, rlResult.highReg, rlSrc1.highReg, 0x80000000); // flip sign bit: correct for -0.0 and NaN, unlike (0.0 - x)
+ storeValueWide(cUnit, rlDest, rlResult);
+ return false;
case Instruction::REM_DOUBLE_2ADDR:
case Instruction::REM_DOUBLE: {
return genArithOpDoublePortable(cUnit, mir, rlDest, rlSrc1, rlSrc2);
@@ -124,9 +134,7 @@
RegLocation rlDest;
X86OpCode op = kX86Nop;
int srcReg;
- int tempReg;
RegLocation rlResult;
- LIR* branch = NULL;
switch (opcode) {
case Instruction::INT_TO_FLOAT:
longSrc = false;
@@ -152,40 +160,52 @@
rcSrc = kCoreReg;
op = kX86Cvtsi2sdRR;
break;
- case Instruction::FLOAT_TO_INT:
+ case Instruction::FLOAT_TO_INT: {
rlSrc = oatGetSrc(cUnit, mir, 0);
rlSrc = loadValue(cUnit, rlSrc, kFPReg);
srcReg = rlSrc.lowReg;
rlDest = oatGetDest(cUnit, mir, 0);
oatClobberSReg(cUnit, rlDest.sRegLow);
rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
- tempReg = oatAllocTempFloat(cUnit);
+ int tempReg = oatAllocTempFloat(cUnit);
loadConstant(cUnit, rlResult.lowReg, 0x7fffffff);
newLIR2(cUnit, kX86Cvtsi2ssRR, tempReg, rlResult.lowReg);
newLIR2(cUnit, kX86ComissRR, srcReg, tempReg);
- branch = newLIR2(cUnit, kX86Jcc8, 0, kX86CondA);
- newLIR2(cUnit, kX86Cvtss2siRR, rlResult.lowReg, srcReg);
- branch->target = newLIR0(cUnit, kPseudoTargetLabel);
+ LIR* branchPosOverflow = newLIR2(cUnit, kX86Jcc8, 0, kX86CondAe); // >=, not >: cvtsi2ss rounds 0x7fffffff up to 2^31f, so src == 2^31f must also clamp (NaN still falls through: unordered sets CF)
+ LIR* branchNaN = newLIR2(cUnit, kX86Jcc8, 0, kX86CondP);
+ newLIR2(cUnit, kX86Cvttss2siRR, rlResult.lowReg, srcReg);
+ LIR* branchNormal = newLIR1(cUnit, kX86Jmp8, 0);
+ branchNaN->target = newLIR0(cUnit, kPseudoTargetLabel);
+ newLIR2(cUnit, kX86Xor32RR, rlResult.lowReg, rlResult.lowReg);
+ branchPosOverflow->target = newLIR0(cUnit, kPseudoTargetLabel);
+ branchNormal->target = newLIR0(cUnit, kPseudoTargetLabel);
storeValue(cUnit, rlDest, rlResult);
return false;
- case Instruction::DOUBLE_TO_INT:
+ }
+ case Instruction::DOUBLE_TO_INT: {
rlSrc = oatGetSrcWide(cUnit, mir, 0, 1);
rlSrc = loadValueWide(cUnit, rlSrc, kFPReg);
srcReg = rlSrc.lowReg;
rlDest = oatGetDest(cUnit, mir, 0);
oatClobberSReg(cUnit, rlDest.sRegLow);
rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
- tempReg = oatAllocTempDouble(cUnit);
+ int tempReg = oatAllocTempDouble(cUnit);
loadConstant(cUnit, rlResult.lowReg, 0x7fffffff);
newLIR2(cUnit, kX86Cvtsi2sdRR, tempReg, rlResult.lowReg);
newLIR2(cUnit, kX86ComisdRR, srcReg, tempReg);
- branch = newLIR2(cUnit, kX86Jcc8, 0, kX86CondA);
- newLIR2(cUnit, kX86Cvtsd2siRR, rlResult.lowReg, srcReg);
- branch->target = newLIR0(cUnit, kPseudoTargetLabel);
+ LIR* branchPosOverflow = newLIR2(cUnit, kX86Jcc8, 0, kX86CondAe); // >= for robustness/consistency with FLOAT_TO_INT; 0x7fffffff is exact as a double so this is behavior-equivalent here
+ LIR* branchNaN = newLIR2(cUnit, kX86Jcc8, 0, kX86CondP);
+ newLIR2(cUnit, kX86Cvttsd2siRR, rlResult.lowReg, srcReg);
+ LIR* branchNormal = newLIR1(cUnit, kX86Jmp8, 0);
+ branchNaN->target = newLIR0(cUnit, kPseudoTargetLabel);
+ newLIR2(cUnit, kX86Xor32RR, rlResult.lowReg, rlResult.lowReg);
+ branchPosOverflow->target = newLIR0(cUnit, kPseudoTargetLabel);
+ branchNormal->target = newLIR0(cUnit, kPseudoTargetLabel);
storeValue(cUnit, rlDest, rlResult);
return false;
+ }
case Instruction::LONG_TO_DOUBLE:
case Instruction::LONG_TO_FLOAT:
// These can be implemented inline by using memory as a 64-bit source.
diff --git a/src/compiler/codegen/x86/X86LIR.h b/src/compiler/codegen/x86/X86LIR.h
index 36e459c..3ec1112 100644
--- a/src/compiler/codegen/x86/X86LIR.h
+++ b/src/compiler/codegen/x86/X86LIR.h
@@ -427,8 +427,8 @@
Binary0fOpCode(kX86Addss), // float add
Binary0fOpCode(kX86Mulsd), // double multiply
Binary0fOpCode(kX86Mulss), // float multiply
- Binary0fOpCode(kX86Cvtss2sd), // float to double
Binary0fOpCode(kX86Cvtsd2ss), // double to float
+ Binary0fOpCode(kX86Cvtss2sd), // float to double
Binary0fOpCode(kX86Subsd), // double subtract
Binary0fOpCode(kX86Subss), // float subtract
Binary0fOpCode(kX86Divsd), // double divide
diff --git a/src/oat/runtime/x86/oat_support_entrypoints_x86.cc b/src/oat/runtime/x86/oat_support_entrypoints_x86.cc
index 605024e..a28a898 100644
--- a/src/oat/runtime/x86/oat_support_entrypoints_x86.cc
+++ b/src/oat/runtime/x86/oat_support_entrypoints_x86.cc
@@ -67,6 +67,8 @@
extern "C" void art_unlock_object_from_code(void*);
// Math entrypoints.
+extern "C" double art_fmod_from_code(double, double);
+extern "C" float art_fmodf_from_code(float, float);
extern "C" double art_l2d_from_code(int64_t);
extern "C" float art_l2f_from_code(int64_t);
extern "C" int64_t art_d2l_from_code(double);
@@ -74,7 +76,7 @@
extern "C" int32_t art_idivmod_from_code(int32_t, int32_t);
extern "C" int64_t art_ldiv_from_code(int64_t, int64_t);
extern "C" int64_t art_ldivmod_from_code(int64_t, int64_t);
-extern "C" int64_t art_lmul_from_code(int64_t a, int64_t b);
+extern "C" int64_t art_lmul_from_code(int64_t, int64_t);
extern "C" uint64_t art_lshl_from_code(uint64_t, uint32_t);
extern "C" uint64_t art_lshr_from_code(uint64_t, uint32_t);
extern "C" uint64_t art_lushr_from_code(uint64_t, uint32_t);
@@ -168,13 +170,13 @@
//points->pDmul = NULL; // Not needed on x86.
//points->pDsub = NULL; // Not needed on x86.
//points->pF2d = NULL;
- //points->pFmod = NULL;
+ points->pFmod = art_fmod_from_code;
//points->pI2d = NULL;
points->pL2d = art_l2d_from_code;
//points->pD2f = NULL;
//points->pFadd = NULL; // Not needed on x86.
//points->pFdiv = NULL; // Not needed on x86.
- //points->pFmodf = NULL;
+ points->pFmodf = art_fmodf_from_code;
//points->pFmul = NULL; // Not needed on x86.
//points->pFsub = NULL; // Not needed on x86.
//points->pI2f = NULL;
diff --git a/src/oat/runtime/x86/runtime_support_x86.S b/src/oat/runtime/x86/runtime_support_x86.S
index 028d7ec..d37fa5c 100644
--- a/src/oat/runtime/x86/runtime_support_x86.S
+++ b/src/oat/runtime/x86/runtime_support_x86.S
@@ -395,12 +395,34 @@
NO_ARG_DOWNCALL art_test_suspend, artTestSuspendFromCode, ret
+DEFINE_FUNCTION art_fmod_from_code
+ subl LITERAL(12), %esp // alignment padding
+ pushl %ebx // pass hi arg2
+ pushl %edx // pass lo arg2
+ pushl %ecx // pass hi arg1
+ pushl %eax // pass lo arg1
+ call SYMBOL(fmod) // (jdouble a, jdouble b); Java rem is fmod (truncating), not fprem1 (IEEE) semantics, and libm loops the partial remainder and balances the x87 stack
+ fstpl (%esp) // pop return value off fp stack
+ movsd (%esp), %xmm0 // place into %xmm0
+ addl LITERAL(28), %esp // pop arguments
+ ret
+
+DEFINE_FUNCTION art_fmodf_from_code
+ subl LITERAL(4), %esp // alignment padding
+ pushl %ecx // pass arg2
+ pushl %eax // pass arg1
+ call SYMBOL(fmodf) // (jfloat a, jfloat b); avoids fprem1's IEEE-remainder semantics, the missing C2 partial-remainder loop, and the x87 stack leak of the inline version
+ fstp (%esp) // pop return value off fp stack
+ movss (%esp), %xmm0 // place into %xmm0
+ addl LITERAL(12), %esp // pop arguments
+ ret
+
DEFINE_FUNCTION art_l2d_from_code
pushl %eax // alignment padding
pushl %ecx // pass arg2
pushl %eax // pass arg1
- call SYMBOL(art_l2d) // (jlong a, Thread*, SP)
- fstpl (%esp) // get return value
+ call SYMBOL(art_l2d) // (jlong a, Thread*, SP)
+ fstpl (%esp) // pop return value off fp stack
movsd (%esp), %xmm0 // place into %xmm0
addl LITERAL(12), %esp // pop arguments
ret
@@ -409,8 +431,8 @@
pushl %eax // alignment padding
pushl %ecx // pass arg2
pushl %eax // pass arg1
- call SYMBOL(art_l2f) // (jlong a, Thread*, SP)
- fstp (%esp) // get return value
+ call SYMBOL(art_l2f) // (jlong a, Thread*, SP)
+ fstp (%esp) // pop return value off fp stack
movss (%esp), %xmm0 // place into %xmm0
addl LITERAL(12), %esp // pop arguments
ret
@@ -419,14 +441,14 @@
pushl %eax // alignment padding
pushl %ecx // pass arg2
pushl %eax // pass arg1
- call SYMBOL(art_d2l) // (jdouble a, Thread*, SP)
+ call SYMBOL(art_d2l) // (jdouble a, Thread*, SP)
addl LITERAL(12), %esp // pop arguments
ret
DEFINE_FUNCTION art_f2l_from_code
subl LITERAL(8), %esp // alignment padding
pushl %eax // pass arg1
- call SYMBOL(art_f2l) // (jfloat a, Thread*, SP)
+ call SYMBOL(art_f2l) // (jfloat a, Thread*, SP)
addl LITERAL(12), %esp // pop arguments
ret