Merge "More x86 fixes for floating point operations." into ics-mr1-plus-art
diff --git a/src/compiler/codegen/x86/Assemble.cc b/src/compiler/codegen/x86/Assemble.cc
index b9605cc..671e728 100644
--- a/src/compiler/codegen/x86/Assemble.cc
+++ b/src/compiler/codegen/x86/Assemble.cc
@@ -262,8 +262,8 @@
   EXT_0F_ENCODING_MAP(Addss,     0xF3, 0x58),
   EXT_0F_ENCODING_MAP(Mulsd,     0xF2, 0x59),
   EXT_0F_ENCODING_MAP(Mulss,     0xF3, 0x59),
-  EXT_0F_ENCODING_MAP(Cvtss2sd,  0xF2, 0x5A),
-  EXT_0F_ENCODING_MAP(Cvtsd2ss,  0xF3, 0x5A),
+  EXT_0F_ENCODING_MAP(Cvtsd2ss,  0xF2, 0x5A),
+  EXT_0F_ENCODING_MAP(Cvtss2sd,  0xF3, 0x5A),
   EXT_0F_ENCODING_MAP(Subsd,     0xF2, 0x5C),
   EXT_0F_ENCODING_MAP(Subss,     0xF3, 0x5C),
   EXT_0F_ENCODING_MAP(Divsd,     0xF2, 0x5E),
diff --git a/src/compiler/codegen/x86/FP/X86FP.cc b/src/compiler/codegen/x86/FP/X86FP.cc
index 24cd7d3..e6b47d2 100644
--- a/src/compiler/codegen/x86/FP/X86FP.cc
+++ b/src/compiler/codegen/x86/FP/X86FP.cc
@@ -44,8 +44,12 @@
       op = kX86MulssRR;
       break;
     case Instruction::NEG_FLOAT:
-      UNIMPLEMENTED(WARNING) << "inline fneg"; // pxor xmm, [0x80000000]
-                                                             // fall-through
+      // Negation must only flip the sign bit: 0.0f - x would map +0.0f to +0.0f, not -0.0f.
+      rlSrc1 = loadValue(cUnit, rlSrc1, kCoreReg);
+      rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
+      opRegRegImm(cUnit, kOpAdd, rlResult.lowReg, rlSrc1.lowReg, 0x80000000);
+      storeValue(cUnit, rlDest, rlResult);
+      return false;
     case Instruction::REM_FLOAT_2ADDR:
     case Instruction::REM_FLOAT: {
       return genArithOpFloatPortable(cUnit, mir, rlDest, rlSrc1, rlSrc2);
@@ -91,6 +95,12 @@
       op = kX86MulsdRR;
       break;
     case Instruction::NEG_DOUBLE:
+      rlSrc1 = loadValueWide(cUnit, rlSrc1, kCoreReg);  // negate in core regs: 0.0 - x loses -0.0
+      rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
+      opRegRegImm(cUnit, kOpAdd, rlResult.highReg, rlSrc1.highReg, 0x80000000);  // flip sign bit
+      opRegCopy(cUnit, rlResult.lowReg, rlSrc1.lowReg);  // low word is unchanged by negation
+      storeValueWide(cUnit, rlDest, rlResult);
+      return false;
     case Instruction::REM_DOUBLE_2ADDR:
     case Instruction::REM_DOUBLE: {
       return genArithOpDoublePortable(cUnit, mir, rlDest, rlSrc1, rlSrc2);
@@ -124,9 +134,7 @@
   RegLocation rlDest;
   X86OpCode op = kX86Nop;
   int srcReg;
-  int tempReg;
   RegLocation rlResult;
-  LIR* branch = NULL;
   switch (opcode) {
     case Instruction::INT_TO_FLOAT:
       longSrc = false;
@@ -152,40 +160,52 @@
       rcSrc = kCoreReg;
       op = kX86Cvtsi2sdRR;
       break;
-    case Instruction::FLOAT_TO_INT:
+    case Instruction::FLOAT_TO_INT: {  // truncate toward zero, saturate on overflow, NaN -> 0
       rlSrc = oatGetSrc(cUnit, mir, 0);
       rlSrc = loadValue(cUnit, rlSrc, kFPReg);
       srcReg = rlSrc.lowReg;
       rlDest = oatGetDest(cUnit, mir, 0);
       oatClobberSReg(cUnit, rlDest.sRegLow);
       rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
-      tempReg = oatAllocTempFloat(cUnit);
+      int tempReg = oatAllocTempFloat(cUnit);  // holds (float)0x7fffffff, which rounds up to 2^31
 
       loadConstant(cUnit, rlResult.lowReg, 0x7fffffff);
       newLIR2(cUnit, kX86Cvtsi2ssRR, tempReg, rlResult.lowReg);
       newLIR2(cUnit, kX86ComissRR, srcReg, tempReg);
-      branch = newLIR2(cUnit, kX86Jcc8, 0, kX86CondA);
-      newLIR2(cUnit, kX86Cvtss2siRR, rlResult.lowReg, srcReg);
-      branch->target = newLIR0(cUnit, kPseudoTargetLabel);
+      LIR* branchPosOverflow = newLIR2(cUnit, kX86Jcc8, 0, kX86CondAe);  // src >= 2^31: keep max
+      LIR* branchNaN = newLIR2(cUnit, kX86Jcc8, 0, kX86CondP);  // unordered compare: src is NaN
+      newLIR2(cUnit, kX86Cvttss2siRR, rlResult.lowReg, srcReg);  // truncating conversion
+      LIR* branchNormal = newLIR1(cUnit, kX86Jmp8, 0);
+      branchNaN->target = newLIR0(cUnit, kPseudoTargetLabel);
+      newLIR2(cUnit, kX86Xor32RR, rlResult.lowReg, rlResult.lowReg);  // NaN converts to 0
+      branchPosOverflow->target = newLIR0(cUnit, kPseudoTargetLabel);
+      branchNormal->target = newLIR0(cUnit, kPseudoTargetLabel);
       storeValue(cUnit, rlDest, rlResult);
       return false;
-    case Instruction::DOUBLE_TO_INT:
+    }
+    case Instruction::DOUBLE_TO_INT: {  // truncate toward zero, saturate on overflow, NaN -> 0
       rlSrc = oatGetSrcWide(cUnit, mir, 0, 1);
       rlSrc = loadValueWide(cUnit, rlSrc, kFPReg);
       srcReg = rlSrc.lowReg;
       rlDest = oatGetDest(cUnit, mir, 0);
       oatClobberSReg(cUnit, rlDest.sRegLow);
       rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
-      tempReg = oatAllocTempDouble(cUnit);
+      int tempReg = oatAllocTempDouble(cUnit);  // receives 0x7fffffff converted to double
 
       loadConstant(cUnit, rlResult.lowReg, 0x7fffffff);
       newLIR2(cUnit, kX86Cvtsi2sdRR, tempReg, rlResult.lowReg);
       newLIR2(cUnit, kX86ComisdRR, srcReg, tempReg);
-      branch = newLIR2(cUnit, kX86Jcc8, 0, kX86CondA);
-      newLIR2(cUnit, kX86Cvtsd2siRR, rlResult.lowReg, srcReg);
-      branch->target = newLIR0(cUnit, kPseudoTargetLabel);
+      LIR* branchPosOverflow = newLIR2(cUnit, kX86Jcc8, 0, kX86CondA);  // src > bound: keep max
+      LIR* branchNaN = newLIR2(cUnit, kX86Jcc8, 0, kX86CondP);  // unordered compare: src is NaN
+      newLIR2(cUnit, kX86Cvttsd2siRR, rlResult.lowReg, srcReg);  // truncating conversion
+      LIR* branchNormal = newLIR1(cUnit, kX86Jmp8, 0);  // skip the NaN fix-up below
+      branchNaN->target = newLIR0(cUnit, kPseudoTargetLabel);
+      newLIR2(cUnit, kX86Xor32RR, rlResult.lowReg, rlResult.lowReg);  // NaN converts to 0
+      branchPosOverflow->target = newLIR0(cUnit, kPseudoTargetLabel);
+      branchNormal->target = newLIR0(cUnit, kPseudoTargetLabel);
       storeValue(cUnit, rlDest, rlResult);
       return false;
+    }
     case Instruction::LONG_TO_DOUBLE:
     case Instruction::LONG_TO_FLOAT:
       // These can be implemented inline by using memory as a 64-bit source.
diff --git a/src/compiler/codegen/x86/X86LIR.h b/src/compiler/codegen/x86/X86LIR.h
index 36e459c..3ec1112 100644
--- a/src/compiler/codegen/x86/X86LIR.h
+++ b/src/compiler/codegen/x86/X86LIR.h
@@ -427,8 +427,8 @@
   Binary0fOpCode(kX86Addss),    // float add
   Binary0fOpCode(kX86Mulsd),    // double multiply
   Binary0fOpCode(kX86Mulss),    // float multiply
-  Binary0fOpCode(kX86Cvtss2sd), // float to double
   Binary0fOpCode(kX86Cvtsd2ss), // double to float
+  Binary0fOpCode(kX86Cvtss2sd), // float to double
   Binary0fOpCode(kX86Subsd),    // double subtract
   Binary0fOpCode(kX86Subss),    // float subtract
   Binary0fOpCode(kX86Divsd),    // double divide
diff --git a/src/oat/runtime/x86/oat_support_entrypoints_x86.cc b/src/oat/runtime/x86/oat_support_entrypoints_x86.cc
index 605024e..a28a898 100644
--- a/src/oat/runtime/x86/oat_support_entrypoints_x86.cc
+++ b/src/oat/runtime/x86/oat_support_entrypoints_x86.cc
@@ -67,6 +67,8 @@
 extern "C" void art_unlock_object_from_code(void*);
 
 // Math entrypoints.
+extern "C" double art_fmod_from_code(double, double);
+extern "C" float art_fmodf_from_code(float, float);
 extern "C" double art_l2d_from_code(int64_t);
 extern "C" float art_l2f_from_code(int64_t);
 extern "C" int64_t art_d2l_from_code(double);
@@ -74,7 +76,7 @@
 extern "C" int32_t art_idivmod_from_code(int32_t, int32_t);
 extern "C" int64_t art_ldiv_from_code(int64_t, int64_t);
 extern "C" int64_t art_ldivmod_from_code(int64_t, int64_t);
-extern "C" int64_t art_lmul_from_code(int64_t a, int64_t b);
+extern "C" int64_t art_lmul_from_code(int64_t, int64_t);
 extern "C" uint64_t art_lshl_from_code(uint64_t, uint32_t);
 extern "C" uint64_t art_lshr_from_code(uint64_t, uint32_t);
 extern "C" uint64_t art_lushr_from_code(uint64_t, uint32_t);
@@ -168,13 +170,13 @@
   //points->pDmul = NULL; // Not needed on x86.
   //points->pDsub = NULL; // Not needed on x86.
   //points->pF2d = NULL;
-  //points->pFmod = NULL;
+  points->pFmod = art_fmod_from_code;
   //points->pI2d = NULL;
   points->pL2d = art_l2d_from_code;
   //points->pD2f = NULL;
   //points->pFadd = NULL; // Not needed on x86.
   //points->pFdiv = NULL; // Not needed on x86.
-  //points->pFmodf = NULL;
+  points->pFmodf = art_fmodf_from_code;
   //points->pFmul = NULL; // Not needed on x86.
   //points->pFsub = NULL; // Not needed on x86.
   //points->pI2f = NULL;
diff --git a/src/oat/runtime/x86/runtime_support_x86.S b/src/oat/runtime/x86/runtime_support_x86.S
index 028d7ec..d37fa5c 100644
--- a/src/oat/runtime/x86/runtime_support_x86.S
+++ b/src/oat/runtime/x86/runtime_support_x86.S
@@ -395,12 +395,48 @@
 
 NO_ARG_DOWNCALL art_test_suspend, artTestSuspendFromCode, ret
 
+// Double remainder with fmod semantics (quotient truncated toward zero).
+// arg1 is passed in %ecx:%eax (hi:lo), arg2 in %ebx:%edx; the result is returned in %xmm0.
+DEFINE_FUNCTION art_fmod_from_code
+    movl  %ebx, -4(%esp)          // put hi arg2 into memory
+    movl  %edx, -8(%esp)          // put lo arg2 into memory
+    fldl  -8(%esp)                // push arg2 (divisor) onto fp stack
+    movl  %ecx, -4(%esp)          // put hi arg1 into memory
+    movl  %eax, -8(%esp)          // put lo arg1 into memory
+    fldl  -8(%esp)                // push arg1 (dividend) onto fp stack
+1:
+    fprem                         // partial remainder, truncating (fprem1 would round to nearest)
+    fnstsw %ax                    // copy x87 status word into %ax (arg1 lo is already saved)
+    sahf                          // C2 (reduction incomplete) lands in the parity flag
+    jp    1b                      // keep reducing until the remainder is final
+    fstp  %st(1)                  // drop arg2 so the fp stack does not leak a slot per call
+    fstpl -8(%esp)                // pop return value off fp stack
+    movsd -8(%esp), %xmm0         // place into %xmm0
+    ret
+
+// Float remainder with fmodf semantics (quotient truncated toward zero).
+// arg1 is passed in %eax, arg2 in %ecx; the result is returned in %xmm0.
+DEFINE_FUNCTION art_fmodf_from_code
+    movl  %ecx, -4(%esp)          // put arg2 into memory
+    fld   -4(%esp)                // push arg2 (divisor) onto fp stack
+    movl  %eax, -4(%esp)          // put arg1 into memory
+    fld   -4(%esp)                // push arg1 (dividend) onto fp stack
+1:
+    fprem                         // partial remainder, truncating (fprem1 would round to nearest)
+    fnstsw %ax                    // copy x87 status word into %ax (arg1 is already saved)
+    sahf                          // C2 (reduction incomplete) lands in the parity flag
+    jp    1b                      // keep reducing until the remainder is final
+    fstp  %st(1)                  // drop arg2 so the fp stack does not leak a slot per call
+    fstp  -4(%esp)                // pop return value off fp stack
+    movss -4(%esp), %xmm0         // place into %xmm0
+    ret
+
 DEFINE_FUNCTION art_l2d_from_code
     pushl %eax                    // alignment padding
     pushl %ecx                    // pass arg2
     pushl %eax                    // pass arg1
-    call SYMBOL(art_l2d) // (jlong a, Thread*, SP)
-    fstpl (%esp)                  // get return value
+    call SYMBOL(art_l2d)          // (jlong a, Thread*, SP)
+    fstpl (%esp)                  // pop return value off fp stack
     movsd (%esp), %xmm0           // place into %xmm0
     addl LITERAL(12), %esp        // pop arguments
     ret
@@ -409,8 +445,8 @@
     pushl %eax                    // alignment padding
     pushl %ecx                    // pass arg2
     pushl %eax                    // pass arg1
-    call SYMBOL(art_l2f) // (jlong a, Thread*, SP)
-    fstp  (%esp)                  // get return value
+    call SYMBOL(art_l2f)          // (jlong a, Thread*, SP)
+    fstp  (%esp)                  // pop return value off fp stack
     movss (%esp), %xmm0           // place into %xmm0
     addl LITERAL(12), %esp        // pop arguments
     ret
@@ -419,14 +455,14 @@
     pushl %eax                    // alignment padding
     pushl %ecx                    // pass arg2
     pushl %eax                    // pass arg1
-    call SYMBOL(art_d2l) // (jdouble a, Thread*, SP)
+    call SYMBOL(art_d2l)          // (jdouble a, Thread*, SP)
     addl LITERAL(12), %esp        // pop arguments
     ret
 
 DEFINE_FUNCTION art_f2l_from_code
     subl LITERAL(8), %esp         // alignment padding
     pushl %eax                    // pass arg1
-    call SYMBOL(art_f2l) // (jfloat a, Thread*, SP)
+    call SYMBOL(art_f2l)          // (jfloat a, Thread*, SP)
     addl LITERAL(12), %esp        // pop arguments
     ret