Fixes for x86 compilation with floats.

Corrected code generation for neg-float and neg-double, as well as for other
float and double operations. Previously, the destination could overwrite the
second source when both were the same virtual register.

Change-Id: If75e5f74b5609323c7068b68be1b72510a6ac5b1
diff --git a/src/compiler/codegen/x86/FP/X86FP.cc b/src/compiler/codegen/x86/FP/X86FP.cc
index 460f56b..c00b5fc 100644
--- a/src/compiler/codegen/x86/FP/X86FP.cc
+++ b/src/compiler/codegen/x86/FP/X86FP.cc
@@ -44,10 +44,12 @@
       op = kX86MulssRR;
       break;
     case Instruction::NEG_FLOAT:
-      rlSrc1 = loadValue(cUnit, rlSrc1, kFPReg);
-      rlResult = oatEvalLoc(cUnit, rlDest, kFPReg, true);
-      newLIR2(cUnit, kX86XorpsRR, rlResult.lowReg, rlResult.lowReg);
-      newLIR2(cUnit, kX86SubssRR, rlResult.lowReg, rlSrc1.lowReg);
+      // TODO: Make this nicer. Subtracting the source from 0 gives +0.0 rather
+      // than -0.0 for a 0 input, and using FCHS is difficult with register promotion.
+      // This code treats the value as a CoreReg and flips the sign bit via the add.
+      rlSrc1 = loadValue(cUnit, rlSrc1, kCoreReg);
+      rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
+      opRegRegImm(cUnit, kOpAdd, rlResult.lowReg, rlSrc1.lowReg, 0x80000000);
       storeValue(cUnit, rlDest, rlResult);
       return false;
     case Instruction::REM_FLOAT_2ADDR:
@@ -63,7 +65,10 @@
   int rDest = rlResult.lowReg;
   int rSrc1 = rlSrc1.lowReg;
   int rSrc2 = rlSrc2.lowReg;
-  // TODO: at least CHECK_NE(rDest, rSrc2);
+  if (rSrc2 == rDest) {
+    rSrc2 = oatAllocTempFloat(cUnit);
+    opRegCopy(cUnit, rSrc2, rDest);
+  }
   opRegCopy(cUnit, rDest, rSrc1);
   newLIR2(cUnit, op, rDest, rSrc2);
   storeValue(cUnit, rlDest, rlResult);
@@ -95,10 +100,13 @@
       op = kX86MulsdRR;
       break;
     case Instruction::NEG_DOUBLE:
-      rlSrc1 = loadValueWide(cUnit, rlSrc1, kFPReg);
-      rlResult = oatEvalLoc(cUnit, rlDest, kFPReg, true);
-      newLIR2(cUnit, kX86XorpsRR, rlResult.lowReg, rlResult.lowReg);
-      newLIR2(cUnit, kX86SubsdRR, rlResult.lowReg, rlSrc1.lowReg);
+      // TODO: Make this nicer. Subtracting the source from 0 gives +0.0 rather
+      // than -0.0 for a 0 input, and using FCHS is difficult with register promotion.
+      // This code treats the value as CoreRegs and flips the high word's sign bit.
+      rlSrc1 = loadValueWide(cUnit, rlSrc1, kCoreReg);
+      rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
+      opRegRegImm(cUnit, kOpAdd, rlResult.highReg, rlSrc1.highReg, 0x80000000);
+      opRegCopy(cUnit, rlResult.lowReg, rlSrc1.lowReg);
       storeValueWide(cUnit, rlDest, rlResult);
       return false;
     case Instruction::REM_DOUBLE_2ADDR:
@@ -118,7 +126,10 @@
   int rDest = S2D(rlResult.lowReg, rlResult.highReg);
   int rSrc1 = S2D(rlSrc1.lowReg, rlSrc1.highReg);
   int rSrc2 = S2D(rlSrc2.lowReg, rlSrc2.highReg);
-  // TODO: at least CHECK_NE(rDest, rSrc2);
+  if (rDest == rSrc2) {
+    rSrc2 = oatAllocTempDouble(cUnit) | FP_DOUBLE;
+    opRegCopy(cUnit, rSrc2, rDest);
+  }
   opRegCopy(cUnit, rDest, rSrc1);
   newLIR2(cUnit, op, rDest, rSrc2);
   storeValueWide(cUnit, rlDest, rlResult);
@@ -174,7 +185,7 @@
       srcReg = rlSrc.lowReg;
       oatClobberSReg(cUnit, rlDest.sRegLow);
       rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
-      int tempReg = oatAllocTempDouble(cUnit);
+      int tempReg = oatAllocTempDouble(cUnit) | FP_DOUBLE;
 
       loadConstant(cUnit, rlResult.lowReg, 0x7fffffff);
       newLIR2(cUnit, kX86Cvtsi2sdRR, tempReg, rlResult.lowReg);