Add support for float-to-long in the optimizing compiler.

- Add support for the float-to-long Dex instruction in the
  optimizing compiler.
- Add a Dex PC field to art::HTypeConversion to allow the
  x86 and ARM code generators to produce runtime calls.
- Instruct art::CodeGenerator::RecordPcInfo not to record
  PC information for HTypeConversion instructions.
- Add S0 to the list of ARM FPU parameter registers.
- Have art::x86_64::X86_64Assembler::cvttss2si work with
  64-bit operands.
- Generate x86, x86-64 and ARM (but not ARM64) code for
  float to long HTypeConversion nodes.
- Add related tests to test/422-type-conversion.

Change-Id: I954214f0d537187883f83f7a83a1bb2dd8a21fd4
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index eb6181c..0ac93a2 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -337,9 +337,10 @@
 
 void HGraphBuilder::Conversion_12x(const Instruction& instruction,
                                    Primitive::Type input_type,
-                                   Primitive::Type result_type) {
+                                   Primitive::Type result_type,
+                                   uint32_t dex_pc) {
   HInstruction* first = LoadLocal(instruction.VRegB(), input_type);
-  current_block_->AddInstruction(new (arena_) HTypeConversion(result_type, first));
+  current_block_->AddInstruction(new (arena_) HTypeConversion(result_type, first, dex_pc));
   UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
 }
 
@@ -1079,52 +1080,57 @@
     }
 
     case Instruction::INT_TO_LONG: {
-      Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimLong);
+      Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimLong, dex_pc);
       break;
     }
 
     case Instruction::INT_TO_FLOAT: {
-      Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimFloat);
+      Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimFloat, dex_pc);
       break;
     }
 
     case Instruction::INT_TO_DOUBLE: {
-      Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimDouble);
+      Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimDouble, dex_pc);
       break;
     }
 
     case Instruction::LONG_TO_INT: {
-      Conversion_12x(instruction, Primitive::kPrimLong, Primitive::kPrimInt);
+      Conversion_12x(instruction, Primitive::kPrimLong, Primitive::kPrimInt, dex_pc);
       break;
     }
 
     case Instruction::LONG_TO_FLOAT: {
-      Conversion_12x(instruction, Primitive::kPrimLong, Primitive::kPrimFloat);
+      Conversion_12x(instruction, Primitive::kPrimLong, Primitive::kPrimFloat, dex_pc);
       break;
     }
 
     case Instruction::LONG_TO_DOUBLE: {
-      Conversion_12x(instruction, Primitive::kPrimLong, Primitive::kPrimDouble);
+      Conversion_12x(instruction, Primitive::kPrimLong, Primitive::kPrimDouble, dex_pc);
       break;
     }
 
     case Instruction::FLOAT_TO_INT: {
-      Conversion_12x(instruction, Primitive::kPrimFloat, Primitive::kPrimInt);
+      Conversion_12x(instruction, Primitive::kPrimFloat, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::FLOAT_TO_LONG: {
+      Conversion_12x(instruction, Primitive::kPrimFloat, Primitive::kPrimLong, dex_pc);
       break;
     }
 
     case Instruction::INT_TO_BYTE: {
-      Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimByte);
+      Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimByte, dex_pc);
       break;
     }
 
     case Instruction::INT_TO_SHORT: {
-      Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimShort);
+      Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimShort, dex_pc);
       break;
     }
 
     case Instruction::INT_TO_CHAR: {
-      Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimChar);
+      Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimChar, dex_pc);
       break;
     }
 
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index 8519bcb..59aba6a 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -135,7 +135,8 @@
 
   void Conversion_12x(const Instruction& instruction,
                       Primitive::Type input_type,
-                      Primitive::Type result_type);
+                      Primitive::Type result_type,
+                      uint32_t dex_pc);
 
   void BuildCheckedDivRem(uint16_t out_reg,
                           uint16_t first_reg,
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index e581af2..7f358ea 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -499,6 +499,21 @@
 }
 
 void CodeGenerator::RecordPcInfo(HInstruction* instruction, uint32_t dex_pc) {
+  if (instruction != nullptr && instruction->IsTypeConversion()) {
+    // The code generated for some type conversions may call the
+    // runtime, thus normally requiring a subsequent call to this
+    // method.  However, the method verifier does not produce PC
+    // information for Dex type conversion instructions, as it
+    // considers them as "atomic" (they cannot join a GC).
+    // Therefore we do not currently record PC information for such
+    // instructions.  As this may change later, we added this special
+    // case so that code generators may nevertheless call
+    // CodeGenerator::RecordPcInfo without triggering an error in
+    // CodeGenerator::BuildNativeGCMap ("Missing ref for dex pc 0x")
+    // thereafter.
+    return;
+  }
+
   // Collect PC infos for the mapping table.
   struct PcInfo pc_info;
   pc_info.dex_pc = dex_pc;
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 7c8f6a2..1d42c47 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -37,6 +37,8 @@
 
 // Maximum value for a primitive integer.
 static int32_t constexpr kPrimIntMax = 0x7fffffff;
+// Maximum value for a primitive long.
+static int64_t constexpr kPrimLongMax = 0x7fffffffffffffff;
 
 class Assembler;
 class CodeGenerator;
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 448a5a0..f050605 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -44,8 +44,9 @@
 static constexpr Register kRuntimeParameterCoreRegisters[] = { R0, R1, R2, R3 };
 static constexpr size_t kRuntimeParameterCoreRegistersLength =
     arraysize(kRuntimeParameterCoreRegisters);
-static constexpr SRegister kRuntimeParameterFpuRegisters[] = { };
-static constexpr size_t kRuntimeParameterFpuRegistersLength = 0;
+static constexpr SRegister kRuntimeParameterFpuRegisters[] = { S0 };
+static constexpr size_t kRuntimeParameterFpuRegistersLength =
+    arraysize(kRuntimeParameterFpuRegisters);
 
 class InvokeRuntimeCallingConvention : public CallingConvention<Register, SRegister> {
  public:
@@ -874,6 +875,7 @@
       || instruction->IsBoundsCheck()
       || instruction->IsNullCheck()
       || instruction->IsDivZeroCheck()
+      || instruction->GetLocations()->CanCall()
       || !IsLeafMethod());
 }
 
@@ -1359,11 +1361,18 @@
 }
 
 void LocationsBuilderARM::VisitTypeConversion(HTypeConversion* conversion) {
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(conversion, LocationSummary::kNoCall);
   Primitive::Type result_type = conversion->GetResultType();
   Primitive::Type input_type = conversion->GetInputType();
   DCHECK_NE(result_type, input_type);
+
+  // Float-to-long conversions invoke the runtime.
+  LocationSummary::CallKind call_kind =
+      (input_type == Primitive::kPrimFloat && result_type == Primitive::kPrimLong)
+      ? LocationSummary::kCall
+      : LocationSummary::kNoCall;
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(conversion, call_kind);
+
   switch (result_type) {
     case Primitive::kPrimByte:
       switch (input_type) {
@@ -1434,7 +1443,15 @@
           locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
           break;
 
-        case Primitive::kPrimFloat:
+        case Primitive::kPrimFloat: {
+          // Processing a Dex `float-to-long' instruction.
+          InvokeRuntimeCallingConvention calling_convention;
+          locations->SetInAt(0, Location::FpuRegisterLocation(
+              calling_convention.GetFpuRegisterAt(0)));
+          locations->SetOut(Location::RegisterPairLocation(R0, R1));
+          break;
+        }
+
         case Primitive::kPrimDouble:
           LOG(FATAL) << "Type conversion from " << input_type << " to "
                      << result_type << " not yet implemented";
@@ -1623,6 +1640,13 @@
           break;
 
         case Primitive::kPrimFloat:
+          // Processing a Dex `float-to-long' instruction.
+          // This call does not actually record PC information.
+          codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pF2l),
+                                  conversion,
+                                  conversion->GetDexPc());
+          break;
+
         case Primitive::kPrimDouble:
           LOG(FATAL) << "Type conversion from " << input_type << " to "
                      << result_type << " not yet implemented";
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 6f83d9f..38dc8c5 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -1326,11 +1326,18 @@
 }
 
 void LocationsBuilderX86::VisitTypeConversion(HTypeConversion* conversion) {
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(conversion, LocationSummary::kNoCall);
   Primitive::Type result_type = conversion->GetResultType();
   Primitive::Type input_type = conversion->GetInputType();
   DCHECK_NE(result_type, input_type);
+
+  // Float-to-long conversions invoke the runtime.
+  LocationSummary::CallKind call_kind =
+      (input_type == Primitive::kPrimFloat && result_type == Primitive::kPrimLong)
+      ? LocationSummary::kCall
+      : LocationSummary::kNoCall;
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(conversion, call_kind);
+
   switch (result_type) {
     case Primitive::kPrimByte:
       switch (input_type) {
@@ -1401,7 +1408,15 @@
           locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
           break;
 
-        case Primitive::kPrimFloat:
+        case Primitive::kPrimFloat: {
+          // Processing a Dex `float-to-long' instruction.
+          InvokeRuntimeCallingConvention calling_convention;
+          locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+          // The runtime helper puts the result in EAX, EDX.
+          locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
+          break;
+        }
+
         case Primitive::kPrimDouble:
           LOG(FATAL) << "Type conversion from " << input_type << " to "
                      << result_type << " not yet implemented";
@@ -1615,6 +1630,12 @@
           break;
 
         case Primitive::kPrimFloat:
+          // Processing a Dex `float-to-long' instruction.
+          __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pF2l)));
+          // This call does not actually record PC information.
+          codegen_->RecordPcInfo(conversion, conversion->GetDexPc());
+          break;
+
         case Primitive::kPrimDouble:
           LOG(FATAL) << "Type conversion from " << input_type << " to "
                      << result_type << " not yet implemented";
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 47fd304..7eecac1 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -1394,6 +1394,12 @@
           break;
 
         case Primitive::kPrimFloat:
+          // Processing a Dex `float-to-long' instruction.
+          locations->SetInAt(0, Location::RequiresFpuRegister());
+          locations->SetOut(Location::RequiresRegister());
+          locations->AddTemp(Location::RequiresFpuRegister());
+          break;
+
         case Primitive::kPrimDouble:
           LOG(FATAL) << "Type conversion from " << input_type << " to "
                      << result_type << " not yet implemented";
@@ -1565,14 +1571,14 @@
 
           __ movl(output, Immediate(kPrimIntMax));
           // temp = int-to-float(output)
-          __ cvtsi2ss(temp, output);
+          __ cvtsi2ss(temp, output, false);
           // if input >= temp goto done
           __ comiss(input, temp);
           __ j(kAboveEqual, &done);
           // if input == NaN goto nan
           __ j(kUnordered, &nan);
           // output = float-to-int-truncate(input)
-          __ cvttss2si(output, input);
+          __ cvttss2si(output, input, false);
           __ jmp(&done);
           __ Bind(&nan);
           //  output = 0
@@ -1604,7 +1610,31 @@
           __ movsxd(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
           break;
 
-        case Primitive::kPrimFloat:
+        case Primitive::kPrimFloat: {
+          // Processing a Dex `float-to-long' instruction.
+          XmmRegister input = in.AsFpuRegister<XmmRegister>();
+          CpuRegister output = out.AsRegister<CpuRegister>();
+          XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+          Label done, nan;
+
+          __ movq(output, Immediate(kPrimLongMax));
+          // temp = int-to-float(output)
+          __ cvtsi2ss(temp, output, true);
+          // if input >= temp goto done
+          __ comiss(input, temp);
+          __ j(kAboveEqual, &done);
+          // if input == NaN goto nan
+          __ j(kUnordered, &nan);
+          // output = float-to-int-truncate(input)
+          __ cvttss2si(output, input, true);
+          __ jmp(&done);
+          __ Bind(&nan);
+          //  output = 0
+          __ xorq(output, output);
+          __ Bind(&done);
+          break;
+        }
+
         case Primitive::kPrimDouble:
           LOG(FATAL) << "Type conversion from " << input_type << " to "
                      << result_type << " not yet implemented";
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 3908a61..8a25de1 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -2001,8 +2001,8 @@
 class HTypeConversion : public HExpression<1> {
  public:
   // Instantiate a type conversion of `input` to `result_type`.
-  HTypeConversion(Primitive::Type result_type, HInstruction* input)
-      : HExpression(result_type, SideEffects::None()) {
+  HTypeConversion(Primitive::Type result_type, HInstruction* input, uint32_t dex_pc)
+      : HExpression(result_type, SideEffects::None()), dex_pc_(dex_pc) {
     SetRawInputAt(0, input);
     DCHECK_NE(input->GetType(), result_type);
   }
@@ -2011,12 +2011,18 @@
   Primitive::Type GetInputType() const { return GetInput()->GetType(); }
   Primitive::Type GetResultType() const { return GetType(); }
 
+  // Required by the x86 and ARM code generators when producing calls
+  // to the runtime.
+  uint32_t GetDexPc() const { return dex_pc_; }
+
   bool CanBeMoved() const OVERRIDE { return true; }
   bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { return true; }
 
   DECLARE_INSTRUCTION(TypeConversion);
 
  private:
+  const uint32_t dex_pc_;
+
   DISALLOW_COPY_AND_ASSIGN(HTypeConversion);
 };
 
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index 474d8a9..3c21236 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -663,9 +663,19 @@
 
 
 void X86_64Assembler::cvttss2si(CpuRegister dst, XmmRegister src) {
+  cvttss2si(dst, src, false);
+}
+
+
+void X86_64Assembler::cvttss2si(CpuRegister dst, XmmRegister src, bool is64bit) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0xF3);
-  EmitOptionalRex32(dst, src);
+  if (is64bit) {
+    // Emit a REX.W prefix if the operand size is 64 bits.
+    EmitRex64(dst, src);
+  } else {
+    EmitOptionalRex32(dst, src);
+  }
   EmitUint8(0x0F);
   EmitUint8(0x2C);
   EmitXmmRegisterOperand(dst.LowBits(), src);
@@ -1997,6 +2007,10 @@
   EmitOptionalRex(false, true, dst.NeedsRex(), false, src.NeedsRex());
 }
 
+void X86_64Assembler::EmitRex64(CpuRegister dst, XmmRegister src) {
+  EmitOptionalRex(false, true, dst.NeedsRex(), false, src.NeedsRex());
+}
+
 void X86_64Assembler::EmitRex64(CpuRegister dst, const Operand& operand) {
   uint8_t rex = 0x48 | operand.rex();  // REX.W000
   if (dst.NeedsRex()) {
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 6e71e4a..4c28366 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -340,6 +340,7 @@
   void cvtsd2ss(XmmRegister dst, XmmRegister src);
 
   void cvttss2si(CpuRegister dst, XmmRegister src);  // Note: this is the r32 version.
+  void cvttss2si(CpuRegister dst, XmmRegister src, bool is64bit);
   void cvttsd2si(CpuRegister dst, XmmRegister src);  // Note: this is the r32 version.
 
   void cvtdq2pd(XmmRegister dst, XmmRegister src);
@@ -688,6 +689,7 @@
   void EmitRex64(CpuRegister dst, CpuRegister src);
   void EmitRex64(CpuRegister dst, const Operand& operand);
   void EmitRex64(XmmRegister dst, CpuRegister src);
+  void EmitRex64(CpuRegister dst, XmmRegister src);
 
   // Emit a REX prefix to normalize byte registers plus necessary register bit encodings.
   void EmitOptionalByteRegNormalizingRex32(CpuRegister dst, CpuRegister src);
diff --git a/test/422-type-conversion/src/Main.java b/test/422-type-conversion/src/Main.java
index e7dbe24..05cdf42 100644
--- a/test/422-type-conversion/src/Main.java
+++ b/test/422-type-conversion/src/Main.java
@@ -94,6 +94,9 @@
     // Generate, compile and check float-to-int Dex instructions.
     floatToInt();
 
+    // Generate, compile and check float-to-long Dex instructions.
+    floatToLong();
+
     // Generate, compile and check int-to-byte Dex instructions.
     shortToByte();
     intToByte();
@@ -342,6 +345,32 @@
     assertIntEquals(-2147483648, $opt$FloatToInt(Float.NEGATIVE_INFINITY));
   }
 
+  private static void floatToLong() {
+    assertLongEquals(1L, $opt$FloatToLong(1F));
+    assertLongEquals(0L, $opt$FloatToLong(0F));
+    assertLongEquals(0L, $opt$FloatToLong(-0F));
+    assertLongEquals(-1L, $opt$FloatToLong(-1F));
+    assertLongEquals(51L, $opt$FloatToLong(51F));
+    assertLongEquals(-51L, $opt$FloatToLong(-51F));
+    assertLongEquals(0L, $opt$FloatToLong(0.5F));
+    assertLongEquals(0L, $opt$FloatToLong(0.4999999F));
+    assertLongEquals(0L, $opt$FloatToLong(-0.4999999F));
+    assertLongEquals(0L, $opt$FloatToLong(-0.5F));
+    assertLongEquals(42L, $opt$FloatToLong(42.199F));
+    assertLongEquals(-42L, $opt$FloatToLong(-42.199F));
+    assertLongEquals(2147483648L, $opt$FloatToLong(2147483647F));  // 2^31 - 1
+    assertLongEquals(-2147483648L, $opt$FloatToLong(-2147483647F));  // -(2^31 - 1)
+    assertLongEquals(-2147483648L, $opt$FloatToLong(-2147483648F));  // -(2^31)
+    assertLongEquals(2147483648L, $opt$FloatToLong(2147483648F));  // (2^31)
+    assertLongEquals(-2147483648L, $opt$FloatToLong(-2147483649F));  // -(2^31 + 1)
+    assertLongEquals(9223372036854775807L, $opt$FloatToLong(9223372036854775807F));  // 2^63 - 1
+    assertLongEquals(-9223372036854775808L, $opt$FloatToLong(-9223372036854775807F));  // -(2^63 - 1)
+    assertLongEquals(-9223372036854775808L, $opt$FloatToLong(-9223372036854775808F));  // -(2^63)
+    assertLongEquals(0L, $opt$FloatToLong(Float.NaN));
+    assertLongEquals(9223372036854775807L, $opt$FloatToLong(Float.POSITIVE_INFINITY));
+    assertLongEquals(-9223372036854775808L, $opt$FloatToLong(Float.NEGATIVE_INFINITY));
+  }
+
   private static void shortToByte() {
     assertByteEquals((byte)1, $opt$ShortToByte((short)1));
     assertByteEquals((byte)0, $opt$ShortToByte((short)0));
@@ -500,6 +529,9 @@
   // This method produces a float-to-int Dex instruction.
   static int $opt$FloatToInt(float a){ return (int)a; }
 
+  // This method produces a float-to-long Dex instruction.
+  static long $opt$FloatToLong(float a){ return (long)a; }
+
   // These methods produce int-to-byte Dex instructions.
   static byte $opt$ShortToByte(short a){ return (byte)a; }
   static byte $opt$IntToByte(int a){ return (byte)a; }