Math.round float intrinsic for ARM.

Test: m test-art-target
Test: m 082-inline-execute
Change-Id: I1b384e388b9959eca662922bffa8675cdbfe6e6b
diff --git a/compiler/optimizing/common_arm.h b/compiler/optimizing/common_arm.h
index e184745..01304ac 100644
--- a/compiler/optimizing/common_arm.h
+++ b/compiler/optimizing/common_arm.h
@@ -66,6 +66,11 @@
   return vixl::aarch32::SRegister(location.AsFpuRegisterPairLow<vixl::aarch32::SRegister>());
 }
 
+inline vixl::aarch32::SRegister HighSRegisterFrom(Location location) {
+  DCHECK(location.IsFpuRegisterPair()) << location;
+  return vixl::aarch32::SRegister(location.AsFpuRegisterPairHigh<vixl::aarch32::SRegister>());
+}
+
 inline vixl::aarch32::Register RegisterFrom(Location location) {
   DCHECK(location.IsRegister()) << location;
   return vixl::aarch32::Register(location.reg());
diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc
index b25bad7..0d933ea 100644
--- a/compiler/optimizing/intrinsics_arm_vixl.cc
+++ b/compiler/optimizing/intrinsics_arm_vixl.cc
@@ -39,6 +39,7 @@
 using helpers::LocationFrom;
 using helpers::LowRegisterFrom;
 using helpers::LowSRegisterFrom;
+using helpers::HighSRegisterFrom;
 using helpers::OutputDRegister;
 using helpers::OutputSRegister;
 using helpers::OutputRegister;
@@ -794,6 +795,58 @@
   __ Vrintn(F64, F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
 }
 
+void IntrinsicLocationsBuilderARMVIXL::VisitMathRoundFloat(HInvoke* invoke) {
+  if (features_.HasARMv8AInstructions()) {
+    LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                              LocationSummary::kNoCall,
+                                                              kIntrinsified);
+    locations->SetInAt(0, Location::RequiresFpuRegister());
+    locations->SetOut(Location::RequiresRegister());
+    locations->AddTemp(Location::RequiresFpuRegister());
+  }
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathRoundFloat(HInvoke* invoke) {
+  DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions());
+
+  ArmVIXLAssembler* assembler = GetAssembler();
+  vixl32::SRegister in_reg = InputSRegisterAt(invoke, 0);
+  vixl32::Register out_reg = OutputRegister(invoke);
+  vixl32::SRegister temp1 = LowSRegisterFrom(invoke->GetLocations()->GetTemp(0));
+  vixl32::SRegister temp2 = HighSRegisterFrom(invoke->GetLocations()->GetTemp(0));
+  vixl32::Label done;
+  vixl32::Label* final_label = codegen_->GetFinalLabel(invoke, &done);
+
+  // Round to nearest integer, ties away from zero.
+  __ Vcvta(S32, F32, temp1, in_reg);
+  __ Vmov(out_reg, temp1);
+
+  // For positive, zero or NaN inputs, rounding is done.
+  __ Cmp(out_reg, 0);
+  __ B(ge, final_label, /* far_target */ false);
+
+  // Handle input < 0 cases.
+  // If input is negative but not a tie, previous result (round to nearest) is valid.
+  // If input is a negative tie, change rounding direction to positive infinity, out_reg += 1.
+  __ Vrinta(F32, F32, temp1, in_reg);
+  __ Vmov(temp2, 0.5);
+  __ Vsub(F32, temp1, in_reg, temp1);
+  __ Vcmp(F32, temp1, temp2);
+  __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
+  {
+    // Use ExactAsemblyScope here because we are using IT.
+    ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(),
+                                2 * kMaxInstructionSizeInBytes,
+                                CodeBufferCheckScope::kMaximumSize);
+    __ it(eq);
+    __ add(eq, out_reg, out_reg, 1);
+  }
+
+  if (done.IsReferenced()) {
+    __ Bind(&done);
+  }
+}
+
 void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekByte(HInvoke* invoke) {
   CreateIntToIntLocations(arena_, invoke);
 }
@@ -3100,7 +3153,6 @@
 }
 
 UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundDouble)   // Could be done by changing rounding mode, maybe?
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundFloat)    // Could be done by changing rounding mode, maybe?
 UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeCASLong)     // High register pressure.
 UNIMPLEMENTED_INTRINSIC(ARMVIXL, SystemArrayCopyChar)
 UNIMPLEMENTED_INTRINSIC(ARMVIXL, IntegerHighestOneBit)