ARM: Inline codegen for long-to-float conversion.
Converting long to double and then to float inline with VFP instructions
should be faster than calling the conversion function provided by the toolchain.
Change-Id: I7ff809bca6665f0c1a0d7e6db98d570ce86b7c66
diff --git a/compiler/dex/quick/arm/fp_arm.cc b/compiler/dex/quick/arm/fp_arm.cc
index dd0a429..0aef489 100644
--- a/compiler/dex/quick/arm/fp_arm.cc
+++ b/compiler/dex/quick/arm/fp_arm.cc
@@ -162,9 +162,30 @@
case Instruction::FLOAT_TO_LONG:
GenConversionCall(QUICK_ENTRYPOINT_OFFSET(pF2l), rl_dest, rl_src);
return;
- case Instruction::LONG_TO_FLOAT:
- GenConversionCall(QUICK_ENTRYPOINT_OFFSET(pL2f), rl_dest, rl_src);
+ case Instruction::LONG_TO_FLOAT: {
+ rl_src = LoadValueWide(rl_src, kFPReg);
+ src_reg = S2d(rl_src.reg.GetReg(), rl_src.reg.GetHighReg());
+ rl_result = EvalLoc(rl_dest, kFPReg, true);
+ // Allocate temp registers.
+ int high_val = AllocTempDouble();
+ int low_val = AllocTempDouble();
+ int const_val = AllocTempDouble();
+ // Long to double.
+ NewLIR2(kThumb2VcvtF64S32, high_val | ARM_FP_DOUBLE, (src_reg & ~ARM_FP_DOUBLE) + 1);
+ NewLIR2(kThumb2VcvtF64U32, low_val | ARM_FP_DOUBLE, (src_reg & ~ARM_FP_DOUBLE));
+ LoadConstantWide(const_val, const_val + 1, 0x41f0000000000000LL);
+ NewLIR3(kThumb2VmlaF64, low_val | ARM_FP_DOUBLE, high_val | ARM_FP_DOUBLE,
+ const_val | ARM_FP_DOUBLE);
+ // Double to float.
+ NewLIR2(kThumb2VcvtDF, rl_result.reg.GetReg(), low_val | ARM_FP_DOUBLE);
+ // Free temp registers.
+ FreeTemp(high_val);
+ FreeTemp(low_val);
+ FreeTemp(const_val);
+ // Store result.
+ StoreValue(rl_dest, rl_result);
return;
+ }
case Instruction::DOUBLE_TO_LONG:
GenConversionCall(QUICK_ENTRYPOINT_OFFSET(pD2l), rl_dest, rl_src);
return;