AArch64: Enable FLOAT and DOUBLE opcodes

This patch enables all the FLOAT and DOUBLE opcodes except for the REM ones
(REM_FLOAT, REM_DOUBLE and their 2ADDR variants), which remain disabled.
It has been tested and passes all Dalvik tests except for:

failed: 018-stack-overflow[pid=1076]
failed: 107-int-math2[pid=1593]

Change-Id: I581f219bde354e3402aa3ad6e24ef15566da5f78
Signed-off-by: Serban Constantinescu <serban.constantinescu@arm.com>
diff --git a/compiler/dex/compiler_enums.h b/compiler/dex/compiler_enums.h
index 5b4492f..767ffbf 100644
--- a/compiler/dex/compiler_enums.h
+++ b/compiler/dex/compiler_enums.h
@@ -22,6 +22,7 @@
 namespace art {
 
 enum RegisterClass {
+  kInvalidRegClass,
   kCoreReg,
   kFPReg,
   kAnyReg,
diff --git a/compiler/dex/frontend.cc b/compiler/dex/frontend.cc
index 9bad736..beebe62 100644
--- a/compiler/dex/frontend.cc
+++ b/compiler/dex/frontend.cc
@@ -180,10 +180,10 @@
     // Instruction::GOTO_32,
     // Instruction::PACKED_SWITCH,
     // Instruction::SPARSE_SWITCH,
-    // Instruction::CMPL_FLOAT,
-    // Instruction::CMPG_FLOAT,
-    // Instruction::CMPL_DOUBLE,
-    // Instruction::CMPG_DOUBLE,
+    Instruction::CMPL_FLOAT,
+    Instruction::CMPG_FLOAT,
+    Instruction::CMPL_DOUBLE,
+    Instruction::CMPG_DOUBLE,
     Instruction::CMP_LONG,
     // Instruction::IF_EQ,
     // Instruction::IF_NE,
@@ -262,20 +262,20 @@
     Instruction::NOT_INT,
     Instruction::NEG_LONG,
     Instruction::NOT_LONG,
-    // Instruction::NEG_FLOAT,
-    // Instruction::NEG_DOUBLE,
+    Instruction::NEG_FLOAT,
+    Instruction::NEG_DOUBLE,
     Instruction::INT_TO_LONG,
-    // Instruction::INT_TO_FLOAT,
-    // Instruction::INT_TO_DOUBLE,
+    Instruction::INT_TO_FLOAT,
+    Instruction::INT_TO_DOUBLE,
     Instruction::LONG_TO_INT,
-    // Instruction::LONG_TO_FLOAT,
-    // Instruction::LONG_TO_DOUBLE,
-    // Instruction::FLOAT_TO_INT,
-    // Instruction::FLOAT_TO_LONG,
-    // Instruction::FLOAT_TO_DOUBLE,
-    // Instruction::DOUBLE_TO_INT,
-    // Instruction::DOUBLE_TO_LONG,
-    // Instruction::DOUBLE_TO_FLOAT,
+    Instruction::LONG_TO_FLOAT,
+    Instruction::LONG_TO_DOUBLE,
+    Instruction::FLOAT_TO_INT,
+    Instruction::FLOAT_TO_LONG,
+    Instruction::FLOAT_TO_DOUBLE,
+    Instruction::DOUBLE_TO_INT,
+    Instruction::DOUBLE_TO_LONG,
+    Instruction::DOUBLE_TO_FLOAT,
     Instruction::INT_TO_BYTE,
     Instruction::INT_TO_CHAR,
     Instruction::INT_TO_SHORT,
@@ -301,15 +301,15 @@
     Instruction::SHL_LONG,
     Instruction::SHR_LONG,
     Instruction::USHR_LONG,
-    // Instruction::ADD_FLOAT,
-    // Instruction::SUB_FLOAT,
-    // Instruction::MUL_FLOAT,
-    // Instruction::DIV_FLOAT,
+    Instruction::ADD_FLOAT,
+    Instruction::SUB_FLOAT,
+    Instruction::MUL_FLOAT,
+    Instruction::DIV_FLOAT,
     // Instruction::REM_FLOAT,
-    // Instruction::ADD_DOUBLE,
-    // Instruction::SUB_DOUBLE,
-    // Instruction::MUL_DOUBLE,
-    // Instruction::DIV_DOUBLE,
+    Instruction::ADD_DOUBLE,
+    Instruction::SUB_DOUBLE,
+    Instruction::MUL_DOUBLE,
+    Instruction::DIV_DOUBLE,
     // Instruction::REM_DOUBLE,
     Instruction::ADD_INT_2ADDR,
     Instruction::SUB_INT_2ADDR,
@@ -333,15 +333,15 @@
     Instruction::SHL_LONG_2ADDR,
     Instruction::SHR_LONG_2ADDR,
     Instruction::USHR_LONG_2ADDR,
-    // Instruction::ADD_FLOAT_2ADDR,
-    // Instruction::SUB_FLOAT_2ADDR,
-    // Instruction::MUL_FLOAT_2ADDR,
-    // Instruction::DIV_FLOAT_2ADDR,
+    Instruction::ADD_FLOAT_2ADDR,
+    Instruction::SUB_FLOAT_2ADDR,
+    Instruction::MUL_FLOAT_2ADDR,
+    Instruction::DIV_FLOAT_2ADDR,
     // Instruction::REM_FLOAT_2ADDR,
-    // Instruction::ADD_DOUBLE_2ADDR,
-    // Instruction::SUB_DOUBLE_2ADDR,
-    // Instruction::MUL_DOUBLE_2ADDR,
-    // Instruction::DIV_DOUBLE_2ADDR,
+    Instruction::ADD_DOUBLE_2ADDR,
+    Instruction::SUB_DOUBLE_2ADDR,
+    Instruction::MUL_DOUBLE_2ADDR,
+    Instruction::DIV_DOUBLE_2ADDR,
     // Instruction::REM_DOUBLE_2ADDR,
     Instruction::ADD_INT_LIT16,
     Instruction::RSUB_INT,
@@ -699,7 +699,7 @@
 // V : void
 // (ARM64) Current calling conversion only support 32bit softfp
 //         which has problems with long, float, double
-constexpr char arm64_supported_types[] = "ZBSCILVJ";
+constexpr char arm64_supported_types[] = "ZBSCILVJFD";
 // (x84_64) We still have troubles with compiling longs/doubles/floats
 constexpr char x86_64_supported_types[] = "ZBSCILV";
 
diff --git a/compiler/dex/quick/arm64/fp_arm64.cc b/compiler/dex/quick/arm64/fp_arm64.cc
index 87ab6fe..882ee66 100644
--- a/compiler/dex/quick/arm64/fp_arm64.cc
+++ b/compiler/dex/quick/arm64/fp_arm64.cc
@@ -25,10 +25,6 @@
   int op = kA64Brk1d;
   RegLocation rl_result;
 
-  /*
-   * Don't attempt to optimize register usage since these opcodes call out to
-   * the handlers.
-   */
   switch (opcode) {
     case Instruction::ADD_FLOAT_2ADDR:
     case Instruction::ADD_FLOAT:
@@ -119,49 +115,75 @@
                                  RegLocation rl_dest, RegLocation rl_src) {
   int op = kA64Brk1d;
   RegLocation rl_result;
+  RegisterClass src_reg_class = kInvalidRegClass;
+  RegisterClass dst_reg_class = kInvalidRegClass;
 
   switch (opcode) {
     case Instruction::INT_TO_FLOAT:
       op = kA64Scvtf2fw;
+      src_reg_class = kCoreReg;
+      dst_reg_class = kFPReg;
       break;
     case Instruction::FLOAT_TO_INT:
       op = kA64Fcvtzs2wf;
+      src_reg_class = kFPReg;
+      dst_reg_class = kCoreReg;
       break;
     case Instruction::DOUBLE_TO_FLOAT:
       op = kA64Fcvt2sS;
+      src_reg_class = kFPReg;
+      dst_reg_class = kFPReg;
       break;
     case Instruction::FLOAT_TO_DOUBLE:
       op = kA64Fcvt2Ss;
+      src_reg_class = kFPReg;
+      dst_reg_class = kFPReg;
       break;
     case Instruction::INT_TO_DOUBLE:
       op = FWIDE(kA64Scvtf2fw);
+      src_reg_class = kCoreReg;
+      dst_reg_class = kFPReg;
       break;
     case Instruction::DOUBLE_TO_INT:
       op = FWIDE(kA64Fcvtzs2wf);
+      src_reg_class = kFPReg;
+      dst_reg_class = kCoreReg;
       break;
     case Instruction::LONG_TO_DOUBLE:
       op = FWIDE(kA64Scvtf2fx);
+      src_reg_class = kCoreReg;
+      dst_reg_class = kFPReg;
       break;
     case Instruction::FLOAT_TO_LONG:
       op = kA64Fcvtzs2xf;
+      src_reg_class = kFPReg;
+      dst_reg_class = kCoreReg;
       break;
     case Instruction::LONG_TO_FLOAT:
       op = kA64Scvtf2fx;
+      src_reg_class = kCoreReg;
+      dst_reg_class = kFPReg;
       break;
     case Instruction::DOUBLE_TO_LONG:
       op = FWIDE(kA64Fcvtzs2xf);
+      src_reg_class = kFPReg;
+      dst_reg_class = kCoreReg;
       break;
     default:
       LOG(FATAL) << "Unexpected opcode: " << opcode;
   }
 
+  DCHECK_NE(src_reg_class, kInvalidRegClass);
+  DCHECK_NE(dst_reg_class, kInvalidRegClass);
+  DCHECK_NE(op, kA64Brk1d);
+
   if (rl_src.wide) {
-    rl_src = LoadValueWide(rl_src, kFPReg);
+    rl_src = LoadValueWide(rl_src, src_reg_class);
   } else {
-    rl_src = LoadValue(rl_src, kFPReg);
+    rl_src = LoadValue(rl_src, src_reg_class);
   }
 
-  rl_result = EvalLoc(rl_dest, kFPReg, true);
+  rl_result = EvalLoc(rl_dest, dst_reg_class, true);
   NewLIR2(op, rl_result.reg.GetReg(), rl_src.reg.GetReg());
 
   if (rl_dest.wide) {
@@ -296,25 +318,11 @@
 }
 
 bool Arm64Mir2Lir::GenInlinedSqrt(CallInfo* info) {
-  // TODO(Arm64): implement this.
-  UNIMPLEMENTED(FATAL) << "GenInlinedSqrt not implemented for Arm64";
-
-  DCHECK_EQ(cu_->instruction_set, kArm64);
-  LIR *branch;
   RegLocation rl_src = info->args[0];
   RegLocation rl_dest = InlineTargetWide(info);  // double place for result
   rl_src = LoadValueWide(rl_src, kFPReg);
   RegLocation rl_result = EvalLoc(rl_dest, kFPReg, true);
   NewLIR2(FWIDE(kA64Fsqrt2ff), rl_result.reg.GetReg(), rl_src.reg.GetReg());
-  NewLIR2(FWIDE(kA64Fcmp2ff), rl_result.reg.GetReg(), rl_result.reg.GetReg());
-  branch = NewLIR2(kA64B2ct, kArmCondEq, 0);
-  ClobberCallerSave();
-  LockCallTemps();  // Using fixed registers
-  RegStorage r_tgt = LoadHelper(QUICK_ENTRYPOINT_OFFSET(8, pSqrt));
-  // NewLIR3(kThumb2Fmrrd, r0, r1, rl_src.reg.GetReg());
-  NewLIR1(kA64Blr1x, r_tgt.GetReg());
-  // NewLIR3(kThumb2Fmdrr, rl_result.reg.GetReg(), r0, r1);
-  branch->target = NewLIR0(kPseudoTargetLabel);
   StoreValueWide(rl_dest, rl_result);
   return true;
 }