AArch64: Enable GenSpecialCase.

With the following fixes:
1. Map reference register to 64-bit argument register.
2. Set max_mapped_in_ to the index of the last mapped in. (Originally it
was set to the index of the low part of a wide register.)

Change-Id: If41de654a004a0561396e349d1f150517a72ab2f
diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc
index c3f4711..cb126f2 100644
--- a/compiler/dex/quick/arm64/call_arm64.cc
+++ b/compiler/dex/quick/arm64/call_arm64.cc
@@ -24,14 +24,6 @@
 
 namespace art {
 
-bool Arm64Mir2Lir::GenSpecialCase(BasicBlock* bb, MIR* mir,
-                                  const InlineMethod& special) {
-  // TODO(Arm64): re-enable this, once hard-float ABI is implemented.
-  //   (this currently does not work, as GetArgMappingToPhysicalReg returns InvalidReg()).
-  // return Mir2Lir::GenSpecialCase(bb, mir, special);
-  return false;
-}
-
 /*
  * The sparse table in the literal pool is an array of <key,displacement>
  * pairs.  For each set, we'll load them as a pair using ldp.
diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h
index a887a3b..a9340a5 100644
--- a/compiler/dex/quick/arm64/codegen_arm64.h
+++ b/compiler/dex/quick/arm64/codegen_arm64.h
@@ -34,7 +34,7 @@
   // TODO: consolidate 64-bit target support.
   class InToRegStorageMapper {
    public:
-    virtual RegStorage GetNextReg(bool is_double_or_float, bool is_wide) = 0;
+    virtual RegStorage GetNextReg(bool is_double_or_float, bool is_wide, bool is_ref) = 0;
     virtual ~InToRegStorageMapper() {}
   };
 
@@ -42,7 +42,7 @@
    public:
     InToRegStorageArm64Mapper() : cur_core_reg_(0), cur_fp_reg_(0) {}
     virtual ~InToRegStorageArm64Mapper() {}
-    virtual RegStorage GetNextReg(bool is_double_or_float, bool is_wide);
+    virtual RegStorage GetNextReg(bool is_double_or_float, bool is_wide, bool is_ref);
    private:
     int cur_core_reg_;
     int cur_fp_reg_;
@@ -205,7 +205,6 @@
     void GenNegFloat(RegLocation rl_dest, RegLocation rl_src);
     void GenPackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src);
     void GenSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src);
-    bool GenSpecialCase(BasicBlock* bb, MIR* mir, const InlineMethod& special);
 
     uint32_t GenPairWise(uint32_t reg_mask, int* reg1, int* reg2);
     void UnSpillCoreRegs(RegStorage base, int offset, uint32_t reg_mask);
diff --git a/compiler/dex/quick/arm64/target_arm64.cc b/compiler/dex/quick/arm64/target_arm64.cc
index dfaa483..0ebcefb 100644
--- a/compiler/dex/quick/arm64/target_arm64.cc
+++ b/compiler/dex/quick/arm64/target_arm64.cc
@@ -816,7 +816,8 @@
 }
 
 RegStorage Arm64Mir2Lir::InToRegStorageArm64Mapper::GetNextReg(bool is_double_or_float,
-                                                               bool is_wide) {
+                                                               bool is_wide,
+                                                               bool is_ref) {
   const RegStorage coreArgMappingToPhysicalReg[] =
       {rs_x1, rs_x2, rs_x3, rs_x4, rs_x5, rs_x6, rs_x7};
   const int coreArgMappingToPhysicalRegSize =
@@ -829,6 +830,7 @@
   RegStorage result = RegStorage::InvalidReg();
   if (is_double_or_float) {
     if (cur_fp_reg_ < fpArgMappingToPhysicalRegSize) {
+      DCHECK(!is_ref);
       result = fpArgMappingToPhysicalReg[cur_fp_reg_++];
       if (result.Valid()) {
         // TODO: switching between widths remains a bit ugly.  Better way?
@@ -842,7 +844,8 @@
       if (result.Valid()) {
         // TODO: switching between widths remains a bit ugly.  Better way?
         int res_reg = result.GetReg();
-        result = is_wide ? RegStorage::Solo64(res_reg) : RegStorage::Solo32(res_reg);
+        DCHECK(!(is_wide && is_ref));
+        result = (is_wide || is_ref) ? RegStorage::Solo64(res_reg) : RegStorage::Solo32(res_reg);
       }
     }
   }
@@ -861,14 +864,16 @@
   max_mapped_in_ = -1;
   is_there_stack_mapped_ = false;
   for (int in_position = 0; in_position < count; in_position++) {
-     RegStorage reg = mapper->GetNextReg(arg_locs[in_position].fp, arg_locs[in_position].wide);
+     RegStorage reg = mapper->GetNextReg(arg_locs[in_position].fp,
+                                         arg_locs[in_position].wide,
+                                         arg_locs[in_position].ref);
      if (reg.Valid()) {
        mapping_[in_position] = reg;
-       max_mapped_in_ = std::max(max_mapped_in_, in_position);
-       if (reg.Is64BitSolo()) {
+       if (arg_locs[in_position].wide) {
          // We covered 2 args, so skip the next one
          in_position++;
        }
+       max_mapped_in_ = std::max(max_mapped_in_, in_position);
      } else {
        is_there_stack_mapped_ = true;
      }
@@ -1042,16 +1047,14 @@
   InToRegStorageMapping in_to_reg_storage_mapping;
   in_to_reg_storage_mapping.Initialize(info->args, info->num_arg_words, &mapper);
   const int last_mapped_in = in_to_reg_storage_mapping.GetMaxMappedIn();
-  const int size_of_the_last_mapped = last_mapped_in == -1 ? 1 :
-          in_to_reg_storage_mapping.Get(last_mapped_in).Is64BitSolo() ? 2 : 1;
-  int regs_left_to_pass_via_stack = info->num_arg_words - (last_mapped_in + size_of_the_last_mapped);
+  int regs_left_to_pass_via_stack = info->num_arg_words - (last_mapped_in + 1);
 
   // Fisrt of all, check whether it make sense to use bulk copying
   // Optimization is aplicable only for range case
   // TODO: make a constant instead of 2
   if (info->is_range && regs_left_to_pass_via_stack >= 2) {
     // Scan the rest of the args - if in phys_reg flush to memory
-    for (int next_arg = last_mapped_in + size_of_the_last_mapped; next_arg < info->num_arg_words;) {
+    for (int next_arg = last_mapped_in + 1; next_arg < info->num_arg_words;) {
       RegLocation loc = info->args[next_arg];
       if (loc.wide) {
         loc = UpdateLocWide(loc);
@@ -1074,8 +1077,8 @@
     DCHECK_EQ(VRegOffset(static_cast<int>(kVRegMethodPtrBaseReg)), 0);
 
     // The rest can be copied together
-    int start_offset = SRegOffset(info->args[last_mapped_in + size_of_the_last_mapped].s_reg_low);
-    int outs_offset = StackVisitor::GetOutVROffset(last_mapped_in + size_of_the_last_mapped,
+    int start_offset = SRegOffset(info->args[last_mapped_in + 1].s_reg_low);
+    int outs_offset = StackVisitor::GetOutVROffset(last_mapped_in + 1,
                                                    cu_->instruction_set);
 
     int current_src_offset = start_offset;