Refactor Quick ABI support

Every architecture must now provide a mapper between
VR parameters and physical registers. In addition, an
architecture can provide a bulk-copy helper for the
GenDalvikArgs utility (sketched below).
Everything else becomes common code:
GetArgMappingToPhysicalReg, GenDalvikArgsNoRange,
GenDalvikArgsRange, FlushIns.

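For illustration only, here is a standalone sketch of that split. It is not
the actual ART code: ShortyArg, InToRegStorageMapper, GetNextReg() and Reset()
exist in this change, but the ToyMapper class, its register numbers and its
register counts below are simplified stand-ins. The point is that a backend
only answers "which physical register takes the next argument?", while the
iteration itself can live in common code.

  // Minimal standalone sketch of the mapper idea; names and register counts
  // are illustrative, not the real ARM/ARM64 assignment rules.
  #include <cstdio>

  struct ShortyArg {            // simplified stand-in for the real ShortyArg
    char c;
    bool IsFP() const   { return c == 'F' || c == 'D'; }
    bool IsWide() const { return c == 'J' || c == 'D'; }
    bool IsRef() const  { return c == 'L'; }
  };

  class InToRegStorageMapper {  // simplified stand-in for the shared base class
   public:
    virtual ~InToRegStorageMapper() {}
    virtual int GetNextReg(ShortyArg arg) = 0;  // negative result means "stack"
    virtual void Reset() = 0;
  };

  // Toy backend policy: 3 core argument registers, 4 fp argument registers.
  class ToyMapper final : public InToRegStorageMapper {
   public:
    int GetNextReg(ShortyArg arg) override {
      if (arg.IsFP()) { return cur_fp_ < 4 ? 100 + cur_fp_++ : -1; }
      return cur_core_ < 3 ? 1 + cur_core_++ : -1;
    }
    void Reset() override { cur_core_ = 0; cur_fp_ = 0; }
   private:
    int cur_core_ = 0;
    int cur_fp_ = 0;
  };

  int main() {
    ToyMapper mapper;
    mapper.Reset();
    const char* shorty = "VILFD";  // return type followed by the argument types
    for (const char* p = shorty + 1; *p != '\0'; ++p) {
      int reg = mapper.GetNextReg(ShortyArg{*p});
      if (reg < 0) {
        std::printf("arg '%c' -> stack\n", *p);
      } else {
        std::printf("arg '%c' -> reg %d\n", *p, reg);
      }
    }
    return 0;
  }

In the real change the common Mir2Lir code asks the backend for a fresh
mapper via GetResetedInToRegStorageMapper(), builds the in-to-reg mapping
once, and the backend keeps only the GetNextReg()/Reset() policy plus an
optional GenDalvikArgsBulkCopy() override.
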
The mapper now uses the shorty representation of the input
parameters. This is required because the register locations
alone are not enough to detect the type of a parameter
(fp or core), as illustrated below. For the details
see https://android-review.googlesource.com/#/c/113936/.

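To make the fp-vs-core point concrete: a long ('J') and a double ('D') both
occupy two VRs, and the commit notes that locations alone cannot reliably
distinguish fp from core arguments, so the shorty character decides the
register file. A minimal, self-contained sketch (the helper name is
hypothetical, not part of this change):

  #include <cassert>

  enum class RegClass { kCore, kFP };

  // Hypothetical helper: decide the register file from the shorty character
  // alone, instead of relying on location/type flags.
  RegClass ClassFromShorty(char c) {
    return (c == 'F' || c == 'D') ? RegClass::kFP : RegClass::kCore;
  }

  int main() {
    // Shorties "VJ" and "VD": same width (two VRs), same stack layout,
    // but different register files for the incoming argument.
    assert(ClassFromShorty('J') == RegClass::kCore);
    assert(ClassFromShorty('D') == RegClass::kFP);
    return 0;
  }
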
Change-Id: Ie762b921e0acaa936518ee6b63c9a9d25f83e434
Signed-off-by: Serguei Katkov <serguei.i.katkov@intel.com>
diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc
index 023abca..6b4d737 100644
--- a/compiler/dex/mir_graph.cc
+++ b/compiler/dex/mir_graph.cc
@@ -1590,6 +1590,12 @@
   return cu_->dex_file->GetShorty(method_id.proto_idx_);
 }
 
+const char* MIRGraph::GetShortyFromMethodReference(const MethodReference& target_method) {
+  const DexFile::MethodId& method_id =
+      target_method.dex_file->GetMethodId(target_method.dex_method_index);
+  return target_method.dex_file->GetShorty(method_id.proto_idx_);
+}
+
 /* Debug Utility - dump a compilation unit */
 void MIRGraph::DumpMIRGraph() {
   const char* block_type_names[] = {
diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h
index 1a18841..da0dd88 100644
--- a/compiler/dex/mir_graph.h
+++ b/compiler/dex/mir_graph.h
@@ -1113,6 +1113,7 @@
   std::string GetSSANameWithConst(int ssa_reg, bool singles_only);
   void GetBlockName(BasicBlock* bb, char* name);
   const char* GetShortyFromTargetIdx(int);
+  const char* GetShortyFromMethodReference(const MethodReference& target_method);
   void DumpMIRGraph();
   CallInfo* NewMemCallInfo(BasicBlock* bb, MIR* mir, InvokeType type, bool is_range);
   BasicBlock* NewMemBB(BBType block_type, int block_id);
diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h
index e8d0c32..c3b19a3 100644
--- a/compiler/dex/quick/arm/codegen_arm.h
+++ b/compiler/dex/quick/arm/codegen_arm.h
@@ -26,16 +26,6 @@
 
 class ArmMir2Lir FINAL : public Mir2Lir {
  protected:
-  // TODO: Consolidate hard float target support.
-  // InToRegStorageMapper and InToRegStorageMapping can be shared with all backends.
-  // Base class used to get RegStorage for next argument.
-  class InToRegStorageMapper {
-   public:
-    virtual RegStorage GetNextReg(bool is_double_or_float, bool is_wide) = 0;
-    virtual ~InToRegStorageMapper() {
-    }
-  };
-
   // Inherited class for ARM backend.
   class InToRegStorageArmMapper FINAL : public InToRegStorageMapper {
    public:
@@ -43,45 +33,25 @@
         : cur_core_reg_(0), cur_fp_reg_(0), cur_fp_double_reg_(0) {
     }
 
-    virtual ~InToRegStorageArmMapper() {
-    }
+    RegStorage GetNextReg(ShortyArg arg) OVERRIDE;
 
-    RegStorage GetNextReg(bool is_double_or_float, bool is_wide) OVERRIDE;
+    virtual void Reset() OVERRIDE {
+      cur_core_reg_ = 0;
+      cur_fp_reg_ = 0;
+      cur_fp_double_reg_ = 0;
+    }
 
    private:
-    uint32_t cur_core_reg_;
-    uint32_t cur_fp_reg_;
-    uint32_t cur_fp_double_reg_;
+    size_t cur_core_reg_;
+    size_t cur_fp_reg_;
+    size_t cur_fp_double_reg_;
   };
 
-  // Class to map argument to RegStorage. The mapping object is initialized by a mapper.
-  class InToRegStorageMapping FINAL {
-   public:
-    InToRegStorageMapping()
-        : max_mapped_in_(0), is_there_stack_mapped_(false), initialized_(false) {
-    }
-
-    int GetMaxMappedIn() const {
-      return max_mapped_in_;
-    }
-
-    bool IsThereStackMapped() const {
-      return is_there_stack_mapped_;
-    }
-
-    bool IsInitialized() const {
-      return initialized_;
-    }
-
-    void Initialize(RegLocation* arg_locs, int count, InToRegStorageMapper* mapper);
-    RegStorage Get(int in_position) const;
-
-   private:
-    std::map<int, RegStorage> mapping_;
-    int max_mapped_in_;
-    bool is_there_stack_mapped_;
-    bool initialized_;
-  };
+  InToRegStorageArmMapper in_to_reg_storage_arm_mapper_;
+  InToRegStorageMapper* GetResetedInToRegStorageMapper() OVERRIDE {
+    in_to_reg_storage_arm_mapper_.Reset();
+    return &in_to_reg_storage_arm_mapper_;
+  }
 
   public:
     ArmMir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena);
@@ -127,7 +97,6 @@
       }
     }
 
-    RegStorage GetArgMappingToPhysicalReg(int arg_num) OVERRIDE;
     RegLocation GetReturnAlt() OVERRIDE;
     RegLocation GetReturnWideAlt() OVERRIDE;
     RegLocation LocCReturn() OVERRIDE;
@@ -289,19 +258,6 @@
     LIR* InvokeTrampoline(OpKind op, RegStorage r_tgt, QuickEntrypointEnum trampoline) OVERRIDE;
     size_t GetInstructionOffset(LIR* lir);
 
-    int GenDalvikArgsNoRange(CallInfo* info, int call_state, LIR** pcrLabel,
-                             NextCallInsn next_call_insn,
-                             const MethodReference& target_method,
-                             uint32_t vtable_idx,
-                             uintptr_t direct_code, uintptr_t direct_method, InvokeType type,
-                             bool skip_this) OVERRIDE;
-    int GenDalvikArgsRange(CallInfo* info, int call_state, LIR** pcrLabel,
-                           NextCallInsn next_call_insn,
-                           const MethodReference& target_method,
-                           uint32_t vtable_idx,
-                           uintptr_t direct_code, uintptr_t direct_method, InvokeType type,
-                           bool skip_this) OVERRIDE;
-
   private:
     void GenNegLong(RegLocation rl_dest, RegLocation rl_src);
     void GenMulLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
@@ -360,7 +316,7 @@
                                      RegStorage::FloatSolo32(reg_num * 2 + 1));
     }
 
-    InToRegStorageMapping in_to_reg_storage_mapping_;
+    int GenDalvikArgsBulkCopy(CallInfo* info, int first, int count) OVERRIDE;
 };
 
 }  // namespace art
diff --git a/compiler/dex/quick/arm/target_arm.cc b/compiler/dex/quick/arm/target_arm.cc
index 0e8f645..7190a49 100644
--- a/compiler/dex/quick/arm/target_arm.cc
+++ b/compiler/dex/quick/arm/target_arm.cc
@@ -896,7 +896,7 @@
   Mir2Lir::InstallLiteralPools();
 }
 
-RegStorage ArmMir2Lir::InToRegStorageArmMapper::GetNextReg(bool is_double_or_float, bool is_wide) {
+RegStorage ArmMir2Lir::InToRegStorageArmMapper::GetNextReg(ShortyArg arg) {
   const RegStorage coreArgMappingToPhysicalReg[] =
       {rs_r1, rs_r2, rs_r3};
   const int coreArgMappingToPhysicalRegSize = arraysize(coreArgMappingToPhysicalReg);
@@ -906,28 +906,18 @@
   constexpr uint32_t fpArgMappingToPhysicalRegSize = arraysize(fpArgMappingToPhysicalReg);
   static_assert(fpArgMappingToPhysicalRegSize % 2 == 0, "Number of FP Arg regs is not even");
 
-  if (kArm32QuickCodeUseSoftFloat) {
-    is_double_or_float = false;  // Regard double as long, float as int.
-    is_wide = false;  // Map long separately.
-  }
-
   RegStorage result = RegStorage::InvalidReg();
-  if (is_double_or_float) {
-    // TODO: Remove "cur_fp_double_reg_ % 2 != 0" when we return double as double.
-    if (is_wide || cur_fp_double_reg_ % 2 != 0) {
+  // Regard double as long, float as int for kArm32QuickCodeUseSoftFloat.
+  if (arg.IsFP() && !kArm32QuickCodeUseSoftFloat) {
+    if (arg.IsWide()) {
       cur_fp_double_reg_ = std::max(cur_fp_double_reg_, RoundUp(cur_fp_reg_, 2));
       if (cur_fp_double_reg_ < fpArgMappingToPhysicalRegSize) {
-        // TODO: Replace by following code in the branch when FlushIns() support 64-bit registers.
-        // result = RegStorage::MakeRegPair(fpArgMappingToPhysicalReg[cur_fp_double_reg_],
-        //                                  fpArgMappingToPhysicalReg[cur_fp_double_reg_ + 1]);
-        // result = As64BitFloatReg(result);
-        // cur_fp_double_reg_ += 2;
-        result = fpArgMappingToPhysicalReg[cur_fp_double_reg_];
-        cur_fp_double_reg_++;
+        result = RegStorage::MakeRegPair(fpArgMappingToPhysicalReg[cur_fp_double_reg_],
+                                         fpArgMappingToPhysicalReg[cur_fp_double_reg_ + 1]);
+        result = As64BitFloatReg(result);
+        cur_fp_double_reg_ += 2;
       }
     } else {
-      // TODO: Remove the check when we return double as double.
-      DCHECK_EQ(cur_fp_double_reg_ % 2, 0U);
       if (cur_fp_reg_ % 2 == 0) {
         cur_fp_reg_ = std::max(cur_fp_double_reg_, cur_fp_reg_);
       }
@@ -939,270 +929,23 @@
   } else {
     if (cur_core_reg_ < coreArgMappingToPhysicalRegSize) {
       result = coreArgMappingToPhysicalReg[cur_core_reg_++];
-      // TODO: Enable following code when FlushIns() support 64-bit registers.
-      // if (is_wide && cur_core_reg_ < coreArgMappingToPhysicalRegSize) {
-      //   result = RegStorage::MakeRegPair(result, coreArgMappingToPhysicalReg[cur_core_reg_++]);
-      // }
+      if (arg.IsWide() && cur_core_reg_ < coreArgMappingToPhysicalRegSize) {
+        result = RegStorage::MakeRegPair(result, coreArgMappingToPhysicalReg[cur_core_reg_++]);
+      }
     }
   }
   return result;
 }
 
-RegStorage ArmMir2Lir::InToRegStorageMapping::Get(int in_position) const {
-  DCHECK(IsInitialized());
-  auto res = mapping_.find(in_position);
-  return res != mapping_.end() ? res->second : RegStorage::InvalidReg();
-}
-
-void ArmMir2Lir::InToRegStorageMapping::Initialize(RegLocation* arg_locs, int count,
-                                                   InToRegStorageMapper* mapper) {
-  DCHECK(mapper != nullptr);
-  max_mapped_in_ = -1;
-  is_there_stack_mapped_ = false;
-  for (int in_position = 0; in_position < count; in_position++) {
-     RegStorage reg = mapper->GetNextReg(arg_locs[in_position].fp,
-                                         arg_locs[in_position].wide);
-     if (reg.Valid()) {
-       mapping_[in_position] = reg;
-       // TODO: Enable the following code when FlushIns() support 64-bit argument registers.
-       // if (arg_locs[in_position].wide) {
-       //  if (reg.Is32Bit()) {
-       //    // As it is a split long, the hi-part is on stack.
-       //    is_there_stack_mapped_ = true;
-       //  }
-       //  // We covered 2 v-registers, so skip the next one
-       //  in_position++;
-       // }
-       max_mapped_in_ = std::max(max_mapped_in_, in_position);
-     } else {
-       is_there_stack_mapped_ = true;
-     }
-  }
-  initialized_ = true;
-}
-
-// TODO: Should be able to return long, double registers.
-// Need check some common code as it will break some assumption.
-RegStorage ArmMir2Lir::GetArgMappingToPhysicalReg(int arg_num) {
-  if (!in_to_reg_storage_mapping_.IsInitialized()) {
-    int start_vreg = mir_graph_->GetFirstInVR();
-    RegLocation* arg_locs = &mir_graph_->reg_location_[start_vreg];
-
-    InToRegStorageArmMapper mapper;
-    in_to_reg_storage_mapping_.Initialize(arg_locs, mir_graph_->GetNumOfInVRs(), &mapper);
-  }
-  return in_to_reg_storage_mapping_.Get(arg_num);
-}
-
-int ArmMir2Lir::GenDalvikArgsNoRange(CallInfo* info,
-                                     int call_state, LIR** pcrLabel, NextCallInsn next_call_insn,
-                                     const MethodReference& target_method,
-                                     uint32_t vtable_idx, uintptr_t direct_code,
-                                     uintptr_t direct_method, InvokeType type, bool skip_this) {
+int ArmMir2Lir::GenDalvikArgsBulkCopy(CallInfo* info, int first, int count) {
   if (kArm32QuickCodeUseSoftFloat) {
-    return Mir2Lir::GenDalvikArgsNoRange(info, call_state, pcrLabel, next_call_insn, target_method,
-                                         vtable_idx, direct_code, direct_method, type, skip_this);
-  } else {
-    return GenDalvikArgsRange(info, call_state, pcrLabel, next_call_insn, target_method, vtable_idx,
-                              direct_code, direct_method, type, skip_this);
+    return Mir2Lir::GenDalvikArgsBulkCopy(info, first, count);
   }
-}
-
-int ArmMir2Lir::GenDalvikArgsRange(CallInfo* info, int call_state,
-                                   LIR** pcrLabel, NextCallInsn next_call_insn,
-                                   const MethodReference& target_method,
-                                   uint32_t vtable_idx, uintptr_t direct_code,
-                                   uintptr_t direct_method, InvokeType type, bool skip_this) {
-  if (kArm32QuickCodeUseSoftFloat) {
-    return Mir2Lir::GenDalvikArgsRange(info, call_state, pcrLabel, next_call_insn, target_method,
-                                       vtable_idx, direct_code, direct_method, type, skip_this);
-  }
-
-  // TODO: Rework the implementation when argument register can be long or double.
-
-  /* If no arguments, just return */
-  if (info->num_arg_words == 0) {
-    return call_state;
-  }
-
-  const int start_index = skip_this ? 1 : 0;
-
-  InToRegStorageArmMapper mapper;
-  InToRegStorageMapping in_to_reg_storage_mapping;
-  in_to_reg_storage_mapping.Initialize(info->args, info->num_arg_words, &mapper);
-  const int last_mapped_in = in_to_reg_storage_mapping.GetMaxMappedIn();
-  int regs_left_to_pass_via_stack = info->num_arg_words - (last_mapped_in + 1);
-
-  // First of all, check whether it makes sense to use bulk copying.
-  // Bulk copying is done only for the range case.
-  // TODO: make a constant instead of 2
-  if (info->is_range && regs_left_to_pass_via_stack >= 2) {
-    // Scan the rest of the args - if in phys_reg flush to memory
-    for (int next_arg = last_mapped_in + 1; next_arg < info->num_arg_words;) {
-      RegLocation loc = info->args[next_arg];
-      if (loc.wide) {
-        // TODO: Only flush hi-part.
-        if (loc.high_word) {
-          loc = info->args[--next_arg];
-        }
-        loc = UpdateLocWide(loc);
-        if (loc.location == kLocPhysReg) {
-          ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-          StoreBaseDisp(TargetPtrReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k64, kNotVolatile);
-        }
-        next_arg += 2;
-      } else {
-        loc = UpdateLoc(loc);
-        if (loc.location == kLocPhysReg) {
-          ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-          if (loc.ref) {
-            StoreRefDisp(TargetPtrReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, kNotVolatile);
-          } else {
-            StoreBaseDisp(TargetPtrReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k32,
-                          kNotVolatile);
-          }
-        }
-        next_arg++;
-      }
-    }
-
-    // The rest can be copied together
-    int start_offset = SRegOffset(info->args[last_mapped_in + 1].s_reg_low);
-    int outs_offset = StackVisitor::GetOutVROffset(last_mapped_in + 1,
-                                                   cu_->instruction_set);
-
-    int current_src_offset = start_offset;
-    int current_dest_offset = outs_offset;
-
-    // Only davik regs are accessed in this loop; no next_call_insn() calls.
-    ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-    while (regs_left_to_pass_via_stack > 0) {
-      /*
-       * TODO: Improve by adding block copy for large number of arguments.  This
-       * should be done, if possible, as a target-depending helper.  For now, just
-       * copy a Dalvik vreg at a time.
-       */
-      // Moving 32-bits via general purpose register.
-      size_t bytes_to_move = sizeof(uint32_t);
-
-      // Instead of allocating a new temp, simply reuse one of the registers being used
-      // for argument passing.
-      RegStorage temp = TargetReg(kArg3, kNotWide);
-
-      // Now load the argument VR and store to the outs.
-      Load32Disp(TargetPtrReg(kSp), current_src_offset, temp);
-      Store32Disp(TargetPtrReg(kSp), current_dest_offset, temp);
-
-      current_src_offset += bytes_to_move;
-      current_dest_offset += bytes_to_move;
-      regs_left_to_pass_via_stack -= (bytes_to_move >> 2);
-    }
-    DCHECK_EQ(regs_left_to_pass_via_stack, 0);
-  }
-
-  // Now handle rest not registers if they are
-  if (in_to_reg_storage_mapping.IsThereStackMapped()) {
-    RegStorage regWide = TargetReg(kArg2, kWide);
-    for (int i = start_index; i <= last_mapped_in + regs_left_to_pass_via_stack; i++) {
-      RegLocation rl_arg = info->args[i];
-      rl_arg = UpdateRawLoc(rl_arg);
-      RegStorage reg = in_to_reg_storage_mapping.Get(i);
-      // TODO: Only pass split wide hi-part via stack.
-      if (!reg.Valid() || rl_arg.wide) {
-        int out_offset = StackVisitor::GetOutVROffset(i, cu_->instruction_set);
-
-        {
-          ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-          if (rl_arg.wide) {
-            if (rl_arg.location == kLocPhysReg) {
-              StoreBaseDisp(TargetPtrReg(kSp), out_offset, rl_arg.reg, k64, kNotVolatile);
-            } else {
-              LoadValueDirectWideFixed(rl_arg, regWide);
-              StoreBaseDisp(TargetPtrReg(kSp), out_offset, regWide, k64, kNotVolatile);
-            }
-          } else {
-            if (rl_arg.location == kLocPhysReg) {
-              if (rl_arg.ref) {
-                StoreRefDisp(TargetPtrReg(kSp), out_offset, rl_arg.reg, kNotVolatile);
-              } else {
-                StoreBaseDisp(TargetPtrReg(kSp), out_offset, rl_arg.reg, k32, kNotVolatile);
-              }
-            } else {
-              if (rl_arg.ref) {
-                RegStorage regSingle = TargetReg(kArg2, kRef);
-                LoadValueDirectFixed(rl_arg, regSingle);
-                StoreRefDisp(TargetPtrReg(kSp), out_offset, regSingle, kNotVolatile);
-              } else {
-                RegStorage regSingle = TargetReg(kArg2, kNotWide);
-                LoadValueDirectFixed(rl_arg, regSingle);
-                StoreBaseDisp(TargetPtrReg(kSp), out_offset, regSingle, k32, kNotVolatile);
-              }
-            }
-          }
-        }
-
-        call_state = next_call_insn(cu_, info, call_state, target_method,
-                                    vtable_idx, direct_code, direct_method, type);
-      }
-      if (rl_arg.wide) {
-        i++;
-      }
-    }
-  }
-
-  // Finish with mapped registers
-  for (int i = start_index; i <= last_mapped_in; i++) {
-    RegLocation rl_arg = info->args[i];
-    rl_arg = UpdateRawLoc(rl_arg);
-    RegStorage reg = in_to_reg_storage_mapping.Get(i);
-    if (reg.Valid()) {
-      if (reg.Is64Bit()) {
-        LoadValueDirectWideFixed(rl_arg, reg);
-      } else {
-        // TODO: Only split long should be the case we need to care about.
-        if (rl_arg.wide) {
-          ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-          int high_word = rl_arg.high_word ? 1 : 0;
-          rl_arg = high_word ? info->args[i - 1] : rl_arg;
-          if (rl_arg.location == kLocPhysReg) {
-            RegStorage rs_arg = rl_arg.reg;
-            if (rs_arg.IsDouble() && rs_arg.Is64BitSolo()) {
-              rs_arg = As64BitFloatRegPair(rs_arg);
-            }
-            RegStorage rs_arg_low = rs_arg.GetLow();
-            RegStorage rs_arg_high = rs_arg.GetHigh();
-            OpRegCopy(reg, high_word ? rs_arg_high : rs_arg_low);
-          } else {
-            Load32Disp(TargetPtrReg(kSp), SRegOffset(rl_arg.s_reg_low + high_word), reg);
-          }
-        } else {
-          LoadValueDirectFixed(rl_arg, reg);
-        }
-      }
-      call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
-                                  direct_code, direct_method, type);
-    }
-    if (reg.Is64Bit()) {
-      i++;
-    }
-  }
-
-  call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
-                           direct_code, direct_method, type);
-  if (pcrLabel) {
-    if (!cu_->compiler_driver->GetCompilerOptions().GetImplicitNullChecks()) {
-      *pcrLabel = GenExplicitNullCheck(TargetReg(kArg1, kRef), info->opt_flags);
-    } else {
-      *pcrLabel = nullptr;
-      // In lieu of generating a check for kArg1 being null, we need to
-      // perform a load when doing implicit checks.
-      RegStorage tmp = AllocTemp();
-      Load32Disp(TargetReg(kArg1, kRef), 0, tmp);
-      MarkPossibleNullPointerException(info->opt_flags);
-      FreeTemp(tmp);
-    }
-  }
-  return call_state;
+  /*
+   * TODO: Improve by adding block copy for large number of arguments.  For now, just
+   * copy a Dalvik vreg at a time.
+   */
+  return count;
 }
 
 }  // namespace art
diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h
index 5e10f80..766ac23 100644
--- a/compiler/dex/quick/arm64/codegen_arm64.h
+++ b/compiler/dex/quick/arm64/codegen_arm64.h
@@ -27,38 +27,25 @@
 
 class Arm64Mir2Lir FINAL : public Mir2Lir {
  protected:
-  // TODO: consolidate 64-bit target support.
-  class InToRegStorageMapper {
-   public:
-    virtual RegStorage GetNextReg(bool is_double_or_float, bool is_wide, bool is_ref) = 0;
-    virtual ~InToRegStorageMapper() {}
-  };
-
   class InToRegStorageArm64Mapper : public InToRegStorageMapper {
    public:
     InToRegStorageArm64Mapper() : cur_core_reg_(0), cur_fp_reg_(0) {}
     virtual ~InToRegStorageArm64Mapper() {}
-    virtual RegStorage GetNextReg(bool is_double_or_float, bool is_wide, bool is_ref);
+    virtual RegStorage GetNextReg(ShortyArg arg);
+    virtual void Reset() OVERRIDE {
+      cur_core_reg_ = 0;
+      cur_fp_reg_ = 0;
+    }
    private:
-    int cur_core_reg_;
-    int cur_fp_reg_;
+    size_t cur_core_reg_;
+    size_t cur_fp_reg_;
   };
 
-  class InToRegStorageMapping {
-   public:
-    InToRegStorageMapping() : max_mapped_in_(0), is_there_stack_mapped_(false),
-    initialized_(false) {}
-    void Initialize(RegLocation* arg_locs, int count, InToRegStorageMapper* mapper);
-    int GetMaxMappedIn() { return max_mapped_in_; }
-    bool IsThereStackMapped() { return is_there_stack_mapped_; }
-    RegStorage Get(int in_position);
-    bool IsInitialized() { return initialized_; }
-   private:
-    std::map<int, RegStorage> mapping_;
-    int max_mapped_in_;
-    bool is_there_stack_mapped_;
-    bool initialized_;
-  };
+  InToRegStorageArm64Mapper in_to_reg_storage_arm64_mapper_;
+  InToRegStorageMapper* GetResetedInToRegStorageMapper() OVERRIDE {
+    in_to_reg_storage_arm64_mapper_.Reset();
+    return &in_to_reg_storage_arm64_mapper_;
+  }
 
  public:
   Arm64Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena);
@@ -113,7 +100,6 @@
   RegStorage TargetPtrReg(SpecialTargetRegister symbolic_reg) OVERRIDE {
     return As64BitReg(TargetReg(symbolic_reg));
   }
-  RegStorage GetArgMappingToPhysicalReg(int arg_num) OVERRIDE;
   RegLocation GetReturnAlt() OVERRIDE;
   RegLocation GetReturnWideAlt() OVERRIDE;
   RegLocation LocCReturn() OVERRIDE;
@@ -240,22 +226,6 @@
   bool InexpensiveConstantLong(int64_t value) OVERRIDE;
   bool InexpensiveConstantDouble(int64_t value) OVERRIDE;
 
-  void FlushIns(RegLocation* ArgLocs, RegLocation rl_method) OVERRIDE;
-
-  int GenDalvikArgsNoRange(CallInfo* info, int call_state, LIR** pcrLabel,
-                           NextCallInsn next_call_insn,
-                           const MethodReference& target_method,
-                           uint32_t vtable_idx,
-                           uintptr_t direct_code, uintptr_t direct_method, InvokeType type,
-                           bool skip_this) OVERRIDE;
-
-  int GenDalvikArgsRange(CallInfo* info, int call_state, LIR** pcrLabel,
-                         NextCallInsn next_call_insn,
-                         const MethodReference& target_method,
-                         uint32_t vtable_idx,
-                         uintptr_t direct_code, uintptr_t direct_method, InvokeType type,
-                         bool skip_this) OVERRIDE;
-
   bool WideGPRsAreAliases() const OVERRIDE {
     return true;  // 64b architecture.
   }
@@ -422,10 +392,11 @@
   void GenDivRemLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                      RegLocation rl_src2, bool is_div, int flags);
 
-  InToRegStorageMapping in_to_reg_storage_mapping_;
   static const A64EncodingMap EncodingMap[kA64Last];
 
   ArenaVector<LIR*> call_method_insns_;
+
+  int GenDalvikArgsBulkCopy(CallInfo* info, int first, int count) OVERRIDE;
 };
 
 }  // namespace art
diff --git a/compiler/dex/quick/arm64/target_arm64.cc b/compiler/dex/quick/arm64/target_arm64.cc
index 094ff51..e7fa8ed 100644
--- a/compiler/dex/quick/arm64/target_arm64.cc
+++ b/compiler/dex/quick/arm64/target_arm64.cc
@@ -790,27 +790,23 @@
   return Arm64Mir2Lir::EncodingMap[UNWIDE(opcode)].fmt;
 }
 
-RegStorage Arm64Mir2Lir::InToRegStorageArm64Mapper::GetNextReg(bool is_double_or_float,
-                                                               bool is_wide,
-                                                               bool is_ref) {
+RegStorage Arm64Mir2Lir::InToRegStorageArm64Mapper::GetNextReg(ShortyArg arg) {
   const RegStorage coreArgMappingToPhysicalReg[] =
       {rs_x1, rs_x2, rs_x3, rs_x4, rs_x5, rs_x6, rs_x7};
-  const int coreArgMappingToPhysicalRegSize =
-      sizeof(coreArgMappingToPhysicalReg) / sizeof(RegStorage);
+  const size_t coreArgMappingToPhysicalRegSize = arraysize(coreArgMappingToPhysicalReg);
   const RegStorage fpArgMappingToPhysicalReg[] =
       {rs_f0, rs_f1, rs_f2, rs_f3, rs_f4, rs_f5, rs_f6, rs_f7};
-  const int fpArgMappingToPhysicalRegSize =
-      sizeof(fpArgMappingToPhysicalReg) / sizeof(RegStorage);
+  const size_t fpArgMappingToPhysicalRegSize = arraysize(fpArgMappingToPhysicalReg);
 
   RegStorage result = RegStorage::InvalidReg();
-  if (is_double_or_float) {
+  if (arg.IsFP()) {
     if (cur_fp_reg_ < fpArgMappingToPhysicalRegSize) {
-      DCHECK(!is_ref);
+      DCHECK(!arg.IsRef());
       result = fpArgMappingToPhysicalReg[cur_fp_reg_++];
       if (result.Valid()) {
         // TODO: switching between widths remains a bit ugly.  Better way?
         int res_reg = result.GetReg();
-        result = is_wide ? RegStorage::FloatSolo64(res_reg) : RegStorage::FloatSolo32(res_reg);
+        result = arg.IsWide() ? RegStorage::FloatSolo64(res_reg) : RegStorage::FloatSolo32(res_reg);
       }
     }
   } else {
@@ -819,388 +815,15 @@
       if (result.Valid()) {
         // TODO: switching between widths remains a bit ugly.  Better way?
         int res_reg = result.GetReg();
-        DCHECK(!(is_wide && is_ref));
-        result = (is_wide || is_ref) ? RegStorage::Solo64(res_reg) : RegStorage::Solo32(res_reg);
+        DCHECK(!(arg.IsWide() && arg.IsRef()));
+        result = (arg.IsWide() || arg.IsRef()) ?
+                 RegStorage::Solo64(res_reg) : RegStorage::Solo32(res_reg);
       }
     }
   }
   return result;
 }
 
-RegStorage Arm64Mir2Lir::InToRegStorageMapping::Get(int in_position) {
-  DCHECK(IsInitialized());
-  auto res = mapping_.find(in_position);
-  return res != mapping_.end() ? res->second : RegStorage::InvalidReg();
-}
-
-void Arm64Mir2Lir::InToRegStorageMapping::Initialize(RegLocation* arg_locs, int count,
-                                                     InToRegStorageMapper* mapper) {
-  DCHECK(mapper != nullptr);
-  max_mapped_in_ = -1;
-  is_there_stack_mapped_ = false;
-  for (int in_position = 0; in_position < count; in_position++) {
-     RegStorage reg = mapper->GetNextReg(arg_locs[in_position].fp,
-                                         arg_locs[in_position].wide,
-                                         arg_locs[in_position].ref);
-     if (reg.Valid()) {
-       mapping_[in_position] = reg;
-       if (arg_locs[in_position].wide) {
-         // We covered 2 args, so skip the next one
-         in_position++;
-       }
-       max_mapped_in_ = std::max(max_mapped_in_, in_position);
-     } else {
-       is_there_stack_mapped_ = true;
-     }
-  }
-  initialized_ = true;
-}
-
-
-// Deprecate.  Use the new mechanism.
-// TODO(Arm64): reuse info in QuickArgumentVisitor?
-static RegStorage GetArgPhysicalReg(RegLocation* loc, int* num_gpr_used, int* num_fpr_used,
-                                    OpSize* op_size) {
-  if (loc->fp) {
-    int n = *num_fpr_used;
-    if (n < 8) {
-      *num_fpr_used = n + 1;
-      RegStorage::RegStorageKind reg_kind;
-      if (loc->wide) {
-        *op_size = kDouble;
-        reg_kind = RegStorage::k64BitSolo;
-      } else {
-        *op_size = kSingle;
-        reg_kind = RegStorage::k32BitSolo;
-      }
-      return RegStorage(RegStorage::kValid | reg_kind | RegStorage::kFloatingPoint | n);
-    }
-  } else {
-    int n = *num_gpr_used;
-    if (n < 8) {
-      *num_gpr_used = n + 1;
-      if (loc->wide || loc->ref) {
-        *op_size = k64;
-        return RegStorage::Solo64(n);
-      } else {
-        *op_size = k32;
-        return RegStorage::Solo32(n);
-      }
-    }
-  }
-  *op_size = kWord;
-  return RegStorage::InvalidReg();
-}
-
-RegStorage Arm64Mir2Lir::GetArgMappingToPhysicalReg(int arg_num) {
-  if (!in_to_reg_storage_mapping_.IsInitialized()) {
-    int start_vreg = mir_graph_->GetFirstInVR();
-    RegLocation* arg_locs = &mir_graph_->reg_location_[start_vreg];
-
-    InToRegStorageArm64Mapper mapper;
-    in_to_reg_storage_mapping_.Initialize(arg_locs, mir_graph_->GetNumOfInVRs(), &mapper);
-  }
-  return in_to_reg_storage_mapping_.Get(arg_num);
-}
-
-
-/*
- * If there are any ins passed in registers that have not been promoted
- * to a callee-save register, flush them to the frame.  Perform initial
- * assignment of promoted arguments.
- *
- * ArgLocs is an array of location records describing the incoming arguments
- * with one location record per word of argument.
- */
-void Arm64Mir2Lir::FlushIns(RegLocation* ArgLocs, RegLocation rl_method) {
-  int num_gpr_used = 1;
-  int num_fpr_used = 0;
-
-  /*
-   * Dummy up a RegLocation for the incoming StackReference<mirror::ArtMethod>
-   * It will attempt to keep kArg0 live (or copy it to home location
-   * if promoted).
-   */
-  RegLocation rl_src = rl_method;
-  rl_src.location = kLocPhysReg;
-  rl_src.reg = TargetReg(kArg0, kRef);
-  rl_src.home = false;
-  MarkLive(rl_src);
-  StoreValue(rl_method, rl_src);
-  // If Method* has been promoted, explicitly flush
-  if (rl_method.location == kLocPhysReg) {
-    StoreRefDisp(TargetPtrReg(kSp), 0, rl_src.reg, kNotVolatile);
-  }
-
-  if (mir_graph_->GetNumOfInVRs() == 0) {
-    return;
-  }
-
-  // Handle dalvik registers.
-  ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-  int start_vreg = mir_graph_->GetFirstInVR();
-  for (uint32_t i = 0; i < mir_graph_->GetNumOfInVRs(); i++) {
-    RegLocation* t_loc = &ArgLocs[i];
-    OpSize op_size;
-    RegStorage reg = GetArgPhysicalReg(t_loc, &num_gpr_used, &num_fpr_used, &op_size);
-
-    if (reg.Valid()) {
-      // If arriving in register.
-
-      // We have already updated the arg location with promoted info
-      // so we can be based on it.
-      if (t_loc->location == kLocPhysReg) {
-        // Just copy it.
-        OpRegCopy(t_loc->reg, reg);
-      } else {
-        // Needs flush.
-        if (t_loc->ref) {
-          StoreRefDisp(TargetPtrReg(kSp), SRegOffset(start_vreg + i), reg, kNotVolatile);
-        } else {
-          StoreBaseDisp(TargetPtrReg(kSp), SRegOffset(start_vreg + i), reg, t_loc->wide ? k64 : k32,
-              kNotVolatile);
-        }
-      }
-    } else {
-      // If arriving in frame & promoted.
-      if (t_loc->location == kLocPhysReg) {
-        if (t_loc->ref) {
-          LoadRefDisp(TargetPtrReg(kSp), SRegOffset(start_vreg + i), t_loc->reg, kNotVolatile);
-        } else {
-          LoadBaseDisp(TargetPtrReg(kSp), SRegOffset(start_vreg + i), t_loc->reg,
-                       t_loc->wide ? k64 : k32, kNotVolatile);
-        }
-      }
-    }
-    if (t_loc->wide) {
-      // Increment i to skip the next one.
-      i++;
-    }
-    //      if ((v_map->core_location == kLocPhysReg) && !t_loc->fp) {
-    //        OpRegCopy(RegStorage::Solo32(v_map->core_reg), reg);
-    //      } else if ((v_map->fp_location == kLocPhysReg) && t_loc->fp) {
-    //        OpRegCopy(RegStorage::Solo32(v_map->fp_reg), reg);
-    //      } else {
-    //        StoreBaseDisp(TargetReg(kSp), SRegOffset(start_vreg + i), reg, op_size, kNotVolatile);
-    //        if (reg.Is64Bit()) {
-    //          if (SRegOffset(start_vreg + i) + 4 != SRegOffset(start_vreg + i + 1)) {
-    //            LOG(FATAL) << "64 bit value stored in non-consecutive 4 bytes slots";
-    //          }
-    //          i += 1;
-    //        }
-    //      }
-    //    } else {
-    //      // If arriving in frame & promoted
-    //      if (v_map->core_location == kLocPhysReg) {
-    //        LoadWordDisp(TargetReg(kSp), SRegOffset(start_vreg + i),
-    //                     RegStorage::Solo32(v_map->core_reg));
-    //      }
-    //      if (v_map->fp_location == kLocPhysReg) {
-    //        LoadWordDisp(TargetReg(kSp), SRegOffset(start_vreg + i), RegStorage::Solo32(v_map->fp_reg));
-    //      }
-  }
-}
-
-/*
- * Load up to 5 arguments, the first three of which will be in
- * kArg1 .. kArg3.  On entry kArg0 contains the current method pointer,
- * and as part of the load sequence, it must be replaced with
- * the target method pointer.
- */
-int Arm64Mir2Lir::GenDalvikArgsNoRange(CallInfo* info,
-                                       int call_state, LIR** pcrLabel, NextCallInsn next_call_insn,
-                                       const MethodReference& target_method,
-                                       uint32_t vtable_idx, uintptr_t direct_code,
-                                       uintptr_t direct_method, InvokeType type, bool skip_this) {
-  return GenDalvikArgsRange(info,
-                       call_state, pcrLabel, next_call_insn,
-                       target_method,
-                       vtable_idx, direct_code,
-                       direct_method, type, skip_this);
-}
-
-/*
- * May have 0+ arguments (also used for jumbo).  Note that
- * source virtual registers may be in physical registers, so may
- * need to be flushed to home location before copying.  This
- * applies to arg3 and above (see below).
- *
- * FIXME: update comments.
- *
- * Two general strategies:
- *    If < 20 arguments
- *       Pass args 3-18 using vldm/vstm block copy
- *       Pass arg0, arg1 & arg2 in kArg1-kArg3
- *    If 20+ arguments
- *       Pass args arg19+ using memcpy block copy
- *       Pass arg0, arg1 & arg2 in kArg1-kArg3
- *
- */
-int Arm64Mir2Lir::GenDalvikArgsRange(CallInfo* info, int call_state,
-                                     LIR** pcrLabel, NextCallInsn next_call_insn,
-                                     const MethodReference& target_method,
-                                     uint32_t vtable_idx, uintptr_t direct_code,
-                                     uintptr_t direct_method, InvokeType type, bool skip_this) {
-  /* If no arguments, just return */
-  if (info->num_arg_words == 0)
-    return call_state;
-
-  const int start_index = skip_this ? 1 : 0;
-
-  InToRegStorageArm64Mapper mapper;
-  InToRegStorageMapping in_to_reg_storage_mapping;
-  in_to_reg_storage_mapping.Initialize(info->args, info->num_arg_words, &mapper);
-  const int last_mapped_in = in_to_reg_storage_mapping.GetMaxMappedIn();
-  int regs_left_to_pass_via_stack = info->num_arg_words - (last_mapped_in + 1);
-
-  // First of all, check whether it makes sense to use bulk copying.
-  // Bulk copying is done only for the range case.
-  // TODO: make a constant instead of 2
-  if (info->is_range && regs_left_to_pass_via_stack >= 2) {
-    // Scan the rest of the args - if in phys_reg flush to memory
-    for (int next_arg = last_mapped_in + 1; next_arg < info->num_arg_words;) {
-      RegLocation loc = info->args[next_arg];
-      if (loc.wide) {
-        loc = UpdateLocWide(loc);
-        if (loc.location == kLocPhysReg) {
-          ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-          StoreBaseDisp(TargetPtrReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k64, kNotVolatile);
-        }
-        next_arg += 2;
-      } else {
-        loc = UpdateLoc(loc);
-        if (loc.location == kLocPhysReg) {
-          ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-          if (loc.ref) {
-            StoreRefDisp(TargetPtrReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, kNotVolatile);
-          } else {
-            StoreBaseDisp(TargetPtrReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k32,
-                          kNotVolatile);
-          }
-        }
-        next_arg++;
-      }
-    }
-
-    // The rest can be copied together
-    int start_offset = SRegOffset(info->args[last_mapped_in + 1].s_reg_low);
-    int outs_offset = StackVisitor::GetOutVROffset(last_mapped_in + 1,
-                                                   cu_->instruction_set);
-
-    int current_src_offset = start_offset;
-    int current_dest_offset = outs_offset;
-
-    // Only davik regs are accessed in this loop; no next_call_insn() calls.
-    ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-    while (regs_left_to_pass_via_stack > 0) {
-      /*
-       * TODO: Improve by adding block copy for large number of arguments.  This
-       * should be done, if possible, as a target-depending helper.  For now, just
-       * copy a Dalvik vreg at a time.
-       */
-      // Moving 32-bits via general purpose register.
-      size_t bytes_to_move = sizeof(uint32_t);
-
-      // Instead of allocating a new temp, simply reuse one of the registers being used
-      // for argument passing.
-      RegStorage temp = TargetReg(kArg3, kNotWide);
-
-      // Now load the argument VR and store to the outs.
-      Load32Disp(TargetPtrReg(kSp), current_src_offset, temp);
-      Store32Disp(TargetPtrReg(kSp), current_dest_offset, temp);
-
-      current_src_offset += bytes_to_move;
-      current_dest_offset += bytes_to_move;
-      regs_left_to_pass_via_stack -= (bytes_to_move >> 2);
-    }
-    DCHECK_EQ(regs_left_to_pass_via_stack, 0);
-  }
-
-  // Now handle rest not registers if they are
-  if (in_to_reg_storage_mapping.IsThereStackMapped()) {
-    RegStorage regWide = TargetReg(kArg3, kWide);
-    for (int i = start_index; i <= last_mapped_in + regs_left_to_pass_via_stack; i++) {
-      RegLocation rl_arg = info->args[i];
-      rl_arg = UpdateRawLoc(rl_arg);
-      RegStorage reg = in_to_reg_storage_mapping.Get(i);
-      if (!reg.Valid()) {
-        int out_offset = StackVisitor::GetOutVROffset(i, cu_->instruction_set);
-
-        {
-          ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-          if (rl_arg.wide) {
-            if (rl_arg.location == kLocPhysReg) {
-              StoreBaseDisp(TargetPtrReg(kSp), out_offset, rl_arg.reg, k64, kNotVolatile);
-            } else {
-              LoadValueDirectWideFixed(rl_arg, regWide);
-              StoreBaseDisp(TargetPtrReg(kSp), out_offset, regWide, k64, kNotVolatile);
-            }
-          } else {
-            if (rl_arg.location == kLocPhysReg) {
-              if (rl_arg.ref) {
-                StoreRefDisp(TargetPtrReg(kSp), out_offset, rl_arg.reg, kNotVolatile);
-              } else {
-                StoreBaseDisp(TargetPtrReg(kSp), out_offset, rl_arg.reg, k32, kNotVolatile);
-              }
-            } else {
-              if (rl_arg.ref) {
-                RegStorage regSingle = TargetReg(kArg2, kRef);
-                LoadValueDirectFixed(rl_arg, regSingle);
-                StoreRefDisp(TargetPtrReg(kSp), out_offset, regSingle, kNotVolatile);
-              } else {
-                RegStorage regSingle = TargetReg(kArg2, kNotWide);
-                LoadValueDirectFixed(rl_arg, regSingle);
-                StoreBaseDisp(TargetPtrReg(kSp), out_offset, regSingle, k32, kNotVolatile);
-              }
-            }
-          }
-        }
-        call_state = next_call_insn(cu_, info, call_state, target_method,
-                                    vtable_idx, direct_code, direct_method, type);
-      }
-      if (rl_arg.wide) {
-        i++;
-      }
-    }
-  }
-
-  // Finish with mapped registers
-  for (int i = start_index; i <= last_mapped_in; i++) {
-    RegLocation rl_arg = info->args[i];
-    rl_arg = UpdateRawLoc(rl_arg);
-    RegStorage reg = in_to_reg_storage_mapping.Get(i);
-    if (reg.Valid()) {
-      if (rl_arg.wide) {
-        LoadValueDirectWideFixed(rl_arg, reg);
-      } else {
-        LoadValueDirectFixed(rl_arg, reg);
-      }
-      call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
-                                  direct_code, direct_method, type);
-    }
-    if (rl_arg.wide) {
-      i++;
-    }
-  }
-
-  call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
-                           direct_code, direct_method, type);
-  if (pcrLabel) {
-    if (!cu_->compiler_driver->GetCompilerOptions().GetImplicitNullChecks()) {
-      *pcrLabel = GenExplicitNullCheck(TargetReg(kArg1, kRef), info->opt_flags);
-    } else {
-      *pcrLabel = nullptr;
-      // In lieu of generating a check for kArg1 being null, we need to
-      // perform a load when doing implicit checks.
-      RegStorage tmp = AllocTemp();
-      Load32Disp(TargetReg(kArg1, kRef), 0, tmp);
-      MarkPossibleNullPointerException(info->opt_flags);
-      FreeTemp(tmp);
-    }
-  }
-  return call_state;
-}
-
 void Arm64Mir2Lir::InstallLiteralPools() {
   // PC-relative calls to methods.
   patches_.reserve(call_method_insns_.size());
@@ -1218,4 +841,12 @@
   Mir2Lir::InstallLiteralPools();
 }
 
+int Arm64Mir2Lir::GenDalvikArgsBulkCopy(CallInfo* /*info*/, int /*first*/, int count) {
+  /*
+   * TODO: Improve by adding block copy for large number of arguments.  For now, just
+   * copy a Dalvik vreg at a time.
+   */
+  return count;
+}
+
 }  // namespace art
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index 58bcee2..e1b4546 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -990,7 +990,8 @@
       last_lir_insn_(nullptr),
       slow_paths_(arena->Adapter(kArenaAllocSlowPaths)),
       mem_ref_type_(ResourceMask::kHeapRef),
-      mask_cache_(arena) {
+      mask_cache_(arena),
+      in_to_reg_storage_mapping_(arena) {
   switch_tables_.reserve(4);
   fill_array_data_.reserve(4);
   tempreg_info_.reserve(20);
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index 31b81bf..9462d3d 100755
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -401,59 +401,50 @@
    * half to memory as well.
    */
   ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-  for (uint32_t i = 0; i < mir_graph_->GetNumOfInVRs(); i++) {
-    PromotionMap* v_map = &promotion_map_[start_vreg + i];
+  RegLocation* t_loc = nullptr;
+  for (uint32_t i = 0; i < mir_graph_->GetNumOfInVRs(); i += t_loc->wide ? 2 : 1) {
+    // Get the register corresponding to the input.
     RegStorage reg = GetArgMappingToPhysicalReg(i);
+    t_loc = &ArgLocs[i];
+
+    // If a wide input arrived in a single (32-bit) register, flush it and
+    // proceed as if it comes from memory.
+    if (t_loc->wide && reg.Valid() && !reg.Is64Bit()) {
+      StoreBaseDisp(TargetPtrReg(kSp), SRegOffset(start_vreg + i), reg, k32, kNotVolatile);
+      reg = RegStorage::InvalidReg();
+    }
 
     if (reg.Valid()) {
-      // If arriving in register
-      bool need_flush = true;
-      RegLocation* t_loc = &ArgLocs[i];
-      if ((v_map->core_location == kLocPhysReg) && !t_loc->fp) {
-        OpRegCopy(RegStorage::Solo32(v_map->core_reg), reg);
-        need_flush = false;
-      } else if ((v_map->fp_location == kLocPhysReg) && t_loc->fp) {
-        OpRegCopy(RegStorage::Solo32(v_map->fp_reg), reg);
-        need_flush = false;
-      } else {
-        need_flush = true;
-      }
+      // If arriving in register.
 
-      // For wide args, force flush if not fully promoted
-      if (t_loc->wide) {
-        PromotionMap* p_map = v_map + (t_loc->high_word ? -1 : +1);
-        // Is only half promoted?
-        need_flush |= (p_map->core_location != v_map->core_location) ||
-            (p_map->fp_location != v_map->fp_location);
-        if ((cu_->instruction_set == kThumb2) && t_loc->fp && !need_flush) {
-          /*
-           * In Arm, a double is represented as a pair of consecutive single float
-           * registers starting at an even number.  It's possible that both Dalvik vRegs
-           * representing the incoming double were independently promoted as singles - but
-           * not in a form usable as a double.  If so, we need to flush - even though the
-           * incoming arg appears fully in register.  At this point in the code, both
-           * halves of the double are promoted.  Make sure they are in a usable form.
-           */
-          int lowreg_index = start_vreg + i + (t_loc->high_word ? -1 : 0);
-          int low_reg = promotion_map_[lowreg_index].fp_reg;
-          int high_reg = promotion_map_[lowreg_index + 1].fp_reg;
-          if (((low_reg & 0x1) != 0) || (high_reg != (low_reg + 1))) {
-            need_flush = true;
-          }
+      // We have already updated the arg location with promoted info
+      // so we can be based on it.
+      if (t_loc->location == kLocPhysReg) {
+        // Just copy it.
+        if (t_loc->wide) {
+          OpRegCopyWide(t_loc->reg, reg);
+        } else {
+          OpRegCopy(t_loc->reg, reg);
+        }
+      } else {
+        // Needs flush.
+        int offset = SRegOffset(start_vreg + i);
+        if (t_loc->ref) {
+          StoreRefDisp(TargetPtrReg(kSp), offset, reg, kNotVolatile);
+        } else {
+          StoreBaseDisp(TargetPtrReg(kSp), offset, reg, t_loc->wide ? k64 : k32, kNotVolatile);
         }
       }
-      if (need_flush) {
-        Store32Disp(TargetPtrReg(kSp), SRegOffset(start_vreg + i), reg);
-      }
     } else {
-      // If arriving in frame & promoted
-      if (v_map->core_location == kLocPhysReg) {
-        Load32Disp(TargetPtrReg(kSp), SRegOffset(start_vreg + i),
-                   RegStorage::Solo32(v_map->core_reg));
-      }
-      if (v_map->fp_location == kLocPhysReg) {
-        Load32Disp(TargetPtrReg(kSp), SRegOffset(start_vreg + i),
-                   RegStorage::Solo32(v_map->fp_reg));
+      // If arriving in frame & promoted.
+      if (t_loc->location == kLocPhysReg) {
+        int offset = SRegOffset(start_vreg + i);
+        if (t_loc->ref) {
+          LoadRefDisp(TargetPtrReg(kSp), offset, t_loc->reg, kNotVolatile);
+        } else {
+          LoadBaseDisp(TargetPtrReg(kSp), offset, t_loc->reg, t_loc->wide ? k64 : k32,
+                       kNotVolatile);
+        }
       }
     }
   }
@@ -568,7 +559,7 @@
  * emit the next instruction in a virtual invoke sequence.
  * We can use kLr as a temp prior to target address loading
  * Note also that we'll load the first argument ("this") into
- * kArg1 here rather than the standard LoadArgRegs.
+ * kArg1 here rather than the standard GenDalvikArgs.
  */
 static int NextVCallInsn(CompilationUnit* cu, CallInfo* info,
                          int state, const MethodReference& target_method,
@@ -612,7 +603,7 @@
  * Emit the next instruction in an invoke interface sequence. This will do a lookup in the
  * class's IMT, calling either the actual method or art_quick_imt_conflict_trampoline if
  * more than one interface method map to the same index. Note also that we'll load the first
- * argument ("this") into kArg1 here rather than the standard LoadArgRegs.
+ * argument ("this") into kArg1 here rather than the standard GenDalvikArgs.
  */
 static int NextInterfaceCallInsn(CompilationUnit* cu, CallInfo* info, int state,
                                  const MethodReference& target_method,
@@ -719,158 +710,6 @@
                           target_method, 0);
 }
 
-int Mir2Lir::LoadArgRegs(CallInfo* info, int call_state,
-                         NextCallInsn next_call_insn,
-                         const MethodReference& target_method,
-                         uint32_t vtable_idx, uintptr_t direct_code,
-                         uintptr_t direct_method, InvokeType type, bool skip_this) {
-  int last_arg_reg = 3 - 1;
-  int arg_regs[3] = {TargetReg(kArg1, kNotWide).GetReg(), TargetReg(kArg2, kNotWide).GetReg(),
-                     TargetReg(kArg3, kNotWide).GetReg()};
-
-  int next_reg = 0;
-  int next_arg = 0;
-  if (skip_this) {
-    next_reg++;
-    next_arg++;
-  }
-  for (; (next_reg <= last_arg_reg) && (next_arg < info->num_arg_words); next_reg++) {
-    RegLocation rl_arg = info->args[next_arg++];
-    rl_arg = UpdateRawLoc(rl_arg);
-    if (rl_arg.wide && (next_reg <= last_arg_reg - 1)) {
-      RegStorage r_tmp(RegStorage::k64BitPair, arg_regs[next_reg], arg_regs[next_reg + 1]);
-      LoadValueDirectWideFixed(rl_arg, r_tmp);
-      next_reg++;
-      next_arg++;
-    } else {
-      if (rl_arg.wide) {
-        rl_arg = NarrowRegLoc(rl_arg);
-        rl_arg.is_const = false;
-      }
-      LoadValueDirectFixed(rl_arg, RegStorage::Solo32(arg_regs[next_reg]));
-    }
-    call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
-                                direct_code, direct_method, type);
-  }
-  return call_state;
-}
-
-/*
- * Load up to 5 arguments, the first three of which will be in
- * kArg1 .. kArg3.  On entry kArg0 contains the current method pointer,
- * and as part of the load sequence, it must be replaced with
- * the target method pointer.  Note, this may also be called
- * for "range" variants if the number of arguments is 5 or fewer.
- */
-int Mir2Lir::GenDalvikArgsNoRange(CallInfo* info,
-                                  int call_state, LIR** pcrLabel, NextCallInsn next_call_insn,
-                                  const MethodReference& target_method,
-                                  uint32_t vtable_idx, uintptr_t direct_code,
-                                  uintptr_t direct_method, InvokeType type, bool skip_this) {
-  RegLocation rl_arg;
-
-  /* If no arguments, just return */
-  if (info->num_arg_words == 0)
-    return call_state;
-
-  call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
-                              direct_code, direct_method, type);
-
-  DCHECK_LE(info->num_arg_words, 5);
-  if (info->num_arg_words > 3) {
-    int32_t next_use = 3;
-    // Detect special case of wide arg spanning arg3/arg4
-    RegLocation rl_use0 = info->args[0];
-    RegLocation rl_use1 = info->args[1];
-    RegLocation rl_use2 = info->args[2];
-    if (((!rl_use0.wide && !rl_use1.wide) || rl_use0.wide) && rl_use2.wide) {
-      RegStorage reg;
-      // Wide spans, we need the 2nd half of uses[2].
-      rl_arg = UpdateLocWide(rl_use2);
-      if (rl_arg.location == kLocPhysReg) {
-        if (rl_arg.reg.IsPair()) {
-          reg = rl_arg.reg.GetHigh();
-        } else {
-          RegisterInfo* reg_info = GetRegInfo(rl_arg.reg);
-          reg_info = reg_info->FindMatchingView(RegisterInfo::kHighSingleStorageMask);
-          if (reg_info == nullptr) {
-            // NOTE: For hard float convention we won't split arguments across reg/mem.
-            UNIMPLEMENTED(FATAL) << "Needs hard float api.";
-          }
-          reg = reg_info->GetReg();
-        }
-      } else {
-        // kArg2 & rArg3 can safely be used here
-        reg = TargetReg(kArg3, kNotWide);
-        {
-          ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-          Load32Disp(TargetPtrReg(kSp), SRegOffset(rl_arg.s_reg_low) + 4, reg);
-        }
-        call_state = next_call_insn(cu_, info, call_state, target_method,
-                                    vtable_idx, direct_code, direct_method, type);
-      }
-      {
-        ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-        Store32Disp(TargetPtrReg(kSp), (next_use + 1) * 4, reg);
-      }
-      call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
-                                  direct_code, direct_method, type);
-      next_use++;
-    }
-    // Loop through the rest
-    while (next_use < info->num_arg_words) {
-      RegStorage arg_reg;
-      rl_arg = info->args[next_use];
-      rl_arg = UpdateRawLoc(rl_arg);
-      if (rl_arg.location == kLocPhysReg) {
-        arg_reg = rl_arg.reg;
-      } else {
-        arg_reg = TargetReg(kArg2, rl_arg.wide ? kWide : kNotWide);
-        if (rl_arg.wide) {
-          LoadValueDirectWideFixed(rl_arg, arg_reg);
-        } else {
-          LoadValueDirectFixed(rl_arg, arg_reg);
-        }
-        call_state = next_call_insn(cu_, info, call_state, target_method,
-                                    vtable_idx, direct_code, direct_method, type);
-      }
-      int outs_offset = (next_use + 1) * 4;
-      {
-        ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-        if (rl_arg.wide) {
-          StoreBaseDisp(TargetPtrReg(kSp), outs_offset, arg_reg, k64, kNotVolatile);
-          next_use += 2;
-        } else {
-          Store32Disp(TargetPtrReg(kSp), outs_offset, arg_reg);
-          next_use++;
-        }
-      }
-      call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
-                               direct_code, direct_method, type);
-    }
-  }
-
-  call_state = LoadArgRegs(info, call_state, next_call_insn,
-                           target_method, vtable_idx, direct_code, direct_method,
-                           type, skip_this);
-
-  if (pcrLabel) {
-    if (!cu_->compiler_driver->GetCompilerOptions().GetImplicitNullChecks()) {
-      *pcrLabel = GenExplicitNullCheck(TargetReg(kArg1, kRef), info->opt_flags);
-    } else {
-      *pcrLabel = nullptr;
-      if (!(cu_->disable_opt & (1 << kNullCheckElimination)) &&
-          (info->opt_flags & MIR_IGNORE_NULL_CHECK)) {
-        return call_state;
-      }
-      // In lieu of generating a check for kArg1 being null, we need to
-      // perform a load when doing implicit checks.
-      GenImplicitNullCheck(TargetReg(kArg1, kRef), info->opt_flags);
-    }
-  }
-  return call_state;
-}
-
 // Default implementation of implicit null pointer check.
 // Overridden by arch specific as necessary.
 void Mir2Lir::GenImplicitNullCheck(RegStorage reg, int opt_flags) {
@@ -883,209 +722,194 @@
   FreeTemp(tmp);
 }
 
-
-/*
- * May have 0+ arguments (also used for jumbo).  Note that
- * source virtual registers may be in physical registers, so may
- * need to be flushed to home location before copying.  This
- * applies to arg3 and above (see below).
- *
- * Two general strategies:
- *    If < 20 arguments
- *       Pass args 3-18 using vldm/vstm block copy
- *       Pass arg0, arg1 & arg2 in kArg1-kArg3
- *    If 20+ arguments
- *       Pass args arg19+ using memcpy block copy
- *       Pass arg0, arg1 & arg2 in kArg1-kArg3
- *
+/**
+ * @brief Used to flush promoted registers if they are used as arguments
+ * in an invocation.
+ * @param info the information about the arguments of the invocation.
+ * @param start the first argument we should start to look from.
  */
-int Mir2Lir::GenDalvikArgsRange(CallInfo* info, int call_state,
-                                LIR** pcrLabel, NextCallInsn next_call_insn,
-                                const MethodReference& target_method,
-                                uint32_t vtable_idx, uintptr_t direct_code, uintptr_t direct_method,
-                                InvokeType type, bool skip_this) {
-  // If we can treat it as non-range (Jumbo ops will use range form)
-  if (info->num_arg_words <= 5)
-    return GenDalvikArgsNoRange(info, call_state, pcrLabel,
-                                next_call_insn, target_method, vtable_idx,
-                                direct_code, direct_method, type, skip_this);
-  /*
-   * First load the non-register arguments.  Both forms expect all
-   * of the source arguments to be in their home frame location, so
-   * scan the s_reg names and flush any that have been promoted to
-   * frame backing storage.
-   */
+void Mir2Lir::GenDalvikArgsFlushPromoted(CallInfo* info, int start) {
+  if (cu_->disable_opt & (1 << kPromoteRegs)) {
+    // Flushing makes sense only if promotion is enabled.
+    return;
+  }
+  ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
   // Scan the rest of the args - if in phys_reg flush to memory
-  for (int next_arg = 0; next_arg < info->num_arg_words;) {
+  for (int next_arg = start; next_arg < info->num_arg_words;) {
     RegLocation loc = info->args[next_arg];
     if (loc.wide) {
       loc = UpdateLocWide(loc);
-      if ((next_arg >= 2) && (loc.location == kLocPhysReg)) {
-        ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
+      if (loc.location == kLocPhysReg) {
         StoreBaseDisp(TargetPtrReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k64, kNotVolatile);
       }
       next_arg += 2;
     } else {
       loc = UpdateLoc(loc);
-      if ((next_arg >= 3) && (loc.location == kLocPhysReg)) {
-        ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-        Store32Disp(TargetPtrReg(kSp), SRegOffset(loc.s_reg_low), loc.reg);
+      if (loc.location == kLocPhysReg) {
+        if (loc.ref) {
+          StoreRefDisp(TargetPtrReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, kNotVolatile);
+        } else {
+          StoreBaseDisp(TargetPtrReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k32,
+                        kNotVolatile);
+        }
       }
       next_arg++;
     }
   }
+}
 
-  // The first 3 arguments are passed via registers.
-  // TODO: For 64-bit, instead of hardcoding 4 for Method* size, we should either
-  // get size of uintptr_t or size of object reference according to model being used.
-  int outs_offset = 4 /* Method* */ + (3 * sizeof(uint32_t));
-  int start_offset = SRegOffset(info->args[3].s_reg_low);
-  int regs_left_to_pass_via_stack = info->num_arg_words - 3;
-  DCHECK_GT(regs_left_to_pass_via_stack, 0);
+/**
+ * @brief Used to optimize the copying of VRs which are arguments of an invocation.
+ * Note that promoted registers should be flushed first if copying is performed.
+ * An implementation that does copy may skip several of the leading VRs, but it must
+ * copy everything from there to the end and return the number of VRs it skipped
+ * (possibly all of them).
+ * @see GenDalvikArgsFlushPromoted
+ * @param info the information about the arguments of the invocation.
+ * @param first the first argument to start looking from.
+ * @param count the number of remaining arguments we can handle.
+ * @return the number of arguments which were not handled; unhandled arguments
+ * must form a contiguous prefix starting at the first one.
+ */
+int Mir2Lir::GenDalvikArgsBulkCopy(CallInfo* info, int first, int count) {
+  // A memcpy call is rather expensive, so only use it when the count is big.
+  if (count > 16) {
+    GenDalvikArgsFlushPromoted(info, first);
+    int start_offset = SRegOffset(info->args[first].s_reg_low);
+    int outs_offset = StackVisitor::GetOutVROffset(first, cu_->instruction_set);
 
-  if (cu_->instruction_set == kThumb2 && regs_left_to_pass_via_stack <= 16) {
-    // Use vldm/vstm pair using kArg3 as a temp
-    call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
-                             direct_code, direct_method, type);
-    OpRegRegImm(kOpAdd, TargetReg(kArg3, kRef), TargetPtrReg(kSp), start_offset);
-    LIR* ld = nullptr;
-    {
-      ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-      ld = OpVldm(TargetReg(kArg3, kRef), regs_left_to_pass_via_stack);
-    }
-    // TUNING: loosen barrier
-    ld->u.m.def_mask = &kEncodeAll;
-    call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
-                             direct_code, direct_method, type);
-    OpRegRegImm(kOpAdd, TargetReg(kArg3, kRef), TargetPtrReg(kSp), 4 /* Method* */ + (3 * 4));
-    call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
-                             direct_code, direct_method, type);
-    LIR* st = nullptr;
-    {
-      ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-      st = OpVstm(TargetReg(kArg3, kRef), regs_left_to_pass_via_stack);
-    }
-    st->u.m.def_mask = &kEncodeAll;
-    call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
-                             direct_code, direct_method, type);
-  } else if (cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64) {
-    int current_src_offset = start_offset;
-    int current_dest_offset = outs_offset;
-
-    // Only davik regs are accessed in this loop; no next_call_insn() calls.
-    ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-    while (regs_left_to_pass_via_stack > 0) {
-      // This is based on the knowledge that the stack itself is 16-byte aligned.
-      bool src_is_16b_aligned = (current_src_offset & 0xF) == 0;
-      bool dest_is_16b_aligned = (current_dest_offset & 0xF) == 0;
-      size_t bytes_to_move;
-
-      /*
-       * The amount to move defaults to 32-bit. If there are 4 registers left to move, then do a
-       * a 128-bit move because we won't get the chance to try to aligned. If there are more than
-       * 4 registers left to move, consider doing a 128-bit only if either src or dest are aligned.
-       * We do this because we could potentially do a smaller move to align.
-       */
-      if (regs_left_to_pass_via_stack == 4 ||
-          (regs_left_to_pass_via_stack > 4 && (src_is_16b_aligned || dest_is_16b_aligned))) {
-        // Moving 128-bits via xmm register.
-        bytes_to_move = sizeof(uint32_t) * 4;
-
-        // Allocate a free xmm temp. Since we are working through the calling sequence,
-        // we expect to have an xmm temporary available.  AllocTempDouble will abort if
-        // there are no free registers.
-        RegStorage temp = AllocTempDouble();
-
-        LIR* ld1 = nullptr;
-        LIR* ld2 = nullptr;
-        LIR* st1 = nullptr;
-        LIR* st2 = nullptr;
-
-        /*
-         * The logic is similar for both loads and stores. If we have 16-byte alignment,
-         * do an aligned move. If we have 8-byte alignment, then do the move in two
-         * parts. This approach prevents possible cache line splits. Finally, fall back
-         * to doing an unaligned move. In most cases we likely won't split the cache
-         * line but we cannot prove it and thus take a conservative approach.
-         */
-        bool src_is_8b_aligned = (current_src_offset & 0x7) == 0;
-        bool dest_is_8b_aligned = (current_dest_offset & 0x7) == 0;
-
-        if (src_is_16b_aligned) {
-          ld1 = OpMovRegMem(temp, TargetPtrReg(kSp), current_src_offset, kMovA128FP);
-        } else if (src_is_8b_aligned) {
-          ld1 = OpMovRegMem(temp, TargetPtrReg(kSp), current_src_offset, kMovLo128FP);
-          ld2 = OpMovRegMem(temp, TargetPtrReg(kSp), current_src_offset + (bytes_to_move >> 1),
-                            kMovHi128FP);
-        } else {
-          ld1 = OpMovRegMem(temp, TargetPtrReg(kSp), current_src_offset, kMovU128FP);
-        }
-
-        if (dest_is_16b_aligned) {
-          st1 = OpMovMemReg(TargetPtrReg(kSp), current_dest_offset, temp, kMovA128FP);
-        } else if (dest_is_8b_aligned) {
-          st1 = OpMovMemReg(TargetPtrReg(kSp), current_dest_offset, temp, kMovLo128FP);
-          st2 = OpMovMemReg(TargetPtrReg(kSp), current_dest_offset + (bytes_to_move >> 1),
-                            temp, kMovHi128FP);
-        } else {
-          st1 = OpMovMemReg(TargetPtrReg(kSp), current_dest_offset, temp, kMovU128FP);
-        }
-
-        // TODO If we could keep track of aliasing information for memory accesses that are wider
-        // than 64-bit, we wouldn't need to set up a barrier.
-        if (ld1 != nullptr) {
-          if (ld2 != nullptr) {
-            // For 64-bit load we can actually set up the aliasing information.
-            AnnotateDalvikRegAccess(ld1, current_src_offset >> 2, true, true);
-            AnnotateDalvikRegAccess(ld2, (current_src_offset + (bytes_to_move >> 1)) >> 2, true,
-                                    true);
-          } else {
-            // Set barrier for 128-bit load.
-            ld1->u.m.def_mask = &kEncodeAll;
-          }
-        }
-        if (st1 != nullptr) {
-          if (st2 != nullptr) {
-            // For 64-bit store we can actually set up the aliasing information.
-            AnnotateDalvikRegAccess(st1, current_dest_offset >> 2, false, true);
-            AnnotateDalvikRegAccess(st2, (current_dest_offset + (bytes_to_move >> 1)) >> 2, false,
-                                    true);
-          } else {
-            // Set barrier for 128-bit store.
-            st1->u.m.def_mask = &kEncodeAll;
-          }
-        }
-
-        // Free the temporary used for the data movement.
-        FreeTemp(temp);
-      } else {
-        // Moving 32-bits via general purpose register.
-        bytes_to_move = sizeof(uint32_t);
-
-        // Instead of allocating a new temp, simply reuse one of the registers being used
-        // for argument passing.
-        RegStorage temp = TargetReg(kArg3, kNotWide);
-
-        // Now load the argument VR and store to the outs.
-        Load32Disp(TargetPtrReg(kSp), current_src_offset, temp);
-        Store32Disp(TargetPtrReg(kSp), current_dest_offset, temp);
-      }
-
-      current_src_offset += bytes_to_move;
-      current_dest_offset += bytes_to_move;
-      regs_left_to_pass_via_stack -= (bytes_to_move >> 2);
-    }
-  } else {
-    // Generate memcpy
     OpRegRegImm(kOpAdd, TargetReg(kArg0, kRef), TargetPtrReg(kSp), outs_offset);
     OpRegRegImm(kOpAdd, TargetReg(kArg1, kRef), TargetPtrReg(kSp), start_offset);
     CallRuntimeHelperRegRegImm(kQuickMemcpy, TargetReg(kArg0, kRef), TargetReg(kArg1, kRef),
-                               (info->num_arg_words - 3) * 4, false);
+                               count * 4, false);
+    count = 0;
+  }
+  return count;
+}
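To make the contract concrete (a hand-worked illustration, not text from the patch): for a range invoke with first == 4 and count == 20, this default implementation flushes any promoted VRs from VR 4 onward, forms the kSp-relative addresses of the outs area and of VR 4 in kArg0/kArg1, emits a single kQuickMemcpy of 20 * 4 bytes, and returns 0. With count == 10 it does nothing and returns 10, so all ten VRs fall through to the per-argument store loop in GenDalvikArgs below.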
+
+int Mir2Lir::GenDalvikArgs(CallInfo* info, int call_state,
+                           LIR** pcrLabel, NextCallInsn next_call_insn,
+                           const MethodReference& target_method,
+                           uint32_t vtable_idx, uintptr_t direct_code, uintptr_t direct_method,
+                           InvokeType type, bool skip_this) {
+  // If no arguments, just return.
+  if (info->num_arg_words == 0)
+    return call_state;
+
+  const int start_index = skip_this ? 1 : 0;
+
+  // Get the architecture-dependent mapping between output VRs and physical registers,
+  // based on the shorty of the method to call.
+  InToRegStorageMapping in_to_reg_storage_mapping(arena_);
+  {
+    const char* target_shorty = mir_graph_->GetShortyFromMethodReference(target_method);
+    ShortyIterator shorty_iterator(target_shorty, type == kStatic);
+    in_to_reg_storage_mapping.Initialize(&shorty_iterator, GetResetedInToRegStorageMapper());
   }
 
-  call_state = LoadArgRegs(info, call_state, next_call_insn,
-                           target_method, vtable_idx, direct_code, direct_method,
-                           type, skip_this);
+  int stack_map_start = std::max(in_to_reg_storage_mapping.GetMaxMappedIn() + 1, start_index);
+  if ((stack_map_start < info->num_arg_words) && info->args[stack_map_start].high_word) {
+    // It is possible that the last mapped reg is 32-bit while the arg is 64-bit.
+    // In that case the high word is handled together with the low part mapped to a register.
+    stack_map_start++;
+  }
+  int regs_left_to_pass_via_stack = info->num_arg_words - stack_map_start;
+
+  // In the range case we can try to copy the remaining VRs (those not mapped to
+  // physical registers) using a more optimal algorithm.
+  if (info->is_range && regs_left_to_pass_via_stack > 1) {
+    regs_left_to_pass_via_stack = GenDalvikArgsBulkCopy(info, stack_map_start,
+                                                        regs_left_to_pass_via_stack);
+  }
+
+  // Now handle any remaining VRs that are mapped to the stack.
+  if (in_to_reg_storage_mapping.HasArgumentsOnStack()) {
+    // Use two temps, but not kArg1: it may hold 'this', which we might be skipping.
+    // Keep single and wide temps separate - it can give some advantage.
+    RegStorage regRef = TargetReg(kArg3, kRef);
+    RegStorage regSingle = TargetReg(kArg3, kNotWide);
+    RegStorage regWide = TargetReg(kArg2, kWide);
+    for (int i = start_index;
+         i < stack_map_start + regs_left_to_pass_via_stack; i++) {
+      RegLocation rl_arg = info->args[i];
+      rl_arg = UpdateRawLoc(rl_arg);
+      RegStorage reg = in_to_reg_storage_mapping.Get(i);
+      if (!reg.Valid()) {
+        int out_offset = StackVisitor::GetOutVROffset(i, cu_->instruction_set);
+        {
+          ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
+          if (rl_arg.wide) {
+            if (rl_arg.location == kLocPhysReg) {
+              StoreBaseDisp(TargetPtrReg(kSp), out_offset, rl_arg.reg, k64, kNotVolatile);
+            } else {
+              LoadValueDirectWideFixed(rl_arg, regWide);
+              StoreBaseDisp(TargetPtrReg(kSp), out_offset, regWide, k64, kNotVolatile);
+            }
+          } else {
+            if (rl_arg.location == kLocPhysReg) {
+              if (rl_arg.ref) {
+                StoreRefDisp(TargetPtrReg(kSp), out_offset, rl_arg.reg, kNotVolatile);
+              } else {
+                StoreBaseDisp(TargetPtrReg(kSp), out_offset, rl_arg.reg, k32, kNotVolatile);
+              }
+            } else {
+              if (rl_arg.ref) {
+                LoadValueDirectFixed(rl_arg, regRef);
+                StoreRefDisp(TargetPtrReg(kSp), out_offset, regRef, kNotVolatile);
+              } else {
+                LoadValueDirectFixed(rl_arg, regSingle);
+                StoreBaseDisp(TargetPtrReg(kSp), out_offset, regSingle, k32, kNotVolatile);
+              }
+            }
+          }
+        }
+        call_state = next_call_insn(cu_, info, call_state, target_method,
+                                    vtable_idx, direct_code, direct_method, type);
+      }
+      if (rl_arg.wide) {
+        i++;
+      }
+    }
+  }
+
+  // Finish with VRs mapped to physical registers.
+  for (int i = start_index; i < stack_map_start; i++) {
+    RegLocation rl_arg = info->args[i];
+    rl_arg = UpdateRawLoc(rl_arg);
+    RegStorage reg = in_to_reg_storage_mapping.Get(i);
+    if (reg.Valid()) {
+      if (rl_arg.wide) {
+        // If reg is not 64-bit (i.e. it holds only half of the 64-bit value), handle it separately.
+        if (!reg.Is64Bit()) {
+          // TODO: REVISIT: This adds a spill of the low part while we could just copy it.
+          ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
+          if (rl_arg.location == kLocPhysReg) {
+            int out_offset = StackVisitor::GetOutVROffset(i, cu_->instruction_set);
+            // Dump it to memory and then load only the low part.
+            StoreBaseDisp(TargetPtrReg(kSp), out_offset, rl_arg.reg, k64, kNotVolatile);
+            LoadBaseDisp(TargetPtrReg(kSp), out_offset, reg, k32, kNotVolatile);
+          } else {
+            int out_offset = StackVisitor::GetOutVROffset(i + 1, cu_->instruction_set);
+            // First, use the target reg for the high part.
+            LoadBaseDisp(TargetPtrReg(kSp), SRegOffset(rl_arg.s_reg_low + 1), reg, k32,
+                         kNotVolatile);
+            StoreBaseDisp(TargetPtrReg(kSp), out_offset, reg, k32, kNotVolatile);
+            // Now load the target reg with the low part.
+            LoadBaseDisp(TargetPtrReg(kSp), SRegOffset(rl_arg.s_reg_low), reg, k32, kNotVolatile);
+          }
+        } else {
+          LoadValueDirectWideFixed(rl_arg, reg);
+        }
+      } else {
+        LoadValueDirectFixed(rl_arg, reg);
+      }
+      call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
+                               direct_code, direct_method, type);
+    }
+    if (rl_arg.wide) {
+      i++;
+    }
+  }
 
   call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
                            direct_code, direct_method, type);
@@ -1094,18 +918,20 @@
       *pcrLabel = GenExplicitNullCheck(TargetReg(kArg1, kRef), info->opt_flags);
     } else {
       *pcrLabel = nullptr;
-      if (!(cu_->disable_opt & (1 << kNullCheckElimination)) &&
-          (info->opt_flags & MIR_IGNORE_NULL_CHECK)) {
-        return call_state;
-      }
-      // In lieu of generating a check for kArg1 being null, we need to
-      // perform a load when doing implicit checks.
       GenImplicitNullCheck(TargetReg(kArg1, kRef), info->opt_flags);
     }
   }
   return call_state;
 }
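A hand trace of the bookkeeping above (illustrative only, using the three-core-register 32-bit mapper defined later in this patch): a virtual invoke with shorty "VIJ" has num_arg_words == 4 ('this', an int, and the two halves of a long). The mapper hands out kArg1, kArg2 and a single 32-bit kArg3 for the long's low half, so GetMaxMappedIn() == 2 and stack_map_start == max(3, start_index) == 3; it is then bumped to 4 because args[3] holds the long's high word. That leaves regs_left_to_pass_via_stack == 0, and the split long is finished by the !reg.Is64Bit() branch of the final loop, which parks the high half in the out area and loads only the low half into kArg3.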
 
+RegStorage Mir2Lir::GetArgMappingToPhysicalReg(int arg_num) {
+  if (!in_to_reg_storage_mapping_.IsInitialized()) {
+    ShortyIterator shorty_iterator(cu_->shorty, cu_->invoke_type == kStatic);
+    in_to_reg_storage_mapping_.Initialize(&shorty_iterator, GetResetedInToRegStorageMapper());
+  }
+  return in_to_reg_storage_mapping_.Get(arg_num);
+}
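A minimal sketch of the caller-side pattern this common helper enables (hypothetical snippet; the LoadArg/LoadArgDirect code later in this patch follows the same shape):

    RegStorage in_reg = GetArgMappingToPhysicalReg(in_position);
    if (in_reg.Valid()) {
      // The argument arrives in the physical register chosen by the per-arch mapper.
    } else {
      // The argument arrives on the stack at
      // StackVisitor::GetOutVROffset(in_position, cu_->instruction_set).
    }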
+
 RegLocation Mir2Lir::InlineTarget(CallInfo* info) {
   RegLocation res;
   if (info->result.location == kLocInvalid) {
@@ -1719,17 +1545,10 @@
     skip_this = fast_path;
   }
   MethodReference target_method = method_info.GetTargetMethod();
-  if (!info->is_range) {
-    call_state = GenDalvikArgsNoRange(info, call_state, p_null_ck,
-                                      next_call_insn, target_method, method_info.VTableIndex(),
-                                      method_info.DirectCode(), method_info.DirectMethod(),
-                                      original_type, skip_this);
-  } else {
-    call_state = GenDalvikArgsRange(info, call_state, p_null_ck,
-                                    next_call_insn, target_method, method_info.VTableIndex(),
-                                    method_info.DirectCode(), method_info.DirectMethod(),
-                                    original_type, skip_this);
-  }
+  call_state = GenDalvikArgs(info, call_state, p_null_ck,
+                             next_call_insn, target_method, method_info.VTableIndex(),
+                             method_info.DirectCode(), method_info.DirectMethod(),
+                             original_type, skip_this);
   // Finish up any of the call sequence not interleaved in arg loading
   while (call_state >= 0) {
     call_state = next_call_insn(cu_, info, call_state, target_method, method_info.VTableIndex(),
diff --git a/compiler/dex/quick/mips/codegen_mips.h b/compiler/dex/quick/mips/codegen_mips.h
index e08846c..8f976df 100644
--- a/compiler/dex/quick/mips/codegen_mips.h
+++ b/compiler/dex/quick/mips/codegen_mips.h
@@ -24,6 +24,26 @@
 namespace art {
 
 class MipsMir2Lir FINAL : public Mir2Lir {
+ protected:
+  class InToRegStorageMipsMapper : public InToRegStorageMapper {
+   public:
+    explicit InToRegStorageMipsMapper(Mir2Lir* m2l) : m2l_(m2l), cur_core_reg_(0) {}
+    virtual RegStorage GetNextReg(ShortyArg arg);
+    virtual void Reset() OVERRIDE {
+      cur_core_reg_ = 0;
+    }
+   protected:
+    Mir2Lir* m2l_;
+   private:
+    size_t cur_core_reg_;
+  };
+
+  InToRegStorageMipsMapper in_to_reg_storage_mips_mapper_;
+  InToRegStorageMapper* GetResetedInToRegStorageMapper() OVERRIDE {
+    in_to_reg_storage_mips_mapper_.Reset();
+    return &in_to_reg_storage_mips_mapper_;
+  }
+
   public:
     MipsMir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena);
 
@@ -56,7 +76,6 @@
     // Required for target - register utilities.
     RegStorage Solo64ToPair64(RegStorage reg);
     RegStorage TargetReg(SpecialTargetRegister reg);
-    RegStorage GetArgMappingToPhysicalReg(int arg_num);
     RegLocation GetReturnAlt();
     RegLocation GetReturnWideAlt();
     RegLocation LocCReturn();
diff --git a/compiler/dex/quick/mips/target_mips.cc b/compiler/dex/quick/mips/target_mips.cc
index 185112d..efa130c 100644
--- a/compiler/dex/quick/mips/target_mips.cc
+++ b/compiler/dex/quick/mips/target_mips.cc
@@ -122,18 +122,20 @@
   return res_reg;
 }
 
-RegStorage MipsMir2Lir::GetArgMappingToPhysicalReg(int arg_num) {
-  // For the 32-bit internal ABI, the first 3 arguments are passed in registers.
-  switch (arg_num) {
-    case 0:
-      return rs_rMIPS_ARG1;
-    case 1:
-      return rs_rMIPS_ARG2;
-    case 2:
-      return rs_rMIPS_ARG3;
-    default:
-      return RegStorage::InvalidReg();
+RegStorage MipsMir2Lir::InToRegStorageMipsMapper::GetNextReg(ShortyArg arg) {
+  const SpecialTargetRegister coreArgMappingToPhysicalReg[] = {kArg1, kArg2, kArg3};
+  const size_t coreArgMappingToPhysicalRegSize = arraysize(coreArgMappingToPhysicalReg);
+
+  RegStorage result = RegStorage::InvalidReg();
+  if (cur_core_reg_ < coreArgMappingToPhysicalRegSize) {
+    result = m2l_->TargetReg(coreArgMappingToPhysicalReg[cur_core_reg_++],
+                             arg.IsRef() ? kRef : kNotWide);
+    if (arg.IsWide() && cur_core_reg_ < coreArgMappingToPhysicalRegSize) {
+      result = RegStorage::MakeRegPair(
+          result, m2l_->TargetReg(coreArgMappingToPhysicalReg[cur_core_reg_++], kNotWide));
+    }
   }
+  return result;
 }
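For instance (a hand trace, not code from the patch): with this mapper a static method with shorty "VJI" gets the pair (kArg1, kArg2) for the long and kArg3 for the int, while a virtual method with shorty "VJ" gets kArg1 for 'this' and the pair (kArg2, kArg3) for the long. Once the three core registers are exhausted, every further argument maps to RegStorage::InvalidReg() and therefore to the stack.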
 
 /*
@@ -602,7 +604,7 @@
 }
 
 MipsMir2Lir::MipsMir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena)
-    : Mir2Lir(cu, mir_graph, arena) {
+    : Mir2Lir(cu, mir_graph, arena), in_to_reg_storage_mips_mapper_(this) {
   for (int i = 0; i < kMipsLast; i++) {
     DCHECK_EQ(MipsMir2Lir::EncodingMap[i].opcode, i)
         << "Encoding order for " << MipsMir2Lir::EncodingMap[i].name
diff --git a/compiler/dex/quick/mir_to_lir-inl.h b/compiler/dex/quick/mir_to_lir-inl.h
index 0aefc2d..144790e 100644
--- a/compiler/dex/quick/mir_to_lir-inl.h
+++ b/compiler/dex/quick/mir_to_lir-inl.h
@@ -276,6 +276,24 @@
   }
 }
 
+inline Mir2Lir::ShortyIterator::ShortyIterator(const char* shorty, bool is_static)
+    : cur_(shorty + 1), pending_this_(!is_static), initialized_(false) {
+  DCHECK(shorty != nullptr);
+  DCHECK_NE(*shorty, 0);
+}
+
+inline bool Mir2Lir::ShortyIterator::Next() {
+  if (!initialized_) {
+    initialized_ = true;
+  } else if (pending_this_) {
+    pending_this_ = false;
+  } else if (*cur_ != 0) {
+    cur_++;
+  }
+
+  return *cur_ != 0 || pending_this_;
+}
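A small usage sketch (illustrative only; in the patch the iterator is used only inside Mir2Lir, since both nested classes are protected): for a virtual method with shorty "DIL" the iterator yields the implicit receiver first and never yields the return type:

    Mir2Lir::ShortyIterator it("DIL", false /* is_static */);
    while (it.Next()) {
      Mir2Lir::ShortyArg arg = it.GetArg();
      // Yields 'L' (this), then 'I', then 'L'; the leading 'D' (return type) is skipped.
    }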
+
 }  // namespace art
 
 #endif  // ART_COMPILER_DEX_QUICK_MIR_TO_LIR_INL_H_
diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc
index 320c0f4..bd88091 100644
--- a/compiler/dex/quick/mir_to_lir.cc
+++ b/compiler/dex/quick/mir_to_lir.cc
@@ -53,20 +53,14 @@
   return res;
 }
 
-void Mir2Lir::LockArg(int in_position, bool wide) {
-  RegStorage reg_arg_low = GetArgMappingToPhysicalReg(in_position);
-  RegStorage reg_arg_high = wide ? GetArgMappingToPhysicalReg(in_position + 1) :
-      RegStorage::InvalidReg();
+void Mir2Lir::LockArg(int in_position, bool) {
+  RegStorage reg_arg = GetArgMappingToPhysicalReg(in_position);
 
-  if (reg_arg_low.Valid()) {
-    LockTemp(reg_arg_low);
-  }
-  if (reg_arg_high.Valid() && reg_arg_low.NotExactlyEquals(reg_arg_high)) {
-    LockTemp(reg_arg_high);
+  if (reg_arg.Valid()) {
+    LockTemp(reg_arg);
   }
 }
 
-// TODO: simplify when 32-bit targets go hard-float.
 RegStorage Mir2Lir::LoadArg(int in_position, RegisterClass reg_class, bool wide) {
   ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
   int offset = StackVisitor::GetOutVROffset(in_position, cu_->instruction_set);
@@ -87,81 +81,38 @@
     offset += sizeof(uint64_t);
   }
 
-  if (cu_->target64) {
-    RegStorage reg_arg = GetArgMappingToPhysicalReg(in_position);
-    if (!reg_arg.Valid()) {
-      RegStorage new_reg =
-          wide ?  AllocTypedTempWide(false, reg_class) : AllocTypedTemp(false, reg_class);
-      LoadBaseDisp(TargetPtrReg(kSp), offset, new_reg, wide ? k64 : k32, kNotVolatile);
-      return new_reg;
-    } else {
-      // Check if we need to copy the arg to a different reg_class.
-      if (!RegClassMatches(reg_class, reg_arg)) {
-        if (wide) {
-          RegStorage new_reg = AllocTypedTempWide(false, reg_class);
-          OpRegCopyWide(new_reg, reg_arg);
-          reg_arg = new_reg;
-        } else {
-          RegStorage new_reg = AllocTypedTemp(false, reg_class);
-          OpRegCopy(new_reg, reg_arg);
-          reg_arg = new_reg;
-        }
+  RegStorage reg_arg = GetArgMappingToPhysicalReg(in_position);
+
+  // TODO: REVISIT: This adds a spill of the low part while we could just copy it.
+  if (reg_arg.Valid() && wide && (reg_arg.GetWideKind() == kNotWide)) {
+    // For wide register we've got only half of it.
+    // Flush it to memory then.
+    StoreBaseDisp(TargetPtrReg(kSp), offset, reg_arg, k32, kNotVolatile);
+    reg_arg = RegStorage::InvalidReg();
+  }
+
+  if (!reg_arg.Valid()) {
+    reg_arg = wide ?  AllocTypedTempWide(false, reg_class) : AllocTypedTemp(false, reg_class);
+    LoadBaseDisp(TargetPtrReg(kSp), offset, reg_arg, wide ? k64 : k32, kNotVolatile);
+  } else {
+    // Check if we need to copy the arg to a different reg_class.
+    if (!RegClassMatches(reg_class, reg_arg)) {
+      if (wide) {
+        RegStorage new_reg = AllocTypedTempWide(false, reg_class);
+        OpRegCopyWide(new_reg, reg_arg);
+        reg_arg = new_reg;
+      } else {
+        RegStorage new_reg = AllocTypedTemp(false, reg_class);
+        OpRegCopy(new_reg, reg_arg);
+        reg_arg = new_reg;
       }
     }
-    return reg_arg;
-  }
-
-  RegStorage reg_arg_low = GetArgMappingToPhysicalReg(in_position);
-  RegStorage reg_arg_high = wide ? GetArgMappingToPhysicalReg(in_position + 1) :
-      RegStorage::InvalidReg();
-
-  // If the VR is wide and there is no register for high part, we need to load it.
-  if (wide && !reg_arg_high.Valid()) {
-    // If the low part is not in a reg, we allocate a pair. Otherwise, we just load to high reg.
-    if (!reg_arg_low.Valid()) {
-      RegStorage new_regs = AllocTypedTempWide(false, reg_class);
-      LoadBaseDisp(TargetPtrReg(kSp), offset, new_regs, k64, kNotVolatile);
-      return new_regs;  // The reg_class is OK, we can return.
-    } else {
-      // Assume that no ABI allows splitting a wide fp reg between a narrow fp reg and memory,
-      // i.e. the low part is in a core reg. Load the second part in a core reg as well for now.
-      DCHECK(!reg_arg_low.IsFloat());
-      reg_arg_high = AllocTemp();
-      int offset_high = offset + sizeof(uint32_t);
-      Load32Disp(TargetPtrReg(kSp), offset_high, reg_arg_high);
-      // Continue below to check the reg_class.
-    }
-  }
-
-  // If the low part is not in a register yet, we need to load it.
-  if (!reg_arg_low.Valid()) {
-    // Assume that if the low part of a wide arg is passed in memory, so is the high part,
-    // thus we don't get here for wide args as it's handled above. Big-endian ABIs could
-    // conceivably break this assumption but Android supports only little-endian architectures.
-    DCHECK(!wide);
-    reg_arg_low = AllocTypedTemp(false, reg_class);
-    Load32Disp(TargetPtrReg(kSp), offset, reg_arg_low);
-    return reg_arg_low;  // The reg_class is OK, we can return.
-  }
-
-  RegStorage reg_arg = wide ? RegStorage::MakeRegPair(reg_arg_low, reg_arg_high) : reg_arg_low;
-  // Check if we need to copy the arg to a different reg_class.
-  if (!RegClassMatches(reg_class, reg_arg)) {
-    if (wide) {
-      RegStorage new_regs = AllocTypedTempWide(false, reg_class);
-      OpRegCopyWide(new_regs, reg_arg);
-      reg_arg = new_regs;
-    } else {
-      RegStorage new_reg = AllocTypedTemp(false, reg_class);
-      OpRegCopy(new_reg, reg_arg);
-      reg_arg = new_reg;
-    }
   }
   return reg_arg;
 }
 
-// TODO: simpilfy when 32-bit targets go hard float.
 void Mir2Lir::LoadArgDirect(int in_position, RegLocation rl_dest) {
+  DCHECK_EQ(rl_dest.location, kLocPhysReg);
   ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
   int offset = StackVisitor::GetOutVROffset(in_position, cu_->instruction_set);
   if (cu_->instruction_set == kX86) {
@@ -180,48 +131,23 @@
     offset += sizeof(uint64_t);
   }
 
-  if (!rl_dest.wide) {
-    RegStorage reg = GetArgMappingToPhysicalReg(in_position);
-    if (reg.Valid()) {
-      OpRegCopy(rl_dest.reg, reg);
-    } else {
-      Load32Disp(TargetPtrReg(kSp), offset, rl_dest.reg);
-    }
+  RegStorage reg_arg = GetArgMappingToPhysicalReg(in_position);
+
+  // TODO: REVISIT: This adds a spill of the low part while we could just copy it.
+  if (reg_arg.Valid() && rl_dest.wide && (reg_arg.GetWideKind() == kNotWide)) {
+    // For a wide VR we have a register for only half of it.
+    // Flush that half to memory then.
+    StoreBaseDisp(TargetPtrReg(kSp), offset, reg_arg, k32, kNotVolatile);
+    reg_arg = RegStorage::InvalidReg();
+  }
+
+  if (!reg_arg.Valid()) {
+    LoadBaseDisp(TargetPtrReg(kSp), offset, rl_dest.reg, rl_dest.wide ? k64 : k32, kNotVolatile);
   } else {
-    if (cu_->target64) {
-      RegStorage reg = GetArgMappingToPhysicalReg(in_position);
-      if (reg.Valid()) {
-        OpRegCopy(rl_dest.reg, reg);
-      } else {
-        LoadBaseDisp(TargetPtrReg(kSp), offset, rl_dest.reg, k64, kNotVolatile);
-      }
-      return;
-    }
-
-    RegStorage reg_arg_low = GetArgMappingToPhysicalReg(in_position);
-    RegStorage reg_arg_high = GetArgMappingToPhysicalReg(in_position + 1);
-
-    if (cu_->instruction_set == kX86) {
-      // Can't handle double split between reg & memory.  Flush reg half to memory.
-      if (rl_dest.reg.IsDouble() && (reg_arg_low.Valid() != reg_arg_high.Valid())) {
-        DCHECK(reg_arg_low.Valid());
-        DCHECK(!reg_arg_high.Valid());
-        Store32Disp(TargetPtrReg(kSp), offset, reg_arg_low);
-        reg_arg_low = RegStorage::InvalidReg();
-      }
-    }
-
-    if (reg_arg_low.Valid() && reg_arg_high.Valid()) {
-      OpRegCopyWide(rl_dest.reg, RegStorage::MakeRegPair(reg_arg_low, reg_arg_high));
-    } else if (reg_arg_low.Valid() && !reg_arg_high.Valid()) {
-      OpRegCopy(rl_dest.reg, reg_arg_low);
-      int offset_high = offset + sizeof(uint32_t);
-      Load32Disp(TargetPtrReg(kSp), offset_high, rl_dest.reg.GetHigh());
-    } else if (!reg_arg_low.Valid() && reg_arg_high.Valid()) {
-      OpRegCopy(rl_dest.reg.GetHigh(), reg_arg_high);
-      Load32Disp(TargetPtrReg(kSp), offset, rl_dest.reg.GetLow());
+    if (rl_dest.wide) {
+      OpRegCopyWide(rl_dest.reg, reg_arg);
     } else {
-      LoadBaseDisp(TargetPtrReg(kSp), offset, rl_dest.reg, k64, kNotVolatile);
+      OpRegCopy(rl_dest.reg, reg_arg);
     }
   }
 }
@@ -1372,4 +1298,35 @@
   UNREACHABLE();
 }
 
+void Mir2Lir::InToRegStorageMapping::Initialize(ShortyIterator* shorty,
+                                                InToRegStorageMapper* mapper) {
+  DCHECK(mapper != nullptr);
+  DCHECK(shorty != nullptr);
+  max_mapped_in_ = -1;
+  has_arguments_on_stack_ = false;
+  while (shorty->Next()) {
+     ShortyArg arg = shorty->GetArg();
+     RegStorage reg = mapper->GetNextReg(arg);
+     if (reg.Valid()) {
+       mapping_.Put(count_, reg);
+       max_mapped_in_ = count_;
+       // If the VR is wide and was mapped to a wide register, account for the second half too.
+       if (arg.IsWide() && reg.Is64Bit()) {
+         max_mapped_in_++;
+       }
+     } else {
+       has_arguments_on_stack_ = true;
+     }
+     count_ += arg.IsWide() ? 2 : 1;
+  }
+  initialized_ = true;
+}
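A worked illustration of the counting (hypothetical mapper, not from the patch): for a static method with shorty "VDJI" and a mapper that has wide registers left for the double and the long but nothing for the int, Initialize() records position 0 -> FP reg (max_mapped_in_ becomes 1, since the double was mapped as wide), position 2 -> core reg (max_mapped_in_ becomes 3), and leaves position 4 unmapped, so has_arguments_on_stack_ ends up true and count_ ends at 5.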
+
+RegStorage Mir2Lir::InToRegStorageMapping::Get(int in_position) {
+  DCHECK(IsInitialized());
+  DCHECK_LT(in_position, count_);
+  auto res = mapping_.find(in_position);
+  return res != mapping_.end() ? res->second : RegStorage::InvalidReg();
+}
+
 }  // namespace art
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index 5d78a6e..dd09330 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -905,19 +905,14 @@
     virtual LIR* GenCallInsn(const MirMethodLoweringInfo& method_info);
 
     virtual void FlushIns(RegLocation* ArgLocs, RegLocation rl_method);
-    virtual int GenDalvikArgsNoRange(CallInfo* info, int call_state, LIR** pcrLabel,
-                             NextCallInsn next_call_insn,
-                             const MethodReference& target_method,
-                             uint32_t vtable_idx,
-                             uintptr_t direct_code, uintptr_t direct_method, InvokeType type,
-                             bool skip_this);
-    virtual int GenDalvikArgsRange(CallInfo* info, int call_state, LIR** pcrLabel,
-                           NextCallInsn next_call_insn,
-                           const MethodReference& target_method,
-                           uint32_t vtable_idx,
-                           uintptr_t direct_code, uintptr_t direct_method, InvokeType type,
-                           bool skip_this);
-
+    virtual int GenDalvikArgs(CallInfo* info, int call_state, LIR** pcrLabel,
+                      NextCallInsn next_call_insn,
+                      const MethodReference& target_method,
+                      uint32_t vtable_idx,
+                      uintptr_t direct_code, uintptr_t direct_method, InvokeType type,
+                      bool skip_this);
+    virtual int GenDalvikArgsBulkCopy(CallInfo* info, int first, int count);
+    virtual void GenDalvikArgsFlushPromoted(CallInfo* info, int start);
     /**
      * @brief Used to determine the register location of destination.
      * @details This is needed during generation of inline intrinsics because it finds destination
@@ -958,12 +953,6 @@
     bool GenInlinedUnsafeGet(CallInfo* info, bool is_long, bool is_volatile);
     bool GenInlinedUnsafePut(CallInfo* info, bool is_long, bool is_object,
                              bool is_volatile, bool is_ordered);
-    virtual int LoadArgRegs(CallInfo* info, int call_state,
-                    NextCallInsn next_call_insn,
-                    const MethodReference& target_method,
-                    uint32_t vtable_idx,
-                    uintptr_t direct_code, uintptr_t direct_method, InvokeType type,
-                    bool skip_this);
 
     // Shared by all targets - implemented in gen_loadstore.cc.
     RegLocation LoadCurrMethod();
@@ -1228,7 +1217,7 @@
       }
     }
 
-    virtual RegStorage GetArgMappingToPhysicalReg(int arg_num) = 0;
+    RegStorage GetArgMappingToPhysicalReg(int arg_num);
     virtual RegLocation GetReturnAlt() = 0;
     virtual RegLocation GetReturnWideAlt() = 0;
     virtual RegLocation LocCReturn() = 0;
@@ -1780,6 +1769,63 @@
     // to deduplicate the masks.
     ResourceMaskCache mask_cache_;
 
+  protected:
+    // ABI support
+    class ShortyArg {
+      public:
+        explicit ShortyArg(char type) : type_(type) { }
+        bool IsFP() { return type_ == 'F' || type_ == 'D'; }
+        bool IsWide() { return type_ == 'J' || type_ == 'D'; }
+        bool IsRef() { return type_ == 'L'; }
+        char GetType() { return type_; }
+      private:
+        char type_;
+    };
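For reference, the predicates above classify the Dex shorty characters as follows (a summary derived from the class, not new behavior):

    //   'F', 'D'                 -> IsFP()
    //   'J', 'D'                 -> IsWide()
    //   'L'                      -> IsRef()
    //   'Z', 'B', 'C', 'S', 'I'  -> 32-bit core values (no predicate holds)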
+
+    class ShortyIterator {
+      public:
+        ShortyIterator(const char* shorty, bool is_static);
+        bool Next();
+        ShortyArg GetArg() { return ShortyArg(pending_this_ ? 'L' : *cur_); }
+      private:
+        const char* cur_;
+        bool pending_this_;
+        bool initialized_;
+    };
+
+    class InToRegStorageMapper {
+     public:
+      virtual RegStorage GetNextReg(ShortyArg arg) = 0;
+      virtual ~InToRegStorageMapper() {}
+      virtual void Reset() = 0;
+    };
+
+    class InToRegStorageMapping {
+     public:
+      explicit InToRegStorageMapping(ArenaAllocator* arena)
+          : mapping_(std::less<int>(), arena->Adapter()), count_(0),
+            max_mapped_in_(0), has_arguments_on_stack_(false), initialized_(false) {}
+      void Initialize(ShortyIterator* shorty, InToRegStorageMapper* mapper);
+      /**
+       * @return the index of the last VR mapped to a physical register. In other words,
+       * any VR with index (return value + 1) or greater is mapped to memory.
+       */
+      int GetMaxMappedIn() { return max_mapped_in_; }
+      bool HasArgumentsOnStack() { return has_arguments_on_stack_; }
+      RegStorage Get(int in_position);
+      bool IsInitialized() { return initialized_; }
+     private:
+      ArenaSafeMap<int, RegStorage> mapping_;
+      int count_;
+      int max_mapped_in_;
+      bool has_arguments_on_stack_;
+      bool initialized_;
+    };
+
+    // Cached mapping of method input to reg storage according to ABI.
+    InToRegStorageMapping in_to_reg_storage_mapping_;
+    virtual InToRegStorageMapper* GetResetedInToRegStorageMapper() = 0;
+
   private:
     static bool SizeMatchesTypeForEntrypoint(OpSize size, Primitive::Type type);
 };  // Class Mir2Lir
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index 9cb0bf5..c7d83dd 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -28,40 +28,48 @@
 
 class X86Mir2Lir : public Mir2Lir {
  protected:
-  class InToRegStorageMapper {
-   public:
-    virtual RegStorage GetNextReg(bool is_double_or_float, bool is_wide, bool is_ref) = 0;
-    virtual ~InToRegStorageMapper() {}
-  };
-
   class InToRegStorageX86_64Mapper : public InToRegStorageMapper {
    public:
-    explicit InToRegStorageX86_64Mapper(Mir2Lir* ml) : ml_(ml), cur_core_reg_(0), cur_fp_reg_(0) {}
-    virtual ~InToRegStorageX86_64Mapper() {}
-    virtual RegStorage GetNextReg(bool is_double_or_float, bool is_wide, bool is_ref);
+    explicit InToRegStorageX86_64Mapper(Mir2Lir* m2l)
+        : m2l_(m2l), cur_core_reg_(0), cur_fp_reg_(0) {}
+    virtual RegStorage GetNextReg(ShortyArg arg);
+    virtual void Reset() OVERRIDE {
+      cur_core_reg_ = 0;
+      cur_fp_reg_ = 0;
+    }
    protected:
-    Mir2Lir* ml_;
+    Mir2Lir* m2l_;
    private:
-    int cur_core_reg_;
-    int cur_fp_reg_;
+    size_t cur_core_reg_;
+    size_t cur_fp_reg_;
   };
 
-  class InToRegStorageMapping {
+  class InToRegStorageX86Mapper : public InToRegStorageMapper {
    public:
-    InToRegStorageMapping() : max_mapped_in_(0), is_there_stack_mapped_(false),
-    initialized_(false) {}
-    void Initialize(RegLocation* arg_locs, int count, InToRegStorageMapper* mapper);
-    int GetMaxMappedIn() { return max_mapped_in_; }
-    bool IsThereStackMapped() { return is_there_stack_mapped_; }
-    RegStorage Get(int in_position);
-    bool IsInitialized() { return initialized_; }
+    explicit InToRegStorageX86Mapper(Mir2Lir* m2l) : m2l_(m2l), cur_core_reg_(0) {}
+    virtual RegStorage GetNextReg(ShortyArg arg);
+    virtual void Reset() OVERRIDE {
+      cur_core_reg_ = 0;
+    }
+   protected:
+    Mir2Lir* m2l_;
    private:
-    std::map<int, RegStorage> mapping_;
-    int max_mapped_in_;
-    bool is_there_stack_mapped_;
-    bool initialized_;
+    size_t cur_core_reg_;
   };
 
+  InToRegStorageX86_64Mapper in_to_reg_storage_x86_64_mapper_;
+  InToRegStorageX86Mapper in_to_reg_storage_x86_mapper_;
+  InToRegStorageMapper* GetResetedInToRegStorageMapper() OVERRIDE {
+    InToRegStorageMapper* res;
+    if (cu_->target64) {
+      res = &in_to_reg_storage_x86_64_mapper_;
+    } else {
+      res = &in_to_reg_storage_x86_mapper_;
+    }
+    res->Reset();
+    return res;
+  }
+
   class ExplicitTempRegisterLock {
   public:
     ExplicitTempRegisterLock(X86Mir2Lir* mir_to_lir, int n_regs, ...);
@@ -71,6 +79,8 @@
     X86Mir2Lir* const mir_to_lir_;
   };
 
+  virtual int GenDalvikArgsBulkCopy(CallInfo* info, int first, int count) OVERRIDE;
+
  public:
   X86Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena);
 
@@ -125,8 +135,6 @@
     return TargetReg(symbolic_reg, cu_->target64 ? kWide : kNotWide);
   }
 
-  RegStorage GetArgMappingToPhysicalReg(int arg_num) OVERRIDE;
-
   RegLocation GetReturnAlt() OVERRIDE;
   RegLocation GetReturnWideAlt() OVERRIDE;
   RegLocation LocCReturn() OVERRIDE;
@@ -350,22 +358,7 @@
   void LoadClassType(const DexFile& dex_file, uint32_t type_idx,
                      SpecialTargetRegister symbolic_reg) OVERRIDE;
 
-  void FlushIns(RegLocation* ArgLocs, RegLocation rl_method) OVERRIDE;
-
   NextCallInsn GetNextSDCallInsn() OVERRIDE;
-  int GenDalvikArgsNoRange(CallInfo* info, int call_state, LIR** pcrLabel,
-                           NextCallInsn next_call_insn,
-                           const MethodReference& target_method,
-                           uint32_t vtable_idx,
-                           uintptr_t direct_code, uintptr_t direct_method, InvokeType type,
-                           bool skip_this) OVERRIDE;
-
-  int GenDalvikArgsRange(CallInfo* info, int call_state, LIR** pcrLabel,
-                         NextCallInsn next_call_insn,
-                         const MethodReference& target_method,
-                         uint32_t vtable_idx,
-                         uintptr_t direct_code, uintptr_t direct_method, InvokeType type,
-                         bool skip_this) OVERRIDE;
 
   /*
    * @brief Generate a relative call to the method that will be patched at link time.
@@ -439,8 +432,6 @@
   LIR* StoreBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale, int displacement,
                             RegStorage r_src, OpSize size, int opt_flags = 0);
 
-  RegStorage GetCoreArgMappingToPhysicalReg(int core_arg_num) const;
-
   int AssignInsnOffsets();
   void AssignOffsets();
   AssemblerStatus AssembleInstructions(CodeOffset start_addr);
@@ -1000,8 +991,6 @@
    */
   static void DumpRegLocation(RegLocation loc);
 
-  InToRegStorageMapping in_to_reg_storage_mapping_;
-
  private:
   void SwapBits(RegStorage result_reg, int shift, int32_t value);
   void SwapBits64(RegStorage result_reg, int shift, int64_t value);
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index 998aeff..e16a70b 100755
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -808,6 +808,7 @@
 
 X86Mir2Lir::X86Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena)
     : Mir2Lir(cu, mir_graph, arena),
+      in_to_reg_storage_x86_64_mapper_(this), in_to_reg_storage_x86_mapper_(this),
       base_of_code_(nullptr), store_method_addr_(false), store_method_addr_used_(false),
       method_address_insns_(arena->Adapter()),
       class_type_address_insns_(arena->Adapter()),
@@ -2396,452 +2397,45 @@
 }
 
 // ------------ ABI support: mapping of args to physical registers -------------
-RegStorage X86Mir2Lir::InToRegStorageX86_64Mapper::GetNextReg(bool is_double_or_float, bool is_wide,
-                                                              bool is_ref) {
+RegStorage X86Mir2Lir::InToRegStorageX86_64Mapper::GetNextReg(ShortyArg arg) {
   const SpecialTargetRegister coreArgMappingToPhysicalReg[] = {kArg1, kArg2, kArg3, kArg4, kArg5};
-  const int coreArgMappingToPhysicalRegSize = sizeof(coreArgMappingToPhysicalReg) /
-      sizeof(SpecialTargetRegister);
+  const size_t coreArgMappingToPhysicalRegSize = arraysize(coreArgMappingToPhysicalReg);
   const SpecialTargetRegister fpArgMappingToPhysicalReg[] = {kFArg0, kFArg1, kFArg2, kFArg3,
                                                              kFArg4, kFArg5, kFArg6, kFArg7};
-  const int fpArgMappingToPhysicalRegSize = sizeof(fpArgMappingToPhysicalReg) /
-      sizeof(SpecialTargetRegister);
+  const size_t fpArgMappingToPhysicalRegSize = arraysize(fpArgMappingToPhysicalReg);
 
-  if (is_double_or_float) {
+  if (arg.IsFP()) {
     if (cur_fp_reg_ < fpArgMappingToPhysicalRegSize) {
-      return ml_->TargetReg(fpArgMappingToPhysicalReg[cur_fp_reg_++], is_wide ? kWide : kNotWide);
+      return m2l_->TargetReg(fpArgMappingToPhysicalReg[cur_fp_reg_++],
+                             arg.IsWide() ? kWide : kNotWide);
     }
   } else {
     if (cur_core_reg_ < coreArgMappingToPhysicalRegSize) {
-      return ml_->TargetReg(coreArgMappingToPhysicalReg[cur_core_reg_++],
-                            is_ref ? kRef : (is_wide ? kWide : kNotWide));
+      return m2l_->TargetReg(coreArgMappingToPhysicalReg[cur_core_reg_++],
+                             arg.IsRef() ? kRef : (arg.IsWide() ? kWide : kNotWide));
     }
   }
   return RegStorage::InvalidReg();
 }
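A hand trace of the 64-bit mapper (illustrative): for a static method with shorty "VDIJ", the double is FP and wide so it takes kFArg0 as a wide register, the int takes kArg1, and the long takes kArg2 as a wide core register. The core and FP counters advance independently, so up to five core and eight FP arguments can land in registers before anything is passed on the stack.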
 
-RegStorage X86Mir2Lir::InToRegStorageMapping::Get(int in_position) {
-  DCHECK(IsInitialized());
-  auto res = mapping_.find(in_position);
-  return res != mapping_.end() ? res->second : RegStorage::InvalidReg();
-}
+RegStorage X86Mir2Lir::InToRegStorageX86Mapper::GetNextReg(ShortyArg arg) {
+  const SpecialTargetRegister coreArgMappingToPhysicalReg[] = {kArg1, kArg2, kArg3};
+  const size_t coreArgMappingToPhysicalRegSize = arraysize(coreArgMappingToPhysicalReg);
 
-void X86Mir2Lir::InToRegStorageMapping::Initialize(RegLocation* arg_locs, int count,
-                                                   InToRegStorageMapper* mapper) {
-  DCHECK(mapper != nullptr);
-  max_mapped_in_ = -1;
-  is_there_stack_mapped_ = false;
-  for (int in_position = 0; in_position < count; in_position++) {
-     RegStorage reg = mapper->GetNextReg(arg_locs[in_position].fp,
-             arg_locs[in_position].wide, arg_locs[in_position].ref);
-     if (reg.Valid()) {
-       mapping_[in_position] = reg;
-       max_mapped_in_ = std::max(max_mapped_in_, in_position);
-       if (arg_locs[in_position].wide) {
-         // We covered 2 args, so skip the next one
-         in_position++;
-       }
-     } else {
-       is_there_stack_mapped_ = true;
-     }
+  RegStorage result = RegStorage::InvalidReg();
+  if (cur_core_reg_ < coreArgMappingToPhysicalRegSize) {
+    result = m2l_->TargetReg(coreArgMappingToPhysicalReg[cur_core_reg_++],
+                             arg.IsRef() ? kRef : kNotWide);
+    if (arg.IsWide() && cur_core_reg_ < coreArgMappingToPhysicalRegSize) {
+      result = RegStorage::MakeRegPair(
+          result, m2l_->TargetReg(coreArgMappingToPhysicalReg[cur_core_reg_++], kNotWide));
+    }
   }
-  initialized_ = true;
-}
-
-RegStorage X86Mir2Lir::GetArgMappingToPhysicalReg(int arg_num) {
-  if (!cu_->target64) {
-    return GetCoreArgMappingToPhysicalReg(arg_num);
-  }
-
-  if (!in_to_reg_storage_mapping_.IsInitialized()) {
-    int start_vreg = cu_->mir_graph->GetFirstInVR();
-    RegLocation* arg_locs = &mir_graph_->reg_location_[start_vreg];
-
-    InToRegStorageX86_64Mapper mapper(this);
-    in_to_reg_storage_mapping_.Initialize(arg_locs, mir_graph_->GetNumOfInVRs(), &mapper);
-  }
-  return in_to_reg_storage_mapping_.Get(arg_num);
-}
-
-RegStorage X86Mir2Lir::GetCoreArgMappingToPhysicalReg(int core_arg_num) const {
-  // For the 32-bit internal ABI, the first 3 arguments are passed in registers.
-  // Not used for 64-bit, TODO: Move X86_32 to the same framework
-  switch (core_arg_num) {
-    case 0: return TargetReg32(kArg1);
-    case 1: return TargetReg32(kArg2);
-    case 2: return TargetReg32(kArg3);
-    default: return RegStorage::InvalidReg();
-  }
+  return result;
 }
 
 // ---------End of ABI support: mapping of args to physical registers -------------
 
-/*
- * If there are any ins passed in registers that have not been promoted
- * to a callee-save register, flush them to the frame.  Perform initial
- * assignment of promoted arguments.
- *
- * ArgLocs is an array of location records describing the incoming arguments
- * with one location record per word of argument.
- */
-void X86Mir2Lir::FlushIns(RegLocation* ArgLocs, RegLocation rl_method) {
-  if (!cu_->target64) return Mir2Lir::FlushIns(ArgLocs, rl_method);
-  /*
-   * Dummy up a RegLocation for the incoming Method*
-   * It will attempt to keep kArg0 live (or copy it to home location
-   * if promoted).
-   */
-
-  RegLocation rl_src = rl_method;
-  rl_src.location = kLocPhysReg;
-  rl_src.reg = TargetReg(kArg0, kRef);
-  rl_src.home = false;
-  MarkLive(rl_src);
-  StoreValue(rl_method, rl_src);
-  // If Method* has been promoted, explicitly flush
-  if (rl_method.location == kLocPhysReg) {
-    const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32;
-    StoreRefDisp(rs_rSP, 0, As32BitReg(TargetReg(kArg0, kRef)), kNotVolatile);
-  }
-
-  if (mir_graph_->GetNumOfInVRs() == 0) {
-    return;
-  }
-
-  int start_vreg = cu_->mir_graph->GetFirstInVR();
-  /*
-   * Copy incoming arguments to their proper home locations.
-   * NOTE: an older version of dx had an issue in which
-   * it would reuse static method argument registers.
-   * This could result in the same Dalvik virtual register
-   * being promoted to both core and fp regs. To account for this,
-   * we only copy to the corresponding promoted physical register
-   * if it matches the type of the SSA name for the incoming
-   * argument.  It is also possible that long and double arguments
-   * end up half-promoted.  In those cases, we must flush the promoted
-   * half to memory as well.
-   */
-  ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-  for (uint32_t i = 0; i < mir_graph_->GetNumOfInVRs(); i++) {
-    // get reg corresponding to input
-    RegStorage reg = GetArgMappingToPhysicalReg(i);
-
-    RegLocation* t_loc = &ArgLocs[i];
-    if (reg.Valid()) {
-      // If arriving in register.
-
-      // We have already updated the arg location with promoted info
-      // so we can be based on it.
-      if (t_loc->location == kLocPhysReg) {
-        // Just copy it.
-        OpRegCopy(t_loc->reg, reg);
-      } else {
-        // Needs flush.
-        if (t_loc->ref) {
-          StoreRefDisp(rs_rX86_SP_64, SRegOffset(start_vreg + i), reg, kNotVolatile);
-        } else {
-          StoreBaseDisp(rs_rX86_SP_64, SRegOffset(start_vreg + i), reg, t_loc->wide ? k64 : k32,
-                        kNotVolatile);
-        }
-      }
-    } else {
-      // If arriving in frame & promoted.
-      if (t_loc->location == kLocPhysReg) {
-        if (t_loc->ref) {
-          LoadRefDisp(rs_rX86_SP_64, SRegOffset(start_vreg + i), t_loc->reg, kNotVolatile);
-        } else {
-          LoadBaseDisp(rs_rX86_SP_64, SRegOffset(start_vreg + i), t_loc->reg,
-                       t_loc->wide ? k64 : k32, kNotVolatile);
-        }
-      }
-    }
-    if (t_loc->wide) {
-      // Increment i to skip the next one.
-      i++;
-    }
-  }
-}
-
-/*
- * Load up to 5 arguments, the first three of which will be in
- * kArg1 .. kArg3.  On entry kArg0 contains the current method pointer,
- * and as part of the load sequence, it must be replaced with
- * the target method pointer.  Note, this may also be called
- * for "range" variants if the number of arguments is 5 or fewer.
- */
-int X86Mir2Lir::GenDalvikArgsNoRange(CallInfo* info,
-                                  int call_state, LIR** pcrLabel, NextCallInsn next_call_insn,
-                                  const MethodReference& target_method,
-                                  uint32_t vtable_idx, uintptr_t direct_code,
-                                  uintptr_t direct_method, InvokeType type, bool skip_this) {
-  if (!cu_->target64) {
-    return Mir2Lir::GenDalvikArgsNoRange(info,
-                                         call_state, pcrLabel, next_call_insn,
-                                         target_method,
-                                         vtable_idx, direct_code,
-                                         direct_method, type, skip_this);
-  }
-  return GenDalvikArgsRange(info,
-                            call_state, pcrLabel, next_call_insn,
-                            target_method,
-                            vtable_idx, direct_code,
-                            direct_method, type, skip_this);
-}
-
-/*
- * May have 0+ arguments (also used for jumbo).  Note that
- * source virtual registers may be in physical registers, so may
- * need to be flushed to home location before copying.  This
- * applies to arg3 and above (see below).
- *
- * Two general strategies:
- *    If < 20 arguments
- *       Pass args 3-18 using vldm/vstm block copy
- *       Pass arg0, arg1 & arg2 in kArg1-kArg3
- *    If 20+ arguments
- *       Pass args arg19+ using memcpy block copy
- *       Pass arg0, arg1 & arg2 in kArg1-kArg3
- *
- */
-int X86Mir2Lir::GenDalvikArgsRange(CallInfo* info, int call_state,
-                                LIR** pcrLabel, NextCallInsn next_call_insn,
-                                const MethodReference& target_method,
-                                uint32_t vtable_idx, uintptr_t direct_code, uintptr_t direct_method,
-                                InvokeType type, bool skip_this) {
-  if (!cu_->target64) {
-    return Mir2Lir::GenDalvikArgsRange(info, call_state,
-                                pcrLabel, next_call_insn,
-                                target_method,
-                                vtable_idx, direct_code, direct_method,
-                                type, skip_this);
-  }
-
-  /* If no arguments, just return */
-  if (info->num_arg_words == 0)
-    return call_state;
-
-  const int start_index = skip_this ? 1 : 0;
-
-  InToRegStorageX86_64Mapper mapper(this);
-  InToRegStorageMapping in_to_reg_storage_mapping;
-  in_to_reg_storage_mapping.Initialize(info->args, info->num_arg_words, &mapper);
-  const int last_mapped_in = in_to_reg_storage_mapping.GetMaxMappedIn();
-  const int size_of_the_last_mapped = last_mapped_in == -1 ? 1 :
-          info->args[last_mapped_in].wide ? 2 : 1;
-  int regs_left_to_pass_via_stack = info->num_arg_words - (last_mapped_in + size_of_the_last_mapped);
-
-  // Fisrt of all, check whether it make sense to use bulk copying
-  // Optimization is aplicable only for range case
-  // TODO: make a constant instead of 2
-  if (info->is_range && regs_left_to_pass_via_stack >= 2) {
-    // Scan the rest of the args - if in phys_reg flush to memory
-    for (int next_arg = last_mapped_in + size_of_the_last_mapped; next_arg < info->num_arg_words;) {
-      RegLocation loc = info->args[next_arg];
-      if (loc.wide) {
-        loc = UpdateLocWide(loc);
-        if (loc.location == kLocPhysReg) {
-          ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-          StoreBaseDisp(rs_rX86_SP_64, SRegOffset(loc.s_reg_low), loc.reg, k64, kNotVolatile);
-        }
-        next_arg += 2;
-      } else {
-        loc = UpdateLoc(loc);
-        if (loc.location == kLocPhysReg) {
-          ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-          StoreBaseDisp(rs_rX86_SP_64, SRegOffset(loc.s_reg_low), loc.reg, k32, kNotVolatile);
-        }
-        next_arg++;
-      }
-    }
-
-    // The rest can be copied together
-    int start_offset = SRegOffset(info->args[last_mapped_in + size_of_the_last_mapped].s_reg_low);
-    int outs_offset = StackVisitor::GetOutVROffset(last_mapped_in + size_of_the_last_mapped,
-                                                   cu_->instruction_set);
-
-    int current_src_offset = start_offset;
-    int current_dest_offset = outs_offset;
-
-    // Only davik regs are accessed in this loop; no next_call_insn() calls.
-    ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-    while (regs_left_to_pass_via_stack > 0) {
-      // This is based on the knowledge that the stack itself is 16-byte aligned.
-      bool src_is_16b_aligned = (current_src_offset & 0xF) == 0;
-      bool dest_is_16b_aligned = (current_dest_offset & 0xF) == 0;
-      size_t bytes_to_move;
-
-      /*
-       * The amount to move defaults to 32-bit. If there are 4 registers left to move, then do a
-       * a 128-bit move because we won't get the chance to try to aligned. If there are more than
-       * 4 registers left to move, consider doing a 128-bit only if either src or dest are aligned.
-       * We do this because we could potentially do a smaller move to align.
-       */
-      if (regs_left_to_pass_via_stack == 4 ||
-          (regs_left_to_pass_via_stack > 4 && (src_is_16b_aligned || dest_is_16b_aligned))) {
-        // Moving 128-bits via xmm register.
-        bytes_to_move = sizeof(uint32_t) * 4;
-
-        // Allocate a free xmm temp. Since we are working through the calling sequence,
-        // we expect to have an xmm temporary available.  AllocTempDouble will abort if
-        // there are no free registers.
-        RegStorage temp = AllocTempDouble();
-
-        LIR* ld1 = nullptr;
-        LIR* ld2 = nullptr;
-        LIR* st1 = nullptr;
-        LIR* st2 = nullptr;
-
-        /*
-         * The logic is similar for both loads and stores. If we have 16-byte alignment,
-         * do an aligned move. If we have 8-byte alignment, then do the move in two
-         * parts. This approach prevents possible cache line splits. Finally, fall back
-         * to doing an unaligned move. In most cases we likely won't split the cache
-         * line but we cannot prove it and thus take a conservative approach.
-         */
-        bool src_is_8b_aligned = (current_src_offset & 0x7) == 0;
-        bool dest_is_8b_aligned = (current_dest_offset & 0x7) == 0;
-
-        ScopedMemRefType mem_ref_type2(this, ResourceMask::kDalvikReg);
-        if (src_is_16b_aligned) {
-          ld1 = OpMovRegMem(temp, rs_rX86_SP_64, current_src_offset, kMovA128FP);
-        } else if (src_is_8b_aligned) {
-          ld1 = OpMovRegMem(temp, rs_rX86_SP_64, current_src_offset, kMovLo128FP);
-          ld2 = OpMovRegMem(temp, rs_rX86_SP_64, current_src_offset + (bytes_to_move >> 1),
-                            kMovHi128FP);
-        } else {
-          ld1 = OpMovRegMem(temp, rs_rX86_SP_64, current_src_offset, kMovU128FP);
-        }
-
-        if (dest_is_16b_aligned) {
-          st1 = OpMovMemReg(rs_rX86_SP_64, current_dest_offset, temp, kMovA128FP);
-        } else if (dest_is_8b_aligned) {
-          st1 = OpMovMemReg(rs_rX86_SP_64, current_dest_offset, temp, kMovLo128FP);
-          st2 = OpMovMemReg(rs_rX86_SP_64, current_dest_offset + (bytes_to_move >> 1),
-                            temp, kMovHi128FP);
-        } else {
-          st1 = OpMovMemReg(rs_rX86_SP_64, current_dest_offset, temp, kMovU128FP);
-        }
-
-        // TODO If we could keep track of aliasing information for memory accesses that are wider
-        // than 64-bit, we wouldn't need to set up a barrier.
-        if (ld1 != nullptr) {
-          if (ld2 != nullptr) {
-            // For 64-bit load we can actually set up the aliasing information.
-            AnnotateDalvikRegAccess(ld1, current_src_offset >> 2, true, true);
-            AnnotateDalvikRegAccess(ld2, (current_src_offset + (bytes_to_move >> 1)) >> 2, true, true);
-          } else {
-            // Set barrier for 128-bit load.
-            ld1->u.m.def_mask = &kEncodeAll;
-          }
-        }
-        if (st1 != nullptr) {
-          if (st2 != nullptr) {
-            // For 64-bit store we can actually set up the aliasing information.
-            AnnotateDalvikRegAccess(st1, current_dest_offset >> 2, false, true);
-            AnnotateDalvikRegAccess(st2, (current_dest_offset + (bytes_to_move >> 1)) >> 2, false, true);
-          } else {
-            // Set barrier for 128-bit store.
-            st1->u.m.def_mask = &kEncodeAll;
-          }
-        }
-
-        // Free the temporary used for the data movement.
-        FreeTemp(temp);
-      } else {
-        // Moving 32-bits via general purpose register.
-        bytes_to_move = sizeof(uint32_t);
-
-        // Instead of allocating a new temp, simply reuse one of the registers being used
-        // for argument passing.
-        RegStorage temp = TargetReg(kArg3, kNotWide);
-
-        // Now load the argument VR and store to the outs.
-        Load32Disp(rs_rX86_SP_64, current_src_offset, temp);
-        Store32Disp(rs_rX86_SP_64, current_dest_offset, temp);
-      }
-
-      current_src_offset += bytes_to_move;
-      current_dest_offset += bytes_to_move;
-      regs_left_to_pass_via_stack -= (bytes_to_move >> 2);
-    }
-    DCHECK_EQ(regs_left_to_pass_via_stack, 0);
-  }
-
-  // Now handle rest not registers if they are
-  if (in_to_reg_storage_mapping.IsThereStackMapped()) {
-    RegStorage regSingle = TargetReg(kArg2, kNotWide);
-    RegStorage regWide = TargetReg(kArg3, kWide);
-    for (int i = start_index;
-         i < last_mapped_in + size_of_the_last_mapped + regs_left_to_pass_via_stack; i++) {
-      RegLocation rl_arg = info->args[i];
-      rl_arg = UpdateRawLoc(rl_arg);
-      RegStorage reg = in_to_reg_storage_mapping.Get(i);
-      if (!reg.Valid()) {
-        int out_offset = StackVisitor::GetOutVROffset(i, cu_->instruction_set);
-
-        {
-          ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-          if (rl_arg.wide) {
-            if (rl_arg.location == kLocPhysReg) {
-              StoreBaseDisp(rs_rX86_SP_64, out_offset, rl_arg.reg, k64, kNotVolatile);
-            } else {
-              LoadValueDirectWideFixed(rl_arg, regWide);
-              StoreBaseDisp(rs_rX86_SP_64, out_offset, regWide, k64, kNotVolatile);
-            }
-          } else {
-            if (rl_arg.location == kLocPhysReg) {
-              StoreBaseDisp(rs_rX86_SP_64, out_offset, rl_arg.reg, k32, kNotVolatile);
-            } else {
-              LoadValueDirectFixed(rl_arg, regSingle);
-              StoreBaseDisp(rs_rX86_SP_64, out_offset, regSingle, k32, kNotVolatile);
-            }
-          }
-        }
-        call_state = next_call_insn(cu_, info, call_state, target_method,
-                                    vtable_idx, direct_code, direct_method, type);
-      }
-      if (rl_arg.wide) {
-        i++;
-      }
-    }
-  }
-
-  // Finish with mapped registers
-  for (int i = start_index; i <= last_mapped_in; i++) {
-    RegLocation rl_arg = info->args[i];
-    rl_arg = UpdateRawLoc(rl_arg);
-    RegStorage reg = in_to_reg_storage_mapping.Get(i);
-    if (reg.Valid()) {
-      if (rl_arg.wide) {
-        LoadValueDirectWideFixed(rl_arg, reg);
-      } else {
-        LoadValueDirectFixed(rl_arg, reg);
-      }
-      call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
-                               direct_code, direct_method, type);
-    }
-    if (rl_arg.wide) {
-      i++;
-    }
-  }
-
-  call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
-                           direct_code, direct_method, type);
-  if (pcrLabel) {
-    if (!cu_->compiler_driver->GetCompilerOptions().GetImplicitNullChecks()) {
-      *pcrLabel = GenExplicitNullCheck(TargetReg(kArg1, kRef), info->opt_flags);
-    } else {
-      *pcrLabel = nullptr;
-      // In lieu of generating a check for kArg1 being null, we need to
-      // perform a load when doing implicit checks.
-      RegStorage tmp = AllocTemp();
-      Load32Disp(TargetReg(kArg1, kRef), 0, tmp);
-      MarkPossibleNullPointerException(info->opt_flags);
-      FreeTemp(tmp);
-    }
-  }
-  return call_state;
-}
-
 bool X86Mir2Lir::GenInlinedCharAt(CallInfo* info) {
   // Location of reference to data array
   int value_offset = mirror::String::ValueOffset().Int32Value();
@@ -2969,4 +2563,122 @@
   }
 }
 
+int X86Mir2Lir::GenDalvikArgsBulkCopy(CallInfo* info, int first, int count) {
+  if (count < 4) {
+    // It does not make sense to use this utility if we have no chance to use
+    // a 128-bit move.
+    return count;
+  }
+  GenDalvikArgsFlushPromoted(info, first);
+
+  // The rest of the arguments can be copied together.
+  int current_src_offset = SRegOffset(info->args[first].s_reg_low);
+  int current_dest_offset = StackVisitor::GetOutVROffset(first, cu_->instruction_set);
+
+  // Only Dalvik regs are accessed in this loop; no next_call_insn() calls.
+  ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
+  while (count > 0) {
+    // This is based on the knowledge that the stack itself is 16-byte aligned.
+    bool src_is_16b_aligned = (current_src_offset & 0xF) == 0;
+    bool dest_is_16b_aligned = (current_dest_offset & 0xF) == 0;
+    size_t bytes_to_move;
+
+    /*
+     * The amount to move defaults to 32-bit. If there are exactly 4 registers left to move,
+     * do a 128-bit move because we won't get another chance to align. If there are more than
+     * 4 registers left to move, consider doing a 128-bit move only if either src or dest is
+     * aligned. We do this because we could potentially do a smaller move to align.
+     */
+    if (count == 4 || (count > 4 && (src_is_16b_aligned || dest_is_16b_aligned))) {
+      // Moving 128 bits via an xmm register.
+      bytes_to_move = sizeof(uint32_t) * 4;
+
+      // Allocate a free xmm temp. Since we are working through the calling sequence,
+      // we expect to have an xmm temporary available. AllocTempDouble will abort if
+      // there are no free registers.
+      RegStorage temp = AllocTempDouble();
+
+      LIR* ld1 = nullptr;
+      LIR* ld2 = nullptr;
+      LIR* st1 = nullptr;
+      LIR* st2 = nullptr;
+
+      /*
+       * The logic is similar for both loads and stores. If we have 16-byte alignment,
+       * do an aligned move. If we have 8-byte alignment, then do the move in two
+       * parts. This approach prevents possible cache line splits. Finally, fall back
+       * to doing an unaligned move. In most cases we likely won't split the cache
+       * line but we cannot prove it and thus take a conservative approach.
+       */
+      bool src_is_8b_aligned = (current_src_offset & 0x7) == 0;
+      bool dest_is_8b_aligned = (current_dest_offset & 0x7) == 0;
+
+      if (src_is_16b_aligned) {
+        ld1 = OpMovRegMem(temp, TargetPtrReg(kSp), current_src_offset, kMovA128FP);
+      } else if (src_is_8b_aligned) {
+        ld1 = OpMovRegMem(temp, TargetPtrReg(kSp), current_src_offset, kMovLo128FP);
+        ld2 = OpMovRegMem(temp, TargetPtrReg(kSp), current_src_offset + (bytes_to_move >> 1),
+                          kMovHi128FP);
+      } else {
+        ld1 = OpMovRegMem(temp, TargetPtrReg(kSp), current_src_offset, kMovU128FP);
+      }
+
+      if (dest_is_16b_aligned) {
+        st1 = OpMovMemReg(TargetPtrReg(kSp), current_dest_offset, temp, kMovA128FP);
+      } else if (dest_is_8b_aligned) {
+        st1 = OpMovMemReg(TargetPtrReg(kSp), current_dest_offset, temp, kMovLo128FP);
+        st2 = OpMovMemReg(TargetPtrReg(kSp), current_dest_offset + (bytes_to_move >> 1),
+                          temp, kMovHi128FP);
+      } else {
+        st1 = OpMovMemReg(TargetPtrReg(kSp), current_dest_offset, temp, kMovU128FP);
+      }
+
+      // TODO: If we could keep track of aliasing information for memory accesses that are wider
+      // than 64-bit, we wouldn't need to set up a barrier.
+      if (ld1 != nullptr) {
+        if (ld2 != nullptr) {
+          // For 64-bit load we can actually set up the aliasing information.
+          AnnotateDalvikRegAccess(ld1, current_src_offset >> 2, true, true);
+          AnnotateDalvikRegAccess(ld2, (current_src_offset + (bytes_to_move >> 1)) >> 2, true,
+                                  true);
+        } else {
+          // Set barrier for 128-bit load.
+          ld1->u.m.def_mask = &kEncodeAll;
+        }
+      }
+      if (st1 != nullptr) {
+        if (st2 != nullptr) {
+          // For 64-bit store we can actually set up the aliasing information.
+          AnnotateDalvikRegAccess(st1, current_dest_offset >> 2, false, true);
+          AnnotateDalvikRegAccess(st2, (current_dest_offset + (bytes_to_move >> 1)) >> 2, false,
+                                  true);
+        } else {
+          // Set barrier for 128-bit store.
+          st1->u.m.def_mask = &kEncodeAll;
+        }
+      }
+
+      // Free the temporary used for the data movement.
+      FreeTemp(temp);
+    } else {
+      // Moving 32 bits via a general purpose register.
+      bytes_to_move = sizeof(uint32_t);
+
+      // Instead of allocating a new temp, simply reuse one of the registers being used
+      // for argument passing.
+      RegStorage temp = TargetReg(kArg3, kNotWide);
+
+      // Now load the argument VR and store to the outs.
+      Load32Disp(TargetPtrReg(kSp), current_src_offset, temp);
+      Store32Disp(TargetPtrReg(kSp), current_dest_offset, temp);
+    }
+
+    current_src_offset += bytes_to_move;
+    current_dest_offset += bytes_to_move;
+    count -= (bytes_to_move >> 2);
+  }
+  DCHECK_EQ(count, 0);
+  return count;
+}
+
 }  // namespace art
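
For reference, the per-iteration copy-size rule used by GenDalvikArgsBulkCopy can be sketched
in isolation: use a 128-bit xmm copy when exactly four 32-bit slots remain (no later chance to
align), or when more than four remain and either the source or the destination is already
16-byte aligned; otherwise fall back to a 32-bit copy so a later iteration can reach a 16-byte
boundary. A minimal standalone C++ sketch of that rule follows; it is illustrative only, and
ChooseBytesToMove is a hypothetical name, not an ART function.

#include <cstddef>
#include <cstdint>
#include <cstdio>

// Per-iteration copy-size decision, mirroring GenDalvikArgsBulkCopy:
// prefer a 128-bit xmm copy when it is the last chance or when one side
// is already 16-byte aligned, otherwise copy 32 bits and retry.
static size_t ChooseBytesToMove(int slots_left, int src_offset, int dest_offset) {
  bool src_is_16b_aligned = (src_offset & 0xF) == 0;
  bool dest_is_16b_aligned = (dest_offset & 0xF) == 0;
  if (slots_left == 4 || (slots_left > 4 && (src_is_16b_aligned || dest_is_16b_aligned))) {
    return sizeof(uint32_t) * 4;  // 128-bit move via an xmm register.
  }
  return sizeof(uint32_t);        // 32-bit move; a later iteration may reach alignment.
}

int main() {
  // Example: six 32-bit slots, source 16-byte aligned, destination off by 8 bytes.
  int slots_left = 6;
  int src = 0;
  int dest = 8;
  while (slots_left > 0) {
    size_t bytes = ChooseBytesToMove(slots_left, src, dest);
    std::printf("copy %zu bytes (src=%d, dest=%d)\n", bytes, src, dest);
    src += static_cast<int>(bytes);
    dest += static_cast<int>(bytes);
    slots_left -= static_cast<int>(bytes >> 2);
  }
  return 0;
}

With count < 4 the helper returns early (see the check at the top of the function), since a
128-bit move could never be used in that case.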
diff --git a/test/800-smali/expected.txt b/test/800-smali/expected.txt
index 0f7001f..5f86f1e 100644
--- a/test/800-smali/expected.txt
+++ b/test/800-smali/expected.txt
@@ -8,4 +8,5 @@
 invoke-super abstract
 BadCaseInOpRegRegReg
 CmpLong
+FloatIntConstPassing
 Done!
diff --git a/test/800-smali/smali/FloatIntConstPassing.smali b/test/800-smali/smali/FloatIntConstPassing.smali
new file mode 100644
index 0000000..a2916c5
--- /dev/null
+++ b/test/800-smali/smali/FloatIntConstPassing.smali
@@ -0,0 +1,29 @@
+.class public LFloatIntConstPassing;
+
+.super Ljava/lang/Object;
+
+.method public static getInt(I)I
+  .registers 2
+  const/4 v0, 1
+  add-int/2addr v0, p0
+  return v0
+.end method
+
+.method public static getFloat(F)F
+  .registers 2
+  const/4 v0, 0
+  mul-float/2addr v0, p0
+  return v0
+.end method
+
+.method public static run()I
+  .registers 3
+  const/4 v0, 1
+  invoke-static {v0}, LFloatIntConstPassing;->getInt(I)I
+  move-result v1
+  invoke-static {v0}, LFloatIntConstPassing;->getFloat(F)F
+  move-result v2
+  float-to-int v2, v2
+  add-int/2addr v1, v2
+  return v1
+.end method
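
The test passes the same constant register first to getInt(I)I and then to getFloat(F)F, so
the invoke sequence must consult the shorty to route the value to a core register for the int
call and an fp register for the float call. Reinterpreting raw bits 1 gives a tiny denormal
float, and getFloat multiplies it by zero, so the float leg contributes 0 and the int leg
contributes 2. A rough standalone C++ analogue of the arithmetic (illustrative only;
Reinterpret is a hypothetical helper, not part of the patch) shows why the expected result
added to Main.java below is 2.

#include <cstdint>
#include <cstring>
#include <cstdio>

// Hypothetical helper: reinterpret the raw int bits as a float, like passing
// the same Dalvik VR to a method that declares a float parameter.
static float Reinterpret(int32_t raw) {
  float f;
  std::memcpy(&f, &raw, sizeof(f));
  return f;
}

static int getInt(int x) { return 1 + x; }           // smali: const/4 v0, 1; add-int/2addr
static float getFloat(float f) { return 0.0f * f; }  // smali: const/4 v0, 0; mul-float/2addr

int main() {
  int32_t v0 = 1;  // the shared constant register
  int result = getInt(v0) + static_cast<int>(getFloat(Reinterpret(v0)));
  std::printf("%d\n", result);  // prints 2, matching the TestCase added below
  return 0;
}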
diff --git a/test/800-smali/src/Main.java b/test/800-smali/src/Main.java
index f2c1ab5..a2db051 100644
--- a/test/800-smali/src/Main.java
+++ b/test/800-smali/src/Main.java
@@ -64,6 +64,7 @@
             new Object[]{0}, new AbstractMethodError(), null));
         testCases.add(new TestCase("BadCaseInOpRegRegReg", "BadCaseInOpRegRegReg", "getInt", null, null, 2));
         testCases.add(new TestCase("CmpLong", "CmpLong", "run", null, null, 0));
+        testCases.add(new TestCase("FloatIntConstPassing", "FloatIntConstPassing", "run", null, null, 2));
     }
 
     public void runTests() {