Add suspend checks to special methods.

Generate suspend checks at the beginning of special methods.
If we need to call into the runtime, go to the slow path where
we create a simplified but valid frame, spill all arguments,
call art_quick_test_suspend, restore the necessary arguments
and return to the fast path. This keeps the fast-path overhead
to a minimum.
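
For illustration, a special getter now compiles to roughly the
following shape. This is a hypothetical C++ rendering, not actual
emitted code; TestSuspendFlags(), PushFrame(), SpillArgs() and
friends stand in for the per-architecture emission done by
OpTestSuspend(), GenSpecialEntryForSuspend(), SpillArg()/
UnspillArg() and GenSpecialExitForSuspend():

    int32_t SpecialIGet(ArtMethod* method, mirror::Object* obj) {
      if (UNLIKELY(TestSuspendFlags())) {  // Fast path: one flags test.
        // Slow path, out of line: simplified but valid frame.
        PushFrame(method);  // ArtMethod*, callee-saves, return address.
        SpillArgs();        // Spill all argument registers.
        art_quick_test_suspend();
        UnspillArgs();      // Restore only args the fast path still uses.
        PopFrame();
      }
      return obj->field;    // Unchanged fast-path body.
    }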

Bug: 19245639
Change-Id: I3de5aee783943941322a49c4cf2c4c94411dbaa2
diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc
index 08ca1b2..590fe1d 100644
--- a/compiler/dex/mir_graph.cc
+++ b/compiler/dex/mir_graph.cc
@@ -1738,7 +1738,7 @@
   info->num_arg_words = mir->ssa_rep->num_uses;
   info->args = (info->num_arg_words == 0) ? nullptr :
       arena_->AllocArray<RegLocation>(info->num_arg_words, kArenaAllocMisc);
-  for (int i = 0; i < info->num_arg_words; i++) {
+  for (size_t i = 0; i < info->num_arg_words; i++) {
     info->args[i] = GetRawSrc(mir, i);
   }
   info->opt_flags = mir->optimization_flags;
diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h
index 020136c..d74e7b6 100644
--- a/compiler/dex/mir_graph.h
+++ b/compiler/dex/mir_graph.h
@@ -498,19 +498,19 @@
  * more efficient invoke code generation.
  */
 struct CallInfo {
-  int num_arg_words;    // Note: word count, not arg count.
-  RegLocation* args;    // One for each word of arguments.
-  RegLocation result;   // Eventual target of MOVE_RESULT.
+  size_t num_arg_words;   // Note: word count, not arg count.
+  RegLocation* args;      // One for each word of arguments.
+  RegLocation result;     // Eventual target of MOVE_RESULT.
   int opt_flags;
   InvokeType type;
   uint32_t dex_idx;
-  uint32_t index;       // Method idx for invokes, type idx for FilledNewArray.
+  uint32_t index;         // Method idx for invokes, type idx for FilledNewArray.
   uintptr_t direct_code;
   uintptr_t direct_method;
-  RegLocation target;    // Target of following move_result.
+  RegLocation target;     // Target of following move_result.
   bool skip_this;
   bool is_range;
-  DexOffset offset;      // Offset in code units.
+  DexOffset offset;       // Offset in code units.
   MIR* mir;
 };
 
diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc
index 1b5dde2..9cf005b 100644
--- a/compiler/dex/quick/arm/call_arm.cc
+++ b/compiler/dex/quick/arm/call_arm.cc
@@ -484,6 +484,28 @@
   NewLIR1(kThumbBx, rs_rARM_LR.GetReg());
 }
 
+void ArmMir2Lir::GenSpecialEntryForSuspend() {
+  // Keep 16-byte stack alignment - push r0, i.e. ArtMethod*, r5, r6, lr.
+  DCHECK(!IsTemp(rs_r5));
+  DCHECK(!IsTemp(rs_r6));
+  core_spill_mask_ =
+      (1u << rs_r5.GetRegNum()) | (1u << rs_r6.GetRegNum()) | (1u << rs_rARM_LR.GetRegNum());
+  num_core_spills_ = 3u;
+  fp_spill_mask_ = 0u;
+  num_fp_spills_ = 0u;
+  frame_size_ = 16u;
+  core_vmap_table_.clear();
+  fp_vmap_table_.clear();
+  NewLIR1(kThumbPush, (1u << rs_r0.GetRegNum()) |                 // ArtMethod*
+          (core_spill_mask_ & ~(1u << rs_rARM_LR.GetRegNum())) |  // Spills other than LR.
+          (1u << 8));                                             // LR encoded for 16-bit push.
+}
+
+void ArmMir2Lir::GenSpecialExitForSuspend() {
+  // Pop the frame. (ArtMethod* no longer needed but restore it anyway.)
+  NewLIR1(kThumb2Pop, (1u << rs_r0.GetRegNum()) | core_spill_mask_);  // 32-bit because of LR.
+}
+
 static bool ArmUseRelativeCall(CompilationUnit* cu, const MethodReference& target_method) {
   // Emit relative calls only within a dex file due to the limited range of the BL insn.
   return cu->dex_file == target_method.dex_file;
diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h
index 025e69f..a62e597 100644
--- a/compiler/dex/quick/arm/codegen_arm.h
+++ b/compiler/dex/quick/arm/codegen_arm.h
@@ -167,7 +167,9 @@
     void GenDivZeroCheckWide(RegStorage reg);
     void GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method);
     void GenExitSequence();
-    void GenSpecialExitSequence();
+    void GenSpecialExitSequence() OVERRIDE;
+    void GenSpecialEntryForSuspend() OVERRIDE;
+    void GenSpecialExitForSuspend() OVERRIDE;
     void GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias, bool is_double);
     void GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir);
     void GenSelect(BasicBlock* bb, MIR* mir);
diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc
index d1e4b7e..24e8fdf 100644
--- a/compiler/dex/quick/arm64/call_arm64.cc
+++ b/compiler/dex/quick/arm64/call_arm64.cc
@@ -392,6 +392,23 @@
   NewLIR0(kA64Ret);
 }
 
+void Arm64Mir2Lir::GenSpecialEntryForSuspend() {
+  // Keep 16-byte stack alignment - push x0, i.e. ArtMethod*, lr.
+  core_spill_mask_ = (1u << rs_xLR.GetRegNum());
+  num_core_spills_ = 1u;
+  fp_spill_mask_ = 0u;
+  num_fp_spills_ = 0u;
+  frame_size_ = 16u;
+  core_vmap_table_.clear();
+  fp_vmap_table_.clear();
+  NewLIR4(WIDE(kA64StpPre4rrXD), rs_x0.GetReg(), rs_xLR.GetReg(), rs_sp.GetReg(), -frame_size_ / 8);
+}
+
+void Arm64Mir2Lir::GenSpecialExitForSuspend() {
+  // Pop the frame. (ArtMethod* no longer needed but restore it anyway.)
+  NewLIR4(WIDE(kA64LdpPost4rrXD), rs_x0.GetReg(), rs_xLR.GetReg(), rs_sp.GetReg(), frame_size_ / 8);
+}
+
 static bool Arm64UseRelativeCall(CompilationUnit* cu, const MethodReference& target_method) {
   // Emit relative calls anywhere in the image or within a dex file otherwise.
   return cu->compiler_driver->IsImage() || cu->dex_file == target_method.dex_file;
diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h
index 49ca625..d5f0536 100644
--- a/compiler/dex/quick/arm64/codegen_arm64.h
+++ b/compiler/dex/quick/arm64/codegen_arm64.h
@@ -169,6 +169,8 @@
   void GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) OVERRIDE;
   void GenExitSequence() OVERRIDE;
   void GenSpecialExitSequence() OVERRIDE;
+  void GenSpecialEntryForSuspend() OVERRIDE;
+  void GenSpecialExitForSuspend() OVERRIDE;
   void GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias, bool is_double) OVERRIDE;
   void GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) OVERRIDE;
   void GenSelect(BasicBlock* bb, MIR* mir) OVERRIDE;
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index 9f53b89..3c9b7a3 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -413,7 +413,7 @@
  * Current code also throws internal unimp if not 'L', '[' or 'I'.
  */
 void Mir2Lir::GenFilledNewArray(CallInfo* info) {
-  int elems = info->num_arg_words;
+  size_t elems = info->num_arg_words;
   int type_idx = info->index;
   FlushAllRegs();  /* Everything to home location */
   QuickEntrypointEnum target;
@@ -450,7 +450,7 @@
      * of any regs in the source range that have been promoted to
      * home location.
      */
-    for (int i = 0; i < elems; i++) {
+    for (size_t i = 0; i < elems; i++) {
       RegLocation loc = UpdateLoc(info->args[i]);
       if (loc.location == kLocPhysReg) {
         ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
@@ -493,7 +493,7 @@
     OpRegRegImm(kOpAdd, r_dst, ref_reg,
                 mirror::Array::DataOffset(component_size).Int32Value());
     // Set up the loop counter (known to be > 0)
-    LoadConstant(r_idx, elems - 1);
+    LoadConstant(r_idx, static_cast<int>(elems - 1));
     // Generate the copy loop.  Going backwards for convenience
     LIR* loop_head_target = NewLIR0(kPseudoTargetLabel);
     // Copy next element
@@ -515,9 +515,9 @@
     FreeTemp(r_dst);
     FreeTemp(r_src);
   } else {
-    DCHECK_LE(elems, 5);  // Usually but not necessarily non-range.
+    DCHECK_LE(elems, 5u);  // Usually but not necessarily non-range.
     // TUNING: interleave
-    for (int i = 0; i < elems; i++) {
+    for (size_t i = 0; i < elems; i++) {
       RegLocation rl_arg;
       if (info->args[i].ref) {
         rl_arg = LoadValue(info->args[i], kRefReg);
@@ -537,7 +537,7 @@
   }
   if (elems != 0 && info->args[0].ref) {
     // If there is at least one potentially non-null value, unconditionally mark the GC card.
-    for (int i = 0; i < elems; i++) {
+    for (size_t i = 0; i < elems; i++) {
       if (!mir_graph_->IsConstantNullRef(info->args[i])) {
         UnconditionallyMarkGCCard(ref_reg);
         break;
@@ -2158,7 +2158,7 @@
   }
 }
 
-class SuspendCheckSlowPath : public Mir2Lir::LIRSlowPath {
+class Mir2Lir::SuspendCheckSlowPath : public Mir2Lir::LIRSlowPath {
  public:
   SuspendCheckSlowPath(Mir2Lir* m2l, LIR* branch, LIR* cont)
       : LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch, cont) {
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index bb5b0cd..8e3df7c 100755
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -405,9 +405,10 @@
    */
   ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
   RegLocation* t_loc = nullptr;
+  EnsureInitializedArgMappingToPhysicalReg();
   for (uint32_t i = 0; i < mir_graph_->GetNumOfInVRs(); i += t_loc->wide ? 2 : 1) {
     // get reg corresponding to input
-    RegStorage reg = GetArgMappingToPhysicalReg(i);
+    RegStorage reg = in_to_reg_storage_mapping_.GetReg(i);
     t_loc = &ArgLocs[i];
 
     // If the wide input appeared as single, flush it and go
@@ -661,7 +662,7 @@
   }
   ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
   // Scan the rest of the args - if in phys_reg flush to memory
-  for (int next_arg = start; next_arg < info->num_arg_words;) {
+  for (size_t next_arg = start; next_arg < info->num_arg_words;) {
     RegLocation loc = info->args[next_arg];
     if (loc.wide) {
       loc = UpdateLocWide(loc);
@@ -719,10 +720,10 @@
                            uint32_t vtable_idx, uintptr_t direct_code, uintptr_t direct_method,
                            InvokeType type, bool skip_this) {
   // If no arguments, just return.
-  if (info->num_arg_words == 0)
+  if (info->num_arg_words == 0u)
     return call_state;
 
-  const int start_index = skip_this ? 1 : 0;
+  const size_t start_index = skip_this ? 1 : 0;
 
   // Get architecture dependent mapping between output VRs and physical registers
   // basing on shorty of method to call.
@@ -733,13 +734,13 @@
     in_to_reg_storage_mapping.Initialize(&shorty_iterator, GetResetedInToRegStorageMapper());
   }
 
-  int stack_map_start = std::max(in_to_reg_storage_mapping.GetMaxMappedIn() + 1, start_index);
+  size_t stack_map_start = std::max(in_to_reg_storage_mapping.GetEndMappedIn(), start_index);
   if ((stack_map_start < info->num_arg_words) && info->args[stack_map_start].high_word) {
     // It is possible that the last mapped reg is 32 bit while arg is 64-bit.
     // It will be handled together with low part mapped to register.
     stack_map_start++;
   }
-  int regs_left_to_pass_via_stack = info->num_arg_words - stack_map_start;
+  size_t regs_left_to_pass_via_stack = info->num_arg_words - stack_map_start;
 
   // If it is a range case we can try to copy remaining VRs (not mapped to physical registers)
   // using more optimal algorithm.
@@ -755,11 +756,10 @@
     RegStorage regRef = TargetReg(kArg3, kRef);
     RegStorage regSingle = TargetReg(kArg3, kNotWide);
     RegStorage regWide = TargetReg(kArg2, kWide);
-    for (int i = start_index;
-         i < stack_map_start + regs_left_to_pass_via_stack; i++) {
+    for (size_t i = start_index; i < stack_map_start + regs_left_to_pass_via_stack; i++) {
       RegLocation rl_arg = info->args[i];
       rl_arg = UpdateRawLoc(rl_arg);
-      RegStorage reg = in_to_reg_storage_mapping.Get(i);
+      RegStorage reg = in_to_reg_storage_mapping.GetReg(i);
       if (!reg.Valid()) {
         int out_offset = StackVisitor::GetOutVROffset(i, cu_->instruction_set);
         {
@@ -799,10 +799,10 @@
   }
 
   // Finish with VRs mapped to physical registers.
-  for (int i = start_index; i < stack_map_start; i++) {
+  for (size_t i = start_index; i < stack_map_start; i++) {
     RegLocation rl_arg = info->args[i];
     rl_arg = UpdateRawLoc(rl_arg);
-    RegStorage reg = in_to_reg_storage_mapping.Get(i);
+    RegStorage reg = in_to_reg_storage_mapping.GetReg(i);
     if (reg.Valid()) {
       if (rl_arg.wide) {
         // if reg is not 64-bit (it is half of 64-bit) then handle it separately.
@@ -852,12 +852,11 @@
   return call_state;
 }
 
-RegStorage Mir2Lir::GetArgMappingToPhysicalReg(int arg_num) {
+void Mir2Lir::EnsureInitializedArgMappingToPhysicalReg() {
   if (!in_to_reg_storage_mapping_.IsInitialized()) {
     ShortyIterator shorty_iterator(cu_->shorty, cu_->invoke_type == kStatic);
     in_to_reg_storage_mapping_.Initialize(&shorty_iterator, GetResetedInToRegStorageMapper());
   }
-  return in_to_reg_storage_mapping_.Get(arg_num);
 }
 
 RegLocation Mir2Lir::InlineTarget(CallInfo* info) {
diff --git a/compiler/dex/quick/mips/call_mips.cc b/compiler/dex/quick/mips/call_mips.cc
index 0719b52..d9471f6 100644
--- a/compiler/dex/quick/mips/call_mips.cc
+++ b/compiler/dex/quick/mips/call_mips.cc
@@ -315,6 +315,26 @@
   OpReg(kOpBx, rs_rRA);
 }
 
+void MipsMir2Lir::GenSpecialEntryForSuspend() {
+  // Keep 16-byte stack alignment - push A0, i.e. ArtMethod*, 2 filler words and RA.
+  core_spill_mask_ = (1u << rs_rRA.GetRegNum());
+  num_core_spills_ = 1u;
+  fp_spill_mask_ = 0u;
+  num_fp_spills_ = 0u;
+  frame_size_ = 16u;
+  core_vmap_table_.clear();
+  fp_vmap_table_.clear();
+  OpRegImm(kOpSub, rs_rMIPS_SP, frame_size_);
+  Store32Disp(rs_rMIPS_SP, frame_size_ - 4, rs_rRA);
+  Store32Disp(rs_rMIPS_SP, 0, rs_rA0);
+}
+
+void MipsMir2Lir::GenSpecialExitForSuspend() {
+  // Pop the frame. Don't pop ArtMethod*; it's no longer needed.
+  Load32Disp(rs_rMIPS_SP, frame_size_ - 4, rs_rRA);
+  OpRegImm(kOpAdd, rs_rMIPS_SP, frame_size_);
+}
+
 /*
  * Bit of a hack here - in the absence of a real scheduling pass,
  * emit the next instruction in static & direct invoke sequences.
diff --git a/compiler/dex/quick/mips/codegen_mips.h b/compiler/dex/quick/mips/codegen_mips.h
index a37fe40..e1b43ca 100644
--- a/compiler/dex/quick/mips/codegen_mips.h
+++ b/compiler/dex/quick/mips/codegen_mips.h
@@ -141,7 +141,9 @@
     void GenDivZeroCheckWide(RegStorage reg);
     void GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method);
     void GenExitSequence();
-    void GenSpecialExitSequence();
+    void GenSpecialExitSequence() OVERRIDE;
+    void GenSpecialEntryForSuspend() OVERRIDE;
+    void GenSpecialExitForSuspend() OVERRIDE;
     void GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias, bool is_double);
     void GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir);
     void GenSelect(BasicBlock* bb, MIR* mir);
diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc
index 9f6d8af..34e5e25 100644
--- a/compiler/dex/quick/mir_to_lir.cc
+++ b/compiler/dex/quick/mir_to_lir.cc
@@ -24,6 +24,69 @@
 
 namespace art {
 
+class Mir2Lir::SpecialSuspendCheckSlowPath : public Mir2Lir::LIRSlowPath {
+ public:
+  SpecialSuspendCheckSlowPath(Mir2Lir* m2l, LIR* branch, LIR* cont)
+      : LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch, cont),
+        num_used_args_(0u) {
+  }
+
+  void PreserveArg(int in_position) {
+    // Avoid duplicates.
+    for (size_t i = 0; i != num_used_args_; ++i) {
+      if (used_args_[i] == in_position) {
+        return;
+      }
+    }
+    DCHECK_LT(num_used_args_, kMaxArgsToPreserve);
+    used_args_[num_used_args_] = in_position;
+    ++num_used_args_;
+  }
+
+  void Compile() OVERRIDE {
+    m2l_->ResetRegPool();
+    m2l_->ResetDefTracking();
+    GenerateTargetLabel(kPseudoSuspendTarget);
+
+    m2l_->LockCallTemps();
+
+    // Generate frame.
+    m2l_->GenSpecialEntryForSuspend();
+
+    // Spill all args.
+    for (size_t i = 0, end = m2l_->in_to_reg_storage_mapping_.GetEndMappedIn(); i < end;
+        i += m2l_->in_to_reg_storage_mapping_.GetShorty(i).IsWide() ? 2u : 1u) {
+      m2l_->SpillArg(i);
+    }
+
+    m2l_->FreeCallTemps();
+
+    // Do the actual suspend call to runtime.
+    m2l_->CallRuntimeHelper(kQuickTestSuspend, true);
+
+    m2l_->LockCallTemps();
+
+    // Unspill used regs. (Don't unspill unused args.)
+    for (size_t i = 0; i != num_used_args_; ++i) {
+      m2l_->UnspillArg(used_args_[i]);
+    }
+
+    // Pop the frame.
+    m2l_->GenSpecialExitForSuspend();
+
+    // Branch to the continue label.
+    DCHECK(cont_ != nullptr);
+    m2l_->OpUnconditionalBranch(cont_);
+
+    m2l_->FreeCallTemps();
+  }
+
+ private:
+  static constexpr size_t kMaxArgsToPreserve = 2u;
+  size_t num_used_args_;
+  int used_args_[kMaxArgsToPreserve];
+};
+
 RegisterClass Mir2Lir::ShortyToRegClass(char shorty_type) {
   RegisterClass res;
   switch (shorty_type) {
@@ -54,15 +117,15 @@
   return res;
 }
 
-void Mir2Lir::LockArg(int in_position, bool) {
-  RegStorage reg_arg = GetArgMappingToPhysicalReg(in_position);
+void Mir2Lir::LockArg(size_t in_position) {
+  RegStorage reg_arg = in_to_reg_storage_mapping_.GetReg(in_position);
 
   if (reg_arg.Valid()) {
     LockTemp(reg_arg);
   }
 }
 
-RegStorage Mir2Lir::LoadArg(int in_position, RegisterClass reg_class, bool wide) {
+RegStorage Mir2Lir::LoadArg(size_t in_position, RegisterClass reg_class, bool wide) {
   ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
   int offset = StackVisitor::GetOutVROffset(in_position, cu_->instruction_set);
 
@@ -82,7 +145,7 @@
     offset += sizeof(uint64_t);
   }
 
-  RegStorage reg_arg = GetArgMappingToPhysicalReg(in_position);
+  RegStorage reg_arg = in_to_reg_storage_mapping_.GetReg(in_position);
 
   // TODO: REVISIT: This adds a spill of low part while we could just copy it.
   if (reg_arg.Valid() && wide && (reg_arg.GetWideKind() == kNotWide)) {
@@ -112,7 +175,7 @@
   return reg_arg;
 }
 
-void Mir2Lir::LoadArgDirect(int in_position, RegLocation rl_dest) {
+void Mir2Lir::LoadArgDirect(size_t in_position, RegLocation rl_dest) {
   DCHECK_EQ(rl_dest.location, kLocPhysReg);
   ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
   int offset = StackVisitor::GetOutVROffset(in_position, cu_->instruction_set);
@@ -132,7 +195,7 @@
     offset += sizeof(uint64_t);
   }
 
-  RegStorage reg_arg = GetArgMappingToPhysicalReg(in_position);
+  RegStorage reg_arg = in_to_reg_storage_mapping_.GetReg(in_position);
 
   // TODO: REVISIT: This adds a spill of low part while we could just copy it.
   if (reg_arg.Valid() && rl_dest.wide && (reg_arg.GetWideKind() == kNotWide)) {
@@ -153,6 +216,41 @@
   }
 }
 
+void Mir2Lir::SpillArg(size_t in_position) {
+  RegStorage reg_arg = in_to_reg_storage_mapping_.GetReg(in_position);
+
+  if (reg_arg.Valid()) {
+    int offset = frame_size_ + StackVisitor::GetOutVROffset(in_position, cu_->instruction_set);
+    ShortyArg arg = in_to_reg_storage_mapping_.GetShorty(in_position);
+    OpSize size = arg.IsRef() ? kReference :
+        (arg.IsWide() && reg_arg.GetWideKind() == kWide) ? k64 : k32;
+    StoreBaseDisp(TargetPtrReg(kSp), offset, reg_arg, size, kNotVolatile);
+  }
+}
+
+void Mir2Lir::UnspillArg(size_t in_position) {
+  RegStorage reg_arg = in_to_reg_storage_mapping_.GetReg(in_position);
+
+  if (reg_arg.Valid()) {
+    int offset = frame_size_ + StackVisitor::GetOutVROffset(in_position, cu_->instruction_set);
+    ShortyArg arg = in_to_reg_storage_mapping_.GetShorty(in_position);
+    OpSize size = arg.IsRef() ? kReference :
+        (arg.IsWide() && reg_arg.GetWideKind() == kWide) ? k64 : k32;
+    LoadBaseDisp(TargetPtrReg(kSp), offset, reg_arg, size, kNotVolatile);
+  }
+}
+
+Mir2Lir::SpecialSuspendCheckSlowPath* Mir2Lir::GenSpecialSuspendTest() {
+  LockCallTemps();
+  LIR* branch = OpTestSuspend(nullptr);
+  FreeCallTemps();
+  LIR* cont = NewLIR0(kPseudoTargetLabel);
+  SpecialSuspendCheckSlowPath* slow_path =
+      new (arena_) SpecialSuspendCheckSlowPath(this, branch, cont);
+  AddSlowPath(slow_path);
+  return slow_path;
+}
+
 bool Mir2Lir::GenSpecialIGet(MIR* mir, const InlineMethod& special) {
   // FastInstance() already checked by DexFileMethodInliner.
   const InlineIGetIPutData& data = special.d.ifield_data;
@@ -161,13 +259,16 @@
     return false;
   }
 
-  OpSize size = k32;
+  OpSize size;
   switch (data.op_variant) {
-    case InlineMethodAnalyser::IGetVariant(Instruction::IGET_OBJECT):
-      size = kReference;
+    case InlineMethodAnalyser::IGetVariant(Instruction::IGET):
+      size = in_to_reg_storage_mapping_.GetShorty(data.src_arg).IsFP() ? kSingle : k32;
       break;
     case InlineMethodAnalyser::IGetVariant(Instruction::IGET_WIDE):
-      size = k64;
+      size = in_to_reg_storage_mapping_.GetShorty(data.src_arg).IsFP() ? kDouble : k64;
+      break;
+    case InlineMethodAnalyser::IGetVariant(Instruction::IGET_OBJECT):
+      size = kReference;
       break;
     case InlineMethodAnalyser::IGetVariant(Instruction::IGET_SHORT):
       size = kSignedHalf;
@@ -181,11 +282,18 @@
     case InlineMethodAnalyser::IGetVariant(Instruction::IGET_BOOLEAN):
       size = kUnsignedByte;
       break;
+    default:
+      LOG(FATAL) << "Unknown variant: " << data.op_variant;
+      UNREACHABLE();
   }
 
   // Point of no return - no aborts after this
-  GenPrintLabel(mir);
+  if (!kLeafOptimization) {
+    auto* slow_path = GenSpecialSuspendTest();
+    slow_path->PreserveArg(data.object_arg);
+  }
   LockArg(data.object_arg);
+  GenPrintLabel(mir);
   RegStorage reg_obj = LoadArg(data.object_arg, kRefReg);
   RegisterClass reg_class = RegClassForFieldLoadStore(size, data.is_volatile);
   RegisterClass ret_reg_class = ShortyToRegClass(cu_->shorty[0]);
@@ -223,13 +331,16 @@
     return false;
   }
 
-  OpSize size = k32;
+  OpSize size;
   switch (data.op_variant) {
-    case InlineMethodAnalyser::IPutVariant(Instruction::IPUT_OBJECT):
-      size = kReference;
+    case InlineMethodAnalyser::IPutVariant(Instruction::IPUT):
+      size = in_to_reg_storage_mapping_.GetShorty(data.src_arg).IsFP() ? kSingle : k32;
       break;
     case InlineMethodAnalyser::IPutVariant(Instruction::IPUT_WIDE):
-      size = k64;
+      size = in_to_reg_storage_mapping_.GetShorty(data.src_arg).IsFP() ? kDouble : k64;
+      break;
+    case InlineMethodAnalyser::IPutVariant(Instruction::IPUT_OBJECT):
+      size = kReference;
       break;
     case InlineMethodAnalyser::IPutVariant(Instruction::IPUT_SHORT):
       size = kSignedHalf;
@@ -243,12 +354,20 @@
     case InlineMethodAnalyser::IPutVariant(Instruction::IPUT_BOOLEAN):
       size = kUnsignedByte;
       break;
+    default:
+      LOG(FATAL) << "Unknown variant: " << data.op_variant;
+      UNREACHABLE();
   }
 
   // Point of no return - no aborts after this
-  GenPrintLabel(mir);
+  if (!kLeafOptimization) {
+    auto* slow_path = GenSpecialSuspendTest();
+    slow_path->PreserveArg(data.object_arg);
+    slow_path->PreserveArg(data.src_arg);
+  }
   LockArg(data.object_arg);
-  LockArg(data.src_arg, IsWide(size));
+  LockArg(data.src_arg);
+  GenPrintLabel(mir);
   RegStorage reg_obj = LoadArg(data.object_arg, kRefReg);
   RegisterClass reg_class = RegClassForFieldLoadStore(size, data.is_volatile);
   RegStorage reg_src = LoadArg(data.src_arg, reg_class, IsWide(size));
@@ -269,8 +388,12 @@
   bool wide = (data.is_wide != 0u);
 
   // Point of no return - no aborts after this
+  if (!kLeafOptimization) {
+    auto* slow_path = GenSpecialSuspendTest();
+    slow_path->PreserveArg(data.arg);
+  }
+  LockArg(data.arg);
   GenPrintLabel(mir);
-  LockArg(data.arg, wide);
   RegisterClass reg_class = ShortyToRegClass(cu_->shorty[0]);
   RegLocation rl_dest = wide ? GetReturnWide(reg_class) : GetReturn(reg_class);
   LoadArgDirect(data.arg, rl_dest);
@@ -285,15 +408,22 @@
   current_dalvik_offset_ = mir->offset;
   MIR* return_mir = nullptr;
   bool successful = false;
+  EnsureInitializedArgMappingToPhysicalReg();
 
   switch (special.opcode) {
     case kInlineOpNop:
       successful = true;
       DCHECK_EQ(mir->dalvikInsn.opcode, Instruction::RETURN_VOID);
+      if (!kLeafOptimization) {
+        GenSpecialSuspendTest();
+      }
       return_mir = mir;
       break;
     case kInlineOpNonWideConst: {
       successful = true;
+      if (!kLeafOptimization) {
+        GenSpecialSuspendTest();
+      }
       RegLocation rl_dest = GetReturn(ShortyToRegClass(cu_->shorty[0]));
       GenPrintLabel(mir);
       LoadConstant(rl_dest.reg, static_cast<int>(special.d.data));
@@ -333,13 +463,17 @@
     }
     GenSpecialExitSequence();
 
-    core_spill_mask_ = 0;
-    num_core_spills_ = 0;
-    fp_spill_mask_ = 0;
-    num_fp_spills_ = 0;
-    frame_size_ = 0;
-    core_vmap_table_.clear();
-    fp_vmap_table_.clear();
+    if (!kLeafOptimization) {
+      HandleSlowPaths();
+    } else {
+      core_spill_mask_ = 0;
+      num_core_spills_ = 0;
+      fp_spill_mask_ = 0;
+      num_fp_spills_ = 0;
+      frame_size_ = 0;
+      core_vmap_table_.clear();
+      fp_vmap_table_.clear();
+    }
   }
 
   return successful;
@@ -1287,31 +1421,41 @@
                                                 InToRegStorageMapper* mapper) {
   DCHECK(mapper != nullptr);
   DCHECK(shorty != nullptr);
-  max_mapped_in_ = -1;
-  has_arguments_on_stack_ = false;
+  DCHECK(!IsInitialized());
+  DCHECK_EQ(end_mapped_in_, 0u);
+  DCHECK(!has_arguments_on_stack_);
   while (shorty->Next()) {
      ShortyArg arg = shorty->GetArg();
      RegStorage reg = mapper->GetNextReg(arg);
+     mapping_.emplace_back(arg, reg);
+     if (arg.IsWide()) {
+       mapping_.emplace_back(ShortyArg(kInvalidShorty), RegStorage::InvalidReg());
+     }
      if (reg.Valid()) {
-       mapping_.Put(count_, reg);
-       max_mapped_in_ = count_;
-       // If the VR is wide and was mapped as wide then account for it.
-       if (arg.IsWide() && reg.Is64Bit()) {
-         max_mapped_in_++;
+       end_mapped_in_ = mapping_.size();
+       // If the VR is wide but wasn't mapped as wide then account for it.
+       if (arg.IsWide() && !reg.Is64Bit()) {
+         --end_mapped_in_;
        }
      } else {
        has_arguments_on_stack_ = true;
      }
-     count_ += arg.IsWide() ? 2 : 1;
   }
   initialized_ = true;
 }
 
-RegStorage Mir2Lir::InToRegStorageMapping::Get(int in_position) {
+RegStorage Mir2Lir::InToRegStorageMapping::GetReg(size_t in_position) {
   DCHECK(IsInitialized());
-  DCHECK_LT(in_position, count_);
-  auto res = mapping_.find(in_position);
-  return res != mapping_.end() ? res->second : RegStorage::InvalidReg();
+  DCHECK_LT(in_position, mapping_.size());
+  DCHECK_NE(mapping_[in_position].first.GetType(), kInvalidShorty);
+  return mapping_[in_position].second;
+}
+
+Mir2Lir::ShortyArg Mir2Lir::InToRegStorageMapping::GetShorty(size_t in_position) {
+  DCHECK(IsInitialized());
+  DCHECK_LT(in_position, mapping_.size());
+  DCHECK_NE(mapping_[in_position].first.GetType(), kInvalidShorty);
+  return mapping_[in_position].first;
 }
 
 }  // namespace art
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index 88ca911..d713493 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -515,6 +515,9 @@
       LIR* const cont_;
     };
 
+    class SuspendCheckSlowPath;
+    class SpecialSuspendCheckSlowPath;
+
     // Helper class for changing mem_ref_type_ until the end of current scope. See mem_ref_type_.
     class ScopedMemRefType {
      public:
@@ -1203,7 +1206,7 @@
       }
     }
 
-    RegStorage GetArgMappingToPhysicalReg(int arg_num);
+    void EnsureInitializedArgMappingToPhysicalReg();
     virtual RegLocation GetReturnAlt() = 0;
     virtual RegLocation GetReturnWideAlt() = 0;
     virtual RegLocation LocCReturn() = 0;
@@ -1570,6 +1573,16 @@
     virtual void GenSpecialExitSequence() = 0;
 
     /**
+     * @brief Used to generate stack frame for suspend path of special methods.
+     */
+    virtual void GenSpecialEntryForSuspend() = 0;
+
+    /**
+     * @brief Used to pop the stack frame for suspend path of special methods.
+     */
+    virtual void GenSpecialExitForSuspend() = 0;
+
+    /**
      * @brief Used to generate code for special methods that are known to be
      * small enough to work in frameless mode.
      * @param bb The basic block of the first MIR.
@@ -1590,9 +1603,8 @@
      * @brief Used to lock register if argument at in_position was passed that way.
      * @details Does nothing if the argument is passed via stack.
      * @param in_position The argument number whose register to lock.
-     * @param wide Whether the argument is wide.
      */
-    void LockArg(int in_position, bool wide = false);
+    void LockArg(size_t in_position);
 
     /**
      * @brief Used to load VR argument to a physical register.
@@ -1602,14 +1614,33 @@
      * @param wide Whether the argument is 64-bit or not.
      * @return Returns the register (or register pair) for the loaded argument.
      */
-    RegStorage LoadArg(int in_position, RegisterClass reg_class, bool wide = false);
+    RegStorage LoadArg(size_t in_position, RegisterClass reg_class, bool wide = false);
 
     /**
      * @brief Used to load a VR argument directly to a specified register location.
      * @param in_position The argument number to place in register.
      * @param rl_dest The register location where to place argument.
      */
-    void LoadArgDirect(int in_position, RegLocation rl_dest);
+    void LoadArgDirect(size_t in_position, RegLocation rl_dest);
+
+    /**
+     * @brief Used to spill register if argument at in_position was passed that way.
+     * @details Does nothing if the argument is passed via stack.
+     * @param in_position The argument number whose register to spill.
+     */
+    void SpillArg(size_t in_position);
+
+    /**
+     * @brief Used to unspill register if argument at in_position was passed that way.
+     * @details Does nothing if the argument is passed via stack.
+     * @param in_position The argument number whose register to unspill.
+     */
+    void UnspillArg(size_t in_position);
+
+    /**
+     * @brief Generate suspend test in a special method.
+     */
+    SpecialSuspendCheckSlowPath* GenSpecialSuspendTest();
 
     /**
      * @brief Used to generate LIR for special getter method.
@@ -1802,21 +1833,22 @@
     class InToRegStorageMapping {
      public:
       explicit InToRegStorageMapping(ArenaAllocator* arena)
-          : mapping_(std::less<int>(), arena->Adapter()), count_(0),
-            max_mapped_in_(0), has_arguments_on_stack_(false),  initialized_(false) {}
+          : mapping_(arena->Adapter()),
+            end_mapped_in_(0u), has_arguments_on_stack_(false),  initialized_(false) {}
       void Initialize(ShortyIterator* shorty, InToRegStorageMapper* mapper);
       /**
-       * @return the index of last VR mapped to physical register. In other words
-       * any VR starting from (return value + 1) index is mapped to memory.
+       * @return the past-the-end index of VRs mapped to physical registers.
+       * In other words, any VR starting from this index is mapped to memory.
        */
-      int GetMaxMappedIn() { return max_mapped_in_; }
+      size_t GetEndMappedIn() { return end_mapped_in_; }
       bool HasArgumentsOnStack() { return has_arguments_on_stack_; }
-      RegStorage Get(int in_position);
+      RegStorage GetReg(size_t in_position);
+      ShortyArg GetShorty(size_t in_position);
       bool IsInitialized() { return initialized_; }
      private:
-      ArenaSafeMap<int, RegStorage> mapping_;
-      int count_;
-      int max_mapped_in_;
+      static constexpr char kInvalidShorty = '-';
+      ArenaVector<std::pair<ShortyArg, RegStorage>> mapping_;
+      size_t end_mapped_in_;
       bool has_arguments_on_stack_;
       bool initialized_;
     };
diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc
index 284e8f6..f964691 100644
--- a/compiler/dex/quick/x86/call_x86.cc
+++ b/compiler/dex/quick/x86/call_x86.cc
@@ -272,6 +272,41 @@
   NewLIR0(kX86Ret);
 }
 
+void X86Mir2Lir::GenSpecialEntryForSuspend() {
+  // Keep 16-byte stack alignment, there's already the return address, so
+  //   - for 32-bit push EAX, i.e. ArtMethod*, ESI, EDI,
+  //   - for 64-bit push RAX, i.e. ArtMethod*.
+  if (!cu_->target64) {
+    DCHECK(!IsTemp(rs_rSI));
+    DCHECK(!IsTemp(rs_rDI));
+    core_spill_mask_ =
+        (1u << rs_rSI.GetRegNum()) | (1u << rs_rDI.GetRegNum()) | (1u << rs_rRET.GetRegNum());
+    num_core_spills_ = 3u;
+  } else {
+    core_spill_mask_ = (1u << rs_rRET.GetRegNum());
+    num_core_spills_ = 1u;
+  }
+  fp_spill_mask_ = 0u;
+  num_fp_spills_ = 0u;
+  frame_size_ = 16u;
+  core_vmap_table_.clear();
+  fp_vmap_table_.clear();
+  if (!cu_->target64) {
+    NewLIR1(kX86Push32R, rs_rDI.GetReg());
+    NewLIR1(kX86Push32R, rs_rSI.GetReg());
+  }
+  NewLIR1(kX86Push32R, TargetReg(kArg0, kRef).GetReg());  // ArtMethod*
+}
+
+void X86Mir2Lir::GenSpecialExitForSuspend() {
+  // Pop the frame. (ArtMethod* no longer needed but restore it anyway.)
+  NewLIR1(kX86Pop32R, TargetReg(kArg0, kRef).GetReg());  // ArtMethod*
+  if (!cu_->target64) {
+    NewLIR1(kX86Pop32R, rs_rSI.GetReg());
+    NewLIR1(kX86Pop32R, rs_rDI.GetReg());
+  }
+}
+
 void X86Mir2Lir::GenImplicitNullCheck(RegStorage reg, int opt_flags) {
   if (!(cu_->disable_opt & (1 << kNullCheckElimination)) && (opt_flags & MIR_IGNORE_NULL_CHECK)) {
     return;
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index ca60400..20163b4 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -259,6 +259,8 @@
   void GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) OVERRIDE;
   void GenExitSequence() OVERRIDE;
   void GenSpecialExitSequence() OVERRIDE;
+  void GenSpecialEntryForSuspend() OVERRIDE;
+  void GenSpecialExitForSuspend() OVERRIDE;
   void GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias, bool is_double) OVERRIDE;
   void GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) OVERRIDE;
   void GenSelect(BasicBlock* bb, MIR* mir) OVERRIDE;
diff --git a/test/109-suspend-check/src/Main.java b/test/109-suspend-check/src/Main.java
index cd5130d..8046d75 100644
--- a/test/109-suspend-check/src/Main.java
+++ b/test/109-suspend-check/src/Main.java
@@ -19,30 +19,33 @@
 
     public static void main(String[] args) {
         System.out.println("Running (" + TEST_TIME + " seconds) ...");
-        InfiniteForLoop forLoop = new InfiniteForLoop();
-        InfiniteWhileLoop whileLoop = new InfiniteWhileLoop();
-        InfiniteWhileLoopWithIntrinsic whileLoopWithIntrinsic =
-            new InfiniteWhileLoopWithIntrinsic();
         InfiniteDoWhileLoopWithLong doWhileLoopWithLong = new InfiniteDoWhileLoopWithLong();
-        InfiniteDoWhileLoop doWhileLoop = new InfiniteDoWhileLoop();
-        MakeGarbage garbage = new MakeGarbage();
-        forLoop.start();
-        whileLoop.start();
-        whileLoopWithIntrinsic.start();
+        SimpleLoopThread[] simpleLoops = {
+                new InfiniteForLoop(),
+                new InfiniteWhileLoop(),
+                new InfiniteWhileLoopWithIntrinsic(),
+                new InfiniteDoWhileLoop(),
+                new MakeGarbage(),
+                new InfiniteWhileLoopWithSpecialReturnArgOrConst(new SpecialMethods1()),
+                new InfiniteWhileLoopWithSpecialReturnArgOrConst(new SpecialMethods2()),
+                new InfiniteWhileLoopWithSpecialPutOrNop(new SpecialMethods1()),
+                new InfiniteWhileLoopWithSpecialPutOrNop(new SpecialMethods2()),
+                new InfiniteWhileLoopWithSpecialConstOrIGet(new SpecialMethods1()),
+                new InfiniteWhileLoopWithSpecialConstOrIGet(new SpecialMethods2()),
+        };
         doWhileLoopWithLong.start();
-        doWhileLoop.start();
-        garbage.start();
+        for (SimpleLoopThread loop : simpleLoops) {
+            loop.start();
+        }
         for (int i = 0; i < TEST_TIME; i++) {
           Runtime.getRuntime().gc();
           System.out.println(".");
           sleep(1000);
         }
-        forLoop.stopNow();
-        whileLoop.stopNow();
-        whileLoopWithIntrinsic.stopNow();
         doWhileLoopWithLong.stopNow();
-        doWhileLoop.stopNow();
-        garbage.stopNow();
+        for (SimpleLoopThread loop : simpleLoops) {
+            loop.stopNow();
+        }
         System.out.println("Done.");
     }
 
@@ -55,8 +58,84 @@
     }
 }
 
-class InfiniteWhileLoopWithIntrinsic extends Thread {
-  volatile private boolean keepGoing = true;
+class SimpleLoopThread extends Thread {
+  protected volatile boolean keepGoing = true;
+  public void stopNow() {
+    keepGoing = false;
+  }
+}
+
+interface SpecialMethodInterface {
+  long ReturnArgOrConst(long arg);
+  void PutOrNop(long arg);
+  long ConstOrIGet();
+}
+
+class SpecialMethods1 implements SpecialMethodInterface {
+  public long ReturnArgOrConst(long arg) {
+    return 42L;
+  }
+  public void PutOrNop(long arg) {
+  }
+  public long ConstOrIGet() {
+    return 42L;
+  }
+}
+
+class SpecialMethods2 implements SpecialMethodInterface {
+  public long value = 42L;
+  public long ReturnArgOrConst(long arg) {
+    return arg;
+  }
+  public void PutOrNop(long arg) {
+    value = arg;
+  }
+  public long ConstOrIGet() {
+    return value;
+  }
+}
+
+class InfiniteWhileLoopWithSpecialReturnArgOrConst extends SimpleLoopThread {
+  private SpecialMethodInterface smi;
+  public InfiniteWhileLoopWithSpecialReturnArgOrConst(SpecialMethodInterface smi) {
+    this.smi = smi;
+  }
+  public void run() {
+    long i = 0L;
+    while (keepGoing) {
+      i += smi.ReturnArgOrConst(i);
+    }
+  }
+}
+
+class InfiniteWhileLoopWithSpecialPutOrNop extends SimpleLoopThread {
+  private SpecialMethodInterface smi;
+  public InfiniteWhileLoopWithSpecialPutOrNop(SpecialMethodInterface smi) {
+    this.smi = smi;
+  }
+  public void run() {
+    long i = 0L;
+    while (keepGoing) {
+      smi.PutOrNop(i);
+      i++;
+    }
+  }
+}
+
+class InfiniteWhileLoopWithSpecialConstOrIGet extends SimpleLoopThread {
+  private SpecialMethodInterface smi;
+  public InfiniteWhileLoopWithSpecialConstOrIGet(SpecialMethodInterface smi) {
+    this.smi = smi;
+  }
+  public void run() {
+    long i = 0L;
+    while (keepGoing) {
+      i += smi.ConstOrIGet();
+    }
+  }
+}
+
+class InfiniteWhileLoopWithIntrinsic extends SimpleLoopThread {
   private String[] strings = { "a", "b", "c", "d" };
   private int sum = 0;
   public void run() {
@@ -66,9 +145,6 @@
       sum += strings[i & 3].length();
     }
   }
-  public void stopNow() {
-    keepGoing = false;
-  }
 }
 
 class InfiniteDoWhileLoopWithLong extends Thread {
@@ -84,55 +160,37 @@
   }
 }
 
-class InfiniteWhileLoop extends Thread {
-  volatile private boolean keepGoing = true;
+class InfiniteWhileLoop extends SimpleLoopThread {
   public void run() {
     int i = 0;
     while (keepGoing) {
       i++;
     }
   }
-  public void stopNow() {
-    keepGoing = false;
-  }
 }
 
-class InfiniteDoWhileLoop extends Thread {
-  volatile private boolean keepGoing = true;
+class InfiniteDoWhileLoop extends SimpleLoopThread {
   public void run() {
     int i = 0;
     do {
       i++;
     } while (keepGoing);
   }
-  public void stopNow() {
-    keepGoing = false;
-  }
 }
 
-class InfiniteForLoop extends Thread {
-  int count = 100000;
-  volatile private boolean keepGoing = true;
+class InfiniteForLoop extends SimpleLoopThread {
   public void run() {
     int i = 0;
     for (int j = 0; keepGoing; j++) {
       i += j;
     }
   }
-  public void stopNow() {
-    keepGoing = false;
-  }
 }
 
-
-class MakeGarbage extends Thread {
-  volatile private boolean keepGoing = true;
+class MakeGarbage extends SimpleLoopThread {
   public void run() {
     while (keepGoing) {
       byte[] garbage = new byte[100000];
     }
   }
-  public void stopNow() {
-    keepGoing = false;
-  }
 }