Merge "Add a variable to disable PREBUILD in run-tests."
diff --git a/compiler/dex/quick/arm64/arm64_lir.h b/compiler/dex/quick/arm64/arm64_lir.h
index d0633af..3a8ea3f 100644
--- a/compiler/dex/quick/arm64/arm64_lir.h
+++ b/compiler/dex/quick/arm64/arm64_lir.h
@@ -142,6 +142,8 @@
   rwsp = rw31,
 
   // Aliases which are not defined in "ARM Architecture Reference, register names".
+  rxIP0 = rx16,
+  rxIP1 = rx17,
   rxSUSPEND = rx19,
   rxSELF = rx18,
   rxLR = rx30,
@@ -150,6 +152,8 @@
    * the 64-bit view. However, for now we'll define a 32-bit view to keep these from being
    * allocated as 32-bit temp registers.
    */
+  rwIP0 = rw16,
+  rwIP1 = rw17,
   rwSUSPEND = rw19,
   rwSELF = rw18,
   rwLR = rw30,
@@ -165,6 +169,10 @@
 
 constexpr RegStorage rs_xzr(RegStorage::kValid | rxzr);
 constexpr RegStorage rs_wzr(RegStorage::kValid | rwzr);
+constexpr RegStorage rs_xIP0(RegStorage::kValid | rxIP0);
+constexpr RegStorage rs_wIP0(RegStorage::kValid | rwIP0);
+constexpr RegStorage rs_xIP1(RegStorage::kValid | rxIP1);
+constexpr RegStorage rs_wIP1(RegStorage::kValid | rwIP1);
 // Reserved registers.
 constexpr RegStorage rs_xSUSPEND(RegStorage::kValid | rxSUSPEND);
 constexpr RegStorage rs_xSELF(RegStorage::kValid | rxSELF);
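
The rs_xIP0/rs_xIP1 constants above name the AArch64 intra-procedure-call scratch registers (x16/x17) so that call sites no longer hard-code x8/x9. A minimal sketch of the intended usage, mirroring the call_arm64.cc hunks below (the helper itself is hypothetical; LockTemp, FreeTemp, LoadWordDisp and NewLIR1 are the existing Mir2Lir methods):

    // Sketch: branch to a runtime entrypoint through the IP0 scratch register.
    static void EmitEntrypointBranch(Arm64Mir2Lir* m2l, ThreadOffset<8> func_offset) {
      m2l->LockTemp(rs_xIP0);                                          // Reserve the scratch register.
      m2l->LoadWordDisp(rs_xSELF, func_offset.Int32Value(), rs_xIP0);  // Load the entrypoint from Thread*.
      m2l->NewLIR1(kA64Br1x, rs_xIP0.GetReg());                        // Indirect branch.
      m2l->FreeTemp(rs_xIP0);                                          // Release the scratch register.
    }
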
diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc
index 5e95500..e584548 100644
--- a/compiler/dex/quick/arm64/call_arm64.cc
+++ b/compiler/dex/quick/arm64/call_arm64.cc
@@ -319,8 +319,8 @@
   LockTemp(rs_x5);
   LockTemp(rs_x6);
   LockTemp(rs_x7);
-  LockTemp(rs_x8);
-  LockTemp(rs_x9);
+  LockTemp(rs_xIP0);
+  LockTemp(rs_xIP1);
 
   /*
    * We can safely skip the stack overflow check if we're
@@ -341,7 +341,7 @@
     if (!cu_->compiler_driver->GetCompilerOptions().GetImplicitStackOverflowChecks()) {
       if (!large_frame) {
         // Load stack limit
-        LoadWordDisp(rs_xSELF, Thread::StackEndOffset<8>().Int32Value(), rs_x9);
+        LoadWordDisp(rs_xSELF, Thread::StackEndOffset<8>().Int32Value(), rs_xIP1);
       }
     } else {
       // TODO(Arm64) Implement implicit checks.
@@ -386,10 +386,10 @@
           m2l_->OpRegImm(kOpAdd, rs_sp, sp_displace_);
           m2l_->ClobberCallerSave();
           ThreadOffset<8> func_offset = QUICK_ENTRYPOINT_OFFSET(8, pThrowStackOverflow);
-          m2l_->LockTemp(rs_x8);
-          m2l_->LoadWordDisp(rs_xSELF, func_offset.Int32Value(), rs_x8);
-          m2l_->NewLIR1(kA64Br1x, rs_x8.GetReg());
-          m2l_->FreeTemp(rs_x8);
+          m2l_->LockTemp(rs_xIP0);
+          m2l_->LoadWordDisp(rs_xSELF, func_offset.Int32Value(), rs_xIP0);
+          m2l_->NewLIR1(kA64Br1x, rs_xIP0.GetReg());
+          m2l_->FreeTemp(rs_xIP0);
         }
 
       private:
@@ -399,11 +399,11 @@
       if (large_frame) {
         // Compare Expected SP against bottom of stack.
         // Branch to throw target if there is not enough room.
-        OpRegRegImm(kOpSub, rs_x9, rs_sp, frame_size_without_spills);
-        LoadWordDisp(rs_xSELF, Thread::StackEndOffset<8>().Int32Value(), rs_x8);
-        LIR* branch = OpCmpBranch(kCondUlt, rs_x9, rs_x8, nullptr);
+        OpRegRegImm(kOpSub, rs_xIP1, rs_sp, frame_size_without_spills);
+        LoadWordDisp(rs_xSELF, Thread::StackEndOffset<8>().Int32Value(), rs_xIP0);
+        LIR* branch = OpCmpBranch(kCondUlt, rs_xIP1, rs_xIP0, nullptr);
         AddSlowPath(new(arena_)StackOverflowSlowPath(this, branch, spill_size));
-        OpRegCopy(rs_sp, rs_x9);  // Establish stack after checks.
+        OpRegCopy(rs_sp, rs_xIP1);  // Establish stack after checks.
       } else {
         /*
          * If the frame is small enough we are guaranteed to have enough space that remains to
@@ -411,7 +411,7 @@
          * Establishes stack before checks.
          */
         OpRegRegImm(kOpSub, rs_sp, rs_sp, frame_size_without_spills);
-        LIR* branch = OpCmpBranch(kCondUlt, rs_sp, rs_x9, nullptr);
+        LIR* branch = OpCmpBranch(kCondUlt, rs_sp, rs_xIP1, nullptr);
         AddSlowPath(new(arena_)StackOverflowSlowPath(this, branch, frame_size_));
       }
     } else {
@@ -431,8 +431,8 @@
   FreeTemp(rs_x5);
   FreeTemp(rs_x6);
   FreeTemp(rs_x7);
-  FreeTemp(rs_x8);
-  FreeTemp(rs_x9);
+  FreeTemp(rs_xIP0);
+  FreeTemp(rs_xIP1);
 }
 
 void Arm64Mir2Lir::GenExitSequence() {
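
Both explicit overflow paths above enforce the same invariant in different orders. A pseudo-C sketch of the check (illustration only; self->stack_end and ThrowStackOverflow stand in for the real Thread field and slow path):

    // Large frame: compute the prospective SP in xIP1, compare it against the
    // stack limit loaded into xIP0, and only then commit it to SP.
    uintptr_t new_sp = sp - frame_size_without_spills;
    if (new_sp < self->stack_end) {
      ThrowStackOverflow();  // Via StackOverflowSlowPath.
    }
    sp = new_sp;             // Establish the stack after the check.
    // Small frame: commit SP first (the frame is guaranteed to fit in the
    // remaining space), then compare the new SP against the limit that was
    // pre-loaded into xIP1.
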
diff --git a/compiler/dex/quick/arm64/target_arm64.cc b/compiler/dex/quick/arm64/target_arm64.cc
index dec81cb..9b4546a 100644
--- a/compiler/dex/quick/arm64/target_arm64.cc
+++ b/compiler/dex/quick/arm64/target_arm64.cc
@@ -48,14 +48,12 @@
      rs_d8, rs_d9, rs_d10, rs_d11, rs_d12, rs_d13, rs_d14, rs_d15,
      rs_d16, rs_d17, rs_d18, rs_d19, rs_d20, rs_d21, rs_d22, rs_d23,
      rs_d24, rs_d25, rs_d26, rs_d27, rs_d28, rs_d29, rs_d30, rs_d31};
+// Note: we are not able to call C functions directly since rs_xSELF is a special register that
+// needs to be preserved but would be scratched by native functions following aapcs64.
 static constexpr RegStorage reserved_regs_arr[] =
     {rs_wSUSPEND, rs_wSELF, rs_wsp, rs_wLR, rs_wzr};
 static constexpr RegStorage reserved64_regs_arr[] =
     {rs_xSUSPEND, rs_xSELF, rs_sp, rs_xLR, rs_xzr};
-// TUNING: Are there too many temp registers and too less promote target?
-// This definition need to be matched with runtime.cc, quick entry assembly and JNI compiler
-// Note: we are not able to call to C function directly if it un-match C ABI.
-// Currently, rs_rA64_SELF is not a callee save register which does not match C ABI.
 static constexpr RegStorage core_temps_arr[] =
     {rs_w0, rs_w1, rs_w2, rs_w3, rs_w4, rs_w5, rs_w6, rs_w7,
      rs_w8, rs_w9, rs_w10, rs_w11, rs_w12, rs_w13, rs_w14, rs_w15, rs_w16,
@@ -132,7 +130,7 @@
     case kRet0: res_reg = rs_w0; break;
     case kRet1: res_reg = rs_w1; break;
     case kInvokeTgt: res_reg = rs_wLR; break;
-    case kHiddenArg: res_reg = rs_w12; break;
+    case kHiddenArg: res_reg = rs_wIP1; break;
     case kHiddenFpArg: res_reg = RegStorage::InvalidReg(); break;
     case kCount: res_reg = RegStorage::InvalidReg(); break;
     default: res_reg = RegStorage::InvalidReg();
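
The kHiddenArg change must agree with the runtime side: art_quick_imt_conflict_trampoline (updated in quick_entrypoints_arm64.S below) now expects the hidden dex method index in xIP1. The correspondence, spelled out:

    // Compiler (this file):   TargetReg(kHiddenArg) == rs_wIP1 (w17)
    // Runtime (trampoline):   ldr w0, [x0, xIP1, lsl 2]   // Index arrives in xIP1 (x17).
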
diff --git a/compiler/jni/quick/arm64/calling_convention_arm64.cc b/compiler/jni/quick/arm64/calling_convention_arm64.cc
index 0a00d7d..b95dad2 100644
--- a/compiler/jni/quick/arm64/calling_convention_arm64.cc
+++ b/compiler/jni/quick/arm64/calling_convention_arm64.cc
@@ -152,7 +152,8 @@
 Arm64JniCallingConvention::Arm64JniCallingConvention(bool is_static, bool is_synchronized,
                                                      const char* shorty)
     : JniCallingConvention(is_static, is_synchronized, shorty, kFramePointerSize) {
-  callee_save_regs_.push_back(Arm64ManagedRegister::FromCoreRegister(X19));
+  // TODO: Ugly hard-coded register list.
+  // These should be generated from the spill mask automatically.
   callee_save_regs_.push_back(Arm64ManagedRegister::FromCoreRegister(X20));
   callee_save_regs_.push_back(Arm64ManagedRegister::FromCoreRegister(X21));
   callee_save_regs_.push_back(Arm64ManagedRegister::FromCoreRegister(X22));
@@ -164,30 +165,28 @@
   callee_save_regs_.push_back(Arm64ManagedRegister::FromCoreRegister(X28));
   callee_save_regs_.push_back(Arm64ManagedRegister::FromCoreRegister(X29));
   callee_save_regs_.push_back(Arm64ManagedRegister::FromCoreRegister(X30));
-  callee_save_regs_.push_back(Arm64ManagedRegister::FromDRegister(D8));
-  callee_save_regs_.push_back(Arm64ManagedRegister::FromDRegister(D9));
-  callee_save_regs_.push_back(Arm64ManagedRegister::FromDRegister(D10));
-  callee_save_regs_.push_back(Arm64ManagedRegister::FromDRegister(D11));
-  callee_save_regs_.push_back(Arm64ManagedRegister::FromDRegister(D12));
-  callee_save_regs_.push_back(Arm64ManagedRegister::FromDRegister(D13));
-  callee_save_regs_.push_back(Arm64ManagedRegister::FromDRegister(D14));
-  callee_save_regs_.push_back(Arm64ManagedRegister::FromDRegister(D15));
 }
 
 uint32_t Arm64JniCallingConvention::CoreSpillMask() const {
   // Compute spill mask to agree with callee saves initialized in the constructor
-  uint32_t result = 0;
-  result =  1 << X19 | 1 << X20 | 1 << X21 | 1 << X22 | 1 << X23 | 1 << X24 |
-            1 << X25 | 1 << X26 | 1 << X27 | 1 << X28 | 1 << X29 | 1 << LR;
-  return result;
+  // Note: The native JNI function may call into VM runtime functions which may suspend or
+  // trigger GC, and the JNI method frame becomes the top quick frame in those cases. So, to
+  // satisfy the GC, we need to save LR and the callee-save registers, similar to the
+  // CalleeSaveMethod(RefsOnly) frame.
+  // The JNI function is the native function that the Java code wants to call.
+  // The JNI method is the method compiled by the JNI compiler.
+  // Call chain: managed code (Java) --> JNI method --> JNI function.
+  // The thread register (X18, scratched by aapcs64) is not saved on the stack; it is saved
+  // in ETR (X21).
+  // The suspend register (X19) is preserved by aapcs64 and is not used in the JNI method.
+  return 1 << X20 | 1 << X21 | 1 << X22 | 1 << X23 | 1 << X24 | 1 << X25 |
+         1 << X26 | 1 << X27 | 1 << X28 | 1 << X29 | 1 << LR;
 }
 
 uint32_t Arm64JniCallingConvention::FpSpillMask() const {
   // Compute spill mask to agree with callee saves initialized in the constructor
-  uint32_t result = 0;
-  result = 1 << D8 | 1 << D9 | 1 << D10 | 1 << D11 | 1 << D12 | 1 << D13 |
-           1 << D14 | 1 << D15;
-  return result;
+  // Note: All callee-save FP registers are preserved by aapcs64, and they are not used in
+  // the JNI method.
+  return 0;
 }
 
 ManagedRegister Arm64JniCallingConvention::ReturnScratchRegister() const {
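
kJniRefSpillRegsSize (changed to 11 in constants_arm64.h below) must equal the population count of CoreSpillMask() plus FpSpillMask(). A standalone sanity check, assuming the register numbering from registers_arm64.h (X20 = 20 ... X29 = 29, LR = 30):

    #include <bitset>
    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t core_mask = 0;
      for (unsigned reg = 20; reg <= 29; ++reg) {  // X20 .. X29
        core_mask |= 1u << reg;
      }
      core_mask |= 1u << 30;  // LR
      uint32_t fp_mask = 0;   // FpSpillMask() is now 0.
      assert(std::bitset<32>(core_mask).count() +
             std::bitset<32>(fp_mask).count() == 11);  // kJniRefSpillRegsSize
      return 0;
    }
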
diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h
index 3c6ad8f..5e1329e 100644
--- a/compiler/optimizing/stack_map_stream.h
+++ b/compiler/optimizing/stack_map_stream.h
@@ -35,7 +35,8 @@
       : stack_maps_(allocator, 10),
         dex_register_maps_(allocator, 10 * 4),
         inline_infos_(allocator, 2),
-        stack_mask_max_(-1) {}
+        stack_mask_max_(-1),
+        number_of_stack_maps_with_inline_info_(0) {}
 
   // Compute bytes needed to encode a mask with the given maximum element.
   static uint32_t StackMaskEncodingSize(int max_element) {
diff --git a/compiler/utils/arm64/assembler_arm64.cc b/compiler/utils/arm64/assembler_arm64.cc
index 5b97ba0..3f90f21 100644
--- a/compiler/utils/arm64/assembler_arm64.cc
+++ b/compiler/utils/arm64/assembler_arm64.cc
@@ -626,7 +626,7 @@
 
   // Move ETR(Callee saved) back to TR(Caller saved) reg. We use ETR on calls
   // to external functions that might trash TR. We do not need the original
-  // X19 saved in BuildFrame().
+  // ETR(X21) saved in BuildFrame().
   ___ Mov(reg_x(TR), reg_x(ETR));
 
   ___ Blr(temp);
@@ -644,20 +644,43 @@
 
   // TODO: *create APCS FP - end of FP chain;
   //       *add support for saving a different set of callee regs.
-  // For now we check that the size of callee regs vector is 20
-  // equivalent to the APCS callee saved regs [X19, x30] [D8, D15].
-  CHECK_EQ(callee_save_regs.size(), kCalleeSavedRegsSize);
-  ___ PushCalleeSavedRegisters();
-
-  // Move TR(Caller saved) to ETR(Callee saved). The original X19 has been
-  // saved by PushCalleeSavedRegisters(). This way we make sure that TR is not
-  // trashed by native code.
-  ___ Mov(reg_x(ETR), reg_x(TR));
-
+  // For now we check that the size of the callee-save regs vector is 11.
+  CHECK_EQ(callee_save_regs.size(), kJniRefSpillRegsSize);
   // Increase frame to required size - must be at least space to push StackReference<Method>.
-  CHECK_GT(frame_size, kCalleeSavedRegsSize * kFramePointerSize);
-  size_t adjust = frame_size - (kCalleeSavedRegsSize * kFramePointerSize);
-  IncreaseFrameSize(adjust);
+  CHECK_GT(frame_size, kJniRefSpillRegsSize * kFramePointerSize);
+  IncreaseFrameSize(frame_size);
+
+  // TODO: Ugly hard-coded register list.
+  // These should be generated from the spill mask automatically.
+  // TUNING: Use stp.
+  // Note: Must match Arm64JniCallingConvention::CoreSpillMask().
+  size_t reg_offset = frame_size;
+  reg_offset -= 8;
+  StoreToOffset(LR, SP, reg_offset);
+  reg_offset -= 8;
+  StoreToOffset(X29, SP, reg_offset);
+  reg_offset -= 8;
+  StoreToOffset(X28, SP, reg_offset);
+  reg_offset -= 8;
+  StoreToOffset(X27, SP, reg_offset);
+  reg_offset -= 8;
+  StoreToOffset(X26, SP, reg_offset);
+  reg_offset -= 8;
+  StoreToOffset(X25, SP, reg_offset);
+  reg_offset -= 8;
+  StoreToOffset(X24, SP, reg_offset);
+  reg_offset -= 8;
+  StoreToOffset(X23, SP, reg_offset);
+  reg_offset -= 8;
+  StoreToOffset(X22, SP, reg_offset);
+  reg_offset -= 8;
+  StoreToOffset(X21, SP, reg_offset);
+  reg_offset -= 8;
+  StoreToOffset(X20, SP, reg_offset);
+
+  // Move TR (caller saved) to ETR (callee saved). The original ETR (X21) has been saved on
+  // the stack. This way we make sure that TR is not trashed by native code.
+  ___ Mov(reg_x(ETR), reg_x(TR));
 
   // Write StackReference<Method>.
   DCHECK_EQ(4U, sizeof(StackReference<mirror::ArtMethod>));
@@ -690,22 +713,46 @@
 void Arm64Assembler::RemoveFrame(size_t frame_size, const std::vector<ManagedRegister>& callee_save_regs) {
   CHECK_ALIGNED(frame_size, kStackAlignment);
 
-  // For now we only check that the size of the frame is greater than the
-  // no of APCS callee saved regs [X19, X30] [D8, D15].
-  CHECK_EQ(callee_save_regs.size(), kCalleeSavedRegsSize);
-  CHECK_GT(frame_size, kCalleeSavedRegsSize * kFramePointerSize);
+  // For now we only check that the size of the frame is greater than the spill size.
+  CHECK_EQ(callee_save_regs.size(), kJniRefSpillRegsSize);
+  CHECK_GT(frame_size, kJniRefSpillRegsSize * kFramePointerSize);
 
-  // Decrease frame size to start of callee saved regs.
-  size_t adjust = frame_size - (kCalleeSavedRegsSize * kFramePointerSize);
-  DecreaseFrameSize(adjust);
-
-  // We move ETR (Callee Saved) back to TR (Caller Saved) which might have
-  // been trashed in the native call. The original X19 (ETR) is restored as
-  // part of PopCalleeSavedRegisters().
+  // We move ETR (aapcs64 callee saved) back to TR (aapcs64 caller saved), which might have
+  // been trashed in the native call. The original ETR (X21) is restored from the stack.
   ___ Mov(reg_x(TR), reg_x(ETR));
 
+  // TODO: Ugly hard-coded register list.
+  // These should be generated from the spill mask automatically.
+  // TUNING: Use ldp.
+  // Note: Must match Arm64JniCallingConvention::CoreSpillMask().
+  size_t reg_offset = frame_size;
+  reg_offset -= 8;
+  LoadFromOffset(LR, SP, reg_offset);
+  reg_offset -= 8;
+  LoadFromOffset(X29, SP, reg_offset);
+  reg_offset -= 8;
+  LoadFromOffset(X28, SP, reg_offset);
+  reg_offset -= 8;
+  LoadFromOffset(X27, SP, reg_offset);
+  reg_offset -= 8;
+  LoadFromOffset(X26, SP, reg_offset);
+  reg_offset -= 8;
+  LoadFromOffset(X25, SP, reg_offset);
+  reg_offset -= 8;
+  LoadFromOffset(X24, SP, reg_offset);
+  reg_offset -= 8;
+  LoadFromOffset(X23, SP, reg_offset);
+  reg_offset -= 8;
+  LoadFromOffset(X22, SP, reg_offset);
+  reg_offset -= 8;
+  LoadFromOffset(X21, SP, reg_offset);
+  reg_offset -= 8;
+  LoadFromOffset(X20, SP, reg_offset);
+
+  // Decrease frame size to start of callee saved regs.
+  DecreaseFrameSize(frame_size);
+
   // Pop callee saved and return to LR.
-  ___ PopCalleeSavedRegisters();
   ___ Ret();
 }
 
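The "generate these according to the spill mask automatically" TODO could take roughly the following shape (a sketch only, assuming StoreToOffset(reg, SP, offset) keeps its current signature; this is not the committed code). Walking the mask from the highest register down puts LR at frame_size - 8, which CoreSpillMask() and the stack walker rely on:

    // Sketch: spill every register in the mask at descending 8-byte offsets.
    void StoreCoreSpills(Arm64Assembler* assembler, uint32_t core_spill_mask,
                         size_t frame_size) {
      size_t reg_offset = frame_size;
      for (int reg = 31; reg >= 0; --reg) {
        if ((core_spill_mask & (1u << reg)) != 0) {
          reg_offset -= 8;
          assembler->StoreToOffset(static_cast<Register>(reg), SP, reg_offset);
        }
      }
    }
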
diff --git a/compiler/utils/arm64/constants_arm64.h b/compiler/utils/arm64/constants_arm64.h
index 2a08c95..0cbbb1e 100644
--- a/compiler/utils/arm64/constants_arm64.h
+++ b/compiler/utils/arm64/constants_arm64.h
@@ -29,12 +29,12 @@
 namespace art {
 namespace arm64 {
 
-constexpr unsigned int kCalleeSavedRegsSize = 20;
+constexpr unsigned int kJniRefSpillRegsSize = 11;
 
 // Vixl buffer size.
 constexpr size_t kBufferSizeArm64 = 4096*2;
 
-}  // arm64
-}  // art
+}  // namespace arm64
+}  // namespace art
 
 #endif  // ART_COMPILER_UTILS_ARM64_CONSTANTS_ARM64_H_
diff --git a/runtime/arch/arm/quick_method_frame_info_arm.h b/runtime/arch/arm/quick_method_frame_info_arm.h
index 83cacac..7595e94 100644
--- a/runtime/arch/arm/quick_method_frame_info_arm.h
+++ b/runtime/arch/arm/quick_method_frame_info_arm.h
@@ -63,6 +63,22 @@
                               ArmCalleeSaveFpSpills(type));
 }
 
+constexpr size_t ArmCalleeSaveFpr1Offset(Runtime::CalleeSaveType type) {
+  return ArmCalleeSaveFrameSize(type) -
+         (POPCOUNT(ArmCalleeSaveCoreSpills(type)) +
+          POPCOUNT(ArmCalleeSaveFpSpills(type))) * kArmPointerSize;
+}
+
+constexpr size_t ArmCalleeSaveGpr1Offset(Runtime::CalleeSaveType type) {
+  return ArmCalleeSaveFrameSize(type) -
+         POPCOUNT(ArmCalleeSaveCoreSpills(type)) * kArmPointerSize;
+}
+
+constexpr size_t ArmCalleeSaveLrOffset(Runtime::CalleeSaveType type) {
+  return ArmCalleeSaveFrameSize(type) -
+      POPCOUNT(ArmCalleeSaveCoreSpills(type) & (-(1 << LR))) * kArmPointerSize;
+}
+
 }  // namespace arm
 }  // namespace art
 
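The LrOffset formula counts LR plus any register spilled above it: mask & -(1 << LR) clears every bit below LR, and POPCOUNT of what remains, times the pointer size, is LR's distance from the frame top. A tiny standalone check (LR == 14 is assumed from registers_arm.h):

    #include <cstdint>

    constexpr uint32_t kLr = 14;  // art::arm::LR
    constexpr uint32_t kMask = (1u << 5) | (1u << 11) | (1u << kLr);  // e.g. R5, R11, LR
    // Keeps only bits >= LR, i.e. just the LR bit here:
    static_assert((kMask & static_cast<uint32_t>(-(1 << kLr))) == (1u << kLr),
                  "LR is the only spill at or above its own bit");
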
diff --git a/runtime/arch/arm64/asm_support_arm64.S b/runtime/arch/arm64/asm_support_arm64.S
index 55de1ec..be167fa 100644
--- a/runtime/arch/arm64/asm_support_arm64.S
+++ b/runtime/arch/arm64/asm_support_arm64.S
@@ -24,15 +24,22 @@
 // Register holding suspend check count down.
 // 32-bit is enough for the suspend register.
 #define wSUSPEND w19
+// xSUSPEND is the 64-bit view of wSUSPEND.
+// It is used to save/restore the register, which is scratched by managed code.
+#define xSUSPEND x19
 // Register holding Thread::Current().
 #define xSELF x18
+// x18 is not preserved by aapcs64, so it is saved in xETR (external thread reg) for restore and later use.
+#define xETR x21
 // Frame Pointer
 #define xFP   x29
 // Link Register
 #define xLR   x30
 // Define the intraprocedural linkage temporary registers.
 #define xIP0 x16
+#define wIP0 w16
 #define xIP1 x17
+#define wIP1 w17
 
 
 .macro ENTRY name
diff --git a/runtime/arch/arm64/asm_support_arm64.h b/runtime/arch/arm64/asm_support_arm64.h
index f353408..7f0f56f 100644
--- a/runtime/arch/arm64/asm_support_arm64.h
+++ b/runtime/arch/arm64/asm_support_arm64.h
@@ -19,28 +19,26 @@
 
 #include "asm_support.h"
 
-// TODO Thread offsets need to be checked when on Aarch64.
-
 // Note: these callee save methods loads require read barriers.
-// Offset of field Runtime::callee_save_methods_[kSaveAll]
+// Offset of field Runtime::callee_save_methods_[kSaveAll] verified in InitCpu
 #define RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET 0
-// Offset of field Runtime::callee_save_methods_[kRefsOnly]
+// Offset of field Runtime::callee_save_methods_[kRefsOnly] verified in InitCpu
 #define RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET 8
-// Offset of field Runtime::callee_save_methods_[kRefsAndArgs]
+// Offset of field Runtime::callee_save_methods_[kRefsAndArgs] verified in InitCpu
 #define RUNTIME_REF_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET 16
 
-// Offset of field Thread::suspend_count_ verified in InitCpu
+// Offset of field Thread::suspend_count_
 #define THREAD_FLAGS_OFFSET 0
-// Offset of field Thread::card_table_ verified in InitCpu
+// Offset of field Thread::card_table_
 #define THREAD_CARD_TABLE_OFFSET 112
-// Offset of field Thread::exception_ verified in InitCpu
+// Offset of field Thread::exception_
 #define THREAD_EXCEPTION_OFFSET 120
-// Offset of field Thread::thin_lock_thread_id_ verified in InitCpu
+// Offset of field Thread::thin_lock_thread_id_
 #define THREAD_ID_OFFSET 12
 
-#define FRAME_SIZE_SAVE_ALL_CALLEE_SAVE 368
-#define FRAME_SIZE_REFS_ONLY_CALLEE_SAVE 176
-#define FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE 304
+#define FRAME_SIZE_SAVE_ALL_CALLEE_SAVE 176
+#define FRAME_SIZE_REFS_ONLY_CALLEE_SAVE 96
+#define FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE 224
 
 // Expected size of a heap reference
 #define HEAP_REFERENCE_SIZE 4
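
The new frame sizes follow directly from the spill masks introduced in quick_method_frame_info_arm64.h later in this change: one 8-byte slot per spilled register, plus the 4-byte StackReference<Method>, rounded up to the 16-byte stack alignment. The arithmetic:

    SaveAll:     13 core (X18, X19, X20-X29, LR) + 8 FP (D8-D15) = 21 regs
                 21 * 8 + 4 = 172  ->  176 after rounding
    RefsOnly:    11 core (X20-X29, LR)                           = 11 regs
                 11 * 8 + 4 =  92  ->   96 after rounding
    RefsAndArgs: 18 core (X1-X7, X20-X29, LR) + 8 FP (D0-D7)     = 26 regs
                 26 * 8 + 4 = 212  ->  224 after rounding
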
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 2201b55..ba85d32 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -24,101 +24,22 @@
      * Runtime::CreateCalleeSaveMethod(kSaveAll)
      */
 .macro SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
-    adrp x9, :got:_ZN3art7Runtime9instance_E
-    ldr x9, [x9, #:got_lo12:_ZN3art7Runtime9instance_E]
+    adrp xIP0, :got:_ZN3art7Runtime9instance_E
+    ldr xIP0, [xIP0, #:got_lo12:_ZN3art7Runtime9instance_E]
 
     // Our registers aren't intermixed - just spill in order.
-    ldr x9,[x9]  // x9 = & (art::Runtime * art::Runtime.instance_) .
+    ldr xIP0, [xIP0]  // xIP0 = & (art::Runtime * art::Runtime.instance_) .
 
-    // x9 = (ArtMethod*) Runtime.instance_.callee_save_methods[kRefAndArgs]  .
+    // xIP0 = (ArtMethod*) Runtime.instance_.callee_save_methods[kSaveAll].
     THIS_LOAD_REQUIRES_READ_BARRIER
-    ldr x9, [x9, RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET ]
-
-    sub sp, sp, #368
-    .cfi_adjust_cfa_offset 368
-
-    // Ugly compile-time check, but we only have the preprocessor.
-#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVE != 368)
-#error "SAVE_ALL_CALLEE_SAVE_FRAME(ARM64) size not as expected."
-#endif
-
-    // FP args
-    stp d0, d1, [sp, #8]
-    stp d2, d3, [sp, #24]
-    stp d4, d5, [sp, #40]
-    stp d6, d7, [sp, #56]
-
-    // FP callee-saves
-    stp d8, d9,   [sp, #72]
-    stp d10, d11, [sp, #88]
-    stp d12, d13, [sp, #104]
-    stp d14, d15, [sp, #120]
-
-    stp d16, d17,   [sp, #136]
-    stp d18, d19,   [sp, #152]
-    stp d20, d21,   [sp, #168]
-    stp d22, d23,   [sp, #184]
-    stp d24, d25,   [sp, #200]
-    stp d26, d27,   [sp, #216]
-    stp d28, d29,   [sp, #232]
-    stp d30, d31,   [sp, #248]
-
-
-    // Callee saved.
-    stp xSELF, x19, [sp, #264]
-    .cfi_rel_offset x18, 264
-    .cfi_rel_offset x19, 272
-
-    stp x20, x21, [sp, #280]
-    .cfi_rel_offset x20, 280
-    .cfi_rel_offset x21, 288
-
-    stp x22, x23, [sp, #296]
-    .cfi_rel_offset x22, 296
-    .cfi_rel_offset x23, 304
-
-    stp x24, x25, [sp, #312]
-    .cfi_rel_offset x24, 312
-    .cfi_rel_offset x25, 320
-
-    stp x26, x27, [sp, #328]
-    .cfi_rel_offset x26, 328
-    .cfi_rel_offset x27, 336
-
-    stp x28, xFP, [sp, #344]    // Save FP.
-    .cfi_rel_offset x28, 344
-    .cfi_rel_offset x29, 352
-
-    str xLR, [sp, #360]
-    .cfi_rel_offset x30, 360
-
-    // Loads appropriate callee-save-method
-    str x9, [sp]    // Store ArtMethod* Runtime::callee_save_methods_[kRefsAndArgs]
-
-.endm
-
-    /*
-     * Macro that sets up the callee save frame to conform with
-     * Runtime::CreateCalleeSaveMethod(kRefsOnly).
-     */
-// WIP.
-.macro SETUP_REF_ONLY_CALLEE_SAVE_FRAME
-    adrp x9, :got:_ZN3art7Runtime9instance_E
-    ldr x9, [x9, #:got_lo12:_ZN3art7Runtime9instance_E]
-
-    // Our registers aren't intermixed - just spill in order.
-    ldr x9,[x9]  // x9 = & (art::Runtime * art::Runtime.instance_) .
-
-    // x9 = (ArtMethod*) Runtime.instance_.callee_save_methods[kRefAndArgs]  .
-    THIS_LOAD_REQUIRES_READ_BARRIER
-    ldr x9, [x9, RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET ]
+    ldr xIP0, [xIP0, RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET ]
 
     sub sp, sp, #176
     .cfi_adjust_cfa_offset 176
 
     // Ugly compile-time check, but we only have the preprocessor.
-#if (FRAME_SIZE_REFS_ONLY_CALLEE_SAVE != 176)
-#error "REFS_ONLY_CALLEE_SAVE_FRAME(ARM64) size not as expected."
+#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVE != 176)
+#error "SAVE_ALL_CALLEE_SAVE_FRAME(ARM64) size not as expected."
 #endif
 
     // FP callee-saves
@@ -127,11 +48,12 @@
     stp d12, d13, [sp, #40]
     stp d14, d15, [sp, #56]
 
-    // Callee saved.
-    stp xSELF, x19, [sp, #72]
+    // Reserved registers
+    stp xSELF, xSUSPEND, [sp, #72]
     .cfi_rel_offset x18, 72
     .cfi_rel_offset x19, 80
 
+    // Callee-saves
     stp x20, x21, [sp, #88]
     .cfi_rel_offset x20, 88
     .cfi_rel_offset x21, 96
@@ -148,7 +70,7 @@
     .cfi_rel_offset x26, 136
     .cfi_rel_offset x27, 144
 
-    stp x28, xFP, [sp, #152]    // Save FP.
+    stp x28, x29, [sp, #152]
     .cfi_rel_offset x28, 152
     .cfi_rel_offset x29, 160
 
@@ -156,51 +78,107 @@
     .cfi_rel_offset x30, 168
 
     // Loads appropriate callee-save-method
-    str x9, [sp]    // Store ArtMethod* Runtime::callee_save_methods_[kRefsAndArgs]
+    str xIP0, [sp]    // Store ArtMethod* Runtime::callee_save_methods_[kSaveAll]
 .endm
 
+    /*
+     * Macro that sets up the callee save frame to conform with
+     * Runtime::CreateCalleeSaveMethod(kRefsOnly).
+     */
+.macro SETUP_REF_ONLY_CALLEE_SAVE_FRAME
+    adrp xIP0, :got:_ZN3art7Runtime9instance_E
+    ldr xIP0, [xIP0, #:got_lo12:_ZN3art7Runtime9instance_E]
+
+    // Our registers aren't intermixed - just spill in order.
+    ldr xIP0, [xIP0]  // xIP0 = & (art::Runtime * art::Runtime.instance_) .
+
+    // xIP0 = (ArtMethod*) Runtime.instance_.callee_save_methods[kRefsOnly].
+    THIS_LOAD_REQUIRES_READ_BARRIER
+    ldr xIP0, [xIP0, RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET ]
+
+    sub sp, sp, #96
+    .cfi_adjust_cfa_offset 96
+
+    // Ugly compile-time check, but we only have the preprocessor.
+#if (FRAME_SIZE_REFS_ONLY_CALLEE_SAVE != 96)
+#error "REFS_ONLY_CALLEE_SAVE_FRAME(ARM64) size not as expected."
+#endif
+
+    // Callee-saves
+    stp x20, x21,  [sp, #8]
+    .cfi_rel_offset x20, 8
+    .cfi_rel_offset x21, 16
+
+    stp x22, x23, [sp, #24]
+    .cfi_rel_offset x22, 24
+    .cfi_rel_offset x23, 32
+
+    stp x24, x25, [sp, #40]
+    .cfi_rel_offset x24, 40
+    .cfi_rel_offset x25, 48
+
+    stp x26, x27, [sp, #56]
+    .cfi_rel_offset x26, 56
+    .cfi_rel_offset x27, 64
+
+    stp x28, x29, [sp, #72]
+    .cfi_rel_offset x28, 72
+    .cfi_rel_offset x29, 80
+
+    // LR
+    str xLR, [sp, #88]
+    .cfi_rel_offset x30, 88
+
+    // Save xSELF to xETR.
+    mov xETR, xSELF
+
+    // Loads appropriate callee-save-method
+    str xIP0, [sp]    // Store ArtMethod* Runtime::callee_save_methods_[kRefsOnly]
+.endm
+
+// TODO: Probably no need to restore registers preserved by aapcs64.
 .macro RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
-    // FP callee saves
-    ldp d8, d9,   [sp, #8]
-    ldp d10, d11, [sp, #24]
-    ldp d12, d13, [sp, #40]
-    ldp d14, d15, [sp, #56]
+    // Restore xSELF.
+    mov xSELF, xETR
 
-    // Callee saved.
-    ldp xSELF, x19, [sp, #72]
-    .cfi_restore x18
-    .cfi_restore x19
-
-    ldp x20, x21, [sp, #88]
+    // Callee-saves
+    ldp x20, x21,  [sp, #8]
     .cfi_restore x20
     .cfi_restore x21
 
-    ldp x22, x23, [sp, #104]
+    ldp x22, x23, [sp, #24]
     .cfi_restore x22
     .cfi_restore x23
 
-    ldp x24, x25, [sp, #120]
+    ldp x24, x25, [sp, #40]
     .cfi_restore x24
     .cfi_restore x25
 
-    ldp x26, x27, [sp, #136]
+    ldp x26, x27, [sp, #56]
     .cfi_restore x26
     .cfi_restore x27
 
-    ldp x28, xFP, [sp, #152]    // Save FP.
+    ldp x28, x29, [sp, #72]
     .cfi_restore x28
     .cfi_restore x29
 
-    ldr xLR, [sp, #168]
+    // LR
+    ldr xLR, [sp, #88]
     .cfi_restore x30
 
-    add sp, sp, #176
-    .cfi_adjust_cfa_offset -176
+    add sp, sp, #96
+    .cfi_adjust_cfa_offset -96
 .endm
 
 .macro POP_REF_ONLY_CALLEE_SAVE_FRAME
-    add sp, sp, #176
-    .cfi_adjust_cfa_offset -176
+    // Restore xSELF as it might be scratched.
+    mov xSELF, xETR
+    // ETR
+    ldr xETR, [sp, #16]
+    .cfi_restore x21
+
+    add sp, sp, #96
+    .cfi_adjust_cfa_offset -96
 .endm
 
 .macro RESTORE_REF_ONLY_CALLEE_SAVE_FRAME_AND_RETURN
@@ -210,62 +188,61 @@
 
 
 .macro SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME_INTERNAL
-    sub sp, sp, #304
-    .cfi_adjust_cfa_offset 304
+    sub sp, sp, #224
+    .cfi_adjust_cfa_offset 224
 
     // Ugly compile-time check, but we only have the preprocessor.
-#if (FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE != 304)
+#if (FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE != 224)
 #error "REFS_AND_ARGS_CALLEE_SAVE_FRAME(ARM64) size not as expected."
 #endif
 
-    stp d0, d1,   [sp, #16]
-    stp d2, d3,   [sp, #32]
-    stp d4, d5,   [sp, #48]
-    stp d6, d7,   [sp, #64]
-    stp d8, d9,   [sp, #80]
-    stp d10, d11, [sp, #96]
-    stp d12, d13, [sp, #112]
-    stp d14, d15, [sp, #128]
+    // FP args
+    stp d0, d1, [sp, #16]
+    stp d2, d3, [sp, #32]
+    stp d4, d5, [sp, #48]
+    stp d6, d7, [sp, #64]
 
-    stp x1,  x2, [sp, #144]
-    .cfi_rel_offset x1, 144
-    .cfi_rel_offset x2, 152
+    // args and x20(callee-save)
+    stp x1,  x2, [sp, #80]
+    .cfi_rel_offset x1, 80
+    .cfi_rel_offset x2, 88
 
-    stp x3,  x4, [sp, #160]
-    .cfi_rel_offset x3, 160
-    .cfi_rel_offset x4, 168
+    stp x3,  x4, [sp, #96]
+    .cfi_rel_offset x3, 96
+    .cfi_rel_offset x4, 104
 
-    stp x5,  x6, [sp, #176]
-    .cfi_rel_offset x5, 176
-    .cfi_rel_offset x6, 184
+    stp x5,  x6, [sp, #112]
+    .cfi_rel_offset x5, 112
+    .cfi_rel_offset x6, 120
 
-    stp x7,  xSELF, [sp, #192]
-    .cfi_rel_offset x7, 192
-    .cfi_rel_offset x18, 200
+    stp x7, x20, [sp, #128]
+    .cfi_rel_offset x7, 128
+    .cfi_rel_offset x20, 136
 
-    stp x19, x20, [sp, #208]
-    .cfi_rel_offset x19, 208
-    .cfi_rel_offset x20, 216
+    // Callee-saves.
+    stp x21, x22, [sp, #144]
+    .cfi_rel_offset x21, 144
+    .cfi_rel_offset x22, 152
 
-    stp x21, x22, [sp, #224]
-    .cfi_rel_offset x21, 224
-    .cfi_rel_offset x22, 232
+    stp x23, x24, [sp, #160]
+    .cfi_rel_offset x23, 160
+    .cfi_rel_offset x24, 168
 
-    stp x23, x24, [sp, #240]
-    .cfi_rel_offset x23, 240
-    .cfi_rel_offset x24, 248
+    stp x25, x26, [sp, #176]
+    .cfi_rel_offset x25, 176
+    .cfi_rel_offset x26, 184
 
-    stp x25, x26, [sp, #256]
-    .cfi_rel_offset x25, 256
-    .cfi_rel_offset x26, 264
+    stp x27, x28, [sp, #192]
+    .cfi_rel_offset x27, 192
+    .cfi_rel_offset x28, 200
 
-    stp x27, x28, [sp, #272]
-    .cfi_rel_offset x27, 272
-    .cfi_rel_offset x28, 280
+    // x29(callee-save) and LR
+    stp x29, xLR, [sp, #208]
+    .cfi_rel_offset x29, 208
+    .cfi_rel_offset x30, 216
 
-    stp xFP, xLR, [sp, #288]
-    .cfi_rel_offset x29, 288
-    .cfi_rel_offset x30, 296
+    // Save xSELF to xETR.
+    mov xETR, xSELF
 .endm
 
     /*
@@ -275,75 +252,73 @@
      * TODO This is probably too conservative - saving FP & LR.
      */
 .macro SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME
-    adrp x9, :got:_ZN3art7Runtime9instance_E
-    ldr x9, [x9, #:got_lo12:_ZN3art7Runtime9instance_E]
+    adrp xIP0, :got:_ZN3art7Runtime9instance_E
+    ldr xIP0, [xIP0, #:got_lo12:_ZN3art7Runtime9instance_E]
 
     // Our registers aren't intermixed - just spill in order.
-    ldr x9,[x9]  // x9 = & (art::Runtime * art::Runtime.instance_) .
+    ldr xIP0, [xIP0]  // xIP0 = & (art::Runtime * art::Runtime.instance_) .
 
-    // x9 = (ArtMethod*) Runtime.instance_.callee_save_methods[kRefAndArgs]  .
+    // xIP0 = (ArtMethod*) Runtime.instance_.callee_save_methods[kRefsAndArgs].
     THIS_LOAD_REQUIRES_READ_BARRIER
-    ldr x9, [x9, RUNTIME_REF_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET ]
+    ldr xIP0, [xIP0, RUNTIME_REF_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET ]
 
     SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME_INTERNAL
 
-    str x9, [sp]    // Store ArtMethod* Runtime::callee_save_methods_[kRefsAndArgs]
+    str xIP0, [sp]    // Store ArtMethod* Runtime::callee_save_methods_[kRefsAndArgs]
 .endm
 
+// TODO: Probably no need to restore registers preserved by aapcs64.
 .macro RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME
+    // Restore xSELF.
+    mov xSELF, xETR
 
-    ldp d0, d1,   [sp, #16]
-    ldp d2, d3,   [sp, #32]
-    ldp d4, d5,   [sp, #48]
-    ldp d6, d7,   [sp, #64]
-    ldp d8, d9,   [sp, #80]
-    ldp d10, d11, [sp, #96]
-    ldp d12, d13, [sp, #112]
-    ldp d14, d15, [sp, #128]
+    // FP args
+    ldp d0, d1, [sp, #16]
+    ldp d2, d3, [sp, #32]
+    ldp d4, d5, [sp, #48]
+    ldp d6, d7, [sp, #64]
 
-    // args.
-    ldp x1,  x2, [sp, #144]
+    // args and x20(callee-save)
+    ldp x1,  x2, [sp, #80]
     .cfi_restore x1
     .cfi_restore x2
 
-    ldp x3,  x4, [sp, #160]
+    ldp x3,  x4, [sp, #96]
     .cfi_restore x3
     .cfi_restore x4
 
-    ldp x5,  x6, [sp, #176]
+    ldp x5,  x6, [sp, #112]
     .cfi_restore x5
     .cfi_restore x6
 
-    ldp x7,  xSELF, [sp, #192]
+    ldp x7, x20, [sp, #128]
     .cfi_restore x7
-    .cfi_restore x18
-
-    ldp x19, x20, [sp, #208]
-    .cfi_restore x19
     .cfi_restore x20
 
-    ldp x21, x22, [sp, #224]
+    // Callee-saves.
+    ldp x21, x22, [sp, #144]
     .cfi_restore x21
     .cfi_restore x22
 
-    ldp x23, x24, [sp, #240]
+    ldp x23, x24, [sp, #160]
     .cfi_restore x23
     .cfi_restore x24
 
-    ldp x25, x26, [sp, #256]
+    ldp x25, x26, [sp, #176]
     .cfi_restore x25
     .cfi_restore x26
 
-    ldp x27, x28, [sp, #272]
+    ldp x27, x28, [sp, #192]
     .cfi_restore x27
     .cfi_restore x28
 
-    ldp xFP, xLR, [sp, #288]
+    // x29(callee-save) and LR
+    ldp x29, xLR, [sp, #208]
     .cfi_restore x29
     .cfi_restore x30
 
-    add sp, sp, #304
-    .cfi_adjust_cfa_offset -304
+    add sp, sp, #224
+    .cfi_adjust_cfa_offset -224
 .endm
 
 .macro RETURN_IF_RESULT_IS_ZERO
@@ -381,7 +356,7 @@
 .endm
 
 .macro RETURN_OR_DELIVER_PENDING_EXCEPTION
-    RETURN_OR_DELIVER_PENDING_EXCEPTION_REG x9
+    RETURN_OR_DELIVER_PENDING_EXCEPTION_REG xIP0
 .endm
 
 // Same as above with x1. This is helpful in stubs that want to avoid clobbering another register.
@@ -400,7 +375,7 @@
     .extern \cxx_name
 ENTRY \c_name
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
-    mov x0, xSELF                        // pass Thread::Current
+    mov x0, xSELF                     // pass Thread::Current
     mov x1, sp                        // pass SP
     b   \cxx_name                     // \cxx_name(Thread*, SP)
 END \c_name
@@ -410,7 +385,7 @@
     .extern \cxx_name
 ENTRY \c_name
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context.
-    mov x1, xSELF                       // pass Thread::Current.
+    mov x1, xSELF                     // pass Thread::Current.
     mov x2, sp                        // pass SP.
     b   \cxx_name                     // \cxx_name(arg, Thread*, SP).
     brk 0
@@ -421,7 +396,7 @@
     .extern \cxx_name
 ENTRY \c_name
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
-    mov x2, xSELF                       // pass Thread::Current
+    mov x2, xSELF                     // pass Thread::Current
     mov x3, sp                        // pass SP
     b   \cxx_name                     // \cxx_name(arg1, arg2, Thread*, SP)
     brk 0
@@ -478,7 +453,7 @@
      *
      * Adapted from ARM32 code.
      *
-     * Clobbers x12.
+     * Clobbers xIP0.
      */
 .macro INVOKE_TRAMPOLINE c_name, cxx_name
     .extern \cxx_name
@@ -491,10 +466,10 @@
     mov    x3, xSELF                      // pass Thread::Current
     mov    x4, sp
     bl     \cxx_name                      // (method_idx, this, caller, Thread*, SP)
-    mov    x12, x1                         // save Method*->code_
+    mov    xIP0, x1                       // save Method*->code_
     RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME
     cbz    x0, 1f                         // did we find the target? if not go to exception delivery
-    br     x12                             // tail call to target
+    br     xIP0                           // tail call to target
 1:
     DELIVER_PENDING_EXCEPTION
 END \c_name
@@ -511,7 +486,7 @@
 
 .macro INVOKE_STUB_CREATE_FRAME
 
-SAVE_SIZE=6*8   // x4, x5, x19(wSUSPEND), SP, LR & FP saved.
+SAVE_SIZE=6*8   // x4, x5, xSUSPEND, SP, LR & FP saved.
 SAVE_SIZE_AND_METHOD=SAVE_SIZE+STACK_REFERENCE_SIZE
 
 
@@ -527,7 +502,7 @@
     .cfi_def_cfa_register x10              // before this.
     .cfi_adjust_cfa_offset SAVE_SIZE
 
-    stp x9, x19, [x10, #32]                // Save old stack pointer and x19(wSUSPEND)
+    stp x9, xSUSPEND, [x10, #32]           // Save old stack pointer and xSUSPEND
     .cfi_rel_offset sp, 32
     .cfi_rel_offset x19, 40
 
@@ -608,7 +583,7 @@
     str x0, [x4]
 
 .Lexit_art_quick_invoke_stub\@:
-    ldp x2, x19, [xFP, #32]   // Restore stack pointer and x19.
+    ldp x2, xSUSPEND, [xFP, #32]   // Restore stack pointer and xSUSPEND.
     .cfi_restore x19
     mov sp, x2
     .cfi_restore sp
@@ -636,6 +611,7 @@
  *  |       FP''           | <- SP'
  *  +----------------------+
  *  +----------------------+
+ *  |        x19           | <- Used as wSUSPEND, won't be restored by managed code.
  *  |        SP'           |
  *  |        X5            |
  *  |        X4            |        Saved registers
@@ -1241,8 +1217,6 @@
 .endm
 
 // Macros taking opportunity of code similarities for downcalls with referrer.
-
-// TODO: xSELF -> x19. Temporarily rely on xSELF being saved in REF_ONLY
 .macro ONE_ARG_REF_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
@@ -1256,7 +1230,6 @@
 END \name
 .endm
 
-// TODO: xSELF -> x19. Temporarily rely on xSELF being saved in REF_ONLY
 .macro TWO_ARG_REF_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
@@ -1270,7 +1243,6 @@
 END \name
 .endm
 
-// TODO: xSELF -> x19. Temporarily rely on xSELF being saved in REF_ONLY
 .macro THREE_ARG_REF_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
@@ -1363,8 +1335,8 @@
     mov     x2, xSELF                   // pass Thread::Current
     mov     x3, sp                      // pass SP
     bl      artQuickProxyInvokeHandler  // (Method* proxy method, receiver, Thread*, SP)
-    ldr     xSELF, [sp, #200]           // Restore self pointer.
-    ldr     x2, [xSELF, THREAD_EXCEPTION_OFFSET]
+    // Use xETR as xSELF might be scratched by the native function above.
+    ldr     x2, [xETR, THREAD_EXCEPTION_OFFSET]
     cbnz    x2, .Lexception_in_proxy    // success if no exception is pending
     RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME // Restore frame
     fmov    d0, x0                      // Store result in d0 in case it was float or double
@@ -1375,14 +1347,14 @@
 END art_quick_proxy_invoke_handler
 
     /*
-     * Called to resolve an imt conflict. x12 is a hidden argument that holds the target method's
+     * Called to resolve an imt conflict. xIP1 is a hidden argument that holds the target method's
      * dex method index.
      */
 ENTRY art_quick_imt_conflict_trampoline
     ldr    w0, [sp, #0]                                // load caller Method*
     ldr    w0, [x0, #METHOD_DEX_CACHE_METHODS_OFFSET]  // load dex_cache_resolved_methods
     add    x0, x0, #OBJECT_ARRAY_DATA_OFFSET           // get starting address of data
-    ldr    w0, [x0, x12, lsl 2]                        // load the target method
+    ldr    w0, [x0, xIP1, lsl 2]                       // load the target method
     b art_quick_invoke_interface_trampoline
 END art_quick_imt_conflict_trampoline
 
@@ -1392,10 +1364,10 @@
     mov x3, sp
     bl artQuickResolutionTrampoline  // (called, receiver, Thread*, SP)
     cbz x0, 1f
-    mov x9, x0              // Remember returned code pointer in x9.
+    mov xIP0, x0            // Remember returned code pointer in xIP0.
     ldr w0, [sp, #0]        // artQuickResolutionTrampoline puts called method in *SP.
     RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME
-    br x9
+    br xIP0
 1:
     RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME
     DELIVER_PENDING_EXCEPTION
@@ -1419,7 +1391,6 @@
  * | X22               |    callee save
  * | X21               |    callee save
  * | X20               |    callee save
- * | X19               |    callee save
  * | X7                |    arg7
  * | X6                |    arg6
  * | X5                |    arg5
@@ -1427,14 +1398,6 @@
  * | X3                |    arg3
  * | X2                |    arg2
  * | X1                |    arg1
- * | D15               |    float arg 8
- * | D14               |    float arg 8
- * | D13               |    float arg 8
- * | D12               |    callee save
- * | D11               |    callee save
- * | D10               |    callee save
- * | D9                |    callee save
- * | D8                |    callee save
  * | D7                |    float arg 8
  * | D6                |    float arg 7
  * | D5                |    float arg 6
@@ -1476,8 +1439,8 @@
     // of the frame when the handle scope is inserted.
     mov xFP, sp
 
-    mov x8, #5120
-    sub sp, sp, x8
+    mov xIP0, #5120
+    sub sp, sp, xIP0
 
     // prepare for artQuickGenericJniTrampoline call
     // (Thread*,  SP)
@@ -1517,17 +1480,14 @@
 
     add sp, sp, #128
 
-    blr xIP0           // native call.
-
-    // Restore self pointer.
-    ldr xSELF, [x28, #200]
+    blr xIP0        // native call.
 
     // result sign extension is handled in C code
     // prepare for artQuickGenericJniEndTrampoline call
     // (Thread*, result, result_f)
     //    x0       x1       x2        <= C calling convention
     mov x1, x0      // Result (from saved)
-    mov x0, xSELF   // Thread register
+    mov x0, xETR    // Thread register; the original xSELF might be scratched by native code.
     fmov x2, d0     // d0 will contain floating point result, but needs to go into x2
 
     bl artQuickGenericJniEndTrampoline
@@ -1536,11 +1496,9 @@
     mov sp, x28
     .cfi_def_cfa_register sp
 
-    // Restore self pointer.
-    ldr xSELF, [x28, #200]
-
     // Pending exceptions possible.
-    ldr x1, [xSELF, THREAD_EXCEPTION_OFFSET]
+    // Use xETR as xSELF might be scratched by native code.
+    ldr x1, [xETR, THREAD_EXCEPTION_OFFSET]
     cbnz x1, .Lexception_in_native
 
     // Tear down the callee-save frame.
@@ -1553,7 +1511,6 @@
 .Lentry_error:
     mov sp, x28
     .cfi_def_cfa_register sp
-    ldr xSELF, [x28, #200]
 .Lexception_in_native:
     RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME
     DELIVER_PENDING_EXCEPTION
@@ -1592,19 +1549,19 @@
 ENTRY art_quick_instrumentation_entry
     SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME
 
-    mov   x19, x0             // Preserve method reference in a callee-save.
+    mov   x20, x0             // Preserve method reference in a callee-save.
 
     mov   x2, xSELF
     mov   x3, sp
     mov   x4, xLR
     bl    artInstrumentationMethodEntryFromCode  // (Method*, Object*, Thread*, SP, LR)
 
-    mov   x9, x0              // x0 = result of call.
-    mov   x0, x19             // Reload method reference.
+    mov   xIP0, x0            // x0 = result of call.
+    mov   x0, x20             // Reload method reference.
 
     RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME  // Note: will restore xSELF
     adr   xLR, art_quick_instrumentation_exit
-    br    x9                 // Tail-call method with lr set to art_quick_instrumentation_exit.
+    br    xIP0                // Tail-call method with lr set to art_quick_instrumentation_exit.
 END art_quick_instrumentation_entry
 
     .extern artInstrumentationMethodExitFromCode
@@ -1627,18 +1584,16 @@
     mov   x0, xSELF           // Pass Thread.
     bl   artInstrumentationMethodExitFromCode    // (Thread*, SP, gpr_res, fpr_res)
 
-    mov   x9, x0              // Return address from instrumentation call.
+    mov   xIP0, x0            // Return address from instrumentation call.
     mov   xLR, x1             // r1 is holding link register if we're to bounce to deoptimize
 
     ldr   d0, [sp, #8]        // Restore floating-point result.
     ldr   x0, [sp], 16        // Restore integer result, and drop stack area.
     .cfi_adjust_cfa_offset 16
 
-    // Need to restore x18.
-    ldr   xSELF, [sp, #72]
     POP_REF_ONLY_CALLEE_SAVE_FRAME
 
-    br    x9                  // Tail-call out.
+    br    xIP0                // Tail-call out.
 END art_quick_instrumentation_exit
 
     /*
@@ -1703,15 +1658,15 @@
 .Lindexof_loop4:
     ldrh  w6, [x0, #2]!
     ldrh  w7, [x0, #2]!
-    ldrh  w8, [x0, #2]!
-    ldrh  w9, [x0, #2]!
+    ldrh  wIP0, [x0, #2]!
+    ldrh  wIP1, [x0, #2]!
     cmp   w6, w1
     b.eq  .Lmatch_0
     cmp   w7, w1
     b.eq  .Lmatch_1
-    cmp   w8, w1
+    cmp   wIP0, w1
     b.eq  .Lmatch_2
-    cmp   w9, w1
+    cmp   wIP1, w1
     b.eq  .Lmatch_3
     subs  w2, w2, #4
     b.ge  .Lindexof_loop4
@@ -1855,17 +1810,17 @@
     ret
 
 .Ldo_memcmp16:
-    mov x14, x0                  // Save x0 and LR. __memcmp16 does not use these temps.
-    mov x15, xLR                 //                 TODO: Codify and check that?
+    mov xIP0, x0                  // Save x0 and LR. __memcmp16 does not use these temps.
+    mov xIP1, xLR                 //                 TODO: Codify and check that?
 
     mov x0, x2
     uxtw x2, w3
     bl __memcmp16
 
-    mov xLR, x15                 // Restore LR.
+    mov xLR, xIP1                 // Restore LR.
 
     cmp x0, #0                   // Check the memcmp difference.
-    csel x0, x0, x14, ne         // x0 := x0 != 0 ? x14(prev x0=length diff) : x1.
+    csel x0, x0, xIP0, ne         // x0 := (x0 != 0) ? x0 : xIP0 (prev x0 = length diff).
     ret
 END art_quick_string_compareto
 
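All of the xSELF/xETR shuffling in this file follows one contract, collected here as a sketch (register names per asm_support_arm64.S):

    // On frame setup:       mov xETR, xSELF        // Park TR in callee-saved x21.
    // Around native code:   blr <native function>  // May clobber x18 (xSELF) per aapcs64.
    // After native code:    ldr xN, [xETR, #off]   // Read Thread fields through xETR.
    // On frame teardown:    mov xSELF, xETR        // Restore TR before managed code runs again.
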
diff --git a/runtime/arch/arm64/quick_method_frame_info_arm64.h b/runtime/arch/arm64/quick_method_frame_info_arm64.h
index cb830ac..15c6c07 100644
--- a/runtime/arch/arm64/quick_method_frame_info_arm64.h
+++ b/runtime/arch/arm64/quick_method_frame_info_arm64.h
@@ -20,53 +20,53 @@
 #include "quick/quick_method_frame_info.h"
 #include "registers_arm64.h"
 #include "runtime.h"  // for Runtime::CalleeSaveType.
+#include "utils.h"  // for POPCOUNT
 
 namespace art {
 namespace arm64 {
 
+// Registers that need to be restored but are not preserved by aapcs64.
+static constexpr uint32_t kArm64CalleeSaveAlwaysSpills =
+    // Note: ArtMethod::GetReturnPcOffsetInBytes() relies on the assumption that
+    // LR is always saved at the top of the frame for all targets.
+    // That is, lr = *(sp + frame_size - pointer_size).
+    (1 << art::arm64::LR);
 // Callee saved registers
 static constexpr uint32_t kArm64CalleeSaveRefSpills =
-    (1 << art::arm64::X19) | (1 << art::arm64::X20) | (1 << art::arm64::X21) |
-    (1 << art::arm64::X22) | (1 << art::arm64::X23) | (1 << art::arm64::X24) |
-    (1 << art::arm64::X25) | (1 << art::arm64::X26) | (1 << art::arm64::X27) |
-    (1 << art::arm64::X28);
+    (1 << art::arm64::X20) | (1 << art::arm64::X21) | (1 << art::arm64::X22) |
+    (1 << art::arm64::X23) | (1 << art::arm64::X24) | (1 << art::arm64::X25) |
+    (1 << art::arm64::X26) | (1 << art::arm64::X27) | (1 << art::arm64::X28) |
+    (1 << art::arm64::X29);
 // X0 is the method pointer. Not saved.
 static constexpr uint32_t kArm64CalleeSaveArgSpills =
     (1 << art::arm64::X1) | (1 << art::arm64::X2) | (1 << art::arm64::X3) |
     (1 << art::arm64::X4) | (1 << art::arm64::X5) | (1 << art::arm64::X6) |
     (1 << art::arm64::X7);
-// TODO  This is conservative. Only ALL should include the thread register.
-// The thread register is not preserved by the aapcs64.
-// LR is always saved.
-static constexpr uint32_t kArm64CalleeSaveAllSpills =  0;  // (1 << art::arm64::LR);
+static constexpr uint32_t kArm64CalleeSaveAllSpills =
+    // Thread register.
+    (1 << art::arm64::X18) |
+    // Suspend register.
+    (1 << art::arm64::X19);
 
-// Save callee-saved floating point registers. Rest are scratch/parameters.
+static constexpr uint32_t kArm64CalleeSaveFpAlwaysSpills = 0;
+static constexpr uint32_t kArm64CalleeSaveFpRefSpills = 0;
 static constexpr uint32_t kArm64CalleeSaveFpArgSpills =
     (1 << art::arm64::D0) | (1 << art::arm64::D1) | (1 << art::arm64::D2) |
     (1 << art::arm64::D3) | (1 << art::arm64::D4) | (1 << art::arm64::D5) |
     (1 << art::arm64::D6) | (1 << art::arm64::D7);
-static constexpr uint32_t kArm64CalleeSaveFpRefSpills =
+static constexpr uint32_t kArm64FpAllSpills =
     (1 << art::arm64::D8)  | (1 << art::arm64::D9)  | (1 << art::arm64::D10) |
     (1 << art::arm64::D11)  | (1 << art::arm64::D12)  | (1 << art::arm64::D13) |
     (1 << art::arm64::D14)  | (1 << art::arm64::D15);
-static constexpr uint32_t kArm64FpAllSpills =
-    kArm64CalleeSaveFpArgSpills |
-    (1 << art::arm64::D16)  | (1 << art::arm64::D17) | (1 << art::arm64::D18) |
-    (1 << art::arm64::D19)  | (1 << art::arm64::D20) | (1 << art::arm64::D21) |
-    (1 << art::arm64::D22)  | (1 << art::arm64::D23) | (1 << art::arm64::D24) |
-    (1 << art::arm64::D25)  | (1 << art::arm64::D26) | (1 << art::arm64::D27) |
-    (1 << art::arm64::D28)  | (1 << art::arm64::D29) | (1 << art::arm64::D30) |
-    (1 << art::arm64::D31);
 
 constexpr uint32_t Arm64CalleeSaveCoreSpills(Runtime::CalleeSaveType type) {
-  return kArm64CalleeSaveRefSpills |
+  return kArm64CalleeSaveAlwaysSpills | kArm64CalleeSaveRefSpills |
       (type == Runtime::kRefsAndArgs ? kArm64CalleeSaveArgSpills : 0) |
-      (type == Runtime::kSaveAll ? kArm64CalleeSaveAllSpills : 0) | (1 << art::arm64::FP) |
-      (1 << art::arm64::X18) | (1 << art::arm64::LR);
+      (type == Runtime::kSaveAll ? kArm64CalleeSaveAllSpills : 0);
 }
 
 constexpr uint32_t Arm64CalleeSaveFpSpills(Runtime::CalleeSaveType type) {
-  return kArm64CalleeSaveFpRefSpills |
+  return kArm64CalleeSaveFpAlwaysSpills | kArm64CalleeSaveFpRefSpills |
       (type == Runtime::kRefsAndArgs ? kArm64CalleeSaveFpArgSpills: 0) |
       (type == Runtime::kSaveAll ? kArm64FpAllSpills : 0);
 }
@@ -83,6 +83,22 @@
                               Arm64CalleeSaveFpSpills(type));
 }
 
+constexpr size_t Arm64CalleeSaveFpr1Offset(Runtime::CalleeSaveType type) {
+  return Arm64CalleeSaveFrameSize(type) -
+         (POPCOUNT(Arm64CalleeSaveCoreSpills(type)) +
+          POPCOUNT(Arm64CalleeSaveFpSpills(type))) * kArm64PointerSize;
+}
+
+constexpr size_t Arm64CalleeSaveGpr1Offset(Runtime::CalleeSaveType type) {
+  return Arm64CalleeSaveFrameSize(type) -
+         POPCOUNT(Arm64CalleeSaveCoreSpills(type)) * kArm64PointerSize;
+}
+
+constexpr size_t Arm64CalleeSaveLrOffset(Runtime::CalleeSaveType type) {
+  return Arm64CalleeSaveFrameSize(type) -
+      POPCOUNT(Arm64CalleeSaveCoreSpills(type) & (-(1 << LR))) * kArm64PointerSize;
+}
+
 }  // namespace arm64
 }  // namespace art
 
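Worked example for kRefsAndArgs (frame size 224; 18 core and 8 FP spills per the masks above), matching SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME_INTERNAL in quick_entrypoints_arm64.S:

    Fpr1Offset = 224 - (18 + 8) * 8 = 16   // stp d0, d1,   [sp, #16]
    Gpr1Offset = 224 - 18 * 8       = 80   // stp x1, x2,   [sp, #80]
    LrOffset   = 224 - 1 * 8        = 216  // stp x29, xLR, [sp, #208] -> LR at 216
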
diff --git a/runtime/arch/arm64/registers_arm64.h b/runtime/arch/arm64/registers_arm64.h
index ea346e0..9ccab70 100644
--- a/runtime/arch/arm64/registers_arm64.h
+++ b/runtime/arch/arm64/registers_arm64.h
@@ -57,7 +57,7 @@
   X30 = 30,
   X31 = 31,
   TR  = 18,     // ART Thread Register - Managed Runtime (Caller Saved Reg)
-  ETR = 19,     // ART Thread Register - External Calls  (Callee Saved Reg)
+  ETR = 21,     // ART Thread Register - External Calls  (Callee Saved Reg)
   IP0 = 16,     // Used as scratch by VIXL.
   IP1 = 17,     // Used as scratch by ART JNI Assembler.
   FP  = 29,
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 5599c21..3e8dc23 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -3501,14 +3501,19 @@
       proxy_class->GetDirectMethods();
   CHECK_EQ(proxy_direct_methods->GetLength(), 16);
   mirror::ArtMethod* proxy_constructor = proxy_direct_methods->Get(2);
-  // Clone the existing constructor of Proxy (our constructor would just invoke it so steal its
-  // code_ too)
-  mirror::ArtMethod* constructor =
-      down_cast<mirror::ArtMethod*>(proxy_constructor->Clone(self));
-  if (constructor == NULL) {
+  mirror::ArtMethod* constructor = down_cast<mirror::ArtMethod*>(proxy_constructor->Clone(self));
+  if (constructor == nullptr) {
     CHECK(self->IsExceptionPending());  // OOME.
-    return NULL;
+    return nullptr;
   }
+  // Make the proxy constructor's code always point to the uninstrumented code. This avoids
+  // getting a method enter event for the proxy constructor as the proxy constructor doesn't
+  // have an activation.
+  bool have_portable_code;
+  constructor->SetEntryPointFromQuickCompiledCode(GetQuickOatCodeFor(proxy_constructor));
+  constructor->SetEntryPointFromPortableCompiledCode(GetPortableOatCodeFor(proxy_constructor,
+                                                                           &have_portable_code));
+
   // Make this constructor public and fix the class to be our Proxy version
   constructor->SetAccessFlags((constructor->GetAccessFlags() & ~kAccProtected) | kAccPublic);
   constructor->SetDeclaringClass(klass.Get());
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 338bd06..fa198d7 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -58,9 +58,12 @@
   static constexpr bool kQuickSoftFloatAbi = true;  // This is a soft float ABI.
   static constexpr size_t kNumQuickGprArgs = 3;  // 3 arguments passed in GPRs.
   static constexpr size_t kNumQuickFprArgs = 0;  // 0 arguments passed in FPRs.
-  static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset = 0;  // Offset of first FPR arg.
-  static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Gpr1Offset = 8;  // Offset of first GPR arg.
-  static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_LrOffset = 44;  // Offset of return address.
+  static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset =
+      arm::ArmCalleeSaveFpr1Offset(Runtime::kRefsAndArgs);  // Offset of first FPR arg.
+  static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Gpr1Offset =
+      arm::ArmCalleeSaveGpr1Offset(Runtime::kRefsAndArgs);  // Offset of first GPR arg.
+  static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_LrOffset =
+      arm::ArmCalleeSaveLrOffset(Runtime::kRefsAndArgs);  // Offset of return address.
   static size_t GprIndexToGprOffset(uint32_t gpr_index) {
     return gpr_index * GetBytesPerGprSpillLocation(kRuntimeISA);
   }
@@ -74,13 +77,13 @@
   // | arg1 spill |  |
   // | Method*    | ---
   // | LR         |
-  // | X28        |
+  // | X29        |
   // |  :         |
-  // | X19        |
+  // | X20        |
   // | X7         |
   // | :          |
   // | X1         |
-  // | D15        |
+  // | D7         |
   // |  :         |
   // | D0         |
   // |            |    padding
@@ -88,9 +91,12 @@
   static constexpr bool kQuickSoftFloatAbi = false;  // This is a hard float ABI.
   static constexpr size_t kNumQuickGprArgs = 7;  // 7 arguments passed in GPRs.
   static constexpr size_t kNumQuickFprArgs = 8;  // 8 arguments passed in FPRs.
-  static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset = 16;  // Offset of first FPR arg.
-  static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Gpr1Offset = 144;  // Offset of first GPR arg.
-  static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_LrOffset = 296;  // Offset of return address.
+  static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset =
+      arm64::Arm64CalleeSaveFpr1Offset(Runtime::kRefsAndArgs);  // Offset of first FPR arg.
+  static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Gpr1Offset =
+      arm64::Arm64CalleeSaveGpr1Offset(Runtime::kRefsAndArgs);  // Offset of first GPR arg.
+  static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_LrOffset =
+      arm64::Arm64CalleeSaveLrOffset(Runtime::kRefsAndArgs);  // Offset of return address.
   static size_t GprIndexToGprOffset(uint32_t gpr_index) {
     return gpr_index * GetBytesPerGprSpillLocation(kRuntimeISA);
   }
@@ -586,8 +592,7 @@
   const char* old_cause =
       self->StartAssertNoThreadSuspension("Adding to IRT proxy object arguments");
   // Register the top of the managed stack, making stack crawlable.
-  DCHECK_EQ(sp->AsMirrorPtr(), proxy_method)
-  << PrettyMethod(proxy_method);
+  DCHECK_EQ(sp->AsMirrorPtr(), proxy_method) << PrettyMethod(proxy_method);
   self->SetTopOfStack(sp, 0);
   DCHECK_EQ(proxy_method->GetFrameSizeInBytes(),
             Runtime::Current()->GetCalleeSaveMethod(Runtime::kRefsAndArgs)->GetFrameSizeInBytes())
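
Since the offsets are now constexpr, a mismatch between these tables and the hand-maintained asm_support defines could also be caught at compile time; a possible guard (a sketch, not part of this change):

    static_assert(arm64::Arm64CalleeSaveFrameSize(Runtime::kRefsAndArgs) ==
                      FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE,
                  "RefsAndArgs frame size must match asm_support_arm64.h");
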
diff --git a/runtime/mirror/art_method.cc b/runtime/mirror/art_method.cc
index 211ba1d..c27b203 100644
--- a/runtime/mirror/art_method.cc
+++ b/runtime/mirror/art_method.cc
@@ -158,12 +158,12 @@
       }
     }
   }
-#ifndef NDEBUG
-  StackHandleScope<2> hs(Thread::Current());
-  MethodHelper result_mh(hs.NewHandle(result));
-  MethodHelper this_mh(hs.NewHandle(this));
-  DCHECK(result == NULL || this_mh.HasSameNameAndSignature(&result_mh));
-#endif
+  if (kIsDebugBuild) {
+    StackHandleScope<2> hs(Thread::Current());
+    MethodHelper result_mh(hs.NewHandle(result));
+    MethodHelper this_mh(hs.NewHandle(this));
+    DCHECK(result == nullptr || this_mh.HasSameNameAndSignature(&result_mh));
+  }
   return result;
 }
 
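Replacing #ifndef NDEBUG with if (kIsDebugBuild) keeps the checks parsed and type-checked in every build flavor, while the compiler folds them away in release builds. A standalone illustration of the idiom (not ART code):

    #include <cassert>

    static constexpr bool kIsDebugBuild =
    #ifdef NDEBUG
        false;
    #else
        true;
    #endif

    void CheckIndex(int index) {
      if (kIsDebugBuild) {  // Compiled in all builds, removed by the optimizer when false.
        assert(index >= 0);
      }
    }
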
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index 3d03b89..d05c9c9 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -81,38 +81,10 @@
 
 # Tests that are broken in --trace mode.
 TEST_ART_BROKEN_TRACE_RUN_TESTS := \
-  003-omnibus-opcodes \
-  004-InterfaceTest \
   004-SignalTest \
-  004-ThreadStress \
-  005-annotations \
-  012-math \
   018-stack-overflow \
-  023-many-interfaces \
-  027-arithmetic \
-  031-class-attributes \
-  037-inherit \
-  044-proxy \
-  046-reflect \
-  051-thread \
-  055-enum-performance \
-  062-character-encodings \
-  064-field-access \
-  074-gc-thrash \
-  078-polymorphic-virtual \
-  080-oom-throw \
-  082-inline-execute \
-  083-compiler-regressions \
-  093-serialization \
   097-duplicate-method \
-  100-reflect2 \
-  102-concurrent-gc \
-  103-string-append \
-  107-int-math2 \
-  112-double-math \
-  114-ParallelGC \
-  700-LoadArgRegs \
-  701-easy-div-rem
+  107-int-math2
 
 ART_TEST_KNOWN_BROKEN += $(foreach test, $(TEST_ART_BROKEN_TRACE_RUN_TESTS), $(call all-run-test-names,$(test),-trace,-relocate))
 ART_TEST_KNOWN_BROKEN += $(foreach test, $(TEST_ART_BROKEN_TRACE_RUN_TESTS), $(call all-run-test-names,$(test),-trace,-no-prebuild))