Revert "Revert "Use implicit null checks inside try blocks.""

Fix implicit checks in try blocks to emit stack maps.
Fix arm64 null pointer exception from signal entrypoint to call
the runtime handler instead of simply jumping there.

On Nexus 9, AOSP ToT, the boot.oat size reduction is
  prebuilt multi-part boot image:
    - 32-bit boot.oat: -448KiB (-1.3%)
    - 64-bit boot.oat: -528KiB (-1.2%)
  on-device built single boot image:
    - 32-bit boot.oat: -448KiB (-1.4%)
    - 64-bit boot.oat: -528KiB (-1.3%)
Note that the oat files no longer contain dex files which
have been moved to vdex, so the percentages are not directly
comparable with those reported in the original commit.

Test: Run ART test suite including gc-stress on host and Nexus 9.
Bug: 30212852
Bug: 31468464

This reverts commit 0719b5b9b458cb3eb9f0823f0dacdfe1a71214dd.

Change-Id: If8a9da8c11adf2aad203e93b6684ce16ed776285
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 2087888..6732670 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -1081,13 +1081,6 @@
   }
 }
 
-bool CodeGenerator::IsImplicitNullCheckAllowed(HNullCheck* null_check) const {
-  return compiler_options_.GetImplicitNullChecks() &&
-         // Null checks which might throw into a catch block need to save live
-         // registers and therefore cannot be done implicitly.
-         !null_check->CanThrowIntoCatchBlock();
-}
-
 bool CodeGenerator::CanMoveNullCheckToUser(HNullCheck* null_check) {
   HInstruction* first_next_not_move = null_check->GetNextDisregardingMoves();
 
@@ -1096,6 +1089,10 @@
 }
 
 void CodeGenerator::MaybeRecordImplicitNullCheck(HInstruction* instr) {
+  if (!compiler_options_.GetImplicitNullChecks()) {
+    return;
+  }
+
   // If we are from a static path don't record the pc as we can't throw NPE.
   // NB: having the checks here makes the code much less verbose in the arch
   // specific code generators.
@@ -1114,16 +1111,35 @@
   // and needs to record the pc.
   if (first_prev_not_move != nullptr && first_prev_not_move->IsNullCheck()) {
     HNullCheck* null_check = first_prev_not_move->AsNullCheck();
-    if (IsImplicitNullCheckAllowed(null_check)) {
-      // TODO: The parallel moves modify the environment. Their changes need to be
-      // reverted otherwise the stack maps at the throw point will not be correct.
-      RecordPcInfo(null_check, null_check->GetDexPc());
-    }
+    // TODO: The parallel moves modify the environment. Their changes need to be
+    // reverted otherwise the stack maps at the throw point will not be correct.
+    RecordPcInfo(null_check, null_check->GetDexPc());
   }
 }
 
+LocationSummary* CodeGenerator::CreateNullCheckLocations(HNullCheck* null_check) {
+  // Note: Using kNoCall allows the method to be treated as leaf (and eliminate the
+  // HSuspendCheck from entry block). However, it will still get a valid stack frame
+  // because the HNullCheck needs an environment.
+  LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
+  // When throwing from a try block, we may need to retrieve dalvik registers from
+  // physical registers and we also need to set up stack mask for GC. This is
+  // implicitly achieved by passing kCallOnSlowPath to the LocationSummary.
+  bool can_throw_into_catch_block = null_check->CanThrowIntoCatchBlock();
+  if (can_throw_into_catch_block) {
+    call_kind = LocationSummary::kCallOnSlowPath;
+  }
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(null_check, call_kind);
+  if (can_throw_into_catch_block && compiler_options_.GetImplicitNullChecks()) {
+    locations->SetCustomSlowPathCallerSaves(RegisterSet());  // No caller-save registers.
+  }
+  locations->SetInAt(0, Location::RequiresRegister());
+  DCHECK(!null_check->HasUses());
+  return locations;
+}
+
 void CodeGenerator::GenerateNullCheck(HNullCheck* instruction) {
-  if (IsImplicitNullCheckAllowed(instruction)) {
+  if (compiler_options_.GetImplicitNullChecks()) {
     MaybeRecordStat(kImplicitNullCheckGenerated);
     GenerateImplicitNullCheck(instruction);
   } else {
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 0c60a98..b4d4b9b 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -313,6 +313,7 @@
 
   bool CanMoveNullCheckToUser(HNullCheck* null_check);
   void MaybeRecordImplicitNullCheck(HInstruction* instruction);
+  LocationSummary* CreateNullCheckLocations(HNullCheck* null_check);
   void GenerateNullCheck(HNullCheck* null_check);
   virtual void GenerateImplicitNullCheck(HNullCheck* null_check) = 0;
   virtual void GenerateExplicitNullCheck(HNullCheck* null_check) = 0;
@@ -322,12 +323,6 @@
   // TODO: Replace with a catch-entering instruction that records the environment.
   void RecordCatchBlockInfo();
 
-  // Returns true if implicit null checks are allowed in the compiler options
-  // and if the null check is not inside a try block. We currently cannot do
-  // implicit null checks in that case because we need the NullCheckSlowPath to
-  // save live registers, which may be needed by the runtime to set catch phis.
-  bool IsImplicitNullCheckAllowed(HNullCheck* null_check) const;
-
   // TODO: Avoid creating the `std::unique_ptr` here.
   void AddSlowPath(SlowPathCode* slow_path) {
     slow_paths_.push_back(std::unique_ptr<SlowPathCode>(slow_path));
@@ -713,6 +708,8 @@
   bool is_leaf_;
 
   // Whether an instruction in the graph accesses the current method.
+  // TODO: Rename: this actually indicates that some instruction in the method
+  // needs the environment including a valid stack frame.
   bool requires_current_method_;
 
   friend class OptimizingCFITest;
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 3cc2598f..40c2b9c 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -4251,14 +4251,7 @@
 }
 
 void LocationsBuilderARM::VisitNullCheck(HNullCheck* instruction) {
-  LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
-      ? LocationSummary::kCallOnSlowPath
-      : LocationSummary::kNoCall;
-  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
-  locations->SetInAt(0, Location::RequiresRegister());
-  if (instruction->HasUses()) {
-    locations->SetOut(Location::SameAsFirstInput());
-  }
+  codegen_->CreateNullCheckLocations(instruction);
 }
 
 void CodeGeneratorARM::GenerateImplicitNullCheck(HNullCheck* instruction) {
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 179bf76..c00ab56 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -4384,14 +4384,7 @@
 }
 
 void LocationsBuilderARM64::VisitNullCheck(HNullCheck* instruction) {
-  LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
-      ? LocationSummary::kCallOnSlowPath
-      : LocationSummary::kNoCall;
-  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
-  locations->SetInAt(0, Location::RequiresRegister());
-  if (instruction->HasUses()) {
-    locations->SetOut(Location::SameAsFirstInput());
-  }
+  codegen_->CreateNullCheckLocations(instruction);
 }
 
 void CodeGeneratorARM64::GenerateImplicitNullCheck(HNullCheck* instruction) {
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index f07f8a0..fdfc551 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -5075,14 +5075,7 @@
 }
 
 void LocationsBuilderMIPS::VisitNullCheck(HNullCheck* instruction) {
-  LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
-      ? LocationSummary::kCallOnSlowPath
-      : LocationSummary::kNoCall;
-  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
-  locations->SetInAt(0, Location::RequiresRegister());
-  if (instruction->HasUses()) {
-    locations->SetOut(Location::SameAsFirstInput());
-  }
+  codegen_->CreateNullCheckLocations(instruction);
 }
 
 void CodeGeneratorMIPS::GenerateImplicitNullCheck(HNullCheck* instruction) {
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 664d498..4d87523 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -3461,14 +3461,7 @@
 }
 
 void LocationsBuilderMIPS64::VisitNullCheck(HNullCheck* instruction) {
-  LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
-      ? LocationSummary::kCallOnSlowPath
-      : LocationSummary::kNoCall;
-  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
-  locations->SetInAt(0, Location::RequiresRegister());
-  if (instruction->HasUses()) {
-    locations->SetOut(Location::SameAsFirstInput());
-  }
+  codegen_->CreateNullCheckLocations(instruction);
 }
 
 void CodeGeneratorMIPS64::GenerateImplicitNullCheck(HNullCheck* instruction) {
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index e18b366..28db29c 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -4950,16 +4950,10 @@
 }
 
 void LocationsBuilderX86::VisitNullCheck(HNullCheck* instruction) {
-  LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
-      ? LocationSummary::kCallOnSlowPath
-      : LocationSummary::kNoCall;
-  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
-  Location loc = codegen_->IsImplicitNullCheckAllowed(instruction)
-      ? Location::RequiresRegister()
-      : Location::Any();
-  locations->SetInAt(0, loc);
-  if (instruction->HasUses()) {
-    locations->SetOut(Location::SameAsFirstInput());
+  LocationSummary* locations = codegen_->CreateNullCheckLocations(instruction);
+  if (!codegen_->GetCompilerOptions().GetImplicitNullChecks()) {
+    // Explicit null checks can use any location.
+    locations->SetInAt(0, Location::Any());
   }
 }
 
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 15307fe..88d98fc 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -4459,16 +4459,10 @@
 }
 
 void LocationsBuilderX86_64::VisitNullCheck(HNullCheck* instruction) {
-  LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
-      ? LocationSummary::kCallOnSlowPath
-      : LocationSummary::kNoCall;
-  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
-  Location loc = codegen_->IsImplicitNullCheckAllowed(instruction)
-      ? Location::RequiresRegister()
-      : Location::Any();
-  locations->SetInAt(0, loc);
-  if (instruction->HasUses()) {
-    locations->SetOut(Location::SameAsFirstInput());
+  LocationSummary* locations = codegen_->CreateNullCheckLocations(instruction);
+  if (!codegen_->GetCompilerOptions().GetImplicitNullChecks()) {
+    // Explicit null checks can use any location.
+    locations->SetInAt(0, Location::Any());
   }
 }
 
diff --git a/runtime/arch/arm/fault_handler_arm.cc b/runtime/arch/arm/fault_handler_arm.cc
index befdd48..daa2dff 100644
--- a/runtime/arch/arm/fault_handler_arm.cc
+++ b/runtime/arch/arm/fault_handler_arm.cc
@@ -122,13 +122,16 @@
   struct ucontext *uc = reinterpret_cast<struct ucontext*>(context);
   struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
   uint8_t* ptr = reinterpret_cast<uint8_t*>(sc->arm_pc);
-
   uint32_t instr_size = GetInstructionSize(ptr);
-  sc->arm_lr = (sc->arm_pc + instr_size) | 1;      // LR needs to point to gc map location
+  uintptr_t gc_map_location = (sc->arm_pc + instr_size) | 1;
+
+  // Push the gc map location to the stack and pass the fault address in LR.
+  sc->arm_sp -= sizeof(uintptr_t);
+  *reinterpret_cast<uintptr_t*>(sc->arm_sp) = gc_map_location;
+  sc->arm_lr = reinterpret_cast<uintptr_t>(info->si_addr);
   sc->arm_pc = reinterpret_cast<uintptr_t>(art_quick_throw_null_pointer_exception_from_signal);
   // Pass the faulting address as the first argument of
   // art_quick_throw_null_pointer_exception_from_signal.
-  sc->arm_r0 = reinterpret_cast<uintptr_t>(info->si_addr);
   VLOG(signals) << "Generating null pointer exception";
   return true;
 }
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index a3f053b..3fc83ba 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -173,6 +173,29 @@
     /*
      * Macro that sets up the callee save frame to conform with
      * Runtime::CreateCalleeSaveMethod(kSaveEverything)
+     * when core registers are already saved.
+     */
+.macro SETUP_SAVE_EVERYTHING_FRAME_CORE_REGS_SAVED rTemp
+                                        @ 14 words of callee saves and args already saved.
+    vpush {d0-d15}                      @ 32 words, 2 for each of the 16 saved doubles.
+    .cfi_adjust_cfa_offset 128
+    sub sp, #8                          @ 2 words of space, alignment padding and Method*
+    .cfi_adjust_cfa_offset 8
+    RUNTIME_CURRENT1 \rTemp             @ Load Runtime::Current into rTemp.
+    @ Load kSaveEverything Method* into rTemp.
+    ldr \rTemp, [\rTemp, #RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET]
+    str \rTemp, [sp, #0]                @ Place Method* at bottom of stack.
+    str sp, [r9, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.
+
+    // Ugly compile-time check, but we only have the preprocessor.
+#if (FRAME_SIZE_SAVE_EVERYTHING != 56 + 128 + 8)
+#error "FRAME_SIZE_SAVE_EVERYTHING(ARM) size not as expected."
+#endif
+.endm
+
+    /*
+     * Macro that sets up the callee save frame to conform with
+     * Runtime::CreateCalleeSaveMethod(kSaveEverything)
      */
 .macro SETUP_SAVE_EVERYTHING_FRAME rTemp
     push {r0-r12, lr}                   @ 14 words of callee saves and args.
@@ -191,20 +214,7 @@
     .cfi_rel_offset r11, 44
     .cfi_rel_offset ip, 48
     .cfi_rel_offset lr, 52
-    vpush {d0-d15}                      @ 32 words, 2 for each of the 16 saved doubles.
-    .cfi_adjust_cfa_offset 128
-    sub sp, #8                          @ 2 words of space, alignment padding and Method*
-    .cfi_adjust_cfa_offset 8
-    RUNTIME_CURRENT1 \rTemp             @ Load Runtime::Current into rTemp.
-    @ Load kSaveEverything Method* into rTemp.
-    ldr \rTemp, [\rTemp, #RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET]
-    str \rTemp, [sp, #0]                @ Place Method* at bottom of stack.
-    str sp, [r9, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.
-
-    // Ugly compile-time check, but we only have the preprocessor.
-#if (FRAME_SIZE_SAVE_EVERYTHING != 56 + 128 + 8)
-#error "FRAME_SIZE_SAVE_EVERYTHING(ARM) size not as expected."
-#endif
+    SETUP_SAVE_EVERYTHING_FRAME_CORE_REGS_SAVED \rTemp
 .endm
 
 .macro RESTORE_SAVE_EVERYTHING_FRAME
@@ -356,7 +366,34 @@
     /*
      * Call installed by a signal handler to create and deliver a NullPointerException.
      */
-ONE_ARG_RUNTIME_EXCEPTION art_quick_throw_null_pointer_exception_from_signal, artThrowNullPointerExceptionFromSignal
+    .extern art_quick_throw_null_pointer_exception_from_signal
+ENTRY art_quick_throw_null_pointer_exception_from_signal
+    // The fault handler pushes the gc map address, i.e. "return address", to stack
+    // and passes the fault address in LR. So we need to set up the CFI info accordingly.
+    .cfi_def_cfa_offset __SIZEOF_POINTER__
+    .cfi_rel_offset lr, 0
+    push {r0-r12}                   @ 13 words of callee saves and args; LR already saved.
+    .cfi_adjust_cfa_offset 52
+    .cfi_rel_offset r0, 0
+    .cfi_rel_offset r1, 4
+    .cfi_rel_offset r2, 8
+    .cfi_rel_offset r3, 12
+    .cfi_rel_offset r4, 16
+    .cfi_rel_offset r5, 20
+    .cfi_rel_offset r6, 24
+    .cfi_rel_offset r7, 28
+    .cfi_rel_offset r8, 32
+    .cfi_rel_offset r9, 36
+    .cfi_rel_offset r10, 40
+    .cfi_rel_offset r11, 44
+    .cfi_rel_offset ip, 48
+
+    @ save all registers as basis for long jump context
+    SETUP_SAVE_EVERYTHING_FRAME_CORE_REGS_SAVED r1
+    mov r0, lr                      @ pass the fault address stored in LR by the fault handler.
+    mov r1, r9                      @ pass Thread::Current
+    b   artThrowNullPointerExceptionFromSignal  @ (arg, Thread*)
+END art_quick_throw_null_pointer_exception_from_signal
 
     /*
      * Called by managed code to create and deliver an ArithmeticException.
diff --git a/runtime/arch/arm64/fault_handler_arm64.cc b/runtime/arch/arm64/fault_handler_arm64.cc
index 6724d6d..c02be87 100644
--- a/runtime/arch/arm64/fault_handler_arm64.cc
+++ b/runtime/arch/arm64/fault_handler_arm64.cc
@@ -96,12 +96,12 @@
   struct ucontext *uc = reinterpret_cast<struct ucontext*>(context);
   struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
 
-  sc->regs[30] = sc->pc + 4;      // LR needs to point to gc map location
+  // Push the gc map location to the stack and pass the fault address in LR.
+  sc->sp -= sizeof(uintptr_t);
+  *reinterpret_cast<uintptr_t*>(sc->sp) = sc->pc + 4;
+  sc->regs[30] = reinterpret_cast<uintptr_t>(info->si_addr);
 
   sc->pc = reinterpret_cast<uintptr_t>(art_quick_throw_null_pointer_exception_from_signal);
-  // Pass the faulting address as the first argument of
-  // art_quick_throw_null_pointer_exception_from_signal.
-  sc->regs[0] = reinterpret_cast<uintptr_t>(info->si_addr);
   VLOG(signals) << "Generating null pointer exception";
   return true;
 }
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 25aa8ce..5a92659 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -265,10 +265,10 @@
     /*
      * Macro that sets up the callee save frame to conform with
      * Runtime::CreateCalleeSaveMethod(kSaveEverything)
+     * when the SP has already been decremented by FRAME_SIZE_SAVE_EVERYTHING
+     * and saving registers x29 and LR is handled elsewhere.
      */
-.macro SETUP_SAVE_EVERYTHING_FRAME
-    INCREASE_FRAME 512
-
+.macro SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP_SKIP_X29_LR
     // Ugly compile-time check, but we only have the preprocessor.
 #if (FRAME_SIZE_SAVE_EVERYTHING != 512)
 #error "FRAME_SIZE_SAVE_EVERYTHING(ARM64) size not as expected."
@@ -310,7 +310,6 @@
     SAVE_TWO_REGS x23, x24, 448
     SAVE_TWO_REGS x25, x26, 464
     SAVE_TWO_REGS x27, x28, 480
-    SAVE_TWO_REGS x29, xLR, 496
 
     // art::Runtime** xIP0 = &art::Runtime::instance_
     adrp xIP0, :got:_ZN3art7Runtime9instance_E
@@ -328,6 +327,16 @@
     str xIP0, [xSELF, # THREAD_TOP_QUICK_FRAME_OFFSET]
 .endm
 
+    /*
+     * Macro that sets up the callee save frame to conform with
+     * Runtime::CreateCalleeSaveMethod(kSaveEverything)
+     */
+.macro SETUP_SAVE_EVERYTHING_FRAME
+    INCREASE_FRAME 512
+    SAVE_TWO_REGS x29, xLR, 496
+    SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP_SKIP_X29_LR
+.endm
+
 .macro RESTORE_SAVE_EVERYTHING_FRAME
     // Restore FP registers.
     // For better performance, load d0 and d31 separately, so that all LDPs are 16-byte aligned.
@@ -462,7 +471,22 @@
     /*
      * Call installed by a signal handler to create and deliver a NullPointerException.
      */
-ONE_ARG_RUNTIME_EXCEPTION art_quick_throw_null_pointer_exception_from_signal, artThrowNullPointerExceptionFromSignal
+    .extern art_quick_throw_null_pointer_exception_from_signal
+ENTRY art_quick_throw_null_pointer_exception_from_signal
+    // The fault handler pushes the gc map address, i.e. "return address", to stack
+    // and passes the fault address in LR. So we need to set up the CFI info accordingly.
+    .cfi_def_cfa_offset __SIZEOF_POINTER__
+    .cfi_rel_offset lr, 0
+    // Save all registers as basis for long jump context.
+    INCREASE_FRAME (FRAME_SIZE_SAVE_EVERYTHING - __SIZEOF_POINTER__)
+    SAVE_REG x29, (FRAME_SIZE_SAVE_EVERYTHING - 2 * __SIZEOF_POINTER__)  // LR already saved.
+    SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP_SKIP_X29_LR
+    mov x0, lr                        // pass the fault address stored in LR by the fault handler.
+    mov x1, xSELF                     // pass Thread::Current.
+    // TODO: Change other throwing entrypoints to use BL instead of B. http://b/31468464
+    bl   artThrowNullPointerExceptionFromSignal  // (arg, Thread*).
+    brk 0
+END art_quick_throw_null_pointer_exception_from_signal
 
     /*
      * Called by managed code to create and deliver an ArithmeticException.
diff --git a/runtime/arch/mips/asm_support_mips.S b/runtime/arch/mips/asm_support_mips.S
index 801f708..7955b1d 100644
--- a/runtime/arch/mips/asm_support_mips.S
+++ b/runtime/arch/mips/asm_support_mips.S
@@ -43,7 +43,7 @@
 .endm
 
      // Declare a function called name, doesn't set up $gp.
-.macro ENTRY_NO_GP name
+.macro ENTRY_NO_GP_CUSTOM_CFA name, cfa_offset
     .type \name, %function
     .global \name
     // Cache alignment for function entry.
@@ -51,7 +51,12 @@
 \name:
     .cfi_startproc
      // Ensure we get a sane starting CFA.
-    .cfi_def_cfa $sp,0
+    .cfi_def_cfa $sp, \cfa_offset
+.endm
+
+     // Declare a function called name, doesn't set up $gp.
+.macro ENTRY_NO_GP name
+    ENTRY_NO_GP_CUSTOM_CFA \name, 0
 .endm
 
 .macro END name
diff --git a/runtime/arch/mips/fault_handler_mips.cc b/runtime/arch/mips/fault_handler_mips.cc
index 7969a8f..b6a63ca 100644
--- a/runtime/arch/mips/fault_handler_mips.cc
+++ b/runtime/arch/mips/fault_handler_mips.cc
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-
+#include "arch/mips/quick_method_frame_info_mips.h"
 #include "fault_handler.h"
 #include <sys/ucontext.h>
 #include "art_method-inl.h"
@@ -82,12 +82,15 @@
   struct ucontext *uc = reinterpret_cast<struct ucontext*>(context);
   struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
 
+  // Decrement $sp by the frame size of the kSaveEverything method and store
+  // the fault address in the padding right after the ArtMethod*.
+  sc->sc_regs[mips::SP] -= mips::MipsCalleeSaveFrameSize(Runtime::kSaveEverything);
+  uintptr_t* padding = reinterpret_cast<uintptr_t*>(sc->sc_regs[mips::SP]) + /* ArtMethod* */ 1;
+  *padding = reinterpret_cast<uintptr_t>(info->si_addr);
+
   sc->sc_regs[mips::RA] = sc->sc_pc + 4;      // RA needs to point to gc map location
   sc->sc_pc = reinterpret_cast<uintptr_t>(art_quick_throw_null_pointer_exception_from_signal);
   sc->sc_regs[mips::T9] = sc->sc_pc;          // make sure T9 points to the function
-  // Pass the faulting address as the first argument of
-  // art_quick_throw_null_pointer_exception_from_signal.
-  sc->sc_regs[mips::A0] = reinterpret_cast<uintptr_t>(info->si_addr);
   VLOG(signals) << "Generating null pointer exception";
   return true;
 }
diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S
index 4bd1314..71b8ae2 100644
--- a/runtime/arch/mips/quick_entrypoints_mips.S
+++ b/runtime/arch/mips/quick_entrypoints_mips.S
@@ -279,6 +279,7 @@
     /*
      * Macro that sets up the callee save frame to conform with
      * Runtime::CreateCalleeSaveMethod(kSaveEverything).
+     * when the $sp has already been decremented by FRAME_SIZE_SAVE_EVERYTHING.
      * Callee-save: $at, $v0-$v1, $a0-$a3, $t0-$t7, $s0-$s7, $t8-$t9, $gp, $fp $ra, $f0-$f31;
      *              28(GPR)+ 32(FPR) + 3 words for padding and 1 word for Method*
      * Clobbers $t0 and $t1.
@@ -286,10 +287,7 @@
      * Reserves FRAME_SIZE_SAVE_EVERYTHING + ARG_SLOT_SIZE bytes on the stack.
      * This macro sets up $gp; entrypoints using it should start with ENTRY_NO_GP.
      */
-.macro SETUP_SAVE_EVERYTHING_FRAME
-    addiu  $sp, $sp, -256
-    .cfi_adjust_cfa_offset 256
-
+.macro SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP
      // Ugly compile-time check, but we only have the preprocessor.
 #if (FRAME_SIZE_SAVE_EVERYTHING != 256)
 #error "FRAME_SIZE_SAVE_EVERYTHING(MIPS) size not as expected."
@@ -388,6 +386,22 @@
     .cfi_adjust_cfa_offset ARG_SLOT_SIZE
 .endm
 
+    /*
+     * Macro that sets up the callee save frame to conform with
+     * Runtime::CreateCalleeSaveMethod(kSaveEverything).
+     * Callee-save: $at, $v0-$v1, $a0-$a3, $t0-$t7, $s0-$s7, $t8-$t9, $gp, $fp $ra, $f0-$f31;
+     *              28(GPR)+ 32(FPR) + 3 words for padding and 1 word for Method*
+     * Clobbers $t0 and $t1.
+     * Allocates ARG_SLOT_SIZE bytes at the bottom of the stack for arg slots.
+     * Reserves FRAME_SIZE_SAVE_EVERYTHING + ARG_SLOT_SIZE bytes on the stack.
+     * This macro sets up $gp; entrypoints using it should start with ENTRY_NO_GP.
+     */
+.macro SETUP_SAVE_EVERYTHING_FRAME
+    addiu  $sp, $sp, -(FRAME_SIZE_SAVE_EVERYTHING)
+    .cfi_adjust_cfa_offset (FRAME_SIZE_SAVE_EVERYTHING)
+    SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP
+.endm
+
 .macro RESTORE_SAVE_EVERYTHING_FRAME
     addiu  $sp, $sp, ARG_SLOT_SIZE                # remove argument slots on the stack
     .cfi_adjust_cfa_offset -ARG_SLOT_SIZE
@@ -708,8 +722,10 @@
      * Call installed by a signal handler to create and deliver a NullPointerException.
      */
     .extern artThrowNullPointerExceptionFromSignal
-ENTRY art_quick_throw_null_pointer_exception_from_signal
-    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
+ENTRY_NO_GP_CUSTOM_CFA art_quick_throw_null_pointer_exception_from_signal, FRAME_SIZE_SAVE_EVERYTHING
+    SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP
+    # Retrieve the fault address from the padding where the signal handler stores it.
+    lw   $a0, (ARG_SLOT_SIZE + __SIZEOF_POINTER__)($sp)
     la   $t9, artThrowNullPointerExceptionFromSignal
     jalr $zero, $t9                 # artThrowNullPointerExceptionFromSignal(uintptr_t, Thread*)
     move $a1, rSELF                 # pass Thread::Current
diff --git a/runtime/arch/mips64/asm_support_mips64.S b/runtime/arch/mips64/asm_support_mips64.S
index 786e860..6c58fcf 100644
--- a/runtime/arch/mips64/asm_support_mips64.S
+++ b/runtime/arch/mips64/asm_support_mips64.S
@@ -45,8 +45,8 @@
 .L\name\()_gp_set:
 .endm
 
-     // Declare a function called name, doesn't set up $gp.
-.macro ENTRY_NO_GP name
+    // Declare a function called name, doesn't set up $gp.
+.macro ENTRY_NO_GP_CUSTOM_CFA name, cfa_offset
     .type \name, %function
     .global \name
     // Cache alignment for function entry.
@@ -54,7 +54,12 @@
 \name:
     .cfi_startproc
      // Ensure we get a sane starting CFA.
-    .cfi_def_cfa $sp,0
+    .cfi_def_cfa $sp, \cfa_offset
+.endm
+
+    // Declare a function called name, doesn't set up $gp.
+.macro ENTRY_NO_GP name
+    ENTRY_NO_GP_CUSTOM_CFA \name, 0
 .endm
 
 .macro END name
diff --git a/runtime/arch/mips64/fault_handler_mips64.cc b/runtime/arch/mips64/fault_handler_mips64.cc
index 0bbb6e1..e52dc73 100644
--- a/runtime/arch/mips64/fault_handler_mips64.cc
+++ b/runtime/arch/mips64/fault_handler_mips64.cc
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-
+#include "arch/mips64/quick_method_frame_info_mips64.h"
 #include "fault_handler.h"
 #include <sys/ucontext.h>
 #include "art_method-inl.h"
@@ -83,12 +83,15 @@
   struct ucontext *uc = reinterpret_cast<struct ucontext*>(context);
   struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
 
+  // Decrement $sp by the frame size of the kSaveEverything method and store
+  // the fault address in the padding right after the ArtMethod*.
+  sc->sc_regs[mips64::SP] -= mips64::Mips64CalleeSaveFrameSize(Runtime::kSaveEverything);
+  uintptr_t* padding = reinterpret_cast<uintptr_t*>(sc->sc_regs[mips64::SP]) + /* ArtMethod* */ 1;
+  *padding = reinterpret_cast<uintptr_t>(info->si_addr);
+
   sc->sc_regs[mips64::RA] = sc->sc_pc + 4;      // RA needs to point to gc map location
   sc->sc_pc = reinterpret_cast<uintptr_t>(art_quick_throw_null_pointer_exception_from_signal);
   sc->sc_regs[mips64::T9] = sc->sc_pc;          // make sure T9 points to the function
-  // Pass the faulting address as the first argument of
-  // art_quick_throw_null_pointer_exception_from_signal.
-  sc->sc_regs[mips64::A0] = reinterpret_cast<uintptr_t>(info->si_addr);
   VLOG(signals) << "Generating null pointer exception";
   return true;
 }
diff --git a/runtime/arch/mips64/quick_entrypoints_mips64.S b/runtime/arch/mips64/quick_entrypoints_mips64.S
index 26717ad..61c9019 100644
--- a/runtime/arch/mips64/quick_entrypoints_mips64.S
+++ b/runtime/arch/mips64/quick_entrypoints_mips64.S
@@ -316,14 +316,12 @@
     /*
      * Macro that sets up the callee save frame to conform with
      * Runtime::CreateCalleeSaveMethod(kSaveEverything).
+     * when the $sp has already been decremented by FRAME_SIZE_SAVE_EVERYTHING.
      * callee-save: $at + $v0-$v1 + $a0-$a7 + $t0-$t3 + $s0-$s7 + $t8-$t9 + $gp + $s8 + $ra + $s8,
      *              $f0-$f31; 28(GPR)+ 32(FPR) + 1x8 bytes padding + method*
      * This macro sets up $gp; entrypoints using it should start with ENTRY_NO_GP.
      */
-.macro SETUP_SAVE_EVERYTHING_FRAME
-    daddiu $sp, $sp, -496
-    .cfi_adjust_cfa_offset 496
-
+.macro SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP
      // Ugly compile-time check, but we only have the preprocessor.
 #if (FRAME_SIZE_SAVE_EVERYTHING != 496)
 #error "FRAME_SIZE_SAVE_EVERYTHING(MIPS64) size not as expected."
@@ -436,6 +434,19 @@
     sd      $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)
 .endm
 
+    /*
+     * Macro that sets up the callee save frame to conform with
+     * Runtime::CreateCalleeSaveMethod(kSaveEverything).
+     * callee-save: $at + $v0-$v1 + $a0-$a7 + $t0-$t3 + $s0-$s7 + $t8-$t9 + $gp + $s8 + $ra + $s8,
+     *              $f0-$f31; 28(GPR)+ 32(FPR) + 1x8 bytes padding + method*
+     * This macro sets up $gp; entrypoints using it should start with ENTRY_NO_GP.
+     */
+.macro SETUP_SAVE_EVERYTHING_FRAME
+    daddiu $sp, $sp, -(FRAME_SIZE_SAVE_EVERYTHING)
+    .cfi_adjust_cfa_offset (FRAME_SIZE_SAVE_EVERYTHING)
+    SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP
+.endm
+
 .macro RESTORE_SAVE_EVERYTHING_FRAME
     // Restore FP registers.
     l.d    $f31, 264($sp)
@@ -818,8 +829,10 @@
      * Call installed by a signal handler to create and deliver a NullPointerException
      */
     .extern artThrowNullPointerExceptionFromSignal
-ENTRY art_quick_throw_null_pointer_exception_from_signal
-    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
+ENTRY_NO_GP_CUSTOM_CFA art_quick_throw_null_pointer_exception_from_signal, FRAME_SIZE_SAVE_EVERYTHING
+    SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP
+    # Retrieve the fault address from the padding where the signal handler stores it.
+    ld   $a0, (__SIZEOF_POINTER__)($sp)
     dla  $t9, artThrowNullPointerExceptionFromSignal
     jalr $zero, $t9                 # artThrowNullPointerExceptionFromSignal(uinptr_t, Thread*)
     move $a1, rSELF                 # pass Thread::Current
diff --git a/runtime/arch/x86/asm_support_x86.S b/runtime/arch/x86/asm_support_x86.S
index 3e47209..14b01c5 100644
--- a/runtime/arch/x86/asm_support_x86.S
+++ b/runtime/arch/x86/asm_support_x86.S
@@ -114,7 +114,7 @@
     .balign 16
 END_MACRO
 
-MACRO1(DEFINE_FUNCTION, c_name)
+MACRO2(DEFINE_FUNCTION_CUSTOM_CFA, c_name, cfa_offset)
     FUNCTION_TYPE(SYMBOL(\c_name))
     ASM_HIDDEN CALLVAR(c_name)
     .globl CALLVAR(c_name)
@@ -122,7 +122,11 @@
 CALLVAR(c_name):
     CFI_STARTPROC
     // Ensure we get a sane starting CFA.
-    CFI_DEF_CFA(esp, 4)
+    CFI_DEF_CFA(esp, RAW_VAR(cfa_offset))
+END_MACRO
+
+MACRO1(DEFINE_FUNCTION, c_name)
+    DEFINE_FUNCTION_CUSTOM_CFA RAW_VAR(c_name), __SIZEOF_POINTER__
 END_MACRO
 
 MACRO1(END_FUNCTION, c_name)
diff --git a/runtime/arch/x86/fault_handler_x86.cc b/runtime/arch/x86/fault_handler_x86.cc
index c7af249..a4d6bb4 100644
--- a/runtime/arch/x86/fault_handler_x86.cc
+++ b/runtime/arch/x86/fault_handler_x86.cc
@@ -325,21 +325,15 @@
   // next instruction (this instruction + instruction size).  The return address
   // is on the stack at the top address of the current frame.
 
-  // Push the return address onto the stack.
+  // Push the return address and fault address onto the stack.
   uintptr_t retaddr = reinterpret_cast<uintptr_t>(pc + instr_size);
-  uintptr_t* next_sp = reinterpret_cast<uintptr_t*>(sp - sizeof(uintptr_t));
-  *next_sp = retaddr;
+  uintptr_t* next_sp = reinterpret_cast<uintptr_t*>(sp - 2 * sizeof(uintptr_t));
+  next_sp[1] = retaddr;
+  next_sp[0] = reinterpret_cast<uintptr_t>(sig->si_addr);
   uc->CTX_ESP = reinterpret_cast<uintptr_t>(next_sp);
 
   uc->CTX_EIP = reinterpret_cast<uintptr_t>(
       art_quick_throw_null_pointer_exception_from_signal);
-  // Pass the faulting address as the first argument of
-  // art_quick_throw_null_pointer_exception_from_signal.
-#if defined(__x86_64__)
-  uc->CTX_RDI = reinterpret_cast<uintptr_t>(sig->si_addr);
-#else
-  uc->CTX_EAX = reinterpret_cast<uintptr_t>(sig->si_addr);
-#endif
   VLOG(signals) << "Generating null pointer exception";
   return true;
 }
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 646a80c..0beb2a4 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -224,10 +224,11 @@
     /*
      * Macro that sets up the callee save frame to conform with
      * Runtime::CreateCalleeSaveMethod(kSaveEverything)
+     * when EDI is already saved.
      */
-MACRO2(SETUP_SAVE_EVERYTHING_FRAME, got_reg, temp_reg)
-    // Save core registers.
-    PUSH edi
+MACRO2(SETUP_SAVE_EVERYTHING_FRAME_EDI_SAVED, got_reg, temp_reg)
+    // Save core registers from highest to lowest to agree with core spills bitmap.
+    // EDI, or at least a placeholder for it, is already on the stack.
     PUSH esi
     PUSH ebp
     PUSH ebx
@@ -264,6 +265,15 @@
 #endif
 END_MACRO
 
+    /*
+     * Sets up the callee save frame to conform with
+     * Runtime::CreateCalleeSaveMethod(kSaveEverything), starting with the EDI spill.
+     */
+MACRO2(SETUP_SAVE_EVERYTHING_FRAME, got_reg, temp_reg)
+    PUSH edi  // Saved here because the EDI_SAVED variant expects EDI to be on the stack already.
+    SETUP_SAVE_EVERYTHING_FRAME_EDI_SAVED RAW_VAR(got_reg), RAW_VAR(temp_reg)
+END_MACRO
+
 MACRO0(RESTORE_SAVE_EVERYTHING_FRAME)
     // Restore FPRs. Method and padding is still on the stack.
     movsd 16(%esp), %xmm0
@@ -320,7 +330,6 @@
 MACRO2(ONE_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
     DEFINE_FUNCTION VAR(c_name)
     SETUP_SAVE_ALL_CALLEE_SAVES_FRAME ebx, ebx // save all registers as basis for long jump context
-    mov %esp, %ecx
     // Outgoing argument set up
     subl MACRO_LITERAL(8), %esp                // alignment padding
     CFI_ADJUST_CFA_OFFSET(8)
@@ -354,7 +363,23 @@
     /*
      * Call installed by a signal handler to create and deliver a NullPointerException.
      */
-ONE_ARG_RUNTIME_EXCEPTION art_quick_throw_null_pointer_exception_from_signal, artThrowNullPointerExceptionFromSignal
+DEFINE_FUNCTION_CUSTOM_CFA art_quick_throw_null_pointer_exception_from_signal, 2 * __SIZEOF_POINTER__
+    // The fault handler pushed the return address and, below it, the fault address.
+    // Save all registers as basis for long jump context; EDI will replace fault address later.
+    SETUP_SAVE_EVERYTHING_FRAME_EDI_SAVED ebx, ebx
+    // Retrieve the fault address from the EDI slot, then store the real EDI there.
+    movl (FRAME_SIZE_SAVE_EVERYTHING - 2 * __SIZEOF_POINTER__)(%esp), %eax
+    movl %edi, (FRAME_SIZE_SAVE_EVERYTHING - 2 * __SIZEOF_POINTER__)(%esp)
+    CFI_REL_OFFSET(%edi, (FRAME_SIZE_SAVE_EVERYTHING - 2 * __SIZEOF_POINTER__))
+    // Outgoing argument set up
+    subl MACRO_LITERAL(8), %esp                           // alignment padding
+    CFI_ADJUST_CFA_OFFSET(8)
+    pushl %fs:THREAD_SELF_OFFSET                          // pass Thread::Current()
+    CFI_ADJUST_CFA_OFFSET(4)
+    PUSH eax                                              // pass arg1 (fault address)
+    call SYMBOL(artThrowNullPointerExceptionFromSignal)   // (addr, self)
+    UNREACHABLE
+END_FUNCTION art_quick_throw_null_pointer_exception_from_signal
 
     /*
      * Called by managed code to create and deliver an ArithmeticException.
diff --git a/runtime/arch/x86_64/asm_support_x86_64.S b/runtime/arch/x86_64/asm_support_x86_64.S
index 0728f99..af4a6c4 100644
--- a/runtime/arch/x86_64/asm_support_x86_64.S
+++ b/runtime/arch/x86_64/asm_support_x86_64.S
@@ -110,7 +110,7 @@
 
 // TODO: we might need to use SYMBOL() here to add the underscore prefix
 // for mac builds.
-MACRO1(DEFINE_FUNCTION, c_name)
+MACRO2(DEFINE_FUNCTION_CUSTOM_CFA, c_name, cfa_offset)
     FUNCTION_TYPE(SYMBOL(\c_name))
     ASM_HIDDEN CALLVAR(c_name)
     .globl CALLVAR(c_name)
@@ -118,7 +118,11 @@
 CALLVAR(c_name):
     CFI_STARTPROC
     // Ensure we get a sane starting CFA.
-    CFI_DEF_CFA(rsp, 8)
+    CFI_DEF_CFA(rsp, RAW_VAR(cfa_offset))
+END_MACRO
+
+MACRO1(DEFINE_FUNCTION, c_name)
+    DEFINE_FUNCTION_CUSTOM_CFA RAW_VAR(c_name), __SIZEOF_POINTER__  // CFA offset of one pointer (8).
 END_MACRO
 
 MACRO1(END_FUNCTION, c_name)
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 5ea58af..089ed75 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -263,14 +263,15 @@
     /*
      * Macro that sets up the callee save frame to conform with
      * Runtime::CreateCalleeSaveMethod(kSaveEverything)
+     * when R15 is already saved.
      */
-MACRO0(SETUP_SAVE_EVERYTHING_FRAME)
+MACRO0(SETUP_SAVE_EVERYTHING_FRAME_R15_SAVED)
 #if defined(__APPLE__)
     int3
     int3
 #else
     // Save core registers from highest to lowest to agree with core spills bitmap.
-    PUSH r15
+    // R15, or at least a placeholder for it, is already on the stack.
     PUSH r14
     PUSH r13
     PUSH r12
@@ -322,6 +323,15 @@
 #endif  // __APPLE__
 END_MACRO
 
+    /*
+     * Sets up the callee save frame to conform with
+     * Runtime::CreateCalleeSaveMethod(kSaveEverything), starting with the R15 spill.
+     */
+MACRO0(SETUP_SAVE_EVERYTHING_FRAME)
+    PUSH r15  // Saved here because the R15_SAVED variant expects R15 to be on the stack already.
+    SETUP_SAVE_EVERYTHING_FRAME_R15_SAVED
+END_MACRO
+
 MACRO0(RESTORE_SAVE_EVERYTHING_FRAME)
     // Restore FPRs. Method and padding is still on the stack.
     movq 16(%rsp), %xmm0
@@ -413,7 +423,19 @@
     /*
      * Call installed by a signal handler to create and deliver a NullPointerException.
      */
-ONE_ARG_RUNTIME_EXCEPTION art_quick_throw_null_pointer_exception_from_signal, artThrowNullPointerExceptionFromSignal
+DEFINE_FUNCTION_CUSTOM_CFA art_quick_throw_null_pointer_exception_from_signal, 2 * __SIZEOF_POINTER__
+    // The fault handler pushed the return address and, below it, the fault address.
+    // Save all registers as basis for long jump context; R15 will replace fault address later.
+    SETUP_SAVE_EVERYTHING_FRAME_R15_SAVED
+    // Retrieve the fault address from the R15 slot, then store the real R15 there.
+    movq (FRAME_SIZE_SAVE_EVERYTHING - 2 * __SIZEOF_POINTER__)(%rsp), %rdi
+    movq %r15, (FRAME_SIZE_SAVE_EVERYTHING - 2 * __SIZEOF_POINTER__)(%rsp)
+    CFI_REL_OFFSET(%r15, (FRAME_SIZE_SAVE_EVERYTHING - 2 * __SIZEOF_POINTER__))
+    // Outgoing argument set up; RDI already contains the fault address.
+    movq %gs:THREAD_SELF_OFFSET, %rsi  // pass Thread::Current()
+    call SYMBOL(artThrowNullPointerExceptionFromSignal)  // (addr, self)
+    UNREACHABLE
+END_FUNCTION art_quick_throw_null_pointer_exception_from_signal
 
     /*
      * Called by managed code to create and deliver an ArithmeticException.
diff --git a/test/439-npe/expected.txt b/test/439-npe/expected.txt
index 34855ee..b4fd6bb 100644
--- a/test/439-npe/expected.txt
+++ b/test/439-npe/expected.txt
@@ -52,3 +52,73 @@
 $opt$noinline$getBooleanElement
 $opt$noinline$getCharElement
 $opt$noinline$getShortElement
+i0=4
+i1=8
+i2=12
+i3=16
+i4=20
+i5=24
+i6=28
+i7=32
+i8=36
+i9=40
+i10=44
+i11=48
+i12=52
+i13=56
+i14=44
+i15=57
+l0=84
+l1=88
+l2=92
+l3=96
+l4=100
+l5=104
+l6=108
+l7=112
+l8=116
+l9=120
+l10=124
+l11=128
+l12=132
+l13=136
+l14=104
+l15=146
+f0=164.0
+f1=168.0
+f2=172.0
+f3=176.0
+f4=180.0
+f5=184.0
+f6=188.0
+f7=192.0
+f8=196.0
+f9=200.0
+f10=204.0
+f11=208.0
+f12=212.0
+f13=216.0
+f14=164.0
+f15=55.5
+d0=244.0
+d1=248.0
+d2=252.0
+d3=256.0
+d4=260.0
+d5=264.0
+d6=268.0
+d7=272.0
+d8=276.0
+d9=280.0
+d10=284.0
+d11=288.0
+d12=292.0
+d13=296.0
+d14=224.0
+d15=75.125
+addInt=42
+addLong=111
+addFloat=0.5
+addDouble=0.125
+m=null
+i=2
diff --git a/test/439-npe/src/Main.java b/test/439-npe/src/Main.java
index 8f66da0..bc044a4 100644
--- a/test/439-npe/src/Main.java
+++ b/test/439-npe/src/Main.java
@@ -634,12 +634,246 @@
     } catch (NullPointerException npe) {
       check(npe, thisLine += 6, methodLine += 5, "$opt$noinline$getShortElement");
     }
+
+    $opt$noinline$testRegisterRetrieval();
   }
 
-  static void check(NullPointerException npe, int mainLine, int medthodLine, String methodName) {
+  static void $opt$noinline$testRegisterRetrieval() {
+    Main[] array = $noinline$PrepareArray();  // array[2] is null; see $noinline$PrepareArray().
+    int i0 = 0;
+    int i1 = 1;
+    int i2 = 2;
+    int i3 = 3;
+    int i4 = 4;
+    int i5 = 5;
+    int i6 = 6;
+    int i7 = 7;
+    int i8 = 8;
+    int i9 = 9;
+    int i10 = 10;
+    int i11 = 11;
+    int i12 = 12;
+    int i13 = 13;
+    int i14 = 14;
+    int i15 = 15;
+    long l0 = 20L;
+    long l1 = 21L;
+    long l2 = 22L;
+    long l3 = 23L;
+    long l4 = 24L;
+    long l5 = 25L;
+    long l6 = 26L;
+    long l7 = 27L;
+    long l8 = 28L;
+    long l9 = 29L;
+    long l10 = 30L;
+    long l11 = 31L;
+    long l12 = 32L;
+    long l13 = 33L;
+    long l14 = 34L;
+    long l15 = 35L;
+    float f0 = 40.0f;
+    float f1 = 41.0f;
+    float f2 = 42.0f;
+    float f3 = 43.0f;
+    float f4 = 44.0f;
+    float f5 = 45.0f;
+    float f6 = 46.0f;
+    float f7 = 47.0f;
+    float f8 = 48.0f;
+    float f9 = 49.0f;
+    float f10 = 50.0f;
+    float f11 = 51.0f;
+    float f12 = 52.0f;
+    float f13 = 53.0f;
+    float f14 = 54.0f;
+    float f15 = 55.0f;
+    double d0 = 60.0;
+    double d1 = 61.0;
+    double d2 = 62.0;
+    double d3 = 63.0;
+    double d4 = 64.0;
+    double d5 = 65.0;
+    double d6 = 66.0;
+    double d7 = 67.0;
+    double d8 = 68.0;
+    double d9 = 69.0;
+    double d10 = 70.0;
+    double d11 = 71.0;
+    double d12 = 72.0;
+    double d13 = 73.0;
+    double d14 = 74.0;
+    double d15 = 75.0;
+    int addInt = -1;
+    long addLong = -2L;
+    float addFloat = -3.0f;
+    double addDouble = -4.0;
+    Main m = null;
+    int i = 0;
+    try {
+      for (i = 0; i < array.length; ++i) {
+        m = array[i];
+        // We have 16 ints, 16 longs, 16 floats, 16 doubles and a few helper variables here,
+        // none of them anonymous. Hopefully, all available physical registers will be allocated
+        // to these variables, so that when `m.intField` throws NPE in the third iteration
+        // (array[2] is null), the catch block tests retrieval of values from all of them.
+        addInt = m.intField;
+        addLong = m.longField;
+        addFloat = m.floatField;
+        addDouble = m.doubleField;
+        i0 += i1;
+        i1 += i2;
+        i2 += i3;
+        i3 += i4;
+        i4 += i5;
+        i5 += i6;
+        i6 += i7;
+        i7 += i8;
+        i8 += i9;
+        i9 += i10;
+        i10 += i11;
+        i11 += i12;
+        i12 += i13;
+        i13 += i14;
+        i14 += i15;
+        i15 += addInt;
+        l0 += l1;
+        l1 += l2;
+        l2 += l3;
+        l3 += l4;
+        l4 += l5;
+        l5 += l6;
+        l6 += l7;
+        l7 += l8;
+        l8 += l9;
+        l9 += l10;
+        l10 += l11;
+        l11 += l12;
+        l12 += l13;
+        l13 += l14;
+        l14 += l15;
+        l15 += addLong;
+        f0 += f1;
+        f1 += f2;
+        f2 += f3;
+        f3 += f4;
+        f4 += f5;
+        f5 += f6;
+        f6 += f7;
+        f7 += f8;
+        f8 += f9;
+        f9 += f10;
+        f10 += f11;
+        f11 += f12;
+        f12 += f13;
+        f13 += f14;
+        f14 += f15;
+        f15 += addFloat;
+        d0 += d1;
+        d1 += d2;
+        d2 += d3;
+        d3 += d4;
+        d4 += d5;
+        d5 += d6;
+        d6 += d7;
+        d7 += d8;
+        d8 += d9;
+        d9 += d10;
+        d10 += d11;
+        d11 += d12;
+        d12 += d13;
+        d13 += d14;
+        d14 += d15;
+        d15 += addDouble;
+      }
+    } catch (NullPointerException npe) {  // Print every variable; see expected.txt.
+      System.out.println("i0=" + i0);
+      System.out.println("i1=" + i1);
+      System.out.println("i2=" + i2);
+      System.out.println("i3=" + i3);
+      System.out.println("i4=" + i4);
+      System.out.println("i5=" + i5);
+      System.out.println("i6=" + i6);
+      System.out.println("i7=" + i7);
+      System.out.println("i8=" + i8);
+      System.out.println("i9=" + i9);
+      System.out.println("i10=" + i10);
+      System.out.println("i11=" + i11);
+      System.out.println("i12=" + i12);
+      System.out.println("i13=" + i13);
+      System.out.println("i14=" + i14);
+      System.out.println("i15=" + i15);
+      System.out.println("l0=" + l0);
+      System.out.println("l1=" + l1);
+      System.out.println("l2=" + l2);
+      System.out.println("l3=" + l3);
+      System.out.println("l4=" + l4);
+      System.out.println("l5=" + l5);
+      System.out.println("l6=" + l6);
+      System.out.println("l7=" + l7);
+      System.out.println("l8=" + l8);
+      System.out.println("l9=" + l9);
+      System.out.println("l10=" + l10);
+      System.out.println("l11=" + l11);
+      System.out.println("l12=" + l12);
+      System.out.println("l13=" + l13);
+      System.out.println("l14=" + l14);
+      System.out.println("l15=" + l15);
+      System.out.println("f0=" + f0);
+      System.out.println("f1=" + f1);
+      System.out.println("f2=" + f2);
+      System.out.println("f3=" + f3);
+      System.out.println("f4=" + f4);
+      System.out.println("f5=" + f5);
+      System.out.println("f6=" + f6);
+      System.out.println("f7=" + f7);
+      System.out.println("f8=" + f8);
+      System.out.println("f9=" + f9);
+      System.out.println("f10=" + f10);
+      System.out.println("f11=" + f11);
+      System.out.println("f12=" + f12);
+      System.out.println("f13=" + f13);
+      System.out.println("f14=" + f14);
+      System.out.println("f15=" + f15);
+      System.out.println("d0=" + d0);
+      System.out.println("d1=" + d1);
+      System.out.println("d2=" + d2);
+      System.out.println("d3=" + d3);
+      System.out.println("d4=" + d4);
+      System.out.println("d5=" + d5);
+      System.out.println("d6=" + d6);
+      System.out.println("d7=" + d7);
+      System.out.println("d8=" + d8);
+      System.out.println("d9=" + d9);
+      System.out.println("d10=" + d10);
+      System.out.println("d11=" + d11);
+      System.out.println("d12=" + d12);
+      System.out.println("d13=" + d13);
+      System.out.println("d14=" + d14);
+      System.out.println("d15=" + d15);
+      System.out.println("addInt=" + addInt);
+      System.out.println("addLong=" + addLong);
+      System.out.println("addFloat=" + addFloat);
+      System.out.println("addDouble=" + addDouble);
+      System.out.println("m=" + m);
+      System.out.println("i=" + i);
+    }
+  }
+
+  static Main[] $noinline$PrepareArray() {
+    if (doThrow) { throw new Error(); }
+    Main plain = new Main(), populated = new Main();  // Elements 0 and 1, created in that order.
+    populated.intField = 42;
+    populated.longField = 111L;
+    populated.floatField = 0.5f;
+    populated.doubleField = 0.125;
+    return new Main[] { plain, populated, null, new Main() };  // Element 2 stays null.
+  }
+
+  static void check(NullPointerException npe, int mainLine, int methodLine, String methodName) {
     System.out.println(methodName);
     StackTraceElement[] trace = npe.getStackTrace();
-    checkElement(trace[0], "Main", methodName, "Main.java", medthodLine);
+    checkElement(trace[0], "Main", methodName, "Main.java", methodLine);  // Throwing method.
     checkElement(trace[1], "Main", "main", "Main.java", mainLine);
   }