Use implicit null checks inside try blocks.

Make implicit null check entrypoint save all registers, use
platform-specific approach to still pass the fault address.
Allow implicit null checks in try blocks.

On Nexus 9, AOSP ToT, the boot.oat size reduction is
  prebuilt multi-part boot image:
    - 32-bit boot.oat: -452KiB (-0.7%)
    - 64-bit boot.oat: -482KiB (-0.7%)
  on-device built single boot image:
    - 32-bit boot.oat: -444KiB (-0.7%)
    - 64-bit boot.oat: -488KiB (-0.7%)

Test: Run ART test suite on host and Nexus 9.
Test: Build aosp_mips64-eng.
Change-Id: I279f3ab57e2e2f338131c5cac45c51b673bdca19
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 2087888..ac7d5fe 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -1081,13 +1081,6 @@
   }
 }
 
-bool CodeGenerator::IsImplicitNullCheckAllowed(HNullCheck* null_check) const {
-  return compiler_options_.GetImplicitNullChecks() &&
-         // Null checks which might throw into a catch block need to save live
-         // registers and therefore cannot be done implicitly.
-         !null_check->CanThrowIntoCatchBlock();
-}
-
 bool CodeGenerator::CanMoveNullCheckToUser(HNullCheck* null_check) {
   HInstruction* first_next_not_move = null_check->GetNextDisregardingMoves();
 
@@ -1096,6 +1089,10 @@
 }
 
 void CodeGenerator::MaybeRecordImplicitNullCheck(HInstruction* instr) {
+  if (!compiler_options_.GetImplicitNullChecks()) {
+    return;
+  }
+
   // If we are from a static path don't record the pc as we can't throw NPE.
   // NB: having the checks here makes the code much less verbose in the arch
   // specific code generators.
@@ -1114,16 +1111,31 @@
   // and needs to record the pc.
   if (first_prev_not_move != nullptr && first_prev_not_move->IsNullCheck()) {
     HNullCheck* null_check = first_prev_not_move->AsNullCheck();
-    if (IsImplicitNullCheckAllowed(null_check)) {
-      // TODO: The parallel moves modify the environment. Their changes need to be
-      // reverted otherwise the stack maps at the throw point will not be correct.
-      RecordPcInfo(null_check, null_check->GetDexPc());
-    }
+    // TODO: The parallel moves modify the environment. Their changes need to be
+    // reverted otherwise the stack maps at the throw point will not be correct.
+    RecordPcInfo(null_check, null_check->GetDexPc());
   }
 }
 
+LocationSummary* CodeGenerator::CreateNullCheckLocations(HNullCheck* null_check) {
+  // Note: Using kNoCall allows the method to be treated as leaf (and eliminate the
+  // HSuspendCheck from entry block). However, it will still get a valid stack frame
+  // because the HNullCheck needs an environment.
+  LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
+  // When throwing from a try block, we may need to retrieve dalvik registers from
+  // physical registers. For implicit null checks, this is done by using kSaveEverything
+  // runtime method but for explicit null checks we need to save live registers.
+  if (!compiler_options_.GetImplicitNullChecks() && null_check->CanThrowIntoCatchBlock()) {
+    call_kind = LocationSummary::kCallOnSlowPath;
+  }
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(null_check, call_kind);
+  locations->SetInAt(0, Location::RequiresRegister());
+  DCHECK(!null_check->HasUses());
+  return locations;
+}
+
 void CodeGenerator::GenerateNullCheck(HNullCheck* instruction) {
-  if (IsImplicitNullCheckAllowed(instruction)) {
+  if (compiler_options_.GetImplicitNullChecks()) {
     MaybeRecordStat(kImplicitNullCheckGenerated);
     GenerateImplicitNullCheck(instruction);
   } else {
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 0c60a98..b4d4b9b 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -313,6 +313,7 @@
 
   bool CanMoveNullCheckToUser(HNullCheck* null_check);
   void MaybeRecordImplicitNullCheck(HInstruction* instruction);
+  LocationSummary* CreateNullCheckLocations(HNullCheck* null_check);
   void GenerateNullCheck(HNullCheck* null_check);
   virtual void GenerateImplicitNullCheck(HNullCheck* null_check) = 0;
   virtual void GenerateExplicitNullCheck(HNullCheck* null_check) = 0;
@@ -322,12 +323,6 @@
   // TODO: Replace with a catch-entering instruction that records the environment.
   void RecordCatchBlockInfo();
 
-  // Returns true if implicit null checks are allowed in the compiler options
-  // and if the null check is not inside a try block. We currently cannot do
-  // implicit null checks in that case because we need the NullCheckSlowPath to
-  // save live registers, which may be needed by the runtime to set catch phis.
-  bool IsImplicitNullCheckAllowed(HNullCheck* null_check) const;
-
   // TODO: Avoid creating the `std::unique_ptr` here.
   void AddSlowPath(SlowPathCode* slow_path) {
     slow_paths_.push_back(std::unique_ptr<SlowPathCode>(slow_path));
@@ -713,6 +708,8 @@
   bool is_leaf_;
 
   // Whether an instruction in the graph accesses the current method.
+  // TODO: Rename: this actually indicates that some instruction in the method
+  // needs the environment including a valid stack frame.
   bool requires_current_method_;
 
   friend class OptimizingCFITest;
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 5301a6b..4f80931 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -4251,14 +4251,7 @@
 }
 
 void LocationsBuilderARM::VisitNullCheck(HNullCheck* instruction) {
-  LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
-      ? LocationSummary::kCallOnSlowPath
-      : LocationSummary::kNoCall;
-  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
-  locations->SetInAt(0, Location::RequiresRegister());
-  if (instruction->HasUses()) {
-    locations->SetOut(Location::SameAsFirstInput());
-  }
+  codegen_->CreateNullCheckLocations(instruction);
 }
 
 void CodeGeneratorARM::GenerateImplicitNullCheck(HNullCheck* instruction) {
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 36f7b4d..09808bb 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -4539,14 +4539,7 @@
 }
 
 void LocationsBuilderARM64::VisitNullCheck(HNullCheck* instruction) {
-  LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
-      ? LocationSummary::kCallOnSlowPath
-      : LocationSummary::kNoCall;
-  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
-  locations->SetInAt(0, Location::RequiresRegister());
-  if (instruction->HasUses()) {
-    locations->SetOut(Location::SameAsFirstInput());
-  }
+  codegen_->CreateNullCheckLocations(instruction);
 }
 
 void CodeGeneratorARM64::GenerateImplicitNullCheck(HNullCheck* instruction) {
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 92e9cd9..9abe485 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -4856,14 +4856,7 @@
 }
 
 void LocationsBuilderMIPS::VisitNullCheck(HNullCheck* instruction) {
-  LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
-      ? LocationSummary::kCallOnSlowPath
-      : LocationSummary::kNoCall;
-  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
-  locations->SetInAt(0, Location::RequiresRegister());
-  if (instruction->HasUses()) {
-    locations->SetOut(Location::SameAsFirstInput());
-  }
+  codegen_->CreateNullCheckLocations(instruction);
 }
 
 void CodeGeneratorMIPS::GenerateImplicitNullCheck(HNullCheck* instruction) {
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 664d498..4d87523 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -3461,14 +3461,7 @@
 }
 
 void LocationsBuilderMIPS64::VisitNullCheck(HNullCheck* instruction) {
-  LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
-      ? LocationSummary::kCallOnSlowPath
-      : LocationSummary::kNoCall;
-  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
-  locations->SetInAt(0, Location::RequiresRegister());
-  if (instruction->HasUses()) {
-    locations->SetOut(Location::SameAsFirstInput());
-  }
+  codegen_->CreateNullCheckLocations(instruction);
 }
 
 void CodeGeneratorMIPS64::GenerateImplicitNullCheck(HNullCheck* instruction) {
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 4689ccb..bf96be2 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -4979,16 +4979,10 @@
 }
 
 void LocationsBuilderX86::VisitNullCheck(HNullCheck* instruction) {
-  LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
-      ? LocationSummary::kCallOnSlowPath
-      : LocationSummary::kNoCall;
-  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
-  Location loc = codegen_->IsImplicitNullCheckAllowed(instruction)
-      ? Location::RequiresRegister()
-      : Location::Any();
-  locations->SetInAt(0, loc);
-  if (instruction->HasUses()) {
-    locations->SetOut(Location::SameAsFirstInput());
+  LocationSummary* locations = codegen_->CreateNullCheckLocations(instruction);
+  if (!codegen_->GetCompilerOptions().GetImplicitNullChecks()) {
+    // Explicit null checks can use any location.
+    locations->SetInAt(0, Location::Any());
   }
 }
 
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index a21a09e..d569f1f 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -4490,16 +4490,10 @@
 }
 
 void LocationsBuilderX86_64::VisitNullCheck(HNullCheck* instruction) {
-  LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
-      ? LocationSummary::kCallOnSlowPath
-      : LocationSummary::kNoCall;
-  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
-  Location loc = codegen_->IsImplicitNullCheckAllowed(instruction)
-      ? Location::RequiresRegister()
-      : Location::Any();
-  locations->SetInAt(0, loc);
-  if (instruction->HasUses()) {
-    locations->SetOut(Location::SameAsFirstInput());
+  LocationSummary* locations = codegen_->CreateNullCheckLocations(instruction);
+  if (!codegen_->GetCompilerOptions().GetImplicitNullChecks()) {
+    // Explicit null checks can use any location.
+    locations->SetInAt(0, Location::Any());
   }
 }
 
diff --git a/runtime/arch/arm/fault_handler_arm.cc b/runtime/arch/arm/fault_handler_arm.cc
index befdd48..daa2dff 100644
--- a/runtime/arch/arm/fault_handler_arm.cc
+++ b/runtime/arch/arm/fault_handler_arm.cc
@@ -122,13 +122,16 @@
   struct ucontext *uc = reinterpret_cast<struct ucontext*>(context);
   struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
   uint8_t* ptr = reinterpret_cast<uint8_t*>(sc->arm_pc);
-
   uint32_t instr_size = GetInstructionSize(ptr);
-  sc->arm_lr = (sc->arm_pc + instr_size) | 1;      // LR needs to point to gc map location
+  uintptr_t gc_map_location = (sc->arm_pc + instr_size) | 1;
+
+  // Push the gc map location to the stack and pass the fault address in LR.
+  sc->arm_sp -= sizeof(uintptr_t);
+  *reinterpret_cast<uintptr_t*>(sc->arm_sp) = gc_map_location;
+  sc->arm_lr = reinterpret_cast<uintptr_t>(info->si_addr);
   sc->arm_pc = reinterpret_cast<uintptr_t>(art_quick_throw_null_pointer_exception_from_signal);
   // Pass the faulting address as the first argument of
   // art_quick_throw_null_pointer_exception_from_signal.
-  sc->arm_r0 = reinterpret_cast<uintptr_t>(info->si_addr);
   VLOG(signals) << "Generating null pointer exception";
   return true;
 }
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index a3f053b..3fc83ba 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -173,6 +173,29 @@
     /*
      * Macro that sets up the callee save frame to conform with
      * Runtime::CreateCalleeSaveMethod(kSaveEverything)
+     * when core registers are already saved.
+     */
+.macro SETUP_SAVE_EVERYTHING_FRAME_CORE_REGS_SAVED rTemp
+                                        @ 14 words of callee saves and args already saved.
+    vpush {d0-d15}                      @ 32 words, 2 for each of the 16 saved doubles.
+    .cfi_adjust_cfa_offset 128
+    sub sp, #8                          @ 2 words of space, alignment padding and Method*
+    .cfi_adjust_cfa_offset 8
+    RUNTIME_CURRENT1 \rTemp             @ Load Runtime::Current into rTemp.
+    @ Load kSaveEverything Method* into rTemp.
+    ldr \rTemp, [\rTemp, #RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET]
+    str \rTemp, [sp, #0]                @ Place Method* at bottom of stack.
+    str sp, [r9, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.
+
+    // Ugly compile-time check, but we only have the preprocessor.
+#if (FRAME_SIZE_SAVE_EVERYTHING != 56 + 128 + 8)
+#error "FRAME_SIZE_SAVE_EVERYTHING(ARM) size not as expected."
+#endif
+.endm
+
+    /*
+     * Macro that sets up the callee save frame to conform with
+     * Runtime::CreateCalleeSaveMethod(kSaveEverything)
      */
 .macro SETUP_SAVE_EVERYTHING_FRAME rTemp
     push {r0-r12, lr}                   @ 14 words of callee saves and args.
@@ -191,20 +214,7 @@
     .cfi_rel_offset r11, 44
     .cfi_rel_offset ip, 48
     .cfi_rel_offset lr, 52
-    vpush {d0-d15}                      @ 32 words, 2 for each of the 16 saved doubles.
-    .cfi_adjust_cfa_offset 128
-    sub sp, #8                          @ 2 words of space, alignment padding and Method*
-    .cfi_adjust_cfa_offset 8
-    RUNTIME_CURRENT1 \rTemp             @ Load Runtime::Current into rTemp.
-    @ Load kSaveEverything Method* into rTemp.
-    ldr \rTemp, [\rTemp, #RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET]
-    str \rTemp, [sp, #0]                @ Place Method* at bottom of stack.
-    str sp, [r9, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.
-
-    // Ugly compile-time check, but we only have the preprocessor.
-#if (FRAME_SIZE_SAVE_EVERYTHING != 56 + 128 + 8)
-#error "FRAME_SIZE_SAVE_EVERYTHING(ARM) size not as expected."
-#endif
+    SETUP_SAVE_EVERYTHING_FRAME_CORE_REGS_SAVED \rTemp
 .endm
 
 .macro RESTORE_SAVE_EVERYTHING_FRAME
@@ -356,7 +366,34 @@
     /*
      * Call installed by a signal handler to create and deliver a NullPointerException.
      */
-ONE_ARG_RUNTIME_EXCEPTION art_quick_throw_null_pointer_exception_from_signal, artThrowNullPointerExceptionFromSignal
+    .extern art_quick_throw_null_pointer_exception_from_signal
+ENTRY art_quick_throw_null_pointer_exception_from_signal
+    // The fault handler pushes the gc map address, i.e. "return address", to stack
+    // and passes the fault address in LR. So we need to set up the CFI info accordingly.
+    .cfi_def_cfa_offset __SIZEOF_POINTER__
+    .cfi_rel_offset lr, 0
+    push {r0-r12}                   @ 13 words of callee saves and args; LR already saved.
+    .cfi_adjust_cfa_offset 52
+    .cfi_rel_offset r0, 0
+    .cfi_rel_offset r1, 4
+    .cfi_rel_offset r2, 8
+    .cfi_rel_offset r3, 12
+    .cfi_rel_offset r4, 16
+    .cfi_rel_offset r5, 20
+    .cfi_rel_offset r6, 24
+    .cfi_rel_offset r7, 28
+    .cfi_rel_offset r8, 32
+    .cfi_rel_offset r9, 36
+    .cfi_rel_offset r10, 40
+    .cfi_rel_offset r11, 44
+    .cfi_rel_offset ip, 48
+
+    @ save all registers as basis for long jump context
+    SETUP_SAVE_EVERYTHING_FRAME_CORE_REGS_SAVED r1
+    mov r0, lr                      @ pass the fault address stored in LR by the fault handler.
+    mov r1, r9                      @ pass Thread::Current
+    b   artThrowNullPointerExceptionFromSignal  @ (Thread*)
+END art_quick_throw_null_pointer_exception_from_signal
 
     /*
      * Called by managed code to create and deliver an ArithmeticException.
diff --git a/runtime/arch/arm64/fault_handler_arm64.cc b/runtime/arch/arm64/fault_handler_arm64.cc
index 6724d6d..c02be87 100644
--- a/runtime/arch/arm64/fault_handler_arm64.cc
+++ b/runtime/arch/arm64/fault_handler_arm64.cc
@@ -96,12 +96,12 @@
   struct ucontext *uc = reinterpret_cast<struct ucontext*>(context);
   struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
 
-  sc->regs[30] = sc->pc + 4;      // LR needs to point to gc map location
+  // Push the gc map location to the stack and pass the fault address in LR.
+  sc->sp -= sizeof(uintptr_t);
+  *reinterpret_cast<uintptr_t*>(sc->sp) = sc->pc + 4;
+  sc->regs[30] = reinterpret_cast<uintptr_t>(info->si_addr);
 
   sc->pc = reinterpret_cast<uintptr_t>(art_quick_throw_null_pointer_exception_from_signal);
-  // Pass the faulting address as the first argument of
-  // art_quick_throw_null_pointer_exception_from_signal.
-  sc->regs[0] = reinterpret_cast<uintptr_t>(info->si_addr);
   VLOG(signals) << "Generating null pointer exception";
   return true;
 }
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 25aa8ce..ea4669d 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -265,10 +265,10 @@
     /*
      * Macro that sets up the callee save frame to conform with
      * Runtime::CreateCalleeSaveMethod(kSaveEverything)
+     * when the SP has already been decremented by FRAME_SIZE_SAVE_EVERYTHING
+     * and saving registers x29 and LR is handled elsewhere.
      */
-.macro SETUP_SAVE_EVERYTHING_FRAME
-    INCREASE_FRAME 512
-
+.macro SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP_SKIP_X29_LR
     // Ugly compile-time check, but we only have the preprocessor.
 #if (FRAME_SIZE_SAVE_EVERYTHING != 512)
 #error "FRAME_SIZE_SAVE_EVERYTHING(ARM64) size not as expected."
@@ -310,7 +310,6 @@
     SAVE_TWO_REGS x23, x24, 448
     SAVE_TWO_REGS x25, x26, 464
     SAVE_TWO_REGS x27, x28, 480
-    SAVE_TWO_REGS x29, xLR, 496
 
     // art::Runtime** xIP0 = &art::Runtime::instance_
     adrp xIP0, :got:_ZN3art7Runtime9instance_E
@@ -328,6 +327,16 @@
     str xIP0, [xSELF, # THREAD_TOP_QUICK_FRAME_OFFSET]
 .endm
 
+    /*
+     * Macro that sets up the callee save frame to conform with
+     * Runtime::CreateCalleeSaveMethod(kSaveEverything)
+     */
+.macro SETUP_SAVE_EVERYTHING_FRAME
+    INCREASE_FRAME 512
+    SAVE_TWO_REGS x29, xLR, 496
+    SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP_SKIP_X29_LR
+.endm
+
 .macro RESTORE_SAVE_EVERYTHING_FRAME
     // Restore FP registers.
     // For better performance, load d0 and d31 separately, so that all LDPs are 16-byte aligned.
@@ -462,7 +471,21 @@
     /*
      * Call installed by a signal handler to create and deliver a NullPointerException.
      */
-ONE_ARG_RUNTIME_EXCEPTION art_quick_throw_null_pointer_exception_from_signal, artThrowNullPointerExceptionFromSignal
+    .extern art_quick_throw_null_pointer_exception_from_signal
+ENTRY art_quick_throw_null_pointer_exception_from_signal
+    // The fault handler pushes the gc map address, i.e. "return address", to stack
+    // and passes the fault address in LR. So we need to set up the CFI info accordingly.
+    .cfi_def_cfa_offset __SIZEOF_POINTER__
+    .cfi_rel_offset lr, 0
+    // Save all registers as basis for long jump context.
+    INCREASE_FRAME (FRAME_SIZE_SAVE_EVERYTHING - __SIZEOF_POINTER__)
+    SAVE_REG x29, (FRAME_SIZE_SAVE_EVERYTHING - 2 * __SIZEOF_POINTER__)  // LR already saved.
+    SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP_SKIP_X29_LR
+    mov x0, lr                        // pass the fault address stored in LR by the fault handler.
+    mov x1, xSELF                     // pass Thread::Current.
+    b   artThrowNullPointerExceptionFromSignal  // (arg, Thread*).
+    brk 0
+END art_quick_throw_null_pointer_exception_from_signal
 
     /*
      * Called by managed code to create and deliver an ArithmeticException.
diff --git a/runtime/arch/mips/asm_support_mips.S b/runtime/arch/mips/asm_support_mips.S
index 801f708..7955b1d 100644
--- a/runtime/arch/mips/asm_support_mips.S
+++ b/runtime/arch/mips/asm_support_mips.S
@@ -43,7 +43,7 @@
 .endm
 
      // Declare a function called name, doesn't set up $gp.
-.macro ENTRY_NO_GP name
+.macro ENTRY_NO_GP_CUSTOM_CFA name, cfa_offset
     .type \name, %function
     .global \name
     // Cache alignment for function entry.
@@ -51,7 +51,12 @@
 \name:
     .cfi_startproc
      // Ensure we get a sane starting CFA.
-    .cfi_def_cfa $sp,0
+    .cfi_def_cfa $sp, \cfa_offset
+.endm
+
+     // Declare a function called name, doesn't set up $gp.
+.macro ENTRY_NO_GP name
+    ENTRY_NO_GP_CUSTOM_CFA \name, 0
 .endm
 
 .macro END name
diff --git a/runtime/arch/mips/fault_handler_mips.cc b/runtime/arch/mips/fault_handler_mips.cc
index 7969a8f..b6a63ca 100644
--- a/runtime/arch/mips/fault_handler_mips.cc
+++ b/runtime/arch/mips/fault_handler_mips.cc
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-
+#include "arch/mips/quick_method_frame_info_mips.h"
 #include "fault_handler.h"
 #include <sys/ucontext.h>
 #include "art_method-inl.h"
@@ -82,12 +82,15 @@
   struct ucontext *uc = reinterpret_cast<struct ucontext*>(context);
   struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
 
+  // Decrement $sp by the frame size of the kSaveEverything method and store
+  // the fault address in the padding right after the ArtMethod*.
+  sc->sc_regs[mips::SP] -= mips::MipsCalleeSaveFrameSize(Runtime::kSaveEverything);
+  uintptr_t* padding = reinterpret_cast<uintptr_t*>(sc->sc_regs[mips::SP]) + /* ArtMethod* */ 1;
+  *padding = reinterpret_cast<uintptr_t>(info->si_addr);
+
   sc->sc_regs[mips::RA] = sc->sc_pc + 4;      // RA needs to point to gc map location
   sc->sc_pc = reinterpret_cast<uintptr_t>(art_quick_throw_null_pointer_exception_from_signal);
   sc->sc_regs[mips::T9] = sc->sc_pc;          // make sure T9 points to the function
-  // Pass the faulting address as the first argument of
-  // art_quick_throw_null_pointer_exception_from_signal.
-  sc->sc_regs[mips::A0] = reinterpret_cast<uintptr_t>(info->si_addr);
   VLOG(signals) << "Generating null pointer exception";
   return true;
 }
diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S
index 4bd1314..71b8ae2 100644
--- a/runtime/arch/mips/quick_entrypoints_mips.S
+++ b/runtime/arch/mips/quick_entrypoints_mips.S
@@ -279,6 +279,7 @@
     /*
      * Macro that sets up the callee save frame to conform with
      * Runtime::CreateCalleeSaveMethod(kSaveEverything).
+     * when the $sp has already been decremented by FRAME_SIZE_SAVE_EVERYTHING.
      * Callee-save: $at, $v0-$v1, $a0-$a3, $t0-$t7, $s0-$s7, $t8-$t9, $gp, $fp $ra, $f0-$f31;
      *              28(GPR)+ 32(FPR) + 3 words for padding and 1 word for Method*
      * Clobbers $t0 and $t1.
@@ -286,10 +287,7 @@
      * Reserves FRAME_SIZE_SAVE_EVERYTHING + ARG_SLOT_SIZE bytes on the stack.
      * This macro sets up $gp; entrypoints using it should start with ENTRY_NO_GP.
      */
-.macro SETUP_SAVE_EVERYTHING_FRAME
-    addiu  $sp, $sp, -256
-    .cfi_adjust_cfa_offset 256
-
+.macro SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP
      // Ugly compile-time check, but we only have the preprocessor.
 #if (FRAME_SIZE_SAVE_EVERYTHING != 256)
 #error "FRAME_SIZE_SAVE_EVERYTHING(MIPS) size not as expected."
@@ -388,6 +386,22 @@
     .cfi_adjust_cfa_offset ARG_SLOT_SIZE
 .endm
 
+    /*
+     * Macro that sets up the callee save frame to conform with
+     * Runtime::CreateCalleeSaveMethod(kSaveEverything).
+     * Callee-save: $at, $v0-$v1, $a0-$a3, $t0-$t7, $s0-$s7, $t8-$t9, $gp, $fp $ra, $f0-$f31;
+     *              28(GPR)+ 32(FPR) + 3 words for padding and 1 word for Method*
+     * Clobbers $t0 and $t1.
+     * Allocates ARG_SLOT_SIZE bytes at the bottom of the stack for arg slots.
+     * Reserves FRAME_SIZE_SAVE_EVERYTHING + ARG_SLOT_SIZE bytes on the stack.
+     * This macro sets up $gp; entrypoints using it should start with ENTRY_NO_GP.
+     */
+.macro SETUP_SAVE_EVERYTHING_FRAME
+    addiu  $sp, $sp, -(FRAME_SIZE_SAVE_EVERYTHING)
+    .cfi_adjust_cfa_offset (FRAME_SIZE_SAVE_EVERYTHING)
+    SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP
+.endm
+
 .macro RESTORE_SAVE_EVERYTHING_FRAME
     addiu  $sp, $sp, ARG_SLOT_SIZE                # remove argument slots on the stack
     .cfi_adjust_cfa_offset -ARG_SLOT_SIZE
@@ -708,8 +722,10 @@
      * Call installed by a signal handler to create and deliver a NullPointerException.
      */
     .extern artThrowNullPointerExceptionFromSignal
-ENTRY art_quick_throw_null_pointer_exception_from_signal
-    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
+ENTRY_NO_GP_CUSTOM_CFA art_quick_throw_null_pointer_exception_from_signal, FRAME_SIZE_SAVE_EVERYTHING
+    SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP
+    # Retrieve the fault address from the padding where the signal handler stores it.
+    lw   $a0, (ARG_SLOT_SIZE + __SIZEOF_POINTER__)($sp)
     la   $t9, artThrowNullPointerExceptionFromSignal
     jalr $zero, $t9                 # artThrowNullPointerExceptionFromSignal(uintptr_t, Thread*)
     move $a1, rSELF                 # pass Thread::Current
diff --git a/runtime/arch/mips64/asm_support_mips64.S b/runtime/arch/mips64/asm_support_mips64.S
index 786e860..6c58fcf 100644
--- a/runtime/arch/mips64/asm_support_mips64.S
+++ b/runtime/arch/mips64/asm_support_mips64.S
@@ -45,8 +45,8 @@
 .L\name\()_gp_set:
 .endm
 
-     // Declare a function called name, doesn't set up $gp.
-.macro ENTRY_NO_GP name
+    // Declare a function called name, doesn't set up $gp.
+.macro ENTRY_NO_GP_CUSTOM_CFA name, cfa_offset
     .type \name, %function
     .global \name
     // Cache alignment for function entry.
@@ -54,7 +54,12 @@
 \name:
     .cfi_startproc
      // Ensure we get a sane starting CFA.
-    .cfi_def_cfa $sp,0
+    .cfi_def_cfa $sp, \cfa_offset
+.endm
+
+    // Declare a function called name, doesn't set up $gp.
+.macro ENTRY_NO_GP name
+    ENTRY_NO_GP_CUSTOM_CFA \name, 0
 .endm
 
 .macro END name
diff --git a/runtime/arch/mips64/fault_handler_mips64.cc b/runtime/arch/mips64/fault_handler_mips64.cc
index 0bbb6e1..e52dc73 100644
--- a/runtime/arch/mips64/fault_handler_mips64.cc
+++ b/runtime/arch/mips64/fault_handler_mips64.cc
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-
+#include "arch/mips64/quick_method_frame_info_mips64.h"
 #include "fault_handler.h"
 #include <sys/ucontext.h>
 #include "art_method-inl.h"
@@ -83,12 +83,15 @@
   struct ucontext *uc = reinterpret_cast<struct ucontext*>(context);
   struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
 
+  // Decrement $sp by the frame size of the kSaveEverything method and store
+  // the fault address in the padding right after the ArtMethod*.
+  sc->sc_regs[mips64::SP] -= mips64::Mips64CalleeSaveFrameSize(Runtime::kSaveEverything);
+  uintptr_t* padding = reinterpret_cast<uintptr_t*>(sc->sc_regs[mips64::SP]) + /* ArtMethod* */ 1;
+  *padding = reinterpret_cast<uintptr_t>(info->si_addr);
+
   sc->sc_regs[mips64::RA] = sc->sc_pc + 4;      // RA needs to point to gc map location
   sc->sc_pc = reinterpret_cast<uintptr_t>(art_quick_throw_null_pointer_exception_from_signal);
   sc->sc_regs[mips64::T9] = sc->sc_pc;          // make sure T9 points to the function
-  // Pass the faulting address as the first argument of
-  // art_quick_throw_null_pointer_exception_from_signal.
-  sc->sc_regs[mips64::A0] = reinterpret_cast<uintptr_t>(info->si_addr);
   VLOG(signals) << "Generating null pointer exception";
   return true;
 }
diff --git a/runtime/arch/mips64/quick_entrypoints_mips64.S b/runtime/arch/mips64/quick_entrypoints_mips64.S
index 0bf2a35..d51d18a 100644
--- a/runtime/arch/mips64/quick_entrypoints_mips64.S
+++ b/runtime/arch/mips64/quick_entrypoints_mips64.S
@@ -316,14 +316,12 @@
     /*
      * Macro that sets up the callee save frame to conform with
      * Runtime::CreateCalleeSaveMethod(kSaveEverything).
+     * when the $sp has already been decremented by FRAME_SIZE_SAVE_EVERYTHING.
      * callee-save: $at + $v0-$v1 + $a0-$a7 + $t0-$t3 + $s0-$s7 + $t8-$t9 + $gp + $s8 + $ra + $s8,
      *              $f0-$f31; 28(GPR)+ 32(FPR) + 1x8 bytes padding + method*
      * This macro sets up $gp; entrypoints using it should start with ENTRY_NO_GP.
      */
-.macro SETUP_SAVE_EVERYTHING_FRAME
-    daddiu $sp, $sp, -496
-    .cfi_adjust_cfa_offset 496
-
+.macro SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP
      // Ugly compile-time check, but we only have the preprocessor.
 #if (FRAME_SIZE_SAVE_EVERYTHING != 496)
 #error "FRAME_SIZE_SAVE_EVERYTHING(MIPS64) size not as expected."
@@ -436,6 +434,19 @@
     sd      $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)
 .endm
 
+    /*
+     * Macro that sets up the callee save frame to conform with
+     * Runtime::CreateCalleeSaveMethod(kSaveEverything).
+     * callee-save: $at + $v0-$v1 + $a0-$a7 + $t0-$t3 + $s0-$s7 + $t8-$t9 + $gp + $s8 + $ra + $s8,
+     *              $f0-$f31; 28(GPR)+ 32(FPR) + 1x8 bytes padding + method*
+     * This macro sets up $gp; entrypoints using it should start with ENTRY_NO_GP.
+     */
+.macro SETUP_SAVE_EVERYTHING_FRAME
+    daddiu $sp, $sp, -(FRAME_SIZE_SAVE_EVERYTHING)
+    .cfi_adjust_cfa_offset (FRAME_SIZE_SAVE_EVERYTHING)
+    SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP
+.endm
+
 .macro RESTORE_SAVE_EVERYTHING_FRAME
     // Restore FP registers.
     l.d    $f31, 264($sp)
@@ -818,8 +829,10 @@
      * Call installed by a signal handler to create and deliver a NullPointerException
      */
     .extern artThrowNullPointerExceptionFromSignal
-ENTRY art_quick_throw_null_pointer_exception_from_signal
-    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
+ENTRY_NO_GP_CUSTOM_CFA art_quick_throw_null_pointer_exception_from_signal, FRAME_SIZE_SAVE_EVERYTHING
+    SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP
+    # Retrieve the fault address from the padding where the signal handler stores it.
+    ld   $a0, (__SIZEOF_POINTER__)($sp)
     dla  $t9, artThrowNullPointerExceptionFromSignal
     jalr $zero, $t9                 # artThrowNullPointerExceptionFromSignal(uinptr_t, Thread*)
     move $a1, rSELF                 # pass Thread::Current
diff --git a/runtime/arch/x86/asm_support_x86.S b/runtime/arch/x86/asm_support_x86.S
index 3e47209..14b01c5 100644
--- a/runtime/arch/x86/asm_support_x86.S
+++ b/runtime/arch/x86/asm_support_x86.S
@@ -114,7 +114,7 @@
     .balign 16
 END_MACRO
 
-MACRO1(DEFINE_FUNCTION, c_name)
+MACRO2(DEFINE_FUNCTION_CUSTOM_CFA, c_name, cfa_offset)
     FUNCTION_TYPE(SYMBOL(\c_name))
     ASM_HIDDEN CALLVAR(c_name)
     .globl CALLVAR(c_name)
@@ -122,7 +122,11 @@
 CALLVAR(c_name):
     CFI_STARTPROC
     // Ensure we get a sane starting CFA.
-    CFI_DEF_CFA(esp, 4)
+    CFI_DEF_CFA(esp, RAW_VAR(cfa_offset))
+END_MACRO
+
+MACRO1(DEFINE_FUNCTION, c_name)
+    DEFINE_FUNCTION_CUSTOM_CFA RAW_VAR(c_name), __SIZEOF_POINTER__
 END_MACRO
 
 MACRO1(END_FUNCTION, c_name)
diff --git a/runtime/arch/x86/fault_handler_x86.cc b/runtime/arch/x86/fault_handler_x86.cc
index c7af249..a4d6bb4 100644
--- a/runtime/arch/x86/fault_handler_x86.cc
+++ b/runtime/arch/x86/fault_handler_x86.cc
@@ -325,21 +325,15 @@
   // next instruction (this instruction + instruction size).  The return address
   // is on the stack at the top address of the current frame.
 
-  // Push the return address onto the stack.
+  // Push the return address and fault address onto the stack.
   uintptr_t retaddr = reinterpret_cast<uintptr_t>(pc + instr_size);
-  uintptr_t* next_sp = reinterpret_cast<uintptr_t*>(sp - sizeof(uintptr_t));
-  *next_sp = retaddr;
+  uintptr_t* next_sp = reinterpret_cast<uintptr_t*>(sp - 2 * sizeof(uintptr_t));
+  next_sp[1] = retaddr;
+  next_sp[0] = reinterpret_cast<uintptr_t>(sig->si_addr);
   uc->CTX_ESP = reinterpret_cast<uintptr_t>(next_sp);
 
   uc->CTX_EIP = reinterpret_cast<uintptr_t>(
       art_quick_throw_null_pointer_exception_from_signal);
-  // Pass the faulting address as the first argument of
-  // art_quick_throw_null_pointer_exception_from_signal.
-#if defined(__x86_64__)
-  uc->CTX_RDI = reinterpret_cast<uintptr_t>(sig->si_addr);
-#else
-  uc->CTX_EAX = reinterpret_cast<uintptr_t>(sig->si_addr);
-#endif
   VLOG(signals) << "Generating null pointer exception";
   return true;
 }
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 646a80c..0beb2a4 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -224,10 +224,11 @@
     /*
      * Macro that sets up the callee save frame to conform with
      * Runtime::CreateCalleeSaveMethod(kSaveEverything)
+     * when EDI is already saved.
      */
-MACRO2(SETUP_SAVE_EVERYTHING_FRAME, got_reg, temp_reg)
-    // Save core registers.
-    PUSH edi
+MACRO2(SETUP_SAVE_EVERYTHING_FRAME_EDI_SAVED, got_reg, temp_reg)
+    // Save core registers from highest to lowest to agree with core spills bitmap.
+    // EDI, or at least a placeholder for it, is already on the stack.
     PUSH esi
     PUSH ebp
     PUSH ebx
@@ -264,6 +265,15 @@
 #endif
 END_MACRO
 
+    /*
+     * Macro that sets up the callee save frame to conform with
+     * Runtime::CreateCalleeSaveMethod(kSaveEverything)
+     */
+MACRO2(SETUP_SAVE_EVERYTHING_FRAME, got_reg, temp_reg)
+    PUSH edi
+    SETUP_SAVE_EVERYTHING_FRAME_EDI_SAVED RAW_VAR(got_reg), RAW_VAR(temp_reg)
+END_MACRO
+
 MACRO0(RESTORE_SAVE_EVERYTHING_FRAME)
     // Restore FPRs. Method and padding is still on the stack.
     movsd 16(%esp), %xmm0
@@ -320,7 +330,6 @@
 MACRO2(ONE_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
     DEFINE_FUNCTION VAR(c_name)
     SETUP_SAVE_ALL_CALLEE_SAVES_FRAME ebx, ebx // save all registers as basis for long jump context
-    mov %esp, %ecx
     // Outgoing argument set up
     subl MACRO_LITERAL(8), %esp                // alignment padding
     CFI_ADJUST_CFA_OFFSET(8)
@@ -354,7 +363,23 @@
     /*
      * Call installed by a signal handler to create and deliver a NullPointerException.
      */
-ONE_ARG_RUNTIME_EXCEPTION art_quick_throw_null_pointer_exception_from_signal, artThrowNullPointerExceptionFromSignal
+DEFINE_FUNCTION_CUSTOM_CFA art_quick_throw_null_pointer_exception_from_signal, 2 * __SIZEOF_POINTER__
+    // Fault address and return address were saved by the fault handler.
+    // Save all registers as basis for long jump context; EDI will replace fault address later.
+    SETUP_SAVE_EVERYTHING_FRAME_EDI_SAVED ebx, ebx
+    // Retrieve fault address and save EDI.
+    movl (FRAME_SIZE_SAVE_EVERYTHING - 2 * __SIZEOF_POINTER__)(%esp), %eax
+    movl %edi, (FRAME_SIZE_SAVE_EVERYTHING - 2 * __SIZEOF_POINTER__)(%esp)
+    CFI_REL_OFFSET(%edi, (FRAME_SIZE_SAVE_EVERYTHING - 2 * __SIZEOF_POINTER__))
+    // Outgoing argument set up
+    subl MACRO_LITERAL(8), %esp                           // alignment padding
+    CFI_ADJUST_CFA_OFFSET(8)
+    pushl %fs:THREAD_SELF_OFFSET                          // pass Thread::Current()
+    CFI_ADJUST_CFA_OFFSET(4)
+    PUSH eax                                              // pass arg1
+    call SYMBOL(artThrowNullPointerExceptionFromSignal)   // (addr, self)
+    UNREACHABLE
+END_FUNCTION art_quick_throw_null_pointer_exception
 
     /*
      * Called by managed code to create and deliver an ArithmeticException.
diff --git a/runtime/arch/x86_64/asm_support_x86_64.S b/runtime/arch/x86_64/asm_support_x86_64.S
index 0728f99..af4a6c4 100644
--- a/runtime/arch/x86_64/asm_support_x86_64.S
+++ b/runtime/arch/x86_64/asm_support_x86_64.S
@@ -110,7 +110,7 @@
 
 // TODO: we might need to use SYMBOL() here to add the underscore prefix
 // for mac builds.
-MACRO1(DEFINE_FUNCTION, c_name)
+MACRO2(DEFINE_FUNCTION_CUSTOM_CFA, c_name, cfa_offset)
     FUNCTION_TYPE(SYMBOL(\c_name))
     ASM_HIDDEN CALLVAR(c_name)
     .globl CALLVAR(c_name)
@@ -118,7 +118,11 @@
 CALLVAR(c_name):
     CFI_STARTPROC
     // Ensure we get a sane starting CFA.
-    CFI_DEF_CFA(rsp, 8)
+    CFI_DEF_CFA(rsp, RAW_VAR(cfa_offset))
+END_MACRO
+
+MACRO1(DEFINE_FUNCTION, c_name)
+    DEFINE_FUNCTION_CUSTOM_CFA RAW_VAR(c_name), __SIZEOF_POINTER__
 END_MACRO
 
 MACRO1(END_FUNCTION, c_name)
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 5ea58af..089ed75 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -263,14 +263,15 @@
     /*
      * Macro that sets up the callee save frame to conform with
      * Runtime::CreateCalleeSaveMethod(kSaveEverything)
+     * when R15 is already saved.
      */
-MACRO0(SETUP_SAVE_EVERYTHING_FRAME)
+MACRO0(SETUP_SAVE_EVERYTHING_FRAME_R15_SAVED)
 #if defined(__APPLE__)
     int3
     int3
 #else
     // Save core registers from highest to lowest to agree with core spills bitmap.
-    PUSH r15
+    // R15, or at least a placeholder for it, is already on the stack.
     PUSH r14
     PUSH r13
     PUSH r12
@@ -322,6 +323,15 @@
 #endif  // __APPLE__
 END_MACRO
 
+    /*
+     * Macro that sets up the callee save frame to conform with
+     * Runtime::CreateCalleeSaveMethod(kSaveEverything)
+     */
+MACRO0(SETUP_SAVE_EVERYTHING_FRAME)
+    PUSH r15
+    SETUP_SAVE_EVERYTHING_FRAME_R15_SAVED
+END_MACRO
+
 MACRO0(RESTORE_SAVE_EVERYTHING_FRAME)
     // Restore FPRs. Method and padding is still on the stack.
     movq 16(%rsp), %xmm0
@@ -413,7 +423,19 @@
     /*
      * Call installed by a signal handler to create and deliver a NullPointerException.
      */
-ONE_ARG_RUNTIME_EXCEPTION art_quick_throw_null_pointer_exception_from_signal, artThrowNullPointerExceptionFromSignal
+DEFINE_FUNCTION_CUSTOM_CFA art_quick_throw_null_pointer_exception_from_signal, 2 * __SIZEOF_POINTER__
+    // Fault address and return address were saved by the fault handler.
+    // Save all registers as basis for long jump context; R15 will replace fault address later.
+    SETUP_SAVE_EVERYTHING_FRAME_R15_SAVED
+    // Retrieve fault address and save R15.
+    movq (FRAME_SIZE_SAVE_EVERYTHING - 2 * __SIZEOF_POINTER__)(%rsp), %rdi
+    movq %r15, (FRAME_SIZE_SAVE_EVERYTHING - 2 * __SIZEOF_POINTER__)(%rsp)
+    CFI_REL_OFFSET(%r15, (FRAME_SIZE_SAVE_EVERYTHING - 2 * __SIZEOF_POINTER__))
+    // Outgoing argument set up; RDI already contains the fault address.
+    movq %gs:THREAD_SELF_OFFSET, %rsi  // pass Thread::Current()
+    call SYMBOL(artThrowNullPointerExceptionFromSignal)  // (addr, self)
+    UNREACHABLE
+END_FUNCTION art_quick_throw_null_pointer_exception_from_signal
 
     /*
      * Called by managed code to create and deliver an ArithmeticException.
diff --git a/test/439-npe/expected.txt b/test/439-npe/expected.txt
index 34855ee..b4fd6bb 100644
--- a/test/439-npe/expected.txt
+++ b/test/439-npe/expected.txt
@@ -52,3 +52,73 @@
 $opt$noinline$getBooleanElement
 $opt$noinline$getCharElement
 $opt$noinline$getShortElement
+i0=4
+i1=8
+i2=12
+i3=16
+i4=20
+i5=24
+i6=28
+i7=32
+i8=36
+i9=40
+i10=44
+i11=48
+i12=52
+i13=56
+i14=44
+i15=57
+l0=84
+l1=88
+l2=92
+l3=96
+l4=100
+l5=104
+l6=108
+l7=112
+l8=116
+l9=120
+l10=124
+l11=128
+l12=132
+l13=136
+l14=104
+l15=146
+f0=164.0
+f1=168.0
+f2=172.0
+f3=176.0
+f4=180.0
+f5=184.0
+f6=188.0
+f7=192.0
+f8=196.0
+f9=200.0
+f10=204.0
+f11=208.0
+f12=212.0
+f13=216.0
+f14=164.0
+f15=55.5
+d0=244.0
+d1=248.0
+d2=252.0
+d3=256.0
+d4=260.0
+d5=264.0
+d6=268.0
+d7=272.0
+d8=276.0
+d9=280.0
+d10=284.0
+d11=288.0
+d12=292.0
+d13=296.0
+d14=224.0
+d15=75.125
+addInt=42
+addLong=111
+addFloat=0.5
+addDouble=0.125
+m=null
+i=2
diff --git a/test/439-npe/src/Main.java b/test/439-npe/src/Main.java
index 8f66da0..bc044a4 100644
--- a/test/439-npe/src/Main.java
+++ b/test/439-npe/src/Main.java
@@ -634,12 +634,246 @@
     } catch (NullPointerException npe) {
       check(npe, thisLine += 6, methodLine += 5, "$opt$noinline$getShortElement");
     }
+
+    $opt$noinline$testRegisterRetrieval();
   }
 
-  static void check(NullPointerException npe, int mainLine, int medthodLine, String methodName) {
+  static void $opt$noinline$testRegisterRetrieval() {
+    Main[] array = $noinline$PrepareArray();
+    int i0 = 0;
+    int i1 = 1;
+    int i2 = 2;
+    int i3 = 3;
+    int i4 = 4;
+    int i5 = 5;
+    int i6 = 6;
+    int i7 = 7;
+    int i8 = 8;
+    int i9 = 9;
+    int i10 = 10;
+    int i11 = 11;
+    int i12 = 12;
+    int i13 = 13;
+    int i14 = 14;
+    int i15 = 15;
+    long l0 = 20L;
+    long l1 = 21L;
+    long l2 = 22L;
+    long l3 = 23L;
+    long l4 = 24L;
+    long l5 = 25L;
+    long l6 = 26L;
+    long l7 = 27L;
+    long l8 = 28L;
+    long l9 = 29L;
+    long l10 = 30L;
+    long l11 = 31L;
+    long l12 = 32L;
+    long l13 = 33L;
+    long l14 = 34L;
+    long l15 = 35L;
+    float f0 = 40.0f;
+    float f1 = 41.0f;
+    float f2 = 42.0f;
+    float f3 = 43.0f;
+    float f4 = 44.0f;
+    float f5 = 45.0f;
+    float f6 = 46.0f;
+    float f7 = 47.0f;
+    float f8 = 48.0f;
+    float f9 = 49.0f;
+    float f10 = 50.0f;
+    float f11 = 51.0f;
+    float f12 = 52.0f;
+    float f13 = 53.0f;
+    float f14 = 54.0f;
+    float f15 = 55.0f;
+    double d0 = 60.0;
+    double d1 = 61.0;
+    double d2 = 62.0;
+    double d3 = 63.0;
+    double d4 = 64.0;
+    double d5 = 65.0;
+    double d6 = 66.0;
+    double d7 = 67.0;
+    double d8 = 68.0;
+    double d9 = 69.0;
+    double d10 = 70.0;
+    double d11 = 71.0;
+    double d12 = 72.0;
+    double d13 = 73.0;
+    double d14 = 74.0;
+    double d15 = 75.0;
+    int addInt = -1;
+    long addLong = -2L;
+    float addFloat = -3.0f;
+    double addDouble = -4.0;
+    Main m = null;
+    int i = 0;
+    try {
+      for (i = 0; i < array.length; ++i) {
+        m = array[i];
+        // We have 16 ints, 16 longs, 16 floats, 16 doubles and a few helper variables here,
+        // none of them anonymous. Hopefully, all available physical registers will be allocated
+        // to these variables, so that when `m.intField` throws NPE during the third iteration,
+        // we will fully test retrieval of values from all physical registers.
+        addInt = m.intField;
+        addLong = m.longField;
+        addFloat = m.floatField;
+        addDouble = m.doubleField;
+        i0 += i1;
+        i1 += i2;
+        i2 += i3;
+        i3 += i4;
+        i4 += i5;
+        i5 += i6;
+        i6 += i7;
+        i7 += i8;
+        i8 += i9;
+        i9 += i10;
+        i10 += i11;
+        i11 += i12;
+        i12 += i13;
+        i13 += i14;
+        i14 += i15;
+        i15 += addInt;
+        l0 += l1;
+        l1 += l2;
+        l2 += l3;
+        l3 += l4;
+        l4 += l5;
+        l5 += l6;
+        l6 += l7;
+        l7 += l8;
+        l8 += l9;
+        l9 += l10;
+        l10 += l11;
+        l11 += l12;
+        l12 += l13;
+        l13 += l14;
+        l14 += l15;
+        l15 += addLong;
+        f0 += f1;
+        f1 += f2;
+        f2 += f3;
+        f3 += f4;
+        f4 += f5;
+        f5 += f6;
+        f6 += f7;
+        f7 += f8;
+        f8 += f9;
+        f9 += f10;
+        f10 += f11;
+        f11 += f12;
+        f12 += f13;
+        f13 += f14;
+        f14 += f15;
+        f15 += addFloat;
+        d0 += d1;
+        d1 += d2;
+        d2 += d3;
+        d3 += d4;
+        d4 += d5;
+        d5 += d6;
+        d6 += d7;
+        d7 += d8;
+        d8 += d9;
+        d9 += d10;
+        d10 += d11;
+        d11 += d12;
+        d12 += d13;
+        d13 += d14;
+        d14 += d15;
+        d15 += addDouble;
+      }
+    } catch (NullPointerException npe) {
+      System.out.println("i0=" + i0);
+      System.out.println("i1=" + i1);
+      System.out.println("i2=" + i2);
+      System.out.println("i3=" + i3);
+      System.out.println("i4=" + i4);
+      System.out.println("i5=" + i5);
+      System.out.println("i6=" + i6);
+      System.out.println("i7=" + i7);
+      System.out.println("i8=" + i8);
+      System.out.println("i9=" + i9);
+      System.out.println("i10=" + i10);
+      System.out.println("i11=" + i11);
+      System.out.println("i12=" + i12);
+      System.out.println("i13=" + i13);
+      System.out.println("i14=" + i14);
+      System.out.println("i15=" + i15);
+      System.out.println("l0=" + l0);
+      System.out.println("l1=" + l1);
+      System.out.println("l2=" + l2);
+      System.out.println("l3=" + l3);
+      System.out.println("l4=" + l4);
+      System.out.println("l5=" + l5);
+      System.out.println("l6=" + l6);
+      System.out.println("l7=" + l7);
+      System.out.println("l8=" + l8);
+      System.out.println("l9=" + l9);
+      System.out.println("l10=" + l10);
+      System.out.println("l11=" + l11);
+      System.out.println("l12=" + l12);
+      System.out.println("l13=" + l13);
+      System.out.println("l14=" + l14);
+      System.out.println("l15=" + l15);
+      System.out.println("f0=" + f0);
+      System.out.println("f1=" + f1);
+      System.out.println("f2=" + f2);
+      System.out.println("f3=" + f3);
+      System.out.println("f4=" + f4);
+      System.out.println("f5=" + f5);
+      System.out.println("f6=" + f6);
+      System.out.println("f7=" + f7);
+      System.out.println("f8=" + f8);
+      System.out.println("f9=" + f9);
+      System.out.println("f10=" + f10);
+      System.out.println("f11=" + f11);
+      System.out.println("f12=" + f12);
+      System.out.println("f13=" + f13);
+      System.out.println("f14=" + f14);
+      System.out.println("f15=" + f15);
+      System.out.println("d0=" + d0);
+      System.out.println("d1=" + d1);
+      System.out.println("d2=" + d2);
+      System.out.println("d3=" + d3);
+      System.out.println("d4=" + d4);
+      System.out.println("d5=" + d5);
+      System.out.println("d6=" + d6);
+      System.out.println("d7=" + d7);
+      System.out.println("d8=" + d8);
+      System.out.println("d9=" + d9);
+      System.out.println("d10=" + d10);
+      System.out.println("d11=" + d11);
+      System.out.println("d12=" + d12);
+      System.out.println("d13=" + d13);
+      System.out.println("d14=" + d14);
+      System.out.println("d15=" + d15);
+      System.out.println("addInt=" + addInt);
+      System.out.println("addLong=" + addLong);
+      System.out.println("addFloat=" + addFloat);
+      System.out.println("addDouble=" + addDouble);
+      System.out.println("m=" + m);
+      System.out.println("i=" + i);
+    }
+  }
+
+  static Main[] $noinline$PrepareArray() {
+    if (doThrow) { throw new Error(); }
+    Main[] array = new Main[] { new Main(), new Main(), null, new Main() };
+    array[1].intField = 42;
+    array[1].longField = 111L;
+    array[1].floatField = 0.5f;
+    array[1].doubleField = 0.125;
+    return array;
+  }
+
+  static void check(NullPointerException npe, int mainLine, int methodLine, String methodName) {
     System.out.println(methodName);
     StackTraceElement[] trace = npe.getStackTrace();
-    checkElement(trace[0], "Main", methodName, "Main.java", medthodLine);
+    checkElement(trace[0], "Main", methodName, "Main.java", methodLine);
     checkElement(trace[1], "Main", "main", "Main.java", mainLine);
   }