Revert "Reduce stack usage for overflow checks"

This reverts commit 63c051a540e6dfc806f656b88ac3a63e99395429.

Change-Id: I282a048994fcd130fe73842b16c21680053c592f
diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc
index 4ba3c4b..b133991 100644
--- a/compiler/dex/quick/arm/call_arm.cc
+++ b/compiler/dex/quick/arm/call_arm.cc
@@ -354,14 +354,13 @@
    * We can safely skip the stack overflow check if we're
    * a leaf *and* our frame size < fudge factor.
    */
-  bool skip_overflow_check = mir_graph_->MethodIsLeaf() && !FrameNeedsStackCheck(frame_size_, kArm);
+  bool skip_overflow_check = mir_graph_->MethodIsLeaf() && !IsLargeFrame(frame_size_, kArm);
   NewLIR0(kPseudoMethodEntry);
-  const size_t kStackOverflowReservedUsableBytes = GetStackOverflowReservedBytes(kArm);
+  const size_t kStackOverflowReservedUsableBytes = GetStackOverflowReservedBytes(kArm) -
+      Thread::kStackOverflowSignalReservedBytes;
   bool large_frame = (static_cast<size_t>(frame_size_) > kStackOverflowReservedUsableBytes);
-  bool generate_explicit_stack_overflow_check = large_frame ||
-    !cu_->compiler_driver->GetCompilerOptions().GetImplicitStackOverflowChecks();
   if (!skip_overflow_check) {
-    if (generate_explicit_stack_overflow_check) {
+    if (!cu_->compiler_driver->GetCompilerOptions().GetImplicitStackOverflowChecks()) {
       if (!large_frame) {
         /* Load stack limit */
         LockTemp(rs_r12);
@@ -400,7 +399,7 @@
   const int spill_size = spill_count * 4;
   const int frame_size_without_spills = frame_size_ - spill_size;
   if (!skip_overflow_check) {
-    if (generate_explicit_stack_overflow_check) {
+    if (!cu_->compiler_driver->GetCompilerOptions().GetImplicitStackOverflowChecks()) {
       class StackOverflowSlowPath : public LIRSlowPath {
        public:
         StackOverflowSlowPath(Mir2Lir* m2l, LIR* branch, bool restore_lr, size_t sp_displace)
diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc
index 0538c31..28b747b 100644
--- a/compiler/dex/quick/arm64/call_arm64.cc
+++ b/compiler/dex/quick/arm64/call_arm64.cc
@@ -329,20 +329,16 @@
    * We can safely skip the stack overflow check if we're
    * a leaf *and* our frame size < fudge factor.
    */
-  bool skip_overflow_check = mir_graph_->MethodIsLeaf() && !FrameNeedsStackCheck(frame_size_, kArm64);
+  bool skip_overflow_check = mir_graph_->MethodIsLeaf() && !IsLargeFrame(frame_size_, kArm64);
 
   NewLIR0(kPseudoMethodEntry);
 
-  const size_t kStackOverflowReservedUsableBytes = GetStackOverflowReservedBytes(kArm64);
-  const bool large_frame = static_cast<size_t>(frame_size_) > kStackOverflowReservedUsableBytes;
-  bool generate_explicit_stack_overflow_check = large_frame ||
-    !cu_->compiler_driver->GetCompilerOptions().GetImplicitStackOverflowChecks();
   const int spill_count = num_core_spills_ + num_fp_spills_;
   const int spill_size = (spill_count * kArm64PointerSize + 15) & ~0xf;  // SP 16 byte alignment.
   const int frame_size_without_spills = frame_size_ - spill_size;
 
   if (!skip_overflow_check) {
-    if (generate_explicit_stack_overflow_check) {
+    if (!cu_->compiler_driver->GetCompilerOptions().GetImplicitStackOverflowChecks()) {
       // Load stack limit
       LoadWordDisp(rs_xSELF, Thread::StackEndOffset<8>().Int32Value(), rs_xIP1);
     } else {
@@ -369,7 +365,7 @@
   }
 
   if (!skip_overflow_check) {
-    if (generate_explicit_stack_overflow_check) {
+    if (!cu_->compiler_driver->GetCompilerOptions().GetImplicitStackOverflowChecks()) {
       class StackOverflowSlowPath: public LIRSlowPath {
       public:
         StackOverflowSlowPath(Mir2Lir* m2l, LIR* branch, size_t sp_displace) :
diff --git a/compiler/dex/quick/mips/call_mips.cc b/compiler/dex/quick/mips/call_mips.cc
index e8cb356..4577a4c 100644
--- a/compiler/dex/quick/mips/call_mips.cc
+++ b/compiler/dex/quick/mips/call_mips.cc
@@ -303,7 +303,7 @@
    * We can safely skip the stack overflow check if we're
    * a leaf *and* our frame size < fudge factor.
    */
-  bool skip_overflow_check = mir_graph_->MethodIsLeaf() && !FrameNeedsStackCheck(frame_size_, kMips);
+  bool skip_overflow_check = mir_graph_->MethodIsLeaf() && !IsLargeFrame(frame_size_, kMips);
   NewLIR0(kPseudoMethodEntry);
   RegStorage check_reg = AllocTemp();
   RegStorage new_sp = AllocTemp();
diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc
index 996689a..f5f8671 100644
--- a/compiler/dex/quick/x86/call_x86.cc
+++ b/compiler/dex/quick/x86/call_x86.cc
@@ -219,7 +219,7 @@
    * a leaf *and* our frame size < fudge factor.
    */
   InstructionSet isa =  cu_->target64 ? kX86_64 : kX86;
-  bool skip_overflow_check = mir_graph_->MethodIsLeaf() && !FrameNeedsStackCheck(frame_size_, isa);
+  const bool skip_overflow_check = mir_graph_->MethodIsLeaf() && !IsLargeFrame(frame_size_, isa);
 
  // If we are doing an implicit stack overflow check, perform the load immediately
   // before the stack pointer is decremented and anything is saved.
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 2c954a0..eccc970 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -265,7 +265,7 @@
         codegen_(codegen) {}
 
 void CodeGeneratorARM::GenerateFrameEntry() {
-  bool skip_overflow_check = IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kArm);
+  bool skip_overflow_check = IsLeafMethod() && !IsLargeFrame(GetFrameSize(), InstructionSet::kArm);
   if (!skip_overflow_check) {
     if (kExplicitStackOverflowCheck) {
       SlowPathCode* slow_path = new (GetGraph()->GetArena()) StackOverflowCheckSlowPathARM();
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index f544d47..2264638 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -241,7 +241,7 @@
   static const int kFakeReturnRegister = 8;
   core_spill_mask_ |= (1 << kFakeReturnRegister);
 
-  bool skip_overflow_check = IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86);
+  bool skip_overflow_check = IsLeafMethod() && !IsLargeFrame(GetFrameSize(), InstructionSet::kX86);
   if (!skip_overflow_check && !kExplicitStackOverflowCheck) {
     __ testl(EAX, Address(ESP, -static_cast<int32_t>(GetStackOverflowReservedBytes(kX86))));
     RecordPcInfo(0);
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index e1807dc..2ff2a17 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -213,7 +213,7 @@
           Immediate(GetFrameSize() - kNumberOfPushedRegistersAtEntry * kX86_64WordSize));
 
   bool skip_overflow_check = IsLeafMethod()
-      && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86_64);
+      && !IsLargeFrame(GetFrameSize(), InstructionSet::kX86_64);
 
   if (!skip_overflow_check) {
     if (kExplicitStackOverflowCheck) {
diff --git a/compiler/utils/stack_checks.h b/compiler/utils/stack_checks.h
index ce01077..63adbc2 100644
--- a/compiler/utils/stack_checks.h
+++ b/compiler/utils/stack_checks.h
@@ -33,9 +33,10 @@
 // Determine whether a frame is small or large, used in the decision on whether to elide a
 // stack overflow check on method entry.
 //
-// A frame is considered large when it's above kLargeFrameSize.
-static inline bool FrameNeedsStackCheck(size_t size, InstructionSet isa) {
-  return size >= kLargeFrameSize;
+// A frame is considered large when it is at or above kLargeFrameSize, or at least a
+// quarter of the reserved overflow stack space.
+static inline bool IsLargeFrame(size_t size, InstructionSet isa) {
+  return size >= kLargeFrameSize || size >= GetStackOverflowReservedBytes(isa) / 4;
 }
 
 }  // namespace art
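
For a sense of the thresholds this restores: with the 16 KB reservation reinstated in runtime/instruction_set.cc below, the quarter-of-reserved clause fires at 4 KB. A minimal standalone sketch, with kLargeFrameSize assumed to be 2 KB purely for illustration (the real constant is defined elsewhere in stack_checks.h):

    #include <cstddef>

    constexpr size_t KB = 1024;
    constexpr size_t kLargeFrameSizeSketch = 2 * KB;   // assumed value, illustration only
    constexpr size_t kReservedBytesSketch  = 16 * KB;  // per instruction_set.cc below

    // Mirrors IsLargeFrame above: large if at or above the large-frame bound,
    // or at least a quarter of the reserved overflow space.
    constexpr bool IsLargeFrameSketch(size_t frame_size) {
      return frame_size >= kLargeFrameSizeSketch || frame_size >= kReservedBytesSketch / 4;
    }

    static_assert(IsLargeFrameSketch(2 * KB), "at the large-frame bound the check is kept");
    static_assert(!IsLargeFrameSketch(2 * KB - 1), "just under it, a leaf method may skip it");

Under the assumed 2 KB value the kLargeFrameSize clause dominates; the quarter-of-reserved clause would only bite if an ISA's reservation dropped below 8 KB.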
diff --git a/runtime/arch/arm/asm_support_arm.h b/runtime/arch/arm/asm_support_arm.h
index 330924e..ae30aee 100644
--- a/runtime/arch/arm/asm_support_arm.h
+++ b/runtime/arch/arm/asm_support_arm.h
@@ -24,9 +24,9 @@
 // Offset of field Thread::tls32_.thin_lock_thread_id verified in InitCpu
 #define THREAD_ID_OFFSET 12
 // Offset of field Thread::tlsPtr_.card_table verified in InitCpu
-#define THREAD_CARD_TABLE_OFFSET 120
+#define THREAD_CARD_TABLE_OFFSET 112
 // Offset of field Thread::tlsPtr_.exception verified in InitCpu
-#define THREAD_EXCEPTION_OFFSET 124
+#define THREAD_EXCEPTION_OFFSET 116
 
 #define FRAME_SIZE_SAVE_ALL_CALLEE_SAVE 176
 #define FRAME_SIZE_REFS_ONLY_CALLEE_SAVE 32
diff --git a/runtime/arch/arm/fault_handler_arm.cc b/runtime/arch/arm/fault_handler_arm.cc
index 28b69ec..be28544 100644
--- a/runtime/arch/arm/fault_handler_arm.cc
+++ b/runtime/arch/arm/fault_handler_arm.cc
@@ -35,7 +35,7 @@
 namespace art {
 
 extern "C" void art_quick_throw_null_pointer_exception();
-extern "C" void art_quick_throw_stack_overflow();
+extern "C" void art_quick_throw_stack_overflow_from_signal();
 extern "C" void art_quick_implicit_suspend();
 
 // Get the size of a thumb2 instruction in bytes.
@@ -194,19 +194,40 @@
 
   uintptr_t overflow_addr = sp - GetStackOverflowReservedBytes(kArm);
 
+  Thread* self = reinterpret_cast<Thread*>(sc->arm_r9);
+  CHECK_EQ(self, Thread::Current());
+  uintptr_t pregion = reinterpret_cast<uintptr_t>(self->GetStackEnd()) -
+      Thread::kStackOverflowProtectedSize;
+
   // Check that the fault address is the value expected for a stack overflow.
   if (fault_addr != overflow_addr) {
     VLOG(signals) << "Not a stack overflow";
     return false;
   }
 
-  VLOG(signals) << "Stack overflow found";
+  // We know this is a stack overflow.  We need to move the SP to the overflow region
+  // that exists below the protected region.  Determine the next available valid
+  // address below the protected region.
+  uintptr_t prevsp = sp;
+  sp = pregion;
+  VLOG(signals) << "setting sp to overflow region at " << std::hex << sp;
 
-  // Now arrange for the signal handler to return to art_quick_throw_stack_overflow_from.
+  // Since the compiler puts the implicit overflow check before the callee
+  // save instructions, the SP is already pointing to the previous frame.
+  VLOG(signals) << "previous frame: " << std::hex << prevsp;
+
+  // Now establish the stack pointer for the signal return.
+  sc->arm_sp = prevsp;
+
+  // Tell the stack overflow code where the new stack pointer should be.
+  sc->arm_ip = sp;      // aka r12
+
+  // Now arrange for the signal handler to return to art_quick_throw_stack_overflow_from_signal.
   // The value of LR must be the same as it was when we entered the code that
   // caused this fault.  This will be inserted into a callee save frame by
-  // the function to which this handler returns (art_quick_throw_stack_overflow).
-  sc->arm_pc = reinterpret_cast<uintptr_t>(art_quick_throw_stack_overflow);
+  // the function to which this handler returns (art_quick_throw_stack_overflow_from_signal).
+  sc->arm_pc = reinterpret_cast<uintptr_t>(art_quick_throw_stack_overflow_from_signal);
 
   // The kernel will now return to the address in sc->arm_pc.
   return true;
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index dd1f04a..6c63a1a 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -235,6 +235,31 @@
      */
 ONE_ARG_RUNTIME_EXCEPTION art_quick_throw_no_such_method, artThrowNoSuchMethodFromCode
 
+  /*
+   * Throw a stack overflow exception from the signal handler.
+   * On entry:
+   * r9: thread
+   * sp: address of last known frame
+   * r12: address of next valid SP below protected region in stack
+   *
+   * This is deceptively simple but hides some complexity.  It is called in the case of
+   * a stack overflow condition during implicit checks.  The signal handler has been
+   * called by the kernel due to a load from the protected stack region.  The handler
+   * works out the address of the previous frame and passes this in SP.  However, there
+   * is a piece of memory somewhere below the current SP that is not accessible (the
+   * memory that caused the signal).  The signal handler works out the next
+   * accessible value of SP and passes this in r12.  This code then sets up the SP
+   * to be this new value and calls the code to create and throw the stack overflow
+   * exception.
+   */
+ENTRY art_quick_throw_stack_overflow_from_signal
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
+    mov r0, r9                      @ pass Thread::Current
+    mov r1, sp                      @ pass SP
+    mov sp, r12                     @ move SP down below the protected region.
+    b   artThrowStackOverflowFromCode                   @ artThrowStackOverflowFromCode(Thread*, SP)
+END art_quick_throw_stack_overflow_from_signal
+
     /*
      * All generated callsites for interface invokes and invocation slow paths will load arguments
      * as usual - except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain
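
The essential trick in this handler/stub pair — rewriting the saved PC in the signal context so that the kernel "returns" into a routine of our choosing, with a scratch register carrying extra state — can be demonstrated standalone. A Linux/x86-64 sketch compiled as C++ (g++ on glibc defines _GNU_SOURCE, which REG_RIP needs); handler() and recover() are local to the example, not ART code. 004-SignalTest at the end of this patch exercises the same mechanism in-tree:

    #include <csignal>
    #include <unistd.h>
    #include <ucontext.h>

    extern "C" void recover() {
      // Syscall-only recovery path; nothing here cares about stack alignment.
      const char msg[] = "redirected after SIGSEGV\n";
      write(STDOUT_FILENO, msg, sizeof(msg) - 1);
      _exit(0);
    }

    static void handler(int, siginfo_t*, void* ctx) {
      ucontext_t* uc = static_cast<ucontext_t*>(ctx);
      // Point the saved PC at recover(), as the ART handlers do with
      // sc->arm_pc / sc->pc / uc->CTX_EIP above.
      uc->uc_mcontext.gregs[REG_RIP] = reinterpret_cast<greg_t>(recover);
    }

    int main() {
      struct sigaction sa = {};
      sa.sa_sigaction = handler;
      sa.sa_flags = SA_SIGINFO;
      sigaction(SIGSEGV, &sa, nullptr);
      *reinterpret_cast<volatile int*>(8) = 1;  // deliberate fault
      return 1;
    }

After the handler returns, the kernel resumes at recover() on the faulting thread's own stack — which is exactly how art_quick_throw_stack_overflow_from_signal gets control above.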
diff --git a/runtime/arch/arm64/asm_support_arm64.h b/runtime/arch/arm64/asm_support_arm64.h
index a926449..7f0f56f 100644
--- a/runtime/arch/arm64/asm_support_arm64.h
+++ b/runtime/arch/arm64/asm_support_arm64.h
@@ -30,9 +30,9 @@
 // Offset of field Thread::suspend_count_
 #define THREAD_FLAGS_OFFSET 0
 // Offset of field Thread::card_table_
-#define THREAD_CARD_TABLE_OFFSET 120
+#define THREAD_CARD_TABLE_OFFSET 112
 // Offset of field Thread::exception_
-#define THREAD_EXCEPTION_OFFSET 128
+#define THREAD_EXCEPTION_OFFSET 120
 // Offset of field Thread::thin_lock_thread_id_
 #define THREAD_ID_OFFSET 12
 
diff --git a/runtime/arch/arm64/fault_handler_arm64.cc b/runtime/arch/arm64/fault_handler_arm64.cc
index b5948cb..3a7e689 100644
--- a/runtime/arch/arm64/fault_handler_arm64.cc
+++ b/runtime/arch/arm64/fault_handler_arm64.cc
@@ -27,7 +27,7 @@
 #include "thread.h"
 #include "thread-inl.h"
 
-extern "C" void art_quick_throw_stack_overflow();
+extern "C" void art_quick_throw_stack_overflow_from_signal();
 extern "C" void art_quick_throw_null_pointer_exception();
 extern "C" void art_quick_implicit_suspend();
 
@@ -157,19 +157,40 @@
 
   uintptr_t overflow_addr = sp - GetStackOverflowReservedBytes(kArm64);
 
+  Thread* self = reinterpret_cast<Thread*>(sc->regs[art::arm64::TR]);
+  CHECK_EQ(self, Thread::Current());
+  uintptr_t pregion = reinterpret_cast<uintptr_t>(self->GetStackEnd()) -
+      Thread::kStackOverflowProtectedSize;
+
   // Check that the fault address is the value expected for a stack overflow.
   if (fault_addr != overflow_addr) {
     VLOG(signals) << "Not a stack overflow";
     return false;
   }
 
-  VLOG(signals) << "Stack overflow found";
+  // We know this is a stack overflow.  We need to move the SP to the overflow region
+  // that exists below the protected region.  Determine the next available valid
+  // address below the protected region.
+  uintptr_t prevsp = sp;
+  sp = pregion;
+  VLOG(signals) << "setting sp to overflow region at " << std::hex << sp;
 
-  // Now arrange for the signal handler to return to art_quick_throw_stack_overflow.
+  // Since the compiler puts the implicit overflow check before the callee
+  // save instructions, the SP is already pointing to the previous frame.
+  VLOG(signals) << "previous frame: " << std::hex << prevsp;
+
+  // Now establish the stack pointer for the signal return.
+  sc->sp = prevsp;
+
+  // Tell the stack overflow code where the new stack pointer should be.
+  sc->regs[art::arm64::IP0] = sp;      // aka x16
+
+  // Now arrange for the signal handler to return to art_quick_throw_stack_overflow_from_signal.
   // The value of LR must be the same as it was when we entered the code that
   // caused this fault.  This will be inserted into a callee save frame by
-  // the function to which this handler returns (art_quick_throw_stack_overflow).
-  sc->pc = reinterpret_cast<uintptr_t>(art_quick_throw_stack_overflow);
+  // the function to which this handler returns (art_quick_throw_stack_overflow_from_signal).
+  sc->pc = reinterpret_cast<uintptr_t>(art_quick_throw_stack_overflow_from_signal);
 
   // The kernel will now return to the address in sc->pc.
   return true;
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index ab9035a..62ae099 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -435,6 +435,31 @@
      */
 ONE_ARG_RUNTIME_EXCEPTION art_quick_throw_no_such_method, artThrowNoSuchMethodFromCode
 
+  /*
+   * Throw a stack overflow exception from the signal handler.
+   * On entry:
+   * xSELF: thread
+   * SP: address of last known frame
+   * IP0: address of next valid SP below protected region in stack
+   *
+   * This is deceptively simple but hides some complexity.  It is called in the case of
+   * a stack overflow condition during implicit checks.  The signal handler has been
+   * called by the kernel due to a load from the protected stack region.  The handler
+   * works out the address of the previous frame and passes this in SP.  However, there
+   * is a piece of memory somewhere below the current SP that is not accessible (the
+   * memory that caused the signal).  The signal handler works out the next
+   * accessible value of SP and passes this in x16/IP0.  This code then sets up the SP
+   * to be this new value and calls the code to create and throw the stack overflow
+   * exception.
+   */
+ENTRY art_quick_throw_stack_overflow_from_signal
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
+    mov x0, xSELF                   // pass Thread::Current
+    mov x1, sp                      // pass SP
+    mov sp, xIP0                    // move SP down below the protected region.
+    b   artThrowStackOverflowFromCode                  // artThrowStackOverflowFromCode(Thread*, SP)
+END art_quick_throw_stack_overflow_from_signal
+
     /*
      * All generated callsites for interface invokes and invocation slow paths will load arguments
      * as usual - except instead of loading arg0/x0 with the target Method*, arg0/x0 will contain
diff --git a/runtime/arch/x86/asm_support_x86.h b/runtime/arch/x86/asm_support_x86.h
index c9f5a25..531ed77 100644
--- a/runtime/arch/x86/asm_support_x86.h
+++ b/runtime/arch/x86/asm_support_x86.h
@@ -20,11 +20,11 @@
 #include "asm_support.h"
 
 // Offset of field Thread::self_ verified in InitCpu
-#define THREAD_SELF_OFFSET 156
+#define THREAD_SELF_OFFSET 148
 // Offset of field Thread::card_table_ verified in InitCpu
-#define THREAD_CARD_TABLE_OFFSET 120
+#define THREAD_CARD_TABLE_OFFSET 112
 // Offset of field Thread::exception_ verified in InitCpu
-#define THREAD_EXCEPTION_OFFSET 124
+#define THREAD_EXCEPTION_OFFSET 116
 // Offset of field Thread::thin_lock_thread_id_ verified in InitCpu
 #define THREAD_ID_OFFSET 12
 
diff --git a/runtime/arch/x86/fault_handler_x86.cc b/runtime/arch/x86/fault_handler_x86.cc
index c143c5d..8b6c9b1 100644
--- a/runtime/arch/x86/fault_handler_x86.cc
+++ b/runtime/arch/x86/fault_handler_x86.cc
@@ -28,29 +28,16 @@
 
 #if defined(__APPLE__)
 #define ucontext __darwin_ucontext
-
-#if defined(__x86_64__)
-// 64 bit mac build.
-#define CTX_ESP uc_mcontext->__ss.__rsp
-#define CTX_EIP uc_mcontext->__ss.__rip
-#define CTX_EAX uc_mcontext->__ss.__rax
-#define CTX_METHOD uc_mcontext->__ss.__rdi
-#else
-// 32 bit mac build.
 #define CTX_ESP uc_mcontext->__ss.__esp
 #define CTX_EIP uc_mcontext->__ss.__eip
 #define CTX_EAX uc_mcontext->__ss.__eax
 #define CTX_METHOD uc_mcontext->__ss.__eax
-#endif
-
 #elif defined(__x86_64__)
-// 64 bit linux build.
 #define CTX_ESP uc_mcontext.gregs[REG_RSP]
 #define CTX_EIP uc_mcontext.gregs[REG_RIP]
 #define CTX_EAX uc_mcontext.gregs[REG_RAX]
 #define CTX_METHOD uc_mcontext.gregs[REG_RDI]
 #else
-// 32 bit linux build.
 #define CTX_ESP uc_mcontext.gregs[REG_ESP]
 #define CTX_EIP uc_mcontext.gregs[REG_EIP]
 #define CTX_EAX uc_mcontext.gregs[REG_EAX]
@@ -63,18 +50,9 @@
 
 namespace art {
 
-#if defined(__APPLE__) && defined(__x86_64__)
-// mac symbols have a prefix of _ on x86_64
-extern "C" void _art_quick_throw_null_pointer_exception();
-extern "C" void _art_quick_throw_stack_overflow_from_signal();
-extern "C" void _art_quick_test_suspend();
-#define EXT_SYM(sym) _ ## sym
-#else
 extern "C" void art_quick_throw_null_pointer_exception();
-extern "C" void art_quick_throw_stack_overflow();
+extern "C" void art_quick_throw_stack_overflow_from_signal();
 extern "C" void art_quick_test_suspend();
-#define EXT_SYM(sym) sym
-#endif
 
 // Get the size of an instruction in bytes.
 // Return 0 if the instruction is not handled.
@@ -275,7 +253,7 @@
   *next_sp = retaddr;
   uc->CTX_ESP = reinterpret_cast<uintptr_t>(next_sp);
 
-  uc->CTX_EIP = reinterpret_cast<uintptr_t>(EXT_SYM(art_quick_throw_null_pointer_exception));
+  uc->CTX_EIP = reinterpret_cast<uintptr_t>(art_quick_throw_null_pointer_exception);
   VLOG(signals) << "Generating null pointer exception";
   return true;
 }
@@ -349,7 +327,7 @@
     *next_sp = retaddr;
     uc->CTX_ESP = reinterpret_cast<uintptr_t>(next_sp);
 
-    uc->CTX_EIP = reinterpret_cast<uintptr_t>(EXT_SYM(art_quick_test_suspend));
+    uc->CTX_EIP = reinterpret_cast<uintptr_t>(art_quick_test_suspend);
 
     // Now remove the suspend trigger that caused this fault.
     Thread::Current()->RemoveSuspendTrigger();
@@ -382,20 +360,30 @@
   uintptr_t overflow_addr = sp - GetStackOverflowReservedBytes(kX86);
 #endif
 
+  Thread* self = Thread::Current();
+  uintptr_t pregion = reinterpret_cast<uintptr_t>(self->GetStackEnd()) -
+      Thread::kStackOverflowProtectedSize;
+
   // Check that the fault address is the value expected for a stack overflow.
   if (fault_addr != overflow_addr) {
     VLOG(signals) << "Not a stack overflow";
     return false;
   }
 
-  VLOG(signals) << "Stack overflow found";
+  // We know this is a stack overflow.  We need to move the SP to the overflow region
+  // that exists below the protected region.  Determine the next available valid
+  // address below the protected region.
+  VLOG(signals) << "setting sp to overflow region at " << std::hex << pregion;
 
   // Since the compiler puts the implicit overflow
   // check before the callee save instructions, the SP is already pointing to
   // the previous frame.
 
-  // Now arrange for the signal handler to return to art_quick_throw_stack_overflow.
-  uc->CTX_EIP = reinterpret_cast<uintptr_t>(art_quick_throw_stack_overflow);
+  // Tell the stack overflow code where the new stack pointer should be.
+  uc->CTX_EAX = pregion;
+
+  // Now arrange for the signal handler to return to art_quick_throw_stack_overflow_from_signal.
+  uc->CTX_EIP = reinterpret_cast<uintptr_t>(art_quick_throw_stack_overflow_from_signal);
 
   return true;
 }
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 75ec49d..dc4019d 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -174,6 +174,21 @@
      */
 NO_ARG_RUNTIME_EXCEPTION art_quick_throw_stack_overflow, artThrowStackOverflowFromCode
 
+// On entry to this function, EAX contains the ESP value for the overflow region.
+DEFINE_FUNCTION art_quick_throw_stack_overflow_from_signal
+    // Here, the ESP is above the protected region.  We need to create a
+    // callee save frame and then move ESP down to the overflow region.
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
+    mov %esp, %ecx                // get current stack pointer
+    mov %eax, %esp                // move ESP to the overflow region.
+    PUSH ecx                      // pass SP
+    pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
+    CFI_ADJUST_CFA_OFFSET(4)
+    SETUP_GOT_NOSAVE              // clobbers ebx (harmless here)
+    call PLT_SYMBOL(artThrowStackOverflowFromCode)    // artThrowStackOverflowFromCode(Thread*, SP)
+    int3                          // unreached
+END_FUNCTION art_quick_throw_stack_overflow_from_signal
+
     /*
      * Called by managed code, saves callee saves and then calls artThrowException
      * that will place a mock Method* at the bottom of the stack. Arg1 holds the exception.
diff --git a/runtime/arch/x86_64/asm_support_x86_64.h b/runtime/arch/x86_64/asm_support_x86_64.h
index 40958dc..c3637ef 100644
--- a/runtime/arch/x86_64/asm_support_x86_64.h
+++ b/runtime/arch/x86_64/asm_support_x86_64.h
@@ -28,11 +28,11 @@
 #define RUNTIME_REF_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET 16
 
 // Offset of field Thread::self_ verified in InitCpu
-#define THREAD_SELF_OFFSET 192
+#define THREAD_SELF_OFFSET 184
 // Offset of field Thread::card_table_ verified in InitCpu
-#define THREAD_CARD_TABLE_OFFSET 120
+#define THREAD_CARD_TABLE_OFFSET 112
 // Offset of field Thread::exception_ verified in InitCpu
-#define THREAD_EXCEPTION_OFFSET 128
+#define THREAD_EXCEPTION_OFFSET 120
 // Offset of field Thread::thin_lock_thread_id_ verified in InitCpu
 #define THREAD_ID_OFFSET 12
 
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 48bc240..f021ada 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -284,6 +284,18 @@
      */
 NO_ARG_RUNTIME_EXCEPTION art_quick_throw_stack_overflow, artThrowStackOverflowFromCode
 
+// On entry to this function, RAX contains the RSP value for the overflow region.
+DEFINE_FUNCTION art_quick_throw_stack_overflow_from_signal
+    // Here, the RSP is above the protected region.  We need to create a
+    // callee save frame and then move RSP down to the overflow region.
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
+    mov %rsp, %rsi                    // get current stack pointer, pass SP as second arg
+    mov %rax, %rsp                    // move RSP to the overflow region.
+    mov %gs:THREAD_SELF_OFFSET, %rdi  // pass Thread::Current() as first arg
+    call PLT_SYMBOL(artThrowStackOverflowFromCode)    // artThrowStackOverflowFromCode(Thread*, SP)
+    int3                              // unreached
+END_FUNCTION art_quick_throw_stack_overflow_from_signal
+
     /*
      * Called by managed code, saves callee saves and then calls artThrowException
      * that will place a mock Method* at the bottom of the stack. Arg1 holds the exception.
diff --git a/runtime/entrypoints/entrypoint_utils.cc b/runtime/entrypoints/entrypoint_utils.cc
index 2767dc4..be3895a 100644
--- a/runtime/entrypoints/entrypoint_utils.cc
+++ b/runtime/entrypoints/entrypoint_utils.cc
@@ -212,11 +212,6 @@
 
   bool explicit_overflow_check = Runtime::Current()->ExplicitStackOverflowChecks();
   self->ResetDefaultStackEnd(!explicit_overflow_check);  // Return to default stack size.
-
-  // And restore protection if implicit checks are on.
-  if (!explicit_overflow_check) {
-    self->ProtectStack();
-  }
 }
 
 void CheckReferenceResult(mirror::Object* o, Thread* self) {
diff --git a/runtime/entrypoints/quick/quick_throw_entrypoints.cc b/runtime/entrypoints/quick/quick_throw_entrypoints.cc
index 13decc8..879010e 100644
--- a/runtime/entrypoints/quick/quick_throw_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_throw_entrypoints.cc
@@ -58,10 +58,8 @@
                                                      StackReference<mirror::ArtMethod>* sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   FinishCalleeSaveFrameSetup(self, sp, Runtime::kSaveAll);
-  self->NoteSignalBeingHandled();
   ThrowLocation throw_location = self->GetCurrentLocationForThrow();
   ThrowNullPointerExceptionFromDexPC(throw_location);
-  self->NoteSignalHandlerDone();
   self->QuickDeliverException();
 }
 
@@ -85,9 +83,7 @@
 extern "C" void artThrowStackOverflowFromCode(Thread* self, StackReference<mirror::ArtMethod>* sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   FinishCalleeSaveFrameSetup(self, sp, Runtime::kSaveAll);
-  self->NoteSignalBeingHandled();
   ThrowStackOverflowError(self);
-  self->NoteSignalHandlerDone();
   self->QuickDeliverException();
 }
 
diff --git a/runtime/entrypoints_order_test.cc b/runtime/entrypoints_order_test.cc
index f572d27..ae1b94f 100644
--- a/runtime/entrypoints_order_test.cc
+++ b/runtime/entrypoints_order_test.cc
@@ -70,10 +70,6 @@
     EXPECT_OFFSET_DIFFP(Thread, tls32_, daemon, throwing_OutOfMemoryError, 4);
     EXPECT_OFFSET_DIFFP(Thread, tls32_, throwing_OutOfMemoryError, no_thread_suspension, 4);
     EXPECT_OFFSET_DIFFP(Thread, tls32_, no_thread_suspension, thread_exit_check_count, 4);
-    EXPECT_OFFSET_DIFFP(Thread, tls32_, thread_exit_check_count,
-                        is_exception_reported_to_instrumentation_, 4);
-    EXPECT_OFFSET_DIFFP(Thread, tls32_, is_exception_reported_to_instrumentation_,
-                        handling_signal_, 4);
 
     // TODO: Better connection. Take alignment into account.
     EXPECT_OFFSET_DIFF_GT3(Thread, tls32_.thread_exit_check_count, tls64_.trace_clock_base, 4,
diff --git a/runtime/instruction_set.cc b/runtime/instruction_set.cc
index d8a38f4..d7e358c 100644
--- a/runtime/instruction_set.cc
+++ b/runtime/instruction_set.cc
@@ -87,10 +87,11 @@
 static constexpr size_t kDefaultStackOverflowReservedBytes = 16 * KB;
 static constexpr size_t kMipsStackOverflowReservedBytes = kDefaultStackOverflowReservedBytes;
 
-static constexpr size_t kArmStackOverflowReservedBytes =    8 * KB;
-static constexpr size_t kArm64StackOverflowReservedBytes =  8 * KB;
-static constexpr size_t kX86StackOverflowReservedBytes =    8 * KB;
-static constexpr size_t kX86_64StackOverflowReservedBytes = 8 * KB;
+// TODO: Lower once implicit stack-overflow checks can work with less than 16K.
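+// For now all four values below are 16 KB in debug and release builds alike;
+// the (kIsDebugBuild ? 16 : 16) form just leaves room for them to diverge.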
+static constexpr size_t kArmStackOverflowReservedBytes =    (kIsDebugBuild ? 16 : 16) * KB;
+static constexpr size_t kArm64StackOverflowReservedBytes =  (kIsDebugBuild ? 16 : 16) * KB;
+static constexpr size_t kX86StackOverflowReservedBytes =    (kIsDebugBuild ? 16 : 16) * KB;
+static constexpr size_t kX86_64StackOverflowReservedBytes = (kIsDebugBuild ? 16 : 16) * KB;
 
 size_t GetStackOverflowReservedBytes(InstructionSet isa) {
   switch (isa) {
diff --git a/runtime/mirror/art_method.cc b/runtime/mirror/art_method.cc
index 8206cd4..8413763 100644
--- a/runtime/mirror/art_method.cc
+++ b/runtime/mirror/art_method.cc
@@ -283,11 +283,6 @@
 
 void ArtMethod::Invoke(Thread* self, uint32_t* args, uint32_t args_size, JValue* result,
                        const char* shorty) {
-  if (UNLIKELY(__builtin_frame_address(0) < self->GetStackEnd())) {
-    ThrowStackOverflowError(self);
-    return;
-  }
-
   if (kIsDebugBuild) {
     self->AssertThreadSuspensionIsAllowable();
     CHECK_EQ(kRunnable, self->GetState());
diff --git a/runtime/oat.cc b/runtime/oat.cc
index ede108c..0a8c35b 100644
--- a/runtime/oat.cc
+++ b/runtime/oat.cc
@@ -23,7 +23,7 @@
 namespace art {
 
 const uint8_t OatHeader::kOatMagic[] = { 'o', 'a', 't', '\n' };
-const uint8_t OatHeader::kOatVersion[] = { '0', '3', '9', '\0' };
+const uint8_t OatHeader::kOatVersion[] = { '0', '3', '8', '\0' };
 
 static size_t ComputeOatHeaderSize(const SafeMap<std::string, std::string>* variable_data) {
   size_t estimate = 0U;
diff --git a/runtime/reflection.cc b/runtime/reflection.cc
index 4cd61a5..0d38d3d 100644
--- a/runtime/reflection.cc
+++ b/runtime/reflection.cc
@@ -420,14 +420,6 @@
 JValue InvokeWithVarArgs(const ScopedObjectAccessAlreadyRunnable& soa, jobject obj, jmethodID mid,
                          va_list args)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  // We want to make sure that the stack is not within a small distance from the
-  // protected region in case we are calling into a leaf function whose stack
-  // check has been elided.
-  if (UNLIKELY(__builtin_frame_address(0) < soa.Self()->GetStackEnd())) {
-    ThrowStackOverflowError(soa.Self());
-    return JValue();
-  }
-
   mirror::ArtMethod* method = soa.DecodeMethod(mid);
   mirror::Object* receiver = method->IsStatic() ? nullptr : soa.Decode<mirror::Object*>(obj);
   uint32_t shorty_len = 0;
@@ -441,14 +433,6 @@
 
 JValue InvokeWithJValues(const ScopedObjectAccessAlreadyRunnable& soa, mirror::Object* receiver,
                          jmethodID mid, jvalue* args) {
-  // We want to make sure that the stack is not within a small distance from the
-  // protected region in case we are calling into a leaf function whose stack
-  // check has been elided.
-  if (UNLIKELY(__builtin_frame_address(0) < soa.Self()->GetStackEnd())) {
-    ThrowStackOverflowError(soa.Self());
-    return JValue();
-  }
-
   mirror::ArtMethod* method = soa.DecodeMethod(mid);
   uint32_t shorty_len = 0;
   const char* shorty = method->GetShorty(&shorty_len);
@@ -461,14 +445,6 @@
 
 JValue InvokeVirtualOrInterfaceWithJValues(const ScopedObjectAccessAlreadyRunnable& soa,
                                            mirror::Object* receiver, jmethodID mid, jvalue* args) {
-  // We want to make sure that the stack is not within a small distance from the
-  // protected region in case we are calling into a leaf function whose stack
-  // check has been elided.
-  if (UNLIKELY(__builtin_frame_address(0) < soa.Self()->GetStackEnd())) {
-    ThrowStackOverflowError(soa.Self());
-    return JValue();
-  }
-
   mirror::ArtMethod* method = FindVirtualMethod(receiver, soa.DecodeMethod(mid));
   uint32_t shorty_len = 0;
   const char* shorty = method->GetShorty(&shorty_len);
@@ -481,14 +457,6 @@
 
 JValue InvokeVirtualOrInterfaceWithVarArgs(const ScopedObjectAccessAlreadyRunnable& soa,
                                            jobject obj, jmethodID mid, va_list args) {
-  // We want to make sure that the stack is not within a small distance from the
-  // protected region in case we are calling into a leaf function whose stack
-  // check has been elided.
-  if (UNLIKELY(__builtin_frame_address(0) < soa.Self()->GetStackEnd())) {
-    ThrowStackOverflowError(soa.Self());
-    return JValue();
-  }
-
   mirror::Object* receiver = soa.Decode<mirror::Object*>(obj);
   mirror::ArtMethod* method = FindVirtualMethod(receiver, soa.DecodeMethod(mid));
   uint32_t shorty_len = 0;
@@ -502,14 +470,6 @@
 
 void InvokeWithShadowFrame(Thread* self, ShadowFrame* shadow_frame, uint16_t arg_offset,
                            MethodHelper& mh, JValue* result) {
-  // We want to make sure that the stack is not within a small distance from the
-  // protected region in case we are calling into a leaf function whose stack
-  // check has been elided.
-  if (UNLIKELY(__builtin_frame_address(0) < self->GetStackEnd())) {
-    ThrowStackOverflowError(self);
-    return;
-  }
-
   ArgArray arg_array(mh.GetShorty(), mh.GetShortyLength());
   arg_array.BuildArgArrayFromFrame(shadow_frame, arg_offset);
   shadow_frame->GetMethod()->Invoke(self, arg_array.GetArray(), arg_array.GetNumBytes(), result,
@@ -518,15 +478,6 @@
 
 jobject InvokeMethod(const ScopedObjectAccessAlreadyRunnable& soa, jobject javaMethod,
                      jobject javaReceiver, jobject javaArgs, bool accessible) {
-  // We want to make sure that the stack is not within a small distance from the
-  // protected region in case we are calling into a leaf function whose stack
-  // check has been elided.
-  if (UNLIKELY(__builtin_frame_address(0) <
-               soa.Self()->GetStackEndForInterpreter(true))) {
-    ThrowStackOverflowError(soa.Self());
-    return nullptr;
-  }
-
   mirror::ArtMethod* m = mirror::ArtMethod::FromReflectedMethod(soa, javaMethod);
 
   mirror::Class* declaring_class = m->GetDeclaringClass();
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 4ce8c96..18e28ea 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -76,7 +76,8 @@
 bool Thread::is_started_ = false;
 pthread_key_t Thread::pthread_key_self_;
 ConditionVariable* Thread::resume_cond_ = nullptr;
-const size_t Thread::kStackOverflowImplicitCheckSize = GetStackOverflowReservedBytes(kRuntimeISA);
+const size_t Thread::kStackOverflowImplicitCheckSize = kStackOverflowProtectedSize +
+    GetStackOverflowReservedBytes(kRuntimeISA);
 
 static const char* kThreadNameDuringStartup = "<native thread without managed peer>";
 
@@ -237,48 +238,92 @@
 byte dont_optimize_this;
 
 // Install a protected region in the stack.  This is used to trigger a SIGSEGV if a stack
-// overflow is detected.  It is located right below the stack_begin_.
+// overflow is detected.  It is located right below the stack_end_.  Just below that
+// is the StackOverflow reserved region used when creating the StackOverflow
+// exception.
 //
-// There is a little complexity here that deserves a special mention.  On some
-// architectures, the stack created using a VM_GROWSDOWN flag
+// There is a little complexity here that deserves a special mention.  When running on the
+// host (glibc), the process's main thread's stack is allocated with a special flag
 // to prevent memory being allocated when it's not needed.  This flag makes the
 // kernel only allocate memory for the stack by growing down in memory.  Because we
 // want to put an mprotected region far away from that at the stack top, we need
 // to make sure the pages for the stack are mapped in before we call mprotect.  We do
 // this by reading every page from the stack bottom (highest address) to the stack top.
 // We then madvise this away.
-void Thread::InstallImplicitProtection() {
-  byte* pregion = tlsPtr_.stack_begin - kStackOverflowProtectedSize;
-  byte* stack_himem = tlsPtr_.stack_end;
-  byte* stack_top = reinterpret_cast<byte*>(reinterpret_cast<uintptr_t>(&stack_himem) &
+void Thread::InstallImplicitProtection(bool is_main_stack) {
+  byte* pregion = tlsPtr_.stack_end;
+  byte* stack_lowmem = tlsPtr_.stack_begin;
+  byte* stack_top = reinterpret_cast<byte*>(reinterpret_cast<uintptr_t>(&pregion) &
       ~(kPageSize - 1));    // Page containing current top of stack.
 
-  // First remove the protection on the protected region as will want to read and
-  // write it.  This may fail (on the first attempt when the stack is not mapped)
-  // but we ignore that.
-  UnprotectStack();
+  const bool running_on_intel = (kRuntimeISA == kX86) || (kRuntimeISA == kX86_64);
 
-  // Map in the stack.  This must be done by reading from the
-  // current stack pointer downwards as the stack may be mapped using VM_GROWSDOWN
-  // in the kernel.  Any access more than a page below the current SP might cause
-  // a segv.
+  if (running_on_intel) {
+    // On Intel, we need to map in the main stack.  This must be done by reading from the
+    // current stack pointer downwards as the stack is mapped using VM_GROWSDOWN
+    // in the kernel.  Any access more than a page below the current SP will cause
+    // a segv.
+    if (is_main_stack) {
+      // First we need to unprotect the protected region because this may
+      // be called more than once for a particular stack and we will crash
+      // if we try to read the protected page.
+      mprotect(pregion - kStackOverflowProtectedSize, kStackOverflowProtectedSize, PROT_READ);
 
-  // Read every page from the high address to the low.
-  for (byte* p = stack_top; p > pregion; p -= kPageSize) {
-    dont_optimize_this = *p;
+      // Read every page from the high address to the low.
+      for (byte* p = stack_top; p > stack_lowmem; p -= kPageSize) {
+        dont_optimize_this = *p;
+      }
+    }
   }
 
+  // Check for a marker word at the lowest usable address in the stack, placing
+  // one if absent.  This is used to prevent installing the protection twice.
+  constexpr uint32_t kMarker = 0xdadadada;
+  uintptr_t *marker = reinterpret_cast<uintptr_t*>(pregion);
+  if (*marker == kMarker) {
+    // The region has already been set up.  But on the main stack on the host we have
+    // removed the protected region in order to read the stack memory.  We need to put
+    // this back again.
+    if (is_main_stack && running_on_intel) {
+      mprotect(pregion - kStackOverflowProtectedSize, kStackOverflowProtectedSize, PROT_NONE);
+      madvise(stack_lowmem, stack_top - stack_lowmem, MADV_DONTNEED);
+    }
+    return;
+  }
+  // Add marker so that we can detect a second attempt to do this.
+  *marker = kMarker;
+
+  if (!running_on_intel) {
+    // On non-Intel targets, stacks are mapped cleanly.  The protected region for the
+    // main stack just needs to be mapped in.  We do this by writing one byte per page.
+    for (byte* p = pregion - kStackOverflowProtectedSize;  p < pregion; p += kPageSize) {
+      *p = 0;
+    }
+  }
+
+  pregion -= kStackOverflowProtectedSize;
+
   VLOG(threads) << "installing stack protected region at " << std::hex <<
       static_cast<void*>(pregion) << " to " <<
       static_cast<void*>(pregion + kStackOverflowProtectedSize - 1);
 
-  // Protect the bottom of the stack to prevent read/write to it.
-  ProtectStack();
+
+  if (mprotect(pregion, kStackOverflowProtectedSize, PROT_NONE) == -1) {
+    LOG(FATAL) << "Unable to create protected region in stack for implicit overflow check. "
+        "Reason: " << strerror(errno) << " size: " << kStackOverflowProtectedSize;
+  }
 
   // Tell the kernel that we won't be needing these pages any more.
   // NB. madvise will probably write zeroes into the memory (on linux it does).
-  uint32_t unwanted_size = stack_top - pregion - kPageSize;
-  madvise(pregion, unwanted_size, MADV_DONTNEED);
+  if (is_main_stack) {
+    if (running_on_intel) {
+      // On the host, it's the whole stack (minus a page to prevent overwrite of stack top).
+      madvise(stack_lowmem, stack_top - stack_lowmem - kPageSize, MADV_DONTNEED);
+    } else {
+      // On Android, just the protected region.
+      madvise(pregion, kStackOverflowProtectedSize, MADV_DONTNEED);
+    }
+  }
 }
 
 void Thread::CreateNativeThread(JNIEnv* env, jobject java_peer, size_t stack_size, bool is_daemon) {
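
The read-then-madvise idiom that InstallImplicitProtection uses above can be shown in isolation. A minimal sketch, assuming a downward-growing stack region bounded by lo (lowest, page-aligned, as madvise requires) and hi (a mapped page near the top); the names are local to the example, not ART code:

    #include <sys/mman.h>
    #include <unistd.h>
    #include <cstddef>

    static volatile unsigned char dont_optimize_this_sketch;  // defeats dead-load elimination

    // Fault in every page of (lo, hi] from the top down, then tell the kernel
    // the contents are disposable.  Touching high-to-low keeps each access
    // within a page of already-mapped memory, which VM_GROWSDOWN stacks require.
    static void MapInThenDiscard(unsigned char* lo, unsigned char* hi) {
      const size_t page = static_cast<size_t>(sysconf(_SC_PAGESIZE));
      for (unsigned char* p = hi; p > lo; p -= page) {
        dont_optimize_this_sketch = *p;
      }
      madvise(lo, static_cast<size_t>(hi - lo), MADV_DONTNEED);
    }

InstallImplicitProtection above does the same dance between tlsPtr_.stack_begin and the current SP, and additionally mprotects the guard region afterwards.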
@@ -493,13 +538,7 @@
   tlsPtr_.stack_begin = reinterpret_cast<byte*>(read_stack_base);
   tlsPtr_.stack_size = read_stack_size;
 
-  // The minimum stack size we can cope with is the overflow reserved bytes (typically
-  // 8K) + the protected region size (4K) + another page (4K).  Typically this will
-  // be 8+4+4 = 16K.  The thread won't be able to do much with this stack even the GC takes
-  // between 8K and 12K.
-  uint32_t min_stack = GetStackOverflowReservedBytes(kRuntimeISA) + kStackOverflowProtectedSize
-    + 4 * KB;
-  if (read_stack_size <= min_stack) {
+  if (read_stack_size <= GetStackOverflowReservedBytes(kRuntimeISA)) {
     LOG(FATAL) << "Attempt to attach a thread with a too-small stack (" << read_stack_size
         << " bytes)";
   }
@@ -543,19 +582,20 @@
 
   // Install the protected region if we are doing implicit overflow checks.
   if (implicit_stack_check) {
-    size_t guardsize;
-    pthread_attr_t attributes;
-    CHECK_PTHREAD_CALL(pthread_attr_init, (&attributes), "guard size query");
-    CHECK_PTHREAD_CALL(pthread_attr_getguardsize, (&attributes, &guardsize), "guard size query");
-    CHECK_PTHREAD_CALL(pthread_attr_destroy, (&attributes), "guard size query");
-    // The thread might have protected region at the bottom.  We need
-    // to install our own region so we need to move the limits
-    // of the stack to make room for it.
-    tlsPtr_.stack_begin += guardsize;
-    tlsPtr_.stack_end += guardsize;
-    tlsPtr_.stack_size -= guardsize;
-
-    InstallImplicitProtection();
+    if (is_main_thread) {
+      size_t guardsize;
+      pthread_attr_t attributes;
+      CHECK_PTHREAD_CALL(pthread_attr_init, (&attributes), "guard size query");
+      CHECK_PTHREAD_CALL(pthread_attr_getguardsize, (&attributes, &guardsize), "guard size query");
+      CHECK_PTHREAD_CALL(pthread_attr_destroy, (&attributes), "guard size query");
+      // The main thread might have a protected region at the bottom.  We need
+      // to install our own region, so we move the limits of the stack to make
+      // room for it.
+      tlsPtr_.stack_begin += guardsize;
+      tlsPtr_.stack_end += guardsize;
+      tlsPtr_.stack_size -= guardsize;
+    }
+    InstallImplicitProtection(is_main_thread);
   }
 
   // Sanity check.
@@ -2219,14 +2259,6 @@
   }
 
   tlsPtr_.stack_end = tlsPtr_.stack_begin;
-
-  // Remove the stack overflow protection if is it set up.
-  bool implicit_stack_check = !Runtime::Current()->ExplicitStackOverflowChecks();
-  if (implicit_stack_check) {
-    if (!UnprotectStack()) {
-      LOG(ERROR) << "Unable to remove stack protection for stack overflow";
-    }
-  }
 }
 
 void Thread::SetTlab(byte* start, byte* end) {
@@ -2252,21 +2284,4 @@
   return os;
 }
 
-void Thread::ProtectStack() {
-  void* pregion = tlsPtr_.stack_begin - kStackOverflowProtectedSize;
-  VLOG(threads) << "Protecting stack at " << pregion;
-  if (mprotect(pregion, kStackOverflowProtectedSize, PROT_NONE) == -1) {
-    LOG(FATAL) << "Unable to create protected region in stack for implicit overflow check. "
-        "Reason: "
-        << strerror(errno) << " size:  " << kStackOverflowProtectedSize;
-  }
-}
-
-bool Thread::UnprotectStack() {
-  void* pregion = tlsPtr_.stack_begin - kStackOverflowProtectedSize;
-  VLOG(threads) << "Unprotecting stack at " << pregion;
-  return mprotect(pregion, kStackOverflowProtectedSize, PROT_READ|PROT_WRITE) == 0;
-}
-
-
 }  // namespace art
diff --git a/runtime/thread.h b/runtime/thread.h
index 49f4244..3e9372f 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -94,41 +94,16 @@
 
 static constexpr size_t kNumRosAllocThreadLocalSizeBrackets = 34;
 
-// Thread's stack layout for implicit stack overflow checks:
-//
-//   +---------------------+  <- highest address of stack memory
-//   |                     |
-//   .                     .  <- SP
-//   |                     |
-//   |                     |
-//   +---------------------+  <- stack_end
-//   |                     |
-//   |  Gap                |
-//   |                     |
-//   +---------------------+  <- stack_begin
-//   |                     |
-//   | Protected region    |
-//   |                     |
-//   +---------------------+  <- lowest address of stack memory
-//
-// The stack always grows down in memory.  At the lowest address is a region of memory
-// that is set mprotect(PROT_NONE).  Any attempt to read/write to this region will
-// result in a segmentation fault signal.  At any point, the thread's SP will be somewhere
-// between the stack_end and the highest address in stack memory.  An implicit stack
-// overflow check is a read of memory at a certain offset below the current SP (4K typically).
-// If the thread's SP is below the stack_end address this will be a read into the protected
-// region.  If the SP is above the stack_end address, the thread is guaranteed to have
-// at least 4K of space.  Because stack overflow checks are only performed in generated code,
-// if the thread makes a call out to a native function (through JNI), that native function
-// might only have 4K of memory (if the SP is adjacent to stack_end).
-
 class Thread {
  public:
+  // How many of the reserved bytes are set aside for incoming signals.
+  static constexpr size_t kStackOverflowSignalReservedBytes = 2 * KB;
+
   // For implicit overflow checks we reserve an extra piece of memory at the bottom
   // of the stack (lowest memory).  The higher portion of the memory
   // is protected against reads and the lower is available for use while
   // throwing the StackOverflow exception.
-  static constexpr size_t kStackOverflowProtectedSize = 4 * KB;
+  static constexpr size_t kStackOverflowProtectedSize = 16 * KB;
   static const size_t kStackOverflowImplicitCheckSize;
 
   // Creates a new native thread corresponding to the given managed peer.
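
Putting the restored constants together — the 16 KB protected region above, the 16 KB per-ISA reservation in runtime/instruction_set.cc, and the 2 KB signal hold-back — the arithmetic the patch relies on checks out as follows (illustrative, not ART code):

    #include <cstddef>

    constexpr size_t KB = 1024;
    constexpr size_t kProtectedSketch = 16 * KB;  // kStackOverflowProtectedSize above
    constexpr size_t kReservedSketch  = 16 * KB;  // GetStackOverflowReservedBytes(isa)
    constexpr size_t kSignalSketch    =  2 * KB;  // kStackOverflowSignalReservedBytes above

    // thread.cc: kStackOverflowImplicitCheckSize = protected + reserved.
    static_assert(kProtectedSketch + kReservedSketch == 32 * KB, "implicit check distance");
    // call_arm.cc: reserved bytes actually usable by the explicit overflow path.
    static_assert(kReservedSketch - kSignalSketch == 14 * KB, "overflow-usable bytes");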
@@ -608,7 +583,7 @@
   }
 
   // Install the protected region for implicit stack checks.
-  void InstallImplicitProtection();
+  void InstallImplicitProtection(bool is_main_stack);
 
   bool IsHandlingStackOverflow() const {
     return tlsPtr_.stack_end == tlsPtr_.stack_begin;
@@ -840,20 +815,6 @@
     tls32_.is_exception_reported_to_instrumentation_ = reported;
   }
 
-  void ProtectStack();
-  bool UnprotectStack();
-
-  void NoteSignalBeingHandled() {
-    if (tls32_.handling_signal_) {
-      LOG(FATAL) << "Detected signal while processing a signal";
-    }
-    tls32_.handling_signal_ = true;
-  }
-
-  void NoteSignalHandlerDone() {
-    tls32_.handling_signal_ = false;
-  }
-
  private:
   explicit Thread(bool daemon);
   ~Thread() LOCKS_EXCLUDED(Locks::mutator_lock_,
@@ -958,8 +919,7 @@
     explicit tls_32bit_sized_values(bool is_daemon) :
       suspend_count(0), debug_suspend_count(0), thin_lock_thread_id(0), tid(0),
       daemon(is_daemon), throwing_OutOfMemoryError(false), no_thread_suspension(0),
-      thread_exit_check_count(0), is_exception_reported_to_instrumentation_(false),
-      handling_signal_(false), padding_(0) {
+      thread_exit_check_count(0), is_exception_reported_to_instrumentation_(false) {
     }
 
     union StateAndFlags state_and_flags;
@@ -999,12 +959,6 @@
     // When true this field indicates that the exception associated with this thread has already
     // been reported to instrumentation.
     bool32_t is_exception_reported_to_instrumentation_;
-
-    // True if signal is being handled by this thread.
-    bool32_t handling_signal_;
-
-    // Padding to make the size aligned to 8.  Remove this if we add another 32 bit field.
-    int32_t padding_;
   } tls32_;
 
   struct PACKED(8) tls_64bit_sized_values {
diff --git a/test/004-InterfaceTest/src/Main.java b/test/004-InterfaceTest/src/Main.java
index 297cbb0..9ebac59 100644
--- a/test/004-InterfaceTest/src/Main.java
+++ b/test/004-InterfaceTest/src/Main.java
@@ -23,7 +23,7 @@
     Integer intobj = new Integer(0);
     String s = "asdf";
     long start = System.currentTimeMillis();
-    for (int i = 0; i < 10000; i++) {
+    for (int i = 0; i < 1000000; i++) {
         map.put(intobj, s);
     }
     long end = System.currentTimeMillis();
@@ -34,7 +34,7 @@
     Integer intobj = new Integer(0);
     String s = "asdf";
     long start = System.currentTimeMillis();
-    for (int i = 0; i < 10000; i++) {
+    for (int i = 0; i < 1000000; i++) {
         map.put(intobj, s);
     }
     long end = System.currentTimeMillis();
diff --git a/test/004-SignalTest/signaltest.cc b/test/004-SignalTest/signaltest.cc
index c05dc22..a2dd664 100644
--- a/test/004-SignalTest/signaltest.cc
+++ b/test/004-SignalTest/signaltest.cc
@@ -16,7 +16,6 @@
 
 #include <signal.h>
 #include <stdio.h>
-#include <stdlib.h>
 #include <unistd.h>
 
 #include "jni.h"
@@ -25,15 +24,8 @@
 #include <sys/ucontext.h>
 #endif
 
-static int signal_count;
-static const int kMaxSignal = 2;
-
 static void signalhandler(int sig, siginfo_t* info, void* context) {
   printf("signal caught\n");
-  ++signal_count;
-  if (signal_count > kMaxSignal) {
-     abort();
-  }
 #ifdef __arm__
   // On ARM we do a more exhaustive test to make sure the signal
   // context is OK.