Merge "Add implicit null and stack checks for x86"
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index 10cd1cc..a82d1f5 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -235,7 +235,7 @@
 
 .PHONY: $$(gtest_rule)
 $$(gtest_rule): $$(gtest_exe) $$(ART_GTEST_$(1)_HOST_DEPS) $(foreach file,$(ART_GTEST_$(1)_DEX_DEPS),$(ART_TEST_HOST_GTEST_$(file)_DEX)) $$(gtest_deps)
-	$(hide) ($$(call ART_TEST_SKIP,$$@) && $$< && $$(call ART_TEST_PASSED,$$@)) \
+	$(hide) ($$(call ART_TEST_SKIP,$$@) && LD_PRELOAD=libsigchain$$(ART_HOST_SHLIB_EXTENSION) $$< && $$(call ART_TEST_PASSED,$$@)) \
 	  || $$(call ART_TEST_FAILED,$$@)
 
   ART_TEST_HOST_GTEST$$($(2)ART_PHONY_TEST_HOST_SUFFIX)_RULES += $$(gtest_rule)
diff --git a/build/Android.oat.mk b/build/Android.oat.mk
index dd87f4a..d1caf93 100644
--- a/build/Android.oat.mk
+++ b/build/Android.oat.mk
@@ -50,7 +50,7 @@
 
 IMPLICIT_CHECKS_arm := null,stack
 IMPLICIT_CHECKS_arm64 := none
-IMPLICIT_CHECKS_x86 := none
+IMPLICIT_CHECKS_x86 := null,stack
 IMPLICIT_CHECKS_x86_64 := none
 IMPLICIT_CHECKS_mips := none
 define create-core-oat-target-rules
diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h
index 7d75da9..01d6354 100644
--- a/compiler/dex/quick/arm64/codegen_arm64.h
+++ b/compiler/dex/quick/arm64/codegen_arm64.h
@@ -96,7 +96,7 @@
                               RegStorage r_src, OpSize size) OVERRIDE;
     void MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg) OVERRIDE;
     LIR* OpCmpMemImmBranch(ConditionCode cond, RegStorage temp_reg, RegStorage base_reg,
-                           int offset, int check_value, LIR* target) OVERRIDE;
+                           int offset, int check_value, LIR* target, LIR** compare) OVERRIDE;
 
     // Required for target - register utilities.
     RegStorage TargetReg(SpecialTargetRegister reg) OVERRIDE;
diff --git a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc
index e8f5cb9..6be66a2 100644
--- a/compiler/dex/quick/arm64/int_arm64.cc
+++ b/compiler/dex/quick/arm64/int_arm64.cc
@@ -163,7 +163,8 @@
 
 LIR* Arm64Mir2Lir::OpCmpMemImmBranch(ConditionCode cond, RegStorage temp_reg,
                                      RegStorage base_reg, int offset, int check_value,
-                                     LIR* target) {
+                                     LIR* target, LIR** compare) {
+  DCHECK(compare == nullptr);
   // It is possible that temp register is 64-bit. (ArgReg or RefReg)
   // Always compare 32-bit value no matter what temp_reg is.
   if (temp_reg.Is64Bit()) {
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index 5870d22..1ac4707 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -1162,9 +1162,12 @@
 }
 
 LIR *Mir2Lir::OpCmpMemImmBranch(ConditionCode cond, RegStorage temp_reg, RegStorage base_reg,
-                                int offset, int check_value, LIR* target) {
+                                int offset, int check_value, LIR* target, LIR** compare) {
   // Handle this for architectures that can't compare to memory.
-  Load32Disp(base_reg, offset, temp_reg);
+  LIR* inst = Load32Disp(base_reg, offset, temp_reg);
+  if (compare != nullptr) {
+    *compare = inst;
+  }
   LIR* branch = OpCmpImmBranch(cond, temp_reg, check_value, target);
   return branch;
 }
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index b31e9a2..0bb253c 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -195,6 +195,7 @@
     if (!(cu_->disable_opt & (1 << kNullCheckElimination)) && (opt_flags & MIR_IGNORE_NULL_CHECK)) {
       return;
     }
+    // Insert after last instruction.
     MarkSafepointPC(last_lir_insn_);
   }
 }
@@ -622,7 +623,7 @@
         LockTemp(r_tmp);
         LIR* uninit_branch = OpCmpMemImmBranch(kCondLt, r_tmp, r_base,
                                           mirror::Class::StatusOffset().Int32Value(),
-                                          mirror::Class::kStatusInitialized, NULL);
+                                          mirror::Class::kStatusInitialized, nullptr, nullptr);
         LIR* cont = NewLIR0(kPseudoTargetLabel);
 
         AddSlowPath(new (arena_) StaticFieldSlowPath(this, unresolved_branch, uninit_branch, cont,
@@ -715,7 +716,7 @@
         LockTemp(r_tmp);
         LIR* uninit_branch = OpCmpMemImmBranch(kCondLt, r_tmp, r_base,
                                           mirror::Class::StatusOffset().Int32Value(),
-                                          mirror::Class::kStatusInitialized, NULL);
+                                          mirror::Class::kStatusInitialized, nullptr, nullptr);
         LIR* cont = NewLIR0(kPseudoTargetLabel);
 
         AddSlowPath(new (arena_) StaticFieldSlowPath(this, unresolved_branch, uninit_branch, cont,
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index 6c0dfe8..55b68e6 100755
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -985,17 +985,31 @@
       *pcrLabel = GenExplicitNullCheck(TargetRefReg(kArg1), info->opt_flags);
     } else {
       *pcrLabel = nullptr;
+      if (!(cu_->disable_opt & (1 << kNullCheckElimination)) &&
+          (info->opt_flags & MIR_IGNORE_NULL_CHECK)) {
+        return call_state;
+      }
       // In lieu of generating a check for kArg1 being null, we need to
       // perform a load when doing implicit checks.
-      RegStorage tmp = AllocTemp();
-      Load32Disp(TargetRefReg(kArg1), 0, tmp);
-      MarkPossibleNullPointerException(info->opt_flags);
-      FreeTemp(tmp);
+      GenImplicitNullCheck(TargetReg(kArg1, false), info->opt_flags);
     }
   }
   return call_state;
 }
 
+// Default implementation of implicit null pointer check.
+// Overridden by arch specific as necessary.
+void Mir2Lir::GenImplicitNullCheck(RegStorage reg, int opt_flags) {
+  if (!(cu_->disable_opt & (1 << kNullCheckElimination)) && (opt_flags & MIR_IGNORE_NULL_CHECK)) {
+    return;
+  }
+  RegStorage tmp = AllocTemp();
+  Load32Disp(reg, 0, tmp);
+  MarkPossibleNullPointerException(opt_flags);
+  FreeTemp(tmp);
+}
+
+
 /*
  * May have 0+ arguments (also used for jumbo).  Note that
  * source virtual registers may be in physical registers, so may
@@ -1212,12 +1226,13 @@
       *pcrLabel = GenExplicitNullCheck(TargetRefReg(kArg1), info->opt_flags);
     } else {
       *pcrLabel = nullptr;
+      if (!(cu_->disable_opt & (1 << kNullCheckElimination)) &&
+          (info->opt_flags & MIR_IGNORE_NULL_CHECK)) {
+        return call_state;
+      }
       // In lieu of generating a check for kArg1 being null, we need to
       // perform a load when doing implicit checks.
-      RegStorage tmp = AllocTemp();
-      Load32Disp(TargetRefReg(kArg1), 0, tmp);
-      MarkPossibleNullPointerException(info->opt_flags);
-      FreeTemp(tmp);
+      GenImplicitNullCheck(TargetReg(kArg1, false), info->opt_flags);
     }
   }
   return call_state;
@@ -1293,11 +1308,14 @@
       // On x86, we can compare to memory directly
       // Set up a launch pad to allow retry in case of bounds violation */
       if (rl_idx.is_const) {
+        LIR* comparison;
         range_check_branch = OpCmpMemImmBranch(
             kCondUlt, RegStorage::InvalidReg(), rl_obj.reg, count_offset,
-            mir_graph_->ConstantValue(rl_idx.orig_sreg), nullptr);
-      } else {
+            mir_graph_->ConstantValue(rl_idx.orig_sreg), nullptr, &comparison);
+        MarkPossibleNullPointerExceptionAfter(0, comparison);
+     } else {
         OpRegMem(kOpCmp, rl_idx.reg, rl_obj.reg, count_offset);
+        MarkPossibleNullPointerException(0);
         range_check_branch = OpCondBranch(kCondUge, nullptr);
       }
     }
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index 4885501..e93c6e3 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -838,6 +838,7 @@
     LIR* GenImmedCheck(ConditionCode c_code, RegStorage reg, int imm_val, ThrowKind kind);
     LIR* GenNullCheck(RegStorage m_reg, int opt_flags);
     LIR* GenExplicitNullCheck(RegStorage m_reg, int opt_flags);
+    virtual void GenImplicitNullCheck(RegStorage reg, int opt_flags);
     void GenCompareAndBranch(Instruction::Code opcode, RegLocation rl_src1,
                              RegLocation rl_src2, LIR* taken, LIR* fall_through);
     void GenCompareZeroAndBranch(Instruction::Code opcode, RegLocation rl_src,
@@ -1147,10 +1148,12 @@
      * @param base_reg The register holding the base address.
      * @param offset The offset from the base.
      * @param check_value The immediate to compare to.
+     * @param target branch target (or nullptr)
+     * @param compare output for getting LIR for comparison (or nullptr)
      * @returns The branch instruction that was generated.
      */
     virtual LIR* OpCmpMemImmBranch(ConditionCode cond, RegStorage temp_reg, RegStorage base_reg,
-                                   int offset, int check_value, LIR* target);
+                                   int offset, int check_value, LIR* target, LIR** compare);
 
     // Required for target - codegen helpers.
     virtual bool SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div,
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc
index 8df5b6d..ebe3f0a 100644
--- a/compiler/dex/quick/x86/assemble_x86.cc
+++ b/compiler/dex/quick/x86/assemble_x86.cc
@@ -271,21 +271,22 @@
   { kX86Shrd64RRI,  kRegRegImmStore, IS_TERTIARY_OP | REG_DEF0_USE01  | SETS_CCODES,            { REX_W,    0, 0x0F, 0xAC, 0, 0, 0, 1, false }, "Shrd64RRI", "!0r,!1r,!2d" },
   { kX86Shrd64MRI,  kMemRegImm,      IS_QUAD_OP | REG_USE02 | IS_LOAD | IS_STORE | SETS_CCODES, { REX_W,    0, 0x0F, 0xAC, 0, 0, 0, 1, false }, "Shrd64MRI", "[!0r+!1d],!2r,!3d" },
 
-  { kX86Test8RI,  kRegImm,             IS_BINARY_OP   | REG_USE0  | SETS_CCODES, { 0,    0, 0xF6, 0, 0, 0, 0, 1, true }, "Test8RI", "!0r,!1d" },
-  { kX86Test8MI,  kMemImm,   IS_LOAD | IS_TERTIARY_OP | REG_USE0  | SETS_CCODES, { 0,    0, 0xF6, 0, 0, 0, 0, 1, true }, "Test8MI", "[!0r+!1d],!2d" },
-  { kX86Test8AI,  kArrayImm, IS_LOAD | IS_QUIN_OP     | REG_USE01 | SETS_CCODES, { 0,    0, 0xF6, 0, 0, 0, 0, 1, true }, "Test8AI", "[!0r+!1r<<!2d+!3d],!4d" },
-  { kX86Test16RI, kRegImm,             IS_BINARY_OP   | REG_USE0  | SETS_CCODES, { 0x66, 0, 0xF7, 0, 0, 0, 0, 2, false }, "Test16RI", "!0r,!1d" },
-  { kX86Test16MI, kMemImm,   IS_LOAD | IS_TERTIARY_OP | REG_USE0  | SETS_CCODES, { 0x66, 0, 0xF7, 0, 0, 0, 0, 2, false }, "Test16MI", "[!0r+!1d],!2d" },
-  { kX86Test16AI, kArrayImm, IS_LOAD | IS_QUIN_OP     | REG_USE01 | SETS_CCODES, { 0x66, 0, 0xF7, 0, 0, 0, 0, 2, false }, "Test16AI", "[!0r+!1r<<!2d+!3d],!4d" },
-  { kX86Test32RI, kRegImm,             IS_BINARY_OP   | REG_USE0  | SETS_CCODES, { 0,    0, 0xF7, 0, 0, 0, 0, 4, false }, "Test32RI", "!0r,!1d" },
-  { kX86Test32MI, kMemImm,   IS_LOAD | IS_TERTIARY_OP | REG_USE0  | SETS_CCODES, { 0,    0, 0xF7, 0, 0, 0, 0, 4, false }, "Test32MI", "[!0r+!1d],!2d" },
-  { kX86Test32AI, kArrayImm, IS_LOAD | IS_QUIN_OP     | REG_USE01 | SETS_CCODES, { 0,    0, 0xF7, 0, 0, 0, 0, 4, false }, "Test32AI", "[!0r+!1r<<!2d+!3d],!4d" },
+  { kX86Test8RI,  kRegImm,             IS_BINARY_OP   | REG_USE0  | SETS_CCODES, { 0,     0, 0xF6, 0, 0, 0, 0, 1, true }, "Test8RI", "!0r,!1d" },
+  { kX86Test8MI,  kMemImm,   IS_LOAD | IS_TERTIARY_OP | REG_USE0  | SETS_CCODES, { 0,     0, 0xF6, 0, 0, 0, 0, 1, true }, "Test8MI", "[!0r+!1d],!2d" },
+  { kX86Test8AI,  kArrayImm, IS_LOAD | IS_QUIN_OP     | REG_USE01 | SETS_CCODES, { 0,     0, 0xF6, 0, 0, 0, 0, 1, true }, "Test8AI", "[!0r+!1r<<!2d+!3d],!4d" },
+  { kX86Test16RI, kRegImm,             IS_BINARY_OP   | REG_USE0  | SETS_CCODES, { 0x66,  0, 0xF7, 0, 0, 0, 0, 2, false }, "Test16RI", "!0r,!1d" },
+  { kX86Test16MI, kMemImm,   IS_LOAD | IS_TERTIARY_OP | REG_USE0  | SETS_CCODES, { 0x66,  0, 0xF7, 0, 0, 0, 0, 2, false }, "Test16MI", "[!0r+!1d],!2d" },
+  { kX86Test16AI, kArrayImm, IS_LOAD | IS_QUIN_OP     | REG_USE01 | SETS_CCODES, { 0x66,  0, 0xF7, 0, 0, 0, 0, 2, false }, "Test16AI", "[!0r+!1r<<!2d+!3d],!4d" },
+  { kX86Test32RI, kRegImm,             IS_BINARY_OP   | REG_USE0  | SETS_CCODES, { 0,     0, 0xF7, 0, 0, 0, 0, 4, false }, "Test32RI", "!0r,!1d" },
+  { kX86Test32MI, kMemImm,   IS_LOAD | IS_TERTIARY_OP | REG_USE0  | SETS_CCODES, { 0,     0, 0xF7, 0, 0, 0, 0, 4, false }, "Test32MI", "[!0r+!1d],!2d" },
+  { kX86Test32AI, kArrayImm, IS_LOAD | IS_QUIN_OP     | REG_USE01 | SETS_CCODES, { 0,     0, 0xF7, 0, 0, 0, 0, 4, false }, "Test32AI", "[!0r+!1r<<!2d+!3d],!4d" },
   { kX86Test64RI, kRegImm,             IS_BINARY_OP   | REG_USE0  | SETS_CCODES, { REX_W, 0, 0xF7, 0, 0, 0, 0, 4, false }, "Test64RI", "!0r,!1d" },
   { kX86Test64MI, kMemImm,   IS_LOAD | IS_TERTIARY_OP | REG_USE0  | SETS_CCODES, { REX_W, 0, 0xF7, 0, 0, 0, 0, 4, false }, "Test64MI", "[!0r+!1d],!2d" },
   { kX86Test64AI, kArrayImm, IS_LOAD | IS_QUIN_OP     | REG_USE01 | SETS_CCODES, { REX_W, 0, 0xF7, 0, 0, 0, 0, 4, false }, "Test64AI", "[!0r+!1r<<!2d+!3d],!4d" },
 
-  { kX86Test32RR, kRegReg,             IS_BINARY_OP   | REG_USE01 | SETS_CCODES, { 0,    0, 0x85, 0, 0, 0, 0, 0, false }, "Test32RR", "!0r,!1r" },
+  { kX86Test32RR, kRegReg,             IS_BINARY_OP   | REG_USE01 | SETS_CCODES, { 0,     0, 0x85, 0, 0, 0, 0, 0, false }, "Test32RR", "!0r,!1r" },
   { kX86Test64RR, kRegReg,             IS_BINARY_OP   | REG_USE01 | SETS_CCODES, { REX_W, 0, 0x85, 0, 0, 0, 0, 0, false }, "Test64RR", "!0r,!1r" },
+  { kX86Test32RM, kRegMem,   IS_LOAD | IS_TERTIARY_OP | REG_USE0  | SETS_CCODES, { 0,     0, 0x85, 0, 0, 0, 0, 0, false }, "Test32RM", "!0r,[!1r+!1d]" },
 
 #define UNARY_ENCODING_MAP(opname, modrm, is_store, sets_ccodes, \
                            reg, reg_kind, reg_flags, \
diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc
index 9000514..916198d 100644
--- a/compiler/dex/quick/x86/call_x86.cc
+++ b/compiler/dex/quick/x86/call_x86.cc
@@ -222,15 +222,27 @@
   LockTemp(rs_rX86_ARG1);
   LockTemp(rs_rX86_ARG2);
 
-  /* Build frame, return address already on stack */
-  stack_decrement_ = OpRegImm(kOpSub, rs_rX86_SP, frame_size_ - GetInstructionSetPointerSize(cu_->instruction_set));
-
   /*
    * We can safely skip the stack overflow check if we're
    * a leaf *and* our frame size < fudge factor.
    */
-  const bool skip_overflow_check = mir_graph_->MethodIsLeaf() &&
-      !IsLargeFrame(frame_size_, cu_->target64 ? kX86_64 : kX86);
+  InstructionSet isa =  cu_->target64 ? kX86_64 : kX86;
+  const bool skip_overflow_check = mir_graph_->MethodIsLeaf() && !IsLargeFrame(frame_size_, isa);
+
+  // If we doing an implicit stack overflow check, perform the load immediately
+  // before the stack pointer is decremented and anything is saved.
+  if (!skip_overflow_check && !Runtime::Current()->ExplicitStackOverflowChecks()) {
+    // Implicit stack overflow check.
+    // test eax,[esp + -overflow]
+    int overflow = GetStackOverflowReservedBytes(isa);
+    NewLIR3(kX86Test32RM, rs_rAX.GetReg(), rs_rX86_SP.GetReg(), -overflow);
+    MarkPossibleStackOverflowException();
+  }
+
+  /* Build frame, return address already on stack */
+  stack_decrement_ = OpRegImm(kOpSub, rs_rX86_SP, frame_size_ -
+                              GetInstructionSetPointerSize(cu_->instruction_set));
+
   NewLIR0(kPseudoMethodEntry);
   /* Spill core callee saves */
   SpillCoreRegs();
@@ -261,25 +273,27 @@
      private:
       const size_t sp_displace_;
     };
-    // TODO: for large frames we should do something like:
-    // spill ebp
-    // lea ebp, [esp + frame_size]
-    // cmp ebp, fs:[stack_end_]
-    // jcc stack_overflow_exception
-    // mov esp, ebp
-    // in case a signal comes in that's not using an alternate signal stack and the large frame may
-    // have moved us outside of the reserved area at the end of the stack.
-    // cmp rs_rX86_SP, fs:[stack_end_]; jcc throw_slowpath
-    if (cu_->target64) {
-      OpRegThreadMem(kOpCmp, rs_rX86_SP, Thread::StackEndOffset<8>());
-    } else {
-      OpRegThreadMem(kOpCmp, rs_rX86_SP, Thread::StackEndOffset<4>());
-    }
-    LIR* branch = OpCondBranch(kCondUlt, nullptr);
-    AddSlowPath(
+    if (Runtime::Current()->ExplicitStackOverflowChecks()) {
+      // TODO: for large frames we should do something like:
+      // spill ebp
+      // lea ebp, [esp + frame_size]
+      // cmp ebp, fs:[stack_end_]
+      // jcc stack_overflow_exception
+      // mov esp, ebp
+      // in case a signal comes in that's not using an alternate signal stack and the large frame
+      // may have moved us outside of the reserved area at the end of the stack.
+      // cmp rs_rX86_SP, fs:[stack_end_]; jcc throw_slowpath
+      if (cu_->target64) {
+        OpRegThreadMem(kOpCmp, rs_rX86_SP, Thread::StackEndOffset<8>());
+      } else {
+        OpRegThreadMem(kOpCmp, rs_rX86_SP, Thread::StackEndOffset<4>());
+      }
+      LIR* branch = OpCondBranch(kCondUlt, nullptr);
+      AddSlowPath(
         new(arena_)StackOverflowSlowPath(this, branch,
                                          frame_size_ -
                                          GetInstructionSetPointerSize(cu_->instruction_set)));
+    }
   }
 
   FlushIns(ArgLocs, rl_method);
@@ -318,4 +332,14 @@
   NewLIR0(kX86Ret);
 }
 
+void X86Mir2Lir::GenImplicitNullCheck(RegStorage reg, int opt_flags) {
+  if (!(cu_->disable_opt & (1 << kNullCheckElimination)) && (opt_flags & MIR_IGNORE_NULL_CHECK)) {
+    return;
+  }
+  // Implicit null pointer check.
+  // test eax,[arg1+0]
+  NewLIR3(kX86Test32RM, rs_rAX.GetReg(), reg.GetReg(), 0);
+  MarkPossibleNullPointerException(opt_flags);
+}
+
 }  // namespace art
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index ff7b30e..7e77364 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -85,6 +85,7 @@
   LIR* StoreBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale, int displacement,
                             RegStorage r_src, OpSize size) OVERRIDE;
   void MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg);
+  void GenImplicitNullCheck(RegStorage reg, int opt_flags);
 
   // Required for target - register utilities.
   RegStorage TargetReg(SpecialTargetRegister reg) OVERRIDE;
@@ -796,9 +797,11 @@
    * @param base_reg The register holding the base address.
    * @param offset The offset from the base.
    * @param check_value The immediate to compare to.
+   * @param target branch target (or nullptr)
+   * @param compare output for getting LIR for comparison (or nullptr)
    */
   LIR* OpCmpMemImmBranch(ConditionCode cond, RegStorage temp_reg, RegStorage base_reg,
-                         int offset, int check_value, LIR* target);
+                         int offset, int check_value, LIR* target, LIR** compare);
 
   /*
    * Can this operation be using core registers without temporaries?
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index f1166f6..e8118dc 100755
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -1092,6 +1092,7 @@
   };
 
   OpRegMem(kOpCmp, index, array_base, len_offset);
+  MarkPossibleNullPointerException(0);
   LIR* branch = OpCondBranch(kCondUge, nullptr);
   AddSlowPath(new (arena_) ArrayBoundsCheckSlowPath(this, branch,
                                                     index, array_base, len_offset));
@@ -1132,6 +1133,7 @@
   };
 
   NewLIR3(IS_SIMM8(index) ? kX86Cmp32MI8 : kX86Cmp32MI, array_base.GetReg(), len_offset, index);
+  MarkPossibleNullPointerException(0);
   LIR* branch = OpCondBranch(kCondLs, nullptr);
   AddSlowPath(new (arena_) ArrayBoundsCheckSlowPath(this, branch,
                                                     index, array_base, len_offset));
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index e81f505..6731b38 100755
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -825,8 +825,10 @@
 }
 
 LIR* X86Mir2Lir::CheckSuspendUsingLoad() {
-  LOG(FATAL) << "Unexpected use of CheckSuspendUsingLoad in x86";
-  return nullptr;
+  // First load the pointer in fs:[suspend-trigger] into eax
+  // Then use a test instruction to indirect via that address.
+  NewLIR2(kX86Mov32RT, rs_rAX.GetReg(),  Thread::ThreadSuspendTriggerOffset<4>().Int32Value());
+  return NewLIR3(kX86Test32RM, rs_rAX.GetReg(), rs_rAX.GetReg(), 0);
 }
 
 uint64_t X86Mir2Lir::GetTargetInstFlags(int opcode) {
@@ -1189,6 +1191,7 @@
   // Is the string non-NULL?
   LoadValueDirectFixed(rl_obj, rs_rDX);
   GenNullCheck(rs_rDX, info->opt_flags);
+  // uint32_t opt_flags = info->opt_flags;
   info->opt_flags |= MIR_IGNORE_NULL_CHECK;  // Record that we've null checked.
 
   // Does the character fit in 16 bits?
@@ -1215,12 +1218,20 @@
   // Character is in EAX.
   // Object pointer is in EDX.
 
+  // Compute the number of words to search in to rCX.
+  Load32Disp(rs_rDX, count_offset, rs_rCX);
+
+  // Possible signal here due to null pointer dereference.
+  // Note that the signal handler will expect the top word of
+  // the stack to be the ArtMethod*.  If the PUSH edi instruction
+  // below is ahead of the load above then this will not be true
+  // and the signal handler will not work.
+  MarkPossibleNullPointerException(0);
+
   // We need to preserve EDI, but have no spare registers, so push it on the stack.
   // We have to remember that all stack addresses after this are offset by sizeof(EDI).
   NewLIR1(kX86Push32R, rs_rDI.GetReg());
 
-  // Compute the number of words to search in to rCX.
-  Load32Disp(rs_rDX, count_offset, rs_rCX);
   LIR *length_compare = nullptr;
   int start_value = 0;
   bool is_index_on_stack = false;
diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc
index 657160f..09fce91 100644
--- a/compiler/dex/quick/x86/utility_x86.cc
+++ b/compiler/dex/quick/x86/utility_x86.cc
@@ -684,9 +684,9 @@
     } else {
       DCHECK(!r_dest.IsFloat());  // Make sure we're not still using a pair here.
       if (r_base == r_dest.GetLow()) {
-        load2 = NewLIR3(opcode, r_dest.GetHighReg(), r_base.GetReg(),
+        load = NewLIR3(opcode, r_dest.GetHighReg(), r_base.GetReg(),
                         displacement + HIWORD_OFFSET);
-        load = NewLIR3(opcode, r_dest.GetLowReg(), r_base.GetReg(), displacement + LOWORD_OFFSET);
+        load2 = NewLIR3(opcode, r_dest.GetLowReg(), r_base.GetReg(), displacement + LOWORD_OFFSET);
       } else {
         load = NewLIR3(opcode, r_dest.GetLowReg(), r_base.GetReg(), displacement + LOWORD_OFFSET);
         load2 = NewLIR3(opcode, r_dest.GetHighReg(), r_base.GetReg(),
@@ -712,16 +712,16 @@
         if (r_dest.GetHigh() == r_index) {
           // We can't use either register for the first load.
           RegStorage temp = AllocTemp();
-          load2 = NewLIR5(opcode, temp.GetReg(), r_base.GetReg(), r_index.GetReg(), scale,
+          load = NewLIR5(opcode, temp.GetReg(), r_base.GetReg(), r_index.GetReg(), scale,
                           displacement + HIWORD_OFFSET);
-          load = NewLIR5(opcode, r_dest.GetLowReg(), r_base.GetReg(), r_index.GetReg(), scale,
+          load2 = NewLIR5(opcode, r_dest.GetLowReg(), r_base.GetReg(), r_index.GetReg(), scale,
                          displacement + LOWORD_OFFSET);
           OpRegCopy(r_dest.GetHigh(), temp);
           FreeTemp(temp);
         } else {
-          load2 = NewLIR5(opcode, r_dest.GetHighReg(), r_base.GetReg(), r_index.GetReg(), scale,
+          load = NewLIR5(opcode, r_dest.GetHighReg(), r_base.GetReg(), r_index.GetReg(), scale,
                           displacement + HIWORD_OFFSET);
-          load = NewLIR5(opcode, r_dest.GetLowReg(), r_base.GetReg(), r_index.GetReg(), scale,
+          load2 = NewLIR5(opcode, r_dest.GetLowReg(), r_base.GetReg(), r_index.GetReg(), scale,
                          displacement + LOWORD_OFFSET);
         }
       } else {
@@ -744,6 +744,7 @@
     }
   }
 
+  // Always return first load generated as this might cause a fault if base is nullptr.
   return load;
 }
 
@@ -881,9 +882,12 @@
 }
 
 LIR* X86Mir2Lir::OpCmpMemImmBranch(ConditionCode cond, RegStorage temp_reg, RegStorage base_reg,
-                                   int offset, int check_value, LIR* target) {
-    NewLIR3(IS_SIMM8(check_value) ? kX86Cmp32MI8 : kX86Cmp32MI, base_reg.GetReg(), offset,
-            check_value);
+                                   int offset, int check_value, LIR* target, LIR** compare) {
+    LIR* inst = NewLIR3(IS_SIMM8(check_value) ? kX86Cmp32MI8 : kX86Cmp32MI, base_reg.GetReg(),
+            offset, check_value);
+    if (compare != nullptr) {
+        *compare = inst;
+    }
     LIR* branch = OpCondBranch(cond, target);
     return branch;
 }
diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h
index 2789923..1ec3d41 100644
--- a/compiler/dex/quick/x86/x86_lir.h
+++ b/compiler/dex/quick/x86/x86_lir.h
@@ -497,6 +497,7 @@
   UnaryOpcode(kX86Test, RI, MI, AI),
   kX86Test32RR,
   kX86Test64RR,
+  kX86Test32RM,
   UnaryOpcode(kX86Not, R, M, A),
   UnaryOpcode(kX86Neg, R, M, A),
   UnaryOpcode(kX86Mul,  DaR, DaM, DaA),
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 80e7724..8800d18 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -738,6 +738,7 @@
   switch (isa) {
     case kArm:
     case kThumb2:
+    case kX86:
       break;  // All checks implemented, leave as is.
 
     default:  // No checks implemented, reset all to explicit checks.
diff --git a/runtime/Android.mk b/runtime/Android.mk
index 7f5cf0c..3774b32 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -412,6 +412,7 @@
     LOCAL_STATIC_LIBRARIES := libziparchive libz
   else # host
     LOCAL_STATIC_LIBRARIES += libcutils libziparchive-host libz libutils
+    LOCAL_SHARED_LIBRARIES += libsigchain
     LOCAL_LDLIBS += -ldl -lpthread
     ifeq ($$(HOST_OS),linux)
       LOCAL_LDLIBS += -lrt
diff --git a/runtime/arch/arm/fault_handler_arm.cc b/runtime/arch/arm/fault_handler_arm.cc
index 2a82129..e22c56e 100644
--- a/runtime/arch/arm/fault_handler_arm.cc
+++ b/runtime/arch/arm/fault_handler_arm.cc
@@ -46,9 +46,10 @@
   return instr_size;
 }
 
-void FaultManager::GetMethodAndReturnPCAndSP(void* context, mirror::ArtMethod** out_method,
+void FaultManager::GetMethodAndReturnPCAndSP(siginfo_t* siginfo, void* context,
+                                             mirror::ArtMethod** out_method,
                                              uintptr_t* out_return_pc, uintptr_t* out_sp) {
-  struct ucontext *uc = (struct ucontext *)context;
+  struct ucontext* uc = reinterpret_cast<struct ucontext*>(context);
   struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
   *out_sp = static_cast<uintptr_t>(sc->arm_sp);
   VLOG(signals) << "sp: " << *out_sp;
@@ -114,7 +115,7 @@
   uint32_t checkinst1 = 0xf8d90000 + Thread::ThreadSuspendTriggerOffset<4>().Int32Value();
   uint16_t checkinst2 = 0x6800;
 
-  struct ucontext *uc = (struct ucontext *)context;
+  struct ucontext* uc = reinterpret_cast<struct ucontext*>(context);
   struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
   uint8_t* ptr2 = reinterpret_cast<uint8_t*>(sc->arm_pc);
   uint8_t* ptr1 = ptr2 - 4;
@@ -178,7 +179,7 @@
 // to the overflow region below the protected region.
 
 bool StackOverflowHandler::Action(int sig, siginfo_t* info, void* context) {
-  struct ucontext *uc = (struct ucontext *)context;
+  struct ucontext* uc = reinterpret_cast<struct ucontext*>(context);
   struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
   VLOG(signals) << "stack overflow handler with sp at " << std::hex << &uc;
   VLOG(signals) << "sigcontext: " << std::hex << sc;
@@ -205,7 +206,7 @@
   }
 
   // We know this is a stack overflow.  We need to move the sp to the overflow region
-  // the exists below the protected region.  Determine the address of the next
+  // that exists below the protected region.  Determine the address of the next
   // available valid address below the protected region.
   uintptr_t prevsp = sp;
   sp = pregion;
diff --git a/runtime/arch/arm64/fault_handler_arm64.cc b/runtime/arch/arm64/fault_handler_arm64.cc
index 74c3023..34eede6 100644
--- a/runtime/arch/arm64/fault_handler_arm64.cc
+++ b/runtime/arch/arm64/fault_handler_arm64.cc
@@ -29,7 +29,8 @@
 
 namespace art {
 
-void FaultManager::GetMethodAndReturnPCAndSP(void* context, mirror::ArtMethod** out_method,
+void FaultManager::GetMethodAndReturnPCAndSP(siginfo_t* siginfo, void* context,
+                                             mirror::ArtMethod** out_method,
                                              uintptr_t* out_return_pc, uintptr_t* out_sp) {
 }
 
diff --git a/runtime/arch/mips/fault_handler_mips.cc b/runtime/arch/mips/fault_handler_mips.cc
index 1ecd7d9..5a64a69 100644
--- a/runtime/arch/mips/fault_handler_mips.cc
+++ b/runtime/arch/mips/fault_handler_mips.cc
@@ -29,7 +29,8 @@
 
 namespace art {
 
-void FaultManager::GetMethodAndReturnPCAndSP(void* context, mirror::ArtMethod** out_method,
+void FaultManager::GetMethodAndReturnPCAndSP(siginfo_t* siginfo, void* context,
+                                             mirror::ArtMethod** out_method,
                                              uintptr_t* out_return_pc, uintptr_t* out_sp) {
 }
 
diff --git a/runtime/arch/x86/fault_handler_x86.cc b/runtime/arch/x86/fault_handler_x86.cc
index 7c1980e..f62200a 100644
--- a/runtime/arch/x86/fault_handler_x86.cc
+++ b/runtime/arch/x86/fault_handler_x86.cc
@@ -21,6 +21,10 @@
 #include "globals.h"
 #include "base/logging.h"
 #include "base/hex_dump.h"
+#include "mirror/art_method.h"
+#include "mirror/art_method-inl.h"
+#include "thread.h"
+#include "thread-inl.h"
 
 
 //
@@ -29,19 +33,294 @@
 
 namespace art {
 
-void FaultManager::GetMethodAndReturnPCAndSP(void* context, mirror::ArtMethod** out_method,
+extern "C" void art_quick_throw_null_pointer_exception();
+extern "C" void art_quick_throw_stack_overflow_from_signal();
+extern "C" void art_quick_test_suspend();
+
+// From the x86 disassembler...
+enum SegmentPrefix {
+  kCs = 0x2e,
+  kSs = 0x36,
+  kDs = 0x3e,
+  kEs = 0x26,
+  kFs = 0x64,
+  kGs = 0x65,
+};
+
+// Get the size of an instruction in bytes.
+static uint32_t GetInstructionSize(uint8_t* pc) {
+  uint8_t* instruction_start = pc;
+  bool have_prefixes = true;
+  bool two_byte = false;
+
+  // Skip all the prefixes.
+  do {
+    switch (*pc) {
+        // Group 1 - lock and repeat prefixes:
+      case 0xF0:
+      case 0xF2:
+      case 0xF3:
+        // Group 2 - segment override prefixes:
+      case kCs:
+      case kSs:
+      case kDs:
+      case kEs:
+      case kFs:
+      case kGs:
+        // Group 3 - operand size override:
+      case 0x66:
+        // Group 4 - address size override:
+      case 0x67:
+        break;
+      default:
+        have_prefixes = false;
+        break;
+    }
+    if (have_prefixes) {
+      pc++;
+    }
+  } while (have_prefixes);
+
+#if defined(__x86_64__)
+  // Skip REX is present.
+  if (*pc >= 0x40 && *pc <= 0x4F) {
+    ++pc;
+  }
+#endif
+
+  // Check for known instructions.
+  uint32_t known_length = 0;
+  switch (*pc) {
+  case 0x83:                // cmp [r + v], b: 4 byte instruction
+    known_length = 4;
+    break;
+  }
+
+  if (known_length > 0) {
+    VLOG(signals) << "known instruction with length " << known_length;
+    return known_length;
+  }
+
+  // Unknown instruction, work out length.
+
+  // Work out if we have a ModR/M byte.
+  uint8_t opcode = *pc++;
+  if (opcode == 0xf) {
+    two_byte = true;
+    opcode = *pc++;
+  }
+
+  bool has_modrm = false;         // Is ModR/M byte present?
+  uint8_t hi = opcode >> 4;       // Opcode high nybble.
+  uint8_t lo = opcode & 0b1111;   // Opcode low nybble.
+
+  // From the Intel opcode tables.
+  if (two_byte) {
+    has_modrm = true;   // TODO: all of these?
+  } else if (hi < 4) {
+    has_modrm = lo < 4 || (lo >= 8 && lo <= 0xb);
+  } else if (hi == 6) {
+    has_modrm = lo == 3 || lo == 9 || lo == 0xb;
+  } else if (hi == 8) {
+    has_modrm = lo != 0xd;
+  } else if (hi == 0xc) {
+    has_modrm = lo == 1 || lo == 2 || lo == 6 || lo == 7;
+  } else if (hi == 0xd) {
+    has_modrm = lo < 4;
+  } else if (hi == 0xf) {
+    has_modrm = lo == 6 || lo == 7;
+  }
+
+  if (has_modrm) {
+    uint8_t modrm = *pc++;
+    uint8_t mod = (modrm >> 6) & 0b11;
+    uint8_t reg = (modrm >> 3) & 0b111;
+    switch (mod) {
+      case 0:
+        break;
+      case 1:
+        if (reg == 4) {
+          // SIB + 1 byte displacement.
+          pc += 2;
+        } else {
+          pc += 1;
+        }
+        break;
+      case 2:
+        // SIB + 4 byte displacement.
+        pc += 5;
+        break;
+      case 3:
+        break;
+    }
+  }
+
+  VLOG(signals) << "calculated X86 instruction size is " << (pc - instruction_start);
+  return pc - instruction_start;
+}
+
+void FaultManager::GetMethodAndReturnPCAndSP(siginfo_t* siginfo, void* context,
+                                             mirror::ArtMethod** out_method,
                                              uintptr_t* out_return_pc, uintptr_t* out_sp) {
+  struct ucontext* uc = reinterpret_cast<struct ucontext*>(context);
+  *out_sp = static_cast<uintptr_t>(uc->uc_mcontext.gregs[REG_ESP]);
+  VLOG(signals) << "sp: " << std::hex << *out_sp;
+  if (*out_sp == 0) {
+    return;
+  }
+
+  // In the case of a stack overflow, the stack is not valid and we can't
+  // get the method from the top of the stack.  However it's in EAX.
+  uintptr_t* fault_addr = reinterpret_cast<uintptr_t*>(siginfo->si_addr);
+  uintptr_t* overflow_addr = reinterpret_cast<uintptr_t*>(
+      reinterpret_cast<uint8_t*>(*out_sp) - GetStackOverflowReservedBytes(kX86));
+  if (overflow_addr == fault_addr) {
+    *out_method = reinterpret_cast<mirror::ArtMethod*>(uc->uc_mcontext.gregs[REG_EAX]);
+  } else {
+    // The method is at the top of the stack.
+    *out_method = reinterpret_cast<mirror::ArtMethod*>(reinterpret_cast<uintptr_t*>(*out_sp)[0]);
+  }
+
+  uint8_t* pc = reinterpret_cast<uint8_t*>(uc->uc_mcontext.gregs[REG_EIP]);
+  VLOG(signals) << HexDump(pc, 32, true, "PC ");
+
+  uint32_t instr_size = GetInstructionSize(pc);
+  *out_return_pc = reinterpret_cast<uintptr_t>(pc + instr_size);
 }
 
 bool NullPointerHandler::Action(int sig, siginfo_t* info, void* context) {
+  struct ucontext *uc = reinterpret_cast<struct ucontext*>(context);
+  uint8_t* pc = reinterpret_cast<uint8_t*>(uc->uc_mcontext.gregs[REG_EIP]);
+  uint8_t* sp = reinterpret_cast<uint8_t*>(uc->uc_mcontext.gregs[REG_ESP]);
+
+  uint32_t instr_size = GetInstructionSize(pc);
+  // We need to arrange for the signal handler to return to the null pointer
+  // exception generator.  The return address must be the address of the
+  // next instruction (this instruction + instruction size).  The return address
+  // is on the stack at the top address of the current frame.
+
+  // Push the return address onto the stack.
+  uint32_t retaddr = reinterpret_cast<uint32_t>(pc + instr_size);
+  uint32_t* next_sp = reinterpret_cast<uint32_t*>(sp - 4);
+  *next_sp = retaddr;
+  uc->uc_mcontext.gregs[REG_ESP] = reinterpret_cast<uint32_t>(next_sp);
+
+  uc->uc_mcontext.gregs[REG_EIP] =
+        reinterpret_cast<uintptr_t>(art_quick_throw_null_pointer_exception);
+  VLOG(signals) << "Generating null pointer exception";
+  return true;
+}
+
+// A suspend check is done using the following instruction sequence:
+// 0xf720f1df:         648B058C000000      mov     eax, fs:[0x8c]  ; suspend_trigger
+// .. some intervening instructions.
+// 0xf720f1e6:                   8500      test    eax, [eax]
+
+// The offset from fs is Thread::ThreadSuspendTriggerOffset().
+// To check for a suspend check, we examine the instructions that caused
+// the fault.
+bool SuspensionHandler::Action(int sig, siginfo_t* info, void* context) {
+  // These are the instructions to check for.  The first one is the mov eax, fs:[xxx]
+  // where xxx is the offset of the suspend trigger.
+  uint32_t trigger = Thread::ThreadSuspendTriggerOffset<4>().Int32Value();
+
+  VLOG(signals) << "Checking for suspension point";
+  uint8_t checkinst1[] = {0x64, 0x8b, 0x05, static_cast<uint8_t>(trigger & 0xff),
+      static_cast<uint8_t>((trigger >> 8) & 0xff), 0, 0};
+  uint8_t checkinst2[] = {0x85, 0x00};
+
+  struct ucontext *uc = reinterpret_cast<struct ucontext*>(context);
+  uint8_t* pc = reinterpret_cast<uint8_t*>(uc->uc_mcontext.gregs[REG_EIP]);
+  uint8_t* sp = reinterpret_cast<uint8_t*>(uc->uc_mcontext.gregs[REG_ESP]);
+
+  if (pc[0] != checkinst2[0] || pc[1] != checkinst2[1]) {
+    // Second instruction is not correct (test eax,[eax]).
+    VLOG(signals) << "Not a suspension point";
+    return false;
+  }
+
+  // The first instruction can a little bit up the stream due to load hoisting
+  // in the compiler.
+  uint8_t* limit = pc - 100;   // Compiler will hoist to a max of 20 instructions.
+  uint8_t* ptr = pc - sizeof(checkinst1);
+  bool found = false;
+  while (ptr > limit) {
+    if (memcmp(ptr, checkinst1, sizeof(checkinst1)) == 0) {
+      found = true;
+      break;
+    }
+    ptr -= 1;
+  }
+
+  if (found) {
+    VLOG(signals) << "suspend check match";
+
+    // We need to arrange for the signal handler to return to the null pointer
+    // exception generator.  The return address must be the address of the
+    // next instruction (this instruction + 2).  The return address
+    // is on the stack at the top address of the current frame.
+
+    // Push the return address onto the stack.
+    uint32_t retaddr = reinterpret_cast<uint32_t>(pc + 2);
+    uint32_t* next_sp = reinterpret_cast<uint32_t*>(sp - 4);
+    *next_sp = retaddr;
+    uc->uc_mcontext.gregs[REG_ESP] = reinterpret_cast<uint32_t>(next_sp);
+
+    uc->uc_mcontext.gregs[REG_EIP] = reinterpret_cast<uintptr_t>(art_quick_test_suspend);
+
+    // Now remove the suspend trigger that caused this fault.
+    Thread::Current()->RemoveSuspendTrigger();
+    VLOG(signals) << "removed suspend trigger invoking test suspend";
+    return true;
+  }
+  VLOG(signals) << "Not a suspend check match, first instruction mismatch";
   return false;
 }
 
-bool SuspensionHandler::Action(int sig, siginfo_t* info, void* context) {
-  return false;
-}
+// The stack overflow check is done using the following instruction:
+// test eax, [esp+ -xxx]
+// where 'xxx' is the size of the overflow area.
+//
+// This is done before any frame is established in the method.  The return
+// address for the previous method is on the stack at ESP.
 
 bool StackOverflowHandler::Action(int sig, siginfo_t* info, void* context) {
-  return false;
+  struct ucontext *uc = reinterpret_cast<struct ucontext*>(context);
+  uintptr_t sp = static_cast<uintptr_t>(uc->uc_mcontext.gregs[REG_ESP]);
+
+  uintptr_t fault_addr = reinterpret_cast<uintptr_t>(info->si_addr);
+  VLOG(signals) << "fault_addr: " << std::hex << fault_addr;
+  VLOG(signals) << "checking for stack overflow, sp: " << std::hex << sp <<
+    ", fault_addr: " << fault_addr;
+
+  uintptr_t overflow_addr = sp - GetStackOverflowReservedBytes(kX86);
+
+  Thread* self = Thread::Current();
+  uintptr_t pregion = reinterpret_cast<uintptr_t>(self->GetStackEnd()) -
+      Thread::kStackOverflowProtectedSize;
+
+  // Check that the fault address is the value expected for a stack overflow.
+  if (fault_addr != overflow_addr) {
+    VLOG(signals) << "Not a stack overflow";
+    return false;
+  }
+
+  // We know this is a stack overflow.  We need to move the sp to the overflow region
+  // that exists below the protected region.  Determine the address of the next
+  // available valid address below the protected region.
+  VLOG(signals) << "setting sp to overflow region at " << std::hex << pregion;
+
+  // Since the compiler puts the implicit overflow
+  // check before the callee save instructions, the SP is already pointing to
+  // the previous frame.
+
+  // Tell the stack overflow code where the new stack pointer should be.
+  uc->uc_mcontext.gregs[REG_EAX] = pregion;
+
+  // Now arrange for the signal handler to return to art_quick_throw_stack_overflow_from_signal.
+  uc->uc_mcontext.gregs[REG_EIP] = reinterpret_cast<uintptr_t>(
+    art_quick_throw_stack_overflow_from_signal);
+
+  return true;
 }
 }       // namespace art
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 24b9e46..68f46ad 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -173,6 +173,21 @@
      */
 NO_ARG_RUNTIME_EXCEPTION art_quick_throw_stack_overflow, artThrowStackOverflowFromCode
 
+// On entry to this function, EAX contains the ESP value for the overflow region.
+DEFINE_FUNCTION art_quick_throw_stack_overflow_from_signal
+    // Here, the ESP is above the protected region.  We need to create a
+    // callee save frame and then move ESP down to the overflow region.
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
+    mov %esp, %ecx                // get current stack pointer
+    mov %eax, %esp                // move ESP to the overflow region.
+    PUSH ecx                      // pass SP
+    pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
+    CFI_ADJUST_CFA_OFFSET(4)
+    SETUP_GOT_NOSAVE              // clobbers ebx (harmless here)
+    call PLT_SYMBOL(artThrowStackOverflowFromCode)    // artThrowStackOverflowFromCode(Thread*, SP)
+    int3                          // unreached
+END_FUNCTION art_quick_throw_stack_overflow_from_signal
+
     /*
      * Called by managed code, saves callee saves and then calls artThrowException
      * that will place a mock Method* at the bottom of the stack. Arg1 holds the exception.
diff --git a/runtime/arch/x86_64/fault_handler_x86_64.cc b/runtime/arch/x86_64/fault_handler_x86_64.cc
index 233d3c7..88ae7f3 100644
--- a/runtime/arch/x86_64/fault_handler_x86_64.cc
+++ b/runtime/arch/x86_64/fault_handler_x86_64.cc
@@ -29,7 +29,8 @@
 
 namespace art {
 
-void FaultManager::GetMethodAndReturnPCAndSP(void* context, mirror::ArtMethod** out_method,
+void FaultManager::GetMethodAndReturnPCAndSP(siginfo_t* siginfo, void* context,
+                                             mirror::ArtMethod** out_method,
                                              uintptr_t* out_return_pc, uintptr_t* out_sp) {
 }
 
diff --git a/runtime/fault_handler.cc b/runtime/fault_handler.cc
index 3112bc0..f99ce07 100644
--- a/runtime/fault_handler.cc
+++ b/runtime/fault_handler.cc
@@ -29,9 +29,7 @@
 #include "mirror/object-inl.h"
 #include "object_utils.h"
 #include "scoped_thread_state_change.h"
-#ifdef HAVE_ANDROID_OS
 #include "sigchain.h"
-#endif
 #include "verify_object-inl.h"
 
 namespace art {
@@ -47,6 +45,7 @@
 
 // Signal handler called on SIGSEGV.
 static void art_fault_handler(int sig, siginfo_t* info, void* context) {
+  // std::cout << "handling fault in ART handler\n";
   fault_manager.HandleFault(sig, info, context);
 }
 
@@ -55,9 +54,7 @@
 }
 
 FaultManager::~FaultManager() {
-#ifdef HAVE_ANDROID_OS
   UnclaimSignalChain(SIGSEGV);
-#endif
   sigaction(SIGSEGV, &oldaction_, nullptr);   // Restore old handler.
 }
 
@@ -72,11 +69,12 @@
 #endif
 
   // Set our signal handler now.
-  sigaction(SIGSEGV, &action, &oldaction_);
-#ifdef HAVE_ANDROID_OS
+  int e = sigaction(SIGSEGV, &action, &oldaction_);
+  if (e != 0) {
+    VLOG(signals) << "Failed to claim SEGV: " << strerror(errno);
+  }
   // Make sure our signal handler is called before any user handlers.
   ClaimSignalChain(SIGSEGV, &oldaction_);
-#endif
 }
 
 void FaultManager::HandleFault(int sig, siginfo_t* info, void* context) {
@@ -84,8 +82,12 @@
   //
   // If malloc calls abort, it will be holding its lock.
   // If the handler tries to call malloc, it will deadlock.
+
+  // Also, there is only an 8K stack available here to logging can cause memory
+  // overwrite issues if you are unlucky.  If you want to enable logging and
+  // are getting crashes, allocate more space for the alternate signal stack.
   VLOG(signals) << "Handling fault";
-  if (IsInGeneratedCode(context, true)) {
+  if (IsInGeneratedCode(info, context, true)) {
     VLOG(signals) << "in generated code, looking for handler";
     for (const auto& handler : generated_code_handlers_) {
       VLOG(signals) << "invoking Action on handler " << handler;
@@ -101,11 +103,8 @@
   }
   art_sigsegv_fault();
 
-#ifdef HAVE_ANDROID_OS
+  // Pass this on to the next handler in the chain, or the default if none.
   InvokeUserSignalHandler(sig, info, context);
-#else
-  oldaction_.sa_sigaction(sig, info, context);
-#endif
 }
 
 void FaultManager::AddHandler(FaultHandler* handler, bool generated_code) {
@@ -132,7 +131,7 @@
 
 // This function is called within the signal handler.  It checks that
 // the mutator_lock is held (shared).  No annotalysis is done.
-bool FaultManager::IsInGeneratedCode(void* context, bool check_dex_pc) {
+bool FaultManager::IsInGeneratedCode(siginfo_t* siginfo, void* context, bool check_dex_pc) {
   // We can only be running Java code in the current thread if it
   // is in Runnable state.
   VLOG(signals) << "Checking for generated code";
@@ -161,7 +160,7 @@
 
   // Get the architecture specific method address and return address.  These
   // are in architecture specific files in arch/<arch>/fault_handler_<arch>.
-  GetMethodAndReturnPCAndSP(context, &method_obj, &return_pc, &sp);
+  GetMethodAndReturnPCAndSP(siginfo, context, &method_obj, &return_pc, &sp);
 
   // If we don't have a potential method, we're outta here.
   VLOG(signals) << "potential method: " << method_obj;
@@ -242,12 +241,12 @@
 
 bool JavaStackTraceHandler::Action(int sig, siginfo_t* siginfo, void* context) {
   // Make sure that we are in the generated code, but we may not have a dex pc.
-  if (manager_->IsInGeneratedCode(context, false)) {
+  if (manager_->IsInGeneratedCode(siginfo, context, false)) {
     LOG(ERROR) << "Dumping java stack trace for crash in generated code";
     mirror::ArtMethod* method = nullptr;
     uintptr_t return_pc = 0;
     uintptr_t sp = 0;
-    manager_->GetMethodAndReturnPCAndSP(context, &method, &return_pc, &sp);
+    manager_->GetMethodAndReturnPCAndSP(siginfo, context, &method, &return_pc, &sp);
     Thread* self = Thread::Current();
     // Inside of generated code, sp[0] is the method, so sp is the frame.
     StackReference<mirror::ArtMethod>* frame =
diff --git a/runtime/fault_handler.h b/runtime/fault_handler.h
index 026f5b9..71c9977 100644
--- a/runtime/fault_handler.h
+++ b/runtime/fault_handler.h
@@ -43,9 +43,10 @@
   void HandleFault(int sig, siginfo_t* info, void* context);
   void AddHandler(FaultHandler* handler, bool generated_code);
   void RemoveHandler(FaultHandler* handler);
-  void GetMethodAndReturnPCAndSP(void* context, mirror::ArtMethod** out_method,
+  void GetMethodAndReturnPCAndSP(siginfo_t* siginfo, void* context, mirror::ArtMethod** out_method,
                                  uintptr_t* out_return_pc, uintptr_t* out_sp);
-  bool IsInGeneratedCode(void *context, bool check_dex_pc) NO_THREAD_SAFETY_ANALYSIS;
+  bool IsInGeneratedCode(siginfo_t* siginfo, void *context, bool check_dex_pc)
+                         NO_THREAD_SAFETY_ANALYSIS;
 
  private:
   std::vector<FaultHandler*> generated_code_handlers_;
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index e1e133f..333ba03 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -286,8 +286,8 @@
     }
   }
 #else
-  explicit_checks_ = kExplicitNullCheck | kExplicitSuspendCheck |
-    kExplicitStackOverflowCheck;
+  // Host.  Only suspend check is explicit by default.
+  explicit_checks_ = kExplicitSuspendCheck;
 #endif
 
   for (size_t i = 0; i < options.size(); ++i) {
@@ -305,6 +305,7 @@
       Exit(0);
     } else if (StartsWith(option, "-Xbootclasspath:")) {
       boot_class_path_string_ = option.substr(strlen("-Xbootclasspath:")).data();
+      LOG(INFO) << "setting boot class path to " << boot_class_path_string_;
     } else if (option == "-classpath" || option == "-cp") {
       // TODO: support -Djava.class.path
       i++;
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index efa205e..6459a52 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -580,10 +580,41 @@
     GetInstrumentation()->ForceInterpretOnly();
   }
 
+  heap_ = new gc::Heap(options->heap_initial_size_,
+                       options->heap_growth_limit_,
+                       options->heap_min_free_,
+                       options->heap_max_free_,
+                       options->heap_target_utilization_,
+                       options->foreground_heap_growth_multiplier_,
+                       options->heap_maximum_size_,
+                       options->image_,
+                       options->image_isa_,
+                       options->collector_type_,
+                       options->background_collector_type_,
+                       options->parallel_gc_threads_,
+                       options->conc_gc_threads_,
+                       options->low_memory_mode_,
+                       options->long_pause_log_threshold_,
+                       options->long_gc_log_threshold_,
+                       options->ignore_max_footprint_,
+                       options->use_tlab_,
+                       options->verify_pre_gc_heap_,
+                       options->verify_pre_sweeping_heap_,
+                       options->verify_post_gc_heap_,
+                       options->verify_pre_gc_rosalloc_,
+                       options->verify_pre_sweeping_rosalloc_,
+                       options->verify_post_gc_rosalloc_);
+
+  dump_gc_performance_on_shutdown_ = options->dump_gc_performance_on_shutdown_;
+
+  BlockSignals();
+  InitPlatformSignalHandlers();
+
   bool implicit_checks_supported = false;
   switch (kRuntimeISA) {
     case kArm:
     case kThumb2:
+    case kX86:
       implicit_checks_supported = true;
       break;
     default:
@@ -615,36 +646,6 @@
     }
   }
 
-  heap_ = new gc::Heap(options->heap_initial_size_,
-                       options->heap_growth_limit_,
-                       options->heap_min_free_,
-                       options->heap_max_free_,
-                       options->heap_target_utilization_,
-                       options->foreground_heap_growth_multiplier_,
-                       options->heap_maximum_size_,
-                       options->image_,
-                       options->image_isa_,
-                       options->collector_type_,
-                       options->background_collector_type_,
-                       options->parallel_gc_threads_,
-                       options->conc_gc_threads_,
-                       options->low_memory_mode_,
-                       options->long_pause_log_threshold_,
-                       options->long_gc_log_threshold_,
-                       options->ignore_max_footprint_,
-                       options->use_tlab_,
-                       options->verify_pre_gc_heap_,
-                       options->verify_pre_sweeping_heap_,
-                       options->verify_post_gc_heap_,
-                       options->verify_pre_gc_rosalloc_,
-                       options->verify_pre_sweeping_rosalloc_,
-                       options->verify_post_gc_rosalloc_);
-
-  dump_gc_performance_on_shutdown_ = options->dump_gc_performance_on_shutdown_;
-
-  BlockSignals();
-  InitPlatformSignalHandlers();
-
   java_vm_ = new JavaVMExt(this, options.get());
 
   Thread::Startup();
diff --git a/runtime/thread.cc b/runtime/thread.cc
index d60fb49..4147dc2 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -233,47 +233,99 @@
   return stack_size;
 }
 
+// Global variable to prevent the compiler optimizing away the page reads for the stack.
+byte dont_optimize_this;
+
 // Install a protected region in the stack.  This is used to trigger a SIGSEGV if a stack
 // overflow is detected.  It is located right below the stack_end_.  Just below that
 // is the StackOverflow reserved region used when creating the StackOverflow
 // exception.
+//
+// There is a little complexity here that deserves a special mention.  When running on the
+// host (glibc), the process's main thread's stack is allocated with a special flag
+// to prevent memory being allocated when it's not needed.  This flag makes the
+// kernel only allocate memory for the stack by growing down in memory.  Because we
+// want to put an mprotected region far away from that at the stack top, we need
+// to make sure the pages for the stack are mapped in before we call mprotect.  We do
+// this by reading every page from the stack bottom (highest address) to the stack top.
+// We then madvise this away.
 void Thread::InstallImplicitProtection(bool is_main_stack) {
   byte* pregion = tlsPtr_.stack_end;
+  byte* stack_lowmem = tlsPtr_.stack_begin;
+  byte* stack_top = reinterpret_cast<byte*>(reinterpret_cast<uintptr_t>(&pregion) &
+      ~(kPageSize - 1));    // Page containing current top of stack.
 
+#ifndef HAVE_ANDROID_OS
+  bool running_on_host = true;
+#else
+  bool running_on_host = false;
+#endif
+
+  if (running_on_host) {
+    // On Host, we need to map in the main stack.  This must be done by reading from the
+    // current stack pointer downwards as the stack is mapped using VM_GROWSDOWN
+    // in the kernel.  Any access more than a page below the current SP will cause
+    // a segv.
+    if (is_main_stack) {
+      // First we need to unprotect the protected region because this may
+      // be called more than once for a particular stack and we will crash
+      // if we try to read the protected page.
+      mprotect(pregion - kStackOverflowProtectedSize, kStackOverflowProtectedSize, PROT_READ);
+
+      // Read every page from the high address to the low.
+      for (byte* p = stack_top; p > stack_lowmem; p -= kPageSize) {
+        dont_optimize_this = *p;
+      }
+    }
+  }
+
+  // Check and place a marker word at the lowest usable address in the stack.  This
+  // is used to prevent a double protection.
   constexpr uint32_t kMarker = 0xdadadada;
   uintptr_t *marker = reinterpret_cast<uintptr_t*>(pregion);
   if (*marker == kMarker) {
-    // The region has already been set up.
+    // The region has already been set up.  But on the main stack on the host we have
+    // removed the protected region in order to read the stack memory.  We need to put
+    // this back again.
+    if (is_main_stack && running_on_host) {
+      mprotect(pregion - kStackOverflowProtectedSize, kStackOverflowProtectedSize, PROT_NONE);
+      madvise(stack_lowmem, stack_top - stack_lowmem, MADV_DONTNEED);
+    }
     return;
   }
   // Add marker so that we can detect a second attempt to do this.
   *marker = kMarker;
 
-  pregion -= kStackOverflowProtectedSize;
-
-  // Touch the pages in the region to map them in.  Otherwise mprotect fails.  Only
-  // need to do this on the main stack.  We only need to touch one byte per page.
-  if (is_main_stack) {
-    byte* start = pregion;
-    byte* end = pregion + kStackOverflowProtectedSize;
-    while (start < end) {
-      *start = static_cast<byte>(0);
-      start += kPageSize;
+  if (!running_on_host) {
+    // Running on Android, stacks are mapped cleanly.  The protected region for the
+    // main stack just needs to be mapped in.  We do this by writing one byte per page.
+    for (byte* p = pregion - kStackOverflowProtectedSize;  p < pregion; p += kPageSize) {
+      *p = 0;
     }
   }
 
+  pregion -= kStackOverflowProtectedSize;
+
   VLOG(threads) << "installing stack protected region at " << std::hex <<
       static_cast<void*>(pregion) << " to " <<
       static_cast<void*>(pregion + kStackOverflowProtectedSize - 1);
 
+
   if (mprotect(pregion, kStackOverflowProtectedSize, PROT_NONE) == -1) {
     LOG(FATAL) << "Unable to create protected region in stack for implicit overflow check. Reason:"
         << strerror(errno);
   }
 
   // Tell the kernel that we won't be needing these pages any more.
+  // NB. madvise will probably write zeroes into the memory (on linux it does).
   if (is_main_stack) {
-    madvise(pregion, kStackOverflowProtectedSize, MADV_DONTNEED);
+    if (running_on_host) {
+      // On the host, it's the whole stack (minus a page to prevent overwrite of stack top).
+      madvise(stack_lowmem, stack_top - stack_lowmem - kPageSize, MADV_DONTNEED);
+    } else {
+      // On Android, just the protected region.
+      madvise(pregion, kStackOverflowProtectedSize, MADV_DONTNEED);
+    }
   }
 }
 
@@ -534,13 +586,17 @@
   // Install the protected region if we are doing implicit overflow checks.
   if (implicit_stack_check) {
     if (is_main_thread) {
-      // The main thread has a 16K protected region at the bottom.  We need
+      size_t guardsize;
+      pthread_attr_t attributes;
+      CHECK_PTHREAD_CALL(pthread_attr_init, (&attributes), "guard size query");
+      CHECK_PTHREAD_CALL(pthread_attr_getguardsize, (&attributes, &guardsize), "guard size query");
+      CHECK_PTHREAD_CALL(pthread_attr_destroy, (&attributes), "guard size query");
+      // The main thread might have protected region at the bottom.  We need
       // to install our own region so we need to move the limits
       // of the stack to make room for it.
-      constexpr uint32_t kDelta = 16 * KB;
-      tlsPtr_.stack_begin += kDelta;
-      tlsPtr_.stack_end += kDelta;
-      tlsPtr_.stack_size -= kDelta;
+      tlsPtr_.stack_begin += guardsize;
+      tlsPtr_.stack_end += guardsize;
+      tlsPtr_.stack_size -= guardsize;
     }
     InstallImplicitProtection(is_main_thread);
   }
diff --git a/runtime/thread_linux.cc b/runtime/thread_linux.cc
index ee66ccc..518211b 100644
--- a/runtime/thread_linux.cc
+++ b/runtime/thread_linux.cc
@@ -35,8 +35,8 @@
 void Thread::SetUpAlternateSignalStack() {
   // Create and set an alternate signal stack.
   stack_t ss;
-  ss.ss_sp = new uint8_t[SIGSTKSZ];
-  ss.ss_size = SIGSTKSZ;
+  ss.ss_sp = new uint8_t[SIGSTKSZ * 2];   // NB. this is 16K.
+  ss.ss_size = SIGSTKSZ * 2;
   ss.ss_flags = 0;
   CHECK(ss.ss_sp != NULL);
   SigAltStack(&ss, NULL);
@@ -56,7 +56,7 @@
   // Tell the kernel to stop using it.
   ss.ss_sp = NULL;
   ss.ss_flags = SS_DISABLE;
-  ss.ss_size = SIGSTKSZ;  // Avoid ENOMEM failure with Mac OS' buggy libc.
+  ss.ss_size = SIGSTKSZ * 2;  // Avoid ENOMEM failure with Mac OS' buggy libc.
   SigAltStack(&ss, NULL);
 
   // Free it.
diff --git a/sigchainlib/Android.mk b/sigchainlib/Android.mk
index 8e25339..20c8cac 100644
--- a/sigchainlib/Android.mk
+++ b/sigchainlib/Android.mk
@@ -28,3 +28,16 @@
 LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk
 LOCAL_ADDITIONAL_DEPENDENCIES += art/build/Android.common_build.mk
 include $(BUILD_SHARED_LIBRARY)
+
+# Build host library.
+include $(CLEAR_VARS)
+LOCAL_CPP_EXTENSION := $(ART_CPP_EXTENSION)
+LOCAL_MODULE_TAGS := optional
+LOCAL_IS_HOST_MODULE := true
+LOCAL_CFLAGS += $(ART_HOST_CFLAGS)
+LOCAL_SRC_FILES := sigchain.cc
+LOCAL_MODULE:= libsigchain
+LOCAL_ADDITIONAL_DEPENDENCIES += $(LOCAL_PATH)/Android.mk
+LOCAL_LDLIBS = -ldl
+LOCAL_MULTILIB := both
+include $(BUILD_HOST_SHARED_LIBRARY)
diff --git a/sigchainlib/sigchain.cc b/sigchainlib/sigchain.cc
index 5a5805f..458ad69 100644
--- a/sigchainlib/sigchain.cc
+++ b/sigchainlib/sigchain.cc
@@ -14,7 +14,13 @@
  * limitations under the License.
  */
 
+#ifdef HAVE_ANDROID_OS
 #include <android/log.h>
+#else
+#include <stdarg.h>
+#include <iostream>
+#endif
+
 #include <dlfcn.h>
 #include <signal.h>
 #include <stdio.h>
@@ -67,7 +73,11 @@
   va_list ap;
   va_start(ap, format);
   vsnprintf(buf, sizeof(buf), format, ap);
+#ifdef HAVE_ANDROID_OS
   __android_log_write(ANDROID_LOG_ERROR, "libsigchain", buf);
+#else
+  std::cout << buf << "\n";
+#endif
   va_end(ap);
 }
 
@@ -104,10 +114,16 @@
   if ((action.sa_flags & SA_SIGINFO) == 0) {
     if (action.sa_handler != NULL) {
       action.sa_handler(sig);
+    } else {
+       signal(sig, SIG_DFL);
+       raise(sig);
     }
   } else {
     if (action.sa_sigaction != NULL) {
       action.sa_sigaction(sig, info, context);
+    } else {
+       signal(sig, SIG_DFL);
+       raise(sig);
     }
   }
 }
diff --git a/test/Android.oat.mk b/test/Android.oat.mk
index fec2540..efd1793 100644
--- a/test/Android.oat.mk
+++ b/test/Android.oat.mk
@@ -203,6 +203,7 @@
 	ANDROID_ROOT=$(HOST_OUT) \
 	ANDROID_LOG_TAGS='*:d' \
 	LD_LIBRARY_PATH=$$($(2)ART_HOST_OUT_SHARED_LIBRARIES) \
+	LD_PRELOAD=libsigchain$$(ART_HOST_SHLIB_EXTENSION) \
 	$(HOST_OUT_EXECUTABLES)/dalvikvm$$($(2)ART_PHONY_TEST_HOST_SUFFIX) $(DALVIKVM_FLAGS) $(5) \
 	    -XXlib:libartd$(HOST_SHLIB_SUFFIX) -Ximage:$$(HOST_CORE_IMG_LOCATION) \
 	    -classpath $(ART_HOST_TEST_DIR)/android-data-$$@/oat-test-dex-$(1).jar \