Add stack overflow checks and NPE checks to the optimizing compiler.
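
The optimizing compiler now emits a stack overflow check on method
entry unless the method is a leaf with a small frame. The IsLargeFrame
heuristic moves from mir_to_lir.h to the new
compiler/utils/stack_checks.h so that both backends can share it. Each
code generator chooses between an explicit check (compare SP against
Thread::StackEnd and branch to a slow path that jumps to
pThrowStackOverflow) and an implicit one (probe an address below SP,
e.g. testl EAX, [ESP - GetStackOverflowReservedBytes(kX86)] on x86,
and let the fault handler throw); x86-64 currently enables the
explicit form.

Instance invokes now null-check their receiver: the builder emits an
HNullCheck on the first argument and keeps it live in an HTemporary,
which is why the Temporaries helper moves above BuildInvoke. With
these checks in place, the SignalTest variants are no longer marked
broken for the optimizing backend.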

Change-Id: I59e97448bf29778769b79b51ee4ea43f43493d96
diff --git a/build/Android.common_test.mk b/build/Android.common_test.mk
index 1666b8b..56be8ea 100644
--- a/build/Android.common_test.mk
+++ b/build/Android.common_test.mk
@@ -21,10 +21,7 @@
 
 # List of known broken tests that we won't attempt to execute. The test name must be the full
 # rule name such as test-art-host-oat-optimizing-HelloWorld64.
-ART_TEST_KNOWN_BROKEN := \
-  test-art-host-oat-optimizing-SignalTest64 \
-  test-art-host-oat-optimizing-SignalTest32 \
-  test-art-target-oat-optimizing-SignalTest32
+ART_TEST_KNOWN_BROKEN :=
 
 # List of known failing tests that, when executed, won't prevent test execution from finishing.
 # The test name must be the full rule name such as test-art-host-oat-optimizing-HelloWorld64.
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index c7fafcb..19a7a7a 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -32,6 +32,7 @@
 #include "utils/array_ref.h"
 #include "utils/arena_allocator.h"
 #include "utils/growable_array.h"
+#include "utils/stack_checks.h"
 
 namespace art {
 
@@ -205,29 +206,11 @@
 #define SLOW_TYPE_PATH (cu_->enable_debug & (1 << kDebugSlowTypePath))
 #define EXERCISE_SLOWEST_STRING_PATH (cu_->enable_debug & (1 << kDebugSlowestStringPath))
 
-// Size of a frame that we definitely consider large. Anything larger than this should
-// definitely get a stack overflow check.
-static constexpr size_t kLargeFrameSize = 2 * KB;
-
-// Size of a frame that should be small. Anything leaf method smaller than this should run
-// without a stack overflow check.
-// The constant is from experience with frameworks code.
-static constexpr size_t kSmallFrameSize = 1 * KB;
-
 class Mir2Lir : public Backend {
   public:
     static constexpr bool kFailOnSizeError = true && kIsDebugBuild;
     static constexpr bool kReportSizeError = true && kIsDebugBuild;
 
-    // Determine whether a frame is small or large, used in the decision on whether to elide a
-    // stack overflow check on method entry.
-    //
-    // A frame is considered large when it's either above kLargeFrameSize, or a quarter of the
-    // overflow-usable stack space.
-    static bool IsLargeFrame(size_t size, InstructionSet isa) {
-      return size >= kLargeFrameSize || size >= GetStackOverflowReservedBytes(isa) / 4;
-    }
-
     /*
      * Auxiliary information describing the location of data embedded in the Dalvik
      * byte code stream.
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index f594129..1f0b361 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -34,6 +34,37 @@
 
 namespace art {
 
+/**
+ * Helper class to add HTemporary instructions. This class is used when
+ * converting a DEX instruction into multiple HInstructions, where those
+ * instructions do not die at the following instruction but instead span
+ * multiple instructions.
+ */
+class Temporaries : public ValueObject {
+ public:
+  Temporaries(HGraph* graph, size_t count) : graph_(graph), count_(count), index_(0) {
+    graph_->UpdateNumberOfTemporaries(count_);
+  }
+
+  void Add(HInstruction* instruction) {
+    // We currently only support vreg size temps.
+    DCHECK(instruction->GetType() != Primitive::kPrimLong
+           && instruction->GetType() != Primitive::kPrimDouble);
+    HInstruction* temp = new (graph_->GetArena()) HTemporary(index_++);
+    instruction->GetBlock()->AddInstruction(temp);
+    DCHECK(temp->GetPrevious() == instruction);
+  }
+
+ private:
+  HGraph* const graph_;
+
+  // The total number of temporaries that will be used.
+  const size_t count_;
+
+  // Current index in the temporary stack, updated by `Add`.
+  size_t index_;
+};
+
 static bool IsTypeSupported(Primitive::Type type) {
   return type != Primitive::kPrimFloat && type != Primitive::kPrimDouble;
 }
@@ -308,9 +339,13 @@
       arena_, number_of_arguments, return_type, dex_offset, method_idx);
 
   size_t start_index = 0;
+  Temporaries temps(graph_, is_instance_call ? 1 : 0);
   if (is_instance_call) {
     HInstruction* arg = LoadLocal(is_range ? register_index : args[0], Primitive::kPrimNot);
-    invoke->SetArgumentAt(0, arg);
+    HNullCheck* null_check = new (arena_) HNullCheck(arg, dex_offset);
+    current_block_->AddInstruction(null_check);
+    temps.Add(null_check);
+    invoke->SetArgumentAt(0, null_check);
     start_index = 1;
   }
 
@@ -343,37 +378,6 @@
   return true;
 }
 
-/**
- * Helper class to add HTemporary instructions. This class is used when
- * converting a DEX instruction to multiple HInstruction, and where those
- * instructions do not die at the following instruction, but instead spans
- * multiple instructions.
- */
-class Temporaries : public ValueObject {
- public:
-  Temporaries(HGraph* graph, size_t count) : graph_(graph), count_(count), index_(0) {
-    graph_->UpdateNumberOfTemporaries(count_);
-  }
-
-  void Add(HInstruction* instruction) {
-    // We currently only support vreg size temps.
-    DCHECK(instruction->GetType() != Primitive::kPrimLong
-           && instruction->GetType() != Primitive::kPrimDouble);
-    HInstruction* temp = new (graph_->GetArena()) HTemporary(index_++);
-    instruction->GetBlock()->AddInstruction(temp);
-    DCHECK(temp->GetPrevious() == instruction);
-  }
-
- private:
-  HGraph* const graph_;
-
-  // The total number of temporaries that will be used.
-  const size_t count_;
-
-  // Current index in the temporary stack, updated by `Add`.
-  size_t index_;
-};
-
 bool HGraphBuilder::BuildFieldAccess(const Instruction& instruction,
                                      uint32_t dex_offset,
                                      bool is_put) {
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index e0db0f1..c0964e6 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -37,6 +37,9 @@
   block_labels_.SetSize(blocks.Size());
 
   DCHECK_EQ(frame_size_, kUninitializedFrameSize);
+  // The baseline compiler does not do graph analysis prior to generating code, so we do
+  // not know yet whether this method makes calls. Conservatively assume it is not a leaf.
+  MarkNotLeaf();
   ComputeFrameSize(GetGraph()->GetMaximumNumberOfOutVRegs()
                    + GetGraph()->GetNumberOfLocalVRegs()
                    + GetGraph()->GetNumberOfTemporaries()
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 18e3e5a..936ca28 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -131,6 +131,14 @@
   void BuildNativeGCMap(
       std::vector<uint8_t>* vector, const DexCompilationUnit& dex_compilation_unit) const;
 
+  bool IsLeafMethod() const {
+    return is_leaf_;
+  }
+
+  void MarkNotLeaf() {
+    is_leaf_ = false;
+  }
+
  protected:
   CodeGenerator(HGraph* graph, size_t number_of_registers)
       : frame_size_(kUninitializedFrameSize),
@@ -138,7 +146,8 @@
         block_labels_(graph->GetArena(), 0),
         pc_infos_(graph->GetArena(), 32),
         slow_paths_(graph->GetArena(), 8),
-        blocked_registers_(graph->GetArena()->AllocArray<bool>(number_of_registers)) {}
+        blocked_registers_(graph->GetArena()->AllocArray<bool>(number_of_registers)),
+        is_leaf_(true) {}
   ~CodeGenerator() {}
 
   // Register allocation logic.
@@ -171,6 +180,8 @@
   // Temporary data structure used when doing register allocation.
   bool* const blocked_registers_;
 
+  bool is_leaf_;
+
   DISALLOW_COPY_AND_ASSIGN(CodeGenerator);
 };
 
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 93e7367..90ec6cf 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -24,6 +24,7 @@
 #include "utils/assembler.h"
 #include "utils/arm/assembler_arm.h"
 #include "utils/arm/managed_register_arm.h"
+#include "utils/stack_checks.h"
 
 namespace art {
 
@@ -33,6 +34,11 @@
 
 namespace arm {
 
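+// Whether GenerateFrameEntry emits an explicit stack overflow check (compare SP against
+// Thread::StackEnd and branch to a slow path) or an implicit one (a load from below SP
+// that faults when the stack has overflowed).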
+static constexpr bool kExplicitStackOverflowCheck = false;
+
+static constexpr int kNumberOfPushedRegistersAtEntry = 1 + 2;  // LR, R6, R7
+static constexpr int kCurrentMethodStackOffset = 0;
+
 #define __ reinterpret_cast<ArmAssembler*>(codegen->GetAssembler())->
 
 class NullCheckSlowPathARM : public SlowPathCode {
@@ -52,6 +58,20 @@
   DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathARM);
 };
 
+class StackOverflowCheckSlowPathARM : public SlowPathCode {
+ public:
+  StackOverflowCheckSlowPathARM() {}
+
+  virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    __ Bind(GetEntryLabel());
+    __ LoadFromOffset(kLoadWord, PC, TR,
+        QUICK_ENTRYPOINT_OFFSET(kArmWordSize, pThrowStackOverflow).Int32Value());
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(StackOverflowCheckSlowPathARM);
+};
+
 #undef __
 #define __ reinterpret_cast<ArmAssembler*>(GetAssembler())->
 
@@ -83,9 +103,6 @@
   return EQ;        // Unreachable.
 }
 
-static constexpr int kNumberOfPushedRegistersAtEntry = 1 + 2;  // LR, R6, R7
-static constexpr int kCurrentMethodStackOffset = 0;
-
 void CodeGeneratorARM::DumpCoreRegister(std::ostream& stream, int reg) const {
   stream << ArmManagedRegister::FromCoreRegister(Register(reg));
 }
@@ -207,6 +224,22 @@
         codegen_(codegen) {}
 
 void CodeGeneratorARM::GenerateFrameEntry() {
+  bool skip_overflow_check = IsLeafMethod() && !IsLargeFrame(GetFrameSize(), InstructionSet::kArm);
+  if (!skip_overflow_check) {
+    if (kExplicitStackOverflowCheck) {
+      SlowPathCode* slow_path = new (GetGraph()->GetArena()) StackOverflowCheckSlowPathARM();
+      AddSlowPath(slow_path);
+
+      __ LoadFromOffset(kLoadWord, IP, TR, Thread::StackEndOffset<kArmWordSize>().Int32Value());
+      __ cmp(SP, ShifterOperand(IP));
+      __ b(slow_path->GetEntryLabel(), CC);
+    } else {
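+      // Implicit check: load from SP minus the reserved stack size. On overflow the load
+      // faults, and the pc info recorded below lets the runtime map the fault back to
+      // this method.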
+      __ AddConstant(IP, SP, -static_cast<int32_t>(GetStackOverflowReservedBytes(kArm)));
+      __ ldr(IP, Address(IP, 0));
+      RecordPcInfo(0);
+    }
+  }
+
   core_spill_mask_ |= (1 << LR | 1 << R6 | 1 << R7);
   __ PushList(1 << LR | 1 << R6 | 1 << R7);
 
@@ -720,6 +753,7 @@
 }
 
 void LocationsBuilderARM::VisitInvokeStatic(HInvokeStatic* invoke) {
+  codegen_->MarkNotLeaf();
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(invoke);
   locations->AddTemp(ArmCoreLocation(R0));
 
@@ -785,6 +819,7 @@
   __ blx(LR);
 
   codegen_->RecordPcInfo(invoke->GetDexPc());
+  DCHECK(!codegen_->IsLeafMethod());
 }
 
 void LocationsBuilderARM::VisitAdd(HAdd* add) {
@@ -923,6 +958,7 @@
 };
 
 void LocationsBuilderARM::VisitNewInstance(HNewInstance* instruction) {
+  codegen_->MarkNotLeaf();
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
   InvokeRuntimeCallingConvention calling_convention;
   locations->AddTemp(ArmCoreLocation(calling_convention.GetRegisterAt(0)));
@@ -941,6 +977,7 @@
   __ blx(LR);
 
   codegen_->RecordPcInfo(instruction->GetDexPc());
+  DCHECK(!codegen_->IsLeafMethod());
 }
 
 void LocationsBuilderARM::VisitParameterValue(HParameterValue* instruction) {
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index c44b761..f1eb372 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -17,6 +17,7 @@
 #include "code_generator_x86.h"
 #include "gc/accounting/card_table.h"
 #include "utils/assembler.h"
+#include "utils/stack_checks.h"
 #include "utils/x86/assembler_x86.h"
 #include "utils/x86/managed_register_x86.h"
 
@@ -33,6 +34,11 @@
 
 namespace x86 {
 
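+// Whether to emit an explicit stack overflow check or an implicit probe below ESP on
+// method entry.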
+static constexpr bool kExplicitStackOverflowCheck = false;
+
+static constexpr int kNumberOfPushedRegistersAtEntry = 1;
+static constexpr int kCurrentMethodStackOffset = 0;
+
 #define __ reinterpret_cast<X86Assembler*>(codegen->GetAssembler())->
 
 class NullCheckSlowPathX86 : public SlowPathCode {
@@ -50,6 +56,21 @@
   DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathX86);
 };
 
+class StackOverflowCheckSlowPathX86 : public SlowPathCode {
+ public:
+  StackOverflowCheckSlowPathX86() {}
+
+  virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    __ Bind(GetEntryLabel());
+    __ addl(ESP,
+            Immediate(codegen->GetFrameSize() - kNumberOfPushedRegistersAtEntry * kX86WordSize));
+    __ fs()->jmp(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pThrowStackOverflow)));
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(StackOverflowCheckSlowPathX86);
+};
+
 #undef __
 #define __ reinterpret_cast<X86Assembler*>(GetAssembler())->
 
@@ -67,9 +88,6 @@
   return kEqual;
 }
 
-static constexpr int kNumberOfPushedRegistersAtEntry = 1;
-static constexpr int kCurrentMethodStackOffset = 0;
-
 void CodeGeneratorX86::DumpCoreRegister(std::ostream& stream, int reg) const {
   stream << X86ManagedRegister::FromCpuRegister(Register(reg));
 }
@@ -186,6 +204,20 @@
 
   // The return PC has already been pushed on the stack.
   __ subl(ESP, Immediate(GetFrameSize() - kNumberOfPushedRegistersAtEntry * kX86WordSize));
+
+  bool skip_overflow_check = IsLeafMethod() && !IsLargeFrame(GetFrameSize(), InstructionSet::kX86);
+  if (!skip_overflow_check) {
+    if (kExplicitStackOverflowCheck) {
+      SlowPathCode* slow_path = new (GetGraph()->GetArena()) StackOverflowCheckSlowPathX86();
+      AddSlowPath(slow_path);
+
+      __ fs()->cmpl(ESP, Address::Absolute(Thread::StackEndOffset<kX86WordSize>()));
+      __ j(kLess, slow_path->GetEntryLabel());
+    } else {
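+      // Implicit check: testl reads from ESP minus the reserved stack size without
+      // clobbering any register and faults if the stack has overflowed.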
+      __ testl(EAX, Address(ESP, -static_cast<int32_t>(GetStackOverflowReservedBytes(kX86))));
+    }
+  }
+
   __ movl(Address(ESP, kCurrentMethodStackOffset), EAX);
 }
 
@@ -682,6 +714,7 @@
 }
 
 void LocationsBuilderX86::VisitInvokeStatic(HInvokeStatic* invoke) {
+  codegen_->MarkNotLeaf();
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(invoke);
   locations->AddTemp(X86CpuLocation(EAX));
 
@@ -739,6 +772,7 @@
   // (temp + offset_of_quick_compiled_code)()
   __ call(Address(temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().Int32Value()));
 
+  DCHECK(!codegen_->IsLeafMethod());
   codegen_->RecordPcInfo(invoke->GetDexPc());
 }
 
@@ -889,6 +923,7 @@
 }
 
 void LocationsBuilderX86::VisitNewInstance(HNewInstance* instruction) {
+  codegen_->MarkNotLeaf();
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
   locations->SetOut(X86CpuLocation(EAX));
   InvokeRuntimeCallingConvention calling_convention;
@@ -906,6 +941,7 @@
       Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pAllocObjectWithAccessCheck)));
 
   codegen_->RecordPcInfo(instruction->GetDexPc());
+  DCHECK(!codegen_->IsLeafMethod());
 }
 
 void LocationsBuilderX86::VisitParameterValue(HParameterValue* instruction) {
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 9c12771..b7c2ad8 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -126,7 +126,7 @@
 class CodeGeneratorX86 : public CodeGenerator {
  public:
   explicit CodeGeneratorX86(HGraph* graph);
-  virtual ~CodeGeneratorX86() { }
+  virtual ~CodeGeneratorX86() {}
 
   virtual void GenerateFrameEntry() OVERRIDE;
   virtual void GenerateFrameExit() OVERRIDE;
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index d20dff0..641e8e1 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -23,6 +23,7 @@
 #include "mirror/object_reference.h"
 #include "thread.h"
 #include "utils/assembler.h"
+#include "utils/stack_checks.h"
 #include "utils/x86_64/assembler_x86_64.h"
 #include "utils/x86_64/managed_register_x86_64.h"
 
@@ -34,6 +35,15 @@
 
 namespace x86_64 {
 
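+// Unlike on ARM and x86, the explicit stack overflow check is currently enabled here.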
+static constexpr bool kExplicitStackOverflowCheck = true;
+
+// Some x86_64 instructions require a register to be available as a temporary.
+static constexpr Register TMP = R11;
+
+static constexpr int kNumberOfPushedRegistersAtEntry = 1;
+static constexpr int kCurrentMethodStackOffset = 0;
+
 #define __ reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler())->
 
 class NullCheckSlowPathX86_64 : public SlowPathCode {
@@ -42,7 +52,8 @@
 
   virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     __ Bind(GetEntryLabel());
-    __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pThrowNullPointer), true));
+    __ gs()->call(
+        Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pThrowNullPointer), true));
     codegen->RecordPcInfo(dex_pc_);
   }
 
@@ -51,6 +62,22 @@
   DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathX86_64);
 };
 
+class StackOverflowCheckSlowPathX86_64 : public SlowPathCode {
+ public:
+  StackOverflowCheckSlowPathX86_64() {}
+
+  virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    __ Bind(GetEntryLabel());
+    __ addq(CpuRegister(RSP),
+            Immediate(codegen->GetFrameSize() - kNumberOfPushedRegistersAtEntry * kX86_64WordSize));
+    __ gs()->jmp(
+        Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pThrowStackOverflow), true));
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(StackOverflowCheckSlowPathX86_64);
+};
+
 #undef __
 #define __ reinterpret_cast<X86_64Assembler*>(GetAssembler())->
 
@@ -68,12 +95,6 @@
   return kEqual;
 }
 
-// Some x86_64 instructions require a register to be available as temp.
-static constexpr Register TMP = R11;
-
-static constexpr int kNumberOfPushedRegistersAtEntry = 1;
-static constexpr int kCurrentMethodStackOffset = 0;
-
 void CodeGeneratorX86_64::DumpCoreRegister(std::ostream& stream, int reg) const {
   stream << X86_64ManagedRegister::FromCpuRegister(Register(reg));
 }
@@ -148,7 +169,26 @@
   core_spill_mask_ |= (1 << kFakeReturnRegister);
 
   // The return PC has already been pushed on the stack.
-  __ subq(CpuRegister(RSP), Immediate(GetFrameSize() - kNumberOfPushedRegistersAtEntry * kX86_64WordSize));
+  __ subq(CpuRegister(RSP),
+          Immediate(GetFrameSize() - kNumberOfPushedRegistersAtEntry * kX86_64WordSize));
+
+  bool skip_overflow_check = IsLeafMethod()
+      && !IsLargeFrame(GetFrameSize(), InstructionSet::kX86_64);
+
+  if (!skip_overflow_check) {
+    if (kExplicitStackOverflowCheck) {
+      SlowPathCode* slow_path = new (GetGraph()->GetArena()) StackOverflowCheckSlowPathX86_64();
+      AddSlowPath(slow_path);
+
+      __ gs()->cmpq(CpuRegister(RSP),
+                    Address::Absolute(Thread::StackEndOffset<kX86_64WordSize>(), true));
+      __ j(kLess, slow_path->GetEntryLabel());
+    } else {
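+      // Implicit check: same probe as on x86, with a 64-bit test below RSP.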
+      __ testq(CpuRegister(RAX), Address(
+          CpuRegister(RSP), -static_cast<int32_t>(GetStackOverflowReservedBytes(kX86_64))));
+    }
+  }
+
   __ movl(Address(CpuRegister(RSP), kCurrentMethodStackOffset), CpuRegister(RDI));
 }
 
@@ -619,6 +659,7 @@
 }
 
 void LocationsBuilderX86_64::VisitInvokeStatic(HInvokeStatic* invoke) {
+  codegen_->MarkNotLeaf();
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(invoke);
   locations->AddTemp(X86_64CpuLocation(RDI));
 
@@ -673,6 +714,7 @@
   // (temp + offset_of_quick_compiled_code)()
   __ call(Address(temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().SizeValue()));
 
+  DCHECK(!codegen_->IsLeafMethod());
   codegen_->RecordPcInfo(invoke->GetDexPc());
 }
 
@@ -809,6 +851,7 @@
 }
 
 void LocationsBuilderX86_64::VisitNewInstance(HNewInstance* instruction) {
+  codegen_->MarkNotLeaf();
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
   locations->SetOut(X86_64CpuLocation(RAX));
   instruction->SetLocations(locations);
@@ -822,6 +865,7 @@
   __ gs()->call(Address::Absolute(
       QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocObjectWithAccessCheck), true));
 
+  DCHECK(!codegen_->IsLeafMethod());
   codegen_->RecordPcInfo(instruction->GetDexPc());
 }
 
diff --git a/compiler/utils/stack_checks.h b/compiler/utils/stack_checks.h
new file mode 100644
index 0000000..63adbc2
--- /dev/null
+++ b/compiler/utils/stack_checks.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_UTILS_STACK_CHECKS_H_
+#define ART_COMPILER_UTILS_STACK_CHECKS_H_
+
+#include "instruction_set.h"
+
+namespace art {
+
+// Size of a frame that we consider large. Anything larger than this always gets a
+// stack overflow check.
+static constexpr size_t kLargeFrameSize = 2 * KB;
+
+// Size of a frame that we consider small. Any leaf method with a frame smaller than
+// this should run without a stack overflow check.
+// The constant is from experience with frameworks code.
+static constexpr size_t kSmallFrameSize = 1 * KB;
+
+// Determine whether a frame is small or large, used in the decision on whether to elide a
+// stack overflow check on method entry.
+//
+// A frame is considered large when it is either above kLargeFrameSize, or at least a
+// quarter of the overflow-usable stack space.
+static inline bool IsLargeFrame(size_t size, InstructionSet isa) {
+  return size >= kLargeFrameSize || size >= GetStackOverflowReservedBytes(isa) / 4;
+}
+
+}  // namespace art
+
+#endif  // ART_COMPILER_UTILS_STACK_CHECKS_H_
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index 56c6536..b6a5c20 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -806,6 +806,13 @@
 }
 
 
+void X86Assembler::testl(Register reg, const Address& address) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
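+  // 0x85 /r: TEST r/m32, r32.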
+  EmitUint8(0x85);
+  EmitOperand(reg, address);
+}
+
+
 void X86Assembler::testl(Register reg, const Immediate& immediate) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   // For registers that have a byte variant (EAX, EBX, ECX, and EDX)
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index 2fc6049..ce20768 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -346,6 +346,7 @@
 
   void testl(Register reg1, Register reg2);
   void testl(Register reg, const Immediate& imm);
+  void testl(Register reg1, const Address& address);
 
   void andl(Register dst, const Immediate& imm);
   void andl(Register dst, Register src);
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index 1dbef95..0d14376 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -950,6 +950,14 @@
 }
 
 
+void X86_64Assembler::testq(CpuRegister reg, const Address& address) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitRex64(reg);
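+  // REX.W + 0x85 /r: TEST r/m64, r64.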
+  EmitUint8(0x85);
+  EmitOperand(reg.LowBits(), address);
+}
+
+
 void X86_64Assembler::andl(CpuRegister dst, CpuRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitOptionalRex32(dst, src);
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index e988029..1d6655c 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -391,6 +391,8 @@
   void testl(CpuRegister reg1, CpuRegister reg2);
   void testl(CpuRegister reg, const Immediate& imm);
 
+  void testq(CpuRegister reg, const Address& address);
+
   void andl(CpuRegister dst, const Immediate& imm);
   void andl(CpuRegister dst, CpuRegister src);
   void andq(CpuRegister dst, const Immediate& imm);
diff --git a/test/401-optimizing-compiler/src/Main.java b/test/401-optimizing-compiler/src/Main.java
index 0d8eeb9..2c6d1c2 100644
--- a/test/401-optimizing-compiler/src/Main.java
+++ b/test/401-optimizing-compiler/src/Main.java
@@ -85,6 +85,27 @@
         Object o = new byte[1024];
       }
     }
+
+    // Test that we do NPE checks on invoke-direct.
+    Exception exception = null;
+    try {
+      invokePrivate();
+    } catch (NullPointerException e) {
+      exception = e;
+    }
+
+    if (exception == null) {
+      throw new Error("Missing NullPointerException");
+    }
+  }
+
+  public static void invokePrivate() {
+    Main m = null;
+    m.privateMethod();
+  }
+
+  private void privateMethod() {
+    Object o = new Object();
   }
 
   static int $opt$TestInvokeIntParameter(int param) {