Add reserve area to allocation stacks.

This fixes heap verification failures that occurred when the
allocation stack overflowed. The newly allocated object was only
stored in a handle scope and was not live in either the live bitmap
or the allocation stack, which heap verification requires for roots.
We now push the object into the reserve area before doing the GC
caused by the allocation stack overflow.
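
A minimal standalone sketch of the growth-limit / reserve split this
patch introduces in AtomicStack. Plain std::atomic stands in for ART's
Atomic types, and the names (BoundedStack, PushBack,
PushBackIgnoreGrowthLimit) are illustrative only, not ART code:

    #include <atomic>
    #include <cstddef>
    #include <vector>

    using std::size_t;

    template <typename T>
    class BoundedStack {
     public:
      BoundedStack(size_t growth_limit, size_t capacity)
          : growth_limit_(growth_limit), capacity_(capacity), back_(0), slots_(capacity) {}

      // Ordinary push: fails once the soft growth limit is reached.
      bool PushBack(const T& value) { return PushBackInternal(value, growth_limit_); }

      // Slow-path push: may spill into the reserve between growth_limit_ and capacity_.
      bool PushBackIgnoreGrowthLimit(const T& value) { return PushBackInternal(value, capacity_); }

     private:
      bool PushBackInternal(const T& value, size_t limit) {
        size_t index = back_.load(std::memory_order_relaxed);
        do {
          if (index >= limit) {
            return false;  // Overflowed the given limit.
          }
          // On failure, compare_exchange_weak reloads index with the current value.
        } while (!back_.compare_exchange_weak(index, index + 1, std::memory_order_relaxed));
        slots_[index] = value;
        return true;
      }

      const size_t growth_limit_;  // Soft limit for ordinary pushes.
      const size_t capacity_;      // Hard limit: growth_limit_ plus the reserve.
      std::atomic<size_t> back_;
      std::vector<T> slots_;
    };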

Change-Id: I83b42c4b3250d7eaab1b49e53066e21c8656a740
diff --git a/runtime/gc/accounting/atomic_stack.h b/runtime/gc/accounting/atomic_stack.h
index 979970c..bd04473 100644
--- a/runtime/gc/accounting/atomic_stack.h
+++ b/runtime/gc/accounting/atomic_stack.h
@@ -35,8 +35,8 @@
 class AtomicStack {
  public:
   // Capacity is how many elements we can store in the stack.
-  static AtomicStack* Create(const std::string& name, size_t capacity) {
-    std::unique_ptr<AtomicStack> mark_stack(new AtomicStack(name, capacity));
+  static AtomicStack* Create(const std::string& name, size_t growth_limit, size_t capacity) {
+    std::unique_ptr<AtomicStack> mark_stack(new AtomicStack(name, growth_limit, capacity));
     mark_stack->Init();
     return mark_stack.release();
   }
@@ -44,7 +44,7 @@
   ~AtomicStack() {}
 
   void Reset() {
-    DCHECK(mem_map_.get() != NULL);
+    DCHECK(mem_map_.get() != nullptr);
     DCHECK(begin_ != NULL);
     front_index_.StoreRelaxed(0);
     back_index_.StoreRelaxed(0);
@@ -58,20 +58,13 @@
   // Beware: Mixing atomic pushes and atomic pops will cause ABA problem.
 
   // Returns false if we overflowed the stack.
+  bool AtomicPushBackIgnoreGrowthLimit(const T& value) {
+    return AtomicPushBackInternal(value, capacity_);
+  }
+
+  // Returns false if we overflowed the stack.
   bool AtomicPushBack(const T& value) {
-    if (kIsDebugBuild) {
-      debug_is_sorted_ = false;
-    }
-    int32_t index;
-    do {
-      index = back_index_.LoadRelaxed();
-      if (UNLIKELY(static_cast<size_t>(index) >= capacity_)) {
-        // Stack overflow.
-        return false;
-      }
-    } while (!back_index_.CompareExchangeWeakRelaxed(index, index + 1));
-    begin_[index] = value;
-    return true;
+    return AtomicPushBackInternal(value, growth_limit_);
   }
 
   // Atomically bump the back index by the given number of
@@ -85,7 +78,7 @@
     do {
       index = back_index_.LoadRelaxed();
       new_index = index + num_slots;
-      if (UNLIKELY(static_cast<size_t>(new_index) >= capacity_)) {
+      if (UNLIKELY(static_cast<size_t>(new_index) >= growth_limit_)) {
         // Stack overflow.
         return false;
       }
@@ -115,7 +108,7 @@
       debug_is_sorted_ = false;
     }
     int32_t index = back_index_.LoadRelaxed();
-    DCHECK_LT(static_cast<size_t>(index), capacity_);
+    DCHECK_LT(static_cast<size_t>(index), growth_limit_);
     back_index_.StoreRelaxed(index + 1);
     begin_[index] = value;
   }
@@ -165,6 +158,7 @@
   // Will clear the stack.
   void Resize(size_t new_capacity) {
     capacity_ = new_capacity;
+    growth_limit_ = new_capacity;
     Init();
   }
 
@@ -189,15 +183,33 @@
   }
 
  private:
-  AtomicStack(const std::string& name, const size_t capacity)
+  AtomicStack(const std::string& name, size_t growth_limit, size_t capacity)
       : name_(name),
         back_index_(0),
         front_index_(0),
-        begin_(NULL),
+        begin_(nullptr),
+        growth_limit_(growth_limit),
         capacity_(capacity),
         debug_is_sorted_(true) {
   }
 
+  // Returns false if we overflowed the stack.
+  bool AtomicPushBackInternal(const T& value, size_t limit) ALWAYS_INLINE {
+    if (kIsDebugBuild) {
+      debug_is_sorted_ = false;
+    }
+    int32_t index;
+    do {
+      index = back_index_.LoadRelaxed();
+      if (UNLIKELY(static_cast<size_t>(index) >= limit)) {
+        // Stack overflow.
+        return false;
+      }
+    } while (!back_index_.CompareExchangeWeakRelaxed(index, index + 1));
+    begin_[index] = value;
+    return true;
+  }
+
   // Size in number of elements.
   void Init() {
     std::string error_msg;
@@ -213,22 +225,18 @@
 
   // Name of the mark stack.
   std::string name_;
-
   // Memory mapping of the atomic stack.
   std::unique_ptr<MemMap> mem_map_;
-
   // Back index (index after the last element pushed).
   AtomicInteger back_index_;
-
   // Front index, used for implementing PopFront.
   AtomicInteger front_index_;
-
   // Base of the atomic stack.
   T* begin_;
-
+  // Current maximum we can push back to; must be <= capacity_.
+  size_t growth_limit_;
   // Maximum number of elements.
   size_t capacity_;
-
   // Whether or not the stack is sorted, only updated in debug mode to avoid performance overhead.
   bool debug_is_sorted_;
 
diff --git a/runtime/gc/heap-inl.h b/runtime/gc/heap-inl.h
index 03b72b6..58ba61b 100644
--- a/runtime/gc/heap-inl.h
+++ b/runtime/gc/heap-inl.h
@@ -137,33 +137,11 @@
 
 inline void Heap::PushOnAllocationStack(Thread* self, mirror::Object** obj) {
   if (kUseThreadLocalAllocationStack) {
-    bool success = self->PushOnThreadLocalAllocationStack(*obj);
-    if (UNLIKELY(!success)) {
-      // Slow path. Allocate a new thread-local allocation stack.
-      mirror::Object** start_address;
-      mirror::Object** end_address;
-      while (!allocation_stack_->AtomicBumpBack(kThreadLocalAllocationStackSize,
-                                                &start_address, &end_address)) {
-        // TODO: Add handle VerifyObject.
-        StackHandleScope<1> hs(self);
-        HandleWrapper<mirror::Object> wrapper(hs.NewHandleWrapper(obj));
-        CollectGarbageInternal(collector::kGcTypeSticky, kGcCauseForAlloc, false);
-      }
-      self->SetThreadLocalAllocationStack(start_address, end_address);
-      // Retry on the new thread-local allocation stack.
-      success = self->PushOnThreadLocalAllocationStack(*obj);
-      // Must succeed.
-      CHECK(success);
+    if (UNLIKELY(!self->PushOnThreadLocalAllocationStack(*obj))) {
+      PushOnThreadLocalAllocationStackWithInternalGC(self, obj);
     }
-  } else {
-    // This is safe to do since the GC will never free objects which are neither in the allocation
-    // stack or the live bitmap.
-    while (!allocation_stack_->AtomicPushBack(*obj)) {
-      // TODO: Add handle VerifyObject.
-      StackHandleScope<1> hs(self);
-      HandleWrapper<mirror::Object> wrapper(hs.NewHandleWrapper(obj));
-      CollectGarbageInternal(collector::kGcTypeSticky, kGcCauseForAlloc, false);
-    }
+  } else if (UNLIKELY(!allocation_stack_->AtomicPushBack(*obj))) {
+    PushOnAllocationStackWithInternalGC(self, obj);
   }
 }
 
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index fdc4367..a6093ca 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -84,9 +84,14 @@
 static constexpr double kStickyGcThroughputAdjustment = 1.0;
 // Whether or not we use the free list large object space.
 static constexpr bool kUseFreeListSpaceForLOS = false;
-// Whtehr or not we compact the zygote in PreZygoteFork.
+// Whether or not we compact the zygote in PreZygoteFork.
 static constexpr bool kCompactZygote = kMovingCollector;
 static constexpr size_t kNonMovingSpaceCapacity = 64 * MB;
+// Number of reserve entries at the end of the allocation stack; these are only needed if the
+// allocation stack overflows.
+static constexpr size_t kAllocationStackReserveSize = 1024;
+// Default mark stack size in bytes.
+static const size_t kDefaultMarkStackSize = 64 * KB;
 
 Heap::Heap(size_t initial_size, size_t growth_limit, size_t min_free, size_t max_free,
            double target_utilization, double foreground_heap_growth_multiplier, size_t capacity,
@@ -295,13 +300,13 @@
   // TODO: Count objects in the image space here.
   num_bytes_allocated_.StoreRelaxed(0);
 
-  // Default mark stack size in bytes.
-  static const size_t default_mark_stack_size = 64 * KB;
-  mark_stack_.reset(accounting::ObjectStack::Create("mark stack", default_mark_stack_size));
-  allocation_stack_.reset(accounting::ObjectStack::Create("allocation stack",
-                                                          max_allocation_stack_size_));
-  live_stack_.reset(accounting::ObjectStack::Create("live stack",
-                                                    max_allocation_stack_size_));
+  mark_stack_.reset(accounting::ObjectStack::Create("mark stack", kDefaultMarkStackSize,
+                                                    kDefaultMarkStackSize));
+  const size_t alloc_stack_capacity = max_allocation_stack_size_ + kAllocationStackReserveSize;
+  allocation_stack_.reset(accounting::ObjectStack::Create(
+      "allocation stack", max_allocation_stack_size_, alloc_stack_capacity));
+  live_stack_.reset(accounting::ObjectStack::Create(
+      "live stack", max_allocation_stack_size_, alloc_stack_capacity));
 
   // It's still too early to take a lock because there are no threads yet, but we can create locks
   // now. We don't create it earlier to make it clear that you can't use locks during heap
@@ -2035,6 +2040,43 @@
   const bool verify_referent_;
 };
 
+void Heap::PushOnAllocationStackWithInternalGC(Thread* self, mirror::Object** obj) {
+  // Slow path, the allocation stack push back must have already failed.
+  DCHECK(!allocation_stack_->AtomicPushBack(*obj));
+  do {
+    // TODO: Add handle VerifyObject.
+    StackHandleScope<1> hs(self);
+    HandleWrapper<mirror::Object> wrapper(hs.NewHandleWrapper(obj));
+    // Push our object into the reserve region of the allocation stack. This is only required due
+    // to heap verification requiring that roots are live (either in the live bitmap or in the
+    // allocation stack).
+    CHECK(allocation_stack_->AtomicPushBackIgnoreGrowthLimit(*obj));
+    CollectGarbageInternal(collector::kGcTypeSticky, kGcCauseForAlloc, false);
+  } while (!allocation_stack_->AtomicPushBack(*obj));
+}
+
+void Heap::PushOnThreadLocalAllocationStackWithInternalGC(Thread* self, mirror::Object** obj) {
+  // Slow path, the allocation stack push back must have already failed.
+  DCHECK(!self->PushOnThreadLocalAllocationStack(*obj));
+  mirror::Object** start_address;
+  mirror::Object** end_address;
+  while (!allocation_stack_->AtomicBumpBack(kThreadLocalAllocationStackSize, &start_address,
+                                            &end_address)) {
+    // TODO: Add handle VerifyObject.
+    StackHandleScope<1> hs(self);
+    HandleWrapper<mirror::Object> wrapper(hs.NewHandleWrapper(obj));
+    // Push our object into the reserve region of the allocation stack. This is only required due
+    // to heap verification requiring that roots are live (either in the live bitmap or in the
+    // allocation stack).
+    CHECK(allocation_stack_->AtomicPushBackIgnoreGrowthLimit(*obj));
+    // Run the GC to drain the allocation stack, then retry the bump above.
+    CollectGarbageInternal(collector::kGcTypeSticky, kGcCauseForAlloc, false);
+  }
+  self->SetThreadLocalAllocationStack(start_address, end_address);
+  // Retry on the new thread-local allocation stack.
+  CHECK(self->PushOnThreadLocalAllocationStack(*obj));  // Must succeed.
+}
+
 // Must do this with mutators suspended since we are directly accessing the allocation stacks.
 size_t Heap::VerifyHeapReferences(bool verify_referents) {
   Thread* self = Thread::Current();
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index 887b17e..e11671b 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -698,6 +698,10 @@
   // Push an object onto the allocation stack.
   void PushOnAllocationStack(Thread* self, mirror::Object** obj)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void PushOnAllocationStackWithInternalGC(Thread* self, mirror::Object** obj)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void PushOnThreadLocalAllocationStackWithInternalGC(Thread* thread, mirror::Object** obj)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // What kind of concurrency behavior is the runtime after? Currently true for concurrent mark
   // sweep GC, false for other GC types.
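
Reviewer note: the slow path added to Heap (PushOnAllocationStackWithInternalGC)
has roughly the shape sketched below, reusing the BoundedStack sketch from the
commit message above; gc() is a stand-in for CollectGarbageInternal, and the
handle-scope wrapping of obj is omitted:

    #include <cassert>
    #include <functional>

    struct Object {};

    // gc is assumed to drain and swap-clear the stack, making room for the retry.
    void PushSlowPath(BoundedStack<Object*>& stack, Object** obj,
                      const std::function<void()>& gc) {
      do {
        // Park *obj in the reserve area so heap verification still sees it as a
        // live root (on the allocation stack) while the collection runs.
        bool pushed = stack.PushBackIgnoreGrowthLimit(*obj);
        (void)pushed;
        assert(pushed);  // The reserve must be large enough for all concurrent overflowers.
        gc();
      } while (!stack.PushBack(*obj));  // Retry the ordinary push after the GC.
    }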