Bin packing the zygote (best fit).

We now use bin packing to fill holes in the non-moving space with
objects from the bump pointer space when we create the zygote.

Zygote space PSS reduction on AOSP: ~300k.
Zygote size on AOSP: 2121040 bytes -> 1597944 bytes.
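
For reference, a minimal sketch of the best-fit bookkeeping used by the new
ZygoteCompactingCollector below (a multimap from hole size to start address).
The class and names here are illustrative only, not part of the change:

  #include <cstddef>
  #include <cstdint>
  #include <map>

  class BestFitBins {
   public:
    // Record a free hole of `size` bytes starting at `position`.
    void AddBin(size_t size, uintptr_t position) {
      if (size != 0) {
        bins_.insert(std::make_pair(size, position));
      }
    }

    // Return the start of the smallest hole that fits `object_size`, or 0 if
    // none does. The unused tail of the hole is re-inserted as a smaller bin.
    uintptr_t RemoveBestFit(size_t object_size) {
      auto it = bins_.lower_bound(object_size);  // Smallest bin >= object_size.
      if (it == bins_.end()) {
        return 0;  // No hole large enough; the caller falls back to the to-space.
      }
      size_t hole_size = it->first;
      uintptr_t pos = it->second;
      bins_.erase(it);
      AddBin(hole_size - object_size, pos + object_size);
      return pos;
    }

   private:
    std::multimap<size_t, uintptr_t> bins_;  // Maps hole sizes to start addresses.
  };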

Deleted some unreachable code.

Bug: 11902311

Change-Id: Idc957d69e93b3a941e0c298d47a21b73526dd286
diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc
index 0150609..ffa9154 100644
--- a/runtime/gc/collector/semi_space.cc
+++ b/runtime/gc/collector/semi_space.cc
@@ -306,62 +306,69 @@
   return false;
 }
 
+mirror::Object* SemiSpace::MarkNonForwardedObject(mirror::Object* obj) {
+  size_t object_size = obj->SizeOf();
+  size_t bytes_allocated = 0;
+  mirror::Object* forward_address = nullptr;
+  if (kEnableSimplePromo && reinterpret_cast<byte*>(obj) < last_gc_to_space_end_) {
+    // If it's allocated before the last GC (older), move (pseudo-promote) it to
+    // the non-moving space (as sort of an old generation).
+    size_t bytes_promoted;
+    space::MallocSpace* non_moving_space = GetHeap()->GetNonMovingSpace();
+    forward_address = non_moving_space->Alloc(self_, object_size, &bytes_promoted);
+    if (forward_address == nullptr) {
+      // If out of space, fall back to the to-space.
+      forward_address = to_space_->Alloc(self_, object_size, &bytes_allocated);
+    } else {
+      GetHeap()->num_bytes_allocated_.FetchAndAdd(bytes_promoted);
+      bytes_promoted_ += bytes_promoted;
+      // Mark forward_address on the live bitmap.
+      accounting::SpaceBitmap* live_bitmap = non_moving_space->GetLiveBitmap();
+      DCHECK(live_bitmap != nullptr);
+      DCHECK(!live_bitmap->Test(forward_address));
+      live_bitmap->Set(forward_address);
+      // Mark forward_address on the mark bitmap.
+      accounting::SpaceBitmap* mark_bitmap = non_moving_space->GetMarkBitmap();
+      DCHECK(mark_bitmap != nullptr);
+      DCHECK(!mark_bitmap->Test(forward_address));
+      mark_bitmap->Set(forward_address);
+    }
+    DCHECK(forward_address != nullptr);
+  } else {
+    // If it's allocated after the last GC (younger), copy it to the to-space.
+    forward_address = to_space_->Alloc(self_, object_size, &bytes_allocated);
+  }
+  // Copy over the object. The caller pushes it on the mark stack since we still need to update
+  // its references.
+  memcpy(reinterpret_cast<void*>(forward_address), obj, object_size);
+  if (to_space_live_bitmap_ != nullptr) {
+    to_space_live_bitmap_->Set(forward_address);
+  }
+  return forward_address;
+}
+
 // Used to mark and copy objects. Any newly-marked objects who are in the from space get moved to
 // the to-space and have their forward address updated. Objects which have been newly marked are
 // pushed on the mark stack.
 Object* SemiSpace::MarkObject(Object* obj) {
-  Object* ret = obj;
+  Object* forward_address = obj;
   if (obj != nullptr && !IsImmune(obj)) {
     if (from_space_->HasAddress(obj)) {
-      mirror::Object* forward_address = GetForwardingAddressInFromSpace(obj);
+      forward_address = GetForwardingAddressInFromSpace(obj);
       // If the object has already been moved, return the new forward address.
       if (forward_address == nullptr) {
-        // Otherwise, we need to move the object and add it to the markstack for processing.
-        size_t object_size = obj->SizeOf();
-        size_t bytes_allocated = 0;
-        if (kEnableSimplePromo && reinterpret_cast<byte*>(obj) < last_gc_to_space_end_) {
-          // If it's allocated before the last GC (older), move (pseudo-promote) it to
-          // the non-moving space (as sort of an old generation.)
-          size_t bytes_promoted;
-          space::MallocSpace* non_moving_space = GetHeap()->GetNonMovingSpace();
-          forward_address = non_moving_space->Alloc(self_, object_size, &bytes_promoted);
-          if (forward_address == nullptr) {
-            // If out of space, fall back to the to-space.
-            forward_address = to_space_->Alloc(self_, object_size, &bytes_allocated);
-          } else {
-            GetHeap()->num_bytes_allocated_.FetchAndAdd(bytes_promoted);
-            bytes_promoted_ += bytes_promoted;
-            // Mark forward_address on the live bit map.
-            accounting::SpaceBitmap* live_bitmap = non_moving_space->GetLiveBitmap();
-            DCHECK(live_bitmap != nullptr);
-            DCHECK(!live_bitmap->Test(forward_address));
-            live_bitmap->Set(forward_address);
-            // Mark forward_address on the mark bit map.
-            accounting::SpaceBitmap* mark_bitmap = non_moving_space->GetMarkBitmap();
-            DCHECK(mark_bitmap != nullptr);
-            DCHECK(!mark_bitmap->Test(forward_address));
-            mark_bitmap->Set(forward_address);
-          }
-          DCHECK(forward_address != nullptr);
-        } else {
-          // If it's allocated after the last GC (younger), copy it to the to-space.
-          forward_address = to_space_->Alloc(self_, object_size, &bytes_allocated);
-        }
-        // Copy over the object and add it to the mark stack since we still need to update it's
-        // references.
-        memcpy(reinterpret_cast<void*>(forward_address), obj, object_size);
+        forward_address = MarkNonForwardedObject(obj);
+        DCHECK(forward_address != nullptr);
         // Make sure to only update the forwarding address AFTER you copy the object so that the
         // monitor word doesn't get stomped over.
-        obj->SetLockWord(LockWord::FromForwardingAddress(reinterpret_cast<size_t>(forward_address)));
-        if (to_space_live_bitmap_ != nullptr) {
-          to_space_live_bitmap_->Set(forward_address);
-        }
+        obj->SetLockWord(LockWord::FromForwardingAddress(
+            reinterpret_cast<size_t>(forward_address)));
+        // Push the object onto the mark stack for later processing.
         MarkStackPush(forward_address);
       } else {
         DCHECK(to_space_->HasAddress(forward_address) ||
                (kEnableSimplePromo && GetHeap()->GetNonMovingSpace()->HasAddress(forward_address)));
       }
-      ret = forward_address;
       // TODO: Do we need this if in the else statement?
     } else {
       accounting::SpaceBitmap* object_bitmap = heap_->GetMarkBitmap()->GetContinuousSpaceBitmap(obj);
@@ -379,7 +386,7 @@
       }
     }
   }
-  return ret;
+  return forward_address;
 }
 
 Object* SemiSpace::RecursiveMarkObjectCallback(Object* root, void* arg) {
@@ -431,43 +438,19 @@
   timings_.EndSplit();
 }
 
-struct SweepCallbackContext {
-  SemiSpace* mark_sweep;
-  space::AllocSpace* space;
-  Thread* self;
-};
-
-void SemiSpace::SweepCallback(size_t num_ptrs, Object** ptrs, void* arg) {
-  SweepCallbackContext* context = static_cast<SweepCallbackContext*>(arg);
-  SemiSpace* gc = context->mark_sweep;
-  Heap* heap = gc->GetHeap();
-  space::AllocSpace* space = context->space;
-  Thread* self = context->self;
-  Locks::heap_bitmap_lock_->AssertExclusiveHeld(self);
-  size_t freed_bytes = space->FreeList(self, num_ptrs, ptrs);
-  heap->RecordFree(num_ptrs, freed_bytes);
-  gc->freed_objects_.FetchAndAdd(num_ptrs);
-  gc->freed_bytes_.FetchAndAdd(freed_bytes);
-}
-
-void SemiSpace::ZygoteSweepCallback(size_t num_ptrs, Object** ptrs, void* arg) {
-  SweepCallbackContext* context = static_cast<SweepCallbackContext*>(arg);
-  Locks::heap_bitmap_lock_->AssertExclusiveHeld(context->self);
-  Heap* heap = context->mark_sweep->GetHeap();
-  // We don't free any actual memory to avoid dirtying the shared zygote pages.
-  for (size_t i = 0; i < num_ptrs; ++i) {
-    Object* obj = static_cast<Object*>(ptrs[i]);
-    heap->GetLiveBitmap()->Clear(obj);
-    heap->GetCardTable()->MarkCard(obj);
-  }
+bool SemiSpace::ShouldSweepSpace(space::MallocSpace* space) const {
+  return space != from_space_ && space != to_space_;
 }
 
 void SemiSpace::Sweep(bool swap_bitmaps) {
   DCHECK(mark_stack_->IsEmpty());
   TimingLogger::ScopedSplit("Sweep", &timings_);
   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
-    if (space->IsMallocSpace() && space != from_space_ && space != to_space_) {
+    if (space->IsMallocSpace()) {
       space::MallocSpace* malloc_space = space->AsMallocSpace();
+      if (!ShouldSweepSpace(malloc_space)) {
+        continue;
+      }
       TimingLogger::ScopedSplit split(
           malloc_space->IsZygoteSpace() ? "SweepZygoteSpace" : "SweepAllocSpace", &timings_);
       size_t freed_objects = 0;
@@ -552,12 +535,9 @@
     // If the object is forwarded then it MUST be marked.
     DCHECK(forwarding_address == nullptr || to_space_->HasAddress(forwarding_address) ||
            (kEnableSimplePromo && GetHeap()->GetNonMovingSpace()->HasAddress(forwarding_address)));
-    if (forwarding_address != nullptr) {
-      return forwarding_address;
-    }
-    // Must not be marked, return nullptr;
-    return nullptr;
+    return forwarding_address;  // Returns either the forwarding address or nullptr.
   } else if (to_space_->HasAddress(obj)) {
+    // Should be unlikely.
     // Already forwarded, must be marked.
     return obj;
   }
diff --git a/runtime/gc/collector/semi_space.h b/runtime/gc/collector/semi_space.h
index b76ef5f..7bb7b19 100644
--- a/runtime/gc/collector/semi_space.h
+++ b/runtime/gc/collector/semi_space.h
@@ -54,6 +54,7 @@
   class BumpPointerSpace;
   class ContinuousMemMapAllocSpace;
   class ContinuousSpace;
+  class MallocSpace;
 }  // namespace space
 
 class Heap;
@@ -149,6 +150,9 @@
   static mirror::Object* RecursiveMarkObjectCallback(mirror::Object* root, void* arg)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
 
+  virtual mirror::Object* MarkNonForwardedObject(mirror::Object* obj)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
+
  protected:
   // Returns null if the object is not marked, otherwise returns the forwarding address (same as
   // object for non movable things).
@@ -162,16 +166,12 @@
   bool MarkLargeObject(const mirror::Object* obj)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
-  static void SweepCallback(size_t num_ptrs, mirror::Object** ptrs, void* arg)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
-
-  // Special sweep for zygote that just marks objects / dirties cards.
-  static void ZygoteSweepCallback(size_t num_ptrs, mirror::Object** ptrs, void* arg)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
-
   // Expand mark stack to 2x its current size.
   void ResizeMarkStack(size_t new_size);
 
+  // Returns true if we should sweep the space.
+  virtual bool ShouldSweepSpace(space::MallocSpace* space) const;
+
   // Returns how many threads we should use for the current GC phase based on if we are paused,
   // whether or not we care about pauses.
   size_t GetThreadCount(bool paused) const;
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 6e2bf91..a43c88c 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -1275,6 +1275,92 @@
   reinterpret_cast<accounting::SpaceBitmap*>(arg)->Set(obj);
 }
 
+// Special compacting collector which uses sub-optimal bin packing to reduce zygote space size.
+class ZygoteCompactingCollector : public collector::SemiSpace {
+ public:
+  explicit ZygoteCompactingCollector(gc::Heap* heap) : SemiSpace(heap, "zygote collector") {
+  }
+
+  void BuildBins(space::ContinuousSpace* space) {
+    bin_live_bitmap_ = space->GetLiveBitmap();
+    bin_mark_bitmap_ = space->GetMarkBitmap();
+    BinContext context;
+    context.prev_ = reinterpret_cast<uintptr_t>(space->Begin());
+    context.collector_ = this;
+    WriterMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
+    // Note: This requires traversing the space in increasing order of object addresses.
+    bin_live_bitmap_->Walk(Callback, reinterpret_cast<void*>(&context));
+    // Add the last bin, which spans from the end of the last object to the end of the space.
+    AddBin(reinterpret_cast<uintptr_t>(space->End()) - context.prev_, context.prev_);
+  }
+
+ private:
+  struct BinContext {
+    uintptr_t prev_;  // The end of the previous object.
+    ZygoteCompactingCollector* collector_;
+  };
+  // Maps from bin sizes to locations.
+  std::multimap<size_t, uintptr_t> bins_;
+  // Live bitmap of the space which contains the bins.
+  accounting::SpaceBitmap* bin_live_bitmap_;
+  // Mark bitmap of the space which contains the bins.
+  accounting::SpaceBitmap* bin_mark_bitmap_;
+
+  static void Callback(mirror::Object* obj, void* arg)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    DCHECK(arg != nullptr);
+    BinContext* context = reinterpret_cast<BinContext*>(arg);
+    ZygoteCompactingCollector* collector = context->collector_;
+    uintptr_t object_addr = reinterpret_cast<uintptr_t>(obj);
+    size_t bin_size = object_addr - context->prev_;
+    // Add the bin spanning from the end of the previous object to the start of the current object.
+    collector->AddBin(bin_size, context->prev_);
+    context->prev_ = object_addr + RoundUp(obj->SizeOf(), kObjectAlignment);
+  }
+
+  void AddBin(size_t size, uintptr_t position) {
+    if (size != 0) {
+      bins_.insert(std::make_pair(size, position));
+    }
+  }
+
+  virtual bool ShouldSweepSpace(space::MallocSpace* space) const {
+    // Don't sweep any spaces since we probably blasted the internal accounting of the free list
+    // allocator.
+    return false;
+  }
+
+  virtual mirror::Object* MarkNonForwardedObject(mirror::Object* obj)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_) {
+    size_t object_size = RoundUp(obj->SizeOf(), kObjectAlignment);
+    mirror::Object* forward_address = nullptr;
+    // Find the smallest bin into which obj fits.
+    auto it = bins_.lower_bound(object_size);
+    if (it == bins_.end()) {
+      // No available space in the bins, place it in the target space instead (grows the zygote
+      // space).
+      size_t bytes_allocated = 0;
+      forward_address = to_space_->Alloc(self_, object_size, &bytes_allocated);
+      if (to_space_live_bitmap_ != nullptr) {
+        to_space_live_bitmap_->Set(forward_address);
+      }
+    } else {
+      size_t size = it->first;
+      uintptr_t pos = it->second;
+      bins_.erase(it);  // Erase the old bin which we replace with the new smaller bin.
+      forward_address = reinterpret_cast<mirror::Object*>(pos);
+      // Set the live and mark bits so that sweeping system weaks works properly.
+      bin_live_bitmap_->Set(forward_address);
+      bin_mark_bitmap_->Set(forward_address);
+      DCHECK_GE(size, object_size);
+      AddBin(size - object_size, pos + object_size);  // Add a new bin with the remaining space.
+    }
+    // Copy the object over to its new location.
+    memcpy(reinterpret_cast<void*>(forward_address), obj, object_size);
+    return forward_address;
+  }
+};
+
 void Heap::PreZygoteFork() {
   static Mutex zygote_creation_lock_("zygote creation lock", kZygoteCreationLock);
   Thread* self = Thread::Current();
@@ -1292,12 +1378,16 @@
   ChangeCollector(post_zygote_collector_type_);
   // TODO: Delete bump_pointer_space_ and temp_pointer_space_?
   if (semi_space_collector_ != nullptr) {
+    ZygoteCompactingCollector zygote_collector(this);
+    zygote_collector.BuildBins(non_moving_space_);
     // Create a new bump pointer space which we will compact into.
     space::BumpPointerSpace target_space("zygote bump space", non_moving_space_->End(),
                                          non_moving_space_->Limit());
     // Compact the bump pointer space to a new zygote bump pointer space.
     temp_space_->GetMemMap()->Protect(PROT_READ | PROT_WRITE);
-    Compact(&target_space, bump_pointer_space_);
+    zygote_collector.SetFromSpace(bump_pointer_space_);
+    zygote_collector.SetToSpace(&target_space);
+    zygote_collector.Run(false);
     CHECK(temp_space_->IsEmpty());
     total_objects_freed_ever_ += semi_space_collector_->GetFreedObjects();
     total_bytes_freed_ever_ += semi_space_collector_->GetFreedBytes();
@@ -1306,7 +1396,7 @@
     non_moving_space_->SetLimit(target_space.Limit());
     accounting::SpaceBitmap* bitmap = non_moving_space_->GetLiveBitmap();
     // Record the allocations in the bitmap.
-    VLOG(heap) << "Recording zygote allocations";
+    VLOG(heap) << "Zygote size " << non_moving_space_->Size() << " bytes";
     target_space.Walk(MarkInBitmapCallback, bitmap);
   }
   // Turn the current alloc space into a zygote space and obtain the new alloc space composed of
diff --git a/runtime/gc/space/malloc_space.cc b/runtime/gc/space/malloc_space.cc
index 31d878c..2b2b26e 100644
--- a/runtime/gc/space/malloc_space.cc
+++ b/runtime/gc/space/malloc_space.cc
@@ -187,8 +187,8 @@
   DCHECK(IsAligned<kPageSize>(begin_));
   DCHECK(IsAligned<kPageSize>(end_));
   size_t size = RoundUp(Size(), kPageSize);
-  // Trim the heap so that we minimize the size of the Zygote space.
-  Trim();
+  // Trimming the heap should be done by the caller since we may have invalidated the accounting
+  // stored in between objects.
   // Remaining size is for the new alloc space.
   const size_t growth_limit = growth_limit_ - size;
   const size_t capacity = Capacity() - size;