Fix an array copy benchmark regression.

Add different page release modes to rosalloc. Previously every freed
page run was immediately madvised away with MADV_DONTNEED, so pages
that were reused later had to be faulted back in zero-filled. By
default, empty page runs are now released only when they are at least
kDefaultPageReleaseSizeThreshold (4 MB) in size and sit at the end of
the space; heaps in low-memory mode keep releasing every empty page
run, and RosAllocSpace::Trim() madvises the remaining empty runs for
the other modes.
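
The decision reduces to the check below; this is a minimal standalone
sketch for illustration only (ShouldRelease, run_size, at_end and
threshold are stand-ins for FreePageRun::ShouldReleasePages(),
ByteSize(), IsAtEndOfSpace() and page_release_size_threshold_):

  #include <cstddef>

  enum PageReleaseMode {
    kPageReleaseModeNone,        // never release empty pages
    kPageReleaseModeEnd,         // only release runs at the end of the space
    kPageReleaseModeSize,        // only release runs >= the size threshold
    kPageReleaseModeSizeAndEnd,  // both conditions must hold
    kPageReleaseModeAll,         // always release empty pages
  };

  bool ShouldRelease(PageReleaseMode mode, size_t run_size, bool at_end,
                     size_t threshold = 4u * 1024 * 1024) {
    switch (mode) {
      case kPageReleaseModeNone:       return false;
      case kPageReleaseModeEnd:        return at_end;
      case kPageReleaseModeSize:       return run_size >= threshold;
      case kPageReleaseModeSizeAndEnd: return run_size >= threshold && at_end;
      case kPageReleaseModeAll:        return true;
    }
    return false;
  }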

Bug: 12064551
Change-Id: Ib837bbd1a2757741a4e2743e0a1272bf46a30252
diff --git a/runtime/gc/allocator/rosalloc.cc b/runtime/gc/allocator/rosalloc.cc
index 469b098..8ae61a3 100644
--- a/runtime/gc/allocator/rosalloc.cc
+++ b/runtime/gc/allocator/rosalloc.cc
@@ -37,12 +37,16 @@
 size_t RosAlloc::threadLocalFreeBitMapOffsets[kNumOfSizeBrackets];
 bool RosAlloc::initialized_ = false;
 
-RosAlloc::RosAlloc(void* base, size_t capacity)
+RosAlloc::RosAlloc(void* base, size_t capacity,
+                   PageReleaseMode page_release_mode, size_t page_release_size_threshold)
     : base_(reinterpret_cast<byte*>(base)), footprint_(capacity),
       capacity_(capacity),
       lock_("rosalloc global lock", kRosAllocGlobalLock),
-      bulk_free_lock_("rosalloc bulk free lock", kRosAllocBulkFreeLock) {
+      bulk_free_lock_("rosalloc bulk free lock", kRosAllocBulkFreeLock),
+      page_release_mode_(page_release_mode),
+      page_release_size_threshold_(page_release_size_threshold) {
   DCHECK(RoundUp(capacity, kPageSize) == capacity);
+  CHECK(IsAligned<kPageSize>(page_release_size_threshold_));
   if (!initialized_) {
     Initialize();
   }
@@ -65,7 +69,9 @@
   }
   free_pages->SetByteSize(this, capacity_);
   DCHECK_EQ(capacity_ % kPageSize, static_cast<size_t>(0));
+  DCHECK(free_pages->IsFree());
   free_pages->ReleasePages(this);
+  DCHECK(free_pages->IsFree());
   free_page_runs_.insert(free_pages);
   if (kTraceRosAlloc) {
     LOG(INFO) << "RosAlloc::RosAlloc() : Inserted run 0x" << std::hex
@@ -387,7 +393,9 @@
   // Insert it.
   DCHECK_EQ(fpr->ByteSize(this) % kPageSize, static_cast<size_t>(0));
   DCHECK(free_page_runs_.find(fpr) == free_page_runs_.end());
+  DCHECK(fpr->IsFree());
   fpr->ReleasePages(this);
+  DCHECK(fpr->IsFree());
   free_page_runs_.insert(fpr);
   DCHECK(free_page_runs_.find(fpr) != free_page_runs_.end());
   if (kTraceRosAlloc) {
@@ -404,20 +412,26 @@
     MutexLock mu(self, lock_);
     r = AllocPages(self, num_pages, kPageMapLargeObject);
   }
+  if (UNLIKELY(r == nullptr)) {
+    if (kTraceRosAlloc) {
+      LOG(INFO) << "RosAlloc::AllocLargeObject() : NULL";
+    }
+    return nullptr;
+  }
   if (bytes_allocated != NULL) {
     *bytes_allocated = num_pages * kPageSize;
   }
   if (kTraceRosAlloc) {
-    if (r != NULL) {
-      LOG(INFO) << "RosAlloc::AllocLargeObject() : 0x" << std::hex << reinterpret_cast<intptr_t>(r)
-                << "-0x" << (reinterpret_cast<intptr_t>(r) + num_pages * kPageSize)
-                << "(" << std::dec << (num_pages * kPageSize) << ")";
-    } else {
-      LOG(INFO) << "RosAlloc::AllocLargeObject() : NULL";
-    }
+    LOG(INFO) << "RosAlloc::AllocLargeObject() : 0x" << std::hex << reinterpret_cast<intptr_t>(r)
+              << "-0x" << (reinterpret_cast<intptr_t>(r) + num_pages * kPageSize)
+              << "(" << std::dec << (num_pages * kPageSize) << ")";
+  }
+  if (!DoesReleaseAllPages()) {
+    // If not all pages are released on free, recycled pages may not be zeroed out.
+    memset(r, 0, size);
   }
   // Check if the returned memory is really all zero.
-  if (kCheckZeroMemory && r != NULL) {
+  if (kCheckZeroMemory) {
     byte* bytes = reinterpret_cast<byte*>(r);
     for (size_t i = 0; i < size; ++i) {
       DCHECK_EQ(bytes[i], 0);
@@ -1366,7 +1380,12 @@
         size_t fpr_size = fpr->ByteSize(this);
         DCHECK(IsAligned<kPageSize>(fpr_size));
         void* start = fpr;
-        void* end = reinterpret_cast<byte*>(start) + fpr_size;
+        if (kIsDebugBuild) {
+          // In the debug build, the first page of a free page run
+          // contains a magic number for debugging. Exclude it.
+          start = reinterpret_cast<byte*>(fpr) + kPageSize;
+        }
+        void* end = reinterpret_cast<byte*>(fpr) + fpr_size;
         handler(start, end, 0, arg);
         size_t num_pages = fpr_size / kPageSize;
         if (kIsDebugBuild) {
diff --git a/runtime/gc/allocator/rosalloc.h b/runtime/gc/allocator/rosalloc.h
index d5b6de1..4eb13315 100644
--- a/runtime/gc/allocator/rosalloc.h
+++ b/runtime/gc/allocator/rosalloc.h
@@ -91,18 +91,50 @@
       byte* end = fpr_base + ByteSize(rosalloc);
       return end;
     }
+    bool IsLargerThanPageReleaseThreshold(RosAlloc* rosalloc)
+        EXCLUSIVE_LOCKS_REQUIRED(rosalloc->lock_) {
+      return ByteSize(rosalloc) >= rosalloc->page_release_size_threshold_;
+    }
+    bool IsAtEndOfSpace(RosAlloc* rosalloc)
+        EXCLUSIVE_LOCKS_REQUIRED(rosalloc->lock_) {
+      return reinterpret_cast<byte*>(this) + ByteSize(rosalloc) == rosalloc->base_ + rosalloc->footprint_;
+    }
+    bool ShouldReleasePages(RosAlloc* rosalloc) EXCLUSIVE_LOCKS_REQUIRED(rosalloc->lock_) {
+      switch (rosalloc->page_release_mode_) {
+        case kPageReleaseModeNone:
+          return false;
+        case kPageReleaseModeEnd:
+          return IsAtEndOfSpace(rosalloc);
+        case kPageReleaseModeSize:
+          return IsLargerThanPageReleaseThreshold(rosalloc);
+        case kPageReleaseModeSizeAndEnd:
+          return IsLargerThanPageReleaseThreshold(rosalloc) && IsAtEndOfSpace(rosalloc);
+        case kPageReleaseModeAll:
+          return true;
+        default:
+          LOG(FATAL) << "Unexpected page release mode";
+          return false;
+      }
+    }
     void ReleasePages(RosAlloc* rosalloc) EXCLUSIVE_LOCKS_REQUIRED(rosalloc->lock_) {
+      byte* start = reinterpret_cast<byte*>(this);
       size_t byte_size = ByteSize(rosalloc);
       DCHECK_EQ(byte_size % kPageSize, static_cast<size_t>(0));
+      bool release_pages = ShouldReleasePages(rosalloc);
       if (kIsDebugBuild) {
         // Exclude the first page that stores the magic number.
         DCHECK_GE(byte_size, static_cast<size_t>(kPageSize));
+        start += kPageSize;
         byte_size -= kPageSize;
         if (byte_size > 0) {
-          madvise(reinterpret_cast<byte*>(this) + kPageSize, byte_size, MADV_DONTNEED);
+          if (release_pages) {
+            madvise(start, byte_size, MADV_DONTNEED);
+          }
         }
       } else {
-        madvise(this, byte_size, MADV_DONTNEED);
+        if (release_pages) {
+          madvise(start, byte_size, MADV_DONTNEED);
+        }
       }
     }
   };
@@ -363,6 +395,21 @@
     }
   };
 
+ public:
+  // Different page release modes.
+  enum PageReleaseMode {
+    kPageReleaseModeNone,         // Release no empty pages.
+    kPageReleaseModeEnd,          // Release empty pages at the end of the space.
+    kPageReleaseModeSize,         // Release empty page runs at least as large as the threshold.
+    kPageReleaseModeSizeAndEnd,   // Release empty page runs at least as large as the threshold
+                                  // and at the end of the space.
+    kPageReleaseModeAll,          // Release all empty pages.
+  };
+
+  // The default value for page_release_size_threshold_.
+  static constexpr size_t kDefaultPageReleaseSizeThreshold = 4 * MB;
+
+ private:
   // The base address of the memory region that's managed by this allocator.
   byte* base_;
 
@@ -412,6 +459,12 @@
   // allowing multiple individual frees at the same time.
   ReaderWriterMutex bulk_free_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
 
+  // The page release mode.
+  const PageReleaseMode page_release_mode_;
+  // Under kPageReleaseModeSize(AndEnd), if the free page run size is
+  // greater than or equal to this value, release pages.
+  const size_t page_release_size_threshold_;
+
   // The base address of the memory region that's managed by this allocator.
   byte* Begin() { return base_; }
   // The end address of the memory region that's managed by this allocator.
@@ -439,7 +492,9 @@
   void* AllocLargeObject(Thread* self, size_t size, size_t* bytes_allocated) LOCKS_EXCLUDED(lock_);
 
  public:
-  RosAlloc(void* base, size_t capacity);
+  RosAlloc(void* base, size_t capacity,
+           PageReleaseMode page_release_mode,
+           size_t page_release_size_threshold = kDefaultPageReleaseSizeThreshold);
   void* Alloc(Thread* self, size_t size, size_t* bytes_allocated)
       LOCKS_EXCLUDED(lock_);
   void Free(Thread* self, void* ptr)
@@ -480,6 +535,10 @@
   // allocated and objects allocated, respectively.
   static void BytesAllocatedCallback(void* start, void* end, size_t used_bytes, void* arg);
   static void ObjectsAllocatedCallback(void* start, void* end, size_t used_bytes, void* arg);
+
+  bool DoesReleaseAllPages() const {
+    return page_release_mode_ == kPageReleaseModeAll;
+  }
 };
 
 }  // namespace allocator
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 76a8e79..61c66e7 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -180,7 +180,7 @@
                                                      requested_alloc_space_begin);
   } else {
     non_moving_space_ = space::RosAllocSpace::Create(name, initial_size, growth_limit, capacity,
-                                                     requested_alloc_space_begin);
+                                                     requested_alloc_space_begin, low_memory_mode_);
   }
   if (kMovingCollector) {
     // TODO: Place bump-pointer spaces somewhere to minimize size of card table.
@@ -1151,7 +1151,7 @@
   // Turn the current alloc space into a zygote space and obtain the new alloc space composed of
   // the remaining available heap memory.
   space::MallocSpace* zygote_space = non_moving_space_;
-  non_moving_space_ = zygote_space->CreateZygoteSpace("alloc space");
+  non_moving_space_ = zygote_space->CreateZygoteSpace("alloc space", low_memory_mode_);
   non_moving_space_->SetFootprintLimit(non_moving_space_->Capacity());
   // Change the GC retention policy of the zygote space to only collect when full.
   zygote_space->SetGcRetentionPolicy(space::kGcRetentionPolicyFullCollect);
diff --git a/runtime/gc/space/dlmalloc_space.h b/runtime/gc/space/dlmalloc_space.h
index d18d4ad..c529314 100644
--- a/runtime/gc/space/dlmalloc_space.h
+++ b/runtime/gc/space/dlmalloc_space.h
@@ -117,7 +117,7 @@
   mirror::Object* AllocWithoutGrowthLocked(Thread* self, size_t num_bytes, size_t* bytes_allocated)
       EXCLUSIVE_LOCKS_REQUIRED(lock_);
 
-  void* CreateAllocator(void* base, size_t morecore_start, size_t initial_size) {
+  void* CreateAllocator(void* base, size_t morecore_start, size_t initial_size, bool /*low_memory_mode*/) {
     return CreateMspace(base, morecore_start, initial_size);
   }
   static void* CreateMspace(void* base, size_t morecore_start, size_t initial_size);
diff --git a/runtime/gc/space/malloc_space.cc b/runtime/gc/space/malloc_space.cc
index 785b5ed..46df0a1 100644
--- a/runtime/gc/space/malloc_space.cc
+++ b/runtime/gc/space/malloc_space.cc
@@ -176,7 +176,7 @@
   DCHECK(temp_bitmap_.get() == NULL);
 }
 
-MallocSpace* MallocSpace::CreateZygoteSpace(const char* alloc_space_name) {
+MallocSpace* MallocSpace::CreateZygoteSpace(const char* alloc_space_name, bool low_memory_mode) {
   // For RosAlloc, revoke thread local runs before creating a new
   // alloc space so that we won't mix thread local runs from different
   // alloc spaces.
@@ -213,7 +213,7 @@
   UniquePtr<MemMap> mem_map(GetMemMap()->RemapAtEnd(end_, alloc_space_name,
                                                     PROT_READ | PROT_WRITE, &error_msg));
   CHECK(mem_map.get() != nullptr) << error_msg;
-  void* allocator = CreateAllocator(end_, starting_size, initial_size);
+  void* allocator = CreateAllocator(end_, starting_size, initial_size, low_memory_mode);
   // Protect memory beyond the initial size.
   byte* end = mem_map->Begin() + starting_size;
   if (capacity - initial_size > 0) {
diff --git a/runtime/gc/space/malloc_space.h b/runtime/gc/space/malloc_space.h
index 0f882d3..4c8b05f 100644
--- a/runtime/gc/space/malloc_space.h
+++ b/runtime/gc/space/malloc_space.h
@@ -127,8 +127,11 @@
   virtual MallocSpace* CreateInstance(const std::string& name, MemMap* mem_map, void* allocator,
                                       byte* begin, byte* end, byte* limit, size_t growth_limit) = 0;
 
-  // Turn ourself into a zygote space and return a new alloc space which has our unused memory.
-  MallocSpace* CreateZygoteSpace(const char* alloc_space_name);
+  // Turn ourself into a zygote space and return a new alloc space
+  // which has our unused memory.  When low_memory_mode is true, the
+  // heap expects the created space to be more aggressive in
+  // releasing unused pages.
+  MallocSpace* CreateZygoteSpace(const char* alloc_space_name, bool low_memory_mode);
 
   virtual uint64_t GetBytesAllocated() = 0;
   virtual uint64_t GetObjectsAllocated() = 0;
@@ -154,7 +157,11 @@
   static MemMap* CreateMemMap(const std::string& name, size_t starting_size, size_t* initial_size,
                               size_t* growth_limit, size_t* capacity, byte* requested_begin);
 
-  virtual void* CreateAllocator(void* base, size_t morecore_start, size_t initial_size) = 0;
+  // When low_memory_mode is true, the heap expects the created
+  // allocator to be more aggressive in releasing unused pages back
+  // to the system.
+  virtual void* CreateAllocator(void* base, size_t morecore_start, size_t initial_size,
+                                bool low_memory_mode) = 0;
 
   void RegisterRecentFree(mirror::Object* ptr) EXCLUSIVE_LOCKS_REQUIRED(lock_);
 
diff --git a/runtime/gc/space/rosalloc_space.cc b/runtime/gc/space/rosalloc_space.cc
index 1f8e324..0438f8d 100644
--- a/runtime/gc/space/rosalloc_space.cc
+++ b/runtime/gc/space/rosalloc_space.cc
@@ -45,7 +45,7 @@
 }
 
 RosAllocSpace* RosAllocSpace::Create(const std::string& name, size_t initial_size, size_t growth_limit,
-                                     size_t capacity, byte* requested_begin) {
+                                     size_t capacity, byte* requested_begin, bool low_memory_mode) {
   uint64_t start_time = 0;
   if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) {
     start_time = NanoTime();
@@ -68,7 +68,8 @@
                << PrettySize(capacity);
     return NULL;
   }
-  allocator::RosAlloc* rosalloc = CreateRosAlloc(mem_map->Begin(), starting_size, initial_size);
+  allocator::RosAlloc* rosalloc = CreateRosAlloc(mem_map->Begin(), starting_size, initial_size,
+                                                 low_memory_mode);
   if (rosalloc == NULL) {
     LOG(ERROR) << "Failed to initialize rosalloc for alloc space (" << name << ")";
     return NULL;
@@ -97,13 +98,18 @@
   return space;
 }
 
-allocator::RosAlloc* RosAllocSpace::CreateRosAlloc(void* begin, size_t morecore_start, size_t initial_size) {
+allocator::RosAlloc* RosAllocSpace::CreateRosAlloc(void* begin, size_t morecore_start, size_t initial_size,
+                                                   bool low_memory_mode) {
   // clear errno to allow PLOG on error
   errno = 0;
   // create rosalloc using our backing storage starting at begin and
   // with a footprint of morecore_start. When morecore_start bytes of
   // memory is exhaused morecore will be called.
-  allocator::RosAlloc* rosalloc = new art::gc::allocator::RosAlloc(begin, morecore_start);
+  allocator::RosAlloc* rosalloc = new art::gc::allocator::RosAlloc(
+      begin, morecore_start,
+      low_memory_mode ?
+          art::gc::allocator::RosAlloc::kPageReleaseModeAll :
+          art::gc::allocator::RosAlloc::kPageReleaseModeSizeAndEnd);
   if (rosalloc != NULL) {
     rosalloc->SetFootprintLimit(initial_size);
   } else {
@@ -216,10 +222,18 @@
 }
 
 size_t RosAllocSpace::Trim() {
-  MutexLock mu(Thread::Current(), lock_);
-  // Trim to release memory at the end of the space.
-  rosalloc_->Trim();
-  // No inspect_all necessary here as trimming of pages is built-in.
+  {
+    MutexLock mu(Thread::Current(), lock_);
+    // Trim to release memory at the end of the space.
+    rosalloc_->Trim();
+  }
+  // If the allocator does not release all empty pages on free, madvise them away here.
+  if (!rosalloc_->DoesReleaseAllPages()) {
+    VLOG(heap) << "RosAllocSpace::Trim() ";
+    size_t reclaimed = 0;
+    InspectAllRosAlloc(DlmallocMadviseCallback, &reclaimed);
+    return reclaimed;
+  }
   return 0;
 }
 
diff --git a/runtime/gc/space/rosalloc_space.h b/runtime/gc/space/rosalloc_space.h
index 6311580..5b7616c 100644
--- a/runtime/gc/space/rosalloc_space.h
+++ b/runtime/gc/space/rosalloc_space.h
@@ -38,7 +38,7 @@
   // the caller should call Begin on the returned space to confirm the
   // request was granted.
   static RosAllocSpace* Create(const std::string& name, size_t initial_size, size_t growth_limit,
-                               size_t capacity, byte* requested_begin);
+                               size_t capacity, byte* requested_begin, bool low_memory_mode);
 
   virtual mirror::Object* AllocWithGrowth(Thread* self, size_t num_bytes,
                                           size_t* bytes_allocated) LOCKS_EXCLUDED(lock_);
@@ -115,11 +115,11 @@
   size_t InternalAllocationSize(const mirror::Object* obj);
   mirror::Object* AllocWithoutGrowthLocked(Thread* self, size_t num_bytes, size_t* bytes_allocated);
 
-  void* CreateAllocator(void* base, size_t morecore_start, size_t initial_size) {
-    return CreateRosAlloc(base, morecore_start, initial_size);
+  void* CreateAllocator(void* base, size_t morecore_start, size_t initial_size, bool low_memory_mode) {
+    return CreateRosAlloc(base, morecore_start, initial_size, low_memory_mode);
   }
-  static allocator::RosAlloc* CreateRosAlloc(void* base, size_t morecore_start, size_t initial_size);
-
+  static allocator::RosAlloc* CreateRosAlloc(void* base, size_t morecore_start, size_t initial_size,
+                                             bool low_memory_mode);
 
   void InspectAllRosAlloc(void (*callback)(void *start, void *end, size_t num_bytes, void* callback_arg),
                           void* arg)
diff --git a/runtime/gc/space/space_test.cc b/runtime/gc/space/space_test.cc
index 60c3b1c..b1be9d8 100644
--- a/runtime/gc/space/space_test.cc
+++ b/runtime/gc/space/space_test.cc
@@ -58,7 +58,8 @@
   }
   static MallocSpace* CreateRosAllocSpace(const std::string& name, size_t initial_size, size_t growth_limit,
                                           size_t capacity, byte* requested_begin) {
-    return RosAllocSpace::Create(name, initial_size, growth_limit, capacity, requested_begin);
+    return RosAllocSpace::Create(name, initial_size, growth_limit, capacity, requested_begin,
+                                 Runtime::Current()->GetHeap()->IsLowMemoryMode());
   }
 
   typedef MallocSpace* (*CreateSpaceFn)(const std::string& name, size_t initial_size, size_t growth_limit,
@@ -178,7 +179,7 @@
 
   // Make sure that the zygote space isn't directly at the start of the space.
   space->Alloc(self, 1U * MB, &dummy);
-  space = space->CreateZygoteSpace("alloc space");
+  space = space->CreateZygoteSpace("alloc space", Runtime::Current()->GetHeap()->IsLowMemoryMode());
 
   // Make space findable to the heap, will also delete space when runtime is cleaned up
   AddSpace(space);