Add notion of released vs empty pages to ROSAlloc.

A notion of released vs empty pages gives a more accurate view of how
much memory is actually returned to the OS during heap trimming.
Without the distinction, the same pages can be madvised and counted
multiple times even though they were never dirtied again in between.
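
The accounting idea behind the new ReleasePageRange(): only pages that
are still marked empty count as newly reclaimed before being flipped
to released, so a later trim over the same clean pages reports
nothing. A standalone sketch (simplified types, not the actual ART
code):

    #include <cstddef>
    #include <vector>

    enum PageState { kReleased, kEmpty, kInUse };
    constexpr size_t kPageSize = 4096;  // Illustrative page size.

    // Returns the number of bytes newly reclaimed from [begin, end).
    size_t ReleaseRun(std::vector<PageState>& page_map,
                      size_t begin, size_t end) {
      size_t reclaimed = 0;
      for (size_t i = begin; i < end; ++i) {
        if (page_map[i] == kEmpty) {  // Free but possibly dirty: count it.
          reclaimed += kPageSize;
          page_map[i] = kReleased;    // Skipped by any later trim pass.
        }
      }
      return reclaimed;
    }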

Also enable heap trimming of rosalloc spaces even when we care about
jank. This is safe since the trimming process only acquires its locks
for short periods of time. Spaces backed by dlmalloc and the native
heap are still trimmed only when pause times do not matter.
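
Concretely, RosAlloc::Release() now scans the page map without holding
the lock and only takes it while handling a single empty page run,
re-checking the entry once the lock is held. A standalone sketch of
the pattern (std::mutex stands in for the real lock; the madvise
itself is elided):

    #include <cstddef>
    #include <mutex>
    #include <vector>

    enum PageState { kReleased, kEmpty, kInUse };

    // Returns how many page map entries were newly released.
    size_t ReleaseEmptyPages(std::vector<PageState>& page_map,
                             std::mutex& lock) {
      size_t released = 0;
      for (size_t i = 0; i < page_map.size(); ++i) {
        // Racy unlocked read; at worst we skip a releasable page.
        if (page_map[i] != kEmpty) {
          continue;
        }
        std::lock_guard<std::mutex> guard(lock);  // Held for one entry only.
        if (page_map[i] == kEmpty) {  // Still empty now that we hold it?
          // (The real code madvises the whole run and skips past it.)
          page_map[i] = kReleased;
          ++released;
        }
      }
      return released;
    }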

Dalvik PSS reduces from ~52M to ~50M after boot on N4.

Bug: 9969166

Change-Id: I4012e0a2554f413d18efe1a0371fe18d1edabaa9
diff --git a/runtime/gc/allocator/rosalloc.cc b/runtime/gc/allocator/rosalloc.cc
index 09fb97a..722576f 100644
--- a/runtime/gc/allocator/rosalloc.cc
+++ b/runtime/gc/allocator/rosalloc.cc
@@ -159,7 +159,7 @@
     if (it != free_page_runs_.rend() && (last_free_page_run = *it)->End(this) == base_ + footprint_) {
       // There is a free page run at the end.
       DCHECK(last_free_page_run->IsFree());
-      DCHECK_EQ(page_map_[ToPageMapIndex(last_free_page_run)], kPageMapEmpty);
+      DCHECK(IsFreePage(ToPageMapIndex(last_free_page_run)));
       last_free_page_run_size = last_free_page_run->ByteSize(this);
     } else {
       // There is no free page run at the end.
@@ -248,7 +248,7 @@
     // Update the page map.
     size_t page_map_idx = ToPageMapIndex(res);
     for (size_t i = 0; i < num_pages; i++) {
-      DCHECK_EQ(page_map_[page_map_idx + i], kPageMapEmpty);
+      DCHECK(IsFreePage(page_map_idx + i));
     }
     switch (page_map_type) {
     case kPageMapRun:
@@ -301,8 +301,7 @@
     pm_part_type = kPageMapLargeObjectPart;
     break;
   default:
-    pm_part_type = kPageMapEmpty;
-    LOG(FATAL) << "Unreachable - RosAlloc::FreePages() : " << "pm_idx=" << pm_idx << ", pm_type="
+    LOG(FATAL) << "Unreachable - " << __PRETTY_FUNCTION__ << " : " << "pm_idx=" << pm_idx << ", pm_type="
                << static_cast<int>(pm_type) << ", ptr=" << std::hex
                << reinterpret_cast<intptr_t>(ptr);
     return 0;
@@ -330,7 +329,7 @@
   }
 
   if (kTraceRosAlloc) {
-    LOG(INFO) << "RosAlloc::FreePages() : 0x" << std::hex << reinterpret_cast<intptr_t>(ptr)
+    LOG(INFO) << __PRETTY_FUNCTION__ << " : 0x" << std::hex << reinterpret_cast<intptr_t>(ptr)
               << "-0x" << (reinterpret_cast<intptr_t>(ptr) + byte_size)
               << "(" << std::dec << (num_pages * kPageSize) << ")";
   }
@@ -347,7 +346,7 @@
   if (!free_page_runs_.empty()) {
     // Try to coalesce in the higher address direction.
     if (kTraceRosAlloc) {
-      LOG(INFO) << "RosAlloc::FreePages() : trying to coalesce a free page run 0x"
+      LOG(INFO) << __PRETTY_FUNCTION__ << " : trying to coalesce a free page run 0x"
                 << std::hex << reinterpret_cast<uintptr_t>(fpr) << " [" << std::dec << pm_idx << "] -0x"
                 << std::hex << reinterpret_cast<uintptr_t>(fpr->End(this)) << " [" << std::dec
                 << (fpr->End(this) == End() ? page_map_size_ : ToPageMapIndex(fpr->End(this))) << "]";
@@ -497,27 +496,27 @@
                 << ", page_map_entry=" << static_cast<int>(page_map_entry);
     }
     switch (page_map_[pm_idx]) {
-      case kPageMapEmpty:
-        LOG(FATAL) << "Unreachable - page map type: " << page_map_[pm_idx];
-        return 0;
       case kPageMapLargeObject:
         return FreePages(self, ptr, false);
       case kPageMapLargeObjectPart:
         LOG(FATAL) << "Unreachable - page map type: " << page_map_[pm_idx];
         return 0;
-      case kPageMapRun:
       case kPageMapRunPart: {
-        size_t pi = pm_idx;
-        DCHECK(page_map_[pi] == kPageMapRun || page_map_[pi] == kPageMapRunPart);
         // Find the beginning of the run.
-        while (page_map_[pi] != kPageMapRun) {
-          pi--;
-          DCHECK_LT(pi, capacity_ / kPageSize);
-        }
-        DCHECK_EQ(page_map_[pi], kPageMapRun);
-        run = reinterpret_cast<Run*>(base_ + pi * kPageSize);
+        do {
+          --pm_idx;
+          DCHECK_LT(pm_idx, capacity_ / kPageSize);
+        } while (page_map_[pm_idx] != kPageMapRun);
+        // Fall-through.
+      case kPageMapRun:
+        run = reinterpret_cast<Run*>(base_ + pm_idx * kPageSize);
         DCHECK_EQ(run->magic_num_, kMagicNum);
         break;
+      case kPageMapReleased:
+        // Fall-through.
+      case kPageMapEmpty:
+        LOG(FATAL) << "Unreachable - page map type: " << page_map_[pm_idx];
+        return 0;
       }
       default:
         LOG(FATAL) << "Unreachable - page map type: " << page_map_[pm_idx];
@@ -594,7 +593,8 @@
     if (kIsDebugBuild && current_run != dedicated_full_run_) {
       full_runs_[idx].insert(current_run);
       if (kTraceRosAlloc) {
-        LOG(INFO) << __FUNCTION__ << " : Inserted run 0x" << std::hex << reinterpret_cast<intptr_t>(current_run)
+        LOG(INFO) << __PRETTY_FUNCTION__ << " : Inserted run 0x" << std::hex
+                  << reinterpret_cast<intptr_t>(current_run)
                   << " into full_runs_[" << std::dec << idx << "]";
       }
       DCHECK(non_full_runs_[idx].find(current_run) == non_full_runs_[idx].end());
@@ -1358,6 +1358,8 @@
   for (size_t i = 0; i < end; ++i) {
     byte pm = page_map_[i];
     switch (pm) {
+      case kPageMapReleased:
+        // Fall-through.
       case kPageMapEmpty: {
         FreePageRun* fpr = reinterpret_cast<FreePageRun*>(base_ + i * kPageSize);
         if (free_page_runs_.find(fpr) != free_page_runs_.end()) {
@@ -1370,8 +1372,8 @@
           curr_fpr_size = fpr->ByteSize(this);
           DCHECK_EQ(curr_fpr_size % kPageSize, static_cast<size_t>(0));
           remaining_curr_fpr_size = curr_fpr_size - kPageSize;
-          stream << "[" << i << "]=Empty (FPR start)"
-                 << " fpr_size=" << curr_fpr_size
+          stream << "[" << i << "]=" << (pm == kPageMapReleased ? "Released" : "Empty")
+                 << " (FPR start) fpr_size=" << curr_fpr_size
                  << " remaining_fpr_size=" << remaining_curr_fpr_size << std::endl;
           if (remaining_curr_fpr_size == 0) {
             // Reset at the end of the current free page run.
@@ -1441,43 +1443,46 @@
   size_t pm_idx = RoundDownToPageMapIndex(ptr);
   MutexLock mu(Thread::Current(), lock_);
   switch (page_map_[pm_idx]) {
-  case kPageMapEmpty:
-    LOG(FATAL) << "Unreachable - RosAlloc::UsableSize(): pm_idx=" << pm_idx << ", ptr=" << std::hex
-               << reinterpret_cast<intptr_t>(ptr);
-    break;
-  case kPageMapLargeObject: {
-    size_t num_pages = 1;
-    size_t idx = pm_idx + 1;
-    size_t end = page_map_size_;
-    while (idx < end && page_map_[idx] == kPageMapLargeObjectPart) {
-      num_pages++;
-      idx++;
+    case kPageMapReleased:
+      // Fall-through.
+    case kPageMapEmpty:
+      LOG(FATAL) << "Unreachable - " << __PRETTY_FUNCTION__ << ": pm_idx=" << pm_idx << ", ptr="
+                 << std::hex << reinterpret_cast<intptr_t>(ptr);
+      break;
+    case kPageMapLargeObject: {
+      size_t num_pages = 1;
+      size_t idx = pm_idx + 1;
+      size_t end = page_map_size_;
+      while (idx < end && page_map_[idx] == kPageMapLargeObjectPart) {
+        num_pages++;
+        idx++;
+      }
+      return num_pages * kPageSize;
     }
-    return num_pages * kPageSize;
-  }
-  case kPageMapLargeObjectPart:
-    LOG(FATAL) << "Unreachable - RosAlloc::UsableSize(): pm_idx=" << pm_idx << ", ptr=" << std::hex
-               << reinterpret_cast<intptr_t>(ptr);
-    break;
-  case kPageMapRun:
-  case kPageMapRunPart: {
-    // Find the beginning of the run.
-    while (page_map_[pm_idx] != kPageMapRun) {
-      pm_idx--;
-      DCHECK_LT(pm_idx, capacity_ / kPageSize);
+    case kPageMapLargeObjectPart:
+      LOG(FATAL) << "Unreachable - " << __PRETTY_FUNCTION__ << ": pm_idx=" << pm_idx << ", ptr="
+                 << std::hex << reinterpret_cast<intptr_t>(ptr);
+      break;
+    case kPageMapRun:
+    case kPageMapRunPart: {
+      // Find the beginning of the run.
+      while (page_map_[pm_idx] != kPageMapRun) {
+        pm_idx--;
+        DCHECK_LT(pm_idx, capacity_ / kPageSize);
+      }
+      DCHECK_EQ(page_map_[pm_idx], kPageMapRun);
+      Run* run = reinterpret_cast<Run*>(base_ + pm_idx * kPageSize);
+      DCHECK_EQ(run->magic_num_, kMagicNum);
+      size_t idx = run->size_bracket_idx_;
+      size_t offset_from_slot_base = reinterpret_cast<byte*>(ptr)
+          - (reinterpret_cast<byte*>(run) + headerSizes[idx]);
+      DCHECK_EQ(offset_from_slot_base % bracketSizes[idx], static_cast<size_t>(0));
+      return IndexToBracketSize(idx);
     }
-    DCHECK_EQ(page_map_[pm_idx], kPageMapRun);
-    Run* run = reinterpret_cast<Run*>(base_ + pm_idx * kPageSize);
-    DCHECK_EQ(run->magic_num_, kMagicNum);
-    size_t idx = run->size_bracket_idx_;
-    size_t offset_from_slot_base = reinterpret_cast<byte*>(ptr)
-        - (reinterpret_cast<byte*>(run) + headerSizes[idx]);
-    DCHECK_EQ(offset_from_slot_base % bracketSizes[idx], static_cast<size_t>(0));
-    return IndexToBracketSize(idx);
-  }
-  default:
-    LOG(FATAL) << "Unreachable - page map type: " << page_map_[pm_idx];
-    break;
+    default: {
+      LOG(FATAL) << "Unreachable - page map type: " << page_map_[pm_idx];
+      break;
+    }
   }
   return 0;
 }
@@ -1490,7 +1495,7 @@
   if (it != free_page_runs_.rend() && (last_free_page_run = *it)->End(this) == base_ + footprint_) {
     // Remove the last free page run, if any.
     DCHECK(last_free_page_run->IsFree());
-    DCHECK_EQ(page_map_[ToPageMapIndex(last_free_page_run)], kPageMapEmpty);
+    DCHECK(IsFreePage(ToPageMapIndex(last_free_page_run)));
     DCHECK_EQ(last_free_page_run->ByteSize(this) % kPageSize, static_cast<size_t>(0));
     DCHECK_EQ(last_free_page_run->End(this), base_ + footprint_);
     free_page_runs_.erase(last_free_page_run);
@@ -1500,7 +1505,7 @@
     size_t new_num_of_pages = new_footprint / kPageSize;
     DCHECK_GE(page_map_size_, new_num_of_pages);
     // Zero out the tail of the page map.
-    byte* zero_begin = page_map_ + new_num_of_pages;
+    byte* zero_begin = const_cast<byte*>(page_map_) + new_num_of_pages;
     byte* madvise_begin = AlignUp(zero_begin, kPageSize);
     DCHECK_LE(madvise_begin, page_map_mem_map_->End());
     size_t madvise_size = page_map_mem_map_->End() - madvise_begin;
@@ -1543,6 +1548,8 @@
   while (i < pm_end) {
     byte pm = page_map_[i];
     switch (pm) {
+      case kPageMapReleased:
+        // Fall-through.
       case kPageMapEmpty: {
         // The start of a free page run.
         FreePageRun* fpr = reinterpret_cast<FreePageRun*>(base_ + i * kPageSize);
@@ -1560,7 +1567,7 @@
         size_t num_pages = fpr_size / kPageSize;
         if (kIsDebugBuild) {
           for (size_t j = i + 1; j < i + num_pages; ++j) {
-            DCHECK_EQ(page_map_[j], kPageMapEmpty);
+            DCHECK(IsFreePage(j));
           }
         }
         i += fpr_size / kPageSize;
@@ -1672,7 +1679,7 @@
       full_runs_[idx].insert(run);
       DCHECK(full_runs_[idx].find(run) != full_runs_[idx].end());
       if (kTraceRosAlloc) {
-        LOG(INFO) << __FUNCTION__  << " : Inserted run 0x" << std::hex
+        LOG(INFO) << __PRETTY_FUNCTION__  << " : Inserted run 0x" << std::hex
                   << reinterpret_cast<intptr_t>(run)
                   << " into full_runs_[" << std::dec << idx << "]";
       }
@@ -1685,7 +1692,7 @@
     non_full_runs_[idx].insert(run);
     DCHECK(non_full_runs_[idx].find(run) != non_full_runs_[idx].end());
     if (kTraceRosAlloc) {
-      LOG(INFO) << __FUNCTION__ << " : Inserted run 0x" << std::hex
+      LOG(INFO) << __PRETTY_FUNCTION__ << " : Inserted run 0x" << std::hex
                 << reinterpret_cast<intptr_t>(run)
                 << " into non_full_runs_[" << std::dec << idx << "]";
     }
@@ -1865,7 +1872,7 @@
 void RosAlloc::Verify() {
   Thread* self = Thread::Current();
   CHECK(Locks::mutator_lock_->IsExclusiveHeld(self))
-      << "The mutator locks isn't exclusively locked at RosAlloc::Verify()";
+      << "The mutator locks isn't exclusively locked at " << __PRETTY_FUNCTION__;
   MutexLock mu(self, *Locks::thread_list_lock_);
   ReaderMutexLock wmu(self, bulk_free_lock_);
   std::vector<Run*> runs;
@@ -1876,6 +1883,8 @@
     while (i < pm_end) {
       byte pm = page_map_[i];
       switch (pm) {
+        case kPageMapReleased:
+          // Fall-through.
         case kPageMapEmpty: {
           // The start of a free page run.
           FreePageRun* fpr = reinterpret_cast<FreePageRun*>(base_ + i * kPageSize);
@@ -1889,7 +1898,7 @@
           CHECK_GT(num_pages, static_cast<uintptr_t>(0))
               << "A free page run size must be > 0 : " << fpr_size;
           for (size_t j = i + 1; j < i + num_pages; ++j) {
-            CHECK_EQ(page_map_[j], kPageMapEmpty)
+            CHECK(IsFreePage(j))
                 << "A mismatch between the page map table for kPageMapEmpty "
                 << " at page index " << j
                 << " and the free page run size : page index range : "
@@ -2097,48 +2106,36 @@
   Thread* self = Thread::Current();
   size_t reclaimed_bytes = 0;
   size_t i = 0;
-  while (true) {
-    MutexLock mu(self, lock_);
-    // Check the page map size which might have changed due to grow/shrink.
-    size_t pm_end = page_map_size_;
-    if (i >= pm_end) {
-      // Reached the end.
-      break;
-    }
+  // Check the page map size which might have changed due to grow/shrink.
+  while (i < page_map_size_) {
+    // Reading the page map without a lock is racy but the race is benign since it should only
+    // result in occasionally not releasing pages which we could release.
     byte pm = page_map_[i];
     switch (pm) {
       case kPageMapEmpty: {
-        // The start of a free page run. Release pages.
-        FreePageRun* fpr = reinterpret_cast<FreePageRun*>(base_ + i * kPageSize);
-        DCHECK(free_page_runs_.find(fpr) != free_page_runs_.end());
-        size_t fpr_size = fpr->ByteSize(this);
-        DCHECK(IsAligned<kPageSize>(fpr_size));
-        byte* start = reinterpret_cast<byte*>(fpr);
-        if (kIsDebugBuild) {
-          // In the debug build, the first page of a free page run
-          // contains a magic number for debugging. Exclude it.
-          start = reinterpret_cast<byte*>(fpr) + kPageSize;
+        // Only lock if we have an empty page since we want to prevent other threads racing in.
+        MutexLock mu(self, lock_);
+        // Check that it's still empty after we acquired the lock since another thread could have
+        // raced in and placed an allocation here.
+        pm = page_map_[i];
+        if (LIKELY(pm == kPageMapEmpty)) {
+          // The start of a free page run. Release pages.
+          FreePageRun* fpr = reinterpret_cast<FreePageRun*>(base_ + i * kPageSize);
+          DCHECK(free_page_runs_.find(fpr) != free_page_runs_.end());
+          size_t fpr_size = fpr->ByteSize(this);
+          DCHECK(IsAligned<kPageSize>(fpr_size));
+          byte* start = reinterpret_cast<byte*>(fpr);
+          reclaimed_bytes += ReleasePageRange(start, start + fpr_size);
+          i += fpr_size / kPageSize;
+          DCHECK_LE(i, page_map_size_);
         }
-        byte* end = reinterpret_cast<byte*>(fpr) + fpr_size;
-        if (!kMadviseZeroes) {
-          memset(start, 0, end - start);
-        }
-        CHECK_EQ(madvise(start, end - start, MADV_DONTNEED), 0);
-        reclaimed_bytes += fpr_size;
-        size_t num_pages = fpr_size / kPageSize;
-        if (kIsDebugBuild) {
-          for (size_t j = i + 1; j < i + num_pages; ++j) {
-            DCHECK_EQ(page_map_[j], kPageMapEmpty);
-          }
-        }
-        i += num_pages;
-        DCHECK_LE(i, pm_end);
         break;
       }
       case kPageMapLargeObject:      // Fall through.
       case kPageMapLargeObjectPart:  // Fall through.
       case kPageMapRun:              // Fall through.
       case kPageMapRunPart:          // Fall through.
+      case kPageMapReleased:         // Fall through since it is already released.
         ++i;
         break;  // Skip.
       default:
@@ -2149,6 +2146,35 @@
   return reclaimed_bytes;
 }
 
+size_t RosAlloc::ReleasePageRange(byte* start, byte* end) {
+  DCHECK_ALIGNED(start, kPageSize);
+  DCHECK_ALIGNED(end, kPageSize);
+  DCHECK_LT(start, end);
+  if (kIsDebugBuild) {
+    // In the debug build, the first page of a free page run
+    // contains a magic number for debugging. Exclude it.
+    start += kPageSize;
+  }
+  if (!kMadviseZeroes) {
+    // TODO: Do this when we resurrect the page instead.
+    memset(start, 0, end - start);
+  }
+  CHECK_EQ(madvise(start, end - start, MADV_DONTNEED), 0);
+  size_t pm_idx = ToPageMapIndex(start);
+  size_t reclaimed_bytes = 0;
+  // Calculate reclaimed bytes and update the page map.
+  const size_t max_idx = pm_idx + (end - start) / kPageSize;
+  for (; pm_idx < max_idx; ++pm_idx) {
+    DCHECK(IsFreePage(pm_idx));
+    if (page_map_[pm_idx] == kPageMapEmpty) {
+      // Mark the page as released and update how many bytes we released.
+      reclaimed_bytes += kPageSize;
+      page_map_[pm_idx] = kPageMapReleased;
+    }
+  }
+  return reclaimed_bytes;
+}
+
 }  // namespace allocator
 }  // namespace gc
 }  // namespace art
diff --git a/runtime/gc/allocator/rosalloc.h b/runtime/gc/allocator/rosalloc.h
index 13f61ec..fad0dc8 100644
--- a/runtime/gc/allocator/rosalloc.h
+++ b/runtime/gc/allocator/rosalloc.h
@@ -99,27 +99,8 @@
       byte* start = reinterpret_cast<byte*>(this);
       size_t byte_size = ByteSize(rosalloc);
       DCHECK_EQ(byte_size % kPageSize, static_cast<size_t>(0));
-      bool release_pages = ShouldReleasePages(rosalloc);
-      if (kIsDebugBuild) {
-        // Exclude the first page that stores the magic number.
-        DCHECK_GE(byte_size, static_cast<size_t>(kPageSize));
-        start += kPageSize;
-        byte_size -= kPageSize;
-        if (byte_size > 0) {
-          if (release_pages) {
-            if (!kMadviseZeroes) {
-              memset(start, 0, byte_size);
-            }
-            madvise(start, byte_size, MADV_DONTNEED);
-          }
-        }
-      } else {
-        if (release_pages) {
-          if (!kMadviseZeroes) {
-            memset(start, 0, byte_size);
-          }
-          madvise(start, byte_size, MADV_DONTNEED);
-        }
+      if (ShouldReleasePages(rosalloc)) {
+        rosalloc->ReleasePageRange(start, start + byte_size);
       }
     }
   };
@@ -462,14 +443,15 @@
   std::string size_bracket_lock_names[kNumOfSizeBrackets];
   // The types of page map entries.
   enum {
-    kPageMapEmpty           = 0,  // Not allocated.
-    kPageMapRun             = 1,  // The beginning of a run.
-    kPageMapRunPart         = 2,  // The non-beginning part of a run.
-    kPageMapLargeObject     = 3,  // The beginning of a large object.
-    kPageMapLargeObjectPart = 4,  // The non-beginning part of a large object.
+    kPageMapReleased = 0,     // Zero and released back to the OS.
+    kPageMapEmpty,            // Zero but probably dirty.
+    kPageMapRun,              // The beginning of a run.
+    kPageMapRunPart,          // The non-beginning part of a run.
+    kPageMapLargeObject,      // The beginning of a large object.
+    kPageMapLargeObjectPart,  // The non-beginning part of a large object.
   };
   // The table that indicates what pages are currently used for.
-  byte* page_map_;  // No GUARDED_BY(lock_) for kReadPageMapEntryWithoutLockInBulkFree.
+  volatile byte* page_map_;  // No GUARDED_BY(lock_) for kReadPageMapEntryWithoutLockInBulkFree.
   size_t page_map_size_;
   size_t max_page_map_size_;
   std::unique_ptr<MemMap> page_map_mem_map_;
@@ -536,6 +518,9 @@
   // Revoke the current runs which share an index with the thread local runs.
   void RevokeThreadUnsafeCurrentRuns();
 
+  // Release a range of pages.
+  size_t ReleasePageRange(byte* start, byte* end) EXCLUSIVE_LOCKS_REQUIRED(lock_);
+
  public:
   RosAlloc(void* base, size_t capacity, size_t max_capacity,
            PageReleaseMode page_release_mode,
@@ -588,6 +573,11 @@
   static Run* GetDedicatedFullRun() {
     return dedicated_full_run_;
   }
+  bool IsFreePage(size_t idx) const {
+    DCHECK_LT(idx, capacity_ / kPageSize);
+    byte pm_type = page_map_[idx];
+    return pm_type == kPageMapReleased || pm_type == kPageMapEmpty;
+  }
 
   // Callbacks for InspectAll that will count the number of bytes
   // allocated and objects allocated, respectively.
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 696728b..e9adca0 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -114,7 +114,7 @@
       desired_collector_type_(foreground_collector_type_),
       heap_trim_request_lock_(nullptr),
       last_trim_time_(0),
-      heap_transition_target_time_(0),
+      heap_transition_or_trim_target_time_(0),
       heap_trim_request_pending_(false),
       parallel_gc_threads_(parallel_gc_threads),
       conc_gc_threads_(conc_gc_threads),
@@ -850,10 +850,10 @@
       MutexLock mu(self, *heap_trim_request_lock_);
       desired_collector_type = desired_collector_type_;
       uint64_t current_time = NanoTime();
-      if (current_time >= heap_transition_target_time_) {
+      if (current_time >= heap_transition_or_trim_target_time_) {
         break;
       }
-      wait_time = heap_transition_target_time_ - current_time;
+      wait_time = heap_transition_or_trim_target_time_ - current_time;
     }
     ScopedThreadStateChange tsc(self, kSleeping);
     usleep(wait_time / 1000);  // Usleep takes microseconds.
@@ -871,9 +871,9 @@
     VLOG(heap) << "Deflating " << count << " monitors took "
         << PrettyDuration(NanoTime() - start_time);
     runtime->GetThreadList()->ResumeAll();
-    // Do a heap trim if it is needed.
-    Trim();
   }
+  // Do a heap trim if it is needed.
+  Trim();
 }
 
 void Heap::Trim() {
@@ -904,9 +904,13 @@
   uint64_t managed_reclaimed = 0;
   for (const auto& space : continuous_spaces_) {
     if (space->IsMallocSpace()) {
-      gc::space::MallocSpace* alloc_space = space->AsMallocSpace();
-      total_alloc_space_size += alloc_space->Size();
-      managed_reclaimed += alloc_space->Trim();
+      gc::space::MallocSpace* malloc_space = space->AsMallocSpace();
+      if (malloc_space->IsRosAllocSpace() || !CareAboutPauseTimes()) {
+        // Don't trim dlmalloc spaces if we care about pauses since this can hold the space lock
+        // for a long period of time.
+        managed_reclaimed += malloc_space->Trim();
+      }
+      total_alloc_space_size += malloc_space->Size();
     }
   }
   total_alloc_space_allocated = GetBytesAllocated() - large_object_space_->GetBytesAllocated();
@@ -919,15 +923,18 @@
   // We never move things in the native heap, so we can finish the GC at this point.
   FinishGC(self, collector::kGcTypeNone);
   size_t native_reclaimed = 0;
+  // Only trim the native heap if we don't care about pauses.
+  if (!CareAboutPauseTimes()) {
 #if defined(USE_DLMALLOC)
-  // Trim the native heap.
-  dlmalloc_trim(0);
-  dlmalloc_inspect_all(DlmallocMadviseCallback, &native_reclaimed);
+    // Trim the native heap.
+    dlmalloc_trim(0);
+    dlmalloc_inspect_all(DlmallocMadviseCallback, &native_reclaimed);
 #elif defined(USE_JEMALLOC)
-  // Jemalloc does it's own internal trimming.
+    // Jemalloc does its own internal trimming.
 #else
-  UNIMPLEMENTED(WARNING) << "Add trimming support";
+    UNIMPLEMENTED(WARNING) << "Add trimming support";
 #endif
+  }
   uint64_t end_ns = NanoTime();
   VLOG(heap) << "Heap trim of managed (duration=" << PrettyDuration(gc_heap_end_ns - start_ns)
       << ", advised=" << PrettySize(managed_reclaimed) << ") and native (duration="
@@ -2693,17 +2700,14 @@
     if (desired_collector_type_ == desired_collector_type) {
       return;
     }
-    heap_transition_target_time_ = std::max(heap_transition_target_time_, NanoTime() + delta_time);
+    heap_transition_or_trim_target_time_ =
+        std::max(heap_transition_or_trim_target_time_, NanoTime() + delta_time);
     desired_collector_type_ = desired_collector_type;
   }
   SignalHeapTrimDaemon(self);
 }
 
 void Heap::RequestHeapTrim() {
-  // Request a heap trim only if we do not currently care about pause times.
-  if (CareAboutPauseTimes()) {
-    return;
-  }
   // GC completed and now we must decide whether to request a heap trim (advising pages back to the
   // kernel) or not. Issuing a request will also cause trimming of the libc heap. As a trim scans
   // a space it will hold its lock and can become a cause of jank.
@@ -2733,6 +2737,10 @@
       return;
     }
     heap_trim_request_pending_ = true;
+    uint64_t current_time = NanoTime();
+    if (heap_transition_or_trim_target_time_ < current_time) {
+      heap_transition_or_trim_target_time_ = current_time + kHeapTrimWait;
+    }
   }
   // Notify the daemon thread which will actually do the heap trim.
   SignalHeapTrimDaemon(self);
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index 511e9f8..c9ea03e 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -769,8 +769,8 @@
   Mutex* heap_trim_request_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
   // When we want to perform the next heap trim (nano seconds).
   uint64_t last_trim_time_ GUARDED_BY(heap_trim_request_lock_);
-  // When we want to perform the next heap transition (nano seconds).
-  uint64_t heap_transition_target_time_ GUARDED_BY(heap_trim_request_lock_);
+  // When we want to perform the next heap transition or heap trim (nanoseconds).
+  uint64_t heap_transition_or_trim_target_time_ GUARDED_BY(heap_trim_request_lock_);
   // If we have a heap trim request pending.
   bool heap_trim_request_pending_ GUARDED_BY(heap_trim_request_lock_);