Bytes_moved accounting fix and accounting cleanup

Bytes_moved should be incremented by the number of bytes the object
actually occupies in the space it is allocated in, not by the number
of bytes it would take in region space.
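
In code terms, the relevant accounting after this change (condensed
from the concurrent_copying.cc hunk below; bytes_allocated is the size
the destination space actually charged for the copy, and it can exceed
region_space_alloc_size when the copy falls back to the non-moving
space):

  if (LIKELY(self == thread_running_gc_)) {
    // GC thread: plain add, no atomic needed.
    bytes_moved_gc_thread_ += bytes_allocated;
  } else {
    // Mutators: atomic add, since several mutators may copy concurrently.
    bytes_moved_.fetch_add(bytes_allocated, std::memory_order_relaxed);
  }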

Various minor cleanups for code that I found hard to read while
attempting to track this down.

Remove a CHECK (the CHECK_LE(to_bytes, from_bytes) in
concurrent_copying.cc) that held only because of the incorrect
accounting; with the corrected accounting it fires and causes
TreeHugger test failures.
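
As a made-up illustration of why the CHECK can no longer be relied on:
suppose 4000 bytes of live from-space objects are copied while region
space is nearly exhausted, so the copies fall back to the free-list
backed non-moving space, which pads/bins them to, say, 4096 bytes.
Then to_bytes (4096) exceeds from_bytes (4000) and
CHECK_LE(to_bytes, from_bytes) fires even though nothing is wrong.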

Bug: 79921586
Test: Build and boot AOSP.
Change-Id: Iab75d271eb5b9812a127e708cf6b567d0c4c16f1
diff --git a/runtime/gc/allocator/rosalloc.cc b/runtime/gc/allocator/rosalloc.cc
index 8786365..f1572cd 100644
--- a/runtime/gc/allocator/rosalloc.cc
+++ b/runtime/gc/allocator/rosalloc.cc
@@ -1000,7 +1000,7 @@
 // If true, read the page map entries in BulkFree() without using the
 // lock for better performance, assuming that the existence of an
 // allocated chunk/pointer being freed in BulkFree() guarantees that
-// the page map entry won't change. Disabled for now.
+// the page map entry won't change.
 static constexpr bool kReadPageMapEntryWithoutLockInBulkFree = true;
 
 size_t RosAlloc::BulkFree(Thread* self, void** ptrs, size_t num_ptrs) {
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index 30da1ae..81bc445 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -2393,11 +2393,9 @@
     CheckEmptyMarkStack();
     TimingLogger::ScopedTiming split("Sweep", GetTimings());
     for (const auto& space : GetHeap()->GetContinuousSpaces()) {
-      if (space->IsContinuousMemMapAllocSpace()) {
+      if (space->IsContinuousMemMapAllocSpace() && space != region_space_
+          && !immune_spaces_.ContainsSpace(space)) {
         space::ContinuousMemMapAllocSpace* alloc_space = space->AsContinuousMemMapAllocSpace();
-        if (space == region_space_ || immune_spaces_.ContainsSpace(space)) {
-          continue;
-        }
         TimingLogger::ScopedTiming split2(
             alloc_space->IsZygoteSpace() ? "SweepZygoteSpace" : "SweepAllocSpace", GetTimings());
         RecordFree(alloc_space->Sweep(swap_bitmaps));
@@ -2643,7 +2641,8 @@
       CHECK_EQ(from_space_num_bytes_at_first_pause_, from_bytes + unevac_from_bytes);
     }
     CHECK_LE(to_objects, from_objects);
-    CHECK_LE(to_bytes, from_bytes);
+    // to_bytes <= from_bytes is only approximately true, because objects expand a little when
+    // copying to non-moving space in near-OOM situations.
     if (from_bytes > 0) {
       copied_live_bytes_ratio_sum_ += static_cast<float>(to_bytes) / from_bytes;
       gc_count_++;
@@ -2660,8 +2659,10 @@
       CHECK_GE(cleared_bytes, from_bytes);
       CHECK_GE(cleared_objects, from_objects);
     }
-    int64_t freed_bytes = cleared_bytes - to_bytes;
-    int64_t freed_objects = cleared_objects - to_objects;
+    // freed_bytes could conceivably be negative if we fall back to nonmoving space and have to
+    // pad to a larger size.
+    int64_t freed_bytes = (int64_t)cleared_bytes - (int64_t)to_bytes;
+    uint64_t freed_objects = cleared_objects - to_objects;
     if (kVerboseMode) {
       LOG(INFO) << "RecordFree:"
                 << " from_bytes=" << from_bytes << " from_objects=" << from_objects
@@ -3451,10 +3452,10 @@
       DCHECK(thread_running_gc_ != nullptr);
       if (LIKELY(self == thread_running_gc_)) {
         objects_moved_gc_thread_ += 1;
-        bytes_moved_gc_thread_ += region_space_alloc_size;
+        bytes_moved_gc_thread_ += bytes_allocated;
       } else {
         objects_moved_.fetch_add(1, std::memory_order_relaxed);
-        bytes_moved_.fetch_add(region_space_alloc_size, std::memory_order_relaxed);
+        bytes_moved_.fetch_add(bytes_allocated, std::memory_order_relaxed);
       }
 
       if (LIKELY(!fall_back_to_non_moving)) {
diff --git a/runtime/gc/collector/concurrent_copying.h b/runtime/gc/collector/concurrent_copying.h
index 44ee7c2..2e5752b 100644
--- a/runtime/gc/collector/concurrent_copying.h
+++ b/runtime/gc/collector/concurrent_copying.h
@@ -369,8 +369,8 @@
   // A cache of Heap::GetMarkBitmap().
   accounting::HeapBitmap* heap_mark_bitmap_;
   size_t live_stack_freeze_size_;
-  size_t from_space_num_objects_at_first_pause_;
-  size_t from_space_num_bytes_at_first_pause_;
+  size_t from_space_num_objects_at_first_pause_;  // Computed if kEnableFromSpaceAccountingCheck
+  size_t from_space_num_bytes_at_first_pause_;  // Computed if kEnableFromSpaceAccountingCheck
   Atomic<int> is_mark_stack_push_disallowed_;
   enum MarkStackMode {
     kMarkStackModeOff = 0,      // Mark stack is off.
@@ -384,9 +384,9 @@
   Atomic<MarkStackMode> mark_stack_mode_;
   bool weak_ref_access_enabled_ GUARDED_BY(Locks::thread_list_lock_);
 
-  // How many objects and bytes we moved. Used for accounting.
-  // GC thread moves many more objects than mutators.
-  // Therefore, we separate the two to avoid CAS.
+  // How many objects and bytes we moved. The GC thread moves many more objects
+  // than mutators.  Therefore, we separate the two to avoid CAS.  Bytes_moved_ and
+  // bytes_moved_gc_thread_ are critical for GC triggering; the others are just informative.
   Atomic<size_t> bytes_moved_;  // Used by mutators
   Atomic<size_t> objects_moved_;  // Used by mutators
   size_t bytes_moved_gc_thread_;  // Used by GC
@@ -417,7 +417,11 @@
 
   // The skipped blocks are memory blocks/chunks that were copies of
   // objects that were unused due to lost races (cas failures) at
-  // object copy/forward pointer install. They are reused.
+  // object copy/forward pointer install. They may be reused.
+  // Skipped blocks are always in region space. Their size is included directly
+  // in num_bytes_allocated_, i.e. they are treated as allocated, but may be directly
+  // used without going through a GC cycle like other objects. They are reused only
+  // if we run out of region space. TODO: Revisit this design.
   Mutex skipped_blocks_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
   std::multimap<size_t, uint8_t*> skipped_blocks_map_ GUARDED_BY(skipped_blocks_lock_);
   Atomic<size_t> to_space_bytes_skipped_;
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 532b3ef..59d7953 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -1613,8 +1613,8 @@
 
 void Heap::RecordFree(uint64_t freed_objects, int64_t freed_bytes) {
   // Use signed comparison since freed bytes can be negative when background compaction foreground
-  // transitions occurs. This is caused by the moving objects from a bump pointer space to a
-  // free list backed space typically increasing memory footprint due to padding and binning.
+  // transitions occur. This is typically due to objects moving from a bump pointer space to a
+  // free list backed space, which may increase memory footprint due to padding and binning.
   RACING_DCHECK_LE(freed_bytes,
                    static_cast<int64_t>(num_bytes_allocated_.load(std::memory_order_relaxed)));
   // Note: This relies on 2s complement for handling negative freed_bytes.
diff --git a/runtime/gc/space/region_space.cc b/runtime/gc/space/region_space.cc
index 5179702..a8b0d55 100644
--- a/runtime/gc/space/region_space.cc
+++ b/runtime/gc/space/region_space.cc
@@ -466,6 +466,7 @@
       CheckLiveBytesAgainstRegionBitmap(r);
     }
     if (r->IsInFromSpace()) {
+      DCHECK(!r->IsTlab());
       *cleared_bytes += r->BytesAllocated();
       *cleared_objects += r->ObjectsAllocated();
       --num_non_free_regions_;
diff --git a/runtime/gc/space/region_space.h b/runtime/gc/space/region_space.h
index 0bbc76a..26af633 100644
--- a/runtime/gc/space/region_space.h
+++ b/runtime/gc/space/region_space.h
@@ -360,7 +360,9 @@
     }
   }
 
+  // Increment object allocation count for region containing ref.
   void RecordAlloc(mirror::Object* ref) REQUIRES(!region_lock_);
+
   bool AllocNewTlab(Thread* self, size_t min_bytes) REQUIRES(!region_lock_);
 
   uint32_t Time() {
@@ -482,6 +484,10 @@
       return is_newly_allocated_;
     }
 
+    bool IsTlab() const {
+      return is_a_tlab_;
+    }
+
     bool IsInFromSpace() const {
       return type_ == RegionType::kRegionTypeFromSpace;
     }
@@ -552,6 +558,7 @@
       return live_bytes_;
     }
 
+    // Returns the number of allocated bytes.  "Bulk allocated" bytes in active TLABs are excluded.
     size_t BytesAllocated() const;
 
     size_t ObjectsAllocated() const;
diff --git a/runtime/gc/space/space.h b/runtime/gc/space/space.h
index 05ff55b..6a4095c 100644
--- a/runtime/gc/space/space.h
+++ b/runtime/gc/space/space.h
@@ -204,7 +204,7 @@
   // Alloc can be called from multiple threads at the same time and must be thread-safe.
   //
   // bytes_tl_bulk_allocated - bytes allocated in bulk ahead of time for a thread local allocation,
-  // if applicable. It can be
+  // if applicable. It is
   // 1) equal to bytes_allocated if it's not a thread local allocation,
   // 2) greater than bytes_allocated if it's a thread local
   //    allocation that required a new buffer, or
@@ -228,7 +228,7 @@
   // Returns how many bytes were freed.
   virtual size_t Free(Thread* self, mirror::Object* ptr) = 0;
 
-  // Returns how many bytes were freed.
+  // Free (deallocate) all objects in a list, and return the number of bytes freed.
   virtual size_t FreeList(Thread* self, size_t num_ptrs, mirror::Object** ptrs) = 0;
 
   // Revoke any sort of thread-local buffers that are used to speed up allocations for the given