ART: add post gc weighted allocated bytes metrics

Test: Run art with -XX:DumpGCPerformanceOnShutdown on some benchmarks.
Bug: 112187497
Change-Id: I2fc30c7dbb2d12ab8c3f98ad1f96672af6c5d438
diff --git a/runtime/gc/collector/garbage_collector.cc b/runtime/gc/collector/garbage_collector.cc
index 46ff7dc..2ef3d92 100644
--- a/runtime/gc/collector/garbage_collector.cc
+++ b/runtime/gc/collector/garbage_collector.cc
@@ -90,13 +90,14 @@
   Thread* self = Thread::Current();
   uint64_t start_time = NanoTime();
   uint64_t thread_cpu_start_time = ThreadCpuNanoTime();
-  GetHeap()->CalculateWeightedAllocatedBytes();
+  GetHeap()->CalculatePreGcWeightedAllocatedBytes();
   Iteration* current_iteration = GetCurrentIteration();
   current_iteration->Reset(gc_cause, clear_soft_references);
   // Note transaction mode is single-threaded and there's no asynchronous GC and this flag doesn't
   // change in the middle of a GC.
   is_transaction_active_ = Runtime::Current()->IsActiveTransaction();
   RunPhases();  // Run all the GC phases.
+  GetHeap()->CalculatePostGcWeightedAllocatedBytes();
   // Add the current timings to the cumulative timings.
   cumulative_timings_.AddLogger(*GetTimings());
   // Update cumulative statistics with how many bytes the GC iteration freed.
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index bfb1019..dc79731 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -214,8 +214,10 @@
       long_pause_log_threshold_(long_pause_log_threshold),
       long_gc_log_threshold_(long_gc_log_threshold),
       process_cpu_start_time_ns_(ProcessCpuNanoTime()),
-      last_process_cpu_time_ns_(process_cpu_start_time_ns_),
-      weighted_allocated_bytes_(0.0),
+      pre_gc_last_process_cpu_time_ns_(process_cpu_start_time_ns_),
+      post_gc_last_process_cpu_time_ns_(process_cpu_start_time_ns_),
+      pre_gc_weighted_allocated_bytes_(0.0),
+      post_gc_weighted_allocated_bytes_(0.0),
       ignore_max_footprint_(ignore_max_footprint),
       zygote_creation_lock_("zygote creation lock", kZygoteCreationLock),
       zygote_space_(nullptr),
@@ -1070,12 +1072,25 @@
   }
 }
 
-void Heap::CalculateWeightedAllocatedBytes() {
-  uint64_t current_process_cpu_time = ProcessCpuNanoTime();
+double Heap::CalculateGcWeightedAllocatedBytes(uint64_t gc_last_process_cpu_time_ns,
+                                               uint64_t current_process_cpu_time) const {
   uint64_t bytes_allocated = GetBytesAllocated();
-  double weight = current_process_cpu_time - last_process_cpu_time_ns_;
-  weighted_allocated_bytes_ += weight * bytes_allocated;
-  last_process_cpu_time_ns_ = current_process_cpu_time;
+  double weight = current_process_cpu_time - gc_last_process_cpu_time_ns;
+  return weight * bytes_allocated;
+}
+
+void Heap::CalculatePreGcWeightedAllocatedBytes() {
+  uint64_t current_process_cpu_time = ProcessCpuNanoTime();
+  pre_gc_weighted_allocated_bytes_ +=
+    CalculateGcWeightedAllocatedBytes(pre_gc_last_process_cpu_time_ns_, current_process_cpu_time);
+  pre_gc_last_process_cpu_time_ns_ = current_process_cpu_time;
+}
+
+void Heap::CalculatePostGcWeightedAllocatedBytes() {
+  uint64_t current_process_cpu_time = ProcessCpuNanoTime();
+  post_gc_weighted_allocated_bytes_ +=
+    CalculateGcWeightedAllocatedBytes(post_gc_last_process_cpu_time_ns_, current_process_cpu_time);
+  post_gc_last_process_cpu_time_ns_ = current_process_cpu_time;
 }
 
 uint64_t Heap::GetTotalGcCpuTime() {
@@ -1157,8 +1172,12 @@
   }
 
   process_cpu_start_time_ns_ = ProcessCpuNanoTime();
-  last_process_cpu_time_ns_ = process_cpu_start_time_ns_;
-  weighted_allocated_bytes_ = 0u;
+
+  pre_gc_last_process_cpu_time_ns_ = process_cpu_start_time_ns_;
+  pre_gc_weighted_allocated_bytes_ = 0u;
+
+  post_gc_last_process_cpu_time_ns_ = process_cpu_start_time_ns_;
+  post_gc_weighted_allocated_bytes_ = 0u;
 
   total_bytes_freed_ever_ = 0;
   total_objects_freed_ever_ = 0;
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index 57c7376..504eff2 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -397,11 +397,16 @@
     REQUIRES(!Locks::heap_bitmap_lock_)
     REQUIRES(Locks::mutator_lock_);
 
-  double GetWeightedAllocatedBytes() const {
-    return weighted_allocated_bytes_;
+  double GetPreGcWeightedAllocatedBytes() const {
+    return pre_gc_weighted_allocated_bytes_;
   }
 
-  void CalculateWeightedAllocatedBytes();
+  double GetPostGcWeightedAllocatedBytes() const {
+    return post_gc_weighted_allocated_bytes_;
+  }
+
+  void CalculatePreGcWeightedAllocatedBytes();
+  void CalculatePostGcWeightedAllocatedBytes();
   uint64_t GetTotalGcCpuTime();
 
   uint64_t GetProcessCpuStartTime() const {
@@ -858,6 +863,9 @@
       REQUIRES(!*gc_complete_lock_);
   void FinishGC(Thread* self, collector::GcType gc_type) REQUIRES(!*gc_complete_lock_);
 
+  double CalculateGcWeightedAllocatedBytes(uint64_t gc_last_process_cpu_time_ns,
+                                           uint64_t current_process_cpu_time) const;
+
   // Create a mem map with a preferred base address.
   static MemMap MapAnonymousPreferredAddress(const char* name,
                                              uint8_t* request_begin,
@@ -1175,11 +1183,14 @@
   // Starting time of the new process; meant to be used for measuring total process CPU time.
   uint64_t process_cpu_start_time_ns_;
 
-  // Last time GC started; meant to be used to measure the duration between two GCs.
-  uint64_t last_process_cpu_time_ns_;
+  // Last time (before and after) GC started; meant to be used to measure the
+  // duration between two GCs.
+  uint64_t pre_gc_last_process_cpu_time_ns_;
+  uint64_t post_gc_last_process_cpu_time_ns_;
 
-  // allocated_bytes * (current_process_cpu_time - last_process_cpu_time)
-  double weighted_allocated_bytes_;
+  // allocated_bytes * (current_process_cpu_time - [pre|post]_gc_last_process_cpu_time)
+  double pre_gc_weighted_allocated_bytes_;
+  double post_gc_weighted_allocated_bytes_;
 
   // If we ignore the max footprint it lets the heap grow until it hits the heap capacity, this is
   // useful for benchmarking since it reduces time spent in GC to a low %.
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 69ef2fb..d2c915e 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -320,7 +320,8 @@
   }
 
   if (dump_gc_performance_on_shutdown_) {
-    heap_->CalculateWeightedAllocatedBytes();
+    heap_->CalculatePreGcWeightedAllocatedBytes();
+    heap_->CalculatePostGcWeightedAllocatedBytes();
     uint64_t process_cpu_end_time = ProcessCpuNanoTime();
     ScopedLogSeverity sls(LogSeverity::INFO);
     // This can't be called from the Heap destructor below because it
@@ -335,9 +336,15 @@
         << " out of process CPU time " << PrettyDuration(process_cpu_time)
         << " (" << ratio << ")"
         << "\n";
-    double weighted_allocated_bytes = heap_->GetWeightedAllocatedBytes() / process_cpu_time;
-    LOG_STREAM(INFO) << "Weighted bytes allocated over CPU time: "
-        << " (" <<  PrettySize(weighted_allocated_bytes)  << ")"
+    double pre_gc_weighted_allocated_bytes =
+        heap_->GetPreGcWeightedAllocatedBytes() / process_cpu_time;
+    double post_gc_weighted_allocated_bytes =
+        heap_->GetPostGcWeightedAllocatedBytes() / process_cpu_time;
+
+    LOG_STREAM(INFO) << "Pre GC weighted bytes allocated over CPU time: "
+        << " (" <<  PrettySize(pre_gc_weighted_allocated_bytes)  << ")";
+    LOG_STREAM(INFO) << "Post GC weighted bytes allocated over CPU time: "
+        << " (" <<  PrettySize(post_gc_weighted_allocated_bytes)  << ")"
         << "\n";
   }