ART: add GC live ratio metrics to GC performance

Record average GC live ratio (copied bytes over allocated bytes in
from-space), and GC count for both minor and major ConcurrentCopy GC,
and print both if DumpGCPerformanceOnShutdown is set.

Test: Run art with -XX:DumpGCPerformanceOnShutdown on some benchmarks.
Bug: 112187497
Change-Id: I587f92d847b899c81bf7a6d7303f7b54527c6f33
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index 26a8d13..aba1c5a 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -93,6 +93,8 @@
       from_space_num_bytes_at_first_pause_(0),
       mark_stack_mode_(kMarkStackModeOff),
       weak_ref_access_enabled_(true),
+      copied_live_bytes_ratio_sum_(0.f),
+      gc_count_(0),
       young_gen_(young_gen),
       skipped_blocks_lock_("concurrent copying bytes blocks lock", kMarkSweepMarkStackLock),
       measure_read_barrier_slow_path_(measure_read_barrier_slow_path),
@@ -2038,6 +2040,11 @@
     }
     CHECK_LE(to_objects, from_objects);
     CHECK_LE(to_bytes, from_bytes);
+    if (from_bytes > 0) {
+      copied_live_bytes_ratio_sum_ += static_cast<float>(to_bytes) / from_bytes;
+      gc_count_++;
+    }
+
     // Cleared bytes and objects, populated by the call to RegionSpace::ClearFromSpace below.
     uint64_t cleared_bytes;
     uint64_t cleared_objects;
@@ -3204,6 +3211,15 @@
   if (rb_slow_path_count_gc_total_ > 0) {
     os << "GC slow path count " << rb_slow_path_count_gc_total_ << "\n";
   }
+  float average_ratio = copied_live_bytes_ratio_sum_ / gc_count_;
+
+  if (young_gen_) {
+    os << "Average minor GC copied live bytes ratio "
+       << average_ratio << " over " << gc_count_ << " minor GCs\n";
+  } else {
+    os << "Average major GC copied live bytes ratio "
+       << average_ratio << " over " << gc_count_ << " major GCs\n";
+  }
   os << "Cumulative bytes moved "
      << cumulative_bytes_moved_.load(std::memory_order_relaxed) << "\n";
   os << "Cumulative objects moved "
diff --git a/runtime/gc/collector/concurrent_copying.h b/runtime/gc/collector/concurrent_copying.h
index 1a7464a..cd086c4 100644
--- a/runtime/gc/collector/concurrent_copying.h
+++ b/runtime/gc/collector/concurrent_copying.h
@@ -359,6 +359,18 @@
   Atomic<uint64_t> cumulative_bytes_moved_;
   Atomic<uint64_t> cumulative_objects_moved_;
 
+  // copied_live_bytes_ratio_sum_ and gc_count_ are read and written by CC per
+  // GC, in ReclaimPhase, and are read by DumpPerformanceInfo (potentially from
+  // another thread). However, at present, DumpPerformanceInfo is only called
+  // when the runtime shuts down, so there is no concurrent access.
+  // The sum of all copied live bytes ratios (to_bytes/from_bytes).
+  float copied_live_bytes_ratio_sum_;
+  // The number of GCs counted, used to calculate the average above. (It
+  // doesn't include GCs where from_bytes is zero, i.e. from-space is empty,
+  // which is possible for a minor GC if all allocated objects are in the
+  // non-moving space.)
+  size_t gc_count_;
+
   // Generational "sticky", only trace through dirty objects in region space.
   const bool young_gen_;
   // If true, the GC thread is done scanning marked objects on dirty and aged