Restore the old behavior of DDMS recent allocation tracking

Make the allocation tracker mark recently allocated objects as roots,
so their allocation records are not swept; DDMS needs recent allocation
records even for objects that have since died. This should fix the
flaky failures of the 098-ddmc test, although the test still cannot
pass the GC stress configuration (OOM). Re-enable 098-ddmc for all
other configurations.
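
In rough terms, the new root visit works as follows (a simplified
excerpt of the allocation_record.cc change below, not additional code):

  void AllocRecordObjectMap::VisitRoots(RootVisitor* visitor) {
    BufferedRootVisitor<kDefaultBufferedRootCount> buffered_visitor(
        visitor, RootInfo(kRootDebugger));
    // Pin only the newest recent_record_max_ entries; objects of older
    // records can still be swept as before.
    size_t count = recent_record_max_;
    for (auto it = entries_.rbegin(), end = entries_.rend();
         count > 0 && it != end; count--, ++it) {
      buffered_visitor.VisitRoot(it->first);
    }
  }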

There should be an option to not mark them as roots when the user only
needs an HPROF dump with traces and not DDMS's recent allocation
tracking. That will probably require a new JNI API function.

An alternative would be to keep a second list of recent allocation
records and maintain a type cache for them, so the objects would not
have to become roots. That approach is more complex, though, and it is
not clear which one is better. A rough sketch is shown below.
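
A rough sketch of that alternative, using hypothetical names that are
not part of this change:

  // Hypothetical alternative (not implemented here): copy each new record
  // into a bounded side list and cache the class name eagerly, so the GC
  // never has to treat the allocated objects themselves as roots.
  struct RecentAllocRecord {
    std::string class_name;                            // cached at allocation time
    size_t byte_count;
    std::vector<AllocRecordStackTraceElement> frames;  // copied stack trace
  };
  std::deque<RecentAllocRecord> recent_records_;       // capped at recent_record_max_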

Also reduce the memory usage of AllocRecordStackTrace objects and change
the default stack depth to 16. Rename the property that controls the
stack depth to "debug.allocTracker.stackDepth" so developers can change
it.

Bug: 20037135
Change-Id: Ic6b9ae87bdcda558be6f14ded8057e763439881c
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index 5918c10..b465985 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -54,10 +54,6 @@
 #include "verifier/method_verifier-inl.h"
 #include "well_known_classes.h"
 
-#ifdef HAVE_ANDROID_OS
-#include "cutils/properties.h"
-#endif
-
 namespace art {
 
 // The key identifying the debugger to update instrumentation.
@@ -65,22 +61,7 @@
 
 // Limit alloc_record_count to the 2BE value (64k-1) that is the limit of the current protocol.
 static uint16_t CappedAllocRecordCount(size_t alloc_record_count) {
-  size_t cap = 0xffff;
-#ifdef HAVE_ANDROID_OS
-  // Check whether there's a system property overriding the number of recent records.
-  const char* propertyName = "dalvik.vm.recentAllocMax";
-  char recentAllocMaxString[PROPERTY_VALUE_MAX];
-  if (property_get(propertyName, recentAllocMaxString, "") > 0) {
-    char* end;
-    size_t value = strtoul(recentAllocMaxString, &end, 10);
-    if (*end != '\0') {
-      LOG(ERROR) << "Ignoring  " << propertyName << " '" << recentAllocMaxString
-                 << "' --- invalid";
-    } else {
-      cap = value;
-    }
-  }
-#endif
+  const size_t cap = 0xffff;
   if (alloc_record_count > cap) {
     return cap;
   }
@@ -4725,7 +4706,7 @@
   gc::AllocRecordObjectMap* records = Runtime::Current()->GetHeap()->GetAllocationRecords();
   CHECK(records != nullptr);
 
-  const uint16_t capped_count = CappedAllocRecordCount(records->Size());
+  const uint16_t capped_count = CappedAllocRecordCount(records->GetRecentAllocationSize());
   uint16_t count = capped_count;
 
   LOG(INFO) << "Tracked allocations, (count=" << count << ")";
@@ -4863,7 +4844,7 @@
     StringTable method_names;
     StringTable filenames;
 
-    const uint16_t capped_count = CappedAllocRecordCount(records->Size());
+    const uint16_t capped_count = CappedAllocRecordCount(records->GetRecentAllocationSize());
     uint16_t count = capped_count;
     for (auto it = records->RBegin(), end = records->REnd();
          count > 0 && it != end; count--, it++) {
diff --git a/runtime/gc/allocation_record.cc b/runtime/gc/allocation_record.cc
index a385363..ac7de63 100644
--- a/runtime/gc/allocation_record.cc
+++ b/runtime/gc/allocation_record.cc
@@ -45,10 +45,29 @@
                  << "' --- invalid";
     } else {
       alloc_record_max_ = value;
+      if (recent_record_max_ > value) {
+        recent_record_max_ = value;
+      }
+    }
+  }
+  // Check whether there's a system property overriding the number of recent records.
+  propertyName = "dalvik.vm.recentAllocMax";
+  char recentAllocMaxString[PROPERTY_VALUE_MAX];
+  if (property_get(propertyName, recentAllocMaxString, "") > 0) {
+    char* end;
+    size_t value = strtoul(recentAllocMaxString, &end, 10);
+    if (*end != '\0') {
+      LOG(ERROR) << "Ignoring  " << propertyName << " '" << recentAllocMaxString
+                 << "' --- invalid";
+    } else if (value > alloc_record_max_) {
+      LOG(ERROR) << "Ignoring  " << propertyName << " '" << recentAllocMaxString
+                 << "' --- should be less than " << alloc_record_max_;
+    } else {
+      recent_record_max_ = value;
     }
   }
   // Check whether there's a system property overriding the max depth of stack trace.
-  propertyName = "dalvik.vm.allocStackDepth";
+  propertyName = "debug.allocTracker.stackDepth";
   char stackDepthString[PROPERTY_VALUE_MAX];
   if (property_get(propertyName, stackDepthString, "") > 0) {
     char* end;
@@ -56,6 +75,10 @@
     if (*end != '\0') {
       LOG(ERROR) << "Ignoring  " << propertyName << " '" << stackDepthString
                  << "' --- invalid";
+    } else if (value > kMaxSupportedStackDepth) {
+      LOG(WARNING) << propertyName << " '" << stackDepthString << "' too large, using "
+                   << kMaxSupportedStackDepth;
+      max_stack_depth_ = kMaxSupportedStackDepth;
     } else {
       max_stack_depth_ = value;
     }
@@ -67,6 +90,20 @@
   STLDeleteValues(&entries_);
 }
 
+void AllocRecordObjectMap::VisitRoots(RootVisitor* visitor) {
+  CHECK_LE(recent_record_max_, alloc_record_max_);
+  BufferedRootVisitor<kDefaultBufferedRootCount> buffered_visitor(visitor, RootInfo(kRootDebugger));
+  size_t count = recent_record_max_;
+  // Only visit the newest recent_record_max_ objects in entries_.
+  // They need to be retained for DDMS's recent allocation tracking.
+  // TODO: This causes the 098-ddmc test to run out of memory under the GC stress test.
+  // There should be an option that does not keep these objects live if allocation tracking is only
+  // for the purpose of an HPROF dump. b/20037135
+  for (auto it = entries_.rbegin(), end = entries_.rend(); count > 0 && it != end; count--, ++it) {
+    buffered_visitor.VisitRoot(it->first);
+  }
+}
+
 void AllocRecordObjectMap::SweepAllocationRecords(IsMarkedCallback* callback, void* arg) {
   VLOG(heap) << "Start SweepAllocationRecords()";
   size_t count_deleted = 0, count_moved = 0;
@@ -139,6 +176,7 @@
       if (self_name == "JDWP") {
         records->alloc_ddm_thread_id_ = self->GetTid();
       }
+      records->scratch_trace_.SetDepth(records->max_stack_depth_);
       size_t sz = sizeof(AllocRecordStackTraceElement) * records->max_stack_depth_ +
                   sizeof(AllocRecord) + sizeof(AllocRecordStackTrace);
       LOG(INFO) << "Enabling alloc tracker (" << records->alloc_record_max_ << " entries of "
@@ -181,19 +219,14 @@
 
   DCHECK_LE(records->Size(), records->alloc_record_max_);
 
-  // Remove oldest record.
-  if (records->Size() == records->alloc_record_max_) {
-    records->RemoveOldest();
-  }
-
   // Get stack trace.
-  const size_t max_depth = records->max_stack_depth_;
-  AllocRecordStackTrace* trace = new AllocRecordStackTrace(self->GetTid(), max_depth);
-  // add scope to make "visitor" destroyed promptly, in order to set the trace->depth_
+  // Add a scope so that "visitor" is destroyed promptly, in order to set scratch_trace_.depth_.
   {
-    AllocRecordStackVisitor visitor(self, trace, max_depth);
+    AllocRecordStackVisitor visitor(self, &records->scratch_trace_, records->max_stack_depth_);
     visitor.WalkStack();
   }
+  records->scratch_trace_.SetTid(self->GetTid());
+  AllocRecordStackTrace* trace = new AllocRecordStackTrace(records->scratch_trace_);
 
   // Fill in the basics.
   AllocRecord* record = new AllocRecord(byte_count, trace);
diff --git a/runtime/gc/allocation_record.h b/runtime/gc/allocation_record.h
index 45b3406..5214b6b 100644
--- a/runtime/gc/allocation_record.h
+++ b/runtime/gc/allocation_record.h
@@ -71,8 +71,15 @@
  public:
   static constexpr size_t kHashMultiplier = 17;
 
-  AllocRecordStackTrace(pid_t tid, size_t max_depth)
-      : tid_(tid), depth_(0), stack_(new AllocRecordStackTraceElement[max_depth]) {}
+  explicit AllocRecordStackTrace(size_t max_depth)
+      : tid_(0), depth_(0), stack_(new AllocRecordStackTraceElement[max_depth]) {}
+
+  AllocRecordStackTrace(const AllocRecordStackTrace& r)
+      : tid_(r.tid_), depth_(r.depth_), stack_(new AllocRecordStackTraceElement[r.depth_]) {
+    for (size_t i = 0; i < depth_; ++i) {
+      stack_[i] = r.stack_[i];
+    }
+  }
 
   ~AllocRecordStackTrace() {
     delete[] stack_;
@@ -82,6 +89,10 @@
     return tid_;
   }
 
+  void SetTid(pid_t t) {
+    tid_ = t;
+  }
+
   size_t GetDepth() const {
     return depth_;
   }
@@ -102,6 +113,7 @@
 
   bool operator==(const AllocRecordStackTrace& other) const {
     if (this == &other) return true;
+    if (tid_ != other.tid_) return false;
     if (depth_ != other.depth_) return false;
     for (size_t i = 0; i < depth_; ++i) {
       if (!(stack_[i] == other.stack_[i])) return false;
@@ -110,7 +122,7 @@
   }
 
  private:
-  const pid_t tid_;
+  pid_t tid_;
   size_t depth_;
   AllocRecordStackTraceElement* const stack_;
 };
@@ -200,7 +212,9 @@
 
   AllocRecordObjectMap() EXCLUSIVE_LOCKS_REQUIRED(Locks::alloc_tracker_lock_)
       : alloc_record_max_(kDefaultNumAllocRecords),
+        recent_record_max_(kDefaultNumRecentRecords),
         max_stack_depth_(kDefaultAllocStackDepth),
+        scratch_trace_(kMaxSupportedStackDepth),
         alloc_ddm_thread_id_(0) {}
 
   ~AllocRecordObjectMap();
@@ -208,6 +222,10 @@
   void Put(mirror::Object* obj, AllocRecord* record)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::alloc_tracker_lock_) {
+    if (entries_.size() == alloc_record_max_) {
+      delete entries_.front().second;
+      entries_.pop_front();
+    }
     entries_.emplace_back(GcRoot<mirror::Object>(obj), record);
   }
 
@@ -215,17 +233,19 @@
     return entries_.size();
   }
 
-  void SweepAllocationRecords(IsMarkedCallback* callback, void* arg)
+  size_t GetRecentAllocationSize() const SHARED_LOCKS_REQUIRED(Locks::alloc_tracker_lock_) {
+    CHECK_LE(recent_record_max_, alloc_record_max_);
+    size_t sz = entries_.size();
+    return std::min(recent_record_max_, sz);
+  }
+
+  void VisitRoots(RootVisitor* visitor)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::alloc_tracker_lock_);
 
-  void RemoveOldest()
+  void SweepAllocationRecords(IsMarkedCallback* callback, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::alloc_tracker_lock_) {
-    DCHECK(!entries_.empty());
-    delete entries_.front().second;
-    entries_.pop_front();
-  }
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::alloc_tracker_lock_);
 
   // TODO: Is there a better way to hide the entries_'s type?
   EntryList::iterator Begin()
@@ -254,12 +274,13 @@
 
  private:
   static constexpr size_t kDefaultNumAllocRecords = 512 * 1024;
-  static constexpr size_t kDefaultAllocStackDepth = 4;
+  static constexpr size_t kDefaultNumRecentRecords = 64 * 1024 - 1;
+  static constexpr size_t kDefaultAllocStackDepth = 16;
+  static constexpr size_t kMaxSupportedStackDepth = 128;
   size_t alloc_record_max_ GUARDED_BY(Locks::alloc_tracker_lock_);
-  // The implementation always allocates max_stack_depth_ number of frames for each stack trace.
-  // As long as the max depth is not very large, this is not a waste of memory since most stack
-  // traces will fill up the max depth number of the frames.
+  size_t recent_record_max_ GUARDED_BY(Locks::alloc_tracker_lock_);
   size_t max_stack_depth_ GUARDED_BY(Locks::alloc_tracker_lock_);
+  AllocRecordStackTrace scratch_trace_ GUARDED_BY(Locks::alloc_tracker_lock_);
   pid_t alloc_ddm_thread_id_ GUARDED_BY(Locks::alloc_tracker_lock_);
   EntryList entries_ GUARDED_BY(Locks::alloc_tracker_lock_);
 
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 3c020e2..fdac279 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -3681,6 +3681,15 @@
   allocation_records_.reset(records);
 }
 
+void Heap::VisitAllocationRecords(RootVisitor* visitor) const {
+  if (IsAllocTrackingEnabled()) {
+    MutexLock mu(Thread::Current(), *Locks::alloc_tracker_lock_);
+    if (IsAllocTrackingEnabled()) {
+      GetAllocationRecords()->VisitRoots(visitor);
+    }
+  }
+}
+
 void Heap::SweepAllocationRecords(IsMarkedCallback* visitor, void* arg) const {
   if (IsAllocTrackingEnabled()) {
     MutexLock mu(Thread::Current(), *Locks::alloc_tracker_lock_);
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index 18244c8..b9afa23 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -703,6 +703,9 @@
   void SetAllocationRecords(AllocRecordObjectMap* records)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::alloc_tracker_lock_);
 
+  void VisitAllocationRecords(RootVisitor* visitor) const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   void SweepAllocationRecords(IsMarkedCallback* visitor, void* arg) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
diff --git a/runtime/gc_root.h b/runtime/gc_root.h
index d6146f3..bb604f0 100644
--- a/runtime/gc_root.h
+++ b/runtime/gc_root.h
@@ -46,7 +46,9 @@
   kRootMonitorUsed,
   kRootThreadObject,
   kRootInternedString,
+  kRootFinalizing,  // used for HPROF's conversion to HprofHeapTag
   kRootDebugger,
+  kRootReferenceCleanup,  // used for HPROF's conversion to HprofHeapTag
   kRootVMInternal,
   kRootJNIMonitor,
 };
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 7a78928..77d1425 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -1378,6 +1378,7 @@
 void Runtime::VisitConcurrentRoots(RootVisitor* visitor, VisitRootFlags flags) {
   intern_table_->VisitRoots(visitor, flags);
   class_linker_->VisitRoots(visitor, flags);
+  heap_->VisitAllocationRecords(visitor);
   if ((flags & kVisitRootFlagNewRoots) == 0) {
     // Guaranteed to have no new roots in the constant roots.
     VisitConstantRoots(visitor);
diff --git a/test/098-ddmc/src/Main.java b/test/098-ddmc/src/Main.java
index 4914ba2..f41ff2a 100644
--- a/test/098-ddmc/src/Main.java
+++ b/test/098-ddmc/src/Main.java
@@ -43,24 +43,14 @@
 
         System.out.println("Confirm when we overflow, we don't roll over to zero. b/17392248");
         final int overflowAllocations = 64 * 1024;  // Won't fit in unsigned 16-bit value.
-        // TODO: Temporary fix. Keep the new objects live so they are not garbage collected.
-        // This will cause OOM exception for GC stress tests. The root cause is changed behaviour of
-        // getRecentAllocations(). Working on restoring its old behaviour. b/20037135
-        Object[] objects = new Object[overflowAllocations];
         for (int i = 0; i < overflowAllocations; i++) {
-            objects[i] = new Object();
+            new Object();
         }
         Allocations after = new Allocations(DdmVmInternal.getRecentAllocations());
         System.out.println("before < overflowAllocations=" + (before.numberOfEntries < overflowAllocations));
         System.out.println("after > before=" + (after.numberOfEntries > before.numberOfEntries));
         System.out.println("after.numberOfEntries=" + after.numberOfEntries);
 
-        // TODO: Temporary fix as above. b/20037135
-        objects = null;
-        Runtime.getRuntime().gc();
-        final int fillerStrings = 16 * 1024;
-        String[] strings = new String[fillerStrings];
-
         System.out.println("Disable and confirm back to empty");
         DdmVmInternal.enableRecentAllocations(false);
         System.out.println("status=" + DdmVmInternal.getRecentAllocationStatus());
@@ -76,8 +66,8 @@
         System.out.println("Confirm we can reenable twice in a row without losing allocations");
         DdmVmInternal.enableRecentAllocations(true);
         System.out.println("status=" + DdmVmInternal.getRecentAllocationStatus());
-        for (int i = 0; i < fillerStrings; i++) {
-            strings[i] = new String("fnord");
+        for (int i = 0; i < 16 * 1024; i++) {
+            new String("fnord");
         }
         Allocations first = new Allocations(DdmVmInternal.getRecentAllocations());
         DdmVmInternal.enableRecentAllocations(true);
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index 60165d9..ac9656b 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -229,10 +229,19 @@
 
 TEST_ART_BROKEN_NO_RELOCATE_TESTS :=
 
-# 098-ddmc is broken until we restore the old behavior of getRecentAllocation() of DDMS. b/20037135
-ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
-    $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
-    $(IMAGE_TYPES), $(PICTEST_TYPES), $(DEBUGGABLE_TYPES), 098-ddmc, $(ALL_ADDRESS_SIZES))
+# Tests that are broken with GC stress.
+# 098-ddmc is broken until the allocation tracker no longer marks recently allocated objects as roots.
+# Marking them as roots keeps behavior consistent with DDMS's getRecentAllocations(). b/20037135
+TEST_ART_BROKEN_GCSTRESS_RUN_TESTS := \
+  098-ddmc
+
+ifneq (,$(filter gcstress,$(GC_TYPES)))
+  ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
+      $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),gcstress,$(JNI_TYPES), \
+      $(IMAGE_TYPES), $(PICTEST_TYPES), $(DEBUGGABLE_TYPES), $(TEST_ART_BROKEN_GCSTRESS_RUN_TESTS), $(ALL_ADDRESS_SIZES))
+endif
+
+TEST_ART_BROKEN_GCSTRESS_RUN_TESTS :=
 
 # 115-native-bridge setup is complicated. Need to implement it correctly for the target.
 ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,target,$(RUN_TYPES),$(PREBUILD_TYPES),$(COMPILER_TYPES), \