Make allocation tracker use less memory

The allocation tracker no longer keeps recently allocated objects live.
Instead it keeps only their class objects live, as strong roots. This
fixes the gc-stress test failure in 098-ddmc.
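
As a rough illustration of the new rooting scheme, the sketch below uses
simplified stand-in types (not ART's GcRoot, mirror::Object or
mirror::Class): each record carries the allocated object's class, and the
root visitor marks only the class roots of the most recent
recent_record_max_ records as strong, so the tracked objects themselves
can be reclaimed.

  // Minimal sketch with made-up types; the real code lives in
  // runtime/gc/allocation_record.{h,cc}.
  #include <cstddef>
  #include <cstdio>
  #include <list>
  #include <string>
  #include <utility>

  struct Class { std::string descriptor; };
  struct Object { Class* klass; };

  // Simplified record: keeps the allocated size and a pointer to the class,
  // not the allocated object itself.
  struct AllocRecord {
    std::size_t byte_count;
    Class* klass;  // strong root for recent records, weak otherwise
  };

  // The object pointer (pair.first) stands in for a weak root here.
  using EntryList = std::list<std::pair<Object*, AllocRecord>>;

  // Visit only the class roots of the last recent_max records; the objects
  // themselves are never visited and may be reclaimed by the GC.
  template <typename Visitor>
  void VisitRecentClassRoots(EntryList& entries, std::size_t recent_max,
                             Visitor&& visit) {
    std::size_t count = recent_max;
    for (auto it = entries.rbegin(), end = entries.rend();
         count > 0 && it != end; --count, ++it) {
      visit(it->second.klass);
    }
  }

  int main() {
    Class string_class{"Ljava/lang/String;"};
    Object obj{&string_class};
    EntryList entries;
    entries.emplace_back(&obj, AllocRecord{16, &string_class});
    VisitRecentClassRoots(entries, 64, [](Class* c) {
      std::printf("strong root: %s\n", c->descriptor.c_str());
    });
    return 0;
  }

In the actual change the object pointers stay in entries_ as weak roots
(and may be nulled during sweeping), while SweepClassObject() updates the
remaining klass_ roots when the GC moves a class.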

Also fixed the allocation tracker's interaction with
DisallowNewSystemWeaks() by making new allocations wait until the GC has
finished sweeping the allocation records. I did not observe any
significant slowdown from this wait.
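
The wait is essentially a condition-variable handshake guarded by
alloc_tracker_lock_. Below is a minimal sketch of the same pattern using
std::mutex and std::condition_variable instead of ART's Mutex and
ConditionVariable; the RecordGate name and its members are made up for
illustration.

  #include <condition_variable>
  #include <mutex>

  class RecordGate {
   public:
    // GC calls this before sweeping weak roots (DisallowNewSystemWeaks()).
    void Disallow() {
      std::lock_guard<std::mutex> lock(mu_);
      allow_new_record_ = false;
    }

    // GC calls this once sweeping is done (AllowNewSystemWeaks()).
    void Allow() {
      std::lock_guard<std::mutex> lock(mu_);
      allow_new_record_ = true;
      cond_.notify_all();
    }

    // RecordAllocation() calls this; it blocks while new records are
    // disallowed and returns once sweeping has completed.
    void WaitUntilAllowed() {
      std::unique_lock<std::mutex> lock(mu_);
      cond_.wait(lock, [this] { return allow_new_record_; });
    }

   private:
    std::mutex mu_;
    std::condition_variable cond_;
    bool allow_new_record_ = true;
  };

In the real change the allow_new_record_ flag and new_record_condition_
live in AllocRecordObjectMap, and Runtime::DisallowNewSystemWeaks() /
AllowNewSystemWeaks() reach them through the new Heap methods.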

Bug: 20037135
Change-Id: I6a98188832cf7ee478007e3788e742dc6e18f7b8
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index f04f356..de46b35 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -4715,7 +4715,7 @@
     const gc::AllocRecord* record = it->second;
 
     LOG(INFO) << StringPrintf(" Thread %-2d %6zd bytes ", record->GetTid(), record->ByteCount())
-              << PrettyClass(it->first.Read()->GetClass());
+              << PrettyClass(record->GetClass());
 
     for (size_t stack_frame = 0, depth = record->GetDepth(); stack_frame < depth; ++stack_frame) {
       const gc::AllocRecordStackTraceElement& stack_element = record->StackElement(stack_frame);
@@ -4850,7 +4850,7 @@
          count > 0 && it != end; count--, it++) {
       const gc::AllocRecord* record = it->second;
       std::string temp;
-      class_names.Add(it->first.Read()->GetClass()->GetDescriptor(&temp));
+      class_names.Add(record->GetClass()->GetDescriptor(&temp));
       for (size_t i = 0, depth = record->GetDepth(); i < depth; i++) {
         ArtMethod* m = record->StackElement(i).GetMethod();
         class_names.Add(m->GetDeclaringClassDescriptor());
@@ -4902,7 +4902,7 @@
       const gc::AllocRecord* record = it->second;
       size_t stack_depth = record->GetDepth();
       size_t allocated_object_class_name_index =
-          class_names.IndexOf(it->first.Read()->GetClass()->GetDescriptor(&temp));
+          class_names.IndexOf(record->GetClass()->GetDescriptor(&temp));
       JDWP::Append4BE(bytes, record->ByteCount());
       JDWP::Append2BE(bytes, static_cast<uint16_t>(record->GetTid()));
       JDWP::Append2BE(bytes, allocated_object_class_name_index);
diff --git a/runtime/gc/allocation_record.cc b/runtime/gc/allocation_record.cc
index ac7de63..c5b9f65 100644
--- a/runtime/gc/allocation_record.cc
+++ b/runtime/gc/allocation_record.cc
@@ -94,33 +94,58 @@
   CHECK_LE(recent_record_max_, alloc_record_max_);
   BufferedRootVisitor<kDefaultBufferedRootCount> buffered_visitor(visitor, RootInfo(kRootDebugger));
   size_t count = recent_record_max_;
-  // Only visit the last recent_record_max_ number of objects in entries_.
-  // They need to be retained for DDMS's recent allocation tracking.
-  // TODO: This will cause 098-ddmc test to run out of memory for GC stress test.
-  // There should be an option that do not keep these objects live if allocation tracking is only
-  // for the purpose of an HPROF dump. b/20037135
+  // Only visit the last recent_record_max_ number of allocation records in entries_ and mark the
+  // klass_ fields as strong roots.
   for (auto it = entries_.rbegin(), end = entries_.rend(); count > 0 && it != end; count--, ++it) {
-    buffered_visitor.VisitRoot(it->first);
+    buffered_visitor.VisitRoot(it->second->GetClassGcRoot());
+  }
+}
+
+static inline void SweepClassObject(AllocRecord* record, IsMarkedCallback* callback, void* arg)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+    EXCLUSIVE_LOCKS_REQUIRED(Locks::alloc_tracker_lock_) {
+  GcRoot<mirror::Class>& klass = record->GetClassGcRoot();
+  // This does not need a read barrier because this is called by GC.
+  mirror::Object* old_object = klass.Read<kWithoutReadBarrier>();
+  mirror::Object* new_object = callback(old_object, arg);
+  if (UNLIKELY(old_object != new_object)) {
+    mirror::Class* new_klass = (UNLIKELY(new_object == nullptr) ? nullptr : new_object->AsClass());
+    klass = GcRoot<mirror::Class>(new_klass);
   }
 }
 
 void AllocRecordObjectMap::SweepAllocationRecords(IsMarkedCallback* callback, void* arg) {
   VLOG(heap) << "Start SweepAllocationRecords()";
-  size_t count_deleted = 0, count_moved = 0;
+  size_t count_deleted = 0, count_moved = 0, count = 0;
+  // Only the first (size - recent_record_max_) records can be deleted.
+  size_t delete_bound;
+  if (entries_.size() <= recent_record_max_) {
+    delete_bound = 0;
+  } else {
+    delete_bound = entries_.size() - recent_record_max_;
+  }
   for (auto it = entries_.begin(), end = entries_.end(); it != end;) {
+    ++count;
     // This does not need a read barrier because this is called by GC.
     mirror::Object* old_object = it->first.Read<kWithoutReadBarrier>();
     AllocRecord* record = it->second;
     mirror::Object* new_object = callback(old_object, arg);
     if (new_object == nullptr) {
-      delete record;
-      it = entries_.erase(it);
-      ++count_deleted;
+      if (count > delete_bound) {
+        it->first = GcRoot<mirror::Object>(nullptr);
+        SweepClassObject(record, callback, arg);
+        ++it;
+      } else {
+        delete record;
+        it = entries_.erase(it);
+        ++count_deleted;
+      }
     } else {
       if (old_object != new_object) {
         it->first = GcRoot<mirror::Object>(new_object);
         ++count_moved;
       }
+      SweepClassObject(record, callback, arg);
       ++it;
     }
   }
@@ -128,6 +153,20 @@
   VLOG(heap) << "Updated " << count_moved << " allocation records";
 }
 
+void AllocRecordObjectMap::AllowNewAllocationRecords() {
+  allow_new_record_ = true;
+  new_record_condition_.Broadcast(Thread::Current());
+}
+
+void AllocRecordObjectMap::DisallowNewAllocationRecords() {
+  allow_new_record_ = false;
+}
+
+void AllocRecordObjectMap::EnsureNewAllocationRecordsDisallowed() {
+  CHECK(!allow_new_record_);
+}
+
+
 struct AllocRecordStackVisitor : public StackVisitor {
   AllocRecordStackVisitor(Thread* thread, AllocRecordStackTrace* trace_in, size_t max)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
@@ -201,7 +240,8 @@
   }
 }
 
-void AllocRecordObjectMap::RecordAllocation(Thread* self, mirror::Object* obj, size_t byte_count) {
+void AllocRecordObjectMap::RecordAllocation(Thread* self, mirror::Object* obj, mirror::Class* klass,
+                                            size_t byte_count) {
   MutexLock mu(self, *Locks::alloc_tracker_lock_);
   Heap* heap = Runtime::Current()->GetHeap();
   if (!heap->IsAllocTrackingEnabled()) {
@@ -217,6 +257,11 @@
     return;
   }
 
+  // Wait for GC's sweeping to complete and for new records to be allowed again.
+  while (UNLIKELY(!records->allow_new_record_)) {
+    records->new_record_condition_.WaitHoldingLocks(self);
+  }
+
   DCHECK_LE(records->Size(), records->alloc_record_max_);
 
   // Get stack trace.
@@ -229,7 +274,7 @@
   AllocRecordStackTrace* trace = new AllocRecordStackTrace(records->scratch_trace_);
 
   // Fill in the basics.
-  AllocRecord* record = new AllocRecord(byte_count, trace);
+  AllocRecord* record = new AllocRecord(byte_count, klass, trace);
 
   records->Put(obj, record);
   DCHECK_LE(records->Size(), records->alloc_record_max_);
diff --git a/runtime/gc/allocation_record.h b/runtime/gc/allocation_record.h
index 5214b6b..f567153 100644
--- a/runtime/gc/allocation_record.h
+++ b/runtime/gc/allocation_record.h
@@ -161,8 +161,8 @@
 class AllocRecord {
  public:
   // All instances of AllocRecord should be managed by an instance of AllocRecordObjectMap.
-  AllocRecord(size_t count, AllocRecordStackTrace* trace)
-      : byte_count_(count), trace_(trace) {}
+  AllocRecord(size_t count, mirror::Class* klass, AllocRecordStackTrace* trace)
+      : byte_count_(count), klass_(klass), trace_(trace) {}
 
   ~AllocRecord() {
     delete trace_;
@@ -184,12 +184,22 @@
     return trace_->GetTid();
   }
 
+  mirror::Class* GetClass() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return klass_.Read();
+  }
+
+  GcRoot<mirror::Class>& GetClassGcRoot() {
+    return klass_;
+  }
+
   const AllocRecordStackTraceElement& StackElement(size_t index) const {
     return trace_->GetStackElement(index);
   }
 
  private:
   const size_t byte_count_;
+  // The klass_ field can be either a strong or a weak root for the GC (see EntryList below).
+  GcRoot<mirror::Class> klass_;
   // TODO: Currently trace_ is like a std::unique_ptr,
   // but in future with deduplication it could be a std::shared_ptr.
   const AllocRecordStackTrace* const trace_;
@@ -197,14 +207,17 @@
 
 class AllocRecordObjectMap {
  public:
-  // Since the entries contain weak roots, they need a read barrier. Do not directly access
-  // the mirror::Object pointers in it. Use functions that contain read barriers.
-  // No need for "const AllocRecord*" in the list, because all fields of AllocRecord are const.
+  // GcRoot<mirror::Object> pointers in the list are weak roots, and the last recent_record_max_
+  // number of AllocRecord::klass_ pointers are strong roots (and the rest of klass_ pointers are
+  // weak roots). The last recent_record_max_ number of pairs in the list are always kept for DDMS's
+  // recent allocation tracking, but GcRoot<mirror::Object> pointers in these pairs can become null.
+  // Both types of pointers need read barriers, do not directly access them.
   typedef std::list<std::pair<GcRoot<mirror::Object>, AllocRecord*>> EntryList;
 
   // "static" because it is part of double-checked locking. It needs to check a bool first,
   // in order to make sure the AllocRecordObjectMap object is not null.
-  static void RecordAllocation(Thread* self, mirror::Object* obj, size_t byte_count)
+  static void RecordAllocation(Thread* self, mirror::Object* obj, mirror::Class* klass,
+                               size_t byte_count)
       LOCKS_EXCLUDED(Locks::alloc_tracker_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -215,7 +228,9 @@
         recent_record_max_(kDefaultNumRecentRecords),
         max_stack_depth_(kDefaultAllocStackDepth),
         scratch_trace_(kMaxSupportedStackDepth),
-        alloc_ddm_thread_id_(0) {}
+        alloc_ddm_thread_id_(0),
+        allow_new_record_(true),
+        new_record_condition_("New allocation record condition", *Locks::alloc_tracker_lock_) {}
 
   ~AllocRecordObjectMap();
 
@@ -247,6 +262,16 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::alloc_tracker_lock_);
 
+  void DisallowNewAllocationRecords()
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::alloc_tracker_lock_);
+  void AllowNewAllocationRecords()
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::alloc_tracker_lock_);
+  void EnsureNewAllocationRecordsDisallowed()
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::alloc_tracker_lock_);
+
   // TODO: Is there a better way to hide the entries_'s type?
   EntryList::iterator Begin()
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
@@ -282,6 +307,9 @@
   size_t max_stack_depth_ GUARDED_BY(Locks::alloc_tracker_lock_);
   AllocRecordStackTrace scratch_trace_ GUARDED_BY(Locks::alloc_tracker_lock_);
   pid_t alloc_ddm_thread_id_ GUARDED_BY(Locks::alloc_tracker_lock_);
+  bool allow_new_record_ GUARDED_BY(Locks::alloc_tracker_lock_);
+  ConditionVariable new_record_condition_ GUARDED_BY(Locks::alloc_tracker_lock_);
+  // See the comment on the EntryList typedef above.
   EntryList entries_ GUARDED_BY(Locks::alloc_tracker_lock_);
 
   void SetProperties() EXCLUSIVE_LOCKS_REQUIRED(Locks::alloc_tracker_lock_);
diff --git a/runtime/gc/heap-inl.h b/runtime/gc/heap-inl.h
index 0ed3b6d..2e66160 100644
--- a/runtime/gc/heap-inl.h
+++ b/runtime/gc/heap-inl.h
@@ -170,7 +170,8 @@
   }
   if (kInstrumented) {
     if (IsAllocTrackingEnabled()) {
-      AllocRecordObjectMap::RecordAllocation(self, obj, bytes_allocated);
+      // Use obj->GetClass() instead of klass, because PushOnAllocationStack() could have moved klass.
+      AllocRecordObjectMap::RecordAllocation(self, obj, obj->GetClass(), bytes_allocated);
     }
   } else {
     DCHECK(!IsAllocTrackingEnabled());
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index f0ba0bd..3dafb7d 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -3716,6 +3716,35 @@
   }
 }
 
+void Heap::AllowNewAllocationRecords() const {
+  if (IsAllocTrackingEnabled()) {
+    MutexLock mu(Thread::Current(), *Locks::alloc_tracker_lock_);
+    if (IsAllocTrackingEnabled()) {
+      GetAllocationRecords()->AllowNewAllocationRecords();
+    }
+  }
+}
+
+void Heap::DisallowNewAllocationRecords() const {
+  if (IsAllocTrackingEnabled()) {
+    MutexLock mu(Thread::Current(), *Locks::alloc_tracker_lock_);
+    if (IsAllocTrackingEnabled()) {
+      GetAllocationRecords()->DisallowNewAllocationRecords();
+    }
+  }
+}
+
+void Heap::EnsureNewAllocationRecordsDisallowed() const {
+  if (IsAllocTrackingEnabled()) {
+    // Lock and unlock once to ensure that no threads are still in the
+    // middle of adding new allocation records.
+    MutexLock mu(Thread::Current(), *Locks::alloc_tracker_lock_);
+    if (IsAllocTrackingEnabled()) {
+      GetAllocationRecords()->EnsureNewAllocationRecordsDisallowed();
+    }
+  }
+}
+
 // Based on debug malloc logic from libc/bionic/debug_stacktrace.cpp.
 class StackCrawlState {
  public:
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index 54a3235..1c75bd0 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -706,10 +706,24 @@
       EXCLUSIVE_LOCKS_REQUIRED(Locks::alloc_tracker_lock_);
 
   void VisitAllocationRecords(RootVisitor* visitor) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      LOCKS_EXCLUDED(Locks::alloc_tracker_lock_);
 
   void SweepAllocationRecords(IsMarkedCallback* visitor, void* arg) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      LOCKS_EXCLUDED(Locks::alloc_tracker_lock_);
+
+  void DisallowNewAllocationRecords() const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      LOCKS_EXCLUDED(Locks::alloc_tracker_lock_);
+
+  void AllowNewAllocationRecords() const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      LOCKS_EXCLUDED(Locks::alloc_tracker_lock_);
+
+  void EnsureNewAllocationRecordsDisallowed() const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      LOCKS_EXCLUDED(Locks::alloc_tracker_lock_);
 
  private:
   class ConcurrentGCTask;
diff --git a/runtime/hprof/hprof.cc b/runtime/hprof/hprof.cc
index f32d5a1..3a22b16 100644
--- a/runtime/hprof/hprof.cc
+++ b/runtime/hprof/hprof.cc
@@ -823,9 +823,14 @@
     CHECK(records != nullptr);
     HprofStackTraceSerialNumber next_trace_sn = kHprofNullStackTrace + 1;
     HprofStackFrameId next_frame_id = 0;
+    size_t count = 0;
 
     for (auto it = records->Begin(), end = records->End(); it != end; ++it) {
       const mirror::Object* obj = it->first.Read();
+      if (obj == nullptr) {
+        continue;
+      }
+      ++count;
       const gc::AllocRecordStackTrace* trace = it->second->GetStackTrace();
 
       // Copy the pair into a real hash map to speed up look up.
@@ -849,6 +854,7 @@
     }
     CHECK_EQ(traces_.size(), next_trace_sn - kHprofNullStackTrace - 1);
     CHECK_EQ(frames_.size(), next_frame_id);
+    VLOG(heap) << "hprof: found " << count << " objects with allocation stack traces";
   }
 
   // If direct_to_ddms_ is set, "filename_" and "fd" will be ignored.
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index d0f01b3..96c15ea 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -1483,17 +1483,14 @@
   monitor_list_->DisallowNewMonitors();
   intern_table_->DisallowNewInterns();
   java_vm_->DisallowNewWeakGlobals();
-  // TODO: add a similar call for heap.allocation_records_, otherwise some of the newly allocated
-  // objects that are not marked might be swept from the records, making the records incomplete.
-  // It is safe for now since the only effect is that those objects do not have allocation records.
-  // The number of such objects should be small, and current allocation tracker cannot collect
-  // allocation records for all objects anyway.
+  heap_->DisallowNewAllocationRecords();
 }
 
 void Runtime::AllowNewSystemWeaks() {
   monitor_list_->AllowNewMonitors();
   intern_table_->AllowNewInterns();
   java_vm_->AllowNewWeakGlobals();
+  heap_->AllowNewAllocationRecords();
 }
 
 void Runtime::EnsureNewSystemWeaksDisallowed() {
@@ -1502,6 +1499,7 @@
   monitor_list_->EnsureNewMonitorsDisallowed();
   intern_table_->EnsureNewInternsDisallowed();
   java_vm_->EnsureNewWeakGlobalsDisallowed();
+  heap_->EnsureNewAllocationRecordsDisallowed();
 }
 
 void Runtime::SetInstructionSet(InstructionSet instruction_set) {
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index ac9656b..29c3ea9 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -230,10 +230,7 @@
 TEST_ART_BROKEN_NO_RELOCATE_TESTS :=
 
 # Tests that are broken with GC stress.
-# 098-ddmc is broken until the allocation tracker does not mark recently allocated objects as roots.
-# Marking them roots is for consistent behavior with DDMS's getRecentAllocations(). b/20037135
-TEST_ART_BROKEN_GCSTRESS_RUN_TESTS := \
-  098-ddmc
+TEST_ART_BROKEN_GCSTRESS_RUN_TESTS :=
 
 ifneq (,$(filter gcstress,$(GC_TYPES)))
   ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \