Clean up the locks around Heap::VisitObjects().

This is so that we can support suspending all threads when visiting
objects in the presence of a concurrent, moving collector. Callers no
longer need to hold Locks::heap_bitmap_lock_ across VisitObjects(): the
lock is now taken internally around the live-bitmap walk (or all
threads are suspended first, under a concurrent moving collector), and
a new kWaitingForVisitObjects thread state covers the suspend-all path.
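
A minimal sketch of the new calling convention (the callback and the
counter below are hypothetical, for illustration only; any function
matching the ObjectCallback signature works):

  // Hypothetical callback; real callers pass e.g. CountStringsCallback.
  static void CountObjectsCallback(mirror::Object* obj ATTRIBUTE_UNUSED,
                                   void* arg) {
    ++*reinterpret_cast<size_t*>(arg);
  }

  // In some caller (running with the mutator lock held shared):
  size_t count = 0;
  // No explicit ReaderMutexLock on Locks::heap_bitmap_lock_ any more;
  // VisitObjects() takes the lock, or suspends all threads, internally.
  Runtime::Current()->GetHeap()->VisitObjects(CountObjectsCallback, &count);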

Bug: 12687968
Change-Id: Icc8e60630465afde948ebc6ea91d4ebaff5d7837
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index 8c7d611..ba02338 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -501,19 +501,14 @@
   size_t total_strings = 0;
   gc::Heap* heap = Runtime::Current()->GetHeap();
   ClassLinker* cl = Runtime::Current()->GetClassLinker();
-  {
-    ReaderMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
-    heap->VisitObjects(CountStringsCallback, &total_strings);  // Count the strings.
-  }
+  // Count the strings.
+  heap->VisitObjects(CountStringsCallback, &total_strings);
   Thread* self = Thread::Current();
   StackHandleScope<1> hs(self);
   auto strings = hs.NewHandle(cl->AllocStringArray(self, total_strings));
   StringCollector string_collector(strings, 0U);
-  {
-    ReaderMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
-    // Read strings into the array.
-    heap->VisitObjects(StringCollector::Callback, &string_collector);
-  }
+  // Read strings into the array.
+  heap->VisitObjects(StringCollector::Callback, &string_collector);
   // Some strings could have gotten freed if AllocStringArray caused a GC.
   CHECK_LE(string_collector.GetIndex(), total_strings);
   total_strings = string_collector.GetIndex();
@@ -595,7 +590,6 @@
 }
 
 void ImageWriter::ComputeEagerResolvedStrings() {
-  ReaderMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
   Runtime::Current()->GetHeap()->VisitObjects(ComputeEagerResolvedStringsCallback, this);
 }
 
@@ -668,7 +662,6 @@
 void ImageWriter::CheckNonImageClassesRemoved() {
   if (compiler_driver_.GetImageClasses() != nullptr) {
     gc::Heap* heap = Runtime::Current()->GetHeap();
-    ReaderMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
     heap->VisitObjects(CheckNonImageClassesRemovedCallback, this);
   }
 }
@@ -869,17 +862,14 @@
   // know where image_roots is going to end up
   image_end_ += RoundUp(sizeof(ImageHeader), kObjectAlignment);  // 64-bit-alignment
 
-  {
-    WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
-    // TODO: Image spaces only?
-    DCHECK_LT(image_end_, image_->Size());
-    image_objects_offset_begin_ = image_end_;
-    // Clear any pre-existing monitors which may have been in the monitor words, assign bin slots.
-    heap->VisitObjects(WalkFieldsCallback, this);
-    // Transform each object's bin slot into an offset which will be used to do the final copy.
-    heap->VisitObjects(UnbinObjectsIntoOffsetCallback, this);
-    DCHECK(saved_hashes_map_.empty());  // All binslot hashes should've been put into vector by now.
-  }
+  // TODO: Image spaces only?
+  DCHECK_LT(image_end_, image_->Size());
+  image_objects_offset_begin_ = image_end_;
+  // Clear any pre-existing monitors which may have been in the monitor words, assign bin slots.
+  heap->VisitObjects(WalkFieldsCallback, this);
+  // Transform each object's bin slot into an offset which will be used to do the final copy.
+  heap->VisitObjects(UnbinObjectsIntoOffsetCallback, this);
+  DCHECK(saved_hashes_map_.empty());  // All binslot hashes should've been put into vector by now.
 
   DCHECK_GT(image_end_, GetBinSizeSum());
 
@@ -920,7 +910,6 @@
   // TODO: heap validation can't handle this fix up pass
   heap->DisableObjectValidation();
   // TODO: Image spaces only?
-  WriterMutexLock mu(ants.Self(), *Locks::heap_bitmap_lock_);
   heap->VisitObjects(CopyAndFixupObjectsCallback, this);
   // Fix up the objects that previously had hash codes.
   for (const std::pair<mirror::Object*, uint32_t>& hash_pair : saved_hashes_) {
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 05b6b1d..00f3855 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -1714,7 +1714,6 @@
   // Set entry point to interpreter if in InterpretOnly mode.
   Runtime* runtime = Runtime::Current();
   if (!runtime->IsCompiler() && runtime->GetInstrumentation()->InterpretOnly()) {
-    ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
     heap->VisitObjects(InitFromImageInterpretOnlyCallback, this);
   }
 
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index c595de7..c63e2d7 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -2194,6 +2194,7 @@
     case kWaitingForJniOnLoad:
     case kWaitingForMethodTracingStart:
     case kWaitingForSignalCatcherOutput:
+    case kWaitingForVisitObjects:
     case kWaitingInMainDebuggerLoop:
     case kWaitingInMainSignalCatcherLoop:
     case kWaitingPerformingGc:
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 553a5d3..eed9352 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -704,8 +704,29 @@
 }
 
 void Heap::VisitObjects(ObjectCallback callback, void* arg) {
-  // GCs can move objects, so don't allow this.
-  ScopedAssertNoThreadSuspension ants(Thread::Current(), "Visiting objects");
+  Thread* self = Thread::Current();
+  if (Locks::mutator_lock_->IsExclusiveHeld(self)) {
+    // Threads are already suspended.
+    VisitObjectsInternal(callback, arg);
+  } else if (IsGcConcurrent() && IsMovingGc(collector_type_)) {
+    // Concurrent moving GC. Suspend all threads and visit objects.
+    DCHECK_EQ(collector_type_, foreground_collector_type_);
+    DCHECK_EQ(foreground_collector_type_, background_collector_type_)
+        << "Assume no transition such that collector_type_ won't change";
+    self->TransitionFromRunnableToSuspended(kWaitingForVisitObjects);
+    ThreadList* tl = Runtime::Current()->GetThreadList();
+    tl->SuspendAll();
+    VisitObjectsInternal(callback, arg);
+    tl->ResumeAll();
+    self->TransitionFromSuspendedToRunnable();
+  } else {
+    // GCs can move objects, so don't allow this.
+    ScopedAssertNoThreadSuspension ants(self, "Visiting objects");
+    VisitObjectsInternal(callback, arg);
+  }
+}
+
+void Heap::VisitObjectsInternal(ObjectCallback callback, void* arg) {
   if (bump_pointer_space_ != nullptr) {
     // Visit objects in bump pointer space.
     bump_pointer_space_->Walk(callback, arg);
@@ -721,7 +742,10 @@
       callback(obj, arg);
     }
   }
-  GetLiveBitmap()->Walk(callback, arg);
+  {
+    ReaderMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
+    GetLiveBitmap()->Walk(callback, arg);
+  }
 }
 
 void Heap::MarkAllocStackAsLive(accounting::ObjectStack* stack) {
@@ -1459,10 +1483,7 @@
 
 void Heap::CountInstances(const std::vector<mirror::Class*>& classes, bool use_is_assignable_from,
                           uint64_t* counts) {
-  // Can't do any GC in this function since this may move classes.
-  ScopedAssertNoThreadSuspension ants(Thread::Current(), "CountInstances");
   InstanceCounter counter(classes, use_is_assignable_from, counts);
-  ReaderMutexLock mu(ants.Self(), *Locks::heap_bitmap_lock_);
   VisitObjects(InstanceCounter::Callback, &counter);
 }
 
@@ -1493,10 +1514,7 @@
 
 void Heap::GetInstances(mirror::Class* c, int32_t max_count,
                         std::vector<mirror::Object*>& instances) {
-  // Can't do any GC in this function since this may move classes.
-  ScopedAssertNoThreadSuspension ants(Thread::Current(), "GetInstances");
   InstanceCollector collector(c, max_count, instances);
-  ReaderMutexLock mu(ants.Self(), *Locks::heap_bitmap_lock_);
   VisitObjects(&InstanceCollector::Callback, &collector);
 }
 
@@ -1538,10 +1556,7 @@
 
 void Heap::GetReferringObjects(mirror::Object* o, int32_t max_count,
                                std::vector<mirror::Object*>& referring_objects) {
-  // Can't do any GC in this function since this may move the object o.
-  ScopedAssertNoThreadSuspension ants(Thread::Current(), "GetReferringObjects");
   ReferringObjectsFinder finder(o, max_count, referring_objects);
-  ReaderMutexLock mu(ants.Self(), *Locks::heap_bitmap_lock_);
   VisitObjects(&ReferringObjectsFinder::Callback, &finder);
 }
 
@@ -2702,7 +2717,6 @@
   TimingLogger::ScopedTiming t(__FUNCTION__, timings);
   if (verify_pre_gc_heap_) {
     TimingLogger::ScopedTiming t2("(Paused)PreGcVerifyHeapReferences", timings);
-    ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
     size_t failures = VerifyHeapReferences();
     if (failures > 0) {
       LOG(FATAL) << "Pre " << gc->GetName() << " heap verification failed with " << failures
@@ -2754,9 +2768,11 @@
   if (verify_pre_sweeping_heap_) {
     TimingLogger::ScopedTiming t2("(Paused)PostSweepingVerifyHeapReferences", timings);
     CHECK_NE(self->GetState(), kRunnable);
-    WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
-    // Swapping bound bitmaps does nothing.
-    gc->SwapBitmaps();
+    {
+      WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
+      // Swapping bound bitmaps does nothing.
+      gc->SwapBitmaps();
+    }
     // Pass in false since concurrent reference processing can mean that the reference referents
     // may point to dead objects at the point at which PreSweepingGcVerification is called.
     size_t failures = VerifyHeapReferences(false);
@@ -2764,7 +2780,10 @@
       LOG(FATAL) << "Pre sweeping " << gc->GetName() << " GC verification failed with " << failures
           << " failures";
     }
-    gc->SwapBitmaps();
+    {
+      WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
+      gc->SwapBitmaps();
+    }
   }
   if (verify_pre_sweeping_rosalloc_) {
     RosAllocVerification(timings, "PreSweepingRosAllocVerification");
@@ -2786,7 +2805,6 @@
   }
   if (verify_post_gc_heap_) {
     TimingLogger::ScopedTiming t2("(Paused)PostGcVerifyHeapReferences", timings);
-    ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
     size_t failures = VerifyHeapReferences();
     if (failures > 0) {
       LOG(FATAL) << "Pre " << gc->GetName() << " heap verification failed with " << failures
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index fc61fc5..36a3767 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -216,7 +216,11 @@
 
   // Visit all of the live objects in the heap.
   void VisitObjects(ObjectCallback callback, void* arg)
-      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      LOCKS_EXCLUDED(Locks::heap_bitmap_lock_);
+  void VisitObjectsInternal(ObjectCallback callback, void* arg)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      LOCKS_EXCLUDED(Locks::heap_bitmap_lock_);
 
   void CheckPreconditionsForAllocObject(mirror::Class* c, size_t byte_count)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -245,7 +249,7 @@
   void VerifyHeap() LOCKS_EXCLUDED(Locks::heap_bitmap_lock_);
   // Returns how many failures occurred.
   size_t VerifyHeapReferences(bool verify_referents = true)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
   bool VerifyMissingCardMarks()
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
 
@@ -741,7 +745,8 @@
   void PrePauseRosAllocVerification(collector::GarbageCollector* gc)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
   void PreSweepingGcVerification(collector::GarbageCollector* gc)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
+      LOCKS_EXCLUDED(Locks::heap_bitmap_lock_);
   void PostGcVerification(collector::GarbageCollector* gc)
       LOCKS_EXCLUDED(Locks::mutator_lock_);
   void PostGcVerificationPaused(collector::GarbageCollector* gc)
diff --git a/runtime/hprof/hprof.cc b/runtime/hprof/hprof.cc
index 040757b..0b04276 100644
--- a/runtime/hprof/hprof.cc
+++ b/runtime/hprof/hprof.cc
@@ -421,7 +421,6 @@
   void Dump()
       EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
       LOCKS_EXCLUDED(Locks::heap_bitmap_lock_) {
-    ReaderMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
     // First pass to measure the size of the dump.
     size_t overall_size;
     size_t max_length;
@@ -487,8 +486,7 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void ProcessHeap(EndianOutput* output, bool header_first)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
-      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) {
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_) {
     // Reset current heap and object count.
     current_heap_ = HPROF_HEAP_DEFAULT;
     objects_in_segment_ = 0;
@@ -502,8 +500,7 @@
     }
   }
 
-  void ProcessBody(EndianOutput* output) EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
-      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) {
+  void ProcessBody(EndianOutput* output) EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_) {
     Runtime* runtime = Runtime::Current();
     // Walk the roots and the heap.
     output->StartNewRecord(HPROF_TAG_HEAP_DUMP_SEGMENT, kHprofTime);
@@ -646,8 +643,7 @@
   }
 
   bool DumpToDdmsBuffered(size_t overall_size ATTRIBUTE_UNUSED, size_t max_length ATTRIBUTE_UNUSED)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
-      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) {
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_) {
     LOG(FATAL) << "Unimplemented";
     UNREACHABLE();
     //        // Send the data off to DDMS.
@@ -660,8 +656,7 @@
   }
 
   bool DumpToFile(size_t overall_size, size_t max_length)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
-      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) {
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_) {
     // Where exactly are we writing to?
     int out_fd;
     if (fd_ >= 0) {
@@ -708,8 +703,7 @@
   }
 
   bool DumpToDdmsDirect(size_t overall_size, size_t max_length, uint32_t chunk_type)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
-      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) {
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_) {
     CHECK(direct_to_ddms_);
     JDWP::JdwpState* state = Dbg::GetJdwpState();
     CHECK(state != nullptr);
diff --git a/runtime/native/java_lang_Thread.cc b/runtime/native/java_lang_Thread.cc
index 760eb9b..e4b8db1 100644
--- a/runtime/native/java_lang_Thread.cc
+++ b/runtime/native/java_lang_Thread.cc
@@ -88,6 +88,7 @@
     case kWaitingForSignalCatcherOutput:  return kJavaWaiting;
     case kWaitingInMainSignalCatcherLoop: return kJavaWaiting;
     case kWaitingForMethodTracingStart:   return kJavaWaiting;
+    case kWaitingForVisitObjects:         return kJavaWaiting;
     case kSuspended:                      return kJavaRunnable;
     // Don't add a 'default' here so the compiler can spot incompatible enum changes.
   }
diff --git a/runtime/thread_state.h b/runtime/thread_state.h
index 6e5deeb..b5479ed 100644
--- a/runtime/thread_state.h
+++ b/runtime/thread_state.h
@@ -41,6 +41,7 @@
   kWaitingInMainSignalCatcherLoop,  // WAITING        TS_WAIT      blocking/reading/processing signals
   kWaitingForDeoptimization,        // WAITING        TS_WAIT      waiting for deoptimization suspend all
   kWaitingForMethodTracingStart,    // WAITING        TS_WAIT      waiting for method tracing to start
+  kWaitingForVisitObjects,          // WAITING        TS_WAIT      waiting to visit objects
   kStarting,                        // NEW            TS_WAIT      native thread started, not yet ready to run managed code
   kNative,                          // RUNNABLE       TS_RUNNING   running in a JNI native method
   kSuspended,                       // RUNNABLE       TS_RUNNING   suspended by GC or debugger