Remove bitmap lock from checkpoint root marking.

Should reduce how long each mutator is paused during the checkpoint
root marking since there is no more contention on the heap bitmap
lock.

Change-Id: I6048d785952e1da807d74eeea1d9958090a22b73
diff --git a/src/gc/mark_sweep.cc b/src/gc/mark_sweep.cc
index f2617e4..0976f94 100644
--- a/src/gc/mark_sweep.cc
+++ b/src/gc/mark_sweep.cc
@@ -86,7 +86,8 @@
       work_chunks_created_(0), work_chunks_deleted_(0),
       reference_count_(0),
       gc_barrier_(new Barrier),
-      large_object_lock_("large object lock") {
+      large_object_lock_("large object lock"),
+      mark_stack_expand_lock_("mark stack expand lock") {
   DCHECK(mark_stack_ != NULL);
 }
 
@@ -113,6 +114,33 @@
   LOG(FATAL) << "Could not find a default mark bitmap";
 }
 
+void MarkSweep::ExpandMarkStack() {
+  // Rare case, no need to have Thread::Current be a parameter.
+  MutexLock mu(Thread::Current(), mark_stack_expand_lock_);
+  if (UNLIKELY(mark_stack_->Size() < mark_stack_->Capacity())) {
+    // Someone else acquired the lock and expanded the mark stack before us.
+    return;
+  }
+  std::vector<Object*> temp;
+  temp.insert(temp.begin(), mark_stack_->Begin(), mark_stack_->End());
+  mark_stack_->Resize(mark_stack_->Capacity() * 2);
+  for (size_t i = 0; i < temp.size(); ++i) {
+    mark_stack_->PushBack(temp[i]);
+  }
+}
+
+inline void MarkSweep::MarkObjectNonNullParallel(const Object* obj, bool check_finger) {
+  DCHECK(obj != NULL);
+  if (MarkObjectParallel(obj)) {
+    if (kDisableFinger || (check_finger && obj < finger_)) {
+      while (UNLIKELY(!mark_stack_->AtomicPushBack(const_cast<Object*>(obj)))) {
+        // Only reason a push can fail is that the mark stack is full.
+        ExpandMarkStack();
+      }
+    }
+  }
+}
+
 inline void MarkSweep::MarkObjectNonNull(const Object* obj, bool check_finger) {
   DCHECK(obj != NULL);
 
@@ -140,12 +168,7 @@
     if (kDisableFinger || (check_finger && obj < finger_)) {
       // Do we need to expand the mark stack?
       if (UNLIKELY(mark_stack_->Size() >= mark_stack_->Capacity())) {
-        std::vector<Object*> temp;
-        temp.insert(temp.begin(), mark_stack_->Begin(), mark_stack_->End());
-        mark_stack_->Resize(mark_stack_->Capacity() * 2);
-        for (size_t i = 0; i < temp.size(); ++i) {
-          mark_stack_->PushBack(temp[i]);
-        }
+        ExpandMarkStack();
       }
       // The object must be pushed on to the mark stack.
       mark_stack_->PushBack(const_cast<Object*>(obj));
@@ -215,6 +238,13 @@
   }
 }
 
+void MarkSweep::MarkRootParallelCallback(const Object* root, void* arg) {
+  DCHECK(root != NULL);
+  DCHECK(arg != NULL);
+  MarkSweep* mark_sweep = reinterpret_cast<MarkSweep*>(arg);
+  mark_sweep->MarkObjectNonNullParallel(root, false);
+}
+
 void MarkSweep::MarkObjectCallback(const Object* root, void* arg) {
   DCHECK(root != NULL);
   DCHECK(arg != NULL);
@@ -572,8 +602,7 @@
     Thread* self = Thread::Current();
     CHECK(thread == self || thread->IsSuspended() || thread->GetState() == kWaitingPerformingGc)
         << thread->GetState() << " thread " << thread << " self " << self;
-    WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
-    thread->VisitRoots(MarkSweep::MarkObjectCallback, mark_sweep_);
+    thread->VisitRoots(MarkSweep::MarkRootParallelCallback, mark_sweep_);
     mark_sweep_->GetBarrier().Pass(self);
   }
 
diff --git a/src/gc/mark_sweep.h b/src/gc/mark_sweep.h
index 2297ce1..98445d4 100644
--- a/src/gc/mark_sweep.h
+++ b/src/gc/mark_sweep.h
@@ -237,6 +237,8 @@
   static void MarkObjectCallback(const Object* root, void* arg)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
+  static void MarkRootParallelCallback(const Object* root, void* arg);
+
   // Marks an object.
   void MarkObject(const Object* obj)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
@@ -260,23 +262,17 @@
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_,
                             Locks::mutator_lock_);
 
-  static void ScanDirtyCardCallback(Object* obj, void* arg)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
   void MarkObjectNonNull(const Object* obj, bool check_finger)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
+  void MarkObjectNonNullParallel(const Object* obj, bool check_finger);
+
   bool MarkLargeObject(const Object* obj)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
   // Returns true if we need to add obj to a mark stack.
   bool MarkObjectParallel(const Object* obj) NO_THREAD_SAFETY_ANALYSIS;
 
-  static void ScanBitmapCallback(Object* obj, void* finger, void* arg)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
   static void SweepCallback(size_t num_ptrs, Object** ptrs, void* arg)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
@@ -295,6 +291,9 @@
   void VerifyRoots()
       NO_THREAD_SAFETY_ANALYSIS;
 
+  // Expand mark stack to 2x its current size. Thread safe.
+  void ExpandMarkStack();
+
   static void VerifyRootCallback(const Object* root, void* arg, size_t vreg,
                                  const AbstractMethod* method);
 
@@ -462,7 +461,8 @@
   AtomicInteger reference_count_;
 
   UniquePtr<Barrier> gc_barrier_;
-  Mutex large_object_lock_;
+  Mutex large_object_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+  Mutex mark_stack_expand_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
 
   friend class AddIfReachesAllocSpaceVisitor; // Used by mod-union table.
   friend class CheckBitmapVisitor;