Revoke thread-local mark stack for CC in ~Thread

Move revoking thread-local mark stack from Thread::Destroy() to
Thread::~Thread. We also don't need to perform this revocation with
ScopedObjectAccess.
Also, updated the assertion in RevokeThreadLocalMarkStack for cases
where an exiting thread during marking phase of the 2-phase full-heap GC
invokes it.

Bug: 140119552
Test: art/testrunner/testrunner.py
Change-Id: I64c6cc52ca802bd3ddfb210ac9254f37f059ea03
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index a1663c8..4a6869f 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -2027,7 +2027,9 @@
   CHECK_EQ(self, thread);
   accounting::AtomicStack<mirror::Object>* tl_mark_stack = thread->GetThreadLocalMarkStack();
   if (tl_mark_stack != nullptr) {
-    CHECK(is_marking_);
+    // With 2-phase CC change, we cannot assert that is_marking_ will always be true
+    // as we perform thread stack scan even before enabling the read-barrier.
+    CHECK(is_marking_ || (use_generational_cc_ && !young_gen_));
     MutexLock mu(self, mark_stack_lock_);
     revoked_mark_stacks_.push_back(tl_mark_stack);
     RemoveThreadMarkStackMapping(thread, tl_mark_stack);
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 59a38e1..36c35f8 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -2439,13 +2439,19 @@
   {
     ScopedObjectAccess soa(self);
     Runtime::Current()->GetHeap()->RevokeThreadLocalBuffers(this);
-    if (kUseReadBarrier) {
-      Runtime::Current()->GetHeap()->ConcurrentCopyingCollector()->RevokeThreadLocalMarkStack(this);
-    }
   }
 }
 
 Thread::~Thread() {
+  if (kUseReadBarrier) {
+    // It's a cheap operation so can be done in the destructor (instead of
+    // Destroy()).
+    // Doing it without mutator_lock mutual exclusion is also necessary as there
+    // is a checkpoint in ConcurrentCopying which scans the threads' stacks,
+    // thereby assigning a thread-local mark-stack to self, breaking some
+    // assumptions about how the GC works (see b/140119552).
+    Runtime::Current()->GetHeap()->ConcurrentCopyingCollector()->RevokeThreadLocalMarkStack(this);
+  }
   CHECK(tlsPtr_.class_loader_override == nullptr);
   CHECK(tlsPtr_.jpeer == nullptr);
   CHECK(tlsPtr_.opeer == nullptr);