Minor improvements for the CC collector.

- Split Mark() and inline its first part.
- Make sure some other routines are inlined.
- Add some LIKELY/UNLIKELY branch hints (see the sketch below).
- Use VisitConcurrentRoots().

Ritz EAAC GC time decreased from 28.9s to 27.6s (-4.5%) on N5.
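
For context, a minimal sketch of the branch-hint pattern this change adds to
the hot paths. The macro definitions and the PushValue() helper below are
illustrative assumptions, not part of this change; ART's actual LIKELY/UNLIKELY
macros live in runtime/base/macros.h and take the same __builtin_expect shape.

    // Illustrative only: the hints tell the compiler which branch is the
    // expected (fall-through) path, keeping the rare slow path out of the
    // hot instruction stream.
    #define LIKELY(x)   __builtin_expect(!!(x), 1)
    #define UNLIKELY(x) __builtin_expect(!!(x), 0)

    // Hypothetical stand-in for a mark-stack push: the full-stack case is
    // rare, so it is marked UNLIKELY and laid out off the fast path.
    bool PushValue(int* stack, int* top, int capacity, int value) {
      if (UNLIKELY(*top == capacity)) {
        return false;  // rare: stack is full, caller must expand it
      }
      stack[(*top)++] = value;  // common case: fast push
      return true;
    }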

Bug: 12687968

Change-Id: I7bd13f162e7daa2a5853000fb22c5fefc318994f
diff --git a/runtime/gc/collector/concurrent_copying-inl.h b/runtime/gc/collector/concurrent_copying-inl.h
new file mode 100644
index 0000000..26f5ad3
--- /dev/null
+++ b/runtime/gc/collector/concurrent_copying-inl.h
@@ -0,0 +1,110 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_GC_COLLECTOR_CONCURRENT_COPYING_INL_H_
+#define ART_RUNTIME_GC_COLLECTOR_CONCURRENT_COPYING_INL_H_
+
+#include "concurrent_copying.h"
+
+#include "gc/accounting/space_bitmap-inl.h"
+#include "gc/heap.h"
+#include "gc/space/region_space.h"
+#include "lock_word.h"
+
+namespace art {
+namespace gc {
+namespace collector {
+
+inline mirror::Object* ConcurrentCopying::Mark(mirror::Object* from_ref) {
+  if (from_ref == nullptr) {
+    return nullptr;
+  }
+  DCHECK(heap_->collector_type_ == kCollectorTypeCC);
+  if (UNLIKELY(kUseBakerReadBarrier && !is_active_)) {
+    // In the lock word forward address state, the read barrier bits
+    // in the lock word are part of the stored forwarding address and
+    // invalid. This is usually OK as from-space copies of objects
+    // aren't accessed by mutators due to the to-space
+    // invariant. However, during the dex2oat image writing relocation
+    // and the zygote compaction, objects can be in the forward
+    // address state (to store the forward/relocation addresses) and
+    // they can still be accessed and the invalid read barrier bits
+    // are consulted. If they happen to look gray but aren't, the
+    // read barrier slow path can trigger when it shouldn't. To guard
+    // against this, return here if the CC collector isn't running.
+    return from_ref;
+  }
+  DCHECK(region_space_ != nullptr) << "Read barrier slow path taken when CC isn't running?";
+  space::RegionSpace::RegionType rtype = region_space_->GetRegionType(from_ref);
+  switch (rtype) {
+    case space::RegionSpace::RegionType::kRegionTypeToSpace:
+      // It's already marked.
+      return from_ref;
+    case space::RegionSpace::RegionType::kRegionTypeFromSpace: {
+      mirror::Object* to_ref = GetFwdPtr(from_ref);
+      if (kUseBakerReadBarrier) {
+        DCHECK_NE(to_ref, ReadBarrier::GrayPtr())
+            << "from_ref=" << from_ref << " to_ref=" << to_ref;
+      }
+      if (to_ref == nullptr) {
+        // It isn't marked yet. Mark it by copying it to the to-space.
+        to_ref = Copy(from_ref);
+      }
+      DCHECK(region_space_->IsInToSpace(to_ref) || heap_->non_moving_space_->HasAddress(to_ref))
+          << "from_ref=" << from_ref << " to_ref=" << to_ref;
+      return to_ref;
+    }
+    case space::RegionSpace::RegionType::kRegionTypeUnevacFromSpace: {
+      // This may or may not succeed, which is ok.
+      if (kUseBakerReadBarrier) {
+        from_ref->AtomicSetReadBarrierPointer(ReadBarrier::WhitePtr(), ReadBarrier::GrayPtr());
+      }
+      mirror::Object* to_ref = from_ref;
+      if (region_space_bitmap_->AtomicTestAndSet(from_ref)) {
+        // Already marked.
+      } else {
+        // Newly marked.
+        if (kUseBakerReadBarrier) {
+          DCHECK_EQ(to_ref->GetReadBarrierPointer(), ReadBarrier::GrayPtr());
+        }
+        PushOntoMarkStack(to_ref);
+      }
+      return to_ref;
+    }
+    case space::RegionSpace::RegionType::kRegionTypeNone:
+      return MarkNonMoving(from_ref);
+    default:
+      UNREACHABLE();
+  }
+}
+
+inline mirror::Object* ConcurrentCopying::GetFwdPtr(mirror::Object* from_ref) {
+  DCHECK(region_space_->IsInFromSpace(from_ref));
+  LockWord lw = from_ref->GetLockWord(false);
+  if (lw.GetState() == LockWord::kForwardingAddress) {
+    mirror::Object* fwd_ptr = reinterpret_cast<mirror::Object*>(lw.ForwardingAddress());
+    DCHECK(fwd_ptr != nullptr);
+    return fwd_ptr;
+  } else {
+    return nullptr;
+  }
+}
+
+}  // namespace collector
+}  // namespace gc
+}  // namespace art
+
+#endif  // ART_RUNTIME_GC_COLLECTOR_CONCURRENT_COPYING_INL_H_
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index d7e8f81..c73d301 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -368,30 +368,15 @@
       }
     }
   }
-  // TODO: Other garbage collectors uses Runtime::VisitConcurrentRoots(), refactor this part
-  // to also use the same function.
   {
-    TimingLogger::ScopedTiming split2("VisitConstantRoots", GetTimings());
-    Runtime::Current()->VisitConstantRoots(this);
-  }
-  {
-    TimingLogger::ScopedTiming split3("VisitInternTableRoots", GetTimings());
-    Runtime::Current()->GetInternTable()->VisitRoots(this, kVisitRootFlagAllRoots);
-  }
-  {
-    TimingLogger::ScopedTiming split4("VisitClassLinkerRoots", GetTimings());
-    Runtime::Current()->GetClassLinker()->VisitRoots(this, kVisitRootFlagAllRoots);
+    TimingLogger::ScopedTiming split2("VisitConcurrentRoots", GetTimings());
+    Runtime::Current()->VisitConcurrentRoots(this, kVisitRootFlagAllRoots);
   }
   {
     // TODO: don't visit the transaction roots if it's not active.
     TimingLogger::ScopedTiming split5("VisitNonThreadRoots", GetTimings());
     Runtime::Current()->VisitNonThreadRoots(this);
   }
-  {
-    TimingLogger::ScopedTiming split6("Dbg::VisitRoots", GetTimings());
-    Dbg::VisitRoots(this);
-  }
-  Runtime::Current()->GetHeap()->VisitAllocationRecords(this);
 
   // Immune spaces.
   for (auto& space : heap_->GetContinuousSpaces()) {
@@ -594,8 +579,8 @@
   Thread* self = Thread::Current();  // TODO: pass self as an argument from call sites?
   CHECK(thread_running_gc_ != nullptr);
   MarkStackMode mark_stack_mode = mark_stack_mode_.LoadRelaxed();
-  if (mark_stack_mode == kMarkStackModeThreadLocal) {
-    if (self == thread_running_gc_) {
+  if (LIKELY(mark_stack_mode == kMarkStackModeThreadLocal)) {
+    if (LIKELY(self == thread_running_gc_)) {
       // If GC-running thread, use the GC mark stack instead of a thread-local mark stack.
       CHECK(self->GetThreadLocalMarkStack() == nullptr);
       if (UNLIKELY(gc_mark_stack_->IsFull())) {
@@ -663,18 +648,6 @@
   return heap_->live_stack_.get();
 }
 
-inline mirror::Object* ConcurrentCopying::GetFwdPtr(mirror::Object* from_ref) {
-  DCHECK(region_space_->IsInFromSpace(from_ref));
-  LockWord lw = from_ref->GetLockWord(false);
-  if (lw.GetState() == LockWord::kForwardingAddress) {
-    mirror::Object* fwd_ptr = reinterpret_cast<mirror::Object*>(lw.ForwardingAddress());
-    CHECK(fwd_ptr != nullptr);
-    return fwd_ptr;
-  } else {
-    return nullptr;
-  }
-}
-
 // The following visitors are used to verify that there are no
 // references to the from-space left after marking.
 class ConcurrentCopyingVerifyNoFromSpaceRefsVisitor : public SingleRootVisitor {
@@ -1080,7 +1053,7 @@
   return count;
 }
 
-void ConcurrentCopying::ProcessMarkStackRef(mirror::Object* to_ref) {
+inline void ConcurrentCopying::ProcessMarkStackRef(mirror::Object* to_ref) {
   DCHECK(!region_space_->IsInFromSpace(to_ref));
   if (kUseBakerReadBarrier) {
     DCHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr())
@@ -1095,9 +1068,10 @@
         << " " << to_ref << " " << to_ref->GetReadBarrierPointer()
         << " is_marked=" << IsMarked(to_ref);
   }
-  if (to_ref->GetClass<kVerifyNone, kWithoutReadBarrier>()->IsTypeOfReferenceClass() &&
-      to_ref->AsReference()->GetReferent<kWithoutReadBarrier>() != nullptr &&
-      !IsInToSpace(to_ref->AsReference()->GetReferent<kWithoutReadBarrier>())) {
+#ifdef USE_BAKER_OR_BROOKS_READ_BARRIER
+  if (UNLIKELY((to_ref->GetClass<kVerifyNone, kWithoutReadBarrier>()->IsTypeOfReferenceClass() &&
+                to_ref->AsReference()->GetReferent<kWithoutReadBarrier>() != nullptr &&
+                !IsInToSpace(to_ref->AsReference()->GetReferent<kWithoutReadBarrier>())))) {
     // Leave this Reference gray in the queue so that GetReferent() will trigger a read barrier. We
     // will change it to black or white later in ReferenceQueue::DequeuePendingReference().
     CHECK(to_ref->AsReference()->IsEnqueued()) << "Left unenqueued ref gray " << to_ref;
@@ -1106,14 +1080,13 @@
     // be concurrently marked after the Scan() call above has enqueued the Reference, in which case
     // the above IsInToSpace() evaluates to true and we change the color from gray to black or white
     // here in this else block.
-#ifdef USE_BAKER_OR_BROOKS_READ_BARRIER
     if (kUseBakerReadBarrier) {
       if (region_space_->IsInToSpace(to_ref)) {
         // If to-space, change from gray to white.
         bool success = to_ref->AtomicSetReadBarrierPointer(ReadBarrier::GrayPtr(),
                                                            ReadBarrier::WhitePtr());
         CHECK(success) << "Must succeed as we won the race.";
-        CHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::WhitePtr());
+        DCHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::WhitePtr());
       } else {
         // If non-moving space/unevac from space, change from gray
         // to black. We can't change gray to white because it's not
@@ -1125,13 +1098,13 @@
         bool success = to_ref->AtomicSetReadBarrierPointer(ReadBarrier::GrayPtr(),
                                                            ReadBarrier::BlackPtr());
         CHECK(success) << "Must succeed as we won the race.";
-        CHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::BlackPtr());
+        DCHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::BlackPtr());
       }
     }
-#else
-    DCHECK(!kUseBakerReadBarrier);
-#endif
   }
+#else
+  DCHECK(!kUseBakerReadBarrier);
+#endif
   if (ReadBarrier::kEnableToSpaceInvariantChecks || kIsDebugBuild) {
     ConcurrentCopyingAssertToSpaceInvariantObjectVisitor visitor(this);
     visitor(to_ref);
@@ -1622,6 +1595,7 @@
   }
 
   void VisitRootIfNonNull(mirror::CompressedReference<mirror::Object>* root) const
+      ALWAYS_INLINE
       SHARED_REQUIRES(Locks::mutator_lock_) {
     if (!root->IsNull()) {
       VisitRoot(root);
@@ -1629,6 +1603,7 @@
   }
 
   void VisitRoot(mirror::CompressedReference<mirror::Object>* root) const
+      ALWAYS_INLINE
       SHARED_REQUIRES(Locks::mutator_lock_) {
     collector_->MarkRoot(root);
   }
@@ -1638,7 +1613,7 @@
 };
 
 // Scan ref fields of an object.
-void ConcurrentCopying::Scan(mirror::Object* to_ref) {
+inline void ConcurrentCopying::Scan(mirror::Object* to_ref) {
   DCHECK(!region_space_->IsInFromSpace(to_ref));
   ConcurrentCopyingRefFieldsVisitor visitor(this);
   to_ref->VisitReferences(visitor, visitor);
@@ -1648,9 +1623,6 @@
 inline void ConcurrentCopying::Process(mirror::Object* obj, MemberOffset offset) {
   mirror::Object* ref = obj->GetFieldObject<
       mirror::Object, kVerifyNone, kWithoutReadBarrier, false>(offset);
-  if (ref == nullptr || region_space_->IsInToSpace(ref)) {
-    return;
-  }
   mirror::Object* to_ref = Mark(ref);
   if (to_ref == ref) {
     return;
@@ -1669,14 +1641,11 @@
 }
 
 // Process some roots.
-void ConcurrentCopying::VisitRoots(
+inline void ConcurrentCopying::VisitRoots(
     mirror::Object*** roots, size_t count, const RootInfo& info ATTRIBUTE_UNUSED) {
   for (size_t i = 0; i < count; ++i) {
     mirror::Object** root = roots[i];
     mirror::Object* ref = *root;
-    if (ref == nullptr || region_space_->IsInToSpace(ref)) {
-      continue;
-    }
     mirror::Object* to_ref = Mark(ref);
     if (to_ref == ref) {
       continue;
@@ -1693,12 +1662,9 @@
   }
 }
 
-void ConcurrentCopying::MarkRoot(mirror::CompressedReference<mirror::Object>* root) {
+inline void ConcurrentCopying::MarkRoot(mirror::CompressedReference<mirror::Object>* root) {
   DCHECK(!root->IsNull());
   mirror::Object* const ref = root->AsMirrorPtr();
-  if (region_space_->IsInToSpace(ref)) {
-    return;
-  }
   mirror::Object* to_ref = Mark(ref);
   if (to_ref != ref) {
     auto* addr = reinterpret_cast<Atomic<mirror::CompressedReference<mirror::Object>>*>(root);
@@ -1714,7 +1680,7 @@
   }
 }
 
-void ConcurrentCopying::VisitRoots(
+inline void ConcurrentCopying::VisitRoots(
     mirror::CompressedReference<mirror::Object>** roots, size_t count,
     const RootInfo& info ATTRIBUTE_UNUSED) {
   for (size_t i = 0; i < count; ++i) {
@@ -2013,148 +1979,85 @@
   return alloc_stack->Contains(ref);
 }
 
-mirror::Object* ConcurrentCopying::Mark(mirror::Object* from_ref) {
-  if (from_ref == nullptr) {
-    return nullptr;
-  }
-  DCHECK(from_ref != nullptr);
-  DCHECK(heap_->collector_type_ == kCollectorTypeCC);
-  if (kUseBakerReadBarrier && !is_active_) {
-    // In the lock word forward address state, the read barrier bits
-    // in the lock word are part of the stored forwarding address and
-    // invalid. This is usually OK as the from-space copy of objects
-    // aren't accessed by mutators due to the to-space
-    // invariant. However, during the dex2oat image writing relocation
-    // and the zygote compaction, objects can be in the forward
-    // address state (to store the forward/relocation addresses) and
-    // they can still be accessed and the invalid read barrier bits
-    // are consulted. If they look like gray but aren't really, the
-    // read barriers slow path can trigger when it shouldn't. To guard
-    // against this, return here if the CC collector isn't running.
-    return from_ref;
-  }
-  DCHECK(region_space_ != nullptr) << "Read barrier slow path taken when CC isn't running?";
-  space::RegionSpace::RegionType rtype = region_space_->GetRegionType(from_ref);
-  if (rtype == space::RegionSpace::RegionType::kRegionTypeToSpace) {
-    // It's already marked.
-    return from_ref;
-  }
-  mirror::Object* to_ref;
-  if (rtype == space::RegionSpace::RegionType::kRegionTypeFromSpace) {
-    to_ref = GetFwdPtr(from_ref);
-    if (kUseBakerReadBarrier) {
-      DCHECK(to_ref != ReadBarrier::GrayPtr()) << "from_ref=" << from_ref << " to_ref=" << to_ref;
+mirror::Object* ConcurrentCopying::MarkNonMoving(mirror::Object* ref) {
+  // ref is in a non-moving space (from_ref == to_ref).
+  DCHECK(!region_space_->HasAddress(ref)) << ref;
+  if (immune_region_.ContainsObject(ref)) {
+    accounting::ContinuousSpaceBitmap* cc_bitmap =
+        cc_heap_bitmap_->GetContinuousSpaceBitmap(ref);
+    DCHECK(cc_bitmap != nullptr)
+        << "An immune space object must have a bitmap";
+    if (kIsDebugBuild) {
+      DCHECK(heap_mark_bitmap_->GetContinuousSpaceBitmap(ref)->Test(ref))
+          << "Immune space object must be already marked";
     }
-    if (to_ref == nullptr) {
-      // It isn't marked yet. Mark it by copying it to the to-space.
-      to_ref = Copy(from_ref);
-    }
-    DCHECK(region_space_->IsInToSpace(to_ref) || heap_->non_moving_space_->HasAddress(to_ref))
-        << "from_ref=" << from_ref << " to_ref=" << to_ref;
-  } else if (rtype == space::RegionSpace::RegionType::kRegionTypeUnevacFromSpace) {
     // This may or may not succeed, which is ok.
     if (kUseBakerReadBarrier) {
-      from_ref->AtomicSetReadBarrierPointer(ReadBarrier::WhitePtr(), ReadBarrier::GrayPtr());
+      ref->AtomicSetReadBarrierPointer(ReadBarrier::WhitePtr(), ReadBarrier::GrayPtr());
     }
-    if (region_space_bitmap_->AtomicTestAndSet(from_ref)) {
+    if (cc_bitmap->AtomicTestAndSet(ref)) {
       // Already marked.
-      to_ref = from_ref;
     } else {
       // Newly marked.
-      to_ref = from_ref;
       if (kUseBakerReadBarrier) {
-        DCHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr());
+        DCHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::GrayPtr());
       }
-      PushOntoMarkStack(to_ref);
+      PushOntoMarkStack(ref);
     }
   } else {
-    // from_ref is in a non-moving space.
-    DCHECK(!region_space_->HasAddress(from_ref)) << from_ref;
-    if (immune_region_.ContainsObject(from_ref)) {
-      accounting::ContinuousSpaceBitmap* cc_bitmap =
-          cc_heap_bitmap_->GetContinuousSpaceBitmap(from_ref);
-      DCHECK(cc_bitmap != nullptr)
-          << "An immune space object must have a bitmap";
-      if (kIsDebugBuild) {
-        DCHECK(heap_mark_bitmap_->GetContinuousSpaceBitmap(from_ref)->Test(from_ref))
-            << "Immune space object must be already marked";
-      }
-      // This may or may not succeed, which is ok.
+    // Use the mark bitmap.
+    accounting::ContinuousSpaceBitmap* mark_bitmap =
+        heap_mark_bitmap_->GetContinuousSpaceBitmap(ref);
+    accounting::LargeObjectBitmap* los_bitmap =
+        heap_mark_bitmap_->GetLargeObjectBitmap(ref);
+    CHECK(los_bitmap != nullptr) << "LOS bitmap covers the entire address range";
+    bool is_los = mark_bitmap == nullptr;
+    if (!is_los && mark_bitmap->Test(ref)) {
+      // Already marked.
       if (kUseBakerReadBarrier) {
-        from_ref->AtomicSetReadBarrierPointer(ReadBarrier::WhitePtr(), ReadBarrier::GrayPtr());
+        DCHECK(ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr() ||
+               ref->GetReadBarrierPointer() == ReadBarrier::BlackPtr());
       }
-      if (cc_bitmap->AtomicTestAndSet(from_ref)) {
-        // Already marked.
-        to_ref = from_ref;
-      } else {
-        // Newly marked.
-        to_ref = from_ref;
-        if (kUseBakerReadBarrier) {
-          DCHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr());
-        }
-        PushOntoMarkStack(to_ref);
+    } else if (is_los && los_bitmap->Test(ref)) {
+      // Already marked in LOS.
+      if (kUseBakerReadBarrier) {
+        DCHECK(ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr() ||
+               ref->GetReadBarrierPointer() == ReadBarrier::BlackPtr());
       }
     } else {
-      // Use the mark bitmap.
-      accounting::ContinuousSpaceBitmap* mark_bitmap =
-          heap_mark_bitmap_->GetContinuousSpaceBitmap(from_ref);
-      accounting::LargeObjectBitmap* los_bitmap =
-          heap_mark_bitmap_->GetLargeObjectBitmap(from_ref);
-      CHECK(los_bitmap != nullptr) << "LOS bitmap covers the entire address range";
-      bool is_los = mark_bitmap == nullptr;
-      if (!is_los && mark_bitmap->Test(from_ref)) {
-        // Already marked.
-        to_ref = from_ref;
-        if (kUseBakerReadBarrier) {
-          DCHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr() ||
-                 to_ref->GetReadBarrierPointer() == ReadBarrier::BlackPtr());
+      // Not marked.
+      if (IsOnAllocStack(ref)) {
+        // If it's on the allocation stack, it's considered marked. Keep it white.
+        // Objects on the allocation stack need not be marked.
+        if (!is_los) {
+          DCHECK(!mark_bitmap->Test(ref));
+        } else {
+          DCHECK(!los_bitmap->Test(ref));
         }
-      } else if (is_los && los_bitmap->Test(from_ref)) {
-        // Already marked in LOS.
-        to_ref = from_ref;
         if (kUseBakerReadBarrier) {
-          DCHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr() ||
-                 to_ref->GetReadBarrierPointer() == ReadBarrier::BlackPtr());
+          DCHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::WhitePtr());
         }
       } else {
-        // Not marked.
-        if (IsOnAllocStack(from_ref)) {
-          // If it's on the allocation stack, it's considered marked. Keep it white.
-          to_ref = from_ref;
-          // Objects on the allocation stack need not be marked.
-          if (!is_los) {
-            DCHECK(!mark_bitmap->Test(to_ref));
-          } else {
-            DCHECK(!los_bitmap->Test(to_ref));
-          }
-          if (kUseBakerReadBarrier) {
-            DCHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::WhitePtr());
-          }
+        // Not marked or on the allocation stack. Try to mark it.
+        // This may or may not succeed, which is ok.
+        if (kUseBakerReadBarrier) {
+          ref->AtomicSetReadBarrierPointer(ReadBarrier::WhitePtr(), ReadBarrier::GrayPtr());
+        }
+        if (!is_los && mark_bitmap->AtomicTestAndSet(ref)) {
+          // Already marked.
+        } else if (is_los && los_bitmap->AtomicTestAndSet(ref)) {
+          // Already marked in LOS.
         } else {
-          // Not marked or on the allocation stack. Try to mark it.
-          // This may or may not succeed, which is ok.
+          // Newly marked.
           if (kUseBakerReadBarrier) {
-            from_ref->AtomicSetReadBarrierPointer(ReadBarrier::WhitePtr(), ReadBarrier::GrayPtr());
+            DCHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::GrayPtr());
           }
-          if (!is_los && mark_bitmap->AtomicTestAndSet(from_ref)) {
-            // Already marked.
-            to_ref = from_ref;
-          } else if (is_los && los_bitmap->AtomicTestAndSet(from_ref)) {
-            // Already marked in LOS.
-            to_ref = from_ref;
-          } else {
-            // Newly marked.
-            to_ref = from_ref;
-            if (kUseBakerReadBarrier) {
-              DCHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr());
-            }
-            PushOntoMarkStack(to_ref);
-          }
+          PushOntoMarkStack(ref);
         }
       }
     }
   }
-  return to_ref;
+  return ref;
 }
 
 void ConcurrentCopying::FinishPhase() {
diff --git a/runtime/gc/collector/concurrent_copying.h b/runtime/gc/collector/concurrent_copying.h
index c32b19e..27726e2 100644
--- a/runtime/gc/collector/concurrent_copying.h
+++ b/runtime/gc/collector/concurrent_copying.h
@@ -93,7 +93,7 @@
     DCHECK(ref != nullptr);
     return IsMarked(ref) == ref;
   }
-  mirror::Object* Mark(mirror::Object* from_ref) SHARED_REQUIRES(Locks::mutator_lock_)
+  ALWAYS_INLINE mirror::Object* Mark(mirror::Object* from_ref) SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_);
   bool IsMarking() const {
     return is_marking_;
@@ -183,6 +183,8 @@
   void DisableMarking() SHARED_REQUIRES(Locks::mutator_lock_);
   void IssueDisableMarkingCheckpoint() SHARED_REQUIRES(Locks::mutator_lock_);
   void ExpandGcMarkStack() SHARED_REQUIRES(Locks::mutator_lock_);
+  mirror::Object* MarkNonMoving(mirror::Object* from_ref) SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_);
 
   space::RegionSpace* region_space_;      // The underlying region space.
   std::unique_ptr<Barrier> gc_barrier_;
diff --git a/runtime/mirror/object.h b/runtime/mirror/object.h
index f75b8ae..4364d94 100644
--- a/runtime/mirror/object.h
+++ b/runtime/mirror/object.h
@@ -99,7 +99,7 @@
 #ifndef USE_BAKER_OR_BROOKS_READ_BARRIER
   NO_RETURN
 #endif
-  bool AtomicSetReadBarrierPointer(Object* expected_rb_ptr, Object* rb_ptr)
+  ALWAYS_INLINE bool AtomicSetReadBarrierPointer(Object* expected_rb_ptr, Object* rb_ptr)
       SHARED_REQUIRES(Locks::mutator_lock_);
   void AssertReadBarrierPointer() const SHARED_REQUIRES(Locks::mutator_lock_);
 
diff --git a/runtime/mirror/object_array-inl.h b/runtime/mirror/object_array-inl.h
index 5b73557..5337760 100644
--- a/runtime/mirror/object_array-inl.h
+++ b/runtime/mirror/object_array-inl.h
@@ -270,7 +270,7 @@
 }
 
 template<class T> template<typename Visitor>
-void ObjectArray<T>::VisitReferences(const Visitor& visitor) {
+inline void ObjectArray<T>::VisitReferences(const Visitor& visitor) {
   const size_t length = static_cast<size_t>(GetLength());
   for (size_t i = 0; i < length; ++i) {
     visitor(this, OffsetOfElement(i), false);
diff --git a/runtime/read_barrier-inl.h b/runtime/read_barrier-inl.h
index 85ac4aa..b88cb5b 100644
--- a/runtime/read_barrier-inl.h
+++ b/runtime/read_barrier-inl.h
@@ -19,7 +19,7 @@
 
 #include "read_barrier.h"
 
-#include "gc/collector/concurrent_copying.h"
+#include "gc/collector/concurrent_copying-inl.h"
 #include "gc/heap.h"
 #include "mirror/object_reference.h"
 #include "mirror/reference.h"