ART: clear dirty cards of alloc space for MS/CMS partial and full GCs

For MS/CMS partial and full GCs, we can clear the dirty cards of the alloc
space when we process cards, since these GCs only care about cards that are
dirtied after the GC starts.

Change-Id: I1f9b32b20d75979387bc5d26b0cf9a256dcf20b6
Signed-off-by: Lei Li <lei.l.li@intel.com>
diff --git a/runtime/gc/accounting/card_table.cc b/runtime/gc/accounting/card_table.cc
index b7b6099..ca1e7c1 100644
--- a/runtime/gc/accounting/card_table.cc
+++ b/runtime/gc/accounting/card_table.cc
@@ -102,6 +102,26 @@
   mem_map_->MadviseDontNeedAndZero();
 }
 
+// Marks clean the cards covering heap range [start, end); both must be kCardSize-aligned.
+void CardTable::ClearCardRange(uint8_t* start, uint8_t* end) {
+  CHECK_ALIGNED(reinterpret_cast<uintptr_t>(start), kCardSize);
+  CHECK_ALIGNED(reinterpret_cast<uintptr_t>(end), kCardSize);
+  static_assert(kCardClean == 0, "kCardClean must be 0");
+  uint8_t* start_card = CardFromAddr(start);
+  uint8_t* end_card = CardFromAddr(end);
+  if (!kMadviseZeroes) {
+    memset(start_card, 0, end_card - start_card);  // Zero the cards, never the heap itself.
+    return;
+  }
+  uint8_t* round_start = AlignUp(start_card, kPageSize);
+  uint8_t* round_end = AlignDown(end_card, kPageSize);
+  if (round_start < round_end) {
+    madvise(round_start, round_end - round_start, MADV_DONTNEED);
+  }
+  memset(start_card, 0, std::min(round_start, end_card) - start_card);  // Partial head page.
+  memset(std::max(round_end, start_card), 0, end_card - std::max(round_end, start_card));
+}
+
 bool CardTable::AddrIsInCardTable(const void* addr) const {
   return IsValidCard(biased_begin_ + ((uintptr_t)addr >> kCardShift));
 }
diff --git a/runtime/gc/accounting/card_table.h b/runtime/gc/accounting/card_table.h
index a84cf34..3ea7651 100644
--- a/runtime/gc/accounting/card_table.h
+++ b/runtime/gc/accounting/card_table.h
@@ -113,6 +113,7 @@
 
   // Resets all of the bytes in the card table to clean.
   void ClearCardTable();
+  void ClearCardRange(uint8_t* start, uint8_t* end);  // Cleans cards for heap [start, end).
 
   // Resets all of the bytes in the card table which do not map to the image space.
   void ClearSpaceCards(space::ContinuousSpace* space);
diff --git a/runtime/gc/collector/mark_compact.cc b/runtime/gc/collector/mark_compact.cc
index 18af005..cdc398c 100644
--- a/runtime/gc/collector/mark_compact.cc
+++ b/runtime/gc/collector/mark_compact.cc
@@ -197,7 +197,7 @@
   BindBitmaps();
   t.NewTiming("ProcessCards");
   // Process dirty cards and add dirty cards to mod-union tables.
-  heap_->ProcessCards(GetTimings(), false);
+  heap_->ProcessCards(GetTimings(), false, false, true);
   // Clear the whole card table since we can not Get any additional dirty cards during the
   // paused GC. This saves memory but only works for pause the world collectors.
   t.NewTiming("ClearCardTable");
diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc
index 04fb694..64d2cc4 100644
--- a/runtime/gc/collector/mark_sweep.cc
+++ b/runtime/gc/collector/mark_sweep.cc
@@ -217,7 +217,7 @@
     Thread* self = Thread::Current();
     CHECK(!Locks::mutator_lock_->IsExclusiveHeld(self));
     // Process dirty cards and add dirty cards to mod union tables, also ages cards.
-    heap_->ProcessCards(GetTimings(), false);
+    heap_->ProcessCards(GetTimings(), false, true, false);
     // The checkpoint root marking is required to avoid a race condition which occurs if the
     // following happens during a reference write:
     // 1. mutator dirties the card (write barrier)
@@ -255,7 +255,8 @@
   BindBitmaps();
   FindDefaultSpaceBitmap();
   // Process dirty cards and add dirty cards to mod union tables.
-  heap_->ProcessCards(GetTimings(), false);
+  // If the GC type is non sticky, then we just clear the cards instead of ageing them.
+  heap_->ProcessCards(GetTimings(), false, true, GetGcType() != kGcTypeSticky);
   WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
   MarkRoots(self);
   MarkReachableObjects();
diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc
index fcc601f..4b95d10 100644
--- a/runtime/gc/collector/semi_space.cc
+++ b/runtime/gc/collector/semi_space.cc
@@ -216,7 +216,7 @@
   // Assume the cleared space is already empty.
   BindBitmaps();
   // Process dirty cards and add dirty cards to mod-union tables.
-  heap_->ProcessCards(GetTimings(), kUseRememberedSet && generational_);
+  heap_->ProcessCards(GetTimings(), kUseRememberedSet && generational_, false, true);
   // Clear the whole card table since we can not Get any additional dirty cards during the
   // paused GC. This saves memory but only works for pause the world collectors.
   t.NewTiming("ClearCardTable");
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index ba06e05..01d467a 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -2760,7 +2760,8 @@
   return it->second;
 }
 
-void Heap::ProcessCards(TimingLogger* timings, bool use_rem_sets) {
+void Heap::ProcessCards(TimingLogger* timings, bool use_rem_sets, bool process_alloc_space_cards,
+                        bool clear_alloc_space_cards) {
   TimingLogger::ScopedTiming t(__FUNCTION__, timings);
   // Clear cards and keep track of cards cleared in the mod-union table.
   for (const auto& space : continuous_spaces_) {
@@ -2776,17 +2777,21 @@
           << static_cast<int>(collector_type_);
       TimingLogger::ScopedTiming t2("AllocSpaceRemSetClearCards", timings);
       rem_set->ClearCards();
-    } else if (space->GetType() != space::kSpaceTypeBumpPointerSpace) {
+    } else if (process_alloc_space_cards) {
       TimingLogger::ScopedTiming t2("AllocSpaceClearCards", timings);
-      // No mod union table for the AllocSpace. Age the cards so that the GC knows that these cards
-      // were dirty before the GC started.
-      // TODO: Need to use atomic for the case where aged(cleaning thread) -> dirty(other thread)
-      // -> clean(cleaning thread).
-      // The races are we either end up with: Aged card, unaged card. Since we have the checkpoint
-      // roots and then we scan / update mod union tables after. We will always scan either card.
-      // If we end up with the non aged card, we scan it it in the pause.
-      card_table_->ModifyCardsAtomic(space->Begin(), space->End(), AgeCardVisitor(),
-                                     VoidFunctor());
+      if (clear_alloc_space_cards) {
+        card_table_->ClearCardRange(space->Begin(), space->End());
+      } else {
+        // No mod union table for the AllocSpace. Age the cards so that the GC knows that these
+        // cards were dirty before the GC started.
+        // TODO: Need to use atomic for the case where aged(cleaning thread) -> dirty(other thread)
+        // -> clean(cleaning thread).
+        // The races are we either end up with: Aged card, unaged card. Since we have the
+        // checkpoint roots and then we scan / update mod union tables after. We will always
+        // scan either card. If we end up with the non aged card, we scan it in the pause.
+        card_table_->ModifyCardsAtomic(space->Begin(), space->End(), AgeCardVisitor(),
+                                       VoidFunctor());
+      }
     }
   }
 }
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index b0b53b0..d9399d0 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -820,8 +820,11 @@
   // Swap the allocation stack with the live stack.
   void SwapStacks(Thread* self);
 
-  // Clear cards and update the mod union table.
-  void ProcessCards(TimingLogger* timings, bool use_rem_sets);
+  // Clear cards and update the mod union table. When process_alloc_space_cards is true,
+  // if clear_alloc_space_cards is true, then we clear cards instead of ageing them. We do
+  // not process the alloc space if process_alloc_space_cards is false.
+  void ProcessCards(TimingLogger* timings, bool use_rem_sets, bool process_alloc_space_cards,
+                    bool clear_alloc_space_cards);
 
   // Push an object onto the allocation stack.
   void PushOnAllocationStack(Thread* self, mirror::Object** obj)