Fix a DCHECK failure due to unmatching numbers of cards scanned.

- See the bug for details of the failure.
- After a discussion, we decided to get rid of the DCHECK as a simple
  solution would not detect corner failure cases and a full solution
  would add undesired complexity, and left a comment that explains
  what situation had caused a DCHECK failure.
- Fix a potential error of failing to scan the last card that the end
  of the image space falls on as a result of the image end being not
  necessarily aligned by the card size.
- Remove dead/unused MarkSweep::ScanRoot().
- Add AlignUp and AlignDown for aligning pointers.

Bug: 11465268
Change-Id: Iee3018a42c48a159feb0e9cf77b1a6b303f5d245
diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc
index a5e66d2..2c69c77 100644
--- a/runtime/gc/collector/mark_sweep.cc
+++ b/runtime/gc/collector/mark_sweep.cc
@@ -776,7 +776,6 @@
     ScanObjectParallelVisitor visitor(this);
     accounting::CardTable* card_table = mark_sweep_->GetHeap()->GetCardTable();
     size_t cards_scanned = card_table->Scan(bitmap_, begin_, end_, visitor, minimum_age_);
-    mark_sweep_->cards_scanned_.fetch_add(cards_scanned);
     VLOG(heap) << "Parallel scanning cards " << reinterpret_cast<void*>(begin_) << " - "
         << reinterpret_cast<void*>(end_) << " = " << cards_scanned;
     // Finish by emptying our local mark stack.
@@ -814,11 +813,14 @@
     DCHECK_NE(mark_stack_tasks, 0U);
     const size_t mark_stack_delta = std::min(CardScanTask::kMaxSize / 2,
                                              mark_stack_size / mark_stack_tasks + 1);
-    size_t ref_card_count = 0;
-    cards_scanned_ = 0;
     for (const auto& space : GetHeap()->GetContinuousSpaces()) {
       byte* card_begin = space->Begin();
       byte* card_end = space->End();
+      // Align up the end address. For example, the image space's end
+      // may not be card-size-aligned.
+      card_end = AlignUp(card_end, accounting::CardTable::kCardSize);
+      DCHECK(IsAligned<accounting::CardTable::kCardSize>(card_begin));
+      DCHECK(IsAligned<accounting::CardTable::kCardSize>(card_end));
       // Calculate how many bytes of heap we will scan,
       const size_t address_range = card_end - card_begin;
       // Calculate how much address range each task gets.
@@ -842,24 +844,15 @@
         thread_pool->AddTask(self, task);
         card_begin += card_increment;
       }
-
-      if (paused && kIsDebugBuild) {
-        // Make sure we don't miss scanning any cards.
-        size_t scanned_cards = card_table->Scan(space->GetMarkBitmap(), space->Begin(),
-                                                space->End(), VoidFunctor(), minimum_age);
-        VLOG(heap) << "Scanning space cards " << reinterpret_cast<void*>(space->Begin()) << " - "
-            << reinterpret_cast<void*>(space->End()) << " = " << scanned_cards;
-        ref_card_count += scanned_cards;
-      }
     }
 
+    // Note: the card scan below may dirty new cards (and scan them)
+    // as a side effect when a Reference object is encountered and
+    // queued during the marking. See b/11465268.
     thread_pool->SetMaxActiveWorkers(thread_count - 1);
     thread_pool->StartWorkers(self);
     thread_pool->Wait(self, true, true);
     thread_pool->StopWorkers(self);
-    if (paused) {
-      DCHECK_EQ(ref_card_count, static_cast<size_t>(cards_scanned_.load()));
-    }
     timings_.EndSplit();
   } else {
     for (const auto& space : GetHeap()->GetContinuousSpaces()) {
@@ -1356,10 +1349,6 @@
   }
 }
 
-void MarkSweep::ScanRoot(const Object* obj) {
-  ScanObject(obj);
-}
-
 class MarkObjectVisitor {
  public:
   explicit MarkObjectVisitor(MarkSweep* const mark_sweep) ALWAYS_INLINE : mark_sweep_(mark_sweep) {}
diff --git a/runtime/gc/collector/mark_sweep.h b/runtime/gc/collector/mark_sweep.h
index 19df2da..637428b 100644
--- a/runtime/gc/collector/mark_sweep.h
+++ b/runtime/gc/collector/mark_sweep.h
@@ -157,11 +157,6 @@
     return cleared_reference_list_;
   }
 
-  // Proxy for external access to ScanObject.
-  void ScanRoot(const mirror::Object* obj)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
   // Blackens an object.
   void ScanObject(const mirror::Object* obj)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
@@ -438,7 +433,6 @@
   AtomicInteger work_chunks_created_;
   AtomicInteger work_chunks_deleted_;
   AtomicInteger reference_count_;
-  AtomicInteger cards_scanned_;
 
   // Verification.
   size_t live_stack_freeze_size_;
diff --git a/runtime/utils.h b/runtime/utils.h
index 0174b37..51035b6 100644
--- a/runtime/utils.h
+++ b/runtime/utils.h
@@ -119,6 +119,7 @@
   typedef B value;
 };
 
+// For rounding integers.
 template<typename T>
 static inline T RoundDown(T x, int n) {
   CHECK(IsPowerOfTwo(n));
@@ -130,6 +131,18 @@
   return RoundDown(x + n - 1, n);
 }
 
+// For aligning pointers.
+template<typename T>
+static inline T* AlignDown(T* x, int n) {
+  CHECK(IsPowerOfTwo(n));
+  return reinterpret_cast<T*>(reinterpret_cast<uintptr_t>(x) & -static_cast<uintptr_t>(n));
+}
+
+template<typename T>
+static inline T* AlignUp(T* x, int n) {
+  return AlignDown(reinterpret_cast<T*>(reinterpret_cast<uintptr_t>(x) + static_cast<uintptr_t>(n - 1)), n);
+}
+
 // Implementation is from "Hacker's Delight" by Henry S. Warren, Jr.,
 // figure 3-3, page 48, where the function is called clp2.
 static inline uint32_t RoundUpToPowerOfTwo(uint32_t x) {