Fix GC to use art::Atomic rather than compiler intrinsics.

Changes SpaceBitmap::AtomicTestAndSet and Space::end_. Space::end_ is made
atomic rather than volatile so that all of its uses, multi-threaded or not,
are fully captured.
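
For reference, both CAS loops in this change follow the same relaxed-load /
weak-CAS shape exposed by art::Atomic (which is built on std::atomic). The
sketch below mirrors that pattern with plain std::atomic and illustrative
names (AtomicTestAndSetBit and BumpAlloc are not ART functions); it is an
analogue of the new code, not a copy of it.

  #include <atomic>
  #include <cstdint>

  // Analogue of SpaceBitmap::AtomicTestAndSet: returns true if the bit was
  // already set, false if this call set it.
  bool AtomicTestAndSetBit(std::atomic<uintptr_t>* word, uintptr_t mask) {
    uintptr_t old_word = word->load(std::memory_order_relaxed);
    do {
      if ((old_word & mask) != 0) {
        return true;  // Fast path: the bit is already set.
      }
      // compare_exchange_weak may fail spuriously; on failure it reloads
      // old_word and the loop retries, matching
      // CompareExchangeWeakSequentiallyConsistent.
    } while (!word->compare_exchange_weak(old_word, old_word | mask,
                                          std::memory_order_seq_cst));
    return false;
  }

  // Analogue of the bump pointer space's CAS allocation loop over end_.
  char* BumpAlloc(std::atomic<char*>* end, char* growth_end, size_t num_bytes) {
    char* old_end;
    char* new_end;
    do {
      old_end = end->load(std::memory_order_relaxed);
      new_end = old_end + num_bytes;
      if (new_end > growth_end) {
        return nullptr;  // Out of room in the region.
      }
    } while (!end->compare_exchange_weak(old_end, new_end,
                                         std::memory_order_seq_cst));
    return old_end;  // Start of the newly claimed bytes.
  }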

Change-Id: I3058964b8ad90a8c253b3d7f75585f63ca2fb5e3
diff --git a/runtime/gc/accounting/space_bitmap-inl.h b/runtime/gc/accounting/space_bitmap-inl.h
index 7f1da79..1e9556a 100644
--- a/runtime/gc/accounting/space_bitmap-inl.h
+++ b/runtime/gc/accounting/space_bitmap-inl.h
@@ -21,6 +21,7 @@
 
 #include <memory>
 
+#include "atomic.h"
 #include "base/logging.h"
 #include "dex_file-inl.h"
 #include "heap_bitmap.h"
@@ -43,17 +44,17 @@
   const uintptr_t offset = addr - heap_begin_;
   const size_t index = OffsetToIndex(offset);
   const uword mask = OffsetToMask(offset);
-  uword* const address = &bitmap_begin_[index];
+  Atomic<uword>* atomic_entry = reinterpret_cast<Atomic<uword>*>(&bitmap_begin_[index]);
   DCHECK_LT(index, bitmap_size_ / kWordSize) << " bitmap_size_ = " << bitmap_size_;
   uword old_word;
   do {
-    old_word = *address;
+    old_word = atomic_entry->LoadRelaxed();
     // Fast path: The bit is already set.
     if ((old_word & mask) != 0) {
       DCHECK(Test(obj));
       return true;
     }
-  } while (!__sync_bool_compare_and_swap(address, old_word, old_word | mask));
+  } while (!atomic_entry->CompareExchangeWeakSequentiallyConsistent(old_word, old_word | mask));
   DCHECK(Test(obj));
   return false;
 }
diff --git a/runtime/gc/accounting/space_bitmap.h b/runtime/gc/accounting/space_bitmap.h
index 27fb087..6d1ba87 100644
--- a/runtime/gc/accounting/space_bitmap.h
+++ b/runtime/gc/accounting/space_bitmap.h
@@ -60,17 +60,17 @@
   // <offset> is the difference from .base to a pointer address.
   // <index> is the index of .bits that contains the bit representing
   //         <offset>.
-  static size_t OffsetToIndex(size_t offset) ALWAYS_INLINE {
+  static constexpr size_t OffsetToIndex(size_t offset) {
     return offset / kAlignment / kBitsPerWord;
   }
 
   template<typename T>
-  static T IndexToOffset(T index) {
+  static constexpr T IndexToOffset(T index) {
     return static_cast<T>(index * kAlignment * kBitsPerWord);
   }
 
   // Bits are packed in the obvious way.
-  static uword OffsetToMask(uintptr_t offset) ALWAYS_INLINE {
+  static constexpr uword OffsetToMask(uintptr_t offset) {
     return (static_cast<size_t>(1)) << ((offset / kAlignment) % kBitsPerWord);
   }
 
diff --git a/runtime/gc/space/bump_pointer_space-inl.h b/runtime/gc/space/bump_pointer_space-inl.h
index 71c295e..ee3c979 100644
--- a/runtime/gc/space/bump_pointer_space-inl.h
+++ b/runtime/gc/space/bump_pointer_space-inl.h
@@ -41,11 +41,12 @@
                                                            size_t* usable_size) {
   Locks::mutator_lock_->AssertExclusiveHeld(self);
   num_bytes = RoundUp(num_bytes, kAlignment);
-  if (end_ + num_bytes > growth_end_) {
+  byte* end = end_.LoadRelaxed();
+  if (end + num_bytes > growth_end_) {
     return nullptr;
   }
-  mirror::Object* obj = reinterpret_cast<mirror::Object*>(end_);
-  end_ += num_bytes;
+  mirror::Object* obj = reinterpret_cast<mirror::Object*>(end);
+  end_.StoreRelaxed(end + num_bytes);
   *bytes_allocated = num_bytes;
   // Use the CAS free versions as an optimization.
   objects_allocated_.StoreRelaxed(objects_allocated_.LoadRelaxed() + 1);
@@ -61,15 +62,13 @@
   byte* old_end;
   byte* new_end;
   do {
-    old_end = end_;
+    old_end = end_.LoadRelaxed();
     new_end = old_end + num_bytes;
     // If there is no more room in the region, we are out of memory.
     if (UNLIKELY(new_end > growth_end_)) {
       return nullptr;
     }
-  } while (!__sync_bool_compare_and_swap(reinterpret_cast<volatile intptr_t*>(&end_),
-                                         reinterpret_cast<intptr_t>(old_end),
-                                         reinterpret_cast<intptr_t>(new_end)));
+  } while (!end_.CompareExchangeWeakSequentiallyConsistent(old_end, new_end));
   return reinterpret_cast<mirror::Object*>(old_end);
 }
 
diff --git a/runtime/gc/space/dlmalloc_space.cc b/runtime/gc/space/dlmalloc_space.cc
index 41a0458..5123e47 100644
--- a/runtime/gc/space/dlmalloc_space.cc
+++ b/runtime/gc/space/dlmalloc_space.cc
@@ -293,7 +293,7 @@
   madvise(GetMemMap()->Begin(), GetMemMap()->Size(), MADV_DONTNEED);
   live_bitmap_->Clear();
   mark_bitmap_->Clear();
-  end_ = Begin() + starting_size_;
+  SetEnd(Begin() + starting_size_);
   mspace_ = CreateMspace(mem_map_->Begin(), starting_size_, initial_size_);
   SetFootprintLimit(footprint_limit);
 }
diff --git a/runtime/gc/space/malloc_space.cc b/runtime/gc/space/malloc_space.cc
index 4d74f3c..27f92b5 100644
--- a/runtime/gc/space/malloc_space.cc
+++ b/runtime/gc/space/malloc_space.cc
@@ -123,13 +123,13 @@
   growth_limit = RoundUp(growth_limit, kPageSize);
   growth_limit_ = growth_limit;
   if (Size() > growth_limit_) {
-    end_ = begin_ + growth_limit;
+    SetEnd(begin_ + growth_limit);
   }
 }
 
 void* MallocSpace::MoreCore(intptr_t increment) {
   CheckMoreCoreForPrecondition();
-  byte* original_end = end_;
+  byte* original_end = End();
   if (increment != 0) {
     VLOG(heap) << "MallocSpace::MoreCore " << PrettySize(increment);
     byte* new_end = original_end + increment;
@@ -151,8 +151,8 @@
       CHECK_MEMORY_CALL(madvise, (new_end, size, MADV_DONTNEED), GetName());
       CHECK_MEMORY_CALL(mprotect, (new_end, size, PROT_NONE), GetName());
     }
-    // Update end_
-    end_ = new_end;
+    // Update end_.
+    SetEnd(new_end);
   }
   return original_end;
 }
@@ -163,11 +163,11 @@
   // alloc space so that we won't mix thread local runs from different
   // alloc spaces.
   RevokeAllThreadLocalBuffers();
-  end_ = reinterpret_cast<byte*>(RoundUp(reinterpret_cast<uintptr_t>(end_), kPageSize));
+  SetEnd(reinterpret_cast<byte*>(RoundUp(reinterpret_cast<uintptr_t>(End()), kPageSize)));
   DCHECK(IsAligned<accounting::CardTable::kCardSize>(begin_));
-  DCHECK(IsAligned<accounting::CardTable::kCardSize>(end_));
+  DCHECK(IsAligned<accounting::CardTable::kCardSize>(End()));
   DCHECK(IsAligned<kPageSize>(begin_));
-  DCHECK(IsAligned<kPageSize>(end_));
+  DCHECK(IsAligned<kPageSize>(End()));
   size_t size = RoundUp(Size(), kPageSize);
   // Trimming the heap should be done by the caller since we may have invalidated the accounting
   // stored in between objects.
@@ -175,7 +175,7 @@
   const size_t growth_limit = growth_limit_ - size;
   const size_t capacity = Capacity() - size;
   VLOG(heap) << "Begin " << reinterpret_cast<const void*>(begin_) << "\n"
-             << "End " << reinterpret_cast<const void*>(end_) << "\n"
+             << "End " << reinterpret_cast<const void*>(End()) << "\n"
              << "Size " << size << "\n"
              << "GrowthLimit " << growth_limit_ << "\n"
              << "Capacity " << Capacity();
@@ -188,16 +188,17 @@
   VLOG(heap) << "Capacity " << PrettySize(capacity);
   // Remap the tail.
   std::string error_msg;
-  std::unique_ptr<MemMap> mem_map(GetMemMap()->RemapAtEnd(end_, alloc_space_name,
-                                                    PROT_READ | PROT_WRITE, &error_msg));
+  std::unique_ptr<MemMap> mem_map(GetMemMap()->RemapAtEnd(End(), alloc_space_name,
+                                                          PROT_READ | PROT_WRITE, &error_msg));
   CHECK(mem_map.get() != nullptr) << error_msg;
-  void* allocator = CreateAllocator(end_, starting_size_, initial_size_, capacity, low_memory_mode);
+  void* allocator = CreateAllocator(End(), starting_size_, initial_size_, capacity,
+                                    low_memory_mode);
   // Protect memory beyond the initial size.
   byte* end = mem_map->Begin() + starting_size_;
   if (capacity > initial_size_) {
     CHECK_MEMORY_CALL(mprotect, (end, capacity - initial_size_, PROT_NONE), alloc_space_name);
   }
-  *out_malloc_space = CreateInstance(alloc_space_name, mem_map.release(), allocator, end_, end,
+  *out_malloc_space = CreateInstance(alloc_space_name, mem_map.release(), allocator, End(), end,
                                      limit_, growth_limit, CanMoveObjects());
   SetLimit(End());
   live_bitmap_->SetHeapLimit(reinterpret_cast<uintptr_t>(End()));
diff --git a/runtime/gc/space/rosalloc_space.cc b/runtime/gc/space/rosalloc_space.cc
index a1511e7..5738d47 100644
--- a/runtime/gc/space/rosalloc_space.cc
+++ b/runtime/gc/space/rosalloc_space.cc
@@ -349,7 +349,7 @@
   madvise(GetMemMap()->Begin(), GetMemMap()->Size(), MADV_DONTNEED);
   live_bitmap_->Clear();
   mark_bitmap_->Clear();
-  end_ = begin_ + starting_size_;
+  SetEnd(begin_ + starting_size_);
   delete rosalloc_;
   rosalloc_ = CreateRosAlloc(mem_map_->Begin(), starting_size_, initial_size_, Capacity(),
                              low_memory_mode_);
diff --git a/runtime/gc/space/space.h b/runtime/gc/space/space.h
index 8444a70..fff4df1 100644
--- a/runtime/gc/space/space.h
+++ b/runtime/gc/space/space.h
@@ -20,6 +20,7 @@
 #include <memory>
 #include <string>
 
+#include "atomic.h"
 #include "base/macros.h"
 #include "base/mutex.h"
 #include "gc/accounting/space_bitmap.h"
@@ -249,7 +250,7 @@
 
   // Current address at which the space ends, which may vary as the space is filled.
   byte* End() const {
-    return end_;
+    return end_.LoadRelaxed();
   }
 
   // The end of the address range covered by the space.
@@ -260,7 +261,7 @@
   // Change the end of the space. Be careful with use since changing the end of a space to an
   // invalid value may break the GC.
   void SetEnd(byte* end) {
-    end_ = end;
+    end_.StoreRelaxed(end);
   }
 
   void SetLimit(byte* limit) {
@@ -307,7 +308,7 @@
   byte* begin_;
 
   // Current end of the space.
-  byte* volatile end_;
+  Atomic<byte*> end_;
 
   // Limit of the space.
   byte* limit_;