Record allocation after SetClass to provide a memory fence for the SetClass card mark.

RecordAllocation is now called after SetClass to ensure that the card mark performed by SetClass is ordered behind a fence (the atomic add in RecordAllocation).
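
To illustrate the ordering, here is a minimal C++11 sketch; the types and counters are hypothetical stand-ins rather than the real ART declarations, and the explicit release fence models the barrier the runtime relies on the atomic add to provide:

  #include <atomic>
  #include <cstddef>

  struct Class {};
  struct Object { Class* klass = nullptr; };

  std::atomic<size_t> bytes_allocated{0};   // stand-in for the space's statistics counter
  std::atomic<Object*> published{nullptr};  // stand-in for a slot a scanning thread reads

  void AllocAndPublish(Object* obj, Class* c, size_t size) {
    obj->klass = c;                  // obj->SetClass(c): a plain store of the class pointer
    bytes_allocated.fetch_add(size); // RecordAllocation's atomic add, relied on as the fence
    std::atomic_thread_fence(std::memory_order_release);  // spelled out for portable semantics
    published.store(obj, std::memory_order_relaxed);      // object becomes visible to others,
                                                          // e.g. via the allocation stack
  }

  Class* ReadClass() {
    // A thread that acquires the published pointer is guaranteed a non-NULL class.
    Object* obj = published.load(std::memory_order_acquire);
    return obj != nullptr ? obj->klass : nullptr;
  }

With the old order (RecordAllocation before SetClass), the object was pushed onto the allocation stack before its class was stored, so a concurrent scanner could observe klass == NULL.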

Also:
Removed the remaining LOCKS_EXCLUDED annotations mentioning the statistics lock.
Cleaned up some of the code in heap.cc.

Change-Id: Id781c6a9780ad72e90330acea647432974b240c6
diff --git a/src/heap.cc b/src/heap.cc
index 2f7e519..26e2d97 100644
--- a/src/heap.cc
+++ b/src/heap.cc
@@ -356,17 +356,24 @@
 }
 
 Object* Heap::AllocObject(Class* c, size_t byte_count) {
-  // Used in the detail message if we throw an OOME.
-  int64_t total_bytes_free;
-  size_t max_contiguous_allocation;
-
   DCHECK(c == NULL || (c->IsClassClass() && byte_count >= sizeof(Class)) ||
          (c->IsVariableSize() || c->GetObjectSize() == byte_count) ||
          strlen(ClassHelper(c).GetDescriptor()) == 0);
   DCHECK_GE(byte_count, sizeof(Object));
-  Object* obj = Allocate(byte_count);
-  if (obj != NULL) {
+  Object* obj = Allocate(alloc_space_, byte_count);
+  if (LIKELY(obj != NULL)) {
+#if VERIFY_OBJECT_ENABLED
+    WriterMutexLock mu(*GlobalSynchronization::heap_bitmap_lock_);
+    // VerifyObject doesn't like objects in the allocation stack that aren't marked as live.
+    live_bitmap_->Set(obj);
+#endif
+
     obj->SetClass(c);
+
+    // Record the allocation after SetClass so that the atomic add serves as a fence guarding the
+    // class store; without it, the class could appear NULL to another thread.
+    RecordAllocation(alloc_space_, obj);
+
     if (Dbg::IsAllocTrackingEnabled()) {
       Dbg::RecordAllocation(c, byte_count);
     }
@@ -383,8 +390,8 @@
 
     return obj;
   }
-  total_bytes_free = GetFreeMemory();
-  max_contiguous_allocation = 0;
+  int64_t total_bytes_free = GetFreeMemory();
+  size_t max_contiguous_allocation = 0;
   // TODO: C++0x auto
   for (Spaces::const_iterator cur = spaces_.begin(); cur != spaces_.end(); ++cur) {
     if ((*cur)->IsAllocSpace()) {
@@ -509,11 +516,6 @@
   DCHECK(obj);
 
   allocation_stack_->AtomicPush(obj);
-#if VERIFY_OBJECT_ENABLED
-  WriterMutexLock mu(*GlobalSynchronization::heap_bitmap_lock_);
-  // Verify objects doesn't like objects in allocation stack not being marked as live.
-  live_bitmap_->Set(obj);
-#endif
 }
 
 void Heap::RecordFree(size_t freed_objects, size_t freed_bytes) {
@@ -539,16 +541,6 @@
   }
 }
 
-Object* Heap::Allocate(size_t size) {
-  Object* obj = Allocate(alloc_space_, size);
-  if (obj != NULL) {
-    RecordAllocation(alloc_space_, obj);
-    return obj;
-  }
-
-  return NULL;
-}
-
 Object* Heap::Allocate(AllocSpace* space, size_t alloc_size) {
   Thread* self = Thread::Current();
   // Since allocation can cause a GC which will need to SuspendAll, make sure all allocations are
diff --git a/src/heap.h b/src/heap.h
index 72a63bd..6f6cd67 100644
--- a/src/heap.h
+++ b/src/heap.h
@@ -89,7 +89,6 @@
 
   // Allocates and initializes storage for an object instance.
   Object* AllocObject(Class* klass, size_t num_bytes)
-      LOCKS_EXCLUDED(statistics_lock_)
       SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // Check sanity of given reference. Requires the heap lock.
@@ -125,7 +124,7 @@
   // Implements java.lang.Runtime.totalMemory.
   int64_t GetTotalMemory();
   // Implements java.lang.Runtime.freeMemory.
-  int64_t GetFreeMemory() LOCKS_EXCLUDED(statistics_lock_);
+  int64_t GetFreeMemory();
 
   // Implements VMDebug.countInstancesOfClass.
   int64_t CountInstances(Class* c, bool count_assignable)
@@ -197,7 +196,7 @@
     verify_objects_ = false;
   }
 
-  void RecordFree(size_t freed_objects, size_t freed_bytes) LOCKS_EXCLUDED(statistics_lock_);
+  void RecordFree(size_t freed_objects, size_t freed_bytes);
 
   // Must be called if a field of an Object in the heap changes, and before any GC safe-point.
   // The call is not needed if NULL is stored in the field.
@@ -226,17 +225,17 @@
 
   void AddFinalizerReference(Thread* self, Object* object);
 
-  size_t GetBytesAllocated() const LOCKS_EXCLUDED(statistics_lock_);
-  size_t GetObjectsAllocated() const LOCKS_EXCLUDED(statistics_lock_);
-  size_t GetConcurrentStartSize() const LOCKS_EXCLUDED(statistics_lock_);
-  size_t GetConcurrentMinFree() const LOCKS_EXCLUDED(statistics_lock_);
-  size_t GetUsedMemorySize() const LOCKS_EXCLUDED(statistics_lock_);
+  size_t GetBytesAllocated() const;
+  size_t GetObjectsAllocated() const;
+  size_t GetConcurrentStartSize() const;
+  size_t GetConcurrentMinFree() const;
+  size_t GetUsedMemorySize() const;
 
   // Functions for getting the bitmap which corresponds to an object's address.
   // This is probably slow, TODO: use better data structure like binary tree .
   Space* FindSpaceFromObject(const Object*) const;
 
-  void DumpForSigQuit(std::ostream& os) LOCKS_EXCLUDED(statistics_lock_);
+  void DumpForSigQuit(std::ostream& os);
 
   void Trim(AllocSpace* alloc_space);
 
@@ -268,8 +267,6 @@
 
  private:
   // Allocates uninitialized storage.
-  Object* Allocate(size_t num_bytes)
-      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
   Object* Allocate(AllocSpace* space, size_t num_bytes)
       LOCKS_EXCLUDED(GlobalSynchronization::thread_suspend_count_lock_)
       SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
@@ -281,7 +278,7 @@
   void RequestConcurrentGC();
 
   void RecordAllocation(AllocSpace* space, const Object* object)
-      LOCKS_EXCLUDED(statistics_lock_, GlobalSynchronization::heap_bitmap_lock_);
+      LOCKS_EXCLUDED(GlobalSynchronization::heap_bitmap_lock_);
 
   void CollectGarbageInternal(GcType gc_plan, bool clear_soft_references)
       LOCKS_EXCLUDED(gc_complete_lock_,