Make allocations report usable size.

Work in progress to allow arrays to fill their usable size. Bug: 13028925.
Use C++11's override keyword on GCC >= 4.7 to ensure that we override GC and
allocator methods.
Move the initial mirror::Class setup into a functor so that all allocated objects
have non-zero sizes. Use this property to assert that no object is ever larger
than its usable size.
Other bits of GC-related clean-up: missing initialization, missing use of
const, hot methods in .cc files, "unimplemented" functions that fail at
runtime in header files, reducing header file includes, moving the valgrind
space into its own files, and reducing the number of array allocation routines.
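
For illustration, a minimal standalone sketch (not ART code) of the pattern
this change introduces: the allocator reports each allocation's usable size,
and a pre-fence visitor initializes the object with it before the object is
published. The names below (Object, TryToAllocate, InitializeVisitor,
AllocObject) are simplified stand-ins with made-up signatures, not the
runtime's.

  #include <cassert>
  #include <cstddef>
  #include <cstdint>
  #include <cstdlib>

  struct Object { uint32_t size; };  // stand-in for mirror::Object

  // Stand-in allocator mirroring the new Alloc(self, num_bytes, bytes_allocated,
  // usable_size) shape: the rounded-up block size is reported back to the caller.
  Object* TryToAllocate(size_t byte_count, size_t* bytes_allocated, size_t* usable_size) {
    size_t rounded = (byte_count + 7) & ~static_cast<size_t>(7);  // 8-byte alignment
    *bytes_allocated = rounded;
    *usable_size = rounded;  // a real allocator may hand back more than was requested
    // calloc zero-fills, loosely mirroring zeroed allocations.
    return static_cast<Object*>(std::calloc(1, rounded));
  }

  // Analogue of InitializeClassVisitor: runs before the StoreStore fence and is
  // given the usable size, so the object is never published with a zero size.
  struct InitializeVisitor {
    explicit InitializeVisitor(uint32_t size) : size_(size) {}
    void operator()(Object* obj, size_t usable_size) const {
      assert(size_ <= usable_size);
      obj->size = size_;
    }
    const uint32_t size_;
  };

  template <typename PreFenceVisitor>
  Object* AllocObject(size_t byte_count, const PreFenceVisitor& pre_fence_visitor) {
    size_t bytes_allocated, usable_size;
    Object* obj = TryToAllocate(byte_count, &bytes_allocated, &usable_size);
    if (obj == nullptr) {
      return nullptr;
    }
    pre_fence_visitor(obj, usable_size);  // initialize while the usable size is known
    assert(obj->size <= usable_size);     // the invariant this change asserts
    // A StoreStore fence would be issued here before publishing obj.
    return obj;
  }

  int main() {
    InitializeVisitor visitor(sizeof(Object));
    Object* obj = AllocObject(sizeof(Object), visitor);
    bool ok = (obj != nullptr);
    std::free(obj);
    return ok ? 0 : 1;
  }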

Change-Id: Id5760041a2d7f94dcaf17ec760f6095ec75dadaa
diff --git a/runtime/base/macros.h b/runtime/base/macros.h
index cf7029a..6cc9396 100644
--- a/runtime/base/macros.h
+++ b/runtime/base/macros.h
@@ -21,6 +21,15 @@
 
 #define GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
 
+// C++11 final and override keywords that were introduced in GCC version 4.7.
+#if GCC_VERSION >= 40700
+#define OVERRIDE override
+#define FINAL final
+#else
+#define OVERRIDE
+#define FINAL
+#endif
+
 // The COMPILE_ASSERT macro can be used to verify that a compile time
 // expression is true. For example, you could use it to verify the
 // size of a static array:
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index ff6f9de..87323f9 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -1185,27 +1185,47 @@
   return dex_cache.get();
 }
 
+// Used to initialize a class in the allocation code path to ensure it is guarded by a StoreStore
+// fence.
+class InitializeClassVisitor {
+ public:
+  explicit InitializeClassVisitor(uint32_t class_size) : class_size_(class_size) {
+  }
+
+  void operator()(mirror::Object* obj, size_t usable_size) const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    DCHECK_LE(class_size_, usable_size);
+    // Avoid AsClass as object is not yet in live bitmap or allocation stack.
+    mirror::Class* klass = down_cast<mirror::Class*>(obj);
+    // DCHECK(klass->IsClass());
+    klass->SetClassSize(class_size_);
+    klass->SetPrimitiveType(Primitive::kPrimNot);  // Default to not being primitive.
+    klass->SetDexClassDefIndex(DexFile::kDexNoIndex16);  // Default to no valid class def index.
+    klass->SetDexTypeIndex(DexFile::kDexNoIndex16);  // Default to no valid type index.
+  }
+
+ private:
+  const uint32_t class_size_;
+
+  DISALLOW_COPY_AND_ASSIGN(InitializeClassVisitor);
+};
+
 mirror::Class* ClassLinker::AllocClass(Thread* self, mirror::Class* java_lang_Class,
-                                       size_t class_size) {
+                                       uint32_t class_size) {
   DCHECK_GE(class_size, sizeof(mirror::Class));
   gc::Heap* heap = Runtime::Current()->GetHeap();
+  InitializeClassVisitor visitor(class_size);
   mirror::Object* k =
-      kMovingClasses ?
-          heap->AllocObject<true>(self, java_lang_Class, class_size) :
-          heap->AllocNonMovableObject<true>(self, java_lang_Class, class_size);
-  if (UNLIKELY(k == NULL)) {
+      kMovingClasses ? heap->AllocObject<true>(self, java_lang_Class, class_size, visitor)
+                     : heap->AllocNonMovableObject<true>(self, java_lang_Class, class_size, visitor);
+  if (UNLIKELY(k == nullptr)) {
     CHECK(self->IsExceptionPending());  // OOME.
-    return NULL;
+    return nullptr;
   }
-  mirror::Class* klass = k->AsClass();
-  klass->SetPrimitiveType(Primitive::kPrimNot);  // Default to not being primitive.
-  klass->SetClassSize(class_size);
-  klass->SetDexClassDefIndex(DexFile::kDexNoIndex16);  // Default to no valid class def index.
-  klass->SetDexTypeIndex(DexFile::kDexNoIndex16);  // Default to no valid type index.
-  return klass;
+  return k->AsClass();
 }
 
-mirror::Class* ClassLinker::AllocClass(Thread* self, size_t class_size) {
+mirror::Class* ClassLinker::AllocClass(Thread* self, uint32_t class_size) {
   return AllocClass(self, GetClassRoot(kJavaLangClass), class_size);
 }
 
@@ -1419,7 +1439,7 @@
 }
 
 // Precomputes size that will be needed for Class, matching LinkStaticFields
-size_t ClassLinker::SizeOfClass(const DexFile& dex_file,
+uint32_t ClassLinker::SizeOfClass(const DexFile& dex_file,
                                 const DexFile::ClassDef& dex_class_def) {
   const byte* class_data = dex_file.GetClassData(dex_class_def);
   size_t num_ref = 0;
@@ -1440,7 +1460,7 @@
     }
   }
   // start with generic class data
-  size_t size = sizeof(mirror::Class);
+  uint32_t size = sizeof(mirror::Class);
   // follow with reference fields which must be contiguous at start
   size += (num_ref * sizeof(uint32_t));
   // if there are 64-bit fields to add, make sure they are aligned
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index f346102..88dbb9c 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -388,13 +388,13 @@
   void FinishInit(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // For early bootstrapping by Init
-  mirror::Class* AllocClass(Thread* self, mirror::Class* java_lang_Class, size_t class_size)
+  mirror::Class* AllocClass(Thread* self, mirror::Class* java_lang_Class, uint32_t class_size)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Alloc* convenience functions to avoid needing to pass in mirror::Class*
   // values that are known to the ClassLinker such as
   // kObjectArrayClass and kJavaLangString etc.
-  mirror::Class* AllocClass(Thread* self, size_t class_size)
+  mirror::Class* AllocClass(Thread* self, uint32_t class_size)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   mirror::DexCache* AllocDexCache(Thread* self, const DexFile& dex_file)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -419,7 +419,7 @@
                          mirror::Class* c, SafeMap<uint32_t, mirror::ArtField*>& field_map)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  size_t SizeOfClass(const DexFile& dex_file,
+  uint32_t SizeOfClass(const DexFile& dex_file,
                      const DexFile::ClassDef& dex_class_def);
 
   void LoadClass(const DexFile& dex_file,
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index 9f09709..3b4e9c7 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -1202,7 +1202,9 @@
   if (c == NULL) {
     return status;
   }
-  new_array = gRegistry->Add(mirror::Array::Alloc<true>(Thread::Current(), c, length));
+  new_array = gRegistry->Add(mirror::Array::Alloc<true>(Thread::Current(), c, length,
+                                                        c->GetComponentSize(),
+                                                        Runtime::Current()->GetHeap()->GetCurrentAllocator()));
   return JDWP::ERR_NONE;
 }
 
diff --git a/runtime/entrypoints/entrypoint_utils.cc b/runtime/entrypoints/entrypoint_utils.cc
index 4078cac..829ec4a 100644
--- a/runtime/entrypoints/entrypoint_utils.cc
+++ b/runtime/entrypoints/entrypoint_utils.cc
@@ -87,7 +87,8 @@
   gc::Heap* heap = Runtime::Current()->GetHeap();
   // Use the current allocator type in case CheckFilledNewArrayAlloc caused us to suspend and then
   // the heap switched the allocator type while we were suspended.
-  return mirror::Array::Alloc<false>(self, klass, component_count, heap->GetCurrentAllocator());
+  return mirror::Array::Alloc<false>(self, klass, component_count, klass->GetComponentSize(),
+                                     heap->GetCurrentAllocator());
 }
 
 // Helper function to allocate array for FILLED_NEW_ARRAY.
@@ -103,7 +104,8 @@
   gc::Heap* heap = Runtime::Current()->GetHeap();
   // Use the current allocator type in case CheckFilledNewArrayAlloc caused us to suspend and then
   // the heap switched the allocator type while we were suspended.
-  return mirror::Array::Alloc<true>(self, klass, component_count, heap->GetCurrentAllocator());
+  return mirror::Array::Alloc<true>(self, klass, component_count, klass->GetComponentSize(),
+                                    heap->GetCurrentAllocator());
 }
 
 void ThrowStackOverflowError(Thread* self) {
diff --git a/runtime/entrypoints/entrypoint_utils.h b/runtime/entrypoints/entrypoint_utils.h
index 2c08351..2ced942 100644
--- a/runtime/entrypoints/entrypoint_utils.h
+++ b/runtime/entrypoints/entrypoint_utils.h
@@ -228,9 +228,11 @@
     }
     gc::Heap* heap = Runtime::Current()->GetHeap();
     return mirror::Array::Alloc<kInstrumented>(self, klass, component_count,
+                                               klass->GetComponentSize(),
                                                heap->GetCurrentAllocator());
   }
-  return mirror::Array::Alloc<kInstrumented>(self, klass, component_count, allocator_type);
+  return mirror::Array::Alloc<kInstrumented>(self, klass, component_count,
+                                             klass->GetComponentSize(), allocator_type);
 }
 
 template <bool kAccessCheck, bool kInstrumented>
@@ -252,9 +254,10 @@
       return nullptr;  // Failure
     }
   }
-  // No need to retry a slow-path allocation as the above code won't
-  // cause a GC or thread suspension.
-  return mirror::Array::Alloc<kInstrumented>(self, klass, component_count, allocator_type);
+  // No need to retry a slow-path allocation as the above code won't cause a GC or thread
+  // suspension.
+  return mirror::Array::Alloc<kInstrumented>(self, klass, component_count,
+                                             klass->GetComponentSize(), allocator_type);
 }
 
 extern mirror::Array* CheckAndAllocArrayFromCode(uint32_t type_idx, mirror::ArtMethod* method,
diff --git a/runtime/gc/allocator/rosalloc.h b/runtime/gc/allocator/rosalloc.h
index c4238c7..5b4ca80 100644
--- a/runtime/gc/allocator/rosalloc.h
+++ b/runtime/gc/allocator/rosalloc.h
@@ -54,10 +54,10 @@
 namespace gc {
 namespace allocator {
 
-// A Runs-of-slots memory allocator.
+// A runs-of-slots memory allocator.
 class RosAlloc {
  private:
-  // Rerepresents a run of free pages.
+  // Represents a run of free pages.
   class FreePageRun {
    public:
     byte magic_num_;  // The magic number used for debugging only.
diff --git a/runtime/gc/collector/mark_sweep.h b/runtime/gc/collector/mark_sweep.h
index 29fafd6..c55b2b2 100644
--- a/runtime/gc/collector/mark_sweep.h
+++ b/runtime/gc/collector/mark_sweep.h
@@ -64,16 +64,18 @@
 
   ~MarkSweep() {}
 
-  virtual void InitializePhase();
-  virtual bool IsConcurrent() const;
-  virtual bool HandleDirtyObjectsPhase() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
-  virtual void MarkingPhase() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  virtual void ReclaimPhase() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  virtual void FinishPhase() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  virtual void InitializePhase() OVERRIDE;
+  virtual void MarkingPhase() OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  virtual bool HandleDirtyObjectsPhase() OVERRIDE EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
+  virtual void ReclaimPhase() OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  virtual void FinishPhase() OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   virtual void MarkReachableObjects()
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
-  virtual GcType GetGcType() const {
+
+  virtual bool IsConcurrent() const OVERRIDE;
+
+  virtual GcType GetGcType() const OVERRIDE {
     return kGcTypeFull;
   }
 
@@ -131,7 +133,7 @@
   void ProcessReferences(Thread* self)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  // Update and mark references from immune spaces.
+  // Update and mark references from immune spaces. Virtual as overridden by StickyMarkSweep.
   virtual void UpdateAndMarkModUnion()
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -140,7 +142,8 @@
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  // Sweeps unmarked objects to complete the garbage collection.
+  // Sweeps unmarked objects to complete the garbage collection. Virtual as by default it sweeps
+  // all allocation spaces. Partial and sticky GCs want to just sweep a subset of the heap.
   virtual void Sweep(bool swap_bitmaps) EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
   // Sweeps unmarked objects to complete the garbage collection.
@@ -232,7 +235,7 @@
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
   // Mark the vm thread roots.
-  virtual void MarkThreadRoots(Thread* self)
+  void MarkThreadRoots(Thread* self)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
diff --git a/runtime/gc/collector/partial_mark_sweep.h b/runtime/gc/collector/partial_mark_sweep.h
index 3b788f4..44ae9e9 100644
--- a/runtime/gc/collector/partial_mark_sweep.h
+++ b/runtime/gc/collector/partial_mark_sweep.h
@@ -26,7 +26,8 @@
 
 class PartialMarkSweep : public MarkSweep {
  public:
-  virtual GcType GetGcType() const {
+  // Virtual as overridden by StickyMarkSweep.
+  virtual GcType GetGcType() const OVERRIDE {
     return kGcTypePartial;
   }
 
@@ -35,8 +36,9 @@
 
  protected:
   // Bind the live bits to the mark bits of bitmaps for spaces that aren't collected for partial
-  // collections, ie the Zygote space. Also mark this space is immune.
-  virtual void BindBitmaps() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  // collections, i.e. the Zygote space. Also mark this space as immune. Virtual as overridden by
+  // StickyMarkSweep.
+  virtual void BindBitmaps() OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
  private:
   DISALLOW_COPY_AND_ASSIGN(PartialMarkSweep);
diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc
index d639db5..a4c9dea 100644
--- a/runtime/gc/collector/semi_space.cc
+++ b/runtime/gc/collector/semi_space.cc
@@ -133,13 +133,15 @@
       immune_end_(nullptr),
       is_large_object_space_immune_(false),
       to_space_(nullptr),
+      to_space_live_bitmap_(nullptr),
       from_space_(nullptr),
       self_(nullptr),
       generational_(generational),
       last_gc_to_space_end_(nullptr),
       bytes_promoted_(0),
       whole_heap_collection_(true),
-      whole_heap_collection_interval_counter_(0) {
+      whole_heap_collection_interval_counter_(0),
+      saved_bytes_(0) {
 }
 
 void SemiSpace::InitializePhase() {
@@ -263,7 +265,7 @@
     semi_space_->ScanObject(obj);
   }
  private:
-  SemiSpace* semi_space_;
+  SemiSpace* const semi_space_;
 };
 
 void SemiSpace::MarkReachableObjects() {
@@ -467,10 +469,10 @@
     // of an old generation.)
     size_t bytes_promoted;
     space::MallocSpace* promo_dest_space = GetHeap()->GetPrimaryFreeListSpace();
-    forward_address = promo_dest_space->Alloc(self_, object_size, &bytes_promoted);
+    forward_address = promo_dest_space->Alloc(self_, object_size, &bytes_promoted, nullptr);
     if (forward_address == nullptr) {
       // If out of space, fall back to the to-space.
-      forward_address = to_space_->Alloc(self_, object_size, &bytes_allocated);
+      forward_address = to_space_->Alloc(self_, object_size, &bytes_allocated, nullptr);
     } else {
       GetHeap()->num_bytes_allocated_.FetchAndAdd(bytes_promoted);
       bytes_promoted_ += bytes_promoted;
@@ -511,7 +513,7 @@
     DCHECK(forward_address != nullptr);
   } else {
     // If it's allocated after the last GC (younger), copy it to the to-space.
-    forward_address = to_space_->Alloc(self_, object_size, &bytes_allocated);
+    forward_address = to_space_->Alloc(self_, object_size, &bytes_allocated, nullptr);
   }
   // Copy over the object and add it to the mark stack since we still need to update its
   // references.
diff --git a/runtime/gc/collector/semi_space.h b/runtime/gc/collector/semi_space.h
index ba97376..c164c5f 100644
--- a/runtime/gc/collector/semi_space.h
+++ b/runtime/gc/collector/semi_space.h
@@ -275,7 +275,7 @@
   // When true, the generational mode (promotion and the bump pointer
   // space only collection) is enabled. TODO: move these to a new file
   // as a new garbage collector?
-  bool generational_;
+  const bool generational_;
 
   // Used for the generational mode. the end/top of the bump
   // pointer space at the end of the last collection.
diff --git a/runtime/gc/collector/sticky_mark_sweep.cc b/runtime/gc/collector/sticky_mark_sweep.cc
index 9e3adb4..ce51ac5 100644
--- a/runtime/gc/collector/sticky_mark_sweep.cc
+++ b/runtime/gc/collector/sticky_mark_sweep.cc
@@ -59,11 +59,6 @@
   SweepArray(GetHeap()->GetLiveStack(), false);
 }
 
-void StickyMarkSweep::MarkThreadRoots(Thread* self) {
-  MarkRootsCheckpoint(self);
-}
-
-
 }  // namespace collector
 }  // namespace gc
 }  // namespace art
diff --git a/runtime/gc/collector/sticky_mark_sweep.h b/runtime/gc/collector/sticky_mark_sweep.h
index b675877..98f2b592 100644
--- a/runtime/gc/collector/sticky_mark_sweep.h
+++ b/runtime/gc/collector/sticky_mark_sweep.h
@@ -25,9 +25,9 @@
 namespace gc {
 namespace collector {
 
-class StickyMarkSweep : public PartialMarkSweep {
+class StickyMarkSweep FINAL : public PartialMarkSweep {
  public:
-  GcType GetGcType() const {
+  GcType GetGcType() const OVERRIDE {
     return kGcTypeSticky;
   }
 
@@ -37,21 +37,17 @@
  protected:
   // Bind the live bits to the mark bits of bitmaps for all spaces, all spaces other than the
   // alloc space will be marked as immune.
-  void BindBitmaps() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void BindBitmaps() OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void MarkReachableObjects()
+  void MarkReachableObjects() OVERRIDE
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
-  virtual void MarkThreadRoots(Thread* self)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
-
-  void Sweep(bool swap_bitmaps) EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+  void Sweep(bool swap_bitmaps) OVERRIDE EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
   // Don't need to do anything special here since we scan all the cards which may have references
   // to the newly allocated objects.
-  virtual void UpdateAndMarkModUnion() { }
+  void UpdateAndMarkModUnion() OVERRIDE { }
 
  private:
   DISALLOW_COPY_AND_ASSIGN(StickyMarkSweep);
diff --git a/runtime/gc/heap-inl.h b/runtime/gc/heap-inl.h
index 2e47a02..ce9dcea 100644
--- a/runtime/gc/heap-inl.h
+++ b/runtime/gc/heap-inl.h
@@ -50,11 +50,13 @@
   }
   mirror::Object* obj;
   AllocationTimer alloc_timer(this, &obj);
-  size_t bytes_allocated;
-  obj = TryToAllocate<kInstrumented, false>(self, allocator, byte_count, &bytes_allocated);
+  size_t bytes_allocated, usable_size;
+  obj = TryToAllocate<kInstrumented, false>(self, allocator, byte_count, &bytes_allocated,
+                                            &usable_size);
   if (UNLIKELY(obj == nullptr)) {
     bool is_current_allocator = allocator == GetCurrentAllocator();
-    obj = AllocateInternalWithGc(self, allocator, byte_count, &bytes_allocated, &klass);
+    obj = AllocateInternalWithGc(self, allocator, byte_count, &bytes_allocated, &usable_size,
+                                 &klass);
     if (obj == nullptr) {
       bool after_is_current_allocator = allocator == GetCurrentAllocator();
       if (is_current_allocator && !after_is_current_allocator) {
@@ -64,13 +66,17 @@
       return nullptr;
     }
   }
+  DCHECK_GT(bytes_allocated, 0u);
+  DCHECK_GT(usable_size, 0u);
   obj->SetClass(klass);
   if (kUseBrooksPointer) {
     obj->SetBrooksPointer(obj);
     obj->AssertSelfBrooksPointer();
   }
-  pre_fence_visitor(obj);
-  DCHECK_GT(bytes_allocated, 0u);
+  pre_fence_visitor(obj, usable_size);
+  if (kIsDebugBuild && klass != nullptr && Runtime::Current()->IsStarted()) {
+    CHECK_LE(obj->SizeOf(), usable_size);
+  }
   const size_t new_num_bytes_allocated =
       static_cast<size_t>(num_bytes_allocated_.FetchAndAdd(bytes_allocated)) + bytes_allocated;
   // TODO: Deprecate.
@@ -148,7 +154,8 @@
 
 template <const bool kInstrumented, const bool kGrow>
 inline mirror::Object* Heap::TryToAllocate(Thread* self, AllocatorType allocator_type,
-                                           size_t alloc_size, size_t* bytes_allocated) {
+                                           size_t alloc_size, size_t* bytes_allocated,
+                                           size_t* usable_size) {
   if (UNLIKELY(IsOutOfMemoryOnAllocation<kGrow>(allocator_type, alloc_size))) {
     return nullptr;
   }
@@ -160,35 +167,36 @@
       ret = bump_pointer_space_->AllocNonvirtual(alloc_size);
       if (LIKELY(ret != nullptr)) {
         *bytes_allocated = alloc_size;
+        *usable_size = alloc_size;
       }
       break;
     }
     case kAllocatorTypeRosAlloc: {
       if (kInstrumented && UNLIKELY(running_on_valgrind_)) {
         // If running on valgrind, we should be using the instrumented path.
-        ret = rosalloc_space_->Alloc(self, alloc_size, bytes_allocated);
+        ret = rosalloc_space_->Alloc(self, alloc_size, bytes_allocated, usable_size);
       } else {
         DCHECK(!running_on_valgrind_);
-        ret = rosalloc_space_->AllocNonvirtual(self, alloc_size, bytes_allocated);
+        ret = rosalloc_space_->AllocNonvirtual(self, alloc_size, bytes_allocated, usable_size);
       }
       break;
     }
     case kAllocatorTypeDlMalloc: {
       if (kInstrumented && UNLIKELY(running_on_valgrind_)) {
         // If running on valgrind, we should be using the instrumented path.
-        ret = dlmalloc_space_->Alloc(self, alloc_size, bytes_allocated);
+        ret = dlmalloc_space_->Alloc(self, alloc_size, bytes_allocated, usable_size);
       } else {
         DCHECK(!running_on_valgrind_);
-        ret = dlmalloc_space_->AllocNonvirtual(self, alloc_size, bytes_allocated);
+        ret = dlmalloc_space_->AllocNonvirtual(self, alloc_size, bytes_allocated, usable_size);
       }
       break;
     }
     case kAllocatorTypeNonMoving: {
-      ret = non_moving_space_->Alloc(self, alloc_size, bytes_allocated);
+      ret = non_moving_space_->Alloc(self, alloc_size, bytes_allocated, usable_size);
       break;
     }
     case kAllocatorTypeLOS: {
-      ret = large_object_space_->Alloc(self, alloc_size, bytes_allocated);
+      ret = large_object_space_->Alloc(self, alloc_size, bytes_allocated, usable_size);
       // Note that the bump pointer spaces aren't necessarily next to
       // the other continuous spaces like the non-moving alloc space or
       // the zygote space.
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 4435d98..8d8cdd6 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -952,6 +952,7 @@
 
 mirror::Object* Heap::AllocateInternalWithGc(Thread* self, AllocatorType allocator,
                                              size_t alloc_size, size_t* bytes_allocated,
+                                             size_t* usable_size,
                                              mirror::Class** klass) {
   mirror::Object* ptr = nullptr;
   bool was_default_allocator = allocator == GetCurrentAllocator();
@@ -968,7 +969,7 @@
       return nullptr;
     }
     // A GC was in progress and we blocked, retry allocation now that memory has been freed.
-    ptr = TryToAllocate<true, false>(self, allocator, alloc_size, bytes_allocated);
+    ptr = TryToAllocate<true, false>(self, allocator, alloc_size, bytes_allocated, usable_size);
   }
 
   // Loop through our different Gc types and try to Gc until we get enough free memory.
@@ -985,13 +986,13 @@
     }
     if (gc_ran) {
       // Did we free sufficient memory for the allocation to succeed?
-      ptr = TryToAllocate<true, false>(self, allocator, alloc_size, bytes_allocated);
+      ptr = TryToAllocate<true, false>(self, allocator, alloc_size, bytes_allocated, usable_size);
     }
   }
   // Allocations have failed after GCs;  this is an exceptional state.
   if (ptr == nullptr) {
     // Try harder, growing the heap if necessary.
-    ptr = TryToAllocate<true, true>(self, allocator, alloc_size, bytes_allocated);
+    ptr = TryToAllocate<true, true>(self, allocator, alloc_size, bytes_allocated, usable_size);
   }
   if (ptr == nullptr) {
     // Most allocations should have succeeded by now, so the heap is really full, really fragmented,
@@ -1008,7 +1009,7 @@
       *klass = sirt_klass.get();
       return nullptr;
     }
-    ptr = TryToAllocate<true, true>(self, allocator, alloc_size, bytes_allocated);
+    ptr = TryToAllocate<true, true>(self, allocator, alloc_size, bytes_allocated, usable_size);
     if (ptr == nullptr) {
       ThrowOutOfMemoryError(self, alloc_size, false);
     }
@@ -1318,9 +1319,10 @@
 }
 
 // Special compacting collector which uses sub-optimal bin packing to reduce zygote space size.
-class ZygoteCompactingCollector : public collector::SemiSpace {
+class ZygoteCompactingCollector FINAL : public collector::SemiSpace {
  public:
-  explicit ZygoteCompactingCollector(gc::Heap* heap) : SemiSpace(heap, "zygote collector") {
+  explicit ZygoteCompactingCollector(gc::Heap* heap) : SemiSpace(heap, "zygote collector"),
+      bin_live_bitmap_(nullptr), bin_mark_bitmap_(nullptr) {
   }
 
   void BuildBins(space::ContinuousSpace* space) {
@@ -1382,7 +1384,7 @@
       // No available space in the bins, place it in the target space instead (grows the zygote
       // space).
       size_t bytes_allocated;
-      forward_address = to_space_->Alloc(self_, object_size, &bytes_allocated);
+      forward_address = to_space_->Alloc(self_, object_size, &bytes_allocated, nullptr);
       if (to_space_live_bitmap_ != nullptr) {
         to_space_live_bitmap_->Set(forward_address);
       } else {
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index 5d44ee1..5d3232f 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -151,18 +151,24 @@
   ~Heap();
 
   // Allocates and initializes storage for an object instance.
-  template <bool kInstrumented>
-  mirror::Object* AllocObject(Thread* self, mirror::Class* klass, size_t num_bytes)
+  template <bool kInstrumented, typename PreFenceVisitor = VoidFunctor>
+  mirror::Object* AllocObject(Thread* self, mirror::Class* klass, size_t num_bytes,
+                              const PreFenceVisitor& pre_fence_visitor = VoidFunctor())
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return AllocObjectWithAllocator<kInstrumented, true>(self, klass, num_bytes,
-                                                         GetCurrentAllocator());
+                                                         GetCurrentAllocator(),
+                                                         pre_fence_visitor);
   }
-  template <bool kInstrumented>
-  mirror::Object* AllocNonMovableObject(Thread* self, mirror::Class* klass, size_t num_bytes)
+
+  template <bool kInstrumented, typename PreFenceVisitor = VoidFunctor>
+  mirror::Object* AllocNonMovableObject(Thread* self, mirror::Class* klass, size_t num_bytes,
+                                        const PreFenceVisitor& pre_fence_visitor = VoidFunctor())
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return AllocObjectWithAllocator<kInstrumented, true>(self, klass, num_bytes,
-                                                         GetCurrentNonMovingAllocator());
+                                                         GetCurrentNonMovingAllocator(),
+                                                         pre_fence_visitor);
   }
+
   template <bool kInstrumented, bool kCheckLargeObject, typename PreFenceVisitor = VoidFunctor>
   ALWAYS_INLINE mirror::Object* AllocObjectWithAllocator(
       Thread* self, mirror::Class* klass, size_t byte_count, AllocatorType allocator,
@@ -570,7 +576,8 @@
   // Handles Allocate()'s slow allocation path with GC involved after
   // an initial allocation attempt failed.
   mirror::Object* AllocateInternalWithGc(Thread* self, AllocatorType allocator, size_t num_bytes,
-                                         size_t* bytes_allocated, mirror::Class** klass)
+                                         size_t* bytes_allocated, size_t* usable_size,
+                                         mirror::Class** klass)
       LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -583,7 +590,8 @@
   // that the switch statement is constant optimized in the entrypoints.
   template <const bool kInstrumented, const bool kGrow>
   ALWAYS_INLINE mirror::Object* TryToAllocate(Thread* self, AllocatorType allocator_type,
-                                              size_t alloc_size, size_t* bytes_allocated)
+                                              size_t alloc_size, size_t* bytes_allocated,
+                                              size_t* usable_size)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void ThrowOutOfMemoryError(Thread* self, size_t byte_count, bool large_object_allocation)
diff --git a/runtime/gc/space/bump_pointer_space-inl.h b/runtime/gc/space/bump_pointer_space-inl.h
index 74a0274..70ab64b 100644
--- a/runtime/gc/space/bump_pointer_space-inl.h
+++ b/runtime/gc/space/bump_pointer_space-inl.h
@@ -23,6 +23,19 @@
 namespace gc {
 namespace space {
 
+inline mirror::Object* BumpPointerSpace::Alloc(Thread*, size_t num_bytes, size_t* bytes_allocated,
+                                               size_t* usable_size) {
+  num_bytes = RoundUp(num_bytes, kAlignment);
+  mirror::Object* ret = AllocNonvirtual(num_bytes);
+  if (LIKELY(ret != nullptr)) {
+    *bytes_allocated = num_bytes;
+    if (usable_size != nullptr) {
+      *usable_size = num_bytes;
+    }
+  }
+  return ret;
+}
+
 inline mirror::Object* BumpPointerSpace::AllocNonvirtualWithoutAccounting(size_t num_bytes) {
   DCHECK(IsAligned<kAlignment>(num_bytes));
   byte* old_end;
@@ -49,6 +62,15 @@
   return ret;
 }
 
+inline size_t BumpPointerSpace::AllocationSizeNonvirtual(mirror::Object* obj, size_t* usable_size)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  size_t num_bytes = obj->SizeOf();
+  if (usable_size != nullptr) {
+    *usable_size = RoundUp(num_bytes, kAlignment);
+  }
+  return num_bytes;
+}
+
 }  // namespace space
 }  // namespace gc
 }  // namespace art
diff --git a/runtime/gc/space/bump_pointer_space.cc b/runtime/gc/space/bump_pointer_space.cc
index f3f594f..43674ea 100644
--- a/runtime/gc/space/bump_pointer_space.cc
+++ b/runtime/gc/space/bump_pointer_space.cc
@@ -58,19 +58,6 @@
       num_blocks_(0) {
 }
 
-mirror::Object* BumpPointerSpace::Alloc(Thread*, size_t num_bytes, size_t* bytes_allocated) {
-  num_bytes = RoundUp(num_bytes, kAlignment);
-  mirror::Object* ret = AllocNonvirtual(num_bytes);
-  if (LIKELY(ret != nullptr)) {
-    *bytes_allocated = num_bytes;
-  }
-  return ret;
-}
-
-size_t BumpPointerSpace::AllocationSize(mirror::Object* obj) {
-  return AllocationSizeNonvirtual(obj);
-}
-
 void BumpPointerSpace::Clear() {
   // Release the pages back to the operating system.
   CHECK_NE(madvise(Begin(), Limit() - Begin(), MADV_DONTNEED), -1) << "madvise failed";
@@ -185,8 +172,9 @@
   }
 }
 
-bool BumpPointerSpace::IsEmpty() const {
-  return Begin() == End();
+accounting::SpaceBitmap::SweepCallback* BumpPointerSpace::GetSweepCallback() {
+  LOG(FATAL) << "Unimplemented";
+  return nullptr;
 }
 
 uint64_t BumpPointerSpace::GetBytesAllocated() {
diff --git a/runtime/gc/space/bump_pointer_space.h b/runtime/gc/space/bump_pointer_space.h
index d7e6f5b..476b833 100644
--- a/runtime/gc/space/bump_pointer_space.h
+++ b/runtime/gc/space/bump_pointer_space.h
@@ -29,12 +29,13 @@
 
 namespace space {
 
-// A bump pointer space is a space where objects may be allocated and garbage collected.
-class BumpPointerSpace : public ContinuousMemMapAllocSpace {
+// A bump pointer space allocates by incrementing a pointer; it doesn't provide a free
+// implementation as it's intended to be evacuated.
+class BumpPointerSpace FINAL : public ContinuousMemMapAllocSpace {
  public:
   typedef void(*WalkCallback)(void *start, void *end, size_t num_bytes, void* callback_arg);
 
-  SpaceType GetType() const {
+  SpaceType GetType() const OVERRIDE {
     return kSpaceTypeBumpPointerSpace;
   }
 
@@ -44,26 +45,29 @@
   static BumpPointerSpace* Create(const std::string& name, size_t capacity, byte* requested_begin);
 
   // Allocate num_bytes, returns nullptr if the space is full.
-  virtual mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated);
+  mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated,
+                        size_t* usable_size) OVERRIDE;
   mirror::Object* AllocNonvirtual(size_t num_bytes);
   mirror::Object* AllocNonvirtualWithoutAccounting(size_t num_bytes);
 
   // Return the storage space required by obj.
-  virtual size_t AllocationSize(mirror::Object* obj) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  size_t AllocationSize(mirror::Object* obj, size_t* usable_size) OVERRIDE
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return AllocationSizeNonvirtual(obj, usable_size);
+  }
 
   // NOPS unless we support free lists.
-  virtual size_t Free(Thread*, mirror::Object*) {
-    return 0;
-  }
-  virtual size_t FreeList(Thread*, size_t, mirror::Object**) {
+  size_t Free(Thread*, mirror::Object*) OVERRIDE {
     return 0;
   }
 
-  size_t AllocationSizeNonvirtual(mirror::Object* obj)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return obj->SizeOf();
+  size_t FreeList(Thread*, size_t, mirror::Object**) OVERRIDE {
+    return 0;
   }
 
+  size_t AllocationSizeNonvirtual(mirror::Object* obj, size_t* usable_size)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   // Removes the fork time growth limit on capacity, allowing the application to allocate up to the
   // maximum reserved size of the heap.
   void ClearGrowthLimit() {
@@ -80,16 +84,16 @@
     return GetMemMap()->Size();
   }
 
-  accounting::SpaceBitmap* GetLiveBitmap() const {
+  accounting::SpaceBitmap* GetLiveBitmap() const OVERRIDE {
     return nullptr;
   }
 
-  accounting::SpaceBitmap* GetMarkBitmap() const {
+  accounting::SpaceBitmap* GetMarkBitmap() const OVERRIDE {
     return nullptr;
   }
 
   // Clear the memory and reset the pointer to the start of the space.
-  void Clear() LOCKS_EXCLUDED(block_lock_);
+  void Clear() OVERRIDE LOCKS_EXCLUDED(block_lock_);
 
   void Dump(std::ostream& os) const;
 
@@ -99,7 +103,10 @@
 
   uint64_t GetBytesAllocated() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   uint64_t GetObjectsAllocated() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  bool IsEmpty() const;
+  bool IsEmpty() const {
+    return Begin() == End();
+  }
+
 
   bool Contains(const mirror::Object* obj) const {
     const byte* byte_obj = reinterpret_cast<const byte*>(obj);
@@ -116,7 +123,7 @@
   // Allocate a new TLAB, returns false if the allocation failed.
   bool AllocNewTlab(Thread* self, size_t bytes);
 
-  virtual BumpPointerSpace* AsBumpPointerSpace() {
+  BumpPointerSpace* AsBumpPointerSpace() OVERRIDE {
     return this;
   }
 
@@ -124,6 +131,8 @@
   void Walk(ObjectCallback* callback, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  accounting::SpaceBitmap::SweepCallback* GetSweepCallback() OVERRIDE;
+
   // Object alignment within the space.
   static constexpr size_t kAlignment = 8;
 
diff --git a/runtime/gc/space/dlmalloc_space-inl.h b/runtime/gc/space/dlmalloc_space-inl.h
index c14a4e1..02d8b54 100644
--- a/runtime/gc/space/dlmalloc_space-inl.h
+++ b/runtime/gc/space/dlmalloc_space-inl.h
@@ -18,6 +18,7 @@
 #define ART_RUNTIME_GC_SPACE_DLMALLOC_SPACE_INL_H_
 
 #include "dlmalloc_space.h"
+#include "gc/allocator/dlmalloc.h"
 #include "thread.h"
 
 namespace art {
@@ -25,11 +26,12 @@
 namespace space {
 
 inline mirror::Object* DlMallocSpace::AllocNonvirtual(Thread* self, size_t num_bytes,
-                                                      size_t* bytes_allocated) {
+                                                      size_t* bytes_allocated,
+                                                      size_t* usable_size) {
   mirror::Object* obj;
   {
     MutexLock mu(self, lock_);
-    obj = AllocWithoutGrowthLocked(self, num_bytes, bytes_allocated);
+    obj = AllocWithoutGrowthLocked(self, num_bytes, bytes_allocated, usable_size);
   }
   if (LIKELY(obj != NULL)) {
     // Zero freshly allocated memory, done while not holding the space's lock.
@@ -38,15 +40,25 @@
   return obj;
 }
 
+inline size_t DlMallocSpace::AllocationSizeNonvirtual(mirror::Object* obj, size_t* usable_size) {
+  void* obj_ptr = const_cast<void*>(reinterpret_cast<const void*>(obj));
+  size_t size = mspace_usable_size(obj_ptr);
+  if (usable_size != nullptr) {
+    *usable_size = size;
+  }
+  return size + kChunkOverhead;
+}
+
 inline mirror::Object* DlMallocSpace::AllocWithoutGrowthLocked(Thread* /*self*/, size_t num_bytes,
-                                                               size_t* bytes_allocated) {
+                                                               size_t* bytes_allocated,
+                                                               size_t* usable_size) {
   mirror::Object* result = reinterpret_cast<mirror::Object*>(mspace_malloc(mspace_for_alloc_, num_bytes));
   if (LIKELY(result != NULL)) {
     if (kDebugSpaces) {
       CHECK(Contains(result)) << "Allocation (" << reinterpret_cast<void*>(result)
             << ") not in bounds of allocation space " << *this;
     }
-    size_t allocation_size = AllocationSizeNonvirtual(result);
+    size_t allocation_size = AllocationSizeNonvirtual(result, usable_size);
     DCHECK(bytes_allocated != NULL);
     *bytes_allocated = allocation_size;
   }
diff --git a/runtime/gc/space/dlmalloc_space.cc b/runtime/gc/space/dlmalloc_space.cc
index 1493019..caedaaf 100644
--- a/runtime/gc/space/dlmalloc_space.cc
+++ b/runtime/gc/space/dlmalloc_space.cc
@@ -25,15 +25,15 @@
 #include "thread.h"
 #include "thread_list.h"
 #include "utils.h"
-
-#include <valgrind.h>
-#include <memcheck/memcheck.h>
+#include "valgrind_malloc_space-inl.h"
 
 namespace art {
 namespace gc {
 namespace space {
 
-static const bool kPrefetchDuringDlMallocFreeList = true;
+static constexpr bool kPrefetchDuringDlMallocFreeList = true;
+
+template class ValgrindMallocSpace<DlMallocSpace, void*>;
 
 DlMallocSpace::DlMallocSpace(const std::string& name, MemMap* mem_map, void* mspace, byte* begin,
                              byte* end, byte* limit, size_t growth_limit)
@@ -119,11 +119,8 @@
   return msp;
 }
 
-mirror::Object* DlMallocSpace::Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated) {
-  return AllocNonvirtual(self, num_bytes, bytes_allocated);
-}
-
-mirror::Object* DlMallocSpace::AllocWithGrowth(Thread* self, size_t num_bytes, size_t* bytes_allocated) {
+mirror::Object* DlMallocSpace::AllocWithGrowth(Thread* self, size_t num_bytes,
+                                               size_t* bytes_allocated, size_t* usable_size) {
   mirror::Object* result;
   {
     MutexLock mu(self, lock_);
@@ -131,7 +128,7 @@
     size_t max_allowed = Capacity();
     mspace_set_footprint_limit(mspace_, max_allowed);
     // Try the allocation.
-    result = AllocWithoutGrowthLocked(self, num_bytes, bytes_allocated);
+    result = AllocWithoutGrowthLocked(self, num_bytes, bytes_allocated, usable_size);
     // Shrink back down as small as possible.
     size_t footprint = mspace_footprint(mspace_);
     mspace_set_footprint_limit(mspace_, footprint);
@@ -145,7 +142,8 @@
   return result;
 }
 
-MallocSpace* DlMallocSpace::CreateInstance(const std::string& name, MemMap* mem_map, void* allocator, byte* begin, byte* end,
+MallocSpace* DlMallocSpace::CreateInstance(const std::string& name, MemMap* mem_map,
+                                           void* allocator, byte* begin, byte* end,
                                            byte* limit, size_t growth_limit) {
   return new DlMallocSpace(name, mem_map, allocator, begin, end, limit, growth_limit);
 }
@@ -156,7 +154,7 @@
     CHECK(ptr != NULL);
     CHECK(Contains(ptr)) << "Free (" << ptr << ") not in bounds of heap " << *this;
   }
-  const size_t bytes_freed = AllocationSizeNonvirtual(ptr);
+  const size_t bytes_freed = AllocationSizeNonvirtual(ptr, nullptr);
   if (kRecentFreeCount > 0) {
     RegisterRecentFree(ptr);
   }
@@ -176,7 +174,7 @@
       // The head of chunk for the allocation is sizeof(size_t) behind the allocation.
       __builtin_prefetch(reinterpret_cast<char*>(ptrs[i + look_ahead]) - sizeof(size_t));
     }
-    bytes_freed += AllocationSizeNonvirtual(ptr);
+    bytes_freed += AllocationSizeNonvirtual(ptr, nullptr);
   }
 
   if (kRecentFreeCount > 0) {
@@ -228,10 +226,6 @@
   return dlmalloc_space->MoreCore(increment);
 }
 
-size_t DlMallocSpace::AllocationSize(mirror::Object* obj) {
-  return AllocationSizeNonvirtual(obj);
-}
-
 size_t DlMallocSpace::Trim() {
   MutexLock mu(Thread::Current(), lock_);
   // Trim to release memory at the end of the space.
diff --git a/runtime/gc/space/dlmalloc_space.h b/runtime/gc/space/dlmalloc_space.h
index 4507c36..6ea10ad 100644
--- a/runtime/gc/space/dlmalloc_space.h
+++ b/runtime/gc/space/dlmalloc_space.h
@@ -17,7 +17,6 @@
 #ifndef ART_RUNTIME_GC_SPACE_DLMALLOC_SPACE_H_
 #define ART_RUNTIME_GC_SPACE_DLMALLOC_SPACE_H_
 
-#include "gc/allocator/dlmalloc.h"
 #include "malloc_space.h"
 #include "space.h"
 
@@ -30,7 +29,8 @@
 
 namespace space {
 
-// An alloc space is a space where objects may be allocated and garbage collected.
+// An alloc space is a space where objects may be allocated and garbage collected. Not final, as it
+// may be overridden by a ValgrindMallocSpace.
 class DlMallocSpace : public MallocSpace {
  public:
   // Create a DlMallocSpace from an existing mem_map.
@@ -45,21 +45,39 @@
   static DlMallocSpace* Create(const std::string& name, size_t initial_size, size_t growth_limit,
                                size_t capacity, byte* requested_begin);
 
-  virtual mirror::Object* AllocWithGrowth(Thread* self, size_t num_bytes,
-                                          size_t* bytes_allocated) LOCKS_EXCLUDED(lock_);
-  virtual mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated);
-  virtual size_t AllocationSize(mirror::Object* obj);
-  virtual size_t Free(Thread* self, mirror::Object* ptr)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  virtual size_t FreeList(Thread* self, size_t num_ptrs, mirror::Object** ptrs)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
-  mirror::Object* AllocNonvirtual(Thread* self, size_t num_bytes, size_t* bytes_allocated);
-
-  size_t AllocationSizeNonvirtual(mirror::Object* obj) {
-    void* obj_ptr = const_cast<void*>(reinterpret_cast<const void*>(obj));
-    return mspace_usable_size(obj_ptr) + kChunkOverhead;
+  // Virtual to allow ValgrindMallocSpace to intercept.
+  virtual mirror::Object* AllocWithGrowth(Thread* self, size_t num_bytes, size_t* bytes_allocated,
+                                          size_t* usable_size) OVERRIDE LOCKS_EXCLUDED(lock_);
+  // Virtual to allow ValgrindMallocSpace to intercept.
+  virtual mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated,
+                        size_t* usable_size) OVERRIDE LOCKS_EXCLUDED(lock_) {
+    return AllocNonvirtual(self, num_bytes, bytes_allocated, usable_size);
   }
+  // Virtual to allow ValgrindMallocSpace to intercept.
+  virtual size_t AllocationSize(mirror::Object* obj, size_t* usable_size) OVERRIDE {
+    return AllocationSizeNonvirtual(obj, usable_size);
+  }
+  // Virtual to allow ValgrindMallocSpace to intercept.
+  virtual size_t Free(Thread* self, mirror::Object* ptr) OVERRIDE
+      LOCKS_EXCLUDED(lock_)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  // Virtual to allow ValgrindMallocSpace to intercept.
+  virtual size_t FreeList(Thread* self, size_t num_ptrs, mirror::Object** ptrs) OVERRIDE
+      LOCKS_EXCLUDED(lock_)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  // DlMallocSpaces don't have thread local state.
+  void RevokeThreadLocalBuffers(art::Thread*) OVERRIDE {
+  }
+  void RevokeAllThreadLocalBuffers() OVERRIDE {
+  }
+
+  // Faster non-virtual allocation path.
+  mirror::Object* AllocNonvirtual(Thread* self, size_t num_bytes, size_t* bytes_allocated,
+                                  size_t* usable_size) LOCKS_EXCLUDED(lock_);
+
+  // Faster non-virtual allocation size path.
+  size_t AllocationSizeNonvirtual(mirror::Object* obj, size_t* usable_size);
 
 #ifndef NDEBUG
   // Override only in the debug build.
@@ -70,39 +88,37 @@
     return mspace_;
   }
 
-  size_t Trim();
+  size_t Trim() OVERRIDE;
 
   // Perform a mspace_inspect_all which calls back for each allocation chunk. The chunk may not be
   // in use, indicated by num_bytes equaling zero.
-  void Walk(WalkCallback callback, void* arg) LOCKS_EXCLUDED(lock_);
+  void Walk(WalkCallback callback, void* arg) OVERRIDE LOCKS_EXCLUDED(lock_);
 
   // Returns the number of bytes that the space has currently obtained from the system. This is
   // greater or equal to the amount of live data in the space.
-  size_t GetFootprint();
+  size_t GetFootprint() OVERRIDE;
 
   // Returns the number of bytes that the heap is allowed to obtain from the system via MoreCore.
-  size_t GetFootprintLimit();
+  size_t GetFootprintLimit() OVERRIDE;
 
   // Set the maximum number of bytes that the heap is allowed to obtain from the system via
   // MoreCore. Note this is used to stop the mspace growing beyond the limit to Capacity. When
   // allocations fail we GC before increasing the footprint limit and allowing the mspace to grow.
-  void SetFootprintLimit(size_t limit);
+  void SetFootprintLimit(size_t limit) OVERRIDE;
 
   MallocSpace* CreateInstance(const std::string& name, MemMap* mem_map, void* allocator,
                               byte* begin, byte* end, byte* limit, size_t growth_limit);
 
-  uint64_t GetBytesAllocated();
-  uint64_t GetObjectsAllocated();
+  uint64_t GetBytesAllocated() OVERRIDE;
+  uint64_t GetObjectsAllocated() OVERRIDE;
 
-  // Returns the class of a recently freed object.
-  mirror::Class* FindRecentFreedObject(const mirror::Object* obj);
+  void Clear() OVERRIDE;
 
-  virtual void Clear();
-
-  virtual bool IsDlMallocSpace() const {
+  bool IsDlMallocSpace() const OVERRIDE {
     return true;
   }
-  virtual DlMallocSpace* AsDlMallocSpace() {
+
+  DlMallocSpace* AsDlMallocSpace() OVERRIDE {
     return this;
   }
 
@@ -111,10 +127,12 @@
                 byte* limit, size_t growth_limit);
 
  private:
-  mirror::Object* AllocWithoutGrowthLocked(Thread* self, size_t num_bytes, size_t* bytes_allocated)
+  mirror::Object* AllocWithoutGrowthLocked(Thread* self, size_t num_bytes, size_t* bytes_allocated,
+                                           size_t* usable_size)
       EXCLUSIVE_LOCKS_REQUIRED(lock_);
 
-  void* CreateAllocator(void* base, size_t morecore_start, size_t initial_size, bool /*low_memory_mode*/) {
+  void* CreateAllocator(void* base, size_t morecore_start, size_t initial_size,
+                        bool /*low_memory_mode*/) OVERRIDE {
     return CreateMspace(base, morecore_start, initial_size);
   }
   static void* CreateMspace(void* base, size_t morecore_start, size_t initial_size);
@@ -122,11 +140,11 @@
   // The boundary tag overhead.
   static const size_t kChunkOverhead = kWordSize;
 
-  // Underlying malloc space
+  // Underlying malloc space.
   void* const mspace_;
 
-  // A mspace pointer used for allocation. Equals to what mspace_
-  // points to or nullptr after InvalidateAllocator() is called.
+  // An mspace pointer used for allocation. Equals mspace_ or nullptr after InvalidateAllocator()
+  // is called.
   void* mspace_for_alloc_;
 
   friend class collector::MarkSweep;
diff --git a/runtime/gc/space/large_object_space.cc b/runtime/gc/space/large_object_space.cc
index 987a655..1ca132e 100644
--- a/runtime/gc/space/large_object_space.cc
+++ b/runtime/gc/space/large_object_space.cc
@@ -57,7 +57,7 @@
 }
 
 mirror::Object* LargeObjectMapSpace::Alloc(Thread* self, size_t num_bytes,
-                                           size_t* bytes_allocated) {
+                                           size_t* bytes_allocated, size_t* usable_size) {
   std::string error_msg;
   MemMap* mem_map = MemMap::MapAnonymous("large object space allocation", NULL, num_bytes,
                                          PROT_READ | PROT_WRITE, true, &error_msg);
@@ -72,6 +72,9 @@
   size_t allocation_size = mem_map->Size();
   DCHECK(bytes_allocated != NULL);
   *bytes_allocated = allocation_size;
+  if (usable_size != nullptr) {
+    *usable_size = allocation_size;
+  }
   num_bytes_allocated_ += allocation_size;
   total_bytes_allocated_ += allocation_size;
   ++num_objects_allocated_;
@@ -92,9 +95,9 @@
   return allocation_size;
 }
 
-size_t LargeObjectMapSpace::AllocationSize(mirror::Object* obj) {
+size_t LargeObjectMapSpace::AllocationSize(mirror::Object* obj, size_t* usable_size) {
   MutexLock mu(Thread::Current(), lock_);
-  MemMaps::iterator found = mem_maps_.find(obj);
+  auto found = mem_maps_.find(obj);
   CHECK(found != mem_maps_.end()) << "Attempted to get size of a large object which is not live";
   return found->second->Size();
 }
@@ -112,7 +115,7 @@
 
 void LargeObjectMapSpace::Walk(DlMallocSpace::WalkCallback callback, void* arg) {
   MutexLock mu(Thread::Current(), lock_);
-  for (MemMaps::iterator it = mem_maps_.begin(); it != mem_maps_.end(); ++it) {
+  for (auto it = mem_maps_.begin(); it != mem_maps_.end(); ++it) {
     MemMap* mem_map = it->second;
     callback(mem_map->Begin(), mem_map->End(), mem_map->Size(), arg);
     callback(NULL, NULL, 0, arg);
@@ -244,14 +247,19 @@
   return mem_map_->HasAddress(obj);
 }
 
-size_t FreeListSpace::AllocationSize(mirror::Object* obj) {
+size_t FreeListSpace::AllocationSize(mirror::Object* obj, size_t* usable_size) {
   AllocationHeader* header = GetAllocationHeader(obj);
   DCHECK(Contains(obj));
   DCHECK(!header->IsFree());
-  return header->AllocationSize();
+  size_t alloc_size = header->AllocationSize();
+  if (usable_size != nullptr) {
+    *usable_size = alloc_size - sizeof(AllocationHeader);
+  }
+  return alloc_size;
 }
 
-mirror::Object* FreeListSpace::Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated) {
+mirror::Object* FreeListSpace::Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated,
+                                     size_t* usable_size) {
   MutexLock mu(self, lock_);
   size_t allocation_size = RoundUp(num_bytes + sizeof(AllocationHeader), kAlignment);
   AllocationHeader temp;
@@ -280,13 +288,15 @@
       new_header = reinterpret_cast<AllocationHeader*>(end_ - free_end_);
       free_end_ -= allocation_size;
     } else {
-      return NULL;
+      return nullptr;
     }
   }
 
-  DCHECK(bytes_allocated != NULL);
+  DCHECK(bytes_allocated != nullptr);
   *bytes_allocated = allocation_size;
-
+  if (usable_size != nullptr) {
+    *usable_size = allocation_size - sizeof(AllocationHeader);
+  }
   // Need to do these inside of the lock.
   ++num_objects_allocated_;
   ++total_objects_allocated_;
diff --git a/runtime/gc/space/large_object_space.h b/runtime/gc/space/large_object_space.h
index 5274c8d..b1b0c3c 100644
--- a/runtime/gc/space/large_object_space.h
+++ b/runtime/gc/space/large_object_space.h
@@ -32,20 +32,20 @@
 // Abstraction implemented by all large object spaces.
 class LargeObjectSpace : public DiscontinuousSpace, public AllocSpace {
  public:
-  virtual SpaceType GetType() const {
+  SpaceType GetType() const OVERRIDE {
     return kSpaceTypeLargeObjectSpace;
   }
 
-  virtual void SwapBitmaps();
-  virtual void CopyLiveToMarked();
+  void SwapBitmaps();
+  void CopyLiveToMarked();
   virtual void Walk(DlMallocSpace::WalkCallback, void* arg) = 0;
   virtual ~LargeObjectSpace() {}
 
-  uint64_t GetBytesAllocated() {
+  uint64_t GetBytesAllocated() OVERRIDE {
     return num_bytes_allocated_;
   }
 
-  uint64_t GetObjectsAllocated() {
+  uint64_t GetObjectsAllocated() OVERRIDE {
     return num_objects_allocated_;
   }
 
@@ -57,17 +57,23 @@
     return total_objects_allocated_;
   }
 
-  size_t FreeList(Thread* self, size_t num_ptrs, mirror::Object** ptrs);
+  size_t FreeList(Thread* self, size_t num_ptrs, mirror::Object** ptrs) OVERRIDE;
 
-  virtual bool IsAllocSpace() const {
+  // LargeObjectSpaces don't have thread local state.
+  void RevokeThreadLocalBuffers(art::Thread*) OVERRIDE {
+  }
+  void RevokeAllThreadLocalBuffers() OVERRIDE {
+  }
+
+  bool IsAllocSpace() const OVERRIDE {
     return true;
   }
 
-  virtual AllocSpace* AsAllocSpace() {
+  AllocSpace* AsAllocSpace() OVERRIDE {
     return this;
   }
 
-  virtual void Sweep(bool swap_bitmaps, size_t* freed_objects, size_t* freed_bytes);
+  void Sweep(bool swap_bitmaps, size_t* freed_objects, size_t* freed_bytes);
 
  protected:
   explicit LargeObjectSpace(const std::string& name);
@@ -85,17 +91,18 @@
 };
 
 // A discontinuous large object space implemented by individual mmap/munmap calls.
-class LargeObjectMapSpace : public LargeObjectSpace {
+class LargeObjectMapSpace FINAL : public LargeObjectSpace {
  public:
   // Creates a large object space. Allocations into the large object space use memory maps instead
   // of malloc.
   static LargeObjectMapSpace* Create(const std::string& name);
 
   // Return the storage space required by obj.
-  size_t AllocationSize(mirror::Object* obj);
-  mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated);
+  size_t AllocationSize(mirror::Object* obj, size_t* usable_size);
+  mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated,
+                        size_t* usable_size);
   size_t Free(Thread* self, mirror::Object* ptr);
-  void Walk(DlMallocSpace::WalkCallback, void* arg) LOCKS_EXCLUDED(lock_);
+  void Walk(DlMallocSpace::WalkCallback, void* arg) OVERRIDE LOCKS_EXCLUDED(lock_);
   // TODO: disabling thread safety analysis as this may be called when we already hold lock_.
   bool Contains(const mirror::Object* obj) const NO_THREAD_SAFETY_ANALYSIS;
 
@@ -113,16 +120,18 @@
 };
 
 // A continuous large object space with a free-list to handle holes.
-class FreeListSpace : public LargeObjectSpace {
+class FreeListSpace FINAL : public LargeObjectSpace {
  public:
   virtual ~FreeListSpace();
   static FreeListSpace* Create(const std::string& name, byte* requested_begin, size_t capacity);
 
-  size_t AllocationSize(mirror::Object* obj) EXCLUSIVE_LOCKS_REQUIRED(lock_);
-  mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated);
-  size_t Free(Thread* self, mirror::Object* obj);
-  bool Contains(const mirror::Object* obj) const;
-  void Walk(DlMallocSpace::WalkCallback callback, void* arg) LOCKS_EXCLUDED(lock_);
+  size_t AllocationSize(mirror::Object* obj, size_t* usable_size) OVERRIDE
+      EXCLUSIVE_LOCKS_REQUIRED(lock_);
+  mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated,
+                        size_t* usable_size) OVERRIDE;
+  size_t Free(Thread* self, mirror::Object* obj) OVERRIDE;
+  bool Contains(const mirror::Object* obj) const OVERRIDE;
+  void Walk(DlMallocSpace::WalkCallback callback, void* arg) OVERRIDE LOCKS_EXCLUDED(lock_);
 
   // Address at which the space begins.
   byte* Begin() const {
diff --git a/runtime/gc/space/large_object_space_test.cc b/runtime/gc/space/large_object_space_test.cc
index 845b9e3..8a6636d 100644
--- a/runtime/gc/space/large_object_space_test.cc
+++ b/runtime/gc/space/large_object_space_test.cc
@@ -45,9 +45,10 @@
       while (requests.size() < num_allocations) {
         size_t request_size = test_rand(&rand_seed) % max_allocation_size;
         size_t allocation_size = 0;
-        mirror::Object* obj = los->Alloc(Thread::Current(), request_size, &allocation_size);
+        mirror::Object* obj = los->Alloc(Thread::Current(), request_size, &allocation_size,
+                                         nullptr);
         ASSERT_TRUE(obj != nullptr);
-        ASSERT_EQ(allocation_size, los->AllocationSize(obj));
+        ASSERT_EQ(allocation_size, los->AllocationSize(obj, nullptr));
         ASSERT_GE(allocation_size, request_size);
         // Fill in our magic value.
         byte magic = (request_size & 0xFF) | 1;
@@ -78,7 +79,7 @@
 
     size_t bytes_allocated = 0;
     // Checks that the coalescing works.
-    mirror::Object* obj = los->Alloc(Thread::Current(), 100 * MB, &bytes_allocated);
+    mirror::Object* obj = los->Alloc(Thread::Current(), 100 * MB, &bytes_allocated, nullptr);
     EXPECT_TRUE(obj != nullptr);
     los->Free(Thread::Current(), obj);
 
diff --git a/runtime/gc/space/malloc_space.h b/runtime/gc/space/malloc_space.h
index f17bcd2..8e34fd0 100644
--- a/runtime/gc/space/malloc_space.h
+++ b/runtime/gc/space/malloc_space.h
@@ -52,13 +52,15 @@
     return kSpaceTypeMallocSpace;
   }
 
-  // Allocate num_bytes without allowing the underlying space to grow.
-  virtual mirror::Object* AllocWithGrowth(Thread* self, size_t num_bytes,
-                                          size_t* bytes_allocated) = 0;
   // Allocate num_bytes allowing the underlying space to grow.
-  virtual mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated) = 0;
-  // Return the storage space required by obj.
-  virtual size_t AllocationSize(mirror::Object* obj) = 0;
+  virtual mirror::Object* AllocWithGrowth(Thread* self, size_t num_bytes,
+                                          size_t* bytes_allocated, size_t* usable_size) = 0;
+  // Allocate num_bytes without allowing the underlying space to grow.
+  virtual mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated,
+                                size_t* usable_size) = 0;
+  // Return the storage space required by obj. If usable_size isn't nullptr, it is set to the
+  // amount of the storage space that may be used by obj.
+  virtual size_t AllocationSize(mirror::Object* obj, size_t* usable_size) = 0;
   virtual size_t Free(Thread* self, mirror::Object* ptr)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) = 0;
   virtual size_t FreeList(Thread* self, size_t num_ptrs, mirror::Object** ptrs)
@@ -132,9 +134,8 @@
   static MemMap* CreateMemMap(const std::string& name, size_t starting_size, size_t* initial_size,
                               size_t* growth_limit, size_t* capacity, byte* requested_begin);
 
-  // When true the low memory mode argument specifies that the heap
-  // wishes the created allocator to be more aggressive in releasing
-  // unused pages.
+  // When true, the low memory mode argument specifies that the heap wishes the created allocator
+  // to be more aggressive in releasing unused pages.
   virtual void* CreateAllocator(void* base, size_t morecore_start, size_t initial_size,
                                 bool low_memory_mode) = 0;
 
@@ -173,82 +174,6 @@
   DISALLOW_COPY_AND_ASSIGN(MallocSpace);
 };
 
-// Number of bytes to use as a red zone (rdz). A red zone of this size will be placed before and
-// after each allocation. 8 bytes provides long/double alignment.
-static constexpr size_t kValgrindRedZoneBytes = 8;
-
-// A specialization of DlMallocSpace/RosAllocSpace that provides information to valgrind wrt allocations.
-template <typename BaseMallocSpaceType, typename AllocatorType>
-class ValgrindMallocSpace : public BaseMallocSpaceType {
- public:
-  virtual mirror::Object* AllocWithGrowth(Thread* self, size_t num_bytes, size_t* bytes_allocated) {
-    void* obj_with_rdz = BaseMallocSpaceType::AllocWithGrowth(self, num_bytes + 2 * kValgrindRedZoneBytes,
-                                                              bytes_allocated);
-    if (obj_with_rdz == NULL) {
-      return NULL;
-    }
-    mirror::Object* result = reinterpret_cast<mirror::Object*>(
-        reinterpret_cast<byte*>(obj_with_rdz) + kValgrindRedZoneBytes);
-    // Make redzones as no access.
-    VALGRIND_MAKE_MEM_NOACCESS(obj_with_rdz, kValgrindRedZoneBytes);
-    VALGRIND_MAKE_MEM_NOACCESS(reinterpret_cast<byte*>(result) + num_bytes, kValgrindRedZoneBytes);
-    return result;
-  }
-
-  virtual mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated) {
-    void* obj_with_rdz = BaseMallocSpaceType::Alloc(self, num_bytes + 2 * kValgrindRedZoneBytes,
-                                                    bytes_allocated);
-    if (obj_with_rdz == NULL) {
-     return NULL;
-    }
-    mirror::Object* result = reinterpret_cast<mirror::Object*>(
-        reinterpret_cast<byte*>(obj_with_rdz) + kValgrindRedZoneBytes);
-    // Make redzones as no access.
-    VALGRIND_MAKE_MEM_NOACCESS(obj_with_rdz, kValgrindRedZoneBytes);
-    VALGRIND_MAKE_MEM_NOACCESS(reinterpret_cast<byte*>(result) + num_bytes, kValgrindRedZoneBytes);
-    return result;
-  }
-
-  virtual size_t AllocationSize(mirror::Object* obj) {
-    size_t result = BaseMallocSpaceType::AllocationSize(reinterpret_cast<mirror::Object*>(
-        reinterpret_cast<byte*>(obj) - kValgrindRedZoneBytes));
-    return result - 2 * kValgrindRedZoneBytes;
-  }
-
-  virtual size_t Free(Thread* self, mirror::Object* ptr)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    void* obj_after_rdz = reinterpret_cast<void*>(ptr);
-    void* obj_with_rdz = reinterpret_cast<byte*>(obj_after_rdz) - kValgrindRedZoneBytes;
-    // Make redzones undefined.
-    size_t allocation_size = BaseMallocSpaceType::AllocationSize(
-        reinterpret_cast<mirror::Object*>(obj_with_rdz));
-    VALGRIND_MAKE_MEM_UNDEFINED(obj_with_rdz, allocation_size);
-    size_t freed = BaseMallocSpaceType::Free(self, reinterpret_cast<mirror::Object*>(obj_with_rdz));
-    return freed - 2 * kValgrindRedZoneBytes;
-  }
-
-  virtual size_t FreeList(Thread* self, size_t num_ptrs, mirror::Object** ptrs)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    size_t freed = 0;
-    for (size_t i = 0; i < num_ptrs; i++) {
-      freed += Free(self, ptrs[i]);
-    }
-    return freed;
-  }
-
-  ValgrindMallocSpace(const std::string& name, MemMap* mem_map, AllocatorType allocator, byte* begin,
-                      byte* end, byte* limit, size_t growth_limit, size_t initial_size) :
-      BaseMallocSpaceType(name, mem_map, allocator, begin, end, limit, growth_limit) {
-    VALGRIND_MAKE_MEM_UNDEFINED(mem_map->Begin() + initial_size, mem_map->Size() - initial_size);
-  }
-
-  virtual ~ValgrindMallocSpace() {
-  }
-
- private:
-  DISALLOW_COPY_AND_ASSIGN(ValgrindMallocSpace);
-};
-
 }  // namespace space
 }  // namespace gc
 }  // namespace art
diff --git a/runtime/gc/space/rosalloc_space-inl.h b/runtime/gc/space/rosalloc_space-inl.h
index 5de4265..2627c85 100644
--- a/runtime/gc/space/rosalloc_space-inl.h
+++ b/runtime/gc/space/rosalloc_space-inl.h
@@ -25,20 +25,32 @@
 namespace gc {
 namespace space {
 
-inline mirror::Object* RosAllocSpace::AllocNonvirtual(Thread* self, size_t num_bytes,
-                                                      size_t* bytes_allocated) {
-  mirror::Object* obj;
-  obj = AllocWithoutGrowthLocked(self, num_bytes, bytes_allocated);
-  // RosAlloc zeroes memory internally.
-  return obj;
+inline size_t RosAllocSpace::AllocationSizeNonvirtual(mirror::Object* obj, size_t* usable_size) {
+  void* obj_ptr = const_cast<void*>(reinterpret_cast<const void*>(obj));
+  // obj is a valid object. Use its class in the header to get the size.
+  // Don't use verification since the object may be dead if we are sweeping.
+  size_t size = obj->SizeOf<kVerifyNone>();
+  size_t size_by_size = rosalloc_->UsableSize(size);
+  if (kIsDebugBuild) {
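+    // Cross-check: the usable size derived from the object's declared size must match the usable
+    // size rosalloc records for the allocation pointer itself.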
+    size_t size_by_ptr = rosalloc_->UsableSize(obj_ptr);
+    if (size_by_size != size_by_ptr) {
+      LOG(INFO) << "Found a bad sized obj of size " << size
+                << " at " << std::hex << reinterpret_cast<intptr_t>(obj_ptr) << std::dec
+                << " size_by_size=" << size_by_size << " size_by_ptr=" << size_by_ptr;
+    }
+    DCHECK_EQ(size_by_size, size_by_ptr);
+  }
+  if (usable_size != nullptr) {
+    *usable_size = size_by_size;
+  }
+  return size_by_size;
 }
 
-inline mirror::Object* RosAllocSpace::AllocWithoutGrowthLocked(Thread* self, size_t num_bytes,
-                                                               size_t* bytes_allocated) {
+inline mirror::Object* RosAllocSpace::AllocCommon(Thread* self, size_t num_bytes,
+                                                  size_t* bytes_allocated, size_t* usable_size) {
   size_t rosalloc_size = 0;
   mirror::Object* result = reinterpret_cast<mirror::Object*>(
-      rosalloc_for_alloc_->Alloc(self, num_bytes,
-                                 &rosalloc_size));
+      rosalloc_for_alloc_->Alloc(self, num_bytes, &rosalloc_size));
   if (LIKELY(result != NULL)) {
     if (kDebugSpaces) {
       CHECK(Contains(result)) << "Allocation (" << reinterpret_cast<void*>(result)
@@ -46,6 +58,10 @@
     }
     DCHECK(bytes_allocated != NULL);
     *bytes_allocated = rosalloc_size;
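+    // rosalloc_size is already the usable size of the allocation, so bytes_allocated and
+    // usable_size report the same value.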
+    DCHECK_EQ(rosalloc_size, rosalloc_->UsableSize(result));
+    if (usable_size != nullptr) {
+      *usable_size = rosalloc_size;
+    }
   }
   return result;
 }
diff --git a/runtime/gc/space/rosalloc_space.cc b/runtime/gc/space/rosalloc_space.cc
index cc6c1d9..567ec99 100644
--- a/runtime/gc/space/rosalloc_space.cc
+++ b/runtime/gc/space/rosalloc_space.cc
@@ -26,15 +26,15 @@
 #include "thread.h"
 #include "thread_list.h"
 #include "utils.h"
-
-#include <valgrind.h>
-#include <memcheck/memcheck.h>
+#include "valgrind_malloc_space-inl.h"
 
 namespace art {
 namespace gc {
 namespace space {
 
-static const bool kPrefetchDuringRosAllocFreeList = true;
+static constexpr bool kPrefetchDuringRosAllocFreeList = true;
+
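+// Explicit instantiation so that the template method definitions pulled in from
+// valgrind_malloc_space-inl.h are emitted in this translation unit.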
+template class ValgrindMallocSpace<RosAllocSpace, allocator::RosAlloc*>;
 
 RosAllocSpace::RosAllocSpace(const std::string& name, MemMap* mem_map,
                              art::gc::allocator::RosAlloc* rosalloc, byte* begin, byte* end,
@@ -45,9 +45,9 @@
 }
 
 RosAllocSpace* RosAllocSpace::CreateFromMemMap(MemMap* mem_map, const std::string& name,
-                                               size_t starting_size,
-                                               size_t initial_size, size_t growth_limit,
-                                               size_t capacity, bool low_memory_mode) {
+                                               size_t starting_size, size_t initial_size,
+                                               size_t growth_limit, size_t capacity,
+                                               bool low_memory_mode) {
   DCHECK(mem_map != nullptr);
   allocator::RosAlloc* rosalloc = CreateRosAlloc(mem_map->Begin(), starting_size, initial_size,
                                                  low_memory_mode);
@@ -63,19 +63,18 @@
   }
 
   // Everything is set so record in immutable structure and leave
-  RosAllocSpace* space;
   byte* begin = mem_map->Begin();
   if (RUNNING_ON_VALGRIND > 0) {
-    space = new ValgrindMallocSpace<RosAllocSpace, art::gc::allocator::RosAlloc*>(
+    return new ValgrindMallocSpace<RosAllocSpace, allocator::RosAlloc*>(
         name, mem_map, rosalloc, begin, end, begin + capacity, growth_limit, initial_size);
   } else {
-    space = new RosAllocSpace(name, mem_map, rosalloc, begin, end, begin + capacity, growth_limit);
+    return new RosAllocSpace(name, mem_map, rosalloc, begin, end, begin + capacity, growth_limit);
   }
-  return space;
 }
 
-RosAllocSpace* RosAllocSpace::Create(const std::string& name, size_t initial_size, size_t growth_limit,
-                                     size_t capacity, byte* requested_begin, bool low_memory_mode) {
+RosAllocSpace* RosAllocSpace::Create(const std::string& name, size_t initial_size,
+                                     size_t growth_limit, size_t capacity, byte* requested_begin,
+                                     bool low_memory_mode) {
   uint64_t start_time = 0;
   if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) {
     start_time = NanoTime();
@@ -129,11 +128,8 @@
   return rosalloc;
 }
 
-mirror::Object* RosAllocSpace::Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated) {
-  return AllocNonvirtual(self, num_bytes, bytes_allocated);
-}
-
-mirror::Object* RosAllocSpace::AllocWithGrowth(Thread* self, size_t num_bytes, size_t* bytes_allocated) {
+mirror::Object* RosAllocSpace::AllocWithGrowth(Thread* self, size_t num_bytes,
+                                               size_t* bytes_allocated, size_t* usable_size) {
   mirror::Object* result;
   {
     MutexLock mu(self, lock_);
@@ -141,7 +137,7 @@
     size_t max_allowed = Capacity();
     rosalloc_->SetFootprintLimit(max_allowed);
     // Try the allocation.
-    result = AllocWithoutGrowthLocked(self, num_bytes, bytes_allocated);
+    result = AllocCommon(self, num_bytes, bytes_allocated, usable_size);
     // Shrink back down as small as possible.
     size_t footprint = rosalloc_->Footprint();
     rosalloc_->SetFootprintLimit(footprint);
@@ -163,7 +159,7 @@
     CHECK(ptr != NULL);
     CHECK(Contains(ptr)) << "Free (" << ptr << ") not in bounds of heap " << *this;
   }
-  const size_t bytes_freed = AllocationSizeNonvirtual(ptr);
+  const size_t bytes_freed = AllocationSizeNonvirtual(ptr, nullptr);
   if (kRecentFreeCount > 0) {
     MutexLock mu(self, lock_);
     RegisterRecentFree(ptr);
@@ -183,7 +179,7 @@
     if (kPrefetchDuringRosAllocFreeList && i + look_ahead < num_ptrs) {
       __builtin_prefetch(reinterpret_cast<char*>(ptrs[i + look_ahead]));
     }
-    bytes_freed += AllocationSizeNonvirtual(ptr);
+    bytes_freed += AllocationSizeNonvirtual(ptr, nullptr);
   }
 
   if (kRecentFreeCount > 0) {
@@ -220,10 +216,6 @@
   return rosalloc_space->MoreCore(increment);
 }
 
-size_t RosAllocSpace::AllocationSize(mirror::Object* obj) {
-  return AllocationSizeNonvirtual(obj);
-}
-
 size_t RosAllocSpace::Trim() {
   {
     MutexLock mu(Thread::Current(), lock_);
diff --git a/runtime/gc/space/rosalloc_space.h b/runtime/gc/space/rosalloc_space.h
index 72e84f6..bd32196 100644
--- a/runtime/gc/space/rosalloc_space.h
+++ b/runtime/gc/space/rosalloc_space.h
@@ -30,7 +30,8 @@
 
 namespace space {
 
-// An alloc space is a space where objects may be allocated and garbage collected.
+// An alloc space implemented using a runs-of-slots memory allocator. Not final, since it may be
+// overridden by a ValgrindMallocSpace.
 class RosAllocSpace : public MallocSpace {
  public:
   // Create a RosAllocSpace with the requested sizes. The requested
@@ -44,53 +45,46 @@
                                          size_t growth_limit, size_t capacity,
                                          bool low_memory_mode);
 
-  virtual mirror::Object* AllocWithGrowth(Thread* self, size_t num_bytes,
-                                          size_t* bytes_allocated) LOCKS_EXCLUDED(lock_);
-  virtual mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated);
-  virtual size_t AllocationSize(mirror::Object* obj);
-  virtual size_t Free(Thread* self, mirror::Object* ptr)
+  mirror::Object* AllocWithGrowth(Thread* self, size_t num_bytes, size_t* bytes_allocated,
+                                  size_t* usable_size) OVERRIDE LOCKS_EXCLUDED(lock_);
+  mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated,
+                        size_t* usable_size) OVERRIDE {
+    return AllocNonvirtual(self, num_bytes, bytes_allocated, usable_size);
+  }
+  size_t AllocationSize(mirror::Object* obj, size_t* usable_size) OVERRIDE {
+    return AllocationSizeNonvirtual(obj, usable_size);
+  }
+  size_t Free(Thread* self, mirror::Object* ptr) OVERRIDE
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  virtual size_t FreeList(Thread* self, size_t num_ptrs, mirror::Object** ptrs)
+  size_t FreeList(Thread* self, size_t num_ptrs, mirror::Object** ptrs) OVERRIDE
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  mirror::Object* AllocNonvirtual(Thread* self, size_t num_bytes, size_t* bytes_allocated);
-
-  size_t AllocationSizeNonvirtual(mirror::Object* obj)
-      NO_THREAD_SAFETY_ANALYSIS {
-    // TODO: NO_THREAD_SAFETY_ANALYSIS because SizeOf() requires that mutator_lock is held.
-    void* obj_ptr = const_cast<void*>(reinterpret_cast<const void*>(obj));
-    // obj is a valid object. Use its class in the header to get the size.
-    // Don't use verification since the object may be dead if we are sweeping.
-    size_t size = obj->SizeOf<kVerifyNone>();
-    size_t size_by_size = rosalloc_->UsableSize(size);
-    if (kIsDebugBuild) {
-      size_t size_by_ptr = rosalloc_->UsableSize(obj_ptr);
-      if (size_by_size != size_by_ptr) {
-        LOG(INFO) << "Found a bad sized obj of size " << size
-                  << " at " << std::hex << reinterpret_cast<intptr_t>(obj_ptr) << std::dec
-                  << " size_by_size=" << size_by_size << " size_by_ptr=" << size_by_ptr;
-      }
-      DCHECK_EQ(size_by_size, size_by_ptr);
-    }
-    return size_by_size;
+  mirror::Object* AllocNonvirtual(Thread* self, size_t num_bytes, size_t* bytes_allocated,
+                                  size_t* usable_size) {
+    // RosAlloc zeroes memory internally.
+    return AllocCommon(self, num_bytes, bytes_allocated, usable_size);
   }
 
-  art::gc::allocator::RosAlloc* GetRosAlloc() {
+  // TODO: NO_THREAD_SAFETY_ANALYSIS because SizeOf() requires that mutator_lock is held.
+  size_t AllocationSizeNonvirtual(mirror::Object* obj, size_t* usable_size)
+      NO_THREAD_SAFETY_ANALYSIS;
+
+  allocator::RosAlloc* GetRosAlloc() const {
     return rosalloc_;
   }
 
-  size_t Trim();
-  void Walk(WalkCallback callback, void* arg) LOCKS_EXCLUDED(lock_);
-  size_t GetFootprint();
-  size_t GetFootprintLimit();
-  void SetFootprintLimit(size_t limit);
+  size_t Trim() OVERRIDE;
+  void Walk(WalkCallback callback, void* arg) OVERRIDE LOCKS_EXCLUDED(lock_);
+  size_t GetFootprint() OVERRIDE;
+  size_t GetFootprintLimit() OVERRIDE;
+  void SetFootprintLimit(size_t limit) OVERRIDE;
 
-  virtual void Clear();
+  void Clear() OVERRIDE;
   MallocSpace* CreateInstance(const std::string& name, MemMap* mem_map, void* allocator,
                               byte* begin, byte* end, byte* limit, size_t growth_limit);
 
-  uint64_t GetBytesAllocated();
-  uint64_t GetObjectsAllocated();
+  uint64_t GetBytesAllocated() OVERRIDE;
+  uint64_t GetObjectsAllocated() OVERRIDE;
 
   void RevokeThreadLocalBuffers(Thread* thread);
   void RevokeAllThreadLocalBuffers();
@@ -98,10 +92,11 @@
   // Returns the class of a recently freed object.
   mirror::Class* FindRecentFreedObject(const mirror::Object* obj);
 
-  virtual bool IsRosAllocSpace() const {
+  bool IsRosAllocSpace() const OVERRIDE {
     return true;
   }
-  virtual RosAllocSpace* AsRosAllocSpace() {
+
+  RosAllocSpace* AsRosAllocSpace() OVERRIDE {
     return this;
   }
 
@@ -114,9 +109,11 @@
                 byte* begin, byte* end, byte* limit, size_t growth_limit);
 
  private:
-  mirror::Object* AllocWithoutGrowthLocked(Thread* self, size_t num_bytes, size_t* bytes_allocated);
+  mirror::Object* AllocCommon(Thread* self, size_t num_bytes, size_t* bytes_allocated,
+                              size_t* usable_size);
 
-  void* CreateAllocator(void* base, size_t morecore_start, size_t initial_size, bool low_memory_mode) {
+  void* CreateAllocator(void* base, size_t morecore_start, size_t initial_size,
+                        bool low_memory_mode) OVERRIDE {
     return CreateRosAlloc(base, morecore_start, initial_size, low_memory_mode);
   }
   static allocator::RosAlloc* CreateRosAlloc(void* base, size_t morecore_start, size_t initial_size,
@@ -127,11 +124,11 @@
       LOCKS_EXCLUDED(Locks::runtime_shutdown_lock_, Locks::thread_list_lock_);
 
   // Underlying rosalloc.
-  art::gc::allocator::RosAlloc* const rosalloc_;
+  allocator::RosAlloc* const rosalloc_;
 
-  // A rosalloc pointer used for allocation. Equals to what rosalloc_
-  // points to or nullptr after InvalidateAllocator() is called.
-  art::gc::allocator::RosAlloc* rosalloc_for_alloc_;
+  // The rosalloc pointer used for allocation. Equal to rosalloc_ or nullptr after
+  // InvalidateAllocator() is called.
+  allocator::RosAlloc* rosalloc_for_alloc_;
 
   friend class collector::MarkSweep;
 
diff --git a/runtime/gc/space/space.cc b/runtime/gc/space/space.cc
index 32a00bc..4af65a9 100644
--- a/runtime/gc/space/space.cc
+++ b/runtime/gc/space/space.cc
@@ -37,6 +37,36 @@
   return os;
 }
 
+DlMallocSpace* Space::AsDlMallocSpace() {
+  LOG(FATAL) << "Unreachable";
+  return nullptr;
+}
+
+RosAllocSpace* Space::AsRosAllocSpace() {
+  LOG(FATAL) << "Unreachable";
+  return nullptr;
+}
+
+ZygoteSpace* Space::AsZygoteSpace() {
+  LOG(FATAL) << "Unreachable";
+  return nullptr;
+}
+
+BumpPointerSpace* Space::AsBumpPointerSpace() {
+  LOG(FATAL) << "Unreachable";
+  return nullptr;
+}
+
+AllocSpace* Space::AsAllocSpace() {
+  LOG(FATAL) << "Unimplemented";
+  return nullptr;
+}
+
+ContinuousMemMapAllocSpace* Space::AsContinuousMemMapAllocSpace() {
+  LOG(FATAL) << "Unimplemented";
+  return nullptr;
+}
+
 DiscontinuousSpace::DiscontinuousSpace(const std::string& name,
                                        GcRetentionPolicy gc_retention_policy) :
     Space(name, gc_retention_policy),
diff --git a/runtime/gc/space/space.h b/runtime/gc/space/space.h
index 98e6f65..0f8f38a 100644
--- a/runtime/gc/space/space.h
+++ b/runtime/gc/space/space.h
@@ -115,35 +115,24 @@
   virtual bool IsDlMallocSpace() const {
     return false;
   }
-  virtual DlMallocSpace* AsDlMallocSpace() {
-    LOG(FATAL) << "Unreachable";
-    return nullptr;
-  }
+  virtual DlMallocSpace* AsDlMallocSpace();
+
   virtual bool IsRosAllocSpace() const {
     return false;
   }
-  virtual RosAllocSpace* AsRosAllocSpace() {
-    LOG(FATAL) << "Unreachable";
-    return nullptr;
-  }
+  virtual RosAllocSpace* AsRosAllocSpace();
 
-  // Is this the space allocated into by the Zygote and no-longer in use?
+  // Is this the space allocated into by the Zygote and no longer in use for allocation?
   bool IsZygoteSpace() const {
     return GetType() == kSpaceTypeZygoteSpace;
   }
-  virtual ZygoteSpace* AsZygoteSpace() {
-    LOG(FATAL) << "Unreachable";
-    return nullptr;
-  }
+  virtual ZygoteSpace* AsZygoteSpace();
 
   // Is this space a bump pointer space?
   bool IsBumpPointerSpace() const {
     return GetType() == kSpaceTypeBumpPointerSpace;
   }
-  virtual BumpPointerSpace* AsBumpPointerSpace() {
-    LOG(FATAL) << "Unreachable";
-    return nullptr;
-  }
+  virtual BumpPointerSpace* AsBumpPointerSpace();
 
   // Does this space hold large objects and implement the large object space abstraction?
   bool IsLargeObjectSpace() const {
@@ -164,18 +153,12 @@
   virtual bool IsAllocSpace() const {
     return false;
   }
-  virtual AllocSpace* AsAllocSpace() {
-    LOG(FATAL) << "Unimplemented";
-    return nullptr;
-  }
+  virtual AllocSpace* AsAllocSpace();
 
   virtual bool IsContinuousMemMapAllocSpace() const {
     return false;
   }
-  virtual ContinuousMemMapAllocSpace* AsContinuousMemMapAllocSpace() {
-    LOG(FATAL) << "Unimplemented";
-    return nullptr;
-  }
+  virtual ContinuousMemMapAllocSpace* AsContinuousMemMapAllocSpace();
 
   virtual ~Space() {}
 
@@ -220,10 +203,11 @@
   // Allocate num_bytes without allowing growth. If the allocation
   // succeeds, the output parameter bytes_allocated will be set to the
   // actually allocated bytes which is >= num_bytes.
-  virtual mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated) = 0;
+  virtual mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated,
+                                size_t* usable_size) = 0;
 
   // Return the storage space required by obj.
-  virtual size_t AllocationSize(mirror::Object* obj) = 0;
+  virtual size_t AllocationSize(mirror::Object* obj, size_t* usable_size) = 0;
 
   // Returns how many bytes were freed.
   virtual size_t Free(Thread* self, mirror::Object* ptr) = 0;
@@ -231,15 +215,13 @@
   // Returns how many bytes were freed.
   virtual size_t FreeList(Thread* self, size_t num_ptrs, mirror::Object** ptrs) = 0;
 
-  // Revoke any sort of thread-local buffers that are used to speed up
-  // allocations for the given thread, if the alloc space
-  // implementation uses any. No-op by default.
-  virtual void RevokeThreadLocalBuffers(Thread* /*thread*/) {}
+  // Revoke any sort of thread-local buffers that are used to speed up allocations for the given
+  // thread, if the alloc space implementation uses any.
+  virtual void RevokeThreadLocalBuffers(Thread* thread) = 0;
 
-  // Revoke any sort of thread-local buffers that are used to speed up
-  // allocations for all the threads, if the alloc space
-  // implementation uses any. No-op by default.
-  virtual void RevokeAllThreadLocalBuffers() {}
+  // Revoke any sort of thread-local buffers that are used to speed up allocations for all the
+  // threads, if the alloc space implementation uses any.
+  virtual void RevokeAllThreadLocalBuffers() = 0;
 
  protected:
   AllocSpace() {}
@@ -393,17 +375,17 @@
 // Used by the heap compaction interface to enable copying from one type of alloc space to another.
 class ContinuousMemMapAllocSpace : public MemMapSpace, public AllocSpace {
  public:
-  virtual bool IsAllocSpace() const {
+  bool IsAllocSpace() const OVERRIDE {
     return true;
   }
-  virtual AllocSpace* AsAllocSpace() {
+  AllocSpace* AsAllocSpace() OVERRIDE {
     return this;
   }
 
-  virtual bool IsContinuousMemMapAllocSpace() const {
+  bool IsContinuousMemMapAllocSpace() const OVERRIDE {
     return true;
   }
-  virtual ContinuousMemMapAllocSpace* AsContinuousMemMapAllocSpace() {
+  ContinuousMemMapAllocSpace* AsContinuousMemMapAllocSpace() OVERRIDE {
     return this;
   }
 
@@ -414,22 +396,19 @@
   // Swap the live and mark bitmaps of this space. This is used by the GC for concurrent sweeping.
   void SwapBitmaps();
 
-  virtual void Clear() {
-    LOG(FATAL) << "Unimplemented";
-  }
+  // Free all memory associated with this space.
+  virtual void Clear() = 0;
 
-  virtual accounting::SpaceBitmap* GetLiveBitmap() const {
+  accounting::SpaceBitmap* GetLiveBitmap() const {
     return live_bitmap_.get();
   }
-  virtual accounting::SpaceBitmap* GetMarkBitmap() const {
+
+  accounting::SpaceBitmap* GetMarkBitmap() const {
     return mark_bitmap_.get();
   }
 
-  virtual void Sweep(bool swap_bitmaps, size_t* freed_objects, size_t* freed_bytes);
-  virtual accounting::SpaceBitmap::SweepCallback* GetSweepCallback() {
-    LOG(FATAL) << "Unimplemented";
-    return nullptr;
-  }
+  void Sweep(bool swap_bitmaps, size_t* freed_objects, size_t* freed_bytes);
+  virtual accounting::SpaceBitmap::SweepCallback* GetSweepCallback() = 0;
 
  protected:
   UniquePtr<accounting::SpaceBitmap> live_bitmap_;
diff --git a/runtime/gc/space/space_test.h b/runtime/gc/space/space_test.h
index f17d6f4..cb036f8 100644
--- a/runtime/gc/space/space_test.h
+++ b/runtime/gc/space/space_test.h
@@ -123,7 +123,7 @@
 // allocations after the ZygoteSpace is created. The test should also do some GCs to ensure that
 // the GC works with the ZygoteSpace.
 void SpaceTest::ZygoteSpaceTestBody(CreateSpaceFn create_space) {
-  size_t dummy = 0;
+  size_t dummy;
   MallocSpace* space(create_space("test", 4 * MB, 16 * MB, 16 * MB, nullptr));
   ASSERT_TRUE(space != nullptr);
 
@@ -133,47 +133,60 @@
   ScopedObjectAccess soa(self);
 
   // Succeeds, fits without adjusting the footprint limit.
-  SirtRef<mirror::Object> ptr1(self, space->Alloc(self, 1 * MB, &dummy));
+  size_t ptr1_bytes_allocated, ptr1_usable_size;
+  SirtRef<mirror::Object> ptr1(self, space->Alloc(self, 1 * MB, &ptr1_bytes_allocated,
+                                                  &ptr1_usable_size));
   EXPECT_TRUE(ptr1.get() != nullptr);
+  EXPECT_LE(1U * MB, ptr1_bytes_allocated);
+  EXPECT_LE(1U * MB, ptr1_usable_size);
+  EXPECT_LE(ptr1_usable_size, ptr1_bytes_allocated);
   InstallClass(ptr1, 1 * MB);
 
   // Fails, requires a higher footprint limit.
-  mirror::Object* ptr2 = space->Alloc(self, 8 * MB, &dummy);
+  mirror::Object* ptr2 = space->Alloc(self, 8 * MB, &dummy, nullptr);
   EXPECT_TRUE(ptr2 == nullptr);
 
   // Succeeds, adjusts the footprint.
-  size_t ptr3_bytes_allocated;
-  SirtRef<mirror::Object> ptr3(self, space->AllocWithGrowth(self, 8 * MB, &ptr3_bytes_allocated));
+  size_t ptr3_bytes_allocated, ptr3_usable_size;
+  SirtRef<mirror::Object> ptr3(self, space->AllocWithGrowth(self, 8 * MB, &ptr3_bytes_allocated,
+                                                            &ptr3_usable_size));
   EXPECT_TRUE(ptr3.get() != nullptr);
   EXPECT_LE(8U * MB, ptr3_bytes_allocated);
+  EXPECT_LE(8U * MB, ptr3_usable_size);
+  EXPECT_LE(ptr3_usable_size, ptr3_bytes_allocated);
   InstallClass(ptr3, 8 * MB);
 
   // Fails, requires a higher footprint limit.
-  mirror::Object* ptr4 = space->Alloc(self, 8 * MB, &dummy);
+  mirror::Object* ptr4 = space->Alloc(self, 8 * MB, &dummy, nullptr);
   EXPECT_TRUE(ptr4 == nullptr);
 
   // Also fails, requires a higher allowed footprint.
-  mirror::Object* ptr5 = space->AllocWithGrowth(self, 8 * MB, &dummy);
+  mirror::Object* ptr5 = space->AllocWithGrowth(self, 8 * MB, &dummy, nullptr);
   EXPECT_TRUE(ptr5 == nullptr);
 
   // Release some memory.
-  size_t free3 = space->AllocationSize(ptr3.get());
+  size_t free3 = space->AllocationSize(ptr3.get(), nullptr);
   EXPECT_EQ(free3, ptr3_bytes_allocated);
   EXPECT_EQ(free3, space->Free(self, ptr3.reset(nullptr)));
   EXPECT_LE(8U * MB, free3);
 
   // Succeeds, now that memory has been freed.
-  SirtRef<mirror::Object> ptr6(self, space->AllocWithGrowth(self, 9 * MB, &dummy));
+  size_t ptr6_bytes_allocated, ptr6_usable_size;
+  SirtRef<mirror::Object> ptr6(self, space->AllocWithGrowth(self, 9 * MB, &ptr6_bytes_allocated,
+                                                            &ptr6_usable_size));
   EXPECT_TRUE(ptr6.get() != nullptr);
+  EXPECT_LE(9U * MB, ptr6_bytes_allocated);
+  EXPECT_LE(9U * MB, ptr6_usable_size);
+  EXPECT_LE(ptr6_usable_size, ptr6_bytes_allocated);
   InstallClass(ptr6, 9 * MB);
 
   // Final clean up.
-  size_t free1 = space->AllocationSize(ptr1.get());
+  size_t free1 = space->AllocationSize(ptr1.get(), nullptr);
   space->Free(self, ptr1.reset(nullptr));
   EXPECT_LE(1U * MB, free1);
 
   // Make sure that the zygote space isn't directly at the start of the space.
-  space->Alloc(self, 1U * MB, &dummy);
+  EXPECT_TRUE(space->Alloc(self, 1U * MB, &dummy, nullptr) != nullptr);
 
   gc::Heap* heap = Runtime::Current()->GetHeap();
   space::Space* old_space = space;
@@ -189,22 +202,28 @@
   AddSpace(space);
 
   // Succeeds, fits without adjusting the footprint limit.
-  ptr1.reset(space->Alloc(self, 1 * MB, &dummy));
+  ptr1.reset(space->Alloc(self, 1 * MB, &ptr1_bytes_allocated, &ptr1_usable_size));
   EXPECT_TRUE(ptr1.get() != nullptr);
+  EXPECT_LE(1U * MB, ptr1_bytes_allocated);
+  EXPECT_LE(1U * MB, ptr1_usable_size);
+  EXPECT_LE(ptr1_usable_size, ptr1_bytes_allocated);
   InstallClass(ptr1, 1 * MB);
 
   // Fails, requires a higher footprint limit.
-  ptr2 = space->Alloc(self, 8 * MB, &dummy);
+  ptr2 = space->Alloc(self, 8 * MB, &dummy, nullptr);
   EXPECT_TRUE(ptr2 == nullptr);
 
   // Succeeds, adjusts the footprint.
-  ptr3.reset(space->AllocWithGrowth(self, 2 * MB, &dummy));
+  ptr3.reset(space->AllocWithGrowth(self, 2 * MB, &ptr3_bytes_allocated, &ptr3_usable_size));
   EXPECT_TRUE(ptr3.get() != nullptr);
+  EXPECT_LE(2U * MB, ptr3_bytes_allocated);
+  EXPECT_LE(2U * MB, ptr3_usable_size);
+  EXPECT_LE(ptr3_usable_size, ptr3_bytes_allocated);
   InstallClass(ptr3, 2 * MB);
   space->Free(self, ptr3.reset(nullptr));
 
   // Final clean up.
-  free1 = space->AllocationSize(ptr1.get());
+  free1 = space->AllocationSize(ptr1.get(), nullptr);
   space->Free(self, ptr1.reset(nullptr));
   EXPECT_LE(1U * MB, free1);
 }
@@ -220,42 +239,55 @@
   AddSpace(space);
 
   // Succeeds, fits without adjusting the footprint limit.
-  SirtRef<mirror::Object> ptr1(self, space->Alloc(self, 1 * MB, &dummy));
+  size_t ptr1_bytes_allocated, ptr1_usable_size;
+  SirtRef<mirror::Object> ptr1(self, space->Alloc(self, 1 * MB, &ptr1_bytes_allocated,
+                                                  &ptr1_usable_size));
   EXPECT_TRUE(ptr1.get() != nullptr);
+  EXPECT_LE(1U * MB, ptr1_bytes_allocated);
+  EXPECT_LE(1U * MB, ptr1_usable_size);
+  EXPECT_LE(ptr1_usable_size, ptr1_bytes_allocated);
   InstallClass(ptr1, 1 * MB);
 
   // Fails, requires a higher footprint limit.
-  mirror::Object* ptr2 = space->Alloc(self, 8 * MB, &dummy);
+  mirror::Object* ptr2 = space->Alloc(self, 8 * MB, &dummy, nullptr);
   EXPECT_TRUE(ptr2 == nullptr);
 
   // Succeeds, adjusts the footprint.
-  size_t ptr3_bytes_allocated;
-  SirtRef<mirror::Object> ptr3(self, space->AllocWithGrowth(self, 8 * MB, &ptr3_bytes_allocated));
+  size_t ptr3_bytes_allocated, ptr3_usable_size;
+  SirtRef<mirror::Object> ptr3(self, space->AllocWithGrowth(self, 8 * MB, &ptr3_bytes_allocated,
+                                                            &ptr3_usable_size));
   EXPECT_TRUE(ptr3.get() != nullptr);
   EXPECT_LE(8U * MB, ptr3_bytes_allocated);
+  EXPECT_LE(8U * MB, ptr3_usable_size);
+  EXPECT_LE(ptr3_usable_size, ptr3_bytes_allocated);
   InstallClass(ptr3, 8 * MB);
 
   // Fails, requires a higher footprint limit.
-  mirror::Object* ptr4 = space->Alloc(self, 8 * MB, &dummy);
+  mirror::Object* ptr4 = space->Alloc(self, 8 * MB, &dummy, nullptr);
   EXPECT_TRUE(ptr4 == nullptr);
 
   // Also fails, requires a higher allowed footprint.
-  mirror::Object* ptr5 = space->AllocWithGrowth(self, 8 * MB, &dummy);
+  mirror::Object* ptr5 = space->AllocWithGrowth(self, 8 * MB, &dummy, nullptr);
   EXPECT_TRUE(ptr5 == nullptr);
 
   // Release some memory.
-  size_t free3 = space->AllocationSize(ptr3.get());
+  size_t free3 = space->AllocationSize(ptr3.get(), nullptr);
   EXPECT_EQ(free3, ptr3_bytes_allocated);
   space->Free(self, ptr3.reset(nullptr));
   EXPECT_LE(8U * MB, free3);
 
   // Succeeds, now that memory has been freed.
-  SirtRef<mirror::Object> ptr6(self, space->AllocWithGrowth(self, 9 * MB, &dummy));
+  size_t ptr6_bytes_allocated, ptr6_usable_size;
+  SirtRef<mirror::Object> ptr6(self, space->AllocWithGrowth(self, 9 * MB, &ptr6_bytes_allocated,
+                                                            &ptr6_usable_size));
   EXPECT_TRUE(ptr6.get() != nullptr);
+  EXPECT_LE(9U * MB, ptr6_bytes_allocated);
+  EXPECT_LE(9U * MB, ptr6_usable_size);
+  EXPECT_LE(ptr6_usable_size, ptr6_bytes_allocated);
   InstallClass(ptr6, 9 * MB);
 
   // Final clean up.
-  size_t free1 = space->AllocationSize(ptr1.get());
+  size_t free1 = space->AllocationSize(ptr1.get(), nullptr);
   space->Free(self, ptr1.reset(nullptr));
   EXPECT_LE(1U * MB, free1);
 }
@@ -272,14 +304,17 @@
   // Succeeds, fits without adjusting the max allowed footprint.
   mirror::Object* lots_of_objects[1024];
   for (size_t i = 0; i < arraysize(lots_of_objects); i++) {
-    size_t allocation_size = 0;
+    size_t allocation_size, usable_size;
     size_t size_of_zero_length_byte_array = SizeOfZeroLengthByteArray();
-    lots_of_objects[i] = space->Alloc(self, size_of_zero_length_byte_array, &allocation_size);
+    lots_of_objects[i] = space->Alloc(self, size_of_zero_length_byte_array, &allocation_size,
+                                      &usable_size);
     EXPECT_TRUE(lots_of_objects[i] != nullptr);
     SirtRef<mirror::Object> obj(self, lots_of_objects[i]);
     InstallClass(obj, size_of_zero_length_byte_array);
     lots_of_objects[i] = obj.get();
-    EXPECT_EQ(allocation_size, space->AllocationSize(lots_of_objects[i]));
+    size_t computed_usable_size;
+    EXPECT_EQ(allocation_size, space->AllocationSize(lots_of_objects[i], &computed_usable_size));
+    EXPECT_EQ(usable_size, computed_usable_size);
   }
 
   // Release memory and check pointers are nullptr.
@@ -290,13 +325,15 @@
 
   // Succeeds, fits by adjusting the max allowed footprint.
   for (size_t i = 0; i < arraysize(lots_of_objects); i++) {
-    size_t allocation_size = 0;
-    lots_of_objects[i] = space->AllocWithGrowth(self, 1024, &allocation_size);
+    size_t allocation_size, usable_size;
+    lots_of_objects[i] = space->AllocWithGrowth(self, 1024, &allocation_size, &usable_size);
     EXPECT_TRUE(lots_of_objects[i] != nullptr);
     SirtRef<mirror::Object> obj(self, lots_of_objects[i]);
     InstallClass(obj, 1024);
     lots_of_objects[i] = obj.get();
-    EXPECT_EQ(allocation_size, space->AllocationSize(lots_of_objects[i]));
+    size_t computed_usable_size;
+    EXPECT_EQ(allocation_size, space->AllocationSize(lots_of_objects[i], &computed_usable_size));
+    EXPECT_EQ(usable_size, computed_usable_size);
   }
 
   // Release memory and check pointers are nullptr
@@ -357,16 +394,16 @@
       SirtRef<mirror::Object> object(self, nullptr);
       size_t bytes_allocated = 0;
       if (round <= 1) {
-        object.reset(space->Alloc(self, alloc_size, &bytes_allocated));
+        object.reset(space->Alloc(self, alloc_size, &bytes_allocated, nullptr));
       } else {
-        object.reset(space->AllocWithGrowth(self, alloc_size, &bytes_allocated));
+        object.reset(space->AllocWithGrowth(self, alloc_size, &bytes_allocated, nullptr));
       }
       footprint = space->GetFootprint();
       EXPECT_GE(space->Size(), footprint);  // invariant
       if (object.get() != nullptr) {  // allocation succeeded
         InstallClass(object, alloc_size);
         lots_of_objects[i] = object.get();
-        size_t allocation_size = space->AllocationSize(object.get());
+        size_t allocation_size = space->AllocationSize(object.get(), nullptr);
         EXPECT_EQ(bytes_allocated, allocation_size);
         if (object_size > 0) {
           EXPECT_GE(allocation_size, static_cast<size_t>(object_size));
@@ -421,7 +458,7 @@
       if (object == nullptr) {
         continue;
       }
-      size_t allocation_size = space->AllocationSize(object);
+      size_t allocation_size = space->AllocationSize(object, nullptr);
       if (object_size > 0) {
         EXPECT_GE(allocation_size, static_cast<size_t>(object_size));
       } else {
@@ -450,9 +487,10 @@
   size_t three_quarters_space = (growth_limit / 2) + (growth_limit / 4);
   size_t bytes_allocated = 0;
   if (round <= 1) {
-    large_object.reset(space->Alloc(self, three_quarters_space, &bytes_allocated));
+    large_object.reset(space->Alloc(self, three_quarters_space, &bytes_allocated, nullptr));
   } else {
-    large_object.reset(space->AllocWithGrowth(self, three_quarters_space, &bytes_allocated));
+    large_object.reset(space->AllocWithGrowth(self, three_quarters_space, &bytes_allocated,
+                                              nullptr));
   }
   EXPECT_TRUE(large_object.get() != nullptr);
   InstallClass(large_object, three_quarters_space);
diff --git a/runtime/gc/space/valgrind_malloc_space-inl.h b/runtime/gc/space/valgrind_malloc_space-inl.h
new file mode 100644
index 0000000..4b0c8e3
--- /dev/null
+++ b/runtime/gc/space/valgrind_malloc_space-inl.h
@@ -0,0 +1,116 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_GC_SPACE_VALGRIND_MALLOC_SPACE_INL_H_
+#define ART_RUNTIME_GC_SPACE_VALGRIND_MALLOC_SPACE_INL_H_
+
+#include "valgrind_malloc_space.h"
+
+#include <memcheck/memcheck.h>
+
+namespace art {
+namespace gc {
+namespace space {
+
+// Number of bytes to use as a red zone (rdz). A red zone of this size will be placed before and
+// after each allocation. 8 bytes provides long/double alignment.
+static constexpr size_t kValgrindRedZoneBytes = 8;
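+// Each allocation is laid out as [red zone | payload | red zone]; the pointer handed back to
+// callers points just past the leading red zone.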
+
+template <typename S, typename A>
+mirror::Object* ValgrindMallocSpace<S, A>::AllocWithGrowth(Thread* self, size_t num_bytes,
+                                                           size_t* bytes_allocated,
+                                                           size_t* usable_size) {
+  void* obj_with_rdz = S::AllocWithGrowth(self, num_bytes + 2 * kValgrindRedZoneBytes,
+                                          bytes_allocated, usable_size);
+  if (obj_with_rdz == nullptr) {
+    return nullptr;
+  }
+  if (usable_size != nullptr) {
+    *usable_size -= 2 * kValgrindRedZoneBytes;
+  }
+  mirror::Object* result = reinterpret_cast<mirror::Object*>(
+      reinterpret_cast<byte*>(obj_with_rdz) + kValgrindRedZoneBytes);
+  // Mark the redzones as no access.
+  VALGRIND_MAKE_MEM_NOACCESS(obj_with_rdz, kValgrindRedZoneBytes);
+  VALGRIND_MAKE_MEM_NOACCESS(reinterpret_cast<byte*>(result) + num_bytes, kValgrindRedZoneBytes);
+  return result;
+}
+
+template <typename S, typename A>
+mirror::Object* ValgrindMallocSpace<S, A>::Alloc(Thread* self, size_t num_bytes,
+                                                 size_t* bytes_allocated,
+                                                 size_t* usable_size) {
+  void* obj_with_rdz = S::Alloc(self, num_bytes + 2 * kValgrindRedZoneBytes, bytes_allocated,
+                                usable_size);
+  if (obj_with_rdz == nullptr) {
+    return nullptr;
+  }
+  if (usable_size != nullptr) {
+    *usable_size -= 2 * kValgrindRedZoneBytes;
+  }
+  mirror::Object* result = reinterpret_cast<mirror::Object*>(
+      reinterpret_cast<byte*>(obj_with_rdz) + kValgrindRedZoneBytes);
+  // Mark the redzones as no access.
+  VALGRIND_MAKE_MEM_NOACCESS(obj_with_rdz, kValgrindRedZoneBytes);
+  VALGRIND_MAKE_MEM_NOACCESS(reinterpret_cast<byte*>(result) + num_bytes, kValgrindRedZoneBytes);
+  return result;
+}
+
+template <typename S, typename A>
+size_t ValgrindMallocSpace<S, A>::AllocationSize(mirror::Object* obj, size_t* usable_size) {
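+  // obj is the pointer that was handed out to callers, so step back over the leading red zone
+  // before querying the underlying space.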
+  size_t result = S::AllocationSize(reinterpret_cast<mirror::Object*>(
+      reinterpret_cast<byte*>(obj) - kValgrindRedZoneBytes), usable_size);
+  if (usable_size != nullptr) {
+    *usable_size -= 2 * kValgrindRedZoneBytes;
+  }
+  return result - 2 * kValgrindRedZoneBytes;
+}
+
+template <typename S, typename A>
+size_t ValgrindMallocSpace<S, A>::Free(Thread* self, mirror::Object* ptr) {
+  void* obj_after_rdz = reinterpret_cast<void*>(ptr);
+  void* obj_with_rdz = reinterpret_cast<byte*>(obj_after_rdz) - kValgrindRedZoneBytes;
+  // Make redzones undefined.
+  // obj_with_rdz is the true start of the allocation, so query the base space's size directly.
+  size_t allocation_size =
+      S::AllocationSize(reinterpret_cast<mirror::Object*>(obj_with_rdz), nullptr);
+  VALGRIND_MAKE_MEM_UNDEFINED(obj_with_rdz, allocation_size);
+  size_t freed = S::Free(self, reinterpret_cast<mirror::Object*>(obj_with_rdz));
+  return freed - 2 * kValgrindRedZoneBytes;
+}
+
+template <typename S, typename A>
+size_t ValgrindMallocSpace<S, A>::FreeList(Thread* self, size_t num_ptrs, mirror::Object** ptrs) {
+  size_t freed = 0;
+  for (size_t i = 0; i < num_ptrs; i++) {
+    freed += Free(self, ptrs[i]);
+  }
+  return freed;
+}
+
+template <typename S, typename A>
+ValgrindMallocSpace<S, A>::ValgrindMallocSpace(const std::string& name, MemMap* mem_map,
+                                               A allocator, byte* begin,
+                                               byte* end, byte* limit, size_t growth_limit,
+                                               size_t initial_size) :
+    S(name, mem_map, allocator, begin, end, limit, growth_limit) {
+  VALGRIND_MAKE_MEM_UNDEFINED(mem_map->Begin() + initial_size, mem_map->Size() - initial_size);
+}
+
+}  // namespace space
+}  // namespace gc
+}  // namespace art
+
+#endif  // ART_RUNTIME_GC_SPACE_VALGRIND_MALLOC_SPACE_INL_H_
diff --git a/runtime/gc/space/valgrind_malloc_space.h b/runtime/gc/space/valgrind_malloc_space.h
new file mode 100644
index 0000000..8d00b30
--- /dev/null
+++ b/runtime/gc/space/valgrind_malloc_space.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_GC_SPACE_VALGRIND_MALLOC_SPACE_H_
+#define ART_RUNTIME_GC_SPACE_VALGRIND_MALLOC_SPACE_H_
+
+#include "malloc_space.h"
+
+#include <valgrind.h>
+
+namespace art {
+namespace gc {
+namespace space {
+
+// A specialization of DlMallocSpace/RosAllocSpace that places valgrind red zones around
+// allocations.
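+// Sizes reported back to callers (bytes_allocated, usable_size and AllocationSize) exclude the
+// red zones.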
+template <typename BaseMallocSpaceType, typename AllocatorType>
+class ValgrindMallocSpace FINAL : public BaseMallocSpaceType {
+ public:
+  mirror::Object* AllocWithGrowth(Thread* self, size_t num_bytes, size_t* bytes_allocated,
+                                  size_t* usable_size) OVERRIDE;
+  mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated,
+                        size_t* usable_size) OVERRIDE;
+
+  size_t AllocationSize(mirror::Object* obj, size_t* usable_size) OVERRIDE;
+
+  size_t Free(Thread* self, mirror::Object* ptr) OVERRIDE
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  size_t FreeList(Thread* self, size_t num_ptrs, mirror::Object** ptrs) OVERRIDE
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  ValgrindMallocSpace(const std::string& name, MemMap* mem_map, AllocatorType allocator,
+                      byte* begin, byte* end, byte* limit, size_t growth_limit,
+                      size_t initial_size);
+  virtual ~ValgrindMallocSpace() {}
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(ValgrindMallocSpace);
+};
+
+}  // namespace space
+}  // namespace gc
+}  // namespace art
+
+#endif  // ART_RUNTIME_GC_SPACE_VALGRIND_MALLOC_SPACE_H_
diff --git a/runtime/gc/space/zygote_space.cc b/runtime/gc/space/zygote_space.cc
index a303765..a60ab38 100644
--- a/runtime/gc/space/zygote_space.cc
+++ b/runtime/gc/space/zygote_space.cc
@@ -57,6 +57,10 @@
   return zygote_space;
 }
 
+void ZygoteSpace::Clear() {
+  LOG(FATAL) << "Unimplemented";
+}
+
 ZygoteSpace::ZygoteSpace(const std::string& name, MemMap* mem_map, size_t objects_allocated)
     : ContinuousMemMapAllocSpace(name, mem_map, mem_map->Begin(), mem_map->End(), mem_map->End(),
                                  kGcRetentionPolicyFullCollect),
@@ -71,6 +75,27 @@
       << ",name=\"" << GetName() << "\"]";
 }
 
+mirror::Object* ZygoteSpace::Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated,
+                                   size_t* usable_size) {
+  LOG(FATAL) << "Unimplemented";
+  return nullptr;
+}
+
+size_t ZygoteSpace::AllocationSize(mirror::Object* obj, size_t* usable_size) {
+  LOG(FATAL) << "Unimplemented";
+  return 0;
+}
+
+size_t ZygoteSpace::Free(Thread* self, mirror::Object* ptr) {
+  LOG(FATAL) << "Unimplemented";
+  return 0;
+}
+
+size_t ZygoteSpace::FreeList(Thread* self, size_t num_ptrs, mirror::Object** ptrs) {
+  LOG(FATAL) << "Unimplemented";
+  return 0;
+}
+
 void ZygoteSpace::SweepCallback(size_t num_ptrs, mirror::Object** ptrs, void* arg) {
   SweepCallbackContext* context = static_cast<SweepCallbackContext*>(arg);
   DCHECK(context->space->IsZygoteSpace());
diff --git a/runtime/gc/space/zygote_space.h b/runtime/gc/space/zygote_space.h
index e0035b3..8cd1a9f 100644
--- a/runtime/gc/space/zygote_space.h
+++ b/runtime/gc/space/zygote_space.h
@@ -30,7 +30,7 @@
 namespace space {
 
 // A zygote space is a space which you cannot allocate into or free from.
-class ZygoteSpace : public ContinuousMemMapAllocSpace {
+class ZygoteSpace FINAL : public ContinuousMemMapAllocSpace {
  public:
   // Returns the remaining storage in the out_map field.
   static ZygoteSpace* Create(const std::string& name, MemMap* mem_map,
@@ -39,40 +39,40 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void Dump(std::ostream& os) const;
-  virtual SpaceType GetType() const {
+
+  SpaceType GetType() const OVERRIDE {
     return kSpaceTypeZygoteSpace;
   }
-  virtual ZygoteSpace* AsZygoteSpace() {
+
+  ZygoteSpace* AsZygoteSpace() OVERRIDE {
     return this;
   }
-  virtual mirror::Object* AllocWithGrowth(Thread* /*self*/, size_t /*num_bytes*/,
-                                          size_t* /*bytes_allocated*/) {
-    LOG(FATAL) << "Unimplemented";
-    return nullptr;
+
+  mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated,
+                        size_t* usable_size) OVERRIDE;
+
+  size_t AllocationSize(mirror::Object* obj, size_t* usable_size) OVERRIDE;
+
+  size_t Free(Thread* self, mirror::Object* ptr) OVERRIDE;
+
+  size_t FreeList(Thread* self, size_t num_ptrs, mirror::Object** ptrs) OVERRIDE;
+
+  // ZygoteSpaces don't have thread local state.
+  void RevokeThreadLocalBuffers(art::Thread*) OVERRIDE {
   }
-  virtual mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated) {
-    LOG(FATAL) << "Unimplemented";
-    return nullptr;
+  void RevokeAllThreadLocalBuffers() OVERRIDE {
   }
-  virtual size_t AllocationSize(mirror::Object* obj) {
-    LOG(FATAL) << "Unimplemented";
-    return 0;
-  }
-  virtual size_t Free(Thread* self, mirror::Object* ptr) {
-    LOG(FATAL) << "Unimplemented";
-    return 0;
-  }
-  virtual size_t FreeList(Thread* self, size_t num_ptrs, mirror::Object** ptrs) {
-    LOG(FATAL) << "Unimplemented";
-    return 0;
-  }
-  virtual uint64_t GetBytesAllocated() {
+
+  uint64_t GetBytesAllocated() OVERRIDE {
     return Size();
   }
-  virtual uint64_t GetObjectsAllocated() {
+
+  uint64_t GetObjectsAllocated() OVERRIDE {
     return objects_allocated_;
   }
 
+  void Clear() OVERRIDE;
+
  protected:
   virtual accounting::SpaceBitmap::SweepCallback* GetSweepCallback() {
     return &SweepCallback;
diff --git a/runtime/interpreter/interpreter_common.cc b/runtime/interpreter/interpreter_common.cc
index 83a1fbc..f76d50c 100644
--- a/runtime/interpreter/interpreter_common.cc
+++ b/runtime/interpreter/interpreter_common.cc
@@ -197,7 +197,8 @@
     }
     return false;
   }
-  Object* newArray = Array::Alloc<true>(self, arrayClass, length);
+  Object* newArray = Array::Alloc<true>(self, arrayClass, length, arrayClass->GetComponentSize(),
+                                        Runtime::Current()->GetHeap()->GetCurrentAllocator());
   if (UNLIKELY(newArray == NULL)) {
     DCHECK(self->IsExceptionPending());
     return false;
diff --git a/runtime/mirror/array-inl.h b/runtime/mirror/array-inl.h
index 90aaccd..d44f75f 100644
--- a/runtime/mirror/array-inl.h
+++ b/runtime/mirror/array-inl.h
@@ -27,6 +27,10 @@
 namespace art {
 namespace mirror {
 
+static inline size_t HeaderSize(size_t component_size) {
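+  // The array header is the Object header plus the 32-bit length field; 64-bit components get an
+  // extra 4 bytes of padding so that long/double element data stays 8-byte aligned.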
+  return sizeof(Object) + (component_size == sizeof(int64_t) ? 8 : 4);
+}
+
 template<VerifyObjectFlags kVerifyFlags>
 inline size_t Array::SizeOf() {
   // This is safe from overflow because the array was already allocated, so we know it's sane.
@@ -34,7 +38,7 @@
   // Don't need to check this since we already check this in GetClass.
   int32_t component_count =
       GetLength<static_cast<VerifyObjectFlags>(kVerifyFlags & ~kVerifyThis)>();
-  size_t header_size = sizeof(Object) + (component_size == sizeof(int64_t) ? 8 : 4);
+  size_t header_size = HeaderSize(component_size);
   size_t data_size = component_count * component_size;
   return header_size + data_size;
 }
@@ -46,7 +50,7 @@
   DCHECK_GE(component_count, 0);
   DCHECK(array_class->IsArrayClass());
 
-  size_t header_size = sizeof(Object) + (component_size == sizeof(int64_t) ? 8 : 4);
+  size_t header_size = HeaderSize(component_size);
   size_t data_size = component_count * component_size;
   size_t size = header_size + data_size;
 
@@ -61,13 +65,16 @@
   return size;
 }
 
-// Used for setting the array length in the allocation code path to ensure it is guarded by a CAS.
+// Used for setting the array length in the allocation code path to ensure it is guarded by a
+// StoreStore fence.
 class SetLengthVisitor {
  public:
   explicit SetLengthVisitor(int32_t length) : length_(length) {
   }
 
-  void operator()(Object* obj) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  void operator()(Object* obj, size_t usable_size) const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    UNUSED(usable_size);
     // Avoid AsArray as object is not yet in live bitmap or allocation stack.
     Array* array = down_cast<Array*>(obj);
     // DCHECK(array->IsArrayInstance());
@@ -76,41 +83,64 @@
 
  private:
   const int32_t length_;
+
+  DISALLOW_COPY_AND_ASSIGN(SetLengthVisitor);
+};
+
+// Similar to SetLengthVisitor, used for setting the array length to fill the usable size of an
+// array.
+class SetLengthToUsableSizeVisitor {
+ public:
+  SetLengthToUsableSizeVisitor(size_t header_size, size_t component_size) :
+      header_size_(header_size), component_size_(component_size) {
+  }
+
+  void operator()(Object* obj, size_t usable_size) const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    // Avoid AsArray as object is not yet in live bitmap or allocation stack.
+    Array* array = down_cast<Array*>(obj);
+    uint32_t length = (usable_size - header_size_) / component_size_;
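+    // The division truncates, so any bytes beyond the last whole component are left unused. For
+    // example, a 12 byte header, 4 byte components and 64 usable bytes give (64 - 12) / 4 = 13.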
+    // DCHECK(array->IsArrayInstance());
+    array->SetLength(length);
+  }
+
+ private:
+  const size_t header_size_;
+  const size_t component_size_;
+
+  DISALLOW_COPY_AND_ASSIGN(SetLengthToUsableSizeVisitor);
 };
 
 template <bool kIsInstrumented>
 inline Array* Array::Alloc(Thread* self, Class* array_class, int32_t component_count,
-                           size_t component_size, gc::AllocatorType allocator_type) {
+                           size_t component_size, gc::AllocatorType allocator_type,
+                           bool fill_usable) {
+  DCHECK(allocator_type != gc::kAllocatorTypeLOS);
   size_t size = ComputeArraySize(self, array_class, component_count, component_size);
   if (UNLIKELY(size == 0)) {
     return nullptr;
   }
   gc::Heap* heap = Runtime::Current()->GetHeap();
-  SetLengthVisitor visitor(component_count);
-  DCHECK(allocator_type != gc::kAllocatorTypeLOS);
-  return down_cast<Array*>(
-      heap->AllocObjectWithAllocator<kIsInstrumented, true>(self, array_class, size,
-                                                            allocator_type, visitor));
-}
-
-template <bool kIsInstrumented>
-inline Array* Array::Alloc(Thread* self, Class* array_class, int32_t component_count,
-                           gc::AllocatorType allocator_type) {
-  DCHECK(array_class->IsArrayClass());
-  return Alloc<kIsInstrumented>(self, array_class, component_count, array_class->GetComponentSize(),
-                                allocator_type);
-}
-template <bool kIsInstrumented>
-inline Array* Array::Alloc(Thread* self, Class* array_class, int32_t component_count) {
-  return Alloc<kIsInstrumented>(self, array_class, component_count,
-               Runtime::Current()->GetHeap()->GetCurrentAllocator());
-}
-
-template <bool kIsInstrumented>
-inline Array* Array::Alloc(Thread* self, Class* array_class, int32_t component_count,
-                           size_t component_size) {
-  return Alloc<kIsInstrumented>(self, array_class, component_count, component_size,
-               Runtime::Current()->GetHeap()->GetCurrentAllocator());
+  Array* result;
+  if (!fill_usable) {
+    SetLengthVisitor visitor(component_count);
+    result = down_cast<Array*>(
+        heap->AllocObjectWithAllocator<kIsInstrumented, true>(self, array_class, size,
+                                                              allocator_type, visitor));
+  } else {
+    SetLengthToUsableSizeVisitor visitor(HeaderSize(component_size), component_size);
+    result = down_cast<Array*>(
+        heap->AllocObjectWithAllocator<kIsInstrumented, true>(self, array_class, size,
+                                                              allocator_type, visitor));
+  }
+  if (kIsDebugBuild && result != nullptr && Runtime::Current()->IsStarted()) {
+    if (!fill_usable) {
+      CHECK_EQ(result->SizeOf(), size);
+    } else {
+      CHECK_GE(result->SizeOf(), size);
+    }
+  }
+  return result;
 }
 
 template<class T>
@@ -133,9 +163,17 @@
   }
 }
 
+template<typename T>
+inline PrimitiveArray<T>* PrimitiveArray<T>::Alloc(Thread* self, size_t length) {
+  DCHECK(array_class_ != NULL);
+  Array* raw_array = Array::Alloc<true>(self, array_class_, length, sizeof(T),
+                                        Runtime::Current()->GetHeap()->GetCurrentAllocator());
+  return down_cast<PrimitiveArray<T>*>(raw_array);
+}
+
 template<class T>
-void PrimitiveArray<T>::Memmove(int32_t dst_pos, PrimitiveArray<T>* src, int32_t src_pos,
-                                int32_t count) {
+inline void PrimitiveArray<T>::Memmove(int32_t dst_pos, PrimitiveArray<T>* src, int32_t src_pos,
+                                       int32_t count) {
   if (UNLIKELY(count == 0)) {
     return;
   }
@@ -192,8 +230,8 @@
 
 
 template<class T>
-void PrimitiveArray<T>::Memcpy(int32_t dst_pos, PrimitiveArray<T>* src, int32_t src_pos,
-                               int32_t count) {
+inline void PrimitiveArray<T>::Memcpy(int32_t dst_pos, PrimitiveArray<T>* src, int32_t src_pos,
+                                      int32_t count) {
   if (UNLIKELY(count == 0)) {
     return;
   }
diff --git a/runtime/mirror/array.cc b/runtime/mirror/array.cc
index 715f072..139e2d0 100644
--- a/runtime/mirror/array.cc
+++ b/runtime/mirror/array.cc
@@ -46,7 +46,9 @@
                                         const SirtRef<mirror::IntArray>& dimensions)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   int32_t array_length = dimensions->Get(current_dimension);
-  SirtRef<Array> new_array(self, Array::Alloc<true>(self, array_class.get(), array_length));
+  SirtRef<Array> new_array(self, Array::Alloc<true>(self, array_class.get(), array_length,
+                                                    array_class->GetComponentSize(),
+                                                    Runtime::Current()->GetHeap()->GetCurrentAllocator()));
   if (UNLIKELY(new_array.get() == nullptr)) {
     CHECK(self->IsExceptionPending());
     return nullptr;
@@ -117,13 +119,6 @@
   art::ThrowArrayStoreException(object->GetClass(), this->GetClass());
 }
 
-template<typename T>
-PrimitiveArray<T>* PrimitiveArray<T>::Alloc(Thread* self, size_t length) {
-  DCHECK(array_class_ != NULL);
-  Array* raw_array = Array::Alloc<true>(self, array_class_, length, sizeof(T));
-  return down_cast<PrimitiveArray<T>*>(raw_array);
-}
-
 template <typename T> Class* PrimitiveArray<T>::array_class_ = NULL;
 
 // Explicitly instantiate all the primitive array types.
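
The removal of PrimitiveArray<T>::Alloc just above pairs with the inline definition added to array-inl.h: keeping template definitions in the -inl.h header leaves them visible and inlinable at every call site, while the explicit instantiations that stay in array.cc keep translation units that only include array.h linking. A minimal, generic sketch of that split, with placeholder names (Box, box.h, box-inl.h, box.cc are all hypothetical):

  // box.h: declaration seen by every includer.
  template <typename T>
  class Box {
   public:
    static T* Make();
  };

  // box-inl.h: definition kept in the -inl.h header so callers can inline it.
  template <typename T>
  inline T* Box<T>::Make() {
    return new T();
  }

  // box.cc: explicit instantiation keeps code that only includes box.h linking.
  template class Box<int>;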
diff --git a/runtime/mirror/array.h b/runtime/mirror/array.h
index c4f9a75..772d303 100644
--- a/runtime/mirror/array.h
+++ b/runtime/mirror/array.h
@@ -28,25 +28,13 @@
 
 class MANAGED Array : public Object {
  public:
-  // A convenience for code that doesn't know the component size, and doesn't want to have to work
-  // it out itself.
+  // Allocates an array with the given properties. If fill_usable is true, the array length is
+  // at least component_count, and grows to consume any usable space left at the end of the
+  // allocation.
   template <bool kIsInstrumented>
   static Array* Alloc(Thread* self, Class* array_class, int32_t component_count,
-                      gc::AllocatorType allocator_type)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
-  template <bool kIsInstrumented>
-  static Array* Alloc(Thread* self, Class* array_class, int32_t component_count,
-                      size_t component_size, gc::AllocatorType allocator_type)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
-  template <bool kIsInstrumented>
-  static Array* Alloc(Thread* self, Class* array_class, int32_t component_count)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
-  template <bool kIsInstrumented>
-  static Array* Alloc(Thread* self, Class* array_class, int32_t component_count,
-                      size_t component_size)
+                      size_t component_size, gc::AllocatorType allocator_type,
+                      bool fill_usable = false)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   static Array* CreateMultiArray(Thread* self, const SirtRef<Class>& element_class,
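
With the convenience overloads gone, every call site now spells out the component size and the allocator, and fill_usable defaults to false so existing behaviour is unchanged unless a caller opts in. A hedged sketch of the two resulting call shapes (self and array_class stand for whatever the caller already has in scope):

  // Exact-length allocation: the array reports exactly 8 components.
  mirror::Array* exact = mirror::Array::Alloc<true>(
      self, array_class, 8, array_class->GetComponentSize(),
      Runtime::Current()->GetHeap()->GetCurrentAllocator());

  // Fill-usable allocation: at least 8 components, more if the allocator hands
  // back a larger usable size.
  mirror::Array* padded = mirror::Array::Alloc<true>(
      self, array_class, 8, array_class->GetComponentSize(),
      Runtime::Current()->GetHeap()->GetCurrentAllocator(), /* fill_usable */ true);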
diff --git a/runtime/mirror/object_test.cc b/runtime/mirror/object_test.cc
index 5dfd007..7d8da14 100644
--- a/runtime/mirror/object_test.cc
+++ b/runtime/mirror/object_test.cc
@@ -148,16 +148,52 @@
 TEST_F(ObjectTest, AllocArray) {
   ScopedObjectAccess soa(Thread::Current());
   Class* c = class_linker_->FindSystemClass(soa.Self(), "[I");
-  SirtRef<Array> a(soa.Self(), Array::Alloc<true>(soa.Self(), c, 1));
-  ASSERT_TRUE(c == a->GetClass());
+  SirtRef<Array> a(soa.Self(), Array::Alloc<true>(soa.Self(), c, 1, c->GetComponentSize(),
+                                                  Runtime::Current()->GetHeap()->GetCurrentAllocator()));
+  EXPECT_TRUE(c == a->GetClass());
+  EXPECT_EQ(1, a->GetLength());
 
   c = class_linker_->FindSystemClass(soa.Self(), "[Ljava/lang/Object;");
-  a.reset(Array::Alloc<true>(soa.Self(), c, 1));
-  ASSERT_TRUE(c == a->GetClass());
+  a.reset(Array::Alloc<true>(soa.Self(), c, 1, c->GetComponentSize(),
+                             Runtime::Current()->GetHeap()->GetCurrentAllocator()));
+  EXPECT_TRUE(c == a->GetClass());
+  EXPECT_EQ(1, a->GetLength());
 
   c = class_linker_->FindSystemClass(soa.Self(), "[[Ljava/lang/Object;");
-  a.reset(Array::Alloc<true>(soa.Self(), c, 1));
-  ASSERT_TRUE(c == a->GetClass());
+  a.reset(Array::Alloc<true>(soa.Self(), c, 1, c->GetComponentSize(),
+                             Runtime::Current()->GetHeap()->GetCurrentAllocator()));
+  EXPECT_TRUE(c == a->GetClass());
+  EXPECT_EQ(1, a->GetLength());
+}
+
+TEST_F(ObjectTest, AllocArray_FillUsable) {
+  ScopedObjectAccess soa(Thread::Current());
+  Class* c = class_linker_->FindSystemClass(soa.Self(), "[B");
+  SirtRef<Array> a(soa.Self(), Array::Alloc<true>(soa.Self(), c, 1, c->GetComponentSize(),
+                                                  Runtime::Current()->GetHeap()->GetCurrentAllocator(),
+                                                  true));
+  EXPECT_TRUE(c == a->GetClass());
+  EXPECT_LE(1, a->GetLength());
+
+  c = class_linker_->FindSystemClass(soa.Self(), "[I");
+  a.reset(Array::Alloc<true>(soa.Self(), c, 2, c->GetComponentSize(),
+                             Runtime::Current()->GetHeap()->GetCurrentAllocator(),
+                             true));
+  EXPECT_TRUE(c == a->GetClass());
+  EXPECT_LE(2, a->GetLength());
+
+  c = class_linker_->FindSystemClass(soa.Self(), "[Ljava/lang/Object;");
+  a.reset(Array::Alloc<true>(soa.Self(), c, 2, c->GetComponentSize(),
+                             Runtime::Current()->GetHeap()->GetCurrentAllocator(),
+                             true));
+  EXPECT_TRUE(c == a->GetClass());
+  EXPECT_LE(2, a->GetLength());
+
+  c = class_linker_->FindSystemClass(soa.Self(), "[[Ljava/lang/Object;");
+  a.reset(Array::Alloc<true>(soa.Self(), c, 2, c->GetComponentSize(),
+                             Runtime::Current()->GetHeap()->GetCurrentAllocator(), true));
+  EXPECT_TRUE(c == a->GetClass());
+  EXPECT_LE(2, a->GetLength());
 }
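
The EXPECT_LE assertions (rather than EXPECT_EQ) are the point of this test: with fill_usable the runtime may report a longer array than was asked for. Purely as an illustration, and assuming a 12-byte array header and an allocation rounded up to a 16-byte slot (neither number is guaranteed by this change), a [B of requested length 1 needs 13 bytes, receives 16 usable bytes, and so reports a length of (16 - 12) / 1 = 4.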
 
 template<typename ArrayT>
diff --git a/runtime/native/dalvik_system_VMRuntime.cc b/runtime/native/dalvik_system_VMRuntime.cc
index 5779442..3c703ba 100644
--- a/runtime/native/dalvik_system_VMRuntime.cc
+++ b/runtime/native/dalvik_system_VMRuntime.cc
@@ -72,7 +72,7 @@
   }
   gc::AllocatorType allocator = runtime->GetHeap()->GetCurrentNonMovingAllocator();
   mirror::Array* result = mirror::Array::Alloc<true>(soa.Self(), array_class, length,
-                                                     allocator);
+                                                     array_class->GetComponentSize(), allocator);
   return soa.AddLocalReference<jobject>(result);
 }
 
diff --git a/runtime/native/java_lang_reflect_Array.cc b/runtime/native/java_lang_reflect_Array.cc
index fc30aa6..a991818 100644
--- a/runtime/native/java_lang_reflect_Array.cc
+++ b/runtime/native/java_lang_reflect_Array.cc
@@ -50,14 +50,17 @@
     ThrowNegativeArraySizeException(length);
     return NULL;
   }
-  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+  Runtime* runtime = Runtime::Current();
+  ClassLinker* class_linker = runtime->GetClassLinker();
   mirror::Class* array_class = class_linker->FindArrayClass(soa.Self(), element_class);
   if (UNLIKELY(array_class == NULL)) {
     CHECK(soa.Self()->IsExceptionPending());
     return NULL;
   }
-  DCHECK(array_class->IsArrayClass());
-  mirror::Array* new_array = mirror::Array::Alloc<true>(soa.Self(), array_class, length);
+  DCHECK(array_class->IsObjectArrayClass());
+  mirror::Array* new_array = mirror::Array::Alloc<true>(soa.Self(), array_class, length,
+                                                        sizeof(mirror::HeapReference<mirror::Object>),
+                                                        runtime->GetHeap()->GetCurrentAllocator());
   return soa.AddLocalReference<jobject>(new_array);
 }
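
With the overload that read the component size off the class removed, this call now hard-codes sizeof(mirror::HeapReference<mirror::Object>) for the object array class it just looked up. A hedged, debug-only sketch of how that assumption could be made explicit at this call site (not something this change adds):

  // Sanity-check sketch: the hard-coded reference size should agree with what
  // the array class reports for its components.
  DCHECK_EQ(array_class->GetComponentSize(),
            sizeof(mirror::HeapReference<mirror::Object>));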
 
diff --git a/runtime/transaction_test.cc b/runtime/transaction_test.cc
index 44e24fb..7242b81 100644
--- a/runtime/transaction_test.cc
+++ b/runtime/transaction_test.cc
@@ -86,7 +86,10 @@
 
   // Allocate an array during transaction.
   SirtRef<mirror::Array> sirt_obj(soa.Self(),
-                                  mirror::Array::Alloc<false>(soa.Self(), sirt_klass.get(), kArraySize));
+                                  mirror::Array::Alloc<false>(soa.Self(), sirt_klass.get(),
+                                                              kArraySize,
+                                                              sirt_klass->GetComponentSize(),
+                                                              Runtime::Current()->GetHeap()->GetCurrentAllocator()));
   ASSERT_TRUE(sirt_obj.get() != nullptr);
   ASSERT_EQ(sirt_obj->GetClass(), sirt_klass.get());
   Runtime::Current()->ExitTransactionMode();