A custom 'runs-of-slots' memory allocator.

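RosAlloc carves its space into runs of pages tracked by a page map.
Allocations larger than kLargeSizeThreshold take whole page runs;
smaller ones are rounded up to one of kNumOfSizeBrackets bracket sizes
and served from "runs" of equally-sized slots. The smallest brackets
allocate from per-thread runs without locking, and the GC frees through
BulkFree(), which marks slots in per-run bulk free bit maps before
merging them back under the per-bracket locks.
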
Bug: 9986565
Change-Id: I0eb73b9458752113f519483616536d219d5f798b
diff --git a/compiler/image_test.cc b/compiler/image_test.cc
index e22e702..b9a87f1 100644
--- a/compiler/image_test.cc
+++ b/compiler/image_test.cc
@@ -99,7 +99,7 @@
     gc::space::ContinuousSpace* space = heap->GetNonMovingSpace();
     ASSERT_FALSE(space->IsImageSpace());
     ASSERT_TRUE(space != NULL);
-    ASSERT_TRUE(space->IsDlMallocSpace());
+    ASSERT_TRUE(space->IsMallocSpace());
     ASSERT_GE(sizeof(image_header) + space->Size(), static_cast<size_t>(file->GetLength()));
   }
 
@@ -141,7 +141,7 @@
 
   gc::Heap* heap = Runtime::Current()->GetHeap();
   ASSERT_TRUE(heap->HasImageSpace());
-  ASSERT_TRUE(heap->GetNonMovingSpace()->IsDlMallocSpace());
+  ASSERT_TRUE(heap->GetNonMovingSpace()->IsMallocSpace());
 
   gc::space::ImageSpace* image_space = heap->GetImageSpace();
   image_space->VerifyImageAllocations();
diff --git a/runtime/Android.mk b/runtime/Android.mk
index 60683d0..2aa59d5 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -42,6 +42,7 @@
 	dex_instruction.cc \
 	elf_file.cc \
 	gc/allocator/dlmalloc.cc \
+	gc/allocator/rosalloc.cc \
 	gc/accounting/card_table.cc \
 	gc/accounting/gc_allocator.cc \
 	gc/accounting/heap_bitmap.cc \
@@ -58,6 +59,8 @@
 	gc/space/dlmalloc_space.cc \
 	gc/space/image_space.cc \
 	gc/space/large_object_space.cc \
+	gc/space/malloc_space.cc \
+	gc/space/rosalloc_space.cc \
 	gc/space/space.cc \
 	hprof/hprof.cc \
 	image.cc \
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index 3ef0a7f..4bc16fb 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -3418,6 +3418,14 @@
   JDWP::Set4BE(&heap_id[0], 1);  // Heap id (bogus; we only have one heap).
   Dbg::DdmSendChunk(native ? CHUNK_TYPE("NHST") : CHUNK_TYPE("HPST"), sizeof(heap_id), heap_id);
 
+  Thread* self = Thread::Current();
+
+  // To allow the Walk/InspectAll() below to exclusively lock the
+  // mutator lock, temporarily release our shared hold on it by
+  // transitioning to the suspended state.
+  Locks::mutator_lock_->AssertSharedHeld(self);
+  self->TransitionFromRunnableToSuspended(kSuspended);
+
   // Send a series of heap segment chunks.
   HeapChunkContext context((what == HPSG_WHAT_MERGED_OBJECTS), native);
   if (native) {
@@ -3425,18 +3433,21 @@
   } else {
     gc::Heap* heap = Runtime::Current()->GetHeap();
     const std::vector<gc::space::ContinuousSpace*>& spaces = heap->GetContinuousSpaces();
-    Thread* self = Thread::Current();
     ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
     typedef std::vector<gc::space::ContinuousSpace*>::const_iterator It;
     for (It cur = spaces.begin(), end = spaces.end(); cur != end; ++cur) {
-      if ((*cur)->IsDlMallocSpace()) {
-        (*cur)->AsDlMallocSpace()->Walk(HeapChunkContext::HeapChunkCallback, &context);
+      if ((*cur)->IsMallocSpace()) {
+        (*cur)->AsMallocSpace()->Walk(HeapChunkContext::HeapChunkCallback, &context);
       }
     }
     // Walk the large objects, these are not in the AllocSpace.
     heap->GetLargeObjectsSpace()->Walk(HeapChunkContext::HeapChunkCallback, &context);
   }
 
+  // Re-acquire shared access to the mutator lock.
+  self->TransitionFromSuspendedToRunnable();
+  Locks::mutator_lock_->AssertSharedHeld(self);
+
   // Finally, send a heap end chunk.
   Dbg::DdmSendChunk(native ? CHUNK_TYPE("NHEN") : CHUNK_TYPE("HPEN"), sizeof(heap_id), heap_id);
 }
diff --git a/runtime/gc/accounting/mod_union_table-inl.h b/runtime/gc/accounting/mod_union_table-inl.h
index fb425df..19c6768 100644
--- a/runtime/gc/accounting/mod_union_table-inl.h
+++ b/runtime/gc/accounting/mod_union_table-inl.h
@@ -37,7 +37,7 @@
     typedef std::vector<space::ContinuousSpace*>::const_iterator It;
     for (It it = spaces.begin(); it != spaces.end(); ++it) {
       if ((*it)->Contains(ref)) {
-        return (*it)->IsDlMallocSpace();
+        return (*it)->IsMallocSpace();
       }
     }
     // Assume it points to a large object.
diff --git a/runtime/gc/allocator/rosalloc.cc b/runtime/gc/allocator/rosalloc.cc
new file mode 100644
index 0000000..9e65894
--- /dev/null
+++ b/runtime/gc/allocator/rosalloc.cc
@@ -0,0 +1,1630 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "base/mutex-inl.h"
+#include "thread.h"
+#include "thread_list.h"
+#include "rosalloc.h"
+
+#include <map>
+#include <list>
+#include <vector>
+
+namespace art {
+namespace gc {
+namespace allocator {
+
+// If true, log verbose details of operations.
+static const bool kTraceRosAlloc = false;
+// If true, check that the returned memory is actually zero.
+static const bool kCheckZeroMemory = kIsDebugBuild;
+
+extern "C" void* art_heap_rosalloc_morecore(RosAlloc* rosalloc, intptr_t increment);
+
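+// Per-size-bracket metadata (slot size, pages per run, slots per run, run
+// header size, and free bit map offsets), computed once by Initialize().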
+size_t RosAlloc::bracketSizes[kNumOfSizeBrackets];
+size_t RosAlloc::numOfPages[kNumOfSizeBrackets];
+size_t RosAlloc::numOfSlots[kNumOfSizeBrackets];
+size_t RosAlloc::headerSizes[kNumOfSizeBrackets];
+size_t RosAlloc::bulkFreeBitMapOffsets[kNumOfSizeBrackets];
+size_t RosAlloc::threadLocalFreeBitMapOffsets[kNumOfSizeBrackets];
+bool RosAlloc::initialized_ = false;
+
+RosAlloc::RosAlloc(void* base, size_t capacity)
+    : base_(reinterpret_cast<byte*>(base)), footprint_(capacity),
+      capacity_(capacity),
+      lock_("rosalloc global lock", kRosAllocGlobalLock),
+      bulk_free_lock_("rosalloc bulk free lock", kRosAllocBulkFreeLock) {
+  DCHECK(RoundUp(capacity, kPageSize) == capacity);
+  if (!initialized_) {
+    Initialize();
+  }
+  VLOG(heap) << "RosAlloc base="
+             << std::hex << (intptr_t)base_ << ", end="
+             << std::hex << (intptr_t)(base_ + capacity_)
+             << ", capacity=" << std::dec << capacity_;
+  memset(current_runs_, 0, sizeof(current_runs_));
+  for (size_t i = 0; i < kNumOfSizeBrackets; i++) {
+    size_bracket_locks_[i] = new Mutex("an rosalloc size bracket lock",
+                                       kRosAllocBracketLock);
+  }
+  size_t num_of_pages = capacity_ / kPageSize;
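+  // One page map entry and one free-page-run size entry per page of capacity.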
+  page_map_.resize(num_of_pages);
+  free_page_run_size_map_.resize(num_of_pages);
+
+  FreePageRun* free_pages = reinterpret_cast<FreePageRun*>(base_);
+  if (kIsDebugBuild) {
+    free_pages->magic_num_ = kMagicNumFree;
+  }
+  free_pages->SetByteSize(this, capacity_);
+  DCHECK_EQ(capacity_ % kPageSize, static_cast<size_t>(0));
+  free_pages->ReleasePages(this);
+  free_page_runs_.insert(free_pages);
+  if (kTraceRosAlloc) {
+    LOG(INFO) << "RosAlloc::RosAlloc() : Inserted run 0x" << std::hex
+              << reinterpret_cast<intptr_t>(free_pages)
+              << " into free_page_runs_";
+  }
+}
+
+void* RosAlloc::AllocPages(Thread* self, size_t num_pages, byte page_map_type) {
+  lock_.AssertHeld(self);
+  DCHECK(page_map_type == kPageMapRun || page_map_type == kPageMapLargeObject);
+  FreePageRun* res = NULL;
+  size_t req_byte_size = num_pages * kPageSize;
+  // Find the lowest address free page run that's large enough.
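+  // free_page_runs_ is a set ordered by address, so this scan is an
+  // address-ordered first fit.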
+  for (auto it = free_page_runs_.begin(); it != free_page_runs_.end(); ) {
+    FreePageRun* fpr = *it;
+    DCHECK(fpr->IsFree());
+    size_t fpr_byte_size = fpr->ByteSize(this);
+    DCHECK_EQ(fpr_byte_size % kPageSize, static_cast<size_t>(0));
+    if (req_byte_size <= fpr_byte_size) {
+      // Found one.
+      free_page_runs_.erase(it++);
+      if (kTraceRosAlloc) {
+        LOG(INFO) << "RosAlloc::AllocPages() : Erased run 0x"
+                  << std::hex << reinterpret_cast<intptr_t>(fpr)
+                  << " from free_page_runs_";
+      }
+      if (req_byte_size < fpr_byte_size) {
+        // Split.
+        FreePageRun* remainder = reinterpret_cast<FreePageRun*>(reinterpret_cast<byte*>(fpr) + req_byte_size);
+        if (kIsDebugBuild) {
+          remainder->magic_num_ = kMagicNumFree;
+        }
+        remainder->SetByteSize(this, fpr_byte_size - req_byte_size);
+        DCHECK_EQ(remainder->ByteSize(this) % kPageSize, static_cast<size_t>(0));
+        // Don't need to call madvise on remainder here.
+        free_page_runs_.insert(remainder);
+        if (kTraceRosAlloc) {
+          LOG(INFO) << "RosAlloc::AllocPages() : Inserted run 0x" << std::hex
+                    << reinterpret_cast<intptr_t>(remainder)
+                    << " into free_page_runs_";
+        }
+        fpr->SetByteSize(this, req_byte_size);
+        DCHECK_EQ(fpr->ByteSize(this) % kPageSize, static_cast<size_t>(0));
+      }
+      res = fpr;
+      break;
+    } else {
+      ++it;
+    }
+  }
+
+  // Failed to allocate pages. Grow the footprint, if possible.
+  if (UNLIKELY(res == NULL && capacity_ > footprint_)) {
+    FreePageRun* last_free_page_run = NULL;
+    size_t last_free_page_run_size;
+    auto it = free_page_runs_.rbegin();
+    if (it != free_page_runs_.rend() && (last_free_page_run = *it)->End(this) == base_ + footprint_) {
+      // There is a free page run at the end.
+      DCHECK(last_free_page_run->IsFree());
+      DCHECK(page_map_[ToPageMapIndex(last_free_page_run)] == kPageMapEmpty);
+      last_free_page_run_size = last_free_page_run->ByteSize(this);
+    } else {
+      // There is no free page run at the end.
+      last_free_page_run_size = 0;
+    }
+    DCHECK_LT(last_free_page_run_size, req_byte_size);
+    if (capacity_ - footprint_ + last_free_page_run_size >= req_byte_size) {
+      // If we grow the heap, we can allocate it.
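+      // Grow by at least 2MB, or by enough to satisfy the request after
+      // reusing the trailing free run, but never past the remaining capacity.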
+      size_t increment = std::min(std::max(2 * MB, req_byte_size - last_free_page_run_size),
+                                  capacity_ - footprint_);
+      DCHECK_EQ(increment % kPageSize, static_cast<size_t>(0));
+      size_t new_footprint = footprint_ + increment;
+      size_t new_num_of_pages = new_footprint / kPageSize;
+      DCHECK_LT(page_map_.size(), new_num_of_pages);
+      DCHECK_LT(free_page_run_size_map_.size(), new_num_of_pages);
+      page_map_.resize(new_num_of_pages);
+      free_page_run_size_map_.resize(new_num_of_pages);
+      art_heap_rosalloc_morecore(this, increment);
+      if (last_free_page_run_size > 0) {
+        // There was a free page run at the end. Expand its size.
+        DCHECK_EQ(last_free_page_run_size, last_free_page_run->ByteSize(this));
+        last_free_page_run->SetByteSize(this, last_free_page_run_size + increment);
+        DCHECK_EQ(last_free_page_run->ByteSize(this) % kPageSize, static_cast<size_t>(0));
+        DCHECK(last_free_page_run->End(this) == base_ + new_footprint);
+      } else {
+        // Otherwise, insert a new free page run at the end.
+        FreePageRun* new_free_page_run = reinterpret_cast<FreePageRun*>(base_ + footprint_);
+        if (kIsDebugBuild) {
+          new_free_page_run->magic_num_ = kMagicNumFree;
+        }
+        new_free_page_run->SetByteSize(this, increment);
+        DCHECK_EQ(new_free_page_run->ByteSize(this) % kPageSize, static_cast<size_t>(0));
+        free_page_runs_.insert(new_free_page_run);
+        DCHECK(*free_page_runs_.rbegin() == new_free_page_run);
+        if (kTraceRosAlloc) {
+          LOG(INFO) << "RosAlloc::AlloPages() : Grew the heap by inserting run 0x"
+                    << std::hex << reinterpret_cast<intptr_t>(new_free_page_run)
+                    << " into free_page_runs_";
+        }
+      }
+      DCHECK_LE(footprint_ + increment, capacity_);
+      if (kTraceRosAlloc) {
+        LOG(INFO) << "RosAlloc::AllocPages() : increased the footprint from "
+                  << footprint_ << " to " << new_footprint;
+      }
+      footprint_ = new_footprint;
+
+      // And retry the last free page run.
+      it = free_page_runs_.rbegin();
+      DCHECK(it != free_page_runs_.rend());
+      FreePageRun* fpr = *it;
+      if (kIsDebugBuild && last_free_page_run_size > 0) {
+        DCHECK(last_free_page_run != NULL);
+        DCHECK_EQ(last_free_page_run, fpr);
+      }
+      size_t fpr_byte_size = fpr->ByteSize(this);
+      DCHECK_EQ(fpr_byte_size % kPageSize, static_cast<size_t>(0));
+      DCHECK_LE(req_byte_size, fpr_byte_size);
+      free_page_runs_.erase(fpr);
+      if (kTraceRosAlloc) {
+        LOG(INFO) << "RosAlloc::AllocPages() : Erased run 0x" << std::hex << reinterpret_cast<intptr_t>(fpr)
+                  << " from free_page_runs_";
+      }
+      if (req_byte_size < fpr_byte_size) {
+        // Split if there's a remainder.
+        FreePageRun* remainder = reinterpret_cast<FreePageRun*>(reinterpret_cast<byte*>(fpr) + req_byte_size);
+        if (kIsDebugBuild) {
+          remainder->magic_num_ = kMagicNumFree;
+        }
+        remainder->SetByteSize(this, fpr_byte_size - req_byte_size);
+        DCHECK_EQ(remainder->ByteSize(this) % kPageSize, static_cast<size_t>(0));
+        free_page_runs_.insert(remainder);
+        if (kTraceRosAlloc) {
+          LOG(INFO) << "RosAlloc::AllocPages() : Inserted run 0x" << std::hex
+                    << reinterpret_cast<intptr_t>(remainder)
+                    << " into free_page_runs_";
+        }
+        fpr->SetByteSize(this, req_byte_size);
+        DCHECK_EQ(fpr->ByteSize(this) % kPageSize, static_cast<size_t>(0));
+      }
+      res = fpr;
+    }
+  }
+  if (LIKELY(res != NULL)) {
+    // Update the page map.
+    size_t page_map_idx = ToPageMapIndex(res);
+    for (size_t i = 0; i < num_pages; i++) {
+      DCHECK(page_map_[page_map_idx + i] == kPageMapEmpty);
+    }
+    switch (page_map_type) {
+    case kPageMapRun:
+      page_map_[page_map_idx] = kPageMapRun;
+      for (size_t i = 1; i < num_pages; i++) {
+        page_map_[page_map_idx + i] = kPageMapRunPart;
+      }
+      break;
+    case kPageMapLargeObject:
+      page_map_[page_map_idx] = kPageMapLargeObject;
+      for (size_t i = 1; i < num_pages; i++) {
+        page_map_[page_map_idx + i] = kPageMapLargeObjectPart;
+      }
+      break;
+    default:
+      LOG(FATAL) << "Unreachable - page map type: " << page_map_type;
+      break;
+    }
+    if (kIsDebugBuild) {
+      // Clear the first page, which isn't madvised away in the debug
+      // build and still contains the magic number.
+      memset(res, 0, kPageSize);
+    }
+    if (kTraceRosAlloc) {
+      LOG(INFO) << "RosAlloc::AllocPages() : 0x" << std::hex << reinterpret_cast<intptr_t>(res)
+                << "-0x" << (reinterpret_cast<intptr_t>(res) + num_pages * kPageSize)
+                << "(" << std::dec << (num_pages * kPageSize) << ")";
+    }
+    return res;
+  }
+
+  // Fail.
+  if (kTraceRosAlloc) {
+    LOG(INFO) << "RosAlloc::AllocPages() : NULL";
+  }
+  return nullptr;
+}
+
+void RosAlloc::FreePages(Thread* self, void* ptr) {
+  lock_.AssertHeld(self);
+  size_t pm_idx = ToPageMapIndex(ptr);
+  DCHECK(pm_idx < page_map_.size());
+  byte pm_type = page_map_[pm_idx];
+  DCHECK(pm_type == kPageMapRun || pm_type == kPageMapLargeObject);
+  byte pm_part_type;
+  switch (pm_type) {
+  case kPageMapRun:
+    pm_part_type = kPageMapRunPart;
+    break;
+  case kPageMapLargeObject:
+    pm_part_type = kPageMapLargeObjectPart;
+    break;
+  default:
+    pm_part_type = kPageMapEmpty;
+    LOG(FATAL) << "Unreachable - RosAlloc::FreePages() : " << "pm_idx=" << pm_idx << ", pm_type="
+               << static_cast<int>(pm_type) << ", ptr=" << std::hex
+               << reinterpret_cast<intptr_t>(ptr);
+    return;
+  }
+  // Update the page map and count the number of pages.
+  size_t num_pages = 1;
+  page_map_[pm_idx] = kPageMapEmpty;
+  size_t idx = pm_idx + 1;
+  size_t end = page_map_.size();
+  while (idx < end && page_map_[idx] == pm_part_type) {
+    page_map_[idx] = kPageMapEmpty;
+    num_pages++;
+    idx++;
+  }
+
+  if (kTraceRosAlloc) {
+    LOG(INFO) << "RosAlloc::FreePages() : 0x" << std::hex << reinterpret_cast<intptr_t>(ptr)
+              << "-0x" << (reinterpret_cast<intptr_t>(ptr) + num_pages * kPageSize)
+              << "(" << std::dec << (num_pages * kPageSize) << ")";
+  }
+
+  // Turn it into a free run.
+  FreePageRun* fpr = reinterpret_cast<FreePageRun*>(ptr);
+  if (kIsDebugBuild) {
+    fpr->magic_num_ = kMagicNumFree;
+  }
+  fpr->SetByteSize(this, num_pages * kPageSize);
+  DCHECK_EQ(fpr->ByteSize(this) % kPageSize, static_cast<size_t>(0));
+
+  DCHECK(free_page_runs_.find(fpr) == free_page_runs_.end());
+  if (!free_page_runs_.empty()) {
+    // Try to coalesce in the higher address direction.
+    if (kTraceRosAlloc) {
+      LOG(INFO) << "RosAlloc::FreePages() : trying to coalesce a free page run 0x"
+                << std::hex << reinterpret_cast<uintptr_t>(fpr) << " [" << std::dec << pm_idx << "] -0x"
+                << std::hex << reinterpret_cast<uintptr_t>(fpr->End(this)) << " [" << std::dec
+                << (fpr->End(this) == End() ? page_map_.size() : ToPageMapIndex(fpr->End(this))) << "]";
+    }
+    auto higher_it = free_page_runs_.upper_bound(fpr);
+    if (higher_it != free_page_runs_.end()) {
+      for (auto it = higher_it; it != free_page_runs_.end(); ) {
+        FreePageRun* h = *it;
+        DCHECK_EQ(h->ByteSize(this) % kPageSize, static_cast<size_t>(0));
+        if (kTraceRosAlloc) {
+          LOG(INFO) << "RosAlloc::FreePages() : trying to coalesce with a higher free page run 0x"
+                    << std::hex << reinterpret_cast<uintptr_t>(h) << " [" << std::dec << ToPageMapIndex(h) << "] -0x"
+                    << std::hex << reinterpret_cast<uintptr_t>(h->End(this)) << " [" << std::dec
+                    << (h->End(this) == End() ? page_map_.size() : ToPageMapIndex(h->End(this))) << "]";
+        }
+        if (fpr->End(this) == h->Begin()) {
+          if (kTraceRosAlloc) {
+            LOG(INFO) << "Success";
+          }
+          free_page_runs_.erase(it++);
+          if (kTraceRosAlloc) {
+            LOG(INFO) << "RosAlloc::FreePages() : (coalesce) Erased run 0x" << std::hex
+                      << reinterpret_cast<intptr_t>(h)
+                      << " from free_page_runs_";
+          }
+          fpr->SetByteSize(this, fpr->ByteSize(this) + h->ByteSize(this));
+          DCHECK_EQ(fpr->ByteSize(this) % kPageSize, static_cast<size_t>(0));
+        } else {
+          // Not adjacent. Stop.
+          if (kTraceRosAlloc) {
+            LOG(INFO) << "Fail";
+          }
+          break;
+        }
+      }
+    }
+    // Try to coalesce in the lower address direction.
+    auto lower_it = free_page_runs_.upper_bound(fpr);
+    if (lower_it != free_page_runs_.begin()) {
+      --lower_it;
+      for (auto it = lower_it; ; ) {
+        // We want to try to coalesce with the first element but
+        // there's no "<=" operator for the iterator.
+        bool to_exit_loop = it == free_page_runs_.begin();
+
+        FreePageRun* l = *it;
+        DCHECK_EQ(l->ByteSize(this) % kPageSize, static_cast<size_t>(0));
+        if (kTraceRosAlloc) {
+          LOG(INFO) << "RosAlloc::FreePages() : trying to coalesce with a lower free page run 0x"
+                    << std::hex << reinterpret_cast<uintptr_t>(l) << " [" << std::dec << ToPageMapIndex(l) << "] -0x"
+                    << std::hex << reinterpret_cast<uintptr_t>(l->End(this)) << " [" << std::dec
+                    << (l->End(this) == End() ? page_map_.size() : ToPageMapIndex(l->End(this))) << "]";
+        }
+        if (l->End(this) == fpr->Begin()) {
+          if (kTraceRosAlloc) {
+            LOG(INFO) << "Success";
+          }
+          free_page_runs_.erase(it--);
+          if (kTraceRosAlloc) {
+            LOG(INFO) << "RosAlloc::FreePages() : (coalesce) Erased run 0x" << std::hex
+                      << reinterpret_cast<intptr_t>(l)
+                      << " from free_page_runs_";
+          }
+          l->SetByteSize(this, l->ByteSize(this) + fpr->ByteSize(this));
+          DCHECK_EQ(l->ByteSize(this) % kPageSize, static_cast<size_t>(0));
+          fpr = l;
+        } else {
+          // Not adjacent. Stop.
+          if (kTraceRosAlloc) {
+            LOG(INFO) << "Fail";
+          }
+          break;
+        }
+        if (to_exit_loop) {
+          break;
+        }
+      }
+    }
+  }
+
+  // Insert it.
+  DCHECK_EQ(fpr->ByteSize(this) % kPageSize, static_cast<size_t>(0));
+  DCHECK(free_page_runs_.find(fpr) == free_page_runs_.end());
+  fpr->ReleasePages(this);
+  free_page_runs_.insert(fpr);
+  DCHECK(free_page_runs_.find(fpr) != free_page_runs_.end());
+  if (kTraceRosAlloc) {
+    LOG(INFO) << "RosAlloc::FreePages() : Inserted run 0x" << std::hex << reinterpret_cast<intptr_t>(fpr)
+              << " into free_page_runs_";
+  }
+}
+
+void* RosAlloc::Alloc(Thread* self, size_t size, size_t* bytes_allocated) {
+  if (UNLIKELY(size > kLargeSizeThreshold)) {
+    size_t num_pages = RoundUp(size, kPageSize) / kPageSize;
+    void* r;
+    {
+      MutexLock mu(self, lock_);
+      r = AllocPages(self, num_pages, kPageMapLargeObject);
+    }
+    if (bytes_allocated != NULL) {
+      *bytes_allocated = num_pages * kPageSize;
+    }
+    if (kTraceRosAlloc) {
+      if (r != NULL) {
+        LOG(INFO) << "RosAlloc::Alloc() : (large obj) 0x" << std::hex << reinterpret_cast<intptr_t>(r)
+                  << "-0x" << (reinterpret_cast<intptr_t>(r) + num_pages * kPageSize)
+                  << "(" << std::dec << (num_pages * kPageSize) << ")";
+      } else {
+        LOG(INFO) << "RosAlloc::Alloc() : (large obj) NULL";
+      }
+    }
+    // Check if the returned memory is really all zero.
+    if (kCheckZeroMemory && r != NULL) {
+      byte* bytes = reinterpret_cast<byte*>(r);
+      for (size_t i = 0; i < size; ++i) {
+        DCHECK_EQ(bytes[i], 0);
+      }
+    }
+    return r;
+  }
+  void* m = AllocFromRun(self, size, bytes_allocated);
+  // Check if the returned memory is really all zero.
+  if (kCheckZeroMemory && m != NULL) {
+    byte* bytes = reinterpret_cast<byte*>(m);
+    for (size_t i = 0; i < size; ++i) {
+      DCHECK_EQ(bytes[i], 0);
+    }
+  }
+  return m;
+}
+
+void RosAlloc::FreeInternal(Thread* self, void* ptr) {
+  DCHECK(base_ <= ptr && ptr < base_ + footprint_);
+  size_t pm_idx = RoundDownToPageMapIndex(ptr);
+  bool free_from_run = false;
+  Run* run = NULL;
+  {
+    MutexLock mu(self, lock_);
+    DCHECK(pm_idx < page_map_.size());
+    byte page_map_entry = page_map_[pm_idx];
+    if (kTraceRosAlloc) {
+      LOG(INFO) << "RosAlloc::FreeInternal() : " << std::hex << ptr << ", pm_idx=" << std::dec << pm_idx
+                << ", page_map_entry=" << static_cast<int>(page_map_entry);
+    }
+    switch (page_map_[pm_idx]) {
+      case kPageMapEmpty:
+        LOG(FATAL) << "Unreachable - page map type: " << page_map_[pm_idx];
+        return;
+      case kPageMapLargeObject:
+        FreePages(self, ptr);
+        return;
+      case kPageMapLargeObjectPart:
+        LOG(FATAL) << "Unreachable - page map type: " << page_map_[pm_idx];
+        return;
+      case kPageMapRun:
+      case kPageMapRunPart: {
+        free_from_run = true;
+        size_t pi = pm_idx;
+        DCHECK(page_map_[pi] == kPageMapRun || page_map_[pi] == kPageMapRunPart);
+        // Find the beginning of the run.
+        while (page_map_[pi] != kPageMapRun) {
+          pi--;
+          DCHECK(pi < capacity_ / kPageSize);
+        }
+        DCHECK(page_map_[pi] == kPageMapRun);
+        run = reinterpret_cast<Run*>(base_ + pi * kPageSize);
+        DCHECK(run->magic_num_ == kMagicNum);
+        break;
+      }
+      default:
+        LOG(FATAL) << "Unreachable - page map type: " << page_map_[pm_idx];
+        return;
+    }
+  }
+  if (LIKELY(free_from_run)) {
+    DCHECK(run != NULL);
+    FreeFromRun(self, ptr, run);
+  }
+}
+
+void RosAlloc::Free(Thread* self, void* ptr) {
+  ReaderMutexLock rmu(self, bulk_free_lock_);
+  FreeInternal(self, ptr);
+}
+
+RosAlloc::Run* RosAlloc::RefillRun(Thread* self, size_t idx) {
+  Run* new_run;
+  size_t num_pages = numOfPages[idx];
+  // Get the lowest address non-full run from the binary tree.
+  Run* temp = NULL;
+  std::set<Run*>* bt = &non_full_runs_[idx];
+  std::set<Run*>::iterator found = bt->lower_bound(temp);
+  if (found != bt->end()) {
+    // If there's one, use it as the current run.
+    Run* non_full_run = *found;
+    DCHECK(non_full_run != NULL);
+    new_run = non_full_run;
+    DCHECK_EQ(new_run->is_thread_local_, 0);
+    bt->erase(found);
+    DCHECK_EQ(non_full_run->is_thread_local_, 0);
+  } else {
+    // If there's none, allocate a new run and use it as the
+    // current run.
+    {
+      MutexLock mu(self, lock_);
+      new_run = reinterpret_cast<Run*>(AllocPages(self, num_pages, kPageMapRun));
+    }
+    if (new_run == NULL) {
+      return NULL;
+    }
+    if (kIsDebugBuild) {
+      new_run->magic_num_ = kMagicNum;
+    }
+    new_run->size_bracket_idx_ = idx;
+    new_run->top_slot_idx_ = 0;
+    new_run->ClearBitMaps();
+    new_run->to_be_bulk_freed_ = false;
+  }
+  return new_run;
+}
+
+void* RosAlloc::AllocFromRun(Thread* self, size_t size, size_t* bytes_allocated) {
+  DCHECK(size <= kLargeSizeThreshold);
+  size_t bracket_size;
+  size_t idx = SizeToIndexAndBracketSize(size, &bracket_size);
+  DCHECK_EQ(idx, SizeToIndex(size));
+  DCHECK_EQ(bracket_size, IndexToBracketSize(idx));
+  DCHECK_EQ(bracket_size, bracketSizes[idx]);
+  DCHECK(size <= bracket_size);
+  DCHECK(size > 512 || bracket_size - size < 16);
+
+  void* slot_addr;
+
+  if (LIKELY(idx <= kMaxThreadLocalSizeBracketIdx)) {
+    // Use a thread-local run.
+    Run* thread_local_run = reinterpret_cast<Run*>(self->rosalloc_runs_[idx]);
+    if (UNLIKELY(thread_local_run == NULL)) {
+      MutexLock mu(self, *size_bracket_locks_[idx]);
+      thread_local_run = RefillRun(self, idx);
+      if (UNLIKELY(thread_local_run == NULL)) {
+        return NULL;
+      }
+      DCHECK(non_full_runs_[idx].find(thread_local_run) == non_full_runs_[idx].end());
+      DCHECK(full_runs_[idx].find(thread_local_run) == full_runs_[idx].end());
+      thread_local_run->is_thread_local_ = 1;
+      self->rosalloc_runs_[idx] = thread_local_run;
+      DCHECK(!thread_local_run->IsFull());
+    }
+
+    DCHECK(thread_local_run != NULL);
+    DCHECK_NE(thread_local_run->is_thread_local_, 0);
+    slot_addr = thread_local_run->AllocSlot();
+
+    if (UNLIKELY(slot_addr == NULL)) {
+      // The run got full. Try to free slots.
+      DCHECK(thread_local_run->IsFull());
+      MutexLock mu(self, *size_bracket_locks_[idx]);
+      bool is_all_free_after_merge;
+      if (thread_local_run->MergeThreadLocalFreeBitMapToAllocBitMap(&is_all_free_after_merge)) {
+        // Some slot got freed. Keep it.
+        DCHECK(!thread_local_run->IsFull());
+        DCHECK_EQ(is_all_free_after_merge, thread_local_run->IsAllFree());
+        if (is_all_free_after_merge) {
+          // Reinstate the bump index mode if it's all free.
+          DCHECK_EQ(thread_local_run->top_slot_idx_, numOfSlots[idx]);
+          thread_local_run->top_slot_idx_ = 0;
+        }
+      } else {
+        // No slots got freed. Try to refill the thread-local run.
+        DCHECK(thread_local_run->IsFull());
+        self->rosalloc_runs_[idx] = NULL;
+        thread_local_run->is_thread_local_ = 0;
+        if (kIsDebugBuild) {
+          full_runs_[idx].insert(thread_local_run);
+          if (kTraceRosAlloc) {
+            LOG(INFO) << "RosAlloc::AllocFromRun() : Inserted run 0x" << std::hex
+                      << reinterpret_cast<intptr_t>(thread_local_run)
+                      << " into full_runs_[" << std::dec << idx << "]";
+          }
+        }
+        DCHECK(non_full_runs_[idx].find(thread_local_run) == non_full_runs_[idx].end());
+        DCHECK(full_runs_[idx].find(thread_local_run) != full_runs_[idx].end());
+        thread_local_run = RefillRun(self, idx);
+        if (UNLIKELY(thread_local_run == NULL)) {
+          return NULL;
+        }
+        DCHECK(non_full_runs_[idx].find(thread_local_run) == non_full_runs_[idx].end());
+        DCHECK(full_runs_[idx].find(thread_local_run) == full_runs_[idx].end());
+        thread_local_run->is_thread_local_ = 1;
+        self->rosalloc_runs_[idx] = thread_local_run;
+        DCHECK(!thread_local_run->IsFull());
+      }
+
+      DCHECK(thread_local_run != NULL);
+      DCHECK(!thread_local_run->IsFull());
+      DCHECK_NE(thread_local_run->is_thread_local_, 0);
+      slot_addr = thread_local_run->AllocSlot();
+      // Must succeed now with a new run.
+      DCHECK(slot_addr != NULL);
+    }
+    if (kTraceRosAlloc) {
+      LOG(INFO) << "RosAlloc::AllocFromRun() thread-local : 0x" << std::hex << reinterpret_cast<intptr_t>(slot_addr)
+                << "-0x" << (reinterpret_cast<intptr_t>(slot_addr) + bracket_size)
+                << "(" << std::dec << (bracket_size) << ")";
+    }
+  } else {
+    // Use the (shared) current run.
+    MutexLock mu(self, *size_bracket_locks_[idx]);
+    Run* current_run = current_runs_[idx];
+    if (UNLIKELY(current_run == NULL)) {
+      current_run = RefillRun(self, idx);
+      if (UNLIKELY(current_run == NULL)) {
+        return NULL;
+      }
+      DCHECK(non_full_runs_[idx].find(current_run) == non_full_runs_[idx].end());
+      DCHECK(full_runs_[idx].find(current_run) == full_runs_[idx].end());
+      current_run->is_thread_local_ = 0;
+      current_runs_[idx] = current_run;
+      DCHECK(!current_run->IsFull());
+    }
+    DCHECK(current_run != NULL);
+    slot_addr = current_run->AllocSlot();
+    if (UNLIKELY(slot_addr == NULL)) {
+      // The current run got full. Try to refill it.
+      DCHECK(current_run->IsFull());
+      current_runs_[idx] = NULL;
+      if (kIsDebugBuild) {
+        // Insert it into full_runs and set the current run to NULL.
+        full_runs_[idx].insert(current_run);
+        if (kTraceRosAlloc) {
+          LOG(INFO) << "RosAlloc::AllocFromRun() : Inserted run 0x" << std::hex << reinterpret_cast<intptr_t>(current_run)
+                    << " into full_runs_[" << std::dec << idx << "]";
+        }
+      }
+      DCHECK(non_full_runs_[idx].find(current_run) == non_full_runs_[idx].end());
+      DCHECK(full_runs_[idx].find(current_run) != full_runs_[idx].end());
+      current_run = RefillRun(self, idx);
+      if (UNLIKELY(current_run == NULL)) {
+        return NULL;
+      }
+      DCHECK(current_run != NULL);
+      DCHECK(non_full_runs_[idx].find(current_run) == non_full_runs_[idx].end());
+      DCHECK(full_runs_[idx].find(current_run) == full_runs_[idx].end());
+      current_run->is_thread_local_ = 0;
+      current_runs_[idx] = current_run;
+      DCHECK(!current_run->IsFull());
+      slot_addr = current_run->AllocSlot();
+      // Must succeed now with a new run.
+      DCHECK(slot_addr != NULL);
+    }
+    if (kTraceRosAlloc) {
+      LOG(INFO) << "RosAlloc::AllocFromRun() : 0x" << std::hex << reinterpret_cast<intptr_t>(slot_addr)
+                << "-0x" << (reinterpret_cast<intptr_t>(slot_addr) + bracket_size)
+                << "(" << std::dec << (bracket_size) << ")";
+    }
+  }
+  if (LIKELY(bytes_allocated != NULL)) {
+    *bytes_allocated = bracket_size;
+  }
+  memset(slot_addr, 0, size);
+  return slot_addr;
+}
+
+void RosAlloc::FreeFromRun(Thread* self, void* ptr, Run* run) {
+  DCHECK(run->magic_num_ == kMagicNum);
+  DCHECK(run < ptr && ptr < run->End());
+  size_t idx = run->size_bracket_idx_;
+  MutexLock mu(self, *size_bracket_locks_[idx]);
+  bool run_was_full = false;
+  if (kIsDebugBuild) {
+    run_was_full = run->IsFull();
+  }
+  if (kTraceRosAlloc) {
+    LOG(INFO) << "RosAlloc::FreeFromRun() : 0x" << std::hex << reinterpret_cast<intptr_t>(ptr);
+  }
+  if (LIKELY(run->is_thread_local_ != 0)) {
+    // It's a thread-local run. Just mark the thread-local free bit map and return.
+    DCHECK_LE(run->size_bracket_idx_, kMaxThreadLocalSizeBracketIdx);
+    DCHECK(non_full_runs_[idx].find(run) == non_full_runs_[idx].end());
+    DCHECK(full_runs_[idx].find(run) == full_runs_[idx].end());
+    run->MarkThreadLocalFreeBitMap(ptr);
+    if (kTraceRosAlloc) {
+      LOG(INFO) << "RosAlloc::FreeFromRun() : Freed a slot in a thread local run 0x" << std::hex
+                << reinterpret_cast<intptr_t>(run);
+    }
+    // A thread-local run is kept thread-local even if it becomes all free.
+    return;
+  }
+  // Free the slot in the run.
+  run->FreeSlot(ptr);
+  std::set<Run*>* non_full_runs = &non_full_runs_[idx];
+  if (run->IsAllFree()) {
+    // It has just become completely free. Free the pages of this run.
+    std::set<Run*>::iterator pos = non_full_runs->find(run);
+    if (pos != non_full_runs->end()) {
+      non_full_runs->erase(pos);
+      if (kTraceRosAlloc) {
+        LOG(INFO) << "RosAlloc::FreeFromRun() : Erased run 0x" << std::hex
+                  << reinterpret_cast<intptr_t>(run) << " from non_full_runs_";
+      }
+    }
+    if (run == current_runs_[idx]) {
+      current_runs_[idx] = NULL;
+    }
+    DCHECK(non_full_runs_[idx].find(run) == non_full_runs_[idx].end());
+    DCHECK(full_runs_[idx].find(run) == full_runs_[idx].end());
+    {
+      MutexLock mu(self, lock_);
+      FreePages(self, run);
+    }
+  } else {
+    // It is not completely free. If it was neither the current run
+    // nor already in the non-full run set (i.e., it was full), insert
+    // it into the non-full run set.
+    if (run != current_runs_[idx]) {
+      hash_set<Run*, hash_run, eq_run>* full_runs =
+          kIsDebugBuild ? &full_runs_[idx] : NULL;
+      std::set<Run*>::iterator pos = non_full_runs->find(run);
+      if (pos == non_full_runs->end()) {
+        DCHECK(run_was_full);
+        DCHECK(full_runs->find(run) != full_runs->end());
+        if (kIsDebugBuild) {
+          full_runs->erase(run);
+          if (kTraceRosAlloc) {
+            LOG(INFO) << "RosAlloc::FreeFromRun() : Erased run 0x" << std::hex
+                      << reinterpret_cast<intptr_t>(run) << " from full_runs_";
+          }
+        }
+        non_full_runs->insert(run);
+        DCHECK(!run->IsFull());
+        if (kTraceRosAlloc) {
+          LOG(INFO) << "RosAlloc::FreeFromRun() : Inserted run 0x" << std::hex
+                    << reinterpret_cast<intptr_t>(run)
+                    << " into non_full_runs_[" << std::dec << idx << "]";
+        }
+      }
+    }
+  }
+}
+
+void RosAlloc::Run::Dump() {
+  size_t idx = size_bracket_idx_;
+  size_t num_slots = numOfSlots[idx];
+  size_t num_vec = RoundUp(num_slots, 32) / 32;
+  std::string bit_map_str;
+  for (size_t v = 0; v < num_vec; v++) {
+    uint32_t vec = alloc_bit_map_[v];
+    if (v != num_vec - 1) {
+      bit_map_str.append(StringPrintf("%x-", vec));
+    } else {
+      bit_map_str.append(StringPrintf("%x", vec));
+    }
+  }
+  LOG(INFO) << "Run : " << std::hex << reinterpret_cast<intptr_t>(this)
+            << std::dec << ", idx=" << idx << ", bit_map=" << bit_map_str;
+}
+
+void* RosAlloc::Run::AllocSlot() {
+  size_t idx = size_bracket_idx_;
+  size_t num_slots = numOfSlots[idx];
+  DCHECK_LE(top_slot_idx_, num_slots);
+  if (LIKELY(top_slot_idx_ < num_slots)) {
+    // If it's in bump index mode, grab the top slot and increment the top index.
+    size_t slot_idx = top_slot_idx_;
+    byte* slot_addr = reinterpret_cast<byte*>(this) + headerSizes[idx] + slot_idx * bracketSizes[idx];
+    if (kTraceRosAlloc) {
+      LOG(INFO) << "RosAlloc::Run::AllocSlot() : 0x" << std::hex << reinterpret_cast<intptr_t>(slot_addr)
+                << ", bracket_size=" << std::dec << bracketSizes[idx] << ", slot_idx=" << slot_idx;
+    }
+    top_slot_idx_++;
+    size_t vec_idx = slot_idx / 32;
+    size_t vec_off = slot_idx % 32;
+    uint32_t* vec = &alloc_bit_map_[vec_idx];
+    DCHECK_EQ((*vec & (1 << vec_off)), static_cast<uint32_t>(0));
+    *vec |= 1 << vec_off;
+    DCHECK_NE((*vec & (1 << vec_off)), static_cast<uint32_t>(0));
+    return slot_addr;
+  }
+  // Not in bump index mode. Search the alloc bit map for an empty slot.
+  size_t num_vec = RoundUp(num_slots, 32) / 32;
+  size_t slot_idx = 0;
+  bool found_slot = false;
+  for (size_t v = 0; v < num_vec; v++) {
+    uint32_t *vecp = &alloc_bit_map_[v];
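+    // __builtin_ffs(~*vecp) is the 1-based index of the first zero bit in
+    // *vecp, or 0 if *vecp is all ones (no free slot in this word).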
+    uint32_t ffz1 = __builtin_ffs(~*vecp);
+    uint32_t ffz;
+    // TODO: Use LIKELY or UNLIKELY here?
+    if (LIKELY(ffz1 > 0 && (ffz = ffz1 - 1) + v * 32 < num_slots)) {
+      // Found an empty slot. Set the bit.
+      DCHECK_EQ((*vecp & (1 << ffz)), static_cast<uint32_t>(0));
+      *vecp |= (1 << ffz);
+      DCHECK_NE((*vecp & (1 << ffz)), static_cast<uint32_t>(0));
+      slot_idx = ffz + v * 32;
+      found_slot = true;
+      break;
+    }
+  }
+  if (LIKELY(found_slot)) {
+    byte* slot_addr = reinterpret_cast<byte*>(this) + headerSizes[idx] + slot_idx * bracketSizes[idx];
+    if (kTraceRosAlloc) {
+      LOG(INFO) << "RosAlloc::Run::AllocSlot() : 0x" << std::hex << reinterpret_cast<intptr_t>(slot_addr)
+                << ", bracket_size=" << std::dec << bracketSizes[idx] << ", slot_idx=" << slot_idx;
+    }
+    return slot_addr;
+  }
+  return NULL;
+}
+
+inline void RosAlloc::Run::FreeSlot(void* ptr) {
+  DCHECK_EQ(is_thread_local_, 0);
+  byte idx = size_bracket_idx_;
+  size_t offset_from_slot_base = reinterpret_cast<byte*>(ptr)
+      - (reinterpret_cast<byte*>(this) + headerSizes[idx]);
+  DCHECK_EQ(offset_from_slot_base % bracketSizes[idx], static_cast<size_t>(0));
+  size_t slot_idx = offset_from_slot_base / bracketSizes[idx];
+  DCHECK(slot_idx < numOfSlots[idx]);
+  size_t vec_idx = slot_idx / 32;
+  if (kIsDebugBuild) {
+    size_t num_vec = RoundUp(numOfSlots[idx], 32) / 32;
+    DCHECK(vec_idx < num_vec);
+  }
+  size_t vec_off = slot_idx % 32;
+  uint32_t* vec = &alloc_bit_map_[vec_idx];
+  DCHECK_NE((*vec & (1 << vec_off)), static_cast<uint32_t>(0));
+  *vec &= ~(1 << vec_off);
+  DCHECK_EQ((*vec & (1 << vec_off)), static_cast<uint32_t>(0));
+  if (kTraceRosAlloc) {
+    LOG(INFO) << "RosAlloc::Run::FreeSlot() : 0x" << std::hex << reinterpret_cast<intptr_t>(ptr)
+              << ", bracket_size=" << std::dec << bracketSizes[idx] << ", slot_idx=" << slot_idx;
+  }
+}
+
+inline bool RosAlloc::Run::MergeThreadLocalFreeBitMapToAllocBitMap(bool* is_all_free_after_out) {
+  DCHECK_NE(is_thread_local_, 0);
+  // Free slots in the alloc bit map based on the thread local free bit map.
+  byte idx = size_bracket_idx_;
+  size_t num_slots = numOfSlots[idx];
+  size_t num_vec = RoundUp(num_slots, 32) / 32;
+  bool changed = false;
+  uint32_t* vecp = &alloc_bit_map_[0];
+  uint32_t* tl_free_vecp = &thread_local_free_bit_map()[0];
+  bool is_all_free_after = true;
+  for (size_t v = 0; v < num_vec; v++, vecp++, tl_free_vecp++) {
+    uint32_t tl_free_vec = *tl_free_vecp;
+    uint32_t vec_before = *vecp;
+    uint32_t vec_after;
+    if (tl_free_vec != 0) {
+      vec_after = vec_before & ~tl_free_vec;
+      *vecp = vec_after;
+      changed = true;
+      *tl_free_vecp = 0;  // clear the thread local free bit map.
+    } else {
+      vec_after = vec_before;
+    }
+    if (vec_after != 0) {
+      is_all_free_after = false;
+    }
+    DCHECK_EQ(*tl_free_vecp, static_cast<uint32_t>(0));
+  }
+  *is_all_free_after_out = is_all_free_after;
+  // Return true if there was at least one bit set in the thread-local
+  // free bit map, i.e., at least one bit in the alloc bit map changed.
+  return changed;
+}
+
+inline void RosAlloc::Run::MergeBulkFreeBitMapIntoAllocBitMap() {
+  DCHECK_EQ(is_thread_local_, 0);
+  // Free slots in the alloc bit map based on the bulk free bit map.
+  byte idx = size_bracket_idx_;
+  size_t num_slots = numOfSlots[idx];
+  size_t num_vec = RoundUp(num_slots, 32) / 32;
+  uint32_t* vecp = &alloc_bit_map_[0];
+  uint32_t* free_vecp = &bulk_free_bit_map()[0];
+  for (size_t v = 0; v < num_vec; v++, vecp++, free_vecp++) {
+    uint32_t free_vec = *free_vecp;
+    if (free_vec != 0) {
+      *vecp &= ~free_vec;
+      *free_vecp = 0;  // clear the bulk free bit map.
+    }
+    DCHECK_EQ(*free_vecp, static_cast<uint32_t>(0));
+  }
+}
+
+inline void RosAlloc::Run::UnionBulkFreeBitMapToThreadLocalFreeBitMap() {
+  DCHECK_NE(is_thread_local_, 0);
+  // Union the thread local bit map with the bulk free bit map.
+  byte idx = size_bracket_idx_;
+  size_t num_slots = numOfSlots[idx];
+  size_t num_vec = RoundUp(num_slots, 32) / 32;
+  uint32_t* to_vecp = &thread_local_free_bit_map()[0];
+  uint32_t* from_vecp = &bulk_free_bit_map()[0];
+  for (size_t v = 0; v < num_vec; v++, to_vecp++, from_vecp++) {
+    uint32_t from_vec = *from_vecp;
+    if (from_vec != 0) {
+      *to_vecp |= from_vec;
+      *from_vecp = 0;  // clear the from free bit map.
+    }
+    DCHECK_EQ(*from_vecp, static_cast<uint32_t>(0));
+  }
+}
+
+inline void RosAlloc::Run::MarkThreadLocalFreeBitMap(void* ptr) {
+  DCHECK_NE(is_thread_local_, 0);
+  MarkFreeBitMapShared(ptr, thread_local_free_bit_map(), "MarkThreadLocalFreeBitMap");
+}
+
+inline void RosAlloc::Run::MarkBulkFreeBitMap(void* ptr) {
+  MarkFreeBitMapShared(ptr, bulk_free_bit_map(), "MarkBulkFreeBitMap");
+}
+
+inline void RosAlloc::Run::MarkFreeBitMapShared(void* ptr, uint32_t* free_bit_map_base,
+                                                const char* caller_name) {
+  byte idx = size_bracket_idx_;
+  size_t offset_from_slot_base = reinterpret_cast<byte*>(ptr)
+      - (reinterpret_cast<byte*>(this) + headerSizes[idx]);
+  DCHECK_EQ(offset_from_slot_base % bracketSizes[idx], static_cast<size_t>(0));
+  size_t slot_idx = offset_from_slot_base / bracketSizes[idx];
+  DCHECK(slot_idx < numOfSlots[idx]);
+  size_t vec_idx = slot_idx / 32;
+  if (kIsDebugBuild) {
+    size_t num_vec = RoundUp(numOfSlots[idx], 32) / 32;
+    DCHECK(vec_idx < num_vec);
+  }
+  size_t vec_off = slot_idx % 32;
+  uint32_t* vec = &free_bit_map_base[vec_idx];
+  DCHECK_EQ((*vec & (1 << vec_off)), static_cast<uint32_t>(0));
+  *vec |= 1 << vec_off;
+  DCHECK_NE((*vec & (1 << vec_off)), static_cast<uint32_t>(0));
+  if (kTraceRosAlloc) {
+    LOG(INFO) << "RosAlloc::Run::" << caller_name << "() : 0x" << std::hex
+              << reinterpret_cast<intptr_t>(ptr)
+              << ", bracket_size=" << std::dec << bracketSizes[idx] << ", slot_idx=" << slot_idx;
+  }
+}
+
+inline bool RosAlloc::Run::IsAllFree() {
+  byte idx = size_bracket_idx_;
+  size_t num_slots = numOfSlots[idx];
+  size_t num_vec = RoundUp(num_slots, 32) / 32;
+  for (size_t v = 0; v < num_vec; v++) {
+    uint32_t vec = alloc_bit_map_[v];
+    if (vec != 0) {
+      return false;
+    }
+  }
+  return true;
+}
+
+inline bool RosAlloc::Run::IsFull() {
+  byte idx = size_bracket_idx_;
+  size_t num_slots = numOfSlots[idx];
+  size_t num_vec = RoundUp(num_slots, 32) / 32;
+  size_t slots = 0;
+  for (size_t v = 0; v < num_vec; v++, slots += 32) {
+    DCHECK(num_slots >= slots);
+    uint32_t vec = alloc_bit_map_[v];
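+    // mask has a 1 for each slot that exists in this 32-slot word; e.g.,
+    // with 10 slots left, mask == (1 << 10) - 1. The word is full iff
+    // vec == mask.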
+    uint32_t mask = (num_slots - slots >= 32) ? static_cast<uint32_t>(-1)
+        : (1 << (num_slots - slots)) - 1;
+    DCHECK(num_slots - slots >= 32 ? mask == static_cast<uint32_t>(-1) : true);
+    if (vec != mask) {
+      return false;
+    }
+  }
+  return true;
+}
+
+inline void RosAlloc::Run::ClearBitMaps() {
+  byte idx = size_bracket_idx_;
+  size_t num_slots = numOfSlots[idx];
+  size_t num_vec = RoundUp(num_slots, 32) / 32;
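+  // The alloc, bulk free, and thread-local free bit maps are contiguous,
+  // so a single memset clears all three.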
+  memset(alloc_bit_map_, 0, sizeof(uint32_t) * num_vec * 3);
+}
+
+void RosAlloc::Run::InspectAllSlots(void (*handler)(void* start, void* end, size_t used_bytes, void* callback_arg),
+                                    void* arg) {
+  size_t idx = size_bracket_idx_;
+  byte* slot_base = reinterpret_cast<byte*>(this) + headerSizes[idx];
+  size_t num_slots = numOfSlots[idx];
+  size_t bracket_size = IndexToBracketSize(idx);
+  DCHECK_EQ(slot_base + num_slots * bracket_size, reinterpret_cast<byte*>(this) + numOfPages[idx] * kPageSize);
+  size_t num_vec = RoundUp(num_slots, 32) / 32;
+  size_t slots = 0;
+  for (size_t v = 0; v < num_vec; v++, slots += 32) {
+    DCHECK(num_slots >= slots);
+    uint32_t vec = alloc_bit_map_[v];
+    size_t end = std::min(num_slots - slots, static_cast<size_t>(32));
+    for (size_t i = 0; i < end; ++i) {
+      bool is_allocated = ((vec >> i) & 0x1) != 0;
+      byte* slot_addr = slot_base + (slots + i) * bracket_size;
+      if (is_allocated) {
+        handler(slot_addr, slot_addr + bracket_size, bracket_size, arg);
+      } else {
+        handler(slot_addr, slot_addr + bracket_size, 0, arg);
+      }
+    }
+  }
+}
+
+void RosAlloc::BulkFree(Thread* self, void** ptrs, size_t num_ptrs) {
+  if (false) {
+    // Used only to test Free() as GC uses only BulkFree().
+    for (size_t i = 0; i < num_ptrs; ++i) {
+      FreeInternal(self, ptrs[i]);
+    }
+    return;
+  }
+
+  WriterMutexLock wmu(self, bulk_free_lock_);
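+  // Free() takes bulk_free_lock_ as a reader; holding it exclusively here
+  // keeps individual frees from interleaving with the two phases below.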
+
+  // First mark slots to free in the bulk free bit map without locking the
+  // size bracket locks. On host, hash_set is faster than vector + flag.
+#ifdef HAVE_ANDROID_OS
+  std::vector<Run*> runs;
+#else
+  hash_set<Run*, hash_run, eq_run> runs;
+#endif
+  {
+    for (size_t i = 0; i < num_ptrs; i++) {
+      void* ptr = ptrs[i];
+      ptrs[i] = NULL;
+      DCHECK(base_ <= ptr && ptr < base_ + footprint_);
+      size_t pm_idx = RoundDownToPageMapIndex(ptr);
+      bool free_from_run = false;
+      Run* run = NULL;
+      {
+        MutexLock mu(self, lock_);
+        DCHECK(pm_idx < page_map_.size());
+        byte page_map_entry = page_map_[pm_idx];
+        if (kTraceRosAlloc) {
+          LOG(INFO) << "RosAlloc::BulkFree() : " << std::hex << ptr << ", pm_idx="
+                    << std::dec << pm_idx
+                    << ", page_map_entry=" << static_cast<int>(page_map_entry);
+        }
+        if (LIKELY(page_map_entry == kPageMapRun)) {
+          free_from_run = true;
+          run = reinterpret_cast<Run*>(base_ + pm_idx * kPageSize);
+          DCHECK(run->magic_num_ == kMagicNum);
+        } else if (LIKELY(page_map_entry == kPageMapRunPart)) {
+          free_from_run = true;
+          size_t pi = pm_idx;
+          DCHECK(page_map_[pi] == kPageMapRun || page_map_[pi] == kPageMapRunPart);
+          // Find the beginning of the run.
+          while (page_map_[pi] != kPageMapRun) {
+            pi--;
+            DCHECK(pi < capacity_ / kPageSize);
+          }
+          DCHECK(page_map_[pi] == kPageMapRun);
+          run = reinterpret_cast<Run*>(base_ + pi * kPageSize);
+          DCHECK(run->magic_num_ == kMagicNum);
+        } else if (page_map_entry == kPageMapLargeObject) {
+          FreePages(self, ptr);
+        } else {
+          LOG(FATAL) << "Unreachable - page map type: " << page_map_entry;
+        }
+      }
+      if (LIKELY(free_from_run)) {
+        DCHECK(run != NULL);
+        // Set the bit in the bulk free bit map.
+        run->MarkBulkFreeBitMap(ptr);
+#ifdef HAVE_ANDROID_OS
+        if (!run->to_be_bulk_freed_) {
+          run->to_be_bulk_freed_ = true;
+          runs.push_back(run);
+        }
+#else
+        runs.insert(run);
+#endif
+      }
+    }
+  }
+
+  // Now, iterate over the affected runs and update the alloc bit map
+  // based on the bulk free bit map (for non-thread-local runs) and
+  // union the bulk free bit map into the thread-local free bit map
+  // (for thread-local runs).
+#ifdef HAVE_ANDROID_OS
+  typedef std::vector<Run*>::iterator It;
+#else
+  typedef hash_set<Run*, hash_run, eq_run>::iterator It;
+#endif
+  for (It it = runs.begin(); it != runs.end(); ++it) {
+    Run* run = *it;
+#ifdef HAVE_ANDROID_OS
+    DCHECK(run->to_be_bulk_freed_);
+    run->to_be_bulk_freed_ = false;
+#endif
+    size_t idx = run->size_bracket_idx_;
+    MutexLock mu(self, *size_bracket_locks_[idx]);
+    if (run->is_thread_local_ != 0) {
+      DCHECK_LE(run->size_bracket_idx_, kMaxThreadLocalSizeBracketIdx);
+      DCHECK(non_full_runs_[idx].find(run) == non_full_runs_[idx].end());
+      DCHECK(full_runs_[idx].find(run) == full_runs_[idx].end());
+      run->UnionBulkFreeBitMapToThreadLocalFreeBitMap();
+      if (kTraceRosAlloc) {
+        LOG(INFO) << "RosAlloc::BulkFree() : Freed slot(s) in a thread local run 0x"
+                  << std::hex << reinterpret_cast<intptr_t>(run);
+      }
+      DCHECK_NE(run->is_thread_local_, 0);
+      // A thread-local run is kept thread-local even if it becomes
+      // all free.
+    } else {
+      bool run_was_full = run->IsFull();
+      run->MergeBulkFreeBitMapIntoAllocBitMap();
+      if (kTraceRosAlloc) {
+        LOG(INFO) << "RosAlloc::BulkFree() : Freed slot(s) in a run 0x" << std::hex
+                  << reinterpret_cast<intptr_t>(run);
+      }
+      // Check if the run should be moved to non_full_runs_ or
+      // free_page_runs_.
+      std::set<Run*>* non_full_runs = &non_full_runs_[idx];
+      hash_set<Run*, hash_run, eq_run>* full_runs =
+          kIsDebugBuild ? &full_runs_[idx] : NULL;
+      if (run->IsAllFree()) {
+        // It has just become completely free. Free the pages of the
+        // run.
+        bool run_was_current = run == current_runs_[idx];
+        if (run_was_current) {
+          DCHECK(full_runs->find(run) == full_runs->end());
+          DCHECK(non_full_runs->find(run) == non_full_runs->end());
+          // If it was a current run, reuse it.
+        } else if (run_was_full) {
+          // If it was full, remove it from the full run set (debug
+          // only.)
+          if (kIsDebugBuild) {
+            hash_set<Run*, hash_run, eq_run>::iterator pos = full_runs->find(run);
+            DCHECK(pos != full_runs->end());
+            full_runs->erase(pos);
+            if (kTraceRosAlloc) {
+              LOG(INFO) << "RosAlloc::BulkFree() : Erased run 0x" << std::hex
+                        << reinterpret_cast<intptr_t>(run)
+                        << " from full_runs_";
+            }
+            DCHECK(full_runs->find(run) == full_runs->end());
+          }
+        } else {
+          // If it was in a non full run set, remove it from the set.
+          DCHECK(full_runs->find(run) == full_runs->end());
+          DCHECK(non_full_runs->find(run) != non_full_runs->end());
+          non_full_runs->erase(run);
+          if (kTraceRosAlloc) {
+            LOG(INFO) << "RosAlloc::BulkFree() : Erased run 0x" << std::hex
+                      << reinterpret_cast<intptr_t>(run)
+                      << " from non_full_runs_";
+          }
+          DCHECK(non_full_runs->find(run) == non_full_runs->end());
+        }
+        if (!run_was_current) {
+          MutexLock mu(self, lock_);
+          FreePages(self, run);
+        }
+      } else {
+        // It is not completely free. If it was neither the current run
+        // nor already in the non-full run set (i.e., it was full), insert
+        // it into the non-full run set.
+        if (run == current_runs_[idx]) {
+          DCHECK(non_full_runs->find(run) == non_full_runs->end());
+          DCHECK(full_runs->find(run) == full_runs->end());
+          // If it was a current run, keep it.
+        } else if (run_was_full) {
+          // If it was full, remove it from the full run set (debug
+          // only) and insert into the non-full run set.
+          DCHECK(full_runs->find(run) != full_runs->end());
+          DCHECK(non_full_runs->find(run) == non_full_runs->end());
+          if (kIsDebugBuild) {
+            full_runs->erase(run);
+            if (kTraceRosAlloc) {
+              LOG(INFO) << "RosAlloc::BulkFree() : Erased run 0x" << std::hex
+                        << reinterpret_cast<intptr_t>(run)
+                        << " from full_runs_";
+            }
+          }
+          non_full_runs->insert(run);
+          if (kTraceRosAlloc) {
+            LOG(INFO) << "RosAlloc::BulkFree() : Inserted run 0x" << std::hex
+                      << reinterpret_cast<intptr_t>(run)
+                      << " into non_full_runs_[" << std::dec << idx;
+          }
+        } else {
+          // It was not full, so leave it in the non-full run set.
+          DCHECK(full_runs->find(run) == full_runs->end());
+          DCHECK(non_full_runs->find(run) != non_full_runs->end());
+        }
+      }
+    }
+  }
+}
+
+void RosAlloc::DumpPageMap(Thread* self) {
+  MutexLock mu(self, lock_);
+  size_t end = page_map_.size();
+  FreePageRun* curr_fpr = NULL;
+  size_t curr_fpr_size = 0;
+  size_t remaining_curr_fpr_size = 0;
+  size_t num_running_empty_pages = 0;
+  for (size_t i = 0; i < end; ++i) {
+    byte pm = page_map_[i];
+    switch (pm) {
+      case kPageMapEmpty: {
+        FreePageRun* fpr = reinterpret_cast<FreePageRun*>(base_ + i * kPageSize);
+        if (free_page_runs_.find(fpr) != free_page_runs_.end()) {
+          // Encountered a fresh free page run.
+          DCHECK_EQ(remaining_curr_fpr_size, static_cast<size_t>(0));
+          DCHECK(fpr->IsFree());
+          DCHECK(curr_fpr == NULL);
+          DCHECK_EQ(curr_fpr_size, static_cast<size_t>(0));
+          curr_fpr = fpr;
+          curr_fpr_size = fpr->ByteSize(this);
+          DCHECK_EQ(curr_fpr_size % kPageSize, static_cast<size_t>(0));
+          remaining_curr_fpr_size = curr_fpr_size - kPageSize;
+          LOG(INFO) << "[" << i << "]=Empty (FPR start)"
+                    << " fpr_size=" << curr_fpr_size
+                    << " remaining_fpr_size=" << remaining_curr_fpr_size;
+          if (remaining_curr_fpr_size == 0) {
+            // Reset at the end of the current free page run.
+            curr_fpr = NULL;
+            curr_fpr_size = 0;
+          }
+          LOG(INFO) << "curr_fpr=0x" << std::hex << reinterpret_cast<intptr_t>(curr_fpr);
+          DCHECK_EQ(num_running_empty_pages, static_cast<size_t>(0));
+        } else {
+          // Still part of the current free page run.
+          DCHECK_NE(num_running_empty_pages, static_cast<size_t>(0));
+          DCHECK(curr_fpr != NULL && curr_fpr_size > 0 && remaining_curr_fpr_size > 0);
+          DCHECK_EQ(remaining_curr_fpr_size % kPageSize, static_cast<size_t>(0));
+          DCHECK_GE(remaining_curr_fpr_size, static_cast<size_t>(kPageSize));
+          remaining_curr_fpr_size -= kPageSize;
+          LOG(INFO) << "[" << i << "]=Empty (FPR part)"
+                    << " remaining_fpr_size=" << remaining_curr_fpr_size;
+          if (remaining_curr_fpr_size == 0) {
+            // Reset at the end of the current free page run.
+            curr_fpr = NULL;
+            curr_fpr_size = 0;
+          }
+        }
+        num_running_empty_pages++;
+        break;
+      }
+      case kPageMapLargeObject: {
+        DCHECK_EQ(remaining_curr_fpr_size, static_cast<size_t>(0));
+        num_running_empty_pages = 0;
+        LOG(INFO) << "[" << i << "]=Large (start)";
+        break;
+      }
+      case kPageMapLargeObjectPart:
+        DCHECK_EQ(remaining_curr_fpr_size, static_cast<size_t>(0));
+        num_running_empty_pages = 0;
+        LOG(INFO) << "[" << i << "]=Large (part)";
+        break;
+      case kPageMapRun: {
+        DCHECK_EQ(remaining_curr_fpr_size, static_cast<size_t>(0));
+        num_running_empty_pages = 0;
+        Run* run = reinterpret_cast<Run*>(base_ + i * kPageSize);
+        size_t idx = run->size_bracket_idx_;
+        LOG(INFO) << "[" << i << "]=Run (start)"
+                  << " idx=" << idx
+                  << " numOfPages=" << numOfPages[idx]
+                  << " thread_local=" << static_cast<int>(run->is_thread_local_)
+                  << " is_all_free=" << (run->IsAllFree() ? 1 : 0);
+        break;
+      }
+      case kPageMapRunPart:
+        DCHECK_EQ(remaining_curr_fpr_size, static_cast<size_t>(0));
+        num_running_empty_pages = 0;
+        LOG(INFO) << "[" << i << "]=Run (part)";
+        break;
+      default:
+        LOG(FATAL) << "Unreachable - page map type: " << pm;
+        break;
+    }
+  }
+}
+
+size_t RosAlloc::UsableSize(void* ptr) {
+  DCHECK(base_ <= ptr && ptr < base_ + footprint_);
+  size_t pm_idx = RoundDownToPageMapIndex(ptr);
+  MutexLock mu(Thread::Current(), lock_);
+  switch (page_map_[pm_idx]) {
+  case kPageMapEmpty:
+    LOG(FATAL) << "Unreachable - RosAlloc::UsableSize(): pm_idx=" << pm_idx << ", ptr=" << std::hex
+               << reinterpret_cast<intptr_t>(ptr);
+    break;
+  case kPageMapLargeObject: {
+    size_t num_pages = 1;
+    size_t idx = pm_idx + 1;
+    size_t end = page_map_.size();
+    while (idx < end && page_map_[idx] == kPageMapLargeObjectPart) {
+      num_pages++;
+      idx++;
+    }
+    return num_pages * kPageSize;
+  }
+  case kPageMapLargeObjectPart:
+    LOG(FATAL) << "Unreachable - RosAlloc::UsableSize(): pm_idx=" << pm_idx << ", ptr=" << std::hex
+               << reinterpret_cast<intptr_t>(ptr);
+    break;
+  case kPageMapRun:
+  case kPageMapRunPart: {
+    // Find the beginning of the run.
+    while (page_map_[pm_idx] != kPageMapRun) {
+      pm_idx--;
+      DCHECK(pm_idx < capacity_ / kPageSize);
+    }
+    DCHECK(page_map_[pm_idx] == kPageMapRun);
+    Run* run = reinterpret_cast<Run*>(base_ + pm_idx * kPageSize);
+    DCHECK(run->magic_num_ == kMagicNum);
+    size_t idx = run->size_bracket_idx_;
+    size_t offset_from_slot_base = reinterpret_cast<byte*>(ptr)
+        - (reinterpret_cast<byte*>(run) + headerSizes[idx]);
+    DCHECK_EQ(offset_from_slot_base % bracketSizes[idx], static_cast<size_t>(0));
+    return IndexToBracketSize(idx);
+  }
+  default:
+    LOG(FATAL) << "Unreachable - page map type: " << page_map_[pm_idx];
+    break;
+  }
+  return 0;
+}
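+
+// For example (illustrative, assuming 4 KB pages): a pointer into a run
+// of the 16-byte bracket yields UsableSize() == 16, while a pointer to a
+// three-page large object yields UsableSize() == 3 * 4096 == 12288.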
+
+bool RosAlloc::Trim() {
+  MutexLock mu(Thread::Current(), lock_);
+  FreePageRun* last_free_page_run;
+  DCHECK_EQ(footprint_ % kPageSize, static_cast<size_t>(0));
+  auto it = free_page_runs_.rbegin();
+  if (it != free_page_runs_.rend() && (last_free_page_run = *it)->End(this) == base_ + footprint_) {
+    // Remove the last free page run, if any.
+    DCHECK(last_free_page_run->IsFree());
+    DCHECK(page_map_[ToPageMapIndex(last_free_page_run)] == kPageMapEmpty);
+    DCHECK_EQ(last_free_page_run->ByteSize(this) % kPageSize, static_cast<size_t>(0));
+    DCHECK_EQ(last_free_page_run->End(this), base_ + footprint_);
+    free_page_runs_.erase(last_free_page_run);
+    size_t decrement = last_free_page_run->ByteSize(this);
+    size_t new_footprint = footprint_ - decrement;
+    DCHECK_EQ(new_footprint % kPageSize, static_cast<size_t>(0));
+    size_t new_num_of_pages = new_footprint / kPageSize;
+    DCHECK_GE(page_map_.size(), new_num_of_pages);
+    page_map_.resize(new_num_of_pages);
+    DCHECK_EQ(page_map_.size(), new_num_of_pages);
+    free_page_run_size_map_.resize(new_num_of_pages);
+    DCHECK_EQ(free_page_run_size_map_.size(), new_num_of_pages);
+    art_heap_rosalloc_morecore(this, -(static_cast<intptr_t>(decrement)));
+    if (kTraceRosAlloc) {
+      LOG(INFO) << "RosAlloc::Trim() : decreased the footprint from "
+                << footprint_ << " to " << new_footprint;
+    }
+    DCHECK_LT(new_footprint, footprint_);
+    DCHECK_LT(new_footprint, capacity_);
+    footprint_ = new_footprint;
+    return true;
+  }
+  return false;
+}
+
+void RosAlloc::InspectAll(void (*handler)(void* start, void* end, size_t used_bytes, void* callback_arg),
+                          void* arg) {
+  // Note: no need to use this to release pages as we already do so in FreePages().
+  if (handler == NULL) {
+    return;
+  }
+  MutexLock mu(Thread::Current(), lock_);
+  size_t pm_end = page_map_.size();
+  size_t i = 0;
+  while (i < pm_end) {
+    byte pm = page_map_[i];
+    switch (pm) {
+      case kPageMapEmpty: {
+        // The start of a free page run.
+        FreePageRun* fpr = reinterpret_cast<FreePageRun*>(base_ + i * kPageSize);
+        DCHECK(free_page_runs_.find(fpr) != free_page_runs_.end());
+        size_t fpr_size = fpr->ByteSize(this);
+        DCHECK(IsAligned<kPageSize>(fpr_size));
+        void* start = fpr;
+        void* end = reinterpret_cast<byte*>(start) + fpr_size;
+        handler(start, end, 0, arg);
+        size_t num_pages = fpr_size / kPageSize;
+        if (kIsDebugBuild) {
+          for (size_t j = i + 1; j < i + num_pages; ++j) {
+            DCHECK_EQ(page_map_[j], kPageMapEmpty);
+          }
+        }
+        i += num_pages;
+        DCHECK_LE(i, pm_end);
+        break;
+      }
+      case kPageMapLargeObject: {
+        // The start of a large object.
+        size_t num_pages = 1;
+        size_t idx = i + 1;
+        while (idx < pm_end && page_map_[idx] == kPageMapLargeObjectPart) {
+          num_pages++;
+          idx++;
+        }
+        void* start = base_ + i * kPageSize;
+        void* end = base_ + (i + num_pages) * kPageSize;
+        size_t used_bytes = num_pages * kPageSize;
+        handler(start, end, used_bytes, arg);
+        if (kIsDebugBuild) {
+          for (size_t j = i + 1; j < i + num_pages; ++j) {
+            DCHECK_EQ(page_map_[j], kPageMapLargeObjectPart);
+          }
+        }
+        i += num_pages;
+        DCHECK_LE(i, pm_end);
+        break;
+      }
+      case kPageMapLargeObjectPart:
+        LOG(FATAL) << "Unreachable - page map type: " << pm;
+        break;
+      case kPageMapRun: {
+        // The start of a run.
+        Run* run = reinterpret_cast<Run*>(base_ + i * kPageSize);
+        DCHECK(run->magic_num_ == kMagicNum);
+        run->InspectAllSlots(handler, arg);
+        size_t num_pages = numOfPages[run->size_bracket_idx_];
+        if (kIsDebugBuild) {
+          for (size_t j = i + 1; j < i + num_pages; ++j) {
+            DCHECK_EQ(page_map_[j], kPageMapRunPart);
+          }
+        }
+        i += num_pages;
+        DCHECK_LE(i, pm_end);
+        break;
+      }
+      case kPageMapRunPart:
+        LOG(FATAL) << "Unreachable - page map type: " << pm;
+        break;
+      default:
+        LOG(FATAL) << "Unreachable - page map type: " << pm;
+        break;
+    }
+  }
+}
+
+size_t RosAlloc::Footprint() {
+  MutexLock mu(Thread::Current(), lock_);
+  return footprint_;
+}
+
+size_t RosAlloc::FootprintLimit() {
+  MutexLock mu(Thread::Current(), lock_);
+  return capacity_;
+}
+
+void RosAlloc::SetFootprintLimit(size_t new_capacity) {
+  MutexLock mu(Thread::Current(), lock_);
+  DCHECK_EQ(RoundUp(new_capacity, kPageSize), new_capacity);
+  // Only growing is supported here; shrinking happens via Trim().
+  if (capacity_ < new_capacity) {
+    capacity_ = new_capacity;
+    VLOG(heap) << "new capacity=" << capacity_;
+  }
+}
+
+void RosAlloc::RevokeThreadLocalRuns(Thread* thread) {
+  Thread* self = Thread::Current();
+  for (size_t idx = 0; idx < kNumOfSizeBrackets; idx++) {
+    MutexLock mu(self, *size_bracket_locks_[idx]);
+    Run* thread_local_run = reinterpret_cast<Run*>(thread->rosalloc_runs_[idx]);
+    if (thread_local_run != NULL) {
+      DCHECK_EQ(thread_local_run->magic_num_, kMagicNum);
+      DCHECK_NE(thread_local_run->is_thread_local_, 0);
+      thread->rosalloc_runs_[idx] = NULL;
+      // Note the thread-local run may not be full here.
+      bool dont_care;
+      thread_local_run->MergeThreadLocalFreeBitMapToAllocBitMap(&dont_care);
+      thread_local_run->is_thread_local_ = 0;
+      thread_local_run->MergeBulkFreeBitMapIntoAllocBitMap();
+      DCHECK(non_full_runs_[idx].find(thread_local_run) == non_full_runs_[idx].end());
+      DCHECK(full_runs_[idx].find(thread_local_run) == full_runs_[idx].end());
+      if (thread_local_run->IsFull()) {
+        if (kIsDebugBuild) {
+          full_runs_[idx].insert(thread_local_run);
+          DCHECK(full_runs_[idx].find(thread_local_run) != full_runs_[idx].end());
+          if (kTraceRosAlloc) {
+            LOG(INFO) << "RosAlloc::RevokeThreadLocalRuns() : Inserted run 0x" << std::hex
+                      << reinterpret_cast<intptr_t>(thread_local_run)
+                      << " into full_runs_[" << std::dec << idx << "]";
+          }
+        }
+      } else if (thread_local_run->IsAllFree()) {
+        MutexLock mu(self, lock_);
+        FreePages(self, thread_local_run);
+      } else {
+        non_full_runs_[idx].insert(thread_local_run);
+        DCHECK(non_full_runs_[idx].find(thread_local_run) != non_full_runs_[idx].end());
+        if (kTraceRosAlloc) {
+          LOG(INFO) << "RosAlloc::RevokeThreadLocalRuns() : Inserted run 0x" << std::hex
+                    << reinterpret_cast<intptr_t>(thread_local_run)
+                    << " into non_full_runs_[" << std::dec << idx << "]";
+        }
+      }
+    }
+  }
+}
+
+void RosAlloc::RevokeAllThreadLocalRuns() {
+  // This is called when a mutator thread won't allocate, such as at
+  // Zygote creation time or during a GC pause.
+  MutexLock mu(Thread::Current(), *Locks::thread_list_lock_);
+  std::list<Thread*> thread_list = Runtime::Current()->GetThreadList()->GetList();
+  for (auto it = thread_list.begin(); it != thread_list.end(); ++it) {
+    Thread* t = *it;
+    RevokeThreadLocalRuns(t);
+  }
+}
+
+void RosAlloc::Initialize() {
+  // Check the consistency of the number of size brackets.
+  DCHECK_EQ(Thread::kRosAllocNumOfSizeBrackets, kNumOfSizeBrackets);
+  // bracketSizes.
+  for (size_t i = 0; i < kNumOfSizeBrackets; i++) {
+    if (i < kNumOfSizeBrackets - 2) {
+      bracketSizes[i] = 16 * (i + 1);
+    } else if (i == kNumOfSizeBrackets - 2) {
+      bracketSizes[i] = 1 * KB;
+    } else {
+      DCHECK(i == kNumOfSizeBrackets - 1);
+      bracketSizes[i] = 2 * KB;
+    }
+    if (kTraceRosAlloc) {
+      LOG(INFO) << "bracketSizes[" << i << "]=" << bracketSizes[i];
+    }
+  }
+  // numOfPages.
+  for (size_t i = 0; i < kNumOfSizeBrackets; i++) {
+    if (i < 4) {
+      numOfPages[i] = 1;
+    } else if (i < 8) {
+      numOfPages[i] = 2;
+    } else if (i < 16) {
+      numOfPages[i] = 4;
+    } else if (i < 32) {
+      numOfPages[i] = 8;
+    } else if (i == 32) {
+      DCHECK_EQ(i, kNumOfSizeBrackets - 2);
+      numOfPages[i] = 16;
+    } else {
+      DCHECK_EQ(i, kNumOfSizeBrackets - 1);
+      numOfPages[i] = 32;
+    }
+    if (kTraceRosAlloc) {
+      LOG(INFO) << "numOfPages[" << i << "]=" << numOfPages[i];
+    }
+  }
+  // Compute numOfSlots, headerSizes, and the bit map offsets.
+  for (size_t i = 0; i < kNumOfSizeBrackets; i++) {
+    size_t bracket_size = bracketSizes[i];
+    size_t run_size = kPageSize * numOfPages[i];
+    size_t max_num_of_slots = run_size / bracket_size;
+    // Compute the actual number of slots by taking the header and
+    // alignment into account.
+    size_t fixed_header_size = RoundUp(Run::fixed_header_size(), sizeof(uint32_t));
+    DCHECK_EQ(fixed_header_size, static_cast<size_t>(8));
+    size_t header_size = 0;
+    size_t bulk_free_bit_map_offset = 0;
+    size_t thread_local_free_bit_map_offset = 0;
+    size_t num_of_slots = 0;
+    // Search for the maximum number of slots that allows enough space
+    // for the header (including the bit maps).
+    for (int s = max_num_of_slots; s >= 0; s--) {
+      size_t tmp_slots_size = bracket_size * s;
+      size_t tmp_bit_map_size = RoundUp(s, sizeof(uint32_t) * kBitsPerByte) / kBitsPerByte;
+      size_t tmp_bulk_free_bit_map_size = tmp_bit_map_size;
+      size_t tmp_bulk_free_bit_map_off = fixed_header_size + tmp_bit_map_size;
+      size_t tmp_thread_local_free_bit_map_size = tmp_bit_map_size;
+      size_t tmp_thread_local_free_bit_map_off = tmp_bulk_free_bit_map_off + tmp_bulk_free_bit_map_size;
+      size_t tmp_unaligned_header_size = tmp_thread_local_free_bit_map_off + tmp_thread_local_free_bit_map_size;
+      // Align up the unaligned header size. bracket_size may not be a power of two.
+      size_t tmp_header_size = (tmp_unaligned_header_size % bracket_size == 0) ?
+          tmp_unaligned_header_size :
+          tmp_unaligned_header_size + (bracket_size - tmp_unaligned_header_size % bracket_size);
+      DCHECK_EQ(tmp_header_size % bracket_size, static_cast<size_t>(0));
+      DCHECK_EQ(tmp_header_size % 8, static_cast<size_t>(0));
+      if (tmp_slots_size + tmp_header_size <= run_size) {
+        // Found the right number of slots, that is, there was enough
+        // space for the header (including the bit maps).
+        num_of_slots = s;
+        header_size = tmp_header_size;
+        bulk_free_bit_map_offset = tmp_bulk_free_bit_map_off;
+        thread_local_free_bit_map_offset = tmp_thread_local_free_bit_map_off;
+        break;
+      }
+    }
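+    // Worked example (illustrative, assuming kPageSize == 4096): for
+    // bracket 0 (16-byte slots, one page per run) the loop settles on
+    // s == 249, since each bit map takes RoundUp(249, 32) / 8 == 32
+    // bytes, the unaligned header is 8 + 3 * 32 == 104 bytes (aligned
+    // up to 112), and 112 + 249 * 16 == 4096 fills the run exactly.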
+    DCHECK(num_of_slots > 0 && header_size > 0 && bulk_free_bit_map_offset > 0);
+    // Add the padding for the alignment remainder.
+    header_size += run_size % bracket_size;
+    DCHECK(header_size + num_of_slots * bracket_size == run_size);
+    numOfSlots[i] = num_of_slots;
+    headerSizes[i] = header_size;
+    bulkFreeBitMapOffsets[i] = bulk_free_bit_map_offset;
+    threadLocalFreeBitMapOffsets[i] = thread_local_free_bit_map_offset;
+    if (kTraceRosAlloc) {
+      LOG(INFO) << "numOfSlots[" << i << "]=" << numOfSlots[i]
+                << ", headerSizes[" << i << "]=" << headerSizes[i]
+                << ", bulkFreeBitMapOffsets[" << i << "]=" << bulkFreeBitMapOffsets[i]
+                << ", threadLocalFreeBitMapOffsets[" << i << "]=" << threadLocalFreeBitMapOffsets[i];;
+    }
+  }
+}
+
+void RosAlloc::BytesAllocatedCallback(void* start, void* end, size_t used_bytes, void* arg) {
+  if (used_bytes == 0) {
+    return;
+  }
+  size_t* bytes_allocated = reinterpret_cast<size_t*>(arg);
+  *bytes_allocated += used_bytes;
+}
+
+void RosAlloc::ObjectsAllocatedCallback(void* start, void* end, size_t used_bytes, void* arg) {
+  if (used_bytes == 0) {
+    return;
+  }
+  size_t* objects_allocated = reinterpret_cast<size_t*>(arg);
+  ++(*objects_allocated);
+}
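+
+// Example use of the callbacks above (a sketch):
+//   size_t bytes = 0;
+//   rosalloc->InspectAll(RosAlloc::BytesAllocatedCallback, &bytes);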
+
+}  // namespace allocator
+}  // namespace gc
+}  // namespace art
diff --git a/runtime/gc/allocator/rosalloc.h b/runtime/gc/allocator/rosalloc.h
new file mode 100644
index 0000000..5dda80c
--- /dev/null
+++ b/runtime/gc/allocator/rosalloc.h
@@ -0,0 +1,480 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_GC_ALLOCATOR_ROSALLOC_H_
+#define ART_RUNTIME_GC_ALLOCATOR_ROSALLOC_H_
+
+#include <set>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string>
+#include <sys/mman.h>
+#include <vector>
+
+#include "base/mutex.h"
+#include "base/logging.h"
+#include "globals.h"
+#include "utils.h"
+
+// Boilerplate to use hash_map/hash_set on both host and device.
+#ifdef HAVE_ANDROID_OS
+#include <hash_map>
+#include <hash_set>
+using std::hash_map;
+using std::hash_set;
+#else  // HAVE_ANDROID_OS
+#ifdef __DEPRECATED
+#define ROSALLOC_OLD__DEPRECATED __DEPRECATED
+#undef __DEPRECATED
+#endif
+#include <ext/hash_map>
+#include <ext/hash_set>
+#ifdef ROSALLOC_OLD__DEPRECATED
+#define __DEPRECATED ROSALLOC_OLD__DEPRECATED
+#undef ROSALLOC_OLD__DEPRECATED
+#endif
+using __gnu_cxx::hash_map;
+using __gnu_cxx::hash_set;
+#endif  // HAVE_ANDROID_OS
+
+namespace art {
+namespace gc {
+namespace allocator {
+
+// A Runs-of-slots memory allocator.
+class RosAlloc {
+ private:
+  // Represents a run of free pages.
+  class FreePageRun {
+   public:
+    byte magic_num_;  // The magic number used for debugging only.
+
+    bool IsFree() const {
+      if (kIsDebugBuild) {
+        return magic_num_ == kMagicNumFree;
+      }
+      return true;
+    }
+    size_t ByteSize(RosAlloc* rosalloc) const EXCLUSIVE_LOCKS_REQUIRED(rosalloc->lock_) {
+      const byte* fpr_base = reinterpret_cast<const byte*>(this);
+      size_t pm_idx = rosalloc->ToPageMapIndex(fpr_base);
+      size_t byte_size = rosalloc->free_page_run_size_map_[pm_idx];
+      DCHECK_GE(byte_size, static_cast<size_t>(0));
+      DCHECK_EQ(byte_size % kPageSize, static_cast<size_t>(0));
+      return byte_size;
+    }
+    void SetByteSize(RosAlloc* rosalloc, size_t byte_size)
+        EXCLUSIVE_LOCKS_REQUIRED(rosalloc->lock_) {
+      DCHECK_EQ(byte_size % kPageSize, static_cast<size_t>(0));
+      byte* fpr_base = reinterpret_cast<byte*>(this);
+      size_t pm_idx = rosalloc->ToPageMapIndex(fpr_base);
+      rosalloc->free_page_run_size_map_[pm_idx] = byte_size;
+    }
+    void* Begin() {
+      return reinterpret_cast<void*>(this);
+    }
+    void* End(RosAlloc* rosalloc) EXCLUSIVE_LOCKS_REQUIRED(rosalloc->lock_) {
+      byte* fpr_base = reinterpret_cast<byte*>(this);
+      byte* end = fpr_base + ByteSize(rosalloc);
+      return end;
+    }
+    void ReleasePages(RosAlloc* rosalloc) EXCLUSIVE_LOCKS_REQUIRED(rosalloc->lock_) {
+      size_t byte_size = ByteSize(rosalloc);
+      DCHECK_EQ(byte_size % kPageSize, static_cast<size_t>(0));
+      if (kIsDebugBuild) {
+        // Exclude the first page that stores the magic number.
+        DCHECK_GE(byte_size, static_cast<size_t>(kPageSize));
+        byte_size -= kPageSize;
+        if (byte_size > 0) {
+          madvise(reinterpret_cast<byte*>(this) + kPageSize, byte_size, MADV_DONTNEED);
+        }
+      } else {
+        madvise(this, byte_size, MADV_DONTNEED);
+      }
+    }
+  };
+
+  // Represents a run of memory slots of the same size.
+  //
+  // A run's memory layout:
+  //
+  // +-------------------+
+  // | magic_num         |
+  // +-------------------+
+  // | size_bracket_idx  |
+  // +-------------------+
+  // | is_thread_local   |
+  // +-------------------+
+  // | to_be_bulk_freed  |
+  // +-------------------+
+  // | top_slot_idx      |
+  // +-------------------+
+  // |                   |
+  // | alloc bit map     |
+  // |                   |
+  // +-------------------+
+  // |                   |
+  // | bulk free bit map |
+  // |                   |
+  // +-------------------+
+  // |                   |
+  // | thread-local free |
+  // | bit map           |
+  // |                   |
+  // +-------------------+
+  // | padding due to    |
+  // | alignment         |
+  // +-------------------+
+  // | slot 0            |
+  // +-------------------+
+  // | slot 1            |
+  // +-------------------+
+  // | slot 2            |
+  // +-------------------+
+  // ...
+  // +-------------------+
+  // | last slot         |
+  // +-------------------+
+  //
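+  // For example (illustrative, assuming 4 KB pages), Initialize()
+  // computes a 112-byte header for the 16-byte bracket: an 8-byte
+  // fixed part, three 32-byte bit maps, and 8 bytes of padding,
+  // followed by 249 slots.
+  //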
+  class Run {
+   public:
+    byte magic_num_;            // The magic number used for debugging.
+    byte size_bracket_idx_;     // The index of the size bracket of this run.
+    byte is_thread_local_;      // True if this run is used as a thread-local run.
+    byte to_be_bulk_freed_;     // Used within BulkFree() to flag a run that's involved with a bulk free.
+    uint32_t top_slot_idx_;     // The top slot index when this run is in bump index mode.
+    uint32_t alloc_bit_map_[0]; // The bit map that indicates whether each slot is in use.
+
+    // bulk_free_bit_map_[] : The bit map that is used for GC to
+    // temporarily mark the slots to free without using a lock. After
+    // all the slots to be freed in a run are marked, all those slots
+    // get freed in bulk with one lock acquisition per run, as opposed
+    // to one per slot, to minimize lock contention. This is used
+    // within BulkFree().
+
+    // thread_local_free_bit_map_[] : The bit map that is used for GC
+    // to temporarily mark the slots to free in a thread-local run
+    // without using a lock (without synchronizing with the thread that
+    // owns the thread-local run). When the thread-local run becomes
+    // full, the thread will check this bit map and update the
+    // allocation bit map of the run (that is, the slots get freed).
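+
+    // Sketch (illustrative) of the bulk free flow in terms of the
+    // methods declared below: GC first calls MarkBulkFreeBitMap(ptr)
+    // per pointer without a lock, then, once per run and under a lock,
+    // either unions the bulk free bits into the thread-local free bit
+    // map (for thread-local runs) or merges them into the alloc bit map.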
+
+    // Returns the byte size of the header, excluding the bit maps.
+    static size_t fixed_header_size() {
+      Run temp;
+      size_t size = reinterpret_cast<byte*>(&temp.alloc_bit_map_) - reinterpret_cast<byte*>(&temp);
+      DCHECK_EQ(size, static_cast<size_t>(8));
+      return size;
+    }
+    // Returns the base address of the bulk free bit map.
+    uint32_t* bulk_free_bit_map() {
+      return reinterpret_cast<uint32_t*>(reinterpret_cast<byte*>(this) + bulkFreeBitMapOffsets[size_bracket_idx_]);
+    }
+    // Returns the base address of the thread-local free bit map.
+    uint32_t* thread_local_free_bit_map() {
+      return reinterpret_cast<uint32_t*>(reinterpret_cast<byte*>(this) + threadLocalFreeBitMapOffsets[size_bracket_idx_]);
+    }
+    void* End() {
+      return reinterpret_cast<byte*>(this) + kPageSize * numOfPages[size_bracket_idx_];
+    }
+    // Frees slots in the allocation bit map with regard to the
+    // thread-local free bit map. Used when a thread-local run becomes
+    // full.
+    bool MergeThreadLocalFreeBitMapToAllocBitMap(bool* is_all_free_after_out);
+    // Frees slots in the allocation bit map with regard to the bulk
+    // free bit map. Used in a bulk free.
+    void MergeBulkFreeBitMapIntoAllocBitMap();
+    // Unions the slots to be freed in the bulk free bit map into the
+    // thread-local free bit map. In a bulk free, as a two-step
+    // process, GC will first record all the slots to free in a run in
+    // the bulk free bit map, where it can write without a lock, and
+    // later acquire a lock once per run to union those bits into the
+    // thread-local free bit map.
+    void UnionBulkFreeBitMapToThreadLocalFreeBitMap();
+    // Allocates a slot in a run.
+    void* AllocSlot();
+    // Frees a slot in a run. This is used in a non-bulk free.
+    void FreeSlot(void* ptr);
+    // Marks the slots to free in the bulk free bit map.
+    void MarkBulkFreeBitMap(void* ptr);
+    // Marks the slots to free in the thread-local free bit map.
+    void MarkThreadLocalFreeBitMap(void* ptr);
+    // Returns true if none of the slots in the run are in use.
+    bool IsAllFree();
+    // Returns true if all the slots in the run are in use.
+    bool IsFull();
+    // Clears all the bit maps.
+    void ClearBitMaps();
+    // Iterates over all the slots and applies the given function.
+    void InspectAllSlots(void (*handler)(void* start, void* end, size_t used_bytes, void* callback_arg), void* arg);
+    // Dumps the run metadata for debugging.
+    void Dump();
+
+   private:
+    // The common part of MarkBulkFreeBitMap() and MarkThreadLocalFreeBitMap().
+    void MarkFreeBitMapShared(void* ptr, uint32_t* free_bit_map_base, const char* caller_name);
+  };
+
+  // The magic number for a run.
+  static const byte kMagicNum = 42;
+  // The magic number for free pages.
+  static const byte kMagicNumFree = 43;
+  // The number of size brackets. Sync this with the length of Thread::rosalloc_runs_.
+  static const size_t kNumOfSizeBrackets = 34;
+  // The number of smaller size brackets that are 16 bytes apart.
+  static const size_t kNumOfQuantumSizeBrackets = 32;
+  // The sizes (the slot sizes, in bytes) of the size brackets.
+  static size_t bracketSizes[kNumOfSizeBrackets];
+  // The numbers of pages that are used for runs for each size bracket.
+  static size_t numOfPages[kNumOfSizeBrackets];
+  // The numbers of slots of the runs for each size bracket.
+  static size_t numOfSlots[kNumOfSizeBrackets];
+  // The header sizes in bytes of the runs for each size bracket.
+  static size_t headerSizes[kNumOfSizeBrackets];
+  // The byte offsets of the bulk free bit maps of the runs for each size bracket.
+  static size_t bulkFreeBitMapOffsets[kNumOfSizeBrackets];
+  // The byte offsets of the thread-local free bit maps of the runs for each size bracket.
+  static size_t threadLocalFreeBitMapOffsets[kNumOfSizeBrackets];
+
+  // Initialize the run specs (the above arrays).
+  static void Initialize();
+  static bool initialized_;
+
+  // Returns the bracket size in bytes for the given index.
+  static size_t IndexToBracketSize(size_t idx) {
+    DCHECK(idx < kNumOfSizeBrackets);
+    return bracketSizes[idx];
+  }
+  // Returns the index of the size bracket from the bracket size.
+  static size_t BracketSizeToIndex(size_t size) {
+    DCHECK(16 <= size && ((size < 1 * KB && size % 16 == 0) || size == 1 * KB || size == 2 * KB));
+    size_t idx;
+    if (UNLIKELY(size == 1 * KB)) {
+      idx = kNumOfSizeBrackets - 2;
+    } else if (UNLIKELY(size == 2 * KB)) {
+      idx = kNumOfSizeBrackets - 1;
+    } else {
+      DCHECK(size < 1 * KB);
+      DCHECK_EQ(size % 16, static_cast<size_t>(0));
+      idx = size / 16 - 1;
+    }
+    DCHECK(bracketSizes[idx] == size);
+    return idx;
+  }
+  // Rounds up the size to the nearest bracket size.
+  static size_t RoundToBracketSize(size_t size) {
+    DCHECK(size <= kLargeSizeThreshold);
+    if (LIKELY(size <= 512)) {
+      return RoundUp(size, 16);
+    } else if (512 < size && size <= 1 * KB) {
+      return 1 * KB;
+    } else {
+      DCHECK(1 * KB < size && size <= 2 * KB);
+      return 2 * KB;
+    }
+  }
+  // Returns the size bracket index from the byte size with rounding.
+  static size_t SizeToIndex(size_t size) {
+    DCHECK(size <= kLargeSizeThreshold);
+    if (LIKELY(size <= 512)) {
+      return RoundUp(size, 16) / 16 - 1;
+    } else if (512 < size && size <= 1 * KB) {
+      return kNumOfSizeBrackets - 2;
+    } else {
+      DCHECK(1 * KB < size && size <= 2 * KB);
+      return kNumOfSizeBrackets - 1;
+    }
+  }
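+  // For example: RoundToBracketSize(40) == 48 and SizeToIndex(40) == 2,
+  // while RoundToBracketSize(600) == 1 * KB and SizeToIndex(600) ==
+  // kNumOfSizeBrackets - 2 == 32.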
+  // A combination of SizeToIndex() and RoundToBracketSize().
+  static size_t SizeToIndexAndBracketSize(size_t size, size_t* bracket_size_out) {
+    DCHECK(size <= kLargeSizeThreshold);
+    if (LIKELY(size <= 512)) {
+      size_t bracket_size = RoundUp(size, 16);
+      *bracket_size_out = bracket_size;
+      size_t idx = bracket_size / 16 - 1;
+      DCHECK_EQ(bracket_size, IndexToBracketSize(idx));
+      return idx;
+    } else if (512 < size && size <= 1 * KB) {
+      size_t bracket_size = 1024;
+      *bracket_size_out = bracket_size;
+      size_t idx = kNumOfSizeBrackets - 2;
+      DCHECK_EQ(bracket_size, IndexToBracketSize(idx));
+      return idx;
+    } else {
+      DCHECK(1 * KB < size && size <= 2 * KB);
+      size_t bracket_size = 2048;
+      *bracket_size_out = bracket_size;
+      size_t idx = kNumOfSizeBrackets - 1;
+      DCHECK_EQ(bracket_size, IndexToBracketSize(idx));
+      return idx;
+    }
+  }
+  // Returns the page map index from an address. Requires that the
+  // address is page-aligned.
+  size_t ToPageMapIndex(const void* addr) const {
+    DCHECK(base_ <= addr && addr < base_ + capacity_);
+    size_t byte_offset = reinterpret_cast<const byte*>(addr) - base_;
+    DCHECK_EQ(byte_offset % static_cast<size_t>(kPageSize), static_cast<size_t>(0));
+    return byte_offset / kPageSize;
+  }
+  // Returns the page map index from an address, rounding down.
+  size_t RoundDownToPageMapIndex(void* addr) {
+    DCHECK(base_ <= addr && addr < base_ + capacity_);
+    return (reinterpret_cast<uintptr_t>(addr) - reinterpret_cast<uintptr_t>(base_)) / kPageSize;
+  }
+
+  // A memory allocation request larger than this size is treated as a
+  // large object and allocated at page granularity.
+  static const size_t kLargeSizeThreshold = 2048;
+
+  // We use thread-local runs for the size brackets whose indexes are
+  // less than or equal to this index. We use shared (current) runs
+  // for the rest.
+  static const size_t kMaxThreadLocalSizeBracketIdx = 10;
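+  // With the bracket sizes above, this means allocations of up to
+  // 16 * (10 + 1) == 176 bytes are served from thread-local runs.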
+
+  struct hash_run {
+    size_t operator()(const RosAlloc::Run* r) const {
+      return reinterpret_cast<size_t>(r);
+    }
+  };
+
+  struct eq_run {
+    bool operator()(const RosAlloc::Run* r1, const RosAlloc::Run* r2) const {
+      return r1 == r2;
+    }
+  };
+
+  // The base address of the memory region that's managed by this allocator.
+  byte* base_;
+
+  // The footprint in bytes of the currently allocated portion of the
+  // memory region.
+  size_t footprint_;
+
+  // The maximum footprint. The address, base_ + capacity_, indicates
+  // the end of the memory region that's managed by this allocator.
+  size_t capacity_;
+
+  // The run sets that hold the runs whose slots are not all
+  // full. non_full_runs_[i] is guarded by size_bracket_locks_[i].
+  std::set<Run*> non_full_runs_[kNumOfSizeBrackets];
+  // The run sets that hold the runs whose slots are all full. This is
+  // debug only. full_runs_[i] is guarded by size_bracket_locks_[i].
+  hash_set<Run*, hash_run, eq_run> full_runs_[kNumOfSizeBrackets];
+  // The set of free pages.
+  std::set<FreePageRun*> free_page_runs_ GUARDED_BY(lock_);
+  // The free page run whose end address is the end of the memory
+  // region that's managed by this allocator, if any.
+  FreePageRun* last_free_page_run_;
+  // The current runs where the allocations are first attempted for
+  // the size brackets that do not use thread-local
+  // runs. current_runs_[i] is guarded by size_bracket_locks_[i].
+  Run* current_runs_[kNumOfSizeBrackets];
+  // The mutexes, one per size bracket.
+  Mutex* size_bracket_locks_[kNumOfSizeBrackets];
+  // The types of page map entries.
+  enum {
+    kPageMapEmpty           = 0,  // Not allocated.
+    kPageMapRun             = 1,  // The beginning of a run.
+    kPageMapRunPart         = 2,  // The non-beginning part of a run.
+    kPageMapLargeObject     = 3,  // The beginning of a large object.
+    kPageMapLargeObjectPart = 4,  // The non-beginning part of a large object.
+  };
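+  // For example (illustrative), a one-page run followed by a two-page
+  // large object and a single free page is encoded as
+  // { kPageMapRun, kPageMapLargeObject, kPageMapLargeObjectPart, kPageMapEmpty }.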
+  // The table that indicates what pages are currently used for.
+  std::vector<byte> page_map_ GUARDED_BY(lock_);
+  // The table that indicates the sizes of free page runs. These sizes
+  // are stored here, rather than in the free page headers themselves,
+  // so that the backing pages can be released.
+  std::vector<size_t> free_page_run_size_map_ GUARDED_BY(lock_);
+  // The global lock. Used to guard the page map, the free page set,
+  // and the footprint.
+  Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+  // The reader-writer lock to allow one bulk free at a time while
+  // allowing multiple individual frees at the same time.
+  ReaderWriterMutex bulk_free_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+
+  // The base address of the memory region that's managed by this allocator.
+  byte* Begin() { return base_; }
+  // The end address of the memory region that's managed by this allocator.
+  byte* End() { return base_ + capacity_; }
+
+  // Page-granularity alloc/free.
+  void* AllocPages(Thread* self, size_t num_pages, byte page_map_type)
+      EXCLUSIVE_LOCKS_REQUIRED(lock_);
+  void FreePages(Thread* self, void* ptr) EXCLUSIVE_LOCKS_REQUIRED(lock_);
+
+  // Allocate/free a run slot.
+  void* AllocFromRun(Thread* self, size_t size, size_t* bytes_allocated)
+      LOCKS_EXCLUDED(lock_);
+  void FreeFromRun(Thread* self, void* ptr, Run* run)
+      LOCKS_EXCLUDED(lock_);
+
+  // Used to acquire a new/reused run for a size bracket. Used when a
+  // thread-local or current run gets full.
+  Run* RefillRun(Thread* self, size_t idx) LOCKS_EXCLUDED(lock_);
+
+  // The internals of the non-bulk Free().
+  void FreeInternal(Thread* self, void* ptr) LOCKS_EXCLUDED(lock_);
+
+ public:
+  RosAlloc(void* base, size_t capacity);
+  void* Alloc(Thread* self, size_t size, size_t* bytes_allocated)
+      LOCKS_EXCLUDED(lock_);
+  void Free(Thread* self, void* ptr)
+      LOCKS_EXCLUDED(bulk_free_lock_);
+  void BulkFree(Thread* self, void** ptrs, size_t num_ptrs)
+      LOCKS_EXCLUDED(bulk_free_lock_);
+  // Returns the size of the allocated slot for a given allocated memory chunk.
+  size_t UsableSize(void* ptr);
+  // Returns the size of the allocated slot for a given size.
+  size_t UsableSize(size_t bytes) {
+    if (UNLIKELY(bytes > kLargeSizeThreshold)) {
+      return RoundUp(bytes, kPageSize);
+    } else {
+      return RoundToBracketSize(bytes);
+    }
+  }
+  // Try to reduce the current footprint by releasing the free page
+  // run at the end of the memory region, if any.
+  bool Trim();
+  // Iterates over all the memory slots and applies the given function.
+  void InspectAll(void (*handler)(void* start, void* end, size_t used_bytes, void* callback_arg),
+                  void* arg)
+      LOCKS_EXCLUDED(lock_);
+  // Returns the current footprint.
+  size_t Footprint() LOCKS_EXCLUDED(lock_);
+  // Returns the current capacity, i.e. the maximum footprint.
+  size_t FootprintLimit() LOCKS_EXCLUDED(lock_);
+  // Update the current capacity.
+  void SetFootprintLimit(size_t bytes) LOCKS_EXCLUDED(lock_);
+  // Releases the thread-local runs assigned to the given thread back to the common set of runs.
+  void RevokeThreadLocalRuns(Thread* thread);
+  // Releases the thread-local runs assigned to all the threads back to the common set of runs.
+  void RevokeAllThreadLocalRuns() LOCKS_EXCLUDED(Locks::thread_list_lock_);
+  // Dumps the page map for debugging.
+  void DumpPageMap(Thread* self);
+
+  // Callbacks for InspectAll that will count the number of bytes
+  // allocated and objects allocated, respectively.
+  static void BytesAllocatedCallback(void* start, void* end, size_t used_bytes, void* arg);
+  static void ObjectsAllocatedCallback(void* start, void* end, size_t used_bytes, void* arg);
+};
+
+}  // namespace allocator
+}  // namespace gc
+}  // namespace art
+
+#endif  // ART_RUNTIME_GC_ALLOCATOR_ROSALLOC_H_
diff --git a/runtime/gc/collector/garbage_collector.cc b/runtime/gc/collector/garbage_collector.cc
index 1789103..56d9ef4 100644
--- a/runtime/gc/collector/garbage_collector.cc
+++ b/runtime/gc/collector/garbage_collector.cc
@@ -83,6 +83,7 @@
     thread_list->SuspendAll();
     MarkingPhase();
     ReclaimPhase();
+    GetHeap()->RevokeAllThreadLocalBuffers();
     thread_list->ResumeAll();
     ATRACE_END();
     uint64_t pause_end = NanoTime();
@@ -101,6 +102,9 @@
       ATRACE_END();
       ATRACE_BEGIN("All mutator threads suspended");
       done = HandleDirtyObjectsPhase();
+      if (done) {
+        GetHeap()->RevokeAllThreadLocalBuffers();
+      }
       ATRACE_END();
       uint64_t pause_end = NanoTime();
       ATRACE_BEGIN("Resuming mutator threads");
@@ -135,7 +139,7 @@
       if (live_bitmap != mark_bitmap) {
         heap_->GetLiveBitmap()->ReplaceBitmap(live_bitmap, mark_bitmap);
         heap_->GetMarkBitmap()->ReplaceBitmap(mark_bitmap, live_bitmap);
-        space->AsDlMallocSpace()->SwapBitmaps();
+        space->AsMallocSpace()->SwapBitmaps();
       }
     }
   }
diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc
index 61b3f09..58068b1 100644
--- a/runtime/gc/collector/mark_sweep.cc
+++ b/runtime/gc/collector/mark_sweep.cc
@@ -613,8 +613,8 @@
 }
 
 void MarkSweep::BindLiveToMarkBitmap(space::ContinuousSpace* space) {
-  CHECK(space->IsDlMallocSpace());
-  space::DlMallocSpace* alloc_space = space->AsDlMallocSpace();
+  CHECK(space->IsMallocSpace());
+  space::MallocSpace* alloc_space = space->AsMallocSpace();
   accounting::SpaceBitmap* live_bitmap = space->GetLiveBitmap();
   accounting::SpaceBitmap* mark_bitmap = alloc_space->BindLiveToMarkBitmap();
   GetHeap()->GetMarkBitmap()->ReplaceBitmap(mark_bitmap, live_bitmap);
@@ -1126,7 +1126,7 @@
 }
 
 void MarkSweep::SweepArray(accounting::ObjectStack* allocations, bool swap_bitmaps) {
-  space::DlMallocSpace* space = heap_->GetNonMovingSpace();
+  space::MallocSpace* space = heap_->GetNonMovingSpace();
   timings_.StartSplit("SweepArray");
   // Newly allocated objects MUST be in the alloc space and those are the only objects which we are
   // going to free.
@@ -1212,7 +1212,7 @@
   scc.mark_sweep = this;
   scc.self = Thread::Current();
   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
-    if (!space->IsDlMallocSpace()) {
+    if (!space->IsMallocSpace()) {
       continue;
     }
     // We always sweep always collect spaces.
@@ -1224,7 +1224,7 @@
     if (sweep_space) {
       uintptr_t begin = reinterpret_cast<uintptr_t>(space->Begin());
       uintptr_t end = reinterpret_cast<uintptr_t>(space->End());
-      scc.space = space->AsDlMallocSpace();
+      scc.space = space->AsMallocSpace();
       accounting::SpaceBitmap* live_bitmap = space->GetLiveBitmap();
       accounting::SpaceBitmap* mark_bitmap = space->GetMarkBitmap();
       if (swap_bitmaps) {
@@ -1274,7 +1274,7 @@
 
 void MarkSweep::CheckReference(const Object* obj, const Object* ref, MemberOffset offset, bool is_static) {
   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
-    if (space->IsDlMallocSpace() && space->Contains(ref)) {
+    if (space->IsMallocSpace() && space->Contains(ref)) {
       DCHECK(IsMarked(obj));
 
       bool is_marked = IsMarked(ref);
@@ -1424,8 +1424,8 @@
 void MarkSweep::UnBindBitmaps() {
   TimingLogger::ScopedSplit split("UnBindBitmaps", &timings_);
   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
-    if (space->IsDlMallocSpace()) {
-      space::DlMallocSpace* alloc_space = space->AsDlMallocSpace();
+    if (space->IsMallocSpace()) {
+      space::MallocSpace* alloc_space = space->AsMallocSpace();
       if (alloc_space->temp_bitmap_.get() != NULL) {
         // At this point, the temp_bitmap holds our old mark bitmap.
         accounting::SpaceBitmap* new_bitmap = alloc_space->temp_bitmap_.release();
diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc
index ba98314..00794d6 100644
--- a/runtime/gc/collector/semi_space.cc
+++ b/runtime/gc/collector/semi_space.cc
@@ -368,8 +368,8 @@
 }
 
 void SemiSpace::BindLiveToMarkBitmap(space::ContinuousSpace* space) {
-  CHECK(space->IsDlMallocSpace());
-  space::DlMallocSpace* alloc_space = space->AsDlMallocSpace();
+  CHECK(space->IsMallocSpace());
+  space::MallocSpace* alloc_space = space->AsMallocSpace();
   accounting::SpaceBitmap* live_bitmap = space->GetLiveBitmap();
   accounting::SpaceBitmap* mark_bitmap = alloc_space->BindLiveToMarkBitmap();
   GetHeap()->GetMarkBitmap()->ReplaceBitmap(mark_bitmap, live_bitmap);
@@ -433,7 +433,7 @@
   scc.mark_sweep = this;
   scc.self = Thread::Current();
   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
-    if (!space->IsDlMallocSpace()) {
+    if (!space->IsMallocSpace()) {
       continue;
     }
     // We always sweep always collect spaces.
@@ -442,10 +442,10 @@
       // We sweep full collect spaces when the GC isn't a partial GC (ie its full).
       sweep_space = (space->GetGcRetentionPolicy() == space::kGcRetentionPolicyFullCollect);
     }
-    if (sweep_space && space->IsDlMallocSpace()) {
+    if (sweep_space && space->IsMallocSpace()) {
       uintptr_t begin = reinterpret_cast<uintptr_t>(space->Begin());
       uintptr_t end = reinterpret_cast<uintptr_t>(space->End());
-      scc.space = space->AsDlMallocSpace();
+      scc.space = space->AsMallocSpace();
       accounting::SpaceBitmap* live_bitmap = space->GetLiveBitmap();
       accounting::SpaceBitmap* mark_bitmap = space->GetMarkBitmap();
       if (swap_bitmaps) {
@@ -550,8 +550,8 @@
 void SemiSpace::UnBindBitmaps() {
   TimingLogger::ScopedSplit split("UnBindBitmaps", &timings_);
   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
-    if (space->IsDlMallocSpace()) {
-      space::DlMallocSpace* alloc_space = space->AsDlMallocSpace();
+    if (space->IsMallocSpace()) {
+      space::MallocSpace* alloc_space = space->AsMallocSpace();
       if (alloc_space->HasBoundBitmaps()) {
         alloc_space->UnBindBitmaps();
         heap_->GetMarkBitmap()->ReplaceBitmap(alloc_space->GetLiveBitmap(),
diff --git a/runtime/gc/collector/sticky_mark_sweep.cc b/runtime/gc/collector/sticky_mark_sweep.cc
index b27b8df..ee6077a 100644
--- a/runtime/gc/collector/sticky_mark_sweep.cc
+++ b/runtime/gc/collector/sticky_mark_sweep.cc
@@ -38,7 +38,7 @@
   // know what was allocated since the last GC. A side-effect of binding the allocation space mark
   // and live bitmap is that marking the objects will place them in the live bitmap.
   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
-    if (space->IsDlMallocSpace() &&
+    if (space->IsMallocSpace() &&
         space->GetGcRetentionPolicy() == space::kGcRetentionPolicyAlwaysCollect) {
       BindLiveToMarkBitmap(space);
     }
diff --git a/runtime/gc/heap-inl.h b/runtime/gc/heap-inl.h
index 1d3c0d8..e6829e2 100644
--- a/runtime/gc/heap-inl.h
+++ b/runtime/gc/heap-inl.h
@@ -23,6 +23,7 @@
 #include "gc/space/bump_pointer_space-inl.h"
 #include "gc/space/dlmalloc_space-inl.h"
 #include "gc/space/large_object_space.h"
+#include "gc/space/rosalloc_space-inl.h"
 #include "object_utils.h"
 #include "runtime.h"
 #include "thread.h"
@@ -41,7 +42,15 @@
                                                                    &obj, &bytes_allocated);
   if (LIKELY(!large_object_allocation)) {
     // Non-large object allocation.
-    obj = AllocateUninstrumented(self, non_moving_space_, byte_count, &bytes_allocated);
+    if (!kUseRosAlloc) {
+      DCHECK(non_moving_space_->IsDlMallocSpace());
+      obj = AllocateUninstrumented(self, reinterpret_cast<space::DlMallocSpace*>(non_moving_space_),
+                                   byte_count, &bytes_allocated);
+    } else {
+      DCHECK(non_moving_space_->IsRosAllocSpace());
+      obj = AllocateUninstrumented(self, reinterpret_cast<space::RosAllocSpace*>(non_moving_space_),
+                                   byte_count, &bytes_allocated);
+    }
     // Ensure that we did not allocate into a zygote space.
     DCHECK(obj == NULL || !have_zygote_space_ || !FindSpaceFromObject(obj, false)->IsZygoteSpace());
   }
@@ -131,6 +140,16 @@
   return space->AllocNonvirtual(self, alloc_size, bytes_allocated);
 }
 
+// RosAllocSpace-specific version.
+inline mirror::Object* Heap::TryToAllocateUninstrumented(Thread* self, space::RosAllocSpace* space, size_t alloc_size,
+                                                         bool grow, size_t* bytes_allocated) {
+  if (UNLIKELY(IsOutOfMemoryOnAllocation(alloc_size, grow))) {
+    return NULL;
+  }
+  DCHECK(!running_on_valgrind_);
+  return space->AllocNonvirtual(self, alloc_size, bytes_allocated);
+}
+
 template <class T>
 inline mirror::Object* Heap::AllocateUninstrumented(Thread* self, T* space, size_t alloc_size,
                                                     size_t* bytes_allocated) {
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index f446fcf..763bfe9 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -41,6 +41,7 @@
 #include "gc/space/dlmalloc_space-inl.h"
 #include "gc/space/image_space.h"
 #include "gc/space/large_object_space.h"
+#include "gc/space/rosalloc_space-inl.h"
 #include "gc/space/space-inl.h"
 #include "heap-inl.h"
 #include "image.h"
@@ -167,9 +168,13 @@
   }
 
   const char* name = Runtime::Current()->IsZygote() ? "zygote space" : "alloc space";
-  non_moving_space_ = space::DlMallocSpace::Create(name, initial_size, growth_limit, capacity,
-                                                   requested_alloc_space_begin);
-
+  if (!kUseRosAlloc) {
+    non_moving_space_ = space::DlMallocSpace::Create(name, initial_size, growth_limit, capacity,
+                                                     requested_alloc_space_begin);
+  } else {
+    non_moving_space_ = space::RosAllocSpace::Create(name, initial_size, growth_limit, capacity,
+                                                     requested_alloc_space_begin);
+  }
   if (kMovingCollector) {
     // TODO: Place bump-pointer spaces somewhere to minimize size of card table.
     // TODO: Having 3+ spaces as big as the large heap size can cause virtual memory fragmentation
@@ -482,8 +487,8 @@
     }
 
     continuous_spaces_.push_back(continuous_space);
-    if (continuous_space->IsDlMallocSpace()) {
-      non_moving_space_ = continuous_space->AsDlMallocSpace();
+    if (continuous_space->IsMallocSpace()) {
+      non_moving_space_ = continuous_space->AsMallocSpace();
     }
 
     // Ensure that spaces remain sorted in increasing order of start address.
@@ -501,7 +506,7 @@
       } else if (space->IsZygoteSpace()) {
         CHECK(!seen_alloc);
         seen_zygote = true;
-      } else if (space->IsDlMallocSpace()) {
+      } else if (space->IsMallocSpace()) {
         seen_alloc = true;
       }
     }
@@ -757,8 +762,15 @@
   if (!large_object_allocation && total_bytes_free >= byte_count) {
     size_t max_contiguous_allocation = 0;
     for (const auto& space : continuous_spaces_) {
-      if (space->IsDlMallocSpace()) {
-        space->AsDlMallocSpace()->Walk(MSpaceChunkCallback, &max_contiguous_allocation);
+      if (space->IsMallocSpace()) {
+        // To allow the Walk/InspectAll() to exclusively-lock the mutator
+        // lock, temporarily release the shared access to the mutator
+        // lock here by transitioning to the suspended state.
+        Locks::mutator_lock_->AssertSharedHeld(self);
+        self->TransitionFromRunnableToSuspended(kSuspended);
+        space->AsMallocSpace()->Walk(MSpaceChunkCallback, &max_contiguous_allocation);
+        self->TransitionFromSuspendedToRunnable();
+        Locks::mutator_lock_->AssertSharedHeld(self);
       }
     }
     oss << "; failed due to fragmentation (largest possible contiguous allocation "
@@ -834,7 +846,15 @@
                                                                  &bytes_allocated);
   if (LIKELY(!large_object_allocation)) {
     // Non-large object allocation.
-    obj = AllocateInstrumented(self, non_moving_space_, byte_count, &bytes_allocated);
+    if (!kUseRosAlloc) {
+      DCHECK(non_moving_space_->IsDlMallocSpace());
+      obj = AllocateInstrumented(self, reinterpret_cast<space::DlMallocSpace*>(non_moving_space_),
+                                 byte_count, &bytes_allocated);
+    } else {
+      DCHECK(non_moving_space_->IsRosAllocSpace());
+      obj = AllocateInstrumented(self, reinterpret_cast<space::RosAllocSpace*>(non_moving_space_),
+                                 byte_count, &bytes_allocated);
+    }
     // Ensure that we did not allocate into a zygote space.
     DCHECK(obj == NULL || !have_zygote_space_ || !FindSpaceFromObject(obj, false)->IsZygoteSpace());
   }
@@ -866,8 +886,8 @@
   uint64_t total_alloc_space_size = 0;
   uint64_t managed_reclaimed = 0;
   for (const auto& space : continuous_spaces_) {
-    if (space->IsDlMallocSpace() && !space->IsZygoteSpace()) {
-      gc::space::DlMallocSpace* alloc_space = space->AsDlMallocSpace();
+    if (space->IsMallocSpace() && !space->IsZygoteSpace()) {
+      gc::space::MallocSpace* alloc_space = space->AsMallocSpace();
       total_alloc_space_size += alloc_space->Size();
       managed_reclaimed += alloc_space->Trim();
     }
@@ -1101,6 +1121,19 @@
   }
 }
 
+// RosAllocSpace-specific version.
+inline mirror::Object* Heap::TryToAllocateInstrumented(Thread* self, space::RosAllocSpace* space, size_t alloc_size,
+                                                       bool grow, size_t* bytes_allocated) {
+  if (UNLIKELY(IsOutOfMemoryOnAllocation(alloc_size, grow))) {
+    return NULL;
+  }
+  if (LIKELY(!running_on_valgrind_)) {
+    return space->AllocNonvirtual(self, alloc_size, bytes_allocated);
+  } else {
+    return space->Alloc(self, alloc_size, bytes_allocated);
+  }
+}
+
 template <class T>
 inline mirror::Object* Heap::AllocateInstrumented(Thread* self, T* space, size_t alloc_size,
                                                   size_t* bytes_allocated) {
@@ -1390,14 +1423,14 @@
   }
   // Turn the current alloc space into a zygote space and obtain the new alloc space composed of
   // the remaining available heap memory.
-  space::DlMallocSpace* zygote_space = non_moving_space_;
+  space::MallocSpace* zygote_space = non_moving_space_;
   non_moving_space_ = zygote_space->CreateZygoteSpace("alloc space");
   non_moving_space_->SetFootprintLimit(non_moving_space_->Capacity());
   // Change the GC retention policy of the zygote space to only collect when full.
   zygote_space->SetGcRetentionPolicy(space::kGcRetentionPolicyFullCollect);
   AddSpace(non_moving_space_);
   have_zygote_space_ = true;
-  zygote_space->InvalidateMSpace();
+  zygote_space->InvalidateAllocator();
   // Create the zygote space mod union table.
   accounting::ModUnionTable* mod_union_table =
       new accounting::ModUnionTableCardCache("zygote space mod-union table", this, zygote_space);
@@ -1639,8 +1672,8 @@
 
         // Attmept to find the class inside of the recently freed objects.
         space::ContinuousSpace* ref_space = heap_->FindContinuousSpaceFromObject(ref, true);
-        if (ref_space != nullptr && ref_space->IsDlMallocSpace()) {
-          space::DlMallocSpace* space = ref_space->AsDlMallocSpace();
+        if (ref_space != nullptr && ref_space->IsMallocSpace()) {
+          space::MallocSpace* space = ref_space->AsMallocSpace();
           mirror::Class* ref_class = space->FindRecentFreedObject(ref);
           if (ref_class != nullptr) {
             LOG(ERROR) << "Reference " << ref << " found as a recently freed object with class "
@@ -2280,6 +2313,14 @@
   }
 }
 
+void Heap::RevokeThreadLocalBuffers(Thread* thread) {
+  non_moving_space_->RevokeThreadLocalBuffers(thread);
+}
+
+void Heap::RevokeAllThreadLocalBuffers() {
+  non_moving_space_->RevokeAllThreadLocalBuffers();
+}
+
 bool Heap::IsGCRequestPending() const {
   return concurrent_start_bytes_ != std::numeric_limits<size_t>::max();
 }
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index 08bec99..6e8890c 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -69,6 +69,8 @@
   class DlMallocSpace;
   class ImageSpace;
   class LargeObjectSpace;
+  class MallocSpace;
+  class RosAllocSpace;
   class Space;
   class SpaceTest;
   class ContinuousMemMapAllocSpace;
@@ -106,6 +108,9 @@
 };
 static constexpr HeapVerificationMode kDesiredHeapVerification = kNoHeapVerification;
 
+// If true, use rosalloc/RosAllocSpace instead of dlmalloc/DlMallocSpace.
+static constexpr bool kUseRosAlloc = false;
+
 class Heap {
  public:
   // If true, measure the total allocation time.
@@ -413,6 +418,9 @@
   // Trim the managed and native heaps by releasing unused memory back to the OS.
   void Trim();
 
+  void RevokeThreadLocalBuffers(Thread* thread);
+  void RevokeAllThreadLocalBuffers();
+
   accounting::HeapBitmap* GetLiveBitmap() SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) {
     return live_bitmap_.get();
   }
@@ -447,7 +455,7 @@
   // Assumes there is only one image space.
   space::ImageSpace* GetImageSpace() const;
 
-  space::DlMallocSpace* GetNonMovingSpace() const {
+  space::MallocSpace* GetNonMovingSpace() const {
     return non_moving_space_;
   }
 
@@ -539,6 +547,12 @@
       LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  // Try to allocate a number of bytes; this function never does any GCs. RosAllocSpace-specialized version.
+  mirror::Object* TryToAllocateInstrumented(Thread* self, space::RosAllocSpace* space, size_t alloc_size,
+                                            bool grow, size_t* bytes_allocated)
+      LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   mirror::Object* TryToAllocateUninstrumented(Thread* self, space::AllocSpace* space, size_t alloc_size,
                                               bool grow, size_t* bytes_allocated)
       LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_)
@@ -549,6 +563,11 @@
       LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  mirror::Object* TryToAllocateUninstrumented(Thread* self, space::RosAllocSpace* space, size_t alloc_size,
+                                              bool grow, size_t* bytes_allocated)
+      LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   void ThrowOutOfMemoryError(Thread* self, size_t byte_count, bool large_object_allocation)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   bool IsOutOfMemoryOnAllocation(size_t alloc_size, bool grow);
@@ -642,7 +661,7 @@
 
   // A space where non-movable objects are allocated, when compaction is enabled it contains
   // Classes, ArtMethods, ArtFields, and non moving objects.
-  space::DlMallocSpace* non_moving_space_;
+  space::MallocSpace* non_moving_space_;
 
   // The large object space we are currently allocating into.
   space::LargeObjectSpace* large_object_space_;
diff --git a/runtime/gc/space/dlmalloc_space-inl.h b/runtime/gc/space/dlmalloc_space-inl.h
index fb2c66b..fbbba1f 100644
--- a/runtime/gc/space/dlmalloc_space-inl.h
+++ b/runtime/gc/space/dlmalloc_space-inl.h
@@ -18,6 +18,7 @@
 #define ART_RUNTIME_GC_SPACE_DLMALLOC_SPACE_INL_H_
 
 #include "dlmalloc_space.h"
+#include "thread.h"
 
 namespace art {
 namespace gc {
@@ -28,7 +29,7 @@
   mirror::Object* obj;
   {
     MutexLock mu(self, lock_);
-    obj = AllocWithoutGrowthLocked(num_bytes, bytes_allocated);
+    obj = AllocWithoutGrowthLocked(self, num_bytes, bytes_allocated);
   }
   if (LIKELY(obj != NULL)) {
     // Zero freshly allocated memory, done while not holding the space's lock.
@@ -37,7 +38,8 @@
   return obj;
 }
 
-inline mirror::Object* DlMallocSpace::AllocWithoutGrowthLocked(size_t num_bytes, size_t* bytes_allocated) {
+inline mirror::Object* DlMallocSpace::AllocWithoutGrowthLocked(Thread* /*self*/, size_t num_bytes,
+                                                               size_t* bytes_allocated) {
   mirror::Object* result = reinterpret_cast<mirror::Object*>(mspace_malloc(mspace_, num_bytes));
   if (LIKELY(result != NULL)) {
     if (kDebugSpaces) {
diff --git a/runtime/gc/space/dlmalloc_space.cc b/runtime/gc/space/dlmalloc_space.cc
index 1c7aa22..fcac588 100644
--- a/runtime/gc/space/dlmalloc_space.cc
+++ b/runtime/gc/space/dlmalloc_space.cc
@@ -13,13 +13,17 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
 #include "dlmalloc_space.h"
+
 #include "dlmalloc_space-inl.h"
 #include "gc/accounting/card_table.h"
 #include "gc/heap.h"
+#include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
 #include "runtime.h"
 #include "thread.h"
+#include "thread_list.h"
 #include "utils.h"
 
 #include <valgrind.h>
@@ -29,16 +33,6 @@
 namespace gc {
 namespace space {
 
-// TODO: Remove define macro
-#define CHECK_MEMORY_CALL(call, args, what) \
-  do { \
-    int rc = call args; \
-    if (UNLIKELY(rc != 0)) { \
-      errno = rc; \
-      PLOG(FATAL) << # call << " failed for " << what; \
-    } \
-  } while (false)
-
 static const bool kPrefetchDuringDlMallocFreeList = true;
 
 // Number of bytes to use as a red zone (rdz). A red zone of this size will be placed before and
@@ -114,81 +108,38 @@
   DISALLOW_COPY_AND_ASSIGN(ValgrindDlMallocSpace);
 };
 
-size_t DlMallocSpace::bitmap_index_ = 0;
-
 DlMallocSpace::DlMallocSpace(const std::string& name, MemMap* mem_map, void* mspace, byte* begin,
-                       byte* end, byte* limit, size_t growth_limit)
-    : ContinuousMemMapAllocSpace(name, mem_map, begin, end, limit, kGcRetentionPolicyAlwaysCollect),
-      recent_free_pos_(0), total_bytes_freed_(0), total_objects_freed_(0),
-      lock_("allocation space lock", kAllocSpaceLock), mspace_(mspace),
-      growth_limit_(growth_limit) {
+                             byte* end, byte* limit, size_t growth_limit)
+    : MallocSpace(name, mem_map, begin, end, limit, growth_limit),
+      total_bytes_freed_(0), total_objects_freed_(0), mspace_(mspace) {
   CHECK(mspace != NULL);
-  size_t bitmap_index = bitmap_index_++;
-  static const uintptr_t kGcCardSize = static_cast<uintptr_t>(accounting::CardTable::kCardSize);
-  CHECK(IsAligned<kGcCardSize>(reinterpret_cast<uintptr_t>(mem_map->Begin())));
-  CHECK(IsAligned<kGcCardSize>(reinterpret_cast<uintptr_t>(mem_map->End())));
-  live_bitmap_.reset(accounting::SpaceBitmap::Create(
-      StringPrintf("allocspace %s live-bitmap %d", name.c_str(), static_cast<int>(bitmap_index)),
-      Begin(), Capacity()));
-  DCHECK(live_bitmap_.get() != NULL) << "could not create allocspace live bitmap #" << bitmap_index;
-  mark_bitmap_.reset(accounting::SpaceBitmap::Create(
-      StringPrintf("allocspace %s mark-bitmap %d", name.c_str(), static_cast<int>(bitmap_index)),
-      Begin(), Capacity()));
-  DCHECK(live_bitmap_.get() != NULL) << "could not create allocspace mark bitmap #" << bitmap_index;
-  for (auto& freed : recent_freed_objects_) {
-    freed.first = nullptr;
-    freed.second = nullptr;
-  }
 }
 
-DlMallocSpace* DlMallocSpace::Create(const std::string& name, size_t initial_size, size_t
-                                     growth_limit, size_t capacity, byte* requested_begin) {
-  // Memory we promise to dlmalloc before it asks for morecore.
-  // Note: making this value large means that large allocations are unlikely to succeed as dlmalloc
-  // will ask for this memory from sys_alloc which will fail as the footprint (this value plus the
-  // size of the large allocation) will be greater than the footprint limit.
-  size_t starting_size = kPageSize;
+DlMallocSpace* DlMallocSpace::Create(const std::string& name, size_t initial_size, size_t growth_limit,
+                                     size_t capacity, byte* requested_begin) {
   uint64_t start_time = 0;
   if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) {
     start_time = NanoTime();
-    VLOG(startup) << "Space::CreateAllocSpace entering " << name
+    VLOG(startup) << "DlMallocSpace::Create entering " << name
                   << " initial_size=" << PrettySize(initial_size)
                   << " growth_limit=" << PrettySize(growth_limit)
                   << " capacity=" << PrettySize(capacity)
                   << " requested_begin=" << reinterpret_cast<void*>(requested_begin);
   }
 
-  // Sanity check arguments
-  if (starting_size > initial_size) {
-    initial_size = starting_size;
-  }
-  if (initial_size > growth_limit) {
-    LOG(ERROR) << "Failed to create alloc space (" << name << ") where the initial size ("
-        << PrettySize(initial_size) << ") is larger than its capacity ("
-        << PrettySize(growth_limit) << ")";
+  // Memory we promise to dlmalloc before it asks for morecore.
+  // Note: making this value large means that large allocations are unlikely to succeed as dlmalloc
+  // will ask for this memory from sys_alloc which will fail as the footprint (this value plus the
+  // size of the large allocation) will be greater than the footprint limit.
+  size_t starting_size = kPageSize;
+  MemMap* mem_map = CreateMemMap(name, starting_size, &initial_size, &growth_limit, &capacity,
+                                 requested_begin);
+  if (mem_map == NULL) {
+    LOG(ERROR) << "Failed to create mem map for alloc space (" << name << ") of size "
+               << PrettySize(capacity);
     return NULL;
   }
-  if (growth_limit > capacity) {
-    LOG(ERROR) << "Failed to create alloc space (" << name << ") where the growth limit capacity ("
-        << PrettySize(growth_limit) << ") is larger than the capacity ("
-        << PrettySize(capacity) << ")";
-    return NULL;
-  }
-
-  // Page align growth limit and capacity which will be used to manage mmapped storage
-  growth_limit = RoundUp(growth_limit, kPageSize);
-  capacity = RoundUp(capacity, kPageSize);
-
-  std::string error_msg;
-  UniquePtr<MemMap> mem_map(MemMap::MapAnonymous(name.c_str(), requested_begin, capacity,
-                                                 PROT_READ | PROT_WRITE, &error_msg));
-  if (mem_map.get() == NULL) {
-    LOG(ERROR) << "Failed to allocate pages for alloc space (" << name << ") of size "
-        << PrettySize(capacity) << ": " << error_msg;
-    return NULL;
-  }
-
-  void* mspace = CreateMallocSpace(mem_map->Begin(), starting_size, initial_size);
+  void* mspace = CreateMspace(mem_map->Begin(), starting_size, initial_size);
   if (mspace == NULL) {
     LOG(ERROR) << "Failed to initialize mspace for alloc space (" << name << ")";
     return NULL;
@@ -201,24 +152,23 @@
   }
 
   // Everything is set so record in immutable structure and leave
-  MemMap* mem_map_ptr = mem_map.release();
   DlMallocSpace* space;
-  byte* begin = mem_map_ptr->Begin();
+  byte* begin = mem_map->Begin();
   if (RUNNING_ON_VALGRIND > 0) {
-    space = new ValgrindDlMallocSpace(name, mem_map_ptr, mspace, begin, end, begin + capacity,
+    space = new ValgrindDlMallocSpace(name, mem_map, mspace, begin, end, begin + capacity,
                                       growth_limit, initial_size);
   } else {
-    space = new DlMallocSpace(name, mem_map_ptr, mspace, begin, end, begin + capacity, growth_limit);
+    space = new DlMallocSpace(name, mem_map, mspace, begin, end, begin + capacity, growth_limit);
   }
   // We start out with only the initial size possibly containing objects.
   if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) {
-    LOG(INFO) << "Space::CreateAllocSpace exiting (" << PrettyDuration(NanoTime() - start_time)
+    LOG(INFO) << "DlMallocSpace::Create exiting (" << PrettyDuration(NanoTime() - start_time)
         << " ) " << *space;
   }
   return space;
 }
 
-void* DlMallocSpace::CreateMallocSpace(void* begin, size_t morecore_start, size_t initial_size) {
+void* DlMallocSpace::CreateMspace(void* begin, size_t morecore_start, size_t initial_size) {
   // clear errno to allow PLOG on error
   errno = 0;
   // create mspace using our backing storage starting at begin and with a footprint of
@@ -234,14 +184,6 @@
   return msp;
 }
 
-void DlMallocSpace::SwapBitmaps() {
-  live_bitmap_.swap(mark_bitmap_);
-  // Swap names to get more descriptive diagnostics.
-  std::string temp_name(live_bitmap_->GetName());
-  live_bitmap_->SetName(mark_bitmap_->GetName());
-  mark_bitmap_->SetName(temp_name);
-}
-
 mirror::Object* DlMallocSpace::Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated) {
   return AllocNonvirtual(self, num_bytes, bytes_allocated);
 }
@@ -250,11 +192,11 @@
   mirror::Object* result;
   {
     MutexLock mu(self, lock_);
-    // Grow as much as possible within the mspace.
+    // Grow as much as possible within the space.
     size_t max_allowed = Capacity();
     mspace_set_footprint_limit(mspace_, max_allowed);
     // Try the allocation.
-    result = AllocWithoutGrowthLocked(num_bytes, bytes_allocated);
+    result = AllocWithoutGrowthLocked(self, num_bytes, bytes_allocated);
     // Shrink back down as small as possible.
     size_t footprint = mspace_footprint(mspace_);
     mspace_set_footprint_limit(mspace_, footprint);
@@ -268,83 +210,9 @@
   return result;
 }
 
-void DlMallocSpace::SetGrowthLimit(size_t growth_limit) {
-  growth_limit = RoundUp(growth_limit, kPageSize);
-  growth_limit_ = growth_limit;
-  if (Size() > growth_limit_) {
-    end_ = begin_ + growth_limit;
-  }
-}
-
-DlMallocSpace* DlMallocSpace::CreateZygoteSpace(const char* alloc_space_name) {
-  end_ = reinterpret_cast<byte*>(RoundUp(reinterpret_cast<uintptr_t>(end_), kPageSize));
-  DCHECK(IsAligned<accounting::CardTable::kCardSize>(begin_));
-  DCHECK(IsAligned<accounting::CardTable::kCardSize>(end_));
-  DCHECK(IsAligned<kPageSize>(begin_));
-  DCHECK(IsAligned<kPageSize>(end_));
-  size_t size = RoundUp(Size(), kPageSize);
-  // Trim the heap so that we minimize the size of the Zygote space.
-  Trim();
-  // TODO: Not hardcode these in?
-  const size_t starting_size = kPageSize;
-  const size_t initial_size = 2 * MB;
-  // Remaining size is for the new alloc space.
-  const size_t growth_limit = growth_limit_ - size;
-  const size_t capacity = Capacity() - size;
-  VLOG(heap) << "Begin " << reinterpret_cast<const void*>(begin_) << "\n"
-             << "End " << reinterpret_cast<const void*>(end_) << "\n"
-             << "Size " << size << "\n"
-             << "GrowthLimit " << growth_limit_ << "\n"
-             << "Capacity " << Capacity();
-  SetGrowthLimit(RoundUp(size, kPageSize));
-  SetFootprintLimit(RoundUp(size, kPageSize));
-  // FIXME: Do we need reference counted pointers here?
-  // Make the two spaces share the same mark bitmaps since the bitmaps span both of the spaces.
-  VLOG(heap) << "Creating new AllocSpace: ";
-  VLOG(heap) << "Size " << GetMemMap()->Size();
-  VLOG(heap) << "GrowthLimit " << PrettySize(growth_limit);
-  VLOG(heap) << "Capacity " << PrettySize(capacity);
-  // Remap the tail.
-  std::string error_msg;
-  UniquePtr<MemMap> mem_map(GetMemMap()->RemapAtEnd(end_, alloc_space_name,
-                                                    PROT_READ | PROT_WRITE, &error_msg));
-  CHECK(mem_map.get() != nullptr) << error_msg;
-  void* mspace = CreateMallocSpace(end_, starting_size, initial_size);
-  // Protect memory beyond the initial size.
-  byte* end = mem_map->Begin() + starting_size;
-  if (capacity - initial_size > 0) {
-    CHECK_MEMORY_CALL(mprotect, (end, capacity - initial_size, PROT_NONE), alloc_space_name);
-  }
-  DlMallocSpace* alloc_space =
-      new DlMallocSpace(alloc_space_name, mem_map.release(), mspace, end_, end, limit_,
-                        growth_limit);
-  SetLimit(End());
-  live_bitmap_->SetHeapLimit(reinterpret_cast<uintptr_t>(End()));
-  CHECK_EQ(live_bitmap_->HeapLimit(), reinterpret_cast<uintptr_t>(End()));
-  mark_bitmap_->SetHeapLimit(reinterpret_cast<uintptr_t>(End()));
-  CHECK_EQ(mark_bitmap_->HeapLimit(), reinterpret_cast<uintptr_t>(End()));
-  VLOG(heap) << "zygote space creation done";
-  return alloc_space;
-}
-
-mirror::Class* DlMallocSpace::FindRecentFreedObject(const mirror::Object* obj) {
-  size_t pos = recent_free_pos_;
-  // Start at the most recently freed object and work our way back since there may be duplicates
-  // caused by dlmalloc reusing memory.
-  if (kRecentFreeCount > 0) {
-    for (size_t i = 0; i + 1 < kRecentFreeCount + 1; ++i) {
-      pos = pos != 0 ? pos - 1 : kRecentFreeMask;
-      if (recent_freed_objects_[pos].first == obj) {
-        return recent_freed_objects_[pos].second;
-      }
-    }
-  }
-  return nullptr;
-}
-
-void DlMallocSpace::RegisterRecentFree(mirror::Object* ptr) {
-  recent_freed_objects_[recent_free_pos_] = std::make_pair(ptr, ptr->GetClass());
-  recent_free_pos_ = (recent_free_pos_ + 1) & kRecentFreeMask;
+MallocSpace* DlMallocSpace::CreateInstance(const std::string& name, MemMap* mem_map,
+                                           void* allocator, byte* begin, byte* end,
+                                           byte* limit, size_t growth_limit) {
+  return new DlMallocSpace(name, mem_map, allocator, begin, end, limit, growth_limit);
 }
 
 size_t DlMallocSpace::Free(Thread* self, mirror::Object* ptr) {
@@ -411,40 +279,11 @@
 // Callback from dlmalloc when it needs to increase the footprint
 extern "C" void* art_heap_morecore(void* mspace, intptr_t increment) {
   Heap* heap = Runtime::Current()->GetHeap();
-  DCHECK_EQ(heap->GetNonMovingSpace()->GetMspace(), mspace);
+  DCHECK(heap->GetNonMovingSpace()->IsDlMallocSpace());
+  DCHECK_EQ(heap->GetNonMovingSpace()->AsDlMallocSpace()->GetMspace(), mspace);
   return heap->GetNonMovingSpace()->MoreCore(increment);
 }
 
-void* DlMallocSpace::MoreCore(intptr_t increment) {
-  lock_.AssertHeld(Thread::Current());
-  byte* original_end = end_;
-  if (increment != 0) {
-    VLOG(heap) << "DlMallocSpace::MoreCore " << PrettySize(increment);
-    byte* new_end = original_end + increment;
-    if (increment > 0) {
-      // Should never be asked to increase the allocation beyond the capacity of the space. Enforced
-      // by mspace_set_footprint_limit.
-      CHECK_LE(new_end, Begin() + Capacity());
-      CHECK_MEMORY_CALL(mprotect, (original_end, increment, PROT_READ | PROT_WRITE), GetName());
-    } else {
-      // Should never be asked for negative footprint (ie before begin)
-      CHECK_GT(original_end + increment, Begin());
-      // Advise we don't need the pages and protect them
-      // TODO: by removing permissions to the pages we may be causing TLB shoot-down which can be
-      // expensive (note the same isn't true for giving permissions to a page as the protected
-      // page shouldn't be in a TLB). We should investigate performance impact of just
-      // removing ignoring the memory protection change here and in Space::CreateAllocSpace. It's
-      // likely just a useful debug feature.
-      size_t size = -increment;
-      CHECK_MEMORY_CALL(madvise, (new_end, size, MADV_DONTNEED), GetName());
-      CHECK_MEMORY_CALL(mprotect, (new_end, size, PROT_NONE), GetName());
-    }
-    // Update end_
-    end_ = new_end;
-  }
-  return original_end;
-}
-
 // Virtual functions can't get inlined.
 inline size_t DlMallocSpace::InternalAllocationSize(const mirror::Object* obj) {
   return AllocationSizeNonvirtual(obj);
@@ -481,32 +320,9 @@
   return mspace_footprint_limit(mspace_);
 }
 
-// Returns the old mark bitmap.
-accounting::SpaceBitmap* DlMallocSpace::BindLiveToMarkBitmap() {
-  accounting::SpaceBitmap* live_bitmap = GetLiveBitmap();
-  accounting::SpaceBitmap* mark_bitmap = mark_bitmap_.release();
-  temp_bitmap_.reset(mark_bitmap);
-  mark_bitmap_.reset(live_bitmap);
-  return mark_bitmap;
-}
-
-bool DlMallocSpace::HasBoundBitmaps() const {
-  return temp_bitmap_.get() != nullptr;
-}
-
-void DlMallocSpace::UnBindBitmaps() {
-  CHECK(HasBoundBitmaps());
-  // At this point, the temp_bitmap holds our old mark bitmap.
-  accounting::SpaceBitmap* new_bitmap = temp_bitmap_.release();
-  CHECK_EQ(mark_bitmap_.release(), live_bitmap_.get());
-  mark_bitmap_.reset(new_bitmap);
-  DCHECK(temp_bitmap_.get() == NULL);
-}
-
-
 void DlMallocSpace::SetFootprintLimit(size_t new_size) {
   MutexLock mu(Thread::Current(), lock_);
-  VLOG(heap) << "DLMallocSpace::SetFootprintLimit " << PrettySize(new_size);
+  VLOG(heap) << "DlMallocSpace::SetFootprintLimit " << PrettySize(new_size);
   // Compare against the actual footprint, rather than the Size(), because the heap may not have
   // grown all the way to the allowed size yet.
   size_t current_space_size = mspace_footprint(mspace_);
@@ -517,14 +333,6 @@
   mspace_set_footprint_limit(mspace_, new_size);
 }
 
-void DlMallocSpace::Dump(std::ostream& os) const {
-  os << GetType()
-      << " begin=" << reinterpret_cast<void*>(Begin())
-      << ",end=" << reinterpret_cast<void*>(End())
-      << ",size=" << PrettySize(Size()) << ",capacity=" << PrettySize(Capacity())
-      << ",name=\"" << GetName() << "\"]";
-}
-
 uint64_t DlMallocSpace::GetBytesAllocated() {
   if (mspace_ != nullptr) {
     MutexLock mu(Thread::Current(), lock_);
@@ -547,6 +355,12 @@
   }
 }
 
+#ifndef NDEBUG
+void DlMallocSpace::CheckMoreCoreForPrecondition() {
+  lock_.AssertHeld(Thread::Current());
+}
+#endif
+
 }  // namespace space
 }  // namespace gc
 }  // namespace art
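
For readers new to dlmalloc's mspace API, the pairing above -- CreateMspace plus the
art_heap_morecore callback -- follows a standard pattern: build an mspace over pre-reserved
memory and use the footprint limit to gate how much morecore may hand out. A minimal
standalone sketch (not ART code; it assumes a dlmalloc build with MSPACES enabled and uses
only public dlmalloc entry points):

    #include <sys/mman.h>
    #include "dlmalloc.h"  // assumed: a dlmalloc build with MSPACES enabled

    // Reserve `capacity` bytes, hand dlmalloc only `starting_size` of it, and cap
    // growth at `initial_size`; allocations beyond the cap fail until the limit is
    // raised, which is how the space throttles morecore.
    void* CreateBoundedMspace(size_t starting_size, size_t initial_size, size_t capacity) {
      void* base = mmap(nullptr, capacity, PROT_READ | PROT_WRITE,
                        MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
      if (base == MAP_FAILED) {
        return nullptr;
      }
      // Third argument 0: no internal locking; callers serialize externally,
      // as DlMallocSpace does with lock_.
      void* msp = create_mspace_with_base(base, starting_size, 0);
      if (msp != nullptr) {
        mspace_set_footprint_limit(msp, initial_size);
      }
      return msp;
    }
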
diff --git a/runtime/gc/space/dlmalloc_space.h b/runtime/gc/space/dlmalloc_space.h
index 59dafe3..63b1c21 100644
--- a/runtime/gc/space/dlmalloc_space.h
+++ b/runtime/gc/space/dlmalloc_space.h
@@ -18,6 +18,7 @@
 #define ART_RUNTIME_GC_SPACE_DLMALLOC_SPACE_H_
 
 #include "gc/allocator/dlmalloc.h"
+#include "malloc_space.h"
 #include "space.h"
 
 namespace art {
@@ -30,33 +31,19 @@
 namespace space {
 
 // An alloc space is a space where objects may be allocated and garbage collected.
-class DlMallocSpace : public ContinuousMemMapAllocSpace {
+class DlMallocSpace : public MallocSpace {
  public:
-  typedef void(*WalkCallback)(void *start, void *end, size_t num_bytes, void* callback_arg);
 
-  SpaceType GetType() const {
-    if (GetGcRetentionPolicy() == kGcRetentionPolicyFullCollect) {
-      return kSpaceTypeZygoteSpace;
-    } else {
-      return kSpaceTypeAllocSpace;
-    }
-  }
-
-  // Create a AllocSpace with the requested sizes. The requested
+  // Create a DlMallocSpace with the requested sizes. The requested
   // base address is not guaranteed to be granted, if it is required,
-  // the caller should call Begin on the returned space to confirm
-  // the request was granted.
+  // the caller should call Begin on the returned space to confirm the
+  // request was granted.
   static DlMallocSpace* Create(const std::string& name, size_t initial_size, size_t growth_limit,
                                size_t capacity, byte* requested_begin);
 
-  // Allocate num_bytes without allowing the underlying mspace to grow.
   virtual mirror::Object* AllocWithGrowth(Thread* self, size_t num_bytes,
                                           size_t* bytes_allocated) LOCKS_EXCLUDED(lock_);
-
-  // Allocate num_bytes allowing the underlying mspace to grow.
   virtual mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated);
-
-  // Return the storage space required by obj.
   virtual size_t AllocationSize(const mirror::Object* obj);
   virtual size_t Free(Thread* self, mirror::Object* ptr);
   virtual size_t FreeList(Thread* self, size_t num_ptrs, mirror::Object** ptrs);
@@ -64,17 +51,19 @@
   mirror::Object* AllocNonvirtual(Thread* self, size_t num_bytes, size_t* bytes_allocated);
 
   size_t AllocationSizeNonvirtual(const mirror::Object* obj) {
-    return mspace_usable_size(const_cast<void*>(reinterpret_cast<const void*>(obj))) +
-        kChunkOverhead;
+    void* obj_ptr = const_cast<void*>(reinterpret_cast<const void*>(obj));
+    return mspace_usable_size(obj_ptr) + kChunkOverhead;
   }
 
-  void* MoreCore(intptr_t increment);
+#ifndef NDEBUG
+  // Override only in the debug build.
+  void CheckMoreCoreForPrecondition();
+#endif
 
   void* GetMspace() const {
     return mspace_;
   }
 
-  // Hands unused pages back to the system.
   size_t Trim();
 
   // Perform a mspace_inspect_all which calls back for each allocation chunk. The chunk may not be
@@ -93,39 +82,8 @@
   // allocations fail we GC before increasing the footprint limit and allowing the mspace to grow.
   void SetFootprintLimit(size_t limit);
 
-  // Removes the fork time growth limit on capacity, allowing the application to allocate up to the
-  // maximum reserved size of the heap.
-  void ClearGrowthLimit() {
-    growth_limit_ = NonGrowthLimitCapacity();
-  }
-
-  // Override capacity so that we only return the possibly limited capacity
-  size_t Capacity() const {
-    return growth_limit_;
-  }
-
-  // The total amount of memory reserved for the alloc space.
-  size_t NonGrowthLimitCapacity() const {
-    return GetMemMap()->Size();
-  }
-
-  accounting::SpaceBitmap* GetLiveBitmap() const {
-    return live_bitmap_.get();
-  }
-
-  accounting::SpaceBitmap* GetMarkBitmap() const {
-    return mark_bitmap_.get();
-  }
-
-  void Dump(std::ostream& os) const;
-
-  void SetGrowthLimit(size_t growth_limit);
-
-  // Swap the live and mark bitmaps of this space. This is used by the GC for concurrent sweeping.
-  void SwapBitmaps();
-
-  // Turn ourself into a zygote space and return a new alloc space which has our unused memory.
-  DlMallocSpace* CreateZygoteSpace(const char* alloc_space_name);
+  MallocSpace* CreateInstance(const std::string& name, MemMap* mem_map, void* allocator,
+                              byte* begin, byte* end, byte* limit, size_t growth_limit);
 
   uint64_t GetBytesAllocated();
   uint64_t GetObjectsAllocated();
@@ -136,66 +94,45 @@
     return GetObjectsAllocated() + total_objects_freed_;
   }
 
-  // Returns the old mark bitmap.
-  accounting::SpaceBitmap* BindLiveToMarkBitmap();
-  bool HasBoundBitmaps() const;
-  void UnBindBitmaps();
-
   // Returns the class of a recently freed object.
   mirror::Class* FindRecentFreedObject(const mirror::Object* obj);
 
-  // Used to ensure that failure happens when you free / allocate into an invalidated space. If we
-  // don't do this we may get heap corruption instead of a segfault at null.
-  void InvalidateMSpace() {
+  virtual void InvalidateAllocator() {
     mspace_ = nullptr;
   }
 
+  virtual bool IsDlMallocSpace() const {
+    return true;
+  }
+  virtual DlMallocSpace* AsDlMallocSpace() {
+    return this;
+  }
+
  protected:
   DlMallocSpace(const std::string& name, MemMap* mem_map, void* mspace, byte* begin, byte* end,
                 byte* limit, size_t growth_limit);
 
  private:
   size_t InternalAllocationSize(const mirror::Object* obj);
-  mirror::Object* AllocWithoutGrowthLocked(size_t num_bytes, size_t* bytes_allocated)
+
+  mirror::Object* AllocWithoutGrowthLocked(Thread* self, size_t num_bytes, size_t* bytes_allocated)
       EXCLUSIVE_LOCKS_REQUIRED(lock_);
-  bool Init(size_t initial_size, size_t maximum_size, size_t growth_size, byte* requested_base);
-  void RegisterRecentFree(mirror::Object* ptr) EXCLUSIVE_LOCKS_REQUIRED(lock_);
-  static void* CreateMallocSpace(void* base, size_t morecore_start, size_t initial_size);
 
-  UniquePtr<accounting::SpaceBitmap> live_bitmap_;
-  UniquePtr<accounting::SpaceBitmap> mark_bitmap_;
-  UniquePtr<accounting::SpaceBitmap> temp_bitmap_;
-
-  // Recent allocation buffer.
-  static constexpr size_t kRecentFreeCount = kDebugSpaces ? (1 << 16) : 0;
-  static constexpr size_t kRecentFreeMask = kRecentFreeCount - 1;
-  std::pair<const mirror::Object*, mirror::Class*> recent_freed_objects_[kRecentFreeCount];
-  size_t recent_free_pos_;
+  void* CreateAllocator(void* base, size_t morecore_start, size_t initial_size) {
+    return CreateMspace(base, morecore_start, initial_size);
+  }
+  static void* CreateMspace(void* base, size_t morecore_start, size_t initial_size);
 
   // Approximate number of bytes and objects which have been deallocated in the space.
   size_t total_bytes_freed_;
   size_t total_objects_freed_;
 
-  static size_t bitmap_index_;
-
   // The boundary tag overhead.
   static const size_t kChunkOverhead = kWordSize;
 
-  // Used to ensure mutual exclusion when the allocation spaces data structures are being modified.
-  Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
-
   // Underlying malloc space
   void* mspace_;
 
-  // The capacity of the alloc space until such time that ClearGrowthLimit is called.
-  // The underlying mem_map_ controls the maximum size we allow the heap to grow to. The growth
-  // limit is a value <= to the mem_map_ capacity used for ergonomic reasons because of the zygote.
-  // Prior to forking the zygote the heap will have a maximally sized mem_map_ but the growth_limit_
-  // will be set to a lower value. The growth_limit_ is used as the capacity of the alloc_space_,
-  // however, capacity normally can't vary. In the case of the growth_limit_ it can be cleared
-  // one time by a call to ClearGrowthLimit.
-  size_t growth_limit_;
-
   friend class collector::MarkSweep;
 
   DISALLOW_COPY_AND_ASSIGN(DlMallocSpace);
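
One detail worth noting in AllocationSizeNonvirtual above: mspace_usable_size reports only
the caller-visible bytes of a chunk, so the space adds kChunkOverhead (one word of boundary
tag) to charge each object its true cost. A hypothetical illustration, assuming an existing
mspace msp:

    // Illustrative only: mirrors the accounting in AllocationSizeNonvirtual.
    size_t ChargedSize(void* msp, size_t request) {
      void* p = mspace_malloc(msp, request);
      // The usable size covers the payload; the word-sized boundary tag does not
      // appear in it, so add it back to get the chunk's real footprint.
      size_t charged = mspace_usable_size(p) + sizeof(void*);  // sizeof(void*) stands in for kWordSize
      mspace_free(msp, p);
      return charged;
    }
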
diff --git a/runtime/gc/space/large_object_space.h b/runtime/gc/space/large_object_space.h
index 07fb288..d374ad3 100644
--- a/runtime/gc/space/large_object_space.h
+++ b/runtime/gc/space/large_object_space.h
@@ -116,7 +116,8 @@
   virtual ~FreeListSpace();
   static FreeListSpace* Create(const std::string& name, byte* requested_begin, size_t capacity);
 
-  size_t AllocationSize(const mirror::Object* obj) EXCLUSIVE_LOCKS_REQUIRED(lock_);
+  size_t AllocationSize(const mirror::Object* obj)
+      EXCLUSIVE_LOCKS_REQUIRED(lock_);
   mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated);
   size_t Free(Thread* self, mirror::Object* obj);
   bool Contains(const mirror::Object* obj) const;
diff --git a/runtime/gc/space/malloc_space.cc b/runtime/gc/space/malloc_space.cc
new file mode 100644
index 0000000..785b5ed
--- /dev/null
+++ b/runtime/gc/space/malloc_space.cc
@@ -0,0 +1,243 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "malloc_space.h"
+
+#include "gc/accounting/card_table.h"
+#include "gc/heap.h"
+#include "mirror/class-inl.h"
+#include "mirror/object-inl.h"
+#include "runtime.h"
+#include "thread.h"
+#include "thread_list.h"
+#include "utils.h"
+
+namespace art {
+namespace gc {
+namespace space {
+
+size_t MallocSpace::bitmap_index_ = 0;
+
+MallocSpace::MallocSpace(const std::string& name, MemMap* mem_map,
+                         byte* begin, byte* end, byte* limit, size_t growth_limit)
+    : ContinuousMemMapAllocSpace(name, mem_map, begin, end, limit, kGcRetentionPolicyAlwaysCollect),
+      recent_free_pos_(0), lock_("allocation space lock", kAllocSpaceLock),
+      growth_limit_(growth_limit) {
+  size_t bitmap_index = bitmap_index_++;
+  static const uintptr_t kGcCardSize = static_cast<uintptr_t>(accounting::CardTable::kCardSize);
+  CHECK(IsAligned<kGcCardSize>(reinterpret_cast<uintptr_t>(mem_map->Begin())));
+  CHECK(IsAligned<kGcCardSize>(reinterpret_cast<uintptr_t>(mem_map->End())));
+  live_bitmap_.reset(accounting::SpaceBitmap::Create(
+      StringPrintf("allocspace %s live-bitmap %d", name.c_str(), static_cast<int>(bitmap_index)),
+      Begin(), Capacity()));
+  DCHECK(live_bitmap_.get() != NULL) << "could not create allocspace live bitmap #" << bitmap_index;
+  mark_bitmap_.reset(accounting::SpaceBitmap::Create(
+      StringPrintf("allocspace %s mark-bitmap %d", name.c_str(), static_cast<int>(bitmap_index)),
+      Begin(), Capacity()));
+  DCHECK(live_bitmap_.get() != NULL) << "could not create allocspace mark bitmap #" << bitmap_index;
+  for (auto& freed : recent_freed_objects_) {
+    freed.first = nullptr;
+    freed.second = nullptr;
+  }
+}
+
+MemMap* MallocSpace::CreateMemMap(const std::string& name, size_t starting_size, size_t* initial_size,
+                                  size_t* growth_limit, size_t* capacity, byte* requested_begin) {
+  // Sanity check arguments
+  if (starting_size > *initial_size) {
+    *initial_size = starting_size;
+  }
+  if (*initial_size > *growth_limit) {
+    LOG(ERROR) << "Failed to create alloc space (" << name << ") where the initial size ("
+        << PrettySize(*initial_size) << ") is larger than its capacity ("
+        << PrettySize(*growth_limit) << ")";
+    return NULL;
+  }
+  if (*growth_limit > *capacity) {
+    LOG(ERROR) << "Failed to create alloc space (" << name << ") where the growth limit capacity ("
+        << PrettySize(*growth_limit) << ") is larger than the capacity ("
+        << PrettySize(*capacity) << ")";
+    return NULL;
+  }
+
+  // Page align growth limit and capacity which will be used to manage mmapped storage
+  *growth_limit = RoundUp(*growth_limit, kPageSize);
+  *capacity = RoundUp(*capacity, kPageSize);
+
+  std::string error_msg;
+  MemMap* mem_map = MemMap::MapAnonymous(name.c_str(), requested_begin, *capacity,
+                                         PROT_READ | PROT_WRITE, &error_msg);
+  if (mem_map == NULL) {
+    LOG(ERROR) << "Failed to allocate pages for alloc space (" << name << ") of size "
+               << PrettySize(*capacity) << ": " << error_msg;
+    return NULL;
+  }
+  return mem_map;
+}
+
+void MallocSpace::SwapBitmaps() {
+  live_bitmap_.swap(mark_bitmap_);
+  // Swap names to get more descriptive diagnostics.
+  std::string temp_name(live_bitmap_->GetName());
+  live_bitmap_->SetName(mark_bitmap_->GetName());
+  mark_bitmap_->SetName(temp_name);
+}
+
+mirror::Class* MallocSpace::FindRecentFreedObject(const mirror::Object* obj) {
+  size_t pos = recent_free_pos_;
+  // Start at the most recently freed object and work our way back since there may be duplicates
+  // caused by the underlying allocator reusing memory.
+  if (kRecentFreeCount > 0) {
+    for (size_t i = 0; i + 1 < kRecentFreeCount + 1; ++i) {
+      pos = pos != 0 ? pos - 1 : kRecentFreeMask;
+      if (recent_freed_objects_[pos].first == obj) {
+        return recent_freed_objects_[pos].second;
+      }
+    }
+  }
+  return nullptr;
+}
+
+void MallocSpace::RegisterRecentFree(mirror::Object* ptr) {
+  recent_freed_objects_[recent_free_pos_] = std::make_pair(ptr, ptr->GetClass());
+  recent_free_pos_ = (recent_free_pos_ + 1) & kRecentFreeMask;
+}
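
The two functions above form a fixed-size ring buffer: kRecentFreeCount is a power of two,
so the cursor wraps with a mask rather than a modulo, and lookups walk newest-to-oldest
because the allocator may hand the same address out repeatedly. A self-contained sketch of
the idiom (illustrative types, not ART's):

    #include <cstddef>
    #include <utility>

    template <typename K, typename V, size_t kCount>  // kCount must be a power of two
    class RecentRing {
     public:
      void Record(const K& key, const V& value) {
        entries_[pos_] = std::make_pair(key, value);
        pos_ = (pos_ + 1) & (kCount - 1);  // branch-free wraparound
      }
      // Newest-first scan, so duplicate keys resolve to the latest record.
      V Find(const K& key) const {
        size_t pos = pos_;
        for (size_t i = 0; i < kCount; ++i) {
          pos = (pos != 0) ? pos - 1 : kCount - 1;
          if (entries_[pos].first == key) {
            return entries_[pos].second;
          }
        }
        return V();
      }
     private:
      std::pair<K, V> entries_[kCount] = {};
      size_t pos_ = 0;
    };
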
+
+void MallocSpace::SetGrowthLimit(size_t growth_limit) {
+  growth_limit = RoundUp(growth_limit, kPageSize);
+  growth_limit_ = growth_limit;
+  if (Size() > growth_limit_) {
+    end_ = begin_ + growth_limit;
+  }
+}
+
+void* MallocSpace::MoreCore(intptr_t increment) {
+  CheckMoreCoreForPrecondition();
+  byte* original_end = end_;
+  if (increment != 0) {
+    VLOG(heap) << "MallocSpace::MoreCore " << PrettySize(increment);
+    byte* new_end = original_end + increment;
+    if (increment > 0) {
+      // Should never be asked to increase the allocation beyond the capacity of the space. Enforced
+      // by mspace_set_footprint_limit.
+      CHECK_LE(new_end, Begin() + Capacity());
+      CHECK_MEMORY_CALL(mprotect, (original_end, increment, PROT_READ | PROT_WRITE), GetName());
+    } else {
+      // Should never be asked for a negative footprint (i.e., before begin). A zero footprint is ok.
+      CHECK_GE(original_end + increment, Begin());
+      // Advise we don't need the pages and protect them
+      // TODO: by removing permissions to the pages we may be causing TLB shoot-down which can be
+      // expensive (note the same isn't true for giving permissions to a page as the protected
+      // page shouldn't be in a TLB). We should investigate the performance impact of simply
+      // ignoring the memory protection change here and in Space::CreateAllocSpace. It's
+      // likely just a useful debug feature.
+      size_t size = -increment;
+      CHECK_MEMORY_CALL(madvise, (new_end, size, MADV_DONTNEED), GetName());
+      CHECK_MEMORY_CALL(mprotect, (new_end, size, PROT_NONE), GetName());
+    }
+    // Update end_
+    end_ = new_end;
+  }
+  return original_end;
+}
+
+// Returns the old mark bitmap.
+accounting::SpaceBitmap* MallocSpace::BindLiveToMarkBitmap() {
+  accounting::SpaceBitmap* live_bitmap = GetLiveBitmap();
+  accounting::SpaceBitmap* mark_bitmap = mark_bitmap_.release();
+  temp_bitmap_.reset(mark_bitmap);
+  mark_bitmap_.reset(live_bitmap);
+  return mark_bitmap;
+}
+
+bool MallocSpace::HasBoundBitmaps() const {
+  return temp_bitmap_.get() != nullptr;
+}
+
+void MallocSpace::UnBindBitmaps() {
+  CHECK(HasBoundBitmaps());
+  // At this point, the temp_bitmap holds our old mark bitmap.
+  accounting::SpaceBitmap* new_bitmap = temp_bitmap_.release();
+  CHECK_EQ(mark_bitmap_.release(), live_bitmap_.get());
+  mark_bitmap_.reset(new_bitmap);
+  DCHECK(temp_bitmap_.get() == NULL);
+}
+
+MallocSpace* MallocSpace::CreateZygoteSpace(const char* alloc_space_name) {
+  // For RosAlloc, revoke thread local runs before creating a new
+  // alloc space so that we won't mix thread local runs from different
+  // alloc spaces.
+  RevokeAllThreadLocalBuffers();
+  end_ = reinterpret_cast<byte*>(RoundUp(reinterpret_cast<uintptr_t>(end_), kPageSize));
+  DCHECK(IsAligned<accounting::CardTable::kCardSize>(begin_));
+  DCHECK(IsAligned<accounting::CardTable::kCardSize>(end_));
+  DCHECK(IsAligned<kPageSize>(begin_));
+  DCHECK(IsAligned<kPageSize>(end_));
+  size_t size = RoundUp(Size(), kPageSize);
+  // Trim the heap so that we minimize the size of the Zygote space.
+  Trim();
+  // TODO: Not hardcode these in?
+  const size_t starting_size = kPageSize;
+  const size_t initial_size = 2 * MB;
+  // Remaining size is for the new alloc space.
+  const size_t growth_limit = growth_limit_ - size;
+  const size_t capacity = Capacity() - size;
+  VLOG(heap) << "Begin " << reinterpret_cast<const void*>(begin_) << "\n"
+             << "End " << reinterpret_cast<const void*>(end_) << "\n"
+             << "Size " << size << "\n"
+             << "GrowthLimit " << growth_limit_ << "\n"
+             << "Capacity " << Capacity();
+  SetGrowthLimit(RoundUp(size, kPageSize));
+  SetFootprintLimit(RoundUp(size, kPageSize));
+  // FIXME: Do we need reference counted pointers here?
+  // Make the two spaces share the same mark bitmaps since the bitmaps span both of the spaces.
+  VLOG(heap) << "Creating new AllocSpace: ";
+  VLOG(heap) << "Size " << GetMemMap()->Size();
+  VLOG(heap) << "GrowthLimit " << PrettySize(growth_limit);
+  VLOG(heap) << "Capacity " << PrettySize(capacity);
+  // Remap the tail.
+  std::string error_msg;
+  UniquePtr<MemMap> mem_map(GetMemMap()->RemapAtEnd(end_, alloc_space_name,
+                                                    PROT_READ | PROT_WRITE, &error_msg));
+  CHECK(mem_map.get() != nullptr) << error_msg;
+  void* allocator = CreateAllocator(end_, starting_size, initial_size);
+  // Protect memory beyond the initial size.
+  byte* end = mem_map->Begin() + starting_size;
+  if (capacity - initial_size > 0) {
+    CHECK_MEMORY_CALL(mprotect, (end, capacity - initial_size, PROT_NONE), alloc_space_name);
+  }
+  MallocSpace* alloc_space = CreateInstance(alloc_space_name, mem_map.release(), allocator,
+                                            end_, end, limit_, growth_limit);
+  SetLimit(End());
+  live_bitmap_->SetHeapLimit(reinterpret_cast<uintptr_t>(End()));
+  CHECK_EQ(live_bitmap_->HeapLimit(), reinterpret_cast<uintptr_t>(End()));
+  mark_bitmap_->SetHeapLimit(reinterpret_cast<uintptr_t>(End()));
+  CHECK_EQ(mark_bitmap_->HeapLimit(), reinterpret_cast<uintptr_t>(End()));
+  VLOG(heap) << "zygote space creation done";
+  return alloc_space;
+}
+
+void MallocSpace::Dump(std::ostream& os) const {
+  os << GetType()
+      << " begin=" << reinterpret_cast<void*>(Begin())
+      << ",end=" << reinterpret_cast<void*>(End())
+      << ",size=" << PrettySize(Size()) << ",capacity=" << PrettySize(Capacity())
+      << ",name=\"" << GetName() << "\"]";
+}
+
+}  // namespace space
+}  // namespace gc
+}  // namespace art
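
MoreCore above is the one growth point now shared by both allocators: growing re-enables
access to pages that were reserved up front, while shrinking advises the kernel that the
pages are disposable and then revokes access so stale pointers fault loudly. A standalone
sketch of that protocol (illustrative; the real version also holds lock_ via
CheckMoreCoreForPrecondition, updates end_, and aborts through CHECK_MEMORY_CALL):

    #include <sys/mman.h>
    #include <cstdint>
    #include <cstdlib>

    // `end` is the current break inside a fixed anonymous reservation.
    char* MoreCoreSketch(char* end, intptr_t increment) {
      char* old_end = end;
      if (increment > 0) {
        // The pages are already reserved; just grant read/write access.
        if (mprotect(old_end, static_cast<size_t>(increment), PROT_READ | PROT_WRITE) != 0) {
          abort();
        }
      } else if (increment < 0) {
        size_t size = static_cast<size_t>(-increment);
        char* new_end = old_end + increment;
        // Let the kernel reclaim the backing pages, then drop access so a
        // use-after-shrink becomes a segfault rather than silent corruption.
        madvise(new_end, size, MADV_DONTNEED);
        mprotect(new_end, size, PROT_NONE);
      }
      return old_end;  // morecore-style: return the previous break
    }
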
diff --git a/runtime/gc/space/malloc_space.h b/runtime/gc/space/malloc_space.h
new file mode 100644
index 0000000..bd21a4d
--- /dev/null
+++ b/runtime/gc/space/malloc_space.h
@@ -0,0 +1,191 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_GC_SPACE_MALLOC_SPACE_H_
+#define ART_RUNTIME_GC_SPACE_MALLOC_SPACE_H_
+
+#include "space.h"
+
+namespace art {
+namespace gc {
+
+namespace collector {
+  class MarkSweep;
+}  // namespace collector
+
+namespace space {
+
+// TODO: Remove define macro
+#define CHECK_MEMORY_CALL(call, args, what) \
+  do { \
+    int rc = call args; \
+    if (UNLIKELY(rc != 0)) { \
+      errno = rc; \
+      PLOG(FATAL) << # call << " failed for " << what; \
+    } \
+  } while (false)
+
+// const bool kUseRosAlloc = true;
+
+// A common parent of DlMallocSpace and RosAllocSpace.
+class MallocSpace : public ContinuousMemMapAllocSpace {
+ public:
+  typedef void(*WalkCallback)(void *start, void *end, size_t num_bytes, void* callback_arg);
+
+  SpaceType GetType() const {
+    if (GetGcRetentionPolicy() == kGcRetentionPolicyFullCollect) {
+      return kSpaceTypeZygoteSpace;
+    } else {
+      return kSpaceTypeAllocSpace;
+    }
+  }
+
+  // Allocate num_bytes, allowing the underlying space to grow its footprint limit.
+  virtual mirror::Object* AllocWithGrowth(Thread* self, size_t num_bytes,
+                                          size_t* bytes_allocated) = 0;
+  // Allocate num_bytes without growing the space beyond its current footprint limit.
+  virtual mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated) = 0;
+  // Return the storage space required by obj.
+  virtual size_t AllocationSize(const mirror::Object* obj) = 0;
+  virtual size_t Free(Thread* self, mirror::Object* ptr) = 0;
+  virtual size_t FreeList(Thread* self, size_t num_ptrs, mirror::Object** ptrs) = 0;
+
+#ifndef NDEBUG
+  virtual void CheckMoreCoreForPrecondition() {}  // to be overridden in the debug build.
+#else
+  void CheckMoreCoreForPrecondition() {}  // no-op in the non-debug build.
+#endif
+
+  void* MoreCore(intptr_t increment);
+
+  // Hands unused pages back to the system.
+  virtual size_t Trim() = 0;
+
+  // Walk the heap, calling back for each allocation chunk. The chunk may not be
+  // in use, indicated by num_bytes equaling zero.
+  virtual void Walk(WalkCallback callback, void* arg) = 0;
+
+  // Returns the number of bytes that the space has currently obtained from the system. This is
+  // greater or equal to the amount of live data in the space.
+  virtual size_t GetFootprint() = 0;
+
+  // Returns the number of bytes that the heap is allowed to obtain from the system via MoreCore.
+  virtual size_t GetFootprintLimit() = 0;
+
+  // Set the maximum number of bytes that the heap is allowed to obtain from the system via
+  // MoreCore. Note this is used to stop the space growing beyond the limit to Capacity. When
+  // allocations fail we GC before increasing the footprint limit and allowing the space to grow.
+  virtual void SetFootprintLimit(size_t limit) = 0;
+
+  // Removes the fork time growth limit on capacity, allowing the application to allocate up to the
+  // maximum reserved size of the heap.
+  void ClearGrowthLimit() {
+    growth_limit_ = NonGrowthLimitCapacity();
+  }
+
+  // Override capacity so that we only return the possibly limited capacity
+  size_t Capacity() const {
+    return growth_limit_;
+  }
+
+  // The total amount of memory reserved for the alloc space.
+  size_t NonGrowthLimitCapacity() const {
+    return GetMemMap()->Size();
+  }
+
+  accounting::SpaceBitmap* GetLiveBitmap() const {
+    return live_bitmap_.get();
+  }
+
+  accounting::SpaceBitmap* GetMarkBitmap() const {
+    return mark_bitmap_.get();
+  }
+
+  void Dump(std::ostream& os) const;
+
+  void SetGrowthLimit(size_t growth_limit);
+
+  // Swap the live and mark bitmaps of this space. This is used by the GC for concurrent sweeping.
+  void SwapBitmaps();
+
+  virtual MallocSpace* CreateInstance(const std::string& name, MemMap* mem_map, void* allocator,
+                                      byte* begin, byte* end, byte* limit, size_t growth_limit) = 0;
+
+  // Turn ourself into a zygote space and return a new alloc space which has our unused memory.
+  MallocSpace* CreateZygoteSpace(const char* alloc_space_name);
+
+  virtual uint64_t GetBytesAllocated() = 0;
+  virtual uint64_t GetObjectsAllocated() = 0;
+  virtual uint64_t GetTotalBytesAllocated() = 0;
+  virtual uint64_t GetTotalObjectsAllocated() = 0;
+
+  // Returns the old mark bitmap.
+  accounting::SpaceBitmap* BindLiveToMarkBitmap();
+  bool HasBoundBitmaps() const;
+  void UnBindBitmaps();
+
+  // Returns the class of a recently freed object.
+  mirror::Class* FindRecentFreedObject(const mirror::Object* obj);
+
+  // Used to ensure that failure happens when you free / allocate into an invalidated space. If we
+  // don't do this we may get heap corruption instead of a segfault at null.
+  virtual void InvalidateAllocator() = 0;
+
+ protected:
+  MallocSpace(const std::string& name, MemMap* mem_map, byte* begin, byte* end,
+              byte* limit, size_t growth_limit);
+
+  static MemMap* CreateMemMap(const std::string& name, size_t starting_size, size_t* initial_size,
+                              size_t* growth_limit, size_t* capacity, byte* requested_begin);
+
+  virtual void* CreateAllocator(void* base, size_t morecore_start, size_t initial_size) = 0;
+
+  void RegisterRecentFree(mirror::Object* ptr) EXCLUSIVE_LOCKS_REQUIRED(lock_);
+
+  UniquePtr<accounting::SpaceBitmap> live_bitmap_;
+  UniquePtr<accounting::SpaceBitmap> mark_bitmap_;
+  UniquePtr<accounting::SpaceBitmap> temp_bitmap_;
+
+  // Recent allocation buffer.
+  static constexpr size_t kRecentFreeCount = kDebugSpaces ? (1 << 16) : 0;
+  static constexpr size_t kRecentFreeMask = kRecentFreeCount - 1;
+  std::pair<const mirror::Object*, mirror::Class*> recent_freed_objects_[kRecentFreeCount];
+  size_t recent_free_pos_;
+
+  static size_t bitmap_index_;
+
+  // Used to ensure mutual exclusion when the allocation spaces data structures are being modified.
+  Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+
+  // The capacity of the alloc space until such time that ClearGrowthLimit is called.
+  // The underlying mem_map_ controls the maximum size we allow the heap to grow to. The growth
+  // limit is a value <= to the mem_map_ capacity used for ergonomic reasons because of the zygote.
+  // Prior to forking the zygote the heap will have a maximally sized mem_map_ but the growth_limit_
+  // will be set to a lower value. The growth_limit_ is used as the capacity of the alloc_space_,
+  // however, capacity normally can't vary. In the case of the growth_limit_ it can be cleared
+  // one time by a call to ClearGrowthLimit.
+  size_t growth_limit_;
+
+  friend class collector::MarkSweep;
+
+  DISALLOW_COPY_AND_ASSIGN(MallocSpace);
+};
+
+}  // namespace space
+}  // namespace gc
+}  // namespace art
+
+#endif  // ART_RUNTIME_GC_SPACE_MALLOC_SPACE_H_
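
The pure-virtual CreateAllocator and CreateInstance hooks are what allow a single
CreateZygoteSpace to serve both DlMallocSpace and RosAllocSpace: the base class performs
the trim, remap, and bookkeeping, deferring only the allocator-specific construction. A
toy sketch of the shape (names illustrative, not the real ART signatures):

    // The base class drives the algorithm; subclasses fill in the two hooks.
    class SpaceBase {
     public:
      virtual ~SpaceBase() {}
      SpaceBase* SplitTail(void* tail_base, size_t initial_size) {
        void* allocator = CreateAllocator(tail_base, initial_size);  // subclass hook
        return CreateInstance(tail_base, allocator);                 // subclass hook
      }
     protected:
      virtual void* CreateAllocator(void* base, size_t initial_size) = 0;
      virtual SpaceBase* CreateInstance(void* base, void* allocator) = 0;
    };
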
diff --git a/runtime/gc/space/rosalloc_space-inl.h b/runtime/gc/space/rosalloc_space-inl.h
new file mode 100644
index 0000000..92c69af
--- /dev/null
+++ b/runtime/gc/space/rosalloc_space-inl.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_GC_SPACE_ROSALLOC_SPACE_INL_H_
+#define ART_RUNTIME_GC_SPACE_ROSALLOC_SPACE_INL_H_
+
+#include "rosalloc_space.h"
+#include "thread.h"
+
+namespace art {
+namespace gc {
+namespace space {
+
+inline mirror::Object* RosAllocSpace::AllocNonvirtual(Thread* self, size_t num_bytes,
+                                                      size_t* bytes_allocated) {
+  // RosAlloc zeroes memory internally, so no further zeroing is needed.
+  mirror::Object* obj = AllocWithoutGrowthLocked(self, num_bytes, bytes_allocated);
+  return obj;
+}
+
+inline mirror::Object* RosAllocSpace::AllocWithoutGrowthLocked(Thread* self, size_t num_bytes,
+                                                               size_t* bytes_allocated) {
+  size_t rosalloc_size = 0;
+  mirror::Object* result = reinterpret_cast<mirror::Object*>(rosalloc_->Alloc(self, num_bytes,
+                                                                              &rosalloc_size));
+  if (LIKELY(result != NULL)) {
+    if (kDebugSpaces) {
+      CHECK(Contains(result)) << "Allocation (" << reinterpret_cast<void*>(result)
+            << ") not in bounds of allocation space " << *this;
+    }
+    DCHECK(bytes_allocated != NULL);
+    *bytes_allocated = rosalloc_size;
+  }
+  return result;
+}
+
+}  // namespace space
+}  // namespace gc
+}  // namespace art
+
+#endif  // ART_RUNTIME_GC_SPACE_ROSALLOC_SPACE_INL_H_
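
AllocNonvirtual is defined in this -inl.h so that call sites which statically know they
hold a RosAllocSpace can inline the hot allocation path instead of dispatching through the
vtable behind Alloc. A minimal sketch of the devirtualization idiom (illustrative;
::operator new stands in for the real allocator call):

    #include <cstddef>
    #include <new>

    struct SpaceIface {
      virtual ~SpaceIface() {}
      virtual void* Alloc(size_t n) = 0;
    };
    struct ConcreteSpace : SpaceIface {
      void* Alloc(size_t n) { return AllocNonvirtual(n); }  // virtual entry point
      void* AllocNonvirtual(size_t n) {                     // inlinable fast path
        return ::operator new(n);  // stand-in for the real allocation
      }
    };
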
diff --git a/runtime/gc/space/rosalloc_space.cc b/runtime/gc/space/rosalloc_space.cc
new file mode 100644
index 0000000..72692d6
--- /dev/null
+++ b/runtime/gc/space/rosalloc_space.cc
@@ -0,0 +1,306 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "rosalloc_space.h"
+
+#include "rosalloc_space-inl.h"
+#include "gc/accounting/card_table.h"
+#include "gc/heap.h"
+#include "mirror/class-inl.h"
+#include "mirror/object-inl.h"
+#include "runtime.h"
+#include "thread.h"
+#include "thread_list.h"
+#include "utils.h"
+
+#include <valgrind.h>
+#include <memcheck/memcheck.h>
+
+namespace art {
+namespace gc {
+namespace space {
+
+static const bool kPrefetchDuringRosAllocFreeList = true;
+
+RosAllocSpace::RosAllocSpace(const std::string& name, MemMap* mem_map,
+                             art::gc::allocator::RosAlloc* rosalloc, byte* begin, byte* end,
+                             byte* limit, size_t growth_limit)
+    : MallocSpace(name, mem_map, begin, end, limit, growth_limit), rosalloc_(rosalloc) {
+  CHECK(rosalloc != NULL);
+}
+
+RosAllocSpace* RosAllocSpace::Create(const std::string& name, size_t initial_size, size_t growth_limit,
+                                     size_t capacity, byte* requested_begin) {
+  uint64_t start_time = 0;
+  if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) {
+    start_time = NanoTime();
+    VLOG(startup) << "RosAllocSpace::Create entering " << name
+                  << " initial_size=" << PrettySize(initial_size)
+                  << " growth_limit=" << PrettySize(growth_limit)
+                  << " capacity=" << PrettySize(capacity)
+                  << " requested_begin=" << reinterpret_cast<void*>(requested_begin);
+  }
+
+  // Memory we promise to rosalloc before it asks for morecore.
+  // Note: making this value large means that large allocations are unlikely to succeed as rosalloc
+  // will ask for this memory from sys_alloc which will fail as the footprint (this value plus the
+  // size of the large allocation) will be greater than the footprint limit.
+  size_t starting_size = kPageSize;
+  MemMap* mem_map = CreateMemMap(name, starting_size, &initial_size, &growth_limit, &capacity,
+                                 requested_begin);
+  if (mem_map == NULL) {
+    LOG(ERROR) << "Failed to create mem map for alloc space (" << name << ") of size "
+               << PrettySize(capacity);
+    return NULL;
+  }
+  allocator::RosAlloc* rosalloc = CreateRosAlloc(mem_map->Begin(), starting_size, initial_size);
+  if (rosalloc == NULL) {
+    LOG(ERROR) << "Failed to initialize rosalloc for alloc space (" << name << ")";
+    return NULL;
+  }
+
+  // Protect memory beyond the initial size.
+  byte* end = mem_map->Begin() + starting_size;
+  if (capacity - initial_size > 0) {
+    CHECK_MEMORY_CALL(mprotect, (end, capacity - initial_size, PROT_NONE), name);
+  }
+
+  // Everything is set so record in immutable structure and leave
+  RosAllocSpace* space;
+  byte* begin = mem_map->Begin();
+  if (RUNNING_ON_VALGRIND > 0) {
+    // TODO: support valgrind.
+    LOG(FATAL) << "Unimplemented";
+    space = NULL;
+  } else {
+    space = new RosAllocSpace(name, mem_map, rosalloc, begin, end, begin + capacity, growth_limit);
+  }
+  // We start out with only the initial size possibly containing objects.
+  if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) {
+    LOG(INFO) << "RosAllocSpace::Create exiting (" << PrettyDuration(NanoTime() - start_time)
+        << " ) " << *space;
+  }
+  return space;
+}
+
+allocator::RosAlloc* RosAllocSpace::CreateRosAlloc(void* begin, size_t morecore_start, size_t initial_size) {
+  // clear errno to allow PLOG on error
+  errno = 0;
+  // create rosalloc using our backing storage starting at begin and
+  // with a footprint of morecore_start. When morecore_start bytes of
+  // memory are exhausted, morecore will be called.
+  allocator::RosAlloc* rosalloc = new art::gc::allocator::RosAlloc(begin, morecore_start);
+  if (rosalloc != NULL) {
+    rosalloc->SetFootprintLimit(initial_size);
+  } else {
+    PLOG(ERROR) << "RosAlloc::Create failed";
+  }
+  return rosalloc;
+}
+
+mirror::Object* RosAllocSpace::Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated) {
+  return AllocNonvirtual(self, num_bytes, bytes_allocated);
+}
+
+mirror::Object* RosAllocSpace::AllocWithGrowth(Thread* self, size_t num_bytes, size_t* bytes_allocated) {
+  mirror::Object* result;
+  {
+    MutexLock mu(self, lock_);
+    // Grow as much as possible within the space.
+    size_t max_allowed = Capacity();
+    rosalloc_->SetFootprintLimit(max_allowed);
+    // Try the allocation.
+    result = AllocWithoutGrowthLocked(self, num_bytes, bytes_allocated);
+    // Shrink back down as small as possible.
+    size_t footprint = rosalloc_->Footprint();
+    rosalloc_->SetFootprintLimit(footprint);
+  }
+  // Note RosAlloc zeroes memory internally.
+  // Return the new allocation or NULL.
+  CHECK(!kDebugSpaces || result == NULL || Contains(result));
+  return result;
+}
+
+MallocSpace* RosAllocSpace::CreateInstance(const std::string& name, MemMap* mem_map, void* allocator,
+                                           byte* begin, byte* end, byte* limit, size_t growth_limit) {
+  return new RosAllocSpace(name, mem_map, reinterpret_cast<allocator::RosAlloc*>(allocator),
+                           begin, end, limit, growth_limit);
+}
+
+size_t RosAllocSpace::Free(Thread* self, mirror::Object* ptr) {
+  if (kDebugSpaces) {
+    CHECK(ptr != NULL);
+    CHECK(Contains(ptr)) << "Free (" << ptr << ") not in bounds of heap " << *this;
+  }
+  const size_t bytes_freed = InternalAllocationSize(ptr);
+  total_bytes_freed_atomic_.fetch_add(bytes_freed);
+  ++total_objects_freed_atomic_;
+  if (kRecentFreeCount > 0) {
+    MutexLock mu(self, lock_);
+    RegisterRecentFree(ptr);
+  }
+  rosalloc_->Free(self, ptr);
+  return bytes_freed;
+}
+
+size_t RosAllocSpace::FreeList(Thread* self, size_t num_ptrs, mirror::Object** ptrs) {
+  DCHECK(ptrs != NULL);
+
+  // Don't need the lock to calculate the size of the freed pointers.
+  size_t bytes_freed = 0;
+  for (size_t i = 0; i < num_ptrs; i++) {
+    mirror::Object* ptr = ptrs[i];
+    const size_t look_ahead = 8;
+    if (kPrefetchDuringRosAllocFreeList && i + look_ahead < num_ptrs) {
+      __builtin_prefetch(reinterpret_cast<char*>(ptrs[i + look_ahead]));
+    }
+    bytes_freed += InternalAllocationSize(ptr);
+  }
+
+  if (kRecentFreeCount > 0) {
+    MutexLock mu(self, lock_);
+    for (size_t i = 0; i < num_ptrs; i++) {
+      RegisterRecentFree(ptrs[i]);
+    }
+  }
+
+  if (kDebugSpaces) {
+    size_t num_broken_ptrs = 0;
+    for (size_t i = 0; i < num_ptrs; i++) {
+      if (!Contains(ptrs[i])) {
+        num_broken_ptrs++;
+        LOG(ERROR) << "FreeList[" << i << "] (" << ptrs[i] << ") not in bounds of heap " << *this;
+      } else {
+        size_t size = rosalloc_->UsableSize(ptrs[i]);
+        memset(ptrs[i], 0xEF, size);
+      }
+    }
+    CHECK_EQ(num_broken_ptrs, 0u);
+  }
+
+  rosalloc_->BulkFree(self, reinterpret_cast<void**>(ptrs), num_ptrs);
+  total_bytes_freed_atomic_.fetch_add(bytes_freed);
+  total_objects_freed_atomic_.fetch_add(num_ptrs);
+  return bytes_freed;
+}
+
+// Callback from rosalloc when it needs to increase the footprint
+extern "C" void* art_heap_rosalloc_morecore(allocator::RosAlloc* rosalloc, intptr_t increment) {
+  Heap* heap = Runtime::Current()->GetHeap();
+  DCHECK(heap->GetNonMovingSpace()->IsRosAllocSpace());
+  DCHECK_EQ(heap->GetNonMovingSpace()->AsRosAllocSpace()->GetRosAlloc(), rosalloc);
+  return heap->GetNonMovingSpace()->MoreCore(increment);
+}
+
+// Virtual functions can't get inlined.
+inline size_t RosAllocSpace::InternalAllocationSize(const mirror::Object* obj) {
+  return AllocationSizeNonvirtual(obj);
+}
+
+size_t RosAllocSpace::AllocationSize(const mirror::Object* obj) {
+  return InternalAllocationSize(obj);
+}
+
+size_t RosAllocSpace::Trim() {
+  MutexLock mu(Thread::Current(), lock_);
+  // Trim to release memory at the end of the space.
+  rosalloc_->Trim();
+  // No inspect_all necessary here as trimming of pages is built-in.
+  return 0;
+}
+
+void RosAllocSpace::Walk(void(*callback)(void *start, void *end, size_t num_bytes, void* callback_arg),
+                         void* arg) {
+  InspectAllRosAlloc(callback, arg);
+  callback(NULL, NULL, 0, arg);  // Indicate end of a space.
+}
+
+size_t RosAllocSpace::GetFootprint() {
+  MutexLock mu(Thread::Current(), lock_);
+  return rosalloc_->Footprint();
+}
+
+size_t RosAllocSpace::GetFootprintLimit() {
+  MutexLock mu(Thread::Current(), lock_);
+  return rosalloc_->FootprintLimit();
+}
+
+void RosAllocSpace::SetFootprintLimit(size_t new_size) {
+  MutexLock mu(Thread::Current(), lock_);
+  VLOG(heap) << "RosAllocSpace::SetFootprintLimit " << PrettySize(new_size);
+  // Compare against the actual footprint, rather than the Size(), because the heap may not have
+  // grown all the way to the allowed size yet.
+  size_t current_space_size = rosalloc_->Footprint();
+  if (new_size < current_space_size) {
+    // Don't let the space grow any more.
+    new_size = current_space_size;
+  }
+  rosalloc_->SetFootprintLimit(new_size);
+}
+
+uint64_t RosAllocSpace::GetBytesAllocated() {
+  if (rosalloc_ != NULL) {
+    size_t bytes_allocated = 0;
+    InspectAllRosAlloc(art::gc::allocator::RosAlloc::BytesAllocatedCallback, &bytes_allocated);
+    return bytes_allocated;
+  } else {
+    return Size();
+  }
+}
+
+uint64_t RosAllocSpace::GetObjectsAllocated() {
+  if (rosalloc_ != NULL) {
+    size_t objects_allocated = 0;
+    InspectAllRosAlloc(art::gc::allocator::RosAlloc::ObjectsAllocatedCallback, &objects_allocated);
+    return objects_allocated;
+  } else {
+    return 0;
+  }
+}
+
+void RosAllocSpace::InspectAllRosAlloc(void (*callback)(void *start, void *end, size_t num_bytes, void* callback_arg),
+                                       void* arg) NO_THREAD_SAFETY_ANALYSIS {
+  // TODO: NO_THREAD_SAFETY_ANALYSIS.
+  Thread* self = Thread::Current();
+  if (Locks::mutator_lock_->IsExclusiveHeld(self)) {
+    // The mutators are already suspended. For example, a call path
+    // from SignalCatcher::HandleSigQuit().
+    rosalloc_->InspectAll(callback, arg);
+  } else {
+    // The mutators are not suspended yet.
+    DCHECK(!Locks::mutator_lock_->IsSharedHeld(self));
+    ThreadList* tl = Runtime::Current()->GetThreadList();
+    tl->SuspendAll();
+    {
+      MutexLock mu(self, *Locks::runtime_shutdown_lock_);
+      MutexLock mu2(self, *Locks::thread_list_lock_);
+      rosalloc_->InspectAll(callback, arg);
+    }
+    tl->ResumeAll();
+  }
+}
+
+void RosAllocSpace::RevokeThreadLocalBuffers(Thread* thread) {
+  rosalloc_->RevokeThreadLocalRuns(thread);
+}
+
+void RosAllocSpace::RevokeAllThreadLocalBuffers() {
+  rosalloc_->RevokeAllThreadLocalRuns();
+}
+
+}  // namespace space
+}  // namespace gc
+}  // namespace art
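
The look_ahead loop in FreeList above overlaps cache misses with useful work: while the
size of ptrs[i] is being computed, the cache line holding the object eight slots ahead is
already being fetched. A standalone sketch of the pattern (the distance 8 matches the
constant in FreeList; __builtin_prefetch is a GCC/Clang builtin):

    #include <cstddef>

    size_t SumSizesWithPrefetch(void** ptrs, size_t n, size_t (*size_of)(void*)) {
      const size_t kLookAhead = 8;  // same distance as the FreeList loop
      size_t total = 0;
      for (size_t i = 0; i < n; ++i) {
        if (i + kLookAhead < n) {
          // Start pulling a future pointer's memory into cache now.
          __builtin_prefetch(ptrs[i + kLookAhead]);
        }
        total += size_of(ptrs[i]);  // typically touches ptrs[i]'s header
      }
      return total;
    }
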
diff --git a/runtime/gc/space/rosalloc_space.h b/runtime/gc/space/rosalloc_space.h
new file mode 100644
index 0000000..8191ffb
--- /dev/null
+++ b/runtime/gc/space/rosalloc_space.h
@@ -0,0 +1,144 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_GC_SPACE_ROSALLOC_SPACE_H_
+#define ART_RUNTIME_GC_SPACE_ROSALLOC_SPACE_H_
+
+#include "gc/allocator/rosalloc.h"
+#include "malloc_space.h"
+#include "space.h"
+
+namespace art {
+namespace gc {
+
+namespace collector {
+  class MarkSweep;
+}  // namespace collector
+
+namespace space {
+
+// An alloc space is a space where objects may be allocated and garbage collected.
+class RosAllocSpace : public MallocSpace {
+ public:
+  // Create a RosAllocSpace with the requested sizes. The requested
+  // base address is not guaranteed to be granted, if it is required,
+  // the caller should call Begin on the returned space to confirm the
+  // request was granted.
+  static RosAllocSpace* Create(const std::string& name, size_t initial_size, size_t growth_limit,
+                               size_t capacity, byte* requested_begin);
+
+  virtual mirror::Object* AllocWithGrowth(Thread* self, size_t num_bytes,
+                                          size_t* bytes_allocated) LOCKS_EXCLUDED(lock_);
+  virtual mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated);
+  virtual size_t AllocationSize(const mirror::Object* obj);
+  virtual size_t Free(Thread* self, mirror::Object* ptr);
+  virtual size_t FreeList(Thread* self, size_t num_ptrs, mirror::Object** ptrs);
+
+  mirror::Object* AllocNonvirtual(Thread* self, size_t num_bytes, size_t* bytes_allocated);
+
+  size_t AllocationSizeNonvirtual(const mirror::Object* obj)
+      NO_THREAD_SAFETY_ANALYSIS {
+    // TODO: NO_THREAD_SAFETY_ANALYSIS because SizeOf() requires that the mutator_lock_ be held.
+    void* obj_ptr = const_cast<void*>(reinterpret_cast<const void*>(obj));
+    // obj is a valid object. Use its class in the header to get the size.
+    size_t size = obj->SizeOf();
+    size_t size_by_size = rosalloc_->UsableSize(size);
+    if (kIsDebugBuild) {
+      size_t size_by_ptr = rosalloc_->UsableSize(obj_ptr);
+      if (size_by_size != size_by_ptr) {
+        LOG(INFO) << "Found a bad sized obj of size " << size
+                  << " at " << std::hex << reinterpret_cast<intptr_t>(obj_ptr) << std::dec
+                  << " size_by_size=" << size_by_size << " size_by_ptr=" << size_by_ptr;
+      }
+      DCHECK_EQ(size_by_size, size_by_ptr);
+    }
+    return size_by_size;
+  }
+
+  art::gc::allocator::RosAlloc* GetRosAlloc() {
+    return rosalloc_;
+  }
+
+  size_t Trim();
+  void Walk(WalkCallback callback, void* arg) LOCKS_EXCLUDED(lock_);
+  size_t GetFootprint();
+  size_t GetFootprintLimit();
+  void SetFootprintLimit(size_t limit);
+
+  MallocSpace* CreateInstance(const std::string& name, MemMap* mem_map, void* allocator,
+                              byte* begin, byte* end, byte* limit, size_t growth_limit);
+
+  uint64_t GetBytesAllocated();
+  uint64_t GetObjectsAllocated();
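+  // Totals ever allocated: the currently allocated counts plus
+  // everything freed so far.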
+  uint64_t GetTotalBytesAllocated() {
+    return GetBytesAllocated() + total_bytes_freed_atomic_;
+  }
+  uint64_t GetTotalObjectsAllocated() {
+    return GetObjectsAllocated() + total_objects_freed_atomic_;
+  }
+
+  void RevokeThreadLocalBuffers(Thread* thread);
+  void RevokeAllThreadLocalBuffers();
+
+  // Returns the class of a recently freed object.
+  mirror::Class* FindRecentFreedObject(const mirror::Object* obj);
+
+  virtual void InvalidateAllocator() {
+    rosalloc_ = NULL;
+  }
+
+  virtual bool IsRosAllocSpace() const {
+    return true;
+  }
+  virtual RosAllocSpace* AsRosAllocSpace() {
+    return this;
+  }
+
+ protected:
+  RosAllocSpace(const std::string& name, MemMap* mem_map, allocator::RosAlloc* rosalloc,
+                byte* begin, byte* end, byte* limit, size_t growth_limit);
+
+ private:
+  size_t InternalAllocationSize(const mirror::Object* obj);
+  mirror::Object* AllocWithoutGrowthLocked(Thread* self, size_t num_bytes, size_t* bytes_allocated);
+
+  void* CreateAllocator(void* base, size_t morecore_start, size_t initial_size) {
+    return CreateRosAlloc(base, morecore_start, initial_size);
+  }
+  static allocator::RosAlloc* CreateRosAlloc(void* base, size_t morecore_start, size_t initial_size);
+
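+  // Inspect all of rosalloc's memory. LOCKS_EXCLUDED because the
+  // implementation suspends all threads and acquires the runtime
+  // shutdown and thread list locks itself.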
+  void InspectAllRosAlloc(void (*callback)(void *start, void *end, size_t num_bytes, void* callback_arg),
+                          void* arg)
+      LOCKS_EXCLUDED(Locks::runtime_shutdown_lock_, Locks::thread_list_lock_);
+
+  // Approximate number of bytes and objects which have been deallocated in the space.
+  AtomicInteger total_bytes_freed_atomic_;
+  AtomicInteger total_objects_freed_atomic_;
+
+  // Underlying rosalloc.
+  art::gc::allocator::RosAlloc* rosalloc_;
+
+  friend class collector::MarkSweep;
+
+  DISALLOW_COPY_AND_ASSIGN(RosAllocSpace);
+};
+
+}  // namespace space
+}  // namespace gc
+}  // namespace art
+
+#endif  // ART_RUNTIME_GC_SPACE_ROSALLOC_SPACE_H_
diff --git a/runtime/gc/space/space-inl.h b/runtime/gc/space/space-inl.h
index f1031ff..0c1d7a2 100644
--- a/runtime/gc/space/space-inl.h
+++ b/runtime/gc/space/space-inl.h
@@ -31,9 +31,10 @@
   return down_cast<ImageSpace*>(down_cast<MemMapSpace*>(this));
 }
 
-inline DlMallocSpace* Space::AsDlMallocSpace() {
-  DCHECK(IsDlMallocSpace());
-  return down_cast<DlMallocSpace*>(down_cast<MemMapSpace*>(this));
+inline MallocSpace* Space::AsMallocSpace() {
+  DCHECK(GetType() == kSpaceTypeAllocSpace || GetType() == kSpaceTypeZygoteSpace);
+  DCHECK(IsDlMallocSpace() || IsRosAllocSpace());
+  return down_cast<MallocSpace*>(down_cast<MemMapSpace*>(this));
 }
 
 inline LargeObjectSpace* Space::AsLargeObjectSpace() {
diff --git a/runtime/gc/space/space.h b/runtime/gc/space/space.h
index 4c05dde..38b602e 100644
--- a/runtime/gc/space/space.h
+++ b/runtime/gc/space/space.h
@@ -44,8 +44,10 @@
 
 class AllocSpace;
 class ContinuousSpace;
-class DlMallocSpace;
 class DiscontinuousSpace;
+class MallocSpace;
+class DlMallocSpace;
+class RosAllocSpace;
 class ImageSpace;
 class LargeObjectSpace;
 
@@ -106,11 +108,26 @@
   ImageSpace* AsImageSpace();
 
-  // Is this a dlmalloc backed allocation space?
+  // Is this a malloc-backed allocation space (dlmalloc or rosalloc)?
-  bool IsDlMallocSpace() const {
+  bool IsMallocSpace() const {
     SpaceType type = GetType();
     return type == kSpaceTypeAllocSpace || type == kSpaceTypeZygoteSpace;
   }
-  DlMallocSpace* AsDlMallocSpace();
+  MallocSpace* AsMallocSpace();
+
+  virtual bool IsDlMallocSpace() const {
+    return false;
+  }
+  virtual DlMallocSpace* AsDlMallocSpace() {
+    LOG(FATAL) << "Unreachable";
+    return NULL;
+  }
+  virtual bool IsRosAllocSpace() const {
+    return false;
+  }
+  virtual RosAllocSpace* AsRosAllocSpace() {
+    LOG(FATAL) << "Unreachable";
+    return NULL;
+  }
 
   // Is this the space allocated into by the Zygote and no-longer in use?
   bool IsZygoteSpace() const {
@@ -195,6 +212,16 @@
   // Returns how many bytes were freed.
   virtual size_t FreeList(Thread* self, size_t num_ptrs, mirror::Object** ptrs) = 0;
 
+  // Revoke any sort of thread-local buffers that are used to speed up
+  // allocations for the given thread, if the alloc space
+  // implementation uses any. No-op by default.
+  virtual void RevokeThreadLocalBuffers(Thread* /*thread*/) {}
+
+  // Revoke any sort of thread-local buffers that are used to speed up
+  // allocations for all the threads, if the alloc space
+  // implementation uses any. No-op by default.
+  virtual void RevokeAllThreadLocalBuffers() {}
+
  protected:
   AllocSpace() {}
   virtual ~AllocSpace() {}
diff --git a/runtime/gc/space/space_test.cc b/runtime/gc/space/space_test.cc
index 383714b..6b597ae 100644
--- a/runtime/gc/space/space_test.cc
+++ b/runtime/gc/space/space_test.cc
@@ -20,6 +20,8 @@
 #include "common_test.h"
 #include "globals.h"
 #include "UniquePtr.h"
+#include "mirror/array-inl.h"
+#include "mirror/object-inl.h"
 
 #include <stdint.h>
 
@@ -34,8 +36,25 @@
   void SizeFootPrintGrowthLimitAndTrimDriver(size_t object_size);
 
   void AddSpace(ContinuousSpace* space) {
+    // For RosAlloc, revoke the thread-local runs before moving on to a
+    // new alloc space.
+    Runtime::Current()->GetHeap()->RevokeAllThreadLocalBuffers();
     Runtime::Current()->GetHeap()->AddSpace(space);
   }
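+  // Give a raw allocation a valid byte-array class and length so that
+  // obj->SizeOf() (used by RosAlloc's AllocationSize) reports the
+  // intended size.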
+  void InstallClass(mirror::Object* o, size_t size) NO_THREAD_SAFETY_ANALYSIS {
+    // Note that the minimum object size, which is the size of a zero-length byte array, is 12 bytes.
+    EXPECT_GE(size, static_cast<size_t>(12));
+    SirtRef<mirror::ClassLoader> null_loader(Thread::Current(), NULL);
+    mirror::Class* byte_array_class = Runtime::Current()->GetClassLinker()->FindClass("[B", null_loader);
+    EXPECT_TRUE(byte_array_class != NULL);
+    o->SetClass(byte_array_class);
+    mirror::Array* arr = o->AsArray();
+    // The header size of a byte array: the object header plus the 4-byte length field.
+    size_t header_size = arr->DataOffset(1).Uint32Value();
+    int32_t length = size - header_size;
+    arr->SetLength(length);
+    EXPECT_EQ(arr->SizeOf(), size);
+  }
 };
 
 static size_t test_rand(size_t* seed) {
@@ -87,7 +106,7 @@
 // the GC works with the ZygoteSpace.
 TEST_F(SpaceTest, ZygoteSpace) {
     size_t dummy = 0;
-    DlMallocSpace* space(DlMallocSpace::Create("test", 4 * MB, 16 * MB, 16 * MB, NULL));
+    MallocSpace* space(DlMallocSpace::Create("test", 4 * MB, 16 * MB, 16 * MB, NULL));
     ASSERT_TRUE(space != NULL);
 
     // Make space findable to the heap, will also delete space when runtime is cleaned up
@@ -97,6 +116,7 @@
     // Succeeds, fits without adjusting the footprint limit.
     mirror::Object* ptr1 = space->Alloc(self, 1 * MB, &dummy);
     EXPECT_TRUE(ptr1 != NULL);
+    InstallClass(ptr1, 1 * MB);
 
     // Fails, requires a higher footprint limit.
     mirror::Object* ptr2 = space->Alloc(self, 8 * MB, &dummy);
@@ -107,6 +127,7 @@
     mirror::Object* ptr3 = space->AllocWithGrowth(self, 8 * MB, &ptr3_bytes_allocated);
     EXPECT_TRUE(ptr3 != NULL);
     EXPECT_LE(8U * MB, ptr3_bytes_allocated);
+    InstallClass(ptr3, 8 * MB);
 
     // Fails, requires a higher footprint limit.
     mirror::Object* ptr4 = space->Alloc(self, 8 * MB, &dummy);
@@ -123,8 +144,9 @@
     EXPECT_LE(8U * MB, free3);
 
     // Succeeds, now that memory has been freed.
-    void* ptr6 = space->AllocWithGrowth(self, 9 * MB, &dummy);
+    mirror::Object* ptr6 = space->AllocWithGrowth(self, 9 * MB, &dummy);
     EXPECT_TRUE(ptr6 != NULL);
+    InstallClass(ptr6, 9 * MB);
 
     // Final clean up.
     size_t free1 = space->AllocationSize(ptr1);
@@ -141,6 +163,7 @@
     // Succeeds, fits without adjusting the footprint limit.
     ptr1 = space->Alloc(self, 1 * MB, &dummy);
     EXPECT_TRUE(ptr1 != NULL);
+    InstallClass(ptr1, 1 * MB);
 
     // Fails, requires a higher footprint limit.
     ptr2 = space->Alloc(self, 8 * MB, &dummy);
@@ -149,6 +172,7 @@
     // Succeeds, adjusts the footprint.
     ptr3 = space->AllocWithGrowth(self, 2 * MB, &dummy);
     EXPECT_TRUE(ptr3 != NULL);
+    InstallClass(ptr3, 2 * MB);
     space->Free(self, ptr3);
 
     // Final clean up.
@@ -169,6 +193,7 @@
   // Succeeds, fits without adjusting the footprint limit.
   mirror::Object* ptr1 = space->Alloc(self, 1 * MB, &dummy);
   EXPECT_TRUE(ptr1 != NULL);
+  InstallClass(ptr1, 1 * MB);
 
   // Fails, requires a higher footprint limit.
   mirror::Object* ptr2 = space->Alloc(self, 8 * MB, &dummy);
@@ -179,6 +204,7 @@
   mirror::Object* ptr3 = space->AllocWithGrowth(self, 8 * MB, &ptr3_bytes_allocated);
   EXPECT_TRUE(ptr3 != NULL);
   EXPECT_LE(8U * MB, ptr3_bytes_allocated);
+  InstallClass(ptr3, 8 * MB);
 
   // Fails, requires a higher footprint limit.
   mirror::Object* ptr4 = space->Alloc(self, 8 * MB, &dummy);
@@ -195,8 +221,9 @@
   EXPECT_LE(8U * MB, free3);
 
   // Succeeds, now that memory has been freed.
-  void* ptr6 = space->AllocWithGrowth(self, 9 * MB, &dummy);
+  mirror::Object* ptr6 = space->AllocWithGrowth(self, 9 * MB, &dummy);
   EXPECT_TRUE(ptr6 != NULL);
+  InstallClass(ptr6, 9 * MB);
 
   // Final clean up.
   size_t free1 = space->AllocationSize(ptr1);
@@ -278,8 +305,9 @@
   for (size_t i = 0; i < arraysize(lots_of_objects); i++) {
     size_t allocation_size = 0;
     lots_of_objects[i] = space->Alloc(self, 16, &allocation_size);
-    EXPECT_EQ(allocation_size, space->AllocationSize(lots_of_objects[i]));
     EXPECT_TRUE(lots_of_objects[i] != NULL);
+    InstallClass(lots_of_objects[i], 16);
+    EXPECT_EQ(allocation_size, space->AllocationSize(lots_of_objects[i]));
   }
 
   // Release memory and check pointers are NULL
@@ -292,8 +320,9 @@
   for (size_t i = 0; i < arraysize(lots_of_objects); i++) {
     size_t allocation_size = 0;
     lots_of_objects[i] = space->AllocWithGrowth(self, 1024, &allocation_size);
-    EXPECT_EQ(allocation_size, space->AllocationSize(lots_of_objects[i]));
     EXPECT_TRUE(lots_of_objects[i] != NULL);
+    InstallClass(lots_of_objects[i], 1024);
+    EXPECT_EQ(allocation_size, space->AllocationSize(lots_of_objects[i]));
   }
 
   // Release memory and check pointers are NULL
@@ -310,22 +339,20 @@
     // No allocation can succeed
     return;
   }
-  // Mspace for raw dlmalloc operations
-  void* mspace = space->GetMspace();
 
-  // mspace's footprint equals amount of resources requested from system
-  size_t footprint = mspace_footprint(mspace);
+  // The space's footprint equals the amount of memory requested from the system
+  size_t footprint = space->GetFootprint();
 
-  // mspace must at least have its book keeping allocated
+  // The space must at least have its bookkeeping allocated
   EXPECT_GT(footprint, 0u);
 
-  // mspace but it shouldn't exceed the initial size
+  // But it shouldn't exceed the initial size
   EXPECT_LE(footprint, growth_limit);
 
   // space's size shouldn't exceed the initial size
   EXPECT_LE(space->Size(), growth_limit);
 
-  // this invariant should always hold or else the mspace has grown to be larger than what the
+  // this invariant should always hold or else the space has grown to be larger than what the
   // space believes its size is (which will break invariants)
   EXPECT_GE(space->Size(), footprint);
 
@@ -345,8 +372,9 @@
         alloc_size = object_size;
       } else {
         alloc_size = test_rand(&rand_seed) % static_cast<size_t>(-object_size);
-        if (alloc_size < 8) {
-          alloc_size = 8;
+        // Note that the minimum object size, which is the size of a zero-length byte array, is 12 bytes.
+        if (alloc_size < 12) {
+          alloc_size = 12;
         }
       }
       mirror::Object* object;
@@ -356,9 +384,10 @@
       } else {
         object = space->AllocWithGrowth(self, alloc_size, &bytes_allocated);
       }
-      footprint = mspace_footprint(mspace);
+      footprint = space->GetFootprint();
       EXPECT_GE(space->Size(), footprint);  // invariant
       if (object != NULL) {  // allocation succeeded
+        InstallClass(object, alloc_size);
         lots_of_objects.get()[i] = object;
         size_t allocation_size = space->AllocationSize(object);
         EXPECT_EQ(bytes_allocated, allocation_size);
@@ -395,7 +424,7 @@
     space->Trim();
 
     // Bounds sanity
-    footprint = mspace_footprint(mspace);
+    footprint = space->GetFootprint();
     EXPECT_LE(amount_allocated, growth_limit);
     EXPECT_GE(footprint, amount_allocated);
     EXPECT_LE(footprint, growth_limit);
@@ -421,13 +450,21 @@
       space->Free(self, object);
       lots_of_objects.get()[i] = NULL;
       amount_allocated -= allocation_size;
-      footprint = mspace_footprint(mspace);
+      footprint = space->GetFootprint();
       EXPECT_GE(space->Size(), footprint);  // invariant
     }
 
     free_increment >>= 1;
   }
 
+  // The space has become empty here, before the large object
+  // allocation below. For RosAlloc, revoke the thread-local runs,
+  // which are kept around even when empty for performance reasons, so
+  // that they don't cause the following large allocation to fail due
+  // to fragmentation. Note that they are normally revoked at each GC
+  // (but no GC happens here).
+  space->RevokeAllThreadLocalBuffers();
+
   // All memory was released, try a large allocation to check freed memory is being coalesced
   mirror::Object* large_object;
   size_t three_quarters_space = (growth_limit / 2) + (growth_limit / 4);
@@ -438,9 +475,10 @@
     large_object = space->AllocWithGrowth(self, three_quarters_space, &bytes_allocated);
   }
   EXPECT_TRUE(large_object != NULL);
+  InstallClass(large_object, three_quarters_space);
 
   // Sanity check footprint
-  footprint = mspace_footprint(mspace);
+  footprint = space->GetFootprint();
   EXPECT_LE(footprint, growth_limit);
   EXPECT_GE(space->Size(), footprint);
   EXPECT_LE(space->Size(), growth_limit);
@@ -449,7 +487,7 @@
   space->Free(self, large_object);
 
   // Sanity check footprint
-  footprint = mspace_footprint(mspace);
+  footprint = space->GetFootprint();
   EXPECT_LE(footprint, growth_limit);
   EXPECT_GE(space->Size(), footprint);
   EXPECT_LE(space->Size(), growth_limit);
@@ -488,8 +526,8 @@
   }
 
 // Each size test is its own test so that we get a fresh heap each time
-TEST_F(SpaceTest, SizeFootPrintGrowthLimitAndTrim_AllocationsOf_8B) {
-  SizeFootPrintGrowthLimitAndTrimDriver(8);
+TEST_F(SpaceTest, SizeFootPrintGrowthLimitAndTrim_AllocationsOf_12B) {
+  SizeFootPrintGrowthLimitAndTrimDriver(12);
 }
 TEST_SizeFootPrintGrowthLimitAndTrim(16B, 16)
 TEST_SizeFootPrintGrowthLimitAndTrim(24B, 24)
diff --git a/runtime/locks.h b/runtime/locks.h
index 2262218..2308e95 100644
--- a/runtime/locks.h
+++ b/runtime/locks.h
@@ -37,6 +37,9 @@
   kThreadSuspendCountLock,
   kAbortLock,
   kJdwpSocketLock,
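+  // Lock levels for rosalloc's internal locks (see gc/allocator/rosalloc.h).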
+  kRosAllocGlobalLock,
+  kRosAllocBracketLock,
+  kRosAllocBulkFreeLock,
   kAllocSpaceLock,
   kMarkSweepMarkStackLock,
   kDefaultMutexLevel,
diff --git a/runtime/native/dalvik_system_VMDebug.cc b/runtime/native/dalvik_system_VMDebug.cc
index 96c3e78..66fa100 100644
--- a/runtime/native/dalvik_system_VMDebug.cc
+++ b/runtime/native/dalvik_system_VMDebug.cc
@@ -267,14 +267,14 @@
     if (space->IsImageSpace()) {
       // Currently don't include the image space.
     } else if (space->IsZygoteSpace()) {
-      gc::space::DlMallocSpace* dlmalloc_space = space->AsDlMallocSpace();
-      zygoteSize += dlmalloc_space->GetFootprint();
-      zygoteUsed += dlmalloc_space->GetBytesAllocated();
+      gc::space::MallocSpace* malloc_space = space->AsMallocSpace();
+      zygoteSize += malloc_space->GetFootprint();
+      zygoteUsed += malloc_space->GetBytesAllocated();
     } else {
       // This is the alloc space.
-      gc::space::DlMallocSpace* dlmalloc_space = space->AsDlMallocSpace();
-      allocSize += dlmalloc_space->GetFootprint();
-      allocUsed += dlmalloc_space->GetBytesAllocated();
+      gc::space::MallocSpace* malloc_space = space->AsMallocSpace();
+      allocSize += malloc_space->GetFootprint();
+      allocUsed += malloc_space->GetBytesAllocated();
     }
   }
   typedef std::vector<gc::space::DiscontinuousSpace*>::const_iterator It2;
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 8e39023..08c0ba0 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -124,6 +124,11 @@
   heap_->WaitForGcToComplete(self);
   heap_->DeleteThreadPool();
 
+  // For RosAlloc, revoke the thread-local runs. Note that in tests
+  // (common_test.h) we repeatedly allocate and delete Runtime objects.
+  heap_->RevokeAllThreadLocalBuffers();
+
   // Make sure our internal threads are dead before we start tearing down things they're using.
   Dbg::StopJdwp();
   delete signal_catcher_;
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 1f6dd69..e55c35f 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -930,6 +930,7 @@
   state_and_flags_.as_struct.flags = 0;
   state_and_flags_.as_struct.state = kNative;
   memset(&held_mutexes_[0], 0, sizeof(held_mutexes_));
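+  // Initialize the thread-local rosalloc runs to empty (no cached runs yet).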
+  memset(rosalloc_runs_, 0, sizeof(rosalloc_runs_));
 }
 
 bool Thread::IsStillStarting() const {
@@ -1022,6 +1023,8 @@
   delete name_;
   delete stack_trace_sample_;
 
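+  // Return any thread-local allocation buffers (e.g. rosalloc runs) to
+  // the heap before this thread is torn down.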
+  Runtime::Current()->GetHeap()->RevokeThreadLocalBuffers(this);
+
   TearDownAlternateSignalStack();
 }
 
diff --git a/runtime/thread.h b/runtime/thread.h
index 6bd3607..f4b2ae5 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -790,6 +790,15 @@
 
   friend class ScopedThreadStateChange;
 
+ public:
+  // Thread-local rosalloc runs. There are 34 size brackets
+  // (RosAlloc::kNumOfSizeBrackets). We can't refer to the RosAlloc
+  // class here due to a circular header dependency, so we instead
+  // check that the two values match at RosAlloc initialization time.
+  static const size_t kRosAllocNumOfSizeBrackets = 34;
+  void* rosalloc_runs_[kRosAllocNumOfSizeBrackets];
+
   DISALLOW_COPY_AND_ASSIGN(Thread);
 };