Merge "Add native memory accounting through custom allocator." into lmp-dev
diff --git a/runtime/Android.mk b/runtime/Android.mk
index 0876997..0ab6626 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -47,7 +47,6 @@
   gc/allocator/dlmalloc.cc \
   gc/allocator/rosalloc.cc \
   gc/accounting/card_table.cc \
-  gc/accounting/gc_allocator.cc \
   gc/accounting/heap_bitmap.cc \
   gc/accounting/mod_union_table.cc \
   gc/accounting/remembered_set.cc \
@@ -293,6 +292,7 @@
 
 LIBART_ENUM_OPERATOR_OUT_HEADER_FILES := \
   arch/x86_64/registers_x86_64.h \
+  base/allocator.h \
   base/mutex.h \
   dex_file.h \
   dex_instruction.h \
diff --git a/runtime/base/allocator.cc b/runtime/base/allocator.cc
index 4f7753d..39d51a5 100644
--- a/runtime/base/allocator.cc
+++ b/runtime/base/allocator.cc
@@ -19,10 +19,16 @@
 #include <inttypes.h>
 #include <stdlib.h>
 
+#include "atomic.h"
 #include "base/logging.h"
+#include "thread-inl.h"
 
 namespace art {
 
+Atomic<uint64_t> TrackedAllocators::bytes_used_[kAllocatorTagCount];
+Atomic<uint64_t> TrackedAllocators::max_bytes_used_[kAllocatorTagCount];
+Atomic<uint64_t> TrackedAllocators::total_bytes_used_[kAllocatorTagCount];
+
 class MallocAllocator : public Allocator {
  public:
   explicit MallocAllocator() {}
@@ -70,5 +76,19 @@
   return &g_noop_allocator;
 }
 
+void TrackedAllocators::Dump(std::ostream& os) {
+  if (kEnableTrackingAllocator) {
+    os << "Dumping native memory usage\n";
+    for (size_t i = 0; i < kAllocatorTagCount; ++i) {
+      uint64_t bytes_used = bytes_used_[i].LoadRelaxed();
+      uint64_t max_bytes_used = max_bytes_used_[i].LoadRelaxed();
+      uint64_t total_bytes_used = total_bytes_used_[i].LoadRelaxed();
+      if (total_bytes_used != 0) {
+        os << static_cast<AllocatorTag>(i) << " active=" << bytes_used << " max="
+           << max_bytes_used << " total=" << total_bytes_used << "\n";
+      }
+    }
+  }
+}
 
 }  // namespace art
diff --git a/runtime/base/allocator.h b/runtime/base/allocator.h
index 917bf0b..a7adb02 100644
--- a/runtime/base/allocator.h
+++ b/runtime/base/allocator.h
@@ -17,10 +17,17 @@
 #ifndef ART_RUNTIME_BASE_ALLOCATOR_H_
 #define ART_RUNTIME_BASE_ALLOCATOR_H_
 
+#include <map>
+
+#include "atomic.h"
 #include "base/macros.h"
+#include "base/mutex.h"
+#include "utils.h"
 
 namespace art {
 
+static constexpr bool kEnableTrackingAllocator = false;
+
 class Allocator {
  public:
   static Allocator* GetMallocAllocator();
@@ -36,6 +43,112 @@
   DISALLOW_COPY_AND_ASSIGN(Allocator);
 };
 
+// Used by TrackedAllocators.
+enum AllocatorTag {
+  kAllocatorTagHeap,
+  kAllocatorTagMonitorList,
+  kAllocatorTagClassTable,
+  kAllocatorTagInternTable,
+  kAllocatorTagMaps,
+  kAllocatorTagLOS,
+  kAllocatorTagSafeMap,
+  kAllocatorTagLOSMaps,
+  kAllocatorTagReferenceTable,
+  kAllocatorTagHeapBitmap,
+  kAllocatorTagHeapBitmapLOS,
+  kAllocatorTagMonitorPool,
+  kAllocatorTagLOSFreeList,
+  kAllocatorTagVerifier,
+  kAllocatorTagRememberedSet,
+  kAllocatorTagModUnionCardSet,
+  kAllocatorTagModUnionReferenceArray,
+  kAllocatorTagJNILibraries,
+  kAllocatorTagCompileTimeClassPath,
+  kAllocatorTagOatFile,
+  kAllocatorTagDexFileVerifier,
+  kAllocatorTagCount,  // Must always be last element.
+};
+std::ostream& operator<<(std::ostream& os, const AllocatorTag& tag);
+
+class TrackedAllocators {
+ public:
+  static bool Add(uint32_t tag, AtomicInteger* bytes_used);
+  static void Dump(std::ostream& os);
+  static void RegisterAllocation(AllocatorTag tag, uint64_t bytes) {
+    total_bytes_used_[tag].FetchAndAddSequentiallyConsistent(bytes);
+    uint64_t new_bytes = bytes_used_[tag].FetchAndAddSequentiallyConsistent(bytes) + bytes;
+    max_bytes_used_[tag].StoreRelaxed(std::max(max_bytes_used_[tag].LoadRelaxed(), new_bytes));
+  }
+  static void RegisterFree(AllocatorTag tag, uint64_t bytes) {
+    bytes_used_[tag].FetchAndSubSequentiallyConsistent(bytes);
+  }
+
+ private:
+  static Atomic<uint64_t> bytes_used_[kAllocatorTagCount];
+  static Atomic<uint64_t> max_bytes_used_[kAllocatorTagCount];
+  static Atomic<uint64_t> total_bytes_used_[kAllocatorTagCount];
+};
+
+// Tracking allocator: tracks how much memory is used.
+template<class T, AllocatorTag kTag>
+class TrackingAllocatorImpl {
+ public:
+  typedef typename std::allocator<T>::value_type value_type;
+  typedef typename std::allocator<T>::size_type size_type;
+  typedef typename std::allocator<T>::difference_type difference_type;
+  typedef typename std::allocator<T>::pointer pointer;
+  typedef typename std::allocator<T>::const_pointer const_pointer;
+  typedef typename std::allocator<T>::reference reference;
+  typedef typename std::allocator<T>::const_reference const_reference;
+
+  // Used internally by STL data structures.
+  template <class U>
+  TrackingAllocatorImpl(const TrackingAllocatorImpl<U, kTag>& alloc) throw() {
+  }
+
+  // Used internally by STL data structures.
+  TrackingAllocatorImpl() throw() {
+    COMPILE_ASSERT(kTag < kAllocatorTagCount, must_be_less_than_count);
+  }
+
+  // Enables an allocator for objects of one type to allocate storage for objects of another type.
+  // Used internally by STL data structures.
+  template <class U>
+  struct rebind {
+    typedef TrackingAllocatorImpl<U, kTag> other;
+  };
+
+  pointer allocate(size_type n, const_pointer hint = 0) {
+    const size_t size = n * sizeof(T);
+    TrackedAllocators::RegisterAllocation(GetTag(), size);
+    return reinterpret_cast<pointer>(malloc(size));
+  }
+
+  template <typename PT>
+  void deallocate(PT p, size_type n) {
+    const size_t size = n * sizeof(T);
+    TrackedAllocators::RegisterFree(GetTag(), size);
+    free(p);
+  }
+
+  static AllocatorTag GetTag() {
+    return kTag;
+  }
+};
+
+// C++ doesn't allow template typedefs. This is a workaround template typedef which is
+// TrackingAllocatorImpl<T> if kEnableTrackingAllocator is true, std::allocator<T> otherwise.
+template<class T, AllocatorTag kTag>
+class TrackingAllocator : public TypeStaticIf<kEnableTrackingAllocator,
+                                              TrackingAllocatorImpl<T, kTag>,
+                                              std::allocator<T>>::type {
+};
+
+template<class Key, class T, AllocatorTag kTag, class Compare = std::less<Key>>
+class AllocationTrackingMultiMap : public std::multimap<
+    Key, T, Compare, TrackingAllocator<std::pair<Key, T>, kTag>> {
+};
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_BASE_ALLOCATOR_H_
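
For illustration, a minimal standalone sketch of the counter scheme introduced above, with
std::atomic standing in for art::Atomic and one counter set instead of the per-AllocatorTag
arrays; RegisterAllocation/RegisterFree follow the same fetch-add pattern, including the
deliberately racy high-water-mark update:

    #include <atomic>
    #include <cstdint>
    #include <cstdlib>
    #include <iostream>
    #include <vector>

    // One set of counters; TrackedAllocators keeps an array of each, indexed by tag.
    static std::atomic<uint64_t> g_bytes_used{0};        // "active": currently live
    static std::atomic<uint64_t> g_max_bytes_used{0};    // "max": high-water mark
    static std::atomic<uint64_t> g_total_bytes_used{0};  // "total": lifetime sum

    template <class T>
    struct CountingAllocator {
      typedef T value_type;
      CountingAllocator() {}
      template <class U>
      CountingAllocator(const CountingAllocator<U>&) {}
      T* allocate(size_t n) {
        const uint64_t size = n * sizeof(T);
        g_total_bytes_used.fetch_add(size);
        const uint64_t now = g_bytes_used.fetch_add(size) + size;
        if (now > g_max_bytes_used.load()) {  // racy max update, as in RegisterAllocation
          g_max_bytes_used.store(now);
        }
        return static_cast<T*>(std::malloc(size));
      }
      void deallocate(T* p, size_t n) {
        g_bytes_used.fetch_sub(n * sizeof(T));
        std::free(p);
      }
    };
    template <class T, class U>
    bool operator==(const CountingAllocator<T>&, const CountingAllocator<U>&) { return true; }
    template <class T, class U>
    bool operator!=(const CountingAllocator<T>&, const CountingAllocator<U>&) { return false; }

    int main() {
      { std::vector<int, CountingAllocator<int>> v(1000); }  // allocates, then frees on scope exit
      std::cout << "active=" << g_bytes_used << " max=" << g_max_bytes_used
                << " total=" << g_total_bytes_used << "\n";  // active drops back to 0
      return 0;
    }

Built with -std=c++11, this prints active=0 max=4000 total=4000 on a typical target where
int is 4 bytes, matching the active/max/total columns of TrackedAllocators::Dump.
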
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index 3adb9d4..bf2e781 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -21,6 +21,7 @@
 #include <utility>
 #include <vector>
 
+#include "base/allocator.h"
 #include "base/macros.h"
 #include "base/mutex.h"
 #include "dex_file.h"
@@ -627,7 +628,7 @@
   // multimap from a string hash code of a class descriptor to
   // mirror::Class* instances. Results should be compared for a matching
   // Class::descriptor_ and Class::class_loader_.
-  typedef std::multimap<size_t, GcRoot<mirror::Class>> Table;
+  typedef AllocationTrackingMultiMap<size_t, GcRoot<mirror::Class>, kAllocatorTagClassTable> Table;
   // This contains strong roots. To enable concurrent root scanning of
   // the class table, be careful to use a read barrier when accessing this.
   Table class_table_ GUARDED_BY(Locks::classlinker_classes_lock_);
diff --git a/runtime/dex_file_verifier.h b/runtime/dex_file_verifier.h
index 0af3549..1d915b9 100644
--- a/runtime/dex_file_verifier.h
+++ b/runtime/dex_file_verifier.h
@@ -111,7 +111,7 @@
   const char* const location_;
   const DexFile::Header* const header_;
 
-  SafeMap<uint32_t, uint16_t> offset_to_type_map_;
+  AllocationTrackingSafeMap<uint32_t, uint16_t, kAllocatorTagDexFileVerifier> offset_to_type_map_;
   const byte* ptr_;
   const void* previous_item_;
 
diff --git a/runtime/gc/accounting/gc_allocator.cc b/runtime/gc/accounting/gc_allocator.cc
deleted file mode 100644
index ff6a135..0000000
--- a/runtime/gc/accounting/gc_allocator.cc
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gc_allocator.h"
-#include "gc/allocator/dlmalloc.h"
-#include "gc/heap.h"
-#include "runtime.h"
-
-namespace art {
-namespace gc {
-namespace accounting {
-
-void* RegisterGcAllocation(size_t bytes) {
-  gc::Heap* heap = Runtime::Current()->GetHeap();
-  if (heap != nullptr) {
-    heap->RegisterGCAllocation(bytes);
-  }
-  return malloc(bytes);
-}
-
-void RegisterGcDeallocation(void* p, size_t bytes) {
-  gc::Heap* heap = Runtime::Current()->GetHeap();
-  if (heap != nullptr) {
-    heap->RegisterGCDeAllocation(bytes);
-  }
-  free(p);
-}
-
-}  // namespace accounting
-}  // namespace gc
-}  // namespace art
diff --git a/runtime/gc/accounting/gc_allocator.h b/runtime/gc/accounting/gc_allocator.h
deleted file mode 100644
index d4142f8..0000000
--- a/runtime/gc/accounting/gc_allocator.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_RUNTIME_GC_ACCOUNTING_GC_ALLOCATOR_H_
-#define ART_RUNTIME_GC_ACCOUNTING_GC_ALLOCATOR_H_
-
-#include "utils.h"
-
-#include <cstdlib>
-#include <limits>
-#include <memory>
-
-namespace art {
-namespace gc {
-namespace accounting {
-
-void* RegisterGcAllocation(size_t bytes);
-void RegisterGcDeallocation(void* p, size_t bytes);
-
-static constexpr bool kMeasureGcMemoryOverhead = false;
-
-template <typename T>
-class GcAllocatorImpl : public std::allocator<T> {
- public:
-  typedef typename std::allocator<T>::value_type value_type;
-  typedef typename std::allocator<T>::size_type size_type;
-  typedef typename std::allocator<T>::difference_type difference_type;
-  typedef typename std::allocator<T>::pointer pointer;
-  typedef typename std::allocator<T>::const_pointer const_pointer;
-  typedef typename std::allocator<T>::reference reference;
-  typedef typename std::allocator<T>::const_reference const_reference;
-
-  // Used internally by STL data structures.
-  template <class U>
-  GcAllocatorImpl(const GcAllocatorImpl<U>& alloc) throw() {
-  }
-
-  // Used internally by STL data structures.
-  GcAllocatorImpl() throw() {
-  }
-
-  // Enables an allocator for objects of one type to allocate storage for objects of another type.
-  // Used internally by STL data structures.
-  template <class U>
-  struct rebind {
-    typedef GcAllocatorImpl<U> other;
-  };
-
-  pointer allocate(size_type n, const_pointer hint = 0) {
-    return reinterpret_cast<pointer>(RegisterGcAllocation(n * sizeof(T)));
-  }
-
-  template <typename PT>
-  void deallocate(PT p, size_type n) {
-    RegisterGcDeallocation(p, n * sizeof(T));
-  }
-};
-
-// C++ doesn't allow template typedefs. This is a workaround template typedef which is
-// GCAllocatorImpl<T> if kMeasureGCMemoryOverhead is true, std::allocator<T> otherwise.
-template <typename T>
-class GcAllocator : public TypeStaticIf<kMeasureGcMemoryOverhead, GcAllocatorImpl<T>,
-                                        std::allocator<T>>::type {
-};
-
-}  // namespace accounting
-}  // namespace gc
-}  // namespace art
-
-#endif  // ART_RUNTIME_GC_ACCOUNTING_GC_ALLOCATOR_H_
diff --git a/runtime/gc/accounting/heap_bitmap.h b/runtime/gc/accounting/heap_bitmap.h
index 814dc06..ca6dc46 100644
--- a/runtime/gc/accounting/heap_bitmap.h
+++ b/runtime/gc/accounting/heap_bitmap.h
@@ -17,8 +17,8 @@
 #ifndef ART_RUNTIME_GC_ACCOUNTING_HEAP_BITMAP_H_
 #define ART_RUNTIME_GC_ACCOUNTING_HEAP_BITMAP_H_
 
+#include "base/allocator.h"
 #include "base/logging.h"
-#include "gc_allocator.h"
 #include "object_callbacks.h"
 #include "space_bitmap.h"
 
@@ -68,11 +68,14 @@
   void RemoveLargeObjectBitmap(LargeObjectBitmap* bitmap);
 
   // Bitmaps covering continuous spaces.
-  std::vector<ContinuousSpaceBitmap*, GcAllocator<ContinuousSpaceBitmap*>>
+  std::vector<ContinuousSpaceBitmap*,
+              TrackingAllocator<ContinuousSpaceBitmap*, kAllocatorTagHeapBitmap>>
       continuous_space_bitmaps_;
 
   // Sets covering discontinuous spaces.
-  std::vector<LargeObjectBitmap*, GcAllocator<LargeObjectBitmap*>> large_object_bitmaps_;
+  std::vector<LargeObjectBitmap*,
+              TrackingAllocator<LargeObjectBitmap*, kAllocatorTagHeapBitmapLOS>>
+      large_object_bitmaps_;
 
   friend class art::gc::Heap;
 };
diff --git a/runtime/gc/accounting/mod_union_table.h b/runtime/gc/accounting/mod_union_table.h
index 449e171..f9e8261 100644
--- a/runtime/gc/accounting/mod_union_table.h
+++ b/runtime/gc/accounting/mod_union_table.h
@@ -17,7 +17,7 @@
 #ifndef ART_RUNTIME_GC_ACCOUNTING_MOD_UNION_TABLE_H_
 #define ART_RUNTIME_GC_ACCOUNTING_MOD_UNION_TABLE_H_
 
-#include "gc_allocator.h"
+#include "base/allocator.h"
 #include "globals.h"
 #include "object_callbacks.h"
 #include "safe_map.h"
@@ -50,7 +50,8 @@
 // cleared between GC phases, reducing the number of dirty cards that need to be scanned.
 class ModUnionTable {
  public:
-  typedef std::set<byte*, std::less<byte*>, GcAllocator<byte*>> CardSet;
+  typedef std::set<byte*, std::less<byte*>,
+                   TrackingAllocator<byte*, kAllocatorTagModUnionCardSet>> CardSet;
 
   explicit ModUnionTable(const std::string& name, Heap* heap, space::ContinuousSpace* space)
       : name_(name),
@@ -125,9 +126,8 @@
   ModUnionTable::CardSet cleared_cards_;
 
   // Maps from dirty cards to their corresponding alloc space references.
-  SafeMap<const byte*, std::vector<mirror::HeapReference<mirror::Object>*>, std::less<const byte*>,
-      GcAllocator<std::pair<const byte*, std::vector<mirror::HeapReference<mirror::Object>*>>> >
-      references_;
+  AllocationTrackingSafeMap<const byte*, std::vector<mirror::HeapReference<mirror::Object>*>,
+                            kAllocatorTagModUnionReferenceArray> references_;
 };
 
 // Card caching implementation. Keeps track of which cards we cleared and only this information.
diff --git a/runtime/gc/accounting/remembered_set.h b/runtime/gc/accounting/remembered_set.h
index 706cf35..8d66e0e 100644
--- a/runtime/gc/accounting/remembered_set.h
+++ b/runtime/gc/accounting/remembered_set.h
@@ -17,7 +17,7 @@
 #ifndef ART_RUNTIME_GC_ACCOUNTING_REMEMBERED_SET_H_
 #define ART_RUNTIME_GC_ACCOUNTING_REMEMBERED_SET_H_
 
-#include "gc_allocator.h"
+#include "base/allocator.h"
 #include "globals.h"
 #include "object_callbacks.h"
 #include "safe_map.h"
@@ -43,7 +43,8 @@
 // from the free list spaces to the bump pointer spaces.
 class RememberedSet {
  public:
-  typedef std::set<byte*, std::less<byte*>, GcAllocator<byte*>> CardSet;
+  typedef std::set<byte*, std::less<byte*>,
+                   TrackingAllocator<byte*, kAllocatorTagRememberedSet>> CardSet;
 
   explicit RememberedSet(const std::string& name, Heap* heap, space::ContinuousSpace* space)
       : name_(name), heap_(heap), space_(space) {}
diff --git a/runtime/gc/accounting/space_bitmap.h b/runtime/gc/accounting/space_bitmap.h
index a3073bd..f72b30f 100644
--- a/runtime/gc/accounting/space_bitmap.h
+++ b/runtime/gc/accounting/space_bitmap.h
@@ -24,7 +24,6 @@
 #include <vector>
 
 #include "base/mutex.h"
-#include "gc_allocator.h"
 #include "globals.h"
 #include "object_callbacks.h"
 
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index e9353be..90ab323 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -23,6 +23,7 @@
 #include <memory>
 #include <vector>
 
+#include "base/allocator.h"
 #include "base/histogram-inl.h"
 #include "base/stl_util.h"
 #include "common_throws.h"
@@ -146,7 +147,6 @@
       total_objects_freed_ever_(0),
       num_bytes_allocated_(0),
       native_bytes_allocated_(0),
-      gc_memory_overhead_(0),
       verify_missing_card_marks_(false),
       verify_system_weaks_(false),
       verify_pre_gc_heap_(verify_pre_gc_heap),
@@ -789,14 +789,6 @@
   }
 }
 
-void Heap::RegisterGCAllocation(size_t bytes) {
-  gc_memory_overhead_.FetchAndAddSequentiallyConsistent(bytes);
-}
-
-void Heap::RegisterGCDeAllocation(size_t bytes) {
-  gc_memory_overhead_.FetchAndSubSequentiallyConsistent(bytes);
-}
-
 void Heap::DumpGcPerformanceInfo(std::ostream& os) {
   // Dump cumulative timings.
   os << "Dumping cumulative Gc timings\n";
@@ -854,7 +846,6 @@
   }
   os << "Total mutator paused time: " << PrettyDuration(total_paused_time) << "\n";
   os << "Total time waiting for GC to complete: " << PrettyDuration(total_wait_time_) << "\n";
-  os << "Approximate GC data structures memory overhead: " << gc_memory_overhead_.LoadRelaxed();
   BaseMutex::DumpAll(os);
 }
 
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index c2ba9f8..d2cc350 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -804,10 +804,12 @@
   std::unique_ptr<accounting::CardTable> card_table_;
 
   // A mod-union table remembers all of the references from its space to other spaces.
-  SafeMap<space::Space*, accounting::ModUnionTable*> mod_union_tables_;
+  AllocationTrackingSafeMap<space::Space*, accounting::ModUnionTable*, kAllocatorTagHeap>
+      mod_union_tables_;
 
   // A remembered set remembers all of the references from its space to the target space.
-  SafeMap<space::Space*, accounting::RememberedSet*> remembered_sets_;
+  AllocationTrackingSafeMap<space::Space*, accounting::RememberedSet*, kAllocatorTagHeap>
+      remembered_sets_;
 
   // The current collector type.
   CollectorType collector_type_;
@@ -910,9 +912,6 @@
   // Bytes which are allocated and managed by native code but still need to be accounted for.
   Atomic<size_t> native_bytes_allocated_;
 
-  // Data structure GC overhead.
-  Atomic<size_t> gc_memory_overhead_;
-
   // Info related to the current or previous GC iteration.
   collector::Iteration current_gc_iteration_;
 
diff --git a/runtime/gc/space/large_object_space.h b/runtime/gc/space/large_object_space.h
index b1c20ca..09a0919 100644
--- a/runtime/gc/space/large_object_space.h
+++ b/runtime/gc/space/large_object_space.h
@@ -17,7 +17,7 @@
 #ifndef ART_RUNTIME_GC_SPACE_LARGE_OBJECT_SPACE_H_
 #define ART_RUNTIME_GC_SPACE_LARGE_OBJECT_SPACE_H_
 
-#include "gc/accounting/gc_allocator.h"
+#include "base/allocator.h"
 #include "dlmalloc_space.h"
 #include "safe_map.h"
 #include "space.h"
@@ -135,10 +135,10 @@
 
   // Used to ensure mutual exclusion when the allocation space's data structures are being modified.
   mutable Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
-  std::vector<mirror::Object*,
-      accounting::GcAllocator<mirror::Object*>> large_objects_ GUARDED_BY(lock_);
+  std::vector<mirror::Object*, TrackingAllocator<mirror::Object*, kAllocatorTagLOS>> large_objects_
+      GUARDED_BY(lock_);
   typedef SafeMap<mirror::Object*, MemMap*, std::less<mirror::Object*>,
-      accounting::GcAllocator<std::pair<mirror::Object*, MemMap*>>> MemMaps;
+      TrackingAllocator<std::pair<mirror::Object*, MemMap*>, kAllocatorTagLOSMaps>> MemMaps;
   MemMaps mem_maps_ GUARDED_BY(lock_);
 };
 
@@ -259,7 +259,7 @@
   AllocationHeader* GetAllocationHeader(const mirror::Object* obj);
 
   typedef std::set<AllocationHeader*, AllocationHeader::SortByPrevFree,
-                   accounting::GcAllocator<AllocationHeader*>> FreeBlocks;
+                   TrackingAllocator<AllocationHeader*, kAllocatorTagLOSFreeList>> FreeBlocks;
 
   // There is no footer for any allocation at the end of the space, so we keep track of how much
   // free space there is at the end manually.
diff --git a/runtime/gc_root.h b/runtime/gc_root.h
index 86a8847..b10a55c 100644
--- a/runtime/gc_root.h
+++ b/runtime/gc_root.h
@@ -17,13 +17,14 @@
 #ifndef ART_RUNTIME_GC_ROOT_H_
 #define ART_RUNTIME_GC_ROOT_H_
 
+#include "base/macros.h"
 #include "base/mutex.h"       // For Locks::mutator_lock_.
 #include "object_callbacks.h"
 
 namespace art {
 
 template<class MirrorType>
-class GcRoot {
+class PACKED(4) GcRoot {
  public:
   template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   ALWAYS_INLINE MirrorType* Read() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
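
The PACKED(4) matters once GcRoot values sit inside the tracked containers introduced in this
change: without it, each std::pair<int32_t, GcRoot<mirror::String>> entry in the intern table
carries four bytes of padding after the key on LP64 hosts. A standalone illustration with
stand-in types (assuming GcRoot wraps a single pointer-sized field):

    #include <cstdint>
    #include <utility>

    #define PACKED(x) __attribute__((__packed__, __aligned__(x)))

    struct PACKED(4) PackedRoot { void* ref; };  // stand-in for GcRoot<T>
    struct PlainRoot { void* ref; };             // same payload, natural alignment

    // LP64 assumptions: 8-byte pointers, 4-byte int32_t.
    static_assert(sizeof(std::pair<int32_t, PackedRoot>) == 12, "no padding after the key");
    static_assert(sizeof(std::pair<int32_t, PlainRoot>) == 16, "key padded to pointer alignment");

    int main() { return 0; }
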
diff --git a/runtime/intern_table.h b/runtime/intern_table.h
index 435cc43..21f8046 100644
--- a/runtime/intern_table.h
+++ b/runtime/intern_table.h
@@ -19,6 +19,7 @@
 
 #include <map>
 
+#include "base/allocator.h"
 #include "base/mutex.h"
 #include "gc_root.h"
 #include "object_callbacks.h"
@@ -78,7 +79,8 @@
   void AllowNewInterns() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
  private:
-  typedef std::multimap<int32_t, GcRoot<mirror::String>> Table;
+  typedef AllocationTrackingMultiMap<int32_t, GcRoot<mirror::String>,
+                                     kAllocatorTagInternTable> Table;
 
   mirror::String* Insert(mirror::String* s, bool is_strong)
       LOCKS_EXCLUDED(Locks::intern_table_lock_)
diff --git a/runtime/jni_internal.cc b/runtime/jni_internal.cc
index 2156ed1..afbd5cb 100644
--- a/runtime/jni_internal.cc
+++ b/runtime/jni_internal.cc
@@ -24,6 +24,7 @@
 #include <vector>
 
 #include "atomic.h"
+#include "base/allocator.h"
 #include "base/logging.h"
 #include "base/mutex.h"
 #include "base/stl_util.h"
@@ -564,7 +565,7 @@
   }
 
  private:
-  SafeMap<std::string, SharedLibrary*> libraries_;
+  AllocationTrackingSafeMap<std::string, SharedLibrary*, kAllocatorTagJNILibraries> libraries_;
 };
 
 #define CHECK_NON_NULL_ARGUMENT(value) \
diff --git a/runtime/mem_map.cc b/runtime/mem_map.cc
index 6cda258..4d3f8c9 100644
--- a/runtime/mem_map.cc
+++ b/runtime/mem_map.cc
@@ -59,7 +59,7 @@
   return os;
 }
 
-std::ostream& operator<<(std::ostream& os, const std::multimap<void*, MemMap*>& mem_maps) {
+std::ostream& operator<<(std::ostream& os, const MemMap::Maps& mem_maps) {
   os << "MemMap:" << std::endl;
   for (auto it = mem_maps.begin(); it != mem_maps.end(); ++it) {
     void* base = it->first;
@@ -70,7 +70,7 @@
   return os;
 }
 
-std::multimap<void*, MemMap*> MemMap::maps_;
+MemMap::Maps MemMap::maps_;
 
 #if USE_ART_LOW_4G_ALLOCATOR
 // Handling mem_map in 32b address range for 64b architectures that do not support MAP_32BIT.
@@ -604,16 +604,12 @@
 }
 
 void MemMap::DumpMaps(std::ostream& os) {
-  DumpMaps(os, maps_);
-}
-
-void MemMap::DumpMaps(std::ostream& os, const std::multimap<void*, MemMap*>& mem_maps) {
   MutexLock mu(Thread::Current(), *Locks::mem_maps_lock_);
-  DumpMapsLocked(os, mem_maps);
+  DumpMapsLocked(os);
 }
 
-void MemMap::DumpMapsLocked(std::ostream& os, const std::multimap<void*, MemMap*>& mem_maps) {
-  os << mem_maps;
+void MemMap::DumpMapsLocked(std::ostream& os) {
+  os << maps_;
 }
 
 bool MemMap::HasMemMap(MemMap* map) {
diff --git a/runtime/mem_map.h b/runtime/mem_map.h
index 9bfcd96..e49ed48 100644
--- a/runtime/mem_map.h
+++ b/runtime/mem_map.h
@@ -26,6 +26,7 @@
 #include <sys/mman.h>  // For the PROT_* and MAP_* constants.
 #include <sys/types.h>
 
+#include "base/allocator.h"
 #include "globals.h"
 
 namespace art {
@@ -135,13 +136,13 @@
   static void DumpMaps(std::ostream& os)
       LOCKS_EXCLUDED(Locks::mem_maps_lock_);
 
+  typedef AllocationTrackingMultiMap<void*, MemMap*, kAllocatorTagMaps> Maps;
+
  private:
   MemMap(const std::string& name, byte* begin, size_t size, void* base_begin, size_t base_size,
          int prot, bool reuse) LOCKS_EXCLUDED(Locks::mem_maps_lock_);
 
-  static void DumpMaps(std::ostream& os, const std::multimap<void*, MemMap*>& mem_maps)
-      LOCKS_EXCLUDED(Locks::mem_maps_lock_);
-  static void DumpMapsLocked(std::ostream& os, const std::multimap<void*, MemMap*>& mem_maps)
+  static void DumpMapsLocked(std::ostream& os)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::mem_maps_lock_);
   static bool HasMemMap(MemMap* map)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::mem_maps_lock_);
@@ -166,7 +167,7 @@
 #endif
 
   // All the non-empty MemMaps. Use a multimap as we do a reserve-and-divide (e.g. ElfMap::Load()).
-  static std::multimap<void*, MemMap*> maps_ GUARDED_BY(Locks::mem_maps_lock_);
+  static Maps maps_ GUARDED_BY(Locks::mem_maps_lock_);
 
   friend class MemMapTest;  // To allow access to base_begin_ and base_size_.
 };
diff --git a/runtime/monitor.h b/runtime/monitor.h
index a94295a..be9e6f9 100644
--- a/runtime/monitor.h
+++ b/runtime/monitor.h
@@ -25,6 +25,7 @@
 #include <vector>
 
 #include "atomic.h"
+#include "base/allocator.h"
 #include "base/mutex.h"
 #include "gc_root.h"
 #include "object_callbacks.h"
@@ -251,6 +252,8 @@
   size_t DeflateMonitors() LOCKS_EXCLUDED(monitor_list_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  typedef std::list<Monitor*, TrackingAllocator<Monitor*, kAllocatorTagMonitorList>> Monitors;
+
  private:
   // During sweeping we may free an object and on a separate thread have an object created using
   // the newly freed memory. That object may then have its lock-word inflated and a monitor created.
@@ -259,7 +262,7 @@
   bool allow_new_monitors_ GUARDED_BY(monitor_list_lock_);
   Mutex monitor_list_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
   ConditionVariable monitor_add_condition_ GUARDED_BY(monitor_list_lock_);
-  std::list<Monitor*> list_ GUARDED_BY(monitor_list_lock_);
+  Monitors list_ GUARDED_BY(monitor_list_lock_);
 
   friend class Monitor;
   DISALLOW_COPY_AND_ASSIGN(MonitorList);
diff --git a/runtime/monitor_pool.cc b/runtime/monitor_pool.cc
index 4964aa0..4a364ca 100644
--- a/runtime/monitor_pool.cc
+++ b/runtime/monitor_pool.cc
@@ -57,7 +57,7 @@
   }
 
   // Allocate the chunk.
-  void* chunk = malloc(kChunkSize);
+  void* chunk = allocator_.allocate(kChunkSize);
   // Check we allocated memory.
   CHECK_NE(reinterpret_cast<uintptr_t>(nullptr), reinterpret_cast<uintptr_t>(chunk));
   // Check it is aligned as we need it.
@@ -131,7 +131,7 @@
   monitor->monitor_id_ = id;
 }
 
-void MonitorPool::ReleaseMonitorsToPool(Thread* self, std::list<Monitor*>* monitors) {
+void MonitorPool::ReleaseMonitorsToPool(Thread* self, MonitorList::Monitors* monitors) {
   for (Monitor* mon : *monitors) {
     ReleaseMonitorToPool(self, mon);
   }
diff --git a/runtime/monitor_pool.h b/runtime/monitor_pool.h
index 5bc28f1..cb45162 100644
--- a/runtime/monitor_pool.h
+++ b/runtime/monitor_pool.h
@@ -19,6 +19,7 @@
 
 #include "monitor.h"
 
+#include "base/allocator.h"
 #ifdef __LP64__
 #include <stdint.h>
 #include "atomic.h"
@@ -58,7 +59,7 @@
 #endif
   }
 
-  static void ReleaseMonitors(Thread* self, std::list<Monitor*>* monitors) {
+  static void ReleaseMonitors(Thread* self, MonitorList::Monitors* monitors) {
 #ifndef __LP64__
     STLDeleteElements(monitors);
 #else
@@ -110,7 +111,7 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void ReleaseMonitorToPool(Thread* self, Monitor* monitor);
-  void ReleaseMonitorsToPool(Thread* self, std::list<Monitor*>* monitors);
+  void ReleaseMonitorsToPool(Thread* self, MonitorList::Monitors* monitors);
 
   // Note: This is safe as we do not ever move chunks.
   Monitor* LookupMonitor(MonitorId mon_id) {
@@ -171,6 +172,9 @@
   // To avoid race issues when resizing, we keep all the previous arrays.
   std::vector<uintptr_t*> old_chunk_arrays_ GUARDED_BY(Locks::allocated_monitor_ids_lock_);
 
+  typedef TrackingAllocator<byte, kAllocatorTagMonitorPool> Allocator;
+  Allocator allocator_;
+
   // Start of free list of monitors.
   // Note: these point to the right memory regions, but do *not* denote initialized objects.
   Monitor* first_free_ GUARDED_BY(Locks::allocated_monitor_ids_lock_);
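
A short in-tree sketch of the chunk path changed in monitor_pool.cc above; the helper name and
size are illustrative, while inside MonitorPool the same call goes through the allocator_ member
so that chunk memory is attributed to kAllocatorTagMonitorPool:

    #include "base/allocator.h"
    #include "globals.h"

    namespace art {

    byte* AllocateTrackedChunk() {  // hypothetical helper, for illustration only
      constexpr size_t kIllustrativeChunkSize = 4096;
      TrackingAllocator<byte, kAllocatorTagMonitorPool> allocator;
      // Registers the bytes under kAllocatorTagMonitorPool when kEnableTrackingAllocator
      // is true; otherwise TrackingAllocator is plain std::allocator<byte>.
      return allocator.allocate(kIllustrativeChunkSize);
    }

    }  // namespace art
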
diff --git a/runtime/oat_file.h b/runtime/oat_file.h
index 3fd43f3..0bf2b7b 100644
--- a/runtime/oat_file.h
+++ b/runtime/oat_file.h
@@ -302,7 +302,7 @@
   // therefore we're using the OatDexFile::dex_file_location_ as the backing storage
   // for keys in oat_dex_files_ and the string_cache_ entries for the backing storage
   // of keys in secondary_oat_dex_files_ and oat_dex_files_by_canonical_location_.
-  typedef SafeMap<StringPiece, const OatDexFile*> Table;
+  typedef AllocationTrackingSafeMap<StringPiece, const OatDexFile*, kAllocatorTagOatFile> Table;
 
   // Map each plain dex file location retrieved from the oat file to its OatDexFile.
   // This map doesn't change after it's constructed in Setup() and therefore doesn't
diff --git a/runtime/reference_table.h b/runtime/reference_table.h
index 8765442..6cffa85 100644
--- a/runtime/reference_table.h
+++ b/runtime/reference_table.h
@@ -22,6 +22,7 @@
 #include <string>
 #include <vector>
 
+#include "base/allocator.h"
 #include "base/mutex.h"
 #include "gc_root.h"
 #include "object_callbacks.h"
@@ -51,7 +52,8 @@
   void VisitRoots(RootCallback* visitor, void* arg, uint32_t tid, RootType root_type);
 
  private:
-  typedef std::vector<GcRoot<mirror::Object>> Table;
+  typedef std::vector<GcRoot<mirror::Object>,
+                      TrackingAllocator<GcRoot<mirror::Object>, kAllocatorTagReferenceTable>> Table;
   static void Dump(std::ostream& os, Table& entries)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   friend class IndirectReferenceTable;  // For Dump.
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index fdebedf..c438ef7 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -936,6 +936,7 @@
   GetInternTable()->DumpForSigQuit(os);
   GetJavaVM()->DumpForSigQuit(os);
   GetHeap()->DumpForSigQuit(os);
+  TrackedAllocators::Dump(os);
   os << "\n";
 
   thread_list_->DumpForSigQuit(os);
diff --git a/runtime/runtime.h b/runtime/runtime.h
index b0a88d5..254e31a 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -26,6 +26,7 @@
 #include <utility>
 #include <vector>
 
+#include "base/allocator.h"
 #include "compiler_callbacks.h"
 #include "gc_root.h"
 #include "instrumentation.h"
@@ -592,7 +593,9 @@
   size_t method_trace_file_size_;
   instrumentation::Instrumentation instrumentation_;
 
-  typedef SafeMap<jobject, std::vector<const DexFile*>, JobjectComparator> CompileTimeClassPaths;
+  typedef AllocationTrackingSafeMap<jobject, std::vector<const DexFile*>,
+                                    kAllocatorTagCompileTimeClassPath, JobjectComparator>
+      CompileTimeClassPaths;
   CompileTimeClassPaths compile_time_class_paths_;
   bool use_compile_time_class_path_;
 
diff --git a/runtime/safe_map.h b/runtime/safe_map.h
index f7e238c..f9d81dc 100644
--- a/runtime/safe_map.h
+++ b/runtime/safe_map.h
@@ -20,6 +20,7 @@
 #include <map>
 #include <memory>
 
+#include "base/allocator.h"
 #include "base/logging.h"
 
 namespace art {
@@ -27,7 +28,7 @@
 // Equivalent to std::map, but without operator[] and its bug-prone semantics (in particular,
 // the implicit insertion of a default-constructed value on failed lookups).
 template <typename K, typename V, typename Comparator = std::less<K>,
-          typename Allocator = std::allocator<std::pair<const K, V>>>
+          typename Allocator = TrackingAllocator<std::pair<const K, V>, kAllocatorTagSafeMap>>
 class SafeMap {
  private:
   typedef SafeMap<K, V, Comparator, Allocator> Self;
@@ -130,6 +131,11 @@
   return !(lhs == rhs);
 }
 
+template<class Key, class T, AllocatorTag kTag, class Compare = std::less<Key>>
+class AllocationTrackingSafeMap : public SafeMap<
+    Key, T, Compare, TrackingAllocator<std::pair<Key, T>, kTag>> {
+};
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_SAFE_MAP_H_
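
The new alias keeps call sites short. In the sketch below (tag borrowed from the
dex_file_verifier.h hunk above), the second declaration derives from exactly the
instantiation spelled out in the first:

    #include "base/allocator.h"
    #include "safe_map.h"

    namespace art {

    // Spelled out: the tagged allocator passed explicitly.
    SafeMap<uint32_t, uint16_t, std::less<uint32_t>,
            TrackingAllocator<std::pair<uint32_t, uint16_t>, kAllocatorTagDexFileVerifier>>
        explicit_map;

    // Via the alias, which subclasses the instantiation above.
    AllocationTrackingSafeMap<uint32_t, uint16_t, kAllocatorTagDexFileVerifier> aliased_map;

    }  // namespace art
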
diff --git a/runtime/verifier/register_line.h b/runtime/verifier/register_line.h
index b0018d2..06b7cca 100644
--- a/runtime/verifier/register_line.h
+++ b/runtime/verifier/register_line.h
@@ -293,14 +293,14 @@
 
  private:
   void CopyRegToLockDepth(size_t dst, size_t src) {
-    SafeMap<uint32_t, uint32_t>::iterator it = reg_to_lock_depths_.find(src);
+    auto it = reg_to_lock_depths_.find(src);
     if (it != reg_to_lock_depths_.end()) {
       reg_to_lock_depths_.Put(dst, it->second);
     }
   }
 
   bool IsSetLockDepth(size_t reg, size_t depth) {
-    SafeMap<uint32_t, uint32_t>::iterator it = reg_to_lock_depths_.find(reg);
+    auto it = reg_to_lock_depths_.find(reg);
     if (it != reg_to_lock_depths_.end()) {
       return (it->second & (1 << depth)) != 0;
     } else {
@@ -311,7 +311,7 @@
   void SetRegToLockDepth(size_t reg, size_t depth) {
     CHECK_LT(depth, 32u);
     DCHECK(!IsSetLockDepth(reg, depth));
-    SafeMap<uint32_t, uint32_t>::iterator it = reg_to_lock_depths_.find(reg);
+    auto it = reg_to_lock_depths_.find(reg);
     if (it == reg_to_lock_depths_.end()) {
       reg_to_lock_depths_.Put(reg, 1 << depth);
     } else {
@@ -322,7 +322,7 @@
   void ClearRegToLockDepth(size_t reg, size_t depth) {
     CHECK_LT(depth, 32u);
     DCHECK(IsSetLockDepth(reg, depth));
-    SafeMap<uint32_t, uint32_t>::iterator it = reg_to_lock_depths_.find(reg);
+    auto it = reg_to_lock_depths_.find(reg);
     DCHECK(it != reg_to_lock_depths_.end());
     uint32_t depths = it->second ^ (1 << depth);
     if (depths != 0) {
@@ -337,8 +337,7 @@
   }
 
   RegisterLine(size_t num_regs, MethodVerifier* verifier)
-      : verifier_(verifier),
-        num_regs_(num_regs) {
+      : verifier_(verifier), num_regs_(num_regs) {
     memset(&line_, 0, num_regs_ * sizeof(uint16_t));
     SetResultTypeToUnknown();
   }
@@ -352,11 +351,11 @@
   // Length of reg_types_
   const uint32_t num_regs_;
   // A stack of monitor enter locations
-  std::vector<uint32_t> monitors_;
+  std::vector<uint32_t, TrackingAllocator<uint32_t, kAllocatorTagVerifier>> monitors_;
   // A map from register to a bit vector of indices into the monitors_ stack. As we pop the monitor
   // stack we verify that monitor-enter/exit are correctly nested. That is, if there was a
   // monitor-enter on v5 and then on v6, we expect the monitor-exit to be on v6 then on v5
-  SafeMap<uint32_t, uint32_t> reg_to_lock_depths_;
+  AllocationTrackingSafeMap<uint32_t, uint32_t, kAllocatorTagVerifier> reg_to_lock_depths_;
 
   // An array of RegType Ids associated with each dex register.
   uint16_t line_[0];