Optimizing: Tag Arena allocations with their source.

This makes it possible to track where we allocate memory, broken
down by ArenaAllocKind, when the kArenaAllocatorCountAllocations
flag is enabled.

Also move some allocations (code buffers, stack maps, and the
baseline mapping/vmap/GC map tables) from the native heap to the
arena, and remove the redundant EncodeUnsignedLeb128() and
EncodeSignedLeb128() wrappers from utils.h/utils.cc.
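
For reference, a minimal sketch of how the tags are used (assumes
an existing HGraph* graph; illustration only, not part of this
change):

    ArenaAllocator arena(Runtime::Current()->GetArenaPool());
    // Container storage is tagged through the allocator adapter.
    ArenaVector<uint8_t> stack_map(arena.Adapter(kArenaAllocStackMaps));
    // Arena objects are tagged through their ArenaObject<> base class.
    HBasicBlock* block = new (&arena) HBasicBlock(graph);  // kArenaAllocBasicBlock
    // With kArenaAllocatorCountAllocations enabled, per-kind totals
    // can be dumped via MemStats.
    MemStats mem_stats(arena.GetMemStats());
    LOG(INFO) << Dumpable<MemStats>(mem_stats);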

Bug: 23736311
Change-Id: I1aaef3fd405d1de444fe9e618b1ce7ecef07ade3
diff --git a/compiler/dex/dex_to_dex_compiler.cc b/compiler/dex/dex_to_dex_compiler.cc
index 603130a..acede45 100644
--- a/compiler/dex/dex_to_dex_compiler.cc
+++ b/compiler/dex/dex_to_dex_compiler.cc
@@ -335,7 +335,7 @@
     }
 
     // Create a `CompiledMethod`, with the quickened information in the vmap table.
-    Leb128EncodingVector builder;
+    Leb128EncodingVector<> builder;
     for (QuickenedInfo info : dex_compiler.GetQuickenedInfo()) {
       builder.PushBackUnsigned(info.dex_pc);
       builder.PushBackUnsigned(info.dex_member_index);
diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h
index 8bf709a..bcfd440 100644
--- a/compiler/dex/mir_graph.h
+++ b/compiler/dex/mir_graph.h
@@ -368,7 +368,7 @@
 
 struct SuccessorBlockInfo;
 
-class BasicBlock : public DeletableArenaObject<kArenaAllocBB> {
+class BasicBlock : public DeletableArenaObject<kArenaAllocBasicBlock> {
  public:
   BasicBlock(BasicBlockId block_id, BBType type, ArenaAllocator* allocator)
       : id(block_id),
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index 72754ae..7082bed 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -1115,7 +1115,7 @@
 
 CompiledMethod* Mir2Lir::GetCompiledMethod() {
   // Combine vmap tables - core regs, then fp regs - into vmap_table.
-  Leb128EncodingVector vmap_encoder;
+  Leb128EncodingVector<> vmap_encoder;
   if (frame_size_ > 0) {
     // Prefix the encoded data with its size.
     size_t size = core_vmap_table_.size() + 1 /* marker */ + fp_vmap_table_.size();
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 7c60026..3f69270 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -585,7 +585,7 @@
 }
 
 void CodeGenerator::BuildNativeGCMap(
-    std::vector<uint8_t>* data, const DexCompilationUnit& dex_compilation_unit) const {
+    ArenaVector<uint8_t>* data, const DexCompilationUnit& dex_compilation_unit) const {
   const std::vector<uint8_t>& gc_map_raw =
       dex_compilation_unit.GetVerifiedMethod()->GetDexGcMap();
   verifier::DexPcToReferenceMap dex_gc_map(&(gc_map_raw)[0]);
@@ -613,7 +613,7 @@
   }
 }
 
-void CodeGenerator::BuildMappingTable(std::vector<uint8_t>* data) const {
+void CodeGenerator::BuildMappingTable(ArenaVector<uint8_t>* data) const {
   uint32_t pc2dex_data_size = 0u;
   uint32_t pc2dex_entries = stack_map_stream_.GetNumberOfStackMaps();
   uint32_t pc2dex_offset = 0u;
@@ -712,18 +712,16 @@
   }
 }
 
-void CodeGenerator::BuildVMapTable(std::vector<uint8_t>* data) const {
-  Leb128EncodingVector vmap_encoder;
+void CodeGenerator::BuildVMapTable(ArenaVector<uint8_t>* data) const {
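+  // Encode directly into the arena-backed output vector; the copy into *data is no longer needed.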
+  Leb128Encoder<ArenaAllocatorAdapter<uint8_t>> vmap_encoder(data);
   // We currently don't use callee-saved registers.
   size_t size = 0 + 1 /* marker */ + 0;
   vmap_encoder.Reserve(size + 1u);  // All values are likely to be one byte in ULEB128 (<128).
   vmap_encoder.PushBackUnsigned(size);
   vmap_encoder.PushBackUnsigned(VmapTable::kAdjustedFpMarker);
-
-  *data = vmap_encoder.GetData();
 }
 
-void CodeGenerator::BuildStackMaps(std::vector<uint8_t>* data) {
+void CodeGenerator::BuildStackMaps(ArenaVector<uint8_t>* data) {
   uint32_t size = stack_map_stream_.PrepareForFillIn();
   data->resize(size);
   MemoryRegion region(data->data(), size);
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index cdd4675..754b5ec 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -19,6 +19,8 @@
 
 #include "arch/instruction_set.h"
 #include "arch/instruction_set_features.h"
+#include "base/arena_containers.h"
+#include "base/arena_object.h"
 #include "base/bit_field.h"
 #include "driver/compiler_options.h"
 #include "globals.h"
@@ -236,11 +238,11 @@
   }
 
   void BuildSourceMap(DefaultSrcMap* src_map) const;
-  void BuildMappingTable(std::vector<uint8_t>* vector) const;
-  void BuildVMapTable(std::vector<uint8_t>* vector) const;
+  void BuildMappingTable(ArenaVector<uint8_t>* vector) const;
+  void BuildVMapTable(ArenaVector<uint8_t>* vector) const;
   void BuildNativeGCMap(
-      std::vector<uint8_t>* vector, const DexCompilationUnit& dex_compilation_unit) const;
-  void BuildStackMaps(std::vector<uint8_t>* vector);
+      ArenaVector<uint8_t>* vector, const DexCompilationUnit& dex_compilation_unit) const;
+  void BuildStackMaps(ArenaVector<uint8_t>* vector);
 
   bool IsBaseline() const {
     return is_baseline_;
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 2ed2d9a..ee82fda 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -17,6 +17,7 @@
 #ifndef ART_COMPILER_OPTIMIZING_NODES_H_
 #define ART_COMPILER_OPTIMIZING_NODES_H_
 
+#include <array>
 #include <type_traits>
 
 #include "base/arena_containers.h"
@@ -81,7 +82,7 @@
   kCondGE,
 };
 
-class HInstructionList {
+class HInstructionList : public ValueObject {
  public:
   HInstructionList() : first_instruction_(nullptr), last_instruction_(nullptr) {}
 
@@ -127,7 +128,7 @@
 };
 
 // Control-flow graph of a method. Contains a list of basic blocks.
-class HGraph : public ArenaObject<kArenaAllocMisc> {
+class HGraph : public ArenaObject<kArenaAllocGraph> {
  public:
   HGraph(ArenaAllocator* arena,
          const DexFile& dex_file,
@@ -464,7 +465,7 @@
   DISALLOW_COPY_AND_ASSIGN(HGraph);
 };
 
-class HLoopInformation : public ArenaObject<kArenaAllocMisc> {
+class HLoopInformation : public ArenaObject<kArenaAllocLoopInfo> {
  public:
   HLoopInformation(HBasicBlock* header, HGraph* graph)
       : header_(header),
@@ -562,7 +563,7 @@
 // Stores try/catch information for basic blocks.
 // Note that HGraph is constructed so that catch blocks cannot simultaneously
 // be try blocks.
-class TryCatchInformation : public ArenaObject<kArenaAllocMisc> {
+class TryCatchInformation : public ArenaObject<kArenaAllocTryCatchInfo> {
  public:
   // Try block information constructor.
   explicit TryCatchInformation(const HTryBoundary& try_entry)
@@ -619,7 +620,7 @@
 // as a double linked list. Each block knows its predecessors and
 // successors.
 
-class HBasicBlock : public ArenaObject<kArenaAllocMisc> {
+class HBasicBlock : public ArenaObject<kArenaAllocBasicBlock> {
  public:
   explicit HBasicBlock(HGraph* graph, uint32_t dex_pc = kNoDexPc)
       : graph_(graph),
@@ -1107,7 +1108,7 @@
 template <typename T> class HUseList;
 
 template <typename T>
-class HUseListNode : public ArenaObject<kArenaAllocMisc> {
+class HUseListNode : public ArenaObject<kArenaAllocUseListNode> {
  public:
   HUseListNode* GetPrevious() const { return prev_; }
   HUseListNode* GetNext() const { return next_; }
@@ -1492,7 +1493,7 @@
 };
 
 // A HEnvironment object contains the values of virtual registers at a given location.
-class HEnvironment : public ArenaObject<kArenaAllocMisc> {
+class HEnvironment : public ArenaObject<kArenaAllocEnvironment> {
  public:
   HEnvironment(ArenaAllocator* arena,
                size_t number_of_vregs,
@@ -1682,7 +1683,7 @@
 
 std::ostream& operator<<(std::ostream& os, const ReferenceTypeInfo& rhs);
 
-class HInstruction : public ArenaObject<kArenaAllocMisc> {
+class HInstruction : public ArenaObject<kArenaAllocInstruction> {
  public:
   explicit HInstruction(SideEffects side_effects)
       : previous_(nullptr),
@@ -2038,54 +2039,7 @@
   DISALLOW_COPY_AND_ASSIGN(HBackwardInstructionIterator);
 };
 
-// An embedded container with N elements of type T.  Used (with partial
-// specialization for N=0) because embedded arrays cannot have size 0.
-template<typename T, intptr_t N>
-class EmbeddedArray {
- public:
-  EmbeddedArray() : elements_() {}
-
-  intptr_t GetLength() const { return N; }
-
-  const T& operator[](intptr_t i) const {
-    DCHECK_LT(i, GetLength());
-    return elements_[i];
-  }
-
-  T& operator[](intptr_t i) {
-    DCHECK_LT(i, GetLength());
-    return elements_[i];
-  }
-
-  const T& At(intptr_t i) const {
-    return (*this)[i];
-  }
-
-  void SetAt(intptr_t i, const T& val) {
-    (*this)[i] = val;
-  }
-
- private:
-  T elements_[N];
-};
-
-template<typename T>
-class EmbeddedArray<T, 0> {
- public:
-  intptr_t length() const { return 0; }
-  const T& operator[](intptr_t i) const {
-    UNUSED(i);
-    LOG(FATAL) << "Unreachable";
-    UNREACHABLE();
-  }
-  T& operator[](intptr_t i) {
-    UNUSED(i);
-    LOG(FATAL) << "Unreachable";
-    UNREACHABLE();
-  }
-};
-
-template<intptr_t N>
+template<size_t N>
 class HTemplateInstruction: public HInstruction {
  public:
   HTemplateInstruction<N>(SideEffects side_effects)
@@ -2095,18 +2049,47 @@
   size_t InputCount() const OVERRIDE { return N; }
 
  protected:
-  const HUserRecord<HInstruction*> InputRecordAt(size_t i) const OVERRIDE { return inputs_[i]; }
+  const HUserRecord<HInstruction*> InputRecordAt(size_t i) const OVERRIDE {
+    DCHECK_LT(i, N);
+    return inputs_[i];
+  }
 
   void SetRawInputRecordAt(size_t i, const HUserRecord<HInstruction*>& input) OVERRIDE {
+    DCHECK_LT(i, N);
     inputs_[i] = input;
   }
 
  private:
-  EmbeddedArray<HUserRecord<HInstruction*>, N> inputs_;
+  std::array<HUserRecord<HInstruction*>, N> inputs_;
 
   friend class SsaBuilder;
 };
 
+// HTemplateInstruction specialization for N=0.
+template<>
+class HTemplateInstruction<0>: public HInstruction {
+ public:
+  explicit HTemplateInstruction(SideEffects side_effects) : HInstruction(side_effects) {}
+  virtual ~HTemplateInstruction() {}
+
+  size_t InputCount() const OVERRIDE { return 0; }
+
+ protected:
+  const HUserRecord<HInstruction*> InputRecordAt(size_t i ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << "Unreachable";
+    UNREACHABLE();
+  }
+
+  void SetRawInputRecordAt(size_t i ATTRIBUTE_UNUSED,
+                           const HUserRecord<HInstruction*>& input ATTRIBUTE_UNUSED) OVERRIDE {
+    LOG(FATAL) << "Unreachable";
+    UNREACHABLE();
+  }
+
+ private:
+  friend class SsaBuilder;
+};
+
 template<intptr_t N>
 class HExpression : public HTemplateInstruction<N> {
  public:
@@ -4833,7 +4816,7 @@
   DISALLOW_COPY_AND_ASSIGN(HFakeString);
 };
 
-class MoveOperands : public ArenaObject<kArenaAllocMisc> {
+class MoveOperands : public ArenaObject<kArenaAllocMoveOperands> {
  public:
   MoveOperands(Location source,
                Location destination,
diff --git a/compiler/optimizing/optimization.h b/compiler/optimizing/optimization.h
index f793a65..2f59d4c 100644
--- a/compiler/optimizing/optimization.h
+++ b/compiler/optimizing/optimization.h
@@ -26,7 +26,7 @@
 /**
  * Abstraction to implement an optimization pass.
  */
-class HOptimization : public ArenaObject<kArenaAllocMisc> {
+class HOptimization : public ArenaObject<kArenaAllocOptimization> {
  public:
   HOptimization(HGraph* graph,
                 const char* pass_name,
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 6f251e8..898b656 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -25,6 +25,7 @@
 
 #include "art_method-inl.h"
 #include "base/arena_allocator.h"
+#include "base/arena_containers.h"
 #include "base/dumpable.h"
 #include "base/timing_logger.h"
 #include "boolean_simplifier.h"
@@ -68,7 +69,9 @@
  */
 class CodeVectorAllocator FINAL : public CodeAllocator {
  public:
-  CodeVectorAllocator() : size_(0) {}
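+  // The code buffer is backed by the compilation arena, tagged kArenaAllocCodeBuffer.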
+  explicit CodeVectorAllocator(ArenaAllocator* arena)
+      : memory_(arena->Adapter(kArenaAllocCodeBuffer)),
+        size_(0) {}
 
   virtual uint8_t* Allocate(size_t size) {
     size_ = size;
@@ -77,10 +80,10 @@
   }
 
   size_t GetSize() const { return size_; }
-  const std::vector<uint8_t>& GetMemory() const { return memory_; }
+  const ArenaVector<uint8_t>& GetMemory() const { return memory_; }
 
  private:
-  std::vector<uint8_t> memory_;
+  ArenaVector<uint8_t> memory_;
   size_t size_;
 
   DISALLOW_COPY_AND_ASSIGN(CodeVectorAllocator);
@@ -498,7 +501,7 @@
 
 // The stack map we generate must be 4-byte aligned on ARM. Since existing
 // maps are generated alongside these stack maps, we must also align them.
-static ArrayRef<const uint8_t> AlignVectorSize(std::vector<uint8_t>& vector) {
+static ArrayRef<const uint8_t> AlignVectorSize(ArenaVector<uint8_t>& vector) {
   size_t size = vector.size();
   size_t aligned_size = RoundUp(size, 4);
   for (; size < aligned_size; ++size) {
@@ -553,7 +556,8 @@
 
   AllocateRegisters(graph, codegen, pass_observer);
 
-  CodeVectorAllocator allocator;
+  ArenaAllocator* arena = graph->GetArena();
+  CodeVectorAllocator allocator(arena);
   codegen->CompileOptimized(&allocator);
 
   ArenaVector<LinkerPatch> linker_patches = EmitAndSortLinkerPatches(codegen);
@@ -563,7 +567,7 @@
     codegen->BuildSourceMap(&src_mapping_table);
   }
 
-  std::vector<uint8_t> stack_map;
+  ArenaVector<uint8_t> stack_map(arena->Adapter(kArenaAllocStackMaps));
   codegen->BuildStackMaps(&stack_map);
 
   MaybeRecordStat(MethodCompilationStat::kCompiledOptimized);
@@ -595,20 +599,21 @@
     CompilerDriver* compiler_driver,
     const DexCompilationUnit& dex_compilation_unit,
     PassObserver* pass_observer) const {
-  CodeVectorAllocator allocator;
+  ArenaAllocator* arena = codegen->GetGraph()->GetArena();
+  CodeVectorAllocator allocator(arena);
   codegen->CompileBaseline(&allocator);
 
   ArenaVector<LinkerPatch> linker_patches = EmitAndSortLinkerPatches(codegen);
 
-  std::vector<uint8_t> mapping_table;
+  ArenaVector<uint8_t> mapping_table(arena->Adapter(kArenaAllocBaselineMaps));
   codegen->BuildMappingTable(&mapping_table);
   DefaultSrcMap src_mapping_table;
   if (compiler_driver->GetCompilerOptions().GetGenerateDebugInfo()) {
     codegen->BuildSourceMap(&src_mapping_table);
   }
-  std::vector<uint8_t> vmap_table;
+  ArenaVector<uint8_t> vmap_table(arena->Adapter(kArenaAllocBaselineMaps));
   codegen->BuildVMapTable(&vmap_table);
-  std::vector<uint8_t> gc_map;
+  ArenaVector<uint8_t> gc_map(arena->Adapter(kArenaAllocBaselineMaps));
   codegen->BuildNativeGCMap(&gc_map, dex_compilation_unit);
 
   MaybeRecordStat(MethodCompilationStat::kCompiledBaseline);
@@ -752,6 +757,7 @@
   // or the debuggable flag). If it is set, we can run baseline. Otherwise, we fall back
   // to Quick.
   bool can_use_baseline = !run_optimizations_ && builder.CanUseBaselineForStringInit();
+  CompiledMethod* compiled_method = nullptr;
   if (run_optimizations_ && can_allocate_registers) {
     VLOG(compiler) << "Optimizing " << method_name;
 
@@ -766,11 +772,11 @@
       }
     }
 
-    return CompileOptimized(graph,
-                            codegen.get(),
-                            compiler_driver,
-                            dex_compilation_unit,
-                            &pass_observer);
+    compiled_method = CompileOptimized(graph,
+                                       codegen.get(),
+                                       compiler_driver,
+                                       dex_compilation_unit,
+                                       &pass_observer);
   } else if (shouldOptimize && can_allocate_registers) {
     LOG(FATAL) << "Could not allocate registers in optimizing compiler";
     UNREACHABLE();
@@ -783,13 +789,20 @@
       MaybeRecordStat(MethodCompilationStat::kNotOptimizedRegisterAllocator);
     }
 
-    return CompileBaseline(codegen.get(),
-                           compiler_driver,
-                           dex_compilation_unit,
-                           &pass_observer);
-  } else {
-    return nullptr;
+    compiled_method = CompileBaseline(codegen.get(),
+                                      compiler_driver,
+                                      dex_compilation_unit,
+                                      &pass_observer);
   }
+
+  if (kArenaAllocatorCountAllocations) {
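+    // Dump per-kind allocation stats for methods that used more than 4MB of arena memory.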
+    if (arena.BytesAllocated() > 4 * MB) {
+      MemStats mem_stats(arena.GetMemStats());
+      LOG(INFO) << PrettyMethod(method_idx, dex_file) << " " << Dumpable<MemStats>(mem_stats);
+    }
+  }
+
+  return compiled_method;
 }
 
 CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item,
diff --git a/runtime/base/arena_allocator.cc b/runtime/base/arena_allocator.cc
index e5832e1..3a4bccd 100644
--- a/runtime/base/arena_allocator.cc
+++ b/runtime/base/arena_allocator.cc
@@ -32,32 +32,43 @@
 
 template <bool kCount>
 const char* const ArenaAllocatorStatsImpl<kCount>::kAllocNames[] = {
-  "Misc       ",
-  "BasicBlock ",
-  "BBList     ",
-  "BBPreds    ",
-  "DfsPreOrd  ",
-  "DfsPostOrd ",
-  "DomPostOrd ",
-  "TopoOrd    ",
-  "Lowering   ",
-  "LIR        ",
-  "LIR masks  ",
-  "SwitchTbl  ",
-  "FillArray  ",
-  "SlowPaths  ",
-  "MIR        ",
-  "DataFlow   ",
-  "GrowList   ",
-  "GrowBitMap ",
-  "SSA2Dalvik ",
-  "Dalvik2SSA ",
-  "DebugInfo  ",
-  "Successor  ",
-  "RegAlloc   ",
-  "Data       ",
-  "Preds      ",
-  "STL        ",
+  "Misc         ",
+  "BBList       ",
+  "BBPreds      ",
+  "DfsPreOrd    ",
+  "DfsPostOrd   ",
+  "DomPostOrd   ",
+  "TopoOrd      ",
+  "Lowering     ",
+  "LIR          ",
+  "LIR masks    ",
+  "SwitchTbl    ",
+  "FillArray    ",
+  "SlowPaths    ",
+  "MIR          ",
+  "DataFlow     ",
+  "GrowList     ",
+  "GrowBitMap   ",
+  "SSA2Dalvik   ",
+  "Dalvik2SSA   ",
+  "DebugInfo    ",
+  "Successor    ",
+  "RegAlloc     ",
+  "Data         ",
+  "Preds        ",
+  "STL          ",
+  "Graph        ",
+  "BasicBlock   ",
+  "Instruction  ",
+  "LoopInfo     ",
+  "TryCatchInf  ",
+  "UseListNode  ",
+  "Environment  ",
+  "MoveOperands ",
+  "CodeBuffer   ",
+  "StackMaps    ",
+  "BaselineMaps ",
+  "Optimization ",
 };
 
 template <bool kCount>
diff --git a/runtime/base/arena_allocator.h b/runtime/base/arena_allocator.h
index 05c66f0..af2bfbc 100644
--- a/runtime/base/arena_allocator.h
+++ b/runtime/base/arena_allocator.h
@@ -43,7 +43,6 @@
 // Type of allocation for memory tuning.
 enum ArenaAllocKind {
   kArenaAllocMisc,
-  kArenaAllocBB,
   kArenaAllocBBList,
   kArenaAllocBBPredecessors,
   kArenaAllocDfsPreOrder,
@@ -68,6 +67,18 @@
   kArenaAllocData,
   kArenaAllocPredecessors,
   kArenaAllocSTL,
+  kArenaAllocGraph,
+  kArenaAllocBasicBlock,
+  kArenaAllocInstruction,
+  kArenaAllocLoopInfo,
+  kArenaAllocTryCatchInfo,
+  kArenaAllocUseListNode,
+  kArenaAllocEnvironment,
+  kArenaAllocMoveOperands,
+  kArenaAllocCodeBuffer,
+  kArenaAllocStackMaps,
+  kArenaAllocBaselineMaps,
+  kArenaAllocOptimization,
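+  // Note: new kinds need a matching entry in kAllocNames in arena_allocator.cc.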
   kNumArenaAllocKinds
 };
 
diff --git a/runtime/base/arena_containers.h b/runtime/base/arena_containers.h
index a7aafdf..810c1c4 100644
--- a/runtime/base/arena_containers.h
+++ b/runtime/base/arena_containers.h
@@ -76,6 +76,7 @@
 class ArenaAllocatorAdapterKindImpl {
  public:
   explicit ArenaAllocatorAdapterKindImpl(ArenaAllocKind kind) : kind_(kind) { }
+  ArenaAllocatorAdapterKindImpl(const ArenaAllocatorAdapterKindImpl&) = default;
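+  // Copying the adapter preserves its allocation kind.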
   ArenaAllocatorAdapterKindImpl& operator=(const ArenaAllocatorAdapterKindImpl&) = default;
   ArenaAllocKind Kind() { return kind_; }
 
diff --git a/runtime/exception_test.cc b/runtime/exception_test.cc
index de4b3f4..33d756e 100644
--- a/runtime/exception_test.cc
+++ b/runtime/exception_test.cc
@@ -108,8 +108,8 @@
   const DexFile* dex_;
 
   std::vector<uint8_t> fake_code_;
-  Leb128EncodingVector fake_mapping_data_;
-  Leb128EncodingVector fake_vmap_table_data_;
+  Leb128EncodingVector<> fake_mapping_data_;
+  Leb128EncodingVector<> fake_vmap_table_data_;
   std::vector<uint8_t> fake_gc_map_;
   std::vector<uint8_t> fake_header_code_and_maps_;
 
diff --git a/runtime/leb128.h b/runtime/leb128.h
index 976936d..baf9da2 100644
--- a/runtime/leb128.h
+++ b/runtime/leb128.h
@@ -178,10 +178,11 @@
   dest->push_back(out);
 }
 
-// An encoder that pushed uint32_t data onto the given std::vector.
+// An encoder that pushes int32_t/uint32_t data onto the given std::vector.
+template <typename Allocator = std::allocator<uint8_t>>
 class Leb128Encoder {
  public:
-  explicit Leb128Encoder(std::vector<uint8_t>* data) : data_(data) {
+  explicit Leb128Encoder(std::vector<uint8_t, Allocator>* data) : data_(data) {
     DCHECK(data != nullptr);
   }
 
@@ -211,22 +212,27 @@
     }
   }
 
-  const std::vector<uint8_t>& GetData() const {
+  const std::vector<uint8_t, Allocator>& GetData() const {
     return *data_;
   }
 
  protected:
-  std::vector<uint8_t>* const data_;
+  std::vector<uint8_t, Allocator>* const data_;
 
  private:
   DISALLOW_COPY_AND_ASSIGN(Leb128Encoder);
 };
 
 // An encoder with an API similar to vector<uint32_t> where the data is captured in ULEB128 format.
-class Leb128EncodingVector FINAL : private std::vector<uint8_t>, public Leb128Encoder {
+template <typename Allocator = std::allocator<uint8_t>>
+class Leb128EncodingVector FINAL : private std::vector<uint8_t, Allocator>,
+                                   public Leb128Encoder<Allocator> {
  public:
-  Leb128EncodingVector() : Leb128Encoder(this) {
-  }
+  Leb128EncodingVector() : Leb128Encoder<Allocator>(this) { }
+
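+  // Allows the backing vector to be constructed with a custom allocator, e.g. an arena adapter.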
+  explicit Leb128EncodingVector(const Allocator& alloc)
+    : std::vector<uint8_t, Allocator>(alloc),
+      Leb128Encoder<Allocator>(this) { }
 
  private:
   DISALLOW_COPY_AND_ASSIGN(Leb128EncodingVector);
diff --git a/runtime/leb128_test.cc b/runtime/leb128_test.cc
index 1bb493d..09f7ecc 100644
--- a/runtime/leb128_test.cc
+++ b/runtime/leb128_test.cc
@@ -94,7 +94,7 @@
 TEST(Leb128Test, UnsignedSinglesVector) {
   // Test individual encodings.
   for (size_t i = 0; i < arraysize(uleb128_tests); ++i) {
-    Leb128EncodingVector builder;
+    Leb128EncodingVector<> builder;
     builder.PushBackUnsigned(uleb128_tests[i].decoded);
     EXPECT_EQ(UnsignedLeb128Size(uleb128_tests[i].decoded), builder.GetData().size());
     const uint8_t* data_ptr = &uleb128_tests[i].leb128_data[0];
@@ -131,7 +131,7 @@
 
 TEST(Leb128Test, UnsignedStreamVector) {
   // Encode a number of entries.
-  Leb128EncodingVector builder;
+  Leb128EncodingVector<> builder;
   for (size_t i = 0; i < arraysize(uleb128_tests); ++i) {
     builder.PushBackUnsigned(uleb128_tests[i].decoded);
   }
@@ -175,7 +175,7 @@
 TEST(Leb128Test, SignedSinglesVector) {
   // Test individual encodings.
   for (size_t i = 0; i < arraysize(sleb128_tests); ++i) {
-    Leb128EncodingVector builder;
+    Leb128EncodingVector<> builder;
     builder.PushBackSigned(sleb128_tests[i].decoded);
     EXPECT_EQ(SignedLeb128Size(sleb128_tests[i].decoded), builder.GetData().size());
     const uint8_t* data_ptr = &sleb128_tests[i].leb128_data[0];
@@ -212,7 +212,7 @@
 
 TEST(Leb128Test, SignedStreamVector) {
   // Encode a number of entries.
-  Leb128EncodingVector builder;
+  Leb128EncodingVector<> builder;
   for (size_t i = 0; i < arraysize(sleb128_tests); ++i) {
     builder.PushBackSigned(sleb128_tests[i].decoded);
   }
@@ -275,7 +275,7 @@
 TEST(Leb128Test, Speed) {
   std::unique_ptr<Histogram<uint64_t>> enc_hist(new Histogram<uint64_t>("Leb128EncodeSpeedTest", 5));
   std::unique_ptr<Histogram<uint64_t>> dec_hist(new Histogram<uint64_t>("Leb128DecodeSpeedTest", 5));
-  Leb128EncodingVector builder;
+  Leb128EncodingVector<> builder;
   // Push back 1024 chunks of 1024 values measuring encoding speed.
   uint64_t last_time = NanoTime();
   for (size_t i = 0; i < 1024; i++) {
diff --git a/runtime/utils.cc b/runtime/utils.cc
index 8aa1189..27dacea 100644
--- a/runtime/utils.cc
+++ b/runtime/utils.cc
@@ -1441,14 +1441,6 @@
   return true;
 }
 
-void EncodeUnsignedLeb128(uint32_t data, std::vector<uint8_t>* dst) {
-  Leb128Encoder(dst).PushBackUnsigned(data);
-}
-
-void EncodeSignedLeb128(int32_t data, std::vector<uint8_t>* dst) {
-  Leb128Encoder(dst).PushBackSigned(data);
-}
-
 std::string PrettyDescriptor(Primitive::Type type) {
   return PrettyDescriptor(Primitive::Descriptor(type));
 }
diff --git a/runtime/utils.h b/runtime/utils.h
index d1be51a..16835c2 100644
--- a/runtime/utils.h
+++ b/runtime/utils.h
@@ -294,9 +294,6 @@
   buf->push_back((data >> 24) & 0xff);
 }
 
-void EncodeUnsignedLeb128(uint32_t data, std::vector<uint8_t>* buf);
-void EncodeSignedLeb128(int32_t data, std::vector<uint8_t>* buf);
-
 // Deleter using free() for use with std::unique_ptr<>. See also UniqueCPtr<> below.
 struct FreeDelete {
   // NOTE: Deleting a const object is valid but free() takes a non-const pointer.