Merge "Trim arenas for JIT"
diff --git a/compiler/dex/quick/quick_compiler.cc b/compiler/dex/quick/quick_compiler.cc
index 13a6d9d..02d74a0 100644
--- a/compiler/dex/quick/quick_compiler.cc
+++ b/compiler/dex/quick/quick_compiler.cc
@@ -628,12 +628,13 @@
 
   DCHECK(driver->GetCompilerOptions().IsCompilationEnabled());
 
-  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+  Runtime* const runtime = Runtime::Current();
+  ClassLinker* const class_linker = runtime->GetClassLinker();
   InstructionSet instruction_set = driver->GetInstructionSet();
   if (instruction_set == kArm) {
     instruction_set = kThumb2;
   }
-  CompilationUnit cu(driver->GetArenaPool(), instruction_set, driver, class_linker);
+  CompilationUnit cu(runtime->GetArenaPool(), instruction_set, driver, class_linker);
 
   CHECK((cu.instruction_set == kThumb2) ||
         (cu.instruction_set == kArm64) ||
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index d1291fa..78dd6cc 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -2412,8 +2412,9 @@
 
 std::string CompilerDriver::GetMemoryUsageString(bool extended) const {
   std::ostringstream oss;
-  const ArenaPool* arena_pool = GetArenaPool();
-  gc::Heap* heap = Runtime::Current()->GetHeap();
+  Runtime* const runtime = Runtime::Current();
+  const ArenaPool* arena_pool = runtime->GetArenaPool();
+  gc::Heap* const heap = runtime->GetHeap();
   oss << "arena alloc=" << PrettySize(arena_pool->GetBytesAllocated());
   oss << " java alloc=" << PrettySize(heap->GetBytesAllocated());
 #ifdef HAVE_MALLOC_H
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index f949667..28a8245 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -362,12 +362,6 @@
     support_boot_image_fixup_ = support_boot_image_fixup;
   }
 
-  ArenaPool* GetArenaPool() {
-    return &arena_pool_;
-  }
-  const ArenaPool* GetArenaPool() const {
-    return &arena_pool_;
-  }
   SwapAllocator<void>& GetSwapSpaceAllocator() {
     return *swap_space_allocator_.get();
   }
@@ -606,9 +600,6 @@
 
   void* compiler_context_;
 
-  // Arena pool used by the compiler.
-  ArenaPool arena_pool_;
-
   bool support_boot_image_fixup_;
 
   // DeDuplication data structures, these own the corresponding byte arrays.
diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc
index 04efa21..a63e14a 100644
--- a/compiler/jit/jit_compiler.cc
+++ b/compiler/jit/jit_compiler.cc
@@ -103,7 +103,7 @@
 }
 
 bool JitCompiler::CompileMethod(Thread* self, mirror::ArtMethod* method) {
-  uint64_t start_time = NanoTime();
+  const uint64_t start_time = NanoTime();
   StackHandleScope<2> hs(self);
   self->AssertNoPendingException();
   Runtime* runtime = Runtime::Current();
@@ -130,6 +130,8 @@
     }
   }
   CompiledMethod* compiled_method(compiler_driver_->CompileMethod(self, h_method.Get()));
+  // Trim maps to reduce memory usage. TODO: measure how much this increases compile time.
+  runtime->GetArenaPool()->TrimMaps();
   if (compiled_method == nullptr) {
     return false;
   }
@@ -137,7 +139,7 @@
   // Don't add the method if we are supposed to be deoptimized.
   bool result = false;
   if (!runtime->GetInstrumentation()->AreAllMethodsDeoptimized()) {
-    const void* code = Runtime::Current()->GetClassLinker()->GetOatMethodQuickCodeFor(
+    const void* code = runtime->GetClassLinker()->GetOatMethodQuickCodeFor(
         h_method.Get());
     if (code != nullptr) {
       // Already have some compiled code, just use this instead of linking.
diff --git a/runtime/base/arena_allocator.cc b/runtime/base/arena_allocator.cc
index e6380bf..70d138d 100644
--- a/runtime/base/arena_allocator.cc
+++ b/runtime/base/arena_allocator.cc
@@ -26,8 +26,9 @@
 
 namespace art {
 
-// Memmap is a bit slower than malloc according to my measurements.
-static constexpr bool kUseMemMap = false;
+// MemMap is a bit slower than malloc to allocate, but the arena pool mitigates this by only
+// allocating a few arenas and recycling them afterwards.
+static constexpr bool kUseMemMap = true;
 static constexpr bool kUseMemSet = true && kUseMemMap;
 static constexpr size_t kValgrindRedZoneBytes = 8;
 constexpr size_t Arena::kDefaultSize;
@@ -129,8 +130,8 @@
       next_(nullptr) {
   if (kUseMemMap) {
     std::string error_msg;
-    map_ = MemMap::MapAnonymous("dalvik-arena", nullptr, size, PROT_READ | PROT_WRITE, false, false,
-                                &error_msg);
+    map_ = MemMap::MapAnonymous("dalvik-LinearAlloc", nullptr, size, PROT_READ | PROT_WRITE, false,
+                                false, &error_msg);
     CHECK(map_ != nullptr) << error_msg;
     memory_ = map_->Begin();
     size_ = map_->Size();
@@ -148,8 +149,16 @@
   }
 }
 
+void Arena::Release() {
+  if (kUseMemMap && bytes_allocated_ > 0) {
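+    // Return the pages to the kernel; the mapping stays valid and reads back as zero.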
+    map_->MadviseDontNeedAndZero();
+    bytes_allocated_ = 0;
+  }
+}
+
 void Arena::Reset() {
-  if (bytes_allocated_) {
+  if (bytes_allocated_ > 0) {
     if (kUseMemSet || !kUseMemMap) {
       memset(Begin(), 0, bytes_allocated_);
     } else {
@@ -162,6 +170,10 @@
 ArenaPool::ArenaPool()
     : lock_("Arena pool lock"),
       free_arenas_(nullptr) {
+  if (kUseMemMap) {
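+    // Arenas are backed by MemMap; initialize the MemMap system before the first map is created.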
+    MemMap::Init();
+  }
 }
 
 ArenaPool::~ArenaPool() {
@@ -189,6 +200,14 @@
   return ret;
 }
 
+void ArenaPool::TrimMaps() {
+  MutexLock lock(Thread::Current(), lock_);
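+  // Only arenas on the free list are trimmed; arenas currently in use keep their memory.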
+  for (auto* arena = free_arenas_; arena != nullptr; arena = arena->next_) {
+    arena->Release();
+  }
+}
+
 size_t ArenaPool::GetBytesAllocated() const {
   size_t total = 0;
   MutexLock lock(Thread::Current(), lock_);
diff --git a/runtime/base/arena_allocator.h b/runtime/base/arena_allocator.h
index 9237391..04ca3ea 100644
--- a/runtime/base/arena_allocator.h
+++ b/runtime/base/arena_allocator.h
@@ -118,7 +118,10 @@
   static constexpr size_t kDefaultSize = 128 * KB;
   explicit Arena(size_t size = kDefaultSize);
   ~Arena();
+  // Reset zeroes the arena memory before reuse, using memset for performance.
   void Reset();
+  // Release madvises the arena memory back to the kernel between uses to reduce memory usage.
+  void Release();
   uint8_t* Begin() {
     return memory_;
   }
@@ -160,6 +163,9 @@
   Arena* AllocArena(size_t size) LOCKS_EXCLUDED(lock_);
   void FreeArenaChain(Arena* first) LOCKS_EXCLUDED(lock_);
   size_t GetBytesAllocated() const LOCKS_EXCLUDED(lock_);
+  // Madvise the memory of free arenas back to the kernel; used by the JIT to reduce memory
+  // usage. This only has an effect when kUseMemMap is true.
+  void TrimMaps() LOCKS_EXCLUDED(lock_);
 
  private:
   mutable Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 70de0db..ac1040d 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -251,6 +251,8 @@
     VLOG(jit) << "Deleting jit";
     jit_.reset(nullptr);
   }
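+  // Delete the arena pool after the JIT, since JIT compilation allocates from it.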
+  arena_pool_.reset();
 
   // Shutdown the fault manager if it was initialized.
   fault_manager.Shutdown();
@@ -787,6 +788,7 @@
   max_spins_before_thin_lock_inflation_ =
       runtime_options.GetOrDefault(Opt::MaxSpinsBeforeThinLockInflation);
 
+  arena_pool_.reset(new ArenaPool);
   monitor_list_ = new MonitorList;
   monitor_pool_ = MonitorPool::Create();
   thread_list_ = new ThreadList;
diff --git a/runtime/runtime.h b/runtime/runtime.h
index 5078b7f..4cddb5c 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -28,6 +28,7 @@
 
 #include "arch/instruction_set.h"
 #include "base/allocator.h"
+#include "base/arena_allocator.h"
 #include "base/macros.h"
 #include "compiler_callbacks.h"
 #include "gc_root.h"
@@ -545,6 +546,13 @@
 
   void CreateJit();
 
+  ArenaPool* GetArenaPool() {
+    return arena_pool_.get();
+  }
+  const ArenaPool* GetArenaPool() const {
+    return arena_pool_.get();
+  }
+
  private:
   static void InitPlatformSignalHandlers();
 
@@ -608,6 +616,8 @@
 
   gc::Heap* heap_;
 
+  std::unique_ptr<ArenaPool> arena_pool_;
+
   // The number of spins that are done before thread suspension is used to forcibly inflate.
   size_t max_spins_before_thin_lock_inflation_;
   MonitorList* monitor_list_;