Align uffd-compacted spaces for faster mremap during pause

Aligning the moving-space related mappings to 2MB and the linear-alloc
spaces to 1GB reduces the latency of the mremap calls performed during
the compaction pause by leveraging the kernel's faster page-table move.
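
For reference, a minimal stand-alone sketch (not part of this change,
assuming 4 KiB pages and 8-byte page-table entries, i.e. 512 entries
per page-table page) mirroring the alignment constants added to
libartbase/base/globals.h:

  #include <cstddef>
  #include <cstdint>
  #include <cstdio>

  constexpr size_t kPageSize = 4096;
  // 512 entries per table page => one PMD entry spans 512 * 4 KiB = 2 MiB.
  constexpr size_t kPMDSize = (kPageSize / sizeof(uint64_t)) * kPageSize;
  // One PUD entry spans 512 * 2 MiB = 1 GiB.
  constexpr size_t kPUDSize = (kPageSize / sizeof(uint64_t)) * kPMDSize;

  // Pick the coarsest page-table alignment the mapping can benefit from.
  constexpr size_t BestPageTableAlignment(size_t size) {
    return size < kPUDSize ? kPMDSize : kPUDSize;
  }

  int main() {
    // 64 MiB space -> 2 MiB (PMD) alignment.
    printf("%zu MiB\n", BestPageTableAlignment(size_t{64} << 20) >> 20);
    // 1 GiB space -> 1 GiB (PUD) alignment.
    printf("%zu MiB\n", BestPageTableAlignment(size_t{1} << 30) >> 20);
    return 0;
  }

When the source and destination of an mremap are both aligned this way,
the kernel can move page tables at the PMD/PUD level instead of copying
individual PTEs, which is what shortens the compaction pause.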

Bug: 160737021
Test: art/test/testrunner/testrunner.py
Change-Id: I4f7b3b7dde785002d6c41b52eda9f1ac5132c0d7
diff --git a/libartbase/base/globals.h b/libartbase/base/globals.h
index f4d44b8..4103154 100644
--- a/libartbase/base/globals.h
+++ b/libartbase/base/globals.h
@@ -38,6 +38,17 @@
 // compile-time constant so the compiler can generate better code.
 static constexpr size_t kPageSize = 4096;
 
+// TODO: Kernels for arm and x86, in both 32-bit and 64-bit modes, use 512 entries per page-table
+// page. Find a way to confirm that in userspace.
+// Address range covered by 1 Page Middle Directory (PMD) entry in the page table.
+static constexpr size_t kPMDSize = (kPageSize / sizeof(uint64_t)) * kPageSize;
+// Address range covered by 1 Page Upper Directory (PUD) entry in the page table.
+static constexpr size_t kPUDSize = (kPageSize / sizeof(uint64_t)) * kPMDSize;
+// Returns the ideal alignment corresponding to page-table levels for the
+// given size.
+static constexpr size_t BestPageTableAlignment(size_t size) {
+  return size < kPUDSize ? kPMDSize : kPUDSize;
+}
 // Clion, clang analyzer, etc can falsely believe that "if (kIsDebugBuild)" always
 // returns the same value. By wrapping into a call to another constexpr function, we force it
 // to realize that is not actually always evaluating to the same value.
diff --git a/libartbase/base/mem_map.cc b/libartbase/base/mem_map.cc
index 688325d..b3e2840 100644
--- a/libartbase/base/mem_map.cc
+++ b/libartbase/base/mem_map.cc
@@ -389,6 +389,32 @@
                 reuse);
 }
 
+MemMap MemMap::MapAnonymousAligned(const char* name,
+                                   size_t byte_count,
+                                   int prot,
+                                   bool low_4gb,
+                                   size_t alignment,
+                                   /*out=*/std::string* error_msg) {
+  DCHECK(IsPowerOfTwo(alignment));
+  DCHECK_GT(alignment, kPageSize);
+  // Allocate extra 'alignment - kPageSize' bytes so that the mapping can be aligned.
+  MemMap ret = MapAnonymous(name,
+                            /*addr=*/nullptr,
+                            byte_count + alignment - kPageSize,
+                            prot,
+                            low_4gb,
+                            /*reuse=*/false,
+                            /*reservation=*/nullptr,
+                            error_msg);
+  if (LIKELY(ret.IsValid())) {
+    ret.AlignBy(alignment, /*align_both_ends=*/false);
+    ret.SetSize(byte_count);
+    DCHECK_EQ(ret.Size(), byte_count);
+    DCHECK_ALIGNED_PARAM(ret.Begin(), alignment);
+  }
+  return ret;
+}
+
 MemMap MemMap::MapPlaceholder(const char* name, uint8_t* addr, size_t byte_count) {
   if (byte_count == 0) {
     return Invalid();
@@ -1247,40 +1273,46 @@
   }
 }
 
-void MemMap::AlignBy(size_t size) {
+void MemMap::AlignBy(size_t alignment, bool align_both_ends) {
   CHECK_EQ(begin_, base_begin_) << "Unsupported";
   CHECK_EQ(size_, base_size_) << "Unsupported";
-  CHECK_GT(size, static_cast<size_t>(kPageSize));
-  CHECK_ALIGNED(size, kPageSize);
+  CHECK_GT(alignment, static_cast<size_t>(kPageSize));
+  CHECK_ALIGNED(alignment, kPageSize);
   CHECK(!reuse_);
-  if (IsAlignedParam(reinterpret_cast<uintptr_t>(base_begin_), size) &&
-      IsAlignedParam(base_size_, size)) {
+  if (IsAlignedParam(reinterpret_cast<uintptr_t>(base_begin_), alignment) &&
+      (!align_both_ends || IsAlignedParam(base_size_, alignment))) {
     // Already aligned.
     return;
   }
   uint8_t* base_begin = reinterpret_cast<uint8_t*>(base_begin_);
-  uint8_t* base_end = base_begin + base_size_;
-  uint8_t* aligned_base_begin = AlignUp(base_begin, size);
-  uint8_t* aligned_base_end = AlignDown(base_end, size);
+  uint8_t* aligned_base_begin = AlignUp(base_begin, alignment);
   CHECK_LE(base_begin, aligned_base_begin);
-  CHECK_LE(aligned_base_end, base_end);
-  size_t aligned_base_size = aligned_base_end - aligned_base_begin;
-  CHECK_LT(aligned_base_begin, aligned_base_end)
-      << "base_begin = " << reinterpret_cast<void*>(base_begin)
-      << " base_end = " << reinterpret_cast<void*>(base_end);
-  CHECK_GE(aligned_base_size, size);
-  // Unmap the unaligned parts.
   if (base_begin < aligned_base_begin) {
     MEMORY_TOOL_MAKE_UNDEFINED(base_begin, aligned_base_begin - base_begin);
     CHECK_EQ(TargetMUnmap(base_begin, aligned_base_begin - base_begin), 0)
         << "base_begin=" << reinterpret_cast<void*>(base_begin)
         << " aligned_base_begin=" << reinterpret_cast<void*>(aligned_base_begin);
   }
-  if (aligned_base_end < base_end) {
-    MEMORY_TOOL_MAKE_UNDEFINED(aligned_base_end, base_end - aligned_base_end);
-    CHECK_EQ(TargetMUnmap(aligned_base_end, base_end - aligned_base_end), 0)
-        << "base_end=" << reinterpret_cast<void*>(base_end)
-        << " aligned_base_end=" << reinterpret_cast<void*>(aligned_base_end);
+  uint8_t* base_end = base_begin + base_size_;
+  size_t aligned_base_size;
+  if (align_both_ends) {
+    uint8_t* aligned_base_end = AlignDown(base_end, alignment);
+    CHECK_LE(aligned_base_end, base_end);
+    CHECK_LT(aligned_base_begin, aligned_base_end)
+        << "base_begin = " << reinterpret_cast<void*>(base_begin)
+        << " base_end = " << reinterpret_cast<void*>(base_end);
+    aligned_base_size = aligned_base_end - aligned_base_begin;
+    CHECK_GE(aligned_base_size, alignment);
+    if (aligned_base_end < base_end) {
+      MEMORY_TOOL_MAKE_UNDEFINED(aligned_base_end, base_end - aligned_base_end);
+      CHECK_EQ(TargetMUnmap(aligned_base_end, base_end - aligned_base_end), 0)
+          << "base_end=" << reinterpret_cast<void*>(base_end)
+          << " aligned_base_end=" << reinterpret_cast<void*>(aligned_base_end);
+    }
+  } else {
+    CHECK_LT(aligned_base_begin, base_end)
+        << "base_begin = " << reinterpret_cast<void*>(base_begin);
+    aligned_base_size = base_end - aligned_base_begin;
   }
   std::lock_guard<std::mutex> mu(*mem_maps_lock_);
   if (base_begin < aligned_base_begin) {
diff --git a/libartbase/base/mem_map.h b/libartbase/base/mem_map.h
index 28d1058..42120a3 100644
--- a/libartbase/base/mem_map.h
+++ b/libartbase/base/mem_map.h
@@ -137,6 +137,17 @@
                              /*inout*/MemMap* reservation,
                              /*out*/std::string* error_msg,
                              bool use_debug_name = true);
+
+  // Request an aligned anonymous region. We can't directly ask for a MAP_SHARED (anonymous or
+  // otherwise) mapping to be aligned, as in that case a file offset is involved, which could put
+  // the starting offset out of sync with another mapping of the same file.
+  static MemMap MapAnonymousAligned(const char* name,
+                                    size_t byte_count,
+                                    int prot,
+                                    bool low_4gb,
+                                    size_t alignment,
+                                    /*out=*/std::string* error_msg);
+
   static MemMap MapAnonymous(const char* name,
                              size_t byte_count,
                              int prot,
@@ -310,8 +321,9 @@
   // intermittently.
   void TryReadable();
 
-  // Align the map by unmapping the unaligned parts at the lower and the higher ends.
-  void AlignBy(size_t size);
+  // Align the map by unmapping the unaligned part at the lower end and, if 'align_both_ends' is
+  // true, at the higher end as well.
+  void AlignBy(size_t alignment, bool align_both_ends = true);
 
   // For annotation reasons.
   static std::mutex* GetMemMapsLock() RETURN_CAPABILITY(mem_maps_lock_) {
diff --git a/runtime/base/gc_visited_arena_pool.cc b/runtime/base/gc_visited_arena_pool.cc
index 8b778c7..6bf52ce 100644
--- a/runtime/base/gc_visited_arena_pool.cc
+++ b/runtime/base/gc_visited_arena_pool.cc
@@ -81,35 +81,23 @@
     size = std::max(min_size, kLow4GBLinearAllocPoolSize);
   }
 #endif
-  Runtime* runtime = Runtime::Current();
-  gc::collector::MarkCompact* mark_compact = runtime->GetHeap()->MarkCompactCollector();
+  size_t alignment = BestPageTableAlignment(size);
+  DCHECK_GE(size, kPMDSize);
   std::string err_msg;
-  bool mapped_shared;
-  // We use MAP_SHARED on non-zygote processes for leveraging userfaultfd's minor-fault feature.
-  if (gUseUserfaultfd && mark_compact->MapLinearAllocShared()) {
-    maps_.emplace_back(MemMap::MapFile(size,
-                                       PROT_READ | PROT_WRITE,
-                                       MAP_ANONYMOUS | MAP_SHARED,
-                                       -1,
-                                       /*start=*/0,
-                                       low_4gb_,
-                                       name_,
-                                       &err_msg));
-    mapped_shared = true;
-  } else {
-    maps_.emplace_back(
-        MemMap::MapAnonymous(name_, size, PROT_READ | PROT_WRITE, low_4gb_, &err_msg));
-    mapped_shared = false;
-  }
-
+  maps_.emplace_back(MemMap::MapAnonymousAligned(
+      name_, size, PROT_READ | PROT_WRITE, low_4gb_, alignment, &err_msg));
   MemMap& map = maps_.back();
   if (!map.IsValid()) {
     LOG(FATAL) << "Failed to allocate " << name_ << ": " << err_msg;
     UNREACHABLE();
   }
+
   if (gUseUserfaultfd) {
     // Create a shadow-map for the map being added for userfaultfd GC
-    mark_compact->AddLinearAllocSpaceData(map.Begin(), map.Size(), mapped_shared);
+    gc::collector::MarkCompact* mark_compact =
+        Runtime::Current()->GetHeap()->MarkCompactCollector();
+    DCHECK_NE(mark_compact, nullptr);
+    mark_compact->AddLinearAllocSpaceData(map.Begin(), map.Size());
   }
   Chunk* chunk = new Chunk(map.Begin(), map.Size());
   best_fit_allocs_.insert(chunk);
diff --git a/runtime/gc/collector/mark_compact.cc b/runtime/gc/collector/mark_compact.cc
index 66b58cf..50f5708 100644
--- a/runtime/gc/collector/mark_compact.cc
+++ b/runtime/gc/collector/mark_compact.cc
@@ -266,8 +266,9 @@
           reinterpret_cast<uintptr_t>(bump_pointer_space_->Limit())));
 
   // Create one MemMap for all the data structures
-  size_t chunk_info_vec_size = bump_pointer_space_->Capacity() / kOffsetChunkSize;
-  size_t nr_moving_pages = bump_pointer_space_->Capacity() / kPageSize;
+  size_t moving_space_size = bump_pointer_space_->Capacity();
+  size_t chunk_info_vec_size = moving_space_size / kOffsetChunkSize;
+  size_t nr_moving_pages = moving_space_size / kPageSize;
   size_t nr_non_moving_pages = heap->GetNonMovingSpace()->Capacity() / kPageSize;
 
   std::string err_msg;
@@ -296,13 +297,21 @@
     pre_compact_offset_moving_space_ = reinterpret_cast<uint32_t*>(p);
   }
 
+  size_t moving_space_alignment = BestPageTableAlignment(moving_space_size);
+  // The moving space is created at a fixed address, which is expected to be
+  // PMD-size aligned.
+  if (!IsAlignedParam(bump_pointer_space_->Begin(), moving_space_alignment)) {
+    LOG(WARNING) << "Bump pointer space is not aligned to " << PrettySize(moving_space_alignment)
+                 << ". This can lead to longer stop-the-world pauses for compaction";
+  }
   // NOTE: PROT_NONE is used here as these mappings are for address space reservation
   // only and will be used only after appropriately remapping them.
-  from_space_map_ = MemMap::MapAnonymous("Concurrent mark-compact from-space",
-                                         bump_pointer_space_->Capacity(),
-                                         PROT_NONE,
-                                         /*low_4gb=*/ kObjPtrPoisoning,
-                                         &err_msg);
+  from_space_map_ = MemMap::MapAnonymousAligned("Concurrent mark-compact from-space",
+                                                moving_space_size,
+                                                PROT_NONE,
+                                                /*low_4gb=*/kObjPtrPoisoning,
+                                                moving_space_alignment,
+                                                &err_msg);
   if (UNLIKELY(!from_space_map_.IsValid())) {
     LOG(FATAL) << "Failed to allocate concurrent mark-compact from-space" << err_msg;
   } else {
@@ -316,7 +325,7 @@
   //
   // This map doesn't have to be aligned to 2MB as we don't mremap on it.
   shadow_to_space_map_ = MemMap::MapAnonymous("Concurrent mark-compact moving-space shadow",
-                                              bump_pointer_space_->Capacity(),
+                                              moving_space_size,
                                               PROT_NONE,
                                               /*low_4gb=*/kObjPtrPoisoning,
                                               &err_msg);
@@ -341,15 +350,30 @@
   linear_alloc_spaces_data_.reserve(1);
 }
 
-void MarkCompact::AddLinearAllocSpaceData(uint8_t* begin, size_t len, bool already_shared) {
+void MarkCompact::AddLinearAllocSpaceData(uint8_t* begin, size_t len) {
   DCHECK_ALIGNED(begin, kPageSize);
   DCHECK_ALIGNED(len, kPageSize);
+  DCHECK_GE(len, kPMDSize);
+  size_t alignment = BestPageTableAlignment(len);
+  bool is_shared = false;
+  // We use MAP_SHARED on non-zygote processes to leverage userfaultfd's minor-fault feature.
+  if (map_linear_alloc_shared_) {
+    void* ret = mmap(begin,
+                     len,
+                     PROT_READ | PROT_WRITE,
+                     MAP_ANONYMOUS | MAP_SHARED | MAP_FIXED,
+                     /*fd=*/-1,
+                     /*offset=*/0);
+    CHECK_EQ(ret, begin) << "mmap failed: " << strerror(errno);
+    is_shared = true;
+  }
   std::string err_msg;
-  MemMap shadow(MemMap::MapAnonymous("linear-alloc shadow map",
-                                     len,
-                                     PROT_NONE,
-                                     /*low_4gb=*/false,
-                                     &err_msg));
+  MemMap shadow(MemMap::MapAnonymousAligned("linear-alloc shadow map",
+                                            len,
+                                            PROT_NONE,
+                                            /*low_4gb=*/false,
+                                            alignment,
+                                            &err_msg));
   if (!shadow.IsValid()) {
     LOG(FATAL) << "Failed to allocate linear-alloc shadow map: " << err_msg;
     UNREACHABLE();
@@ -368,7 +392,7 @@
                                          std::forward<MemMap>(page_status_map),
                                          begin,
                                          begin + len,
-                                         already_shared);
+                                         is_shared);
 }
 
 void MarkCompact::BindAndResetBitmaps() {
@@ -2428,8 +2452,6 @@
                                      int fd,
                                      int uffd_mode,
                                      uint8_t* shadow_addr) {
-  // TODO: Create mapping's at 2MB aligned addresses to benefit from optimized
-  // mremap.
   int mremap_flags = MREMAP_MAYMOVE | MREMAP_FIXED;
   if (gHaveMremapDontunmap) {
     mremap_flags |= MREMAP_DONTUNMAP;
diff --git a/runtime/gc/collector/mark_compact.h b/runtime/gc/collector/mark_compact.h
index fa2c430..8ba3774 100644
--- a/runtime/gc/collector/mark_compact.h
+++ b/runtime/gc/collector/mark_compact.h
@@ -137,14 +137,13 @@
   // created or was already done.
   bool CreateUserfaultfd(bool post_fork);
 
-  bool MapLinearAllocShared() const { return map_linear_alloc_shared_; }
   // Returns a pair indicating if userfaultfd itself is available (first) and if
   // so then whether its minor-fault feature is available or not (second).
   static std::pair<bool, bool> GetUffdAndMinorFault();
 
   // Add linear-alloc space data when a new space is added to
   // GcVisitedArenaPool, which mostly happens only once.
-  void AddLinearAllocSpaceData(uint8_t* begin, size_t len, bool already_shared);
+  void AddLinearAllocSpaceData(uint8_t* begin, size_t len);
 
   // In copy-mode of userfaultfd, we don't need to reach a 'processed' state as
   // it's given that processing thread also copies the page, thereby mapping it.