Move JIT code allocation to JitMemoryRegion.
So that JitCodeCache does not need to know about the dual view mapping.
Test: m
Change-Id: I19d2e8e69eef07648803937fff928aa9b45ac5a2
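
For context, the caller-visible contract after this change is small. The sketch below matches the new call sites in the diff, but is illustrative rather than a quote of the tree:

    // JitCodeCache now only asks the region for an executable pointer to the
    // copied code and reads the header back; all dual-view handling and cache
    // maintenance lives in JitMemoryRegion::AllocateCode (jit_memory_region.cc).
    const uint8_t* code_ptr = region->AllocateCode(
        code, code_size, stack_map, has_should_deoptimize_flag);
    if (code_ptr == nullptr) {
      return nullptr;  // Allocation failed.
    }
    OatQuickMethodHeader* method_header =
        OatQuickMethodHeader::FromCodePointer(code_ptr);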
diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc
index 100b399..300225e 100644
--- a/runtime/jit/jit_code_cache.cc
+++ b/runtime/jit/jit_code_cache.cc
@@ -372,17 +372,6 @@
return in_collection;
}
-static size_t GetJitCodeAlignment() {
- if (kRuntimeISA == InstructionSet::kArm || kRuntimeISA == InstructionSet::kThumb2) {
- // Some devices with 32-bit ARM kernels need additional JIT code alignment when using dual
- // view JIT (b/132205399). The alignment returned here coincides with the typical ARM d-cache
- // line (though the value should be probed ideally). Both the method header and code in the
- // cache are aligned to this size.
- return 64;
- }
- return GetInstructionSetAlignment(kRuntimeISA);
-}
-
static uintptr_t FromCodeToAllocation(const void* code) {
size_t alignment = GetJitCodeAlignment();
return reinterpret_cast<uintptr_t>(code) - RoundUp(sizeof(OatQuickMethodHeader), alignment);
@@ -704,85 +693,18 @@
DCheckRootsAreValid(roots);
}
- OatQuickMethodHeader* method_header = nullptr;
- uint8_t* code_ptr = nullptr;
-
MutexLock mu(self, *Locks::jit_lock_);
// We need to make sure that there will be no jit-gcs going on and wait for any ongoing one to
// finish.
WaitForPotentialCollectionToCompleteRunnable(self);
- {
- ScopedCodeCacheWrite scc(*region);
-
- size_t alignment = GetJitCodeAlignment();
- // Ensure the header ends up at expected instruction alignment.
- size_t header_size = RoundUp(sizeof(OatQuickMethodHeader), alignment);
- size_t total_size = header_size + code_size;
-
- // AllocateCode allocates memory in non-executable region for alignment header and code. The
- // header size may include alignment padding.
- uint8_t* nox_memory = region->AllocateCode(total_size, alignment);
- if (nox_memory == nullptr) {
- return nullptr;
- }
-
- // code_ptr points to non-executable code.
- code_ptr = nox_memory + header_size;
- std::copy(code, code + code_size, code_ptr);
- method_header = OatQuickMethodHeader::FromCodePointer(code_ptr);
-
- // From here code_ptr points to executable code.
- code_ptr = region->GetExecutableAddress(code_ptr);
-
- new (method_header) OatQuickMethodHeader(
- (stack_map != nullptr) ? code_ptr - stack_map : 0u,
- code_size);
-
- DCHECK(!Runtime::Current()->IsAotCompiler());
- if (has_should_deoptimize_flag) {
- method_header->SetHasShouldDeoptimizeFlag();
- }
-
- // Update method_header pointer to executable code region.
- method_header = region->GetExecutableAddress(method_header);
-
- // Both instruction and data caches need flushing to the point of unification where both share
- // a common view of memory. Flushing the data cache ensures the dirty cachelines from the
- // newly added code are written out to the point of unification. Flushing the instruction
- // cache ensures the newly written code will be fetched from the point of unification before
- // use. Memory in the code cache is re-cycled as code is added and removed. The flushes
- // prevent stale code from residing in the instruction cache.
- //
- // Caches are flushed before write permission is removed because some ARMv8 Qualcomm kernels
- // may trigger a segfault if a page fault occurs when requesting a cache maintenance
- // operation. This is a kernel bug that we need to work around until affected devices
- // (e.g. Nexus 5X and 6P) stop being supported or their kernels are fixed.
- //
- // For reference, this behavior is caused by this commit:
- // https://android.googlesource.com/kernel/msm/+/3fbe6bc28a6b9939d0650f2f17eb5216c719950c
- //
- if (region->HasDualCodeMapping()) {
- // Flush the data cache lines associated with the non-executable copy of the code just added.
- FlushDataCache(nox_memory, nox_memory + total_size);
- }
- // FlushInstructionCache() flushes both data and instruction caches lines. The cacheline range
- // flushed is for the executable mapping of the code just added.
- uint8_t* x_memory = reinterpret_cast<uint8_t*>(method_header);
- FlushInstructionCache(x_memory, x_memory + total_size);
-
- // Ensure CPU instruction pipelines are flushed for all cores. This is necessary for
- // correctness as code may still be in instruction pipelines despite the i-cache flush. It is
- // not safe to assume that changing permissions with mprotect (RX->RWX->RX) will cause a TLB
- // shootdown (incidentally invalidating the CPU pipelines by sending an IPI to all cores to
- // notify them of the TLB invalidation). Some architectures, notably ARM and ARM64, have
- // hardware support that broadcasts TLB invalidations and so their kernels have no software
- // based TLB shootdown. The sync-core flavor of membarrier was introduced in Linux 4.16 to
- // address this (see mbarrier(2)). The membarrier here will fail on prior kernels and on
- // platforms lacking the appropriate support.
- art::membarrier(art::MembarrierCommand::kPrivateExpeditedSyncCore);
-
- number_of_compilations_++;
+ const uint8_t* code_ptr = region->AllocateCode(
+ code, code_size, stack_map, has_should_deoptimize_flag);
+ if (code_ptr == nullptr) {
+ return nullptr;
}
+ OatQuickMethodHeader* method_header = OatQuickMethodHeader::FromCodePointer(code_ptr);
+
+ number_of_compilations_++;
// We need to update the entry point in the runnable state for the instrumentation.
{
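
The header placement that FromCodeToAllocation and OatQuickMethodHeader::FromCodePointer rely on is pure alignment arithmetic. A minimal standalone sketch, assuming hypothetical constants (only the RoundUp relationship mirrors the real code):

    #include <cstdint>

    // Mirrors art's RoundUp for power-of-two alignments (base/bit_utils.h).
    constexpr uintptr_t RoundUp(uintptr_t x, uintptr_t alignment) {
      return (x + alignment - 1) & ~(alignment - 1);
    }

    int main() {
      constexpr uintptr_t kAlignment = 64;   // GetJitCodeAlignment() on 32-bit ARM.
      constexpr uintptr_t kHeaderSize = 24;  // Hypothetical sizeof(OatQuickMethodHeader).
      // Layout of one allocation: [padding][OatQuickMethodHeader][code...].
      uintptr_t header_size = RoundUp(kHeaderSize, kAlignment);  // 64, so code stays aligned.
      uintptr_t allocation = 0x10000;  // From mspace_memalign, already aligned.
      uintptr_t code_ptr = allocation + header_size;
      // FromCodeToAllocation inverts the layout: code minus rounded header size.
      return (code_ptr - RoundUp(kHeaderSize, kAlignment)) == allocation ? 0 : 1;
    }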
diff --git a/runtime/jit/jit_memory_region.cc b/runtime/jit/jit_memory_region.cc
index ab9a817..c250d6b 100644
--- a/runtime/jit/jit_memory_region.cc
+++ b/runtime/jit/jit_memory_region.cc
@@ -23,6 +23,7 @@
#include "base/bit_utils.h" // For RoundDown, RoundUp
#include "base/globals.h"
#include "base/logging.h" // For VLOG.
+#include "base/membarrier.h"
#include "base/memfd.h"
#include "base/systrace.h"
#include "gc/allocator/dlmalloc.h"
@@ -296,24 +297,87 @@
}
}
-uint8_t* JitMemoryRegion::AllocateCode(size_t code_size, size_t alignment) {
- // Each allocation should be on its own set of cache lines.
- // `code_size` covers the OatQuickMethodHeader, the JIT generated machine code,
- // and any alignment padding.
- size_t header_size = RoundUp(sizeof(OatQuickMethodHeader), alignment);
- DCHECK_GT(code_size, header_size);
- uint8_t* result = reinterpret_cast<uint8_t*>(
- mspace_memalign(exec_mspace_, alignment, code_size));
+const uint8_t* JitMemoryRegion::AllocateCode(const uint8_t* code,
+ size_t code_size,
+ const uint8_t* stack_map,
+ bool has_should_deoptimize_flag) {
+ ScopedCodeCacheWrite scc(*this);
+
+ size_t alignment = GetJitCodeAlignment();
// Ensure the header ends up at expected instruction alignment.
- DCHECK_ALIGNED_PARAM(reinterpret_cast<uintptr_t>(result + header_size), alignment);
- used_memory_for_code_ += mspace_usable_size(result);
+ size_t header_size = RoundUp(sizeof(OatQuickMethodHeader), alignment);
+ size_t total_size = header_size + code_size;
+
+ // Each allocation should be on its own set of cache lines.
+ // `total_size` covers the OatQuickMethodHeader, the JIT generated machine code,
+ // and any alignment padding.
+ DCHECK_GT(total_size, header_size);
+ uint8_t* w_memory = reinterpret_cast<uint8_t*>(
+ mspace_memalign(exec_mspace_, alignment, total_size));
+ if (w_memory == nullptr) {
+ return nullptr;
+ }
+ uint8_t* x_memory = GetExecutableAddress(w_memory);
+ // Ensure the header ends up at expected instruction alignment.
+ DCHECK_ALIGNED_PARAM(reinterpret_cast<uintptr_t>(w_memory + header_size), alignment);
+ used_memory_for_code_ += mspace_usable_size(w_memory);
+ const uint8_t* result = x_memory + header_size;
+
+ // Write the code.
+ std::copy(code, code + code_size, w_memory + header_size);
+
+ // Write the header.
+ OatQuickMethodHeader* method_header =
+ OatQuickMethodHeader::FromCodePointer(w_memory + header_size);
+ new (method_header) OatQuickMethodHeader(
+ (stack_map != nullptr) ? result - stack_map : 0u,
+ code_size);
+ if (has_should_deoptimize_flag) {
+ method_header->SetHasShouldDeoptimizeFlag();
+ }
+
+ // Both instruction and data caches need flushing to the point of unification where both share
+ // a common view of memory. Flushing the data cache ensures the dirty cachelines from the
+ // newly added code are written out to the point of unification. Flushing the instruction
+ // cache ensures the newly written code will be fetched from the point of unification before
+ // use. Memory in the code cache is re-cycled as code is added and removed. The flushes
+ // prevent stale code from residing in the instruction cache.
+ //
+ // Caches are flushed before write permission is removed because some ARMv8 Qualcomm kernels
+ // may trigger a segfault if a page fault occurs when requesting a cache maintenance
+ // operation. This is a kernel bug that we need to work around until affected devices
+ // (e.g. Nexus 5X and 6P) stop being supported or their kernels are fixed.
+ //
+ // For reference, this behavior is caused by this commit:
+ // https://android.googlesource.com/kernel/msm/+/3fbe6bc28a6b9939d0650f2f17eb5216c719950c
+ //
+ if (HasDualCodeMapping()) {
+ // Flush the data cache lines associated with the non-executable copy of the code just added.
+ FlushDataCache(w_memory, w_memory + total_size);
+ }
+
+ // FlushInstructionCache() flushes both data and instruction cache lines. The cacheline range
+ // flushed is for the executable mapping of the code just added.
+ FlushInstructionCache(x_memory, x_memory + total_size);
+
+ // Ensure CPU instruction pipelines are flushed for all cores. This is necessary for
+ // correctness as code may still be in instruction pipelines despite the i-cache flush. It is
+ // not safe to assume that changing permissions with mprotect (RX->RWX->RX) will cause a TLB
+ // shootdown (incidentally invalidating the CPU pipelines by sending an IPI to all cores to
+ // notify them of the TLB invalidation). Some architectures, notably ARM and ARM64, have
+ // hardware support that broadcasts TLB invalidations and so their kernels have no software
+ // based TLB shootdown. The sync-core flavor of membarrier was introduced in Linux 4.16 to
+ // address this (see membarrier(2)). The membarrier here will fail on prior kernels and on
+ // platforms lacking the appropriate support.
+ art::membarrier(art::MembarrierCommand::kPrivateExpeditedSyncCore);
+
return result;
}
-void JitMemoryRegion::FreeCode(uint8_t* code) {
+void JitMemoryRegion::FreeCode(const uint8_t* code) {
code = GetNonExecutableAddress(code);
used_memory_for_code_ -= mspace_usable_size(code);
- mspace_free(exec_mspace_, code);
+ mspace_free(exec_mspace_, const_cast<uint8_t*>(code));
}
uint8_t* JitMemoryRegion::AllocateData(size_t data_size) {
diff --git a/runtime/jit/jit_memory_region.h b/runtime/jit/jit_memory_region.h
index dda9fc2..8885ee8 100644
--- a/runtime/jit/jit_memory_region.h
+++ b/runtime/jit/jit_memory_region.h
@@ -19,6 +19,7 @@
#include <string>
+#include "arch/instruction_set.h"
#include "base/globals.h"
#include "base/locks.h"
#include "base/mem_map.h"
@@ -32,6 +33,17 @@
// architectures.
static constexpr int kJitCodeAccountingBytes = 16;
+inline size_t GetJitCodeAlignment() {
+ if (kRuntimeISA == InstructionSet::kArm || kRuntimeISA == InstructionSet::kThumb2) {
+ // Some devices with 32-bit ARM kernels need additional JIT code alignment when using dual
+ // view JIT (b/132205399). The alignment returned here coincides with the typical ARM d-cache
+ // line size (ideally the value would be probed from the hardware). Both the method header
+ // and code in the cache are aligned to this size.
+ return 64;
+ }
+ return GetInstructionSetAlignment(kRuntimeISA);
+}
+
// Represents a memory region for the JIT, where code and data are stored. This class
// provides allocation and deallocation primitives.
class JitMemoryRegion {
@@ -62,8 +74,15 @@
// Set the footprint limit of the code cache.
void SetFootprintLimit(size_t new_footprint) REQUIRES(Locks::jit_lock_);
- uint8_t* AllocateCode(size_t code_size, size_t alignment) REQUIRES(Locks::jit_lock_);
- void FreeCode(uint8_t* code) REQUIRES(Locks::jit_lock_);
+
+ // Copy the code into the region, and allocate an OatQuickMethodHeader.
+ // Callers should not write into the returned memory, as it may be read-only.
+ const uint8_t* AllocateCode(const uint8_t* code,
+ size_t code_size,
+ const uint8_t* stack_map,
+ bool has_should_deoptimize_flag)
+ REQUIRES(Locks::jit_lock_);
+ void FreeCode(const uint8_t* code) REQUIRES(Locks::jit_lock_);
uint8_t* AllocateData(size_t data_size) REQUIRES(Locks::jit_lock_);
void FreeData(uint8_t* data) REQUIRES(Locks::jit_lock_);
@@ -83,28 +102,10 @@
return exec_pages_.HasAddress(ptr);
}
- const MemMap* GetUpdatableCodeMapping() const {
- if (HasDualCodeMapping()) {
- return &non_exec_pages_;
- } else if (HasCodeMapping()) {
- return &exec_pages_;
- } else {
- return nullptr;
- }
- }
-
const MemMap* GetExecPages() const {
return &exec_pages_;
}
- template <typename T> T* GetExecutableAddress(T* src_ptr) {
- return TranslateAddress(src_ptr, non_exec_pages_, exec_pages_);
- }
-
- template <typename T> T* GetNonExecutableAddress(T* src_ptr) {
- return TranslateAddress(src_ptr, exec_pages_, non_exec_pages_);
- }
-
void* MoreCore(const void* mspace, intptr_t increment);
bool OwnsSpace(const void* mspace) const NO_THREAD_SAFETY_ANALYSIS {
@@ -133,11 +134,29 @@
if (!HasDualCodeMapping()) {
return src_ptr;
}
- CHECK(src.HasAddress(src_ptr));
- uint8_t* const raw_src_ptr = reinterpret_cast<uint8_t*>(src_ptr);
+ CHECK(src.HasAddress(src_ptr)) << reinterpret_cast<const void*>(src_ptr);
+ const uint8_t* const raw_src_ptr = reinterpret_cast<const uint8_t*>(src_ptr);
return reinterpret_cast<T*>(raw_src_ptr - src.Begin() + dst.Begin());
}
+ const MemMap* GetUpdatableCodeMapping() const {
+ if (HasDualCodeMapping()) {
+ return &non_exec_pages_;
+ } else if (HasCodeMapping()) {
+ return &exec_pages_;
+ } else {
+ return nullptr;
+ }
+ }
+
+ template <typename T> T* GetExecutableAddress(T* src_ptr) {
+ return TranslateAddress(src_ptr, non_exec_pages_, exec_pages_);
+ }
+
+ template <typename T> T* GetNonExecutableAddress(T* src_ptr) {
+ return TranslateAddress(src_ptr, exec_pages_, non_exec_pages_);
+ }
+
static int CreateZygoteMemory(size_t capacity, std::string* error_msg);
static bool ProtectZygoteMemory(int fd, std::string* error_msg);
@@ -178,6 +197,7 @@
// The opaque mspace for allocating code.
void* exec_mspace_ GUARDED_BY(Locks::jit_lock_);
+ friend class ScopedCodeCacheWrite; // For GetUpdatableCodeMapping
friend class TestZygoteMemory;
};
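
Finally, the TranslateAddress helper that GetExecutableAddress and GetNonExecutableAddress wrap is plain same-offset arithmetic between the two mappings. A hedged standalone rendering, with MemMap replaced by a toy struct (the real helper additionally returns src_ptr unchanged when there is no dual mapping):

    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    // Toy stand-in for art::MemMap, for illustration only.
    struct Mapping {
      uintptr_t begin;
      size_t size;
      bool HasAddress(uintptr_t p) const { return p >= begin && p < begin + size; }
    };

    // Same byte offset, other view; mirrors JitMemoryRegion::TranslateAddress.
    uintptr_t Translate(uintptr_t src_ptr, const Mapping& src, const Mapping& dst) {
      assert(src.HasAddress(src_ptr));  // The real code CHECKs and logs the pointer.
      return src_ptr - src.begin + dst.begin;
    }

    int main() {
      Mapping non_exec{0x70000000, 0x10000};
      Mapping exec{0x78000000, 0x10000};
      uintptr_t w = non_exec.begin + 0x1240;       // Address in the RW view.
      uintptr_t x = Translate(w, non_exec, exec);  // GetExecutableAddress analogue.
      assert(Translate(x, exec, non_exec) == w);   // Round-trips.
      return 0;
    }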